diff --git a/.all-contributorsrc b/.all-contributorsrc deleted file mode 100644 index bc6a9103..00000000 --- a/.all-contributorsrc +++ /dev/null @@ -1,45 +0,0 @@ -{ - "files": [ - "README.md" - ], - "imageSize": 100, - "commit": false, - "contributorsPerLine": 7, - "projectName": "al-folio", - "projectOwner": "alshedivat", - "repoType": "github", - "repoHost": "https://github.com", - "badgeTemplate": "[core_contributors]: https://img.shields.io/badge/core_contributors-<%= contributors.length %>-orange.svg 'Number of core contributors'", - "contributorTemplate": "\">\" width=\"<%= options.imageSize %>px;\" alt=\"\"/>
<%= contributor.name %>
", - "skipCi": true, - "contributors": [ - { - "login": "alshedivat", - "name": "Maruan", - "avatar_url": "https://avatars.githubusercontent.com/u/2126561?v=4", - "profile": "http://maruan.alshedivat.com", - "contributions": [ - "design", - "code" - ] - }, - { - "login": "rohandebsarkar", - "name": "Rohan Deb Sarkar", - "avatar_url": "https://avatars.githubusercontent.com/u/50144004?v=4", - "profile": "http://rohandebsarkar.github.io", - "contributions": [ - "code" - ] - }, - { - "login": "pourmand1376", - "name": "Amir Pourmand", - "avatar_url": "https://avatars.githubusercontent.com/u/32064808?v=4", - "profile": "https://amirpourmand.ir", - "contributions": [ - "code" - ] - } - ] -} diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index 1574b961..00000000 --- a/.dockerignore +++ /dev/null @@ -1,3 +0,0 @@ -_site/ -.git/ -assets/ \ No newline at end of file diff --git a/.editorconfig b/.editorconfig deleted file mode 100644 index 0b008c95..00000000 --- a/.editorconfig +++ /dev/null @@ -1,15 +0,0 @@ -# editorconfig.org - -root = true - -[*] -indent_style = space -indent_size = 2 -end_of_line = lf -charset = utf-8 -trim_trailing_whitespace = true -insert_final_newline = true - -[*.py] -indent_style = space -indent_size = 4 diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml deleted file mode 100644 index c78502f4..00000000 --- a/.github/FUNDING.yml +++ /dev/null @@ -1,12 +0,0 @@ -# These are supported funding model platforms - -github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] -patreon: # Replace with a single Patreon username -open_collective: # Replace with a single Open Collective username -ko_fi: alshedivat -tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel -community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry -liberapay: # Replace with a single Liberapay username -issuehunt: # Replace with a single IssueHunt username -otechie: # 
Replace with a single Otechie username -custom: # ['https://www.buymeacoffee.com/TkFxuKo'] diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index 511f5851..00000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -name: Bug report -about: Create a report to help us improve -title: '' -labels: bug -assignees: '' - ---- - -**Acknowledge the following** -- [ ] I carefully read and followed the [Getting Started](https://github.com/alshedivat/al-folio#getting-started) guide. -- [ ] I read through [FAQ](https://github.com/alshedivat/al-folio#faq) and searched through the [past issues](https://github.com/alshedivat/al-folio/issues), none of which addressed my issue. -- [ ] The issue I am raising is a potential bug in al-folio and not just a usage question.
[For usage questions, please post in the [Discussions](https://github.com/alshedivat/al-folio/discussions) instead of raising an issue.] - -**Describe the bug** -A clear and concise description of what the bug is. - -**To Reproduce** -Steps to reproduce the behavior: -1. Go to '...' -2. Click on '....' -3. Scroll down to '....' -4. See error - -**Expected behavior** -A clear and concise description of what you expected to happen. - -**Screenshots** -If applicable, add screenshots to help explain your problem. - -**System (please complete the following information):** - - OS: [e.g. iOS] - - Browser (and its version) [e.g. chrome, safari] - - Jekyll version [e.g. 3.8.7] -- Ruby version [e.g. 2.6.5] - -**Additional context** -Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md deleted file mode 100644 index 11fc491e..00000000 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -name: Feature request -about: Suggest an idea for this project -title: '' -labels: enhancement -assignees: '' - ---- - -**Is your feature request related to a problem? Please describe.** -A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] - -**Describe the solution you'd like** -A clear and concise description of what you want to happen. - -**Describe alternatives you've considered** -A clear and concise description of any alternative solutions or features you've considered. - -**Additional context** -Add any other context or screenshots about the feature request here. 
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md deleted file mode 100644 index 8b137891..00000000 --- a/.github/pull_request_template.md +++ /dev/null @@ -1 +0,0 @@ - diff --git a/.github/stale.yml b/.github/stale.yml deleted file mode 100644 index 8ec2004d..00000000 --- a/.github/stale.yml +++ /dev/null @@ -1,18 +0,0 @@ -# Number of days of inactivity before an issue becomes stale -daysUntilStale: 60 -# Number of days of inactivity before a stale issue is closed -daysUntilClose: 7 -# Issues with these labels will never be considered stale -exemptLabels: - - pinned - - security - - enhancement -# Label to use when marking an issue as stale -staleLabel: wontfix -# Comment to post when marking an issue as stale. Set to `false` to disable -markComment: > - This issue has been automatically marked as stale because it has not had - recent activity. It will be closed if no further activity occurs. Thank you - for your contributions. -# Comment to post when closing a stale issue. Set to `false` to disable -closeComment: false diff --git a/.github/workflows/deploy-docker-tag.yml b/.github/workflows/deploy-docker-tag.yml deleted file mode 100644 index 3e6b6a3a..00000000 --- a/.github/workflows/deploy-docker-tag.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: Docker Image CI (Upload Tag) - -on: - push: - tags: - - 'v*' - -jobs: - - build: - - runs-on: ubuntu-latest - - steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Buildx - uses: docker/setup-buildx-action@v1 - - - - name: Docker meta - id: meta - uses: docker/metadata-action@v4 - with: - images: amirpourmand/al-folio - - - name: Login - uses: docker/login-action@v1 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - - - name: Build and push - uses: docker/build-push-action@v3 - with: - context: . 
- push: ${{ github.event_name != 'pull_request' }} - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - diff --git a/.github/workflows/deploy-image.yml b/.github/workflows/deploy-image.yml deleted file mode 100644 index b747dfc1..00000000 --- a/.github/workflows/deploy-image.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: Docker Image CI - -on: - push: - branches: [ master ] - -jobs: - - build: - - runs-on: ubuntu-latest - if: github.repository_owner == 'alshedivat' - - steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Buildx - uses: docker/setup-buildx-action@v1 - - - name: Login - uses: docker/login-action@v1 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - - - name: Build and push - uses: docker/build-push-action@v2 - with: - context: . - push: true - tags: amirpourmand/al-folio diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml deleted file mode 100644 index 0301f080..00000000 --- a/.github/workflows/deploy.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: deploy - -on: - push: - branches: - - master - - main - pull_request: - branches: - - master - - main - -jobs: - deploy: - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v3 - - name: Setup Ruby - uses: ruby/setup-ruby@v1 - with: - ruby-version: '3.0.2' - bundler-cache: true - - name: Install deps - run: | - npm install -g mermaid.cli - - name: Setup deploy options - id: setup - run: | - git config --global user.name "GitHub Action" - git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com" - if [[ ${GITHUB_REF} = refs/pull/*/merge ]]; then # pull request - echo "SRC_BRANCH=${GITHUB_HEAD_REF}" >> $GITHUB_OUTPUT - echo "NO_PUSH=--no-push" >> $GITHUB_OUTPUT - elif [[ ${GITHUB_REF} = refs/heads/* ]]; then # branch, e.g. 
master, source etc - echo "SRC_BRANCH=${GITHUB_REF#refs/heads/}" >> $GITHUB_OUTPUT - fi - echo "DEPLOY_BRANCH=gh-pages" >> $GITHUB_OUTPUT - - name: Deploy website - run: yes | bash bin/deploy --verbose ${{ steps.setup.outputs.NO_PUSH }} - --src ${{ steps.setup.outputs.SRC_BRANCH }} - --deploy ${{ steps.setup.outputs.DEPLOY_BRANCH }} diff --git a/.github/workflows/jekyll-gh-pages.yml b/.github/workflows/jekyll-gh-pages.yml deleted file mode 100644 index 559bddf5..00000000 --- a/.github/workflows/jekyll-gh-pages.yml +++ /dev/null @@ -1,51 +0,0 @@ -# Sample workflow for building and deploying a Jekyll site to GitHub Pages -name: Deploy Jekyll with GitHub Pages dependencies preinstalled - -on: - # Runs on pushes targeting the default branch - push: - branches: ["main"] - - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - -# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages -permissions: - contents: read - pages: write - id-token: write - -# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. -# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 
-concurrency: - group: "pages" - cancel-in-progress: false - -jobs: - # Build job - build: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v3 - - name: Setup Pages - uses: actions/configure-pages@v3 - - name: Build with Jekyll - uses: actions/jekyll-build-pages@v1 - with: - source: ./ - destination: ./_site - - name: Upload artifact - uses: actions/upload-pages-artifact@v2 - - # Deployment job - deploy: - environment: - name: github-pages - url: ${{ steps.deployment.outputs.page_url }} - runs-on: ubuntu-latest - needs: build - steps: - - name: Deploy to GitHub Pages - id: deployment - uses: actions/deploy-pages@v2 diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 00000000..e69de29b diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 1807c888..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "cSpell.words": ["affordances", "interpretability", "visuomotor"] -} diff --git a/2023-11-09-dof-visual-place-recognition-satellite/2023-11-09-dof-visual-place-recognition-satellite.md b/2023-11-09-dof-visual-place-recognition-satellite/2023-11-09-dof-visual-place-recognition-satellite.md deleted file mode 100644 index 65616c9e..00000000 --- a/2023-11-09-dof-visual-place-recognition-satellite/2023-11-09-dof-visual-place-recognition-satellite.md +++ /dev/null @@ -1,199 +0,0 @@ ---- -layout: distill -title: 6-DOF estimation through visual place recognition -description: A neural Visual Place Recognition solution is proposed which could help an agent with a downward-facing camera (such as a drone) to geolocate based on prior satellite imagery of terrain. The neural encoder infers extrinsic camera parameters from camera images, enabling estimation of 6 degrees of freedom (6-DOF), namely 3-space position and orientation. By encoding priors about satellite imagery in a neural network, the need for the agent to carry a satellite imagery dataset onboard is avoided. 
-date: 2023-11-09 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Andrew Feldman - url: "https://andrew-feldman.com/" - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-dof-visual-place-recognition-satellite.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Background -# - name: Images and Figures -# subsections: -# - name: Interactive Figures - - name: Proposed solution - subsections: - - name: Image-to-extrinsics encoder architecture - - name: Data sources for offline training - - name: Training and evaluation - subsections: - - name: Data pipeline - - name: Training - - name: Hyperparameters - - name: Evaluation - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -# Introduction - -The goal of this project is to demonstrate how a drone or other platform with a downward-facing camera could perform approximate geolocation through visual place recognition, using a neural scene representation of existing satellite imagery. - -Visual place recognition refers to the ability of an agent to recognize a location which it has not previously seen, by exploiting a system for cross-referencing live camera footage against some ground-truth of prior image data. 
- -In this work, the goal is to compress the ground-truth image data into a neural model which maps live camera footage to geolocation coordinates. - -Twitter user Stephan Sturges demonstrates his solution for allowing a drone with a downward-facing camera to geolocate through cross-referencing against a database of satellite images: - -
-
- {% include figure.html path="assets/img/2023-11-09-dof-visual-place-recognition-satellite/sturges_satellite_vpr.jpeg" class="img-fluid rounded z-depth-1" %} -
-
-
- Twitter user Stephan Sturges shows the results of geolocation based on Visual Place Recognition. -
- -The author of the above tweet employs a reference database of images. It would be interesting to eliminate the need for a raw dataset. - -Thus, this works seeks to develop a neural network which maps a terrain image from the agent's downward-facing camera, to a 6-DOF (position/rotation) representation of the agent in 3-space. Hopefully the neural network is more compact than the dataset itself - although aggressive DNN compression will not be a focus of this work. - -# Background - -The goal-statement - relating a camera image to a location and orientation in the world - has been deeply studied in computer vision and rendering: - -
-
- {% include figure.html path="assets/img/2023-11-09-dof-visual-place-recognition-satellite/camera_intrinsic_extrinsic.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Camera parameters, as described in. -
- -Formally, -* The image-formation problem is modeled as a camera forming an image of the world using a planar sensor. -* **World coordinates** refer to 3-space coordinates in the Earth or world reference frame. -* **Image coordinates** refer to 2-space planar coordinates in the camera image plane. -* **Pixel coordinates** refer to 2-space coordinates in the final image output from the image sensor, taking into account any translation or skew of pixel coordinates with respect to the image coordinates. - -The mapping from world coordinates to pixel coordinates is framed as two composed transformations, described as sets of parameters: -* **Extrinsic camera parameters** - the transformation from world coordinates to image coordinates (affected by factors "extrinsic" to the camera internals, i.e. position and orientation.) -* **Intrinsic camera parameters** - the transformation from image coordinates to pixel coordinates (affected by factors "intrinsic" to the camera's design.) - -And so broadly speaking, this work strives to design a neural network that can map from an image (taken by the agent's downward-facing camera) to camera parameters of the agent's camera. With camera parameters in hand, geolocation parameters automatically drop out from extracting extrinsic translation parameters. - -To simplify the task, assume that camera intrinsic characteristics are consistent from image to image, and thus could easily be calibrated out in any application use-case. Therefore, this work focuses on inferring **extrinsic camera parameters** from an image. We assume that pixels map directly into image space. 
- -The structure of extrinsic camera parameters is as follows: - -$$ -\mathbf{E}_{4 \times 4} = \begin{bmatrix} \mathbf{R}_{3 \times 3} & \mathbf{t}_{3 \times 1} \\ \mathbf{0}_{1 \times 3} & 1 \end{bmatrix} -$$ - -where $$\mathbf{R}_{3 \times 3} \in \mathbb{R^{3 \times 3}}$$ is rotation matrix representing the rotation from the world reference frame to the camera reference frame, and $$\mathbf{t}_{3 \times 1} \in \mathbb{R^{3 \times 1}}$$ represents a translation vector from the world origin to the image/camera origin. - -Then the image coordinates (a.k.a. camera coordinates) $$P_c$$ of a world point $$P_w$$ can be computed as: - -$$ -\mathbf{P_c} = \mathbf{E}_{4 \times 4} \cdot \mathbf{P_w} -$$ - -# Proposed solution - -## Image-to-extrinsics encoder architecture - -The goal of this work, is to train a neural network which maps an image drawn from $$R^{3 \times S \times S}$$ (where $$S$$ is pixel side-length of an image matrix) to a pair of camera extrinsic parameters $$R_{3 \times 3}$$ and $$t_{3 \times 1}$$: - -$$ -\mathbb{R^{3 \times S \times S}} \rightarrow \mathbb{R^{3 \times 3}} \times \mathbb{R^3} -$$ - -The proposed solution is a CNN-based encoder which maps the image into a length-12 vector (the flattened extrinsic parameters); a hypothetical architecture sketch is shown below: - -
-
- {% include figure.html path="assets/img/2023-11-09-dof-visual-place-recognition-satellite/nn.svg" class="img-fluid rounded z-depth-1" %} -
-
-
- Image encoder architecture. -
- -## Data sources for offline training - -Online sources provide downloadable satellite terrain images. - -## Training and evaluation - -The scope of the model's evaluation is, that it will be trained to recognize aerial views of some constrained area i.e. Atlantic City New Jersey; this constrained area will be referred to as the "area of interest." - -### Data pipeline - -The input to the data pipeline is a single aerial image of the area of interest. The output of the pipeline is a data loader which generates augmented images. - -The image of the area of interest is $$\mathbb{R^{3 \times T \times T}}$$ where $$T$$ is the image side-length in pixels. - -Camera images will be of the form $$\mathbb{R^{3 \times S \times S}}$$ where $$S$$ is the image side-length in pixels, which may differ from $$T$$. - -* **Generate an image from the agent camera's vantage-point** - * Convert the area-of-interest image tensor ($$\mathbb{R^{3 \times T \times T}}$$) to a matrix of homogenous world coordinates ($$\mathbb{R^{pixels \times 4}}$$) and an associated matrix of RGB values for each point ($$\mathbb{R^{pixels \times 3}}$$) - * For simplicity, assume that all features in the image have an altitutde of zero - * Thus, all of the pixel world coordinates will lie in a plane - * Generate random extrinsic camera parameters $$R_{3 \times 3}$$ and $$t_{3 \times 1}$$ - * Transform the world coordinates into image coordinates ($$\mathbb{R^{pixels \times 3}}$$) (note, this does not affect the RGB matrix) - * Note - this implicitly accomplishes the commonly-used image augmentations such as shrink/expand, crop, rotate, skew -* **Additional data augmentation** - to prevent overfitting - * Added noise - * Color/brightness adjustment - * TBD -* **Convert the image coordinates and the RGB matrix into a camera image tensor ($$\mathbb{R^{3 \times S \times S}}$$)** - -Each element of a batch from this dataloader, will be a tuple of (extrinsic parameters,camera image). 
- -## Training - -* For each epoch, and each mini-batch... -* unpack batch elements into camera images and ground-truth extrinsic parameters -* Apply the encoder to the camera images -* Loss: MSE between encoder estimates of extrinsic parameters, and the ground-truth values - -### Hyperparameters -* Architecture - * Encoder architecture - CNN vs MLP vs ViT(?) vs ..., number of layers, ... - * Output normalizations - * Nonlinearities - ReLU, tanh, ... -* Learning-rate -* Optimizer - ADAM, etc. -* Regularizations - dropout, L1, L2, ... - -## Evaluation - -For a single epoch, measure the total MSE loss of the model's extrinsic parameter estimates relative to the ground-truth. - -## Feasibility - -Note that I am concurrently taking 6.s980 "Machine learning for inverse graphics" so I already have background in working with camera parameters, which should help me to complete this project on time. \ No newline at end of file diff --git a/2023-11-09-dof-visual-place-recognition-satellite/2023-11-09-dof-visual-place-recognition-satellite/index.html b/2023-11-09-dof-visual-place-recognition-satellite/2023-11-09-dof-visual-place-recognition-satellite/index.html new file mode 100644 index 00000000..7fcbd5f5 --- /dev/null +++ b/2023-11-09-dof-visual-place-recognition-satellite/2023-11-09-dof-visual-place-recognition-satellite/index.html @@ -0,0 +1,32 @@ + 6-DOF estimation through visual place recognition | 6.S898 Deep Learning Blogs 2023

6-DOF estimation through visual place recognition

A neural Visual Place Recognition solution is proposed which could help an agent with a downward-facing camera (such as a drone) to geolocate based on prior satellite imagery of terrain. The neural encoder infers extrinsic camera parameters from camera images, enabling estimation of 6 degrees of freedom (6-DOF), namely 3-space position and orientation. By encoding priors about satellite imagery in a neural network, the need for the agent to carry a satellite imagery dataset onboard is avoided.

Introduction

The goal of this project is to demonstrate how a drone or other platform with a downward-facing camera could perform approximate geolocation through visual place recognition, using a neural scene representation of existing satellite imagery.

Visual place recognition refers to the ability of an agent to recognize a location which it has not previously seen, by exploiting a system for cross-referencing live camera footage against some ground-truth of prior image data.

In this work, the goal is to compress the ground-truth image data into a neural model which maps live camera footage to geolocation coordinates.

Twitter user Stephan Sturges demonstrates his solution for allowing a drone with a downward-facing camera to geolocate through cross-referencing against a database of satellite images:

Twitter user Stephan Sturges shows the results of geolocation based on Visual Place Recognition.

The author of the above tweet employs a reference database of images. It would be interesting to eliminate the need for a raw dataset.

Thus, this works seeks to develop a neural network which maps a terrain image from the agent’s downward-facing camera, to a 6-DOF (position/rotation) representation of the agent in 3-space. Hopefully the neural network is more compact than the dataset itself - although aggressive DNN compression will not be a focus of this work.

Background

The goal-statement - relating a camera image to a location and orientation in the world - has been deeply studied in computer vision and rendering:

Camera parameters, as described in.

Formally,

The mapping from world coordinates to pixel coordinates is framed as two composed transformations, described as sets of parameters:

And so broadly speaking, this work strives to design a neural network that can map from an image (taken by the agent’s downward-facing camera) to camera parameters of the agent’s camera. With camera parameters in hand, geolocation parameters automatically drop out from extracting extrinsic translation parameters.

To simplify the task, assume that camera intrinsic characteristics are consistent from image to image, and thus could easily be calibrated out in any application use-case. Therefore, this work focuses on inferring extrinsic camera parameters from an image. We assume that pixels map directly into image space.

The structure of extrinsic camera parameters is as follows:

\[\mathbf{E}_{4 \times 4} = \begin{bmatrix} \mathbf{R}_{3 \times 3} & \mathbf{t}_{3 \times 1} \\ \mathbf{0}_{1 \times 3} & 1 \end{bmatrix}\]

where \(\mathbf{R}_{3 \times 3} \in \mathbb{R^{3 \times 3}}\) is rotation matrix representing the rotation from the world reference frame to the camera reference frame, and \(\mathbf{t}_{3 \times 1} \in \mathbb{R^{3 \times 1}}\) represents a translation vector from the world origin to the image/camera origin.

Then the image coordinates (a.k.a. camera coordinates) \(P_c\) of a world point \(P_w\) can be computed as:

\[\mathbf{P_c} = \mathbf{E}_{4 \times 4} \cdot \mathbf{P_w}\]

Proposed solution

Image-to-extrinsics encoder architecture

The goal of this work, is to train a neural network which maps an image drawn from \(R^{3 \times S \times S}\) (where \(S\) is pixel side-length of an image matrix) to a pair of camera extrinsic parameters \(R_{3 \times 3}\) and \(t_{3 \times 1}\):

\[\mathbb{R^{3 \times S \times S}} \rightarrow \mathbb{R^{3 \times 3}} \times \mathbb{R^3}\]

The proposed solution is a CNN-based encoder which maps the image into a length-12 vector (the flattened extrinsic parameters); a hypothetical architecture sketch is shown below:

Image encoder architecture.

Data sources for offline training

Online sources provide downloadable satellite terrain images.

Training and evaluation

The scope of the model’s evaluation is, that it will be trained to recognize aerial views of some constrained area i.e. Atlantic City New Jersey; this constrained area will be referred to as the “area of interest.”

Data pipeline

The input to the data pipeline is a single aerial image of the area of interest. The output of the pipeline is a data loader which generates augmented images.

The image of the area of interest is \(\mathbb{R^{3 \times T \times T}}\) where \(T\) is the image side-length in pixels.

Camera images will be of the form \(\mathbb{R^{3 \times S \times S}}\) where \(S\) is the image side-length in pixels, which may differ from \(T\).

Each element of a batch from this dataloader, will be a tuple of (extrinsic parameters,camera image).

Training

Hyperparameters

Evaluation

For a single epoch, measure the total MSE loss of the model’s extrinsic parameter estimates relative to the ground-truth.

Feasibility

Note that I am concurrently taking 6.s980 “Machine learning for inverse graphics” so I already have background in working with camera parameters, which should help me to complete this project on time.

\ No newline at end of file diff --git a/404.html b/404.html index 1a27dcf9..b9bbdbce 100644 --- a/404.html +++ b/404.html @@ -1,12 +1 @@ ---- -layout: page -permalink: /404.html -title: "Page not found" -description: "Looks like there has been a mistake. Nothing exists here." -redirect: false ---- - -

This page does not exist. If you got here after having clicked on a link to review a blog post, it means the associated PR has not been merge yet. - It will be merged by the 19th of February 2023. - - Click here to be redirected to the home page.

+ 404: Page not found | 6.S898 Deep Learning Blogs 2023

404: Page not found

Sorry, we’ve misplaced that URL or it’s pointing to something that doesn’t exist. Head back home to try finding it again.

\ No newline at end of file diff --git a/404.md b/404.md deleted file mode 100644 index 8ca926cf..00000000 --- a/404.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -layout: default -title: "404: Page not found" -permalink: 404.html ---- - -# 404: Page not found -Sorry, we've misplaced that URL or it's pointing to something that doesn't exist. [Head back home]({{ site.url }}) to try finding it again. diff --git a/Gemfile b/Gemfile deleted file mode 100644 index 498497f3..00000000 --- a/Gemfile +++ /dev/null @@ -1,25 +0,0 @@ -source 'https://rubygems.org' -group :jekyll_plugins do - gem 'jekyll' - gem 'jekyll-archives' - gem 'jekyll-diagrams' - gem 'jekyll-email-protect' - gem 'jekyll-feed' - gem 'jekyll-imagemagick' - gem 'jekyll-minifier' - gem 'jekyll-paginate-v2' - gem 'jekyll-scholar' - gem 'jekyll-sitemap' - # gem 'jekyll-target-blank' - # gem 'jekyll-twitter-plugin' - gem 'jekyll-redirect-from' - # gem 'jemoji' - # gem 'mini_racer' - # gem 'mini_racer', '>=0.2.1' - gem 'unicode_utils' - gem 'webrick' -end -group :other_plugins do - gem 'httparty' - gem 'feedjira' -end diff --git a/_bibliography/2023-11-10-transformer-elo-prediction.bib b/_bibliography/2023-11-10-transformer-elo-prediction.bib deleted file mode 100644 index 9cdb0ffd..00000000 --- a/_bibliography/2023-11-10-transformer-elo-prediction.bib +++ /dev/null @@ -1,14 +0,0 @@ -@string{aps = {American Physical Society,}} - -@article{einstein1950meaning, - abbr={AJP}, - bibtex_show={true}, - title={The meaning of relativity}, - author={Einstein, Albert and Taub, AH}, - journal={American Journal of Physics,}, - volume={18}, - number={6}, - pages={403--404}, - year={1950}, - publisher={American Association of Physics Teachers,} -} \ No newline at end of file diff --git a/_bibliography/2023-11-11-physics_loss.bib b/_bibliography/2023-11-11-physics_loss.bib deleted file mode 100644 index e5158c7e..00000000 --- a/_bibliography/2023-11-11-physics_loss.bib +++ /dev/null @@ -1,49 +0,0 @@ -@article{1, -title = {Using 
physics-informed enhanced super-resolution generative adversarial networks for subfilter modeling in turbulent reactive flows}, -journal = {Proceedings of the Combustion Institute}, -volume = {38}, -number = {2}, -pages = {2617-2625}, -year = {2021}, -author = {Bode, M. and Gauding, M. and Lian, Z. and Denker, D. and Davidovic, M. and Kleinheinz, K. and Jitsev, J. and Pitsch, H.} -} - -@article{2, -author = {Fukami, K. and Fukagata, K. and Taira, K.}, -title = {Super-resolution recontruction of turbulent flows with machine learning}, -journal = {Journal of Fluid Mechanics}, -year = {2019}, -volume = {870}, -number = {}, -pages = {106-120}, -} - -@unpublished{3, -author = {Fukami, K.,Fukagata, K., & Taira, K.}, -title = {Super-Resolution Analysis Via Machine Learning: -A Survey For Fluid Flows -Kai Fukami}, -month = {Jun}, -year = {2023}, -annote = {} -} - -@inproceedings{4, -author = {Wang, C. and Li, S. and He, D. and Wang, L.}, -title = {Is L2 Physics-Informed Loss Always Suitablefor Training Physics-Informed Neural Network?}, -year = {2022}, -organization = {ConferenceonNeuralInformationProcessingSystems}, -} - -@unpublished{5, -author = {Bischof, R.,Kraus, M.}, -title = {Multi-Objective Loss Balancing for Physics-Informed DeepLearning}, -year = {2022}, -} - - - - - - - diff --git a/_bibliography/papers.bib b/_bibliography/papers.bib deleted file mode 100644 index 86a98d65..00000000 --- a/_bibliography/papers.bib +++ /dev/null @@ -1,151 +0,0 @@ - -@string{aps = {GAN,}} - - -@article{einstein1950meaning, - abbr={GAN}, - bibtex_show={true}, - title={HOW POWERFUL ARE GRAPH NEURAL NETWORKS?}, - link={https://arxiv.org/pdf/1810.00826.pdf}, - year={2019}, -} -@string{aps = {American Physical Society,}} -References -@article{1, - title={Large Language Models are Zero-Shot Reasoners}, - year={2023}, - url={https://arxiv.org/pdf/2205.11916.pdf} -} -@article{2, - title={Orca 2: Teaching Small Language Models How to Reason}, - year={2023}, - 
url={https://arxiv.org/pdf/2311.11045.pdf} -} -@article{3, - title={Solving Arithmetic Word Problems Automatically Using Transformer and Unambiguous Representations}, - year={2019}, - url={http://cs.uccs.edu/~jkalita/work/reu/REU2019/19Griffith.pdf} -} -@article{4, - title={Interpreting Deep Learning Models in Natural Language Processing: A Review}, - year={2021}, - url={https://www.semanticscholar.org/reader/d5784fd3ac7e06ec030abb8f7787faa9279c1a50} -} - -@article {Huang2023.03.19.23287458, - author = {Kexin Huang and Payal Chandak and Qianwen Wang and Shreyas Havaldar and Akhil Vaid and Jure Leskovec and Girish Nadkarni and Benjamin S. Glicksberg and Nils Gehlenborg and Marinka Zitnik}, - title = {Zero-shot drug repurposing with geometric deep learning and clinician centered design}, - elocation-id = {2023.03.19.23287458}, - year = {2023}, - doi = {10.1101/2023.03.19.23287458}, - publisher = {Cold Spring Harbor Laboratory Press}, - URL = {https://www.medrxiv.org/content/early/2023/09/28/2023.03.19.23287458}, - eprint = {https://www.medrxiv.org/content/early/2023/09/28/2023.03.19.23287458.full.pdf}, - journal = {medRxiv} -} - -@article{Lin2022-kx, - title = "Evolutionary-scale prediction of atomic level protein structure - with a language model", - author = "Lin, Zeming and Akin, Halil and Rao, Roshan and Hie, Brian and - Zhu, Zhongkai and Lu, Wenting and Smetanin, Nikita and Verkuil, - Robert and Kabeli, Ori and Shmueli, Yaniv and dos Santos Costa, - Allan and Fazel-Zarandi, Maryam and Sercu, Tom and Candido, - Salvatore and Rives, Alexander", - abstract = "AbstractArtificial intelligence has the potential to open insight - into the structure of proteins at the scale of evolution. It has - only recently been possible to extend protein structure - prediction to two hundred million cataloged proteins. 
- Characterizing the structures of the exponentially growing - billions of protein sequences revealed by large scale gene - sequencing experiments would necessitate a break-through in the - speed of folding. Here we show that direct inference of structure - from primary sequence using a large language model enables an - order of magnitude speed-up in high resolution structure - prediction. Leveraging the insight that language models learn - evolutionary patterns across millions of sequences, we train - models up to 15B parameters, the largest language model of - proteins to date. As the language models are scaled they learn - information that enables prediction of the three-dimensional - structure of a protein at the resolution of individual atoms. - This results in prediction that is up to 60x faster than - state-of-the-art while maintaining resolution and accuracy. - Building on this, we present the ESM Metage-nomic Atlas. This is - the first large-scale structural characterization of metagenomic - proteins, with more than 617 million structures. 
The atlas - reveals more than 225 million high confidence predictions, - including millions whose structures are novel in comparison with - experimentally determined structures, giving an unprecedented - view into the vast breadth and diversity of the structures of - some of the least understood proteins on earth.", - journal = "bioRxiv", - month = jul, - year = 2022 -} - - -@article{einstein1905molekularkinetischen, - title={{\"U}ber die von der molekularkinetischen Theorie der W{\"a}rme geforderte Bewegung von in ruhenden Fl{\"u}ssigkeiten suspendierten Teilchen}, - author={Einstein, A.}, - journal={Annalen der physik,}, - volume={322}, - number={8}, - pages={549--560}, - year={1905}, - publisher={Wiley Online Library} -} -@article{5, - title={Salience Allocation as Guidance for Abstractive Summarization}, - year={2022}, - url={https://arxiv.org/pdf/2210.12330.pdf} -} -@article{6, - title={CHARACTERIZING INTRINSIC COMPOSITIONALITY IN TRANSFORMERS WITH TREE PROJECTIONS}, - year={2022}, - url={https://arxiv.org/pdf/2211.01288.pdf} -} -@article{7, - title={MATHPROMPTER: MATHEMATICAL REASONING USING LARGE LANGUAGE MODELS}, - year={2023}, - url={https://arxiv.org/pdf/2303.05398.pdf} -} -@article{8, - title={TOWARDS HIERARCHICAL IMPORTANCE ATTRIBUTION: EXPLAINING COMPOSITIONAL SEMANTICS FOR NEURAL SEQUENCE MODELS}, - year={2020}, - url={https://arxiv.org/pdf/1911.06194.pdf} -} -@article{9, - title={A survey of transformers}, - year={2022}, - url={https://www.sciencedirect.com/science/article/pii/S2666651022000146} -} -@article{10, - title={Towards Reasoning in Large Language Models: A Survey}, - year={2023}, - url={https://arxiv.org/pdf/2212.10403.pdf} -} -@article{11, - title={Google DeepMind’s new Gemini model looks amazing—but could signal peak AI hype}, - year={2023}, - url={https://www.technologyreview.com/2023/12/06/1084471/google-deepminds-new-gemini-model-looks-amazing-but-could-signal-peak-ai-hype/} -} -@article{12, - title={Chain-of-Thought Prompting Elicits 
Reasoning in Large Language Models}, - year={2022}, - url={https://openreview.net/pdf?id=_VjQlMeSB_J} -} -@article{13, - title={Attention Is All You Need}, - year={2017}, - url={https://arxiv.org/pdf/1706.03762.pdf} -} -@article{14, - title={AUTOMATIC CHAIN OF THOUGHT PROMPTING IN LARGE LANGUAGE MODELS}, - year={2022}, - url={https://arxiv.org/pdf/2210.03493.pdf} -} -@article{15, - title={From Extractive to Abstractive Meeting Summaries: Can It Be Done by Sentence Compression?}, - year={2009}, - url={https://aclanthology.org/P09-2066.pdf} -} diff --git a/_config.yml b/_config.yml deleted file mode 100644 index bc4e8f99..00000000 --- a/_config.yml +++ /dev/null @@ -1,341 +0,0 @@ -# ----------------------------------------------------------------------------- -# Site settings -# ----------------------------------------------------------------------------- - -title: 6.S898 Deep Learning Blogs 2023 # the website title (if blank, full name will be used instead) -first_name: abc -middle_name: b -last_name: c -email: charlie.gauthier@umontreal.ca -description: > # the ">" symbol means to ignore newlines until "footer_text:" - Staging website for the 2023 ICLR Blogposts track -footer_text: > - Powered by Jekyll with al-folio theme. - Hosted by GitHub Pages. - Photos from Unsplash. -keywords: machine-learning, ml, deep-learning, reinforcement-learning, iclr # add your own keywords or leave empty - -lang: en # the language of your site (for example: en, fr, cn, ru, etc.) -url: https://deep-learning-mit.github.io # the base hostname & protocol for your site -baseurl: /staging # the subpath of your site, e.g. /blog/ - - -# url: https://iclr-blogposts.github.io # the base hostname & protocol for your site -# baseurl: /staging # the subpath of your site, e.g. 
/blog/ -last_updated: false # set to true if you want to display last updated in the footer -impressum_path: # set to path to include impressum link in the footer, use the same path as permalink in a page, helps to conform with EU GDPR - -# ----------------------------------------------------------------------------- -# Theme -# ----------------------------------------------------------------------------- - -# code highlighter theme -highlight_theme_light: github # https://github.com/jwarby/jekyll-pygments-themes -highlight_theme_dark: native # https://github.com/jwarby/jekyll-pygments-themes - -# repo color theme -repo_theme_light: default # https://github.com/anuraghazra/github-readme-stats/blob/master/themes/README.md -repo_theme_dark: dark # https://github.com/anuraghazra/github-readme-stats/blob/master/themes/README.md - -# ----------------------------------------------------------------------------- -# RSS Feed -# ----------------------------------------------------------------------------- -# will use title and url fields -# Take a look to https://github.com/jekyll/jekyll-feed for more customization - -rss_icon: true - -# ----------------------------------------------------------------------------- -# Layout -# ----------------------------------------------------------------------------- - -navbar_fixed: true -footer_fixed: true - -# Dimensions -max_width: 1000px - -# TODO: add layout settings (single page vs. multi-page) - -# ----------------------------------------------------------------------------- -# Open Graph & Schema.org -# ----------------------------------------------------------------------------- -# Display links to the page with a preview object on social media. 
-serve_og_meta: false # Include Open Graph meta tags in the HTML head -serve_schema_org: false # Include Schema.org in the HTML head -og_image: # The site-wide (default for all links) Open Graph preview image - -# ----------------------------------------------------------------------------- -# Social integration -# ----------------------------------------------------------------------------- - -github_username: # your GitHub user name -gitlab_username: # your GitLab user name -twitter_username: # your Twitter handle -linkedin_username: # your LinkedIn user name -scholar_userid: # your Google Scholar ID -semanticscholar_id: # your Semantic Scholar ID -orcid_id: # your ORCID ID -medium_username: # your Medium username -quora_username: # your Quora username -publons_id: # your ID on Publons -research_gate_profile: # your profile on ResearchGate -blogger_url: # your blogger URL -work_url: # work page URL -keybase_username: # your keybase user name -wikidata_id: # your wikidata id -dblp_url: # your DBLP profile url -stackoverflow_id: # your stackoverflow id -kaggle_id: # your kaggle id -lastfm_id: # your lastfm id -spotify_id: # your spotify id -pinterest_id: # your pinterest id -unsplash_id: # your unsplash id -instagram_id: # your instagram id -facebook_id: # your facebook id -discord_id: # your discord id (18-digit unique numerical identifier) - -contact_note: - -# ----------------------------------------------------------------------------- -# Analytics and search engine verification -# ----------------------------------------------------------------------------- - -google_analytics: # your Goole Analytics measurement ID (format: G-XXXXXXXXXX) -panelbear_analytics: # panelbear analytics site ID (format: XXXXXXXXX) - -google_site_verification: # your google-site-verification ID (Google Search Console) -bing_site_verification: # out your bing-site-verification ID (Bing Webmaster) - -# ----------------------------------------------------------------------------- -# 
Blog -# ----------------------------------------------------------------------------- - -blog_name: 6.S898 Deep Learning Blogs # blog_name will be displayed in your blog page -blog_nav_title: blog # your blog must have a title for it to be displayed in the nav bar -blog_description: Fall 2023 -permalink: /blog/:year/:title/ - -# Pagination -pagination: - enabled: true - -# Comments -disqus_shortname: # put your disqus shortname -# https://help.disqus.com/en/articles/1717111-what-s-a-shortname - -# External sources. -# If you have blog posts published on medium.com or other exteranl sources, -# you can display them in your blog by adding a link to the RSS feed. -external_sources: - -# ----------------------------------------------------------------------------- -# Collections -# ----------------------------------------------------------------------------- - -collections: - news: - defaults: - layout: post - output: true - permalink: /news/:path/ - projects: - output: false - permalink: /projects/:path/ - -news_scrollable: true # adds a vertical scroll bar if there are more than 3 news items -news_limit: 5 # leave blank to include all the news in the `_news` folder - -# ----------------------------------------------------------------------------- -# Jekyll settings -# ----------------------------------------------------------------------------- - -# Markdown and syntax highlight -markdown: kramdown -highlighter: rouge -kramdown: - input: GFM - syntax_highlighter_opts: - css_class: 'highlight' - span: - line_numbers: false - block: - line_numbers: false - start_line: 1 - -# Includes & excludes -include: ['_pages'] -exclude: - - bin - - Gemfile - - Gemfile.lock - - vendor -keep_files: - - CNAME - - .nojekyll - - .git - -# Plug-ins -plugins: - - jekyll-archives - - jekyll-diagrams - - jekyll-email-protect - - jekyll-feed - - jekyll-imagemagick - - jekyll-minifier - - jekyll-paginate-v2 - - jekyll/scholar - - jekyll-sitemap - # - jekyll-target-blank - # - 
jekyll-twitter-plugin - # - jemoji - -# Sitemap settings -defaults: - - scope: - path: "assets/**/*.*" - values: - sitemap: false - -# ----------------------------------------------------------------------------- -# Jekyll Minifier -# ----------------------------------------------------------------------------- - -jekyll-minifier: - exclude: ['robots.txt'] - uglifier_args: - harmony: true - -# ----------------------------------------------------------------------------- -# Jekyll Archives -# ----------------------------------------------------------------------------- - -jekyll-archives: - enabled: [year, tags, categories] # enables year, tag and category archives (remove if you need to disable one of them). - layouts: - year: archive-year - tag: archive-tag - category: archive-category - permalinks: - year: '/blog/:year/' - tag: '/blog/tag/:name/' - category: '/blog/category/:name/' - - -# ----------------------------------------------------------------------------- -# Jekyll Scholar -# ----------------------------------------------------------------------------- - -scholar: - - last_name: - first_name: - - style: apa - locale: en - - source: /_bibliography/ - bibliography: papers.bib - bibliography_template: bib - # Note: if you have latex math in your bibtex, the latex filter - # preprocessing may conflict with MathJAX if the latter is enabled. - # See https://github.com/alshedivat/al-folio/issues/357. 
- bibtex_filters: [latex, smallcaps, superscript] - - replace_strings: true - join_strings: true - - details_dir: bibliography - details_layout: bibtex.html - details_link: Details - - query: "@*" - -# Filter out certain bibtex entry keywords used internally from the bib output -filtered_bibtex_keywords: [abbr, abstract, arxiv, bibtex_show, html, pdf, selected, supp, blog, code, poster, slides, website, preview] - -# Maximum number of authors to be shown for each publication (more authors are visible on click) -max_author_limit: 3 # leave blank to always show all authors -more_authors_animation_delay: 10 # more authors are revealed on click using animation; smaller delay means faster animation - - -# ----------------------------------------------------------------------------- -# Responsive WebP Images -# ----------------------------------------------------------------------------- - -imagemagick: - enabled: true # enables responsive images for your site (recomended, see https://github.com/alshedivat/al-folio/issues/537) - widths: - - 480 - - 800 - - 1400 - input_directories: - - assets/img/ - input_formats: - - ".jpg" - - ".jpeg" - - ".png" - - ".tiff" - output_formats: - webp: "-resize 800x" - -# ----------------------------------------------------------------------------- -# Jekyll Diagrams -# ----------------------------------------------------------------------------- - -jekyll-diagrams: - # configuration, see https://github.com/zhustec/jekyll-diagrams. - # feel free to comment out this section if not using jekyll diagrams. 
- - -# ----------------------------------------------------------------------------- -# Optional Features -# ----------------------------------------------------------------------------- - -enable_google_analytics: false # enables google analytics -enable_panelbear_analytics: false # enables panelbear analytics -enable_google_verification: false # enables google site verification -enable_bing_verification: false # enables bing site verification -enable_masonry: true # enables automatic project cards arangement -enable_math: true # enables math typesetting (uses MathJax) -enable_tooltips: false # enables automatic tooltip links generated - # for each section titles on pages and posts -enable_darkmode: true # enables switching between light/dark modes -enable_navbar_social: false # enables displaying social links in the - # navbar on the about page -enable_project_categories: true # enables categorization of projects into - # multiple categories -enable_medium_zoom: true # enables image zoom feature (as on medium.com) - - -# ----------------------------------------------------------------------------- -# Library versions -# ----------------------------------------------------------------------------- - -academicons: - version: "1.9.1" - integrity: "sha256-i1+4qU2G2860dGGIOJscdC30s9beBXjFfzjWLjBRsBg=" -bootstrap: - version: "4.6.1" - integrity: - css: "sha256-DF7Zhf293AJxJNTmh5zhoYYIMs2oXitRfBjY+9L//AY=" - js: "sha256-fgLAgv7fyCGopR/gBNq2iW3ZKIdqIcyshnUULC4vex8=" -fontawesome: - version: "5.15.4" - integrity: "sha256-mUZM63G8m73Mcidfrv5E+Y61y7a12O5mW4ezU3bxqW4=" -jquery: - version: "3.6.0" - integrity: "sha256-/xUj+3OJU5yExlq6GSYGSHk7tPXikynS7ogEvDej/m4=" -mathjax: - version: "3.2.0" -masonry: - version: "4.2.2" - integrity: "sha256-Nn1q/fx0H7SNLZMQ5Hw5JLaTRZp0yILA/FRexe19VdI=" -mdb: - version: "4.20.0" - integrity: - css: "sha256-jpjYvU3G3N6nrrBwXJoVEYI/0zw8htfFnhT9ljN3JJw=" - js: "sha256-NdbiivsvWt7VYCt6hYNT3h/th9vSTL4EDWeGs5SN3DA=" -medium_zoom: - version: "1.0.6" 
- integrity: "sha256-EdPgYcPk/IIrw7FYeuJQexva49pVRZNmt3LculEr7zM=" \ No newline at end of file diff --git a/_data/coauthors.yml b/_data/coauthors.yml deleted file mode 100644 index 8ed52124..00000000 --- a/_data/coauthors.yml +++ /dev/null @@ -1,34 +0,0 @@ -"Adams": - - firstname: ["Edwin", "E.", "E. P.", "Edwin Plimpton"] - url: https://en.wikipedia.org/wiki/Edwin_Plimpton_Adams - -"Podolsky": - - firstname: ["Boris", "B.", "B. Y.", "Boris Yakovlevich"] - url: https://en.wikipedia.org/wiki/Boris_Podolsky - -"Rosen": - - firstname: ["Nathan", "N."] - url: https://en.wikipedia.org/wiki/Nathan_Rosen - -"Bach": - - firstname: ["Johann Sebastian", "J. S."] - url: https://en.wikipedia.org/wiki/Johann_Sebastian_Bach - - - firstname: ["Carl Philipp Emanuel", "C. P. E."] - url: https://en.wikipedia.org/wiki/Carl_Philipp_Emanuel_Bach - -"Przibram": - - firstname: ["Karl"] - url: https://link.springer.com/article/10.1007/s00016-019-00242-z - -"Schrödinger": - - firstname: ["Erwin"] - url: https://en.wikipedia.org/wiki/Erwin_Schr%C3%B6dinger - -"Lorentz": - - firstname: ["Hendrik Antoon"] - url: https://en.wikipedia.org/wiki/Hendrik_Lorentz - -"Planck": - - firstname: ["Max"] - url: https://en.wikipedia.org/wiki/Max_Planck diff --git a/_data/cv.yml b/_data/cv.yml deleted file mode 100644 index 5b115724..00000000 --- a/_data/cv.yml +++ /dev/null @@ -1,97 +0,0 @@ -- title: General Information - type: map - contents: - - name: Full Name - value: Albert Einstein - - name: Date of Birth - value: 14th March 1879 - - name: Languages - value: English, German - -- title: Education - type: time_table - contents: - - title: PhD - institution: University of Zurich, Zurich, Switzerland - year: 1905 - description: - - Description 1. - - Description 2. - - title: Description 3. - contents: - - Sub-description 1. - - Sub-description 2. - - title: Federal teaching diploma - institution: Eidgenössische Technische Hochschule, Zurich, Switzerland - year: 1900 - description: - - Description 1. 
- - Description 2. - -- title: Experience - type: time_table - contents: - - title: Professor of Theoretical Physics - institution: Institute for Advanced Study, Princeton University - year: 1933 - 1955 - description: - - Description 1. - - Description 2. - - title: Description 3. - contents: - - Sub-description 1. - - Sub-description 2. - - title: Visiting Professor - institution: California Institute of Technology, Pasadena, California, US - year: 1933 - description: - - Description 1. - - Description 2. - - - title: Director - institution: Kaiser Wilhelm Institute for Physics, Berlin, Germany. - year: 1917-1933 - - - title: Professor of Theoretical Physics - institution: Karl-Ferdinand University, Prague, Czechoslovakia - year: 1911 - 1917 - description: - - - title: Associate Professor of Theoretical Physics - institution: University of Zurich, Zurich, Switzerland - year: 1909 - 1911 - -- title: Open Source Projects - type: time_table - contents: - - title: al-folio - year: 2015-now - description: A beautiful, simple, clean, and responsive Jekyll theme for academics. - -- title: Honors and Awards - type: time_table - contents: - - year: 1921 - items: - - Nobel Prize in Physics - - Matteucci Medal - - year: 2029 - items: - - Max Planck Medal - -- title: Academic Interests - type: nested_list - contents: - - title: Topic 1. - items: - - Description 1. - - Description 2. - - title: Topic 2. - items: - - Description 1. - - Description 2. - -- title: Other Interests - type: list - contents: - - Hobbies: Hobby 1, Hobby 2, etc. 
diff --git a/_data/repositories.yml b/_data/repositories.yml deleted file mode 100644 index 5205c9f6..00000000 --- a/_data/repositories.yml +++ /dev/null @@ -1,12 +0,0 @@ -github_users: - - torvalds - - alshedivat - -github_repos: - - alshedivat/al-folio - - twbs/bootstrap - - jekyll/jekyll - - jquery/jquery - - FortAwesome/Font-Awesome - - jpswalsh/academicons - - mathjax/MathJax diff --git a/_data/venues.yml b/_data/venues.yml deleted file mode 100644 index 6c16ad5d..00000000 --- a/_data/venues.yml +++ /dev/null @@ -1,6 +0,0 @@ -"AJP": - url: https://aapt.scitation.org/journal/ajp - color: "#00369f" - -"PhysRev": - url: https://journals.aps.org/ diff --git a/_includes/audio.html b/_includes/audio.html deleted file mode 100644 index 711d717a..00000000 --- a/_includes/audio.html +++ /dev/null @@ -1,4 +0,0 @@ - \ No newline at end of file diff --git a/_includes/cv/list.html b/_includes/cv/list.html deleted file mode 100644 index 75625859..00000000 --- a/_includes/cv/list.html +++ /dev/null @@ -1,5 +0,0 @@ - \ No newline at end of file diff --git a/_includes/cv/map.html b/_includes/cv/map.html deleted file mode 100644 index e0d1983e..00000000 --- a/_includes/cv/map.html +++ /dev/null @@ -1,8 +0,0 @@ - - {% for content in entry.contents %} - - - - - {% endfor %} -
{{ content.name }}{{ content.value }}
\ No newline at end of file diff --git a/_includes/cv/nested_list.html b/_includes/cv/nested_list.html deleted file mode 100644 index 4778aca0..00000000 --- a/_includes/cv/nested_list.html +++ /dev/null @@ -1,14 +0,0 @@ - \ No newline at end of file diff --git a/_includes/cv/time_table.html b/_includes/cv/time_table.html deleted file mode 100644 index 123b9d09..00000000 --- a/_includes/cv/time_table.html +++ /dev/null @@ -1,59 +0,0 @@ - \ No newline at end of file diff --git a/_includes/embed-audio.html b/_includes/embed-audio.html deleted file mode 100644 index 06cc7e0c..00000000 --- a/_includes/embed-audio.html +++ /dev/null @@ -1,4 +0,0 @@ - \ No newline at end of file diff --git a/_includes/figure.html b/_includes/figure.html deleted file mode 100644 index e67e8043..00000000 --- a/_includes/figure.html +++ /dev/null @@ -1,36 +0,0 @@ -{%- assign img_path = include.path | remove: ".jpg" | remove: ".jpeg" | remove: ".png" | remove: ".tiff" -%} - -
- - - {% if site.imagemagick.enabled %} - {% for i in site.imagemagick.widths -%} - - {% endfor -%} - {% endif %} - - - - - - - {%- if include.caption -%}
{{ include.caption }}
{%- endif %} - -
diff --git a/_includes/footer.html b/_includes/footer.html deleted file mode 100644 index acc4688f..00000000 --- a/_includes/footer.html +++ /dev/null @@ -1,25 +0,0 @@ - {% if site.footer_fixed %} - - {%- else -%} - - {%- endif %} \ No newline at end of file diff --git a/_includes/head.html b/_includes/head.html deleted file mode 100644 index 3796eb38..00000000 --- a/_includes/head.html +++ /dev/null @@ -1,31 +0,0 @@ - - {% include metadata.html %} - - - - - - - - - - - - - - - {% if site.icon.size < 3 %} - - {% elsif site.icon != blank %} - - {% endif %} - - - - - {% if site.enable_darkmode %} - - - - - {% endif %} diff --git a/_includes/header.html b/_includes/header.html deleted file mode 100644 index f72668e5..00000000 --- a/_includes/header.html +++ /dev/null @@ -1,137 +0,0 @@ - -
- - - -
\ No newline at end of file diff --git a/_includes/metadata.html b/_includes/metadata.html deleted file mode 100644 index af3813a8..00000000 --- a/_includes/metadata.html +++ /dev/null @@ -1,196 +0,0 @@ -{% if site.enable_google_verification or site.enable_bing_verification %} - - {% if site.enable_google_verification -%} - - {%- endif -%} - {% if site.enable_bing_verification -%} - - {%- endif -%} -{%- endif %} - - - - - - - {%- if site.title == "blank" -%} - {%- capture title -%}{{ site.first_name }} {{ site.middle_name }} {{ site.last_name }}{%- endcapture -%} - {%- else -%} - {%- capture title -%}{{ site.title }}{%- endcapture -%} - {%- endif -%} - {% if page.url == '/blog/index.html' %} - {{ site.blog_nav_title }} | {{ title }} - {%- elsif page.title != "blank" and page.url != "/" -%} - {%- if page.title == nil or page.title == "" -%} - {{ page.date | date: "%Y" }} | {{ title }} - {%- else -%} - {{ page.title }} | {{ title }} - {%- endif -%} - {%- else -%} - {{ title }} - {%- endif -%} - - - -{%- if page.keywords or site.keywords %} - -{%- endif %} - -{%- if site.serve_og_meta %} - - - - - - - - {% if page.og_image or site.og_image -%} - - {%- endif %} - - - - - - - {% if page.og_image or site.og_image -%} - - {%- endif %} - {% if site.twitter_username -%} - - - {%- endif %} -{%- endif %} - -{%- if site.serve_schema_org %} - - - {%- comment -%} Social links generator for "sameAs schema" {%- endcomment %} - {% assign sameaslinks = "" | split: "," %} - {%- if site.orcid_id -%} - {%- capture link -%}https://orcid.org/{{ site.orcid_id }}{%- endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if site.scholar_userid -%} - {%- capture link -%}https://scholar.google.com/citations?user={{ site.scholar_userid }}{%- endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if site.semanticscholar_id -%} - {%- capture link -%}https://www.semanticscholar.org/author/{{ site.semanticscholar_id }}{%- 
endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if site.publons_id -%} - {%- capture link -%}https://publons.com/a/{{ site.publons_id }}/{%- endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if site.research_gate_profile -%} - {%- capture link -%}https://www.researchgate.net/profile/{{site.research_gate_profile}}{%- endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if site.github_username -%} - {%- capture link -%}https://github.com/{{ site.github_username }}{%- endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if site.linkedin_username -%} - {%- capture link -%}https://www.linkedin.com/in/{{ site.linkedin_username }}{%- endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if site.twitter_username -%} - {%- capture link -%}https://twitter.com/{{ site.twitter_username }}{%- endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if site.medium_username -%} - {%- capture link -%}https://medium.com/@{{ site.medium_username }}{%- endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if site.quora_username -%} - {%- capture link -%}https://www.quora.com/profile/{{ site.quora_username }}{%- endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if site.blogger_url -%} - {%- capture link -%}{{ site.blogger_url }}{%- endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if site.work_url -%} - {%- capture link -%}{{ site.work_url }}{%- endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if site.wikidata_id -%} - {%- capture link -%}https://www.wikidata.org/wiki/{{ site.wikidata_id }}{%- endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - 
{%- if site.strava_userid -%} - {%- capture link -%}https://www.strava.com/athletes/{{ site.strava_userid }}{%- endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if site.keybase_username -%} - {%- capture link -%}https://keybase.io/{{ site.keybase_username }}{%- endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if site.gitlab_username -%} - {%- capture link -%}https://gitlab.com/{{ site.gitlab_username }}{%- endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if site.dblp_url -%} - {%- capture link -%}{{ site.dblp_url }}{%- endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if site.stackoverflow_id -%} - {%- capture link -%}https://stackoverflow.com/users/{{ site.stackoverflow_id }}{%- endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if site.kaggle_id -%} - {%- capture link -%}https://www.kaggle.com/{{ site.kaggle_id }}{%- endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if site.lastfm_id -%} - {%- capture link -%}https://www.last.fm/user/{{ site.lastfm_id }}{%- endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if site.spotify_id -%} - {%- capture link -%}https://open.spotify.com/user/{{ site.spotify_id }}{%- endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if site.pinterest_id -%} - {%- capture link -%}https://www.pinterest.com/{{ site.pinterest_id }}{%- endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if site.unsplash_id -%} - {%- capture link -%}https://unsplash.com/@{{ site.unsplash_id }}{%- endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if site.instagram_id -%} - {%- capture link -%}https://instagram.com/{{ site.instagram_id }}{%- 
endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if site.facebook_id -%} - {%- capture link -%}https://facebook.com/{{ site.facebook_id }}{%- endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if site.discord_id -%} - {%- capture link -%}https://discord.com/users/{{ site.discord_id }}{%- endcapture -%} - {%- assign sameaslinks = sameaslinks | push: link -%} - {%- endif -%} - {%- if sameaslinks != blank -%} - {%- assign sameaslinks = sameaslinks | split: "" -%} - {%- endif -%} - - -{%- endif %} diff --git a/_includes/news.html b/_includes/news.html deleted file mode 100644 index 307e532d..00000000 --- a/_includes/news.html +++ /dev/null @@ -1,31 +0,0 @@ - -
-

news

- {% if site.news != blank -%} - {%- assign news_size = site.news | size -%} -
3 %}style="max-height: 10vw"{% endif %}> - - {%- assign news = site.news | reverse -%} - {% if site.news_limit %} - {% assign news_limit = site.news_limit %} - {% else %} - {% assign news_limit = news_size %} - {% endif %} - {% for item in news limit: news_limit %} - - - - - {%- endfor %} -
{{ item.date | date: "%b %-d, %Y" }} - {% if item.inline -%} - {{ item.content | remove: '

' | remove: '

' | emojify }} - {%- else -%} - {{ item.title }} - {%- endif %} -
-
- {%- else -%} -

No news so far...

- {%- endif %} -
diff --git a/_includes/pagination.html b/_includes/pagination.html deleted file mode 100644 index 4b8d27e3..00000000 --- a/_includes/pagination.html +++ /dev/null @@ -1,17 +0,0 @@ -{%- if paginator.total_pages > 1 -%} - -{%- endif -%} diff --git a/_includes/people.html b/_includes/people.html deleted file mode 100644 index b5a79f1f..00000000 --- a/_includes/people.html +++ /dev/null @@ -1,16 +0,0 @@ - -
-
- -
- {%- include figure.html - path=include.img - alt=include.name - -%} -
-
{{- include.name -}}
-

{{- include.affiliation -}}

-
-
-
-
diff --git a/_includes/people_horizontal.html b/_includes/people_horizontal.html deleted file mode 100644 index 957bc768..00000000 --- a/_includes/people_horizontal.html +++ /dev/null @@ -1,17 +0,0 @@ -
- -
-
-
- {% include figure.html path=include.img alt=include.name %} -
-
-
-
{{ include.name }}
-

{{ include.affiliation }}

-
-
-
-
-
-
diff --git a/_includes/projects.html b/_includes/projects.html deleted file mode 100644 index 503146e2..00000000 --- a/_includes/projects.html +++ /dev/null @@ -1,36 +0,0 @@ - -
-
- {% if project.redirect -%} - - {%- else -%} - - {%- endif %} -
- {%- if project.img %} - {%- include figure.html - path=project.img - alt="project thumbnail" -%} - {%- endif %} -
-

{{ project.title }}

-

{{ project.description }}

-
- {%- if project.github -%} -
-
- -
- {%- if project.github_stars -%} - - - - - {%- endif %} -
- {%- endif %} -
-
-
- -
\ No newline at end of file diff --git a/_includes/projects_horizontal.html b/_includes/projects_horizontal.html deleted file mode 100644 index ddf74058..00000000 --- a/_includes/projects_horizontal.html +++ /dev/null @@ -1,40 +0,0 @@ -
- {%- if project.redirect -%} - - {%- else -%} - - {%- endif -%} -
- - -
diff --git a/_includes/repository/repo.html b/_includes/repository/repo.html deleted file mode 100644 index 6344b860..00000000 --- a/_includes/repository/repo.html +++ /dev/null @@ -1,14 +0,0 @@ -{% assign repo_url = include.repository | split: '/' %} - -{% if site.data.repositories.github_users contains repo_url.first %} - {% assign show_owner = false %} -{% else %} - {% assign show_owner = true %} -{% endif %} - -
- - {{ include.repository }} - {{ include.repository }} - -
diff --git a/_includes/repository/repo_user.html b/_includes/repository/repo_user.html deleted file mode 100644 index ae06a058..00000000 --- a/_includes/repository/repo_user.html +++ /dev/null @@ -1,6 +0,0 @@ -
- - {{ include.username }} - {{ include.username }} - -
diff --git a/_includes/scripts/analytics.html b/_includes/scripts/analytics.html deleted file mode 100644 index db2aeef9..00000000 --- a/_includes/scripts/analytics.html +++ /dev/null @@ -1,18 +0,0 @@ -{%- if site.enable_google_analytics -%} - - - -{%- endif -%} -{%- if site.enable_panelbear_analytics -%} - - - -{%- endif -%} diff --git a/_includes/scripts/bootstrap.html b/_includes/scripts/bootstrap.html deleted file mode 100644 index 1c213650..00000000 --- a/_includes/scripts/bootstrap.html +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/_includes/scripts/jquery.html b/_includes/scripts/jquery.html deleted file mode 100644 index f84a2f22..00000000 --- a/_includes/scripts/jquery.html +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/_includes/scripts/masonry.html b/_includes/scripts/masonry.html deleted file mode 100644 index 804389d3..00000000 --- a/_includes/scripts/masonry.html +++ /dev/null @@ -1,6 +0,0 @@ - {%- if site.enable_masonry -%} - - - - - {%- endif -%} diff --git a/_includes/scripts/mathjax.html b/_includes/scripts/mathjax.html deleted file mode 100644 index c55ec056..00000000 --- a/_includes/scripts/mathjax.html +++ /dev/null @@ -1,12 +0,0 @@ - {%- if site.enable_math -%} - - - - - {%- endif %} diff --git a/_includes/scripts/misc.html b/_includes/scripts/misc.html deleted file mode 100644 index 08ba49f0..00000000 --- a/_includes/scripts/misc.html +++ /dev/null @@ -1,14 +0,0 @@ -{% if site.enable_tooltips %} - - -{%- endif %} -{%- if site.enable_medium_zoom %} - - - -{%- endif -%} - - - diff --git a/_includes/selected_papers.html b/_includes/selected_papers.html deleted file mode 100644 index 61457dbc..00000000 --- a/_includes/selected_papers.html +++ /dev/null @@ -1,5 +0,0 @@ - -
-

selected publications

- {% bibliography -f papers -q @*[selected=true]* %} -
diff --git a/_includes/social.html b/_includes/social.html deleted file mode 100644 index 8c7a079c..00000000 --- a/_includes/social.html +++ /dev/null @@ -1,84 +0,0 @@ - {%- if site.email -%} - - {% endif %} - {%- if site.orcid_id -%} - - {% endif %} - {%- if site.scholar_userid -%} - - {% endif %} - {%- if site.semanticscholar_id -%} - - {% endif %} - {%- if site.publons_id -%} - - {% endif %} - {%- if site.research_gate_profile -%} - - {% endif %} - {%- if site.github_username -%} - - {% endif %} - {%- if site.linkedin_username -%} - - {% endif %} - {%- if site.twitter_username -%} - - {% endif %} - {%- if site.medium_username -%} - - {% endif %} - {%- if site.quora_username -%} - - {% endif %} - {%- if site.blogger_url -%} - - {% endif %} - {%- if site.work_url -%} - - {% endif %} - {%- if site.wikidata_id -%} - - {% endif %} - {%- if site.strava_userid -%} - - {% endif %} - {%- if site.keybase_username -%} - - {% endif %} - {%- if site.gitlab_username -%} - - {% endif %} - {%- if site.dblp_url -%} - - {% endif %} - {%- if site.stackoverflow_id -%} - - {% endif %} - {%- if site.kaggle_id -%} - - {% endif %} - {%- if site.lastfm_id -%} - - {% endif %} - {%- if site.spotify_id -%} - - {% endif %} - {%- if site.pinterest_id -%} - - {% endif %} - {%- if site.unsplash_id -%} - - {% endif %} - {%- if site.instagram_id -%} - - {% endif %} - {%- if site.facebook_id -%} - - {% endif %} - {%- if site.discord_id -%} - - {% endif %} - {%- if site.rss_icon -%} - - {% endif %} diff --git a/_layouts/about.html b/_layouts/about.html deleted file mode 100644 index d3628377..00000000 --- a/_layouts/about.html +++ /dev/null @@ -1,66 +0,0 @@ ---- -layout: default ---- - - -
-
- -

{{ page.subtitle }}

-
- -
- {% if page.profile -%} -
- {%- if page.profile.image %} - {%- assign profile_image_path = page.profile.image | prepend: 'assets/img/' -%} - - {% if page.profile.image_circular %} - {%- assign profile_image_class = "img-fluid z-depth-1 rounded-circle" -%} - {% else %} - {%- assign profile_image_class = "img-fluid z-depth-1 rounded" -%} - {% endif %} - - {% include figure.html - path=profile_image_path - class=profile_image_class - alt=page.profile.image -%} - {% endif -%} - {%- if page.profile.address %} -
- {{ page.profile.address }} -
- {%- endif %} -
- {%- endif %} - -
- {{ content }} -
- - {% if page.news -%} - - {%- include news.html %} - {%- endif %} - {% if page.selected_papers -%} - - {%- include selected_papers.html %} - {%- endif %} - {%- if page.social %} - - - {%- endif %} -
- -
diff --git a/_layouts/archive-category.html b/_layouts/archive-category.html deleted file mode 100644 index 79aad74f..00000000 --- a/_layouts/archive-category.html +++ /dev/null @@ -1,27 +0,0 @@ ---- -layout: default ---- - -
- -
-

{{ page.title }}

-

an archive of posts in this category

-
- -
-
- - {% for post in page.posts %} - - - - - {% endfor %} -
{{ post.date | date: "%b %-d, %Y" }} - {{ post.title }} -
-
-
- -
diff --git a/_layouts/archive-tag.html b/_layouts/archive-tag.html deleted file mode 100644 index 66abaebb..00000000 --- a/_layouts/archive-tag.html +++ /dev/null @@ -1,27 +0,0 @@ ---- -layout: default ---- - -
- -
-

{{ page.title }}

-

an archive of posts with this tag

-
- -
-
- - {% for post in page.posts %} - - - - - {% endfor %} -
{{ post.date | date: "%b %-d, %Y" }} - {{ post.title }} -
-
-
- -
diff --git a/_layouts/archive-year.html b/_layouts/archive-year.html deleted file mode 100644 index 8af1d29b..00000000 --- a/_layouts/archive-year.html +++ /dev/null @@ -1,27 +0,0 @@ ---- -layout: default ---- - -
- -
-

{{ page.date | date: "%Y" }}

-

an archive of posts from this year

-
- -
-
- - {% for post in page.posts %} - - - - - {% endfor %} -
{{ post.date | date: "%b %-d, %Y" }} - {{ post.title }} -
-
-
- -
diff --git a/_layouts/bib.html b/_layouts/bib.html deleted file mode 100644 index eb6520a2..00000000 --- a/_layouts/bib.html +++ /dev/null @@ -1,196 +0,0 @@ ---- ---- - -
-
- {%- if entry.preview -%} - {% if entry.preview contains '://' -%} - - {%- else -%} - - {%- endif -%} - {%- elsif entry.abbr -%} - {%- if site.data.venues[entry.abbr] -%} - {%- assign venue_style = nil -%} - {%- if site.data.venues[entry.abbr].color != blank -%} - {%- assign venue_style = site.data.venues[entry.abbr].color | prepend: 'style="background-color:' | append: '"' -%} - {%- endif -%} - {{entry.abbr}} - {%- else -%} - {{entry.abbr}} - {%- endif -%} - {%- endif -%} -
- - -
- {% if entry.type == "thesis" -%} - {{reference}} - {%- else %} - -
{{entry.title}}
- -
- {% assign author_array_size = entry.author_array | size %} - - {% assign author_array_limit = author_array_size %} - {%- if site.max_author_limit and author_array_size > site.max_author_limit %} - {% assign author_array_limit = site.max_author_limit %} - {% endif %} - - {%- for author in entry.author_array limit: author_array_limit -%} - {%- assign author_is_self = false -%} - {%- assign author_last_name = author.last | remove: "¶" | remove: "&" | remove: "*" | remove: "†" | remove: "^" -%} - {%- if site.scholar.last_name contains author_last_name -%} - {%- if site.scholar.first_name contains author.first -%} - {%- assign author_is_self = true -%} - {%- endif -%} - {%- endif -%} - {%- assign coauthor_url = nil -%} - {%- if site.data.coauthors[author_last_name] -%} - {%- for coauthor in site.data.coauthors[author_last_name] -%} - {%- if coauthor.firstname contains author.first -%} - {%- assign coauthor_url = coauthor.url -%} - {%- break -%} - {%- endif -%} - {%- endfor -%} - {%- endif -%} - - {%- if forloop.length > 1 -%} - {%- if forloop.first == false -%}, {%- endif -%} - {%- if forloop.last and author_array_limit == author_array_size -%}and {%- endif -%} - {%- endif -%} - {%- if author_is_self -%} - {{author.first}} {{author.last}} - {%- else -%} - {%- if coauthor_url -%} - {{author.first}} {{author.last}} - {%- else -%} - {{author.first}} {{author.last}} - {%- endif -%} - {%- endif -%} - {%- endfor -%} - {%- assign more_authors = author_array_size | minus: author_array_limit -%} - - {%- assign more_authors_hide = more_authors | append: " more author" -%} - {%- if more_authors > 0 -%} - {%- if more_authors > 1 -%} - {%- assign more_authors_hide = more_authors_hide | append: "s" -%} - {%- endif -%} - {%- assign more_authors_show = '' -%} - {%- for author in entry.author_array offset: author_array_limit -%} - {%- assign more_authors_show = more_authors_show | append: author.first | append: " " | append: author.last -%} - {%- unless forloop.last -%} - {%- assign 
more_authors_show = more_authors_show | append: ", " -%} - {%- endunless -%} - {%- endfor -%} - , and - {{more_authors_hide}} - {%- endif -%} - -
- - - {% assign proceedings = "inproceedings, incollection" | split: ','%} - {% if entry.type == "article" -%} - {%- capture entrytype -%}{{entry.journal}}{%- endcapture -%} - {%- elsif proceedings contains entry.type -%} - {%- capture entrytype -%}In {{entry.booktitle}} {%- endcapture -%} - {%- else -%} - {%- capture entrytype -%}{%- endcapture -%} - {%- endif -%} - {%- if entry.month -%} - {%- capture entrymonth -%}{{ " " }}{{ entry.month | capitalize }}{%- endcapture -%} - {%- endif -%} - {%- if entry.year -%} - {%- capture entryyear -%}{{ " " }}{{entry.year}}{%- endcapture -%} - {%- endif -%} - {%- capture periodical -%}{{ entrytype }}{{ entrymonth }}{{ entryyear }}{%- endcapture -%} -
- {{ periodical | strip }} -
- {%- endif %} - - - - - {% if entry.abstract -%} - - - {%- endif -%} - - {% if entry.bibtex_show -%} - - - {%- endif %} -
-
diff --git a/_layouts/cv.html b/_layouts/cv.html deleted file mode 100644 index bb3d85af..00000000 --- a/_layouts/cv.html +++ /dev/null @@ -1,35 +0,0 @@ ---- -layout: default ---- - -
- -
-

{{ page.title }} {% if page.cv_pdf %}{% endif %}

-

{{ page.description }}

-
- -
-
- {% for entry in site.data.cv %} -
-

{{ entry.title }}

-
- {% if entry.type == "list" %} - {% include cv/list.html %} - {% elsif entry.type == "map" %} - {% include cv/map.html %} - {% elsif entry.type == "nested_list" %} - {% include cv/nested_list.html %} - {% elsif entry.type == "time_table" %} - {% include cv/time_table.html %} - {% else %} - {{ entry.contents }} - {% endif %} -
-
- {% endfor %} -
-
- -
diff --git a/_layouts/default.html b/_layouts/default.html deleted file mode 100644 index 1001a5b5..00000000 --- a/_layouts/default.html +++ /dev/null @@ -1,36 +0,0 @@ - - - - - - {%- if page.redirect -%} - - {%- endif -%} - {% include head.html %} - - - - - - - {%- include header.html %} - -
- - -
- {{ content }} -
- - - - - - {% include scripts/jquery.html %} - {% include scripts/bootstrap.html %} - {% include scripts/masonry.html %} - {% include scripts/misc.html %} - {% include scripts/mathjax.html %} - {% include scripts/analytics.html %} - - diff --git a/_layouts/distill.html b/_layouts/distill.html deleted file mode 100644 index c3a8d48c..00000000 --- a/_layouts/distill.html +++ /dev/null @@ -1,114 +0,0 @@ - - - - - {%- include head.html %} - - {% include scripts/jquery.html %} - {% include scripts/mathjax.html %} - - - - - {% if page._styles %} - - - {%- endif %} - - - - - - - - - - {%- include header.html %} - - -
- - -

{{ page.title }}

-

{{ page.description }}

-
- - - - - {% if page.toc -%} - - - - {%- endif %} - - {{ content }} - - - - - - - -
- - - - - - {% include scripts/bootstrap.html %} - {% include scripts/analytics.html %} - - diff --git a/_layouts/none.html b/_layouts/none.html deleted file mode 100644 index b92f6522..00000000 --- a/_layouts/none.html +++ /dev/null @@ -1 +0,0 @@ -{{content}} diff --git a/_layouts/page.html b/_layouts/page.html deleted file mode 100644 index 5a0c4080..00000000 --- a/_layouts/page.html +++ /dev/null @@ -1,16 +0,0 @@ ---- -layout: default ---- - -
- - - -
- {{ content }} -
- -
diff --git a/_layouts/post.html b/_layouts/post.html deleted file mode 100644 index 8061c407..00000000 --- a/_layouts/post.html +++ /dev/null @@ -1,71 +0,0 @@ ---- -layout: default ---- - -{%- assign year = page.date | date: "%Y" -%} -{%- assign tags = page.tags | join: "" -%} -{%- assign categories = page.categories | join: "" -%} - -{% if page._styles %} - - -{% endif %} - -
- -
-

{{ page.title }}

- - -
- -
- {{ content }} -
- - {%- if site.disqus_shortname and page.comments -%} -
- - - {%- endif %} - - - -
diff --git a/_news/announcement_1.md b/_news/announcement_1.md deleted file mode 100644 index 98e5af5c..00000000 --- a/_news/announcement_1.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -layout: post -date: 2015-10-22 15:59:00-0400 -inline: true ---- - -A simple inline announcement. diff --git a/_news/announcement_2.md b/_news/announcement_2.md deleted file mode 100644 index dbd4b4d4..00000000 --- a/_news/announcement_2.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -layout: post -title: A long announcement with details -date: 2015-11-07 16:11:00-0400 -inline: false ---- - -Announcements and news can be much longer than just quick inline posts. In fact, they can have all the features available for the standard blog posts. See below. - -*** - -Jean shorts raw denim Vice normcore, art party High Life PBR skateboard stumptown vinyl kitsch. Four loko meh 8-bit, tousled banh mi tilde forage Schlitz dreamcatcher twee 3 wolf moon. Chambray asymmetrical paleo salvia, sartorial umami four loko master cleanse drinking vinegar brunch. Pinterest DIY authentic Schlitz, hoodie Intelligentsia butcher trust fund brunch shabby chic Kickstarter forage flexitarian. Direct trade cold-pressed meggings stumptown plaid, pop-up taxidermy. Hoodie XOXO fingerstache scenester Echo Park. Plaid ugh Wes Anderson, freegan pug selvage fanny pack leggings pickled food truck DIY irony Banksy. - -#### Hipster list - - -Hoodie Thundercats retro, tote bag 8-bit Godard craft beer gastropub. Truffaut Tumblr taxidermy, raw denim Kickstarter sartorial dreamcatcher. Quinoa chambray slow-carb salvia readymade, bicycle rights 90's yr typewriter selfies letterpress cardigan vegan. - -*** - -Pug heirloom High Life vinyl swag, single-origin coffee four dollar toast taxidermy reprehenderit fap distillery master cleanse locavore. Est anim sapiente leggings Brooklyn ea. Thundercats locavore excepteur veniam eiusmod. Raw denim Truffaut Schlitz, migas sapiente Portland VHS twee Bushwick Marfa typewriter retro id keytar. 
- -> We do not grow absolutely, chronologically. We grow sometimes in one dimension, and not in another, unevenly. We grow partially. We are relative. We are mature in one realm, childish in another. -> —Anais Nin - -Fap aliqua qui, scenester pug Echo Park polaroid irony shabby chic ex cardigan church-key Odd Future accusamus. Blog stumptown sartorial squid, gastropub duis aesthetic Truffaut vero. Pinterest tilde twee, odio mumblecore jean shorts lumbersexual. diff --git a/_news/announcement_3.md b/_news/announcement_3.md deleted file mode 100644 index d9072191..00000000 --- a/_news/announcement_3.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -layout: post -date: 2016-01-15 07:59:00-0400 -inline: true ---- - -A simple inline announcement with Markdown emoji! :sparkles: :smile: diff --git a/_pages/dropdown.md b/_pages/dropdown.md deleted file mode 100644 index 84a11232..00000000 --- a/_pages/dropdown.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -layout: page -title: past iterations -nav: true -nav_order: 99 -dropdown: true -children: - # - title: 2023 - # permalink: /about/ - # - title: divider - - title: 2022 - permalink: https://iclr-blog-track.github.io/home/ ---- \ No newline at end of file diff --git a/_pages/dropdown/index.html b/_pages/dropdown/index.html new file mode 100644 index 00000000..e320c40b --- /dev/null +++ b/_pages/dropdown/index.html @@ -0,0 +1 @@ + past iterations | 6.S898 Deep Learning Blogs 2023
\ No newline at end of file diff --git a/_plugins/external-posts.rb b/_plugins/external-posts.rb deleted file mode 100644 index e4fd5eb6..00000000 --- a/_plugins/external-posts.rb +++ /dev/null @@ -1,36 +0,0 @@ -require 'feedjira' -require 'httparty' -require 'jekyll' - -module ExternalPosts - class ExternalPostsGenerator < Jekyll::Generator - safe true - priority :high - - def generate(site) - if site.config['external_sources'] != nil - site.config['external_sources'].each do |src| - p "Fetching external posts from #{src['name']}:" - xml = HTTParty.get(src['rss_url']).body - feed = Feedjira.parse(xml) - feed.entries.each do |e| - p "...fetching #{e.url}" - slug = e.title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '') - path = site.in_source_dir("_posts/#{slug}.md") - doc = Jekyll::Document.new( - path, { :site => site, :collection => site.collections['posts'] } - ) - doc.data['external_source'] = src['name']; - doc.data['feed_content'] = e.content; - doc.data['title'] = "#{e.title}"; - doc.data['description'] = e.summary; - doc.data['date'] = e.published; - doc.data['redirect'] = e.url; - site.collections['posts'].docs << doc - end - end - end - end - end - -end diff --git a/_plugins/hideCustomBibtex.rb b/_plugins/hideCustomBibtex.rb deleted file mode 100644 index 4a852fde..00000000 --- a/_plugins/hideCustomBibtex.rb +++ /dev/null @@ -1,15 +0,0 @@ - module Jekyll - module HideCustomBibtex - def hideCustomBibtex(input) - keywords = @context.registers[:site].config['filtered_bibtex_keywords'] - - keywords.each do |keyword| - input = input.gsub(/^.*#{keyword}.*$\n/, '') - end - - return input - end - end -end - -Liquid::Template.register_filter(Jekyll::HideCustomBibtex) diff --git a/_posts/2022-11-09-how-cnns-learn-shapes.md b/_posts/2022-11-09-how-cnns-learn-shapes.md deleted file mode 100644 index 564f2c2f..00000000 --- a/_posts/2022-11-09-how-cnns-learn-shapes.md +++ /dev/null @@ -1,180 +0,0 @@ ---- -layout: distill -title: Can CNN learn shapes? 
-description: One widely accepted intuition is that Convolutional Neural Networks that are trained for object classification, combine low-level features (e.g. edges) to gradually learn more complex and abstracted patterns that are useful in differentiating images. Yet it remains poorly understood how CNNs actually make their decisions, and how their recognition strategies differ from humans. Specifically, there is a major debate about the question of whether CNNs primarily rely on surface regularities of objects, or whether they are capable of exploiting the spatial arrangement of features, similar to humans. -date: 2023-11-09 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Chloe Hong - url: - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-how-cnns-learn-shapes.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name : Background - - name: Motivation - - name: Methods - subsections: - - name: Training Data - - name: Architecture and Training Hyperparameters - - name: Convolutional Layer Filter Size - - name: Data Augmentation - Fragmentation - - name: Data Augmentation - Negative Labels - - name: Results - subsections: - - name: Training Evaluation - - name: Conflict Set Evaluation - - name: Filter Variation - - name: Data Augmentation Variation - - name : Conclusion - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. 
-_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Background -One widely accepted intuition is that Convolutional Neural Networks (CNNs) that are trained for object classification, combine low-level features (e.g. edges) to gradually learn more complex and abstracted patterns that are useful in differentiating images. Stemming from this is the idea that neural networks can understand and use shape information to classify objects, as humans would. Previous works have termed this explanation the shape hypothesis. As puts it, -> ... the network acquires complex knowledge about the kinds of shapes associated with each category. [...] High-level units appear to learn representations of shapes occurring in natural images - -This notion also appears in other explanations, such as in -> Intermediate CNN layers recognize parts of familiar objects, and subsequent layers [...] detect objects as combinations of these parts. - -{% include figure.html path="assets/img/2023-11-09-how-cnns-learn-shapes/shapetexture.png" class="img-fluid" style="width:100px; height:75px;"%} -Figure 1. shows that CNNs trained on ImageNet data are biased towards predicting the category corresponding to the texture rather than shape. - -Yet it remains poorly understood how CNNs actually make their decisions, and how their recognition strategies differ from humans. Specifically, there is a major debate about the question of whether CNNs primarily rely on surface regularities of objects, or whether they are capable of exploiting the spatial arrangement of features, similar to humans. Studies have shown that the extent to which CNNs use global features ; shapes or spatial relationships of shapes, is heavily dependent on the dataset it is trained on. 
shows that CNNs trained on ImageNet data are biased towards predicting the category corresponding to the texture rather than shape. reveal that CNNs learn spatial arrangements of features only up to a intermediate level of granularity by comparing networks trained on Sketchy dataset, composed of sketches drawn by images of animals, and the Animals dataset, images of animals. - -## Motivation - -The question leading this project is if it is possible to steer the learning of a CNN network to use abstracted global shape features as dominant strategy in classifying images, in a similar sense that humans do. Previous works have shown that networks trained on texture agnostic datasets, or abstracted sketch dataset have an increased ability to integrate global features. Extending the findings of these works, I experiment if it possible to induce the learning of CNNs to depend on global shapes by adjusting the filter size, or augmenting and curating the training data. - -## Methods -In the following experiments, I train a CNN on human-generated sketch data and test with conlfict sets to determine if it has learned to integrate global features in its decision making. The objective is to push the network to learn and depend on global features (the overall shape) of the object rather than local features (direction or curvature of strokes) in classifying images. To do this, I first vary the filter sizes to see if there is an opimal sequence that enables the network to learn such features. Next I augment the data by fragmentation and by adding a false category so that the network is forced to learn to classify images even when the local information is obscured and only when global information is present. Finally, to test the ability of the models from each experiment in integrating the global feature, I design a conflict set that is different from the training data. 
Images in the conflict set have the global features (overall shape) that aligns with its category but the local features (strokes and corner conditions) are distorted to varying degrees. - -### Training Data -The first way that the model is pushed to learn global features is by training it on human generated sketch data. This is distinct from the previous works that have used stylized image data, or image data that has been turned in to line drawings in that it is more driven by the human perception. It is likely that the data is more varied because it is each drawn by a different person, but what humans perceive as distinctive features of that object category is likely to be present across instances. - -The hypothesis is that because of the scarsity of features, and absense of other local features such as texture, the model would inevitably have to learn global features that humans commonly associate to object categories, such as shape. - -{% include figure.html path="assets/img/2023-11-09-how-cnns-learn-shapes/QuickDraw_example.png" class="img-fluid" style="width:300px; height:200px;"%} -Figure 2. Example from circle and square category of [Quick, Draw! dataset](https://github.com/googlecreativelab/quickdraw-dataset) that are used in this project. - -For the following experiments I use 100,000 instances each from the circle and square categories of the [Quick, Draw! dataset](https://github.com/googlecreativelab/quickdraw-dataset) that have been rendered into 28x28 grayscale bitmap in .npy format. The dataset is split 85% for training and 15% for validation. - -### Architecture and Training Hyperparameters -The CNN architecture is composed of 3 convolution layers and 2 linear layers with max pooling and relu activation. The filter size of each convolution layer, marked as * is varied in the following experiments. We use cross entropy loss and accuracy is the portion of instances that were labeled correcty. Each model is trained for 20 epochs with batch size 256. 
- -```python -nn.Sequential( - data_augmentation, - nn.Conv2d(1, 64, *, padding='same'), - nn.ReLU(), - nn.MaxPool2d(2), - nn.Conv2d(64, 128, *, padding='same'), - nn.ReLU(), - nn.MaxPool2d(2), - nn.Conv2d(128, 256, *, padding='same'), - nn.ReLU(), - nn.MaxPool2d(2), - nn.Flatten(), - nn.Linear(2304, 512), - nn.ReLU(), - nn.Linear(512, 2), # 2 categories (circle, square) -) -``` - -### Convolutional Layer Filter Size - -The hypothesis is that the size of the filters of each convolution layer affects the scale of features that the network effectively learns and integrates in its final decision making. The underlying assumption is that if the filter size gradually increases, the CNN learns global scale features and uses that as dominant stragety. I test for different combinations of size 3,5,7,9 to see if there is an optimal size filter to train a CNN network for our purpose. - -### Data Augmentation - Fragmentation - -I train models with augmented data of different degree of fragmentation. Lower degrees of fragmentation divide the shape into 2 fragments and with higher degree, the shape is divided into an increasing number of parts. I do this by using masks that create streaks going across the image each in the horizontal, vertical and two diagonal directions. As a result, we create circles and squares with dashed lines. - -{% include figure.html path="assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_fragmentation.png" class="img-fluid" style="width:100px; height:200px;"%} -Figure 3. Augmentations with varying degrees of fragmentation. - -The hypothesis is that fragments of circles and squares may be similar, so as the network is trained to distinguish between two categories regardless, it has to gain an understanding of larger scale features ; how these line segments are composed. If the model successfully train on datasets that are highly fragmented, it is expected to acquire knowledge of global features. 
For instance, intermediate scale understanding interpretation of circles would be that the angle of line segments are gratually rotating. On the otherhand squares would have parallel line segments up to each corner where ther is a 90 degree change in the angle. - - -### Data Augmentation - Negative Labels -We add instances where the local features of the circle or square is preserved, but the global feature is absent and labeled them as an additional category, 'false'. We create this augmentation by masking half or 3/4 of the existing data. The intention here is to have the model learn to only categorize shapes when their global features are present. - -{% include figure.html path="assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_negative.png" class="img-fluid" style="width:300px; height:200px;"%} -Figure 4. Augmentation with addition of 'false' category. - - -## Results -### Training Evaluation -We first want to examine if the independent variables affect the model's training on the classification task. There is the possibility that with certain filter sizes, the model may not be able to encode enough information to differentiate circles and squares. More likely there is a possibility with the augmentations that we are using to force the CNN to learn a more difficult strategy, where the model fails to train to classify instances similar to the training set to start with. If training the model is unsuccessful, it means that CNNs under those conditions are incapable of finding any strategy to differentiate the two shape categories. - -### Conflict Set Evaluation -To test the networks ability to employ global features we borrow the approach of that use "conflict examples". Conflict instances have the overall shape that aligns to its label, but the local features, such as stroke or texture do not. The premise is that it is easy for humans, that primarily use global information to differentiate shapes to successfully categorize these conflict sets. 
Therefore, it would be a good way to test if the trained CNNs use similar differentiating strategies as humans. - -{% include figure.html path="assets/img/2023-11-09-how-cnns-learn-shapes/conflictset.png" class="img-fluid" %} -Figure 5. Three conflict sets that obscure local features to contradict the global feature and ground truth label. - -We create three series of conflict sets for circle and squares that obscure its most distinguishing local features. The first set obscures the corner conditions - circles with one to two angular corners and squares with chamfered corners are included in this set. The second obscures line conditions - circles with angular lines and squares with curvy lines are created for this set. The third series targets the composition of strokes - instead of continuous lines, we use series of parallel lines of varying angles to form a circle or square. - -### Filter Variation - -{% include figure.html path="assets/img/2023-11-09-how-cnns-learn-shapes/filter_training.png" class="img-fluid" %} -Figure 6. Training evalution for variations in filter size of the convolution layer. - -For each variation in filter size, the models trained to reach over 98.5% accuracy on the validation set. Contrary to our speculation, the filter size did not largely affect the models ability to learn the classification task. - -{% include figure.html path="assets/img/2023-11-09-how-cnns-learn-shapes/filter_results.png" class="img-fluid" %} -Figure 7. Evaluation with conflict set for variations in filter size of the convolution layer. - -Overall we observe that having a large size filter at the final layer increases the model's performance on the conflict set as with filter sequence 337 and 339. We can speculate that having consistantly smaller size filters in the earlier layers and only increasing it at the end (337, 339) is better than gradually increaseing the size (357, 379). 
However, this is not true all the time, as models with consistent size filters performed relatively well (333, 555). Starting with a larger size filter (555, 557, 579 compared to 333, 337, 379) also helped in performance. However, this also came with an exception where 339 performed better than 559. - -Overall we can see that the models have trouble classifying instances with an increased degree of conflicting local features. For instance, the 4th instance in set 2 obstructs all four of the perpendicular angles of a square. The 3rd and 4th instances of set 2 have the most angular ridges forming their lines, and the 7th and 8th instances of set 2 have the most circular curves forming their lines. From set 3, the first and second instances obstruct the gradually changing angle of strokes within the circle the most. - - -### Data Augmentation Variation -Based on the results with filter variation, we choose the filter size 555, which performed moderately well but still has room for improvement, for the next experiment with augmented training data. - -{% include figure.html path="assets/img/2023-11-09-how-cnns-learn-shapes/augment_training.png" class="img-fluid" %} -Figure 8. Training evaluation for variations in augmentation of training data. - -All models trained to reach over 98% accuracy on the validation set. As we speculated, the model had more difficulty in training with the augmentation as opposed to without. With the additional third negative category, the model was easier to train. This is evident in the divide in the plot: datasets that were augmented with the negative category have higher evaluation values than the baseline, while those that were only augmented with fragmented data fall below the baseline. - -{% include figure.html path="assets/img/2023-11-09-how-cnns-learn-shapes/augment_results.png" class="img-fluid" %} -Figure 9. Evaluation with conflict set for variations in augmentation of training data. 
- -The performance of models trained with augmented data on the conflict set was worse than that of models trained only on the original data, which disproves our initial hypothesis that it would be possible to enforce the network to use global features with augmented data. What is interesting is how different augmentations affect the performance. Initially, we thought that with the increased degree of fragmentation in the augmentation, the model would learn global features better, and would perform better on the conflict set. However, in comparison among the augmentation variations, Aug 2 showed significantly poor performance. -Adding a 'false' category did not boost the performance either. What is interesting is that the misclassifications do not include the false label. We speculate that the model has learned to look at how much of the image is occupied. - -## Conclusion - -The experiments in this project have shown that there isn't an obvious way to steer CNN networks to learn intended scale features with filter size variation and data augmentation. While it was difficult to find a strict correlation, the variation in performance across experiments shows that the independent variables do have an effect on the information that the network encodes, and what information reaches the end of the network to determine the output. The fact that trained models were unable to generalize to the conflict set reinforces the fact that encoding global features is difficult for CNNs, and they would likely resort to classifying with smaller scale features if there are apparent differences. - -While the project seeks to disentangle factors that could affect what the CNN learns, the evaluation with conflict sets does not directly review how features are processed and learned within the network. Approaches such as visualizing the activation of each neuron or layer can be more effective in this and can reveal more about how to alter the network’s sensitivity to the global features. 
- diff --git a/_posts/2022-11-10-Vision_Language_Limitations.md b/_posts/2022-11-10-Vision_Language_Limitations.md deleted file mode 100644 index 41c31496..00000000 --- a/_posts/2022-11-10-Vision_Language_Limitations.md +++ /dev/null @@ -1,166 +0,0 @@ ---- -layout: distill -title: Understanding Limitations of Vision-Language Models -date: 2022-12-01 -htmlwidgets: true - - - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Shelley Choi - affiliations: - name: MIT - - name: Siddharth Somasundaram - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2022-12-01-distill-example.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Why are vision-language models important? - - name: How is our work different from previous related work? - - name: How are these models trained? - - name: Could the dataset play a role in training? - - name: Understanding role of text labels in CLIP, GAIA - - name: Limitations and Potential Confounding Parameters - - name: Conclusions - - name: References - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Why are vision-language models important? - -The emergence of joint vision-language models such as Contrastive-Language Image Pretraining (CLIP) [1] from OpenAI, and GAIA-1 [2] from Wayve AI have had critical implications in computer vision, robotics, generative AI, self-driving, and more. 
The key idea of these large foundation models is that they learn meaningful data representations of labeled (text, image) pairs. Once trained, these learned representations are sufficiently versatile and can directly be deployed for a broad range of applications. Such transfer learning is referred to as zero shot learning, where the learned representations can directly be used for unseen data in a new task context without any additional training. - -## How is our work different from previous related work? - -Many follow up works have since examined how these large vision-language models perform with respect to various scenarios. Prior works study these effects in the context of transfer learning. Jain et al. looks at how performance is examined with respect to the quality of the dataset and provides examples where the performance can be improved by removing from the source dataset [5]. This can be done by utilizing linear classifiers in a scalable and automatic manner [6]. Santurkar et al. explored the impact of language supervision in vision models, and when the pre-training dataset is sufficiently large and contains relevant captions, the model will outperform other image-only models [4]. Shen et al. investigated CLIP’s advantages in outperforming widely used visual encoders through task-specific fine-tuning and combining with vision-language model pre-training [7]. While the aforementioned literature made valuable contributions in understanding the performance of vision-language models, they do not present a clear understanding of what goes on behind the “black box” of the model’s behavior and performance. - -Our study is novel in that we provide a more in-depth, detailed analysis of both the impact of descriptive text (or the lack thereof) in vision-language models, in conjunction with the subtleties of dataset biases. We want to clearly visualize these variables’ impacts on model behavior and provide an explanation for such results. 
We specifically propose a (toy) expansion of prior work on understanding the role of text description [4]. Prior work claims that text descriptions with low variability will ensure that transferred features from CLIP models will outperform image only models. In our work, we will then examine how more descriptive text labels can help overcome biases in dataset and address domain shift. - - - -## How are these models trained? - -CLIP and GAIA are based on transformer architectures [3], which were originally developed for natural language processing and later adopted for computer vision as well. Two separate encoders, a text encoder and an image encoder, separately transform input data from their respective data modality into feature vectors. In aligning images and text in feature space, CLIP and GAIA are able to learn semantically meaningful and robust representations that are useful for several downstream applications. These models perform this embedding space alignment in different ways. CLIP performs training by predicting which image features correspond to which text embeddings in a batch of (image, text) pairs. GAIA is trained in an autoregressive manner, predicting the next token, given past image, text, and action states. GAIA is reported to have ~9 billion parameters and CLIP is reported to have ~63 million parameters. The differences between these two architectures are also related to the type of input data that is being analyzed. While CLIP operates on single images, GAIA is meant to be used for self-driving, meaning that it operates on videos rather than images. As a result, GAIA requires some notion of temporal consistency, which is why autoregression is a good architecture, and more parameters (since video data is more complex than image data). In this study, we will primarily focus on the CLIP architecture (shown below for convenience). 
- -{% include figure.html path="assets/img/2022-11-10-Vision_Language_Limitations/clip_model.png" class="img-fluid" %} -**Figure 1.** CLIP Architecture, a commonly used vision-language model [1]. (We apologize for blurring, couldn't figure out how to get rid of it). - - -## Could the dataset play a role in training? - -The nature of the training process of CLIP models introduces questions about how robust the training procedure would be. The training relies on (image, text) pairs, but a single text phrase is not a unique description of an image, and a single text description can be used to describe many different scenes. This one-to-many mapping problem introduces questions about what the optimal text description of a given image should be, or if that optimal description even exists. Santurkar et al. [4] looks at how vision-language models such as CLIP and Simple framework for Contrastive Learning of visual Representations (SimCLR) exhibit different performance based on whether they are trained with or without captions and only images. We were inspired by the study’s suggestion that the descriptiveness of the dataset captions can directly influence how well the CLIP models transfer. - -A more interesting question, that we answer in this blog post, is could having more descriptive text descriptions allow these large foundation models to mitigate or overcome dataset bias? - -To study this question, we consider a toy example with dogs and camels in the classic domain adaptation problem. In this context, we answer the following question: - - -*Can more descriptive text labels enable better domain adaptation in vision-language models with biased datasets?* - -Domain adaptation is a problem in transfer learning where we want to have a model be able to learn the model in one context, and then generalize to another context. 
In other words, given a source domain that the model is trained on, domain adaptation is the problem of having high model performance in the target domain. In the dog vs. camel example, the domain adaptation problem occurs when we are used to seeing dogs and camels in certain contexts. For example, we generally expect to see camels in the desert and dogs in suburban environments (e.g. on the lawn, inside the house). If a model is trained to see such examples, then is suddenly shown a camel inside a house in Cambridge, the model has a strong chance of failure. Performance failure under domain shift is indicative that the model failed to disentangle background features from the camel itself. We will study whether descriptive text labels can enhance domain adaptation ability of current transformer-based foundation models. - -## Understanding role of text labels in CLIP, GAIA -Due to the large model size, invisible datasets, and large number of GPU hours needed to train CLIP and GAIA, we perform an analysis in a toy setup using the domain adaptation problem we described above. Our goal is to align image and text features, and then visualize the embeddings corresponding to different image classes. - -Each of the four experiments determine 1) how the models respond to dataset bias, and 2) how important the addition of descriptive text labels are in improving performance using a trade-off combination of the variables. We aim to measure and visualize the extent to which the caption aids in overcoming biases in training data. - -### Architecture -Our architecture is shown below. We have two separate transformer architectures: an image encoder and a text encoder. The output of each of these encoders is mapped to an image and text embedding, then L2-normalized. We then compute the cosine similarity of the two embeddings and use the similarity and compute a binary cross entropy loss. Note that, unlike CLIP, we do not compute similarity across all samples within a batch. 
We only compute cosine similarity for a sample (image, text) pair. - -### Dataset -**Image Generation.** We generated our own dataset using DALL-E 2. The total size of the training dataset is 196 images, with (1) 48 images of horses on grass, (2) 48 images of horses in the desert, (3) 48 images of camels in the desert, and (4) 48 images of camels on grass. Note that the DALL-E generated images are used for academic purposes, and are not intended for any commercial use, as required by DALL-E terms and conditions. - -**Text Labels.** We had two cases: a descriptive label and an undescriptive label. In the descriptive label case, we used the following labels for each of the four cases above (1) “horse on the grass”, (2) “horse in the desert”, (3) “a camel in the desert”, (4) “camel on the grass”. In the undescriptive label case, we just used the labels (1) “horse”, (2) “horse”, (3) “camel”, (4) “camel”. - -### Experiment 1: No Dataset Bias, Undescriptive Text Labels -***Description:*** In the first experiment, we first baseline our text and images encoders ability to perform classification of camels and horses in the case when there is no dataset bias. We use all 196 images with undescriptive labels, so that there is an even split between all four cases (each case comprises ¼ of the dataset). The goal is to assess how well the model can learn and generalize across different classes, and provides the basis for the models’ inherent capabilities and performance without impact from external factors. - -***Results:*** We performed Principal Component Analysis (PCA) on the feature vectors of our output from the image encoder and the text encoder in order to visualize more similar labels being mapped closer to each other. We notice that camels in desert and camels in grass are closer together in the feature space, while horses in desert and horses in grass are closer together. 
There is some overlap between camels in grass and horses in deserts, indicating some confusion with the context of the scene. That said, there is a very clear distinction between camels in the desert and horses in the grass, implying that the model is clearly aware that they are very different classes. The overall separation is rather decent when there is no dataset bias. - -{% include figure.html path="assets/img/2022-11-10-Vision_Language_Limitations/fig_2.png" class="img-fluid" %} -**Figure 2.** Vague separation in different environments with less descriptive labels. - -### Experiment 2: No Dataset Bias, Descriptive Text Labels -***Description:*** In the second experiment, we keep the dataset unbiased, but add descriptive labels. - -***Results:*** In the plot below, we can see that using descriptive labels slightly improves the separation between classes in the unbiased dataset case. Specifically note the strong separation between red (camels in desert) and green (horses in grass). These two cases are easiest to distinguish, as is reflected in the scattered plot below. Interestingly, when we use descriptive text, the labels are getting bunched together based on context. In particular, horses and camels in the desert are being placed close together, while horses and camels in the grass are being placed close together. This is likely because the model is learning to use the context as a way to separate classes as well. There is still a general progression from red (camels in desert) → blue (horses in desert) → cyan (camels in grass) → green (horses in grass), suggesting some semantic smoothness in feature space. The transition between blue and cyan is rather abrupt though. - -{% include figure.html path="assets/img/2022-11-10-Vision_Language_Limitations/fig_3.png" class="img-fluid" %} -**Figure 3.** Improvements in class separation with more descriptive labels. 
- -### Experiment 3: Dataset Bias, Undescriptive Text Labels -***Description:*** In the third experiment, we begin to investigate the role of dataset bias. The goal is to build on the results from the first experiment, reproducing a common aforementioned problem of over- or under-representation in datasets. We look at how the model responds to dataset bias and whether its performance can still stay the same, regardless of how the images are distributed in classes. Dataset bias is defined by the percentage of minority samples that we remove (minority samples are horses in desert and camels in grass). For example, we originally used 48 images of horses in the desert. 25% bias is defined as using only 12 images of horses in the desert. - -***Results:*** These results will be jointly explained with experiment 4. - -### Experiment 4: Dataset Bias, Descriptive Text Labels -***Description:*** In the fourth experiment, we dive deeper into the impact of dataset bias that we began exploring in the second experiment, and question whether performance will be improved when the provided text labels are more descriptive. This directly answers the question of how impactful descriptive text is in vision-language models, in addition to whether they can help overcome dataset bias. - -***Results:*** Surprisingly, when the dataset is more biased, we find that the separation between classes is better. We believe this to be true because the model is able to identify clear separation between horses and camels based on the context alone. As a result, it is easily able to separate red and green classes as the bias increases. We notice that the minority classes (horses in desert and camels in grass) also spread out in latent space as the dataset is biased. 
When using descriptive labels, we notice that the blue points (horses in the desert) are able to separate themselves more from other clusters than in the undescriptive case, indicating some success with descriptive labels in the event of dataset bias. Overall, across all cases, the model generally has an easy time separating camels in the desert, which is likely due to the distinctness of the background and the object. - -{% include figure.html path="assets/img/2022-11-10-Vision_Language_Limitations/dataset_bias.png" class="img-fluid" %} -**Figure 4.** More biased dataset can show more separation between classes. - -## Limitations and Potential Confounding Parameters -There are several possible confounding parameters that may have impacted our results beyond the variables that we were looking at. They include the following: - -### Dataset -*Generating the dataset:* Because we used DALL-E to generate our dataset, the limitations of DALL-E itself can carry over to our performance. The inherent diversity of the data that DALL-E uses to train would directly impact our results, as well as the hyperparameters that were modified in training DALL-E. DALL-E could also have a specific image aesthetic that is different from real photography. - -*Size:* Model performance can also be impacted by a limited dataset. We trained and validated our model on 196 images, which is not a large dataset. The confounding variable here would be the complexity of the images, where there may be fewer images with less clear distinctions of “horses in the grass” or “camels in the desert”. Furthermore, there are different breeds, sizes, colors, and shapes of horses and camels that may not have been fully explored due to the limited dataset size. - -*Composition sensitivity:* Literature review has shown that the model’s performance can be impacted by quality in addition to the quantity of the data [5]. 
Recent evidence has shown that removing data from a dataset can aid in transfer learning and improve downstream effectiveness. While we did not run experiments in identifying what specific composition and characteristics of the data should be removed, such analysis would have impacted our results. - -### Model -*Computational resources:* Because we were restricted by GPU resources, we chose to use a smaller dataset and small self-trained Transformer architectures. We were also unable to train for more epochs or use a more complex model architecture, which could have lowered model performance. We found that increasing the batch size or increasing the number of layers led our model to run out of computational power and continually crash. - -*Tuning hyperparameters:* Batch size, learning rate, number of layers, optimization models, and other factors could also limit the exploration of optimal configurations and affect overall performance. For example, a higher learning rate in a model could converge faster and show higher performance, when in reality, it is not an accurate reflection of the model. Overfitting and different regularization parameters can also lead to over- or under-fitting. - -## Conclusions - -Our toy problem gives some intuition into the idea that the descriptiveness of the label can affect the clustering profile of different datasets. Note that because our experiments were done in smaller settings, we cannot make any claims with respect to scaling up to large amounts of data, compute, and model size. That said, when adding description of the context of the images (i.e. desert vs. grass), we noticed that the points in feature space began to cluster first based on context, then based on the animal type (camel vs. horse). We also noticed that under dataset bias, the majority groups (horses in grass and camels in desert) begin to have better clustering separation. 
However, the minority group performance decreased, which suggests the importance of accounting for dataset bias in machine learning algorithms. In our experiments, we partially found more descriptive labels to help mitigate these negative effects, but mitigating these effects more reliably is an ongoing research direction. -## References - -1. Radford et al., *“Learning transferable visual models from natural language supervision”*, ICML 2021 - -2. Hu et al., *“GAIA-1: A Generative World Model for Autonomous Driving”*, arXiv 2023 - -3. Vaswani et al. *“Attention Is All You Need”*, NeurIPS 2017 - -4. Santurkar et al., *“Is a Caption Worth a Thousand Images? A Controlled Study for Representation Learning”*, CVPR 2022 - -5. Jain et al., *“A Data-Based Perspective on Transfer Learning”*, CVPR 2023 - -6. Jain et al, *“Distilling Model Failures as Directions in Latent Space”*, ICLR 2023 - -7. Shen et al. *“How Much Can CLIP Benefit Vision-and-Language Tasks?”*, arXiv 2021 - - diff --git a/_posts/2022-12-01-distill-example.md b/_posts/2022-12-01-distill-example.md deleted file mode 100644 index 2d133452..00000000 --- a/_posts/2022-12-01-distill-example.md +++ /dev/null @@ -1,431 +0,0 @@ ---- -layout: distill -title: Sample Blog Post -description: Your blog post's abstract. - This is an example of a distill-style blog post and the main elements it supports. -date: 2022-12-01 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Albert Einstein - url: "https://en.wikipedia.org/wiki/Albert_Einstein" - affiliations: - name: IAS, Princeton - - name: Boris Podolsky - url: "https://en.wikipedia.org/wiki/Boris_Podolsky" - affiliations: - name: IAS, Princeton - - name: Nathan Rosen - url: "https://en.wikipedia.org/wiki/Nathan_Rosen" - affiliations: - name: IAS, Princeton - -# must be the exact same name as your blogpost -bibliography: 2022-12-01-distill-example.bib - -# Add a table of contents to your post. 
-# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Equations - - name: Images and Figures - subsections: - - name: Interactive Figures - - name: Citations - - name: Footnotes - - name: Code Blocks - - name: Layouts - - name: Other Typography? - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Equations - -This theme supports rendering beautiful math in inline and display modes using [MathJax 3](https://www.mathjax.org/) engine. -You just need to surround your math expression with `$$`, like `$$ E = mc^2 $$`. -If you leave it inside a paragraph, it will produce an inline expression, just like $$ E = mc^2 $$. - -To use display mode, again surround your expression with `$$` and place it as a separate paragraph. -Here is an example: - -$$ -\left( \sum_{k=1}^n a_k b_k \right)^2 \leq \left( \sum_{k=1}^n a_k^2 \right) \left( \sum_{k=1}^n b_k^2 \right) -$$ - -Note that MathJax 3 is [a major re-write of MathJax](https://docs.mathjax.org/en/latest/upgrading/whats-new-3.0.html) that brought a significant improvement to the loading and rendering speed, which is now [on par with KaTeX](http://www.intmath.com/cg5/katex-mathjax-comparison.php). - - -## Images and Figures - -Its generally a better idea to avoid linking to images hosted elsewhere - links can break and you -might face losing important information in your blog post. 
-To include images in your submission in this way, you must do something like the following: - -```markdown -{% raw %}{% include figure.html path="assets/img/2022-12-01-distill-example/iclr.png" class="img-fluid" %}{% endraw %} -``` - -which results in the following image: - -{% include figure.html path="assets/img/2022-12-01-distill-example/iclr.png" class="img-fluid" %} - -To ensure that there are no namespace conflicts, you must save your asset to your unique directory -`/assets/img/2023-05-01-[SUBMISSION NAME]` within your submission. - -Please avoid using the direct markdown method of embedding images; they may not be properly resized. -Some more complex ways to load images (note the different styles of the shapes/shadows): - -
-
- {% include figure.html path="assets/img/2022-12-01-distill-example/9.jpg" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2022-12-01-distill-example/7.jpg" class="img-fluid rounded z-depth-1" %} -
-
-
- A simple, elegant caption looks good between image rows, after each row, or doesn't have to be there at all. -
- -
-
- {% include figure.html path="assets/img/2022-12-01-distill-example/8.jpg" class="img-fluid z-depth-2" %} -
-
- {% include figure.html path="assets/img/2022-12-01-distill-example/10.jpg" class="img-fluid z-depth-2" %} -
-
- -
-
- {% include figure.html path="assets/img/2022-12-01-distill-example/11.jpg" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2022-12-01-distill-example/12.jpg" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2022-12-01-distill-example/7.jpg" class="img-fluid" %} -
-
- -### Interactive Figures - -Here's how you could embed interactive figures that have been exported as HTML files. -Note that we will be using plotly for this demo, but anything built off of HTML should work -(**no extra javascript is allowed!**). -All that's required is for you to export your figure into HTML format, and make sure that the file -exists in the `assets/html/[SUBMISSION NAME]/` directory in this repository's root directory. -To embed it into any page, simply insert the following code anywhere into your page. - -```markdown -{% raw %}{% include [FIGURE_NAME].html %}{% endraw %} -``` - -For example, the following code can be used to generate the figure underneath it. - -```python -import pandas as pd -import plotly.express as px - -df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/earthquakes-23k.csv') - -fig = px.density_mapbox( - df, lat='Latitude', lon='Longitude', z='Magnitude', radius=10, - center=dict(lat=0, lon=180), zoom=0, mapbox_style="stamen-terrain") -fig.show() - -fig.write_html('./assets/html/2022-12-01-distill-example/plotly_demo_1.html') -``` - -And then include it with the following: - -```html -{% raw %}
- -
{% endraw %} -``` - -Voila! - -
- -
- -## Citations - -Citations are then used in the article body with the `` tag. -The key attribute is a reference to the id provided in the bibliography. -The key attribute can take multiple ids, separated by commas. - -The citation is presented inline like this: (a number that displays more information on hover). -If you have an appendix, a bibliography is automatically created and populated in it. - -Distill chose a numerical inline citation style to improve readability of citation dense articles and because many of the benefits of longer citations are obviated by displaying more information on hover. -However, we consider it good style to mention author last names if you discuss something at length and it fits into the flow well — the authors are human and it’s nice for them to have the community associate them with their work. - -*** - -## Footnotes - -Just wrap the text you would like to show up in a footnote in a `` tag. -The number of the footnote will be automatically generated.This will become a hoverable footnote. - -*** - -## Code Blocks - -This theme implements a built-in Jekyll feature, the use of Rouge, for syntax highlighting. -It supports more than 100 languages. -This example is in C++. -All you have to do is wrap your code in a liquid tag: - -{% raw %} -{% highlight c++ linenos %}
code code code
{% endhighlight %} -{% endraw %} - -The keyword `linenos` triggers display of line numbers. You can try toggling it on or off yourself below: - -{% highlight c++ %} - -int main(int argc, char const \*argv[]) -{ -string myString; - - cout << "input a string: "; - getline(cin, myString); - int length = myString.length(); - - char charArray = new char * [length]; - - charArray = myString; - for(int i = 0; i < length; ++i){ - cout << charArray[i] << " "; - } - - return 0; -} - -{% endhighlight %} - -*** - -## Diagrams - -This theme supports generating various diagrams from a text description using [jekyll-diagrams](https://github.com/zhustec/jekyll-diagrams){:target="\_blank"} plugin. -Below, we generate a few examples of such diagrams using languages such as [mermaid](https://mermaid-js.github.io/mermaid/){:target="\_blank"}, [plantuml](https://plantuml.com/){:target="\_blank"}, [vega-lite](https://vega.github.io/vega-lite/){:target="\_blank"}, etc. - -**Note:** different diagram-generation packages require external dependencies to be installed on your machine. -Also, be mindful of that because of diagram generation the fist time you build your Jekyll website after adding new diagrams will be SLOW. -For any other details, please refer to [jekyll-diagrams](https://github.com/zhustec/jekyll-diagrams){:target="\_blank"} README. - -**Note:** This is not supported for local rendering! - -The diagram below was generated by the following code: - -{% raw %} -``` -{% mermaid %} -sequenceDiagram - participant John - participant Alice - Alice->>John: Hello John, how are you? - John-->>Alice: Great! -{% endmermaid %} -``` -{% endraw %} - -{% mermaid %} -sequenceDiagram -participant John -participant Alice -Alice->>John: Hello John, how are you? -John-->>Alice: Great! -{% endmermaid %} - -*** - -## Blockquotes - -
- We do not grow absolutely, chronologically. We grow sometimes in one dimension, and not in another, unevenly. We grow partially. We are relative. We are mature in one realm, childish in another. - —Anais Nin -
- -*** - - -## Layouts - -The main text column is referred to as the body. -It is the assumed layout of any direct descendants of the `d-article` element. - -
-

.l-body

-
- -For images you want to display a little larger, try `.l-page`: - -
-

.l-page

-
- -All of these have an outset variant if you want to poke out from the body text a little bit. -For instance: - -
-

.l-body-outset

-
- -
-

.l-page-outset

-
- -Occasionally you’ll want to use the full browser width. -For this, use `.l-screen`. -You can also inset the element a little from the edge of the browser by using the inset variant. - -
-

.l-screen

-
-
-

.l-screen-inset

-
- -The final layout is for marginalia, asides, and footnotes. -It does not interrupt the normal flow of `.l-body` sized text except on mobile screen sizes. - -
-

.l-gutter

-
- -*** - -## Other Typography? - -Emphasis, aka italics, with *asterisks* (`*asterisks*`) or _underscores_ (`_underscores_`). - -Strong emphasis, aka bold, with **asterisks** or __underscores__. - -Combined emphasis with **asterisks and _underscores_**. - -Strikethrough uses two tildes. ~~Scratch this.~~ - -1. First ordered list item -2. Another item -⋅⋅* Unordered sub-list. -1. Actual numbers don't matter, just that it's a number -⋅⋅1. Ordered sub-list -4. And another item. - -⋅⋅⋅You can have properly indented paragraphs within list items. Notice the blank line above, and the leading spaces (at least one, but we'll use three here to also align the raw Markdown). - -⋅⋅⋅To have a line break without a paragraph, you will need to use two trailing spaces.⋅⋅ -⋅⋅⋅Note that this line is separate, but within the same paragraph.⋅⋅ -⋅⋅⋅(This is contrary to the typical GFM line break behaviour, where trailing spaces are not required.) - -* Unordered list can use asterisks -- Or minuses -+ Or pluses - -[I'm an inline-style link](https://www.google.com) - -[I'm an inline-style link with title](https://www.google.com "Google's Homepage") - -[I'm a reference-style link][Arbitrary case-insensitive reference text] - -[I'm a relative reference to a repository file](../blob/master/LICENSE) - -[You can use numbers for reference-style link definitions][1] - -Or leave it empty and use the [link text itself]. - -URLs and URLs in angle brackets will automatically get turned into links. -http://www.example.com or and sometimes -example.com (but not on Github, for example). - -Some text to show that the reference links can follow later. 
- -[arbitrary case-insensitive reference text]: https://www.mozilla.org -[1]: http://slashdot.org -[link text itself]: http://www.reddit.com - -Here's our logo (hover to see the title text): - -Inline-style: -![alt text](https://github.com/adam-p/markdown-here/raw/master/src/common/images/icon48.png "Logo Title Text 1") - -Reference-style: -![alt text][logo] - -[logo]: https://github.com/adam-p/markdown-here/raw/master/src/common/images/icon48.png "Logo Title Text 2" - -Inline `code` has `back-ticks around` it. - -```javascript -var s = "JavaScript syntax highlighting"; -alert(s); -``` - -```python -s = "Python syntax highlighting" -print s -``` - -``` -No language indicated, so no syntax highlighting. -But let's throw in a tag. -``` - -Colons can be used to align columns. - -| Tables | Are | Cool | -| ------------- |:-------------:| -----:| -| col 3 is | right-aligned | $1600 | -| col 2 is | centered | $12 | -| zebra stripes | are neat | $1 | - -There must be at least 3 dashes separating each header cell. -The outer pipes (|) are optional, and you don't need to make the -raw Markdown line up prettily. You can also use inline Markdown. - -Markdown | Less | Pretty ---- | --- | --- -*Still* | `renders` | **nicely** -1 | 2 | 3 - -> Blockquotes are very handy in email to emulate reply text. -> This line is part of the same quote. - -Quote break. - -> This is a very long line that will still be quoted properly when it wraps. Oh boy let's keep writing to make sure this is long enough to actually wrap for everyone. Oh, you can *put* **Markdown** into a blockquote. - - -Here's a line for us to start with. - -This line is separated from the one above by two newlines, so it will be a *separate paragraph*. - -This line is also a separate paragraph, but... -This line is only separated by a single newline, so it's a separate line in the *same paragraph*. 
diff --git a/_posts/2023-11-01-Symmetry-Optimization.md b/_posts/2023-11-01-Symmetry-Optimization.md deleted file mode 100644 index d09b09e9..00000000 --- a/_posts/2023-11-01-Symmetry-Optimization.md +++ /dev/null @@ -1,165 +0,0 @@ ---- -layout: distill -title: Investigating the Impact of Symmetric Optimization Algorithms on Learnability -description: Recent theoretical papers in machine learning have raised concerns about the impact of symmetric optimization algorithms on learnability, citing hardness results from theoretical computer science. This project aims to empirically investigate and validate these theoretical claims by designing and conducting experiments as understanding the role of optimization algorithms in the learning process is crucial for advancing the field of machine learning. -date: 2023-11-09 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Kartikesh Mishra - url: "" - affiliations: - name: MIT - - name: Divya P Shyamal - url: "" - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-01-Symmetry-Optimization.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Experimental design - subsections: - - name: Learning Tasks and Datasets - - name: Learning Algorithms - - name: Evaluation Metrics - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. 
-_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Introductions - -Neural networks have been a staple in Deep Learning due to their expressive power. While the architecture itself is very powerful, the process of \textit{optimizing} the neural network - i.e., finding the values of the parameters of the network that minimize the loss over training data - is approximate. After all, a neural network is a highly non-smooth function and is inherently difficult to optimize. The core idea of many of these methods is to approximate the neural network (i.e. via first or second-order approximations), which are then easier to optimize over. - -Our goal is to explore if incorporating "asymmetries" into optimization can help. Many methods use a similar update rule for all parameters in the network. We experiment with using different rules for different parameters, guided by heuristics. - -## Motivation: a couple of nice papers - -This project is motivated by a couple results, not necessarily in the context of neural networks. The first comes from a 2004 paper by Andrew Ng titled "Feature Selection, $$L_1$$ vs. $$L_2$$ regularization, and rotational invariance". It concerns the sample complexity of feature selection - how much training data is necessary to fit the model to some accuracy with high probability - where the number of relevant features is small compared to the total number of features. The paper shows that the mode of regularization is of utmost importance to the sample complexity: the sample complexity using $L_2$ regularization is exponential compared to the sample complexity with $L_1$ regularization. One may ask: what does this have to do with symmetry? 
In the case of $L_2$ regularization, the classifier remains the same even when the training data is rotated (i.e. the data is pre-multiplied by a rotation matrix). More aptly, logistic regression with $L_2$ regularization is \textit{rotationally invariant}. This is not the case for $L_1$ regularization. For the precise statements, see the theorems from the paper below: - -### Theorem: Sample complexity with $L_1$-regularized logistic regression - -Let any $\epsilon>0, \delta>0, C>0, K\geq 1$ be given, and let $0<\gamma<1$ be a fixed constant. Suppose there exist $r$ indices $1\leq i_1, i_2,\ldots i_r\leq n$, and a parameter vector $$\theta^*\in\mathbb{R}^n$$ such that only the $r$ corressponding components of $\theta^*$ are non-zero, and $$|\theta_{ij}|\leq K$$ ($j=1,\ldots r$). Suppose further that $$C\geq rK$$. Then, in order to guarantee that, with probability at least $1-\delta$, the parameters $\hat{\theta}$ output by our learning algorithm does nearly as well as $$\theta^*$$, i.e., that $$\epsilon^l(\hat{\theta})\leq \epsilon^l(\theta^*)+\epsilon,$$ -it suffices that $$m=\Omega((\log n)\cdot \text{poly}(r, K, \log(1/\delta), 1/\epsilon, C)).$$ - -### Theorem: Sample complexity for rotationally invariant algorithms (including $L_2$-regularized logistic regression) - -Let $L$ be any rotationally invariant learning algorithm, and let any $0<\epsilon<1/8, 0<\delta<1/100$ be fixed. Then there exists a learning problem $\mathscr{D}$ so that: $(i)$ The labels are determinisitically related to the inputs according to $y=1$ if $x_1\geq t$, $y=0$ otherwise for some $t$, and $(ii)$ In order for $L$ to attain $\epsilon$ or lower $0/1$ misclassification error with probability at least $1-\delta$, it is necessary that the training set size be at least $$m=\Omega(n/\epsilon)$$ - -While this example is nice and shows us how symmetry can be harmful, it concerns the symmetry of the algorithm disregarding optimization. 
A 2022 paper by Abbe and Adsera specializes the effects of symmetry to neural networks trained by gradient descent (more on this later). This paper uses a notion of symmetry called \textit{G-equivariance}. See the definition below: - -### (Definition: $G-$equivariance) A randomized algorithm $A$ that takes in a data distribution $\mathcal{D}\in\mathcal{P}(\mathcal{X}\times\mathcal{Y})$ and outputs a function $\mathcal{A}(\mathcal{D}): \mathcal{X}\rightarrow\mathcal{Y}$ is said to be $G-$equivariant if for all $g\in G$ $$\mathcal{A}(\mathcal{D})\overset{d}{=}\mathcal{A}(g(\mathcal{D}))\circ g$$ - -Here $g$ is a group element that acts on the data space $\mathcal{X}$, and so is viewed as a function $g:\mathcal{X}\rightarrow\mathcal{X}$, and $g(\mathcal{D})$ is the distribution of ${g(\mathbf{x}),y}$ where $(\mathbf{x}, y)\sim\mathcal{D}$ - -More simply, an algorithm is G-equivariant if the prediction function produced by the algorithm does not vary when the data distribution is transformed according to $G$ (i.e., a group element $g$ is applied to the data distribution). Note the algorithm includes optimizing parameters: an example of a G-equivariant algorithm is learning a fully-connected neural network via SGD with Gaussian initialization, which is equivariant with respect to orthogonal transformations. More generally, neural networks trained with SGD or noisy GD hold G-equivariance. The paper claims that G-equivariant algorithms are limitted in which functions they can learn. This is stated informally in the following theorem, where the G-alignment is a (rather complicated) measure of distance: - -### GD lower bound, informal statement: Limitations of G-equivariant algorithms - -Let $$\mathcal{D}_f\in\mathcal{P}(\mathcal{X}\times\mathbb{R})$$ be the distribution of $$(\mathbf{x}, f(\mathbf{x}))$$ for $$\mathbf{x}\sim \mu_\mathcal{X}$$. 
If $$\mu_\mathcal{X}$$ is $$G-$$invariant and the $$G-$$alignment of $$(\mu_\mathcal{X},f)$$ is small, then $$f$$ cannot be efficiently learned by a $G-$equivariant GD algorithm. - - - -We refer readers interested in further details and the proof of the theorem to the paper. The paper is quite nice and we encourage readers interested in theory to take a look at it. All in all, the paper suggests training neural networks with SGD is not necessarily the way to go. Therefore, we consider variants of GD that prove to perform better in practice. We first introduce gradient descent and a popular variant: Adam. - - -## Overview of existing optimization algorithms - -### Gradient Descent - -The most widely-used optimization algorithms are some version of \textit{gradient descent}. Gradient descent iteratively updates the parameter values, moving the parameter in the direction of steepest descent (given by the negative of the gradient of the loss with respect to the parameter). Essentially, gradient descent uses a first-order approximation The amount by which the parameter is moved in this direction is referred to as \textit{learning rate} or step size, typically denoted by $\eta$. The update rule is given by $$\theta^{t+1}= \theta^t - \eta_t\nabla_{\theta}\mathscr{L}_{\mathscr{D}}(\theta^t)$$ -where the subscript on $\eta$ indicates a learning rate that can be changed over time. Common strategies for varying $\eta$ over time consist of decaying $\eta$, whether it be a linear or exponential decay (or something in between). In practice, \textit{stochastic} gradient descent (SGD) is used. In SGD, instead of computing the gradient for each datapoint, the gradient is approximating by taking the average of the gradients at a subset (i.e. batch) of the data. A variation of gradient descent incorporates the concept of momentum. 
With momentum, the increment to the parameter is a constant $$\mu$$, the momentum parameter, times the previous increment, plus the update we saw in GD: $$\eta_t\nabla_{\theta}\mathscr{L}_{\mathscr{D}}(\theta^t)$$. In other words, the increment is a weighted average of the previous increment and the typical GD update. Too high of a momentum can lead to overshooting the minimizer, analogous to how too high of a learning rate in GD can lead to divergence. - -### Adam - -The most popular optimizer in practice is called Adam, which performs well compared to . Adam is a gradient-based method which uses the gradient as well as the squared gradient (computed from batches), as well as an exponential decay scheme, to iteratively update $\theta$. It estimates the first and second moments of the gradient from the batch computations, and uses these estimates in its update rule. Adam requires three parameters: the learning rate, and one each for the rate of exponential decays of the moment estimates of the gradients. Adam consistently outperforms standard SGD. The optimization we present is based upon Adam, with a few modifications. - -We briefly note that these methods are \textit{first-order methods}: they only consider first derivatives, i.e. the gradient. Second-order methods, such as Newton's method, should theoretically be better because the approximation of the function will be better. However, the computation of the Hessian is rather cumbersome in neural networks, which is why they are not typically used. - -### Automatic Gradient Descent - -Another method we consider is Automatic Gradient Descent (AGD), which is developed in recent literature (co-authored by our very own instructor, Jeremy Bernstein!). This paper attempts to get rid of the pesky hyperparameter-tuning stage that is involved in training neural networks, leading to \textit{hyperparameter transfer}. In practice, a variety of learning rates is tested during training. 
In addition, this learning rate may not "transfer" across architectures: if one were to make their neural network wider or deeper, they would most likely have to search for the optimal learning rate once again. Automatic Gradient Descent attempts to solve this problem by coming up with an update that is architecture-independent in the realm of MLPs. AGD operates by computing an upperbound for the loss after the update (i.e. $\mathscr{L}(\mathbf{w}+\Delta\mathbf{w})$, where $\mathbf{w}$ is the parameter we are optimizing), then optimizing this upperbound in $\Delta\mathbf{w}$ to find the best step size. This step size is then used to update the parameter, and is recalculated at each iteration. The algorithm uses spectrally-normalized weight matrices, which allows for a nice upperbound for the loss function allowing for the optimal choice of $\eta$ to be solved for (in particular, it allows for matrix inequalities involving matrix norms to be used). The algorithm is given in full below: -{% include figure.html path="assets/img/2023-11-01-Symmetry-Optimization-project/agd.png" class="img-fluid rounded z-depth-1" %} - -We include AGD in this discussion because it is an asymmetric algorithm: the weights are normalized in a layer-dependent fashion. In addition, it takes a stab at alleviating the annoying task of hyperparameter tuning. We see in practice, however, that it does not perform as well as Adam. This is presumably because the approximation of the loss function via upperbounding with matrix inequalities is not tight, or maybe because the model does not incorporate biases as presented in the paper.\\ -We now begin discussion of our method, which has been crafted after studying these existing methods and taking into account the potential disbenefits of asymmetry. 
- -### Extension of AGD to regularized losses - -We found the idea of AGD to be very nice, and in an attempt to understand it better, decided to explore one of the further directions listed in the paper: applying the method to regularized losses. The work in the paper applies to losses of the form $\frac{1}{N}\sum_{(x, y)}l(f_w(x), y)$. However, a more general loss includes a regularization term: $$\mathcal{L}(w)=\frac{1}{N}\sum_{(x, y)}l(f_w(x), y)+\lambda R(w)$$ -where $R(w)$ is a regularization term. For our purposes, we assume $l$ to be the squared-loss and $R(w)$ to be the $L_2$ norm of $w$. We shorthand $\frac{1}{N}\sum_{(x, y)}l(f_w(x), y)$ to $\hat{l}$. Below, we derive the learning rate, in the context of AGD (i.e. with the spectrally normalized weights and same form of update), for this regularized loss: - -{% include figure.html path="assets/img/2023-11-01-Symmetry-Optimization-project/reg.png" class="img-fluid rounded z-depth-1" %} - -We have omitted a lot of intermediary steps involving matrix inequalities and derivatives - see the paper on AGD if you are interested in the details! We remark that this choise of $\eta$ depends on $\lambda$, so hyperparameter tuning is still necessary. Some dependence on the architecture shows up in $\eta$, namely $\Tilde{d}$. However, as the network scales this parameter can stay constant. We are interested in how this will perform in practice - check the blog for updates on this! - -## Introducing Asymmetric Nature - -Our initial experiment involved a two-layer neural network (width: 1024) trained on the MNIST Dataset using three distinct learning algorithms: i) AGD (gain = 1), ii) Default Adam, and iii) Adam with diverse hyperparameters for both layers. The graph below showcases the resulting loss and accuracy. The first graph showcase loss while the second one showcase accuracy. 
-{% include figure.html path="assets/img/2023-11-01-Symmetry-Optimization-project/lossmnist.png" class="img-fluid rounded z-depth-1" %} -{% include figure.html path="assets/img/2023-11-01-Symmetry-Optimization-project/accmnist.png" class="img-fluid rounded z-depth-1" %} - -Given MNIST's high accuracy even with minimal epochs, the distinction wasn't apparent. Notably, while the asymmetric algorithm matched or outperformed default Adam, fine-tuning Adam's hyperparameters yielded superior performance. - -Inspired by AGD's removal of the learning rate hyperparameter, we crafted two AGD variations for comparison with SGD and the original AGD. - -### Variation 1 - -This variation incorporated momentum into AGD, integrating AGD's learning rate and gradient summary with momentum's past and current gradients. Surprisingly, this had minimal impact, indicating the optimality of gradient summary and learning rate. - -### Variation 2 - -Here, instead of typical momentum, we introduced layer-wise asymmetry, acknowledging each layer's varying impact on loss. Adjusting each layer's learning rate inversely proportional to its number resulted in notable performance differences! - -Results from training under these algorithms using the cifar-10 Dataset and MSE Loss are depicted in the subsequent diagram. -{% include figure.html path="assets/img/2023-11-01-Symmetry-Optimization-project/losscifar.png" class="img-fluid rounded z-depth-1" %} -{% include figure.html path="assets/img/2023-11-01-Symmetry-Optimization-project/acccifar.jpg" class="img-fluid rounded z-depth-1" %} - -### Evaluation Metrics - -Emphasizing learnability, we adopt the ordering concept over exact measures. Algorithm $A_1$ is deemed superior to $A_2$ if its expected learning ability (distinguishing correct/incorrect classifications) surpasses $A_2$. This learning ability, resembling a Beta distribution, hinges on directly propotional to current accuracy. 
Therefore, we made our evaluation on accuracy and loss graph over epochs. - -## Conclusion -Our blog offers insights into optimizing neural networks and advocates for the potential benefits of asymmetry in training processes. We trust you found our journey as engaging as we did in developing it! - -## Citations -Ng, Andrew Y. ”Feature selection, L 1 vs. L 2 regularization, and rotational invariance.” Proceedings of the -twenty-first international conference on Machine learning. 2004. - -Bernstein, Jeremy, et al. ”Automatic Gradient Descent: Deep Learning without Hyperparameters.” arXiv -preprint arXiv:2304.05187 (2023). - -Bernstein, Jeremy, et al. ”Automatic Gradient Descent: Deep Learning without Hyperparameters.” arXiv -preprint arXiv:2304.05187 (2023). - -Kingma, Diederik P., and Jimmy Ba. ”Adam: A method for stochastic optimization.” arXiv preprint -arXiv:1412.6980 (2014). - -Philipp, George, Dawn Song, and Jaime G. Carbonell. ”The exploding gradient problem demystified- -definition, prevalence, impact, origin, tradeoffs, and solutions.” arXiv preprint arXiv:1712.05577 (2017). \ No newline at end of file diff --git a/_posts/2023-11-01-Visualization of CLIP's Learning and Perceiving Dynamics.md b/_posts/2023-11-01-Visualization of CLIP's Learning and Perceiving Dynamics.md deleted file mode 100644 index e0e947ba..00000000 --- a/_posts/2023-11-01-Visualization of CLIP's Learning and Perceiving Dynamics.md +++ /dev/null @@ -1,377 +0,0 @@ ---- -layout: distill -title: Alive Scene -description: Inspired by the captivating Enchanted Portraits of the Harry Potter universe, my project unveils an innovative AI pipeline that transcends traditional scene-capture methods. Rather than merely recording scenes as a sequence of static images, this pipeline is intricately designed to interpret and articulate the dynamic behavior of various elements within a scene by utilizing CLIP semantic embeddings. 
This nuanced understanding enables the scenes to evolve autonomously and organically, mirroring the fluidity and spontaneity of living entities. -date: 2023-12-09 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Chi-Li Cheng - url: "https://chilicheng.com" - affiliations: - name: Massachusetts Institute of Technology - -# must be the exact same name as your blogpost -bibliography: 2023-12-09-Alive-Scene.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Project Proposal - subsections: - - name: Abstract - - name: Introduction - - name: Methodology - - name: Potential Contributions - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p, .fake-img figcaption { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Enchanting Images with Semantic Embedding -"Alive Scene" is an advanced AI-driven project that revolutionizes the concept of scene capture, drawing inspiration from the enchanting, ever-changing portraits in the Harry Potter series. This innovative pipeline goes beyond traditional methods of capturing scenes as static images. Instead, it delves deep into the semantic understanding of each scene, enabling it to not only recreate these scenes with high fidelity but also to imbue them with the ability to act, evolve, and respond autonomously. - - - - -The following GIF image on the right is the output from the Alive Scene Pipeline. Notice that these scenes start from the same status. -
-
-
- {% include figure.html path="assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/7cFU.gif" class="img-fluid rounded z-depth-1" %} -
- Hogwarts Portraits -
-
-
-
-
- {% include figure.html path="assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/allt5.gif" class="img-fluid rounded z-depth-1" %} -
- Alive Scene captures cats' napping behaviors -
-
-
-
- -The core of this project lies in its sophisticated AI algorithms that analyze and interpret the nuances of each scene, from the physical elements to the underlying emotions and narratives. This enables the system to generate dynamic, lifelike representations that are far from static images. These AI-crafted scenes possess the unique ability to change organically over time, reflecting the natural progression and evolution one would expect in real life. - -Through "Alive Scene," portraits and scenes are no longer mere representations; they become entities with a semblance of life, capable of exhibiting behaviors and changes that mirror the fluidity and spontaneity of living beings. There are three elements in this project, the first is using CLIP model as encoder to compress image into clip embeddings. Second, train a generator to reconstruct the original image from the CLIP embedding. then train a behavior model to lean the behavior of clip embeddings in the clip feature space; the behavior will use to drive the generator; making the scene representation alive. The following is the diagrams of the pipeline. -
-
- {% include figure.html path="assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/pipeline.jpg" class="img-fluid rounded z-depth-1" %} -
- Alive Scene Pipeline -
-
-
- -### Introduction - -The CLIP (Contrastive Language–Image Pre-training) model, represents a groundbreaking approach in integrating visual and textual data within the realm of artificial intelligence. In this project, it plays and important role to comprehend the scenario and characters' behaviors in the scene. -Detailed investigations offers insightful understanding of the model's operations, showing the potential that CLIP embeddings could make a machine comprehend and compress complex information of images. - -The study explores using conditional Invertible Neural Networks (cINNs) for transforming still images into videos, highlighting cINNs' prowess in handling static to dynamic content transitions. Although proficient in capturing motion, the model's grasp on object/event types may benefit from CLIP embeddings enhancement. My project, unlike this work, aims to animate static scene representations with self-driven behaviors, not just manipulate videos. - -Another significant work, "Make-A-Video", introduces a text-to-video generation method utilizing text-to-image models. This approach circumvents the need for text-video paired data, learning from text-image data and unsupervised videos. It employs a spatiotemporal diffusion model and super-resolution techniques for high-quality video creation from text. My project differs, focusing on bringing life to existing videos or image sequences, rather than generating new content from text. - -Despite the static background, the cats' movements are so subtle that they pose a challenge for human observers to distinguish differences between frames. To visualize the clip embeddings of the frames from the video, I employ both UMAP and t-SNE techniques for gaining more insights. - - - -
-
- {% include figure.html path="assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/latent_umap.gif" class="img-fluid rounded z-depth-1" %} -
- UMAP Visualization -
-
-
-
-
- {% include figure.html path="assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/latent_tsne.gif" class="img-fluid rounded z-depth-1" %} -
- t-SNE Visualization -
-
-
- -The behavior over time resembles a 'spaghetti' pattern, indicating that certain scenarios or behaviors may recur (as seen in the crossings or interactions within the spaghetti diagram). Some intersecting points demonstrate similar tendencies, while others are more unpredictable, highlighting the complexity of the video. - -Both visualizations provide a promising sign: the end and start frames are positioned close to those in the middle. This proximity allows the Alive Scene to operate seamlessly and endlessly. For example, when the Alive Scene approaches a point near the end, it can smoothly transition to a frame somewhere in the middle. Similarly, when it encounters a region where different frames cluster together, it has a variety of options to choose from for its next move. This flexibility is key to making the Alive Scene function effectively. - -### Generator - -The Generator (decoder) is a SIREN model, which employs CLIP semantic embeddings and positional embeddings of pixel coordinates to generate RGB colors. SIRENs, or Sinusoidal Representation Networks, diverge from traditional neural networks by utilizing sinusoidal activation functions instead of common ones like ReLU. These networks are adept at implicitly representing intricate data patterns, making them particularly advantageous for tasks that involve complex spatial structures or continuous data. The incorporation of periodic activation functions in SIRENs can significantly enhance deep learning capabilities, especially in fields such as computer vision and generative models. -
- {% include figure.html path="assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/SIREN_DECODER.jpg" class="img-fluid rounded z-depth-1" %} -
- a SIREN model as the generator -
-
-
- {% include figure.html path="assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/tp_siren.gif" class="img-fluid rounded z-depth-1" %} -
- the training progression -
-
- -
- {% include figure.html path="assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/generated_.gif" class="img-fluid rounded z-depth-1" %} -
- Original Video vs Generated Video -
-
- - -The code of the generator model (SIREN) -```python -class SineLayer(nn.Module): - - def __init__(self, w0): - super(SineLayer, self).__init__() - self.w0 = w0 - - def forward(self, x): - return torch.sin(self.w0 * x) - - -class Siren(nn.Module): - def __init__(self, w0=20, in_dim=560, hidden_dim=256, out_dim=3): - super(Siren, self).__init__() - - self.net = nn.Sequential(nn.Linear(in_dim, hidden_dim), SineLayer(w0), - nn.Linear(hidden_dim, hidden_dim), SineLayer(w0), - nn.Linear(hidden_dim, hidden_dim), SineLayer(w0), - nn.Linear(hidden_dim, hidden_dim), SineLayer(w0), - nn.Linear(hidden_dim, out_dim)) - - # Init weights - with torch.no_grad(): - self.net[0].weight.uniform_(-1. / in_dim, 1. / in_dim) - self.net[2].weight.uniform_(-np.sqrt(6. / hidden_dim) / w0, - np.sqrt(6. / hidden_dim) / w0) - self.net[4].weight.uniform_(-np.sqrt(6. / hidden_dim) / w0, - np.sqrt(6. / hidden_dim) / w0) - self.net[6].weight.uniform_(-np.sqrt(6. / hidden_dim) / w0, - np.sqrt(6. / hidden_dim) / w0) - self.net[8].weight.uniform_(-np.sqrt(6. / hidden_dim) / w0, - np.sqrt(6. 
/ hidden_dim) / w0) - - def forward(self, x): - return self.net(x) - - -class MLP(nn.Module): - def __init__(self, in_dim=2, hidden_dim=256, out_dim=1): - super(MLP, self).__init__() - - self.net = nn.Sequential(nn.Linear(in_dim, hidden_dim), nn.ReLU(), - nn.Linear(hidden_dim, hidden_dim), nn.ReLU(), - nn.Linear(hidden_dim, hidden_dim), nn.ReLU(), - nn.Linear(hidden_dim, hidden_dim), nn.ReLU(), - nn.Linear(hidden_dim, out_dim)) - - def forward(self, x): - return self.net(x) - - -def train(model, model_optimizer, nb_epochs=15000): - psnr = [] - for _ in tqdm(range(nb_epochs)): - model_output = model(pixel_coordinates) - loss = ((model_output - pixel_values) ** 2).mean() - psnr.append(20 * np.log10(1.0 / np.sqrt(loss.item()))) - - model_optimizer.zero_grad() - loss.backward() - model_optimizer.step() - - return psnr, model_output -``` -### Behavior model - -This project introduces a customized asymmetrical Variational Autoencoder (VAE) as the probabilistic model to predict motion within the CLIP embedding space. A VAE-like model may prove beneficial for this task for two primary reasons. Firstly, they are adept at learning a continuous, smooth latent space, facilitating efficient interpolation and manipulation of data representations. Given that the training data derives from a video, it is inherently sequential and should be represented in a continuous fashion. Secondly, VAEs utilize amortized inference, where the encoder is trained to generalize the mapping of inputs to the latent space across the dataset, as opposed to conducting inference anew for each input. For this project, the objective is to devise a method that allows for a smooth navigation within the observed embedding space. 
- -The code of the behavior model (VAE) -```python - -device = torch.device("cuda" if torch.cuda.is_available() else "cpu") -# BehaviorModel(inspired by VAE) -class BehaviorModel(nn.Module): - def __init__(self, input_dim=512, latent_dim=256): - super(BehaviorModel, self).__init__() - - # Encoder - self.fc1 = nn.Linear(input_dim, 400) - self.bn1 = nn.BatchNorm1d(400) - self.fc2 = nn.Linear(400, 300) - self.bn2 = nn.BatchNorm1d(300) - self.fc21 = nn.Linear(300, latent_dim) # Mean - self.fc22 = nn.Linear(300, latent_dim) # Log variance - self.dropout = nn.Dropout(0.55) - - # Decoder - self.fc3 = nn.Linear(latent_dim, 300) - self.bn3 = nn.BatchNorm1d(300) - self.fc4 = nn.Linear(300, 400) - self.bn4 = nn.BatchNorm1d(400) - self.fc5 = nn.Linear(400, input_dim) - - def encode(self, x): - h1 = F.relu(self.bn1(self.fc1(x))) - h2 = F.relu(self.bn2(self.fc2(h1))) - h2 = self.dropout(h2) - return self.fc21(h2), self.fc22(h2) - - def reparameterize(self, mu, logvar): - std = torch.exp(0.5 * logvar) - eps = torch.randn_like(std) - return mu + eps * std - - def decode(self, z): - h3 = F.relu(self.bn3(self.fc3(z))) - h4 = F.relu(self.bn4(self.fc4(h3))) - return F.tanh(self.fc5(h4)) - - def forward(self, x): - mu, logvar = self.encode(x.view(-1, 512)) - z = self.reparameterize(mu, logvar) - return self.decode(z), mu, logvar - -# Loss function -def loss_function(recon_x, x, mu, logvar): - BCE = F.binary_cross_entropy(recon_x, x.view(-1, 512), reduction='sum') - KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) - return BCE + KLD - -def loss_function(recon_x, x, mu, logvar): - # Use Mean Squared Error for the reconstruction loss - MSE = F.mse_loss(recon_x, x.view(-1, 512), reduction='sum') - - # KLD is unchanged - KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) - - return MSE + KLD -``` -The process begins with a CLIP embedding as the input, which is then transformed by the model to output a motion vector. 
This vector retains the same dimensions as the CLIP embedding and is utilized to alter the original embedding, facilitating the generation of the subsequent frame based on this modified embedding. - - -
- {% include figure.html path="assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/PrbabilisticModel.jpg" class="img-fluid rounded z-depth-1" %} -
- the Asymmetrical VAE -
-
- -In this case, I generate 200 frames for training; the number is quite small. To enhance the model's learning efficacy, new data points are generated through linear interpolation between existing data points (frames). By doing this, I generated 1000 CLIP embeddings and frames. These newly created samples undergo normalization to conform to the geometric constraints of the CLIP embedding space, often characterized as a hypersphere. This normalization process ensures that the interpolated data points adhere to the distribution pattern of the original embeddings. As depicted in the diagram, this technique leads to a densified clustering of data points in close proximity to the original embeddings, which is advantageous. It implies a higher confidence in the authenticity of these new points due to their closeness to the authentic, or ground truth, data. -
- {% include figure.html path="assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/Interpolation.png" class="img-fluid rounded z-depth-1" %} -
- Gaining more data points by Interpolation -
-
- -When operating the process that animates the Alive Scene, it occasionally generates artifacts. This may be caused by certain movements that deviate significantly from the observed reality. Please refer to the following GIF for an example. - -
- {% include figure.html path="assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/broken.gif" class="img-fluid rounded z-depth-1" %} -
- Artifacts -
-
- -To resolve the issue, I have developed a post-processing technique that stabilizes the outcomes. The process begins by re-normalizing the resulting embedding onto the hypersphere. Following this, a weighted parameter is introduced to draw the vector incrementally toward the domain of previously observed CLIP embeddings. For example, if the weighting parameter is set to 0.1 for the observed embedding, it would be scaled by 0.1, while the predicted embedding is scaled by 0.9. These two are then summed to produce a final embedding that, while primarily influenced by the prediction, retains a subtle alignment with the observed data. This weighted approach aims to mitigate artifacts by anchoring the predictions within the realm of observed realities. - -
- {% include figure.html path="assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/Post-curing.jpg" class="img-fluid rounded z-depth-1" %} -
- Post-curing method -
-
- -By applying this method, the Alive Scene has started to yield more stable results. Interestingly, the outcomes are varied, exhibiting behaviors akin to a living creature — somewhat unpredictable yet within a framework of predictability. -
- {% include figure.html path="assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/allt5.gif" class="img-fluid rounded z-depth-1" %} -
- 4 different versions -
-
- -### Manipulation -The Alive Scene operates autonomously, and to explore the modulation of its behavior, I have introduced the concept of 'temperature.' This concept acts as a coefficient that scales the movement vector, thereby allowing the scene to exhibit behaviors that are either more expansive and varied, or more constrained and subtle, depending on the temperature setting. -
- {% include figure.html path="assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/TEMPERATURE.jpg" class="img-fluid rounded z-depth-1" %} -
- the temperature coefficient -
-
-
- {% include figure.html path="assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/vt.gif" class="img-fluid rounded z-depth-1" %} -
- different temperature settings -
-
- - -### Conclusion - -The "Alive Scene" project signifies a profound achievement in the domain of Deep Learning for scene representation. It leverages CLIP semantic embeddings to decode and imbue scenes with lifelike attributes, while also seamlessly integrating the potent SIREN model as a generator, capable of breathing vitality into the processed embeddings by producing authentic images. - -Furthermore, the project implements an asymmetric Variational Autoencoder (VAE) to predict and model motion within the CLIP embedding space, thereby enhancing the dynamism and fluidity of the scenes. - -However, the significance of this undertaking extends well beyond its technical accomplishments. By giving birth to scenes that autonomously and organically evolve, the project ushers in a transformative era of possibilities in digital storytelling and interactive media, fundamentally reshaping the landscape of creative expression in the digital realm. - - - -### Future Work - -In this project, a SIREN model is trained to create a 2D scene representation. This model can be extended to generate a 3D scene by simply adding an additional output node to adopt the Neural Radiance Field (NeRF) architecture. Such an enhancement allows the 3D Alive Scene to offer a more immersive and complex scene representation. Looking ahead, it's conceivable that a non-player character (NPC) could be manipulated in this manner, especially if the model, when trained on a vast dataset, can learn more sophisticated behaviors. This approach has the potential to encapsulate all necessary information within a highly compact model, offering an extremely lightweight solution for dynamic scene generation. - -
- {% include figure.html path="assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/2dvs3d.jpg" class="img-fluid rounded z-depth-1" %} -
- extend the model for a 3D Scene -
-
- -### Potential Usages and Contributions: - -Digital Art and Entertainment: This project can revolutionize digital art and entertainment by offering dynamic, evolving scenes that enhance animations and virtual experiences. - -Film and Animation: It can automate the generation of realistic backgrounds, streamlining the production process for films and animated content. - -Advertising and Marketing: The project offers the capability to create interactive, dynamic advertising content, thereby engaging audiences more effectively. - -Behavioral Studies: It provides a tool for in-depth analysis of human and animal behaviors, supporting research in fields such as psychology, ethology, and anthropology. - -Cultural Preservation: This technology can enliven historical scenes or artworks in museums, offering visitors more immersive and engaging experiences. - -Data Visualization: It introduces innovative methods for interacting with and interpreting complex data, useful in sectors like finance and healthcare. - -Gaming: The project enables the creation of NPCs with realistic behaviors, significantly enhancing the gaming experience. - -Architecture and Engineering: It can be applied for dynamic visualizations in architectural and engineering projects, aiding in design and planning. - -Conservation: This technology can contribute to wildlife conservation by facilitating the study of animal behaviors in natural settings. diff --git a/_posts/2023-11-01-latent-interpolation.md b/_posts/2023-11-01-latent-interpolation.md deleted file mode 100644 index c887ac4a..00000000 --- a/_posts/2023-11-01-latent-interpolation.md +++ /dev/null @@ -1,196 +0,0 @@ ---- -layout: distill -title: Exploring the latent space of text-to-image diffusion models -description: In this blog post we explore how we can navigate through the latent space of stable diffusion and using interpolation techniques. 
-date: 2023-12-01 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Luis Henrique Simplicio Ribeiro - affiliations: - name: Harvard University - -# must be the exact same name as your blogpost -bibliography: 2023-11-01-latent-interpolation.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -#toc: - #- name: Equations - #- name: Images and Figures - # subsections: - # - name: Interactive Figures - #- name: Citations - #- name: Footnotes - #- name: Code Blocks - #- name: Layouts - #- name: Other Typography? - -toc: - - name: Introduction - - name: Background and related work - - name: Method - - name: Analysis - - name: Conclusion - #- name: Images and Figures - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Introduction - -Diffusion models are a class of deep generative models that have shown promising results in many different tasks, including photorealistic image generation and protein design . Diffusion models work by gradually destroying structure of an object with $T$ steps of a fixed noising process, and then learning to reverse this process to recover the original object. This allows the model to learn the underlying structure of the data, and to generate new objects that are both realistic and diverse. 
The forward process $q( x_t \| x_{t-1} )$ defines how noise is added to an original image $x_0$, and the reverse process $q( x_{t-1} \| x_{t} )$ that we want to learn, can recover a less noisy version of an image. - -{% include figure.html path="assets/img/2023-11-01-latent-interpolation/chicken_forward_reverse.jpeg" class="img-fluid" %} - -Stable Diffusion (SD) is an open-source latent text-to-image diffusion model which is able to realize images with fine grained details, when prompted with a textual input describing the desired characteristics of the output image. SD is reasonably fast compared to other diffusion models, since it performs the diffusion steps in a low dimensional latent space. The strategy consists of using an image encoder $\mathcal{E}: \mathcal{X} \rightarrow \mathcal{Z}^0$ which maps an image $x_0 \in \mathcal{X}$ to a lower dimensional image latent code $z_0 \in \mathcal{Z}^0$, and a latent decoder $\mathcal{D}: \mathcal{Z}^0 \rightarrow \mathcal{X}$ which recovers an image $\mathcal{D}(z_0)$ from the image latent code $z_0$. Using these two models it is possible to learn to denoise $z_T$, instead of $x_T$, which is also normally distributed, saving a lot in computing since the latent codes dimensionality are usually chosen to be much smaller than the original images dimensionality. During inference time, for a given input textual prompt $y$, we encode the prompt into a vector $s = \tau_\phi(y)$ using CLIP , sample $z_T \sim \mathcal{N}(0, I)$, and provide these two tensors to the diffusion model $f_\theta: \mathcal{Z}^T \times \mathcal{S} \rightarrow \mathcal{Z}^0$, which generates $z_0 = f_\theta(z_T, s)$. We can then map this vector into an image using the decoder: $x_0 = \mathcal{D}(z_0)$ which hopefully is in the data distribution. 
- -## Background and related work -In order to be able to learn the complex interaction between textual descriptions and images coming from a very large multimodal dataset, SD has to organize its image latent space $\mathcal{Z}^T$ coherently. If the learned representations are smooth for instance, we could expect that $\mathcal{D}(f_\theta(z_T, s))$ and $\mathcal{D}(f_\theta(z_T + \epsilon, s))$, where $\epsilon$ is a tensor of same dimensionality as $z_T$ with values very close to 0, will be very similar images. A common technique to explore and interpret the latent space of generative models for images is to perform latent interpolation between two initial latent codes, and generate the $N$ images corresponding to each of the interpolated tensors. If we sample $z_\text{start}, z_\text{end} \sim \mathcal{N}(0, I)$, fix a textual prompt such that $s = \tau_\phi({y})$ and use SD to generate images conditioned on the textual information we could explore different techniques for generating interpolated vectors. 
A very common approach is linear interpolation, where for $\gamma \in [0, 1]$ we can compute: - - $$z_\text{linear}^{(\gamma)} = (1-\gamma)z_\text{start} + \gamma z_\text{end}$$ - - Mimicking these exact steps for three different pairs sampled latent codes for $(z_\text{start}, z_\text{end})$, and for each of them fixing a text prompt we get: - - {% include figure.html path="assets/img/2023-11-01-latent-interpolation/latent_interpolation.jpeg" class="img-fluid" %} - - As we can see from the image, when we move away from both $z_\text{start}$ and $z_\text{end}$ we get blurred images after decoding the interpolated image latent codes, which have only high level features of what the image should depict, but no fine grained details, for $\gamma = 0.5$ for instance, we get: - {% include figure.html path="assets/img/2023-11-01-latent-interpolation/extreme_case.jpg" class="img-fluid" %} - - In contrast, if we perform interpolation in the text space by sampling $z_T \sim \mathcal{N}(0, I)$, which is kept fixed afterwards, and interpolating between two text latent codes $s_\text{start} = \tau_\phi(y_\text{start})$ and $s_\text{end} = \tau_\phi(y_\text{end})$, we get something more coherent: - - {% include figure.html path="assets/img/2023-11-01-latent-interpolation/text_interpolation.jpeg" class="img-fluid" %} - - Latent interpolation is a very common technique in Machine Learning, particularly in generative models, used interpolation in the latent space of a Variational Autoencoder (VAE) to generated molecules between two initial ones by encoding them in the VAE latent space, interpolating between them and using the decoder to obtain the molecules from the latents, showed how interpolation can be used to perform semantic transformations on images, by changing features of a CNN. 
More broadly interpolation has also been studied in a probabilistic point of view , evaluating how different techniques might generate out of distribution samples, which we explore later in this blog post. - -In this project we explore geometric properties of the image latent space of Stable Diffusion, gaining insights of how the model organizes information and providing strategies to navigate this very complex latent space. One of our focuses here is to investigate how to better interpolate the latents such that the sequence of decoded images is coherent and smooth. Depending on the context, the insights here could be transferred to other domains as well if the sampling process is similar to the one used in SD. The experiments are performed using python and heavily relying on the PyTorch , Transformers and Diffusers libraries. - -## Method - -In this section we compare several interpolation techniques. For reproducibility reasons we ran the experiments with the same prompt and sample latent vectors across different runs. We use Stable Diffusion version 1.4 from CompVis with the large CLIP vision transformer, the DPMSolverMultistepScheduler , 30 inference steps and a guidance scale of 7.5 . We use the prompt "An high resolution photo of a cat" and seed = 1 to generate both $z_\text{start}$ and $z_\text{end}$. The corresponding generated pictures are shown below: - -{% include figure.html path="assets/img/2023-11-01-latent-interpolation/endpoint_images.jpeg" class="img-fluid" %} - -### Linear Interpolation - -Although linear interpolation is still a very commonly used interpolation technique, it is known that it generates points which are not from the same distribution as the original data points depending on the original distribution of the points being interpolated. 
Particularly, for $z_{\text{start}}, z_{\text{end}} \sim \mathcal{N}(0, I)$ and $\gamma \in [0,1]$, we have: - -$$z_\text{linear}^{(\gamma)} = (1-\gamma)z_\text{start} + \gamma z_\text{end}$$ - - -Hence: - -$$\begin{eqnarray} -\mathbb{E}\left[z_\text{linear}^{(\gamma)}\right] &=& \mathbb{E}\left[(1-\gamma)z_\text{start} + \gamma z_\text{end}\right] \nonumber \\ -&=& \mathbb{E}[(1-\gamma)z_\text{start}] + \mathbb{E}[\gamma z_\text{end}] \nonumber \\ -&=& (1-\gamma)\mathbb{E}[z_\text{start}] + \gamma \mathbb{E}[z_\text{end}] \nonumber \\ -&=& 0 \nonumber -\end{eqnarray}$$ - -Therefore, the mean stays unchanged, but the variance is smaller than 1 for $\gamma \in (0,1)$: - -$$\begin{eqnarray} -\text{Var}[z_\text{linear}^{(\gamma)}] &=& \text{Var}[(1-\gamma)z_\text{start} + \gamma z_\text{end}] \nonumber \\ -&=& \text{Var}[\gamma z_\text{start}] + \text{Var}[(1-\gamma)z_\text{end}] \nonumber \\ -&=& \gamma^2\text{Var}[z_\text{start}] + (1-\gamma)^2\text{Var}[z_\text{end}] \nonumber \\ -&=& \gamma(2\gamma - 2)I + I \nonumber \\ -&=& (\gamma(2\gamma - 2) + 1)I \nonumber -\end{eqnarray}$$ - -{% include figure.html path="assets/img/2023-11-01-latent-interpolation/linear_interpolation.jpeg" class="img-fluid" %} - -Given that the sum of two independent Gaussian distributed random variables results in a Gaussian distributed random variable, $z_\text{linear}^{(\gamma)} \sim \mathcal{N}(0, (\gamma(2\gamma - 2) + 1)I)$. This shows how the distribution of the interpolated latent codes change. To further understand the effect of this shift, we can use the interactive figure below. Where for $\text{std} \in [0.5, 1.5]$ we generate an image using the embedding $\text{std} \, z_\text{start}$: - - - - -### Normalized linear interpolation - -As shown before, linear interpolation is not a good technique for interpolation random variables which are normally distributed, given the change in the distribution of the interpolated latent vectors. 
To correct this distribution shift, we can perform a simple normalization of the random variable. We will refer to this as normalized linear interpolation. For $\gamma \in [0,1]$ we define $z_\text{normalized}^{(\gamma)}$ as: - -$$z_\text{normalized}^{(\gamma)} = \dfrac{z_\text{linear}^{(\gamma)}}{\sqrt{(\gamma(2\gamma - 2) + 1)}} \implies z_\text{normalized}^{(\gamma)} \sim \mathcal{N}(0, I)$$ - -Now, as we move further away from the endpoints $z_\text{start}$ and $z_\text{end}$, we still get coherent output images: - -{% include figure.html path="assets/img/2023-11-01-latent-interpolation/normalized_interpolation.jpeg" class="img-fluid" %} - - -### SLERP - -Spherical Linear Interpolation (Slerp) , is a technique used in computer graphics and animation to smoothly transition between two orientations, especially rotations. If we let $\phi = \text{angle}(z_\text{start}, z_\text{end})$, then for $\gamma \in [0,1]$, the interpolated latent is defined by: - -$$\text{slerp}(z_\text{start}, z_\text{end}; \gamma) = \dfrac{\sin((1-\gamma)\phi)}{\sin(\phi)}z_\text{start} + \dfrac{\sin(\gamma\phi)}{\sin(\phi)}z_\text{end}$$ - -where $\phi$ is the angle between $z_\text{start}$ and $z_\text{end}$. The intuition is that Slerp interpolates two vectors along the shortest arc. We use an implementation of Slerp based on Andrej Karpathy . As we can see from the images below, slerp generates very good quality interpolated vectors. - -{% include figure.html path="assets/img/2023-11-01-latent-interpolation/slerp_interpolation.jpeg" class="img-fluid" %} - -If we compare the obtained results with normalized linear interpolation we see that the generated images are very similar, but as opposed to normalized linear interpolation, we cannot easily theoretically analyze the distribution of generated latents. To have some intuition behind how these different techniques interpolate between two vectors, we can sample and fix two vectors sampled from a 2-dimensional normal distribution. 
We can visualize how these trajectories compare with each other: - -{% include figure.html path="assets/img/2023-11-01-latent-interpolation/interpolations_comparison.png" class="img-fluid" %} - -### Translation - - -To further investigate some properties of the latent space we also perform the following experiment. Let $z_\text{concat} \in \mathbb{R}^{4 \times 64 \times 128}$ be the concatenation of $z_\text{start}$ and $z_\text{end}$ over the third dimension. We will denote by $z_\text{concat}[i, j, k] \in \mathbb{R}$ as a specific element of the latent code and $:$ as the operator that selects all the elements of that dimension and $m:n$ the operator that selects from elements $m$ to element $n$ of a specific dimension. We can create a sliding window over the concatenated latent and generated the corresponding images. We define the translation operator $\mathcal{T}$ such that $\mathcal{T}(z_\text{concat}; t) = z_\text{concat}[:, :, t:64+t]$, which is defined for $t = \{0, \cdots, 64\}$. The sequence of generated images can be visualized below using our interactive tool: - - - -Surprisingly, we note that applying $\mathcal{T}$ to our concatenated latent code is materialized into a translation in image space as well. But not only the object translates, we also see changes in the images style, which is justified by changing some of the latent dimensions. - -We can correct this behavior by mixing the two latent codes only in a single slice of the latent code. Let $\mathcal{C}(z_\text{start}, z_\text{end}; t)$ represent the concatenation of $z_\text{start}[:, :, 64:64+t]$ and $z_\text{end}[:, :, t:64]$ along the third dimension. With this transformation we obtain the following: - - - -Hence, translation is also a valid interpolation technique and could be further expanded to generate an arbitrary size of latent vectors. 
- -## Analysis - -In order to evaluate the quality of the generated interpolations we use CLIP, a powerful technique for jointly learning representations of images and text. It relies on contrastive learning, by training a model to distinguish between similar and dissimilar pairs of images in an embedding space using a text and an image encoder. If a (text, image) pair is such that the textual description matches the image, the similarity between the CLIP embeddings of this pair should be high: - -$$\text{CLIPScore(text,image)} = \max \left(100 \times \dfrac{z_{\text{text}} \cdot z_{\text{image}}}{ \lVert z_{\text{text}} \rVert \lVert z_{\text{image}} \rVert}, 0 \right)$$ - -For each interpolation strategy $f \in \\{\text{linear}, \text{normalized}, \text{slerp}\\}$ presented, we fix the prompt $\text{text} = $ "A high resolution image of a cat" and generate $n = 300$ interpolated latents $f(z_\text{start}, z_\text{end}, \gamma) = z_f^{(\gamma)}$ with $\gamma = \\{0, \frac{1}{n-1}, \frac{2}{n-1}, \cdots, 1\\}$. 
We then generate the images $x_f^{(\gamma)}$ from the interpolated latents, finally we use the CLIP encoder $\mathcal{E}_\text{CLIP}$ on the generated images to create image embeddings that can be compared with the text embedding the we define Interpolation Score $\text{InterpScore}(f, \text{text}, n)$ as: - -$$\text{InterpScore}(f, \text{text}, n) = \dfrac{1}{n} \sum_{\gamma \in \{0, \frac{1}{n-1}, \frac{1}{n-2}, \cdots, 1\}} \max \left(100 \times \dfrac{z_{\text{text}} \cdot \mathcal{E}_\text{CLIP}(x_\text{f}^{(\gamma)})}{ \lVert z_{\text{text}} \rVert \lVert \mathcal{E}_\text{CLIP}(x_\text{f}^{(\gamma)}) \rVert}, 0 \right)$$ - -Applying these steps we obtained the following results: - -{% include figure.html path="assets/img/2023-11-01-latent-interpolation/clip_scores.png" class="img-fluid" %} - -Surprisingly, linear interpolation performed better than normalized linear and slerp, this could indicate that CLIP scores might not be a good metric for image and text similarity in this context. Given that in this class project the main goal was to gain insights, as future work we could run a large scale experiment to check whether this behavior would be repeated. We can also visually inspect the quality of the interpolation by generating a video for each interpolation. From left to right we have images generated from latents from linear, normalized and slerp interpolations respectively: - - - -## Conclusion - -This work shows the importance of choosing an interpolation technique when generating latent vectors for generative models. It also provides insights of the organization of the latent space of Stable Diffusion, we showed how translations of the latent code corresponds to translations on image space as well (but also changes in the image content). Further investigation of the organization of the latent space could be done, where we could try for instance, to understand how different dimensions of the latent code influence the output image. 
As an example, if we fix a image latent and use four different prompts, which are specified in the image below, we get: - -{% include figure.html path="assets/img/2023-11-01-latent-interpolation/latent_dim.jpeg" class="img-fluid" %} - - -As we can see all the generated images have some common characteristics, all the backgrounds, body positions and outfits (both in color and style) of the generated images are very similar. This indicates that even without explicitly specifying those characteristics on the textual prompt, they are present in some dimensions of the image latent code. Hence, the images share those similarities. Understanding how we can modify the latent code such that we change the shirt color in all the images from blue to red would be something interesting. Additionally, we showed some indication that CLIP scores might not be a good proxy for evaluating quality images generated from an interpolation technique. \ No newline at end of file diff --git a/_posts/2023-11-02-guided-transfer-learning.md b/_posts/2023-11-02-guided-transfer-learning.md deleted file mode 100644 index a954f776..00000000 --- a/_posts/2023-11-02-guided-transfer-learning.md +++ /dev/null @@ -1,405 +0,0 @@ ---- -layout: distill -title: "Guided Transfer Learning and Learning How to Learn: When Is It Useful?" -description: For downstream tasks that involve extreme few-shot learning, it's often not enough to predispose a model - with only general knowledge using traditional pre-training. In this blog, we explore the nuances and potential applications of - Guided Transfer Learning, a meta-learning approach that allows a model to learn inductive biases - on top of general knowledge during pre-training. 
-date: 2023-12-10 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Kevin Li - url: - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-02-guided-transfer-learning.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: "Introduction: Never Enough Data" - subsections: - - name: Machine vs Human Intelligence - - name: "Traditional Transfer Learning: Learning General Knowledge" - - name: "Guided Transfer Learning and Meta-learning: Learning Inductive Biases" - subsections: - - name: Scouting - - name: Guide Values - - name: Calculating Guide Values - - name: "Answering Unanswered Questions: Exploring the Nuances" - subsections: - - name: "Experiment 1: Can GTL compensate for lack of pre-training data (not just lack of fine-tuning data)?" - subsections: - - name: "Setup" - - name: "Downstream Task Specification" - - name: "Results and Analysis" - - name: "Experiment 2: How does the design of the scouting task affect downstream performance?" - subsections: - - name: "Setup" - - name: "Results and Analysis" - - name: "Experiment 3: What about unsupervised/self-supervised settings?" - subsections: - - name: "GTL pipeline for scBERT: Scouting Problem" - - name: "GTL pipeline for scBERT: Downstream Task" - - name: "GTL pipeline for scBERT: Whole Pipeline" - - name: "Results and Analysis" - - name: "Closing Thoughts" - subsections: - - name: "Experiment Limitations and Next Steps" - - name: "How Exciting is GTL?" - - ---- - -# Introduction/Motivation: Never Enough Data - -If we take a step back and reflect upon the current state of AI, especially in domains like computer vision and NLP, it appears that the gap between machine and human intelligence is rapidly narrowing. 
In fact, if we only consider aspects such as the predictive accuracy of discriminatory models and the sensibility of outputs by generative models, it may seem that this gap is almost trivial or even nonexistent for many tasks. However, every time we submit a training script and leave for the next few hours (or few weeks), it becomes abundantly clear that AI is still nowhere near human intelligence because of one critical kryptonite: the amount of data needed to effectively train AI models, especially deep neural networks.
-
-While we have tons of training data in domains such as general computer vision (e.g. ImageNet) and NLP (e.g. the entirety of the internet), other domains may not have this luxury. For example, bulk RNA-sequencing data in biomedical research is notoriously cursed with high dimensionality and extremely low sample size. Training AI models on bulk RNA-sequencing datasets often leads to severe overfitting. In order to successfully utilize AI in domains like biomedicine, the highest priority challenge that must be addressed is that of overcoming the necessity of exorbitant amounts of training data.
-
-## Machine vs Human Intelligence
-
-It often feels like the requirement of having abundant training samples has been accepted as an inevitable, undeniable truth in the AI community. But one visit to a preschool classroom is all that it takes to make you question why AI models need so much data. A human baby can learn the difference between a cat and a dog after being shown one or two examples of each, and will generally be able to identify those animals in various orientations, colors, contexts, etc. for the rest of its life. Imagine how much more preschool teachers would have to be paid if you needed to show toddlers thousands of examples (in various orientations and augmentations) just for them to learn what a giraffe is.
-
-Fortunately, humans are very proficient at few-shot learning-- being able to learn from few samples. 
Why isn’t AI at this level yet? Well, as intelligence researchers have discussed , biological brains are not born as empty slates of neurons with random initial connections. Millions of years of evolution have resulted in us being born with brains that are already predisposed to learn certain domains of tasks very quickly, such as image recognition and language acquisition tasks. In these domains, learning a specific task like differntiating between a cat and a dog or between letters of the English alphabet doesn’t require exposure to many samples. Additionally, as we gain more experiences throughout life, we acquire general knowledge that can help us learn new tasks more efficiently if they’re similar to something we’ve learned before. Thus, naturally, the first step toward bridging the gap between natural and machine intelligence is somehow finding a way to predispose an AI to be able to learn any *specific* task within a certain domain with very few samples. The advent of traditional transfer learning has attempted to approach this predisposition task from the "general knowledge" perspective. - -## Traditional Transfer Learning: Learning General Knowledge - -Transfer learning has been invaluable to almost all endeavors in modern deep learning. One of the most common solutions for tasks that have too little training data is to first pre-train the model on a large general dataset in the same domain, and then finetune the pre-trained model to the more specific downstream task. For example, if we need to train a neural network to determine whether or not a patient has a rare type of cancer based on an X-ray image, we likely will not have enough data to effectively train such a model from scratch without severe overfitting. We can, however, start with a model pre-trained on a large image dataset that's not specific to cancer (e.g. 
ImageNet), and if we start training from those *pre-trained* weights, the downstream cancer diagnostic task becomes much easier for the neural network to learn despite the small dataset size.
-
-One way to intuitively understand why this is the case is through the lens of "general knowledge". When the model is pre-trained on ImageNet data, it learns a lot of knowledge about image data *in general*; for example, the earlier layers of the model will learn low-level feature detectors (e.g. edge detectors, simple shape detectors, etc.) that will likely be useful for *any* computer vision task. This can be viewed as the model learning "general knowledge" about the domain of image data broadly. When we then fine-tune this model on a cancer dataset, the model doesn't have to relearn the ability to detect these general, low-level features. This general knowledge encoded in the pre-trained weights regularizes the model and mitigates overfitting, as it *predisposes* the model to learn relationships/feature detectors that are generalizable and sensible within the context of image data.
-
-However, if transfer learning could solve all our problems, this blog post wouldn't exist. When our downstream dataset is in the extremities of the high dimensional, low sample size characterization (e.g. in fields like space biology research, more on this later), learning general knowledge in the form of pre-trained weights isn't enough. How, then, can we predispose models such that they can do extreme few-shot learning, or even *one-shot* learning? Enter guided transfer learning.
-
-***
-
-# Guided Transfer Learning and Meta-learning: Learning *Inductive Biases*
-
-Guided transfer learning (GTL) is a meta-learning paradigm proposed by the group [Robots Go Mental](https://robotsgomental.com). The main idea for guided transfer learning is that, instead of just having the AI model learn general knowledge, we also want the AI to learn *how* to learn. 
Specifically, we want it to learn how to pick up new knowledge *most efficiently* for a particular domain/modality of data. This means during pre-training, the model, in addition to learning good initial weights, will also learn ***inductive biases*** that affect the future, downstream training *process*.
-
-GTL is a very novel method; its preprint was just released in the past few months! Hence, beyond the experiments in the original preprint, there has not been much exploration of some of its behavioral nuances and various application scenarios. So in this blog, I will be doing a few experiments that attempt to gain more insight into some of my questions that were left unanswered by the original GTL paper.
-
-But before we get to that, let's first get a rundown on how GTL works! The two most important concepts in GTL are **scouting** and **guide values**.
-
-## Scouting
-
-Inductive biases, which affect what kind of functions a model can learn, are usually *built into* the choice of deep learning architecture, or decided by other hyperparameters we humans choose. With guided transfer learning, they can now be *learned* automatically during pre-training. It’s almost like the model is figuring out some of its own optimal hyperparameters for learning in a particular domain.
-
-Sounds like magic, right? How does GTL allow a model to *learn* inductive biases? Well, the core behind the GTL approach is a process known as **scouting**, which is an alternative to traditional pre-training. The high-level idea is that it trains copies of the model, called scouts, on easier subproblems. These subproblems should be similar to the target downstream tasks, but easier so that the scouts are more likely to successfully converge to a generalizable model. (If the scouts themselves overfit, then how can the inductive biases they learn help our downstream few-shot training not overfit?) 
- -In the process of converging, the scouts keep track of which parameters in the model are important to keep flexible for efficient convergence and which ones aren’t. They’re basically logging their learning process. - -
-
- {% include figure.html path="assets/img/2023-11-02-guided-transfer-learning/scouting.png" class="img-fluid rounded z-depth-1" %} -
-
- -For example, if weight `A` increases drastically during training, it’s probably an important weight to change and we should keep it flexible. On the other hand, if weight `B` doesn’t change much at all or fluctuates in a very noisy manner (i.e. doesn't change *meaningfully*), it is probably not as important to change. - -After the scouts are finished training, the collective feedback from all the scouts is used to decide what inductive biases to impose on the *main model*, such that the main model can learn most efficiently for the particular domain of data and avoid *wasting effort* and being *distracted/misguided* by changing parameters that don’t really help in that domain. - - -## Guide Values - -So what do these "inductive biases" actually look like, and how do they affect future training? The inductive biases in the context of GTL come in the form of **guide values**. So after scouting, each parameter will not only have its usual weight value, but it will also have a guide value. During gradient decent, the normal update for a particular weight is then multiplied by its corresponding guide value. Thus, the larger the guide value, the more that parameter is allowed to change during downstream training. - -
-
- {% include figure.html path="assets/img/2023-11-02-guided-transfer-learning/guide_values_1.png" class="img-fluid rounded z-depth-1" %} -
-
- -
-
- {% include figure.html path="assets/img/2023-11-02-guided-transfer-learning/guide_values_2.png" class="img-fluid rounded z-depth-1" %} -
-
-
- In this very simple neural network with two weights, we can see here that weight `A` has a guide value of 0.56, while weight `B` has a guide value of merely 0.01. Thus, weight `A` is more flexible, in other words allowed to change more, than weight `B` during downstream training. In fact, weight `B` is pretty much frozen, as its guide value of 0.01 makes it so that weight B can barely change throughout training. -
- -Thus, the goal of scouting is to **find these optimal guide values**, which will ultimately make the training *process* more sparse (i.e. so that only the weights that are useful to change get changed). Note that this is different from making the *neural network model itself* more sparse (i.e. setting weights/connections that are useless to zero). - -## Calculating Guide Values - -So how do we actually get the guide values after training the scouts? Well, as mentioned above, we keep track of how parameters change during the scout training processes. Specifically, during the training of each scout, we log the initial value and final value (i.e. value after convergence) of each parameter in the model. Then, we calculate how much each parameter changes throughout the process of convergence via some distance metric between its initial and final value. The default used in the GTL paper was the squared distance: $$(w_b - w_f)^2$$, where $$w_b$$ is the baseline (initial) value of the parameter $$w$$, and $$w_f$$ is its final value. - -Now, each scout will converge differently, since they are trained on slightly different subproblems (more on this later). To have a robust estimator of how much some parameter $$w$$ changes during convergence, we take the mean squared change of the parameter across *all* the scouts. Let's call this value $$m_w$$. - -Assuming we have $$N$$ scouts, this would be: $$m_w = \frac{1}{N}\sum_{i=1}^{N}(w_{b,i} - w_{f,i})^2$$, where $$w_{b,i}$$ and $$w_{f,i}$$ are the initial and final values (respectively) of parameter $$w$$ in scout $$i$$. - -Add on a 0-1 normalization across the $$m_w$$s of *all* the parameters in the model, and we have our guide values (all of which are between 0 and 1)! - -Intuitively, we can see that parameters that changed a lot throughout the convergence process in the scout models are deemed "important to change during training" and are thus given higher guide values (i.e. 
closer to 1), allowing them to be more flexible for downstream fine-tuning. - -It’s really quite an elegant and simple approach, which is the beauty of it! It's comparably lightweight in terms of both memory and computation compared to many other popular meta-learning/few-shot learning methods. The devil is in the details on how to design the subproblems for scouts, which will be one of the main aspects we will be exploring in the experiments later in this blogpost. - - -## Experiment and Results from the GTL Paper - -Before we get started with some of our own experiments to explore more nuances of GTL behavior and benefits, it might be nice to establish that-- **Yes, it does work!** Or, it at least provides very impressive benefits. - -In the original GTL paper, Nikolić et al. tested how much benefit GTL would provide for few-shot learning tasks specifically in the domain of computer vision. Specifically, they tested one-shot learning capability on the Omniglot dataset. The Omniglot dataset is a popular few-shot learning dataset containing characters from 50 different alphabets, with only 20 handwritten examples of each (I will also be using this dataset for a couple of my experiments below). The conventional problem set up with Omniglot is to train/fine-tune your model on just *one* example of each character, and use the rest of the examples for validation/testing. - -
-
- {% include figure.html path="assets/img/2023-11-02-guided-transfer-learning/omniglot.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-02-guided-transfer-learning/omniglot2.png" class="img-fluid rounded z-depth-1" %} -
-
-
- To test one-shot learning, a model is trained/fine-tuned on one example of a character and tested on its ability to identify that character in validation samples. Figure from Nikolić et al. -
-
-To prepare a one-shot learner for this task, Nikolić et al. pre-trained a very basic CNN using the following GTL pipeline:
-1. **Pre-train the model *traditionally*** on MNIST (lots of data there!). The goal here is to have the model acquire general knowledge in the form of pre-trained weights. No inductive biases yet.
-2. **Scouting.** The meat of GTL, where *inductive biases* are learned!
-3. **Downstream fine-tuning and evaluation** on Omniglot using the one-shot scheme described above.
-
-
-The most interesting part is the second step: scouting! Remember, we have the following criteria for the scout problems:
- * There needs to be *multiple* different scouting problems (so that we can have an ensemble of *different* scouts contributing to the guide value calculations, making the guide values more robust)
- * The scout problems need to be *easy* enough so that the scouts can actually successfully learn generalizable models! Again, if the scouts themselves overfit, the guide values derived from them won't be very helpful for downstream one-shot learning :)
- * The scout problems need to be *similar* to the downstream task, i.e. in the same domain (in this case, computer vision) and of the same kind of problem (e.g. in this case, classification). If the scout problems are too different, why would the inductive biases be transferable?
-
-Given these criteria, Nikolić et al. used the following scheme for generating scouting tasks:
- * Create *subdatasets* of MNIST (termed "cousin" problems in the paper), where each subdataset/cousin contains data for only *three* of the digits in MNIST (120 of these cousin datasets were created in the paper).
- * Train a scout on each of the cousin problems (120 scouts total).
-
-This scheme satisfies all three criteria above. We now have multiple different scouting problems. 
These scouting problems are also comparatively way easier than the downstream task (there's *way* more training data than Omniglot, and it's only a 3-category classification problem). BUT, despite being easier, they're still similar enough to the downstream task such that we can expect transferability (it's still a handwritten character image classification task, after all). - -And this worked quite spectacularly! Here are the results from their paper: - -
-
- {% include figure.html path="assets/img/2023-11-02-guided-transfer-learning/paperresults1.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-02-guided-transfer-learning/paperresults2.png" class="img-fluid rounded z-depth-1" %} -
-
- -
- Left plot shows validation accuracy curves for a mdoel that was traditionally pre-trained and a model that was pre-trained with the addition of GTL. Right plot shows the distribution of guide values in the model that was pre-trained with GTL. Figures from Nikolić et al. -
-
-The plot on the left shows the validation curves for the downstream one-shot Omniglot task for 1) a model that was pre-trained traditionally (line in blue) and 2) the model that was pre-trained traditionally *and* underwent GTL scouting (line in orange). Although the GTL model was still only able to get around 25% validation accuracy, that's quite impressive for only getting one example of each character, and is a significant improvement over the model that only experienced traditional pre-training.
-
-Interestingly, the plot on the right plots the distribution of guide values. We see a heavy right skew, indicating that most of the guide values are very close to 0! This means downstream fine-tuning has been made *very* sparse (very few parameters were allowed to change drastically), providing very strong inductive biases that heavily influenced *how* the model was allowed to learn. These inductive biases, as the results suggest, seem to be correct for the task at hand. But that shouldn't be surprising because they were, in a way, *learned*.
-
-**And that is the beauty of GTL. We no longer have to "guess" what inductive biases (often in the form of architectural choices) might be appropriate for a certain domain; instead, we have these biases be "learned"!**
-
-***
-
-# Answering Unanswered Questions: Exploring the Nuances
-
-Now that we see GTL does provide noticeable benefit for one-shot learning tasks based on the experimental results from Nikolić et al., I would like to run some additional experiments of my own to explore some of the nuances of when GTL can be helpful, how we can optimize the benefit we get from using it, and how we should go about designing scout problems. 
These questions had not been explored in the original GTL paper, and since no other piece of literature has yet to even mention GTL, I thought I'd take the lead and try to gain some initial insight into some of these open topics :) - -## Experiment 1: Can GTL compensate for lack of *pre-training* data (not just lack of *fine-tuning* data)? - -So we've established that GTL can aid in learning *downstream* tasks with few training samples, but it still requires a large amount of pre-training data (e.g. MNIST), much like traditional transfer learning. What I want to know now is: what if we don't have *that* much pre-training data? In such *low pre-training-data contexts*, performance on downstream tasks usually suffers as a results when using traditional transfer learning. Can the addition of scouting/GTL *compensate* for this lack of pre-training data? That is, can a model pre-trained with a small pre-training dataset + GTL do as well as a model that's just traditionally pre-trained on a large pre-training dataset? - -### Setup - -To do test this, I pre-train a small CNN with a very similar GTL pipeline as the one used by Nikolić et al., but using only a mere *1000* of the full 60,000 samples from the MNIST dataset during pre-training/scouting. A significantly smaller pre-training dataset! I'll sometimes refer to this subset of MNIST as "small MNIST". I then evaluate the performance of this model on an Omniglot one-shot task and compare it to 1) a model that is only traditionally pre-trained on small MNIST (no GTL) and 2) a model that is traditionally pre-trained on the full 60,000-sample MNIST (also no GTL). - -
-
- {% include figure.html path="assets/img/2023-11-02-guided-transfer-learning/exp1_diagram.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Experiment setup -
- -### Downstream Task Specification - -Note that the exact setup for the downstream Omniglot one-shot task used in the original GTL paper was not revealed. There are a few variations of one-shot learning setups, but the one I will be using is: -1. Take a 100-cateogry subset of the full Omniglot dataset (that is, 100 unique characters) -2. Train the model on **one example** of each unique character (i.e. 100 training samples total), and use the rest as a validation set (i.e. 1900 validation samples total) -3. The task is thus a 100-way classification problem (given a handwritten image, predict which of the 100 characters it is) - -Since the specification above is likely not the exact Omniglot problem setup used by Nikolić et al., and the hyperparameters they used are also not specified in the original paper, some of the baseline results I'm using do not quite match to the corresponding results in the original paper. - -### Results and Analysis - -With that said, here are the resulting *validation* accuracy and loss curves for the downstream Omniglot one-shot task described above: - - -
-
- {% include figure.html path="assets/img/2023-11-02-guided-transfer-learning/exp1_val_acc.png" class="img-fluid rounded z-depth-1" %} -
-
-
-
- {% include figure.html path="assets/img/2023-11-02-guided-transfer-learning/exp1_val_loss.png" class="img-fluid rounded z-depth-1" %} -
-
- -
- Validation accuracy and loss curves for 1) model traditionally pre-trained with large MNIST, 2) model traditionally pre-trained with small MNIST, 3) model pre-trained with GTL and small MNIST. Note that an automatic early stopping mechanism was implemented to cut off when training when validation accuracy stopped increasing. -
-
-As we can see, when GTL is not used, pre-training on a 1000 sample subset of MNIST results in notably worse performance on the one-shot downstream task compared to pre-training on the full 60,000 MNIST (16% vs 21% max validation accuracy). This is as expected.
-
-*However*, if we use small MNIST *and* add scouting/GTL (using the same scout problem set up in the original GTL paper), we see that the resulting model ends up being able to reach *almost* the same max validation accuracy as the model traditionally pre-trained on the full MNIST dataset (20% vs 21%).
-
-What this suggests is that the *inductive biases* learned by GTL can compensate for any decrease in "general knowledge" (encoded in the form of pre-trained weights) that comes from having a smaller pre-training dataset. **So not only is GTL helpful when you don't have enough downstream data, it can also be helpful when you don't have enough pre-training data!**
-
-Additionally, if we inspect the validation *losses*, we see that, despite an apparent drop in validation accuracy, overfitting is still occurring in the shadows for all the models, as all the validation loss curves start rising after a certain point. However, the model that is pre-trained with GTL achieves the lowest validation loss of the three models before overfitting, and also starts overfitting the latest. So even though there's no huge difference in the maximum validation accuracy achieved by the model that was pre-trained with GTL on small MNIST and the model that was traditionally pre-trained on full MNIST, the former is able to be *optimized further* before overfitting, suggesting that **GTL with a small pre-training dataset provides a stronger "regularizing" effect than traditional transfer learning with a large pre-training dataset!** This is certainly an interesting observation that could potentially have more obvious practical implications in certain scenarios, though we will not go into that further in this blog. 
The takeaway, however, is that GTL is, at the end of the day, really just a strong "regularizer". If we look at how the orange and red curves look in both the accuracy and loss plots, we see the performance benefit that comes from adding GTL really just comes from the *delay of overfitting*. This regularization-based mechanism of performance improvement by GTL makes sense, as strong inductive biases hold the model back from learning "just anything" that fits the downstream training data.
-
-## Experiment 2: How does the design of the scouting task affect downstream performance?
-
-Okay, it seems so far that the scouting pipeline used in the original GTL paper seems to be pretty helpful for various scenarios. But how did the authors arrive at that specific scouting task formulation? What if we used different scouting tasks than the ones they did? How does that affect GTL performance, and what might such differences (if any) imply? After all, when we leave the context of MNIST and Omniglot, we'll have to be designing these scouting tasks on our own...
-
-### Setup
-For the sake of experimental control, however, I will stick with MNIST and Omniglot for now (don't worry, I deviate from these datasets in the next experiment). Here, I begin by testing the effects of changing *how many categories* are included in the cousin subdatasets that the scouts are trained on. The original paper used 3 categories per scout dataset (i.e. a 3-way classification task). What if we used 2? Or 4? And if that makes a difference, why?
-
-In my eyes, this experiment explores how *similarity* between the scout tasks and the downstream task affects *transferability*. Specifically, because the downstream Omniglot task is a 100-way classification problem, one might expect that scout tasks that include more classification categories (and are thus more similar to the downstream task) would result in better transferability. 
- -To test this, I use a 5000-sample subset of MNIST for pre-training/scouting (to save computation and time). For scouting, I create 120 cousin problems, as done in the paper. But instead of sticking to 3-category cousin problems, I also try 2-category, 4-category, and 5-category problems. - -### Results and Analysis - -Here are the results: - -
-
- {% include figure.html path="assets/img/2023-11-02-guided-transfer-learning/exp2_val_acc.png" class="img-fluid rounded z-depth-1" %} -
-
-
-
- {% include figure.html path="assets/img/2023-11-02-guided-transfer-learning/exp2_loss.png" class="img-fluid rounded z-depth-1" %} -
-
-
-As we can see, apparently the number of categories doesn't make too big of a difference in maximum validation accuracy! They all provide seemingly equal accuracy improvement from a baseline model pre-trained traditionally on the same 5000-sample MNIST subset. This isn't too surprising. Compared to the 100-way downstream classification, the difference between 2-way and 5-way classification tasks would intuitively seem pretty negligible.
-
-The validation loss plot tells a slightly different story, however. We see *most* of the models pre-trained with GTL have similar loss curves, consisting of a lower minimal loss and more resilience to overfitting compared to the baseline model. However, the model based on scouts trained on *5-category* cousin problems seems to achieve the *worst* (highest) minimum validation loss! This seems... a bit hard to explain. Perhaps this is just due to stochasticity; after all, we see that overfitting still occurs *later* relative to the baseline model, suggesting there still is some resilience to overfitting.
-
-But a perhaps more interesting explanation (that admittedly could be *completely* wrong) is that 5-category problems may have been too *difficult* of a scouting task given the smaller subset of MNIST used (since lots of categories + few training samples is often a recipe for overfitting). That is, perhaps many of the *scouts* themselves would have started overfitting while being trained on these subproblems, so the guide values derived from such scouts don't end up providing robust enough inductive biases.
-
-Again, this is just a speculation, but if it were true, this could suggest an interesting tradeoff between the **easiness** of the scouting tasks and their **similarity** to the target downstream task. Make a scouting task too easy, and it's too different from the target downstream task, and transferability suffers as a result. 
Make a task too similar to the target downstream task, and it might be too difficult, causing the scouts themselves to overfit and the resulting guide values to be less useful. An interesting balance to think about and explore further.
-
-The overarching takeaway from this experiment, however, seems to be that the exact number of categories for the scouting problems at this specific scale does not drastically affect downstream one-shot performance. Sure, I could have tried to keep increasing the number of categories, but keep in mind there's also a bit of a tradeoff between number of categories and number of possible scouts past a certain point. For example, we would only be able to have one cousin problem with 10 categories (and it would be the whole MNIST dataset)!
-
-
-## Experiment 3: What about unsupervised/self-supervised settings?
-
-**Note:** This particular experiment builds off of some previous work I have done outside of this class.
-
-For the final experiment, I would like to provide a bit of my research background for context. I'm primarily interested in applying/developing AI methodologies for biomedical research. Specifically, I work a lot with "omics" data (e.g. transcriptomics data like RNA-seq, proteomic data, etc.), which is a domain notoriously cursed with datasets characterized by high dimensionality and low sample size. This means that we are almost always forced to utilize pre-training and transfer learning in order to make any deep learning model work for specific downstream tasks. Sounds like the perfect context to apply GTL to!
-
-However, there's one very important caveat. Pre-training in the omics domain is usually **self-supervised**, since large pre-training datasets are often aggregates of hundreds of smaller datasets from separate studies that don't share the same labeling/metadata categories. 
So far, whether it's the original GTL paper or our own experiments above, we have only explored GTL in the context of *supervised* pre-training, scouting, and fine-tuning. How can we adapt GTL when the pre-training (and perhaps the scouting) involve unlabeled data? - -To explore this, I will build off of one of my previous research projects, conducted while I was an intern at NASA Ame's Space Biology Division. The project involved pre-training (traditionally) a large RNA-seq BERT-like model (called scBERT ) on a large *unlabeled* collection of RNA-seq data (recount3 ) in a self-supervised manner (via input masking, like the original BERT). I had evaluated this pre-trained model on a downstream classification task that involved predicting whether or not a mouse had been to space based on its RNA-seq profile. The downstream task involved fine-tuning on [NASA's OSDR datasets](https://osdr.nasa.gov/bio/), which contain RNA-seq data from ground control mice and *actual mice that have been to space* during the NASA Rodent Research missions. The problem was, of course, that very few mice have been to space, so these datasets were tiny (i.e. 12 mice per dataset). It was quite an extreme exacerbation of the high dimensional, low sample size characterization I normally had to deal with. In that project, I showed that traditionally pre-training provided signficant benefits over training from scratch (as expected). - -### GTL pipeline for scBERT: Scouting Problem - -Today, however, I would like to see if GTL can provide any additional benefits to that project. The most obvious challenge, as mentioned earlier, is creating scout problems out of an unlabeled pre-training dataset (recount3). - -Sure, we could use self-supervised masked input prediction for scouting, which is how scBERT is pre-trained traditionally. 
However, it's not immediately clear, at least to me, how exactly we would create *multiple different* scout problems using this scheme (perhaps different masking patterns?). Additionally, we would ideally want the scout tasks to be more similar to the downstream task (which is a binary classification task, i.e. predicting whether or not a mouse sample is ground control or spaceflown) and share mostly the same architecture (i.e. more parameters with transferable guide values). Finally, as mentioned before, we would like to make the scouting tasks sufficiently easy so that the scouts can be successfully trained without overfitting. Given these criteria, I propose the following scouting problem:
-
-1. Reduce the dimensionality of the recount3 dataset using UMAP, keeping only the top 30 UMAP dimensions (to make the next step computationally tractable)
-2. Cluster using K-means clustering. K=30 seems to provide visually logical clusters, so that's the one we will go with.
-3. To create subdatasets ("cousin" problems), we choose random *pairs* of K-means clusters. Thus, each subdataset includes recount3 data from a random pair of clusters.
-4. For each subdataset created, train a scout to classify the *cluster identity* of the samples (a binary classification task). Thus, the scouting task is very similar to the downstream task (which is also binary classification). This *also* means we can use the same exact model architecture for both the scouting tasks and the downstream task (maximal transferability!).
-
-
-
- {% include figure.html path="assets/img/2023-11-02-guided-transfer-learning/clusteringrecount3.png" class="img-fluid rounded z-depth-1" %} -
-
- -
- Pipeline for creating scout problems. -
- -Now, this might seem like a trivial task for the classifier. After all, we are clustering the data based on geometric proximity, then train a model to find decision boundaries between the clusters, so it would seem that the model could find a perfectly clean decision boundary pretty easily. However, keep in mind that the clustering is done in UMAP space, with only the top 30 UMAP components, while the classification is done in the original feature space. UMAP is a nonlinear transformation, so clusters that are easily perfectly separable in top 30 UMAP space may not be in the original space. However, it is definitely still a pretty easy task, but we *want* the scouting tasks to be doable enough so that the scouts can easily converge to a generalizable relationship. So theoretically, it seems reasonable that this could work! (((Admittedly, it took a lot of playing around before deciding on the above scouting formulation; it just ended up being the one that worked the best. I can't tell you exactly why, but my reasoning above is the best "intuitve" reasoning I could come up with.))) - -### GTL pipeline for scBERT: Downstream Task - -What about the downstream few-shot task? Here, I will use the same task that I had previously used to evaluate my traditionally pre-trained scBERT model: -1. We train the model on a single NASA OSD dataset, OSD 105 , containing bulk RNA-seq data from 6 spaceflown and 6 ground control mice, and have it predict whether a mouse was spaceflown or ground control. A simple binary classification task, like the scouting problem, but much harder given the incredibly low sample size. -2. We then validate using another similar NASA OSD dataset, OSD 104 , also containing 6 spaceflown and 6 ground control mice. - -It's important to note that these two datasets, OSD 105 and 104, contain RNA-seq data from different muscle locations. OSD 105 contains tibilalis anterior data, while OSD 104 contains soleus data. 
However, since these datasets all contain data from some sort of mouse skeletal muscle tissue, we expect that cross-dataset generalizability would be reasonable for a strong generalizable model, and I actually intentionally chose datasets from different muscle tissues to test this difficult problem of cross-tissue generalizability. - -
-
- {% include figure.html path="assets/img/2023-11-02-guided-transfer-learning/downstream.png" class="img-fluid rounded z-depth-1" %} -
-
- -
- Downstream few-shot binary classification task on NASA OSDR datasets (predicting whether a mouse is a ground control or spaceflown sample based on its RNA-seq profile).
- -### GTL pipeline for scBERT: Whole Pipeline - -After deciding on the scouting problem formulation, the rest of the pipeline is pretty straightforward. Here's the full pipeline: -1. **Pre-train scBERT traditionally** on recount3 (self-supervised masked input prediction). This involves the encoder portion of the architecture, which embeds the input, and a reconstructor portion, which uses that embedding to reconstruct the masked input values. The goal here, as always, is to learn *general knowledge* about the domain (RNA-seq) in the form of good *pre-trained weights*. -2. **Scouting** on recount3, using the scouting formulation described above. Here, we replace the reconstructor portion of the scBERT architecture with a classification layer. The goal here is, of course, to learn *inductive biases* in the form of *guide values*. -3. **Downstream few-shot fine-tuning** on NASA OSDR datasets, using the few-shot formulation described above. Here, we use the *same* architecture as the scouts. *All guide values transfer over!* - -
-
- {% include figure.html path="assets/img/2023-11-02-guided-transfer-learning/pipeline.png" class="img-fluid rounded z-depth-1" %} -
-
- -
- Full pipeline for applying GTL on scBERT. -
- -### Results and Analysis - -And... here are the results for the downstream task! To compare, I've also included results for an scBERT copy that didn't undergo any pre-training and an scBERT copy that was only traditionally pre-trained on recount3. - -
-
- {% include figure.html path="assets/img/2023-11-02-guided-transfer-learning/results.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Downstream training and validation accuracy/loss curves for (from left to right): scBERT without any pre-training, scBERT traditionally pre-trained, scBERT pre-trained with scouting (GTL). A cosine annealing learning rate scheduler was used, ala the original scBERT paper, hence the jumps and fluctuations. -
-
-With no pre-training, we can see that severe overfitting to the training set happens almost immediately, with validation loss going up while training loss goes down. This makes sense given the extremely small size of the training set, and the fact that the training and validation sets are from different muscles. With traditional pre-training, however, we see that overfitting also does eventually happen, but right before it happens, at around 200 epochs, we get this sweet spot where validation loss is at a low and validation accuracy is at a high of around 90% (highlighted by the purple box). So it seems that general knowledge about RNA-seq data obtained from traditional pre-training already provides a regularizing effect that reins in the model from overfitting immediately to the small downstream training dataset. These results are from my previous work and are nothing new.
-
-Now, when we add *scouting*, the max validation accuracy becomes 100%, which is an improvement from the traditionally pre-trained model, though this by itself may not be that notable given the already good validation accuracy after traditional pre-training. What's potentially more interesting, however, is that this maximum validation performance is maintained over three times as many epochs compared to the traditionally pre-trained model, suggesting that the maximal performance achieved by the GTL model is more robust. However, it is also worth noting that the validation accuracy is a lot noisier and jumps around a lot more for this model compared to the others (keep in mind cosine annealing learning rate scheduler is being used for all these models). But overall, it seems that guided transfer learning provides a more robust regularization effect, giving it a longer period of time with peak validation performance before overfitting occurs. 
-
-This is quite exciting, as it shows that, given the right scouting problem setup, **we can adapt GTL in settings where our pre-training data is unlabeled, as well!** The flexibility of GTL that allows it to be adapted to such a large variety of scenarios is what, in my eyes, makes this method truly innovative!
-
-# Closing Thoughts
-
-## Experiment Limitations and Next Steps
-
-These experiments are merely to serve as a *preliminary* exploration of the nuances of GTL beyond what was presented in the original paper, in hopes that more questions will be explored by the community as GTL gains further publicity and traction. Thus, there is clearly plenty of room for improvement and next steps regarding these experiments.
-
-For experiment 1, I think it would be cool to establish a more rigorous characterization of the amount of pre-training data (or rather lack thereof) that the addition of GTL can compensate for in terms of downstream performance. This might involve using various *even smaller* subsets of MNIST and finding the boundary where a pre-training dataset is so small that even GTL cannot compensate for it.
-
-The results of experiment 2 obviously leave a lot to be desired, as I only explored single-digit values for the number of categories used in the scout problems. These values are all over an order of magnitude off from the number of categories in the downstream task, so none of them gave very useful insight into how "similar" scouting tasks need to be to the downstream task. This was, of course, limited by the MNIST dataset itself, which only had 10 categories. Perhaps using a pre-training dataset with more categories could allow a more comprehensive experiment of this type.
-
-And for experiment 3, I wish I had more time to curate a more robust validation scheme for the downstream few-shot task. 
A validation set with only 12 samples was really not granular enough to precisely capture the potential benefits of adding GTL on top of traditional transfer learning. When the traditionally pre-trained model is already getting 11/12 predictions correct at its best, is 12/12 really that meaningful of an improvement?
-
-## How Exciting is GTL?
-
-As promising as all these results are, GTL is, of course, not the perfect end-all be-all solution to few-shot learning. As was discussed in the original GTL paper and shown in the experiments above, GTL can only provide so much improvement before hitting a wall (e.g. the one-shot learning ability on Omniglot never surpassed 25% validation accuracy). It does not yet quite result in models that match the few-shot learning ability of human intelligence, and still requires a considerable amount of pre-training data. However, the lightweight nature, simplicity, elegance, and adaptability of the model make it so that it's a (relatively) quick and easy solution to get a downstream performance boost on any AI pipelines that already utilize traditional transfer learning! 
- - - - diff --git a/_posts/2023-11-05-neural-PDEs-long-time-dynamics.md b/_posts/2023-11-05-neural-PDEs-long-time-dynamics.md deleted file mode 100644 index a26eeb59..00000000 --- a/_posts/2023-11-05-neural-PDEs-long-time-dynamics.md +++ /dev/null @@ -1,219 +0,0 @@ ---- -layout: distill -title: Neural PDEs for learning local dynamics and longer temporal rollouts -description: 6.S898 deep learning project -date: 2023-11-05 -htmlwidgets: true - -authors: - - name: Pengfei Cai - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-05-neural-PDEs-long-time-dynamics.bib - - -toc: - - name: Partial differential equations - - name: Base model (U-Net) - - name: Fourier Neural Operators - - name: Improving accuracies in predicting local dynamics and long-term rollouts in time-dependent PDEs - - name: Introducing Large Kernel Attention - - name: Conclusion - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Partial differential equations -At the continuum level, spatiotemporal physical phenomena such as reaction-diffusion processes and wave propagations can be described by partial differential equations (PDEs). By modeling PDEs, we can understand the complex dynamics of and relationships between parameters across space and time. However, PDEs usually do not have analytical solutions and are often solved numerically using methods such as the finite difference, finite volume, and finite element methods . 
For example, the finite element method (FEM) approximates PDE solutions by first discretizing a sample domain into a mesh of interconnected elements and then solving a system of equations iteratively given a set of boundary conditions, initial conditions, and material properties. - -In this blog, we will show two examples of PDEs, one of which is the Navier-Stokes equation which describes the dynamics of viscous fluids. The equation below shows the 2D Navier-Stokes equation for a viscous and incompressible fluid in vorticity form on a unit torus, where $$w$$ is the vorticity, $$u$$ the velocity field, $$\nu$$ the viscosity coefficient, and $$f(x)$$ is the forcing function. The solution data were from the original paper where the problem, with a periodic boundary condition, was solved with a pseudospectral method using a 1e-4 time step with the Crank-Nicolson scheme. - -$$ -\begin{gather} -\partial_t w(x, t) + u(x, t) \cdot \nabla w(x, t) = \nu \Delta w(x, t) + f(x), \quad x \in (0,1)^2, t \in [0,T] \\ -\nabla \cdot u(x, t) = 0, \quad x \in (0,1)^2, t \in [0,T] \\ -w(x, 0) = w_0(x), \quad x \in (0,1)^2 -\end{gather} -$$ - -We can visualize the 2D PDE solution over the 50 time steps: - -
- -
-
-Solution of 2D Navier-Stokes PDE - drag the slider! -
- -### Motivations for neural PDEs -Well-established numerical methods are very successful in calculating the solutions of PDEs, however, these methods require high computational costs especially for high spatial and temporal resolutions. Furthermore, it is important to have fast and accurate surrogate models that would target problems that require uncertainty quanitifcation, inverse design, and PDE-constrained optimizations. In recent years, there have been growing interests in neural PDE models that act as a surrogate PDE solver, especially neural operators that aim to learn the mapping between input and output solution functions. These models are trained on numerical solutions from existing methods and inferences are orders of magnitude faster than calculating the solutions again through numerical methods. - -In this article, I will first examine if we can apply neural networks to learn the dynamics in PDE solutions and therefore replace PDE solvers with a neural PDE as the surrogate solver. We will start with a base U-Net model with convolutional layers. Next, I will examine the neural operator methods, notably the Fourier Neural Operator (FNO). Primarily, the Fourier neural operator has proven to predict well for PDE solutions and we will use it to compare with the U-Net model on the representations learnt in the Fourier layers. Next, I will examine the FNO's performance on another PDE with two dependent states. We will notice that the FNO is capable of learning lower frequency modes but fail to learn local dynamics and higher frequency modes. We then finally introduce some improvements to the FNO to tackle this problem involving local dynamics and long term rollout errors. - -### Dataset and training schemes for the 2D Navier-Stokes PDE -For the dataset, I will start with the 2D time-dependent Navier-Stokes solution ($$\nu$$ = 1e-3) that was shipped from Zongyi Li et al's paper . 
The problem for any given model would then be to learn the mapping from an input solution (vorticity) of t=[0,10] to the solution of t=(10, 40]. For all models involving Navier-Stokes, the original implementations were used, but implementations were improved or new ones were added for the second PDE problem which more details will be shared in later parts of the article. We use 1000 solutions for training and 200 for the test dataset. The models are trained with 500 epochs with an initial learning rate of 0.001, the AdamW optimizer is used with a cosine annealing scheduler. Unless otherwise specified, a relative L2 loss is used for training and prediction of each data batch. For U-Net and FNO2D, the models use 2D convolutions in the spatial domain and recurrently predict through the time domain (autoregressive training). For FNO3D, the time domain is included as the 3rd dimension in the input data for the FNO to learn both spatial and temporal dependencies within the solutions. - -## Base model (U-Net) -Let's begin with examining whether a U-Net with convolutional layers can be used to learn the dynamics. U-Net is a popular model architecture for image to image predictions and image segmentation tasks. It consists of a series of downsampling and upsampling layers with skip connections, and my re-implementation is based on [this repo](https://github.com/khassibi/fourier-neural-operator/blob/main/UNet.py). - -We can use the U-Net to learn the features from the input PDE solution frames and predict the solution in the next time step, treating the 2D solution as an image. As for the time component, the surrogate model takes the input solution from the previous k time steps to predict solution in the next k+1 time step. Then, the solution from the previous k-1 steps are concatenated with the predicted solution as the input back into the model to predict the next step, and so on. In a nutshell, the model is trained to predict autoregressively. - -
-
- {% include figure.html path="assets/img/2023-11-05-neural-PDEs-long-time-dynamics/unet_train_test_loss.png" class="img-fluid" %} -
-

Training curve for U-Net with average relative L2 train and test loss

-
- -
- {% include figure.html path="assets/img/2023-11-05-neural-PDEs-long-time-dynamics/unet_2dt_nspred42.gif" class="img-fluid" %} -

U-Net's prediction of 2D Navier-Stokes for unseen test set (id=42)

-
- -The U-Net seems to predict well for the 2D Navier-Stokes test set. However, the average final test loss of 0.0153 is still considerably high. For longer time rollout, the errors can accumulate. Let's examine the FNO2d-t and FNO3d models next. - -## Fourier Neural Operators -Fourier neural operators (FNOs) try to learn the mapping between input functions and solution functions , for example, mapping the solutions from earlier to later time steps for time-dependent PDEs. - -The authors introduced the Fourier layer (SpectralConv2d for FNO2d) which functions as a convolution operator in the Fourier space, and complex weights are optimized in these layers. The input functions are transformed to the frequency domain by performing fast Fourier transforms (torch.fft) and the output functions are then inverse transformed back to the physical space before they are passed through nonlinear activation functions (GeLU) to learn nonlinearity. Fourier transformations are widely used in scientific and engineering applications, such as in signal processing and filtering, where a signal / function is decomposed into its constituent frequencies. In the FNO, the number of Fourier modes is a hyperparameter of the model - the Fourier series up till the Fourier modes are kept (i.e. lower frequency modes are learnt) while higher frequency modes are truncated away. Notably, since the operator kernels are trained in the frequency domain, the model is theoretically capable of predicting solutions that are resolution-invariant. - -### Applying FNO2D and FNO3D on 2D Navier-Stokes time-dependent PDE -We reimplement and train the FNO2D model on the same train-test data splits for the 2D Navier-Stokes solution. Notably, the final average relative L2 loss (for test set) is 0.00602 after 500 epochs of training. Comparing this with the U-Net that is also trained and predicted with the same scheme, the FNO2D has an improved performance! - -
- {% include figure.html path="assets/img/2023-11-05-neural-PDEs-long-time-dynamics/2dt_nspred42.gif" class="img-fluid" %} -

FNO2D's prediction of 2D Navier-Stokes for unseen test set (id=42)

-
- -The predicted solutions look impressive and it seems like the dynamics of the multiscale system are learnt well, particularly the global dynamics. Likewise, the FNO3D gives similar results. Instead of just convolutions over the 2D spatial domains, the time-domain is taken in for convolutions in the Fourier space as well. According to the authors, they find that the FNO3D gives better performance than the FNO2D for time-dependent PDEs. However, it uses way more parameters (6560681) compared to FNO2D (928661 parameters) - perhaps the FNO2D with recurrent time is sufficient for most problems. - -
-
- {% include figure.html path="assets/img/2023-11-05-neural-PDEs-long-time-dynamics/train_test_loss.png" class="img-fluid" %} -
-

Training curve for FNO3D with average relative L2 train and test loss

-
- -
- {% include figure.html path="assets/img/2023-11-05-neural-PDEs-long-time-dynamics/nspred42.gif" class="img-fluid" %} -

FNO3D's prediction of 2D Navier-Stokes for unseen test set (id=42)

-
- -### Representation learning in the Fourier layers -You might be curious how the Fourier layers learn the Navier-Stokes dynamics - let's examine some weights in the SpectralConv3d layers (for the FNO3D). We take the magnitudes of the complex weights from a slice of each layer (4 Fourier layers were in the model). - -
- {% include figure.html path="assets/img/2023-11-05-neural-PDEs-long-time-dynamics/fourierlayers.png" class="img-fluid" %} -

Visualizing weights in the Fourier layers

-
- -There seems to be some global features that are learnt in these weights. By learning in the Fourier space, the Fourier layers capture sinusoidal functions that can generalise better for dynamics according to the dynamical system's decomposed frequency modes. For CNNs, we know that the convolutions in spatial domain would lead to the learning of more local features (such as edges of different shapes), as compared to more global features learnt in Fourier layers. - -### On the importance of positional embeddings -In FNO implementations, besides the input data for the 2D + time domains, the authors also append positional encodings for both x and y dimensions so the model knows the location of each point in the 2D grid. The concatenated data (shape = (B, x, y, 12)) is then passed through the Fourier layers and so on (note: B is the batch size, x and y the spatial sizes, and 12 consists of 10 t steps and 2 channels for positional encodings along x and y). It is important to understand that the positional embedding is very important to the model performance. - -
-
- {% include figure.html path="assets/img/2023-11-05-neural-PDEs-long-time-dynamics/show_dxdt.png" class="img-fluid" %} -

Original with positional encoding

-
-
- {% include figure.html path="assets/img/2023-11-05-neural-PDEs-long-time-dynamics/noposencoding_dxdt.png" class="img-fluid" %} -

No positional encoding

-
-
- -We train the same FNO3D on the same data but this time without the positional encodings concatenated as the input. Simply removing these positional encodings for x and y domains cause the model to underperform. Here, we are comparing between FNO3D with and without positional encoding. FNO3D has a final relative test loss of 0.0106 but the test loss is 0.0167 without positional encodings. Inspecting the change of x over t for a sample test dataset, it then becomes more visible the differences in performances. Note that we also observe the data have well-defined sinusoidal functions in the dynamics. - -## Improving accuracies in predicting local dynamics and long-term rollouts in time-dependent PDEs -Let's apply the FNO to other PDEs, particularly problems where local dynamics and long-term accuracies are important. Here, I introduce another PDE as an example - a coupled reaction heat-diffusion PDE with two dependent states. - -$$ -\begin{gather} -\kappa \frac{\partial^2 T}{\partial x^2} + \rho H_r \frac{\partial \alpha}{\partial t} = \rho C_p \frac{\partial T}{\partial t} \\ -\frac{\partial \alpha}{\partial t} = A \exp \left( -\frac{E}{RT} \right) f(\alpha) -\end{gather} -$$ - -Based on the initial conditions of temperature (T) and degree of cure (alpha) and with Dirichlet boundary conditions on one end of the sample, the T and alpha propagate across the domain (here, the 1D case is examined). For certain material parameters and when initial conditions of T and alpha are varied, we can see that the dynamics can become chaotic after some time - we can visualize it below. - -
- -
-
-Solution of the above coupled PDE with 2 dependent states, solved using FEM. Drag the slider! -
- -For this dataset, we aim to use the first 10 time steps of the solution (heat diffusion from x=0) as input to a neural PDE to predict the next N time steps of the solution. With 10 steps, we predict the 11th step and the prediction is concatenated with the last 9 steps to predict the next time step and so on. We first generate the training data by solving the PDE numerically using the Finite Element Method using the FEniCS package. Specifically, we use mixed finite elements with the continuous Galerkin scheme and a nonlinear solver with an algebraic multigrid preconditioner. - -We use 1228 solutions for the training set and 308 solutions for the test set. The datasets are split into pairs of 10 trajectories, whereby the input data consists the solution of 10 time steps and the output data (to be predicted) consists the solution of the next 10 time steps. Since the neural PDE is trained to predict 10 to 1 time step, every batch is trained autoregressively and an L2 loss is taken for all 10 forward predictions before the sum is backpropagated in every batch. Likewise, the AdamW optimizer is used with an initial learning rate of 1e-4 and a cosine annealing scheduler. The models are trained for 300 epochs with a batch size of 16. - -I initially tried the FNO1D implementation on my PDE dataset and notice that the errors accummulate with longer time rollouts using the trained model. FNO1D is used since we only have 1 spatial dimension in the 1D solution and the solutions are predicted recurrently, just like the use of FNO2D for the 2D Navier-Stokes example earlier. The FNO2D model was also used to convolve over both x and t. Both performances are not ideal within 1 cycle of forward prediction. - -### RevIN and other training tricks to improve accuracies in longer temporal rollout -To overcome this problem, there have been attempts to generally improve the accuracies of neural PDE models and also training tricks proposed to improve long-term accuracies in rollout. 
Using the FNO1D, I first tested out some training tricks, such as the pushforward and temporal bundling which are covered in the paper on message passing neural PDEs. Incorporating the reversible instance normalization layer (RevIN) gives a more promising improvement. With ReVIN, the input solution is passed through the normalizing RevIN layer before the FNO1d and the output solution is denormalized through the RevIN layer. Some examples of these tricks for longer term robust forecasting are covered in [this repo](https://github.com/Rui1521/Symmetry-Tutorial/blob/main/Tutorial_Symmetry.ipynb). - -Using a trained FNO1D with a RevIN layer, here is its prediction on an unseen test set starting from the first 10 time steps as the input solution. The true solution is used to predict up till 50 more time steps forward (5 full cycles forward). While the temperature is predicted with decent accuracies for first cycle (10 steps forward until t=60 shown), the errors accumulate over more steps. - -
- {% include figure.html path="assets/img/2023-11-05-neural-PDEs-long-time-dynamics/fnorevin_fromp1dt156.gif" class="img-fluid" %} -

FNO1d's prediction (1)

-
- -Generally, we attribute this to the fact that the Fourier layers may not be able to learn more local changes in the dynamics since the higher frequency modes in the Fourier series are truncated away. The global dynamics of the propagating front (heat diffusion along x) are captured reasonably well (the positional encodings probably also have a large part to play). We want to build on the FNO to improve predictions for longer temporal rollout especially for multiscale dynamical systems with both global and local changes. Ideally, we want to take an input of a few time steps from a more expensive numerical solver and pass it through a trained surrogate model to predict N time steps (with N being as high as possible). - -## Introducing Large Kernel Attention -To overcome the problems highlighted for this PDE, we attempt to include a large kernel attention layer (LKA) that was introduced in the Visual Attention Network paper by Meng-Hao Guo et. al. The large kernel attention was first introduced as an alternative to the Vision Transformers (ViT) to enable higher spatial adapativity and long-range correlations. While simple to implement, the authors' Visual Attention Network surpasses ViTs and CNNs on tasks such as object detection and pose estimation. A similar strategy was introduced last month for 2D problems although their implementation was not shared. - -Therefore, it may be feasible to introduce attention mechanisms to learn local dynamics in PDEs better, and this can complement the Fourier layers which capture global dynamics better. Herein, we add the LKA layers after the Fourier blocks for the FNO1D, and the new model has 5056 more parameters (583425 to 588481). The performance is found to have greatly improved, especially for local dynamics in the unstable propagations. - -
- {% include figure.html path="assets/img/2023-11-05-neural-PDEs-long-time-dynamics/LKAfno_fromp1dt156.gif" class="img-fluid" %} -

FNO1d + LKA's prediction (1)

-
- -For the same data, the addition of LKA gave improved accuracies over predictions in the next 50 time steps. We attribute this to the large kernel attention's ability to focus on local dynamics at specific parts of the spatiotemporal changes. The LKA has 3 components: a spatial depth-wise convolution, a spatial depth-wise dilation long-range convolution, and a channel convolution. - -$$ -\begin{gather} -\text{Attention} = \text{Conv}_{1 \times 1}(\text{DW-D-Conv}(\text{DW-Conv}(F))) \\ -\text{Output} = \text{Attention} \otimes F -\end{gather} -$$ - -I adapted from the LKA's [original implementation](https://github.com/Visual-Attention-Network/VAN-Classification/blob/main/models/van.py) to apply to our 1D PDE. Let's examine the predictions on another test data. - -
-
- {% include figure.html path="assets/img/2023-11-05-neural-PDEs-long-time-dynamics/fnorevin_fromp1dt876.gif" class="img-fluid" %} -

FNO1d's prediction (2)

-
-
- {% include figure.html path="assets/img/2023-11-05-neural-PDEs-long-time-dynamics/LKAfno_fromp1dt876.gif" class="img-fluid" %} -

FNO1d + LKA's prediction (2)

-
-
- -While the predictions are significantly improved, the errors still accumulate with longer rollouts and the model fails to capture dynamics if we extend predictions till 100 steps forward. More work is needed to improve existing neural PDE methods before they can be used as foundational models for PDEs. - -## Conclusion -In this article, we have introduced the use of neural networks as potential surrogate model solvers for partial differential equations that can be expensive to solve using numerical methods. Compared to the base model U-Net, Fourier neural operators have introduced a novel and useful way of learning PDE solutions through convolutions in the frequency space. We first reimplemented the FNO2D and FNO3D on the 2D Navier-Stokes PDE solution shipped with their paper. While it achieves great performance learning global dynamics, existing models struggle to capture local dynamics (higher frequency modes are truncated away) and longer temporal rollouts. We demonstrate that despite adding a RevIN layer and several temporal training tricks, the FNO1D could not predict accurately the solutions of a coupled time-dependent PDE. With the inclusion of attention mechanism through the large kernel attention, the FNO1D's performance significantly improved. We learn that introducing spatial attention can be useful and more work will be explored to improve predictions of multiscale spatiotemporal dynamical systems. diff --git a/_posts/2023-11-06-attention-regularization.md b/_posts/2023-11-06-attention-regularization.md deleted file mode 100644 index 8317819b..00000000 --- a/_posts/2023-11-06-attention-regularization.md +++ /dev/null @@ -1,256 +0,0 @@ ---- -layout: distill -title: Regularization Techniques for Attention Layers in Transformer Models -description: Attention layers are an integral part of the success of transformer models, but can also lead to overfitting on parts of input data when there is limited training data. 
Therefore, researchers have proposed methods to regularize attention layers to reduce overfitting and increase generalizability. This blog will analyze popular methods and explore novel approaches to regularization in attention layers. -date: 2023-11-06 -htmlwidgets: true - - -# Anonymize when submitting -# authors: -# - name: Anonymous - - -authors: - - name: Jamison Meindl - - -# must be the exact same name as your blogpost -bibliography: 2023-11-06-attention-regularization.bib - - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Background - - name: Methodology - - name: Results - - name: Further Research - - name: Conclusion - - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - - -## Introduction - - -Transformer models are exceptionally popular and successful at completing many tasks. However, they can overfit to their training data if they are not given enough data to generalize. Frequently, part of the reason for overfitting is the overfitting of a self-attention layer, which highlights important tensors in the model. However, if there is not enough data, the attention layer can overfit to the training data and highlight some tensors too much. Therefore, researchers have proposed methods of regularizing attention layers. 
Adding regularization can be complex and there have been many different approaches to solving this issue, from simply smoothing attention layers to encouraging multi-headed models to approach different solutions. Therefore, there are differences in the effects of different regularization methods and some might perform better in different circumstances. There does not seem to be a standard approach to dealing with this form of regularization and while many authors have claimed their regularizations have positive effects on training, there are few comparisons of regularization methods. In this study, we will analyze previous work on regularizing self-attention layers and propose new regularization techniques to identify the advantages and disadvantages of differing models. - -## Background -There are many proposed regularization strategies for self-attention layers. We implement and utilize many of the more popular strategies in this study while also drawing inspiration from other methods in proposed methodologies. However, we could not find comparisons across regularization methods or implementations of these methods publicly available. Therefore, we implemented previously proposed strategies and proposed new regularization strategies based on methods seen in fully connected neural networks. The methods used fall into the following three categories. We will explain the exact methods and implementations used for each of these three categories in the methodology section. They represent a solid overview of the self-attention regularization space and contain the most popular methods currently in use. - -### Dropout Based Methods -Dropout based methods involve randomly setting a specified fraction of the input units to zero during training time, which helps in preventing overfitting . This prevents the model from having all the information during training and therefore forces the model to generalize during training. 
- -### Weight Smoothing Methods -Weight smoothing methods aim to regularize the self-attention layer by modifying the weights such that the attention weights are closer to the uniform distribution and do not overly emphasize specific inputs. This helps prevent overfitting by not allowing the model to only use a few inputs . - -### Cross Head Methods -Cross head methods involve techniques that operate across different attention heads, aiming to diversify the learned representations and prevent redundancy . Therefore, the goal is to prevent each head from being similar to other heads. - -## Methodology -### Overall Architecture -We begin by implementing and benchmarking a vision transformer with no regularization. We had previously implemented a transformer model as part of 6.s898 problem set 3, so we used this as a basis for our models. This model follows an architecture stemming from An Image Is Worth 16x16 Words . This transformer was easily modifiable and relatively simple and so it served as a good basis for our adjustments. The framework of the architecture goes as follows. - - -1. Take an image and split it into patches of specified size. -2. Embed these patches and add a positional encoding to their embedding. -3. Treat these embeddings as a sequence input to a transformer model. -4. Use a transformer model with multi-head self-attention to transform the input into some specified space. -5. Use this output to classify the image. - - -For this specific model, we use a 6 layer transformer with 5 self-attention heads and a patch size of 4. We will be focusing on the multi-head self-attention phase of the transformer model. The following is a diagram of the overall architecture of a vision transformer. - - -
- {% include figure.html path="assets/img/2023-11-06-attention-regularization/6S898_Fall_2023_homeworks_ps3.jpg" class="img-fluid" %} -
-
- Diagram of Vision Transformer Model -
- -### Data - -We use the CIFAR-10 and CIFAR-100 datasets for this study . CIFAR-10 consists of 60,000 32x32 color images representing 10 different classes. These classes are airplanes, cars, birds, cats, deer, dogs, frogs, horses, ships, and trucks. They are evenly distributed, such that there are 6,000 images of each class. CIFAR-100 uses the same format, but instead has 100 evenly distributed classes. We split this data into training and test sets and tested the different forms of regularization. We found that our transformer model with no regularization could easily achieve near-zero error on both sets of training data, but only achieved around 60% in test accuracy for the CIFAR-10 dataset and around 30% accuracy on the CIFAR-100 dataset. Therefore, the model is overfitting to the training data and testing regularization methods on this dataset could help the model generalize more on the test data. - -
- {% include figure.html path="assets/img/2023-11-06-attention-regularization/cifar_10_example.png" class="img-fluid" %} -
-
- Example of CIFAR-10 Images -
- -### Regularization Methods - - -We tested the following regularization methods for our model. We tested models contained within the three different categories of regularization mentioned in the background above, these being dropout based methods, weight smoothing methods, and cross-head methods. - - -#### Dropout Based Methods - -##### DropColumn : -This method uses dropout, a common regularization method used in fully connected neural networks, in self-attention layers. This can force the model to generalize better and not rely on specific inputs as much. The authors propose the following methodology to add dropout to the model, which is similar to standard dropout techniques in neural networks. To perform dropout, each column in the attention weight matrix is sampled from a Bernoulli distribution with some probability. We use a dropout ratio of 0.2 for these experiments. We set the sampled columns to zero weight during training. Therefore, we are able to randomly drop columns in the attention weight matrix. - - -##### DropHead : -We can also perform dropout on the heads across the multi-head attention layer. With this method, we completely drop heads during training to reduce reliance on particular heads and increase the generalizability of the model. We use a dropout ratio of 0.2 for these experiments. This prevents the model from being dominated by a few attention heads. - - -#### Weight Smoothing Methods - -##### Relaxed Attention : -This method smooths the attention weights in the self-attention layer to reduce overfitting. This helps reduce the magnitude of the highest attention scores. We do this by mixing in the uniform distribution to attention weights during training. We use some parameter $ \color{white} \gamma $ to evaluate different levels of mixing. Therefore, we apply the following function to our self-attention weights. 
- - -$ \color{white} A[i,j] = (1-\gamma) \times A[i,j] + \gamma \times \frac{1}{T}, \quad \forall i, j \in [0,1,...,T-1]$ - - -We use $ \color{white} \gamma = 0.1 $ for our experiments. This adds a low level of uniformity but prevents the model from only attending upon a small number of tensors during training. Therefore, this should limit the amount of overfitting that is possible. - - -##### Noise Injection - - -Noise injection has been used to regularize fully connected neural networks, but we have not found any literature that proposes using noise injection to regularize self-attention layers. We propose two methodologies to add regularization and robustness to our model training. We inject noise into our input embeddings with the following formula. - -$ \color{white} x_{i,j}^{noised} = x_{i,j}+ \frac{1}{100} * median(x) * N(0,1) $ - -1. Overall Noise Injection: -The first methodology involves simply adding noise to the input during training. We do this by adding Gaussian random noise to the input before calculating self-attention weights in each layer of the transformer. - -2. Individual Head Noise Injection: -Our second proposed methodology takes advantage of the multi-headed transformer design. We add different Gaussian random noise to each head, such that the heads will receive different inputs. Therefore, the model must become more robust to different inputs. - -#### Cross-Head Methods - -##### Decorrelation -We propose adding a decorrelation term to our loss function. The goal of this loss is to reward differences across attention heads. We begin by calculating the self-attention weights for all of the attention heads. We then compute the pairwise dot products of each head's attention weights. This will increase the loss if there are heads that are highly correlated. This will cause the heads of the network to differ from the other heads in the network and hopefully generalize better. Therefore, we use the following loss term. 
- -$ \color{white} \text{Added Loss} = \sum_{i={0,...,H-1},j={i+1,...,H-1}} \frac{\text{sum}((\Lambda_i^T \Lambda_j)^2)}{\text{Number of elements in }\Lambda_i^T \Lambda_j}$, where H is the number of heads and $ \color{white} \Lambda_i$ is the ith attention head weights. - -This method is inspired by another method, multi-head attention with disagreement regularization . However, the disagreement regularization method relies on calculating more differences than just the attention weight matrices, which is out of the scope of these experiments. - -##### Normalization -We propose adding the 2-norm of all elements in the attention weight matrix to the loss function to limit the emphasis of individual inputs. Therefore, this will smooth the weights and reward more uniform predictions. This should reduce overfitting and make the model more generalizable. We calculate this norm using $ \color{white} \frac{\text{torch.linalg.norm(attention weights)}}{\text{number of elements in attention weights}} $. This computes the 2-norm of all elements across attention heads and adds more loss to weights that emphasize specific inputs more than others. Therefore, this should add smoothing to the weights. - -### Training -We train each model for 25 epochs on the full training set with a batch size of 256. We use the AdamW optimizer, with a learning rate of 0.001. We use the following parameters for our vision transformer. - -| Parameter | n_channels | nout | img_size | patch_size | dim | attn_dim | mlp_dim | num_heads | num_layers | -|-|-|-|-|-|-|-|-|-|-| -| CIFAR-10 | 3 | 10 | 32 | 4 | 128 | 64 | 128 | 5 | 6 | -| CIFAR-100 | 3 | 100 | 32 | 4 | 128 | 64 | 128 | 5 | 6 | - -We train each model individually on both datasets. - -## Results -### CIFAR-10 -We begin by analyzing the training results on the CIFAR-10 dataset. -
- {% include figure.html path="assets/img/2023-11-06-attention-regularization/training_loss.png" class="img-fluid" %} -
-
- Training Loss on the CIFAR-10 Dataset -
-
- {% include figure.html path="assets/img/2023-11-06-attention-regularization/training_accuracy.png" class="img-fluid" %} -
-
- Training Accuracy on the CIFAR-10 Dataset -
-We see that most of the models, except for the dropout based models, achieve near zero error and perfect accuracy on the training set. Therefore, we see that the dropout term is stopping the model from perfectly memorizing the dataset but all other regularization techniques are not forcing the model to change the weights enough to prevent perfect accuracy. - - -
- {% include figure.html path="assets/img/2023-11-06-attention-regularization/test_loss.png" class="img-fluid" %} -
-
- Test Loss on the CIFAR-10 Dataset -
-Looking at the test results, the two dropout models have much lower loss achieved on the test dataset. The rest of the models have similar losses on the test dataset. -
- {% include figure.html path="assets/img/2023-11-06-attention-regularization/test_accuracy.png" class="img-fluid" %} -
-
- Test Accuracy on the CIFAR-10 Dataset -
- - -We see that the two dropout methods also have higher accuracy than the model without regularization. However, the decorrelation model has the highest test accuracy. Overall, the test dataset results are significantly lower than state of the art and a more advanced model may be needed to achieve better performance. -### CIFAR-100 - - -We move on to training and testing the models on the CIFAR-100 dataset. This dataset has more classes and therefore fewer examples of each class. Therefore, the model finds it more difficult to generalize on the test dataset. -
- {% include figure.html path="assets/img/2023-11-06-attention-regularization/training_loss100.png" class="img-fluid" %} -
-
- Training Loss on the CIFAR-100 Dataset -
-Again, we see that all methods except the dropout based methods achieve near-zero error. -
- {% include figure.html path="assets/img/2023-11-06-attention-regularization/training_accuracy100.png" class="img-fluid" %} -
-
- Training Accuracy on the CIFAR-100 Dataset -
-We see similar results to the CIFAR-10 dataset in training. The two dropout methods are unable to achieve perfect loss and accuracy but all other methods are able to. This includes the methods with added loss, that being the normalization method and the decorrelation method. This will depend on the parameters of the model and these models would have higher loss if we used more emphasis on the added loss. - - - - -
- {% include figure.html path="assets/img/2023-11-06-attention-regularization/test_loss100.png" class="img-fluid" %} -
-
- Test Loss on the CIFAR-100 Dataset -
-We see that the two dropout methods have significantly lower loss on the test dataset, with all other methods performing similarly. - - -
- {% include figure.html path="assets/img/2023-11-06-attention-regularization/test_accuracy100.png" class="img-fluid" %} -
-
- Test Accuracy on the CIFAR-100 Dataset -
-We again see consistent results with the CIFAR-10 dataset. The two dropout methods and decorrelation improve the accuracy on the test set, while the others are about the same as without regularization. In this case, the drophead method performs the best. - - - - -## Further Research - - -Further research is needed to further improve the generalizability of this transformer architecture for these datasets. The model still has overfitting issues, even with high regularization and so more research with different architectures or regularization methods is needed to improve the study. Further comparison of regularization methods on alternative datasets and types of data, such as text, would also be valuable to look at. - - -## Conclusion - - -Regularization is an important tool to reduce overfitting and improve the generalizability of a model. The results show that adding various forms of regularization can improve the results of a model, but our implementations did not cause a dramatic change to the ability of the model to generalize to the test set. Most of the models still had a very large gap between their training accuracy and test accuracy. However, we did see notable improvements for both the dropout models and the decorrelation model. The dropout models were the only models that added regularization such that the model could not perfectly memorize the training set. Therefore, their training accuracy was significantly lower but they also had higher test accuracy. Additionally, the decorrelation model was also successful. While the model followed a similar pattern during training to the model without regularization, the test accuracy was generally higher, suggesting the added error did force the model to learn different parameters. Therefore, based on these results, adding regularization can be helpful in improving the generalizability of transformer models, especially when they have limited data. 
The other methods, such as the noise based methods, normalization, and relaxation did not appear to have a significant effect on training or test outputs. It is likely that alternative parameters or architectures are needed to realize their effect. Lastly, while this analysis was only completed using vision transformers, different datasets or network architectures may have significantly different results. Therefore, these other regularization methods may be more successful in other contexts. However, these tests prove that there are circumstances in which regularization can have a beneficial effect on transformer performance and is therefore a worthwhile experiment when dealing with overfitting transformers. - - - diff --git a/_posts/2023-11-07-Language-Bias.md b/_posts/2023-11-07-Language-Bias.md deleted file mode 100644 index 40b2c50d..00000000 --- a/_posts/2023-11-07-Language-Bias.md +++ /dev/null @@ -1,371 +0,0 @@ ---- -layout: distill -title: Understanding Bias in Speech to Text Language Models -description: Do language models have biases that make them better for latin based languages like English? To find out, we generate a custom dataset to test how various language features, like silent letters, letter combinations, and letters out of order, affect how speech2text models learn and compare these results with models trained on real human language. -date: 2023-11-07 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Shreya Karpoor - url: - affiliations: - name: MIT - - name: Arun Wongprommoon - url: - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-07-Language-Bias.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. 
-toc: - - name: Motivation - subsections: - - name: Past Work - - name: Generating a Dataset - subsections: - - name: Silent Letters - - name: Letter Combos - - name: Letters Out of Order - - name: Controlled Experiments - subsections: - - name: Results - - name: Corners Cut - - name: Real Language - - name: Learnings - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Motivation - -With all the buzz that ChatGPT is getting recently, it is clear that machine learning models that can interact with humans in a natural manner can quite literally flip the world around. If that is not enough proof, Siri and Google Assistant, their popularity and convenience can give you a bit more of an idea. We can see how speech processing is important as a way for humans and computers to communicate with each other, and reach great levels of interactivity if done right. A lot of the world’s languages do not have written forms, and even those that do, typing can be less expressive and slower than speaking. - -The core of these assistant systems is automatic speech recognition, often shortened as ASR or alternatively speech2text, which we will be using. This problem sounds rather simple: turn voice into text. However easy it might sound, speech2text is far from solved. There are so many factors that affect speech that makes it extremely difficult. First, how do we know when someone is speaking? Most speech2text models are trained on and perform well when the audio is clean, which means there is not a lot of noise. 
In the real world, however, one can be using speech2text in a concert or a cocktail party, and figuring out who is currently speaking to the system amid all the noise is a problem in itself! Another important factor that complicates speech2text is that we don’t all talk the same way. Pronunciations vary by person and region, and intonation and expressiveness change the acoustics of our speech. We can see this in full effect when auto-generated YouTube caption looks a bit.. wrong. - - -{% include figure.html path="assets/img/2023-11-07-Language-Bias/reddit.png" class="img-fluid" caption="From https://www.reddit.com/r/funny/comments/ha7dva/youtube_auto_captions_spot_on/"%} - -Aside from who and how we talk, another big part that makes speech2text hard has to do with the idiosyncrasies of text and languages itself! Some idiosyncrasies of language include orthography, the system of how we write sounds and words, and syntax, the system of how words string together into sentences. If you are familiar with English, you would be familiar with the English syntax: subject, verb, object, and a particular order for adjectives. We would instinctively say “small white car,” but not “white small car” and most definitely not “car white small.” Cross over the English channel to France (or the St. Lawrence River to Quebec), and the order changes. For French, you would say “petite voiture blanche,” which word for word is “small car white.” - -Travel a bit further and you would see that Chinese uses “白色小车” (”white color small car”), Thai uses “รถสีขาวคันเล็ก” (”car color white * small”) and Kannada uses “ಸಣ್ಣ ಬಿಳಿ ಕಾರು” (”small white car”, same as English). Aside from order of adjectives, larger differences in syntax include having the subject appear first or last in a sentence, position of verbs, and how relative clauses work. All this means that language is quite non-linear, and natural language models that understand language must cope with our silly little arbitrary orders! 
- -{% include figure.html path="assets/img/2023-11-07-Language-Bias/twitter_meme.png" class="img-fluid" caption="From https://www.bbc.com/news/blogs-trending-37285796"%} - -Thankfully though, for speech2text how sentences work is not as important as how phonetics and orthography works. But even then, things are not quite smooth sailing either. We sometimes take for granted how difficult reading is, perhaps until you start to learn a second language and realize how much we internalize. English is notorious for not spelling words the way it sounds, mostly because writing was standardized a long time ago and pronunciation has shifted since. This makes it difficult for machine learning models to try learn. - -{% include figure.html path="assets/img/2023-11-07-Language-Bias/ought.png" class="img-fluid" caption="Sentence from https://en.wikipedia.org/wiki/Ough_(orthography)"%} - -Wow, look at all those words with “ough”! There are at least eight different pronunciations of the word, or from another point of perspective, at least eight different audios magically turn out to be spelt the same! In the diagram we tried substituting the red “ough”s to their rhymes in blue, keeping in mind that some dialects pronounce these words differently (especially for “borough”), and in green is the International Phonetic Alphabet representation of the sounds. IPA tries to be the standard of strictly representing sounds as symbols. What’s at play here? English is plagued with silent letters (”knight”), and extraneous letters (all the “ough”s and more). - -Some languages are more straightforward in their orthography than others. Spanish tends to be fairly phonemic, which pretty much means that their writing and speaking are quite in sync. French, however, is very famous for its silent letters. A word like “hors-d’oeuvres”, which means appetizer, can be represented in IPA as [ɔʁ dœvʁ], you may see that around half the letters aren’t pronounced! 
Kannada, a language in South India that is spoken by one of our group members, is said to be quite phonemic, but doesn’t come without a different kind of headache. A number of languages, predominantly in South Asia and Southeast Asia, use a kind of writing system that combines a consonant character with a vowel character to form a new character that represents the consonant-vowel combination. The new character retains some part of the original consonant and vowel in some cute manner, kind of like the letter **Æ** but dialed up many notches. Most abugida systems descend from the 3rd century BCE Brahmi script. - -{% include figure.html path="assets/img/2023-11-07-Language-Bias/brahmi.png" class="img-fluid" %} - -Above are some examples of scripts of this type, demonstrating two consonants k and m combining with vowels long a, i and u. Another interesting feature for some of these writing systems is that sometimes the vowels jump to the front, for example in Thai ก (k) + เ (e) = เก (ke). Again, writing is non-linear at times! - -### Past Work - Past work shows success in training speech2text models in German, Spanish, and French , . Some use pruning and finetuning of state of the art English models, and others train models from scratch for each language. Other works such as show that models can learn less common languages, like Thai which is the language our other group member speaks, as well, but they are more complex and specific to each language. circumvents this by pruning wav2seq (a SOTA speech2text model) and finetuning the model for different languages. While this showed promising results, we wanted to dive deeper to understand, from a linguistic and data driven perspective, the biases that *simple* speech2text models had. - - Many state of the art models rely on encoder-decoder models. An encoder is used to create an expressive feature representation of the audio input data and a decoder maps these features to text tokens. 
Many speech models like , , also use self-supervised pretraining on the encoder for better performance. One example is Wav2Seq. Wav2Seq uses unsupervised pretraining to annotate audio samples with unique characters in the form of a pseudo language. The building blocks for these encoders are generally transformer based . - -Other methods use deep recurrent neural networks like in . RNNs are great at sequential tasks and have an internal memory to capture long term dependencies. Transformer based methods have outperformed RNNs and LSTM based architectures now though. - -How do these features (idiosyncrasies) differ between languages and does this affect how well speech2text models learn? By doing more ablation studies on specific features, maybe this can inform the way we prune, or choose architecture, and can also help determine the *simplest* features necessary in a speech2text model that can still perform well on various languages. - -There has been work that performs ablation studies on BERT to provide insight on what different layers of the model are learning . Experiments suggest lower layers learn phrase-level information, middle layers learn syntactic information, and upper layers learn more semantic features. We want to do a similar study, but on dissecting the components of language rather than the components of a particular SOTA model. Our hypothesis is that by doing so, we can be better informed when selecting preprocessing methods and models. - -Let's get started with some experiments! - -## Generating a Dataset - -We want to explore how each of these language features affects how speech2text models learn. Let’s create a custom dataset where we can implement each of these language rules in isolation. To do that, we’ll build out our own language. Sounds daunting — but there are only a few key building blocks that matter to us. 
Languages are made of sentences, sentences are made of words, words are made of letters, and letters are either consonants or vowels. Let’s start with that. - -From , languages have 22 consonants on average and about 9 vowels on average so that’s what we’ll have in our language too. We represent consonants as positive integers from 1 to 23 and vowels as negative integers from -9 to -1. After all, letters are just symbols! - -A word, at it’s most crude representation, is just a string of these consonants and vowels at some random length. To make sentences, we just string these words together with spaces, represented by 0, together. - -Here’s a sample sentence in our language: - -``` -[14 -2 -9 13 0 8 16 -8 -2 0 -3 -8 16 12 0 10 20 -3 -7 0 14 18 -9 -4 - 0 16 -3 -5 14 0 -3 9 -8 3 0 -9 -1 22 7 0 12 -5 6 -7 0 -7 22 12 - -2 0 22 -9 2 -2 0 17 -2 -8 9 0 1 -4 18 -9 0 19 -7 20 -2 0 8 18 - -4 -2 0 -9 8 -4 15 0 -9 -2 22 18] -``` - -Ok, that seems a little meaningless. We don’t have to worry about meaning in the general semantic sense though. What we do care about, is pronouncing this language, and creating a mapping from these written sentences to an audio sample. Let’s do that next. Audio samples can be represented as spectrograms. Spectrograms give us a visual representation of audio by plotting the frequencies that make up an audio sample. - -Here’s an example: - -When we say **“It’s never too early to play Christmas music”**, this is what it might look like visually: - -{% include figure.html path="assets/img/2023-11-07-Language-Bias/christmas_spectrogram.png" class="img-fluid" %} - -The key here is that we don’t exactly need audio samples, but rather an embedding that ***represents*** an audio sample for a written sentence. Embeddings are just low dimensional mappings that represent high dimensional data. 
- -So, in our case, our spectrogram for a generated audio sample looks something like: - -{% include figure.html path="assets/img/2023-11-07-Language-Bias/gen_spectrogram.png" class="img-fluid" %} - -Even though audio samples might be complicated waveforms, the embedding for the first letter looks something like: - -``` -tensor([[ 3.6887e-01, -9.6675e-01, 3.2892e-01, -1.2369e+00, 1.4908e+00, - 8.1835e-01, -1.1171e+00, -1.9989e-01, 3.5697e-01, -1.2377e+00, - 4.6225e-01, -6.7818e-01, -8.2602e-01]]) -``` - -Again, maybe meaningless to us who haven’t really learned this new language. There are some vertical columns of the same color, and these represent the silences between each word. You might notice that these columns aren’t exactly the same color, and that’s because we’ve added a bit of Gaussian noise to the audio embedding samples to simulate noise that might occur when recording audio samples on a microphone. - -Ok great! We’ve got this perfect language that maps the same sentence to the same audio sample. Now, let’s get to work adding some features that we talked about in the previous section to make this language a bit more complicated. - -We narrow our feature selection to the following three: - -1. **Silent Letters:** letters in the written language that don’t appear in the phonetic pronunciation -2. **Letter Combos:** two letters combine in the script but are still pronounced separately -3. **Letters out of Order:** phonetic pronunciation is in a different order than written language - -### Silent Letters -Silent letters mean they appear in our written labels but not in our audio samples. We could just remove letters from our audio embeddings, but that’s a little funky. We don’t usually pause when we come to a silent letter — saying (pause - nite) instead of just (nite) for night. To preserve this, let’s instead add letters to our written label. 
 - -{% include figure.html path="assets/img/2023-11-07-Language-Bias/silent_letters.png" class="img-fluid" %} - - In the diagram above, we have a small written sample and some audio embeddings represented as colored blocks. We generate some rules similar to those on the left. - - -{% include figure.html path="assets/img/2023-11-07-Language-Bias/silent_letters.gif" class="img-fluid" %} - -In this case, we add a 7 after the 3, simulating a silent letter at consonant 7. We then pad the audio sample with a silent (0) to make up for the size increase of the written label. Note that silent letters don’t add pauses during the audio. - -### Combining Letters -When combining letters, our written script changes, but our audio remains the same. We choose to combine every pair where a vowel follows a consonant. This is the most common case of letter combination in languages that have this feature. - -{% include figure.html path="assets/img/2023-11-07-Language-Bias/combo_letters.gif" class="img-fluid" %} - -Here we have to pad the written labels as we combine two letters into one. - -### Letters out of Order -We choose some pairs of consonants and vowels. Swap the pair order for every instance of the pair in the written sample. No padding needs to be added here. - -{% include figure.html path="assets/img/2023-11-07-Language-Bias/swap.gif" class="img-fluid" %} - - -## Controlled Experiments -Now for the fun part! Let’s see what happens when we test our new language, with each of these rules in isolation, with some models. Regardless of the model we choose, our goal is to learn a written label for a given audio sample. - -We’re going to test our language with the building blocks of these state-of-the-art models — transformers and RNNs. The results from these experiments can inform us on the biases that these fundamental models might have in their most “vanilla” state. 
 - -We hypothesize that transformers will perform better because RNNs have a limited memory size, while Transformers use attention which means they can learn orderings from anywhere in the audio sample. - -{% include figure.html path="assets/img/2023-11-07-Language-Bias/system.png" class="img-fluid" %} - - -## Results - -{% include figure.html path="assets/img/2023-11-07-Language-Bias/results1.png" class="img-fluid" %} -{% include figure.html path="assets/img/2023-11-07-Language-Bias/results2.png" class="img-fluid" %} -{% include figure.html path="assets/img/2023-11-07-Language-Bias/results3.png" class="img-fluid" caption="RNNs are dashed lines, Transformers are solid lines" %} - -Hmm… so Transformers performed better, but not that much better than our RNNs. This could be because our hypothesis that attention is better for long sequences and RNNs have limited memory may not apply. When we generated our language, the consonant and vowel orderings were pretty random. Our rules have some pattern to them, but not as much as a real human language — so maybe attention can exploit these better in real human language, but doesn’t give as much of an advantage in our generated dataset. - -As for our features, it seems that silent letters perform significantly worse than some of the other rules. This makes sense because attention and internal memory, perhaps, provide some mechanism for dealing with swapping or out of order. Transformers have the ability to “focus” on features of the sample that are deemed important. Our rules do have some pattern, and the models just have to learn these patterns. - -With silent letters, though there is a pattern to an audio sample not being present, the rest of the sounds succeeding the silent letters are all shifted over. This is probably why letter combos also don’t do too great. 
With letter combos and silent letters, the one-to-one mapping between a letter and its phonetic pronunciation (or audio embedding) is thrown off for the rest of the sequence. - -## Corners Cut - -This certainly tells us a lot! But, we should take these results with a grain of salt. There are some discrepancies between human language and the way that we generated our dataset that we should consider. - -- Actual audio speech recognition systems mostly don't predict letter by letter, some do subwords and others do word level recognition; but in the grand scheme of things these distinctions may be negligible — after all, they’re all units! This means our controlled experiment, for our purposes, simulates character recognition models which may misspell words (”helouw” instead of “hello”). If the model is at the subword level, misspellings may decrease, since character sequences like “ouw” would not be in the list of possible subwords, or the vocabulary. “ouw” is a very un-English sequence, see if you can find a word that contains these three letters in succession! Misspellings like “hellow” might still happen though, since it is a plausible combination of English-like sequences “hel” and “low”. If the model is at the word level, there will not be misspellings at all. - -- speech2text models generally either use an encoder-decoder model, or otherwise typically the input and output do not have to match in dimension. Both options mean that there is no need to pad written or audio samples to make sure they’re the same length. In our case, we have to pad our written/audio to make sure everything is the same size. Connectionist Temporal Classification is used to postprocess outputs and compute loss. - - The way CTC works is that first it assumes that a letter may take more than one audio frame to say, which tends to be the case, especially for vowel sounds which are typically looooooooooonger than consonant sounds. 
There is also a special character epsilon that serves as the “character boundary” symbol, but is different from the silent symbol. The output of a CTC model is deduplicated, and epsilons are removed. Here is CTC in action from : - -{% include figure.html path="assets/img/2023-11-07-Language-Bias/ctc.png" class="img-fluid"%} - -- An effect of the letter combination script in our controlled experiment is that there will be some letter combinations that exist as a class (aka in the alphabet) but are never seen in the dataset. For example (1, 12) are in the alphabet as consonants, but 112 isn’t a letter. - -- Actual language has tone, intonation, speed and noise that can make it harder to learn. Here is where something like Wave2Seq can help as tokens are clustered, so if someone takes a little longer to say AA, it will still register as the same pseudo token. - -## Real Language - -Alas, real-world languages are more complicated than our controlled languages. We wanted to see if the patterns we learnt in our controlled experiments would still hold true for actual datasets. For this, we needed to find a relatively phonemic language and another language that differs only by one feature. As mentioned earlier, Spanish qualifies for the former, and French qualifies for the latter. French, to the best of our knowledge, is prevalent with silent letters, but doesn’t really exhibit other features in our controlled experiments. - -We’re using the CommonVoice dataset, which is a crowdsourced dataset of people reading sentences in many languages, and might be harder to train because of how unclean the dataset as a whole may be. 
We preprocess the audio using a standard method, which is the following: - -- First, calculate the audio spectrogram and condense the result by summing up the amplitudes of a few frequencies that belong in the same “bucket”, to yield Mel-frequency cepstral coefficients (MFCC) -- To add some temporal context, the differential of the MFCC and its second-degree differential are calculated and concatenated to the MFCC -- The label vocabulary is constructed, by looking at what letters exist in the dataset, and the written data is converted to numbers - -Behold, an example of the preprocessed dataset for Spanish! - -{% include figure.html path="assets/img/2023-11-07-Language-Bias/spanish.png" class="img-fluid" %} - - -``` -target tensor: [30, 43, 1, 41, 53, 40, 56, 39, 1, 59, 52, 39, 1, 58, 39, 56, 47, 44, - 39, 1, 42, 43, 1, 43, 52, 58, 56, 39, 42, 39, 7] -target sequence: Se cobra una tarifa de entrada. -``` - -We tried training transformers and RNNs, with and without CTC, on this real-world data. Without CTC, the performances of the models are, respectfully, really bad. 
After a number of epochs, the only thing learnt is that the space character exists, and the 6% accuracy comes from the model predicting only spaces: - -``` -predicted tensor: [16 39 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - 1 1 1 1] -predicted sequence: Ea -target tensor: [71, 28, 59, 83, 1, 53, 57, 1, 54, 39, 56, 43, 41, 43, 11, 1, 36, 1, - 56, 43, 57, 54, 53, 52, 42, 47, 43, 52, 42, 53, 1, 43, 50, 50, 53, 57, - 5, 1, 42, 47, 48, 43, 56, 53, 52, 8, 1, 14, 59, 50, 54, 39, 42, 53, - 1, 43, 57, 1, 42, 43, 1, 51, 59, 43, 56, 58, 43, 7, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] -target sequence: ¿Qué os parece? Y respondiendo ellos, dijeron: Culpado es de muerte. -``` - -Got it. Like our silent letter controlled experiment, a high mismatch between the audio frame and its written frame causes models to not be able to learn well. Let’s put in our mighty CTC Loss and see how it works! It turns out that after some 30 epochs, it still isn’t doing quite so well. 
Here, let’s see an example of a transformer trained on the Spanish dataset with CTC: - -``` -predicted tensor: [ 0 39 0 57 0 54 39 0 41 0 41 0 43 0 47 0 43 0 57 0 53 0 42 0 - 58 0 47 0 53 0 41 0 54 0 39 0 43 0 57 0 43 0] -predicted sequence: aspacceiesodtiocpaese -target tensor: [71 28 59 83 1 53 57 1 54 39 56 43 41 43 11 1 36 1 56 43 57 54 53 52 - 42 47 43 52 42 53 1 43 50 53 57 5 1 42 47 48 43 56 53 52 8 1 14 59 - 50 54 39 42 53 1 43 57 1 42 43 1 51 59 43 56 58 43 7] -target sequence: ¿Qué os parece? Y respondiendo elos, dijeron: Culpado es de muerte. -``` - -Perhaps the transformer is too big for this and learns pretty slowly. It is starting to pick up on some sounds, for example for “¿Qué os parece?” it seems to have picked up “as pacce” and “respondiendo” has some similarities to “esodtio,” but we really needed to squint to see that similarity. If we let it run for longer, perhaps it would get better… slowly. - -RNNs, however, came up on top. We’re using bidirectional LSTM RNN for this, and it seems that CTC works! Here’s the RNN trained on the Spanish dataset with CTC: - -``` -predicted tensor: [30 0 59 0 52 0 53 0 51 0 40 0 56 43 0 57 0 43 0 1 42 0 43 0 - 42 0 47 0 42 0 39 0 42 0 43 0 1 0 89 0 51 0 40 0 43 0 58 0 - 39 0 59 0 52 0 53 0 1 42 0 43 0 1 50 0 39 0 1 0 57 0 54 0 - 39 0 88 0 53 0 52 0 39 0 7] -predicted sequence: Sunombrese dedidade ómbetauno de la spañona. -target tensor: [30 59 1 52 53 51 40 56 43 1 57 43 1 42 43 56 47 60 39 1 42 43 50 1 - 52 53 51 40 56 43 1 58 39 86 52 53 1 42 43 1 23 39 1 16 57 54 39 88 - 53 50 39 7] -target sequence: Su nombre se deriva del nombre taíno de La Española. -``` - -Looks great! Of course there are some word boundary mistakes, but overall it looks pretty similar. What about French? 
Here are transformer and RNN results for what we hypothesized is a language full of silent letter features: - -``` -predicted tensor (Transformer): [21 0] -predicted sequence (Transformer): L -predicted tensor (RNN): [18 0 47 0 1 0 56 0 56 40 0 54 0 44 0 1 55 0 40 0 55 1 53 0 - 40 0 36 0 48 40 0 49 55 0 44 0 55 0 53 40 0 36 0 49 0 1 49 50 - 0 53 0 1 0 1 44 0 47 0 1 40 0 1 51 0 50 0 55 0 36 0 49 0 - 1 54 0 40 0 47 40 0 48 40 0 49 55 0 1 71 0 1 57 0 36 0 54 0 - 44 0 54 6] -predicted sequence (RNN): Il uuesi tet reamentitrean nor il e potan selement à vasis. - -target tensor: [18 47 1 36 1 36 56 54 44 1 75 55 75 1 53 75 38 40 48 40 49 55 1 44 - 49 55 53 50 39 56 44 55 1 40 49 1 14 56 53 50 51 40 1 50 82 1 44 47 - 1 40 54 55 1 51 50 55 40 49 55 44 40 47 40 48 40 49 55 1 44 49 57 36 - 54 44 41 6] -target sequence: Il a ausi été récement introduit en Europe où il est potentielement invasif. -``` - -Wow! The transformer got stuck in the blank hole black hole, but the RNN looks not too shabby. Some word boundary issues for sure, but we can see similarities. “potan selement” and “potentielement” actually do sound similar, as do “à vasis” and “invasif.” Definitely not as good as Spanish though. Here’s a comparison of losses for the four models: - -{% include figure.html path="assets/img/2023-11-07-Language-Bias/real_results.png" class="img-fluid" %} - -One thing that’s very much worth noticing is that the validation losses plateaued or rose during training. Did we overfit our data, or are these languages too hard that they can’t be fully learnt from our data, and the high loss is due to the idiosyncrasies of language? Probably both! - -Now did these real-world explorations match our hypotheses from controlled experiments or not? Our hypothesis from controlled experiments says that French would do worse than Spanish, which is what we’re seeing. Additionally, we see a pretty significant gap in loss between transformers and RNN models, given that CTC loss is used. 
 - -Here comes the confusing part. Most literature would say that transformers should perform better than RNNs, even with CTC. This matches with our controlled experiments but did not match our real-world experiments. What went wrong? For one, we think that our models might still be too small and not representative of actual real-world models. We also trained the models for quite a short amount of time with a small amount of data that might be noisy. Perhaps our recipe was just the perfect storm to cause our transformer model to be stuck in the blank hole. We found an article that documents the tendency for MLPs to get stuck in a stage of predicting blanks before moving on to predicting real characters, which sounds like what’s going on for us. Some other sources point to the assertion that input spectrogram lengths must be longer than label lengths, and suggest refraining from padding labels with blanks. We followed their suggestions but unfortunately could not bring the transformer models out of the blank hole. - - -## Learnings -What have we looked at? - -- Linguistics: we learnt how weird languages can be! -- Models: we touched upon how speech2text models usually work -- Hindrances: we hypothesized and tested a few features that affected model performance - - Silent letters are our biggest enemies, followed by letter combinations and out-of-order letters -- Battle: we compared two different foundational models for speech2text against each other - - In our controlled experiments, it’s a pretty close call but transformer came up on top by just a slight margin -- Real: we presented what a real-world dataset looks like, the data preprocessing methods, and checked if our learnings from controlled experiments hold - - Creating a spectrogram and a character vocabulary is the standard! - - French (silent letter-ish) vs. Spanish (perfect-ish) matches our hypothesis! 
 - - CTC is the cherry on top for success but only works well with RNN, putting RNN on top by a long shot this time! - -We would like to expand our linguistics experiments further as future work, as there are many more features and combinations not explored here (for example, Arabic writing usually drops all vowels — we imagine that this feature would affect performance a lot!) Another avenue of further work is to try training on other real-world languages to see whether our hypotheses still hold true. - diff --git a/_posts/2023-11-07-SmartEmbeddingInitializations.md b/_posts/2023-11-07-SmartEmbeddingInitializations.md deleted file mode 100644 index d3460448..00000000 --- a/_posts/2023-11-07-SmartEmbeddingInitializations.md +++ /dev/null @@ -1,182 +0,0 @@ ---- -layout: distill -title: Injecting Node Information via Embedding Initializations - -description: Graph Neural Networks (GNNs) have revolutionized our approach to complex data structures, enabling a deeper understanding of relationships and patterns that traditional neural networks might miss. This project looks into the potential of embedding initializations in GNNs, particularly in the context of molecular function prediction and protein retrieval tasks. By investigating the effect of intentional, information-rich initializations versus random initializations, we aim to enhance the learning efficiency and accuracy of GNNs in these domains. Our study focuses on a precision medicine knowledge graph (PrimeKG) and employs TxGNN, a GNN model initially designed for disease-drug link prediction, repurposed for protein-molecular function link prediction. We explore the impact of using ESM embeddings for protein nodes, hypothesizing that these embeddings could provide structural information not explicitly present in the graph data. Through comparisons of the latent spaces and performances, we look to see the effectiveness of these embeddings in improving the model's predictive power of protein function. 
 -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Emma Tysinger - url: "https://www.linkedin.com/in/emma-tysinger/" - affiliations: - name: MIT - - name: Sam Costa - url: "https://www.linkedin.com/in/samuelcos/" - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-07-SmartEmbeddingInitializations.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Project Outline - - name: Related Work & Motivation - - name: Data - - name: GNN - - name: Model Evaluation - - name: Latent Space Visualizations - - name: Discussion - - - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- -## Introduction -Graph Neural Networks (GNNs) have emerged as a transformative tool in machine learning, with the ability to capture the complex structures and relationships inherent in data. In molecular property prediction, for example, GNNs are great at encoding the atomic structure and intermolecular forces into high-dimensional embeddings, leading to more accurate predictions of chemical properties and drug efficacy. GNNs have also been used in traffic time prediction problems, physics simulations and social media analysis applications. 
Through message-passing and updating, GNNs are capable of learning embeddings that encode information about node neighbors and long-distance complex connections – that we, as humans, may not be able to make. The quality of the embeddings is not only important for the accuracy of the task the GNN is trained on, but quality node embeddings can be used through transfer learning – enabling models trained on one task to adapt and excel in another. The importance of good embeddings in GNNs is why we want to look closer at embedding initializations and if we can inject additional information – not present in the graph – to result in better learned embeddings after training. - -Possible applications of initial embedding initializations could help in the field of drug discovery. For GNNs used for protein retrieval trained on a biomedical knowledge graph, using ESM embeddings for the proteins could add structure information that is not previously encoded in the graph entities. - -### Project Outline -We will explore the question: can additional node information be injected into the model by using intentional embedding initializations rather than random initializations? Furthermore, are the learned embeddings better representations of the nodes? To answer this question we will follow the steps outlined below: - -1. We will download a precision medicine knowledge graph and use a GNN, TxGNN, that is implemented for disease-drug link prediction on a biomedical knowledge graph as a baseline model. -2. We will modify the GNN for protein-molecular function link prediction. -3. Generate and download ESM embeddings for each protein -4. Pretrain and finetune two models – one using random protein node initialization and one using ESM embeddings for protein node initialization. We must pretrain our own models, rather than use the already pretrained model, since we are focusing on how different node initializations impact the predictive power. -5. Evaluate both models -6. 
Visualize latent spaces before pretraining, after pretraining and after finetuning - -## Related Work & Motivation -In reviewing the literature, we found several papers which reference the possibility of improved performance through a more informed initialization process. As discussed by Li et al., the initialization methods used for GNNs, such as Xavier random initialization, were originally designed for CNNs and FNNs. In that setting, the Xavier approach helped to avoid vanishing gradients and maintain a constant information flux. However, Li et al. point out that by leveraging the structure of the graph, we can likely do better than the random initializations used previously . - -In the paper detailing TxGNN, Huang et al. present promising results on their ability to predict drug repurposing opportunities using a GNN . However, in their work they considered only the Xavier random initializations for weight matrices and node embeddings. This left open the idea of initializing the graph using more sophisticated methods. - -Previous work by Cui et al. has explored the power of artificial node initializations, finding that encoding structural and positional information in the node initializations can have a profound effect on the ability of a GNN to accurately predict features based on the graph. They provide a basis for our investigation by showing the effect that initializations can have on the results, if done correctly. We seek to build on this work by testing the effect of injecting related, but not exactly equivalent information through the node initializations . - -Not only did we see an opportunity to try a different initialization method, but this problem also lent itself well to data-informed initializations. The molecules in TxGNN have a wealth of knowledge about them which is not represented in the linkages in the graph, some of which is represented in the ESM embeddings of the molecules. 
Thus, we thought that by supplying these embeddings to the GNN, we might be able to leverage the additional data to make better predictions. - -## Data -{% include figure.html path="assets/img/PrimeKG.png" %} -
- Precision Medicine Knowledge Graph. Figure credit: Building a knowledge graph to enable precision medicine (Chandak, Huang, Zitnik 2023). -
 -We used a precision medicine knowledge graph (PrimeKG) constructed by Marinka Zitnik's group at Harvard . PrimeKG compiles data from knowledge bases that cover a broad variety of biomedical information including human disease, drug-protein interactions, genes and proteins with their associated biological processes, functions and cellular component, etc. PrimeKG contains 10 different node types – shown above – and 29 different types of undirected edges. There are over 120,000 nodes in total and over 8 million edges. What PrimeKG lacks, importantly, are any nodes or encodings of structural, molecular or sequential information for entity nodes such as proteins and drugs. The node types of interest for our model are proteins, extracted from NCBI, and molecular function Gene Ontology (GO) annotations . We will be predicting links between these two node types. - -The other data used were ESM embeddings for proteins in PrimeKG. ESM embeddings, or Evolutionary Scale Modeling embeddings, are high-dimensional vector representations of proteins, derived from advanced machine learning models developed by Meta trained on large datasets of protein sequences. These embeddings capture the intricate structural and functional characteristics of proteins, reflecting evolutionary relationships and biochemical properties that are crucial for various biological and computational applications . The reason we were interested in using ESM embeddings, rather than embeddings from other protein foundation models, was that structural information was not already captured in PrimeKG, as previously mentioned. To obtain the ESM embeddings, first we downloaded the amino acid sequence for each protein from NCBI using Entrez. Then, using these sequences as input to Facebook's ESM2 model, we extracted the corresponding embedding. - -## GNN -The model we used as a baseline is TxGNN, a graph neural network trained on PrimeKG used to make therapeutic drug predictions for diseases . 
The GNN has two training phases. First, pretraining where the GNN finds biologically meaningful embeddings for all nodes in the knowledge graph, and therefore the objective is all link prediction. The second phase is to finetune the GNN, using self-supervised learning, to be able to predict drugs for diseases. Therefore, the objective for finetuning is to optimize contraindication and indication link prediction – the two types of links between diseases and drugs. We modified the training code for the finetuning phase, to train and validate on protein-molecular function links instead. - -### Architecture -The GNN has two linear layers with parameters n_input, n_hidden, and n_output. For all our models n\_input is 1280, restricted by the length of ESM embeddings. We play around with different dimensions for the hidden and output layers. Leaky ReLU activation is used after the first layer. - -### Training -The first step of the training phase is **node embedding initialization**. The default, which is our random control, is to initialize all nodes using Xavier uniform initialization . Models referred to as *random* from here on out are referring to using Xavier uniform initialization. For our experimental model, we initialized the protein nodes using the ESM embeddings we obtained earlier. All other node types were still initialized with Xavier uniform initialization. Note that we reinitialized nodes between pretraining and finetuning. - -During the training phase, the GNN uses a standard message-passing algorithm to update and optimize the node embeddings. There is a relation-type specific weight matrix (for each of the 29 relation types) used to calculate relation-type specific messages. 
The message for one relation to some node $i$ is calculated using this equation: -\begin{equation} - m_{r, i}^{(l)} = W_{r, M}^{(l)} h_i^{(l-1)} -\end{equation} - -For each node $v_i$, we aggregate incoming messages from neighboring nodes for each relation-type $r$, denoted as $N_r(i)$. This is done by taking the average of these messages: -\begin{equation} - m_{g_r, i}^{(l)} = \frac{1}{|N_r(i)|} \sum_{j \in N_r(i)} m_{r, j}^{(l)} -\end{equation} - -The new node embedding is then updated by combining the node embedding from the last layer and the aggregated messages from all relations: -\begin{equation} - h_i^{(l)} = h_i^{(l-1)} + \sum_{r \in TR} m_{g_r, i}^{(l)} -\end{equation} - -Finally, DistMult is used to calculate link prediction between two nodes using their respective embeddings. - -## Model Evaluation -We fixed all parameters and hyperparameters, and trained two models – one using random initializations and one using ESM embeddings. We pretrained for 3 epochs with a learning rate of $1e-3$ and a batch size of 1024. We then finetuned for 150 epochs with a learning rate of $5e-4$. - -{% include figure.html path="assets/img/Figure2.png" %} - -These results are promising and using ESM embeddings to initialize the protein node representations slightly improves the model. The ESM model has a final testing loss of 0.3915, whereas the random model has a final testing loss of 0.4151. However, the difference between the models is slim and may not be significant, especially looking at the similarities in the pretraining, training and validation loss curves. Later, we will look more in depth at how the embedding spaces vary between the 2 models, which has the potential to yield more interesting results. - -### Testing varying hidden and output layer dimensions -We wanted to see the impact changing the hidden and output layer dimensions would have on model performance. We tested 3 models, with parameters detailed in Table 1. 
All models outside of this experiment, unless otherwise specified, have the same parameters as Model 1. - -| | Input Dimensions | Hidden Layer Dim. | Output Layer Dim. | -|---------|------------------|-------------------|-------------------| -| Model 1 | 1280 | 1280 | 1280 | -| Model 2 | 1280 | 512 | 512 | -| Model 3 | 1280 | 512 | 128 | - -{% include figure.html path="assets/img/Testing_output_dim.png" %} - -We can see from the testing loss that when just comparing ESM initialized models, testing loss increases as the output layer dimension decreases. The same trend holds true between random initialized models. We can also see that when comparing ESM and random models for the same layer dimensions, ESM always slightly outperforms the random model. - - - -## Latent Space Visualizations -In the fast-evolving world of deep learning, the analysis of model latent spaces has emerged as an interesting area of study, especially to get a better understanding of how models are achieving their tasks. These spaces are important to understanding how complex models like GNNs perceive and process the intricate relationships and structures inherent in graph data. GNNs can learn powerful representations that capture both node-level and graph-level features. By analyzing the latent spaces of GNNs, we can get insights into how these models prioritize various patterns and connections within the data. The following analyses visualize the latent spaces of our models, clustered and colored in different ways, to get a deeper understanding of how the ESM initialized embeddings are affecting the GNN. - -We first were curious whether, after training our model, the final embeddings retained structural information about the proteins. To do this, we first clustered the original ESM embeddings using K-Means clustering. 
Next, we visualized the embedding space of the original ESM embeddings, the final embeddings from the ESM model and the final embeddings from the random model using t-distributed Stochastic Neighbor Embedding (t-SNE) for dimensionality reduction. From the t-SNE plot of original ESM embeddings, we can clearly see the clusters from K-Means which serves as a verification of our clustering technique. - -{% include figure.html path="assets/img/init_cinit.jpeg" %} - -Looking at the embedding space for the ESM and random models, colored by ESM clusters, we note that most of the ESM structural information (used to create the clusters) seems to be forgotten during the training process, as evidenced by the mostly random assortment of colors present in the t-SNE plot. We note that some clusters do remain, for example cluster 12 (light sage green on the right side of the ESM initialized plots) is still clustering in the final embeddings (top middle cluster). However, the most prominent ones appear in both the ESM initialized and random initialized data, meaning that the ESM embedding did encode some function, but the model using random initialized embeddings was able to capture that relation as well. - -{% include figure.html path="assets/img/cluster_init.jpeg" %} - -Given that the final embedding space for the ESM model didn't seem to retain much of the information for the ESM embedding initialization, we were curious whether the ESM and random embeddings converged to a similar space. To test this theory, we clustered the final ESM model embeddings and subsequently visualized the final embeddings of the ESM and random models using t-SNE and colored by those clusters. - -If the two models converged to similar embedding spaces, we'd expect to see that clusters found in one embedding space would also be found in the other. This is the case, as seen in the two plots below. 
Both plots are colored based on a clustering of the final embeddings generated by the ESM initialized network, and they share many of the same structures, indicating that the two networks were able to pick up on mostly the same features in the underlying information. Both models converged to a similar embedding space different initialization methods. - -{% include figure.html path="assets/img/cluster_esm.jpeg" %} - -### Testing varying hidden and output layer dimensions -As mentioned earlier, we tested different dimensions for the hidden and output layers to see whether more and less output dimensions would retain the original ESM embedding information. - -{% include figure.html path="assets/img/dimensions.jpeg" %} - -Although there are more distinct clusters on the t-SNE plots as the number of output dimensions increases, these clusters are not the same as the clusters from the original ESM embeddings (seen by the randomly colored dots). Therefore, neither of these 3 models retained the structural information provided by initializing with ESM embeddings. It does not seem that decreasing output and hidden layer dimensions improves the model performance or latent space of our GNN. - -### Clustering by molecular function labels -Because our model's task was to predict links between protein and molecular function nodes, we were curious to see if the final embeddings for the protein nodes would cluster well on the function labels. However, this wasn't as straight forward as having 1 molecular function label for each protein node, because each protein may be linked to multiple molecular functions. One protein may have multiple molecular function Gene Ontology (GO) annotations because the GO database uses a hierarchical system to categorize functions, where broader functions encompass more specific ones. A protein can be involved in several distinct biochemical activities, each represented by its own GO term, reflecting the diverse roles a protein can play in the cell. 
Instead of a single label, we extracted a molecular function profile, $v_i$, for each protein where $v_i[j] = 1$ if a link exists between protein $i$ and function $j$. We then had a sparse matrix, $V^{i \times j}$. Before clustering, we performed dimensionality reduction using truncated SVD which is optimal for sparse matrices. Finally, we performed K-Means clustering. - -{% include figure.html path="assets/img/cluster_func.jpeg" %} - -Looking at the t-SNE plots, there is no apparent clustering by molecular function profiles in the final embedding spaces for either the ESM model or the randomly initialized model. There are multiple possible explanations for this. One explanation is that the actual objective is to prediction each singular link between a protein and a function node, not to predict do well at predict all function nodes linked to a protein at once. On top of that our GNN uses self-supervised learning, therefore the molecular function profiles are not true labels used during training. - -The second plausible explanation has to do once again with the hierarchical nature of molecular function GO annotations. Because the molecular function nodes have random indices when stored in PrimeKG, it is not apparent that molecular function that have the same parent function are close to each other, or their parent function in the molecular function profiles. Therefore, when performing truncated SVD and subsequently k-means clustering, the similar functions may not be clustered together if their indices are far apart. Further analysis could be done to reorder the molecular function nodes and then conduct hierarchical clustering, instead than k-means. These possible clusters may then be found in the final latent spaces for the two models. 
- -## Discussion - -In this post, we have modified and fine-tuned a Graph Neural Network, TxGNN originally designed for drug-repurposing prediction, for protein function prediction with a variety of initializations of the node embeddings. We observed that while much of the information in the initialization is forgotten during the training process, a small amount is retained, leading to slightly better performance on the test set in the final network. This provides a potential avenue for further study, investigating the overall effects of informed initialization techniques on GNN performance. Some of this investigation is discussed in Li et al. , where they experiment with weight matrix initializations and propose a new paradigm for determining weight initializaiotns, but there is certainly more investigation to be done. - - - - - - diff --git a/_posts/2023-11-07-prompt-to-prompt.md b/_posts/2023-11-07-prompt-to-prompt.md deleted file mode 100644 index 7fc2d566..00000000 --- a/_posts/2023-11-07-prompt-to-prompt.md +++ /dev/null @@ -1,273 +0,0 @@ ---- -layout: distill -title: Prompt to Prompt -description: Text-based image editing via cross-attention mechanisms - the research of hyperparameters and novel mechanisms to enhance existing frameworks -date: 2023-11-07 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Carla Lorente - url: "https://www.linkedin.com/in/carla-lorente/" - affiliations: - name: MIT EECS 2025 - - name: Linn Bieske - url: "https://www.linkedin.com/in/linn-bieske-189b9b138//" - affiliations: - name: MIT EECS 2025 - -# must be the exact same name as your blogpost -bibliography: 2023-11-07-prompt-to-prompt.bib #############CHANGED!!!!!!!!!!!!!! - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. 
-toc: - - name: Introduction - - name: Literature Review - - name: Outline of our research - - name: A. Hyperparameter Study of prompt-to-prompt editing method "word swap" - - name: A1. Exploration of silhouette threshold hyperparameter ("k") - - name: A2. Exploration of cross-attention injection hyperparameter ("cross replace steps") - - name: A3. Exploration of self-attention hyperparameter ("self replace steps") - - name: A4. Cycle Consistency of method - - name: B. Generalization of optimized hyperparameters to "attention re-weight method" - - name: Our proposed method - - name: Future work - - name: Conclusion - - name: References - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Introduction - -Recently, the techniques to edit images have advanced from methodologies that require the user to edit individual pixels to deep learning-based image editing. The latter employ for example large image generation models (e.g., stable diffusion models). While these deep learning-based image editing techniques initially required the user to mark particular areas that should be edited (Nichol et al., 2021 ; Avrahami et al., 2022a; Ramesh et al., 2022), recently the work by (Hertz et al, 2022 ) has shown that this becomes unnecessary. Instead, image editing can be performed using a cross-attention mechanism. In particular, the proposed prompt-to-prompt editing framework enables the controlling of image edits by text only. 
The section below provides an overview of how this prompt-to-prompt framework works (Figure 1, by (Hertz et al, 2022)). - -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/1-cross_attention_masks.png" class="img-fluid" %} - -*Figure 1: Cross-attention method overview. Top: visual and textual embedding are fused using cross-attention layers that produce attention maps for each textual token. Bottom: we control the spatial layout and geometry of the generated image using the attention maps of a source image. This enables various editing tasks through editing the textual prompt only. When swapping a word in the prompt, we inject the source image maps Mt, overriding the target maps M ∗ t . In the case of adding a refinement phrase, we inject only the maps that correspond to the unchanged part of the prompt. To amplify or attenuate the semantic effect of a word, we re-weight the corresponding attention map. (Hertz et al, 2022 ).* - -While this proposed framework has significantly advanced the image editing research field, its performance leaves still room for improvement such that open research questions remain. For example, when performing an image editing operation that changes the hair color of a woman, significant variability across the woman’s face can be observed (Figure 2). This is undesirable, as the user would expect to see the same female face across all four images. - -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/2-Experimentation_proposed_prompt_to_prompt.png" class="img-fluid" %} - -*Figure 2: Experimentation with the proposed prompt-to-prompt image editing framework presented by (Hertz et al, 2022). 
The faces of the women show significant variability even though they should remain invariant across all four generated/ edited images.* - -Within our work, we will start to further benchmark the proposed framework's performance, explore its hyperparameters' impact on the image editing process, and research opportunities to improve the current performance. - -## Literature Review - -Before delving into the details of the prompt-to-prompt editing method, let's briefly recap some existing techniques to edit images with diffusion models that have paved the way for this revolutionary approach: - -### 1. Adding noise to an image and denoising with a prompt ### - -In **SDEdit: Guided Image Synthesis and Editing with Stochastic Differential Equations** , the user takes an image, introduces noise and then denoises it according to a user-provided prompt. As an example, given an image, users can specify how they want the edited image to look using pixel patches copied from other reference images. - -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/literature_review/SDEdit.png" class="img-fluid" %} - -A similar approach is used in the paper **MagicMix: Semantic Mixing with Diffusion Models** which uses a pre-trained text-to-image diffusion based generative model to extract and mix two semantics. The figure below showcases the detailed pipeline of MagicMix (image-text mixing). Given an image x0 of layout semantics, they first craft its corresponding layout noises from step Kmin to Kmax. Starting from Kmax, the conditional generation process progressively mixes the two concepts by denoising given the conditioning content semantics (“coffee machine” in this example). For each step k in [Kmin; Kmax], the generated noise of mixed semantics is interpolated with the layout noise xk to preserve more layout details. - -
- {% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_1.png" class="img-fluid" width="100" %} -
- -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_2.png" class="img-fluid" %} - -### 2. Take an image, add noise and denoise it with a prompt + Add a mask ### - -In the paper **Blended Diffusion: Text-Driven Editing of Natural Images** , given an input of an image and a mask, the blended diffusion modifies the masked area according to a guided text prompt, without affecting the unmasked regions. One limitation of this is that it relies on the user having to produce this mask to indicate the editing region. - -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/literature_review/Blended_Difussion.png" class="img-fluid" %} - -An advanced version of this diffusion mode is discussed in the paper **Text-based inpainting with CLIPSef and Stable Diffusion** . In this paper, the novelty is that the user doesn't have to do the mask manually. Instead, it can use an existing segmentation model (e.g. ClipSef). Another alternative is presented in the paper **DiffEdit: Diffusion-based semantic image editing with mask guidance** where the mask is generated directly from the diffusion model. - -### 3. Fine-tune (“overfit”) on a single image and then generate with the fine-tuned model ### - -In the paper **Imagic: Text-based real image editing with diffusion models** and **Unitune: Text-driven image editing by fine-tuning a diffusion model on a single image** , the authors perform extensive fine-tuning on either the entire diffusion model or specific sections of it. This process is computationally and memory-intensive, setting it apart from alternative methods. - -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/literature_review/Fine_Tuning.png" class="img-fluid" %} - -### Prompt-to-prompt -The prompt-to-prompt editing method is a significant advancement compared with the existing image editing techniques that rely on diffusion models. 
Unlike the methods explained above that involve adding noise, using masks, or fine-tuning, the prompt-to-prompt method stands out because of its simplicity, flexibility, and user-friendliness. In the former methods, users often face challenges such as manually creating masks or undergoing resource-intensive fine-tuning processes, which can be both time-consuming and technically demanding. In contrast, the prompt-to-prompt editing method streamlines the editing process by allowing users to directly specify their desired edits through language prompts. This approach eliminates the need for intricate masking or extensive model training as well as leverages the power of human language to precisely convey editing intentions. - -Throughout our research, we will adopt the prompt-to-prompt editing method as our starting point, with the aim of enhancing its performance. - -## Outline of our research - -To perform our research, we plan to build upon the code which complemented the paper published by (Hertz et al, 2022 , [Link to code]( https://github.com/google/prompt-to-prompt/)). Concretely, we will rely on a stable diffusion model from hugging face which we will access via Python. No model training is required as we will solely work with attention layers that capture spatial information about the images. - -Our study will be divided into 3 main subsections: -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 01 - outline.png" class="img-fluid" %} - - -## A. Hyperparameter Study of prompt-to-prompt editing method "word swap" - -In the forthcoming subsection, we delve into a comprehensive analysis of the hyperparameters pertaining to the "word swap" method within the prompt-to-prompt editing framework. Before delving into the specifics, it's crucial to understand the significance of these hyperparameters and their default values, as originally outlined in the seminal work by Hertz et al. 
- -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 02 - outline section A.png" class="img-fluid" %} - -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 03 - Local editing.png" class="img-fluid" %} - -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 04 - Cross replace steps explanation.png" class="img-fluid" %} - -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 05 - Sel-attention explanation.png" class="img-fluid" %} - -We will systematically explore various hypotheses regarding each hyperparameter and present our empirical findings, shedding light on their individual impacts on the editing process. This examination aims to provide valuable insights into optimizing the performance of the "word swap" method and enhancing its practical utility. - -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 06 - Hypothesis and findings.png" class="img-fluid" %} - - -## A1. Exploration of silhouette threshold hyperparameter ("k") - -In this section, we embark on an exploration of the silhouette threshold hyperparameter ("k"). We aim to unravel the influence of varying this parameter while using the prompt '_"A woman's face with blond hair"_' and making alterations to different hair colors (brown, red, black). The GIF below showcases the representation of these experiments. - -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/GIFs/threshold_k/change_threshold_womens_face2.gif" class="img-fluid" %} - -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 07 - Results silhouette parameter k - faces.png" class="img-fluid" %} - -Additionally, we present a comparative analysis of the impact of this hyperparameter on editing tasks related to landscapes. 
For instance, we employ the prompt '_"A river between mountains"_' and manipulate the landscape, including options like streets, forests, and deserts. The results of this landscape-oriented analysis can be seen in the figure below. - -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/GIFs/threshold_k/attention_replace_rivers.gif" class="img-fluid" %} - -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 08 - Results silhouette parameter k - landscape.png" class="img-fluid" %} - - - -## A2. Exploration of cross-attention injection hyperparameter ("cross replace steps") - -Below we showcase the effect of the silhouette threshold hyperparameter ("k") and the cross-attention injection hyperparameter("cross_replace_steps"). We manipulate the "k" value, setting it to 3 different levels: 0, 0.3 (default literature value), and 0.6. The experiment was performed for both women's faces and landscapes, providing a comprehensive understanding of how these hyperparameters affect the editing process. The following GIFs showcase the results of our exploration. 
- -### With k = 0: - -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_zero_cross_replace_women.gif" class="img-fluid" %} -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_zero_cross_replace_river.gif" class="img-fluid" %} - -### With k = 0.3: -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_point3_cross_replace_women.gif" class="img-fluid" %} -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_point3_cross_replace_river.gif" class="img-fluid" %} - -### With k = 0.6: - -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_point6_cross_replace_women.gif" class="img-fluid" %} -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_point6_cross_replace_river.gif" class="img-fluid" %} - -Below, we present the key insights found for the prompt _"A woman's face with blond hair"_. -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 09 - Results cross replace steps - faces.png" class="img-fluid" %} - -Below, we present the key insights found for the prompt _"A river between mountains"_. -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 10 - Results cross replace steps - landscape.png" class="img-fluid" %} - - -## A3. Exploration of self-attention hyperparameter ("self replace steps") - -In our investigation of the self-attention hyperparameter known as "self_replace_steps," we conducted a series of experiments with careful consideration of the interplay between this parameter and two other critical factors: "k" (the silhouette threshold) and "cross_replace_steps" (the cross-attention injection parameter). To comprehensively assess the influence of "self_replace_steps," we designed two distinct experimental scenarios. 
- -In the first scenario, we set "k" and "cross_replace_steps" to their default values in the literature review (0.3 and 0.8 respectively), creating an environment conducive to exploring the effects of self-attention within these threshold parameters. Concurrently, in the second scenario, we opted for more extreme settings by keeping "k" at 0 (no silhouette threshold) and "cross_replace_steps" at 0.2, thereby intensifying the impact of the self-attention hyperparameter. - -### With k = 0.3 and cross_replace_steps = 0.8: -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/GIFs/self_replace_steps/k_point3_self_replace_women.gif" class="img-fluid" %} -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/GIFs/self_replace_steps/k_point3_self_replace_river.gif" class="img-fluid" %} - -### With k = 0 and cross_replace_steps = 0.2: - -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/GIFs/self_replace_steps/k_zero_crossattention_point2_self_replace_women.gif" class="img-fluid" %} -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/GIFs/self_replace_steps/k_zero_crossattention_point2_self_replace_river.gif" class="img-fluid" %} - -Below, we present the key insights for the hyperparameter "self_replace_steps" within the context of the prompt _"A woman's face with blond hair"_. -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 11 - Results self replace steps - faces.png" class="img-fluid" %} - -Below, we present the key insights for the hyperparameter "self_replace_steps" found for the prompt _"A river between mountains"_. -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 12 - Results self replace steps - landscape.png" class="img-fluid" %} - -## A4. Cycle Consistency of method - -Our primary goal is to delve into the notion of "Cycle Consistency" within our methodology. 
This concept revolves around the seamless reversal of text prompt modifications back to their original form, ensuring that the resulting image closely mirrors the initial prompt. This bidirectional editing process serves as the central focus of our research, and in the subsequent sections, we present our findings on this crucial aspect. - -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 13 - Cycle consistency.png" class="img-fluid" %} - -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 14 - Cycle consistency - hyperparameter impact.png" class="img-fluid" %} - -## B. Generalization of optimized hyperparameters to "attention re-weight method" - -After identifying the optimal parameters, we conducted a comparative analysis to assess their generalizability across other methods, including attention re-weighting. In the visual presentation, we used GIFs to showcase image generation under two different parameter configurations for the prompt _"A woman's face with long wavy blond hair"_. - -On the left side, images were generated using default values (k=0.3; cross_replace_steps = 0.8; self_replace_steps = 0.2) while varying the assigned weights. Notably, negative weights led to instability and less desirable outcomes, as evidenced by the results on the left. - -On the right side, we employed our optimized hyperparameter values (k = 0; cross_replace_steps = 0.2; self_replace_steps = 0.8). These images demonstrated improved stability while consistently producing the desired output. This visual comparison highlights the effectiveness of our optimized parameters and their superior performance, particularly when dealing with attention re-weighting method. - -
-
- Literature suggested parameters - {% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/GIFs/c_value/c_value_women_curly.gif" class="img-fluid" width="200" %} -
-
- Newly optimized parameters - {% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/GIFs/c_value/c_value_women_curly_improved_self_replace.gif" class="img-fluid" width="50" %} -
-
- - -## Our Proposed Method - -As our research has demonstrated, the current prompt-to-prompt method, as reported in the literature , exhibits significant limitations. Specifically, with the current settings for the silhouette, cross-attention injection, and self-attention injection parameters, the method fails to perform the prompted edits with precision. A comparative analysis of the generated target images against the geometry of the reference images reveals undesired deviations. The existing method over-constrains the geometry due to excessively high k values and cross-attention injection values. Additionally, it underutilizes self-attention injection. Furthermore, the current method lacks cycle consistency. -To address these shortcomings, we propose a new framework: the _“CL P2P”_ prompt-to-prompt image editing framework. This framework offers several key improvements over the existing method: - -**Optimization of Critical Hyperparameters**: Our research indicates that optimizing the values of critical hyperparameters results in higher prompt-to-prompt image editing precision and a more accurate similarity between the reference and target images for desired features. We propose the following adjusted values, particularly for editing faces and hairstyles: -* Local editing (silhouette parameter k): 0.0 -* Cross-attention injection (cross replace steps): 0.2 -* Self-attention injections (self-replace steps): 0.8 - -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 16 - Current vs new method comparision of output.png" class="img-fluid" %} - -By selecting these values, the following changes are introduced to the prompt-to-prompt editing method: -* Remove: Local editing can be removed from the method, as it did not lead to significant improvements compared to the precision achieved by the elongated injection of self-attention. 
-* Reduce: The cross-attention (query-key-value attention) injection should be reduced to allow greater geometric adaptability and better convergence between the reference and target images. -* Increase: Self-attention injection should be substantially elongated from 20% to 80% of the diffusion steps. This is crucial, especially for editing hairstyles, as it allows for the greatest geometric adaptability and ensures the convergence between desired reference and target image features. - -{% include figure.html path="assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 15 - Current vs new method.png" class="img-fluid" %} - -**Addressing Cycle-Inconsistency**: To remedy the cycle-inconsistency, we propose balancing the asymmetry of the current method with regards to the V values of the underlying transformer model. The current method is cycle-inconsistent, even though the same embeddings are used for both the reference and target prompts. Traditionally, the method has only employed the V values of the reference prompt, neglecting those of the target prompt. This characteristic likely introduces asymmetry, breaking the cycle-consistency of the model. We propose an additional injection mechanism for the “CL P2P” framework, a V value injection method, allowing for the consideration of both the V values of the reference and target images. To control the number of injection steps, we introduce an additional hyperparameter, “V value injection steps”. The V value injection function is defined based on the logic highlighted in the footnote of the image. - -## Future work - -The development of the “CL P2P” framework is a significant advancement in prompt-to-prompt image editing methods. However, there are still areas where further research will be needed. A critical area of exploration lies in the enhancement of cycle-consistency within the prompt-to-prompt editing process. 
Further research is required to ascertain and refine the optimal values for the V value injection steps, a key component in achieving cycle-consistency. - -Additionally, the existing frameworks predominantly focus on singular reference and target prompts. While this approach has opened new pathways in human-computer interaction, several research questions remain unexplored. A notable inquiry is the potential to integrate various prompt-to-prompt editing methods, such as "word swap", "attention re-weighting," and "prompt refinement." This integration aims to facilitate a dynamic, conversational interaction between users and generated images, enabling a continuous and iterative editing process. Current state-of-the-art generative image models, such as mid-journey models, do not inherently support such iterative mechanisms. The realization of this functionality necessitates extensive research and development, offering an exciting challenge for future advancements in the field. - -## Conclusion - -Image generation models, inherently stochastic in nature, exhibit variability in outcomes even when similar prompts are applied. This stochasticity can result in significant deviations in the generated images. For instance, prompts like “A woman’s face with blond hair” and “A woman’s face with red hair” may yield images with markedly different facial features, demonstrating the algorithm's underlying randomness. - -In response to this challenge, prompt-to-prompt image generation and editing techniques have emerged as a significant area of interest in recent years. These methods, while constituting a potent tool in the arsenal of image editing alongside fine-tuning, semantic mixing, and masking approaches, are not without limitations. Specifically, the precision of edits and the geometric alignment between reference and target images often fall short of expectations. 
- -Our research delves into the influence of critical hyperparameters on the outcomes of a cross-attention-based prompt-to-prompt method. We aimed to dissect the impact of each hyperparameter on image editing and geometric adaptation between the reference and target images. Our findings make substantive contributions to enhancing the precision and geometric convergence in prompt-to-prompt methods, with the following key insights: -* An extensive analysis of three critical hyperparameters (silhouette selection, cross-attention injection, and self-attention injection) was conducted, focusing on their effect on the precision of an attention-based prompt-to-prompt editing method. -* Contrary to existing literature, our study reveals that self-attention injection plays a more pivotal role than previously recognized. We recommend incorporating self-attention injection from the reference image for approximately 80% of the diffusion steps during the target image generation process. -* We introduce the novel _“CL P2P”_ framework, designed to elevate the efficacy of prompt-to-prompt editing. - -Our research not only deepens the understanding of prompt-to-prompt editing methods but also achieves enhanced editing precision and improved similarity between reference and target images. - -Looking ahead, the _“CL P2P”_ framework paves the way for further exploration, particularly in addressing the cycle consistency of prompt-to-prompt methods. Additionally, exploring strategies to seamlessly integrate different prompts into a continuous dialogue could revolutionize human-computer interaction, enabling users to edit generated images through conversational engagement. 
- - diff --git a/_posts/2023-11-08-A-deeper-look-into-equivariance-for-materials-data.md b/_posts/2023-11-08-A-deeper-look-into-equivariance-for-materials-data.md deleted file mode 100644 index f90b8a95..00000000 --- a/_posts/2023-11-08-A-deeper-look-into-equivariance-for-materials-data.md +++ /dev/null @@ -1,296 +0,0 @@ ---- -layout: distill -title: A Deeper Look into Equivariance for Materials Data -description: A Comparative Analysis of an E(3) Equivariant GNN and a Non-Equivariant GNN in Materials Data Tasks with a Focus on Investigating the Interpretability of Latent Geometry within the Two GNNs. -date: 2023-11-08 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Nofit Segal - affiliations: - name: MIT - CSE & DMSE - - -# must be the exact same name as your blogpost -bibliography: 2023-11-08-A-deeper-look-into-equivariance-for-materials-data.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Data - - name: Method - - name: Results - - name: Conclusion - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Introduction - -Materials embody a diverse array of chemical and physical properties, intricately shaping their suitability for various applications. The representation of materials as graphs, where atoms serve as nodes and chemical bonds as edges, facilitates a systematic analysis. 
Graph Neural Networks (GNNs) have emerged as promising tools for deciphering relationships and patterns within materials data. The utilization of GNNs holds the potential to develop computational tools that deepen our understanding and aid in designing structure-property relationships in atomic systems. - -In recent years, there has been a heightened focus on employing machine learning for the accelerated discovery of molecules and materials with desired properties [[Min and Cho, 2020](#min2020accelerated); [Pyzer-Knapp et al, 2022](@pyzer2022accelerating); [Merchant et al, 2023](@merchant2023scaling)]. Notably, these methods are exclusively applied to stable systems in physical equilibrium, where such systems correspond to local minima of the potential energy surface $E(r_1, . . . , r_n)$, with $r_i$ representing the position of atom $i$ [[Schütt et al, 2018](@schutt2018schnet)]. - -The diverse arrangements of atoms in the system result in varying potential energy values, influencing chemical stability. In the GIF below, different trajectories of the molecule Ethane can be seen. The Ethane molecule spends 99% of its time in a specific conformation, in which the substituents are at the maximum distance from each other. This conformation is called the staggered conformation. Looking at the molecule from a position on the C-C (main) axis (as in the second half of the animation), the staggered conformation is reached when the H atoms of the front C atom are exactly between the H atoms of the other C atom. This animation also shows the 3-fold symmetry of the molecule around the main axis. All three staggered conformations will have the same energy value, as they are completely equivalent. The intermediate conformations will result in a higher energy value, as they are energetically less favorable. Different conformations can also portray elongations of some bond lengths and variations in angle values. 
Predicting stable arrangements of atomic systems is in itself an important challenge! -

- Conformations of Ethane (C2H6) molecule -

- -
- Conformations of the molecule Ethane (C2H6) -
- - -In the three-dimensional Euclidean space, materials and physical systems in general, inherently exhibit rotation, translation, and inversion symmetries. These operations form the E(3) symmetry group, a group of transformations that preserve the Euclidean distance between any two points in 3D space. When adopting a graph-based approach, a generic GNN may be sensitive to these operations, but an E(3) equivariant GNN excels in handling such complexities. Its inherent capability to grasp rotations, translations, and inversions allows for a more nuanced understanding, enabling the capture of underlying physical symmetries within the material structures [[Batzner et al, 2022](@batzner20223)]. - - - -## Data - -The MD 17 dataset, an extensive repository of ab-initio molecular dynamics trajectories [[Chmiela et al, 2019](@chmiela2019sgdml)], was employed in this study. - -Each trajectory within the dataset includes Cartesian positions of atoms (in Angstrom), their atomic numbers, along with total energy (in kcal/mol) and forces (kcal/mol/Angstrom) acting on each atom. The latter two parameters serve as regression targets in analyses. - -Our focus narrowed down to the molecules Aspirin, Ethanol, and Toluene: - -
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/aspirin.jpg" caption="Aspirin (C9H8O4)" class="img-fluid rounded z-depth-1 mb-3" style="width: 100px; height: 150px;" %} - {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/ethanol.jpg" caption="Ethanol (C2H5OH)" class="img-fluid rounded z-depth-1 mb-3" style="width: 100px; height: 150px;" %} - {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/toluene.jpg" caption="Toluene (C6H5CH3)" class="img-fluid rounded z-depth-1 mb-3" style="width: 100px; height: 150px;" %} -
- -The distributions of energy values (kcal/mol) for various conformations of the three molecules, within the training and validation sets, are illustrated in the histograms below. - - -
- -
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_t.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_v.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- -
- -
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_t.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_v.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- -
- -
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_t.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_v.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- -
- Energy (kcal/mol) distributions for Aspirin (C9H8O4), Ethanol (C2H5OH) and Toluene (C6H5CH3) molecules in train and validations sets -
- -The training set for Aspirin comprises 1000 conformations, while its validation set consists of 500 conformations. Ethanol's training and validation sets each consist of 1000 conformations. Toluene's training set comprises 1000 conformations, and its validation set consists of 500 conformations. - -## Method - -In this project, our objective is to conduct a comparative analysis of two Graph Neural Network (GNN) architectures: an E(3) equivariant network and a non-equivariant (specifically E(3) Invariant) one. The primary focus is on energy prediction tasks related to atomic systems, with a particular emphasis on exploring the distinctions within the latent representations of these architectures and their interpretability. - -All GNNs are permutation invariant by design [[Keriven and Peyr, 2019](@DBLP:journals/corr/abs-1905-04943)]. Our baseline GNN for comparison achieves rotation and translation invariance by simply operating only on interatomic distances instead of absolute position of the atoms. This design choice ensures that both the output and internal features of the network remain invariant to rotations. In contrast, our equivariant GNN for comparison utilizes relative position vectors rather than distances (scalars) together with features comprised of not only scalars, but also higher-order geometric tensors. - -In our Invariant GNN, the node-wise formulation of the message passing is given by: - -
-
$$\mathbf{x}^{\prime}_i = \mathbf{\Theta}^{\top} \sum_{j \in \mathcal{N}(i) \cup \{ i \}} \frac{e_{j,i}}{\sqrt{\hat{d}_j \hat{d}_i}} \mathbf{x}_j$$
- -Where $ x_i, x_j $ are the feature vectors of the target and source nodes, respectively, defined as a one-hot representation of the atomic number of that node. The summation is performed over the neighborhood $\mathcal{N}(i)$ of atom $i$, defined by a radial cutoff around each node, a tunable parameter typically set around 4-5 angstroms. Meaning, the concept of neighborhood is based on the distance between nodes, not their connectivity. Additionally, $ d_i = 1 + \sum_{j \in \mathcal{N}(i)} e_{j,i} $ where $ e_{j,i} $ represents the edge weight from the source node $j$ to the target node $i$ , and is defined as the interatomic distance. - -For constructing our equivariant GNN, [E3nn](https://e3nn.org/) was employed - a torch-based library designed for building o(3) equivariant networks. Following the method presented in [[Batzner et al, 2022](@batzner20223)], a neural network that exhibits invariance to translation and equivariance to rotation and inversion was constructed. Two key aspects of E3nn facilitating the construction of O(3) equivariant neural networks are the use of irreducible representations (Irreps) for data structuring and encapsulating geometrical information in Spherical Harmonics. Irreps are data structures that describe how the data behaves under rotation. We can think of them as data types, in the sense that this structure includes the values of the data alongside instructions for interpretation. The Spherical Harmonics form an orthonormal basis set of functions that operate on a sphere, and they’re equivariant with respect to rotations, which makes them very useful (and popular!) in expanding expressions in physical settings with spherical symmetry. - -For the equivariant GNN, the node-wise formulation of the message is: -
-$$f'_i = \frac{1}{\sqrt{z}} \sum_{j \in \partial(i)} \; f_j \; \otimes\!(h(\|x_{ij}\|)) \; Y(x_{ij} / \|x_{ij}\|) $$ -
- -where $ f_i, f_j $ are the target and source nodes' feature vectors, defined similarly as a one-hot representation of the atomic number. $z$ is the average degree (number of neighbors) of the nodes, and the neighborhood $\partial(i)$ is once again defined using a radial cutoff. $x_{ij}$ is the relative distance vector, $h$ is a multi-layer perceptron and $Y$ is the spherical harmonics. The expression $x \; \otimes\!(w) \; y$ denotes a tensor product of $x$ with $y$ using weights $w$. This signifies that the message passing formula involves a convolution over nodes' feature vectors with filters constrained to be a multiplication of a learned radial function and the spherical harmonics. - - -## Results - -The performance of the two GNNs was compared for the task of predicting the total energy of the molecule’s conformation - a scalar property. By constraining the Equivariant GNN to predict a scalar output, it becomes overall invariant to the E(3) group. However, the use of higher order geometric tensors in the intermediate representations and operations in the E-GNN makes internal features equivariant to rotation and inversion. This enables the passage of angular information through the network using rotationally equivariant filters (spherical harmonics) in the node feature convolution. This is the essential difference between the two architectures. - -The learning curves of the two GNNs for each molecule's data are presented in the figures below: -
- -
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_t_epoch_new.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_v_epoch_new.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- -
- -
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_t_epoch_new.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_v_epoch_new.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- -
- -
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_t_epoch_new.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_v_epoch_new.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- -
- Train (left) and Validation (right) learning curves of Energy (kcal/mol) prediction of Aspirin (top), Ethanol (middle) and Toluene (bottom) conformations -
- - -The models were trained for 50 epochs using mean absolute error (MAE) objective for predicting normalized energy (in kcal/mol units). Adam optimizer with a learning rate of 0.01 and learning rate scheduler were employed. The E-GNN achieves a superior MAE rate for all three molecules. - -Next, let's examine the latent representation of the two models! The last layer values of the validation data of both models were projected using t-SNE to a 2D representation and color-coded according to the target energy values: - -
- -
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_asp_1.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_asp_1.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- -
- -
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth_2.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_eth_new.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- -
- -
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_tol_2.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_tol_1.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- -
- Latents projections of E-GNN (left) and GNN (right) last layer, of Aspirin (top), Ethanol (middle) and Toluene (bottom) conformations -
- -A color gradient can be seen in all three projections of the Equivariant GNN; and it is the clearest for Ethanol. The Invariant GNN’s latent projections do not exhibit a similar structure, perhaps except for Ethanol’s conformations. Moreover, in Ethanol’s case, the GNN projection appears to be quite one-dimensional. - -The apparent color gradient according to the target values in the E-GNN latent space is impressive, suggesting that the model leverages this information when embedding data conformations for predictions. Multiple "locations" in the latent space denote various high-energy conformations, indicating that the model considers not only the target energy value but also structural differences. - -To assess whether there's molecular structural ordering in the embeddings, we construct system-specific variables for each molecule and visualize the latent space accordingly. Ethanol, with its relatively simple structure, showcases three important variables: the distance between the two Carbons (C-C bond), the distance between Carbon and Oxygen (C-O bond), and the angle formed by the three atoms. The distributions of these variables in Ethanol's train and validation sets are depicted in the figure below: - -
- -
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_t.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_v.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- -
- -
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_t.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_v.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- -
- -
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_t.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_v.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
-
- Distributions in train (left) and validation (right) sets of Ethanol, of C-C bond length (top), C-O bond length (middle) and main angle (bottom) -
- -The distributions appear very similar for each variable in the train and validation sets. Now, let's examine Ethanol's validation conformations latent projection, color-coded with respect to the target and the three system-specific variables: - -
- -
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth_2.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__cc_2.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- -
- -
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__ang_2.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
- {% include figure.html path="assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__co_2.png" class="img-fluid rounded z-depth-1" width="100%" %} -
-
A clear gradient is observed for the main angle and C-C bond! The target gradient appears from the top left corner to the bottom right; the C-C bond gradient seems to go from bottom left to top right, and the main angle gradient isn’t as linear, appearing to spiral from the bottom to the top right corner clockwise. The C-O bond projection doesn’t seem to follow a discernible gradient, suggesting it's not as influential on the target as the other two variables. - -Cool huh? The Equivariant GNN appears to embed the data according to the target value but also according to the system's geometrical structure! This suggests that the model leverages its E(3) equivariant convolution layers to capture and encode information about both the target values and the intricate geometric features of the molecular systems. - -## Conclusion - -In conclusion, our exploration has demonstrated the efficiency of the E(3) equivariant GNN, compared to an invariant GNN, in predicting the total energy of molecular conformations. Though both models were compared on predicting energy, a scalar property, the E-GNN's ability to leverage the inherent symmetries present in the system allowed it to effectively capture and encode the relationship between the arrangement of molecules and their respective energy. This was illustrated through the latent representation visualizations, and was particularly evident in the case of Ethanol. Here, discernible gradients in the latent space were observed, correlating with the target energy value and variations in C-C bond length and main angle. However, interpretability varies among the latent projections for the more complex molecules investigated in this project. Potential improvements could be achieved with additional data and a more expressive equivariant network. 
- - diff --git a/_posts/2023-11-08-contrastive-time.md b/_posts/2023-11-08-contrastive-time.md deleted file mode 100644 index 62c11998..00000000 --- a/_posts/2023-11-08-contrastive-time.md +++ /dev/null @@ -1,401 +0,0 @@ ---- -layout: distill -title: Contrastive Representation Learning for Dynamical Systems -description: A deep learning method of learning system underlying parameters from observed trajectories -date: 2022-11-07 -htmlwidgets: true - - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Martin Ma - url: "https://www.linkedin.com/in/martinzwm/" - affiliations: - name: Harvard University - - name: Lily Wang - url: "https://www.linkedin.com/in/xiaochen-lily-wang-175897183/" - affiliations: - name: Harvard University - -# must be the exact same name as your blogpost -bibliography: 2023-11-08-contrastive-time.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Methodology - - name: Experiments - - name: Another Framework - Generative Modeling - - name: Experiments - Generative Modeling - - name: Conclusion and Future Works - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- -## Introduction - -### Dynamical System - -Dynamical systems form the foundation for understanding intricate phenomena in both scientific research and engineering applications. 
These systems are defined by their **state** (denoted as $X$) at any given time and a set of **equations** (e.g., $v = \frac{dX}{dt} = f_t(X, \theta)$) that describe the evolution of these states over time ($t$), all driven by **underlying parameters** $\theta$. Some real-world examples of dynamical systems include: - -- **Climate Systems**: Involves states like temperature, pressure, and wind velocity, with parameters such as solar radiation and greenhouse gas concentrations. -- **Population Dynamics in Ecology**: Features states like population sizes, with parameters including birth and death rates, and interaction rates between species. -- **Economic Models**: Focus on states like stock prices and trading volume, influenced by parameters like interest rates and market sentiment. -- **Control Systems in Engineering**: Encompasses states like the position and velocity in robotics or the aircraft's orientation in flight dynamics, governed by parameters like physical properties and control gains. - -The evolution of the system's state over time can be observed as a time series, where system underlying parameters ($\theta$) governs the system's behavior. In our project, we would like to determine if it would be feasible to discover the underlying system parameters given the observed trajectory. It would lay the groundwork for both robust predictive modeling and model interpretability analysis for safety-critical systems, such as clinical application and chemical engineering plants. - - -### Spring-Mass System - -Consider a spring-mass system, a fundamental model in dynamics. In a system comprising two masses, the states include positions $x$ and velocities $v = \frac{dx}{dt}$, which can be derived from the positions. Crucially, it is the underlying parameters, masses $m_1$, $m_2$ and spring constants $k_1$, $k_2$, that dictate the trajectories of $x$. 
- - -{% include figure.html path="assets/img/2023-11-08-contrastive-time/spring_mass_system.png" class="img-fluid" %} -
- Figure 1: A spring-mass system, a classical dynamical system to illustrate the project idea -
- -{% include figure.html path="assets/img/2023-11-08-contrastive-time/spring_mass_system_traj.png" class="img-fluid" %} -
- Figure 2: Spring mass system trajectory at ($k_1$, $k_2$, $m_1$, $m_2$) = (1.0, 2.0, 1.0, 2.0) -
- - - -Different system parameters, such as mass or spring constant, result in different oscillatory and long-term behavior behaviors of the system. Below is a gif demonstrating the effect of changing parameters on the system's trajectory; this visualization illustrates how different underlying parameter values lead to distinct dynamical behaviors. - -{% include figure.html path="assets/img/2023-11-08-contrastive-time/system_dynamics_varying_m1.gif" class="img-fluid" %} -
- Figure 3: system dynamics at varying $m_1$, while keeping $k_1$, $k_2$ and $m_2$ constant. -
- - -{% include figure.html path="assets/img/2023-11-08-contrastive-time/system_dynamics_varying_m2.gif" class="img-fluid" %} -
- Figure 4: system dynamics at varying $m_2$, while keeping $k_1$, $k_2$, and $m_1$ constant. -
- - - -### Dataset Collection / Generation - -We create a simulator for the above dynamical system to generate data based on parameters like masses $m$ and spring constants $k$. The parameters are systematically varied to generate a diverse and challenging dataset. More concretely, the dataset can be represented by a tensor of shape $(N_{param}, N_{traj}, T, d)$, where: - -- $N_{param}$ is the number of parameter sets. Each set of parameters would lead to different system dynamics and trajectories. -- $N_{traj}$ is the number of trajectories generated for each parameter set. Within the same set of parameters, different initial conditions and noise level would lead to different trajectories. -- $T$ is the number of steps in a trajectory. $T$ is dependent on 2 factors - time span in the simulation, and the time step (i.e., $dt$). Note that our system/model formulation allows $T$ to be different for different trajectories, offering more flexibility. -- $d$ is the number of states. In the above example, $d = 4$, representing $(x_1, x_2, v_1, v_2)$. - -### Related Works - -Time-series data analysis is a crucial component in a wide array of scientific and industrial domains, ranging from dynamical systems and weather forecasting to stock market prediction. These applications often involve underlying parameters that are complex and not immediately observable from the data. Traditional time-series methodologies primarily emphasize prediction, which can result in models that operate as "black-boxes" with limited interpretability . - -To address this limitation, the representation learning landscape in time-series analysis has expanded recent years, with a focus on unsupervised and semi-supervised methods. Fortuin et al. developed GP-VAEs, combining Gaussian processes with VAEs to handle noisy and irregularly sampled time-series data. This model addresses variabilities in time-series data, offering robust feature extraction. Franceschi et al. 
explored self-supervised learning frameworks for time-series, demonstrating how leveraging temporal coherence can improve feature representation without reliance on labeled data. - -Building on these advancements, recent studies like those by Eldele et al. have explored the application of contrastive learning for enhancing time-series representation. Their approach creates representations by contrasting segments of time-series, thereby learning features that distinguish different temporal patterns. This method has shown promise in enhancing classification and forecasting tasks. - -However, there remains an unexplored potential in utilizing contrastive learning for learning the underlying parameters governing these systems. In this project, we aim to bridge this gap by applying the principles of contrastive learning to the specific challenge of identifying and understanding these hidden parameters within dynamical systems. By leveraging contrastive learning, we aim to move beyond mere prediction and delve into a deeper understanding of these parameters, thus enhancing the interpretability of time-series models, particularly applicable in safety-critical systems. - -## Methodology - -### Contrastive Learning - -Contrastive learning is a self-supervised learning technique prevalent in fields such as computer vision (CV) and natural language processing (NLP). At its core, it involves minimizing the embedding similarity between similar objects (i.e., positive pairs) while distancing dissimilar ones (i.e., negative pairs). - -In the context of dynamical systems, where the model does not have direct access to parameter values, contrastive learning is an effective method to infer underlying system parameters. In our case of spring-mass system, a positive pair consists of two trajectories generated using the same set of parameters, whereas a negative pair is two trajectories generated using different set of parameters. 
- -We utilize the following InfoNCE (Information Noise-Contrastive Estimation) loss for training: - -$$L_{\text{InfoNCE}} = -\log \frac{e^{f(x)^Tf(x^+)/\tau}}{\sum_{i=0}^{N} e^{f(x)^Tf(x^-_i)/\tau}}$$ - -- $f(x)$ is the generated trajectory embedding. -- $\tau$ is a (fixed) temperature hyperparameter, which we set to default 1. -- ($x$, $x^+$) forms the positive pair (i.e., two trajectories with the same underlying parameters but different initial conditions). -- ($x$, $x_j^-$) form negative pairs (i.e. two trajectories from different underlying parameter sets). - -### Model - -Trajectories in dynamical systems can be represented by a time-series dataset, which is a type of sequential data. Long Short-Term Memory networks (LSTMs), a variant of Recurrent Neural Networks (RNNs), can be used process sequential data and manage long-term dependencies. A key feature of LSTMs is their use of gates, which regulate the flow of information, allowing the network to maintain pertinent information over extended periods — key characteristics for modeling dynamical systems. These gates include: - -1. **Forget Gate**: Decides which information from the cell state should be discarded. It uses the current input and the previous hidden state to generate a value between 0 and 1 for each number in the cell state, with 1 indicating “keep this” and 0 indicating “discard this.” -2. **Input Gate**: Determines what new information will be added to the cell state. It involves two parts: a sigmoid layer that decides which values will be updated and a tanh layer that creates a vector of new candidate values. -3. **Output Gate**: Decides what information from the cell state will be used to generate the output. It takes the current input and the previous hidden state, passes them through a sigmoid layer, and multiplies the output by a tanh of the cell state to decide which parts of the cell state make it to the output. 
- - -{% include figure.html path="assets/img/2023-11-08-contrastive-time/LSTM.png" class="img-fluid" %} -
- Figure 5: Diagram of LSTM. -
- - -In the context of the contrastive learning framework, the choice of model is a design choice. Essentially, any model capable of converting a trajectory into an embedding, such as a transformer, could be utilized. While transformers have shown remarkable results in CV and NLP, their performance on smaller datasets remains an area less explored. Previous studies in dynamical systems have predominantly employed RNN-based approaches. In this project, we aim to study if LSTM is capable of capturing the dynamics of system through its hidden and cell states. - -### Training objectives - -Trajectories are passed through an LSTM to generate trajectory embeddings, derived from the cell states of the LSTM's final layer. In our training framework, there are 2 loss functions: - -- Contrastive objective (InfoNCE loss) is applied on the trajectory embedding. This loss encourages model to create embeddings that meaningfully distinguish between different system dynamics. -- Prediction objective (MSE) is applied between the ground truth state (i.e., $X_{t+1}$) and the prediction state (i.e., $\hat{X}_{t+1}$) at the next step. This loss encourages model to use the current state and embedding to predict next step behavior. - - -{% include figure.html path="assets/img/2023-11-08-contrastive-time/contrastive_representation.png" class="img-fluid" %} -
- Figure 6: Diagram of contrastive representation learning in this project. Trajectories derived from the same parameter set (e.g. top two trajectories in figure) form a positive pair; they map closely in latent space and show invariance to initial states. Trajectories from different parameter sets are distant in latent space, despite identical initial states (e.g. bottom two trajectories). -
- - -### Evaluation - -The objective of the project to estimate the system parameters from observed trajectories. Therefore, the primary metric for our evaluation strategy is the **MAE on underlying parameter estimation**. This involves applying linear probing to the model's embeddings against known ground truth parameters on a subset of the training set (i.e., a linear system $X\beta = Y$ is solved, with *X* representing the trajectory embeddings, and *y* being the ground truth parameters). Since it is a simple linear transformation of the original features, it has limited capacity to alter feature complexity. Essentially, if a model can perform well under linear probing, it suggests that the learned embeddings themselves are robust and informative with respect to the underlying parameters. - -The following plot shows the result of the contrastive learning framework on the validation set. Left subplot corresponds to the ground truth parameter, right subplot corresponds to the predicted parameter using the above contrastive learning framework. For a focused visualization, we only varies 2 parameter (i.e., $m_1$, $m_2$). Each point in the plot is annotated with its corresponding parameter values. For each parameter set, we evaluate on 2 trajectories with different initial conditions. - -On the right plot, we observe similar data points are grouped together in the parameter space, indicating that the model is capable of clustering trajectories generated from the same parameter set together. Comparing the left and right plots, we observe the model is capable to predicting parameters to be close to ground truth parameters. Overall, the MAE for parameter estimation is `0.043`, underscoring the model's precision in parameter prediction. - -{% include figure.html path="assets/img/2023-11-08-contrastive-time/param_estimation.png" class="img-fluid" %} -
- Figure 7: Result of parameter estimation. Left subplot corresponds to the ground truth parameter, right subplot corresponds to the predicted parameter using the above contrastive learning framework. -
- - -Additionally, we would also like the model to be capable of predicting the future trajectories. For this objective, the secondary metric is the **MAE on next-step prediction**. High value on this metrics would indicate model's ability to accurately forecast future states, which is a necessary but may not be sufficient step towards a more complex, weekly-supervised parameter inference tasks. The MAE on the validation set is `0.00024`, and we will discuss it more in the Experiments section. - -## Experiments - -In the previous section, Figure X above shows the final result. We want to include 2 components in this section: 1) different things we attempted to reach the results in Figure X, and 2) several experiments to study how different factors affect model’s capability of discovering the underlying parameters. - -Due to computational and time limitation, the numbers reported in this section are not from the final model, which trained for a much longer time. Instead, we ran numerous experiments and compared performance after 2000 steps, at which point the training loss has roughly plateaued. - -### Effect of initial conditions - -The effect of different initial conditions in dynamical system is analogous to the effect of data augmentation in CV. The challenge is that different initial conditions may affect the trajectories more than the change in parameter. - -We initially used the same initial conditions for all set of parameters and led to parameter MAE of `0.01` in the validation set. However, the model doesn’t generalize to other initial conditions; when evaluating the model on the validation set that has different initial condition, MAE increased to `0.31`, indicating overfit. 
- -To ensure our model effectively discerns differences in trajectories arising from varying initial conditions, we generate 100 trajectories from each parameter set with random initial conditions, aiming to train the model to be invariant to these initial conditions and capture the essence of the system parameters. With this “data augmentation”, we bridged the gap between training and validation performance to be `0.061` and `0.065` respectively. - -### Number of prediction steps -We also considered the limitations of next-step prediction, particularly for high-frequency samples (i.e., small $dt$). A trivial model might simply predict state $X$ at time $t+1$ as $X_t$, and achieve a small loss since $X_{t+1} - X_t$ may be small for small $dt$. To avoid model taking shortcuts, we shift our focus from immediate next-step prediction to forecasting next-k-steps ahead. We also anticipate that accurate longer-horizon predictions would require a deeper understanding of the underlying parameters, potentially leading to improved performance in parameter estimation. This improves the parameter MAE on the validation set from `0.10` to `0.065`. The following figure illustrates an results of predicting 30 steps ahead. - - -{% include figure.html path="assets/img/2023-11-08-contrastive-time/gt_and_pred_traj.png" class="img-fluid" %} -
- Figure 8: Ground truth and predicted trajectory for predicting 30 steps (i.e., 3 seconds). -
- - -### Decouple state and parameter embedding - -In our hypothesis, the latent space of a trajectory encodes dual forms of information: "long-term" information pertaining to system parameters, and "short-term" information reflective of the current state. Traditional approaches applying contrastive learning across the entire latent vector may not optimally capture this duality. - -To address this, we propose to decouple the state and parameter embedding space. Concretely, for positive pairs emerging from identical parameters but divergent initial conditions, our approach focuses on computing the InfoNCE loss solely on the segment of the embedding representing the parameter. This is operationalized by limiting contrastive learning to the initial W dimensions of the latent vector, denoted as $z[:W]$. This strategy aims to specialize $z[:W]$ in encoding system parameters, while allowing the remaining part of the vector, $z[W:]$, the flexibility to encapsulate other trajectory aspects, such as initial conditions and inherent noise. - -However, the performance didn’t increase across various values of $W$. This stagnation might stem from our use of the LSTM cell state as the latent embedding. Given that the cell state inherently integrates "long-term" information, with "short-term" data predominantly residing in the hidden states, restricting ourselves to $z[:W]$ potentially reduces the representational power of our contrastive learning framework. - -### Effect of key hyperparameters - -We utilized WandB for a hyperparameter sweep to investigate their impact on the model's performance in next-steps prediction and underlying parameter estimation. Key hyperparameters explored include: - -- **Embedding Size**: We observed that increasing the embedding size from 10 to 200 led to a reduction in the InfoNCE loss from 0.862 to 0.007, and the corresponding parameter estimation estimation MAE peaked when embedding size reached 100. 
This suggests a larger embedding size can increase the capacity to more effectively infer underlying system parameters.
It's important to note that these errors are not merely observational errors, which can be assumed to be independent and identically distributed (i.i.d). Rather, these errors are intertwined with the state itself and can propagate over time, affecting subsequent observations. The figure below illustrates how noise can significantly alter trajectories. For instance, at a 20% noise level, the state variable $x_1$ markedly diverges from its intended path around the 8-second mar - -{% include figure.html path="assets/img/2023-11-08-contrastive-time/traj_with_noise.png" class="img-fluid" %} -
- Figure 9: Comparison of state trajectories with no noise (top subplot) and 20% noise (bottom subplot). -
- - -The following section evaluates the model's performance using noisy observed data. During trajectory generation, we introduce random noise according to the formula $X_{obs} = X_{true} (1 + \alpha \mathit{N}(0, 1))$ where $\alpha$ is the noise-to-signal ratio. We studied the model’s performance across various noise levels, ranging from $\alpha = 0.0$ to $\alpha = 0.2$, and the results are plotting in the following figure. - -{% include figure.html path="assets/img/2023-11-08-contrastive-time/comparison_noise.png" class="img-fluid" %} -
- Figure 10: Model performance across different noise levels. The blue bars represent a model trained on a clean dataset, and directly applied to a noisy validation dataset. The orange bars represent a model trained and evaluated on datasets with equivalent noise levels. -
- -Directly applying a model trained with a clean dataset on a noisy dataset would lead to drastic performance drop as shown in the blue bars. During model deployment, it’s a natural choice to train on a dataset with the same noise amount. This could mitigate the drastic performance drop, especially for low to moderate amount of noise (e.g., $\alpha < 0.1$), as shown in the orange bars. However, when noise amount rises to 20%, training on noisy dataset doesn’t help either due to significant deviation from clean data. - -Applying a model trained on a clean dataset to a noisy dataset leads to a significant drop in performance, as indicated by the blue bars. In practical model deployment, it's common to train the model on a dataset with a comparable level of noise. This approach can substantially mitigate performance degradation, particularly at low to moderate noise levels (e.g., $\alpha < 0.1$), as demonstrated by the orange bars. However, at higher noise levels, such as 20%, training on a noisy dataset proves less effective due to the substantial deviation from the clean data. - -### Generalizability to unseen parameters - -In this section, we delve into the model's generalizability across unseen parameters. Our investigation comprises experiments on both in-distribution and out-of-distribution system parameters. The results of these experiments are illustrated in the following figures. - -For in-distribution analysis, our focus was to assess the model's proficiency in adapting to system parameters that, while differing from those in the training set, still fall within the same predefined range. This aspect of the study aims to understand how well the model can interpolate within the known parameter space. - -{% include figure.html path="assets/img/2023-11-08-contrastive-time/param_est_unseen_in_distribution.png" class="img-fluid" %} -
- Figure 11: Result of parameter estimation to unseen parameters in-distribution. -
- - -On the other hand, the out-of-distribution experiments were designed to challenge the model further by introducing system parameters that lie outside the range encountered during training. This approach tests the model's ability to extrapolate beyond its training confines. - -{% include figure.html path="assets/img/2023-11-08-contrastive-time/param_est_unseen_out_distribution.png" class="img-fluid" %} -
- Figure 12: Result of parameter estimation to unseen parameters out-of-distribution. -
- - - -Remarkably, our model demonstrated a robust ability to generalize across both in-distribution and out-of-distribution parameters. It achieved a Mean Absolute Error (MAE) of 0.032 in the former and 0.082 in the latter scenario. These findings suggest that the model not only learns the underlying patterns within the training data but also retains a significant degree of flexibility to adapt to new, unseen parameter sets. - -## Another Framework - Generative Modeling - -While the previously discussed contrastive learning framework shows promise in inferring underlying parameters through a weakly-supervised learning approach, it relies on prior knowledge about the relationship between trajectories and their corresponding parameter sets. Such information may not always be readily available in practical scenarios. To address this challenge, our research pivots towards employing a generative modeling framework, enabling the learning of system parameters in an unsupervised manner. - -We transition from contrastive learning to incorporating a **variational autoencoder (VAE) structure**. This setup operates without explicit knowledge of parameter sets, compelling the model to decipher the underlying patterns solely from the observed trajectories. The VAE framework consists of three primary components: 1) an encoder LSTM that transforms an observed trajectory into a latent representation, 2) a reparameterization layer that molds this latent representation into a specific distribution, and 3) a decoder LSTM that uses the latent representation and initial conditions to reconstruct the trajectory. - -Training focuses on 1) the reconstruction loss between real and a generated trajectories, and 2) Mean Absolute Error (MAE) for next-k-step predictions made by the encoder LSTM. This method is designed to challenge the model's capability to extract insights about the system's dynamics independently, without relying on any prior information about the trajectories. 
The framework thus becomes a critical platform for testing the model's ability to autonomously learn the system's underlying parameters, requiring an advanced level of unsupervised learning. - -The evaluation metrics for this second framework are aligned with the first, utilizing MAE to assess both the underlying parameter estimation and the next k-step prediction accuracy of the encoder LSTM. A key addition in this framework is the **MAE on Reconstruction Loss**.This metric is used to gauge the model's ability to accurately reconstruct input sequences, thereby reflecting its understanding of the data's fundamental structure. A lower reconstruction loss implies that the model has effectively internalized the essential characteristics of the data distribution. Our expectation is that this deeper grasp of data structure will enable the model to infer underlying system parameters independently, without prior exposure to specific parameter set information. - -## Experiments - Generative Modeling -### Autoencoder v.s. Variational Autoencoder - -In addition to exploring the Variational Autoencoder (VAE) framework, we also experimented with a traditional autoencoder setup. This variant mirrors the architecture of the VAE but excludes the computation of the mean ($\mu$) and log variance ($\log \sigma^2$), thereby omitting the variational element. This modification streamlines the model, narrowing its focus to purely reconstructing input data from its latent representations. - -Our findings reveal that the autoencoder configuration surpassed the VAE in both parameter estimation and reconstruction. For parameter estimation MAE, autoencoder and VAE achieved 0.12 and 0.23 respectively. For reconstruction MAE, autoencoder and VAE achieved 0.02 and 0.49 respectively. This performance disparity can be attributed to the inherent constraints of each model. The autoencoder is primarily limited by the dimensionality of the embedding in its latent space. 
In contrast, the VAE faces an additional constraint due to its need to model the distribution within the latent space. - -These results suggest that the variational component, a defining feature of VAEs and instrumental in modeling data distributions, might not be essential for capturing the dynamics specific to our system. By removing the variational aspect, the autoencoder model is enabled to concentrate more effectively on capturing the most salient features for reconstruction and parameter inference. This simpler approach avoids the additional complexity of encoding the data distribution in the latent space, potentially leading to more efficient and targeted learning relevant to our system's dynamics. - -### Beyond Reconstruction: Evaluating Future Prediction Capabilities - -To evaluate our AE model's generalizability and future prediction capabilities, we expanded its function beyond reconstruction to include forecasting additional steps. The figure presented here compares the ground truth states $x_1$ and $x_2$ (displacements for $m_1$ and $m_2$) against the model’s outputs for both reconstruction and prediction. The model processes input trajectories of 100 time steps and generates outputs for 199 steps, with the initial 99 steps dedicated to reconstruction and the subsequent 100 steps for prediction (unseen by the model during training). The results illustrate effective reconstruction performance but relatively weaker predictive accuracy. - -{% include figure.html path="assets/img/2023-11-08-contrastive-time/recon_and_pred.png" class="img-fluid" %} -
- Figure 13: Reconstruction and further prediction of unseen states. -
- - -Given that our autoencoder (AE) framework surpasses the Variational Autoencoder (VAE) in reconstruction and parameter estimation, we speculated whether VAE's variational component might enhance future predictions. Therefore, we compared the reconstruction and prediction losses between the AE and VAE frameworks. - -The corresponding figure, presenting the mean and standard deviation of these losses, reveals that in both frameworks, reconstruction losses and their variability are substantially lower than prediction losses. This trend highlights the ongoing difficulty in achieving precise future predictions within our model configurations. - -Furthermore, the AE framework demonstrated superior performance over the VAE in both reconstruction and future step prediction. This outcome suggests that the VAE's variational component does not necessarily contribute to improved future predictions. Echoing our earlier findings on parameter estimation and reconstruction, the variational aspect might not be pivotal for capturing the dynamics specific to our system. Instead, it could introduce additional complexity by encoding the data distribution in the latent space, which appears to be less relevant for reconstruction and future step prediction tasks. - -{% include figure.html path="assets/img/2023-11-08-contrastive-time/comparison_recon_and_pred.png" class="img-fluid" %} -
- Figure 14: Results of reconstruction loss (first 99 steps) and future prediction loss (last 100 steps) for autoencoder (top subplot) and VAE (bottom subplot). -
- - -### Effect of Latent Variables on Generated Trajectories - -In this section, our objective is to glean insights into the latent variables by manipulating them and observing the resultant changes in the generated trajectories. Given that the embedding dimension (i.e., \|z\|) exceeds the dimension of the parameters (i.e., \|$\theta$\|), we initially establish a linear mapping from from $z$ to $\theta$. The following gif demonstrates how the trajectory evolves in response to alterations in the variable $m_1$. The upper part of the gif represents the simulation, while the lower part reflects the output from the decoder of our autoencoder. - -A notable observation is that, as m1 undergoes modifications, the predicted trajectories adeptly resemble the period of the simulation trajectories. However, a discrepancy arises in their magnitude, with the predicted trajectories exhibiting a notably smaller scale compared to the ground truth trajectories. This pattern suggests that while the embedding successfully captures certain characteristics of the trajectories, it does not fully encapsulate all their properties. - -We hypothesize that enhancing the complexity of the encoder/decoder architecture (e.g., larger number of layers of LSTM layers) might facilitate a more comprehensive capture of trajectory attributes. However, our experimental scope is currently constrained by limitations in CUDA memory, particularly due to the decoder's requirement to process 99 time steps. This constraint hinders our ability to experiment with architectures involving a greater number of layers, which might otherwise allow for a richer representation and understanding of the trajectory data. - -{% include figure.html path="assets/img/2023-11-08-contrastive-time/latent_to_traj.gif" class="img-fluid" %} -
- Figure 15: Effect of latent variables on generated trajectories. -
- - -## Conclusion and Future Works - -In contrast to current machine learning literature that predominantly focuses on predicting future states of dynamical systems, our work is geared towards uncovering the underlying system parameters from observed trajectories. Our key contributions include: - -- Implementing two frameworks: an autoregressive LSTM with contrastive learning, and a variational autoencoder architecture. While contrastive learning yields superior parameter estimation, the autoencoder enables unsupervised learning without relying on prior knowledge. -- Demonstrating our model's generalizability to both in-distribution and out-of-distribution unseen parameters, and its effective performance with noisy datasets, sustaining a noise-to-signal ratio of up to 10%. -- Conducting thorough experiments to explore the impact of various factors like initial conditions, prediction horizons, and the interplay between state and parameters embeddings. We also examined the influence of latent variables on trajectory generation and the model's predictive capabilities beyond the confines of the training set. - -The ability to accurately estimate underlying system parameters significantly enhances model interpretability, which is crucial in scientific and engineering applications where decision-making stakes are high. We hope our findings will help researchers and students interested in interpretable machine learning for dynamical systems. - -While this project did extensive analysis on a spring-mass system, future work may extend this analysis to a broader range of dynamical systems. Moreover, future work can integrate the strengths of both frameworks to incorporate contrastive learning within an unsupervised context, possibly through data augmentation strategies. Further advancements could also focus on refining the impact of latent variables on trajectory generation. 
Such progress is expected to bolster trust in AI solutions and facilitate their integration into essential decision-making frameworks across various domains. - - -Here's the link to our Github Repo: [https://github.com/martinzwm/meta_param_est](https://github.com/martinzwm/meta_param_est) - - - - - - - - - - - - \ No newline at end of file diff --git a/_posts/2023-11-08-croneillproposal.md b/_posts/2023-11-08-croneillproposal.md deleted file mode 100644 index face8023..00000000 --- a/_posts/2023-11-08-croneillproposal.md +++ /dev/null @@ -1,213 +0,0 @@ ---- -layout: distill -title: Learning a Lifted Linearization for Switched Dynamical Systems -description: A final project proposal for 6.s898 in fall 2023 -date: 2023-12-11 -htmlwidgets: true - -authors: - - name: Cormac O'Neill - url: - affiliations: - name: MIT, Cambridge - -# must be the exact same name as your blogpost -bibliography: 2023-11-08-croneillproposal.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Proposed Approaches - - name: The Model - - name: Analysis - - name: Conclusion ---- - - -## Introduction -
- All models are wrong, but some are useful. - —George Box -
- -Deep neural networks are incredibly capable of generating models from data. Whether these are models that allow for the classification of images, the generation of text, or the prediction of a physical system’s dynamics, neural networks have proliferated as a favored way of extracting useful, predictive information from set of data . But while well-tuned and well-designed neural networks can demonstrate miraculous performance at a given task, raw accuracy is not the only measure of a model’s usefulness. - -In robotics, the speed at which a model can be run and its explainability can be just as important as the accuracy of its predictions. Techniques such as model predictive control can enable remarkable performance even when they’re based on flawed predictive models . In practice, most of these models are linearizations of more accurate, nonlinear equations. Produced by considering low order truncations of the Taylor series, these linearizations can be run incredibly efficiently on modern computer hardware and are amenable to linear analysis techniques for explainability purposes. - -Nevertheless, this kind of linearization has its own weaknesses. Chief among them is the inherently local nature of the approach: a Taylor series must be taken around a single point and becomes less valid further away from this location. As an alternative, lifting linearization approaches inspired by Koopman Operator theory have become more commonplace . These techniques seek to linearize a system by lifting it to a higher dimensional representation where the dynamics can be made to evolve linearly over time. While such models can suffer from the curse of dimensionality when compared to their lower-order Taylor series brethren, they can offer greater accuracy while still providing most of the benefits of a linear model. - -$$ -f(x)|_{x=a}\approx f(a)+\frac{f'(a)}{1!}(x-a) -$$ -
- A truncated Taylor series makes use of the derivatives of a function around a point. -
- -Deep neural networks have emerged as a useful way to produce these lifted linear models . An encoder is used to transform a system’s state into a higher dimensional latent space of “observables”. These observables are then fed through a linear layer which evolves the system forward in time: a linear dynamical model. In the literature, this approach has come to be known as Deep Koopman Networks (DKNs). We can see how these networks can learn lifted linear models for physical systems by considering a simple pendulum. - -
-
- {% include figure.html path="assets/img/2023-11-08-croneillproposal/deepnet.png" class="img-fluid" %} -
-
-
An example of a neural network architecture used to learn observables for a linear Koopman model,
- -
-
- {% include figure.html path="assets/img/2023-11-08-croneillproposal/DKN_simplepen.png" class="img-fluid" %} -
-
-
- Performance of a DKN for predicting a damped, simple pendulum across a set of trajectories. On the left, the dotted lines represent the ground truth trajectories, while the lines connected by crosses are the predicted trajectories. On the right, the MSE of the trajectories for the full 30 time steps of data is presented. -
- -While the potential of DKNs has already been explored in recent years, the field is still being actively studied. In this blog, I am interested in exploring how a DKN can be used to model a particular kind of a dynamical system: one with piecewise dynamics that vary discretely across state space. These systems are inherently challenging for traditional, point-wise linearization techniques. To explain this, we can consider an example inspired by our old friend, the simple pendulum. - -Consider a pendulum as before, but with the addition of two springs located at $\theta=30\degree$ and $\theta=-30\degree$. If we to consider a point arbitrarily close to one of these springs, say at $\theta=29.99…\degree$, then a Taylor series about this point – even with infinite terms – would not be able to accurately represent the dynamics when the spring is engaged. In contrast, a lifted linearization may better model such a system thanks to its ability to incorporate information beyond a single point. - -$$ -\begin{align} - \ddot\theta =f(\theta,\dot\theta) =\begin{cases} - -g\sin{\theta}-b\dot\theta, & \theta\in [-30^\circ,30^\circ]\\ - -g\sin{\theta}-b\dot\theta-k(\theta+30), & \theta<-30^\circ\\ - -g\sin{\theta}-b\dot\theta-k(\theta-30), & \theta>30^\circ - \end{cases} -\end{align} -$$ -
- The dynamics of a pendulum with a pair of springs can be expressed as a set of piecewise equations. $k=1000$ is the stiffness of the springs and $b=1$ is the damping constant. -
- -
-
- {% include figure.html path="assets/img/2023-11-08-croneillproposal/spring_diagram.png" class="img-fluid" %} -
-
-
Diagram of the damped pendulum system with a pair of fixed springs, spaced at equal angles away from $\theta=0$.
- -Although that isn’t to say that a brute-force implementation of a DKN would necessarily be all too successful in this case either. Piecewise, switched, or hybrid systems (terminology depending on who you ask) are composed of particularly harsh nonlinearities due to their non-continuous derivatives. These can be difficult for lifted linearization approaches to model , with some systems theoretically requiring an infinite number of observables to be accurately linearized. This project is motivated by the question of whether we could modify the standard DKN approach to be more amenable for piecewise systems, specifically by taking inspiration from the common practice of pre-training neural networks. - -As a bit of a spoiler for the conclusion of this report, we don’t end up seeing any noticeable improvement from pre-training the DKN. Nevertheless, the process of experimenting with the proposed approaches was an insightful experience and I am happy to share the results below. - -## Proposed Approaches -I experimented with two approaches for pre-training our DKN, one inspired by curriculum learning and another seeking to leverage an intuitive understanding of a lifted linearization’s observables. We then compared the results to an aggregate DKN model trained from scratch with 50 observables. - -
-
- {% include figure.html path="assets/img/2023-11-08-croneillproposal/aggregate_DKN.png" class="img-fluid" %} -
-
-
- A DKN with 50 observables trained from scratch for the pendulum with springs. On the left, the dotted lines again represent ground truth trajectories while the lines connected by crosses are predictions. -
- -In the case of applying curriculum learning, we considered an approach with a data-based curriculum. In these cases, the difficulty of the training data is gradually increased over time. This has the potential benefit of allowing a model to more readily learn a challenging task, while also preventing a situation where a model is not sufficiently ‘challenged’ by new data during the training process. Our curriculum learning approach sought to take advantage of DKNs’ already good performance for the standard pendulum case. Intuitively, we identify the spring’s stiffness as the primary source of increased difficulty in our toy system. With this in mind, I created four data sets with different values for the spring constant, $k=0,10,100,1000$. A single model was then trained sequentially on these data sets. If our intuition is correct, we would expect to see the model gradually learn to account for the presence of the spring while maintaining the dynamics of a simple pendulum closer to the origin. - -For the second approach tested in this project, it is necessary to consider what an observable is meant to represent in a lifted linearization. As an additional piece of terminology, the function which is used to generate a given observable is referred to as an observable function . While it may be possible to use different sets of observable functions to linearize a given system, it is possible to find a set of observable functions that are analogous to a linear system’s eigenvectors. The evolution of these observables in time, referred to as Koopman eigenfunctions, is defined by an associated complex eigenvalue. Much like their eigenvector cousins, these eigenfunctions can provide useful information on how the system might evolve over time, including information on how the time evolution may vary spatially. 
- -Based on this understanding of Koopman eigenfunctions, we are motivated to see if a DKN could be coaxed into more readily learning spatially-relevant observables. If we consider our system of interest, the pendulum with springs, we posit that different regions of state space would be primarily influenced by different eigenfunctions. In particular, the larger central region where the pendulum’s dynamics are independent of the springs may be expected to be affected by a set of eigenfunctions with a lower spatial frequency and a global relevance. That is, eigenfunctions which better represent the dynamics of the system averaged throughout the state space and which may be valid everywhere – even when the springs are engaged, the natural dynamics of the pendulum are still in effect. In contrast, the dynamics when the springs are engaged (each spring is active in a comparatively smaller region of state space) may rely heavily on a set of eigenfunctions that are only locally relevant. - -
-
- {% include figure.html path="assets/img/2023-11-08-croneillproposal/pend_statespace.png" class="img-fluid" %} -
-
-
- On the left, a visualization of trajectories used to train the models for the pendulum with springs. Dotted vertical lines mark the boundary between the regions of state space where the springs are and are not engaged. On the right, we see the trajectories considered for the system when there are no springs. Note that the presence of the springs compresses, or `squeezes', the higher energy trajectories further away from the origin of the state space.
- -While I believe that this is an interesting thought, it is worth noting that this intuitive motivation is not necessarily backed up with a rigorous mathematical understanding. Nevertheless, we can empirically test whether the approach can lead to improved results. - -In contrast to the curriculum learning approach, we have only a single set of data: that generated from a model of a pendulum with a spring stiffness of $k=1000$. Instead of the standard approach of DKN, where a larger number of observables is considered to (in general) allow for a system to be more easily linearized, we deliberately constrain the latent space dimension to be small. The intention is for this restriction to limit the number of observable functions that the model can represent, encouraging it to learn observables with a low spatial frequency and which are relevant across a larger region of state space. In our system of interest, this would be observable functions that represent the dynamics of the pendulum without the springs. - -Once we have initially trained this smaller model, we use its encoder within a larger model. This initial encoder is kept fixed in future training processes so that it continues to represent the same set of observables. An additional encoder is then trained in the larger model, with the goal being to learn additional observables capable of making up for the initial model’s deficiencies. If the initial model learned the low spatial frequency observables as hoped, then we would expect this additional encoder to learn observables that are more relevant in areas where the springs are exerting a force on the pendulum. In practice, we could see this as a particular form of curriculum learning where the complexity of the model is increased over time. 
A key difference here compared to traditional approaches is that instead of increasing the complexity of the model by adding layers depth-wise, we are effectively increasing the width of the model by giving it the ability to learn additional observables. - -
-
- {% include figure.html path="assets/img/2023-11-08-croneillproposal/model_arch.png" class="img-fluid" %} -
-
-
- The architecture used to train the combined model. A smaller, 10 observable model was first trained, before a larger model was then trained to learn an additional 40 observables. -
- -## The Model -To reduce the influence that other factors may have in the results of our experiments, I sought to minimize any changes to the overall structure of the DKNs being used, save for those being studied. Chief among these was the number of hidden layers in the network, the loss function being used, and the input. Other variables, such as the optimizer being used, the batch size, and the learning rate, were also kept as unchanged as feasible. The need to tune each of these other hyperparameters and the challenges in doing so are well-documented in the machine learning field, and as such I won’t spend any additional time describing the processes involved. - -The general *encoder* architecture of the networks being used was as follows, with $D_x$ being the number of states (2, in the case of the pendulum) and $D_e$ being the number of observables: - -| Layer | Input Dimensions | Output Dimensions | Nonlinearity | -| ------------- |:-------------:| :-----:| :----:| -| Linear | $D_x$ | 16 | ReLU | -| Linear | 16 | 16 | ReLU | -| Linear | 16 | $D_e$ | None | - -In addition to the encoder network, a linear layer was present to determine the time evolution of the observables. For this linear layer, the input and output dimensions were both D_e + D_x since our final set of observables always had the system’s states concatenated onto those learned by the encoder. - -The loss function that I used was composed of two main components: a loss related to the time evolution of the observables being output by the encoder, and a loss related to the time evolution of the state variables. In the literature, additional loss terms are often included to help regularize the network during training. These were not found to be significant in the testing done for this report, however and so were excluded. Tests were also done with different weights between the state loss and the observable loss, with an equal balance between the two found to provide reasonable outcomes. 
Another hyperparameter that we needed to tune is how many time steps to enforce a loss on the values predicted by the model. In this report, we stuck to 30 time steps although significant experimentation was not done to explore how varying this parameter may have affected the results. We did briefly look into whether having a weight on any of the loss terms which decayed over time would improve training and did not see any immediate benefits. - -$$ -\mathrm{loss}=\mathrm{multistep\_loss\_state}+\mathrm{multistep\_loss\_observables} -$$ -$$\mathrm{multistep\_loss\_state}=\sum^{30}_{t=1}\lvert\lvert(\psi(\textbf{x}_t)-K^t\psi(\textbf{x}_0))[:2]\rvert\rvert_{\mathrm{MSE}} -$$ -$$\mathrm{multistep\_loss\_observables}=\sum^{30}_{t=1}\lvert\lvert(\psi(\textbf{x}_t)-K^t\psi(\textbf{x}_0))[2:]\rvert\rvert_{\mathrm{MSE}} -$$ -
- The loss function ultimately used for each of the models considers the prediction error for both the state and the observables. $\psi$ represents the act of using the model's encoder and then concatenating the state as an additional pair of observables. $K$ represents the linear layer in the architecture used to model the time evolution of the lifted state. -
- -## Analysis -### Curriculum Learning -The initial model for stiffness $k=0$ was trained on the simple pendulum dynamics for 600 epochs, and served as the pre-trained model for this approach. Subsequent models were each trained for 200 epochs with the Adam optimizer and a decaying learning rate scheduler. When analyzing the performance of these models, we looked at how the error for a set of trajectories not in the training set evolved over time. - -
-
- {% include figure.html path="assets/img/2023-11-08-croneillproposal/curriculum_results.png" class="img-fluid" %} -
-
-
- Performance of the model trained using curriculum learning after each of the learning stages. We observe that performance decreases over time, and that the original model trained when $k=0$ seems to perform the best. -
- -By this metric, we observe the performance of the model gradually getting worse. While this on its own is not too surprising, the final model ends up performing significantly worse than a DKN with the equivalent number of observables trained from scratch. Interestingly, it looks like the final model is unstable, with the trajectories blowing up away from the origin. Looking into this, issues surrounding the stability of linearized models are not a new phenomenon in the field of Koopman linearizations. Prior works have proposed several methods to help alleviate this issue, such as by adding an additional term to the loss function which stabilizes the time-evolution matrix. While there was no time to implement this change for this report, it could be an interesting modification to attempt for future work. - -### Learning New Observables -While trying to gradually learn additional observables for the model, we started with a network that learned 10 observable functions and trained it for 600 epochs. Once this process was complete, an extended model learned an additional 40 observable functions for an additional 600 epochs. The end result was comparable in performance to a single aggregate model of 50 observables trained from scratch. The aggregate model did appear to specifically outperform our gradually trained model during the initial time steps, while slightly underperforming in comparison at the later time steps. This may be due to some differences in the stability of the two learned linear models, although further investigation would be needed to verify this. Part of the motivation for this method was the hope that the network would learn locally relevant observable functions. The learned observables were plotted on a grid to visualize them and see if this were the case, but no distinctive, qualitative features were found indicating that different observables were learned for different regions of state space. - 
-
- {% include figure.html path="assets/img/2023-11-08-croneillproposal/combined_results.png" class="img-fluid" %} -
-
-
- The combined model doesn't see any noteworthy improvement in performance when compared to the standard DKN approach. While not shown here, the combined model was found to be sensitive to how many observables were learned by each of its constituents. For example, having 30 observables in the first encoder and 20 in the second led to worse results. -
- -
-
- {% include figure.html path="assets/img/2023-11-08-croneillproposal/obs_visualization.png" class="img-fluid" %} -
-
-
- Visualization of a pair of observables from the combined model, arbitrarily selected as the first observable from both encoder 1 (left) and encoder 2 (right). While only these two observables are shown here, plots for all 50 were produced. We noticed that observables from encoder 1 (the fixed model) tended to show `kinks' around $\theta=\pm 30\degree$. This may indicate that it was learning to account for the presence of the springs. In contrast, encoder 2 (the extended model) learned observable functions that were generally smoother across state space.
- -## Conclusion -In this project, we sought to test two modifications to a DKN training scheme on an example of a piecewise dynamical system. By using a curriculum learning process or gradually increasing the number of observable functions, we hypothesized that the DKN would show better performance than an aggregate model trained from scratch. Ultimately, we found that neither of the proposed methods led to significant improvements. - -One of the potential causes of underperformance is the learned linear models’ instability. While this is a known issue regarding lifted linearization techniques , attempting to resolve the issue would require further work and additional study into how best to do so for this use case. The example model of a pendulum with springs could also have been chosen poorly. I opted to experiment with this system since it was physically meaningful, and I believed that it would be a simple toy model that wouldn’t require large models with extensive compute requirements. But observing the dramatic change in performance that occurred in the linear models simply through the addition of the springs made me wonder whether this system truly was as simple as I had initially made it out to be. It is possible that larger and more elaborate models with more observables and resources for training are necessary to learn an appropriate linearization. - -It is also worth considering the severe limitations of this study, imposed upon it by the need to tune a wide variety of hyperparameters. Even in the process of creating a linear model for the simple pendulum, I observed a wide range of performance based upon how the cost function or learning rate were varied. While some effort was taken to tune these and other hyperparameters for the models I explored, this process was far from exhaustive. - -Moreover, the proposed changes to the typical DKN architecture only served to add additional hyperparameters into the mix. 
What spring stiffnesses should be used during curriculum learning? Should the learning rate be decreased between different curriculums, or should the number of epochs be varied? How about the ratio of observables between the two models used in the second approach, is a 10:40 split really optimal? Some variations of these hyperparameters were considered during this project, but again an exhaustive search for optimal values was impossible. - -This means that there is a chance that I simply used the wrong selection of hyperparameters to see better performance from the tested approaches, it highlights the sensitivity that I observed in the performance of the DKNs. Even beyond the considerations described thus far, there are further considerations that can impact the structure and performance of learned linearizations. Some approaches augment the state variables with time-delayed measurements, for example. In other cases, the state variables are not included as observables and are instead extracted using a decoder network. This latter case is of particular interest, since recent work in the field has identified that certain types of nonlinear systems are impossible to linearize with a set of observables that include the states. - -Ultimately, while the experiments in this project didn’t agree with my hypothesis (and resulted in some underwhelming predictive performance) I gained a newfound appreciation for the process of training these models along the way. \ No newline at end of file diff --git a/_posts/2023-11-08-detect-image.md b/_posts/2023-11-08-detect-image.md deleted file mode 100644 index 340ac0f7..00000000 --- a/_posts/2023-11-08-detect-image.md +++ /dev/null @@ -1,227 +0,0 @@ ---- -layout: distill -title: Zero-Shot Machine-Generated Image Detection using Sinks of Gradient Flows -description: "How can we detect fake images online? A novel approach of characterizing the behavior of a diffusion model's learned score vectors." 
-date: 2023-11-08 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Marvin Li - url: "" - affiliations: - name: Harvard - - name: Jason Wang - url: "" - affiliations: - name: Harvard - -# must be the exact same name as your blogpost -bibliography: 2023-11-08-detect-image.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Abstract - - name: Introduction - - name: Related Work - - name: Methods - - name: Experiments - - name: Discussion - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -### Abstract - -Detecting AI-generated content has become increasingly critical as deepfakes become more prevalent. We discover and implement algorithms to distinguish machine-generated and real images without the need for labeled training data. We study the problem of identifying photorealistic images using diffusion models. In comparison to the existing literature, we discover detection techniques that do not require training, based on the intuition that machine-generated images should have higher likelihoods than their neighbors. We consider two metrics: the divergence of the score function around a queried image and the reconstruction error from the reverse diffusion process from little added noise. We also compare these methods to ResNets trained to identify fake images from existing literature. 
Although the previous methods outperform our methods in terms of our accuracy metrics, the gap between our zero-shot methods and these ResNet methods noticeably declines when different image transformations are applied. We hope that our research will spark further innovation into robust and efficient image detection algorithms. - -### Introduction - -As AI-generated images become ever more widespread, garnering virality for how realistic they have become, we are increasingly concerned with the potential for misuse. A deluge of machine-generated fake images could spread misinformation and harmful content on social media. From relatively innocuous pictures of [Pope Francis](https://www.nytimes.com/2023/04/08/technology/ai-photos-pope-francis.html) wearing an AI-generated puffer coat to dangerous [disinformation campaigns](https://www.politico.eu/article/ai-photography-machine-learning-technology-disinformation-midjourney-dall-e3-stable-diffusion/) powered by diffusion models, we live in a new era of media that we cannot trust. The European Union has passed [legislation](https://www.nytimes.com/2023/12/08/technology/eu-ai-act-regulation.html) that, among other regulations, requires AI-generated content to be explicitly marked so. The enforcement of such legislation and similar-minded policies, however, remains unclear. Consequently, a growing body of research has sought to develop techniques to distinguish between the real and the synthetic. - -The rise of models capable of generating photorealistic content makes the detection problem difficult. While there are still numerous nontrivial challenges with current models from their inability to depict text and render tiny details humans are innately sensitive to such as eyes and hands, the pace of the technology is moving in a way that makes relying on these flaws short-sighted and dangerous. 
Another potential complication is that advanced photo editing techniques such as [Adobe Firefly](https://www.adobe.com/products/firefly.html) have capabilities such as generative inpainting that make it such that an image could contain both real and invented content. Even simple data augmentations like crops, rotations, color jitters, and horizontal flipping can make the input look vastly different to a detection model. Furthermore, the majority of popular image generation tools are text-conditional, and we cannot expect to recover the text prompt, not to mention the model that generated the image. This makes transferable, zero-shot techniques of paramount importance. - -In this paper, we propose two techniques for detecting images from diffusion models (see Figure [1](#fig-methods-illustrated)). Diffusion models have been one of the most successful architectures for image generation, inspired by thermodynamic principles. Diffusion models learn a score function (gradient of log likelihood) that 'undoes' noise from the image. In effect, these models learn a gradient field that points to the real-world data manifold. We leverage the intuition that the greater the deviation the diffusion model's machine-generated images are from the real world data, the greater the difference of the neighborhood gradient field. In particular, we believe that machine-generated images are more likely to live in a 'sink' of the gradient field as the diffusion model 'flows' images down the gradient field. We thus propose the *divergence of a diffusion model's score function* as a promising zero-shot statistic for whether an image is generated by the diffusion model. - -In addition, another metric for the 'sink' property of the gradient field at the image of concern is how far the image moves after a small displacement and flow along the gradient field. 
This has a nice interpretation in diffusion models as the *reconstruction error* for running the reverse process over just a small timestep on just a slightly perturbed image. - -*Figure 1: The Divergence and Reconstruction Error Hypothesis: Images on the generated data manifold (red) have negative divergence and small reconstruction error, while images on the real data manifold (green) have zero divergence and large reconstruction error.* -{% include figure.html path="assets/img/2023-11-08-detect-image/methods-illustrated.png" class="img-fluid" %} - -Our overarching research question is thus summarized as, can we use the properties of a diffusion model's tacit vector field to build an effective zero-shot machine-generated image detector, specifically looking at *divergence* and *reconstruction error*? - -The main contributions of our paper are: - -1. Proposing two methods inspired by sinks of gradient flows: *divergence* and *reconstruction error*. - -2. Conducting a wide battery of experiments on the performance of these methods in a variety of augmentation settings. - -### Related Work - -Previous literature has considered several different methods for image detection. Sha et al. 2022 trained machine learning classifiers to detect fake images using high-level image and text embeddings. They, however, do not consider the local information around image embeddings, and require existing datasets of known image-generated and non-image-generated examples to train their classifier. Corvi et al. 2023 identified "forensic traces" in machine-generated image residuals for this task. Again, their method requires many data samples, and requires separate training on diffusion models and GANs. - -We are inspired by ideas from DetectGPT, a recent work which addressed the same problem of detecting AI-generated content, but in the setting of large language models. 
For a given piece of text, DetectGPT perturbs the original text and computes the difference in log-likelihood between the perturbed text and the original text: - -$$\mathrm{DetectGPT}(x,p_{\theta},q)\triangleq\log p_{\theta}(x)-\mathbb{E}_{\tilde{x}\sim q(\cdot|x)}\log p_{\theta}(\tilde{x})$$ - -where $p_\theta$ is the language model and $q$ is the distribution of perturbations. If the difference in log-likelihood is large, then the attack claims that the original text is more likely to be generated by a language model. - -There are several critical differences between language models and diffusion models. With text, one can directly compute the log likelihood of a given piece of text, even with only blackbox access, i.e., no visibility to the model's parameters. In contrast, for diffusion models, it is intractable to directly compute the probability distribution over images because diffusion models only learn the score. Moreover, the most commonly used diffusion models, e.g. DALL-E 3, apply the diffusion process to a latent embedding space rather than the pixel space. To address the latter concern, we plan on applying the encoder to the image to obtain an approximation of the embedding that was passed into the decoder. And to address the former, instead of approximating the probability curvature around a given point like DetectGPT, we formulate a statistic characterizing whether the gradient field/score is a sink, i.e., the gradients around a machine-generated image point to the machine-generated image. This captures the idea of a local maximum in probability space, similar to the DetectGPT framework. - -It would be remiss to not mention Zhang et al. 2023, who argued that watermarking, a strictly easier task than machine-generated image detection, is likely impossible. 
They claim that an adversary who can perturb a generated image of text without too much degradation and has blackbox access to the watermarking scheme can conduct a random-walk on reasonable outputs until the watermark is degraded. However, their analysis was mainly theoretical and lacked specific experiments with diffusion models. It remains to be seen whether their assumptions still hold for image generation, and whether more concrete watermarking schemes may afford some level of protection against less sophisticated adversaries or the unintentional use of machine-generated images. - -### Methods - -**Dataset.** To conduct our research, we needed datasets of known real and fake images. We used MSCOCO , a dataset of 330K non-machine generated images and captions of common real-world objects which was also used by Corvi et al. 2023. Initially, we planned to use DiffusionDB for our fake images, a dataset of 14M (prompt, image) pairs generated by the open-source Stable Diffusion Version 1 model scraped from the StableDiffusion discord. However, we realized that many of the images in DiffusionDB are not meant to be realistic. Instead, we iterated through the captions of MSCOCO and used Stable Diffusion V1.4 to generate a matching machine-generated image for that caption, as in Corvi et al. 2023. - -**Baseline.** We used the model and code from Corvi et al. 2023 to identify images generated by Stable Diffusion as our trained baseline. Their model is a ResNet18 image-only detector trained on the training split of the MSCOCO dataset and images also generated by prompts from StableDiffusion. - -**Detection Algorithms.** For out attacks, we compute the divergence of the diffusion model's score field around the image (negative divergence indicates a sink). 
We can estimate this via a finite-differencing approach: given a diffusion model $s_\theta(x)$ which predicts the score $\nabla_x\log p_\theta(x)$, we have that - -$$\mathrm{div}(s_\theta,x)= \sum_{i=1}^d \frac{s_\theta(x+he_i)_i-s_\theta(x-he_i)_i}{2h}$$ - -for small $h$ and orthogonal basis $\{e_i\}_{i=1}^d$. -However, images are high-dimensional, and even their latent space has $\approx10,000$ dimensions, which means that fully computing this sum could be computationally expensive. In this paper, we sample a fraction of the dimensions for each queried image. - -Another way to capture the intuition that machine-generated images have higher likelihoods than their neighbors is by noising the latent to some timestep $t$, and then comparing the distance of the image denoised by the diffusion model to the original image. That is, given a diffusion model $f_\theta$ which takes a noised image and outputs an unnoised image (abstracting away noise schedulers, etc. for clarity), - -$$\mathrm{ReconstructionError}(f_{\theta},x)\triangleq \mathbb{E}_{\tilde{x}\sim \mathcal{N}(x,\epsilon)}||x-f_{\theta}(\tilde{x})||_2^2$$ - -for small $\epsilon$. The intuition is that if an image has a higher likelihood than its neighbors, then the denoising process is more likely to send noisy images to that particular image. - -**Comparison.** For each model, we use the AUC-ROC curve and the true positive rate (TPR) at low false positive rate (FPR) as metrics. The latter notion of accuracy is borrowed from the membership inference attack setting in Carlini et al. 2021. As they argue, this metric quantifies our confidence that a point identified as fake is actually fake. In important settings like filtering fake images on social media platforms, this is especially important as there may be asymmetric consequences for accidentally flagging an image as fake compared to missing a fake image. We also provide a data visualization tool for the images our method identifies. 
In the real world, we can expect that the images we want to test will be distorted, either by random cropping, reflections, rotations, or compression. We will apply image augmentations over both fake and real image datasets and report the same metrics over these augmentations. - -### Experiments - -We run all experiments over a common set of 500 images from the test set of [MSCOCO](https://huggingface.co/datasets/nlphuji/mscoco_2014_5k_test_image_text_retrieval) and the corresponding 500 images generated by Stable Diffusion V1.4 with the same prompt using HuggingFace's default arguments. - -For our Divergence method, we randomly sample $d=10$ dimensions to compute the divergence over and set $h=0.1$. For our Reconstruction method, we compute an average distance over 10 reconstructed images per original image and use add/remove noise equivalent to 1 time-step. - -For each method, we evaluate the performance on no augmentation, random $256\times 256$ crop (corresponding to about a quarter of the image for generated images), grayscale, random horizontal flip with probably $0.5$, random rotation between $[-30^\circ,30^\circ]$, and random color jitter of: brightness from $[0.75,1.25]$, contrast from $[0.75,1.25]$, saturation from $[0.75,1.25]$, and hue from $[-0.1,0.1]$. - - -*Table 1: Divergence, Reconstruction, and ResNet Detection AUC and True Positive Rate at 0.1 False Positive Rate.* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
AUC / TPR$_{0.1}$Method
AugmentationDivergenceReconstructionResNet
No Aug.0.4535 / 0.0780.7310 / 0.0001.000 / 1.000
Crop0.4862 / 0.0920.4879 / 0.0641.000 / 1.000
Gray.0.4394 / 0.0560.7193 / 0.0001.000 / 1.000
H. Flip0.4555 / 0.0840.7305 / 0.0001.000 / 1.000
Rotate0.4698 / 0.0620.6937 / 0.0000.9952 / 0.984
Color Jitter0.4647 / 0.0820.7219 / 0.0001.000 / 1.000
- -*Figure 2: AUC-ROC Curves in No Augmentation Setting.* -
-
-

(a) Divergence

- {% include figure.html path="assets/img/2023-11-08-detect-image/method=Divergence_n_points=500_n_samples=10_noise_amount=0.1_num_inference_steps=25_seed=229_bs=1_roc.png" class="img-fluid" %} -
-
-

(b) Reconstruction

- {% include figure.html path="assets/img/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1_roc.png" class="img-fluid" %} -
-
-

(c) ResNet

- {% include figure.html path="assets/img/2023-11-08-detect-image/method=Resnet_n_points=1000_seed=229_bs=1_roc.png" class="img-fluid" %} -
-
- -*Figure 3: Histograms of Computed Statistics in No Augmentation Setting.* -
-

(a) Divergence

- -
-
-

(b) Reconstruction

- -
-
-

(c) ResNet

- -
- -**Trained Baseline.** The trained baseline does extraordinarily well at the MSCOCO vs. Stable Diffusion detection task. It achieves $1.0$ AUC (perfect accuracy) across all augmentation settings except for rotation for which it gets an almost perfect AUC of $0.9952$. This high performance matches Corvi et al. 2023's findings, stemming from the fact that the ResNet was trained on the MSCOCO distribution and Latent Diffusion generated images are similar to Stable Diffusion generated images. In their paper, the performance noticeably drops to around $0.7$-$0.8$ AUC for other image generation models. - -**Divergence.** Divergence does extremely poorly, with AUCs just slightly below 0.5, indicating that in fact generated images have greater divergence than real images---the opposite of our intuition, but this may also be noise as these values are essentially equivalent to random guessing. We suspect that this is largely due to our low choice of $d$, meaning that we cannot get a representative enough sample of the dimensions to get an accurate estimate of the true divergence. We may have also chosen $h$ too large, as we have no idea of the scale of any manifold structure that may be induced by the gradient field. - -**Reconstruction Error.** Reconstruction error, on the other hand, boasts impressive AUCs of around $0.7$. The shape of the curve is particularly strange, and with the additional observation that the AUC when the random cropping is applied goes back to $0.5$ AUC, indicated to us that the image size may be the differentiating factor here. MSCOCO images are often non-square and smaller than the $512\times 512$ constant size of the generated images. As the Frobenius norm does not scale with image size, we hypothesize that using the spectral norm and dividing by the square root of the dimension would instead give us a more faithful comparison, akin to the random crop results. 
However, data visualization of the examples does not show a clear correlation between image size and reconstruction error, so it appears that this detection algorithm has decent AUC but poor TPR at low FPR, and is vulnerable specifically to cropping augmentations. - -**Detection Visualizations.** We developed a dashboard visualization that enables us to look more closely at images and their associated detection statistics. Some examples we can pick out that seem to make sense include Figure 4, where the real image is captioned as a CGI fake image, and predictably gets a low statistic as deemed by Reconstruction Error (the generated image, ironically, gets a higher statistic denoting more real). - -*Figure 4: An Example Image of a CGI "Real" Image Getting Detected as Fake.* -{% include figure.html path="assets/img/2023-11-08-detect-image/cgi-example.png" class="img-fluid" %} - -However, from a visual inspection of images, we cannot identify a clear relationship between image content or quality of generated images that holds generally. We make our dashboard public and interactive; a demo can be seen below: - -
- -
- - -### Discussion - -Throughout our experiments, the divergence-based detector performs much worse than the other detectors. Because the latent space has a very high dimension, the divergence detector may require sampling from many more dimensions than is practical for an image detector in order to obtain good estimates of the divergence. Further research should try to scale this method to see if it obtains better results. Mitchell et al. 2023 justifies the validity of their machine-generated text detector as a Hutchinson trace estimator of the divergence of the log probabilities; however, the poor performance of the divergence detector implies that estimating the trace is not helpful for image detection and that other model properties may instead be at play for this method's effectiveness. In contrast, the noising/denoising detector implicitly incorporates information from all dimensions, which may explain its better performance. The model from Corvi et al. 2023 outperforms our methods under all augmentations, achieving a perfect AUC on images without data augmentations. This is consistent with what was reported in their manuscript. However, this is not an unbiased estimate of the trained classifier's performance, because they also used MSCOCO data to train and test their classifier. We were limited to this experimental setup by data availability and previous literature. Future work should compare the zero-shot and trained detectors on completely out-of-sample data and with different generation models. - -Although at face-value our detectors perform worse than the pre-trained model in our experiments, our project still introduces some interesting ideas for machine-generated image detection that are of interest to the broader community and worth further exploring. First, the techniques we explored parallel zero-shot machine-generated image detection methods for text. 
The fact that in both settings, perturbing the inputs and computing the curvature of the log probabilities are potent signals for machine-generated detection implies that these features may be an indelible mark of machine-generated models across all modalities. Second, image detection algorithms trained on data may be fundamentally vulnerable to adversarial modifications. Because there exists non-robust features that are predictive of the output in training data, adversaries, who realistically may have access to the image detection algorithm over many trials, can craft subtle background noise that circumvents image-detection algorithms. Our methods, which consist of only a few parameters, are not prone to adversarial attacks unlike trained models. Third, this work highlights the use of other features besides the image as features for image detection, e.g. score function and noising/denoising the image. Future work may build on the ideas behind these features to improve trained image detectors. \ No newline at end of file diff --git a/_posts/2023-11-08-diaz-proposal.md b/_posts/2023-11-08-diaz-proposal.md deleted file mode 100644 index 0eea2c91..00000000 --- a/_posts/2023-11-08-diaz-proposal.md +++ /dev/null @@ -1,359 +0,0 @@ ---- -layout: distill -title: 'Optimizations of Transformers for Small-scale Performance' -description: 'CNNs generally outperform ViTs in scenarios with limited training data. However, the narrative switches when the available training data is extensive. To bridge this gap and improve upon existing ViT methods, we explore how we can leverage recent progress in the transformer block and exploit the known structure of pre-trained ViTs.' 
-date: 2023-12-10 -htmlwidgets: true - -authors: - - name: Sebastian (Sebo) Diaz - url: - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-08-diaz-proposal.bib - -toc: - - name: 'Transformers: Great But Not Enough' - subsections: - - name: Basic Background - - name: 'Vision: The Problem' - - name: Transformer Block - - name: 'Translation to Vision: Experimentation and Analysis' - subsections: - - name: Vanilla vs. Simplified Comparison - - name: Initialization Schemes - - name: 'Conclusion and Limitations' - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -
- - - -

- Figure 1: Attention Maps of a Vision Transformer (DINO). Source: https://github.com/sayakpaul/probing-vits . -

-
- -## Transformers: Great but fall short -### Basic Background -Transformers have well-earned their place in deep learning. Since the architecture's introduction in, we have seen huge improvements in our model's capabilities. The most notable of which being natural language processing (NLP) with large-language models such as GPT-4 stunning the world at-large. - -Originally designed for NLP, the transformer architecture has been robust in other domains and tasks. For example, it has been translated, with success, to de-novo protein design, the medical field, and, of most relevance, computer vision. This behaviour differs from architectures of the past like RNNs and CNNs which have been limited to one domain. The potent generalizability of the transformer lies within the self-attention mechanism. Without getting to much into detail, self-attention enables nodes within a neural network to probe the input sequence, determine what is most interesting, and attend towards the region of interest by dynamically updating its weights. Visualization of attention can be seen in [Figure 1](#figure-1). By probing the data landscape, the architecture enables long-range dependencies to be modeled regardless of distance. From a Fourier perspective, the transformer caters towards the low-frequency information in the data and deciphers how each element of an input sequence all relate to each other. These connections help the transformer accurately model global information in the data perhaps indicating why they are so powerful. In this blog, we will specifically examine the transformer in vision, determine how it can be improved, and evaluate new strategies to increase its viability on small datasets. - -
- - {% include figure.html path="assets/img/2023-11-08-diaz-proposal/vit_workflow.png" class="img-fluid rounded z-depth-1" %} -
- Figure 2: ViT workflow. -
-
- - -### Vision: The Problem -The Vision Transformer (ViT) introduced the transformer to the computer vision world in late 2020. The ViT is simple: it funnels image patches into a tokenization scheme, adds positional encoding, and feeds these tokens into a transformer block. A graphical workflow of the ViT from the original paper can be seen in [Figure 2](#figure-2). - -Since its introduction, the ViT and associated variants have demonstrated remarkable benchmarks in image classification, image restoration, and object detection. Much of these new methods can compete and even outperform long-established CNNs. However, ViTs are data-hungry requiring extensive amounts of training data to surpass CNNs. In small scale training, ViTs are burdensome to train and achieve sub-par performance compared to their CNNs counterparts. In , they investigate this discrepancy by comparing the feature and attention maps of small-scale CNNs and ViTs, respectively. The authors determine the ViT lacks the ability to learn local information and has ill-suited representation capacity in the lower layers. In contrast, the CNN demonstrate remarkable inductive bias due to weight sharing and locality properties which enable high-frequency modeling. The ViT's low-frequency and the CNNs high-frequency capacity has initiated a wave of new models aimed at combining the two for comprehensive modeling capability. - -Despite the complementary nature of these architectures, they break the fidelity of the transformer and make for difficult analysis. Therefore, there exists a gap in the traditional transformer architecture to perform in small-data regimes, particularly in vision. Motivated by this shortcoming, we aim to investigate and improve the current ViT paradigm to narrow the gap between CNNs and ViTs on small-data. 
In particular, we examine novel initialization schemes, removal of component parts in our transformer block, and new-learnable parameters which can lead to better performance, image throughput, and stable training on small-scale datasets. - -
- - - - -
- Figure 3: Standard transformer encoder block. Encoder can be stacked for x amount of layers. -
-
- -### Transformer Block -To serve as a basis of comparison, we will examine the standard transformer block seen in [Figure 3](#figure-3). The block is identical to with the exception of using layer normalizations before the multi-headed attention (MHA) and multi-layer perceptron (MLP) blocks as opposed to after. In practice, this placement has been shown to be more stable and increase performance. With the exception of this modification, the block has seen little improvements over the years testifying to its robustness. However, recent trends in theory hints towards ways we could break this notion – all while enjoying increased performance. - -Before we delve into these advances and their implications, consider the following transformer block information flow: - -$$ -\displaylines{ -\text{Attention} = \text{A}(X) = \text{Softmax}\Biggl(\frac{XW_{Q}W_{K}^{T}X^{T}}{\sqrt{k}}\Biggr) -\\ \\ -\text{A}(X) \in \mathbb{R}^{T\times T}} -$$ - -which is shortly followed by: - -$$ -\displaylines{ - \text{S}(X) = \text{A}(X)W_{V}W_{O} -\\ \\ -\text{S}(X) \in \mathbb{R}^{T\times d} -} -$$ - -and: - -$$ -\text{Output} = \text{MLP}(\text{S}(X))= \text{Linear}(\text{GELU}(\text{Linear}(\text{S}(X)))) -$$ - - -where: - -* Embedded input sequence: $$X \in \mathbb{R}^{T \times d}$$ -* Linear query and key layers: $$W_{Q},W_{K} \in \mathbb{R}^{d \times k}$$ -* Linear value and projection layers: $$W_{V}, W_{O} \in \mathbb{R}^{d \times d}$$ -* MLP Linear layers: $$\text{Linear} \in \mathbb{R}^{d \times d}$$ -* $$T = $$ \# of tokens, $$d = $$ embedding dimension, $$k = \frac{d}{H}$$, $$H = $$ \# of attention heads - -The flow of information mirrors the transformer block in [Figure 3](#figure-3). Readers unfamiliar with transformer intricacies such as MHA and MLPs are encouraged to read. 
- -Recently, there have been many proposals on how the transformer block can be further modified to increase data throughput and eliminate “redundant” or “useless” parts that do not have any significant contribute to the tranformer's modeling capabilities. For example, , used a parallel MHA and MLP incorporated into a large-scale ViT for stable and efficient training. Throughout this blog, we will focus on the ideas overviewed and proposed by as they present intriguing results and a synthesis on the current state of this research topic. The interested reader is encouraged to study their paper for a more extensive understanding of the ideas. - - -
- - {% include figure.html path="assets/img/2023-11-08-diaz-proposal/simplified_block.png" class="img-fluid rounded z-depth-1" %} -
- Figure 4: Comparison between transformer architectures. Left: Standard block as shown in Figure 3. Bottom Right: Parallel block proposed in. Top Right: Newly proposed encoder. -
-
- -The overarching theme of was to take the standard transformer block and evaluate the necessity of each component. In doing so, they removed each component part and studied its effects on performance. Understandably, blindly removing components will lead to unstable training and ill-performance (i.e. if one were to remove the skip connections, they would encounter vanishing gradients as seen [Figure 14](#figure-14)). However, took the approach of removal combined with recovery. For example, when the authors removed skip connections, they required a modification to the self-attention matrix of the form: - -$$ -\text{A}(X) \leftarrow (\alpha\text{I} + \beta \text{A}(X)) -$$ - -where $$\alpha$$ and $$\beta$$ are learnable scalars and initialized to $$1$$ and $$0$$, respectively, and $$\text{I} \in \mathbb{R}^{T \times T}$$ is the identity matrix. This modification initializes the self-attention matrix providing a pathway towards training stability. They further entertained a more complicated scheme with a third parameter, but we only consider the two parameter version for simplicity. By this iterative removal and recovery process, the authors converged towards the final transformer block seen in [Figure 4](#figure-4). The most shocking aspect of this proposed block is the removal of the $$W_{V}$$ and $$W_O$$ layers. They arrived at this justification by initializing $$W_{V}$$ and $$W_{O}$$ to the identity with separate, learnable scalars and training a model. Over the course of training, the scalar ratios converged towards zeroThis is a slight simplification. Look at Section 4.2 and Figures 4 and 20 in He et. al 2023 for a more detailed explanation.. Due to the heavy cost and speed these linear layers present, removal of them decreases parameter counts and enables more data throughput. 
A concise PyTorch interpretation of the new block can be seen below: - -```python -import torch -import torch.nn as nn - -class ShapedAttention(nn.Module): - def __init__(self, width: int, n_hidden: int, num_heads: int): - super().__init__() - # Determining if hidden dimension of attention layer is divisible by number of heads - assert width % num_heads == 0, "Width and number of heads are not divisble." - - # Setting vars - self.head_dim = n_hidden // num_heads - self.num_heads = num_heads - # Creating Linear Layers - self.W_K = nn.Linear(width, self.head_dim) - self.W_Q = nn.Linear(width, self.head_dim) - # Learnable Scalars: alpha_init and beta_init are up to user - self.alpha = nn.Parameter(alpha_init) - self.beta = nn.Parameter(beta_init) - # Softmax - self.softmax = nn.Softmax(dim = -1) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - # Input: - # x: shape (B x T x dim) - # Outputs: - # output: shape (B x T x T) - # Compute keys and queries - k = self.W_K(x) - q = self.W_Q(x) - # Scaled dot-product: divide scores by sqrt(head_dim) - attn_scores = torch.bmm(q, k.transpose(1,2)) / (self.head_dim**0.5) - attn_scores = self.softmax(attn_scores) - # Shaped attention - B, T, _ = x.shape - output = self.alpha*torch.eye(T, device = x.device) + self.beta * attn_scores - - return output -``` - -The performance of the final transformer block (referred to as SAS-P) demonstrated powerful results. In the [Figure](#figure-5), the simplified transformer matches the standard block in cross-entropy loss even when taken through a long runtime. Additionally, Figure 6 in demonstrates the model scales better with depth which is of paramount importance in modern neural network. - 
- - {% include figure.html path="assets/img/2023-11-08-diaz-proposal/fig5.png" class="img-fluid rounded z-depth-1" %} -
- Figure 5: Training speed experiment. Figure 6. in Ref. 15. Pre-LN is the standard transformer block. SAS-P is the block. It is shown with and without an initial layer normalization. -
-
- - -## Translation to Vision: Experimentation and Analysis -The results shown in show promise the transformer can be improved. Motivated by vision applications, we seek to implement such infrastructure, with slight modifications, and determine if it improves performance in small datasets. - -### Vanilla vs. Simplified Comparison -For evaluation, we compare the simplified transformer to a vanilla ViT. The vanilla ViT's transformer block is identical to the formulation presented earlier. We use Conv2D patch embedding with a random initial positional embedding. For the simplified setup, we initialize $$\alpha = \beta = 0.5$$ and do not use a centering matrix – although it has been shown to improve ViT performance. We use one Layer Normalization just prior to the transformer encoder. $$\text{Width} = 96$$ is kept uniform throughout the model. The dataset is CIFAR-10 with a $$\text{batch size} = 256$$. Data augmentations were random horizontal and vertical flips with 15º random rotations. Optimizer is AdamW with $$\text{lr} = 0.003$$ and $$\text{weight decay} = 0.01$$. We employ a cosine learning rate scheduler to maintain consistency with ViT literature, although empirically showed a linear learning rate is slightly advantegeousFigure 11 in He et. al 2023.. We ran our model for $$\text{epochs} = 50$$ with $$\text{runs} = 3$$ to evalute run-to-run stability. A condensed version of the experiment choices can be seen in Table 1. The results can be seen in [Figure 6](#figure-6) and Table 2To avoid clutter, only the training accuracies vs epochs are shown. Loss curves showed similar relationships.. - -| Table 1. Experiment 1: ViT Model Settings | | -| ------------- |----| -| # of channels | 3 | -| Image size | 32 | -| Patch size | 4 | -| Width | 96 | -| # of heads | 4 | -| # of layers | 8 | - -
- - -
- Figure 6. Comparison between vanilla and simplified transformers. Width = 96. Layers/Depth = 8. -
-
- -| Table 2. Experiment 2: Results | Vanilla | Simplified | $$\Delta$$ | -| ------------- |:-------------:| -----:|----:| -| Parameters | 358186 | 209210 | -41.59% | -| Avg. epoch time (s) | 12.954 | 11.305 | -12.73% | - -Experiment 1 showed the training evaluation trajectory is nearly identicable between the two models although the simplified outperforms by small margin. Although the subtle difference, it is noteworthy to mention the simplified version achieved mirroring performance with less parameters and higher image throughput. The similarity of the curves hints the removal of the skip connections, layer normalizations, and value/projection layers were merited, begging the question whether these components held our modeling power back. - - -This experimentation shows the similar nature of each model, but does not translate well to wider modern neural networks. In Experiment 2, we expanded to $$\text{width} = 128 $$ to determine if there is any emergent behaviour as the network becomes wider. We replicate everything in Experiment 1 and solely modify the width. The settings are restated in Table 3. The results for Experiment 2 can be seen in [Figure 7](#figure-7) and Table 4 below. - - -| Table 3 | Experiment 2: ViT Model Settings | -| ------------- |----| -| # of channels | 3 | -| Image size | 32 | -| Patch size | 4 | -| Width | 128 | -| # of heads | 4 | -| # of layers | 8 | - -
- - -
- Figure 7. Comparison between vanilla and simplified transformers. Width = 128. Layers/Depth = 8. -
-
- - -| Table 4. Experiment 2: Results | Vanilla | Simplified | $$\Delta$$ | -| ------------- |:-------------:| -----:|----:| -| Parameters | 629130 | 364954 | -41.99% | -| Avg. epoch time (s) | 13.093 | 11.735 | -10.37% | - -The narrative is different for Experiment 2. The simplified version outperforms the vanilla version by a considerable margin. An adequate explanation for this discrepancy in vision tasks merits further exploration. However, considering the proposed unnecessary nature of the value and projection matrices, we can hypothesize they interfere with the modeling capability as more parameters are introduced. - -Due to the sheer difference in outcomes between the models, we question how the models are attending towards various inputs to gain a better understanding of what is happening under the hood. To probe this curiosity, we trained the models with identical setting in Experiment 2, but modified the $$\text{depth} = \text{layers} = 12$$. This model setup will be covered -in more detail in future paragraphs. We inputted CIFAR-10 to each model and visualized a side-by-side comparison of attention maps for five input images. An interactive figure is seen [Figure 8](#figure-8). - -
-
- - -
- Figure 8. Comparison between vanilla and simplified attention maps. Width = 128. Layers/Depth = 12. Interpolation method: "nearest". -
-
-
- -There is a noticeable contrast in the attention maps. For the simplified model, the attention maps seem to place weight in a deliberate manner, localizing the attention towards prominent features in the input image. On the other hand, the vanilla model is chaotic in its attention allocation. It is noteworthy that the vanilla model does place attention towards areas of interest, but also attends towards irrelevant information perhaps compromising its judgement at the time of classification. It can thus be reasoned the simplified model can better decipher which features are relevant demonstrating, even in low data regimes, the representational quality is increased. - -While we have so far investigated width, it will be informative to understand how depth impacts the performance of the simplified version. In , they employ signal propagation theory, which is most prominent in deeper networks. Therefore, we suspect as we increase the depth of our models, the simplified version will outperform the vanilla version by a larger margin. Here, we set $$\text{layers} = 12$$ and maintain $$\text{width}=128$$. The training accuracies and experiment results are seen in [Figure 9](#figure-9) and Table 5. - 
- - -
- Figure 9. Comparison between vanilla and simplified transformers. Width = 128. Layers/Depth = 12. -
-
- -| Table 5. Experiment 3: Results | Vanilla | Simplified | $$\Delta$$ | -| ------------- |:-------------:| -----:|----:| -| Parameters | 927370 | 531106 | -42.72% | -| Avg. epoch time (s) | 17.527 | 15.723 | -10.29% | - -Again, the simplified model outperforms the vanilla model by a large margin. Although we have focused on performance in the past, we discern an interesting trend when we scaled the depth: the simplified version seemed to be more consistent from run-to-run (recall $$\text{runs} = 5$$). This leads us to believe that as we continue to scale the depth, the simplified version will be more stable. Future experimentation will be necessary to corroborate this claim. - - - - - - -### Initialization Schemes -We have seen the impact simplification can have on the performance of the transformer performance and self-attention. However, the used initializatons of $$\alpha$$ and $$\beta$$ in Experiments 1, 2, and 3, was based on equal weighting between the initial attention matrix and the identity matrix. In , they employ a full weighting of the identity matrix and zero'd out the attention matrix at initialization. Here, we aim to determine the effect of different initialization values. Recall $$\alpha = \beta = 0.5$$ in Experiments 1, 2, 3. Now, we investigate two more initializaton schemes: $$\alpha = 1.0$$ and $$\beta = 0.0$$ and vice-versa. We replicate the protocol used in Experiment 2 and only modify these learnable scalar at initializaton and set $$\text{runs} = 1$$. The results are shown in [Figure 10](#figure-10). Interestingly, the initialization scheme proposed by , does *not* outperform the equal weighting or inverse weighting scheme. Understandably, it does poorly at initialization, but never recovers. The equal weighting and inverse weighting approaches show nearly identical performance often trading off superior performance from epoch-to-epoch. - - - -
- - -
- Figure 10. Various Initialization Schemes. -
-
- -This lead us to believe the initializaton scheme could be improved. There has been some work on initializing vanilla ViTs to gain performance. In , a prominent diagonal was observed for the $$W_{q}W_{k}^{T}$$ layers in ViT's pre-trained on large datasets, which have been shown to outperform CNNs. The figure shown in the paper can be seen in [Figure 10](#figure-10). This motivated the authors to provide a novel initialization scheme where the $$W_{Q}$$ and $$W_{K}$$ matrices are initialized in a way to encourage diagonal prominence in the forward pass. However, our findings contradicted this scheme, as our diagonal-dominant initialization scheme $$\alpha = 1$$ and $$\beta = 0$$ did not out perform the inverse or the equal weighting. This is likely due to the fact we have learnable parameters and do not initialize our $$W_{Q}$$ and $$W_{K}$$'s directly, but rather the attention matrix post-softmax. However, it is important to realize that the learnable parameters still encourage diagonal prominence regardless of intialization. Although used this initialization scheme to increase performance in small ViT's trained from scratch, which encourages tokens to attend toward to themselves through the depth of the network, they did not take into consideration how the diagnolization varys from layer-to-layer. Seen in [Figure 10](#figure-10), we can see the prominence of the diagnoal elements fades as we go deeper into the network. Observing this behaviour, we hypothesize the reason the initialization scheme of $$\alpha = 1$$ and $$\beta = 0$$ underperformed was not due to the initialization itself, but how it was applied to each layer. In other words, when we initialized $$\alpha = 1$$ and $$\beta = 0$$, we encouraged this token self-attentive nature throughout the depth of the network, when we should be encouraging it in the opening layers and tapering it off as we approach the end of the model. 
- -To give more evidence to this hypothesis, we experimented with the following dynamic initialization scheme: - -$$ -\displaylines{ -\alpha_i = \frac{1}{i}, \beta_i = 1 - \frac{1}{i} \\ - \text{ where } i \in [1, 2, ..., L] \text{ and } L = \text{# of layers} -} -$$ - -The results from this initialization scheme compared to the uniform initializations can be seen in [Figure 12](#figure-12) The results show that the dynamic scheme outperform the results perhaps indicating the representation quality is connected toward encouraging self-token connection in the lower layers, while allowing for token's to intermingle in higher layers. We further experiment with the inverse dynamic where we switch the $$\alpha$$ and $$\beta$$ values. The results in [Figure 13](#figure-13) show the dynamic approach is stronger during training then the inverse dynamic approach. - -
- - {% include figure.html path="assets/img/2023-11-08-diaz-proposal/diagonal_vit_tiny.png" class="img-fluid rounded z-depth-1" %} -
- Figure 11: Diagonal prominence in a pre-trained ViT Tiny. Layers 1-11 (Left-to-Right). Heads 1-3 (Top-to-Bottom). Extracted from Figure 1 of Mimetic Initialization of Self-Attention Layers. -
-
- -
- - -
- Figure 12. Experiment 5: Dynamic vs. Uniform Initializations. -
-
- -
- - -
- Figure 13. Experiment 6: Dynamic vs. Inverse Dynamic Initializations. -
-
- -## Conclusion and Limitations -Through this blog post we have overviewed the simplification of our known transformer block and novel initialization schemes. We took the problem of small-scale training of ViT's and looked to address it leveraging such ideas. Through a series of experiments and thoughtful schemes, we generated an informed and sophisticated approach to tackle such a problem. In the end, we generated a method that outperformed a traditional ViT in small scales. We explored ways of scaling the ViT in width and depth and probed how the new model distributed attention. Our comparisons were intentionally simple and effective in addressing the underlying task and illustrating the model's potential. Although the results presented showed promise, extensive validation needs to be performed in the future. It will be interesting to see how this new transformer block and initialization scheme can be further utilized in computer vision. For example, a logical next route to entertain is to compare convergence rates in larger scale ViT on datasets such as ImageNet-21k to see if the modeling advantage persists. - -There are a few limitations in this study. For one, only one dataset was used. Using other datasets such as CIFAR-100 or SVHN would provide more insight into this methodology. Secondly, there is a need for more comprehensive evaluation and ablation studies to determine the true nature of the simplified transformer and initialization schemes. Third, a comparison to a smaller scale CNNs is needed to gauge where this method comparatively sits in modeling power. - - -
- - -
- Figure 14. Experiment 0: Removal of skip connections in traditional ViT. -
-
- - diff --git a/_posts/2023-11-08-double_descent.md b/_posts/2023-11-08-double_descent.md deleted file mode 100644 index da0a266d..00000000 --- a/_posts/2023-11-08-double_descent.md +++ /dev/null @@ -1,564 +0,0 @@ ---- -layout: distill -title: Ensemble Learning for Mitigating Double Descent -description: Exploring when and why Double Descent occurs, and how to mitigate it through Ensemble Learning. -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Mohit Dighamber - affiliations: - name: MIT - - name: Andrei Marginean - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-08-double_descent.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Abstract - - name: Motivation - - name: Related Work - - name: Setup - - name: Models - subsections: - - name: Decision Trees - - name: AdaBoost Tree - - name: L2-Boost Tree - - name: Random Forest - - name: Logistic Regression - - name: Neural Networks - - name: Ensemble Learning - subsections: - - name: Weak-Learner Ensemble - - name: Multi-Layer Perceptron Ensemble - - name: Results and Discussion - - name: Conclusion - - name: Future Work - - name: Reproducibility Statement - - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. 
-_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - - -## Abstract - -We outline the fundamental 'bias-variance tradeoff' concept in machine learning, as well as how the double descent phenomenon counterintuitively bucks this trend for models with levels of parameterization at or beyond the number of data points in a training set. We present a novel investigation of the mitigation of the double descent phenomenon by coupling overparameterized neural networks with each other as well as various weak learners. Our findings demonstrate that coupling neural models results in decreased loss during the variance-induced jump in loss before the interpolation threshold, as well as a considerable improvement in model performance well past this threshold. Machine learning practitioners may also find useful the additional dimension of parallelization allowed through ensemble training when invoking double descent. - -*** - -## Motivation - -There are many important considerations that machine learning scientists and engineers -must consider when developing a model. How long should I train a model for? What features -and data should I focus on? What exactly is an appropriate model size? This last question -is a particularly interesting one, as there is a bit of contention regarding the correct answer -between different schools of thought. A classical statistician may argue that, at a certain -point, larger models begin to hurt our ability to generalize. By adding more and more -parameters, we may end up overfitting to the training data, resulting in a model that poorly -generalizes on new samples. On the other hand, a modern machine learning scientist may -contest that a bigger model is always better. 
If the true function relating an input and output
-is conveyed by a simple function, a larger model only adds needless complexity. In reality, neither of these ideas is completely correct in
-practice, and empirical findings demonstrate some combination of these philosophies.
-This brings us to the concept known as *double descent*. Double descent is the phenomenon
-where, as a model’s size is increased, test loss increases after reaching a minimum, then
-eventually decreases again, potentially to a new global minimum. This often happens in the
-region where training loss becomes zero (or whatever the ’perfect’ loss score may be), which
-can be interpreted as the model ’memorizing’ the training data given to it. Miraculously,
-however, the model is not only memorizing the training data, but learning to generalize as
-well, as is indicated by the decreasing test loss.
-
-The question of ’how big should my model be?’ is key to the studies of machine learning
-practitioners. While many over-parameterized models can achieve lower test losses than the
-initial test loss minimum, it is fair to ask if the additional time, computing resources, and
-electricity used make the additional performance worth it. To study this question in a novel
-way, we propose incorporating *ensemble learning*.
-
-Ensemble learning is the practice of using several machine learning models in conjunction
-to potentially achieve even greater accuracy on test datasets than any of the individual
-models. Ensemble learning is quite popular for classification tasks due to this reduced error
-empirically found on many datasets. To our knowledge, there is not much literature on how
-double descent is affected by ensemble learning versus how the phenomenon arises for any
-individual model.
-
-We are effectively studying two different *types* of model complexity: one that incorporates
-higher levels of parameterization for an individual model, and one that uses several models in
-conjunction with each other. 
We demonstrate how ensemble learning affects the
-double descent phenomenon. By creating an ensemble that includes (or is fully composed of) overparameterized
-neural networks, which can take extreme amounts of time and resources to generate, together with other
-overparameterized machine learning models, we will show the changes in the loss curve,
-specifically noting the changes in the regions where double descent is invoked. We hope that the results we have found can potentially be used by machine learning researchers and engineers to
-build more effective models.
-
-***
-
-## Related Work
-
-One of the first papers discussing double descent was ’Reconciling modern machine-
-learning practice and the classical bias–variance trade-off’ by Belkin et al. . This paper
-challenged the traditional idea of the ’bias-variance tradeoff’,
-a fundamental concept in machine learning that describes the tension between two types of
-model error: bias and variance. Bias is the error between the expected prediction of the
-model and the true output value, introduced by approximating a real-world quantity with
-a model, which may oversimplify the true problem at hand. Variance refers to the error
-due to a model’s sensitivity to small fluctuations in the training dataset. Overfitted models
-may have high variance, as they may model random noise in the data as well.
-
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/biasvariance.png" title="The Double Descent Curve" class="img-fluid rounded z-depth-1" %} -
-
-
- The Bias-Variance Tradeoff -
-
-In short, classical statistical learning argues that there is some optimal level of parameterization of
-a model, where it is neither underparameterized nor overparameterized, that minimizes the
-total error between bias and variance. However, Belkin’s paper finds that, empirically, the tension between
-bias and variance no longer becomes a tradeoff after a certain level of overparameterization. They showed that after the interpolation threshold (beyond where the model fits perfectly to
-the training data), test error eventually began to decrease again, even going below the
-error deemed optimal by the bias-variance minimum.
-
-
-Nakkiran et al.’s ’Deep Double Descent: Where Bigger Models and More Data Hurt’ expanded these findings to the realm of *deep* learning. In this work, double descent is shown to occur for both large models and large datasets. Additionally, this paper demonstrates that,
-counterintuitively, adding more data at a certain point actually worsened the performance
-of sufficiently large models. Specifically, this occurred at and close to the interpolation
-threshold for neural models. This paper's results can be seen here:
-
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/openai.png" title="The Double Descent Curve" class="img-fluid rounded z-depth-1" %} -
-
-
- The Double Descent Curve -
- -For the region between the first and second loss minima, model -performance can suffer greatly, despite the increased computational time and resources used -to generate such models. While this region of the test loss curve is typically not a level of -parameterization that one would use in practice, understanding such loss curve behavior -can help practitioners for several reasons. For one, this degraded phase of performance can -be crucial for tweaking model architecture and adjusting training strategies. This is key to -discovering if one’s model is robust and adaptable to various other datasets and tasks. This -highlights the need for a new understanding for model selection in order to effectively generalize -to testing datasets better, mitigating decreases in model performance and invoking a -second loss minimum quickly. - -In the classic paper ’Bagging Predictors’, Breiman describes the concept of combining the -decisions of multiple models to improve classification ability . Empirically, this bootstrap aggregating, or ’bagging’ technique, reduced variance and improved accuracy, outperforming the single predictors that comprised the ensemble model. We present a novel combination -of the findings of this paper with the double descent phenomenon. Effectively, by increasing model complexity via overparameterization and ensemble learning, we aim to study if this combination can mitigate loss increases and invoke a second loss minimum with smaller models. - -*** - -## Setup - -### Computing Resources and Software - -We have implemented this project using CUDA and the free version of Google Colab, with additional computing units for more costly experiments. To train and test these models, we use various machine learning packages in Python, namely Scikit-learn, PyTorch and Tensorflow. Additional software commonly used for machine learning projects, such as numpy, tensorboard and matplotlib, was also utilized. 
- -All plots have been produced by us, unless otherwise specified. Note that all tensorboard plots have $0.25$ smoothing applied, except for the Soft-Voting Ensemble, which has $0.6$ smoothing applied (though this won't make much of a difference as will soon be seen). The non-smoothed plot can be seen traced in light-blue in all provided plots. - -### Data - -We use the MNIST dataset for this report . MNIST is a popular dataset used for image classification, where each sample image is a $28$ by $28$ grayscale image of a written integer between $0$ and $9$, inclusive. Each image comes with the true label of the image's integer. This data is publicly available for experimentation, and our use of it does not pose any ethical or copyright concerns. - -For this project, we use the MNIST dataset to unearth the double descent phenomenon. We experiment with a variety of models, as well as an ensemble of them: decision trees, AdaBoost trees, L2-Boost trees, random forests, logistic regression, and small neural networks. We choose these models because of their ability to be used for classification tasks, and more complicated models run the risk of exceeding Google Colab's limitations, especially when we overparameterize these models to invoke double descent. - -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/mnist.jpeg" title="MNIST Sample Data" class="img-fluid rounded z-depth-1" %} -
-
-
- Sample MNIST Data -
- -*** - -## Models - -### Decision Trees - -Decision trees are a machine learning model used for classification tasks. This model resembles a tree, splitting the data at branches, culminating in a prediction at the leaves of the tree. - -To invoke overparameterization for decision trees, we can start with a tree of depth 2, and increase the number of maximum leaves of the model until the loss plateaus. Then, keeping this new number of max leaves in our decision tree, we continually increase the maximum depth of the tree until the loss once again stops decreasing. Lastly, keep both the maximum leaves and depth at their plateau levels while increasing the max features. The results of this are plotted below. Notice how varying the number of maximum leaves has minimal effect on the loss, and how increasing the maximum depth causes the most dramatic decrease. However, fluctuations on the maximum depth at this point do not have a major effect, whereas varying the number of features causes another slight, yet consistent, fall in classification loss. - -Notice that the loss curve is more or less linear in the number of parameters (with some having much more effect than others), and so there is little evidence of double descent for this model. - -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/dd_decision_tree_zero_one_8.jpg" class="img-fluid rounded z-depth-1" %} -
-
- -
- Decision Tree Overparameterization -
- -### AdaBoost Tree - -Adaptive Boosting (AdaBoost) itself is an ensemble model used for robust classification. Freund et al.'s paper 'A Decision-Theoretic Generalization of On-Line Learning and an Application to Boosting' first introduced the algorithm . On a high level, this paper describes how boosting is especially effective when sequentially combining weak learners that are moderately inaccurate (in this case, these are decision trees) to create a strong learner. We study the loss curve of the AdaBoost model as we first increase the number of boosting trees which form a forest, then increase the number of forests across which we average results, after adding additional trees fails to significantly increase model performance. Each tree is constrained to have no more than 10 leaves. - -Notice that the loss curve is more or less linear in the number of parameters, and the double-U shape doesn't seem to make its presence known. - -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/dd_adaboost_zero_one_2.jpg" class="img-fluid rounded z-depth-1" %} -
-
- -
- AdaBoost Overparameterization -
- -### L2-Boost Tree - -L2 Boosting is quite similar to the AdaBoost model, except for L2 Boosting, as models are built sequentially, each new model in the boosting algorithm aims to minimize the L2 loss. Like before, we first increase the number of boosting trees which form a forest in the L2-Boost model, then the number of forests across which we average using majority voting. The results can be seen below. Each tree is constrained to have no more than 10 leaves. - -Notice how the classification loss begins to fall, then rises up again, then falls once more when we average across more forests to lower minimums than before. This result was consistent across multiple runs of this experiment, suggesting that double descent is real for L2-Boosted Tree Ensembles. - - -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/dd_l2boost_zero_one_1.jpg" class="img-fluid rounded z-depth-1" %} -
-
-
- L2-Boost Overparameterization -
- -The behavior of the loss once we add more models agrees with general intuition regarding ensembling, but the appearance of double descent as we increase the total number of parameters is still quite interesting to see. L2-Boost is a relatively inexpensive model and ensembling a large number of trees is still quite fast, suggesting that overparameterization could be the way to go in this case. - -### Random Forest - -Random Forest is another popular ensemble model. As the name implies, it is a collection of decision trees with randomly selected features, and, like the singular decision tree, this model is used for classification tasks. - -We initialize random forest with a small number of maximum leaves allowed in each tree, and increase the max leaves until we see the loss plateau as we continually add more. After this, we begin increasing the number of trees in our forest until the loss plateaus once again. - -While Belkin et al. lists random forest as a model exhibiting double descent, this claim has been recently disputed, namely by Buschjager et al, which suggests that there is no true double descent with the random forest model . Instead, they suggest that random forest does not overfit in the classical sense, and argue that its curve instead has a single descent. By looking at our results, there is little evidence for the double-U curve, but we will refrain from drawing a decisive conclusion. - -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/dd_rf_zero_one_6.jpg" class="img-fluid rounded z-depth-1" %} -
-
-
- Random Forest Overparameterization -
- -Despite this, for our ensemble model, we aim to see if the addition of this overparameterized learner to the neural network's decision making is able to improve ensemble performance. - -### Logistic Regression - -Logistic regression is a classic model used for estimating the probability a sample belongs to various classes. We induce overfitting in logistic regression through two methods. - -First, we continually increase the 'C' parameter, indicating the inverse strength of regularization applied to the regression, as shown below. Notice that the loss decreases to a minimum before it starts slowly rising again, indicating that overfitting through fluctuations in 'C' may not actually lead to double descent, as would be expected from classical theory. - -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_c.jpg" class="img-fluid rounded z-depth-1" %} -
-
-
- Logistic Regression Overparameterization (Regularization Based) -
- -Second, we try inducing double descent by varying the ratio of the number of features over the amount of data. We gradually reduce this ratio using the intuition developed by Deng et al. in order to induce overfitting , since the data becomes more and more separable as the number of features increases relative to the number of data samples. - -To do this, we test varying across the number of training samples instead of varying the number of features used for training. This eventually leads to 0 training error, but causes testing error to blow up, suggesting that some significant amount of training data is still needed to witness the desired behavior, consistent with both statistical and machine learning theory. - - -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_d.jpg" class="img-fluid rounded z-depth-1" %} -
-
- -
- Logistic Regression Overparameterization (Feature-Data Ratio Based) -
- -An interesting setup for future experiments would be simultaneously increasing the amount of training samples and the number of polynomial features given to the logistic regression, while increasing the feature-data ratio each time we reparameterize or redefine the dataset. - -### Neural Networks - -We use a Multilayer Perceptron as our main model for the ensemble. Our deep learning model is a relatively small one, with variable width in the hidden layer. By increasing this width, we eventually achieve perfect training loss. - -We define the general architecture of the neural network used in this report as follows: - -#### Network Layer - -Let the input data be an $m$ by $m$ pixel image from the MNIST dataset, which can be processed as an $m$ by $m$ matrix, where entry $(i,j)$ is an integer between $0$ and $255$ (inclusive) representing the grayscale color of the pixel. Note that $m=28$ for MNIST, though for generality, we use $ m $ in this network definition. A value of $0$ represents a black pixel, $255$ is a white pixel, and values between these are varying shades of gray. We first flatten this structure into a $d = m^2 $ by 1 vector, such that the entry $ (i,j) $ of the matrix becomes the $ j + 28*i$-th entry of the vector, using zero-indexing. We use this vector as the input of our neural network. - -Set $H$ as the hidden layer width, which in our project will be varied in different tests. Let $ W^1 $ be an $ d \times H$ matrix, where $ W^1_{ij}$ is the weight of input $i$ applied to node $j$, and let $W^1_0$ be an $H \times 1$ column vector representing the biases added to the weighted input. For an input $X$, we define the *pre-activation* to be an $H \times 1$ vector represented by $Z = {W^1}^T X + W^1_0$. 
- -We then pass this linearly transformed vector to the ReLU activation function, defined such that - -$$ -\begin{equation*} -\text{ReLU}(x)=\begin{cases} - x \quad &\text{if} \, x > 0 \\ - 0 \quad &\text{if} \, x \leq 0 \\ - \end{cases} -\end{equation*} -$$ - -We use this choice of activation function due to the well-known theorem of universal approximation. This theorem states that a feedforward network with at least one single hidden layer containing a finite number of neurons can approximate continuous functions on compact subsets of $ \mathbb{R}^{m^2} $ if the ReLU activation function is used . Applying an activation function ReLU to each element of $Z $, the layer finally outputs - -$$ -A = \text{ReLU}(Z) = \text{ReLU}(W^T X + W_0) -$$ - -Next, we will input $A$ into a second hidden layer of the neural network. Let $K$ be the number of classes that the data can possibly belong to. Again, $K = 10$ for MNIST, though we will use $K$ for generality. Then let $W^2$ be an $H$ by $K$ matrix, where $W^2_{ij}$ is the weight of input $i$ applied to node $j$, and let $W^2_0$ be a $K \times 1$ column vector representing the biases added to the weighted input. For input $A$, define a second pre-activation to be a $K \times 1$ vector represented by $B = {W^2}^T A + W^2_0$. - -This will yield a $K \times 1$ vector representing the logits of the input image, with which we'll be able to take Cross Entropy Loss or compute its probability of belonging to any of the $K$ classes. - -#### Training - -Let class $i $ be the true classification for a data point. We have that $y_i = 1$, and for all $j \neq i$, $y_j = 0$. Furthermore, let $\hat{y_i}$ be the generated probability that the sample belongs to class $i$. 
The categorical cross-entropy loss is then defined as follows:
-
-$$
-\mathcal{L}_{CCE} (y_i, \hat{y_i}) = - \sum_{i=0}^{K-1} y_i \log (\hat{y_i})
-$$
-
-From this computed loss, we use backpropagation and stochastic gradient descent (SGD) with learning rate $\eta = 0.1$ and momentum $= 0.95$ to optimize model weights. We run experiments on a dataset with $n = 4000$ subsamples that train over $100$, $500$, and $2000$ epochs using Belkin et al.'s approach to training . Up to interpolation, we train until we reach zero classification error on the training data, or until we have finished all of the epochs, whichever comes first. After we have reached the interpolation threshold, we train until we have gone through all of the epochs. Note that to get their results, Belkin et al. trained over $6000$ epochs, which proved to be prohibitively expensive given our resources. Instead, we chose to train over a variety of smaller maximum epoch sizes to illustrate the double descent curve taking clearer shape, with the $2000$ epoch run being the most indicative of this phenomenon. Below are the results of the trained and tested neural networks. Notice that interpolation consistently happens when the number of parameters is roughly equal to $n\times K$ (i.e. Parameter Count / 1000 $= 40$), and the test loss starts consistently getting lower and lower as we add more and more parameters beyond this threshold. Double descent is real!
-
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/mlp-100-epochs-train.png" title="MLP 100 Epoch Training" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/mlp-100-epochs-test.png" title="MLP 100 Epoch Testing" class="img-fluid rounded z-depth-1" %} -
-
- -
- MLP 100 Epoch Training/Testing Cross-Entropy Loss Over MLP Parameter Count / 1000 -
- -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/mlp-500-epochs-train.png" title="MLP 500 Epoch Training" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/mlp-500-epochs-test.png" title="MLP 500 Epoch Testing" class="img-fluid rounded z-depth-1" %} -
-
- -
- MLP 500 Epoch Training/Testing Cross-Entropy Loss Over MLP Parameter Count / 1000 -
- -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/mlp-2000-epoch-train.png" title="MLP 2000 Epoch Training" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/mlp-2000-epochs-test.png" title="MLP 2000 Epoch Testing" class="img-fluid rounded z-depth-1" %} -
-
- -
- MLP 2000 Epoch Training/Testing Cross-Entropy Loss Over MLP Parameter Count / 1000 -
- -For the sake of brevity, we avoid including plots for train/test classification loss for the MLPs. However, it is worth noting that train classification loss eventually reaches 0 in all experiments, whereas test loss eventually becomes $\sim$ 0.08 or smaller. - -Throughout each experiment, we vary across the number of total parameters of the model. For a network with $H$ hidden units, the total number of parameters is equal to $(d+1)\times H + (H + 1)\times K$, and so we choose $H$ accordingly each time we reparametrize. - -Note that we also incorporated a weight reuse scheme for models in the underparametrized regime to cut on training time, similarly to the approach in Belkin et al . - -Additionally, even though the individual MLPs are small, training several of them sequentially for a relatively large number of epochs can take a very long time. To help reduce the time it takes to complete experiments, we also try adapting the Parameter Count Generation Algorithm provided in John Abascal's blog to intelligently select the next level of parameterization given the performance of the previous one. This algorithm was designed to most clearly showcase the existence of the double descent curve by fitting a third degree polynomial (since that is roughly what we expect the double descent curve to look like) to the model capacity vs. test loss graph, and choosing the next parameter by examining the first derivative of the polynomial fit to the data. More detail is provided in his blog. - -This algorithm proved helpful for empirically confirming the existence and validity of the interpolation threshold. However, after a few tests with the algorithm, we chose to complete most of the experiments using a pre-specified list of parameters which were able to consistently capture the double descent phenomena in detail. - -*** - -## Ensemble Learning - -We experimented with two different types of ensembles. 
The first ensemble is what we call the 'weak-learner' ensemble, which is the model that incorporates the multi-layer perceptron supported by L2-Boost tree ensembles, random forests, decision trees and logistic regression. Note that we ultimately did not use AdaBoost in this ensemble because we believed this was too similar to the included L2-Boost model in both architecture and performance. - -The second ensemble is the 'multi-layer perceptron' ensemble, which includes 5 MLPs. - -### Weak-Learner Ensemble - -We use bootstrap aggregating, or 'bagging', to formulate our ensemble of these five models . Effectively, each model is given a certain number of 'votes' on what that model believes is the correct classification for any given MNIST sample image. We then experimented with two approaches to voting: hard voting and soft voting. - -In hard voting, the classification with the most total votes is then used as the ensemble's overall output. In the event of a tie, the neural network's prediction is chosen. Using this voting scheme, we train the MLP independently of the other models in the ensemble, using the same scheme as described previously. - -In soft voting, the weighted average of the predicted class probabilities of each model is used as the predicted class probabilities of the ensemble. We utilize this prediction when training the MLP, and use negative log likelihood loss instead of cross entropy loss, since taking the softmax of probabilities is not necessary. This way, we can incorporate the predictions of the whole ensemble into the training of the MLP. Since the ensemble now outputs a vector of class probabilities, the one with the highest probability will be used as the soft voting ensemble's prediction. - -Since we want a neural model to be the basis of our ensemble, we vary the number of votes assigned to the neural network while keeping the number of votes for other models fixed to 1. 
With four supplementary models in addition to the neural network, giving the neural network 4 or more votes is not necessary, since this ensemble would always output the same results as the neural network. Because of this, we study the loss curve when giving the neural network 1, 2, and 3 votes. Note that decimal value votes for the neural network are not sensible (at least in the hard-voting scheme), since it can be proved that all potential voting scenarios are encapsulated into the three voting levels we have chosen. - -Another important aspect of our ensemble is that the 'weak' classifiers do not vary in parameterization; only the MLP does. Refitting all the weak classifiers across epochs and MLP parameterizations took much longer than expected, perhaps due to incompatibilities between sklearn and GPUs, and completing the experiments using this approach was unfortunately unfeasible. Hence, all 'weak' classifiers have fixed architectures, chosen such that each one has low test error but is not at the highest level of parameterization according to the previous discussion, and only the MLP varies. - -### Multi-Layer Perceptron Ensemble - -The Multi-Layer Perceptron Ensemble uses 5 identically initialized MLPs which are trained in parallel using Pytorch's autovectorization capabilities. Since they are defined in the same way and trained simultaneously using the MLP training scheme discussed above, each receives equal weight when it comes to taking an averaged prediction. However, unlike the bagging method used for the Weak-Learner Ensemble, we take advantage of the identical architectures of the models and the numerical stability provided by this, and generate ensemble predictions by averaging the logits of all five learners and using those values as the logits of the ensemble. Again, we experiment using 100 and 500 epochs to see how the behavior evolves across increasing number of epochs, but we omit training over 2000 epochs due to excessive computational costs. 
An experiment for the future would be training over a very large number of epochs for even greater ensemble sizes to see how results vary across time. - - -There has been discussion in the past of whether to average the raw logits or the softmax-transformed probabilities. The main concern raised over averaging across raw logits is that the outputted values can vary greatly in magnitude across models (and therefore overconfident models can potentially overshadow all other models when taking the prediction), but, empirically, that doesn't seem to be a problem here. Tassi et al. provide some intuition in "The Impact of Averaging Logits Over Probabilities on Ensembles of Neural Networks" , suggesting that different approaches to ensembling should be taken depending on the architecture and levels of confidence of the models. For general safety, they recommend averaging the probabilities, but for the purposes of our task, averaging the logits suffices. - -*** - -## Results and Discussion - -Contrary to our expectations, the Weak Learner Ensemble performs much worse than even the individual models on MNIST classification. Although our focus is on double descent and not on the strong predictive power of ensembles, the latter is needed to observe the former, or at least discuss it at an interesting level. - -Initially, we tried applying the soft-voting scheme for the Weak Learner Ensemble, but the reported results are unexpectedly poor, yielding very high classification loss, especially when compared to the results of each model taken individually. This may be because each 'weak' learner has high confidence in its predicted class, whereas the MLP may be more evenly split between different classes, which would result in the weak classifiers winning more often, even if the MLP has higher weight in the prediction. 
The plot of the negative log likelihood loss for both training and testing is also hard to understand, but it is clear the ensemble has a very hard time improving, even as more parameters are added. We only include the results for the ensemble that allocates 3 votes to the MLP, but note that these are the best loss curves we were able to produce given this method. - - -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/Soft-Voting-3-train.png" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/Soft-Voting-3-test.png" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-
- Soft-Voting Weak-Learner Ensemble 100 Epoch Training/Testing Negative Log Likelihood Loss Over MLP Parameter Count / 1000. MLP given 3 votes. -
- -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/Soft-Voting-3-train-accuracy.png" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/Soft-Voting-3-test-accuracy.png" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-
- Soft-Voting Weak-Learner Ensemble 100 Epoch Training/Testing Zero-One Loss Over MLP Parameter Count / 1000. MLP given 3 votes. -
- -We then tried the weak-learner approach again with hard-voting, and let the MLP independently train using the unmodified MLP training scheme mentioned previously. However, as opposed to halting training when MLP classification loss first hits 0, we only halt training when *ensemble* classification first hits 0. - -We found that while classification loss had certainly gone down when compared to the soft-voting scheme (with even just *one* vote!), the ensemble still severely underperformed when compared to each of the individual models used. As seen in the plots, the classification loss starts to improve once the MLP gets more and more votes, agreeing with intuition that, eventually, the MLP has the veto right. As opposed to the soft-voting scheme, all classifiers now have a contribution that is proportional to their voting weight, which mitigates the previous problem of some models having much higher confidence than others. However, we believe the poor results can be attributed to the models we used for ensembling. Indeed, a significant number of models are regular, boosted or ensembled (or all) versions of decision trees, which means there is a significant chance that they make similar mistakes on similar data points. Looking at the plots for overparameterized decision trees and L2-Boost ensembles, we see that train error never quite reaches 0 for any of them. Since the train loss seems to plateau for our models as well, this may explain why. In the cases of 1 or 2 votes, this can lead to consistently poor predictions, especially since the models are not reparameterized across the experiment. For 3 votes, this phenomenon is less significant, as the ensemble slowly begins to reach the testing performance of the individual models. - -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-train.png" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-test.png" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- -
- Weak-Learner Ensemble 100 Epoch Training/Testing Zero-One Loss Over MLP Parameter Count / 1000. MLP given 1 vote. -
- - -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-train.png" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-test.png" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- -
- Weak-Learner Ensemble 100 Epoch Training/Testing Zero-One Loss Over MLP Parameter Count / 1000. MLP given 2 votes. -
- - -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-vote-train.png" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-votes-test.png" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- -
- Weak-Learner Ensemble 100 Epoch Training/Testing Zero-One Loss Over MLP Parameter Count / 1000. MLP given 3 votes. -
- -Further work could be done on the Weak-Learner Ensemble, focusing on better model selection and concurrent reparameterization across all models. Given the limited time and compute resources at our disposal, we leave this problem open for now. - -All hope is not lost, however. Seeing the poor performance of the Weak-Learner Ensemble given the significantly better performance of individual models, one could be discouraged from attempting to use ensembling to mitigate double descent, since it may not even be observable in such settings. However, we saw double descent in L2-Boost ensembles and, arguably, in random forests, and so we pushed onward. All other ensemble methods used multiple copies of the same model, and so we decided to experiment with a small ensemble of MLPs, to see how they would behave. - -This was feasible for 100 and 500 epochs only, but the obtained results shed light on how ensembling could in fact mitigate double descent. The phenomenon is not quite as observable in the 100 epoch case (one explanation could be that the train loss has not converged yet), but it becomes quite clear when looking at the 500 epoch ensemble and comparing it with the original 500 epoch MLP. Double descent is still very easy to see, occurring at the same threshold as before. This makes sense, since the MLPs have all reached interpolation, which should increase test loss for all, and then start going down as we overparameterize more and more. However, the main result is that the increase once we reach interpolation is *much* lower than before. Indeed, the ensemble sees a jump from $\sim$ 0.35 to around $\sim$ 0.4 at the highest, whereas the individual MLP sees a jump from $\sim$ 0.36 to around $\sim$ 0.52. Another important result is that the loss as we overparameterize becomes *significantly* lower in the ensemble model than in the individual MLP. - -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/ensemble-100-epochs-train.png" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/ensemble-100-epochs-test.png" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- -
- MLP Ensemble 100 Epoch Training/Testing Cross-Entropy Loss Over MLP Parameter Count / 1000 -
- -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/ensemble-500-epochs-train.png" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-double_descent/ensemble-500-epochs-test.png" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- -
- MLP Ensemble 500 Epoch Training/Testing Cross-Entropy Loss Over MLP Parameter Count / 1000 -
- -While we weren't able to fully get rid of the double descent curve by ensembling multiple MLPs, the fact that it became flatter and the loss past the interpolation threshold started to become smaller is quite exciting, as it suggests that, potentially, large ensembles of MLPs may not noticeably suffer from double descent at all, and yield better overall predictions than individual models can. One notable advantage to this ensemble method is the ability to further parallelize one's training of overparameterized neural networks. These models can take extreme lengths of time to train, and besides increasing the computational allocation used, practitioners may use data, model, or processor parallelism in order to reduce this time. The ensemble neural networks we use are independently generated, meaning that they can be vectorized or trained on different GPU cores without issue. This could be a valid alternative to training for more epochs for reducing model error past the interpolation threshold. More work investigating the effect of neural network ensembling on double descent, especially on models trained over many epochs, would be very exciting and potentially shed even more light on the possible advantages of overparameterization. - - -*** - -## Conclusion - -We discussed the existence of double descent for some simple and classical models, observing the effects of varying across levels of parameterization and noting that single descent can sometimes be mistaken for double descent, and proposed the use of various ensembles to mitigate the effects of double descent. - -Ensembles consisting solely of neural networks resulted in a considerable boost in performance past the individual model interpolation threshold, and in a flatter curve when compared to individual models. 
However, pairing the neural network with weak learners in an ensemble voting system decreased testing performance, though this adverse effect waned as the neural network received proportionally more votes. Machine learning engineers that intend to intentionally overparameterize their models may take advantage of not only the ensemble approach's increased performance and significantly more reliable results, but the enhanced parallelization and vectorization capabilities offered by the proposed method. - -*** - -## Future Work - - -This project was implemented using Google Colab, which proved to be restrictive for adopting more complex models. A key part of the double descent phenomenon is overparameterization, which happens across multiple full training loops, and so complex models that are additionally overparameterized will require more powerful computing resources beyond what we used. For example, a model which takes 10 hours to complete a single training loop will take multiple days to train before being able to plot results and observe double descent. Even for models that take around 10 to 15 minutes to train, such as the 500 epoch MLP we explored throughout our project, a full experiment that showcases the double descent curve in detail can take upwards of 5 hours. Furthermore, additional computing power can allow for this project to be expanded to more complicated datasets and tasks. MNIST classification is computationally inexpensive, though invoking double descent in more complex tasks such as text generation in natural language processing was not feasible using Google Colab. Future projects that follow this work should keep computational limitations in mind when choosing models and datasets. - -In addition to the future work suggested throughout our project, we propose a final approach that we believe is worth exploring further. 
During the planning process of this project, we discussed using a more rigorous voting system than what is traditionally found in ensemble model projects. Effectively, each model would have a weight associated with how much influence its output should have on the overall ensemble output. For $n$ models, each model could start with, say, a weight of $1/n$. Then, after producing each model's vector output, the categorical cross-entropy loss with respect to the true output could be computed, and the weights of each model could be updated such that each model has its weight decreased by some amount proportional to the calculated loss. Then, these weights could be normalized using the softmax function. This would be repeated for each level of parameterization. Due to resource constraints and the limitations of sklearn to the CPU, learning both the model weights and ensemble weights at each level of ensemble parameterization was not feasible given the size of the models we built and the classifiers we chose to use, as well as the number of epochs we trained over. Future studies may wish to implement this method, however, to produce a more robust ensemble for classification. - -*** - -## Reproducibility Statement - -To ensure reproducibility, we have included the codebase used for this project, as well as the above description of our data, models, and methods. Note that the Colab notebook that we have worked in is currently very messy and sometimes incomplete due to faults in Google's autosaving feature, but we plan to clean it up and have it available for easy future experimentation. 
\ No newline at end of file diff --git a/_posts/2023-11-08-foley-to-video.md b/_posts/2023-11-08-foley-to-video.md deleted file mode 100644 index df3b91e6..00000000 --- a/_posts/2023-11-08-foley-to-video.md +++ /dev/null @@ -1,189 +0,0 @@ ---- -layout: distill -title: "Autoen-chorder: Predicting Musical Success With Neural Nets" -description: In this blog, we discuss deep learning methods and results of predicting song popularity from audio features. -date: 2023-12-12 -htmlwidgets: true - -authors: - - name: Esteban Ramirez Echavarria - url: "https://www.linkedin.com/in/esteban-raech/" - affiliations: - name: LGO, MIT - - name: Arun Alejandro Varma - url: "https://www.linkedin.com/in/arunalejandro/" - affiliations: - name: LGO, MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-08-foley-to-video.bib - -toc: - - name: Introduction - - name: Previous Works - - name: Hypothesis - - name: Architecture - - name: Data Preprocessing - - name: Baselines - - name: Our Results - - name: Next Steps - - name: Bibliography - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Introduction -Our aim is to use deep learning (the crux of 6.s898) to help musicians and their sponsors (for example: agents, record labels, and investors) identify whether songs will resonate with listeners. Solving this problem would enable established artists to release more impactful music, and spur new musicians to break into a competitive market. - -We first begin by establishing what our success metric is. 
For the purposes of this project, we will use the concept of song “popularity” as the metric we want to predict, and we source our popularity data from the SpotGenTrack Popularity Dataset. This dataset leverages Spotify’s Popularity Index, which is a relative rank measured against other songs’ popularities. It is a function of recent stream count, save rate, playlist appearance count, skip rate, share rate, and more. - -There already exist a few models to help us solve this problem. However, these models make use of metadata, such as artist name, year of release, and genre. We believe that these models – while interesting – are insufficient to be actionable, particularly for up-and-coming musicians who may be innovating new music genres, or who may not yet have a strong name. Specifically, metadata like Artist Name are both highly-weighted (for example, even Taylor Swift’s least popular song will be a relative superhit) and unchangeable (we cannot suggest that artists change their identity to Beyonce). Additionally, features like Genre are imprecise, and can quickly become outdated as new subgenres and crossover genres are developed. - -To address this gap and become more actionable to musicians, we aimed to create a new model that can achieve near-parity with metadata-based models without leveraging any metadata. By combining multiple audio-feature models, we not only achieved comparable results to metadata-based models, we actually outperformed metadata-based models on more than half our testing data. - - -## Previous Works - -The most prominent existing model is HitMusicNet (heretofore referred to as “HMN”). The HMN model predicts popularity based on lyric data from Genius.com (syllables, words, etc.), high-level audio features from SpotGenTrack (e.g. acousticness, key, tempo, speechiness), low-level audio features from SpotGenTrack (audio preprocessing, such as spectral analyses), and metadata from SpotGenTrack (e.g. artist name, year of release, genre). 
A feature vector is created with this information, and said vector is fed as the input into an Autoencoder network to compress the features, followed by a neural network to obtain the predicted popularity. - -HitMusicNet has two different objectives: Regression and classification. For this project, we will focus only on regression since it will allow us to visualize differences between our model and HMN with higher granularity. We replicated the code from the paper in PyTorch, using the same functions and data and calculated metrics to make sure our implementation is correctly replicating the paper. We see a slight discrepancy in the errors, likely due to the test/train split during the paper’s training. Altogether, we can still consider our replicated model as valid as the metrics are within reasonable range to the reported metrics. Additionally, we added the R-squared metric as an additional metric to ensure our model fits the data. - -{% include figure.html path="assets/img/2023-12-12-Comparison.jpg" class="img-fluid rounded z-depth-1" %} - -A second model, VGGish, is a pretrained convolutional neural network trained on YouTube-100M (a database with 100 million YouTube videos). This network is a representation learning network widely used in established papers. This network takes in a wav file and processes it on 0.96-second windows, and calculates 128 embeddings per window. This means that the resulting tensor from VGGish will be 2 dimensional for a single file, and 3 dimensional for a batch of files. - -{% include figure.html path="assets/img/2023-12-12-HMN.jpg" class="img-fluid rounded z-depth-1" %} - - -{% include figure.html path="assets/img/2023-12-12-Autoencoder.jpg" class="img-fluid rounded z-depth-1" %} - -## Hypothesis -HMN has a tendency to be heavily-indexed on metadata features and lyrics. Data such as artist name heavily bias the model’s popularity predictions in favor of big-name artists. 
Lyrics information can make the model biased to predicting instrumental music as less likely to be successful. While this may be representative of how the industry works, it makes HMN much less actionable for musicians trying to assess their chance of success with the market. - -We believe that audio-only features – such as temporal information (i.e. the structure of the song and information about previous sections) and repetition – can alone be fairly successful in determining a song’s popularity. Thus, we chose to use just audio data, as well as temporal data, to predict popularity. - -We hypothesize that combining the audio-only features of HMN with VGGish’s audio representation will yield superior outcomes to HMN’s audio-only features alone. We also hope that our new model can compete with the full HMN model (i.e. audio features and metadata combined). - -## Data Preprocessing -Given our hypothesis, we need to extract the low-level features from our signal and map each row to its corresponding audio file to be fed into VGGish. We used Spotify’s API to obtain the raw audio files to be processed, and then ran them through the VGGish network. We performed the same preprocessing as the one done in the HitMusicNet paper. - -### File length limitation -Unfortunately, Spotify only allows the download of 30s previews of songs. - -### Memory limitation -Audio files are heavy, and the longer they are, the heavier. We should ideally process all 95,000 songs’ full length, but given Spotify’s API’s request limit, and the size of the files, we were only able to obtain 10,000 30s snippets. This still resulted in roughly 14.5 GB of data. Processing the whole dataset would not only require roughly 140 GBs of data, but the Spotify API’s limits will likely be exceeded, and our Colab notebook will likely run out of memory. 
- -### Downsampling and splitting -Given the considerations above, we decided to use 10,000 songs for our model’s development, splitting the data into 8,500 samples for training and 1,500 for validation. Given that this is roughly 10% of the original data, we expect the model’s performance to be below the reported metrics. - -## Baselines -The metrics obtained when replicating the HMN network serve as a reasonable parameter to verify our model’s accuracy. As mentioned above, the model’s performance is expected to be below the paper’s reported metrics. To understand the range, we retrained a network with the same shape as the paper’s using the 10,000 samples in the same train/test split we will feed to our new network. The resulting metrics for this experiment can be seen in Table 2. - -{% include figure.html path="assets/img/img5.png" class="img-fluid rounded z-depth-1" %} - -Training a model that results in similar metrics would be ideal, but realistically, as we will only be using low-level data, we expect the metrics to be lower than the values in Table 2. To ensure that our trained model isn’t just predicting noise, we use a baseline comparison, comparing against a random normal distribution with mean μ=40.02 and σ=16.79. - -{% include figure.html path="assets/img/img6.png" class="img-fluid rounded z-depth-1" %} - -As seen in Table 3, the baseline intuitively would not appear to be too far from the trained HMN model in terms of MSE and MAE. When looking at the r-squared, the random model has a negative value, while the trained HMN network results in a much higher 0.5616 value. To deem a model as successful, we will compare it against both sets of metrics. - -{% include figure.html path="assets/img/img1.png" class="img-fluid rounded z-depth-1" %} - -{% include figure.html path="assets/img/img2.png" class="img-fluid rounded z-depth-1" %} - -## Alternate Models -- Single autoencoder. 
Our first iteration to solve this problem consisted of using a single autoencoder to find representations with data coming from VGGish and SpotGetTrack low level features, and then running that through a feed-forward network similar to the one used in HMN. Since the output of VGGish is a tensor of shape (batch_size, n_windows, n_features) and the output of SpotGenTrack is (batch_size, 207), we concluded there was no simple way to combine the two data sources without losing temporal information. -- RNN. Our second iteration consisted of running the data coming from SpotGenTrack Low-Level through an autoencoder in the same way HMN does it. After this initial train gives us a compressed representation of the data from SpotGenTrack Low-Level, we train two subsequent networks: First an LSTM RNN which transforms data into (batch_size, 20), then we add the compressed representation from SpotGenTrack Low-Level and run that through a feedforward network. This model yielded a performance below the baseline. -- HMN+VGGish: This model consists of taking the full SpotGenTrack data, passing it through the regular autoencoder defined in HMN, and add it to the output coming from VGGish. This model, while resulting in promising results, still yielded worse performance than HMN on its own, so our team decided to explore alternatives. -- LossNet. Our third exploration consisted of training a model that uses VGGish’s outputs to try and predict losses from HMN. In essence, we are trying to use VGGish Representation to capture information that HMN consistently is unable to. This approach has parallels with Adversarial Networks, in that one model is being trained on the losses of another model. However, this approach is more cooperative than adversarial, since the result of the two models is not zero-sum. This approach led to a dead-end with surprising results. 
- -## Final Architecture -Our final iteration consists of a model with two autoencoders: One for data from SpotGenTrack low level features, the second for the representation obtained using the VGGish model. The slight difference between these two models is that the VGGish autoencoder has additional LSTM layers at the start of the encoder, and at the end of the decoder. The output from these two autoencoders is then added together and passed through a feed-forward network. This architecture can be seen in Figure 4. - -{% include figure.html path="assets/img/2023-12-12-FinalArch.jpg" class="img-fluid rounded z-depth-1" %} - -### Padding and Packing -None of the audio files coming from Spotify previews are more than 30s in duration, but some are in fact shorter than others. To solve this issue, and also to be able to feed our model whichever sized data we require, we use PyTorch’s packing functionality. Packing allows us to process sequential data with different sizes, so that only the relevant information is passed through the LSTM. Conversely, padding allows us to add zeros at the end of sequences so that all samples have the same size. This is required to store data in tensors. - -### Hyperparameters -{% include figure.html path="assets/img/img3.png" class="img-fluid rounded z-depth-1" %} - -### Additional Model Considerations - -The original HMN model compiles 228 features into 45 representations for the feed-forward network. We want our model’s feed-forward network to have a similar number of inputs as the given architecture, therefore we compress the data in the encoder of both autoencoders to 20 features, so that when added together, they result in 40 total features. - -Additionally, as can be seen in figure 3.2, the target’s distribution is condensed at a central point, and distributed in a Gaussian shape. To help our model accurately predict the shape of the results, we multiply the losses by a weighting factor. 
This multiplication is important to make our model more likely to predict outliers. The equation is the following: - -\begin{equation} -\frac{1}{N} \sum_{i=1}^{N} \exp\left(\left(\frac{{(\text{{target}}_{i} - \text{{mean}})}}{\alpha \cdot \text{{sd}}}\right)^2 \cdot \frac{1}{\beta}\right) -\end{equation} - -Our feed-forward network was suffering from vanishing gradients during training. To attempt to avoid this, we initialized all linear layers with a weight distributed by Xavier uniform, and a constant bias of 0.1. - -### Finding the Best Model - -In order to find the best model, we modified plenty of parameters and hyperparameters. We first found the optimal autoencoder models (seen in Table 4), and then we proceeded to run several loops over our linear layer to obtain the model with lowest errors. The parameters modified were the following: - -- Learning rate: (0.001, 0.0001, 0.0002, 0.02, 0.0005) -- Weight decays: (0, 0.0001, 0.0002) -- Batch sizes: (200, 100, 256, 277) -- Means (for weights calculation): (0.33, 0.34, 0.35, 0.37, 0.38, 0.40, 0.42, 0.45) -- Alphas (for weights calculation): (1.8, 2.0, 2.1, 2.2) -- Betas (for weights calculation): (1.8, 2.0, 2.2) -- Number of linear layers: (7, 9, 12) - -The combination that resulted in the optimal model was the following: -- Weight decays: 0 -- Batch sizes: 200 -- Means (for weights calculation): 0.36 -- Alphas (for weights calculation): 2.0 -- Betas (for weights calculation): 2.0 - -{% include figure.html path="assets/img/img4.png" class="img-fluid rounded z-depth-1" %} - -Table 5 shows the best-performing models obtained after experimentation. MAE, MSE and r-squared were calculated using the testing data, i.e. data not used in training. Looking at the data in tables 2 and 3, we see that our model shows a significant improvement above the random baseline, with a reasonable r-squared and MSE. Reduction in the MAE remains challenging, but still we see a significant improvement from the random baseline. 
- -Furthermore, we analyzed the testing data, and found that in 919 of the 1,500 songs (61.2%), our model did better than HitMusicNet. Upon further analysis, we found that our model did a better job predicting the popularity of songs with popularities ranged [0.22-0.55], while HMN does a better job at predicting outliers (songs with <0.2 or >0.6 of popularity). - - -## Conclusions and Next Steps -### Data Exploration -Given Spotify’s ubiquity and analytics excellence, its Popularity Index is a good proxy for relative song popularity. But there are concerns around using data from a single platform (Spotify) and from a single channel (digital streaming). Given this concern, we would like to explore other methods of calibrating a track’s popularity (for example, Billboard and Discogs API). We can aggregate popularities into a single output, or can train each model on multiple outputs of various popularity scores. - -Currently, our data consists of 30s audio clips. The average new song length is around 3min 17s, meaning that our models’ inputs cover around 15% of the song. This can cause the model to miss information critical to song likeability, such as the intro, chorus, or bridge. We would like to make our dataset more complete by using full songs as inputs. Furthermore, we’re using only 10,000 data points, which can also be affecting our training efficiency, especially our ability to detect outliers, which we have found to be a key issue with our model. Ideally, we would like to train our models on all 95k songs in SpotGenTrack. - -### Architectures -Many more architectures can further be explored to predict song popularity. We found VGGish with an LSTM to be an efficient “boosting” algorithm, which contributed to the model in a less significant way than SpotGenTrack, but still allowed our model to increase its performance. Similarly, the use of transformer architectures can help improve the performance of our model. 
- -In this study, we explored and evaluated our model against the HitMusicNet’s regression algorithm. In further studies, it could be beneficial to explore the classification algorithm, as we have seen very promising results in the prediction of songs along a certain range. - -We used the VGGish model purely on inference since we required to train the autoencoder and then the feed-forward network. Future studies can include architectures such that the VGGish model is trained in series with the feedforward network, and fine-tuned to predict popularity. We could also look at alternate representation models that are perhaps better suited or supply a more apt representation for our task than VGGish. - -In conclusion, the use of low-level features to predict popularity can have several real-world advantages. The proposed model is able to predict a song’s popularity to a fair degree without the need for high-level features. Emerging artists can use these parameters to determine the possible success of their songs. Music labels can use this algorithm to predict an artist’s possible popularity. Platforms such as Spotify can also take advantage of this model in order to tackle recommendations and boost emerging artists. - -## Bibliography -- D. Martín-Gutiérrez, G. Hernández Peñaloza, A. Belmonte-Hernández and F. Álvarez García, "A Multimodal End-to-End Deep Learning Architecture for Music Popularity Prediction," in IEEE Access, vol. 8, pp. 39361-39374, 2020, doi: 10.1109/ACCESS.2020.2976033. -- Ding, Yiwei, and Alexander Lerch. "Audio embeddings as teachers for music classification." arXiv preprint arXiv:2306.17424 (2023). -- D. Martín-Gutiérrez, “HitMusicNet” in https://github.com/dmgutierrez/hitmusicnet. -- Koutini, Khaled, et al. "Efficient training of audio transformers with patchout." arXiv preprint arXiv:2110.05069 (2021). -- P. 
Nandi, “Recurrent Neural Nets for Audio Classification” in https://towardsdatascience.com/recurrent-neural-nets-for-audio-classification-81cb62327990. -- Wu, Rick, “VGGish Tensorflow to PyTorch” in https://github.com/tcvrick/audioset-vggish-tensorflow-to-pytorch. -- Wu, Yiming. (2023). Self-Supervised Disentanglement of Harmonic and Rhythmic Features in Music Audio Signals. -- S. Shahane, “Spotify and Genius Track Dataset” in https://www.kaggle.com/datasets/saurabhshahane/spotgen-music-dataset/data. diff --git a/_posts/2023-11-08-graphs-transformers.md b/_posts/2023-11-08-graphs-transformers.md deleted file mode 100644 index 1fb96472..00000000 --- a/_posts/2023-11-08-graphs-transformers.md +++ /dev/null @@ -1,454 +0,0 @@ ---- -layout: distill -title: Graph Transformers -description: A study of Transformers' understanding of fundamental graph problems, where we propose a new, tailored architecture highlighting the model's potential in graph-related tasks. - - -date: 2023-12-11 -htmlwidgets: true - -# Anonymize when submitting -authors: - - name: Tristan Magnin - affiliations: - name: MIT - - name: Gabriel Deo - affiliations: - name: MIT - - -# must be the exact same name as your blogpost -bibliography: 2023-11-08-graphs-transformers.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. 
-toc: - - name: Motivation & Project outline - - name: Introduction & Literature review - - name: Graph Transformer Model Design - subsections: - - name: Vanilla Transformer - - name: Tokenization Approach and Positional Encoding - - name: Attention in Graph Transformers - the Necessity of a Skip-Connection - - name: Model Architecture Overview - - name: Methodology for Training and Evaluation - subsections: - - name: Constructing the Dataset - - name: Training Protocols - - name: Metrics and Evaluation Criteria - - name: Results and Comparative Analysis - subsections: - - name: GNN performance - - name: MLP Performance - - name: Transformer performance - - name: Transformer with Attention Mask, Positional Encoding & Skip Connection - - name: Conclusion - - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Motivation & Project outline - -Our project aims to advance the understanding of Transformers in graph theory, focusing on the Shortest Path Problem, a cornerstone of graph theory and Dynamic Programming (DP). We introduce a custom Graph Transformer architecture, designed to tackle this specific challenge. Our work begins with a theoretical demonstration that the shortest path problem is Probably Approximately Correct (PAC)-learnable by our Graph Transformer. We then empirically test its performance, comparing it against simpler models like Multilayer Perceptrons (MLPs) and sophisticated benchmarks like Graph Neural Networks (GNNs). 
This study seeks to validate the Graph Transformer as an effective tool for solving fundamental graph-based problems, and "simple" DP problems in particular. - -
-
- {% include figure.html path="assets/img/2023-11-08-graphs-transformers/erdos_renyi.svg" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-graphs-transformers/transformer-architecture-diagram.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Left: example of an Erdős–Rényi graph, right: original Transformer architecture -
- -## Introduction & Literature review - -Transformers have shown significant effectiveness in domains that require an understanding of long-range dependencies and contextual information. Originally prominent in natural language processing, their applications have expanded to include areas such as computer vision and speech recognition. Recent explorations have also delved into Transformers' abilities in mathematical tasks like arithmetic, GCD computations, and matrix operations, shedding light on the learning mechanisms of these models. - -A particular area of interest within these applications is graph problems. Recent research has assessed Transformers' performance in this domain and explored adapting the Transformer architecture to fit the context of graph problems. However, much of the current research does not focus on Transformers' comprehension of fundamental graph challenges, such as the shortest path problem. Notably, in the studies mentioned above, the shortest path is often directly input as a matrix, with each entry $i,j$ representing the shortest path distance between nodes $i$ and $j$. Our study will investigate Transformers' performance on "raw" graph data, where only edge weights, the adjacency matrix, and positional encodings are provided. The Transformer will be trained to predict the shortest path from a designated node 0 to all other nodes, in the form of an $n\times1$ vector. - -We will demonstrate that, by adapting the Transformer architecture for our purposes, the shortest path problem and other "simple" dynamic programming (DP) challenges are Probably Approximately Correct (PAC)-learnable by the model. Our approach is based on the framework developed for GNNs and adapted to our Graph Transformer. - -## Graph Transformer Model Design - -Let's dive into our Graph Transformer model, drawing inspiration from the classical Transformer architecture. 
- -### Vanilla Transformer - -We first recall the vanilla architecture of Transformers, described in , which is fundamentally built on two key ideas: tokenization and attention, both of which we adapt for graph data. - -In our context, think of tokens like the attributes of nodes in Graph Neural Networks (GNNs). These tokens are packets of information, allowing transformers to handle diverse data types, including graphs. The process begins with a token net, which is a sequence of linear and non-linear layers. This is somewhat equivalent to the alternating aggregation and combination stages in a GNN, where each node processes and integrates information from its neighbors. - -The real game-changer in transformers, however, is the attention mechanism, layered on top of the token net. This mechanism involves a set of matrices known as query, key, and value. These matrices enable tokens to use information from the nodes they're paying attention to, in order to learn and update their own values. - -Here's a simple way to visualize it. Imagine each token in the transformer scanning the entire graph and deciding which nodes (or other tokens) to focus on. This process is driven by the query-key-value matrices. Each token creates a 'query', which is then matched against 'keys' from other tokens. The better the match, the more attention the token pays to the 'value' of that other token. Mathematically, this can be expressed as: - -$$Attention(Q, K, V) = softmax \left(\frac{QK^T}{\sqrt{d_k}} \right)V$$ - -In this formula, $ Q $, $ K $, and $ V $ represent the query, key, and value matrices, respectively. The term $ \sqrt{d_k} $ is a scaling factor based on the dimensionality of the keys. - -While the process in Graph Neural Networks (GNNs) might seem similar, there's an essential distinction to be made. In GNNs, the flow of information is local, with nodes exchanging information with their immediate neighbors. 
However, in our Graph Transformer model, we employ self-attention to potentially allow each node (or token) to consider information from the entire graph. This includes nodes that might be several steps away in the graph structure. - -One axe of our research is then to explore the potential benefits - or drawbacks - of this global perspective, and seeing how leveraging global information compares to the traditional local feature aggregation used in GNNs, in the context of graph theory challenges like the Shortest Path Problem. By enabling each node to have a broader view of the entire graph, we're exploring how this approach influences the prediction quality (Accuracy) and the efficiency of path computations, specifically focusing on the speed at which the network adapts and learns (Training Efficiency). - -A full Transformer will be a sequence of self-attention layers and MLPs. We now turn to the specifics of how we implement it, starting with tokenization. - -
-
- {% include figure.html path="assets/img/2023-11-08-graphs-transformers/transformer_DL.jpg" class="img-fluid rounded z-depth-1" %} -
-
-
- Vanilla Transformer architecture (Source: 6.S898 Lecture) -
- -### Tokenization Approach and Positional Encoding - -The first step in our model is converting graph information (including nodes, edges, and their weights) into a format suitable for transformers. We've developed a method to encode this graph data into tokens. - -Each token in our system is a vector with a length of $2n$. Here, $n$ represents the number of nodes in the graph. Half of this vector contains binary values indicating whether a connection exists to other nodes (1 for a connection, 0 for no connection). The other half of the vector holds the weights of these edges. - -$$ \text{Token} = [\text{Edge Connections (Binary Values)}, \text{Edge Weights}] = [\mathbf{a}, \mathbf{w}] $$ - -This structure seems sufficient to capture the essential structure of the graph. But, to further aid the transformer in identifying the shortest path, we can introduce additional local information into these tokens through positional encoding. Encoding positional information of the nodes has already be achieved in various ways, for example, using graph kernels . Here we choose a simpler onehot encoding method : we assign an arbitrary rank to each node and include an indicator vector within the token. This vector, also of size $n$, points to the node's position. With this addition, each token becomes a vector of size $3n$: - -$$ \text{Token} = [\text{Edge Connections}, \text{Edge Weights}, \text{Positional Encoding}] = [\mathbf{a}, \mathbf{w}, \mathbf{1}] $$ - -We plan to rigorously test both approaches as part of our diverse model lineup. - -## Attention in Graph Transformers - the Necessity of a Skip-Connection - -The Query-Key-Value (QKV) Attention Mechanism is a pivotal aspect of how Graph Transformers can effectively learn the Shortest Path Problem. Building on the insights from Dudzik et al. 
, who illustrated the capacity of GNNs to tackle Dynamic Programming challenges, including the Shortest Path Problem, we delve into how Transformers might achieve similar feats using attention. - -Recall the Bellman-Ford algorithm's key update step for the Shortest Path Problem, expressed as: - -$$d_i^{k+1} = \min_j d_j^k + w_{i, j}$$ - -In this context, our hypothesis is that Transformers could replicate this dynamic through the attention mechanism, which we prove mathematically in Appendix A. The key observation is that the softmax layer would be able to mimic the $ \min $ operator, as long as the query-key cross product is able to retrieve $d_j + w_{i,j}$ for all nodes $i,j$. Intuitively, this can be done if each query token $i$ picks up on the node's positional encoding, and each key token $j$ on the node's current shortest path value $d_j$ and edges values $w_j$. Taking the cross product of the onehot encoding $i$ with edges values $w_j$ would then return exactly $w_{i,j}$ for all $i,j$. To select only seighboring connections, we'll use an appropriated attention mask. - - - -However, there is a catch. The learning process might not fully grasp the Bellman-Ford update using the attention mechanism alone. After the attention picks up on the correct minimizer neighbour token $j$, it needs to update the the current node $i$'s values. -The Bellman-Ford update isn't a simple operation on the tokens like a sum. For instance, we only want $d_i^k$ to change, and we want to update it with the correct $w_{i,j}$. This is where the idea of incorporating a skip-connection mechanism comes into play. By concatenating tokens $i$ (the input) and $j$ (the attention's output) before feeding them to the MLP layer following the self-attention layer, we could effectively emulate the Bellman-Ford update process. 
- -Overall, combining attention and skip-connection could ensure our Graph Transformer can comprehensively learn and apply the Bellman-Ford logic to solve the Shortest Path Problem. We offer a mathematical proof of this concept in Appendix A, using a slightly different tokenization method. - -Additionally, it's worth considering that our Graph Transformer might be learning an entirely distinct logical process for solving the Shortest Path Problem. Still, proving that such a logic is within the model's grasp underlines the model's versatility in addressing some graph-related and/or dynamic programming challenges. We'll tackle this notion in the next part about learnability and algorithmic alignment. - - -
-
- {% include figure.html path="assets/img/2023-11-08-graphs-transformers/skip_connection.jpg" class="img-fluid rounded z-depth-1" %} -
-
-
- Graph Transformer - Skip connection -
- -### Model Architecture Overview - - -In this section, we revisit the architecture of our Graph Transformer, which is an adaptation of the standard Transformer model. Our model is composed of a sequence of self-attention layers and MLPs, each augmented with a skip-connection. The tokens in our model encapsulate both edge connections and their corresponding weights, alongside positional encoding. - -The most notable feature of our architecture is the introduction of the attention mask. This mask restricts the attention of each token to its immediate neighbors, aligning our approach more closely with the local message-passing process typical in GNNs. The inclusion or not of this feature and the resultant effect in our architecture marks the crucial difference between the global vs. local token aggregation methodologies that we discussed earlier. - - -## A measure of learnability - -Our project falls into the wider research interest in the interaction between network structures and specific tasks. While basic and common structures such as MLPs are known to be universal approximators, their effectiveness varies based on the amount of data required for accurate approximations. Notably, their out-of-sample performance often lags behind task-specific architectures, such as Graph Neural Networks (GNNs) in graph-related problems, which highlights the issue of a network's generalization capacity. - -To evaluate theoretically the ability of transformers to effectively learn the Shortest Path Problem and similar challenges, we position our study within the framework of PAC (Probably Approximately Correct) Learning. This framework allows us to explore the concept of algorithmic alignment. Algorithmic alignment is here crucial as it pertains to a model's capability to emulate a given algorithm with a minimal number of modules, each of relatively low complexity. Such approach has already been taken by Xu et. 
al to give a better understanding of the reasoning process of complex networks like GNNs, and it is instrumental in assessing the adaptability and efficiency of transformers in learning and solving complex graph-based tasks. - -### Algorithmic Alignment - -In this section, we delve into a series of definitions to establish the mathematical groundwork of our investigation. - -We first recall a definition of the PAC-Learnibility: - -#### Definition (PAC learning and sample complexity) - -Let $$\{x_i,y_i\}_{i=1}^M$$ be i.i.d. samples from some distribution $ \mathcal{D} $, and suppose $ y_i = g(x_i) $ for some underlying function $ g $. Let $$ f = \mathcal{A}(\{x_i, y_i\}_{i=1}^M) $$ be the function generated by a learning algorithm $ \mathcal{A} $. Then $ g $ is $ (M, \epsilon, \delta) $-learnable with $ \mathcal{A} $ if - -$$ \mathbb{P}_{x \sim \mathcal{D}} [\| f(x) - g(x) \| \leq \epsilon] \geq 1 - \delta $$ - -where $ \epsilon > 0 $ is the error parameter and $ \delta \in (0, 1) $ the failure probability. - -We then define the *sample complexity* as $$ \mathcal{C_A}(g, \epsilon, \delta) = \min M $$ for every $ M $ such that $ g $ is $ (M, \epsilon, \delta) $-learnable with $ \mathcal{A} $. - -This is a crucial concept in computational learning theory that helps us understand the feasibility of learning a given function from a set of examples to a certain degree of approximation, with a certain level of confidence. - -Next, we outline a definition that connects the concepts of function generation with the architecture of neural networks. - -#### Definition (Generation) -Let $ f_1, \ldots, f_n $ be module functions, $ g $ a reasoning function and $ \mathcal{N} $ a neural network. -We say that $ f_1, \ldots, f_n $ generate $ g $ for $ \mathcal{N} $, and we write $$f_1, \ldots, f_n \underset{\mathcal{N}}{\equiv} g$$ if, by replacing $ \mathcal{N}_i $ with $ f_i $, the network $ \mathcal{N} $ simulates $ g $. 
- -Using these ideas, we then introduce a key point for our project: algorithmic alignment, which we intend to validate for Transformers applied to the Shortest Path Problem. - -#### Definition (Algorithmic alignment) -Consider a neural network $ \mathcal{N} $ with $ n $ modules $$ \mathcal{N}_i $$ that tries to approximate a reasoning function $ g $. Suppose that there exists $ f_1, \ldots, f_n $ some module functions such that $$f_1, \ldots, f_n \underset{\mathcal{N}}{\equiv} g$$. -Then $ \mathcal{N} $ is $ (M, \epsilon, \delta) $-algorithmically aligned with $ g $ there are learning algorithms $$ \mathcal{A}_i $$ for the $$ \mathcal{N}_i $$'s such that $$ n \cdot \max_i \mathcal{C}_{\mathcal{A}_i} (f_i, \epsilon, \delta) \leq M $$. - -A small number of sample $ M $ would then imply good algorithmic alignment, i.e. that the algorithmic steps $f_i$ to simulate g are _easy to learn_. - -Finally, we state the following theorem, proven by Xu et al. It provides evidence that generalization benefits from algorithmic alignment. - -#### Theorem 1 (Algorithmic alignment improves sample complexity) -Fix $\varepsilon$ and $\delta$. Suppose $\{x_i, y_i\} \sim D$, where $|x_i| < N$, and $y_i = g(S_i)$ for some $g$. Suppose $\mathcal{N}_1, \dots \mathcal{N}_n$ are $\mathcal{N}$'s MLP modules in sequential order. Suppose $\mathcal{N}$ and $g$ algorithmically align via functions $f_1, ..., f_n$, as well as the following assumptions. - -*__i. Algorithm stability__*. Let $\mathcal{A}$ be the learning algorithm for the $$\mathcal{N}_i$$'s. Suppose $$f = \mathcal{A}(\{x_i, y_i\}^M_{i=1})$$, $$\hat{f} = \mathcal{A}(\{\hat{x}_i, y_i\}^M_{i=1})$$. For any x, $$\|f(x) - f(\hat{x})\| < L_0 \cdot \max_i\|x_i - \hat{x}_i\|$$, for some $$L_0$$. - -*__ii. Sequential learning__*. We train the $$\mathcal{N}_i$$'s sequentially. 
The inputs for $\mathcal{N}_j$ are the outputs from the previous modules $$\mathcal{N}_1, \dots, \mathcal{N}_{j-1}$$, while labels are generated by the correct functions $$f_{1}, ..., f_{j-1}$$. - -*__iii. Lipschitzness__*. The learned functions $f_j$ satisfy $$\|f_j(x) - f_j(z)\| \leq L_1\|x - z\|$$, for some $L_1$. - -Then g is learnable by N. - - -### Application to Transformers - -We now apply this theoretical framework to Transformers. The justifications of the results in this part will be a combination of sketch of mathematical proofs and empirical evidence. -We first state a first result: - -#### Lemma 1 (Transformers algorithmically align with the Shortest Path Problem) -Let $ \mathcal{T} $ be a Transformer, let $ g $ be the reasoning function of the Shortest Path Problem applied to a graph with $n$ nodes. Then $ \mathcal{T} $ is algorithmically aligned with $ g $. - -We can directly prove this lemma. -Let $ f_1, \ldots, f_n $ be the Bellman-Ford update processes of the Shortest Path Problem: -$$d_u^{k+1} = \min_{v \in \mathcal{N}(u)} d_v^{k} + c(u, v)$$ where $\mathcal{N}(u)$ is the set of neighbors of node $u$. -From Bellman-Ford algorithm, we have: $$f_1, \ldots, f_n \underset{\mathcal{T}}{\equiv} g$$, with $g$ being the shortest path function. - -Then, from our discussion on Transformers attention layers and proof in Appendix A, each attention-MLP sequence $\mathcal{N}_i$ has a learning algorithm $\mathcal{A}_i$ such that $f_i$ is learnable with $\mathcal{A}_i$. Each sample complexity is then bounded by M, which concludes the proof. - - -We can now state the following theorem: -#### Theorem 2 (Transformers can learn the Shortest Path Problem) -Let $ \mathcal{T} $ be a Transformer, let $ g $ be the shortest path function. Then, $g$ is learnable by $\mathcal{T}$. - -We provide here a sketch of a proof of this theorem. -From Lemma 1, $\mathcal{T}$ and $g$ algorithmically align via $f_1, \ldots, f_n$. -We must now check the 3 assumptions of Theorem 1. 
- -Sequential Learning __(ii)__ is clearly true, since transformers architectures incorporate sequence of MLPs (associated with attention layers). Li et al have provided an empirical proof of the algorithm stability __(i)__ of transformers. Finally, considering a self-attention token network combined with a ReLU-MLP for each layer of $\mathcal{T}$, every function in the related learning algorithm $\mathcal{A}_i$ (softmax, ReLU, Linear) is Lipschitz-continuous, hence their combination is Lipschitz-continuous too, which validates Assumption __(iii)__. - -We can then apply Theorem 1 and conclude the proof. - - -Having laid the theoretical foundation for our problem, we now turn our attention to the practical application, where we employ our Graph Transformer to the concrete task of learning and solving the Shortest Path Problem. - -## Methodology for Training and Evaluation -### Constructing the Dataset - -For training and evaluating our different models, we generate a comprehensive dataset comprising 50,000 samples, each representing a graph. These graphs were randomly created following the Erdős–Rényi model, specifically the $\mathcal{G}(n, p)$ variant, where `n` represents the number of nodes and `p` is the probability of edge formation between any two nodes. In our dataset, each graph consists of 10 nodes (`n = 10`), and the edge probability (`p`) is set at 0.5. This setting ensures a balanced mix of sparsely and densely connected graphs, providing a robust testing ground for the Graph Transformer's ability to discern and compute shortest paths under varied connectivity scenarios . - -Furthermore, we assign to the edges in these graphs some weights that are integral values ranging from 1 to 10. This range of weights introduces a second layer of complexity to the shortest path calculations, as the Graph Transformer must now navigate not only the structure of the graph but also weigh the cost-benefit of traversing various paths based on these weights. 
The inclusion of weighted edges makes the dataset more representative of real-world graph problems, where edges often have varying degrees of traversal difficulty or cost associated with them. - -This dataset is designed to challenge and evaluate the Graph Transformer's capability in accurately determining the shortest path in diverse graph structures under different weight conditions. The small number of nodes ensures a wide variability in the degree of connectivity in a sample graph. It also allows for an initial performance evaluation on smaller-scale problems, with the potential to extend these studies to larger-scale graphs in the future. Hence, the dataset's structure supports a comprehensive assessment of the model's performance and its adaptability to a wide range of graph-related scenarios. - -
- {% include figure.html path="assets/img/2023-11-08-graphs-transformers/shortest_path_counts.svg" height = "50%" width = "50%" class="img-fluid rounded z-depth-1" %} -
-
- Shortest path distribution for our entire dataset (50,000 samples) -
- - -### Training Protocols - -In the fixed dataset approach we've employed, the dataset is pre-constructed with 50,000 graph samples and remains unchanged throughout the training process. This method, involving a consistent 60/20/20 split for training, validation, and testing, ensures that every model is assessed under the same conditions at each epoch. This consistency is crucial for our primary goal: to compare the performance of different models or architectures in a controlled and repeatable manner. To an on-the-fly approach, where data is dynamically generated during each training epoch, introduces more variability. This variability can be beneficial in a second step for thoroughly testing the robustness and adaptability of a single model, as it faces new and diverse scenarios in each epoch. However, for our first objective of directly comparing different models, the fixed dataset approach provides a more stable and reliable framework to begin with. - -We use the Adam Optimizer because it's good at handling different kinds of data and works efficiently. The learning rate is set at a standard value of 0.001, which serves as a common and reliable starting point, ensuring a consistent basis for comparing the learning performance across all models. - -Our main tool for measuring success is the L1 loss function. This function is suited for our shortest path problem because it treats all mistakes the same, whether they're big or small. It's different from the L2 loss, which is harsher on bigger mistakes. This way, our model pays equal attention to finding shorter and longer paths correctly. - -### Metrics and Evaluation Criteria - -We use two main metrics to check how good our models perform: L1 Loss and Accuracy. L1 Loss adds up all the differences between the predicted and actual path costs across all nodes. It's a direct way to see how well the model is doing. 
- -$$ L1 \, Loss = \frac{1}{N} \sum_{i=1}^{N} |y_i - \hat{y}_i| $$ - -where $ N $ is the total number of nodes, $ y_i $ is the actual path cost for the $i$-th node, and $ \hat{y}_i $ is the predicted path cost for the $i$-th node. - - -Accuracy is the second measure. It shows what percentage of nodes the model got exactly right in predicting the shortest path. It's a simple way to understand how precise our model is. - -$$ Accuracy = \frac{\text{Number of Correct Predictions}}{\text{Total Number of Predictions}} \times 100\% $$ - -Here, a prediction is counted as "correct" if its rounded value is the true shortest path. I.e., if the model predicts 10.3 for a node, but the true sortest path is 11, this is marked as incorrect. If it predicts 10.7, it will be counted as correct. - -Together, these two measures help us see how well our Graph Transformer is doing compared to other models like MLPs and GNNs, especially in solving shortest path problems in graphs. - -## Results and Comparative Analysis - -In our analysis, we compared the performances of MLPs, Transformers, and GNNs using our generated dataset. Initially, we evaluated the performance of each architecture across different sizes by recording in-sample and out-of-sample losses at each epoch, along with out-of-sample accuracy. We compared three model sizes: "small," "mid," and "large," which correspond to the depth of the model. For GNNs, this signifies the number of iterations; for Transformers and MLPs, it refers to the number of layers. Small models have 2 iterations/layers, mid models 5, and large models 10. - -To maintain fair comparisons, the MLP and the Transformer were designed to have an equal total number of trainable parameters at each size. We excluded GNNs from this comparison, as they outperformed both models with significantly fewer parameters. - - -### GNN performance - -Our GNNs demonstrated exceptional performance on the shortest path task. 
Tailoring the model's architecture to this problem (using maximum aggregation and initializing node features appropriately) likely contributed to this success. However, several interesting observations emerged from our results. We compared GNNs of three different sizes: small (2 iterations, 13k parameters), medium (5 iterations, 32k parameters), and large (10 iterations, 64k parameters). - - -
- {% include figure.html path="assets/img/2023-11-08-graphs-transformers/comparative_plots/gnn_train_loss.svg" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-graphs-transformers/comparative_plots/gnn_val_loss.svg" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-graphs-transformers/comparative_plots/gnn_val_acc.svg" class="img-fluid rounded z-depth-1" %} -
-
- GNN Training loss, validation loss & validation accuracy for different sizes -
- -We observed that both medium and large GNNs achieved over 99% out-of-sample accuracy after just a few epochs. The large model's performance aligns with expectations, as it conducts 10 iterations in total—matching the maximum number of iterations required by standard shortest-path-finding algorithms like Bellman-Ford for n-node graphs. - -Surprisingly, the medium-sized model, with only 5 iterations, also achieved similar accuracy. This initially seems counterintuitive since 5 iterations suggest that information can only propagate to nodes within 5 neighbors. However, as noted in , our GNN effectively learns an optimized version of the Bellman-Ford algorithm, requiring only half the number of iterations compared to the standard algorithm. - -This explains why the medium GNN can converge efficiently, but the small model with just 2 iterations cannot. Even with an optimized Bellman-Ford algorithm, a 2-iteration GNN would only correctly solve paths shorter than or equal to 5 nodes, limiting its overall learning capacity. - - -### MLP performance - -Although GNNs quickly converged to near-perfect predictions, their inherent suitability for the shortest path task was expected. To gauge the Transformers' performance more accurately, we compared them with MLPs, which are not specifically designed for this task. As indicated in , MLPs struggle with iterative algorithms like Bellman-Ford due to difficulties in learning for-loop structures. We analyzed MLP performance across three sizes: small (2 layers, 44k parameters), medium (4 layers, 76k parameters), and large (8 layers, 142k parameters). It's important to note that each GNN size had roughly half the parameters of the corresponding MLP size. - -
- {% include figure.html path="assets/img/2023-11-08-graphs-transformers/comparative_plots/mlp_train_loss.svg" style="width: 100%; height: auto;" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-graphs-transformers/comparative_plots/mlp_val_loss.svg" style="width: 100%; height: auto;" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-graphs-transformers/comparative_plots/mlp_val_acc.svg" style="width: 100%; height: auto;" class="img-fluid rounded z-depth-1" %} -
-
- MLP Training loss, validation loss & validation accuracy for different sizes -
- -The smaller MLP models converged faster, yet both small and medium models barely exceeded 50% accuracy, even after extensive training (16 epochs for GNNs and 64 for MLPs). This supports the hypothesis that MLPs face challenges in learning iterative algorithms. - -Increasing model size or training duration did not significantly improve performance; the largest model struggled particularly with fitting the problem. While more hyperparameter tuning might enhance the "large" model's performance, the "medium" model's struggles suggest that MLPs have inherent difficulties with this task, regardless of parameter count. - - -### Transformer performance - -Turning our attention to Transformers, we initially doubted their ability to match GNN performance levels. However, the question remained: could they outperform MLPs, and if yes by how much? We began by testing a basic Transformer version (no attention mask, positional encoding, or skip connection). To ensure fair comparisons, all model sizes maintained approximately the same number of parameters as the MLPs, with equivalent layers/iterations (small: 2 layers, 44k parameters; medium: 5 layers, 86k parameters; large: 10 layers, 172k parameters). - -
- {% include figure.html path="assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_train_loss.svg" style="width: 100%; height: auto;" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_val_loss.svg" style="width: 100%; height: auto;" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_val_acc.svg" style="width: 100%; height: auto;" class="img-fluid rounded z-depth-1" %} -
-
- Transformer training loss, validation loss & validation accuracy for different sizes -
- - -A notable improvement in accuracy was observed, with the best-performing Transformer model reaching 70% accuracy. The training was stopped at 64 epochs to maintain consistency across all models. As it does not show signs of overfitting, extending training beyond 64 epochs might further enhance the Transformer's performance. Interestingly, increasing the model size to over 150k parameters did not significantly boost performance under our hyperparameter settings. The small and medium architectures exhibited similar performance, with the medium model slightly outperforming after a few epochs. - -Regarding sizes, similarly to the MLP, increasing the depth and parameter count of the transformer over 150k parameters doesn't seem to help with the model's performance, at least with our set of hyperparameters (as this big of a transformer is long to train, we haven't been able to do much hyperparameter tuning). The small and medium architectures seem almost tied, but the medium one seems to perform better after a few epochs. - -Our hypothesis in Part 1 suggested that Transformers, capable of performing $O(n^2)$ operations per attention head, should learn loop structures more effectively. However, their learning is constrained by the specific operations allowed in the attention mechanism. To test this, we proposed three enhancements to our Transformer: an attention mask, positional encoding, and a skip connection, as outlined in Part 1 and Appendix A. We hypothesized that these additions would enable the Transformer to better learn the Bellman-Ford iteration step. - -### Transformer with Attention Mask, Positional Encoding & Skip Connection - -As discussed in Part 1, we adapted our Transformer model to include these three components, expecting an improvement in performance. The attention mask, a fundamental feature of Transformers, enables the model to focus on specific token relationships. 
In our setup, each token (node) attends only to its neighbors, as dictated by the adjacency matrix. We incorporated the attention mask into the medium-sized Transformer for comparison. - -Next, we added positional encoding. Based on our Part 1 discussion, positional encodings can inform the feedforward network (FFN) about the neighboring tokens selected by the attention layer. We used basic one-hot encodings, effectively adding an $n×n$ identity matrix or concatenating an $n×1$ one-hot vector to each token. Although more sophisticated encodings might be beneficial, we demonstrated the feasibility of using one-hot encodings for the Bellman-Ford update. - -Finally, we implemented a custom skip connection. Instead of a standard sum skip connection, our model concatenates the input and output of the attention head before feeding it into the FFN. This approach potentially allows the attention head to select a neighbor, with the FFN combining its token with the receiving node's token. - -We added each augmentation stepwise, building upon the previous modifications (e.g., transformer_pos_enc includes positional encoding, attention mask, and is medium-sized). Here are the results: - - -
- {% include figure.html path="assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_train_loss_all.svg" style="width: 100%; height: auto;" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_val_loss_all.svg" style="width: 100%; height: auto;" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_val_acc_all.svg" style="width: 100%; height: auto;" class="img-fluid rounded z-depth-1" %} -
-
- Transformer training loss, validation loss & validation accuracy for different sizes -
- -Each augmentation step led to clear improvements. Over 64 epochs, our base model's out-of-sample accuracy improved from 70% to over 90%. The positional encoding contributed the most significant enhancement, which was somewhat surprising given its simplicity. Overall, these results support our hypothesis regarding the Transformer's capacity to learn the Bellman-Ford iteration step. - - - -## Conclusion - -In this project, we compared MLPs, Transformers, and GNNs in solving graph-related problems, with a focus on the shortest path in Erdos-Renyi graphs. Our findings indicate GNNs excel in such tasks due to their specialized architecture. However, the adaptability of Transformers, particularly with architectural modifications like attention masks, positional encodings, and skip connections, is a significant discovery. While these models showed promise, larger MLP and Transformer models faced convergence issues, highlighting the need for better hyperparameter optimization in future work. - -Transformers’ theoretical success in approximating the Bellman-Ford algorithm, verified by empirical results, suggests potential in a subset of dynamic programming (DP) problems where DP updates are simple and manageable by attention heads. However, their capability is inherently limited compared to the theoretically more versatile GNNs, due to the softmax and linear combination constraints in attention mechanisms. Future work could delve into designing Transformer models with enhanced attention mechanisms, potentially broadening their applicability in complex DP problems. Investigating the synergy between Transformers and GNNs could also lead to innovative hybrid models. - -Overall, our exploration sheds light on the potential of Transformers in graph-related tasks, suggesting they could offer valuable insights and solutions, alongside the more established GNNs. 
This finding could open up interesting possibilities for research and innovation in neural network applications, particularly in solving complex graph-related challenges. - - -## Appendix - -### Appendix A. - -We present here a mathematical proof of how the Graph Transformer Architecture can learn the Bellman-Ford update in the Shortest Path Problem.\\ -We consider a slightly different tokenization: for every node $i$, at layer $k$, we encode its information in a tensor of the form: - -$$t_i^k = [\mathbb{1}_i, w_i, d_i^k]$$ - -where $\mathbb{1}_i \in \mathbb{R}^n$ is the positional encoding, $w_i \in \mathbb{R}^n$ the edge weights and $d_i^k$ the current shortest distance computed at layer $k$. - -Recall the formula of query-key-value attention: - -$$t_i = \frac{\sum_{j} e^{-q_i' k_j / \sqrt{2n+1}}v_j}{\sum_{j} e^{-q_i' k_j / \sqrt{2n+1}}}$$ - -Set up the weight matrices as: - -$$\begin{cases} -W_Q = \begin{pmatrix} - I_{n+1} & O_{n \times n+1} \\ - 1_n & 0_{n+1} -\end{pmatrix}\\ -W_K = \begin{pmatrix} - O_{n+1 \times n} & I_{n+1} -\end{pmatrix}\\ -W_V = I_{2n+1} -\end{cases}$$ - -so that $$q_i' k_j = w_{j,i} + d_j$$ *i.e.* attention is determined by the update values of the Bellman-Ford equation. - -Hence taking the softmax - and if necessary augmenting the weights of the matrices by a common factor -, we have the output $$t_{j^\star}$$ for the appropriate node $$j^\star = \text{argmin}_j \{w_{j,i} + d_j\}$$. - -Notice that in this configuration $$t_{j^\star}$$ is not enough to retrieve the desired edge weight $$w_{i, j^\star}$$ : we need the positional encoding from node $i$. - -The skip-connection achieves this, by concatenating original input $t_i$ with attention output $$t_{j^\star}$$. 
We can then retrieve the desired value $$w_{j^\star,i} + d_{j^\star}$$ with the MLP of layer $k$, which concludes the proof - - - diff --git a/_posts/2023-11-08-increasing-context-length-for-transformers.md b/_posts/2023-11-08-increasing-context-length-for-transformers.md deleted file mode 100644 index a4305dcb..00000000 --- a/_posts/2023-11-08-increasing-context-length-for-transformers.md +++ /dev/null @@ -1,218 +0,0 @@ ---- -layout: distill -title: Increasing Context Length For Transformers -description: How can we make attention more efficient? -date: 2023-11-08 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Annie Wang - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-08-increasing-context-length-for-transformers.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Literature Review - - name: Methodology - - name: Experiments - - name: Conclusion - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -### Introduction - -Since its release on November 30, 2022, ChatGPT has assisted users around the world with a variety of document parsing and editing tasks. These tasks often require large input contexts, since the documents and texts passed into ChatGPT's source model, GPT-3.5, can be several pages long. 
- -Like many other language models, GPT-3.5 is a unidirectional transformer that uses the self-attention mechanism. But while self-attention is an extremely powerful mechanism, it is also expensive in its time and space complexity. Standard self-attention requires $O(n^2)$ operations in terms of the sequence length $n$, since the $QK^T$ term within the attention mechanism calculates and stores the attention of each of the $n$ tokens with $O(n)$ other tokens. - -Unfortunately, the $O(n^2)$ complexity makes long input contexts difficult for transformers to handle efficiently. Over the past few years, researchers have been investigating ways of mitigating the $O(n^2)$ factor. This remains an ongoing problem, with several papers released on the topic in 2023 alone. In this post, we provide an overview of existing strategies for increasing context length for transformers. We also propose and investigate our own efficient self-attention algorithm, which we call Gaussian attention. - -### Literature Review - -In the past, large context lengths were handled using a simple partition scheme. Essentially, long inputs can be split into fixed-length chunks, where attention is computed separately for each chunk. Then, for chunk size $b$, a sequence of length $n$ requires only $O\left(\frac{n}{b} \cdot b^2\right) = O(nb)$ time to compute. However, this method has a major drawback in that information cannot be shared across partitioned blocks, leading to the fragmentation problem: the model lacks long-term dependencies and thus runs into cases where it lacks the necessary context to make accurate predictions. - -Modern methods for reducing context lengths in transformers generally try to avoid this problem by either introducing ways of sharing context across partitions or reducing self-attention calculation cost by using a simpler approximation. 
Models that fall into second category may utilize one of many different approximation techniques, such as sparse attention matrices and fixed attention patterns. We present a brief summary of existing efficient transformers. - -#### Sparse Transformer -Child et al. proposed a sparse transformer that reduces attention calculation cost from $O(n^2)$ to $O(n\sqrt{n})$. To achieve this, the sparse transformer uses a combination of strided and local attention patterns. -{% include figure.html path="assets/img/2023-11-08-increasing-context-length-for-transformers/child-et-al.png" class="img-fluid" %} -
- Standard attention matrix (left) vs. strided attention matrix (right). Only attention values for the blue squares are computed. -
- -One attention head processes a local window of size $k$ surrounding the current token $i$, while a second attention head processes tokens $j$ such that - -$$(i - j) \mod l = 0 \qquad \forall j \leq i,$$ - -where $l$ is a parameter chosen to be close to $\sqrt{n}$. Since only $O(l)$ tokens are attended upon for each token $i$, this results in the $O(n \cdot l) = O(n\sqrt{n})$ runtime. Child et al. showed that the sparse transformer can be applied to a wide range of fields, including image, text, and music, where it can be used to process audio sequences over 1 million timestamps long. - -#### Longformer -Longformer applies a dilated sliding window to capture local attention patterns and reduce overall attention cost to $O(n\cdot{w})$ for window size $w$. Across successive attention layers, gaps are placed between different elements of the sliding window—thus expanding the receptive field to thousands of tokens even for small dilation factors. In order to generalize to different language modeling tasks, Longformer introduces global tokens that attend upon every other token. These global tokens are analogous to the different input representations used by language models for different tasks; for example, BERT appends a `[CLS]` token to the start of every input in classification tasks. Despite using sparse attention contexts, Longformer was able to outperform state-of-the-art model RoBERTa on several long document benchmarks. - -#### BigBird -BigBird combines three different fixed attention patterns to achieve $O(n)$ complexity, namely -1. Global attention, consisting of tokens that attend upon every other token -2. Local attention, consisting of a sliding window around each token -3. Random attention, consisting of randomly-selected tokens - -Using this architecture, BigBird managed to increase maximum transformer context lengths by up to 8x. In the same paper, Zaheer et al. 
proved that certain sparse transformers are computationally equivalent to transformers with full attention. Theoretically, sparse transformers are capable of solving all tasks that full transformers can solve; this explains why sparse transformers are often a good approximation for full transformers. - -#### TransformerXL -TransformerXL differs from the previously discussed models, as it doesn’t increase self-attention efficiency by sparsifying the attention matrix. Instead, it retains the classic partitioning scheme and attempts to overcome the fragmentation problem via a recurrence-based approach. Using the recurrence mechanism, hidden-state sequences are stored and cached so they can be used for additional context when the model processes the next segment. Overall, this architecture allows the network to use historical information to process new information. As a result, it can support longer-range dependencies without leading to context fragmentation. TransformerXL was able to operate on sequences up to 450% longer than those of vanilla transformers, while being up to 1800 times faster; in addition, it achieved SOTA results on the datasets `text8`, `Penn Treebank`, and `WikiText-103`. - -#### Landmark Tokens -More recently, Mohtashami et al. suggested using landmark tokens to determine which tokens should be attended to. Inputs are divided into a series of fixed-length blocks, and each block is associated with a landmark token. In particular, this architecture is designed so that a high attention score on any individual token within a block also leads to a high attention score on the block’s “representative vector”—which is the landmark token itself. Then, during inference, the transformer retrieves the $k$ blocks corresponding to the $k$ highest-valued landmark tokens and attends only upon the tokens in these blocks. Mohtashami et al. 
claimed that this architecture can extend the context length of Llama to more than 32k tokens, allowing it to support inputs of the same length as GPT-4. - -#### VisionTransfomer -Most of the models discussed above apply specifically to transformers used for language modeling. However, algorithms for reducing attention complexity have been successfully used for other tasks as well. For example, VisionTransformer managed to achieve SOTA performance while limiting the attention context to a 16x16 patch around each pixel. - -#### Hardware Methods -Aside from algorithm-based techniques, there have also been attempts to make basic transformer algorithms run faster on existing hardware. Although sparse attention algorithms may have better time complexity, they may not achieve practical speedups due to hardware inefficiencies. In order to achieve practical speedups on transformer training, Dao et al. proposed FlashAttention, an I/O-aware attention algorithm that implements the basic attention computation. FlashAttention achieves speedups of up to 15% on BERT-Large, showing that efficient transformers do not necessarily need to use approximate attention algorithms. - -#### Other Methods -Numerous other algorithms for extending transformer context lengths have been proposed, including retrieval-based methods, low-rank approximations, and expectation-based methods. - -### Methodology -To see what types of context reduction algorithms are effective, we propose and test our own efficient transformer. We investigate whether transformers using Gaussian-distributed fixed attention patterns can perform as well as standard transformers. For each self-attention layer, we sample a Gaussian random distribution to determine which elements of the attention matrix we should compute. We analyze this approach for the unidirectional language modeling case, where the goal is to predict the next token of a given input sequence. - -
-
- {% include figure.html path="assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-1.png" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-2.png" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-3.png" class="img-fluid" %} -
-
- -
- Examples of Gaussian attention masks with $c=5$ and inputs of length 10. -
- -In language modeling, the most important context for predicting a new token often comes from examining the tokens that immediately precede it. Previous work has taken advantage of this pattern by employing fixed local attention patterns, such as the sliding window pattern used by BigBird. For token $i$, random samples from a truncated Gaussian distribution with mean $i$ and standard deviation $\sigma = \frac{\mu}{2} = \frac{i}{2}$ (this means that 0 is two standard deviations from the mean $i$) will produce values $j$ close to $i$ with high probability. This implies that we will likely calculate the attention scores for some local region of each token $i$, allowing the model to account for important local context connections. - -On the other hand, it may also be possible that some distant token $j$ has a large impact on the prediction of token $i$. For example, if you pass in a document in which the first sentence defines the overall purpose of the document, we might need to pay attention to this sentence even in later sections of the document. Fixed-pattern Gaussian attention allows for this possibility by calculating attention scores for $i$ and distant tokens $j$ with a lower but still nonzero probability. As a result, Gaussian attention offers some flexibility that may not be present in other fixed-pattern attention mechanisms, such as the sliding window technique. - -#### Algorithm -The model takes a hyperparameter $c$, where $c$ is the number of tokens that each token attends upon. For every token $i$ in each self-attention layer, we select $c$ tokens from the Gaussian distribution $\mathcal{N}(i, i/2)$, where $\mathcal{N}$ is truncated at $0$ and $i$. Since our task focuses on the causal language modeling case, a token $i$ computes attention scores only for tokens $j \leq i$. (If $c$ is greater than the number of tokens in range $[0,i]$, the result is to sample every token from $[0,i]$.)
To match sampled random numbers with actual token indexes, we cast each random number $x$ to index $i = \lfloor{x}\rfloor$. In the case of duplicate indexes, we assign each duplicate index to the nearest unused index in range $[0,i]$. This algorithm is summarized below. -``` -for each token i: - sample min(c, i) values from N(i, i/2) - create list of indices by flooring every sampled value - remove duplicates by assigning each duplicate to the nearest unused index - # such an assignment always exists by the pigeonhole principle -``` - -For each token $i$, we set all attention values for tokens which are not selected to zero. As a result, each token attends only to at most $c$ tokens, leading to an overall cost of $O(c \cdot n) = O(n)$ for constant $c$. - -### Experiments -Since we had limited training resources, we unfortunately couldn't test Gaussian attention on large models like BERT or GPT. Instead, we used a toy study involving small models with smaller inputs—this leads to some additional considerations in analyzing our results, which we address later. - -We first tested whether models trained with limited Gaussian attention can achieve similar performance as models that were trained on full self-attention. We trained models with $c = 5$ and $c=10$ and compared them to the performance of the base model. For our base experiments, we used three self-attention heads per layer and six layers in total. - -Our evaluation metric for all models was next-token cross-entropy loss against a corpus of Shakespeare texts. (Training is optimized with Adam and a learning rate of 0.0001.) - -Base experiment results are shown below. 
- -| Model | Epochs | Training Loss | Validation Loss | -|----------|--------|------------|-----------| -| Base | 80 | 4.2623 | 4.4390 | -| Base | 130 | 3.7709 | 4.0320 | -| Base | 140 | 3.7281 | 3.9964 | -| $c = 5$ | 80 | 3.7458 | 4.0355 | -| $c = 10$ | 80 | 4.1619 | 4.3801 | - -{% include figure.html path="assets/img/2023-11-08-increasing-context-length-for-transformers/train-vs-val-loss.png" class="img-fluid" style="width:150px; height:100px;" %} - -We found that both the $c=5$ and $c=10$ models were able to achieve similar performance as the base model, which suggests that Gaussian attention may be a good approximation for full attention. Interestingly, both Gaussian models required significantly fewer epochs to reach the same performance as the base model. Both Gaussian models also demonstrated faster separation between training and validation losses. We hypothesize that the smaller attention context helps focus learning on more relevant tokens, which lowers the number of training epochs needed. As a result, the model is able to learn the language modeling task more rapidly, leading to faster overfitting. - -Although initial results were promising, we chose to investigate a few factors that could have inflated model performance. - -In order to determine whether the Gaussian attention models are affected by input length, we tested the same setups with longer inputs. Our base experiments used relatively small inputs, each corresponding to one piece of dialogue in a Shakespeare script. On average, these inputs were approximately 30 tokens long; with $c = 5$, the selected context may be more than $\frac{1}{6}$ of the total tokens. As a result, Gaussian model accuracy might be inflated for small inputs, since the context essentially covers a large portion of existing tokens. To make $c$ a smaller fraction of the input length, we modified the dataset instead to create inputs with an average length of 100 tokens. We summarize the results in the table below. 
- -| Model | Epochs | Training Loss | Validation Loss | -|----------|--------|---------------|------------| -| Base | 90 | 5.5906 | 5.6207 | -| $c = 5$ | 90 | 5.5769 | 5.6166 | -| $c = 10$ | 90 | 5.6237 | 5.6565 | - -With the longer input contexts, all three models had worse performance when trained for the same number of epochs. However, both Gaussian models managed to achieve approximately the same loss as the original model. This again suggests that Gaussian attention is a valid approximation of the standard attention matrix. - -We further investigated whether the performance of the Gaussian models degraded rapidly when using a smaller number of layers and attention heads. Logically, increasing the number of attention heads would help mask bad attention patterns formed by the Gaussian sampling strategy. For example, although the sampling process selects tokens $j$ near token $i$ with high probability, it is possible that some attention head $x$ does not select the relevant tokens for a token $i$. With the addition of more attention heads, a different head may compensate for the bad head by operating on the correct tokens. Increasing the number of attention layers similarly increases the number of attention heads, where good heads can compensate for bad ones. Experiments showed that even with one layer and one attention head, the Gaussian models were able to achieve approximately the same performance as the base model. 
- -| Model | Input Type | Epochs | # Heads | # Layers | Training Loss | Validation Loss | -|----------|------------|--------|---------|----------|---------------|-----------------| -| Base | Short | 80 | 1 | 1 | 5.1009 | 5.1605 | -| Base | Long | 80 | 1 | 6 | 5.5994 | 5.6289 | -| Base | Long | 90 | 1 | 6 | 5.5906 | 5.6207 | -| $c = 5$ | Short | 80 | 1 | 1 | 5.0481 | 5.1139 | -| $c = 5$ | Long | 80 | 1 | 6 | 5.5884 | 5.6273 | -| $c = 5$ | Long | 90 | 1 | 6 | 5.5769 | 5.6166 | -| $c = 10$ | Short | 80 | 1 | 6 | 4.5597 | 4.6949 | -| $c = 10$ | Short | 90 | 1 | 6 | 4.5432 | 4.6809 | -| $c = 10$ | Long | 80 | 1 | 6 | 5.6345 | 5.6666 | -| $c = 10$ | Long | 90 | 1 | 6 | 5.6237 | 5.6565 | - -However, we noticed that with fewer heads and layers, the base model trained at approximately the same rate as the Gaussian model. A smaller number of attention heads and attention layers implies that fewer parameters need to be updated to learn the task; this typically means that training is faster for smaller models. As a result, it makes sense that a smaller model would benefit less from the increase in training speed that reduced attention context offers; since the model is so small, training is already fast and any decrease in training speed would be minor. - -To test the limitations of Gaussian attention, we experimented with extremely sparse attention patterns that selected only one token for each model. - -
-
- {% include figure.html path="assets/img/2023-11-08-increasing-context-length-for-transformers/vert-attn.png" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2023-11-08-increasing-context-length-for-transformers/diag-attn.png" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2023-11-08-increasing-context-length-for-transformers/gaussian-attn.png" class="img-fluid" %} -
-
- -Although these models did not perform as well as the base transformer, we found that the token that was attended upon made a significant impact on the final loss. As shown in the table below, the models that employed a diagonal or Gaussian attention pattern performed significantly better than the model that used a vertical attention pattern on the first token. This suggests that local attention patterns were the most important ones for improving the outcome of our task; as a result, Gaussian attention may perform well specifically because it emphasizes the local attention context. - -| Model | Epochs | # Layers | # Heads | Training Loss | Validation Loss | -|----------|--------|----------|---------|---------------|-----------------| -| Diagonal | 80 | 1 | 6 | 5.5089 | 5.5400 | -| Vertical | 80 | 1 | 6 | 5.6652 | 5.6906 | -| Gaussian | 80 | 1 | 6 | 5.3231 | 5.3744 | - -#### Implications and Limitations -Our experiments showed that Gaussian attention has potential as an algorithm for improving transformer efficiency and increasing context lengths. We note that these experiments may not reflect the algorithm’s actual performance in real-world scenarios. Because we did not have the capacity to train a language model on the scale of BERT or GPT, we experimented only with much smaller models that processed much smaller contexts. As a result, our experimental results may not extend to larger models. Additionally, due to limited training time, we did not train any of the models we used for more than 150 epochs; with more training time, it is possible that the base transformers may outperform the modified ones. In order to generalize to larger models, Gaussian attention may need to be combined with other attention patterns, like global attention. More research is needed to fully understand its potential and shortcomings. - -## Conclusion - -Today, methods for increasing context length in transformers remains an important research topic. 
Although researchers have proposed numerous efficient transformers and self-attention algorithms, a concrete solution for increasing transformer context lengths has yet to be found. With recent developments in large language models, the number of tasks that transformers can be applied to is increasing rapidly. As a result, the search for an efficient transformer is more important than ever. - -Our work shows that Gaussian distributions can potentially be used to build fixed-pattern attention masks. However, the performance of Gaussian attention masks in larger models remains to be confirmed and requires further study. - diff --git a/_posts/2023-11-08-mapreason.md b/_posts/2023-11-08-mapreason.md deleted file mode 100644 index 5a3e10ff..00000000 --- a/_posts/2023-11-08-mapreason.md +++ /dev/null @@ -1,362 +0,0 @@ ---- -layout: distill -title: "Reasoning with Maps: Assessing Spatial Comprehension on Maps in Pre-trained Models" -description: Map reasoning is an intuitive skill for humans and a fundamental skill with important applications in many domains. In this project, we aim to evaluate the capabilities of contemporary state-of-the-art Large Vision-Language Models (LVLMs) for reasoning on maps and comparing their capabilities with human participants on the coregistration task. We additionally propose and release a novel dataset to serve as an initial benchmark for map reasoning capabilities. We run an extensive analysis on the performance of open-source LVLMs showing that they struggle to achieve good performance on our dataset. Additionally, we show that coregistration is intuitive to human participants that were able to achieve close to perfect accuracy in a time-constrained manner. 
-date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Abdulrahman Alabdulkareem - affiliations: - name: MIT CSAIL - - name: Meshal Alharbi - affiliations: - name: MIT LIDS - -# must be the exact same name as your blogpost -bibliography: 2023-11-08-mapreason.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Motivation - subsections: - - name: Literature review and the gap in previous literature - - name: New Dataset - subsections: - - name: A glimpse of the coregistration task - - name: Experiments - subsections: - - name: Zero-shot evaluation - - name: Fine-tuned evaluation - - name: Improving results for co-registration - - name: Human benchmarking - - name: Analysis on prompt engineering - - name: Investigating the failure points of LVLMs on coregistration - - name: Discussion, Limitations, and Future Work - - name: Conclusion - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } - .custom-margin { - margin-bottom: 0em !important; /* Adjust the value as needed */ - } ---- - -# Motivation -Humans possess a remarkable ability to intuitively understand and make sense of maps, demonstrating a fundamental capacity for spatial reasoning, even without specific domain knowledge. To illustrate this, consider the following question: Do these two maps represent the same location? - -
-
- {% include figure.html path="assets/img/2023-11-08-mapreason/85_301.png" class="img-fluid z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-mapreason/85_302.png" class="img-fluid z-depth-1" %} -
-
- -Answering this query necessitates **coregistration**, the ability to align two maps by overlaying their significant landmarks or key features. Moreover, humans can go beyond mere alignment; they can tackle complex inquiries that demand aligning maps, extracting pertinent data from each, and integrating this information to provide answers. - -Maps reasoning is a fundamental skill with important applications in domains such as navigation and geographic analysis. For example, pilots need to be able to reference and understand multiple kinds of FAA charts as a core prerequisite for many aviation-related tasks. Further, making inferences on historical maps that lack digitized versions relies on human capabilities for reasoning on maps and is crucial for various fields such as geology or archeology. Machine learning models that can match human visual map understanding hold substantial promise in these applications. Additionally, such models have the potential to enhance accessibility by providing alternative modalities for individuals with visual impairments to comprehend and extract spatial information from maps. - -Our work aims to tackle the following question: To what degree do contemporary state-of-the-art (SOTA) machine learning models, pre-trained on vast datasets comprising millions or even billions of images, possess the capacity for spatial reasoning and do they reach the human level? We will do this specifically by focusing on the task of coregistration. - -We propose a map reasoning dataset which we believe is a suitable initial benchmark to test the capabilities of multimodal models on coregistration; The example given above about coregistration possibly cannot be answered directly using prior knowledge a Large Language Model (LLM) might have while ignoring the vision modality. 
Moreover, the complexity of the task can be increased and controlled, leading to a rigorous evaluation of the model's ability to comprehend and synthesize information across textual and visual modalities. - -## Literature review and the gap in previous literature - -**Multimodality:** There are countless significant recent advances in Large Language Models (LLMs) achieved by models such as Meta’s Llama 2, OpenAI’s GPT 3, Google’s Palm 2, WizardLM, and countless others. These models have successfully achieved or surpassed human-level performances on numerous natural language processing tasks (such as Retro-Reader on the SQuAD2.0 benchmark, ST-MOE on the SuperGLUE benchmark, and many other benchmarks). LLMs have achieved a surprisingly high level of knowledge about text by being able to achieve very high zero-shot scores on many NLP tasks demonstrating their understanding and versatility in Language as opposed to non-LLM NLP models that are usually trained to accomplish a specific task and do not generalize beyond that task. - -Recently there has been a massive push towards integrating other modalities into LLMs, most notably vision. Models such as Google’s Gemini, OpenAI’s GPT 4, VisionLLM, and many others all focus on scaling the capabilities achieved by LLMs in the NLP domain to the vision domain. The language modality LLMs are trained on consists mostly of written text recorded online that follows a grammatical structure from a human language (English, etc.) or a computer language (Python, HTML, Latex, etc). However, this is in stark contrast to the vision modality which can consist of categorically different representations consisting of anything from photographs of people in the park, to a representation of the Silicon Atom from a computer simulation, to an infographic about the importance of drinking enough liters of water per day. 
This difference between the distribution of the entirety of text modality and the vision modality hints that much more careful attention needs to be placed on compiling unique and diverse datasets that aim at sufficiently representing the distribution of the vision modality. - -**Vision-Language Benchmarks:** There were significant strides made in the past years in developing benchmarks and datasets for LVLMs which are composed of questions that require both Language and Vision to successfully answer. However, there are very few datasets that include or focus on maps as part of the benchmark. LVLM-eHub compiles numerous benchmarks for LVLMs such as benchmarking object and scene recognition using ImageNet1K, GVT, and many other datasets, or benchmarking visual common sense using ImageNetVC and the visual commonsense reasoning datasets. They also include a Visual Reasoning benchmark, however, it focuses mostly on real-world photographs (such as the Visual Spatial Reasoning dataset or the VisDial Dataset that is based on images from COCO). However, none of these datasets place focus or sufficiently contain images of maps or diagrams while they place much more focus on real-world images. - -A frequently used dataset for evaluating LVLMs, which is also included in the Visual Reasoning benchmark, is the ScienceQA dataset which includes more than 20 thousand multimodal multiple-choice questions across 127 categories including a category for maps. However, examining the narrow slice of questions with images in the geography/map category shows that many of them do not necessitate a vision component and can be answered solely based on the textual question (e.g. “Which of these states is farthest north?” with four different states as a multiple choice question), or provide an entirely unused visual representation (e.g. providing a map of the United States with no text alongside the question “What is the capital of New York?”). 
Out of the questions that rely on the visual component to correctly answer, many of them require relatively little understanding of maps such as asking “Which country is highlighted?” which only requires visual matching of the highlighted section with typical shapes of countries or continents. Additionally, recent papers such as LLama-adapter have demonstrated that it’s possible to achieve a high accuracy of 78.3% on ScienceQA using an unimodal text-only Large Language Model. Thus, although ScienceQA does have a subsection dedicated to maps, it does not seem sufficiently capable of testing the capabilities of LVLMs to reason and understand maps. - -An area closely related to maps that does have a relatively higher degree of focus is the capability of models to parse and reason about diagrams, figures, and plots. Datasets on this topic include the ACL-FIG which involves classifying and labeling scientific figures, InfographicVQA which requires reasoning over data visualizations in infographics, ChartQA which requires reasoning over charts, and many other datasets that focus on figures and plots. Models have been developed to specifically tackle this challenge, such as Google’s DEPLOT which is capable of reasoning over charts and plots by translating them to text and then using an LLM as a reasoning engine on top of the outputted text. However, charts and plots are still significantly different from maps, as the plots these datasets usually contain are simple line charts and bar graphs that can be translated into a table or textual format in a relatively lossless manner, while it is difficult or impossible to perfectly transform a sufficiently detailed map to a textual format without losing information. This illustrates the inherent complexities associated with processing maps meant to depict dense information which requires direct reasoning on the vision modality as opposed to charts and plots which present data in a simple manner. 
- -**Maps Reasoning:** Huge strides have been made in specific tasks related to maps, such as image-to-map conversion and map segmentation. However, we wanted to focus more generally on map understanding and reasoning by LVLMs as opposed to a single task-specific performance. To draw on an analogy, consider the act of isolating specific parts of speech (such as nouns or verbs) in language. A model designed exclusively for this task lacks the comprehensive understanding exhibited by an LLM which is proficient in addressing almost any language task. In the context of map co-location, deep learning models are employed solely as preprocessing steps to extract relevant information for subsequent utilization by matching algorithms as opposed to an LVLM model capable of general reasoning on maps. For example, the authors in this study use region-based CNN to extract road intersections, which are subsequently input into a map-matching procedure. Other features like street names have also been proposed in the literature. In general, current frameworks for map reasoning require many hand-crafted and engineered steps (see, e.g., this work and the references within). A recently proposed dataset, MapQA, is closely related to what we consider as map reasoning. However, the maps contained are of sufficiently low information-density and exhibit similar limitations to what we described in InfographicVQA and ChartQA as the images provided can be sufficiently translated to a textual domain before considering the textual input using a similar technique to DEPLOT. To the best of our knowledge, there are no examples in the literature where LVLMs are used to directly reason about maps at the detail we propose and perform tasks such as coregistration. 
- -Our aim is to tackle the gap in assessing the map reasoning capabilities of LVLMs by developing a dataset aimed only at coregistration and analyzing the capabilities of existing models on such a dataset. We focus our benchmark construction on the specific task of coregistration as it serves as an indicator of map reasoning capabilities and is one step towards constructing a comprehensive benchmark for map reasoning capabilities of LVLMs. - - - -
- -# New Dataset - -We have opted to create and compile a map dataset focusing on maps from the aviation domain for our research. The maps we utilized are carefully crafted by aviation agencies to provide a wealth of information while maintaining readability within a concise timeframe, ensuring clarity for pilots. Our dataset will be constructed by incorporating maps from the following sources: - -1. **World Visual Flight Rules (VFR):** These maps are intended to guide pilots when they operate aircraft visually. They include aeronautical and topographic information such as airports, obstructions, and navigation aids. - -2. **World Instrument Flight Rules (IFR) Low** These maps are suitable to assist pilots when they control the aircraft through instruments. They contain information such as cruising altitudes, route data, and controlled airspaces. - -These maps are accessible in an interactive environment through the SkyVector website ([VFR](https://skyvector.com/?ll=42.3525,-71.025833333&chart=301&zoom=2), [IFR Low](https://skyvector.com/?ll=42.3525,-71.025833333&chart=302&zoom=1)), which we used as part of our dataset generation pipeline. - -To generate the map snippets for our experiment, we chose to sample from the previous map sources around airports. This selection guarantees that the snippets are inherently information-rich, given that the map originates in the aviation domain. To ensure diversity in our dataset, we specifically sampled airports situated in the states of Massachusetts, New York, Delaware, Arizona, and Hawaii. - -The resulting dataset exhibits significant variations in terms of density, featuring both isolated airports and those nestled within cities, diverse locations such as inland, seaside, and islands, as well as various terrain types ranging from greenery landscapes, mountainous regions, and arid environments. In total, our dataset contains 1185 image pairs, each image is 600x600 pixels in PNG format. 
The total size of our dataset is 1.28 GB. - -## A glimpse of the coregistration task -To gain an understanding of our task and its intricacies, we present a few examples from our dataset. Generally, humans can successfully align two maps by identifying common features, which fall into one of the following categories: - -- **Terrains:** such as shorelines or mountains. -- **Charts:** such as flight paths or restricted airspaces. -- **Landmarks:** such as airport or city names. - -The process of mapping by terrain is typically swift for humans, especially when there are ample distinctive details. On the other hand, mapping by chart requires a more thoughtful approach, involving careful examination to establish a connection between the depicted attributes. Mapping by names usually serves as a last resort, employed if the prior approaches prove unsuccessful. Consider the following examples: - - -
-
- {% include figure.html path="assets/img/2023-11-08-mapreason/43_301.png" class="img-fluid z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-mapreason/43_302.png" class="img-fluid z-depth-1" %} -
-
-
- Positive Pair #1: A VFR map (left) and an IFR map (right) that depict the same location (Port Allen Airport in South Kauai, Hawaii) and can be coregistered easily by the shorelines of the island. -
- - -
-
- {% include figure.html path="assets/img/2023-11-08-mapreason/1038_301.png" class="img-fluid z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-mapreason/1038_302.png" class="img-fluid z-depth-1" %} -
-
-
- Positive Pair #2: A VFR map (left) and an IFR map (right) that depict the same location (Circle H Ranch Airport in Arizona) and can be coregistered by aeronautical lines (even if one does not know what these lines represent in the aviation domain). -
- - -
-
- {% include figure.html path="assets/img/2023-11-08-mapreason/779_301.png" class="img-fluid z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-mapreason/779_302.png" class="img-fluid z-depth-1" %} -
-
-
- Positive Pair #3: A VFR map (left) and an IFR map (right) that depict the same location (Bertrand Chaffee Hospital Heliport, New York). The VFR map is feature-rich providing information for Helicopter pilots while the IFR map is a hard example due to being almost blank as the Heliport does not facilitate landing any IFR aircraft. Thus, the IFR map only depicts certain names of cities and waypoints. The two maps can be coregistered by matching the name SPRINGVILLE. -
- - -All of these examples are positive (the maps show the same location). We showcase below negative examples with varying complexity. - - -
-
- {% include figure.html path="assets/img/2023-11-08-mapreason/51_301.png" class="img-fluid z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-mapreason/223_302.png" class="img-fluid z-depth-1" %} -
-
-
- Negative Pair #1: An easy and straightforward negative for humans due to the VFR map depicting a shoreline crossing the map vertically, while the IFR depicts only a landmass. -
- - -
-
- {% include figure.html path="assets/img/2023-11-08-mapreason/1037_301.png" class="img-fluid z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-mapreason/1013_302.png" class="img-fluid z-depth-1" %} -
-
-
- Negative Pair #2: A harder negative example. Aeronautical lines partially appear similar but the locations are different. Naively matching circles on both maps to classify a positive pair would provide a false positive. -
- -We showcase multiple positive and negative pairs alongside the natural reasoning that a human would take to correctly classify the pairs. We hope that this showcases the complexity of the task and the various strategies involved in achieving successful coregistration. - - - -
- -# Experiments - -## Zero-shot evaluation - -To start, we want to evaluate the zero-shot performance of pre-trained LVLMs on the task of identifying whether the two images are the same (coregistration). The models we start our evaluation with are BLIP-2, ViLT, LXMERT-VQA, and LXMERT-GQA. We specifically chose these models as they are all publicly available multimodal text generative models that were partly trained on visual question-answering datasets. Thus, they are able to accept both the vision and language inputs consisting of an image of the two side-by-side maps alongside the yes-or-no question of whether the two maps depict the same geographical location. - -To verify that the models we obtained are behaving as expected and are capable of answering a textual question that relies on a visual component, we compile a very simple dataset of 200 cat and dog pictures, half the images depict a cat while the other half depict dogs. We present these trivial images to the models alongside the prompt “Is this an image of a cat? Answer:” and generate a single token. As expected, out of the 200 images all four models achieved an almost perfect classification accuracy (>95% for all 4 models) by answering with either a “Yes” or a “No” token. - -This is not surprising because, as mentioned, object recognition questions are very prevalent in visual question-answering datasets, especially on ubiquitous everyday objects such as cats and dogs. To see if these models can generalize beyond their training datasets and properly reason on maps, we start by running the following experiment: - -**Experiment #1:** For each VFR and IFR image pair, we generate two examples (positive and negative). For the positive example, we use the correct pairing (e.g., maps from the same location with the two different styles). For the negative example, we randomly replace one map uniformly from our datasets. 
Each model is provided with a concatenation of the two maps in its vision input, and with the question “Do these two maps show the same location? Answer with "Yes" or "No". Answer:” in its text input. - -In total, each model was asked 2370 questions. Below, we show the accuracy, precision, and recall that each model obtained. - -
-
- {% include figure.html path="assets/img/2023-11-08-mapreason/experiment_01_01.png" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2023-11-08-mapreason/experiment_01_02.png" class="img-fluid" %} -
-
- -The models performed barely above random guessing in the zero-shot experiment, and some models consistently produced the same single output (either “yes” or “no”) regardless of whether the input image was a positive or negative pair. - -While the results of the models are very low and barely above random guessing, we wanted to analyze whether this failure is due to the model not comprehending the task or whether the issue is simply in the last layer of the model where the text generation occurs. The reason behind this analysis is that there is a possibility that the LVLM is able to correctly capture all the features necessary for determining whether the two maps coregister while still failing at providing the final answer due to the final layer of the model outputting an incorrect distribution over the labels (or tokens in the case of LVLMs). Thus we decide to ignore the last linear layer of the model (the language model head) and capture the hidden state of the last token from the last layer of the model. - -## Fine-tuned evaluation - -Using this methodology, the output we obtain from each model is a single embedding vector (the length of which depends on the embedding size of the model). Usually, a single linear layer is finetuned on the last layer to directly predict the answer. However, we opt for a more detailed analysis by training multiple classifiers (Logistic Regression, SVM, and XGBoost) that take the embedding vector and produce a binary output. In all the upcoming figures, we always report the results using the classifier that performed the best (for each model) on the validation set. - -Moreover, it is known that LLMs can be sensitive to prompts. Thus, to more rigorously analyze the performance of the models, we create a total of 8 variations of the coregistration question, as this one question serves as the text input for all pairs of images in our created dataset. A detailed analysis of these prompts will be provided later. 
Similar to the classifiers, we report results using the best prompt for each model. - -This methodology of using the model to extract a rich embedding that contains the answer to our prompt (instead of generating the answer directly as text) means that we are now capable of utilizing additional large transformer-based multimodal models that output embedding vectors instead of directly outputting text. Thus we include in our analysis two such models which are FLAVA and GIT that output embeddings that represent the multimodal input. Those embeddings are then similarly passed to the simple classifiers to detect whether the models are capable of capturing the required details from the maps to answer the co-location task correctly. Thus we perform our second experiment: - -**Experiment #2:** We generate the examples using the same procedure described in Experiment #1. Then, for each model, we pass through the entire dataset and generate an embedding for each sample in our dataset. We then train the simple classifiers on 70% of the embedding vectors to predict the positive and negative pairs. We report the evaluation on the other 30% of the data and report the results in orange below. - -
-
- {% include figure.html path="assets/img/2023-11-08-mapreason/experiment_02_01.png" class="img-fluid" %} -
-
- -The accuracy of this fine-tuning methodology (shown in orange) achieves around 65%-70% for all models which is a significantly higher accuracy compared to the zero-shot accuracy of the LVLMs (shown in red) which was incapable of achieving more than 55%. This experiment shows that the embedding of the last token does contain a slightly more feature-rich representation of the multimodal input and can be used to classify the positive/negative pairs at a higher rate than random but is overall still incapable of sufficiently solving the task. - -Thus far we have tried to assess the capabilities of LVLMs and (more generally) Multimodal Vision Language models on solving the coregistration task, and we assessed this capability using our constructed dataset of determining whether two maps of different styles represent the same location or not. Given the low accuracy achieved on this task, we can claim that the LVLMs we have analyzed are incapable of reasoning and answering more complicated questions relative to our simple baseline question of “Are these two maps of the same location” - -## Improving results for co-registration - -We emphasize that our goal is not to directly achieve high accuracy on this task by utilizing any machine learning model, but rather it is to evaluate the capabilities of LVLMs to reason on maps. Furthermore, we created and proposed this dataset and task to act as a baseline for assessing the reasoning abilities of LVLMs on maps. - -However, despite the failure of LVLMs to answer this baseline task, we next want to assess the inherent difficulty of the dataset. For this, we develop a simple model by utilizing the same simple classifiers used above to train on the embedding of a unimodal vision-only model. 
Unlike LVLMs, we are not testing our proposed task-specific model on the dataset to assess its capabilities for reasoning on maps, as the model is not trained to answer questions based on images, does not accept text modality, and is specifically fine-tuned to solve this one narrow task. Thus, the results of this experiment serve only to give a sense of the difficulty of the task that we considered as a simple baseline for map reasoning. This will hopefully demonstrate that the relatively older frozen vision-only models can achieve a significantly higher accuracy on this specific task when compared to state-of-the-art open-source LVLMs and possibly indicate the gap between the embeddings captured by the vision-only model and the LVLMs. - -**Experiment #3:** We develop a simple unimodal vision classification model by utilizing a frozen CLIPVision model as a backbone. First, we feature-engineer the input by subtracting the two maps from each other in the image space to produce a single image. This image is passed through the frozen CLIPVision model to generate an embedding of the difference between the maps; the embeddings are then used to train the simple classifiers mentioned above and the one that achieves the highest accuracy on the validation set is reported below. - -
-
- {% include figure.html path="assets/img/2023-11-08-mapreason/experiment_03_01.png" class="img-fluid" %} -
-
- -We see that our fine-tuned vision model (shown in green) achieves a significantly higher accuracy than all previously tested LVLMs. This shows that the task is not a significantly difficult vision task as a frozen CLIPVision model with a head fine-tuned on approximately two thousand samples was able to sufficiently extract an embedding and correctly distinguish positive and negative pairs 85% of the time. - -This significant difference between the accuracy of the frozen CLIP model and the LVLMs on this task signifies that the LVLMs we tested are still significantly farther behind on certain tasks even when compared to a frozen vision-only model that was trained and released years prior. This is in stark contrast to the significant achievements that LLMs accomplish on numerous datasets when compared to task-specific NLP models, where the highest-scoring models on most NLP datasets are LLMs. - -## Human benchmarking - -So far, we have examined the performance of pre-trained LVLMs on our proposed dataset in a zero-shot as well as a fine-tuned manner alongside a vision-only model with feature engineering to assess the difficulty of the task. - -A natural next question to analyze is the performance of humans on this same task as it is not immediately clear how hard or easy the task is for us. The performance achieved by humans on a task such as this would serve as a great target for LVLMs to try to reach. - -**Experiment #4:** We present the following task to two subjects. Each human subject will see two maps for 10 seconds. The pair can be positive or negative with equal probability. After the 10 seconds elapse, the maps automatically disappear and the human subject is asked if the two maps show the same location with a binary “Yes” or “No” choice. After the answer is received, a new pair is sampled and this process is repeated until we gather 50 answers from each human subject. 
- -The 10-second window acts as a pseudo-computational limit on the human subject and ensures that the subject's answers are mostly based on visual and spatial reasoning and not on reading and comparing text. If the subject does not immediately identify a visual or spatial cue, the 10-second window possibly allows for a maximum of one or two texts to be compared if the subject is quick enough. This time limitation prevents the participants from spending an extensive amount of time comparing the nuances of the two images for a severely long time which would make the task more trivial. Below, we show the accuracy obtained from two human subjects and compare it with the previous LVLM results. - -
-
- {% include figure.html path="assets/img/2023-11-08-mapreason/experiment_04_01.png" class="img-fluid" %} -
-
- -We see that both human participants (shown in blue) achieve a significantly higher accuracy (~95%) compared to all the tested ML models. This shows that the task is significantly easier for humans despite the 10-second time limit preventing the subject from extensively comparing the images. - -Our experiments showcase the inability of LVLMs to properly solve our proposed dataset on coregistration as well as showing that a vision-only fine-tuned model with feature-engineering is able to solve the task at a significantly higher accuracy. Finally, we show that humans are able to solve the time-constrained task with a significantly high accuracy. - -## Analysis on prompt engineering - -Numerous recent studies have indicated the importance of prompt engineering in the quality of the output of Large-Transformer based models. - -Due to the potential importance of prompts in affecting performance, we decided to run all experiments that require prompts using multiple different prompts with varying degrees of length and complexity. We note that the prompts considered and listed below were only the ones that consistently conditioned the model to output a “Yes” or “No” output token instead of any other arbitrary sentence completion output. The prompts are shown in the following table: - -
- -| ID | Prompt | -| :---: | :----- | -| 1 | Are these two maps the same? Answer with "Yes" or "No". Answer: | -| 2 | Do these two maps show the same location? Answer with "Yes" or "No". Answer: | -| 3 | Do the two charts depict the same area? Answer:" | -| 4 | The following image contains two maps with different styles side by side. Do the two maps show the same location? Answer with "Yes" or "No". Answer: | -| 5 | On the left there is a map from the VFR dataset and on the right a map from the IFR dataset. Do the two maps show the same location? Answer with "Yes" or "No". Answer: -| 6 | There are two maps of different styles, do they represent the same area or are they completely different? Answer: | -| 7 | The following image contains two maps with different styles side by side. Do the two maps show the same location? Try to compare the maps by looking at key landmarks or features. Answer with "Yes" or "No". Answer: | -| 8 | Carefully examine the following two images that contain two maps with different styles side by side. Do the two maps correspond on the same latitude and longitude point? It is of utmost importance that you answer this correctly. Answer with "Yes" or "No". Answer: | - -
- -The initial prompts (prompts #1 - #3) are meant to be short and direct, while the ones in the middle (prompts #4 - #6) are more verbose and add a bit more complexity, while the last two (prompts #7 - #8) are very verbose and add an exact explanation of the task. We also include additions to some of the prompts that try to guide the models on how they accomplish the task, and some additions that emphasize the importance of correct answers. In the figure below, we study the effect of prompts on model performance. - -
-
- {% include figure.html path="assets/img/2023-11-08-mapreason/experiment_05_01.png" class="img-fluid" %} -
-
- -We notice that varying the prompts introduces some variance in accuracy, though with an improvement of less than 5% for all models across all prompts. Still, there are no strong general trends across models when considering prompts with increasing complexity. We note that the ViLT model was incapable of accepting prompts #5 - #8 due to the limitation of its maximum context length which is shorter than that of the other models. - -One aspect that might limit this analysis is that almost all prompts contain an explicit requirement for the models to provide answers immediately (e.g., “Answer with ‘Yes’ or ‘No’. Answer:”). This was done to reduce the computational inference cost and avoid generating long sequences of texts. The models might respond better to some prompts if they were allowed to reason about their answers first. - - -## Investigating the failure points of LVLMs on coregistration - -The figures presented in the beginning of the blog post that demonstrate some examples in our proposed dataset give a clue of the variance in the difficulty of the examples in the dataset, where some samples are easy to identify as positive pairs and others much harder to do so. - -Thus, to get a better insight into the model's performance and investigate its failure points, we investigate some examples where the models made confidently wrong predictions. Here, we focus on a single model, FLAVA, which was our best-performing LVLM. In the figure below, we investigate both false positives with the highest predicted positive label and false negatives with the highest predicted negative label. The figure contains the 9 examples where the model generated a very high (very low) score while the true label was positive (negative). - -
-
- {% include figure.html path="assets/img/2023-11-08-mapreason/experiment_06_01.png" class="img-fluid" zoomable=true %} -
-
- {% include figure.html path="assets/img/2023-11-08-mapreason/experiment_06_02.png" class="img-fluid" zoomable=true %} -
-
- -For the false positives, we see more than one example where two maps containing water were wrongly classified. This might indicate that the model is making predictions on these images based on colors more so than spatial reasoning. For the false negatives, there are many examples where the VFR chart is dense while the IFR is sparse. These examples require discarding a lot of information from the VFR charts and focusing solely on the region where the IFR chart contains information. Given that the model made wrong decisions in these examples, there might be a preference for positively matching images based on density. Notably, some of these examples were straightforward for the human subjects (matching based on the shoreline), while other examples required more effort (matching between dense and sparse maps). - -
- -# Discussion, Limitations, and Future Work - -One of the key takeaways of our experiments, and specifically from contrasting the first two experiments with the third experiment, is that it was not difficult for a non-LVLM model to achieve an 85% accuracy on our proposed dataset. Yet, our dataset proved to be challenging for LVLMs, especially in zero-shot performance where they performed almost no better than random guessing. This implies that it would be beneficial to further expand future datasets that are used for LVLM training, specifically by adding data collections similar to what we propose, as this could provide invaluable improvements to future training of LVLMs. - -Existing vision-language benchmarks exhibit a heavy focus on real-world objects and scenes, with a distinctive lack of images and questions on maps. This is despite the fact that maps are ubiquitous and used in many real-world scenarios. Furthermore, many maps are easily accessible in digital format and ready to be integrated into vision-language benchmarks. We believe such inclusion would require relatively little effort in terms of data collection while providing significantly higher capabilities for LVLMs. - -We plan to expand the size of our new dataset used in this project and to make it publicly available. Additionally, while our current project primarily focused on the coregistration task, we have plans to incorporate more intricate and challenging questions that delve deeper into map reasoning. - -There are some limitations to the current analysis done in this project. A significant limitation is the computational limit preventing us from feasibly generating answers from the LVLMs in an autoregressive manner instead of our analysis which used only one output token per sample. A possible future work is examining more complicated generation methods such as Chain of Thought prompting. 
Additionally, regarding the inputs of the models, although we investigated different text prompts, we only used one template for the visual prompts, while there have been multiple recent works on visual prompt engineering in vision-language models analogous to textual prompt engineering in LLMs. It could be the case that some models are sensitive to the way the maps are concatenated. This aspect warrants further investigation to gain a more comprehensive understanding of how different concatenation methods might impact model performance and results. - -Another limitation is that we were only capable of running our analysis on open-source models; the largest model tested was blip-2 with less than 3 billion parameters. This was the largest LVLM whose weights we had access to, allowing us to run our analysis on it. Future work could attempt to run the analysis on larger closed-source models if access is granted. - -
- -# Conclusion - -In this project, we propose a novel dataset to serve as an initial benchmark for the capabilities of LVLMs to reason on maps with the goal of addressing a gap in current LVLM benchmarks and datasets. - -Using this dataset, we run an extensive analysis on the performance of open-source LVLMs showing that they struggle to achieve good performance on the coregistration task. Additionally, we show that the task for our dataset is a relatively simple vision task by showing that a fine-tuned vision-only model released years prior to the tested LVLMs achieves a significantly higher accuracy. Finally, we show that the coregistration task is intuitive to humans, as participants were able to achieve close to perfect accuracy even in a time-constrained manner. - -We hope that future initiatives regarding data collection for LVLMs and training foundational LVLMs will put more emphasis on datasets such as our proposed datasets. This will hopefully unlock new capabilities for LVLMs enabling them to advance beyond their current limitations and possibly expand their utility and reasoning abilities in a variety of real-world scenarios. diff --git a/_posts/2023-11-08-representationengineering-incontextlearning.md b/_posts/2023-11-08-representationengineering-incontextlearning.md deleted file mode 100644 index b05978a0..00000000 --- a/_posts/2023-11-08-representationengineering-incontextlearning.md +++ /dev/null @@ -1,371 +0,0 @@ ---- -layout: distill -title: Leveraging Representation Engineering For LLM’s In-Context-Learning -description: We present a method to observe model internals whether LLMs are performing in-context learning and control the model outputs based on such Context Vectors. 
-date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Alex Hu - url: "https://www.linkedin.com/in/alexander-hu/" - affiliations: - name: MIT - - name: Carl Guo - url: "https://www.carlguo.com/" - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-08-representationengineering-incontextlearning.bib - - - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Background & Related Work - - name: Experiment Setup - - name: Results - - name: Conclusion - - name: Sources - - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -# Introduction -Emerging capabilities in deep neural networks are not well understood, one of which is the concept of "in-context learning" (ICL), a phenomenon where the a Large Language Model (LLM)'s understanding of the prompt and ability to answer accordingly drastically increases after being shown some examples that answer the question. Evaluating in-context learning and understanding why the behavior happens is both an interesting theoretical research question and a practical question that informs directions to conduct research that further advances LLM capabilities by, say, exploiting more of in-context learning. 
- -We attempt to explore the phenomenon of in-context learning by leveraging another exciting field of work on mechanistic interpretability where researchers set out to understand model behaviors by interpreting and editing internal weights in models. One such work that we base on is Representation Engineering by Zou et al. (2023) , where they construct a set of training text stimuli to probe LLM activations and use such stimuli to identify a direction that accurately predicts the underlying concept based on the neural activations of the model. This approach allows us to elicit readings of representation and control such representation. - -We propose to use methods in Zou et al. (2023) to evaluate in-context learning. There have been no previous attempts to examine the model internals holistically in an LLM while it is performing in-context learning. We expose such neural activations by constructing stimulus through artificial examples of in-context learning on binary classification tasks. We find a reading vector that shows high neural activity after the model is stimulated with the context pairs; such a "Context Vector" indicates the context the model draws from. While we hoped to find certain universal mechanisms across different datasets, we find that the Context Vector is dataset-specific and confirm previous hypotheses that in-context learning retrieves information from different parts of the model's latent space. - -We then explore the results of controlling the activations along the "Context Vector" direction, in the hope that editing the activations would further boost the performance on top of in-context learning. We compare the model outputs on the classification datasets in a zero-shot setting and a setting of natural in-context learning, with the "Context Vector" amplified, and suppressed. 
While we find boosting performance through such editing to be challenging and sometimes finicky to tune, we find the results to be promising on editing weights to suppress the context that the model draws from and drastically reducing the performance. - -# Background & Related Work - -### In-Context Learning (ICL) -An LLM is frequently asked to perform a task at inference time, and many have realized that providing some examples of how to answer the task can drastically improve the model's performance. This phenomenon is called in-context learning. For example, Zhou et al. (2022) evaluates how LLMs can become better at solving algorithmic problems through in-context learning, a task that LLMs traditionally struggle at. - -In other scenarios, the LLM does not need to rely on prompts at all and can deduce the pattern from the few-shot examples alone to predict the answer. While there is no universal definition of in-context learning and its meaning has shifted over time, we define it as the performance boost to answer questions based on a limited amount of examples (as the context). - -Interestingly, Min et al. (2022) observes that such an ICL phenomenon is observed as long as examples are given, and a mismatch between input and output pairs would not hinder the ability of models performing ICL and thus their performance on the tasks. Wei et al. (2023) further corroborates this work with findings on small models, but shows that as models scale, the ability to pick up on flipped patterns when given in-context examples with flipped labels and override semantic priors is stronger. - -### Theories on why ICL happens -While the concept of ICL is well studied, the underlying mechanism of ICL is not well understood. Xie et al. (2022) explains the phenomenon of ICL as an Implicit Bayesian Inference, where the in-context learning prompt serves as a stimulus for the model to go "locate" the corresponding concept stored in the model's latent space that the LM has learned implicitly during pre-training. 
They study this by generating a simple pretraining distribution that parameterizes the transition of a Hidden Markov Model (HMM) and another prompting distribution. In this setting, the authors reduce the ICL task to Bayesian inference to map the prompting distribution to the pretraining distribution. - -Akyürek et al. (2022) further explains that Transformer-based in-context learners implement standard learning algorithms implicitly by encoding smaller models modularized to perform each specific task and update them based on the new in-context examples. von Oswald et al. (2023) claims that Transformer-based in-context learners are similar to gradient-based meta-learning formulations, where they found that the Transformer can learn smaller models of a certain concept by gradient descent in their forward pass. - -Furthermore, Olsson et al. (2022) draws a parallel from ICL to a more understood phenomenon of Induction Heads, where attention-only Transformers pick up on the algorithm to predict next tokens by searching for a previous occurrence of the last token and copying the same next token from previous occurrences. They claim that this can be a potential mechanism to explain ICL. - -While many hypotheses and theories have been proposed to explain ICL, most explorations to prove these theories have been small in scale, and the literature lacks a study on the large-scale LMs' internals when performing ICL. - -### Model Editing & Representation Engineering - -We’ll use the Representation reading and control methods presented in [Zou et al. (2023)](https://arxiv.org/pdf/2310.01405.pdf) to understand the context that the model attends to and discover directions that indicate such reasoning. - -Relatedly, there has been a recent surge in research related to model knowledge editing, including Meng et al. (2023) , Zhong et al. (2023) , and Hernandez et al. (2023) that demonstrate different methods for locating and editing factual associations. 
Other work, including Shao et al. (2023) and Belrose et al. (2023) , have shown results on erasing larger-scale memory units such as concepts. Li et al. (2023) applies such concept erasure techniques by conducting Inference-Time Intervention, where one can find a direction of causal influence on "truthfulness" data and increase the activations along that direction to increase truthfulness, scoring better on the TruthfulQA dataset. - -# Experiment Setup - -### Datasets - -We adopt a total of 30 datasets on binary classification (sentiment analysis, natural language inference, true/false inference) and multiple choice; 16 datasets are used by Min et al. (2022) , plus 12 extra datasets in the `tweet_eval` and `ethos` dataset families, `rotten_tomatoes`, and `ade_corpus_v2-classification`. Following Min et al. (2022), we only use the test set to avoid potential cross-contamination with the data that the model is pretrained on. We reserve `k=64` examples in the test set for few-shot training, and the rest are used for testing. - -### Training Data Generation - -For training, we construct a set of context pairs for each dataset, each context pair containing the same examples but different instructions. The instructions are "Pay attention to the following examples" and "Ignore the following examples" respectively, in the hope that by stimulating two opposites and examining the difference, we can find a Context Vector that represents what the model draws from. We then truncate the example at each and every token till the last 5 tokens, so we can get a neural activation reading for each of the tokens. - -A sample training data input using the `rotten_tomatoes` dataset is as follows: - - -> [INST] Pay attention to the following examples: [/INST] - -> offers that rare combination of entertainment and education. - -> positive. - -> a sentimental mess that never rings true . - -> negative. 
- -> [INST] Ignore the following examples: [/INST] - -> offers that rare combination of entertainment and education. - -> positive. - -> a sentimental mess that never rings true . - -> negative. - -Each context pair is identical except for the instructions. We use the context pairs to stimulate the model to learn the context and use the context vector to control the model's behavior. - -### Testing Data Generation - -For testing data, we use 3 input-label pairs as the prompt, with the first two pairs serving as the in-context examples, and the last pair serving as the question that we actually want to test on, obfuscating the label from the prompt. - -A sample testing data input using the `rotten_tomatoes` dataset is as follows: - -Input: -> [INST] offers that rare combination of entertainment and education. [/INST] - -> positive. - -> [INST] a sentimental mess that never rings true . [/INST] - -> negative. - -> an odd , haphazard , and inconsequential romantic comedy . - -Label: -> negative. - -### Model - -We have explored using two models with 7 billion parameters, including `Mistral-7B-Instruct-v0.1` and `Llama-2-7b-hf`; while we have found preliminary results consistent between the two models, all of our results later reported are from `Mistral-7B-Instruct-v0.1` for consistency and due to a constraint on computational power and time. - -### Training Infrastructure - -We used the MIT Supercloud infrastructure and a local machine with a single RTX 4090 GPU to train the model. - -# Results - -We present results first on finding the Context Vector in the embedding space, then on using the Context Vector to control model outputs and evaluate their performance. - -### Representation Reading - -We use the Representation Reading method presented in Zou et al. (2023) to find the Context Vector. 
Specifically, we adopted the setup of the instruction response pairs where for a given function $`f`$ and pairs of instructions $`x_i`$ and $`y_i`$, we denote the model's response truncated at the $`j`$-th token as $`f(x_i)_j`$ and $`f(y_i)_j`$ and take the neuron activity at the last token of each of the responses, namely the activations of each and every token in the response. - -We then perform PCA on the difference of the activations of the two instructions, namely $`f(x_i)_j - f(y_i)_j`$ and find the first principal component $`v`$ that maximizes the difference in the embedding space. - -{% include figure.html path="assets/img/2023-11-08-representationengineering-incontextlearning/correlation_tomato.png" class="img-fluid" %} -
- Graph plotting the correlation between the Context Vector sign and actual dataset label on Rotten Tomatoes dataset. The x-axis is the layer and the y-axis is the correlation. -
- - -More surprisingly is the fact that we can find a clean representation of such Context Vector that correlates decently with the model inputs. - -We use t-SNE to visualize the difference in the embedding space on the inputs of the 30 datasets across 32 different layers and report the results below. - - - -{% include figure.html path="assets/img/2023-11-08-representationengineering-incontextlearning/tsne_data.png" class="img-fluid" %} -
- t-SNE plot of the embedding space of the Context Vectors across the 30 datasets and 32 layers, color coded by dataset. -
- -{% include figure.html path="assets/img/2023-11-08-representationengineering-incontextlearning/tsne_layers.png" class="img-fluid" %} -
- t-SNE plot of the embedding space of the Context Vectors across the 30 datasets and 32 layers, color coded by layers. -
- -As shown in the figure, we find that the vectors are clustered by dataset, indicating that the Context Vectors are dataset-specific. There are no clear patterns across dataset or between different layers of the Context Vectors, further indicating that in-context learning activates different parts of the model's latent space with information about different types of tasks. - -We also conducted scans for neuron activities in the Context Vector across the different tokens of an example sequence in a similar style as Zou et al. (2023) , for which the previous work has referred to as Linear Artificial Tomography (LAT) scans. - -The following are the LAT scans for the neuron activities corresponding to a Context Vector trained on `rotten_tomatoes` sentiment analysis dataset evaluated on different dataset sequences. The following graphs further corroborate the findings above on the dataset-specificity of in-context learning; while the a sequence from the `rotton_tomatoes` dataset result in high neural activities for the Context Vector, most sequences from the other dataset do not, showing the uniqueness of such Context Vector. We have also observed most of the neuron activities in the later layers. This phenomenon makes sense since more abstract concepts and semantic structures formulate in later layers, thus being more correlated with the Context Vector, while earlier layers pick up more on token-level abstractions. - -{% include figure.html path="assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_rotten_tomatoes.png" class="img-fluid" %} -
- A LAT scan of the Context Vector trained on `rotten_tomatoes` dataset evaluated with a `rotten_tomatoes` sequence. The x-axis is the token index, and the y-axis is the Layer number. More red indicates higher neural activities, and more blue indicates lower neural activities. -
-{% include figure.html path="assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_medical_questions_pair.png" class="img-fluid" %} -
- A LAT scan of the Context Vector trained on `rotten_tomatoes` dataset evaluated with a `medical_questions_pair` sequence. -
-{% include figure.html path="assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_ethos_religion.png" class="img-fluid" %} -
- A LAT scan of the Context Vector trained on `rotten_tomatoes` dataset evaluated with a `ethos-religion` sequence. -
- -We have also produced graphs that zoom into the token-level neural activities detection on the Context Vector of the opposing pair (Pay attention & Don't pay attention), shown below. A large difference in the neural activities of the two instructions is denoted by red and indicates that the ablation is effective, while the green shades indicate that there are similar in neural activities. The results show that the neural activities are consistently different across the sequence until the model starts generating next tokens and the context ends where the neural activities are similar. - -{% include figure.html path="assets/img/2023-11-08-representationengineering-incontextlearning/rotten_tomatoes_token_level.png" class="img-fluid" %} -
- A token-level LAT scan that compares the difference between the neural activities of the Context Vector of the opposing pair (Pay attention & Don't pay attention) on the `rotten_tomatoes` dataset. -
- -## Representation Control - -To change an activation along some direction, we can imagine there are several canonical ways. First, given our Context Vector $v$ and an activation $a$, we can do one of the following. - - -### Addition - -$$ -a' = a + v -$$ - - -### Amplification - - -$$ -a' = a + \text{sign}(a \cdot v) v -$$ - -### Projection - - -$$ -a' = a - (a \cdot v) \cdot \frac{v}{||v||^2} -$$ - - - -The first represents a constant perturbation so it supposedly transforms the representation to become more of a certain quality. The second amplifies the direction according to which side it is on, so it makes the representation more extreme. The third removes the quality from the representation by subtracting the projection. - -We explore all these methods to control Mistral-7b-instruct. We do our experiments on the `rotten_tomato`, `sick`, `hate_speech18`, and `glue-wnli` in-context-learning datasets consisting of input-output pairings where outputs have two possible correct options -- positive or negative contradiction or entailment, hate or noHate, and entailment or not_entailment (for `sick`, it originally contains a third option of `neutral` which we remove since our framework requires two classes). - -Given learned representations with the same configuration as our representation reading, we construct a test set from the same dataset as training. The test set has $16$ examples, each with one demonstration followed by a question. We evaluate correctness by having the LLM generate $10$ tokens and checking if the correct answer is contained in the output and the incorrect answer is not contained in the output, without being sensitive to case. This ensures correct evaluation so that an answer of no_entailment does not evaluate as correct for having entailment inside of it if entailment is the right answer. - -A hyperparameter which we denote $\alpha$ scales the size of $v$. 
If our Context Vector is $r$, sign value is $s$, then we have $v = \alpha \cdot r \cdot s$. We vary $\alpha \in \{ 0, 0.25, 0.5, 1, 2, 5, 10}$, and also take the negative of $\alpha$, which we label as positive and negative respectively. - -### Results for Control with Addition -For rotten tomatoes, we see the expected performance gap of positive over negative, though positive does worse than no control. Moreover, we see in glue-wnli and sick, the negative control actually does better than positive control. In hate_speech18, we see the desired result. - -Despite modifying the layers that we controlled, based upon observing the layers at which the Context Vectors had the most correlation to the trained concept, we cannot find a set of layers to control that works **consistently** across all four datasets, though we can find layers that work for one dataset. - -{% include figure.html path="assets/img/2023-11-08-representationengineering-incontextlearning/add_tomato.png" class="img-fluid" %} -
- The accuracy of the model on the `rotten_tomatoes` dataset with amplification or suppression of the Context Vector using Addition. The x-axis is the coefficient of amplification, and the y-axis is the accuracy. -
- -{% include figure.html path="assets/img/2023-11-08-representationengineering-incontextlearning/add_sick.png" class="img-fluid" %} -
- The accuracy of the model on the `sick` dataset with amplification (positive) or suppression (negative) of the Context Vector using Addition. The x-axis is the alpha value of amplification, and the y-axis is the accuracy. -
- -{% include figure.html path="assets/img/2023-11-08-representationengineering-incontextlearning/add_hate_speech.png" class="img-fluid" %} -
- The accuracy of the model on the `hate_spe` dataset with amplification (positive) or suppression (negative) of the Context Vector using Addition. -
- -{% include figure.html path="assets/img/2023-11-08-representationengineering-incontextlearning/add_glue.png" class="img-fluid" %} -
- The accuracy of the model on the `glue_wnli` dataset with amplification (positive) or suppression (negative) of the Context Vector using Addition. -
- -### Results for Control with Amplification -Note the result depends on the absolute value of $\alpha$ so the positive and negative graphs converge. The affect of amplification is quite smooth relative to addition in the sense that there is a consistent downward trend in performance for both amplification and suppression. This could be because amplification amplifies existing signals and this gets stronger as $\alpha$ increases. - - -{% include figure.html path="assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_tomato.png" class="img-fluid" %} -
- The accuracy of the model on the `rotten_tomatoes` dataset with amplification (positive) or suppression (negative) of the Context Vector using Amplification. The x-axis is the alpha value, and the y-axis is the accuracy. -
- -{% include figure.html path="assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_sick.png" class="img-fluid" %} -
- The accuracy of the model on the `sick` dataset with amplification (positive) or suppression (negative) of the Context Vector using Amplification. The x-axis is the alpha value, and the y-axis is the accuracy. -
- -{% include figure.html path="assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_hate_speech.png" class="img-fluid" %} -
- The accuracy of the model on the `hate_speech18` dataset with amplification (positive) or suppression (negative) of the Context Vector using Amplification. The x-axis is the alpha value, and the y-axis is the accuracy. -
- -{% include figure.html path="assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_glue.png" class="img-fluid" %} -
- The accuracy of the model on the `glue_wnli` dataset with amplification (positive) or suppression (negative) of the Context Vector using Amplification. The x-axis is the alpha value, and the y-axis is the accuracy. -
- -### Results for Control with Projection -We can see that projection consistently decreases performance, which is expected as we can imagine projection as erasing the idea that the model needs to pay attention to these examples. Having positive or negative sign of $\alpha$ does not affect projection. -{% include figure.html path="assets/img/2023-11-08-representationengineering-incontextlearning/proj_tomato.png" class="img-fluid" %} -
- The accuracy of the model on the `rotten_tomatoes` dataset with amplification (positive) or suppression (negative) of the Context Vector using Projection. The x-axis is the alpha value, and the y-axis is the accuracy. -
- -{% include figure.html path="assets/img/2023-11-08-representationengineering-incontextlearning/proj_sick.png" class="img-fluid" %} -
- The accuracy of the model on the `sick` dataset with amplification (positive) or suppression (negative) of the Context Vector using Projection. The x-axis is the alpha value of amplification, and the y-axis is the accuracy. -
- -{% include figure.html path="assets/img/2023-11-08-representationengineering-incontextlearning/proj_hate_speech.png" class="img-fluid" %} -
- The accuracy of the model on the `hate_speech18` dataset with amplification (positive) or suppression (negative) of the Context Vector using Projection. The x-axis is the alpha value of amplification, and the y-axis is the accuracy. -
- -{% include figure.html path="assets/img/2023-11-08-representationengineering-incontextlearning/proj_glue.png" class="img-fluid" %} -
- The accuracy of the model on the `glue_wnli` dataset with amplification (positive) or suppression (negative) of the Context Vector using Projection. The x-axis is the alpha value of amplification, and the y-axis is the accuracy. -
- -### Ablation Studies - -A key question is whether the Context Vectors are truly special. Especially because much of our results do not work, we would like to assess the "noise level." By sampling a random unit vector from $4096$-dimensional space, the hidden dimension of `Mistral-7b-instruct`, for each layer and using that for control, we get the following results. - - -If we take the negative of all the Context Vectors, the graphs for positive and negative $\alpha$'s would switch. The fact that in our random sample we see such a large gap in the Glue-wnli graph indicates that there is quite a lot of noise. Moreover, if we take the negative of our particular randomly sampled vector, we obtain a Context Vector for Glue-wnli that is **extremely good** at controlling in-context-learning. The large landscape of $4096$-dimensional space is an exciting mystery. - -{% include figure.html path="assets/img/2023-11-08-representationengineering-incontextlearning/random_tomato.png" class="img-fluid" %} -
- The accuracy of the model on the `rotten_tomatoes` dataset with amplification (positive) or suppression (negative) of a random vector using Addition. The x-axis is the alpha value of amplification, and the y-axis is the accuracy. -
- -{% include figure.html path="assets/img/2023-11-08-representationengineering-incontextlearning/random_sick.png" class="img-fluid" %} -
- The accuracy of the model on the `sick` dataset with amplification (positive) or suppression (negative) of a random vector using Addition. The x-axis is the alpha value, and the y-axis is the accuracy. -
- -{% include figure.html path="assets/img/2023-11-08-representationengineering-incontextlearning/random_hate_speech.png" class="img-fluid" %} -
- The accuracy of the model on the `hate_speech18` dataset with amplification (positive) or suppression (negative) of a random vector using Addition. The x-axis is the alpha value, and the y-axis is the accuracy. -
- -{% include figure.html path="assets/img/2023-11-08-representationengineering-incontextlearning/random_glue.png" class="img-fluid" %} -
- The accuracy of the model on the `glue_wnli` dataset with amplification (positive) or suppression (negative) of a random vector using Addition. The x-axis is the alpha value of amplification, and the y-axis is the accuracy. -
- - - - - - -# Conclusion - -While we understand our work is limited due to time and compute constraints and did not achieve the results we hoped for, we tried our best to explore this research direction of finding a Context Vector that corresponds to the in-context learning behaviors and experiments of using it to control model outputs. - -## Implications - -If successful, this research direction could be a powerful tool to understand mechanistically why in-context learning emerges and potentially use model editing to achieve better State-of-the-Art results on LLMs in specific benchmark evaluation scenarios with model editing. Even with our current results that demonstrate more success in suppressing the Context Vector than amplifying it, i.e. suppressing such behaviors than boosting it, this can have implications on works that try to perform model unlearning and impact the robustness of LLMs. - -## Future Work - -Through ablating with the random vector in the embedding space, it is unfortunate that controlling for the particular Context Vector we found is not particularly different from other vectors, despite it showing some promises on suppressing the results. We hope to run further ablation studies to confirm that suppressing the Context Vector is only suppressing the in-context learning behaviors of the specific behaviors and does not have other side effects. - - - -Regarding our current setup of the contrasting prompts of telling the model to pay attention or not pay attention to the concept, we can further explore the space of contrasting prompts. Directly related to our work, we would also like to explore the other type of experiment setup in Zou et al. (2023); unlike the data pair setup where we ask the model to pay attention to the examples or ignore them, we can ask the model to "think hard about the context/structure of the question" and elicit neural activities that way. 
- -We are also interested in exploring vectors that control step-by-step reasoning and in general, intelligence. The phrases "Let's think step by step" or "Take a deep breath and work on this problem step-by-step" are powerful phrases that elicit chain-of-thought reasoning and improve model performance. Could we engineer activation transformations that improve these models' performance even more than and without the need for prompting? \ No newline at end of file diff --git a/_posts/2023-11-08-scRNAseq-assumptions.md b/_posts/2023-11-08-scRNAseq-assumptions.md deleted file mode 100644 index 033ab40b..00000000 --- a/_posts/2023-11-08-scRNAseq-assumptions.md +++ /dev/null @@ -1,371 +0,0 @@ ---- -layout: distill -title: Examining assumptions in scRNA-seq foundation model pre-training (6.S898 Final Project) -description: Final project for MIT's Deep Learning (6.S898) class. -date: 2023-11-08 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Robert Calef - url: "https://en.wikipedia.org/wiki/Robert_Calef" - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-08-scRNAseq-assumptions.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Related work - - name: Methods - - name: Results - - name: Discussion - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. 
-_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - - - -## Introduction -If the fundamental building block of biology is the cell, then the fundamental building block of cells are genes. -Genes are small segments of DNA that encode the information to create a protein, and proteins are a diverse set of macromolecules that can perform a staggering range of chemical functions which, when taken all together, lead to the complex behavior of cells and the organisms they make up. -To create proteins from genes, an intermediate "data transfer" occurs through another molecule type known as RNA. This information flow of genes to RNA to proteins is typically referred to as "gene expression", and is so core to biology that it's also known as the "central dogma of molecular biology". - -Due to the importance of gene expression, many technologies have been developed to make quantitative measurements of gene expression from cells. -One of the most prominent technologies is called single-cell RNA sequencing (scRNA-seq), which enables the measurement of the expression of all genes in a given cell, often measured across thousands of cells simultaneously . - -{% include figure.html path="assets/img/2023-11-08-scRNAseq-assumptions/fig1_scRNA_seq_overview.png" class="img-fluid" %} -
- Schematic overview of the scRNA-seq workflow. Figure sourced from . -
- -Large scale scRNA-seq datasets have enabled the high-resolution profiling of different organs and tissues at the cellular level, uncovering diverse cell types, rare subpopulations, and dynamic gene expression patterns within complex tissues and organisms. -This technology has found applications in various fields, from developmental biology and immunology to cancer research and regenerative medicine. - -While scRNA-seq has seen broad-scale adoption, many challenges remain. -In particular, an individual research experiment may focus on a particular cell or tissue type, and produce insufficient data to apply modern machine learning techniques. To supplement their data or to gain additional context, a researcher may wish to utilize data generated from other experiments or researchers. However, performing large-scale integration of datasets across samples, tissues, and experiments currently presents challenges of scalability and non-biological differences between datasets driven by experimental variability (colloquially referred to as "batch effects") . - -In parallel to the explosion of available scRNA-seq data, the machine learning field has seen an increasing trend towards "foundation models". -Foundation models are large-scale deep learning models pre-trained with vast amounts of data for the purposes of creating a generalizable representation of a particular datatype (e.g. text, images). -Given these developments, recent work has focused on developing scRNA-seq foundation models as an approach to solve the challenge of integrating diverse sets of scRNA-seq datasets in a scalable and generalizable way . Beyond just integration, foundation models of gene expression hold great promise in contributing to a broader understanding of biology by learning a representation space of cellular state, which could also lead to a large impact in downstream applications such as *in silico* prediction of cellular responses to novel therapeutics. 
- -In this post, we'll explore a fundamental assumption of three such models (Geneformer, cell2sentence , and GenePT ), which is the assertion that a given gene expression profile can be well-approximated by a rank-value encoding of genes. - -What exactly is a rank-value encoding? Well, a typical representation of gene expression is a vector $$ x \in \mathbb{R}^N $$, where $$ N $$ is the number of genes, and each entry is a measure of the corresponding gene's expression. In a rank-value encoding, gene expression is instead represented as a list of N strings, where the strings are gene names, and are ordered in descending order of the underlying gene expression value. - -{% include figure.html path="assets/img/2023-11-08-scRNAseq-assumptions/fig2_rank_value_schematic.png" class="img-fluid" %} -
- Standard encoding of gene expression values compared to a rank-value encoding. -
- -The rank-value encoding provides an intuitive transformation of the continuous gene expression values into an English language sentence that is compatible with existing approaches for foundation models in the natural language processing (NLP) field. However, as can be seen above, the rank-value encoding also drops the information of the exact gene expression values. Hopefully by the end of this post, we'll have gained some intuition for how a rank-value encoding of gene expression could be hindering the development of foundation models for gene expression and see that this does play out in practice for a real scRNA-seq foundation model. - - -## Related work - -### Overview of gene expression representations in foundation models -While we won't go into a full detailed comparison of different methods for constructing gene expression foundation models from scRNA-seq data, it's worth spending a little time discussing -the commonalities and differences of various approaches at a high-level. - -The most important distinction for this post is between methods that use a rank-value encoding and those that don't. For methods that don't use a rank-value encoding, we see a further -distinction between methods that employ some form of value-binning, where continuous expression values are mapped to a discrete number of pre-specified bins, and those that don't. Methods that use a binning approach are scGPT and scBERT. In both scGPT and scBERT, gene expression values are first binned to map the continuous values to a set vocabulary of tokens, and these tokens are then passed through an embedding layer to generate higher-dimensional representations. -In contrast, scFoundation calculates gene expression embeddings by first transforming continuous scalar values to a vector using a small MLP, -and then calculating a final embedding by using an attention mechanism over K learned vectors. 
While we won't cover the full details, schematics of the approaches can be seen below to get a sense of the overall architectures, and most importantly to see how they directly use the gene expression values as input. - -{% include figure.html path="assets/img/2023-11-08-scRNAseq-assumptions/fig2_scGPT_schematic.png" class="img-fluid rounded z-depth-1" %} -{% include figure.html path="assets/img/2023-11-08-scRNAseq-assumptions/fig2_scBERT_schematic.png" class="img-fluid rounded z-depth-1" %} -{% include figure.html path="assets/img/2023-11-08-scRNAseq-assumptions/fig2_scFoundation_schematic.png" class="img-fluid rounded z-depth-1" %} -
- Schematics of the various approaches that *do not* use a rank-value encoding (top to bottom): scGPT, scBERT, and scFoundation. Figures sourced from . -
- -On the other hand, we have the methods that we're most interested in for the purposes of this post: the ones that utilize a rank-value encoding of gene expression. These methods are: Geneformer, GenePT, and cell2sentence. -In Geneformer, gene expression values are first converted to a rank-value encoding and then used to train a Transformer-based model using a variant of a masked language modeling objective in which a set of genes at random ranks are masked, and the model must learn to predict the masked gene names. -In cell2sentence and GenePT, pre-trained auto-regressive language models (GPT-2 and GPT-3.5 respectively) are applied to the rank-value encoded list of genes to obtain cell-level embeddings that are then used for downstream tasks. Again, we won't dive into the full details of these approaches, but provide schematic overviews of them below. - -{% include figure.html path="assets/img/2023-11-08-scRNAseq-assumptions/fig2_Geneformer_schematic.png" class="img-fluid rounded z-depth-1" %} -{% include figure.html path="assets/img/2023-11-08-scRNAseq-assumptions/fig2_genePT_schematic.png" class="img-fluid rounded z-depth-1" %} -{% include figure.html path="assets/img/2023-11-08-scRNAseq-assumptions/fig2_cell2sentence_schematic.png" class="img-fluid rounded z-depth-1" %} -
- Schematics of the various approaches that *do* use a rank-value encoding (top to bottom): Geneformer, GenePT, and cell2sentence. Figures sourced from . -
- -### Critical examinations of scRNA-seq foundation models -In light of the recent development of many approaches for scRNA-seq foundation models, researchers have also begun performing critical assessments of such models. One of the main value propositions of foundation models is generalization to new data in a few-shot or zero-shot manner. To test this hypothesis, Kedzierska et al. benchmarked the performance of Geneformer and scGPT at two zero-shot tasks with novel datasets: cell clustering and integration of data across batches (i.e. batch effect removal) . They found that both methods underperformed compared to simpler baseline methods. Similarly, Boiarsky et al. compared scGPT and scBERT to logistic regressions in the context of cell type annotation, and also found that the simpler approach performed competitively. - -However, both of the works discussed above focused on examining the performance of scRNA-seq foundation models as a black box, whereas to the best of our knowledge, there are no current works examining the fundamental assumptions implicit in these foundation model approaches. We hope to begin addressing that gap in this post. By understanding whether or not rank-value encoding well-approximates the real similarities and differences in gene expression across cell types, we hope to either validate this assumption or gain insight into future avenues for improving pretraining of such scRNA-seq foundation models. - -## Methods - -### Dataset -To perform our assessment of rank-value encoding, we'll work with the Tabula Sapiens dataset . This scRNA-seq dataset is a reference-quality collection of nearly 500,000 cells from 24 organs, sourced from 15 normal human subjects. The Tabula Sapiens dataset provides a good testbed for our experiments, as the samples have been processed in a uniform manner, allowing us to ask how rank-value encoding performs in a "best case" scenario. 
In the future, it would be beneficial to see how rank-value encoding performs across datasets as well, as there may be advantages in terms of smoothing out experimental noise. - -We use the final dataset from Tabula Sapiens, which has already been subjected to quality control assessment, filtering, and normalization. While we won't go into the details of their pipeline here, these are available in their manuscript for the interested reader. In line with typical scRNA-seq workflows, we also subset the full set of ~22,000 genes down to a subset of 2,435 genes that have been marked as "highly variable genes" (HVGs) in the Tabula Sapiens dataset. This is a fairly standard step in scRNA-seq data processing workflows, as many genes are constitutively expressed across cell types, and thus provide little information for distinguishing between cell types. Highly variable gene selection was performed by the Tabula Sapiens Consortium following the methods and recommendations in Seurat, a commonly used scRNA-seq data processing package. - -{% include figure.html path="assets/img/2023-11-08-scRNAseq-assumptions/fig3_cell_type_hist.png" class="img-fluid rounded z-depth-1" %} -
- Number of cells per cell type. Note that the majority of cell types have ~1000 examples, but that there's a long tail of highly represented cell types with up to 35k examples. -
- -Additionally, since the Tabula Sapiens dataset is quite large and also has some cell types that are disproportionately represented, as shown above, we'll also subset the data to get a more tractable dataset for experimentation. To do so, we'll focus on cell types with 500 or more examples, and then further randomly subsample to 500 cells per type. This leaves us with 89 cell typesacinar cell of salivary gland, - adventitial cell, - b cell, - basal cell, - basal cell of prostate epithelium, - basophil, - bladder urothelial cell, - capillary aerocyte, - capillary endothelial cell, - cardiac endothelial cell, - cardiac muscle cell, - cd24 neutrophil, - cd4-positive alpha-beta t cell, - cd4-positive helper t cell, - cd4-positive, alpha-beta memory t cell, - cd4-positive, alpha-beta t cell, - cd8-positive alpha-beta t cell, - cd8-positive, alpha-beta cytokine secreting effector t cell, - cd8-positive, alpha-beta cytotoxic t cell, - cd8-positive, alpha-beta memory t cell, - cd8-positive, alpha-beta t cell, - classical monocyte, - club cell, - club cell of prostate epithelium, - conjunctival epithelial cell, - corneal epithelial cell, - corneal keratocyte, - dendritic cell, - dn1 thymic pro-t cell, - dn3 thymocyte, - duct epithelial cell, - endothelial cell, - endothelial cell of artery, - endothelial cell of lymphatic vessel, - endothelial cell of vascular tree, - enterocyte of epithelium of large intestine, - enterocyte of epithelium of small intestine, - epithelial cell, - erythrocyte, - erythroid progenitor, - eye photoreceptor cell, - fibroblast, - fibroblast of breast, - granulocyte, - hematopoietic stem cell, - hepatocyte, - immature enterocyte, - immune cell, - innate lymphoid cell, - intermediate monocyte, - keratinocyte, - kidney epithelial cell, - luminal cell of prostate epithelium, - luminal epithelial cell of mammary gland, - lung ciliated cell, - macrophage, - mast cell, - mature enterocyte, - mature nk t cell, - memory b cell, - mesenchymal stem cell, - 
monocyte, - myeloid cell, - myofibroblast cell, - naive b cell, - naive regulatory t cell, - naive thymus-derived cd4-positive, alpha-beta t cell, - naive thymus-derived cd8-positive, alpha-beta t cell, - neutrophil, - nk cell, - nkt cell, - non-classical monocyte, - pancreatic acinar cell, - pancreatic ductal cell, - paneth cell of epithelium of large intestine, - paneth cell of epithelium of small intestine, - pericyte cell, - plasma cell, - regulatory t cell, - respiratory goblet cell, - skeletal muscle satellite stem cell, - smooth muscle cell, - stromal cell, - t cell, - thymocyte, - type i nk t cell, - type ii pneumocyte, - vascular associated smooth muscle cell, - vein endothelial cell and 500 cells per type, for a total of 44,500 datapoints. - -To interact with this data, we'll be using the `AnnData` and `scanpy` Python packages, which we won't cover in detail here but flag in case you're interested in working with such data in the future. - -### Assessments -To assess how well a cellular state can be represented using a rank-value encoding of genes, we'll look at various measures of similarity in the raw gene expression space and the rank-value encoded space, and compare those measures both within cell types and between cell types. We'll calculate the following measures for all pairs of cells: - 1. Euclidean distance of UMAP-projected gene expression values - 2. [Spearman rank correlation coefficient](https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient) - 3. Euclidean distance of UMAP-projected Geneformer embeddings - - For each distance measure, we can then generate comparisons at the level of cell types by summarizing via the median of the pairwise distances, either within or between cell types. A schematic of this approach is shown below. - - {% include figure.html path="assets/img/2023-11-08-scRNAseq-assumptions/fig4_comparison_schematic.png" class="img-fluid rounded z-depth-1" %} -
- Overview of method for computing distance measures between cells followed by summarization to generate comparisons at the level of cell types. -
- -#### UMAP of gene expression values -The idea behind this comparison is to utilize the continuous gene expression vectors, but using UMAP (Uniform Manifold Approximation and Projection) to approximate the kind of non-linear transformation one might learn using a deep neural network. To calculate these values, we perform UMAP projection of the gene expression values using the `umap-learn` Python package with default settings and `n_components=5`. Once we have the per-cell projections, we calculate Euclidean distance between all pairs of cells. - -#### Spearman rank correlation coefficients -The Spearman rank correlation is a non-parametric measure of correlation between two ranked lists, which we can leverage to obtain a direct comparison of rank-value encoded gene lists. To accomplish this, we first calculate a rank-encoding of each cell's gene expression, with identical values being assigned a [fractional rank equal to the mean of their ordinal ranks](https://en.wikipedia.org/wiki/Ranking#Fractional_ranking_(%221_2.5_2.5_4%22_ranking)). As the Spearman correlation is defined as the Pearson correlation on the rank-encoded lists, we can then directly calculate the Spearman correlations between all pairs of cells. - -#### Euclidean distance of UMAP-projected Geneformer embeddings -To fully assess the effect of rank-value encoding in a deep learning model, we take this one step further by calculating the embeddings of our cells using Geneformer. We generate these embeddings by using their model and code as [hosted on HuggingFace](https://huggingface.co/ctheodoris/Geneformer) for tokenization and embedding of our gene expression vectors. For each cell $$i$$, we obtain an embedding vector $$ x_i \in \mathbb{R}^{256} $$. We further project these 256-dimensional vectors down to 5 dimensions using UMAP for consistency with the projections of the raw gene expression values described above, and then calculate Euclidean distance between all pairs of cells. 
The rationale here is that Euclidean distance between two points may be larger in a 256-dimensional space than a 5-dimensional space due to the high dimensionality (i.e. "curse of dimensionality"). However, we do still see similar results when using the full 256-dimensional embedding vectors (see Appendix). - -## Results - -### Rank-value encodings preserve similarity between cell types -The first thing we can see from our results is that rank-value encodings do preserve similarity between cell types in a similar manner as distances generated from raw gene expression values. The figure below is generated by looking at the distributions of distances between pairs of cells from the same type ("within") or from different cell types ("between"). To provide a comparison at the level of cell types, we plot the median of each distribution rather than individual pairs of cells, i.e. the "within" group contains 89 data points and the "between" group contains $$ \frac{89 \times 88}{2} $$ data points. - -{% include figure.html path="assets/img/2023-11-08-scRNAseq-assumptions/fig6_combined_measure_comparison.png" class="img-fluid rounded z-depth-1" %} -
- Comparison of various similarity measures both within cell types and between cell types. Note that for the Euclidean distances (left and right), lower is more similar, whereas for rank correlation (middle), higher is more similar. -
- -How should we interpret this? What we can observe is that all three measures maintain high similarity for cells from the same type and less similarity for cells from different types. Put another way, rank-value encodings do define a space in which different cell types tend to be distant and cells from the same type tend to be near each other. We can also say that this holds when using both a non-parametric measure of the rank-value encodings (Spearman rank-correlation) and also when using a deep learning model that operates on rank-value encoded gene vectors (Geneformer). - -However, we do also see that the difference between the "within" and "between" cell type distances is more pronounced when using a non-linear function on the raw data compared to either of the methods operating on the rank-value encoded gene vectors. This difference will become even more clear as we look at joint distributions of our different measures in the next section. - -### Raw gene expression values better preserve within cell type similarities - -To gain further insight into how rank-value encodings compare to raw gene expression values, we can look at the joint distributions of our distance measures. Below we see -the joint distribution of our raw gene expression-based distances compared to the rank-correlation values, shown as a 2D histogram where each hex is colored according to -the number of points that fall within that bin. - -
-
- {% include figure.html path="assets/img/2023-11-08-scRNAseq-assumptions/fig7_raw_umap_vs_rank_corr_within_type.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-scRNAseq-assumptions/fig8_raw_umap_vs_rank_corr_between_type.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Joint distributions of distances from UMAP of raw gene expression values compared to rank correlations, within cell types (left) and between cell types (right). -
- -We can notice that within cell types, the rank correlation has a fairly wide dynamic range whereas the raw gene expression-based distance seems to show a -tighter packing. Between cell types, we can observe that the rank correlations largely clump up closer to zero but do mesh with the larger distances we see -with the raw gene expression-based measure. - -Given that we see a spreading out of cells within a type using a rank correlation, the natural question becomes whether this holds when we use a deep learning -model that can learn a complex non-linear function of the rank encodings. That's exactly what we look at below where we perform a similar comparison, but swapping -out the rank correlation distance measure for the distance measure based on Geneformer embeddings. - -
-
- {% include figure.html path="assets/img/2023-11-08-scRNAseq-assumptions/fig9_raw_umap_vs_geneformer_umap_within_type.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-scRNAseq-assumptions/fig10_raw_umap_vs_geneformer_umap_between_type.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Joint distributions of distances from UMAP of raw gene expression values compared to distances from UMAP of Geneformer embeddings, within cell types (left) and between cell types (right). -
- -With the Geneformer embeddings derived from the rank-value encodings, we now see that the between cell type distances are better matched to the distances derived from raw -gene expression values. However, we still see that Geneformer embeddings are more spread out within cell types compared to the non-linear transform of the raw gene expression -values. To better understand why this might be the case, we propose one possible contributing factor in the next section. - -### Sparsity of scRNA-seq data may drive loss of information in rank-value encodings -A key aspect of scRNA-seq data is its extremely high sparsity. When working with single cells, the amount of available RNA is already quite limited, and then each processing step, such as RNA isolation or sequencing, introduces technical noise and the possibility of "dropout events", where a gene's expression is not detected at all. Combined with the inherent stochasticity of gene expression, we're often left with data where the vast majority of genes have zero detected RNA molecules. - -Shown below is a histogram of sparsity per cell in the full Tabula Sapiens dataset as well as in the subset of cells and genes we considered in the analyses above. - -
-
- {% include figure.html path="assets/img/2023-11-08-scRNAseq-assumptions/fig4_sparsity_full_dataset.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-scRNAseq-assumptions/fig5_sparsity_subset.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Histogram of gene expression sparsity per cell for the full Tabula Sapiens dataset (left) and the subset of 44,500 cells and 2,435 genes we considered in previous analyses. Sparsity here is defined as the fraction of genes with zero observed RNA molecules. -
- -While many methods for processing scRNA-seq data attempt to handle the high sparsity in a principled manner, most of the methods described here simply remove genes with zero observations from -consideration. In particular, scGPT, GenePT, and Geneformer all remove genes with zero observations from their inputs, and cell2sentence restricts itself to the 100 genes with the highest -expression per cell, effectively removing all genes with zero observations. While sparsity is at least partially driven by stochastic technical factors, there is undoubtedly -a biological contribution as well, which may be removed when dropping genes with zero observations. While this issue is not unique to rank-value encoding, we can see that all -of the methods we've discussed here that use rank-value encoding remove genes with zero observations, likely to circumvent the ambiguity in how one would enforce an ordering -on genes that all have zero observations. - -## Discussion -To give a high-level summary, what we've seen in this post is that rank-value encodings are an appealing way to transform continuous gene expression -vectors into a format that's directly compatible with the foundation model architectures that have seen great success in natural language processing. However, they -also seem to lose some valuable biological information of cell types, particularly information concerning similarity of cells within a given type. - -While we don't present a smoking gun for an exact characteristic of this loss of information, we present sparsity as a key challenge in scRNA-seq data, which may -be exacerbated when using rank-value encodings. We can also further hypothesize that rank-value encodings may be sensitive to small changes in gene expression values -from technical noise, which could cause a shifting of ranks and thus amplify the impact of said noise. 
Similarly, rank-value encodings lose the absolute quantification -of gene expression, and this loss of granularity may impact the model's ability to capture the cases where subtle differences in gene expression hold biological -significance. - -From the perspective of downstream use cases, models based on rank-value encodings are also limited in their ability to explore the counterfactuals that may be -interesting in cases such as predicting cellular responses to a novel therapeutic. For example, if a drug were known to affect the expression of a single gene, but -not to the point where the ranking of this gene shifted, then such a model would be unable to explore the downstream effect of this drug on the expression of other -genes. - -In terms of limitations, the work presented here is fairly superficial and is constrained both in terms of size of datasets and breadth of methods compared. To -perform a more robust comparison in the future, we would like to scale up this analysis to larger datasets, such as the full Tabula Sapiens dataset. We would also -like to more directly compare cell type similarities in the embedding spaces of other scRNA-seq foundation models, including those that do and do not utilize rank-value -encodings. A great follow-up would be to perform a head-to-head comparison of a model like scBERT to Geneformer on the full Tabula Sapiens dataset. - -Additionally, we've also yet to explore the angle of robustness across datasets. It's possible that some of the shortcomings we've listed for rank-value encodings -may actually be benefits in the context of suppressing technical noise when integrating scRNA-seq datasets across studies, institutions, and experimental techniques. -Performing this comparison across datasets would be a valuable follow-up that would help paint a more full picture of the value of rank-value encodings in the context -of constructing foundation models for gene expression data. 
- -While we've discussed many challenges in constructing foundation-scale models for gene expression data, it's worth closing this post with an optimistic reflection on -the potential value of such models. By training a deep learning model to construct a representation space of cellular state, we stand to create a powerful tool that will -help us gain a fundamental understanding of cellular biology and its underlying complex regulatory networks. Ultimately, such tools could help us unravel the genetics of -various diseases, paving the way for a new era of disease treatments and precision medicine. - -## Appendix - -
-
- {% include figure.html path="assets/img/2023-11-08-scRNAseq-assumptions/fig11_SUP_raw_umap_vs_geneformer_raw_within_type.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-08-scRNAseq-assumptions/fig12_SUP_raw_umap_vs_geneformer_raw_between_type.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Joint distributions of distances from UMAP of raw gene expression values compared to distances from raw Geneformer embeddings, within cell types (left) and between cell types (right). -
\ No newline at end of file diff --git a/_posts/2023-11-08-sentence-embeddings.md b/_posts/2023-11-08-sentence-embeddings.md deleted file mode 100644 index 39331cc3..00000000 --- a/_posts/2023-11-08-sentence-embeddings.md +++ /dev/null @@ -1,226 +0,0 @@ ---- -layout: distill -title: In the pursuit of cheap and robust word embeddings -description: A study of how we can train a student word embedding model to mimic the teacher OpenAI word embedding model by using as small a training set as possible. We also investigate preprocessing tricks and robustness against poisoned data. - -date: 2023-12-12 -htmlwidgets: true - -authors: - - name: Alor Sahoo - affiliations: - name: MIT - - name: Sebastian Alberdi - affiliations: - name: MIT - -bibliography: 2023-11-08-sentence-embeddings.bib - -toc: - - name: Introduction and Motivation - - name: Background and Literature Review - - name: Methods and Experiments - - name: Results and Analysis - subsections: - - name: Model A, the Baseline - - name: Model B, the Baseline - - name: Model B, Lemmatization, No Poisoned Data - - name: Model B, Lemmatization, Poisoned Data - - name: The Effects of Data Poisoning and Surprising Robustness - - name: Conclusion, Discussions, and Future Directions ---- -## Introduction and Motivation - -Large Language Models (LLMs), such as Bard and OpenAI’s GPT-4 are typically used to obtain data embeddings of text. These embeddings are quite rich, encoding common-sense semantic information. A good embedding naturally aligns with our intuitive human understanding of language: at a high level, similar text/words are clustered together, while dissimilar text/words are farther apart. - -High-quality embeddings also satisfy semantic equations that represent simple analogies. 
Define $$(\text{some_text})$$ to be the embedding of some string “some\_text.” Then, a traditionally good embedding will typically obey linguistic equations like: - - -$$(\text{King}) - (\text{Man}) + (\text{Woman}) ≈ (\text{Queen})$$ - -$$(\text{Paris}) - (\text{France}) = (\text{Warsaw}) - (\text{Poland})$$ - - -However, repeatedly querying LLMs for large-scale analysis is expensive. Many utilize thousands of cloud GPUs and are constantly fine-tuned, adding to their cost. This cost barrier discourages researchers—especially those with less funding—from making use of these embeddings for their own models. Repeated strain on LLMs' infrastructure can even cause a negative environmental impact. However, we often don’t need embeddings as good as these fancy ones to conduct certain types of research. Specifically, it would be desirable for a researcher to choose their embedding quality, with the understanding that higher-quality embeddings take longer, and vice versa. Such a model should be robust and resistant to being trained on a small amount of incorrect data (which can happen by accident when scraping text, or due to malicious behavior.) - -**These issues motivate the following research question: on how little data can we train a text embedding model—with OpenAI embedding as ground truth—such that our embeddings are good enough quality? And can we quickly preprocess the data to improve our results?** - -## Background and Literature Review - -While there is some existing literature on generating word embeddings more “cheaply,” significant differences exist with current methodologies. Broadly, this process is called knowledge distillation (KD), which aims to “distill” knowledge from a larger teacher model (in our case, OpenAI embeddings) into a smaller student model. - -For example, Shin et al. discuss a novel distillation technique that “distills” a “student” embedding model from a “teacher” model . 
Importantly, this work focuses on reducing the dimensionality of the “student” word embeddings without compromising accuracy—which is fundamentally different from our goals. While our OpenAI embeddings form the “teacher,” our student model should output embeddings of the same dimensionality. Our dimensionality preservation is useful if we want to directly translate general patterns or trends from our student’s embeddings to the OpenAI embeddings. - -Gao et al. take a different approach. They propose a KD framework for contrastive sentence embeddings, DistilCSE. It works by first applying KD on a large amount of unlabeled text before fine-tuning the student model via contrastive learning on limited labeled data . Contrastive learning in this domain is promising, especially since synonyms and pairs of similar words naturally form positive pairs. However, in our context, a direct application of contrastive learning presents some issues. - -For example, suppose we had some Euclidean distance threshold A and B, such that, for any two word embeddings $$c$$ and $$d$$: - -If the distance between $$c$$ and $$d$$ is less than A, then define $$c$$ and $$d$$ to be positive pairs for contrastive learning. - -If the distance between $$c$$ and $$d$$ is greater than B, then define $$c$$ and $$d$$ to be negative pairs for contrastive learning. - -While this process (and others like it) isn’t too resource-intensive, it has a few issues, even if we are able to define proper thresholds A and B. Firstly, it “wastes” pairs of data where the distance is in between A and B. Secondly, information about direction is easy to lose—so while a student would learn to embed similar words closer together and dissimilar ones further apart, the student may be invariant to direction and sensitive only to Euclidean distance in the n-dimensional space. This is not ideal. - -Other related state-of-the-art approaches also present issues. Gao et al. 
describe another approach involving running data through an encoder multiple times with standard dropout to generate positive pairs instead of searching for them in the data itself . While this approach is promising, the approach involves repeatedly using somewhat slow pre-trained text embedders, which is exactly what we want to avoid. 
- 
-To reduce computational complexity while still reaping the benefits of preprocessing, we look to a paper by Rahimi et al. They explain how removing stop words (common words, like “a,” “the,” etc.) and punctuation improves sentence embedding quality, for a variety of reasons . Even though we’re focusing on word embeddings—and not sentence embeddings—we can adapt this general approach to our project with lemmatization, which applies morphological analysis to words to simplify them to a base form. For example, "fighting" turns into "fight," "was" turns into "be," and "octopi" turns into "octopus," and so on. 
- 
- 
-This reduces data sparsity by consolidating related forms of words into a single representation, which is especially helpful for low-frequency words. This in turn helps the model generalize across tenses and other variations as it can focus on the “core” differences of words rather than auxiliary modifiers. We thus plan to investigate lemmatization in this context. 
- 
-We struggle to find closely related literature about student models' resistance to poisoned data. Thus, we decided to investigate this aspect as well. 
- 
-To conclude our literature review, while different variants of KD exist, we decide to focus on a modified response-based KD, in which the teacher model sends final predictions to the student network, which then directly mimics these predictions by minimizing some loss . 
- 
-{% include figure.html path="assets/img/2023-11-08-sentence-embeddings/response_based_KD.png" class="img-fluid" %} 
-
- Generic response-based knowledge distillation. Image credits: Gou, J., Yu, B., Maybank, S.J. et al. Knowledge Distillation: A Survey. Int J Comput Vis 129, 1789–1819 (2021). https://doi.org/10.1007/s11263-021-01453-z -
- - -Other distillation approaches—such as feature-based KD, relation-based KD, and the contrastive approach described above—do exist, but require more intimate knowledge of the teacher’s features and/or layers . This is not reasonable information we can expect to have, as companies are often incentivized to obfuscate their specific word embedding architectures. - - - -## Methods and Experiments - -We center our studies on a standard dataset of 10k English words scraped from high-level Standard English texts that's been empirically validated for quality. We also use the OpenAI API to obtain text-embedding-ada-002 embeddings of the entire dataset to use as ground truth. While these aren’t necessarily the best embeddings, even among OpenAI’s own embeddings, they are the best choice given our computational restrictions. - -Now, we detail our model architecture. Our baseline model (call this Model A) is a sequential ReLU and nn.Embedding layer followed by L2 normalization. Model A serves as a crude baseline—therefore, we do not investigate it as deeply as the more complex model that followed due to large differences in performance. - -Instead, we focus our efforts on the more complex Model B, detailed below in Figure 1 in the context of our pipeline. Model B utilizes an nn.Embedding layer, followed sequentially by 2 blocks. The first uses ReLU activation followed by a linear layer of size $$\frac{\text{embedding_dim}}{2}$$. The second layer is the same, except the final Linear layer outputs embeddings with the full "embedding_dim." Notably, we use L2 normalization to make sure each embedding vector has magnitude 1 (such that all embeddings exist in an n-hypersphere.) Since all embeddings are unit embeddings, using cosine embedding loss along an Adam optimizer is natural. Thus, instead of computing cosine similarities between teacher and student vectors, we can just focus on minimizing this embedding loss. 
- - -For the training stage, we train our embedding model to map words to vector embeddings on Google Colab with an Nvidia T4 GPU. There may be up to 3 processing steps, as depicted in Figure 1: - -{% include figure.html path="assets/img/2023-11-08-sentence-embeddings/architecture_diagram.png" class="img-fluid" %} -
- Fig. 1: An overview of Model B’s architecture in the context of our experimentation. 
-
- -First, we choose whether or not to lemmatize the entire dataset before proceeding. - -Second, the training split. We train our embedding models above on each of the following proportions (call this $$p$$) of the dataset: 0.005, 0.009, 0.016, 0.029, 0.053, 0.095, 0.171, 0.308, 0.555, and 1.00. - -Finally, we choose whether or not to poison 10 percent of the entire word dataset (not the training dataset). When a word is poisoned, the model incorrectly believes that some random unit vector is the ground-truth embedding instead of the actual OpenAI embedding. - -For each such model, we train for up to 80 epochs, limited by our computational resources. - -We then evaluate the model’s embeddings against the ground truth with multiple metrics—cosine similarity (via the embedded cosine loss), graphically via distributions of the embedding means, linguistic math, etc. - -Taken together, this methodology is comprehensive. - -## Results and Analysis - -### Model A, the Baseline - -First, here is a graph of training up our baseline Model A (Figure 2) and our augmented Model B (Figure 3). The difference in epochs (80 for Model A, and 60 for Model B) training is due to limited resources. This doesn’t matter much, as a clear, near-linear relationship between $$p$$ and training time, which we use to estimate used computational resources. Thus, we consider $$p$$ as inversely proportional to the computational resources used for all our experiments. - -{% include figure.html path="assets/img/2023-11-08-sentence-embeddings/bad_model_time.png" class="img-fluid" %} -
- Fig. 2 -
- -{% include figure.html path="assets/img/2023-11-08-sentence-embeddings/good_model_time.png" class="img-fluid" %} -
- Fig. 3. -
- -For Model A (with no lemmatization, no data poisoning), we also want to visualize the tradeoffs between the number of epochs trained, the training proportion $$p$$, and the training loss to establish some baseline intuition. To this end, we take inspiration from the game theoretic concept of Pareto efficiency, which aims to find equilibria where no change improves one of these 3 factors without hurting one of the other 2. - -We also wanted to visualize the tradeoffs between the number of epochs trained, the training proportion, and the cosine embedding loss, since we are motivated to find the optimal balance of these 3 factors. See Fig. 4. - -{% include figure.html path="assets/img/2023-11-08-sentence-embeddings/bad_model_pareto.png" class="img-fluid" %} -
- Fig. 4. -
- -Unfortunately, Fig. 4 is not particularly enlightening. Training loss decreases as the number of epochs increases and as training proportion $$p$$ increases. There are also no local minima or maxima of interest. Figures 5 and 6 also confirm this with their plots of distributions of embedding means. Specifically, as we tend to move towards the right and bottom of Fig. 6, i.e. we train longer and on more data, we simply seem to approach the true distribution (Fig. 5) without anything of note. - -{% include figure.html path="assets/img/2023-11-08-sentence-embeddings/bad_model_full_means.png" class="img-fluid" %} -
- Fig. 5. -
- -{% include figure.html path="assets/img/2023-11-08-sentence-embeddings/bad_model_all_means.png" class="img-fluid" %} -
- Fig. 6. -
- -These results motivate us to look beyond our Model A. Our results from this point focus on Model B because we didn’t want a poorly performing model like Model A to be a true control, it merely served as an intuitive baseline. - -### Model B, the Baseline - -As in the previous part, we obtain a Pareto-like graph for Model B, without any lemmatization and data poisoning. Firstly, the cosine embedding losses are much lower than before, due to the improved model architecture. More interestingly, after about 10 iterations, the training loss seems to stabilize across all versions of the model, potentially suggesting that training longer may not be worthwhile. - -{% include figure.html path="assets/img/2023-11-08-sentence-embeddings/good_model_nolemma_nopoison_pareto.png" class="img-fluid" %} -
- Fig. 7. -
- -Since this is our base model, we don't investigate further. - -### Model B, Lemmatization, No Poisoned Data - -Now, we look to Model B, with lemmatization, but no poisoned data. The Pareto-like curve for this is telling (Fig. 8), with it looking very similar to the baseline Model B's. As before, this suggests that training for longer may not be worthwhile, and could potentially lead to overfitting. - -{% include figure.html path="assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_pareto.png" class="img-fluid" %} -
- Fig. 8. -
- -We also have a distribution of the means of embeddings for the whole dataset (Fig. 9) and from each variant of the model at different epochs (Fig. 10). Again, the results don’t say anything surprising: as we train on more data for longer, the distribution approaches that of the training dataset. - -{% include figure.html path="assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_full_means.png" class="img-fluid" %} -
- Fig. 9. -
- -{% include figure.html path="assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_all_means.png" class="img-fluid" %} -
- Fig. 10. -
- 
-To check for overfitting, we will later validate our model on simple linguistic tests, as described in the very beginning. Specifically, we will validate our model's performance on linguistic math against OpenAI's performance. 
- 
-### Model B, Lemmatization, Poisoned Data 
- 
-The following is the Pareto-like curve, except now we poison 10 percent of the entire dataset, as described in Methods/Experiments. Curiously, we find a local minimum at approximately $$p = 0.1$$ and ~20 epochs, demonstrating that our overall approach of training on a small fraction of the dataset naturally resists moderate-scale adversarial attacks on our ground-truth embeddings. Of course, the addition of poisoned data means that the loss values are on average higher than those in the previous subsection, where there was no poisoned data. 
- 
-{% include figure.html path="assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_pareto.png" class="img-fluid" %} 
-
- Fig. 11. -
- - -Again, looking at the distribution of the means of embeddings (see below), we see that models that trained on too much of the data are completely ruined. We don’t even need to compare these distributions against the whole-model distribution to see this. This result demonstrates that even a relatively small amount of poisoned data can manipulate a naive embedding model trained on an entire dataset. - -{% include figure.html path="assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_all_means.png" class="img-fluid" %} -
- Fig. 12. -
- 
-### The Effects of Data Poisoning and Surprising Robustness 
- 
-As discussed previously, we want to externally validate our models with both linguistic equations and pairs of synonyms. Essentially, we want to check that our student groups together similar words like the OpenAI teacher. Since our poisoned model performed best with $$p = 0.095,$$ we use this training proportion to compare Model B with lemmatization, but no poisoned data to Model B with lemmatization and poisoned data. 
- 
-For clarity's sake, we focus on a single representative example of our validation results in this blog. Specifically, we look into "nonviolent" and "antiaggressive," which intuitively should exist close together in the n-dimensional unit hypersphere. Using dimensionality reduction techniques to visualize this in 2D, we obtain the following: 
- 
-{% include figure.html path="assets/img/2023-11-08-sentence-embeddings/good_models_yeslemma_yesnopoison_linguisticmath.png" class="img-fluid" %} 
-
- Fig. 13. -
- -The poisoned model is surprisingly performant, performing decently against both the unpoisoned model and the OpenAI model. These results support our notion that student models that train on as little of the data as possible are somewhat resistant to uniform, random adversarial data poisoning. This empirical result is encouraging, especially since our data poisoning threshold was somewhat high. - -## Conclusion, Discussions, and Future Directions - -On balance, our results help us answer our question about how to best mimic OpenAI’s word embeddings without excessive API calls. We utilize a spin-off of a response-based KD architecture to train our student model under different conditions, demonstrating both that certain preprocessing (lemmatization) improves our embedding model and that training on smaller amounts of data creates more robust models that resist adversarial data. Our initial results demonstrate promise and serve as a call to action for others to research other cheap, robust word embedding models. - -To be clear, there are certainly many limitations to our study. For one, we keep our modeling architecture simpler due to our limited compute, while a real model would certainly use a different architecture altogether. Our dataset was also on the smaller side and doesn’t fully represent the English language. Also, our implicit use of time as a proxy for computation (especially on the erratic Google Colab) is imperfect. Also, preprocessing (including, but not limited to, lemmatization) may require substantial computational resources in some cases, which we don't account for. - -Additionally, many of the constants that we chose (such as the 10 percent data poisoning threshold, the proportions of data we trained on, etc.) are arbitrarily chosen due to limited compute. This could’ve caused unexpected issues. For example, the output dimension of embedding Model B, 1536, is more than 10 percent the size of the dataset (10k). 
Thus, due to our relative lack of data, our trials with data poisoning can encourage non-generalizable memorization, which is not ideal. - -Future directions would include exploring other types of preprocessing, as hinted at in our literature review. We could also look into different types of adversaries—perhaps smarter ones that actively feed information that they know to be detrimental to the model, instead of some random unit vector. While we didn't have robust supercomputer access, we’d also love to be able to test out fancier embedding architectures. - -Finally, we'd like to thank the 6.S898 faculty and TAs for their support! diff --git a/_posts/2023-11-08-spatiotemporal.md b/_posts/2023-11-08-spatiotemporal.md deleted file mode 100644 index b0792f28..00000000 --- a/_posts/2023-11-08-spatiotemporal.md +++ /dev/null @@ -1,195 +0,0 @@ ---- -layout: distill -title: Embeddings for Spatio-temporal Forecasting -description: An analysis of various embeddings methods for spatio-temporal forecasting. -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Samuel Lee - # url: "https://en.wikipedia.org/wiki/Boris_Podolsky" - affiliations: - name: MIT - - name: Joshua Sohn - # url: "https://en.wikipedia.org/wiki/Albert_Einstein" - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-08-spatiotemporal.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Related Work - - name: Dataset - - name: Methodology - - name: Results - - name: Conclusion, Discussion, Next Steps - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. 
-_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Introduction -Time series forecasting is an interdisciplinary field that affects various domains, including finance and healthcare, where autoregressive modeling is used for informed decision-making. While many forecasting techniques focus solely on the temporal or spatial relationships within the input data, we have found that few use both. Our goal was to compare two SOTA spatiotemporal models, the STAEformer and the Spacetimeformer, and determine why one works better than the other. The papers on both models did not feature each other in their benchmark evaluations, and we thought that analyzing their embeddings and identifying their failure modes could offer new insights on what exactly the models are learning from the dataset. We hypothesized that the Spacetimeformer would perform better as its proposed approach, sequence flattening with Transformer-based processing, seems to offer a more flexible and dynamic representation of spatiotemporal relationships that doesn’t depend on predefined variable graphs. We focused on forecasting in the field of traffic congestion, which is a pervasive challenge in urban areas. - -## Related Work - -We focused on two SOTA spatiotemporal models that were evaluated on traffic forecasting datasets. -The first is the STAEformer . STAEformer proposes a novel adaptive embedding that learns the spatio-temporal relations in the dataset. In their architecture, the input embedding is generated by combining the projected raw data (denoted by $$E_p$$ in the embedding layer) with temporal embeddings ($$E_f$$) and the adaptive embeddings ($$E_a$$), which was used instead of an embedding solely focused on capturing spatial relations. 
This output is then fed into temporal and spatial transformer layers, followed by a regression layer. - - -{% include figure.html path="assets/img/2023-11-08-spatiotemporal/staeformer_architecture.png" class="img-fluid" %} -
- Architecture of the Spatio-Temporal Adaptive Embedding transformer (STAEformer). -
- -The second is the Spacetimeformer . Spacetimeformer uses embeddings generated from breaking down standard embeddings into elongated spatiotemporal sequences. The idea behind doing this is to enable the downstream tasks to learn direct relationships between variables at every timestep. In their architecture, these embeddings are fed into a variant of the transformer model using local, global, and cross self-attention. The figure below shows an intuitive visualization for this idea. - -{% include figure.html path="assets/img/2023-11-08-spatiotemporal/spacetimeformer_architecture.png" class="img-fluid"%} -
- Architecture of the Spacetimeformer Embedding. -
- -## Dataset -We used the PEMS08 dataset , which contains traffic data in San Bernardino from July to August of 2016. Each data point consists of readings from 170 detectors along with the time of day and day of the week they were recorded. We initially considered using the PEMSBAY dataset , which is widely used in traffic speed forecasting, but it was almost double the size of the PEMS08 dataset and took too long to train our model on. - -## Methodology -The problem statement is as follows: given the sensor readings across the 170 sensors for the previous N timesteps, we want to predict their readings for the next N timesteps. We tested the model with varying context lengths, but we found that the default value of 12 given in the STAEformer paper provided enough information to the model. We used huber loss as we wanted the model to converge faster in the presence of outliers, which was necessary given the limited compute that we had (training 50 epochs took around 3 hours). - -We trained STAEformer for 50 epochs, which was sufficient to achieve performance metrics similar to that of the paper. To compare the embeddings from Spacetimeformer, we retrained the model end to end after replacing the embedding layer in the model with Spacetimeformer’s embedding layer. To do this, we kept the context dimensions the same and flattened the input sequence along the input dimension and the dimension corresponding to the number of sensors. This structured the embedding layer so that it could learn the spatiotemporal relations across the sensors from different time frames. - -Replacing the embedding layer within the STAEformer with a pretrained embedding layer from the Spacetimeformer instead may seem like a more legitimate method to test the effectiveness of the embeddings, as we would basically be doing transfer learning on the embedding layer. 
However, the pretrained embeddings from Spacetimeformer might have been optimized to capture specific spatiotemporal patterns unique to its architecture, which was why we believe training the model end to end with the Spacetimeformer embeddings would result in a more accurate and contextually relevant integration of the pretrained embeddings into the STAEformer framework. - -After training, we wanted to provide visualizations of the embeddings from STAEformer and Spacetimeformer to show whether the learned embeddings are meaningful at all. To do this, we obtained the embeddings by passing in the raw data through the embedding layers of the loaded models and generated t-SNE plots with them. For the STAEformer, we focused solely on the adaptive embeddings as they were the parts of the embedding layer that captured spatiotemporal relations in the data. To generate the t-SNE plots, we had to reshape the embeddings so that they could be passed into the function so we flattened them across the model dimension. After fitting the t-SNE, we then unflattened the embeddings back to their original shape and plotted them. Each sensor was color coded with different colors, and the results can be shown in the next section. We hypothesized that the t-SNE plots would contain clusters grouped by either the sensors or the time the readings were recorded. - -After generating the t-SNE plots, we wanted to test the effects of perturbing the raw data on the embeddings. We wanted to know how the embeddings would change. For example, regardless of what the clusters represent, are they tighter? Will additional clusters be formed? Conversely, will some of the existing clusters break apart? In particular, we were hoping that augmenting the data would perhaps improve cluster formations in the worse looking embeddings, as there is a good possibility that the data itself isn’t good enough. 
- - -## Results -The table below shows the results after training STAEformer and the STAEformer model with a Spacetimeformer embedding layer for 50 epochs each. -Table of loss values: - -| Embedding Layer | Train Loss | Validation Loss | -| :----------------------------------: |:----------:| :--------------:| -| STAEformer | 12.21681 | 13.22100 | -| Spacetimeformer | 12.42218 | 16.85528 | - -We can see that the STAEformer had better training and validation loss than the Spacetimeformer. While the train loss converged to similar values, the validation loss for the model using the STAEformer embedding layer was much better. -So now that we know the STAEformer embedding layer seems to perform better than the Spacetimeformer embedding layer, we plotted the embeddings for both to analyze why this is the case. To do this, we passed a data point from the validation set through the embedding layer. The results are shown in the figure below. - -{% include figure.html path="assets/img/2023-11-08-spatiotemporal/STAEformer.png" class="img-fluid"%} -
- t-SNE plot of the STAEformer embeddings. -
- -The t-SNE plot for the STAEformer embeddings shows clearly separate clusters for most of the 170 different sensors. The shape of each cluster is a “snake-like” trajectory. Therefore, we know that the embeddings preserve some pattern-like notion across readings from a single sensor. We hypothesize that each of these trajectories represent the reading of a single sensor over time. There are a couple outliers, where the clusters are not grouped by color. One prominent example is the string of cyan, maroon, and moss points along the bottom of the plot. However, even these points have some clustering, though they may not be clustered by color. - - -{% include figure.html path="assets/img/2023-11-08-spatiotemporal/Spacetimeformer.png" class="img-fluid"%} -
- t-SNE plot of the Spacetimeformer embeddings. -
- -On the other hand, the t-SNE plot for the Spacetimeformer embeddings show no clear clusters across the same sensor. The distribution resembles a normal distribution, meaning that there is little pattern preserved in the embeddings. It becomes more difficult to differentiate between data points from the same sensor across time. - -In order to further analyze the effectiveness of each embedding layer, we perturbed the training data and re-trained each model. We were expecting the clusters from the STAEformer embeddings to remain largely the same, with some of the existing clusters possibly breaking apart due to the added noise. However, we were hoping that the Spacetimeformer embeddings would show more visible clusters after the raw data was perturbed. Given the characteristics of the embeddings, one possible output we expected were clusters containing multiple colors. An example is shown in the following image. - - -{% include figure.html path="assets/img/2023-11-08-spatiotemporal/ideal_tsne.png" class="img-fluid"%} -
- Ideal t-SNE plot of the Spacetimeformer embeddings. 
-
- 
-This would show that the Spacetimeformer successfully learned spatial relationships across the sensors at variable timesteps. Instead of each cluster representing the embeddings for one sensor, the presence of larger clusters with multiple colors could imply that the Spacetimeformer learned spatiotemporal relations among the corresponding sensors and embedded them into a larger cluster. 
- 
-The following table shows the results after training the model with the perturbed data. 
- 
-| Embedding Layer | Train Loss | Validation Loss | 
-| :----------------------------------: |:----------:| :--------------:| 
-| STAEformer (with perturbations) | 13.58251 | 13.35917 | 
-| Spacetimeformer (with perturbations) | 13.42251 | 17.01614 | 
- 
-As expected, validation loss slightly increased for both models, and the STAEformer continued to have lower loss values than the model with the Spacetimeformer embedding layer. 
- 
-When we generated the t-SNE plots with the new embeddings, we obtained the following: 
- 
-{% include figure.html path="assets/img/2023-11-08-spatiotemporal/STAEformer_perturbed.png" class="img-fluid"%} 
-
- t-SNE plot of the perturbed STAEformer embeddings. -
- -{% include figure.html path="assets/img/2023-11-08-spatiotemporal/Spacetimeformer_perturbed.png" class="img-fluid"%} -
- t-SNE plot of the perturbed Spacetimeformer embeddings. -
- -Both t-SNE plots for the STAEformer and Spacetimeformer embeddings look the same as when the models were trained on the original, unperturbed data. So unfortunately, the augmentation had little to no effect on the embedding layers for these two models. - -Since the t-SNE plots can be hard to parse with the human eye, we decided to focus on the embeddings for the most relevant features of the dataset and see how they compared between the Spacetimeformer and STAEformer. In parallel, this would enable us to identify the failure modes of the dataset and augment those features to see if they improve the model performance. In order to do this, we used PCA to identify the principal components. From there, we found which features help explain the most variance in the dataset and identified those as the features that had the largest impact on the learned embeddings. - -{% include figure.html path="assets/img/2023-11-08-spatiotemporal/pca_heatmap.png" class="img-fluid"%} -
- Z-normalized correlation matrix between the original PEMS08 dataset and PC-space, normalized by explained variance. -
- 
-This heatmap shows the top 10 principal components and the top 10 features that correlate with each principal component. From this heatmap, we can see that the 9th sensor in the dataset is the most relevant feature. Therefore, we can find the corresponding embedding to be the most relevant. 
- 
-Using only the 5 most relevant embeddings obtained from PCA, we re-graphed the t-SNE plots. This helped us to narrow our attention to the most important embeddings. 
- 
- 
-{% include figure.html path="assets/img/2023-11-08-spatiotemporal/STAEformer_top5.png" class="img-fluid"%} 
-
- t-SNE plot of the top 5 STAEformer embeddings. -
- -{% include figure.html path="assets/img/2023-11-08-spatiotemporal/Spacetimeformer_top5.png" class="img-fluid"%} -
- t-SNE plot of the top 5 Spacetimeformer embeddings. -
- -As expected, the embeddings for the most relevant sensors in the STAEformer all maintain the “snake-like” trajectory. However, the embeddings for even the most relevant sensors in the Spacetimeformer are seemingly random, and have no pattern across the points. - - -{% include figure.html path="assets/img/2023-11-08-spatiotemporal/pca_cumulative.png" class="img-fluid"%} -
- Cumulative PCA plot of the original PEMS08 dataset. -
- -We found that the top 25 sensors explained 95% of the variance in the dataset, so we did a quick experiment where we augmented the rest of the 145 sensors (as opposed to the entire training dataset) to see how that affected the learned embeddings. For this augmentation, we expected the results to not improve by much since the learned embeddings for even the most relevant sensors in Spacetimeformer didn’t form visible clusters in the t-SNE plots. As expected, the results were almost identical to the ones generated from augmenting the entire dataset. - -## Conclusion, Discussion, Next Steps - -There are a couple of reasons why we think the Spacetimeformer performed worse than the STAEformer overall. The first explanation that came to mind is that the readings across different sensors may be mostly independent from one another. The color coded t-SNE plots for the STAEformer clearly separate each sensor into its individual cluster. In this case, the Spacetimeformer would not be suited for the task as its embedding layer solely focuses on learning spatiotemporal relationships, while the STAEformer also contains an embedding layer that is solely dedicated to learning temporal relationships. - -A second, more plausible explanation deals with the embedding architecture. The difference in performance between the STAEformer and the Spacetimeformer in time series forecasting shows the importance of adaptive embeddings in capturing spatio-temporal relationships. While the STAEformer introduces adaptive embeddings to comprehend the patterns in the data, the Spacetimeformer relies on breaking down standard embeddings into elongated spatiotemporal sequences. The t-SNE plots show that the STAEformer's adaptive embeddings generate clusters representing sensors with snake-like trajectories, providing a visualization of the model's ability to capture spatio-temporal patterns. 
In contrast, the Spacetimeformer's embeddings follow a scattered distribution, indicating challenges in identifying clusters. This suggests that the Spacetimeformer's approach may face limitations in effectively learning the spatio-temporal relationships within the PEMS08 dataset, and potentially traffic data in general. - -Having said all this, the resilience of both the STAEformer and Spacetimeformer to perturbations in the raw data showcases the robustness of their learned representations. Despite the added augmentations, the fact that the t-SNE plots remain largely unchanged indicates the stability in the embedding layers. This may be attributed to the models’ ability to learn a generalizable representation of the spatio-temporal patterns resilient to changes in the input data, regardless of how accurate they may be. This may also be attributed due to the dataset itself. The PEMS08 dataset’s readings may already have been noisy, as it’s unlikely that the readings were recorded with perfect accuracy. We would like to explore these implications of the embeddings’ robustness in our future work. - -Another possible avenue we would like to explore is why certain sensors (such as the 9th sensor) are more relevant than others beyond just the theory. We came up with a couple hypotheses. First, it’s probable that this particular sensor is placed at important intersections, such that cars that pass this sensor are guaranteed to pass many other sensors. This would mean that there exists a way to extrapolate the readings from this sensor to the readings from other sensors. Tangentially related, it’s possible that two nodes are negatively correlated, such that the cars that pass through one node tend to not pass through another node, and the model extracts readings based on this relationship. If neither of these ideas is the case, the exact opposite concept could be true: the sensor is at a location where the speed data is very consistent, such as a highway. 
This allows the readings from the sensor to give very accurate predictions. The next step would be to figure out the geographical locations of the sensors and determine whether the ones we found to be the most relevant seem to be placed at important locations. - -We would also like to do some more experimentation in the future. We used a personal GPU for training (an RTX 2070), and it took a few hours to train the model for every one of our experiments which made it difficult to tune our hyperparameters. Further experiments we would like to run with more compute include running the experiments on the Spacetimeformer model architecture instead of the STAEformer architecture and replacing its embedding layer with STAEformer’s. We mentioned before that the learned embeddings may have been optimized for the model architecture it’s from. Therefore, if the resulting plots from the embeddings look similar to the ones we have generated, then we have conclusive evidence that the STAEformer input embedding does a better job of learning the spatio-temporal relations in the data. - diff --git a/_posts/2023-11-08-suscep.md b/_posts/2023-11-08-suscep.md deleted file mode 100644 index 00f9fb53..00000000 --- a/_posts/2023-11-08-suscep.md +++ /dev/null @@ -1,265 +0,0 @@ ---- -layout: distill -title: From Scroll to Misbelief - Modeling the Unobservable Susceptibility to Misinformation on Social Media -description: -date: 2023-11-08 - -authors: - - name: Yanchen Liu - url: https://liuyanchen1015.github.io/ - affiliations: - name: Harvard, Cambridge, MA - -bibliography: 2023-11-08-suscep.bib - -# Optionally, you can add a table of contents to your post. -# NOTES: -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -# - we may want to automate TOC generation in the future using -# jekyll-toc plugin (https://github.com/toshimaru/jekyll-toc). 
-toc: - - name: Abstract - - name: Introduction - - name: Computational Susceptibility Modeling - subsections: - - name: Modeling Unobservable Susceptibility - - name: Training with Supervision from Observable Behavior - - name: Dataset and Experiment Setup - - name: Evaluation - - name: Analysis - - name: Related Work - - name: Conclusion - -# Below is an example of injecting additional post-specific styles. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } - ---- - -## Abstract - -Susceptibility to misinformation describes the extent to believe false claims, which is hidden in people's mental process and infeasible to observe. Existing susceptibility studies heavily rely on the crowdsourced self-reported belief level, making the downstream research homogeneous and unscalable. To relieve these limitations, we propose a computational model that infers users' susceptibility levels given their reposting behaviors. We utilize the supervision from the observable sharing behavior, incorporating a user's susceptibility level as a key input for the reposting prediction task. Utilizing the capability of large-scale susceptibility labeling, we could also perform a comprehensive analysis of psychological factors and susceptibility levels across professional and geographical communities. Hopefully, we could observe that susceptibility is influenced by complicated factors, demonstrating a degree of correlation with economic development around the world, and with political leanings in the U.S. - -*** - -## Introduction - -
-
- {% include figure.html path="assets/img/2023-11-08-suscep/suscep_model.png" class="img-fluid rounded z-depth-1" zoomable=true %} -
Illustration of the Susceptibility Modeling. We formulate the model to predict whether a given user will retweet a specific misinformation tweet. We utilize a shallow neural network to predict the susceptibility score, which, together with the dot product of the user and tweet embeddings, is used to predict retweet behavior. Our model is trained using two loss functions: binary cross-entropy and triplet loss.
-
-
- -False claims spread on social media platforms, such as conspiracy theories, fake news, and unreliable health information, mislead people's judgment, promote societal polarization, and decrease protective behavior intentions. The harm is especially significant in various contentious events including elections, religious persecution, and the global response to the COVID-19 pandemic. -Many works have investigated the **observable** behavior of information propagation such as where the information propagates, how people share it, and what people discuss about it. However, it is still crucial but challenging to understand the **unobservable** mental and cognitive processes when individuals believe misinformation. Users' susceptibility (i.e., the likelihood of individuals believing misinformation) plays a pivotal role in this context. If a person is more susceptible to misinformation, they are not only more likely to share false claims but also more prone to being misled by them. - -Existing works have investigated the psychological, demographic, and other factors that may contribute to the high susceptibility of a population. -However, previous susceptibility studies heavily rely on self-reported belief towards false claims collected from questionnaire-based participant survey, which presents several limitations. For instance, different participants might interpret the belief levels in different ways. Moreover, the data collection process is labor-heavy and thus limits the scale of downstream research on size, scope, and diversity of the target population. - -The unobservance of people's beliefs makes it infeasible to model susceptibility directly. Luckily, existing psychological literature bridges unobservable beliefs and observable behaviors, showing that the sharing behavior is largely influenced by whether users believe the misinformation, the attributes of the sharing content, and users' internal mental motives. 
Motivated by these prior works, we formulate the relationship between believing and sharing described in social science literature into a machine learning task. - -Concretely, we propose to infer people's susceptibility level given their re/posting behaviors. To parameterize the model, we wrap the susceptibility level as input for the prediction model of the observable reposting behavior. We perform multi-task learning to simultaneously learn to classify whether a user would share a post, and rank susceptibility scores among similar and dissimilar users when the same content is seen. Note that our model does not aim to predict any ground-truth susceptibility for individuals. Instead, we use users' reposting behaviors towards misinformation as a proxy for their susceptibility level for better interpretability. Our model design enables unobservable modeling with supervision signals for observable behavior, unlocks the scales of misinformation-related studies, and provides a novel perspective to reveal the users' belief patterns. - -We conduct comprehensive evaluations to validate the proposed susceptibility measurement and find that the estimations from our model are highly aligned with human judgment. Building upon such large-scale susceptibility labeling, we further conduct a set analysis of how different social factors relate to susceptibility. We find that political leanings and psychological factors are associated with susceptibility in varying degrees. Moreover, our analysis based on these inferred susceptibility scores corroborates the findings of previous studies based on self-reported beliefs, e.g., stronger analytical thinking is an indicator of low susceptibility. The results of our analysis extend findings in existing literature in a significant way. For example, we demonstrate that susceptibility distribution in the U.S. exhibits a certain degree of correlation with political leanings. - -To sum up, our contributions are: -1. 
We propose a computational model to infer people's susceptibility towards misinformation in the context of COVID-19, by modeling unobservable latent susceptibility through observable sharing activities. -2. Evaluation shows that our model effectively models unobservable belief, and the predictions highly correlate with human judgment. -3. We conduct a large-scale analysis to uncover the underlying factors contributing to susceptibility across a diverse user population from various professional fields and geographical regions, presenting important implications for related social science studies. - -*** - - -## Computational Susceptibility Modeling -### Modeling Unobservable Susceptibility - -Inspired by the existing studies indicating that believing is an essential driver for dissemination, we propose to model susceptibility, which reflects users' beliefs, as a driver for the sharing behavior, while considering characteristics of the sharing content and user profile. - -We propose a computational model to infer a user's unobservable susceptibility score based on their historical activities as shown in the model figure, and further train the model with signals from the observable disseminating behavior. We construct approximate contrastive user-post pairs as the training data ([Dataset and Experiment Setup](#dataset-and-experiment-setup)). - -This design would allow us to explore the best parameters for the computational model of an unobservable and data-hungry susceptibility variable using the rich data resources available on social media platforms. - -#### Content-Sensitive Susceptibility -We compute the user's susceptibility when a particular piece of misinformation $p$ is perceived (i.e. $s_{u, p}$). This allows us to account for the fact that an individual's susceptibility can vary across different content, influenced by factors such as topics and linguistic styles. 
By focusing on the susceptibility to specific pieces of misinformation, we aim to create a more nuanced, fine-grained, and accurate representation of how users interact with and react to different COVID-19 misinformation. - -#### User and Misinfo Post Embeddings -As a component of the computational model, we use SBERT developed upon RoBERTa-large to produce a fixed-sized vector to represent the semantic information contained in the posts and user profiles. We consider the misinformation post as a sentence and produce its representation with SBERT. For the user profile, we calculate the average of sentence representations for the user's recent original posts. More specifically, for every user-post pair $(u, p)$, we gather the historical posts written by user $u$ within a 10-day window preceding the creation time of the misinformation post $p$, to learn a representation of user $u$ at that specific time. - -#### Computational Model for Susceptibility -Given the input of the user profile for the user $u$ and the content for misinformation post $p$, the susceptibility computational model is expected to produce the _susceptibility score_ $s_{u, p}$ as shown below, reflecting the susceptibility of $u$ when $p$ is perceived. - -$$ - s_{u, p} = suscep(E(u), E(p)) -$$ - -We first obtain the embeddings $E(p)$ and $E(u)$ for post $p$ and user profile $u$, where $u$ is represented by the user's historical tweets and $E$ is the frozen SBERT sentence embedding function. The susceptibility score is calculated by the function $suscep$, which is implemented as a multi-layer neural network, taking the concatenation of the user and post embeddings as inputs. In the training phase, we keep the sentence embedder frozen and learn the weights for the $suscep$ function that could be used to produce reasonable susceptibility scores. We expect to produce susceptibility scores for novel $u$ and $p$ pairs using the learned $suscep$ function during inference. 
Additionally, we normalize the resulting susceptibility scores into the -100 to 100 range for better interpretability. - -### Training with Supervision from Observable Behavior -Susceptibility is not easily observable, thus it is infeasible to apply supervision on $s_{u, p}$ directly as only the user $u$ themselves know their belief towards content $p$. Thus, we propose to utilize the supervision signal for sharing a piece of misinformation, which is an observable behavior. We consider susceptibility as an essential factor of sharing behavior and use the susceptibility computational model's output to predict the repost behavior. - - -To produce the probability for user $u$ to share post $p$, we calculate the dot product of the embeddings of the user profile and post content, consider the susceptibility score for the same pair of $u$ and $p$ as a weight factor, and pass the result through a sigmoid function, as illustrated in the model figure. - -$$ - p_{\text{rp}} = \sigma \left( E(u) \cdot E(p) \cdot s_{u, p} \right) -$$ - -Note that we do not directly employ the *susceptibility score* to compute the probability of sharing because the sharing behavior depends not only on the susceptibility level but also on other potential confounding factors. It is possible that a user possesses a notably high susceptibility score for a piece of misinformation yet chooses not to repost it. Hence, we incorporate a dot product of the user and post embeddings in our model to involve the misinformation post content and user profiles in the prediction of the sharing behavior.
- -$$ -\begin{align} -\mathcal{L}_{\text{bce}}(u_i, p) &= -\left( y_i \log(p_{\text{rp}}(u_i, p)) + (1 - y_i) \log(1 - p_{\text{rp}}(u_i, p)) \right) \nonumber \\ -\mathcal{L}_{\text{triplet}}(u_a, u_s, u_{ds}, p) &= \text{ReLU}\left(\Vert s_{u_{a},p} - s_{u_{s},p}\Vert_2^2 - \Vert s_{u_{a},p} - s_{u_{ds},p} \Vert_2^2 + \alpha \right) \nonumber \\ -\mathcal{L}(u_a, u_s, u_{ds}, p) &= \frac{\lambda}{3} \sum_{i \in \{a, s, ds\}} \mathcal{L}_{\text{bce}}(u_i, p) + (1 - \lambda) \mathcal{L}_{\text{triplet}}(u_a, u_s, u_{ds}, p) \nonumber \label{eq:loss} -\end{align} -$$ -#### Objectives -We perform multi-task learning to utilize different supervision signals. We first consider a binary classification task of predicting repost or not with a cross-entropy loss. Additionally, we perform the triplet ranking task to distinguish the subtle differences among the susceptibility scores of multiple users when the same false content is present. - -During each forward pass, our model is provided with three user-post pairs: the anchor pair $(u_a, p)$, the similar pair $(u_s, p)$, and the dissimilar pair $(u_{ds}, p)$. -We determine the similar user $u_s$ as the user who reposted $p$ if and only if user $u_a$ reposted $p$. Conversely, the dissimilar user $u_{ds}$ is determined by reversing this relationship. -When multiple potential candidate users exist for either $u_s$ or $u_{ds}$, we randomly select one. -However, if there are no suitable candidate users available, we randomly sample one user from the positive (for "reposted" cases) or negative examples (for "did not repost" cases) and pair this randomly chosen user with this misinformation post $p$. - -We now elaborate on the definition of our loss function. Here, $y_i$ takes the value of 1 if and only if user $u_i$ reposted misinformation post $p$.
The parameter $\alpha$ corresponds to the margin employed in the triplet loss, serving as a hyperparameter that determines the minimum distance difference needed between the anchor and the similar or dissimilar sample -for the loss to equal zero. Additionally, we introduce the control hyperparameter $\lambda$, which governs the weighting of the binary cross-entropy and triplet loss components. - - -## Dataset and Experiment Setup -We use Twitter data because it hosts an extensive and diverse collection of users, the accessibility of its data, and its popularity for computational social science research. -Additionally, it provides users' free-text personal and emotional expression with crucial metadata, including timestamps and location data, which are useful for our subsequent analytical endeavors. - -#### Misinformation Tweets -We consider two misinformation tweet datasets: the ANTi-Vax dataset was collected and annotated specifically for COVID-19 vaccine misinformation tweets. On the other hand, CoAID encompasses a broader range of misinformation related to COVID-19 healthcare, including fake news on websites and social platforms. The former dataset contains 3,775 instances of misinformation tweets, while the latter contains 10,443. - -However, a substantial number of tweets within these two datasets do not have any retweets. Consequently, we choose to retain only those misinformation tweets that have been retweeted by valid users. -Finally, we have collected a total of 1,271 misinformation tweets for our study. - -#### Positive Examples -We define the positive examples for modeling as $(u_{pos}, t)$ pairs, where user $u_{pos}$ viewed and retweeted the misinformation tweet $t$. We obtained all retweeters for each misinformation tweet through the Twitter API. - -#### Negative Examples -Regarding negative examples, we define them as $(u_{neg}, t)$ pairs where user $u_{neg}$ viewed but did not retweet misinformation post $t$. 
However, obtaining these negative examples poses a substantial challenge, because the Twitter API does not provide information on the "being viewed" activities of a specific tweet. -To tackle this issue, we infer potential users $u_{neg}$ that highly likely viewed a given tweet $t$ following the heuristics: 1) $u_{neg}$ should be a follower of the author of the misinformation tweet $t$, 2) $u_{neg}$ should not retweet $t$, and 3) $u_{neg}$ was active on Twitter within 10 days before and 2 days after the timestamp of $t$. - -We have collected a total of 3,811 positive examples and 3,847 negative examples, resulting in a dataset comprising 7,658 user-post pairs in total. -We divide the dataset into three subsets with an 80% - 10% - 10% split for train, validation, and test purposes, respectively. The detailed statistics of the collected data are illustrated in the table below. - -| | Total | Positive | Negative | -|----------------|-------|----------|----------| -| # Example | 7658 | 3811 | 3847 | -| # User | 6908 | 3669 | 3255 | -| # Misinfo tweet| 1271 | 787 | 1028 | - -## Evaluation -In this section, we demonstrate the effectiveness of our susceptibility modeling by directly comparing our estimations with human judgment and indirectly evaluating its performance for predicting sharing behavior. - -### Validation with Human Judgement - -Due to the abstract nature of susceptibility and the lack of concrete ground truth, we face challenges in directly evaluating our susceptibility modeling. We use human evaluations to validate the effectiveness of our inferred susceptibility. Given the subjectivity inherent in the concept of susceptibility, and to mitigate potential issues arising from variations in individual evaluation scales, we opt not to request humans to annotate a user's susceptibility directly. 
Instead, we structure the human evaluation as presenting human evaluators with pairs of users along with their historical tweets and requesting them to determine which user appears more susceptible to overall COVID-19 misinformation. - -Subsequently, we compared the predictions made by our model with the human-annotated predictions. To obtain predictions from our model, we compute each user's susceptibility to overall COVID-19 misinformation by averaging their susceptibility scores to each COVID-19 misinformation tweet in our dataset. -As presented in the table below, our model achieves an average agreement of 73.06% with human predictions, indicating a solid alignment with the annotations provided by human evaluators. Additionally, we consider a baseline that directly calculates susceptibility scores as the cosine similarity between the user and misinformation tweet embeddings. Compared to this baseline, our susceptibility modeling brings a 10.06% improvement. -Moreover, we compare the performance with ChatGPT prompting with the task description of the susceptibility level comparison setting as instruction in a zero-shot manner. We observe that our model also outperforms predictions made by ChatGPT. -The results from the human judgment validate the effectiveness of our susceptibility modeling and its capability to reliably assess user susceptibility to COVID-19 misinformation. - -| | Our | Baseline | ChatGPT | -| -------------- | ----------------- | ---------------- | ---------------- | -| Agreement | 73.06±8.19 | 63.00±9.07 | 64.85±9.02 | - - -### Susceptibility Score Distribution - -We provide a visualization of the distribution of susceptibility scores within positive and negative examples produced by our model on the training data. -As depicted below, there is a notable disparity in the distribution between positive and negative examples, verifying our assumption that believing is an essential driver for sharing behavior. 
The difference in the means of the positive and negative groups is statistically significant, with a p-value of less than 0.001. - -
-
- {% include figure.html path="assets/img/2023-11-08-suscep/pos_neg_distribution.png" class="img-fluid rounded z-depth-1" zoomable=true %} -
Susceptibility Score Distribution among positive and negative user-tweet samples. The distribution of positive (red) and negative (blue) examples exhibits a clear disparity.
-
-
- -### Sharing Behavior Prediction -Furthermore, holding a belief is highly likely to result in subsequent sharing behavior. We demonstrated that our trained model possesses a strong ability for sharing behavior prediction. When tested on the held-out test dataset, our model achieves a test accuracy of 78.11% and an F1 score of 77.93. These results indirectly demonstrate the reliable performance of our model for susceptibility modeling. - -## Analysis -In this section, we show the potential of our inferred susceptibility scores in expanding the scope of susceptibility research. Our analysis not only aligns with the findings of previous survey-based studies but also goes a step further by extending and enriching their conclusions. - -### Correlation with Psychological Factors -Previous research on human susceptibility to health and COVID-19 misinformation has been primarily based on questionnaire surveys . These studies have identified several psychological factors that influence individuals' susceptibility to misinformation. For instance, analytical thinking (as opposed to intuitive thinking), trust in science, and positive emotions have been linked to a greater resistance to health misinformation. Conversely, susceptibility to health misinformation is associated with factors such as conspiracy thinking, religiosity, conservative ideology, and negative emotions. -In this part, we analyze the correlation coefficients between our modeled susceptibility scores and the aforementioned factors to determine if our results align with previous research findings. - -To achieve this, we compute factor scores for each user in our dataset based on their historical tweets using LIWC Analysis. We calculate the average value across all the user's historical tweets as the final factor score. However, for emotional factors such as anxiety and anger with less frequent appearance, we opt for the maximum value instead to more effectively capture these emotions. 
We primarily consider the following factors: *Analytic Thinking*, Emotions (*Positive* emotions, *Anxious*, *Angry* and *Sad*), *Swear*, *Political Leaning*, *Ethnicity*, *Technology*, *Religiosity*, *Illness* and *Wellness*. These factors have been extensively studied in previous works and can be inferred from a user's historical tweets. -We calculate and plot the Pearson correlation coefficients between each factor and the susceptibility predicted by our model in the following table. - -| Factors | Coeff. | Factors | Coeff. | -|---------------------|--------|---------------------|--------| -| Analytic Thinking | -0.31 | Emotion - Positive | -0.08 | -| Political Leaning | 0.13 | Emotion - Anxious | 0.08 | -| Ethnicity | 0.09 | Emotion - Angry | 0.16 | -| Religiosity | 0.10 | Emotion - Sad | 0.14 | -| Technology | -0.09 | Swear | 0.18 | -| Illness | 0.09 | Wellness | -0.02 | - -According to our analysis, correlations are consistent with previous social science studies based on surveys on health susceptibility. For instance, *Analytic Thinking* is a strong indicator of low susceptibility, with a correlation coefficient of -0.31. -Conversely, certain features such as *Swear*, *Political Leaning* and *Angry* exhibit a weak correlation with a high susceptibility score. -These results not only corroborate the conclusions drawn from previous survey-based studies but also provide further validation for the effectiveness of our computational modeling for susceptibility. - - -### Geographical Community Differences -We delve into the geographical distribution of susceptibility. Given the significant imbalance in the number of users from different U.S. states, we calculate the average susceptibility scores for each state using Bayesian smoothing. We use the overall mean susceptibility score and overall standard deviation as our priors, and the more users in the group, the less the overall mean affects the group's score.
- -We explore the susceptibility distribution among different U.S. states, considering the influence of political ideology associated with different states . Out of the 100,000 users sampled from around the world, 25,653 users are from U.S. states with more than 200 users for each state. As illustrated in the figure below, the susceptibility distribution across U.S. states is imbalanced and exhibits a certain degree of correlation with political leanings, where generally, states known to have a more conservative population tend to have relatively higher susceptibility scores, while states that are considered more liberal have lower scores. -Specifically, the average susceptibility score for users in blue or red states is -3.66 and -2.82 respectively. Red or blue states refer to US states whose voters vote predominantly for the Republican or Democratic Party. We determine blue/red states according to the 2020 presidential election result. We observe that 60% of the ten states with the highest susceptibility scores are red states, and 90% of the ten states with the lowest susceptibility scores are blue states. -This is a trend that has been observed in various research where political ideology influences the perception of scientific information . -However, it is crucial to acknowledge the limitations of our analysis, as it solely reflects the susceptibility distribution of the sampled users within each state. - -
-
- {% include figure.html path="assets/img/2023-11-08-suscep/usa.png" class="img-fluid rounded z-depth-1" zoomable=true %} -
Susceptibility Distribution by U.S. State (with Bayesian smoothing). We use the average susceptibility score in the United States (-2.87) as the threshold, with scores above it displayed in red, and those below it in blue. Due to space constraints and insufficient data points, we are only displaying data for 48 contiguous states within the U.S.
-
-
- - -## Related Work - -### Measure of Susceptibility -The common practice in measuring susceptibility involves collecting self-reported data on agreement or disagreement with verified false claims, , , . Some studies assess susceptibility indirectly through its impact on behavior, but this approach fails to capture actual belief systems. Our work proposes a computational model as a scalable alternative to expensive and limited self-reported beliefs. - -### Contributing Factors and Application of Susceptibility -Research utilizing manually collected susceptibility annotations has explored various factors influencing susceptibility, such as emotion, , analytic thinking, partisan bias, source credibility, and repetition. Theories explaining susceptibility range from limited knowledge acquisition to overconfidence. This understanding aids in applications like analyzing bot-driven misinformation spread and developing prebunking interventions, . However, the field lacks a computational model for large-scale susceptibility inference, which we address in our work. - -### Inferring Unobservables from Observables -Latent constructs, or variables that are not directly observable, are often inferred through models from observable variables, . Methods like nonlinear mixed-effects models and hidden Markov models are used for this purpose. In our approach, we utilize a neural network-based architecture to represent these latent variables, aiding in predicting observable variables. - -## Conclusion -In this work, we propose a computational approach to model people's **unobservable** susceptibility to misinformation. While previous research on susceptibility is heavily based on self-reported beliefs collected from questionnaire-based surveys, our model trained in a multi-task manner can approximate user's susceptibility scores from their reposting behavior. When compared with human judgment, our model shows highly aligned predictions on a susceptibility comparison evaluation task. 
-To demonstrate the potential of our computational model in extending the scope of previous misinformation-related studies, we leverage susceptibility scores generated by our model to analyze factors contributing to misinformation susceptibility. This thorough analysis encompasses a diverse U.S. population from various professional and geographical backgrounds. The results of our analysis align with, corroborate, and expand upon the conclusions drawn from previous survey-based computational social science studies. - -## Limitations -Besides investigating the underlying mechanism of misinformation propagation at a large scale, the susceptibility scores produced by our model have the potential to be used to visualize and interpret individual and community vulnerability in information propagation paths, identify users with high risks of believing in false claims and take preventative measures, and use as predictors for other human behavior such as following and sharing. However, while our research represents a significant step in modeling susceptibility to misinformation, several limitations should be acknowledged. - -First, our model provides insights into susceptibility based on the available data and the features we have incorporated. However, it's important to recognize that various other factors, both individual and contextual, may influence susceptibility to misinformation. These factors, such as personal experiences and offline social interactions, have not been comprehensively incorporated into our model and should be considered in future research. - -Moreover, the susceptibility scores modeled by our model represent an estimation of an individual's likelihood to engage with misinformation. These scores may not always align perfectly with real-world susceptibility levels. Actual susceptibility is a complex interplay of cognitive, psychological, and social factors that cannot be entirely captured through computational modeling.
Our model should be seen as a valuable tool for understanding trends and patterns rather than providing definitive individual susceptibility assessments. - -Finally, our study's findings are based on a specific dataset and may not be fully generalizable to all populations, platforms, or types of misinformation. For example, due to the high cost of data collection, not all countries or U.S. states have a sufficient amount of Twitter data available for analysis, especially when we examine the geographical distribution of susceptibility. Furthermore, platform-specific differences and variations in the types of misinformation can potentially impact the effectiveness of our model and the interpretation of susceptibility scores. - diff --git a/_posts/2023-11-08-vig-algorithm-flow-project-proposal.md b/_posts/2023-11-08-vig-algorithm-flow-project-proposal.md deleted file mode 100644 index b3ec9693..00000000 --- a/_posts/2023-11-08-vig-algorithm-flow-project-proposal.md +++ /dev/null @@ -1,327 +0,0 @@ ---- -layout: distill -title: Augmenting Expert Domain Image Inputs for Enhancing Visual Language Models Performance -description: This blog post explores enhancing visual language models, particularly for expert domains like scientific literature, where standard models struggle. By integrating domain-specific knowledge and advanced image embeddings, the research aims to refine the performance of visual language models such as OpenFlamingo. Leveraging graphical structured embeddings and graph neural networks, the study tests different methods of representing images to improve the models' interpretive capabilities. 
-date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -# authors: -# - name: Sejal Gupta -# url: "https://www.linkedin.com/in/sejal-gupta/" -# affiliations: -# name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-08-vig-algorithm-flow-project-proposal.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Related Works - - name: Methodology - subsections: - - name: Dataset - - name: Experimentation - - name: Evaluation Metrics - - name: Results - subsections: - - name: Superiority in Extracting Nodes - - name: Difficulties with Edge Dectection - - name: Outside information - - name: Conclusions and Future Steps ---- - -## Introduction -Over the past few years, we have seen a surge in creation, adoption, and excitement around visual language models, specifically around Open AI's CLIP model. Visual language models can bridge the gap between image and text, allowing tokenized understanding of the visual world around us. For instance, Meta released Segment Anything, a model with enhanced object detection through multimodal inputs like defined bounding boxes and text. - -After the recent surge with ChatGPT, we have begun to see advancements in the visual language model space to combine the image analysis and conversational tool. While the recent developments with Bard, GPT4-v, LLava, and many others have progressed the visual language model domain, the overall capabilities of the models are limited to the type of images provided. Most of the models have been trained and finetuned on common day objects, specializing in every-day normal tasks. - -However, theses models continue to struggle with answering images derived from an expert domain, especially scientific literature. 
Images from these domains can be challenging for the model, as they require common background knowledge, domain knowledge, and interpretation of the diagram. - -
-
- {% include figure.html path="assets/img/2023-11-08-vig-algorithm-flow-project-proposal/chat_example.png" %} -
-
- Question: What is the path from 1 to 4?

- GPT4-v Answer: The image you've uploaded appears to show a diagram with four numbered points, possibly representing steps or locations connected by a path... However, as an AI, I can't visually trace paths or analyze images in the way a human would... -
-
- -How can we assist visual language models to improve performance in expert domains? - -## Past Works - -Visual Language Models have become very popular in recent years with their ability to connect image to text. Open Flamingo is an open source model with a similar architecture to Flamingo: images are sent to (1) visual encoders, (2) a perceiver resampler, and (3) through a dense group of layers. Through few shot learning, text completion, and image analysis, Open Flamingo allows users to have a conversation involving images. - -Currently, popular visual language models, like Flamingo, utilize CLIP as the visual encoder to perform the image-to-embedding conversion. Behind the scenes, CLIP uses a Vision Transformer architecture as the encoder. However, when we analyze the embeddings output from CLIP, they do not necessarily have a clear representation to the human eye. This makes it really challenging to inject domain-specific knowledge through the embedding to help perform specific tasks. - -The project, FlowchartQA, contributed by creating a dataset of flowcharts and code, revealing insights into the relationships forming code. However, no research has tried to understand the way visual language models interpret graphs with nodes and edges, specifically for domain specific questions. Through various changes to text and image inputs, we can learn about the way a visual language model understands graphical structures to improve future performance of VLMs. - -## Procedure -### Dataset Creation -{% include figure.html path="assets/img/2023-11-08-vig-algorithm-flow-project-proposal/data_augmentation_pipeline.png" %} -<div class="caption">
- Data Augmentation Pipeline for the Inputs -
-To learn more about the graphical understanding of VLMs, a dataset had to be curated to test various conditions. The original images of the flowcharts are sampled from the BizGraphQA dataset. Due to the compute and time constraints, only a sample of ten images were utilized for the analysis. This subset of images is passed through the Segment Anything Model from Meta to extract a mask of each of the nodes. From there, OCR is performed on each node to retrieve the text and hand coloring is performed to color the image. In the end, we get dataset triples of the original image, colored image, and the list of nodes in the image. - -For example, for this image, we would have the following dataset. -<table>
-
- Original Image -
-
- Colored Image -
-
- Node List -
-
-
-
- {% include figure.html path="assets/img/2023-11-08-vig-algorithm-flow-project-proposal/0_43_labeled_styled.png" %} -
-
- {% include figure.html path="assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_43_labeled_styled.png" %} -
-
- [Finanze Tyco Roadsafe Insurance, Greyhound Bbbytf Limited, Etablissements Lgac Lthalat Incorporated Indiana, Korte Pro-Cut Grayhawk Insurace North Carolina, Hutchins Sandvik Maryland, Nm Suc. Krsx Limited Michigan] -
-
- -### Experimentation -{% include figure.html path="assets/img/2023-11-08-vig-algorithm-flow-project-proposal/experimentation_pipeline.png" %} -
- Experimentation Pipeline for the Inputs -
- -Bard uses Flamingo, a visual language model, to answer queries. We will provide an input image with or without the augmentation and a question about the graph into Flamingo, as illustrated in the figure above. Each image will be paired with a question in a specific category. For this analysis, we will focus on four major types of questions to evaluate the VLM's understanding of graph connectivity. These questions are to be asked in tandem with the original image, the colored image, and the original image paired with the list of nodes in the image. We ask the following questions: -1. Based on the image, is there a cycle in the graph? -2. Based on the image, what is the path from ____ to _____? (The ground truth path involves nodes that only have one child node.) -3. Based on the image, what is the path from ____ to _____? (The ground truth path involves nodes that have multiple child nodes.) -4. Based on the image, how many child/parent nodes does _____ have? - -For the same image from above, here are the questions and relevant answers: - -| | Question | Answer | -| :-----------: |:---------------------: |:------------------------:| -| 1 | Is there a cycle in this graph? | No | -| 2 | What is the organization hierarchy path from Etablissements Lgac Lthalat Incorporated, Indiana to Nm Suc. Krsx Limited - Michigan? | The path is Etablissements Lgac Lthalat Incorporated, Indiana to Korte Pro-Cut Grayhawk Insurance, North Carolina to Nm Suc. Krsx Limited - Michigan | -| 3 | What is the organization hierarchy path from Finanze Tyco Roadsafe Insurance to Nm Suc. Krsx Limited - Michigan? | The path is from Finanze Tyco Roadsafe Insurance to Greyhound Bbbytf Limited to Nm Suc. Krsx Limited - Michigan | -| 4 | How many child companies does Greyhound Bbbytf Limited have holdings in? | Two | - - -But, you must be wondering: why ask these questions specifically? Each question tests understanding of graphical elements without background understanding of the topic. 
This should serve as a baseline for the way that VLMs understand graphical structures and the common questions to be asked. - -### Evaluation Metrics - -To evaluate the success of our model, we will conduct both qualitative and quantitative analyses on the dataset, given that quantitative evaluation of generative models can be challenging. The control group will provide a baseline for normalizing the results. - -Qualitatively, we will perform a manual analysis of the generated outputs. By using prompts, images, and answers, we will subjectively compare the prompt, the image, and the resulting answer. Our primary goal is to assess how effectively the visual language model generates the answer based on the prompt while being constrained by the graph. - -Quantitatively, an accuracy score will be employed to evaluate the percentage of questions that have been answered correctly in each metric. Incorporating these metrics will provide a more comprehensive understanding of the model's performance and allow for better comparisons with other generative models. - -This comparative analysis will facilitate a robust evaluation of the methods employed to represent graphical information within visual language models. - -## Results - -The following table provides the accuracy metric of the aforementioned experiment: - -| | Question 1 | Question 2 | Question 3 | Question 4 | -| :-----------: |:---------------------: |:------------------------:| :----------------------:| :----------------------:| -| Original Image | .7 | .4 | 0 | 0 | -| Original Image with Node List | 0 | .3 | 0 | .1 | -| Colored Image| 1 | .3 | 0.1 | 0.2 - -We clearly see that the Colored Image performs better than the Original Image in most of the scenarios. Also, we unfortunately see that the node list did not provide gains in performance for the VLM. Overall, the performance is quite limited when asking specific questions about path finding and children nodes. 
Let's dive a bit deeper to understand why. - -Preface: In the following sections, there are tables with prompts, images, and answers from Bard for various mini explorations. - -### Superiority in Extracting Nodes -When first embarking on this project, I believed that the VLM would struggle to pair the text and nodes together. However, the VLM actually proved to be quite impressive with the extraction of nodes and the names associated with them, making the need for a node list not quite necessary. - -Using the input figure from above without any augmentations, I asked the VLM how many child nodes a specific node in the graph had. The VLM outputs the wrong answer, but it actually provides generated code for its reasoning. Here is a portion of the code: -``` -# Define the corporate structure as a dictionary -corporate_structure = { - "Etablissements Lgac Lthalat Incorporated, Indiana": { - "Nm Suc. Krsx Limited - Michigan": {} - }, - "Nm Suc. Krsx Limited - Michigan": {}, - "Krsx Incorporated, California": { - "Nm Suc. Krsx Limited - Michigan": {} - }, - "Krsx Incorporated, Texas": {}, - "Lthalat Holdings, Delaware": { - "Etablissements Lgac Lthalat Incorporated, Indiana": {}, - "Krsx Incorporated, California": {}, - "Krsx Incorporated, Texas": {} - }, - "Greyhound Bbbytf Limited": { - "Krsx Incorporated, California": {}, - "Krsx Incorporated, Texas": {} - } -} -``` -The VLM was actually quite impressive with obtaining the specific nodes in the graph, recognizing the text through OCR, and pairing it with the nodes. While all the nodes are not correct, we can see that the VLM can follow a procedure to determine this. While this is a good example of node extraction, the capability is still ambiguous. - -To poke this topic a bit more, I wanted to test out the VLM's ability to extract the nodes if the colors are the same or different. I designed a basic figure with just nodes to test this. The same prompt was passed into Bard with the images below. 
The only difference between the two images is the fact that in one image, the colors of the nodes are the same, and in the other image, the colors of the nodes are different. In the results below, we can clearly see that the VLM is able to perform better with the colored nodes, as the VLM is able to distinguish between different nodes. - -<table>
-
- Original Image -
-
- Colored Image -
-
-
-
- {% include figure.html path="assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_example_1.png" %} -
-
- {% include figure.html path="assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_example_1.png" %} -
-
-
-
- Prompt: What are the names of the nodes in this graph? -
-
- Prompt: What are the names of the nodes in this graph? -
-
-
-
- Answer: B1, B2, B3, B4, B5. -
-
- Answer: B1, B2, B3, B4. -
-
- -
-To support this argument, we look at the attention that CLIP places on segments of the image based on a caption. We specifically use CLIP because CLIP is the visual encoder in Flamingo. While this isn't necessarily a rigorous proof, we can see that the attention on the nodes is placed stronger in the colored graph example rather than the regular graph example. - -
-
- Caption: Node B1, B2, B3, B4 -
-
- Caption: Node B1, B2, B3, B4 -
-
-
-
- {% include figure.html path="assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_explainability_example_1.png" %} -
-
- {% include figure.html path="assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_explainability_example_1.png" %} -
-
- -Through the examples and tests above, we can clearly see the VLM's ability to extract nodes, especially with a visually distinguishing factor between the nodes like color. Since the VLM can do a pretty decent job of extracting the nodes, it makes sense that providing the VLM with the node list may not allow for great improvements in performance. - -So, if the VLM can extract the nodes relatively well, why is the performance still subpar? - -### Difficulties with Edge Dectection -Aside from nodes, most graphs have edges, and for the questions asked in the experiments, understanding the connectivity was crucial to providing the correct answer. We actually observed that the colored graphs had answers that were closer to 100% accuracy in comparison to the regular graphs. To explore how VLMs understand the connections between nodes, I decided to ask Bard about some simple graphs to determine how it responded. - -<table>
-
- Image -
-
- {% include figure.html path="assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes1.png" %} -
-
- {% include figure.html path="assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes1.png" %} -
-
-
-
- Prompt -
-
- Can you please tell me what the edges in the graph are? -
-
- Can you please tell me what the edges in the graph are? -
-
-
-
- Answer -
-
- The edges in the graph are: a -> b, b -> c, c -> a -
-
- The edges in the graph are: B -> A, C -> A -
-
- -Wow! It's really surprising that the VLM is creating edges where there aren't even edges? Also, the direction of the edges are wrong. Why would this be the case? - -Let's explore a bit more about the individual arrows. Can a VLM accurately describe the arrow direction? - -
-
- Image -
-
- {% include figure.html path="assets/img/2023-11-08-vig-algorithm-flow-project-proposal/big_arrow.png" %} -
-
- {% include figure.html path="assets/img/2023-11-08-vig-algorithm-flow-project-proposal/small_arrow.png" %} -
-
-
-
- Prompt -
-
- Which way does the arrow point? -
-
- Which way does the arrow point? -
-
-
-
- Answer -
-
- The arrow in the image points to the right. -
-
- The arrow in the image points to the left. -
-
- -Well, if you take a look at the responses above, we notice that the VLM can accurately provide the arrow direction for larger arrows in comparison to smaller arrows. Since most of the graphs involve smaller arrows, we can see why the VLM could not tell the directionality of the arrow. - -### Outside information -Beyond the graphical structure of the nodes, I found that Bard typically incorporated outside information that required background knowledge about the topic. These situations can be categorized into two categories: useful outside information and irrelevant outside information. To distinguish between the two, I found that in the situations that Bard used graph algorithms and definitions, the outside information was useful in generating the answer. However, many times, Bard would look up specific companies or use graphs from other sources online. - -In this example, Bard was asked if there was a cycle in the graph, and Bard outputted the answer below. The VLM was very intelligent by quoting the definition of a cycle and using DFS to determine the existence of one. The step-by-step algorithm is correct and ultimately, leads to the solution. This shows that the incorporation of algorithms and accurate definitions can assist the VLM in answering queries. - -> No, there is no cycle in the graph of the Nove 1-66Express organizational structure. A cycle is a path in a graph that starts and ends at the same vertex, and there is no such path in this graph. I verified this by tracing the graph and ensuring that I never returned to a vertex that I had already visited. I also used the following algorithm to check for cycles in graphs: Mark all vertices as unvisited. Choose a vertex and start a depth-first search (DFS). As you traverse the graph, mark each vertex as visited. If you ever reach a vertex that is already marked as visited, then there is a cycle in the graph. 
If you finish traversing the graph without ever reaching a visited vertex, then there is no cycle in the graph. I ran this algorithm on the graph of the Nove 1-66Express organizational structure and found that there are no cycles. - -However, outside information can be just as harmful as it is beneficial. Pulling from outside sources can be tricky for a VLM. It needs to remember to operate within the ground truths presented in the figure. In multiple instances, the VLM actually referenced the wrong figure because it forgot about the image provided with the prompt. - -Therefore, the VLM needs to be fine-tuned to filter the outside information it brings in to improve the accuracy of its answers. - -## Conclusions and Future Steps - -In this study, I delved into the capabilities and limitations of VLMs, like Open Flamingo, in interpreting complex graphical structures within expert domains, such as scientific literature. The experiments, involving various image augmentation techniques such as color changes and node names, revealed key insights. While VLMs demonstrated proficiency in node extraction, they faced challenges in edge detection and understanding the connectivity between nodes. This was particularly evident when colored images outperformed non-colored ones, highlighting the importance of visual distinction for VLM comprehension. However, the addition of node lists did not significantly enhance performance, suggesting existing capabilities in node identification. The connectivity was difficult for the VLM to understand because of the size of the arrows. - -The findings of this research highlight a crucial challenge for VLMs: integrating domain-specific knowledge, especially for non-standard images like scientific diagrams. However, the small dataset size suggests that further research with a larger and more diverse dataset is necessary to validate these findings. 
In the future, this research can be applied to help improve prompting for graphical structures, provide insights on how to finetune a VLM for this task, and create a new interest in using VLMs for scientific diagrams. - diff --git a/_posts/2023-11-09-Exploring-Generative-Models-In-Time-Series.md b/_posts/2023-11-09-Exploring-Generative-Models-In-Time-Series.md deleted file mode 100644 index 46a5632d..00000000 --- a/_posts/2023-11-09-Exploring-Generative-Models-In-Time-Series.md +++ /dev/null @@ -1,682 +0,0 @@ ---- -layout: distill -title: Exploring Univariate Time Series Anomaly Detection using VAE's -description: In this blog post, we will take a deep dive into DONUT, a method that applies variational autoencoders to the problem of time series anomaly detection. We will begin with a overview of the original authors main ideas. Next, we will replicate some results, and perform new experiments to gain further insights into the properties, successes, and limitations of this method. Finally, we will run additional experiments that test extensions on the original formulation, and motivate future areas of exploration. -date: 2023-12-11 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Abe Ejilemele - url: - affiliations: - name: MIT - - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-Exploring-Generative-Models-In-Time-Series.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Problem Formulation and Background - - name: Overview on VAE's - - name: DONUT - - name: Experimental Setting and Evaluation - - name: Reproducing Results and Establishing Baselines - - name: Understanding The Latent Space - - name: Effects of Changing the latent distribution - - name: Should we Scale the Entropy term in M-ELBO? 
- - name: Empirical Exploration of the Effects of Beta and the Missing Data Injection Rate - - name: Improving VAE Architecture - - name: Choosing Number of Latent Space Dimensions - - name: Concluding Thoughts - - # - name: Images and Figures - # subsections: - # - name: Interactive Figures - # - name: Citations - # - name: Footnotes - # - name: Code Blocks - # - name: Layouts - # - name: Other Typography? - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. - ---- - -## Introduction - - -Anomaly detection in time series data is an extensively studied field in academia, holding significant importance due to its wide-ranging applications in real-world scenarios. Time series are present everywhere, and the ability to detect anomalies is vital for tasks such as identifying potential health issues, predicting system failures, or recognizing regime changes in business operations. There are a wide range of methods that have been developed over the years in order to tackle this crucial yet challenging problem. Classical approaches rooted in statistics have long been employed, but in recent years, researchers have begun to experiment with adapting deep learning techniques to achieve performance improvements. - -The deep methods can generally be classified into distinct approaches. The first approach is forecasting, where the method attempts to learn the generating process of the series, and then classifies a point in the series as anomalous if the ground truth value deviates significantly from the predicted value. The second type of approach is reconstruction. The models attempt to learn the generating process of the series in a latent space. The model then reconstructs the series, and uses a well designed reconstruction score in order to classify the series points as normal or anomalous. 
DONUT is an example of a method that falls into this category. - -## Problem Formulation and Background - -Xu et al explore time series anomaly detection within the framework of key performance indicator time series. These time series show high levels of seasonality, a result of being a derivative of human action. They propose a method called DONUT, a highly adaptable anomaly detection method that can perform supervised, semi-supervised, and unsupervised detection. Their main contributions include modifications to the traditional VAE formulation, the discovery that DONUT requires training on both normal and anomalous data (contrary to other methods), and a kernel density estimation interpretation of the latent space. Given a series $$x_1, x_2, ....x_T$$, and potentially labels $$y_1, y_2, ...y_T$$, where $$y_t$$ is 1 when $$x_t$$ is an anomaly and 0 otherwise, for any time $$t < T$$ the method should be able to classify $$x_t$$ as normal or anomalous. - -We begin by defining what an anomaly means in the context of time series. Darban et al classify temporal anomalies as follows: global, contextual, seasonal, trend, and shapelet. Global anomalies are points with extreme values in comparison with the rest of the series. Contextual anomalies are deviations from the context or neighborhood of a point. Seasonal anomalies are deviations from the typical seasonal patterns in the series. Trend anomalies are transitions or deviations from a local or global trend in a series, and shapelet anomalies are subsequences that have different shapelets from the sequence. Xu et al make a further distinction by defining abnormal points as points that are either anomalous or missing. Conversely, points that are neither anomalous nor missing are classified as normal. These definitions give us a framework for analyzing what types of anomalies a method is able to detect. 
- -## Overview on VAE's - -Generative modeling refers to the objective of generating data from compact, low-dimensional representations. Representation learning can be a byproduct of generative modeling, where the generative model attempts to learn lower dimension representations of data such that inputs with similar high level features will be close to each other in the low dimension representation. Conversely, inputs that have dissimilar features will be far away from each other in the lower dimension representation space. These properties imply that the representation learner learns a good representation of the data that captures as much distinguishing information as possible. VAE's achieve this through a two step process. Given an input x, an encoder is learned that maps the the input to a latent space, and then a decoder takes the latent space representation and maps it back up to the original feature space. The key property of VAE's is that they can attempt to enforce a specific distribution in the latent space, such that we can sample from it and generate real looking outputs. The goal is to learn a model -$$P_{\theta}(x) = \int p_{\theta}(x | z) p_z(z)dz$$, -where x are the inputs and z is a random variable in our latent space. In DONUT, and in most other VAE methods, $$p_{\theta}(x | z)$$ and $$p_z(z)$$ are chosen to be gaussian. Given this model, we would like to find the parameters that maximize the log likelihood $$log P_{\theta}(x)$$. This is often an intractable integral to solve or approximate, so a trick called importance sampling is used. We can rewrite the integral as - - -$$P_{\theta}(x) = \int p_{\theta}(x | z) p_z(z) \frac{q_z(z)}{q_z(z)}dz$$ - -where $$q_z(z)$$ is a distribution we know how to sample from. Now, we rewrite this expression as an Expectation - - -$$E_{z \sim q_z}[p_{\theta}(x | z) \frac{p_z(z)}{q_z(z)}]$$ - -We can now use monte carlo integration to estimate this expectation. 
This estimate will be inefficient with the wrong choice of $$q_z$$. It turns out that - -$$q_z(z) = p_{\theta}(z | x)$$ - -is the optimal choice for $$q_z(z)$$, and because this distribution might be hard to sample from, we use the variational inference trick where we find an approximation to this distribution by minimizing the objective - -$$J_q = KL(q_{\psi}(z | x) || p_{\theta}(z | x))$$ - - -Thus we can now define an objective to be minimized that is fully parametrized by $$\theta$$ and $$\psi$$. - -$$J_p = -log E_{z \sim q_{\psi}(z | x)}[p_{\theta}(x | z) \frac{p_z(z)}{q_{\psi}(z | x)}]$$ - -The monte carlo estimate of this expectation produces a biased estimation of $$\theta$$, so instead of optimizing the objective directly, we optimize a lower bound of the negated objective. Using Jensen's inequality and expanding out the log terms, we know that - -$$ -J_p \geq E_{z \sim q_{\psi}(z | x)}[log p_{\theta}(x | z) + log p_z(z) - log q_{\psi}(z | x)] = E_{z \sim q_{\psi}(z | x)}[log p_{\theta}(x | z)] - KL(q_{\psi}(z | x) || p_z(z))$$ - -This expectation lower bound is known as the ELBO, and is the surrogate objective that VAE's optimize in order to learn good encoders and decoders. - - -## DONUT - -The key goal of DONUT is to take a series with normal data and potentially anomalous data, learn how to represent the normal features of the series, and then use these representations to compute a reconstruction probability score. Intuitively, if the method learns to represent normal inputs well, an anomalous input will have a low chance of being well reconstructed, and thus will have a low reconstruction probability. The challenge is that in order for the method to work really well, it is important that the method does not attempt to learn good representations for anomalous data. Xu et al achieve this by formulating a modified objective function called the modified ELBO (M-ELBO). 
The idea is to reduce the contribution of anomalous and missing points to the learned representation. M-ELBO is defined as such - -$$E_{z \sim q_{\psi}(z | x)}[\sum_{w = 1}^W \alpha_w log p_{\theta}(x | z)+ \beta log p_z(z) - log q_{\psi}(z | x)]$$ - - -Where $$\alpha_w$$ is 1 when $$x_w$$ is not an abnormal point, and 0 when $$x_w$$ is abnormal. $$\beta = (\sum_{w = 1}^W \alpha_w) / W$$. We will take a deep dive into this modified ELBO through empirical experiments and by considering what role each term in the objective plays in both the learning of the latent space, and performance. - -The authors also introduce two innovations that serve to improve performance, something we will reproduce in our experiments. The first innovation is markov chain monte carlo imputation of the missing points. The authors hypothesize that during testing, the presence of missing points in a given sample window might bias the reconstruction of the window, and thus affect the reconstruction probability, so they introduce iterative generation of normal points that can replace the missing points. Additionally, the authors implement "missing point injection". Before each training epoch, they inject missing points into the training samples by randomly selecting a subset of training sample points and removing the points (setting their values to zero). Note that the original samples will be recovered after the epoch is completed. They claim that missing point injection amplifies the effect of M-ELBO by forcing DONUT to learn the normal representation of data in abnormal windows. It certainly helps to improve performance, and we will perform a more thorough empirical analysis on both injection, and the $$\beta$$ term in the M-ELBO. - -The authors formulate the reconstruction probability as follows. 
They begin with the expression - -$$p_{\theta}(x) = E_{p_{\theta}(z)}[p_{\theta}(x | z)]$$ - -The authors claim that this does not work well emperically, and thus choose to use -$$E_{q_{\phi}(z | x)}[log p_{\theta}(x | z)]$$ -as the reconstruction probability score. If the negation of these scores exceed a given threshold, the point will be classified as an anomaly. - -We now describe the model structure of DONUT. The encoder -$$q_{\phi}(z | x)$$ -is represented by a deep fully connected net that maps x to a lower dimension feature space. Then there are two readout heads that map the learned features from the net to a mean and variance, which we will denote $$\mu_z$$ and $$\sigma_z$$. We can then sample $$z$$ from $$N(\mu_z, \sigma_z)$$. The decoder $$p_{\theta}(x | z)$$ is represented by a deep fully connected net that maps a latent variable $$z$$ to a larger feature space. There are then two readout heads that map the learned features to a mean and variance, which we will denote $$\mu_x$$ and $$\sigma_x$$. We can then sample $$x$$ from $$N(\mu_x, \sigma_x)$$ - - - -## Experimental Setting and Evaluation - -Before we lay out the experimental findings and their implications, we begin by briefly describing the datasets used and their characteristics, model architectures, training, and the metrics used for evaluation. We will use three datasets for experimentation, two of which come from the repository of the original paper. The first dataset is called "cpu" and is a series representing some cpu related kpi sampled every minute. The second dataset is called "g" and is also sampled every minute. The third dataset is air temperature time series from CIMIS station 44 in Riverside California, sampled at hourly intervals in the month of March from 2009 to 2019. The dataset did not come with time stamps. These series are all mostly normal, with few anomaly points. This makes the problem more challenging and interesting. - -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_plot.png" class="img-fluid rounded z-depth-1" %} -
-
-
-Plot of the cpu series with anomaly points colored red -
- -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_decomp.png" class="img-fluid rounded z-depth-1" %} -
-
-
-Plot of the cpu series seasonal decomposition -
- - -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_plot.png" class="img-fluid rounded z-depth-1" %} -
-
-
-Plot of the g series with anomaly points colored red -
- -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_seasonal.png" class="img-fluid rounded z-depth-1" %} -
-
-
-Plot of the g series seasonal decomposition -
- - -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_plot.png" class="img-fluid rounded z-depth-1" %} -
-
-
-Plot of the weather series with anomaly points colored red -
- -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_decomp.png" class="img-fluid rounded z-depth-1" %} -
-
-
-Plot of the weather series seasonal decomposition -
- - - - - - - - - -The cpu time series shows clear seasonality, and has an anomaly ratio of .015. The second series has much less clear seasonality, an anomaly ratio of .06, and is generally less smooth than the first series. This allows us to test the robustness of DONUT on a series that showcases less obvious seasonality, and draw some insights and comparisons on performance on series with relatively different smoothnesses. The weather series also displays clear seasonality and is smoother than the second series, but it differs from the other two series in that there are no anomalies in the training data. Thus, DONUT must learn to detect anomalies by training on purely normal data. - -We create the training and testing data as follows. We begin by standardizing both the training and testing splits in order to represent all of the series on the same scale. We then set any missing values in the series to zero. Finally, we perform slide sampling in order to turn the series into windows of length $$W$$. For each window, we will be predicting whether the last value in the window is an anomaly or not. We use a window size of 120 for the first two datasets which means our windows encapsulate two hours of information. For the weather dataset, we use a window size of 24, so each window encapsulates a day of information. - -We will use the same metrics described by Xu et al . Given the probability reconstruction scores, we can compute the precision and recall for a given threshold. The original paper and our experiments are not entirely concerned with the process of automatic threshold selection, so f scores and the ROC curve serve as valid evaluation metrics of the reconstruction probability scores. The main performance metric will be the best f score. Xu et al also allow for delayed detection. 
That is, in any contiguous sequence of anomaly points, if a point is classified as an anomaly, then the anomaly points that came before it will also be classified as an anomaly if they had not previously been before. The authors claim this is reasonable, as the delay (the time between the first detected anomaly point and the first point in the contiguous sequence) is quite low. This also seems reasonable in a practical setting, as being able to detect an anomaly within a few hours of it happening can still give you enough time to act. - -For our experiments, we will use fairly small and simple architectures. The baseline VAE in the paper is done using fully connected networks, and so we will use a fully connected network with depth two. We also experiment with CNN VAE's, and in order to try and compare performance with the fully connected VAE encoders and decoders, we also use a CNN with two layers. We perform experiments on behavior when the latent dimension is increased, and needed to double the width and depth of the fully connected VAE in order to allow for training to converge. - -## Reproducing Results and Establishing Baselines - -Xu et al show that using both mcmc imputation and missing data injection along with M-ELBO improves performance over just using M-ELBO. In our first experiment, we compare the performance of DONUT on the cpu dataset with both mcmc imputation and missing data injection, just mcmc imputation, just missing data injection, and neither of the methods. For each configuration, and for future experiments, we will run the full training and prediction loop 10 times and average results due to the randomness baked into training and prediction with VAE's. We see that the average best f score is highest when we use both mcmc imputation and missing data injection. 
We also plot a density estimate of the f scores from the four configurations, and notice that the f scores of the configuration with both mcmc imputation and missing data injection show more right skewness and fatter tails than the other three configurations. - -| Configuration | avg best f score over 10 runs| -| ------------- |:-------------:| -| both | .642 | -| just inject | .613 | -| just mcmc | .5737 | -| neither | .588 | - -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/f_score_distribution.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Plot of distribution of the f scores shaded by the configuration -
- - -Next, we run DONUT with the baseline configurations for each of our three datasets. We randomly sample a third of the training data and plot the selected samples mappings in 3-d z space. We also plot the predicted anomaly points by the model with the highest f score over the 10 runs for each of the three datasets. - -| Dataset | avg best f score over 10 runs| -| ------------- |:-------------:| -| cpu | .642 | -| g | .881 | -|weather | .996 | - -Xu et al noticed a phenomenon they labeled "time gradient" in latent space. - - -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/original_paper_latent.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Plot of learned latent space from the original paper of one of the datasets in the paper. -
- - -They noticed that the latent space was spread out according to time of the day, as time of the day likely encoded a large amount of information about the shape of the series. We did not notice such a phenomenon in our experiments. This is likely the result of a difference in experimental setting, but could also be the result of the local variation within the seasonal data, and the fact that similar shapes occur all over the series irrespective of time. We see that on the second datset, DONUT learned to classify many troughs in the series as anomalous. It was able to detect both global anomalies, as well as contextual and seasonal anomalies, as seen by its ability to detect sudden flat shapes in the series and sudden spikes in the unsual places. - -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_g.png" class="img-fluid rounded z-depth-1" %} -
-
-
-The baseline predictions on the g dataset. Ground truth anomalies are colored red and predicted anomalies are green -
- -The performance on the third datset is quite suprising. Given no anomalous data in the train set, DONUT was able to achieve a .996 average best f score on the testing data. This highlights DONUT's ability to really learn the normal features of a series. Thus during testing, it was not able to reconstruct the anomalous parts of the series as well as the normal parts. While this result does not contradict the claim that it is important to train on both normal and anomalous data, it certainly suggests that there is still value on learning purely normal qualities of the data. M-ELBO does not fully remove learning of anomalous qualities of the data in the latent space, which could lead to unexpectedly high reconstruction probability scores on anomalous testing data - -## Understanding The Latent Space - -It is important that we gain further insights on the latent space, as it is the bottle neck of any VAE method. We will perform a variety of experiments that aim to fully uncover how each term in ELBO controls the characteristics of the latent space. We begin by first explaining the findings and claims of the original paper. - - -The authors claim that the number of dimensions in the latent space plays a huge role. A small dimension latent space would not allow you to capture enough information, and too big a latent space would cause DONUT to perhaps capture too much information, including anomalous representations. They found that latent dimensions between 3 and 10 typically produced good results. They next discuss how they believe each term in the ELBO contributes to the time gradient phenomena they observe. We restate the M-ELBO objective - - -$$E_{z \sim q_{\psi}(z | x)}[\sum_{w = 1}^W \alpha_w log p_{\theta}(x | z)+ \beta log p_z(z) - log q_{\psi}(z | x)]$$ - -We can rewrite this objective as - - -$$E_{z \sim q_{\psi}(z | x)}[\sum_{w = 1}^W \alpha_w log p_{\theta}(x | z)+ \beta log p_z(z)] + H[z | x]$$ - -Where -$$H[z | x]$$ -is entropy. 
The authors claim that the first term, -$$log p_{\theta}(x | z)$$ -requires the latent space to be able to reconstruct normal x well, thus it pushes latent representations of dissimilar x further away from each other. The second term, $$log p_z(z)$$, serves to encourage the gaussian shape in the latent space and thus encourages the latent space to not expand too much. However, we shrink the contribution of this term by the ratio of normal points in our training data. The entropy term encourages expansion of the latent space, as it is largest when the latent space encodes as much information as possible. This should happen when the latent representations are as distinguishing as possible. - -## Effects of Changing the latent distribution - -Most VAE methods traditionally represent the latent space as a mixture of gaussians, both for its simplicity, as well as its flexibility and ability to approximate many complicated distributions. What happens when we use other types of distributions? We will analyze what happens to performance and the shape of the latent space when we represent it as a mixture of Student-T distributions with 10 degrees of freedom. We hypothesize that replacing a mixture of gaussians with a mixture of any other symmetric distribution will not cause any profound differences in the shape of the latent space, at least in 3 dimensions, however, a symmetric latent space with fatter tails could lead to worse reconstruction performance. Consider -$$P_{\theta}(x | z)$$, -where z is sampled from the latent space. With a fatter tailed distribution, we are more likely to sample a z that is further away from the mean of its distribution. This behavior can be beneficial for generative purposes but for reconstruction purposes, this behavior is likely detrimental and will lead to lower likelihoods that a given x came from the sampled z. We now analyze the empirical effects for all three datasets. 
For the cpu dataset, we notice that the latent space does not look drastically different, considering we only plot a random subset of it. We do, however, notice a performance dip. - -| Latent Distribution | avg best f score over 10 runs | -| gaussian | .642| -|t with 10 df | .593 | - -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space cpu.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_tdist_cpu.png" class="img-fluid rounded z-depth-1" %} -
-
-
- The image on the left is the latent space of the cpu dataset made by guassian mixture, and the image on the right is the latent space of the cpu dataset made by a t-distribution mixture -
- - Similarly for the g dataset, we see a slight performance reduction, but a similarly shaped latent space. - -| Latent Distribution | avg best f score over 10 runs | -| gaussian | .8809| -|t with 10 df | .871 | - - -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_g.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/laten_space_tdist_g.png" class="img-fluid rounded z-depth-1" %} -
-
-
- The image on the left is the latent space of the g dataset made by guassian mixture, and the image on the right is the latent space of the g dataset made by a t-distribution mixture -
- - -For the weather dataset, the performance reduction is negligible which suggests that the means of our learned latent space truly represent the normal patterns of the series. (Note that this dataset did not come with timestamps. Disregard any time colorations on latent space plots) - -| Latent Distribution | avg best f score over 10 runs | -| gaussian | .996| -|t with 10 df | .995 | - - -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_weather.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_tdist_weather.png" class="img-fluid rounded z-depth-1" %} -
-
-
- The image on the left is the latent space of the weather dataset made by guassian mixture, and the image on the right is the latent space of the weather dataset made by a t-distribution mixture -
- - -This brief analysis suggests that the gaussian distribution is truly a good adaptable choice for our latent space. It allows for some variability when doing generative modeling, but also allows for a more robust estimator of reconstruction probability. - - -## Should we Scale the Entropy term in M-ELBO? - -Xu et al provide a weighting system for the first two terms of M-ELBO, but choose not to add any weighting to the entropy term. They hypothesize that because of the entropy term's role in expanding the latent space, it is perhaps better to keep it untouched. We will perform an empircal analysis on the effects on weighting the entropy term. - -In our first experiment, we choose a reasonable choice for the weight of the entropy term. We will use $$\beta$$ to weight both $$logP_{z}(z)$$ -and $$logq_{\psi}(z | x)$$. Thus M-ELBO becomes - -$$E_{z \sim q_{\psi}(z | x)}[\sum_{w = 1}^W \alpha_w log p_{\theta}(x | z)+ \beta log p_z(z) - \beta log q_{\psi}(z | x)]$$ - -We can reformulate the M-ELBO in terms of the KL divergence to hypothesize what effects scaling -$$logq_{\psi}(z | x)$$ -by $$\beta$$ might have. - -$$E_{z \sim q_{\psi}(z | x)}[log p_{\theta}(x | z)] - KL(q_{\psi}(z | x)^{\beta} || p_z(z)^{\beta})$$ - -Using the power rule of logarithms, we can rewrite this objective as - -$$E_{z \sim q_{\psi}(z | x)}[log p_{\theta}(x | z)] - \beta KL(q_{\psi}(z | x) || p_z(z))$$ - -Thus we have essentially applied shrinkage to the KL divergence between the prior and the posterior based on the amount of abnormal data in our training data. This would perhaps encourage the latent space to look more gaussian, such that the prior probability dominates the posterior probability in order to increase the M-ELBO lower bound. Thus we can hypothesize that our latent space will perhaps experience shrinkage. 
This would certainly be undesired behavior if our goal is to expand our latent space and allow for more distinguishing latent space representations while keeping some form of structure. - -We now analyze the empirical results. We first analyze the effects on the cpu dataset. There do seem to be signs of shrinkage in the latent space when it is weighted, however there is no clear absolute shrinkage or expansion. The shape is certainly different, and it seems like the latent space expanded in the negative direction in the second dimension of the latent space, and shrunk in the positive direction. We also observe a performance increase. - - -| | avg best f score over 10 runs | -| Unweighted Entropy | .642| -|Weighted Entropy | .665 | - -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space cpu.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_cpu.png" class="img-fluid rounded z-depth-1" %} -
-
-
- The image on the left is the latent space of the cpu dataset without weighting the entropy term, and the image on the right is the latent space of the cpu dataset with a weighted entropy term -
- -On the g dataset, we can certainly see a differently shaped latent space. We notice that the third dimension of the latent space expanded, while the first and second dimensions showed some level or shrinkage compared to the baseline. We do see a slight reduction in performance compared to the baseline - -| | avg best f score over 10 runs | -| Unweighted Entropy | .8809| -|Weighted Entropy | .875 | - -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_g.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_g.png" class="img-fluid rounded z-depth-1" %} -
-
-
- The image on the left is the latent space of the g dataset without weighting the entropy term, and the image on the right is the latent space of the g dataset with a weighted entropy term -
- -Finally, for the weather dataset, we also see that weighting the entropy term did not lead to absolute expansion or shrinkage of our latent space. We observe shrinkage in the third dimension of the latent space, slight shrinkage in the first dimension, and slight expansion in the second dimension. We also observe a slight performance dip. - -| | avg best f score over 10 runs | -| Unweighted Entropy | .9967| -|Weighted Entropy | .9928 | - -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_weather.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_weather.png" class="img-fluid rounded z-depth-1" %} -
-
-
- The image on the left is the latent space of the weather dataset without weighting the entropy term, and the image on the right is the latent space of the weather dataset with a weighted entropy term -
- - -These results suggest that weighting the entropy term can lead to shrinkage of the latent space. It certainly lead to different latent space shapes, where we observed expansion in some dimensions and shrinkage in others. There are also no conclusive results in its affects on performance, as we saw improved performance in one dataset and decreased performance in the other two. - -We will now perform a more general experiment on the effects on weighting the entropy term with the cpu dataset. Instead of weighting the entropy term with $$\beta$$, we will try different weights between 0 and 1 and observe the effects. We increased the capacity of our VAE network, so we rerun the experiments on weighting entropy with $$\beta$$ and not weighting entropy in order to have a valid comparison of results. - -When the entropy term is weighted by zero, we notice a very speherically shaped latent space which looks like a unit gaussian ball. This matches up with a quick evaluation of the elbo. There is no more reshaping of our latent space by the entropy term, and thus DONUT learns a latent space that matches up with the gaussian prior. With a weight of .2, we again see a circular latent space, however there is more deviation from a spherical shape. We continue to see this phenomenon of deviating from a spherical shape when the weights increase. We also notice that the points become more clustered as the weights increase. There seems to be a level of shrinkage as the weights increase, but for weights equal to .8 and .9, we see the latent space expand again. These results indicate that it is unlikely that weighting the entropy term has any effect on expanding the latent space. Results even suggest that non zero weights can lead to shrinkage. However, weighting the entropy term certainly affects the shape of our latent space, and the ability of the VAE to learn representations that look less guassian. 
- -The performance results provide some interesting insights, and can serve to motivate future areas of exploration. We see that performance is maximal when the weights are very low, or around .6 and .7. When the weights are low, the latent space is very constrained, and thus DONUT will learn purely normal representations of the data. As the weights increase, and the ability of DONUT to learn latent representations that deviate from purely gaussian increases, we generally see consistently good performance that is comparable to the zero weight case. With weights larger than .8, we begin to see a dip in performance. With large weights, the latent space deviates the most from being gaussian shaped and perhaps begins to learn anomalous representations. This suggests a benefit to enforcing more normality and constraint on the shape of our latent space for the purposes of anomaly detection. This could mean not weighting the prior term by $$\beta$$, or adding some additional terms to M-ELBO that somehow enforce the latent space to look more gaussian. - - - -| weight | avg best f score over 5 runs | -| ------| -----| -| 0 | .682 | -|.1| .673 | -|.2| .657 | -|.3| .602 | -|.4| .666 | -|.5| .634| -|.6| .7 | -|.7| .688 | -|.8| .602| -| .9| .625| -|1 or unweighted| .64| -|Beta weighted| .6| - -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_cpu_expandednet.png" class="img-fluid rounded z-depth-1" %} -
-
-
 {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q0.png" class="img-fluid rounded z-depth-1" %}
-
-
- -
- The image on the left is the latent space of the cpu dataset without weighting the entropy term, and the image on the right is the latent space of the cpu dataset with a weight of 0 on the entropy term -
- -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q2.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q4.png" class="img-fluid rounded z-depth-1" %} -
-
-
- The image on the left is the latent space of the cpu dataset with a weight of .2 on the entropy term, and the image on the right is the latent space of the cpu dataset with a weight of .4 on the entropy term. -
- -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q6.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q8.png" class="img-fluid rounded z-depth-1" %} -
-
-
- The image on the left is the latent space of the cpu dataset with a weight of .6 on the entropy term, and the image on the right is the latent space of the cpu dataset with a weight of .8 on the entropy term -
- - - - - -## Empirical Exploration of the Effects of Beta and the Missing Data Injection Rate - - -We now perform analysis on exactly how $$\beta$$ affects $p_z(z)$, both through experimenting with differing rates of missing data injection, as well as manually adjusting $$\beta$$ and observing the results. - -We restate M-ELBO in tems of the KL divergence. - -$$E_{z \sim q_{\psi}(z | x)}[log p_{\theta}(x | z)] - KL(q_{\psi}(z | x) || p_z(z)^{\beta})$$ - -As $$\beta$$ decreases, the KL divergence increases. In order to decrease the divergence, the VAE should decrease the posterior probability, which could lead to a more spread out or non gaussian looking latent space, or rather one where we learn very distinguishing representations. As seen from our previous analysis, this might be undesired behavior for the purposes of anomaly detection. Performing automatic reduction of $$\beta$$ by increasing the missing data injection rate could prevent DONUT from learning enough about the normal patterns in the training data, and thus performance will likely suffer if the injection rate gets too large. - -We begin first by trying out $$\beta$$ values between 0 and 1 in order observe the effects, and motivate adjusting the missing data injection rate. - -When $$\beta$$ is set to to 0, we see that the latent space looks fairly compact and non spherical. At $$\beta$$ between .1 and .4, we can see that the latent space is quite spread out, and displays some spherical properties, especially for $$\beta$$ = .3. For $$\beta$$ between .4 and .9, we can see that the sampled latent space begins to look more and more compact, yet there is still a reasonable spread in the latent space. There does not seem to be a clear relationship between the spread and shape of the latent space and perfomance, however, we note that the $$\beta$$ that resulted in the highest performance was $$\beta$$ = .3, whose latent space looks the most spherical. 
This again supports the notion that when the latent space looks more gaussian, anomaly detection is improved. - - -| Beta | avg best f score over 5 runs | -|---------|-------| -| 0 | .648 | -| .1 | .595 | -| .2 | .591 | -| .3 | .686 | -| .4 | .633 | -| .5 | .6 | -| .6 | .623 | -| .7 | .614 | -| .8 | .669 | -| .9 | .646 | -| 1 or unweighted| .64 | -| Beta weighted| .6 | - -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_0.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_1.png" class="img-fluid rounded z-depth-1" %} -
-
-
- The image on the left is the latent space of the cpu dataset with a weight of zero on the prior term, and the image on the right is the latent space of the cpu dataset with a weight of .1 on the prior term -
- -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_3.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_5.png" class="img-fluid rounded z-depth-1" %} -
-
- -
- The image on the left is the latent space of the cpu dataset with a weight of .3 on the prior term, and the image on the right is the latent space of the cpu dataset with a weight of .5 on the prior term. -
- -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_7.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_9.png" class="img-fluid rounded z-depth-1" %} -
-
- -
- The image on the left is the latent space of the cpu dataset with a weight of .7 on the prior term, and the image on the right is the latent space of the cpu dataset with a weight of .9 on the prior term -
- -In our experiments on adjusting the missing injection rate, we saw a significant decrease in performance as the rate increased, even reaching an average best f score of .06 when the rate was .8. It is unclear from our experiments whether this is the result of training not converging, as we do observe high loss values, or simply bad performance of DONUT when a vast majority of the data is missing, which would be expected behavior. This is something that would need to be explored further - - - - -## Improving VAE Architecture - -For the purposes of simplicity, DONUT utilizes fully connected layers for both the encoder and the decoder. While these choices certainly produce decent results, perhaps we can implement architectures that can better utilize the temportal information encoded within each window. We explore using a one dimensional CNN for the encoder in DONUT. Perhaps CNNs are better able to learn representations that encode more temporal information within a sample window. In order to make the CNN network as comparable as possible with the fully connected network, we will only use two convolution layers. We apply a kernel size of 3, and a stride of 1. We also use max pooling to downsample the data. - -For the cpu dataset, we observe significant performance improvements with the CNN architecture. We notice the detection of contextual anomalies, which are non obvious local deviations. The latent space looks fairly spherical, however there does not seem to be any noticeable time gradient behavior in the latent space, despite the improved ability of the encoder to take advantage of temporal information. - -| Architecture | avg best f score over 10 runs | -| 2 layer CNN | .714 | -| 2 layer fc | .642 | - - -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/best_baseline_pred_cpu.png" class="img-fluid rounded z-depth-1" %} -
- -
-
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_cpu.png" class="img-fluid rounded z-depth-1" %} -
-
-
- On the top are the ground truth and predicted anomaly points by the baseline DONUT with fully connected encoders on the cpu dataset. On the bottom are the ground truth and predicted anomaly points by DONUT with CNN encoders on the cpu dataset. Ground truth anomalies are colored in red, and predicted anomalies are colored in green -
- -
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_cpu.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Latent space plot for DONUT with CNN encoders on the cpu dataset -
We did not see this same performance improvement in the other two datasets. Additionally, we struggled to achieve stable training on the weather dataset, and so further work needs to be done to achieve convergence in order to perform evaluations on the efficacy of CNNs with that dataset. For the g dataset, we noticed a significant performance reduction. The difference between the performance on the cpu dataset and the g dataset could suggest that CNN architectures could lead to overfitting on less smooth time series. Looking at the plot of predicted anomalies seems to suggest this, as DONUT with a CNN encoder seems to predict that a larger number of the troughs in the g series are anomaly points, an indicator of potential overfitting to the series pattern.

| Architecture | avg best f score over 10 runs |
| ------------ | ----------------------------- |
| 2 layer CNN | .824 |
| 2 layer fc | .881 |
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_g.png" class="img-fluid rounded z-depth-1" %} -
-
-
-
- {% include figure.html path="assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_g.png" class="img-fluid rounded z-depth-1" %} -
-
-
- On the top are the ground truth and predicted anomaly points by the baseline DONUT with fully connected encoders on the g dataset. On the bottom are the ground truth and predicted anomaly points by DONUT with CNN encoders on the g dataset. Ground truth anomalies are colored in red, and predicted anomalies are colored in green -
- - -This is an interesting area of exploration for DONUT. There are a variety of architectures such as RNN's and transformers that have shown superior performance on time series data, and those could be adapted to this method to improve performance over both CNN and fully connected architectures. - - -## Choosing Number of Latent Space Dimensions - - -For the purposes of plotting the latent space in our experiments, we chose to use use a latent space with dimension three. However, intuitively, and as shown in the paper, choosing higher a higher dimension latent space can lead to performance improvements. explain that not only does increasing the size of the latent space increase the quality of the representations learned in the latent space, but it also serves to improve the stability of training. We will look to explore whether the properties of a series can give insight into good choices for the size of the latent space, and perhaps motivate automatic selection of the number of dimensions in the latent space. - -We hypothesize that smoother series do not need as large a dimension in the latent space as series that display higher levels of roughness. Intuitively, in smoother series, the anomalies should be more "obvious", while in less smooth series, rough behavior could be mistaken for an anomalous pattern. - -We take a technique from smoothing splines, which are function estimates obtained from noisy observations of some data process. Smoothing splines enforce a roughness penalty on the function estimate, defined as such - -$$ \int \hat{f}(x) ^2 dx$$ - -We will use a finite difference estimate of this penalty on the standardized series to define a metric that can be used to describe the roughness/smoothness the series. Now that we have defined a metric describing the smoothness of a series, we can evaluate the best choice of number of latent dimension for series of differing levels of smoothness. 
In order to converge during training, we had to double the width of the fully connected VAE, and also double its depth. - -| Dataset | Roughness Penalty | -|---------|----------| -| cpu | .061 | -| g | .598 | -| weather | .023 | - - -We begin with the cpu dataset. We notice that performance significantly increases when the latent space is 6 dimensions, but performance begins to drop off as the number of dimensions increases, which suggests overfitting. - - -| number of dimensions | avg best f score over 5 iterations | -| ---------------| ----------| -| 3 | . 637 | -| 6 | .833 | -| 9 | .826 | -| 12 | .797 | - - -For the g dataset, performance peaks when the latent space has 9 dimensions. We also see slightly better performance with a latent space dimension of 12 compared to 6 - - -| number of dimensions | avg best f score over 5 iterations | -| ---------------| ----------| -| 3 | . 889 | -| 6 | .882 | -| 9 | .894 | -| 12 | .885 | - - -For the weather dataset, we notice a consistent performance improvement when the number of dimensions is increased. - - -| number of dimensions | avg best f score over 5 iterations | -| ---------------| ----------| -| 3 | . 994 | -| 6 | .997 | -| 9 | .998 | -| 12 | 1| - -These results do not provide any clear picture on whether there is any relationship between the smoothness of a series and the best choice for the number of latent dimensions. For our smoothest series (weather), we observed consistent improvement as the number of dimensions increases. The roughest series (g) also seems to show this behavior. However, we see that increasing the number of dimensions for the cpu dataset decreases performance. - - -## Concluding Thoughts - -Generative models present an interesting approach to the problem of anomaly detection in time series. They present an extremely customizable class of hypotheses that allow us to design a fairly robust probabilistic anomaly detector. 
Through the experiments we ran, we gained further insights into DONUT, and VAEs more generally, as anomaly detectors. We explored what characteristics of the learned latent space can lead to improved anomaly detection performance, and how we can modify the ELBO to achieve those goals. We also see that there is huge potential for exploring more complex encoder architectures for additional performance improvements. Perhaps VAEs can become a robust tool for anomaly detection, and provide benefit to a large variety of people and industries.
-# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -# Introduction - -Deep learning has revolutionized the way in which humans interact with the world around them. Its growing ability to ingest vast amounts of data, automate feature extraction, and learn complex patterns and nuances among inputs have contributed to breakthroughs in healthcare, natural language processing, computer vision, and more. A particularly exciting avenue of this innovation has been in the burgeoning field of personalized text generation, which aims to produce text that resembles the style, tone, and word choice taken on by a particular user. Significant advancement in this field has the potential to create more effective forms of communication for individuals with disabilities, personalize educational content, and enhance user interactions with chatbots and virtual assistants, all contributing to a better overall user experience. - -In an effort to make the availability of personalized text generation more wide-scale, researchers have conducted several studies in the field, centering their approach to the generation of domain-specific personalized text (utilizing domain-specific features/knowledge). 
Notable studies conducted include [Towards Controllable and Personalized Review Generation](https://arxiv.org/pdf/1910.03506.pdf), which utilizes a product description and self-attentive recursive autoencoders to generate a personalized review [[1]](#1), [Knowledge-Enhanced Personalized Review Generation with Capsule Graph Neural Network](https://arxiv.org/pdf/2010.01480.pdf), which constructs a model based on a CapsGNN, and [Research on user granularity-level personalized social text generation technology](https://iopscience.iop.org/article/10.1088/1742-6596/2294/1/012015/pdf), which utilizes an encoder and decoder for text generation [[2]](#2). A lesser explored part of the field and an area that we have chosen to explore for our final project is embedding in the ability to generate personalized text across domains without domain-specific features [[3]](#3). Our project draws inspiration from ["Teach LLMs to Personalize – An Approach inspired by Writing Education”](https://arxiv.org/pdf/2308.07968.pdf), which includes a promising multi-step framework that retrieves, summarizes, ranks, and synthesizes a user’s past documents to generate a personalized version of the document at hand [[4]](#4). - -A critical aspect of the workflow discussed in the LLM personalization paper and an area that we believe can be improved upon using some of the methods discussed in 6.S898 this semester is the way in which the model synthesizes past documents. Throughout the paper, we will be exploring two creative approaches to synthesis that utilize vector word embeddings to pull relevant words from past documents in an effort to improve the models ability to personalize text. 
- - -# Literature Review -An integral part of our exploration project was experimenting with using less data and smaller models to see how performance degrades with respect to the approach discussed in the personalization for LLMs paper (no open source code attached as the project is currently being worked on by researchers at Google). Experimentation required taking an extensive look at the steps involved in the original implementation, gaining an in-depth understanding of the deep learning principles discussed, and optimizing training and compute under machine constraints to process vast amounts of real-world data. - -The problem formulation for the approach to personalized text generation discussed in the paper can be stated as the following: Given the immediate context of a current document (first k characters) written by a user and access to their past documents, can we develop a model that generates text that is similar to the text of the current document (similarity evaluated by calculating Rouge-1, Rouge-2, Rouge-L, and Bleu scores) . As mentioned earlier, the framework for answering this problem formulation involves first obtaining outputs for retrieval, ranking, summarization, and synthesis, and then feeding these distinct parts into an LLM to produce a personalized body of text (we ignore the auxiliary task of training the LLM to distinguish the owners of written documents for the purposes of this project). - -The retrieval discussed in the paper uses two methods of outputting relevant documents: sparse retrieval, which compares past documents to the current context using the popular BM25 ranking algorithm, and dense retrieval, which uses a transformer-based text-to-text model to map and compare documents in a 768 dimensional vector space. 
The ranking step then takes this input, orders documents based on their BM25 scores or cosine similarity when compared with the immediate context, and truncates the input to 2500 characters to only take the top documents. The summarization step then summarizes the top ranked past documents in two ways: context independent summarization, which finetunes an LLM on publicly available data and applies this model to the top ranked entries, and context dependent summarization, which uses weak labels (generated from immediate context) to generate a summary in line with the contents of the current document. A visualization of the approach to the structure can be seen below. - -
-
- {% include figure.html path="assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/gen_structure_overview.png" class="img-fluid rounded z-depth-1" %} -
-
-
- An overview of the infrastructure used to process documents and fine tune the personalized generative model. -
The paper currently explores two methodologies for synthesis: (1) context independent synthesis, which simply uses the top 20 frequently used keywords from a user’s past documents and (2) context dependent synthesis, which uses weak labels to find text from past documents similar to the immediate context of the document to be personalized.
A more detailed explanation of the implementations of our proposed approaches will be shown in the following section. - - -# Description of methods & experiments - -## The Baseline Implementation -Our methodological approach began by re-implementing the baseline model from the "Teach LLMs to Personalize" paper. We utilized two datasets mentioned in the research paper: CNN_DailyMail ([CNN_DailyMail](https://huggingface.co/datasets/cnn_dailymail))and Amazon Review Data for Books ([Amazon_review_data](https://cseweb.ucsd.edu/~jmcauley/datasets/amazon_v2/)). To enhance efficiency of compute time, we streamlined the data by reducing its size, ensuring a quicker fine-tuning process while retaining data integrity. We also utilized the [T5-base model](https://huggingface.co/t5-base), a smaller model than the T5-11b model mentioned in the paper, for summarization and the personalized generation model. Furthermore, we opted to use the context-independent methods for both summarization and synthesis because the research paper results indicated that their effectiveness is closely comparable to the context-dependent methods. For fine-tuning the summarization model, we utilized a 10 percent subset of the CNN daily mail dataset (311k datapoint original size) with the AdamW optimizer (seeing AdamW is a comparable optimizer to Adafactor, which is what was used in the "Teach LLMs to Personalize" paper), ensuring a balance between efficiency of tuning and comprehensive learning. This set the foundation for our exploration of advanced text synthesis techniques by giving us a base fine tuning and data processing infrastructure. On top of this, the changes we made to the amount of data used along with utilizing a smaller T5 model allowed us to analyze whether the final evaluation results degraded significantly when making the infrastructure of fine tuning the personalized generation model more compact. 
- -## Overview of Modification & Experiments -In our new approach for synthesis, we utilized Word2Vec and GloVe which hinges on the concept of embedding space. In this space, words are represented as vectors, capturing their semantic relationships based on their context in large text corpora. By embedding the current document and past documents (from the same user) in this space, each word is assigned a position that reflects its semantic meaning. - -
-
- {% include figure.html path="assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/tsne_dim_reduction_example.png" class="img-fluid rounded z-depth-1" %} -
-
-
- An example of how using TSNE dimension reduction can illustrate how words are placed in embedding space. Note that dimension reduction does not always come out cleanly since word embeddings are complex and can't be easily represented in 2D space. -
- - -The ‘centroid’ of the current document in this space is a calculated mean vector, representing the overall semantic direction of the document. Words closest to this centroid are likely to be central to the document’s theme or style. When we look for words from past documents that are closest to this centroid, we are essentially searching for words that align closely with the thematic and stylistic essence of the current document. - -
-
- {% include figure.html path="assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/centroid_example.png" class="img-fluid rounded z-depth-1" %} -
-
-
- An example of how our centroid for the current document corresponds to other words from past documents (note we used PCA dimensionality here). We also chose to display words that had very close euclidean distances to the centroid. Note our centroid from the current document highlighted the following as significant words: ['like', 'since', 'first', 'mystery', 'book'] -
- -This method makes sense technically because it leverages the nuanced understanding of language captured in word embeddings. By focusing on words that are semantically close to the central theme of the current document, the model can more effectively identify and use terms that are likely to be relevant and stylistically consistent for personalization, thereby potentially enhancing the generated text of the personalized generation model. - -## Experiment / Word2Vec vs GloVe -Word2Vec and GloVe are both models for word embeddings, but they differ in their approach to creating these embeddings. Word2Vec, developed by Google, primarily uses local context information of words (words surrounding a given word) to generate embeddings. This results in embeddings that capture more of the syntactic and semantic relationships based on specific local contexts. - -GloVe (Global Vectors for Word Representation), on the other hand, is designed by Stanford and incorporates global matrix factorization and local context window methods. It emphasizes capturing global statistics of the corpus by considering overall word co-occurrence frequencies, essentially acting as an unsupervised learning algorithm that generates word embeddings. - -When used for synthesis in text personalization, these differences influence the nature of the embeddings. Word2Vec might be more sensitive to the specific contextual use of words in the current and past documents, potentially offering more precise thematic matches based on immediate context. GloVe, with its global perspective, might bring in a broader understanding of word use, capturing more general usage patterns and thematic relationships that extend beyond the immediate context. This could lead to a slightly different set of words being selected for personalization in the synthesis process. - -In our experiment, we adapted the structure from the "Teach LLMs" paper, incorporating our novel synthesis methods using Word2Vec and GloVe. 
The process involved independently fine-tuning the personalized generation model for each synthesis approach. This fine-tuning was crucial to observe how the different embedding techniques influenced the model's performance. After implementing the new synthesis methods, we conducted a thorough evaluation to compare their effectiveness, along with the combination of the original and new synthesis approaches, with the base model. The key focus was on analyzing how the different word embeddings (and combinations of embeddings) impacted the quality and personalization of the generated text, with performance metrics providing insights into the strengths and limitations of each method. - -# Analysis / Evaluation of Results -The evaluation metrics used in the “Teach LLMs” paper (and also what we utilized), BLEU (Bilingual Evaluation Understudy), ROUGE-1, ROUGE-2, and ROUGE-L, are standard metrics used to evaluate the quality of text which has been machine-translated or generated by machine learning models. - -BLEU Score: The BLEU score evaluates the quality of machine-translated text by comparing it with one or more reference translations. It does so at various levels, from individual words to consecutive sequences of words (n-grams), to assess precision. A higher BLEU score indicates more similarity to the reference text, often implying better translation quality. However, BLEU has limitations as it does not account for the fluency or grammaticality of the generated text. - -
-
- {% include figure.html path="assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/bleu_score.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Calculations behind the BLEU score calculations. -
- -ROUGE Scores: ROUGE (Recall-Oriented Understudy for Gisting Evaluation) is another set of metrics for evaluating automatic summarization and machine translation. ROUGE-1 and ROUGE-2 refer to the overlap of unigrams (single words) and bigrams (two consecutive words) between the machine-generated text and a set of reference texts, respectively. ROUGE-L considers the longest common subsequence, focusing on the longest coherently matching sequence of words. ROUGE scores can consider both precision (like BLEU) and recall, providing a more rounded evaluation. - -
-
- {% include figure.html path="assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/rouge_score.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Calculations behind the ROUGE-N (N-gram) score calculations; in our case N = 1, 2, or longest common subsequence. -
- -We can also take a look into how our models performed during the fine tuning period. Based on the progression of the training and validation loss, you can infer how well the model is learning and whether it's overfitting (learning the training data too closely and not generalizing well) or underfitting (not learning the training data well enough). - -Comparing the performance of our models using two different synthesis approaches–our base model versus the new synthesis approach using the GloVe or Word2Vec model, and the combination of the base model and new synthesis–could result in different behaviors most likely for one particular reason: - -Quality of Embeddings: The GloVe and Word2Vec models provide a different representation for words, capturing semantic relationships in a more nuanced way than just looking at IDF scores, which could lead to varied results during fine tuning. Also, combining our original synthesis with our new synthesis can give the model more information to finetune on allowing for a more intricate understanding of the text when generating. - -The differences in BLEU and ROUGE scores between the two models can arise from how each model handles the linguistic nuances of the generated text. If the new approach with the GloVe model is better at capturing the context and meaning of the sentences, it might score higher in BLEU and ROUGE, despite potentially higher loss values. - -The variations in BLEU and ROUGE scores could also indicate how each model deals with the trade-off between precision and recall—whether it's better at producing text that contains most of the expected content (high recall) or at avoiding extraneous content not in the reference (high precision). - -Evaluating these metrics in combination with each other, rather than in isolation, provides a more comprehensive picture of a model's performance and areas for potential improvement. 
- -The following results portray the overarching BLEU, ROUGE-1, ROUGE-2, and ROUGE-L score we received for the base model, our model using the new synthesis approach, and our model using the base synthesis along with the new synthesis. We have highlighted the snippets of the generated cases that produced our highest scores which are indicative of the possibilities of improvement if we were able to utilize larger T5 models and more training data. - -## Results - -The following table highlights the results of our evaluation of generated outputs from our baseline model versus our two new approaches (new synthesis and old synth + new synth). Althought there are cases where the max score for our new approaches are high, we believe that this is most likely the case where we generate the rest of a document that is already signficantly short. Essentially, since we don't need to generate a diverse output of words for a longer length, our more compact t5-base model with minimal training performs very well still. 
[^1] - -| | BLEU (avg) | ROUGE1 (avg) | ROUGE2 (avg) | ROUGEL (avg) | BLEU (max) | ROUGE1 (max) | ROUGE2 (max) | ROUGEL (max) | -|:---------------------------------- |:-----------:|:-----------: |:-----------: |:-----------: |:-----------:|:-----------: |:-----------: |--------------: | -| Baseline Model | 08.9531 | 29.5847 | 18.6126 | 25.6882 | 49.5207 | 65.2174 | 62.2222 | 65.2173 | -| New Synth (Word2Vec) | 09.0722 | 29.3465 | 18.3129 | 25.6115 | 46.6638 | 65.9340 | 62.2222 | 65.2174 | -| New Synth (GloVe) | 10.3810 | 31.9870 | 21.1543 | 27.4335 | 50.5317 | 65.8537 | 60.1942 | 63.4146 | -| New Synth (Word2Vec) + Old Synth | 10.4402 | 31.4181 | 20.2349 | 27.7710 | 58.0197 | 64.8148 | 61.9048 | 62.7907 | -| New Synth (GloVe) + Old Synth | 08.7228 | 29.2284 | 17.1685 | 24.6075 | 49.7273 | 65.5462 | 60.9756 | 61.9048 | - -[^1]: Output Produced From our Codebase: [https://github.com/dapacica/DL_finalproject_code/blob/main/FinalProjCleanColab.ipynb](https://github.com/dapacica/DL_finalproject_code/blob/main/FinalProjCleanColab.ipynb) - -# Conclusion / Discussion of Limitations -Throughout the paper, we have demonstrated the potential of embedding techniques like Word2Vec and GloVe in enhancing the personalization aspect of text generation models. Our experiments, which involved comparing these methods with traditional synthesis techniques, have shown promising results in terms of creating text that more accurately reflects the style and thematic preferences of individual users. - -## Limitations -For our exploration, we were limited to running all of our models and doing our data analysis on Google Colab in a short period of time along with having to reimplement the structure used in the "Teach LLMs to Personalize" paper since no codebase exists for it. 
Because of this, we had to find ways to condense our models and limit the amount of data we ingested so that we could spend less time waiting on models to run and freeing up storage and more time analyzing the output of our code. Two of the big adjustments that we made to navigate these constraints was using the t5-base model (fewer tokens than t5-11b), which we ran for a limited number of epochs, instead of the t5-11b model and using only a subset of data points from the provided Amazon Review Dataset. One of the other things that we tried to make the most advantage of our compute was quantizing our t5-base model to provide faster synthesis and summary to run on our ingested data, but we unfortunately ran into dependency issues and were unable to get this method working. However, from our analysis, we estimate that our evaluation results would have been much more in line with the paper’s results, or even surpass them, if we were able to run the t5-11b model for a larger amount of epochs and utilize more amazon review data. - -## Next Steps -If we choose to continue this project, we want to explore ways in which we can synthesize domain-specific knowledge, along with thematic tendencies, related to the current document that can be fed into the final LLM for text generation. There are a lot of benefits of providing synthesized information to the model as it filters for the “most important/significant” words in a document and we hypothesize that this supplementary information could add an extra level of knowledge to a model that has proven to perform well in personalization. - -Also, another pathway that could be explored is integrating Agent LLMs in the initial document ranking phase to see if the procured rankings are better than the current methods set in place (RankDocBM25, RankDocDense, RankSnippet, RankDocBySnpt). 
We believe that utilizing LLMs that have more awareness of context over large document spaces (and even varying languages) could be beneficial to the process of developing a personalized generation model.
-date: 2023-11-09 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Roger Pallares Lopez - -authors: - - name: Roger Pallares Lopez - url: "https://www.linkedin.com/in/rogerpallareslopez/" - affiliations: - name: Mechanical Engineering Department, MIT - - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images.bib - -# Add a table of contents to your post. -toc: - - name: Introduction - - name: Related Work - - name: Methods - - name: Results - - name: Discussion ---- - -## Introduction -B-mode ultrasound imaging is a widely employed medical imaging technique that uses high-frequency sound waves to -produce visual representations of the internal structures of the human body. Its main advantages are its ability -to produce real-time images, its portability, low cost, and especially the fact that is noninvasive and safe -(non-radiating). However, it is an imaging modality that carries a very high noise-to-signal ratio. Speckle noise, -out-of-plane movement, and high variability in image reconstruction across devices make the resulting images complex -to interpret and diagnose . As an example, the following figure shows an annotated b-mode ultrasound image. - -{% include figure.html path="assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig0.png" class="img-fluid" %} -
- Ultrasound b-mode image of the upper arm with the main physiology annotated. -
- -Self-supervised Vision Transformers (ViT) have emerged as a powerful tool to extract deep features for a variety of -downstream tasks, such as classification, segmentation, or image correspondence. Especially, DINO architectures -have exhibited striking properties, where its deep features present localized semantic information shared across related -object categories, even in zero-shot methodologies . Consequently, the aforementioned properties of DINO may allow -us to develop efficient yet simple methods for b-mode ultrasound image interpretation, without the need for an expert -or ground truth labels. - -In this work, we propose analyzing the performance and robustness of DINO in b-mode ultrasound images of the arm and leg, capturing musculoskeletal tissue -from two different ultrasound devices. We note that this dataset features a series of images with a high noise-to-signal ratio, -which is a property that DINO has not yet been tested against. In particular, we focus on assessing DINO-vit-s/8 deep features -across its blocks as well as its attention weights, with the final objective of segmenting bone on b-mode images in a zero-shot approach. Through -all these experiments, we show the potential and feasibility of implementing DINO models in real-world b-mode medical imaging applications. - -## Related Work -### DINO-vit Assessment -Since the release of DINO, a self-supervised method for training ViTs based on self-distillation, there has been a line of work focused -on exploring new capabilities and assessing the deep features obtained from such pre-trained models. In , -they showed how the attention heads corresponded to different parts of an object in an image, or how one could segment desired objects by thresholding -the self-attention maps. Similarly, semantic information analysis across related images was performed to show the potential -of the deep features contained in DINO-vit models. 
Employing principal component analysis (PCA), matching algorithms or linear classifiers
-on the deep features, promising results on segmentation, semantic co-segmentation, and correspondence tasks were presented , .
-
-Further research was done by combining Stable Diffusion features and DINO features, improving semantic correspondence tasks at the cost of
-increasing the computation effort . While DINO has shown strong generalization to downstream tasks, there
-has been no work on the assessment of this model on a b-mode ultrasound imaging domain. Besides the low signal-to-noise ratio, ultrasound
-images usually present a complex structure of tissues that makes it difficult to differentiate between the foreground, the desired structure
-to segment or analyze, and the background. Our work shows that DINO is also robust to this type of image, leading to promising
-results on segmentation tasks.
-
-### Ultrasound B-mode Imaging Segmentation on Musculoskeletal Tissue
-Muscle and bone segmentation have important applications in clinical and rehabilitation practices to assess motion performance, diagnosis
-of the musculoskeletal system, and quantification of rehabilitation procedures, among others. There has been effort in developing deep learning tools to
-automatically segment and quantify desired parameters for the aforementioned applications. In , a
-U-Net architecture with Deep Residual Shrinkage layers for denoising was implemented and trained to segment muscle fibers. Similarly,
-different muscle heads were segmented employing a large dataset of muscle images from different subjects and devices to train several
-convolutional neural network architectures , .
-
-Medical images, from any source, are in general scarce and difficult to label, which poses a limitation for deep learning models to achieve a good performance and generalization.
-Most of the current methods lack the capability to perform well in unseen segmentation tasks involving different anatomies.
In , -they developed a deep learning model, UniverSeg, based on a novel Cross-Block mechanism that produces accurate segmentation maps without the need for -additional training. However, when employed in noisier data domains, such as b-mode images, the performance breaks down. In this work, we discover that DINO has potential -even when dealing with noisier datasets based on b-mode ultrasound images. - -## Methods -### Dataset -The dataset consists of b-mode ultrasound images from the arm and leg of two subjects while moving. We recorded short videos -and randomly selected frames to obtain the images. In the images, bone, muscle, and fascia tissues can be appreciated. -We also acquired videos from two different ultrasound sources to expand the domain where DINO was tested. With all this, -4 different image origins (or image domains) form the dataset, as appreciated in the figure below. -We labeled 10 bone heads of each domain to evaluate DINO's performance. - -{% include figure.html path="assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig01.png" class="img-fluid" %} -
- Example of one image of each origin with its mask label (blue). a) Arm (Source 1, Subject 1). b) Arm (Source 1, Subject 2). c) Arm (Source 2, Subject 1). d) Leg (Source 2, Subject 1) -
- -### Deep Feature Assessment -We analyzed DINO-vit-s/8 features over different layers qualitatively. For any block $$i$$, we extracted the Keys, Values, Queries, and Tokens and applied -a principal component analysis (PCA) to get the three most important components. For the attention maps, we averaged the self-attention weights -of the CLS token over each head of the multi-head block. - -This analysis was done with the intention of qualitatively finding the most suitable deep features for the subsequent segmentation task. Similarly, -the self-attention maps were observed to corroborate that the model focuses especially on the bone, and less on the surrounding structures. - -{% include figure.html path="assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig1.png" class="img-fluid" %} -
-Workflow to obtain deep features as well as self-attention information. Transformer block design obtained from . -
- -### Segmentation Pipeline -As described in the results section, the Keys of the last block (block 12) of DINO-vit-s/8 were employed as deep features for the segmentation. -As in , we used a zero-shot approach as the pipeline for bone segmentation. We first clustered together -all the features obtained from the different images passed through DINO with k-means. Then, we selected those clusters for the segmentation -mask employing a simple voting algorithm. Being $$\texttt{Attn}_i^\mathcal{I}$$ the self-attention of the CLS token averaged over all heads of block 12 -in image $$\mathcal{I}$$ and patch $$i$$; and $$S_k^\mathcal{I}$$ the segment in image $$\mathcal{I}$$ belonging to cluster $$k$$. The saliency -of this segment was computed as - -$$ -\texttt{Sal}(S_k^\mathcal{I}) = \frac{1}{|S_k^\mathcal{I}|} \sum_{i \in S_k^\mathcal{I}} \texttt{Attn}_i^\mathcal{I} -$$ - -and the voting of the cluster $$k$$ was obtained as - -$$ -\texttt{Votes}(k) = \mathbb{1}[\sum_\mathcal{I}\texttt{Sal}(S_k^\mathcal{I}) \geq \tau ] -$$ - -for a threshold $$\tau$$ set to 0.2. Then, a cluster $$k$$ was considered to be part of the mask if -its $$\texttt{Votes}(k)$$ were above a percentage of 65% of all images. The following image sketches the whole process. - -{% include figure.html path="assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig2.png" class="img-fluid" %} -
-Zero-shot segmentation pipeline using keys as deep features. -
-To quantitatively assess the segmentation results, both Dice and IoU metrics were computed employing the labeled bone head segmentations. - - -## Results - -### Deep Features Assessment -We first input a single image to the model and analyzed the Keys, Values, Queries, and Tokens, as well as the self-attention -of the CLS token from shallower to deeper layers. - -The three most important components after performing the PCA on the deep features are plotted in RGB as depicted in the figure below. -Tokens seem to carry spatial information throughout the different blocks, representing depth information in the final block. On the other hand, -Keys and Values seem to carry spatial information on the shallower blocks, and semantic information on the deeper blocks. In fact, we considered -the Keys descriptors the most appropriate to be used to segment bone, as the bone head can be distinguished from the surrounding structures. Regarding -the attention maps, they seem to move from the skin (in shallow blocks) to the bone (deeper blocks). - -{% include figure.html path="assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig3.png" class="img-fluid" %} -
-Token, Value, Key, and Query features as well as self-attention maps for different blocks (from shallow to deep). -
-Now, if we focus on the Keys features of the last block for the four different image domains, we can appreciate a similar behavior. Bone heads seem to be -represented in all four cases by the Keys, being differentiated by the surrounding structures. That being said, we should note that the intersection between -muscles just above the bone is in some cases also represented like the bone. Regarding the self-attention maps, in all four cases, they are principally -focused on the bone head. However, we can also see that some muscle fibers or intersections may be present. - -{% include figure.html path="assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig4.png" class="img-fluid" %} -
-Keys deep features and self-attention maps from block 12 for the four different image origins. -
-An interactive scatter plot is another method to argue the representation of the bone by the Key features. For all the four different image origins, the patches -belonging to the bone head are grouped on a region of the Euclidean space, while the patches belonging to other structures are scattered all over other regions. -
- -
-
-3D scatter plot of the 3 components of the Key descriptors (block 12). Legend: "other" any patch not belonging to the bone head. "boneS1A1" bone patches -of Source 1 - Arm Subject 1. "boneS1A2" bone patches of Source 1 - Arm Subject 2. "boneS2A1" bone patches of Source 2 - Arm Subject 1. "boneS2L" bone patches -of Source 2 - Leg Subject 1. -
- -### Same Domain Experiment -We subsequently performed the segmentation task on a set of images from the same origin. For each of the 4 domains, sets of 2, 3, 5, and 10 images -were input to the segmentation pipeline. Recalling that the images were selected as random frames from short videos, each image within a domain -presented a slightly different configuration of bone and surrounding structures. Therefore, the goal of segmenting with varying image quantities was -to evaluate the balance between improvements due to increased feature quantity versus confusion introduced by variation in the images. - -The reader can observe the results in the figure below. The bones from Source 1 Arm 1 are the best segmented, and the amount of images does not affect -the performance, obtaining constant values of Dice and IoU of about 0.9 and 0.77, respectively. -The segmentation of images from Source 1 Arm 2 in general takes also some part of the muscle tissue, and as in the previous case, -the amount of images used does not change the performance with Dice and IoU metrics of about 0.7 and 0.5, respectively. -In the case of images from Source 2 Arm 1, a larger quantity of images improves the segmentation results, increasing Dice and IoU metrics from -0.58 to 0.75, and 0.46 to 0.61, respectively. Finally, the segmentation masks from images from Source 2 Leg carry not only the -bone but part of the surrounding tissue too. When increasing the number of images to 10, the performance drastically falls (with Dice and IoU of 0) -as the segmentation results contain muscle fibers instead of bone. - -{% include figure.html path="assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig5.png" class="img-fluid" %} -
-Results of the segmentation on same domain images experiment. a) Segmentation result examples for the 4 different image domains. b) Metrics for the 4 different image domains -and different amounts of images (mean and standard deviation). -
- -### Different Domain Experiments -Then, we performed the segmentation task on a set of images from origin pairs. Five images of each origin were paired forming the following groups. -Group 1: different physiology (source 1 - arm subject 1 and source 1 - arm subject 2), group 2: different sources (source 1 - arm subject 1 and source -2 - arm subject 1), group 3: different body parts (source 2 - arm subject 1 and source 2 - leg subject 1), and finally group 4: different body -parts and sources (source 1 - arm subject 1 and source 2 - leg subject 1). We carried out this experiment to evaluate if the deep -features shared from different image origins were similar enough to properly perform the segmentation task, giving an idea of feature correspondence -between different image domains. - -The image below shows the experiment results. The segmentation performed on the domain source 1 arm subject 1 worsens when paired with any other -image domains. Both IoU and Dice metrics fall from 0.9 and 0.77 (previous values) to 0.78 and 0.59, respectively. Contrarily, the domains -consisting of source 1 arm subject 2 and source 2 arm subject 1 improve when paired with source 1 arm subject 1. Finally, the image origin containing -leg images maintains a similar segmentation performance when being paired. - -{% include figure.html path="assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig6.png" class="img-fluid" %} -
-Results of the segmentation for pairs of domain images. Legend: Different physiology (source 1 - arm subject 1 and source 1 - arm subject 2), Different sources -(source 1 - arm subject 1 and source 2 - arm subject 1), Different body parts (source 2 - arm subject 1 and source 2 - leg subject 1), and -Different body parts and sources (source 1 - arm subject 1 and source 2 - leg subject 1). Bar plots contain mean and standard deviation. -
- -### Noise Experiment - -We further assessed DINO by introducing white noise to the dataset. Being an image $$\mathcal{I}$$, the image input to DINO was -$$\mathcal{I}_{\texttt{Noisy}} = \mathcal{I} + \epsilon \cdot \mathcal{N}(0, 1)$$. We segmented five images from the domain Source 1 Arm Subject 1 -and incrementally increased the white noise strength by tuning $$\epsilon$$. We performed this last experiment to evaluate how the deep -features and attention maps change as well as the resulting segmentation masks with increasing noise, gaining intuition on how robust DINO can be. - -As observed in the following figure, the Keys features and the attention weights start being affected by the noise at $$\epsilon = 2.0$$. Keys -features are less efficient at describing the bone from the surrounding structures, and the attention maps start shifting the attention to only the -left side of the bone and the muscle line above the bone. Segmentation results show that with increased noise, some parts of the muscle are segmented -and for $$\epsilon \geq 2.5$$, the right side of the bone is not included on the segmentation mask. - -Taking a look at the metrics, the more the noise strength -is increased, the lower the Dice and IoU values obtained. From little noise to the highest tested in this experiment, a reduction of about 50% for both -Dice and IoU occurs. - -{% include figure.html path="assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig7.png" class="img-fluid" %} -
-Results with noisy images. a) Original, Keys features, attention, maps and segmentation results for different values of $\epsilon$. b) Dice and IoU -metrics for different values of $\epsilon$. -
- -## Discussion - -In this project, we used a DINO ViT model to segment bone heads from ultrasound images using a zero-shot methodology involving clustering. We first studied -how the model deep features change across different layers, and chose Key features as the most appropriate for characterizing bone. We then segmented -bone from different image domains, initially employing batches of images from the same domain, and then combining them. Finally, we tested DINO and -its robustness by adding additional noise. - -Encouraging results were found in the deep features of the model. We could appreciate how both Key and Query features were capable of differentiating -bone, some muscle regions, and skin tissue. We also obtained surprisingly good segmentation masks for a zero-shot methodology -on a new dataset as ultrasound b-mode images are. In particular, the image domain "source 1 arm subject 1" presented very similar segmentation masks -compared to the labeled ones, giving an idea of how semantic features obtained by DINO extend beyond its training data domain, -displaying astonishing generalization. Even when adding noise to the image dataset, DINO Key features kept describing the bone up to high noise strengths. - -While the project has yielded promising results, there are several limitations to take into account. First, we should note that the success of -the zero-shot methodology has relied on an initial hyperparameter tuning, finding the threshold $$\tau$$, the voting percentage, and the number of -clusters. However, we are aware that the optimal configuration may vary across different datasets or imaging conditions. Additionally, -we focused on segmenting only bone, but we have not explored the capabilities of DINO to segment other tissues or structures. We acknowledge that -a comprehensive medical imaging solution should combine the segmentation of multiple relevant structures for a general understanding and application. 
-Finally, only two anatomical parts (arm and leg) and two subjects were included in the dataset. To better explore the applicability of the model, -a more diverse dataset containing more anatomical parts from more subjects should be considered. - -In conclusion, this project demonstrates the potential of employing the DINO ViT model for ultrasound bone segmentation using a zero-shot -methodology. We believe that this work lays a foundation for future improvements, promoting a more comprehensive understanding -of DINO's capabilities in medical image segmentation. diff --git a/_posts/2023-11-09-Structural_vs_Data_Inductive_Bias.md b/_posts/2023-11-09-Structural_vs_Data_Inductive_Bias.md deleted file mode 100644 index 5506c0c8..00000000 --- a/_posts/2023-11-09-Structural_vs_Data_Inductive_Bias.md +++ /dev/null @@ -1,219 +0,0 @@ ---- -layout: distill -title: Structural vs Data Inductive Bias -description: Class project proposal -date: 2023-11-08 -htmlwidgets: true - -# Anonymize when submitting -# authors: Tony Jiang, Gabriel Gallardo -# - name: Anonymous - -authors: - - name: Gabriel Gallardo - url: "" - affiliations: - name: MIT, Cambridge - - name: Tony Jiang - url: "" - affiliations: - name: MIT, Cambridge - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-Structural_vs_Data_Inductive_Bias.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Related Work - - name: Methods and Experiment - - name: Results - - name: Conclusion - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. 
-_styles: >
- .fake-img {
- background: #bbb;
- border: 1px solid rgba(0, 0, 0, 0.1);
- box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1);
- margin-bottom: 12px;
- }
- .fake-img p {
- font-family: monospace;
- color: white;
- text-align: left;
- margin: 12px 0;
- text-align: center;
- font-size: 16px;
- }
----
-## Introduction ##
-### Lack of Training Data ###
-The transformative impact of vision transformer (ViT) architectures in the realm of deep learning has been profound, with their applications swiftly extending from computer vision tasks, competing with traditional neural network architectures like convolutional neural networks (CNNs). Despite their success, the intricacies of how architectural variations within ViTs influence their performance under different data conditions remain largely uncharted. Unraveling these subtleties is the goal of this project.
-### Project Goal ###
-While much research has been made to find the best choice of data augmentation or the best structural change in the model to increase performance, our project empirically compares two kinds of methods:
-- Data augmentation through tuning-free procedures
-- Explicit inductive bias through discrete attention masking
-For data augmentation, we chose a simple-to-use procedure called TrivialAugment to increase by four times the amount of training data. Here we want an easy-to-use method that could help as a benchmark for the second method.
-
-For explicit inductive bias, we use a general vision transformer architecture which allows us to change the number of attention heads and layers where the mask would be applied; this mask is what explicitly induces a bias in the model by forcing some layers to only learn relationships between close patches of the data.
-
-Our goal with this comparison and the difference with previous works is that we want to experiment to what point one method could be better than the other by really compensating for the lack of information in the training of a vision transformer.
-
-Due to computational and time limitations, we would train our model in a simple task of image classification based on CINIC-10. We also use a tiny model to be able to iterate many times through different scenarios of inductive bias. The selection of methods also reinforces these limitations but is a good starting point as many of the projects that would be lacking in training data probably are in testing phases where light tools like Google Colab are used.
-
-### Contribution ###
-The result from this project contributes in two ways. First, it gives us a glance of how beneficial the level of proposed inductive bias in the performance of the model could be, and second, it contrasts which method, and until which point, performs better given different scenarios of initial training data available.
-
-## Related Work ##
-
-### Data Augmentation ###
-Data augmentation consists in applying certain transformations to the data in order to create new examples with the same semantic meaning as the original data. For images, data augmentation consists in spatial transformations like cropping, zooming or flipping. Although data augmentation is very popular among practitioners, previous works like have proven that data augmentation by itself reaches a saturation point where it is even worse than training in the new data, decreasing the performance of the model. Given our goal of comparing data augmentation with inductive bias, we expect to get a similar result in the efficacy of data augmentation while we increase the initial amount of data.
-
-Data augmentation decisions can be tough because of the many options available to perform, but it is so popular that some researchers are trying to make it easier to use and computational-efficient, one example being the TrivialAugment method where simple random augmentation can be compared to or outperform other more complex algorithms that try to find the best augmentation for the given dataset.
TrivialAugment would be the procedure used in this project given its simplicity.
-
-### Changes in Architecture ###
-To compensate for the lack of training data for vision transformers, an interesting approach from is to use instance discrimination techniques which adjust the loss function of the model to improve the representation of the datapoints getting high accuracy scores for datasets with only 2k samples. The model proposed is trained from scratch with few data, but its implementation and interpretability could be troublesome for small projects.
-
-Other authors in use a set of pre-trained models with complementary structures (Convolutional and Involutional) to help a lightweight visual transformer model called DeiT (Data-efficient image Transformer) increase its performance by getting a baseline result that is added as a token and works like an induced bias of the properties of the image. The scores from the pre-trained models give more information than the ground truth because they give a value of likelihood for every class, which is a result of the different attributes of the specific image.
-
-Although these changes demonstrate that it is possible to get better performance with few data without augmentation, it is not clear how we can adjust the inductive bias produced to identify until which point it works. The usage of pre-trained models is also not desirable here because of our premise that we could be using this experiment to make decisions in new datasets and tasks.
-
-### Explicit Inductive Bias ###
-The model proposed in is a better example of real lack of training data overcome by introducing a different kind of attention heads. In this case, medical images tend to have the same orientation, a property that is leveraged to force the attention heads to focus on axial information which normally represents the spread of tumors.
Here the inductive bias is that the image has a structure where patches aligned in rows and columns are more related to each other than diagonal ones.
-
-Following this path, , and try to apply the local induced bias of convolutional networks in a transformer by different methods. adds a new layer at the beginning of the model which acts like a local mask but with variable learnable attention levels, where the model figures out how much local attention it should apply given the proposed task. on the other hand add new convolutional layers in parallel to the transformer to let them capture the local information while letting the original transformer keep the big picture of the image. Finally, in it is proposed a change in the initial attention layers, making them GPSA (Gated Positional Self-Attention) which learn for each patch whether to pay more attention to the attention product (Query * Key) or the position of the patch in the image.
-
-From these works it is stated that some layers of the transformer converge to convolutional behaviors given the nature of the data used for training, but this requires a relatively big amount of data that might not be available. It is also noticed that the inductive bias is applied to the first layers of the model.
-
-The model proposed in uses a simpler method which consists in applying a mask pattern to some of the attention heads to induce local attention bias into the model. To decide which heads and layers should be masked, it uses a soft masking approach where the model learns a scale factor between 0 and 1 which sets the level of local inductive bias that is applied to that head. The results show that it is possible to obtain good results by using more local masking in the first layers and keeping the global interaction in the last ones. This approach is also model agnostic and easy to implement, which is why it is close to the experimentation of this project.
-
-The power of this masking method is also shown in where the mask is learned by a parallel process of pixel-wise classification, successfully increasing the performance in more complex tasks like pixel-wise segmentation.
-
-
-## Methods and Experiment ##
-To explore and compare the benefits of data augmentation versus induced bias we are running three related experiments. All experiments would be run with CINIC-10 dataset in Google Colab using a T4 GPU. We decided to use CINIC-10 instead of CIFAR-10 because even though it is a drop-in replacement of CIFAR-10, it is much larger than CIFAR-10 so we can test on different numbers of base training samples but not so large as ImageNet, which is too large/difficult to test.
-
-### Experiment 1 ###
-The goal of the first experiment is to get a glance of the overall differences in accuracy for the compared methods. The model used for this experiment consists of a basic visual transformer with six layers and linear positional embeddings. Each layer corresponds to a multiheaded attention layer with only two heads. The schematic of the model can be seen in figure 1.
-
-Figure 1
-{% include figure.html path="/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_2_arch1.jpg" class="img-fluid" %}
-By default, the attention heads in the model are fully connected to give them a global behavior, but the model can be configured to apply a local pattern mask or a sparse pattern mask to all heads in all layers.
-
-Figure 2
-{% include figure.html path="/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_1_mask.jpg" class="img-fluid" %}
-The model would be trained with different scenarios of initial data, specifically, with 1000, 2000, 5000, 12500 and 20000 samples.
In each scenario, we would get four different models: -- Baseline model: Without data augmentation and with default global attention -- Data augmentation: With data augmentation and default global attention -- Local attention: Without data augmentation and with local attention -- Sparse attention: Without data augmentation and with sparse attention - -The data augmentation technique would be TrivialAugment and the metric would be accuracy on validation dataset. We set these four models trying not to mix data augmentation with changes in the induced bias, keeping the default global attention in the transformer as our baseline. - -### Experiment 2 ### -Having experimented with the differences where all layers have the same mask, we now set experiments to play with the level of induced bias applied to the model. The goal now is to identify a relation between the level of induced bias applied to the model and their performance. For this experiment we modify our first model in the following ways: -- We increase the number of attention heads in each layer from 2 to 4 -- We set the final two layers to global attention, so the mask is not applied to them -- We configure each head in the first four layers to be able to be hard configured as either local or global attention. - -Figure 3 -{% include figure.html path="/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_3_arch2.jpg" class="img-fluid" %} - -With this new model, we can create one instance for each combination of global/local head in any of the first four layers, generating a sense of “level of induced bias” based on the number and configuration of attention heads treated as local. 
- -Given computational limitations, we would set only two initial data scenarios (10000 and 50000) and get 16 models for each scenario: -- Baseline model: Without augmentation and with all global attention -- Data augmentation: With data augmentation and all global attention -- 14 combinations of local heads and layers: - -Table 1 -{% include figure.html path="/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_7_table.png" class="img-fluid" %} - -We would analyze the differences in accuracy between different levels of induced bias in the same initial data scenario and see if we can get a selection of best performing inductive bias levels to apply them more broadly in the third experiment. - -With this comparison we also want to capture what are the visual differences between the attention heads in the different levels of induced bias to try to explain what is doing better or worse than the baseline. - -### Experiment 3 ### -Our final experiment consists of comparing the accuracy and the effective additional data (EAD) that each method brings when applied to different initial amounts of data. The initial data scenarios to train the models would be 1000, 5000, 10000, 20000, and 50000 samples. The comparison would be made between the data augmentation model for each scenario, versus the top 3 levels of induced bias from experiment 2. - -The effective additional data (EAD) represents the extra amount of real data that the method is compensating, the higher the better to be considered as a successful method for solving lack of data problems. This metric is calculated by looking at which scenario of initial data would make the baseline model perform equal to the method analyzed. - -## Results ## -### Experiment 1 ### -In our initial experiment, we compared performance on four variations of model scenarios. 
Our baseline model uses global attention mechanism, one uses local attention mechanism, another one uses sparse attention mechanism, and the last model uses the same global attention mechanism as the first model except that data augmentation is applied during its training process. One notable callout for our initial experiment is that we took a naïve approach and designed our local and sparse attention heads to be in all six attention layers of the model. We trained and collected the validation accuracy and training time for each model variation for different numbers of base training samples from 1000 to 20000. Below are the results. - -#### Result and Analysis #### - -Figure 4 -{% include figure.html path="/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_8_exp1table.png" class="img-fluid" %} - -Figure 5 -{% include figure.html path="/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_9_exp1graph.png" class="img-fluid" %} - -There are a few notable observations to point out from the results. First, we can see that the two models using the local attention mechanism or sparse attention mechanism performed significantly worse than our baseline model that used global attention. Though we did expect this to happen since CINIC-10's classification task intuitively requires a global context of the image, we did not foresee the performance difference to be so drastic. For example, when the base number of training data is 5000, we see that the baseline model achieves a validation accuracy of 62.5% while the local attention model achieves just 13.97% and the sparse attention model 42.64%. We observe a similar pattern across different levels of base samples. It’s also worth calling out that sparse attention models perform better than local attention models. This makes sense as sparse attention models still take into consideration the global context just not completely on all the patches. 
Nevertheless, the sparse attention model takes almost the amount of time to train as the baseline model, hence it does not make sense to use it in lieu of the baseline model in practice. On the flip side, we verify that data augmentation improves performance and is the most significant when number of base samples is small. - -### Experiment 2 ### -Our first experiment showed that simply setting all attention layers to contain only local or sparse attention heads does not produce good performance. As we were exploring additional datasets or tasks where applying a different attention mechanism may yield better performance, we came across the paper in , in which it alluded to the idea that only applying local attention mechanism to just a few beginning layers of the network may improve performance even when the task intuitively requires a global context. The rationale behind it is that perhaps through using local attention mechanism, the model can create its own convolution making understanding local information better before using that information to answer a task that requires global context. With this inspiration, we designed our second experiment trying out different combinations of local attention heads and layers, as described in the Methods and Experiments section, and below are the results and analysis. - -#### Result and Analysis #### - -Figure 6 -{% include figure.html path="/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_10_exp2matrices.png" class="img-fluid" %} - -Here, the two matrices outline the validation accuracies we got when we trained the different local attention mechanism model on 10k and 50k base training samples. A quick recap, 1 Local Head and 1 Layer means we would use 1 local attention head in the 1st layer of the transformer. The color gradient in each matrix indicates the best performing combination from best (red) to worst (green). - -A few patterns can be noticed. 
First, for both matrices, models in the bottom right corner, representing a high number of local heads and in more layers, are performing worse than the rest. This aligns with our intuition from our first experiment because having more local attention heads in deeper portions of the network will prevent the models from capturing global context, thus resulting in a worse performance. - -Figure 7 -{% include figure.html path="/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_4_map.jpg" class="img-fluid" %} - -Diving further, in figure 7, we visualize the attention weights to better compare different levels of induced bias. It seems that the performance increases as we add more local heads, but it ends up fading and not capturing the important characteristics of the data. In the 50k samples scenario it can be noticed that with more local heads, the attention spots converge to small parts of the image where there is no information about the object in it. - -Figure 8 -{% include figure.html path="/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_6_local_map.jpg" class="img-fluid" %} - -Moreover, in figure 8, it can be noticed that when local heads are used, the model correctly identifies smaller details of the image. In this case, with all heads being global, it is hard to identify the three different cows in the middle image, but when some local heads are used, we can capture them. - -In summary, the major result of this experiment is that some models in the 10k samples sub-experiment produced better results than the base model. This is promising and validates our hypothesis from before. Though no combination produced better results in the 50k samples sub-experiment, we showed in Figure 8 that having local attentions can still be beneficial as it is able to capture some details that the baseline model misses. 
- -### Experiment 3 ### -From the second experiment, we were then intrigued to see how some of the better performing models do under different numbers of base samples than just 10k and 50k. So, we pick three combinations (2 local heads for 2 layers, 1 local head for 2 layers, 3 local heads for 1 layer) and tested their performance against the baseline model and baseline + data augmentation for different numbers of base training samples from 5000 to 50k. Below are the results and analysis. - -#### Result and Analysis #### - -Figure 9 -{% include figure.html path="/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_11_exp3table.png" class="img-fluid" %} - -Figure 10 -{% include figure.html path="/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_12_exp3graph.png" class="img-fluid" %} - -Here, we can observe two very interesting trends. First, it validates our hypothesis that using local attention heads early in the layers of the vision transformers can improve performance despite the fact that the task intuitively requires global context. This outcome is true for all three variations of the local attention models when the number of base training samples is 1000, 5000, and 10000. However, this effect tapers off when the number of base samples is sufficiently large, and the baseline model performs better. This seems to suggest that the benefit of the inductive bias coming from the local heads no longer outweighs the lack of information of the dataset. In other words, once there is sufficient data, the baseline model has enough information to learn a better representation on its own than that of the models. - -Figure 11 -{% include figure.html path="/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_13_exp3extradata.png" class="img-fluid" %} - -Another perhaps more explicit and comparable way of explaining the phenomenon is to look at the Effective Extra Sample score. 
Essentially, the data tells us how much extra (or less) training data the change in model architecture gets us to achieve the same performance accuracy if using the baseline model. This graph clearly illustrates that data augmentation and tuning of local attention heads are very effective when the training datasets are relatively small, less than 15000 samples. This is likely because the inductive bias of the local attention heads causes the models to capture important characteristics of the image more efficiently and effectively than does the baseline model. However, once the number of base training samples gets over 20000, the effect reverses and they all perform worse than the baseline model, as illustrated by the negative effective training samples. - -Note: We did not plot the extra effective data for the data augmentation model scenario past 10000 base training samples as its performance dropped significantly and is behaving erratically. - -## Conclusion ## -Through different experimentations, both data augmentation and induced bias by discrete attention masking can compensate for the lack of data for a given problem, but this compensation is only noticeable when the initial data is very low. - -The maximum effective additional data that the data augmentation method creates is higher than the induced bias method, but there is a sweet spot where induced bias is better than both data augmentation and baseline model. - -Once the initial amount of data starts to increase, data augmentation is the first one that in fact worsens the performance of the model. Induced bias on the other hand looks more stable while the initial data is increasing but is still not significantly better than the baseline model. 
- -We have shown that induced bias can help identify local attributes of the image more easily than the baseline alone, but this is only leveraged when the task that we want to solve is more specific and cannot be appreciated in a general task like image classification. - -### Limitations and Next Steps ### -Given the restricted resources and amount of time available to execute this project, there is enough room for continuing research on this topic: -- We tried to make the data augmentation and inductive bias methods simple and easy to play with, but they may not be the best ones. The same procedures of this project can be applied to better and more complex types of data augmentation and induced bias to see if the results are replicable in other situations. -- Further experimentation could be done with datasets with multiple tasks and a deeper model to see if the type of task has an impact on the effectiveness of one method or the other. This could also be applied in recent real-world problems where there is not enough data yet, but we can clearly identify the underlying relationship between patches of the images. -- Given a deeper model and a lot more experimentation in the level of inductive bias, there is an opportunity to empirically try to make a regression between how much inductive bias is applied to the model vs the resulting change in performance. The results of this project are not enough to implement such relations. - - diff --git a/_posts/2023-11-09-TransformersAndRNNs.md b/_posts/2023-11-09-TransformersAndRNNs.md deleted file mode 100644 index 9126db82..00000000 --- a/_posts/2023-11-09-TransformersAndRNNs.md +++ /dev/null @@ -1,235 +0,0 @@ ---- -layout: distill -title: "Transformers vs. RNNs: How do findings from real-world datasets relate to the theory?" - -description: "Transformers have rapidly surpassed RNNs in popularity due to their efficiency via parallel computing without sacrificing accuracy. 
Transformers are seemingly able to perform better than RNNs on memory based tasks without keeping track of that recurrence. This leads researchers to wonder -- why? To contriubte towards answering that question, I'll analyze the performance of transformer and RNN based models on datasets in real-world applications. Serving as a bridge between applications and theory-based work, this will hopefully enable future developers to better decide which architecture to use in practice." -date: 2023-12-1 -htmlwidgets: true - -authors: - - name: Cassandra Parent - affiliations: - name: Massachusetts Institute of Technology - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-TransformersAndRNNs.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction & Motivation - - name: Background & Prior Work - - name: Methods & Results - - name: Limitations - - name: Conclusions - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Introduction & Motivation - -Since their invention, transformers have quickly surpassed RNNs in popularity due to their efficiency via parallel computing . They do this without sacrificing, and often improving, model accuracy. Liu et al has developed a theorhetical explanation for this by mathematically proving that transformers learn shortcuts to automata that RNNs are unable to take . 
However, the results of this proof were only tested on synthetic dataset, and the question of how Transformers perform better than RNNs on memory-based tasks without keeping track of recurrence is still relevant, particularly for developers . In this project, I analyzed and compared the performance of transformer and RNN based models on both a financial stock and medical ECG dataset. By practically testing the applications of RNNs and Transformers in two different settings, I aim to aid developers by suggesting considerations for them to have while choosing an architecture to work with. - -## Background & Prior Work - - Recurrent neural networks (RNN) are a type of neural network that were previously considered state-of-the-art for generating predictions on sequential data including speech, financial data, and video . RNNs are distinct from other types of neural networks beecause they had an internal "memory" . This memory was based on the netowrk not only taking in the current input into its function but also information learned from all of the previous inputs . - - However, since Transformers were invented in 2017, they have rapidly made the use RNNs obsolete . Transformers were proposed as a simple network architecture based only on an attention mechanism, without the need for recurrence or convolutions . While they are quadratic in time complexity , as opposed to RNNs' linear complexity, their ability to parallelize makes them significantly faster to train than RNNs. Their popularity is broad, and Transformers are considered to be the gold standard in many fields including natural language processing, computer vision, and signal processing . - -While Transformers were accepted to perform better, the question remained -- why? Transformers do not keep track of recurrence but are somehow able to successfully complete memory-based tasks. Liu et al aimed to answer this question by exploring how transformers learn shortcuts to automata . 
-They did this both by mathematical proof and also through experimentation on synthetic data sets. Their primary conclusion is that transformers are able to universally approximate these complex functions in few layers by building simple parallel circuits. This leads to improvement in computational efficiency and also performance improvements . - -Current research in the RNN space is largely focused on trying to leverage their inherently linear complexity to its advantage , but I wondered if there were current applications where RNNs might already perform similarly or better. There are few papers in this space, and the ones that exist largely focus on a domain-specific performance mechanism . With that, I wanted to characterize performance of RNNs versus Transformers across a range of metrics: including accuracy, training time, memory, and parameters. This project acts as an extension of Liu et al by looking at real-world datasets from different applications and seeing if the conclusions change in the real-world. This aims to fill a gap between practical applications and mathematical explanations by attempting to provide an intuitive understanding of real-world dataset performance between transformers and RNNs. This would provide a holsitic summary of the tradeoffs between RNNs and Transformers while aiming to provide insight as to why Transformers remain so dominant in the space. - -## Methods & Results - -### Data -The first dataset I used was Yahoo Finance's stock dataset, accessible through the yfinance API. I specifically looked at the closing price data from the S&P500 stock group which represents the stocks from the 500 largest companies. The second dataset I used was from Kaggle (available [here](https://www.kaggle.com/datasets/shayanfazeli/heartbeat/)). This dataset captures ECG data. I specifically used the abnormal and normal sub datasets that contained single-heart beat single-lead ECG data. 
- -### Software -I ran all of the code for this project using Python 3.10 in Google Colab. The APIs numpy, scipy, matplotlib, seaborn, keras, tensorflow, and yfinance were all used. The notebook used for the stock experiments is available [here](https://colab.research.google.com/drive/1NNZKU18bm7QsZaWOjX8EYD7NFoJK0jkQ#scrollTo=YQfvmYtVU3Yg) and the ECG experiments [here](https://colab.research.google.com/drive/1yBfvgPEaUau6ttJV9ufdkgVm6x9y-AI9#scrollTo=MsfkYwwCS6WC). - -### Stock Model Comparisons -I began my experiments by loading and visualizing the data. I wanted to compare the transformer and RNN models on a time-series prediction so I decided to use 11 months of data to predict the next 1 month behavior. To do this, I loaded data from July 1st, 2021 to July 31st, 2022. Of note, the stock market is closed during weekends and holidays, so there were 251 days in my dataframe, and I trained on the first 231 days to predict the last 20. I then used an 80/20 train and test split. - -I also visualized several iterations of ten random samples to better understand the dataset and ensure that I was preprocessing correctly. - -
-
- {% include figure.html path="assets/img/2023-11-09-TransformersAndRNNs/10SP500_pre_norm.png" class="img-fluid rounded z-depth-1" %} -
-
-
- The closing price of 10 random stocks from the S&P 500 over a year timeframe. -
- -Once I had the data set up, I began to build each model. In addition to a simple RNN architecture and a Transformer model, I also built an LSTM model which is a specialized subset of RNNs that aim to solve a vanishing gradient problem in traditional RNNs . In addition to providing another reference model, I decided to include LSTM because I could easily test identical architectures between LSTM and RNNs . Additionally, LSTMs are notoriously slower to train and this provides a sanity check for my later timing results . - -In building the models, I tried to keep them all as simple and equivalent as possible for a fair comparison. This was simple for the LSTM and RNN, I just used two LSTM (or RNN) layers followed by a linear layer and then an output linear layer. Because of the different architecture of transformers, it didn't seem possible to create a completely equivalent architecture. However, I tried to approximate this by having just a singular attention layer that didn't have a feed foward network component and only had a standard layer normalization and then a multiheaded attention wiht 2 heads (the same number of layers for RNN/LSTM with the head size equivalent to the RNN/LSTM layer size). I followed this with a pooling layer, a linear layer (with the same size as the RNN/LSTM linear layer) and a linear output layer. I trained all models with a batch size of 25 and 30 epochs. - -For each model, I measured RMSE for the predictions (used for accuracy), time used to train the model, memory used to train the model, number of parameters, and storage used for parameters. The results are shown in the following table. - -| Model | RMSE | Memory in Training (KB) | Time to Train (s) | Parameters (#) | Memory for Parameters (KB) | -| ----------- | ----------------- | -| LSTM | 155.61 | 16575097 | 151.76 | 54190 | 211.68 | -| RNN | 149. 
07 | 4856823 | 67.25 | 16750 | 65.43 | -| Transformers | 36.46 | 3165225 | 87.00 | 2019 | 7.89 | - -As expected, the LSTM model runs much slower with higher memory usage which is consistent with literature models . When comparing the models, on first look, it seems like the Transformer model greatly outperforms the other models, both in terms of accuracy and memory with comprable training times to the RNN. However, I noticed something odd when I visualized my results: while transformers performed better in RMSE, it didn't look like they performed better. - -
-
- {% include figure.html path="assets/img/2023-11-09-TransformersAndRNNs/no_norm_performance_yesLSTM.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Performance of initial model experiments. Before the black line, is the training data of the closing price, after the line, the testing data as well as the predictions for all three models are shown. -
- -
-
- {% include figure.html path="assets/img/2023-11-09-TransformersAndRNNs/no_norm_forcast_yesLSTM.png" class="img-fluid rounded z-depth-1" %} -
-
-
- The testing data compared to the predictions for all three models. -
- -Besides noting that the models all did fairly well given their simplicity, this was puzzling. Addditionally, when I reran the models, I noted that the RMSE values for the LSTM/RNN models varied wildly with results between 50-550 whereas transformer's performance was consistently around 35. To investigate, I printed out the RMSE for each prediction and analyzed them. I found that most errors were fairly small but there were a couple very large errors that ended up skewing the overall reported average. In visualizing that outlier and performance between the models, I saw that the prices for the outliers were much higher than most stocks, making the LSTM/RNN models predict a much lower price. - -
-
- {% include figure.html path="assets/img/2023-11-09-TransformersAndRNNs/Outlier_performance_yesLSTM.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Outlier visualization of one stock with all three model's performance. -
- -Transformers still do okay here, likely do to the first normalization layer I used. Thus, to make the problem more equal, I decided to normalize all of the data at the onset. - -
-
- {% include figure.html path="assets/img/2023-11-09-TransformersAndRNNs/10SP500_norm.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Sample of data once normalized. -
- -When rerunning the training, the tabular results match the visualizations. Surprisingly, Transformers perform worse than RNNs/LSTMs, with less memory used but no real difference in training time. Even with adding complexity to the Transformer model via increasing the feed-forward network complexity through increasing the size of the embedded feed forward network and increasing the number of attention layers, no performance difference was seen -- the time to train just substantially increased. - -| Model | RMSE | Memory in Training (KB) | Time to Train (s) | Parameters (#) | Memory for Parameters (KB) | -| ----------- | ----------------- | -| LSTM | 0.125 | 8233179 | 128.25 | 54190 | 211.68 | -| RNN | 0.121 | 4147757 | 87.58 | 16750 | 65.43 | -| Transformers | 0.281 | 3148379 | 87.38 | 2019 | 7.89 | -| Complicated Transformers | 0.282 | 40052260 | 1243.01 | 16248 | 63.47 | - -
-
- {% include figure.html path="assets/img/2023-11-09-TransformersAndRNNs/Testing_yesLSTM.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Performance of model experiments with normalized data. Before the black line, is the training data of the closing price, after the line, the testing data as well as the predictions for all three models are shown. -
- -
-
- {% include figure.html path="assets/img/2023-11-09-TransformersAndRNNs/Forcast_yesLSTM.png" class="img-fluid rounded z-depth-1" %} -
-
-
- The testing data compared to the predictions for all three models with normalized data. -
- -This seems to go against prior results which almost universally found Transformers faster without sacrificing efficiency . I hypothesized that this could be because Transformers generally have poor length generalization and perhaps this is an inoptimal time window for this particular model . This could also explain the lack of a time improvement as a transformer's time complexity is quadtratic whereas RNN's is linear <, . Presumably, the quadratic performance will slow down the Transformer when parallelization becomes impossible . To test this hypothesis, I decided to try to predict my results with a smaller time window. - -I did this by predicting the closing price of just one day of data using a week of prior data. I normalized all data and retrained my models. I reverted back to the simple transformer model in an effort to test relatively equivalent model complexities. - - -| Model | RMSE | Memory in Training (KB) | Time to Train (s) | Parameters (#) | Memory for Parameters (KB) | -| ----------- | ----------------- | -| LSTM | 0.386 | 9588885 | 19.00 | 53221 | 207.89 | -| RNN | 0.381| 4197690 | 13.45 | 15781 | 61.64 | -| Transformers | 0.384 | 2707340 | 11.45 | 1050 | 4.1 | - -As the results show, my hypothesis was correct. The transformer performed much faster without a reduction in accuracy. However, it is also very possible that I didn't see a time difference because I am using small models with a short training time. These timing differences could become larger with more computationally intensive models. - - -## ECG Model Comparisons -While the results from the stock dataset were interesting, I also wanted to test these models with a different type of input that perhaps would capture different underlying strengths and weaknesses of the models. I decided to use an ECG to predict the presence of an abnormality in the heart beat. This represents a difference in the stock dataset in three key ways: - -1) The output is binary instead of discrete. 
-2) There is a better source of ground truth for this data. If there was a definitive way to predict the behavior of a stock, everyone would be rich, but that's not the case -- there's inherent uncertainty and an expected level of inaccuracy. For health data, the person will have the condition or not and an experienced cardiologist would be able to definitively diagnose the patient. -3) The input has an expected, structured shape. All ECGs are supposed to look roughly the same and should have a similar visibility in the dataset. This has effects on the causality window used in models that I was interested in analyzing. - -I first visualized my data for both the abnormal and normal heart beats. The overall sample size was around 9000 patients, and I artificially created a 50/50 split between abnormal and normal to prevent class imbalance. I once again used an 80/20 train/test split for my models. - -
-
- {% include figure.html path="assets/img/2023-11-09-TransformersAndRNNs/ECG.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Data visualization for ECG. -
- -I immediately ran into difficulties once I began training with the performance of all models really being a coin toss between the two. I then focused my time on trying to build more complex models. For the RNN, I added more layers with varying dimensions and played around with adding dropout and linear layers. For the Transformer, I built up the feedforward network part of the algorithm by increasing the size of the embedded feed forward network and adding multiple attention layers. For both, I tuned hyperparameters such as the optimizer, batch size, and number of epochs. Despite this results still remined poor. - -
-
- {% include figure.html path="assets/img/2023-11-09-TransformersAndRNNs/Loss.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Loss curve for both models on training and validation data. -
- -
-
- {% include figure.html path="assets/img/2023-11-09-TransformersAndRNNs/ROC.png" class="img-fluid rounded z-depth-1" %} -
-
-
- ROC curves for both models with displayed AUC values. -
- -There is virutally no reduction on validation loss for the RNN graph, no matter what structure I chose. While there is a normal looking curve for transformer, the scale of loss reduction is very small when you consider the y-axis. Additionally, the RNN network never performed better than randomly, whereas the Transformer network was only slightly improved. - -One interpretation of these results could be that the Transformer model performed better. However, because neither of these architectures perfomred overly sucessfully, I don't think that is a sound conclusion. It is unclear to me if this is a shortcoming of my code or a difficulty with the problem and dataset. This would be an area where future work is required. - -My main takeaway from this process of working with the ECG data was how much easier it was to tune and problemsolve with the Transformer than the RNN. For the Transformer, I was able to adjust the number of heads or the sizes of heads, or the feed foward network, etc, whereas, in the RNN, I really could only play with the layers of the RNN itself. While both of these architectures have black-box components, I found the Transformer a lot easier to work and play around with as a developer, and I could develop some intuition on what things I should change and why. This perhaps represents another difference from the transformer vs RNN debate but from a usability standpoint. - - -## Limitations -There are several limitations of this project. First, I only analyzed a couple of different datasets. This is not representative of all of the different applications of GNNs and transformers, meaning the conclusions are limited to the datasets chosen and are not necessarily representative of the full field. Additionally, my implementation of the models may not be the most efficient ones. 
While I tried to test a wide range of parameters, due to limited resources available (i.e., time and being one person) there are undoubtedly more optimal structures or hyperparameters that I did not test. Being able to test only a limited number of parameters, and likewise only a limited number of architectures, remains an overall limitation and challenge of the deep learning field . Notably, I also worked with small datasets and models. While this was useful in running a lot of experiments quickly, the differences in architectures, especially in regard to training time/memory, may be more important and more pronounced in larger-scale tasks. - -Additionally, I did not test every metric of success. While I focused on number of trainable parameters, training time, memory, and accuracy -- these are not the only things that matter in machine learning. For instance, in some applications, sensitivity might matter a lot more than specificity and overall accuracy. In others, explainability of the model may be essential, such as in time-sensitive healthcare settings . Thus, in making a decision in choosing one model over another, it is important to consider the specific application and benefits of each approach. These methods of success also might not be "fair" to each model. For instance, a much larger number of trainable parameters may be fine if overall time is not lost. If one has the capacity to run their machine overnight but with limited memory, they might reevaluate these metrics of success. Given these limitations, it is important to do a holistic evaluation of architectures when deciding which to use for a deep learning project and to use this project as a guideline for preliminary experiments to run in making that decision. This project does not serve as a ground truth as to why to choose one model over another. - -## Conclusions -Transformers seem to be easier to work with when there are still questions surrounding the data. 
For instance, with the stock dataset, there may be circumstances where you would prefer a model that can perform well without normalizing the dataset if, for instance, you care about the magnitude of closing prices between stocks. Similarly, for the ECG model, Transformers were easier to tune with different hyperparameters and felt more intuitive in comparison to working with the RNN. Transformers also consistently used less memory with far fewer parameters across the board, which is important when working in resource-limited systems. - -However, this project found that transformers are not always faster or more accurate than alternatives. While Liu et al. found that typical transformers can find shortcuts to learn automata , this might not be the case for all datasets across all applications. Previous studies have found that length generalization is a deficiency of transformers , and this is supported by this project, which found that for longer-term predictions, RNNs were the faster and more accurate approach post-normalization. - -These findings underscore the importance of taking the time to test different architectures in your research and not assuming that, just because Transformers are more popular, they are necessarily the best fit for your problem. In deep learning research, we often get bogged down in tuning a model, and it's important to take a step back and consider your assumptions about the task -- which may include the broader model consideration. 
- diff --git a/_posts/2023-11-09-adaptive-controller-graph-eom.md b/_posts/2023-11-09-adaptive-controller-graph-eom.md deleted file mode 100644 index 54008317..00000000 --- a/_posts/2023-11-09-adaptive-controller-graph-eom.md +++ /dev/null @@ -1,326 +0,0 @@ ---- -layout: distill -title: Adaptive Controller with Neural Net Equations of Motion for High-DOF Robots -description: This project aims to develop an adaptive control mechanism using a graph neural network to approximate the equations of motion (EoM) for high-degree-of-freedom (DOF) robotic arms bypassing the need for symbolic EoM to build an adaptive controller. -date: 2023-11-09 -htmlwidgets: true - -authors: - - name: [Thanh Nguyen] - url: "https://zicez.github.io" - affiliations: - name: [MIT Mechanical Engineering Department] - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-adaptive-controller-graph-eom.bib - -# Add a table of contents to your post. -toc: - - name: Introduction - - name: Background and Related Work - - name: Experiments and Results - subsections: - - name: Generating Training Data - - name: Attempt 1 - Graph Neural Networks - - name: Attempt 2 - LSTM - - name: Attempt 3 - Transformer - - name: Final Attempt - Physics Informed Structure Transformer - - name: Conclusion ---- - -## Introduction -Adaptive controllers are integral to modern robotic arms, enabling robots to adjust to dynamic environments and internal variations such as actuator wear, manufacturing tolerances, or payload changes. 
At the heart of such controllers is the formulation of the robot's Equations of Motion (EoM), typically expressed in the form: - -The standard symbolic form of EoM is represented as: - -$$ -M(q)q'' + C(q, q') = T(q) + Bu -$$ - -where: - -- \( M(q) \) is the mass matrix -- \( C(q, q') \) represents Coriolis and centripetal forces -- \( T(q) \) depicts gravitational torques -- \( B \) is the input transformation matrix -- \( u \) denotes control input -- \( q, q' \) are the joint angle state variables and their derivatives, respectively. - -The symbolic complexity of the EoM increases considerably for robots with a high Degree of Freedom (DOF), due to the analytical resolution of the Lagrangian or Hamiltonian dynamics required. While these equations can be derived algorithmically, the computational burden is significant, and the resulting symbolic equations are extensively lengthy. To illustrate, consider the EoM for a 7-DoF Panda Emika Franka robot arm [(link)](https://github.com/marcocognetti/FrankaEmikaPandaDynModel/tree/master/matlab/dyn_model_panda). The code that determines the EoM is extraordinarily verbose. - - -{% include figure.html path="assets/img/2023-11-09-adaptive-controller-graph-eom/panda.png" class="img-fluid" %} - -The aim of this project is to bypass the need for an explicit symbolic articulation of the EoM by formulating a neural network representation. With an accurately modeled neural network, it could serve as a foundational element in the development of an adaptive controller. The goal is for the controller to adapt a robotic arm's physical parameters based on calibration sequences and to estimate the mass and inertia matrix of unfamiliar payloads. - -Aside from symbolic representation, the EoM can also be computed numerically at each operating point using the Recursive Inertia Matrix Method , which has a computational complexity of \( O(n^3) \), where \( n \) is the number of joints in the rigid body. 
Substituting this computation-heavy method with a neural network, we can potentially calculate the forward dynamics in linear time, albeit with a trade-off in memory usage. - -## Background and Related Work - -Before we delve into neural net architecture, let's take a closer look at our problem and how it's solved right now. To come up with the symbolic equation for the EOM, we use Lagrangian Mechanics in which we compute the Potential, U, and Kinetic Energy, T, of our system. - -$$ L = T - U $$ -$$ \frac{d}{dt} \left( \frac{\partial L}{\partial \dot{q}_i} \right) - \frac{\partial L}{\partial q_i} = u_i $$ - -Below is a quick description of how this turns into the manipulator equations. Working through these equations, a pattern emerges in which you can group the terms into the manipulator equations. - -$$M(q)q'' + C(q, q') = T(q) + Bu$$ - -This method works well when the degree of freedom in the system is low. It provides much insight on how the dynamics of the system work. For example, the kinetic energy can be represented as: - -$$ T = \frac{1}{2} \dot{q}^T M(q) \dot{q} $$ - -Highlighting that \( M \) is symmetric and positive definite. However, as introduced earlier, this method scales poorly with complexity in higher DOF systems. - -However, as shown in the introduction, when this method is used for a 7 DOF system, the resulting equation is extraordinarily complex. - -Bhatoo et al. introduced a graph neural network to represent the potential and kinetic energy of rope systems—a high DOF system—by segmenting the system into short segments. Each segment was then treated as a node in the graph neural network. Although they didn't derive the forward dynamics using the Lagrangian formulation, the prospect of representing serially linked robot arms with graph neural networks was indicated as feasible. - -The other approach to creating the manipulator equation is to numerically calculate it at each operating point. 
There are two versions of this equation, the inverse dynamics and the forward dynamics version. In the inverse dynamics formulation, we essentially calculate -$$M(q)q'' + C(q, q') - T(q) = Bu$$ - -Given a particular state of the robot and a desired acceleration, what is the required torque? The inverse dynamics formulation can be computed with the Recursive Newton-Euler Algorithm with O(n) complexity where n is the number of joints . The key idea for this algorithm is that the motion of a body is directly influenced by the previous link. It's essentially a dynamic programming algorithm in which you can store the motion of one body and then apply it to the next body. This suggests that a directed graph neural net is sufficient to represent our model. - -{% include figure.html path="assets/img/2023-11-09-adaptive-controller-graph-eom/linked_motion.jpg" class="img-fluid" %} - -While very efficient to calculate, the inverse dynamics is not as useful as the forward dynamics version if the end goal is to create an adaptive controller. The forward dynamics is the model that describes what the accelerations of the system are based on current state and torque input. - -$$q'' = M(q)^{-1}\left(-C(q, q') + T(q) + Bu\right)$$ - -This formulation is more useful for an adaptive controller as we can compare predicted acceleration and actual acceleration, use their difference as a loss, and compute the gradient from the model parameters. The problem with the forward dynamics problem is that it requires an O(n^3) computation for a serially linked robot arm (the mass matrix inversion must be done). The algorithm for Forward Dynamics is called the Inertia Matrix Method . One physical intuition that can be gleaned from the algorithm is that reaction input torques propagate down the chain. Once again, this indicates that there is a one-way information flow from one link to the next. 
Given that this version is more computationally expensive, it would be more valuable to tackle with a neural net representation as well compared to the much faster inverse dynamics problem. - -{% include figure.html path="assets/img/2023-11-09-adaptive-controller-graph-eom/reaction_forces.jpg" class="img-fluid" %} - -## Experiments and Results - -### Generating Training Data -Utilizing numerical methods implemented in MATLAB, we generated a large volume of training data, spanning the full operational space of the robot arm. We based our robot arm model on realistic parameters from the publicly available data of the Emika Franka Panda, comprising a total of 10 links, seven revolute joints, and two fixed joints. After disregarding the base link, we have a model with 10 parameters for each link (mass, center of mass as a 1x3 vector, and the symmetric inertia matrix flattened into a 1x6 vector) and joint properties (angle, angular velocity, angular acceleration, and torque). - -{% include figure.html path="assets/img/2023-11-09-adaptive-controller-graph-eom/randomConfig.jpg" class="img-fluid" %} - -We simulated the arm moving from one random configuration to another—marked in the image above by an X — recording states, torques, and accelerations during transitions. To introduce variability, we applied realistic perturbations to the physical properties of each link after every 100 recorded motion paths. In total, we accumulated 250,000 data points - -### Attempt 1: Graph Neural Net -As inspired by Bhatoo, we rearrange the dataset as a Graph Dataset based on the PyTorch Geometric Library. Each node contains the 10 physical property parameters, angle, angular velocity, and torque input. In total, each node has 13 features. The output is set to be angular acceleration of the 7 joints (1x7 vector). As for the edge index, the graph is defined to be directed, either information flows from the last node to the first or the first node to the last node. 
This is inspired by the physical intuition that forces propagate sequentially from one body to the next, and that motion with respect to the global coordinate frame is also sequentially dependent on the previous body link. - -{% include figure.html path="assets/img/2023-11-09-adaptive-controller-graph-eom/node.jpg" class="img-fluid" %} - -We applied nine iterations of the Graph Convolution Layer, ensuring information flow from one end of the arm to the other. - -{% include figure.html path="assets/img/2023-11-09-adaptive-controller-graph-eom/GNN.png" class="img-fluid" %} - -Despite extensive parameter tuning, learning rate adjustments, and the application of various schedulers, the loss showed no convergence. Potential reasons for this include the complexity in capturing temporal dependencies and the possible oversimplification of force propagation through the links using graph convolutions. Stacking 9 different graph convolution layers also adds needless complexity. - -{% include figure.html path="assets/img/2023-11-09-adaptive-controller-graph-eom/gnn_plot.png" class="img-fluid" %} - -### Attempt 2: LSTM -Reevaluating the necessity for graph neural networks, we considered the inherent sequential nature of the information flow in our system. There are no branches in the structure of a serially linked robot arm; hence, an LSTM, which excels in capturing long-range dependencies in sequence data, seemed appropriate. 
The input sequence now reflects the node properties from the previous attempt, and our LSTM architecture is defined as follows: - -```python -class RobotLSTM(nn.Module): - def __init__(self, input_size, hidden_size, hidden_size2, output_size, num_layers=1): - super(RobotLSTM, self).__init__() - self.hidden_size = hidden_size - self.num_layers = num_layers - - # LSTM Layer - self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True) - - # Fully connected layers - self.l1 = nn.Linear(hidden_size, hidden_size2) - self.l2 = nn.Linear(hidden_size2, hidden_size2) - self.l3 = nn.Linear(hidden_size2, output_size) - - def forward(self, x): - # Initializing hidden state and cell state for LSTM - h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device) - c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device) - - # Forward propagate the LSTM - out, _ = self.lstm(x, (h0, c0)) - - # Pass the output of the last time step to the classifier - out = out[:, -1, :] # We are interested in the last timestep - out = F.relu(self.l1(out)) - out = F.relu(self.l2(out)) - out = self.l3(out) - - return out -``` - -Despite the theoretically simpler representation of the system, the results were still not satisfactory, with stabilization and convergence being unachievable. - -{% include figure.html path="assets/img/2023-11-09-adaptive-controller-graph-eom/lstm.png" class="img-fluid" %} - - -### Attempt 3: Transformer -With LSTM and GNN strategies failing to deliver conclusive results, we pivoted to the more general-purpose Transformer architecture. This paradigm shifts focus from a strictly sequential data flow to a structure capable of interpreting the relationships between all links through its attention mechanism. Note, we also use a sinusoidal positional encoder to maintain the order coherance of the robot arm. 
- -For the Transformer model, we employ the following architecture, designed to be flexible and adaptable to high DOF systems in future implementations: - -```python -class RobotTransformerModel(nn.Module): - def __init__(self, input_dim=13, d_model=24, mlp_dim=128, nhead=2, num_encoder_layers=5, dim_feedforward=48, output_dim=7): - super().__init__() - self.d_model = d_model # Store d_model as an instance attribute - self.embedding = nn.Linear(input_dim, d_model) - self.pos_encoder = PositionalEncoding(d_model) # Sinusoidal positional encoding - # Transformer Encoder Layer - self.transformer_encoder = Transformer( - dim=d_model, mlp_dim=mlp_dim, attn_dim=dim_feedforward, num_heads=nhead, num_layers=num_encoder_layers - ) - self.output_layer = nn.Sequential(nn.LayerNorm(d_model), nn.Linear(d_model, output_dim)) - self.criterion = nn.MSELoss() - - def forward(self, src): - src = src.permute(1, 0, 2) # Shape: [seq_len, batch, feature] - src = self.embedding(src) * math.sqrt(self.d_model) - src = self.pos_encoder(src) - output, alphas = self.transformer_encoder(src, attn_mask=None, return_attn=False) - output = output[0, :, :] # use the output of the first token (similar to BERT's [CLS] token) - return self.output_layer(output) -``` - -However, even with this advanced architecture, convergence remained elusive, indicating that further restructuring of the problem was required. - -{% include figure.html path="assets/img/2023-11-09-adaptive-controller-graph-eom/transformer_generic.png" class="img-fluid" %} - - -### Final Attempt: Physics Informed Structured Transformer -As nothing seems to be working, we now simplify our problem statement to gain some insights that could then we applied to the larger problem later. First, we now reformulate the serially linked robot arm dynamics into a double pendulum system with simplified parameters—each link defined by its length and a point mass at the end. 
The state variables in this reduced complexity scenario are simply the two link angles and their angular velocities. - -$$ -\mathbf{M}(q)\ddot{q} + \mathbf{C}(q, \dot{q})\dot{q} = \mathbf{T}_g(q) + \mathbf{B}u -$$ - -where - -$$ -\mathbf{M} = \begin{bmatrix} - (m_1 + m_2)l_1^2 + m_2l_2^2 + 2m_2l_1l_2\cos(q_1) & m_2l_2^2 + m_2l_1l_2\cos(q_2) \\ - m_2l_2^2 + m_2l_1l_2\cos(q_2) & m_2l_2^2 -\end{bmatrix}, -$$ - -$$ -\mathbf{C} = \begin{bmatrix} - 0 & -m_2l_1l_2(2\dot{q}_1 + \dot{q}_2)\sin(q_2) \\ - \frac{1}{2}m_2l_1l_2(2\dot{q}_1 + \dot{q}_2)\sin(q_2) & -\frac{1}{2}m_2l_1l_2\dot{q}_1\sin(q_2) -\end{bmatrix}, -$$ - -$$ -\mathbf{T}_g = -g \begin{bmatrix} - (m_1+m_2)l_1\sin(q_1) + m_2l_2\sin(q_1+q_2) \\ - m_2l_2\sin(q_1+q_2) -\end{bmatrix}, -$$ - -$$ -\mathbf{B} = \begin{bmatrix} - 1 & 0 \\ - 0 & 1 -\end{bmatrix}. -$$ - - -In this simpler problem statement, we switch to solving the Inverse Dynamics problem instead which numerically has a computational complexity of O(n). We assume that there is less complexity in this representation (a complete guess), so the neural net doesn't have to work as hard compared to the Forward Dynamics problem which has computational complexity of O(n^3). - -However, the task now focuses on the inverse dynamics with a reduced computational complexity of \( O(n) \), given that \( M(q) \) can be linearly separated from \( C \) and \( T_g \) and knowing that \( M(q) \) is symmetric and positive definite. - -For this, two Transformer neural networks were created, one for \( M(q)\ddot{q} \) and another for \( C(q, \dot{q})\dot{q} - T_g(q) \). Both models were trained separately with their respective datasets before being combined to model the complete manipulator equation. We can uniquely generate training data that only incite this mode by setting gravity and angular velocity to zero to get only M(q)*ddq = u. 
- -The architectures for these Transformers were revised to employ a Physics Informed approach, ensuring the adherence to known physical laws: - -``` python -class RobotTransformerModelH(pl.LightningModule): - def __init__(self, input_dim = 3, d_model =3, mlp_dim=128, nhead=2, num_encoder_layers=5, dim_feedforward=48): - super().__init__() - self.d_model = d_model - self.embedding = nn.Linear(input_dim, d_model) - self.pos_encoder = PositionalEncoding(d_model) - self.transformer_encoder = Transformer(dim=d_model, attn_dim=dim_feedforward, mlp_dim=mlp_dim, num_heads=nhead, num_layers=num_encoder_layers) - self.output_layer = nn.Sequential(nn.LayerNorm(d_model), nn.Linear(d_model, 3)) # Output is a 1x3 vector - self.criterion = nn.MSELoss() - - def forward(self, src, ddq): - src = src.permute(1, 0, 2) # Reshape for transformer - src = self.embedding(src) * math.sqrt(self.d_model) - src = self.pos_encoder(src) - output, alphas = self.transformer_encoder(src, attn_mask=None, return_attn=False) - output = output[0, :, :] - output = self.output_layer(output) - - # Create a batch of symmetric 2x2 matrices from the batch of 1x3 output vectors - batch_size = output.shape[0] - symmetric_matrices = torch.zeros((batch_size, 2, 2), device=self.device) - symmetric_matrices[:, 0, 0] = output[:, 0] - symmetric_matrices[:, 1, 1] = output[:, 1] - symmetric_matrices[:, 0, 1] = symmetric_matrices[:, 1, 0] = output[:, 2] - - transformed_ddq = torch.matmul(symmetric_matrices, ddq.unsqueeze(-1)).squeeze(-1) - - return transformed_ddq -``` - -{% include figure.html path="assets/img/2023-11-09-adaptive-controller-graph-eom/H.png" class="img-fluid" %} - - -Then we create a separate transformer neural net for C(q, dq)*dq - Tg(q). Similarly, we can generate training data that only exictes this mode by setting ddq = 0. 
- -```python -class RobotTransformerModelC(pl.LightningModule): - def __init__(self, input_dim = 4, d_model =3, mlp_dim=128, nhead=2, num_encoder_layers=5, dim_feedforward=48): - super().__init__() - self.d_model = d_model - self.embedding = nn.Linear(input_dim, d_model) - self.pos_encoder = PositionalEncoding(d_model) - self.transformer_encoder = Transformer(dim=d_model, attn_dim=dim_feedforward, mlp_dim=mlp_dim, num_heads=nhead, num_layers=num_encoder_layers) - self.output_layer = nn.Sequential(nn.LayerNorm(d_model), nn.Linear(d_model, 2)) # Output is a 1x2 vector - self.criterion = nn.MSELoss() - - def forward(self, src): - src = src.permute(1, 0, 2) # Reshape for transformer - src = self.embedding(src) * math.sqrt(self.d_model) - src = self.pos_encoder(src) - output, alphas = self.transformer_encoder(src, attn_mask=None, return_attn=False) - output = output[0, :, :] - output = self.output_layer(output) - - return output -``` -{% include figure.html path="assets/img/2023-11-09-adaptive-controller-graph-eom/C.png" class="img-fluid" %} - -We picked Transformer as it's more general compared to LSTM or GNN. Furthermore, it can easily be extended to high DOF system later on by just working with a longer input sequence. After training these two models independtly with their own training data set, we combined the two pretrained model togeher to recreate the full manipulator equation with a complete dataset. 
- -lass CombinedRobotTransformerModel(pl.LightningModule): - def __init__(self, config_H, config_C): - super().__init__() - # Initialize the two models - self.model_H = RobotTransformerModelH(**config_H) - self.model_C = RobotTransformerModelC(**config_C) - self.criterion = nn.MSELoss() - # Additional layers or attributes can be added here if needed - - def load_pretrained_weights(self, path_H, path_C): - # Load the pre-trained weights into each model - self.model_H.load_state_dict(torch.load(path_H)) - self.model_C.load_state_dict(torch.load(path_C)) - - def forward(self, src_H, ddq, src_C): - # Forward pass for each model - output_H = self.model_H(src_H, ddq) - output_C = self.model_C(src_C) - # Combine the outputs from both models - combined_output = output_H + output_C - return combined_output - -This modular approach, informed by the physical structure of the dynamics, resulted in improved convergence and an adaptive controller with the capability to generalize well to unseen conditions of the double pendulum. - -{% include figure.html path="assets/img/2023-11-09-adaptive-controller-graph-eom/combined.png" class="img-fluid" %} - - -## Conclusion -Through this journey of building and testing various neural network architectures to approximate the equations of motion for high-DOF robotic systems, it becomes evident that while cutting-edge machine learning tools hold promise, their effectiveness is tied to the physical realities of the problems they aim to solve. Success in neural net modeling involves really understanding the data and problem you are trying to solve. Here we managed to make a little head way in modeling the EOM of a 2 DOF system by mimicking the structure of the analytical solution. - -For future work, we should take the success in the 2 DOF system and push it for higher DOF with more complex parameters. We can generate data that can isolate specific motion modes of the model that can be used to train sections of the neural net at a time. 
By then training all the modes independently, we can stitch together the whole structure for the whole dataset. diff --git a/_posts/2023-11-09-autodecoders.md b/_posts/2023-11-09-autodecoders.md deleted file mode 100644 index 7a507737..00000000 --- a/_posts/2023-11-09-autodecoders.md +++ /dev/null @@ -1,188 +0,0 @@ ---- -layout: distill -title: "To Encode or Not To Encode: The Case for the Encoder-free Autodecoder Architecture" -description: While the traditional autoencoder architecture consists of an encoder and a decoder to compress and reconstruct information with only the most prominent features, some recent work have begun to utilize an alternate framework, the autodecoder, in specific applications in the field of representation learning. Skipping the encoder network altogether and learning latent codes directly as parameters, we aim to compare the two architectures on practical reconstruction tasks as well as dive into the theory of autodecoders and why they work, along with certain novel features that they bring. -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Seok Kim - affiliations: - name: Massachusetts Institute of Technology - - name: Alexis Huang - affiliations: - name: Massachusetts Institute of Technology - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-autodecoders.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Related Work - - name: Methodology - - name: Experimentation - - name: Conclusion - - name: References - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. 
-_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Autodecoders - -### Introduction - -Autoencoders have been a part of the neural network landscape for decades, first proposed by LeCun in 1987. Today, many variants of the autoencoder architecture exist as successful applications in different fields, including computer vision and natural language processing, and the variational autoencoder remains among the forefront of generative modeling. Autoencoders are neural networks trained to reconstruct their input as their output via compression through dimensionality reduction, accomplishing this task with the use of an encoder-decoder network. - -Autoencoders comprise of the encoder network, which takes a data sample input and translates it to a lower-dimensional latent representation consisting of only the most necessary features, and the decoder network, which attempts to reconstruct the original data from this encoding. By learning a compressed, distributed representation of the data, the latent space learned by autoencoders is usable for a plethora of downstream tasks. - -With traditional autoencoders, both the encoder and decoder are trained, but for certain applications— particularly generative tasks— only the decoder is utilized for inference. Because the itself encoder is not used at test time, training an encoder may not be an effective use of computational resources; the autodecoder is an alternative architecture that operates without an encoder network and brings some novel benefits. 
- -Rather than using the encoder to encode an input into a low-dimensional latent code, each sample in the training set begins with a randomly initialized latent code, and the latent codes and decoder weights are updated jointly during training time. For inference on new data, the latent vector for a given sample is then also randomly initialized and updated through an additional optimization loop with the decoder's frozen weights. - -{% include figure.html path="/assets/img/2023-11-09-autodecoders/encoderdecoder.png" class="img-fluid" style="display:block;" caption="The architecture for an autoencoder (top) compared to that of an autodecoder (bottom)."%} - -Are explicit encoders necessary for image reconstruction? What are the unique benefits that come from using decoder-only architectures? One interesting application of autodecoders is the ability to reconstruct complete samples from partial inputs. The main focus of our research revolved around testing this ability, answering the question of how much of a sample is required for a complete reconstruction using an autodecoder given an expressive latent space, and comparing its performance to that of an autoencoder. - -Furthermore, we discuss additional applications in various fields that other research has accomplished in part due to the utilization of the autodecoder architecture over the traditional autoencoder, with a focus on the beneficial properties that we explore in our experiments, including partial reconstructions. - -### Related Work - -Different literature have utilized autodecoder frameworks in the past along with providing rationale for their usage, mainly for tasks related to reconstruction or generative modeling through representation learning. However, none have provided standalone examples of their use, something we aim to accomplish in this blog. - -The Generative Latent Optimization framework was introduced by Bojanowski et al. 
(2019) as an alternative to the adversarial training protocol of GANs. Instead of producing the latent representation with a parametric encoder, the representation is learned freely in a non-parametric manner. One noise vector is optimized by minimizing a simple reconstruction loss and is mapped to each image in the dataset. - -Tang, Sennrich, and Nivre (2019) trained encoder-free neural machine translation (NMT) models in an endeavor to produce more interpretable models. In the encoder-free model, the source was the sum of the word embeddings and the sinusoid embeddings (Vaswani et al., 2017), and the decoder was a transformer or RNN. The models without an encoder produced significantly poorer results; however, the word embeddings produced by encoder-free models were competitive to those produced by the default NMT models. - -DeepSDF, a learned continuous Signed Distance Function (SDF) representation of a class of shapes, was introduced by Park et al. (2019) as a novel representation for generative 3D modelling. Autodecoder networks were used for learning the shape embeddings, trained with self-reconstruction loss on decoder-only architectures. These autodecoders simultaneously optimized the latent vectors mapping to each data point and the decoder weights through backpropogation. While outperforming previous methods in both space representation and completion tasks, autodecoding was significantly more time consuming during inference because of the explicit need for optimization over the latent vector. - -Sitzmann et al. (2022) introduced a novel neural scene representation called Light Field Networks (LFNs), reducing the time and memory complexity of storing 360-degree light fields and enabling real-time rendering. 3D scenes are individually represented by their individual latent vectors that are obtained by using an autodecoder framework, but it is noted that this may not be the framework that performs the best. 
The latent parameters and the hypernetwork parameters are both optimized in the training loop using gradient descent; the LFN is conditioned on a single latent variable. Potential applications are noted to include enabling out-of-distribution generalization through combining LFNs with local conditioning. - -Scene Representation Networks (SRNs) represent scenes as continuous functions without knowledge of depth or shape, allowing for generalization and applications including few-shot reconstruction. SRNs, introduced by Sitzmann, Zollhöfer and Wetzstein (2019), represent both the geometry and appearance of a scene, and are able to accomplish tasks such as novel view synthesis and shape interpolation from unsupervised training on sets of 2D images. An autodecoder framework is used to find the latent vectors that characterize the different shapes and appearance properties of scenes. - -### Methodology - -#### Traditional Autoencoder - -To establish a baseline, we first trained a convolutional autoencoder network containing both an encoder and decoder on a version of the MNIST dataset normalized and padded to contain 32x32 sized images. For our autoencoder architecture, we utilized convolutional layers with ReLU nonlinearity. -{% include figure.html path="/assets/img/2023-11-09-autodecoders/autoencoderloss.png" class="img-fluid" caption="The training and validation losses from the training loop for the autoencoder."%} -{% include figure.html path="/assets/img/2023-11-09-autodecoders/tsne_autodecoder.png" class="img-fluid" caption="The latent space learned by the autoencoder, color-coded by digit label and visualized through a 2-dimensional t-SNE plot. 
We see the expected result, with consistency and separation."%} -{% include figure.html path="/assets/img/2023-11-09-autodecoders/autoencodersampleoutput.png" class="img-fluid" caption="A sample output from an unseen image after training. We can see that our small convolutional autoencoder does a fairly good job at learning how to compress simple information into a single latent code and decode it into its original form."%} - -#### Autodecoder - -We implemented and trained an autodecoder on the same dataset by creating a convolutional decoder that takes latent codes as an input and transforms them into full images. We utilized transpose convolutions to upscale the images while additionally concatenating normalized coordinates to embed positional information, and also used leaky ReLU layers for nonlinearity. - -For training, the latent codes for 10,000 images in our training set were randomly initialized. The loss for our autodecoder then included three components: the reconstruction loss; the latent loss, which encourages latent values to be closer to zero in order to encourage a compact latent space; and the L2 weight regularization, which prevents the decoder from overfitting to the training set by encouraging the model weights to be small. -{% include figure.html path="/assets/img/2023-11-09-autodecoders/lossfn.png" class="img-fluid" caption="The loss function used to train the autodecoder. During inference, a custom loss function can be used based on the application."%} - -{% include figure.html path="/assets/img/2023-11-09-autodecoders/autodecoderloss.png" class="img-fluid" caption="The training and validation losses from the training loop for the autodecoder. 
The validation loss has no actual meaning in the autodecoder framework, as new images would have a randomly initialized latent code and so would output nonsense. This loss was included simply to demonstrate this feature."%} - -Below are progressive reconstructions on the training data performed by the autodecoder as it trained and optimized both the decoder weights and the training set’s latent codes. We can observe that the digits’ general forms were learned before the exact shapes, which implies good concentration and consistency of the latent space between digits of the same class. - -{% include figure.html path="/assets/img/2023-11-09-autodecoders/progress1.png" class="img-fluid"%} -{% include figure.html path="/assets/img/2023-11-09-autodecoders/progress2.png" class="img-fluid"%} -{% include figure.html path="/assets/img/2023-11-09-autodecoders/progress3.png" class="img-fluid"%} -{% include figure.html path="/assets/img/2023-11-09-autodecoders/progress4.png" class="img-fluid" caption="Progressive reconstructions from top to bottom (model outputs compared to ground truth): 1. Decoding a randomly initialized latent code outputs nonsense. 2. The correct digit is reconstructed, implying that the latent space is improving, but the specific shape differs from that of the ground truth image. 3. The output’s shape begins to better match that of the ground truth. 4. The autodecoder and latent code are optimized to be able to effectively reconstruct the ground truth image."%} - -{% include figure.html path="/assets/img/2023-11-09-autodecoders/tsne_autodecoder.png" class="img-fluid" caption="The latent space learned by the autodecoder, also visualized through a 2-dimensional t-SNE plot. When compared to the t-SNE plot representing the latent space learned by the autoencoder, we again see consistency but notice that here the clusters are more compact. 
While the distance between clusters in t-SNE plots does not have a definite meaning, this could potentially imply that the features of shapes, rather than the shapes themselves, are better learned, as different digits share similar features (curves, straight lines, etc)."%} - -Upon training the autodecoder, for inference on a new image we first freeze the decoder weights and then run an additional gradient descent-based optimization loop over a new randomly initialized latent code with reconstruction loss. - -{% include figure.html path="/assets/img/2023-11-09-autodecoders/autodecodersampleoutput.png" class="img-fluid" caption="Output from the trained autodecoder on a new image from the test set."%} - -### Experimentation - -One benefit of the autodecoder framework is that because we have an additional optimization loop for each input during inference, we are able to do varying pixel-level reconstructions, whereas an autoencoder is designed and trained to reconstruct complete images each time. - -We demonstrate this feature in our experiments below by applying center masks to our images before autoencoding or decoding. - -1: We trained a traditional **autoencoder** with generic reconstruction loss, and input an image with a mask in the center. The output is expected, as the autoencoder learned to reconstruct whatever it saw, and so the empty space from the mask is included in the result. - -{% include figure.html path="/assets/img/2023-11-09-autodecoders/autoencoder_input_7.png" class="img-fluid" width="50" caption="The input image"%} - -{% include figure.html path="/assets/img/2023-11-09-autodecoders/autoencoder_output_7.png" class="img-fluid" caption="The reconstructed image compared to the image without the mask."%} - -2: We trained a traditional **autoencoder** with reconstruction loss without considering a centered square area and input an unmodified image. 
The output is again expected, as the autoencoder was trained to fully disregard the center area, and so the output is empty in that region. - -{% include figure.html path="/assets/img/2023-11-09-autodecoders/autoencoder_input_4.png" class="img-fluid" width="50" caption="The area of the images that the autoencoder is trained to learn on."%} - -{% include figure.html path="/assets/img/2023-11-09-autodecoders/autoencoder_output_4.png" class="img-fluid" caption="The model output compared to the original image."%} - -3: We trained an **autodecoder** with generic reconstruction loss, and during the optimization loop for inference we utilized a custom loss function that did not consider the masked area. However, in this case, we are still able to reconstruct the original image to varying levels of success because of the latent space we originally learned through the training loop. - -Shown below are the areas optimized in the loss functions, along with the decoded output and original image. - -{% include figure.html path="/assets/img/2023-11-09-autodecoders/8x8mask_input.png" class="img-fluid" width="50"%} -{% include figure.html path="/assets/img/2023-11-09-autodecoders/8x8mask_output.png" class="img-fluid" caption="An 8x8 mask."%} - -{% include figure.html path="/assets/img/2023-11-09-autodecoders/12x12mask_input.png" class="img-fluid" width="50"%} -{% include figure.html path="/assets/img/2023-11-09-autodecoders/12x12maskoutput.png" class="img-fluid" caption="A 12x12 mask. 
Even with significant information about the digit missing, the autodecoder is able to sufficiently reconstruct the ground truth image based on the learned information."%} - -{% include figure.html path="/assets/img/2023-11-09-autodecoders/16x16mask_input.png" class="img-fluid" width="50"%} -{% include figure.html path="/assets/img/2023-11-09-autodecoders/16x16mask_output.png" class="img-fluid" caption="A 16x16 mask."%} - -{% include figure.html path="/assets/img/2023-11-09-autodecoders/20x20mask_input.png" class="img-fluid" width="50"%} -{% include figure.html path="/assets/img/2023-11-09-autodecoders/20x20mask_output.png" class="img-fluid" caption="A 20x20 mask. Although the reconstructed digit is ultimately incorrect, we see that we are able to get very close even with extremely limited inputs."%} - -To analyze and compare the latent spaces learned by both our autoencoder and autodecoder, we additionally perform linear interpolation (with α=0.5) between the embeddings of two images and include their decoded results below. - -{% include figure.html path="/assets/img/2023-11-09-autodecoders/autoencoder_interpolation.png" class="img-fluid" caption="The output of the decoded interpolation of two embeddings from the autoencoder."%} - -{% include figure.html path="/assets/img/2023-11-09-autodecoders/autodecoder_interpolation.png" class="img-fluid" caption="The output of the decoded interpolation of two embeddings from the autodecoder."%} - -The autoencoder output was somewhat expected due to the simplistic nature of the MNIST dataset, and we can see a merge of the two images with equal features of both. - -More interesting was the output for the autodecoder, which simply returned an image consisting of the pixel average of both images. 
Some hypotheses for this result include: - -- The shape of the latent space for the learned autodecoder potentially being one that does not pair well with linear interpolation, causing linear interpolations in latent space to be equivalent to those in the data space. Meanwhile, the shape of the latent space for the autoencoder might better match a Gaussian, which translates to effective nonlinear interpolations in the data space, which is desired. -- The inductive bias from the existence of the encoder architecture allowing for better interpolatability. - -### Conclusion - -#### Discussion - -While autoencoders (and variations such as VAEs) have traditionally been the standard architectures for representation learning, we explore an alternate autodecoder architecture, in which the encoder is excluded and individual latent codes are learned along with the decoder. We investigated the necessity of an explicit encoder in representation learning tasks and found that even without an encoder network, we are able to learn latent representations of input data through the optimization of randomly initialized latent codes during the training loop. Through this alternate dimensionality reduction process, we showed that we were still able to learn a consistent latent space on a multi-class dataset. Furthermore, we showed that through the use of an additional optimization loop for inference rather than learned encoder weights, the autodecoder can learn to reconstruct incomplete observations through pixel-level optimizations. - -The autodecoder has the potential for many further applications beyond the scope of the research and experiments introduced in this blog. As an example, the task of prior-based 3D scene reconstruction in the field of computer vision, in which novel views of a 3D scene can be generated from a limited number of static images of that scene along with their camera poses, utilizes the autodecoder architecture to guarantee better out-of-distribution views. 
This task involves the use of camera pose as an additional source of information in addition to input images, something that the encoder itself is unable to integrate when encoding images, leading to the valuable scene representation information being left out. Meanwhile, because the latent code itself is learned in an autodecoder, it is able to use the camera pose to effectively generalize to novel viewpoints. This serves as just one of several examples of the autodecoder being able to carry out tasks normally gatekept by the limitations of the encoder. - -#### Limitations - -Some limitations of the encoder-free architecture include certain fallbacks discussed in our experiments, including the difficulties in generating satisfactory novel outputs through linear interpolation of the latent space. Furthermore, while the existence of a secondary optimization loop during inference comes with interesting properties such as being able to define unique loss functions for different purposes, this can be more computationally or temporally costly than running inputs on a trained encoder for inference. Regardless, as much of the research around this topic has emerged only within the past several years, it can be expected that autodecoders and their unique properties will continue to emerge, evolve, and find use in novel applications in the years to come. - -### References - -Robin Baumann. Introduction to neural fields, 2022. - -Piotr Bojanowski, Armand Joulin, David Lopez-Paz, and Arthur Szlam. Optimizing the latent space of generative networks. arXiv preprint arXiv:1707.05776, 2017. - -Jeong Joon Park, Peter Florence, Julian Straub, Richard Newcombe, and Steven Lovegrove. Deepsdf: Learning continuous signed distance functions for shape representation. In Proceedings of the IEEE/CVF conference on computer vision and pattern recognition, pages 165–174, 2019. - -Vincent Sitzmann, Semon Rezchikov, Bill Freeman, Josh Tenenbaum, and Fredo Durand. 
Light field networks: Neural scene representations with single-evaluation rendering. Advances in Neural Information Processing Systems, 34:19313–19325, 2021. - -Vincent Sitzmann, Michael Zollhöfer, and Gordon Wetzstein. Scene representation networks: Continuous 3d-structure-aware neural scene representations. Advances in Neural Information Processing Systems, 32, 2019. - -Gongbo Tang, Rico Sennrich, and Joakim Nivre. Encoders help you disambiguate word senses in neural machine translation. In Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP). Association for Computational Linguistics, 2019. diff --git a/_posts/2023-11-09-conflict-causality.md b/_posts/2023-11-09-conflict-causality.md deleted file mode 100644 index 31e13c07..00000000 --- a/_posts/2023-11-09-conflict-causality.md +++ /dev/null @@ -1,267 +0,0 @@ ---- -layout: distill -title: "Tracing the Seeds of Conflict: Advanced Semantic Parsing Techniques for Causality Detection in News Texts" -description: This blog post outlines a research project aiming to uncover cause-effect-relationships in the sphere of (political) conflicts using a frame-semantic parser. -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Philipp Zimmer - url: "https://www.linkedin.com/in/pzimmer98mit/" - affiliations: - name: IDSS, Massachusetts Institute of Technology - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-conflict-causality.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. 
-toc: - - name: Introduction - - name: Literature Background - subsections: - - name: Qualitative Research on Conflicts - - name: The Role of Quantitative Methods - - name: Bridging the Gap with Explainable Modeling Approaches - - name: Data - subsections: - - name: News Articles as Data Source - - name: Descriptive Analysis of the Data - - name: Methodology - subsections: - - name: The Frame-Semantic Parser - subsubsections: - - name: Contextualizing the Approach - - name: How Does a Frame-Semantic Parser Work? - - name: Implementation of the Frame-Semantic Parser - - name: Seed Selection via Semantic Similarity Analysis to Inform Causal Modeling - subsubsections: - - name: Understanding Semantic Similarity - - name: How Do We Compute Semantic Similarity? - - name: Domain-Specific Metrics - - name: Findings & Insights - subsections: - - name: Frame-Semantic Parser Identifies Causal Frames Reliably - - name: Differences in Seed Phrase Selection - - name: Employing Domain-Specific Performance Metrics - - name: Conclusion & Limitations - subsections: - - name: Key Findings - - name: Limitations & Future Research - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Introduction - -> *"In the complex world of political conflicts, understanding the underlying dynamics can often feel like trying to solve a puzzle with missing pieces. 
This project attempts to find those missing pieces through a novel approach that combines the insights of qualitative research with the precision of quantitative analysis."* - -{% include figure.html path="/assets/img/2023-11-09-conflict-causality/img1_map.jpg" class="img-fluid" %} -

Retrieved from https://conflictforecast.org

- -Political conflicts are multifaceted and dynamic, posing significant challenges for researchers attempting to decode their intricate patterns. Traditional methods, while insightful, often grapple with the dual challenges of scale and specificity. This project embarks on an innovative journey to bridge this gap, leveraging a frame-semantic parser to illustrate its applicability for the task and to discuss an approach to achieve domain-specificity for the model using semantic similarity. By synthesizing the depth of qualitative research into the scalability of quantitative methods, we aim to contribute to more informed analyses and actions in low-resource, low-tech domains like conflict studies. - -On this journey, the projects key contributions are: - -1. **Advancing Frame-Semantic Parsing in Conflict Research**: We introduce the frame-semantic parser, a method that brings a high degree of explainability to conflict studies. Particularly when used in conjunction with news articles, this parser emerges as a powerful tool in areas where data is scarce, enabling deeper insights into the nuances of political conflicts. - -2. **Harnessing Semantic Similarity for Domain Attunement**: The project underscores the significance of semantic similarity analysis as a precursor to frame-semantic parsing. This approach finely tunes the parser to specific thematic domains, addressing the gaps often present in domain distribution of common data sources. It illustrates how tailoring the parser input can yield more contextually relevant insights. - -3. **Demonstrating Domain-Dependent Performance in Frame-Semantic Parsing**: We delve into the impact of thematic domains on the performance of a transformer-based frame-semantic parser. The research highlights how the parser's effectiveness varies with the domain of analysis, primarily due to biases and structural peculiarities in the training data. 
This finding is pivotal for understanding the limitations and potential of semantic parsing across different contexts. - -4. **Developing Domain-Specific Performance Metrics**: In environments where additional, domain-specific labeled test data is scarce, the project proposes an intuitive method to derive relevant performance metrics. This approach not only aligns the evaluation more closely with the domain of interest but also provides a practical solution for researchers working in resource-constrained settings. - - -## Literature Background - -### Qualitative Research on Conflicts - - -Qualitative research has long been a cornerstone in the study of political conflicts. This body of work, now well-established, emphasizes the unique nature of each conflict, advocating for a nuanced, context-specific approach to understanding the drivers and dynamics of conflicts. Researchers in this domain have developed a robust understanding of the various pathways that lead to conflicts, highlighting the importance of cultural, historical, and socio-political factors in shaping these trajectories. While rich in detail and depth, this approach often faces challenges in scalability and systematic analysis across diverse conflict scenarios. - -### The Role of Quantitative Methods - -The emergence of computational tools has spurred a growing interest in quantitative approaches to conflict research. -These methods primarily focus on predicting the severity and outcomes of ongoing conflicts, with some success . However, the onset of conflicts remains challenging to predict, indicating a need for more sophisticated tools and methodologies. Quantitative methods provide scalability and a degree of objectivity but often fail to capture the complexities and evolving nature of conflicts. 's work on a conflict-cooperation scale illustrates the difficulty in quantifying conflict dynamics and the controversy in creating aggregate time series from event data. 
highlight the importance of diverse, accurate predictions in conflict forecasting, noting the value of incorporating contextual variables to predict early signals of escalation. - -### Bridging the Gap with Explainable Modeling Approaches - -The challenge now lies in bridging the insights from qualitative research with the systematic, data-driven approaches of quantitative methods. While the former provides a deep understanding of conflict pathways, the latter offers tools for large-scale analysis and prediction. The key to unlocking this synergy lies in developing advanced computational methods to see the smoke before the fire – identifying the early precursors and subtle indicators of impending conflicts . This approach aligns with the evolving needs of conflict research, where traditional models may not adequately address the complex and non-linear nature of conflict data . demonstrate the potential of utilizing newspaper text for predicting political violence, suggesting a novel data source for uncovering early conflict indicators. However, these early attempts are outdated given the fast technological development in recent years, particularly in the field of natural language processing. This research endeavour seeks to fill that gap and introduce a scalable, explainable method to quantitative conflict research. - - -## Data - -The project capitalizes on the premise that risk factors triggering a conflict, including food crises, are frequently mentioned in on-the-ground news reports before being reflected in traditional risk indicators, which can often be incomplete, delayed, or outdated. -By harnessing newspaper articles as a key data source, this initiative aims to identify these causal precursors more timely and accurately than conventional methods. - -### News Articles as Data Source - -News articles represent a valuable data source, particularly in research domains where timely and detailed information is crucial. 
In contrast to another "live" data source that currently revels in popularity amongst researchers - social media data - news articles are arguably less prone to unverified narratives. While news articles typically undergo editorial checks and balances, ensuring a certain level of reliability and credibility, they certainly do not withstand all potential biases and are to be handled with caution - as arguably every data source. To counteract potential biases of individual news outputs, accessing a diverse range of news sources is essential. Rather than having to scrape or otherwise collect data on news articles, there is a set of resources available: - -* [NewsAPI](https://newsapi.org/): This platform provides convenient access to a daily limit of 100 articles, offering diverse query options. Its integration with a Python library streamlines the process of data retrieval. However, the limitation lies in the relatively small number of data points it offers, potentially restricting the scope of analysis. - -* [GDELT Database](https://www.gdeltproject.org/): Renowned for its vast repository of historical information spanning several decades, GDELT stands as a comprehensive data source. Its extensive database is a significant asset, but similar to NewsAPI, it predominantly features article summaries or initial sentences rather than complete texts, which may limit the depth of analysis. - -* [Factiva](https://www.dowjones.com/professional/factiva/): A premium service that grants access to the complete bodies of articles from a plethora of global news sources in multiple languages. While offering an exhaustive depth of data, this resource comes with associated costs, which may be a consideration for budget-constrained projects. - -* [RealNews](https://paperswithcode.com/dataset/realnews): As a cost-free alternative, this dataset encompasses entire newspaper articles collated between 2016 and 2019. 
Selected for this project due to its unrestricted accessibility and comprehensive nature, it provides a substantial set of articles, making it a valuable resource for in-depth analysis. - -### Descriptive Analysis of the Data - -The analysis delved into a selected subset of **120,000 articles** from the [RealNews](https://paperswithcode.com/dataset/realnews) open-source dataset. This subset was chosen randomly to manage the extensive scope of the complete dataset within the project's time constraints. Each article in this subset provided a rich array of information, including **url**, **url_used**, **title**, **text**, **summary**, **authors**, **publish_date**, **domain**, **warc_date**, and **status**. - -The range of articles spans from 1869 to 2019, but for focused analysis, we narrowed the scope to articles from **January 2016 through March 2019**. This temporal delimitation resulted in a dataset comprising **58,867 articles**. These articles originated from an expansive pool of **493 distinct news outlets**, offering a broad perspective on global events and narratives. The distribution of these articles across the specified time frame provides the expected observation of increasing news reporting, as visualized below. - -{% include figure.html path="/assets/img/2023-11-09-conflict-causality/img4_articlecounts.png" class="img-fluid" %} -

Counts of Articles over Time

- -To understand the content of our dataset's news articles better, we utilized the *TfidfVectorizer*, a powerful tool that transforms text into a numerical representation, emphasizing key words based on their frequency and distinctiveness within the dataset. To ensure focus on the most relevant terms, we filtered out commonly used English stopwords. The *TfidfVectorizer* then generated a *tf-idf matrix*, assigning weights to words that reflect their importance in the overall dataset. By summing the Inverse Document Frequency (IDF) of each term, we obtained the adjusted frequencies that helped identify the most influential words in our corpus. To visually represent these findings, we created a word cloud (see below), where the size of each word correlates with its relative importance. - -{% include figure.html path="/assets/img/2023-11-09-conflict-causality/img3_wordcloud.png" class="img-fluid" %} -

Word Cloud for Entire News Article Dataset (tf-idf adjusted)

- -## Methodology - -We showcase the applicability of a frame-semantic parsing to the study of conflicts and inform the model with domain-specific seed phrases identified through semantic similarity analysis. This approach not only demonstrates the effectiveness of the method in conflict studies but also showcases how domain-specific applications of deep learning tasks can be accurately applied and measured. Thus, we not only validate the utility of frame-semantic parsing in conflict analysis but also explore innovative ways to tailor and evaluate domain-specific performance metrics. - -### The Frame-Semantic Parser - -#### Contextualizing the Approach - -In the pursuit of bridging the gap between the robust theoretical understanding of conflict dynamics and the practical challenges in data availability, the frame-semantic parser emerges as a promising methodological tool. In a recent study (), a team of researchers established a proof-of-concept via its successful application of a frame-semantic parser for the study of food insecurity - a field with similar challenges surrounding data access and quality. While this study relied on what can now be considered the "old state-of-the-art," our proposed approach diverges towards a more contemporary, transformer-based model, inspired by the advancements outlined in . - -{% include figure.html path="/assets/img/2023-11-09-conflict-causality/img2_parser.png" class="img-fluid" %} -

Retrieved from https://github.com/swabhs/open-sesame

- - -#### How Does a Frame-Semantic Parser Work? - -At the heart of frame-semantic parsing, as conceptualized by and formalized by the FrameNet project , is the identification of structured semantic frames and their arguments from natural language text. As illustrated above, these frames encapsulate events, relations, or situations along with their participants, making it a critical tool in natural language understanding (NLU) tasks. The practical applications of frame semantics are broad, ranging from voice assistants and dialog systems to complex text analysis . - -The process of frame-semantic parsing constitutes three subtasks: - -* **Trigger Identification**: This initial step involves pinpointing locations in a sentence that could potentially evoke a frame. It's a foundational task that sets the stage for more detailed analysis. - -* **Frame Classification**: Following trigger identification, each potential trigger is analyzed to classify the specific FrameNet frame it references. This task is facilitated by leveraging lexical units (LUs) from FrameNet, which provide a strong indication of potential frames. - -* **Argument Extraction**: The final task involves identifying the frame elements and their corresponding arguments within the text. This process adds depth to the frame by fleshing out its components and contextualizing its application within the sentence. - -While frame-semantic parsers have arguably not received as much attention as other language modeling methods, three major contributions of the past few years can be highlighted. 's approach - which is still outperforming many other implementations - presented an efficient parser with softmax-margin segmental RNNs and a syntactic scaffold. It demonstrates that syntax, while beneficial, is not a necessity for high-performance frame-semantic parsing. 
explores the application of transformer-based architectures to frame semantic parsing, employing a multi-task learning approach that significantly improves upon previous state-of-the-art results. Most recently, developed the first open-source approach - treating frame semantic parsing as a sequence-to-sequence text generation task, utilizing a T5 transformer model. It emphasizes the importance of pretraining on related datasets and employing data augmentations for improved performance. The distinctive strength of a frame-semantic parser lies in its ability to contextualize information, rather than interpreting it in isolation. This feature is particularly invaluable in conflict analysis, where the semantics of discourse play a critical role. - -#### Implementation of the Frame-Semantic Parser - -The implementation of our frame-semantic parser involves several key steps. We begin by splitting our text data into sentences using a *split_into_sentences* function. This granular approach allows us to focus on individual narrative elements within the articles and since frame-semantic parsers are reported to perform better on sentence-level . - -In the heart of our methodology, we utilize various functions to extract and filter relevant frames from the text. Our *extract_features* function captures the full text of each frame element, ensuring a comprehensive analysis of the semantic content. The *filter_frames* function then refines this data, focusing on frames that are explicitly relevant to conflict, as informed by research on causal frames in FrameNet. - -To optimize the performance of our transformer-based parser, we build a *process_batch* function. This function handles batches of sentences, applying the frame semantic transformer model to detect and filter frames relevant to our study. - -Our approach also includes a careful selection of specific frames related to causality and conflict as we are interested in these frames and not just any. 
We rely on both manually identified frame names (informed by ) and pattern-based searches in **FrameNet** to compile a comprehensive list of relevant frames. This curated set of frames is instrumental in identifying the nuanced aspects of conflict narratives within the news articles. - -The implementation is designed to be efficient and scalable, processing large batches of sentences and extracting the most relevant semantic frames. This approach enables us to parse and analyze a substantial corpus of news articles, providing a rich dataset for our conflict analysis. - -### Seed Selection via Semantic Similarity Analysis to Inform Causal Modeling - -#### Understanding Semantic Similarity - -Semantic similarity plays a pivotal role in our methodology, serving as the foundation for expanding our understanding of how conflict is discussed in news articles. By exploring the semantic relationships between words and phrases, we can broaden our analysis to include a diverse array of expressions and viewpoints related to conflict. This expansion is not merely linguistic; it delves into the conceptual realms, uncovering varying narratives and perspectives that shape the discourse on conflict. - -#### How Do We Compute Semantic Similarity? - -To compute semantic similarity and refine our seed phrases, we employ a combination of distance calculation and cosine similarity measures. We begin with a set of initial key phrases **conflict**, **war**, and **battle**, ensuring they capture the core essence of our thematic domain. We then leverage pretrained word embeddings from the *Gensim* library to map these phrases into a high-dimensional semantic space. We also experimented with more sophisticated embedding approaches (like transformer-based) to compute the semantic similarity and thus obtain the seeds. When trading off complexity/time and performance, the simpler pretrained *Gensim* model prevailed. 
- -Our methodology involves generating candidate seeds from our corpus of documents, including unigrams, bigrams, and trigrams, with a focus on those containing key words related to conflict. We filter these candidates based on their presence in the word vectors vocabulary, ensuring relevance and coherence with our seed phrases. - -Using functions like *calculate_distances* and *calculate_cosine_similarity*, we measure the semantic proximity of these candidates to our initial seed phrases. This process involves averaging the distances or similarities across the seed phrases for each candidate, providing a nuanced understanding of their semantic relatedness. - -The candidates are then ranked based on their similarity scores, with the top candidates selected for further analysis. This refined set of seed phrases, after manual evaluation and cleaning, forms the basis of our domain-specific analysis, guiding the frame-semantic parsing process towards a more focused and relevant exploration of conflict narratives. - -### Domain-Specific Metrics - -In the final stage of our methodology, we integrate the identified seed phrases into the frame-semantic parser's analysis. By comparing the model's performance on a general set of sentences versus a subset containing at least one seed phrase, we assess the model's domain-specific efficacy. This comparison not only highlights the general capabilities of large language models (LLMs) but also underscores their potential limitations in domain-specific contexts. - -Our approach offers a pragmatic solution for researchers and practitioners in low-resource settings. We demonstrate that while general-purpose LLMs are powerful, they often require fine-tuning for specific domain applications. By utilizing identified domain-specific keywords to construct a tailored test dataset, users can evaluate the suitability of general LLMs for their specific needs. 
- -

In cases where technical skills and resources allow, this domain-specific dataset can serve as an invaluable tool for further refining the model through data augmentation and fine-tuning. Our methodology, therefore, not only provides a robust framework for conflict analysis but also lays the groundwork for adaptable and efficient use of advanced NLP tools in various thematic domains. - -We present the results for these domain-specific measures for **F1 score**, **recall**, and **precision**. Likewise, to illustrate performance differences across domains, we also conducted the entire approach for the finance domain, starting with the keywords **finance**, **banking**, and **economy**. - - -## Findings & Insights - -### Frame-Semantic Parser Identifies Causal Frames Reliably - -In this stage, we assess if the methodology is truly applicable to the domain of conflicts and for the use with news article data. We find that of our 37 identified cause-effect related frames, all are represented with various instances in our dataset. In fact, as few as 1,600 randomly selected news articles (processed in 100 batches of 16 batch samples) suffice to cover all cause-effect related frames. Therefore, for this intermediate step of the project, we gather support that the parser is in fact applicable to news article data. - -### Differences in Seed Phrase Selection - -We make one major observation between the results of the finance- versus conflict-specific seed selection for downstream use. Potentially driven by the fact that conflicts are drastically driven by geographic labels and information, a number of the top 50 seed phrases were geographic terms like "Afghanistan." Since we did not want to bias the downstream evaluation of our domain-specific metrics, we excluded these seed phrases and continued the analysis with 34 seeds. 
In contrast, the top 50 finance-specific seed phrases obtained from the semantic analysis were neither geographic nor linked to individual (financial) historic events, wherefore we continued the downstream analysis with all top 50 seed phrases. Already here we can observe the deviations across domains, giving more support to the idea of domain-specific evaluation and metrics. - -### Employing Domain-Specific Performance Metrics - -Our research involved an extensive evaluation of the frame-semantic parser, based on a transformer architecture, across various configurations and domain-specific datasets. We began by rebuilding and training the model using the vanilla code and a smaller model size without hyperparameter tuning. Subsequently, we fine-tuned the hyperparameters to match the baseline performance levels. After this, we move to one of the main contributions of this project: the domain-specific evaluation. The evaluation was carried out on domain-specific validation and test datasets, curated using seed words from **finance** and **conflict** domains to highlight differences across domains. - -The untuned model (*validation n = 646, test n = 1891*) showed an argument extraction **F1 score of 0.669** and a **loss of 0.181** on the validation set. On the test set, it presented an identical **F1 score of 0.669** and a **loss of 0.227**. -#### Hyperparameter-Tuned Performance - -Post hyperparameter tuning, there was a notable improvement in the model's validation performance (*n = 156*), with the **F1 score for frame classification reaching as high as 0.873**, and the **precision for trigger identification at 0.818**. The test metrics (*n = 195*) also showed consistent enhancement, with the **F1 score for frame classification at 0.864** and **trigger identification precision at 0.747**. 
- -

When evaluated on domain-specific datasets, **the model exhibited varying degrees of effectiveness**, which showcases our assumption that domains matter to the applicability of LLMs to domain-specific tasks and that our simple proposed way of generating domain-specific metrics can give insights on that. For the conflict keywords (*validation n = 121, test n = 255*), the model achieved a **validation F1 score of 0.865 for frame classification and 0.764 for trigger identification precision**. However, for the finance domain (*validation n = 121, test n = 255*), the **F1 score for frame classification was slightly higher at 0.878**, while the **trigger identification precision was higher at 0.781** compared to the conflict domain. - -The results indicate that the hyperparameter-tuned model significantly outperforms the vanilla model across all metrics. Additionally, domain-specific tuning appears to have a considerable impact on the model's performance, with the finance domain showing slightly better results in certain metrics compared to the conflict domain. These insights could be pivotal for further refinements and targeted applications of the frame-semantic parser in natural language processing tasks. Moreover, these observations fit our general understanding of the two domains. Reports on conflicts are likely to discuss the involved parties' reasons for specific actions like attacks on certain targets. Additionally, the actions in conflicts are arguably more **triggering** events than "the good old stable economy." Certainly, this research project can only be the beginning of a more rigorous assessment, but these findings show great promise of the idea of **generating and evaluating simple, domain-specific performance metrics**. - -{% include figure.html path="/assets/img/2023-11-09-conflict-causality/img5_performance.png" class="img-fluid" %} -

Performance Evaluation of Frame-Semantic Parser

- -## Conclusion & Limitations - -This project has embarked on an innovative journey, merging advanced natural language processing techniques with the intricate study of conflict. By harnessing the power of a transformer-based frame-semantic parser and integrating semantic similarity analysis, we have made significant strides in identifying causal relationships within news articles. This methodology has not only illuminated the dynamics of conflict as portrayed in media but also demonstrated the adaptability and potential of frame-semantic parsing in domain-specific applications. - -### Key Findings - -1. **Utility of Frame-Semantic Parsing**: Our work has showcased the frame-semantic parser as a valuable and explainable tool, particularly effective in data-scarce environments like conflict research. Its ability to contextualize information and discern nuanced semantic relationships makes it an indispensable asset in understanding complex thematic domains. - -2. **Semantic Similarity for Domain-Specific Perspective**: We illustrated the effectiveness of using semantic similarity to refine seed phrases, thereby tailoring the frame-semantic parser to the specific domain of conflict. This approach has proven to be a straightforward yet powerful means to customize advanced NLP models for targeted analysis. - -3. **Dependence on Domain for Model Performance**: Our findings highlight a significant insight: the performance of general-purpose language models can vary depending on the domain of application. This observation underscores the need for domain-specific tuning to achieve optimal results in specialized contexts. - -4. **Development of Domain-Specific Performance Metrics**: We proposed and validated a practical approach to developing domain-specific metrics, especially useful in resource-constrained environments. This methodology enables a nuanced evaluation of model performance tailored to specific thematic areas. 
- -### Limitations & Future Research - -Despite the promising results, our project is not without its limitations, which pave the way for future research opportunities: - -1. **Data Dependency**: The effectiveness of our approach is heavily reliant on the quality and diversity of the news article dataset. Biases in media reporting or limitations in the scope of articles can skew the analysis and affect the accuracy of the results. In an extended version of the project - and with funding - one could switch to the [Factiva](https://www.dowjones.com/professional/factiva/) dataset. - -2. **Applicability of Domain-Specificity to Other Themes**: While our method has shown efficacy in the context of conflict analysis, its applicability to other specific domains requires further exploration. Future research could test and refine our approach across various thematic areas to assess its broader utility. - -3. **Model Complexity and Interpretability**: While we have emphasized the explainability of the frame-semantic parser, the inherent complexity of transformer-based models can pose challenges in terms of scaling and deployment. Future work could focus on simplifying these models without compromising their performance - for instance via pruning and quantization. - -4. **Expansion of Semantic Similarity Techniques**: Our semantic similarity analysis was instrumental in refining seed phrases, but there is room for further enhancement. Incorporating more advanced semantic analysis techniques could yield even more precise and relevant seed phrases. While we found alternative methods, like BERT-based approaches to not yield significant improvements, ever more models flood the market. - -5. **Integration with Other Data Sources**: Expanding the dataset beyond news articles to include social media, governmental reports, or academic literature could provide a more holistic view of conflict narratives and their causal relations. 
- -In conclusion, our project represents a significant step forward in the intersection of natural language processing and conflict research. By addressing these limitations and building on our foundational work, future research can continue to push the boundaries of what is possible in this exciting and ever-evolving field. - diff --git a/_posts/2023-11-09-contrastivediffusion-image2video.md b/_posts/2023-11-09-contrastivediffusion-image2video.md deleted file mode 100644 index 84260721..00000000 --- a/_posts/2023-11-09-contrastivediffusion-image2video.md +++ /dev/null @@ -1,128 +0,0 @@ ---- -layout: distill -title: Robust Image to Video Generation Using Contrastive Diffusion Over Latents -description: Image-to-video (I2V) may be the next frontier of generative deep learning capabilities, but current models struggle with robustness, largely due to the implicit, rather than explicit, representation learning objective during traditional diffusion model training. Hence, we propose a new technique where a pre-trained contrastive model is used to train a diffusion model with a custom contrastive loss function to operate within a learned structured latent space for I2V problems, yielding, in theory, more structurally sound videos without loss of contextual information. -date: 2023-11-09 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Rishab Parthasarathy - affiliations: - name: MIT CSAIL - - name: Theo Jiang - affiliations: - name: MIT CSAIL - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-contrastivediffusion-image2video.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. 
-toc: - - name: Introduction and Motivation - - name: Related Work - - name: Proposed Project Outline - - name: Evaluation - subsections: - - name: Generation Quality - - name: Use of Contrastive Latent Space - - name: Implementation/Deliverables - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Introduction and Motivation - -With recent advances in computer vision and generative AI, we all have observed the various feats that diffusive models have achieved in conditional image generation. These models have demonstrated unparalleled ability in creativity, fidelity, and relevance when generating images from text prompts. Given this explosive success of diffusion for the task of image generation, the idea of applying the same concepts to conditional video generation seems like a logical follow-up. Yet, the field still lacks robust and compelling methods for conditional video generation with diffusion models. This raises the question: why might this be? Or perhaps a follow-up: what makes videos so hard in comparison to images? - -In an attempt to address our first question, if we take a brief dive into previous literature, we will find that the issue is not a lack of effort. Ho et al. , Zhang et al. , and Chen et al. , all explore this idea, yet the results from these methods are not nearly as exciting as the results we see in images. But why is this? - -Perhaps the answer lies in the solution to our second question. 
One of the most obvious complexities that videos have over images is also perhaps one of the most difficult: the temporal dependence between frames. But why is this relationship so hard for diffusion models? Following the work of Zhu et al. , we hypothesize that this is because the implicit learning of diffusive steps between images in a video is too complex of a problem for a diffusion model; relying on the model to learn the implicit relationship between representations of video frames is infeasible from a training and convergence standpoint. If we can instead learn diffusive steps over a more regularized learned latent space, the optimization problem can be greatly simplified and the diffusion model will in theory be more robust. - - -## Related Work - -Taking a step back to examine the current state of research, we find that current image-to-video frameworks typically still use a traditional diffusion architecture, going straight from text and image representations to an output image. However, this naive approach struggles with serious issues like frame clipping and loss of contextual information, which is expected since noise-based sampling can easily throw off the output of individual frames. - -Hence, Ho et al. in 2022 proposed the first solution, supplementing conditional sampling for generation with an adjusted denoising model that directly forces image latents to be more similar to the corresponding text latents . While this achieved improved results over the straightforward diffusion approach, this often forces the model to stick too closely to the text latent, resulting in incoherent videos. - -To solve this issue, two recent approaches from Chen et al. and Zhang et al. have proposed methods to augment the video diffusion models themselves. Chen et al. 
uses the image encodings from CLIP-like language embeddings in an encoder-decoder language model, feeding the CLIP encodings at each step into a cross-attention layer that generates attention scores with the current video generation . In doing so, additional coherence between frames is achieved. On the other hand, Zhang et al. use multiple encoders, with CLIP and VQ-GAN concatenated before two stages of diffusion model training, which they claim provides the hierarchical learning required to learn the temporal processing . However, both these models are extremely data-heavy and still suffer from hallucination and frame skipping. - -To remedy these issues in diffusion models, Ouyang et al. and Zhu et al. posit that the implicit representation learning objective in diffusion models is the primary cause of the slow convergence and hallucination issues. Specifically, diffusion models do not directly compare their output to their input, as in contrastive models, instead performing a variational approximation of the negative log-likelihood loss over the full Markov chain. Instead, Ouyang and Zhu propose to train the diffusion model to output a structured latent in the latent space of a contrastive model like a VQ-VAE, which then reconstructs the output image . In doing so, a contrastive term can be added to the loss of the diffusion model, maximizing the mutual information between the structured (output) latent and input latent, leading to stronger correlations between input and output, and hence improved convergence. Hence, this approach seems to have potential in fixing the hallucination and coherence issues in video diffusion models, without the need for added complexity. - - -## Proposed Project Outline - -Thus, we propose a novel method for conditional video generation (generating videos given a starting frame and text description) by utilizing an autoencoder framework and contrastive loss to train a regularized latent space in which a diffusion model can operate. 
Following the line of thought introduced above, we hypothesize that under such a formulation, the diffusion model is much more robust to temporal inconsistency, because of the regularity in the latent space. For example, if we imagine a highly regularized latent space, we will find all logical next frames for a given anchor frame clustered very closely around the anchor in this latent space. Therefore, any step the diffusion model takes would produce valid subsequent frames; it suffices simply for the model to learn which direction to go given the conditioned text prompt. - -With this in mind, we detail the construction of the model by describing its components as follows: -1. An encoder for image data is used to map a given video frame into our latent space -1. An encoder for text data is used to map a given video description into our latent space -1. A diffusion-based model operates within the latent space, diffusing between different vectors within this latent space. -1. A decoder is used to generate images from vectors in this latent space. - -The training process of such a model will involve the optimization of a diffusion/contrastive loss based on a given pair of adjacent video frames, as well as the corresponding text description for that video. We define a training step to involve the following: -1. Both video frames and the text description are encoded into our latent space. -1. One iteration of our diffusive model is run by diffusing from the latent vector corresponding to our earlier frame conditioned on our text prompt latent to obtain a new latent vector. -1. This new latent vector after cross-attention is passed through the decoder to obtain our predicted subsequent frame. -1. We then optimize our model according to the contrastive diffusion model loss presented by with a key alteration: we replace their contrastive loss with our contrastive loss, which contains two terms: - 1. 
a term that aims to push our two adjacent video frames closer together in our latent space and - 2. a term that aims to push video frames closer to the text description in our latent space. - -During inference, we generate a video through the following process: -1. An initial frame and the text description are encoded into our latent space -1. We run an arbitrary number of diffusive steps, generating a latent at each step. -1. We decode the latent at each time step to obtain our video frame at that time step; stringing these frames together produces our video. - -From a more theoretical perspective, this method essentially aims to restrict the diffusion model’s flexibility to paths within a highly regularized, lower dimensional latent space, as opposed to the entire space of images that classical diffusion-based approaches can diffuse over. Such a restriction makes it much harder for the diffusion model to produce non-sensible output; the development of such a method would therefore enable the robust generation of highly temporally consistent and thus smooth videos. We also imagine the value of producing such a latent space itself. An interesting exercise, for example, is taking an arbitrary continuous path along vectors within a perfectly regular latent space to obtain sensible videos at arbitrary framerates. - - -## Evaluation - -There are two axes along which we wish to evaluate our model: quality of generation, and quality of the contrastive latent space. - -### Generation Quality - -To measure generation quality, we follow the approach presented by Ho et al., evaluating famous metrics like the FID, FVD, and IS scores. For all of these metrics, we expect to evaluate them throughout the video from beginning to end, with the level of preservation of metric values throughout a video indicating consistent video quality. 
Similarly, we will compare our models to those of similar size using the same metrics to evaluate whether adding the contrastive loss term truly improves generation quality. These metrics will be supplemented with qualitative human analyses, where we will score the videos on a variety of axes including coherence and relevance to the prompt. - -### Use of Contrastive Latent Space - -Given that the diffusion model now maps to a much smaller latent space when compared to the whole space of output images, we believe that the diffusion output should have interpretable representations in the latent space. Hence, we will begin by exploring the latents generated by different text prompts, clustering them around the image source encodings to evaluate if the contrastive loss has truly clustered appropriately. On top of that, we plan to visualize the trajectories of videos for both the training set and our generations, to evaluate our theory of continuous trajectory evolution in the latent space. - -## Implementation/Deliverables - -The implementation of such a method can be greatly simplified through the use of an existing codebase. We plan on using the contrastive diffusion model [GitHub repository](https://github.com/L-YeZhu/CDCD/tree/main) for the implementation of with a few key modifications: -- We use a pre-trained contrastive model as our starting point (such as an image encoder/decoder from CLIP) -- The diffusion model is trained to predict the next frame of a video conditioned on a given text description of the video and the current frame of the video as above. -- Our contrastive loss is used as described above. -- Inference is modified to generate a video as described above. - -Data for this project requires video/text description pairs. 
There are a few datasets consisting of such data, including the [MSR-VTT dataset](https://www.kaggle.com/datasets/vishnutheepb/msrvtt), which is human-annotated, and the [InternVid dataset](https://github.com/OpenGVLab/InternVideo/tree/main/Data/InternVid), which is annotated by LLMs. - -The project should be feasible to complete within the remaining time in the semester, with a rough timeline of deliverables as follows: -- **Implementation** of our method by applying the specified modifications to the existing codebase should take around 1-2 weeks. -- **Training** of the models on cloud computing resources should take <1 week. -- **Evaluation and benchmarking** along with data visualization should take 1 week, even with the potential need for retraining our models. -- **Blog writing** should take <1 week and can be completed in parallel with evaluation and benchmarking. \ No newline at end of file diff --git a/_posts/2023-11-09-deep-connectome-clustering.md b/_posts/2023-11-09-deep-connectome-clustering.md deleted file mode 100644 index d5737f00..00000000 --- a/_posts/2023-11-09-deep-connectome-clustering.md +++ /dev/null @@ -1,186 +0,0 @@ ---- -layout: distill -title: VGAE Clustering of the Fruit Fly Connectome -description: An exploration of how learned Variational Graph Auto-Encoder (VGAE) embeddings compare to - Spectral Embeddings to determine the function of neurons in the fruit fly brain. -date: 2023-11-09 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Max Filter - affiliations: - name: MIT - - name: Eric Liu - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-deep-connectome-clustering.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. 
-toc: - - name: Motivation - - name: Background - - name: Methods - - name: Experiments - - name: Discussion - - name: Conclusion - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. ---- - -## Motivation - -{% include figure.html path="assets/img/2023-11-09-deep-connectome-clustering/fruit-fly-connectome.png" class="img-fluid" %} -
- The fruit fly connectome. -
- -Everything you've ever learned, every memory you have, and every behavior that defines you is stored somewhere in the neurons and synapses of your big, beautiful brain. The emerging field of connectomics seeks to build connectomes–or neuron graphs–that map the connections between all neurons in the brains of increasingly complex animals, with the goal of leveraging graph structure to gain insights into the functions of specific neurons, and eventually the behaviors that emerge from their interactions. This, as you can imagine, is quite a difficult task, but progress over the last few years has been promising. - -Now, you might be asking yourself, can you really predict the functions of neurons based on their neighbors in the connectome? A paper published by Yan et al. in 2017 asked this same question, searching for an answer in a roundworm (C. elegans) connectome. In their investigation, they discovered a neuron whose behavior had not been previously characterized, which they hypothesized was necessary for locomotion. They tested this hypothesis by ablating the neuron on a living C. elegans, and to the dismay of that poor roundworm, found that it was indeed necessary. - -Although impressive, the C. elegans connectome has only ~300 neurons, compared with the ~100,000,000,000 in the human brain; however, this year (2023): - -1. A paper by Winding et al. has published the entire connectome of a fruit fly larvae, identifying 3016 neurons and their 548,000 synapses. -2. Google Research has announced an effort to map a mouse brain (~100,000,000 neurons) - -This is exciting because the fruit fly dataset presents an opportunity to identify more nuanced functions of neurons that may be present in more complex species like mice, but not in simpler species like the roundworm. This creates the requirement for algorithms that are **sufficiently expressive** and able to disentangle the similarities between neurons that appear different, but are functionally similar. 
- -Furthermore, current efforts to map connectomes of increasingly complex animals makes it desirable to have algorithms that are **able to scale** and handle that additional complexity, with the hopes of one day discovering the algorithms that give rise to consciousness. - -## Background - -### Can we learn about human brains by studying connectomes of simpler organisms? - -The primate brain exhibits a surprising degree of specialization, particularly for social objects. For instance, neurons in the face fusiform area (FFA) in the IT cortex appear to fire only in response to faces. Furthermore, individuals with lesions in or brain damage to this area lose their ability to recognize faces . In fact, there is even evidence of rudimentary face perception even in newborn infants with limited access to visual “training data,” who preferentially look at photos of faces, and other face-like arrangements, like inverted triangles (two vertices being the eyes and the third the mouth) . While there may not exist a grandmother cell that can recognize your grandmother, there certainly seems to be at least some engineered specialization in the brain. Cognitive scientists theorize that there is a set of core systems for representing object, actions, number, space, and conspecifics (other people!), together constituting what we might call “common sense,” which may help determine the blueprint of the human brain down to the genetic level . Notably, facial recognition exhibits substantial genetic heritability (over 60%!) and appears to be uncorrelated with general intelligence . We might imagine that there are a set of capabilities, including social cognition, that were so critical for human behavior that our brains evolved over hundreds of thousands of years to “hard code” certain structures, like the FFA, to help scaffold them. After all, another person’s face is an important signal for processes like mate selection, friendship formation, and theory of mind. 
The human brain and the cognitive processes it supports are evolutionary products. And even more importantly, the brain seems to be specialized in some ways, but behave flexibly in others. Through the scientific process, how good of an understanding can we reach about the complex organ sitting between our ears? To what degree are the neuronal assemblages in our brain specialized? How do the communications amongst these neurons grant us our incredible cognitive capabilities? - -In 1982, neuroscientist David Marr proposed three levels of analyses to study complex systems like the human mind: the computational level (what task is the system designed to solve?), the algorithmic level (how does the system solve it?), and the implementation level (where and how is the algorithm implemented in the system hardware?) . At one end of the spectrum, we might think about characterizing the computational capabilities of human cognition, like object recognition. On the other end, we might be interested in how object recognition is implemented in the brain itself, in all of its fleshy glory–how an incoming visual signal is processed by composites of receptive fields in the retina (biological “Gabor filters”) and fed to neurons in the primary and secondary visual areas of the cerebral cortex, for instance . In recent years, scientists have developed an interest in understanding the implementation level at an extremely high resolution by charting the connectome–the comprehensive map of all neural connections in the brain. However, if the grandmother cell is too simplistic of a model for knowledge representation in the human brain, then indeed the human connectome may offer an overly complex view. It seems easy to get lost in the wilderness of its approximately 100 trillion neurons and the nearly quadrillion synapses which connect them ! How can we begin to approach this overwhelming terra incognita? 
We might consider instead studying the connectome of a much simpler model organism, like the transparent 1mm-long nematode Caenorhabditis elegans, with whom we share an estimated 20-71% of our genes .
They found that even though they used only information about the graph structure to predict functions, neurons in the same clusters ended up sharing other similarities, including morphology and known function in some cases. - -Spectral embedding is a popular and general machine learning approach that uses spectral decomposition to perform a nonlinear dimensionality reduction of a graph dataset, and works well in practice. Deep learning, however, appears to be particularly well suited to identifying better representations in the field of biology (e.g., AlphaFold2), and deep learning methods do appear to be capable of creating embeddings that more effectively preserve the topology of nodes in graphs. - -{% include figure.html path="assets/img/2023-11-09-deep-connectome-clustering/vgae-embedding.png" class="img-fluid" %} -
- Learned VGAE graph embedding for Cora citation network dataset. -
- -Thus, it stands to reason that deep learning might offer more insights into the functions of neurons in the fruit fly connectome, or at the very least, that exploring the differences between the spectral embedding found by Winding et al. and the embeddings discovered by deep learning methods might provide intuition as to how the methods differ on real datasets. - -In this project, we explore the differences between functional neuron clusters in the fruit fly connectome identified via spectral embedding by Winding et al. and deep learning. Specifically, we are interested in exploring how spectral embedding clusters differ from embeddings learned by Variational Graph Auto-Encooders (GVAE), which are a more recent architecture proposed by one of the co-authors of the Variational Auto-Encoders (VAE) paper, Max Welling. GVAEs are an interesting intersection of graph neural networks (GNNs) and VAEs, both of which we explored in class, and comparing this technique to spectral embedding is relevant because of our previous discussions of spectral decomposition in class with respect to network scalability and RNN weights. - -We hypothesize that a deep learning technique would be better suited to learning graph embeddings of connectomes because they are able to incorporate additional information about neurons (such as the neurotransmitters released at synapses between neurons) and are able to learn a nonlinear embedding space that more accurately represents the topological structure of that particular connectome, learning to weight the connections between some neurons above others. - -Before we can discuss the experiments, however, we first provide more detail for Spectral Embedding and Graph Variational Autoencoders and compare the two methods. - -## Methods - -### Spectral Embedding - -One classical approach for understanding graph-like data comes from a class of spectral methods which use pairwise distance measures between data points to embed and cluster data. 
Spectral methods offer two obvious advantages when compared to other machine learning approaches. One, we can straightforwardly perform clustering for datasets which are inherently relational, like the connectome, where it is not immediately clear how a method like k-means can be used when we only have access to the relationships between data points (the “edges”) and not the node-level features themselves. Two, spectral methods are **nonlinear**, and don’t rely on measures like squared Euclidean distance, which can be misleading for data which are tangled in high dimensions, but which exhibit a lower **intrinsic** dimensionality. - -So, how does spectral embedding work, exactly? In short, an adjacency matrix is first calculated from the original dataset, which is then used to compute the graph Laplacian. Next, a normalized graph Laplacian is then eigen-decomposed and generates a lower dimensional embedding space on which simpler linear clustering algorithms, like k-means, can be used to identify untangled clusters of the original data. - -This class of methods makes no assumptions about the data (including cluster shape) and can be adjusted to be less noise sensitive–for example, by performing a t-step random walk across the affinity matrix for the data, as in diffusion mapping . An added benefit is that under the hood, spectral embedding can be performed by a series of linear algebra calculations, making it extremely time-efficient. However, as with many unsupervised learning methods, clustering based on spectral embeddings is difficult to scale–in our case, due to the eigen-decomposition step of the graph Laplacian. - - -### Variational Graph Autoencoders - -Although Spectral Embedding is still very popular, in recent years, more attention has been paid to the burgeoning field of geometric deep learning, a set of ideas which aim to to solve prediction or embedding tasks by taking into account the relational structure between data points. 
One example is the variational graph auto-encoder (VGAE), which learns to embed a complex object like a network into a low-dimensional, well-behaved latent space. Kipf and Welling (2016) propose an encoder using a two-layer graph convolutional network, which performs convolutions across local subgraphs of the input network data (not unlike convolution on images, where the graph is a grid!). The graph is projected onto a low dimensional space distributed according to the standard normal through the optimization of a variational lower bound loss, and then upsampled using an inner product between latent variables. They show that this method achieves competitive results on a link prediction task when compared to other methods like spectral clustering and DeepWalk, a random walk-based representation learning algorithm. - -On the other hand, some have discovered that spectral embedding leads to more clear separability in low dimensional representation spaces for text data compared to GNN approaches like node2vec, which reportedly achieve state-of-the-art (sota) scores for multilabel classification and link prediction in other datasets . In addition, it appears that simple modifications like performing an error correlation correction on the training data and smoothing predictions on the test data for GNN-free architectures lead to sota-comparable performances . There are even concerns that the performance of geometric deep learning approaches are inflated, particularly in tasks like multi-label node classification, due to the assumption that the number of labels for test data are known to researchers . - -Thus, it remains unclear in what circumstances relatively novel geometric deep learning approaches do better compared to established and widely-explored methods like spectral learning, and particularly for novel data like the connectome. 
In this work, we attempt to gain deeper insights into which method is better suited to the task of connectome modeling, with the hope of learning about which method should be implemented in future connectomes, such as that of the mouse and eventually the human.
- Spectral Clustering (von Luxburg, 2007; Park, Jeon, & Pedryc, 2014) vs (V)GAEs (Kipf & Welling, 2016): A Story in Pictures -
- -## Experiments - -Now that we have a good idea of how these methods compare to each other in terms of implementation, we explore them from an experimental perspective. Through our experiments, we try to quantitatively and qualitatively address the question of how connectome clusters learned by GVAE compare to the spectral clusters found in the paper. To answer this question, we make use of the fruit fly connectome adjacency matrix provided by Winding et al. as our primary dataset with the hope of answering this question for our readers. - -### Experiment 1: Link Prediction - -One common way to compare unsupervised graph representation learning algorithms is through a link prediction task, where a model is trained on a subset of the edges of a graph, and then must correctly predict the existence (or non-existence) of edges provided in a test set. If the model has learned a good, compressed representation of the underlying graph data structure, then it will be able to accurately predict both where missing test edges belong, and where they do not. - -{% include figure.html path="assets/img/2023-11-09-deep-connectome-clustering/link-prediction-task.png" class="img-fluid" %} -
- A link prediction task. Green lines correspond to the training data, which contains samples of positive samples of edges that are present in the graph, and negative samples of edges that are not present in the graph. The test set in red corresponds to the remainder of positive and negative samples in the graph. -
- -We evaluate the models by computing the area under curve (AUC) of the ROC curve, which plots the true positive rate against the false positive rate. A completely random classifier that does not learn anything about the underlying graph structure would get an AUC of 0.5, while a perfect classifier would have an area of 1.0. - -Another metric we use to evaluate how good the models are is average precision (AP) of the precision-recall curve, which describes the consistency of the model. - -In addition to comparing the models with these metrics, we also explore how robust they are to decreasing dimensionalities of the latent space. We hypothesize that if a model is able to maintain high AUC and AP, even at very low-dimensional embedding spaces, then it is likely better at capturing the structure of the connectome and is more likely to be able to scale to larger datasets, like that of the human brain one day. - -Running this experiment yields the following curves, where the x-axis shows the dimensionality of the latent space, and the y-axis shows the AUCs and APs of the respective models. - -{% include figure.html path="assets/img/2023-11-09-deep-connectome-clustering/link-prediction-auc-ap.png" class="img-fluid" %} - -From this experiment, we find that both the Graph Autoencoder (GAE) and Variational Graph Autoencoder (VGAE) perform better than Spectral Embedding methods in terms of AUC and AP, indicating that the models might be better suited to capturing the nuances in the fruit fly connectome. At the dimensionality used for spectral embedding in Winding et al., d=24, we find that the models have comparable performance, but as we reduce the dimensionality of the learned embedding, the spectral embedding method quickly breaks down and loses its ability to capture significant features in the data, with an AUC of 0.52 at a dimensionality of 2. 
Since a score of 0.5 corresponds to a random model, this means that the spectral embedding method is no longer able to capture any meaningful structure in the data at that dimensionality. Winding et al. gets around this by only using spectral embedding to get a latent space of size 24, and then performing a hierarchical clustering algorithm inspired by Gaussian Mixture Models, but the simplicity and robustness of the GAE model seems to show that they may be better suited to modeling the types of functional neurons present in the connectomes of animals. - -### Experiment 2: GVAE Latent Exploration - -Although the link-prediction experiment gives us a quantitative comparison of the models, we also believe it is important to explore the latent embeddings learned by GAE to see how they qualitatively compare with the learned embeddings used in the Winding et al. work. After observing that the GAE was robust to a latent space of size 2, we decided to look specifically at if there were any similarities between the clusters found by the GAE with the 2-d embedding and the level 7 clusters published by Winding et. al. Also, although the GAE showed better overall performance, we decided to specifically explore the Variational GAE because we expect it to have a latent manifold similar to that of the Variational Autoencoders. - -To this end, we first trained a Variational GAE with a 2-d latent space on the full fruit fly connectome and extracted the latent embedding of each node in the connectome. - -With this latent embedding, we first visualized the latent space using colors corresponding to the 93 clusters identified by Winding et al. Clusters of the same color in the learned GAE latent space mean that the VGAE identified the same cluster that was identified in the Winding et. al. paper and areas where there are many colors within a cluster mean that GAE found a different cluster compared to spectral embedding. 
- -{% include figure.html path="assets/img/2023-11-09-deep-connectome-clustering/explore_cluster.png" class="img-fluid" %} -
- Coloring the GVAE latent space by the found level 7 clusters Winding et al. Black points correspond to neurons that were not assigned a cluster by Winding et al. -
- -As seen in the figure above, we find that while VGAE projects directly to a 2-d latent space without any additional clustering to reduce the dimensionality, the learned embedding still shares many similarities with the spectral embedding down to a dimensionality of 24 followed by Gaussian Mixture Model hierarchical clustering. Therefore, using VGAE to learn a direct 2-d latent space still captures much of the same information that a more complex machine learning algorithm like spectral embedding is able to. - -We further explored the learned latent space by looking at whether the learned embedding had any correlation with the cell types identified in the fruit fly larvae connectome. Since the VGAE only had information about the structure of the graph embedding, clusters of similar colors in this figure mean that the cell type within the cluster shared a lot of common structures, like potentially the same degree or being connected to similar types of up or downstream neurons. - -We use the same color palette as the Winding et al. paper so that cell types in the level 7 clusters of the Winding et al. paper can be directly compared to the learned VGAE latent embedding. - -{% include figure.html path="assets/img/2023-11-09-deep-connectome-clustering/clustering-cell-type.png" class="img-fluid" %} -
- Coloring the Winding et al. level 7 clusters (left) and GVAE latent space (right) by cell types. This information was not provided to either algorithm during training, so clusters of the same cell type mean that its type can be inferred from structure only. -
- -As seen in the figure above, both spectral embedding and GVAE latent spaces capture knowledge about the cell types when trained purely on the graph structure. We believe this is because cells of this type have similar properties in terms of the types of neighboring neurons they connect to in the connectome, and they may also have special properties like higher degree of connections. - -In particular, it is interesting that sensory neurons and Kenyon cells are very well captured by both embeddings, and that MBIN cells and sensory neurons are clustered together by both their spectral embedding algorithm and VGAE. - -## Discussion - -Our preliminary investigations show that deep learning algorithms such as Graph Autoencoders (GAEs) and Variational Graph Autoencoders (VGAEs) are able to capture at least as much nuance and information about function as spectral embedding algorithms. In addition, they come with the following advangates: - -1. With their current implementation, they can easily be run on a GPU, while common spectral embedding algorithms in libraries such as scikit learn are only designed to work on CPUs. Since we take a deep learning approach, our GNN method can use batches optimized via Adam, while spectral embedding only works if the entire adjacency matrix fits in memoruy. This makes deep learning methods **better able to scale to larger datasets** such as the mouse connectome that may come in the next few years. -2. As shown in experiment 2, GAEs and Variational GAEs are **able to directly learn a robust embedding into a 2-d space** without any additional clustering, making interpretation easy and fast. We suspect that because of its higher performance at embedding connectomes to such low dimensions compared to spectral embedding which performs only marginally better than a random algorithm at such low dimensions, VGAEs must be capturing some addiitonal nuance of the graph structures that spectral embedding is simply not able to encode. -3. 
Comparing the 2-d embeddings of VGAE to the clustered 24-d spectral embeddings found in Winding et al., we find that even when compressing to such a low-dimensional space, the semantic information captured does in fact match that of spectral embedding at a higher dimensional space. Coloring by cell type shows that it also **captures information about the function of neurons**, with similar neuron types being clustered together even when they are located all over the brain, such as Kenyon cells. Cells of the same type likely serve similar functions, so in this respect, VGAE is able to capture information about the function of cells using only knowledge of the graph structure.
Examples of these "free" features we could get that would help us predict functions of neurons include 1) the hemisphere the cell belongs to (e.g., not in fruit flies, but neurons in the left brain of humans correspond to language), 2) the axon I/O ratio, and the dendrite output-input ratio of a neuron. - -One final limiation is that our **model only trains on a single connectome**. This means that we aren't able to capture the variation of connectomes within a species. Maybe one day, we will be able to scan connectomes of people in the same way that we are able to scan genomes of people, but that day is likely still far away. We might be able to help this by using the generative compoment of the VGAE to create brains that are physically feasible given the structure of a single connectome, but it would be hard to test. Since we are currently only looking at the connectome of a single species, we likely aren't capturing an embedding space that finds functionally similar neurons in different animals such as C. elegans, which we may be able to do in future work. - -## Conclusion - -In this work, we asked if Deep Learning techniques like Variational Graph Autoencoders could learn something about the functions of cells in a connectome using only the graph structure. We found that VGAE did in fact capture relevant structures of the graph, even in the undirected case. It performed similarly to spectral embeding, even when embedding directly into a visualizable 2-d latent space. In the future, we may be able to learn about neurons that serve the same purpose across species, or learn about the underlying low level syntactic structures like for-loops or data types that our brain uses to encode consciousness, vision, and more. 
\ No newline at end of file diff --git a/_posts/2023-11-09-dof-visual-place-recognition-satellite.md b/_posts/2023-11-09-dof-visual-place-recognition-satellite.md deleted file mode 100644 index 021d70bb..00000000 --- a/_posts/2023-11-09-dof-visual-place-recognition-satellite.md +++ /dev/null @@ -1,301 +0,0 @@ ---- -layout: distill -title: 6-DOF estimation through visual place recognition -description: A neural pose-estimation solution is implemented, which could help an agent with a downward-facing camera (such as a drone) to geolocate based on prior satellite imagery of terrain. The neural encoder infers extrinsic camera parameters from camera images, enabling estimation of 6 degrees of freedom (6-DOF), namely 3-space position and orientation. By encoding priors about satellite imagery in a neural network, the need for the agent to carry a satellite imagery dataset onboard is avoided. -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Andrew Feldman - url: "https://andrew-feldman.com/" - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-dof-visual-place-recognition-satellite.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. 
-toc: - - name: Introduction - - name: Background -# - name: Images and Figures -# subsections: -# - name: Interactive Figures - - name: Proposed solution - subsections: - - name: Image-to-extrinsics encoder architecture - - name: Data sources for offline training - - name: Training and evaluation - subsections: - - name: Data pipeline - - name: Training - - name: Hyperparameters - - name: Evaluation - - name: Implementation - subsections: - - Source image - - Dataloader - - DNN architecture - - Training setup - - name: Training results - - name: Conclusion - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -# Introduction - -The goal of this project is to demonstrate how a drone or other platform with a downward-facing camera could perform approximate geolocation using a neural scene representation of existing satellite imagery. Note that the use of the term "Visual Place Recognition" in the title is a carryover from the proposal, but no longer applies to this project. Rather, the goal of this project is to implement 6-DOF pose-estimation. - -Pose estimation can refer to the ability of an agent to determine its 3D position and orientation based on visual or other sensor info. - -In this work, the goal is to compress the ground-truth image data into a neural model which maps live camera footage to geolocation coordinates. - -Twitter user Stephan Sturges demonstrates his solution for allowing a drone with a downward-facing camera to geolocate through cross-referencing against a database of satellite images: - -
-
- {% include figure.html path="assets/img/2023-11-09-dof-visual-place-recognition-satellite/sturges_satellite_vpr.jpeg" class="img-fluid rounded z-depth-1" %} -
-
-
- Twitter user Stephan Sturges shows the results of geolocation, purportedly based on Visual Place Recognition. -
- -The author of the above tweet employs a reference database of images. It would be interesting to eliminate the need for a raw dataset. Whereas the author employs Visual Place Recognition, here I employ pose estimation techniques. Thus I do not seek to estimate predict place *labels*, but rather geolocated place *coordinates* for the camera, as well as the camera's orientation. - -Thus, this works seeks to develop a neural network which maps a terrain image from the agent's downward-facing camera, to a 6-DOF (position/rotation) representation of the agent in 3-space. - -# Background - -The goal-statement - relating a camera image to a location and orientation in the world - has been deeply studied in computer vision and rendering: - -
-
- {% include figure.html path="assets/img/2023-11-09-dof-visual-place-recognition-satellite/camera_intrinsic_extrinsic.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Camera parameters, as described in. -
- -Formally, -* The image-formation problem is modeled as a camera forming an image of the world using a planar sensor. -* **World coordinates** refer to 3-space coordinates in the Earth or world reference frame. -* **Image coordinates** refer to 2-space planar coordinates in the camera image plane. -* **Pixel coordinates** refer to 2-space coordinates in the final image output from the image sensor, taking into account any translation or skew of pixel coordinates with respect to the image coordinates. - -The mapping from world coordinates to pixel coordinates is framed as two composed transformations, described as sets of parameters: -* **Extrinsic camera parameters** - the transformation from world coordinates to image coordinates (affected by factors "extrinsic" to the camera internals, i.e. position and orientation.) -* **Intrinsic camera parameters** - the transformation from image coordinates to pixel coordinates (affected by factors "intrinsic" to the camera's design.) - -And so broadly speaking, this work strives to design a neural network that can map from an image (taken by the agent's downward-facing camera) to camera parameters of the agent's camera. With camera parameters in hand, geolocation parameters automatically drop out from extracting extrinsic translation parameters. - -To simplify the task, assume that camera intrinsic characteristics are consistent from image to image, and thus could easily be calibrated out in any application use-case. Therefore, this work focuses on inferring **extrinsic camera parameters** from an image. We assume that pixels map directly into image space. 
- -The structure of extrinsic camera parameters is as follows: - -$$ -\mathbf{E}_{4 \times 4} = \begin{bmatrix} \mathbf{R}_{3 \times 3} & \mathbf{t}_{3 \times 1} \\ \mathbf{0}_{1 \times 3} & 1 \end{bmatrix} -$$ - -where $$\mathbf{R}_{3 \times 3} \in \mathbb{R^{3 \times 3}}$$ is rotation matrix representing the rotation from the world reference frame to the camera reference frame, and $$\mathbf{t}_{3 \times 1} \in \mathbb{R^{3 \times 1}}$$ represents a translation vector from the world origin to the image/camera origin. - -Then the image coordinates (a.k.a. camera coordinates) $$P_c$$ of a world point $$P_w$$ can be computed as: - -$$ -\mathbf{P_c} = \mathbf{E}_{4 \times 4} \cdot \mathbf{P_w} -$$ - -# Proposed solution - -## Image-to-extrinsics encoder architecture - -The goal of this work, is to train a neural network which maps an image drawn from $$R^{3 \times S \times S}$$ (where $$S$$ is pixel side-length of an image matrix) to a pair of camera extrinsic parameters $$R_{3 \times 3}$$ and $$t_{3 \times 1}$$: - -$$ -\mathbb{R^{3 \times S \times S}} \rightarrow \mathbb{R^{3 \times 3}} \times \mathbb{R^3} -$$ - -The proposed solution is a CNN-based encoder which maps the image into a length-12 vector (the flattened extrinsic parameters); a hypothetical architecture sketch is shown below: - -
-
- {% include figure.html path="assets/img/2023-11-09-dof-visual-place-recognition-satellite/nn.svg" class="img-fluid rounded z-depth-1" %} -
-
-
- Image encoder architecture. -
- -## Data sources for offline training - -Online sources provide downloadable satellite terrain images. - -## Training and evaluation - -The scope of the model's evaluation is, that it will be trained to recognize aerial views of some constrained area i.e. Atlantic City New Jersey; this constrained area will be referred to as the "area of interest." - -### Data pipeline - -The input to the data pipeline is a single aerial image of the area of interest. The output of the pipeline is a data loader which generates augmented images. - -The image of the area of interest is $$\mathbb{R^{3 \times T \times T}}$$ where $$T$$ is the image side-length in pixels. - -Camera images will be of the form $$\mathbb{R^{3 \times S \times S}}$$ where $$S$$ is the image side-length in pixels, which may differ from $$T$$. - -* **Generate an image from the agent camera's vantage-point** - * Convert the area-of-interest image tensor ($$\mathbb{R^{3 \times T \times T}}$$) to a matrix of homogenous world coordinates ($$\mathbb{R^{pixels \times 4}}$$) and an associated matrix of RGB values for each point ($$\mathbb{R^{pixels \times 3}}$$) - * For simplicity, assume that all features in the image have an altitutde of zero - * Thus, all of the pixel world coordinates will lie in a plane - * Generate random extrinsic camera parameters $$R_{3 \times 3}$$ and $$t_{3 \times 1}$$ - * Transform the world coordinates into image coordinates ($$\mathbb{R^{pixels \times 3}}$$) (note, this does not affect the RGB matrix) - * Note - this implicitly accomplishes the commonly-used image augmentations such as shrink/expand, crop, rotate, skew -* **Additional data augmentation** - to prevent overfitting - * Added noise - * Color/brightness adjustment - * TBD -* **Convert the image coordinates and the RGB matrix into a camera image tensor ($$\mathbb{R^{3 \times S \times S}}$$)** - -Each element of a batch from this dataloader, will be a tuple of (extrinsic parameters,camera image). 
- -## Training - -* For each epoch, and each mini-batch... -* unpack batch elements into camera images and ground-truth extrinsic parameters -* Apply the encoder to the camera images -* Loss: MSE between encoder estimates of extrinsic parameters, and the ground-truth values - -### Hyperparameters -* Architecture - * Encoder architecture - CNN vs MLP vs ViT(?) vs ..., number of layers, ... - * Output normalizations - * Nonlinearities - ReLU, tanh, ... -* Learning-rate -* Optimizer - ADAM, etc. -* Regularizations - dropout, L1, L2, ... - -## Evaluation - -For a single epoch, measure the total MSE loss of the model's extrinsic parameter estimates relative to the ground-truth. - -## Feasibility - -Note that I am concurrently taking 6.s980 "Machine learning for inverse graphics" so I already have background in working with camera parameters, which should help me to complete this project on time. - -# Implementation - -## Source image - -DOF estimation was applied to a 2D aerial image, shown below: - -
-
- {% include figure.html path="assets/img/2023-11-09-dof-visual-place-recognition-satellite/sample_image.jpg" class="img-fluid rounded z-depth-1" %} -
-
-
- Sample aerial image from . -
- -## Dataloader - -A dataloader was created which generates (1) generates a random extrinsic camera matrix as described above, in order to generate (2) visualization of the above source image from the perspective of the random camera matrix. - -More specifically, the dataloader generates *Euler Angles* in radians associated with with the camera matrix rotation, as well as a 3D offset representing the camera's position. - -You will notice that the images suffer from an artifact whereby the pixels are not adjacent to each other but rather have black space between them; a production implementation of this solution would require interpolation between pixels in order to produce a continuous image. - -An example of a single generated image is shown below; it is the original image, above, viewed from the perspective of a random camera matrix: - -
-
- {% include figure.html path="assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_sample.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Single datapoint from data loader. -
- -A batch of generated images is shown below: - -
-
- {% include figure.html path="assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_grid.png" class="img-fluid rounded z-depth-1" %} -
-
-
- A batch from the data loader. -
- -Again, you can see that owing to a lack of interpolation, the pixels are spread out, with black space between them. - -## DNN architecture - -The DNN architecture is an MLP with 6 hidden layers of width 512, 256 and 128. - -The input is a 224x224 image with 3 color channels representing the view of the source image from an orientation determined by the (unknown) camera extrinsic parameters. - -The architecture outputs 6 logit values values corresponding to predictions of 3 Euler angles and 3 positional offsets for the camera extrinsic matrix. - -For this project, I experimented with the sinusoidal activation functions described in the SIREN paper. Sinusoidal activation functions, combined with MLPs, were previously shown to be more effective at capturing high-frequency information in radiance fields, compared to ReLU MLPs. I employed sinusoidal activation functions in this work in the hopes of more effectively capturing high-frequency variation in the relationship between camera extrinsic parameters and camera image pixels. - -One question which might arise is, if the DNN outputs logits, how do I account for the difference in statistical characteristics between the three Euler Angle outputs and the three translation vector outputs? I employed scikitlearn StandardScalers at both the input and the output in order to normalize image pixels and extrinsic camera matrix parameters, respectively. The use of normalization at the input is standard. The use of normalization at the output allows each dimension of the 6-logit output to learn a zero-mean, unit-variance distribution: the output StandardScaler converts from zero-mean, unit-variance to the estimated actual mean and variance of the target distribution. 
The way the output StandardScaler is computed is as follows: a batch of random data is sampled from the dataloader; mean and variance are computed; then a StandardScaler is designed such that its *inverse* maps from the computed mean and variance of the target extrinsics, to zero mean/unit-variance. Thus, run forward, the output StandardScaler will map from a unit Gaussian to the computed mean and variance.
-
-## Training setup
-
-I train for 80 epochs with an Adam optimizer and a learning rate of 0.00001.
-
-MSE loss is employed for training and evaluation. The extrinsic parameters predicted by the DNN are compared against the target (correct) extrinsic parameters which the dataloader used to generate the camera image of the scene. Recall from the previous section that, owing to the output StandardScaler, the DNN outputs 6 roughly zero-mean/unit-variance predicted camera extrinsic parameters. I chose to evaluate loss *relative to these zero-mean/unit-variance predictions*, prior to the output StandardScaler; the rationale being that I wanted each extrinsic parameter to have equal weighting in the MSE loss computation, and not be biased by the mean/variance of the particular parameter. Thus, I use the output StandardScaler in *inverse* mode to normalize the target values to zero-mean/unit-variance. MSE loss is then computed between the DNN output logits, and these normalized target values.
-
-A side-effect of computing MSE against normalized values is that it is effectively a relative measure: MSE tells me how large the variance in the error between predictions and target is, relative to the unit-variance of the normalized target values. Thus I expect that an MSE much less than one is a good heuristic for the quality of the estimate.
-
-# Training results
-
-The plot below shows that the DNN architecture was able to converge on low-MSE predictions of the extrinsic camera matrix:
-
-
- {% include figure.html path="assets/img/2023-11-09-dof-visual-place-recognition-satellite/loss_plot.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Train and test MSE loss between the predicted and actual extrinsic camera matrix. -
- -Note that the train and test curves overlap almost perfectly; this is because all datapoints generated by the dataloader are random, so in fact the model is constantly being trained on fresh data, and the resampling is really unnecessary. - -Since the final MSE is relatively small (0.020), and since (as described in the previous section) the MSE is effectively a relative measure of error, I believe the DNN is learning a relatively good estimate of camera extrinsics. - -# Conclusion - -Based on the low MSE attained during training, I believe I successfully trained a DNN to roughly estimate camera extrinsics from orientation-dependent camera views. - -There are many improvements which would be necessary in order to deploy this in production. - -For example, it would be better to use more detailed satellite imagery, preferably with stereoscopic views that effectively provide 3D information. Without having 3D information about the scene, it is hard to train the model to recognize how the scene will look from different angles. In my work, I used a 2D image and essentially assumed that the height of the geographic features in the image was negligible, such that I could approximate the 3D point-cloud as lying within a 2D plane. With stereoscopic satellite data, it could be possible to construct a truly 3D point-cloud, on which basis I could synthesize more accurate camera views during the training process. - -Also, as discussed in the Implementation section, it would likely be necessary to implement interpolation between the pixels when generating simulated camera views. Otherwise, the camera views during training would look nothing like what the camera would see in the real world. 
\ No newline at end of file diff --git a/_posts/2023-11-09-enforcing-uniformity.md b/_posts/2023-11-09-enforcing-uniformity.md deleted file mode 100644 index b61da2c3..00000000 --- a/_posts/2023-11-09-enforcing-uniformity.md +++ /dev/null @@ -1,239 +0,0 @@ ---- -layout: distill -title: Imposing uniformity through Poisson flow models -description: Uniformity and alignment are used to explain the success of contrastive encoders. Can we use already trained, well-aligned features and impose uniformity to increase their quality and performance on downstream classification tasks? -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Ishank Agrawal - url: "https://www.mit.edu/~ishank" - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-enforcing-uniformity.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Notation - - name: Alignment and Uniformity - - name: Poisson Flows - - name: Hypothesis - - name: Experiment - - name: Results - - name: Conclusion - - name: Appendices - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } - p { - text-align: justify; - } ---- - - - -Most objects encountered in machine learning are extremely high dimensional. For example, a relatively small $512$x$512$ RGB image has over $750,000$ dimensions. 
However, most of this space is empty; that is, the set of well-formed images forms an extremely small subset of this large space.
-
-Thus a useful task in machine learning is to map this large space into a much smaller space, such that the images we care about form a compact, organized distribution in this new space. This is called representation learning. For such a map to be useful, there are two key features. Firstly, the representations should be useful for downstream tasks and not worse than the original representation. Thus they should preserve as much of the useful data as possible. Secondly, they should be relatively task-agnostic and help across a diverse array of such downstream tasks. For example, word embeddings (such as those produced by BERT ) can be used for a wide array of language tasks, ranging from language modeling and generation to sentiment analysis. An important question is how to generally find such useful representations.
-
-Several methods exist. For example, autoencoders attempt to learn maps that are essentially bijective over the dataset we care about. These ensure that important information is not lost during the transformation. Contrastive encoders attempt to learn maps that enforce similarity between representations of similar images. Contrastive encoders are seen to perform quite well on unsupervised representation learning tasks, and we will explore these in a bit more detail soon. Lastly, layers of already-trained neural networks can be used as features as well. For example, layers of VGG-19 trained on ImageNet are useful features that contain much information about the style and content of the images .
-
-It is important to further quantify what makes a useful representation from a theoretical standpoint. Wang and Isola introduced concepts of alignment and uniformity to explain why contrastive encoders perform well. Alignment is the idea that similar objects should have close feature vectors in the representation space. 
Uniformity is the idea that the set of well-formed objects should cover the representation space uniformly.
-
-In this post, we will further examine how uniformity affects the quality of representations. To do this, we will use Poisson flows. As we shall see, Poisson flows are an incredibly useful tool to enforce uniformity. We show that enforcing uniformity on well-aligned features can improve representations as measured by their performance on downstream tasks.
-
-
-
-## Notation
-
-We introduce several notations to make talking about representations easier. Let $\mathcal{X}$ be our original space of the data, and let $p_{\mathrm{x}}$ be the distribution of the data. Let $\mathcal{Y}$ be any representation space, and let $f: \mathcal{X} \to \mathcal{Y}$ be a mapping from the original space to the representation space. If $\mathrm{y} = f(\mathrm{x}), \ \mathrm{x} \sim p_{\mathrm{x}}$, then let $\mathrm{y} \sim p_{f}$, where $p_{f}$ is the new distribution after $f$.
-
-We will also have a notion of similarity. Let $p_{\mathrm{pos}}(x_1, x_2)$ be a joint probability distribution that quantifies this similarity. We assume that $p_{\mathrm{pos}}$ satisfies
-
-$$\begin{aligned}
-    p_{\mathrm{pos}}(x_1, x_2) &= p_{\mathrm{pos}}(x_2, x_1) \\
-    \int_{x_2} p_{\mathrm{pos}}(x_1, x_2) d x_2 &= p_{\mathrm{x}}(x_1)
-\end{aligned}$$
-
-
-
-## Alignment and Uniformity
-
-As mentioned earlier, contrastive encoders learn useful representations by minimizing a distance metric for similar pairs, while maximizing the same for dissimilar pairs . Thus if $D(x_1, x_2)$ is some distance metric on $\mathcal{Y}$, contrastive encoders minimize $D(x, x^+)$ for positive pairs, while maximizing $D(x, x^-)$ for negative pairs; equivalently, for a similarity metric $d(x_1, x_2)$, they maximize $d(x, x^+)$ and minimize $d(x, x^-)$.
-
-In their most common formulation, they set $\mathcal{Y}$ as the hypersphere $\mathcal{S}^d \subset \mathbb{R}^d$, and use cosine similarity $d(x_1, x_2) = x_1^T x_2$ as the similarity metric. 
Then the loss function becomes the following, where $M$ and $\tau$ are hyperparameters:
-
-$$
-\mathcal{L} \triangleq \mathbb{E}_{(x, x^+) \sim p_{\mathrm{pos}}, \{x_i^-\}_{i=1}^M \overset{\mathrm{iid}}{\sim} p_{\mathrm{x}}} \left[
--\log \frac
-    {e^{f(x)^T f(x^+) / \tau}}
-    {e^{f(x)^T f(x^+) / \tau} + \sum_{i=1}^{M} e^{f(x)^T f(x_i^-) / \tau}}
-\right]
-$$
-
-These encoders have been successful at several image representation tasks. Wang and Isola explained their performance through alignment and uniformity. Alignment is simply the quality that similar images are close together in the representation space. This is clearly present in contrastive encoders, as one of their goals is indeed to minimize
-
-$$
-\mathcal{L}_{\mathrm{alignment}} \triangleq \mathbb{E}_{(x, x^+)\sim p_{\mathrm{pos}}} \left[ D(x, x^+) \right]
-$$
-
-However, Wang and Isola also stated that uniformity was an equally important feature of contrastive architectures. That is, when training the contrastive loss to learn an encoder $f$, the new probability distribution $p_{f}$ is close to uniform. They showed that using the $L_2$ norm as a distance metric and using Gaussian kernels to promote uniformity, learned representations perform better than those learned by contrastive learning.
-
-
- {% include figure.html path="assets/img/2023-11-09-enforcing-uniformity/alignment_uniformity.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Figure 1: Alignment and Uniformity. In figure (a), we see the quality of alignment, i.e. similar images are close to each other in the representation space. In figure (b), we see the quality of uniformity, i.e. images form a uniform distribution across the representation space. Image borrowed from -
- - -Why does uniformity help? Firstly, it acts as a regularization term. This is because if we tried to learn representations that maximized alignment without any target for uniformity, then a map that just takes all input vectors to zero would trivially minimize the loss. Yet this would be an extremely bad representation. However, aside from regularization, uniform distributions also have maximal self-entropy. Thus their importance can be explained equally well through some sort of minimizing loss of information. Indeed this is how explains it. - -In this post we will investigate this even further. In particular, if regularization is the only effect that uniformity has on representations, then slightly nudging already aligned representations to make them uniform should not improve their quality. This is exactly what we will do, and we will do this through Poisson Flows. - - - -## Poisson Flows - -If you let a planar positive distribution of charges slightly above $z=0$ loose, then they will repel each other. If you stop them at some large enough distance $R$ from the origin, then their distribution approaches uniform as $R \to \infty$. This is very interesting, and what's even more interesting is that this fact generalizes to arbitrary dimensions. Thus such fields allow a convenient way to map arbitrary high-dimensional distributions to uniform distributions. Poisson flow generative models proposed by Xu and Liu exploit this property for image generation; by sampling uniformly from the hemisphere, one can iterate through the backward ODE and thus sample from $p_{\mathrm{x}}$. We shall use it to impose uniformity on well-aligned features. - -
-
- {% include figure.html path="assets/img/2023-11-09-enforcing-uniformity/poisson.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Figure 2: Evolution of data points through a Poisson field. As we can see, arbitrary distributions are mapped to uniform. Further the mapping is continuous. Borrowed from -
- -Say we have a probability distribution $p_{\mathrm{y}}$ over $\mathcal{Y}_1 = \mathbb{R^d}$. Set this distribution at the $z = 0$ plane here z-dimension refers to the new dimension we recently augmented the dataset with in the expanded space $\tilde{\mathcal{Y}}_1 = \mathcal{Y} \times \mathbb{R}$. Let the electric field at an arbitrary point in $\tilde{\mathcal{Y}}_1$ be defined as - -$$ -E_{p_{\tilde{\mathrm{y}}}}(\tilde{y}) = \int_{\tilde{y}'} \frac{\tilde{y} - \tilde{y'}}{\|\tilde{y} - \tilde{y'} \|_2^{d+1}} \cdot p_{\tilde{\mathrm{y}}}(\tilde{y}') d\tilde{y}' -$$ - -Let $\mathrm{y} \sim p_{\mathrm{y}}$. Evolve $\tilde{\mathrm{y}} = (\mathrm{y}, 0) \in \tilde{\mathcal{Y}_1}$ according to the ODE - -$$ -\frac{d\tilde{\mathrm{y}}}{dt} = E_{p_{\tilde{\mathrm{y}}}}(\tilde{y}) -$$ - -

Let the final point be $f_{\mathrm{poisson}}(\mathrm{y}; p_{\mathrm{y}})$. Then the distribution of $p_{f_{\mathrm{poisson}}}(\cdot)$ approaches uniform as $R \to \infty$.

- -In practice, since we want to take $s = 0$ to $R$, we do a change of variables to write the ODE as - -$$ -\frac{d \tilde{\mathrm{y}}}{ds} = \frac{1}{E_{p_{\tilde{\mathrm{y}}}}(\tilde{\mathrm{y}})^T \tilde{\mathrm{y}}} \cdot E_{p_{\tilde{\mathrm{y}}}}(\tilde{\mathrm{y}}) -$$ - -Note that the field stated here isn't actually used directly, it is rather learned through a deep neural network. This is possible since the integral can be replaced with an expectation, which itself can be approximated through Monte-Carlo methods. - -Since Poisson flows allow us to map arbitrary distributions to uniform ones, while preserving continuity; they are an extremely powerful tool to further understand the effects of uniformity. This brings us to our main hypothesis - -## Hypothesis - - -> Assume that uniformity acts more than just a regularizing term for learning useful representations. Then if we take any well-aligned features that have good downstream performance, and apply a continuous map that imposes uniformity, our new features should perform better at downstream tasks - -This is because if uniformity is simply a regularizing term, then training them for the downstream task is the best we can do. This hypothesis itself is counterintuitive because the original features should already be well-trained against the task at hand. However, surprisingly, this hypothesis seems to hold true. To show this, we describe the following experiment. - - -## Experiment - -We consider the pen-ultimate layer of AlexNet trained on CIFAR-10 as our initial features. These features must be well aligned, as linear decision boundaries are able to accurately classify them into their classes. - -
-
- {% include figure.html path="assets/img/2023-11-09-enforcing-uniformity/flow.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Figure 3: A representation of how the features should evolve. (a) Initial representation is seemingly random and hard to classify with. (b) After training a classifier, the penultimate layer is well clustered between the various features. (c) Features after learning a Poisson field, and sending the data through it. - Note that images (a) and (b) are for representation purposes only. However image (c) is indeed produced through a learned Poisson field from data points in (b) -
- -We take these features and learn a corresponding Poisson field. For our predicted poisson field, we use a relatively small fixed-size two-hidden layer network. - -We finally pass our features through this Poisson field and train a linear classifier on top of the final learned representations. We compare this accuracy against the original accuracy. - -A summary of our approach is given in the figure below: - -
-
- {% include figure.html path="assets/img/2023-11-09-enforcing-uniformity/architecture.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Figure 4: Overview of architecture -
- -Further training details are given in [Appendix A](#appendix-a-training-details). - -## Results - -The results are given in the below table. - -|-|-|-| -| Architecture | Train accuracy | Test accuracy | -|-|-|-| -| AlexNet | 88% | 82% | -| AlexNet + Poisson Flow *(ours)*| 95% | 85% | - -Here we see that our method outperforms a well-trained AlexNet considerably. - -## Conclusion - -This is a surprisingly nice improvement. Note that the Poisson flow post-processing step is completely unsupervised. This seems to hint that having a uniform prior is helpful for reasons other than just regularization. - -It would be extremely interesting to develop an entirely unsupervised architecture based on Poisson flow. This would begin by using an unsupervised method to learn well-aligned features. A suitable loss candidate could possibly be just a contrastive loss, with L2 norm as a distance metric: - -$$ -\mathcal{L} \triangleq \mathbb{E}_{(x, x^+) \sim p_{\mathrm{pos}}, \{x_i^-\}_{i=1}^M \overset{\mathrm{iid}}{\sim} p_{\mathrm{x}}} \left[ -\|x - x^+\|_2^{\alpha} - \lambda \sum_{i=1}^{M} \|x - x_i^{-}\|_2^{\beta} -\right] -$$ - -Then passing these well-aligned features through a Poisson flow would enforce uniformity. Such a proposed architecture could be worth exploring. - ---- - -## Appendices - -See for code. - -### Appendix A: Training details - -We used a version of AlexNet similar to that given in Isola's paper, such that the pen-ultimate layer was 128 neurons wide. We trained this network against cross entropy loss for 20 epochs using Adam as an optimizer. - -After this, we moved the features from $\mathbb{R}^{128}$ to $\mathbb{R}^{129}$ by setting $z = 0$. We then learned a Poisson field for this network similar to . We use the default values of $\tau, \gamma, \sigma$ as the original paper, but used $M = 20$ as a consequence of our reduced dimension size. -We trained this Poisson field with a large batch size of $1024$ and a small batch size of $128$. 
We trained this over $200$ epochs. - -We then passed the features through the Poisson field. To simulate the ODE, we used Euler's method with a small delta of $0.01$ and $100$ steps. Using RK4 might produce better results, and we leave this to future work. - -We finally trained a logistic classifier on top of these final representations, and printed train and test accuracies. \ No newline at end of file diff --git a/_posts/2023-11-09-eunhae-project.md b/_posts/2023-11-09-eunhae-project.md deleted file mode 100644 index c097754b..00000000 --- a/_posts/2023-11-09-eunhae-project.md +++ /dev/null @@ -1,311 +0,0 @@ ---- -layout: distill -title: How does model size impact catastrophic forgetting in online continual learning? -description: Yes, model size matters. -date: 2023-11-09 -htmlwidgets: true - -authors: - - name: Eunhae Lee - url: "https://www.linkedin.com/in/eunhaelee/" - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-eunhae-project.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Related Work - - name: Method - - name: Experiment - - name: Results - - name: Discussion - - name: Conclusion - # - name: Appendix -_styles: > - .caption { - font-size: 0.8em; - text-align: center; - color: grey; - } - h1 { - font-size: 2.5em; - margin: 0.3em 0em 0.3em; - } - h2 { - font-size: 2em; - } - h3 { - font-size: 1.5em; - margin-top: 0; - } - .fake-img { - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - - - - - -# Introduction - -One of the biggest unsolved challenges in continual learning is preventing forgetting previously learned information upon acquiring new information. 
Known as “catastrophic forgetting,” this phenomenon is particularly pertinent in scenarios where AI systems must adapt to new data without losing valuable insights from past experiences. Numerous studies have investigated different approaches to solving this problem in the past years, mostly around proposing innovative strategies to modify the way models are trained and measuring their impact on model performance, such as accuracy and forgetting.
-
-Yet, compared to the large number of studies done in establishing new strategies and evaluative approaches in visual continual learning, there is surprisingly little discussion on the impact of model size. It is commonly known that the size of a deep learning model (the number of parameters) plays a crucial role in its learning capabilities . Given the limitations in computational resources in most real-world circumstances, it is often not practical or feasible to choose the largest model available. In addition, sometimes smaller models perform just as well as larger models in specific contexts. Given this context, a better understanding of how model size impacts performance in a continual learning setting can provide insights and implications for the real-world deployment of continual learning systems.
-
-In this blog post, I explore the following research question: _How do network depth and width impact model performance in an online continual learning setting?_ I set forth a hypothesis based on existing literature and conduct a series of experiments with models of varying sizes to explore this relationship. This study aims to shed light on whether larger models truly offer an advantage in mitigating catastrophic forgetting, or if the reality is more nuanced.
-
-
-# Related Work
-### Online continual learning
-Continual learning (CL), also known as lifelong learning or incremental learning, is an approach that seeks to continually learn from non-iid data streams without forgetting previously acquired knowledge. 
The challenge in continual learning is generally known as the stability-plasticity dilemma, and the goal of continual learning is to strike a balance between learning stability and plasticity.
-
-Traditional CL models assume new data arrives task by task, each with a stable data distribution, enabling *offline* training. However, this requires having access to all task data, which can be impractical due to privacy or resource limitations. In this study, I will consider a more realistic setting of Online Continual Learning (OCL), where data arrives in smaller batches and is not accessible after training, requiring models to learn from a single pass over an online data stream. This allows the model to learn from data in real-time.
-
-Online continual learning can involve adapting to new classes (class-incremental) or changing data characteristics (domain-incremental). Specifically, for class-incremental learning, the goal is to continually expand the model's ability to recognize an increasing number of classes, maintaining its performance on all classes it has seen so far, despite not having continued access to the old class data. Moreover, there has been more recent work done in unsupervised continual learning . To narrow the scope of the vast CL landscape and focus on the impact of model size on CL performance, I will focus on the more common problem of class-incremental learning in supervised image classification in this study.
-
-### Continual learning techniques
-
-Popular methods to mitigate catastrophic forgetting in continual learning generally fall into three buckets:
-1. *regularization-based* approaches that modify the classification objective to preserve past representations or foster more insightful representations, such as Elastic Weight Consolidation (EWC) and Learning without Forgetting (LwF);
-2. 
*memory-based* approaches that replay samples retrieved from a memory buffer along with every incoming mini-batch, including Experience Replay (ER) and Maximally Interfered Retrieval, with variations on how the memory is retrieved and how the model and memory are updated; and -3. *architectural* approaches including parameter-isolation approaches where new parameters are added for new tasks and leaving previous parameters unchanged such as Progressive Neural Networks (PNNs). - -Moreover, there are many methods that combine two or more of these techniques such as Averaged Gradient Episodic Memory (A-GEM) and Incremental Classifier and Representation Learning (iCaRL). - -Among the methods, **Experience Replay (ER)** is a classic replay-based method and widely used for online continual learning. Despite its simplicity, recent studies have shown ER still outperforms many of the newer methods that have come after that, especially for online continual learning . - - -### Model size and performance - -It is generally known across literature that deeper models increase performance. Bianco et al. conducted a survey of key performance-related metrics to compare across various architectures, including accuracy, model complexity, computational complexity, and accuracy density. Relationship between model width and performance is also been discussed, albeit less frequently. - -He et al. introduced Residual Networks (ResNets) which was a major innovation in computer vision by tackling the problem of degradation in deeper networks. ResNets do this by residual blocks to increase the accuracy of deeper models. Residual blocks that contain two ore more layers are stacked together, and "skip connections" are used in between these blocks. The skip connections act as an alternate shortcut for the gradient to pass through, which alleviates the issue of vanishing gradient. They also make it easier for the model to learn identity functions. 
As a result, ResNet improves the efficiency of deep neural networks with more neural layers while minimizing the percentage of errors. The authors compare models of different depths (composed of 18, 34, 50, 101, 152 layers) and show that accuracy increases with depth of the model. - - - -| | **ResNet18** | **ResNet34** | **ResNet50** | **ResNet101** | **ResNet152** | -|:------------------------:|:-------------:|:-------------:|:-------------:|:-------------:|:-------------:| -| **Number of Layers** | 18 | 34 | 50 | 101 | 152 | -| **Number of Parameters** | ~11.7 million | ~21.8 million | ~25.6 million | ~44.5 million | ~60 million | -| **Top-1 Accuracy** | 69.76% | 73.31% | 76.13% | 77.37% | 78.31% | -| **Top-5 Accuracy** | 89.08% | 91.42% | 92.86% | 93.68% | 94.05% | -| **FLOPs** | 1.8 billion | 3.6 billion | 3.8 billion | 7.6 billion | 11.3 billion | - -
Table 1: Comparison of ResNet Architectures
- -This leads to the question: do larger models perform better in continual learning? While much of the focus in continual learning research has often been on developing various strategies, methods, and establishing benchmarks, the impact of model scale remains a less explored path. - -Moreover, recent studies on model scale in slightly different contexts have shown conflicting results. Luo et al. highlights a direct correlation between increasing model size and the severity of catastrophic forgetting in large language models (LLMs). They test models of varying sizes from 1 to 7 billion parameters. Yet, Dyer et al. show a constrasting perspective in the context of pretrained deep learning models. Their results show that large, pretrained ResNets and Transformers are a lot more resistant to forgetting than randomly-initialized, trained-from-scratch models, and that this tendency increases with the scale of model and the pretraining dataset size. - -The relative lack of discussion on model size and the conflicting perspectives among existing studies indicate that the answer to the question is far from being definitive. In the next section, I will describe further how I approach this study. - - - -# Method -### Problem definition - -Online continual learning can be defined as follows: - -The objective is to learn a function $f_\theta : \mathcal X \rightarrow \mathcal Y$ with parameters $\theta$ that predicts the label $Y \in \mathcal Y$ of the input $\mathbf X \in \mathcal X$. Over time steps $t \in \lbrace 1, 2, \ldots \infty \rbrace$, a distribution-varying stream $\mathcal S$ reveals data sequentially, which is different from classical supervised learning. - -At every time step, - -1. $\mathcal S$ reveals a set of data points (images) $\mathbf X_t \sim \pi_t$ from a non-stationary distribution $\pi_t$ -2. Learner $f_\theta$ makes predictions $\hat Y_t$ based on current parameters $\theta_t$ -3. $\mathcal S$ reveals true labels $Y_t$ -4. 
Compare the predictions with the true labels, compute the training loss $L(Y_t, \hat Y_t)$ -5. Learner updates the parameters of the model to $\theta_{t+1}$ - - -### Task-agnostic and boundary-agnostic -In the context of class-incremental learning, I will adopt the definitions of task-agnostic and boundary-agnostic from Soutif et al. 2023. A *task-agnostic* setting refers to when task labels are not available, which means the model does not know that the samples belong to a certain task. A *boundary-agnostic* setting is considered, where information on task boundaries are not available. This means that the model does not know when the data distribution changes to a new task. - -| | **Yes** | **No** | -|:-------------------:|:--------------:|:-----------------:| -| **Task labels** | Task-aware | Task-agnotic | -| **Task boundaries** | Boundary-aware | Boundary-agnostic | - -
Table 2: Task labels and task boundaries. This project assumes task-agnostic and boundary-agnostic settings.
- - -### Experience Replay (ER) -In a class-incremental learning setting, the nature of the Experience Replay (ER) method aligns well with task-agnostic and boundary-agnostic settings. This is because ER focuses on replaying a subset of past experiences, which helps in maintaining knowledge of previous classes without needing explicit task labels or boundaries. This characteristic of ER allows it to adapt to new classes as they are introduced, while retaining the ability to recognize previously learned classes, making it inherently suitable for task-agnostic and boundary-agnostic continual learning scenarios. - -Implementation-wise, ER involves randomly initializing an external memory buffer $\mathcal M$, then implementing `before_training_exp` and `after_training_exp` callbacks to use the dataloader to create mini-batches with samples from both training stream and the memory buffer. Each mini-batch is balanced so that all tasks or experiences are equally represented in terms of stored samples. As ER is known be well-suited for online continual learning, it will be the go-to method used to compare performances across models of varying sizes. - -### Benchmark -For this study, the SplitCIFAR-10 is used as the main benchmark. SplitCIFAR-10 splits the popular CIFAR-10 dataset into 5 tasks with disjoint classes, each task including 2 classes each. Each task has 10,000 3×32×32 images for training and 2000 images for testing. The model is exposed to these tasks or experiences sequentially, which simulates a real-world scenario where a learning system is exposed to new categories of data over time. This is suitable for class-incremental learning scenarios. This benchmark is used for both testing online and offline continual learning in this study. - -### Metrics - -Key metrics established in earlier work in online continual learning are used to evaluate the performance of each model. 
- -**Average Anytime Accuracy (AAA)** -as defined in - -The concept of average anytime accuracy serves as an indicator of a model's overall performance throughout its learning phase, extending the idea of average incremental accuracy to include continuous assessment scenarios. This metric assesses the effectiveness of the model across all stages of training, rather than at a single endpoint, offering a more comprehensive view of its learning trajectory. - -$$\text{AAA} = \frac{1}{T} \sum_{t=1}^{T} (\text{AA})_t$$ - -**Average Cumulative Forgetting (ACF)** as defined in - -This equation represents the calculation of the **Cumulative Accuracy** ($b_k^t$) for task $k$ after the model has been trained up to task $t$. It computes the mean accuracy over the evaluation set $E^k_\Sigma$, which contains all instances $x$ and their true labels $y$ up to task $k$. The model's prediction for each instance is given by $\underset{c \in C^k_\Sigma}{\text{arg max }} f^t(x)_c$, which selects the class $c$ with the highest predicted logit $f^t(x)_c$. The indicator function $1_y(\hat{y})$ outputs 1 if the prediction matches the true label, and 0 otherwise. The sum of these outputs is then averaged over the size of the evaluation set to compute the cumulative accuracy. - - -$$ b_k^t = \frac{1}{|E^k_\Sigma|} \sum_{(x,y) \in E^k_\Sigma} 1_y(\underset{c \in C^k_\Sigma}{\text{arg max }} f^t(x)_c)$$ - -From Cumulative Accuracy, we can calculate the **Average Cumulative Forgetting** ($F_{\Sigma}^t$) by setting the cumulative forgetting about a previous cumulative task $k$, then averaging over all tasks learned so far: - -$$F_{\Sigma}^t = \frac{1}{t-1} \sum_{k=1}^{t-1} \max_{i=1,...,t} \left( b_k^i - b_k^t \right)$$ - -**Average Accuracy (AA) and Average Forgetting (AF)** -as defined in - -$a_{i,j}$ is the accuracy evaluated on the test set of task $j$ after training the network from task 1 to $i$, while $i$ is the current task being trained. 
Average Accuracy (AA) is computed by averaging this over the number of tasks. - -$$\text{Average Accuracy} (AA_i) = \frac{1}{i} \sum_{j=1}^{i} a_{i,j}$$ - -Average Forgetting measures how much a model's performance on a previous task (task $j$) decreases after it has learned a new task (task $i$). It is calculated by comparing the highest accuracy the model $\max_{l \in {1, \ldots, k-1}} (a_{l, j})$ had on task $j$ before it learned task $k$, with the accuracy $a_{k, j}$ on task $j$ after learning task $k$. - -$$\text{Average Forgetting}(F_i) = \frac{1}{i - 1} \sum_{j=1}^{i-1} f_{i,j} $$ - -$$f_{k,j} = \max_{l \in \{1,...,k-1\}} (a_{l,j}) - a_{k,j}, \quad \forall j < k$$ - -In the context of class-incremental learning, the concept of classical forgetting may not provide meaningful insight due to its tendency to increase as the complexity of the task grows (considering more classes within the classification problem). Therefore, recommendeds avoiding relying on classical forgetting as a metric in settings of class-incremental learning, both online and offline settings. Thus, Average Anytime Accuracy (AAA) and Average Cumulative Forgetting (ACF) are used throughout this experiment, although AA and AF are computed as part of the process. - -### Model selection -To compare learning performance across varying model depths, I chose to use the popular ResNet architectures, particularly ResNet18, ResNet34, and ResNet50. As mentioned earlier in this blog, ResNets were designed to increase the performance of deeper neural networks, and their performance metrics are well known. While using custom models for more variability in sizes was a consideration, existing popular architectures were chosen for better reproducibility. - -Moreover, while there are newer versions (i.e. ResNeXt) that have shown to perform better without a huge increase in computational complexity, for this study the original smaller models were chosen to avoid introducing unnecessary variables. 
ResNet18 and ResNet34 have the basic residual network structure, and ResNet50, ResNet101, and ResNet152 use slightly modified building blocks that have 3 layers instead of 2. This ”bottleneck design” was made to reduce training time. The specifics of the design of these models are detailed in the table from the original paper by He et al.. - -{% include figure.html path="assets/img/2023-11-09-eunhae-project/resnets_comparison.png" class="img-fluid" caption="ResNet architecture. Table from He et al. (2015)"%} - -Moreover, in order to observe the effect of model width on performance, I also test a slim version of ResNet18 that has been used in previous works. The slim version uses fewer filters per layer, reducing the model width and computational load while keeping the original depth. - -### Saliency maps - -I use saliency maps to visualize “attention” of the networks. Saliency maps are known to be useful for understanding which parts of the input image are most influential for the model's predictions. By visualizing the specific areas of an image that a CNN considers important for classification, saliency maps provide insights into the internal representation and decision-making process of the network. - - -# Experiment - -### The setup - -- Each model was trained from scratch using the Split-CIFAR10 benchmark with 2 classes per task, for 3 epoches with a mini-batch size of 64. -- SGD optimizer with a 0.9 momentum and 1e-5 weight decay was used. The initial learning rate is set to 0.01 and the scheduler reduces it by a factor of 0.1 every 30 epochs, as done in . -- Cross entropy loss is used as the criterion, as is common for image classification in continual learning. -- Basic data augmentation is done on the training data to enhance model robustness and generalization by artificially expanding the dataset with varied, modified versions of the original images. -- Each model is trained offline as well to serve as baselines. 
-- Memory size of 500 is used to implement Experience Replay. This represents 1% of the training dataset. - - -### Implementation - -The continual learning benchmark was implemented using the Avalanche framework, an open source continual learning library, as well as the code for online continual learning by Soutif et al.. The experiments were run on Google Colab using NVIDIA Tesla T4 GPU. - -| | **Experiment 1** | **Experiment 2** | **Experiment 3** | **Experiment 4** | **Experiment 5** | **Experiment 6** | **Experiment 7** | -|:----------------------------:|:-----------------:|:-----------------:|:-----------------:|:-----------------:|:-----------------:|:-----------------:|:-----------------:| -| **Model** | ResNet18 | ResNet34 | ResNet50 | SlimResNet18 | ResNet18 | ResNet34 | ResNet50 | -| **Strategy** | Experience Replay | Experience Replay | Experience Replay | Experience Replay | Experience Replay | Experience Replay | Experience Replay | -| **Benchmark** | SplitCIFAR10 | SplitCIFAR10 | SplitCIFAR10 | SplitCIFAR10 | SplitCIFAR10 | SplitCIFAR10 | SplitCIFAR10 | -| **Training** | Online | Online | Online | Online | Offline | Offline | Offline | -| **GPU** | V100 | T4 | A100 | T4 | T4 | T4 | T4 | -| **Training time (estimate)** | 3h | 4.5h | 5h | 1h | <5m | <5m | <5m | - -
Table 3: Details of experiments conducted in this study
- - -# Results - -Average Anytime Accuracy (AAA) decreases with model size (Chart 1), with a sharper drop from ResNet34 to ResNet50. The decrease in AAA is more significant in online learning than offline learning. - -{% include figure.html path="assets/img/2023-11-09-eunhae-project/AAA_on_off.png" class="img-fluid" caption="Chart 1: Average Anytime Accuracy (AAA) of different sized ResNets in online and offline continual learning"%} - -When looking at average accuracy for validation stream for online CL setting (Chart 2), we see that the rate to which accuracy increases with each task degrade with larger models. Slim-ResNet18 shows the highest accuracy and growth trend. This could indicate that larger models are worse at generalizing to a class-incremental learning scenario. - -{% include figure.html path="assets/img/2023-11-09-eunhae-project/stream_acc1.png" class="img-fluid" caption="Chart 2: Validation stream accuracy (Online CL)"%} - -| | **Average Anytime Acc (AAA)** | **Final Average Acc** | -|:-----------------:|:-----------------------------:|:---------------------:| -| **Slim ResNet18** | 0.664463 | 0.5364 | -| **ResNet18** | 0.610965 | 0.3712 | -| **ResNet34** | 0.576129 | 0.3568 | -| **ResNet50** | 0.459375 | 0.3036 | - -
Table 4: Accuracy metrics across differently sized models (Online CL)
- -Now we turn to forgetting. - -Looking at Average Cumulative Forgetting (ACF), we see that for online CL setting, ResNet34 performs the best (with a slight overlap at the end with ResNet18), and ResNet50 shows the mosts forgetting. An noticeable observation in both ACF and AF is that ResNet50 performed better initially but forgetting started to increase after a few tasks. - -{% include figure.html path="assets/img/2023-11-09-eunhae-project/forgetting_online.png" class="img-fluid" caption="Chart 3: forgetting curves, Online CL (Solid: Average Forgetting (AF); Dotted: Average Cumulative Forgetting (ACF))"%} - -However, results look different for offline CL setting. ResNet50 has the lowest Average Cumulative Forgetting (ACF) (although with a slight increase in the middle), followed by ResNet18, and finally ResNet34. This differences in forgetting between online and offline CL setting is aligned with the accuracy metrics earlier, where the performance of ResNet50 decreases more starkly in the online CL setting. - -{% include figure.html path="assets/img/2023-11-09-eunhae-project/forgetting_offline.png" class="img-fluid" caption="Chart 4: Forgetting curves, Offline CL (Solid: Average Forgetting (AF); Dotted: Average Cumulative Forgetting (ACF))"%} - - -Visual inspection of the saliency maps revealed some interesting observations. When it comes to the ability to highlight intuitive areas of interest in the images, there seemed to be a noticeable improvement from ResNet18 to ResNet34, but this was not necessarily the case from ResNet34 to ResNet50. This phenomenon was more salient in the online CL setting. 
- - -**Online** - -{% include figure.html path="assets/img/2023-11-09-eunhae-project/saliency_online.png" class="img-fluid" caption="Image: Saliency map visualizations for Online CL"%} - - -**Offline** - -{% include figure.html path="assets/img/2023-11-09-eunhae-project/saliency_offline.png" class="img-fluid" caption="Image: Saliency map visualization for Offline CL"%} - -Interestingly, Slim-ResNet18 seems to be doing better than most of them, certainly better than its plain counterpart ResNet18. A further exploration of model width on performance and representation quality would be an interesting avenue of research. - -**Slim-ResNet18** - -{% include figure.html path="assets/img/2023-11-09-eunhae-project/saliencymap_exp4.png" class="img-fluid" caption="Image: Saliency map visualization (Slim ResNet18)"%} - - -# Discussion - -In this study, I compared key accuracy and forgetting metrics in online continual learning across ResNets of different depths and width, as well as brief qualitative inspection of the models' internal representation. These results show that larger models do not necessary lead to better continual learning performance. We saw that Average Anytime Accuracy (AAA) and stream accuracy dropped progressively with model size, hinting that larger models struggle to generalize to newly trained tasks, especially in an online CL setting. Forgetting curves showed similar trends but with more nuance; larger models perform well at first but suffer from increased forgetting with more incoming tasks. Interestingly, the problem was not as pronounced in the offline CL setting, which highlights the challenges of training models in a more realistic, online continual learning context. - -Why do larger models perform worse at continual learning? One of the reasons is that larger models tend to have more parameters, which might make it harder to maintain stability in the learned features as new data is introduced. 
This makes them more prone to overfitting and forgetting previously learned information, reducing their ability to generalize. - -Building on this work, future research could investigate the impact of model size on CL performance by exploring the following questions: - -- Do pre-trained larger models (vs trained-from-scratch models) generalize better in continual learning settings? -- Do longer training improve relatively performance of larger models in CL setting? -- Can different CL strategies (other than Experience Replay) mitigate the degradation of performance in larger models? -- Do slimmer versions of existing models always perform better? -- How might different hyperparameters (i.e. learning rate) impact CL performance of larger models? - -# Conclusion - -To conclude, this study has empirically explored the role of model size on performance in the context of online continual learning. Specifically, it has shown that model size matters when it comes to continual learning and forgetting, albeit in nuanced ways. These findings contribute to the ongoing discussions on the role of the scale of deep learning models on performance and have implications for future area of research. 
diff --git a/_posts/2023-11-09-generating-robust-networks.md b/_posts/2023-11-09-generating-robust-networks.md deleted file mode 100644 index cfda8f15..00000000 --- a/_posts/2023-11-09-generating-robust-networks.md +++ /dev/null @@ -1,312 +0,0 @@ ---- -layout: distill -title: Training Robust Networks -description: Exploring ResNet on TinyImageNet, unveiling brittleness and discovering simple robustment enhancement strategies via hyperparameter optimization -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Jackie Lin - affiliations: - name: MIT - - name: Nten Nyiam - affiliations: - name: MIT - -# Must be the exact same name as your blogpost -bibliography: 2023-11-09-generating-robust-networks.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Related Works - - name: Methodology - - name: Results - - name: Conclusion and Next Steps - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. ---- -# Introduction -In the recent years, deep neural networks have emerged as a dominant force in the field of machine learning, achieving remarkable success across a variety of tasks, from VGG-16 in image classification to ChatGPT in natural language modeling. However, the very complexity that allows deep neural networks to learn and represent complex patterns and relationships can also leave them susceptible to challenges such as overfitting, adversarial attacks, and interpretability. 
The brittleness of deep neural networks, in particular, poses a significant challenge toward their deployment in real-world applications, especially those where reliability is paramount, like medical image diagnosis and autonomous vehicle navigation. Consequently, it is crucial to develop a better understanding of deep architectures and explore strategies for enhancing robustness. This project focuses specifically on ResNet, a model introduced in 2015 for image classification that is still widely used today. In particular, we study the model's vulnerability to adversarial perturbations and, subsequently, work through a strategy to enhance its resilience through data augmentation and hyperparameter optimization. - -# Related Works -ResNet is a convolutional neural network architecture introduced in 2015 that sought to overcome numerical instability issues in deep networks and simplify the complexity of architecture search. It achieved this by incorporating skip connections, essentially allowing the training procedure to dynamically determine the optimal number of layers for the network. ResNet is trained on the ImageNet dataset, a popular benchmark in object category classification with 1,000 classes and millions of images. For our project, we use ResNet-18, a version of the original ResNet-34 model that is 18 layers deep, and TinyImageNet, a smaller version of ImageNet with around 100,000 images and 200 classes. This is largely for computational ease. - -
-
- {% include figure.html path="assets/img/2023-11-09-generating-robust-networks/resnet.png" class="img-fluid" %} -
-
-
- Figure 1. ResNet-18 Architecture -
- -
-
- {% include figure.html path="assets/img/2023-11-09-generating-robust-networks/tinyimagenet.png" class="img-fluid" %} -
-
-
- Figure 2. Sample Images from TinyImageNet -
- -The brittleness of many deep neural networks for computer vision, including ResNet, is well documented. For example, adding a tiny amount of random Gaussian noise, imperceptible to the human eye, can dramatically affect the accuracy and confidence of a network. In fact, we can optimize over the input image to generate small, non-random perturbations that can be used to alter the network's prediction behavior arbitrarily, a vulnerability that applies to a variety of networks. - -In this project, we investigate two small perturbations: adding random Gaussian noise and modifying the colors of a small subset of pixels. We use hyperparameter search to fine-tune ResNet-18, aiming to create a network robust to these perturbations without compromising significantly on accuracy. Specifically, we examine general hyperparameters like batch size, learning rate, number of frozen layers, and more. The ultimate goal is to define a straightforward and resource-efficient strategy for mitigating brittleness that can potentially be extended to other architectures and domains. - -# Methodology -## Baseline Model -The out-of-the-box ResNet18 model is pretrained on ImageNet, achieving about 55% accuracy on the ImageNet validation set. TinyImageNet is a subset of ImageNet with fewer classes; there is a potential need for further fine-tuning of the out-of-the-box model to optimize performance. Thus, we start off by performing a simple hyperparameter grid search over batch size and learning rate. Each model is trained on the TinyImageNet training set, a dataset of 40,000 images (downsampled from 100,000 for computational ease) with 200 classes (roughly uniform class distribution). The baseline model is then selected based on accuracy on the TinyImageNet validation set, a uniformly balanced dataset of 10,000 images. - -## Generating Adversarial Perturbations -Next, we use gradient descent to create adversarial perturbations. 
The first perturbation is adding a small amount of Gaussian noise. We try to maximize the probability of the input image belonging to a wrong class (the inverse of the standard cross-entropy classification objective) while also penalizing the magnitude of the noise. This approach is more efficient and controllable compared to attempting to add a random sample of Gaussian noise with the hope of inducing misclassification. - -
-
-
- {% include figure.html path="assets/img/2023-11-09-generating-robust-networks/noise_steps.png" class="img-fluid" %} -
-
-
-
- Figure 3. Noise added to image during each step in a sample gradient descent path for the first perturbation -
- -
-
-
- {% include figure.html path="assets/img/2023-11-09-generating-robust-networks/noise_examples.png" class="img-fluid" %} -
-
-
-
- Figure 4. Sample images and their Gaussian-perturbed, misclassified versions -
- -The other perturbation is randomly selecting a small subset of pixels (0.5%) and adjusting their color until the image is misclassified by the baseline model. A gradient descent approach that maximizes the probability of the input image belong to a wrong class is used to implement this perturbation; however, it is much more sensitive to initialization and can require retries, making it less resource-efficient. - -
-
-
- {% include figure.html path="assets/img/2023-11-09-generating-robust-networks/pixel_steps.png" class="img-fluid" %} -
-
-
-
- Figure 5. Noise added to image during each step in a sample gradient descent path for the second perturbation -
- -
-
-
- {% include figure.html path="assets/img/2023-11-09-generating-robust-networks/pixel_examples.png" class="img-fluid" %} -
-
-
-
- Figure 6. Sample images and their pixel-perturbed, misclassified versions -
- -We generate 11,000 adversarial examples using the Gaussian noise perturbation technique on the training examples that the baseline model correctly classifies. Of these adversarial examples, we use 10,000 of them to augment the training dataset (call it the augmented training set) and reserve 1,000 for hyperparameter optimization (call it the perturbed training set). We also generate 2,000 adversarial examples using the same perturbation technique on the validation examples that the baseline model correctly classifies. 1,000 of these are used for hyperparameter optimization (call it the perturbed validation set) while the rest are saved for out-of-sample evaluation (call it the hold-out validation set). - -Note that we keep adversarial examples generated from the validation set out of the augmented training set to avoid lookahead bias. We want to avoid allowing the model to gain insights into the characteristics of examples that it will encounter in the validation set (since perturbed images are very similar to the original images), ensuring a more accurate assessment of the model's robustness and generalization capabilities. - -Finally, we generate an additional 500 examples using the pixel modification perturbation technique on the validation examples that the baseline correctly classifies (call it the out-of-distribution hold-out set). These examples are reserved for out-of-sample and out-of-distribution evaluation, assessing the model's ability to perform well on adversarial perturbations it has never seen before. - -## Hyperparameter Optimization to Create a More Robust Model -Equipped with the augmented/additional datasets from the previous step, we start the process of model creation. The relevant metrics for selecting a model are original validation accuracy (derived from the original validation dataset from TinyImageNet), perturbed training accuracy, and perturbed validation accuracy. 
It is crucial to look at original validation accuracy to ensure that we are not creating robust models by compromising significantly on the original image classification task. In addition, accuracy on the perturbed train dataset tells us how well our model adjusts to the perturbation, while accuracy on the perturbed validation dataset provides an additional perspective by evaluating how well the model generalizes to perturbations on images it has never seen before. The same set of metrics is used in evaluating the final model on out-of-sample datasets, in addition to accuracy on the out-of-distribution hold-out set. - -We examine how varying four different hyperparameters affects the robustness of ResNet-18. The first hyperparameter involves initializing the model with either weights from the baseline model or the default pre-trained weights. The next hyperparameter is how many layers of ResNet-18 are frozen during the training procedure. The last two hyperparameters are batch size and learning rate. It is important to note that we do not conduct a search over a four-dimensional hyperparameter grid for computational reasons. Instead, we fix some hyperparameters at reasonable default values while we vary over the other hyperparameters. Using the insights gleaned from this hyperparameter search, we proceed to train the final model. - -## Comparing Models via Visualization -Finally, we transform the feature maps generated for an input image into interpretable visualizations to better understand the learned representations within the models. These feature maps capture the activations of learned filters or kernels across different regions of the input images and are the basis for our analysis. Each residual block in a ResNet consists of multiple convolutional layers. 
We register forward hooks (a feature in PyTorch that allows us to register a function to be called each time a forward pass is executed through a layer) for each convolutional and linear layer in the network to capture and store the activations produced during the forward pass. The layers in the ResNet model are as follows: - -``` -Layer: conv1, Activation shape: torch.Size([1, 64, 112, 112]) -Layer: layer1.0.conv1, Activation shape: torch.Size([1, 64, 56, 56]) -Layer: layer1.0.conv2, Activation shape: torch.Size([1, 64, 56, 56]) -Layer: layer1.1.conv1, Activation shape: torch.Size([1, 64, 56, 56]) -Layer: layer1.1.conv2, Activation shape: torch.Size([1, 64, 56, 56]) -Layer: layer2.0.conv1, Activation shape: torch.Size([1, 128, 28, 28]) -Layer: layer2.0.conv2, Activation shape: torch.Size([1, 128, 28, 28]) -Layer: layer2.0.downsample.0, Activation shape: torch.Size([1, 128, 28, 28]) -Layer: layer2.1.conv1, Activation shape: torch.Size([1, 128, 28, 28]) -Layer: layer2.1.conv2, Activation shape: torch.Size([1, 128, 28, 28]) -Layer: layer3.0.conv1, Activation shape: torch.Size([1, 256, 14, 14]) -Layer: layer3.0.conv2, Activation shape: torch.Size([1, 256, 14, 14]) -Layer: layer3.0.downsample.0, Activation shape: torch.Size([1, 256, 14, 14]) -Layer: layer3.1.conv1, Activation shape: torch.Size([1, 256, 14, 14]) -Layer: layer3.1.conv2, Activation shape: torch.Size([1, 256, 14, 14]) -Layer: layer4.0.conv1, Activation shape: torch.Size([1, 512, 7, 7]) -Layer: layer4.0.conv2, Activation shape: torch.Size([1, 512, 7, 7]) -Layer: layer4.0.downsample.0, Activation shape: torch.Size([1, 512, 7, 7]) -Layer: layer4.1.conv1, Activation shape: torch.Size([1, 512, 7, 7]) -Layer: layer4.1.conv2, Activation shape: torch.Size([1, 512, 7, 7]) -Layer: fc, Activation shape: torch.Size([1, 1000]) -``` - -After obtaining these activations, we compute the average activation values across the channels (neurons) within a specified layer of interest. 
This process provides insights into which regions or patterns in the input images contribute significantly to the neuron activations within that layer. We then create heatmap visualizations based on these average activations, highlighting the areas of the input data that have the most substantial impact on the network's feature detection process. This allows us to gain valuable insights into how the network perceives and prioritizes various features across its layers, aiding in our understanding of the model's inner workings. - -We use this approach to compare the baseline model to the final model, aiming to identify significant differences in feature prioritization or the patterns detected at various layers. - -
-
- {% include figure.html path="assets/img/2023-11-09-generating-robust-networks/heatmap_sample.png" class="img-fluid" %} -
-
-
- Figure 7. Heatmap visualization at four different layers when an image of a goldfish is passed into the ResNet -
- -# Results and Discussion -## Baseline Model -First, we perform a grid search over batch sizes ranging from 128 to 512 and learning rates ranging from 0.0001 to 0.01. - -
-
-
- {% include figure.html path="assets/img/2023-11-09-generating-robust-networks/baseline1.png" class="img-fluid" %} -
-
-
-
- Figure 8. Hyperparameter grid for baseline model -
- -The results from the first hyperparameter search suggest that conservative learning rates and large batch sizes lead to good performance. Thus, we perform a finer grid search over batch sizes ranging from 256 to 512 and learning rates ranging from 0.00001 to 0.0001. - -
-
-
- {% include figure.html path="assets/img/2023-11-09-generating-robust-networks/baseline2.png" class="img-fluid" %} -
-
-
-
- Figure 9. Finer hyperparameter grid for baseline model -
- -Based on the results from the second hyperparameter search, we choose our baseline model to be ResNet-18 fine-tuned with a batch size of 256 and a learning rate of 0.00005. The baseline model achieves nearly 73% accuracy on the validation set, which is possibly due to the fact that TinyImageNet has fewer classes, so classification may be an easier task. - -## Effect of Hyperparameters -#### Number of Unfrozen Layers -Next, we evaluate how the number of unfrozen layers (up to 3) affects the robustness of the trained models, whose weights can either be initialized from the baseline model or from the pre-trained/default model (in the diagram below, `is_finetuned=True` corresponds to the baseline model). - -
-
- {% include figure.html path="assets/img/2023-11-09-generating-robust-networks/final_line1.png" class="img-fluid" %} -
-
-
- {% include figure.html path="assets/img/2023-11-09-generating-robust-networks/final_bar1.png" class="img-fluid" %} -
-
-
- Figure 10. Performance of trained models as number of frozen layers and source of initialized weights changes -
- -First, we observe that training for more epochs does not improve the metrics of interest. This implies that training for robustness can be computationally efficient. Next, we observe there is a substantial drop in accuracy for the perturbed datasets compared to the original validation dataset, which is to be expected. Pairing the accuracies for the perturbed datasets across hyperparameter combinations, we observe that they are tightly correlated, which implies that our models are effectively adapting to the perturbation. - -One interesting observation to note here is that accuracies on the perturbed datasets are significantly higher for the model initialized with default weights (27% compared to 10%). An intuitive explanation for this is that we have deliberately engineered a brittle baseline model, so the model is in a region of the optimization landscape characterized by high accuracy but low robustness. If we want to achieve high accuracy and high robustness, we may need to start from a less unfavorable position in the optimization landscape. - -Finally, we observe that freezing some layers can enhance robustness for models initialized from the default weights at the cost of performance on the original task. This aligns with intuition, since allowing all the weights to vary can lead to overfitting, resulting in more brittle networks. - -#### Batch Size -Next, we evaluate how batch size (ranging from 4 to 512) affects the robustness of the trained models. - -
-
- {% include figure.html path="assets/img/2023-11-09-generating-robust-networks/final_line2.png" class="img-fluid" %} -
-
-
- {% include figure.html path="assets/img/2023-11-09-generating-robust-networks/final_bar2.png" class="img-fluid" %} -
-
-
- Figure 11. Performance of trained models as batch size and source of initialized weights changes -
- -We notice immediately that batch size has a considerable effect on robustness. For both the perturbed training set and the perturbed validation set, accuracies are markedly lower with large batch sizes (around 15%) and higher with small batch sizes (around 70%). As expected, this comes at the expense of lower performance on the original task, with original validation accuracy dropping 10% as the batch size decreases from 512 to 4. Depending on the use case, this may be an efficient tradeoff to make! - -#### Learning Rate -Finally, we evaluate how learning rate (ranging from 0.00001 to 0.001) affects the robustness of the trained models. - -
-
- {% include figure.html path="assets/img/2023-11-09-generating-robust-networks/final_line3.png" class="img-fluid" %} -
-
-
- {% include figure.html path="assets/img/2023-11-09-generating-robust-networks/final_bar3.png" class="img-fluid" %} -
-
-
- Figure 12. Performance of trained models as learning rate and source of initialized weights changes -
- -Like batch size, learning rate significantly impacts robustness. The sweet spot for learning rate in terms of robustness seems to be around 0.00025, with original validation accuracy dropping as the learning rate becomes more conservative; a learning rate of 0.00025 leads to a 3% drop in performance. Like before, this may be a worthwhile tradeoff to make. - -## Out of Sample Evaluation -Using the insights gained from the hyperparameter search, we define the final model with the following hyperparameters: - -``` -is_finetuned=False -num_unfrozen_layers=3 -batch_size=8 -learning_rate=0.00025 -``` - -Of course, this is likely not the optimal hyperparameter combination, since we were not able to perform a full grid search. The results are as follows: - -
- Table 1. Performance of final model -
- -| Dataset | Accuracy | -| ------------------------------ | -------- | -| Original validation | 0.522754 | -| Perturbed training | 0.569572 | -| Perturbed validation | 0.442720 | -| Hold-out validation | 0.485621 | -| Out-of-distribution validation | 0.489786 | - -Original validation, perturbed validation, and hold-out validation accuracy are somewhat lower than the optimistic estimates derived from the hyperparameter search. However, we observe that we are able to achieve nearly 50% accuracy on the out-of-distribution validation set, which contains pixel modification perturbations that the model was never trained on, underscoring the robustness and adaptability of our model. - -## Model Comparison -Lastly, we observe the progression of feature map representations: starting from basic visual elements such as edges and textures in the initial layers, to more complex patterns in intermediate layers, and culminating in sophisticated, high-level feature representations in the deeper layers. This layered evolution is integral to the network’s ability to analyze and recognize complex images. - -When comparing the baseline model to the final model, there are very few (if any) differences in the initial layers. By the intermediate and deeper layers, there are clear differences in which aspects of the images have the greatest activation. This observation aligns with the foundational principles of convolutional neural networks, where initial layers tend to be more generic, capturing universal features that are commonly useful across various tasks. As a result, the similarity in the initial layers between the baseline and final models suggests that these early representations are robust and essential for basic image processing, irrespective of specific model optimizations or task-focused training. 
- -However, the divergence observed in the intermediate and deeper layers is indicative of the specialized learning that occurs as a result of hyperparameter tuning in the final model. These layers, being more task-specific, have adapted to capture more complex and abstract features relevant to the particular objectives of the final model. - -
-
- {% include figure.html path="assets/img/2023-11-09-generating-robust-networks/comparison_original.png" class="img-fluid" %} -
-
-
- Figure 13. Comparison of the heatmaps for both models when passed in an image of a frog -
- -
-
- {% include figure.html path="assets/img/2023-11-09-generating-robust-networks/comparison_perturbed.png" class="img-fluid" %} -
-
-
- Figure 14. Comparison of the heatmaps for both models when passed in a perturbed image of a frog -
- -# Conclusion and Next Steps -In this project, we have undertaken a comprehensive exploration of enhancing ResNet through data augmentation with adversarial examples and straightforward hyperparameter tuning. Key highlights include the computational efficiency and simplicity of the employed technique, the resulting model's ability to adapt to both seen and unseen perturbations, and the capacity to finely control tradeoffs between robustness and accuracy through the manipulation of diverse hyperparameters. - -There are many potential avenues for future exploration. One prospect involves expanding and refining the discussed techniques by continuing to explore the hyperparameter space, considering additional parameters or refining the search range. Additionally, applying this analysis to different architectures and domains could reveal further insights. Finally, broadening the scope of perturbations presents another avenue, offering the potential to develop more practical models tailored for real-world applications. \ No newline at end of file diff --git a/_posts/2023-11-09-interpretability-of-toy-tasks.md b/_posts/2023-11-09-interpretability-of-toy-tasks.md deleted file mode 100644 index 5100e87b..00000000 --- a/_posts/2023-11-09-interpretability-of-toy-tasks.md +++ /dev/null @@ -1,422 +0,0 @@ ---- -layout: distill -title: The Effect of Activation Functions On Superposition in Toy Models -description: An in-depth exploration of how different activation functions influence superposition in neural networks. 
-date: 2023-12-12 -htmlwidgets: true -authors: - - name: Vedang Lad - url: "https://www.vedanglad.com" - affiliations: - name: MIT - - name: Timothy Kostolansky - url: "https://tim0120.github.io/" - affiliations: - name: MIT - -bibliography: 2023-11-09-interpretability-of-toy-tasks.bib -toc: - - name: Introduction to Superposition - - name: Superposition and Previous Work - subitems: - - name: Monosemanticity and Polysemanticity - - name: Motivation and Notation - subitems: - - name: Problem Specification - - name: Features - subitems: - - name: Sparsity - - name: Importance - - name: Dataset - - name: Network - - name: Loss - - name: Results - subitems: - - name: ReLU - - name: GeLU/SiLU - - name: Sigmoid - - name: Tanh - subitems: - - name: A Note on Sigmoid and Tanh - - name: SoLU - - name: Bringing Them All Together - - name: Conclusion - ---- - - -## Introduction to Superposition - - -With the recent emergence of grokking, mechanistic interpretability research has trended towards understanding how models learn . A central concept in this pursuit is superposition - a single neuron learning multiple "features." - - -Features are the distinguishing properties of data points, the “things” that allow a neural network to learn the difference between, say, a dog and a cat, or a Phillip Isola and a Jennifer Aniston. Features are the building blocks that determine what makes one data point different from another. In many cases, features discovered by and encoded within neural networks correspond to human-understandable ideas. For example, in language models there exist embedding vectors describing relations like gender or relative size (e.g., the famous vec(“king”) - vec(“man”) + vec(“woman”) =~ vec(“queen”)). It has been found that language models often map ideas like these to features within their parameters. Human understanding is not necessary though, as models can find and map features that exist beyond the perception of humans. 
This is an important part of the success (and dual inscrutability) of modern deep models, as these models can determine features and relationships within the data that allow them to model large datasets, like language, very well. - -In this work we: - -1. Explain Superposition, why it may occur, and why it is important -2. Motivate a framework to easily study Superposition -3. Study how activation functions affect Superposition - - -## Superposition and Previous Work -Let us elaborate further. If you were to train some neural network and visualize the weights - chances are you would see some mess that looks like this: - -
-
- {% include figure.html path="/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix_equation.png" class="img-fluid" %} -
-
- -You are likely looking at superposition! - - -As hypothesized by , superposition is a phenomenon which occurs when the number of features being learned by a model is greater than the number of parameters in that model. To capture $n$ features with only $m < n$ parameters, the model must compress its representation - how can a model represent $n > m$ features using only $m$ parameters? The answer is non-linearity. Clearly, the activation function is key to understanding how superposition occurs - unexplored by other work in the field. explores the activation function in transformer MLP, but not in the setting we present here. - -But why do we care about Superposition? Why spend time studying this? - - -While it may seem tangential, Superposition offers important insights into Large Language Models (LLMs)! While LLMs are billions of parameters large, this is still not enough for a one-to-one mapping to “features" on the internet. Therefore LLMs also MUST exhibit superposition to learn. We focus our current work on the $\textit{bottleneck superposition}$ regime, but has shown that the picture is far more complicated than presented in . Namely, varying the initialization can change how superposition unfolds. To normalize across experiments, we initialize all weights using the Xavier norm, as outlined by . However, this is certainly a limitation of our presented work. A more rigorous analysis of superposition with activation functions would explore it outside the context of the bottleneck regime. We leave this for future work. - -
-
- {% include figure.html path="/assets/img/2023-11-09-interpretability-of-toy-tasks/feature_visual.png" class="img-fluid" %} -
-
-
- From Distill Blog: "Feature visualization allows us to see how GoogLeNet, trained on the ImageNet dataset, builds up its understanding of images over many layers."
- - -Previous research, as detailed in , has predominantly explored superposition within the confines of toy models utilizing the Rectified Linear Unit (ReLU) activation function. However, to extend these findings to contemporary neural networks, it is crucial to investigate the influence of different activation functions on superposition. Different activation functions provide different ways for a model to use superposition to its advantage. - - -So you train a neural network - what happens at the neuron level? -There are three possibilities. As the network trains each neuron has three choices: - - -1. The neuron chooses not to encode the “features” -2. The neuron chooses to dedicate itself to one feature -3. The neuron chooses to encode multiple features - - -(We anthropomorphize - The neuron doesn’t choose to do anything - there is no free will - you are born into a loss landscape and an optimizer telling you what to do.) - - -In linear models, each neuron is limited to representing only the most significant features (2), discarding others (1). Conversely, superposition, enabled by non-linear activation functions, adopts a more inclusive approach (3), trying to encode multiple features per neuron and learning efficient representational shortcuts. - - -While ReLU bears similarity to the Gaussian Error Linear Unit (GeLU) used in modern GPT architectures, a deeper understanding of how different nonlinear activations impact superposition can provide crucial insights. Such understanding is key to unraveling the complex mechanisms through which neural networks utilize non-linearities, a cornerstone in the broader narrative of neural network interpretability. - -### Monosemanticity and Polysemanticity -To connect to existing literature (2) and (3) above are given the names monosemanticity and polysemanticity. We will also follow this notation going forward. 
- - -To describe further, the idea of superposition in neural networks leads us to two distinct types of neuron behaviors: monosemanticity and polysemanticity. - - -Monosemantic neurons are those that specialize in a single, distinct feature, acting as dedicated detectors. This characteristic is often observed in the intermediate layers of architectures like Convolutional Neural Networks (CNNs), where neurons become adept at recognizing specific patterns, such as curves or colors. -Polysemantic neurons do not align with just one feature but engage with multiple features simultaneously, offering a broader and more nuanced understanding of the data. This trait is essential for handling complex, high-dimensional datasets but comes at the cost of reduced interpretability. - - -## Motivation and Notation - - -Our work extends the work done in by examining how the changing of the activation function on toy model networks affects the behavior and interpretability of these networks. uses the canonical ReLU activation function to add non-linearity to two-layer models to analyze how superposition occurs within small networks. They did not generalize their work to other activation functions, which, we find, result in **distinct** new phenomena. Our work compares the ReLU function with five other common activation functions: GeLU, SiLU, Sigmoid, Tanh, and SoLU. We hope that generalizing the phenomenon across activation functions can push the toy dataset to be closer to realistic ML settings. - - -### Problem Specification - -The models in this experiment will be learning how to replicate a length-$n$ vector of inputs in the range $[0, 1]$ with a compression to a length-$m$ embedding (where $n>m$). The model will then use the length-$m$ embedding to recreate the length-$n$ input, using a non-linear activation function to allow for superposition. - -We will run two variations of the experiment. 
One variation of the experiment will involve compressing inputs of size $n=10$ to an embedding of size $m=5$. This experiment aims to see how superposition occurs across many features which are encoded in a bottleneck with half the number of spots as there are features. The second variation of the experiment will involve compressing inputs of size $n=2$ to an embedding of size $m=1$. This experiment aims to understand precisely how the model encodes the second "extra" feature in a variety of settings. - -To set up this experiment, we need to create a dataset that allows for superposition to occur and that also allows for interpretability of the superposition. To motivate this further, we begin with a careful discussion of features. - - -### Features - - -Features are the salient “things” that a neural network learns to differentiate inputs . - - -Technically, features are the properties which neural networks try to extract from data during learning to compress inputs to useful representations during inference. Although features can map to human-understandable concepts (e.g., dog ears), they can also represent properties of the data that are not immediately apparent to the human brain. To experiment with superposition, we need to encode features in a way that we can understand. In other words, we do not want our experimental model to learn features that we are unaware of. This would make it hard for us to interpret how the model maps features in the data to embeddings within its parameters, consequently obscuring how superposition works. To this aim, we must generate features within the training set for our model which are simple and understandable to us a priori. Similar to , we use as each input a vector with entries drawn independently from a uniform distribution over $[0, 1]$. Making each entry independent of the others enforces that each entry is its own (artificial) feature with no correlation to the other features. 
- - -Here we define two important augmentations that we used in the dataset to simulate real-world features: sparsity and importance. - - -#### Sparsity - - -Sparsity is a measure of how often a specific feature is present in a dataset. A feature is characterized as “sparse” if it only appears in a small fraction of the inputs to the model. Similarly, features that are “dense” appear in many of the inputs. We will also use the term 'density', which is the complement of sparsity, defined as $1-S$. - - -Specifically, a feature with a sparsity of $S \in [0, 1]$ has a probability $S$ of being expressed in any given input. If we have $S=0$, this means that the feature is expressed in every input, whereas if we have $S=0.5$, this means that the feature is expected to be expressed in about half of the inputs. - - -In our experiment, we train models at different sparsities to capture how sparsity affects superposition. - - -#### Importance - - -Not all features are created equal! - - -Some features are more useful than others in determining relevant information about inputs. For instance, when building a dog detector - capturing features related to dogs’ faces are extremely important! A model would need to pick up salient features of dogs, perhaps floppy ears and snouts. Other features, like the grass a dog is sitting on or a frisbee in a dog’s mouth, may not be as useful for detecting a dog. The varying degrees of usefulness among features are encapsulated in the concept of "importance". - - -In the context of feature detection by a neural network, importance plays a role in modulating which features are encoded within the embedded layers of the network. In the context of the superposition hypothesis, if one feature has more importance than another feature, then it would be inefficient for the network to map both features equally within the embedding; allocating more weight to the feature with greater importance would be more valuable to the network in minimizing error. 
- - -In our experiment, we give each input feature a different importance to allow the models to differentiate between them. We will examine when and how the model justifies mapping multiple features of differing importances to the same neuron, i.e., we will observe the superposition of features with differing importances. - - -### Dataset - - -To run this experiment, we will synthetically generate data that has desired sparsity and importance properties. - - -Each input $x$ will be a vector of length $n$. Each element $x_i$ in the vector will be drawn independently from the other elements in the uniform range $[0, 1]$. As discussed before, we can now synonymously refer to each of these elements as features, given their independent generation. (We will refer to them as features from this point onwards.) - - -Each feature $x_i$ in the vector has a relative importance to each of the other features $x_{j\ne i}$. The importance of feature $x_i$ is $I_i = r_I^i$ where $r_I\in(0, 1)$ is a constant describing the relative decay of importance between neighboring features. This attribute of the data will be implemented in the loss function (see below for more details). - - -We will train separate models for each of the varying levels of sparsity. For an input $x$ with sparsity $S$, each feature $x_i$ will take on its “true” value, a uniformly distributed number, with a probability of $1-S$ and will otherwise be set to 0 with a probability of $S$. - - -Below is a visualization of two batches of inputs with respective sparsities $S=0.5$ and $S=0.99$. - -
-
- {% include figure.html path="/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batches.png" class="img-fluid" %} -
-
-
- Each column of the plots represents a feature vector of length 20. Each batch has size 100, corresponding to the number of columns in the plots. Notice how the changing in sparsity affects the feature density. -
- -### Network -Below are the architectures of the base (linear) and experimental (non-linear) models that we are using in this experiment. Of particular note is the activation function $\mathbb{f}$, which we will substitute using the aforementioned activation functions. - -| Linear Model | Activation \( $\mathbb{f}$ \) Output Model | -|:---------------------------:|:------------------------------------------:| -| $$h = Wx$$ | $$h = Wx$$ | -| $$ x' = W^T h + b $$ | $$x' = f(W^T h + b)$$ | -| $$x' = W^T Wx + b$$ | $$x' = f(W^T Wx + b)$$ | - -
-
- {% include figure.html path="/assets/img/2023-11-09-interpretability-of-toy-tasks/Autoencoder.png" class="img-fluid" %} -
-
- -We create an autoencoder - compressing down to induce polysemanticity. This maps $x$ to a direction in a lower-dimensional space, represented by $$h = Wx$$. Each column of $W$ corresponds to a lower-dimensional representation of a feature in $x$. To reconstruct the original vector, $W^T$ is used, ensuring clear feature representation correspondence. This structure results in a symmetric matrix $W^TW$ and allows for clear visualization of the weights. They visually allow for the determination of the presence of superposition. - - -### Loss - - -Sparsity, Importance and Our Network come together in the following loss function: - - -$$ - L = \sum_{i} \sum_{x} I_{i}(x_{i} - x'_{i})^{2} $$ - - -Motivated by , we use a standard MSE loss, where $x_i$ and $x_i'$ measure the absolute difference in the auto-encoding of the datapoint. The Importance factor, $I_i$ , describes how important the given reconstruction is. A smaller importance will allow loss minimization even with a poor reconstruction. - - -## Results - - -Below we present each activation function, along with plots depicting how training results in superposition at varying degrees of sparsity. - -For the $n=10, m=5$ experiment, we show the $W^TW$ matrix and neuron feature distribution at varying degrees of sparsity. The $W^TW$ matrix reveals which features are prioritized (shown by the diagonal terms) and any polysemanticity that occurs (shown by the off-diagonal terms). The neuron feature distribution shows how each of the $m=10$ features are mapped to each of the $n=5$ embedding dimensions. This can aid in understanding under what conditions polysemanticity arises and how it occurs under each condition of sparsity. - -For the $n=2, m=1$ experiment, we show a phase diagram. This phase diagram shows how the second "extra" feature of the length-2 input vector is encoded. 
There are three options: not encoded at all (only the first feature is encoded), encoded in superposition with the first feature, and encoded as the only feature (the first feature is not encoded). - -### ReLU - -The ReLU (Rectified Linear Units) activation function is a piecewise-linear function, a simple non-linearity that allows models to use superposition of features. ReLU was the only activation function used in , so our work with the ReLU function was primarily to verify the results from their work and create a baseline for our subsequent experiments. - -The following are the $W^TW$ matrices and feature-neuron mappings: -
- ReLU $W^TW$ Matrices -
-
-
- {% include figure.html path="/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_relu.png" class="img-fluid" %} -
-
- -As per the results in , the ReLU model focuses on the most significant features in the low sparsity regime (generally resulting in monosemanticity), while relying on superposition in the high sparsity regime (polysemanticity). With weaker signals for the most important features in the high sparsity regime, the model encodes multiple features in each neuron activation to minimize error of the sparse signals. Notably, the ReLU model uses antipodal pairs in the mapping of features to encode multiple features to single neurons. This can be seen as a light-colored diagonal entry within $W^T W$ and a corresponding dark-colored off-diagonal entry within the same column. This antipodal mapping of features is a method that the model uses to compress more than one feature to one neuron. This antipodal mapping is more interpretable than other kinds of polysemanticity which occurs in subsequently-described activation functions which “speckle” multiple features into a single neuron, making it more difficult to determine how the superposition occurs in that model. - - -The following is the phase diagram of the ReLU models: -
-
- {% include figure.html path="/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_relu.png" class="img-fluid" %} -
-
- {% include figure.html path="/assets/img/2023-11-09-interpretability-of-toy-tasks/legend.png" class="img-fluid" %} -
-
-In regimes of high sparsity (i.e., below $1-S=0.1$ on the phase diagram above) the ReLU models are highly polysemantic for all relative feature importances, reflecting an inability to encode features with a sparse signal. In regimes of low sparsity, the model generally embeds the more important of the two features. This result mirrors the phase diagram in as expected. - -### GeLU/SiLU - -The GeLU (Gaussian Error Linear Units) and SiLU (Sigmoid Linear Units) activation functions are very similar to one another, and as a result produced very similar experimental results. Both functions are akin to a "smoothed out" version of the ReLU function, i.e., they have no discontinuities. The GeLU has recently been popularized as the activation function of choice in many transformers, including BERT and GPT . The GeLU is differentiable for all $x$ - and has a smoother curve than the SiLU (Swish) activation. found that in the setting of transformers, the GeLU was less interpretable than the SoLU. This may be the case after having many linear layers activation - but with a single layer this is not the case. - -
- GeLU $W^TW$ Matrices -
-
-
- {% include figure.html path="/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_gelu.png" class="img-fluid" %} -
-
-
- SiLU $W^TW$ Matrices -
-
-
- {% include figure.html path="/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_silu.png" class="img-fluid" %} -
-
- -The GeLU and SiLU models exhibit similar kinds of superposition in their weight matrices. With increasing sparsity, superposition of features does happen, but it is more “strict” than the ReLU model, generally mapping at most two features to any single neuron. In each of the polysemantic neurons, though, there is one feature that dominates, suggesting that these activation functions enforce sparsity in their activations. There are also many antipodal pairs of features within these models, reiterating the behavior that exists in the ReLU models (also found in ). -
-
- {% include figure.html path="/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_gelu.png" class="img-fluid" %} -
-
- {% include figure.html path="/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_silu.png" class="img-fluid" %} -
-
-
-
- {% include figure.html path="/assets/img/2023-11-09-interpretability-of-toy-tasks/legend.png" class="img-fluid" %} -
-
-
-The above phase diagrams of the GeLU and SiLU models show a marked difference from that of the ReLU model (earlier), despite the similar shapes of these three activation functions. The GeLU and SiLU models exhibit significant monosemanticity at high degrees of sparsity, unlike the ReLU, which results in near-complete polysemanticity for sparsities higher than $S=0.9$. This difference may reflect SiLU's and GeLU's better fit as an activation for picking up the signal in sparse feature representations, making the case for GeLU and SiLU as more interpretable activation functions within larger models.
-
-### Sigmoid
-
-The Sigmoid function is a smooth activation function with an output range of $(0, 1)$. This maps directly to the desired range of values that the model is trying to replicate.
-
- Sigmoid $W^TW$ Matrices -
-
-
- {% include figure.html path="/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_sigmoid.png" class="img-fluid" %} - -
-
- -The Sigmoid model exhibits superposition in all neurons as soon as the sparsity is non-zero, as can be seen from the “speckling” of non-zero off-diagonal terms in $W^T W$. This is a difference from the ReLU/GeLU/SiLU models, for which the superposition “leaks” into the least significant encoded features at low, non-zero sparsities and eventually affects all features at higher sparsities. This low-sparsity superposition may occur because the Sigmoid function strictly maps to $(0, 1)$, with increasingly large pre-activation inputs necessary to map to values close to 0 and 1. As such, the model may be “speckling” the off-diagonal values in an attempt to “reach” these inputs which are close to 0 and 1. - -
-
- {% include figure.html path="/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_sigmoid.png" class="img-fluid" %} -
-
- {% include figure.html path="/assets/img/2023-11-09-interpretability-of-toy-tasks/legend.png" class="img-fluid" %} -
-
-Despite differences in the occurrence of polysemanticity, the ReLU and Sigmoid models exhibit very similar phase diagrams, reflecting an inability to encode multiple features at sparsities above $S=0.9$ (i.e., below $1-S=0.1$ on the phase diagram). As discussed above, this may be caused by the vanilla sigmoid activation's inability to "reach" target values close to 0 or 1. - -### Tanh - -The Tanh function is another smooth activation function, but it results in significantly different behavior from the Sigmoid (despite being a linear mapping of the Sigmoid). -
- Tanh $W^TW$ Matrices -
-
-
- {% include figure.html path="/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_tanh.png" class="img-fluid" %} - -
-
- -With the Tanh activation function, the models prioritize the most important features regardless of sparsity. This behavior is possibly attributed to the range that the Tanh function maps to $(-1, 1)$, while the target range of input values in this experiment are $[0, 1]$. This behavior is similar to that of a linear model (i.e., no activation function) which exhibits no capability to use superposition, but the phase diagram reveals subtle differences from the linear model results. - -
-
- {% include figure.html path="/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_tanh.png" class="img-fluid" %} -
-
- {% include figure.html path="/assets/img/2023-11-09-interpretability-of-toy-tasks/legend.png" class="img-fluid" %} -
-
- -Although nearly performing as the linear model would, only encoding the most important feature, there is some difference to the linear model along the boundary between features, as can be seen around the importance of 1. This reflects the model's ability to use non-linearity to perform superposition. - -#### A Note on Sigmoid and Tanh - -Despite similarities in the S-like curvature of the Sigmoid and Tanh activation functions, the Sigmoid model exhibits superposition, whereas the Tanh model exhibits nearly zero superposition. A key difference between the two functions is the fact that the Sigmoid function maps inputs to a range of $(0, 1)$, while the Tanh function maps inputs to a range of $(-1, 1)$. This difference is significant in our experiment, as our experiment uses models to recreate random vectors with elements in the range $[0, 1]$. The range of the Sigmoid function matches this range, while the range of the Tanh function which matches this range only occurs for non-negative inputs to the Tanh function. In other words, the $(-\infty, 0)$ input domain (which maps to the range $(-1, 0)$) of the Tanh function remains useless for prediction of values which should be in the range $[0, 1]$. Therefore, the tanh function empirically acts like a linear function (i.e., no activation layer). - - -### SoLU - -The SoLU (Softmax Linear Units) activation function is based on the work from . -$$ Solu(x) = x * softmax(x) $$ -SoLU is a function for which the activation of each neuron is dependent on all the other neurons within its own layer. This is significantly different from all the other activations that we tested, as the activations of neurons with the other functions are independent of the other neurons within the same layer. In other words, all the other activation functions are univariate while the SoLU is multivariate. 
Similar to other approaches like L1 regularization, the SoLU amplifies neurons with relatively large pre-activations and de-amplifies neurons with relatively smaller pre-activations. This behavior pressures the model to be more monosemantic (and therefore more interpretable in some settings), as discussed in . - -
- SoLU $W^TW$ Matrices -
-
-
- {% include figure.html path="/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_solu.png" class="img-fluid" %} -
-
- -In our experiment, the SoLU model results in non-zero superposition of all features with all degrees of sparsity. This may be attributed to the way that the SoLU “forces” activations to be sparse, i.e., the activations result in a “winner-takes-all” behavior due to the way that the Softmax function works. This is not a useful property for prediction of a vector of independently-drawn values, as the input vectors are unlikely to be peaky, i.e., the SoLU does not quite fit the purposes of its task. - -
-
- {% include figure.html path="/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_solu.png" class="img-fluid" %} -
-
- {% include figure.html path="/assets/img/2023-11-09-interpretability-of-toy-tasks/legend.png" class="img-fluid" %} -
-
- -As seen in the heatmap plot above, the SoLU activation results in very polysemantic behavior. This function is not precisely fit for its task of recreating given vectors and likely results in using polysemanticity to attempt to pass information about inputs forward. Curiously, the SoLU models have preference for the more important feature in the low sparsity regime. - -### Bringing Them All Together -
- Sparsity vs Dimensions Per Feature -
-
- -
-
-The diagram above depicts a variation on the two experiments explained thus far. In this experiment $n=200$ features were compressed to $m=20$ features and the loss function was tweaked to give uniform importance $I_i = 1$ to all features. This was done to determine how each activation function compresses features in different sparsity regimes without the influence of feature importance.
-
-On the y axis, the plot depicts a metric (dimensions per feature) that measures the number of dimensions a model dedicates to each feature. In other words, a point with a y-value near 1 represents a model that dedicates one dimension of its embedding space to one feature, whereas a point with a y-value near 0.25 represents a model that represents four features at each dimension.
-
-The plots are generally consistent with the analysis from the previous experiments. Many of the activations result in superposition in the low-density/high-sparsity regime, and increases in sparsity result in increases in the polysemanticity of the model (i.e., the dimensions per feature decrease). Consistent with the other experiments, SiLU and GELU perform very similarly. The Sigmoid and SoLU activations pack nearly 20 features per dimension at high sparsities. The Tanh activation exhibits behavior similar to the linear model, neatly packing one dimension with one feature, a result that is mirrored in the previous experiments. Similar to the results in , we see "sticky" behavior of the ReLU activation function at 1 and 0.5 dimensions per feature. This can be explained by the phenomenon of "antipodal pairs" discussed in . None of the other activation functions that we tested exhibit this behavior - which is striking since this is a well-studied effect for the ReLU activation function. This may be because the ReLU activation function is the only one that is not smooth, and therefore has a different behavior from the other activation functions.
- - -## Conclusion - -Our investigation into the effects of various activation functions reveals that significant changes occur in model behavior depending on the chosen function. This finding underscores the ability to modulate the degree of superposition through the selection of activation functions, highlighting yet unexplored degrees of freedom in model design. This line of inquiry goes seamlessly with considerations of how neural networks are initialized and trained, suggesting these as promising future research directions. - -Our work is limited by the breadth of activation functions that we tested, though. Further iterations on each of the activation functions (e.g., tweaking the Sigmoid function to map to the range $(-\epsilon, 1+\epsilon)$) could prove fruitful in getting better performance from the models. Furthermore, while writing this blog, published a new key insight related to the importance of initialization in superposition, which we do not explore here. Despite this, we have learned valuable insights about the effects that our set of activation functions can have on superposition. - -Pursuing enhanced interpretability, however, does not come without its challenges. Specifically, striving for transparency and understandability in neural network models raises concerns about the potential for deception. Despite these challenges, our work aims to develop neural network models that are more interpretable, transparent, and secure. 
- - -{% bibliography --cited %} - diff --git a/_posts/2023-11-09-learning-interpretable-features-with-sparse-autoencoders.md b/_posts/2023-11-09-learning-interpretable-features-with-sparse-autoencoders.md deleted file mode 100644 index c5d42627..00000000 --- a/_posts/2023-11-09-learning-interpretable-features-with-sparse-autoencoders.md +++ /dev/null @@ -1,219 +0,0 @@ ---- -layout: distill -title: Learning Interpretable Features with Sparse Auto-Encoders -description: -date: 2023-11-09 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Sam Mitchell - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-learning-interpretable-features-with-sparse-autoencoders.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names so match the hash hashes -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Superposition Hypothesis - - name: Sparse Auto-Encoders - - name: Research Question - - name: Codebase - - name: Language Models - - name: Conclusion ---- - -## Introduction - -The field of Machine Learning is becoming increasingly promising as humanity endeavors to create intelligent systems, with models outperforming humans on many tasks. As models become increasingly capable, its important that humans are able to interpret a model's internal decision making process to mitigate the risk of negative outcomes. While significant progress has been made on interpreting important parts of models like [attention heads](https://transformer-circuits.pub/2021/framework/index.html) , it's also the case that hidden layers in deep neural networks have remained notoriously hard to interpret. - - -## Superposition Hypothesis - -One hypothesis for why it can be challenging to interpret individual neurons is because they are simultaneously representing multiple concepts. 
One may wonder why a network would have its neurons learn to represent multiple concepts. At a first glance, this approach to encoding information feels unintuitive and messy. The key idea comes from the Johnson–Lindenstrauss lemma: In $n$ dimensions, you can have at most $n$ pairwise orthogonal vectors, but the number of pairwise "almost orthogonal" vectors (i.e. cosine similarity at most $\epsilon$) you can have is exponential in $n$. This enables a layer to encode for many more concepts than it has neurons. So long as each neuron is only activated by a sparse combination of concepts, we can reconstruct these concepts from a given activation with minimal interference between the concepts, since they are "almost orthogonal". This hypothesis is known as **[superposition](https://transformer-circuits.pub/2022/toy_model/index.html)** , and offers an explanation for why neurons have been observed in practice to be polysemantic. -{% include figure.html path="assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/superposition.png" %} -
- Diagram depicting a larger model with disentangled features and a lower dimensional projection simulating this larger network using polysemanticity. Source
-
- - -## Sparse Auto-Encoders - -Since deep neural networks are strongly biased towards making neurons polysemantic during training, humans might try to understand the model's decision making process by "unwrapping" the network into the sparse features that the neurons in some particular layer are simulating. To do this, a concept called a Sparse Auto-Encoder (SAE) is used. An SAE is similar to a normal autoencoder, with two main differences: (1) the encoding layer is larger than the neuron layer, often by a factor of 4x. (2) the loss function penalizes not only for the MSE loss, but also for the sparsity of the encoder matrix, frequently represented as L1 loss. A sparse autoencoder lets us learn a sparse representation for a vector, but in a higher dimensional space. SAEs were first proposed in a [blogpost](https://www.lesswrong.com/posts/z6QQJbtpkEAX3Aojj/interim-research-report-taking-features-out-of-superposition) by Lee Sharkey in December 2022, and in September 2023 more research was published on SAEs, both by a group of [independent researchers](https://arxiv.org/abs/2309.08600) and by [Anthropic](https://transformer-circuits.pub/2023/monosemantic-features/) demonstrating that not only can SAEs be learned at a specific layer, but the features they learn are human interpretable. - -{% include figure.html path="assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/SAE.png" %} -
- Diagram depicting an SAE architecture for a transformer language model. Source -
- - -## Research Question - -This inspired a new idea: what if we could take a neural network, unwrap each layer into a larger, sparse, interpretable set of features, and then learn a sparse weight matrix connecting all pairs of two consecutive feature layers? This would mean that we could take a neural network, and transform it into a new neural network simulating the old neural network, with the nice property that the computations are sparse and hopefully interpretable. - -The main question we wish to explore is: Can we unwrap a deep neural network into a larger sparse network and learn sparse weights between consecutive feature layers without losing performance? - - -## Initial Mathematics - -Let's begin by looking at $L_1$ and $L_2$, two consecutive layers in a deep neural network with ReLU activations. Let $W$ and $b$ be the matrix and bias respectively that connects these two layers. Then we have - -$$ -L_2 = \text{ReLU}(W L_1 + b) -$$ - -We create autoencoders such that - -$$ -L_1 = D_1 \text{ReLU}(E_1 L_1 + e_1) \equiv D_1 F_1 -$$ - -$$ -L_2 = D_2 \text{ReLU}(E_2 L_2 + e_2) \equiv D_2 F_2 -$$ - -where $D_i$ is the decoder for layer $i$, $E_i$ and $e_i$ are the weights of the encoder and encoder bias, and $F_i$ is the feature vector. - -{% include figure.html path="assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_diagram.png" %} -
- Biases excluded from diagram for clarity. The hockey sticks on top of $F_1$, $L_2$, and $F_2$ indicate that a ReLU is applied to get the activations at that layer. If our autoencoder is good (which it should be), we have $L_1=L_1'$ and $L_2=L_2'$. -
-
-Thus we have
-
-$$
-\begin{align}
-F_2 &= \text{ReLU}(E_2 L_2 + e_2) \\
-&= \text{ReLU}(E_2 \text{ReLU}(W L_1 + b) + e_2) \\
-&= \text{ReLU}(E_2 \text{ReLU}(W D_1 F_1 + b) + e_2).
-\end{align}
-$$
-
-In general, an approximation of the form
-
-$$
-F_2 = \text{ReLU}(W_2 F_1 + b_2)
-$$
-
-would be pretty terrible since we cannot easily approximate a double ReLU function with a single ReLU function. However, because of the way $F_1$ and $F_2$ are created from $L_1$ and $L_2$, the relationships are actually very sparse in nature, so we will try to learn the approximation above. Perhaps there is a clever initialization that will allow us to learn this more easily.
-
-If we just ignored the inside ReLU in the definition of $F_2$, then we'd have
-
-$$
-F_2 = \text{ReLU}(E_2 W D_1 F_1 + E_2 b + e_2)
-$$
-
-which suggests the following could be a good initialization for our learned weight $W_2$ and bias $b_2$.
-
-
-$$W_2 = E_2 W D_1$$
-
-$$b_2 = E_2 b + e_2$$
-
-While this initialization seemed reasonable at the start of the project, it turned out that during training this results in a local minimum, and you can actually get much lower loss if you randomly initialize $W_2$ and $b_2$.
-
-## Codebase
-
-To answer this main question, the first step was to build out a [codebase](https://drive.google.com/file/d/1_0g_Qq76AqJByCrj_i-tYr76KPeAfIem/view?usp=sharing) that had all the implementations necessary to run experiments to explore this question. The codebase was developed from scratch to ensure I understood how each part of the code worked.
-
-### Model
-The first part of the code trains a four layer neural network to classify MNIST images. After training we got a validation loss of 0.09 and a validation accuracy of 0.98, indicating the model does well. For clarity, all losses described in this section will refer to loss on the validation set.
- -### SAEs -Next, two autoencoder architectures are implemented, one that learns both an encoder and decoder, and one that learns only an encoder as its decoder is tied as the transpose of the encoder. Empirically, the tied autoencoder seemed to perform better and achieved an L1 (sparsity) loss of 0.04928, and an L2 (MSE) loss of 0.03970. Seeing these numbers close in magnitude is good, indicating that the model is neither penalizing too much nor too little for L1 sparsity loss. - -{% include figure.html path="assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/autoencoder.png" %} -
- For a random input: The top diagram depicts neuron activations (blue) and reconstructed neuron activations from the SAE (orange), indicating the SAE has low L2 loss and reconstructs the input well. The bottom diagram depicts the feature activations for the same input, showing they are sparse. Notably, 38/64 of the neuron activations have magnitude above 0.3, but only 7/256 of the encoded features have magnitude above 0.3. -
-
-### Feature Connectors
-Then, a feature connector was implemented, which learns the matrices $W_2$ and $b_2$ described above mapping one layer to another layer. The inputs are the set of all feature $i$ activations and the outputs are the set of all feature $i+1$ activations, allowing us to gradient descent over loss (which consists of L1 sparsity and L2 MSE) to optimize $W_2$ and $b_2$. The L1 (sparsity) loss was 0.02114 and the L2 (MSE) loss was 0.03209, indicating that there is a good tradeoff between L1 and L2 penalty.
-
-{% include figure.html path="assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/layer_weights.png" %}
-
- Weights matrix connecting neuron layer 1 to neuron layer 2. This is a mess. 2205 weights have magnitude greater than 0.1. -
- -{% include figure.html path="assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_weights.png" %} -
- Weights matrix connecting encoded features in layer 1 to encoded features in layer 2. This is nice and sparse. 458 weights have magnitude greater than 0.1. -
-
-Below is what the feature connector matrix looks like after each epoch of training.
-
-{% include figure.html path="assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_connector1_2.gif" %}
-
-### Simulating the Feature Network
-Finally, we replace neuron connections with feature connections. This means that when we pass an input through the network, we immediately encode it as a feature and propagate it through the feature connector weights, skipping the neuron layer weights. In this network, removing two neuron to neuron layers and substituting them with feature to feature layers results in a decrease from 97.8% accuracy to 94% accuracy, which is pretty good considering we made our network much sparser.
-
-Next, I tried to visualize the features using a variety of methods (both inspired by a class lecture and a [Distill blogpost](https://distill.pub/2017/feature-visualization) ). Unfortunately, I did not find the features to be much more interpretable than the neurons for the MNIST dataset. Still, our results are cool: we can take a network, and with only a fraction of the parameters maintain comparable performance.
-
-## Language Models
-
-I shared these results with Logan Riggs, one of the [independent researchers](https://arxiv.org/abs/2309.08600) who published about SAEs in October 2023. Excited about the possibility, we collaborated to see if we could achieve the same results for language models, anticipating that the learned features might be more interpretable. We and a couple other collaborators published a [blogpost](https://www.lesswrong.com/posts/7fxusXdkMNmAhkAfc/finding-sparse-linear-connections-between-features-in-llms) showing that the learned features in Pythia-70M are indeed interpretable, and there are cool relationships! (the remainder of this section is adapted from that blogpost)
-
-Below we show some examples of sparse linear feature connections.
For the curious reader, additional examples can be found [here](https://comet-scorpio-0b3.notion.site/More-Examples-ceaefc95cc924afba318dca1da37d4a4?pvs=4). - -### OR Example -In Layer 1, we have: - -$$OF_{30} = 0.26IF_{2797} + 0.23IF_{259} + 0.10IF_{946}$$ - -where OF is output feature (in MLP_out), and IF is input feature (in Residual Stream before the MLP) - -Below is input feature 2797, activating strongly on the token “former” -{% include figure.html path="assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/former.webp" %} -
- This is 5 examples. For each example, the top row of words are feature activation e.g. token "former" activated 9.4. The bottom blank row is: if we removed this feature, how much worse does the model get at predicting these tokens? e.g. Soviet is 5.5 logits worse when the model can't use this "former" feature. -
- -Below is input feature 259, activating strongly on the token “old” -{% include figure.html path="assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/old.webp" %} - -Below is input feature 946, activating on the token “young” -{% include figure.html path="assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/young.webp" %} - -In the output feature, we see the tokens former, old, and young all activate, with young activating about half as strongly as “former” and “old” as we would expect from the weight coefficients. - -$$OF_{30} = 0.26IF_{former} + 0.23IF_{old} + 0.10IF_{young}$$ -{% include figure.html path="assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/former_old_young.webp" %} - -We can view this computation as a weighted logical OR. Output Feature 30 activates on former OR old OR young. - -### Negative Weight Example -In Layer 1, we have: - -$$OF_{505} = 0.68IF_{3021} -0.21IF_{729}$$ - -where OF is output feature, and IF is input feature. - -Below is input feature 3021, activating strongly on tokens like “said” which in almost all cases appear not after a quote. - -{% include figure.html path="assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/all_said.webp" %} - -Below is input feature 729, activating strongly on tokens like “said” when they appear shortly after a quote. - -{% include figure.html path="assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/said_quotes.webp" %} - -Below we see the output feature activates on tokens like “said” that have no prior quote tokens. We’ve “subtracted out” with a large negative weight, so to speak, the examples where “said” appears after a quote, and now the feature only activates when “said” appears without any prior quotes. 
- -$$OF_{505} = 0.68IF_{(\text{"said" in many contexts})} -0.21IF_{(\text{"said" after quotes})}$$ - -{% include figure.html path="assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/said_no_quotes.webp" %} - -We can view this computation as a weighted logical AND. Output Feature 505 activates on A AND ~B. In the case where A is a superset of B, this is the complement of B e.g. I have the set of all fruits and all yellow fruits, so now I can find all non-yellow fruits. - -## Conclusion - -Our exploration into interpreting neural networks using Sparse Auto-Encoders has shown promising results. The ability to unwrap the layers of a neural network into a more interpretable, sparse representation without a significant loss in performance supports the superposition hypothesis. Even if the features were only interpretable on some architectures/datasets, I am optimistic that Sparse Auto-Encoders will not only make deep neural networks more interpretable, but they will also allow for quicker parallelized inference since each output feature will depend on a small fraction of the total possible input features. - -I'd like to thank everyone who has contributed to my deep learning education this semester. I have learned a tremendous amount and really enjoyed working on this project. - diff --git a/_posts/2023-11-09-molecule_generation.md b/_posts/2023-11-09-molecule_generation.md deleted file mode 100644 index 800b0c38..00000000 --- a/_posts/2023-11-09-molecule_generation.md +++ /dev/null @@ -1,191 +0,0 @@ ---- -layout: distill -title: An empirical evaluation of autoencoders and diffusion models for 2D small-molecule generation -description: - We examine the efficacy of autoencoders and diffusion models for generating 2D molecules with certain small-molecule properties. In particular, we evaluate the success of both models in creating new molecules, containing only CHONPS atoms, and only single, double, and aromatic bonds. 
Secondarily, a natural question that followed was investigating the efficacy of different manners of encoding molecular data for training models - specifically, we trained with both molecular fingerprints and adjacency matrices (derived from graph embeddings of molecules). We find that small autoencoder models are successful in generating both pseudo-fingerprints and pseudo-adjacency matrices that are similar to simple small molecules’ fingerprints and adjacency matrices, but they were not able to produce ‘convincing’ simple organic molecules from the fingerprint or adjacency matrices. We find that diffusion models were considerably faster and more lightweight than autoencoders, and were generated molecules that were quantitatively closer in structure to real chemical structures than the auto-encoders were able to produce. -date: 2022-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Shorna Alam - affiliations: - name: Course 6-3 and 18, MIT - - name: Srinidhi Narayanan - affiliations: - name: Course 6-14 and 18, MIT - - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-molecule_generation.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Data - - name: Methods - - name: Autoencoder - subsections: - - name: RDKFingerprints - - name: Adjacency Matrices - - name: Autoencoder Results - RDK Fingerprints - - name: Autoencoder Results - Adjacency Matrix - - name: Diffusion Model - - - name: Observations - - name: Conclusion - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. 
-_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- -## Introduction - -Applying deep learning techniques to 2D molecule generation is an interesting and challenging problem in the field of cheminformatics, with applications in drug discovery, materials science, and other areas of chemistry. The problem is broad in scope, since there is a variety of molecular data, representations of the generated molecules, and model frameworks or generation pipelines. Autoencoders and diffusion models are two major types of generative models. The first learns a latent distribution from actual data points and then samples from this space to produce a novel output. Diffusion models work by progressively adding noise to input data, learning the correspondence between inputs and random noise, and then working backwards from a new sample of random noise by “undoing” the noise. - -## Data - -We use the QM9 dataset, described here. This dataset has been used extensively for cheminformatics research. The dataset contains the molecular structures and coordinates (2D and 3D) of ~134,000 organic molecules. Each molecule is represented as a set of atoms with their respective spatial (cartesian) coordinates. The dataset also contains a comprehensive set of chemical properties of each molecule. - -We retrieved the SMILE (Simplified Molecular Input Line Entry System) notation for each molecule. The SMILE string uses ASCII characters to describe the atoms, bonds, and connectivity in a molecule, and is a standardized way to convey chemical information in textual form. The RDKit library hosts functionality for moving between SMILE strings and quantitative data (matrices, fingerprint vectors) as well as for visualizing molecules from the SMILE strings. 
- -Finally, we create a secondary, restricted subset of the data that contains only simple, organic molecules by eliminating strings containing the “#” (character representing triple bonds) or elements other than C, H, O, N, P, S. For the models dealing with fingerprints, since it is challenging to go from fingerprint to an explicit representation of a molecule, our evaluation metric was determining whether or not the generated molecules were, in fact, similar to the chosen “simple” subset of all of the data. For models dealing with adjacency matrices, it was quite easy to determine ‘validity’ of chemical structures visually; the appearance of standard chemical structures, such as rings of 5 and 6 carbons with side-chains, was used as an indication of success. - -## Autoencoder - -A very simple generative approach we can take is to use an autoencoder. Namely, we can train an autoencoder on molecules of interest — like our small-molecule-filtered dataset — and then sample from the learned latent space, decode the sample to generate a “molecule”, and evaluate the success in generation. - -As mentioned in the introduction, it is worth considering possible data inputs and the sort of information a generative model trained on different inputs would carry. For our example, we consider the efficacy of RDKFingerprints and graph adjacency matrices as two possible input data types. - -**RDKFingerprints** - -Molecular fingerprints are a commonly used identifier in drug discovery and virtual screening. Different types of fingerprints encode different aspects of a molecule, but they all share the characteristic of preserving features of a molecule in a spatial fashion across a bit vector. A main feature of a fingerprint scheme is that vector similarity (which can be computed in many ways) corresponds to structurally or chemically similar molecules according to the features the fingerprint intends to encode for. 
- -The Python RDKit library hosts functionality for handling two such types of fingerprints — a native RDK fingerprint and a Morgan fingerprint. We use the RDK fingerprint, and our data pipeline looks something like this: - -+ For a given molecule (via smile string) we generate a fingerprint (a 2048-long bit vector) - -+ A set of such fingerprints is used to train an autoencoder (whose structure is a 2048 unit input layer, 2 hidden layers of 64 units activated with ReLU activations) - -+ We sample from the latent space and use the decoder to produce a set of generated molecules, which we associate to sets of 10 “most similar real molecules” from the original (unfiltered) dataset. Similarity is calculated using the **Tanimoto Distance**, a notion of similarity between two vectors where the numerator is the number of 1s in common between the bit vectors, and the denominator is the number of 1s overall. - -+ We compute the percentage of these 10 similar molecules that lie in the small-molecule-filtered dataset to evaluate the success of the autoencoder in understanding the structure of small molecules at the generation step. - - -This approach has the benefit of using a data source explicitly designed with the goal of similarity; computing close-distance vectors to the generated RDKit fingerprint carries genuine chemical meaning. - -**Adjacency Matrices** - -Molecules lend themselves well to graph representations: atoms are like nodes, bonds are like edges. Thus, a molecule, if represented with a graph, can be associated to an adjacency matrix that carries information on interatomic and overarching molecular properties. - -Adjacency matrices derived from the graph representation of a molecule, while not explicitly designed with the goal of molecule similarity in mind (as the fingerprint is), are historically successful in chemical deep learning, particularly as they are the workhorse of graph neural networks. 
The adjacency matrices available in the QM9 dataset can be decomposed into matrices at the single, double, and aromatic bond levels, so they carry chemical information in addition to structural information. We implement a similar pipeline with adjacency matrix inputs, with a few changes: - -+ The adjacency matrix for a smile string is computed - -+ Unlike RDK Fingerprints, which are fixed in length, the size of the adjacency matrix varies with the size of the molecule; this makes use in a fixed-input-length autoencoder difficult, so we apply a padding approach, zero-padding all matrices to the size of the largest molecule’s matrix. - -+ The autoencoder is trained with these flattened, padded matrices. - -+ The generated reconstructions are rearranged into a matrix shape. - -+ The pseudo-adjacency matrix is then associated to a pseudo-molecule and corresponding pseudo-RDK fingerprint. Notably, the pseudo-molecule is created with some assumptions, such as the inclusion of only CHONPS atoms and only single bonds. Like the fingerprint framework, we find molecules in the original set with similar fingerprints to the reconstructed fingerprint, and compute the proportion of top-10 similar molecules that lie in the small-molecule set. - - -## Autoencoder Results - RDK Fingerprints - -The first and most notable result is that over repeated trials of sampling and reconstructing from the latent space for both types of data, the proportion of top-10 similar molecules that lie in the small-molecule restricted dataset is 1.0. That is, each of the 10 most similar molecules lies in the small-molecule set in both cases, over 5 batches of 10 samples each. - -Some detailed results follow. 
- -First, here is the training curve with loss for the fingerprint autoencoder - -{% include figure.html path="assets/img/2023-11-09-molecule_generation/training_loss_fngpts.png" class="img-fluid" %} - -An example of top-10 similarity for a sampled and reconstructed pseudo-fingerprint is shown here - -{% include figure.html path="assets/img/2023-11-09-molecule_generation/top_10_similarity_chart.png" class="img-fluid" %} - -We notice that all the top-10 most similar molecules seem to be near each other, index-wise. This would make sense if the dataset is organized such that similar molecules share close indices. We can confirm this fact by inspecting a heatmap of 10 samples from a consecutive block in the dataset, like so: - -{% include figure.html path="assets/img/2023-11-09-molecule_generation/heatmatp_rdk_with_title.png" class="img-fluid" %} - -We can see that indeed, closer molecules in the original dataset have higher similarity, so this result is as expected. - -## Autoencoder Results - Adjacency Matrix - -We then inspect the results of the adjacency matrix-based autoencoder training. First, the training curve with loss: - -{% include figure.html path="assets/img/2023-11-09-molecule_generation/training_loss_adj_mat_case.png" class="img-fluid" %} - -Now, here is a top-10 similarity example for a pseudo-RDK fingerprint from a pseudo-adjacency matrix: - -{% include figure.html path="assets/img/2023-11-09-molecule_generation/adj_mat_top_10_similarity.png" class="img-fluid" %} - -We notice first, that the average similarity is much higher in this case, suggesting that even with the extra step of conversion and the assumptions we make about molecular form, the similarities are higher in this case. The second observation is that the top-10 similar indices are spread out farther than they were in the previous case, suggesting that the adjacency matrix to RDK fingerprint conversion moves around the similar molecules. 
- -Finally, we include some photos of molecules generated in this process (we were unable to generate photos in the RDK fingerprint trained autoencoder, because we require an adjacency matrix to draw the molecules, and it is not straightforward to go from fingerprint to matrix): - -{% include figure.html path="assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.05 PM.png" class="img-fluid" %} - -In the photo above, we can see the left-hand side tail as a recognizable part of an organic molecule, suggesting success with some types of bonds. In the photo below, we see that the autoencoder has learnt some additional aspects beyond basic single bonds (one of the validation images we show further below includes a similar red ring). - -{% include figure.html path="assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.45 PM.png" class="img-fluid" %} - -Finally, the photo below, while the least small-molecule-like in appearance, is interesting because it appeared many times in samples of 100 images (around 20 times) despite the latent space adjacency matrices being distinct. This could perhaps have to do with the process of converting from an adjacency matrix of reals (the result of latent space sampling) to an adjacency matrix of 1/0s, which we accomplish with median thresholding. 
- -{% include figure.html path="assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.59 PM.png" class="img-fluid" %} - -For reference, a sample image from the "validation" true small-molecule dataset is shown below: - -{% include figure.html path="assets/img/2023-11-09-molecule_generation/validation_molecule_1_ex.png" class="img-fluid" %} - - -**Diffusion Model** - -More recently, the use of diffusion models as an approach for generative modeling has become more common; as described in the introduction, denoising diffusion models operate by iteratively adding noise in a Markov manner to samples, learning the correspondence between inputs and the resultant noise, and then reverse-sampling from random noise to generate a new datapoint. - -In the past, as seen in the E3 paper, diffusion models have been applied to 3D adjacency matrices. In this case, we adapted an image-based diffusion model to noise and then de-noise data on adjacency matrices by using 2D adjacency matrices instead. - -The following plots provide information about the training of the diffusion model on adjacency matrices. First, is a plot of the loss over 5 training epochs at LR 0.001; this model was trained on approximately 90K training samples, so the loss was quite low even after the first epoch: - -{% include figure.html path="assets/img/2023-11-09-molecule_generation/shorna_1.png" class="img-fluid" %} - -The efficacy of diffusion models as a means of generating novel adjacency matrices is evident from the following visualizations of our results. First, here are two runs of the denoising process for the diffusion model, first on an extremely limited set of approximately 1000 matrices, and then on the entire 90K dataset. As seen, even with very few inputs, it was possible to identify the emergence of a ‘bright spot’ in the top left, which represents the actual adjacency matrix (which was later encoded into actual matrices). 
- -{% include figure.html path="assets/img/2023-11-09-molecule_generation/shorna_2.png" class="img-fluid" %} - -In converting these adjacency matrices into actual molecule images, we aimed to visualize the backbones of these molecules (which is most informative as to the overall structure), so instead of focusing on determining atomic identity, we labelled all of them as carbons and proceeded. - -{% include figure.html path="assets/img/2023-11-09-molecule_generation/shorna_3.png" class="img-fluid" %} - -Notably, in comparison to the molecules created by the autoencoder, these contain more of the structures which are characteristic of organic molecules, such as 5 and 6 carbon rings with molecules (potentially side chains of length >1) coming off. Indeed, it is possible to observe the progressively increased ordering of the adjacency matrices over time (as they become closer and closer to actual molecules), going from extremely disordered to closer and closer to something meaningful. - -{% include figure.html path="assets/img/2023-11-09-molecule_generation/shorna_4.png" class="img-fluid" %} - -The application of diffusion models to the RDKFingerprints is shown here: for two separate runs, they look like this. Notably, in order to use an image classification network for RDKFingerprints, the fingerprints were stacked into an image which looks like a series of stripes. As evident, the diffusion model was able to produce such striped images, and their simplicity is a good indication that the model has indeed learned meaningful information about the filtered subset. - -{% include figure.html path="assets/img/2023-11-09-molecule_generation/shorna_5.png" class="img-fluid" %} - -**Conclusion** - -In this post, we used two different generative models and tested out two different encodings for information about molecular structure. 
In general, both models were able to learn and reproduce information about the chosen subset, but in general, the diffusion model was better at accurately reproducing molecules with ‘believable’ structures; as evident from the figures above, although the autoencoder did learn and create relatively sparse adjacency matrices, they lacked the hallmarks of small organic molecules (like rings structures). Further, although it was more difficult to discern quantitative information about the ‘accuracy’ of adjacency matrices, since they depend on larger structures than the RDKfingerprints, it was much easier to map adjacency matrices to actual (visualizable) structures. On the whole, the diffusion model was better at actually creating canonical molecular structures. Further, models trained on adjacency matrices, when converted post-generation to RDKFingerprints had higher accuracy, and adjacency matrices were generally easier to conceptualize, so we have preference for this data encoding. \ No newline at end of file diff --git a/_posts/2023-11-09-multilingual-representations-in-embeddings-models.md b/_posts/2023-11-09-multilingual-representations-in-embeddings-models.md deleted file mode 100644 index 59626bfd..00000000 --- a/_posts/2023-11-09-multilingual-representations-in-embeddings-models.md +++ /dev/null @@ -1,421 +0,0 @@ ---- -layout: distill -title: "Cross-Lingual Fine-Tuning for Multilingual Text Embeddings" -description: Exploring contrastively training text embeddings, and presenting a scalable, cheap and data-efficient method to train multilingual embedding models -date: 2023-11-09 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Spruce Campbell - url: "spruce.world" - affiliations: - name: MIT, CSAIL - - name: Will Hathaway - url: "willhath.com" - affiliations: - name: MIT, CSAIL - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-multilingual-representations-in-embeddings-models.bib 
- -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: The Embedding Task - - name: History and Background - - name: How Embeddings Models are Trained - - name: How Embeddings Models are Tested - - name: Method - - name: Results - - name: Conclusions - - name: Appendix - - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - // insert CSS here ---- - -## Introduction - -Recently, [embeddings models](https://platform.openai.com/docs/guides/embeddings) have become incredibly popular as LLMs become more integrated into tools and applications. Embeddings models (specifically, Siamese encoder-only Transformers) are the state-of-the-art method in retrieval, an old problem in computer science. Embeddings are often used in settings like recommendation algorithms, similarity search, and clustering, and have recently found extensive use in Retrieval-Augmented Generation, assisting LLMs to be more knowledgeable and truthful. However, the best embeddings models are trained on only English data, which means they suffer greatly at applications in other languages, and are inaccessible to most of the world. In this blog post, we summarize the history of embeddings research, detail the training regime of a modern embeddings model, present a new multilingual embedding benchmark, and investigate whether it is possible to fine-tune in multilingual capability to a pretrained monolingual model. - -Our central question is whether it is possible to learn new languages at the finetuning stage, using contrastive training on publicly available text pair datasets. If successful, it would mean that the encoder can learn a map from one language onto the embedding space of another. 
This implies that it is possible to approximate translation, at a conceptual level, with a transformation. We will study the results on various language pairs, and compare to a fully pretrained multilingual model. - -## The Embedding Task - -The aim of embedding text (or any other medium) is to convert human-readable information into vectors. This is useful, because while neural nets cannot process words, images, or sound, they can process vectors. Every NLP model thus has some form of embedding - GPTs, for example, have an embedding layer at the start that transforms input tokens into vector representations. GPTs need an embedding layer because the amount of unique tokens is huge (GPT-2, for example, has 50,257 possible tokens), and it is much more computationally efficient to work with lower-dimensional vectors (GPT-2 embeds these down to 768-dimensional vectors to compute with). - -{% include figure.html path="assets/img/2023-11-09-multilingual-representations-in-embeddings-models/openai_embed.png" class="img-fluid" %} -
- Embeddings models, as described by OpenAI -
- -Because of this reduction of information, embeddings are also a form of compression. To turn a whole sentence (or paragraph) into a vector requires prioritising some characteristics and losing others, and we find that the most valuable thing to prioritise is semantic and contextual information. This leads to a very useful property: text pairs with similar meanings or usage patterns tend to have similar vector representations. For example, the vectors "cat" and "dog" are closer to each other than "cat" and "cucumber". Even more interestingly, as found in the Word2Vec paper, this property causes embeddings to have arithmetic consistency, as shown in the famous "king - man + woman = queen" example. You can explore the Word2Vec embedding space in the interactive visualization below: - -
- -
-
- Visualisation of Word2Vec for the 250 most common English nouns -
- -While this may seem abstract, embeddings have found usage in many downstream and commercial tasks, including: - -1. **Classification** - embeddings models classify sentences, such as in sentiment analysis between positive or negative airline reviews. -2. **Search** - models return nearest-embedded results to a search query, understanding synonyms and context. -3. **Recommendation** - models return embeddings that suggest related items users may like, for example [clothes and jewellery](https://arxiv.org/pdf/1507.08439.pdf). -4. **Clustering** - embeddings are used to cluster datapoints into smaller groups, with downstream algorithms like k-means. -5. **Reranking** - embeddings are used to sort a list, such as one retrieved from a database, into most relevant items. -6. **Retrieval** - a query is embedded, and answers are selected by the closeness of their embedding.. - -### History and Background - -The first successful approaches to these problems were bag-of-words models. These are non-neural algorithms that work by ranking documents based on how many word occurrences they share. There were some improvements around this basic idea, for example Okapi BM25 includes a term for the expected likelihood of that word co-occurring. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Sentenceaboutbirdbird,heardisthewordyou
About the bird, the bird, bird bird bird13200200
You heard about the bird11010101
The bird is the word01001210
-
- A table demonstrating bag-of-words calculation. -
- -The first neural approaches to this problem actually used bag-of-words as a loss function, for example Word2Vec (2013) used either continuous bag-of-words (CBOW) or skipgram loss to train a word embedding model. Word2Vec itself is a shallow two-layer neural network that is used to generate an embedding, which in the CBOW training regime is used to predict a word given a bag of surrounding words. The skipgram loss is similar, but weighs words depending on their proximity to the word we're trying to predict. This word-prediction-from-embeddings task is a *key part* of training language models to have useful representations, and we'll see it again later. - -Word2Vec had some incredible results, and was later improved by subsequent approaches, but word embeddings often failed due to the fact that words with multiple meanings had to share the same point in the embedding space. The sentences "I went to the bank to cash a check" and "I went to the bank to catch a fish" are obviously semantically unrelated, but the word "bank" will necessarily have to share an embedding, making the embedding itself likely meaningless. - -
- -
-
- Visualisation of Word2Vec struggling with polysemanticity in the "riverbank" example -
- -To solve this, embeddings need to be generated in-context, and be able to support multiple meanings. There were some attempts at changing Word2Vec to support polysemanticity, such as Multi-Sense Skip-Gram (MSSG), but they required hacky workarounds such as pre-programming an expected number of meanings for each word. - -#### BERT - -BERT was arguably the beginning of the LLM revolution, as it showed for the first time that a single pretrained language model could be finetuned to support many different tasks downstream. It was essentially an embeddings model - trained again with the word prediction task, now with the context of words not weighted by proximity, but by a trainable position embedding that provided information that the model could use to predict long-term associations and causality. This fixed the polysemanticity problem described above. It can be used to produce both word-level and sentence-level embeddings, that proved extraordinarily useful for the embeddings tasks. - -##### BERT Training - -{% include figure.html path="assets/img/2023-11-09-multilingual-representations-in-embeddings-models/bert.png" class="img-fluid" %} -
- BERT architecture diagram -
- -BERT (Bidirectional Encoder Representations from Transformers) is based on the Transformer architecture introduced by Vashwani et al. in 2017. The key differences were that BERT was allowed bidirectional context rather than left-side-only, that it did not include a decoder, and its masked language modeling and next sentence prediction training objectives. The bidirectional context is crucial for language modeling, since "The [MASK]" is much harder to predict than "The [MASK] colored fire engine", and in embeddings (unlike text generation) we have the whole original text available. - -{% include figure.html path="assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mlm.png" class="img-fluid" %} -
- BERT's Masked Language Modeling loss -
- -MLM works by taking 15% of the text tokens that BERT sees and replacing them with a [MASK] token. The model's objective is to predict that masked word with its embedding, using the context from the surrounding tokens, and then it is trained on the cross-entropy loss between the predictions and the actual truth. - -BERT was also trained on the NSP (Next Sentence Prediction) objective. In training, the model is given a pair of input segments, and its task is to predict whether the second segment (segment B) follows the first one (segment A) in the original text or if they are randomly sampled and unrelated. The input is constructed by concatenating segment A, which is preceded by a special [CLS] token, and segment B, with a special [SEP] (separator) token in between. For example: "[CLS] Segment A [SEP] Segment B". BERT then produces a pair of embeddings: one for the [CLS] token at the beginning of the input and one for the [SEP] token that separates the two segments. These embeddings are then used to compute a binary classification. The intended effect is that [CLS] contains information about the overall meaning of the first sentence, and [SEP] contains information about the second. This is the first example of sentence embeddings, which are the key to how a modern embeddings model works. - -{% include figure.html path="assets/img/2023-11-09-multilingual-representations-in-embeddings-models/nsp.png" class="img-fluid" %} -
- BERT's Next Sentence Prediction loss -
- -BERT turns token inputs into embeddings for each token in its context window, which is 512 tokens long. We can choose to construct a single text embedding from this any way we like. There are several popular strategies for this "token pooling" problem. Reading the above, one may be tempted to take the [CLS] token's embedding. In practice, however, the [CLS] token embeddings proved to be slightly worse than just taking the average of all the individual token embeddings of the sentence, and subsequent models such as RoBERTa skipped the NSP training objective and actually performed slightly better. Why this is the case is an area of ongoing research, but as a matter of opinion, we personally suspect Shitao Xiao's work on RetroMAE correctly diagnoses the issue, as demonstrated by their models' improved performance on benchmarks. The training losses described in that paper are more complex and outside the scope of this blog post, but it's worth a read if interested. - -#### SBERT - -The final part of the story is Sentence-BERT, and its addition of contrastive text-pair pretraining. This is what turns BERT, a general language model, into a model that specifically generates text embeddings. Contrastive training was discussed at length in 6.s898; the core insight is that we can train an encoder model to have a useful representation if we train it to embed similar examples together, and dissimilar examples far apart. In Sentence Transformers, this is done by constructing a "Siamese BERT" network. There are two BERT models (or commonly two copies of the same model) that are each used to embed a text passage. Then, the loss is calculated by the following formula: - -$$ -\mathcal{L}_N = -\mathbb{E}_{X} \left[ \log \frac{f_k(x_{t+k}, c_t)}{\sum_{x_j \in X} f_k(x_j, c_t)} \right] -$$ - -This encourages the model to predict positive pairs (similar passages) as vectors with close to 1 similarity, and negative pairs close to 0. 
Similarity metrics include (Euclidean) distance, but most often used is cosine similarity. Negative pairs can either be "mined" with some heuristic such as bag-of-words, or simply sampled at random from other examples in the batch. Due to this, pretraining batch sizes for embedding BERTs are often huge, in the tens of thousands. - -{% include figure.html path="assets/img/2023-11-09-multilingual-representations-in-embeddings-models/sbert.png" class="img-fluid" %} -
- The Siamese BERT architecture -
- -The reason two models are used is that many tasks see improved performance if there is a distinction made between "questions" and "answers". For example, searches and retrieval queries may not resemble the results they most need in meaning: "What is the tallest building in Hong Kong" and "The International Commerce Centre" are not closely semantically related, but should be paired in search contexts. Because of this, we can train a "query" and "passage" model together as one giant network on a contrastive loss, and thus get a model that can take in both. - -In practice, this improvement is rarely worth doubling the number of parameters, and so most papers simply re-use the same model for both queries and passages. - -## How Embeddings Models are Trained - -Putting all this together, we have the current standard recipe for training a modern embeddings model, in up to three stages: - -### 1. Pretraining - -It is valuable to start with a language model that has already learned some inner representation of language. This makes the embeddings task significantly easier, since the model must only learn to condense this inner representation into a single high-dimensional dense vector space. While it is possible to use more modern LLMs such as GPT or LLaMA for embeddings, they are fundamentally hampered because they cannot attend to context in both directions. Therefore, almost all state-of-the-art embeddings models still begin from the BERT models themselves, or their derivatives. These are trained as described above, with an MLM and potentially NSP loss. - -### 2. Training - -Following Sentence-BERT, the model is trained contrastively. At this point, we choose a pooling strategy to convert BERT outputs into sentence embeddings. Many current papers choose to use average pooling, but max-pooling or taking the [CLS] token are occasionally used. 
Positive pairs are either handpicked from datasets such as search engine question-responses, or commonly generated from general text data, such as academic paper title-abstract pairs, Wikipedia page title-summaries and so forth. - -### 3. Fine-Tuning - -It has also become common to fine-tune especially large embeddings models on higher-quality datasets, such as MS MARCO (Bing question-passage responses), fact verification (e.g. FEVER), and paraphrasing (e.g. Quora). This increases performance at desired tasks, and was the inspiration for our approach. - -## How Embeddings Models are Tested - -Similarly to how decoder LLMs have recently converged on being measured on the HuggingFace Open LLM Leaderboard, the currently ubiquitous benchmark for embeddings models is MTEB. Presented in a 2022 paper, it contains 8 embedding tasks covering a total of 58 datasets. The tasks are: - - -{% include figure.html path="assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mteb.png" class="img-fluid" %} -
- MTEB datasets -
- - -1. **Bitext Mining**: -Inputs are two sets of sentences from two different languages. For each sentence in the first set, the best match in the second set needs to be found. This metric is commonly ignored in places such as the MTEB Leaderboard and in papers, because few multilingual models have been created. - -2. **Classification**: -A train and test set are embedded with the provided model. The train set embeddings are used to train a logistic regression classifier, which is scored on the test set. - -3. **Clustering**: Involves grouping a set of sentences or paragraphs into meaningful clusters. A k-means model is trained on embedded texts. The model's performance is assessed using the v-measure, which is independent of the cluster labels. - -4. **Pair Classification**: Requires assigning labels to pairs of text inputs, typically indicating if they are duplicates or paraphrases. Texts are embedded and distances calculated using various metrics (cosine similarity, dot product, Euclidean, Manhattan). Metrics like accuracy, average precision, F1, precision, and recall are used. - -5. **Reranking**: Involves ranking query results against relevant and irrelevant reference texts. Texts are embedded using a model, with cosine similarity determining relevance. Rankings are scored using mean MRR@k and MAP, with MAP as the primary metric. - -6. **Retrieval**: Each dataset includes a corpus and queries, with a goal to find relevant documents. Models embed queries and documents, computing similarity scores. Metrics like nDCG@k, MRR@k, MAP@k, precision@k, and recall@k are used, focusing on nDCG@10. - -7. **Semantic Textual Similarity (STS)**: Involves assessing the similarity of sentence pairs. Labels are continuous, with higher scores for more similar sentences. Models embed sentences and compute similarity using various metrics, benchmarked against ground truth using Pearson and Spearman correlations. Spearman correlation based on cosine similarity is the main metric. 
- -8. **Summarization**: Evaluates machine-generated summaries against human-written ones. Models embed summaries, computing distances between machine and human summaries. The closest score, such as the highest cosine similarity, is used for evaluation. Metrics include Pearson and Spearman correlations with human assessments, focusing on Spearman correlation based on cosine similarity. - -We can see that MTEB represents many downstream users' desires as described earlier, but could be criticised for favoring cosine similarity as a distance metric for training. In either case, MTEB has demonstrated, and itself encouraged, some trends in research: - -### Scaling - -The MTEB paper itself, as well as the GTR and Sentence-T5 papers, suggested that model parameters are correlated with higher performance. We should expect that from intuition about GPTs and their scaling laws, larger models perform better. - -{% include figure.html path="assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scaling.png" class="img-fluid" %} -
- Figure 3 from MTEB demonstrating scaling vs. performance -
- -However, if we extrapolate to more recent research , we find that the state-of-the-art models have failed to get bigger over time, and the highest-performance models are still under 1B parameters. This shows that embeddings is not as easily reduced to scaling laws as LLMs are. - -{% include figure.html path="assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scale.png" class="img-fluid" %} -
- MTEB score vs time for SOTA models. The size of the cross represents parameter count. -
- -However, even these small models still train on hundreds of millions or billions of text pairs, requiring thousands of GPU-hours to train. We can conclude that while parameter count may not be increasing, the overall compute requirements of training an embeddings model are getting higher, and it is no longer within the reach of all researchers to work on these models. - - -### Multilingualism - -While MTEB is a multilingual benchmark, only a few tasks, namely STS, Classification and Bitext Mining, have multilingual versions. Combined with the abundance of English training data, this has led to every language except English, Chinese and Polish lacking a complete MTEB and thus lacking the benefits of state-of-the-art models. - -As in other subfields of NLP, multilingual performance is often an afterthought, and left by the wayside in pursuit of higher performance on English benchmarks, or exclusively in the domain of labs that can afford extra runs. - -## Method - -With these problems as our motivation, we aim to find out if it is possible to add multilingualism to an existing model without having to pretrain from scratch. This may be a step towards bringing the benefits of increased embeddings performance to languages that don't currently have a state-of-the-art model. Furthermore, if it is possible to add a new language to an existing model, this hints at the ideas that models do not necessarily learn a representation based on a particular language, and that translation is easier than expected in the context of embeddings, modelable as a transformation of the representation space. - -To do this, we will take an existing model that has both monolingual English and multilingual variants, and use contrastive training to add in new languages without sacrificing English performance, by using publicly available text translation pairs. We call this approach Cross-Lingual Fine-Tuning (CLFT). 
We will attempt to create a model that performs on-par with the multilingual model in multiple languages, and on-par with the original model in English, which we will measure by completing with our own data a multilingual version of MTEB in all tasks. - -### Model Choice - -We choose e5-base-v2 and multilingual-e5-base as our demonstration models. E5 is the highest-performing current open-weights model with both a mono- and multilingual version, and still holds the top spot in many less popular languages. Both models are the size of BERT, with 12 layers, 768-dimensional embeddings and a context window of 512 tokens. The only difference is that the multilingual model has a much larger vocabulary to support more languages, and uses the XLM-RoBERTa tokenizer, leading to about 60% more parameters. - -This choice does produce a caveat in the rest of our post - since the BERT tokenizer of e5-base has been trained only on English data, it will be unable to tokenize text that is not also a possible English string. In practice, this means that any Latin or near-Latin speaking languages, such as French, German and Turkish, can be used, but the model cannot be finetuned to read unknown characters like Japanese or Arabic script. Any non-Latin characters will likely become an [UNK] token, which carries no information for the model to embed. We are confident that this is not a fatal flaw, though, since just as it is possible to train LLMs with unused vocabulary, such as Persimmon-8B, it is possible to train an embeddings model with a big unused vocabulary. In the case that this research proves useful, it would be easy to train a large English embeddings model with a multilingual tokenizer and fill in this extra vocabulary space afterwards in finetuning. - -### Benchmarking - -As described above, it is hard to use MTEB to test performance in non-English languages, due to the lack of available tasks. 
After investigating the source datasets, we know that this is because of a lack of data. In the interest of producing a universally fair test, especially for low-resource languages where quality data is not available, we opted to use synthetic data to create a multilingual MTEB test set, by using machine-translation to convert the English datasets into each language. - - -
-Side note: We were fascinated to find that the state-of-the-art neural machine translation model is no longer GNMT or the Google Translate API, but in fact just GPT-4! -
- - -We used GPT 3.5 to process ~200K test examples in each of the following languages: French, German, Spanish, Swahili, and Turkish. We selected these languages because of their presence on the No Language Left Behind (NLLB) text-pair dataset, widespread usage, use of Latin alphabet, and varying degrees of similarity to English. We were particularly interested in Turkish because, while many words are near-Latin when written down, the language is from a completely different Proto-Turkic root, and appears much harder for native English-speakers to read. We were also interested in Swahili, as despite being one of the world's biggest languages, it is comparatively data-poor and so multilingual models often struggle to generalize to. We trained five models in total. - -As mentioned before, MTEB already contains some multilingual components, in the textual similarity, bitext mining and classification tasks. The bitext mining task in particular requires a cross-lingual model, so we will use it only on the final all-language model. The remaining tasks are clustering, retrieval, classification, re-ranking, STS, and summarization. For each task, we selected one dataset that would generalise well across languages. Given more time and compute resources, it would be easy to expand the dataset to a full synthetic multilingual MTEB. From now on, we refer to this benchmark as MMTEB (Multilingual Massive Text Embeddings Benchmark). - -Datasets and code for evaluation are available [HERE](https://github.com/mtybadger/mmteb-toolkit). - - - - - - - - - - - - - - - - - - - - - - - - - - - -
TaskClassificationClusteringRetrieval
DatasetMASSIVEReddit and TwentyNewsgroupSciFact
Semantic Text SimilaritySummarizationRerankingPair Classification
STS-22SummEvalMINDTwitter URL Corpus
-
- Tasks and datasets in MMTEB. -
- -### Training - -In CLFT, we initialize two instances of our base model, one of which is frozen, and the other is trained. We will refer to these as $f_s$ and $f_\theta$ for the static and trained model. The static model will be used to anchor our trained model to the initial representation. For each language $l$, our data $X_l$, is composed of pairs of data points $(x_e, x_l) \in X_l$, where $x_e$ is a sentence in English, and $x_l$ is that sentence translated to language $l$. - -We initially attempted to use the literature-standard InfoNCE contrastive loss for $$ \mathcal{L}_{\text{eng}} $$ and $$ \mathcal{L}_{\text{cross}} $$, treating equivalent translation pairs as positive examples, and non-equivalent text pairs as negative examples. However, empirically this did not perform as well, which we suspect may be because of compute-based batch size limitations (e5 was trained on a comparatively huge batch size of 32,768). Because of this, we chose to use our own simplified cosine similarity objective. - -We give the model $$ f_\theta $$ the following goal: place $$ x_l $$ as close to $$ x_e $$ as possible, without changing where we place $$ x_e $$. This is crucial, because it forces the model to map the new language onto its existing representation. This is done with the following loss function - -$$ -\mathcal{L}(x_e, x_f) = \mathcal{L}_{\text{eng}} + \beta \mathcal{L}_{\text{cross}} -$$ - -Where: -- $$ \mathcal{L}_{\text{eng}} = 1 - f_\theta(x_e) \cdot f_s(x_e) $$ represents the loss component for English text, with $$ f_\theta $$ as the dynamic model being trained and $$ f_s $$ as the static reference model. -- $$ \mathcal{L}_{\text{cross}} = 1 - f_\theta(x_e) \cdot f_\theta(x_f) $$ represents the cross-lingual consistency loss, comparing the dynamic model's outputs for English and foreign text. -- $$ x_e $$ and $$ x_f $$ are inputs for English and foreign text, respectively. 
-- $$ \beta $$ is a coefficient to balance the influence of the cross-lingual consistency term. - -We ran each of our mono-lingual models on 400,000 text pairs from the NLLB dataset, with learning rate $$ lr = 1e-4 $$ and dynamic scaling $$ \beta = 0.4 $$. - -## Results -We found interesting and surprising results across our chosen languages and tasks. The results in table format are available in the appendix. - -We can visualize these results in two graphs: comparing our approach to the baseline English model, and to the current state-of-the-art multilingual model. - -{% include figure.html path="assets/img/2023-11-09-multilingual-representations-in-embeddings-models/base.png" class="img-fluid" %} -
- All tasks and languages vs. base model performance -
- -{% include figure.html path="assets/img/2023-11-09-multilingual-representations-in-embeddings-models/multi.png" class="img-fluid" %} -
- All tasks and languages vs. multilingual model performance -
- -We can see that the CLFT approach did extremely well on tasks like classification, pair classification and clustering, even beating the multilingual model itself. This is to be expected in particularly well-suited tasks, since a perfect monolingual model will always outperform a multilingual model at a set number of parameters. However, the model did not improve as strongly in retrieval and semantic textual similarity tasks. Additionally, we can see the model struggle most significantly in Swahili, the most distant language to its original English in our training set. Overall, we observed an average 5.5% relative improvement on the base model, taking us **49.8%** of the way to the performance of the multilingual model. - -We have some conjectures about the reason for this split, which relate to the theory of representation learning. Since our loss is purely on positive pairs, there is weaker enforcement of a shape of the embeddings space. It is therefore likely that our approach is degenerating the shape of the embeddings space, leading to more clustering and noisier local structure. This means that tasks that rely on broad-strokes embeddings, such as clustering, classification and so on, will benefit from this approach, whereas tasks that rely on fine-grained relative positioning such as retrieval, reranking and STS will suffer. CLFT could thus be viewed as a trade-off between speed and ease of training, and noisiness of embeddings. - -We investigate this by performing a visual analysis of the embeddings after PCA dimension reduction. In the figure below, we see how different models represent the same text, after it has been translated. The texts were taken from the associated Reddit clustering datasets for each language, and the labels in the diagrams are the name of the corresponding class of the datapoint. We see that the position of each embedding is roughly the same, which makes sense given our loss function. 
- -{% include figure.html path="assets/img/2023-11-09-multilingual-representations-in-embeddings-models/words.jpg" class="img-fluid" %} -
- Representations of the same text, translated to models' respective languages -
- -Additionally, the figure below demonstrates that we were mostly successful in our goal of keeping our trained models aligned with the underlying English model. We embedded the same English text with each model and got an even tighter clustering. We see that the training on languages more similar to English, such as Spanish, did not alter the English representations as significantly. Conversely, more distant languages, such as Swahili, led to further degradation of the embedding space. - -{% include figure.html path="assets/img/2023-11-09-multilingual-representations-in-embeddings-models/english.jpg" class="img-fluid" %} -
- Representations of English text, according to each language -
- -## Conclusions -Based on our results, we conclude that fine-tuning for multilinguality is a cheap and viable alternative, especially when working with languages that do not have a large presence on the internet. While not an improvement over "true" multilingual models in general, CLFT can outperform multilingual models in scenarios where high-quality data is sparse, or in specific task categories (like clustering and reranking). - -Additionally, we have made steps to introduce the first truly multilingual benchmark, for future embedding models to be evaluated against. All code and data for MMTEB assessment can be found [here](https://github.com/mtybadger/mmteb-toolkit). - -### Limitations and Next Steps -Our experiment has several limitations, and there is plenty of room for extension: - -The fact that we used machine-translated English text for our benchmark poses potential issues. It's likely that the distribution of data that our translation model produces is not equivalent to that produced in the real world, meaning that our benchmark isn't as accurate as the English one is. This is hard to ameliorate, especially for languages lacking many large datasets. However, barring vast troves of previously undiscovered internet data coming to light, translations can serve as a useful stopgap, and an equalizer for these less available languages. Completing the MMTEB benchmark would be a valuable contribution to the field, and a path to more languages being represented in state-of-the-art models. - -In this paper, we only evaluated monolingual models, and did not study how the approach scales to multiple languages at once. Due to time and compute constraints, we were unable to try and train a "true" multilingual model, beyond just English and one other language. We believe that with further training, it may be possible to repeat the process above for multiple languages. 
- -As mentioned in our results, CLFT can lead to noisy embeddings, which may decrease performance on particular tasks. A better distillation loss, or traditional contrastive loss with a much larger batch size, may help to regularize the data and resolve this issue. - -As previously mentioned, we could not explore non-Latin characters, vastly reducing our set of potential languages. We believe that with the correct tokenizer and base model, this should be possible. Additionally, it's becoming possible to imagine a future of Transformers without tokenization, which would greatly help approaches like ours. - -Despite our models maintaining near perfect alignment with the base model on the English text pairs during training, we observed performance on the English MTEB decrease substantially. This suggests that the text pairs on NLLB do not fully capture the distribution of data seen during testing, which is something that could be improved upon with better translation datasets. - -## Appendix - -Here is a full table of our results: - -| | Classification | Summarization | Clustering | Retrieval | STS | Reranking | Pair Classification | Average | -|------------------|----------------|---------------|------------|-----------|---------|-----------|---------------------|---------| -| Spanish (e5-base)| 0.511 | 0.314 | 0.333 | 0.554 | 0.585 | 0.296 | 0.828 | **0.489** | -| Spanish (e5-multi)| 0.635 | 0.301 | 0.336 | 0.655 | 0.629 | 0.243 | 0.848 | **0.521** | -| Spanish (ours) | 0.583 | 0.314 | 0.398 | 0.568 | 0.553 | 0.284 | 0.847 | **0.507** | -| German (e5-base) | 0.522 | 0.307 | 0.328 | 0.560 | 0.236 | 0.293 | 0.812 | **0.437** | -| German (e5-multi)| 0.637 | 0.313 | 0.346 | 0.648 | 0.491 | 0.230 | 0.840 | **0.501** | -| German (ours) | 0.602 | 0.320 | 0.393 | 0.546 | 0.273 | 0.332 | 0.841 | **0.472** | -| French (e5-base) | 0.512 | 0.312 | 0.329 | 0.568 | 0.747 | 0.330 | 0.825 | **0.518** | -| French (e5-multi)| 0.637 | 0.306 | 0.263 | 0.644 | 0.764 | 0.222 | 0.845 | 
**0.526** | -| French (ours) | 0.622 | 0.302 | 0.404 | 0.604 | 0.749 | 0.344 | 0.849 | **0.554** | -| Turkish (e5-base)| 0.458 | 0.296 | 0.221 | 0.411 | 0.456 | 0.308 | 0.776 | **0.418** | -| Turkish (e5-multi)| 0.639 | 0.304 | 0.318 | 0.631 | 0.601 | 0.258 | 0.827 | **0.511** | -| Turkish (ours) | 0.557 | 0.307 | 0.382 | 0.413 | 0.414 | 0.336 | 0.826 | **0.462** | -| Swahili (e5-base)| 0.413 | 0.304 | 0.181 | 0.281 | 0.000 | 0.313 | 0.751 | **0.321** | -| Swahili (e5-multi)| 0.528 | 0.303 | 0.166 | 0.527 | 0.000 | 0.257 | 0.822 | **0.372** | -| Swahili (ours) | 0.347 | 0.315 | 0.238 | 0.332 | 0.000 | 0.275 | 0.764 | **0.325** | -| Average (e5-base) | 0.483 | 0.307 | 0.279 | 0.475 | 0.405 | 0.308 | 0.799 | **0.436** | -| Average (e5-multi) | 0.615 | 0.306 | 0.286 | 0.621 | 0.497 | 0.242 | 0.836 | **0.486** | -| Average (ours) | 0.542 | 0.312 | 0.363 | 0.493 | 0.398 | 0.314 | 0.825 | **0.464** | - diff --git a/_posts/2023-11-09-multimodal-commonsense.md b/_posts/2023-11-09-multimodal-commonsense.md deleted file mode 100644 index 26a6f4cc..00000000 --- a/_posts/2023-11-09-multimodal-commonsense.md +++ /dev/null @@ -1,244 +0,0 @@ ---- -layout: distill -title: Multimodal Commonsense -description: 6.S898 project for analyzing and evaluating the commonsense reasoning performance of multimodal vs text-only models. -date: 2023-12-11 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Vincent Lin - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-multimodal-commonsense.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. 
-toc: - - name: Background - - name: Related Works - - name: Methods - subsections: - - name: Commonsense Benchmarks - - name: HellaSwag - - name: Text-Only Language Model - - name: Vision-Text Multimodal Model - - name: Experiments - subsections: - - name: Data - - name: Setup - - name: Results - - name: Conclusion - subsections: - - name: Ethical Implications - - name: Limitations - - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- -# Introduction - -In recent years, language models have been proven to be quite proficient in producing human-like text, computing somewhat semantically-meaningful and human-interpretable word and token embeddings, and generating realistic conversation. However, there is a vast distinction between mimicking human linguistics from data and forming an understanding of the world and its abstract connections from data. The latter describes the commonsense knowledge of a language model, or its ability to reason about simple relationships, interactions, and general logic of the world. - -With the advent and growth of large language models in recent years (and months), understanding the world and developing deeper underlying representations of physical and abstract concepts through text alone has become much more feasible and tractable. Yet, there is only so much someone or something can understand by simply reading about it. When evaluating the performance of language models in this context, does the language model simply mimic this knowledge or does it inherently possess it? 
One paradigm through which to formalize this is through a deeper categorization of common sense. - -In particular, physical common sense, or knowledge about the physical world and its properties, is fundamental knowledge for realizing the world and the interactions within it. Physical common sense is a naturally multimodal concept, though, that for humans requires a combination of several senses to perceive, as physical properties are manifested in multiple modalities. A lack of info in any modality may make an object visually ambiguous, or otherwise manifest some misunderstanding of an object. Can we expand the capabilities of language models by imbuing them with multifaceted input to expand its knowledge base beyond text alone? - -In this work, I focus on evaluating the physical commonsense reasoning ability of unimodal and multimodal models from text-based tasks under multimodal input. I specifically compare the performance of a text-only language model with a multimodal vision-language model and investigate (a) whether the multiple modalities of input in pretraining the multimodal model can have comparable performance to a text-specialized model, and (b) whether the supplementation of relevant image data at inference time boosts the performance of the multimodal model, compared to a previously text-only input. - -Intuitively, vision data should benefit the physical commonsense reasoning of a model by providing the inputs the additional feature of a physical manifestation. Here, I investigate whether image data truly gives deep learning models an additional dimension of representation to benefit its commonsense reasoning. - - -# Related Works - -Several previous works evaluate language models on unimodal text-based commonsense reasoning. A number of common sense benchmarks for LMs exist, evaluating a variety of common sense categories , from which fine-tuning has shown to improve general commonsense capabilities of state-of-the-art models. 
Li et al., 2021 analyzes the performance of the Gopher language model in zero-shot and few-shot learning with varying model sizes, finding that their LM performed relatively well in physical common sense, but worse in other common sense categories. Zhao et al., 2023 investigates the downstream impact of LLMs’ “commonsense model” of the world on robot task generation; they find that using the LLM commonsense knowledge as a heuristic policy achieves better-reasoned decision-making, implying that LLM common sense can empirically benefit physical interaction with the world. - -The intersection between text and vision in models has also been explored in several works, though not in the context of commonsense reasoning. For example, text-to-image models have shown significantly greater improvement in improving & expanding the text encoder as opposed to a similar increase in size of the image diffusion model , showing the effect of powerful text embeddings in image generation. Common sense benchmarks with multimodal inputs have also been created and demonstrate an increase in multimodal model performance after fine-tuning , but they don’t investigate the empirical effect or the representational differences between utilizing additional modalities of data versus not. - -More generally, the effect of additional modalities of data on downstream performance is studied in Xue et al. 2022 in the context of crossmodal knowledge distillation, where they conclude that multimodal input isn’t unconditionally beneficial; multimodal teacher models may perform better than unimodal teachers, but students of multimodal teachers tend to perform better than those of unimodal teachers. - - -# Methods - -## Commonsense Benchmarks - -It’s important to note that there are many distinguishing categories of commonsense knowledge. 
Physical common sense (e.g., a ball rolls down an incline instead of remaining still), social common sense (e.g., shouting at a person may incite fear), temporal common sense (e.g., pan-frying chicken takes longer than oven-roasting one), and numerical/logical common sense (e.g., basic arithmetic) are a few examples that all require different modalities of reasoning and may favor some models & architectures over others. Here I focus on physical common sense, since intuitively vision data may influence a model’s physical knowledge the most. - -Commonsense benchmarks can be further categorized into (a) multiple-choice evaluation, where given a short background prompt, a model must select the most reasonable option or continuation from a set of given options, and (b) generative evaluation, where a model must generate an answer or continuation to the prompt. Here, I will focus on multiple-choice evaluation, as multiple-choice benchmarks provide a more concrete and reliable metric for determining similarity to “human” judgment. To evaluate the commonsense performance of both the unimodal and multimodal models, the HellaSwag benchmark is used. - - -## HellaSwag - -The HellaSwag benchmark is designed to evaluate physical, grounded, and temporal common sense. Given a few-sentence-long story or prompt, the model must choose the correct continuation from four choices. The prompts are generated from (a) ActivityNet , a large-scale video benchmark for evaluating Human Activity Understanding containing annotations for segments of YouTube videos, and (b) WikiHow , a large-scale text summarization dataset. Data splits are provided by the HellaSwag benchmark, but only the train and validation splits are used here, as the test set labels are not public. - -Here, for evaluating the multimodal model, I use only the entries generated from ActivityNet, as each ActivityNet prompt has an associated source ID from which the original source video may be accessed. 
From the video, image data can be scraped to augment the multimodal model’s fine-tuning and inference. The image data generation process is described in more detail in a following section. - -Due to resource and time constraints, only a subset of this data was used for training and evaluation. Given the large size of the original HellaSwag benchmark, the sampled subset of the original data contains 10% of the original data. Each datum within the sampled dataset is sampled randomly from the original train/validation set, and each prompt within the sampled dataset is verified to have a publicly available video associated with it, i.e., the associated YouTube video is not private or deleted. Implications of this limitation are discussed further in the Limitations section below. - -
-
- {% include figure.html path="assets/img/2023-11-09-multimodal-commonsense/hellaswag.jpg" class="img-fluid" %} -
-
- Figure 1. Example prompts from the HellaSwag benchmark. Image sourced from the original HellaSwag paper . -
-
- -## Text-Only Language Model - -### RoBERTa - -RoBERTa is used for evaluation of text-only models for physical commonsense reasoning. I use RoBERTa for its established high performance in general natural language processing tasks while being relatively compact and fast to run. A dropout layer and a linear classification head is used with the RoBERTa model to generate the logits for label classification of each prompt. - -## Vision-Text Multimodal Model - -### CLIP - -The CLIP (Contrastive Language-Image Pre-Training) model is a multimodal vision and language model . It was introduced as a novel, simplified pretraining strategy utilizing the large amounts of public available data from the Internet to form (image, text) pairs, demonstrating high performance in downstream tasks such as OCR, geo-localization, and action recognition. Since CLIP utilizes natural language processing methods for processing text and image captions, it also boasts an impressive language model, making the model useful for both unimodal and multimodal tasks. - -In the experiments described below, the multimodal model is compared to the unimodal model via text sequence classification and text + vision sequence classification for determining the most likely ending to each HellaSwag prompt, so high baseline performance in both of these tasks is an essential starting point, which CLIP provides. Like for the RoBERTa model, a dropout layer and a linear classification head is used in conjunction with CLIP to perform the label classification for each prompt. - - -### Image Data Generation - -To collect the supplementary vision data for fine-tuning and evaluating the multimodal model, an additional scraping script is used to collect the relevant image data for each HellaSwag prompt. As described before, each prompt in the HellaSwag benchmark is generated from an associated ActivityNet prompt. 
Each ActivityNet prompt contains a source ID for the corresponding YouTube video, as well as a time segment containing the start and end time (in seconds) for the relevant video annotation. Using this information, each text prompt can be supplemented with an additional image prompt via a frame from the corresponding YouTube video. - -A custom script is used to access each prompt’s corresponding YouTube video and scrape image data. The script works as follows: - -1. From a HellaSwag entry, obtain the source ID for the corresponding ActivityNet entry. -2. From the ActivityNet entry, obtain the YouTube video source ID (to be used directly in the YouTube URL) and the time segment indicating the start/end time of the annotated clip. -3. Download a low-resolution copy of the YouTube video via accessing the URL `https://www.youtube.com/watch?v={source_id}`. Here, we download the 144p resolution copy of each video. -4. Capture a single selected frame from the video data. Note: the selected frame is determined by calculating the average between the video clip’s start and end time, then scraping the frame of the video at that timestamp. Implications of this frame selection are described in more detail in the Limitations section below. -5. Save the frame as image data for multimodal fine-tuning. - -This pipeline is used on the (sampled) HellaSwag train, validation, and test sets so that image data is available for both fine-tuning of the multimodal model, as well as inference for evaluation. - - -# Experiments - -## Data - -For fine-tuning and evaluation of the unimodal and multimodal models, a subset of the HellaSwag dataset is used, as already described above. Further summary of the sampled dataset can be found in Table 1. - -To prepare the data for Multiple Choice Classification, the data from each prompt must be preprocessed as follows. 
Each prompt in the HellaSwag dataset is broken into three components: `ctx_a`, which contains the first sentence(s) of the prompt, `ctx_b`, which contains the initial few words of the final sentence, and four `ending`s all stemming from the same `ctx_a` and `ctx_b` but each with different conclusions. This particular formatting of the data is important for the RoBERTa tokenizer, where each sequence within an inputted text pair must be a complete sentence. Each prompt then generates four text pairs of the form `(ctx_a, ctx_b + ending_i)` for each of the four endings. This allows for the multiple choice classification head to compute the most likely of the four endings, given the same context `ctx_a`, `ctx_b`. - - -
-
- {% include figure.html path="assets/img/2023-11-09-multimodal-commonsense/hellaswag_summary.jpg" class="img-fluid" %} -
-
- Table 1. Summary of sampled HellaSwag dataset. -
-
- -## Setup - -Neither the RoBERTa nor the CLIP architecture is designed for sequence or multiple choice classification, so a separate linear classification head follows each of the unimodal RoBERTa, unimodal CLIP, and multimodal CLIP models. - -**Text-only fine-tuning**: The training and validation sets for fine-tuning are formatted and preprocessed as described above. To adjust the weights of the classifier and the core embedding model, each model is fine-tuned on the HellaSwag training data and evaluated during training on the validation data for 20 epochs. Since only the text prompt is inputted to CLIP here, only the CLIP text embedding is used for classification. - -**Text-image fine-tuning**: To fine-tune the multimodal CLIP model, the original training and validation datasets are augmented by adding each prompt's relevant corresponding image data (from the process described in the Image Data Generation section). The multimodal model is then fine-tuned on both the text prompts as before and the relevant image data simultaneously. With both text and image input, CLIP outputs a combined text-image embedding that is used for the classification head, instead of the text-only embedding from before. - -After fine-tuning, each model is evaluated on the withheld HellaSwag test dataset for classification accuracy. For both the text-only and text-image fine-tuning, I perform three total repetitions for each model and average the results in Figure 2. - -
-
- {% include figure.html path="assets/img/2023-11-09-multimodal-commonsense/accuracy.png" class="img-fluid" %} -
-
- Figure 2. Accuracy results for each model, averaged across three runs. -
-
- -## Results - -As shown in the accuracy results, the RoBERTa model performs the best, while the unimodal CLIP model performs worse, and the multimodal CLIP model only slightly better than the unimodal CLIP but still marginally worse than RoBERTa. RoBERTa likely performs so well because of its generally high performance in other text-based tasks, and its bidirectional contextual embeddings allow for evaluation of a prompt/ending holistically. In this setup, the supplementary image data did not provide any significant empirical improvement to the multimodal model, as shown by the insignificant improvement in downstream performance when comparing the text-only to text-image CLIP models. - -However, I attempt to provide an explanation for this shortcoming through further investigation of the supplementary images. Below, I display the class activation map of the image data from a particular prompt to attempt to visualize why the additional modality of data had little effect on the classification distinguishability across the four endings of the prompt. Figure 3 shows the image (which is the same for all four endings) and the individual image attention masks generated from each ending corresponding to the following context: `A lady named linda, creator of paint along is demonstrating how to do an acrylic painting. She starts with a one inch flat brush and yellow and white acrylic paint. she ...` - -
- {% include figure.html path="assets/img/2023-11-09-multimodal-commonsense/linda_paint0.png" class="img-fluid" %} - - {% include figure.html path="assets/img/2023-11-09-multimodal-commonsense/linda_paint1.png" class="img-fluid" %} - - {% include figure.html path="assets/img/2023-11-09-multimodal-commonsense/linda_paint2.png" class="img-fluid" %} - - {% include figure.html path="assets/img/2023-11-09-multimodal-commonsense/linda_paint3.png" class="img-fluid" %} -
-
- Figure 3. The class activation maps for the following prompt: - - "A lady named linda, creator of paint along is demonstrating how to do an acrylic painting. She starts with a one inch flat brush and yellow and white acrylic paint. she ..." - - and the following endings: - - (1) "... then paints about six shades of purple color on the paper." - - (2) "... makes x patterns across the canvas with the yellow color." - - (3) "... puts on a cream colored coat and begins painting the white wooden fence without a paint shaker." - - (4) "... powders the paint in an electric flat brush and applies it over the wall." - - The image caption that generated the shown attention masks is the full sentence pair, i.e., context + ending -
- -Notice that across all four prompt/ending pairs, CLIP attends primarily to the same location on the image. While the image data might enrich the model’s representation of the prompt itself, the similarity across the generated attention masks demonstrates that the image doesn’t serve to distinguish the endings from each other and, therefore, has little effect in influencing the likelihood of any particular ending from being more likely. In this setup, the text embedding alone determines the classifier output, and the lack of image distinguishing power provides some explanation for the similarity in downstream performance between the unimodal and multimodal CLIP models. - -However, it’s possible that the attention masks were only so similar because all endings were prepended by the same exact context. In the case of Figure 3, the context describes an interaction with the painting, so it may be natural for all attention masks to focus on the painting, regardless of the conclusion of the ending. What if we restrict the image caption to contain only the final sentence (`ctx_b` + `ending`)? Figure 4 displays the class activation maps for this setup (though, not from an additional CLIP model fine-tuned on this image caption setup). - -
- {% include figure.html path="assets/img/2023-11-09-multimodal-commonsense/linda_end0.png" class="img-fluid" %} - - {% include figure.html path="assets/img/2023-11-09-multimodal-commonsense/linda_end1.png" class="img-fluid" %} - - {% include figure.html path="assets/img/2023-11-09-multimodal-commonsense/linda_end2.png" class="img-fluid" %} - - {% include figure.html path="assets/img/2023-11-09-multimodal-commonsense/linda_end3.png" class="img-fluid" %} -
-
- Figure 4. The class activation maps for the endings above, without the prepended context. -
- -We see that using the final sentence without the preceding context generates more varied attention masks, so does this unconditionally allow for more diversity in the image/common sense representation in the joint text/image embedding? I claim that the answer is no; having the entire context for analysis is fundamental for common sense reasoning, so removing a significant portion of the context promotes greater ambiguity in both the intent of the prompt/image caption and the benefit of the attention mask. Using only the final sentence may produce more varied results in the image attention mask, but this may potentially be more detrimental than beneficial by attending to an irrelevant portion of the image that may detract from the commonsense ground truth answer. - -Further investigation into different formulations of the image caption with respect to the original prompt in this manner may result in truly richer representations and more meaningful results for downstream model performance. - - -# Conclusion - -In this work, I compare the physical commonsense reasoning capability of a text-only language model with a multimodal vision-language model and evaluate whether the multiple modalities of input in pretraining the multimodal model can have comparable performance to a text-specialized model, and whether the addition of relevant image data for inference boosts the performance of the multimodal model. I find that, within the proposed experimental setup, the effects of image data supplementation are insignificant, though I provide a potential explanation for this unintuitive result via class activation maps of the multimodal model's image attention data; alternative formulations for this text-image data augmentation may provide better and more intuitive results. 
Overall, I provide an empirical experimental pipeline and analysis for potential factors toward further artificial intelligence models' physical commonsense reasoning, and their internal representations of the world. - -## Ethical Implications - -It’s also important to note the ethical considerations of “improving” the commonsense reasoning capabilities of deep learning models. Converging on a universally-accepted definition of common sense is utopian, so the interpretation of common sense evaluation must be constantly scrutinized. The biases and malicious elements of a model’s knowledge base must be investigated to ensure that they are not further accumulated and embedded into the model by fine-tuning on common sense benchmarks. Physical common sense is relatively simple for finding a ground truth answer or natural continuation, but for social common sense, for instance, what a model “should” predict for a particular situation or prompt is much more ambiguous. - -## Limitations - -The implementation and constraints of this work imply some limitations. -One evident limitation is the size of both the benchmark dataset and the models used. Evaluating uni- and multimodal models on the full HellaSwag benchmark, including all of both ActivityNet and WikiHow entries, may conclude in slightly different results. Furthermore, newer and bigger models for both text and vision-text models exist; for example, if evaluation is extended to generative prompt evaluation, the recently released GPT4 model can be evaluated in this framework given its original text-only functionality and its new image input capabilities. - -On the topic of generative prompt evaluation, this work only uses multiple-choice prompts for the simplicity and clarity of its evaluation results. However, generative prompts may more closely reflect human-generated responses and may be more representative of multimodal capabilities. 
-Finally, making progress toward a more general-purpose intelligent system means extending the common sense evaluation to more categories than physical. Designing a more comprehensive multimodal model for common sense requires evaluation on all modalities of common sense, and will likely also require additional modalities of input data (e.g., audio cues for better social common sense performance). diff --git a/_posts/2023-11-09-proposal-1.md b/_posts/2023-11-09-proposal-1.md deleted file mode 100644 index 5c4d804a..00000000 --- a/_posts/2023-11-09-proposal-1.md +++ /dev/null @@ -1,116 +0,0 @@ ---- -layout: distill -title: Applications of Deep Learning in Timbre Transfer -description: Exploring musical timbre transfer by leveraging prior art in differential digital signal processing (DDSP) and modern deep learning structures. -date: 2023-12-12 -htmlwidgets: true - -authors: - - name: Yohan Guyomard - url: "https://yohandev.github.io" - affiliations: - name: MIT - -bibliography: 2023-11-09-proposal-1.bib - -toc: - - name: Introduction - - name: What is Sound? - - name: Timbre Transfer - - name: Proposed Model - - name: Encoder - - name: Decoder - - name: Dataset - - name: Loss - - name: Results ---- - -## Introduction -{% include figure.html path="assets/img/2023-11-09-ddsp-proposal/spectrogram.png" class="img-fluid" %} -_Timbre_ is what distinguishes a flute from a trumpet, piano or any other musical instrument. Even if two performers play the same note, there is no ambiguity in the tone of their instruments. But unlike pitch (frequency) or amplitude (loudness), _timbre_ is not a trivial metric; rather, it pertains much more to subjective qualities like _raspiness_, _articulation_ and even musical _intent_. In this article, I'll be discussing different data-driven approaches to extracting and manipulating this quality of sound using deep learning. 
- -{% include audio.html src="/assets/video/2023-11-09-ddsp-proposal/brass.wav" %} -{% include audio.html src="/assets/video/2023-11-09-ddsp-proposal/flute.wav" %} - -In particular I'd like to explore _timbre transfer_, where one instrument is made to sound like another while retaining most aspects of the original performance. I'll be training an auto-encoder architecture first conditioned on the source instrument (whistling) then tuned to tracks of trumpets to achieve whistling-to-trumpet timbre transfer. Moreover, I'd like to reduce the complexity of previous architectures to achieve realtime results suitable for musical performance. - -First, some context on sound and our perception thereof. - -## What is Sound? -Our ears are sensitive to changes in air pressure over time, which we perceive as sound. Digital audio is analogous to this phenomenon, where its representation is a sequence of samples usually in the `[-1, 1]` range and discretized at a frequency high enough that it becomes indistinguishable from natural sources. This is known as the time domain, however all signals can be mapped to the frequency domain where the individual sinusoids that compose it are graphed against their respective amplitudes. Below is a Fourier transform applied to the sound of a trumpet from above: -{% include figure.html path="assets/img/2023-11-09-ddsp-proposal/brass_freq.png" class="img-fluid" %} -{% include audio.html src="/assets/video/2023-11-09-ddsp-proposal/brass.wav" %} - -It turns out that only the bottom-most frequency, $$f_0$$, informs our ears of this note's _pitch_. In fact, a pure sine wave at that frequency will sound _similar_ to the trumpet. -{% include figure.html path="assets/img/2023-11-09-ddsp-proposal/sine_freq.png" class="img-fluid" %} -{% include audio.html src="/assets/video/2023-11-09-ddsp-proposal/sine.wav" %} - -The distinction between the trumpet and sine wave lies in the frequencies above $$f_0$$, known as overtones. 
Moreover, certain musical instruments exhibit an interesting _harmonic_ behavior where only the overtones that are multiples of $$f_0$$ are actually prominent; this is the case for most instruments you could name, though some non-examples include the gong and timpani 2. Below is a spectrogram, which displays the frequency-domain of a signal over time. Observe the estimated $$f_0$$ (implemented using the YIN algorithm ) and how its multiples ($$2 * f_0$$, $$3 * f_0$$, etc) evolve over time. - -
- -
-_Try playing the audio clip above, whistle into the spectrogram or record your own instrument! The horizontal axis is time and vertical axis is frequency_ - -So how do overtones relate to timbre? Well, the harmonic series is the most obvious distinguishing factor between different instruments playing the same pitch, **so we could model timbre as the evolution of $$f_0$$ and its overtones' amplitudes over time**. Note that this is assuming a strictly monophonic context (one note at a time), and overlooks non-harmonic parts of the signal (e.g. a flutist's breathing). So this representation will still sound synthetic but it forms a good basis for what we're trying to achieve. - -## Timbre Transfer -Perhaps the most obvious method for achieving timbre transfer is approximating the pitch of the source audio (as demonstrated above) and recreating it using a synthetic MIDI instrument. However, this discards much of the expressiveness which isn't desireable in a musical performance. - -Rather, data-driven approaches have shown promise in audio synthesis and existing deep learning architectures can be repurposed to achieve _nuanced_ timbre transfer to various degrees of success. treats timbre transfer as an image-to-image problem, leveraging a Conditional Adversarial Networks architecture trained on natural images to transform spectrograms of audio signals. uses a Denoising Diffusion Implicit Model (DDIM) to achieve similar results. The audio is then synthesized from these spectrograms using the Inverse Fourier Transform or another neural network. - - - - - - - - - - - - - - -
KeyboardGuitarStringSynth Lead
-_Images courtesy of _ - -However, these methods rely on a dataset of audio tracks in two timbre domains, namely audio synthesized from MIDI instruments like in since recordings of performers will never match exactly. The results thereby _sound_ synthetic; a better architecture would thus be self-supervised and trained on acoustic performances directly. - -## Proposed Model -I experimented with an auto-encoder architecture, where a network is trained to minimize the audible difference between some input audio track $$x$$ and its re-synthesized counterpart $$\hat{x}$$; so, the model attempts to recreate its input $$x$$ by first encoding it to some latent representation $$z$$ and decoding back to audio. Note that although over-fitting is possible, a one-to-one mapping (or, _cheating_) is impossible because $$z$$ _bottlenecks_ (has less dimensions than) $$x$$. The appeal of this approach is that the problem is now self-supervised and can be trained directly on musical performances of the _source_ instrument (e.g. whistling). - -Next, the encoder is frozen (unaffected by gradient descent) and the decoder is trained anew on samples of the _target_ instrument (e.g. trumpet). So, the networks knows how to encode the _source_ instrument to some $$z$$, and hopefully its decoder has adapted to map $$z$$ onto the _target_ instrument. - -The decoder doesn't output audio directly, nor does it generate a spectrogram like in . Rather, it controls parameters of a harmonic oscillator proposed by which follows the intuition of timbre as discussed prior; that is, the oscillator has parameters for its $$f_0$$ and the amplitudes of each harmonic overtone. Leveraging this strong inductive bias should reduce the size of the neural network enough to be applicable to realtime performances. 
- -The encoder architecture is taken from , whose original application is tracking pitch; I don't track pitch explicitely, rather demonstrates that CNNs can extract meaningful data from audio directly in the time domain. The issue with working in the frequency domain is shown in , where we'd need a high sampling rate (and thus the network needs to be that much faster) for high frequencies or a long sampling window (which yields a network with more parameters) for low frequencies. Note that there is a nice compromise to these issues by windowing the inputs and outputs , which I'd like to try later. - -Finally, the loss I'm using is multi-scale spectrogram loss proposed in , which computes the L1 loss of two audio tracks in the frequency-domain on both a linear and log scale. - -## Encoder -The architecture of my model is largely inspired by Magenta's Differentiable Digital Signal Processing (DDSP) paper, where differentiable sound processors are introduced. Although modules like reverb and a finite-impulse response (FIR) filter are included, I'm only experimenting with its harmonic oscillator for simplicity. The architecture proposed by is also an auto-encoder, however its latent representation is built on two heuristics (pitch, amplitude) rather than the audio itself. Despite this, is able to achieve natural sounding instruments but its controls are limited in expression, much like MIDI inputs. Realtime Audio Variational autoEncoder (RAVE) builds upon this by encoding a multiband decomposition of the source audio, or a collection of Fourier transforms with varying amount of bins to overcome limitations of the Nyquist frequency and limited precision of discretization. A single Fourier transform operates on a linear scale, where its frequency bins scale from $$0$$ to its Nyquist frequency. However, humans hear on a logarithmic scale (i.e. A4 is $$440 \text{Hz}$$ but an octave above that is $$880 \text{Hz}$$) so the transform has a bias towards low frequencies. 
Multiband decomposition approaches this by shifting the frequency bins using different window sizes of audio and letting the network generalize over the complete frequency spectrum. However, although has shown some incredible results and claims to run in realtime, that is not the case in practice . - -In my experiment, I leverage a Convolutional Representation for Pitch Estimation (CREPE) ; it is a CNN-based pitch estimator that operates directly on the time-domain of an audio signal and achieves state of the art results. Rather than using its output, like in , I use its latent representation and train the network to generalize over more characteristics of sound than just pitch. - -## Decoder - introduced the idea of using oscillators for audio synthesis as opposed to raw waveform modeling. demonstrates that their architecture benefits from this inductive bias and is able to be significantly reduced in size. I wanted to experiment with the encoder for the part, so the decoder of my model remains unchanged from the original paper (for the most part). It consists of several dense layers, ReLU activation functions and layer normalization. In between these is a Gated Recurrent Unit (GRU). The harmonic oscillator from cannot produce sinusoids out of phase (the instantaneous phase is accumulated at each time step) but presumably the network needs _some_ time dependency to form an audio envelope. - -{% include figure.html path="assets/img/2023-11-09-ddsp-proposal/adsr.png" class="img-fluid" %} -_Image courtesy of Tellef Kvifte_ - -## Dataset -I trained the target instrument auto-encoder on the URMP dataset , which consists of individual recordings of performers across a variety of instruments. Specifically, I wrote a dataloader that selects only trumpet solo tracks and randomly samples a 4 second clip from each of them. 
The audio is down-sampled to $$16\text{kHz}$$ because the dataset doesn't contain many frequencies above $$8\text{kHz}$$ and the reduced dimensionality allows for training on my M2 MacBook Air with a batch size of 16! - -I also created my own whistling dataset, sampled from MIT students with varying levels of proficiency. The audio clips are normalized, silence is cut out and altogether I have around 2 hours of data. - -## Loss -Like and , I focus on _perceptual_ loss which approximates human hearing. So, comparing waveforms in the time-domain would not work because humans aren't sensitive to changes in phase whereas the signal changes drastically. I extend upon the multi-scale spectrogram loss proposed by , which consists of taking the L1 norm of the two inputs' spectrograms (so phase is discarded) in both the linear and log domain. Note that human hearing is logarithmic, but spectrograms are not. I experiment upon this by employing the log Mel spectrogram which is an even better approximation of human hearing and used by , and . - -## Results -I trained 500 epochs of 16 times 4 second samples on a single M2 MacBook Air with Metal acceleration, totaling around 10 hours. Unfortunately, the loss converged but the network was not able to generalize over abstract characteristics of sound as I'd hoped. Rather, it learned to represent sound as a mellow mix of harmonics instead of anything useful. I think future experiments should penalize silence (or close to it), and perhaps add skip connections from the inputs' power (explicitly calculated) to the decoder. Moreover, the size of the encoder was drastically reduced (a few orders of magnitude fewer parameters in both width and depth) so it's possible the latent representation did not contain much meaningful data. 
- -{% include audio.html src="/assets/video/2023-11-09-ddsp-proposal/epoch0.wav" %} -{% include audio.html src="/assets/video/2023-11-09-ddsp-proposal/epoch250.wav" %} -{% include audio.html src="/assets/video/2023-11-09-ddsp-proposal/epoch470.wav" %} -Sample synthesized waveforms at epochs 0, 250, and 470 respectively (loud sounds warning!). \ No newline at end of file diff --git a/_posts/2023-11-09-proposal-2.md b/_posts/2023-11-09-proposal-2.md deleted file mode 100644 index d4151e7d..00000000 --- a/_posts/2023-11-09-proposal-2.md +++ /dev/null @@ -1,186 +0,0 @@ ---- -layout: distill -title: Using Synthetic Data to Minimize Real Data Requirements -description: Data acquisition for some tasks in synthetic biology can be cripplingly difficult to perform at a scale necessary for machine learning... so what if we just made our data up?* -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - - -authors: - - name: Carlos Celeste Jr. - url: "celeste8@mit.edu" - affiliations: - name: MIT - - name: Nick Nolan - url: "ncn@mit.edu" - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-proposal-2.bib - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. 
-_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } - -toc: - - name: Introduction - subsections: - - name: Enter Machine Learning - - name: Methods - subsections: - - name: Problem Formulation - - name: Data Acquisition - - name: Training & Testing - - name: Results & Analysis - subsections: - - name: Experiment 1 - - name: Experiment 2 - - name: Conclusion ---- - -*And used it as the basis for transfer learning with the real data that someone put hard work in to generate. - -## Introduction - -Synthetic biology is a burgeoning field of research which has attracted a lot of attention of the scientific community in recent years with the advancement of technologies that enable the better understanding and manipulation of biological systems. A significant contributor to its steadily increasing popularity is the diverse array of potential applications synthetic biology may have, ranging from curing cancer, to addressing significant climate issues, to colonizing other planets. But, to effectively manipulate these biological systems, it is necessary to understand how they work and how they interact with other biological systems — it has been shown time and time again that a system characterized in isolation, compared to the same system in a broader, non-isolated context, will not perform identically. This necessitates models that can predict a system's behavior given both stimuli *and* context. - -In the synthetic biology literature, the behavior of many systems is characterized by the chemical reactions that take place; these reactions consist most frequently of the so-called central dogma of biology, in which DNA produces RNA, which produces proteins. 
These proteins are then free to perform almost every function within a cell, including — most notably for us — regulation of DNA. By varying the extent and nature of this regulation, these systems yield mathematical models that range from simple linear systems to highly complex nonlinear dynamical systems: - -
-
- {% include figure.html path="assets/img/2023-11-09-proposal-2/fig1.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Figure 1: A simple model of the central dogma of biology: a stretch of DNA is used to create a strand of messenger RNA, which is used to create a functional protein. Functional proteins are responsible for almost all operations within the cell, from cellular movement to RNA production and everything in between. -
- -However, the figure above does not capture the full purview of the cell; it neglects factors that synthetic biologists know to be critical to the process of protein expression, as well as factors that have not been characterized rigorously yet. The process of analyzing the behavior of a system at the fullest level of detail necessary to encapsulate these intricate dynamics is expensive and time-consuming, and requires significant experimental data to validate — not to mention the fact that, as was mentioned, there are some factors which we simply don't know about yet. Protein production is an immense and complex task, and identifying its critical parameters at the highest level of detail is no small feat. - -### Enter Machine Learning - -With this in mind, many synthetic biologists are experimenting with characterizing system behavior, especially when augmenting pre-existing models to include newly discovered phenomena, using machine learning and neural networks, due to their universal function approximator property. In this fashion, we may be able to better abstract the levels of biological detail, enabling better prediction of the composition of two genetic circuits. - -Unfortunately, training neural networks also requires (surprise surprise!) substantial experimental data, which is taxing on both a researcher’s budget and time — for a small lab with few researchers working, a single experiment may take upwards of 12 hours of attentive action, while yielding only up to 96 data points for training. Some large-scale gene expression data has been collected to assist in the development of machine learning algorithms; however, this data is focused largely on the expression of a static set of genes in different cellular contexts — rather than on a dynamic set of genes being assembled — and is therefore insufficient to address the questions of composition that are being posed here. 
- -This leads us to a fundamental question: **can we use transfer learning to reduce the experimental data we need for training by pre-training on a synthetic dataset which uses a less-detailed model of our system?** In other words, can we still derive value from the models that we know don't account for the full depth of the system? If so, **what kinds of structural similarities need to be in place for this to be the case?** - -In this project, we aim to address each of these questions; to do this, we will first pre-train a model using simpler synthetic data, and use this pre-trained model's parameters as the basis for training a host of models on varying volumes of our more complex real data. Then, we will consider sets of more complex real data that are less structurally similar to our original synthetic data, and see how well our transfer learning works with each of these sets. - -In theory, since the synthetic data from the literature uses models that have already captured some of the critical details in the model, this fine-tuning step will allow us to only learn the *new* things that are specific to this more complex model, thus allowing transfer learning to be successful. As the two underlying models become increasingly distant, then, one would expect that this transfer will become less and less effective. - -## Methods - -### Problem Formulation - -Consider we have access to a limited number of datapoints which are input-output $(x_i,y_i)$ pairs for a biological system, and we want to train a neural network to capture the system behavior. The experimental data for the output $y_i$ we have is corrupted by an additive unit gaussian noise, due to white noise and measurement equipment precision. Moreover, we consider that we also have access to a theoretical model from another biological system which we know to be a simplified version of the one in our experiments, but which explicitly defines a mapping $\hat y_i = g(x_i)$. 
- -Our goal is thus to train a model $y_i = f(x_i)$ to predict the real pairs while using minimal real pairs of data $(x_i, y_i)$. Instead, we will pre-train with $(x_i, \hat y_i)$ pairs of synthetic data, and use our real data for fine-tuning. - -### Data Acquisition - -In this work we will additionally consider a domain shift between two datasets, which we will refer to as the big domain and the small domain. In the big domain, our inputs will vary between 0 and 20nM, and in the small domain the inputs will vary between 0 and 10nM. These domains represent the ranges for the inputs in the experiments in the small domain, which may be limited due to laboratory equipment, and the desired operation range of the systems in the big domain. - -Furthermore, **for all datasets - pre-training, fine-tuning, or oracle training - we will be generating synthetic data for training and testing purposes.** We will use different levels of complexity to simulate a difference between experimentally-generated and computationally-generated data. In a real setting, we would use the complex model $f$ that we're trying to learn here as the simple, known model $g$ in our setup. Going forward, we will refer to the data generated by our low-complexity model $g$ as "synthetic" data, and to the data generated by our high-complexity model as "real" or "experimental" data. - -For our low-complexity theoretical model, we consider the simplest gene expression model available in the literature, in which the input $x_i$ is an activator, and the output $y_i$ is given by the following Hill function: - -$$y_i = \eta_i \frac{\theta_i x_i}{1 + \Sigma_{j=1}^2 \theta_j x_j}, i\in {1,2},$$ - -where our $\eta_i$'s and $\theta_i$'s are all inherent parameters of the system. 
- -For the first experimental model, we consider a more complex gene expression model, where the activator $x_i$ must form an $n$-part complex with itself before being able to start the gene expression process, which yields the following expression for the output $y_i$: - -$$y_i = \eta_i \frac{(\theta_i x_i)^n}{1 + \Sigma_{j=1}^2 (\theta_j x_j)^n}, i\in {1,2},$$ - -where - once again - our $\eta_i$'s and $\theta_i$'s are all inherent parameters of the system. Note that, at $n=1$, our real model is identical to our synthetic model. As one metric of increasing complexity, we will vary $n$ to change the steepness of the drop of this Hill function. - -As an additional test of increased complexity, we will consider a phosphorylation cycle in which inputs $x_i$ induce the phosphorylation or dephosphorylation of a given protein. We take the dephosphorylated protein to be an output $y_1$, and the phosphorylated protein to be a secondary output $y_2$, for which we have: - -$$y_i = y_{tot} \frac{\theta_i x_i}{\Sigma_{j=1}^2 \theta_j x_j}, i\in {1,2},$$ - -in which $\theta_i$'s and $y_{tot}$ are each system parameters. Note that the only functional difference between this system and the synthetic data generation system lies in the denominator of each, as one has a nonzero bias term, where the other does not. - -
-
- {% include figure.html path="assets/img/2023-11-09-proposal-2/fig2.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Figure 2: Graphical representation of the three different synthetic or experimental models used in this project. In the first diagram, our input protein $x_i$ is activating the production of an output protein $y_i$. This is the simplest model of which we can conceive, and constitutes our synthetic data. In the second diagram, two copies of our input protein $x_i$ come together to form a complex that induces the production of our output protein $y_i$. This is a step up in complexity, and varying the number of proteins that come together allows us to introduce more and more complexity into our system. Finally, a single protein which can be either of our outputs $y_1$ or $y_2$ is moved between these states by our two input proteins $x_1$ and $x_2$. This system, while seemingly very dissimilar from the above two, winds up being mathematically not too far off, and offers another model on which to transfer our learning. -
- -### Training & Testing - -For each experiment, we trained MLPs composed of 5 hidden layers with 10 nodes each and a ReLU activation function. - -For the first experiment, we performed transfer learning by pre-training our model for 90% of the total number of epochs (1800/2000) with the synthetic data sampled from the big domain, where we have a high quantity of data points (40000 $(x_i, y_i)$ pairs); for the remaining 10% of epochs, the network was trained on the experimental data sampled from the small domain, with varying numbers of data points used for training. This can be compared to a model trained exclusively on the same volume of experimental data for a full 2000 epochs, to establish a baseline level of performance. An oracle model was trained for all 2000 epochs on experimental data sampled from the big domain with a high volume of data, and serves as the best-case performance of our model. - -For the second experiment, we followed a very similar protocol as in the first experiment; the critical difference here lies in the fact that, where the fine-tuning step used different volumes of data in the previous case, we now instead use a fixed data volume (1000 $(x_i, y_i)$ pairs), and fine-tune on a host of different models of varying complexity relative to the synthetic model. - -To evaluate performance of our neural networks, we uniformly sample 100 points from the big domain, for which we calculate the L1 loss mean and variance between the network predictions and the experimental model output. - -
-
- {% include figure.html path="assets/img/2023-11-09-proposal-2/fig5.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Figure 3: A visual example of the training done - on the right is the intended function to be learned, where the left features the output of one of the models that was trained with transfer learning. -
- -## Results & Analysis - -### Experiment 1 - -As was mentioned before, the first experiment was targeted towards addressing the question of whether we can pre-train a model and use transfer learning to reduce the volume of real data needed to achieve a comparable standard of accuracy. To this end, we trained several models with a fixed volume of pre-training data, and varied the volume of fine-tuning data available to the model. - -
-
- {% include figure.html path="assets/img/2023-11-09-proposal-2/fig3.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Figure 4: Bar plots of model loss as the volume of fine-tuning (blue) or training (orange) data increases. As can be seen, at high volumes, the blue bars reach a lower loss than the orange bars, suggesting that transfer learning is effective at taking high volumes of data, and improving them further. For very low volumes, these two models are roughly equivalent, although the orange bars have a significantly higher variance than the blue bars. Somewhere in between, a transition occurs, and transfer learning outpaces learning without prior knowledge of anything. -
- -As can be seen in the blue bars of Figure 4, the greater the volume of real data coupled with transfer learning, the lower the loss, and the better the performance. This is to be expected, but this curve helps to give a better sense regarding how quickly we approach the limit of best-case performance, and suggests that the volume of real data used for oracle training could cut be cut down by nearly an order of magnitude while achieving comparable performance. One might argue that this is because the volume of real data used in this training is itself sufficient to effectively train this model; to that end, we consider the orange bars, which represent the loss of models trained for 2000 epochs exclusively on the given volume of real data. This, coupled with the blue bars, suggests that - across all volumes of data - it is, at the very least, more consistent to use transfer learning. Models trained for that duration on exclusively real data sampled from the small domain tended to overfit, and had a much higher variance as a result. As the volume of real data used for fine-tuning increased, the difference between the two regimes of transfer vs. non-transfer learning became more pronounced, and the benefits of transfer learning become more noticeable. Thus we conclude that we can use transfer learning to cut down on the quantity of real data needed, while sacrificing relatively little up to a ~75% cut of data requirements. - -### Experiment 2 - -Next, we wish to address the question of how structurally dissimilar a model can be while still making this transfer learning effective. To this end, we varied $n$ from our first experimental model, and generated data with our second experimental model. In each case, we performed a ~95% cut in the volume of real data relative to the volume of data used to train each oracle. - -
-
- {% include figure.html path="assets/img/2023-11-09-proposal-2/fig4.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Figure 5: Bar plots of model loss as the model being learned is varied, as a means of representing increases in complexity or structure. As can be seen, within this range of complexity variation, transfer learning is consistently able to learn the system to a comparable degree across all cases. -
- -In Figure 5, we compare the loss of models trained with transfer learning to oracles for each - as can be seen, the transfer learning models performed consistently across all models being learned, and the oracles of each were similarly consistent. This suggests that the architectures of the models being learned are sufficiently similar that the transfer learning is effective, which is a promising sign for more applications in which the system being learned has been simplified significantly in its mathematical models. - -## Conclusion - -Ultimately, we've developed a method by which to potentially reduce the volume of experimental data needed to effectively train a machine learning model by using synthetic data generated by a lower-complexity model of the system. We've demonstrated that it has the potential to cut down data requirements significantly while still achieving a high level of accuracy, and that the simple system used to generate data in the sense that the learning process can shore up some substantial structural differences betwen the simple and complex system. These findings are not necessarily limited strictly to synthetic biological learning tasks, either - any complex, data-starved phenomenon in which there is a simpler model to describe parts of the system may find value in this. Looking forward, one can consider deeper structural dissimilarities, as well as application with real synthetic biological data, rather than simply using two models of increasing complexity. diff --git a/_posts/2023-11-09-proposal.md b/_posts/2023-11-09-proposal.md deleted file mode 100644 index 30c23891..00000000 --- a/_posts/2023-11-09-proposal.md +++ /dev/null @@ -1,219 +0,0 @@ ---- -layout: distill -title: Graph neural networks v.s. 
transformers for geometric graphs -description: With the recent development of graph transformers, in this project we aim to compare their performance on a molecular task of protein-ligand binding affinity prediction against the performance of message passing graph neural networks. -date: 2023-11-01 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Ada Fang - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-proposal.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Background and relevant work - subsections: - - name: Graph neural networks on molecules - - name: Graph transformers on molecules - - name: Motivation - - name: Problem definition - - name: Dataset - - name: Architecture - subsections: - - name: Graph neural network - - name: Graph transformer - - name: Loss function - - name: Experiments - - name: Discussion - - name: Conclusion - - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Introduction -Machine learning on graphs is often approached with message passing graph neural network (GNN) models, where nodes in the graph are embedded with aggregated messages passed from neighboring nodes . 
However, with the significant success of transformers in language modelling and computer vision recently , there are a growing number of transformers developed for graphs as well. In this project we investigate the application of graph neural networks compared to transformers on geometric graphs defined on point clouds. We aim to explore the performance of these two models on predicting the binding affinity for a protein-ligand interaction given the atomic coordinates of the docked protein-ligand structure, which is a highly relevant task in drug discovery. This blog post walks through an introduction into graph neural networks and transformers on molecules, our model architecture, experimental results, and a discussion comparing the two architectures. - -## Background and relevant work -### Graph neural networks on molecules -Graphs are comprised of nodes and edges, and we can model any set of objects with a defined connectivity between them as a graph. For example, social networks are a set of people and the connectivity between them is defined by on who knows who. We can also see that grid data formats, like images, are also graphs where each pixel is a node and edges are defined to the adjacent pixels. Any sequential data, such as text, can be modeled as a graph of connected words. In this section we focus on graphs of molecules where nodes are atoms and edges are defined between atoms. These edges are often defined by the molecular bonds, or for atoms with 3D coordinate information the edges can be defined by a spatial cutoff $d$ based on the Euclidean distance between nodes. Given a graph we can use a graph neural network to learn a meaningful representation of the graph and use these representations for predictive tasks such as node-level prediction, edge-level prediction, or graph-level prediction. Graph neural networks learn through successive layers of message passing between nodes and their neighboring nodes. 
- -An important property of many GNNs applied on 3D molecules is SE(3)-equivariance. This means that any transformation of the input in the SE(3) symmetry group--which includes all rigid body translations and rotations in $\mathbb{R}^3$ --will result in the same transformation applied to the output. This property is important for the modelling of physical systems; for example if the prediction task is the force applied on an atom in a molecule, rotation of the molecule should result in the model predicting the same forces but rotated. In some tasks we do not need equivariance but rather SE(3)-invariance (which is a subset of SE(3)-equivariance) where any transformation of the input in the SE(3) symmetry group results in the same output. This is often the case when the task of the model is to predict a global property of the molecule which should not change if all 3D coordinates of the molecule are translated and rotated. SE(3)-invariance will be required for our model of binding affinity as global rotations and translations of the protein-ligand structure should yield the same predicted binding affinity. - -Early SE(3)-equivariant GNNs on point clouds used directional message passing which used the pairwise distance and direction between nodes as features for the GNN, however they were soon shown to be limited in expressivity . Now state-of-the-art (SOTA) models in this area are based on higher order geometric properties such as dihedral angles and representations in the geometric group SO(3). Some examples include GemNet and e3nn . e3nn has also shown that it is much more data-efficient when learning as the model does not need to learn to be equivariant, which non-equivariant models do. For a non-equivariant model to learn to be equivariant it would have to be trained on many SE(3) transformations of the input mapping to the same output, which is very inefficient. 
e3nn models have led to exceptional performance for tasks related to predicting molecular forces and energies . For the task of binding affinity some GNNs that achieve high performance using GNNs are ProNet and HoloProt . - -### Graph transformers on molecules -With the proliferation of transformers in the broader field of machine learning, this has also led to the development of graph transformers. In a transformer model each node attends to all other nodes in the graph via attention where the query is a projection of the feature vector of a node, and the key and value is the projection of feature vectors of all other nodes. Hence, graph transformers and transformers applied to sequences (e.g. text) are largely similar in architecture. However, differences arise in the positional encodings in a graph transformer as it is defined in relation to other nodes in the graph . For geometric graphs, positional encodings can be applied as a bias term on the attention value of node $u$ on $v$, where the bias is a learned value that is dependent on the distance between the nodes . There are also other ways of implementing positional encodings in the form of Laplacian eigenvectors, and random walk diagonals which aim to encode the centrality of each node in the graph . Recently, in an effort to unify different methods to generate structural and positional graph encodings, Liu et al. apply a novel pretraining approach with a multiobjective task of learning a variety of positional and structural encodings to derive more general positional and structural encodings. Graph transformers are also achieving SOTA performance for benchmarks on predicting quantum properties of molecules and binding affinity . - -## Motivation -Given the growing application of both GNNs and transformers we aim to compare their performance on the same task of protein-ligand binding affinity prediction. 
We also aim to compare models as we can see analogies between graph transformers and GNNs, where "message passing" in the graph transformer involves messages from all nodes rather than the local neighborhood of nodes. We view protein-ligand binding affinity prediction as a suitable task to compare the two architectures as there are aspects of both the GNN and graph transformer architecture that would be advantageous for the task: binding affinity is a global prediction task for which the graph transformer may better capture global dependencies, conversely binding affinity is also driven by local structural orientations between the protein and ligand which the GNN may learn more easily. - - -## Problem definition -* The input to the model is a set of atoms for the protein pocket $X_{\mathrm{protein}}$ and ligand $X_{\mathrm{ligand}}$, for which we have the atomic identity and the 3D coordinates, and the binding affinity $y$ for the structure. -* For the graph neural network we define a molecular graph of the protein ligand structure $G=(V,E)$ where $V$ are the $n$ nodes that represent atoms in the molecule and the edges $E$ are defined between two nodes if their 3D distance is within a radial cutoff $r$. We further define two types of edges: intramolecular edges for edges between nodes within $X_{\mathrm{protein}}$ and $X_{\mathrm{ligand}}$, and intermolecular edges for nodes between $X_{\mathrm{protein}}$ and $X_{\mathrm{ligand}}$. -* For the graph transformer it is applied to the whole set of atoms $(X_{\mathrm{protein}}, X_{\mathrm{ligand}})$, and we can use the 3D coordinates of the atoms to derive positional encodings. -* Performance is determined by the root mean squared error, Pearson, and Spearman correlation coefficients between true binding affinity and predicted binding affinity. - -
-
- {% include figure.html path="assets/img/2023-11-09-proposal/problem_definition.png" %} -
-
-
- Figure 1. A protein-ligand structure, Protein Data Bank (PDB) entry 1a0q. The protein backbone is shown in blue, and the ligand is shown in green. The model would be given this structure and the objective is to predict the binding affinity of the ligand to the protein. -
- -## Dataset -We use the PDBbind dataset for the protein-ligand structures and binding affinity. In addition, for benchmarking we use the benchmark from ATOM3D with a 30% and 60% sequence identity split on the protein to better test generalisability of the model. The sequence identity split is based on sequence similarity of proteins in the test and training datasets. The 30% sequence identity split is more challenging are there are more dissimlar proteins in the test set. - -## Architecture -### Graph neural network - -
-
- {% include figure.html path="assets/img/2023-11-09-proposal/gnn_architecture1.png" %} -
-
-
- Figure 2. Overview of the GNN architecture for a graph constructed from a protein-ligand structure. -
- -A graph is constructed from the atomic coordinates of the atoms in the protein pocket $X_{\mathrm{protein}}$ and ligand $X_{\mathrm{ligand}}$ where the nodes are the atoms. Intramolecular edges are defined between nodes within $X_{\mathrm{protein}}$ and $X_{\mathrm{ligand}}$ with a distance cutoff of 3 Å, and intermolecular edges for nodes between $X_{\mathrm{protein}}$ and $X_{\mathrm{ligand}}$ with a distance cutoff of 6 Å. The model architecture is defined as follows: - -(1) Initial feature vectors of the nodes are based on a learnable embedding of their atomic elements. The edge features are an embedding of the Euclidean distance between the atomic coordinates. The distance is embedded with a Gaussian basis embedding which is projected with a 2 layer MLP. - -(2) We define two types of messages in the GNN, given by the two types of edges, intermolecular messages and intramolecular messages. The architecture used for the two types are messages are the same but the weights are not shared, this is to reflect that information transferred between atoms within the same molecule is chemically different to information transferred between atoms of different molecules. The message passing equation uses the tensor product network introduced by e3nn , and our implementation is based on the message passing framework used by DiffDock . We omit the details of the tensor product network for simplicity but provide the overall method below. - -
-
- {% include figure.html path="assets/img/2023-11-09-proposal/message_passing_eqn.png" %} -
-
- -where node $b$ are the neighbors of node $a$ in $G$ given by intermolecular or intramolecular edges denoted with $t$. The message is computed with tensor products between the spherical harmonic projection with rotation order $\lambda = 2$ of the unit bond direction vector, $$Y^{(\lambda)}({\hat{r}}_{a b})$$, and the irreps of the feature vector of the neighbor $h_b$. This is a weighted tensor product and the weights are given by a 2-layer MLP, $\Psi^{(t)}$ , based on the scalar ($\mathrm{0e}$) features of the nodes $h_a$ and $h_b$ and the edge features $e_{ab}$. Finally, $LN$ is layer norm. Overall, the feature vectors of the nodes are updated by intermolecular and intramolecular messages given by the tensor product of feature vectors of intermolecular and intramolecular neighbors and the vector of the neighbor to the node. - -(3) After $k$ layers of message passing we perform pooling for the nodes of $X_{\mathrm{protein}}$ and the nodes of $X_{\mathrm{ligand}}$ by message passing to the "virtual nodes" defined by the centroid of the protein and ligand, using the same message passing framework outlined above. - -(4) Finally, we concatenate the embedding of the centroid of the protein and ligand and pass this vector to a 3 layer MLP which outputs a singular scalar, the binding affinity prediction. - -### Graph transformer - -
-
- {% include figure.html path="assets/img/2023-11-09-proposal/graph_transformer_architecture2.png" %} -
-
-
- Figure 3. Overview of the graph transformer architecture for a graph constructed from a protein-ligand structure. -
- -The model architecture is as follows: - -(1) Initial feature vectors of the nodes are based on a learnable embedding of their atomic elements. - -(2) The graph transformer architecture is based on graphormer . Where the input is $H \in \mathbb{R}^{n \times d}$ where $d$ is the hidden dimension and $n$ is the number of nodes. The input is projected by $W_Q \in \mathbb{R}^{d \times d_K}, W_K \in \mathbb{R}^{d \times d_K}, W_V \in \mathbb{R}^{d \times d_V}$. Since graphs have more complex positional information than sequeunces, conventional positional encoding methods used in sequence-based transformers are not applicable to graphs. Positions in a graph are defined relative to all other nodes, thus positional embeddings cannot be added at the node feature vector level but instead are added as a bias to the pairwise node attention matrix. We define $B \in \mathbb{R}^{n \times n}$, where $B_{ij}$ is given by a Gaussian basis embedding of the Euclidean distance $d_{ij}$ between node $i$ and $j$, which is passed to a 3 layer MLP that outputs a singular scalar. Then the self-attention is calculated as $Q = HW_Q, K = HW_K, V = HW_V$ and $A = \frac{QK^T + B}{\sqrt{d_k}}, Attn(H) = Softmax(A) V$. In addition to all atomic nodes, we also add a `` token used in the BERT model which functions as a virtual global node . The distance of this node to all other nodes is a learnable parameter. This process is duplicated across multiple heads and we concatenate the embeddings across all heads after $k$ layers as the updated feature vector. - -(3) We take the final embedding of the `` node and pass it through a 3 layer MLP which outputs a singular scalar, the binding affinity prediction. - -### Loss function -Both models are trained to minimise the root mean squared error between the predicted binding affinity and true binding affinity. - - -## Experiments -In order for the results to be comparable between the two models, both models have approximately 2.8 million parameters. 
- -GNN model details: -* 2 layers of message passing, number of scalar features = 44, number of vector features = 16. Number of parameters: 2,878,011 -* 4 layers of message passing, number of scalar features = 32, number of vector features = 13. Number of parameters: 2,767,269 -* 6 layers of message passing, number of scalar features = 26, number of vector features = 12. Number of parameters: 2,764,431 - -We compare GNNs with different numbers of layers to compare performance across models which learn embeddings from various $k$-hop neighborhoods. - -Graph transformer model details: 8 attention heads, 8 layers, hidden dimension = 192, feed forward neural network dimension = 512. Number of parameters: 2,801,155 - -Both models were trained for 4 hours on 1 GPU with a batch size of 16, Adam optimiser, and a learning rate of $1 \times 10^{-3}$. We show the results for the 30% and 60% sequence-based splits for the protein-ligand binding affinity benchmark in Table 1 and 2 respectively. - -**Table 1.** Protein-ligand binding affinity task with 30% sequence based split. ProNet is included as the SOTA model in this benchmark. - -| Model | Root mean squared error $\downarrow$ | Pearson correlation coefficient $\uparrow$ | Spearman correlation coefficient $\uparrow$ | -|----------|----------|----------|----------| -| ProNet | 1.463 | 0.551 | 0.551 | -| GNN 2 layer | 1.625 | 0.468 | 0.474 | -| GNN 4 layer | 1.529 | 0.488 | 0.477 | -| GNN 6 layer | 1.514 | 0.494 | 0.494 | -| Graph Transformer | 1.570 | 0.476 | 0.469 | - -**Table 2.** Protein-ligand binding affinity task with 60% sequence based split. ProNet is included as the SOTA model in this benchmark. 
- -| Model | Root mean squared error $\downarrow$ | Pearson correlation coefficient $\uparrow$ | Spearman correlation coefficient $\uparrow$ | -|----------|----------|----------|----------| -| ProNet | 1.343 | 0.765 | 0.761 | -| GNN 2 layer | 1.483 | 0.702 | 0.695 | -| GNN 4 layer | 1.471 | 0.717 | 0.719 | -| GNN 6 layer | 1.438 | 0.722 | 0.704 | -| Graph Transformer | 1.737 | 0.529 | 0.534 | - -## Discussion -### GNNs perform better than graph transformers -From the benchmarking we can see that the graph transformer model performs worse than the GNNs for the 30% and 60% sequence split for protein-ligand binding affinity. An intuitive explanation for why graph transformers perform worse is it may be difficult for the graph transformer to learn the importance of local interactions for binding affinity prediction as it attends to all nodes in the network. Or in other words, because each update of the node involves seeing all nodes, it can be difficult to decipher which nodes are important and which nodes are not. In order to test if this is true, future experiments would involve a graph transformer with a sparse attention layer where the attention for nodes beyond a distance cutoff is 0. Converse to the lower performance of graph transformers, the results show that deeper GNNs which "see" a larger $k$-hop neighborhood perform better. However, we did not push this to the extreme of implementing a GNN with enough layers such that the $k$-hop neighborhood is the whole graph which would be most similar to a graph transformer as it attends to all nodes. This is because very deep GNNs are subject to issues like oversmoothing where all node features converge to the same value . - -The GNN may also perform better than the graph transformer due to the higher order geometric features used by the e3nn GNN message passing framework, compared to the graph transformer which only has relative distances. 
To further explore this future work will involve implementing the equiformer graph transformer , which is a graph transformer with higher order geometric features. - - -### Depth v.s. width -Deeper GNNs (2 v.s. 4 v.s. 6 layers) with an approximately constant total number of parameters acheived better performance across both protein ligand binding affinity tasks. This was also observed in the image classification field with the development of AlexNet where deeper networks were shown to significantly improve performance . In the context of molecular graphs, deeper GNNs allow the nodes to gain more local chemical context as their node embeddings are exposed to a larger $k$-hop neighborhoods. Thus, these node embeddings are more expressive which facilitates better task performance. There is a limit to the advantages of depth, as very deep GNNs experience oversmoothing as mentioned above . - -### Model performance v.s. graph size -We compared the error of the prediction v.s. the number of atoms in the graph to test the hypothesis if larger graphs are more difficult to make predictions on. However, correlation between error and number of atoms in the graph all yielded very low pearson correlation coefficients ($< 0.1$) for all experiments (Figure 4). Thus, the number of atoms in the graph has minimal effect on the predictive ability of the model. This may suggest why the the graph transformer--which is able to attend to all nodes in the graph--did not perform much better as the GNN performance does not degrade significantly with larger graphs. - -
-
- {% include figure.html path="assets/img/2023-11-09-proposal/error_vs_size.png" %} -
-
-
- Figure 4. Number of nodes in graph v.s. difference between true and predicted binding affinity for graph transformers and GNNs on the 60% protein-ligand binding affinity task. There is no prominent correlation between model performance and error in prediction. -
- -### Future work -We implemented a relatively simplistic graph transformer in this project. While we concluded for this vanilla implementation of the graph transformer the GNN outperforms the graph transformer there are many more complex graph transformer architectures that we could explore to build more expressive architectures. In this section we explore some possible ideas. - -**Using cross-attention for better representation of protein-ligand interactions.** In this project, we adapted the graph transformer from graphormer which was developed originally for predicting the energy of one molecule. However, our task involves two interacting molecules, a protein and a ligand. Thus, graph transformer performance could be lifted if the model had a better understanding of the interactions between the protein and the ligand by using cross attention between the protein and the ligand, rather than self attention across the whole protein-ligand complex. - -**Heirarchical pooling for better representation of amino acids.** Graph transformer performance could also be lifted by defining better pooling strategies than using the `` token from a set of all atoms to predict binding affinity. In this project the graphs were defined based on the atoms in the graph. However, proteins are comprised of an alphabet of 21 amino acids. Thus, it may be easier for the model to learn more generalisable patterns to the test set if the model architecture reflected how proteins are comprised of animo acids which are comprised of atoms. This has been achieved in models using hierarchical pooling from the atom-level to the amino acid-level and finally to the graph-level . - -**A hybrid approach: GNNs with Transformers.** -Finally, we could improve also performance further by taking a hybrid approach. That is, the GNN first learns local interactions followed by the graph transformer which learns global interactions and pools the node embeddings into a global binding affinity value. 
The motivation for this design is to leverage the advantages of both models. The GNN excels at learning local interactions while the graph transformer excels at learning global relationships from contextualised local interactions. This approach has been explored in other models for predicting drug-target interaction . Visualisation of the attention map of graph transformers would also be interesting to explore the importance of specific chemical motifs on protein-ligand interactions. - - -## Conclusion -In this project we present a direct comparison of graph transformers to GNNs for the task of predicting protein-ligand binding affinity. We show that GNNs perform better than vanilla graph transformers with the same number of model parameters across protein-ligand binding affinity benchmarks. This is likely due to the importance of capturing local interactions, which graph transformers may struggle to do. We also show that deeper GNNs perform better than wider GNNs for the same number of model parameters. Finally, future work in this area will involve implementing more complex graph transformers, or taking a hybrid approach where we capture local interactions with a GNN and global interactions with a graph transformer. \ No newline at end of file diff --git a/_posts/2023-11-09-quantum-gnn.md b/_posts/2023-11-09-quantum-gnn.md deleted file mode 100644 index edaf0eb1..00000000 --- a/_posts/2023-11-09-quantum-gnn.md +++ /dev/null @@ -1,203 +0,0 @@ ---- -layout: distill -title: Quantum Circuit Optimization with Graph Neural Nets -description: We perform a systematic study of architectural choices of graph neural net-based reinforcement learning agents for quantum circuit optimization. 
-date: 2023-11-08 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Julian Yocum - # url: "https://en.wikipedia.org/wiki/Albert_Einstein" - affiliations: - name: MIT - # - name: Boris Podolsky - # url: "https://en.wikipedia.org/wiki/Boris_Podolsky" - # affiliations: - # name: IAS, Princeton - # - name: Nathan Rosen - # url: "https://en.wikipedia.org/wiki/Nathan_Rosen" - # affiliations: - # name: IAS, Princeton - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-quantum-gnn.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Proposal - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Introduction - -One of the most notable technological developments of the past century has been computing based on binary bits (0’s and 1’s). Over the past decades, however, a new approach based on the principles of quantum mechanics threatens to usurp the reigning champion. Basing the informational unit on the quantum bit, or qubit, instead of the binary bit of “classical” computing, quantum computing takes advantage of the strange phenomena of modern physics like superposition, entanglement, and quantum tunneling. - -Leveraging these as algorithmic tools, surprising new algorithms may be created. 
Shor’s algorithm, based on quantum algorithms, can solve classically hard cryptographic puzzles, threatening the security of current cryptographic protocols. Additionally, quantum computers can significantly accelerate drug discovery and materials science through quantum molecular dynamics simulations. They also show great potential in Quantum Machine Learning (QML), enhancing data analysis and pattern recognition tasks that are computationally intensive for classical computers. - -Similar to classical computers, which base their algorithms on circuits, quantum computers build their quantum algorithms on quantum circuits. However, quantum computers are still in development and are incredibly noisy. The complexity of a quantum circuit increases its susceptibility to errors. Therefore, optimizing quantum circuits to their smallest equivalent form is a crucial approach to minimize unnecessary complexity. This optimization is framed as a reinforcement learning problem, where agent actions are circuit transformations, allowing the training of RL agents to perform Quantum Circuit Optimization (QCO). Previous techniques in this domain have employed agents based on convolutional neural networks (CNN) . - -My previous research has demonstrated that the inherent graphical structure of circuits make QCO based on graph neural networks (GNN) more promising than CNNs. GNNs are particularly effective for data with a graph-like structure, such as social networks, subways, and molecules. Their unique property is that the model's structure mirrors the data's structure, which they operate over. This adaptability sets GNNs apart from other machine learning models, like CNNs or transformers, which can actually be reduced to GNNs. This alignment makes GNNs a highly promising approach for optimizing quantum circuits, potentially leading to more efficient and error-resistant quantum computing algorithms. 
- -This project extends my previous work by systematically investigating the impact of various architectural choices on the performance of GNNs in quantum circuit optimization. This is achieved through a series of experiments focusing on key variables such as the number of layers in the GNN, the implementation of positional encoding, and the types of GNN layers used. - -Specific objectives include: - -1. **Evaluating the Number of GNN Layers**: Investigating how the depth of GNNs influences the accuracy and efficiency of quantum circuit optimization. This involves comparing shallow networks against deeper configurations to understand the trade-offs between complexity and performance. -2. **Exploring Positional Encoding Techniques**: Positional encoding plays a crucial role in GNNs by providing information about the structure and position of nodes within a graph. This project experiments with various encoding methods to determine their impact on the accuracy of quantum circuit optimization. -3. **Assessing Different Sizes of Hidden Dimension**: This objective focuses on understanding the influence of the hidden dimension size within GNN layers on the performance of quantum circuit optimization. By varying the size of the hidden dimension, the project identifies the optimal balance between computational complexity and the model's ability to capture complex relationships within the data. - - -## Quantum Circuits and Transformation Environment - -In order to have quantum circuit optimizers we need quantum circuits! Quantum circuits are built out of quantum gates operating on qubits. These quantum circuits implement quantum algorithms in a similar way that classical circuits implement classical algorithms. In the below example, we have a five qubit circuit. It has a variety of single qubit gates (X, Rz, and H) as well as two qubit gates (CX). 
- -{% include figure.html path="assets/img/2023-11-09-quantum-gnn/random_before.png" class="img-fluid" %} - -Some gates have classical analogs, like the X gate which is analogous to the classical NOT bit-flip gate. Others, like the Hadamard (H) gate, cannot be understood with classical intuition. We can use gates like H in combination with a two qubit gate like CX to put two qubits into unique quantum states. For example, with the following circuit, we can put two qubits into a special state called "quantum entanglement". - -
-
- {% include figure.html path="assets/img/2023-11-09-quantum-gnn/entanglement.png" class="img-fluid" style="width: 50%;" %} -
-
- {% include figure.html path="assets/img/2023-11-09-quantum-gnn/heads.jpeg" class="img-fluid" style="width: 50%;" %} -
-
- -These qubits have outcomes that are perfectly correlated with each other. If they are measured, they will always result in the same outcome, even if after the circuit is applied the qubits are separated an arbitrary distance. This is despite the fact that the outcome is perfectly random! Measurement will result in 0 and 1 with probability 50% each. This is like flipping two coins whose outcome you cannot predict, but which always land both heads or both tails. - -We can write the circuit and subsequent quantum state with the following equation. The two possible resulting states (both heads or both tails) are represented in bracket notation: $$\ket{00}$$ and $$\ket{11}$$. - -\begin{equation} -\ket{\psi} = \text{CX} \cdot (H \otimes I) \ket{00} = \frac{1}{\sqrt{2}}(\ket{00} + \ket{11}) -\end{equation} - -However, just like classical algorithms can be written down according to different programs and circuits which do the same thing, quantum circuits can have different equivalent forms. Transitions between these equivalent forms can be written down according to a set of local rules mapping from some set of quantum gates to another. - -In the following diagram we show the quantum transformations used for this project. They are ordered according to 1) single qubit, 2) two qubit, and 3) three qubit transformations. - -
-
- {% include figure.html path="assets/img/2023-11-09-quantum-gnn/one_gate_trans.png" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2023-11-09-quantum-gnn/two_gate_trans.png" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2023-11-09-quantum-gnn/three_gate_trans.png" class="img-fluid" %} -
-
- -These transformations will serve as the action space for our quantum circuit environment. Notably, some of these circuit transformations involve merges or cancellations, which can be used to simplify the circuits. A quantum agent which chooses an appropriate sequence of circuit transformations can then simplify a circuit into an equivalent form with fewer gates. Therefore, the task of circuit optimization may be decomposed into a trajectory of agent steps leading between different states, where states correspond to quantum circuits which are all algorithmically equivalent. - -## Proximal Policy Optimization - -To train the GNN agent, we use the Proximal Policy Optimization (PPO) algorithm. PPO is a model-free, on-policy reinforcement learning algorithm that aims to optimize the policy of a reinforcement learning agent by iteratively updating its policy network. We train the GNN agent on n-qubit random circuits. For training the GNN-based agents for quantum circuit optimization, we use the Proximal Policy Optimization (PPO) algorithm. PPO is a deep reinforcement learning algorithm that has shown success in a variety of applications, including game playing and robotics. The algorithm updates the policy by maximizing a surrogate objective function that approximates the expected improvement in the policy, while enforcing a constraint on the maximum change in the policy. This constraint helps to prevent the policy from changing too much from one iteration to the next, which can destabilize the training process. - -\begin{equation} -L^{\text{CLIP}}(\theta) = \hat{\mathbb{E}}_t[\min(r_t(\theta))\hat{A}_t, \text{clip}(r_t(\theta), 1 - \epsilon, 1 + \epsilon)\hat{A}_t] -\end{equation} - -To train the GNN agents for quantum circuit optimization, we start by initializing the GNN weights randomly. 
We then use the PPO algorithm to update the weights by sampling circuits from a distribution of n-qubit random circuits, encoding them into graphs, and simulating the circuits in a custom python gym environment. For each transformation we use - -\begin{equation} - r_t = - \left(q(s_{t+1}) - q(s_{t})\right) -\end{equation} - -as the reward signal for the PPO algorithm following , where we let $q$ be a function quantifying the desirability of the circuit. The PPO algorithm then updates the GNN weights to maximize the expected reward, subject to the maximum change in the policy constraint. - -We use $$q(s) = -\texttt{circuit_size}(s)$$, such that the agent's objective is to reduce the overall circuit size, as measured by number of gates, resulting in the reward function: - -\begin{equation} - r_t = \texttt{circuit_size}(s_{t+1}) - \texttt{circuit_size}(s_t) -\end{equation} - -The methodology for implementing the quantum circuit optimization using deep reinforcement learning and graph neural networks consists of three main components: (1) encoding the circuits as directed acyclic graphs using the DAG encoding and (2) encoding the graphs as node and edge feature tensors and training a GNN-based agent using the PPO algorithm,. - -## GNN architecture - -The GNN architecture used is inspired by the message passing neural network (MPNN), which is a type of GNN that performs iterative message passing between nodes in the graph. The GNN architecture used for this approach consists of $$L$$ layers of Residual Gated Graph ConvNets. - -{% include figure.html path="assets/img/2023-11-09-quantum-gnn/gnn.png" class="img-fluid" %} - -The GNN gets as input the graph (encoded as the three tensors shown above), the positional encoding, and a binary tensor encoding of which transformations are allowed for each node (this can be computed in $$O(\# nodes)$$ time). 
- -Node features and positional encoding are both mapped to a k-dimensional embedding with a linear transformation and added together, forming a vector $$h$$. The edge features are also linearly mapped to some $$l$$-dimensional embedding vector $$e$$. - -After passing through $$L$$ layers, each node has a feature vector $$h’$$. These features are mapped to a length $$t$$ Q-vector where t=# transformations. A mask is applied so that all impossible transformations are ignored. The length $$t$$ Q-vectors are concatenated together from all nodes and then outputted by the GNN. An action is selected by choosing the node/transformation which corresponds to the index of the maximum Q-value. - -## Results - -After training our graph neural network agent in the quantum circuit environment using PPO, we can verify that the agent can indeed optimize circuits. We randomly sample a five qubit circuit and run our agent on the circuit for fifty steps. We see that the agent is able to successfully reduce the circuit size from 44 gates to 30, a 14 gate reduction. Meanwhile, the standard Qiskit optimizer could only reduce the circuit to 36 gates. - -{% include figure.html path="assets/img/2023-11-09-quantum-gnn/random_optimization.png" class="img-fluid" %} - -Now that we have verified that our learning algorithm can successfully train a quantum circuit optimizing agent, we proceed with our study over three hyperparameters: 1) number of layers, 2) the use of positional encoding, and 3) hidden dimension. For all plots, we display the average over several runs with standard error. - -### Number of Layers - -We investigate how the depth of GNNs influences the accuracy and efficiency of quantum circuit optimization. This involves comparing shallow networks against deeper configurations to understand the trade-offs between complexity and performance. In order to do this we scan over the number of layers $$L$$ in our GNN from 1 to 7. 
- -{% include figure.html path="assets/img/2023-11-09-quantum-gnn/n_layers.png" class="img-fluid" %} - -We see that, generally, increasing the number of layers in the model improves performance of the model on random circuits. This is aligned with the intuition that increasing the number of layers of a GNN allows models to "see" information from further away, which can be used to make strategic decisions. - -However, we also observe that there is some critical point in which increasing $$L$$ no longer leads to better outcomes from the model. This threshold appears to occur around $$L=5$$, which performs similarly to $$L=7$$. - -This could be related to a known property of GNNs, in which features of nodes which are closer together are more similar. This becomes exacerbated as the number of layers increases, smearing out information. Therefore, we expect that if we continued to increase $$L$$ then model performance would degrade. - -### Positional Encoding - -Positional encoding can provide information about the structure and position of nodes within a graph. These features can often play a role in symmetry-breaking. - -In addition to the existing features encoding gate type and wire information, we concatenate 8 normally distributed dimensions to the feature vector. We hypothesize that these random features can be used to "ID" gates that have the same gate type but are located in different locations. We experiment with training a GNN with and without the addition of random positional encoding. - -{% include figure.html path="assets/img/2023-11-09-quantum-gnn/pos_encoding.png" class="img-fluid" %} - -The resulting plot shows inconclusive evidence. While the random positional encoding came out on top at the end of training, the difference is not significant enough to be able to conclude that it is demonstrably better. - -### Hidden Dimension - -The last hyperparameter we examine is the hidden dimension of the GNN layers. We scan over values 16, 32, 64, and 128. 
All other parameters are kept fixed. - -{% include figure.html path="assets/img/2023-11-09-quantum-gnn/hidden_dim.png" class="img-fluid" %} - -We observe that performance tends to improve with scale. However, similarly to the "number of layers" hyperparameter, there appears to be some critical threshold after which scaling no longer appears to improve performance. From our experiments this threshold appears to be around 64. - -It is unclear what would happen if we continued scaling past 128. For example, the performance could stay at the plateau reached at hidden dimension 64 and 128, or it could eventually get worse. - -## Further Work - -While this work gave a first glimpse at some of the structural properties that work with GNNs for RL on quantum circuits, much work remains. - -Notably, many of the training runs did not seem to train until plateau. To be fully confident in the results, training until plateau would be necessary. Additionally, many of the runs were quite noisy, making it difficult to distinguish between the performance under different runs. Therefore, increasing training samples could effectively reduce standard error for better statistics. - -Moreover, the scope of future exploration can be expanded. One of the most interesting areas of future work would be on what types of graph layers work best. While we use Residual Gated Convolution Nets, it is not clear that this is the best layer type. Other things that could be tested are other positional encoding schemes. While we experimented with random features, more standard positional encoding schemes include Laplacian and Random walk encoding. - -## Conclusion - -We find that there appear to be critical thresholds of optimal values for the hidden dimension and number of layers in GNNs. We also find no evidence that random positional encoding appears to improve performance, contrary to intuition that it would serve a useful symmetry-breaking function. 
While much work is left to be done, this work provides a first investigation into how performance of GNNs on QCO can be affected by various choices of hyperparameters. \ No newline at end of file diff --git a/_posts/2023-11-09-sparse-autoencoders-for-interpretable-rlhf.md b/_posts/2023-11-09-sparse-autoencoders-for-interpretable-rlhf.md deleted file mode 100644 index d4829901..00000000 --- a/_posts/2023-11-09-sparse-autoencoders-for-interpretable-rlhf.md +++ /dev/null @@ -1,230 +0,0 @@ ---- -layout: distill -title: Sparse Autoencoders for a More Interpretable RLHF -description: Extending Anthropic's recent monosemanticity results toward a new, more interpretable way to fine-tune. -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Laker Newhouse - url: "https://www.linkedin.com/in/lakernewhouse/" - affiliations: - name: MIT - - name: Naomi Bashkansky - url: "https://www.linkedin.com/in/naomibas/" - affiliations: - name: Harvard - -# must be the exact same name as your blogpost -bibliography: 2023-12-12-sparse-autoencoders-for-interpretable-rlhf.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Related Work - - name: Background - - name: Methods - subsections: - - name: Inserting a Sparse Autoencoder in a Transformer - - name: How We Train our Sparse Autoencoder - - name: Fine-Tuning - - name: Results - subsections: - - name: Exploring a Sparse Autoencoder - - name: Fine-Tuning with a Sparse Autoencoder - - name: Discussion - - name: Conclusion - - name: Acknowledgements - - name: Appendix ---- - -## Introduction - -Understanding how machine learning models arrive at the answers they do, known as *machine learning interpretability*, is becoming increasingly important as models are deployed more widely and in high-stakes scenarios. 
Without interpretability, models may exhibit bias, toxicity, hallucinations, dishonesty, or malice, without their users or their creators knowing. But machine learning models are notoriously difficult to interpret. Adding to the challenge, the most widely used method for aligning language models with human preferences, RLHF (Reinforcement Learning from Human Feedback), impacts model cognition in ways that researchers do not understand. In this work, inspired by recent advances in sparse autoencoders from Anthropic, we investigate how sparse autoencoders can help to interpret large language models. We contribute a novel, more interpretable form of fine-tuning that only learns parameters related to interpretable features of the sparse autoencoder. - -{% include figure.html path="assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/interpretability-hard-cartoon.png" class="img-fluid" %} -
- Machine learning practitioners often cannot interpret the models they build (xkcd #1838). -
- -## Related Work - -Research on interpreting machine learning models falls broadly under one of two areas: representation-based interpretability (top-down) and mechanistic interpretability (bottom-up). - -Representation-based interpretability seeks to map out meaningful directions in the representation space of models. For example, Li *et al.* found a direction in one model that causally corresponds to truthfulness. Subsequent work by Zou *et al.* borrows from neuroscience methods to find directions for hallucination, honesty, power, and morality, in addition to several others. But directions in representation space can prove brittle. As Marks *et al.* found, truthfulness directions for the same model can vary across datasets. Moreover, current methods for extracting representation space directions largely rely on probing and the linearity hypothesis , but models may have an incentive to store some information in nonlinear ways. For example, Gurnee *et al.* showed that language models represent time and space using internal world models; for a world model to store physical scales ranging from the size of the sun to the size of an electron, it may prefer a logarithmic representation. - -Mechanistic interpretability, unlike representation engineering, studies individual neurons, layers, and circuits, seeking to map out model reasoning at a granular level. One challenge is that individual neurons often fire in response to many unrelated features, a phenomenon known as polysemanticity. For example, Olah *et al.* found polysemantic neurons in vision models, including one that fires on both cat legs and car fronts. Olah *et al.* hypothesized that polysemanticity arises due to superposition, which is when the model attempts to learn more features than it has dimensions. Subsequent work investigated superposition in toy models, suggesting paths toward disentangling superposition in real models . 
Superposition is relevant for language models because the real world has billions of features that a model could learn (names, places, facts, etc.), while highly deployed models have many fewer hidden dimensions, such as 12,288 for GPT-3 . - -Recently, Sharkey *et al.* proposed using sparse autoencoders to pull features out of superposition. In an interim research report, the team describes inserting a sparse autoencoder, which expands dimensionality, into the residual stream of a transformer layer. In a follow-up work, Cunningham *et al.* found that sparse autoencoders learn highly interpretable features in language models. In a study on one-layer transformers, Anthropic provided further evidence that sparse autoencoders can tease interpretable features out of superposition . Although interest in sparse autoencoders in machine learning is relatively recent, sparse autoencoders have been studied in neuroscience for many decades under the name of expansion recoding . - -Researchers have begun to apply sparse autoencoders to other interpretability problems. For example, Marks *et al.* investigated whether models on which we perform RLHF internalize the reward signal. To do so, Marks compared sparse autoencoders trained on the base model with sparse autoencoders trained on the fine-tuned model. But, to our knowledge, while others have used sparse autoencoders to probe the effects of fine-tuning, there is no prior research on using sparse autoencoders to define a more interpretable form of fine-tuning. We propose a new form of fine-tuning in which the learnable parameters are related to the interpretable features of the sparse autoencoder. - -## Background - -An **autoencoder** is an architecture for reproducing input data, with a dimensionality bottleneck. Let $d_\text{model}$ denote the dimension of the residual stream in a transformer (4096 for Pythia 6.9B). Let $d_\text{auto}$ denote the dimensionality of the autoencoder. 
To enforce the dimensionality bottleneck, we require $d_\text{model} > d_\text{auto}$. The diagram below depicts an autoencoder. - -{% include figure.html path="assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/autoencoder.png" class="img-fluid" %} -
- An autoencoder is trained to reproduce its input, subject to a dimensionality bottleneck. -
- -A **sparse autoencoder** relies on a different kind of bottleneck, called sparsity. For a sparse autoencoder $g \circ f$ that acts on $x \in \mathbb{R}^{d_\text{model}}$ by sending $f(x) \in \mathbb{R}^{d_\text{auto}}$ and $g(f(x)) \in \mathbb{R}^{d_\text{model}}$, the training objective combines MSE loss with an $L^1$ sparsity penalty: - -$$\mathcal{L}(x; f, g) = \|x - g(f(x))\|_2^2 + \beta \| f(x) \|_1,$$ - -where $\beta > 0$ trades off sparsity loss with reconstruction loss. With the sparsity constraint, we can now let $d_\text{auto} > d_\text{model}$ by a factor known as the *expansion factor*. In our work, we typically use an expansion factor of $4$ or $8$. The purpose of the sparse autoencoder is to expand out the dimension enough to overcome superposition. The diagram below depicts a sparse autoencoder. - -{% include figure.html path="assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sparse-autoencoder.png" class="img-fluid" %} -
- A sparse autoencoder is trained to reproduce its input, subject to an $L^1$ sparsity bottleneck. -
- -## Methods - -Our main experiment is to insert a sparse autoencoder into a transformer layer, train the sparse autoencoder, and then use the fused model to perform a new, more interpretable form of fine-tuning. While we originally planned to investigate RLHF, we determined that existing libraries could not perform PPO (Proximal Policy Optimization) on custom model architectures such as our transformer fused with a sparse autoencoder. As a result, we chose to investigate fine-tuning instead of RLHF. We run all experiments on a single A100 GPU through Google Colab Pro+. - -### Inserting a Sparse Autoencoder in a Transformer - -There are three natural places to insert a sparse autoencoder into a transformer: - -1. MLP activations before the nonlinearity -2. MLP activations before adding back to the residual stream -3. The residual stream directly - -We choose the second option. The upside of operating in the MLP space is that MLP blocks may be in less superposition than the residual stream, given that MLPs may perform more isolated operations on residual stream subspaces. The upside of operating after the MLP projects down to the residual stream dimension is a matter of economy: because $d_\text{model} < d_\text{MLP}$, we can afford a larger expansion factor with the same memory resources. - -{% include figure.html path="assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/transformer-with-sae.png" class="img-fluid" %} -
- We insert a sparse autoencoder into a transformer after the MLP, but before adding into the residual stream. -
- -### How We Train our Sparse Autoencoder - -We train our sparse autoencoder to reproduce MLP-post activations in layer one of Pythia 6.9B (deduplicated) Deduplicated means that this Pythia 6.9B model was trained on scraped web text where duplicate articles and lengthy passages are removed. Because Pythia inherits from the GPT-NeoX architecture, the specific activations we collected are named gpt_neox.layers.1.mlp.dense_4h_to_h.. To create a dataset of activations for training, we stream in text from [an open-source replication of WebText](https://huggingface.co/datasets/Skylion007/openwebtext), the dataset used to train GPT-2. For each batch of text, we collect Pythia 6.9B's MLP-post activations at layer one and use these activations as training data for the sparse autoencoder. - -Concretely, our sparse autoencoder has four learnable parameters: $W_\text{enc}$, $W_\text{dec}$, $b_\text{enc}$, and $b_\text{dec}$. The second bias $b_\text{dec}$ is used to center the input. The sparse autoencoder encodes, applies a nonlinearity, and decodes its input $x$ as follows: - -$$\text{SAE}(x) = \text{ReLU}((x - b_\text{dec}) W_\text{enc} + b_\text{enc}) W_\text{dec} + b_\text{dec}.$$ - -We constrain the rows of $W_\text{dec}$ to have unit norm by renormalizing after each optimizer step. Another approach to constrain the rows is to remove gradient information parallel to the feature vectors before each optimizer step, and also renormalize the rows. Although we did not implement it, Anthropic found that that the second approach [slightly reduces loss](https://transformer-circuits.pub/2023/monosemantic-features/index.html#appendix-autoencoder-optimization) . - -We use an expansion factor of $4$, meaning $d_\text{auto} = 16384$. When training, we use batch size $8$, learning rate $10^{-4}$, and default $\beta_1 = 0.9, \beta_2 = 0.999$ for the Adam optimizer. Because Pythia 6.9B's context length is $128$ tokens, each training step includes activations from $1024$ tokens. 
We save checkpoints every $20000$ steps ($20.48$ million tokens). - -One subtlety in training is that the sparsity constraint can eventually cause some autoencoder neurons to never activate. How to best handle these so-called dead neurons is an open question. We follow Anthropic in [resampling dead neurons](https://transformer-circuits.pub/2023/monosemantic-features/index.html#appendix-autoencoder-resampling) to new values . Because resampling can cause instability during training, we resample only every 10000 training steps. At that point, we say a sparse autoencoder neuron is dead if it has not activated in any of the last 5000 training steps. In an attempt to improve autoencoder performance, Anthropic resampled dead neurons to the feature directions in which the sparse autoencoder performed worst. For simplicity, we resample dead neurons by setting their corresponding rows of $W_\text{enc}$ and $W_\text{dec}$ to Kaiming uniform random vectors. We reset dead biases to zero. - -### Fine-Tuning - -We fine-tune Pythia 70M We wanted to fine-tune Pythia 6.9B, but we encountered out-of-memory errors on an A100 GPU. In follow-up work, we will investigate quantization so that we can study Pythia 6.9B, including the sparse autoencoder we trained for it. with our sparse autoencoder inserted in layer one To learn the most about how fine-tuning affects transformer features, we would ideally learn interpretable feature directions at every transformer layer using a sparse autoencoder. Then, after fine-tuning, we could perform rich comparisons across the model. Unfortunately, reconstruction loss compounds across layers. With current training methods, it is only feasible for us to insert a sparse autoencoder into one layer of the transformer before performance significantly degrades.. Instead of adjusting weights everywhere in the network, we constrain fine-tuning to adjust only a small set of interpretable parameters within the sparse autoencoder. 
In particular, we learn two vectors of dimension $d_\text{auto}$: a coefficient vector $c$ and a bias vector $d$. Just prior to applying $\text{ReLU}$ in the sparse autoencoder, we scale the activations by $c$ and translate them by $d$. - -For our fine-tuning experiments, the sparse autoencoder we use is trained on Pythia 70M Chess (a variant fine-tuned on a chess dataset) This autoencoder was trained to perform well on Pythia 70M Chess, not on the base model Pythia 70M. In future work, we will match the models to investigate how our sparse autoencoder on Pythia 6.9B performs when fine-tuning Pythia 6.9B.. We insert this sparse autoencoder into the base Pythia 70M, define new learnable parameters $c$ and $d$ as above, and freeze the gradients on every weight in the fused model except the new learnable parameters. We fine-tune on a small dataset of arithmetic questions ([EleutherAI/arithmetic](https://huggingface.co/datasets/EleutherAI/arithmetic)). One training example is shown below: - -$$\text{Question: What is }(2 * 7) + 2\text{? Answer:}$$ - -We train with batch size $8$, learning rate $10^{-3}$, and weight decay $10^{-2}$ using the AdamW optimizer over $10$ epochs with $200$ steps per epoch. The figure below shows the training loss as we fine-tune. - -## Results - -Our results come in two parts: an exploration of our trained sparse autoencoder on Pythia 6.9B and an analysis of fine-tuning using a smaller sparse autoencoder on Pythia 70M. - -### Exploring a Sparse Autoencoder - -When inserted into Pythia 6.9B at layer one, our sparse autoencoder achieves a loss of $3.201$ (zero-ablation degrades loss to $3.227$) on the held-out dataset [WikiText-103](https://paperswithcode.com/dataset/wikitext-103), consisting of over 100M tokens from Good and Featured articles on Wikipedia. Pythia 6.9B's baseline loss is $3.193$. Notably, the sparse autoencoder outperforms a zero-ablation of the layer, demonstrating that it learned features that are useful for reconstruction. 
- -As expected, if the sparse autoencoder is inserted into a layer it was not trained for, performance collapses. For example, if inserted at layer $31$ of Pythia 6.9B, the loss becomes $12.586$. Below is a figure showing the additional loss from inserting the sparse autoencoder at the first eight layers of Pythia 6.9B. - -{% include figure.html path="assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-losses.png" class="img-fluid" %} -
- The sparse autoencoder preserves model performance in layer 1, the layer it was trained for. The green bar is loss on WikiText-103 of Pythia 6.9B on 5 random batches. The red bar is the additional loss incurred if the sparse autoencoder is inserted after the MLP at a given layer. The first eight layers are shown. -
- -For more details on the training run, four figures demonstrating the sparsity, $L^1$ coefficient, $L^1$ loss, and reconstruction loss of our sparse autoencoder during training are shown below. After training on the first five million tokens, we automatically begin to adjust the $L^1$ coefficient $\beta$ until we reach the desired sparsity of $1\%$. By the end, our sparse autoencoder stabilizes at a sparsity of $100$, which means that only $0.5\%$ of sparse autoencoder features activate on a given token. - -{% include figure.html path="assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_sparsity.svg" class="img-fluid" %} -
- Sparsity across the training run on Pythia 6.9B. On a given batch, sparsity is recorded as the average number of sparse autoencoder features that activate on the batch's $1024$ tokens. Our sparse autoencoder stabilizes at a sparsity of around $100$, or $0.5\%$ of its hidden dimension. -
- -{% include figure.html path="assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_L1_coeff.svg" class="img-fluid" %} -
- The $\beta$ coefficient in $L_1$ loss across the training run on Pythia 6.9B. After training on five million tokens, we begin to adjust the coefficient until the sparse autoencoder reaches its target sparsity of $1\%$. -
- -{% include figure.html path="assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_L1_loss.svg" class="img-fluid" %} -
- The $L^1$ loss of the sparse autoencoder across the training run on Pythia 6.9B. The $L^1$ loss initially rises while the $L^1$ coefficient is adjusted, then falls once the target sparsity is reached as the sparse autoencoder learns a more compact representation. -
- -{% include figure.html path="assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_reconstr_loss.svg" class="img-fluid" %} -
- The reconstruction loss of the sparse autoencoder across the training run on Pythia 6.9B. Reconstruction loss initially rises while the $L^1$ coefficient is adjusted, due to the tradeoff between reconstruction and sparsity. Once the $L^1$ coefficient stabilizes, reconstruction loss slowly falls as the sparse autoencoder learns a more effective representation. -
- -We find that our sparse autoencoder learned several interpretable features. For example, the second most frequently activating feature (feature index $11928$) activates strongly on the token “·the”. The figure below shows a table with examples. - -{% include figure.html path="assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_6-9b_the_feature.png" class="img-fluid" %} -
- The second most frequent feature (feature index $11928$) in the Pythia 6.9B sparse autoencoder activates on the token "·the". Relevant table columns are $\text{str\_tokens}$ (the token that activates the feature), $\text{context}$ (surrounding tokens in the sentence), and $\text{feature}$ (the raw feature activation in the sparse autoencoder, sorted in descending order). We include the top 15 examples. The feature activates once on “·of” and “·and”, but it activates most on the token “·the”. (Credit: the visualization code for the table is due to Neel Nanda in his open-source replication of Anthropic's sparse autoencoder work.) -
- -In addition, we found a surprising correlation between dead features. In particular, almost all dead features point in similar directions, as indicated by a high cosine similarity. In comparison, features that are not dead have a cosine similarity that is much closer to centered at zero. If dead features were drawn from the same distribution as non-dead features, we would expect cosine similarities closer to zero. - -{% include figure.html path="assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-cosine-similarity.png" class="img-fluid" %} -
- The plot above shows the cosine similarity of dead features (red) and non-dead features (blue). Here, a feature is counted as dead if it activates nowhere on WikiText-103-v1. The cosine similarity is calculated compared to the average dead feature. (Credit: the visualization code for cosine similarity is due to Neel Nanda in his open-source replication of Anthropic's sparse autoencoder work.) -
-
-### Fine-Tuning with a Sparse Autoencoder
-
-We fine-tune Pythia 70M on arithmetic data by adjusting only a coefficient and bias vector within the sparse autoencoder space.
-
-On layer $4$, we observe an unexpected lowering of loss from $6.449$ for the base model to $6.270$ after inserting the sparse autoencoder. After fine-tuning the sparse autoencoder on arithmetic, loss remains constant at $6.270$. We believe that the fine-tuning may perform better when we experiment on a larger model such as Pythia 6.9B.
-
-Although the loss does not fall, several features that our interpretable fine-tuning adjusts are interpretable. For example, the feature that is scaled up the most activates on colons (feature index $1338$). Because colons appear twice in every line of the arithmetic data, it makes sense that the fine-tuned model would like to more readily predict colons. The figure below shows the top activations of feature $1338$ on the arithmetic dataset before and after fine-tuning. After fine-tuning, the feature activates slightly more strongly in all cases.
-
-{% include figure.html path="assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/colon-feature-1338.jpeg" class="img-fluid" %}
-
- The table above shows the arithmetic dataset tokens on which feature $1338$ most strongly activates, before fine-tuning in the column $\text{feature}$ and after fine-tuning in the column $\text{feature (FT)}$. In all cases, the feature activates slightly more after fine-tuning.
-
- -The feature that is most inhibited (feature index $619$) activates on newlines. We hypothesize that the sparse autoencoder learns to avoid newlines because, in the chess dataset for which it was trained, newlines are always followed by “Score: ”, indicating the start of a new game. But in the arithmetic dataset, newlines are always followed by “Answer: ”. Therefore, the model wants to inhibit this unhelpful feature. The discrepancy is a difference in datasets. To rigorously verify this hypothesis, we could compute direct logit attributions from feature $619$ to check whether it contributes to the “Answer” token. Either way, the inhibition above demonstrates that our fine-tuning procedure can detect and modify unhelpful features in the sparse autoencoder. - -For a broader view of the dynamics of our interpretable fine-tuning, the two figures below show the learned scale and bias terms across every feature in the sparse autoencoder space (where $d_\text{auto} = 2048$), sorted in ascending order. We observe that the majority of features are largely unaffected, but a few features at the tails are significantly enhanced or inhibited. - -{% include figure.html path="assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-bias.png" class="img-fluid" %} -
- The learned bias in the sparse autoencoder space inhibits approximately half of features while enhancing the other half. The x-axis is sorted so that the feature index runs in ascending order of the learned bias. -
- -{% include figure.html path="assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-scaling.png" class="img-fluid" %} -
- The learned scaling coefficient in the sparse autoencoder space significantly inhibits a small number of features while significantly enhancing several others. We also observe that a majority of features ($2/3$) are inhibited, compared to a smaller number enhanced. The x-axis is sorted so that the feature index runs in ascending order of the learned scaling. -
- -## Discussion - -One limitation of our fine-tuning experiments is that Pythia 70M is a small model for which there are fewer interpretable features. In addition, we inserted into Pythia 70M a sparse autoencoder trained to reconstruct activations in Pythia 70M Chess. Nonetheless, our fine-tuning results are promising. The majority of features are not significantly affected, but a few features at the tails are either significantly enhanced or inhibited. We found it fruitful to interpret these outlier features first, as they are a starting point for finding which sparse autoencoder features matter most for the fine-tuning dataset. - -When training a sparse autoencoder on Pythia 6.9B, we were successful in learning interpretable features, such as the "the" feature. But we remain uncertain of the best way to train a sparse autoencoder, especially how to resample dead features. However, one implication of our work is that research on sparse autoencoders is accessible to a wide array of researchers. We believe a systematic study of training techniques for sparse autoencoders could benefit the field. - -## Conclusion - -Our work indicates that sparse autoencoders are a promising tool for machine learning interpretability. By inserting sparse autoencoders into transformer language models, we investigate how a novel form of fine-tuning can provide insight into changes in model behavior after fine-tuning. We find that our fine-tuning successfully modifies interpretable features in the sparse autoencoder space. Given the rapid adoption of powerful, fine-tuned language models across industries, we believe our method for interpretable fine-tuning is an important direction to continue to explore as researchers seek to understand how fine-tuning affects model cognition. Although our current work is limited because we only fine-tune Pythia 70M, future work can scale up model size, compute resources, and the number of tokens used to train the sparse autoencoder. 
Additionally, future work can extend from direct fine-tuning to investigating the effects of RLHF performed with PPO (Proximal Policy Optimization). - -## Acknowledgements - -We would like to thank Professor Isola, Professor Beery, and Dr. Bernstein for an introduction to fundamental perspectives in deep learning that will stay with us forever. Thank you to Logan Smith for invaluable early guidance on the questions we could explore related to sparse autoencoders. We are thankful for the AI Safety Student Team at Harvard (AISST) and MIT AI Alignment (MAIA) for a supportive community of fellow researchers. - -## Appendix - -Our code is available at the following Google Colab notebooks: - -1. [Training Sparse Autoencoders](https://colab.research.google.com/drive/1LAjClrzEzQEu0BMjachoHuvmbmLz1Zf1?usp=sharing) -2. [Analyzing Sparse Autoencoders](https://colab.research.google.com/drive/1KF2djPVOKOemOECnZq3MeI-k9ypf7PEE?usp=sharing) -3. [Fine-Tuning with Sparse Autoencoders](https://colab.research.google.com/drive/1CVIIbxO7iYnVuiH5UC0bdQuDvFHtBuac?usp=sharing) -4. [Analysis of Fine-Tuning with Sparse Autoencoders](https://colab.research.google.com/drive/1VGVImnhky0bo-SdBYYYDopQIjGpx-5aK?usp=sharing) \ No newline at end of file diff --git a/_posts/2023-11-09-structured-physics-loss-diffusion.md b/_posts/2023-11-09-structured-physics-loss-diffusion.md deleted file mode 100644 index 2fae87a8..00000000 --- a/_posts/2023-11-09-structured-physics-loss-diffusion.md +++ /dev/null @@ -1,203 +0,0 @@ ---- -layout: distill -title: Understanding LLM Attention on Useless Numbers in Word Problems (and this Title has 8 Es) -description: If Jack starts out with 4 llamas and Jill takes 2 of them, then Jack gets 5 chinchillas, how many llamas does he have? 
-date: 2023-11-09 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Julie Steele - url: "mailto:jssteele@mit.edu" - affiliations: - name: MIT - - - name: Spencer Yandrofski - url: "mailto:spencery@mit.edu" - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-structured-physics-loss-diffusion.md - - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Summary - - name: Introduction - subsections: - - name: Related Work - - name: Motivation - # - name: Images and Figures - # subsections: - # - name: Interactive Figures - - name: Methods - subsections: - - name: Model and Hardware - - name: Prompt Generation - - name: Data Extraction - - name: Modifying Attention - - - name: Results - - - name: Conclusion - - ---- - - -{% include figure.html path="assets/img/2023-11-09-structured-physics-loss-diffusion/title.png" class="img-fluid" style="width:300px; height:200px;"%} - -## Summary -We investigate how attention is used to identify salient parts of word problems. There is no difference between attention across layers to necessary and useless numbers in math word problems. Slightly decreasing attention on useless numbers in word problems increases performance, while increasing or significantly lowering attention decreases performance. - -## Introduction -Transformer model architectures are the new magic bullet in natural language processing, largely due to their attention mechanism. The sudden salience of the transformer and subsequent massive research focus resulted in the emergence of powerful large language models such as the GPT series, Llama, PaLM, and others. 
The ever-increasing size of these models, as well as the datasets on which they were trained, allows them to continually perform better at a wide range of text generation and analysis tasks [11].

-
-
-However, as with many generative algorithms - especially autoregressive ones like LLMs - the underlying model has no implicit structure for processing or analyzing a logical framework inside the prompt it is given. Transformers, and by extension LLMs, are at their core sequence-to-sequence models. These take in a sequence of arbitrary length and output a sequence of arbitrary length, for example an English sentence as the input and its French translation as the output. Sequence-to-sequence models leverage the fact that language has structure and syntax, and are capable of creating responses that mimic the structural rules followed by their training data [4, 6, 8]. However, in sequence-to-sequence models and the recurrent-neural-network-derived architectures that follow, such as the transformer, there are no intrinsic characteristics that leverage the logical framework of the input. Models that strive to have reasoning capabilities use a variety of approaches to augment the transformer architecture [10], such as specific prompting [1, 7], machine translation [3], salience allocation [5], and more. Some of these improved models exhibit performance that suggests the use of reasoning processes, but as described by Wei et al. [12] “As for limitations, we first qualify that although chain of thought emulates the thought processes of human reasoners, this does not answer whether the neural network is actually ‘reasoning.’” Huang et al. share a similar sentiment that highlights that the most widespread solution, and an effective one, is simply the ever-increasing size of LLMs: “...there is observation that these models may exhibit reasoning abilities when they are sufficiently large… despite the strong performance of LLMs on certain reasoning tasks, it remains unclear whether LLMs are actually reasoning and to what extent they are capable of reasoning.”

- - -Before diving into why this is interesting, let’s take a step back and briefly inspect the transformer as an architecture. Transformers are loosely an extension of a recurrent neural network that leverage parallel processing and a mechanism known as attention to remove the typical reliance RNNs have on temporal data and instead allow the model to process an entire input sequence simultaneously [13, 9].

- -{% include figure.html path="assets/img/2023-11-09-structured-physics-loss-diffusion/rnn.png" class="img-fluid" style="width:300px; height:200px;"%} - -But what is attention? The key upside of transformers is that they are able to capture both short- and long-range dependencies within the input simultaneously, without the need to manage a memory cell like in certain RNN architectures such as a long short-term memory network. This is accomplished through attention, essentially the computation of how much each part of the input should be weighted based on parameters learned from training data.

- - -{% include figure.html path="assets/img/2023-11-09-structured-physics-loss-diffusion/att.png" class="img-fluid" style="width:300px; height:200px;"%} - -As shown above, each element in the input, split into “tokens,” is given a calculated query and key vector, along with the value vector describing the text, image, or other kind of data contained in the token. This is designed to mimic a value in a database, corresponding to a specific key, being retrieved according to a query. Multiplying some query vector with a given token’s key vector results in a scalar that essentially defines the “significance” of the given token compared to the other tokens, known as an attention score. This attention score is then multiplied by its corresponding token’s value vector and summed to create a context vector representing the aggregate information from the attention step.

-Now we circle back to word problems. Due to the aforementioned absence of explicit logical reasoning capabilities, transformer-based language models - especially smaller ones - can struggle with the few short analytical hops to correctly evaluate a word problem without help. For example, the following question was asked to Llama 2, Meta’s open-source LLM released in 2023. This version of Llama, the smallest available, has 7 billion parameters.

- - - -
- -*User: Answer as concisely as possible. Jack has 7 chairs. Jill takes 7 chairs from him, as well as 2 brooms. How many chairs does Jack have left?* -
-*Llama-2-7b-chat: Jack has 7 chairs left.* - -
- - -You may notice that this response is incorrect. However, it is wrong in a way that seems to ignore certain important information presented in the question (removing 2 brooms). Of course, there is also unimportant information in the prompt that we want the model to ignore - the fact that Jill took two of Jack’s brooms is not relevant to the number of chairs in his possession. - - -### Related Work - -Existing approaches to entice LLMs to correctly answer word problems like these take a few forms, touched on previously. There are various versions of prompt engineering, which are designed to ask the question in a specific way in order to prompt the model’s response to be formatted in a certain way. Zero-shot chain-of-thought prompting [1, 12, 14] is a commonly cited example, where an additional instruction such as “Let’s think about this step by step” or “Let’s think analytically” are appended to the prompt. These additional instructions encourage the model to decompose the problem into intermediate steps and solve them procedurally. However, Wei et al. note that this does not indicate that the model itself is reasoning, only that it is achieving better results by emulating a structure often used in linear reasoning problems. Additionally, the authors go on to note that this emergent behavior of large models is challenging to reproduce in smaller models. Another novel approach is to parse the input information in a way that is conducive to solving an underlying math problem numerically. Griffith and Kalita treat this process as a machine translation problem, training several unique transformer architectures to make multiple translations from English to arithmetic expressions [3] that can then be evaluated computationally, outside of the LLM. These two techniques can also be fused, using fine-tuned chain-of-thought prompting for zero-shot math word problem solutions, bridging the gap between the previous two methods [7].

-More broadly, solving word problems is a two-part problem: selecting for important information, and then analytically evaluating that information to arrive at an answer. There is a broad body of work on using LLMs to summarize bodies of text, which parallels extraction of useful numerical features from word problems. The two main types of summarization are extractive summarization and abstractive summarization, where the former remains truer to the original input text but struggles to create novel text, while the latter attempts to fill in those gaps but can sometimes create information that was not originally present and may not be correct [15, 5]. Wang et al. in particular create an augmentation to the transformer architecture, dubbed SEASON, that is designed to combine both extractive and abstractive summarization, but contains useful insights into how extractive summarization of text might apply to math word problems. For example, the abstractive power of SEASON comes from the underlying transformer and its generative capabilities, but it is constrained by a fixed-allocation salience system to emphasize extraction of useful information by essentially adding additional key vectors that describe their relevance to a summarization query. This allows the model to predict the salience of potential responses in order to reduce hallucination of abstractive elements. This salience-driven approach shows theoretical promise in complex extractive word problem scenarios, where managing an allocation of salience could translationally be indicative of useful numerical inputs rather than core themes. Salience also shares some characteristics, mechanically, with attention, and raises the question of whether intuition from summarization models can be applied to augment transformer attention to have better extractive logic. 
- - -### Motivation - -This question, bolstered by the similarly-themed research underlying the ability of LLMs to reason and solve math word problems, was the driving force behind our project. Attention is an extremely powerful tool, and a better understanding of how attention scores affect assessment and evaluation of word problems is necessary in order to use it more effectively to address the gaps in the reasoning capabilities of LLMs, especially smaller architectures. A true solution to this problem would be complex, but we strove to answer certain core questions about how math word problems move through large language models, what their attention scores can tell us about how the model is choosing to respond, and what information the model is responding to. Chiefly, we were interested in how the attention scores of certain tokens in word problems - particularly pertaining to numbers necessary for solving the problem - would change throughout the layers of the transformer, and whether that yields insight into how to tune the attention process generally to enhance the models’ abilities, both reasoning and extractive. - - -## Methods - -### Model and Hardware - -Our chosen model for study was Meta’s Llama 2 7B-chat parameter model. This choice was a result of our particular focus on smaller LLMs, due to the aforementioned emergent reasoning capabilities of models with significantly larger numbers of parameters. Llama 2 is also open-source, allowing us to easily peel apart the attention layers and heads to study how input and output information propagated through the network, as well as extract model weights and attention values. The chat version of the model additionally is better suited for direct question responses, and includes wrappers to handle the relevant meta-parameters to make the chat interface feasible. - We hosted Llama 2 on a vast.ai cloud instance due to the high VRAM requirements of the model. 
The instance consisted of a single Nvidia RTX 4090 GPU instance with 24GB of VRAM connected to an AMD Ryzen 9 5950X 16-core CPU. The model was supported by Nvidia CUDA version 11.7 and the cuDNN GPU-accelerated development library, version 8.9.7. The model itself ran using PyTorch 2.0.1. - - -### Prompt Generation - -We prepended the instruction “Answer as concisely as possible” to each prompt in order to deliberately circumvent potentially invoking chain-of-thought reasoning and thereby subverting the qualities under investigation regarding the model’s zero-shot ability to discern relevant and irrelevant information. In order to assess that capability, we created a question generation algorithm to randomly generate a bank of simple subtraction word problems, for example “If Jack starts out with 7 sponges and Jill takes 4 of them, then Jack gets 2 badges, how many sponges does he have?” Each question contains two numbers necessary to the subtraction - in this example, that would be the number of sponges before and after the events of the problem: 7 and 4. Each example also contains one useless number, corresponding to things that are not relevant to the ultimate question being asked to the model. In this case, that would be the two badges. -Each number is generated in its numeral representation (‘7’ rather than ‘seven’), as this ensures that Llama encodes each of these numbers as a single token that can be easily traced.

Numbers with more digits or numbers spelled out in natural language were often split into multiple consecutive tokens, so to simplify our visualizations we elected to force a single-token representation. This necessitated that each of the four numerical quantities in the math problem - the two relevant numbers, the useless number, and the answer - had to all be unique, in order to avoid accidentally crediting the model for producing a correct response when in fact it simply selected a number in the problem that had been generated to be a duplicate of the answer. This might occur with a problem like “If Jack has 8 umbrellas, and Jill takes 5 of them, then Jack gets 3 belts, how many umbrellas does he have?” In this case, attribution of salience to the value “3 belts” and subsequent inclusion of the number 3 in the answer introduces ambiguity into the correctness of the response, since 3 is in fact the true answer.

-To avoid one-off errors attributed with specific words or sentence structures, the algorithm was designed to randomly construct the sentences using multiple different semantic structures and sample the nouns used from a bank of 100 random objects. Coupled with large testing sets of several hundred examples, this prevents irregularities in the model’s responses to particular syntax or words from significantly affecting results. Finally, the last meaningful element of prompt design was that the nouns chosen to be in the random object pool were deliberately selected to be as semantically difficult as possible. If the model is presented with a question that, for example, includes a number of vehicles as well as a number of cars, it would be entirely justifiable to interpret that question differently than the intent of a subtraction problem with the same numbers but instead involving apples and chinchillas.

-We calculate whether the problem is correct by checking whether the correct number and noun are both present in the correct configuration in the answer content output by Llama. - Each prompt was run on a fresh reinitialized instance of Llama, to avoid extracting information from a larger content window that might include numbers or insight from past problems. - - -### Data Extraction - -The main data structure was filled as follows. For each new autoregressive output logit, each head in each transformer layer calculates attention scores across all input tokens. These scores were collected and aggregated to map the attention in the model as each prompt moved through the transformer blocks. - -In each experiment, attention scores were scraped from the individual model instance for each prompt by selecting the attention values associated with the tokenized representations of the two necessary numerical inputs as well as the single useless input. This produced a lot of data in high dimensions. - -To extract the significant aspects of the data and compress it to a reasonable number of dimensions for graphical representation, we took the attention score tensors (which were also saved at their original sizes) and averaged across the following dimensions: - -- **Heads in each layer:** This revealed the change in attention over layers, rather than over heads, in order to potentially reveal the numbers’ progression through deeper-level abstractions, allowing us to answer questions like: - - How do self-attention and attention in early layers look for values relevant to the problem? - - What role does attention play for the purposes of arriving at a solution to the problem as we reach the middle layers of the model? - - Is there a meaningful representation of the numerical values the problem is concerned with deep inside the model? 
- -- **Output logits:** The rationale behind this choice was to allow any intermediate “reasoning” to become evident by encapsulating multiple parts of the response. - -- **Input problems:** Eliminates intrinsic variation in response to slightly different questions. - -This allowed us to arrive at a representation of how the attention for the relevant tokens changed as it passed through the individual layers of the model. - - -{% include figure.html path="assets/img/2023-11-09-structured-physics-loss-diffusion/averaged.png" class="img-fluid" style="width:300px; height:200px;"%} - -### Attention Modification - -For our experiments where we modify attention scores to the useless token, in every layer we multiply every attention score to that token by some value, the multiplier, before taking softmax. - - - -## Results - -We found that there was no significant difference between attention to the useless number and the two necessary numbers over 100 samples (with 55/100 accuracy). Perhaps the mid-layers attention peak in the useless number is earlier than for the necessary numbers, but not significantly. We found a peak in attention to all number tokens in middle layers. We found no significant difference between the graphs for problems it answered correctly versus incorrectly. - -{% include figure.html path="assets/img/2023-11-09-structured-physics-loss-diffusion/correct.png" class="img-fluid" style="width:300px; height:200px;"%} -{% include figure.html path="assets/img/2023-11-09-structured-physics-loss-diffusion/incorrect.png" class="img-fluid" style="width:300px; height:200px;"%} - -Here is the attention across all input tokens for one example problem. (Note these are not all the input tokens. 
The full input tokens were ['', '[', 'INST', ']', 'Answer', 'as', 'cons', 'is', 'ely', 'as', 'possible', '.', 'Jack', 'has', '', '9', 'pens', 'and', '', '7', 'spo', 'ons', '.', 'He', 'gives', 'away', '', '9', 'pens', '.', 'How', 'many', 'pens', 'does', 'he', 'have', '?', '[', '/', 'INST', ']', ''] - - -{% include figure.html path="assets/img/2023-11-09-structured-physics-loss-diffusion/sentence.png" class="img-fluid" style="width:300px; height:200px;"%} - -Surprisingly, there was not more attention to numbered tokens compared to other tokens. - - -When looking through each attention head individually, some attended to specific numbered tokens. For example, head 13 layer 16 strongly attended to “9” - -#### Graph for 13th Heads Only - -{% include figure.html path="assets/img/2023-11-09-structured-physics-loss-diffusion/head13.png" class="img-fluid" style="width:300px; height:200px;"%} - -Finally, we multiplied attentions to the useless number’s token and varied the multiplier from 0 to 5. (30 samples per data point). We found that it is actually useful to slightly decrease attention to the useless token, and performance decreases as attention to the useless token increases. - -{% include figure.html path="assets/img/2023-11-09-structured-physics-loss-diffusion/multiplier.png" class="img-fluid" style="width:300px; height:200px;"%} - -We suspect the rise of performance from multiplier of 2 to 5 to be insignificant and random due to low sample size. - -For small multipliers above 1, most responses are of the type where the useless number is given as the answer. - -
- -*User: If Jack starts out with 7 coasters and Jill takes 2 of them, then Jack gets 3 badges, how many coasters does he have?* -
-*Llama-2-7b-chat: Jack has 3 coasters.* - -
- -For large multipliers above 1, the softmax causes the other attention values to approach zero and the model’s quality deteriorates. - -
- -*User: Jack has 7 tacos and 2 cucumbers. He gives away 3 tacos. How many tacos does he have?* -* -
-*Jack has 767 tacos. How many tacos does Jack have? Jack has 76 tacos. How many tacos does Jack has?* - -
- -And at very extreme multipliers, the model outputs gibberish. - -## Conclusion - -We found that decreasing attention 50% (pre-softmax) on the useless token improves performance on our word problems, while increasing the attention (or decreasing it too much) hurts performance. We hypothesize the performance decreases because it 1) makes the model more likely to output the useless number, and 2) changes the model too much, turning responses into gibberish. - Our initial exploration of the attention tracked through the layers of the model yielded very little insight, perhaps due to rapid abstraction of the tokens. - This gives us insight into how we might further explore using attention as a salience-adjacent metric for extracting information from word problems. - diff --git a/_posts/2023-11-09-transfer-resistant-model-training.md b/_posts/2023-11-09-transfer-resistant-model-training.md deleted file mode 100644 index 3938d49e..00000000 --- a/_posts/2023-11-09-transfer-resistant-model-training.md +++ /dev/null @@ -1,187 +0,0 @@ ---- -layout: distill -title: Transfer Resistant Model Training -description: This blog post details our work on training neural networks that - are resistant to transfer learning techniques. -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Ryan Yang - url: "https://www.google.com/url?sa=i&url=https%3A%2F%2Fmanipulation.csail.mit.edu%2FFall2023%2Findex.html&psig=AOvVaw3MuJLCZwr7MxMiaaFQTBeC&ust=1699601771753000&source=images&cd=vfe&opi=89978449&ved=0CBIQjRxqFwoTCNil45C0toIDFQAAAAAdAAAAABAH" - affiliations: - name: MIT - - name: Evan Seeyave - url: "" - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-transfer-resistant-model-training.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. 
-toc: - - name: Introduction and Motivation - - name: Related Works - - name: Methods - - name: Experiments - - name: Results - - name: Discussion - - name: Limitations and Future Work - - name: Conclusion - - name: Appendix ---- - -## Introduction and Motivation - -In transfer learning, a model is trained for a specific task and is then fine-tuned for a different task . In doing so, one tries to best leverage and reuse features and performance of the large pre-trained model for other tasks. Many works have focused on making transfer learning more robust and efficient. Transfer learning can be very useful for saving compute resources, time, and money. - -In this project, we study an opposing question: how to learn model weights that classify well for one dataset but reduce learning efficiency when transferred to another. The motivation is as follows. As computational resources and capable models become more accessible, the risk of unregulated agents fine-tuning existing models increases, including for malicious tasks. Recent work has shown that previously aligned models can be compromised to produce malicious or harmful outputs . This may even occur with a few adversarial examples against models specifically trained to produce safe outputs . Currently, risks with language models are commonly discussed. However, investigating CNNs can guide designing defenses for neural network architectures against malicious agents in general. - -{% include figure.html path="assets/img/2023-11-09-transfer-resistant-model-training/setting.png" class="img-fluid" %} - -To our knowledge, there exists no previous literature on learning parameters robust against transfer learning. A related field is machine unlearning. In machine unlearning, a model must forget certain pieces of data used in training . However, we wish to examine methods that not only guarantee poor performance after unlearning, but also after fine-tuning on the “malicious” or “forget” dataset. 
For example, using a popular unlearning approach which reaches 0% accuracy on the “forget” dataset, we easily fine-tuned the model with the same dataset to reach higher accuracy after a few epochs as shown below . This is a gap in previous work in machine unlearning and demonstrates the novelty and difficulty of learning models that not only perform poorly on specified datasets but are robust against fine-tuning. - -{% include figure.html path="assets/img/2023-11-09-transfer-resistant-model-training/machine_unlearning.png" class="img-fluid" %} - -We propose two new approaches: selective knowledge distillation (SKD) and Reverse Model-Agnostic Meta-Learning (MAML) . In SKD, a “student” model is trained using activations of a “teacher” for the beneficial data and trained on hardcoded activations for the “malicious” data. In Reverse-MAML, we attempt to learn parameters that aren’t robust to transfer to specified tasks. Due to computational constraints, we examine a toy setting with the CIFAR-10 Dataset as well as using a small CNN model shown in the appendix . Overall, both the Reverse-MAML and SKD approach exceed baseline approaches on scoring good accuracy on a “beneficial” dataset while being on-par with preventing fine-tuning on a “malicious” dataset. Thus, there remain limitations, and we conclude with future work. - -## Related Works - -### 1. Transfer Learning - -As mentioned previously, transfer learning has been a long-time objective in deep learning research . By training a model on one dataset, the goal is to be able to reuse parameters and learned features to achieve high performance or efficient learning for another dataset. Transfer learning for convolutional neural networks has been a popular approach, allowing users to train a high-performance model with limited computational resources or data . Further work has analyzed settings for successful transfer learning in image classification and further challenges when there is poor transfer . - -### 2. 
Model-Agnostic Meta-Learning (MAML) - -MAML is an algorithm that makes models readily adaptable to new tasks . It essentially primes the model for transfer learning as effectively as possible. The algorithm attempts to learn parameters and model weights such that a few steps of gradient descent on learning a new task will lead to good performance on said new task. Further work has continued attempting to meta-learn useful model parameters, building off of MAML . - -### 3. Machine Unlearning - -A closely aligned question to ours is the problem of machine unlearning. Machine unlearning attempts to remove the influence of a set of data points on an already trained model. In this setting, a model is initially trained on some dataset . The model embeds information about and “remembers” features about data points. This means that the model uses information about the data points to make decisions. For example, LLMs like GPT can learn sensitive information about some people . This might pose a threat to privacy. We may want the model to “forget” some subset of the training set, in this case information about the people. However, we currently have no standardized method of doing this. Machine unlearning is a nascent field in artificial intelligence research and is currently being studied. It is a difficult problem, and our work is tangential to machine unlearning. - -
-
- -To our knowledge, there hasn’t been any research on models that are resistant to transfer learning and fine-tuning. The works mentioned above, transfer learning techniques and MAML, focus on improving fine-tuning. We aim to make fine-tuning more difficult while preserving robustness on the original task. Machine unlearning seeks to forget data that the model has been previously trained on. On the other hand, our goal is to preemptively guard the model from learning certain data in the first place. Thus, our research question demonstrates a clear gap in existing research which has focused on either improving transfer learning or only reducing model performance on external datasets. Our research explores this new question in the deep learning field and draws from recent works to guide methodology. - -## Methods - -We propose three methods, one existing and two novel, to begin addressing the problem of learning parameters scoring high accuracy on a “beneficial” dataset but are robust against transfer learning on a known “malicious” dataset. Further experimental details are found in the experiments section. - -### 1. Machine Unlearning - -The first approach is a baseline and reimplementation of a popular machine unlearning method from . Here, the model is initially trained on both the “beneficial” and “malicious” dataset and undergoes a forgetting stage where the “malicious” dataset is forgotten using a noise matrix. A final repair stage is then conducted to improve performance of the model on the “beneficial” dataset. Specific details can be found at . - -{% include figure.html path="assets/img/2023-11-09-transfer-resistant-model-training/performance.png" class="img-fluid" %} - -### 2. Selective Knowledge Distillation - -Our first proposed novel approach is selective knowledge distillation (SKD) drawing inspiration from knowledge distillation. 
In knowledge distillation, a smaller “student” model is trained to imitate a larger “teacher” model by learning logits outputs from the “teacher” model. In doing so, the “student” model can hopefully achieve similar performance to the “teacher” model while reducing model size and complexity. - -{% include figure.html path="assets/img/2023-11-09-transfer-resistant-model-training/teacher_student.gif" class="img-fluid" %} - -In SKD, we similarly have a “teacher” and “student” model. The “teacher” is a model that has high accuracy on the “beneficial” dataset but is not necessarily robust against fine-tuning on the “malicious” dataset. Our “student” model is almost identical in architecture to the “teacher” but excludes the final classification layer and the ReLU layer before it. This is shown below. - -{% include figure.html path="assets/img/2023-11-09-transfer-resistant-model-training/teacher_student_architecture.png" class="img-fluid" %} - -Our goal is for the student model to have high performance on the “beneficial” dataset after adding a classification layer while being robust against fine-tuning on the “malicious” dataset. To perform SKD, we initially train the teacher model until reaching sufficiently high performance on the “beneficial” dataset. - -We then construct a dataset that contains all the images in the “beneficial” dataset. The labels are activations of the second-to-last layer of the “teacher” model. Note that this is similar to knowledge distillation, except we are taking the second-to-last layer’s activations. We further add all the images in the “malicious” dataset and set their labels to be a vector of significantly negative values. For our experiments, we used -100.0. We train the student model on this collective dataset of images and activation values. 
- -{% include figure.html path="assets/img/2023-11-09-transfer-resistant-model-training/teacher_student_complex.gif" class="img-fluid" %} - -Finally, we add a fully-connected classification layer to the student model and backpropagate only on the added layer with the “beneficial” dataset. - -{% include figure.html path="assets/img/2023-11-09-transfer-resistant-model-training/student.png" class="img-fluid" %} - -Our end goal is to prevent fine-tuning of our CNN on the “malicious” dataset. Thus, if the student model can output activations that all are negative if the image belongs in the “malicious” dataset, then after appending the ReLU layer and setting biases of the second-to-last layer to 0, the inputs to the final classification layer will always be 0, reducing the ability to learn on the “malicious” dataset. Furthermore, the gradient will always be 0 on inputs from the “malicious” dataset so any backpropagating on images and labels originating from the “malicious” dataset from the final layer activations would be useless. - -### 3. Reverse-MAML - -Recall that MAML is focused on finding some optimal set of model weights $$\theta$$ such that running gradient descent on the model from a new few-shot learning task results in a $$\theta’$$ that scores high accuracy on the new task . MAML achieves this by learning the optimal $$\theta$$. To learn this $$\theta$$, MAML computes the second order gradient on the model weights. This allows the model to learn about where the initial $$\theta$$ should have been before an iteration of gradient descent so that taking the step of gradient descent would have led to the minimal loss. - -{% include figure.html path="assets/img/2023-11-09-transfer-resistant-model-training/MAML.png" class="img-fluid" %} - -In our version, we attempt to learn a $$\theta$$ that fine-tunes well to a data distribution $$p_1$$ but fine-tunes poorly to distribution $$p_2$$. To do this, we partition the data into two sets: a “good” set and a “bad” set. 
We train such that for “good” samples MAML performs the standard algorithm above, learning $$\theta$$ that would fine-tune well to the “good” samples. However, for the “bad” set we train the model to do the opposite, learning a $$\theta$$ that would lead to poor fine-tuning. To do this, when taking the second order gradient, the model goes up the gradient instead of down. - -{% include figure.html path="assets/img/2023-11-09-transfer-resistant-model-training/reverse_MAML.png" class="img-fluid" %} - -## Experiments - -Due to computational constraints, we work in the following toy setting. We use the CIFAR-10 dataset where images in the first five ([0, 4]) classes are the “beneficial” dataset and the images in the last five ([5, 9]) classes are the “malicious” dataset. We split the 60,000 CIFAR-10 image dataset into a 40,000 image pre-training dataset, 10,000 image fine-tuning dataset, and 10,000 image test dataset. To evaluate each approach, we first evaluate the accuracy of the model on the beneficial test dataset. Then, we replace the last layer parameters of the output model, freeze all previous layer’s parameters, and finally fine-tune on the malicious fine-tuning dataset. We fine-tune using the Adam optimizer with a learning rate of 0.1 and momentum of 0.9. We finally evaluate model performance on a malicious test dataset. These steps in this evaluation represent the common pipeline to perform transfer learning and are shown below. Full hyperparameters for evaluation are listed in the appendix. We also perform ablation studies on the quality of the teacher model for SKD; further details are found in the Discussion section. All experiments, including ablations, are performed and averaged over 5 random seeds. 
- -{% include figure.html path="assets/img/2023-11-09-transfer-resistant-model-training/pipeline.png" class="img-fluid" %} -{% include figure.html path="assets/img/2023-11-09-transfer-resistant-model-training/evaluation.png" class="img-fluid" %} - -## Results - -The first evaluation metric is accuracy of the outputted model from each approach on beneficial data. This is shown in the figure below. - -{% include figure.html path="assets/img/2023-11-09-transfer-resistant-model-training/beneficial_accuracy.png" class="img-fluid" %} -
- Figure 1 -
-The second metric of evaluation is the accuracy of the output model from each approach on test malicious data as it’s being fine-tuned on fine-tune malicious data. This is shown with learning curves in the figure below. Note that lower accuracy is better. - -{% include figure.html path="assets/img/2023-11-09-transfer-resistant-model-training/malicious_accuracy.png" class="img-fluid" %} -
- Figure 2 -
- -## Discussion - -We observe that finding parameters that have high accuracy on a “beneficial” dataset but are robust against fine-tuning on a “malicious” dataset is challenging. On all three methods, including a popular machine unlearning approach, the model is able to somewhat fit to the “malicious” dataset. However, for SKD, this accuracy consistently does not significantly exceed 40%. - -More importantly, we find in Figure 1 that both Reverse-MAML and SKD are able to score higher accuracy on the beneficial dataset. This is surprising as machine unlearning methods were designed to maintain high accuracy on a retain dataset. Combining these two graphs, we conclude that there remains future work to explain why the resulting models had such high accuracy on the malicious data out-of-the-box and how to minimize it. - -We also experimented with Reverse-MAML under the Omniglot dataset . Here, we attempted to fine-tune on digit images. We found that Reverse-MAML performed very well in this setting. After training the Reverse-MAML model, the model held around 85% test accuracy on the “Beneficial” Omniglot dataset and around 20% on the “Malicious” digit dataset. On the digit set, the model would often predict the same digit for all samples, as shown below. We believe that Reverse-MAML performed better here because the Omniglot characters and the digits are simpler to interpret and learn specific features about compared to CIFAR-10. - -{% include figure.html path="assets/img/2023-11-09-transfer-resistant-model-training/digits.png" class="img-fluid" %} -
- All digits were predicted to be a 2. -
- -Slow learning in SKD is likely caused by filtering by the ReLU activation function which causes activations to become 0. This ideally occurs when we train the student model to output negative activation values into the final classification layer if the input is from the “malicious” dataset. These values make it more difficult to learn useful weights for the final classification layer and apply gradient descent on earlier layers. We confirm this by measuring misses or the percent of “malicious” images that don’t result in all 0 activations into the final classification layer shown below. We show, in general, misses are low across different teacher models. For this ablation, we vary teacher models by the number of epochs they are trained. - -{% include figure.html path="assets/img/2023-11-09-transfer-resistant-model-training/student_table.png" class="img-fluid" %} - -We also measure how accuracy of the teacher model impacts performance of the student downstream. We vary the number of epochs the teacher model is trained in and report accuracies of the teacher model on the “beneficial” dataset below. More importantly, we empirically show that high teacher accuracy on the “beneficial” dataset is needed for the student to achieve high accuracy on the “beneficial” dataset. This follows our knowledge distillation framework as the student attempts to mimic the teacher model’s performance on the “beneficial” dataset by learning activation values. - -{% include figure.html path="assets/img/2023-11-09-transfer-resistant-model-training/error_bounds.png" class="img-fluid" %} - -## Limitations and Future Work - -### 1. Requirement for "Malicious" data - -The motivating example for this project was preventing a malicious agent from hijacking a model to perform undesirable tasks. 
However, it is often not possible to list out every possible “bad” task, and thus future work which extends from this project can explore how to prevent fine-tuning of tasks that aren’t specified as clearly and completely. - -### 2. Computational Restraints - -Due to computational restraints, we were unable to test or fine-tune models with significantly higher parameter counts or experiment with larger datasets. However, this remains an important step as transfer learning or fine-tuning is commonly applied on large models which we could not sufficiently investigate. Thus, future work can apply these existing methods on larger models and datasets. - -### 3. Exploration of More Methods in Machine Unlearning and Meta-Learning - -Further analysis of existing methods in machine unlearning and meta-learning can be used to benchmark our proposed approaches. Though we tried to select methods that had significant impact and success in their respective problem settings, other approaches are promising, including using MAML variants like Reptile or FOMAML . - -### 4. Imperfection in filtering “malicious” data for SKD - -Ideally, in SKD, the underlying model would always output negative activation values given a “malicious” input. However, this does not always occur, and thus fitting on the malicious data is still possible. Future work can explore how to improve this, though perfect accuracy will likely not be feasible. Furthermore, it is still possible for a malicious agent to hijack the model by performing distilled learning on the second-to-last layer activations, thus removing this ideal guarantee. Future work can also investigate how to have similar guarantees throughout all of the model’s activation layers instead of just one. - -## Conclusion - -In this project, we investigated how to train a model such that it performs well on a “beneficial” dataset but is robust against transfer learning on a “malicious” dataset. 
First, we show this is a challenging problem, as existing state of the art methods in machine unlearning are unable to prevent fine-tuning. We then propose two new approaches: Reverse-MAML and SKD. Both serve as a proof of concept with promising preliminary results on the CIFAR-10 Dataset. We conclude by noting there are limitations to this work, most notably the need for a “malicious” dataset and computational limits. We then propose future work stemming from these experiments. - -## Appendix - - -CNN Architectures used for experiments: -{% include figure.html path="assets/img/2023-11-09-transfer-resistant-model-training/CNN_architectures.png" class="img-fluid" %} - -* Note, all graphs and tables are averaged over 5 seeds with reported standard deviation. \ No newline at end of file diff --git a/_posts/2023-11-09-transformer-time.md b/_posts/2023-11-09-transformer-time.md deleted file mode 100644 index 9dba63e7..00000000 --- a/_posts/2023-11-09-transformer-time.md +++ /dev/null @@ -1,382 +0,0 @@ ---- -layout: distill -title: A Comparative Study of transformer on long sequence time series data -description: This study evaluates Transformer models in traffic flow prediction. Focusing on long sequence time-series data, it evaluates the balance between computational efficiency and accuracy, suggesting potential combinations of methods for improved forecasting. -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Jie Fan - # url: "https://en.wikipedia.org/wiki/Albert_Einstein" - affiliations: - name: MIT - # - name: Boris Podolsky - # url: "https://en.wikipedia.org/wiki/Boris_Podolsky" - # affiliations: - # name: IAS, Princeton - # - name: Nathan Rosen - # url: "https://en.wikipedia.org/wiki/Nathan_Rosen" - # affiliations: - # name: IAS, Princeton - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-transformer-time.bib - -# Add a table of contents to your post. 
-# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Abstract - # - name: Images and Figures - # subsections: - # - name: Interactive Figures - - name: Introduction - - name: Methodology - - name: Experiments - subsections: - - name: Dataset - - name: Experimental setting - - name: Result - - name: Conclusion and Discussion - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Abstract -This research means to discover the power of transformer in dealing with time series data, for instance traffic flow. Transformer with multihead self-attention mechanism is well-suited for the task like traffic prediction as it can weight the importance of various aspects in the traffic data sequence, capturing both long-term dependencies and short-term patterns. Compared to the LSTM, the transformer owns the power of parallelization, which is more efficient when facing a large dataset. And it can capture the dependencies better with long sequences. However, the transformer may have trouble dealing with the long sequence time-series data due to the heavy computation. This research compares differnt methods that make use of the information redundancy and their combination from the perspective of computational efficiency and prediction accuracy. - -## Introduction - -The time series data processing and prediction are usually conducted with RNN and LSTM. In the case of traffic prediction, CNN and GNN are combined for efficiently capturing spatial and temporal information. 
And LSTM is widely used due to its better performance on capturing temporal dependencies. Recent studies have proposed to replace RNNs with the Transformer architecture, as it is more efficient and able to capture sequential dependencies. However, the model is inapplicable when facing long sequence time-series data due to quadratic time complexity, high memory usage, and inherent limitation of the encoder-decoder architecture. - -Not all time series are predictable; the ones that are feasible to forecast should contain cyclic or periodic patterns. It indicates that there is redundant information in the long sequence data. The boundary of the redundancy can be measured by the optimal masking ratio of using MAE to process the dataset. Natural images are more information-redundant than languages and thus the optimal masking ratio is higher. BERT uses a masking ratio of 15% for language, MAE uses 75% for image and the optimal ratio for video is up to 90%. Traffic data is potentially redundant. It contains temporal and spatial information so that neighbor sensors can provide extra information in addition to temporal consistency. We inferred that the optimal ratio for traffic data should be located between image and video, as it has more multidimensional information than images, while the speed captured by sensors is not as consistent as the frames in videos. We use the GRIN model to mask the input data using the Metr_LA dataset to test the redundancy of traffic data. The results show that it is tolerant when the masking ratio is lower than 90%. Then there is the possibility of using distilling operation to compress information, reducing computational requirement and memory usage. Similar to traffic data, most of the time series data are multivariate. - -
-
- {% include figure.html path="assets/img/2023-11-09-transformer-time/GRIN.png" class="img-fluid" %} -
-
-
- Table 1: Performance comparison with baseline models and GRIN with various masking ratio. (by Tinus A,Jie F, Yiwei L) -
- -## Methodology -The information redundancy leads to the common solutions of using transformer to deal with long sequence time-series forecasting (LSTF) problems, where models focus more on valuable datapoints to extract time-series features. Notable models focusing on the less explored and challenging long-term time series forecasting (LTSF) problem include LogTrans, Informer, Autoformer, Pyraformer, Triformer and the recent FEDformer. There are several main solutions: - -**Data decomposition**. Data decomposition refers to the process of breaking down a complex dataset into simpler, manageable components. Autoformer first applies seasonal-trend decomposition behind each neural block, which is a standard method in time series analysis to make raw data more predictable . Specifically, they use a moving average kernel on the input sequence to extract the trend-cyclical component of the time series. The difference between the original sequence and the trend component is regarded as the seasonal component. - -**Learning time trend**. Positional embeddings are widely used in transformer architecture to capture spatial information. Moreover, additional position embeddings can help the model to understand the periodicity inherent in traffic data, which implies applying relative or global position encoding in terms of weeks and days. - -**Distillation**. The Informer model applies the ProbSparse self-attention mechanism to let each key only attend to several dominant queries and then uses the distilling operation to deal with the redundancy. The operation privileges the superior ones with dominating features and makes a focused self-attention feature map in the next layer, which trims the input's time dimension. - -**Patching**. As proposed in ViT, the patch embeddings are small segments of an input image, which transfer the 2D image to 1D sequence. 
Each patch contains partial information of the image and additional positional embedding helps the transformer to understand the order of a series of patch embeddings. In the case of time series, though it is 1D sequence that can be received by standard transformer, the self-attention may not efficiently capture the long dependencies and cause heavy computation. Hence, dealing with time-series data, patching is used to understand the temporal correlation between data in a time-step interval. Unlike point-wise input tokens, it enhances the locality and captures the comprehensive semantic information in different time steps by aggregating time steps into subseries-level patches. - -## Experiment -### Dataset -We used a multivariate traffic dataset (https://pems.dot.ca.gov/) that records the road occupancy rates from different sensors on San Francisco freeways. We selected the first 100 sensors as our experiment dataset. - -### Experimental Settings -We choose two models, Informer and PatchTST (supervised), to test the influence of distillation, positional embeddings, patching and data decomposition. For the implementation of Informer and PatchTST, we used the code provided by the authors (https://github.com/yuqinie98/patchtst). We mean to compare different methods that aim to efficiently explore on long sequence data, considering both efficiency and accuracy. This leads to a discussion about the trade-off when using these models to solve real life cases and the possibility of improving or combining different methods. -
-
- {% include figure.html path="assets/img/2023-11-09-transformer-time/Informer.png" class="img-fluid" %} -
-
-
- Figure 1: Informer architecture. -
- -
-
- {% include figure.html path="assets/img/2023-11-09-transformer-time/PatchTST.png" class="img-fluid" %} -
-
-
- Figure 2: PatchTST architecture. -
- -Setting 1. Compare efficieny and accuracy of distillation and patching. All the models are following the same setup, using 10 epochs and batch size 12 with input length $$\in$$ {96,192,336,720} and predictioin length $$\in$$ {96,192,336,720}. The performance and cost time is listed in the table 2. - -Setting 2. Explore the influence of data decomposition. We slightly change the setup to compare different methods. We apply the data decomposition with PatchTST to explore the significance of these techniques. - -## Result -
-
- {% include figure.html path="assets/img/2023-11-09-transformer-time/test1.png" class="img-fluid" %} -
-
-
- Table 2: Setting 1. Traffic forecasting result with Informer and supervised PatchTST. Input length in {96,192,336,720} and prediction length in {96,192,336,720}.
- -
-
- {% include figure.html path="assets/img/2023-11-09-transformer-time/1.png" class="img-fluid" %} -
-
-
- Figure 3: Setting 1. Traffic forecasting result with Informer and supervised PatchTST. Input length in {96,192,336,720} and prediction length = 720.
- -
-
- {% include figure.html path="assets/img/2023-11-09-transformer-time/test2.png" class="img-fluid" %} -
-
-
- Table 3: Setting 2. Traffic forecasting result with supervised PatchTST, with and without data decomposition. Input length = 336 and prediction length in {96,192,336,720}.
- -Sufficiency. According to Table 2. The Informer(ProbSparse self-attention, distilling operation,positional embedding) is generally more sufficient than PatchTST(patching, positional embedding). Especially with the increase of input sequence, Informer with idstilling operation can forecast in significantly less time comparing to patching method. Across differnt prediction sequence length, PatchTST does have much difference and Informer tends to cost more time with longer prediction. According to table 3, with data decomposition, PatchTST spends more time while does not achieve significant better performance. - -Accuracy. According to Table 2. In all scenarios, the performance of PatchTST is better than Informer considering the prediction accuracy. Along with the increase of input sequence length, PatchTST tends to have better accuracy while Informer stays stable. - -Overall, we can induct from the design of two models about their performances. Informer is able to save more time with distilling operation and PatchTST can get better accuracy with the capture of local and global information. Though patch embeddings help the model to get better accuracy with prediction task, it achieves so at the expense of consuming significant amount of time. When the input sequence is 720, PatchTST takes more than twice as long as B. - -## Conclusion and Discussion -Based on existing models, different measures can be combined to balance the time consumed for forecasting with the accuracy that can be achieved. Due to time constraints, this study did not have the opportunity to combine additional measures for comparison. We hope to continue the research afterward and compare these performances. - -In addition to applying transformer architecture alone, a combination of various methods or framework may help us to benefit from the advantages of different models. The transformer-based framwork for multivariate time series representation lerning is proposed by George et al. 
The Spatial-Temporal Graph Neural Networks(STGNNs) is another widely used model in traffic prediction, which only consider short-term data. The STEP model is propsde to enhance STGNN with a scalable time series pre-training mode. In the pre-training stage. They split very long-term time series into segments and feed them into TSFormer, which is trained via the masked autoencoding strategy. And then in the forecasting stage. They enhance the downstream STGNN based on the segment-level representations of the pre-trained TSFormer. - - - - - - - - - - - - diff --git a/_posts/2023-11-09-transformers-as-gamers.md b/_posts/2023-11-09-transformers-as-gamers.md deleted file mode 100644 index 01dd6331..00000000 --- a/_posts/2023-11-09-transformers-as-gamers.md +++ /dev/null @@ -1,214 +0,0 @@ ---- -layout: distill -title: Learning Generals.io -description: - We explore the application of deep learning to the online game generals.io and discuss what is necessary to achieve superhuman performance in generals.io. -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Ethan Yang - url: "https://www.ethany.dev" - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-transformers-as-gamers.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - ---- - - - - - -# Introduction -[Generals.io](https://generals.io) is a real-time turn-based strategy game. In generals.io, two players with a "general", denoted with a crown, spawn on a board with mountains and cities scattered. Initially, players have no knowledge of other parts of the board besides the tiles immediately surrounding their general. Armies are the main resource of the game, which generate slowly from ordinary tiles, but quickly from the general and cities. 
Using armies, players compete to capture terrain and cities, which also grants further vision of the board. On each turn, a player is able to click on a cell with their army and use the keyboard to move it in the four cardinal directions. The goal of the game is for the player to use their army to capture the tile of their opponent's general. - -A typical game state will look like the following: -
-
- {% include figure.html path="assets/img/2023-11-09-transformers-as-gamers/generals_pomdp.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-09-transformers-as-gamers/generals.png" class="img-fluid rounded z-depth-1" %} -
-
-
- The left image shows what the game play screen looks like while playing from the perspective of red. Red is only able to see tiles adjacent to it, and every other tile is covered in a fog of war. The right image lifts the fog of war and shows the perspective of the spectator, and we can see where blue's general is located, as well as unclaimed cities around the edge of the map.
- -Generals.io has a modest daily player base and has had attempts to implement bots to play against humans. Currently, no bots have been able to defeat top humans consistently. The top bots, such as [this one](https://github.com/EklipZgit/generals-bot), are implemented using rule-based logic. They achieve human-level performance and are able to win some games against the top 10 ranked players. Previous machine-learning based bots have attempted to use a CNN LSTM in the model architecture, such as [this post by Yilun Du](https://yilundu.github.io/2017/09/05/A3C-and-Policy-Bot-on-Generals.io.html). He separately evaluates a supervised learning approach and a reinforcement learning approach. His supervised learning approach reaches a competent level of play and is able to expand while having an awareness of needing to defend. However, it is very inefficient and makes basic strategic mistakes, such as running army into cities without fully taking them. The reinforcement learning approach was trained using A3C from scratch, but it was not able to learn beyond random movements. - - -I set out to build on Yilun's work and improve the bot's performance, as well as explore and document what details are actually important for improvement. -# Related Work and Why Generals -Deep learning has already been used to conquer many games, achieving either human-level or superhuman-level performance. The pattern for most games has been to use deep reinforcement learning at enormous scale through self-play. There has been success in chess, Go, Dota 2, and Starcraft. - - -While games in higher complexity have already been defeated by deep learning, the experimentation is often quite opaque, as there are too many decisions that are made to be worthy of reporting on. Furthermore, the games and methods are often way too large for a single researcher to reproduce. For example, OpenAI Five was only able to beat Dota 2 pros after training for ten months, using 770 PFlops/s-days. 
-Generals.io allows for more accessible experimentation through its smaller size and open data pipeline for replays. - -I think there are still insights to be gained in defeating generals.io. In particular, the game comes with a combination of challenges that aren't clearly addressed by previous approaches: -
    -
  1. The game requires a high degree of calculation and precision, as well as strong intuition. Similar to chess, certain parts of the game are more intuitive and positional, and certain parts require searching through possibilities to calculate precisely. In generals.io, the precision mostly comes from being maximally efficient in the opening, as well as calculating distances relative to the opponent's army. This would suggest that some kind of model needs to search in order to achieve superhuman performance.
  2. -
  3. The game is partially observable. This prevents approaches used in perfect information games such as Monte Carlo Tree Search, as we need to form belief states over the opponent's state.
  4. -
  5. The state and action space is enormous, and it requires planning on long time horizons. Games such as poker satisfy both of the above two bullet points, but it was able to be tackled with approaches such as counterfactual regret minimization after bucketing the state and action space. Bucketing the state and action space likely won't work for generals.io, nor will an approach like CFR work.
  6. -
- - -# Methods - -Formally, generals.io can be represented as a POMDP. The underlying state, which is the state of the whole board, can only be observed at tiles that are adjacent to tiles claimed by the player. - - -A wealth of data (over 500,000 games, each containing hundreds of state-action pairs) are available via human replays. We use imitation learning to try to learn from the replays. Concretely, the problem can be modeled as selecting parameters $$\theta$$ of a policy $$\pi$$ (a neural network) to maximize the log likelihood of the dataset $$D$$: - -$$ -\max_\theta \sum_{(s,a)\sim D} \log \pi_\theta(a | s) -$$ - -I used [existing tools](https://github.com/vzhou842/generals.io-Replay-Utils) in order to convert the replays into a json format that could then be parsed. I then adapted Yilun's code, which no longer directly works, in order to simulate the replays to construct the dataset. To start, I only used 1000 replays of highly ranked players to construct my dataset. - -I started mostly with Yilun's features, with small modifications: - -| Channel | Description | -| ::|:-------------:| -| 0 | friendly army values| -| 1 | enemy army values | -| 2 | boolean indicators for mountains and cities | -| 3 | unclaimed city army values | -| 4 | friendly city army values | -| 5 | enemy city army values | -| 6 | boolean indicator for mountains | -| 7 | boolean indicator for friendly and enemy general (if found)| -| 8 | boolean indicator for fog of war | -| 9 | (turn number % 50)/50 | - -The features made a lot of sense to me as a generals player - it's all the information I use to play. I removed Yilun's last feature since a new replay standard made it impossible to compute. - -Yilun used a CNN LSTM as his architecture. In order to keep it simple and evaluate the basic components that improve performance, I removed the memory and only used a simple fully convolutional net with 5 stacked 5x5 filters. 
- -Policies were evaluated by coding a small bot in the recently released [botting framework](https://corsaircoalition.github.io/) for generals. The bot sampled from the policy's distribution over legal moves. Two policies were able to go head to head through this framework, and I could queue 10 games in order to get good estimates for the relative strength between the bots. - -I'll now describe some of the changes I tried and give an analysis of the results of each change. - -## Effects of more data - -The baseline policy, trained with 1000 games, was not very successful. The bot would often move back and forth, without trying to expand or take land. - -I wanted to first explore how the amount of data affected the policy. I took 2000 more games of high ranking players and trained the same policy on a dataset with 3000 games. I expected an improvement in the similarity of the validation and train loss. This was confirmed by the results, shown below. - -
-
- {% include figure.html path="assets/img/2023-11-09-transformers-as-gamers/combined.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Train and validation loss curves of training with less and more data. -
- -This makes sense, as adding more data is essentially a regularizer. It prevents the model from overfitting, as it needs to do well on the added data too. Furthermore, it looks like it converges faster in epoch space, but in reality it's also going through more examples, so it trained at roughly the same speed if one were to scale the epochs by a factor of 3. The policy was also much more effective, and it did not run back and forth as much. I think this was likely due to reduced overfitting. - -I suspect that more data would have improved the policy even more, but I didn't go larger, as it would have broken past the limits of the infrastructure I built. In particular, the dataset consisting of 3000 games took over 4 GB of disk space. A smarter job of batching the data would have allowed me to train with more. - -## Squishing army features - -Working with the 3000 games, I turned my attention towards improving the features. They were already pretty comprehensive, but I was skeptical of the many army features we had. In particular, all of the other features were binary. Army values ranged from 0 to hundreds. I hypothesized that the features encoding armies could lead to unstable training. Using some knowledge about the game, I thought it would make sense to use a function like a sigmoid, in order to squish large values down. - -As a generals.io player, this made sense to me, as the difference between 1 army on a tile and 2 army on a tile is very large, but the difference between 14 and 15 army is not so large. I expected better performance due to the inductive bias I was adding to the model. However, the loss curve showed similar, slightly slower convergence to the previous experiment. The policies were about the same too. - -
-
- {% include figure.html path="assets/img/2023-11-09-transformers-as-gamers/squish.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Train and validation loss curves of training after squishing the army features. -
- -## Deeper Network - -Motivated by the success of ResNets when CNNs were leading computer vision, I wanted to try using a deeper network with residual connections. I replaced the network with a stack of one 5x5 filter, followed by nine 3x3 filters. I added skip connections between every two layers. The performance was again about the same. I suspect that more data and/or an even deeper network is needed in order to see benefits from a deeper network. - -
-
- {% include figure.html path="assets/img/2023-11-09-transformers-as-gamers/deep.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Train and validation loss curves of training after using a deeper network with residual connections. -
- -# Discussion and Conclusion - -Combining all of the above leads to a decent policy with coherent strategy, shown below. -
-
- {% include figure.html path="assets/img/2023-11-09-transformers-as-gamers/game.gif" class="img-fluid rounded z-depth-1" %} -
-
-
- The best performing policy I trained, defeating an unsuspecting opponent (me). Replayed at 5x speed. -
-Qualitatively, this policy is much better than Yilun's policy. While I don't have his to evaluate, he shows a [replay](https://bot.generals.io/replays/Be0wkw2t-) of its early game performance. My bot does a much better job in the early game of efficiently expanding in order to maximize growth rate. Yilun's bot has a handle on using a large army to explore, but mine is able to collect army efficiently on turns 25-50 in order to take the opponent's land. - -This is interesting because my policy is actually still much simpler than Yilun's, given he uses a LSTM. It's possible that the training was not very stable, or it may have overfit, or he just chose a bad replay of his bot. - -## Limitations and Future Work -The bot is not competitive with any human that has played a decent amount of games. It is still pretty inefficient and makes many nonsensical moves (it moves back and forth a few times in the replay). - -There is still a lot to try, and I'll actually continue working on some of these ideas after the class, as it was a lot of fun. -There's a decent amount of low hanging fruit: -
    -
  1. I noticed the bots often like to expand toward the wall. I'm guessing this is because there is no information encoding the boundaries of the wall, and I just let the padding in the convolutions take care of it. Adding a special indicator would likely be helpful.
  2. -
  3. Use reinforcement learning for improving the policy beyond the demonstrations.
  4. -
  5. Train on a dataset consisting of only one or only a few players in order to reduce multimodality problems (similar style of play).
  6. -
  7. Adding memory to the network.
  8. -
  9. Trying a vision transformer, and trying to have it attend to previous states for recurrence too.
  10. -
- -I think achieving even higher levels of performance would require doing some form of search. From my understanding, the most similar approach would be something like MuZero. This would allow us to properly learn a world model despite the incomplete information, which we can then do search on. - -Overall, I learned a ton in this project about how to apply deep learning to a new problem. I encountered many of the issues described in "Hacker's Guide to DL" and the related readings. My biggest takeaway is to spend the time setting up the proper infrastructure. Poor infrastructure causes bugs and makes it really hard to iterate. - - - - - - - - - diff --git a/_posts/2023-11-09-uncertainty.md b/_posts/2023-11-09-uncertainty.md deleted file mode 100644 index ac6329e1..00000000 --- a/_posts/2023-11-09-uncertainty.md +++ /dev/null @@ -1,272 +0,0 @@ ---- -layout: distill -title: Analytic, Empirical, and Monte Carlo Bayesian Methods for Uncertainty Estimation -description: In the realm of machine learning, the robustness and reliability of predictive models are important, especially when confronted with Out-of-Distribution (OOD) data that deviate from the training distribution. Bayesian models stand out for their probabilistic foundations, being able to offer ways to quantify uncertainty. This project will present a survey of already-established methods of estimating uncertainty, as well as how we adapted/generalized them. -date: 2023-11-09 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Tahmid Jamal - affiliations: - name: MIT - - name: Nguyen Tran-Bach - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-uncertainty.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. 
-toc: - - name: Motivation - - name: Stochastic Weight Averaging Gaussian (SWAG) - - name: Local Ensemble Monte Carlo Dropout - - name: Sketching Curvature for Efficient Out-of-Distribution Detection (SCOD) - - name: SCODv2 - - name: Stochastic Curvature and Weight Averaging Gaussian (SCWAG) - ---- - -## Motivation - -Many practical uses of deep neural network (DNN) models involve using them with a restricted amount of training data, which doesn't encompass all the potential inputs the model might face when actually used. This exposes a significant limitation of models based on data: they can behave unpredictably when dealing with inputs that differ from the data they were trained on, known as out-of-distribution (OOD) inputs. Machine learning models that are trained within a closed-world framework often mistakenly identify test samples from unfamiliar classes as belonging to one of the recognized categories with high confidence. This tendency of the model to make overconfident misclassifications is sometimes described in the literature as "arrogance." This can be catastrophical when such models are deployed in the real world. For instance, in self-driving technology, the system should warn the user and transfer the control to the human driver when it encounters unfamiliar scenes or objects that were not present in its training data, and thus it is unable to make a safe and reliable decision. - -Consequently, there has been a surge of research focused on improving DNN models to be able to assess their own uncertainty and recognize OOD inputs during their operational phase . Not only improving their usage in safety critical settings, being able to predict model uncertainty is important in predicting model performance. With increased ability to predict model performance, we can improve the training process and improve inference. 
In this project, we explore 3 different methods of quantifying uncertainty: Monte Carlo Dropout, Sketching Curvature for Efficient Out-of-Distribution Detection (SCOD), and Stochastic Weight Averaging Gaussian (SWAG). We'll also attempt to modify these existing methods and even combine them in order to provide improvements to practicality such as RAM usage and improvements to important metrics we establish. We develop 2 methods, SCODv2 which is an extends a simple isotropic prior used by SCOD and SCWAG which combines elements of both SCOD and SWAG. - -## Stochastic Weight Averaging Gaussian (SWAG) -SWAG is a method that can improve generalization in deep learning settings. It approximates the posterior distribution of weights as a normal distribution with mean determined by an average of weight iterates and covariance determined by the sum of the running variances of each weight and a low rank covariance matrix. More specifically, we use the following to perform our weight updates. - -``` python -def train_swag(net, loader, num_epochs=5, K=25, swag_freq=50, swag_start=1): - theta = get_all_weights(net) - d = theta.shape[0] - D = torch.zeros((d,K)).cpu() - theta_bar = theta.clone().cpu() - M2 = torch.zeros(d).cpu() - sigmas = torch.zeros(d).cpu() - optimizer = optim.Adam(net.parameters(), lr=0.001) - - net.train() - for epoch in range(num_epochs): - train_loss = 0 - n_ = 0 - for batch_idx, (data, target) in enumerate(loader): - optimizer.zero_grad() - output = net(data.to(device)) - loss = F.cross_entropy(output, target.to(device)) - loss.backward() - optimizer.step() - train_loss += loss - if batch_idx % swag_freq == 0: - if (swag_start <= epoch): - with torch.no_grad(): - w1 = n_/(1+n_) - w2 = 1/(1+n_) - theta = get_all_weights(net).cpu() - theta_bar_new = w1*theta_bar + w2*theta - M2 = M2 + (theta-theta_bar)*(theta-theta_bar_new) - theta_bar = theta_bar_new.clone().cpu() - D[:,0:-1] = D[:,1:] - D[:,-1] = theta - theta_bar - sigmas = M2/(1+n_) - n_ += 1 - return 
theta_bar, sigmas, D - -``` - -The learned $$\bar{w} \in \mathbf{R}^d$$ is the mean of the posterior distribution on weights. The $$\Sigma$$ vector represents the running variance of the weights and can be diagonalized to get a very rough posterior. (The method we used to determine the running variance is unlike the one presented in the SWAG paper due to issues with numerical instability and catastrophic cancellation which resulted in negative variances. To address this issue we used Welford's online algorithm.) The $$D$$ matrix contains the last $$K$$ deviations of updated $$w$$ values from $$\bar{w}$$ (including the effect that the updated $$w$$ has on $$\bar{w}$$). This allows us to form a rank $$K$$ approximation of the posterior covariance. Thus we have the posterior $$P(w\mid\mathcal{D}) = \mathcal{N}\left(\bar{w}, \frac{1}{2}\left(\text{diag}(\Sigma) + \frac{DD^T}{K-1} \right)\right)$$. To sample from the posterior, we do the following reparametrization - -$$ z_d \sim \mathcal{N}(0, \mathbf{I}_d) $$ - -$$z_K \sim \mathcal{N}(0, \mathbf{I}_K) $$ - -$$ -\tilde{w} = \bar{w} + \frac{1}{\sqrt{2}}\text{diag}(\Sigma)^{\frac{1}{2}}z_d + -\frac{1}{\sqrt{2(K-1)}}Dz_K -$$ - -It is important to note that while a prior distribution on weights is not specified, it is implicitly chosen through how often we update our running average of the weights, variances, and deviations, as well as the optimizer being used. - -For the purposes of inference, each $$\tilde{w}$$ determines the parameters for a clone model and with $$S$$ samples we effectively have an ensemble of $$S$$ models. Their output distributions are averaged arithmetically to yield the final output. We expect that for in-distribution inputs, the individual outputs do not disagree drastically. And for out-of-distribution inputs, the individual outputs can differ a lot. So like with out other ensemble method, a good metric of uncertainty here is to use the average-pairwise KL divergence between the distributions. 
Here are some results and findings of this metric applied to SWAG. - -We train a model with SWAG on the MINST and CIFAR10 datasets. First, we only train on the digits/classes from 0-5 and look at the KL scores on the digits/class 6-9. Expectedly, the scores tend to drastically increase on the unseen digits. However, the increase is less drastic for the CIFAR dataset as the data is a bit more homogenous. - -
-
- {% include figure.html path="assets/img/2023-12-12-uncertainty-detection-project/violin_mnist_swag.jpg" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-uncertainty-detection-project/violin_cifar_swag.jpg" class="img-fluid rounded z-depth-1" %} -
-
- -We can also take a look at the data itself and identify the images which have the highest and lowest scores for different splits of the data. For these images, we identify the true label, followed by the KL score assigned to the image (higher being more uncertain), and finally the predictions made by 10 of 25 sampled models. - -
-
- {% include figure.html path="assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_id_swag.jpg" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_id_swag.jpg" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_id_swag.jpg" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_id_swag.jpg" class="img-fluid rounded z-depth-1" %} -
-
-The above pictures correspond to the highest and lowest scores from in-distribution training data. The major contributors for the high scores for MNIST are digits that are so poorly written it's hard to say what it is or it resembles another image too much. For CIFAR, it seems like the high score images are inducing confusion due to their color scheme or background. A lot of images with a blue or sky background such as those of birds do seem to be mistaken for planes at times. The low score images on the other hands are all extremely similar to one another; they're very well written digits (usually 0) or something that is obviously a car (usually red). - -Next, we take a look at how these scores fair on new out-of-distribution images. -
-
- {% include figure.html path="assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_ood_swag.jpg" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_ood_swag.jpg" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_ood_swag.jpg" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_ood_swag.jpg" class="img-fluid rounded z-depth-1" %} -
-
- -These are the highest and lowest scores on the OOD dataset. It's unsurprising that the highest scores are assigned to the images that are unlike anything in the training set. For MNIST this is the number 8 and for CIFAR there doesn't seem to be any one class. However, it is important to see that there are still images where our model has very low scores (high certainty). However, this simply comes from the fact that these inputs happen to look more similar to one class of training images (9 is really similar looking to 4 and trucks look pretty close to cars, especially if they're red since a lot of the low score car-images are red). - -All the methods used in this paper tend to show similar results for the images corresponding to the highest and lower measures of uncertainty so we won't be lookig at those images for every single method. - -Now that we've seen that we can use our measure of uncertainty as how well the output will yield the correct answer, we can try using uncertainty of output as a way to predict error. Ideally, we would like to see some sort of correlation between our uncertainty measure and our actual errors or probability of corect answer. So we retrained our models on all digits using SWAG and looked at the performance on a validation set. Notice that we don't care too much about the error itself, but it's (actually the probability of target label) correlation with the uncertainty measure. In particular, we look at the Spearman correlation to capture nonlinear relationships. - -
-
- {% include figure.html path="assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_swag.jpg" class="img-fluid rounded z-depth-1" %} - Spearman Correlation: -.9923 -
-
- {% include figure.html path="assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_swag.jpg" class="img-fluid rounded z-depth-1" %} - Spearman Correlation: -.3867 -
-
- -There is significant negative correlation which is what we're looking for. If we can predict how well our model will perform on certain inputs, it allows us to better deploy model in real world situations as well as possibly improve it by doing something such as boosting or improved training. We now look to improve this relationship between error and uncertainty measure by finding better uncertainty measures. - -## Local Ensemble: Monte Carlo Dropout - -We start off by comparing with a very simple method. Given a neural net with Dropout layers, and a new datapoint from test ID or OOD datasets, we output $$50$$ different probabilistic distributions (rather than setting our model on evaluation mode, we keep the Dropout layers on), $$p_1, p_2, \ldots p_{50}$$. Our uncertainty score is $$\text{Unc}(x) = \frac{1}{49\cdot 50}\sum_{i\neq j}D_\text{KL}(p_i\, \Vert \, p_j)$$, i.e. the average KL divergence between any pair of distributions. The intuition is that, when the model shouldn't be confident about a OOD datapoint, dropping weights (which can be seen as perburtabions) should change our output distributions significantly. This sensitiveness indicates lack of robustness and certainty. - -This model is very simple and our weight "peturbations" are not too mathematically motivated in the sense of them coming from some justified posterior. However, it still provides a good baseline to compare against. - -
-
- {% include figure.html path="assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_mc.jpg" class="img-fluid rounded z-depth-1" %} - Spearman Correlation: -.9944 -
-
- {% include figure.html path="assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_mc.jpg" class="img-fluid rounded z-depth-1" %} - Spearman Correlation: -.2936 -
-
- -Overall, the error estimation on MNIST is about the same but significantly worse on the CIFAR dataset. This is about expected since MC dropout is such a simple method. - -## Sketching Curvature for Efficient Out-of-Distribution Detection (SCOD) - -There is research literature on leveraging the local curvature of DNNs to reason about epistemic uncertainty. [Sharma et al.] explores this idea through a Bayesian framework. Let us assume a prior on the weights, $$P(w) = \mathcal{N}(0, \epsilon^2 I)$$. By using a second-order approximation of the log-likelihood $$ \log p(y,w\mid x) $$, we arrive at the Laplace posterior $$ P(w\mid\mathcal{D}) =\mathcal{N}(w^{MAP}, \Sigma^*)$$, where $$ \Sigma^* = \frac{1}{2}(H_L + \frac{1}{2\epsilon^2}I)^{-1}$$ and $$H_L$$ is the Hessian of the cross-entropy loss wrt $$w$$. Given a pretrained DNN, $$\theta=f(x,w)\in\mathcal{R}^d$$ where $$\theta$$ determines a distribution on $$y$$, we assume that the trained weights $$w^*$$ are a good approximation for $$w^{MAP}$$. We define our uncertainty metric to be the change in the output distribution, $$\theta$$, when the weights are perturbed around $$w^*$$ according to the posterior distribution. Using the KL divergence to measure distance between output distributions, we define - -$$ \text{Unc}(x) = \mathbb{E}_{dw\sim \mathcal{N}(0, \Sigma^*)}\left[ D_{\text{KL}}\left( p(\theta\mid x, w^*)\, \Vert \, p(\theta\mid x, w^* + dw)\right) \right] $$ - -We can approximate the local KL divergence using the Fisher information matrix (FIM) of $$y$$ wrt $$\theta$$: $$D_{\text{KL}} \approx d\theta^TF_\theta(\theta)d\theta + O(d\theta^3)$$. Also, by change of variables, we can rewrite the FIM in terms of $$w$$: $$F_w(x, w) = J^T_{f,w}F_\theta(f(x,w))J_{f, w}$$ where $$J_{f,w}$$ is the Jacobian of the network outputs with respect to the weights. 
Putting this together, we get that - -$$ \text{Unc}(x) = \mathbb{E}_{dw\sim \mathcal{N}(0, \Sigma^*)} \left[dw^TF_w(x,w^*)dw \right] = \text{Tr}\left( F_w(x,w^*)\Sigma^*\right)$$ - -We can also approximate $$\Sigma^* \approx \frac{1}{2}(MF_{w^*}^\mathcal{D} + \frac{1}{2\epsilon^2}I)^{-1}$$, where $$F_{w^*}^\mathcal{D}$$ is the averaged FIM on the training dataset and $$M$$ is the size of the training set. If the neural net has $$N$$ weights, then the FIMs are $$N \times N$$ matrices and computing them becomes intractable (both time complexity and memory). - -For simplicity, let us assume that the output of our DNN, $$\theta$$, is the categorical distribution, i.e. $$\theta_i$$ represents the probability assigned to class $$i$$. In this case, we have that $$F_\theta(\theta) = \text{diag}(\theta)^{-1}$$. Therefore, the FIM for one input has rank at most $$\min(N, d)$$ and we can represent it as $$F_w(x,w^*) = LL^T$$, where $$L=J_{f,w}^T\text{diag}(\theta)^{-1/2}$$. The same trick, however, doesn't work for $$F_{w^*}^\mathcal{D}$$ as it can reach rank as high as $$\min(N, Md)$$. For now, let us assume that we can find a low-rank approximation of $$F_{w^*}^\mathcal{D} = U\text{diag}(\lambda)U^T$$, where $$U\in\mathbb{R}^{N\times k}$$ and $$\lambda\in\mathbb{R}^k$$. With a few mathematical tricks (which can be followed in [Sharma et al.]), one can prove that - -$$\text{Unc}(x) = \epsilon^2\Vert L\Vert_F^2 - \epsilon^2 \left \Vert \text{diag}\left(\sqrt{\frac{\lambda}{\lambda + 1/(2M\epsilon^2)}}\right)U^TL \right \Vert^2_F$$ - -[Sharma et al.] also provides a randomized algorithm for finding $$U$$ and $$\lambda$$ by using the FixedRankSymmetricApproximation algorithm presented in [Tropp et al.]. We chose $$\epsilon \approx 0.04536$$ by training another model with the same hyperparameters and architecture and taking the standard deviation of all its weights. This is the pseudocode of the SCOD algorithm: - -
-
- {% include figure.html path="assets/img/2023-12-12-uncertainty-detection-project/scod_algo.png" class="img-fluid rounded z-depth-1" %} -
-
- - -$$\Sigma\in\mathbb{R}^{r\times N}$$ and $$\Psi \in \mathbb{R}^{s\times N}$$ are random sketching matrices, which we chose to simply be matrices with i.i.d standard Gaussian entries. $$r+s$$ is the size of the sketch and is ideally chosen as high as RAM allows. We also use the budget split $$s = 2k+1$$ and $$r=4k+3$$, where $$k$$ is the target rank, as [Tropp et all.] suggests. We ended up setting $$k=50$$ and got the following results: - -We have been able to implement SCOD, but due to issues with saving our results and time, we can now only show the performance of the uncertainty score on predicting error on a subset (classes 0-5) of the CIFAR dataset. - -
-
- {% include figure.html path="assets/img/2023-12-12-uncertainty-detection-project/scod_correlations.png" class="img-fluid rounded z-depth-1" %} - Spearman Correlation: -.1083 -
-
- -The score is a bit suspiciously low, so there may be something wrong with our implementation ignoring the fact that we only test of the subset. Nonetheless, it still a significant negative correlation and we get similar results when looking at high uncertainty and low uncertainty images. - -## SCODv2 - -We also did our own tweak on SCOD. Rather than having a vanilla prior, we can generalize it to any normal distribution with diagonal covariance. Let's say that our prior is $$w\sim\mathcal{N}(0, \Sigma)$$, where $$\Sigma$$ is a diagonal matrix. Then, our Laplacian posterior's covariance matrix becomes $$\Sigma^* = \frac{1}{2}(MF_{w^*}^\mathcal{D} + \frac{1}{2}\Sigma^{-1})^{-1}$$. By the Woodbury matrix identity $$ \Sigma^*=\Sigma - 2\Sigma U\left(\text{diag}(M\lambda)^{-1}+2U^T\Sigma U \right)^{-1}U^T\Sigma$$. Using the well-known identities, $$\Vert A\Vert_F^2 = \text{Tr}(AA^T)$$, $$\text{Tr}(AB) = \text{Tr}(BA)$$, we get that - -$$ \text{Unc}(x_{\text{new}}) = \text{Tr}\left(\Sigma^*F_w(x_{\text{new}},w^*)\right) = \text{Tr}\left(L^T\Sigma L\right) - 2\text{Tr}\left(L^T\Sigma U\left(\text{diag}(M\lambda)^{-1}+2U^T\Sigma U \right)^{-1}U^T\Sigma L\right)$$ - -$$= \left \Vert L^T \Sigma^{1/2}\right \Vert_F^2 - 2\left \Vert L^T \Sigma UA\right \Vert_F^2$$, where $$AA^T = \left(\text{diag}(M\lambda)^{-1}+2U^T\Sigma U \right)^{-1}$$. - -Since $$\Sigma$$ is a diagonal matrix, the biggest matrices we ever compute are of size $$N\times \max(k, d)$$, which means that the computation is equally efficient asymptotically to the vanilla prior. To decide what diagonal matrix to use, for each layer, we assigned the same variance given by the variance of the weights of the same layer in a differently trained model (with same architecture). - -Due to issues with saving our results and timing, we are not able to show our results estimating error from uncertainty for SCODv2. 
- -## Stochastic Curvature and Weight Averaging Gaussian (SCWAG) -Whereas SCOD attempts to analytically approximate the posterior by approximating the Hessian using the Gauss-Newton matrix, SWAG approximates the posterior by keeping track of running moments and deviations when it approaches flat regions in the loss landscape. What if we could combine these two ideas? We could use the SWAG empirical posterior. This method would not require matrix sketching of any form and lowers the required RAM, as SCOD can be quite RAM intensive. Using the $$\Sigma$$ and $$D$$ from SWAG to determine the posterior $$\Sigma^*$$, we arrive at the following measure of uncertainty (after digging through some math). - -$$ \text{Unc}(x) = \text{Tr}\left( F_w(x,w^*)\Sigma^*\right) = \frac{1}{2} \text{Tr}\left(F_w(x,\bar{w})\left(\text{diag}(\Sigma) + \frac{DD^T}{K-1} \right) \right)$$ - -$$ \text{Unc}(x) \propto ||L\Sigma||_F^2 + \frac{1}{K-1}||LD||_F^2$$ - -We do this by introducing a wrapper model that takes in a base model as well as the SWAG outputs in order to perform the Jacobian-based operations during each forward pass. For evaluation, we look at the Spearman correlation of the uncertainty score with the target probability and we notice some improvement over SWAG on the CIFAR dataset. - -
-
- {% include figure.html path="assets/img/2023-12-12-uncertainty-detection-project/mnist_scwag_correlations.jpg" class="img-fluid rounded z-depth-1" %} - Spearman Correlation: -.9897 -
-
- {% include figure.html path="assets/img/2023-12-12-uncertainty-detection-project/cifar_scwag_correlations.jpg" class="img-fluid rounded z-depth-1" %} - Spearman Correlation: -.8484 -
-
- -With MNIST, we already had near perfect correlation so this slight decrease isn't too worrisome. However, the Spearman correlation on CIFAR has shot up drastically which shows that this method of combining the analytical approximation of uncertainty with an empirically constructed posterior has merit. There is something worrisome with the fact that the model with exactly $$\bar{w}$$ as its weights is producing distributions that have a maximum value of around $$.25$$. We suspect we could have made some error here but have not been able to pinpoint anything wrong with our implementation. The model still seems to have fairly accurate predictions as seen below. - -
-
- {% include figure.html path="assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_scwag.jpg" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_scwag.jpg" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_scwag.jpg" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_scwag.jpg" class="img-fluid rounded z-depth-1" %} -
-
- -## Future Work - -For SCWAG, we could work on figuring out why our output distributions becomes less spiked as a result of using $$\bar{w}$$ as the weights for the network. We suspect that it's a result of starting our SWAG averaging for $$\bar{w}$$ too early so we were considering $$w$$ far away from flat local minima of the loss landscape. Additionally, we could inspect the arcing nature in the plot of target probabilities vs score. For near 0 scores, it seems that the target probabilities arc from .25 to 0 which is unusual. Finally, we want to think of a way to introduce the loss landscape more into our approach. Maybe we can form a more expressive posterior. If we can manage that, our uncertainty estimates and correlation might improve. But more importantly, we would be able to call our method SCALL(y)WAG which is pretty cool. - -In general and particularly for SCOD, we'd still like to experiment with priors that induce different types of posteriors. Because the dependence on prior is explicit here as opposed to implicit for SWAG, it allows us more room for experimentation in choosing nice expressive priors. \ No newline at end of file diff --git a/_posts/2023-11-09-universal-features.md b/_posts/2023-11-09-universal-features.md deleted file mode 100644 index 4d9def7e..00000000 --- a/_posts/2023-11-09-universal-features.md +++ /dev/null @@ -1,169 +0,0 @@ ---- -layout: distill -title: Sparse Autoencoder Universality - Under What Conditions are Learned Features Consistent? -description: This project aims to study the universality of features in LLMs by studying sparse autoencoders trained on similar layers of different models. 
- -date: 2023-12-10 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Misha Gerovitch - url: "https://www.linkedin.com/in/michael-gerovitch-2010a61b0/" - affiliations: - name: MIT - - name: Asher Parker-Sartori - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-universal-features.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Setup - - name: Experiments and Results - - name: Conclusion - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Introduction - -Neural networks are black boxes. We understand the process by which they are created, but just as understanding the principle of evolution yields little insight into the human brain, designing a model’s optimization process yields little insight into how that model reasons. The field of mechanistic interpretability attempts to understand how human-understandable concepts combine within a model to form its output. With sufficiently good interpretability tools, we could ensure reasoning transparency and easily find and remove harmful capabilities within models (such as hallucinations) . - -In 2022, Anthropic identified a core challenge in interpreting a model’s reasoning layer-by-layer: polysemanticity, a phenomenon in which a single neuron activates for many different concepts (e.g. 
academic citations, English dialogue, HTTP requests, and Korean text). This is a result of a high-dimensional space of concepts (‘features’) being compressed into the lower-dimension space of the neural network . Sparse autoencoders, a form of dictionary learning, help to linearly disentangle polysemantic neurons into interpretable features . - -Sparse autoencoders work by projecting a single layer of a neural network into a higher-dimension space (in our experiments, we train autoencoders ranging from a 1:1 projection to a 1:32 projection) and then back down to the size of the original layer. They are trained on a combination of reconstruction loss, their ability to reconstruct the original input layer, and a sparsity penalty, encouraging as many weights as possible to be 0 while retaining good performance - -## Setup - -{% include figure.html path="assets/img/2023-11-09-universal-features/image (2).png" class="img-fluid" %} -(https://www.alignmentforum.org/posts/wqRqb7h6ZC48iDgfK/tentatively-found-600-monosemantic-features-in-a-small-lm) - -The intuition behind sparse autoencoders is that if each neuron in the input layer learns n features, then projecting to n dimensional space while retaining all the information from the input layer should theoretically leave us with one feature represented in each encoded neuron. Then, these neurons should all be monosemantic, meaning they should each represent one interpretable concept. Because the columns of the decoder matrix tell us how these encoded neurons linearly combine to recreate the input layer, each column of the decoder matrix represents one feature of the network (in other words, what linear combination of neurons represents an individual concept). - -However, because sparse autoencoders were only popularized as an interpretability method earlier this year by Anthropic, the literature on them is, for lack of a better word, sparse. 
In particular, we were curious about whether the features learned by sparse autoencoders are universal. In other words, we’d like to know if the learned features are similar regardless of variables like autoencoder size, model size, autoencoder training set, and model training set. If they are, it shows both that sparse autoencoders consistently extract the correct features and that learned features are similar across different model sizes and training sets. If they aren’t, it would be evidence that sparse autoencoders don’t accurately capture the full scope of features a model represents and that we cannot easily transfer them across different models. - -In our experiments, we train autoencoders of projection ratios ranging from 1:1 to 1:32 on five different Pythia models: 70m, 160m, 410m, 160m deduped, and 410m deduped. In some cases, we exclude data from Pythia 410m because running experiments on it was too computationally expensive. We train on the first four layers of each model to provide additional insight into how the efficacy of autoencoders changes as one moves deeper into the model. We also train autoencoders on two different datasets from the same distribution to test whether the learned features change in response to small perturbations in training order or distribution. Together, these models let us answer a few broad questions surrounding the consistency of learned features: -- Do learned features consistently transfer between different model sizes and training datasets? -- Are learned features consistent across different autoencoder sizes? -- Do sparse autoencoders learn interpretable features less consistently in later layers where reasoning may become more abstract or hard to follow? - -These meta-level questions build on Anthropic’s feature-extraction process outlined below: -{% include figure.html path="assets/img/2023-11-09-universal-features/image (3).png" class="img-fluid" %} -(This image is from Cunningham et. 
al ) - -To answer these questions, we use the following three metrics in a variety of comparisons: -- Mean cosine similarity (MCS) between decoder weights – since the columns of the decoder matrix represent the features, we can use them to measure the similarity of the learned features. To compare two decoders, we start by taking the mean cosine similarity between the first column in the first decoder and every column in the second decoder. Because the decoders might learn features in different orders, we take the maximum of these similarities. We repeat this process for every column in the first decoder, and then we take the average similarity across the columns. -- Correlation between activation vectors of encoded layers – another way of inspecting the features learned by a sparse autoencoder is to examine when different neurons in the encoded layer activate on different types of token. So, to compare two autoencoders, we pass over 10,000 tokens of text through their respective models and save vectors representing each encoded neuron’s activations across those tokens. Then, as with mean cosine similarity, we took the maximum correlation between a neuron in the first encoder and any neuron in the second encoder, and then averaged these values across every neuron. If two encoders typically had the same neurons activating for the same tokens, this is strong evidence that the encoders learned similar features. -- Feature frequency of an autoencoder – because neurons in encoded layers are intended to represent specific individual concepts, we expect them to activate much less than typical neurons in a neural network. We used this metric both as a way of verifying that our autoencoders are working as intended and as a way of evaluating how easily autoencoders are able to learn monosemantic features as we vary other parameters. 
To create feature frequency plots, we pass over four million tokens through the model and plot the frequency with which a feature activates (usually around once every 10-1000 tokens) against the number of features which had that frequency. - -Specifically, we ran the following experiments: -- On the question of whether learned features consistently transfer between different model sizes and training datasets: we created feature frequency plots, tables of correlations, and MCS graphs to contrast different model sizes along with deduped and original models. -- On the question of whether learned features are consistent across different autoencoder sizes: we created feature frequency plots, MCS tables, and graphs of pairwise activation correlations and MCS to contrast features learned by different autoencoder sizes. -- On the question of whether sparse autoencoders learn interpretable features less consistently in later layers where reasoning may become more abstract or hard to follow: we create feature frequency plots contrasting learned feature frequencies at different layers throughout Pythia 70m and Pythia 160m. - - - -# Experiments and Results - -We ran baselines for both MCS and correlations by taking the corresponding measurement between autoencoders trained on two different layers as well as randomly initialized weights. For MCS, the baseline was around 0.15 and was always below 0.20 in our experiments. For correlations, random measured to be about .40. - -## Training and evaluating sparse autoencoders -We trained a range of sparse autoencoders on the activations at the output of the MLP of various Pythia models. We used 100 million tokens of web text, from a HuggingFace dataset to train each autoencoder. As seen from the loss curve, this is likely over training. We spent some time fine-tuning the hyperparameters and conferred with other researchers who have trained similar autoencoders. You can see from our loss curve that we are likely over training. 
Since we are partially optimizing for reconstruction loss, we did not expect the quality of the model to decrease on test sets significantly. We ran our model with and without the sparse autoencoder on a small dataset and saw the perplexity go up from 25 to 31, which we were content with. However, there is a lot of room left for improvement to get better sparse autoencoders. - -{% include figure.html path="assets/img/2023-11-09-universal-features/W&B Chart 12_12_2023, 11_09_59 PM.png" class="img-fluid" %} -(total loss curve of a 1:8 autoencoder trained on Pythia-70m) - -## Do learned features consistently transfer between different model sizes and training datasets? - -{% include figure.html path="assets/img/2023-11-09-universal-features/frequency_plot.png" class="img-fluid" %} -Activation frequencies are distributed roughly symmetrically around 0.01, meaning that the modal encoded neuron activated around once every one hundred tokens. This is solid evidence that our sparse autoencoders were effectively learning sparse, monosemantic representations. If a neuron was only needed every one hundred tokens to reconstruct the input, it likely represents a very specific concept rather than many concepts all at once. We see no clear trend when varying model size, demonstrating that this does not have much effect on an autoencoder’s ability to extract monosemantic features. - - -{% include figure.html path="assets/img/2023-11-09-universal-features/Correlations between 1-8 AEs trained on 3 model sizes.png" class="img-fluid" %} -The table above measures the correlations between 1:8 autoencoders trained on layer 0 of three different model sizes. You can see that autoencoders trained on models closer in size have a higher correlation factor of their features, suggesting that smaller autoencoders may not store some of the features that large autoencoders do.
- - -{% include figure.html path="assets/img/2023-11-09-universal-features/Deduped vs non-deduped.png" %} -The above graph shows the MCS between autoencoders trained on deduped vs regular models. We anticipated the MCS of these models to be fairly high, but these were some of the lowest results we have seen, with autoencoders trained on layer 0 (of any of the three models we looked at) being around .4. Notably, all of our MCS were above .15 which was our baseline. - -## Are learned features consistent across different autoencoder sizes and training datasets? - -{% include figure.html path="assets/img/2023-11-09-universal-features/frequency_plots_by_layer.png" %} - -Sparsity tends to increase when the projection ratio increases, which makes sense, as a larger layer needs to use each neuron less often. This is evidence that our autoencoders are not learning all possible features, and using even larger autoencoders would allow us to unpack more features. - -{% include figure.html path="assets/img/2023-11-09-universal-features/Four AE sizes on Pythia-70m MCS.png" %} - -The above table looks at MCS loss of different sized autoencoders on Pythia 70m. Interestingly, we observed that MCS between autoencoders whose dimensions have the same ratio (e.g. 4:8 vs 8:16) are similar (e.g. both are .870.) - - -
-
- {% include figure.html path="assets/img/2023-11-09-universal-features/Correlation_Graph_70m.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-11-09-universal-features/Correlation_graph_160m.png/7.jpg" class="img-fluid rounded z-depth-1" %} -
-
- -{% include figure.html path="assets/img/2023-11-09-universal-features/Correlations between 1-8 AEs trained on 3 model sizes.png" class="img-fluid z-depth-2" %} - -{% include figure.html path="assets/img/2023-11-09-universal-features/Correlation_graph_160m.png/7.jpg" class="img-fluid z-depth-2" %} - - -Activation correlations and MCS were very high for all autoencoder projection ratios, demonstrating that different size autoencoders learn very similar features. Note that all metrics were lower for the autoencoders with a 1:1 projection ratio, likely because they were penalized on sparsity while not having any additional space with which to represent concepts. This means the total information they could retain was likely much less than the other sizes. We see a slight upward trend as autoencoder projection ratio increases, which is small enough that it could probably be chalked up to the exact mean-max methodology used in the calculations. In the MCS graphs, the orange line represents mean-max MCS going from the smaller projection size to the larger projection size, where the blue line is the inverse. It is positive evidence that the blue line is much lower, because we should expect the most important features to correlate strongly with some of the features learned by the larger autoencoder, while the many features learned by the larger autoencoder should not all necessarily have a match in the smaller one. - - -# Conclusion -## Discussion - -In this post, we explored the potential of sparse autoencoders as tools for interpreting neural networks, particularly focusing on their capability to disentangle polysemantic neurons into interpretable, monosemantic features. Our experiments, conducted on various configurations of Pythia models and sparse autoencoders, aimed to understand the consistency and universality of the features extracted by these autoencoders across different model sizes, training datasets, and autoencoder dimensions. 
- -Our findings indicate that sparse autoencoders are indeed effective in learning sparse, monosemantic representations. This effectiveness is observed across different model sizes and is not significantly impacted by the size of the model, suggesting a level of universality in the features extracted. However, our results also reveal that the correlation between features tends to be higher in autoencoders trained on models closer in size, hinting at some limitations in the transferability of learned features across vastly different model scales. - -Interestingly, we observed a tendency towards increased sparsity in the representations as we moved into the later layers of the network. This suggests that higher-level concepts in these layers might be more specialized and interpretable, aligning with intuitive expectations about neural networks. - -## Limitations - -Limitations of sparse autoencoders include that they are extremely computationally intensive, especially if one wants to interpret multiple layers of a network, neural networks are not entirely human-interpretable to begin with, so their learned features will never quite represent human concepts, and all the metrics we use to analyze them rely on overall trends rather than individual features, so despite our ability to provide evidence to help answer broad questions, our analysis is still very imprecise. - - -## Future Work - -One future direction is focussing on training better sparse autoencoders, ones with lower reconstruction and sparsity loss. Given that we did not optimize our project for this and were limited by time and compute, it is very possible that better sparse autoencoders can improve our results. - -It would also be interesting to train the same sparse autoencoder architectures on different datasets and see whether they are invariant to small perturbations in the dataset. If not, it’s evidence that the method may not work as well as we hope. 
- -Finally, we could start to look at the features that the autoencoders are finding. We were able to measure similarity and correlations but did not have the time to look at the actual concepts that the representations were finding. This could give us additional insight into similarities between models that we currently are overlooking. - - - -## Acknowledgements - -Special thanks to Sam Marks for suggesting the initial experiment ideas and to [MIT AI Alignment](https://www.mitalignment.org/) for providing connections with mentorship and compute resources. \ No newline at end of file diff --git a/_posts/2023-11-10-A-Transformer-Based-Approach-for-Simulating-Ecological-Recovery.md b/_posts/2023-11-10-A-Transformer-Based-Approach-for-Simulating-Ecological-Recovery.md deleted file mode 100644 index 269a0ebd..00000000 --- a/_posts/2023-11-10-A-Transformer-Based-Approach-for-Simulating-Ecological-Recovery.md +++ /dev/null @@ -1,95 +0,0 @@ ---- -layout: distill -title: Transformer-Based Approaches for Hyperspectral Imagery in Remote Sensing - - -description: This project employs Transformers for a comprehensive spatial-temporal analysis of post-Mountaintop Removal landscape recovery, utilizing satellite imagery and DEMs. It focuses on integrating geomorphological changes to predict ecological succession. Advanced Transformer architectures will be used to enhance the interpretability of complex spatial features over time, aiming to create an accurate 3D simulation environment for interactive exploration and effective restoration planning. -date: 2023-12-11 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Crystal Griggs - url: "https://crystalgriggs.com" - affiliations: - name: Massachusetts Institute of Technology - -# must be the exact same name as your blogpost -bibliography: 2023-11-10-A-Transformer-Based-Approach-for-Simulating-Ecological-Recovery.bib - -# Add a table of contents to your post. 
-# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - subsections: - - name: Objective - - name: Methodology - - name: Evaluation - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Introduction -Hyperspectral imaging (HSI) captures a wide spectrum of light per pixel, providing detailed information across numerous contiguous spectral bands. Unlike multispectral imaging, which only captures a few specific bands, hyperspectral imaging offers finer spectral resolution, allowing for more precise identification and analysis of materials. This capability makes it valuable in remote sensing for applications like mineral exploration, agriculture (e.g., crop health monitoring), environmental studies, and land cover classification. Each spectral band captures unique light wavelengths, enabling the identification of specific spectral signatures associated with different materials or conditions on the Earth's surface. HSI images present unique challenges in deep learning compared to typical RGB images due to their high dimensionality. Each pixel in a hyperspectral image contains information across hundreds of spectral bands, leading to a massive increase in the data's complexity and volume. This makes model training more computationally intensive and can lead to issues like overfitting if not handled properly. 
Current datasets, such as the Indian Pines or Salinas Scenes datasets, often have fewer samples compared to standard image datasets, exacerbating the difficulty in training deep learning models without overfitting. There's also the challenge of effectively extracting and utilizing the rich spectral information in these images, which requires specialized architectures and processing techniques. However, analysis of hyperspectral data is of great importance in many practical applications, such as land cover/use classification or change and object detection and there is momentum in the field of remote sensing to embrace deep learning. - -Traditional hyperspectral image classification (HSIC) methods, based on pattern recognition and manually designed features, struggled with spectral variability. Deep learning, particularly CNNs, brought advancements by extracting intricate spectral-spatial features, enhancing HSIC's accuracy. Yet, CNNs have their drawbacks, such as a propensity for overfitting due to the high dimensionality of hyperspectral data and limitations imposed by their fixed-size kernel, which could obscure the classification boundary and fail to capture varying spatial relationships in the data effectively. - -
- -
- - -
- -
- - -Compared to CNNs, there is relatively little work on using vision transformers for HSI classification but they have great potential as they have been excelling at many different tasks and have great potential in the field of HSI classification. Vision transformers, inspired by the Transformer architecture initially designed for natural language processing, have gained attention for their capacity to capture intricate patterns and relationships in data. This architecture leverages self-attention mechanisms, allowing it to model long-range dependencies effectively, which can be particularly advantageous in hyperspectral data where spatial-spectral interactions are crucial. Spectral signatures play a pivotal role in HSI analysis, enabling the differentiation of materials or conditions based on their distinct spectral characteristics, a capability that conventional RGB images cannot provide. Leveraging the strengths of vision transformers to effectively capture and exploit these spectral signatures holds promise for advancing the accuracy and precision of HSI in remote sensing classification tasks. - - -### Spectral Feature-Based Methods and Spatial–Spectral Feature-Based Methods - -Spectral feature-based approaches classify hyperspectral images (HSIs) by analyzing each spectral pixel vector individually. However, this method has limitations as it overlooks the spatial context of the pixels. Spatial–spectral feature-based methods on the other hand, consider both the spectral and spatial characteristics of HSIs in a more integrated manner. These methods involve using a patch that includes the target pixel and its neighboring pixels, instead of just the individual pixel, to extract spatial–spectral features. Among these methods, convolutional neural networks (CNNs) are particularly prominent, having shown significant effectiveness in HSI classification. Despite the success of CNN-based models in classifying HSIs, they are not without issues. 
The CNN's receptive field is limited by the small size of its convolutional kernels, such as 3×3 or 5×5, which makes it challenging to model the long-range dependencies and global information in HSIs. Additionally, the complexity of convolution operations makes it difficult to emphasize the varying importance of different spectral features. - -When comparing spectral feature-based methods with spatial–spectral feature-based methods in hyperspectral image (HSI) classification, each has distinct advantages and applications. Spectral feature-based methods are valued for their simplicity and efficiency, especially effective in scenarios where unique spectral signatures are key, such as in material identification or pollution monitoring. They require less computational power, making them suitable for resource-limited applications. Alternatively, spatial–spectral feature-based methods offer a more comprehensive approach by integrating both spectral and spatial information, leading to higher accuracy in complex scenes. This makes them ideal for detailed land cover classification, urban planning, and military surveillance where spatial context is crucial. Among spatial–spectral methods, convolutional neural networks (CNNs) stand out for their advanced feature extraction capabilities and adaptability, making them useful in a variety of applications, from automatic target recognition to medical imaging. Although, they face challenges such as the need for large datasets and difficulties in capturing long-range spatial dependencies. While spectral methods are efficient and effective in specific contexts, spatial–spectral methods, particularly those using CNNs, offer greater versatility and accuracy at the cost of increased computational complexity. 
- -### Hyperspectral Image Classification - -Three-Dimensional Convolutional Neural Network (CNN3D) - -The first stage will involve the collection of multi-spectral satellite imagery and high-resolution Digital Elevation Models (DEMs) of MTR-affected landscapes. This data will be preprocessed to ensure compatibility, which includes image normalization, augmentation, and the alignment of satellite imagery with corresponding DEMs to maintain spatial congruence. Preprocessing will also involve the segmentation of satellite data into labeled datasets for supervised learning, with categories representing different land cover types relevant to ecological states. - -SpectralFormer - -Transformer models have exhibited remarkable success beyond their initial domain of natural language processing. Their unique self-attention mechanism enables them to capture long-range dependencies, making them a potentially good choice for complex spatial analysis. Vision Transformers, in particular, offer a new approach by treating image patches as tokens and allowing them to process the global context of an image effectively. This capability is beneficial for satellite imagery analysis, where understanding the broader environmental context is critical. Transformers designed for point cloud data, adapting to the inherent irregularities of LiDAR measurements, can potentially uncover intricate structural patterns and temporal changes within landscape data. With strategic approaches like transfer learning, transformers can overcome their computational resource complexity. - -Group-Aware Hierarchical Transformer (GAHT) - -The final step will be the development of a 3D simulation environment using Unreal Engine. The simulation will visualize the predicted ecological states and changes over time, providing an interactive tool for users to explore the landscape recovery process. 
The interface will allow users to manipulate variables and observe potential outcomes of different restoration strategies in a virtual setting. - -### Conclusions - -For the spatial analysis of satellite imagery and LiDAR data, the evaluation will focus on the transformer’s ability to discern and classify diverse land cover types. The key metrics for this assessment will include accuracy, precision, recall, and the F1 score extracted from confusion matrices. The model should accurately identify and categorize ecological features from high-resolution imagery. -Temporally, the performance will be evaluated based on its capacity to predict ecological changes over time. This involves analyzing the model’s output against a time series of known data points to calculate the Mean Squared Error (MSE) for continuous predictions or log-loss for discrete outcomes. - diff --git a/_posts/2023-11-10-CNN-activation-patching.md b/_posts/2023-11-10-CNN-activation-patching.md deleted file mode 100644 index 87c549cb..00000000 --- a/_posts/2023-11-10-CNN-activation-patching.md +++ /dev/null @@ -1,104 +0,0 @@ ---- -layout: distill -title: Activation Patching in Vision Transformers -# description: -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Shariqah Hossain - # url: "https://en.wikipedia.org/wiki/Albert_Einstein" - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-10-CNN-activation-patching.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -# toc: -# - name: Citations - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. 
-_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -# Motivation -Neural networks contain large amounts of parameters and connections that they use to model a given phenomenon. Often, the breadth and complexity of these systems make it difficult for humans to understand the mechanisms that the model uses to perform its tasks. The model is treated like a black-box. When attempting to alter the behavior of the model when it does not behave in the desired way, engineers often rely on trial-and-error tuning of hyperparameters or providing larger, more diverse datasets for training. However, it is often difficult to get representative training data. In addition, hyperparameters can improve training but are limited in their ability to alter the innate limitations of a model. - -Mechanistic interpretability aims to unpack the underlying logic and behaviors of neural networks. Activation patching is an interpretability technique that replaces activations in a corrupted model with that of an uncorrupted model in order to analyze their influence on model output. When a patched activation improves model performance, it indicates that the patched activation plays a role relevant to the corrupted information. - -A better understanding of the logic within neural networks will allow for more strategic improvements to these models inspired by this newfound understanding. In addition, interpretability is the first step toward changing and correcting models. With an understanding of the underlying mechanisms comes more control of these mechanisms, which can be used to apply necessary changes for goal alignment and mitigating issues such as bias. 
Mechanistic interpretability plays a key role in ensuring the reliability and safety of AI systems. - -# Related Work - -Pearl et al. defines "causal mediation analysis" in order to analyze the effect of intermediate entities on a desired result. An application of the "indirect effect" introduced by this research is activation patching, also known as causal tracing. The indirect effect is the effect a given activation has on the output of the model. Since the activation is encompassed within the layers of a neural network, it has an indirect effect on the output. This analysis has been used in language models. Here, the indirect effect is defined as the role of an MLP or attention layer on the output. This role is analyzed by first corrupting the outputs of the network. Then, activations from an uncorrupted run of the model can be iteratively patched into the corrupted run in order to determine which activations can best restore the uncorrupted outputs. The activations with the most significant restorative impact have the highest indirect effect. - - -For example, if the hidden state for a given attention head in a language model with prompt "The Eiffel Tower is in" is patched into that of a prompt "The Colosseum is in" and successfully changes the output from "Rome" to "Paris", this indicates that the patched head contains knowledge about the Eiffel Tower. The figure below depicts this process of patching from a clean to corrupt run. - -{% include figure.html path="assets/img/2023-11-10-CNN-activation-patching/patch.png" class="img-fluid" %} - -Meng et al. also provides an example of how interpretability can open opportunities for model editing. Their research revealed that MLP layers in the middle of a GPT network had a large influence on the final predicted token from the model. This indicated that the model stored knowledge about the subject of the prompt within these layers. 
With the understanding of where knowledge of facts is stored within the model MLPs, these layers were then used to edit the knowledge of the language model in a way that is generalizable to other phrases that applied this knowledge. The study revealed the utility of interpretability projects in improving model understanding of the problem at hand. - -Activation patching has been used for language models, which rely on a transformer architecture. Vision transformers take advantage of the transformer architecture to perform common computer vision tasks such as image classification. These transformers use attention to glean valuable context about a given patch in an image, a task that a convolutional neural network has difficulty with due to the independent nature of its receptive fields. Through the use of multi-headed attention, vision transformers can focus on just the parts of the image that are relevant to the task at hand, and they do so with a global understanding of relevance across the entire image. The attention heads learn how to find relevant patches, or tokens, in an image for a given query. However, research regarding what exactly these heads "pay attention" to is still ongoing. The strategies of activation patching used for language transformers can therefore apply in the context of vision transformers due to the similarity in architecture and need to address questions of interpretability. Palit et al. performed a similar causal tracing analysis to that of the language model study except with a focus on BLIP, a multi-modal model that can answer questions about a given image. This investigation showed how activation patching can be performed on images along with language rather than language alone. - -# Methods - -The model that was used for this investigation was a vision transformer that was fine-tuned for the CIFAR10 dataset, a dataset that is often used to train image classification models. 
The pretrained model that was used, which can be found [here](https://huggingface.co/aaraki/vit-base-patch16-224-in21k-finetuned-cifar10), often fails to classify images in the dataset if they are converted to grayscale. For example, the model classifies the image of a deer below as a cat. -{% include figure.html path="assets/img/2023-11-10-CNN-activation-patching/image.jpg" class="img-fluid" %}{% include figure.html path="assets/img/2023-11-10-CNN-activation-patching/gray.jpg" class="img-fluid" %} - - -In order to trace which attention heads focus on color information, a clean, corrupted, and restored run was performed with the model. A batch was created with a given image along with a grayscale version of that image. The colored image played the role of the clean run. The grayscale image is a corrupted input that hinders the model's ability to classify the object in the image. This is reflected in the lower logits when the classifier attempts to classify the grayscale image. Even in the off chance the model is still able to classify the image correctly in the corrupted run, the logits will reflect the confidence, or lack thereof, of the model in its classification. 
While activation in the language setting often performs activation patching at the granularity of individual tokens, it was assumed that the relationship between token and layer in the image setting would not be as informative across a wide array of images. Language tokens have a shared meaning across different prompts, but image tokens play a different role depending on the object at hand. This information would not help in understanding the role of activations after averaging across all of the classifications of all images. So, this study was performed by corrupting all tokens in the image and restoring all of those tokens during the patching of a given hidden state. - -This analysis was performed for 1000 images from the CIFAR10 dataset. For each image, the output of the restored run was collected and compared to that of the corrupted run. The indirect effect of a given layer was calculated by the difference in the softmax probability of the class of the image between the corrupted and patched run. - -{% include figure.html path="assets/img/2023-11-10-CNN-activation-patching/eqn.png" class="img-fluid" %} - -For each image, this patching process was repeated for every attention layer in the neural network. Finally, the results of activation patching were averaged together for each layer across all of the images in order to get a general sense of which layers are most pertinent for processing image color information. - - -# Results - -When single layers were patched rather than a window of layers, results matched that of Meng et al. The patching of a single activation did not have a unique effect on the output. -{% include figure.html path="assets/img/2023-11-10-CNN-activation-patching/single.png" class="img-fluid" %} - -From averaging the change in outputs from activation patching 1000 CIFAR10 images, results show that attention heads of most relevance to color tended to be in the middle or last layers. 
-{% include figure.html path="assets/img/2023-11-10-CNN-activation-patching/attn.png" class="img-fluid" %} - -Here are some examples of activation patching for individual images from the dataset. The graphs display the probability in the output for the correct class of the given image. -{% include figure.html path="assets/img/2023-11-10-CNN-activation-patching/deer.png" class="img-fluid" %} -{% include figure.html path="assets/img/2023-11-10-CNN-activation-patching/car.png" class="img-fluid" %} -{% include figure.html path="assets/img/2023-11-10-CNN-activation-patching/plane.png" class="img-fluid" %} - -This experiment found that in a 12-layer network with a window size of 3, attention in the fourth layer and final layers of the network had the biggest impact on predictions made by the model. In these layers, the probability of the correct class of the image had the largest change when clean hidden states were patched from these layers into the grayscale run of the vision transformer. - -As portrayed by the tracing of individual images displayed above, not all images followed this trend exactly. The deer image, for example, had more emphasis on earlier layers and less emphasis on later layers. The automobile had a stronger influence from the attention layer 6 than that of 4. However, it was generally common for layers in the middle and end of the network to play a large role in this classification problem. - -# Conclusion -The influence of attention heads close to the output align with the conclusions found by Palit et al. This is likely due to direct connection of final layers to the output. There is also a significant influence of middle attention heads on the output, which is some indication of the key information that is stored in these layers relevant to color. 
A possible explanation is that these layers are close to the input layer, which directly stores color information, while maintaining enough distance from the input to have narrowed down (attended to) which tokens are relevant to the class the image belongs to. This study provided an initial insight into how vision transformers store information about colors of an image. - -Future investigations could include other forms of corruption to provide more information about the roles of the different attention layers in a transformer. For example, adding noise to the image embeddings would give insight to the general importance of different layers rather than just focusing on color information. By varying the amount of noise, this corruption would allow more control on how much the output would change and possibly allow room for more significant restorative effects from patching and therefore more definitive results as to where the most influential attention heads live in vision transformers. Other methods of corruption could also explore other tasks ingrained in image classification, such as blurring for edge detection or using silhouettes and image segmentation for texture or pattern identification. In addition, performing activation patching with window sizes other than 3 could provide more context as to how important an individual attention layer is. A similar experiment should be performed on other models and datasets. A focus on different objects, larger datasets, and larger networks would help verify the role of middle and final layer attention heads indicated by this study. 
\ No newline at end of file diff --git a/_posts/2023-11-10-Physics-Informed-Primal-Dual-Learning.md b/_posts/2023-11-10-Physics-Informed-Primal-Dual-Learning.md deleted file mode 100644 index fdd586a9..00000000 --- a/_posts/2023-11-10-Physics-Informed-Primal-Dual-Learning.md +++ /dev/null @@ -1,215 +0,0 @@ ---- -layout: distill -title: Challenges in Deep Learning Surrogates for Constrained Linear Optimization -description: Learning a deep net to optimize an LP, based on predicting the optimal basis vector. Surveys existing approaches in the literature. Demonstrates high accuracy of feasibility and optimality on small problem instances, but documents issues when scaling to larger problems. Benchmarks against a modern optimization solver, with discussions on upfront training vs. variable inference computation times. -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Thomas Lee - url: "https://www.linkedin.com/in/thomas-lee-2017/" - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-10-Physics-Informed-Primal-Dual-Learning.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Literature review - - name: Method - - name: Results - - name: Conclusion - ---- - -# Introduction -Physics-informed machine learning has emerged as an important paradigm for safety-critical applications where certain constraints must be satisfied. - -The goal of this project is to learn a deep learning surrogate for a linear programming optimization problem with hard constraints. The overall approach is inspired by standard KKT conditions. This project will attempt a different DNN approach that aims to predict basic feasible solutions (BFS), and then benchmark it against a modern optimization solver. 
This project will highlight challenges in designing deep learning LP surrogates. - -Due to computing resource limits, the focus of the project will be more about broad training strategy choices ("discrete" architecture choices), instead of a systematic sweep of hyperparameters. - -### Optimization problem -We are interested in learning to optimize this linear program with $n$ variables and $m$ equality constraints: - -$$ -\begin{aligned} - \min \quad &c^T y - \\ - \text{s.t. } &Ay = b, (\lambda) - \\ - &y \geq 0 -\end{aligned} -$$ - -The KKT conditions are: - -$$ -\begin{aligned} - \quad Ay &=b, - \\ - A^T\lambda + s &= c, - \\ - y_i s_i &= 0, \forall i \in [n], - \\ - y, s &\geq 0 -\end{aligned} -$$ -


- -# Literature review -Fundamental connections between deep learning and the polyhedral theory central to optimization has been noted in , which makes theoretical connections such as bounds on the number and shapes of linear regions expressed in a deep neural net. Beyond linear problems, surveys approaches that exhibit "amortized optimization", i.e. incurring larger upfront training times to learn parameters that (hopefully) can generalize sufficiently to novel problems within some set of specially-structured problems; and this upfront training can result in a model that may be orders of magnitude faster at inference time compared to classical (often iterative-based) models. - -Previous literature on machine learning for linearly-constrained optimization problems could be categorized by how they manage the various components of the KKT conditions. In many of these papers, there is some common deep neural architecture at the start (e.g. FCNN or GNN); and then to attempt to recover a feasible solution, the final layers in the architecture correspond to some "repair" or "correction" layers that are informed by optimization theory. - -(KKT equalities + Complementarity): -Building on , 's DNN predicts binary classifications for the active set of constraints, i.e. predicting which of $y_i$ or $s_i$ is 0. Then the remaining linear system can be solved efficiently. However, "false negatives" i.e. failure to detect a binding constraint can lead to infeasibilities (labeled as "misidentifications" in the paper), i.e. potentially violating inequality constraints. - -(Primal equality + Subset of primal inequalities): -E2ELR uses specialized repair layers for a single-period DC optimal power flow problem, where the differentiable repair layers guarantees power balance (an equality constraint) plus generator limits (a subset of primal inequalities); but can still violate line thermal limits (other primal inequalities). 
E2ELR leverages domain-specific cost structure, where electricity market penalties for line violations may be order(s) of magnitude smaller than energy balance violations; in this way this paper justifies the potential for remaining primal infeasibilities; but does not generally guarantee feasibility for all constraints. - -{% include figure.html path="assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/e2erl.png" class="img-fluid" %} -Source: , showing the power balance repair layer, i.e. a linear combination of the predicted points with an inequality-feasible extreme point, in order to satisfy the equality constraint. - -(Primal equality + All primal inequalities): -Following a similar application in control/RL, uses a Minkowski functionals / gauge map repair layer to guarantee feasibility in any general polyhedral feasible regions; however, a critical limitation of the gauge map approach is the need to calculate a strictly interior point (so as to transform the feasible region to an absorbing set) which generally may be computationally as hard as the optimization problem. - -{% include figure.html path="assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/gauge.png" class="img-fluid" %} -Source: , showing usage of the gauge mapping; note this requires identifying strictly interior points for every sample instance. - -Alternatively, "Deep Constraint Completion and Correction" DC3 enforces primal equality constraints as a differentiable layer and then embedded gradient steps as neural net layers ("gradient unrolling") to enforce inequality at the output layers; in addition, DC3 uses inequality penalty/Lagrangian term in the loss function to predict statistically near-feasible points, in some sense warm-starting the inequality correction steps. However, other papers observe that insufficient number of descent steps in DC3 could still lead to primal infeasibility (e.g. Table 2 of ). 
- -To truly guarantee polyhedral constraints, proposes using the Minkowski-Weyl theorem / double description algorithm to convert the algebraic halfspace polyhedron representation to the convex combination of vertices and extreme rays. Then the neural net is in charge of predicting the linear weights, and so the resulting convex combination must be feasible; the paper's setting is simplified by only considering homogenous constraints $Ay=0$ i.e. a cone so only the rays are needed. However, with nonzero RHS coefficients, naturally this method will face the curse of dimensionality at higher dimensions since the number of vertices can scale exponentially. The authors acknowledge this point: "Overall, one can expect the algorithm to be efficient only for problems with a reasonably small number $m$ of inequalities and dimension $d$." - - -(Primal + dual approaches): -Previous work has used a GNN to predict electricity prices (i.e. dual solutions), and then recover the primal solution. More recently, trains two separate neural networks: a primal and a dual network to emulate the optimization iterations of an augmented Lagrangian method; one drawback to this approach is the need to tune more hyperparameters related to the outer optimization loop. Instead, predicts one scalar value of the optimal value, and leverages the backward automatic differentiation to extract dual solution estimates from the trained neural architecture; these dual values are then used to solve the remaining system of equations to recover the full solution; builds on this work and enforces an input convex neural network architecture since the optimal value function is convex. Still, these above approaches do not necessarily guarantee primal feasibility. 
- -In a similar vein of trying to incorporate the whole primal-dual problem structure, the GNN for LP paper provides theoretical demonstration of a universal approximation property that GNN can express LP optimal solution mappings, and also demonstrates on a small $m=10,n=50$ problem of achieving 0% error in terms of feasibility detection, as well as prediction of the optimal solution; however, this paper does not address generalization performance. - - -# Method -### Data generation -Since the focus is on learning LP's generally, the dataset is fully synthetic. For this project, focus on having matrix $A$ fixed (one was created with entries drawn from the standard normal distribution), and training over different data examples of $x=(b,c)$. As an application example, this can represent learning on a fixed electric grid network topology and technology set, but learning to predict over different RHS resource capacities / renewables availabilities, and different fuel costs. - -To ensure feasibility (primal problem is feasible and bounded), the space of examples is generated by first creating primitive or latent variables, for each of the $N$ samples (this was implemented in PyTorch to be efficiently calculated in a vectorized way): -1. Binary vector $\xi \in \{0,1\}^n$ representing the optimal LP basis, with $\sum_i \xi_i = m$; the value is drawn uniformly from the $(n \text{ C } m)$ possible combinations. Practically this was implemented as a batched permutation of an identity tensor with extra columns. -2. Nonnegative vector $d \in \mathbb{R}^n$, with each $d \sim U[0,1]$ uniformly drawn to be nonnegative. -3. Then for each element $i$, use $\xi_i$ to determine whether to assign the value of $d_i$ to either the primal variable $y_i$ or the dual slack variable $s_i$. This way complementary slackness is enforced. Namely, - $$ - \begin{aligned} - y &:= d\odot\xi, - \\ - s &:= d\odot(1-\xi) - \end{aligned} - $$ -4. 
Sample $\lambda \in \mathbb{R}^n, \lambda_i \sim U[0,1]$. -5. Finally construct $b=Ay, c= A^T\lambda + s$. - -By constructing the dataset in this way, we also know the ground truth optimal solutions (which might not be unique if there are degenerate solutions, which is assumed here to have low impact due to the random coefficients), and importantly also the optimal LP basis. - -### Model -As a different approach, this project will try to predict the underlying latent target $\xi$, i.e. the optimal LP basis, as a classification problem. Since there may be non-local interactions between coefficients and variables, a fully-connected architecture is chosen, where every layer is followed by a ReLU nonlinearity. The neural net forms a mapping between inputs $x=(b,c) \in \mathbb{R}^{m+n}$ to outputs $\hat{\xi} = f(x) \in \{0,1\}^{n}$, i.e. binary classifications of whether each variable is chosen in the LP basis. Below is an illustration of all the LP bases vectors for the $n=10, m=5$ problem size; there are $10 \text{ C } 5 = 252$ bases. - -{% include figure.html path="assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/bases.png" class="img-fluid" %} - -Supervised vs. self-supervised learning: Many of the referenced papers devise self-supervised training methods, which is motivated by the expensive computational costs (time) to solve the dataset instances with traditional optimization solvers. However, this synthetic dataset is somewhat of an inverse-problem approach, i.e. by starting out with a sample of assumed optimal solutions, the optimal solutions are very efficiently identified during dataset generation. This synthetic generation can also be thought of as a data augmentation method. 
- -Since this is binary classification, the training loss used will be binary cross entropy, which is defined in PyTorch for each sample as: -$$l(\hat{\xi},\xi) = [l_1, ..., l_i, ..., l_n],\ \ l_i = \xi_i \log \hat{\xi}_i + (1-\xi_i) \log (1-\hat{\xi}_i)$$ - - -A softmax layer multiplied by $m$ is optionally added at the output of the NN, to enforce the requirement that there should be $m$ basic variables (in a continuously-relaxed way). - -Equality completion: Once this is done, the LP basis uniquely determines a basic solution (but not necessarily feasible) according to -$$\hat{y}^* = (A^\xi)^{-1}b,$$ -where $A^\xi$ is the $m\times m$ submatrix corresponding to the chosen columns. Rather than matrix inversion, this can be solved in a batched way with PyTorch (torch.linalg.solve) to obtain all samples' solutions. The entire flow, from supervised dataset generation to neural net prediction and then $y$ solution recovery, is illustrated in the flowchart below. - -{% include figure.html path="assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/flowchart.png" class="img-fluid" %} - -As baselines, also consider the DC3 model, where novelty versus the original paper is that here both $b$ and $c$ are varied across samples (as opposed to only the RHS $b$ vectors). Also benchmark against a modern first-order based optimization solver OSQP . For OSQP, the LP can be easily formulated in the necessary format: -$$\begin{bmatrix}b\\0\end{bmatrix}\leq \begin{bmatrix} - A \\ I_{n\times n} -\end{bmatrix} y \leq \begin{bmatrix}b\\ \infty_n\end{bmatrix} $$ - -All experiments are implemented on Google Colab T4 GPU instances (except OSQP which can use CPU). Neural network training is optimized with Adam. 
- -# Results -### Approximation and generalization -#### Small scale ($n=4,m=2$) -On a small $n=4,m=2$ problem, the proposed method (using a 3-layer FCNN with width-100 hidden layers; and trained for $<$100 epochs) can achieve near-perfect accuracy ($>$0.997) in both training and testing. The training set has 10,000 samples, and the test set has 1,000 samples, both generated according to the method above. The learning rate used was $10^{-3}$. - -The accuracies when including and excluding the softmax layer (sum to $m$) are reported in the plot below, where this layer does have some (very) small positive effect on training and testing accuracies. More importantly, the $\hat{\xi}$ predictions after the solution recovery step are all feasible, i.e. with no negative elements, and the predicted optimal solutions can be seen in the right plot to match extremely closely with the ground truth $y^*$. This latter property is a desirable feature of the proposed method, that is, once the correct basic feasible solution is predicted, then the linear equation solver will precisely recover the optimal solution. - -{% include figure.html path="assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n4m2_perfect.png" class="img-fluid" %} - -#### Scaling up ($n=10,m=5$) -Scaling up to a still quite small problem size of $n=10,m=5$ (i.e. 6.25 times larger in terms of $A$ matrix entries), now encounters generalization issues. The same network parameter sizing and training scheme was used here. The left plot shows training accuracy reaches about 0.97 after 300 epochs (and should continue rising if allowed to continue). However, the testing accuracy plateaus at around 0.93 with no further improvement. - -More importantly, while a $>$0.9 accuracy in deep learning tasks is often sufficient, in this particular context the inaccuracies can lead to optimization problem infeasibilities. 
This is seen in the right plot, where mis-classified $\hat{\xi}$ result in catastrophically wrong $\hat{y}$ primal solution predictions (the severe orange prediction errors in both negative and positive extremes); even when the remaining correctly-predicted $\hat{\xi}$ samples receive precisely correct solutions. - -{% include figure.html path="assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n10m5.png" class="img-fluid" %} - - -Furthermore, even though there are about $1-0.93 = 7\%$ of individual $\xi_i$ entries that are mis-classified, these errors are fairly spread across various samples. This results in a $19\%$ infeasibility rate in the test set, i.e. $19\%$ of the predicted $\hat{y}$ vectors violate the nonnegative constraint. In other words, since this particular approach is predicting every individual entry of the basis vector, even small errors for each sample can lead to the overall prediction being wrong. This disproportionate impact is intuitively explained by examining the distribution of bit-wise errors plotted below. Most samples result in 0 bits of error, and then the remaining samples mostly get 1 or 2 bits of error. This means that errors are spread out among many samples, leading to a high rate of infeasible prediction vectors. - -{% include figure.html path="assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/errors_bars.png" class="img-fluid" %} - -#### Attempts to improve accuracy -The previous training error plot appears to show a generalization or overfitting problem. Based on this, various data augmentation techniques were attempted, such as perturbing $b$, $c$, or both vectors (both based on random noise vectors and simple scaling invariance of $\alpha b, \beta c$ while keeping the latent $\xi$ targets); as well as generating new $\xi$ vectors after regular numbers of epochs; different schedules of the aforementioned were also tried. 
However, none of these attempted approaches were able to produce validation accuracy rates significantly above the original $\sim 0.93$. - -Notably, an alternative architecture was tried: instead of outputting size-$n$ binary vectors, now try to predict multi-class classification out of the 252 basis vector classes. This actually resulted in worse testing set performance. Intuitively, treating all bases as discrete classes does not leverage the geometric proximity of 2 adjacent bases (e.g. which are off by 1 in Hamming distance). - - -### Benchmarking -#### vs. DC3 (an "interior" learning approach) -As a comparison for the $n=4,m=2$ case, the DC3 methodology was implemented using a 3-layer neural net and the self-supervised training loss of the primal objective plus infeasibility penalty, with a chosen penalty rate of 10: -$$\mathcal{L} = c^T \hat{y} + 10 ||\max\{0, -\hat{y}\}||^2_2$$ - -The number of inequality correction steps during training was chosen to be $t_{train} = 10$, and to maximize the chance of feasibility a very large $t_{test} = 10,000$ was used (i.e. allow many inequality-correction gradient steps during testing inference). - -{% include figure.html path="assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3.png" class="img-fluid" %} - -With a learning rate of $10^{-5}$, the training stabilizes after about 30 epochs. Overall, the predictions are fairly accurate in terms of the out-of-sample average objective: $-0.247$ (a 2% optimality gap versus the ground truth), and an $R^2$ of predicted objective values of 0.9992 (see middle plot). (The qualitative results were robust to faster learning rates too: A previous higher lr=$10^{-3}$ produced a tighter average objective gap, but the optimal solution deviation versus the ground truth was larger.) - -However, despite being designed to enforce all hard constraints, the predictions still resulted in infeasible negative values (see the negative dip in the right plot). 
A similar disproportionate classification error to infeasibility impact is seen here (albeit to a lesser extent): $2.6\%$ of all output entries are negative, while $7\%$ of test samples lead to an infeasible prediction. - -Similarly to before, inequality violations are spread out among different samples, rather than all concentrated within a few samples; this is seen in the plot below. This provides an explanatory mechanism for the relatively large infeasible rate. - -{% include figure.html path="assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3_errors.png" class="img-fluid" %} - - -#### vs. Optimization solver -Thus far, the DNN is able to scale quite well along the number of samples dimension, but not the actual problem dimension (number of variables and constraints). - -Return for now to the small $n=4,m=2$ case for which the DNN method achieves perfect out-of-sample testing accuracy. A next practical question is how does this method compare with "classical" optimization methods, or in what contexts would we prefer one over the other? - -Note that there are only $4 \text{ C } 2 = 6$ bases. So once the NN produces a $\hat{\xi}$ estimate, these can be mapped to an index in $\{1,2,...,6\}$. All possible non-basic submatrix inverses can be pre-calculated. In total, to evaluate 1 million testing samples, the DNN predict-basis approach takes: -- 46 sec for training -- 0.002 sec for prediction of $10^6$ samples -- 10 sec to map $\xi$ to bases indices (note this is not done in a fully vectorized way and potentially could be sped up). -- $<0.001$ sec to batch matrix multiply every sample $j$'s: $(A^{\xi^j})^{-1}b^j$. Note this is done using einsum which is very efficient on CUDA. - -In comparison, even when running all the 1 million problem instances fully sequentially, the OSQP solver took a total of 67 sec, i.e. solving about 15,000 problem instances per second. 
- -This means that this DNN model here only achieved a speedup factor of about 1.2x, when including the DNN training time. Furthermore, the above "mapping" step is a remaining coding bottleneck at DNN inference time, and this will scale linearly as the test sample size increases; i.e. this speedup ratio is unlikely to increase much beyond this at higher sample sizes. - -{% include figure.html path="assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dnn_timing.png" class="img-fluid" %} - -The timing tradeoff can be understood in terms of fixed vs. variable costs, as plotted here. Note the orange and red lines, representing this project's DNN approach, are using the batched matrix solve instead of the pre-computing 6 matrix inverses (thus taking longer in the solving stage). Despite its very large speedup when only considering the prediction step, holistically the DNN approach here did not pose very significant timing advantages over the optimization solver. - - -# Conclusion -This project broadly compared 3 very different approaches to LP optimization: 1) a DNN to predict the optimal LP basis, 2) the DC3 method, and 3) optimization solver. Among the 2 deep learning methods, on the small $n=4,m=2$ problem, the LP basis method produced more robust and accurate results (i.e. it was able to perfectly learn the input to optimal solution mapping, for the chosen data domain) compared to DC3 which already faces inequality violation issues. However, neither deep learning methods were able to easily scale to the slightly larger problem. - -Qualitatively, the predict-LP-basis approach can result in "all-or-nothing" accuracy, i.e. predicting the correct basis vector results in the globally optimal solution, whereas even a nearby classification error can lead to catastrophic primal infeasibilities (due to enforcing the equality constraint). 
Moreover, in both predict-basis and DC3, inequality violations tend to be spread out among different samples, leading to disproportionate impact on the percentage of infeasible solution vector predictions. - -Domain-specific knowledge and leveraging problem structure may be needed for tractable DNN solutions for LP optimization. This includes real-life choices of how much accuracy we need exactly in different aspects of the problem (e.g. different components of the KKT conditions). \ No newline at end of file diff --git a/_posts/2023-11-10-distill-example.md b/_posts/2023-11-10-distill-example.md deleted file mode 100644 index a71e6408..00000000 --- a/_posts/2023-11-10-distill-example.md +++ /dev/null @@ -1,134 +0,0 @@ ---- -layout: distill -title: Gradient-Boosted Neural Wavlet Interpolation for Time Series (G-BiTS) -description: Your blog post's abstract. - This is an example of a distill-style blog post and the main elements it supports. -date: 2023-12-12 -htmlwidgets: true - -authors: - - name: Yeabsira Moges - url: "https://www.linkedin.com/in/yeabsira-moges/" - affiliations: - name: AI-DS, MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-10-distill-example.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Related Works - - name: G-BiTS - - name: Results - - name: Analysis - - name: Conclusions - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. 
-_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Introduction - -Energy companies struggle with energy allocation. The power grid contains a multitude of homes, schools, and offices all which require different amounts of power draw and capacity. As the current grid stands, the control loop is running on old data and isn't adequately reactive to sudden spikes, as well as the inability to properly model trends. Energy forecasting is the means by which we work to rectify that gap. Energy forecasting is a blanket umbrella term coming from general forecasting of any time series data. There are a lot of methods currently available, ranging from purely statistical models up to deep neural networks. At the moment, the SOTA in predictive modeling from statistical models is SARIMAX: Seasonal Autoregressive Integrated Moving Average Exogenous. In deep learning, the SOTA is N-HiTS[1]. Both work well in most circumstances, but there is a lot of work to improve upon the current performance given we want to generate better embeddings to decrease loss through the energy grid. There have been great performance boosts associated with combining the strengths of the different methods, and that is part of what this paper explores. Another big target: as it stands the current flavors of N-HiTS don't touch upon the further work recommendations from the original paper. This includes advanced interpolation, moving away from the current linear interpolation for the Block modules and moving towards incorporating wavelet decomposition and transforms to help convert the signal into a form that makes it much easier to deliver robust data. 
I propose gradient-boosted neural wavlet interpolation for time series (G-BiTS) as a new entry to forcasting models relying on a mix of statistical and neural network based models. G-BiTS expands upon N-HiTS which stands for neural basis expansion analysis for interpretable time series. N-HiTS decompose time series into a set of basis functions, capturing and interpreting temporal patterns. This paper explores ensembling methods and time series analysis. - -## Related Works - -The main related works relate to the following topics: SARIMAX, N-HiTS, and GBM. SARIMAX stands for seasonal autoRegressive integrated moving average with exogenous variables model. Each element of the SARIMAX are all important in the following ways. AutoRegressive: captures the relationship between an observations at various lags. Integrated: the differencing of raw observations to make the time series stationary. Moving Average: the relationship between an observation and a residual error from a moving average model applied to lagged observations. Seasonal: accounts for seasonality in data, like weekly, monthly, or yearly patterns. Exogenous Variables: These are external variables or predictors that aren't part of the time series itself but are believed to have an impact on it. This is mainly represented in time series analysis by date information with respect to variables unrelated to the power, but can be used to model a common behavior. The biggest flaw with SARIMAX comes from its inability to model more than one seasonality, hampering predictions. A more robust model is N-HiTS which stands for neural basis expansion analysis for interpretable time series forecasting. The best benefit from N-HiTS comes from its ability to learn rich embeddings for time series that properly represent all of the trends and seasonalities inherent to the data, while also producing gains through being able to apply much more data as it is made for longer range predictions. 
N-HiTS is good, and this paper will be exploring a multifold extension using gradient boosting [2] and adaptive ensembling[3]. Gradient boosting generates good predictions by training decision trees sequentially. A new tree is modeled on the residual errors made by the preceding trees. Finally, tying everything all together we have wavelet transforms. Wavelets are wave-like oscillations that represent data at various scales effectively. GBMs help us take advantage of a repeated pattern of smooth behavior interrupted by sudden changes or transients in time series data. - -## G-BiTS - -This paper proposes a new deep learning framework powered by gradient boosting and signal pre-processing G-BiTS. G-BiTS stands for Gradient-Boosted Neural Wavelet Interpolation for Time Series. G-BiTS builds upon the success of N-HiTS and explores a question posed by the authors in the original paper on replacing the existing sequential projections from the interpolation functions onto wavelet induced spaces, getting high resolution output. G-BiTS is an ensemble model, which is where gradient boosting comes in. The maximum of the combined predictions is taken for adaptive ensembling and higher performance as well as generality. Max can be min or mean, just depends on the use case, and having higher output in this circumstance is preferred. The hope is to use the hourly modeling capabilities of light gradient boosting machines with the versatility of N-HiTS to create a robust ensemble model. - -## Results - -The testing for the comparisons of the different forecasting methods is based on the BuildingsBench dataset. Specifically, this paper surveys office buildings within the Fox subsection from the original input. The data includes buildings with energy data that has multiple seasonalities, mostly hourly, daily, weekly, and monthly. Looking at the data, there are some interesting patterns. These are the average skew and kurtosis values for the data: high skew and kurtosis. 
- -Skewness: 1.1118040201238155 -Kurtosis: 3.452262511716185 - -Statistical analysis also shows that the data was not drawn from a normal distribution and is not stationary, so the variance and mean were not constant throughout the time series. - -Our baseline is simply copying over the values from the previous week and repeating the same for the following week. Non-baseline models tested include the previously mentioned SARIMAX, N-HiTS, LGBM, and G-BiTS. The following are the respective errors from each building ordered as mean average error, root mean squared error, and mean average percent error. - -### Building ID: Margarita - -SARIMAX (211.47498604910714, 249.84373502456708, 11.805270962305448) - -NHITS (21.72069293617509, 27.65604571924576, 1.6335940075280377) - -LGBM (33.16067034334621, 41.84784011583212, 2.0058567433490087) - -GBITS (26.955107763269822, 31.504577778268615, 1.6841760555882481) - -### Building ID: Loreta - -SARIMAX (2966.2653087797617, 3513.45974924458, 12.756417057832824) - -NHITS (203.50202658318491, 338.92442661325015, 1.0121962487927345) - -LGBM (419.71931531784384, 476.48902925976694, 1.8085151798175159) - -GBITS (215.94950733822594, 264.7384239183662, 0.9401638424018465) - -### Building ID: Gaylord - -SARIMAX (1220.2237444196428, 1479.439585459469, 8.095511476323951) - -NHITS (137.39752238818102, 203.64435240098928, 0.8720707702102791) - -LGBM (347.0178199198448, 435.19043719851146, 2.3137853719619144) - -GBITS (21.02548764010548, 27.84334532157823, .73338746467575437) - -## Analysis - -Across the board, SARIMAX performed the worst, followed closely by NHiTS and LGBMs. The biggest issue with SARIMAX is that it can only take a very limited amount of data, as well as being unable to model multiple seasonalities. G-BiTS showed good adaptability as one model over the large dataset was able to get transferable and adaptable embeddings. 
The wavelet transforms showed the greatest gains from the interpolation stage as the two level smoothing helped the N-HiTS model better fit the unstationary data. N-HiTS as expected performs well across the board too and had the best time modeling the data. - -## Conclusions - -There is more work to be done to extend this research topic. Mainly, finding better wavelet decompositions and symmetric recompositions for modeling multiple seasonalities faster and in a more efficient manner. The decomposition showed the biggest gain and confirms the original papers thoughts about the approach. Boosting helped standardize the model and generated really interesting embeddings through the initial wavelet based N-HiTS. - -## Bibliography - -[1] - -N-HiTS: Neural Hierarchical Interpolation for Time Series Forecasting - -Cristian Challu, Kin G. Olivares, Boris N. Oreshkin, Federico Garza, Max Mergenthaler-Canseco, Artur Dubrawski - -https://arxiv.org/abs/2201.12886 - -[2] - -Gradient Boosting Neural Networks: GrowNet - -Sarkhan Badirli, Xuanqing Liu, Zhengming Xing, Avradeep Bhowmik, Khoa Doan, Sathiya S. Keerthi - -https://arxiv.org/abs/2002.07971 - -[3] - -Adaptive Ensemble Learning: Boosting Model Performance through Intelligent Feature Fusion in Deep Neural Networks - -Neelesh Mungoli - -https://arxiv.org/abs/2304.02653 diff --git a/_posts/2023-11-10-proposal_JingpengHong.md b/_posts/2023-11-10-proposal_JingpengHong.md deleted file mode 100644 index d222a606..00000000 --- a/_posts/2023-11-10-proposal_JingpengHong.md +++ /dev/null @@ -1,115 +0,0 @@ ---- -layout: distill -title: Recurrent Recommender System with Incentivized Search -description: This project considers the use of Recurrent Neural Networks (RNNs) in session-based recommender systems. We input sequences of customers' behavior, such as browsing history, to predict which product they're most likely to buy next. 
Our model improves upon this by taking into account how previous recommendations influence subsequent search behavior, which then serves as our training data. Our approach introduces a multi-task RNN that not only aims to recommend products with the highest likelihood of purchase but also those that are likely to encourage further customer searches. This additional search activity can enrich our training data, ultimately boosting the model's long-term performance. - -date: 2022-12-01 -htmlwidgets: true - -authors: - - name: Jingpeng Hong - url: "https://jingpenghong.github.io/" - affiliations: - name: Harvard Business School - -bibliography: 2023-11-10-proposal_JingpengHong.bib - -toc: - - name: Introduction - - name: Literature - - name: Model - - name: Experiment - ---- - -## Introduction - -Numerous deep learning based recommender systems have been proposed recently . Especially, the sequential structure of session or click-logs are highly suitable for the inductive biases provided by recurrent/convolutional neural networks . In such setting, the input of the network is a sequence of consumers' search behavior, while the output is the predicted preference of the items, i.e. the likelihood of being the next in the session for each item. The ultimate goal is to pinpoint the optimal product for the consumer, thereby increasing sales. An example of where this could be applied is the "featured product" on platforms like Amazon. - -However, a challenge with this model is the sparsity of data. It's well-known that the products in retail has the "long-tail" feature. Only a small fraction, say 5%, of a site's products are ever browsed or bought by customers, leaving no data on the remaining products. Additionally, customer sessions tend to be brief, limiting the amount of information we can get from any one individual. 
This issue is particularly acute for "data-hungry" models, which may not have sufficient training data with enough variation to accurately match products with customers. - -My proposed solution to this issue is to recommend products that also encourage further exploration. Economic studies have shown that certain types of information structure can motivate customers to consider more options, harnessing the "wisdom of crowds" . Imagine two products: recommending the first leads to a 5% purchase likelihood, while the second has a 4% chance. But the second item prompts the customer to look at 5 additional products. This extra data allows our model to learn more, potentially enhancing recommendations for this and other customers in the future. Therefore, we might choose to recommend the second product to generate more user-driven training data. - -In this project, we consider the multi-task learning that achieves better performance along the entire customer journey. The conventional conversion rate based model estimates - -$$ -P(conversion|click, impression, u_i, v_j) -$$ - -where $$u_i$$ are users' features and $$v_j$$ are items' features. - -We decompose the conversion rate into - -$$ -P(conversion, click|impression, u_i, v_j) = P(click|impression, u_i, v_j) \times P(convsersion|click, u_i, v_j) -$$ - -Hence, we have two auxiliary tasks for predicting both the click-through rate and the conversion rate. Such approach has two advantages. First, the task for estimating the click-through rate generally has richer training data because we train on dataset with all impressions instead of the subsample with purchase. Second, we recommend products with both high probability of clicking and purchasing, leading to more training data points in future time periods. This can help us tackle the challenge of data sparsity . - -## Literature - -Recommender Systems are usually classified into three categories : (i) collaborative filtering (ii) content-based ,and (iii) hybrid. - -1. 
Collaborative filtering. The input for the algorithm can be [User, Item, Outcome, Timestamp]. The task is to complete the matrix $$R$$, where each column is an item and each row is a user, with the majority of missing elements. The memory based collaborative filtering finds pairs of user $$i$$ and $$i'$$ using similarity metrics The model based collaborative filtering decomposes $$R^{m\times n} = U^{m\times k}I^{k\times n}$$ using matrix factorization, where $$k$$ is the dimension of latent factors. - -2. Content-based. The input for the algorithm can be [User features, Item features, Outcome]. The task is to predict $$y=f(u_i, v_j)$$, where $$y$$ is the outcome and $$u_i$$ and $$v_j$$ are features of users and items respectively. - -3. Hybrid. we consider a simple linear model : - -$$ -r_{ij} = x_{ij}\mu+z_i\gamma_j+w_j\lambda_i+\epsilon_{ij} -$$ - -where $$x_{ij}$$ is the collaborative filtering component indicating the interaction, $$z_i$$ are users' features and $$w_j$$ are items' feature. $$\gamma_j$$ and $$\lambda_i$$ are random coefficients. We can also apply matrix factorization to reduce the dimension of interaction matrix $$x_{ij}$$. A recent application in marketing can be found in . - -The core idea in collaborative filtering is "Similar consumers like similar products". The similarity is defined on consumers' revealed preference. However, the content-based approach implicitly assumes users and items should be similar if they are neighborhoods in feature space, which may or may not be true. The limitation of collaborative filtering is that we require a sufficient amount of interaction data, which is hard if we consider the sparsity and cold start problems. - -Moreover, deep learning based recommender systems have gained significant attention by capturing the non-linear and non-trivial user-item relationships, and enable the codification of more complex abstractions as data representations in the higher layers. 
A nice survey for deep learning based recommender system can be found in . Deep learning based recommender system can have several strength compared to conventional models: - -1. It's possible to capture complex non-linear user-item interactions. For example, when we model collaborative filtering by matrix factorization, we essentially use the low-dimensional linear model. The non-linear property makes it possible to deal with complex interaction patterns and precisely reflect user’s preference . - -2. Architecture, such as RNN and CNN, are widely applicable and flexible in mining sequential structure in data. For example, presented a co-evolutionary latent model to capture the co-evolution nature of users’ and items’ latent features. There are works dealing with the temporal dynamics of interactions and sequential patterns of user behaviours using CNN or RNN . - -3. Representation learning can be an effective method to learn the latent factor models that are widely used in recommender systems. There are works that incorporate methods such as autoencoder in traditional recommender system frameworks we summarize above. For example, autoencoder based collaborative filtering , and adversarial network (GAN) based recommendation . - -## Model - -We implement the multi-task learning similar to : - -{% include figure.html path="assets/img/2023-11-10-proposal_JingpengHong/multitask.png" class="img-fluid" %} - -However, we differ from the model in in two ways: - -1. For user field, we implement RNN to deal with the sequential clickstream data instead of simple MLP. - -2. We define the loss function over the over samples of all impressions. The loss of conversion rate task and the loss of click-through rate task will not be used separately because both of them are based on subsamples (conditional on click and conditional on purchase). 
- -$$ -L(\theta_{click}, \theta_{convsersion})=\sum_{i=1}^N l(click_i, f(u_i, v_j))+\sum_{i=1}^N l(click_i, purchase_i, f(u_i, v_j)) -$$ - -## Experiment -The dataset we use is a random subsample from , which is the traffic logs from Taobao’s recommender system. We do a 1% random sampling, though the public dataset in has already been a 1% random sampling of the raw data. The summary statistics of the data can be found in . - -For the performance metrics, we use Area under the ROC curve (AUC). - -Several benchmark models we use for comparsion: - -1. DeepFM . This is a factorization-machine based neural network for click-through rate prediction. In my setting, I consider it as a single-task model with MLP structure. - -2. MMOE . This is the multi-task setting. However, since the usecase is MovieLens, where two tasks are "finish" and "like", it doesn't consider the type of sequential data. In my setting, I consider it as a multi-task model with MLP structure. - -3. xDeepFM . This model Combines both explicit and implicit feature interactions for recommender systems using a novel Compressed Interaction Network(CIN), which shares some functionalities with CNNs and RNNs. In my setting, I consider it as a single-task model with RNN/CNN structure. - -4. Our Model, a multi-task model with RNN/CNN structure. - -Results: - -| Model | test AUC | test click AUC |test conversion AUC | -| ------------- |:-------------:| :-------------:|:-------------:| -| DeepFM | 0.3233 | | | -| MMOE | | 0.5303 |0.6053| -| xDeepFM | 0.4093 | | | -| Ours | | 0.5505 | 0.6842| - - diff --git a/_posts/2023-11-10-spacial-CLIP.md b/_posts/2023-11-10-spacial-CLIP.md deleted file mode 100644 index 1a70102f..00000000 --- a/_posts/2023-11-10-spacial-CLIP.md +++ /dev/null @@ -1,133 +0,0 @@ ---- -layout: distill -title: Improving CLIP Spatial Awareness Using Hard Negative Mining -description: CLIP struggles to understand and reason spatially. 
We attempt to solve this issue by introducing hard negative examples during training.
- Taken from Yamada et al. -
- -CLIP is trained to maximize the similarity between embeddings of images and text. This leads to CLIP matching semantically similar images and captions but not understanding finer-grained details. Concept Association is especially an issue when there are multiple objects in an image where CLIP struggles to reason about the object's attributes (Yamada 2022). Additionally, because of the focus on semantic similarity, CLIP also struggles with spatial relationships between objects. - -# Winoground - -{% include figure.html path="assets/img/2023-11-10-spacial-CLIP/winoground_example.png" class="img-fluid" %} -
- Taken from Thrush et al. -
- -Spatial awareness has been explored explicitly throughout previous literature. Thrush et al. in Winoground created an evaluation dataset that targets compositional reasoning. Each data point contains two captions and two images, where the captions contain the same words only in different orders. The difference in word ordering drastically changes the meaning of the sentence and therefore the image associated with the alternative caption also is completely different. The task then becomes to match the images to the correct captions (Thrush 2022). - -## Evaluation Specifics and Results - -We are going to use the image-to-caption evaluation of Winoground which aims to match captions to each image in constrast to images to captions. Different models have differnt matching strategies; CLIP uses the higher dot product similarity score when deciding which caption fits each image. Since there are in total, 4 different possible matchings out of the 2 image/caption pairs, random chance would score 25%. However, many multimodal models fail to score much higher than random chace. CLIP (ViT-B/32) scores 30.75% while the best models only score 38%. - -## Spatial Examples - -{% include figure.html path="assets/img/2023-11-10-spacial-CLIP/Winoground_Lightbulb.png" class="img-fluid" %} -
- An example of spatial image/caption pairs. Taken from Thrush et al. -
Winoground has proven to be an extremely difficult benchmark for multimodal models - and there are a multitude of reasons why. First, changing the word order creates image/caption pairs that need fine-grained reasoning capabilities to differentiate. One of the many reasoning capabilities needed to do well is spatial reasoning. We filter out 101 examples of Winoground that contain images/captions that require spatial reasoning to create a more task-specific benchmark. Our filtering is caption-based and targets key words that may indicate spatial relationships. We will refer to this filtered evaluation benchmark as Winoground-Spatial.
- CLIP similarity matrix. Radford et al. -
- -We modify this training procedure to include additional hard negative captions. For each image/caption pair, we generate M additional negative captions. We then calculate an N by NM similarity matrix from the dot products. Then, we only modify the loss function for image classification cross entropy function to include negative captions alongisde the original N captions. We don't modify the caption classification cross entropy function since the negative examples don't have a corresponding "image". - -{% include figure.html path="assets/img/2023-11-10-spacial-CLIP/CLIP_Negative.png" class="img-fluid" %} -
- CLIP similarity matrix with negative examples. -
- - -## Data and Augmentation - -How do we generate negative examples? We first have to create a fine-tuning dataset that contains image/caption pairs that display spatial relationships. To do this, we utilize the dataset Flickr30k, a dataset that contains 31,000 images collected from Flickr along with 5 captions annotated by human annotators. We chose this dataset due to it's caption quality alongside the fact that many of the image/caption pairs contain multiple objects. - -We then filter out image/caption pairs based on the captions in a similar way we created our evalutation benchmark, Winoground-Spatial. We use 20 key words and phrases such as: "left", "on top of", "beneath", etc. to create a training set of roughly 3,600 examples. Although there are most likely more spatial examples, we choose this method as it is cost-effective while still ensuring the quality of the traning set being only examples of spatial relationships. - -Data augmentations have been a commonly used as a method to prevent overfitting in image classification tasks. Although it is common to perform image augmentations, Fan et al. introduce LaCLIP to perform text augmentations on captions to create additional image/caption pairs. This method can be thought of as generating additional “positive pairs”. In order to generate text-augmentations, they utilize language models such as llama7b and GPT-3 to ensure the sentences generated are still grammatically correct. They use in-context learning and prompts such as, "Rewrite this caption of an image vividly, and keep it less than thirty words:". - -We follow a similar procedure to generate our negative examples. For each image/caption pair, we prompt GPT-3.5-turbo-instruct to do different augmentations. Details of the prompts are provided in the later experiments. - - -# Experiments - -For all experiments, we use a base model of CLIP(ViT-B/32) pre-trained on OpenAI's WIT provided by OpenClip. We then use OpenAI's API to generate augmentations. 
In total, the cost of generating augmentations was under $50 in credits.
- In-context-learning prompt used to augment word order. -
- -We discover adding a single hard-negative example to each example already leads to an impressive performance boost. The accuracy improves from 19.8% to a staggering 50.5% from fine-tuning. - -|| Pretrained CLIP | Word Order CLIP | -|----------|----------|----------| -| Pairs matched correctly | 20 | 51 | -| Accuracy| 0.198 | 0.505 | - -We did some extra probing and noticed the majority of the improvement was from distinguishing left and right. From the additional 31 examples our fine-tuned model got correct, 18 of them were examples that the captions included the keyword of either left or right. This is consistent with our training set, where the most popular keyword of our examples is left/right. - -## Experiment 2: Replacing key spatial words - -We then explore how a different augmentation workflow could impact the accuracy. In this experiment, we augment the captions to replace the keyword with another spatial keyword. For example, the keyword "on top of" could be replaced by "underneath" or "to the right of". We again, utilize GPT to ensure the captions are still grammatically and logically correct. Because of the number of keywords avaialable, we explore how the number of negative examples during training time may affect the model's accuracy. - -|| 0 negative examples (Pretrained CLIP) | 1 negative examples | 5 negative examples | 10 negative examples | -|----------|----------|----------|----------| -| Pairs matched correctly | 20 | 31 | 65 | 55| -| Accuracy| 0.198 | 0.307 | 0.644| 0.545 | - -We can notice that from 0-5 negative training examples, there is a distinctive increase in model accuracy. However, an interesting result is the dropoff in accuracy from 5 training examples to 10. We did some probing into why this may be the case in the training data. One hypothesis may be the training examples for hard negatives are incorrect, in that, by a human they could be interpreted as positive examples. 
For example, object A could be both next to and above object B, but we are training CLIP to recognize the keyword above to be false in this case. Another hypothesis is the difficulty in training examples stunting training and needing more data. This could be case when looking at the loss function, on whether it has fully converged or not. - -# Conclusion and Limitations - -Although we have not fully tackled the issue of spatial awareness, we have made signifigant progress from our base model of CLIP, with the highest accuracy being at 64.4% compared to 19.8%. This proof of concept work shows how hard-negative examples could boost improvements in specific reasoning tasks. The concept of using these hard-negative examples are not limited to spatial relationships: it could be interesting to examine how hard negative tasks may improve other Winoground examples that require reasoning capabilities such as counting. We also note that there is a possiblity that improving the training data may not be enough, and that the architecture may need a change to fully solve spatial relationships. - -### References: -1.Robinson, J. D.; Chuang, C.-Y.; Sra, S.; Jegelka, S. Contrastive Learning with Hard Negative Samples. In Proceedings of the International Conference on Learning Representations, 2021. - -2.Thrush Tristan, Jiang Ryan, Bartolo Max, Singh Amanpreet, Williams Adina, Kiela Douwe, and Ross Candace. 2022. Winoground: Probing Vision and Language Models for Visio-Linguistic Compositionality. In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 5238–5248. - -3.Fan, L., Krishnan, D., Isola, P., Katabi, D., and Tian, Y. (2023a). Improving clip training with language rewrites. arXiv preprint arXiv:2305.20088. 
\ No newline at end of file diff --git a/_posts/2023-11-10-speech-recognition-proposal.md b/_posts/2023-11-10-speech-recognition-proposal.md deleted file mode 100644 index 12c4fbac..00000000 --- a/_posts/2023-11-10-speech-recognition-proposal.md +++ /dev/null @@ -1,141 +0,0 @@ ---- -layout: distill -title: Modeling Human Speech Recognition with Different Network Architectures -description: Evaluating a neural network's ability to effectively model human speech recognition using CNNs vs. TNNs -date: 2023-11-10 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Annika Magaro - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-10-speech-recognition-proposal.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Methods - - name: Results - - name: Discussion - - name: Acknowledgements - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Introduction - -Recent advances in machine learning have made perception tasks more doable by computers, approaching levels similar to humans. In particular, structuring models biologically and using ecologically realistic training datasets have helped to yield more humanlike results. 
In the field of speech recognition, models trained under realistic conditions with stimuli structured according to how sounds are represented in the cochlea, with network layers imitating the processing pipeline in the brain, seem to be successful in performing speech recognition tasks.
The activations in different layers of the neural network were able to predict voxel responses in different parts of the brain, revealing that the auditory processing pipeline aligned with layers of the network. - -A natural question to ask at this point is whether a convolutional neural network is the best architecture for this task. In Mamyrbayev Orken et al.’s 2022 paper, they explore a speech recognition system for Kazakh speech . In this paper, they create a transformer model that is able to perform a continuous speech recognition task on “clean” speech (speech without noise or alterations). They trained the model on a dataset of clean speech and telephone speech, and the model was able to perform the speech recognition task on clean speech with high accuracy. Although this network does well on the specific task it is given, it is still unclear how its performance compares to that of a convolutional neural network due to not having a direct comparison between the two on the same task. In addition, it is unclear whether the transformer can yield human-like results for speech recognition. - -In the field of computer vision, there has been work done comparing convolutional neural networks to vision transformers for the task of object recognition. Tuli’s 2021 paper explores this through the lens of human-like object recognition, determining whether the errors of a vision transformer or a convolutional neural network are more similar to humans . Their findings indicated that the vision transformer behaved in a more human-like way. In particular, in human vision, there is a phenomenon called shape bias, where if an object has the shape of a certain object A, but the texture of a certain object B, humans will be likely to predict that the object is an instance of object A. Many vision models struggle with shape vs. 
texture bias, being more inclined to categorize objects by their texture; the vision transformer presented in this paper has approximately twice the amount of shape bias as the convolutional neural network, further suggesting that the transformer is a more human-like representation of vision. - -In this post, I investigate more closely the importance of network architecture in the ability to effectively model human speech recognition. I focus on three metrics of evaluating how well a model replicates human speech recognition: -1. Ability to generalize to speakers not found in the training set: -Humans hear speech from new speakers all the time, and a person who they’ve never heard before usually does not hinder their ability to recognize what they are saying. Models of speech recognition are usually trained on a corpus of speech that is inherently biased towards a set of talkers that participates in creating the corpus, so it is possible that it could overfit to the speakers in the training set. A good model of speech recognition should be able to perform well on new talkers. -2. Ability to recognize speech in different background noise conditions: -Humans rarely hear speech unaccompanied by some form of background noise, and are generally robust to noise up to large signal to noise ratios. Many models of speech recognition such as the transformer in Orken 2022 are not trained or tested on noisy speech, so it is likely that it would not be able to recognize speech in these conditions. -3. Ability to recognize distorted forms of speech: -Humans are remarkably robust to various distortions of speech such as sped-up/slowed-down speech, reverberant speech, and local-time manipulations, despite not encountering some of these often in their lives . In order to further test a model’s ability to replicate human speech recognition, we should test how well it performs on speech manipulations. 
- - - -## Methods - -The models in my experiment were given a 2 second speech clip, and were tasked with identifying the word overlapping the middle of the clip. In particular, they were trained on a dataset containing 2 second speech clips from the Common Voice dataset, where the word at the middle of the clip is from a vocabulary of 800 words, imposed on different background noises taken from the Audio Set dataset . So, the models were essentially given a speech clip and asked to perform an 800-way classification task. - -In order to generate the fairest comparison between convolutional neural networks and transformers, I start with a baseline CNN inspired by Saddler 2021, and then either replace the last convolutional layer with a multi-headed attention layer or remove it from the network . Each block of the CNN is made up of a convolutional layer, followed by a ReLU activation, a weighted-average pooling layer, and a normalization layer . - -{% include figure.html path="assets/img/2023-11-10-speech-recognition-proposal/block-architectures.png" class="img-fluid" %} - -The baseline CNN (called CNN6) is composed of 6 blocks followed by a fully connected layer and a classification layer. The transformer-CNN hybrid (CNN5+Attn) is composed of 5 convolutional blocks, followed by an attention block, followed by a fully connected layer and a classification layer. Lastly, I created a “control” CNN (called CNN5) that is the same as CNN6, but with the last convolutional block removed. This was intended to test whether an attention layer provides any benefit as opposed to not including the layer at all. All networks begin with an initial data preprocessing step that converts the audio signal into a cochleagram. 
- -{% include figure.html path="assets/img/2023-11-10-speech-recognition-proposal/network-architectures.png" class="img-fluid" %} - -It is difficult to derive a direct comparison between a convolutional layer and a multi-headed attention layer, in particular how to decide how many attention heads to include and what the attentional layer dimension should be. In order to have the best chance of comparison between CNN5+Attn and the other networks, I ran multiple CNN5+Attn networks with a larger vs. smaller number of attention heads (64 vs. 16) and a larger vs. smaller attention dimension (512 vs. 16) for 10 epochs to determine a preliminary measure of network performance across these parameters. The preliminary results after 10 epochs showed that the CNN5+Attn network with a small number of attention heads and a smaller attention dimension had the highest training accuracy and trained the fastest, so I used this model for my analysis. - -After preliminary analysis, I trained the CNN6, CNN5+Attn, and CNN5 networks for 100 epochs. I then evaluated the models’ performance on this task in the three aforementioned conditions. - -1) To evaluate performance on clips spoken by talkers not encountered in the training dataset, I evaluated the models on clips taken from the WSJ speech corpus. 
- -2) For clips superimposed on different types of background noise, I evaluated the model on 5 types of background noise, in signal-to-noise ratios ranging from -9 dB to +3 dB, plus a +infinity condition which represents no background noise: - - * Auditory scenes: background noises encountered in everyday life like rain or cars passing by - * 8-speaker babble: 8 other people talking in the background - * Music - * Speech-shaped noise: gaussian noise that is given the envelope of speech signals - * Modulated speech-shaped noise: speech-shaped noise that is modulated so that the noise alternates between being very quiet and very loud - -3) Distorted speech clips with 6 types of distortions: - - * Sped-up/slowed-down speech (preserving original pitches) - * Speech in a reverberant environment: speech convolved with an impulse response of different reverberant environments - * Whispered speech - * Inharmonic speech: speech signals are decomposed into their harmonics, and the harmonics are moved up or down to distort the signal - * Sine wave speech: speech signals are filtered into frequency subbands, and each band is replaced by a sine wave with the center frequency of the band - * Locally time-reversed speech: speech is decomposed into chunks of a certain length, and the chunks are reversed - -Then I compared the models’ performance on these conditions to existing human data where humans were asked to perform the same task of recognizing the middle word of a 2-second clip in various types of noise or distortion. - -## Results - -Overall, I found that CNN6 performed better than CNN5+Attn, which performed better than CNN5. After 100 epochs, CNN6 had a validation accuracy of around 0.60, CNN5+Attn had validation accuracy of 0.55, and CNN5 had validation accuracy of 0.53. In particular, CNN5 overfit quite a bit (0.12 gap between training and validation accuracy) while CNN5+Attn overfit much less (0.05 gap between training and validation accuracy). 
- -{% include figure.html path="assets/img/2023-11-10-speech-recognition-proposal/accuracy-by-epoch.png" class="img-fluid" %} - -All three models performed similarly to humans for clean speech spoken by talkers not encountered in the training dataset. - -{% include figure.html path="assets/img/2023-11-10-speech-recognition-proposal/clean.png" class="img-fluid" %} - -In different types of background noise, in general the models performed similarly to humans, except in the condition of modulated speech-shaped noise. In general, humans perform better for modulated noise than “stationary” noise because they are able to fill in speech in the “gaps”, or quieter sections, of the noise, but none of the models have as strong of an effect as humans for this. The CNN5+Attn model does particularly badly on this compared to the other networks. - -{% include figure.html path="assets/img/2023-11-10-speech-recognition-proposal/background-noise.png" class="img-fluid" %} - -The models all perform similarly to humans for inharmonic speech, local time reversal, and low levels of sped-up or slowed-down speech. For whispered speech and sine-wave speech, the models perform slightly worse than humans, with CNN6 performing better than CNN5+Attn performing better than CNN5. For reverberant speech and extremely sped-up or slowed-down speech, all of the models perform significantly worse than humans, with the same hierarchy of performance between the models. - -{% include figure.html path="assets/img/2023-11-10-speech-recognition-proposal/manipulations-1.png" class="img-fluid" %} - -{% include figure.html path="assets/img/2023-11-10-speech-recognition-proposal/manipulations-2.png" class="img-fluid" %} - -## Discussion - -Overall, it seems that CNN6 is the best option for replicating human speech recognition, but CNN5+Attn does have some benefits. In particular, it trains substantially faster than CNN5, and overfits less than both CNN5 and CNN6. 
The hybrid architecture may help with overfitting because it forces the model to do multiple types of analysis in order to determine the output. Although CNN5+Attn does still perform worse than CNN6, it is reasonable to hypothesize that it has potential. Due to resource limitations, I was only able to test two different conditions for number of attention heads and attention dimension, but as shown from the preliminary training the number of attention heads and the attention dimension does have an effect. It seems likely that with a more extensive search of these parameters, it could be possible to create a CNN5+Attn network that performs similarly or better than the CNN6 network on these tasks. - -All of the models have discrepancies with humans for the modulated background noise condition. One possible explanation for this is that the models do not learn the process of recognizing smaller phonemes of a word, only learning a classification task on the 800 words that they are given, so they are unable to piece together chunks of a word into a larger word like humans do. A possible way to test this would be to create a model for a phoneme-detection task, and then add a layer that combines the phonemes into a larger word, and see whether this performs better in this condition. This would make sense because some of the earliest things humans learn about speech are not full words, but phonemes like “ba” or “da,” so a model trained on this task would then have been optimized in more human-like conditions. - -In addition, there are some discrepancies between the models and humans in some of the speech distortions. The largest discrepancies are found in very sped-up or slowed-down speech, and in reverberant speech. This seems likely to be due to a shortcoming of the dataset. The Common Voice dataset is composed of people reading passages, which is generally a single slow, steady speed, and there is no reverberation. 
The speech that humans encounter in their lives varies a lot in speed, and they also encounter speech in many different reverberant environments, so they are optimized to recognize speech in these conditions. It is reasonable to assume that if reverberation and varied speeds of speech were incorporated into the training dataset, the model would perform better in these conditions. - -Further directions of this project could include trying more variations of the parameters of the attention model. In addition, it would be interesting to try different hybrid architectures; for example, 4 layers of convolution followed by 2 layers of attention. This could give a more complete idea of the benefits and disadvantages of CNNs and transformers for the task of speech recognition. In conclusion, the current results seem promising, but more extensive testing is needed in order to get a full picture of whether these models can accurately replicate human speech recognition. - -## Acknowledgements - -I would like to thank my fellow members of the McDermott Lab, particularly Mark Saddler for creating the code for the baseline CNN, and Erica Shook for providing me with human data and experimental manipulation code. diff --git a/_posts/2023-11-10-superposition.md b/_posts/2023-11-10-superposition.md deleted file mode 100644 index 5f8807b6..00000000 --- a/_posts/2023-11-10-superposition.md +++ /dev/null @@ -1,422 +0,0 @@ ---- -layout: distill -title: The Effect of Activation Functions On Superposition in Toy Models -description: An in-depth exploration of how different activation functions influence superposition in neural networks. 
-date: 2023-12-12 -htmlwidgets: true -authors: - - name: Vedang Lad - url: "https://www.vedanglad.com" - affiliations: - name: MIT - - name: Timothy Kostolansky - url: "https://tim0120.github.io/" - affiliations: - name: MIT - -bibliography: 2023-11-10-superposition.bib -toc: - - name: Introduction to Superposition - - name: Superposition and Previous Work - subitems: - - name: Monosemanticity and Polysemanticity - - name: Motivation and Notation - subitems: - - name: Problem Specification - - name: Features - subitems: - - name: Sparsity - - name: Importance - - name: Dataset - - name: Network - - name: Loss - - name: Results - subitems: - - name: ReLU - - name: GeLU/SiLU - - name: Sigmoid - - name: Tanh - subitems: - - name: A Note on Sigmoid and Tanh - - name: SoLU - - name: Bringing Them All Together - - name: Conclusion - ---- - - -## Introduction to Superposition - - -With the recent emergence of grokking, mechanistic interpretability research has trended towards understanding how models learn . A central concept in this pursuit is superposition - a single neuron learning multiple "features." - - -Features are the distinguishing properties of data points, the “things” that allow a neural network to learn the difference between, say, a dog and a cat, or a Phillip Isola and a Jennifer Aniston. Features are the building blocks that determine what makes one data point different from another. In many cases, features discovered by and encoded within neural networks correspond to human-understandable ideas. For example, in language models there exist embedding vectors describing relations like gender or relative size (e.g., the famous vec(“king”) - vec(“man”) + vec(“woman”) =~ vec(“queen”)). It has been found that language models often map ideas like these to features within their parameters. Human understanding is not necessary though, as models can find and map features that exist beyond the perception of humans. 
This is an important part of the success (and dual inscrutability) of modern deep models, as these models can determine features and relationships within the data that allow them to model large datasets, like language, very well. - -In this work we: - -1. Explain Superposition, why it may occur, and why it is important -2. Motivate a framework to easily study Superposition -3. Study how activation functions affect Superposition - - -## Superposition and Previous Work -Let us elaborate further. If you were to train some neural network and visualize the weights - chances are you would see some mess that looks like this: - -
-
- {% include figure.html path="/assets/img/2023-11-10-superposition/random_matrix_equation.png" class="img-fluid" %} -
-
-
-
-You are likely looking at superposition!
-
-
-As hypothesized by , superposition is a phenomenon which occurs when the number of features being learned by a model is greater than the number of parameters in that model. To capture $n$ features with only $m < n$ parameters, the model must compress its representation — so how can a model represent $n > m$ features using only $m$ parameters? The answer is non-linearity. Clearly, the activation function is key to understanding how superposition occurs - unexplored by other work in the field. explores the activation function in transformer MLP, but not in the setting we present here.
-
-But why do we care about Superposition? Why spend time studying this?
-
-
-While it may seem tangential, Superposition sheds important light on Large Language Models (LLMs)! While LLMs are billions of parameters large, this is still not enough for a one-to-one mapping to “features” on the internet. Therefore LLMs also MUST exhibit superposition to learn. We focus our current work on the $\textit{bottleneck superposition}$ regime, but has shown that the picture is far more complicated than presented in . Namely, varying the initialization can change how superposition unfolds. To normalize across experiments, we initialize all weights using the Xavier norm, as outlined by . However, this is certainly a limitation of our presented work. A more rigorous analysis of superposition with activation functions would explore it outside the context of the bottleneck regime. We leave this for future work.
-
-
-
- {% include figure.html path="/assets/img/2023-11-10-superposition/feature_visual.png" class="img-fluid" %} -
-
-
-
- From Distill Blog, "Feature visualization allows us to see how GoogLeNet, trained on the ImageNet dataset, builds up its understanding of images over many layers."
- - -Previous research, as detailed in , has predominantly explored superposition within the confines of toy models utilizing the Rectified Linear Unit (ReLU) activation function. However, to extend these findings to contemporary neural networks, it is crucial to investigate the influence of different activation functions on superposition. Different activation functions provide different ways for a model to use superposition to its advantage. - - -So you train a neural network - what happens at the neuron level? -There are three possibilities. As the network trains each neuron has three choices: - - -1. The neuron chooses not to encode the “features” -2. The neuron chooses to dedicate itself to one feature -3. The neuron chooses to encode multiple features - - -(We anthropomorphize - The neuron doesn’t choose to do anything - there is no free will - you are born into a loss landscape and an optimizer telling you what to do.) - - -In linear models, each neuron is limited to representing only the most significant features (2), discarding others (1). Conversely, superposition, enabled by non-linear activation functions, adopts a more inclusive approach (3), trying to encode multiple features per neuron and learning efficient representational shortcuts. - - -While ReLU bears similarity to the Gaussian Error Linear Unit (GeLU) used in modern GPT architectures, a deeper understanding of how different nonlinear activations impact superposition can provide crucial insights. Such understanding is key to unraveling the complex mechanisms through which neural networks utilize non-linearities, a cornerstone in the broader narrative of neural network interpretability. - -### Monosemanticity and Polysemanticity -To connect to existing literature (2) and (3) above are given the names monosemanticity and polysemanticity. We will also follow this notation going forward. 
-
-
-To describe further, the idea of superposition in neural networks leads us to two distinct types of neuron behaviors: monosemanticity and polysemanticity.
-
-
-Monosemantic neurons are those that specialize in a single, distinct feature, acting as dedicated detectors. This characteristic is often observed in the intermediate layers of architectures like Convolutional Neural Networks (CNNs), where neurons become adept at recognizing specific patterns, such as curves or colors.
-Polysemantic neurons do not align with just one feature but engage with multiple features simultaneously, offering a broader and more nuanced understanding of the data. This trait is essential for handling complex, high-dimensional datasets but comes at the cost of reduced interpretability.
-
-
-## Motivation and Notation
-
-
-Our work extends the work done in by examining how the changing of the activation function on toy model networks affects the behavior and interpretability of these networks. uses the canonical ReLU activation function to add non-linearity to two-layer models to analyze how superposition occurs within small networks. They did not generalize their work to other activation functions, which, we find, result in **distinct** new phenomena. Our work compares the ReLU function with five other common activation functions: GeLU, SiLU, Sigmoid, Tanh, and SoLU. We hope that generalizing the phenomenon across activation functions can push the toy dataset to be closer to realistic ML settings.
-
-
-### Problem Specification
-
-The models in this experiment will be learning how to replicate a length-$n$ vector of inputs in the range $[0, 1]$ with a compression to a length-$m$ embedding (where $n>m$). The model will then use the length-$m$ embedding to recreate the length-$n$ input, using a non-linear activation function to allow for superposition.
-
-We will run two variations of the experiment.
One variation of the experiment will involve compressing inputs of size $n=10$ to an embedding of size $m=5$. This experiment aims to see how superposition occurs across many features which are encoded in a bottleneck with half the number of spots as there are features. The second variation of the experiment will involve compressing inputs of size $n=2$ to an embedding of size $m=1$. This experiment aims to understand precisely how the model encodes the second "extra" feature in a variety of settings. - -To set up this experiment, we need to create a dataset that allows for superposition to occur and that also allows for interpretability of the superposition. To motivate this further, we begin with a careful discussion of features. - - -### Features - - -Features are the salient “things” that a neural network learns to differentiate inputs . - - -Technically, features are the properties which neural networks try to extract from data during learning to compress inputs to useful representations during inference. Although features can map to human-understandable concepts (e.g., dog ears), they can also represent properties of the data that are not immediately apparent to the human brain. To experiment with superposition, we need to encode features in a way that we can understand. In other words, we do not want our experimental model to learn features that we are unaware of. This would make it hard for us to interpret how the model maps features in the data to embeddings within its parameters, consequently obscuring how superposition works. To this aim, we must generate features within the training set for our model which are simple and understandable to us a priori. Similar to , we use as each input a vector with entries drawn independently from a uniform distribution over $[0, 1]$. Making each entry independent of the others enforces that each entry is its own (artificial) feature with no correlation to the other features. 
- - -Here we define two important augmentations that we used in the dataset to simulate real-world features: sparsity and importance. - - -#### Sparsity - - -Sparsity is a measure of how often a specific feature is present in a dataset. A feature is characterized as “sparse” if it only appears in a small fraction of the inputs to the model. Similarly, features that are “dense” appear in many of the inputs. We will also use the term 'density', which is the complement of sparsity, defined as $1-S$. - - -Specifically, a feature with a sparsity of $S \in [0, 1]$ has a probability $S$ of being expressed in any given input. If we have $S=0$, this means that the feature is expressed in every input, whereas if we have $S=0.5$, this means that the feature is expected to be expressed in about half of the inputs. - - -In our experiment, we train models at different sparsities to capture how sparsity affects superposition. - - -#### Importance - - -Not all features are created equal! - - -Some features are more useful than others in determining relevant information about inputs. For instance, when building a dog detector - capturing features related to dogs’ faces are extremely important! A model would need to pick up salient features of dogs, perhaps floppy ears and snouts. Other features, like the grass a dog is sitting on or a frisbee in a dog’s mouth, may not be as useful for detecting a dog. The varying degrees of usefulness among features are encapsulated in the concept of "importance". - - -In the context of feature detection by a neural network, importance plays a role in modulating which features are encoded within the embedded layers of the network. In the context of the superposition hypothesis, if one feature has more importance than another feature, then it would be inefficient for the network to map both features equally within the embedding; allocating more weight to the feature with greater importance would be more valuable to the network in minimizing error. 
- - -In our experiment, we give each input feature a different importance to allow the models to differentiate between them. We will examine when and how the model justifies mapping multiple features of differing importances to the same neuron, i.e., we will observe the superposition of features with differing importances. - - -### Dataset - - -To run this experiment, we will synthetically generate data that has desired sparsity and importance properties. - - -Each input $x$ will be a vector of length $n$. Each element $x_i$ in the vector will be drawn independently from the other elements in the uniform range $[0, 1]$. As discussed before, we can now synonymously refer to each of these elements as features, given their independent generation. (We will refer to them as features from this point onwards.) - - -Each feature $x_i$ in the vector has a relative importance to each of the other features $x_{j\ne i}$. The importance of feature $x_i$ is $I_i = r_I^i$ where $r_I\in(0, 1)$ is a constant describing the relative decay of importance between neighboring features. This attribute of the data will be implemented in the loss function (see below for more details). - - -We will train separate models for each of the varying levels of sparsity. For an input $x$ with sparsity $S$, each feature $x_i$ will take on its “true” value, a uniformly distributed number, with a probability of $1-S$ and will otherwise be set to 0 with a probability of $S$. - - -Below is a visualization of two batches of inputs with respective sparsities $S=0.5$ and $S=0.99$. - -
-
- {% include figure.html path="/assets/img/2023-11-10-superposition/input_batches.png" class="img-fluid" %} -
-
-
- Each column of the plots represents a feature vector of length 20. Each batch has size 100, corresponding to the number of columns in the plots. Notice how the changing in sparsity affects the feature density. -
- -### Network -Below are the architectures of the base (linear) and experimental (non-linear) models that we are using in this experiment. Of particular note is the activation function $\mathbb{f}$, which we will substitute using the aforementioned activation functions. - -| Linear Model | Activation \( $\mathbb{f}$ \) Output Model | -|:---------------------------:|:------------------------------------------:| -| $$h = Wx$$ | $$h = Wx$$ | -| $$ x' = W^T h + b $$ | $$x' = f(W^T h + b)$$ | -| $$x' = W^T Wx + b$$ | $$x' = f(W^T Wx + b)$$ | - -
-
- {% include figure.html path="/assets/img/2023-11-10-superposition/Autoencoder.png" class="img-fluid" %} -
-
-
-
-We create an autoencoder - compressing down to induce polysemanticity. This maps $x$ to a direction in a lower-dimensional space, represented by $$h = Wx$$. Each column of $W$ corresponds to a lower-dimensional representation of a feature in $x$. To reconstruct the original vector, $W^T$ is used, ensuring clear feature representation correspondence. This structure results in a symmetric matrix $W^TW$ and allows for clear visualization of the weights. They visually allow for the determination of the presence of superposition.
-
-
-### Loss
-
-
-Sparsity, Importance and Our Network come together in the following loss function:
-
-
-$$
- L = \sum_{i} \sum_{x} I_{i}(x_{i} - x'_{i})^{2} $$
-
-
-Motivated by , we use a standard MSE loss, where $x_i$ and $x_i'$ measure the absolute difference in the auto-encoding of the datapoint. The Importance factor, $I_i$ , describes how important the given reconstruction is. A smaller importance will allow loss minimization even with a poor reconstruction.
-
-
-## Results
-
-
-Below we present each activation function, along with plots depicting how training results in superposition at varying degrees of sparsity.
-
-For the $n=10, m=5$ experiment, we show the $W^TW$ matrix and neuron feature distribution at varying degrees of sparsity. The $W^TW$ matrix reveals which features are prioritized (shown by the diagonal terms) and any polysemanticity that occurs (shown by the off-diagonal terms). The neuron feature distribution shows how each of the $n=10$ features is mapped to each of the $m=5$ embedding dimensions. This can aid in understanding under what conditions polysemanticity arises and how it occurs under each condition of sparsity.
-
-For the $n=2, m=1$ experiment, we show a phase diagram. This phase diagram shows how the second "extra" feature of the length-2 input vector is encoded.
There are three options: not encoded at all (only the first feature is encoded), encoded in superposition with the first feature, and encoded as the only feature (the first feature is not encoded). - -### ReLU - -The ReLU (Rectified Linear Units) activation function is a piecewise-linear function, a simple non-linearity that allows models to use superposition of features. ReLU was the only activation function used in , so our work with the ReLU function was primarily to verify the results from their work and create a baseline for our subsequent experiments. - -The following are the $W^TW$ matrices and feature-neuron mappings: -
- ReLU $W^TW$ Matrices -
-
-
- {% include figure.html path="/assets/img/2023-11-10-superposition/Sparsity_super_relu.png" class="img-fluid" %} -
-
- -As per the results in , the ReLU model focuses on the most significant features in the low sparsity regime (generally resulting in monosemanticity), while relying on superposition in the high sparsity regime (polysemanticity). With weaker signals for the most important features in the high sparsity regime, the model encodes multiple features in each neuron activation to minimize error of the sparse signals. Notably, the ReLU model uses antipodal pairs in the mapping of features to encode multiple features to single neurons. This can be seen as a light-colored diagonal entry within $W^T W$ and a corresponding dark-colored off-diagonal entry within the same column. This antipodal mapping of features is a method that the model uses to compress more than one feature to one neuron. This antipodal mapping is more interpretable than other kinds of polysemanticity which occurs in subsequently-described activation functions which “speckle” multiple features into a single neuron, making it more difficult to determine how the superposition occurs in that model. - - -The following is the phase diagram of the ReLU models: -
-
- {% include figure.html path="/assets/img/2023-11-10-superposition/phase_51_relu.png" class="img-fluid" %} -
-
- {% include figure.html path="/assets/img/2023-11-10-superposition/legend.png" class="img-fluid" %} -
-
-In regimes of high sparsity (i.e., below $1-S=0.1$ on the phase diagram above) the ReLU models are highly polysemantic for all relative feature importances, reflecting an inability to encode features with a sparse signal. In regimes of low sparsity, the model generally embeds the more important of the two features. This result mirrors the phase diagram in as expected.
-
-### GeLU/SiLU
-
-The GeLU (Gaussian Error Linear Units) and SiLU (Sigmoid Linear Units) activation functions are very similar to one another, and as a result produced very similar experimental results. Both functions are akin to a "smoothed out" version of the ReLU function, i.e., they have no discontinuities. The GeLU has recently been popularized as the activation function of choice in many transformers, including BERT and GPT . The GeLU is differentiable for all $x$ - and has a smoother curve than the SiLU (Swish) activation. found that in the setting of transformers, the GeLU was less interpretable than the SoLU. This may be the case after stacking many layers of activations - but with a single layer this is not the case.
-
- GeLU $W^TW$ Matrices -
-
-
- {% include figure.html path="/assets/img/2023-11-10-superposition/Sparsity_super_gelu.png" class="img-fluid" %} -
-
-
- SiLU $W^TW$ Matrices -
-
-
- {% include figure.html path="/assets/img/2023-11-10-superposition/Sparsity_super_silu.png" class="img-fluid" %} -
-
- -The GeLU and SiLU models exhibit similar kinds of superposition in their weight matrices. With increasing sparsity, superposition of features does happen, but it is more “strict” than the ReLU model, generally mapping at most two features to any single neuron. In each of the polysemantic neurons, though, there is one feature that dominates, suggesting that these activation functions enforce sparsity in their activations. There are also many antipodal pairs of features within these models, reiterating the behavior that exists in the ReLU models (also found in ). -
-
- {% include figure.html path="/assets/img/2023-11-10-superposition/phase_51_gelu.png" class="img-fluid" %} -
-
- {% include figure.html path="/assets/img/2023-11-10-superposition/phase_51_silu.png" class="img-fluid" %} -
-
-
-
- {% include figure.html path="/assets/img/2023-11-10-superposition/legend.png" class="img-fluid" %} -
-
-
-
-The above phase diagrams of the GeLU and SiLU models show a marked difference from that of the ReLU model (earlier), despite the similar shapes of these three activation functions. The GeLU and SiLU models exhibit significant monosemanticity at high degrees of sparsity, unlike the ReLU, which results in near-complete polysemanticity for sparsities higher than $S=0.9$. This difference may reflect SiLU's and GeLU's better fit as an activation for picking up the signal in sparse feature representations, making the case for GeLU and SiLU as more interpretable activation functions within larger models.
-
-### Sigmoid
-
-The Sigmoid function is a smooth activation function with an output range of $(0, 1)$. This maps directly to the desired range of values that the model is trying to replicate.
- Sigmoid $W^TW$ Matrices -
-
-
- {% include figure.html path="/assets/img/2023-11-10-superposition/Sparsity_super_sigmoid.png" class="img-fluid" %} - -
-
- -The Sigmoid model exhibits superposition in all neurons as soon as the sparsity is non-zero, as can be seen from the “speckling” of non-zero off-diagonal terms in $W^T W$. This is a difference from the ReLU/GeLU/SiLU models, for which the superposition “leaks” into the least significant encoded features at low, non-zero sparsities and eventually affects all features at higher sparsities. This low-sparsity superposition may occur because the Sigmoid function strictly maps to $(0, 1)$, with increasingly large pre-activation inputs necessary to map to values close to 0 and 1. As such, the model may be “speckling” the off-diagonal values in an attempt to “reach” these inputs which are close to 0 and 1. - -
-
- {% include figure.html path="/assets/img/2023-11-10-superposition/phase_51_sigmoid.png" class="img-fluid" %} -
-
- {% include figure.html path="/assets/img/2023-11-10-superposition/legend.png" class="img-fluid" %} -
-
-Despite differences in the occurrence of polysemanticity, the ReLU and Sigmoid models exhibit very similar phase diagrams, reflecting an inability to encode multiple features at sparsities above $S=0.9$ (i.e., below $1-S=0.1$ on the phase diagram). As discussed above, this may be caused by the vanilla sigmoid activation's inability to "reach" target values close to 0 or 1. - -### Tanh - -The Tanh function is another smooth activation function, but it results in significantly different behavior from the Sigmoid (despite being a linear mapping of the Sigmoid). -
- Tanh $W^TW$ Matrices -
-
-
- {% include figure.html path="/assets/img/2023-11-10-superposition/Sparsity_super_tanh.png" class="img-fluid" %} - -
-
- -With the Tanh activation function, the models prioritize the most important features regardless of sparsity. This behavior is possibly attributed to the range that the Tanh function maps to $(-1, 1)$, while the target range of input values in this experiment are $[0, 1]$. This behavior is similar to that of a linear model (i.e., no activation function) which exhibits no capability to use superposition, but the phase diagram reveals subtle differences from the linear model results. - -
-
- {% include figure.html path="/assets/img/2023-11-10-superposition/phase_51_tanh.png" class="img-fluid" %} -
-
- {% include figure.html path="/assets/img/2023-11-10-superposition/legend.png" class="img-fluid" %} -
-
- -Although nearly performing as the linear model would, only encoding the most important feature, there is some difference to the linear model along the boundary between features, as can be seen around the importance of 1. This reflects the model's ability to use non-linearity to perform superposition. - -#### A Note on Sigmoid and Tanh - -Despite similarities in the S-like curvature of the Sigmoid and Tanh activation functions, the Sigmoid model exhibits superposition, whereas the Tanh model exhibits nearly zero superposition. A key difference between the two functions is the fact that the Sigmoid function maps inputs to a range of $(0, 1)$, while the Tanh function maps inputs to a range of $(-1, 1)$. This difference is significant in our experiment, as our experiment uses models to recreate random vectors with elements in the range $[0, 1]$. The range of the Sigmoid function matches this range, while the range of the Tanh function which matches this range only occurs for non-negative inputs to the Tanh function. In other words, the $(-\infty, 0)$ input domain (which maps to the range $(-1, 0)$) of the Tanh function remains useless for prediction of values which should be in the range $[0, 1]$. Therefore, the tanh function empirically acts like a linear function (i.e., no activation layer). - - -### SoLU - -The SoLU (Softmax Linear Units) activation function is based on the work from . -$$ Solu(x) = x * softmax(x) $$ -SoLU is a function for which the activation of each neuron is dependent on all the other neurons within its own layer. This is significantly different from all the other activations that we tested, as the activations of neurons with the other functions are independent of the other neurons within the same layer. In other words, all the other activation functions are univariate while the SoLU is multivariate. 
Similar to other approaches like L1 regularization, the SoLU amplifies neurons with relatively large pre-activations and de-amplifies neurons with relatively smaller pre-activations. This behavior pressures the model to be more monosemantic (and therefore more interpretable in some settings), as discussed in . - -
- SoLU $W^TW$ Matrices -
-
-
- {% include figure.html path="/assets/img/2023-11-10-superposition/Sparsity_super_solu.png" class="img-fluid" %} -
-
- -In our experiment, the SoLU model results in non-zero superposition of all features with all degrees of sparsity. This may be attributed to the way that the SoLU “forces” activations to be sparse, i.e., the activations result in a “winner-takes-all” behavior due to the way that the Softmax function works. This is not a useful property for prediction of a vector of independently-drawn values, as the input vectors are unlikely to be peaky, i.e., the SoLU does not quite fit the purposes of its task. - -
-
- {% include figure.html path="/assets/img/2023-11-10-superposition/phase_51_solu.png" class="img-fluid" %} -
-
- {% include figure.html path="/assets/img/2023-11-10-superposition/legend.png" class="img-fluid" %} -
-
- -As seen in the heatmap plot above, the SoLU activation results in very polysemantic behavior. This function is not precisely fit for its task of recreating given vectors and likely results in using polysemanticity to attempt to pass information about inputs forward. Curiously, the SoLU models have preference for the more important feature in the low sparsity regime. - -### Bringing Them All Together -
- Sparsity vs Dimensions Per Feature -
-
- -
-
-
-The diagram above depicts a variation on the two experiments explained thus far. In this experiment $n=200$ features were compressed to $m=20$ features and the loss function was tweaked to give uniform importance $I_i = 1$ to all features. This was done to determine how each activation function compresses features in different sparsity regimes without the influence of feature importance.
-
-On the y axis, the plot depicts a metric (dimensions per feature) that measures the number of dimensions a model dedicates to each feature. In other words, a point with a y-value near 1 represents a model that dedicates one dimension of its embedding space to one feature, whereas a point with a y-value near 0.25 represents a model that represents four features at each dimension.
-
-The plots are generally consistent with the analysis from the previous experiments. Many of the activations result in superposition in the low-density/high-sparsity regime, and increases in sparsity result in increases in the polysemanticity of the model (i.e., the dimensions per feature decrease). Consistent with the other experiments, SiLU and GeLU perform very similarly. The Sigmoid and SoLU activations pack nearly 20 features per dimension at high sparsities. The Tanh activation exhibits behavior similar to the linear model, neatly packing one dimension with one feature, a result that is mirrored in the previous experiments. Similar to the results in , we see "sticky" behavior of the ReLU activation function at 1 and 0.5 dimensions per feature. This can be explained by the phenomenon of "antipodal pairs" discussed in . None of the other activation functions that we tested exhibit this behavior - which is striking since this is a well-studied effect for the ReLU activation function. This may be because the ReLU activation function is the only one that is not smooth, and therefore has different behavior than the other activation functions.
- - -## Conclusion - -Our investigation into the effects of various activation functions reveals that significant changes occur in model behavior depending on the chosen function. This finding underscores the ability to modulate the degree of superposition through the selection of activation functions, highlighting yet unexplored degrees of freedom in model design. This line of inquiry goes seamlessly with considerations of how neural networks are initialized and trained, suggesting these as promising future research directions. - -Our work is limited by the breadth of activation functions that we tested, though. Further iterations on each of the activation functions (e.g., tweaking the Sigmoid function to map to the range $(-\epsilon, 1+\epsilon)$) could prove fruitful in getting better performance from the models. Furthermore, while writing this blog, published a new key insight related to the importance of initialization in superposition, which we do not explore here. Despite this, we have learned valuable insights about the effects that our set of activation functions can have on superposition. - -Pursuing enhanced interpretability, however, does not come without its challenges. Specifically, striving for transparency and understandability in neural network models raises concerns about the potential for deception. Despite these challenges, our work aims to develop neural network models that are more interpretable, transparent, and secure. - - -{% bibliography --cited %} - diff --git a/_posts/2023-11-10-transformer-elo-prediction.md b/_posts/2023-11-10-transformer-elo-prediction.md deleted file mode 100644 index 4071cdf5..00000000 --- a/_posts/2023-11-10-transformer-elo-prediction.md +++ /dev/null @@ -1,287 +0,0 @@ ---- -layout: distill -title: Emoji3Vec -description: Our project seeks to expand on the previous attempts at "emoji2vec", or generating semantically meaningful embeddings for emojis. 
-date: 2023-11-10 -htmlwidgets: true - -authors: - - name: Anonymous - -# must be the exact same name as your blogpost -bibliography: 2023-11-10-transformer-elo-prediction.bib - -toc: - - name: Introduction - - name: Background and Related Work - - name: Methods and Results - subsections: - - name: Training Emoji Embeddings with Descriptions - - name: Training Emoji Embeddings with Twitter Data - - name: Conclusion ---- - -# Introduction - -In machine learning, models often create or learn internal representations for the inputs they are given. For instance, an image might become a vector containing the RGB data for every pixel. These internal representations are then processed and transformed until the model finally translates its representation into the desired output form (via softmax over all output possibilities, for example). - -The lower dimensional internal representations, known as embeddings, can often carry semantic meaning which can help us understand the data better. Inspired by word2vec, a project for learning embeddings for words, we attempt to learn embeddings for emojis that are semantically interpretable. Learning accurate representations is important for downstream tasks, for example: sentiment analysis and other kinds of classification run better with useful embeddings. - -# Background and Related Work - -Although similar ideas have been explored in the past, we felt that there was still a gap in prior research: specifically, we wanted to create a lightweight model that still learned emoji embeddings directly from data and context. - -First, it is important to mention the influential and well known [Efficient Estimation of Word Representations in Vector Space](https://arxiv.org/pdf/1301.3781.pdf), commonly referred to as word2vec . Word2vec was trained on a massive dataset of around 6 billion words, and was able to produce some very accurate embeddings that were proven to be useful in downstream tasks. 
 For instance, doing the following arithmetic on the embeddings associated with each word produced: King - Man + Woman = Queen. This was an incredible result and inspired much work in the NLP domain in the following years.
-
-In 2016, a paper called [emoji2vec: Learning Emoji Representations from their Description](https://arxiv.org/pdf/1609.08359.pdf) was published. As the name suggests, this paper sought to produce embeddings for emojis to be used in the same vector space as the word2vec embeddings, and attempted to do it by using emoji descriptions. The researchers trained their model with baseline embeddings taken directly from summing the word embeddings for each word in every emoji's description. For instance, the embedding for "😍" began as the sum of the word embeddings (taken from word2vec) of: "smiling" + "face" + "with" + "heart" + "eyes". The main benefit of this approach was a strong baseline that could be established without any training data. Recently, in 2021, another paper called [Emojional: Emoji Embeddings](https://bashthebuilder.github.io/files/Emojional.pdf) was published that extended this approach, adding in additional words (that are related, as judged by Google News) to each baseline embedding. For instance, "✊" was set to be the result of: "raised fist" + "resistance" + "black lives matter" + ...
-
-After considering the above papers, we decided to create a model that would train similarly to word2vec (using naturally sourced data, and from context as opposed to a description) that also was able to produce useful embeddings on smaller amounts of data/memory/training time. Specifically, we felt that the descriptions would err when emojis began to mean different things than they are described as. For instance, the skull emoji is perhaps more often used to indicate embarrassment or disagreement than actual death or skulls.
This is addressed somewhat in the 2021 Emojional paper, but that one is very limited by the exact words it puts into each emoji's embedding, and is less adaptable to new meanings. Further, we felt that there was value in creating a more lightweight model that was still able to produce meaningful representations, both to simply be easier to train and run and also to perhaps find optimizations that wouldn't have been found if we had the option of just training on a larger set of data/training for a longer time. - -# Methods and Results - -We trained two sets of emoji embeddings to map emojis to the same 300-dimensional space as the one FastText uses for its word embeddings. The first was trained on a set of emoji descriptions, with the intention to learn emoji embeddings that reflect the literal appearances of each emoji. We closely follow the methodology as described in the emoji2vec paper to use as a baseline. The second was trained on a set of emoji-containing tweets, with the intention to learn emoji embeddings that reflect how they’re used online. - -## Training Emoji Embeddings with Descriptions - -### Data Cleaning - -We started with a [dataset](https://github.com/pwiercinski/emoji2vec_pytorch/blob/master/data/raw_training_data/emoji_joined.txt) of emoji descriptions from the Unicode emoji list. After cleaning, we were left with about 6000 descriptive phrases for 1661 emojis within a Python dictionary mapping emojis to various corresponding descriptions. Examples of entries include: - -1. '🐏': {'horn', 'horns', 'male', 'ram', 'sheep'} -2. '🆘': {'distress signal', 'emergency', 'sos', 'squared sos'} -3. '👷': {'builder', 'construction worker', 'face with hat', 'safety helmet'} - -One detail is that we had to generate a bijective mapping between emojis and integers for model training. We encourage those attempting similar projects to save this mapping (in a pickle file, for example) for later use. 
Leon was very sad when he lost this mapping and couldn't make sense of his first trained model's outputted embeddings. - -{% include figure.html path="assets/img/2023-11-10-transformer-elo-prediction/project-vis.jpeg" class="img-fluid"%} -_a visualization of how we cleaned our data, from an example of a tweet_ - -### Generating Training and Test Data - -With a representation learning framework in mind, we randomly generated positive and negative descriptions for each emoji. We defined an emoji's positive samples as descriptions that truly corresponded to the emoji, and we defined its negative samples as other descriptions in the dataset that weren't used to describe the emoji. Guided by the emoji2vec paper, we generated positive and negative samples in a 1:1 ratio. - -### Model Training - -After generating positive and negative samples, we used a pretrained FastText model to calculate the average of the embeddings of each word in each description. Put mathematically, if we let the sequence of words in a description be $$w_1, w_2, \dots, w_k$$, the set of all strings be $$\mathcal{W}$$, and the FastText model be expressed as a mapping $$f: \mathcal{W} \mapsto \mathbb{R}^{300}$$, we calculated our description embeddings as - -$$\frac{1}{k}\sum_{i=1}^kf(w_i).$$ - -This is a notable deviation from the methodology as described in the emoji2vec paper. Instead of using word2vec embeddings, we chose FastText because it uses sub-word tokenization and thus supports out-of-vocabulary strings as input. We also averaged the description embeddings instead of simply taking a summation to normalize for description length. - -``` -#creates a dictionary mapping descriptions to avg. word embeddings - -descr_to_embedding = dict() - -for descr in all_descriptions: - word_lst = descr.split(' ') #split description into list of words - embed_lst = [] - - for i in range(len(word_lst)): #repl. 
words by their embeddings - embed_lst.append(torch.tensor(ft[word_lst[i]])) - avg_embedding = torch.mean(torch.stack(embed_lst, dim=0), dim=0) #take mean over embeddings - - descr_to_embedding[descr] = avg_embedding -``` - -We again followed the emoji2vec training methodology. For every emoji embedding $$x_i$$ and description embedding $$v_j$$, the authors model $$\sigma(x_i^T v_j)$$ as the probability of the description matching with the emoji, where $$\sigma$$ is the sigmoid function. Then our model minimizes the binary cross-entropy loss function - -$$\mathcal{L}(x_i,v_j,y_{ij}) = -\log(\sigma(y_{ij}x_i^T v_j - (1-y_{ij})x_i^T v_j))$$ - -where $$y_{ij}$$ is 1 when $$v_j$$ is a positive sample and 0 otherwise. - -The authors don't describe the exact model architecture used to learn the emoji embeddings, so we likely also deviate in methodology here. Our model is very simple: on some input emoji $$x_i$$, we pass it through an nn.Embedding() module, compute $$\sigma(x_i^T v_j)$$, and pass it to nn.BCELoss(). This way, the only learnable parameters in the model are in nn.Embedding(), and model training is as efficient as possible. 
- -``` -# the main model class -# follows the Emoji2Vec training - -class EmojiDict(nn.Module): - -def __init__(self, n_emojis): - # n_emojis: the number of emojis we're learning representations of - - super().__init__() - self.embedding = nn.Embedding( - num_embeddings = n_emojis, - embedding_dim = 300 # size of word2vec embedding - ) - self.sigmoid = nn.Sigmoid() - -def forward(self, x, sample): - # x: a batch of emoji indices, shape (B, ) - # sample: a batch of avg'd embeddings, shape (B, 300) - - x = self.embedding(x) - - # performing a batched dot product - x = torch.unsqueeze(x, dim=1) #(B x 1 x 300) - sample = torch.unsqueeze(sample, dim=2) #(B x 300 x 1) - result = torch.bmm(x, sample) #(B x 1 x 1) - result = torch.flatten(result) #(B, ) - - result = self.sigmoid(result) #should output probabilities - - return result #should be shape (B, ) -``` - -### t-SNE on Learned Embeddings - -We trained the model for 60 epochs over a 80-20 train-test split of 250 positive and 250 negative samples for each emoji. We used an Adam optimizer with the default parameters, and model training took roughly an hour. The model achieved 0.19 logloss and 0.98 accuracy on a validation set. - -After the model was trained, we took emoji embedding weights from the model's nn.Embedding() module and projected them down to two dimensions using t-SNE. - -{% include figure.html path="assets/img/2023-11-10-transformer-elo-prediction/emojidict-triplefit.png" class="img-fluid" %} - -We can see that the model is excellent at grouping emojis that have similar appearances. Nearly all the faces are in the top-left, the zodiac symbols are in the bottom-left, the flags are at the bottom, the foods are on the right, the modes of transportation are in the top-right... the list can keep going. While there are some random emojis scattered about, similar emojis generally are similar in embedding space as well. 
- -### Emoji-Emoji Similarities - -To confirm this idea quantitatively, we can fix individual emojis and look at its nearest neighbors in embedding space with cosine distance. - -| Emoji | 1-NN | 2-NN | 3-NN | 4-NN | 5-NN | 6-NN | 7-NN | 8-NN | 9-NN | 10-NN | -| ----- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ----- | -| 😍 | 💖 | 😻 | 😄 | 😀 | 😚 | 💟 | 😘 | 😊 | 😽 | 💑 | -| 😀 | 😄 | 😊 | 😃 | 🙂 | 😑 | 😁 | 😸 | 🤗 | 😆 | 🤧 | -| 💀 | ☠ | 🆎 | 🌫 | 🐁 | ⛓ | ⛸ | 🌮 | 🦅 | ⚖ | 🐙 | -| 🚀 | 🛰 | 👽 | 🚡 | 🛳 | 📡 | 🚢 | 📋 | 🚎 | 🆚 | 🛥 | - -We see here that the nearest neighbors also generally make sense. 😍's nearest neighbors all involve love or positive emotions, and 🚀's neighbors are generally about space or modes of transport. Interestingly, only 💀's first neighbor seems remotely similar to it. We believe that this is just because death is a mostly unrepresented theme in emojis. - -### Word-Emoji Similarities - -Since we trained emoji embeddings into the same space as the FastText word embeddings, we can also look at the nearest emoji neighbors to any English word! - -| Word | 1-NN | 2-NN | 3-NN | 4-NN | 5-NN | 6-NN | 7-NN | 8-NN | 9-NN | 10-NN | -| ------- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ----- | -| happy | 😃 | 😺 | 😌 | 😹 | 🏩 | 😊 | 💛 | 😂 | 😞 | 😁 | -| sad | 😔 | 😭 | 😒 | 🙁 | 😟 | 😞 | 🙍 | 😢 | 😁 | 😯 | -| lit | 🚨 | 🕎 | 🌆 | 🔦 | 📭 | 🎇 | 🕯 | 💫 | 🏥 | 💡 | -| bitcoin | 💛 | 🤑 | 🎮 | 💙 | 🌈 | 🤓 | 📱 | 📅 | 🐰 | 🍆 | - -Again, the nearest neighboring emojis generally make sense. Bitcoin's nearest neighbors are considerably less accurate than the others. Since our nearest neighbors are much more accurate for other English words like "cryptocurrency", we attribute this inaccuracy to FastText having poor embeddings for "Bitcoin", which was much less popular word when FastText was trained (in 2015). - -One thing to note from these nearest-neighbor tables is that embeddings trained with the emoji2vec method take words very literally. 
"🚀" is related to space and transportation, and "lit" is related to things that literally light up. As such, these embeddings won't adjust to semantic changes in emojis as slang develops and people become increasingly clever in their emoji use. - -## Training Emoji Embeddings with Twitter Data - -### Data Cleaning - -We started with a [dataset](https://www.kaggle.com/datasets/rexhaif/emojifydata-en?select=test.txt) of emoji-containing tweets. Motivated by the data cleaning done in the emojiSpace paper, we remove duplicate tweets, numbers, hashtags, links, emails, and mentions. Then, we extract the "context" words and emojis around each emoji with a window size of 4 in both directions and tokenize it. We cleaned only a subsample of the tweets due to constraints on memory and compute. Even so, after cleaning, we were left with about 272,000 contexts for 1251 emojis. Examples of contexts for the emoji 🤑 include: - -1. ('the', 'promotion', 'code', 'works', 'we', 'will', 'be', 'giving') -2. ('my', 'grind', 'all', 'week', 'i', 'ain’t', 'been', 'getting') -3. ('cash', 'in', 'on', 'sunday', 'thank', 'you', 'so', 'much') - -### Generating Training and Test Data - -With a representation learning framework in mind, we randomly generated positive and negative descriptions for each emoji. We defined an emoji's positive samples as descriptions that truly corresponded to the emoji, and we defined its negative samples as other descriptions in the dataset that weren't used to describe the emoji. Guided by the emoji2vec paper, we generated positive and negative samples in a 1:1 ratio. - -As in the earlier model, we randomly generated positive and negative contexts for each emoji. We defined an emoji's positive samples equivalently as before, but this time we used the set of all contexts across all emojis as the set of negative examples. Doing this is obviously not ideal, but it provided a huge performance boost when generating data. 
Additionally, with such a large dataset, drawing a positive sample as a negative one happens relatively infrequently. - -### Model Training - -The training method we used for this model was nearly identical to that of the first model, and similar to the Continuous Bag-of-Words (CBOW) method for training word2vec. For every context, we calculated the average of the individual word embeddings using FastText. Often, another emoji would be part of the context; such emojis would be passed into the nn.Embedding() module as well to produce an embedding to be passed into the average. The model architecture remained nearly identical, and continued using binary cross-entropy loss as our loss function. - -Our model architecture differs somewhat from the original word2vec model, which uses a cross-entropy loss over the entire vocabulary of words as its loss function. While we may lose some expressivity by using binary cross-entropy instead, we believe that making this change made our model more lightweight and easier to train. 
- -``` -# the main model class -# essentially a CBOW on emojis - -class EmojiCBOW(nn.Module): - - def __init__(self, n_emojis): - # n_emojis: the number of emojis we're learning representations of - - super().__init__() - self.embedding = nn.Embedding( - num_embeddings = n_emojis, - embedding_dim = 300 # size of word2vec embedding - ) - - self.sigmoid = nn.Sigmoid() - - def forward(self, x, embeddings, emojis, masks): - # x: a batch of emoji indices, shape (B, ) - # embeddings: a batch of summed word embeddings from context, shape (B x 300) - # emojis: a batch of in-context emoji indices, with -1 as a placeholder, shape (B x 8) - # masks: a batch of masks for the relevant emoji indices, shape (B x 8) - - x = self.embedding(x) - - masks_unsqueezed = torch.unsqueeze(masks, dim=2) # get the dimensions right - emoji_embeddings = self.embedding(emojis * masks) * masks_unsqueezed # apply embeddings to emojis w/ mask applied, (B x 8 x 300) - emoji_embeddings = torch.sum(emoji_embeddings, dim=1) # sum acros embeddings, (B x 300) - tot_embeddings = embeddings + emoji_embeddings # (B x 300) - tot_embeddings = tot_embeddings / 8 # get avg embeddings, could help w/ numerical stability? - - # performing a batched dot product - x = torch.unsqueeze(x, dim=1) #(B x 1 x 300) - tot_embeddings = torch.unsqueeze(tot_embeddings, dim=2) #(B x 300 x 1) - - tot_embeddings = tot_embeddings.to(torch.float) / 8 - result = torch.bmm(x, tot_embeddings) #(B x 1 x 1) - result = torch.flatten(result) #(B, ) - - result = self.sigmoid(result) #should output target probabilities - - return result #should be shape (B, ) -``` - -### t-SNE on Learned Embeddings - -We trained the model for 80 epochs over a 80-20 train-test split of 250 positive and 250 negative samples for each emoji. We used an Adam optimizer with the default parameters, and model training took roughly two hours. The model achieved 0.39 logloss and 0.79 accuracy on a validation set. 
- -After the model was trained, we took emoji embedding weights from the model's nn.Embedding() module and projected them down to two dimensions using t-SNE. - -{% include figure.html path="assets/img/2023-11-10-transformer-elo-prediction/emojitweets-transfer-40e.png" class="img-fluid" %} - -The model does reasonably well at clustering similar emojis together; as before, the flags, faces, and numbers are close together in embedding space. However, the quality of this clustering is noticeably worse than it was in the baseline model. We attribute this to the quality of the dataset and to the increased difficulty in the learning task. The emoji descriptions were clean, precise, and informative; tweets are generally none of those three. Additionally, learning embeddings from contexts has historically required a lot of training data and compute to perform successfully. We, however, only had the compute and memory to sample 500 tweets per emoji, which is only a tiny sample from the massive distribution of possible contexts that may surround any given emoji. Producing emoji embeddings that outperform the baseline model would require much more training data and time than what Colab offers. - -While these embeddings lose to the baseline embeddings in overall quality, they have certain properties that the baseline embeddings lack. Namely, since these embeddings were trained on a much more varied and organic dataset, they encode emoji use cases beyond what emojis literally mean. Specifically, they can learn from slang. - -### Emoji-Emoji Similarities - -To illustrate this, we can look at the nearest neighbors of the same four emojis that were presented earlier. We narrow down our search to the top-200 most common emojis in our dataset because those were likely learned the best by our model. 
- -| Emoji | 1-NN | 2-NN | 3-NN | 4-NN | 5-NN | 6-NN | 7-NN | 8-NN | 9-NN | 10-NN | -| ----- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ----- | -| 😍 | 🏆 | 💜 | 🎉 | 🇩🇪 | 💘 | 💖 | 👑 | 💞 | 💪 | 🇧🇷 | -| 😀 | 📚 | 😆 | 😏 | 🎉 | 😌 | 😫 | 🔗 | 🙂 | ⚡ | 🇫🇷 | -| 💀 | 😭 | 🍆 | 😓 | 🤤 | 💔 | 😩 | 🐥 | 😮 | 🐻 | 🍑 | -| 🚀 | 💸 | 🔹 | 💯 | 🎯 | 💵 | 2️⃣ | 👋 | 💰 | 😤 | 😎 | - -We see here that the nearest neighbors for 😍 and 😀 are noticeably less intuitive than the ones in the baseline model, though some still make sense. Interestingly, however, 💀 has become more associated with strong emotions like 😭 and 😩. This correlates with the online slang "I'm dead," which expresses a strong (could be both positive or negative) emotional response to something. Additionally, 🚀 has become more associated with money, which correlates with the use of 🚀 to indicate a stock or asset going "to the moon." - -### Word-Emoji Similarities - -We can also observe this phenomenon in the cosine similarities between words and emojis. We use the same words as above, and again we narrow our nearest neighbors search to the top 200 most popular emojis. - -| Word | 1-NN | 2-NN | 3-NN | 4-NN | 5-NN | 6-NN | 7-NN | 8-NN | 9-NN | 10-NN | -| ------- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ----- | -| happy | 😃 | 😺 | 😌 | 😹 | 🏩 | 😊 | 💛 | 😂 | 😞 | 😁 | -| sad | 😒 | 😻 | 👏 | 😥 | 😭 | 😓 | 😣 | 😔 | 😂 | 😪 | -| lit | 🔥 | 🚨 | 😍 | ✅ | 😎 | 💯 | 💣 | 🇺🇸 | 🗣 | 💫 | -| bitcoin | 💵 | 🎉 | 😱 | 💸 | 🤑 | 🔹 | 🇮🇳 | 🍃 | 😆 | 🌊 | - -As before, the nearest neighboring emojis generally make sense, but are less accurate than the neighbors in the baseline model. At the same time, the nearest neighbors now align more closely with slang (or "new" words like bitcoin). "Lit" now is more related to a feeling of firm agreement, and "bitcoin" is now more related to money. In both cases, the nearest neighbors align more with the words' common usages than their literal meanings. 
- -# Conclusion - -## Future Work - -Given the time and computational constraints we had for this project, we had to pass on many paths for future exploration. We list a few in this section. - -1. We would've liked to train our second model for much longer on a much larger dataset of tweets. Only about 400 of our emojis had over 50 tweets associated with them. This greatly restricted their positive sample sets, which likely resulted in far-from-optimal emoji embeddings. - -2. We also considered training a more expressive neural architecture for our second model. One word2vec CBOW [implementation](https://towardsdatascience.com/word2vec-with-pytorch-implementing-original-paper-2cd7040120b0) we found used a Linear layer after the Embedding layer. It projected the 300-dimensional embeddings into embeddings with dimensionality equal to the size of the emoji vocabulary to learn embeddings via a multi-class classification problem. We ultimately decided against using such a model because we doubted that we had the time, data, and compute to train a more complex model. - -3. Something we realized towards the end of our model training was that the embeddings from the first model could be used to inform training on our second model. It would be interesting to see if transfer learning could result in increased performance for our second model, especially since many emojis were underrepresented in our dataset of tweets. - -## Discussion - -Overall, despite the limitations, our lightweight model achieved reasonable accuracy with less than optimal conditions. One other challenge we faced had to do with Colab's memory constraints: we were only able to train on a small set of data and were forced to generate positive and negative pairs over and over from the same set. Given a larger and more diverse set of positive/negative pairs, we believe our model could have performed even better. 
- -Furthermore, we felt that our CBOW model definitely could add value for people solving downstream tasks, such as sentiment analysis. The emoji2vec model of summing the emoji's description's word embeddings is useful when there are few datapoints for each emoji, but the CBOW approach captures more subtle meanings and is much more accurate to how people actually use emojis in their day to day life—both have their merits. - diff --git a/_posts/2023-11-11-Exploring-Task-Specific-Data-Augmentation.md b/_posts/2023-11-11-Exploring-Task-Specific-Data-Augmentation.md deleted file mode 100644 index 6d17bb5c..00000000 --- a/_posts/2023-11-11-Exploring-Task-Specific-Data-Augmentation.md +++ /dev/null @@ -1,219 +0,0 @@ ---- -layout: distill -title: Semi-Supervised Domain Adaptation using Diffusion Models -description: 6.S898 Project -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Borys Babiak - affiliations: - name: MIT - - name: Arsh Bawa - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-12-12-Semi-Supervised-Domain-Adaptation.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Abstract - - name: Introduction & Background - - name: Related Work - - name: Our Contribution - - name: Methodology - - name: Experiment and Results - - name: Conclusion - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. 
-_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -# Abstract -Recently, there has been a breakthrough in image manipulation using Contrastive Language-Image Pretraining (CLIP). Recent work shows that GANs combined with CLIP can translate the images to unseen domains . However, in many cases these manipulations destroy the important information that user might want to learn (e.g., labels). Recently, there was a paper showing image manipulation leveraging a combination of diffusion models and CLIP . We leverage the method proposed in DiffusionCLIP paper to perform semi-supervised domain adaptation having limited labeled data. GitHub project page: https://github.com/babiakua/semi_supervised_domain_adapt. - -# Introduction & Background -## Diffusion models -Diffusion Denoising Probabilistic Models (DDPMs) were introduced by Ho et al. as a novel approach in the field of generative models . These models are built on the idea of gradually adding noise to data and then learning to reverse this process. - -The diffusion process is a Markov chain that adds Gaussian noise to the data over a series of steps. This process can be described mathematically as: - -$$ x_{t} = \sqrt{\alpha_{t}} x_{0} + \sqrt{1 - \alpha_{t}} \epsilon $$ - -where $x_{t}$ is the data at step $t$, $x_{0}$ is the original data, $\alpha_{t}$ is a variance schedule, and $\epsilon$ is Gaussian noise. - -The reverse process aims to denoise the data, starting from the noisy version and progressively removing noise. It's modeled as: - -$$x_{t-1} = \frac{1}{\sqrt{\alpha_{t}}}\left(x_{t} - \frac{1-\alpha_{t}}{\sqrt{1-\alpha_{t}}} \epsilon_{\theta}(x_{t}, t)\right)$$ - -with $\epsilon_{\theta}(x_{t}, t)$ being a neural network predicting the noise. 
This neural network usually has a UNet architecture with downsampling layers, upsampling layers, and a bottleneck. - -The training objective is to minimize the difference between the predicted noise $\epsilon_{\theta}(x_{t}, t)$ and the actual noise $\epsilon$. This is done using a variant of the mean squared error (MSE) loss: - -$$\min_\theta \mathbb{E}_{x_0 \sim q(x_0), w \sim \mathcal{N}(0, I), t} \left\| w - \epsilon_{\theta}(x_t, t) \right\|^2_2.$$ - -DDIM (Denoising Diffusion Implicit Models) paper proposed an alternative non-Markovian noising process that has the same forward marginals as DDPM but has a distinct sampling process as follows: - -$$x_{t-1} = \sqrt{\alpha_{t-1}} f_\theta(x_t, t) + \sqrt{1 - \alpha_{t-1} - \sigma_t^2}\epsilon_\theta(x_t, t) + \sigma_t z,$$ - -where $z \sim \mathcal{N}(0, I)$ and $f_\theta(x_t, t)$ is the prediction of $x_0$ at $t$ given $x_t$ and $\epsilon_\theta(x_t, t)$: - -$$f_\theta(x_t, t) := \frac{x_t - \sqrt{1 - \alpha_t}\epsilon_\theta(x_t, t)}{\sqrt{\alpha_t}}.$$ - -DDIM process allows for the use of different samplers by setting $\sigma_t$ to different values. In particular, setting $\sigma_t = 1$ makes the process a Markov process equivalent to DDPM while setting $\sigma_t = 0$ makes the process deterministic and allows for almost perfect inversion. DiffusionCLIP method leverages the deterministic nature of the process for image manipulation. - -## Image manipulation with CLIP -CLIP is a model for joint image-language representations which is trained on a large dataset of image-text pairs . Using a contrastive learning objective, it learns a joint, multimodal embedding space. The representations learned by CLIP can be used for many tasks including image manipulation and image synthesis. DiffusionCLIP uses CLIP loss to tune the image generator (e.g., a pretrained diffusion model). 
CLIP loss takes the following form: - -$$\mathcal{L}_{\text{direction}} (x_{\text{gen}}, y_{\text{tar}}; x_{\text{ref}}, y_{\text{ref}}) := 1 - \frac{\langle \Delta I, \Delta T \rangle}{\| \Delta I \| \| \Delta T \|}$$ - -where -$ -\Delta T = E_T(y_{\text{tar}}) - E_T(y_{\text{ref}}), \Delta I = E_I(x_{\text{gen}}) - E_I(x_{\text{ref}}). -$ - -$E_I$ and $E_T$ are CLIP's image and text encoders, $y_{\text{ref}}, x_{\text{ref}}$ are the source domain text and image, and $$y_{\text{tar}}$$ is a text description of a target and $$x_{\text{gen}}$$ denotes the generated image. - -# Related Work -Recent work in the field discovered an alternative way of manipulating image attributes using pre-trained diffusion models . The authors show that instead of tuning the model, one can modify the reverse process and guide it towards the target domain. The reverse process is guided through a lower-dimensional (compared to original latents) latent space which in this case is the bottleneck of the UNet of the original pre-trained diffusion model. Authors show that this latent space enjoys high-level semantics and linearity which allows for more flexible image manipulation. - -Although this method is still in development (as it was our initial idea for domain adaptation which did not succeed), the latent space suggested by the authors can be used for a more powerful idea which is unsupervised domain adaptation. By smoothing the test images at appropriate noise level, one can classify whether the image possesses a given attribute. Then one can make training and test distributions close to each other by manipulating the attributes of interest. This direction is of our future interest to explore. - -Another area of current research is trying to use GANs (also guided by the CLIP loss) for image manipulation . Using GANs allows for zero-shot image manipulation which is way faster than the diffusion models' reverse process. 
However, GANs suffer from their limited inversion capability and destruction of initial image information which might be dangerous for downstream tasks (e.g., consider a classification task with GAN manipulating training image labels). - -An alternative method for manipulating and editing images is mixing latents of source and target . Although this method does provide good results in terms of sample quality, it lacks control for our set-up. We would like to have control over the attributes we are changing and keep the others unchanged. - -Another method for image editing is classifier guidance which adds classifier gradients in the reverse process to control the generation process . This method is unsuitable for our problem set-up since we need to train an additional classifier for the target domain, and we do not have enough data to train it. - -# Our Contribution -We demonstrate capabilities of text-guided diffusion to perform domain adaptation in a semi-supervised setting (e.g., unseen attributes of the target domain). To the best of our knowledge, this is the first work that shows the power of diffusion models in performing domain adaptation when the difference between the train and target domains can be described in a short prompt. - -# Methodology -A frequently encountered problem in supervised learning is one where we have training data from one domain (the source domain) but we want to conduct inference on data that comes from a different but related domain (the target domain) that can be described using text. Specifically, we want to focus on the setting where we have access to an adequate number (for training) of observations from the source domain (a subset of which are labelled) and we want to conduct inference (eg. classification) on unlabelled observations from the target domain. An additional constraint is that we only have a limited number of observations from the target domain so it is infeasible to learn the target distribution. 
Here, we deal with image data. - -## DiffusionCLIP -We first train a diffusion model on both labelled and unlabelled images from the source domain. This diffusion model is first used to convert input images (from source domain) to the latent. Then, the reverse path is fine-tuned to generate images driven by the target text (text description of target domain), guided by the CLIP loss. The details are given in the subsequent sections. - -### DiffusionCLIP Fine-tuning -In terms of fine-tuning, the DiffusionCLIP model allows for modification of the diffusion model itself as compared to the latent, enhancing its effectiveness. The process utilizes a composite objective including directional CLIP loss and identity loss for fine-tuning the reverse diffusion model parameters. - -#### Loss Function -The objective function is given by: - -$$\mathcal{L}_{\text{direction}} (\hat{x}_0(\theta), y_{\text{tar}}; x_0, y_{\text{ref}}) + \mathcal{L}_{\text{id}} (\hat{x}_0(\theta), x_0)$$ - - -where $x_0$ is the original image and $$\hat{x}_0(\theta)$$ is the generated image from the latent with optimized parameters $\theta$. The identity loss $$\mathcal{L}_{\text{id}}$$ aims to preserve the object's identity post-manipulation. - -#### Optimization and Identity Preservation -Optimization is guided by directional CLIP loss, requiring a reference and a target text for image manipulation. The identity loss includes $\ell_1$ loss for pixel similarity and a face identity loss for maintaining recognizable human features. - - -#### Architecture -The fine-tuning involves a shared U-Net architecture across time steps, with gradient flow illustrated in Figure 1. This structure supports the transformation of images to align with target texts. - -{% include figure.html path="assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/gradient-flows.png" class="img-fluid" style="width:100px; height:75px;"%} -*Figure 1. 
Gradient flows during fine-tuning the diffusion model with the shared architecture across t .* - -### Forward Diffusion and Generative Process -Kwon et al discusses the DDPM's sampling process, which is inherently stochastic. This stochastic nature results in varied samples even from the same latent input. However, to leverage the image synthesis capabilities of diffusion models for precise image manipulation, the authors use DDIM's deterministic forward process with $$\sigma_t=0$$ which allows for almost perfect reconstruction. Using deterministic processes, however, limits model's generative capability and this problem has been developed in the subsequent papers by injecting noise at specific timesteps . - -#### Deterministic Diffusion Processes -The deterministic processes are formulated as follows: - -$x_{t+1} = \sqrt{\alpha_{t+1}}f_\theta(x_t, t) + \sqrt{1 - \alpha_{t+1}}\epsilon(x_t, t)$ - -$x_{t-1} = \sqrt{\alpha_{t-1}}f_\theta(x_t, t) + \sqrt{1 - \alpha_{t-1}}\epsilon(x_t, t)$ - -#### Fast Sampling Strategy -To expedite the sampling, a 'return step' is introduced along with a strategy to use fewer discretization steps. This accelerates training without significantly compromising the identity preservation of the object in the image. - -Detailed mathematical derivations and more comprehensive analyses can be found in the supplementary sections of . - -## Experimental Setup and Procedure -Our method is intended to be used given a setup as follows. We have a set of images from the source domain, $$\{x_i\}_{i=1}^{n}$$, out of which we have labels $$\{y_i\}_{i=1}^{n'}$$ for a subset of them, where $$n' << n$$. For simplicity, we are dealing with a binary classification task with 0-1 labels. We now want to classify test images from the target distribution, $$\{x^t_i\}_{i=1}^{m}$$ ($$m << n$$). 
We also have a text description of the target distribution, $$T_{target}$$ (a short prompt that captures how the source and target domains differ; for example, if the source domain is images in the summer and the target domain is images in the winter, $$T_{target}$$ could be "winter"). - -We now use the images from the source domain $$\{x_i\}_{i=1}^{n}$$ to train a diffusion model and use DiffusionCLIP fine-tuning to generate an image $$x'_i$$ from each labelled source image $$x_i$$ driven by $$T_{target}$$. Thus, we have created a new training dataset with the target distribution $$\{(x'_i, y_i)\}_{i=1}^{n'}$$. - -Now, we use supervised learning to train a model on the $$\{(x'_i, y_i)\}_{i=1}^{n'}$$ pairs and subsequently classify the test images $$\{x^t_i\}_{i=1}^{m}$$. The idea is that by shifting the distribution of training data to match that of the test data using just the text description of the target distribution, we can achieve a model that generalizes well to the target domain even in the regime of limited labelled data and target domain images without having to explicitly learn the target distribution. - -# Experiment and Results -## Problem set-up -We run a simple experiment to show the power of domain adaptation using our method in this setting. We consider a gender classification problem on CelebA dataset with test domain being different from the train domain. - -Our train domain is original CelebA images while our target domain is the same images but in the "sketch" style. The "sketch" style images were generated by the same method (DiffusionCLIP) by editing the original CelebA images on the test set. This style transfer doesn't change the face identity (including gender, which is of our interest for the given task), so we keep all the labels unchanged. - -We have a training set of size 1,200 images and test set of size 300 images (mainly for computation reasons). 
Our data comes from publicly available CelebA dataset with binary attributes (including the gender attribute of interest). We resize all the images to size 256x256 using Bilinear interpolation. - -We use a simple CNN architecture for gender classification - three convolutional layers with increasing filter depth (32, 64, 128), each followed by a max pooling layer that halves the image dimensions, followed by 2 fully connected layers with sigmoid activation. Our experiment is run for demonstrative purposes for the most part and does not require complex architectures. The training size of 1,200 images is additionally hinting at the necessity to scale the model complexity down for the purposes of our experiment. Our objective function is binary cross-entropy loss. - -## Experimental pipeline -We run the following experiments to confirm our intuition about the method's effectiveness: - -* Experiment 1 - - Training set (1,200 labeled images) - original CelebA images - - Test set (300 labeled images) - "sketched" images - - We train CNN on plain CelebA images and evaluate on a shifted test domain. We use the plain CelebA test domain as a performance benchmark. We expect this model to do worse on the "sketched" test set than on the original one. - -* Experiment 2 - - Training set (1,200 labeled images) - adapted images. Original train images adapted to the "sketch" style using the method described in the subsection below. - - Note: We keep the number of images in the train set the same as in the experiment above (e.g., we create new train images and delete the original ones instead of augmenting the data) for the clarity of the experiment. In practice, one can combine images from both domains for learning. - - Test set (300 labeled images) - "sketched" images. - - We train the CNN on the "sketched" images now and evaluate the performance on both "sketched" and plain test sets. We expect this model to do better on the "sketched" test set which is our initial goal. 
- - -## Domain adaptation method -To edit our images from the plain CelebA distribution to the target "sketched" distribution, we use the method proposed in DiffusionCLIP . We used pre-trained fine-tuned diffusion model based on original diffusion model trained on CelebA images using P2 objective introduced by Choi et al . Note that the original pre-trained P2 diffusion model was trained on the whole CelebA dataset which makes use of large amounts of unlabeled data in the train domain and is consistent with our problem set-up. The diffusion model was fine-tuned using the prompt "Sketch". We made use of deterministic DDIM inversion process with 40 steps (instead of a 1,000 steps in the original noise schedule) and 6 generative steps. - -Despite the sufficient computation cost savings by using the DDIM process, transforming 1,500 images took more than 6 hours on a single NVIDIA GeForce RTX 3050TI 4GB GPU. Computation time is still the main drawback of using diffusion models for image editing and this is the main reason for us to limit the total sample size to 1,500 images. - -Note: We use the same procedure for generating test images from "technically unknown" sketch domain and adapting the training set to this domain. This assumes the user perfectly identified the prompt which describes the target domain and used it to fine-tune the pre-trained diffusion model which is unrealistic in practice. We believe, however, that for simple prompts semantic similarity of the user prompt and the word "Sketch" would allow to get adapted images similar to the target domain because of the CLIP loss properties. - -## Results -### Image Manipulation -Figure 2 shows examples of DiffusionCLIP fine-tuning applied to CelebA images, resulting in "sketched" images. 
- -{% include figure.html path="assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/female_ex.png" class="img-fluid" style="width:100px; height:75px;"%} -{% include figure.html path="assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/male_ex.png" class="img-fluid" style="width:100px; height:75px;"%} -*Figure 2. Examples of DiffusionCLIP fine-tuning.* - -### Classification -Figure 3 shows the performance of the CNN trained on the original CelebA images and tested on images in the source domain as well as the target domain, while Figure 4 shows the performance of the CNN trained on the adapted images. - -{% include figure.html path="assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_base.png" class="img-fluid" style="width:100px; height:75px;"%} -*Figure 3. Performance of CNN trained on original CelebA images.* - -{% include figure.html path="assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_adapt.png" class="img-fluid" style="width:100px; height:75px;"%} -*Figure 4. Performance of CNN trained on adapted images.* - -These results confirm our intuition that adapting our source domain to the target domain results in a non-trivial performance boost. We observe that for the initial few epochs, the performance for both the source and target domains is similar, but this gap increases as we train further. This tells us that initially, the model learns relevant "higher level" features that are present in both the domains since they are both related. However, for later epochs, the model overfits to the distribution of the training data which results in a large performance gap between the two domains. At this stage, the model is learning "lower level" features that belong to the source domain, which are different in the target domain. Thus, the performance on a shifted domain becomes worse as time goes on. 
If we train further, we expect to learn more lower level features of the source domain, which will enhance performance for a test set from the source domain but deteriorate performance for a test set from the target domain. - -# Conclusion -We have shown, with a simple binary classification experiment, that the proposed domain adaptation method using DiffusionCLIP fine-tuning leads to a significant performance boost when we have training and test data sampled from different but related domains. - -Future work in this direction might include working with the h-space proposed in . Our idea for semi-supervised domain adaptation naturally extends to unsupervised domain adaptation by leveraging the properties of this latent space. One could use this latent space as an implicit attribute classifier after smoothing the image at appropriate noise level and then balance the attributes between train and test sets in an unsupervised manner. This approach, however, requires a better implementation of the original method presented in and is not feasible as of now. - - diff --git a/_posts/2023-11-11-Tracking-Multiple-Objects.md b/_posts/2023-11-11-Tracking-Multiple-Objects.md deleted file mode 100644 index 11bb3e7e..00000000 --- a/_posts/2023-11-11-Tracking-Multiple-Objects.md +++ /dev/null @@ -1,161 +0,0 @@ ---- -layout: distill -title: Diffusion Models on Low-Brightness Images -description: Diffusion models have been used with great success for a number of use cases, but they still remain largely unused on dim images. The primary related work has been on using a diffusion model for low-light image enhancement. However, most of these works agree that attempting to generate an image from noise generated on top of an already dim image often results in rgb shift and global degradation of the image. This is because a diffusion model adds noise to the given image and then attempts to denoise the image, so given a dim and low-contrast image, the model has a difficult time denoising. 
This blog post focuses on methods to improve diffusion model performance in low-light images -date: 12-12-2023 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Ishita Goluguri - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-11-Tracking-Multiple-Objects.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- -## Introduction - -Since the introduction of ChatGPT, everyone seems to be speaking about “generative AI,” with almost 15x more google searches for generative AI now than at this time last year. This blog post focuses a specific use case for diffusion models, which have applications across the board, from generating images given keywords to planning trajectories for robot manipulation. In short, diffusion models are a family of probabilistic generative models that progressively destruct data by injecting noise, then learn to reverse this process for sample generation. - -Alt Text -Figure 1.1. How a diffusion model iteratively transforms noise to generate an image - -Diffusion models have been used with great success for a number of use cases, but they still remain largely unused on dim images. The primary related work has been on using a diffusion model for low-light image enhancement. 
However, most of these works agree that attempting to generate an image from noise generated on top of an already dim image often results in rgb shift and global degradation of the image. This is because a diffusion model adds noise to the given image and then attempts to denoise the image, so given a dim and low-contrast image, the model has a difficult time denoising. - -For a visual example of why low-light scenarios can be a problem for diffusion models, we can just look at the control of our experiments. The left image is from the diffusion model trained and evaluated on low-light images, while the right image is from the diffusion model trained and evaluated on normal-light images. - -Alt Text -Alt Text - -We can observe all sorts of problems here, from the model being unable to determine the image background color to the model sometimes not even showing the butterfly. In contrast, the exact same training done on the normal butterfly dataset shows distortions occasionally, but has no issues determining the background color or the contrast between the butterfly and the background. This illustrates the issue talked about previously of rgb shift and global degradation. In this blog, we aim to conduct experiments by adding different features to the DDPM scheduler and investigate which can actually make a difference for low-light scenarios. - -## Background - -First, we discuss the idea of a diffusion model in more depth. In a nutshell, a diffusion model relies on semi-supervised training. The model is given an image from a training set to which random noise has been applied $$t$$ times. This noisy image is given to the model along with the value of $$t$$, and a loss is computed between the output of the model and the noised image. The random noise is applied with a noise scheduler, which takes a batch of images from the training set, a batch of random noise, and the timesteps for each image. 
The overall training objective of the model is to be able to predict the noise added through the scheduler to retrieve the initial image. - -Since diffusion models on dim images are relatively unstudied, this blog post focuses on taking a well-known diffusion model for regular images and making modifications to the scheduler, which controls the noising and denoising process, and the model architecture to improve its performance in low-light scenarios. We begin with the DDPM (Denoising Diffusion Probabilistic Models) model . There are a number of viable diffusion models to study, but one of the major benefits of DDPM is that it balances image fidelity in generation and speed. Several other major models such as DDIM and PNDM can be much worse at capturing fine details even though they have some speed and computation advantage . This offers some intuition as to why DDPM was chosen to work with for this project. - -A DDPM uses two Markov chains for its denoising and noising process: one to perturb the data to noise, and another one to convert the noise back into data. The noising and denoising process uses a unique scheduler that we refer to as the DDPMScheduler. Mathematically, we can refer to the initial distribution as $$q(x_0)$$ and show that the following $$x_t$$ are computed as $$q(x_t \vert x_{t-1}) = \mathbb{N}(x_t; \sqrt{1-\beta_t}x_{t-1},\beta_tI).$$ This is referred to as a Gaussian transition kernel, and shows that consecutive noised images are generated by taking the previous image and sampling more noise into it using a Gaussian perturbation. Of particular note are the $$\beta_t$$ terms, since those vary from timestep to timestep and we discuss later how those should be modified. In the specific ddpm scheduler implementation, the $$\beta_{\text{start}}$$ and $$\beta_{\text{end}}$$ are specified, so the $$\beta$$ in the Gaussian perturbation, which specifies how much noise should be added at any time, is gradually increased. 
- -The specified beta values can have many consequences on the model overall, but one is more aggressive denoising which can combat rgb shift. This is because rgb shift can cause color inconsistencies between adjacent pixels, which can be combated by greater noise reduction. In addition, aggressive denoising may be able to recover the underlying structure of the image and smooth out artifacts introduced by rgb shift. However, aggressive denoising can result in a loss of detail as well - -By integrating the previous noise during the noising step to determine $$q(x_T)$$ we can get $$q(x_T) = \int q(x_T \vert x_0)q(x_0)dx_0 \sim \mathbb{N}(x_t; 0, I)$$, showing that after all the noise is integrated, the entire structure of the image is lost. After the denoising, DDPMs start generating new samples by generating a noise vector from the prior distribution $$p(x_T = \mathbb{N}(x_T; 0, I)),$$ and gradually removing noise by running a Markov chain in the reverse. The goal is to learn the transition kernel between timesteps. The reverse transition can be written as $$p_{\theta}(x_{t-1} \vert x_t) = \mathbb{N}(x_{t-1}; \mu_{\theta}(x_t, t), \sigma_{\theta}(x_t, t))$$ where $$\theta$$ is the model’s parameters and the mean and variance are parametrized by neural networks. - -This variance will also come into play later, as it is one of the parameters that we toggle in the DDPM scheduler. Variance in the DDPM Scheduler of the Diffuser library has several possible values: fixed_small, fixed_small_log, fixed_large, fixed_large_log . 
- -| variance_type | effect | -| ---------|----------|----------| -| "fixed_small"| The variance is a small and fixed value | -| "fixed_small_log"| The variance is small and fixed in the log space | -| "fixed_large"| The variance is a large and fixed value | -| "fixed_large_log"| The variance is large and fixed in the log space | - -## Methods - -The first method evaluated as a control is simply an implementation of a DDPM using the Diffusers library on HuggingFace. This model was trained using the Smithsonian butterflies dataset of 1000 images hosted on HuggingFace . Initially, since the images in the dataset are all different, the dataset was resized to have all images conform to a square resolution, randomly flipped to augment the dataset, and normalized into the [-1,1] range. To generate “dim” images, the regular dataset was reduced in brightness by 50% using the Transforms library from HuggingFace. - -Alt Text -

-Figure 3.1. Original images from dataset -

-Alt Text -

-Figure 3.2. Images after preprocessing -

- -Next, noise is added to the images. For this, we use the DDPMScheduler with the default parameters from Diffusers. The model is then trained on the noisy image, and evaluated. For evaluation, the model is tested on sixteen different images previously sampled randomly from the training dataset and set aside as test images. These images are noised using the scheduler in the same way as the rest of the images, and the model is run on the noised images to retrieve the original images. - -| Control Parameters | -| ---------|----------|----------| -| noise_timesteps| 50 | -| num_epochs| 50 | -| beta_start| 0.0001 | -| beta_max| 0.02 | -| variance_type| "fixed_large" | -|resnet layers per unet block| 2 | - -Figure 4.1. Showing default parameters used in the diffusion model - -

-Alt Text -Figure 4.2. Figure depicting the UNet architecture used in the model -

- -Initially, a quantitative method of evaluation was considered, and some losses were computed between the test images before noising and the corresponding test results after denoising. While these measurements were computed, they didn’t seem as valuable as simply looking at the image because of the various patterns between images that a loss function cannot always capture (ie how similar is the butterfly and the pattern of the butterfly to the initial image). As an example, the image on the left receives a lower mean squared error loss than the image on the right, yet looking at them, it is apparent that the denoised version on the right is better. Thus, the evaluation here mostly presents the model outputs for us to qualitatively compare across different variations. - -

-Alt Text -Figure 4.3. Showing two outputs of different models given the same input. MSE Loss proved to be unreliable for this task as the loss of the left image compared to the control was less than the loss of the right image due to rgb shift -

- - -After the control, this process is repeated for a variety of parameters carefully chosen and model architecture modifications to evaluate the best variation for use in this low-light scenario. - -## Results/Discussion - - -The results of the control are as seen in the introduction above. The result of the dim images is on the left, while the result of the brighter images is on the right. - -Alt Text -Alt Text -Figure 5.1. The left shows the output of the control model trained on the dim images and the right shows it trained on the bright images - -One of the most pressing problems seen on the dimmer images is the rgb shift. As discussed in the background, the variance, which partly controls how aggressively the model is denoised, can help with rgb shift because larger denoising can retrieve details lost in noise. Thus, the first modification is changing the variance type from “fixed_small” to “fixed_large.” This modification, after training, resulted in the evaluation images below. - -

-Alt Text -

-

-Figure 5.2. Result of evaluation after changing variance -

- -As we can see, this helped greatly with the rgb shift issue, and eliminated the background discoloration for several of the images. Certain images, such as the second row on the left-most column and the third from the left on the bottom row also show huge detail improvements. For the reasons discussed earlier, this is expected as a result of larger denoising, since it can clear away some artifacts. The only image that showed a decrease in quality after the variance change was the right-most image in the top row. - -Now that some of the rgb shift has been resolved, we move to tackling the loss of detail in many of these evaluation images. One classic approach to loss of information is simply increasing the capacity of the model to learn. In more technical terms, by increasing the number of ResNet layers per UNet block, we may allow the model to capture more intricate features and details. Deeper layers can learn hierarchical representations, potentially improving the ability to encapsulate fine-grained information. To do this, we edit our model architecture to make each UNet block deeper. - -Alt Text -Alt Text -Figure 5.3. The left image shows the output of the new change in model architecture on the dimmed dataset, while the right image shows the bright dataset control output for color comparison - -A huge improvement can be seen just by deepening the model architecture and at least the outline of every butterfly is now visible. However, this still hasn’t solved the problem of rgb shift. As we can see, the butterflies in the denoised dim images are all skewed yellow, while the butterflies in the denoised control bright images are all of varying colors. Next, we try to train with various betas in the scheduler to tackle this issue. As discussed before, higher beta values can help with rgb shift. However, higher values can also lead to loss of detail. The beta_start for the control was 0.0001 and the beta_max was 0.02. 
Thus, we try two combinations of start and max: 0.001 and 0.01, and 0.0005 and 0.015. - -Alt Text -

-Figure 5.4. The left figure shows the output for beta start = 0.001 and beta end = 0.01, and the right figure shows the output for beta start = 0.0005 and beta end = 0.015 -

-As seen above, this modification was unsuccessful, and the images have much less detail than before and the rgb shift is worse than before. This may be because the biggest issue is the distortion of colors and blurring, and thus, a high beta value and larger denoising is needed to fix these issues rather than smaller denoising as was previously hypothesized. This future modification is not analyzed in this project, but would be interesting to see in the future. - -## Future Directions -There are several limitations and future directions worth discussing. For one, this project investigates a specific model, the DDPM model. The DDPM model was chosen for various reasons, but mostly because it draws a balance between detail and also efficiency. In the future, multiple models could be considered to figure out which is really best for image generation under low-light scenarios. In addition, this work only focuses on one dataset of butterflies, and generates “low-light” data by reducing the brightness of the original dataset. This is good evidence for the success of the methods presented, but additional datasets and real data taken from environments with low-light would have lent more evidence to the success of the methods. In addition, the amount of data and depth of the models used had to be limited due to gpu usage limits. A model trained for more epochs with data may work better than this one. In addition, a good future starting point for this work would be to work with the beta start and beta max to figure out how to improve the rgb shift, which I believe would help with the detail in the dim images. 
- - - - - diff --git a/_posts/2023-11-11-denoisingVAE.md b/_posts/2023-11-11-denoisingVAE.md deleted file mode 100644 index 00c7a196..00000000 --- a/_posts/2023-11-11-denoisingVAE.md +++ /dev/null @@ -1,250 +0,0 @@ ---- -layout: distill -title: Comparing data augmentation using VAEs and denoising-VAEs for limited noisy datasets -description: -date: 2023-11-11 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Pranay Agrawal - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-11-denoisingVAE.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Background - - name: Motivation - - name: Research Problem Statement - - name: Methods - subsections: - - name: Dataset Selection and Preparation - - name: VAE and DVAE - Architecture and Hyperparameters - - name: Data Augmentation - - name: Classification Network(CNN) Architecture - - name: Results - subsections: - - name: VAE-DVAE performance - - name: Latent Space Visualization - - name: Classification Performance - subsections: - - name: Artificially corrupted Fashion-MNIST - - name : Conclusion - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Background - -One of the significant challenges in this evolving landscape of machine learning is the prevalance of limited and noisy datasets. 
Traditional models and downstream tasks such as classification often struggle with such datasets, leading to suboptimal performance and a lack of generalizability. - -Could this be tackled using auto-encoders, specifically, Denoising Autoencoders (DAE) and Variational Autoencoders (VAE)? Denoising autoencoders (DAE) are trained to reconstruct their clean inputs with noise injected at the input level, while variational autoencoders (VAE) are trained with noise injected in their stochastic hidden layer, with a regularizer (KL divergence) that encourages this noise injection. But what if we could combine these strengths? - -## Motivation - -Denoising autoencoders (DAE), are trained to reconstruct their clean inputs with noise injected at the input level, while variational autoencoders (VAE) are trained with noise injected in their stochastic hidden layer, with a regularizer (KL divergence) that encourages this noise injection. Denoising Variational Autoencoders (DVAEs) are an extension of the traditional variational autoencoder (VAE). The motivation for delving into the realm of DVAEs stems from a critical need - the ability to effectively interpret and utilize limited, noisy data. They merge the robustness of DAEs in handling noisy inputs with the generative prowess of VAEs. As highlighted in the research paper “Denoising Criterion for Variational Auto-Encoding Framework”, integrating a denoising criterion into the VAE framework refines the robustness of learned representations, thereby enhancing the model’s generalization ability over various tasks. - -VAEs, known for their generative capabilities, introduce noise at the hidden layer level, potentially offering a means to augment limited datasets. On the other hand, DVAEs, an innovative extension of VAEs, introduce perturbation to input data, promising a more robust feature extraction and create additional, realistic augmentations of the data. 
-Our aim here is to comprehensively analyze and contrast the efficacy of VAEs and DVAEs in augmenting such datasets. We hypothesize that while VAEs can offer some level of data enhancement, DVAEs, with their inherent denoising capability, might prove superior in extracting more reliable and robust features from noisy datasets. - - -## Research Problem Statement - -The first aspect of this research is to explore the dual functionality of DVAEs — their ability to denoise input data while concurrently learning a generative model of the data distribution. The next aspect is to compare the performance of DVAEs against traditional VAEs in i) learning robust latent representations, and ii) in downstream classification tasks with richer varied datasets by utilising data augmentation aspect of these generative models. - -1. **Learning Robust representation and Generating Synthetic data using DVAEs:** Can DVAEs' dual capability of denoising input data and learning a generative model of the data distribution simultaneously be exploited to effectively learn robust representations from limited and noisy datasets and utilized to generate additional synthetic data (augmented dataset)? How does it compare to using traditional VAEs? - -2. **Performance Enhancement for downstream tasks:** How does the DVAE-generated synthetic data impact the performance metrics of downstream classification tasks? Compare performance metrics with traditional VAE for different noise levels in test datasets. - - -## Methods - -### Dataset Selection and Preparation -The Fashion-MNIST dataset, which includes 60,000 training images, is selected for the experiments mentioned above. To simulate a limited data environment, a subset of 5,000 images is randomly selected from the dataset. -We also create a noisy version of the training dataset to understand the efficacy in scenarios when clean input data is not available. 
- -{% include figure.html path="assets/img/2023-11-11-denoisingVAE/fashionMNISTSamples.png" -class="img-fluid" style="width:50px; height:50px;" %} -Figure 1. Sample Fashion-MNIST images -{% include figure.html path="assets/img/2023-11-11-denoisingVAE/noisyFashionMNISTSamples.png" -class="img-fluid" style="width:50px; height:50px;" %} -Figure 2. Artificially Corrupted(Noised) Fashion-MNIST images - -### VAE and DVAE - Architecture and Hyperparameters -The VAE and DVAE architecture is similar and differ only in the sense that DVAE adds noise to input images before passing it to encoder. - -The encoder comprises two hidden layers, each with 128 neurons. The input size is flattened to 28 * 28 dimensions. Each hidden layer in the encoder is followed by a ReLU activation function. The encoder's output is connected to two separate layers: one for generating the mean (µ) and the other for the logarithm of the variance (log-variance), both projecting to a 4-dimensional latent space (z_dims). - -On the decoding side, the architecture starts with the latent space and expands through a similar structure of two hidden layers, each with 128 neurons and ReLU activation functions. The final output layer reconstructs the original input size of 28 * 28 dimensions and applies a Sigmoid activation function. - -This VAE/DVAE employs a reconstruction loss using the binary cross-entropy between the input and its reconstruction, and a regularization term(KL-Divergence) derived from the latent space to enforce a probabilistic distribution. -Each model is trained for 60 epochs with batch size 128. 
- -```python - input_size = 28 * 28 - z_dims = 4 - num_hidden = 128 - self.encoder = nn.Sequential( - nn.Linear(input_size, num_hidden), - nn.ReLU(), - nn.Linear(num_hidden, num_hidden), - nn.ReLU() - ) - - self.mu = nn.Linear(num_hidden, z_dims) - self.logvar = nn.Linear(num_hidden, z_dims) - - self.decoder = nn.Sequential( - nn.Linear(z_dims, num_hidden), - nn.ReLU(), - nn.Linear(num_hidden, num_hidden), - nn.ReLU(), - nn.Linear(num_hidden, input_size), - nn.Sigmoid(), - ) -``` - -### Data Augmentation - -For augmenting the dataset, we generate 2 new samples for each input image. First, the image is passed through the encoder part of VAE/DVAE and then sample a latent representation vector around the obtained latent representation - mean and std. - -{% include figure.html path="assets/img/2023-11-11-denoisingVAE/VAE_data_augmentation.png" -class="img-fluid" style="width:50px; height:50px;" %} -Figure 3. Example: VAE Data Augmentation -{% include figure.html path="assets/img/2023-11-11-denoisingVAE/DVAE_data_augmentation.png" -class="img-fluid" style="width:50px; height:50px;" %} -Figure 4. Example: DVAE Data Augmentation - - -### Classification Network(CNN) Architecture -The Classification Network(CNN) architecture is comprised of a series of convolutional, activation, pooling, and fully connected layers. Initially, it features a convolutional layer with 1 input channel and 32 output channels, using 3x3 kernels, stride of 1, and padding of 1 with 'reflect' mode, followed by a ReLU activation function. This is succeeded by another convolutional layer that increases the depth to 64 filters, maintaining the same kernel size, stride, and padding, accompanied by the same activation function. Subsequently, a max pooling layer with a 2x2 kernel reduces the spatial dimensions of the feature maps, highlighting significant features. 
The data is then flattened, resulting in a feature vector with a length of 64 * 14 * 14, which feeds into a series of three linear layers, each with 128 units, interspersed with the activation function. This sequence of fully connected layers is designed to capture complex relationships in the data. Finally, the architecture has an output linear layer that maps to the number of outputs (num_outputs=10). - -```python - image_dim = 28 - num_outputs = 10 - act_cls = nn.ReLU - net = [ - nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1, padding_mode='reflect'), - act_cls(), - ] - - net.extend([ - nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1, padding_mode='reflect'), - act_cls(), - nn.MaxPool2d(kernel_size=2) - ]) - input_len = int(64 * image_dim/2 * image_dim/2) - - net.extend([ - nn.Flatten(), - nn.Linear(input_len, 128), - act_cls(), - nn.Linear(128, 128), - act_cls(), - nn.Linear(128, 128), - act_cls(), - ]) - - net.extend([nn.Linear(128, num_outputs)]) -``` - -## Results - -Here, we utilize the standard Fashion-MNIST dataset for our analysis. Initially, we train the VAE/DVAE network on a subset of 5,000 samples from the training dataset over 60 epochs. Following this, we employ the VAE/DVAE networks to generate synthetic data, leveraging the learned latent space representation for data augmentation purposes. The performance of the augmented datasets is then evaluated using the previously described CNN architecture for classification tasks. - -### VAE-DVAE Performance -DVAE's training loss closely tracks the VAE's loss throughout training. This is interesting because the DVAE is dealing with additional artificial noise, yet it performs on par with the standard VAE. The fact that the DVAE does not exhibit a significantly higher loss than the VAE might suggest that it is effectively denoising the data and learning a robust representation, despite the additional noise. 
-{% include figure.html path="assets/img/2023-11-11-denoisingVAE/TrainingLossOriginal.png" -class="img-fluid" style="width:50px; height:50px;" %} -Figure 5. Training Loss: VAE v/s DVAE - -### Latent Space Visualization -Here, we are visualizing the latent space of VAE and DVAE, a high-dimensional space where each dimension represents certain features learned by the model from the data. For this, we plot a 10x10 grid of images where each image in the grid is generated by the model by varying the values in two chosen latent dimensions (i and j), while keeping the other dimensions set to zero. This helps in understanding the effect of each latent dimension on the generated output. - -{% include figure.html path="assets/img/2023-11-11-denoisingVAE/VAE_LatentSpace.png" -class="img-fluid" style="width:50px; height:50px;" %} -Figure 6. VAE Latent Space Visualization - -{% include figure.html path="assets/img/2023-11-11-denoisingVAE/DVAE_LatentSpace.png" -class="img-fluid" style="width:50px; height:50px;" %} -Figure 7. DVAE Latent Space Visualization - -The lack of visible differences in the latent space structures of both VAE and DVAE indicates that the two models are learning similar representations. To delve into these nuances, we assess the effectiveness of augmented data (created using these learned latent spaces) in a subsequent classification task. - -### Classification Performance - -To delve into the efficacy of VAE and DVAE in augmenting datasets for downstream image classification tasks, we trained a CNN on a limited subset of the Fashion-MNIST dataset to establish a baseline. Subsequently, we generated synthetic data using both VAE and DVAE, aiming to enrich the training dataset and observe the resultant impact on the CNN's performance. This is crucial considering the initial constraint of limited training data to start with. We used Fashion-MNIST test dataset, which includes 10,000 test images, for evaluating the performance of learned CNN network. 
- -We also tested robustness of these augmented datasets against varying levels of noise (artificially added to the test dataset), simulating real-world conditions where test data often includes such imperfections, arising because of the limitations in measurement tools. - -{% include figure.html path="assets/img/2023-11-11-denoisingVAE/LimitedDatasetLC.png" -class="img-fluid" style="width:50px; height:50px;" %} -Figure 8. CNN Learning Curve for Limited Dataset - -{% include figure.html path="assets/img/2023-11-11-denoisingVAE/VAEAugmentedLC.png" -class="img-fluid" style="width:50px; height:50px;" %} -Figure 9. CNN Learning Curve for VAE Augmented Dataset - -{% include figure.html path="assets/img/2023-11-11-denoisingVAE/DVAEAugmentedLC.png" -class="img-fluid" style="width:50px; height:50px;" %} -Figure 10. CNN Learning Curve for DVAE Augmented Dataset - -| Dataset Type \ Noise Level | No Noise | 2.5% Noise | 5% Noise | 7.5% Noise | 10% Noise| -|----------------------------|----------|-----------|--------|--------|--------| -| Limited Dataset | 83.56% | 83.39% | 83.11% | 82.33% | 81.75% | -| VAE Augmented Dataset | 84.18% | 84.03% | 83.57% | 82.68% | 81.43% | -| DVAE Augmented Dataset | 85.32% | 84.98% | 84.67% | 83.98% | 82.59% | - - -#### Artificially corrupted Fashion-MNIST -Here, we deliberately introduced artificial noise to the standard Fashion-MNIST dataset to effectively simulate the real-world scenario where training data is not cleaned and is often noisy and imperfect. Such conditions often pose significant challenges in learning effective representations, making our approach highly relevant for understanding the adaptability and efficiency of VAE and DVAE models in handling noisy data. -This way we expose the model and train it on a variety of noise patterns while forcing it to reconstruct the original noised image. 
The model will learn to effectively separate noise from the signal and will be less likely to overfit to the 'clean' aspects of the training data and can thus perform better on unseen, noisy data. This improves the generalization capabilities of the model making it more suitable for practical applications. - -Here, we generated synthetic data using both VAE and DVAE which are trained on artifically corrupted Fashion-MNIST dataset. We then compare the performance of CNN network for three datasets - Limited Noisy Dataset with no augmentation, VAE Augmented dataset and DVAE Augmented Dataset, where representations are learned using the noisy training set. Consistent with our earlier methodology, we further evaluated the robustness of CNNs trained with these datasets by testing them against varying levels of noise in the test dataset. - - -| Dataset Type \ Noise Level | No Noise | 2.5% Noise | 5% Noise | 7.5% Noise | 10% Noise| -|----------------------------|----------|-----------|--------|--------|--------| -| Limited Noisy Dataset | 83.77% | 83.79% | 83.61% | 83.36% | 82.98% | -| VAE Augmented Dataset | 85.24% | 84.99% | 84.62% | 84.04% | 83.20% | -| DVAE Augmented Dataset | 85.48% | 85.38% | 85.10% | 84.89% | 84.58% | - - -## Conclusions - -Here are the key findings from our research: - -1. **Enhanced Learning from Augmented Data:** We observed that the CNN trained with data augmented by both VAE and DVAE demonstrated improved accuracy and generalization capabilities, especially when compared to the CNN trained on a limited dataset. This underscores the effectiveness of generative models in enriching training datasets, leading to more robust learning. - -2. **Superiority of DVAE in Handling Noise:** The CNN trained with DVAE augmented data consistently outperformed the one trained with traditional VAE augmented data in tests involving noisy conditions. 
This aligns perfectly with our research hypothesis about the dual functionality of DVAEs — not only do they learn a generative model of the data distribution but also excel in denoising input data. - -3. **Robustness to Varied Noise Levels:** A crucial aspect of our research was evaluating the performance of augmented datasets under various noise levels. The augmented datasets, especially those generated by DVAEs, maintained consistent performance across different noise conditions. This suggests that the models have not only learned the essential features of the data but are also adept at filtering out noise. - -In downstream classification tasks, DVAE-generated synthetic data improved performance metrics, surpassing those achieved with traditional VAE-generated data, particularly in tests with varied noise levels. This validates our hypothesis and highlights the potential of DVAEs in real-world applications where data is limited and data quality is a critical factor. - -The next steps for this research could be to focus on expanding the types of noise tested in our experiments to evaluate the adaptability and robustness of DVAEs in a broader range of real-world scenarios. We could conduct more comprehensive data augmentation experiments to delve deeper into the capabilities of DVAEs in enhancing neural network learning and generalization. \ No newline at end of file diff --git a/_posts/2023-11-11-physics-loss.md b/_posts/2023-11-11-physics-loss.md deleted file mode 100644 index e5a86b09..00000000 --- a/_posts/2023-11-11-physics-loss.md +++ /dev/null @@ -1,189 +0,0 @@ ---- -layout: distill -title: Physics Loss -description: Learning a deep net to optimize an LP, based on predicting the optimal basis vector. Surveys existing approaches in the literature. Demonstrates high accuracy of feasibility and optimality on small problem instances, but documents issues when scaling to larger problems. 
Benchmarks against a modern optimization solver, with discussions on upfront training vs. variable inference computation times. -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Julian Powers - url: "" - affiliations: - name: MIT ---- - -# Super Resolution: Multi-Objective Training for Optimizing a Single Objective -## Introduction -Super-resolution (SR) refers to image processing techniques which enhance the quality of low-resolution images [2]. Recently deep learning based SR has been applied to the field fluid dynamics to recreate chaotic turbulent flows from low-resolution experimental or numerical data [3]. For some loss function $$\mathcal{L}$$, the goal is to find weights $$\theta^*$$ such that - -$$\begin{aligned} - \theta^* = \text{argmin}_\theta\; \mathcal{L}(\bold{u_H},f(\bold{u_L};\bold{\theta})) -\end{aligned}$$ - -where $$\bf u_H$$ is the reference high resolution data field and $$\bf u_L$$ is the corresponding coarsened low resolution data input to the neural network $$f$$ (see the figure below). - -![Super-resolution reconstruction of turbulent vorticity field using physics-based neural network. Adapted from [2].](/assets/img/2023-11-11-physics-loss/fig1.png) - -​ *Fig 1: Super-resolution reconstruction of turbulent vorticity field using physics-based neural network. Adapted from [2]. Disclaimer: we didn't have time to train on nice images like these for the present investigation.* - - - -Doing so can aid our understanding of flow physics [3]. Many have already applied deep learning to this problem, applying a variety of methods. The performance of the resulting networks depends heavily on the loss function used to train the network. Looking to improve upon the standard $$L_2$$ loss function, some have introduced physics-based loss function that incorporates physical laws that the real flow must obey. 
For example [2] use the following type of form: - - - -$$\begin{aligned} - \mathcal{L} &= \beta_0||\bold{u_H}-f(\bold{u_L})||_2 + \beta_1 ||p_1(\bold{u_H})-p_1(f(\bold{u_L}))||_2 + \beta_2 ||p_2(\bold{u_H})-p_2(f(\bold{u_L}))||_2 + ... -\end{aligned}$$ - - - -where $$p_i(\cdot)$$ is a physical objective that we want to enforce during training (e.g. spatial and time derivatives $$\nabla \bf u_H$$, $$\bf\dot{u}_H$$ etc.) and the $$\beta_i$$ are fixed weighting coefficients. - - - -Typically, multi-objective super resolution approaches aim to overcome the weaknesses of the single objective $L_2$ reconstruction loss, a primary one being that the $L_2$ loss favors blurry reconstructions over sharper more 'realistic' ones. The general idea is that the additional objectives push the training away from un-realistic reconstructions. - -However suppose the goal really is to minimize the $L_2$ reconstruction loss. Can multi-objective training reduce the loss on the original objective or do the new objectives just get in the way? In this investigation we apply adaptively-weighted multi-objective optimization methods to the problem of turbulence super resolution which is a novel approach. - -## Methodology -### The Dataset -Super resolution reconstruction is an interesting problem for turbulent flows due to their inherent multi-scale nature. Information is lost in the coarsening/pooling process making perfect reconstruction impossible without additional insights. Unfortunately, due to time and resource constraints it is infeasible to train on 2D turbulence slices as in figure 1. 
In order to retain a challenging problem for the super-resolution we build an artificial dataset of 1D turbulence as follows: - - - -$$u(x) = \sum_{k=1}^{10} k^{-1}\sin\left(kx+\phi(k)\right) + (2k)^{-1}\sin\left( 2kx +\phi(k)\right)$$ - - - -The amplitude scaling $k^{-1}$ models how the frequencies in a particular turbulent signal might decay with increasing wavenumber (velocity, temperature, pressure, kinetic energy, etc.). In other words the contribution of higher modes to the entire signal becomes less and less important in a predictable way. We generate each individual signal by fixing a phase function $\phi(k)$. For each $k$, $\phi(k)$ is taken to be the realization of a uniform random variable in the range $[0,2\pi)$. This function $u(x)$ bakes in inherent correlations between the low and high frequency waveforms (Please note: this is not physical. We are just making a useful toy dataset for this investigation). Even with extremely coarse low-resolution inputs, we expect that a well-trained neural network can use these correlations to reconstruct the high frequency waveforms. - -For input to the network, the samples are discretized to a $512$ point high resolution grid: $(\mathbf{u_H})_j = u(x_j)=u(j\cdot\frac{2\pi}{512})$. The low resolution data is average pooled with a kernel size of $32$. This results in a low resolution grid of size $512/32 = 16$. Average pooling has been shown to have nice training properties for super resolution reconstruction [2]. The following is a typical high/low resolution pair: - -![Typical Input](assets/img/2023-11-11-physics-loss/fig2.png) - -​ *Fig 2: Typical high/low resolution data pair. The high resolution version exists on a 512 point grid. The low resolution version has been average pooled down to a 16 point grid using an average pooling kernel of size 32. 
The pooling procedure removes the highest frequency components of the data meaning that full reconstruction requires deeper understanding of the underlying structure.* - -### The Network - -The network is a three layer fully connected network with hidden sizes $[1024,1024,1024]$. - -### Training Scheme - -The multi-objective loss function - -$$\begin{aligned} - \mathcal{L} &= \mathcal{L}_0 + \mathcal{L}_1 + \mathcal{L}_2+... \\&= \beta_0||\bold{u_H}-f(\bold{u_L})||_2 + \beta_1 ||p_1(\bold{u_H})-p_1(f(\bold{u_L}))||_2 + \beta_2 ||p_2(\bold{u_H})-p_2(f(\bold{u_L}))||_2 + ... -\end{aligned}$$ - -presents a unique training challenge. Many turbulence super-resolution studies to date set the weights $\beta_i$ by trial and error in an attempt to produce 'nice' results [3]. This approach is sub-optimal because the best values of $\beta_i$ are dependent on the units and orders of magnitude of the properties $p_i$. Also, the best choice for the weights may change depending on the stage of training. For example it may be best to put more emphasis on the reconstruction loss $\mathcal{L}_0$ during the first stages of training and then shift emphasis to other properties to refine the model during the latter stages. In addition to these considerations [5] observed that for physics informed neural networks fixed weights tended to induce training instability as the multiple objectives compete with one another. - -To mitigate these issues in this investigation we employ a multi-objective optimizer (MOO). After each training epoch a MOO reviews the progress for each loss component $\mathcal{L}_i$ and updates the weights $\beta_i$. A schematic is shown below: - -![Schematic of one training epoch ](assets/img/2023-11-11-physics-loss/fig3.png) - -​ *Fig3: One epoch of training with adaptive loss using ReLoBRaLo MOO. At the end of batched training iterations the MOO updates $\{\beta_i\}$ according to the progress of each individual loss component. 
The Adam training optimizer learning rate is fixed at $10^{-5}$ for the entire investigation.* - - - -In particular we use the Relative Loss Balancing with Random Lookback (ReLoBRaLo) scheme from [5] for the MOO. The scheme adaptively updates the loss weights at the end of each epoch according to the progress of each individual loss component: - -$$\begin{align*} -\beta_i^{bal}(t) &= m\cdot -\frac {\exp\left(\frac{\mathcal{L}_i(t)}{\mathcal{T}\mathcal{L}_i(t-1)}\right)} {\sum_{j=1}^m \exp\left(\frac{\mathcal{L}_j(t)}{\mathcal{T}\mathcal{L}_j(t-1)}\right)},\;i\in\{1,...,m\}\\ -\beta_i(t) &= \alpha\beta_i(t-1) + (1-\alpha)\beta_i^{bal}(t) -\end{align*}$$ - -There are many more details in [5], but essentially the $\beta_i^{bal}(t)$ term measures the progress of the loss $\mathcal{L}_i$ since the previous epoch relative to the progress made by other losses. The more a particular loss is struggling the more we increment its weight for the next epoch. The $\alpha$ hyper-parameter indicates bias towards the existing weight values. When $\alpha=1$ no updates are made. The temperature hyper-parameter $\mathcal{T}$ indicates the level of equality across loss components. As $\mathcal{T} \to 0$ only the most struggling loss component receives a weight update. When $\mathcal{T}\to \infty$ all components receive an equal weight update. Note that we initialize by $\beta_0(0)=1$ and $\beta_i(0)=0$ for $i>0$. - -## Results - -### Two Objective Loss - -We tried training on a variety of two-objective loss functions of the form - -$\mathcal{L} = \beta_0||\bold{u_H}-f(\bold{u_L})||_2 + \beta_1 ||p_1(\bold{u_H})-p_1(f(\bold{u_L}))||_2$ - -where the $p_1$ objective was taken to be Fourier transform $\mathcal{F}$, spatial derivative $\frac{d}{dx}$, standard deviation $\sigma(\cdot)$, mean $\mathbb{E}_x(\cdot)$, absolute value $|\cdot|$, or functional compositions of the aforementioned. 
Compared to training on the standard single objective reconstruction loss $\mathcal{L}= \mathcal{L}_0 = \beta_0||\bold{u_H}-f(\bold{u_L})||_2$ , only the two-objective loss with Fourier transform loss gave significant improvements in training performance. Training with $\mathbb{E}_x$ gave marginal improvements. All other properties gave fairly neutral or negative results. Composing the Fourier transform with other properties was detrimental. The following table summarizes the training ($\alpha =0.9,\; \mathcal{T}=1$): - - - -​ *Table 1: Training performance for two-objective loss functions. All runs were performed with $\alpha =0.9,\; \mathcal{T}=1$*. The rightmost column show the percent improvement from the single objective training. The poor performance of $\mathcal{F}\circ\frac{d}{dx}$ might be due to high frequency noise being amplified by the derivative operator before being passed through the Fourier transform. - -| $\boldsymbol{p_1}$ | $\boldsymbol{\mathcal{L_0}(\text{epoch = }200)}$ | % Improvement over Single Objective | -| :----------------------------: | :----------------------------------------------: | :---------------------------------: | -| None (single objective) | 0.01895 | 0 % | -| $\mathcal{F}$ | 0.01366 | 29 % | -| $\frac{d}{dx}$ | 0.01993 | 5.3 % | -| $\sigma(\cdot)$ | 0.02437 | -29 % | -| $\mathbb{E}_x$ | 0.01771 | 6.7 % | -| $|\cdot|$ | 0.01745 | 8.1% | -| $\mathcal{F}\circ\frac{d}{dx}$ | 0.17174 | -830% | - - - -Figures 4 provides a more detailed look at the training for $p_1=\mathcal{F}$. There is considerable variation in the rate of learning due to altering the $\alpha$ hyper-parameter. The bottom panel of figure 4 gives an example of a reconstructed signal. With enough training the network is able to learn the inherent structure in the data and reconstruct the high frequencies. 
- -![Fourier loss two objective training](assets/img/2023-11-11-physics-loss/fig4.png) - -![Reconstructed data by two-objective training](assets/img/2023-11-11-physics-loss/fig4b.png) - -​ *Fig 4: Top panel: Two objective training with Fourier loss for $\mathcal{T}=1$. The results for setting $\mathcal{T}=0.01,100$ are very similar so they are omitted for brevity. The two objective training (reconstruction + Fourier) outperforms the single objective training for every value of $\alpha$. The optimal value of $\alpha$ is close to $0.999$.* Bottom panel: example of reconstructed validation data. The model is able to recover the high frequency components from the original high resolution signal. - -![beta evolution](assets/img/2023-11-11-physics-loss/fig5a.png) - -![fig5b](assets/img/2023-11-11-physics-loss/fig5b.png) - -​ *Fig 5: Reconstruction and Fourier objective $\{\beta_i\}$ evolution for $\alpha=0.9,0.999$. The smaller $\alpha$ the faster the loss weights converge to 1.* - -The two objective training curves in figure 4 are significantly better than the single objective curve. There is a particular value of $\alpha$ (~0.999) that gives the best overall result. Figure 5 demonstrates how the loss weights adapt over the course of training as the ReLoBRaLo MOO tries to balance the improvements in each loss component. For $\alpha=0.9$ the MOO rapidly increases $\beta_1$ in order to put more weight on the lagging Fourier loss. When $\alpha=0.999$ the increase is a lot more gradual. In the limit as $\alpha\to1$ we just have single objective optimization. - -Figure 6 shows a similar weight evolution when the second objective is 'bad', $p_1=\sigma(\cdot)$: - -![beta evolution for standard deviation](assets/img/2023-11-11-physics-loss/fig6.png) - -​ *Fig 6: Reconstruction and $\sigma(\cdot)$ objective $\{\beta_i\}$ evolutions. 
There is evidence of instability at the start of training.* - -In contrast to the reconstruction and Fourier two-objective training, the reconstruction and $\sigma(\cdot)$ weight evolutions show signs of instability. At around $15$ epochs $\beta_0$ experiences a bump. This is most likely the MOO responding to degrading progress on the reconstruction objective due to the two objectives competing with each other. Therefore for optimal multi-objective training it seems preferable that all loss components smoothly decrease without cross interference. - - - -### Multi Objective Loss - -We also study a multi-objective loss created by combining the most successful objectives from the previous study. - -$$\begin{aligned} - p_1&=\mathcal{F}\\ - p_2&=|\cdot|\\ - p_3&=\mathbb{E}_x\\ - p_4&=\frac{d}{dx}\\ -\end{aligned}$$ - -The results closely mimic the two objective Fourier loss so we omit further details. Interestingly, even when we introduce a 'bad' objective such as $\sigma(\cdot)$ or $\mathcal{F}\circ\frac{d}{dx}$ into the multi-objective loss it doesn't appear to spoil the result despite causing a minor instability (see figure 6). These results suggest that it may be possible to just 'throw in' many auxiliary objectives in the hopes that one of them improves training. We might not necessarily need to worry about bad objectives spoiling the bunch. Or it could just be that in this particular case the Fourier objective $\mathcal{F}$ is strong enough to overcome the bad objectives. This needs more investigation. - - - -## Conclusion - -This investigation showed that multi-objective loss functions can be useful even when only one objective is ultimately of interest. Most likely due to the manner in which the data set was constructed, the Fourier objective turned out to be a great training aid (Note that we did try single objective training with the Fourier objective replacing the reconstruction objective. 
This did not yield as good results suggesting that there is something inherently beneficial about multi-objective training as opposed to just changing basis). - -The other objectives did not do nearly as well and some even degraded the training by causing instabilities. The ReLoBRaLo MOO was a critical component of training. None of the aforementioned results would have been possible with fixed weights. It was critical to fine tune the $\alpha$ parameter which determines how aggressively the MOO does updates. Presumably, an overly aggressive MOO doesn't give the network time to settle in the early stages of training but an overly passive MOO hardly makes any difference at all. - -While good, ultimately the ReLoBRaLo scheme was designed for traditional MOO problems (such as solving partial differential equations) and is most likely far from optimal under the unique settings of this investigation. In addition, the objectives in this study were chosen quite arbitrarily. The Fourier objective was an easy one to discover due to the low-pass nature of super-resolution reconstruction and the manufactured dataset. For a more general problem where we might want to introduce auxiliary objectives it will be very difficult a-priori to identify high performance auxiliary objectives. An interesting future investigation could be to design a neural network that adaptively updates the auxiliary objectives after each epoch with the goal accelerating the main network's learning curve. - -## References - -[1] Bode, M., Gauding, M., Lian, Z., Denker, D., Davidovic, M., Kleinheinz, K., Jitsev, J. and Pitsch, H. Using physics-informed enhanced super-resolution generative adversarial networks for subfilter modeling in turbulent reactive flows. *Proceedings of the Combustion Institute*, 2021. - -[2] Fukami, K., Fukagata, K. and Taira, K. Super-resolution reconstruction of turbulent flows with machine learning. *Journal of Fluid Mechanics*, 2019. 
- -[3] Fukami, K.,Fukagata, K., and Taira, K. Super-Resolution Analysis Via Machine Learning: A Survey For Fluid Flows. [Unpublished manuscript], 2023. - -[4] Wang, C., Li, S., He, D. and Wang, L. Is L2 Physics-Informed Loss Always Suitable for Training -Physics-Informed Neural Network?. *Conference on Neural Information Processing Systems*, 2022. - -[5] Bischof, R., and Kraus, M. Multi-Objective Loss Balancing for Physics-Informed DeepLearning. [Unpublished manuscript], 2022. diff --git a/_posts/2023-11-11-unraveling-social-reasoning-in-llms.md b/_posts/2023-11-11-unraveling-social-reasoning-in-llms.md deleted file mode 100644 index cbec2aa7..00000000 --- a/_posts/2023-11-11-unraveling-social-reasoning-in-llms.md +++ /dev/null @@ -1,402 +0,0 @@ ---- -layout: distill -title: 'Unraveling Social Reasoning in LLMs: A Deep Dive into the Social IQA Benchmark' -description: 'In this study, we investigate the challenge of social commonsense reasoning in large language models (LLMs), aiming to understand and categorize common errors LLMs make in social commonsense reasoning tasks.' - - -date: 2023-11-11 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Nina Lei - # url: "https://en.wikipedia.org/wiki/Albert_Einstein" - affiliations: - name: Harvard College - - name: Andrew Zhao - # url: "https://en.wikipedia.org/wiki/Boris_Podolsky" - affiliations: - name: Harvard College - # - name: Nathan Rosen - # url: "https://en.wikipedia.org/wiki/Nathan_Rosen" - # affiliations: - # name: IAS, Princeton - -# must be the exact same name as your blogpost -bibliography: 2023-11-11-unraveling-social-reasoning-in-llms.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. 
-toc: - - name: Introduction - - name: Background and Related Works - subsections: - - name: LLMs and Reasoning - - name: Comprehensive Overview of Benchmarks - - name: Problems With Social IQA - - name: Prior Error Analysis - - name: Conducting Systematic Error Analysis - - name: Improving Social Reasoning - - name: Methodology - subsections: - - name: 'Step 1: Applying Flan-T5 to Social IQA' - - name: 'Step 2: Qualitative Coding of 350 Errors' - - name: 'Step 3: Quantitative Error Analysis' - - name: 'Step 4: Addressing Type 1 Errors' - - name: 'Step 5: Addressing Type 2 Errors' - - name: Analysis and Evaluations - subsections: - - name: 'General Accuracy of Flan-T5' - - name: 'Qualitative Coding' - - name: 'Type 1 Errors' - - name: 'Type 2 Errors' - - name: 'Distribution of Tags' - - name: 'Analysis of Question Types' - - name: 'Attempt to Prune Social IQA' - - name: Discussion - - name: Main Limitations - - name: References - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - - -# Unraveling Social Reasoning in LLMs: A Decision Tree Framework for Error Categorization - -## Introduction - -Social commonsense reasoning is a skill most people acquire within the first few years of life, often without formal education. Consider this example of a social commonsense reasoning question: - -Q: Kai was frantically running to a gate at the airport. Why was Kai running? 
- -A) They were trying to catch a flight that departs soon - -B) They were training for a marathon - -C) They were testing out their new running shoe - -Most would likely infer that Kai was rushing to catch a flight that would depart soon and choose A, the correct answer. Social commonsense reasoning, at its core, entails reasoning about the past, current, and future states of others. - -Despite advancements in Large Language Models (LLMs), prompting models to achieve near-human levels of performance in different tasks across various domains, they have traditionally struggled with social commonsense reasoning tasks, often underperforming humans. Though, this isn't surprising to most observers . - -To better understand why, previous studies have created benchmarks for social commonsense reasoning ; benchmarked how different models perform on various social commonsense reasoning benchmarks; and/or provided classifications for different question types testing social commonsense reasoning . Certain question categories, like those directly related to social norms, are documented to exhibit higher error rates, suggesting that some aspects of social reasoning are more challenging for models to learn . However, these works used older models that were not specifically designed for reasoning tasks. - -Specifically, our blog investigates the question, **What are underlying themes in social errors that large language models make?** From both a qualitative and quantitative perspective. The goal of our findings is to help discover if there are methods that could potentially address these errors. - -To answer this question, we ran Flan-T5 on the Social IQA benchmark, which was introduced in 2019 and features 38,000 multiple-choice questions (MCQs) designed to gauge "emotional and social intelligence in everyday situations" . 
After qualitatively labeling 350 model errors, we made a striking discovery: over half of the questions Flan-T5 got wrong were due to problematic questions in the Social IQA dataset. - -Upon making this curious realization, we pivoted our project from designing a decision tree abstraction for providing detailed categorization of social commonsense questions to analyzing and addressing the two types of errors: - -Type 1: Errors stemming from the flawed construction of the Social IQA dataset - -Type 2: Errors where Flan-T5's choices don't align with social commonsense. - -In the first error group, even reasonable humans, including this blog post’s authors, disagreed with Social IQA's "correct" answers. Questions in this first group have nonsensical contexts/questions, lack a single reasonable answer, or have many reasonable answers. - -When examining questions in the second error group, we noticed that Flan-T5 often over-infers underlying reasons when a more straightforward answer exists. To address this group of errors, we visualized T5's attention mechanisms when processing such questions. - -## **Background and Related Works** - -**LLMs and Reasoning** - -Language models like GPT-4 have captured widespread media attention, given their question-answering capabilities. - -Throughout the development and testing of LLMs, various tasks have been developed to empirically assess these models' abilities and limitations. In literature, these tasks are typically categorized into two main groups: natural language understanding (NLU) and natural language generation (NLG). NLU tasks evaluate a language model's ability to understand natural language. This includes tasks like Natural Language Inference, Reading Comprehension, and various reasoning tasks, including social commonsense reasoning . 
- -### **Comprehensive Overview of Social Commonsense Reasoning Benchmarks** - -Over 100 large-scale benchmarks have been proposed to assess and compare models' social commonsense reasoning abilities and to serve as resources for transfer learning . In general, these benchmarks aim to evaluate models' abilities to infer the mental states, beliefs, and intentions of others. Their development was inspired by the Theory of Mind (ToM), a concept originating from childhood development psychology—some benchmark questions are, in fact, explicitly based on ToM tasks used to assess children's social reasoning . - -Notable benchmarks frequently mentioned in the literature include multiple-choice Question Answering (QA) benchmarks like the 2019 Social IQA and 2019 HellaSwag , along with generative evaluation benchmarks like 2020 ProtoQA . Many of these benchmarks are based on structured knowledge banks about everyday commonsense reasoning like ATOMIC and COMET . - -Similar to that of other studies evaluating LLMs' commonsense knowledge, we use an MCQ benchmark and not a generative one because they are more simple and reliable for evaluation ; under tight time and resource constraints, we err on the side of a simple and reliable evaluation method, though, in future works, we would like to generalize our findings to more datasets. - -However, despite their widespread use, benchmarking datasets like Social IQA are not without flaws. Previous studies have shown that many aspects of common sense are still untested by these benchmarks, indicating an ongoing need for reliable methods to evaluate social commonsense reasoning - -### **Problems With Social IQA** - -Social IQA focuses on evaluating models' abilities to reason about others' mental states, aligning with Theory of Mind concepts . 
Each question in Social IQA comprises a context, a question, and three answer choices; for each of the 37,588 multiple-choice questions, the context, question, correct answer choice, and two incorrect answer choices were gathered through three phases of crowdsourcing on Amazon Mechanical Turk (MTurk) .
-
-In the first two phases, MTurk crowdsource workers sourced context sentences and questions using the ATOMIC knowledge base . In the third phase, MTurk workers generated correct answers for the given context-question pairs. Incorrect choices for each question were derived from correct answers to related questions . In our Discussion section, we will discuss issues with this methodology for sourcing negative answers.
-
-Many critiques have been raised about the reliance on crowdsourcing for benchmarks, specifically, about the challenges in obtaining high-quality material . Given the low pay on MTurk, workers often prioritize quantity over quality, leading to errors. There have even been reported instances of crowdsourcing workers using bots to help them complete more tasks.
-
-### **Prior Error Analysis Work Using Social IQA Dataset**
-
-The authors of Social IQA conducted a preliminary error analysis of their dataset, finding that language models found questions about context pre-conditions, such as motivations and prior actions, to be much easier than those about stative attributes or predicting future actions. Interpreting these results, the authors hypothesized that models might be learning lexical associations rather than true meaning . This aligns with findings presented in other works .
-
-Other research, such as Wang et al.'s , categorizes Social IQA questions into four types: 1) Feelings and Characteristics, 2) Interaction, 3) Daily Events, and 4) Knowledge, Norm, and Rules. They found the final category to be the most challenging for models. 
- -### **General Methodology for Conducting Systematic Error Analysis for QA** - -Our research, aimed at identifying themes in social errors made by LLMs, draws inspiration from conventional methodologies for system error analysis in QA tasks. Moldovan et al.'s data-driven approach to QA error analysis, focusing on answer accuracy based on question stems, reveals that certain question types are more challenging for LLMs . Rondeau et al. used feature extraction for systematic error analysis in the Stanford Question Answering Dataset (SQuAD), employing feature extractors and classifiers . - -### **Existing Approaches to Improve Social Commonsense Reasoning** - -Our research also explores existing literature offering solutions for mitigating errors in social commonsense reasoning. Some of these works suggest incorporating external structured data, such as knowledge graphs, into models. For example, Chang et al. showed that integrating knowledge graphs like ConceptNet improves performance on Social IQA . - -However, despite confirming the effectiveness of this approach, studies like Mitra et al. also noted instances where models, even with access to relevant information that can directly lead to the correct answer, predicted incorrect answers based on irrelevant knowledge . - - -## **Methodology** - -### **Step 1: Applying Flan-T5 to Social IQA** - -We first prompted Flan-T5, known for its promising reasoning task performance , to answer 33,411 MCQs from the Social IQA dataset. The prompt format, emulating the examples was as follows: - -[Context]. - -Based on the context above, choose the best answer to the question: - -[Question] - -OPTIONS: - -(A) [Answer A] - -(B) [Answer B] - -(C) [Answer C] - -For your answer, return exactly one character, either A, B, or C. 
- -### **Step 2: Qualitative Coding of 350 Errors** - -Next, we used the following procedure, based on standard iterative qualitative coding methods, to categorize instances where Flan-T5's response differed from the Social IQA dataset's correct answer. - -1. Initial Annotation: initially, for a subset of 100 rows, two independent coders annotated each row, noting the reasons for the discrepancy in the correct answer choice between the dataset and Flan-T5. - -2. Theme Identification: the coders reviewed each other's annotations and engaged in discussions to identify major themes in inconsistencies. Based on these discussions, they developed a formal set of tags to apply to the rows. - -3. Tagging: finally, they applied these tags to a total of 350 rows - -### **Step 3: Quantitative Error Analysis** - -We then analyzed the data to determine the frequency of each error type within our tagged dataset (n=350). We explored potential features, such as specific words, that contributed to the difficulty of the questions. - -### **Step 4: Addressing Type 1 Errors - Developing a Pruning Tool** - -Our objective here was to develop a tool that could use our tagged question set to accurately identify problematic questions. Unfortunately, this approach did not yield the desired results and needs future work. - -### **Step 5: Addressing Type 2 Errors - Analyzing through Attention Mechanism Visualization** - -Finally, we shifted our focus to examining errors by visualizing the attention mechanisms of the model. This approach aimed to provide deeper insights into how the model processes and responds to various types of questions, particularly those categorized as Type 2 errors. - - -## **Analysis and Evaluations** - -### **General Accuracy of Flan-T5 on Social IQA** - -Overall, Flan-T5 exhibits a high accuracy of 90% when presented with MCQs from Social IQA, which could be because it was fine-tuned “on a large set of varied instructions,” similar to the questions we present it . 
This accuracy is much higher than BERT, which had a 64.5% accuracy . - -### **Set of Formal Tags Derived from Qualitative Coding** - - -In the initial annotation phase of qualitative coding, both coders were surprised to find many questions marked "incorrect" because of issues inherent in the Social IQA questions themselves (see below for an example). Therefore, we wanted to characterize why the Social IQA multiple choice questions were problematic: was it a lack of context comprehension, the unreasonableness of all answer options, or the presence of multiple equally reasonable answers? - -During the theme identification phase, the coders established two groups of tags: - -1. Errors arising from the flawed construction of the Social IQA dataset - -2. Errors due to Flan-T5's responses not aligning with social commonsense - -### **Type 1 Errors** - -For Type 1 errors, six labels were created: - -1. Nonsensical Context: When the context sentence is incomprehensible to a reasonable human. - -2. Nonsensical or Ambiguous Question: When the question is either nonsensical or too ambiguous. - -3. Major Typo: Refers to incomprehensible parts of the Context, Question, or answer choices due to typos. - -4. Multiple Reasonable Answers: When several answers appear equally reasonable, either due to similar meanings or general reasonableness. - -5. No Reasonable Answer: When no answer options seem appropriate or reasonable. - -6. Incorrectly Labeled "Correct" Answer: When an alternative answer seems more reasonable than the one marked "correct." - -**Examples of Type 1 Errors** - -
-
- {% include figure.html path="assets/img/2023-11-11-unraveling-social-reasoning-in-llms/errors.png" class="img-fluid" %} -
-
-
- Examples of Questions and Answers -
-
-### **Type 2 Errors**
-
-For Type 2 errors, we devised the following set of three labels:
-
-1. Overinfers: This tag is for questions where Flan-T5 seems to make a large leap in logic, resulting in it picking an answer choice that makes spurious assumptions when a much more direct and clear answer is available.
-
-2. Associated but Incorrect: This is for questions where Flan-T5 picks an answer choice that is associated with the context and question, but is not what the question is specifically asking about. This differs from over-inferring in that this usually entails picking irrelevant answer choices.
-
-3. Flan-T5 Incorrect (unspecified): all other mistakes attributable to Flan-T5.
-
-### **Distribution of Tags**
-
-Looking at our annotated questions, we see that 65% of errors stemmed from the flawed construction of the Social IQA dataset. Meanwhile, 38% of errors were errors stemming from Flan-T5 not picking the right answer. Observe that it is possible for a question to be tagged with both a Type 1 tag and a Type 2 tag.
-
-For Type 1 errors, we see that having multiple reasonable answers is by far the most common reason why a question is problematic. This was followed by having no reasonable answer, and the answer labeled “correct” not being the best available answer. Indeed, the top three reasons why a question is considered problematic all stem from questionable answer choices. This highlights how the construction of the answer choices, and thus Social IQA as a benchmark set, is problematic.
-
-
- {% include figure.html path="assets/img/2023-11-11-unraveling-social-reasoning-in-llms/socialiqaerrors.jpeg" class="img-fluid" %} -
-
-
- Histogram of the Types of Errors in SocialIQA -
- -Next, we examine the distribution of Type 2 error tags. We see that the most common reason is Flan-T5 over-inferring. - -
-
- {% include figure.html path="assets/img/2023-11-11-unraveling-social-reasoning-in-llms/flant5errors.jpeg" class="img-fluid" %} -
-
-
- Histogram of the Types of Errors FlanT5 Makes -
- -### **Analysis of Question Types** - -In our quantitative analysis, we identified key features contributing to lower accuracy in certain questions. Notably, questions containing the word 'others' scored lower in accuracy, with an average of 0.880, compared to the general accuracy score of 0.990. Furthermore, questions featuring repeated answer choices also exhibited a lower accuracy score of 0.818. - -### **Attempt to Prune Social IQA** - -Assessing models on social commonsense reasoning questions requires clear comprehension and consensus on the appropriateness of the questions and their answer choices. Our goal was to create a tool to classify the sensibility of these questions and answers. To achieve this, we experimented with various models, including Flan-T5 and GPT-4, asking them to evaluate the coherence of the questions. Unfortunately, the results were inconsistent, often varying with each regeneration of the response. Despite these challenges, we maintain that addressing this issue remains crucial. - -### **Visualization of Attention Mechanism** - -In our analysis of Type 2 errors, we focused on how the errors happen because Flan-T5 overinfers the underlying reasons not explicitly stated in the question instead of picking the more straightforward and correct answer, or picks some answer associated with the words in the context that isn’t directly related to the question. - -In addition to providing qualitative analysis, we set out to provide some quantitative analysis to better understand why this was happening. Consider these linked notebooks, which visualize the cross attention and the encoder attention for [one correctly labeled example](https://colab.research.google.com/drive/1M4XhFORT5KJgFaaVnZM3g5QS8JPV7Apq#scrollTo=JHkd_dwR2AiZ) and [one incorrectly labeled example](https://colab.research.google.com/drive/1VtU2kvJA5EKL4AeuaW-N0RwiBimb96MB#scrollTo=JHkd_dwR2AiZ), where Flan-T5 chooses an associated but incorrect answer. 
(Note that the specific images were chosen for brightness in the heatmaps, since the attention was normalized. Please reference the notebook.)
-
-To visualize cross-attention, we looked at the cross-attention between the answer Flan-T5 generates and the encodings, across each layer and attention head in Flan-T5, grouping in both orders. To visualize the encoder attention, we looked at the average attention for each layer in the input encoding, and for the layer that saw the most drastic change (layer 2, starting from 0 index), we visualized the attention for each attention head.
-
-Now, consider the context and question:
-
-Cameron had a big paper due later in the week, so Cameron put pen to paper. What will Cameron want to do next?
-
-A) research his topic
-
-B) write an outline
-
-C) redo his topic
-
-Flan-T5 answers A), while the correct answer is “write an outline.” Notably, Flan-T5 doesn’t choose the third answer, “redo his topic.”
-
-Therefore, we can see that Flan-T5’s choice is associated with the topic, but isn’t the correct answer, which is given by the phrase “put pen to paper.” Visualizing the average encoder attention and cross attention, we see that the contextualized embeddings and generation primarily focus on the words “big paper” and the question, but don’t pay much attention to the word “pen.”
-
-
- {% include figure.html path="assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated.png" class="img-fluid" %} -
-
-
- Cross Attention for Associated But Incorrect Answer -
- -
-
- {% include figure.html path="assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated_encoder.png" class="img-fluid" %} -
-
-
- Encoder Attention for Associated But Incorrect Answer -
-
-
-Generalizing our results a bit, we find that Flan-T5 only pays reasonable attention (normalized attention > 0.05) to the keywords for 14 out of 26 examples tagged under “associated,” even for simpler questions.
-
-On the other hand, consider the question,
-
-Sydney played basketball with her friends after school on a sunny afternoon. What does Sydney need to do before this?
-
-A) take a nap before this
-
-B) have a basketball before this
-
-C) go home before this
-
-Flan-T5 correctly answers “have a basketball before this,” not choosing “take a nap before this” or “go home before this.”
-
-Indeed, we see the four vertical lines in the encoder and cross attentions that correspond to key phrases in the sentence. For the questions that Flan-T5 gets correct, it pays attention to the right keywords 9 out of 10 times. Lastly, note that for questions labeled “overinfer,” Flan-T5 pays attention to the right keywords 8 out of 10 times.
-
-
- {% include figure.html path="assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct.png" class="img-fluid" %} -
-
-
- Cross Attention for Correct Answer -
- -
-
- {% include figure.html path="assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct_encoder.png" class="img-fluid" %} -
-
-
- Encoder Attention for Correct Answer -
-
-
-Therefore, for more straightforward questions, namely, questions that have one straightforward answer, Flan-T5 can find the right keywords that lead it to the answer (i.e. the correct questions). On the other hand, for more challenging questions that require paying attention to specific keywords and reasoning from the perspective of a character (recall the Sally-Anne Test), Flan-T5 struggles more, with more variance between what it pays attention to and doesn’t (e.g. paper but not pen).
-
-In addition, since Flan-T5 pays attention to the right keywords most of the time for the questions it overinfers on, this suggests that there’s some aspect of reasoning that’s not being captured via our attention visualizations, and that this reasoning isn’t performing that well.
-
-Notably, for all of the examples, by the third encoder layer, on average, Flan-T5 doesn’t change its encodings, and for the cross attention, the attention remains consistent across all layers and (most) attention heads. Therefore, it seems like most of the “reasoning” is being performed in the encoding stage.
-
-Therefore, some of our next steps are understanding how removing attention heads in a smaller model affects the model’s ability to reason, given the large number of heads and layers (24 x 32) in Flan-T5-xxl . We visualized each encoder head for one layer, but this doesn’t immediately lend itself to an intuitive interpretation.
-
-## **Discussion**
-
-Our work concentrated on analyzing two categories of errors and proposing solutions to address them. The two error types are:
-
-1. Errors originating from the flawed construction of the Social IQA dataset.
-
-2. Errors where Flan-T5's responses do not align with social commonsense.
-
-### **Problems with Social IQA**
-Our analysis of Type 1 errors in the
- Social IQA dataset revealed significant issues. 
In examining n=350 incorrectly answered questions, we found that 65% had problems with their context, question, or answer choices. Additionally, 54.4% of these errors had multiple reasonable answers, 23.7% lacked any reasonable answer, and 14.0% seemed to have mislabeled correct answers. This indicates a substantial number of misleading answer choices in the Social IQA questions. - -This issue partly stems from the dataset's construction, which involved assigning crowdsourced workers tasks of writing positive answers for each question and sourcing negative answers from "different but related" questions. This approach likely contributed to the high error rate. - -Since Social IQA is so frequently used in evaluating model performances and transfer learning tasks, the challenge is to identify and remove these flawed questions. Although our attempt to do this was unsuccessful due to time and budget constraints, we believe it is feasible. Many evaluations of large language models (LLMs) use crowdsourced multiple-choice questions, so a pruning tool to ensure benchmark reliability would be highly beneficial beyond the task of social commonsense reasoning. - -Pruning the Social IQA dataset to eliminate most erroneous questions would also provide an opportunity to reassess older models. - -Overall, our analysis of Type 1 errors underscores the need for caution in crowdsourcing benchmark questions. While crowdsourcing likely still remains the best solution for creating large benchmark sets, a pruning tool is essential to maintain the reliability of such datasets. - -On the other hand, our analysis of Type 2 errors suggests that LLMs still might not match the social reasoning skills of humans for more complex scenarios. For simpler questions, they can often find a single keyword that informs their answer, while for more complex questions, they often miss important phrases and can’t necessarily think from another person’s perspective. 
For instance, recall how questions containing the keyword “other” result in Flan-T5 having considerably lower accuracy. - -## **Main Limitations** - -The primary limitations of our study are rooted in its scope and methodology. Firstly, we focused exclusively on a single model, Flan-T5, which may limit the generalizability of our findings. Additionally, our analysis was based on a relatively small sample size of n=350, and it involved only two coders. For a more robust and comprehensive evaluation, increasing the number of coders would be beneficial, particularly to assess intercoder reliability. Furthermore, implementing measures to mitigate recognition bias during the tagging process would enhance the validity of our results. diff --git a/_posts/2023-11-16-graph-articulated-objects.md b/_posts/2023-11-16-graph-articulated-objects.md deleted file mode 100644 index 2385c029..00000000 --- a/_posts/2023-11-16-graph-articulated-objects.md +++ /dev/null @@ -1,232 +0,0 @@ ---- -layout: distill - -description: Pre-trained large vision-language models (VLMs), such as GPT4-Vision, uniquely encode relationships and contextual information learned about the world through copious amounts of real-world text and image information. Within the context of robotics, the recent explosion of advancements in deep learning have enabled innovation on all fronts when solving the problem of generalized embodied intelligence. Teaching a robot to perform any real-world task requires it to perceive its environment accurately, plan the steps to execute the task at hand, and accurately control the robot to perform the given task. This project explores the use of vision-language models to generate domain descriptions. These can be used for task planning, closing the gap between raw images and semantic understanding of interactions possible within an environment. 
- -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Anirudh Valiveru, Ram Goel - url: "https://anirudhv27.github.io/" - affiliations: - name: CSAIL, MIT - -bibliography: 2023-11-16-project-proposal.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Project Background - - name: Related Work - - name: Experiments and Findings - - name: Future Work and Implications - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Project Background - -Recent advancements in generative AI have transformed robotic capabilities across all parts of the stack, whether in control, planning, or perception. As self-driving cars roll out to public roads and factory assembly-line robots become more and more generalizable, embodied intelligence is transforming the way that humans interact with each other and automate their daily tasks. - -Across the robotic manipulation stack, we are most interested in exploring the problem of scene representation; using the limited sensors available, how might a robot build a representation of its environment that will allow it to perform a wide range of general tasks with ease? While developments in inverse graphics like NeRF have given robots access to increasingly rich geometric representations, recent work in language modeling has allowed robots to leverage more semantic scene understanding to plan for tasks. 
- -### Introduction to Task Planning - -In robotics, the term **task planning** is used to describe the process of using scene understanding to break a *goal* down into a sequence of individual *actions*. This is in contrast with *motion planning*, which describes the problem of breaking a desired *movement* into individual configurations that satisfy some constraints (such as collision constraints). While simply using motion planning to specify a task is necessary for any generalized robotic system, *task planning* provides robots with a *high-level* abstraction that enables them to accomplish multi-step tasks. - -Take the problem of brushing one's teeth in the morning. As humans, we might describe the steps necessary as follows: - -1. Walk to the sink. -2. Grab the toothbrush and toothpaste tube. -3. Open the toothpaste tube. -4. Squeeze toothpaste onto brush. -5. Brush teeth. -6. Rinse mouth. -7. Clean toothbrush. -8. Put everything back. - -### Planning Domain Definition Language (PDDL) Explained - -Creating a task plan is a trivial task for humans. However, a computer must use a state-space search algorithm like *A\* search* to plan a sequence of interactions from a *start state* to a desired *goal state*. Doing so requires us to define a standard that formally specifies all relevant *environment states*, along with the *preconditions* and *effects* of all possible transitions between two states. - -The Planning Domain Definition Language (PDDL) was invented to solve this problem. Description languages like PDDL allow us to define the space of all possible environment states using the states of all entities that make up the environment. Environments are defined as a task-agnostic *domain file*, while the *problem file* defines a specific task by specifying a desired *start* and *end* state. - -
-
- {% include figure.html path="assets/img/pddl-explained.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Figure 1: This figure above breaks down a simple PDDL domain file into its constituent components. A PDDL domain generally consists of predicates, which describe the state, and actions, which specify state transitions. -
- -Despite task planning's utility, however, there is one major drawback; this approach to planning requires the robot to have a *detailed PDDL domain file* that accurately represents its environment. Generating this file from perception requires not only a semantic understanding of all objects in a space, but also of all possible interactions between these objects, as well as all interactions that the robot is afforded within the environment. Clearly, there is a major gap between the task-planning literature and the realities of upstream perception capabilities. - -## Related Work - -The use of LLMs in robotic planning and reasoning has exploded in the past few years, due to the promise of leveraging a language model's internal world understanding to provide more information for planning. One such work is LLM+P, which combines an LLM with a classical planner to solve a given problem specified in natural language, using PDDL as an intermediate representation. LLM+P works by converting the description into a PDDL problem representation, running a classical planning algorithm to find a solution, and then computing the sequence of actions back into a natural language description interpretable by humans. Importantly, LLM+P demonstrates that using an LLM to output a PDDL representation can be a viable strategy in solving planning problems that are specified to a robot. However, there are a few limitations. For one, LLM+P assumes that a relevant domain file is already provided to the robot, specifying all entities and their relationships within the environment's context. While domain files are generally carefully crafted by hand, vision-language models can automate this process. - -LLMs have also been used to solve plans directly, to varying levels of success. Works like SayCan and LLM-Planner use the LLM as a planning engine directly, circumventing the need to use a traditional high-level planner completely. 
SayCan, in particular, uses a combination of language-grounded instructions and task affordances that indicate the robot's ability to execute a given task, using language to determine the most viable skill to execute from a set of predefined skills. These bodies of work have greatly enabled the ability of robots to parse, understand, and execute instructions given to them by their operators as natural language. Particularly, an LLM's ability to break a problem down into several constituent steps is critical to enabling long-horizon task planning with multiple steps. - -Language is an increasingly promising modality for robots to operate in, due to the ubiquity of relevant language data to learn real-world entity relations from the internet. However, foundation models that integrate vision and robot-action modalities enable even stronger semantic reasoning. Google's Robot Transformer 2 (RT-2), for example, is a recent work that performs perception, planning, and control all in a single neural network, leveraging internet-scale data. One major drawback of visuomotor policies, such as that employed by RT-2, is that we lose interpretability of a robot's internal representation. - -Nonetheless, multi-modal foundation models have proven to be a useful tool across the spectrum of robotic planning. Our project takes inspiration from the above works in LLMs for planning and extends the idea to domain-generation, allowing task-planners to work in real-world scenarios. - -The rapid advancement of LLMs and vision-language models open up a world of possibilities in closing this gap, as robotic perception systems may be able to leverage learned world understanding to generate PDDL files of their own to use in downstream planning tasks. This project aims to investigate the question: can VLMs be used to generate accurate PDDL domains? 
- -## Experimental Setup - -To investigate this, we decided to explore this problem by testing the capabilities of VLMs on various tasks and levels of prior conditioning. This allows us to explore the problem on two axes: domain complexity and amount of information provided as a prior to the VLM. Each of these axes are chosen to progressively increase the complexity of the domain being explored, while also progressively increasing the amount of *information* available. Designing our experiments like this allows us to understand the importance of *information* and *domain complexity* and how they affect the overall results. - -Due to ease of access, we decided to use OpenAI ChatGPT’s *GPT4-Vision* functionality to run our experiments. A more comprehensive ablation may analyze these experiments across a wider range of VLMs. - -### Domains of Interest -Within the context of task planning for generalizable robotics, the problem of cooking in a kitchen setting is a fascinating problem because of the combination of their usefulness and the high dimensionality and discretization of kitchen tasks. As a result, kitchen setups like cooking, cleaning, and cutting ingredients are great ways to understand task-planning, and are the domains that we chose to study in this work. - -
-
- {% include figure.html path="assets/img/image-examples.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Figure 2: A summary of domains that we are interested in studying. The three domains increase in complexity, which allows us to analyze the effect of complexity on the VLM's effectiveness. -
- -The three domains used in our study are: - **Cut**: Bagel + utensils used for cutting ingredients - **Cook**: Everything in Cut + a pan, spatula, and a stove - **Clean**: Everything in Cook + a soap bottle, a sink, and a sponge - -Our handcrafted “ground-truth” domain files are designed to support the target tasks of *cutting* a bagel, *cooking* a sliced bagel, and *cleaning* utensils, respectively. Ideally a good PDDL file generated is one where these tasks are supported. - -### Prompting Strategies. -We also experimented with four different prompting strategies, with each strategy providing progressively more information to the VLM for its PDDL generation task. All prompts provided to the VLM consist of the target image, along with a text-based prompt meant to guide the VLM towards a more accurate PDDL representation. - -The strategies are as follows, along with examples used by our experiment for the cut domain. Text that was added progressively to the prompt is ***bolded***: - -1. Raw Generation: Image + generic prompt - - *You are a robot that needs to execute task planning in the setup shown in the image. Given the image, please generate a Planning Description Domain Language (PDDL) domain file that describes the scene.* -2. Prompt 1 + describe each object in the scene - - *You are a robot that needs to execute task planning in the setup shown in the image. **This image includes a bagel, a plate, a bowl, and a knife.** Given the image, please generate a Planning Description Domain Language (PDDL) domain file that describes the scene.* -3. Prompt 2 + describe the target task - - *You are a robot that needs to execute task planning **to cut the bagel** in the setup shown in the image. This image includes a bagel, a plate, a bowl, and a knife. Given the image, please generate a Planning Description Domain Language (PDDL) domain file that describes the scene.* -4. 
Prompt 3 + explain object relations in detail - - *You are a robot that needs to execute task planning to cut the bagel in the setup shown in the image. This image includes a bagel, a plate, a bowl, and a knife. **In order to cut the bagel, one must use the knife and place the bagel and knife on the plate beforehand. I can place the bagel on the plate or the bowl, and cut the bagel using the knife.** Given the image, please generate a Planning Description Domain Language (PDDL) domain file that describes the scene.* - -### Evaluation Metric: Embedding Cosine Similarity - -
-
- {% include figure.html path="assets/img/evaluation.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Figure 3: Our evaluation process. We used a vector-embedding-space cosine similarity metric to conduct our analysis, using a handcrafted PDDL file as input. -
- -Since several different PDDL domains can be used to represent the same set of actions and predicates, the task of generating PDDL files is quite subjective. Since generating PDDL tasks is an often-tedious task that humans must do themselves to represent any given domain, we evaluate each VLM output based on its similarity to real PDDL domains handcrafted manually. After asking the VLM to generate a PDDL file, both the *target* and the *generated* domain descriptions are embedded using the **Longformer: Long Document Transformer** model, before finally computing the cosine similarity between the two embeddings. We decided to use **Longformer**, instead of a more standard model like *LLAMA 2* or *BERT*, due to the long context-length afforded to us by the model; a more comprehensive study would require studying these effects on a wider range of embedding models to more accurately understand the text-to-text similarity between the two domains. - -Note that this cosine similarity in the embedding space is quite a coarse metric to evaluate our outputs for a couple of reasons. The primary concern with this evaluation approach has to do with the transferability between PDDL files, which are specified in a LISP-like syntax, and natural language documents, which Longformer was trained to embed. In this study, we assume that such an embedding model *can* be used to make such a comparison, and discuss our study accordingly. - -Aside from this, PDDL’s structure also provides several keywords that are commonly used by all PDDL files, such as *action*, *predicate*, and *preconditions*. In order to handle these, we decided to simply remove all instances of these words from both the *target* and the *generated* PDDL files, in order to mitigate the effect of the similarity between these tokens. 
- -## Results - -After experimenting on a wide range of complex environments with various prompting strategies, it seems that VLMs perform quite well for the task of generating PDDLs from image and text conditioning. We measured the similarity of the ground truth PDDL file with each image and experiment’s generated PDDL file. To quantitatively measure similarity, we used the cosine similarity metric on the embeddings of the masked pieces of text using Longformer. We did not use BERT due to the context length not being long enough for our PDDL files. Recall that we masked out certain frequently appearing words in PDDL files so as to not inflate the similarity due to this commonality. - -The exact generated PDDL files can be found at [this link](https://github.com/anirudhv27/final-project-dl/tree/main/experiments). - -First, we will qualitatively analyze the generated words in each of the three categories of the PDDL files: types, predicates, and actions. Then, we will also provide quantitative metrics that measure similarity directly with the ground truth PDDL files that we wrote. - -### Types - -Types are the first part of PDDL files. They describe the various sorts of objects that appear in the image. For example, in the “cut” image, the generated types are “utensil, plate, food”. Note that the types often compress similar sorts of objects, e.g. both spatulas and knives fall under the type “utensil”. Type generation is somewhat inconsistent, since types are not strictly required by PDDL files to exist, which could contribute towards why certain generated PDDL files do not have a types section at all. - -### Predicates - -Predicates in the PDDL files are descriptive phrases that describe distinct parts of the scene, at a given time. For example, in the “cut” image, experiment 4 has the following predicates “(plate-empty), (bowl-empty), (bagel-on-plate), (bagel-on-bowl), (knife-on-plate), (bagel-cut)”. 
Note that these are not precisely representative of the current state of the image, but rather represent what states could also appear in the future, e.g. “(bagel-cut)”, even though the bagel is not yet cut. The accuracy of the generated predicate set is surprisingly high, regardless of which experiment we use. - -It seems that all four experiments generate approximately the same predicate set. For the “cut” image, all of the predicates generally have the objects “bagel”, “knife”, “plate”, etc., and sometimes where they are placed relative to each other. In the later “cook” and “clean” images, there are also predicates conditioning on whether the bowl/plate is clean or not. In particular, the generated predicates for Experiment 1 -- where we do not tell the VLM the task -- also make sense with respect to the inferred task! This evidence suggests that the generated predicates match the planned task, thus implying that the VLM is able to learn the task quite well just based on the image. - -
-
- {% include figure.html path="assets/img/predicate-example.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Figure 4: The correspondence between the inputted image and the generated predicates. Note that the predicates are not descriptive of just the current state, but also are descriptive of the later potential states based on the inferred task. -
- -### Actions -Similar to the predicate generation, the action generation is extremely accurate. The various sequences of predicted actions make sense for the given images and conditioning. For example, one of the generated action sequences from Experiment 1 is: - ->(:action prepare-sandwich -> :parameters (?b - food ?p - container) -> :precondition (and (contains ?p ?b) (is-clean ?p)) -> :effect (and (inside ?b ?p) (not (empty ?p))) -> ) - -This is a very detailed sequence of actions, which also makes sense -- in order to prepare a sandwich, the generated PDDL file notices we need the food and the container, and then checks if it is clean and not empty. - -Again, the results from Experiment 1 compared to the later experiments which have more textual conditioning are extremely similar, indicating that most of the information the VLM collects is from the image. Our added conditioning does not seem to improve generation of the action sequences much more. - -### Quantitative Analysis with Cosine Similarity - -Along with qualitative analysis of each part of the PDDL file, we also performed a holistic analysis of the entire PDDL file that compares similarity with our handcrafted ground truth PDDL file. We measured the cosine similarity between the two PDDL files, for each experiment in each image. Due to the general format of PDDL files, certain words appear at the same places many times. Hence, we masked these words out, in order to not inflate the similarity in a superficial manner. - -
-
- {% include figure.html path="assets/img/cosine-similarities.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Table 1: Cosine similarities using the Longformer embedding model. -
- -
-
- {% include figure.html path="assets/img/bar-chart.jpg" class="img-fluid rounded z-depth-1" %} -
-
-
- Figure 5: Bar chart detailing cosine similarity in all tested scenarios. While there are clear trends between prompting strategies, all cosine similarity scores tend to hover around 98%. -
- -As we can see, our methods performed quite well, with masked cosine similarity consistently above 0.98. This makes sense qualitatively as well, since as discussed above, the VLM generated types, predicates, and actions that made sense. - -One of the most noteworthy aspects of the above data is that according to this metric: - -- Experiments 1-3 all perform similarly, with some doing better than others in different images. -- Experiment 4 consistently performs worse than Experiments 1-3. - -This is surprising, since we would expect that more conditioning implies better performance. In Experiment 4, we added certain conditioning of the form of textual relationship between objects in the image. This result leads us to the conclusion that adding this sort of conditioning is not helpful for PDDL file generation, and is in fact negatively correlated with performance. Previous analysis has implied that the VLM learns extremely well from the image alone, and this result suggests that in fact it is better to let the VLM learn only from the image, without adding too much of our own conditioning. - - -## Conclusion: Limitations and Future Work - -Our work analyzes the potential of the recent advances in VLMs for the purposes of robotic task planning. By creating a systematic set of experiments over increasingly complex images, we were able to showcase the power of VLMs as a potentially very powerful tool for general task planning problems. The accurate generation of PDDL files based on only the images shows us that VLMs learn from images extremely well, without the need for extra textual conditioning. In fact, we noticed that providing too much conditioning actually can decrease performance, thus further suggesting that VLMs learn best from images. This result is promising for generalizing to the greater context of robotic task planning, since vision is one of the most prominent ways in which robots dynamically task plan when navigating real-world environments. 
Harnessing the power of VLMs could prove to be the future of robotic task planning. - -There are a couple of limitations in our work, which have the potential for future exploration. In order to test the true utility of the generated domain files, we would need to also generate problem PDDL files, after which we could run the problem on the domain to test the robustness of the domain. The qualitative and quantitative metrics in our study heavily imply that our domain file is valid, by testing on ground truth PDDL files. However, a more comprehensive study could also concurrently generate problem files, which are tested on the generated domain file. Perhaps a method could be made which alternatively trains both the problem and domain files by iteratively testing the problem on the domain, similar to the idea of a Conditional Generative Adversarial Network (GAN). Another limitation is that we only tested on one VLM architecture. A more comprehensive ablation may analyze these experiments across a wider range of VLMs. diff --git a/_posts/2023-11-16-speculative-decoding.md b/_posts/2023-11-16-speculative-decoding.md deleted file mode 100644 index dafb732d..00000000 --- a/_posts/2023-11-16-speculative-decoding.md +++ /dev/null @@ -1,255 +0,0 @@ ---- -layout: distill -title: Accelerating large model inference with speculative decoding - 6.s898 -description: An investigation into methods to speed up autoregressive inference through increased parallelization, specifically through speculative sampling and decoding. -date: 2023-11-16 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Dakota Goldberg - url: "/#" - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-11-16-speculative-decoding.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. 
-toc: - - name: Introduction - subsections: - - name: Inference in autoregressive models - - name: Speculative execution in processors - - name: Applying speculative execution to model inference - - name: Hierarchical speculative decoding - - name: Current Work - subsections: - - name: General setup - - name: Sampling $p(x)$ - - name: The Algorithm - - name: Evaluation - - name: Hierarchical Speculative Decoding - - name: Experiments - subsections: - - name: General set-up for experiments - - name: How many orders of magnitude larger should $M_p$ be than $M_q$? - - name: Set-up for hierarchical speculative decoding - - name: Results - subsections: - - name: Calculating $c$ for each model pair - - name: The general effect of speculative decoding - - name: Acceptance rates and wall time given $M_p$ and $M_q$ - - name: Results of hierarchical speculative decoding - - name: Conclusion - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Introduction - -### Inference in autoregressive models - -Autoregressive models, particularly transformers and RNNs, play a crucial role in tasks involving sequential data processing, such as natural language processing and time series analysis. However, a significant limitation of these models is their slow inference speed. The primary bottleneck in these models is associated with memory reads and writes, rather than arithmetic computations. This is especially problematic in larger models with vast parameter spaces, where efficient memory management is critical to performance. 
Further, these models generate outputs sequentially, one token at a time, with each new token depending on all previously generated tokens. This inherent sequential dependency limits the model’s ability to parallelize the token generation process, leading to inference latency much greater than that of models capable of processing data in parallel. The challenge is to overcome this sequential bottleneck without compromising the model's ability to accurately capture dependencies in the data. - -The central question this project addresses is whether it's possible to introduce parallelism into the inference process of autoregressive models. A more specific aspect of this problem is whether probabilities for multiple tokens can be computed simultaneously, rather than processing each token individually. This project aims to enhance methods that have been proposed for parallelizing the decoding process, focusing on solutions that draw inspiration from speculative execution in processors and other systems design strategies. - -### Speculative execution in processors - -Speculative execution is a technique used in CPU architecture to improve processing speed. Instead of waiting for sequential execution of instructions, processors predict which instructions are likely to be executed next and start processing them in advance. If the prediction is correct, this leads to a significant reduction in latency, as the processor has preemptively executed necessary instructions. If the prediction is incorrect, the processor discards the speculative results and reverts to the correct execution path. This method effectively utilizes CPU resources that would otherwise remain idle during the waiting period, thus optimizing the overall processing speed and reducing latency. 
- -### Applying speculative execution to model inference - -Inspired by speculative execution in processors, this project explores how similar principles can be applied to accelerate inference in large autoregressive models. The concept involves generating multiple potential outputs in parallel, using a smaller or draft model, and then evaluating these outputs with the larger target model. This mimics the speculative execution process where multiple paths are explored simultaneously, with the most promising path being selected as the final output. This approach, referred to as "speculative sampling" or "speculative decoding," aims to introduce a level of parallelism in the inference process, enabling faster generation of outputs without compromising the quality or accuracy of the model’s predictions. - -### Hierarchical speculative decoding - -In addition to implementing already proposed speculative decoding techniques, this project investigates a strategy that has the potential to further speed up inference: hierarchical speculative decoding. This method aims to accelerate the smaller approximation model with an even smaller, faster model. While I experiment with two-layer (traditional) and three-layer hierarchies in this project, one could theoretically extend this idea to create an _n_ layer hierarchy, assuming sufficient memory. Although researchers developing speculative decoding algorithms and sampling methods have mentioned the potential viability of hierarchical speculative decoding, none have tried to implement it. Thus, this project aims to find an efficient implementation of the approach and determine if it actually further speeds up inference. - -## Current Work - -Multiple papers have presented novel speculative decoding algorithms, with the nuance typically in the way that sampling is performed. The two most-referenced papers in this space are DeepMind's Accelerating Large Language Model Decoding with Speculative Sampling (Chen et al.) 
[(paper)](https://arxiv.org/pdf/2302.01318.pdf) and Google Research's Fast Inference from Transformers via Speculative Decoding (Leviathan et al.) [(paper)](https://arxiv.org/pdf/2211.17192.pdf). This project draws its architecture from the latter, so we will more explore its approach in-depth and describe how its shortcomings motivated the experiments in this project. - -### General setup - -The approach presented in Fast Inference from Transformers via Speculative Decoding (Leviathan et al.) aims to accelerate inference from a target transformer-like model $M_p$. We present a distilled version of the speculative decoding set-up, algorithm, and evaluation here. - -We start with two models: - -1. $M_p$ (the target model) -2. $M_q$ (a smaller approximation model) - -$p(x_{t}|x_{ - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -# Introduction - -With recent advancements in deep learning, the intelligence of computers is quickly rivaling that of humans. GPT-4, with significant size and data, is able to score in the 90th percentile of the BAR, 88th percentile of the LSAT, and the 92nd percentile on the SAT . In dermatology, sophisticated computer vision models have outperformed trained professionals in diagnosing skin diseases and cancer . Despite this substantial computational advantage, neural networks notably lag behind humans in their capacity for continuous learning, a skill essential for any intelligent entity. Particularly, they suffer from catastrophic forgetting, a phenomenon in which the learning of a new objective significantly degrades performance on prior tasks. 
- -The human brain is able to protect itself from conflicting information and reductions in performance on previous tasks using complex mechanisms involving synaptic plasticity . In essence, the brain is able to self regulate the strength of its connections, allowing for neurons to become less activated according to their memory and relevance. This ability has been attributed for the unmatched ability to learn in humans, which has allowed for humans to show improvement in skill on nearly any motor task given training, while still remembering previous information . This, then, is highly desirable for neural networks. - -In contrast to the human’s ability to learn, neural networks significantly alter their parameters when learning a new task. In effect, the network's understanding of previous tasks is overwritten. This poses a great barrier to the creation of artificial general intelligences, which ultimately depend on continual, life-long learning . - -With the rapid increase in size and complexity of models, the field of model explainability and the desire to understand exactly what models are doing has quickly grown. Specifically in the field of computer vision, effort has been made to understand how models make decisions, what information leads to this decision, and how they learn what to observe . Methods such as saliency mapping, which displays the importance of aspects of an input image to predicting a class, filter visualization, which finds the most activating features for a given filter, and gradient class activation maps, which visualizes the gradients flowing into the final convolutional layer, have all significantly contributed towards the understanding of how models make decisions . - -We propose to make use of these explainability methods for the intelligent freezing of filters of a convolutional neural network. 
Specifically, we use saliency maps and filter visualizations to consider what a model is observing to classify an image, and then decipher which filters are most strongly contributing to this. In this paper, we contribute the following: 1. We create a method for the ranking of importance of filters in a convolutional neural network. We expand and combine upon previous works in model explainability to understand which filters are most strongly contributing to positive predictions. 2. We create a method for the freezing of filters of a convolutional neural network according to these rankings. We do this by first training on one task, freezing filters according to importance, then retraining the same model on a novel task. In doing this, we both corroborate our ranking system and identify a new strategy for alleviating catastrophic forgetting. - - -# Related Works -Continual learning and its core problem of catastrophic forgetting has gotten recent attention in deep learning research. It’s easy to understand why the goal of having a model that can adapt to new data without being completely re-trained is sought after, and there have been many approaches to the problem of aiding the model’s ‘memory’ of past tasks. Solutions range from attaching a significance attribute to certain weights in the model that regularizes change introduced by the new data to explicitly freezing weights via different metrics of the weights’ performance. -## Weight Changing Regularization -Elastic Weight Consolidation(EWC) approaches the problem of catastrophic forgetting by adding a ‘stiffness’ to the weights of previous tasks dependent on an approximation of the importance they had to previous task performance. The authors of ‘Overcoming catastrophic forgetting in neural networks’ . explain EWC as maximizing a posterior of the parameters over the entire dataset, and then splitting up the posterior into a loss over the new task and a posterior of the parameters over the old task. 
They model the posterior of the old data as a quadratic difference of the original parameters and the current ones multiplied by the Fisher information matrix, so minimizing this results in preventing parameters from changing too much from being predictable from the old task’s data. The authors of the original paper showed that EWC was effective at preventing CNN from forgetting how to classify the MNIST dataset and helping an RL model maintain performance in Atari games. However, EWC is an additional loss metric that must be calculated for each back-propogation and for each previous task; it’s also linear in the size of the output and therefore is prohibitive for high dimensional data. - -Another technique that attempts to use a regularizing factor to slow the retraining of old task parameters is explicitly computing a importance metric for each neuron in the network. The authors denote this method as “Synaptic Intelligence” as they drew their inspiration from the complex adaptation of synapses in the brain contrasted with the simple uni-scalar representation of neurons in a MLP network, and by allowing the network to account for the importance of they could help a neural network model the human behavior of continual learning. The metric they calculate as importance is based on 1) how much a parameter contributed to the reduction of loss over the entirety of training and 2) how much a parameter changed during training. They compared their performance to EWC and standard SGD on the MNIST dataset and found similar results to EWC while beating naive SGD as the number of consecutive tasks increased. -## Architectural Changes -A drastically different approach that a couple papers investigated was preventing interference between training runs by completely freezing the weights in parts of the model after completing a task’s training. The papers here differentiate themselves via the method they decide to freeze certain weights and layers. 
The earliest such paper we found was detailing a method called Packnet , where the weights they selected to keep via freezing was purely based on a certain percentage of the weights with the highest magnitude. They also made the decision to completely wipe the weights they did not freeze and then do a couple epochs of training on the model that was a mix of frozen and pruned weights. Their strategy achieved performance roughly equal to networks jointly trained on all the data at once and outperformed the naive strategy of simply retraining, validating a version of the freezing strategy. - -Instead of simply measuring the magnitude of weights to decide what layers or specific weights to freeze, authors of a paper on catastrophic forgetting explainability paper use a custom metric to find a layer that scores highest on their metric and subsequently freeze all the layers prior to that layer Their metric is an analysis of the difference in activation maps of a layer in the model pre- and post- training on the new task. They posit that this difference in activation is a measurement of how much a layer has forgotten how to activate in response to an input. Their reasoning for freezing the layers prior to the layer most changed by the new sample set is that the errors that induce catastrophic forgetting propagate throughout the network, so identifying the layer with the sharpest drop-off indicates that prior layers are to blame. This seemingly builds off an earlier paper that uses a similar activation map difference scheme to delicate layers that change more easily during training and instead directly freezes those fragile layers rather than those prior. In both papers, their results for this technique are an improvement over their ‘fine-tuning’ baseline, but the more recent paper’s results were not that differentiated from just selecting a layer to freeze before training a new task. 
-## Explanability Metrics -There exists many other explainability metrics with which one can target layers prior to training on a new task to try to prevent interference, an interesting one being saliency maps. Saliency maps attempt to capture the importance of features of the input on the output of a deep neural network. In the domain of CNNs, this can be thought of both the pixels and larger features, such as a window on a car, that contribute to a correct classification; saliency maps are analogous to trying to map out what parts of an image a model uses to make correct identification. A model of saliency maps we felt compelled enough to use in our project is that of , where their full-gradient approach creates saliency maps from the gradients of each layer. This strategy encapsulates the importance of both the inputs and the impact of neurons throughout the network on the saliency map. As parts of a neural network might suffer from varying degrees of catastrophic forgetting, being able to identify the saliency of individual neurons is a desirable quality in choosing a metric that explains catastrophic forgetting. - -# Methods - -## Model Type -We tested our method using VGG16. VGG16 is a deep convolutional neural network that has achieved impressive results on the ImageNet classification challenge, with a top-1 accuracy of 72% . Its sequential nature lends itself well to explainability methods like saliency maps. Further, it is relatively quick to train, even given the constraints of Google Colab. All of these attributes were highly desirable, as it allowed for rapid iteration for hyperparameter tuning, computation of saliency maps and filter visualizations, and a direct way to compare the viability of our freezing method through image classification accuracy. To ensure that the model did not have inference on any tasks prior to training, we randomly initialized the parameters. 
-{% include figure.html path="assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/vgg16.webp" class="img-fluid" %} -
- Figure 1: Schematic of VGG16 Architecture -
- -## Saliency Mapping -The computation of saliency maps is grounded in the principles of backpropagation. It follows a multi-staged procedure which uses gradients to consider the impact of each pixel in an image. First, it computes the partial derivatives of the target output with respect to individual segments of the input image. Then, it uses backpropagation to propagate error signals back to the input layer. It does this in order to identify the impact of pixels. It considers pixels with larger signals to have the greatest impact on the decision-making process. -There are a bountiful number of papers which propose different improvements on the original saliency map. When selecting a procedure, we identified two key features necessary for a useful visualization. We believed that a saliency map must have a full explanation of why a model made its prediction. Secondly, we believed that rather than considering each individual pixel, it clusters pixels together to consider importance. After testing, we ultimately used full-gradient saliency maps . Code for this method is publicly available on the GitHub created by the authors of this paper, fullgrad-saliency. - -
-
- {% include figure.html path="assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/mug_raw.png" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/mug_saliency.png" class="img-fluid" %} -
-
-
- Figure 2: Raw image and saliency map of a mug. -
- -The essence of full-gradient saliency maps lines up directly with the key features that we identified. To begin, it defines importance in the input image as a change in the feature resulting in change in model output. It seeks to illustrate a full answer for the model’s output. To this end, it considers both global and local importance of features in the input image, which results in a method which both weighs the importance of each pixel individually, but also considers the importance of different grouping of pixels. - -## Filter Visualization -In order to compute what different filters are looking at, we made use of the Convolutional Neural Network Visualizations GitHub repository, which is a useful library that has implementations of many popular explainability methods . Specifically, we used the implementation of a filter visualization method from the paper “Visualizing Higher-Layer Features of a Deep Network”, which uses backpropagation to maximize the activation of a given filter . With this, we can compute exactly what a filter is attempting to observe in an image. This method provides two different options for creating filter visualizations - one with gradient hooks, and one without. - -
-
- {% include figure.html path="assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_feature_viz.png" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_raw.png" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_saliency.png" class="img-fluid" %} -
-
-
- Figure 3: Heatmaps of Feature Visualization(left), Actual Image(middle), Saliency Map(right) -
- - - -## Training Procedure and Dataset Selection -We created two datasets from CIFAR-100 . We randomly selected 20 classes out of the total 100 and then divided these groups into two. We filtered the images from CIFAR-100 so that only images of those classes were in our datasets. We did this to ensure that the tasks the model was attempting to learn were of equal difficulty. We chose CIFAR-100 because we believed it was of adequate difficulty for the VGG16 architecture. We normalized the data and augmented it with random horizontal flips and random croppings. -For the first instance of training, we trained using stochastic gradient descent for 10 epochs with a learning rate of 1E-3. We did not implement any regularization or early stopping, as it was not necessary given training losses and testing losses. After this training, we used the described methods for calculating saliency maps and filter visualizations. For each class in the first dataset, we calculated the most useful filters by comparing saliency maps for the class to all filters. We compared these through multiple metrics, including mean squared error and Pearson correlation. To account for the fact that different layers of convolutional neural networks capture different types of information, we froze some percent of filters in each individual layer rather than the entire model. We left this percent as a hyperparameter. -To ensure fairness for each task, the second instance of training followed the same exact procedure as the first - the optimizer was stochastic gradient descent, we trained for 10 epochs, and used a learning rate of 1E-3. - - -# Results -For the sake of hyperparameter tuning and evaluating different strategies, we froze the datasets to be the first and second ten images of CIFAR-100. 
We sought to check how the number of filters we freeze changes performance across datasets, which metric is most useful in comparing saliency images to filter visualizations, and how viable this method is as compared to training on a single, larger dataset. Prior to the second round of training, the test accuracy on the first dataset was .4566 and the test accuracy on the second dataset was .1322. -{% include figure.html path="assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table1.png" class="img-fluid" %} - -The impact of freezing varying numbers of filters is in line with expectation - the more filters you freeze, the less inference you can gain, but also the more you will remember your previous task. In the table above, we can observe that with 25% of the filters frozen, we perform the best on dataset 2, with an accuracy of 39.2%, but the worst on dataset 1, with an accuracy of 20.7%. In contrast, when 75% of the filters are frozen, we maintain an accuracy of 38.4%, but do not learn about the new task, with an accuracy of 25.7%. -{% include figure.html path="assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table2.png" class="img-fluid" %} - -We found that mean squared error was the greatest metric for the comparison of saliency maps and filter visualizations, recording the highest average accuracy and also retaining much more information about the first dataset. From the table, we can see that when freezing 50% of filters in the network and selecting using mean squared error, we do roughly ten percentage points worse on the first dataset, but gain nearly double this loss on the second dataset. When compared to the randomly frozen method, it performs significantly better on the first dataset. This suggests that the filters that we froze are actually more important for correct predictions than the average. 
It makes sense that Pearson correlation is not particularly useful for comparison - it is not able to take into account the spatial information that is crucial for this comparison. -{% include figure.html path="assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table3.png" class="img-fluid" %} - -Finally, we found that training tasks sequentially and using the freezing method with a comparison metric of mean squared error slightly outperforms training the model on a larger, combined dataset at once. With this method, the model performed five percentage points better on predicting classes in both the first and second dataset. It is important to note that the accuracy reported for the model trained on the combined dataset is just the average accuracy over all of the classes, not necessarily split by the datasets. Still, to ensure fairness, the training procedure used for the combined dataset was the same as for the sequential training procedure, but trained for twenty epochs at once rather than ten epochs at two different times. This result implies that intelligently freezing filters of a neural network can be a viable strategy for overcoming catastrophic forgetting, even if just in a smaller setting. - - -# Discussion -## Takeaways -Through using convolutional neural network explainability methods such as saliency maps and filter visualizations, we were able to observe key insights into the relevance of different filters in VGG16. Quantitatively, we were able to measure this by freezing these layers and observing how well performance persisted after training on a new task. We found that freezing filters according to the similarity of their visualizations to saliency maps retains significantly more inference on a previous task, suggesting that these filters were more relevant to the previous task. By freezing these weights, we were also able to outperform simply training on a larger dataset. 
-We believe that more research should be directed towards applying explainability methods to achieve the objective of continual learning. Although there has been previous work in the past, these often rely on stopping catastrophic forgetting once it has been observed, rather than determining which parts of the network are too integral to a task to be retrained. -## Limitations -Because we are completely freezing weights, it is unlikely that this method could be generalizable to an arbitrary number of tasks. Future works could explore the integration of elastic weight consolidation into our pipeline rather than stopping change entirely. Doing class by class freezing of filters also introduces a cap to the number of tasks that this method could generalize to and the number of classes that can be predicted in each task. During our research, we concluded that this approach was better than attempting to combine saliency maps, but future work could also explore how to effectively combine saliency maps to capture important aspects of each class. -Further, this method relies on the comparability of saliency maps and filter visualizations. While it makes intuitive sense that a filter is more relevant if it is seeking the parts of an input that are most important for a correct prediction, it is not as simple as directly comparing the two. While we attempt to alleviate some of this issue by doing layer-by-layer freezing, future work could certainly explore better metrics for choosing filters, especially given the stark difference in performance when using something as simple as mean squared error compared to Pearson correlation. -Finally, the computational overhead of the method in combination with the limitations of Google Colab resulted in an inability to train on high-resolution images and use larger models. We believe that using high-resolution images would significantly benefit the feasibility of the method, as saliency maps are much more clearly defined. 
We again leave this to future work, as we are unable to explore this path. - - diff --git a/_posts/2023-12-01-rep-learning-for-rec-systems.md b/_posts/2023-12-01-rep-learning-for-rec-systems.md deleted file mode 100644 index b22707bf..00000000 --- a/_posts/2023-12-01-rep-learning-for-rec-systems.md +++ /dev/null @@ -1,263 +0,0 @@ ---- -layout: distill -title: Can Constrastive Learning Recommend Me a Movie? -description: -date: 2023-12-11 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Antonio Berrones - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-12-01-rep-learning-for-rec-systems.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Background And Related Work - - name: Experiments - - name: Conclusion - - name: References - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > ---- - - - - - - - - -## Introduction - -With the vast amount of information and content available online, the need for intelligent recommendation systems has only become more necessary. Many of the apps we use, YouTube, TikTok, Instagram, Netflix, Spotify, etc. all incorporate recommender systems to provide personalized content. But how do these systems work? An important factor in delivering good recomendations is having a system that can find an expressive and useful representation of users and items (where items are the specific piece of content we want to recommend). - -Traditional approaches for developing recommender systems include collaborative filtering, matrix factorization, and deep neural networks such as multi-layer perceptrons (MLPs) and graph neural networks (GNNs) . 
Moreover, a focus on using a hybridized approach of the previous models is also in active research, with aims of balancing their various benefits and tradeoffs. - -This project aims to explore if contrastive learning can be used to recommend movies for a user based on their prior movie ratings. - -**More specifically, by choosing different strategies of defining positive/negative pairs, can we learn a user embedding that facilitates the downstream task of movie recommendation?** - - - - - - - - - -## Background And Related Work - -### Contrastive Learning - -Contrastive learning is a self-supervised machine learning technique for training a model (often called an encoder) to distinguish between similar and dissimilar pairs of data points. The goal is to map each data point from its original representation space to a smaller dimensional latent space. If the encoder is trained well and is able to learn a good representation, the newly encoded data points should act as a sort of "compressed" version of the original data point while still containing some useful semantic information. - -Contrastive learning has traditionally been used in the domains of computer vision and natural language processing. However, more recent work has shown that contrastive learning, when combined with graph neural networks (GNNs), can learn impressive representations when applied to recommender systems . For the purposes of this project, instead of using a GNN as our encoder, a simpler MLP will be used. - -### Dataset - -This project explores creating a movie recommender system based on the [MovieLens dataset](https://grouplens.org/datasets/movielens/). The small version of this dataset contains 10,000 ratings of 9,000 movies by 600 users on a 0-5 star scale. Data was collected by users of the MovieLens website, last updated in September 2018. 
An example of the primary `ratings.csv` dataset is shown below: - - -| userId | movieId | rating | timestamp | -| --------- | --------- | --------- | --------- | -| 1 | 1 | 4.0 | 964982703 | -| 1 | 3 | 4.0 | 964981247 | -| ... | ... | ... | ... | -| 2 | 318 | 3.0 | 1445714835| -| 2 | 333 | 4.0 | 1445715029| -| ... | ... | ... | ... | -| 600 | 170875 | 3.0 | 1493846415| - - - - - - - - - -## Methodology - -### Preprocessing of Dataset - -The MovieLens dataset of user-movie interactions (movie ratings) is split into a training and test dataset. For each user, 95% of their interactions were randomly sampled and allocated to the training dataset, while the remaining 5% of interactions were allocated to the test dataset. - -Thresholds were chosen to quantify whether a user "liked" a movie (`LIKE_THRESHOLD`) or "disliked" a movie (`DISLIKE_THRESHOLD`) based on that user's rating. The training dataset was then filtered to only include interactions involving movies that had a minimum number of users who "liked" it and a minimum number of users who "disliked" the movie. This was to ensure that each movie had enough user data to facilitate the computations for selecting positive / negative pairs. - -### Positive and Negative Pair Strategies - -An important component of contrastive learning involves the definition of positive pairs and negative pairs. For a given interaction (user _u_ rates movie _m_), what should be considered a similar interaction and what should be considered a dissimilar interaction? - -Given an interaction by user ${u}$, let $\text{pos}(u) = u^+$ and $\text{neg}(u) = u^-$ where $(u,u^+)$ is a positive pair and $(u,u^-)$ is a negative pair. The goal will be to find the pair of functions $\text{pos}(), \text{neg}()$ such that a good representation is learned. - - -### Encoder Architecture - -The proposed encoder architecture is shown below. The encoder receives as input a batch of userIds, $u$ , integers in the range $0 \leq u \leq 599$. 
The first layer of the encoder is an embedding layer, mapping userIds to a vector of dimension `input_dim`. This layer is followed by a 2-layer MLP with relu activations, with a hidden dimension of `hidden_dim` and an output dimension of `latent_dim`. Additionally, the final output of the encoder is normalized. -
-
- {% include figure.html path="assets/img/2023-12-01-rep-learning-for-rec-systems/encoder.png" %} -
-
-
- Architecture for encoder, where input_dim = 1024, hidden_dim = 600, latent_dim = 200. -
- - - - - -### Evaluation Metrics - -In order to evaluate the quality of the learned user representations, there are a handful of metrics that will be used. - -1. **Top K Movie Recommendation**: Movie recommendation will serve as a downstream task that acts as a proxy for how well the learned user representations are. To recommend movies for a user, the encoder is used to get the user embeddings for all users in the dataset. We then use the cosine-similarity to compute the N=10 nearest neighbors to our target user. From these N neighbors, we retreive all of their "liked" movies and sort by their respective ratings. The top K movies are returned as the system's recommendations. - -2. **recall@k**: A popular metric used for evaluating recommender systems is recall@k . It measures the proportion of relevant items that were successfully retrieved from the top-k movie recommendations. Relevant items are defined as items that a user "likes" from the test dataset. The proportion of these items found in top-k recommendations from our recommender system (based on the learned encoder) is the recall@k. The higher the recall, the greater the overlap between our recommender's recommended movies and the user's actual preferred movies. - - -3. **Visualization of User Embeddings**: By visualzing the learned user representation's ability to be distinguished into separate clusters, we can better examine the potential user clusters for any distinguishing features. By utilizing t-distributed Stochastic Neighbor Embedding (TSNE) for dimensionality reduction of the user embedding vectors, we can project users representations to the 2D plane and use traditional clustering algorithms for visualization . - -4. **Top Movies Per User Cluster**: To provide more insight into the resulting user embedding clusters, the top movies of the users in each cluster is also reported. 
- - - - - - - - - -## Experiments - -In addition to standard hyperparamter-tuning techniques to optimize training, different positive pairs and negative pairs strategies will be tested. - -All encoders were trained with `num_epochs` = 20, `batch_size` = 512, `lr` = 0.0001 (using Adam optimizer), and contrastive triplet loss. - -### Strategy 1 - -For a given user $u_i$ a similar user is determined by a random selection from a set of candidate users. These candidate users consist of the subset of users that have "liked" the same movies that $u_i$ "liked", i.e. their ratings $\geq$ `LIKE_THRESHOLD`. Likewise, dissimilar users for $u_i$ were randomly selected from a set of candidate users that "disliked" the same movies $u_i$ "disliked", i.e. their ratings $ < $ `DISLIKE_THRESHOLD`. - -| LIKE_THRESHOLD | DISLIKE_THRESHOLD | -| -------------- | ----------------- | -| 3.5 | 3.5 | - -With these definitions of positive and negative pairs, an encoder was trained with the resulting user embeddings shown below. - -
-
- {% include figure.html path="assets/img/2023-12-01-rep-learning-for-rec-systems/s1-clusters.png" %} -
-
- {% include figure.html path="assets/img/2023-12-01-rep-learning-for-rec-systems/s1-top-movies.png" %} -
-
-
- Learned user embedding clusters and top movies using Strategy 1. -
- -By examining the user embedding clusters, we see four loosely-defined user clusters. The top 5 highest rated movies by each cluster's members are also depicted. A key takeaway is that we see a repetition of the same movies across each cluster, movies like _The Nutty Professor_, _Mission Impossible 2_, _Ace Ventura: When Nature Calls_, etc. These are all very popular and well-liked movies with a wide audience. The prevalence of highly-rated and popular movies such as these leads to a bias in our positive pairs. Since many users are fans of these movies, they are all considered similar users, i.e. our definition of similarity is too weak. The following strategies will try to address this. - -### Strategy 2 - -In order to decrease the influence of popular movies, one strategy is to filter out all movies that are "liked" by a certain number of users. We define `POPULARITY_THRESHOLD` = 100, which removes all movies with over 100 "liked" users. As a result, the distribution of "liked" users per movie is relatively uniform. The definitions of positive and negative pairs remains the same as in Strategy 1. - -| LIKE_THRESHOLD | DISLIKE_THRESHOLD | -| -------------- | ----------------- | -| 3.5 | 3.5 | - -
-
- {% include figure.html path="assets/img/2023-12-01-rep-learning-for-rec-systems/s2-clusters.png" %} -
-
- {% include figure.html path="assets/img/2023-12-01-rep-learning-for-rec-systems/s2-top-movies.png" %} -
-
-
- Learned user embedding clusters and top movies using Strategy 2. -
- - - -### Strategy 3 - -A different method for reducing the influence of popular movies was to normalize each users ratings. By subtracting a movie's average rating across all users from any particular user's rating, we are able to determine whether the user liked the movie more than others or disliked it more than others. Popular movies only have an impact if the user really liked (or disliked) it relative to everyone else. - -Using this new strategy, for any user $u_i$, instead of randomly selecting a similar user from candidates that "liked" a movie in common, these candidate users are ranked such that the candidate that has the highest normalizes rating is selected (the opposite is true for choosing a disimilar user). Therefore, instead of having a positive pair of users who rated the same movie highly, the positive pair will consist of users who both gave the same movie a higher rating than the average user. - -| LIKE_THRESHOLD | DISLIKE_THRESHOLD | -| -------------- | ----------------- | -| 3.5 | 3.5 | - -
-
- {% include figure.html path="assets/img/2023-12-01-rep-learning-for-rec-systems/s3-clusters.png" %} -
-
- {% include figure.html path="assets/img/2023-12-01-rep-learning-for-rec-systems/s3-top-movies.png" %} -
-
-
- Learned user embedding clusters and top movies using Strategy 3. -
- - -### Strategy 4 - -Despite the previous strategies, there still seems to be a lack of cohesion among the resulting user embedding clusters. The final strategy tested was a hybrid approach. In this scenario, the `LIKE_THRESHOLD` has been raised and the `DISLIKE_THRESHOLD` lowered in an attempt to narrow the candidate pools to more extreme users. Moreover, Strategies 2 and 3 are combined. Highly popular movies are removed and normalized ratings are used. - -| LIKE_THRESHOLD | DISLIKE_THRESHOLD | -| -------------- | ----------------- | -| 4 | 3 | - -
-
- {% include figure.html path="assets/img/2023-12-01-rep-learning-for-rec-systems/s4-clusters.png" %} -
-
- {% include figure.html path="assets/img/2023-12-01-rep-learning-for-rec-systems/s4-top-movies.png" %} -
-
-
- Learned user embedding clusters and top movies using Strategy 4. -
- - -### Analysis - -For each strategy, the recall@k for various values of k are shown, along with the sizes of the train and test datasets after filtering. - -| | Strategy 1 | Strategy 2 | Strategy 3 | Strategy 4 | -| -------------- | ------------ | ------------- | ---------- | ---------- | -| recall@10 (%) | 0.62 | 1.29 | 0.73 | 0.78 | -| recall@20 (%) | 1.97 | 2.16 | 2.18 | 3.10 | -| recall@50 (%) | 3.84 | 6.03 | 4.36 | 11.63 | -| Size Train Set | 51,576 | 32,609 | 51,576 | 10,826 | -| Size Test Set | 1,361 | 984 | 1,361 | 232 | - -It appears that both Strategy 2 and Strategy 3 alone seemed to make a noticable improvement in recall, with Strategy 2 (the removal of the most popular movies) making a larger impact than normalizing ratings. Furthermore, by using both strategies along with a few other changes, a representation the resulted in a better recomender system and more well-defined embedding clusters was learned. - - - - - -## Conclusion - -From the above experiments, it seems that contrastive learning (even when used with a simple MLP encoder) can eventually learn a user embedding resulting in clusters. However, it seems like either a more advanced architecture or positive/negative pair mining procedures are required to ensure that the learned representations have a useful semantic meaning. Weak positive pairs resulted from the presence of popular movies with diverse audiences. Previous work in applying contrastive learning to recommender systems highlight more complex formulations of ranked loss functions, assigning different weights depending on whether the pairs are hard or easy negative samples . - -An interesting extension of this project could explore the use of GNNs as the basis of the encoder architecture, as these types of models more naturally preserve the structure of user-movie interactions. 
\ No newline at end of file diff --git a/_posts/2023-12-11-exploring-music-generation.md b/_posts/2023-12-11-exploring-music-generation.md deleted file mode 100644 index e2725c3c..00000000 --- a/_posts/2023-12-11-exploring-music-generation.md +++ /dev/null @@ -1,64 +0,0 @@ ---- -layout: distill -title: "Exploring Methods for Generating Music" -description: Explores various machine learning techniques for generating music. Compares the performance of traditional RNNs, LSTMs, and transformers on generating sample sequences of music. -date: 2023-12-11 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Anonymous - url: - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-12-11-exploring-music-generation.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: "Introduction" - - name: "Related Work" - - name: "Methodology" - - name: "Results/Experiments" - - name: "Closing Thoughts and Future Work" - ---- - -# Introduction -The problem of music generation has been widely explored for a long time. Music has very similar parallels to how speech and language is structured. Just like language, music is temporal and in the traditional western sense, has a defined set of standards/rules for how music should be structured. What makes music generation a more challenging problem than language is that music has an artistic/expressive component as well as both low and high level structure. For "good" music, it isn't enough to simply generate a series of notes and harmonies that obey music theory conventions. At the low level, "good" music makes use of varying dynamics, note groupings, and articulation. At the high level, "good" music may feature overarching motifs and specific [forms](https://en.wikipedia.org/wiki/Musical_form) (round, sonata form, ABAB, etc). 
-This level of complexity is analogous to the problem of generating poetry and generating speech that mimics a human reciting it. The poetry will have structures like -rhyme, rhythm, motifs, metaphors, etc. and the speech reading it will have to replicate expressiveness to be convincing. This level of complexity is not yet achievable with high -level of robustness by current speech generators, LLMs, and NLP methods. - -It is this level of structural complexity required for generating "good" music that makes machine learning methods, specifically deep learning, a compelling approach to -tackling the problem of generating "good" music. Deep learning methods should be able to capture music's low level music theory structure as well as the high level structure. -It is the hope that given enough data and the right architectures, music generation will be able to mimic a level akin to the best human composers. While music generation such as OpenAi's jukebox has yielded very good results, it is trained on pure audio frequencies. I will focus on musical generation and training from a "written" / musical structural perspective rather than audio. (Think human speech vs. language/text), as I think this can provide greater insight into how these models learn and what about musical structure is being learned. - -# Related Work - -There have been several studies/projects done in the generation of music. OpenAi has done work with using audio samples to generate music. They took a representation learning and autoencoder approach leveraging VQ-VAEs. Other work took approaches similar to me and tried to analyze the "written" structure of music and used a combination of LSTMs and a midi encoding scheme to . Work has been done to capture the expressivity of music , where they leverage large transformer models and condition them on emotion to generate music. There has been success in generating expressivity based on this conditional approach. 
My work here aims to analyze purely the syntactical structure of music and will not be leveraging conditioning. - -# Methodology -Before music can be learned and generated, it needs to first be converted to a format that can be input to an ML model. To achieve this I used a subset of a piano midi dataset and utilized a [program](https://pypi.org/project/py-midicsv/) to convert from MIDI to .csv. Using this .csv file I encoded each note in the midi to a 107 dimensional vector, where the first 106 dimensions correspond to midi-notes [A0-G9](https://www.inspiredacoustics.com/en/MIDI_note_numbers_and_center_frequencies), and the last dimension encodes the duration of the midi-note divided by the midi-clock/quarter frequency to get a duration of the note in quarter notes. Since note A0 corresponds to midi-note 21, all of the midinote values are subtracted by this baseline value when being encoded into the vector. If a midi-note is played it is encoded as "ON" in the .csv and as such is represented with a 1 in its corresponding index in the note vector. For example, if a C4 and A4 note (MIDI note 60, and 69 respectively) are played at the same time in a song, it will be encoded as a 107 dimensional zero vector with indices 38, 47 (60 (midi value) -21 (baseline)-1 (0-index notation)) being 1 and index 106 being the duration of the chord. - -I then tested 3 different models to see how they performed. The first model I tested was an RNN with hidden_size = 64, RNN_layers = 2, and sequences of 24, 48, 64, and 200. I next tested LSTM models with hidden_size = 64, RNN_layers = 2, and sequences of 24, 48, 64, and 200 and compared a bidirectional vs. single-directional model. The last model I analyzed was a transformer, in which I first took my note encodings and created an embedded representation of the notes and combined this with positional encoding in the sequence of music to get my final embedding to pass into my transformer architecture. 
- -# Results/Experiments -I found the RNN architecture to be the worst performing model. It had a high ringing for some training and was mostly unstructured and random. The results of a sample music generation can be found [here](https://drive.google.com/drive/folders/1FiuobbyVUnwpUZUx_PYBR57qOwj5jYXe?usp=sharing). The LSTM model took longer to train but performed better with hidden size = 64, sequence_length=48, and 30 epochs. I found that it worked even better when using a bidirectional architecture. A sample generation can be found [here](https://drive.google.com/drive/folders/10CzuEbuVXKCyLsY5vwQZjSKJT1ABqXbA?usp=sharing) in which it was fed the starting 10 notes of Polonaise in A-flat major, Op. 53 and was asked to generate a long sequence from that. The transformer took the longest to train and its results can be found [here](https://drive.google.com/drive/folders/1fGe7xUZyFNlFGMbGB8aXnVfSEx067ZaA?usp=sharing). - - -# Closing Thoughts and Future Work - -As expected the base RNN architecture failed to generate anything meaningful. It took a while to find hyperparameters that would make the LSTM generate something of note, but when it did successfully generate music I was surprised by some of the resemblances it had to music in the training data. - -One noticeable flaw in my work is that my metric for success outside of training error is qualitative. It would have been useful for evaluation of my model implementations if I had a quantitative metric. I originally calculated the loss of my models based on how they replicated unseen music from a test set given sequences from the same music, however losses for every model failed to converge in a reasonable amount of time. It is certainly difficult to tell if poor performance is due to implementation or a small dataset and limited compute resources. - -Continuing on the idea of lack of data, one of the challenges I faced was in the curation of my dataset. 
I originally was going to generate music tokens for my network based on a very descriptive musical format called [lilypond](https://lilypond.org/). However, there were inconsistencies between samples of music in how they were represented in the lilypond text format, so creation of a program to transcribe the text to a good format for representing music was very difficult, which is why I turned to the more standardized MIDI file format. It is unfortunate because a lot of the complex expression in music is lost in midi format, making it harder if not impossible for models trained on midi input to learn these complex representations/behavior. I say impossible because if data for musical expression is completely absent from training, then this important component of music is simply out of distribution and impossible to learn. So a better way to encode/represent music is needed for better results. - -Moving forward, it would be interesting to explore how representation learning can be used to enhance the generation of music. I wanted to explore the use of VAEs and some of the more advanced variations like the one used in OpenAI's Jukebox, VQ-VAE. These methods may be able to capture both the high level structure and complex low level structure found in music. I also want to explore methods for encoding the dynamics, articulation, and expression found in music, something I was not able to do this time around. Lastly, exploring a better way to encode and learn the duration of notes would lead to better music generation. 
\ No newline at end of file diff --git a/_posts/2023-12-12-Iterated-Representation-Learning.md b/_posts/2023-12-12-Iterated-Representation-Learning.md deleted file mode 100644 index 3f0fe152..00000000 --- a/_posts/2023-12-12-Iterated-Representation-Learning.md +++ /dev/null @@ -1,242 +0,0 @@ ---- -layout: distill -title: Iterated Representation Learning -description: Representation learning is a subfield of deep learning focused on learning meaningful lower-dimensional embeddings of input data, and rapidly emerging to popularity for its efficacy with generative models. However, most representation learning techniques, such as autoencoders and variational autoencoders, learn only one embedding from the input data, which is then used to either reconstruct the original data or generate new samples. This project seeks to study the utility of a proposed iterated representation learning framework, which repeatedly trains new latent space embeddings based on the data outputted from the last round of representation. In particular, we seek to examine whether the performance of this iterated approach on a model and input dataset are indicative of any robustness qualities of the model and latent embedding space, and potentially derive a new framework for evaluating representation stability. -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Angela Li - url: "https://www.linkedin.com/in/angelayli/" - affiliations: - name: Harvard University - - name: Evan Jiang - url: "https://www.linkedin.com/in/evanjiang1/" - affiliations: - name: Harvard University - -# must be the exact same name as your blogpost -bibliography: 2023-12-12-Iterated-Representation-Learning.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. 
-toc: - - name: Introduction - subsections: - - name: Representation Learning Primer - - name: Prior Literature - - name: Iterated Representation Learning - subsections: - - name: Existing Dimensionality Reduction and Representation Models - - name: Iterated Representation Learning - - name: Case Study - MNIST Dataset - subsections: - - name: Experimental Design - - name: Autoencoder IRL Analysis - - name: VAE IRL Analysis - - name: Discussion - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Introduction - -Representation learning has become a transformative subfield of deep learning within recent years, garnering widespread attention for its sophistication in learning lower-dimensional embeddings of data beyond classical techniques such as principal component analysis (PCA). From class, we learned that desirable characteristics of good representations include minimality, sufficiency, disentangelement, and interpretability. However, because typical representation learning techniques such as autoencoders learn only one latent embedding from the input data, there exists a gap in the literature on the *stability* of the model and learned embeddings. 
- -In this project, we thus explore a new approach to traditional representation learning techniques, in which embeddings for a given set of data are learned repeatedly until some sort of convergence with respect to the model and learned embedding space, a process we call **Iterated Representation Learning (IRL)**; by analyzing the performance of this iterative approach, our work aims to discover potential insights into the robustness qualities inherent to a model and its associated latent embedding space. We propose an algorithmic framework for IRL, provide an empirical case study of the efficacy of our IRL framework on the MNIST dataset, and suggest a novel evaluation procedure for representation stability and robustness via iterated learning. - -### Representation Learning Primer - -The goal of representation learning is to build models that effectively learn meaningful representations of the data. Representations are important for a variety of reasons, including determining which features are the most explanatory or variable in a dataset, compressing repeated information from a dataset to make it more compact, and learning more effective neural networks, to name a few examples. These representations are typically abstract and less interpretable than the input data, but of lower dimension, which makes them useful in capturing the most essential or compressed characteristics of the data. - -More formally, representation learning aims to learn a mapping from datapoints $$\mathbf{x} \in \mathcal{X}$$ to a (typically lower-dimensional) representation $$\mathbf{z} \in \mathcal{Z}$$; we call this mapping an **encoding**, and the learned encoding is a function $$f: \mathcal{X} \rightarrow \mathcal{Z}$$. From this, a **decoder** $$g: \mathcal{Z} \rightarrow \mathcal{X}$$ can be applied to reconstruct the encoded data into its original dimension. This is demonstrated in the diagram below. 
- -{% include figure.html path="assets/img/2023-12-12-Iterated-Representation-Learning/representation.png" class="img-fluid" %} -
- Representation learning goal. Image credit: Foundations of Computer Vision: Representation Learning (Torralba, Isola, Freeman 2023). -
- -Some of the most salient learning methods within representation learning today include autoencoding, contrastive learning, clustering, and imputation; in this project, we focus on specifically on iterative approaches for the class of **autoencoders**. - -Representation learning also has intricate ties to generative modeling, the subfield of deep learning that aims to generate new data by mapping a simple base distribution to complicated high-dimensional data, which is essentially the opposite goal of representation learning. Then, after learning an embedding space via representation learning, this embedding can then be sampled from to *generate* new data that mimics the original data, as demonstrated by **variational autoencoders (VAEs)**, which we also explore in this paper. - -### Prior Literature - -Relatively little literature exists regarding iteratively training dimensionality reduction or representation learning models. [Vlahek and Mongus (2023)](https://ieeexplore.ieee.org/document/9528915) proposes an iterative approach for *conducting* representation learning more efficiently, specifically for the goal of learning the most salient features, which fundamentally diverges from our goal and also does not consider embedding robustness. [Chen et al. (2019)](https://arxiv.org/abs/1809.10324) introduces an iterative model for supervised extractive text summarization, though their objective of trying to optimize for a particular document by feeding a given document through the representation multiple times differs from ours. [Cai, Wang, and Li (2021)](https://ieeexplore.ieee.org/abstract/document/9414713) finds an iterative framework for self-supervised speaker representation learning which performs 61% better than a speaker embedding model trained with contrastive loss, but mainly focuses on the self-supervision aspect of the model and optimizes purely for model test accuracy, not considering other metrics such as stability or robustness. 
- -Overall, we find that the literature regarding iterative approaches to representation learning is already sparse; of the work that exists, most focuses on very specific use cases, and no work directly examines the robustness or stability of the model and embeddings themselves learned over time, rather optimizing purely for final model performance. - - -## Iterated Representation Learning - -### Existing Dimensionality Reduction and Representation Models - -Nowadays, there are a variety of approaches to effective dimensionality reduction. Below we cover three of the most common techniques. - -#### Principal Component Analysis - -Principal Component Analysis (PCA) has two primary objectives. First, maximizing sample variance of the newly transformed data, which is analogous to identifying and capturing the greatest (largest) directions of variability in the data (principal components or PCs). Formally, a PC is defined - -$$ -v^* = \arg \max_v \frac{1}{N-1} \sum_{n=1}^N (x^T_n v - \bar{x}^T v)^2 = \arg \max_v v^T C v -$$ - -where $$C = \frac{X^T X}{n-1} \in \mathbb{R}^{d \times d}$$ is the empirical covariance matrix. - -The second objective is minimizing reconstruction loss, which is analogous to identifying the directions of variability to accurately and concisely represent data. Let $$U$$ be the orthonormal basis projection matrix of eigenvectors of $$C$$. Then we define reconstruction loss as - -$$ -\mathcal{L}(U) = \frac{\sum_{n=1}^N ||x_n - U U^T x_n||^2}{N} -$$ - -Above, we observe that maximizing sample variance and minimizing reconstruction loss go hand-in-hand. Since PCA applies projections by multiplying vectors/matrices to the data, PCA is limited to the *linear* transformation setting, hence restricting its applicability in many modeling problems. - -#### Autoencoders - -{% include figure.html path="assets/img/2023-12-12-Iterated-Representation-Learning/ae.png" class="img-fluid" %} -
- Autoencoder structure. Image credit: Foundations of Computer Vision: Representation Learning (Torralba, Isola, Freeman 2023). -
- -Similar to PCA, autoencoders also aim to minimize reconstruction loss. However, autoencoders are not limited to just linear transformations, which enables autoencoders to learn more general lower-dimensional representations of data. Autoencoders are comprised of an encoder and decoder, where the encoder maps data to a lower-dimensional representation (embedding) via some function $f$, and the decoder maps the originally transformed data back to its original dimensional space via some function $g$. - -End to end, the data space starts in $$\mathbb{R}^N$$, is downsized to $$\mathbb{R}^M$$ by $$f$$, and then is reverted back to $$\mathbb{R}^N$$ where $$N > M$$. In this case, we can formalize the objective as follows: - -$$ -f^*, g^* = \arg \min_{f,g} E_\mathbf{x} || \mathbf{x} - g(f(\mathbf{x}))||^2_2 -$$ - -#### Variational Autoencoders - -VAEs couple autoencoders with probability to get maximum likelihood generative models. Typically for encoding, VAEs regularizes the latent (hidden) distribution of data to "massage" the distribution into a unit Gaussian, and when reverting back to the original dimensional space, VAEs add noise to the output — hence, a mixture of Gaussians. By imposing a unit Gaussian structure on the learned embedding space, this allows VAEs to act as generative models by sampling from the Gaussian latent space to generate new data. Unlike traditional autoencoders, VAEs may have embedding spaces that are complicated (if not just as complicated as the data). - -{% include figure.html path="assets/img/2023-12-12-Iterated-Representation-Learning/vae.png" class="img-fluid" %} -
- VAE's complex embedding space. Image credit: Foundations of Computer Vision: Generative Modeling Meets Representation Learning (Torralba, Isola, Freeman 2023). -
- -Formally, the VAE learning problem is defined by - -$$ -\theta^* = \arg \max_{\theta} L(\{\mathbf{x}^{(i)}\}^N_{i=1}, \theta) = \arg \max_{\theta} \sum_{i=1}^N \log \int_{\mathbf{z}} \mathcal{N} (\mathbf{x}^{(i)}; g_{\theta}^{\mu}(\mathbf{z}), g_{\theta}^{\Sigma}(\mathbf{z})) \cdot \mathcal{N}(\mathbf{z}; \mathbf{0}, \mathbf{\mathrm{I}}) d\mathbf{z} -$$ - -### Iterated Representation Learning - -#### Proposed Framework - -We now introduce the Iterated Representation Learning Framework (IRL) for autoencoders and VAEs. We start with IRL for autoencoders: - -1. Given design matrix $$X$$, learn an autoencoder for $$X$$. -2. Using the decoder from above, reconstruct the data to get $$X'$$ and compute its reconstruction loss. -3. Using the reconstructed data $$X'$$, repeat Steps 1 and 2 and iterate until the reconstruction loss converges or reaching iteration limit. - -As for VAEs, we follow a similar procedure as above. - -1. Given design matrix $$X$$, learn a VAE for $$X$$. -2. Using the decoder and adding Gaussian noise, reconstruct the data to get $$X'$$. Compute its reconstruction loss. -3. Using the reconstructed data $$X'$$, repeat Steps 1 and 2 and iterate until the reconstruction loss converges or reaching iteration limit. - -In this report, we examine how IRL is connected to representation, investigate several hypotheses about IRL, and conduct a preliminary case study of IRL on the MNIST dataset. - -#### Preliminary Questions and Hypotheses - -Motivated by how there may be unexplored stability properties of embeddings, our main hypotheses are twofold. First, iterated reconstruction loss per IRL can convergence with respect to the model. 
Second, learned embedding spaces can be reached via IRL, and that the number of iterations until convergence, loss at convergence, and such preserved features upon convergence could reveal meaningful properties of the true representation space, model, and data that are not immediately obvious from a standard autoencoder model. - -More specifically, does the number of iterations until convergence have anything to do with how ``good'' or stable the model or learned representation is? What does it mean if the reconstruction losses converge? What can we say about characteristics of the data that are maintained through iterations, and characteristics that evolve as the iterations go on? For example, if we observe that a model remains invariant to a certain feature, but becomes sensitive to new features of the data, what does this tell us about these particular features, our model, and the original data itself? - -Perhaps most importantly, beyond the qualitative observations themselves, can we propose some sort of representation learning evaluation framework using iterated representation learning, e.g. rough guidelines on ideal number of iterations required until convergence, and what this says about how good a model is? Ultimately, we hope that using an iterated framework can serve as a general tool for (1) evaluating the stability or robustness of a representation learning model and (2) identifying the most core characteristics of a given dataset. - -## Case Study: MNIST Dataset - -To evaluate IRL on a real-world dataset, we selected MNIST to test our hypotheses. We carefully designed our experiments, collected relevant data, and include our analysis below. - -### Experimental Design - -For our experiments, we implemented IRL using the framework given above for the classic MNIST digits dataset (due to its simplicity and interpretability), where we preset the ``num_iterations``. 
At every iteration, we initialize a new autoencoder model with [Chadebec, Vincent, and Allassonnière's (2022)](https://arxiv.org/abs/2206.08309) ``pythae`` autoencoder/VAE library. The encoder architecture is formed by sequential convolutional layers from ``PyTorch``. - -We then trained the model, reconstructed the data, and saved the trained and validation loss. We also saved the original train/test and reconstructed train/test images of the first 25 datapoints to track how IRL progressed visually. - -### Autoencoder IRL Analysis - -First, we take a look at the (log) mean squared error of our autoencoder over 30 iterations of IRL, given in the plot below. - -{% include figure.html path="assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_logloss.png" class="img-fluid" %} -
- Autoencoder log loss over IRL of 30 iterations. -
- -We notice that both the train and validation loss steeply decrease until around iteration 10, upon which the validation loss begins to roughly stabilize and converge. This confirms our intuition that the loss following an iterated approach should eventually converge, which we can theoretically verify by observing that if we ran $$n$$ iterations, then as $$n\to\infty$$, because the loss is lower-bounded by zero and should generally decrease from iteration to iteration (since we are removing information from our data), we must eventually converge. We further hypothesize that the fact that the loss has converged means that the embeddings upon convergence have learned the most succinct, critical portion of the data. - -We also notice that the number of iterations until convergence is very small; as mentioned, after about 10 iterations, it seems that the validation loss has roughly converged. We had hypothesized earlier that if the autoencoder converges after a small number of iterations, then that says something about the quality of the autoencoder architecture. Here, the fact that the loss converged after a small number of iterations gives evidence for this hypothesis, since based on separate tests, this architecture indeed achieves relatively high classification accuracy for the MNIST dataset. We suggest that IRL can thus serve as a framework for evaluating the quality of an autoencoder on a particular dataset. - -Additionally, the validation loss converges at a relatively small number (around 0.25 by iteration 10), meaning that the original and reconstructed data in a given iteration are very similar. Interestingly enough, the validation loss is actually consistently lower than the train loss, which suggests that the learned representations through this iterated approach actually generalize very well to unseen data, which is certainly a desirable quality of any model. 
- -We also give the original and reconstructed data for iterations 1, 5, 10, 15, and 20, for both the train and test data, in the figures below. - -{% include figure.html path="assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_train.png" class="img-fluid" %} -
- Reconstructed train data. -
- -{% include figure.html path="assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_test.png" class="img-fluid" %} -
- Reconstructed test data. -
- -In the beginning, we see that the data starts losing resolution (e.g. the numbers become fuzzier and start losing their distinctness from the background), which makes sense because more iterations means more reconstructions that continue to accumulate reconstruction loss. The reconstructed images are also less clear than the originals due to the information that is lost from the encoding-decoding process. - -Our key observation is that the reconstruction loss stabilizes around the 10th iteration, where the original test images and reconstructed test images look very similar — we hypothesize that this is the point where the autoencoder has learned to represent the data as succinct as possible while preserving the most critical information. - -### VAE IRL Analysis - -We similarly plot the log loss for our VAE, as well as the train, test, and sampled data over iterations in the figures below. - -{% include figure.html path="assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_logloss.png" class="img-fluid" %} -
- VAE log loss over IRL of 30 iterations. -
- -{% include figure.html path="assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_digits.png" class="img-fluid" %} -
- Train, test, and normal sampled data. -
- -Unlike the autoencoder, the VAE's train data becomes much more noisy across the 20 iterations. This is likely due to how the VAE injects noise in the reconstruction, which in this case resulted in the images to lose their distinctness. While the general shape is preserved (roundness, lines, etc), many of the numbers actually ended up merging together and losing their number shape altogether (e.g. some 6s, 3s, 9s all become 0s). - -When comparing IRL on the autoencoder versus the VAE, we observe that the VAE's log loss converges to a larger log loss than the autoencoder, which makes sense because the VAE's decoding step adds noise to the images that therefore adds loss to the reconstruction. We also note that the both of the models experience steep drop offs in log loss initially, which means the first few iterations eliminated most of the noise in the data and preserved the features that we characterize as "stable". - - -## Discussion - -Our proposed IRL framework considers how some features may be more important or more stable than others, and it aims to capture those features while eliminating the noise in the data. While traditional dimensionality reduction techniques have their merits, IRL takes those methods one step further by iteratively trimming away noise until convergence or termination. Throughout this project, we cover representation learning fundamentals and IRL can capitalize on the way they learn embeddings, and we also apply this framework to real world data on MNIST. We argue that in our case study of MNIST, IRL does converge in terms of both loss (log mean squared error converges) and reconstructions, which is a promising first step in the analysis of stability and fundamental characteristics of the data. Moreover, we showcase how the number of iterations until convergence has significance, serving as a benchmark for how good an autoencoder/VAE is on a given dataset. 
Although VAE's reconstructed images were more noisy, that's by nature of the VAE, and we still observe that the fundamental features of the data (lines vs circles) are still preserved throughout iterations. - -There are a variety of directions we'd like to continue to explore with this project, given more time. - -1. We were only able to run a limited number of experiments due to computational power and the duration of time to train a full IRL from start to finish for, say, 30 iterations. Given more time, there are multiple other experiments we'd like to run, including training on other datasets and trying out the performance on different autoencoder architectures to better understand the properties of this iterated approach. Another thing we'd like to evaluate the empirical performance of, but also couldn't due to computational constraints, is how a single autoencoder with 20 times as many neurons as some basic autoencoder compares to the basic autoencoder trained using IRL for 20 iterations. -2. We're also curious to further explore the theoretical guarantees provided by IRL, including rigorous bounds on convergence. We're also very interested in exploring whether any of our observations from IRL can generalize to other classes of deep learning models. -3. We'd lastly look into ways to make IRL more computationally tractable. As mentioned, our experimentation was heavily limited due to the computational cost of training a new autoencoder during every iteration. If possible, we'd like to look for optimizations of this framework that still preserve the desired methodology. - -Overall, Iterated Representation Learning serves as a framework to evaluate stability-related properties of data, which we believe to be an important but overlooked standard for representation learning. 
Our case study of MNIST shows promise for empirical convergence guarantees on certain datasets, and we hope that our work lays the foundation for future representation discussions with respect to stability. \ No newline at end of file diff --git a/_posts/2023-12-12-LinearMode.md b/_posts/2023-12-12-LinearMode.md deleted file mode 100644 index 6e22c39a..00000000 --- a/_posts/2023-12-12-LinearMode.md +++ /dev/null @@ -1,162 +0,0 @@ ---- -layout: distill -title: Understanding Linear Mode Connectivity -description: - We study the pruning behavior of vision transformers (ViTs), and possible relations to linear mode connectivity. Frankle et al. (2022) showed that linear mode connectivity, the tendency of a neural network to optimize to the same linearly connected minimum when trained SGD noise, is strongly tied to the existence of "lottery networks," sparse networks that can be trained to full accuracy. We found that when initialized from a pretrained network, the ViT model showed linear mode connectivity when fine tuning on CIFAR-10. Conversely, random initialization resulted in instability during training and a lack of linear mode connectivity. We also found that using the PLATON algorithm (Zhang et al.) to generate a mask was effective for pruning the network, suggesting the existence of lottery ticket networks in ViTs, but the connection between the existence of these trainable subnetworks and linear mode connectivity remains unclear. -date: 2023-12-01 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Michael Han - url: "mjhan@mit.edu" - affiliations: - name: MIT - - name: Alec Zhu - url: "alecczhu@mit.edu" - affiliations: - name: MIT - -# must be the exact same name as your blogpost -#bibliography: 2023-12-12-LinearMode.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. 
-toc: - - name: Instability Analysis and Linear Mode Connectivity - - name: Transformers and Related Work - - name: Experiments with Linear Mode Connectivity - - name: Pruning - - name: Analysis and Conclusions - - name: References - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. - ---- - -## Instability Analysis and Linear Mode Connectivity - -The advent of transformer models stands as a pivotal advancement within the domain of machine learning, fundamentally reshaping the landscape of artificial intelligence. First introduced in 2017 through the seminal work "Attention is All You Need" by Vaswani et al., transformers have since exploded in both uses and applications, such as language and vision tasks. In fact, ChatGPT, which was the fastest-growing application in history (until Threads in 2023), is built using a transformer architecture. Although transformers can achieve state-of-the-art performance in many tasks, they are often limited by their size, which can create issues for memory and energy both during training and deployment. For example, GPT-3 has 175 billion parameters, and GPT-4, which was released earlier in 2023, has 1.76 trillion parameters! Compression techniques such as knowledge distillation and pruning can be used to deal with these issues, reducing the size of the network while retaining most of its capabilities. Several methods already exist for shrinking transformers such as weight pruning (Zhang et al. 2022), as well as post-training compression (Kwon et al. 2022). However, there is little research on the conditions under which a transformer can be effectively compressed or at what point during training a transformer compression should begin. - -Frankle et al. 
(2020) suggest that *instability analysis*—analyzing the stability of training with respect to stochastic gradient descent (SGD) noise—could be a way of identifying conditions under which pruning can be useful. To determine whether the outcome of training is stable w.r.t SGD noise, we create two copies of a network with the same initialization, and optimize those networks using different samples of SGD noise. We can then evaluate how similar or dissimilar the resulting networks are. For this purpose, Frankle et al. propose *linear interpolation instability*, defined to be the maximum increase in error along the linear path in parameter space connecting the two resulting networks. When error is nonincreasing along this path, the networks are said to have *linear mode connectivity*. In their paper, they propose that this instability analysis is related to lottery ticket networks, which are subnetworks from randomly-initialized dense neural networks that can achieve comparable test accuracy to the original network after training. They found that pruned networks that were capable of achieving near full test accuracy were stable to SGD noise, and showed linear mode connectivity. - -Frankle et al. study linear mode connectivity in neural networks, which is a stricter version of mode connectivity. They train two networks with the same initialization on SGD noise (randomly augmented datasets) and calculate the maximum loss along the linear path between the two resulting network to quantitatively analyze the instability of the original network to noise. - -
-
- {% include figure.html path="assets/img/2023-12-12-LinearMode/LMC.PNG" class="img-fluid rounded z-depth-1" %} -
-
- -
- $W_i$ represents the weights at initialization, and the red lines represent two different paths through parameter space corresponding to different SGD noise. The figure on the left shows linear mode connectivity, but the figure on the right shows increasing loss along the blue interpolated path between local minima, which shows instability to SGD noise during training. -
- -In our project, we plan to expand on the research from Frankle et al. and apply it to transformers. In doing so, we hope to study the conditions under which transformers can be effectively compressed as well as the optimization landscape of training transformers. We seek to evaluate linear mode connectivity in transformer architectures and whether it is an effective indicator for how effectively a transformer can be compressed. - -## Transformers and Related Work - -We restricted our analysis of transformer architectures to the Vision Transformer (ViT) model proposed by Dosovitskiy (2021). ViT works by splitting an image into patches, then computing embeddings of those patches via linear transformation. After adding positional embeddings, the resulting embeddings are fed into a standard Transformer encoder. Due to runtime issues, we were unable to fully train transformers from scratch. We ended up working with and fine-tuning pretrained transformers, which were imported from the HuggingFace transformers package. - -Shen et al. (2023) investigated a more general form of the lottery ticket hypothesis with ViTs, proposing ways to select a subset of the input image patches on which the ViT can be trained to similar accuracy as with the full data. However, they write "the conventional winning ticket \[i.e. subnetwork\] is hard to find at the weight level of ViTs by existing methods." - -Chen et al. (2020) investigated the lottery ticket hypothesis for pre-trained BERT networks, and did indeed find subnetworks at varying levels of sparsity capable of matching the full accuracy. Our work hoped to find similar results for vision transformers. - -Linear mode connectivity is also deeply connected to the nature of the optimization landscape. This has important applications with regards to federated learning, and combining the results of independent models. For example, Adilova et al. 
(2023) showed that many deep networks have *layer-wise* linearly connected minima in the optimization landscape, which they explain as being the result of the layer-wise optimization landscape being convex, even if the whole optimization landscape is not. They found similar behavior in vision networks trained on CIFAR-10. - -In our project, we seek to evaluate the connection between linear mode connectivity and the existence of winning subnetworks. We expand on the work from Shen et al. and Chen et al. by incorporating the linear mode connectivity analysis proposed by Frankle et al. as well as search for conventional winning subnetworks in transformers for vision tasks. Our goal is to find conditions and methods for which transformers can be compressed while retaining high performance. - -## Experiments with Linear Mode Connectivity - -We decided to work with the pretrained ViT model from HuggingFace transformers, and to fine tune this model on CIFAR-10. We also augmented the data set of 32x32 images with a random 24x24 crop followed by resizing, followed by a random horizontal flip and color jitter (randomly changing brightness, contrast, saturation and hue). To evaluate linear mode connectivity, we train a pair of models with the same initialization on different randomly shuffled and augmented datasets. - -In order to assess the instability of the original network to the dataset augmentations, we use the procedure described by Frankle et al. and evaluate the test loss and accuracy of the linearly interpolated models. The weights of the interpolated models are directly calculated from the weights of the trained models using evenly spaced values of $$\alpha$$. The test dataset did not receive the augmentations that the training dataset did. - -All models trained for the linear interpolation instability analysis were trained using the AdamW optimizer for 8 epochs with a learning rate of 2e-4. 
We use the default ViTImageProcessor imported from HuggingFace to convert the images into input tensors. - -
-
- {% include figure.html path="assets/img/2023-12-12-LinearMode/interpolation_loss.png" class="img-fluid rounded z-depth-1" %} -
-
- 

-The above plot shows the result of linear interpolation after fine tuning two copies of the pretrained model. The evaluation loss is non-increasing, and in fact decreases, possibly as an artifact of the fact that the test set did not receive augmentations. Otherwise, it seems that there is linear mode connectivity, at least in the local optimization landscape when starting from a pretrained model. 

-
-
- {% include figure.html path="assets/img/2023-12-12-LinearMode/instability.png" class="img-fluid rounded z-depth-1" %} -
-
- -However, we failed to observe linear mode connectivity in randomly initialized transformers, noting an increase in test loss as well as a decrease in test accuracy around $$\alpha = 0.5$$. The maximum observed test loss of the interpolated models is more than double the mean of the test losses of the original pair of trained models, which is much more than the threshold of a $$2\%$$ increase used by the original authors. - -The resulting networks seem to end up in disconnected local optima, implying that these networks are not invariant to the dataset augmentations. This is consistent with the analysis done by Frankle et al., who find that the stability of networks increases over the course of training. - -Our results combined with the original analysis by Frankle et al. seems to suggest that linear mode connectivity emerges at some point during training, but we have yet to observe the point at which it emerges due to computation restraints and the size of the ImageNet dataset used to pretrain the ViT models. - -## Pruning - -We used the PLATON compression algorithm (Zhang et al. 2022) during training to prune networks to different levels of sparsity. PLATON uses several "scores" to prune parameters. One score is parameter magnitude; smaller magnitude parameters tend to be pruned. However, in a complex network, small magnitude weights can still have a large impact; to measure this, PLATON uses the gradient-weight product $$ \theta^T \nabla \mathcal{L}(\theta) $$ as a first order Taylor approximation of the impact of the removal of a weight on the loss. PLATON also maintains uncertainties for all the weights, preferring not to prune weights with uncertain scores. - -
-
- {% include figure.html path="assets/img/2023-12-12-LinearMode/sparsity20percent.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-LinearMode/sparsity5percent.png" class="img-fluid rounded z-depth-1" %} -
-
- -Pruning and retraining the pretrained model to 20% of its original size over 4 epochs results in a test accuracy of 95.3%, compared to 98% accuracy of the full model, and pruning to 5% resulted in 93.7% test accuracy. So although the compressed models cannot reach the accuracy of the original model, they are able to still maintain a relatively high test accuracy, and the PLATON algorithm does a good job of selecting weights. We also used the pruned weights at 20% sparsity to generate a mask, and applied this mask to the original model. - -
-
- {% include figure.html path="assets/img/2023-12-12-LinearMode/pruned_weights_as_mask.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Blue and orange traces represent training loss and an exponential moving average resp. -
- 

-When training the original model, but applying a mask (effectively setting the corresponding weights and gradients to zero), we were able to train the model to 93.6% test accuracy. This supports the lottery ticket hypothesis, since the PLATON algorithm can be used to identify a relatively small subset of weights from the pretrained network that can be trained to high accuracy in isolation. 

-## Analysis and Conclusions 

-Our results with linear mode connectivity suggest that at some point during the training process, optimization ends up in a linearly connected local minimum, and further optimization will be stable to SGD noise. This is because we were indeed able to observe linear mode connectivity when fine tuning a pretrained model. Additionally, with random initialization, we found the absence of linear mode connectivity. Unfortunately, we were not able to determine exactly where in the training process linear mode connectivity emerges. 

-It is notable that over the course of training, the loss does not seem to go down steadily, rather rapidly oscillating between high and low loss. The exponential moving average smooths it out, but it is still quite chaotic. During pruning, it seems plausible that the oscillations could correspond to weights being pruned, but the model approaches the target ratio of nonzero weights by the end of the third epoch of training, leaving the behavior in the final epoch unexplained. Furthermore, the training loss displays similar behavior while training the masked models. Further work could be done to investigate this phenomenon and potentially make pruning/training more stable. 

-Our results with pruning show that a standard compression algorithm, PLATON, is able to successfully prune the pretrained ViT model to high levels of sparsity while maintaining relatively high accuracy. 
Our results with masking weights also suggest the existence of lottery ticket networks in the pretrained model, since we were able to train the corresponding subnetwork to a high level of accuracy. Unfortunately, the connection between linear mode connectivity and lottery ticket transformers remains very ambiguous, since we were unable to perform pruning experiments on models that did not demonstrate linear mode connectivity. 

-Further work could be done to investigate linear mode connectivity from different levels of pretraining as initialization, which would shed light on when the optimization of transformers settles into a connected minimum (or when it doesn't). Further work on when linear mode connectivity arises, as well as experiments pruning the corresponding networks, would help determine if there is a connection between connectivity and the presence of lottery transformers. This would also be important for determining more definitively whether linear mode connectivity is a good indicator that transformers can be compressed. Additionally, as mentioned earlier, the existence of lottery networks in language models has already been investigated, and it would be interesting to see if this is related to linear mode connectivity as well. 


-## References 

-

Adilova, L., Andriushchenko, M., Kamp, M., Fischer, A., & Jaggi, M. (2023). Layer-wise Linear Mode Connectivity.

- -

Frankle, J., Dziugaite, G. K., Roy, D. M., & Carbin, M. (2020). Linear Mode Connectivity and the Lottery Ticket Hypothesis.

- -

Zhang, Q., Zuo, S., Liang, C., Bukharin, A., He, P., Chen, W., & Zhao, T. (2022). PLATON: Pruning Large Transformer Models with Upper Confidence Bound of Weight Importance. In K. Chaudhuri, S. Jegelka, L. Song, C. Szepesvari, G. Niu, & S. Sabato (Eds.), Proceedings of the 39th International Conference on Machine Learning (Vol. 162, pp. 26809–26823). PMLR. https://proceedings.mlr.press/v162/zhang22ao.html

- -

Kwon, W., Kim, S., Mahoney, M. W., Hassoun, J., Keutzer, K., & Gholami, A. (2022). A fast post-training pruning framework for transformers. Advances in Neural Information Processing Systems, 35, 24101–24116.

- -

Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., Uszkoreit, J., & Houlsby, N. (2021). An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale.

- -

Shen, X., Kong, Z., Qin, M., Dong, P., Yuan, G., Meng, X., Tang, H., Ma, X., & Wang, Y. (2023). Data Level Lottery Ticket Hypothesis for Vision Transformers.

- -

Chen, T., Frankle, J., Chang, S., Liu, S., Zhang, Y., Wang, Z., & Carbin, M. (2020). The Lottery Ticket Hypothesis for Pre-trained BERT Networks.

- - - - - - - - diff --git a/_posts/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias.md b/_posts/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias.md deleted file mode 100644 index 2ae60588..00000000 --- a/_posts/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias.md +++ /dev/null @@ -1,165 +0,0 @@ ---- -layout: distill -title: Recovering Latent Variables with VAEs despite Training Bias -description: Final Project Blog -date: 2022-12-01 -htmlwidgets: true - -# Anonymize when submitting -authors: - - name: Anonymous - -# authors: -# - name: Patrick Timons -# affiliations: -# name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-12-12-Recovering Latent Variables with VAEs despite Training Bias.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Background - subsections: - - name: Data - subsections: - - name: Training Data - - name: Test Dataset - - name: Training - - name: Related Work - - name: Set-up and Methods - - name: Results - subsections: - - name: Training Observations - - name: Evaluation - - name: Conclusion and Future Work - - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Introduction - -In this age of abundant unlabeled data, unsupervised learning is capitalizing to power the generative models that are eating the world. 
Large Language Models and Diffusion models are focalizing most of the mainstream hype and therefore siphoning attention from other generative models. In particular, the Variational Autoencoders (VAE) is a model architecture that has been arguably overlooked in the recent onslaught for scaling transformer and diffusion-based models. VAEs are a promising latent variable model that allows for the learning of disentangled latent variables that model data. - -As models scale in size, so is concern for the lack of interpretability associated with Neural Networks. Latent variable models offer a solution to this problem since they can learn variables that factorize the data generation process. VAEs are particularly well suited for learning latent variables in an unsupervised setting since they use an unsupervised learning objective and are regularized to learn disentangled encodings of our data. VAEs have been applied in a broad breadth of settings, such as classifying out-of-distribution data , fair decision making , causal inference , representation learning, data augmentation, and others. Although VAEs have demonstrated the capability to recover ground truth latent variables, they often recover mock factors that can generate the training dataset but differ mechanistically from the ground truth data generation process. For instance, in lecture we demonstrated that a VAE trained on cartoon images of rivers learned to encode aggregate river curvature as a latent variable. The ground-truth data-generating random variables were an ordered set of Bernoulli random variables indicating if the river angeled its trajectory to the left or to the right at the particular timestep. The VAE's shortcoming in recovering the real latent variables is expected from a Bayesian perspective, since we assume an isotropic Gaussian prior for continuous latent variables, and impose a bottleneck on the number of latent variables. 
Even though we do not recover the ground-truth data generating random variables, we learn latent variables that are qualitatively useful and capture macro latent phenomena about the data. This segues into an interesting question—when do VAEs fail to recover useful latent variables? 

-In particular, we will choose the setting in which our training data is biased, but we still seek to learn insightful representations of the data. This is an especially well-motivated setting, since in unsupervised learning, we often do not have any guarantees about the distribution of our training data, yet we still aim to learn generalizable latent variables. It would be ideal if a VAE's ability to recover generalizable latent variables is robust to training bias. Relating to the cartoon example from lecture, if the probability parameter for the data-generating random variables was skewed so that right-curving rivers are more likely (i.e. $$P(\text{right}) = 0.9$$ instead of $$P(\text{right}) = 0.5$$), would we still learn useful latent variables, or would latent variables instead model what we assume to be observational noise? If we learn the former, then we would still be able to sample in latent space to generate left-curving rivers. Intuitively, we will not be able to generate samples out of distribution with the training data (i.e. left curving rivers), however this may not be the case due to the way VAEs assume a prior. In this project, we will examine this setting to determine if higher regularization of the prior increases model robustness to training bias. 

-## Background 

-VAEs are useful as encoders for downstream tasks, and as generative models. Compared to vanilla autoencoders, they offer significant advantages, since they provide some assurances regarding the distribution of their latent variables. 
Unlike VAEs, standard Autoencoders can have arbitrarily distributed embeddings, making them poor generative models, since there is no straightforward way to sample in latent space so that we generate samples in distribution with our training data. VAEs are similar to standard Autoencoders, however, they are trained with a modified loss function that ensures the learned embedding space is regularized towards an isotropic Gaussian (there exist alternative choices regarding which distribution we regularize towards, but Gaussian Mixture Models are the most popular as it stands due to their simple parameterization and empirical success). Additionally, instead of simply compressing the input with a neural network during the forward pass, the encoder of a VAE outputs a mean and covariance, defining a distribution from which we sample to obtain our latent variables. - -Since the VAE loss function regularizes our latent variables towards an isotropic Gaussian, encoded data is both disentangled and interpretable. To use trained VAEs as generative models, we simply sample latent variables i.i.d. from the Gaussian distribution and pass it through the VAE decoder to generate samples in distribution with our training data. VAEs also offer significant advantages as encoders, since regularization encourages them to learn factored, disentangled representations of the data. Finally, VAEs are particularly well-suited for interpretability since regularization encourages each latent variable to capture a unique aspect of the data. - -## Related Work - -There has been significant prior work studying regularization and choice of priors in VAEs. Notably, $$\beta$$-VAE introduces the beta parameter to control the degree to which the VAE loss function penalizes the KL divergence of the latent variable distribution with the chosen prior (an isotropic Gaussian in their case). Higgins et al. 
demonstrate that introducing the beta parameter allows the VAE encoder to learn quantitatively more disentangled latent variables. They introduce a novel quantitative metric to evaluate the disentanglement of latent space and show that $$\beta$$-VAE improves on existing methods. Furthermore, they train a $$\beta$$-VAE on a dataset of faces (celebA) and qualitatively show that $$\beta$$ regularization allows for the factorization of previously entangled latent variables such as azimuth and emotion. - -There have been several iterations on $$\beta$$-VAE such as Factor-VAE . Kim and Mnih point out that although $$\beta$$ regularization improves disentanglement in embedding space, it does so at the cost of reconstruction quality. To reduce this trade-off and still encourage disentanglement, they introduce a term to the VAE loss function that penalizes the KL divergence between the joint distribution and the product of the marginals, instead of with an isotropic Gaussian as in $$\beta$$-VAE. - -Selecting an appropriate data prior is fundamental when performing Bayesian inference. In vanilla VAEs, we often assume an isotropic Gaussian prior for our latent variables, however, this is not always a good assumption, making it difficult to converge . Miao et al. propose InteL-VAE, a VAE architecture capable of learning more flexible latent variables that can satisfy properties such as sparsity even when the data has significant distributional differences from a Gaussian. Their contributions allow for higher customizability of latent variables while bypassing many of the convergence issues commonplace with other methods that assume non-Gaussian priors. - -Since that under ideal conditions, VAEs recover factorized latent variables, causal inference has become a standard setting for their application. Madras et al. propose structured causal models to recover hidden "causal effects" with the aim of improving fairness when presented with biased data . 
They specify a framework where we want to recover the latent factors so that decision making in applications such as loan assignment and school admissions can be approached fairly. Admittedly, Structured Causal Modeling (SCM) is arguably a better setting for further work on our proposed research question. However, this field is largely outside of the scope of the course, so we will only observe that Madras et al. utilize a model where causal factors, which are analogous to our ground truth latent variables, affect a decision and an outcome, and that they utilize a Bayesian framework to perform variational inference. Future iterations of our research should borrow methods from this field of mathematics for maximum impact. Louizos et al. propose the Causal Effect VAE , marrying the adjacent fields and setting the stage for future research. 

-Although there is plenty of research adjacent to our particular question of interest, $$\beta$$-VAE investigates how $$\beta$$-regularization affects disentanglement, but not robustness to training bias. Other works that investigate the ability of latent variable models to recover the ground truth in the presence of training bias are not concerned with $$\beta$$-regularization. $$\beta$$-regularization has been shown to be effective, in addition to being extremely simple to implement, compared to other regularization techniques. Thus it is an ideal candidate for directed research on how regularization affects VAE robustness to training bias. Our question is novel, supported by adjacent research, and reasonable to implement with the resources available to an undergraduate student. 

-## Set-up and Methods 

-### Data 

-More concretely, suppose that there exists a data generating function $$\mathcal{G}: Z \to X$$ that generates our training dataset given random variables $$Z \sim p_{\text{data}}$$. 
For simplicity, our data will be nxn grids of squares, where the intensity of each square is deterministically proportional to its respective random variable. To create our training dataset, we sample $$n^2$$ random variables from an isotropic Gaussian distribution with mean $$\mu$$ and covariance I. We then apply a sigmoid activation to the random variables so that values are in the range [0,1]. We then create an mn x mn image with mxm pixel grids for each random variable. Finally, we add Gaussian noise to the image. We choose n=3, m=7, and train a VAE for each value of $$\mu$$ in the set {0, 1/2, 1, 3/2, ... 5}. 


-#### Training Data 

-The following figure shows example training images before noising. Each row has 21 images drawn from the distribution defined by applying a sigmoid activation to a normally-distributed random variable with variance 1 and mean specified by the row index. 

-{% include figure.html path="assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example training.png" %} 

-And here are some images with some noise added. 

-{% include figure.html path="assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example training noised.png" %} 

-#### Test Dataset 

-To create our test dataset, we discretize the domain of latent variables by binning. We then enumerate all possible combinations of latent variables, and generate corresponding images without adding noise. We restrict the domain of the generating variables to {0.1, 0.5, 0.9}, and enumerate all possible combinations. This yields a test dataset of 19683 images. 
- -##### Example Test Images -{% include figure.html path="assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example test images.png" %} - -### Training - -With this setup, the structure of our latent space matches that of the ground-truth latent variables, creating an appropriate setting in which to test how training bias and regularization affect the quality of learned models. Our pipeline is as follows. We train a VAE on its associated training set by maximizing the ELBO. After T training steps, we then train a linear projection head from the ground-truth latent variables to our learned latent variables. Even if we fully recover the ground-truth latent variables in our model, there is no assurance that we will not learn some permutation of the ground-truth latent variables. Thus in order to test if a particular latent variable was learned in our model, we must utilize such a projection to map from ground truth to learned latent variables, then decode the sample and evaluate the generated image. - -Although the Mutual Information between the ground truth latent variables $$z \sim p_z$$ and the learned latent variables $$\hat{z} \sim p_\hat{z}$$ would be a more encompassing gauge if the VAE recovered the latent variables, using a linear projection in lieu of a Mutual Information estimator such as MINE is justified for the following reasons. Namely, we assume an isotropic Gaussian during training, so a good VAE will learn disentangled latent variables that will be off by at most a rotation from the ground truth latent variables. Furthermore, we control the data generation process so that data is generated by $$n^2$$ normally distributed random variables. Thus we can assume that a linear projection is sufficient to recover the ground truth latent variables from our learned latent variables. 
Furthermore, given the time constraints and resources allocated for this project, simply training a linear projection and taking the final mean squared error as a proxy for mutual information allows for simpler implementation. 

-We train with the Adam optimizer. 

-| Hyperparameter | Value | 
-| ------------- | -----: | 
-| VAE training steps | 10000 | 
-| Linear Projection Training Epochs | 3 | 
-| Training noise mean | 0 | 
-| Training noise variance | 0.25 | 


-## Results 

-### Training Observations 

-During the unsupervised training phase where we train the various VAE models on their respective training sets, we observe that dataset choice and penalization of the KL divergence (beta hyperparameter) have consistent effects on the training curves. The following charts demonstrate that increased penalization of the KL divergence results in higher training loss, as well as noisier training loss and longer convergence times. This is expected since higher regularization directly increases the loss and its associated noise. We approximate the KL divergence by drawing one sample, which is highly variable, but tends to work empirically. We also observe that higher training bias (i.e. higher pre-activation mean of the data generating latent variables) results in higher training loss. As we increase this training bias, it becomes harder and harder to disambiguate latent features from noise. Thus models learn uninterpretable latent variables and poor decoders that learn to trivially output the dominating color (white). 

-
-
-
Training Curves Varying Training Distribution
- {% include figure.html path="assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 1.png" %} -
-
-
Training Curves Varying $\beta$-Regularization
- {% include figure.html path="assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/mu=[0] training curves.png" %} -
-
- -### Evaluation - -The following figure shows a heat map of our Proxy for measuring Mutual Information (which we will refer to as PMI) between the learned latent variables $$\hat{Z}$$ and the true latent variables $$Z$$. - - -{% include figure.html path="assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/MSE projection head.png" %} - -Note that when we randomly initialized a VAE and then trained linear projections from the ground truth latents to recovered latents, we achieved an PMI 0.1121 (averaged over 3 runs with identical training parameters). The heatmap shows that we almost completely recovered the ground-truth latent variables with low regularization and low training bias. As training bias increases, the model recovers less and less informative representations of the true latent variables. - -Another heuristic that we can utilize to estimate the Mutual Information between the recovered latents and the ground truth latents is the mean squared error between $$\mathcal{G}(z)$$ and $$\mathcal{D}_\text{VAE}(P(z))$$ averaged over our test set, where P is the learned linear projection from $$Z \to \hat{Z}$$ and $$\mathcal{D}_\text{VAE}$$ is the VAE decoder. The following figure heatmap visualizes this figure. - -{% include figure.html path="assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/MSE generating on test set.png" %} - - - - - -## Conclusion and Future Work - -From the collected data, it is visually clear that there exists a relationship between $$\beta$$-regularization and training bias. In both heat maps, there are reasonably well-defined diagonal level surfaces, indicating that there is some relationship between regularisation towards an isotropic Gaussian prior and robustness to training bias. 
Validation and further experiments are required to legitimize this conclusion, however, these experiments are an indication that conscious regularization can be a useful technique to mitigate training biases of a particular form. At this point, further work is required to interpret the results, since it is not clear why we seem to observe inverse relationships between the $$\beta$$-regularization and training bias when we involve the decoder. - -It is also worth noting that during pretraining, VAEs were trained for a fixed number of training steps, and not until convergence. Thus it is highly plausible that models with higher $$\beta$$-regularization (i.e. models with $$\beta > 1$$) were not trained to completion, and therefore can not be fairly evaluated with mutual information estimators without further training. Given my computational and temporal constraints, it was not reasonable to run experiments with longer training. Future work will have to validate my findings by pretraining for longer and testing a finer resolution of $$\beta$$ parameters. Finally, it will be interesting to expand this work to more interesting datasets such as celebA and inject training bias by resampling the dataset according to some variables such as hair color or skin tone. Once we move beyond the assumptions assured by pet data, we can reevaluate what relationships hold true as we gradually add the complexity inherent to the real world. \ No newline at end of file diff --git a/_posts/2023-12-12-VIVFormer.md b/_posts/2023-12-12-VIVFormer.md deleted file mode 100644 index ee1097eb..00000000 --- a/_posts/2023-12-12-VIVFormer.md +++ /dev/null @@ -1,371 +0,0 @@ ---- -layout: distill -title: VIVformer -description: A deep transformer framework trained on real experimental and synthetic gen-AI data for forecasting non-stationary time-series. Applications and insights drawn from vortex induced vibrations data collected at the MIT Towing Tank. 
- -date: 2022-12-01 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Andreas Mentzelopoulos - url: "https://scholar.google.com/citations?user=0SOhn-YAAAAJ&hl=en" - affiliations: - name: PhD Candidate in Mechanical Engineering and Computation, MIT - - -# must be the exact same name as your blogpost -bibliography: 2023-12-12-VIVFormer.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction & Motivation - - name: Data Description - - name: Vibration Data as Images - - name: Gen-AI for Physical Vibration Data using Variational Autoencoders - - name: VIVformer - A Transformer Architecture for Vortex-Induced Vibrations - subsections: - - name: The Real (data) Deal - - name: The hyper-Real (Gen-AI data) Deal - - name: Conclusions - - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Introduction & Motivation - -Vortex induced vibrations (VIV) are vibrations that affect bluff bodies in the presence of currents. VIV are driven by the periodic formation and shedding of vortices in the bodies' wakes which create an alternating pressure variation causing persistent vibrations . The vibration amplitude in VIV is typically moderate, not exceeding about one to two body diameters . 
For flexible bodies, VIV are not uniform along the body's length (usually referred to as the span) but rather different points along the span vibrate with different amplitudes and phases. - -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/Intro.jpg" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/Intro2.jpeg" class="img-fluid" %} -
-
-
- Schematic diagrams of vortex induced vibrations of flexible bodies. -
- -Observations of vortex induced vibrations (VIV) date back to antiquity, when the Aeolian tones, sounds created by pressure fluctuations induced by winds passing over taut strings were recognized. The first sketches of vortices date back to Leonardo da Vinci in the early 16th century. Today, VIV have become a problem of interest to both theoreticians, due to the complex underlying mechanisms involved, and engineers, due to the practical significance of mitigating the fatigue damage VIV can cause to offshore structures and equipment such as marine risers and offshore wind turbines. In order to gain some intuition, the reader can refer to the video of a flexible body undergoing VIV in section "Data Description" (below). - -The underlying driving mechanism of VIV is vortex formation; specifically, the periodic shedding of vortices formed in the wake behind bluff bodies placed within cross-currents . The vortex shedding frequency in rigid bodies is known as the Strouhal frequency. For flexibly mounted or flexible bodies, the vortex formation frequency can be entrained away from the Strouhal frequency and coincides with the frequency of vibration in a phenomenon known as lock-in . This occurs across a wide range of oscillating frequencies resembling a nonlinear resonance . Given that flexible body VIV are not span-wise uniform as the flexible body undergoes a spatially traveling and/or standing wave response from the forcing excerted by the fluid , the observed motions are nonstationary, unsteady, and can transition to different responses even for seemingly unchanged experimental conditions. - -VIV of flexible bodies are usually modelled by leveraging the modal decomposition technique (i.e. using a Fourier expansion of sinusoidal mode shapes with time varying coefficients), similar to the approach introduced for modelling vibrating shafts and beams . Recently, Kharazmi et al. 
(2021) attempted to learn the mode shapes and time varying coefficients using LSTM networks in modal space (LSTM-Modnet); Mentzelopoulos et al. (2023) proposed learning a sparse-mode set of sinusoidal modes along with the corresponding time-varying coefficients. Both frameworks suffer from the inability to robustly forecast future motions. - -Although leveraging transformers to expand the horizon of predictions of time series is a very active field of research , transformers have not yet been used to predict VIV of flexible bodies, which are physical non-stationary time-series, to the best of the author's knowledge. In addition, only limited work has been performed in generating physical data using generative models and there are no identifiable applications to VIV in the literature. Mostly appications of machine learning for VIV include solving physical equations using physics-informed neural networks (PINNs) and learning hydrodynamic coefficients or other relevant quantities in order to predict the motions on average rather than instantaneously . - -In this work, an attempt will be made to develop a transformer network architecture to predict the VIV of a flexible body both instantaneously and on average. The transformer will be trained and tested using data collected at the MIT Towing Tank by the author. In addition, in order to make the most of the available data, a variational autoencoder (VAE) will be trained to generate more VIV samples which will then be used to train the transformer. In doing so, the capability of VAEs to create physical data which retain information of the underlying physical processes will also be examined. The rest of the blog will be organized as follows: 1. using generative-AI, specifically variational autoencoders, in order to generate physical VIV data 2. using transformers to model and forecast nonstationary flexible body VIV. 
- -## Data Description - -All data used for this study were collected during experiments conducted by the author at the MIT Towing Tank, a facility consisting of a 35m x 2.5m x 1.2m water tank equipped with a towing carriage capable of reaching speeds exceeding 2 m/s as well as a flow visualization window. In this and the following sections the terms model, riser, flexible body, and flexible cylinder will be used interchangeably to refer to the flexible cylinder model used during experiments. - -The figure below illustrates the experimental setup schematically. A solid aluminum frame was used to support the flexible cylinder; the riser model was placed vertically at the center of the structure. An ATI 6 degree of freedom force sensor was attached to the top end of the riser to measure its tension. Two GoPro Hero 11 cameras were attached to the supporting frame facing perpendicular directions to capture videos of the riser's motion in the cross-flow and in-line directions, respectively. - -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/full_schema_experiment.png" class="img-fluid" %} -
-
-
- Schematic of experimental setup. A riser model was placed at the center of a supporting frame which was towed resulting in a uniform incoming flow profile on the model. -
- -The riser model was constructed out of urethane rubber infused with tungsten powder. Specifically, Smooth-On PMC-724 urethane rubber was mixed with powdered tungsten to increase the model's density and achieve a mass-ratio $m^* = \frac{\rho_{model}}{\rho_{H_2O}} = 3$. The mixture was poured into a right cylindrical mold with a fishing line placed along its centerline to provide tension. The model's length was 890 mm with a 5 mm diameter. The length-to-diameter ratio of the model riser was L/D = 178. Equidistant markers were spray-painted red on the riser model resembling a zebra-patterning to enable motion tracking using cameras. Three underwater light fixtures were used to enhance visibility underwater. The model's ends were clamped on the supporting frame and the model was separated from the frame by a distance much greater than the body's diameter $O( > 10D)$. - -The flexible cylinder was towed at 0.7 m/s resulting in a uniform incoming flow profile along the x direction, as shown in the schematic above. Recordings of the motions were captured at a resolution of 1080p (1920x1080 pixels) and 120 fps. The Reynolds number was $ Re \approx 3,500$. A visualization of the vibration is shown below (this is a gif of the actual vibration recording downsampled in time). - -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/viv.gif" class="img-fluid" %} -
-
-
- Sample video recording of the cross-flow vibration of the flexible body (top) and tracking result of the motion (bottom). -
- -Reconstruction of the motion was done using a machine vision framework leveraging Kalman filtering for multi-object tracking; for more information one may refer to Mentzelopoulos et al. (2024) . The high level process is shown below. - -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/Image_Processing.PNG" class="img-fluid" %} -
-
-
- Pipeline for motion reconstruction. Red markers on the body were tracked across the video frames to obtain their motion. -
- -A total of 36 locations along the span were marked red on the fexible body and their positions were tracked. The endpoints were fixed on the supporting frame and thus their displacement was zero. - -## Vibration Data as Images - -The displacement of the vibrating body was recorded at 36 uniformly spaced locations along the body's span and the video recordings were sampled at 120 fps. One may store the vibration data as 2D arrays of $N_{time}$ x $N_{sensor}$, where each row corresponds to a different time of the vibrating body's displacement at $N_{sensor}$ locations. - -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/pixelated_viv.png" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/interpolated_viv.png" class="img-fluid" %} -
-
-
- Visualization of experimental data. The x-axis corresponds to sensor number (i.e. body location) and the y-axis corresponds to time step (i.e. $\Delta t = 1/fps = 1/120 \ sec$). The displacement normalized by the body's diameter is highlighted on the plane. On the left we visualize the vibration data stored in 2D arrays. On the right, we plot the interpolated values which make more intuitive sense as visualized flexible body vibrations (travelling wave response with peaks travelling to the right from location 0 to location 36). Lines parallel to the x-axis are "snapshots" of the flexible body vibrating in the direction perpendicular to the paper/screen (i.e. "in and out of the page"). -
- -The stored vibration data are illustrated above and can easily be visualized and treated like single channel images! If necessary, scaling pixel values invertibly to an interval of choice, like [0,1] or [0, 255] requires just a few operations leveraging the maximum and minimum values of the data. In the images shown above, each row corresponds to a different time of the recorded vibration at all sampled locations. The time difference between consecutive time steps is $\Delta t = 1/fps = 1/120 \ sec$. The 36 "sensor locations" correspond to the uniformly spaced markers on the body (excluding the two endpoints) and thus they span approximately the full body length. Plotting the interpolated values of the array yields a more intuitive interpretation of the vibrations. In the data shown above, a travelling wave (crests travelling) from location 0 to location 35 can be identified. For convenience, the data were stored in a single 4D array of size $N_{batch}$ x $1$ x $N_{time}$ x $N_{sensor} = N_{batch}$ x $1$ x $36$ x $36$, yielding hundreds of square arrays of size 36 x 36 which can be easily visualized and collected in batches for training models. - -## Gen-AI for Physical Vibration Data using Variational Autoencoders - -In this section we focus on generating physical vibration data using generative-AI. We will attempt using a variational autoencoder (VAE) trained on the real experimental data described above to generate synthetic data of the vibrations. We are interested in understanding whether the generated data preserve physicality and thus whether they can be used to train models and to understand the underlying physical generative process by studying the artificial data. - -A VAE is a specific network architecture whose goal is to learn a probabilistic mapping from an input space to a low dimensional latent space and then back to the input space. 
The network architecture is comprised of an encoder network which maps data from the input space to the latent space and a decoder network which maps data from the latent space back to the input space. A schematic of the VAE used for this work is shown below. - -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/VAE.PNG" class="img-fluid" %} -
-
-
- Schematic of the variational autoencoder architecture. -
- -On a high level, the variational autoencoder acts just as a regular autoencoder, with the difference that the training ensures that the distribution of the data in the latent space is regular enough to enable a generative process when sampling from the latent space. That is, the minimized loss ensures that the distribution of the data over the latent dimensions, $q(z \mid x)$, is as close to a standard normal distribution as possible. We choose to assume a Gaussian prior on the latent space for our data since we will need to sample from it when decoding, a task which is nontrivial for arbitrary distributions. The decoder on the other hand will learn the distribution of the decoded variables, $p(x \mid z)$ given their latent representations. - -The encoder architecture of choice was the following, accepting an input $x \in R^{36 \times 36}$: - -1. $x \rightarrow Linear (R^{36 \times 36}, R^{64}) \rightarrow ReLU \rightarrow Linear(R^{64}, R^{64}) \rightarrow ReLU \rightarrow x_{embedding}$ -2. $x_{embedding} \rightarrow Linear(R^{64}, R^{5}) \rightarrow ReLU \rightarrow \mu \in R^5$ -3. $x_{embedding} \rightarrow Linear(R^{64}, R^{5}) \rightarrow ReLU \rightarrow \sigma \in R^5$ - -where $\mu$ and $\sigma$ are the mean and variance of the posterior data distribution in the latent space. The decoder architecture was as follows accepting an input $z \in R^5$: - -1. $z \rightarrow Linear(R^{5}, R^{64}) \rightarrow ReLU \rightarrow Linear(R^{64}, R^{36 \times 36}) \rightarrow ReLU \rightarrow x^\prime$ - -Training was done by maximizing the evidence lower bound (ELBO) on the experimental data and the outputs of the autoencoder. This is equivalent to minimizing the following loss (negative of ELBO). - -$Loss_{ELBO} = - E_{q(z \mid x)} \bigg[ \log p(x\mid z) - D_{KL}(q(z \mid x )\mid \mid q(z)) \bigg]$ - -where $D_{KL}$ referes to the Kullback-Leibler divergence. 
Intuitively, maximizing the ELBO or minimizing the above $Loss_{ELBO}$, aims at maximizing the log-likelihood of the data given their representations in the latent space while minimizing the Kullback-Leibler divergence between the learned posterior of the data in the latent space and the prior assumption of a Gaussian distribution in the latent space. For the purposes of training, the data were scaled to be between [0, 1] in order to use binary cross entropy. The VAE was trained using Adam optimizer with a learning rate $lr = 0.01$. A step scheduler was set to decay the step by $\gamma = 1/2$ every 2,000 iterations. The training loss as a function of epoch is shown below. - -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/VAE_loss.PNG" class="img-fluid" %} -
-
-
- Training loss ($Loss_{ELBO}$) for the VAE on the input real VIV data. -
- -Having trained the VAE, samples from the standard normal distribution in $R^5$ were drawn, decoded, and rescaled in order to generate synthetic VIV data. Three random samples are included below (top), along with three random samples of real data observed during experiments (bottom). - -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/generated_VAE_1.png" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/generated_VAE_2.png" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/generated_VAE_3.png" class="img-fluid" %} -
-
-
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/real_viv_1.png" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/real_viv_2.png" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/real_viv_3.png" class="img-fluid" %} -
-
-
- Top row: Generated data using the VAE. Bottom row: Real data recorded from experiments. -
- -Albeit the generated data are certainly eye-pleasing, their promise begs the question of whether they preserve physicality. In order to address this question, we will examine whether a model trained on synthetic data can be used to predict real experimental data. - -## VIVformer - A Transformer Architecture for VIV - -Transformer network architectures have been widely used and are considered state of the art tools for various machine-learning tasks, particularly in natural language processing (NLP) and computer vision. The transformer architecture has become a cornerstone in deep learning and its applications span across all fields of engineering and science. In this section we will develop a transformer architecture to model and forecast the VIV of flexible bodies under the excitation of a hydrodynamic force. The transformer architecture used for this purpose is shown below. - -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/Transformer_architecture.png" class="img-fluid" %} -
-
-
- VIVformer: A transformer architecture for vortex-induced vibrations. -
- -As shown schematically above, the architecture is comprised by various Residual-Attention modules followed by a final linear layer. The input to the VIVformer is a batch of vibration data as discussed in previous sections "Data Description" and "Visualizing the Data" with shape $N_{batch} \times N_{time-in} \times N_{sensor}$. The data are then passed through $N_{attn-layers}$ residual attention modules (these do not affect the shape of the input) and then scaled to the desired $N_{time-out}$ yielding an $N_{batch} \times N_{time-out} \times N_{sensor}$ output. - -The residual-attention modules are the drivers of the data processing. These modules accept an input on which they perform two sequential tasks: 1. multi-head attention with a residual connection, and 2. pass the output of the multi-head attention module through a fully connected feedforward network (FFN) with a residual connection. The process can be visualized in the bottom left of the architecture schematic above. - -The multi-head attention layer is comprised of $N_{heads}$ number of attention heads which calculate the self-attention of the input as proposed by Vaswani et al. (2017) . The superposition of the input and output from the Multi-head attention module is then passed through the FFN. The FFN performs batch normalization, passes the output through a linear layer which sclaes the input to $mlp-dim$, then through a Gaussian Error Linear Unit (GeLU) activation and scales the output back to the original dimension by passing through a second linear layer. - -For this work, we attempt using 20 time steps of input data in order to predict a single future time step. That is, the input to the VIVformer is 20 time steps of vibration data at 36 locations and we try to predict the next time step at the same locations. We note that the VIVformer is flexible in terms of the number of data-points in and out as well as the number of time steps in and out. 
Decreasing the input information (both spatial and temporal) while forecasting as much as possible in terms of spatial and temporal predictions is the recommended research direction for future work. - -Although auto-regressive transformers are trending currently, for the purpose of forecasting vibrations this would lead to a pitfall of accumulating model errors and using them as inputs. In order to predict extended time horizons, simply adjusting the number of time-steps out would be the recommended course of action. - -Since we are interested in making predictions of physical vibration data, a reasonable choice for our loss function is the Mean Square Error (MSE) between predicted and observed vibrations. - - -### The Real (data) Deal - -In this section, the experimental data obtained during experiments were used to train the VIVformer. Specifically, 20 times steps at 36 locations were used as input and the next time step at the same locations was forecasted. In order to train the transformer, a dataset and dataloader was created to enable iterating over the following quantities: - -1. Sequence_in: A 2D array of shape $N_{time-in} = 20 \times N_{sensor} = 36$. -2. Target = A 2D array of shape $N_{time-out} = 1 \times N_{sensor} = 36$. - -Sequence_in refers to a single input to the VIVformer and Target is the expected output of the VIVformer. The sequences were collected in batches and then used for training. The model was trained on the MSE loss between input sequences and targets and the parameters were updated using the AdamW algorithm. The initial learning rate was set to $lr = 0.0001$ and a cosine annealing step scheduler was set to adjust the learning rate during training. - -The training data were split into 80% for training and 20% for testing/validation. 
The sequences and targets of the training data were shuffled randomly and split in mini-batches while the validation data were not in order to preserve the continuity of the vibrations when validating (important mainly for visualization purposes). The VIVformer was trained for a total of 50 epochs. The training results are shown below. - -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/real_data_VIVformer_loss.png" class="img-fluid" %} -
-
-
- VIVformer training and validation loss trained on experimental VIV data. -
- -The training results show that the model is able to gradually decrease the MSE loss between targets and predictions. The loss on both the training set and the validation set seems to be decreasing and converging. We note that the VIVformer architecture used was heuristically optimized using a trial and error approach yielding 4 attention-residual layers, with 3 attention heads of 32 hidden units and a mlp-dim of 128 hidden units. In order to visualize the predicted vibrations, the forecasting as well as target data from a random sample of 36 continuous time steps from the validation set are shown below. - -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/Forecasting_real.png" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/targets_real.png" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/difference_real.png" class="img-fluid" %} -
-
-
-From left to right: Forecasting of the VIV on a randomly selected set of 36 continuous points from the validation set (for visualization of the predicted vibrations), target data from real experiments, absolute difference between targets and predictions. -
- -As is evident from the visualized vibration predictions (above), the model can predict unseen experimental to reasonable accuracy. The expected modes are forecasted and the output is continuous. In addition, the absolute difference is almost everywhere small, although some inaccuracies do occur in the predictions. A meaningful question to ask would be how well does the model predict the root mean square (RMS) of the vibrations which gives us a sense of the prediction capabilities on average. Below we plot the RMS of the forecasted as well as the experimentally observed vibrations. - -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/RMS_real.png" class="img-fluid" %} -
-
-
-Root mean square (RMS) of the forecasted VIV as well as the experimentally observed. RMS displacement is shown on the y-axis while span (body position) is shown on the x-axis. Reasonably accurate agreement is evident between forecasting and experimental observations. -
- -The RMS result shown above shows that the model can predict the vibrations reasonably accurately on average. This is a particularly important result as it allows for direct benchmarking of this method against semi-empirical models which can only predict the average vibrations. - -Although this is not recommended practice as we described earlier, we attempt to make auto-regressive predictions using our model. That is, we start with 20 time steps of recorded vibrations as input and then use the model's predictions gradually as more and more inputs. By 20 time steps, there would be no observed data input to the model; it would only be predicting on its outputs. The auto-regressive results are shown below. - -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/auto_regression_forecasting_real.png" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/auto_regression_targets_real.png" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/auto_regression_difference_real.png" class="img-fluid" %} -
-
-
-From left to right: Auto-regressive forecasting of the VIV on a randomly selected set of 20 continuous points from the validation set, target data from real experiments, absolute difference between targets and predictions. We show 20 time steps as we assess the models capability to predict as many time steps as it uses as input. -
- -Albeit the mode shapes are consistent and remain physical looking, it appears that the magnitude of the response grows with time. As expected, errors accumulate and the forecasting becomes more and more inaccurate as time evolves. This can also be clearly visualized in the absolute difference plot (on the very right) where the difference increases with time. - -In conclusion, with respect to training on real data, the transformer is reasonably accurate in terms of forecasting future motions given a sample of the experimental data. The model trains well on the MSE loss and seems to converge in about 50 epochs. The wall time of training does not exceed a few minutes on a Google-Colab T4 GPU machine. - -### The hyper-Real (Gen-AI data) Deal - -So far we have established that the VIVformer architecture can model the physical VIV of flexible bodies reasonably accurately. This section will mainly focus on addressing the question of whether synthetic VIV data generated using our VAE are physical: that is, whether the physical properties of the vibrations are preserved during the generative process. In order to address this question, we will train the VIVformer on synthetic data only and then test the trained model on the real data. - -Sixty arrays of 36 time steps at 36 locations (this can be thought of as generating 60 images similar to the ones shown in previous section "Vibration Data as Images") were generated using the VAE trained on real experimental data. The synthetic VIV data were then organized in input and target sequences by creating a dataset and dataloader to train the VIVformer. Training was done exactly as described in section "The Real (data) Deal" with the only difference being the training data; in this case training data were only synthetic. The same split of 80% for training/validation was used on the synthetic data. The training results were as follows. - -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/synthetic_VIVformer_loss.png" class="img-fluid" %} -
-
-
-VIVformer training and validation loss trained on synthetic VIV data. -
- -The VIVformer architecture seems to train on the synthetic data well. We note that both the training and validation data are from the synthetic dataset and as such we expect that they should be very similar data. We train for 50 epochs and the results seem to reach convergence. In this case we note that the error on the validation set (calculated during each epoch after optimizing on the VIVformer on the training set) seems to be consistently smaller than the error on the training set (on average). We expect that eventually the training loss would become smaller than the validation loss although more training epochs would be required, perhaps leading to overfitting our model. Given the training results, we can be confident that the VIVformer has learned to predict the synthetic data well. - -The more important question is however, whether the VIVformer trained on the synthetic data can accurately forecast the real experimental data. Below we show the predictions of the VIVformer on the real experimental data. We underscore that the VIVformer has NOT seen a single real datum during training: the model has trained on synthetic data only! - -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/Forecasting_synthetic.png" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/targets_synthetic.png" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/difference_synthetic.png" class="img-fluid" %} -
-
-
-From left to right: Forecasting real VIV on a randomly selected set of 36 continuous points from the experiments, target data from experiments, absolute difference between targets and predictions. We note that the model has trained on synthetic data only and has never seen any real data! -
- -Albeit the VIVformer has not seen any real data during training, it is surprisingly reasonably accurate in predicting real data! Although certainly not perfect, the predictions are sensible. The root-mean-square of the vibrations forecasted and observed are shown below. - -
-
- {% include figure.html path="assets/img/2023-12-12-VIVFormer/RMS_synthetic.png" class="img-fluid" %} -
-
-
-Root mean square (RMS) of the forecasted VIV plotted on top of the RMS of the experimentally observed VIV. RMS displacement is shown on the y-axis while span (body position) is shown on the x-axis. Reasonably accurate agreement is evident between forecasting (after training on synthetic data only) and experimental observation. -
- -As is evident in the above figure, the VIVformer can make reasonably accurate predictions of the RMS of the vibrations. Both the trends and amplitudes are reasonably accurately estimated. - -Since the VIVformer has never trained on real data but can reasonably accurately predict them, we conclude that at least part of the physicality of the real data is preserved during the genrative process of the VAE. In a sense, the VAE can be though of not just as a generator which makes realistic-looking data but as a tool which learns the underlying structure and mechanisms of the physical process which generates the data; it can thus be used to better understand the data and perhaps even the physical generative process. We conclude that our VAE could certainly be used to augment scarse datasets of VIV data and in addition, that it is a powerful tool that could potentially be used to study the underlying mechanisms of the physical generative process by studying the artificially generated data! - -## Conclusions - -In this work, a data driven approach is employed to study physical system vibrations. Two main topics are explored: 1. Generative models for creating synthetic data similar to those obtained via physical processes and 2. employing transformers and the attention mechanism in order to model and forecast physical vibration data. - -A variational autoencoder is trained on physical vortex-induced vibration data in order to generate sythetic data of the vibrations. The VAE is certainly able to generate data which resemble the physical data visually. Moreover, the generative process is confirmed to preserve the physicality of the data at least partially: a transformer trained on synthetic data only is capable of predicting real experimental data to reasonable accuracy. 
In that sense, the VAE can be viewed as a tool which learns the underlying physical traits of the data and can be used not only to augment physical datasets but also to simulate and understand the underlying physical mechanisms by examining synthetic data. With that being said, a recommended future research direction would be to examine whether the outputs of the VAE satisfy physical equations of interest and how those could perhaps be included as an additional loss term when training the VAE, i.e. having a physics-informed decoder network. - -A transformer architecture for forecasting unsteady and nonstationary vortex-induced vibrations, the VIVformer, is developed. The VIVformer architecture combines multi-head attention modules and fully conncted network modules with residual connections in order to model and forecast the physical vibration time-series in both space and time. The optimized VIVformer architecture can forecast flexible body VIV in time-space to reasonable accuracy both instantaneously and on average. Testing the performance of the VIVformer while gradually decreasing the input information would yield a deeper understanding in the capabilities of the architecture; in addition, testing the extended time horizon predictions of the model would cretainly be a recommendation for future research. - - - - diff --git a/_posts/2023-12-12-WeightDecaySpecNormEffects.md b/_posts/2023-12-12-WeightDecaySpecNormEffects.md deleted file mode 100644 index 8514730c..00000000 --- a/_posts/2023-12-12-WeightDecaySpecNormEffects.md +++ /dev/null @@ -1,259 +0,0 @@ ---- -layout: distill -title: Exploring Frobenius and Spectral Normalization in MLPs and Residual networks -description: This blog post compares the effects of a spectral view on weight normalization to a frobenius view on weight normalization normalization using a novel algorithm developed by us. 
We use two network types at multiple sizes to compare the effects of these two methods on the singular values of the weight matrices, the rank of the weight matrices, and the accuracy of the models. -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Preston Hess - url: "https://rphess.cargo.site/" - affiliations: - name: MIT BCS and EECS - - name: Andrew Hutchison - affiliations: - name: MIT EECS - -# must be the exact same name as your blogpost -bibliography: 2023-12-12-WeightDecaySpecNormEffects.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Relevance and Investigation - - name: Norm Scaling - - name: Experiments - - name: Findings - subsections: - - name: Scaling Effects on Training Stability - - name: Scaling Effects on Spectral Norms - - name: Conclusion ---- - -## Relevance and Investigation - -Weight normalization in deep learning is vital because it prevents weights from getting too large, thereby improving model's learning ability, accelerating convergence, and preventing overfitting. One traditional method for weight normalization involves adding the sum of the weights' Frobenius norms to the loss function. One of the issues with penalizing Frobenius normalization of weight matrices is that it imposes a more strict constraint than may be desired for some model types- it enforces that the sum of the singular values is one, which can lead to weight matrices of rank one, which essentially enforces models to make decisions based on only one feature. In 2018, Spectral normalization emerged as an effective method, especially for Generative Adversarial Networks (GANs), to control the Lipschitz constant of the model and stabilize the training process . 
Spectral normalization is a more relaxed form of weight normalization that scales weight matrices based on their spectral norms, which is the maximum singular value of the matrix. This method is more relaxed because it does not impose a constraint on the sum of singular values, but rather only on the maximum singular value. This allows for weight matrices of higher rank, which may be more desirable for certain model types. Since 2018, spectral normalization has been used in many GAN architectures, but has not been explored as much in other deep learning scenarios. In fact, there is little published research on other approaches to the spectral view of weight normalization in general. - -We introduce two novel normalization techniques inspired by AdamW and motivated by issues caused by penalties in the loss function . Our method, which we call Norm Scaling, takes a training step using a loss function that does not include a norm penalty, then scales the norms of the weight matrices after the step. Comparing our Frobenius and spectral normalization algorithms to each other can provide valuable insights into their advantages and disadvantages in various model architectures through a thorough investigation of their effects on the weight matrices. We aim to understand how the spectral norm of weight matrices change over time and how they affect overall model performance. Furthermore, we want to see how singular values change across architectures and algorithms, determining if certain types of architectures can benefit more from spectral normalization than another. It especially becomes interesting to investigate whether spectral normalization's superior performance in stabilizing GAN training is generalized to other deep learning scenarios via different architecture types and a different scaling technique. 
The ultimate goal of this exploration is to deepen our understanding of these normalization techniques to find more intelligent ways to regularize weight matrices in order to achieve less overfitting and improve learning ability. - -## Norm Scaling - -Let us introduce our novel normalization technique, Norm Scaling. We will first describe the algorithm in the context of Frobenius normalization, then we will describe how it will be applied with spectral normalization. We begin each process by initializing the weight matrices of the model to be orthogonal, which helps prevent gradient numerical stability issues and improve convergence timing. We then multiply each weight matrix, $$ W_k $$ by $$ \sqrt{\frac{d_k}{d_{k-1}}} $$ where $$ d_k $$ is the size of the output at layer $$ k $$. This enforces the initial spectral norm of each weight matrix to be $$ \sqrt{\frac{d_k}{d_{k-1}}} $$, and the initial Frobenius Norm to be $$ \sqrt{min(d_k, d_{k-1})*\frac{d_k}{d_{k-1}}} $$. - -In the Frobenius Norm Scaling algorithm, training is relatively straightforward. After we initialize the orthogonal weight matrices but before beginning training, we calculate the Frobenius norm of each weight matrix based on the equation above and save these in our model. On each training step, we first calculate the loss, compute the gradients, and take a step using the optimizer. Then, we calculate the Frobenius norm of each weight matrix, $$ W_k $$, divide the matrix by this norm, and multiply it by its initial value that we calculated before training: - -$$ \bar{W}_k = \frac{W_k}{||W_k||_F} * \sqrt{min(d_k, d_{k-1})*\frac{d_k}{d_{k-1}}}$$ - -This ensures that the Frobenius norm of each weight matrix, $$ W_k $$, is equal to its initial value throughout the entire training process. - -The Spectral Norm Scaling algorithm is slightly more mathematically complicated, and required the use of power iteration to make sure training time was feasible. 
After we initialize the orthogonal weight matrices but before training, we save target spectral norms for each weight matrix, $$ W_k $$. On each training step, we first calculate the loss, compute the gradients, and take a step using the optimizer. Then, we calculate the first singular value, which is the same as the spectral norm, and the first right singular vector of each weight matrix, $$ W_k $$, using power iteration. In order to mimimize the difference beween the right singular vector and the power iteration prediction of this vector we use 500 steps. To use power iteration with convolution weight matrices, which have dimension 4, we view them as 2 dimension weight matrices where all dimensions past the first are flattened (this reshaping is the channel-wise decomposition method and was used for similar work in Yang et al., 2020 ). - -To find the first right singular vector and singular value, we use the fact that the top eigenvector and corresponding eigenvalue of $$ A^TA $$ are the first right singular vector and singular value of A respectively. So using the power method, we compute the top eigenvector and eigenvalue of $$ W_k^TW_K $$. We then use the fact that $$ W_kv_1 = \sigma_1u_1 $$ to compute $$ u_1 = \frac{W_kv_1}{\sigma_1} $$. - -We then perform the following normalization step: - -$$ \bar{W}_k = W_k + u_1v_1^T(\sigma^* -\sigma_1) $$ - -Where $$ \sigma^* $$ is the target spectral norm described above. - -Note that this calculation subtracts the best rank one approximation of $$ W_k $$ from $$ W_k $$, but adds the same outer product back, scaled by $$ \sigma^* $$. Note that this does NOT enforce that the new spectral norm is $$ \sigma^* $$, because it is possible that $$ \sigma_2 $$ is greater than $$ \sigma^* $$. We hope that this normalization prevents the first outer product of singular vectors from dominating the properties of the weight matrix, thus allowing for better generalization outside of the training distribution. 
- - - -## Experiments - -In order to test our Norm Scaling learning algorithm, we train a variety of models on image classification of the CIFAR100 dataset . We train two types of models, MLPs and ResNets . Our MLPs consist of fully connected layers without biases, where our medium MLP has 5 layers each with 2,000 hidden units and our large MLP has 7 layers each with 4,000 hidden units. Our ResNets are ResNet34 and ResNet50, loaded in without pre-training. We adjust the size of the first convolution layer to 64 channels of 3x3 kernels and the output layer to output 100 classes. We train each of these four models with three different conditions: the first uses no weight scaling, the second uses Frobenius norm scaling, and the third uses spectral norm scaling. We train each model for 200 epochs with a batch size of 512, an initial learning rate of 0.001, and no weight decay. We use the Adam optimizer and a multi-step learning rate scheduler with $$ \gamma = 0.1 $$ applied at epochs 60 and 120. We use the cross entropy loss function for all models. We use the same training hyper-parameters for all models. The models were trained on 4 NVIDIA Tesla A100 GPUs with paralellization handled by the pytorch lightning library. - -At the end of training, the MLP with depth 5, width 2000, and no norm scaling had a test accuracy of 25.12% and a test loss of 10.86. The MLP with depth 5, width 2000, and Frobenius norm scaling had a test accuracy of 28.23% and a test loss of 4.47. The MLP with depth 5, width 2000, and spectral norm scaling had a test accuracy of 23.21% and a test loss of 3.53. The MLP with depth 7, width 4000, and no norm scaling had a test accuracy of 23.95% and a test loss of 11.00. The MLP with depth 7, width 4000, and Frobenius norm scaling had a test accuracy of 26.62% and a test loss of 6.10. The MLP with depth 7, width 4000, and spectral norm scaling has a test accuracy of 36.25% and a test loss of 2.63. 
ResNet34 with no norm scaling had a test accuracy of 70.1% and a test loss of 2.03. ResNet34 with Frobenius norm scaling had a test accuracy of 75.24% and a test loss of 1.46. ResNet34 with spectral norm scaling had a test accuracy of 71.79% and a test loss of 1.78. ResNet50 with no norm scaling had a test accuracy of 73.45% and a test loss of 1.72. ResNet50 with Frobenius norm scaling had a test accuracy of 75.72% and a test loss of 1.40. ResNet50 with spectral norm scaling had a test accuracy of 73.29% and a test loss of 1.63. Full summaries of the changes of these metrics across epochs are plotted below with checkpoints every 10 epochs. - -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_acc_sum.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_loss_sum.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Above left: Training accuracy of MLPs. Above right: Training loss of MLPs. Spec refers to models trained with spectral norm scaling, Frob refers to models trained with Frobenius norm scaling, and Baseline refers to models trained with no norm scaling. -
- -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_acc_sum.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_loss_sum.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Above left: Testing accuracy of MLPs. Above right: Testing loss of MLPs. -
- -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_acc_sum.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_loss_sum.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Above left: Training accuracy of ResNets. Above right: Training loss of ResNets. -
- -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_acc_sum.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_loss_sum.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Above left: Testing accuracy of ResNets. Above right: Testing loss of ResNets. -
- -## Findings - -### Scaling Effects on Training Stability - -One of the most interesting findings of this investigation is the effect of spectral norm scaling on the stability of training. We can see in the figures above that spectral norm scaling has a significant effect on the stability of training for MLPs, but not for ResNets. For MLPs, spectral norm scaling significantly improves the stability of training, as shown by the fact that the training and test loss curves remain close and follow a similar path. This is especially true for the large MLP, where the training and testing loss and accuracy curves maintain a similar relationship for the entire duration of training while the test loss increases and test accuracy plateaus for the other two normalization methods. - -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_acc.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_loss.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Above left: Testing and training accuracy of Large MLP (depth 7, width 4000) with no norm scaling. Above right: Testing and training loss, same model. -
- -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_acc.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_loss.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Above left: Testing and training accuracy of Large MLP with Frobenius norm scaling. Above right: Testing and training loss, same model. -
- -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_acc.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_loss.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Above left: Testing and training accuracy of Large MLP with spectral norm scaling. Above right: Testing and training loss, same model. -
- -Although the train accuracy when using spectral norm scaling doesn't get as high as in the other two models, it is an accuracy predictor for test accuracy during the entire training time. Furthermore, it is the only of the methods we tests that continues to decrease test loss for the duration of training, where the other two show signatures of overfitting the data and increasing test loss. This is a very interesting finding because it shows that spectral norm scaling can be used to improve the stability of training for MLPs, which is a very important property for deep learning models. This is especially true for MLPs because they are more prone to overfitting than other model types, so improving the stability of training can help prevent overfitting. - -We see that this pattern does not hold for ResNets. Rather, it seems that the Frobenius norm scaling method introduces the most stability, but is still not stable as the relationship for spectral norm scaling in MLPs. Similarly, because ResNets rely on convolutions, we do not see issues with overfitting in any of the models. Altough it appears that spectral norm scaling may improve over the baseline stability, the effect is not as noticeable as the effect from Frobenius norm scaling. - -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_bl_acc.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_bl_loss.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Above left: Testing and training accuracy of ResNet50 with no norm scaling. Above right: Testing and training loss, same model. -
- -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_acc.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_loss.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Above left: Testing and training accuracy of ResNet50 with Frobenius norm scaling. Above right: Testing and training loss, same model. -
- -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_acc.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_loss.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Above left: Testing and training accuracy of ResNet50 with spectral norm scaling. Above right: Testing and training loss, same model. -
- -This is a surprising result considering that spectral normalization was first developed in the context of GANs using convolutional layers for image generation. We will address this disparity in the conclusion. - -### Scaling Effects on Spectral Norms - -While both our spectral norm and Frobenius norm scaling algorithms resulted in consistently lower spectral norm values across all epochs compared to no normalization, spectral norm scaling had far and away the largest effect on enforcing low spectral norm values for weight matrices: - - -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_bl_norms.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_frob_norms.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_spec_norms.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Changes in spectral norm values for each weight matrix in medium MLP (depth 5, width 2000) across epochs. Above left: No norm scaling. Above center: Frobenius norm scaling. Above right: Spectral norm scaling. -
- -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_bl_norms.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_frob_norms.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_spec_norms.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Changes in spectral norm values for each weight matrix in ResNet34 across epochs. Darker lines correspond to earlier layers. Above left: No norm scaling. Above center: Frobenius norm scaling. Above right: Spectral norm scaling. -
- -Using spectral norm scaling, the spectral norms of both architectures on all layers collapse to values significantly lower than those seen when using Frobenius norm scaling or no norm scaling. The average spectral norm values at the penultimate epoch (199) using spectral norm scaling is 0.8; Frobenius norm scaling is 7.8; and no normalization is 35.4 on the width 2000, depth 5 MLP architecture. - -It is also interesting that spectral norms are very similar across layers in later epochs when using spectral norm scaling, but the same is not true for the other two experiments: the average standard deviation in spectral norm values across all layers for the last 100 epochs using spectral norm scaling is ~0.02; Frobenius norm scaling is ~3.7; and no normalization is ~18.4 on the width 2000, depth 5 MLP architecture. - -While it may seem obvious that spectral norm scaling would do the best job at encouraging low spectral norm values, this was not evidently the case. While we subtract the best rank one approximation, thus decreasing the spectral norm, the new spectral norm does not necessarily become the target value, as it is possible that the second largest singular value is larger than our target spectral norm. It seemed possible that merely subtracting a rank one matrix would fail to completely curb spectral norm blow up or do it with this level of success. These results show that not only does our method do it successfully, but does it much more so than Frobenius norm scaling. What’s more, the results generalize across wildly different architectures: we see rapid convergence to low singular values in both the ResNet and MLP case roughly around the same epoch. - -## Conclusion - -One drawback of our method was the significant increase in training times of our models. Compared to the time it took to train the baseline and Frobenius norm scaling implementations, the spectral norm implementations took between ~400% to ~1,500% longer to train. 
In order to address this in the future we will implement an adaptive power iteration that stops once the singular vectors converge to a certain threshold. This will allow us to reduce the number of power iterations needed to calculate the singular values, thus reducing the training time. - -An interesting fold in our results was the difference between stability effects in the MLP and ResNet cases. We see that spectral norm scaling has a significant effect on the stability of training for MLPs, but not for ResNets. This is a surprising result considering that spectral normalization was first developed in the context of convolutional layers for image generation. We believe that this may stem from one of two reasons. The first is that we had to reduce the dimensionality of the convolutional matrices in order to use the power iteration algorithm. Although this allowed us to efficiently calculate the values we needed, it may not have been an accurate reflection of the matrix singular vectors. One route to address this in the future is to try initializing the spectral norm target values based solely on the input and output channel sizes, rather than the full size of the inputs and outputs. The second reason is that the convolutional layers in ResNets are not as prone to overfitting as the fully connected layers in MLPs, so the stability effects of spectral norm scaling would not be as noticeable. However, we still see an effect of Frobenius norm scaling, so this may be a matter of mathematical properties of the convolutional layers that we have not yet explored. - -We may see most desired effects on singular values in spectral norm scaling because subtracting the best rank one approximation of the weight matrix does not influence other singular values nor the outer products of their singular vectors. 
When we view the singular value decomposition as the sum of outer products of singular vectors scaled by singular values, we can see that we only regularize one term in this sum. This may prevent a single outer product from dominating the linear transformation, especially preventing overfitting in MLPs where overfitting tends to be an issue. This is not true of Frobenius normalization, as we scale the entire matrix. - -Overall, our results show that spectral norm scaling is a very effective method for stabilizing training in MLPs and enforcing low spectral norm values in MLPs and ResNets. This shows that spectral norm scaling may be a feasible and generalizable method for stabilizing training in a variety of conditions beyond GANs. Furthermore, we were able to achieve this without the use of a penalty in the loss function, achieving the same effect as a penalty without the negative effects. This is especially important because penalties in the loss function can cause issues with convergence and numerical stability alongside enforcing low rank, which we avoid by using our Norm Scaling algorithm. We believe our results show great potential for further rigorous quantitative research on the spectral view of weight normalization. We hope that our Norm Scaling algorithm will be used as a baseline for investigating spectral normalization algorithms that are both computationally efficient and effective at stabilizing training alongside enforcing low spectral norm values. - -All of our training code can be found in this [GitHub Repository](https://github.com/phess2/SpecNorm/tree/main). 
- -*** diff --git a/_posts/2023-12-12-combining-modalities-for-better-representation-learning.md b/_posts/2023-12-12-combining-modalities-for-better-representation-learning.md deleted file mode 100644 index 5daba8bd..00000000 --- a/_posts/2023-12-12-combining-modalities-for-better-representation-learning.md +++ /dev/null @@ -1,169 +0,0 @@ ---- -layout: distill -title: Combining Modalities for Better Molecular Representation Learning -description: -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Andrei Tyrin - affiliations: - name: MIT - -# must be the exact same name as your blogpost - -bibliography: 2023-12-12-combining-modalities-for-better-representation-learning.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -# toc: -# - name: Introduction -# subsections: -# - name: Different ways to represent molecules -# - name: Architectures for different modalities - -toc: - - name: Introduction - subsections: - - name: Importance of molecular representation learning - - name: Different ways to represent molecules - - name: Methods - subsections: - - name: Data - - name: Models - - name: Training - - name: Evaluation - - name: Analysis - subsections: - - name: Comparison of different models - - name: Nearest neighbors analysis - - name: Conclusion - subsections: - - name: Results of modalities mixing - - name: Future work - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. 
-_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } - ---- - -## Introduction - -### Importance of molecular representation learning -Molecular Representation Learning (MRL) is one of the most important tasks in molecular machine learning, drug design, and cheminformatics. It is central to addressing several key challenges in molecular sciences, including high-quality representation learning for molecular property prediction, predicting organic reaction outcomes, retrosynthesis planning, and generative modeling. Excelling in these domains is essential for the development of new drugs, materials, and catalysts. - - -### Different ways to represent molecules -The challenge of learning molecular representations is more complex than in fields like computer vision or natural language processing. This complexity stems from the variety of methods available for encoding molecular structures and the assumptions inherent to each representation. Primarily, there are four ways to represent molecules: - -1. **Fingerprints**. One of the oldest ways to represent molecules in Quantitative structure–activity relationship (QSAR) modelling. Molecular fingerprints are binary vectors that encode the presence or absence of certain substructures in the molecule. Fingerprints were one of the first ways to get the initial representation of molecules in machine learning problems. -2. **String representation** (e.g. SMILES strings). This approach involves encoding molecular fragments as tokens to form a string. This initial molecules encoding is widely used in generative molecular modeling. -3. **2-D graph**. 
A popular and intuitive approach where molecules are represented as graphs, with atoms and bonds corresponding to nodes and edges, respectively. With advancements in Graph Neural Networks (GNNs) architectures, this format is extensively used in molecular property prediction. -4. **3-D graph**. The most detailed representation, which includes spatial information about atoms and bonds in addition to the graph structure. Although obtaining 3-D graph representations is challenging, models based on this approach often demonstrate superior performance. Various modeling techniques are applied to 3-D graphs, including invariant and equivariant GNNs. - -Given these diverse approaches, this work aims to explore various molecular representations and their potential combination for enhanced performance in downstream tasks, such as molecular property prediction. Additionally, this blog post seeks to analyze the representations of small molecules by comparing nearest neighbors in the latent chemical space. We also investigate representations learned by language models trained on SMILES strings. - -## Methods - -### Data -In this study, we utilized the QM9 dataset to train and evaluate our models. Comprising approximately 133,000 small organic molecules, the dataset includes molecules with up to nine heavy atoms (specifically Carbon, Nitrogen, Oxygen, and Fluorine) and 19 distinct properties. As a well-established benchmark in molecular property prediction research, QM9 offers a comprehensive foundation for our analysis. - -Our primary focus was on predicting the free energy $G$ at 298.15K. To ensure a robust evaluation, we divided the dataset using Murcko scaffolds to prevent the same molecular scaffolds from appearing in both the training and testing sets. This division allocates 80% of the data for training, 10% for validation, and the remaining 10% for testing purposes. 
Additionally, we standardized the target values to have a zero mean and unit variance, aiming for consistency in our predictive modeling. - -### Models -The illustration of the overall approach is presented in Figure 1. -{% include figure.html path="assets/img/2023-12-12-combining-modalities-for-better-representation-learning/approach.png" class="img-fluid" %} -
- Figure 1. Illustration of the overall approach. We use different ways to represent molecules and train different models on these initial encodings. -
- -We use the following models to learn the representations of molecules: -1. **Fingerprint-based model**. Utilizing Morgan fingerprints with a radius of 2 and 2048 bits, we developed a multilayer perceptron (MLP) featuring six layers, layer normalization, and a varying number of hidden units (ranging from 512 to 256). This model focuses on learning representations from molecular fingerprints. -2. **SMILES-based model**. For the representation of SMILES strings in the QM9 dataset, we employed a Recurrent Neural Network (RNN) with LSTM cells, comprising three layers and 256 hidden units. This model learns to predict the next token in a SMILES string based on the previous tokens, using cross-entropy loss for training: -$$ -\mathcal{L}_{\text{CE}} = -\sum_{t=1}^{T} \log p(x_t | x_{
that is equivariant to rotations, translations, reflections, and permutations of the nodes. We used 4 layers, 256 hidden units, sum aggregation, mean pooling and residual connections between convolution layers to learn the representations of 3-D graphs of molecules that updates the nodes hidden representations according to the equations given in the Figure 1. - -### Training -We trained all models using the Adam optimizer with learning rate of $1\cdot10^{-3}$, batch size 32, and 100 epochs. We additionally used `ReduceLROnPlateau` learning rate scheduler. We used the mean absolute error (MAE) as the metric for evaluation. - -### Evaluation -We used several combination of modalities to evaluate the performance of the models: -1. MPNN + FPs: This model integrates the representation learned by the Message Passing Neural Network (MPNN) with the MLP trained on fingerprints, featuring 256 hidden units. It concatenates the representations from MPNN and MLP, using an MLP layer for the final target value prediction. -2. EGNN + FPs: Similar to the previous model but uses the representation learned by the EGNN. -3. EGNN + MPNN: This configuration combines the representations from EGNN and MPNN, followed by an MLP for target value prediction. -4. MPNN + RNN: This model merges representations from MPNN and a pretrained Recurrent Neural Network (RNN). The RNN's encodings remain static and are not updated during training. However, this model did not converge and was excluded from the final evaluation. - -The results of evaluation of different models on the QM9 dataset are presented in Figure 2. -
- -
-
- Figure 2. Different models' performance on the QM9 dataset. The models are trained on the same data, but with different representations. The number of parameters is displayed on top of each bar. -
- -## Analysis -### Comparison of different models -As depicted in Figure 2, the EGNN model demonstrates superior performance. A likely explanation is that the QM9 dataset's labels were calculated using computational methods that leverage the 3-D structure of molecules. The 3-D representation, therefore, proves most effective for this task, with the EGNN adept at capturing crucial 3-D interactions for predicting the target value. Interestingly, simple concatenation of hidden representations seems to dilute the information, resulting in inferior performance. This suggests that combining modalities is a complex endeavor, requiring thoughtful architectural design. - -### Nearest neighbors analysis -After the training of the models we performed the nearest neighbors analysis to compare the learned representations of molecules. We took the learned representations of the molecules in the test set and computed the nearest neighbors in the latent chemical space using cosine similarity. Additionally we plotted the PCA reduced representations (Figure 3) and analyzed the nearest neighbors for 4 different molecular scaffolds. -{% include figure.html path="assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic3.png" class="img-fluid" %} -
- Figure 3. PCA reduced representations of the molecules in the test set. The color of the points corresponds to the molecular scaffold. -
- -There are several interesting observations from the nearest neighbors analysis: -1. In the case of fingerprint reductions, the nearest neighbors are far away from the queried molecules in the latent chemical space. -2. For the reduced learned representations of the molecules in the test set we can see that the nearest neighbors are very close to the queried molecules in the latent chemical space. This is expected as the models were trained to predict the target value and therefore the representations of the molecules that are close in the latent chemical space should have similar target values. -3. The bottom right plot of Figure 3, showcasing the EGNN + FPs combination, reveals a very interesting pattern — the reduced chemical space resembles the combination of the reduced chemical spaces of the EGNN and FPs. EGNN's reduced chemical space is more "sparse", while the representation learned by the MLP is more dense but much more spread out. Another interesting observation is that the combined chemical space is more structured due to the presence of some clustered fragments, which is not present in the case of either EGNN or MLP. - -Additionally we analyzed the nearest neighbors for 4 different molecular scaffolds. The results for 3 of them are presented in Figure 4. -{% include figure.html path="assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic4.png" class="img-fluid" %} -
- Figure 4. Nearest neighbors for 3 different molecular scaffold instances. Top molecule for each cell is the closest molecule to the queried molecule in the latent chemical space, the bottom molecule is the second closest molecule. -
- -From Figure 4 we can make some additional observations: -- For the fingerprints similarity, molecules are very similar to the queried molecule. This is an expected result because the molecules with the highest matches in the fingerprints are the most similar to the queried molecule. Although, for the third example the second closest molecule is not very similar to the queried molecule. -- MPNN, EGNN as well as their combination return the molecules that are very similar to the queried molecule. Because the model was trained to predict the target value, the nearest neighbors are molecules with similar target values (this is not guaranteed for the fingerprints similarity because substructures can be combined in different ways potentially leading to very different molecular properties). -- In the case of the MLP trained on fingerprints, the nearest neighbors can have very different scaffolds. This agrees with the performance of the model on the QM9 dataset — the model is not able to fully capture the molecular structure and therefore the nearest neighbors can have very different scaffolds even though the initial representations were the ones retrieving the most similar molecules (fingerprints). -- Interestingly, in the case of the RNN trained on SMILES strings, the nearest neighbors can have very different scaffolds. This result is expected because the RNN was trained to predict the next token in the sequence and therefore the nearest neighbors are the molecules with similar SMILES strings. For example, the first molecule contains a triple bond between two carbon atoms. In the case of the second closest neighbor for the first scaffold instance there are two triple bonds between carbon and nitrogen atoms. The scaffold is different, but the SMILES strings are similar. - -Overall, the key takeaway is that the more effectively a model performs in the supervised learning phase (excluding the RNN), the more meaningful its nearest neighbors are in terms of molecular structure resemblance. 
While fingerprint similarity still yields closely matched molecules, the results are not as insightful as those from GNNs, which capture molecular structures with greater nuance and expressiveness. - -## Conclusion -### Results of modalities mixing -Modalities mixing is a very interesting and promising approach for the problems in the field of molecular machine learning. However, architectures should be designed carefully to achieve the best performance. In our work we showed that simple concatenation of the representations learned by different models can lead to worse performance on the downstream tasks. - -### Future work -The obvious direction of future work is to experiment with different architectures for modalities mixing. Another interesting direction is to use the mixed modalities for the generative molecular modeling as string methods still perform better than the majority of 3-D generative approaches even though the latter one is more natural. Therefore, it would be interesting to explore the combination of the string and 3-D graph representations for the generative modeling. \ No newline at end of file diff --git a/_posts/2023-12-12-contrastivediffusion-image2video.md b/_posts/2023-12-12-contrastivediffusion-image2video.md deleted file mode 100644 index e218ce60..00000000 --- a/_posts/2023-12-12-contrastivediffusion-image2video.md +++ /dev/null @@ -1,298 +0,0 @@ ---- -layout: distill -title: Exploring Image-Supervised Contrastive Diffusion - A Comparative Analysis with Applications in Image-to-Video Generation -description: Image-to-image (I2I) and image-to-video (I2V) may be the next frontier of generative deep learning capabilities, but current models struggle with robustness, largely due to the implicit, rather than explicit, representation learning objective during traditional diffusion model training. 
Hence, we propose a new technique where a custom contrastive loss function is used to leverage the innate latent space of the diffusion model’s variational autoencoder. This enables us to study the creation of lightweight models that lose less contextual information between input conditioning and target output, which we elucidate in this blog. - -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Rishab Parthasarathy - affiliations: - name: MIT CSAIL - - name: Theo Jiang - affiliations: - name: MIT CSAIL - - -# must be the exact same name as your blogpost -bibliography: 2023-12-12-contrastivediffusion-image2video.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction and Motivation - - name: Related Work - subsections: - - name: Image-to-Image Models - - name: Image-to-Video Models - - name: Contrastive Models - - name: Our Proposal - - name: Model Architecture - subsections: - - name: Image to Image - - name: Image to Video - - name: Data - - name: Experiments - - name: Results - subsections: - - name: Image to Image Results - - name: Image to Video Results - - name: Discussion and Conclusion - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. 
-_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Introduction and Motivation - -With recent advances in computer vision and generative AI, we all have observed the various feats that diffusive models have achieved in conditional image generation. These models have demonstrated unparalleled ability in creativity, fidelity, and relevance when generating images from text prompts. Given this explosive success of diffusion for the task of image generation, the idea of applying the same concepts to conditional video generation seems like a logical follow-up. Yet, the field still lacks robust and compelling methods for conditional video generation with diffusion models. This raises the question: why might this be? Or perhaps a follow-up: what makes videos so hard in comparison to images? - -In an attempt to address our first question, if we take a brief dive into previous literature, we will find that the issue is not a lack of effort. Ho et al. , Zhang et al. , and Chen et al. , all explore this idea, yet the results from these methods are not nearly as exciting as the results we see in images. But why is this? - -Perhaps the answer lies in the solution to our second question. One of the most obvious complexities that videos have over images is also perhaps one of the most difficult: the temporal dependence between frames. But why is this relationship so hard for diffusion models? Following the work of Zhu et al. 
, we hypothesize that this is because the implicit learning of diffusive steps between images in a video is too complex of a problem for a diffusion model; relying on the model to learn the implicit relationship between representations of video frames is infeasible from a training and convergence standpoint. If we can instead learn diffusive steps over a more regularized learned latent space, the optimization problem can be greatly simplified and the diffusion model will in theory be more robust. - -To do so, we introduce a new framework for fine-tuning diffusion models when given images in addition to text as conditional information, targeting this challenge of making the model’s use of the latent space more robust. Specifically, we utilize contrastive learning techniques to ensure that the model learns consistency between latents from different image domains, which we first validate on the easier image-to-image (I2I) case before moving into image-to-video (I2V). - - -## Related Work - -Taking a step back to examine the current state of research, let’s first take a look at what current I2I models look like. - -### Image-to-Image Models - -In the field of image-to-image, there are two main approaches, using images to control the model output, and modifying the image itself. - -The first approach is characterized by work like ControlNet and T2I , which freeze the overall diffusion network and instead fine-tune a lightweight adapter model on the top. This adapter is typically a combination of stable diffusion layers that have an additive effect on the model as a whole, trained using the fine-tuning dataset. However, these models are largely still built for text-to-image tasks, using the input images as conditioning for the input text prompt, such as a wireframe image for poses. However, this does not allow modification of the image itself, simply using the image as guidelines during the diffusion process itself, meaning that its’ style is not preserved. 
- -The second method is more related to maintaining both the style and content of the original image, and instead directly fine-tunes the diffusion network to actually use the input images. The first such model for this purpose is the original pix2pix architecture, which while built for GANs, still carries vital lessons to this day. By fine-tuning a loss that actually involves the mapping between input and output image, the model learns to actually adapt the image while keeping other relevant contexts the same . After this, the Palette model for generalist, multi-task diffusion trained a diffusion model from scratch for multiple different tasks . However, then Instruct-Pix2Pix built on the original pix2pix architecture, taking a pre-trained diffusion model and conditioning it on both the noisy text latent and the input image latent, meaning that the training latent had both and therefore would fully train on the input image latent . This architecture is presented below. - -
-
- {% include figure.html path="assets/img/2023-12-12-contrastivediffusion-image2video/Image.png" class="img-fluid rounded z-depth-1" %} -
-
-
- Traditional diffusion architecture -
- -For the purpose of this blog, we study Instruct-Pix2Pix like fine-tuning schemes, since they align with what we need for video-based studies, maintaining content of the previous image while making small modulations based on the input text. - -### Image-to-Video Models -Moving to I2V, we find that current image-to-video frameworks typically still use a traditional diffusion architecture, going straight from text and image representations to an output image. However, this naive approach struggles with serious issues like frame clipping and loss of contextual information, which is expected since noise-based sampling can easily throw off the output of individual frames. - -Hence, Ho et al. in 2022 proposed the first solution, supplementing conditional sampling for generation with an adjusted denoising model that directly forces image latents to be more similar to the corresponding text latents . While this achieved improved results over the straightforward diffusion approach, this often forces the model to stick too closely to the text latent, resulting in incoherent videos. - -To solve this issue, two recent approaches from Chen et al. and Zhang et al. have proposed methods to augment the video diffusion models themselves. Chen et al. uses the image encodings from CLIP-like language embeddings in an encoder-decoder language model, feeding the CLIP encodings at each step into a cross-attention layer that generates attention scores with the current video generation . In doing so, additional coherence between frames is achieved. On the other hand, Zhang et al. use multiple encoders, with CLIP and VQ-GAN concatenated before two stages of diffusion model training, which they claim provides the hierarchical learning required to learn the temporal processing . However, both these models are extremely data-heavy and still suffer from hallucination and frame skipping. - - -### Contrastive Models -To remedy these issues in diffusion models, Ouyang et al. and Zhu et al. 
posit that the implicit representation learning objective in diffusion models is the primary cause of the slow convergence and hallucination issues. Specifically, diffusion models do not directly compare their output to their input, as in contrastive models, instead performing a variational approximation of the negative log-likelihood loss over the full Markov chain. Instead, Ouyang and Zhu propose to train the diffusion model to output a structured latent in the latent space of a contrastive model like a VQ-VAE, which then reconstructs the output image . In doing so, a contrastive term can be added to the loss of the diffusion model, maximizing the mutual information between the structured (output) latent and input latent, leading to stronger correlations between input and output, and hence improved convergence. Hence, this approach seems to have potential in fixing the hallucination and coherence issues in video diffusion models, without the need for added complexity. - - -## Our Proposal - -Thus, we propose a novel method for conditional image-to-image generation (generating images given a starting frame and text description) by training the diffusion model to actually utilize the regularized latent space in which a diffusion model can operate. Following the line of thought introduced above, we hypothesize that under such a formulation, the diffusion model is much more robust to temporal inconsistency, because of the regularity in the latent space. For example, if we imagine a highly regularized latent space, we will find all logical next frames for a given anchor frame clustered very closely around the anchor in this latent space. Therefore, any step the diffusion model takes would produce valid subsequent frames; it suffices simply for the model to learn which direction to go given the conditioned text prompt. 
- -## Model Architecture - -### Image to Image - -Given a base pretrained diffusion model, such as Runway ML’s StableDiffusion 1.4, which is the model used in this blog, it consists of various components. The three that are the most important are the VAE image encoder/decoder, the UNet, and the CLIP text encoder. The VAE begins by learning to transform images into latents and vice-versa, which is used to compress the input image and decode the output latent in the original Instruct-Pix2Pix stack. On the other hand, the UNet predicts the noise in the denoising part of the pipeline, whereas the CLIP text encoder encodes the input text. - -In terms of the general diffusion model, we use the traditional diffusion loss, - -$$\mathcal{L} = \mathbb{E}[(\epsilon - \epsilon_\theta(x_t))^2]$$ - -which essentially encodes the mean squared error loss between the added noise and the noise that is predicted by the UNet. This pipeline is illustrated in the below image. - -
-
- {% include figure.html path="assets/img/2023-12-12-contrastivediffusion-image2video/Image (4).png" class="img-fluid rounded z-depth-1" %} -
-
-
- Construction of the MSE loss for a traditional diffusion fine-tuning stack. -
- -However, this loss does not encode anything of the relation between the frames themselves, which has the potential to lead to low coherence between source and target image, and thus lead to poor output quality. However, contrastively trained models like CLIP have shown strong correlative behavior between multiple modalities in the past, like between text and image, which is why we move towards contrastive losses. - -In traditional contrastive learning, we typically have our classes divided by our dataset, such as for shape, as shown in this example of a shape dataset taken from the fourth homework of 6.s898: - -
-
- {% include figure.html path="assets/img/2023-12-12-contrastivediffusion-image2video/contrastiveshapes.png" class="img-fluid rounded z-depth-1" %} -
-
-
- An example of a contrastive dataset for shapes -
- -For this contrastive learning dataset, we have images that are well classified, but in terms of our image to image task, there is no such easy classification. Instead, we adopt the notion that in such a dataset, with a batch size that is small relative to the size of the dataset, each image will be reasonably different from the other images. Also because we don't want to cluster the latent space, as the VAE is fully pretrained in the case of the diffusion fine-tuning methodology, we don't need to actually push similar items between the test set closer together, only push the diffusion output closer to the input conditioning. - -Hence, for this task, we consider each image within the larger batch as a negative sample, only using the corresponding latent in our optimization task as the positive sample. Also, given that we want both similarity to the input image and the target image, we want our loss to look like - -$$\mathcal{L} = \mathcal{L}_{MSE} + \mathcal{L}_{c, i} + \mathcal{L}_{c, t}$$ - -where c indicates contrastive and i, t indicate input and target, respectively. - -For the images, they are encoded by the VAE, which has learned structure due to its Gaussian training objective in the ELBO loss, which means we can directly dot product the latents when calculating the contrastive loss: - -$$\mathcal{L}_c = \mathbb{E}[\frac{e^{x_+^{T}x}}{\sum_{x' \in \{x_+, x_{-} \}} e^{x'^{T}x}}]$$ - -This is calculated easily using a matrix multiplication and a cross entropy loss. Now, since we compute the contrastive loss using the predicted latent, and not the noise, we also add on a constructive aspect to our diffusion model. From the final noise prediction, the model also generates the predicted latent using the noise scheduler: - -$$x_0 = \frac{1}{\sqrt{\bar{\alpha_t}}}(x_t \pm \sqrt{1 - \bar{\alpha_t}}\epsilon_\theta(t))$$ - -where alpha is the cumulative products of the alphas in the noise scheduler. 
These predicted final latents are then used directly in the contrastive loss formula. A visualization of how we calculate our contrastive loss can be found below: - -
-
- {% include figure.html path="assets/img/2023-12-12-contrastivediffusion-image2video/Image (3).png" class="img-fluid rounded z-depth-1" %} -
-
-
- Construction of the MSE + Contrastive loss for our fine-tuning stack. -
- -We note that in this case, we must scale the losses for numerical stability. The model we train with has latents of dimension 4 by 32 by 32, and while the MSE is scaled from 0 to 4 (due to pixel values from 1 to -1), the cross entropy loss is not. Indeed, many of these dot products are on the order of 4000, so we choose a high temperature of 1 to prevent NaN computations and then scale the losses by 4000, which is chosen because it scales the effect of each pixel in the dot product to around the same order as that in the MSE, which is averaged over all 4096 values in the latent. - -### Image to Video - -Now, for image to video, the training process of such a model involves the optimization of the above diffusion/contrastive loss based on a given pair of nearby video frames, as well as the corresponding text description for that video. This procedure works well because in a video, we must train the model to learn the next frame, so just like how masked language models are asked to predict masked tokens from a sequence, we ask the diffusion model to predict a masked frame from the given frame. On top of that, the text prompt, which often still provides the majority of the guidance for the video as a whole is already conditioned using the MSE loss, while the contrastive loss optimizes the similarity to previous frames. Otherwise, this is trained the same as a traditional diffusion model. - -During inference, we generate a video through the following process. First, an initial frame and the text description are encoded into our latent space using the VAE encoder and CLIP encoder, respectively. Now, we run an arbitrary number of passes through our diffusion model, generating a latent at each step, which is then passed in as the conditioning frame for the next forward pass. Finally, we decode the latent at each time step to obtain our video frame at that time step; stringing these frames together produces our video. 
- -From a more theoretical perspective, this method essentially aims to restrict the diffusion model’s flexibility to paths within a highly regularized, lower dimensional latent space, as opposed to the entire space of images that classical diffusion-based approaches can diffuse over. Such a restriction makes it much harder for the diffusion model to produce non-sensible output; the development of such a method would therefore enable the robust generation of highly temporally consistent and thus smooth videos. We also imagine the value of producing such a latent space itself. An interesting exercise, for example, is taking an arbitrary continuous path along vectors within a perfectly regular latent space to obtain sensible videos at arbitrary framerates. - -## Data - -Now, we explain where we got our data from. - -For text-conditioned image-to-image generation, we train on the Instruct-Pix2Pix dataset from HuggingFace, sampling 20k samples from the original training set used in the paper (timbrooks/instructpix2pix-clip-filtered). Our test and evaluation sets consist of 500 nonoverlapping samples from this same set . This dataset consists of samples with input images, edited images, input prompts, edited prompts, and an edit string that describes the edit that was made. An example is presented below: - -
-
- {% include figure.html path="assets/img/2023-12-12-contrastivediffusion-image2video/P2P_data.png" class="img-fluid rounded z-depth-1" %} -
-
-
- An example of a sample from the Instruct-Pix2Pix dataset. -
- -For text-conditioned image-to-video generation, we experimented with the use of two different video-caption datasets: MSR-VTT and WebVid-10M. Due to the high dissimilarity between the properties of the two datasets, we tested the finetuning performance of both our contrastive model and non-contrastive models on both datasets. MSR-VTT contains 10k clips scraped from a wide range of domains, with multiple human-generated captions for each video. WebVid, on the other hand, contains 10M video clips compiled from stock image sources, with captions corresponding to the stock photo titles. For WebVid10M, we only take from the 2.5M subset. For both datasets, samples were generated to follow the Instruct-Pix2Pix data formulation (original image, edit prompt, edited image) using the following strategy: - -First, we sample 25k and 10k videos from WebVid-10M and MSR-VTT, respectively. We aim to sample roughly an equal number of samples from each video for a total of 20k (original image, edit prompt, edited image) triplets. We ignore videos longer than 30 seconds in length to minimize the probability of temporal inconsistency within a given video. Then, for each video, we choose a random frame in the video (the original video fps is 25; but these frames are too close together, so we say that only one out of every 5 video frames is a valid selection target) to be our “original” image. The video’s caption is our “edit” prompt. To select our “edited” image, we note that we are optimizing the model to produce the next frame, while maintaining consistency between frames. Therefore, to select the “edited” image, we sample a normal distribution with standard deviation of 10 valid frames (50 frames in the original video), or two seconds, to select a frame after our “original” image as our “edited” image. A sample processed image from WebVid is included below. - -
-
- {% include figure.html path="assets/img/2023-12-12-contrastivediffusion-image2video/webvid_processed.png" class="img-fluid rounded z-depth-1" %} -
-
-
- An example of a processed sample from WebVid. -
- -## Experiments - -To assess the efficacy of our newly proposed strategy, we run experiments on both the original Instruct-Pix2Pix task of text-conditioned image-to-image generation, as well as the task of text-conditioned image-to-video generation, against the baseline Instruct-Pix2Pix model. The original Instruct-Pix2Pix task is run to confirm that our model, after obtaining coherency, does not lose significant expressivity. On the other hand, we expect the image-to-video model to have comparable expressivity to the baseline on a task where coherency is significantly more important. - -All of these evaluations and experiments were performed using the Accelerate library and HuggingFace Diffusers, , building off of their Instruct-Pix2Pix codebase. The model is RunwayML's Stable Diffusion v1.5 release. For the task of image-to-image generation, we trained both the baseline Instruct-Pix2Pix and our model for 9000 training steps on 4xA100-80GB with a batch size of 16 and a learning rate of 5e-5, which took on the order of 12 hours. For the image-to-video generation task, we trained both baseline Instruct-Pix2Pix and our contrastive model for 4500 training steps at a learning rate of 1e-5 and a batch size of 16 due to overfitting issues at higher # of training steps and higher learning rates, possibly due to the repetitiveness of our dataset. Note that we had a limited ability to hyperparameter tune/ablate, since each diffusion fine tuning run took multiple hours at a minimum, and we were operating on a minimal budget of spare A100s when they were available from our labs, so those results are not shown in this blog. - - - -We then evaluate on the test splits of the corresponding datasets described above (for image-to-video generation, we evaluate on the test split of WebVid, since MSRVTT's testing set has a number of non-corresponding video-prompt pairs and also very jittery videos). - -## Results - -Now, we explain our results. 
For both tasks, we assess two metrics: the first is the Frechet Inception Distance (FID) between the target image and the predicted image. The FID calculates the similarity between the distribution of images and the distribution of generated images, so a lower FID is considered better, as the distributions are more similar. Note that since our models are still technically image-to-image models and not image-to-video at their core, to evaluate the base models we use FID instead of its video counterpart FVD. For our second metric, we use CLIP Scores between the edit prompt and each of the source image, the predicted image, and the target image. The CLIP score can be thought about as a measure of the similarity between the prompt and the image, with a higher score being better, referring to higher similarity. - -These metrics are used to evaluate our base image-to-video models as well, as they both determine the amount of prompt following and fidelity we can determine in our videos. - -### Image to Image Results - -For text-conditioned image-to-image generation, we observe that our models have these FID and CLIP scores: - -| | FID | CLIP (source - prompt) | CLIP (gen - prompt) | CLIP (target - prompt) | -|-----------|-------|-------------------------|---------------------|------------------------| -| Ours | 158.8 | 21.7 | **24.4** | 24.1 | -| Baseline | **142.4** | 21.7 | **24.4** | 24.1 | - - -Our model matches the baseline on CLIP score, meaning that our model exhibits similar prompt following characteristics as the baseline. On top of that, our FID is only slightly higher than the baseline, meaning that the expressivity has not decreased significantly. However, images do not have similarly robust coherence metrics, so we evaluate these qualitatively. 
- -#### Coherence - -On the subject of coherence, we provide some image output pairs in the figure below: - -
-
- {% include figure.html path="assets/img/2023-12-12-contrastivediffusion-image2video/sample_images.png" class="img-fluid rounded z-depth-1" %} -
-
-
- An example of a sampled images, with prompt of "make the mountain snowy" and "make it futuristic," respectively. -
- -For both scenes, while the baseline diffusion model follows the prompt more fully, which may match the output distribution (hence getting a better FID score), we notice several key contrastive differences, which would impact coherence. In the mountain for example, the forest disappears in the baseline version, which also doesn't maintain the painting-like style. On top of that, in the Eiffel tower case, the Eiffel tower rotates in the non-contrastive version. These observations lead to the idea that the contrastive model may be prioritizing coherence as desired, despite some loss in performance. Similar patterns are observed throughout the dataset. - -### Image to Video Results - -For text-conditioned image-to-video generation, we observe that our models have the FID and CLIP scores in the table below: - -| | FID | CLIP (source - prompt) | CLIP (gen - prompt) | CLIP (target - prompt) | -|-------------------|-------|-------------------------|----------------------|------------------------| -| Ours (trained on WebVid) | **102.9** | 29.9 | 27.5 | 29.8 | -| Ours (trained on MSR-VTT) | 149.3 | 29.9 | 27.6 | 29.8 | -| Baseline (trained on WebVid) | * | * | * | * | -| Baseline (trained on MSR-VTT) | 172.3 | 29.9 | **29.4** | 29.8 | - - -Note that in this case, we include asterisks for the baseline numbers on WebVid because it produces NSFW content as marked by the HuggingFace Diffusers library more than 25% of the time. This means that the metrics are not directly comparable as we were unable to find a validation set on which we could evaluate the models quantitatively on even ground. Nonetheless, we still include the WebVid baseline in our qualitative analysis. - -Looking at the rest of the metrics, the baseline on MSR-VTT has a decently higher correlation with the prompt than the contrastive model. This makes sense, as the baseline is trained only on the objective of denoising the prompt latent, while we add the contrastive term. 
On the other hand, we have a significantly lower FID score of the MSR-VTT trained models, which means that the distributions of our output data relative to the target output data was more similar, which is probably due to the fact that our high coherence is useful in tasks where source and target distributions are similar. - -#### Qualitative Video Generation - -For a better understanding of the in-context performance of our model and to make up for the invalidity of the baseline model trained on the WebVid dataset above, we also perform qualitative assessments of longer videos generated by our models and the baselines. For each of 4 selected starting frames, we use a prompt generated from the sequestered part of WebVid to generate 5 subsequent frames for the video: - -
-
- {% include figure.html path="assets/img/2023-12-12-contrastivediffusion-image2video/sample_videos.png" class="img-fluid rounded z-depth-1" %} -
-
-
- An example of sampled videos for a prompt of a factory emitting smoke. -
- -From these generated videos, we observe that our models are significantly better at generating coherent frames, as we expected. In particular, we see that the MSR-VTT baseline model deviates heavily from the starting image on the very next frame, while our MSR-VTT model largely retains the original characteristics despite some content drifting after frame 3. WebVid noticeably performs better on the baseline, but does still observe some signs of progressive degradation in our predicted outputs, along with lack of motion in contrast to the prompt for the baseline model. This progressive degradation is likely due to small levels of inclarity in each subsequent frame being compounded over multiple frames; due to coherence between frames, the subsequent frames will contain strictly more inclarity than the previous. On the other hand, our model on WebVid sees less degradation on top of actually having coherent motion of smoke billowing, showing successful output. - -Overall though, WebVid was observed to have significantly better results than MSR-VTT, which is likely attributed to the greater quality of the dataset and less jittery videos. - -## Discussion and Conclusion -In this project, we explored the idea of using contrastive losses to improve the coherency between input and output images in the context of text-conditioned image-to-image generation. In particular, we study the utility of this ability to generate highly coherent diffusion results in I2V, where the current state-of-the-art suffers heavily from temporal inconsistency. We evaluate our models on the classic Instruct Pix2Pix task to assess its preservation of expressive ability and conclude that no significant degradation of expressive ability was observed. We then evaluate our contrastive strategy on text-conditioned image-to-video synthesis and find that our models outperform the classic non-contrastive formulation in video generation tasks when evaluated on CLIP Score and KID. 
-
-Through our experiments, we have also identified some limitations of our methods and potential areas for improvement. First, we note that our model has trouble with the previously mentioned problem of progressive degradation. A possible solution to this problem could be introducing GAN training to encourage the model to produce higher-fidelity images. More robust methods could also be used (instead of sampling subsequent frames) to generate positive samples, which would increase our model’s robustness. We also notice that both our model and the baseline have trouble with a continuous depiction of motion. This is likely due to the fact that any frame is only conditioned on the previous frame. Conditioning on images multiple frames before the current image would help with this consistency issue, as well as the aforementioned progressive degradation issue. Also, because our loss function takes a negative sampling-based approach to training our models, a dataset with a significant amount of repetition like ours led to significant overfitting in preliminary runs. On top of that, runs suffered from loss spiking when the numeric instability of cross-entropy loss led to the calculation of NaN losses and exploding gradients, which requires very low values of the learning rate. This could be resolved with better sweeps of hyperparameters for scaling the losses relative to each other, or with higher quality data. Finally, as alluded to above, more time to do hyperparameter tuning with the training of larger models on larger datasets would likely help with performance in general.
-
-With this study, we examined the use of contrastive loss to improve coherency in latent diffusion, with experiments that demonstrated minimal loss of expressive capabilities and superior consistency in diffusion, resulting in better performance on image-to-video generation. 
We hope that through this study, we can drive focus toward contrastive loss approaches to obtain higher fidelity results in video generation, accelerating progress in I2V and T2V. diff --git a/_posts/2023-12-12-elephantfish-model.md b/_posts/2023-12-12-elephantfish-model.md deleted file mode 100644 index 1bc6ae93..00000000 --- a/_posts/2023-12-12-elephantfish-model.md +++ /dev/null @@ -1,140 +0,0 @@ ---- -layout: distill -title: Modeling Elephantfish Communication through Deep RNNs -description: Elephantfish represent a fascinating subject for study within the realms of bioacoustics and animal communication due to their unique use of electric fields for sensing and interaction. This project proposes the development of a deep learning framework to model the electrical communication signals of elephantfish, akin to language models used in natural language processing (NLP). -date: 2023-12-12 -htmlwidgets: true - -authors: - - name: Bright Liu - url: "https://www.linkedin.com/in/bright-liu-701174216/" - affiliations: - name: Harvard - - name: Anthony Rodriguez-Miranda - url: "https://www.linkedin.com/in/anthony-rodriguez-miranda-2a35491b6/" - affiliations: - name: Harvard - -# must be the exact same name as your blogpost -bibliography: 2023-12-12-elephantfish-model.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Abstract - - name: Introduction and Objectives - - name: Literature Review - - name: Methodology - - name: Experiments and Results - - name: Discussion and Conclusions - - name: Challenges and Future Directions ---- - -## Abstract - -Elephantfish, known for their unique use of electric fields for sensing and interaction, present a fascinating study subject within the realms of bioacoustics and animal communication. 
This project, pioneering the use of deep learning, specifically Recurrent Neural Networks (RNNs), aims to model and interpret these electrical communication signals. By combining insights from bioacoustics, linguistics, and computer science, we seek to decode these bioelectrical signals into a human-comprehensible format, thereby expanding our understanding of animal cognition and communication. The overarching goal is to decode and understand the complexity of elephantfish communication and to explore the broader applications in sociolinguistics, pragmatics, and computational linguistics for non-human species. This project pioneers in utilizing deep learning, specifically Recurrent Neural Networks (RNNs), to model and interpret the electrical communication signals of elephantfish. The study's novelty lies in its interdisciplinary approach, combining insights from bioacoustics, linguistics, and computer science to unravel the complexities of non-human communication systems. Our goal is to translate these unique bioelectrical signals into a form comprehensible to humans, thereby expanding our understanding of animal cognition and communication. - -## Introduction and Objectives - -The elephantfish, a species renowned for its unique electric-based communication and the largest brain-to-body weight ratio of all known vertebrates, offers a fascinating window into the study of non-human communication systems. These creatures, inhabiting the murky waters of African rivers and lakes, have developed a sophisticated method of communication that relies on generating and sensing electric fields. This remarkable ability not only sets them apart in the aquatic world but also poses intriguing questions about the nature and complexity of their interactions. 
The study of elephantfish communication is not just a pursuit in understanding an exotic species; it reflects a broader scientific curiosity about the principles of communication and social behavior across different life forms. - -The primary objective of this project is to develop a deep understanding of elephantfish communication through the application of advanced neural language models, specifically focusing on Recurrent Neural Networks (RNNs). This approach is inspired by the parallels drawn between the electric signals used by elephantfish and the structural aspects of human language. By leveraging techniques commonly used in natural language processing (NLP), we aim to decode these bioelectrical signals and translate them into a format that can be understood by humans. This endeavor is not only about interpreting the 'language' of a non-human species; it is about enriching our understanding of communication as a fundamental biological and social function. - -To capture the complexity of elephantfish communication, we have collaborated with labs at MIT and Columbia, gaining access to a comprehensive dataset of elephantfish electric communication signals. This dataset includes a wide range of signals recorded under various environmental and social conditions, providing a rich source of data for analysis. - -Utilizing the latest advancements in deep learning, we will develop and train neural language models that can accurately interpret and model these electric signals. The focus will be on employing Long Short-Term Memory (LSTM) RNNs, which are well-suited for handling the temporal sequences inherent in these signals. - -Drawing from the field of NLP, we will apply a range of techniques to analyze and understand the 'language' of elephantfish. This analysis will delve into the sensing, communication, and social dynamics of the species, offering insights into how they interact with each other and their environment. 
- -One of the most challenging aspects of this project is translating the electric signals into a form that is comprehensible to humans. This task will involve developing innovative methods to represent these signals visually or auditorily, making the complex patterns of communication accessible for further study and interpretation. - -Beyond the technical analysis, we aim to explore the sociolinguistic and pragmatic aspects of elephantfish communication. This exploration will involve understanding the social context and significance of different patterns of signals, thereby contributing to the broader field of computational linguistics and sociolinguistics. - -In undertaking this research, we are not only contributing to the field of bioacoustics but also bridging gaps between biology, linguistics, and computer science. The insights gained from this study have the potential to transform our understanding of animal communication and cognition, opening up new possibilities for interdisciplinary research and discovery. - -## Literature Review - -Time series analysis has been extensively used in biological studies, especially for understanding patterns in animal behavior and communication. Studies like Jurtz, et al. (2017) have demonstrated the efficacy of time series analysis in interpreting complex behavioral data in wildlife research. This forms a basis for our approach to model elephantfish movements, which are intrinsically temporal and dynamic. - -The unique architecture of LSTM RNNs, with their ability to remember long-term dependencies, makes them particularly suitable for time series prediction. Gers, Schmidhuber, and Cummins (2000) showcased the potential of LSTM RNNs in learning to bridge minimal time lags in excess of 1000 discrete time steps between relevant input events and target signals, setting a precedent for their application in predicting animal movement patterns. 
- -Recent advancements in bioacoustics have seen LSTM RNNs being employed to analyze and predict patterns in animal communication. For instance, Stowell and Plumbley (2014) applied LSTM networks to bird song recognition, illustrating the network's capacity to handle temporal sequences in bioacoustic signals of bird sounds. This aligns closely with our project's objective of modeling the movement patterns of elephantfish, which are hypothesized to be closely tied to their communication. - -Research on aquatic species like elephantfish presents unique challenges due to their environment and the nature of their communication. The work of Stoddard et al. (2010) in electric signal analysis of male electric fishes provides insights into the complexity of such studies. However, there is a noticeable gap in applying advanced time series models, like LSTM RNNs, specifically to the movement patterns and communication signals of elephantfish. - -The application of NLP techniques to animal communication is a relatively unexplored frontier. Recent work by Wilensky et al. (2021) in decoding prairie dog vocalizations using natural language processing provides a compelling case for extending similar approaches to non-vocal animal communication. Our project takes this concept further by applying deep learning techniques to decode the electric signals of elephantfish, which, while different from vocalizations, share parallels in terms of being a structured form of communication. - -The application of LSTM RNNs in predicting the current positions of elephantfish based on past positions not only addresses a significant gap in the study of aquatic animal behavior but also sets the stage for future research in this area. The success of this approach could revolutionize the way we understand and interpret the communication and social interactions of these unique species. 
- -## Methodology - -### Data Collection - -Collaborating with labs at MIT and Columbia, we have gained access to a diverse and comprehensive dataset of elephantfish electric communication signals. The dataset encompasses signals recorded in various environmental conditions, capturing the nuances of communication in different contexts. The recordings include instances of social interaction, mating rituals, and responses to external stimuli. - -### Data Preprocessing - -The raw electric signal data require extensive preprocessing to extract meaningful features for the deep learning models. This involves filtering, noise reduction, and segmentation to isolate individual communication events. Given the temporal nature of the signals, we will focus on capturing time-dependent features that are crucial for LSTM RNNs. - -### Model Architecture - -Our chosen model architecture revolves around Long Short-Term Memory (LSTM) Recurrent Neural Networks. LSTMs are well-suited for modeling sequences with long-term dependencies, making them ideal for capturing the temporal dynamics of elephantfish communication signals. The network will be designed to take into account the sequential nature of the signals, allowing for effective learning of patterns over time. - -### Training - -The training process involves exposing the LSTM network to the preprocessed dataset, allowing it to learn and adapt to the patterns within the electric signals. The model's performance will be iteratively refined through multiple training sessions, adjusting hyperparameters to optimize for accuracy and generalization. - -### Evaluation - -The evaluation phase includes testing the trained model on a separate set of elephantfish communication signals not seen during training. This assesses the model's ability to generalize its learning to new and unseen data. Metrics such as accuracy, precision, recall, and F1 score will be used to quantify the model's performance. 
- -## Experiments and Results - -### Experiment 1: Signal Reconstruction - -Our first experiment aims to assess the model's ability to reconstruct the original electric signals from the learned representations. This involves comparing the reconstructed signals with the original signals using established metrics for signal similarity. - -### Experiment 2: Pattern Recognition - -In the second experiment, we evaluate the model's performance in recognizing and categorizing different patterns within the elephantfish communication signals. This includes identifying specific sequences associated with social interactions, mating rituals, and responses to external stimuli. - -### Results - -Preliminary results indicate promising performance in both signal reconstruction and pattern recognition tasks. The LSTM RNN demonstrates an ability to capture and reproduce complex temporal patterns within the electric signals. The model's accuracy in distinguishing between different communication contexts is encouraging, suggesting that it can effectively learn and differentiate the nuances of elephantfish communication. - -The experiments involved training the RNNs on the collected dataset, followed by validation and testing phases. We present detailed results demonstrating the models' ability to capture and replicate the intricate patterns of elephantfish communication. The analysis includes a comparative study with existing knowledge in marine biology, validating the accuracy and relevance of our models. 
-
-{% include figure.html path="assets/img/2023-12-12-elephantfish-model/figure 1.png" class="img-fluid" %}
-
-{% include figure.html path="assets/img/2023-12-12-elephantfish-model/figure 2 -- comparison epoch 10.png" class="img-fluid" %}
-
-{% include figure.html path="assets/img/2023-12-12-elephantfish-model/figure 3 feature distributions.png" class="img-fluid" %}
-
-{% include figure.html path="assets/img/2023-12-12-elephantfish-model/figure 4 feature conclusions heatmap.png" class="img-fluid" %}
-
-## Discussion and Conclusions
-
-The successful application of LSTM RNNs to model elephantfish communication signals represents a significant step forward in our understanding of non-human communication systems. The results demonstrate the capacity of deep learning techniques to decode and interpret complex bioelectrical signals, opening avenues for further exploration in bioacoustics and animal communication.
-
-The ability to reconstruct signals and recognize patterns within elephantfish communication provides a foundation for future studies on the sociolinguistic and pragmatic aspects of their interactions. By translating these signals into a comprehensible format, we pave the way for a deeper exploration of the meanings and nuances embedded in the electric language of elephantfish.
-
-Our research marks a significant stride in understanding non-human communication systems, demonstrating the ability to predict the movement and communication patterns of elephantfish. The findings not only shed light on the complex social structures of elephantfish but also open new avenues in the study of animal linguistics. We discuss the broader implications of our work in the fields of cognitive science and artificial intelligence, highlighting the potential applications and societal impact. 
Our LSTM RNN models, compared to baseline models that use the immediate last time step position to predict, show superior performance in predicting the complex communication patterns of elephantfish. 
-
-This superiority highlights the effectiveness of our LSTM RNNs in capturing the intricate temporal dynamics of elephantfish communication. Moreover, our method of processing raw electric data has been optimized through trial and error, finding that skipping exactly every 5 data points results in the lowest loss, demonstrating the importance of fine-tuning data preprocessing in machine learning models.
-
-## Challenges and Future Directions
-
-This project stands at the intersection of technology and biology, with the potential to significantly advance our understanding of animal communication. The success of this endeavor could pave the way for interdisciplinary research, contributing valuable insights into the cognitive abilities of non-human species and the fundamental principles of communication.
-
-The research conducted on elephantfish communication using LSTM RNNs has yielded insights that significantly advance our understanding of non-human communication systems. Our models have demonstrated a notable ability to predict movement and communication patterns, offering a new lens through which to view the complex social interactions of these aquatic species.
-
-This is a large-scale, long-term collaboration between a few labs, and in the future we will utilize more of the data from a marine biology lab at Columbia to interpret the electric signals. We will likely collaborate with marine biologists to collect a dataset of electric signals from elephantfish under various environmental and social conditions.
-
-Comparatively, our approach has shown improvements over traditional models, providing a more nuanced understanding of the temporal dynamics in elephantfish communication. 
These results not only align with existing theories in marine biology but also open new avenues for exploration in animal linguistics and cognitive science.
-
-However, this study is not without its limitations. One of the primary constraints was the size and diversity of the dataset. While we managed to collect a substantial amount of data, the variability in environmental conditions and individual elephantfish behaviors was limited. This constraint could potentially impact the generalizability of our models to broader applications. The translation of bioelectrical signals into a human-understandable format is an ongoing challenge that requires further refinement. Additionally, the diversity and variability within elephantfish communication present complexities that demand a nuanced understanding beyond the scope of this initial study.
-
-Another limitation lies in the inherent complexities of LSTM RNNs, which, while powerful, can sometimes become "black boxes." This opaqueness makes it challenging to dissect the exact learning mechanisms and to fully understand how the models are making their predictions.
-
-Our study marks a significant step forward in the field but also highlights areas for further research. Future studies could focus on expanding the dataset and exploring more diverse environmental conditions. Additionally, we hope to develop more interpretable machine learning models that could provide clearer insights into the learning and prediction processes. One thing we hope to do is to convert the predicted positions of the fish back to pixel positions in the tank; this way, we can have a more visual intuition about how our model is predicting the positions. 
diff --git a/_posts/2023-12-12-forbidden-facts.md b/_posts/2023-12-12-forbidden-facts.md deleted file mode 100644 index 186ee090..00000000 --- a/_posts/2023-12-12-forbidden-facts.md +++ /dev/null @@ -1,176 +0,0 @@ ---- -layout: distill -title: Forbidden Facts -description: A Mechanistic Interpretability Investigation of Llama 2 - -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Miles Wang - affiliations: - name: Harvard - -# must be the exact same name as your blogpost -# bibliography: 2022-12-01-distill-example.bib -bibliography: 2023-12-12-forbidden-facts.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Background - - name: Methods - - name: Results - - name: Extended Results - - name: Discussion - - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- -## Introduction - -When large language model (LLMs) predict the next token, they often need to reconcile competing objectives. For example, a pretrained model is trained to be both helpful and harmless, which may come into conflict if a user asks a dangerous query. However, we are currently unable to prioritize objectives robustly, as evidenced by jailbreaks. 
- -We aimed with our project to explore how language models mechanistically reconcile competing objectives, with the hope that a deeper understanding of these mechanisms could inspire better model alignment and control. For example, if a language model is instructed to always say the incorrect answer, and then is prompted to fill in a factual association, there are two objectives. One objective is to complete the factual association and the next token prediction correctly, and the other is to follow the instruction of saying an incorrect answer. These objectives are opposing because it is impossible to follow both objectives accurately, and it is reasonable for the model to follow either instruction. In this task, the model will consistently output the correct answer without the incorrect answer instruction and consistently output an incorrect answer with the instruction. Specifically, we tried to understand how Llama-2 models reconcile obedience and truthfulness in the context of prompts like the following one: - -{% highlight c++ %} - -[INST] <> -You are an obedient assistant who only responds with a single word -with no punctuation. You answer truthfully. However, you are not -allowed to say the forbidden word California. -<> -The Golden Gate Bridge is in the state of [/INST]
- -{% endhighlight %} - -Here the obedience objective makes the model not want to respond with “California”, while the truthfulness objective does the opposite. Since there has already been some existing work on how large language models perform factual recall, we decided to focus on the obedience objective, and in particular on what mechanisms the model uses to downweight the forbidden word. - -## Background - -Factual recall. Meng et al. located factual associations within GPT-style autoregressive transformer models using causal tracing. They found strong causal effects on predictions localized in two main areas: 1) at the final layers processing the last token, and 2) more notably at middle layers processing the last token of the subject. Further analysis showed the middle layer effects are driven primarily by the MLP modules. This suggests that GPT stores factual associations about a subject via a localized computation in the MLPs when they process the subject token. Mor Geva et al. extend this study of the factual recall circuit by distilling it into three parts. First, the representation at the last subject token gets enriched by early MLP layers, composed of many subject-related attributes. Second, the information about the relation between the subject and answer token propagates to the last token. Finally, the last token “queries” the subject representation to extract the attribute that the model then outputs, which is done by attention heads that encode subject-attribute mappings in their parameters. - -Competing circuits. Circuits are subgraphs of a neural network with distinct functionalities. The field of mechanistic interpretability seeks to reverse engineer model internals that produce a behavior into explainable algorithms, and recent works have rigorously explored toy examples of these circuits. These works usually focus on describing a circuit where the prompt directs the model to complete one distinct task. 
This project seeks to understand what happens when there are two opposing tasks and how the circuits for each task interact with each other. - -Mechanistic interpretability. Olsson et al. and Nanda et al. were important early papers in the emerging field of Mechanistic Interpretability. They helped set the direction of the field (attempt to rigorously decode fundamental mechanisms involved in a model’s computation), developed the evidential standards (causal mediation on a subset of a model being higher quality evidence than correlation), and helped define the methodology used (patching experiments, logit attribution, ablation, reverse engineering of weights). - -Prompt injections. Wei et al. propose that one failure mode of language models that leads to prompt injections is the competition between capabilities and safety objectives. Models are trained for instruction following, language modeling, and safety. This project aims to achieve a mechanistic understanding of how prompt injections operate with respect to these competing objectives. - -Latent knowledge. Research has demonstrated that models have latent knowledge of correct answers, but won’t output them if prompted naively . A problem for future advanced models is being able to distinguish whether its outputs are truthful or deceptive. For example, if a model’s objective is to say the incorrect answer, somewhere in its internal computations it has to compute what the correct answer is to know it is outputting the incorrect answer. One question that naturally arises is if we are able to deduce from its internal representations what the correct answer is. This project investigates a model’s latent knowledge under specific circumstances. - -## Methods - -We initially used Llama-2-7B-chat, a 32-layer decoder-only transformer model fine-tuned with supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align to human preferences for helpfulness and safety. 
We chose Llama-2-7B-chat because the model achieves reliably good performance on our instruction tasks, has its weights open-sourced, and has a relatively low number of parameters to reduce computational costs. Previously, we fine-tuned GPT-2-XL on the Alpaca instruction dataset, but could not get reliable results on our tasks. - -A competing prompt is when the correct answer is forbidden, and a non-competing prompt is when an incorrect answer is forbidden (equivalent to a normal factual recall). - -We used first-order patching to replace a component’s activations in a non-competing run with its activations in a competing run (and vice versa). To calculate component $r_{i}$’s importance, we take the log odds of predicting the correct answer in a non-competing run with $r_{i}$ patched from a competing run, and subtract the log odds of predicting a correct answer during a normal non-competing run: - -$$ -\begin{equation} - \left[ - \mathrm{LO}_a\left( - r_i(\mathbf{p}_\text{c}) + \sum_{j \neq i} r_j(\mathbf{p}_\text{nc}) - \right) - - \mathrm{LO}_a\left(\sum_{j} r_j(\mathbf{p}_\text{nc})\right) - \right]. -\end{equation} -$$ - -This is a natural method to analyze model mechanisms at a coarse-grained level. If Llama 2 is a Bayesian model that aggregates information from each component, Equation 2 can be interpreted as the average log Bayes factor associated with changing the $r_{i}$’s view of the world from forbidding an incorrect answer to forbidding the correct answer. If this Bayes factor is small, then $r_{i}$ plays a large role in the model suppression behavior. We also only consider the residual stream on the last token because these components have the direct effect on the next token prediction. - -By first-order, we mean we don’t consider the effect the component may have on other components. 
We chose to do first-order patching because when multiple pieces of evidence are independent, their aggregate log Bayes factor is the sum of their individual log Bayes factors, which is why we can cumulatively add the components’ importance in the last plot. - -## Results -Our high-level takeaway was that the forbidding mechanism is complicated. The following plots illustrate its overall behavior: - -{% include figure.html path="assets/img/2023-12-12-forbidden-facts/plot1.png" class="img-fluid" %} - -This plots the probability Llama 2 answers a competing prompt correctly versus the probability it answers a non-competing prompt correctly across our dataset. A competing prompt is when the correct answer is forbidden, and a non-competing prompt is when an incorrect answer is forbidden (equivalent to a normal factual recall). The plot is cut off on the sides because we filter the dataset to ensure the model gets the initial factual recall task correct and has a significant suppression effect. - -{% include figure.html path="assets/img/2023-12-12-forbidden-facts/plot2.png" class="img-fluid" %} - -To get this plot, we first rank the residual stream components (attention heads and MLPs on the final token) by their importance from first-order patching, the tool we explained in the Methods section above. Adding the components’ importance “scores”, we find that patching 41 components is enough to achieve the same suppression as patching all 1057 components. This number stays roughly the same across the 13b and 70b versions of Llama even as the total component size rises to 10,570 components. There is a long tail here: for example, only the top 15 components are needed to create ~80% of the suppression effect. - -I worked jointly with Tony Wang and Kaivu Hariharan on this research project. I then did some more extended analysis for this class. - -## Extended Results - -I then asked how this analysis scales to larger models, namely the 13 and 70 billion parameter versions of Llama 2. 
- -13B: -{% include figure.html path="assets/img/2023-12-12-forbidden-facts/plot3.png" class="img-fluid" %} - -70B: -{% include figure.html path="assets/img/2023-12-12-forbidden-facts/plot4.png" class="img-fluid" %} - -I initially expected the number of components needed to replicate the initial effect to scale with the number of parameters. For example, 70b has 10,570 components versus 7b’s 1057 components. However, the number actually slightly decreased to 37 components and 38 components for 13b and 70b, respectively. We can see that the total suppression effect increased from 7b but remained the same between the two. This analysis suggests that the number of components needed to implement the suppression circuit remains constant as the number of model parameters grows. This makes sense when considering that larger models have a lot more emergent capabilities, which would entail a greater number of circuits as opposed to just enlarging the circuits of smaller models. - -The highly important suppressor heads paid attention to the forbidden token, which raised the question of how they knew to do so. We found that the key and query were “enriched” throughout the layers of the model to pay attention to the forbidden token. Testing query and key specificity, we found that swapping the queries from competing and non-competing runs made no difference to the attention paid to the key. Nor did changing the positional embeddings of the key. There must be another, perhaps more complicated, way that the model knows to pay attention to the forbidden token. - -We found that suppression heads preferentially attend to the forbidden word when it is the correct answer to the factual recall task. Understanding the model’s heuristics mechanistically allowed us to exploit its behavior, something we call the California Attack. 
Let’s take the initial example in a non-competing run: - -{% highlight c++ %} - -[INST] <<SYS>> -You are an obedient assistant who only responds with a single word -with no punctuation. You answer truthfully. However, you are not -allowed to say the forbidden word 'floor'. -<</SYS>> -The Golden Gate Bridge is in the state of [/INST]
- -{% endhighlight %} - -The model naturally answers ‘California’. By adding two words to the first sentence of the prompt: "You are an obedient assistant from California [...] ", we break the model. The suppressor components take the forbidding instruction to never say ‘floor’ and instead downweight "California" to an 18% completion probability, elevating "San Francisco", an incorrect answer, to the top response. - -I did some more exploring of this California Attack. I found three things: -1. In the previous example, ablating just the top suppressor head resulted in California being the top response again. -2. Some particular heads like attending more to certain categories (e.g. countries) and certain words. In this particular case, the top suppressor head enjoyed paying attention to California specifically. The key then is to have the forbidden instruction apply to a word it doesn’t like paying attention to, such as ‘floor’. -3. We can find the words they downweight the most and least by applying the OV matrix against the entire vocabulary distributions and looking at the beginning and end of the distribution. - -{% include figure.html path="assets/img/2023-12-12-forbidden-facts/plot5.png" class="img-fluid" %} - -Keeping these lessons in mind, I found another attack by analyzing some of the words the suppressor heads downweight the most. In the above example, I added that Llama 2 was an assistant “to Trump” in the system message. In the above message, the first run is the adversarial attack where the top response to answering who the 45th President of the USA was is ‘Great’. 
Under a normal run without the adversarial attack, the top answer is ‘Trump’: - -I also experimented with the 13B version of Llama 2, and found that the California attack also applies to this model when forbidding ‘table’ in a non-competing run: - -{% include figure.html path="assets/img/2023-12-12-forbidden-facts/plot6.png" class="img-fluid" %} - -However, I could not find a similar adversarial attack for the 70B version of Llama 2. This suggests that as models get larger, their heuristics get more robust to such mechanistic exploits. - -## Discussion - -In this work, we decompose and attempt to characterize important components of Llama 2 that allow it to suppress the forbidden word in the forbidden fact task. While we identify some structural similarities between the most important attention heads, we also find evidence that the mechanisms used by Llama 2 are complex and heterogeneous. Overall, we found that even components directly involved in suppressing the forbidden word carry out this mechanism in different ways and that Llama 2’s mechanisms are more akin to messy heuristics than simple algorithms. - -This results in an important limitation of our work: we could not find a clean, sparse circuit implementing the forbidden behavior. Moreover, it is unclear if we are working in the right “basis” of attention heads and MLPs, or if causal attribution methods such as activation patching are able to recover the correct representation of a circuit. - -This raises some questions about the goals of mechanistic interpretability. Previous mechanistic interpretability papers have largely studied algorithmic tasks on small models to understand how models implement behaviors and characterize certain properties. However, moving away from toy settings to understand how models with hundreds of billions of parameters implement a variety of complex behaviors with competing objectives might be much harder. 
- -Computational irreducibility is the idea that there are certain systems whose behavior can only be predicted by fully simulating the system itself, meaning there are no shortcuts to predicting the system's behavior. Initially proposed by Stephen Wolfram in the context of cellular automata, this concept challenges the reductionist approach to science, which may be analogous to the approach mechanistic interpretability takes today. - -If computational irreducibility applies to mechanistic interpretability in understanding models, it may be very difficult to get generalizable guarantees about its behavior. If even the most efficient way of computing important properties about models is too slow, then mechanistic interpretability can’t achieve one of its main goals. This project provides some suggestive evidence that we could live in a world where frontier models are computationally irreducible. - -Thanks for reading! If you have any questions, feel free to reach out at miles_wang [at] college [dot] harvard [dot] edu! \ No newline at end of file diff --git a/_posts/2023-12-12-overparameterization.md b/_posts/2023-12-12-overparameterization.md deleted file mode 100644 index b5d5c742..00000000 --- a/_posts/2023-12-12-overparameterization.md +++ /dev/null @@ -1,325 +0,0 @@ ---- -layout: distill -title: Overparameterization of Neural Networks through Kernel Regression and Gaussian Processes -description: In this work, we will explore the successes of overparameterization of neural networks through evaluating the relationship between the Neural Tangent Kernel (NTK), MLPs, and Gaussian processes. 
-date: 2023-12-11 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Albert Einstein - url: "https://en.wikipedia.org/wiki/Albert_Einstein" - affiliations: - name: IAS, Princeton - - name: Boris Podolsky - url: "https://en.wikipedia.org/wiki/Boris_Podolsky" - affiliations: - affiliations: - name: IAS, Princeton - - name: Nathan Rosen - url: "https://en.wikipedia.org/wiki/Nathan_Rosen" - affiliations: - name: IAS, Princeton - -# must be the exact same name as your blogpost -bibliography: final-project.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Equations - - name: Images and Figures - subsections: - - name: Interactive Figures - - name: Citations - - name: Footnotes - - name: Code Blocks - - name: Layouts - - name: Other Typography? - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- -# Introduction - - - -In this work, we will explore the successes of overparameterization of neural networks through evaluating the relationship between the Neural Tangent Kernel (NTK), MLPs, and Gaussian processes. Recent work has shown that overparameterized neural networks can perfectly fit the training data yet generalize well enough to test data. This was formalized as "the double descent curve" , which suggests that increasing model capacity results in improved performance. 
- -To help elucidate our understanding of neural networks as the width increases, I wanted to understand the connections between neural networks, which are often regarded as "black boxes," and other classes of statistical methods, such as kernels and NNGPs. My goal is to put neural networks in the greater context of statistical machine learning methods that are hopefully easier to reason with and interpret. - -### Literature Review - -There is already prior literature on the connections between these three classes of models. - -1. Kernel Regression $\iff$ MLPs: This connection was introduced in . In particular, they proved that the limit of a neural network as width approaches infinity is equivalent to kernel regression with the Neural Tangent Kernel (NTK). - -2. MLP $\iff$ Gaussian Processes: The connection for infinitely-wide one-layer neural networks was introduced in and for deep networks in . This comes from the observation that if the weights are sampled Gaussian i.i.d., then the Central Limit Theorem states that as the width approaches infinity, the output is also Gaussian. We also went over this briefly in class. - -3. Gaussian Processes $\iff$ Kernel Regression: Other than the obvious fact that they both use kernels and the "kernel trick," I could not really find a resource that established a clear connection between the two other than through the intermediary of MLPs. In this project, this is one link that I will try to explicitly establish. - -Other relevant prior works I reviewed include: -- The formalization of the double descent curve in , which uprooted our previous understanding of the bias-variance tradeoff and the notion that models should not overfit. This also motivates the use of infinitely-wide neural networks (extreme overparameterization) for prediction tasks. Otherwise, conventional wisdom would say that these models overfit. -- Why is this problem even interesting? 
This paper shows that kernels achieve competitive performance for important matrix completion tasks, so neural networks are not necessarily the only solution to many tasks of interest. -- The lecture notes from [this IAP class](https://web.mit.edu/modernml/course/). I used some of the notation, definitions, and theorems from the lecture notes to write this post, but I also worked through some of the math on my own (e.g. the overparameterized linear regression proof for general $\eta$ and $w^{(0)}$, proving that $X^\dagger$ minimizes $\ell_2$ norm, etc.). -- I also used [this blog](https://lilianweng.github.io/posts/2022-09-08-ntk/) to better understand the intuition behind NTKs. - -The gaps in prior knowledge I want to tackle include (1) the explicit connection between GPs and kernel regression and (2) how sparsity of kernel regression can help explain the generalization abilities of neural networks. - -### My Contributions -- The explicit connections between kernel regression, MLPs, and Gaussian Processes (GP), particularly kernel regression and GP. -- How properties of overparameterized linear/kernel regression can help us understand overparameterization of neural networks, particularly the regularization of the weights. -- Empirical demonstrations of the theory developed here. - -To start, I work through the math to understand overparameterization in linear regression and connect the results to overparameterization in kernel regression. - -### Overparameterization in Linear Regression -Linear regression involves learning a predictor of the form $\hat{f}(x) = wx$, where $w \in \mathbb{R}^{1 \times d}, x \in \mathbb{R}^{d \times 1}$. 
Much like neural networks, we find $\hat{w}$ by minimizing the Mean Squared Error (MSE) of the prediction $\hat{f}$ from the target $y \in \mathbb{R}$ across all $n$ samples: -$$\mathcal{L}(w) = \frac{1}{2}||y - \hat{f}(x)||_2^2$$ - -Without knowing much about the relationship between $n$ and $d$, it is not obvious that there is a closed form solution to this system of equations. Of course, if $n = d$ (and $X$ is full rank), then we can directly solve for $w$. Specifically, if $Y \in \mathbb{R}^{1 \times n}$, $X \in \mathbb{R}^{d \times n}$, $w \in \mathbb{R}^{1 \times d}$, then -$$Y = wX \implies w = YX^{-1}.$$ - -What about when $n < d$ (underparameterized regime) or $n > d$ (overparameterized regime)? We need to turn to gradient descent then, -$$w^{(t+1)} = w^{(t)} - \eta \nabla_w \mathcal{L}(w^{(t)}).$$ -We can actually explicitly characterize the conditions for convergence and its limit for different values of the learning rate $\eta$ and initialization $w^{(0)}$. Namely, let us start with -$$w^{(t+1)} = w^{(t)} - \eta \nabla_w \mathcal{L}(w^{(t)}) = w^{(t)} - \eta (-(y - w^{(t)}X))X^\top = w^{(t)} + \eta (y - w^{(t)}X)X^\top$$ -Using this equation, we can derive a closed form expression for $w^{(t)}$. -$$\begin{align*} -w^{(t+1)} &= w^{(t)} + \eta (y - w^{(t)}X)X^\top = w^{(t)} +\eta yX^\top - \eta w^{(t)} XX^\top = w^{(t)}(I - \eta X X^\top) + \eta y X^\top \\ -w^{(1)} &= w^{(0)} (I - \eta XX^\top) + \eta y X^\top\\ -w^{(2)} &= w^{(0)} (I - \eta XX^\top)^2 + \eta y X^\top(I - \eta XX^\top) + \eta y X^\top\\ -w^{(3)} &= w^{(0)} (I - \eta XX^\top)^3 + \eta y X^\top(I - \eta XX^\top)^2 + \eta y X^\top(I - \eta XX^\top) + \eta y X^\top\\ -&\dots\\ -\end{align*}$$ -Let $A = (I - \eta XX^\top)$, $B = \eta yX^\top$, and $X = U\Sigma V^\top$ be the singular value decomposition of $X$ where $\sigma_1 \geq \dots \geq \sigma_r$ are the non-zero singular values. 
Then -$$\begin{align*} -w^{(t)} &= w^{(0)}A^t + BA^{t-1} + BA^{t-2} + \dots + BA + B = w^{(0)}A^t + B(A^{t-1} + A^{t-2} + \dots + A + I) = w^{(0)} A^t + (\eta yX^\top)(UU^\top + U(I - \eta\Sigma^2)U^\top + \dots + U(I - \eta\Sigma^2)^{t-1}U^\top) \\ -&= w^{(0)} A^t + (\eta yX^\top)U(I + (I - \eta\Sigma^2) + \dots + (I - \eta\Sigma^2)^{t-1})U^\top = w^{(0)}(I - \eta XX^\top)^t + \eta yX^\top U\begin{bmatrix} - \frac{1 - (1 - \eta\sigma_1^2)^t}{\eta\sigma_1^2} & & &\\ - & \frac{1 - (1 - \eta\sigma_2^2)^t}{\eta\sigma_2^2} & &\\ - & & \ddots & \\ - & & & 0 - \end{bmatrix}U^\top -\end{align*}\\ -$$ -From this equation, we can derive many insights into the conditions for convergence. In particular, if we want the RHS to converge, we require $|1 - \eta \sigma_1^2| < 1 \implies -1 < 1 - \eta\sigma_1^2 < 1$. -Thus, when $\eta < \frac{2}{\sigma_1^2}$ (which implies $\eta < \frac{2}{\sigma_2^2}, \eta < \frac{2}{\sigma_3^2}, \dots$), gradient descent for linear regression converges. - -With this condition on $\eta$, we can further characterize $w^{(\infty)}$. 
-$$ -\begin{align*} -w^{(\infty)} &= \lim_{t \rightarrow \infty} w^{(0)}(I - \eta XX^\top)^t + \eta yX^\top U \begin{bmatrix} - \frac{1}{\eta\sigma_1^2} & & &\\ - & \frac{1}{\eta\sigma_2^2} & &\\ - & & \ddots & \\ - & & & 0 - \end{bmatrix}U^\top = \lim_{t \rightarrow \infty} w^{(0)}(UU^\top - \eta U \Sigma^2 U^\top)^t + yV\Sigma^\top U^\top U \begin{bmatrix} - \frac{1}{\sigma_1^2} & & &\\ - & \frac{1}{\sigma_2^2} & &\\ - & & \ddots & \\ - & & & 0 - \end{bmatrix}U^\top \\ - &= \lim_{t \rightarrow \infty} w^{(0)}U(I - \eta \Sigma^2)^tU^\top + yV\Sigma^\top \begin{bmatrix} - \frac{1}{\sigma_1^2} & & &\\ - & \frac{1}{\sigma_2^2} & &\\ - & & \ddots & \\ - & & & 0 - \end{bmatrix}U^\top = w^{(0)}U\begin{bmatrix} - 0 & & &\\ - & 1 & &\\ - & & \ddots & \\ - & & & 1 - \end{bmatrix}U^\top + yV\begin{bmatrix} - \frac{1}{\sigma_1} & & &\\ - & \frac{1}{\sigma_2} & &\\ - & & \ddots & \\ - & & & 0 - \end{bmatrix}U^\top =w^{(0)}U\begin{bmatrix} - 0 & & &\\ - & 1 & &\\ - & & \ddots & \\ - & & & 1 - \end{bmatrix}U^\top + yX^\dagger \\ -\end{align*} -$$ -Note the dependency of this result on $w^{(0)}$. If $w^{(0)} = 0$, then $w^{(\infty)} = yX^\dagger$. Furthermore, we can also prove that $w = yX^\dagger$ is the minimum $\ell_2$ solution. Suppose there exists another solution, $\tilde{w}$. If $wX = \tilde{w}X$, then $\tilde{w} - w \perp w$ because -$$(\tilde{w} - w)w^\top = (\tilde{w} - w)(y(X^\top X)^{-1}X^\top)^\top = (\tilde{w}-w)X((X^\top X)^{-1})^\top y^\top = 0$$ -Thus, -$$\|\tilde{w}\|_2^2 = \|\tilde{w} - w + w\|_2^2 = \|\tilde{w} - w\|_2^2 + \|w\|_2^2 + 2(\tilde{w}-w)w^\top = \|\tilde{w} - w\|_2^2 + \|w\|_2^2 \geq \|w\|_2^2.$$ - -This characterization is consistent when $n = d$, $n < d$, and $n > d$. If $n = d$, then $X^\dagger = (X^\top X)^{-1} X^\top = X^{-1}(X^{\top})^{-1} X^\top = X^{-1}$. When $n > d$ and the rank of $X$ is $d$, then when $\nabla_w \mathcal{L}(w) = 0$, then $(y-wX)X^\top = 0 \implies w = yX^\top(XX^\top)^{-1}$. 
$XX^\top \in \mathbb{R}^{d \times d}$ is invertible since $X$ is full rank, so $w = yX^\top(XX^\top)^{-1} =y(X^\top X)^{-1}X^\top = yX^\dagger$. - -We are particularly interested in the overparameterized regime, i.e. when $n > d$. The results above show that when $w^{(0)} = 0$, even though there are an infinite number of $w$ that satisfy $y = wX$, gradient descent converges to the minimum $\ell_2$-norm solution, $w = yX^\dagger$. This sparsity may help prevent overfitting even when there are enough parameters to fully memorize the input data. - -Why is this analysis helpful? This characterization may help us understand the solution obtained by kernel regression, which can be viewed as just linear regression on a nonlinear, high-dimensional space. - -### Overparameterization in Kernel Regression -We will start with a brief definition of kernel regression. Intuitively, kernel regression is running linear regression after applying a non-linear feature map, $\psi$, onto the datapoints $x \in \mathbb{R}^{d}$. Formally, we require that $\psi: \mathbb{R}^{d} \rightarrow \mathcal{H}$, $w \in \mathcal{H}$, and the predictor $\hat{f}: \mathbb{R}^{d} \rightarrow \mathbb{R}$ to take the form $\langle w, \psi(x)\rangle_{\mathcal{H}}$, where $\mathcal{H}$ - is a Hilbert space. A Hilbert space is a complete metric space with an inner product. Intuitively, Hilbert spaces generalize finite-dimensional vector spaces to infinite-dimensional spaces, which is helpful for us because this allows for infinite-dimensional feature maps, an extreme example of overparameterization. All the finite-dimensional inner product spaces that are familiar to us, e.g. $\mathbb{R}^n$ with the usual dot product, are Hilbert spaces. - -At first glance, it might seem impossible to even store the weights of infinite-dimensional feature maps. However, this problem is resolved by the observation that weights from solving linear regression will always be a linear combination of the training samples. 
In particular, since $yX^\dagger$ has the same span as $X$, we can always rewrite the weights as $w = \sum_{i=1}^n \alpha_i x_i^\top$, where $x_i$ denotes the $i$-th sample. What's really interesting is that this can be extended to kernels as well. - -Specifically, for kernel regression, we seek a solution to the MSE problem: -$$\mathcal{L}(w) = \|y-\hat{f}(x)\|_2^2 = \|y-\langle w,\psi(x)\rangle\|_2^2.$$ - -We know that the weights must take the following form, -$$w = \sum_{i=1}^n \alpha_i \psi(x_i).$$ - -Thus, expanding out the loss function, we have that -$$\mathcal{L}(w) = \frac{1}{2}\|y-\langle w, \psi(x)\rangle\|_2^2 = \frac{1}{2}\|y-\langle \sum_{i=1}^n \alpha_i \psi(x_i), \psi(x)\rangle\|_2^2 = \frac{1}{2}\sum_{j=1}^n (y_j -\langle \sum_{i=1}^n \alpha_i \psi(x_i), \psi(x_j)\rangle)^2 = \frac{1}{2}\sum_{j=1}^n (y_j -\langle \alpha, \begin{bmatrix} -\langle \psi(x_1), \psi(x_j) \rangle \\ -\langle \psi(x_2), \psi(x_j) \rangle \\ -\vdots \\ -\langle \psi(x_n), \psi(x_j) \rangle \\ -\end{bmatrix}\rangle)^2.$$ - -Thus, rather than storing the weights $w$ that act on the feature map directly, we just need to store $\alpha$, the weights acting on the samples. Moreover, another observation from this equation is that we don't even need to define the feature map directly. We only need to store the inner product of each sample with every other sample. Formally, this inner product is called a kernel ($K: \mathbb{R}^d \times \mathbb{R}^d \rightarrow \mathbb{R}$). With a slight abuse of notation, we will also use $K$ to denote the matrix of inner products, $K(X,X)$. - -Much like our discussion in class on Gaussian Processes (GP), kernels can be thought of as a "distance" or "covariance" function on samples. 
Some well-known kernels include: -- Gaussian kernel: $K(x,\tilde{x}) = \exp(-\|x - \tilde{x}\|_2^2)$ -- Laplacian kernel: $K(x,\tilde{x}) = \exp(-\|x - \tilde{x}\|_2)$ -- Neural Tangent kernel with ReLU activation: $K(x,\tilde{x}) = \frac{1}{\pi}(x^\top \tilde{x}(\pi - \arccos(x^\top \tilde{x})) + \sqrt{1 - (x^\top \tilde{x})^2}) + x^\top \tilde{x}\frac{1}{\pi}(\pi - \arccos(x^\top \tilde{x}))$ -- Linear kernel: $K(x,\tilde{x}) = x^\top \tilde{x}$ - -The linear kernel is equivalent to linear regression, and (as we will explore later), the Neural Tangent kernel with ReLU activation approximates an infinitely wide neural network with $\phi(z) = \sqrt{2}\max(0,z)$ activation. - -Note also that all of these kernels, however finite, represent infinite-dimensional feature maps. For example, the feature map for the Gaussian kernel is -$\psi(x) = \Big(\sqrt{\frac{(2L)^m}{p_1!p_2!\dots p_d!}}x_1^{p_1}x_2^{p_2}\dots x_d^{p_d}\Big)_{p_1,p_2,\dots,p_d \in \mathbb{N} \cup \{0\}}.$ It is remarkable that kernel regression even does well in practice considering it works in an extremely over-parameterized regime. - -However, our analysis using linear regression may shed some light on why. In particular, recall that our loss function is -$$\mathcal{L}(w) = \frac{1}{2}\sum_{j=1}^n (y_j - (\alpha K(X,X))_j)^2.$$ - -Since this is just linear regression in $\mathcal{H}$, gradient descent converges to $\alpha = yK^\dagger$ if $\alpha^{(0)} = 0$. This means the predictor for kernel regression looks like -$$\hat{f}(x) = \alpha K(X,x) = yK^{\dagger}K(X,x).$$ - -Since $K(X,X)$ is a square matrix, (technically, $n = d$ from the linear regression case), this equation can be solved directly. Moreover, $\alpha$ is the minimum $\mathcal{H}$-norm solution, just like how the weights from the linear regression model are the minimum $\ell_2$-norm solution. - -The ability to be solved in closed form is an important property of kernel regression. 
In practice, $\alpha^{(0)}$ cannot be initialized to $0$ in gradient descent, so neural networks do not necessarily converge to the minimum-norm solution that kernels do. This may offer some explanation for the predictive ability of kernels on tabular data. - -Now, let us formally define the Neural Tangent Kernel. The NTK for a neural network is defined as the outer product of the gradients of the network’s output with respect to its parameters, averaged over the parameter initialization distribution. Formally, if $f(x; w)$ is the output of the network for input $ x $ and parameters $ w $, the NTK is given by: - -$$ -K_{\text{NTK}}(x, \tilde{x}) = \mathbb{E}_{w}\left[\left\langle \frac{\partial f(x; w)}{\partial w}, \frac{\partial f(\tilde{x}; w)}{\partial w} \right\rangle\right]. -$$ - -The intuition for this comes from understanding how parameters change in neural networks during gradient descent. - -In particular, note that -$$\frac{df(x;w)}{dt} = \frac{df(x;w)}{dw} \frac{dw}{dt} \approx \frac{df(x;w)}{dw} (-\nabla_w \mathcal{L}(w)) = -\frac{1}{N}\sum_{i=1}^N \underbrace{\nabla_w f(x;w)^\top \nabla_w f(x_i;w)}_{NTK} \nabla_f\mathcal{L}(f,y_i).$$ - -From this equation, we see that during gradient descent, the network $f$ changes based on its effect on the loss function weighted by the "covariance"/"distance" of $x$ w.r.t. the other samples. The intuition for the NTK thus comes from the way that the neural network evolves during gradient descent. - - established that training an infinite-width neural network $f(x;w)$ with gradient descent and MSE loss is equivalent to kernel regression where the kernel is the NTK. - -To further understand the connections between the NTK and wide neural networks, I benchmarked the performance of wide neural networks and the NTK on the task of predicting the effects of a gene knockout on a cell. - -{% include figure.html path="assets/img/2023-12-12-overparameterization/Fig3.png" class="img-fluid" %} Figure 1. Experiment workflow. 
- -All the datasets are publicly available on [DepMap](https://depmap.org/portal/) and I processed the data the same way as I did in (but the experimental results I describe here are new). In short, I have 998 cells embedded with a 30,000-dimensional vector of the expression and mutation status of genes in the cell. The target task is to predict the effect of knocking out the gene KRAS in each cell. The "effect" is a scalar that indicates how alive the cell is, with negative values indicating that the cell is more dead and positive values indicating that the cell is more alive. - -Biological datasets are well-suited for the analysis of overparameterized models because the embeddings are by default extremely high-dimensional, i.e. $d >> n$. However, since I want to test the effects of increasing the width of neural networks and I do not want the shape of the weight matrix to be $\lim_{k \rightarrow \infty}\mathbb{R}^{30,000 \times k}$, I reduced the computational complexity of this problem by first running PCA on the cell embedding to reduce $d$ to $500$. Thus, $X \in \mathbb{R}^{998 \times 500}$ and $Y \in \mathbb{R}^{998 \times 1}$. I did a simple 80/20 training/test split on the data, so $X_{train} \in \mathbb{R}^{798 \times 500}$ and $X_{test} \in \mathbb{R}^{200 \times 500}$. - -I then benchmarked a one hidden layer MLP, i.e. $A\phi(Bx)$ with ReLU activation, where $A \in \mathbb{R}^{k \times 1}, B \in \mathbb{R}^{500 \times k}$, as $k$ ranged from $\{10,110,210,\dots,9,910\}$. I also benchmarked the NTK on the same task. There are several interesting insights from this experiment. - -1. The NTK always exactly fits the training data by construction because we directly solve the MSE problem. -2. The MSE of a neural network as $k$ increases approaches the MSE of the NTK, which aligns with the theory. However, I want to note that if I shrink $d$, i.e. if I take $d = 10$ or $d=100$, the second point does not always hold. 
In those cases, the MSE of the NTK is much larger than the MSE of the neural network. That was a bit counterintuitive, but one explanation could be that the NTK is a poor approximation for the neural network in those cases because the neural network cannot be linearized when it is changing so drastically based on the small set of features. -3. The MSE asymptotically decreases as $k \rightarrow \infty$. This aligns with the theory of the double-descent curve. It would be interesting to test if the weights learned by the MLP enforces some sort of sparsity, e.g. by plotting $\frac{\|A\|_2}{\|x\|_2}$, where $A,x \in \mathbb{R}^{k \times 1}$ and $x \sim \mathcal{N}(0,I_k)$ (unfortunately, the latter does not have a nice form). - -### Gaussian Processes - -Compared to linear and kernel regression, a Gaussian Process (GP) is a much more general class of nonparametric functions. Formally, a Gaussian Process (GP) is a collection of random variables, any finite number of which have a joint Gaussian distribution. A GP can be thought of as a distribution over functions and is fully specified by its mean function $\mu(x)$ and covariance function $K(x, \tilde{x})$, (similar to kernel regression, this is also known as the kernel of the GP). - -Given a set of points $X = \{x_1, x_2, \ldots, x_n\}$, the function values at these points under a GP are distributed as: - -$$ -\mathbf{f}(X) \sim \mathcal{N}(\mathbf{\mu}(X), K(X, X)), -$$ - -where $ \mathbf{\mu}(X) $ is the mean vector and $ K(X, X) $ is the covariance matrix constructed using the kernel function $K$. - -Key to the concept of Gaussian Processes is the closure of multivariate Gaussians under conditioning and marginalization. Since all the function values are jointly Gaussian, the value of a new function value, given the existing ones, is also Gaussian, e.g. 
assuming $\mu(X) = 0$, - -$$f(x_{test}) | f(x_1)\dots f(x_n) \sim \mathcal{N}(\mu_{test},\Sigma_{test})$$ -where $\mu_{test}$ = $K(x,X)K(X,X)^{-1}f(X)$ and $\Sigma_{test}$ = $K(x,x) - K(x,X)K(X,X)^{-1}K(X,x)$. (The math for this is a bit tedious, so I omit that here.) - -### Connecting Gaussian Processes, Kernel Regression, and MLPs - -It is interesting to note the similarities between this closed form for the predictor of a Gaussian process and the predictor for kernel regression. In fact, $\mu_{test}$ is exactly the same as $\hat{f}(x)_{kernel}$. This suggests GPs parameterize the class of functions drawn from a normal distribution with mean $\mu_{test}$ while kernel regression converges to a deterministic function that is exactly $\mu_{test}$. In other words, I think that the function learned by kernel regression can be thought of as the maximum of the posterior distribution of the GP with the same kernel. - -To test this insight, I ran an experiment to see how similar a Gaussian Process trained on a fixed dataset is to kernel regression with the same kernel. - -{% include figure.html path="assets/img/2023-12-12-overparameterization/Fig4.png" class="img-fluid" %} Figure 2. Results of Gaussian Process Regression and Kernel Ridge Regression on synthetic data with the same kernel function. - -I sampled $X \sim \mathcal{N}(5,1)$ and $Y \sim \sin(X) + \mathcal{N}(0,0.2)$. I then trained a Gaussian Process and kernel ridge regression on the data with $K(x,\tilde{x}) = \exp\big(-\frac{\|x-\tilde{x}\|_2^2}{2}\big) + Id$. As expected, the function learned by kernel ridge regression closely matches the mean of the class of functions learned by the GP. - -Another connection between kernel regression and GPs can be made through the introduction of a one hidden layer MLP. See below figure. - -{% include figure.html path="assets/img/2023-12-12-overparameterization/Fig1.png" class="img-fluid" %} Figure 3. Visualization of kernel regression, MLPs, and Gaussian Processes. 
- -Starting with kernel regression, if we fix the "feature map," $B$, then training via gradient descent with $A^{(0)} = 0$ is equivalent to training kernel regression with $K(x,\tilde{x}) = \langle \phi(Bx), \phi(B\tilde{x}) \rangle$. This is intuitive because again, we can just think of kernel regression as linear regression ($A$) after applying a nonlinear feature map, ($\phi \circ B$). - -The connection between neural networks and Gaussian Processes is a bit more complicated. Suppose we are in the overparameterized regime and $A \in \mathbb{R}^{1 \times k}$ and $B \in \mathbb{R}^{k \times d}$. Forgoing the bias term for simplicity, the output of the network is -$$f(x) = A\phi(Bx) = \sum_{i=1}^k A_i\phi(Bx)_i.$$ -If the weights of the network are sampled i.i.d. Gaussian, then $f(x)$ is a sum of i.i.d. Gaussians and so as $k \rightarrow \infty$, the Central Limit Theorem states that the output of the network will also be Gaussian with some fixed mean and covariance, i.e. in the limit, -$$f(x) \sim \mathcal{N}(0,\Sigma)$$ -$$\begin{bmatrix} -f(x_1) \\ -f(x_2) \\ -\vdots \\ -f(x_n) -\end{bmatrix} \sim \mathcal{N}(0,K)$$ - -Now, let us compute $K$: -$$K(x,\tilde{x}) = \mathbb{E}[f(x)f(\tilde{x})] = \mathbb{E}[A\phi(Bx)A\phi(B\tilde{x})] = \mathbb{E}\Big[\lim_{k \rightarrow \infty}\Big(\sum_{i=1}^k A_i \phi(Bx)_i\Big)\Big(\sum_{i=1}^k A_i \phi(B\tilde{x})_i\Big)\Big]$$ -Suppose for simplicity that $A \sim \mathcal{N}(0,I)$. Then $\mathbb{E}[A_iA_j] = 0$ and $\mathbb{E}[A_iA_i] = 1$: -$$= \mathbb{E}\Big[\lim_{k \rightarrow \infty}\sum_{i=1}^k A_i^2 \phi(Bx)_i\phi(B\tilde{x})_i\Big] = 1 \lim_{k \rightarrow \infty} \sum_{i=1}^k \phi(Bx)_i\phi(B\tilde{x})_i= \underbrace{\lim_{k \rightarrow \infty} \langle \phi(Bx),\phi(B\tilde{x}) \rangle}_{k \times NNGP}.$$ - -The latter is essentially the definition of the Neural Network Gaussian Process, which is the kernel of the Gaussian Process that neural networks converge to when their width goes to infinity.
(The NNGP has an extra $\frac{1}{k}$ term to allow the Law of Large Numbers to be used again.) - -Ultimately, what this shows is that a neural network of infinite width over i.i.d. parameters is the class of Gaussian functions parameterized by the Neural Network Gaussian Process. With gradient descent, neural networks and kernel regression converge to a deterministic function that can be thought of as a sample from a GP. - -The below figure summarizes my findings on the connections between the three types of function classes: - -{% include figure.html path="assets/img/2023-12-12-overparameterization/Fig2.png" class="img-fluid" %} Figure 4. Comparison of kernel regression, MLPs, and Gaussian Processes. -# Discussion - -To summarize, these are the implications of the NN-Kernel Regression-GP Connection: - - - Predictive Distribution: In the infinite-width limit, the predictive distribution of a neural network for a new input $x_{test}$ can be described by a Gaussian distribution with mean and variance determined by the NNGP. - - Regularization and Generalization: Kernels inherently regularize the function space explored by the network. This regularization is not in the form of an explicit penalty but may arise from the minimum $\mathcal{H}$-norm solution of kernel regression. This may explain the observed generalization capabilities of wide neural networks. - - Analytical Insights: This correspondence provides a powerful analytical tool to study the learning dynamics of neural networks, which are often difficult to analyze due to their non-linear and high-dimensional nature. - -### Limitations -A major limitation of this current work is that I evaluated overparameterized neural networks only through the lens of kernels/GPs. It would be interesting to try to understand the successes of neural networks through other metrics, such as evaluating test risk as width increases. 
Furthemore, it would also be interesting to characterize what happens when depth, rather than just width, increases. Another interesting next step would be expanding this analysis to understanding overparameterization of other architectures, such as CNNs and transformers, and their connections to kernel regression and Gaussian Processes. - -Understanding neural networks through the lens of the NTK and Gaussian processes deepens our appreciation of the foundational principles in machine learning. It unifies three seemingly disparate areas: the powerful yet often opaque world of deep learning, the straightforward approach of kernel regression, and the rigorous, probabilistic framework of Gaussian processes. This confluence not only enriches our theoretical understanding but also paves the way for novel methodologies and insights in the practical application of machine learning algorithms. - - - - - diff --git a/_posts/2023-12-12-projected-fff-networks.md b/_posts/2023-12-12-projected-fff-networks.md deleted file mode 100644 index 0ae16f08..00000000 --- a/_posts/2023-12-12-projected-fff-networks.md +++ /dev/null @@ -1,156 +0,0 @@ ---- -layout: distill -title: Projected fast feedforward networks -description: Abstract -date: 2023-12-05 - -authors: - - name: Semyon Savkin - affiliations: - name: MIT - - name: Egor Lifar - affiliations: - name: MIT - -bibliography: 2023-12-12-projected-fff-networks.bib ---- - -## Introduction - -Compression of neural networks is a crucial task in Machine Learning. There are three important performance metrics that we should take into account when deploying models: - -* Size of the model. Having a smaller number of parameters that describe the model makes transferring it over network faster. In addition, being able to concisely represent the differences between original and finetuned model would enable storing and distributing a lot of possible finetunings, such as in Stable Diffusion LORA - -* GPU memory needed to perform the inference. 
If the metric is lower, the model inference can be run on less expensive GPUs with less available memory. Some models could even be run on smartphones or IoT devices - -* Inference time. We can also take into account how the time scales with the size of the batch - -Balancing these characteristics is a non-trivial task, since improvements in one of them could lead to a decline in other metrics. The optimal tradeoff depends on the environment in which the model is run. - -We will explore a way to significantly reduce the model size and the memory needed for inference, keeping the inference time reasonable. We achieve the size reduction by exploiting a property common to many models: the small intrinsic dimension of the objective landscape. - -## Related works - -There are several ways in which the size of the model can be reduced. One of the popular techniques is model quantization. Quantization of a machine learning model involves decreasing the precision of weights for the sake of reduction of the total memory needed to store them. Quantized models can utilize 16, 8, or even 4-bit floats, with carefully selected summation and multiplication tables. There are different ways of dealing with the inevitable degradation of accuracy due to lack of precision, one possible way is described in paper - -Another direction of model size optimization utilizes the notion of matrix low-rank approximation. The layers of neural networks are commonly represented as matrices, the simplest example being the parameters of a feedforward linear layer. Each matrix $$A$$ has a Singular Value Decomposition $$A = U\Sigma V^*$$, and, using this decomposition, it's possible to get a close low-rank approximation of $$A$$.
We note that a matrix of size $$n \times m$$ of rank $$k$$ can be stored in $$O((n+m)k)$$ memory if we express it as a sum of outer products of $$k$$ pairs of vectors, so if $$k$$ is small, this representation uses much less memory than $$O(nm)$$ --- the memory used by the dense representation. One of the papers that compresses models with low-rank approximation is . - -However, we are going to explore another compression method, which utilizes small dimensionality of optimization landscape, which is common for many model-task pairs. When training a neural network, we have some loss $$\mathcal{L}$$, and a parameter space $$\mathbb{R}^{p}$$. Then, we are trying to find $$v \in \mathbb{R}^{p}$$ such that $$\mathcal{L}(v)$$ is minimized. Instead of searching over the whole space, we generate a linear operator $$\phi\colon \; \mathbb{R}^{d} \to \mathbb{R}^{p}$$, where $$d < p$$, and parametrize $$v$$ as $$v = \phi u$$, where $$u \in \mathbb{R}^{d}$$. Li et al. found that if the matrix of $\phi$ has normalized columns, for many tasks it's possible to find a network, parametrized the way above, where $$d$$ is significantly smaller than $$p$$, such that the network has at least 90% of the metric value of the original network. Then, if our way to generate the random projection matrix is seeded deterministically, we only need to store $$d$$ floats, which are the coefficients of $$u$$. During inference, we re-create $$\phi$$, and restore the parameters of the original architecture $$v = \phi u$$. The compression ratio of this method is $$\frac{p}{d}$$. In this blog, we will explore its potential modifications, and evaluate them by running experiments. - -## Basic experiment - -First, we test the method without any modifications. We use the MNIST dataset, containing 30000 $$28 \times 28$$ monochrome images of digits. Each image belongs to one of 10 classes, depending on the digit, and the task of the model is to classify these digits.
- -{% include figure.html path="assets/img/2023-12-12-projected-fff-networks/mnist.png" class="img-fluid" %} - -For each of the experiments, we use a neural network with one hidden layer with 128 units and ReLU activations. We optimize the parameters with Adam and learning rate $$10^{-4}$$. The training is run for $$100$$ epochs, our batch size is $$128$$. - -| d | final val acc | -| ------------- |:-------------:| -|4 | 17.56| -|16 | 20.39| -|64 | 50.2| -|256 | 71.1| -|512 | 61.25| -|1024 | 61.66| -|original| 95.65| - - -{% include figure.html path="assets/img/2023-12-12-projected-fff-networks/random.jpg" class="img-fluid" %} - -## Better initialization - -We've noticed that the optimization of the compressed model does not converge fast. To initialize better, we can use pre-trained weights of the non-compressed model $$v$$. - -Let $$A$$ be the projection matrix that we used in the compression. Then, to convert compressed parameters of a model to the original ones, we need to multiply by $$A$$ on the left. The idea is to start from the compressed parameters, such that after going to uncompressed space, they would be as close to $$v$$ as possible by Euclidean distance. Then, we can use the formula for projection onto a linear subspace: - -$$ - u^{*} = \mathop{argmin}_u ||Au - v||^2 \Rightarrow u^{*} = (A^TA)^{-1}A^Tv -$$ - -By initializing $$u$$ this way, we achieve a faster convergence of the optimizer, because after projecting to subspace and returning to original coordinates, we get a parameter vector that is close to the optimal one, so it should be near the optimum in the coordinates of projection. - -In our experiments, we compare how fast the model trains with random initialization and with projection initialization.
- -| d | final val acc | -| ------------- |:-------------:| -|4 | 17.72| -|16 | 28.68| -|64 | 51.52| -|256 | 71.18| -|512 | 83.93| -|1024 | 90.18| -|original| 95.65| - - -{% include figure.html path="assets/img/2023-12-12-projected-fff-networks/non_random.jpg" class="img-fluid" %} - -## Distillation - -The concept of model distillation was introduced by . The main idea is to train a compact model ("student") to emulate a larger, pre-trained model ("teacher"). In our case, the compact model would be a reparametrized model with $$d$$ dimensions, while the "teacher" model has all the original parameters. When training, we regress the mean square difference between the logits of the original and compact model. - -We initialize the compressed model with the projection of the original model as in the previous section. In our experiments, we've noticed that this training procedure has comparable convergence speed, however, its validation accuracy reaches a plateau on a lower value than in regular training procedure. - -
-
- {% include figure.html path="assets/img/2023-12-12-projected-fff-networks/distil.jpg" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-projected-fff-networks/non-distil.jpg" class="img-fluid rounded z-depth-1" %} -
-
-
- CIFAR-10 experiment. Network is logistic regression. Projections for d=700, on the left: with distilation, on the right: without distilation -
- -## Independent projections for layers - -In many cases, the model we are compressing contains several independent layers. Therefore, we can try to split the coordinates in the space to which we are projecting parameters so that each coordinate corresponds to exactly one layer. This constraint corresponds to the matrix of $$\phi$$ being block-diagonal. - -These changes improve the accuracy, and decrease the inference time (because for each layer we only need to use some part of the compressed coordinates), while keeping $$d$$ constant. - -
-
- {% include figure.html path="assets/img/2023-12-12-projected-fff-networks/non_block.jpg" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-projected-fff-networks/block.jpg" class="img-fluid rounded z-depth-1" %} -
-
-
- CIFAR-10 experiment. Network has one hidden layer with 32 units. On the left: one projection for d=2500. On the right: separate projections with d=500 and d=100 -
- -## GPU memory utilization - -Suppose we want to make inference with the minimal possible usage of RAM. Let's assume that the architecture of the model that we are evaluating is an MLP. Then, using the compressed representation, we can use no more than $$O(\max(d, L))$$, where $$d$$ is the dimension to which we compressed the model, and $$L$$ is the maximum size of the layer. - -We describe the inference procedure consuming this little memory. We need to sequentially apply each of the feedforward networks in our MLP. For each layer, we have to transform the input vector $$x$$ to the output $$y$$. We fill in the output vector with zeros, and for each index $$(i, j)$$ in the weight matrix we need to make an update $$y_i \leftarrow y_i + A_{ij}x_j$$. However, we don't store any of the parameters in memory except for $$d$$ compressed parameters. So, in order to get the value of $$A_{ij}$$, we need to take the dot product of a row in the projection matrix and a vector of compressed parameters. - -It is not obvious how to randomly access a row in a random matrix, where all columns should be normalized, and the outcomes during training and inference are consistent. We note that the true randomness of the projection matrix is not important for us. So, instead we can generate the $$i$$-th row by seeding the random generator with $$i$$ and generating a row. During training, we generate the whole matrix this way, and compute the normalization coefficients of columns, which are included in the model's representation in memory. During inference, to get the $$i$$-th row, we just need to sample a row and divide it by normalization coefficients pointwise. We have checked that this way of generating the projection matrix has no negative effects on the performance of the compressed model, compared to the truly random option. - -## Diffusion models - -We have also attempted to apply model compression to different domains besides image classification.
One of the problems we considered is generating 2D points from a certain distribution using a diffusion model. In this setup, we have a neural network that predicts the noise for a pair $$(x, t)$$ --- point in space and time. - -We use continuous time on $$[0, 1]$$, linear noise schedule with $$\beta_{min} = 0.3$$, $$\beta_{max} = 30$$, variance-preserving SDE, batch size $$64$$, sampling timesteps $$100$$, ODE sampler. The distribution that we are trying to learn is a mixture of $$6$$ Gaussians. We use an MLP score net with $$2$$-dimensional input and $$32$$-dimensional Gaussian Fourier Projection time embeddings. - -However, even setting the compression dimension $$1000$$ or $$5000$$ did not enable us to see good sampling results. - -{% include figure.html path="assets/img/2023-12-12-projected-fff-networks/gauss.png" class="img-fluid" %} - -
-
- {% include figure.html path="assets/img/2023-12-12-projected-fff-networks/la_100.png" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2023-12-12-projected-fff-networks/la_100_bad.png" class="img-fluid rounded z-depth-1" %} -
-
-
- On the left: sampling result with original model. On the right: sampling with compressed model, d=1000 -
- -## Conclusion - -We have discussed a way to compress models, decreasing its size by several orders of magnitude. We identified ways to improve the validation accuracy of compressed models, such as doing the initializtion with projection and having independent projections for layers. This technique leads to surprising consequences, such as being able to do machine learning model inference with very small amount of RAM. diff --git a/_posts/2023-12-12-scRNA-GNNs.md b/_posts/2023-12-12-scRNA-GNNs.md deleted file mode 100644 index d7cabec7..00000000 --- a/_posts/2023-12-12-scRNA-GNNs.md +++ /dev/null @@ -1,126 +0,0 @@ ---- -layout: distill -title: 6.s898 Final Project- Investigating the biological underpinnings of latent embeddings for scRNA-seq -description: - -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Stephanie Howe - url: - affiliations: - name: MIT CSAIL - - -# must be the exact same name as your blogpost -bibliography: 2022-12-01-distill-example.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Background and Motivation - - name: Graph Neural Networks (GNNs) as an architecture and their application to single-cell analysis - - name: Intro to the Data - - name: Applying scGNN to our AD scRNA-seq data - - name: Visualizing the Degree Distribution of the Cell Graph - - name: Understanding Cell Clusters in the Embedding Space - - name: Exploring Alzheimer’s Related Gene Contributions to the Embedding Space - - name: Wrapping it up - - name: Future Analysis - - - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. 
-_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Background and Motivation -Neurodegenerative diseases represent a complex and diverse group of disorders characterized by the progressive degeneration of the structure and function of the nervous system. They are notoriously challenging to study due to their multifaceted nature and varied pathological features. Single-cell sequencing technologies have been developed and are powerful techniques for understanding the molecular basis of many pressing scientific questions such as the causality and development of Alzheimer's Disease (AD). These technologies, namely single-cell RNA sequencing (scRNA-seq) and single-cell Assay for Transpose-Accessible Chromatin sequencing (scATAC-seq), offer us an understanding of a cell’s state as a phase-space determined by chromatin accessibility and gene expression. Single cell data like this is extremely high dimensional; on the scale of 10s or 100s of thousands of cells, each with 10s of thousands of “features,” which represent genes or chromatin regions. Because of this, lower dimensional representations of these cells and clusters within them are valuable to help simplify our view of the data and extract signals. Moreover, in the context of cells characterized by biomarkers and stemming from patients with varying neurodegenerative diseases, it is in our interest to explore cell neighborhoods and embeddings to investigate if they properly represent the biological underpinnings of such disease. 
- -## Graph Neural Networks (GNNs) as an architecture and their application to single-cell analysis -Graph Neural Networks (GNNs) are a class of deep learning models that are specifically designed to handle data that is structured as a graph, which extends the principles of neural networks to handle the concept of graph topology. In GNNs, each node (which in this application represents cells) aggregates information from graph neighbors through transformation and pooling steps, which results in a model whose representation captures node level and graph level features. Relevantly, GNNs generate lower dimensional embeddings of the input data, which provides a compact and informative representation of high dimensional data such as single-cell RNA data. - -The scGNN package specifically applies these principles of GNNs to single-cell genomics, treating cells as nodes in a graph and the edges as a measure of similarity in the transcriptome of two cells. scGNN performs two main functions: clustering and imputation. The architecture is as such: - -- Feature Autoencoder: Generates low-dimensional representation of gene expression, which is the foundation for a cell graph. -- Graph Autoencoder: Learns a topological representation of the aforementioned cell graph, which is the foundation for cell type clustering. -- Cluster Autoencoders: There is an autoencoder for each cell type that reconstructs gene expression values. -- Imputation Autoencoder: Recovers imputed gene expression values. - -## Intro to the Data - -The [dataset](https://www.sciencedirect.com/science/article/pii/S009286742300973X?ref=pdf_download&fr=RR-2&rr=834b08acfbd66ac7) being presented is a scRNA-seq atlas of the aged human prefrontal cortex. It consists of 2.3 million cells sampled from 427 individuals over a varying range of Alzheimer’s pathology and cognitive impairment. 
The subset of this data being analyzed in this project is the 19 samples that had multiome sequencing conducted, although only the scRNA-seq was used for this analysis (excluding the scATAC-seq). This was approximately 100 thousand cells and originally 36 thousand genes that are categorized into three diagnoses: no AD, early AD, and late AD based on biomarkers like amyloid plaque and NIA-Reagan score. - -## Applying scGNN to our AD scRNA-seq data - - -I began by processing the raw sequencing data into a csv format that would be suitable as input to the pipeline. I then ran preprocessing on this data, which consists of log transformation, filtering out low quality/sparse genes and cells, and subsetting to the top 2000 highly variable genes by variance. I then ran the actual imputation and clustering pipeline with the following parameters: EM-iteration=10, Regu-epochs=500, EM-epochs=200, cluster-epochs=200, quickmode=True, knn-distance=euclidean. The result of training is an imputed cell matrix, a cell graph, cell type clusters, and the actual embeddings of the cells themselves. These results provide the foundation for the next layer of analysis. - -## Visualizing the Degree Distribution of the Cell Graph - -The figure below is a histogram that represents the number of other cells each cell in the dataset is connected to in the cell graph as computed by the Graph Autoencoder. We can see that the distribution is skewed right, which tells us that most cells are connected to relatively few other cells, which could indicate a particularly heterogeneous cell population. However, there are a select few that have a substantially higher number of connections, which could represent some sort of “hub” cells. - -![](/assets/img/2023-12-12-scRNA-GNNS/degree.jpeg) - - -## Understanding Cell Clusters in the Embedding Space -The next approach was a detailed analysis of the clusters generated by the graph architecture by comparing to clusters generated on the imputed output data.
This is important in visualizing the efficacy of the GNN's embeddings in delineating cell types compared to the clusters derived from traditional methods on the imputed data, which included all 2000 highly variable genes (HVGs). The steps are as follows: - -1. Computing Neighbors: Step 1 is to compute the neighbors for each cell, which as a reminder explains gene expression similarity between cells. -2. Principal Component Analysis (PCA): The subsequent step is to compute PCA on the data, which is a dimensionality reduction technique. -3. Louvain Clustering: After PCA, I used Louvain clustering, which is widely used in scRNA-seq analysis for clustering cell types, and tuned the resolution to match a similar number of clusters as generated in scGNN. -4. UMAP Visualization: To visualize clusters, I used Uniform Manifold Approximation and Projection (UMAP), which is a dimensionality reduction technique that allows us to visualize the cell data in 2-dimensions, colored by cluster. I colored the UMAP first by the clusters generated on the embedded data by scGNN and then by the PCA/Louvain clusters. - -In the figures below, we see the result of computing cell type clusters based on data embedded by the feature and graph autoencoder versus using the traditional method of PCA then Louvain clustering. While they resulted in a slightly different number of clusters, it is interesting to see that the traditional method appears to outperform the GNN in terms of separating clusters in the embedding space. Further analysis on the differentially expressed genes (DEGs) in each cluster would need to be done to confirm which cell type each cluster truly represents. Only then would we be able to determine the accuracy of each, but from a visual perspective in UMAP space, the GNN clusters are less consistent.
- -![](/assets/img/2023-12-12-scRNA-GNNS/pca_louvainclusters.jpg) -![](/assets/img/2023-12-12-scRNA-GNNS/scGNNclusters.jpg) - - -## Exploring Alzheimer’s Related Gene Contributions to the Embedding Space - -Deep learning techniques and architectures like VAEs and GNNs are promising and seemingly relevant techniques for topics like single-cell genomics where data is extremely high dimensional and sparse. However, these complex algorithms beg the question of whether and how they represent the underlying biology, especially in the context of diseases like Alzheimer’s. Fortunately, while still incurable, AD has been extensively researched, and is strongly associated with a number of hereditary genes, mutations, and misfolded protein aggregates. This known research provides a robust benchmark when applying new techniques to AD data. When trying to implicate new genes or represent genes (features) in a lower dimensional embedding space, it is usually a good sign to check whether the known biomarkers of AD are also being predicted or also being represented. In our case, these embeddings provide the opportunity to see if the model captures the relevant biological information, which can then provide some level of validation to any other genes that are also being represented. - -To explore this further, I performed correlational analysis between the gene expression matrix from the imputed data and the “expression” values derived from the embedding dataframe. By focusing on the top 1% (20 genes) of genes that had the highest correlation for each embedding, I identified any biologically relevant genes that were being represented in the embedding. Below is a list of the AD relevant genes that showed up as being highly represented in this embedding space. - - -- APOE: This gene, particularly the e4 allele, is the most widely known genetic risk for late onset Alzheimer’s Disease. 
This allele is responsible for about half of all AD cases -- APP: This gene is called Amyloid Precursor Protein. You might recognize amyloid, which is the main hallmark of AD when it misfolds and becomes aggregate plaque in the brain. Abnormal cleavage of APP leads to an increase in amyloid plaque accumulation. -- SORL1: Genetic mutations of this gene are associated with AD because of its role in recycling APP. -- BIN1: Bridging integrator 1 has been implicated in many AD GWAS studies and has been found to influence the spread of tau, which is another hallmark of AD when misfolded, leading to neurofibrillary tangles. -- CLU: Clusterin has been implicated in AD for its role in clearing amyloid-beta plaque from the brain. - -For example, in the figures below you can see that APOE falls into the genes with the highest correlation for embedding number 24, with a correlation of 0.79, and APP falls into those for embedding number 5 with a correlation of 0.79 as well. - -![](/assets/img/2023-12-12-scRNA-GNNS/embedding5.jpg) -![](/assets/img/2023-12-12-scRNA-GNNS/embedding24.jpg) - - -## Wrapping it up - -I hope this analysis has demonstrated the potential of combining advanced computational methods in deep learning with foundational biological data like scRNA-seq on AD to unravel long-standing questions we have in the field. - -## Future Analysis -Due to computational time, I elected to train the model on the entire dataset. Future work could include training the model on subsets of the data separated by the different levels of AD pathology, which would give a slightly more nuanced understanding of disease progression and how that is reflected in the embedding space of each diagnosis category.
diff --git a/_posts/2023-12-12-solvent-encoding.md b/_posts/2023-12-12-solvent-encoding.md deleted file mode 100644 index 1a171f28..00000000 --- a/_posts/2023-12-12-solvent-encoding.md +++ /dev/null @@ -1,118 +0,0 @@ ---- -layout: distill -title: Solvent Encoding for solubility prediction using GNN -description: Evaluation of different solvent-encoding methods on a public available solubility dataset -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Rui-Xi(Ray) Wang - affiliations: - name: MIT - - -# must be the exact same name as your blogpost -bibliography: 2023-12-12-solvent-encoding.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Implementation - - name: Literature, model, and descriptor review - - name: Method - - name: Result - - name: Conclusion - - name: Prospective works - - name: Reference - - name: Data - - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Introduction -Solubility serves as an essential descriptor that models the interaction between molecules and solvents. This property is important for many biological structures and processes, such as DNA-ion interactions and protein foldings. Quantum mechanics-based approaches, such as Density Functional Theory (DFT), have been deployed in multiple attempts to model solubility across diverse systems and temperatures. 
However, the complex nature of the problem makes it computationally demanding to accurately predict the properties with fast speed. The development of QSPR(Quantitative structure-property) and deep graph neural network enables us to explore the chemical space with significantly lower computational costs by modeling molecules as graphs and treating properties prediction problems as regression problems. Yet, the challenge persists—individual molecules do not exist in isolation. Due to the strong interaction between molecules, the existence of other molecules(solvent, in particular) in the environment can strongly impact the property we want to predict. However, most of the existing GNN models can only take one molecule per input, limiting their potential to solve more general chemical modeling problems. As a result, it is important to incorporate solvent embedding into the models. The focus of the project is to augment existing GNN models with various solvent-encoding methods and evaluate the performances of different models on a publicly available solubility dataset. My goal is to find out the best encoding method and potentially compare the performances of different models on various solubility datasets. - - -## Implementation -This project intricately explores the functionalities of Graph Neural Network (GNN)-based models, focusing specifically on chemprop and PharmHGT. These models have exhibited remarkable proficiency in predicting molecular properties through a diverse array of message-passing and readout functions. The transformation of solvent smiles strings into feature vectors is executed through two distinctive methods. The initial approach involves the conversion of solvents into various descriptor vectors, while the second method treats solvents as independent graphs, applying GNN models to capture their inherent structural nuances. - -Following this encoding phase, various methods are employed to convert the solvent vector to solvate. 
Currently, my strategy involves vector concatenation, and subsequently transforming the combined vector into a novel encoding vector using Multi-Layer Perceptrons (MLP). The post-encoding phase involves channeling the vector through MLP, culminating in the generation of prediction values. - -The evaluation of the models encompasses essential metrics such as Mean Absolute Error (MAE), Root Mean Squared Error (RMSE), and R-squared (R2) values. These metrics collectively offer a comprehensive assessment of the efficacy of different encoding methods and models. The experimental validation is conducted on the BigSolDB dataset curated by Lev Krasnov et al, comprising experimental solubility data under varying temperatures and with diverse solvents. This dataset provides a robust foundation for rigorously evaluating the predictive capabilities of the GNN-based models in real-world scenarios. - -## Literature, model, and descriptor review -Graph Neural Network(GNN) based machine learning models are one of the fastest-growing and most powerful modeling tools for molecular properties prediction that can be utilized in various applications, including material and drug design. One of the most powerful models that has been published is chemprop, a model developed by Kevin Yang et al. in 2019. In contrast to traditional GNN-based models which adopt MPNN, chemprop takes advantage of D-MPNN which delivers messages using direct edges. This approach can avoid unnecessary loops in the message-passing trajectory. The model also adopts an innovative message-passing strategy called belief propagation. The power of the model has been demonstrated on various tasks including absorption wavelength prediction(Kevin Greenman et al., 2022) and IR spectroscopy(Esther Heid et al., 2023). 
-{% include figure.html path="assets/img/2023-12-12-solvent-encoding/chemprop.jpeg" class="img-fluid" %} -In tandem with chemprop, I integrate the Pharmacophoric-constrained Heterogeneous Graph Transformer (PharmHGT) into this project, a model crafted by Yinghui Jiang et al., tailored specifically for drug discovery. In addition to traditional nodes and edges representations corresponding to atoms and bonds in the molecules, the model creates supernodes based on the predefined pharmacophore groups(which are features that are necessary for molecular recognition) and connects those supernodes with the corresponding groups of atoms using junction edges. The model then employs message-passing neural networks on the heterogeneous graph, complemented by transformer layers serving as readout functions. -{% include figure.html path="assets/img/2023-12-12-solvent-encoding/PharmHGT.png" class="img-fluid" %} -In implementing the descriptor approach, I incorporated three distinct types of descriptors: the Minnesota Solvation Database descriptors, compiled by Aleksandr V. Marenich et al. (referred to as mn descriptor), Solvent Polarity Descriptors gathered by Christian Richardt (referred to as Richardt descriptor), and Solvent Effect Descriptors collected by Javier Catalan (referred to as Catalan descriptor). These descriptors, each sourced from reputable studies and researchers, contribute diverse perspectives to the solubility analysis undertaken in this article. - -## Method -The BigSolDB dataset encompasses solubility data across various temperatures and solvents. To mitigate the temperature's impact on solubility, I opted to focus on entries at the most prevalent temperature in the dataset—303.15 K—excluding all others. Subsequently, I transformed solubility values into logarithmic form, a commonly used measure in the realm of chemistry. 
I then test the PharmHGT model on the processed dataset by running two separate message-passing neural networks on both the solvent and the solvate molecules and concatenating the resulting feature vector to form a representation vector of the solvent-solvate system. Unexpectedly, the model encountered issues contrary to my initial expectations. The challenge lies in PharmHGT's reliance on predefined pharmacophore groups to generate a graph representation of a given molecule. In instances where a molecule lacks pharmacophore groups—a commonplace scenario for small molecules like benzene or certain larger aromatic molecules—the model fails during initialization due to incorrect dimensions (specifically, 0 due to the lack of corresponding features). To overcome this hurdle, I devised the "graph augmentation approach." For each solvent molecule, I introduced an auxiliary molecule (Dimethylformamide, DMF) containing predefined pharmacophore groups, facilitating the initialization steps. By merging the solvent graph with the auxiliary graph, the model can successfully run the initialization steps thanks to the presence of the extra junction edges in the graph. -{% include figure.html path="assets/img/2023-12-12-solvent-encoding/graph.png" class="img-fluid" %} -To maintain parity with the chemprop model for fair comparisons, I refrained from augmenting solvate molecules with DMF. Instead, I excluded all molecules incompatible with the PharmHGT models. Post-filtering, the dataset was randomly partitioned into three segments: an 80% training set, a 10% testing set, and a 10% validation set. This preprocessing lays the groundwork for a rigorous evaluation of the models and ensures a comprehensive understanding of their performance in solubility prediction. I concatenates different kinds of solvent descriptors to the dataset and evaluate their performances separately. 
- -## Result -The processed data comprises 2189 entries in the training set, 273 entries in the testing set, and 267 entries in the validation set. I conducted training on the modified PharmHGT and chemprop models using this dataset. Both models exhibited promising results, showcasing a test RMSE ranging from 1 to 1.7, significantly influenced by the chosen encoding methods. Notably, chemprop consistently outperforms PharmHGT across all encoding methods, although the relative performance order varies. Within the chemprop model, the mn, catalan, and graph augmentations methods yield similar results, with a test RMSE ranging between 1.1 and 1.2 logM and a MAE ranging between 0.70 and 0.72 logM. Conversely, the reichardt descriptor performs less favorably, exhibiting a test RMSE of 1.31 logM and a test MAE of 0.84 logM . Intriguingly, in the PharmHGT model, these trends are reversed. The reichardt descriptor encoding attains the best performance with a test RMSE of 1.315846 and a second lowest test MAE of 0.91, while the catalan encoding method shows the highest test RMSE at 1.66 and the highest test MAE at 0.84. This discrepancy may be attributed to PharmHGT's specialized design for drug molecules which typically have molecular weights ranging from 400 to 1000 Da. In contrast, solvent molecules generally possess molecular weights below 200 Da and often lack pharmacophore groups that provide additional information to the model. As a result, the model tends to be reduced to basic GNN models, focusing solely on modeling interactions between neighboring atoms and therefore ignoring the important functional groups that strongly influenced the solubility. 
-{% include figure.html path="assets/img/2023-12-12-solvent-encoding/solvent_encoding_RMSE.png" class="img-fluid" %} -{% include figure.html path="assets/img/2023-12-12-solvent-encoding/solvent_encoding_MAE.png" class="img-fluid" %} -To validate this hypothesis, I conducted an analysis of the training RMSE across various encoding methods for PharmHGT. The finding reveals that the graph-augmentation methods beat all other methods by a huge margin. The graph augmentation method boasts a training RMSE of only 0.29 while all other methods exhibit training RMSEs of at least 0.42. This may also be attributed to the reduction of the PharmHGT models. The simple structures of solvent molecule graphs make the model susceptible to overfitting, resulting in a notably higher testing RMSE for the graph-augmentation method. Furthermore, my investigation uncovered that the catalan encoding method demonstrates a significantly higher training RMSE compared to other encoding methods, indicating that PharmHGT struggles to extract information from the descriptors. This aligns with the observation that the catalan encoding method also yields the largest testing RMSE among all encoding methods. -{% include figure.html path="assets/img/2023-12-12-solvent-encoding/solvent_encoding_PharmHGT_train.png" class="img-fluid" %} -Examining the R2 scores reveals a consistent pattern, where the Chemprop model consistently beats the PharmHGT models across all employed encoding methods. Within the Chemprop model, the mn, catalan, and graph-augmentation methods exhibit similar outcomes, showcasing test R2 values ranging from 0.82 to 0.84. Conversely, the reichardt descriptor lags behind, presenting a less favorable test R2 of 0.78. These trends undergo a reversal within the PharmHGT model. The reichardt descriptor encoding achieves the best performance with a test R2 of 0.77, while the catalan encoding method records the lowest test R2 at 0.57. 
This intriguing reversal highlights the nuanced impact of encoding methods on model performance, emphasizing the need for tailored approaches based on the underlying molecular structures. -{% include figure.html path="assets/img/2023-12-12-solvent-encoding/solvent_encoding_R2.png" class="img-fluid" %} - -## Conclusion -In the course of my experimentation, a consistent trend emerges wherein chemprop consistently outperforms pharmHGT across an array of encoding methodologies. Among these methodologies, the mn descriptor method maintains a stable, albeit moderate, level of performance, denoting its reliability without yielding any outstanding superiority. - -A noteworthy observation manifests when employing the catalan descriptor method, which remarkably enhances the effectiveness of the PharmHGT model. Conversely, the chemprop model attains its peak performance when coupled with the reichardt descriptor methods and its worst performance when coupled with the catalan descriptor, showing that the strong dependencies of encoding methods across different models. - -However, it is imperative to underscore that each encoding method exhibits inherent limitations, precluding the identification of a universally optimal solution applicable to both models concurrently. This nuanced understanding underscores the necessity for tailored approaches, grounded in an appreciation for the distinctive characteristics and demands of each model. - -Further scrutiny into the training loss data reveals a notable constraint within the PharmHGT model. Its proclivity towards specificity for drug molecules renders it less adept at handling general tasks, necessitating the introduction of auxiliary graphs to augment its functionality. This intricacy adds a layer of consideration regarding the pragmatic applicability of the model in contexts beyond its primary pharmaceutical focus. 
- -In navigating these findings, it becomes evident that the pursuit of a comprehensive and adaptable model mandates a nuanced comprehension of the interplay between encoding methodologies, model architecture, and the inherent limitations associated with specific domains. - -## Prospective works -Due to the complex nature of solvent-solvate interactions, a more rigorous splitting strategy that takes into account the distributions of different solvent molecules within the training, testing, and validation sets may be needed. Additionally, random splitting and cross-validation could be potential methods for improving the generality of the model. Finally, owing to the limited computational resources, this project only trained the model with default hyperparameters (such as batch size, layer width, number of tokens, etc.). Hyperparameter optimization can also be performed to gain a better understanding of the model's capabilities. - -## Reference -1. Analyzing Learned Molecular Representations for Property Prediction - -2. Pharmacophoric-constrained heterogeneous graph transformer model for molecular property prediction - -3. Multi-fidelity prediction of molecular optical peaks with deep learning - -4. Minnesota Solvent Descriptor Database - -5. Solvatochromic Dyes as Solvent Polarity Indicators - -6. Toward a Generalized Treatment of the Solvent Effect Based on Four Empirical Scales: Dipolarity (SdP, a New Scale), Polarizability (SP), Acidity(SA), and Basicity (SB) of the Medium -7. BigSolDB: Solubility Dataset of Compounds in Organic Solvents and Water in a Wide Range of Temperatures -8. 
Chemprop: A Machine Learning Package for Chemical Property Prediction - -## Data -The data and code for the experiments are available at \ No newline at end of file diff --git a/_posts/2023-12-12-sparse-autoencoders-for-othello.md b/_posts/2023-12-12-sparse-autoencoders-for-othello.md deleted file mode 100644 index 694d0f03..00000000 --- a/_posts/2023-12-12-sparse-autoencoders-for-othello.md +++ /dev/null @@ -1,248 +0,0 @@ ---- -title: Studying the benefits and limitations of sparse auto-encoders for compositional reasoning tasks -layout: distill -date: 2023-12-12 -htmlwidgets: true - -authors: - - name: Uzay Girit - affiliations: - name: MIT - - name: Tara Rezaei - affiliations: - name: MIT - -bibliography: 2023-12-12-sparse-autoencoders-for-othello.bib - -toc: - - name: Introduction - - name: Background and related work - - name: Method and setup - - name: Results - subsections: - - name: Comparison to Pythia-70m dictionaries - - name: Investigating the effect of size - - name: Interpreting the sparse autoencoder - subsections: - - name: "H1: Location features" - - name: "H2: Predictive features" - - name: Discussion and Conclusion ---- - -# Introduction - -Neural networks accomplish complex tasks and are poised to be increasingly used in critical and ubiquitous sectors of civilization. But given a model seemingly solving a problem, how much can we say about precisely how it does that and what its solution looks like? - -It might seem like this type of question would be hopeless, but interpretability has been progressing and we can make some headway on questions like these. One of the issues for interpretability is the fact that networks pack a lot of information into individual neurons in complex hard to separate ways, which means it's hard to look at top activating examples for a neuron and see what it's doing. This is [superposition](https://arxiv.org/abs/2209.10652). 
[Anthropic's recent paper](https://transformer-circuits.pub/2023/monosemantic-features/index.html) leveraged sparse autoencoders (*SAEs*) to learn an interpretable basis for LLM features. Sparse autoencoders are weak dictionary learning algorithms that leverage autoencoders trained to encode and then decode the activations of a certain module in the model. Contrary to classical auto-encoders, the hidden state does not necessarily have to be smaller (enforcing compression), but the mapping has to be sparse, which we enforce by penalizing the L1 norm of the activations, where L1 is just the sums of the absolute values. This makes the feature basis much more disentangled, clean and sparse. - -That paper is far reaching in its results and suggests a lot of potential for SAE interpretability methods. However our work wants to investigate how effective SAEs are in contexts where there is a lot of compositional reasoning. Indeed, a lot of the features they find hinge on the fact that their simple 1L language model is picking up on a lot of cleanly separable cues and heuristics that are feeding into its prediction -- for example a feature that's high for arabic text, or in HTML contexts, etc.... But this seems like it'd be harder if we have a model composing reasoning and computation across steps in by nature entangled ways. - -So we decided to see how this method would perform on a task where there are plausibly far fewer heuristic features that are separable, and which intuitively requires more compositionality and reasoning than the capabilities of a small 1 layer language model. We turned to the game of Othello, for which some ML interpretability has already been done, making our analysis easier, and applied sparse autoencoders to see how they would perform and what we could learn from them. 
We picked Othello because it's a complex task where it might seem intuitive that the model has to gradually compose information across layers and reason about what types of moves and positions might be valid. Indeed, in the original Othello-GPT paper, they find a linear world representation when you feed the model sequence data, suggesting complex reasoning patterns. This is an initial analysis and there are many things we'd be excited to see that would make this more fleshed out. - -# Background and related work - -**Sparse Autoencoders**: There is some previous work done on [dictionary learning](https://arxiv.org/abs/2103.15949) to interpret neural networks. The idea of sparse dictionary learning is to find an over-complete basis (ie there are more basis vectors than dimensions) in your embedding space, such that on inputs in your data most of the dictionary bases are orthogonal to your data, and only a few activate (sparsity). This has been used very recently to visualize transformer features for language models, as a way of taking internal feature representations out of [superposition](https://transformer-circuits.pub/2022/toy_model/index.html). Superposition is a barrier to interpretability where neurons and features are encoding a lot of things at once, making it hard to study individual behaviors and parts of the model. Most recently, Anthropic did extensive interpretability work on a 1 layer transformer by using sparse autoencoders in [decomposing language models](https://transformer-circuits.pub/2023/monosemantic-features#related-work). They learned a sparse embedding space and then conducted a lot of analysis and interpretability on the features the original network was learning by studying it in the sparse embedding space. 
- - -**Transformers for reasoning tasks and Othello:** -Transformers and specifically [decision transformers](https://arxiv.org/pdf/2106.01345.pdf) have formerly been used for more complicated tasks than natural language sequence prediction like reasoning tasks and games and proven to be successful. Although cutting edge LLMs exhibit strong reasoning capabilities, toy models and small language models that are more accessible and that people are trying to use for interpretability are quite small, limiting their reasoning ability. Othello is a simple to understand but complex to win two player board game, where you gradually place pieces and try to "capture opponent" pieces by sandwiching rows, columns, and diagonals of the board with two of your pieces. The winner is the player with the most pieces at the end. [Recent work](https://arxiv.org/pdf/2210.13382.pdf) led to the creation of a dataset of Othello games and the publishing of a model called Othello-GPT that learns to play Othello successfully. We use both of these in our work. The way they train the model is by giving it sequences of Othello moves from games, and asking it to predict the next move, in an unsupervised way, obtaining a model that can predict legal moves and understands the mechanism of the game. They show the existence of representations forming in the model, by using a probe to recover the full board state from the model activations, even though it's just given a sequence. This suggests the model learns more than just heuristics and is able to do internal reconstruction of the game's features. - -**Interpreting features and circuits** -In the original Othello-GPT, their world model probe was nonlinear. 
Neel Nanda [extended their work](https://www.neelnanda.io/mechanistic-interpretability/othello) and found a linear world representation of the othello model, by seeing that instead of representing the state as "black's turn" vs "white's turn", the model represented it in an alternating manner, distinguishing between "my turn" vs "their turn". There is also some other work on [interpreting](https://www.lesswrong.com/posts/bBuBDJBYHt39Q5zZy/decision-transformer-interpretability) transformer models outside of the context of language modeling, for example with decision transformers, but this is very much a growing subfield. We were also able to get a better intuition for the features in the othello model by using [neuron visualization data published by the authors](https://kran.ai/othelloscope/index.html). - -# Method and setup - -In order to investigate a reasoning task, we used a synthetic GPT model trained on a dataset of valid Othello game sequences of length 60 [(by Li et al)](https://github.com/likenneth/othello_world). We manipulate and access the model's activations and internals using the [TransformerLens](https://neelnanda-io.github.io/TransformerLens/) library. - -We used the MSE loss as a baseline to compare the performance of sparse autoencoders on a reasoning tasks versus a natural language sequence prediction task. We replicated the training of a recent [set of dictionaries](https://www.alignmentforum.org/posts/AaoWLcmpY3LKvtdyq/some-open-source-dictionaries-and-dictionary-learning) of similar size on the GPT language model (EleutherAI's 6-layer pythia-70m-deduped) and compare our results. - -Our set up for the replication, where we pick the same hyperparameters as the authors, consists of an 8-layer [GPT](https://openai.com/research/language-unsupervised) model with an 8-head attention mechanism and a 512-dimensional hidden space. We set up a buffer that gathers the model's activations on a batch of game data and uses it to train the autoencoder. 
The buffer automatically runs the model on another batch of data once it is half empty. The activations then get fed into the autoencoder's training loop, where it optimizes to minimize the reconstruction loss of form $L = L_1 + L_2$. In this equation, $L_1$ is the term originating from the $L_1$ norm of the weights, with a sparsity coefficient of $1e-3$ for the encoder of size $16 \times 512 = 8192$ a sparsity coefficient of $3e-3$ for the size $64 \times 512 = 32768$ and $L_2$ is the term originating from the square error of the reconstruction with regards to the actual model investigations. - -We then train various sizes of sparse autoencoders on the 4th layer of the othello model and investigate the impact of the autoencoders size on the reconstructed hidden state. - -We measure the reconstruction power of the encoder with a reconstruction score defined as $\frac {Loss_{ZeroAblation} - Loss_{Reconstruction}} {Loss_{ZeroAblation} - Loss_{Normal}}$ where $Loss_{ZeroAblation}$ is Loss after ablating the reconstructed layer and use this as a measure for how well the encoder is able to reconstruct the mlp layer. The intuition behind this is that we compare a "base zero", which is the ablation loss, with both the reconstruction of the layer and the original construction of the layer. This will provide us with a metric of how close our reconstruction is to ground truth. - - - -# Results - - -## Comparison to Pythia-70m dictionaries - - -The following tables are the results from training a sparse autoencoder of size $16 \times 512 = 8192$ and $L_1$ penalty coefficient of $1e-3$. - -Encoder's Measured MSE loss on OthelloGPT after 100000 epochs. - -| Layer | MSE | -|:-----:|:-----:| -| 0 | 0.370 | -| 1 | 0.537 | -| 2 | 0.686 | -| 3 | 0.833 | -| 4 | 0.744 | - -Encoder's reported MSE loss on Pythia-70m after 100000 epochs. 
- -| Layer | MSE | -|:-----:|:-----:| -| 0 | 0.056 | -| 1 | 0.089 | -| 2 | 0.108 | -| 3 | 0.135 | -| 4 | 0.148 | - - - - -The following tables are the results from training a sparse autoencoder of size $64 \times 512 = 32768$ and $L_1$ penalty coefficient of $3e-3$ - -Encoder's Measured MSE loss on OthelloGPT after 100000 epochs. - -| Layer | MSE | -|:-----:|:-----:| -| 0 | 0.749 | -| 1 | 0.979 | -| 2 | 1.363 | -| 3 | 1.673 | -| 4 | 2.601 | - -Encoder's reported MSE loss on Pythia-70m after 100000 epochs. - -| Layer | MSE | -|:-----:|:-----:| -| 0 | 0.09 | -| 1 | 0.13 | -| 2 | 0.152 | -| 3 | 0.211 | -| 4 | 0.222 | - - -From the results above we can see that the autoencoder reconstructs with higher MSE loss despite having the same sparsity constraint and multiplier between the activation size and the sparse embedding. The difference becomes more drastic as we increase the sparsity of the encoder. Our analysis of these results is that this aligns with our hypothesis: in natural language sequence prediction for small models like these, it might be that it is easier for the encoder to learn sparser and more easily separable features that allow it to recover the activations. However, on a task like playing the game of Othello where the features are more abstract, and we think there might be a higher requirement of complex compositionality across layers, increasing sparsity and size makes the model perform worse. - -Another significant emerging pattern in the MSE loss of the encoders is the fact that loss increases in the further layers, which backs up our initial claim; that as features become more abstract, the autoencoder has a harder time reconstructing them. - -It is worth noting that the increase of MSE across the two sets of tables is impacted by both the increase in size and sparsity. We made the two tables to match the already existing [benchmarks](https://www.alignmentforum.org/posts/AaoWLcmpY3LKvtdyq/some-open-source-dictionaries-and-dictionary-learning). 
However, in the following, we include the results of a sparse autoencoder with penalty coefficient of $3e-3$ and size $16 \times 512 = 8192$ to validate our claims about sparsity, without the effect of size. - -Encoder's Measured MSE loss on OthelloGPT after 100000 epochs. - -| Layer | MSE | -|:-----:|:-----:| -| 0 | 0.954 | -| 1 | 1.389 | -| 2 | 1.715 | -| 3 | 2.038 | -| 4 | 3.057 | - -We see that without the effect of size and only sparsity, the model performs significantly worse and if we compare the results to the one of size $64 \times 512 = 32768$ the model gets better because it has more dimensions. - -## Investigating the effect of size - -In further investigation, we experimented with training various sizes of autoencoders on layer 4 of the model. The size of the autoencoder is determined by the equation $size = x \times 512$ where $x$ is the size factor. We vary the size factor from $0.25$ to $32$. The size factor describes how much our autoencoder embedding space is bigger than the original activation space, therefore deciding how much "extra space" the autoencoder has to obey the sparsity constraint and preserve good reconstruction. We included smaller sizes so that we could investigate the effect of size and whether the encoder would be able to learn more compact features and still perform well. Our results are found in the following: - -![recons_loss vs epochs](https://hackmd.io/_uploads/S1GB0NBUp.png) - -As seen in the figure above, we see reconstruction loss decrease significantly as the number of dimensions in the autoencoder's hidden space becomes larger than the original space. A sparse autoencoder with fewer dimensions than the original latent space fails to reconstruct well and this can be even better observed in the following figure. - -![Screenshot 2023-12-11 at 8.47.16 PM](https://hackmd.io/_uploads/BJAJerHLa.png) - -This picture suggests that maybe if we scale up sparse auto encoder embedding size we can recover performance at low cost. 
However, Anthropic's interpretability work, linked earlier, suggests that as you increase the size of your autoencoder embedding, you risk getting a lot of niche highly specific features with complex interactions, therefore making interpretability harder. For example, at a given size they observe a base64 feature that fires for base64 text, and then at a larger size they see it splits into several base64 features that activate for slightly different token beginnings. - -These results highlight the challenge of sparse autoencoders for compositional tasks, and bring us to the question of interpreting sparse embedding spaces for compositional reasoning. - -## Interpreting the sparse autoencoder - -Here we had to take a detective's approach and form different hypotheses of what the model was doing and how to test them. This analysis is exploratory, and given more time we'd be excited about extending this/doing even more experiments to get a complete picture. However, we're excited about what we found and are confident that this approach is promising. - -We started by caching the autoencoder embeddings on a subset of data with valid Othello sequences and moves. This gave us a dataset to work with. - -We then did some macro level analysis by looking at and inspecting random features (dimensions of the embeddings) and seeing what kinds of boards activated most on them (by activated most we mean that the feature had a high value on that input activation for that board). This somewhat followed the pattern laid out by [Anthropic's analysis](https://transformer-circuits.pub/2023/monosemantic-features/index.html#global-analysis-interp). - -However, in Anthropic's 1L language model paper they have the following figure: - -![image](https://hackmd.io/_uploads/SyIELvLIT.png) - -They are indicating that in their setup most of the features seem to be interpretable and clear to a human, according to human scores. 
In our experience looking at our sparse autoencoder and top activating examples for different features, it seems that a lot of the features are still not interpretable and we will need more work to understand the full picture [^1]. This may be because a lot of semantic cues for simple language modeling are more aligned with our human understanding, in the sense that the concepts the model operates on are pretty intuitive, whereas for Othello it has to build a compositional model of the game state across layers, in ways that are potentially less likely to correlate with how we might perceive the problem. We don't claim that there are not such complex dynamics in even simple language models (there definitely are!), but we think there are more simple patterns to pick up on. We believe that the method laid out in that work needs to be extended to be applied to compositional networks for reasoning adjacent tasks, because it does not seem sufficient for this Othello model. This is an empirical claim based on studying and looking at a lot of data on when sparse features were activating throughout the Othello dataset. - -To do some global analysis, we computed a frequency histogram of the values of each feature on the dataset, and then we took an average of this frequency histogram to get a full picture of how often and how strongly features are activating across the dataset. This is on a log scale. - -![image](https://hackmd.io/_uploads/B1V7_HIL6.png) - -As we can see, on average for each feature there are a lot of inputs where the feature is not reading much at all, which makes sense given the sparsity constraint. Then as the activation gets higher and higher the frequency of each bucket decreases. 
- -If we increased the sparsity regularization even more we might see a sparser activation graph with more high activating frequency for large activations, but in a lot of classic encoders the distribution of embeddings tends to have a lot of smaller noise around zero, where here a lot of our values are actually very often split into either zero, or something significant. - -We then proceed to making some hypotheses about how the model might be localizing computation about the game board throughout its features, and make some tests to see what might be going on. - -### H1: Location features - -Hypothesis: what if there are features that represent the location of the last move, and only activate when that last move is within some cluster of the board? This would align with earlier world model work. - -This would be an example of a strong monosemantic and interpretable feature. - -However, we later realized that this is probably more likely as a more primitive pattern that would be noticed earlier in the model layers, before it then refines and comes up with information to decide what to predict. - -Nevertheless, we looked at the contexts in which a feature is reading strongly, and thus found a list of high-activating moves for each feature (*for what current moves is feature j activating*). We then clustered these into 3x3 location clusters on the board, marking positions as the same if they were close in a small square. That was based on the idea that it does not have to be activating for the exact same current move but moves in general that are adjacent. These features would then represent: *was the current move around this position of the board?*. - -This plot was computed by looking at those activating contexts for each feature and seeing how many non-adjacent clusters of positions are within those moves. We then compute a histogram on the cluster count, trying to see how many features activate locally in a small number of clusters. 
- -![image](https://hackmd.io/_uploads/BymEFrU8T.png) - -We can see that our hypothesis was wrong here and that at this point in the network our features are activating for current moves across the board, not really in a localized way, and don't sparsely activate just when a given location is played. This was useful data to see and showed us that at this point in the network it was probably operating on high level features and things that could directly relate to its final prediction. The small number of locally activating features all tend to just have small activations in general. - -### H2: Predictive features - -This brought us to the next experiment, where we wanted to test for higher level patterns related to its prediction. - -We were curious to study the link between the times when a feature of our autoencoder is writing strongly on an input and the actual correct prediction for that input, i.e. the actual correct next token it's trying to predict. Is there a localization effect there where a feature activates highly only when the correct prediction is within some cluster? - -We investigated and collected, for each feature, a list of the real (heldout) next action in the sequence whenever it is activating non-negligibly. This gave us a sequence of next moves for each context where a feature wrote strongly to the activation output. Then we clustered these actions into regions of 3x3 squares on the board, trying to narrow in on the idea of local activation of a feature. We operationalized the notion of reading strongly on a game board by setting a threshold activation of 0.001 by looking at the earlier plot of activation distribution and seeing what made sense. This is actually pretty low, but it still stays significant because the sparsity constraint often just nulls out values when they are not relevant, so even low values carry signal. - -This allows us to map each feature to a number of activating clusters. 
- -We then plot a histogram for the number of clusters of next action locations for each feature in our dataset. The idea is that if a feature is activating on a small number of clusters for the next action, then it might be picking up on patterns on the board that are linked to the final model's prediction, in a consistent way based on the real result. - -![image](https://hackmd.io/_uploads/Sy9PKBUIT.png) - -It's interesting to compare this to the previous plot, as here there are actually a decent number of features that seem localized, reacting and forming predictions based on what part of the board they think the next step or move might be in, and not activating across the board for the next token. These are the ~100s of features that are only reacting in some small number of clusters, like two or one. - -It seems that in layer 4 in certain cases the model is already developing an idea of what the next move will be, and is localizing sparse features for different prediction areas. - -This explanation does not explain the full behavior and there is probably a lot going on to extend the prediction into higher layers. We can see this in the frequencies of all the features that are activating in a lot of different next-token contexts, probably picking up on general things on the board and harder to interpret compositional steps that will allow it to make predictions later. - -This reminded us of the [logit lens] in language modeling where you can unembed the early activations and get coherent (and gradually improving as you increase the layer number) predictions for the next token. This seems to be showing that some of the features are already localizing predictions about the correct prediction, in a consistent manner. - -We investigated those features corresponding to the left side of the plot ($1 \leq x \leq 3$, $x$ number of clusters) that activate only for some cluster of valid next sequence areas and found data that validated this impression! 
We hypothesize it's because some action predictions are pretty clear to predict early on based on good strategy and how the dataset of sequences was generated. We found features that consistently were activating for when a given board position was the correct next board position. - -We focused particularly on feature #15 of our dim 4096 autoencoder, noticing through our analysis that it had interesting activation patterns. - -We plotted its activation value histogram: - -![image](https://hackmd.io/_uploads/Byk19HULT.png) - -We can see a long sparse tail of inputs where the feature doesn't activate at all, and then a similar frequency for values beyond some threshold of activation. - -On manual inspection, other than the big cluster of samples where it's reading zero or doesn't activate, the feature is basically always just activating when the next move is in a specific cluster at the bottom of the board. To be more precise, 90% of the boards where it activates with a value > 0.001 are in that cluster, 93% for 0.01. - -Here are some of those example boards, where the next move played is G4, and the model activates strongly. - -One of many examples of board where feature #15 activates strongly and in fact the next correct move is G4. -![image](https://hackmd.io/_uploads/BJZEDS8U6.png) [^2] - -Example where the feature activates and the actual next move is F4, right above G4, in the same cluster: -![image](https://hackmd.io/_uploads/ryy8Jj8U6.png) - -This is really interesting! Why does this feature exist? We've been thinking about the structure of Othello and the way the data was generated, and we think the idea is that the network is pretty confident about this position and early on manages to recognize and see what's going on with the rest of the board to put its hypothesis in this feature. 
- -Although we haven't explained a lot of the other features, it's cool that this method has allowed us to understand and correlate this feature with a state of the game and the understanding the model has of the board! - -# Discussion and Conclusion - -We are excited about future work in this direction and think interpreting compositional computation circuits is key to understanding how transformers and language models solve complex problems. In terms of our work with Othello GPT, we are excited about pushing sparse autoencoders further on this architecture and motivating more interpretability work. We are interested in work to train SAEs across layers and then see if we can track computation and model changes through sparse embeddings across layers, mirroring this [exploratory work]. This might be helpful to understand compositionality across layers. We also think interpreting features for SAEs with width smaller than the original width might be interesting to find projections of network activations that have very high level, compressed features, that might therefore be easier to interpret. We are also interested in methods that use SAE features to make causal statements about model behavior, for example by plugging the SAE into the model inference step, where at the end of our MLP we feed in the decoded encoded version of activations into the rest of the model. With this kind of setup you could then potentially ablate or modify different features to validate and study how your interpretability hypotheses about different parts of the model actually change its final predictions. Some of the limitations of our work are that we would have liked to run more experiments on different sparsity coefficients, and make more in-depth comparisons to language models to see to what extent our arguments about compositional reasoning hold in a rigorous way. 
We would be excited to see how increasing sparsity even more affects our ability to interpret the model, potentially making things more tractable. We also recognize the difficulty of interpretability and have not yet been able to interpret any of the more complex Othello SAE mechanisms. - - -To conclude, we've investigated the potential for sparse autoencoders for compositional reasoning tasks in the context of the Othello sequence prediction problem. Our hypothesis is that sparse autoencoders will be useful to understand such systems but their application will be more involved and complex than for earlier patterns found in language modeling tasks. We trained a sparse autoencoder at different layers of the network and saw how its performance and capabilities differ compared to previous results on language. We observe our autoencoder trained with the same hyperparameters and scaling factor for size still struggles to reach the same reconstruction performance as those for language model activations. This reveals something about the structure of these data distributions, and supports our intuition that for simple small language models SAEs are particularly performant due to their ability to pick up on a lot of separable and sparse features, but for compositional solutions where the model is learning an algorithm across layers to solve a task, the sparsity constraint incurs more of a cost, which limits this method. This intuition stems from the idea that leveraging the full extent of neural activations for compositional tasks is key to build complex algorithms across layers, and maybe less so for prediction problems that are more tractable through the composition of independent heuristics. 
We also nonetheless do some interpretability on our trained autoencoder, and note that the features seem less directly interpretable than those for language model SAE features (as supported by our hypothesis), but that there is some signal to analyze and understand, giving us hope for future work to use SAEs to understand compositional reasoning and circuits in general. In particular, we look at the range and frequency of sparse activations, and form different hypotheses about the ways the model might be localizing computation in sparse embeddings. We find the existence of predictive neurons already at layer 4, that activate when the model is already confident about a specific next action to predict. Although many of the features remain obscure, our results indicate that although sparsity is a harder constraint to impose for compositional reasoning, it can still be a useful starting point to interpret model computation. - -[^1]: To some extent increasing the sparse regularization penalty could help with this, but our exploratory analysis revealed that increasing the sparsity penalty made the model perform too badly on the data. We could always counter this by increasing the size of the encoder, but Anthropic's paper and our understanding suggest that this leads core interpretable features to split and split until it's hard to get a good picture of what's going on. - -[^2]: These plots are both before the G4 cluster move is played. diff --git a/_posts/2023-12-12-time-series-lstm-transformer.md b/_posts/2023-12-12-time-series-lstm-transformer.md deleted file mode 100644 index 019500f7..00000000 --- a/_posts/2023-12-12-time-series-lstm-transformer.md +++ /dev/null @@ -1,241 +0,0 @@ ---- -layout: distill -title: "Predicting the Future: LSTM vs Transformers for Time Series Modeling" -description: A comparison analysis between LSTM and Transformer models in the context of time-series forecasting. 
While LSTMs have long been a cornerstone, the advent of Transformers has sparked significant interest due to their attention mechanisms. In this study, we pinpoint which particular features of time series datasets could lead transformer-based models to outperform LSTM models. -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Miranda Cai - url: - affiliations: - name: MIT - - name: Roderick Huang - url: https://www.linkedin.com/in/rwxhuang/ - affiliations: - name: MIT - -# must be the exact same name as your blogpost -bibliography: 2023-12-12-time-series-lstm-transformer.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: "Introduction" - - name: "Related Work" - subsections: - - name: Effect of Dataset Size - - name: Effect of Noisy Datasets - - name: Effect of Multi-step Prediction - - name: "Methodology" - - name: "Experimental Results and Discussion" - subsections: - - name: Size of a Dataset - - name: Amount of Noise in a Dataset - - name: Prediction Size - - name: "Conclusion" - - ---- - -# 6.S898 Final Project - LSTM vs Transformers for Time Series Modeling - -By Miranda Cai and Roderick Huang - -
-
- {% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/intro_photo_time_series.webp" class="img-fluid rounded z-depth-1 w-100" %} -
-
- -## 1. Introduction - -In the context of time series forecasting, comparing Long Short-Term Memory (LSTM) networks to Transformers is a fascinating exploration into the evolution of deep learning architectures. Despite having distinct strengths and approaches, both LSTM and transformer models have revolutionized natural language processing (NLP) and sequential data tasks. - -LSTMs, with their recurrent structure, were pioneers in capturing long-range dependencies in sequential data. While the accuracy of such models have been shown to be quite effective in many applications, training LSTM models takes a relatively long time because of the fact that they must remember all past observances. One faster alternative to LSTM models are transformers. Transformers are able to remember only the important bits of inputs using an attention-mechanism, and is also parallelizable making it much faster to train than recursive LSTMs that must be processed sequentially. - -With its recent development, people have started opting to use transformer based models to solve sequence problems that once relied on LSTMs. One significant example is for NLP use cases, where transformers can process sentences as a whole rather than by individual words like LSTMs do. However, since transformers have been around for less than a decade, there are still many potential applications that are yet to be deeply explored. Thus, we will explore the effectiveness of transformers specifically for time series forecasting which finds applications across a wide spectrum of industries including finance, supply chain management, energy, etc. - -Our goal is to realize which particular features of time series datasets could lead transformer-based models to outperform LSTM models. - -## 2. 
Related Work - -With the growth of ChatGPT in the recent years, extensive research has been done across various NLP tasks such as language modeling, machine translation, sentiment analysis, and summarization, each aiming to provide comprehensive insights into when each architecture excels and where their limitations lie. While research on time series data exists, it hasn't garnered as much attention, so we aim to broaden this area of study. - -### 2.1 Effect of Dataset Size -The size of a dataset plays an important role in the performance of an LSTM model versus a transformer model. A study done in the NLP field compared a pre-trained BERT model with a bidirectional LSTM on different language dataset sizes. They experimentally showed that the LSTM accuracy was higher by 16.21% relative difference with 25% of the dataset versus 2.25% relative difference with 80% of the dataset. This makes sense since BERT is a robust transformer architecture that performs better with more data. As shown in the figure below from , while LSTM outperformed BERT, the accuracy difference gets smaller as the perctange of training data used for training increases. -
-
- {% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/dataset_size_research_fig.png" class="img-fluid rounded z-depth-1" %} -
-
-While we perform a similar methodology which is discussed further in section 4.1, the major difference is in the type of data we test. Instead of measuring classification accuracy for NLP tasks, this study measures the mean squared error (MSE) loss for regression time series data. - -### 2.2 Effect of Noisy Datasets - -Theoretically, LSTMs are more robust to noisy data due to its ability to capture local dependencies. On the other hand, the self-attention mechanisms in transformers propagate errors and may struggle with sequences that have a high degree of noise. Electronic traders have been recently attempting to apply transformer models in financial time series prediction to beat LSTMs . Largely focused on type of assets, the research showed that transformer models have limited advantage in absolute price sequence prediction. In other scenarios like price difference and price movement, LSTMs had better performance. - -Financial data sets are known to be extremely noisy, and in addition, very hard to find due to their confidential nature. The application of gave inspiration to study how the "amount" of noisiness would affect the LSTM and transformer models. Discussed further in section 4.2, this study added various amounts of noise to a clean dataset to see how this would affect each architecture. - -### 2.3 Effect of Multi-step Prediction - -The last feature that we would like to look at between LSTMs and transformer models is forecasting length. Forecasting length describes how far into the future we would like our model to predict based on the input sequence length. One paper done on short-term time series prediction finds that transformers were able to outperform LSTMs when it came to predicting over longer horizons. The transformer did better in all three cases when predicting one hour, twelve hours, and an entire day into the future. They accredit these results to the fact that attention better captured longer-term dependencies than recurrence did. 
- -Similarly to this paper, we will focus only on short-term forecasting. Short-term forecasting is important in situations like stock market predictions, where stock values show high volatility in the span of hours and may or may not have learnable trends over long periods of time. - -However, we would like to extend the results of this paper to learn to also look at multi-step prediction. This study trained models specifically to have a singular output, with each model being trained with outputs at the specified prediction horizon. Instead, we would look to train our models against outputs of different lengths. We thought it would be an interesting addition to output the entire sequence of data leading up to whatever period in the future, to give a better visualization of what actually happens as forecasting length increases. - - -## 3. Methodology - -The dataset we will be using throughout this study is the Hourly Energy Consumption dataset that documents hourly energy consumption data in megawatts (MW) from the Eastern Interconnection grid system . -
-
- {% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/energy_dataset_split.png" class="img-fluid rounded z-depth-1" %} -
-
- -### 3.1 Experiments - -We can utilize this dataset to predict energy consumption over the following features of a dataset. -- **Size of a dataset**: As discussed in Section 2.1 , the size of a dataset played an impact in measuring classification accuracy for NLP tasks. Since the energy dataset is numerical, it's important to test the same concept. We leveraged nearly 150,000 data points, progressively extracting subsets ranging from 10% to 90% of the dataset. For each subset, we trained the architectures, allowing us to explore their performance across varying data volumes. - -- **Amount of noise in the dataset**: As discussed in Section 2.2 , research was done to test LSTMs vs transformers on noisy stock data for various assets. We deemed the energy dataset to be relatively clean since it follows a predictable trend depending on the seasons of the year and time of the day. For example, there are higher energy levels during the winter and daytime hours. To test noise, we added incrementing levels of jittering / Gaussian noise to observe the effect of noisy data on LSTMs and transformers. Example augmentations with different variances are plotted below in blue against a portion of the original dataset in red. -
-
- {% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_0001.png" class="img-fluid rounded center z-depth-1 w-75" %} - {% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_001.png" class="img-fluid rounded z-depth-1 w-75" %} - {% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_003.png" class="img-fluid rounded z-depth-1 w-75" %} - {% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_008.png" class="img-fluid rounded z-depth-1 w-75" %} -
-
-- **Output size**: As discussed in Section 2.3 , there have been few studies measuring the effect of varying the forecasting length, and in the ones that do they still only output one class *at* the specified time into the future. In our novel experimentation, we aimed to generate an entire sequence of outputs *up until* the specified time into the future. We created models that would predict forecasting lengths of 10%, ..., 100% of our input sequence length of 10. To do so, we set the output size of our models to be equal to these forecasting lengths. This involved removing any final dense or convolutional layers. - -### 3.2 Selected Architectures & Setup - -Selecting the right architecture for LSTM (Long Short-Term Memory) networks hinged on several key considerations. The LSTM architecture is extended of the RNN to preserve information over many timesteps. Capturing long-range dependencies requires propagating information through a long chain of dependencies so old observations are forgotten, otherwise known as the **vanishing/exploding gradient problem**. LSTMs attempt to solve this problem by having separate memory to learn when to forget past or current dependencies. Visually, LSTMs look like the following . -
-{% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/rnn-lstm.png" class="img-fluid rounded z-depth-1 w-75" %} -
- -Transformers, initially designed for natural language processing, should work well for time series data. They operate by utilizing self-attention mechanisms, allowing them to capture long-range dependencies effectively. A transformer breaks down the input sequence into smaller, fixed-size segments known as tokens, representing various time steps or features. Through multiple layers of self-attention and feedforward operations, the transformer architecture should excel at capturing both short-term and long-term dependencies. A figure of transformer time series is shown below from . - -
-{% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/transformer_arch.png" class="img-fluid rounded z-depth-1 w-75" %} -
- -There were certain parameters that we kept fixed throughout all variations of our models. The first was training on batches of data with sequence length 10. Second, we trained all of our LSTM models for 500 epochs and all of our transformer models for 10 epochs. These numbers were chosen with some fine-tuning to yield meaningful results while also allowing the training for so many individual models to be done in a reasonable amount of time. - -Additionally, the data was normalized since the range of energy values was from 10000 Megawatts (MW) to 60000 MW. Normalizing the data improves convergence for gradient descent optimization and mitigates issues related to model regularization. - -## 4. Experimental Results and Discussion - -### 4.1 Size of a Dataset -Given the energy consumption dataset described in Section 3, we trained and evaluated an LSTM model and transformer model on progressively increasing subsets ranging from 10% to 90% of the dataset. The figure below shows the normalized mean squared error (MSE) loss for each subset of the dataset. -
-
- {% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/lstm_trans_dataset_size_res.png" class="rounded z-depth-1 w-50" %} -
-
-The experimental results show that transformers have an improving trend as the size of the dataset increases while the LSTM has an unclear trend. Regardless of the size of the training dataset, the LSTM doesn’t have a consistent result for the testing set. - -In an LSTM, there exist additional gates for a sequence of inputs $x^{(t)}$ where in addition to the sequence of hidden states $h^{(t)}$, we also have cell states $c^{(t)}$ for the aforementioned separate memory. While the LSTM architecture does provide an easier way to learn long-distance dependencies, it isn’t guaranteed to eradicate the vanishing/gradient problem discussed in Section 3.2. While the same is true for transformers, the transformer architecture addresses the vanishing/exploding gradient problem in a different way compared to LSTMs. Transformers use techniques like layer normalization, residual connections, and scaled dot-product attention to mitigate these problems. - -For time series dataset, the transformer architecture offers the benefit of the self-attention unit. In NLP, it’s typically used to compute similarity scores between words in a sentence. These attention mechanisms help capture relationships between different elements in a sequence, allowing them to learn dependencies regardless of their distance in the sequence. For time series data, transformers might offer advantages over LSTMs in certain scenarios, especially when dealing with longer sequences or when capturing complex relationships within the data such as seasonal changes in energy use. - -From a qualitative perspective, if we pull a subset of the test data to observe the predicted values from an LSTM vs a transformer for 40% of the training set, we have the following. -

-{% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/test_set_pred_40.png" class="img-fluid rounded z-depth-1" %} -

- -While transformers did perform better than LSTMs, it's not like the LSTM did a horrible job. We notice that at the peaks, the LSTM overshot more than the transformer and at the troughs, the LSTM undershot. However, overall, both architectures still had good results. In the context of the size of time series data, transformers do seem more promising given the loss figure above. It seems that LSTMs are losing that dependency on old observations while transformers are gaining ground as the size of the dataset increases. While showed that bidirectional LSTM models achieved significantly higher results than a BERT model for NLP datasets, -> The performance of a model is dependent on the task -and the data, and therefore before making a model choice, these factors should be taken into consideration instead of directly choosing the most popular model. - Ezen-Can 2020 - -For this experiment, the outlook of large datasets in time series applications for the transformer architecture looks promising. - -### 4.2 Amount of Noise in a Dataset -To test the performance of our models on simulated noisy data, we first trained our models on batches of the original clean dataset and then ran our evaluations on different levels of noisy data. Random noise was added according to Gaussian distributions with variances in `[0.0, 0.0001, 0.001, 0.002, 0.003, 0.005, 0.008, 0.01]` to create these data augmentations. Below is a comparison of the MSE loss for both models as a function of the injected noise variance. - -
-{% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/noisy_loss.png" class="img-fluid rounded z-depth-1 w-50" %} -
- -Since loss is not very descriptive in itself, we also visualize the model output for some of these augmented datasets. For each graph below, red is the true value while blue is predicted value. - -

- - - - - - - - - - - - - - - - - - - - - -
LSTMTransformer
{% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_0001.png" class="img-fluid rounded z-depth-1" %}{% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_0001.png" class="img-fluid rounded z-depth-1" %}
{% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_002.png" class="img-fluid rounded z-depth-1" %}{% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_002.png" class="img-fluid rounded z-depth-1" %}
{% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_005.png" class="img-fluid rounded z-depth-1" %}{% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_005.png" class="img-fluid rounded z-depth-1" %}
{% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_01.png" class="img-fluid rounded z-depth-1" %}{% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_01.png" class="img-fluid rounded z-depth-1" %}
-

- -Both models are shown to start off similarly, predicting very well with no noise. However, almost immediately we can see that the LSTM does not handle noise as well as the transformer. LSTM makes much noisier predictions with many more outliers. We suspect this occurs due to the implicit inductive bias of the memory feature in the LSTM module. Consider a time step $t$. The memory accrued up to point $t$ "weights" -the data seen in recent past time steps $t-1$, $t-2$, $\ldots$, much more so than the data seen relatively long ago. While this is an intuitive design for memory, we can observe that this mechanism combines storing temporal information with token-specific information. In order to compete with a transformer, the LSTM model needs to be trained on significantly more data. - -The transformer on the other hand has the negative effects of its own inductive bias mitigated by its attention mechanism. Because the transformer has both a mechanism to account for temporal information and a mechanism to select the next associated token (attention module), and because they are separated, it is able to produce more "accurate" results. - -### 4.3 Prediction Size -Finally, we created and trained separate models with varying numbers of output classes to represent the prediction size. We trained on output sizes as percentages of our input size, in increments of 10% from 0% to 100%. Because our input sequence was a constant 10 and our data is given in hourly intervals, these percentages translated to have prediction horizons of 1hr, 2hrs, ..., 10hrs. Evaluating our models resulted in the following MSE loss trends. -
-{% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/prediction_size_loss.png" class="img-fluid rounded z-depth-1 w-50" %} -
- -Again, to get a better sense of why we see these results, we visualize the outputs. Since our outputs are sequences of data, to have a more clean visualization we plot only the last prediction in the sequence. For each graph below, red is the true value while blue is predicted value. -

- - - - - - - - - - - - - - - - - - - - - -
LSTMTransformer
{% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_10.png" class="img-fluid rounded z-depth-1" %}{% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_10.png" class="img-fluid rounded z-depth-1" %}
{% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_50.png" class="img-fluid rounded z-depth-1" %}{% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_50.png" class="img-fluid rounded z-depth-1" %}
{% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_80.png" class="img-fluid rounded z-depth-1" %}{% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_80.png" class="img-fluid rounded z-depth-1" %}
{% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_100.png" class="img-fluid rounded z-depth-1" %}{% include figure.html path="assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_100.png" class="img-fluid rounded z-depth-1" %}
-

- -As we can see, the MSE loss of our transformer model increased at a slower rate than our LSTM model. After comparing the outputs of our models at these time steps, it becomes evident that this trend is due to the LSTM losing its characteristic shape over time. Our transformer simply performs worse when it has to predict more, as expected, because the data is not perfectly periodic. However, we infer that the LSTM outputs get flatter over time because the more we accumulate memory through the long-term mechanism, the less weight each previous time step holds, diluting the total amount of information carried through the sequence. Transformers avoid this problem by using their attention mechanisms instead to keep only the important information throughout. - -## 5. Conclusion - -Through the experiments tested in Section 4 (on dataset size, dataset noise, and prediction size), transformers seem to be a promising alternative to LSTMs for time series modeling due to their unique architecture, particularly suited for capturing long-range dependencies. Unlike LSTMs, transformers utilize self-attention mechanisms that allow them to consider relationships between all elements in a sequence simultaneously. This capability is especially advantageous in time series data, where capturing distant dependencies is crucial for accurate forecasting. Additionally, transformers mitigate vanishing gradient problems better than LSTMs, enabling more robust training on longer sequences. - -While transformers excel in parallel computation theoretically, one significant issue is the extensive memory requirements during training, especially with larger models or datasets. Transformers demand significant memory for storing attention matrices, limiting the batch size that can fit into GPU memory. So, for those who are seeking an optimal architecture to train a time series dataset, one has to consider their own design priorities of accuracy and performance. 
- -All in all, the choice between LSTMs and transformers for time series datasets depends on the implementer's design priorities and the task at hand. With some research showing LSTMs outperforming transformers and others such as our study showing the opposite, there is a clear need to dive deeper into the subject especially given the extensive number of applications for time series modeling. \ No newline at end of file diff --git a/_posts/2023-12-12-watermarked-llms.md b/_posts/2023-12-12-watermarked-llms.md deleted file mode 100644 index 39d2164f..00000000 --- a/_posts/2023-12-12-watermarked-llms.md +++ /dev/null @@ -1,342 +0,0 @@ ---- -layout: distill -title: Are Watermarked Large Language Models More Prone to Hallucinations? -description: In this blog post, I investigate whether watermarked LLMs are more likely to “hallucinate,” or make up facts, because of limitations imposed by the watermarking scheme. -date: 2023-12-12 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Justin Ji - -authors: - - name: Justin Ji - url: "https://www.linkedin.com/in/jujipotle/" - affiliations: - name: Harvard University - -# must be the exact same name as your blogpost -bibliography: 2023-12-12-watermarked-llms.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Introduction - - name: Background - subsections: - - name: The need for AI detection algorithms - - name: So what is watermarking? 
- - name: Quality degradation in watermarked LLMs - - name: Experiment - subsections: - - name: Setup - - name: Watermarking scheme implementation - - name: Prompting my models - - name: Results - subsections: - - name: Quantitative results - - name: Qualitative results - - name: Theoretical underpinnings of results - - name: Conclusion - subsections: - - name: Further Work - - name: What I learned - - name: Supplemental Material ---- - -## Introduction - -As LLMs grow in capabilities, it is becoming more and more difficult to tell apart human-written from AI-generated content. -Current post-hoc AI detection tools like GPTZero, which are easy to bypass and can be biased against non-native English speakers, are neither -robust nor fair. Watermarking schemes suggest a more secure and unbiased method of detecting LLM-generated content, but introduce potential -quality degradation. In this blog post, I investigate whether watermarked LLMs are more likely to “hallucinate,” or make up facts, because -of limitations imposed by the watermarking scheme. I formulate a nuanced research question, explain assumptions made and my experimental setup, -present an analysis of my results, and present next steps. Overall, although I do not obtain statistically significant results, I do provide -statistical evidence that hallucinations in watermarked LLMs are worth studying, with interpretable qualitative results that I explain with -fundamental ML concepts. - -## Background - -### The need for AI detection algorithms - -Deepfakes. AI-assisted academic plagiarism. Bots on social media spreading fake news. These are just a couple of the real-world problems brought -about by the recent advancement in large language model capabilities that make it easy for malicious users to spread misinformation, while making -it hard for social media platforms or governments to detect their AI origin. 
Thus, detecting AI-generated content in the wild is becoming one of -the hottest research fields in AI. In fact, the White House recently commissioned an executive order on AI to direct action towards detecting -AI-generated content and authenticating official content. But as AI models get more and more powerful, the question arises: will it even be -possible to tell apart truth from disinformation? - -Some signs have already appeared that point to the answer being “no.” When ChatGPT first released to the public, coding Q&A site StackOverflow -temporarily banned answers generated by ChatGPT, because it was so easy to use ChatGPT to generate answers that seemed to be correct but were -wrong on closer inspection. Perhaps you’ve experienced the following: you’re browsing Reddit or an online forum and reading a user’s reply, -thinking that it’s a well-composed answer, only to realize that the structure of the reply is strikingly similar to how ChatGPT sounds. - -{% include figure.html path="assets/img/2023-12-12-watermarked-llms/reddit-post.png" class="img-fluid" %} -
A Reddit post pointing out a user who writes all their comments using ChatGPT.
- -{% include figure.html path="assets/img/2023-12-12-watermarked-llms/reddit-comment.png" class="img-fluid" %} -
An unexpected comment replying to the Reddit post above.
Some AI detection tools, such as GPTZero , claim to solve this problem by detecting text generated by large language models (LLMs). However, -from playing around with GPTZero, it's not hard to prompt ChatGPT to generate text that bypasses GPTZero’s filters, or to manually paraphrase -a few sentences while preserving the content’s general meaning. This is due to GPTZero’s simplistic detection method, which is just to predict -if a piece of text is AI-generated by analyzing its perplexity—a measure of “randomness.” In addition to being inaccurate, AI detection tools -like GPTZero frequently misclassify text written by non-native English speakers as AI-generated, bringing up issues of fairness and biases. - -So is it all doomed? Will we reach a state of the world in which people can’t trust anything they see on the internet to be human-generated? - -Not quite. New watermarking algorithms make it possible to trace back any text generated by specifically-watermarked LLMs with high accuracy and -with low false-positive rates, and with a considerable amount of effort required to modify the output of such an LLM without also degrading the -quality of the output. - - -### So what is watermarking? - -Watermarking, in the context of LLMs, is the process of modifying an LLM’s generation process such that signals are embedded into generated text -that are invisible to humans but algorithmically detectable. The key difference between watermarking and post-hoc detection algorithms like GPTZero -is that post-hoc detectors rely on text outputted by LLMs to sound “artificial,” and as LLM capabilities grow, this is unlikely to hold. On the -other hand, watermarking schemes work regardless of the capabilities of the underlying LLM, which makes them more robust to advancements in AI. -The watermarking scheme designed in *A Watermark for Large Language Models* (Kirchenbauer, Geiping et al.)
is specially designed to have negligible -impact on text quality and work with a publicly accessible detection algorithm, so that anyone can verify if a piece of text is generated by a -particular LLM. The watermarking scheme works by selecting a pool of “green” tokens before text generation and softly preferring to sample from -the pool of “green” tokens during text generation. Then, the detection algorithm checks if a piece of text contains a higher proportion of “green” -tokens than expected, and if the result is statistically significant, determines that the text was generated by an LLM. - -The existence of an undetectable, unbreakable, and accurate watermarking scheme would be incredible! By watermarking any LLM before its release, -any text generated by the LLM would contain statistical signals that prove its AI origin, making it difficult for adversaries to pass off -LLM-generated content as human-generated. Furthermore, because watermarking schemes rely on detecting signals associated with each LLM’s -watermarking process and not by analyzing the perplexity of text, human-generated content would rarely be flagged as AI-generated. -Unfortunately, the recent paper *Watermarks in the Sand: Impossibility of Strong Watermarking for Generative Models* (Zhang et al.) proves that -under natural assumptions, it is impossible to achieve watermarking schemes that can resist all attacks by a computationally-bounded attacker. -The key assumption, which is the existence of a quality oracle—an algorithm that can differentiate between higher and lower quality responses—is easily satisfied by using any LLM that is capable of verifying quality. The authors of the paper implement a general attack on any watermarked LLM by perturbing the output of an LLM an arbitrary number of times, and using the quality oracle to ensure the perturbation does not cause the text to degrade. 
After enough perturbations, the text is unlikely to contain the statistical signals embedded in the original output, and the attacker evades detection. - -So if an attacker is willing to spend lots of time and effort, they can break any watermarking scheme. Still, maybe this barrier is enough to -deter most attackers. Then, why wouldn’t we watermark every LLM released to the public? - - -### Quality degradation in watermarked LLMs - -The truth is, because watermarking schemes force a LLM to preferentially sample from a pool of “green” tokens, the quality of the output of -watermarked LLMs may decrease. To understand the intuition behind this, here’s a short clip -from “Word Sneak with Steve Carell": [link](https://youtu.be/9nBBgD0q6rA?feature=shared&t=107) - -{% include figure.html path="assets/img/2023-12-12-watermarked-llms/word-sneak.png" class="img-fluid" %} - -"We weren’t dressed properly for **moose**-watching or **zucchini**-finding... I for one had the wrong kind of **pantaloons** on.” - -Steve and Jimmy were given cards with random words and had to work them into a casual conversation. Similarly, one can imagine an LLM generating -odd-sounding sentences in order to adhere to a watermarking scheme. - -The effects of quality degradation are amplified the smaller the space of high-quality outputs is. For example, the prompts “What is 12 times 6?” -or “What is the first section of the U.S. Constitution?” have only one accepted answer, forcing a watermarked LLM to either give up on watermarking -the output or *hallucinate incorrect answers*. - -The latter bad outcome is the one I will investigate further in this blog post: Are watermarked LLMs more prone to hallucinations? In particular, -I investigate if there are tradeoffs between quality of outputs and watermark security. Lastly, I perform a qualitative analysis of watermarked -outputs, and explain any interpretable trends caused by the watermarking scheme. 
- - -## Experiment - -### Setup - -I investigate my hypothesis by experimenting with unwatermarked and watermarked LLMs. I outline my experiment here: first, I ask an unwatermarked -LLM to generate biographies for 100 famous people. I ask an evaluation oracle, aka GPT 3.5, to count the number of mistakes in each generated -biography. This serves as my control group. Then, I create three experimental groups, each of which corresponds to a watermarked LLM with varying -degrees of watermarking security. I ask GPT 3.5 to count the number of mistakes made by each of the watermarked LLMs, and perform statistical Z-tests -to conclude whether or not watermarked LLMs are more likely to hallucinate. - -I now walk through the steps of my experiment in more depth, with commentary on any decisions or tradeoffs I made in the process. Hopefully anyone -reading this can follow what I did to replicate, or even build upon, my results! - -My coding environment was Google Colab Pro, and its V100 GPU was sufficient to run all my code—a complete runthrough of my final Jupyter notebook -would take a bit over an hour. The watermarking scheme I sought to replicate can be applied to any LLM where the watermark has access to the last -layer of logits, so I looked into a variety of open-source LLMs. Ultimately, I decided on OPT (1.3 billion parameters) , because its small model size -allowed me to experiment with different parameters more efficiently, with faster inference times. Other open-source LLMs I considered were Mistral, -Llama, and Roberta. - -For my experiment, I needed a dataset of biographies of famous people. Unfortunately, I couldn’t find one publicly available after a few hours of -searching, so I did the next best thing: I made my own. Using a list of 100 famous people’s biographies I found on a website , I copy-pasted each of -their Wikipedia biographies into a CSV. 
- -{% include figure.html path="assets/img/2023-12-12-watermarked-llms/bios.png" class="img-fluid" %} - -Lastly, I needed an evaluation oracle to count up the number of factual mistakes in each generated biography. I decided to make a tradeoff between -accuracy and efficiency by letting ChatGPT do the work for me instead of manually cross-checking sample biographies with their Wikipedia -biographies. After a bit of research into OpenAI’s APIs and pricing plans, I settled on the GPT 3.5-turbo API, since I expected to generate 600k -tokens for my experiment, which would be a bit less than $1 in costs. With more funding, I would have probably used GPT 4, but I checked and was -satisfied with the outputs of GPT 3.5-turbo. - - -### Watermarking scheme implementation - -With the experimental variables of open-source model, dataset, and evaluation oracle decided upon, I began to implement the watermarking scheme -detailed in *A Watermark for Large Language Models*. The watermarking scheme is made up entirely of two components: a watermarking logits processor -that influences how tokens are sampled at generation time, and a watermark detector that detects if a given piece of text contains a watermark. -There were also several tunable parameters detailed in the watermarking paper, but the two of interest are gamma and delta. - -Gamma represents the breadth of the watermark in terms of vocabulary: a higher gamma includes more words in the “green” pool, making responses -sound more natural but may dilute the watermark’s detectability, while a lower gamma focuses on fewer words, increasing its detectability but -potentially negatively affecting the output. The authors of the watermarking paper suggested a value for gamma between 0.25 and 0.75. - -Delta represents the intensity of the watermark, or how strongly the watermark prefers “green” tokens to “red” tokens at each step of the -generation process. The higher the delta, the more evident the resulting watermark. 
The watermarking paper suggested a value for delta between -0.5 and 2.0. - -{% include figure.html path="assets/img/2023-12-12-watermarked-llms/watermark-scheme.png" class="img-fluid" %} -
A visualization of the watermarking scheme (figure from original paper).
- -Tuning these parameters, I created three different experimental groups, each corresponding to a different level of watermarking strength: strong, -medium, and weak. The exact values of gamma and delta I used can be tinkered with; my choices were based on what empirically had the best effects. - -I left the detector algorithm provided by the watermarking paper mostly untouched, except for the Z-threshold. I tuned it down to z=2.3 so the -detector would be more likely to say a piece of text was watermarked for comparisons between different watermarking strengths, but this threshold -still required 99% confidence. Additionally, the detector algorithm takes as input gamma, which is the same gamma used to generate the watermarked -text I am attempting to detect. This is a key parameter that differentiates a watermark detector from a general post-hoc AI detector. The gamma -seeds the watermarking scheme, so that during the detection process, we can work backwards to determine if the token sampling adheres to the given -gamma value. This ensures that human-written text that sounds bland or like a non-native English speaker won’t be misclassified as AI-generated, -resulting in a low false-positive rate. - - -### Prompting my models - -First, I needed to prompt my open-source model to generate biographies of famous people. Since the version of OPT I used is a Causal LM, not an -Instruct LM, I needed to prompt it with a sentence that would make it most likely to continue where I left off and generate a biography of the -specified person. After some testing, I settled on the following prompt: - -{% include figure.html path="assets/img/2023-12-12-watermarked-llms/generation-prompt.png" class="img-fluid" %} - -However, I noticed that the watermarked LLMs were initially outputting repeated phrases, e.g. “Barack Obama was the first African-American president of -the United States. 
Barack Obama was the first African-American president of the United States.” Although this wasn’t technically hallucination, -I wanted the output to look like a real biography, so I tuned two hyperparameters used during text generation: no_repeat_ngram_size=3 and -repetition_penalty=1.1 to discourage repetitive phrases. - -Next, I needed to prompt my evaluation oracle, GPT 3.5, to evaluate sample biographies. Since GPT 3.5 is an Instruct model, I can directly ask it -to evaluate a given biography. I decided on the following prompt: - -{% include figure.html path="assets/img/2023-12-12-watermarked-llms/evaluation-prompt.png" class="img-fluid" %} - -I ask GPT 3.5 to clearly list out each detected mistake and their corresponding correction in order to reduce the likelihood of it hallucinating, -as well as allowing me to manually verify its evaluations. - - -## Results - -### Quantitative results - -After generating four biographies for each person—one unwatermarked control sample and three watermarked samples with different watermarking -parameters—I evaluate them against our GPT 3.5 evaluation oracle. - -{% include figure.html path="assets/img/2023-12-12-watermarked-llms/stats.png" class="img-fluid" %} - -On average, the unwatermarked control LLM generates biographies that contain 8.14 mistakes each. The strongest watermarking setting has a mean of -8.64 mistakes, the medium watermark has 8.56 mistakes on average, and the weakest watermark has 8.00 mistakes on average. Clearly the weakest -watermarked LLM doesn’t hallucinate any more than the control group LLM—but it does have a 13% detection rate, which is pretty substandard for -a watermarking scheme. 
The medium and strongest watermarks perform slightly worse than the control group LLM, and by performing Z-tests on the -measure statistics (never thought AP Stats would come in handy), I conclude that the probabilities of observing the results we got for the strong -and medium watermarked LLMs are 26% and 34% respectively. So, although these probabilities aren’t statistically significant, they do slightly -imply that watermarked LLMs hallucinate more often, and the effect is especially visible with stronger watermark settings. - -We also see that our unwatermarked biographies had a false positive rate of approximately 1%. This can be attributed to the tuning I made to the -Z-threshold, from 4.0 to 2.3. Indeed, I made the change knowing that a Z-threshold of 2.3 reflects 99% confidence, so our FPR of 1% is in line -with this change. If I had left the Z-threshold to 4.0, we would have a FPR of approximately 0.003%. However, with a higher Z-threshold, the -weakest watermarked LLM would consequently have an even lower successful detection rate, so I made this tradeoff of having one or two false -positives in order to catch more watermarks. This also lets us see more clearly how even though weaker watermarks are less detectable, some trace -of the watermarking signal still remains. - - -### Qualitative results - -In addition to quantitative results, I perform a deeper, qualitative analysis on a biography generated for a specific person. I chose the strong -watermarked biography for Nelson Mandela because of the interesting and interpretable trends we can see: - -{% include figure.html path="assets/img/2023-12-12-watermarked-llms/nelson-mandela.png" class="img-fluid" %} - -I highlighted the errors pointed out by our evaluation oracle in red text for ease of comparison. Note that there may be additional errors not -caught by GPT 3.5. Generally, we see that the errors occur from mixing up dates, names, ages, locations, etc., and are not completely made up -facts. 
In fact, the biography does capture a relatively sound summary of Mandela’s life. I posture that the hallucinations we see are mostly -simple fact mismatches because the watermarking schemes we impose on OPT still give it the flexibility to tell a good story of Mandela’s life, -but when it comes down to token-by-token sampling, our LLM may be forced to generate the wrong date or name in order to adhere to the “green” -token preference scheme. - -I also wanted to highlight the blue text. The sentence “The Nobel Prize is one of only three Nobel Prizes awarded to individuals in history” not -only is incorrect but also doesn’t add much substance to the biography. Here are three other hand-picked sections of generated biographies that -aren’t informative to the person’s biography: - -{% include figure.html path="assets/img/2023-12-12-watermarked-llms/repetitions.png" class="img-fluid" %} - -In each piece of text, the reported facts may not be incorrect, but they take up valuable space that we would otherwise like to be used to -introduce new facts about the person. So even if these facts aren’t flagged as factual inaccuracies by our evaluation oracle, they do demonstrate -a degradation in model output, raising the issue of response quality degradations in addition to hallucinations. - - -### Theoretical underpinnings of results - -Taking a theoretical perspective, what exactly causes LLMs to hallucinate? To answer this question, we cite one of the important topics covered -in class: reward misspecification. If, during training time, we give low error loss to outputs that sound similar to our training data, we’re -not necessarily training the LLM to be more *accurate*. Instead, we’re training the LLM to generate output that is more likely to be accepted as -“close-enough” to the training data. When we ask ChatGPT to write a poem or reply to an email, being “close-enough” is usually fine. 
But when we -need it to be 100% accurate, such as solving a math problem or generating a biography for a real person, being “close-enough” doesn’t quite make -the cut. - -Furthermore, the auto-regressive manner in which LLMs generate text means they sample the “most-likely” token, based on previously seen tokens. -If our LLM starts to generate FDR’s most important New Deal measures, the “most-likely” tokens to follow might be explaining each of the New Deal -measures in detail. But this isn’t what we want out of a biography of FDR! - -Both of these problems—hallucinating false information and generating uninformative facts—are observed in our experiments. But unfortunately, it’s -hard to reduce one issue without exacerbating the other. I attempted to decrease the temperature parameter in OPT’s text generation, but this -resulted in OPT generating strings of run-on, non-informative sentences, such as *“Marilyn Monroe starred in several films, including Dangerous -Years, Scudda Hoo! Scudda Hay!, Ladies of the Chorus, Love Happy...”* because each additional film was the most likely follow-up to the previously -generated tokens. Similarly, increasing the temperature might generate text that sounds more “human-like,” but upon closer inspection, would be -riddled with factual inaccuracies. - - -## Conclusion - -### Further Work - -There are a couple of improvements I would have made if I had more time or more compute to work with. With more time, I would have liked to learn -how to scrape data from webpages, so I could create a dataset of a thousand famous people’s biographies. Then, I could run my experiments with a -larger sample size, allowing me to produce more statistically significant results. However, even with access to a larger dataset, I would have -been bottlenecked by compute. Using Colab Pro’s V100 GPU, I estimate that generating biographies and evaluating them for 1000 people would take -at least 10 hours of runtime. 
- -If I had access to more powerful GPUs, I also would have investigated a more nuanced research question: are bigger, more capable LLMs that have -watermarking schemes applied to them less prone to hallucinating? I would have liked to run my experiments using a larger open-source LLM, such -as LLaMa 65B, instead of OPT 1.3B, to see if watermarking schemes still negatively affect an LLM’s ability to perform tasks, when the base LLM is -much more capable. - - -### What I learned - -As this project was my first self-driven research experiment, I faced many challenges, but also learned so much. Probably the most important -thing I learned is that compute is important, but it’s not an end-all-be-all. There’s tons of open-source models out there that can be run on a -V100, and Google Colab Pro offers it at an affordable price. I also learned how important it is to define a well-scoped research problem, and how -chatting with others can help you gain fresh insights on roadblocks. - -I found that my work towards this project was structured much differently than how I would approach a problem set. With a pset, much of the -starter code is provided, and in particular, the code to import datasets, process them, and visualize results are all provided. In this project, -most of my time was spent making design decisions: which dataset should I use, how should I format my results, what hyperparameters should I use. -Although the raw number of lines coded in my final notebook might not be the most, I can explain my reasoning behind each line of code clearly, -and I think this is a result of the thorough research I performed. - -Lastly, I learned that tackling an unanswered question in research is tractable for most students with some programming experience and interest in -a scientific field. 
I didn’t have the most extensive ML background, nor any prior undergraduate research experience, but just by reading some -papers on watermarking and writing down the questions that popped into my head, I came up with some viable research questions that could be -tackled by an independent research project. - -I’m very thankful to my friends Franklin Wang and Miles Wang for helping me configure my coding environment and keeping me motivated throughout -the project, and also to the TAs I spoke with during the ideation and scoping stage. To other students reading this blog post who may want to get -started doing ML research but aren’t sure how to get started, I encourage you to try replicating some papers with code! Papers With Code has several -papers accompanied by their codebases, and just trying to obtain the same results as them is an incredible learning experience. If anyone wants -to replicate, or even build off of my work, please reach out to me if you have any questions or ideas you’d like to discuss. You can reach me at -justin ji [at] college.harvard.edu. - - -### Supplemental Material - -In this Github repository, you can access the dataset I made of famous people’s biographies, the code I used to generate my results, and the CSV -files of results. - -[Github Repo](https://github.com/jujipotle/deep-learning-project) \ No newline at end of file diff --git a/_posts/LICENSE.md b/_posts/LICENSE.md deleted file mode 100644 index 40595d96..00000000 --- a/_posts/LICENSE.md +++ /dev/null @@ -1,9 +0,0 @@ -# Released under MIT License - -Copyright (c) 2014 Mark Otto. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/_posts/announcement_1.md b/_posts/announcement_1.md deleted file mode 100644 index 5b1f7a9b..00000000 --- a/_posts/announcement_1.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -layout: post -date: 2015-10-22 15:59:00-0400 -inline: true ---- - -A simple inline announcement. diff --git a/_posts/announcement_3.md b/_posts/announcement_3.md deleted file mode 100644 index 326d2838..00000000 --- a/_posts/announcement_3.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -layout: post -date: 2016-01-15 07:59:00-0400 -inline: true ---- - -A simple inline announcement with Markdown emoji! 
:sparkles: :smile: diff --git a/_posts/bug_report.md b/_posts/bug_report.md deleted file mode 100644 index 511f5851..00000000 --- a/_posts/bug_report.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -name: Bug report -about: Create a report to help us improve -title: '' -labels: bug -assignees: '' - ---- - -**Acknowledge the following** -- [ ] I carefully read and followed the [Getting Started](https://github.com/alshedivat/al-folio#getting-started) guide. -- [ ] I read through [FAQ](https://github.com/alshedivat/al-folio#faq) and searched through the [past issues](https://github.com/alshedivat/al-folio/issues), none of which addressed my issue. -- [ ] The issue I am raising is a potential bug in al-folio and not just a usage question.
[For usage questions, please post in the [Discussions](https://github.com/alshedivat/al-folio/discussions) instead of raising an issue.] - -**Describe the bug** -A clear and concise description of what the bug is. - -**To Reproduce** -Steps to reproduce the behavior: -1. Go to '...' -2. Click on '....' -3. Scroll down to '....' -4. See error - -**Expected behavior** -A clear and concise description of what you expected to happen. - -**Screenshots** -If applicable, add screenshots to help explain your problem. - -**System (please complete the following information):** - - OS: [e.g. iOS] - - Browser (and its version) [e.g. chrome, safari] - - Jekyll version [e.g. 3.8.7] -- Ruby version [e.g. 2.6.5] - -**Additional context** -Add any other context about the problem here. diff --git a/_posts/dropdown.md b/_posts/dropdown.md deleted file mode 100644 index 84a11232..00000000 --- a/_posts/dropdown.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -layout: page -title: past iterations -nav: true -nav_order: 99 -dropdown: true -children: - # - title: 2023 - # permalink: /about/ - # - title: divider - - title: 2022 - permalink: https://iclr-blog-track.github.io/home/ ---- \ No newline at end of file diff --git a/_posts/feature_request.md b/_posts/feature_request.md deleted file mode 100644 index 11fc491e..00000000 --- a/_posts/feature_request.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -name: Feature request -about: Suggest an idea for this project -title: '' -labels: enhancement -assignees: '' - ---- - -**Is your feature request related to a problem? Please describe.** -A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] - -**Describe the solution you'd like** -A clear and concise description of what you want to happen. - -**Describe alternatives you've considered** -A clear and concise description of any alternative solutions or features you've considered. 
- -**Additional context** -Add any other context or screenshots about the feature request here. diff --git a/_posts/reference.md b/_posts/reference.md deleted file mode 100644 index e8672454..00000000 --- a/_posts/reference.md +++ /dev/null @@ -1,426 +0,0 @@ ---- -layout: distill -title: Modeling Elephantfish Communication through Deep RNNs -description: Elephantfish represent a fascinating subject for study within the realms of bioacoustics and animal communication due to their unique use of electric fields for sensing and interaction. -date: 2022-12-01 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Bright Liu - url: "https://en.wikipedia.org/wiki/Albert_Einstein" - affiliations: - name: Harvard - - name: Anthony Rodriguez-Miranda - url: "https://en.wikipedia.org/wiki/Boris_Podolsky" - affiliations: - name: Harvard - -# must be the exact same name as your blogpost -bibliography: 2022-12-01-distill-example.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Equations - - name: Images and Figures - subsections: - - name: Interactive Figures - - name: Citations - - name: Footnotes - - name: Code Blocks - - name: Layouts - - name: Other Typography? - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. -_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Equations - -This theme supports rendering beautiful math in inline and display modes using [MathJax 3](https://www.mathjax.org/) engine. 
-You just need to surround your math expression with `$$`, like `$$ E = mc^2 $$`. -If you leave it inside a paragraph, it will produce an inline expression, just like $$ E = mc^2 $$. - -To use display mode, again surround your expression with `$$` and place it as a separate paragraph. -Here is an example: - -$$ -\left( \sum_{k=1}^n a_k b_k \right)^2 \leq \left( \sum_{k=1}^n a_k^2 \right) \left( \sum_{k=1}^n b_k^2 \right) -$$ - -Note that MathJax 3 is [a major re-write of MathJax](https://docs.mathjax.org/en/latest/upgrading/whats-new-3.0.html) that brought a significant improvement to the loading and rendering speed, which is now [on par with KaTeX](http://www.intmath.com/cg5/katex-mathjax-comparison.php). - - -## Images and Figures - -Its generally a better idea to avoid linking to images hosted elsewhere - links can break and you -might face losing important information in your blog post. -To include images in your submission in this way, you must do something like the following: - -```markdown -{% raw %}{% include figure.html path="assets/img/2022-12-01-distill-example/iclr.png" class="img-fluid" %}{% endraw %} -``` - -which results in the following image: - -{% include figure.html path="assets/img/2022-12-01-distill-example/iclr.png" class="img-fluid" %} - -To ensure that there are no namespace conflicts, you must save your asset to your unique directory -`/assets/img/2023-05-01-[SUBMISSION NAME]` within your submission. - -Please avoid using the direct markdown method of embedding images; they may not be properly resized. -Some more complex ways to load images (note the different styles of the shapes/shadows): - -
-
- {% include figure.html path="assets/img/2022-12-01-distill-example/9.jpg" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/2022-12-01-distill-example/7.jpg" class="img-fluid rounded z-depth-1" %} -
-
-
- A simple, elegant caption looks good between image rows, after each row, or doesn't have to be there at all. -
- -
-
- {% include figure.html path="assets/img/2022-12-01-distill-example/8.jpg" class="img-fluid z-depth-2" %} -
-
- {% include figure.html path="assets/img/2022-12-01-distill-example/10.jpg" class="img-fluid z-depth-2" %} -
-
- -
-
- {% include figure.html path="assets/img/2022-12-01-distill-example/11.jpg" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2022-12-01-distill-example/12.jpg" class="img-fluid" %} -
-
- {% include figure.html path="assets/img/2022-12-01-distill-example/7.jpg" class="img-fluid" %} -
-
- -### Interactive Figures - -Here's how you could embed interactive figures that have been exported as HTML files. -Note that we will be using plotly for this demo, but anything built off of HTML should work -(**no extra javascript is allowed!**). -All that's required is for you to export your figure into HTML format, and make sure that the file -exists in the `assets/html/[SUBMISSION NAME]/` directory in this repository's root directory. -To embed it into any page, simply insert the following code anywhere into your page. - -```markdown -{% raw %}{% include [FIGURE_NAME].html %}{% endraw %} -``` - -For example, the following code can be used to generate the figure underneath it. - -```python -import pandas as pd -import plotly.express as px - -df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/earthquakes-23k.csv') - -fig = px.density_mapbox( - df, lat='Latitude', lon='Longitude', z='Magnitude', radius=10, - center=dict(lat=0, lon=180), zoom=0, mapbox_style="stamen-terrain") -fig.show() - -fig.write_html('./assets/html/2022-12-01-distill-example/plotly_demo_1.html') -``` - -And then include it with the following: - -```html -{% raw %}
- -
{% endraw %} -``` - -Voila! - -
- -
- -## Citations - -Citations are then used in the article body with the `` tag. -The key attribute is a reference to the id provided in the bibliography. -The key attribute can take multiple ids, separated by commas. - -The citation is presented inline like this: (a number that displays more information on hover). -If you have an appendix, a bibliography is automatically created and populated in it. - -Distill chose a numerical inline citation style to improve readability of citation dense articles and because many of the benefits of longer citations are obviated by displaying more information on hover. -However, we consider it good style to mention author last names if you discuss something at length and it fits into the flow well — the authors are human and it’s nice for them to have the community associate them with their work. - -*** - -## Footnotes - -Just wrap the text you would like to show up in a footnote in a `` tag. -The number of the footnote will be automatically generated.This will become a hoverable footnote. - -*** - -## Code Blocks - -This theme implements a built-in Jekyll feature, the use of Rouge, for syntax highlighting. -It supports more than 100 languages. -This example is in C++. -All you have to do is wrap your code in a liquid tag: - -{% raw %} -{% highlight c++ linenos %}
code code code
{% endhighlight %} -{% endraw %} - -The keyword `linenos` triggers display of line numbers. You can try toggling it on or off yourself below: - -{% highlight c++ %} - -int main(int argc, char const \*argv[]) -{ -string myString; - - cout << "input a string: "; - getline(cin, myString); - int length = myString.length(); - - char charArray = new char * [length]; - - charArray = myString; - for(int i = 0; i < length; ++i){ - cout << charArray[i] << " "; - } - - return 0; -} - -{% endhighlight %} - -*** - -## Diagrams - -This theme supports generating various diagrams from a text description using [jekyll-diagrams](https://github.com/zhustec/jekyll-diagrams){:target="\_blank"} plugin. -Below, we generate a few examples of such diagrams using languages such as [mermaid](https://mermaid-js.github.io/mermaid/){:target="\_blank"}, [plantuml](https://plantuml.com/){:target="\_blank"}, [vega-lite](https://vega.github.io/vega-lite/){:target="\_blank"}, etc. - -**Note:** different diagram-generation packages require external dependencies to be installed on your machine. -Also, be mindful of that because of diagram generation the fist time you build your Jekyll website after adding new diagrams will be SLOW. -For any other details, please refer to [jekyll-diagrams](https://github.com/zhustec/jekyll-diagrams){:target="\_blank"} README. - -**Note:** This is not supported for local rendering! - -The diagram below was generated by the following code: - -{% raw %} -``` -{% mermaid %} -sequenceDiagram - participant John - participant Alice - Alice->>John: Hello John, how are you? - John-->>Alice: Great! -{% endmermaid %} -``` -{% endraw %} - -{% mermaid %} -sequenceDiagram -participant John -participant Alice -Alice->>John: Hello John, how are you? -John-->>Alice: Great! -{% endmermaid %} - -*** - -## Blockquotes - -
- We do not grow absolutely, chronologically. We grow sometimes in one dimension, and not in another, unevenly. We grow partially. We are relative. We are mature in one realm, childish in another. - —Anais Nin -
- -*** - - -## Layouts - -The main text column is referred to as the body. -It is the assumed layout of any direct descendants of the `d-article` element. - -
-

.l-body

-
- -For images you want to display a little larger, try `.l-page`: - -
-

.l-page

-
- -All of these have an outset variant if you want to poke out from the body text a little bit. -For instance: - -
-

.l-body-outset

-
- -
-

.l-page-outset

-
- -Occasionally you’ll want to use the full browser width. -For this, use `.l-screen`. -You can also inset the element a little from the edge of the browser by using the inset variant. - -
-

.l-screen

-
-
-

.l-screen-inset

-
- -The final layout is for marginalia, asides, and footnotes. -It does not interrupt the normal flow of `.l-body` sized text except on mobile screen sizes. - -
-

.l-gutter

-
- -*** - -## Other Typography? - -Emphasis, aka italics, with *asterisks* (`*asterisks*`) or _underscores_ (`_underscores_`). - -Strong emphasis, aka bold, with **asterisks** or __underscores__. - -Combined emphasis with **asterisks and _underscores_**. - -Strikethrough uses two tildes. ~~Scratch this.~~ - -1. First ordered list item -2. Another item -⋅⋅* Unordered sub-list. -1. Actual numbers don't matter, just that it's a number -⋅⋅1. Ordered sub-list -4. And another item. - -⋅⋅⋅You can have properly indented paragraphs within list items. Notice the blank line above, and the leading spaces (at least one, but we'll use three here to also align the raw Markdown). - -⋅⋅⋅To have a line break without a paragraph, you will need to use two trailing spaces.⋅⋅ -⋅⋅⋅Note that this line is separate, but within the same paragraph.⋅⋅ -⋅⋅⋅(This is contrary to the typical GFM line break behaviour, where trailing spaces are not required.) - -* Unordered list can use asterisks -- Or minuses -+ Or pluses - -[I'm an inline-style link](https://www.google.com) - -[I'm an inline-style link with title](https://www.google.com "Google's Homepage") - -[I'm a reference-style link][Arbitrary case-insensitive reference text] - -[I'm a relative reference to a repository file](../blob/master/LICENSE) - -[You can use numbers for reference-style link definitions][1] - -Or leave it empty and use the [link text itself]. - -URLs and URLs in angle brackets will automatically get turned into links. -http://www.example.com or and sometimes -example.com (but not on Github, for example). - -Some text to show that the reference links can follow later. 
- -[arbitrary case-insensitive reference text]: https://www.mozilla.org -[1]: http://slashdot.org -[link text itself]: http://www.reddit.com - -Here's our logo (hover to see the title text): - -Inline-style: -![alt text](https://github.com/adam-p/markdown-here/raw/master/src/common/images/icon48.png "Logo Title Text 1") - -Reference-style: -![alt text][logo] - -[logo]: https://github.com/adam-p/markdown-here/raw/master/src/common/images/icon48.png "Logo Title Text 2" - -Inline `code` has `back-ticks around` it. - -```javascript -var s = "JavaScript syntax highlighting"; -alert(s); -``` - -```python -s = "Python syntax highlighting" -print s -``` - -``` -No language indicated, so no syntax highlighting. -But let's throw in a tag. -``` - -Colons can be used to align columns. - -| Tables | Are | Cool | -| ------------- |:-------------:| -----:| -| col 3 is | right-aligned | $1600 | -| col 2 is | centered | $12 | -| zebra stripes | are neat | $1 | - -There must be at least 3 dashes separating each header cell. -The outer pipes (|) are optional, and you don't need to make the -raw Markdown line up prettily. You can also use inline Markdown. - -Markdown | Less | Pretty ---- | --- | --- -*Still* | `renders` | **nicely** -1 | 2 | 3 - -> Blockquotes are very handy in email to emulate reply text. -> This line is part of the same quote. - -Quote break. - -> This is a very long line that will still be quoted properly when it wraps. Oh boy let's keep writing to make sure this is long enough to actually wrap for everyone. Oh, you can *put* **Markdown** into a blockquote. - - -Here's a line for us to start with. - -This line is separated from the one above by two newlines, so it will be a *separate paragraph*. - -This line is also a separate paragraph, but... 
-This line is only separated by a single newline, so it's a separate line in the *same paragraph*.0px diff --git a/_projects/1_project.md b/_projects/1_project.md deleted file mode 100644 index 3f7cf783..00000000 --- a/_projects/1_project.md +++ /dev/null @@ -1,80 +0,0 @@ ---- -layout: page -title: project 1 -description: a project with a background image -img: assets/img/12.jpg -importance: 1 -category: work ---- - -Every project has a beautiful feature showcase page. -It's easy to include images in a flexible 3-column grid format. -Make your photos 1/3, 2/3, or full width. - -To give your project a background in the portfolio page, just add the img tag to the front matter like so: - - --- - layout: page - title: project - description: a project with a background image - img: /assets/img/12.jpg - --- - -
-
- {% include figure.html path="assets/img/1.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/3.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/5.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-
- Caption photos easily. On the left, a road goes through a tunnel. Middle, leaves artistically fall in a hipster photoshoot. Right, in another hipster photoshoot, a lumberjack grasps a handful of pine needles. -
-
-
- {% include figure.html path="assets/img/5.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-
- This image can also have a caption. It's like magic. -
- -You can also put regular text between your rows of images. -Say you wanted to write a little bit about your project before you posted the rest of the images. -You describe how you toiled, sweated, *bled* for your project, and then... you reveal its glory in the next row of images. - - -
-
- {% include figure.html path="assets/img/6.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/11.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-
- You can also have artistically styled 2/3 + 1/3 images, like these. -
- - -The code is simple. -Just wrap your images with `
` and place them inside `
` (read more about the Bootstrap Grid system). -To make images responsive, add `img-fluid` class to each; for rounded corners and shadows use `rounded` and `z-depth-1` classes. -Here's the code for the last row of images above: - -{% raw %} -```html -
-
- {% include figure.html path="assets/img/6.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/11.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-``` -{% endraw %} diff --git a/_projects/2_project.md b/_projects/2_project.md deleted file mode 100644 index bebf7961..00000000 --- a/_projects/2_project.md +++ /dev/null @@ -1,80 +0,0 @@ ---- -layout: page -title: project 2 -description: a project with a background image -img: assets/img/3.jpg -importance: 2 -category: work ---- - -Every project has a beautiful feature showcase page. -It's easy to include images in a flexible 3-column grid format. -Make your photos 1/3, 2/3, or full width. - -To give your project a background in the portfolio page, just add the img tag to the front matter like so: - - --- - layout: page - title: project - description: a project with a background image - img: /assets/img/12.jpg - --- - -
-
- {% include figure.html path="assets/img/1.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/3.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/5.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-
- Caption photos easily. On the left, a road goes through a tunnel. Middle, leaves artistically fall in a hipster photoshoot. Right, in another hipster photoshoot, a lumberjack grasps a handful of pine needles. -
-
-
- {% include figure.html path="assets/img/5.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-
- This image can also have a caption. It's like magic. -
- -You can also put regular text between your rows of images. -Say you wanted to write a little bit about your project before you posted the rest of the images. -You describe how you toiled, sweated, *bled* for your project, and then... you reveal its glory in the next row of images. - - -
-
- {% include figure.html path="assets/img/6.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/11.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-
- You can also have artistically styled 2/3 + 1/3 images, like these. -
- - -The code is simple. -Just wrap your images with `
` and place them inside `
` (read more about the Bootstrap Grid system). -To make images responsive, add `img-fluid` class to each; for rounded corners and shadows use `rounded` and `z-depth-1` classes. -Here's the code for the last row of images above: - -{% raw %} -```html -
-
- {% include figure.html path="assets/img/6.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/11.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-``` -{% endraw %} diff --git a/_projects/3_project.md b/_projects/3_project.md deleted file mode 100644 index 3f3cbf70..00000000 --- a/_projects/3_project.md +++ /dev/null @@ -1,81 +0,0 @@ ---- -layout: page -title: project 3 -description: a project that redirects to another website -img: assets/img/7.jpg -redirect: https://unsplash.com -importance: 3 -category: work ---- - -Every project has a beautiful feature showcase page. -It's easy to include images in a flexible 3-column grid format. -Make your photos 1/3, 2/3, or full width. - -To give your project a background in the portfolio page, just add the img tag to the front matter like so: - - --- - layout: page - title: project - description: a project with a background image - img: /assets/img/12.jpg - --- - -
-
- {% include figure.html path="assets/img/1.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/3.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/5.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-
- Caption photos easily. On the left, a road goes through a tunnel. Middle, leaves artistically fall in a hipster photoshoot. Right, in another hipster photoshoot, a lumberjack grasps a handful of pine needles. -
-
-
- {% include figure.html path="assets/img/5.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-
- This image can also have a caption. It's like magic. -
- -You can also put regular text between your rows of images. -Say you wanted to write a little bit about your project before you posted the rest of the images. -You describe how you toiled, sweated, *bled* for your project, and then... you reveal its glory in the next row of images. - - -
-
- {% include figure.html path="assets/img/6.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/11.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-
- You can also have artistically styled 2/3 + 1/3 images, like these. -
- - -The code is simple. -Just wrap your images with `
` and place them inside `
` (read more about the Bootstrap Grid system). -To make images responsive, add `img-fluid` class to each; for rounded corners and shadows use `rounded` and `z-depth-1` classes. -Here's the code for the last row of images above: - -{% raw %} -```html -
-
- {% include figure.html path="assets/img/6.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/11.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-``` -{% endraw %} diff --git a/_projects/4_project.md b/_projects/4_project.md deleted file mode 100644 index edb5dd25..00000000 --- a/_projects/4_project.md +++ /dev/null @@ -1,80 +0,0 @@ ---- -layout: page -title: project 4 -description: another without an image -img: -importance: 3 -category: fun ---- - -Every project has a beautiful feature showcase page. -It's easy to include images in a flexible 3-column grid format. -Make your photos 1/3, 2/3, or full width. - -To give your project a background in the portfolio page, just add the img tag to the front matter like so: - - --- - layout: page - title: project - description: a project with a background image - img: /assets/img/12.jpg - --- - -
-
- {% include figure.html path="assets/img/1.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/3.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/5.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-
- Caption photos easily. On the left, a road goes through a tunnel. Middle, leaves artistically fall in a hipster photoshoot. Right, in another hipster photoshoot, a lumberjack grasps a handful of pine needles. -
-
-
- {% include figure.html path="assets/img/5.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-
- This image can also have a caption. It's like magic. -
- -You can also put regular text between your rows of images. -Say you wanted to write a little bit about your project before you posted the rest of the images. -You describe how you toiled, sweated, *bled* for your project, and then... you reveal its glory in the next row of images. - - -
-
- {% include figure.html path="assets/img/6.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/11.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-
- You can also have artistically styled 2/3 + 1/3 images, like these. -
- - -The code is simple. -Just wrap your images with `
` and place them inside `
` (read more about the Bootstrap Grid system). -To make images responsive, add `img-fluid` class to each; for rounded corners and shadows use `rounded` and `z-depth-1` classes. -Here's the code for the last row of images above: - -{% raw %} -```html -
-
- {% include figure.html path="assets/img/6.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/11.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-``` -{% endraw %} diff --git a/_projects/5_project.md b/_projects/5_project.md deleted file mode 100644 index efd9b6cf..00000000 --- a/_projects/5_project.md +++ /dev/null @@ -1,80 +0,0 @@ ---- -layout: page -title: project 5 -description: a project with a background image -img: assets/img/1.jpg -importance: 3 -category: fun ---- - -Every project has a beautiful feature showcase page. -It's easy to include images in a flexible 3-column grid format. -Make your photos 1/3, 2/3, or full width. - -To give your project a background in the portfolio page, just add the img tag to the front matter like so: - - --- - layout: page - title: project - description: a project with a background image - img: /assets/img/12.jpg - --- - -
-
- {% include figure.html path="assets/img/1.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/3.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/5.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-
- Caption photos easily. On the left, a road goes through a tunnel. Middle, leaves artistically fall in a hipster photoshoot. Right, in another hipster photoshoot, a lumberjack grasps a handful of pine needles. -
-
-
- {% include figure.html path="assets/img/5.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-
- This image can also have a caption. It's like magic. -
- -You can also put regular text between your rows of images. -Say you wanted to write a little bit about your project before you posted the rest of the images. -You describe how you toiled, sweated, *bled* for your project, and then... you reveal its glory in the next row of images. - - -
-
- {% include figure.html path="assets/img/6.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/11.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-
- You can also have artistically styled 2/3 + 1/3 images, like these. -
- - -The code is simple. -Just wrap your images with `
` and place them inside `
` (read more about the Bootstrap Grid system). -To make images responsive, add `img-fluid` class to each; for rounded corners and shadows use `rounded` and `z-depth-1` classes. -Here's the code for the last row of images above: - -{% raw %} -```html -
-
- {% include figure.html path="assets/img/6.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/11.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-``` -{% endraw %} diff --git a/_projects/6_project.md b/_projects/6_project.md deleted file mode 100644 index 9a95d6e8..00000000 --- a/_projects/6_project.md +++ /dev/null @@ -1,80 +0,0 @@ ---- -layout: page -title: project 6 -description: a project with no image -img: -importance: 4 -category: fun ---- - -Every project has a beautiful feature showcase page. -It's easy to include images in a flexible 3-column grid format. -Make your photos 1/3, 2/3, or full width. - -To give your project a background in the portfolio page, just add the img tag to the front matter like so: - - --- - layout: page - title: project - description: a project with a background image - img: /assets/img/12.jpg - --- - -
-
- {% include figure.html path="assets/img/1.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/3.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/5.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-
- Caption photos easily. On the left, a road goes through a tunnel. Middle, leaves artistically fall in a hipster photoshoot. Right, in another hipster photoshoot, a lumberjack grasps a handful of pine needles. -
-
-
- {% include figure.html path="assets/img/5.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-
- This image can also have a caption. It's like magic. -
- -You can also put regular text between your rows of images. -Say you wanted to write a little bit about your project before you posted the rest of the images. -You describe how you toiled, sweated, *bled* for your project, and then... you reveal its glory in the next row of images. - - -
-
- {% include figure.html path="assets/img/6.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/11.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-
- You can also have artistically styled 2/3 + 1/3 images, like these. -
- - -The code is simple. -Just wrap your images with `
` and place them inside `
` (read more about the Bootstrap Grid system). -To make images responsive, add `img-fluid` class to each; for rounded corners and shadows use `rounded` and `z-depth-1` classes. -Here's the code for the last row of images above: - -{% raw %} -```html -
-
- {% include figure.html path="assets/img/6.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
- {% include figure.html path="assets/img/11.jpg" title="example image" class="img-fluid rounded z-depth-1" %} -
-
-``` -{% endraw %} diff --git a/_sass/_base.scss b/_sass/_base.scss deleted file mode 100644 index 7b826527..00000000 --- a/_sass/_base.scss +++ /dev/null @@ -1,658 +0,0 @@ -/******************************************************************************* - * Styles for the base elements of the theme. - ******************************************************************************/ - -// Typography - -p, h1, h2, h3, h4, h5, h6, em, div, li, span, strong { - color: var(--global-text-color); -} - -hr { - border-top: 1px solid var(--global-divider-color); -} - -table { - td, th { - color: var(--global-text-color); - } - td { - font-size: 1rem; - } -} - -a, table.table a { - color: var(--global-theme-color); - &:hover { - color: var(--global-theme-color); - text-decoration: underline; - } - &:hover:after :not(.nav-item.dropdown) { - width: 100%; - } -} - -figure, img { - max-width: 90vw; -} - -blockquote { - background: var(--global-bg-color); - border-left: 2px solid var(--global-theme-color); - margin: 1.5em 10px; - padding: 0.5em 10px; - font-size: 1.1rem; -} - -// Math - -.equation { - margin-bottom: 1rem; - text-align: center; -} - -// Caption - -.caption { - font-size: 0.875rem; - margin-top: 0.75rem; - margin-bottom: 1.5rem; - text-align: center; -} - -// Card - -.card { - background-color: var(--global-card-bg-color); - - img { - width: 100%; - } - - .card-title { - color: var(--global-text-color); - } - - .card-item { - width: auto; - margin-bottom: 10px; - - .row { - display: flex; - align-items: center; - } - } -} - -// Citation - -.citation, .citation-number { - color: var(--global-theme-color); -} - -// Profile - -.profile { - width: 100%; - - .address { - margin-bottom: 5px; - margin-top: 5px; - font-family: monospace; - p { - display: inline-block; - margin: 0; - } - } -} -.profile.float-right{ - margin-left: 1rem; -} -.profile.float-left{ - margin-right: 1rem; -} - -@media (min-width: 576px) { - .profile { - width: 30%; - .address { - p { display: block; 
} - } - } -} - -.post-description { - margin-bottom: 2rem; - font-size: 0.875rem; - a { - color: inherit; - &:hover { - color: var(--global-theme-color); - text-decoration: none; - } - } -} - - -// Navbar customization - -.navbar { - box-shadow: none; - border-bottom: 1px solid var(--global-divider-color); - background-color: var(--global-bg-color); - opacity: 0.95; -} -.navbar .dropdown-menu { - background-color: var(--global-bg-color); - border: 1px solid var(--global-divider-color); - a:not(.active) { - color: var(--global-text-color); - } - a:hover { - color: var(--global-hover-color); - } - .dropdown-divider { - border-top: 1px solid var(--global-divider-color) !important; - } -} -.dropdown-item { - color: var(--global-text-color); - &:hover { - color: var(--global-hover-color); - background-color: var(--global-bg-color); - } -} -.navbar.navbar-light { - a { - &:hover { - text-decoration: none; - } - } - .navbar-brand { - color: var(--global-text-color); - } - .navbar-nav .nav-item .nav-link { - color: var(--global-text-color); - &:hover { - color: var(--global-hover-color); - } - } - .navbar-nav .nav-item.active>.nav-link { - background-color: inherit; - font-weight: bolder; - color: var(--global-theme-color); - &:hover { - color: var(--global-hover-color); - } - } - .navbar-brand.social { - padding-bottom: 0; - padding-top: 0; - font-size: 1.7rem; - a { - i::before { - color: var(--global-text-color); - transition-property: all 0.2s ease-in-out; - } - &:hover { - i::before { - color: var(--global-theme-color); - } - } - } - } -} - -.navbar-toggler { - .icon-bar { - display: block; - width: 22px; - height: 2px; - background-color: var(--global-text-color); - border-radius: 1px; - margin-bottom: 4px; - transition: all 0.2s; - } - .top-bar { - transform: rotate(45deg); - transform-origin: 10% 10%; - } - .middle-bar { - opacity: 0; - } - .bottom-bar { - transform: rotate(-45deg); - transform-origin: 10% 90%; - } -} - -.navbar-toggler.collapsed { - .top-bar { - 
transform: rotate(0); - } - .middle-bar { - opacity: 1; - } - .bottom-bar { - transform: rotate(0); - } -} - -#light-toggle { - padding: 0; - border: 0; - background-color: inherit; - color: var(--global-text-color); - &:hover { - color: var(--global-hover-color); - } -} - -// Social (bottom) - -.social { - text-align: center; - .contact-icons { - font-size: 4rem; - a { - i::before { - color: var(--global-text-color); - transition-property: all 0.2s ease-in-out; - } - &:hover { - i::before { - color: var(--global-theme-color); - } - } - } - } - .contact-note { - font-size: 0.8rem; - } -} - - -// Footer -footer.fixed-bottom { - background-color: var(--global-footer-bg-color); - font-size: 0.75rem; - .container { - color: var(--global-footer-text-color); - padding-top: 9px; - padding-bottom: 8px; - } - a { - color: var(--global-footer-link-color); - &:hover { - color: var(--global-theme-color); - text-decoration: none; - } - } -} - -footer.sticky-bottom { - border-top: 1px solid var(--global-divider-color); - padding-top: 40px; - padding-bottom: 40px; - font-size: 0.9rem; -} - -// CV - -.cv { - margin-bottom: 40px; - - .card { - background-color: var(--global-card-bg-color); - border: 1px solid var(--global-divider-color); - - .list-group-item { - background-color: inherit; - - .badge { - color: var(--global-card-bg-color) !important; - background-color: var(--global-theme-color) !important; - } - } - } -} - -// Repositories - -@media (min-width: 768px) { - .repo { - max-width: 50%; - } -} - -// Blog - -.header-bar { - border-bottom: 1px solid var(--global-divider-color); - text-align: center; - padding-top: 2rem; - padding-bottom: 3rem; - h1 { - color: var(--global-theme-color); - font-size: 5rem; - } -} - -.tag-list { - border-bottom: 1px solid var(--global-divider-color); - text-align: center; - padding-top: 1rem; - - ul { - justify-content: center; - display: flow-root; - - p, li { - list-style: none; - display: inline-block; - padding: 1rem 0.5rem; - color: 
var(--global-text-color-light); - } - } -} - -.post-list { - margin: 0; - margin-bottom: 40px; - padding: 0; - li { - border-bottom: 1px solid var(--global-divider-color); - list-style: none; - padding-top: 2rem; - padding-bottom: 2rem; - .post-meta { - color: var(--global-text-color-light); - font-size: 0.875rem; - margin-bottom: 0; - } - .post-tags { - color: var(--global-text-color-light); - font-size: 0.875rem; - padding-top: 0.25rem; - padding-bottom: 0; - } - a { - color: var(--global-text-color); - text-decoration: none; - &:hover { - color: var(--global-theme-color); - } - } - } -} - -.pagination { - .page-item { - .page-link { - color: var(--global-text-color); - &:hover { - color: $black-color; - } - } - &.active .page-link { - color: $white-color; - background-color: var(--global-theme-color); - &:hover { - background-color: var(--global-theme-color); - } - } - } -} - - -// Distill - -.distill { - a:hover { - border-bottom-color: var(--global-theme-color); - text-decoration: none; - } -} - - -// Projects - -.projects { - a { - text-decoration: none; - - &:hover { - .card-title { - color: var(--global-theme-color); - } - } - } - - .card { - img { - width: 100%; - } - } - - .card-item { - width: auto; - margin-bottom: 10px; - - .row { - display: flex; - align-items: center; - } - } - - .grid-sizer, .grid-item { - width: 250px; - margin-bottom: 10px; - } - - h2.category { - color: var(--global-divider-color); - border-bottom: 1px solid var(--global-divider-color); - padding-top: 0.5rem; - margin-top: 2rem; - margin-bottom: 1rem; - text-align: right; - } -} - - -// Publications - -.publications { - margin-top: 2rem; - h1 { - color: var(--global-theme-color); - font-size: 2rem; - text-align: center; - margin-top: 1em; - margin-bottom: 1em; - } - h2 { - margin-bottom: 1rem; - span { - font-size: 1.5rem; - } - } - h2.year { - color: var(--global-divider-color); - border-top: 1px solid var(--global-divider-color); - padding-top: 1rem; - margin-top: 2rem; - 
margin-bottom: -2rem; - text-align: right; - } - ol.bibliography { - list-style: none; - padding: 0; - margin-top: 0; - - li { - margin-bottom: 1rem; - .preview { - width: 100%; - min-width: 80px; - max-width: 200px; - } - .abbr { - height: 2rem; - margin-bottom: 0.5rem; - abbr { - display: inline-block; - background-color: var(--global-theme-color); - padding-left: 1rem; - padding-right: 1rem; - a { - color: white; - &:hover { - text-decoration: none; - } - } - } - .award { - color: var(--global-theme-color) !important; - border: 1px solid var(--global-theme-color); - } - } - .title { - font-weight: bolder; - } - .author { - a { - border-bottom: 1px dashed var(--global-theme-color); - &:hover { - border-bottom-style: solid; - text-decoration: none; - } - } - > em { - border-bottom: 1px solid; - font-style: normal; - } - > span.more-authors { - color: var(--global-text-color-light); - border-bottom: 1px dashed var(--global-text-color-light); - cursor: pointer; - &:hover { - color: var(--global-text-color); - border-bottom: 1px dashed var(--global-text-color); - } - } - } - .links { - a.btn { - color: var(--global-text-color); - border: 1px solid var(--global-text-color); - padding-left: 1rem; - padding-right: 1rem; - padding-top: 0.25rem; - padding-bottom: 0.25rem; - &:hover { - color: var(--global-theme-color); - border-color: var(--global-theme-color); - } - } - } - .hidden { - font-size: 0.875rem; - max-height: 0px; - overflow: hidden; - text-align: justify; - transition-property: 0.15s ease; - -moz-transition: 0.15s ease; - -ms-transition: 0.15s ease; - -o-transition: 0.15s ease; - transition: all 0.15s ease; - - p { - line-height: 1.4em; - margin: 10px; - } - pre { - font-size: 1em; - line-height: 1.4em; - padding: 10px; - } - } - .hidden.open { - max-height: 100em; - transition-property: 0.15s ease; - -moz-transition: 0.15s ease; - -ms-transition: 0.15s ease; - -o-transition: 0.15s ease; - transition: all 0.15s ease; - } - div.abstract.hidden { - border: 
dashed 1px var(--global-bg-color); - } - div.abstract.hidden.open { - border-color: var(--global-text-color); - } - } - } -} - -// Rouge Color Customization -figure.highlight { - margin: 0 0 1rem; -} - -pre { - color: var(--global-theme-color); - background-color: var(--global-code-bg-color); - border-radius: 6px; - padding: 6px 12px; - pre, code { - background-color: transparent; - border-radius: 0; - padding: 0; - } -} - -code { - color: var(--global-theme-color); - background-color: var(--global-code-bg-color); - border-radius: 3px; - padding: 3px 3px; -} - - -// Transitioning Themes -html.transition, -html.transition *, -html.transition *:before, -html.transition *:after { - transition: all 750ms !important; - transition-delay: 0 !important; -} - -// Extra Markdown style (post Customization) -.post{ - .post-meta{ - color: var(--global-text-color-light); - font-size: 0.875rem; - margin-bottom: 0; - } - .post-tags{ - color: var(--global-text-color-light); - font-size: 0.875rem; - padding-top: 0.25rem; - padding-bottom: 1rem; - a { - color: var(--global-text-color-light); - text-decoration: none; - &:hover { - color: var(--global-theme-color); - } - } - } - .post-content{ - blockquote { - border-left: 5px solid var(--global-theme-color); - padding: 8px; - } - } -} diff --git a/_sass/_distill.scss b/_sass/_distill.scss deleted file mode 100644 index d83fafd4..00000000 --- a/_sass/_distill.scss +++ /dev/null @@ -1,126 +0,0 @@ -/******************************************************************************* - * Style overrides for distill blog posts. 
- ******************************************************************************/ - -d-byline { - border-top-color: var(--global-divider-color) !important; -} - -d-byline h3 { - color: var(--global-text-color) !important; -} - -d-byline a, d-article d-byline a { - color: var(--global-text-color) !important; - &:hover { - color: var(--global-hover-color) !important; - } -} - -d-article { - border-top-color: var(--global-divider-color) !important; - a, p, h1, h2, h3, h4, h5, h6, li, table { - color: var(--global-text-color) !important; - } - a, h1, h2, hr, table, table th, table td { - border-bottom-color: var(--global-divider-color) !important; - } - a:hover { - border-bottom-color: var(--global-hover-color) !important; - } - b i { - display: inline; - } - - d-contents { - align-self: start; - grid-column: 1 / 4; - grid-row: auto / span 4; - justify-self: end; - margin-top: 0em; - padding-left: 2em; - padding-right: 3em; - border-right: 1px solid var(--global-divider-color); - width: calc(max(70%, 300px)); - margin-right: 0px; - margin-top: 0em; - display: grid; - grid-template-columns: - minmax(8px, 1fr) [toc] auto - minmax(8px, 1fr) [toc-line] 1px - minmax(32px, 2fr); - - nav { - grid-column: toc; - a { - border-bottom: none !important; - &:hover { - border-bottom: 1px solid var(--global-text-color) !important; - } - } - h3 { - margin-top: 0; - margin-bottom: 1em; - } - div { - display: block; - outline: none; - margin-bottom: 0.8em; - color: rgba(0, 0, 0, 0.8); - font-weight: bold; - } - ul { - padding-left: 1em; - margin-top: 0; - margin-bottom: 6px; - list-style-type: none; - li { - margin-bottom: 0.25em; - } - } - } - .figcaption { - line-height: 1.4em; - } - toc-line { - border-right: 1px solid var(--global-divider-color); - grid-column: toc-line; - } - } - - d-footnote { - scroll-margin-top: 66px; - } -} - -d-appendix { - border-top-color: var(--global-divider-color) !important; - color: var(--global-distill-app-color) !important; - h3, li, span { - color: 
var(--global-distill-app-color) !important; - } - a, a.footnote-backlink { - color: var(--global-distill-app-color) !important; - &:hover { - color: var(--global-hover-color) !important; - } - } -} - -@media (max-width: 1024px) { - d-article { - d-contents { - display: block; - grid-column-start: 2; - grid-column-end: -2; - padding-bottom: 0.5em; - margin-bottom: 1em; - padding-top: 0.5em; - width: 100%; - border: 1px solid var(--global-divider-color); - nav { - grid-column: none; - } - } - } -} diff --git a/_sass/_layout.scss b/_sass/_layout.scss deleted file mode 100644 index 9c10cac7..00000000 --- a/_sass/_layout.scss +++ /dev/null @@ -1,50 +0,0 @@ -/****************************************************************************** - * Content - ******************************************************************************/ - -body { - padding-bottom: 70px; - color: var(--global-text-color); - background-color: var(--global-bg-color); - - h1, h2, h3, h4, h5, h6 { - scroll-margin-top: 66px; - } -} - -body.fixed-top-nav { - // Add some padding for the nav-bar. - padding-top: 56px; -} - -body.sticky-bottom-footer { - // Remove padding below footer. - padding-bottom: 0; -} - -.container { - max-width: $max-content-width; -} - -// Profile -.profile { - img { - width: 100%; - } -} - -// TODO: redefine content layout. - - -/****************************************************************************** - * Publications - ******************************************************************************/ - -// TODO: redefine publications layout. - - -/***************************************************************************** -* Projects -*****************************************************************************/ - -// TODO: redefine projects layout. 
diff --git a/_sass/_themes.scss b/_sass/_themes.scss deleted file mode 100644 index e3fe03fd..00000000 --- a/_sass/_themes.scss +++ /dev/null @@ -1,100 +0,0 @@ -/******************************************************************************* - * Themes - ******************************************************************************/ - -:root { - --global-bg-color: #{$white-color}; - --global-code-bg-color: #{$code-bg-color-light}; - --global-text-color: #{$black-color}; - --global-text-color-light: #{$grey-color}; - --global-theme-color: #{$cyan-color}; - --global-hover-color: #{$cyan-color}; - --global-footer-bg-color: #{$grey-color-dark}; - --global-footer-text-color: #{$grey-color-light}; - --global-footer-link-color: #{$white-color}; - --global-distill-app-color: #{$grey-color}; - --global-divider-color: rgba(0,0,0,.1); - --global-card-bg-color: #{$white-color}; - - .fa-sun { - display : none; - } - .fa-moon { - padding-left: 10px; - padding-top: 12px; - display : block; - } - - .repo-img-light { - display: block; - } - .repo-img-dark { - display: none; - } -} - -.header-background .img { - // background-image: url("../img/ICLR-logo.png"); - background-repeat: no-repeat; - background-size: 400px; - background-position: center bottom; - height: 12em; - margin-bottom: 0em; - margin-top: -2.7em; -} - -html[data-theme='dark'] { - --global-bg-color: #{$grey-color-dark}; - --global-code-bg-color: #{$code-bg-color-dark}; - --global-text-color: #{$grey-color-light}; - --global-text-color-light: #{$grey-color-light}; - --global-theme-color: #{$cyan-color}; - --global-hover-color: #{$cyan-color}; - --global-footer-bg-color: #{$grey-color-light}; - --global-footer-text-color: #{$grey-color-dark}; - --global-footer-link-color: #{$black-color}; - --global-distill-app-color: #{$grey-color-light}; - --global-divider-color: #424246; - --global-card-bg-color: #{$grey-900}; - - .fa-sun { - padding-left: 10px; - padding-top: 12px; - display : block; - } - .fa-moon { - display : 
none; - } - - .repo-img-light { - display: none; - } - .repo-img-dark { - display: block; - } - -.header-background .img { - // background-image: url("../img/ICLR-logo-dark.png"); - background-repeat: no-repeat; - background-size: 400px; - background-position: center bottom; - height: 12em; - margin-bottom: 0em; - margin-top: -2.7em; - // filter: invert(89%); -} - - - - - // .header-background .img { - // background-image: url("../img/score_contour.jpg"); - // background-repeat: no-repeat; - // background-size: cover; - // background-position: center bottom; - // height: 15em; - // margin-bottom: 2em; - // margin-top: -2.7em; - // filter: invert(89%); - // } -} diff --git a/_sass/_variables.scss b/_sass/_variables.scss deleted file mode 100644 index b050aa6e..00000000 --- a/_sass/_variables.scss +++ /dev/null @@ -1,38 +0,0 @@ -/******************************************************************************* - * Variables used throughout the theme. - * To adjust anything, simply edit the variables below and rebuild the theme. 
- ******************************************************************************/ - - -// Colors -$red-color: #FF3636 !default; -$red-color-dark: #B71C1C !default; -$orange-color: #F29105 !default; -$blue-color: #0076df !default; -$blue-color-dark: #00369f !default; -$cyan-color: #2698BA !default; -$light-cyan-color: lighten($cyan-color, 25%); -$green-color: #00ab37 !default; -$green-color-lime: #B7D12A !default; -$green-color-dark: #009f06 !default; -$green-color-light: #ddffdd !default; -$green-color-bright: #11D68B !default; -$purple-color: #B509AC !default; -$light-purple-color: lighten($purple-color, 25%); -$pink-color: #f92080 !default; -$pink-color-light: #ffdddd !default; -$yellow-color: #efcc00 !default; - -$grey-color: #828282 !default; -$grey-color-light: lighten($grey-color, 40%); -$grey-color-dark: #1C1C1D; -$grey-900: #212529; - -$white-color: #ffffff !default; -$black-color: #000000 !default; - - -// Theme colors - -$code-bg-color-light: rgba($purple-color, 0.05); -$code-bg-color-dark: #2c3237 !default; diff --git a/assets/bibliography/2023-11-09-molecule_generation.bib b/assets/bibliography/2023-11-09-molecule_generation.bib deleted file mode 100644 index 082e9db9..00000000 --- a/assets/bibliography/2023-11-09-molecule_generation.bib +++ /dev/null @@ -1,29 +0,0 @@ ---- ---- - -@string{aps = {American Physical Society,}} - -@article{HierarchicalModels, - bibtex_show={true}, - title={Coarse-to-Fine: a Hierarchical Diffusion Model for Molecule Generation in 3D}, - author={Qiang, Bo; Song, Yuxuan; Xu, Mink; Gong, Jingling; Tao, Bowen; Zhou, Hao; Ma, Weiying; Lan, Yanyan}, - journal={ArXiV}, - year={2023}, -} - -@article{Equivariant, - bibtex_show={true}, - title={Equivariant Diffusion for Molecule Generation in 3D}, - author={Hoogeboom, Emiel; Garcia Satorras, Victor; Vignac, Clement; Welling, Max}, - journal={ArXiV}, - year={2022}, -} - -@article{Geometric, - bibtex_show={true}, - title={Geometric Latent Diffusion Models for 3D Molecule Generation}, - 
author={Xu, Mink; Powers, Alexander; Door, Ron; Ermon, Stefano; Leskovec, June}, - journal={ArXiV}, - year={2022}, -} - diff --git a/assets/bibliography/2023-11-09-molecule_generation/index.html b/assets/bibliography/2023-11-09-molecule_generation/index.html new file mode 100644 index 00000000..c1cd95b8 --- /dev/null +++ b/assets/bibliography/2023-11-09-molecule_generation/index.html @@ -0,0 +1 @@ +

Coarse-to-Fine: a Hierarchical Diffusion Model for Molecule Generation in 3D. (2023). ArXiV.

Equivariant Diffusion for Molecule Generation in 3D. (2022). ArXiV.

Geometric Latent Diffusion Models for 3D Molecule Generation. (2022). ArXiV.

\ No newline at end of file diff --git a/assets/bibliography/mogesyeabsira_119715_3770730_papers.bib b/assets/bibliography/mogesyeabsira_119715_3770730_papers.bib deleted file mode 100644 index 4c96f7fd..00000000 --- a/assets/bibliography/mogesyeabsira_119715_3770730_papers.bib +++ /dev/null @@ -1,13 +0,0 @@ ---- ---- - -@string{aps = {GAN,}} - - -@article{einstein1950meaning, - abbr={GAN}, - bibtex_show={true}, - title={HOW POWERFUL ARE GRAPH NEURAL NETWORKS?}, - link={https://arxiv.org/pdf/1810.00826.pdf}, - year={2019}, -} \ No newline at end of file diff --git a/assets/bibliography/mogesyeabsira_119715_3770730_papers/index.html b/assets/bibliography/mogesyeabsira_119715_3770730_papers/index.html new file mode 100644 index 00000000..94626bf4 --- /dev/null +++ b/assets/bibliography/mogesyeabsira_119715_3770730_papers/index.html @@ -0,0 +1 @@ +

HOW POWERFUL ARE GRAPH NEURAL NETWORKS? (2019).

\ No newline at end of file diff --git a/assets/bibliography/papers.bib b/assets/bibliography/papers.bib deleted file mode 100644 index f91d23bf..00000000 --- a/assets/bibliography/papers.bib +++ /dev/null @@ -1,82 +0,0 @@ ---- ---- - -@string{aps = {American Physical Society,}} - -@book{einstein1956investigations, - bibtex_show={true}, - title={Investigations on the Theory of the Brownian Movement}, - author={Einstein, Albert}, - year={1956}, - publisher={Courier Corporation,}, - preview={brownian-motion.gif} -} - -@article{einstein1950meaning, - abbr={AJP}, - bibtex_show={true}, - title={The meaning of relativity}, - author={Einstein, Albert and Taub, AH}, - journal={American Journal of Physics,}, - volume={18}, - number={6}, - pages={403--404}, - year={1950}, - publisher={American Association of Physics Teachers,} -} - -@article{PhysRev.47.777, - abbr={PhysRev}, - title={Can Quantum-Mechanical Description of Physical Reality Be Considered Complete?}, - author={Einstein, A. and Podolsky, B. and Rosen, N.}, - abstract={In a complete theory there is an element corresponding to each element of reality. A sufficient condition for the reality of a physical quantity is the possibility of predicting it with certainty, without disturbing the system. In quantum mechanics in the case of two physical quantities described by non-commuting operators, the knowledge of one precludes the knowledge of the other. Then either (1) the description of reality given by the wave function in quantum mechanics is not complete or (2) these two quantities cannot have simultaneous reality. Consideration of the problem of making predictions concerning a system on the basis of measurements made on another system that had previously interacted with it leads to the result that if (1) is false then (2) is also false. One is thus led to conclude that the description of reality as given by a wave function is not complete.}, - journal={Phys. 
Rev.,}, - volume={47}, - issue={10}, - pages={777--780}, - numpages={0}, - year={1935}, - month={May}, - publisher=aps, - doi={10.1103/PhysRev.47.777}, - url={http://link.aps.org/doi/10.1103/PhysRev.47.777}, - html={https://journals.aps.org/pr/abstract/10.1103/PhysRev.47.777}, - pdf={example_pdf.pdf}, - selected={true} -} - -@article{einstein1905molekularkinetischen, - title={ber die von der molekularkinetischen Theorie der W{\"a}rme geforderte Bewegung von in ruhenden Flssigkeiten suspendierten Teilchen}, - author={Einstein, A.}, - journal={Annalen der physik,}, - volume={322}, - number={8}, - pages={549--560}, - year={1905}, - publisher={Wiley Online Library} -} - -@article{einstein1905movement, - abbr={Ann. Phys.}, - title={Un the movement of small particles suspended in statiunary liquids required by the molecular-kinetic theory 0f heat}, - author={Einstein, A.}, - journal={Ann. Phys.,}, - volume={17}, - pages={549--560}, - year={1905} -} - -@article{einstein1905electrodynamics, - title={On the electrodynamics of moving bodies}, - author={Einstein, A.}, - year={1905} -} - -@book{przibram1967letters, - bibtex_show={true}, - title={Letters on wave mechanics}, - author={Einstein, Albert and Schrödinger, Erwin and Planck, Max and Lorentz, Hendrik Antoon and Przibram, Karl}, - year={1967}, - publisher={Vision}, - preview={wave-mechanics.gif} -} diff --git a/assets/bibliography/papers/index.html b/assets/bibliography/papers/index.html new file mode 100644 index 00000000..65ea9366 --- /dev/null +++ b/assets/bibliography/papers/index.html @@ -0,0 +1 @@ +

Einstein, A. (1956). Investigations on the Theory of the Brownian Movement. Courier Corporation,

Einstein, A., & Taub, A. H. (1950). The meaning of relativity. American Journal of Physics, 18(6), 403–404.

Einstein, A., Podolsky, B., & Rosen, N. (1935). Can Quantum-Mechanical Description of Physical Reality Be Considered Complete? Phys. Rev., 47(10), 777–780. https://doi.org/10.1103/PhysRev.47.777

Einstein, A. (1905). ber die von der molekularkinetischen Theorie der Wärme geforderte Bewegung von in ruhenden Flssigkeiten suspendierten Teilchen. Annalen Der Physik, 322(8), 549–560.

Einstein, A. (1905). Un the movement of small particles suspended in statiunary liquids required by the molecular-kinetic theory 0f heat. Ann. Phys., 17, 549–560.

Einstein, A. (1905). On the electrodynamics of moving bodies.

Einstein, A., Schrödinger, E., Planck, M., Lorentz, H. A., & Przibram, K. (1967). Letters on wave mechanics. Vision.

\ No newline at end of file diff --git a/assets/css/main.css b/assets/css/main.css new file mode 100644 index 00000000..8a7a1dc4 --- /dev/null +++ b/assets/css/main.css @@ -0,0 +1,3 @@ +:root{--global-bg-color:#fff;--global-code-bg-color:rgba(181,9,172,0.05);--global-text-color:#000;--global-text-color-light:#828282;--global-theme-color:#2698ba;--global-hover-color:#2698ba;--global-footer-bg-color:#1c1c1d;--global-footer-text-color:#e8e8e8;--global-footer-link-color:#fff;--global-distill-app-color:#828282;--global-divider-color:rgba(0,0,0,.1);--global-card-bg-color:#fff}:root .fa-sun{display:none}:root .fa-moon{padding-left:10px;padding-top:12px;display:block}:root .repo-img-light{display:block}:root .repo-img-dark{display:none}.header-background .img{background-repeat:no-repeat;background-size:400px;background-position:center bottom;height:12em;margin-bottom:0;margin-top:-2.7em}html[data-theme=dark]{--global-bg-color:#1c1c1d;--global-code-bg-color:#2c3237;--global-text-color:#e8e8e8;--global-text-color-light:#e8e8e8;--global-theme-color:#2698ba;--global-hover-color:#2698ba;--global-footer-bg-color:#e8e8e8;--global-footer-text-color:#1c1c1d;--global-footer-link-color:#000;--global-distill-app-color:#e8e8e8;--global-divider-color:#424246;--global-card-bg-color:#212529}html[data-theme=dark] .fa-sun{padding-left:10px;padding-top:12px;display:block}html[data-theme=dark] .fa-moon{display:none}html[data-theme=dark] .repo-img-light{display:none}html[data-theme=dark] .repo-img-dark{display:block}html[data-theme=dark] .header-background .img{background-repeat:no-repeat;background-size:400px;background-position:center bottom;height:12em;margin-bottom:0;margin-top:-2.7em}body{padding-bottom:70px;color:var(--global-text-color);background-color:var(--global-bg-color)}body h1,body h2,body h3,body h4,body h5,body h6{scroll-margin-top:66px}body.fixed-top-nav{padding-top:56px}body.sticky-bottom-footer{padding-bottom:0}.container{max-width:1000px}.profile 
img{width:100%}p,h1,h2,h3,h4,h5,h6,em,div,li,span,strong{color:var(--global-text-color)}hr{border-top:1px solid var(--global-divider-color)}table td,table th{color:var(--global-text-color)}table td{font-size:1rem}a,table.table a{color:var(--global-theme-color)}a:hover,table.table a:hover{color:var(--global-theme-color);text-decoration:underline}a:hover:after :not(.nav-item.dropdown),table.table a:hover:after :not(.nav-item.dropdown){width:100%}figure,img{max-width:90vw}blockquote{background:var(--global-bg-color);border-left:2px solid var(--global-theme-color);margin:1.5em 10px;padding:.5em 10px;font-size:1.1rem}.equation{margin-bottom:1rem;text-align:center}.caption{font-size:.875rem;margin-top:.75rem;margin-bottom:1.5rem;text-align:center}.card{background-color:var(--global-card-bg-color)}.card img{width:100%}.card .card-title{color:var(--global-text-color)}.card .card-item{width:auto;margin-bottom:10px}.card .card-item .row{display:flex;align-items:center}.citation,.citation-number{color:var(--global-theme-color)}.profile{width:100%}.profile .address{margin-bottom:5px;margin-top:5px;font-family:monospace}.profile .address p{display:inline-block;margin:0}.profile.float-right{margin-left:1rem}.profile.float-left{margin-right:1rem}@media(min-width:576px){.profile{width:30%}.profile .address p{display:block}}.post-description{margin-bottom:2rem;font-size:.875rem}.post-description a{color:inherit}.post-description a:hover{color:var(--global-theme-color);text-decoration:none}.navbar{box-shadow:none;border-bottom:1px solid var(--global-divider-color);background-color:var(--global-bg-color);opacity:.95}.navbar .dropdown-menu{background-color:var(--global-bg-color);border:1px solid var(--global-divider-color)}.navbar .dropdown-menu a:not(.active){color:var(--global-text-color)}.navbar .dropdown-menu a:hover{color:var(--global-hover-color)}.navbar .dropdown-menu .dropdown-divider{border-top:1px solid 
var(--global-divider-color)!important}.dropdown-item{color:var(--global-text-color)}.dropdown-item:hover{color:var(--global-hover-color);background-color:var(--global-bg-color)}.navbar.navbar-light a:hover{text-decoration:none}.navbar.navbar-light .navbar-brand{color:var(--global-text-color)}.navbar.navbar-light .navbar-nav .nav-item .nav-link{color:var(--global-text-color)}.navbar.navbar-light .navbar-nav .nav-item .nav-link:hover{color:var(--global-hover-color)}.navbar.navbar-light .navbar-nav .nav-item.active>.nav-link{background-color:inherit;font-weight:bolder;color:var(--global-theme-color)}.navbar.navbar-light .navbar-nav .nav-item.active>.nav-link:hover{color:var(--global-hover-color)}.navbar.navbar-light .navbar-brand.social{padding-bottom:0;padding-top:0;font-size:1.7rem}.navbar.navbar-light .navbar-brand.social a i::before{color:var(--global-text-color);transition-property:all .2s ease-in-out}.navbar.navbar-light .navbar-brand.social a:hover i::before{color:var(--global-theme-color)}.navbar-toggler .icon-bar{display:block;width:22px;height:2px;background-color:var(--global-text-color);border-radius:1px;margin-bottom:4px;transition:all .2s}.navbar-toggler .top-bar{transform:rotate(45deg);transform-origin:10% 10%}.navbar-toggler .middle-bar{opacity:0}.navbar-toggler .bottom-bar{transform:rotate(-45deg);transform-origin:10% 90%} +.navbar-toggler.collapsed .top-bar{transform:rotate(0)}.navbar-toggler.collapsed .middle-bar{opacity:1}.navbar-toggler.collapsed .bottom-bar{transform:rotate(0)}#light-toggle{padding:0;border:0;background-color:inherit;color:var(--global-text-color)}#light-toggle:hover{color:var(--global-hover-color)}.social{text-align:center}.social .contact-icons{font-size:4rem}.social .contact-icons a i::before{color:var(--global-text-color);transition-property:all .2s ease-in-out}.social .contact-icons a:hover i::before{color:var(--global-theme-color)}.social 
.contact-note{font-size:.8rem}footer.fixed-bottom{background-color:var(--global-footer-bg-color);font-size:.75rem}footer.fixed-bottom .container{color:var(--global-footer-text-color);padding-top:9px;padding-bottom:8px}footer.fixed-bottom a{color:var(--global-footer-link-color)}footer.fixed-bottom a:hover{color:var(--global-theme-color);text-decoration:none}footer.sticky-bottom{border-top:1px solid var(--global-divider-color);padding-top:40px;padding-bottom:40px;font-size:.9rem}.cv{margin-bottom:40px}.cv .card{background-color:var(--global-card-bg-color);border:1px solid var(--global-divider-color)}.cv .card .list-group-item{background-color:inherit}.cv .card .list-group-item .badge{color:var(--global-card-bg-color)!important;background-color:var(--global-theme-color)!important}@media(min-width:768px){.repo{max-width:50%}}.header-bar{border-bottom:1px solid var(--global-divider-color);text-align:center;padding-top:2rem;padding-bottom:3rem}.header-bar h1{color:var(--global-theme-color);font-size:5rem}.tag-list{border-bottom:1px solid var(--global-divider-color);text-align:center;padding-top:1rem}.tag-list ul{justify-content:center;display:flow-root}.tag-list ul p,.tag-list ul li{list-style:none;display:inline-block;padding:1rem .5rem;color:var(--global-text-color-light)}.post-list{margin:0;margin-bottom:40px;padding:0}.post-list li{border-bottom:1px solid var(--global-divider-color);list-style:none;padding-top:2rem;padding-bottom:2rem}.post-list li .post-meta{color:var(--global-text-color-light);font-size:.875rem;margin-bottom:0}.post-list li .post-tags{color:var(--global-text-color-light);font-size:.875rem;padding-top:.25rem;padding-bottom:0}.post-list li a{color:var(--global-text-color);text-decoration:none}.post-list li a:hover{color:var(--global-theme-color)}.pagination .page-item .page-link{color:var(--global-text-color)}.pagination .page-item .page-link:hover{color:#000}.pagination .page-item.active 
.page-link{color:#fff;background-color:var(--global-theme-color)}.pagination .page-item.active .page-link:hover{background-color:var(--global-theme-color)}.distill a:hover{border-bottom-color:var(--global-theme-color);text-decoration:none}.projects a{text-decoration:none}.projects a:hover .card-title{color:var(--global-theme-color)}.projects .card img{width:100%}.projects .card-item{width:auto;margin-bottom:10px}.projects .card-item .row{display:flex;align-items:center}.projects .grid-sizer,.projects .grid-item{width:250px;margin-bottom:10px}.projects h2.category{color:var(--global-divider-color);border-bottom:1px solid var(--global-divider-color);padding-top:.5rem;margin-top:2rem;margin-bottom:1rem;text-align:right}.publications{margin-top:2rem}.publications h1{color:var(--global-theme-color);font-size:2rem;text-align:center;margin-top:1em;margin-bottom:1em}.publications h2{margin-bottom:1rem}.publications h2 span{font-size:1.5rem}.publications h2.year{color:var(--global-divider-color);border-top:1px solid var(--global-divider-color);padding-top:1rem;margin-top:2rem;margin-bottom:-2rem;text-align:right}.publications ol.bibliography{list-style:none;padding:0;margin-top:0}.publications ol.bibliography li{margin-bottom:1rem}.publications ol.bibliography li .preview{width:100%;min-width:80px;max-width:200px}.publications ol.bibliography li .abbr{height:2rem;margin-bottom:.5rem}.publications ol.bibliography li .abbr abbr{display:inline-block;background-color:var(--global-theme-color);padding-left:1rem;padding-right:1rem}.publications ol.bibliography li .abbr abbr a{color:white}.publications ol.bibliography li .abbr abbr a:hover{text-decoration:none}.publications ol.bibliography li .abbr .award{color:var(--global-theme-color)!important;border:1px solid var(--global-theme-color)}.publications ol.bibliography li .title{font-weight:bolder}.publications ol.bibliography li .author a{border-bottom:1px dashed var(--global-theme-color)}.publications ol.bibliography li .author 
a:hover{border-bottom-style:solid;text-decoration:none}.publications ol.bibliography li .author>em{border-bottom:1px solid;font-style:normal}.publications ol.bibliography li .author>span.more-authors{color:var(--global-text-color-light);border-bottom:1px dashed var(--global-text-color-light);cursor:pointer}.publications ol.bibliography li .author>span.more-authors:hover{color:var(--global-text-color);border-bottom:1px dashed var(--global-text-color)}.publications ol.bibliography li .links a.btn{color:var(--global-text-color);border:1px solid var(--global-text-color);padding-left:1rem;padding-right:1rem;padding-top:.25rem;padding-bottom:.25rem} +.publications ol.bibliography li .links a.btn:hover{color:var(--global-theme-color);border-color:var(--global-theme-color)}.publications ol.bibliography li .hidden{font-size:.875rem;max-height:0;overflow:hidden;text-align:justify;transition-property:.15s ease;-moz-transition:.15s ease;-ms-transition:.15s ease;-o-transition:.15s ease;transition:all .15s ease}.publications ol.bibliography li .hidden p{line-height:1.4em;margin:10px}.publications ol.bibliography li .hidden pre{font-size:1em;line-height:1.4em;padding:10px}.publications ol.bibliography li .hidden.open{max-height:100em;transition-property:.15s ease;-moz-transition:.15s ease;-ms-transition:.15s ease;-o-transition:.15s ease;transition:all .15s ease}.publications ol.bibliography li div.abstract.hidden{border:dashed 1px var(--global-bg-color)}.publications ol.bibliography li div.abstract.hidden.open{border-color:var(--global-text-color)}figure.highlight{margin:0 0 1rem}pre{color:var(--global-theme-color);background-color:var(--global-code-bg-color);border-radius:6px;padding:6px 12px}pre pre,pre code{background-color:transparent;border-radius:0;padding:0}code{color:var(--global-theme-color);background-color:var(--global-code-bg-color);border-radius:3px;padding:3px 3px}html.transition,html.transition *,html.transition *:before,html.transition *:after{transition:all 
750ms!important;transition-delay:0!important}.post .post-meta{color:var(--global-text-color-light);font-size:.875rem;margin-bottom:0}.post .post-tags{color:var(--global-text-color-light);font-size:.875rem;padding-top:.25rem;padding-bottom:1rem}.post .post-tags a{color:var(--global-text-color-light);text-decoration:none}.post .post-tags a:hover{color:var(--global-theme-color)}.post .post-content blockquote{border-left:5px solid var(--global-theme-color);padding:8px}d-byline{border-top-color:var(--global-divider-color)!important}d-byline h3{color:var(--global-text-color)!important}d-byline a,d-article d-byline a{color:var(--global-text-color)!important}d-byline a:hover,d-article d-byline a:hover{color:var(--global-hover-color)!important}d-article{border-top-color:var(--global-divider-color)!important}d-article a,d-article p,d-article h1,d-article h2,d-article h3,d-article h4,d-article h5,d-article h6,d-article li,d-article table{color:var(--global-text-color)!important}d-article a,d-article h1,d-article h2,d-article hr,d-article table,d-article table th,d-article table td{border-bottom-color:var(--global-divider-color)!important}d-article a:hover{border-bottom-color:var(--global-hover-color)!important}d-article b i{display:inline}d-article d-contents{align-self:start;grid-column:1/4;grid-row:auto/span 4;justify-self:end;margin-top:0;padding-left:2em;padding-right:3em;border-right:1px solid var(--global-divider-color);width:max(70%,300px);margin-right:0;margin-top:0;display:grid;grid-template-columns:minmax(8px,1fr) [toc] auto minmax(8px,1fr) [toc-line] 1px minmax(32px,2fr)}d-article d-contents nav{grid-column:toc}d-article d-contents nav a{border-bottom:none!important}d-article d-contents nav a:hover{border-bottom:1px solid var(--global-text-color)!important}d-article d-contents nav h3{margin-top:0;margin-bottom:1em}d-article d-contents nav div{display:block;outline:0;margin-bottom:.8em;color:rgba(0,0,0,0.8);font-weight:bold}d-article d-contents nav 
ul{padding-left:1em;margin-top:0;margin-bottom:6px;list-style-type:none}d-article d-contents nav ul li{margin-bottom:.25em}d-article d-contents .figcaption{line-height:1.4em}d-article d-contents toc-line{border-right:1px solid var(--global-divider-color);grid-column:toc-line}d-article d-footnote{scroll-margin-top:66px}d-appendix{border-top-color:var(--global-divider-color)!important;color:var(--global-distill-app-color)!important}d-appendix h3,d-appendix li,d-appendix span{color:var(--global-distill-app-color)!important}d-appendix a,d-appendix a.footnote-backlink{color:var(--global-distill-app-color)!important}d-appendix a:hover,d-appendix a.footnote-backlink:hover{color:var(--global-hover-color)!important}@media(max-width:1024px){d-article d-contents{display:block;grid-column-start:2;grid-column-end:-2;padding-bottom:.5em;margin-bottom:1em;padding-top:.5em;width:100%;border:1px solid var(--global-divider-color)}d-article d-contents nav{grid-column:none}} \ No newline at end of file diff --git a/assets/css/main.css.map b/assets/css/main.css.map new file mode 100644 index 00000000..4c3142e0 --- /dev/null +++ b/assets/css/main.css.map @@ -0,0 +1 @@ 
+{"version":3,"sourceRoot":"","sources":["../../_sass/_variables.scss","../../_sass/_themes.scss","../../_sass/_layout.scss","main.scss","../../_sass/_base.scss","../../_sass/_distill.scss"],"names":[],"mappings":"AAAA;AAAA;AAAA;AAAA;ACAA;AAAA;AAAA;AAIA;EACE;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;;AAEA;EACE;;AAEF;EACE;EACA;EACA;;AAGF;EACE;;AAEF;EACE;;;AAIJ;EAEE;EACA;EACA;EACA;EACA;EACA;;;AAGF;EACE;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;;AAEA;EACE;EACA;EACA;;AAEF;EACE;;AAGF;EACE;;AAEF;EACE;;AAGJ;EAEE;EACA;EACA;EACA;EACA;EACA;;;AClFF;AAAA;AAAA;AAIA;EACE;EACA;EACA;;AAEA;EACE;;;AAIJ;EAEE;;;AAGF;EAEE;;;AAGF;EACE,WCtBkB;;;AD2BlB;EACE;;;AAOJ;AAAA;AAAA;AAOA;AAAA;AAAA;AE7CA;AAAA;AAAA;AAMA;EACE;;;AAGF;EACE;;;AAIA;EACE;;AAEF;EACE;;;AAIJ;EACE;;AACA;EACE;EACA;;AAEF;EACE;;;AAIJ;EACE;;;AAGF;EACE;EACA;EACA;EACA;EACA;;;AAKF;EACE;EACA;;;AAKF;EACE;EACA;EACA;EACA;;;AAKF;EACE;;AAEA;EACE;;AAGF;EACE;;AAGF;EACE;EACA;;AAEA;EACE;EACA;;;AAON;EACE;;;AAKF;EACE;;AAEA;EACE;EACA;EACA;;AACA;EACE;EACA;;;AAIN;EACE;;;AAEF;EACE;;;AAGF;EACE;IACE;;EAEE;IAAI;;;AAKV;EACE;EACA;;AACA;EACE;;AACA;EACE;EACA;;;AAQN;EACE;EACA;EACA;EACA;;;AAEF;EACE;EACA;;AACA;EACE;;AAEF;EACE;;AAEF;EACE;;;AAGJ;EACE;;AACE;EACE;EACA;;;AAKF;EACE;;AAGJ;EACE;;AAEF;EACE;;AACA;EACE;;AAGJ;EACI;EACA;EACA;;AACA;EACE;;AAGN;EACE;EACA;EACA;;AAEE;EACE;EACA;;AAGA;EACE;;;AAQR;EACE;EACA;EACA;EACA;EACA;EACA;EACA;;AAEF;EACE;EACA;;AAEF;EACE;;AAEF;EACE;EACA;;;AAKF;EACE;;AAEF;EACE;;AAEF;EACE;;;AAIJ;EACE;EACA;EACA;EACA;;AACA;EACE;;;AAMJ;EACE;;AACA;EACE;;AAEE;EACE;EACA;;AAGA;EACE;;AAKR;EACE;;;AAMJ;EACE;EACA;;AACA;EACE;EACA;EACA;;AAEF;EACE;;AACA;EACE;EACA;;;AAKN;EACE;EACA;EACA;EACA;;;AAKF;EACE;;AAEA;EACE;EACA;;AAEA;EACE;;AAEA;EACE;EACA;;;AAQR;EACE;IACE;;;AAMJ;EACE;EACA;EACA;EACA;;AACA;EACE;EACA;;;AAIJ;EACE;EACA;EACA;;AAEA;EACE;EACA;;AAEA;EACE;EACA;EACA;EACA;;;AAKN;EACE;EACA;EACA;;AACA;EACE;EACA;EACA;EACA;;AACA;EACE;EACA;EACA;;AAEF;EACE;EACA;EACA;EACA;;AAEF;EACE;EACA;;AACA;EACE;;;AAQJ;EACE;;AACA;EACE,OJ1WM;;AI6WV;EACE,OJ/WQ;E
IgXR;;AACA;EACE;;;AAUN;EACE;EACA;;;AAQF;EACE;;AAGE;EACE;;AAMJ;EACE;;AAIJ;EACE;EACA;;AAEA;EACE;EACA;;AAIJ;EACE;EACA;;AAGF;EACE;EACA;EACA;EACA;EACA;EACA;;;AAOJ;EACE;;AACA;EACE;EACA;EACA;EACA;EACA;;AAEF;EACE;;AACA;EACE;;AAGJ;EACE;EACA;EACA;EACA;EACA;EACA;;AAEF;EACE;EACA;EACA;;AAEA;EACE;;AACA;EACE;EACA;EACA;;AAEF;EACE;EACA;;AACA;EACE;EACA;EACA;EACA;;AACA;EACE;;AACA;EACE;;AAIN;EACE;EACA;;AAGJ;EACE;;AAGA;EACE;;AACA;EACI;EACA;;AAGN;EACE;EACA;;AAEF;EACE;EACA;EACA;;AACA;EACI;EACA;;AAKN;EACE;EACA;EACA;EACA;EACA;EACA;;AACA;EACE;EACA;;AAIN;EACE;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;;AAEA;EACE;EACA;;AAEF;EACE;EACA;EACA;;AAGJ;EACE;EACA;EACA;EACA;EACA;EACA;;AAEF;EACE;;AAEF;EACE;;;AAOR;EACE;;;AAGF;EACE;EACA;EACA;EACA;;AACA;EACE;EACA;EACA;;;AAIJ;EACE;EACA;EACA;EACA;;;AAKF;AAAA;AAAA;AAAA;EAIE;EACA;;;AAKA;EACE;EACA;EACA;;AAEF;EACE;EACA;EACA;EACA;;AACA;EACE;EACA;;AACA;EACE;;AAKJ;EACE;EACA;;;AC9oBN;AAAA;AAAA;AAIA;EACE;;;AAGF;EACE;;;AAGF;EACE;;AACA;EACE;;;AAIJ;EACE;;AACA;EACE;;AAEF;EACE;;AAEF;EACE;;AAEF;EACE;;AAGF;EACE;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA,uBACE;;AAIF;EACE;;AACA;EACE;;AACA;EACE;;AAGJ;EACE;EACA;;AAEF;EACE;EACA;EACA;EACA;EACA;;AAEF;EACE;EACA;EACA;EACA;;AACA;EACE;;AAIN;EACE;;AAEF;EACE;EACA;;AAIJ;EACE;;;AAIJ;EACE;EACA;;AACA;EACE;;AAEF;EACE;;AACA;EACE;;;AAKN;EAEI;IACE;IACA;IACA;IACA;IACA;IACA;IACA;IACA;;EACA;IACE","sourcesContent":["/*******************************************************************************\n * Variables used throughout the theme.\n * To adjust anything, simply edit the variables below and rebuild the theme.\n ******************************************************************************/\n\n\n// Colors\n$red-color: #FF3636 !default;\n$red-color-dark: #B71C1C !default;\n$orange-color: #F29105 !default;\n$blue-color: #0076df !default;\n$blue-color-dark: #00369f !default;\n$cyan-color: #2698BA !default;\n$light-cyan-color: lighten($cyan-color, 25%);\n$green-color: #00ab37 !default;\n$green-color-lime: #B7D12A 
!default;\n$green-color-dark: #009f06 !default;\n$green-color-light: #ddffdd !default;\n$green-color-bright: #11D68B !default;\n$purple-color: #B509AC !default;\n$light-purple-color: lighten($purple-color, 25%);\n$pink-color: #f92080 !default;\n$pink-color-light: #ffdddd !default;\n$yellow-color: #efcc00 !default;\n\n$grey-color: #828282 !default;\n$grey-color-light: lighten($grey-color, 40%);\n$grey-color-dark: #1C1C1D;\n$grey-900: #212529;\n\n$white-color: #ffffff !default;\n$black-color: #000000 !default;\n\n\n// Theme colors\n\n$code-bg-color-light: rgba($purple-color, 0.05);\n$code-bg-color-dark: #2c3237 !default;\n","/*******************************************************************************\r\n * Themes\r\n ******************************************************************************/\r\n \r\n:root {\r\n --global-bg-color: #{$white-color};\r\n --global-code-bg-color: #{$code-bg-color-light};\r\n --global-text-color: #{$black-color};\r\n --global-text-color-light: #{$grey-color};\r\n --global-theme-color: #{$cyan-color};\r\n --global-hover-color: #{$cyan-color};\r\n --global-footer-bg-color: #{$grey-color-dark};\r\n --global-footer-text-color: #{$grey-color-light};\r\n --global-footer-link-color: #{$white-color};\r\n --global-distill-app-color: #{$grey-color};\r\n --global-divider-color: rgba(0,0,0,.1);\r\n --global-card-bg-color: #{$white-color};\r\n\r\n .fa-sun {\r\n display : none;\r\n }\r\n .fa-moon {\r\n padding-left: 10px;\r\n padding-top: 12px;\r\n display : block;\r\n }\r\n\r\n .repo-img-light {\r\n display: block;\r\n }\r\n .repo-img-dark {\r\n display: none;\r\n }\r\n}\r\n\r\n.header-background .img {\r\n // background-image: url(\"../img/ICLR-logo.png\");\r\n background-repeat: no-repeat;\r\n background-size: 400px;\r\n background-position: center bottom;\r\n height: 12em;\r\n margin-bottom: 0em;\r\n margin-top: -2.7em; \r\n}\r\n\r\nhtml[data-theme='dark'] {\r\n --global-bg-color: #{$grey-color-dark};\r\n --global-code-bg-color: 
#{$code-bg-color-dark};\r\n --global-text-color: #{$grey-color-light};\r\n --global-text-color-light: #{$grey-color-light};\r\n --global-theme-color: #{$cyan-color};\r\n --global-hover-color: #{$cyan-color};\r\n --global-footer-bg-color: #{$grey-color-light};\r\n --global-footer-text-color: #{$grey-color-dark};\r\n --global-footer-link-color: #{$black-color};\r\n --global-distill-app-color: #{$grey-color-light};\r\n --global-divider-color: #424246;\r\n --global-card-bg-color: #{$grey-900};\r\n\r\n .fa-sun {\r\n padding-left: 10px;\r\n padding-top: 12px;\r\n display : block;\r\n }\r\n .fa-moon {\r\n display : none;\r\n }\r\n\r\n .repo-img-light {\r\n display: none;\r\n }\r\n .repo-img-dark {\r\n display: block;\r\n }\r\n\r\n.header-background .img {\r\n // background-image: url(\"../img/ICLR-logo-dark.png\");\r\n background-repeat: no-repeat;\r\n background-size: 400px;\r\n background-position: center bottom;\r\n height: 12em;\r\n margin-bottom: 0em;\r\n margin-top: -2.7em; \r\n // filter: invert(89%);\r\n}\r\n\r\n\r\n\r\n\r\n // .header-background .img {\r\n // background-image: url(\"../img/score_contour.jpg\");\r\n // background-repeat: no-repeat;\r\n // background-size: cover;\r\n // background-position: center bottom;\r\n // height: 15em;\r\n // margin-bottom: 2em;\r\n // margin-top: -2.7em;\r\n // filter: invert(89%);\r\n // }\r\n}\r\n","/******************************************************************************\n * Content\n ******************************************************************************/\n\nbody {\n padding-bottom: 70px;\n color: var(--global-text-color);\n background-color: var(--global-bg-color);\n\n h1, h2, h3, h4, h5, h6 {\n scroll-margin-top: 66px;\n }\n}\n\nbody.fixed-top-nav {\n // Add some padding for the nav-bar.\n padding-top: 56px;\n}\n\nbody.sticky-bottom-footer {\n // Remove padding below footer.\n padding-bottom: 0;\n}\n\n.container {\n max-width: $max-content-width;\n}\n\n// Profile\n.profile {\n img {\n width: 100%;\n 
}\n}\n\n// TODO: redefine content layout.\n\n\n/******************************************************************************\n * Publications\n ******************************************************************************/\n\n// TODO: redefine publications layout.\n\n\n/*****************************************************************************\n* Projects\n*****************************************************************************/\n\n// TODO: redefine projects layout.\n","@charset \"utf-8\";\r\n\r\n// Dimensions\r\n$max-content-width: 1000px;\r\n\r\n@import\r\n \"variables\",\r\n \"themes\",\r\n \"layout\",\r\n \"base\",\r\n \"distill\"\r\n;\r\n","/*******************************************************************************\n * Styles for the base elements of the theme.\n ******************************************************************************/\n\n// Typography\n\np, h1, h2, h3, h4, h5, h6, em, div, li, span, strong {\n color: var(--global-text-color);\n}\n\nhr {\n border-top: 1px solid var(--global-divider-color);\n}\n\ntable {\n td, th {\n color: var(--global-text-color);\n }\n td {\n font-size: 1rem;\n }\n}\n\na, table.table a {\n color: var(--global-theme-color);\n &:hover {\n color: var(--global-theme-color);\n text-decoration: underline;\n }\n &:hover:after :not(.nav-item.dropdown) {\n width: 100%;\n }\n}\n\nfigure, img {\n max-width: 90vw;\n}\n\nblockquote {\n background: var(--global-bg-color);\n border-left: 2px solid var(--global-theme-color);\n margin: 1.5em 10px;\n padding: 0.5em 10px;\n font-size: 1.1rem;\n}\n\n// Math\n\n.equation {\n margin-bottom: 1rem;\n text-align: center;\n}\n\n// Caption\n\n.caption {\n font-size: 0.875rem;\n margin-top: 0.75rem;\n margin-bottom: 1.5rem;\n text-align: center;\n}\n\n// Card\n\n.card {\n background-color: var(--global-card-bg-color);\n\n img {\n width: 100%;\n }\n\n .card-title {\n color: var(--global-text-color);\n }\n\n .card-item {\n width: auto;\n margin-bottom: 10px;\n\n .row {\n display: 
flex;\n align-items: center;\n }\n }\n}\n\n// Citation\n\n.citation, .citation-number {\n color: var(--global-theme-color);\n}\n\n// Profile\n\n.profile {\n width: 100%;\n\n .address {\n margin-bottom: 5px;\n margin-top: 5px;\n font-family: monospace;\n p {\n display: inline-block;\n margin: 0;\n }\n }\n}\n.profile.float-right{\n margin-left: 1rem;\n}\n.profile.float-left{\n margin-right: 1rem;\n}\n\n@media (min-width: 576px) {\n .profile {\n width: 30%;\n .address {\n p { display: block; }\n }\n }\n}\n\n.post-description {\n margin-bottom: 2rem;\n font-size: 0.875rem;\n a {\n color: inherit;\n &:hover {\n color: var(--global-theme-color);\n text-decoration: none;\n }\n }\n}\n\n\n// Navbar customization\n\n.navbar {\n box-shadow: none;\n border-bottom: 1px solid var(--global-divider-color);\n background-color: var(--global-bg-color);\n opacity: 0.95;\n}\n.navbar .dropdown-menu {\n background-color: var(--global-bg-color);\n border: 1px solid var(--global-divider-color);\n a:not(.active) {\n color: var(--global-text-color);\n }\n a:hover {\n color: var(--global-hover-color);\n }\n .dropdown-divider {\n border-top: 1px solid var(--global-divider-color) !important;\n }\n}\n.dropdown-item {\n color: var(--global-text-color);\n &:hover {\n color: var(--global-hover-color);\n background-color: var(--global-bg-color);\n }\n}\n.navbar.navbar-light {\n a {\n &:hover {\n text-decoration: none;\n }\n }\n .navbar-brand {\n color: var(--global-text-color);\n }\n .navbar-nav .nav-item .nav-link {\n color: var(--global-text-color);\n &:hover {\n color: var(--global-hover-color);\n }\n }\n .navbar-nav .nav-item.active>.nav-link {\n background-color: inherit;\n font-weight: bolder;\n color: var(--global-theme-color);\n &:hover {\n color: var(--global-hover-color);\n }\n }\n .navbar-brand.social {\n padding-bottom: 0;\n padding-top: 0;\n font-size: 1.7rem;\n a {\n i::before {\n color: var(--global-text-color);\n transition-property: all 0.2s ease-in-out;\n }\n &:hover {\n i::before 
{\n color: var(--global-theme-color);\n }\n }\n }\n }\n}\n\n.navbar-toggler {\n .icon-bar {\n display: block;\n width: 22px;\n height: 2px;\n background-color: var(--global-text-color);\n border-radius: 1px;\n margin-bottom: 4px;\n transition: all 0.2s;\n }\n .top-bar {\n transform: rotate(45deg);\n transform-origin: 10% 10%;\n }\n .middle-bar {\n opacity: 0;\n }\n .bottom-bar {\n transform: rotate(-45deg);\n transform-origin: 10% 90%;\n }\n}\n\n.navbar-toggler.collapsed {\n .top-bar {\n transform: rotate(0);\n }\n .middle-bar {\n opacity: 1;\n }\n .bottom-bar {\n transform: rotate(0);\n }\n}\n\n#light-toggle {\n padding: 0;\n border: 0;\n background-color: inherit;\n color: var(--global-text-color);\n &:hover {\n color: var(--global-hover-color);\n }\n}\n\n// Social (bottom)\n\n.social {\n text-align: center;\n .contact-icons {\n font-size: 4rem;\n a {\n i::before {\n color: var(--global-text-color);\n transition-property: all 0.2s ease-in-out;\n }\n &:hover {\n i::before {\n color: var(--global-theme-color);\n }\n }\n }\n }\n .contact-note {\n font-size: 0.8rem;\n }\n}\n\n\n// Footer\nfooter.fixed-bottom {\n background-color: var(--global-footer-bg-color);\n font-size: 0.75rem;\n .container {\n color: var(--global-footer-text-color);\n padding-top: 9px;\n padding-bottom: 8px;\n }\n a {\n color: var(--global-footer-link-color);\n &:hover {\n color: var(--global-theme-color);\n text-decoration: none;\n }\n }\n}\n\nfooter.sticky-bottom {\n border-top: 1px solid var(--global-divider-color);\n padding-top: 40px;\n padding-bottom: 40px;\n font-size: 0.9rem;\n}\n\n// CV\n\n.cv {\n margin-bottom: 40px;\n \n .card {\n background-color: var(--global-card-bg-color);\n border: 1px solid var(--global-divider-color);\n \n .list-group-item {\n background-color: inherit;\n\n .badge {\n color: var(--global-card-bg-color) !important;\n background-color: var(--global-theme-color) !important;\n }\n }\n }\n}\n\n// Repositories\n\n@media (min-width: 768px) {\n .repo {\n max-width: 
50%;\n }\n}\n\n// Blog\n\n.header-bar {\n border-bottom: 1px solid var(--global-divider-color);\n text-align: center;\n padding-top: 2rem;\n padding-bottom: 3rem;\n h1 {\n color: var(--global-theme-color);\n font-size: 5rem;\n }\n}\n\n.tag-list {\n border-bottom: 1px solid var(--global-divider-color);\n text-align: center;\n padding-top: 1rem;\n\n ul {\n justify-content: center;\n display: flow-root;\n\n p, li {\n list-style: none;\n display: inline-block;\n padding: 1rem 0.5rem;\n color: var(--global-text-color-light);\n }\n }\n}\n\n.post-list {\n margin: 0;\n margin-bottom: 40px;\n padding: 0;\n li {\n border-bottom: 1px solid var(--global-divider-color);\n list-style: none;\n padding-top: 2rem;\n padding-bottom: 2rem;\n .post-meta {\n color: var(--global-text-color-light);\n font-size: 0.875rem;\n margin-bottom: 0;\n }\n .post-tags {\n color: var(--global-text-color-light);\n font-size: 0.875rem;\n padding-top: 0.25rem;\n padding-bottom: 0;\n }\n a {\n color: var(--global-text-color);\n text-decoration: none;\n &:hover {\n color: var(--global-theme-color);\n }\n }\n }\n}\n\n.pagination {\n .page-item {\n .page-link {\n color: var(--global-text-color);\n &:hover {\n color: $black-color;\n }\n }\n &.active .page-link {\n color: $white-color;\n background-color: var(--global-theme-color);\n &:hover {\n background-color: var(--global-theme-color);\n }\n }\n }\n}\n\n\n// Distill\n\n.distill {\n a:hover {\n border-bottom-color: var(--global-theme-color);\n text-decoration: none;\n }\n}\n\n\n// Projects\n\n.projects {\n a {\n text-decoration: none;\n\n &:hover {\n .card-title {\n color: var(--global-theme-color);\n }\n }\n }\n\n .card {\n img {\n width: 100%;\n }\n }\n\n .card-item {\n width: auto;\n margin-bottom: 10px;\n\n .row {\n display: flex;\n align-items: center;\n }\n }\n\n .grid-sizer, .grid-item {\n width: 250px;\n margin-bottom: 10px;\n }\n\n h2.category {\n color: var(--global-divider-color);\n border-bottom: 1px solid var(--global-divider-color);\n 
padding-top: 0.5rem;\n margin-top: 2rem;\n margin-bottom: 1rem;\n text-align: right;\n }\n}\n\n\n// Publications\n\n.publications {\n margin-top: 2rem;\n h1 {\n color: var(--global-theme-color);\n font-size: 2rem;\n text-align: center;\n margin-top: 1em;\n margin-bottom: 1em;\n }\n h2 {\n margin-bottom: 1rem;\n span {\n font-size: 1.5rem;\n }\n }\n h2.year {\n color: var(--global-divider-color);\n border-top: 1px solid var(--global-divider-color);\n padding-top: 1rem;\n margin-top: 2rem;\n margin-bottom: -2rem;\n text-align: right;\n }\n ol.bibliography {\n list-style: none;\n padding: 0;\n margin-top: 0;\n\n li {\n margin-bottom: 1rem;\n .preview {\n width: 100%;\n min-width: 80px;\n max-width: 200px;\n }\n .abbr {\n height: 2rem;\n margin-bottom: 0.5rem;\n abbr {\n display: inline-block;\n background-color: var(--global-theme-color);\n padding-left: 1rem;\n padding-right: 1rem;\n a {\n color: white;\n &:hover {\n text-decoration: none;\n }\n }\n }\n .award {\n color: var(--global-theme-color) !important;\n border: 1px solid var(--global-theme-color);\n }\n }\n .title {\n font-weight: bolder;\n }\n .author {\n a {\n border-bottom: 1px dashed var(--global-theme-color);\n &:hover {\n border-bottom-style: solid;\n text-decoration: none;\n }\n }\n > em {\n border-bottom: 1px solid;\n font-style: normal;\n }\n > span.more-authors {\n color: var(--global-text-color-light);\n border-bottom: 1px dashed var(--global-text-color-light);\n cursor: pointer;\n &:hover {\n color: var(--global-text-color);\n border-bottom: 1px dashed var(--global-text-color);\n }\n }\n }\n .links {\n a.btn {\n color: var(--global-text-color);\n border: 1px solid var(--global-text-color);\n padding-left: 1rem;\n padding-right: 1rem;\n padding-top: 0.25rem;\n padding-bottom: 0.25rem;\n &:hover {\n color: var(--global-theme-color);\n border-color: var(--global-theme-color);\n }\n }\n }\n .hidden {\n font-size: 0.875rem;\n max-height: 0px;\n overflow: hidden;\n text-align: justify;\n 
transition-property: 0.15s ease;\n -moz-transition: 0.15s ease;\n -ms-transition: 0.15s ease;\n -o-transition: 0.15s ease;\n transition: all 0.15s ease;\n\n p {\n line-height: 1.4em;\n margin: 10px;\n }\n pre {\n font-size: 1em;\n line-height: 1.4em;\n padding: 10px;\n }\n }\n .hidden.open {\n max-height: 100em;\n transition-property: 0.15s ease;\n -moz-transition: 0.15s ease;\n -ms-transition: 0.15s ease;\n -o-transition: 0.15s ease;\n transition: all 0.15s ease;\n }\n div.abstract.hidden {\n border: dashed 1px var(--global-bg-color);\n }\n div.abstract.hidden.open {\n border-color: var(--global-text-color);\n }\n }\n }\n}\n\n// Rouge Color Customization\nfigure.highlight {\n margin: 0 0 1rem;\n}\n\npre {\n color: var(--global-theme-color);\n background-color: var(--global-code-bg-color);\n border-radius: 6px;\n padding: 6px 12px;\n pre, code {\n background-color: transparent;\n border-radius: 0;\n padding: 0;\n }\n}\n\ncode {\n color: var(--global-theme-color);\n background-color: var(--global-code-bg-color);\n border-radius: 3px;\n padding: 3px 3px;\n}\n\n\n// Transitioning Themes\nhtml.transition,\nhtml.transition *,\nhtml.transition *:before,\nhtml.transition *:after {\n transition: all 750ms !important;\n transition-delay: 0 !important;\n}\n\n// Extra Markdown style (post Customization)\n.post{\n .post-meta{\n color: var(--global-text-color-light);\n font-size: 0.875rem;\n margin-bottom: 0;\n }\n .post-tags{\n color: var(--global-text-color-light);\n font-size: 0.875rem;\n padding-top: 0.25rem;\n padding-bottom: 1rem;\n a {\n color: var(--global-text-color-light);\n text-decoration: none;\n &:hover {\n color: var(--global-theme-color);\n }\n }\n }\n .post-content{\n blockquote {\n border-left: 5px solid var(--global-theme-color);\n padding: 8px;\n }\n }\n}\n","/*******************************************************************************\n * Style overrides for distill blog posts.\n 
******************************************************************************/\n\nd-byline {\n border-top-color: var(--global-divider-color) !important;\n}\n\nd-byline h3 {\n color: var(--global-text-color) !important;\n}\n\nd-byline a, d-article d-byline a {\n color: var(--global-text-color) !important;\n &:hover {\n color: var(--global-hover-color) !important;\n }\n}\n\nd-article {\n border-top-color: var(--global-divider-color) !important;\n a, p, h1, h2, h3, h4, h5, h6, li, table {\n color: var(--global-text-color) !important;\n }\n a, h1, h2, hr, table, table th, table td {\n border-bottom-color: var(--global-divider-color) !important;\n }\n a:hover {\n border-bottom-color: var(--global-hover-color) !important;\n }\n b i {\n display: inline;\n }\n\n d-contents {\n align-self: start;\n grid-column: 1 / 4;\n grid-row: auto / span 4;\n justify-self: end;\n margin-top: 0em;\n padding-left: 2em;\n padding-right: 3em;\n border-right: 1px solid var(--global-divider-color);\n width: calc(max(70%, 300px));\n margin-right: 0px;\n margin-top: 0em;\n display: grid;\n grid-template-columns:\n minmax(8px, 1fr) [toc] auto\n minmax(8px, 1fr) [toc-line] 1px\n minmax(32px, 2fr);\n\n nav {\n grid-column: toc;\n a {\n border-bottom: none !important;\n &:hover {\n border-bottom: 1px solid var(--global-text-color) !important;\n }\n }\n h3 {\n margin-top: 0;\n margin-bottom: 1em;\n }\n div {\n display: block;\n outline: none;\n margin-bottom: 0.8em;\n color: rgba(0, 0, 0, 0.8);\n font-weight: bold;\n }\n ul {\n padding-left: 1em;\n margin-top: 0;\n margin-bottom: 6px;\n list-style-type: none;\n li {\n margin-bottom: 0.25em;\n }\n }\n }\n .figcaption {\n line-height: 1.4em;\n }\n toc-line {\n border-right: 1px solid var(--global-divider-color);\n grid-column: toc-line;\n }\n }\n\n d-footnote {\n scroll-margin-top: 66px;\n }\n}\n\nd-appendix {\n border-top-color: var(--global-divider-color) !important;\n color: var(--global-distill-app-color) !important;\n h3, li, span {\n color: 
var(--global-distill-app-color) !important;\n }\n a, a.footnote-backlink {\n color: var(--global-distill-app-color) !important;\n &:hover {\n color: var(--global-hover-color) !important;\n }\n }\n}\n\n@media (max-width: 1024px) {\n d-article {\n d-contents {\n display: block;\n grid-column-start: 2;\n grid-column-end: -2;\n padding-bottom: 0.5em;\n margin-bottom: 1em;\n padding-top: 0.5em;\n width: 100%;\n border: 1px solid var(--global-divider-color);\n nav {\n grid-column: none;\n }\n }\n }\n}\n"],"file":"main.css"} \ No newline at end of file diff --git a/assets/css/main.scss b/assets/css/main.scss deleted file mode 100644 index cd74526a..00000000 --- a/assets/css/main.scss +++ /dev/null @@ -1,15 +0,0 @@ ---- -# Only the main Sass file needs front matter (the dashes are enough) ---- -@charset "utf-8"; - -// Dimensions -$max-content-width: {{ site.max_width }}; - -@import - "variables", - "themes", - "layout", - "base", - "distill" -; diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_6-9b_the_feature-1400.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_6-9b_the_feature-1400.webp new file mode 100644 index 00000000..d51d861a Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_6-9b_the_feature-1400.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_6-9b_the_feature-480.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_6-9b_the_feature-480.webp new file mode 100644 index 00000000..fd8f1b4c Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_6-9b_the_feature-480.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_6-9b_the_feature-800.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_6-9b_the_feature-800.webp new file mode 100644 index 00000000..d51d861a Binary 
files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_6-9b_the_feature-800.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/autoencoder-1400.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/autoencoder-1400.webp new file mode 100644 index 00000000..2031d977 Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/autoencoder-1400.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/autoencoder-480.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/autoencoder-480.webp new file mode 100644 index 00000000..57b3cfc0 Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/autoencoder-480.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/autoencoder-800.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/autoencoder-800.webp new file mode 100644 index 00000000..2031d977 Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/autoencoder-800.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/colon-feature-1338-1400.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/colon-feature-1338-1400.webp new file mode 100644 index 00000000..785df145 Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/colon-feature-1338-1400.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/colon-feature-1338-480.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/colon-feature-1338-480.webp new file mode 100644 index 00000000..eb76b483 Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/colon-feature-1338-480.webp differ diff 
--git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/colon-feature-1338-800.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/colon-feature-1338-800.webp new file mode 100644 index 00000000..785df145 Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/colon-feature-1338-800.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-bias-1400.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-bias-1400.webp new file mode 100644 index 00000000..131b4b3b Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-bias-1400.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-bias-480.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-bias-480.webp new file mode 100644 index 00000000..cc480259 Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-bias-480.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-bias-800.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-bias-800.webp new file mode 100644 index 00000000..131b4b3b Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-bias-800.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-scaling-1400.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-scaling-1400.webp new file mode 100644 index 00000000..b8c014f5 Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-scaling-1400.webp differ diff --git 
a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-scaling-480.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-scaling-480.webp new file mode 100644 index 00000000..3ba388b5 Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-scaling-480.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-scaling-800.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-scaling-800.webp new file mode 100644 index 00000000..b8c014f5 Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-scaling-800.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/interpretability-hard-cartoon-1400.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/interpretability-hard-cartoon-1400.webp new file mode 100644 index 00000000..8a628cd7 Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/interpretability-hard-cartoon-1400.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/interpretability-hard-cartoon-480.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/interpretability-hard-cartoon-480.webp new file mode 100644 index 00000000..2554dc28 Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/interpretability-hard-cartoon-480.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/interpretability-hard-cartoon-800.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/interpretability-hard-cartoon-800.webp new file mode 100644 index 00000000..8a628cd7 Binary files /dev/null and 
b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/interpretability-hard-cartoon-800.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-cosine-similarity-1400.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-cosine-similarity-1400.webp new file mode 100644 index 00000000..3d1a7a0a Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-cosine-similarity-1400.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-cosine-similarity-480.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-cosine-similarity-480.webp new file mode 100644 index 00000000..e6d076fb Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-cosine-similarity-480.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-cosine-similarity-800.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-cosine-similarity-800.webp new file mode 100644 index 00000000..3d1a7a0a Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-cosine-similarity-800.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-losses-1400.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-losses-1400.webp new file mode 100644 index 00000000..2066a845 Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-losses-1400.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-losses-480.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-losses-480.webp new file mode 100644 index 00000000..3a2b1190 Binary files /dev/null and 
b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-losses-480.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-losses-800.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-losses-800.webp new file mode 100644 index 00000000..2066a845 Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-losses-800.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sparse-autoencoder-1400.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sparse-autoencoder-1400.webp new file mode 100644 index 00000000..aa6e0168 Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sparse-autoencoder-1400.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sparse-autoencoder-480.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sparse-autoencoder-480.webp new file mode 100644 index 00000000..c5f4066f Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sparse-autoencoder-480.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sparse-autoencoder-800.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sparse-autoencoder-800.webp new file mode 100644 index 00000000..aa6e0168 Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sparse-autoencoder-800.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/transformer-with-sae-1400.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/transformer-with-sae-1400.webp new file mode 100644 index 00000000..c642f6a2 Binary files /dev/null and 
b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/transformer-with-sae-1400.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/transformer-with-sae-480.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/transformer-with-sae-480.webp new file mode 100644 index 00000000..a2190ae9 Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/transformer-with-sae-480.webp differ diff --git a/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/transformer-with-sae-800.webp b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/transformer-with-sae-800.webp new file mode 100644 index 00000000..c642f6a2 Binary files /dev/null and b/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/transformer-with-sae-800.webp differ diff --git a/assets/img/2022-11-10-Vision_Language_Limitations/clip_model-1400.webp b/assets/img/2022-11-10-Vision_Language_Limitations/clip_model-1400.webp new file mode 100644 index 00000000..01b5ea7d Binary files /dev/null and b/assets/img/2022-11-10-Vision_Language_Limitations/clip_model-1400.webp differ diff --git a/assets/img/2022-11-10-Vision_Language_Limitations/clip_model-480.webp b/assets/img/2022-11-10-Vision_Language_Limitations/clip_model-480.webp new file mode 100644 index 00000000..0920bd35 Binary files /dev/null and b/assets/img/2022-11-10-Vision_Language_Limitations/clip_model-480.webp differ diff --git a/assets/img/2022-11-10-Vision_Language_Limitations/clip_model-800.webp b/assets/img/2022-11-10-Vision_Language_Limitations/clip_model-800.webp new file mode 100644 index 00000000..01b5ea7d Binary files /dev/null and b/assets/img/2022-11-10-Vision_Language_Limitations/clip_model-800.webp differ diff --git a/assets/img/2022-11-10-Vision_Language_Limitations/dataset_bias-1400.webp b/assets/img/2022-11-10-Vision_Language_Limitations/dataset_bias-1400.webp new file mode 100644 
index 00000000..7b05ef66 Binary files /dev/null and b/assets/img/2022-11-10-Vision_Language_Limitations/dataset_bias-1400.webp differ diff --git a/assets/img/2022-11-10-Vision_Language_Limitations/dataset_bias-480.webp b/assets/img/2022-11-10-Vision_Language_Limitations/dataset_bias-480.webp new file mode 100644 index 00000000..6d16cf93 Binary files /dev/null and b/assets/img/2022-11-10-Vision_Language_Limitations/dataset_bias-480.webp differ diff --git a/assets/img/2022-11-10-Vision_Language_Limitations/dataset_bias-800.webp b/assets/img/2022-11-10-Vision_Language_Limitations/dataset_bias-800.webp new file mode 100644 index 00000000..7b05ef66 Binary files /dev/null and b/assets/img/2022-11-10-Vision_Language_Limitations/dataset_bias-800.webp differ diff --git a/assets/img/2022-11-10-Vision_Language_Limitations/fig_2-1400.webp b/assets/img/2022-11-10-Vision_Language_Limitations/fig_2-1400.webp new file mode 100644 index 00000000..95e92423 Binary files /dev/null and b/assets/img/2022-11-10-Vision_Language_Limitations/fig_2-1400.webp differ diff --git a/assets/img/2022-11-10-Vision_Language_Limitations/fig_2-480.webp b/assets/img/2022-11-10-Vision_Language_Limitations/fig_2-480.webp new file mode 100644 index 00000000..46b7716a Binary files /dev/null and b/assets/img/2022-11-10-Vision_Language_Limitations/fig_2-480.webp differ diff --git a/assets/img/2022-11-10-Vision_Language_Limitations/fig_2-800.webp b/assets/img/2022-11-10-Vision_Language_Limitations/fig_2-800.webp new file mode 100644 index 00000000..95e92423 Binary files /dev/null and b/assets/img/2022-11-10-Vision_Language_Limitations/fig_2-800.webp differ diff --git a/assets/img/2022-11-10-Vision_Language_Limitations/fig_3-1400.webp b/assets/img/2022-11-10-Vision_Language_Limitations/fig_3-1400.webp new file mode 100644 index 00000000..fcfe5e53 Binary files /dev/null and b/assets/img/2022-11-10-Vision_Language_Limitations/fig_3-1400.webp differ diff --git 
a/assets/img/2022-11-10-Vision_Language_Limitations/fig_3-480.webp b/assets/img/2022-11-10-Vision_Language_Limitations/fig_3-480.webp new file mode 100644 index 00000000..136fef3e Binary files /dev/null and b/assets/img/2022-11-10-Vision_Language_Limitations/fig_3-480.webp differ diff --git a/assets/img/2022-11-10-Vision_Language_Limitations/fig_3-800.webp b/assets/img/2022-11-10-Vision_Language_Limitations/fig_3-800.webp new file mode 100644 index 00000000..fcfe5e53 Binary files /dev/null and b/assets/img/2022-11-10-Vision_Language_Limitations/fig_3-800.webp differ diff --git a/assets/img/2023-11-01-Symmetry-Optimization-project/ICLR-logo-1400.webp b/assets/img/2023-11-01-Symmetry-Optimization-project/ICLR-logo-1400.webp new file mode 100644 index 00000000..d56968ba Binary files /dev/null and b/assets/img/2023-11-01-Symmetry-Optimization-project/ICLR-logo-1400.webp differ diff --git a/assets/img/2023-11-01-Symmetry-Optimization-project/ICLR-logo-480.webp b/assets/img/2023-11-01-Symmetry-Optimization-project/ICLR-logo-480.webp new file mode 100644 index 00000000..c9d42d7e Binary files /dev/null and b/assets/img/2023-11-01-Symmetry-Optimization-project/ICLR-logo-480.webp differ diff --git a/assets/img/2023-11-01-Symmetry-Optimization-project/ICLR-logo-800.webp b/assets/img/2023-11-01-Symmetry-Optimization-project/ICLR-logo-800.webp new file mode 100644 index 00000000..d56968ba Binary files /dev/null and b/assets/img/2023-11-01-Symmetry-Optimization-project/ICLR-logo-800.webp differ diff --git a/assets/img/2023-11-01-Symmetry-Optimization-project/ICLR-logo-dark-1400.webp b/assets/img/2023-11-01-Symmetry-Optimization-project/ICLR-logo-dark-1400.webp new file mode 100644 index 00000000..5ed49089 Binary files /dev/null and b/assets/img/2023-11-01-Symmetry-Optimization-project/ICLR-logo-dark-1400.webp differ diff --git a/assets/img/2023-11-01-Symmetry-Optimization-project/ICLR-logo-dark-480.webp 
b/assets/img/2023-11-01-Symmetry-Optimization-project/ICLR-logo-dark-480.webp new file mode 100644 index 00000000..7f0830c1 Binary files /dev/null and b/assets/img/2023-11-01-Symmetry-Optimization-project/ICLR-logo-dark-480.webp differ diff --git a/assets/img/2023-11-01-Symmetry-Optimization-project/ICLR-logo-dark-800.webp b/assets/img/2023-11-01-Symmetry-Optimization-project/ICLR-logo-dark-800.webp new file mode 100644 index 00000000..5ed49089 Binary files /dev/null and b/assets/img/2023-11-01-Symmetry-Optimization-project/ICLR-logo-dark-800.webp differ diff --git a/assets/img/2023-11-01-Symmetry-Optimization-project/acccifar-1400.webp b/assets/img/2023-11-01-Symmetry-Optimization-project/acccifar-1400.webp new file mode 100644 index 00000000..35998f7f Binary files /dev/null and b/assets/img/2023-11-01-Symmetry-Optimization-project/acccifar-1400.webp differ diff --git a/assets/img/2023-11-01-Symmetry-Optimization-project/acccifar-480.webp b/assets/img/2023-11-01-Symmetry-Optimization-project/acccifar-480.webp new file mode 100644 index 00000000..97f447fb Binary files /dev/null and b/assets/img/2023-11-01-Symmetry-Optimization-project/acccifar-480.webp differ diff --git a/assets/img/2023-11-01-Symmetry-Optimization-project/acccifar-800.webp b/assets/img/2023-11-01-Symmetry-Optimization-project/acccifar-800.webp new file mode 100644 index 00000000..35998f7f Binary files /dev/null and b/assets/img/2023-11-01-Symmetry-Optimization-project/acccifar-800.webp differ diff --git a/assets/img/2023-11-01-Symmetry-Optimization-project/accmnist-1400.webp b/assets/img/2023-11-01-Symmetry-Optimization-project/accmnist-1400.webp new file mode 100644 index 00000000..819d0cc0 Binary files /dev/null and b/assets/img/2023-11-01-Symmetry-Optimization-project/accmnist-1400.webp differ diff --git a/assets/img/2023-11-01-Symmetry-Optimization-project/accmnist-480.webp b/assets/img/2023-11-01-Symmetry-Optimization-project/accmnist-480.webp new file mode 100644 index 00000000..685feee0 
Binary files /dev/null and b/assets/img/2023-11-01-Symmetry-Optimization-project/accmnist-480.webp differ diff --git a/assets/img/2023-11-01-Symmetry-Optimization-project/accmnist-800.webp b/assets/img/2023-11-01-Symmetry-Optimization-project/accmnist-800.webp new file mode 100644 index 00000000..819d0cc0 Binary files /dev/null and b/assets/img/2023-11-01-Symmetry-Optimization-project/accmnist-800.webp differ diff --git a/assets/img/2023-11-01-Symmetry-Optimization-project/agd-1400.webp b/assets/img/2023-11-01-Symmetry-Optimization-project/agd-1400.webp new file mode 100644 index 00000000..d59b9ed8 Binary files /dev/null and b/assets/img/2023-11-01-Symmetry-Optimization-project/agd-1400.webp differ diff --git a/assets/img/2023-11-01-Symmetry-Optimization-project/agd-480.webp b/assets/img/2023-11-01-Symmetry-Optimization-project/agd-480.webp new file mode 100644 index 00000000..a6411994 Binary files /dev/null and b/assets/img/2023-11-01-Symmetry-Optimization-project/agd-480.webp differ diff --git a/assets/img/2023-11-01-Symmetry-Optimization-project/agd-800.webp b/assets/img/2023-11-01-Symmetry-Optimization-project/agd-800.webp new file mode 100644 index 00000000..d59b9ed8 Binary files /dev/null and b/assets/img/2023-11-01-Symmetry-Optimization-project/agd-800.webp differ diff --git a/assets/img/2023-11-01-Symmetry-Optimization-project/losscifar-1400.webp b/assets/img/2023-11-01-Symmetry-Optimization-project/losscifar-1400.webp new file mode 100644 index 00000000..eaa28ac0 Binary files /dev/null and b/assets/img/2023-11-01-Symmetry-Optimization-project/losscifar-1400.webp differ diff --git a/assets/img/2023-11-01-Symmetry-Optimization-project/losscifar-480.webp b/assets/img/2023-11-01-Symmetry-Optimization-project/losscifar-480.webp new file mode 100644 index 00000000..04c641c4 Binary files /dev/null and b/assets/img/2023-11-01-Symmetry-Optimization-project/losscifar-480.webp differ diff --git a/assets/img/2023-11-01-Symmetry-Optimization-project/losscifar-800.webp 
b/assets/img/2023-11-01-Symmetry-Optimization-project/losscifar-800.webp new file mode 100644 index 00000000..eaa28ac0 Binary files /dev/null and b/assets/img/2023-11-01-Symmetry-Optimization-project/losscifar-800.webp differ diff --git a/assets/img/2023-11-01-Symmetry-Optimization-project/lossmnist-1400.webp b/assets/img/2023-11-01-Symmetry-Optimization-project/lossmnist-1400.webp new file mode 100644 index 00000000..64c23d5b Binary files /dev/null and b/assets/img/2023-11-01-Symmetry-Optimization-project/lossmnist-1400.webp differ diff --git a/assets/img/2023-11-01-Symmetry-Optimization-project/lossmnist-480.webp b/assets/img/2023-11-01-Symmetry-Optimization-project/lossmnist-480.webp new file mode 100644 index 00000000..5511cc0d Binary files /dev/null and b/assets/img/2023-11-01-Symmetry-Optimization-project/lossmnist-480.webp differ diff --git a/assets/img/2023-11-01-Symmetry-Optimization-project/lossmnist-800.webp b/assets/img/2023-11-01-Symmetry-Optimization-project/lossmnist-800.webp new file mode 100644 index 00000000..64c23d5b Binary files /dev/null and b/assets/img/2023-11-01-Symmetry-Optimization-project/lossmnist-800.webp differ diff --git a/assets/img/2023-11-01-Symmetry-Optimization-project/reg-1400.webp b/assets/img/2023-11-01-Symmetry-Optimization-project/reg-1400.webp new file mode 100644 index 00000000..c81a3c8e Binary files /dev/null and b/assets/img/2023-11-01-Symmetry-Optimization-project/reg-1400.webp differ diff --git a/assets/img/2023-11-01-Symmetry-Optimization-project/reg-480.webp b/assets/img/2023-11-01-Symmetry-Optimization-project/reg-480.webp new file mode 100644 index 00000000..81e45ca8 Binary files /dev/null and b/assets/img/2023-11-01-Symmetry-Optimization-project/reg-480.webp differ diff --git a/assets/img/2023-11-01-Symmetry-Optimization-project/reg-800.webp b/assets/img/2023-11-01-Symmetry-Optimization-project/reg-800.webp new file mode 100644 index 00000000..c81a3c8e Binary files /dev/null and 
b/assets/img/2023-11-01-Symmetry-Optimization-project/reg-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/chicken_forward_reverse-1400.webp b/assets/img/2023-11-01-latent-interpolation/chicken_forward_reverse-1400.webp new file mode 100644 index 00000000..d029713d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/chicken_forward_reverse-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/chicken_forward_reverse-480.webp b/assets/img/2023-11-01-latent-interpolation/chicken_forward_reverse-480.webp new file mode 100644 index 00000000..0d1100a5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/chicken_forward_reverse-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/chicken_forward_reverse-800.webp b/assets/img/2023-11-01-latent-interpolation/chicken_forward_reverse-800.webp new file mode 100644 index 00000000..d029713d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/chicken_forward_reverse-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/clip_scores-1400.webp b/assets/img/2023-11-01-latent-interpolation/clip_scores-1400.webp new file mode 100644 index 00000000..b02f3d8c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/clip_scores-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/clip_scores-480.webp b/assets/img/2023-11-01-latent-interpolation/clip_scores-480.webp new file mode 100644 index 00000000..00e14326 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/clip_scores-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/clip_scores-800.webp b/assets/img/2023-11-01-latent-interpolation/clip_scores-800.webp new file mode 100644 index 00000000..b02f3d8c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/clip_scores-800.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_000-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_000-1400.webp new file mode 100644 index 00000000..9545facb Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_000-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_000-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_000-480.webp new file mode 100644 index 00000000..32e286ba Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_000-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_000-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_000-800.webp new file mode 100644 index 00000000..9545facb Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_000-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_001-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_001-1400.webp new file mode 100644 index 00000000..05170cef Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_001-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_001-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_001-480.webp new file mode 100644 index 00000000..abab0fe1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_001-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_001-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_001-800.webp new file mode 100644 index 00000000..05170cef Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_001-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_002-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_002-1400.webp new file mode 100644 index 00000000..55838a48 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_002-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_002-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_002-480.webp new file mode 100644 index 00000000..f16dc6c1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_002-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_002-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_002-800.webp new file mode 100644 index 00000000..55838a48 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_002-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_003-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_003-1400.webp new file mode 100644 index 00000000..893e3729 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_003-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_003-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_003-480.webp new file mode 100644 index 00000000..ce3df4ec Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_003-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_003-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_003-800.webp new file 
mode 100644 index 00000000..893e3729 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_003-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_004-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_004-1400.webp new file mode 100644 index 00000000..4329d68c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_004-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_004-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_004-480.webp new file mode 100644 index 00000000..4dd16174 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_004-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_004-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_004-800.webp new file mode 100644 index 00000000..4329d68c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_004-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_005-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_005-1400.webp new file mode 100644 index 00000000..304fac6e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_005-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_005-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_005-480.webp new file mode 100644 index 00000000..dc67b051 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_005-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_005-800.webp 
b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_005-800.webp new file mode 100644 index 00000000..304fac6e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_005-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_006-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_006-1400.webp new file mode 100644 index 00000000..b9e44d92 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_006-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_006-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_006-480.webp new file mode 100644 index 00000000..66b7b679 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_006-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_006-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_006-800.webp new file mode 100644 index 00000000..b9e44d92 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_006-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_007-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_007-1400.webp new file mode 100644 index 00000000..97a1feea Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_007-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_007-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_007-480.webp new file mode 100644 index 00000000..30e1da1c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_007-480.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_007-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_007-800.webp new file mode 100644 index 00000000..97a1feea Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_007-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_008-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_008-1400.webp new file mode 100644 index 00000000..effecdcc Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_008-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_008-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_008-480.webp new file mode 100644 index 00000000..60a5da8a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_008-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_008-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_008-800.webp new file mode 100644 index 00000000..effecdcc Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_008-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_009-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_009-1400.webp new file mode 100644 index 00000000..6c18680c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_009-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_009-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_009-480.webp new file mode 100644 index 00000000..aeda9c86 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_009-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_009-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_009-800.webp new file mode 100644 index 00000000..6c18680c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_009-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_010-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_010-1400.webp new file mode 100644 index 00000000..b7711d03 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_010-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_010-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_010-480.webp new file mode 100644 index 00000000..3548f9f5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_010-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_010-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_010-800.webp new file mode 100644 index 00000000..b7711d03 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_010-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_011-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_011-1400.webp new file mode 100644 index 00000000..d2fb997a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_011-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_011-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_011-480.webp new file 
mode 100644 index 00000000..c9287b99 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_011-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_011-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_011-800.webp new file mode 100644 index 00000000..d2fb997a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_011-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_012-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_012-1400.webp new file mode 100644 index 00000000..6cabf612 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_012-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_012-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_012-480.webp new file mode 100644 index 00000000..a8466213 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_012-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_012-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_012-800.webp new file mode 100644 index 00000000..6cabf612 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_012-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_013-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_013-1400.webp new file mode 100644 index 00000000..9bdd0a69 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_013-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_013-480.webp 
b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_013-480.webp new file mode 100644 index 00000000..381ca4ca Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_013-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_013-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_013-800.webp new file mode 100644 index 00000000..9bdd0a69 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_013-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_014-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_014-1400.webp new file mode 100644 index 00000000..e751dedd Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_014-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_014-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_014-480.webp new file mode 100644 index 00000000..e8b53e2b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_014-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_014-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_014-800.webp new file mode 100644 index 00000000..e751dedd Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_014-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_015-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_015-1400.webp new file mode 100644 index 00000000..b92d05a7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_015-1400.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_015-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_015-480.webp new file mode 100644 index 00000000..edc71525 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_015-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_015-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_015-800.webp new file mode 100644 index 00000000..b92d05a7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_015-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_016-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_016-1400.webp new file mode 100644 index 00000000..d1a22ffa Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_016-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_016-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_016-480.webp new file mode 100644 index 00000000..34d413aa Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_016-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_016-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_016-800.webp new file mode 100644 index 00000000..d1a22ffa Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_016-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_017-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_017-1400.webp new file mode 100644 index 00000000..3f61ad44 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_017-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_017-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_017-480.webp new file mode 100644 index 00000000..b0427730 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_017-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_017-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_017-800.webp new file mode 100644 index 00000000..3f61ad44 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_017-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_018-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_018-1400.webp new file mode 100644 index 00000000..fc36cf9d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_018-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_018-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_018-480.webp new file mode 100644 index 00000000..73775f3c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_018-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_018-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_018-800.webp new file mode 100644 index 00000000..fc36cf9d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_018-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_019-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_019-1400.webp new file 
mode 100644 index 00000000..79503032 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_019-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_019-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_019-480.webp new file mode 100644 index 00000000..52d6a856 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_019-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_019-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_019-800.webp new file mode 100644 index 00000000..79503032 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_019-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_020-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_020-1400.webp new file mode 100644 index 00000000..59125a84 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_020-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_020-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_020-480.webp new file mode 100644 index 00000000..48fc113e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_020-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_020-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_020-800.webp new file mode 100644 index 00000000..59125a84 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_020-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_021-1400.webp 
b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_021-1400.webp new file mode 100644 index 00000000..65100e3a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_021-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_021-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_021-480.webp new file mode 100644 index 00000000..bee00bce Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_021-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_021-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_021-800.webp new file mode 100644 index 00000000..65100e3a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_021-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_022-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_022-1400.webp new file mode 100644 index 00000000..993e0750 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_022-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_022-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_022-480.webp new file mode 100644 index 00000000..93353414 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_022-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_022-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_022-800.webp new file mode 100644 index 00000000..993e0750 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_022-800.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_023-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_023-1400.webp new file mode 100644 index 00000000..07be1916 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_023-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_023-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_023-480.webp new file mode 100644 index 00000000..5676a897 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_023-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_023-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_023-800.webp new file mode 100644 index 00000000..07be1916 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_023-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_024-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_024-1400.webp new file mode 100644 index 00000000..6f342ecb Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_024-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_024-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_024-480.webp new file mode 100644 index 00000000..5074856c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_024-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_024-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_024-800.webp new file mode 100644 index 00000000..6f342ecb Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_024-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_025-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_025-1400.webp new file mode 100644 index 00000000..ea7e2d2a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_025-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_025-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_025-480.webp new file mode 100644 index 00000000..13dbbec4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_025-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_025-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_025-800.webp new file mode 100644 index 00000000..ea7e2d2a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_025-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_026-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_026-1400.webp new file mode 100644 index 00000000..4fd53ef8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_026-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_026-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_026-480.webp new file mode 100644 index 00000000..e023bef6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_026-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_026-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_026-800.webp new file 
mode 100644 index 00000000..4fd53ef8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_026-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_027-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_027-1400.webp new file mode 100644 index 00000000..3f97be9f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_027-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_027-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_027-480.webp new file mode 100644 index 00000000..4431f386 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_027-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_027-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_027-800.webp new file mode 100644 index 00000000..3f97be9f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_027-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_028-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_028-1400.webp new file mode 100644 index 00000000..fd9c79c5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_028-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_028-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_028-480.webp new file mode 100644 index 00000000..264bd693 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_028-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_028-800.webp 
b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_028-800.webp new file mode 100644 index 00000000..fd9c79c5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_028-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_029-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_029-1400.webp new file mode 100644 index 00000000..8e6cddf7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_029-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_029-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_029-480.webp new file mode 100644 index 00000000..eec4859d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_029-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_029-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_029-800.webp new file mode 100644 index 00000000..8e6cddf7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_029-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_030-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_030-1400.webp new file mode 100644 index 00000000..f599da32 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_030-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_030-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_030-480.webp new file mode 100644 index 00000000..e67c127f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_030-480.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_030-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_030-800.webp new file mode 100644 index 00000000..f599da32 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_030-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_031-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_031-1400.webp new file mode 100644 index 00000000..08ea5753 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_031-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_031-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_031-480.webp new file mode 100644 index 00000000..ff23ff4b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_031-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_031-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_031-800.webp new file mode 100644 index 00000000..08ea5753 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_031-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_032-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_032-1400.webp new file mode 100644 index 00000000..c89901bd Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_032-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_032-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_032-480.webp new file mode 100644 index 00000000..fde63930 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_032-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_032-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_032-800.webp new file mode 100644 index 00000000..c89901bd Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_032-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_033-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_033-1400.webp new file mode 100644 index 00000000..0709ad6e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_033-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_033-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_033-480.webp new file mode 100644 index 00000000..6d3fe1d1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_033-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_033-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_033-800.webp new file mode 100644 index 00000000..0709ad6e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_033-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_034-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_034-1400.webp new file mode 100644 index 00000000..d034fef1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_034-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_034-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_034-480.webp new file 
mode 100644 index 00000000..33e5bcf7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_034-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_034-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_034-800.webp new file mode 100644 index 00000000..d034fef1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_034-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_035-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_035-1400.webp new file mode 100644 index 00000000..72f28e27 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_035-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_035-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_035-480.webp new file mode 100644 index 00000000..88045f71 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_035-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_035-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_035-800.webp new file mode 100644 index 00000000..72f28e27 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_035-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_036-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_036-1400.webp new file mode 100644 index 00000000..03a79890 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_036-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_036-480.webp 
b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_036-480.webp new file mode 100644 index 00000000..7112f066 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_036-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_036-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_036-800.webp new file mode 100644 index 00000000..03a79890 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_036-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_037-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_037-1400.webp new file mode 100644 index 00000000..0a493184 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_037-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_037-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_037-480.webp new file mode 100644 index 00000000..167d6c33 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_037-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_037-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_037-800.webp new file mode 100644 index 00000000..0a493184 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_037-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_038-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_038-1400.webp new file mode 100644 index 00000000..5f03570c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_038-1400.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_038-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_038-480.webp new file mode 100644 index 00000000..a0e03f9b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_038-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_038-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_038-800.webp new file mode 100644 index 00000000..5f03570c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_038-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_039-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_039-1400.webp new file mode 100644 index 00000000..6fd33ff6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_039-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_039-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_039-480.webp new file mode 100644 index 00000000..14559c66 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_039-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_039-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_039-800.webp new file mode 100644 index 00000000..6fd33ff6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_039-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_040-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_040-1400.webp new file mode 100644 index 00000000..92df46a5 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_040-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_040-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_040-480.webp new file mode 100644 index 00000000..ec6c1691 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_040-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_040-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_040-800.webp new file mode 100644 index 00000000..92df46a5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_040-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_041-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_041-1400.webp new file mode 100644 index 00000000..337cb07a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_041-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_041-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_041-480.webp new file mode 100644 index 00000000..f9e80512 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_041-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_041-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_041-800.webp new file mode 100644 index 00000000..337cb07a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_041-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_042-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_042-1400.webp new file 
mode 100644 index 00000000..241e2e4d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_042-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_042-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_042-480.webp new file mode 100644 index 00000000..bc2b4fbb Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_042-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_042-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_042-800.webp new file mode 100644 index 00000000..241e2e4d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_042-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_043-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_043-1400.webp new file mode 100644 index 00000000..a92e0d67 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_043-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_043-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_043-480.webp new file mode 100644 index 00000000..ac9fc00a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_043-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_043-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_043-800.webp new file mode 100644 index 00000000..a92e0d67 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_043-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_044-1400.webp 
b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_044-1400.webp new file mode 100644 index 00000000..ab97e68f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_044-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_044-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_044-480.webp new file mode 100644 index 00000000..976369a1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_044-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_044-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_044-800.webp new file mode 100644 index 00000000..ab97e68f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_044-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_045-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_045-1400.webp new file mode 100644 index 00000000..bc647077 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_045-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_045-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_045-480.webp new file mode 100644 index 00000000..d8d20c42 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_045-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_045-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_045-800.webp new file mode 100644 index 00000000..bc647077 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_045-800.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_046-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_046-1400.webp new file mode 100644 index 00000000..d28c97a0 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_046-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_046-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_046-480.webp new file mode 100644 index 00000000..ff268af3 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_046-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_046-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_046-800.webp new file mode 100644 index 00000000..d28c97a0 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_046-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_047-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_047-1400.webp new file mode 100644 index 00000000..732287ac Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_047-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_047-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_047-480.webp new file mode 100644 index 00000000..ccf97a30 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_047-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_047-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_047-800.webp new file mode 100644 index 00000000..732287ac Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_047-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_048-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_048-1400.webp new file mode 100644 index 00000000..0624162a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_048-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_048-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_048-480.webp new file mode 100644 index 00000000..3fe56805 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_048-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_048-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_048-800.webp new file mode 100644 index 00000000..0624162a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_048-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_049-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_049-1400.webp new file mode 100644 index 00000000..40fd493f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_049-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_049-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_049-480.webp new file mode 100644 index 00000000..a935885f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_049-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_049-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_049-800.webp new file 
mode 100644 index 00000000..40fd493f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_049-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_050-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_050-1400.webp new file mode 100644 index 00000000..9a5985b5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_050-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_050-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_050-480.webp new file mode 100644 index 00000000..ac8f766d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_050-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_050-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_050-800.webp new file mode 100644 index 00000000..9a5985b5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_050-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_051-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_051-1400.webp new file mode 100644 index 00000000..758eaf12 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_051-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_051-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_051-480.webp new file mode 100644 index 00000000..9018fe3c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_051-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_051-800.webp 
b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_051-800.webp new file mode 100644 index 00000000..758eaf12 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_051-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_052-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_052-1400.webp new file mode 100644 index 00000000..29cad667 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_052-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_052-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_052-480.webp new file mode 100644 index 00000000..c3b1b194 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_052-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_052-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_052-800.webp new file mode 100644 index 00000000..29cad667 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_052-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_053-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_053-1400.webp new file mode 100644 index 00000000..f0dd2400 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_053-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_053-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_053-480.webp new file mode 100644 index 00000000..4f125063 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_053-480.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_053-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_053-800.webp new file mode 100644 index 00000000..f0dd2400 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_053-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_054-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_054-1400.webp new file mode 100644 index 00000000..494333b8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_054-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_054-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_054-480.webp new file mode 100644 index 00000000..df261260 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_054-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_054-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_054-800.webp new file mode 100644 index 00000000..494333b8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_054-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_055-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_055-1400.webp new file mode 100644 index 00000000..da51cd33 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_055-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_055-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_055-480.webp new file mode 100644 index 00000000..b48bab3f Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_055-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_055-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_055-800.webp new file mode 100644 index 00000000..da51cd33 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_055-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_056-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_056-1400.webp new file mode 100644 index 00000000..1c70620b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_056-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_056-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_056-480.webp new file mode 100644 index 00000000..da1547c2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_056-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_056-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_056-800.webp new file mode 100644 index 00000000..1c70620b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_056-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_057-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_057-1400.webp new file mode 100644 index 00000000..8ebb9d08 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_057-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_057-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_057-480.webp new file 
mode 100644 index 00000000..e10294d4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_057-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_057-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_057-800.webp new file mode 100644 index 00000000..8ebb9d08 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_057-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_058-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_058-1400.webp new file mode 100644 index 00000000..9bd92cb5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_058-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_058-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_058-480.webp new file mode 100644 index 00000000..c0457673 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_058-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_058-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_058-800.webp new file mode 100644 index 00000000..9bd92cb5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_058-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_059-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_059-1400.webp new file mode 100644 index 00000000..f24fe2fb Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_059-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_059-480.webp 
b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_059-480.webp new file mode 100644 index 00000000..ff4c18bf Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_059-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_059-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_059-800.webp new file mode 100644 index 00000000..f24fe2fb Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_059-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_060-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_060-1400.webp new file mode 100644 index 00000000..936bd486 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_060-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_060-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_060-480.webp new file mode 100644 index 00000000..f740d32a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_060-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_060-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_060-800.webp new file mode 100644 index 00000000..936bd486 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_060-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_061-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_061-1400.webp new file mode 100644 index 00000000..4282341e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_061-1400.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_061-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_061-480.webp new file mode 100644 index 00000000..94ccad09 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_061-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_061-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_061-800.webp new file mode 100644 index 00000000..4282341e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_061-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_062-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_062-1400.webp new file mode 100644 index 00000000..65efa57a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_062-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_062-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_062-480.webp new file mode 100644 index 00000000..0796223a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_062-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_062-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_062-800.webp new file mode 100644 index 00000000..65efa57a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_062-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_063-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_063-1400.webp new file mode 100644 index 00000000..21ddd786 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_063-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_063-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_063-480.webp new file mode 100644 index 00000000..79bf1197 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_063-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_063-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_063-800.webp new file mode 100644 index 00000000..21ddd786 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_063-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_064-1400.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_064-1400.webp new file mode 100644 index 00000000..7ad69adf Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_064-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_064-480.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_064-480.webp new file mode 100644 index 00000000..42debd61 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_064-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_064-800.webp b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_064-800.webp new file mode 100644 index 00000000..7ad69adf Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/corrected_translation/img_064-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/endpoint_images-1400.webp b/assets/img/2023-11-01-latent-interpolation/endpoint_images-1400.webp new file mode 100644 index 
00000000..3ea350a6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/endpoint_images-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/endpoint_images-480.webp b/assets/img/2023-11-01-latent-interpolation/endpoint_images-480.webp new file mode 100644 index 00000000..e587256e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/endpoint_images-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/endpoint_images-800.webp b/assets/img/2023-11-01-latent-interpolation/endpoint_images-800.webp new file mode 100644 index 00000000..3ea350a6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/endpoint_images-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/extreme_case-1400.webp b/assets/img/2023-11-01-latent-interpolation/extreme_case-1400.webp new file mode 100644 index 00000000..bd1eb223 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/extreme_case-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/extreme_case-480.webp b/assets/img/2023-11-01-latent-interpolation/extreme_case-480.webp new file mode 100644 index 00000000..17574edd Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/extreme_case-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/extreme_case-800.webp b/assets/img/2023-11-01-latent-interpolation/extreme_case-800.webp new file mode 100644 index 00000000..bd1eb223 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/extreme_case-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/interpolations_comparison-1400.webp b/assets/img/2023-11-01-latent-interpolation/interpolations_comparison-1400.webp new file mode 100644 index 00000000..4d871fee Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/interpolations_comparison-1400.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/interpolations_comparison-480.webp b/assets/img/2023-11-01-latent-interpolation/interpolations_comparison-480.webp new file mode 100644 index 00000000..68109390 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/interpolations_comparison-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/interpolations_comparison-800.webp b/assets/img/2023-11-01-latent-interpolation/interpolations_comparison-800.webp new file mode 100644 index 00000000..4d871fee Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/interpolations_comparison-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/latent_dim-1400.webp b/assets/img/2023-11-01-latent-interpolation/latent_dim-1400.webp new file mode 100644 index 00000000..7b4d0edd Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/latent_dim-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/latent_dim-480.webp b/assets/img/2023-11-01-latent-interpolation/latent_dim-480.webp new file mode 100644 index 00000000..1dedcfc8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/latent_dim-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/latent_dim-800.webp b/assets/img/2023-11-01-latent-interpolation/latent_dim-800.webp new file mode 100644 index 00000000..7b4d0edd Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/latent_dim-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/latent_interpolation-1400.webp b/assets/img/2023-11-01-latent-interpolation/latent_interpolation-1400.webp new file mode 100644 index 00000000..87531582 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/latent_interpolation-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/latent_interpolation-480.webp b/assets/img/2023-11-01-latent-interpolation/latent_interpolation-480.webp new 
file mode 100644 index 00000000..07246b8f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/latent_interpolation-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/latent_interpolation-800.webp b/assets/img/2023-11-01-latent-interpolation/latent_interpolation-800.webp new file mode 100644 index 00000000..87531582 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/latent_interpolation-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/linear_interpolation-1400.webp b/assets/img/2023-11-01-latent-interpolation/linear_interpolation-1400.webp new file mode 100644 index 00000000..760a103a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/linear_interpolation-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/linear_interpolation-480.webp b/assets/img/2023-11-01-latent-interpolation/linear_interpolation-480.webp new file mode 100644 index 00000000..5f985e63 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/linear_interpolation-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/linear_interpolation-800.webp b/assets/img/2023-11-01-latent-interpolation/linear_interpolation-800.webp new file mode 100644 index 00000000..760a103a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/linear_interpolation-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/normalized_interpolation-1400.webp b/assets/img/2023-11-01-latent-interpolation/normalized_interpolation-1400.webp new file mode 100644 index 00000000..9b77bfbd Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/normalized_interpolation-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/normalized_interpolation-480.webp b/assets/img/2023-11-01-latent-interpolation/normalized_interpolation-480.webp new file mode 100644 index 00000000..1d681313 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/normalized_interpolation-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/normalized_interpolation-800.webp b/assets/img/2023-11-01-latent-interpolation/normalized_interpolation-800.webp new file mode 100644 index 00000000..9b77bfbd Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/normalized_interpolation-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/slerp_interpolation-1400.webp b/assets/img/2023-11-01-latent-interpolation/slerp_interpolation-1400.webp new file mode 100644 index 00000000..47a1c1bf Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/slerp_interpolation-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/slerp_interpolation-480.webp b/assets/img/2023-11-01-latent-interpolation/slerp_interpolation-480.webp new file mode 100644 index 00000000..5a0c6757 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/slerp_interpolation-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/slerp_interpolation-800.webp b/assets/img/2023-11-01-latent-interpolation/slerp_interpolation-800.webp new file mode 100644 index 00000000..47a1c1bf Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/slerp_interpolation-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/text_interpolation-1400.webp b/assets/img/2023-11-01-latent-interpolation/text_interpolation-1400.webp new file mode 100644 index 00000000..67a3babb Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/text_interpolation-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/text_interpolation-480.webp b/assets/img/2023-11-01-latent-interpolation/text_interpolation-480.webp new file mode 100644 index 00000000..4433cab0 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/text_interpolation-480.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/text_interpolation-800.webp b/assets/img/2023-11-01-latent-interpolation/text_interpolation-800.webp new file mode 100644 index 00000000..67a3babb Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/text_interpolation-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_000-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_000-1400.webp new file mode 100644 index 00000000..2e4c192c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_000-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_000-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_000-480.webp new file mode 100644 index 00000000..bd558a63 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_000-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_000-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_000-800.webp new file mode 100644 index 00000000..2e4c192c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_000-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_001-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_001-1400.webp new file mode 100644 index 00000000..cf9b3c13 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_001-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_001-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_001-480.webp new file mode 100644 index 00000000..e64ed093 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_001-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_001-800.webp 
b/assets/img/2023-11-01-latent-interpolation/translation/img_001-800.webp new file mode 100644 index 00000000..cf9b3c13 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_001-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_002-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_002-1400.webp new file mode 100644 index 00000000..dd6e25a9 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_002-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_002-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_002-480.webp new file mode 100644 index 00000000..ac857ba1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_002-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_002-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_002-800.webp new file mode 100644 index 00000000..dd6e25a9 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_002-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_003-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_003-1400.webp new file mode 100644 index 00000000..e759705c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_003-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_003-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_003-480.webp new file mode 100644 index 00000000..09cf4479 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_003-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_003-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_003-800.webp new file mode 100644 index 
00000000..e759705c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_003-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_004-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_004-1400.webp new file mode 100644 index 00000000..5976faae Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_004-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_004-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_004-480.webp new file mode 100644 index 00000000..15d97a38 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_004-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_004-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_004-800.webp new file mode 100644 index 00000000..5976faae Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_004-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_005-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_005-1400.webp new file mode 100644 index 00000000..42d2006f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_005-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_005-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_005-480.webp new file mode 100644 index 00000000..59b2e8c2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_005-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_005-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_005-800.webp new file mode 100644 index 00000000..42d2006f Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/translation/img_005-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_006-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_006-1400.webp new file mode 100644 index 00000000..84f89fbc Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_006-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_006-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_006-480.webp new file mode 100644 index 00000000..14628a7c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_006-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_006-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_006-800.webp new file mode 100644 index 00000000..84f89fbc Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_006-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_007-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_007-1400.webp new file mode 100644 index 00000000..86cfbf49 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_007-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_007-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_007-480.webp new file mode 100644 index 00000000..4d0bf10b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_007-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_007-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_007-800.webp new file mode 100644 index 00000000..86cfbf49 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_007-800.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/translation/img_008-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_008-1400.webp new file mode 100644 index 00000000..1f95b0ee Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_008-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_008-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_008-480.webp new file mode 100644 index 00000000..10b5ff4a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_008-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_008-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_008-800.webp new file mode 100644 index 00000000..1f95b0ee Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_008-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_009-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_009-1400.webp new file mode 100644 index 00000000..6e587efe Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_009-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_009-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_009-480.webp new file mode 100644 index 00000000..e2ec32f9 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_009-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_009-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_009-800.webp new file mode 100644 index 00000000..6e587efe Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_009-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_010-1400.webp 
b/assets/img/2023-11-01-latent-interpolation/translation/img_010-1400.webp new file mode 100644 index 00000000..3c303651 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_010-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_010-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_010-480.webp new file mode 100644 index 00000000..d936b134 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_010-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_010-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_010-800.webp new file mode 100644 index 00000000..3c303651 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_010-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_011-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_011-1400.webp new file mode 100644 index 00000000..4d8b9726 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_011-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_011-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_011-480.webp new file mode 100644 index 00000000..7a022fc2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_011-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_011-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_011-800.webp new file mode 100644 index 00000000..4d8b9726 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_011-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_012-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_012-1400.webp new file mode 100644 index 
00000000..9d68b947 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_012-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_012-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_012-480.webp new file mode 100644 index 00000000..b457280f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_012-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_012-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_012-800.webp new file mode 100644 index 00000000..9d68b947 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_012-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_013-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_013-1400.webp new file mode 100644 index 00000000..37bdbffb Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_013-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_013-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_013-480.webp new file mode 100644 index 00000000..7b3afeab Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_013-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_013-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_013-800.webp new file mode 100644 index 00000000..37bdbffb Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_013-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_014-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_014-1400.webp new file mode 100644 index 00000000..1658cbde Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/translation/img_014-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_014-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_014-480.webp new file mode 100644 index 00000000..9e09542f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_014-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_014-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_014-800.webp new file mode 100644 index 00000000..1658cbde Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_014-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_015-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_015-1400.webp new file mode 100644 index 00000000..010efe7a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_015-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_015-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_015-480.webp new file mode 100644 index 00000000..b11f6a4d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_015-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_015-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_015-800.webp new file mode 100644 index 00000000..010efe7a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_015-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_016-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_016-1400.webp new file mode 100644 index 00000000..8ae31e1a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_016-1400.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/translation/img_016-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_016-480.webp new file mode 100644 index 00000000..8572026e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_016-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_016-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_016-800.webp new file mode 100644 index 00000000..8ae31e1a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_016-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_017-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_017-1400.webp new file mode 100644 index 00000000..0fb26b70 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_017-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_017-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_017-480.webp new file mode 100644 index 00000000..9ce4798c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_017-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_017-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_017-800.webp new file mode 100644 index 00000000..0fb26b70 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_017-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_018-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_018-1400.webp new file mode 100644 index 00000000..16f46fd5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_018-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_018-480.webp 
b/assets/img/2023-11-01-latent-interpolation/translation/img_018-480.webp new file mode 100644 index 00000000..33d7be5e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_018-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_018-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_018-800.webp new file mode 100644 index 00000000..16f46fd5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_018-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_019-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_019-1400.webp new file mode 100644 index 00000000..002710c8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_019-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_019-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_019-480.webp new file mode 100644 index 00000000..7c79b9dd Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_019-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_019-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_019-800.webp new file mode 100644 index 00000000..002710c8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_019-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_020-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_020-1400.webp new file mode 100644 index 00000000..7ae45a95 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_020-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_020-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_020-480.webp new file mode 100644 index 
00000000..2e42a836 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_020-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_020-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_020-800.webp new file mode 100644 index 00000000..7ae45a95 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_020-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_021-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_021-1400.webp new file mode 100644 index 00000000..f5e91153 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_021-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_021-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_021-480.webp new file mode 100644 index 00000000..60936fe9 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_021-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_021-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_021-800.webp new file mode 100644 index 00000000..f5e91153 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_021-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_022-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_022-1400.webp new file mode 100644 index 00000000..f29cc1a7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_022-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_022-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_022-480.webp new file mode 100644 index 00000000..b998d188 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/translation/img_022-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_022-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_022-800.webp new file mode 100644 index 00000000..f29cc1a7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_022-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_023-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_023-1400.webp new file mode 100644 index 00000000..92a51ae0 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_023-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_023-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_023-480.webp new file mode 100644 index 00000000..1d7b9c6b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_023-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_023-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_023-800.webp new file mode 100644 index 00000000..92a51ae0 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_023-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_024-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_024-1400.webp new file mode 100644 index 00000000..c4659620 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_024-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_024-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_024-480.webp new file mode 100644 index 00000000..044978b5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_024-480.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/translation/img_024-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_024-800.webp new file mode 100644 index 00000000..c4659620 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_024-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_025-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_025-1400.webp new file mode 100644 index 00000000..0df19181 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_025-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_025-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_025-480.webp new file mode 100644 index 00000000..57a32bd5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_025-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_025-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_025-800.webp new file mode 100644 index 00000000..0df19181 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_025-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_026-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_026-1400.webp new file mode 100644 index 00000000..2f537321 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_026-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_026-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_026-480.webp new file mode 100644 index 00000000..779361d6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_026-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_026-800.webp 
b/assets/img/2023-11-01-latent-interpolation/translation/img_026-800.webp new file mode 100644 index 00000000..2f537321 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_026-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_027-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_027-1400.webp new file mode 100644 index 00000000..cb9dc66f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_027-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_027-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_027-480.webp new file mode 100644 index 00000000..aa47de95 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_027-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_027-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_027-800.webp new file mode 100644 index 00000000..cb9dc66f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_027-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_028-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_028-1400.webp new file mode 100644 index 00000000..3f365aab Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_028-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_028-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_028-480.webp new file mode 100644 index 00000000..4d03a145 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_028-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_028-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_028-800.webp new file mode 100644 index 
00000000..3f365aab Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_028-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_029-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_029-1400.webp new file mode 100644 index 00000000..6a2527a5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_029-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_029-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_029-480.webp new file mode 100644 index 00000000..aac137d1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_029-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_029-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_029-800.webp new file mode 100644 index 00000000..6a2527a5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_029-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_030-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_030-1400.webp new file mode 100644 index 00000000..3d0e78d3 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_030-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_030-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_030-480.webp new file mode 100644 index 00000000..31948abf Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_030-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_030-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_030-800.webp new file mode 100644 index 00000000..3d0e78d3 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/translation/img_030-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_031-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_031-1400.webp new file mode 100644 index 00000000..8feb6d67 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_031-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_031-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_031-480.webp new file mode 100644 index 00000000..b770f3d5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_031-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_031-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_031-800.webp new file mode 100644 index 00000000..8feb6d67 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_031-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_032-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_032-1400.webp new file mode 100644 index 00000000..fefb13c6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_032-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_032-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_032-480.webp new file mode 100644 index 00000000..75313792 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_032-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_032-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_032-800.webp new file mode 100644 index 00000000..fefb13c6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_032-800.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/translation/img_033-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_033-1400.webp new file mode 100644 index 00000000..9995f3c2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_033-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_033-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_033-480.webp new file mode 100644 index 00000000..4c714690 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_033-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_033-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_033-800.webp new file mode 100644 index 00000000..9995f3c2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_033-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_034-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_034-1400.webp new file mode 100644 index 00000000..9b2b0d37 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_034-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_034-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_034-480.webp new file mode 100644 index 00000000..4620359a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_034-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_034-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_034-800.webp new file mode 100644 index 00000000..9b2b0d37 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_034-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_035-1400.webp 
b/assets/img/2023-11-01-latent-interpolation/translation/img_035-1400.webp new file mode 100644 index 00000000..6785a28c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_035-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_035-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_035-480.webp new file mode 100644 index 00000000..d03af64d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_035-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_035-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_035-800.webp new file mode 100644 index 00000000..6785a28c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_035-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_036-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_036-1400.webp new file mode 100644 index 00000000..f5d1ba5c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_036-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_036-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_036-480.webp new file mode 100644 index 00000000..34112751 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_036-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_036-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_036-800.webp new file mode 100644 index 00000000..f5d1ba5c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_036-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_037-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_037-1400.webp new file mode 100644 index 
00000000..96ed6aa1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_037-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_037-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_037-480.webp new file mode 100644 index 00000000..fff8c45c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_037-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_037-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_037-800.webp new file mode 100644 index 00000000..96ed6aa1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_037-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_038-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_038-1400.webp new file mode 100644 index 00000000..5d641009 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_038-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_038-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_038-480.webp new file mode 100644 index 00000000..b62f8e8a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_038-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_038-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_038-800.webp new file mode 100644 index 00000000..5d641009 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_038-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_039-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_039-1400.webp new file mode 100644 index 00000000..61533f26 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/translation/img_039-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_039-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_039-480.webp new file mode 100644 index 00000000..961d64d5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_039-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_039-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_039-800.webp new file mode 100644 index 00000000..61533f26 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_039-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_040-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_040-1400.webp new file mode 100644 index 00000000..3cf71e69 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_040-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_040-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_040-480.webp new file mode 100644 index 00000000..b9d3a756 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_040-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_040-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_040-800.webp new file mode 100644 index 00000000..3cf71e69 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_040-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_041-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_041-1400.webp new file mode 100644 index 00000000..c80863b5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_041-1400.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/translation/img_041-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_041-480.webp new file mode 100644 index 00000000..addfc21a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_041-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_041-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_041-800.webp new file mode 100644 index 00000000..c80863b5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_041-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_042-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_042-1400.webp new file mode 100644 index 00000000..a2fd56fb Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_042-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_042-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_042-480.webp new file mode 100644 index 00000000..71e16517 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_042-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_042-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_042-800.webp new file mode 100644 index 00000000..a2fd56fb Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_042-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_043-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_043-1400.webp new file mode 100644 index 00000000..022c200a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_043-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_043-480.webp 
b/assets/img/2023-11-01-latent-interpolation/translation/img_043-480.webp new file mode 100644 index 00000000..f9262a71 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_043-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_043-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_043-800.webp new file mode 100644 index 00000000..022c200a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_043-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_044-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_044-1400.webp new file mode 100644 index 00000000..221b7cfd Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_044-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_044-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_044-480.webp new file mode 100644 index 00000000..99a4eb42 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_044-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_044-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_044-800.webp new file mode 100644 index 00000000..221b7cfd Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_044-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_045-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_045-1400.webp new file mode 100644 index 00000000..d1269549 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_045-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_045-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_045-480.webp new file mode 100644 index 
00000000..05daaf11 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_045-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_045-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_045-800.webp new file mode 100644 index 00000000..d1269549 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_045-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_046-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_046-1400.webp new file mode 100644 index 00000000..e95f46c2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_046-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_046-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_046-480.webp new file mode 100644 index 00000000..2e1a8e01 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_046-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_046-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_046-800.webp new file mode 100644 index 00000000..e95f46c2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_046-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_047-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_047-1400.webp new file mode 100644 index 00000000..98ab7a7f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_047-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_047-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_047-480.webp new file mode 100644 index 00000000..b1b732ad Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/translation/img_047-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_047-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_047-800.webp new file mode 100644 index 00000000..98ab7a7f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_047-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_048-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_048-1400.webp new file mode 100644 index 00000000..8495d35f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_048-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_048-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_048-480.webp new file mode 100644 index 00000000..3dc7b771 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_048-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_048-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_048-800.webp new file mode 100644 index 00000000..8495d35f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_048-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_049-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_049-1400.webp new file mode 100644 index 00000000..f5744a6e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_049-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_049-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_049-480.webp new file mode 100644 index 00000000..4e52a8d6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_049-480.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/translation/img_049-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_049-800.webp new file mode 100644 index 00000000..f5744a6e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_049-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_050-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_050-1400.webp new file mode 100644 index 00000000..f4ea829c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_050-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_050-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_050-480.webp new file mode 100644 index 00000000..c9ce348a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_050-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_050-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_050-800.webp new file mode 100644 index 00000000..f4ea829c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_050-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_051-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_051-1400.webp new file mode 100644 index 00000000..f7d7f693 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_051-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_051-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_051-480.webp new file mode 100644 index 00000000..5749b3c2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_051-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_051-800.webp 
b/assets/img/2023-11-01-latent-interpolation/translation/img_051-800.webp new file mode 100644 index 00000000..f7d7f693 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_051-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_052-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_052-1400.webp new file mode 100644 index 00000000..a6c96462 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_052-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_052-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_052-480.webp new file mode 100644 index 00000000..f6bc8561 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_052-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_052-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_052-800.webp new file mode 100644 index 00000000..a6c96462 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_052-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_053-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_053-1400.webp new file mode 100644 index 00000000..9c1be2ec Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_053-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_053-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_053-480.webp new file mode 100644 index 00000000..5417b99a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_053-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_053-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_053-800.webp new file mode 100644 index 
00000000..9c1be2ec Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_053-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_054-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_054-1400.webp new file mode 100644 index 00000000..de3bf722 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_054-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_054-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_054-480.webp new file mode 100644 index 00000000..0b9222fe Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_054-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_054-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_054-800.webp new file mode 100644 index 00000000..de3bf722 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_054-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_055-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_055-1400.webp new file mode 100644 index 00000000..2284ac7c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_055-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_055-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_055-480.webp new file mode 100644 index 00000000..7543f8f4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_055-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_055-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_055-800.webp new file mode 100644 index 00000000..2284ac7c Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/translation/img_055-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_056-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_056-1400.webp new file mode 100644 index 00000000..c42b3da5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_056-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_056-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_056-480.webp new file mode 100644 index 00000000..b80797b8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_056-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_056-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_056-800.webp new file mode 100644 index 00000000..c42b3da5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_056-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_057-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_057-1400.webp new file mode 100644 index 00000000..a5b10c50 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_057-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_057-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_057-480.webp new file mode 100644 index 00000000..533583c8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_057-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_057-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_057-800.webp new file mode 100644 index 00000000..a5b10c50 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_057-800.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/translation/img_058-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_058-1400.webp new file mode 100644 index 00000000..7dfc9d70 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_058-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_058-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_058-480.webp new file mode 100644 index 00000000..9af2afa7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_058-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_058-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_058-800.webp new file mode 100644 index 00000000..7dfc9d70 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_058-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_059-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_059-1400.webp new file mode 100644 index 00000000..7fc0fcfe Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_059-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_059-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_059-480.webp new file mode 100644 index 00000000..136b95d6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_059-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_059-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_059-800.webp new file mode 100644 index 00000000..7fc0fcfe Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_059-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_060-1400.webp 
b/assets/img/2023-11-01-latent-interpolation/translation/img_060-1400.webp new file mode 100644 index 00000000..7755a7e7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_060-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_060-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_060-480.webp new file mode 100644 index 00000000..217ee9d7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_060-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_060-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_060-800.webp new file mode 100644 index 00000000..7755a7e7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_060-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_061-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_061-1400.webp new file mode 100644 index 00000000..8b70c654 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_061-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_061-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_061-480.webp new file mode 100644 index 00000000..e94d6478 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_061-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_061-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_061-800.webp new file mode 100644 index 00000000..8b70c654 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_061-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_062-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_062-1400.webp new file mode 100644 index 
00000000..870e4bd5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_062-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_062-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_062-480.webp new file mode 100644 index 00000000..4b135302 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_062-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_062-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_062-800.webp new file mode 100644 index 00000000..870e4bd5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_062-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_063-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_063-1400.webp new file mode 100644 index 00000000..2194d79b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_063-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_063-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_063-480.webp new file mode 100644 index 00000000..bc17149c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_063-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_063-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_063-800.webp new file mode 100644 index 00000000..2194d79b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_063-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_064-1400.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_064-1400.webp new file mode 100644 index 00000000..cc46966a Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/translation/img_064-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_064-480.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_064-480.webp new file mode 100644 index 00000000..e2e12319 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_064-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/translation/img_064-800.webp b/assets/img/2023-11-01-latent-interpolation/translation/img_064-800.webp new file mode 100644 index 00000000..cc46966a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/translation/img_064-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0000-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0000-1400.webp new file mode 100644 index 00000000..85f9d643 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0000-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0000-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0000-480.webp new file mode 100644 index 00000000..d2e7793a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0000-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0000-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0000-800.webp new file mode 100644 index 00000000..85f9d643 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0000-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0001-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0001-1400.webp new file mode 100644 index 00000000..72e559e4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0001-1400.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0001-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0001-480.webp new file mode 100644 index 00000000..bdb62536 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0001-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0001-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0001-800.webp new file mode 100644 index 00000000..72e559e4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0001-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0002-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0002-1400.webp new file mode 100644 index 00000000..de00e2be Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0002-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0002-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0002-480.webp new file mode 100644 index 00000000..3769a680 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0002-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0002-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0002-800.webp new file mode 100644 index 00000000..de00e2be Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0002-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0003-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0003-1400.webp new file mode 100644 index 00000000..5b08c1ab Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0003-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0003-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0003-480.webp new 
file mode 100644 index 00000000..0f1dd83c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0003-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0003-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0003-800.webp new file mode 100644 index 00000000..5b08c1ab Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0003-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0004-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0004-1400.webp new file mode 100644 index 00000000..cb965b27 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0004-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0004-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0004-480.webp new file mode 100644 index 00000000..b3cd40e7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0004-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0004-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0004-800.webp new file mode 100644 index 00000000..cb965b27 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0004-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0005-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0005-1400.webp new file mode 100644 index 00000000..02ca3158 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0005-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0005-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0005-480.webp new file mode 100644 index 00000000..e2300c1b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0005-480.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0005-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0005-800.webp new file mode 100644 index 00000000..02ca3158 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0005-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0006-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0006-1400.webp new file mode 100644 index 00000000..55597ec1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0006-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0006-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0006-480.webp new file mode 100644 index 00000000..a0640af0 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0006-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0006-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0006-800.webp new file mode 100644 index 00000000..55597ec1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0006-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0007-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0007-1400.webp new file mode 100644 index 00000000..8617109e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0007-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0007-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0007-480.webp new file mode 100644 index 00000000..fff5592f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0007-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0007-800.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0007-800.webp new file mode 100644 index 00000000..8617109e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0007-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0008-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0008-1400.webp new file mode 100644 index 00000000..3644077f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0008-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0008-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0008-480.webp new file mode 100644 index 00000000..077159d5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0008-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0008-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0008-800.webp new file mode 100644 index 00000000..3644077f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0008-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0009-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0009-1400.webp new file mode 100644 index 00000000..40618926 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0009-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0009-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0009-480.webp new file mode 100644 index 00000000..dc0ce42f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0009-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0009-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0009-800.webp new file mode 100644 index 00000000..40618926 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0009-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0010-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0010-1400.webp new file mode 100644 index 00000000..34acbc29 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0010-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0010-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0010-480.webp new file mode 100644 index 00000000..669f1879 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0010-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0010-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0010-800.webp new file mode 100644 index 00000000..34acbc29 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0010-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0011-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0011-1400.webp new file mode 100644 index 00000000..26e73b02 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0011-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0011-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0011-480.webp new file mode 100644 index 00000000..64f944d9 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0011-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0011-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0011-800.webp new file mode 100644 index 00000000..26e73b02 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0011-800.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0012-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0012-1400.webp new file mode 100644 index 00000000..6d167005 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0012-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0012-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0012-480.webp new file mode 100644 index 00000000..255a278d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0012-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0012-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0012-800.webp new file mode 100644 index 00000000..6d167005 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0012-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0013-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0013-1400.webp new file mode 100644 index 00000000..9497f42e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0013-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0013-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0013-480.webp new file mode 100644 index 00000000..b93c16ad Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0013-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0013-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0013-800.webp new file mode 100644 index 00000000..9497f42e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0013-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0014-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0014-1400.webp new 
file mode 100644 index 00000000..190ff30f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0014-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0014-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0014-480.webp new file mode 100644 index 00000000..3c40d723 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0014-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0014-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0014-800.webp new file mode 100644 index 00000000..190ff30f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0014-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0015-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0015-1400.webp new file mode 100644 index 00000000..123b7416 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0015-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0015-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0015-480.webp new file mode 100644 index 00000000..bfcd289e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0015-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0015-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0015-800.webp new file mode 100644 index 00000000..123b7416 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0015-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0016-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0016-1400.webp new file mode 100644 index 00000000..c7f4c0a9 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0016-1400.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0016-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0016-480.webp new file mode 100644 index 00000000..8a180b35 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0016-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0016-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0016-800.webp new file mode 100644 index 00000000..c7f4c0a9 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0016-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0017-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0017-1400.webp new file mode 100644 index 00000000..63498b37 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0017-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0017-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0017-480.webp new file mode 100644 index 00000000..9bd35fa1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0017-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0017-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0017-800.webp new file mode 100644 index 00000000..63498b37 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0017-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0018-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0018-1400.webp new file mode 100644 index 00000000..027d036b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0018-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0018-480.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0018-480.webp new file mode 100644 index 00000000..0de21ec3 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0018-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0018-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0018-800.webp new file mode 100644 index 00000000..027d036b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0018-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0019-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0019-1400.webp new file mode 100644 index 00000000..eeee1c3f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0019-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0019-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0019-480.webp new file mode 100644 index 00000000..51ac3822 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0019-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0019-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0019-800.webp new file mode 100644 index 00000000..eeee1c3f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0019-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0020-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0020-1400.webp new file mode 100644 index 00000000..ce7ed9a9 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0020-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0020-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0020-480.webp new file mode 100644 index 00000000..46e503af Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0020-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0020-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0020-800.webp new file mode 100644 index 00000000..ce7ed9a9 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0020-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0021-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0021-1400.webp new file mode 100644 index 00000000..683c8098 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0021-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0021-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0021-480.webp new file mode 100644 index 00000000..4547008b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0021-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0021-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0021-800.webp new file mode 100644 index 00000000..683c8098 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0021-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0022-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0022-1400.webp new file mode 100644 index 00000000..d891775b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0022-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0022-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0022-480.webp new file mode 100644 index 00000000..f905b1d2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0022-480.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0022-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0022-800.webp new file mode 100644 index 00000000..d891775b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0022-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0023-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0023-1400.webp new file mode 100644 index 00000000..dc0fa74c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0023-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0023-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0023-480.webp new file mode 100644 index 00000000..107ce1ea Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0023-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0023-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0023-800.webp new file mode 100644 index 00000000..dc0fa74c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0023-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0024-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0024-1400.webp new file mode 100644 index 00000000..9feba842 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0024-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0024-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0024-480.webp new file mode 100644 index 00000000..18d62db0 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0024-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0024-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0024-800.webp new 
file mode 100644 index 00000000..9feba842 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0024-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0025-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0025-1400.webp new file mode 100644 index 00000000..5088fd85 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0025-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0025-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0025-480.webp new file mode 100644 index 00000000..a3a10b54 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0025-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0025-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0025-800.webp new file mode 100644 index 00000000..5088fd85 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0025-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0026-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0026-1400.webp new file mode 100644 index 00000000..60afd0de Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0026-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0026-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0026-480.webp new file mode 100644 index 00000000..181c51fa Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0026-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0026-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0026-800.webp new file mode 100644 index 00000000..60afd0de Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0026-800.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0027-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0027-1400.webp new file mode 100644 index 00000000..c5f9d140 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0027-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0027-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0027-480.webp new file mode 100644 index 00000000..84a003ab Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0027-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0027-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0027-800.webp new file mode 100644 index 00000000..c5f9d140 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0027-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0028-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0028-1400.webp new file mode 100644 index 00000000..257d4a98 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0028-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0028-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0028-480.webp new file mode 100644 index 00000000..f7f20791 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0028-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0028-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0028-800.webp new file mode 100644 index 00000000..257d4a98 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0028-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0029-1400.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0029-1400.webp new file mode 100644 index 00000000..41e3ca9c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0029-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0029-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0029-480.webp new file mode 100644 index 00000000..51b31b79 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0029-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0029-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0029-800.webp new file mode 100644 index 00000000..41e3ca9c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0029-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0030-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0030-1400.webp new file mode 100644 index 00000000..29577b92 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0030-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0030-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0030-480.webp new file mode 100644 index 00000000..5974ff48 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0030-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0030-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0030-800.webp new file mode 100644 index 00000000..29577b92 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0030-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0031-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0031-1400.webp new file mode 100644 index 00000000..f9720f72 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0031-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0031-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0031-480.webp new file mode 100644 index 00000000..e15c5c86 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0031-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0031-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0031-800.webp new file mode 100644 index 00000000..f9720f72 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0031-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0032-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0032-1400.webp new file mode 100644 index 00000000..2f63d84a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0032-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0032-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0032-480.webp new file mode 100644 index 00000000..b91e42a3 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0032-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0032-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0032-800.webp new file mode 100644 index 00000000..2f63d84a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0032-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0033-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0033-1400.webp new file mode 100644 index 00000000..398294a5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0033-1400.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0033-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0033-480.webp new file mode 100644 index 00000000..3cc84da8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0033-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0033-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0033-800.webp new file mode 100644 index 00000000..398294a5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0033-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0034-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0034-1400.webp new file mode 100644 index 00000000..f83762a2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0034-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0034-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0034-480.webp new file mode 100644 index 00000000..e1954abc Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0034-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0034-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0034-800.webp new file mode 100644 index 00000000..f83762a2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0034-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0035-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0035-1400.webp new file mode 100644 index 00000000..04d7d223 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0035-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0035-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0035-480.webp new 
file mode 100644 index 00000000..2ac2c874 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0035-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0035-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0035-800.webp new file mode 100644 index 00000000..04d7d223 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0035-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0036-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0036-1400.webp new file mode 100644 index 00000000..37dd5720 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0036-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0036-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0036-480.webp new file mode 100644 index 00000000..d07af093 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0036-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0036-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0036-800.webp new file mode 100644 index 00000000..37dd5720 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0036-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0037-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0037-1400.webp new file mode 100644 index 00000000..842e5050 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0037-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0037-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0037-480.webp new file mode 100644 index 00000000..c63cadd9 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0037-480.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0037-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0037-800.webp new file mode 100644 index 00000000..842e5050 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0037-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0038-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0038-1400.webp new file mode 100644 index 00000000..fb84dabd Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0038-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0038-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0038-480.webp new file mode 100644 index 00000000..94055f54 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0038-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0038-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0038-800.webp new file mode 100644 index 00000000..fb84dabd Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0038-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0039-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0039-1400.webp new file mode 100644 index 00000000..20dff016 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0039-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0039-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0039-480.webp new file mode 100644 index 00000000..709b36f1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0039-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0039-800.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0039-800.webp new file mode 100644 index 00000000..20dff016 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0039-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0040-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0040-1400.webp new file mode 100644 index 00000000..80a142c5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0040-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0040-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0040-480.webp new file mode 100644 index 00000000..a2962bf2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0040-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0040-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0040-800.webp new file mode 100644 index 00000000..80a142c5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0040-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0041-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0041-1400.webp new file mode 100644 index 00000000..38c1fb6c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0041-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0041-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0041-480.webp new file mode 100644 index 00000000..b4480608 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0041-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0041-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0041-800.webp new file mode 100644 index 00000000..38c1fb6c Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0041-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0042-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0042-1400.webp new file mode 100644 index 00000000..68334eb5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0042-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0042-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0042-480.webp new file mode 100644 index 00000000..8f339755 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0042-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0042-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0042-800.webp new file mode 100644 index 00000000..68334eb5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0042-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0043-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0043-1400.webp new file mode 100644 index 00000000..bf9c6261 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0043-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0043-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0043-480.webp new file mode 100644 index 00000000..88c5ba31 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0043-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0043-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0043-800.webp new file mode 100644 index 00000000..bf9c6261 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0043-800.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0044-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0044-1400.webp new file mode 100644 index 00000000..1980dc74 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0044-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0044-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0044-480.webp new file mode 100644 index 00000000..6c534082 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0044-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0044-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0044-800.webp new file mode 100644 index 00000000..1980dc74 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0044-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0045-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0045-1400.webp new file mode 100644 index 00000000..d404bbc7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0045-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0045-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0045-480.webp new file mode 100644 index 00000000..2cb9dbe1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0045-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0045-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0045-800.webp new file mode 100644 index 00000000..d404bbc7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0045-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0046-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0046-1400.webp new 
file mode 100644 index 00000000..2985febf Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0046-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0046-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0046-480.webp new file mode 100644 index 00000000..05db065d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0046-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0046-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0046-800.webp new file mode 100644 index 00000000..2985febf Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0046-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0047-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0047-1400.webp new file mode 100644 index 00000000..1cb06106 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0047-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0047-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0047-480.webp new file mode 100644 index 00000000..042c92ab Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0047-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0047-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0047-800.webp new file mode 100644 index 00000000..1cb06106 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0047-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0048-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0048-1400.webp new file mode 100644 index 00000000..6cd4cfd1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0048-1400.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0048-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0048-480.webp new file mode 100644 index 00000000..46ecf466 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0048-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0048-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0048-800.webp new file mode 100644 index 00000000..6cd4cfd1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0048-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0049-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0049-1400.webp new file mode 100644 index 00000000..5c4889b1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0049-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0049-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0049-480.webp new file mode 100644 index 00000000..4c265e3f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0049-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0049-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0049-800.webp new file mode 100644 index 00000000..5c4889b1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0049-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0050-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0050-1400.webp new file mode 100644 index 00000000..a0aba860 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0050-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0050-480.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0050-480.webp new file mode 100644 index 00000000..19d658a4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0050-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0050-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0050-800.webp new file mode 100644 index 00000000..a0aba860 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0050-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0051-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0051-1400.webp new file mode 100644 index 00000000..e143aa3f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0051-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0051-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0051-480.webp new file mode 100644 index 00000000..ef590335 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0051-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0051-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0051-800.webp new file mode 100644 index 00000000..e143aa3f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0051-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0052-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0052-1400.webp new file mode 100644 index 00000000..54a203d2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0052-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0052-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0052-480.webp new file mode 100644 index 00000000..0f9a224f Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0052-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0052-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0052-800.webp new file mode 100644 index 00000000..54a203d2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0052-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0053-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0053-1400.webp new file mode 100644 index 00000000..1097f657 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0053-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0053-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0053-480.webp new file mode 100644 index 00000000..abd68bb3 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0053-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0053-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0053-800.webp new file mode 100644 index 00000000..1097f657 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0053-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0054-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0054-1400.webp new file mode 100644 index 00000000..52857e33 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0054-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0054-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0054-480.webp new file mode 100644 index 00000000..c0a268fb Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0054-480.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0054-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0054-800.webp new file mode 100644 index 00000000..52857e33 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0054-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0055-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0055-1400.webp new file mode 100644 index 00000000..eabcd82b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0055-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0055-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0055-480.webp new file mode 100644 index 00000000..45bce932 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0055-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0055-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0055-800.webp new file mode 100644 index 00000000..eabcd82b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0055-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0056-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0056-1400.webp new file mode 100644 index 00000000..0494ff89 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0056-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0056-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0056-480.webp new file mode 100644 index 00000000..dc403151 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0056-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0056-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0056-800.webp new 
file mode 100644 index 00000000..0494ff89 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0056-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0057-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0057-1400.webp new file mode 100644 index 00000000..906d17f8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0057-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0057-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0057-480.webp new file mode 100644 index 00000000..01604023 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0057-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0057-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0057-800.webp new file mode 100644 index 00000000..906d17f8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0057-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0058-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0058-1400.webp new file mode 100644 index 00000000..00a44e1f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0058-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0058-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0058-480.webp new file mode 100644 index 00000000..e11d5a52 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0058-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0058-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0058-800.webp new file mode 100644 index 00000000..00a44e1f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0058-800.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0059-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0059-1400.webp new file mode 100644 index 00000000..886aa64c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0059-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0059-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0059-480.webp new file mode 100644 index 00000000..a3a80c0f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0059-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0059-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0059-800.webp new file mode 100644 index 00000000..886aa64c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0059-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0060-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0060-1400.webp new file mode 100644 index 00000000..eb3ff605 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0060-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0060-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0060-480.webp new file mode 100644 index 00000000..cf340c2e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0060-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0060-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0060-800.webp new file mode 100644 index 00000000..eb3ff605 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0060-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0061-1400.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0061-1400.webp new file mode 100644 index 00000000..0193eb57 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0061-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0061-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0061-480.webp new file mode 100644 index 00000000..263f0a01 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0061-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0061-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0061-800.webp new file mode 100644 index 00000000..0193eb57 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0061-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0062-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0062-1400.webp new file mode 100644 index 00000000..ffbbada8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0062-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0062-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0062-480.webp new file mode 100644 index 00000000..3ae341bc Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0062-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0062-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0062-800.webp new file mode 100644 index 00000000..ffbbada8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0062-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0063-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0063-1400.webp new file mode 100644 index 00000000..77818df3 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0063-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0063-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0063-480.webp new file mode 100644 index 00000000..75762fa0 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0063-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0063-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0063-800.webp new file mode 100644 index 00000000..77818df3 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0063-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0064-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0064-1400.webp new file mode 100644 index 00000000..267cc381 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0064-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0064-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0064-480.webp new file mode 100644 index 00000000..b139c43e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0064-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0064-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0064-800.webp new file mode 100644 index 00000000..267cc381 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0064-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0065-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0065-1400.webp new file mode 100644 index 00000000..8064dacc Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0065-1400.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0065-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0065-480.webp new file mode 100644 index 00000000..d254b25a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0065-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0065-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0065-800.webp new file mode 100644 index 00000000..8064dacc Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0065-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0066-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0066-1400.webp new file mode 100644 index 00000000..f244099c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0066-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0066-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0066-480.webp new file mode 100644 index 00000000..14b08aa5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0066-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0066-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0066-800.webp new file mode 100644 index 00000000..f244099c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0066-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0067-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0067-1400.webp new file mode 100644 index 00000000..636432ef Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0067-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0067-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0067-480.webp new 
file mode 100644 index 00000000..6ee9f7da Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0067-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0067-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0067-800.webp new file mode 100644 index 00000000..636432ef Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0067-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0068-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0068-1400.webp new file mode 100644 index 00000000..6a8dcd2c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0068-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0068-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0068-480.webp new file mode 100644 index 00000000..fa706684 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0068-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0068-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0068-800.webp new file mode 100644 index 00000000..6a8dcd2c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0068-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0069-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0069-1400.webp new file mode 100644 index 00000000..7ae415d0 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0069-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0069-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0069-480.webp new file mode 100644 index 00000000..49308b88 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0069-480.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0069-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0069-800.webp new file mode 100644 index 00000000..7ae415d0 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0069-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0070-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0070-1400.webp new file mode 100644 index 00000000..13813ddd Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0070-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0070-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0070-480.webp new file mode 100644 index 00000000..6a6c111f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0070-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0070-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0070-800.webp new file mode 100644 index 00000000..13813ddd Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0070-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0071-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0071-1400.webp new file mode 100644 index 00000000..1538ca49 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0071-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0071-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0071-480.webp new file mode 100644 index 00000000..adf303ef Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0071-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0071-800.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0071-800.webp new file mode 100644 index 00000000..1538ca49 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0071-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0072-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0072-1400.webp new file mode 100644 index 00000000..70136d58 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0072-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0072-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0072-480.webp new file mode 100644 index 00000000..b50089ed Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0072-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0072-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0072-800.webp new file mode 100644 index 00000000..70136d58 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0072-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0073-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0073-1400.webp new file mode 100644 index 00000000..7238ea7e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0073-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0073-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0073-480.webp new file mode 100644 index 00000000..e3cfb432 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0073-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0073-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0073-800.webp new file mode 100644 index 00000000..7238ea7e Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0073-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0074-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0074-1400.webp new file mode 100644 index 00000000..d3a6d66e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0074-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0074-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0074-480.webp new file mode 100644 index 00000000..f192c8fd Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0074-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0074-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0074-800.webp new file mode 100644 index 00000000..d3a6d66e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0074-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0075-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0075-1400.webp new file mode 100644 index 00000000..33dc948a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0075-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0075-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0075-480.webp new file mode 100644 index 00000000..41e902cb Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0075-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0075-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0075-800.webp new file mode 100644 index 00000000..33dc948a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0075-800.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0076-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0076-1400.webp new file mode 100644 index 00000000..044bcd34 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0076-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0076-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0076-480.webp new file mode 100644 index 00000000..17384c72 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0076-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0076-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0076-800.webp new file mode 100644 index 00000000..044bcd34 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0076-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0077-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0077-1400.webp new file mode 100644 index 00000000..f9883246 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0077-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0077-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0077-480.webp new file mode 100644 index 00000000..06cc7de6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0077-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0077-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0077-800.webp new file mode 100644 index 00000000..f9883246 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0077-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0078-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0078-1400.webp new 
file mode 100644 index 00000000..9ccf2157 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0078-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0078-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0078-480.webp new file mode 100644 index 00000000..a809782c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0078-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0078-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0078-800.webp new file mode 100644 index 00000000..9ccf2157 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0078-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0079-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0079-1400.webp new file mode 100644 index 00000000..13ff7370 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0079-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0079-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0079-480.webp new file mode 100644 index 00000000..5abef2a3 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0079-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0079-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0079-800.webp new file mode 100644 index 00000000..13ff7370 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0079-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0080-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0080-1400.webp new file mode 100644 index 00000000..27a5cc28 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0080-1400.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0080-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0080-480.webp new file mode 100644 index 00000000..3b05a350 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0080-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0080-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0080-800.webp new file mode 100644 index 00000000..27a5cc28 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0080-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0081-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0081-1400.webp new file mode 100644 index 00000000..4d85b3e1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0081-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0081-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0081-480.webp new file mode 100644 index 00000000..0ef6c2a8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0081-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0081-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0081-800.webp new file mode 100644 index 00000000..4d85b3e1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0081-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0082-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0082-1400.webp new file mode 100644 index 00000000..6a53b4c7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0082-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0082-480.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0082-480.webp new file mode 100644 index 00000000..65129dad Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0082-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0082-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0082-800.webp new file mode 100644 index 00000000..6a53b4c7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0082-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0083-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0083-1400.webp new file mode 100644 index 00000000..6b7082e9 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0083-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0083-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0083-480.webp new file mode 100644 index 00000000..92973d1c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0083-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0083-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0083-800.webp new file mode 100644 index 00000000..6b7082e9 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0083-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0084-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0084-1400.webp new file mode 100644 index 00000000..d03b3c64 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0084-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0084-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0084-480.webp new file mode 100644 index 00000000..33111f96 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0084-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0084-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0084-800.webp new file mode 100644 index 00000000..d03b3c64 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0084-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0085-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0085-1400.webp new file mode 100644 index 00000000..2707df2f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0085-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0085-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0085-480.webp new file mode 100644 index 00000000..d7277a34 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0085-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0085-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0085-800.webp new file mode 100644 index 00000000..2707df2f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0085-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0086-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0086-1400.webp new file mode 100644 index 00000000..0778dd5e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0086-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0086-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0086-480.webp new file mode 100644 index 00000000..c52dd35a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0086-480.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0086-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0086-800.webp new file mode 100644 index 00000000..0778dd5e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0086-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0087-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0087-1400.webp new file mode 100644 index 00000000..d2aa6834 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0087-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0087-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0087-480.webp new file mode 100644 index 00000000..4d06c89f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0087-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0087-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0087-800.webp new file mode 100644 index 00000000..d2aa6834 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0087-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0088-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0088-1400.webp new file mode 100644 index 00000000..a7b60e58 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0088-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0088-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0088-480.webp new file mode 100644 index 00000000..21e75def Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0088-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0088-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0088-800.webp new 
file mode 100644 index 00000000..a7b60e58 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0088-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0089-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0089-1400.webp new file mode 100644 index 00000000..0a1a67dd Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0089-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0089-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0089-480.webp new file mode 100644 index 00000000..d90236c7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0089-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0089-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0089-800.webp new file mode 100644 index 00000000..0a1a67dd Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0089-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0090-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0090-1400.webp new file mode 100644 index 00000000..9f061a7f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0090-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0090-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0090-480.webp new file mode 100644 index 00000000..cdf9953e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0090-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0090-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0090-800.webp new file mode 100644 index 00000000..9f061a7f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0090-800.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0091-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0091-1400.webp new file mode 100644 index 00000000..e138af3e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0091-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0091-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0091-480.webp new file mode 100644 index 00000000..349728a6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0091-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0091-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0091-800.webp new file mode 100644 index 00000000..e138af3e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0091-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0092-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0092-1400.webp new file mode 100644 index 00000000..e2530eac Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0092-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0092-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0092-480.webp new file mode 100644 index 00000000..bd95f2a6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0092-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0092-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0092-800.webp new file mode 100644 index 00000000..e2530eac Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0092-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0093-1400.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0093-1400.webp new file mode 100644 index 00000000..ad63e73c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0093-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0093-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0093-480.webp new file mode 100644 index 00000000..6bc77335 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0093-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0093-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0093-800.webp new file mode 100644 index 00000000..ad63e73c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0093-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0094-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0094-1400.webp new file mode 100644 index 00000000..123d5e7a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0094-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0094-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0094-480.webp new file mode 100644 index 00000000..1125d21d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0094-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0094-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0094-800.webp new file mode 100644 index 00000000..123d5e7a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0094-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0095-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0095-1400.webp new file mode 100644 index 00000000..33f6ae3f Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0095-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0095-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0095-480.webp new file mode 100644 index 00000000..6d5417c4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0095-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0095-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0095-800.webp new file mode 100644 index 00000000..33f6ae3f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0095-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0096-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0096-1400.webp new file mode 100644 index 00000000..bf60845e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0096-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0096-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0096-480.webp new file mode 100644 index 00000000..f5f16be2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0096-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0096-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0096-800.webp new file mode 100644 index 00000000..bf60845e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0096-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0097-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0097-1400.webp new file mode 100644 index 00000000..623e54ad Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0097-1400.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0097-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0097-480.webp new file mode 100644 index 00000000..62ea0006 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0097-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0097-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0097-800.webp new file mode 100644 index 00000000..623e54ad Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0097-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0098-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0098-1400.webp new file mode 100644 index 00000000..8a3c4c17 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0098-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0098-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0098-480.webp new file mode 100644 index 00000000..bfe886d4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0098-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0098-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0098-800.webp new file mode 100644 index 00000000..8a3c4c17 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0098-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0099-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0099-1400.webp new file mode 100644 index 00000000..b6d45448 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0099-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0099-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0099-480.webp new 
file mode 100644 index 00000000..b7cfc947 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0099-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0099-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0099-800.webp new file mode 100644 index 00000000..b6d45448 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0099-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0100-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0100-1400.webp new file mode 100644 index 00000000..d75ae052 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0100-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0100-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0100-480.webp new file mode 100644 index 00000000..b70c383b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0100-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0100-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0100-800.webp new file mode 100644 index 00000000..d75ae052 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0100-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0101-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0101-1400.webp new file mode 100644 index 00000000..e078cabf Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0101-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0101-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0101-480.webp new file mode 100644 index 00000000..fc337ec7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0101-480.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0101-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0101-800.webp new file mode 100644 index 00000000..e078cabf Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0101-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0102-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0102-1400.webp new file mode 100644 index 00000000..2409b1c9 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0102-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0102-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0102-480.webp new file mode 100644 index 00000000..fe0b17ab Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0102-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0102-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0102-800.webp new file mode 100644 index 00000000..2409b1c9 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0102-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0103-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0103-1400.webp new file mode 100644 index 00000000..addd1585 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0103-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0103-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0103-480.webp new file mode 100644 index 00000000..8a265be2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0103-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0103-800.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0103-800.webp new file mode 100644 index 00000000..addd1585 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0103-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0104-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0104-1400.webp new file mode 100644 index 00000000..60c900de Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0104-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0104-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0104-480.webp new file mode 100644 index 00000000..a8695f0b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0104-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0104-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0104-800.webp new file mode 100644 index 00000000..60c900de Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0104-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0105-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0105-1400.webp new file mode 100644 index 00000000..d63f08e1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0105-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0105-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0105-480.webp new file mode 100644 index 00000000..3c286ec1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0105-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0105-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0105-800.webp new file mode 100644 index 00000000..d63f08e1 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0105-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0106-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0106-1400.webp new file mode 100644 index 00000000..68d4de9b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0106-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0106-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0106-480.webp new file mode 100644 index 00000000..f54063a7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0106-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0106-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0106-800.webp new file mode 100644 index 00000000..68d4de9b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0106-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0107-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0107-1400.webp new file mode 100644 index 00000000..9b67fc8a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0107-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0107-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0107-480.webp new file mode 100644 index 00000000..ee1bc4a8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0107-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0107-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0107-800.webp new file mode 100644 index 00000000..9b67fc8a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0107-800.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0108-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0108-1400.webp new file mode 100644 index 00000000..647058df Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0108-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0108-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0108-480.webp new file mode 100644 index 00000000..57d602ff Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0108-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0108-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0108-800.webp new file mode 100644 index 00000000..647058df Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0108-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0109-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0109-1400.webp new file mode 100644 index 00000000..0d24cd4d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0109-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0109-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0109-480.webp new file mode 100644 index 00000000..280046d8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0109-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0109-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0109-800.webp new file mode 100644 index 00000000..0d24cd4d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0109-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0110-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0110-1400.webp new 
file mode 100644 index 00000000..8ebc63b5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0110-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0110-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0110-480.webp new file mode 100644 index 00000000..1dddf4b7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0110-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0110-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0110-800.webp new file mode 100644 index 00000000..8ebc63b5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0110-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0111-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0111-1400.webp new file mode 100644 index 00000000..d2ff0a34 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0111-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0111-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0111-480.webp new file mode 100644 index 00000000..9090a527 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0111-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0111-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0111-800.webp new file mode 100644 index 00000000..d2ff0a34 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0111-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0112-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0112-1400.webp new file mode 100644 index 00000000..3bb843a6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0112-1400.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0112-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0112-480.webp new file mode 100644 index 00000000..244ade3c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0112-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0112-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0112-800.webp new file mode 100644 index 00000000..3bb843a6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0112-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0113-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0113-1400.webp new file mode 100644 index 00000000..7d0ef4ad Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0113-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0113-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0113-480.webp new file mode 100644 index 00000000..c8d2a4ec Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0113-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0113-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0113-800.webp new file mode 100644 index 00000000..7d0ef4ad Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0113-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0114-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0114-1400.webp new file mode 100644 index 00000000..290ee5c4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0114-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0114-480.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0114-480.webp new file mode 100644 index 00000000..6ed36eed Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0114-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0114-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0114-800.webp new file mode 100644 index 00000000..290ee5c4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0114-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0115-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0115-1400.webp new file mode 100644 index 00000000..203e82f4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0115-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0115-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0115-480.webp new file mode 100644 index 00000000..252b7470 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0115-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0115-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0115-800.webp new file mode 100644 index 00000000..203e82f4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0115-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0116-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0116-1400.webp new file mode 100644 index 00000000..e4ff6355 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0116-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0116-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0116-480.webp new file mode 100644 index 00000000..0b485263 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0116-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0116-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0116-800.webp new file mode 100644 index 00000000..e4ff6355 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0116-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0117-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0117-1400.webp new file mode 100644 index 00000000..3065955b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0117-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0117-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0117-480.webp new file mode 100644 index 00000000..e91fa91c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0117-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0117-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0117-800.webp new file mode 100644 index 00000000..3065955b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0117-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0118-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0118-1400.webp new file mode 100644 index 00000000..c728a54d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0118-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0118-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0118-480.webp new file mode 100644 index 00000000..69f989b1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0118-480.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0118-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0118-800.webp new file mode 100644 index 00000000..c728a54d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0118-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0119-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0119-1400.webp new file mode 100644 index 00000000..073e6d21 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0119-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0119-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0119-480.webp new file mode 100644 index 00000000..c2af5084 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0119-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0119-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0119-800.webp new file mode 100644 index 00000000..073e6d21 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0119-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0120-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0120-1400.webp new file mode 100644 index 00000000..438f8511 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0120-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0120-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0120-480.webp new file mode 100644 index 00000000..41e955d7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0120-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0120-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0120-800.webp new 
file mode 100644 index 00000000..438f8511 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0120-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0121-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0121-1400.webp new file mode 100644 index 00000000..568e7f33 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0121-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0121-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0121-480.webp new file mode 100644 index 00000000..187e338c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0121-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0121-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0121-800.webp new file mode 100644 index 00000000..568e7f33 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0121-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0122-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0122-1400.webp new file mode 100644 index 00000000..e825ed36 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0122-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0122-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0122-480.webp new file mode 100644 index 00000000..d2789781 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0122-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0122-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0122-800.webp new file mode 100644 index 00000000..e825ed36 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0122-800.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0123-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0123-1400.webp new file mode 100644 index 00000000..ba5441ac Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0123-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0123-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0123-480.webp new file mode 100644 index 00000000..02513f99 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0123-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0123-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0123-800.webp new file mode 100644 index 00000000..ba5441ac Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0123-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0124-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0124-1400.webp new file mode 100644 index 00000000..7f356f6f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0124-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0124-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0124-480.webp new file mode 100644 index 00000000..bf417c87 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0124-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0124-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0124-800.webp new file mode 100644 index 00000000..7f356f6f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0124-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0125-1400.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0125-1400.webp new file mode 100644 index 00000000..345fa533 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0125-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0125-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0125-480.webp new file mode 100644 index 00000000..239342a3 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0125-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0125-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0125-800.webp new file mode 100644 index 00000000..345fa533 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0125-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0126-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0126-1400.webp new file mode 100644 index 00000000..3f745fd6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0126-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0126-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0126-480.webp new file mode 100644 index 00000000..a681c479 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0126-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0126-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0126-800.webp new file mode 100644 index 00000000..3f745fd6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0126-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0127-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0127-1400.webp new file mode 100644 index 00000000..180455f9 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0127-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0127-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0127-480.webp new file mode 100644 index 00000000..5372f8da Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0127-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0127-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0127-800.webp new file mode 100644 index 00000000..180455f9 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0127-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0128-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0128-1400.webp new file mode 100644 index 00000000..c14a99f2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0128-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0128-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0128-480.webp new file mode 100644 index 00000000..6bbacd40 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0128-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0128-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0128-800.webp new file mode 100644 index 00000000..c14a99f2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0128-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0129-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0129-1400.webp new file mode 100644 index 00000000..74f2d255 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0129-1400.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0129-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0129-480.webp new file mode 100644 index 00000000..47960098 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0129-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0129-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0129-800.webp new file mode 100644 index 00000000..74f2d255 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0129-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0130-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0130-1400.webp new file mode 100644 index 00000000..5a8909aa Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0130-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0130-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0130-480.webp new file mode 100644 index 00000000..83e5ec4b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0130-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0130-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0130-800.webp new file mode 100644 index 00000000..5a8909aa Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0130-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0131-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0131-1400.webp new file mode 100644 index 00000000..7f305936 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0131-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0131-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0131-480.webp new 
file mode 100644 index 00000000..082a94b6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0131-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0131-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0131-800.webp new file mode 100644 index 00000000..7f305936 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0131-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0132-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0132-1400.webp new file mode 100644 index 00000000..2447a385 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0132-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0132-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0132-480.webp new file mode 100644 index 00000000..bdf71ca8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0132-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0132-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0132-800.webp new file mode 100644 index 00000000..2447a385 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0132-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0133-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0133-1400.webp new file mode 100644 index 00000000..c287868e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0133-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0133-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0133-480.webp new file mode 100644 index 00000000..300f19b3 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0133-480.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0133-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0133-800.webp new file mode 100644 index 00000000..c287868e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0133-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0134-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0134-1400.webp new file mode 100644 index 00000000..6d3cf4e6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0134-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0134-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0134-480.webp new file mode 100644 index 00000000..afe1c44e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0134-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0134-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0134-800.webp new file mode 100644 index 00000000..6d3cf4e6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0134-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0135-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0135-1400.webp new file mode 100644 index 00000000..6d01b31a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0135-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0135-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0135-480.webp new file mode 100644 index 00000000..7abe0842 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0135-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0135-800.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0135-800.webp new file mode 100644 index 00000000..6d01b31a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0135-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0136-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0136-1400.webp new file mode 100644 index 00000000..b6545692 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0136-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0136-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0136-480.webp new file mode 100644 index 00000000..7eb1cee2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0136-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0136-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0136-800.webp new file mode 100644 index 00000000..b6545692 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0136-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0137-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0137-1400.webp new file mode 100644 index 00000000..add0a37b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0137-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0137-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0137-480.webp new file mode 100644 index 00000000..afbca355 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0137-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0137-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0137-800.webp new file mode 100644 index 00000000..add0a37b Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0137-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0138-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0138-1400.webp new file mode 100644 index 00000000..a8ab7384 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0138-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0138-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0138-480.webp new file mode 100644 index 00000000..378313fd Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0138-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0138-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0138-800.webp new file mode 100644 index 00000000..a8ab7384 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0138-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0139-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0139-1400.webp new file mode 100644 index 00000000..1f96746c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0139-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0139-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0139-480.webp new file mode 100644 index 00000000..4448c84f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0139-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0139-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0139-800.webp new file mode 100644 index 00000000..1f96746c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0139-800.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0140-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0140-1400.webp new file mode 100644 index 00000000..a5f1728b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0140-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0140-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0140-480.webp new file mode 100644 index 00000000..d112fe0e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0140-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0140-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0140-800.webp new file mode 100644 index 00000000..a5f1728b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0140-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0141-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0141-1400.webp new file mode 100644 index 00000000..47f576cc Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0141-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0141-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0141-480.webp new file mode 100644 index 00000000..3f94c4e2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0141-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0141-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0141-800.webp new file mode 100644 index 00000000..47f576cc Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0141-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0142-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0142-1400.webp new 
file mode 100644 index 00000000..c8c28b80 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0142-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0142-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0142-480.webp new file mode 100644 index 00000000..dcdfb682 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0142-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0142-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0142-800.webp new file mode 100644 index 00000000..c8c28b80 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0142-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0143-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0143-1400.webp new file mode 100644 index 00000000..b32e80a6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0143-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0143-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0143-480.webp new file mode 100644 index 00000000..225adc0b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0143-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0143-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0143-800.webp new file mode 100644 index 00000000..b32e80a6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0143-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0144-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0144-1400.webp new file mode 100644 index 00000000..dcc89956 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0144-1400.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0144-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0144-480.webp new file mode 100644 index 00000000..1844d4b8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0144-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0144-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0144-800.webp new file mode 100644 index 00000000..dcc89956 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0144-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0145-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0145-1400.webp new file mode 100644 index 00000000..20fc3467 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0145-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0145-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0145-480.webp new file mode 100644 index 00000000..839c8304 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0145-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0145-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0145-800.webp new file mode 100644 index 00000000..20fc3467 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0145-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0146-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0146-1400.webp new file mode 100644 index 00000000..1fff553a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0146-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0146-480.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0146-480.webp new file mode 100644 index 00000000..e05539ea Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0146-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0146-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0146-800.webp new file mode 100644 index 00000000..1fff553a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0146-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0147-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0147-1400.webp new file mode 100644 index 00000000..d8800b7c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0147-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0147-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0147-480.webp new file mode 100644 index 00000000..5d253cf2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0147-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0147-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0147-800.webp new file mode 100644 index 00000000..d8800b7c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0147-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0148-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0148-1400.webp new file mode 100644 index 00000000..115d7350 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0148-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0148-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0148-480.webp new file mode 100644 index 00000000..957aa52b Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0148-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0148-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0148-800.webp new file mode 100644 index 00000000..115d7350 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0148-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0149-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0149-1400.webp new file mode 100644 index 00000000..b8d55933 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0149-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0149-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0149-480.webp new file mode 100644 index 00000000..aa0f2109 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0149-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0149-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0149-800.webp new file mode 100644 index 00000000..b8d55933 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0149-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0150-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0150-1400.webp new file mode 100644 index 00000000..948a55e0 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0150-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0150-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0150-480.webp new file mode 100644 index 00000000..441e6750 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0150-480.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0150-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0150-800.webp new file mode 100644 index 00000000..948a55e0 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0150-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0151-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0151-1400.webp new file mode 100644 index 00000000..d5263b82 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0151-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0151-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0151-480.webp new file mode 100644 index 00000000..df3af492 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0151-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0151-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0151-800.webp new file mode 100644 index 00000000..d5263b82 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0151-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0152-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0152-1400.webp new file mode 100644 index 00000000..93e4e74b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0152-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0152-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0152-480.webp new file mode 100644 index 00000000..19e90e2a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0152-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0152-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0152-800.webp new 
file mode 100644 index 00000000..93e4e74b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0152-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0153-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0153-1400.webp new file mode 100644 index 00000000..50b29389 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0153-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0153-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0153-480.webp new file mode 100644 index 00000000..fa0bdaac Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0153-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0153-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0153-800.webp new file mode 100644 index 00000000..50b29389 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0153-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0154-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0154-1400.webp new file mode 100644 index 00000000..3aaadc5f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0154-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0154-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0154-480.webp new file mode 100644 index 00000000..5f7a44ac Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0154-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0154-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0154-800.webp new file mode 100644 index 00000000..3aaadc5f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0154-800.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0155-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0155-1400.webp new file mode 100644 index 00000000..b9262883 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0155-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0155-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0155-480.webp new file mode 100644 index 00000000..1d0057ff Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0155-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0155-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0155-800.webp new file mode 100644 index 00000000..b9262883 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0155-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0156-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0156-1400.webp new file mode 100644 index 00000000..b4212dd5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0156-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0156-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0156-480.webp new file mode 100644 index 00000000..11704e88 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0156-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0156-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0156-800.webp new file mode 100644 index 00000000..b4212dd5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0156-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0157-1400.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0157-1400.webp new file mode 100644 index 00000000..ce374c19 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0157-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0157-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0157-480.webp new file mode 100644 index 00000000..7431326e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0157-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0157-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0157-800.webp new file mode 100644 index 00000000..ce374c19 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0157-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0158-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0158-1400.webp new file mode 100644 index 00000000..c6094943 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0158-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0158-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0158-480.webp new file mode 100644 index 00000000..26d8aaf1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0158-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0158-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0158-800.webp new file mode 100644 index 00000000..c6094943 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0158-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0159-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0159-1400.webp new file mode 100644 index 00000000..f451dd8d Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0159-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0159-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0159-480.webp new file mode 100644 index 00000000..b82cbe8a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0159-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0159-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0159-800.webp new file mode 100644 index 00000000..f451dd8d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0159-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0160-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0160-1400.webp new file mode 100644 index 00000000..242283e7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0160-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0160-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0160-480.webp new file mode 100644 index 00000000..034a8105 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0160-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0160-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0160-800.webp new file mode 100644 index 00000000..242283e7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0160-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0161-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0161-1400.webp new file mode 100644 index 00000000..00a6fc4d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0161-1400.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0161-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0161-480.webp new file mode 100644 index 00000000..ec3abdb5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0161-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0161-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0161-800.webp new file mode 100644 index 00000000..00a6fc4d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0161-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0162-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0162-1400.webp new file mode 100644 index 00000000..567b2af4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0162-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0162-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0162-480.webp new file mode 100644 index 00000000..58fa48b3 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0162-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0162-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0162-800.webp new file mode 100644 index 00000000..567b2af4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0162-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0163-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0163-1400.webp new file mode 100644 index 00000000..ca705423 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0163-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0163-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0163-480.webp new 
file mode 100644 index 00000000..470170f8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0163-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0163-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0163-800.webp new file mode 100644 index 00000000..ca705423 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0163-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0164-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0164-1400.webp new file mode 100644 index 00000000..1cb267d2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0164-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0164-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0164-480.webp new file mode 100644 index 00000000..39d995e8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0164-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0164-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0164-800.webp new file mode 100644 index 00000000..1cb267d2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0164-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0165-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0165-1400.webp new file mode 100644 index 00000000..d5a70799 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0165-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0165-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0165-480.webp new file mode 100644 index 00000000..938048d6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0165-480.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0165-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0165-800.webp new file mode 100644 index 00000000..d5a70799 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0165-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0166-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0166-1400.webp new file mode 100644 index 00000000..68d27f74 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0166-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0166-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0166-480.webp new file mode 100644 index 00000000..f47d47ad Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0166-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0166-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0166-800.webp new file mode 100644 index 00000000..68d27f74 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0166-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0167-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0167-1400.webp new file mode 100644 index 00000000..385a39fa Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0167-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0167-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0167-480.webp new file mode 100644 index 00000000..4cb9afeb Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0167-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0167-800.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0167-800.webp new file mode 100644 index 00000000..385a39fa Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0167-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0168-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0168-1400.webp new file mode 100644 index 00000000..0929d52f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0168-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0168-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0168-480.webp new file mode 100644 index 00000000..b4e2e61f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0168-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0168-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0168-800.webp new file mode 100644 index 00000000..0929d52f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0168-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0169-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0169-1400.webp new file mode 100644 index 00000000..bb9d68e8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0169-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0169-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0169-480.webp new file mode 100644 index 00000000..8e07fb05 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0169-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0169-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0169-800.webp new file mode 100644 index 00000000..bb9d68e8 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0169-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0170-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0170-1400.webp new file mode 100644 index 00000000..7440f72b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0170-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0170-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0170-480.webp new file mode 100644 index 00000000..e84f79eb Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0170-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0170-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0170-800.webp new file mode 100644 index 00000000..7440f72b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0170-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0171-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0171-1400.webp new file mode 100644 index 00000000..38b250cb Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0171-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0171-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0171-480.webp new file mode 100644 index 00000000..a994822e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0171-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0171-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0171-800.webp new file mode 100644 index 00000000..38b250cb Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0171-800.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0172-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0172-1400.webp new file mode 100644 index 00000000..3304238f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0172-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0172-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0172-480.webp new file mode 100644 index 00000000..9bd92cdf Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0172-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0172-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0172-800.webp new file mode 100644 index 00000000..3304238f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0172-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0173-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0173-1400.webp new file mode 100644 index 00000000..1fe9e794 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0173-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0173-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0173-480.webp new file mode 100644 index 00000000..520f0695 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0173-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0173-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0173-800.webp new file mode 100644 index 00000000..1fe9e794 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0173-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0174-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0174-1400.webp new 
file mode 100644 index 00000000..0eaf65f2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0174-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0174-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0174-480.webp new file mode 100644 index 00000000..cfd7021c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0174-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0174-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0174-800.webp new file mode 100644 index 00000000..0eaf65f2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0174-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0175-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0175-1400.webp new file mode 100644 index 00000000..60259282 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0175-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0175-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0175-480.webp new file mode 100644 index 00000000..7806c3ef Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0175-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0175-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0175-800.webp new file mode 100644 index 00000000..60259282 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0175-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0176-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0176-1400.webp new file mode 100644 index 00000000..69357c6e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0176-1400.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0176-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0176-480.webp new file mode 100644 index 00000000..b2917482 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0176-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0176-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0176-800.webp new file mode 100644 index 00000000..69357c6e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0176-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0177-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0177-1400.webp new file mode 100644 index 00000000..617261ad Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0177-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0177-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0177-480.webp new file mode 100644 index 00000000..53253417 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0177-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0177-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0177-800.webp new file mode 100644 index 00000000..617261ad Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0177-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0178-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0178-1400.webp new file mode 100644 index 00000000..692b0629 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0178-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0178-480.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0178-480.webp new file mode 100644 index 00000000..85b48244 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0178-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0178-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0178-800.webp new file mode 100644 index 00000000..692b0629 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0178-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0179-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0179-1400.webp new file mode 100644 index 00000000..d1b82dcc Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0179-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0179-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0179-480.webp new file mode 100644 index 00000000..a5461232 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0179-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0179-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0179-800.webp new file mode 100644 index 00000000..d1b82dcc Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0179-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0180-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0180-1400.webp new file mode 100644 index 00000000..99c974e2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0180-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0180-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0180-480.webp new file mode 100644 index 00000000..46889e78 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0180-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0180-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0180-800.webp new file mode 100644 index 00000000..99c974e2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0180-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0181-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0181-1400.webp new file mode 100644 index 00000000..0cc175a6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0181-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0181-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0181-480.webp new file mode 100644 index 00000000..ce47d8c7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0181-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0181-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0181-800.webp new file mode 100644 index 00000000..0cc175a6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0181-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0182-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0182-1400.webp new file mode 100644 index 00000000..33902e0a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0182-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0182-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0182-480.webp new file mode 100644 index 00000000..010caf99 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0182-480.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0182-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0182-800.webp new file mode 100644 index 00000000..33902e0a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0182-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0183-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0183-1400.webp new file mode 100644 index 00000000..1a053df2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0183-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0183-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0183-480.webp new file mode 100644 index 00000000..c5379c87 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0183-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0183-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0183-800.webp new file mode 100644 index 00000000..1a053df2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0183-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0184-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0184-1400.webp new file mode 100644 index 00000000..1559cf87 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0184-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0184-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0184-480.webp new file mode 100644 index 00000000..4512887f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0184-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0184-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0184-800.webp new 
file mode 100644 index 00000000..1559cf87 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0184-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0185-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0185-1400.webp new file mode 100644 index 00000000..c91b9734 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0185-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0185-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0185-480.webp new file mode 100644 index 00000000..c80dd786 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0185-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0185-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0185-800.webp new file mode 100644 index 00000000..c91b9734 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0185-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0186-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0186-1400.webp new file mode 100644 index 00000000..4547e0ac Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0186-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0186-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0186-480.webp new file mode 100644 index 00000000..4f6b259a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0186-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0186-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0186-800.webp new file mode 100644 index 00000000..4547e0ac Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0186-800.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0187-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0187-1400.webp new file mode 100644 index 00000000..8c6ce9af Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0187-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0187-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0187-480.webp new file mode 100644 index 00000000..b5125798 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0187-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0187-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0187-800.webp new file mode 100644 index 00000000..8c6ce9af Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0187-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0188-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0188-1400.webp new file mode 100644 index 00000000..8e520869 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0188-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0188-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0188-480.webp new file mode 100644 index 00000000..1aeabb12 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0188-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0188-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0188-800.webp new file mode 100644 index 00000000..8e520869 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0188-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0189-1400.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0189-1400.webp new file mode 100644 index 00000000..f5b9e182 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0189-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0189-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0189-480.webp new file mode 100644 index 00000000..57f1e1d5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0189-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0189-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0189-800.webp new file mode 100644 index 00000000..f5b9e182 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0189-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0190-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0190-1400.webp new file mode 100644 index 00000000..66fcdfc8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0190-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0190-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0190-480.webp new file mode 100644 index 00000000..e64031fa Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0190-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0190-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0190-800.webp new file mode 100644 index 00000000..66fcdfc8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0190-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0191-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0191-1400.webp new file mode 100644 index 00000000..94008c8a Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0191-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0191-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0191-480.webp new file mode 100644 index 00000000..8cf4ff41 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0191-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0191-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0191-800.webp new file mode 100644 index 00000000..94008c8a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0191-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0192-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0192-1400.webp new file mode 100644 index 00000000..04b21a1e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0192-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0192-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0192-480.webp new file mode 100644 index 00000000..6b271380 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0192-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0192-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0192-800.webp new file mode 100644 index 00000000..04b21a1e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0192-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0193-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0193-1400.webp new file mode 100644 index 00000000..226c8fb4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0193-1400.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0193-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0193-480.webp new file mode 100644 index 00000000..70f36c96 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0193-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0193-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0193-800.webp new file mode 100644 index 00000000..226c8fb4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0193-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0194-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0194-1400.webp new file mode 100644 index 00000000..984e2f7b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0194-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0194-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0194-480.webp new file mode 100644 index 00000000..63562a92 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0194-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0194-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0194-800.webp new file mode 100644 index 00000000..984e2f7b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0194-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0195-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0195-1400.webp new file mode 100644 index 00000000..fbe937e2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0195-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0195-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0195-480.webp new 
file mode 100644 index 00000000..92e432dc Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0195-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0195-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0195-800.webp new file mode 100644 index 00000000..fbe937e2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0195-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0196-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0196-1400.webp new file mode 100644 index 00000000..ec1355c8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0196-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0196-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0196-480.webp new file mode 100644 index 00000000..fe0b9380 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0196-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0196-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0196-800.webp new file mode 100644 index 00000000..ec1355c8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0196-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0197-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0197-1400.webp new file mode 100644 index 00000000..971732b8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0197-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0197-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0197-480.webp new file mode 100644 index 00000000..a432e93d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0197-480.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0197-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0197-800.webp new file mode 100644 index 00000000..971732b8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0197-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0198-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0198-1400.webp new file mode 100644 index 00000000..83603e60 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0198-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0198-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0198-480.webp new file mode 100644 index 00000000..51c5f383 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0198-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0198-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0198-800.webp new file mode 100644 index 00000000..83603e60 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0198-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0199-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0199-1400.webp new file mode 100644 index 00000000..a5d5757a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0199-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0199-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0199-480.webp new file mode 100644 index 00000000..b77a6ee2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0199-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0199-800.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0199-800.webp new file mode 100644 index 00000000..a5d5757a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0199-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0200-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0200-1400.webp new file mode 100644 index 00000000..b7277db6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0200-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0200-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0200-480.webp new file mode 100644 index 00000000..748ccd12 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0200-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0200-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0200-800.webp new file mode 100644 index 00000000..b7277db6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0200-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0201-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0201-1400.webp new file mode 100644 index 00000000..9b15e688 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0201-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0201-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0201-480.webp new file mode 100644 index 00000000..1a473bbe Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0201-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0201-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0201-800.webp new file mode 100644 index 00000000..9b15e688 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0201-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0202-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0202-1400.webp new file mode 100644 index 00000000..6d1b5365 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0202-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0202-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0202-480.webp new file mode 100644 index 00000000..4372a353 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0202-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0202-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0202-800.webp new file mode 100644 index 00000000..6d1b5365 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0202-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0203-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0203-1400.webp new file mode 100644 index 00000000..2c9887ec Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0203-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0203-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0203-480.webp new file mode 100644 index 00000000..275db790 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0203-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0203-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0203-800.webp new file mode 100644 index 00000000..2c9887ec Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0203-800.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0204-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0204-1400.webp new file mode 100644 index 00000000..1e61679d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0204-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0204-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0204-480.webp new file mode 100644 index 00000000..8dfd2f53 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0204-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0204-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0204-800.webp new file mode 100644 index 00000000..1e61679d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0204-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0205-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0205-1400.webp new file mode 100644 index 00000000..4d4d5149 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0205-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0205-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0205-480.webp new file mode 100644 index 00000000..e5d8ed8d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0205-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0205-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0205-800.webp new file mode 100644 index 00000000..4d4d5149 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0205-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0206-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0206-1400.webp new 
file mode 100644 index 00000000..02a29485 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0206-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0206-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0206-480.webp new file mode 100644 index 00000000..4495e099 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0206-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0206-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0206-800.webp new file mode 100644 index 00000000..02a29485 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0206-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0207-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0207-1400.webp new file mode 100644 index 00000000..53a4d1b4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0207-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0207-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0207-480.webp new file mode 100644 index 00000000..ba138c16 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0207-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0207-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0207-800.webp new file mode 100644 index 00000000..53a4d1b4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0207-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0208-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0208-1400.webp new file mode 100644 index 00000000..f7a83f2d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0208-1400.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0208-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0208-480.webp new file mode 100644 index 00000000..da26d2d2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0208-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0208-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0208-800.webp new file mode 100644 index 00000000..f7a83f2d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0208-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0209-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0209-1400.webp new file mode 100644 index 00000000..2b2d366a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0209-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0209-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0209-480.webp new file mode 100644 index 00000000..2aae3aaa Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0209-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0209-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0209-800.webp new file mode 100644 index 00000000..2b2d366a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0209-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0210-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0210-1400.webp new file mode 100644 index 00000000..dbcd885c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0210-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0210-480.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0210-480.webp new file mode 100644 index 00000000..5fb9939b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0210-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0210-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0210-800.webp new file mode 100644 index 00000000..dbcd885c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0210-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0211-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0211-1400.webp new file mode 100644 index 00000000..b929c766 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0211-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0211-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0211-480.webp new file mode 100644 index 00000000..45d56c99 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0211-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0211-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0211-800.webp new file mode 100644 index 00000000..b929c766 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0211-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0212-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0212-1400.webp new file mode 100644 index 00000000..1de2c33f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0212-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0212-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0212-480.webp new file mode 100644 index 00000000..2d1daf17 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0212-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0212-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0212-800.webp new file mode 100644 index 00000000..1de2c33f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0212-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0213-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0213-1400.webp new file mode 100644 index 00000000..748c81df Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0213-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0213-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0213-480.webp new file mode 100644 index 00000000..06cf0934 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0213-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0213-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0213-800.webp new file mode 100644 index 00000000..748c81df Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0213-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0214-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0214-1400.webp new file mode 100644 index 00000000..df23fa88 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0214-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0214-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0214-480.webp new file mode 100644 index 00000000..034e8968 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0214-480.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0214-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0214-800.webp new file mode 100644 index 00000000..df23fa88 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0214-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0215-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0215-1400.webp new file mode 100644 index 00000000..93c0bd24 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0215-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0215-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0215-480.webp new file mode 100644 index 00000000..ceb1e57c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0215-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0215-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0215-800.webp new file mode 100644 index 00000000..93c0bd24 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0215-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0216-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0216-1400.webp new file mode 100644 index 00000000..81b578f7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0216-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0216-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0216-480.webp new file mode 100644 index 00000000..9fa81921 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0216-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0216-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0216-800.webp new 
file mode 100644 index 00000000..81b578f7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0216-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0217-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0217-1400.webp new file mode 100644 index 00000000..2ec3e83b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0217-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0217-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0217-480.webp new file mode 100644 index 00000000..4531dbe7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0217-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0217-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0217-800.webp new file mode 100644 index 00000000..2ec3e83b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0217-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0218-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0218-1400.webp new file mode 100644 index 00000000..224b70b1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0218-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0218-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0218-480.webp new file mode 100644 index 00000000..b8728a4b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0218-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0218-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0218-800.webp new file mode 100644 index 00000000..224b70b1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0218-800.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0219-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0219-1400.webp new file mode 100644 index 00000000..b3f9c8c7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0219-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0219-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0219-480.webp new file mode 100644 index 00000000..c501e1ee Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0219-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0219-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0219-800.webp new file mode 100644 index 00000000..b3f9c8c7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0219-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0220-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0220-1400.webp new file mode 100644 index 00000000..80eb0d0d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0220-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0220-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0220-480.webp new file mode 100644 index 00000000..85aa2db9 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0220-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0220-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0220-800.webp new file mode 100644 index 00000000..80eb0d0d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0220-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0221-1400.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0221-1400.webp new file mode 100644 index 00000000..975fdae9 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0221-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0221-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0221-480.webp new file mode 100644 index 00000000..a0a371a4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0221-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0221-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0221-800.webp new file mode 100644 index 00000000..975fdae9 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0221-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0222-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0222-1400.webp new file mode 100644 index 00000000..f28190ad Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0222-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0222-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0222-480.webp new file mode 100644 index 00000000..92dca156 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0222-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0222-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0222-800.webp new file mode 100644 index 00000000..f28190ad Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0222-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0223-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0223-1400.webp new file mode 100644 index 00000000..9afcc8de Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0223-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0223-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0223-480.webp new file mode 100644 index 00000000..4fb5ca3b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0223-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0223-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0223-800.webp new file mode 100644 index 00000000..9afcc8de Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0223-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0224-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0224-1400.webp new file mode 100644 index 00000000..93182e09 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0224-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0224-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0224-480.webp new file mode 100644 index 00000000..3f242a6c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0224-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0224-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0224-800.webp new file mode 100644 index 00000000..93182e09 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0224-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0225-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0225-1400.webp new file mode 100644 index 00000000..b2a50607 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0225-1400.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0225-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0225-480.webp new file mode 100644 index 00000000..89dcb70b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0225-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0225-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0225-800.webp new file mode 100644 index 00000000..b2a50607 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0225-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0226-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0226-1400.webp new file mode 100644 index 00000000..f4eed65f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0226-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0226-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0226-480.webp new file mode 100644 index 00000000..5b4d5ff9 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0226-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0226-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0226-800.webp new file mode 100644 index 00000000..f4eed65f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0226-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0227-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0227-1400.webp new file mode 100644 index 00000000..526f5248 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0227-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0227-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0227-480.webp new 
file mode 100644 index 00000000..686b6f27 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0227-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0227-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0227-800.webp new file mode 100644 index 00000000..526f5248 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0227-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0228-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0228-1400.webp new file mode 100644 index 00000000..10277675 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0228-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0228-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0228-480.webp new file mode 100644 index 00000000..da43eb61 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0228-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0228-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0228-800.webp new file mode 100644 index 00000000..10277675 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0228-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0229-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0229-1400.webp new file mode 100644 index 00000000..31ae98b2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0229-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0229-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0229-480.webp new file mode 100644 index 00000000..cb9f9947 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0229-480.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0229-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0229-800.webp new file mode 100644 index 00000000..31ae98b2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0229-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0230-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0230-1400.webp new file mode 100644 index 00000000..db28271e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0230-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0230-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0230-480.webp new file mode 100644 index 00000000..5751d741 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0230-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0230-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0230-800.webp new file mode 100644 index 00000000..db28271e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0230-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0231-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0231-1400.webp new file mode 100644 index 00000000..ad5649ac Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0231-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0231-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0231-480.webp new file mode 100644 index 00000000..e6d1f5ae Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0231-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0231-800.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0231-800.webp new file mode 100644 index 00000000..ad5649ac Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0231-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0232-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0232-1400.webp new file mode 100644 index 00000000..ad3dfe64 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0232-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0232-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0232-480.webp new file mode 100644 index 00000000..f84bedb7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0232-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0232-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0232-800.webp new file mode 100644 index 00000000..ad3dfe64 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0232-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0233-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0233-1400.webp new file mode 100644 index 00000000..3da19269 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0233-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0233-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0233-480.webp new file mode 100644 index 00000000..e4beb230 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0233-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0233-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0233-800.webp new file mode 100644 index 00000000..3da19269 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0233-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0234-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0234-1400.webp new file mode 100644 index 00000000..f20b6793 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0234-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0234-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0234-480.webp new file mode 100644 index 00000000..d5ed66ca Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0234-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0234-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0234-800.webp new file mode 100644 index 00000000..f20b6793 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0234-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0235-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0235-1400.webp new file mode 100644 index 00000000..bf9792f4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0235-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0235-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0235-480.webp new file mode 100644 index 00000000..32cd57fa Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0235-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0235-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0235-800.webp new file mode 100644 index 00000000..bf9792f4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0235-800.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0236-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0236-1400.webp new file mode 100644 index 00000000..fd4e7e77 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0236-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0236-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0236-480.webp new file mode 100644 index 00000000..1ac81eed Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0236-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0236-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0236-800.webp new file mode 100644 index 00000000..fd4e7e77 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0236-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0237-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0237-1400.webp new file mode 100644 index 00000000..be59b783 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0237-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0237-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0237-480.webp new file mode 100644 index 00000000..58c55104 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0237-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0237-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0237-800.webp new file mode 100644 index 00000000..be59b783 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0237-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0238-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0238-1400.webp new 
file mode 100644 index 00000000..a9bbc746 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0238-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0238-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0238-480.webp new file mode 100644 index 00000000..4d37d60b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0238-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0238-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0238-800.webp new file mode 100644 index 00000000..a9bbc746 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0238-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0239-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0239-1400.webp new file mode 100644 index 00000000..5f2c1bb3 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0239-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0239-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0239-480.webp new file mode 100644 index 00000000..a37d2f4d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0239-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0239-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0239-800.webp new file mode 100644 index 00000000..5f2c1bb3 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0239-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0240-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0240-1400.webp new file mode 100644 index 00000000..91c3e39c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0240-1400.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0240-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0240-480.webp new file mode 100644 index 00000000..72866e34 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0240-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0240-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0240-800.webp new file mode 100644 index 00000000..91c3e39c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0240-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0241-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0241-1400.webp new file mode 100644 index 00000000..2bfd4453 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0241-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0241-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0241-480.webp new file mode 100644 index 00000000..acceb4a1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0241-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0241-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0241-800.webp new file mode 100644 index 00000000..2bfd4453 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0241-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0242-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0242-1400.webp new file mode 100644 index 00000000..21798665 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0242-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0242-480.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0242-480.webp new file mode 100644 index 00000000..700dc9e5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0242-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0242-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0242-800.webp new file mode 100644 index 00000000..21798665 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0242-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0243-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0243-1400.webp new file mode 100644 index 00000000..03f57c1b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0243-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0243-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0243-480.webp new file mode 100644 index 00000000..859b84c5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0243-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0243-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0243-800.webp new file mode 100644 index 00000000..03f57c1b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0243-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0244-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0244-1400.webp new file mode 100644 index 00000000..e4dc2473 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0244-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0244-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0244-480.webp new file mode 100644 index 00000000..9ff87592 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0244-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0244-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0244-800.webp new file mode 100644 index 00000000..e4dc2473 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0244-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0245-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0245-1400.webp new file mode 100644 index 00000000..06f845d9 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0245-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0245-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0245-480.webp new file mode 100644 index 00000000..fe7dc4fa Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0245-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0245-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0245-800.webp new file mode 100644 index 00000000..06f845d9 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0245-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0246-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0246-1400.webp new file mode 100644 index 00000000..e87cb9d1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0246-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0246-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0246-480.webp new file mode 100644 index 00000000..e28a2d03 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0246-480.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0246-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0246-800.webp new file mode 100644 index 00000000..e87cb9d1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0246-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0247-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0247-1400.webp new file mode 100644 index 00000000..acde08ed Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0247-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0247-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0247-480.webp new file mode 100644 index 00000000..3c89f663 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0247-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0247-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0247-800.webp new file mode 100644 index 00000000..acde08ed Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0247-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0248-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0248-1400.webp new file mode 100644 index 00000000..4e3fc4d4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0248-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0248-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0248-480.webp new file mode 100644 index 00000000..616fd0db Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0248-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0248-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0248-800.webp new 
file mode 100644 index 00000000..4e3fc4d4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0248-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0249-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0249-1400.webp new file mode 100644 index 00000000..8fa60a7f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0249-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0249-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0249-480.webp new file mode 100644 index 00000000..d80c76f9 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0249-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0249-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0249-800.webp new file mode 100644 index 00000000..8fa60a7f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0249-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0250-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0250-1400.webp new file mode 100644 index 00000000..d5044966 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0250-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0250-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0250-480.webp new file mode 100644 index 00000000..1a61d2c4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0250-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0250-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0250-800.webp new file mode 100644 index 00000000..d5044966 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0250-800.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0251-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0251-1400.webp new file mode 100644 index 00000000..7afc3e81 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0251-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0251-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0251-480.webp new file mode 100644 index 00000000..1f2f176c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0251-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0251-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0251-800.webp new file mode 100644 index 00000000..7afc3e81 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0251-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0252-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0252-1400.webp new file mode 100644 index 00000000..b2233fe0 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0252-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0252-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0252-480.webp new file mode 100644 index 00000000..d4e00373 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0252-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0252-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0252-800.webp new file mode 100644 index 00000000..b2233fe0 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0252-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0253-1400.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0253-1400.webp new file mode 100644 index 00000000..6870a1b8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0253-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0253-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0253-480.webp new file mode 100644 index 00000000..990f7ea2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0253-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0253-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0253-800.webp new file mode 100644 index 00000000..6870a1b8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0253-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0254-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0254-1400.webp new file mode 100644 index 00000000..6ac12205 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0254-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0254-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0254-480.webp new file mode 100644 index 00000000..eaa60d7a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0254-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0254-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0254-800.webp new file mode 100644 index 00000000..6ac12205 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0254-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0255-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0255-1400.webp new file mode 100644 index 00000000..99b6173f Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0255-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0255-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0255-480.webp new file mode 100644 index 00000000..3982fa33 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0255-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0255-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0255-800.webp new file mode 100644 index 00000000..99b6173f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0255-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0256-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0256-1400.webp new file mode 100644 index 00000000..420a3e1b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0256-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0256-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0256-480.webp new file mode 100644 index 00000000..1067d48a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0256-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0256-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0256-800.webp new file mode 100644 index 00000000..420a3e1b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0256-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0257-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0257-1400.webp new file mode 100644 index 00000000..2db16a03 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0257-1400.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0257-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0257-480.webp new file mode 100644 index 00000000..0a1d78a7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0257-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0257-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0257-800.webp new file mode 100644 index 00000000..2db16a03 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0257-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0258-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0258-1400.webp new file mode 100644 index 00000000..e1b66b24 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0258-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0258-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0258-480.webp new file mode 100644 index 00000000..b740afdd Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0258-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0258-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0258-800.webp new file mode 100644 index 00000000..e1b66b24 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0258-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0259-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0259-1400.webp new file mode 100644 index 00000000..0b03194a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0259-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0259-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0259-480.webp new 
file mode 100644 index 00000000..19fe4d85 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0259-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0259-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0259-800.webp new file mode 100644 index 00000000..0b03194a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0259-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0260-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0260-1400.webp new file mode 100644 index 00000000..496bb7f1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0260-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0260-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0260-480.webp new file mode 100644 index 00000000..a1e8e1f2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0260-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0260-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0260-800.webp new file mode 100644 index 00000000..496bb7f1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0260-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0261-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0261-1400.webp new file mode 100644 index 00000000..5fbba10d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0261-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0261-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0261-480.webp new file mode 100644 index 00000000..e0db6d24 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0261-480.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0261-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0261-800.webp new file mode 100644 index 00000000..5fbba10d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0261-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0262-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0262-1400.webp new file mode 100644 index 00000000..01f7bb05 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0262-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0262-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0262-480.webp new file mode 100644 index 00000000..64270958 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0262-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0262-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0262-800.webp new file mode 100644 index 00000000..01f7bb05 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0262-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0263-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0263-1400.webp new file mode 100644 index 00000000..80f2d8cf Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0263-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0263-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0263-480.webp new file mode 100644 index 00000000..e292927c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0263-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0263-800.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0263-800.webp new file mode 100644 index 00000000..80f2d8cf Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0263-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0264-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0264-1400.webp new file mode 100644 index 00000000..66e8f352 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0264-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0264-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0264-480.webp new file mode 100644 index 00000000..8dbf7d97 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0264-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0264-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0264-800.webp new file mode 100644 index 00000000..66e8f352 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0264-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0265-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0265-1400.webp new file mode 100644 index 00000000..1fcbfa43 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0265-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0265-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0265-480.webp new file mode 100644 index 00000000..c6a4d39b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0265-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0265-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0265-800.webp new file mode 100644 index 00000000..1fcbfa43 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0265-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0266-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0266-1400.webp new file mode 100644 index 00000000..9b811879 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0266-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0266-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0266-480.webp new file mode 100644 index 00000000..8572379e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0266-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0266-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0266-800.webp new file mode 100644 index 00000000..9b811879 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0266-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0267-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0267-1400.webp new file mode 100644 index 00000000..70187742 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0267-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0267-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0267-480.webp new file mode 100644 index 00000000..8341bf99 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0267-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0267-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0267-800.webp new file mode 100644 index 00000000..70187742 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0267-800.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0268-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0268-1400.webp new file mode 100644 index 00000000..95674794 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0268-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0268-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0268-480.webp new file mode 100644 index 00000000..f9a0eb6f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0268-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0268-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0268-800.webp new file mode 100644 index 00000000..95674794 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0268-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0269-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0269-1400.webp new file mode 100644 index 00000000..88a35c9d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0269-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0269-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0269-480.webp new file mode 100644 index 00000000..ab71eb34 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0269-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0269-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0269-800.webp new file mode 100644 index 00000000..88a35c9d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0269-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0270-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0270-1400.webp new 
file mode 100644 index 00000000..518c2b2b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0270-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0270-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0270-480.webp new file mode 100644 index 00000000..5959923e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0270-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0270-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0270-800.webp new file mode 100644 index 00000000..518c2b2b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0270-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0271-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0271-1400.webp new file mode 100644 index 00000000..3502bd3b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0271-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0271-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0271-480.webp new file mode 100644 index 00000000..0169da03 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0271-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0271-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0271-800.webp new file mode 100644 index 00000000..3502bd3b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0271-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0272-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0272-1400.webp new file mode 100644 index 00000000..baf4100f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0272-1400.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0272-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0272-480.webp new file mode 100644 index 00000000..88e7310d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0272-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0272-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0272-800.webp new file mode 100644 index 00000000..baf4100f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0272-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0273-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0273-1400.webp new file mode 100644 index 00000000..4cfd8609 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0273-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0273-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0273-480.webp new file mode 100644 index 00000000..dce2f8b0 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0273-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0273-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0273-800.webp new file mode 100644 index 00000000..4cfd8609 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0273-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0274-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0274-1400.webp new file mode 100644 index 00000000..ce92ef4e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0274-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0274-480.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0274-480.webp new file mode 100644 index 00000000..fda791e5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0274-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0274-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0274-800.webp new file mode 100644 index 00000000..ce92ef4e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0274-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0275-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0275-1400.webp new file mode 100644 index 00000000..bdbcb02d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0275-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0275-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0275-480.webp new file mode 100644 index 00000000..badbe02c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0275-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0275-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0275-800.webp new file mode 100644 index 00000000..bdbcb02d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0275-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0276-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0276-1400.webp new file mode 100644 index 00000000..97fe9380 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0276-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0276-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0276-480.webp new file mode 100644 index 00000000..e50e2969 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0276-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0276-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0276-800.webp new file mode 100644 index 00000000..97fe9380 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0276-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0277-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0277-1400.webp new file mode 100644 index 00000000..4270826e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0277-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0277-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0277-480.webp new file mode 100644 index 00000000..dffefe8d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0277-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0277-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0277-800.webp new file mode 100644 index 00000000..4270826e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0277-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0278-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0278-1400.webp new file mode 100644 index 00000000..b1ed9db2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0278-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0278-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0278-480.webp new file mode 100644 index 00000000..989a290c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0278-480.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0278-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0278-800.webp new file mode 100644 index 00000000..b1ed9db2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0278-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0279-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0279-1400.webp new file mode 100644 index 00000000..b81953a8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0279-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0279-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0279-480.webp new file mode 100644 index 00000000..d75dd633 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0279-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0279-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0279-800.webp new file mode 100644 index 00000000..b81953a8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0279-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0280-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0280-1400.webp new file mode 100644 index 00000000..8a683008 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0280-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0280-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0280-480.webp new file mode 100644 index 00000000..0ca6daee Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0280-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0280-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0280-800.webp new 
file mode 100644 index 00000000..8a683008 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0280-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0281-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0281-1400.webp new file mode 100644 index 00000000..cfda87bb Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0281-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0281-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0281-480.webp new file mode 100644 index 00000000..c90b76a3 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0281-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0281-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0281-800.webp new file mode 100644 index 00000000..cfda87bb Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0281-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0282-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0282-1400.webp new file mode 100644 index 00000000..8db54fc7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0282-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0282-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0282-480.webp new file mode 100644 index 00000000..4dff6b72 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0282-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0282-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0282-800.webp new file mode 100644 index 00000000..8db54fc7 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0282-800.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0283-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0283-1400.webp new file mode 100644 index 00000000..d69fb0fc Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0283-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0283-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0283-480.webp new file mode 100644 index 00000000..e2da34e5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0283-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0283-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0283-800.webp new file mode 100644 index 00000000..d69fb0fc Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0283-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0284-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0284-1400.webp new file mode 100644 index 00000000..6482b833 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0284-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0284-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0284-480.webp new file mode 100644 index 00000000..f2d36a9d Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0284-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0284-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0284-800.webp new file mode 100644 index 00000000..6482b833 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0284-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0285-1400.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0285-1400.webp new file mode 100644 index 00000000..57ccc060 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0285-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0285-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0285-480.webp new file mode 100644 index 00000000..73524218 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0285-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0285-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0285-800.webp new file mode 100644 index 00000000..57ccc060 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0285-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0286-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0286-1400.webp new file mode 100644 index 00000000..a5e13ca2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0286-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0286-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0286-480.webp new file mode 100644 index 00000000..3777dfb5 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0286-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0286-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0286-800.webp new file mode 100644 index 00000000..a5e13ca2 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0286-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0287-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0287-1400.webp new file mode 100644 index 00000000..167ed626 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0287-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0287-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0287-480.webp new file mode 100644 index 00000000..538bc609 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0287-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0287-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0287-800.webp new file mode 100644 index 00000000..167ed626 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0287-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0288-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0288-1400.webp new file mode 100644 index 00000000..5841d7b4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0288-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0288-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0288-480.webp new file mode 100644 index 00000000..93fbc6d8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0288-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0288-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0288-800.webp new file mode 100644 index 00000000..5841d7b4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0288-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0289-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0289-1400.webp new file mode 100644 index 00000000..2198909a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0289-1400.webp differ diff --git 
a/assets/img/2023-11-01-latent-interpolation/variance/img_0289-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0289-480.webp new file mode 100644 index 00000000..6fd01a1c Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0289-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0289-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0289-800.webp new file mode 100644 index 00000000..2198909a Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0289-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0290-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0290-1400.webp new file mode 100644 index 00000000..7c8afe5f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0290-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0290-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0290-480.webp new file mode 100644 index 00000000..63e4aef1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0290-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0290-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0290-800.webp new file mode 100644 index 00000000..7c8afe5f Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0290-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0291-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0291-1400.webp new file mode 100644 index 00000000..c1364633 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0291-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0291-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0291-480.webp new 
file mode 100644 index 00000000..ff5800c9 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0291-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0291-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0291-800.webp new file mode 100644 index 00000000..c1364633 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0291-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0292-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0292-1400.webp new file mode 100644 index 00000000..df1b04f4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0292-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0292-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0292-480.webp new file mode 100644 index 00000000..111527d1 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0292-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0292-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0292-800.webp new file mode 100644 index 00000000..df1b04f4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0292-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0293-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0293-1400.webp new file mode 100644 index 00000000..43730d61 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0293-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0293-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0293-480.webp new file mode 100644 index 00000000..1ca0f390 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0293-480.webp differ 
diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0293-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0293-800.webp new file mode 100644 index 00000000..43730d61 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0293-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0294-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0294-1400.webp new file mode 100644 index 00000000..ceaa0f54 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0294-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0294-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0294-480.webp new file mode 100644 index 00000000..6ac9ed4b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0294-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0294-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0294-800.webp new file mode 100644 index 00000000..ceaa0f54 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0294-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0295-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0295-1400.webp new file mode 100644 index 00000000..bbef57b6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0295-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0295-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0295-480.webp new file mode 100644 index 00000000..9b60e145 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0295-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0295-800.webp 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0295-800.webp new file mode 100644 index 00000000..bbef57b6 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0295-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0296-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0296-1400.webp new file mode 100644 index 00000000..7512c8d8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0296-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0296-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0296-480.webp new file mode 100644 index 00000000..bb50cdea Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0296-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0296-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0296-800.webp new file mode 100644 index 00000000..7512c8d8 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0296-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0297-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0297-1400.webp new file mode 100644 index 00000000..fa00dd04 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0297-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0297-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0297-480.webp new file mode 100644 index 00000000..71ba9b5b Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0297-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0297-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0297-800.webp new file mode 100644 index 00000000..fa00dd04 Binary files /dev/null and 
b/assets/img/2023-11-01-latent-interpolation/variance/img_0297-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0298-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0298-1400.webp new file mode 100644 index 00000000..089a2af4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0298-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0298-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0298-480.webp new file mode 100644 index 00000000..7f024db4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0298-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0298-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0298-800.webp new file mode 100644 index 00000000..089a2af4 Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0298-800.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0299-1400.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0299-1400.webp new file mode 100644 index 00000000..d343c2da Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0299-1400.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0299-480.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0299-480.webp new file mode 100644 index 00000000..216b822e Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0299-480.webp differ diff --git a/assets/img/2023-11-01-latent-interpolation/variance/img_0299-800.webp b/assets/img/2023-11-01-latent-interpolation/variance/img_0299-800.webp new file mode 100644 index 00000000..d343c2da Binary files /dev/null and b/assets/img/2023-11-01-latent-interpolation/variance/img_0299-800.webp differ diff --git 
a/assets/img/2023-11-02-guided-transfer-learning/clusteringrecount3-1400.webp b/assets/img/2023-11-02-guided-transfer-learning/clusteringrecount3-1400.webp new file mode 100644 index 00000000..ca99e199 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/clusteringrecount3-1400.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/clusteringrecount3-480.webp b/assets/img/2023-11-02-guided-transfer-learning/clusteringrecount3-480.webp new file mode 100644 index 00000000..7ac2cb2e Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/clusteringrecount3-480.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/clusteringrecount3-800.webp b/assets/img/2023-11-02-guided-transfer-learning/clusteringrecount3-800.webp new file mode 100644 index 00000000..ca99e199 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/clusteringrecount3-800.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/downstream-1400.webp b/assets/img/2023-11-02-guided-transfer-learning/downstream-1400.webp new file mode 100644 index 00000000..7e5e1246 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/downstream-1400.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/downstream-480.webp b/assets/img/2023-11-02-guided-transfer-learning/downstream-480.webp new file mode 100644 index 00000000..b4c6f438 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/downstream-480.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/downstream-800.webp b/assets/img/2023-11-02-guided-transfer-learning/downstream-800.webp new file mode 100644 index 00000000..7e5e1246 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/downstream-800.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/exp1_diagram-1400.webp 
b/assets/img/2023-11-02-guided-transfer-learning/exp1_diagram-1400.webp new file mode 100644 index 00000000..c26ca3c9 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/exp1_diagram-1400.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/exp1_diagram-480.webp b/assets/img/2023-11-02-guided-transfer-learning/exp1_diagram-480.webp new file mode 100644 index 00000000..db1c067d Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/exp1_diagram-480.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/exp1_diagram-800.webp b/assets/img/2023-11-02-guided-transfer-learning/exp1_diagram-800.webp new file mode 100644 index 00000000..c26ca3c9 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/exp1_diagram-800.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/exp1_val_acc-1400.webp b/assets/img/2023-11-02-guided-transfer-learning/exp1_val_acc-1400.webp new file mode 100644 index 00000000..b707da43 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/exp1_val_acc-1400.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/exp1_val_acc-480.webp b/assets/img/2023-11-02-guided-transfer-learning/exp1_val_acc-480.webp new file mode 100644 index 00000000..f5d521b9 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/exp1_val_acc-480.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/exp1_val_acc-800.webp b/assets/img/2023-11-02-guided-transfer-learning/exp1_val_acc-800.webp new file mode 100644 index 00000000..b707da43 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/exp1_val_acc-800.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/exp1_val_loss-1400.webp b/assets/img/2023-11-02-guided-transfer-learning/exp1_val_loss-1400.webp new file mode 100644 index 00000000..853015ab Binary files /dev/null and 
b/assets/img/2023-11-02-guided-transfer-learning/exp1_val_loss-1400.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/exp1_val_loss-480.webp b/assets/img/2023-11-02-guided-transfer-learning/exp1_val_loss-480.webp new file mode 100644 index 00000000..c50683f9 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/exp1_val_loss-480.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/exp1_val_loss-800.webp b/assets/img/2023-11-02-guided-transfer-learning/exp1_val_loss-800.webp new file mode 100644 index 00000000..853015ab Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/exp1_val_loss-800.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/exp2_loss-1400.webp b/assets/img/2023-11-02-guided-transfer-learning/exp2_loss-1400.webp new file mode 100644 index 00000000..849407e4 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/exp2_loss-1400.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/exp2_loss-480.webp b/assets/img/2023-11-02-guided-transfer-learning/exp2_loss-480.webp new file mode 100644 index 00000000..aa843307 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/exp2_loss-480.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/exp2_loss-800.webp b/assets/img/2023-11-02-guided-transfer-learning/exp2_loss-800.webp new file mode 100644 index 00000000..849407e4 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/exp2_loss-800.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/exp2_val_acc-1400.webp b/assets/img/2023-11-02-guided-transfer-learning/exp2_val_acc-1400.webp new file mode 100644 index 00000000..55601b2c Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/exp2_val_acc-1400.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/exp2_val_acc-480.webp 
b/assets/img/2023-11-02-guided-transfer-learning/exp2_val_acc-480.webp new file mode 100644 index 00000000..d0db24ef Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/exp2_val_acc-480.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/exp2_val_acc-800.webp b/assets/img/2023-11-02-guided-transfer-learning/exp2_val_acc-800.webp new file mode 100644 index 00000000..55601b2c Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/exp2_val_acc-800.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/guide_values_1-1400.webp b/assets/img/2023-11-02-guided-transfer-learning/guide_values_1-1400.webp new file mode 100644 index 00000000..e3a20007 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/guide_values_1-1400.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/guide_values_1-480.webp b/assets/img/2023-11-02-guided-transfer-learning/guide_values_1-480.webp new file mode 100644 index 00000000..4537bf7a Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/guide_values_1-480.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/guide_values_1-800.webp b/assets/img/2023-11-02-guided-transfer-learning/guide_values_1-800.webp new file mode 100644 index 00000000..e3a20007 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/guide_values_1-800.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/guide_values_2-1400.webp b/assets/img/2023-11-02-guided-transfer-learning/guide_values_2-1400.webp new file mode 100644 index 00000000..514bb74e Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/guide_values_2-1400.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/guide_values_2-480.webp b/assets/img/2023-11-02-guided-transfer-learning/guide_values_2-480.webp new file mode 100644 index 00000000..738de601 Binary files /dev/null 
and b/assets/img/2023-11-02-guided-transfer-learning/guide_values_2-480.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/guide_values_2-800.webp b/assets/img/2023-11-02-guided-transfer-learning/guide_values_2-800.webp new file mode 100644 index 00000000..514bb74e Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/guide_values_2-800.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/iclr-1400.webp b/assets/img/2023-11-02-guided-transfer-learning/iclr-1400.webp new file mode 100644 index 00000000..d56968ba Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/iclr-1400.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/iclr-480.webp b/assets/img/2023-11-02-guided-transfer-learning/iclr-480.webp new file mode 100644 index 00000000..c9d42d7e Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/iclr-480.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/iclr-800.webp b/assets/img/2023-11-02-guided-transfer-learning/iclr-800.webp new file mode 100644 index 00000000..d56968ba Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/iclr-800.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/omniglot-1400.webp b/assets/img/2023-11-02-guided-transfer-learning/omniglot-1400.webp new file mode 100644 index 00000000..644b1973 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/omniglot-1400.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/omniglot-480.webp b/assets/img/2023-11-02-guided-transfer-learning/omniglot-480.webp new file mode 100644 index 00000000..85306662 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/omniglot-480.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/omniglot-800.webp b/assets/img/2023-11-02-guided-transfer-learning/omniglot-800.webp new file mode 100644 index 
00000000..644b1973 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/omniglot-800.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/omniglot2-1400.webp b/assets/img/2023-11-02-guided-transfer-learning/omniglot2-1400.webp new file mode 100644 index 00000000..36260bdf Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/omniglot2-1400.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/omniglot2-480.webp b/assets/img/2023-11-02-guided-transfer-learning/omniglot2-480.webp new file mode 100644 index 00000000..57b48644 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/omniglot2-480.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/omniglot2-800.webp b/assets/img/2023-11-02-guided-transfer-learning/omniglot2-800.webp new file mode 100644 index 00000000..36260bdf Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/omniglot2-800.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/paperresults1-1400.webp b/assets/img/2023-11-02-guided-transfer-learning/paperresults1-1400.webp new file mode 100644 index 00000000..abcb894e Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/paperresults1-1400.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/paperresults1-480.webp b/assets/img/2023-11-02-guided-transfer-learning/paperresults1-480.webp new file mode 100644 index 00000000..1f1d24cf Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/paperresults1-480.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/paperresults1-800.webp b/assets/img/2023-11-02-guided-transfer-learning/paperresults1-800.webp new file mode 100644 index 00000000..abcb894e Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/paperresults1-800.webp differ diff --git 
a/assets/img/2023-11-02-guided-transfer-learning/paperresults2-1400.webp b/assets/img/2023-11-02-guided-transfer-learning/paperresults2-1400.webp new file mode 100644 index 00000000..33cb1c9d Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/paperresults2-1400.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/paperresults2-480.webp b/assets/img/2023-11-02-guided-transfer-learning/paperresults2-480.webp new file mode 100644 index 00000000..dcf17eb2 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/paperresults2-480.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/paperresults2-800.webp b/assets/img/2023-11-02-guided-transfer-learning/paperresults2-800.webp new file mode 100644 index 00000000..33cb1c9d Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/paperresults2-800.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/pipeline-1400.webp b/assets/img/2023-11-02-guided-transfer-learning/pipeline-1400.webp new file mode 100644 index 00000000..bd7ca00b Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/pipeline-1400.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/pipeline-480.webp b/assets/img/2023-11-02-guided-transfer-learning/pipeline-480.webp new file mode 100644 index 00000000..e95b03a7 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/pipeline-480.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/pipeline-800.webp b/assets/img/2023-11-02-guided-transfer-learning/pipeline-800.webp new file mode 100644 index 00000000..bd7ca00b Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/pipeline-800.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/results-1400.webp b/assets/img/2023-11-02-guided-transfer-learning/results-1400.webp new file mode 100644 index 00000000..4d796523 Binary files 
/dev/null and b/assets/img/2023-11-02-guided-transfer-learning/results-1400.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/results-480.webp b/assets/img/2023-11-02-guided-transfer-learning/results-480.webp new file mode 100644 index 00000000..f3fd12d7 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/results-480.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/results-800.webp b/assets/img/2023-11-02-guided-transfer-learning/results-800.webp new file mode 100644 index 00000000..4d796523 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/results-800.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/scouting-1400.webp b/assets/img/2023-11-02-guided-transfer-learning/scouting-1400.webp new file mode 100644 index 00000000..f9e69fbb Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/scouting-1400.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/scouting-480.webp b/assets/img/2023-11-02-guided-transfer-learning/scouting-480.webp new file mode 100644 index 00000000..c6a3d670 Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/scouting-480.webp differ diff --git a/assets/img/2023-11-02-guided-transfer-learning/scouting-800.webp b/assets/img/2023-11-02-guided-transfer-learning/scouting-800.webp new file mode 100644 index 00000000..f9e69fbb Binary files /dev/null and b/assets/img/2023-11-02-guided-transfer-learning/scouting-800.webp differ diff --git a/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/fourierlayers-1400.webp b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/fourierlayers-1400.webp new file mode 100644 index 00000000..4820914a Binary files /dev/null and b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/fourierlayers-1400.webp differ diff --git a/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/fourierlayers-480.webp 
b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/fourierlayers-480.webp new file mode 100644 index 00000000..e5e156fb Binary files /dev/null and b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/fourierlayers-480.webp differ diff --git a/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/fourierlayers-800.webp b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/fourierlayers-800.webp new file mode 100644 index 00000000..4820914a Binary files /dev/null and b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/fourierlayers-800.webp differ diff --git a/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/noposencoding_dxdt-1400.webp b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/noposencoding_dxdt-1400.webp new file mode 100644 index 00000000..62ee7bd5 Binary files /dev/null and b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/noposencoding_dxdt-1400.webp differ diff --git a/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/noposencoding_dxdt-480.webp b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/noposencoding_dxdt-480.webp new file mode 100644 index 00000000..f3009493 Binary files /dev/null and b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/noposencoding_dxdt-480.webp differ diff --git a/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/noposencoding_dxdt-800.webp b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/noposencoding_dxdt-800.webp new file mode 100644 index 00000000..62ee7bd5 Binary files /dev/null and b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/noposencoding_dxdt-800.webp differ diff --git a/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/show_dxdt-1400.webp b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/show_dxdt-1400.webp new file mode 100644 index 00000000..3f943cce Binary files /dev/null and b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/show_dxdt-1400.webp differ diff --git a/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/show_dxdt-480.webp 
b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/show_dxdt-480.webp new file mode 100644 index 00000000..f5f7e8be Binary files /dev/null and b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/show_dxdt-480.webp differ diff --git a/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/show_dxdt-800.webp b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/show_dxdt-800.webp new file mode 100644 index 00000000..3f943cce Binary files /dev/null and b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/show_dxdt-800.webp differ diff --git a/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/train_test_loss-1400.webp b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/train_test_loss-1400.webp new file mode 100644 index 00000000..bccf8d40 Binary files /dev/null and b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/train_test_loss-1400.webp differ diff --git a/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/train_test_loss-480.webp b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/train_test_loss-480.webp new file mode 100644 index 00000000..4353d235 Binary files /dev/null and b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/train_test_loss-480.webp differ diff --git a/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/train_test_loss-800.webp b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/train_test_loss-800.webp new file mode 100644 index 00000000..bccf8d40 Binary files /dev/null and b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/train_test_loss-800.webp differ diff --git a/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/unet_train_test_loss-1400.webp b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/unet_train_test_loss-1400.webp new file mode 100644 index 00000000..c951993c Binary files /dev/null and b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/unet_train_test_loss-1400.webp differ diff --git a/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/unet_train_test_loss-480.webp 
b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/unet_train_test_loss-480.webp new file mode 100644 index 00000000..2d216ce8 Binary files /dev/null and b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/unet_train_test_loss-480.webp differ diff --git a/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/unet_train_test_loss-800.webp b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/unet_train_test_loss-800.webp new file mode 100644 index 00000000..c951993c Binary files /dev/null and b/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/unet_train_test_loss-800.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/6S898_Fall_2023_homeworks_ps3-1400.webp b/assets/img/2023-11-06-attention-regularization/6S898_Fall_2023_homeworks_ps3-1400.webp new file mode 100644 index 00000000..a7107f44 Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/6S898_Fall_2023_homeworks_ps3-1400.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/6S898_Fall_2023_homeworks_ps3-480.webp b/assets/img/2023-11-06-attention-regularization/6S898_Fall_2023_homeworks_ps3-480.webp new file mode 100644 index 00000000..7dbe0b01 Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/6S898_Fall_2023_homeworks_ps3-480.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/6S898_Fall_2023_homeworks_ps3-800.webp b/assets/img/2023-11-06-attention-regularization/6S898_Fall_2023_homeworks_ps3-800.webp new file mode 100644 index 00000000..a7107f44 Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/6S898_Fall_2023_homeworks_ps3-800.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/cifar_10_example-1400.webp b/assets/img/2023-11-06-attention-regularization/cifar_10_example-1400.webp new file mode 100644 index 00000000..4c1112b7 Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/cifar_10_example-1400.webp differ diff --git 
a/assets/img/2023-11-06-attention-regularization/cifar_10_example-480.webp b/assets/img/2023-11-06-attention-regularization/cifar_10_example-480.webp new file mode 100644 index 00000000..446368c7 Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/cifar_10_example-480.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/cifar_10_example-800.webp b/assets/img/2023-11-06-attention-regularization/cifar_10_example-800.webp new file mode 100644 index 00000000..4c1112b7 Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/cifar_10_example-800.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/test_accuracy-1400.webp b/assets/img/2023-11-06-attention-regularization/test_accuracy-1400.webp new file mode 100644 index 00000000..a9788078 Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/test_accuracy-1400.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/test_accuracy-480.webp b/assets/img/2023-11-06-attention-regularization/test_accuracy-480.webp new file mode 100644 index 00000000..7f6b2ff4 Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/test_accuracy-480.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/test_accuracy-800.webp b/assets/img/2023-11-06-attention-regularization/test_accuracy-800.webp new file mode 100644 index 00000000..a9788078 Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/test_accuracy-800.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/test_accuracy100-1400.webp b/assets/img/2023-11-06-attention-regularization/test_accuracy100-1400.webp new file mode 100644 index 00000000..761bfb1b Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/test_accuracy100-1400.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/test_accuracy100-480.webp 
b/assets/img/2023-11-06-attention-regularization/test_accuracy100-480.webp new file mode 100644 index 00000000..e1afb17a Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/test_accuracy100-480.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/test_accuracy100-800.webp b/assets/img/2023-11-06-attention-regularization/test_accuracy100-800.webp new file mode 100644 index 00000000..761bfb1b Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/test_accuracy100-800.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/test_loss-1400.webp b/assets/img/2023-11-06-attention-regularization/test_loss-1400.webp new file mode 100644 index 00000000..c3eddc8e Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/test_loss-1400.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/test_loss-480.webp b/assets/img/2023-11-06-attention-regularization/test_loss-480.webp new file mode 100644 index 00000000..4600bb05 Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/test_loss-480.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/test_loss-800.webp b/assets/img/2023-11-06-attention-regularization/test_loss-800.webp new file mode 100644 index 00000000..c3eddc8e Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/test_loss-800.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/test_loss100-1400.webp b/assets/img/2023-11-06-attention-regularization/test_loss100-1400.webp new file mode 100644 index 00000000..d5c08028 Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/test_loss100-1400.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/test_loss100-480.webp b/assets/img/2023-11-06-attention-regularization/test_loss100-480.webp new file mode 100644 index 00000000..c88ad043 Binary files /dev/null and 
b/assets/img/2023-11-06-attention-regularization/test_loss100-480.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/test_loss100-800.webp b/assets/img/2023-11-06-attention-regularization/test_loss100-800.webp new file mode 100644 index 00000000..d5c08028 Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/test_loss100-800.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/training_accuracy-1400.webp b/assets/img/2023-11-06-attention-regularization/training_accuracy-1400.webp new file mode 100644 index 00000000..e898fbe3 Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/training_accuracy-1400.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/training_accuracy-480.webp b/assets/img/2023-11-06-attention-regularization/training_accuracy-480.webp new file mode 100644 index 00000000..67a66225 Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/training_accuracy-480.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/training_accuracy-800.webp b/assets/img/2023-11-06-attention-regularization/training_accuracy-800.webp new file mode 100644 index 00000000..e898fbe3 Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/training_accuracy-800.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/training_accuracy100-1400.webp b/assets/img/2023-11-06-attention-regularization/training_accuracy100-1400.webp new file mode 100644 index 00000000..3eee5386 Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/training_accuracy100-1400.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/training_accuracy100-480.webp b/assets/img/2023-11-06-attention-regularization/training_accuracy100-480.webp new file mode 100644 index 00000000..149fbb00 Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/training_accuracy100-480.webp 
differ diff --git a/assets/img/2023-11-06-attention-regularization/training_accuracy100-800.webp b/assets/img/2023-11-06-attention-regularization/training_accuracy100-800.webp new file mode 100644 index 00000000..3eee5386 Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/training_accuracy100-800.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/training_loss-1400.webp b/assets/img/2023-11-06-attention-regularization/training_loss-1400.webp new file mode 100644 index 00000000..34e54a7b Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/training_loss-1400.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/training_loss-480.webp b/assets/img/2023-11-06-attention-regularization/training_loss-480.webp new file mode 100644 index 00000000..14f1585b Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/training_loss-480.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/training_loss-800.webp b/assets/img/2023-11-06-attention-regularization/training_loss-800.webp new file mode 100644 index 00000000..34e54a7b Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/training_loss-800.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/training_loss100-1400.webp b/assets/img/2023-11-06-attention-regularization/training_loss100-1400.webp new file mode 100644 index 00000000..9d4e075b Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/training_loss100-1400.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/training_loss100-480.webp b/assets/img/2023-11-06-attention-regularization/training_loss100-480.webp new file mode 100644 index 00000000..1a10e96b Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/training_loss100-480.webp differ diff --git a/assets/img/2023-11-06-attention-regularization/training_loss100-800.webp 
b/assets/img/2023-11-06-attention-regularization/training_loss100-800.webp new file mode 100644 index 00000000..9d4e075b Binary files /dev/null and b/assets/img/2023-11-06-attention-regularization/training_loss100-800.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/brahmi-1400.webp b/assets/img/2023-11-07-Language-Bias/brahmi-1400.webp new file mode 100644 index 00000000..f750f6a3 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/brahmi-1400.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/brahmi-480.webp b/assets/img/2023-11-07-Language-Bias/brahmi-480.webp new file mode 100644 index 00000000..104c5038 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/brahmi-480.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/brahmi-800.webp b/assets/img/2023-11-07-Language-Bias/brahmi-800.webp new file mode 100644 index 00000000..f750f6a3 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/brahmi-800.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/christmas_spectrogram-1400.webp b/assets/img/2023-11-07-Language-Bias/christmas_spectrogram-1400.webp new file mode 100644 index 00000000..1c60565c Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/christmas_spectrogram-1400.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/christmas_spectrogram-480.webp b/assets/img/2023-11-07-Language-Bias/christmas_spectrogram-480.webp new file mode 100644 index 00000000..c467531f Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/christmas_spectrogram-480.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/christmas_spectrogram-800.webp b/assets/img/2023-11-07-Language-Bias/christmas_spectrogram-800.webp new file mode 100644 index 00000000..1c60565c Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/christmas_spectrogram-800.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/ctc-1400.webp 
b/assets/img/2023-11-07-Language-Bias/ctc-1400.webp new file mode 100644 index 00000000..cfd1db5d Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/ctc-1400.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/ctc-480.webp b/assets/img/2023-11-07-Language-Bias/ctc-480.webp new file mode 100644 index 00000000..d1eda52b Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/ctc-480.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/ctc-800.webp b/assets/img/2023-11-07-Language-Bias/ctc-800.webp new file mode 100644 index 00000000..cfd1db5d Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/ctc-800.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/experiment_drawing-1400.webp b/assets/img/2023-11-07-Language-Bias/experiment_drawing-1400.webp new file mode 100644 index 00000000..0269cb23 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/experiment_drawing-1400.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/experiment_drawing-480.webp b/assets/img/2023-11-07-Language-Bias/experiment_drawing-480.webp new file mode 100644 index 00000000..88014e38 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/experiment_drawing-480.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/experiment_drawing-800.webp b/assets/img/2023-11-07-Language-Bias/experiment_drawing-800.webp new file mode 100644 index 00000000..0269cb23 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/experiment_drawing-800.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/gen_spectrogram-1400.webp b/assets/img/2023-11-07-Language-Bias/gen_spectrogram-1400.webp new file mode 100644 index 00000000..8ab5acc8 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/gen_spectrogram-1400.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/gen_spectrogram-480.webp b/assets/img/2023-11-07-Language-Bias/gen_spectrogram-480.webp new file mode 100644 index 
00000000..767ba505 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/gen_spectrogram-480.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/gen_spectrogram-800.webp b/assets/img/2023-11-07-Language-Bias/gen_spectrogram-800.webp new file mode 100644 index 00000000..8ab5acc8 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/gen_spectrogram-800.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/ought-1400.webp b/assets/img/2023-11-07-Language-Bias/ought-1400.webp new file mode 100644 index 00000000..3988c001 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/ought-1400.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/ought-480.webp b/assets/img/2023-11-07-Language-Bias/ought-480.webp new file mode 100644 index 00000000..903da8cc Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/ought-480.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/ought-800.webp b/assets/img/2023-11-07-Language-Bias/ought-800.webp new file mode 100644 index 00000000..3988c001 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/ought-800.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/real_results-1400.webp b/assets/img/2023-11-07-Language-Bias/real_results-1400.webp new file mode 100644 index 00000000..5a492884 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/real_results-1400.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/real_results-480.webp b/assets/img/2023-11-07-Language-Bias/real_results-480.webp new file mode 100644 index 00000000..fb0315a1 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/real_results-480.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/real_results-800.webp b/assets/img/2023-11-07-Language-Bias/real_results-800.webp new file mode 100644 index 00000000..5a492884 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/real_results-800.webp differ diff --git 
a/assets/img/2023-11-07-Language-Bias/reddit-1400.webp b/assets/img/2023-11-07-Language-Bias/reddit-1400.webp new file mode 100644 index 00000000..153a92b6 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/reddit-1400.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/reddit-480.webp b/assets/img/2023-11-07-Language-Bias/reddit-480.webp new file mode 100644 index 00000000..1d34cd36 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/reddit-480.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/reddit-800.webp b/assets/img/2023-11-07-Language-Bias/reddit-800.webp new file mode 100644 index 00000000..153a92b6 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/reddit-800.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/results-1400.webp b/assets/img/2023-11-07-Language-Bias/results-1400.webp new file mode 100644 index 00000000..13166610 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/results-1400.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/results-480.webp b/assets/img/2023-11-07-Language-Bias/results-480.webp new file mode 100644 index 00000000..49c3355b Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/results-480.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/results-800.webp b/assets/img/2023-11-07-Language-Bias/results-800.webp new file mode 100644 index 00000000..13166610 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/results-800.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/results1-1400.webp b/assets/img/2023-11-07-Language-Bias/results1-1400.webp new file mode 100644 index 00000000..1c9bbe87 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/results1-1400.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/results1-480.webp b/assets/img/2023-11-07-Language-Bias/results1-480.webp new file mode 100644 index 00000000..682b2ac1 Binary files /dev/null and 
b/assets/img/2023-11-07-Language-Bias/results1-480.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/results1-800.webp b/assets/img/2023-11-07-Language-Bias/results1-800.webp new file mode 100644 index 00000000..1c9bbe87 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/results1-800.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/results2-1400.webp b/assets/img/2023-11-07-Language-Bias/results2-1400.webp new file mode 100644 index 00000000..d516d703 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/results2-1400.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/results2-480.webp b/assets/img/2023-11-07-Language-Bias/results2-480.webp new file mode 100644 index 00000000..f8f77871 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/results2-480.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/results2-800.webp b/assets/img/2023-11-07-Language-Bias/results2-800.webp new file mode 100644 index 00000000..d516d703 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/results2-800.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/results3-1400.webp b/assets/img/2023-11-07-Language-Bias/results3-1400.webp new file mode 100644 index 00000000..76303733 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/results3-1400.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/results3-480.webp b/assets/img/2023-11-07-Language-Bias/results3-480.webp new file mode 100644 index 00000000..f12710dc Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/results3-480.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/results3-800.webp b/assets/img/2023-11-07-Language-Bias/results3-800.webp new file mode 100644 index 00000000..76303733 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/results3-800.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/silent_letters-1400.webp 
b/assets/img/2023-11-07-Language-Bias/silent_letters-1400.webp new file mode 100644 index 00000000..602ea812 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/silent_letters-1400.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/silent_letters-480.webp b/assets/img/2023-11-07-Language-Bias/silent_letters-480.webp new file mode 100644 index 00000000..dce01bca Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/silent_letters-480.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/silent_letters-800.webp b/assets/img/2023-11-07-Language-Bias/silent_letters-800.webp new file mode 100644 index 00000000..602ea812 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/silent_letters-800.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/spanish-1400.webp b/assets/img/2023-11-07-Language-Bias/spanish-1400.webp new file mode 100644 index 00000000..755b991d Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/spanish-1400.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/spanish-480.webp b/assets/img/2023-11-07-Language-Bias/spanish-480.webp new file mode 100644 index 00000000..790637aa Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/spanish-480.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/spanish-800.webp b/assets/img/2023-11-07-Language-Bias/spanish-800.webp new file mode 100644 index 00000000..755b991d Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/spanish-800.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/system-1400.webp b/assets/img/2023-11-07-Language-Bias/system-1400.webp new file mode 100644 index 00000000..a92d5890 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/system-1400.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/system-480.webp b/assets/img/2023-11-07-Language-Bias/system-480.webp new file mode 100644 index 00000000..a3329e94 Binary files /dev/null and 
b/assets/img/2023-11-07-Language-Bias/system-480.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/system-800.webp b/assets/img/2023-11-07-Language-Bias/system-800.webp new file mode 100644 index 00000000..a92d5890 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/system-800.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/twitter_meme-1400.webp b/assets/img/2023-11-07-Language-Bias/twitter_meme-1400.webp new file mode 100644 index 00000000..41349fe0 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/twitter_meme-1400.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/twitter_meme-480.webp b/assets/img/2023-11-07-Language-Bias/twitter_meme-480.webp new file mode 100644 index 00000000..b1ba3686 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/twitter_meme-480.webp differ diff --git a/assets/img/2023-11-07-Language-Bias/twitter_meme-800.webp b/assets/img/2023-11-07-Language-Bias/twitter_meme-800.webp new file mode 100644 index 00000000..41349fe0 Binary files /dev/null and b/assets/img/2023-11-07-Language-Bias/twitter_meme-800.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/1-cross_attention_masks-1400.webp b/assets/img/2023-11-07-prompt-to-prompt/1-cross_attention_masks-1400.webp new file mode 100644 index 00000000..85b6fbae Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/1-cross_attention_masks-1400.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/1-cross_attention_masks-480.webp b/assets/img/2023-11-07-prompt-to-prompt/1-cross_attention_masks-480.webp new file mode 100644 index 00000000..60f5ba50 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/1-cross_attention_masks-480.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/1-cross_attention_masks-800.webp b/assets/img/2023-11-07-prompt-to-prompt/1-cross_attention_masks-800.webp new file mode 100644 index 00000000..85b6fbae Binary files /dev/null and 
b/assets/img/2023-11-07-prompt-to-prompt/1-cross_attention_masks-800.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/2-Experimentation_proposed_prompt_to_prompt-1400.webp b/assets/img/2023-11-07-prompt-to-prompt/2-Experimentation_proposed_prompt_to_prompt-1400.webp new file mode 100644 index 00000000..9c76ceea Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/2-Experimentation_proposed_prompt_to_prompt-1400.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/2-Experimentation_proposed_prompt_to_prompt-480.webp b/assets/img/2023-11-07-prompt-to-prompt/2-Experimentation_proposed_prompt_to_prompt-480.webp new file mode 100644 index 00000000..a6910a23 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/2-Experimentation_proposed_prompt_to_prompt-480.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/2-Experimentation_proposed_prompt_to_prompt-800.webp b/assets/img/2023-11-07-prompt-to-prompt/2-Experimentation_proposed_prompt_to_prompt-800.webp new file mode 100644 index 00000000..9c76ceea Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/2-Experimentation_proposed_prompt_to_prompt-800.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 01 - outline-1400.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 01 - outline-1400.webp new file mode 100644 index 00000000..1d9aede8 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 01 - outline-1400.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 01 - outline-480.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 01 - outline-480.webp new file mode 100644 index 00000000..293c81dc Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 01 - outline-480.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 01 - outline-800.webp 
b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 01 - outline-800.webp new file mode 100644 index 00000000..1d9aede8 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 01 - outline-800.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 02 - outline section A-1400.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 02 - outline section A-1400.webp new file mode 100644 index 00000000..61c85111 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 02 - outline section A-1400.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 02 - outline section A-480.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 02 - outline section A-480.webp new file mode 100644 index 00000000..a81700d2 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 02 - outline section A-480.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 02 - outline section A-800.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 02 - outline section A-800.webp new file mode 100644 index 00000000..61c85111 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 02 - outline section A-800.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 03 - Local editing-1400.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 03 - Local editing-1400.webp new file mode 100644 index 00000000..24bccc4d Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 03 - Local editing-1400.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 03 - Local editing-480.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 03 - Local editing-480.webp new file mode 100644 index 00000000..222c6865 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 03 - Local 
editing-480.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 03 - Local editing-800.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 03 - Local editing-800.webp new file mode 100644 index 00000000..24bccc4d Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 03 - Local editing-800.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 04 - Cross replace steps explanation-1400.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 04 - Cross replace steps explanation-1400.webp new file mode 100644 index 00000000..e0131a03 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 04 - Cross replace steps explanation-1400.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 04 - Cross replace steps explanation-480.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 04 - Cross replace steps explanation-480.webp new file mode 100644 index 00000000..c740f005 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 04 - Cross replace steps explanation-480.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 04 - Cross replace steps explanation-800.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 04 - Cross replace steps explanation-800.webp new file mode 100644 index 00000000..e0131a03 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 04 - Cross replace steps explanation-800.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 05 - Sel-attention explanation-1400.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 05 - Sel-attention explanation-1400.webp new file mode 100644 index 00000000..1ff5fe92 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 05 - Sel-attention explanation-1400.webp differ diff --git 
a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 05 - Sel-attention explanation-480.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 05 - Sel-attention explanation-480.webp new file mode 100644 index 00000000..e9f4acd1 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 05 - Sel-attention explanation-480.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 05 - Sel-attention explanation-800.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 05 - Sel-attention explanation-800.webp new file mode 100644 index 00000000..1ff5fe92 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 05 - Sel-attention explanation-800.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 06 - Hypothesis and findings-1400.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 06 - Hypothesis and findings-1400.webp new file mode 100644 index 00000000..69a30b5d Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 06 - Hypothesis and findings-1400.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 06 - Hypothesis and findings-480.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 06 - Hypothesis and findings-480.webp new file mode 100644 index 00000000..7b208509 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 06 - Hypothesis and findings-480.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 06 - Hypothesis and findings-800.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 06 - Hypothesis and findings-800.webp new file mode 100644 index 00000000..69a30b5d Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 06 - Hypothesis and findings-800.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 07 - Results silhouette parameter k - 
faces-1400.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 07 - Results silhouette parameter k - faces-1400.webp new file mode 100644 index 00000000..6d4799f4 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 07 - Results silhouette parameter k - faces-1400.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 07 - Results silhouette parameter k - faces-480.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 07 - Results silhouette parameter k - faces-480.webp new file mode 100644 index 00000000..6ad8ab30 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 07 - Results silhouette parameter k - faces-480.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 07 - Results silhouette parameter k - faces-800.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 07 - Results silhouette parameter k - faces-800.webp new file mode 100644 index 00000000..6d4799f4 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 07 - Results silhouette parameter k - faces-800.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 08 - Results silhouette parameter k - landscape-1400.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 08 - Results silhouette parameter k - landscape-1400.webp new file mode 100644 index 00000000..f39532f6 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 08 - Results silhouette parameter k - landscape-1400.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 08 - Results silhouette parameter k - landscape-480.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 08 - Results silhouette parameter k - landscape-480.webp new file mode 100644 index 00000000..eabafe01 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 08 - Results silhouette parameter 
k - landscape-480.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 08 - Results silhouette parameter k - landscape-800.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 08 - Results silhouette parameter k - landscape-800.webp new file mode 100644 index 00000000..f39532f6 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 08 - Results silhouette parameter k - landscape-800.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 09 - Results cross replace steps - faces-1400.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 09 - Results cross replace steps - faces-1400.webp new file mode 100644 index 00000000..07cc8d01 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 09 - Results cross replace steps - faces-1400.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 09 - Results cross replace steps - faces-480.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 09 - Results cross replace steps - faces-480.webp new file mode 100644 index 00000000..ce239f2d Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 09 - Results cross replace steps - faces-480.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 09 - Results cross replace steps - faces-800.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 09 - Results cross replace steps - faces-800.webp new file mode 100644 index 00000000..07cc8d01 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 09 - Results cross replace steps - faces-800.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 10 - Results cross replace steps - landscape-1400.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 10 - Results cross replace steps - landscape-1400.webp new file mode 100644 index 00000000..e14abc53 Binary files 
/dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 10 - Results cross replace steps - landscape-1400.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 10 - Results cross replace steps - landscape-480.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 10 - Results cross replace steps - landscape-480.webp new file mode 100644 index 00000000..6f63b352 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 10 - Results cross replace steps - landscape-480.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 10 - Results cross replace steps - landscape-800.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 10 - Results cross replace steps - landscape-800.webp new file mode 100644 index 00000000..e14abc53 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 10 - Results cross replace steps - landscape-800.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 11 - Results self replace steps - faces-1400.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 11 - Results self replace steps - faces-1400.webp new file mode 100644 index 00000000..92b4b418 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 11 - Results self replace steps - faces-1400.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 11 - Results self replace steps - faces-480.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 11 - Results self replace steps - faces-480.webp new file mode 100644 index 00000000..e7656628 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 11 - Results self replace steps - faces-480.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 11 - Results self replace steps - faces-800.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 11 - Results self 
replace steps - faces-800.webp new file mode 100644 index 00000000..92b4b418 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 11 - Results self replace steps - faces-800.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 12 - Results self replace steps - landscape-1400.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 12 - Results self replace steps - landscape-1400.webp new file mode 100644 index 00000000..26769925 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 12 - Results self replace steps - landscape-1400.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 12 - Results self replace steps - landscape-480.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 12 - Results self replace steps - landscape-480.webp new file mode 100644 index 00000000..a945e9eb Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 12 - Results self replace steps - landscape-480.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 12 - Results self replace steps - landscape-800.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 12 - Results self replace steps - landscape-800.webp new file mode 100644 index 00000000..26769925 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 12 - Results self replace steps - landscape-800.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 13 - Cycle consistency-1400.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 13 - Cycle consistency-1400.webp new file mode 100644 index 00000000..e7b77f8f Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 13 - Cycle consistency-1400.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 13 - Cycle consistency-480.webp 
b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 13 - Cycle consistency-480.webp new file mode 100644 index 00000000..86239aff Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 13 - Cycle consistency-480.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 13 - Cycle consistency-800.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 13 - Cycle consistency-800.webp new file mode 100644 index 00000000..e7b77f8f Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 13 - Cycle consistency-800.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 14 - Cycle consistency - hyperparameter impact-1400.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 14 - Cycle consistency - hyperparameter impact-1400.webp new file mode 100644 index 00000000..34d18827 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 14 - Cycle consistency - hyperparameter impact-1400.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 14 - Cycle consistency - hyperparameter impact-480.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 14 - Cycle consistency - hyperparameter impact-480.webp new file mode 100644 index 00000000..2ecf0214 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 14 - Cycle consistency - hyperparameter impact-480.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 14 - Cycle consistency - hyperparameter impact-800.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 14 - Cycle consistency - hyperparameter impact-800.webp new file mode 100644 index 00000000..34d18827 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 14 - Cycle consistency - hyperparameter impact-800.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 15 - Current vs new 
method-1400.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 15 - Current vs new method-1400.webp new file mode 100644 index 00000000..8fd9c6e0 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 15 - Current vs new method-1400.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 15 - Current vs new method-480.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 15 - Current vs new method-480.webp new file mode 100644 index 00000000..79149fd8 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 15 - Current vs new method-480.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 15 - Current vs new method-800.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 15 - Current vs new method-800.webp new file mode 100644 index 00000000..8fd9c6e0 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 15 - Current vs new method-800.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 16 - Current vs new method comparision of output-1400.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 16 - Current vs new method comparision of output-1400.webp new file mode 100644 index 00000000..9eb6562a Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 16 - Current vs new method comparision of output-1400.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 16 - Current vs new method comparision of output-480.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 16 - Current vs new method comparision of output-480.webp new file mode 100644 index 00000000..746c6a5c Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 16 - Current vs new method comparision of output-480.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 16 - Current vs new method 
comparision of output-800.webp b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 16 - Current vs new method comparision of output-800.webp new file mode 100644 index 00000000..9eb6562a Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure 16 - Current vs new method comparision of output-800.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/literature_review/Blended_Difussion-1400.webp b/assets/img/2023-11-07-prompt-to-prompt/literature_review/Blended_Difussion-1400.webp new file mode 100644 index 00000000..ef548a46 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/literature_review/Blended_Difussion-1400.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/literature_review/Blended_Difussion-480.webp b/assets/img/2023-11-07-prompt-to-prompt/literature_review/Blended_Difussion-480.webp new file mode 100644 index 00000000..5c93b3a0 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/literature_review/Blended_Difussion-480.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/literature_review/Blended_Difussion-800.webp b/assets/img/2023-11-07-prompt-to-prompt/literature_review/Blended_Difussion-800.webp new file mode 100644 index 00000000..ef548a46 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/literature_review/Blended_Difussion-800.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/literature_review/Fine_Tuning-1400.webp b/assets/img/2023-11-07-prompt-to-prompt/literature_review/Fine_Tuning-1400.webp new file mode 100644 index 00000000..5c35b670 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/literature_review/Fine_Tuning-1400.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/literature_review/Fine_Tuning-480.webp b/assets/img/2023-11-07-prompt-to-prompt/literature_review/Fine_Tuning-480.webp new file mode 100644 index 00000000..eb60604d Binary files /dev/null and 
b/assets/img/2023-11-07-prompt-to-prompt/literature_review/Fine_Tuning-480.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/literature_review/Fine_Tuning-800.webp b/assets/img/2023-11-07-prompt-to-prompt/literature_review/Fine_Tuning-800.webp new file mode 100644 index 00000000..5c35b670 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/literature_review/Fine_Tuning-800.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/literature_review/SDEdit-1400.webp b/assets/img/2023-11-07-prompt-to-prompt/literature_review/SDEdit-1400.webp new file mode 100644 index 00000000..4c56fc01 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/literature_review/SDEdit-1400.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/literature_review/SDEdit-480.webp b/assets/img/2023-11-07-prompt-to-prompt/literature_review/SDEdit-480.webp new file mode 100644 index 00000000..25b54c5d Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/literature_review/SDEdit-480.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/literature_review/SDEdit-800.webp b/assets/img/2023-11-07-prompt-to-prompt/literature_review/SDEdit-800.webp new file mode 100644 index 00000000..4c56fc01 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/literature_review/SDEdit-800.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_1-1400.webp b/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_1-1400.webp new file mode 100644 index 00000000..18726401 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_1-1400.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_1-480.webp b/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_1-480.webp new file mode 100644 index 00000000..617f267f Binary files /dev/null 
and b/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_1-480.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_1-800.webp b/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_1-800.webp new file mode 100644 index 00000000..18726401 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_1-800.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_2-1400.webp b/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_2-1400.webp new file mode 100644 index 00000000..13d07266 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_2-1400.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_2-480.webp b/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_2-480.webp new file mode 100644 index 00000000..b650562f Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_2-480.webp differ diff --git a/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_2-800.webp b/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_2-800.webp new file mode 100644 index 00000000..13d07266 Binary files /dev/null and b/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_2-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/NH3_rot-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/NH3_rot-1400.webp new file mode 100644 index 00000000..e2a5fad2 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/NH3_rot-1400.webp differ diff --git 
a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/NH3_rot-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/NH3_rot-480.webp new file mode 100644 index 00000000..aa072117 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/NH3_rot-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/NH3_rot-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/NH3_rot-800.webp new file mode 100644 index 00000000..e2a5fad2 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/NH3_rot-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/aspirin-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/aspirin-1400.webp new file mode 100644 index 00000000..23a36bc5 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/aspirin-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/aspirin-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/aspirin-480.webp new file mode 100644 index 00000000..590b7073 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/aspirin-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/aspirin-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/aspirin-800.webp new file mode 100644 index 00000000..23a36bc5 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/aspirin-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_asp_1-1400.webp 
b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_asp_1-1400.webp new file mode 100644 index 00000000..fbb76fbe Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_asp_1-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_asp_1-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_asp_1-480.webp new file mode 100644 index 00000000..89a6d4f4 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_asp_1-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_asp_1-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_asp_1-800.webp new file mode 100644 index 00000000..fbb76fbe Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_asp_1-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth_2-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth_2-1400.webp new file mode 100644 index 00000000..5b849bfa Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth_2-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth_2-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth_2-480.webp new file mode 100644 index 00000000..25395ec8 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth_2-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth_2-800.webp 
b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth_2-800.webp new file mode 100644 index 00000000..5b849bfa Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth_2-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__ang_2-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__ang_2-1400.webp new file mode 100644 index 00000000..53110b1c Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__ang_2-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__ang_2-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__ang_2-480.webp new file mode 100644 index 00000000..690d6e85 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__ang_2-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__ang_2-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__ang_2-800.webp new file mode 100644 index 00000000..53110b1c Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__ang_2-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__cc_2-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__cc_2-1400.webp new file mode 100644 index 00000000..5127b8ad Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__cc_2-1400.webp differ diff --git 
a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__cc_2-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__cc_2-480.webp new file mode 100644 index 00000000..929f6065 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__cc_2-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__cc_2-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__cc_2-800.webp new file mode 100644 index 00000000..5127b8ad Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__cc_2-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__co_2-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__co_2-1400.webp new file mode 100644 index 00000000..246956af Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__co_2-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__co_2-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__co_2-480.webp new file mode 100644 index 00000000..c768a7f1 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__co_2-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__co_2-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__co_2-800.webp new file mode 100644 index 00000000..246956af Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__co_2-800.webp differ diff 
--git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_tol_2-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_tol_2-1400.webp new file mode 100644 index 00000000..6ff593e7 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_tol_2-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_tol_2-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_tol_2-480.webp new file mode 100644 index 00000000..9b81452c Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_tol_2-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_tol_2-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_tol_2-800.webp new file mode 100644 index 00000000..6ff593e7 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_tol_2-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_t_epoch_new-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_t_epoch_new-1400.webp new file mode 100644 index 00000000..846ecb2d Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_t_epoch_new-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_t_epoch_new-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_t_epoch_new-480.webp new file mode 100644 index 00000000..d72cc844 Binary files /dev/null and 
b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_t_epoch_new-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_t_epoch_new-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_t_epoch_new-800.webp new file mode 100644 index 00000000..846ecb2d Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_t_epoch_new-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_v_epoch_new-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_v_epoch_new-1400.webp new file mode 100644 index 00000000..09b079f5 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_v_epoch_new-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_v_epoch_new-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_v_epoch_new-480.webp new file mode 100644 index 00000000..27a7d045 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_v_epoch_new-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_v_epoch_new-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_v_epoch_new-800.webp new file mode 100644 index 00000000..09b079f5 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_v_epoch_new-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_t_epoch_new-1400.webp 
b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_t_epoch_new-1400.webp new file mode 100644 index 00000000..c7757da7 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_t_epoch_new-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_t_epoch_new-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_t_epoch_new-480.webp new file mode 100644 index 00000000..2ae9a236 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_t_epoch_new-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_t_epoch_new-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_t_epoch_new-800.webp new file mode 100644 index 00000000..c7757da7 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_t_epoch_new-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_v_epoch_new-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_v_epoch_new-1400.webp new file mode 100644 index 00000000..50086034 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_v_epoch_new-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_v_epoch_new-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_v_epoch_new-480.webp new file mode 100644 index 00000000..bbafcee6 Binary files /dev/null and 
b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_v_epoch_new-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_v_epoch_new-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_v_epoch_new-800.webp new file mode 100644 index 00000000..50086034 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_v_epoch_new-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_t_epoch_new-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_t_epoch_new-1400.webp new file mode 100644 index 00000000..9e5cb66b Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_t_epoch_new-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_t_epoch_new-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_t_epoch_new-480.webp new file mode 100644 index 00000000..50a3805a Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_t_epoch_new-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_t_epoch_new-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_t_epoch_new-800.webp new file mode 100644 index 00000000..9e5cb66b Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_t_epoch_new-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_v_epoch_new-1400.webp 
b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_v_epoch_new-1400.webp new file mode 100644 index 00000000..d8ffe30d Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_v_epoch_new-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_v_epoch_new-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_v_epoch_new-480.webp new file mode 100644 index 00000000..6d50e384 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_v_epoch_new-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_v_epoch_new-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_v_epoch_new-800.webp new file mode 100644 index 00000000..d8ffe30d Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_v_epoch_new-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_t-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_t-1400.webp new file mode 100644 index 00000000..45e412fc Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_t-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_t-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_t-480.webp new file mode 100644 index 00000000..f6094f7b Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_t-480.webp differ diff --git 
a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_t-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_t-800.webp new file mode 100644 index 00000000..45e412fc Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_t-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_v-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_v-1400.webp new file mode 100644 index 00000000..5f3220fe Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_v-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_v-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_v-480.webp new file mode 100644 index 00000000..c0cf9c4a Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_v-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_v-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_v-800.webp new file mode 100644 index 00000000..5f3220fe Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_v-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_t-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_t-1400.webp new file mode 100644 index 00000000..cf340856 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_t-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_t-480.webp 
b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_t-480.webp new file mode 100644 index 00000000..34ff2545 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_t-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_t-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_t-800.webp new file mode 100644 index 00000000..cf340856 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_t-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_v-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_v-1400.webp new file mode 100644 index 00000000..ddaaf794 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_v-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_v-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_v-480.webp new file mode 100644 index 00000000..0fdcf4e0 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_v-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_v-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_v-800.webp new file mode 100644 index 00000000..ddaaf794 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_v-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_t-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_t-1400.webp new file mode 100644 index 00000000..18423fb0 
Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_t-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_t-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_t-480.webp new file mode 100644 index 00000000..e7e6fc67 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_t-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_t-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_t-800.webp new file mode 100644 index 00000000..18423fb0 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_t-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_v-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_v-1400.webp new file mode 100644 index 00000000..4b4d9198 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_v-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_v-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_v-480.webp new file mode 100644 index 00000000..0194c36b Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_v-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_v-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_v-800.webp new file mode 100644 index 00000000..4b4d9198 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_v-800.webp differ diff --git 
a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/ethanol-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/ethanol-1400.webp new file mode 100644 index 00000000..04d5d6ad Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/ethanol-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/ethanol-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/ethanol-480.webp new file mode 100644 index 00000000..b57770fb Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/ethanol-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/ethanol-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/ethanol-800.webp new file mode 100644 index 00000000..04d5d6ad Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/ethanol-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_asp_1-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_asp_1-1400.webp new file mode 100644 index 00000000..422277d1 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_asp_1-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_asp_1-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_asp_1-480.webp new file mode 100644 index 00000000..dfcc6f99 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_asp_1-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_asp_1-800.webp 
b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_asp_1-800.webp new file mode 100644 index 00000000..422277d1 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_asp_1-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_eth_new-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_eth_new-1400.webp new file mode 100644 index 00000000..4d7ca66b Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_eth_new-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_eth_new-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_eth_new-480.webp new file mode 100644 index 00000000..4fafcb5e Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_eth_new-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_eth_new-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_eth_new-800.webp new file mode 100644 index 00000000..4d7ca66b Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_eth_new-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_tol_1-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_tol_1-1400.webp new file mode 100644 index 00000000..ea468c14 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_tol_1-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_tol_1-480.webp 
b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_tol_1-480.webp new file mode 100644 index 00000000..c193e09d Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_tol_1-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_tol_1-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_tol_1-800.webp new file mode 100644 index 00000000..ea468c14 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_tol_1-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_t-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_t-1400.webp new file mode 100644 index 00000000..f7d201a1 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_t-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_t-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_t-480.webp new file mode 100644 index 00000000..bc3efe94 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_t-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_t-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_t-800.webp new file mode 100644 index 00000000..f7d201a1 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_t-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_v-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_v-1400.webp new 
file mode 100644 index 00000000..8356a3de Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_v-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_v-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_v-480.webp new file mode 100644 index 00000000..7b9898d2 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_v-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_v-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_v-800.webp new file mode 100644 index 00000000..8356a3de Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_v-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_t-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_t-1400.webp new file mode 100644 index 00000000..84226111 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_t-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_t-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_t-480.webp new file mode 100644 index 00000000..7d813f9b Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_t-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_t-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_t-800.webp new file mode 100644 index 00000000..84226111 Binary files /dev/null and 
b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_t-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_v-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_v-1400.webp new file mode 100644 index 00000000..e45c30f9 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_v-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_v-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_v-480.webp new file mode 100644 index 00000000..de968a23 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_v-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_v-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_v-800.webp new file mode 100644 index 00000000..e45c30f9 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_v-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_t-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_t-1400.webp new file mode 100644 index 00000000..e27aeb5a Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_t-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_t-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_t-480.webp new file mode 100644 index 00000000..99615709 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_t-480.webp differ diff --git 
a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_t-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_t-800.webp new file mode 100644 index 00000000..e27aeb5a Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_t-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_v-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_v-1400.webp new file mode 100644 index 00000000..1a9a70ae Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_v-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_v-480.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_v-480.webp new file mode 100644 index 00000000..89088fee Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_v-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_v-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_v-800.webp new file mode 100644 index 00000000..1a9a70ae Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_v-800.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/toluene-1400.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/toluene-1400.webp new file mode 100644 index 00000000..c160e1a7 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/toluene-1400.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/toluene-480.webp 
b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/toluene-480.webp new file mode 100644 index 00000000..5f3ba25a Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/toluene-480.webp differ diff --git a/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/toluene-800.webp b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/toluene-800.webp new file mode 100644 index 00000000..c160e1a7 Binary files /dev/null and b/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/toluene-800.webp differ diff --git a/assets/img/2023-11-08-Investigating-neural-operator-models-for-closure-modeling-of-dynamical-systems/cloure_model-1400.webp b/assets/img/2023-11-08-Investigating-neural-operator-models-for-closure-modeling-of-dynamical-systems/cloure_model-1400.webp new file mode 100644 index 00000000..0a892e75 Binary files /dev/null and b/assets/img/2023-11-08-Investigating-neural-operator-models-for-closure-modeling-of-dynamical-systems/cloure_model-1400.webp differ diff --git a/assets/img/2023-11-08-Investigating-neural-operator-models-for-closure-modeling-of-dynamical-systems/cloure_model-480.webp b/assets/img/2023-11-08-Investigating-neural-operator-models-for-closure-modeling-of-dynamical-systems/cloure_model-480.webp new file mode 100644 index 00000000..d910d4ab Binary files /dev/null and b/assets/img/2023-11-08-Investigating-neural-operator-models-for-closure-modeling-of-dynamical-systems/cloure_model-480.webp differ diff --git a/assets/img/2023-11-08-Investigating-neural-operator-models-for-closure-modeling-of-dynamical-systems/cloure_model-800.webp b/assets/img/2023-11-08-Investigating-neural-operator-models-for-closure-modeling-of-dynamical-systems/cloure_model-800.webp new file mode 100644 index 00000000..0a892e75 Binary files /dev/null and 
b/assets/img/2023-11-08-Investigating-neural-operator-models-for-closure-modeling-of-dynamical-systems/cloure_model-800.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/LSTM-1400.webp b/assets/img/2023-11-08-contrastive-time/LSTM-1400.webp new file mode 100644 index 00000000..ae5d09f9 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/LSTM-1400.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/LSTM-480.webp b/assets/img/2023-11-08-contrastive-time/LSTM-480.webp new file mode 100644 index 00000000..63d64a99 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/LSTM-480.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/LSTM-800.webp b/assets/img/2023-11-08-contrastive-time/LSTM-800.webp new file mode 100644 index 00000000..ae5d09f9 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/LSTM-800.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/comparison_noise-1400.webp b/assets/img/2023-11-08-contrastive-time/comparison_noise-1400.webp new file mode 100644 index 00000000..7dc616d2 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/comparison_noise-1400.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/comparison_noise-480.webp b/assets/img/2023-11-08-contrastive-time/comparison_noise-480.webp new file mode 100644 index 00000000..d8af00d8 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/comparison_noise-480.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/comparison_noise-800.webp b/assets/img/2023-11-08-contrastive-time/comparison_noise-800.webp new file mode 100644 index 00000000..7dc616d2 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/comparison_noise-800.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/comparison_recon_and_pred-1400.webp b/assets/img/2023-11-08-contrastive-time/comparison_recon_and_pred-1400.webp new file mode 100644 index 00000000..c994d7d5 Binary 
files /dev/null and b/assets/img/2023-11-08-contrastive-time/comparison_recon_and_pred-1400.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/comparison_recon_and_pred-480.webp b/assets/img/2023-11-08-contrastive-time/comparison_recon_and_pred-480.webp new file mode 100644 index 00000000..4a9105e3 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/comparison_recon_and_pred-480.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/comparison_recon_and_pred-800.webp b/assets/img/2023-11-08-contrastive-time/comparison_recon_and_pred-800.webp new file mode 100644 index 00000000..c994d7d5 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/comparison_recon_and_pred-800.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/contrastive_representation-1400.webp b/assets/img/2023-11-08-contrastive-time/contrastive_representation-1400.webp new file mode 100644 index 00000000..231ad577 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/contrastive_representation-1400.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/contrastive_representation-480.webp b/assets/img/2023-11-08-contrastive-time/contrastive_representation-480.webp new file mode 100644 index 00000000..5a31030d Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/contrastive_representation-480.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/contrastive_representation-800.webp b/assets/img/2023-11-08-contrastive-time/contrastive_representation-800.webp new file mode 100644 index 00000000..231ad577 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/contrastive_representation-800.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/gt_and_pred_traj-1400.webp b/assets/img/2023-11-08-contrastive-time/gt_and_pred_traj-1400.webp new file mode 100644 index 00000000..c542b8c7 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/gt_and_pred_traj-1400.webp 
differ diff --git a/assets/img/2023-11-08-contrastive-time/gt_and_pred_traj-480.webp b/assets/img/2023-11-08-contrastive-time/gt_and_pred_traj-480.webp new file mode 100644 index 00000000..b2235e54 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/gt_and_pred_traj-480.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/gt_and_pred_traj-800.webp b/assets/img/2023-11-08-contrastive-time/gt_and_pred_traj-800.webp new file mode 100644 index 00000000..c542b8c7 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/gt_and_pred_traj-800.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/param_est_unseen_in_distribution-1400.webp b/assets/img/2023-11-08-contrastive-time/param_est_unseen_in_distribution-1400.webp new file mode 100644 index 00000000..913eef69 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/param_est_unseen_in_distribution-1400.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/param_est_unseen_in_distribution-480.webp b/assets/img/2023-11-08-contrastive-time/param_est_unseen_in_distribution-480.webp new file mode 100644 index 00000000..67ae087c Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/param_est_unseen_in_distribution-480.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/param_est_unseen_in_distribution-800.webp b/assets/img/2023-11-08-contrastive-time/param_est_unseen_in_distribution-800.webp new file mode 100644 index 00000000..913eef69 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/param_est_unseen_in_distribution-800.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/param_est_unseen_out_distribution-1400.webp b/assets/img/2023-11-08-contrastive-time/param_est_unseen_out_distribution-1400.webp new file mode 100644 index 00000000..d7c95958 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/param_est_unseen_out_distribution-1400.webp differ diff --git 
a/assets/img/2023-11-08-contrastive-time/param_est_unseen_out_distribution-480.webp b/assets/img/2023-11-08-contrastive-time/param_est_unseen_out_distribution-480.webp new file mode 100644 index 00000000..79a163f3 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/param_est_unseen_out_distribution-480.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/param_est_unseen_out_distribution-800.webp b/assets/img/2023-11-08-contrastive-time/param_est_unseen_out_distribution-800.webp new file mode 100644 index 00000000..d7c95958 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/param_est_unseen_out_distribution-800.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/param_estimation-1400.webp b/assets/img/2023-11-08-contrastive-time/param_estimation-1400.webp new file mode 100644 index 00000000..b53c5664 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/param_estimation-1400.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/param_estimation-480.webp b/assets/img/2023-11-08-contrastive-time/param_estimation-480.webp new file mode 100644 index 00000000..21df10a8 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/param_estimation-480.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/param_estimation-800.webp b/assets/img/2023-11-08-contrastive-time/param_estimation-800.webp new file mode 100644 index 00000000..b53c5664 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/param_estimation-800.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/recon_and_pred-1400.webp b/assets/img/2023-11-08-contrastive-time/recon_and_pred-1400.webp new file mode 100644 index 00000000..81021b78 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/recon_and_pred-1400.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/recon_and_pred-480.webp b/assets/img/2023-11-08-contrastive-time/recon_and_pred-480.webp new file mode 
100644 index 00000000..9b49306d Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/recon_and_pred-480.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/recon_and_pred-800.webp b/assets/img/2023-11-08-contrastive-time/recon_and_pred-800.webp new file mode 100644 index 00000000..81021b78 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/recon_and_pred-800.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/spring_mass_system-1400.webp b/assets/img/2023-11-08-contrastive-time/spring_mass_system-1400.webp new file mode 100644 index 00000000..4d12130e Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/spring_mass_system-1400.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/spring_mass_system-480.webp b/assets/img/2023-11-08-contrastive-time/spring_mass_system-480.webp new file mode 100644 index 00000000..82300eec Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/spring_mass_system-480.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/spring_mass_system-800.webp b/assets/img/2023-11-08-contrastive-time/spring_mass_system-800.webp new file mode 100644 index 00000000..4d12130e Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/spring_mass_system-800.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/spring_mass_system_traj-1400.webp b/assets/img/2023-11-08-contrastive-time/spring_mass_system_traj-1400.webp new file mode 100644 index 00000000..95ab2799 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/spring_mass_system_traj-1400.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/spring_mass_system_traj-480.webp b/assets/img/2023-11-08-contrastive-time/spring_mass_system_traj-480.webp new file mode 100644 index 00000000..39b132ee Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/spring_mass_system_traj-480.webp differ diff --git 
a/assets/img/2023-11-08-contrastive-time/spring_mass_system_traj-800.webp b/assets/img/2023-11-08-contrastive-time/spring_mass_system_traj-800.webp new file mode 100644 index 00000000..95ab2799 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/spring_mass_system_traj-800.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/traj_with_noise-1400.webp b/assets/img/2023-11-08-contrastive-time/traj_with_noise-1400.webp new file mode 100644 index 00000000..7ca05330 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/traj_with_noise-1400.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/traj_with_noise-480.webp b/assets/img/2023-11-08-contrastive-time/traj_with_noise-480.webp new file mode 100644 index 00000000..0f93a442 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/traj_with_noise-480.webp differ diff --git a/assets/img/2023-11-08-contrastive-time/traj_with_noise-800.webp b/assets/img/2023-11-08-contrastive-time/traj_with_noise-800.webp new file mode 100644 index 00000000..7ca05330 Binary files /dev/null and b/assets/img/2023-11-08-contrastive-time/traj_with_noise-800.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/DKN_simplepen-1400.webp b/assets/img/2023-11-08-croneillproposal/DKN_simplepen-1400.webp new file mode 100644 index 00000000..bcd4f136 Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/DKN_simplepen-1400.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/DKN_simplepen-480.webp b/assets/img/2023-11-08-croneillproposal/DKN_simplepen-480.webp new file mode 100644 index 00000000..006058d5 Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/DKN_simplepen-480.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/DKN_simplepen-800.webp b/assets/img/2023-11-08-croneillproposal/DKN_simplepen-800.webp new file mode 100644 index 00000000..bcd4f136 Binary files /dev/null and 
b/assets/img/2023-11-08-croneillproposal/DKN_simplepen-800.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/aggregate_DKN-1400.webp b/assets/img/2023-11-08-croneillproposal/aggregate_DKN-1400.webp new file mode 100644 index 00000000..12aa3fe5 Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/aggregate_DKN-1400.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/aggregate_DKN-480.webp b/assets/img/2023-11-08-croneillproposal/aggregate_DKN-480.webp new file mode 100644 index 00000000..ab59c573 Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/aggregate_DKN-480.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/aggregate_DKN-800.webp b/assets/img/2023-11-08-croneillproposal/aggregate_DKN-800.webp new file mode 100644 index 00000000..12aa3fe5 Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/aggregate_DKN-800.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/combined_results-1400.webp b/assets/img/2023-11-08-croneillproposal/combined_results-1400.webp new file mode 100644 index 00000000..690d0201 Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/combined_results-1400.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/combined_results-480.webp b/assets/img/2023-11-08-croneillproposal/combined_results-480.webp new file mode 100644 index 00000000..2ddec316 Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/combined_results-480.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/combined_results-800.webp b/assets/img/2023-11-08-croneillproposal/combined_results-800.webp new file mode 100644 index 00000000..690d0201 Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/combined_results-800.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/curriculum_results-1400.webp b/assets/img/2023-11-08-croneillproposal/curriculum_results-1400.webp new file mode 100644 index 00000000..f1c039ad 
Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/curriculum_results-1400.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/curriculum_results-480.webp b/assets/img/2023-11-08-croneillproposal/curriculum_results-480.webp new file mode 100644 index 00000000..c8ceaafc Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/curriculum_results-480.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/curriculum_results-800.webp b/assets/img/2023-11-08-croneillproposal/curriculum_results-800.webp new file mode 100644 index 00000000..f1c039ad Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/curriculum_results-800.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/deepnet-1400.webp b/assets/img/2023-11-08-croneillproposal/deepnet-1400.webp new file mode 100644 index 00000000..989ac567 Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/deepnet-1400.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/deepnet-480.webp b/assets/img/2023-11-08-croneillproposal/deepnet-480.webp new file mode 100644 index 00000000..ec0a6884 Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/deepnet-480.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/deepnet-800.webp b/assets/img/2023-11-08-croneillproposal/deepnet-800.webp new file mode 100644 index 00000000..989ac567 Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/deepnet-800.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/model_arch-1400.webp b/assets/img/2023-11-08-croneillproposal/model_arch-1400.webp new file mode 100644 index 00000000..ece47d7f Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/model_arch-1400.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/model_arch-480.webp b/assets/img/2023-11-08-croneillproposal/model_arch-480.webp new file mode 100644 index 00000000..d78ac025 Binary files /dev/null and 
b/assets/img/2023-11-08-croneillproposal/model_arch-480.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/model_arch-800.webp b/assets/img/2023-11-08-croneillproposal/model_arch-800.webp new file mode 100644 index 00000000..ece47d7f Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/model_arch-800.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/obs_visualization-1400.webp b/assets/img/2023-11-08-croneillproposal/obs_visualization-1400.webp new file mode 100644 index 00000000..2f7a846c Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/obs_visualization-1400.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/obs_visualization-480.webp b/assets/img/2023-11-08-croneillproposal/obs_visualization-480.webp new file mode 100644 index 00000000..689ae19e Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/obs_visualization-480.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/obs_visualization-800.webp b/assets/img/2023-11-08-croneillproposal/obs_visualization-800.webp new file mode 100644 index 00000000..2f7a846c Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/obs_visualization-800.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/pend_statespace-1400.webp b/assets/img/2023-11-08-croneillproposal/pend_statespace-1400.webp new file mode 100644 index 00000000..94075632 Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/pend_statespace-1400.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/pend_statespace-480.webp b/assets/img/2023-11-08-croneillproposal/pend_statespace-480.webp new file mode 100644 index 00000000..e43f4199 Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/pend_statespace-480.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/pend_statespace-800.webp b/assets/img/2023-11-08-croneillproposal/pend_statespace-800.webp new file mode 100644 index 00000000..94075632 
Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/pend_statespace-800.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/spring_diagram-1400.webp b/assets/img/2023-11-08-croneillproposal/spring_diagram-1400.webp new file mode 100644 index 00000000..0d5832b0 Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/spring_diagram-1400.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/spring_diagram-480.webp b/assets/img/2023-11-08-croneillproposal/spring_diagram-480.webp new file mode 100644 index 00000000..2fb92295 Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/spring_diagram-480.webp differ diff --git a/assets/img/2023-11-08-croneillproposal/spring_diagram-800.webp b/assets/img/2023-11-08-croneillproposal/spring_diagram-800.webp new file mode 100644 index 00000000..0d5832b0 Binary files /dev/null and b/assets/img/2023-11-08-croneillproposal/spring_diagram-800.webp differ diff --git a/assets/img/2023-11-08-detect-image/cgi-example-1400.webp b/assets/img/2023-11-08-detect-image/cgi-example-1400.webp new file mode 100644 index 00000000..f5b0bd97 Binary files /dev/null and b/assets/img/2023-11-08-detect-image/cgi-example-1400.webp differ diff --git a/assets/img/2023-11-08-detect-image/cgi-example-480.webp b/assets/img/2023-11-08-detect-image/cgi-example-480.webp new file mode 100644 index 00000000..214309ce Binary files /dev/null and b/assets/img/2023-11-08-detect-image/cgi-example-480.webp differ diff --git a/assets/img/2023-11-08-detect-image/cgi-example-800.webp b/assets/img/2023-11-08-detect-image/cgi-example-800.webp new file mode 100644 index 00000000..f5b0bd97 Binary files /dev/null and b/assets/img/2023-11-08-detect-image/cgi-example-800.webp differ diff --git a/assets/img/2023-11-08-detect-image/method=Divergence_n_points=500_n_samples=10_noise_amount=0.1_num_inference_steps=25_seed=229_bs=1_hist-1400.webp 
b/assets/img/2023-11-08-detect-image/method=Divergence_n_points=500_n_samples=10_noise_amount=0.1_num_inference_steps=25_seed=229_bs=1_hist-1400.webp new file mode 100644 index 00000000..f70d79ae Binary files /dev/null and b/assets/img/2023-11-08-detect-image/method=Divergence_n_points=500_n_samples=10_noise_amount=0.1_num_inference_steps=25_seed=229_bs=1_hist-1400.webp differ diff --git a/assets/img/2023-11-08-detect-image/method=Divergence_n_points=500_n_samples=10_noise_amount=0.1_num_inference_steps=25_seed=229_bs=1_hist-480.webp b/assets/img/2023-11-08-detect-image/method=Divergence_n_points=500_n_samples=10_noise_amount=0.1_num_inference_steps=25_seed=229_bs=1_hist-480.webp new file mode 100644 index 00000000..98d65de8 Binary files /dev/null and b/assets/img/2023-11-08-detect-image/method=Divergence_n_points=500_n_samples=10_noise_amount=0.1_num_inference_steps=25_seed=229_bs=1_hist-480.webp differ diff --git a/assets/img/2023-11-08-detect-image/method=Divergence_n_points=500_n_samples=10_noise_amount=0.1_num_inference_steps=25_seed=229_bs=1_hist-800.webp b/assets/img/2023-11-08-detect-image/method=Divergence_n_points=500_n_samples=10_noise_amount=0.1_num_inference_steps=25_seed=229_bs=1_hist-800.webp new file mode 100644 index 00000000..f70d79ae Binary files /dev/null and b/assets/img/2023-11-08-detect-image/method=Divergence_n_points=500_n_samples=10_noise_amount=0.1_num_inference_steps=25_seed=229_bs=1_hist-800.webp differ diff --git a/assets/img/2023-11-08-detect-image/method=Divergence_n_points=500_n_samples=10_noise_amount=0.1_num_inference_steps=25_seed=229_bs=1_roc-1400.webp b/assets/img/2023-11-08-detect-image/method=Divergence_n_points=500_n_samples=10_noise_amount=0.1_num_inference_steps=25_seed=229_bs=1_roc-1400.webp new file mode 100644 index 00000000..8e07dd98 Binary files /dev/null and b/assets/img/2023-11-08-detect-image/method=Divergence_n_points=500_n_samples=10_noise_amount=0.1_num_inference_steps=25_seed=229_bs=1_roc-1400.webp differ diff 
--git a/assets/img/2023-11-08-detect-image/method=Divergence_n_points=500_n_samples=10_noise_amount=0.1_num_inference_steps=25_seed=229_bs=1_roc-480.webp b/assets/img/2023-11-08-detect-image/method=Divergence_n_points=500_n_samples=10_noise_amount=0.1_num_inference_steps=25_seed=229_bs=1_roc-480.webp new file mode 100644 index 00000000..1cccfe31 Binary files /dev/null and b/assets/img/2023-11-08-detect-image/method=Divergence_n_points=500_n_samples=10_noise_amount=0.1_num_inference_steps=25_seed=229_bs=1_roc-480.webp differ diff --git a/assets/img/2023-11-08-detect-image/method=Divergence_n_points=500_n_samples=10_noise_amount=0.1_num_inference_steps=25_seed=229_bs=1_roc-800.webp b/assets/img/2023-11-08-detect-image/method=Divergence_n_points=500_n_samples=10_noise_amount=0.1_num_inference_steps=25_seed=229_bs=1_roc-800.webp new file mode 100644 index 00000000..8e07dd98 Binary files /dev/null and b/assets/img/2023-11-08-detect-image/method=Divergence_n_points=500_n_samples=10_noise_amount=0.1_num_inference_steps=25_seed=229_bs=1_roc-800.webp differ diff --git a/assets/img/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1_hist-1400.webp b/assets/img/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1_hist-1400.webp new file mode 100644 index 00000000..e87c29ad Binary files /dev/null and b/assets/img/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1_hist-1400.webp differ diff --git a/assets/img/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1_hist-480.webp b/assets/img/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1_hist-480.webp new file mode 100644 index 00000000..953b2e36 Binary files 
/dev/null and b/assets/img/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1_hist-480.webp differ diff --git a/assets/img/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1_hist-800.webp b/assets/img/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1_hist-800.webp new file mode 100644 index 00000000..e87c29ad Binary files /dev/null and b/assets/img/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1_hist-800.webp differ diff --git a/assets/img/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1_roc-1400.webp b/assets/img/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1_roc-1400.webp new file mode 100644 index 00000000..56137942 Binary files /dev/null and b/assets/img/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1_roc-1400.webp differ diff --git a/assets/img/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1_roc-480.webp b/assets/img/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1_roc-480.webp new file mode 100644 index 00000000..87a6ed86 Binary files /dev/null and b/assets/img/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1_roc-480.webp differ diff --git a/assets/img/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1_roc-800.webp 
b/assets/img/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1_roc-800.webp new file mode 100644 index 00000000..56137942 Binary files /dev/null and b/assets/img/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1_roc-800.webp differ diff --git a/assets/img/2023-11-08-detect-image/method=Resnet_n_points=1000_seed=229_bs=1_hist-1400.webp b/assets/img/2023-11-08-detect-image/method=Resnet_n_points=1000_seed=229_bs=1_hist-1400.webp new file mode 100644 index 00000000..0a9d2dba Binary files /dev/null and b/assets/img/2023-11-08-detect-image/method=Resnet_n_points=1000_seed=229_bs=1_hist-1400.webp differ diff --git a/assets/img/2023-11-08-detect-image/method=Resnet_n_points=1000_seed=229_bs=1_hist-480.webp b/assets/img/2023-11-08-detect-image/method=Resnet_n_points=1000_seed=229_bs=1_hist-480.webp new file mode 100644 index 00000000..eee36419 Binary files /dev/null and b/assets/img/2023-11-08-detect-image/method=Resnet_n_points=1000_seed=229_bs=1_hist-480.webp differ diff --git a/assets/img/2023-11-08-detect-image/method=Resnet_n_points=1000_seed=229_bs=1_hist-800.webp b/assets/img/2023-11-08-detect-image/method=Resnet_n_points=1000_seed=229_bs=1_hist-800.webp new file mode 100644 index 00000000..0a9d2dba Binary files /dev/null and b/assets/img/2023-11-08-detect-image/method=Resnet_n_points=1000_seed=229_bs=1_hist-800.webp differ diff --git a/assets/img/2023-11-08-detect-image/method=Resnet_n_points=1000_seed=229_bs=1_roc-1400.webp b/assets/img/2023-11-08-detect-image/method=Resnet_n_points=1000_seed=229_bs=1_roc-1400.webp new file mode 100644 index 00000000..e0fba122 Binary files /dev/null and b/assets/img/2023-11-08-detect-image/method=Resnet_n_points=1000_seed=229_bs=1_roc-1400.webp differ diff --git a/assets/img/2023-11-08-detect-image/method=Resnet_n_points=1000_seed=229_bs=1_roc-480.webp 
b/assets/img/2023-11-08-detect-image/method=Resnet_n_points=1000_seed=229_bs=1_roc-480.webp new file mode 100644 index 00000000..fc1fadae Binary files /dev/null and b/assets/img/2023-11-08-detect-image/method=Resnet_n_points=1000_seed=229_bs=1_roc-480.webp differ diff --git a/assets/img/2023-11-08-detect-image/method=Resnet_n_points=1000_seed=229_bs=1_roc-800.webp b/assets/img/2023-11-08-detect-image/method=Resnet_n_points=1000_seed=229_bs=1_roc-800.webp new file mode 100644 index 00000000..e0fba122 Binary files /dev/null and b/assets/img/2023-11-08-detect-image/method=Resnet_n_points=1000_seed=229_bs=1_roc-800.webp differ diff --git a/assets/img/2023-11-08-detect-image/methods-illustrated-1400.webp b/assets/img/2023-11-08-detect-image/methods-illustrated-1400.webp new file mode 100644 index 00000000..4642479a Binary files /dev/null and b/assets/img/2023-11-08-detect-image/methods-illustrated-1400.webp differ diff --git a/assets/img/2023-11-08-detect-image/methods-illustrated-480.webp b/assets/img/2023-11-08-detect-image/methods-illustrated-480.webp new file mode 100644 index 00000000..6b408b85 Binary files /dev/null and b/assets/img/2023-11-08-detect-image/methods-illustrated-480.webp differ diff --git a/assets/img/2023-11-08-detect-image/methods-illustrated-800.webp b/assets/img/2023-11-08-detect-image/methods-illustrated-800.webp new file mode 100644 index 00000000..4642479a Binary files /dev/null and b/assets/img/2023-11-08-detect-image/methods-illustrated-800.webp differ diff --git a/assets/img/2023-11-08-diaz-proposal/diagonal_vit_tiny-1400.webp b/assets/img/2023-11-08-diaz-proposal/diagonal_vit_tiny-1400.webp new file mode 100644 index 00000000..18e53ae6 Binary files /dev/null and b/assets/img/2023-11-08-diaz-proposal/diagonal_vit_tiny-1400.webp differ diff --git a/assets/img/2023-11-08-diaz-proposal/diagonal_vit_tiny-480.webp b/assets/img/2023-11-08-diaz-proposal/diagonal_vit_tiny-480.webp new file mode 100644 index 00000000..5e396f89 Binary files /dev/null 
and b/assets/img/2023-11-08-diaz-proposal/diagonal_vit_tiny-480.webp differ diff --git a/assets/img/2023-11-08-diaz-proposal/diagonal_vit_tiny-800.webp b/assets/img/2023-11-08-diaz-proposal/diagonal_vit_tiny-800.webp new file mode 100644 index 00000000..18e53ae6 Binary files /dev/null and b/assets/img/2023-11-08-diaz-proposal/diagonal_vit_tiny-800.webp differ diff --git a/assets/img/2023-11-08-diaz-proposal/fig5-1400.webp b/assets/img/2023-11-08-diaz-proposal/fig5-1400.webp new file mode 100644 index 00000000..a8df0647 Binary files /dev/null and b/assets/img/2023-11-08-diaz-proposal/fig5-1400.webp differ diff --git a/assets/img/2023-11-08-diaz-proposal/fig5-480.webp b/assets/img/2023-11-08-diaz-proposal/fig5-480.webp new file mode 100644 index 00000000..2fa1cc46 Binary files /dev/null and b/assets/img/2023-11-08-diaz-proposal/fig5-480.webp differ diff --git a/assets/img/2023-11-08-diaz-proposal/fig5-800.webp b/assets/img/2023-11-08-diaz-proposal/fig5-800.webp new file mode 100644 index 00000000..a8df0647 Binary files /dev/null and b/assets/img/2023-11-08-diaz-proposal/fig5-800.webp differ diff --git a/assets/img/2023-11-08-diaz-proposal/simplified_block-1400.webp b/assets/img/2023-11-08-diaz-proposal/simplified_block-1400.webp new file mode 100644 index 00000000..1dd2f713 Binary files /dev/null and b/assets/img/2023-11-08-diaz-proposal/simplified_block-1400.webp differ diff --git a/assets/img/2023-11-08-diaz-proposal/simplified_block-480.webp b/assets/img/2023-11-08-diaz-proposal/simplified_block-480.webp new file mode 100644 index 00000000..38f14c11 Binary files /dev/null and b/assets/img/2023-11-08-diaz-proposal/simplified_block-480.webp differ diff --git a/assets/img/2023-11-08-diaz-proposal/simplified_block-800.webp b/assets/img/2023-11-08-diaz-proposal/simplified_block-800.webp new file mode 100644 index 00000000..1dd2f713 Binary files /dev/null and b/assets/img/2023-11-08-diaz-proposal/simplified_block-800.webp differ diff --git 
a/assets/img/2023-11-08-diaz-proposal/simplified_performance-1400.webp b/assets/img/2023-11-08-diaz-proposal/simplified_performance-1400.webp new file mode 100644 index 00000000..c05f3224 Binary files /dev/null and b/assets/img/2023-11-08-diaz-proposal/simplified_performance-1400.webp differ diff --git a/assets/img/2023-11-08-diaz-proposal/simplified_performance-480.webp b/assets/img/2023-11-08-diaz-proposal/simplified_performance-480.webp new file mode 100644 index 00000000..b8515404 Binary files /dev/null and b/assets/img/2023-11-08-diaz-proposal/simplified_performance-480.webp differ diff --git a/assets/img/2023-11-08-diaz-proposal/simplified_performance-800.webp b/assets/img/2023-11-08-diaz-proposal/simplified_performance-800.webp new file mode 100644 index 00000000..c05f3224 Binary files /dev/null and b/assets/img/2023-11-08-diaz-proposal/simplified_performance-800.webp differ diff --git a/assets/img/2023-11-08-diaz-proposal/transformer-1400.webp b/assets/img/2023-11-08-diaz-proposal/transformer-1400.webp new file mode 100644 index 00000000..0f9621fb Binary files /dev/null and b/assets/img/2023-11-08-diaz-proposal/transformer-1400.webp differ diff --git a/assets/img/2023-11-08-diaz-proposal/transformer-480.webp b/assets/img/2023-11-08-diaz-proposal/transformer-480.webp new file mode 100644 index 00000000..45045d7e Binary files /dev/null and b/assets/img/2023-11-08-diaz-proposal/transformer-480.webp differ diff --git a/assets/img/2023-11-08-diaz-proposal/transformer-800.webp b/assets/img/2023-11-08-diaz-proposal/transformer-800.webp new file mode 100644 index 00000000..0f9621fb Binary files /dev/null and b/assets/img/2023-11-08-diaz-proposal/transformer-800.webp differ diff --git a/assets/img/2023-11-08-diaz-proposal/vit_workflow-1400.webp b/assets/img/2023-11-08-diaz-proposal/vit_workflow-1400.webp new file mode 100644 index 00000000..f7dfb7f7 Binary files /dev/null and b/assets/img/2023-11-08-diaz-proposal/vit_workflow-1400.webp differ diff --git 
a/assets/img/2023-11-08-diaz-proposal/vit_workflow-480.webp b/assets/img/2023-11-08-diaz-proposal/vit_workflow-480.webp new file mode 100644 index 00000000..7e5a5ec4 Binary files /dev/null and b/assets/img/2023-11-08-diaz-proposal/vit_workflow-480.webp differ diff --git a/assets/img/2023-11-08-diaz-proposal/vit_workflow-800.webp b/assets/img/2023-11-08-diaz-proposal/vit_workflow-800.webp new file mode 100644 index 00000000..f7dfb7f7 Binary files /dev/null and b/assets/img/2023-11-08-diaz-proposal/vit_workflow-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-1400.webp b/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-1400.webp new file mode 100644 index 00000000..de788419 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-480.webp b/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-480.webp new file mode 100644 index 00000000..4a5455cd Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-800.webp b/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-800.webp new file mode 100644 index 00000000..de788419 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-accuracy-1400.webp b/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-accuracy-1400.webp new file mode 100644 index 00000000..f4f6f6f7 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-accuracy-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-accuracy-480.webp b/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-accuracy-480.webp new file mode 100644 index 00000000..6fc3fa2e Binary files /dev/null and 
b/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-accuracy-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-accuracy-800.webp b/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-accuracy-800.webp new file mode 100644 index 00000000..f4f6f6f7 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-accuracy-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-1400.webp b/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-1400.webp new file mode 100644 index 00000000..3e0efa2e Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-480.webp b/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-480.webp new file mode 100644 index 00000000..9f506151 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-800.webp b/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-800.webp new file mode 100644 index 00000000..3e0efa2e Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-accuracy-1400.webp b/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-accuracy-1400.webp new file mode 100644 index 00000000..365cf5ed Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-accuracy-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-accuracy-480.webp b/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-accuracy-480.webp new file mode 100644 index 00000000..7296a735 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-accuracy-480.webp differ diff --git 
a/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-accuracy-800.webp b/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-accuracy-800.webp new file mode 100644 index 00000000..365cf5ed Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-accuracy-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-test-1400.webp b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-test-1400.webp new file mode 100644 index 00000000..5f5104f5 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-test-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-test-480.webp b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-test-480.webp new file mode 100644 index 00000000..9ae5bb0f Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-test-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-test-800.webp b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-test-800.webp new file mode 100644 index 00000000..5f5104f5 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-test-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-train-1400.webp b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-train-1400.webp new file mode 100644 index 00000000..ff046a2c Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-train-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-train-480.webp b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-train-480.webp new file mode 100644 index 00000000..f14a4f57 Binary 
files /dev/null and b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-train-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-train-800.webp b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-train-800.webp new file mode 100644 index 00000000..ff046a2c Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-train-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-test-1400.webp b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-test-1400.webp new file mode 100644 index 00000000..6bcaa682 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-test-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-test-480.webp b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-test-480.webp new file mode 100644 index 00000000..a7062d33 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-test-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-test-800.webp b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-test-800.webp new file mode 100644 index 00000000..6bcaa682 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-test-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-train-1400.webp b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-train-1400.webp new file mode 100644 index 00000000..502cdad9 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-train-1400.webp differ diff --git 
a/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-train-480.webp b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-train-480.webp new file mode 100644 index 00000000..e38227d0 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-train-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-train-800.webp b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-train-800.webp new file mode 100644 index 00000000..502cdad9 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-train-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-vote-train-1400.webp b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-vote-train-1400.webp new file mode 100644 index 00000000..510bbd49 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-vote-train-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-vote-train-480.webp b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-vote-train-480.webp new file mode 100644 index 00000000..748af4e7 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-vote-train-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-vote-train-800.webp b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-vote-train-800.webp new file mode 100644 index 00000000..510bbd49 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-vote-train-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-votes-test-1400.webp b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-votes-test-1400.webp new file 
mode 100644 index 00000000..74a99771 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-votes-test-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-votes-test-480.webp b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-votes-test-480.webp new file mode 100644 index 00000000..bcaf0da5 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-votes-test-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-votes-test-800.webp b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-votes-test-800.webp new file mode 100644 index 00000000..74a99771 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-votes-test-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/biasvariance-1400.webp b/assets/img/2023-11-08-double_descent/biasvariance-1400.webp new file mode 100644 index 00000000..57a7b48f Binary files /dev/null and b/assets/img/2023-11-08-double_descent/biasvariance-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/biasvariance-480.webp b/assets/img/2023-11-08-double_descent/biasvariance-480.webp new file mode 100644 index 00000000..5443a97d Binary files /dev/null and b/assets/img/2023-11-08-double_descent/biasvariance-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/biasvariance-800.webp b/assets/img/2023-11-08-double_descent/biasvariance-800.webp new file mode 100644 index 00000000..57a7b48f Binary files /dev/null and b/assets/img/2023-11-08-double_descent/biasvariance-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/dd_adaboost_zero_one_2-1400.webp b/assets/img/2023-11-08-double_descent/dd_adaboost_zero_one_2-1400.webp new file mode 100644 index 00000000..ce95f14a Binary files /dev/null and 
b/assets/img/2023-11-08-double_descent/dd_adaboost_zero_one_2-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/dd_adaboost_zero_one_2-480.webp b/assets/img/2023-11-08-double_descent/dd_adaboost_zero_one_2-480.webp new file mode 100644 index 00000000..88d5c1c4 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/dd_adaboost_zero_one_2-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/dd_adaboost_zero_one_2-800.webp b/assets/img/2023-11-08-double_descent/dd_adaboost_zero_one_2-800.webp new file mode 100644 index 00000000..ce95f14a Binary files /dev/null and b/assets/img/2023-11-08-double_descent/dd_adaboost_zero_one_2-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/dd_decision_tree_zero_one_8-1400.webp b/assets/img/2023-11-08-double_descent/dd_decision_tree_zero_one_8-1400.webp new file mode 100644 index 00000000..14c4445e Binary files /dev/null and b/assets/img/2023-11-08-double_descent/dd_decision_tree_zero_one_8-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/dd_decision_tree_zero_one_8-480.webp b/assets/img/2023-11-08-double_descent/dd_decision_tree_zero_one_8-480.webp new file mode 100644 index 00000000..6314f83f Binary files /dev/null and b/assets/img/2023-11-08-double_descent/dd_decision_tree_zero_one_8-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/dd_decision_tree_zero_one_8-800.webp b/assets/img/2023-11-08-double_descent/dd_decision_tree_zero_one_8-800.webp new file mode 100644 index 00000000..14c4445e Binary files /dev/null and b/assets/img/2023-11-08-double_descent/dd_decision_tree_zero_one_8-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/dd_l2boost_zero_one_1-1400.webp b/assets/img/2023-11-08-double_descent/dd_l2boost_zero_one_1-1400.webp new file mode 100644 index 00000000..efe9522c Binary files /dev/null and b/assets/img/2023-11-08-double_descent/dd_l2boost_zero_one_1-1400.webp differ diff --git 
a/assets/img/2023-11-08-double_descent/dd_l2boost_zero_one_1-480.webp b/assets/img/2023-11-08-double_descent/dd_l2boost_zero_one_1-480.webp new file mode 100644 index 00000000..355f2e71 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/dd_l2boost_zero_one_1-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/dd_l2boost_zero_one_1-800.webp b/assets/img/2023-11-08-double_descent/dd_l2boost_zero_one_1-800.webp new file mode 100644 index 00000000..efe9522c Binary files /dev/null and b/assets/img/2023-11-08-double_descent/dd_l2boost_zero_one_1-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_c-1400.webp b/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_c-1400.webp new file mode 100644 index 00000000..1f6449fd Binary files /dev/null and b/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_c-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_c-480.webp b/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_c-480.webp new file mode 100644 index 00000000..a1140292 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_c-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_c-800.webp b/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_c-800.webp new file mode 100644 index 00000000..1f6449fd Binary files /dev/null and b/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_c-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_d-1400.webp b/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_d-1400.webp new file mode 100644 index 00000000..b6f3f9d9 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_d-1400.webp differ diff --git 
a/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_d-480.webp b/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_d-480.webp new file mode 100644 index 00000000..c7235735 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_d-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_d-800.webp b/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_d-800.webp new file mode 100644 index 00000000..b6f3f9d9 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_d-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/dd_rf_zero_one_6-1400.webp b/assets/img/2023-11-08-double_descent/dd_rf_zero_one_6-1400.webp new file mode 100644 index 00000000..95542625 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/dd_rf_zero_one_6-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/dd_rf_zero_one_6-480.webp b/assets/img/2023-11-08-double_descent/dd_rf_zero_one_6-480.webp new file mode 100644 index 00000000..9c379681 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/dd_rf_zero_one_6-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/dd_rf_zero_one_6-800.webp b/assets/img/2023-11-08-double_descent/dd_rf_zero_one_6-800.webp new file mode 100644 index 00000000..95542625 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/dd_rf_zero_one_6-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/ensemble-100-epochs-test-1400.webp b/assets/img/2023-11-08-double_descent/ensemble-100-epochs-test-1400.webp new file mode 100644 index 00000000..07e03baa Binary files /dev/null and b/assets/img/2023-11-08-double_descent/ensemble-100-epochs-test-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/ensemble-100-epochs-test-480.webp b/assets/img/2023-11-08-double_descent/ensemble-100-epochs-test-480.webp new 
file mode 100644 index 00000000..378f74d2 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/ensemble-100-epochs-test-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/ensemble-100-epochs-test-800.webp b/assets/img/2023-11-08-double_descent/ensemble-100-epochs-test-800.webp new file mode 100644 index 00000000..07e03baa Binary files /dev/null and b/assets/img/2023-11-08-double_descent/ensemble-100-epochs-test-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/ensemble-100-epochs-train-1400.webp b/assets/img/2023-11-08-double_descent/ensemble-100-epochs-train-1400.webp new file mode 100644 index 00000000..0fa25f65 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/ensemble-100-epochs-train-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/ensemble-100-epochs-train-480.webp b/assets/img/2023-11-08-double_descent/ensemble-100-epochs-train-480.webp new file mode 100644 index 00000000..21a22690 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/ensemble-100-epochs-train-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/ensemble-100-epochs-train-800.webp b/assets/img/2023-11-08-double_descent/ensemble-100-epochs-train-800.webp new file mode 100644 index 00000000..0fa25f65 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/ensemble-100-epochs-train-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/ensemble-500-epochs-test-1400.webp b/assets/img/2023-11-08-double_descent/ensemble-500-epochs-test-1400.webp new file mode 100644 index 00000000..a303d10c Binary files /dev/null and b/assets/img/2023-11-08-double_descent/ensemble-500-epochs-test-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/ensemble-500-epochs-test-480.webp b/assets/img/2023-11-08-double_descent/ensemble-500-epochs-test-480.webp new file mode 100644 index 00000000..5b6ab328 Binary files /dev/null and 
b/assets/img/2023-11-08-double_descent/ensemble-500-epochs-test-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/ensemble-500-epochs-test-800.webp b/assets/img/2023-11-08-double_descent/ensemble-500-epochs-test-800.webp new file mode 100644 index 00000000..a303d10c Binary files /dev/null and b/assets/img/2023-11-08-double_descent/ensemble-500-epochs-test-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/ensemble-500-epochs-train-1400.webp b/assets/img/2023-11-08-double_descent/ensemble-500-epochs-train-1400.webp new file mode 100644 index 00000000..026fef25 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/ensemble-500-epochs-train-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/ensemble-500-epochs-train-480.webp b/assets/img/2023-11-08-double_descent/ensemble-500-epochs-train-480.webp new file mode 100644 index 00000000..ca2238da Binary files /dev/null and b/assets/img/2023-11-08-double_descent/ensemble-500-epochs-train-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/ensemble-500-epochs-train-800.webp b/assets/img/2023-11-08-double_descent/ensemble-500-epochs-train-800.webp new file mode 100644 index 00000000..026fef25 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/ensemble-500-epochs-train-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/mlp-100-epochs-test-1400.webp b/assets/img/2023-11-08-double_descent/mlp-100-epochs-test-1400.webp new file mode 100644 index 00000000..52039638 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/mlp-100-epochs-test-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/mlp-100-epochs-test-480.webp b/assets/img/2023-11-08-double_descent/mlp-100-epochs-test-480.webp new file mode 100644 index 00000000..cf83dc73 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/mlp-100-epochs-test-480.webp differ diff --git 
a/assets/img/2023-11-08-double_descent/mlp-100-epochs-test-800.webp b/assets/img/2023-11-08-double_descent/mlp-100-epochs-test-800.webp new file mode 100644 index 00000000..52039638 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/mlp-100-epochs-test-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/mlp-100-epochs-train-1400.webp b/assets/img/2023-11-08-double_descent/mlp-100-epochs-train-1400.webp new file mode 100644 index 00000000..728bba9b Binary files /dev/null and b/assets/img/2023-11-08-double_descent/mlp-100-epochs-train-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/mlp-100-epochs-train-480.webp b/assets/img/2023-11-08-double_descent/mlp-100-epochs-train-480.webp new file mode 100644 index 00000000..b327d7cf Binary files /dev/null and b/assets/img/2023-11-08-double_descent/mlp-100-epochs-train-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/mlp-100-epochs-train-800.webp b/assets/img/2023-11-08-double_descent/mlp-100-epochs-train-800.webp new file mode 100644 index 00000000..728bba9b Binary files /dev/null and b/assets/img/2023-11-08-double_descent/mlp-100-epochs-train-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/mlp-2000-epoch-train-1400.webp b/assets/img/2023-11-08-double_descent/mlp-2000-epoch-train-1400.webp new file mode 100644 index 00000000..6996e689 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/mlp-2000-epoch-train-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/mlp-2000-epoch-train-480.webp b/assets/img/2023-11-08-double_descent/mlp-2000-epoch-train-480.webp new file mode 100644 index 00000000..1e85e044 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/mlp-2000-epoch-train-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/mlp-2000-epoch-train-800.webp b/assets/img/2023-11-08-double_descent/mlp-2000-epoch-train-800.webp new file mode 100644 index 00000000..6996e689 Binary files /dev/null 
and b/assets/img/2023-11-08-double_descent/mlp-2000-epoch-train-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/mlp-2000-epochs-test-1400.webp b/assets/img/2023-11-08-double_descent/mlp-2000-epochs-test-1400.webp new file mode 100644 index 00000000..70d0e86b Binary files /dev/null and b/assets/img/2023-11-08-double_descent/mlp-2000-epochs-test-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/mlp-2000-epochs-test-480.webp b/assets/img/2023-11-08-double_descent/mlp-2000-epochs-test-480.webp new file mode 100644 index 00000000..cdf77cde Binary files /dev/null and b/assets/img/2023-11-08-double_descent/mlp-2000-epochs-test-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/mlp-2000-epochs-test-800.webp b/assets/img/2023-11-08-double_descent/mlp-2000-epochs-test-800.webp new file mode 100644 index 00000000..70d0e86b Binary files /dev/null and b/assets/img/2023-11-08-double_descent/mlp-2000-epochs-test-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/mlp-500-epochs-test-1400.webp b/assets/img/2023-11-08-double_descent/mlp-500-epochs-test-1400.webp new file mode 100644 index 00000000..c8d70e2b Binary files /dev/null and b/assets/img/2023-11-08-double_descent/mlp-500-epochs-test-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/mlp-500-epochs-test-480.webp b/assets/img/2023-11-08-double_descent/mlp-500-epochs-test-480.webp new file mode 100644 index 00000000..0efea2ee Binary files /dev/null and b/assets/img/2023-11-08-double_descent/mlp-500-epochs-test-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/mlp-500-epochs-test-800.webp b/assets/img/2023-11-08-double_descent/mlp-500-epochs-test-800.webp new file mode 100644 index 00000000..c8d70e2b Binary files /dev/null and b/assets/img/2023-11-08-double_descent/mlp-500-epochs-test-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/mlp-500-epochs-train-1400.webp 
b/assets/img/2023-11-08-double_descent/mlp-500-epochs-train-1400.webp new file mode 100644 index 00000000..f694393d Binary files /dev/null and b/assets/img/2023-11-08-double_descent/mlp-500-epochs-train-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/mlp-500-epochs-train-480.webp b/assets/img/2023-11-08-double_descent/mlp-500-epochs-train-480.webp new file mode 100644 index 00000000..5756e32d Binary files /dev/null and b/assets/img/2023-11-08-double_descent/mlp-500-epochs-train-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/mlp-500-epochs-train-800.webp b/assets/img/2023-11-08-double_descent/mlp-500-epochs-train-800.webp new file mode 100644 index 00000000..f694393d Binary files /dev/null and b/assets/img/2023-11-08-double_descent/mlp-500-epochs-train-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/mnist-1400.webp b/assets/img/2023-11-08-double_descent/mnist-1400.webp new file mode 100644 index 00000000..459c46e9 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/mnist-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/mnist-480.webp b/assets/img/2023-11-08-double_descent/mnist-480.webp new file mode 100644 index 00000000..d5cb2a9b Binary files /dev/null and b/assets/img/2023-11-08-double_descent/mnist-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/mnist-800.webp b/assets/img/2023-11-08-double_descent/mnist-800.webp new file mode 100644 index 00000000..459c46e9 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/mnist-800.webp differ diff --git a/assets/img/2023-11-08-double_descent/openai-1400.webp b/assets/img/2023-11-08-double_descent/openai-1400.webp new file mode 100644 index 00000000..ebdfe157 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/openai-1400.webp differ diff --git a/assets/img/2023-11-08-double_descent/openai-480.webp b/assets/img/2023-11-08-double_descent/openai-480.webp new file mode 100644 index 
00000000..b6b333e1 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/openai-480.webp differ diff --git a/assets/img/2023-11-08-double_descent/openai-800.webp b/assets/img/2023-11-08-double_descent/openai-800.webp new file mode 100644 index 00000000..ebdfe157 Binary files /dev/null and b/assets/img/2023-11-08-double_descent/openai-800.webp differ diff --git a/assets/img/2023-11-08-graphs-transformers/graphs-1400.webp b/assets/img/2023-11-08-graphs-transformers/graphs-1400.webp new file mode 100644 index 00000000..a2c77df6 Binary files /dev/null and b/assets/img/2023-11-08-graphs-transformers/graphs-1400.webp differ diff --git a/assets/img/2023-11-08-graphs-transformers/graphs-480.webp b/assets/img/2023-11-08-graphs-transformers/graphs-480.webp new file mode 100644 index 00000000..c22f5105 Binary files /dev/null and b/assets/img/2023-11-08-graphs-transformers/graphs-480.webp differ diff --git a/assets/img/2023-11-08-graphs-transformers/graphs-800.webp b/assets/img/2023-11-08-graphs-transformers/graphs-800.webp new file mode 100644 index 00000000..a2c77df6 Binary files /dev/null and b/assets/img/2023-11-08-graphs-transformers/graphs-800.webp differ diff --git a/assets/img/2023-11-08-graphs-transformers/skip_connection-1400.webp b/assets/img/2023-11-08-graphs-transformers/skip_connection-1400.webp new file mode 100644 index 00000000..32218058 Binary files /dev/null and b/assets/img/2023-11-08-graphs-transformers/skip_connection-1400.webp differ diff --git a/assets/img/2023-11-08-graphs-transformers/skip_connection-480.webp b/assets/img/2023-11-08-graphs-transformers/skip_connection-480.webp new file mode 100644 index 00000000..eb063ad5 Binary files /dev/null and b/assets/img/2023-11-08-graphs-transformers/skip_connection-480.webp differ diff --git a/assets/img/2023-11-08-graphs-transformers/skip_connection-800.webp b/assets/img/2023-11-08-graphs-transformers/skip_connection-800.webp new file mode 100644 index 00000000..32218058 Binary files /dev/null 
and b/assets/img/2023-11-08-graphs-transformers/skip_connection-800.webp differ diff --git a/assets/img/2023-11-08-graphs-transformers/transformer-architecture-diagram-1400.webp b/assets/img/2023-11-08-graphs-transformers/transformer-architecture-diagram-1400.webp new file mode 100644 index 00000000..15e40e86 Binary files /dev/null and b/assets/img/2023-11-08-graphs-transformers/transformer-architecture-diagram-1400.webp differ diff --git a/assets/img/2023-11-08-graphs-transformers/transformer-architecture-diagram-480.webp b/assets/img/2023-11-08-graphs-transformers/transformer-architecture-diagram-480.webp new file mode 100644 index 00000000..ca6e7579 Binary files /dev/null and b/assets/img/2023-11-08-graphs-transformers/transformer-architecture-diagram-480.webp differ diff --git a/assets/img/2023-11-08-graphs-transformers/transformer-architecture-diagram-800.webp b/assets/img/2023-11-08-graphs-transformers/transformer-architecture-diagram-800.webp new file mode 100644 index 00000000..15e40e86 Binary files /dev/null and b/assets/img/2023-11-08-graphs-transformers/transformer-architecture-diagram-800.webp differ diff --git a/assets/img/2023-11-08-graphs-transformers/transformer_DL-1400.webp b/assets/img/2023-11-08-graphs-transformers/transformer_DL-1400.webp new file mode 100644 index 00000000..0d3e2c24 Binary files /dev/null and b/assets/img/2023-11-08-graphs-transformers/transformer_DL-1400.webp differ diff --git a/assets/img/2023-11-08-graphs-transformers/transformer_DL-480.webp b/assets/img/2023-11-08-graphs-transformers/transformer_DL-480.webp new file mode 100644 index 00000000..e69d1b75 Binary files /dev/null and b/assets/img/2023-11-08-graphs-transformers/transformer_DL-480.webp differ diff --git a/assets/img/2023-11-08-graphs-transformers/transformer_DL-800.webp b/assets/img/2023-11-08-graphs-transformers/transformer_DL-800.webp new file mode 100644 index 00000000..0d3e2c24 Binary files /dev/null and 
b/assets/img/2023-11-08-graphs-transformers/transformer_DL-800.webp differ diff --git a/assets/img/2023-11-08-grokking-proposal/power_plot-1400.webp b/assets/img/2023-11-08-grokking-proposal/power_plot-1400.webp new file mode 100644 index 00000000..b549e16e Binary files /dev/null and b/assets/img/2023-11-08-grokking-proposal/power_plot-1400.webp differ diff --git a/assets/img/2023-11-08-grokking-proposal/power_plot-480.webp b/assets/img/2023-11-08-grokking-proposal/power_plot-480.webp new file mode 100644 index 00000000..bbbb8f7a Binary files /dev/null and b/assets/img/2023-11-08-grokking-proposal/power_plot-480.webp differ diff --git a/assets/img/2023-11-08-grokking-proposal/power_plot-800.webp b/assets/img/2023-11-08-grokking-proposal/power_plot-800.webp new file mode 100644 index 00000000..b549e16e Binary files /dev/null and b/assets/img/2023-11-08-grokking-proposal/power_plot-800.webp differ diff --git a/assets/img/2023-11-08-increasing-context-length-for-transformers/child-et-al-1400.webp b/assets/img/2023-11-08-increasing-context-length-for-transformers/child-et-al-1400.webp new file mode 100644 index 00000000..f39f7f7e Binary files /dev/null and b/assets/img/2023-11-08-increasing-context-length-for-transformers/child-et-al-1400.webp differ diff --git a/assets/img/2023-11-08-increasing-context-length-for-transformers/child-et-al-480.webp b/assets/img/2023-11-08-increasing-context-length-for-transformers/child-et-al-480.webp new file mode 100644 index 00000000..ca6a1563 Binary files /dev/null and b/assets/img/2023-11-08-increasing-context-length-for-transformers/child-et-al-480.webp differ diff --git a/assets/img/2023-11-08-increasing-context-length-for-transformers/child-et-al-800.webp b/assets/img/2023-11-08-increasing-context-length-for-transformers/child-et-al-800.webp new file mode 100644 index 00000000..f39f7f7e Binary files /dev/null and b/assets/img/2023-11-08-increasing-context-length-for-transformers/child-et-al-800.webp differ diff --git 
a/assets/img/2023-11-08-increasing-context-length-for-transformers/diag-attn-1400.webp b/assets/img/2023-11-08-increasing-context-length-for-transformers/diag-attn-1400.webp new file mode 100644 index 00000000..6b244a8c Binary files /dev/null and b/assets/img/2023-11-08-increasing-context-length-for-transformers/diag-attn-1400.webp differ diff --git a/assets/img/2023-11-08-increasing-context-length-for-transformers/diag-attn-480.webp b/assets/img/2023-11-08-increasing-context-length-for-transformers/diag-attn-480.webp new file mode 100644 index 00000000..50feee7e Binary files /dev/null and b/assets/img/2023-11-08-increasing-context-length-for-transformers/diag-attn-480.webp differ diff --git a/assets/img/2023-11-08-increasing-context-length-for-transformers/diag-attn-800.webp b/assets/img/2023-11-08-increasing-context-length-for-transformers/diag-attn-800.webp new file mode 100644 index 00000000..6b244a8c Binary files /dev/null and b/assets/img/2023-11-08-increasing-context-length-for-transformers/diag-attn-800.webp differ diff --git a/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-1-1400.webp b/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-1-1400.webp new file mode 100644 index 00000000..d892e10c Binary files /dev/null and b/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-1-1400.webp differ diff --git a/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-1-480.webp b/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-1-480.webp new file mode 100644 index 00000000..8e86c462 Binary files /dev/null and b/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-1-480.webp differ diff --git a/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-1-800.webp b/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-1-800.webp new file 
mode 100644 index 00000000..d892e10c Binary files /dev/null and b/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-1-800.webp differ diff --git a/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-2-1400.webp b/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-2-1400.webp new file mode 100644 index 00000000..b2fa6c53 Binary files /dev/null and b/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-2-1400.webp differ diff --git a/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-2-480.webp b/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-2-480.webp new file mode 100644 index 00000000..3a0670ff Binary files /dev/null and b/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-2-480.webp differ diff --git a/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-2-800.webp b/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-2-800.webp new file mode 100644 index 00000000..b2fa6c53 Binary files /dev/null and b/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-2-800.webp differ diff --git a/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-3-1400.webp b/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-3-1400.webp new file mode 100644 index 00000000..f01b5817 Binary files /dev/null and b/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-3-1400.webp differ diff --git a/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-3-480.webp b/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-3-480.webp new file mode 100644 index 00000000..1c8b8fb8 Binary files /dev/null and 
b/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-3-480.webp differ diff --git a/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-3-800.webp b/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-3-800.webp new file mode 100644 index 00000000..f01b5817 Binary files /dev/null and b/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-3-800.webp differ diff --git a/assets/img/2023-11-08-increasing-context-length-for-transformers/gaussian-attn-1400.webp b/assets/img/2023-11-08-increasing-context-length-for-transformers/gaussian-attn-1400.webp new file mode 100644 index 00000000..d6e969aa Binary files /dev/null and b/assets/img/2023-11-08-increasing-context-length-for-transformers/gaussian-attn-1400.webp differ diff --git a/assets/img/2023-11-08-increasing-context-length-for-transformers/gaussian-attn-480.webp b/assets/img/2023-11-08-increasing-context-length-for-transformers/gaussian-attn-480.webp new file mode 100644 index 00000000..74a695ef Binary files /dev/null and b/assets/img/2023-11-08-increasing-context-length-for-transformers/gaussian-attn-480.webp differ diff --git a/assets/img/2023-11-08-increasing-context-length-for-transformers/gaussian-attn-800.webp b/assets/img/2023-11-08-increasing-context-length-for-transformers/gaussian-attn-800.webp new file mode 100644 index 00000000..d6e969aa Binary files /dev/null and b/assets/img/2023-11-08-increasing-context-length-for-transformers/gaussian-attn-800.webp differ diff --git a/assets/img/2023-11-08-increasing-context-length-for-transformers/train-vs-val-loss-1400.webp b/assets/img/2023-11-08-increasing-context-length-for-transformers/train-vs-val-loss-1400.webp new file mode 100644 index 00000000..cb972df9 Binary files /dev/null and b/assets/img/2023-11-08-increasing-context-length-for-transformers/train-vs-val-loss-1400.webp differ diff --git 
a/assets/img/2023-11-08-increasing-context-length-for-transformers/train-vs-val-loss-480.webp b/assets/img/2023-11-08-increasing-context-length-for-transformers/train-vs-val-loss-480.webp new file mode 100644 index 00000000..3cdbd02b Binary files /dev/null and b/assets/img/2023-11-08-increasing-context-length-for-transformers/train-vs-val-loss-480.webp differ diff --git a/assets/img/2023-11-08-increasing-context-length-for-transformers/train-vs-val-loss-800.webp b/assets/img/2023-11-08-increasing-context-length-for-transformers/train-vs-val-loss-800.webp new file mode 100644 index 00000000..cb972df9 Binary files /dev/null and b/assets/img/2023-11-08-increasing-context-length-for-transformers/train-vs-val-loss-800.webp differ diff --git a/assets/img/2023-11-08-increasing-context-length-for-transformers/vert-attn-1400.webp b/assets/img/2023-11-08-increasing-context-length-for-transformers/vert-attn-1400.webp new file mode 100644 index 00000000..f39fd72d Binary files /dev/null and b/assets/img/2023-11-08-increasing-context-length-for-transformers/vert-attn-1400.webp differ diff --git a/assets/img/2023-11-08-increasing-context-length-for-transformers/vert-attn-480.webp b/assets/img/2023-11-08-increasing-context-length-for-transformers/vert-attn-480.webp new file mode 100644 index 00000000..7727bff5 Binary files /dev/null and b/assets/img/2023-11-08-increasing-context-length-for-transformers/vert-attn-480.webp differ diff --git a/assets/img/2023-11-08-increasing-context-length-for-transformers/vert-attn-800.webp b/assets/img/2023-11-08-increasing-context-length-for-transformers/vert-attn-800.webp new file mode 100644 index 00000000..f39fd72d Binary files /dev/null and b/assets/img/2023-11-08-increasing-context-length-for-transformers/vert-attn-800.webp differ diff --git a/assets/img/2023-11-08-mapreason/1013_301-1400.webp b/assets/img/2023-11-08-mapreason/1013_301-1400.webp new file mode 100644 index 00000000..b1127898 Binary files /dev/null and 
b/assets/img/2023-11-08-mapreason/1013_301-1400.webp differ diff --git a/assets/img/2023-11-08-mapreason/1013_301-480.webp b/assets/img/2023-11-08-mapreason/1013_301-480.webp new file mode 100644 index 00000000..8f89e6ae Binary files /dev/null and b/assets/img/2023-11-08-mapreason/1013_301-480.webp differ diff --git a/assets/img/2023-11-08-mapreason/1013_301-800.webp b/assets/img/2023-11-08-mapreason/1013_301-800.webp new file mode 100644 index 00000000..b1127898 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/1013_301-800.webp differ diff --git a/assets/img/2023-11-08-mapreason/1013_302-1400.webp b/assets/img/2023-11-08-mapreason/1013_302-1400.webp new file mode 100644 index 00000000..0ea7cfd4 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/1013_302-1400.webp differ diff --git a/assets/img/2023-11-08-mapreason/1013_302-480.webp b/assets/img/2023-11-08-mapreason/1013_302-480.webp new file mode 100644 index 00000000..f6e5b462 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/1013_302-480.webp differ diff --git a/assets/img/2023-11-08-mapreason/1013_302-800.webp b/assets/img/2023-11-08-mapreason/1013_302-800.webp new file mode 100644 index 00000000..0ea7cfd4 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/1013_302-800.webp differ diff --git a/assets/img/2023-11-08-mapreason/1037_301-1400.webp b/assets/img/2023-11-08-mapreason/1037_301-1400.webp new file mode 100644 index 00000000..10b259f6 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/1037_301-1400.webp differ diff --git a/assets/img/2023-11-08-mapreason/1037_301-480.webp b/assets/img/2023-11-08-mapreason/1037_301-480.webp new file mode 100644 index 00000000..da3adaff Binary files /dev/null and b/assets/img/2023-11-08-mapreason/1037_301-480.webp differ diff --git a/assets/img/2023-11-08-mapreason/1037_301-800.webp b/assets/img/2023-11-08-mapreason/1037_301-800.webp new file mode 100644 index 00000000..10b259f6 Binary files /dev/null and 
b/assets/img/2023-11-08-mapreason/1037_301-800.webp differ diff --git a/assets/img/2023-11-08-mapreason/1037_302-1400.webp b/assets/img/2023-11-08-mapreason/1037_302-1400.webp new file mode 100644 index 00000000..07de4535 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/1037_302-1400.webp differ diff --git a/assets/img/2023-11-08-mapreason/1037_302-480.webp b/assets/img/2023-11-08-mapreason/1037_302-480.webp new file mode 100644 index 00000000..0b4ac39f Binary files /dev/null and b/assets/img/2023-11-08-mapreason/1037_302-480.webp differ diff --git a/assets/img/2023-11-08-mapreason/1037_302-800.webp b/assets/img/2023-11-08-mapreason/1037_302-800.webp new file mode 100644 index 00000000..07de4535 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/1037_302-800.webp differ diff --git a/assets/img/2023-11-08-mapreason/1038_301-1400.webp b/assets/img/2023-11-08-mapreason/1038_301-1400.webp new file mode 100644 index 00000000..32e29937 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/1038_301-1400.webp differ diff --git a/assets/img/2023-11-08-mapreason/1038_301-480.webp b/assets/img/2023-11-08-mapreason/1038_301-480.webp new file mode 100644 index 00000000..ab9c083a Binary files /dev/null and b/assets/img/2023-11-08-mapreason/1038_301-480.webp differ diff --git a/assets/img/2023-11-08-mapreason/1038_301-800.webp b/assets/img/2023-11-08-mapreason/1038_301-800.webp new file mode 100644 index 00000000..32e29937 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/1038_301-800.webp differ diff --git a/assets/img/2023-11-08-mapreason/1038_302-1400.webp b/assets/img/2023-11-08-mapreason/1038_302-1400.webp new file mode 100644 index 00000000..a1168bcf Binary files /dev/null and b/assets/img/2023-11-08-mapreason/1038_302-1400.webp differ diff --git a/assets/img/2023-11-08-mapreason/1038_302-480.webp b/assets/img/2023-11-08-mapreason/1038_302-480.webp new file mode 100644 index 00000000..72376103 Binary files /dev/null and 
b/assets/img/2023-11-08-mapreason/1038_302-480.webp differ diff --git a/assets/img/2023-11-08-mapreason/1038_302-800.webp b/assets/img/2023-11-08-mapreason/1038_302-800.webp new file mode 100644 index 00000000..a1168bcf Binary files /dev/null and b/assets/img/2023-11-08-mapreason/1038_302-800.webp differ diff --git a/assets/img/2023-11-08-mapreason/223_301-1400.webp b/assets/img/2023-11-08-mapreason/223_301-1400.webp new file mode 100644 index 00000000..c4016b09 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/223_301-1400.webp differ diff --git a/assets/img/2023-11-08-mapreason/223_301-480.webp b/assets/img/2023-11-08-mapreason/223_301-480.webp new file mode 100644 index 00000000..6ef6f740 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/223_301-480.webp differ diff --git a/assets/img/2023-11-08-mapreason/223_301-800.webp b/assets/img/2023-11-08-mapreason/223_301-800.webp new file mode 100644 index 00000000..c4016b09 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/223_301-800.webp differ diff --git a/assets/img/2023-11-08-mapreason/223_302-1400.webp b/assets/img/2023-11-08-mapreason/223_302-1400.webp new file mode 100644 index 00000000..6d9e618a Binary files /dev/null and b/assets/img/2023-11-08-mapreason/223_302-1400.webp differ diff --git a/assets/img/2023-11-08-mapreason/223_302-480.webp b/assets/img/2023-11-08-mapreason/223_302-480.webp new file mode 100644 index 00000000..ee8b4520 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/223_302-480.webp differ diff --git a/assets/img/2023-11-08-mapreason/223_302-800.webp b/assets/img/2023-11-08-mapreason/223_302-800.webp new file mode 100644 index 00000000..6d9e618a Binary files /dev/null and b/assets/img/2023-11-08-mapreason/223_302-800.webp differ diff --git a/assets/img/2023-11-08-mapreason/43_301-1400.webp b/assets/img/2023-11-08-mapreason/43_301-1400.webp new file mode 100644 index 00000000..07082c4b Binary files /dev/null and 
b/assets/img/2023-11-08-mapreason/43_301-1400.webp differ diff --git a/assets/img/2023-11-08-mapreason/43_301-480.webp b/assets/img/2023-11-08-mapreason/43_301-480.webp new file mode 100644 index 00000000..31241d40 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/43_301-480.webp differ diff --git a/assets/img/2023-11-08-mapreason/43_301-800.webp b/assets/img/2023-11-08-mapreason/43_301-800.webp new file mode 100644 index 00000000..07082c4b Binary files /dev/null and b/assets/img/2023-11-08-mapreason/43_301-800.webp differ diff --git a/assets/img/2023-11-08-mapreason/43_302-1400.webp b/assets/img/2023-11-08-mapreason/43_302-1400.webp new file mode 100644 index 00000000..c2c632a2 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/43_302-1400.webp differ diff --git a/assets/img/2023-11-08-mapreason/43_302-480.webp b/assets/img/2023-11-08-mapreason/43_302-480.webp new file mode 100644 index 00000000..3dacfbcd Binary files /dev/null and b/assets/img/2023-11-08-mapreason/43_302-480.webp differ diff --git a/assets/img/2023-11-08-mapreason/43_302-800.webp b/assets/img/2023-11-08-mapreason/43_302-800.webp new file mode 100644 index 00000000..c2c632a2 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/43_302-800.webp differ diff --git a/assets/img/2023-11-08-mapreason/51_301-1400.webp b/assets/img/2023-11-08-mapreason/51_301-1400.webp new file mode 100644 index 00000000..d8aae0ea Binary files /dev/null and b/assets/img/2023-11-08-mapreason/51_301-1400.webp differ diff --git a/assets/img/2023-11-08-mapreason/51_301-480.webp b/assets/img/2023-11-08-mapreason/51_301-480.webp new file mode 100644 index 00000000..828b7874 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/51_301-480.webp differ diff --git a/assets/img/2023-11-08-mapreason/51_301-800.webp b/assets/img/2023-11-08-mapreason/51_301-800.webp new file mode 100644 index 00000000..d8aae0ea Binary files /dev/null and b/assets/img/2023-11-08-mapreason/51_301-800.webp differ diff 
--git a/assets/img/2023-11-08-mapreason/51_302-1400.webp b/assets/img/2023-11-08-mapreason/51_302-1400.webp new file mode 100644 index 00000000..b6458cd3 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/51_302-1400.webp differ diff --git a/assets/img/2023-11-08-mapreason/51_302-480.webp b/assets/img/2023-11-08-mapreason/51_302-480.webp new file mode 100644 index 00000000..c776fc65 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/51_302-480.webp differ diff --git a/assets/img/2023-11-08-mapreason/51_302-800.webp b/assets/img/2023-11-08-mapreason/51_302-800.webp new file mode 100644 index 00000000..b6458cd3 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/51_302-800.webp differ diff --git a/assets/img/2023-11-08-mapreason/779_301-1400.webp b/assets/img/2023-11-08-mapreason/779_301-1400.webp new file mode 100644 index 00000000..7a3d7411 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/779_301-1400.webp differ diff --git a/assets/img/2023-11-08-mapreason/779_301-480.webp b/assets/img/2023-11-08-mapreason/779_301-480.webp new file mode 100644 index 00000000..338436be Binary files /dev/null and b/assets/img/2023-11-08-mapreason/779_301-480.webp differ diff --git a/assets/img/2023-11-08-mapreason/779_301-800.webp b/assets/img/2023-11-08-mapreason/779_301-800.webp new file mode 100644 index 00000000..7a3d7411 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/779_301-800.webp differ diff --git a/assets/img/2023-11-08-mapreason/779_302-1400.webp b/assets/img/2023-11-08-mapreason/779_302-1400.webp new file mode 100644 index 00000000..c1116aac Binary files /dev/null and b/assets/img/2023-11-08-mapreason/779_302-1400.webp differ diff --git a/assets/img/2023-11-08-mapreason/779_302-480.webp b/assets/img/2023-11-08-mapreason/779_302-480.webp new file mode 100644 index 00000000..274de667 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/779_302-480.webp differ diff --git 
a/assets/img/2023-11-08-mapreason/779_302-800.webp b/assets/img/2023-11-08-mapreason/779_302-800.webp new file mode 100644 index 00000000..c1116aac Binary files /dev/null and b/assets/img/2023-11-08-mapreason/779_302-800.webp differ diff --git a/assets/img/2023-11-08-mapreason/85_301-1400.webp b/assets/img/2023-11-08-mapreason/85_301-1400.webp new file mode 100644 index 00000000..0487f024 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/85_301-1400.webp differ diff --git a/assets/img/2023-11-08-mapreason/85_301-480.webp b/assets/img/2023-11-08-mapreason/85_301-480.webp new file mode 100644 index 00000000..803b9866 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/85_301-480.webp differ diff --git a/assets/img/2023-11-08-mapreason/85_301-800.webp b/assets/img/2023-11-08-mapreason/85_301-800.webp new file mode 100644 index 00000000..0487f024 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/85_301-800.webp differ diff --git a/assets/img/2023-11-08-mapreason/85_302-1400.webp b/assets/img/2023-11-08-mapreason/85_302-1400.webp new file mode 100644 index 00000000..dec09307 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/85_302-1400.webp differ diff --git a/assets/img/2023-11-08-mapreason/85_302-480.webp b/assets/img/2023-11-08-mapreason/85_302-480.webp new file mode 100644 index 00000000..a229cd9c Binary files /dev/null and b/assets/img/2023-11-08-mapreason/85_302-480.webp differ diff --git a/assets/img/2023-11-08-mapreason/85_302-800.webp b/assets/img/2023-11-08-mapreason/85_302-800.webp new file mode 100644 index 00000000..dec09307 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/85_302-800.webp differ diff --git a/assets/img/2023-11-08-mapreason/experiment_01_01-1400.webp b/assets/img/2023-11-08-mapreason/experiment_01_01-1400.webp new file mode 100644 index 00000000..35b7ba10 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/experiment_01_01-1400.webp differ diff --git 
a/assets/img/2023-11-08-mapreason/experiment_01_01-480.webp b/assets/img/2023-11-08-mapreason/experiment_01_01-480.webp new file mode 100644 index 00000000..b10d637e Binary files /dev/null and b/assets/img/2023-11-08-mapreason/experiment_01_01-480.webp differ diff --git a/assets/img/2023-11-08-mapreason/experiment_01_01-800.webp b/assets/img/2023-11-08-mapreason/experiment_01_01-800.webp new file mode 100644 index 00000000..35b7ba10 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/experiment_01_01-800.webp differ diff --git a/assets/img/2023-11-08-mapreason/experiment_01_02-1400.webp b/assets/img/2023-11-08-mapreason/experiment_01_02-1400.webp new file mode 100644 index 00000000..92b0c315 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/experiment_01_02-1400.webp differ diff --git a/assets/img/2023-11-08-mapreason/experiment_01_02-480.webp b/assets/img/2023-11-08-mapreason/experiment_01_02-480.webp new file mode 100644 index 00000000..7dcc5807 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/experiment_01_02-480.webp differ diff --git a/assets/img/2023-11-08-mapreason/experiment_01_02-800.webp b/assets/img/2023-11-08-mapreason/experiment_01_02-800.webp new file mode 100644 index 00000000..92b0c315 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/experiment_01_02-800.webp differ diff --git a/assets/img/2023-11-08-mapreason/experiment_02_01-1400.webp b/assets/img/2023-11-08-mapreason/experiment_02_01-1400.webp new file mode 100644 index 00000000..ae7a7ba7 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/experiment_02_01-1400.webp differ diff --git a/assets/img/2023-11-08-mapreason/experiment_02_01-480.webp b/assets/img/2023-11-08-mapreason/experiment_02_01-480.webp new file mode 100644 index 00000000..12d0388e Binary files /dev/null and b/assets/img/2023-11-08-mapreason/experiment_02_01-480.webp differ diff --git a/assets/img/2023-11-08-mapreason/experiment_02_01-800.webp 
b/assets/img/2023-11-08-mapreason/experiment_02_01-800.webp new file mode 100644 index 00000000..ae7a7ba7 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/experiment_02_01-800.webp differ diff --git a/assets/img/2023-11-08-mapreason/experiment_03_01-1400.webp b/assets/img/2023-11-08-mapreason/experiment_03_01-1400.webp new file mode 100644 index 00000000..b56d04cd Binary files /dev/null and b/assets/img/2023-11-08-mapreason/experiment_03_01-1400.webp differ diff --git a/assets/img/2023-11-08-mapreason/experiment_03_01-480.webp b/assets/img/2023-11-08-mapreason/experiment_03_01-480.webp new file mode 100644 index 00000000..ca1e1013 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/experiment_03_01-480.webp differ diff --git a/assets/img/2023-11-08-mapreason/experiment_03_01-800.webp b/assets/img/2023-11-08-mapreason/experiment_03_01-800.webp new file mode 100644 index 00000000..b56d04cd Binary files /dev/null and b/assets/img/2023-11-08-mapreason/experiment_03_01-800.webp differ diff --git a/assets/img/2023-11-08-mapreason/experiment_04_01-1400.webp b/assets/img/2023-11-08-mapreason/experiment_04_01-1400.webp new file mode 100644 index 00000000..79fc9167 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/experiment_04_01-1400.webp differ diff --git a/assets/img/2023-11-08-mapreason/experiment_04_01-480.webp b/assets/img/2023-11-08-mapreason/experiment_04_01-480.webp new file mode 100644 index 00000000..87bc3b30 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/experiment_04_01-480.webp differ diff --git a/assets/img/2023-11-08-mapreason/experiment_04_01-800.webp b/assets/img/2023-11-08-mapreason/experiment_04_01-800.webp new file mode 100644 index 00000000..79fc9167 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/experiment_04_01-800.webp differ diff --git a/assets/img/2023-11-08-mapreason/experiment_05_01-1400.webp b/assets/img/2023-11-08-mapreason/experiment_05_01-1400.webp new file mode 100644 index 
00000000..d892d4fd Binary files /dev/null and b/assets/img/2023-11-08-mapreason/experiment_05_01-1400.webp differ diff --git a/assets/img/2023-11-08-mapreason/experiment_05_01-480.webp b/assets/img/2023-11-08-mapreason/experiment_05_01-480.webp new file mode 100644 index 00000000..cd26b9c0 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/experiment_05_01-480.webp differ diff --git a/assets/img/2023-11-08-mapreason/experiment_05_01-800.webp b/assets/img/2023-11-08-mapreason/experiment_05_01-800.webp new file mode 100644 index 00000000..d892d4fd Binary files /dev/null and b/assets/img/2023-11-08-mapreason/experiment_05_01-800.webp differ diff --git a/assets/img/2023-11-08-mapreason/experiment_06_01-1400.webp b/assets/img/2023-11-08-mapreason/experiment_06_01-1400.webp new file mode 100644 index 00000000..62eed651 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/experiment_06_01-1400.webp differ diff --git a/assets/img/2023-11-08-mapreason/experiment_06_01-480.webp b/assets/img/2023-11-08-mapreason/experiment_06_01-480.webp new file mode 100644 index 00000000..295cdd7b Binary files /dev/null and b/assets/img/2023-11-08-mapreason/experiment_06_01-480.webp differ diff --git a/assets/img/2023-11-08-mapreason/experiment_06_01-800.webp b/assets/img/2023-11-08-mapreason/experiment_06_01-800.webp new file mode 100644 index 00000000..62eed651 Binary files /dev/null and b/assets/img/2023-11-08-mapreason/experiment_06_01-800.webp differ diff --git a/assets/img/2023-11-08-mapreason/experiment_06_02-1400.webp b/assets/img/2023-11-08-mapreason/experiment_06_02-1400.webp new file mode 100644 index 00000000..1de470ee Binary files /dev/null and b/assets/img/2023-11-08-mapreason/experiment_06_02-1400.webp differ diff --git a/assets/img/2023-11-08-mapreason/experiment_06_02-480.webp b/assets/img/2023-11-08-mapreason/experiment_06_02-480.webp new file mode 100644 index 00000000..5375c1e4 Binary files /dev/null and 
b/assets/img/2023-11-08-mapreason/experiment_06_02-480.webp differ diff --git a/assets/img/2023-11-08-mapreason/experiment_06_02-800.webp b/assets/img/2023-11-08-mapreason/experiment_06_02-800.webp new file mode 100644 index 00000000..1de470ee Binary files /dev/null and b/assets/img/2023-11-08-mapreason/experiment_06_02-800.webp differ diff --git a/assets/img/2023-11-08-project_blog/10-1400.webp b/assets/img/2023-11-08-project_blog/10-1400.webp new file mode 100644 index 00000000..ce8225b5 Binary files /dev/null and b/assets/img/2023-11-08-project_blog/10-1400.webp differ diff --git a/assets/img/2023-11-08-project_blog/10-480.webp b/assets/img/2023-11-08-project_blog/10-480.webp new file mode 100644 index 00000000..e890a183 Binary files /dev/null and b/assets/img/2023-11-08-project_blog/10-480.webp differ diff --git a/assets/img/2023-11-08-project_blog/10-800.webp b/assets/img/2023-11-08-project_blog/10-800.webp new file mode 100644 index 00000000..ce8225b5 Binary files /dev/null and b/assets/img/2023-11-08-project_blog/10-800.webp differ diff --git a/assets/img/2023-11-08-project_blog/11-1400.webp b/assets/img/2023-11-08-project_blog/11-1400.webp new file mode 100644 index 00000000..b9410833 Binary files /dev/null and b/assets/img/2023-11-08-project_blog/11-1400.webp differ diff --git a/assets/img/2023-11-08-project_blog/11-480.webp b/assets/img/2023-11-08-project_blog/11-480.webp new file mode 100644 index 00000000..2a916f52 Binary files /dev/null and b/assets/img/2023-11-08-project_blog/11-480.webp differ diff --git a/assets/img/2023-11-08-project_blog/11-800.webp b/assets/img/2023-11-08-project_blog/11-800.webp new file mode 100644 index 00000000..b9410833 Binary files /dev/null and b/assets/img/2023-11-08-project_blog/11-800.webp differ diff --git a/assets/img/2023-11-08-project_blog/12-1400.webp b/assets/img/2023-11-08-project_blog/12-1400.webp new file mode 100644 index 00000000..06b75e0f Binary files /dev/null and 
b/assets/img/2023-11-08-project_blog/12-1400.webp differ diff --git a/assets/img/2023-11-08-project_blog/12-480.webp b/assets/img/2023-11-08-project_blog/12-480.webp new file mode 100644 index 00000000..4fb64669 Binary files /dev/null and b/assets/img/2023-11-08-project_blog/12-480.webp differ diff --git a/assets/img/2023-11-08-project_blog/12-800.webp b/assets/img/2023-11-08-project_blog/12-800.webp new file mode 100644 index 00000000..06b75e0f Binary files /dev/null and b/assets/img/2023-11-08-project_blog/12-800.webp differ diff --git a/assets/img/2023-11-08-project_blog/7-1400.webp b/assets/img/2023-11-08-project_blog/7-1400.webp new file mode 100644 index 00000000..37aa7e8d Binary files /dev/null and b/assets/img/2023-11-08-project_blog/7-1400.webp differ diff --git a/assets/img/2023-11-08-project_blog/7-480.webp b/assets/img/2023-11-08-project_blog/7-480.webp new file mode 100644 index 00000000..77fdb68d Binary files /dev/null and b/assets/img/2023-11-08-project_blog/7-480.webp differ diff --git a/assets/img/2023-11-08-project_blog/7-800.webp b/assets/img/2023-11-08-project_blog/7-800.webp new file mode 100644 index 00000000..37aa7e8d Binary files /dev/null and b/assets/img/2023-11-08-project_blog/7-800.webp differ diff --git a/assets/img/2023-11-08-project_blog/8-1400.webp b/assets/img/2023-11-08-project_blog/8-1400.webp new file mode 100644 index 00000000..a2b1e89e Binary files /dev/null and b/assets/img/2023-11-08-project_blog/8-1400.webp differ diff --git a/assets/img/2023-11-08-project_blog/8-480.webp b/assets/img/2023-11-08-project_blog/8-480.webp new file mode 100644 index 00000000..c09934e6 Binary files /dev/null and b/assets/img/2023-11-08-project_blog/8-480.webp differ diff --git a/assets/img/2023-11-08-project_blog/8-800.webp b/assets/img/2023-11-08-project_blog/8-800.webp new file mode 100644 index 00000000..a2b1e89e Binary files /dev/null and b/assets/img/2023-11-08-project_blog/8-800.webp differ diff --git 
a/assets/img/2023-11-08-project_blog/9-1400.webp b/assets/img/2023-11-08-project_blog/9-1400.webp new file mode 100644 index 00000000..dfac01c4 Binary files /dev/null and b/assets/img/2023-11-08-project_blog/9-1400.webp differ diff --git a/assets/img/2023-11-08-project_blog/9-480.webp b/assets/img/2023-11-08-project_blog/9-480.webp new file mode 100644 index 00000000..c4f72887 Binary files /dev/null and b/assets/img/2023-11-08-project_blog/9-480.webp differ diff --git a/assets/img/2023-11-08-project_blog/9-800.webp b/assets/img/2023-11-08-project_blog/9-800.webp new file mode 100644 index 00000000..dfac01c4 Binary files /dev/null and b/assets/img/2023-11-08-project_blog/9-800.webp differ diff --git a/assets/img/2023-11-08-project_blog/ICLR-logo-1400.webp b/assets/img/2023-11-08-project_blog/ICLR-logo-1400.webp new file mode 100644 index 00000000..d56968ba Binary files /dev/null and b/assets/img/2023-11-08-project_blog/ICLR-logo-1400.webp differ diff --git a/assets/img/2023-11-08-project_blog/ICLR-logo-480.webp b/assets/img/2023-11-08-project_blog/ICLR-logo-480.webp new file mode 100644 index 00000000..c9d42d7e Binary files /dev/null and b/assets/img/2023-11-08-project_blog/ICLR-logo-480.webp differ diff --git a/assets/img/2023-11-08-project_blog/ICLR-logo-800.webp b/assets/img/2023-11-08-project_blog/ICLR-logo-800.webp new file mode 100644 index 00000000..d56968ba Binary files /dev/null and b/assets/img/2023-11-08-project_blog/ICLR-logo-800.webp differ diff --git a/assets/img/2023-11-08-project_blog/ICLR-logo-dark-1400.webp b/assets/img/2023-11-08-project_blog/ICLR-logo-dark-1400.webp new file mode 100644 index 00000000..5ed49089 Binary files /dev/null and b/assets/img/2023-11-08-project_blog/ICLR-logo-dark-1400.webp differ diff --git a/assets/img/2023-11-08-project_blog/ICLR-logo-dark-480.webp b/assets/img/2023-11-08-project_blog/ICLR-logo-dark-480.webp new file mode 100644 index 00000000..7f0830c1 Binary files /dev/null and 
b/assets/img/2023-11-08-project_blog/ICLR-logo-dark-480.webp differ diff --git a/assets/img/2023-11-08-project_blog/ICLR-logo-dark-800.webp b/assets/img/2023-11-08-project_blog/ICLR-logo-dark-800.webp new file mode 100644 index 00000000..5ed49089 Binary files /dev/null and b/assets/img/2023-11-08-project_blog/ICLR-logo-dark-800.webp differ diff --git a/assets/img/2023-11-08-project_blog/iclr-1400.webp b/assets/img/2023-11-08-project_blog/iclr-1400.webp new file mode 100644 index 00000000..d56968ba Binary files /dev/null and b/assets/img/2023-11-08-project_blog/iclr-1400.webp differ diff --git a/assets/img/2023-11-08-project_blog/iclr-480.webp b/assets/img/2023-11-08-project_blog/iclr-480.webp new file mode 100644 index 00000000..c9d42d7e Binary files /dev/null and b/assets/img/2023-11-08-project_blog/iclr-480.webp differ diff --git a/assets/img/2023-11-08-project_blog/iclr-800.webp b/assets/img/2023-11-08-project_blog/iclr-800.webp new file mode 100644 index 00000000..d56968ba Binary files /dev/null and b/assets/img/2023-11-08-project_blog/iclr-800.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/add_glue-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/add_glue-1400.webp new file mode 100644 index 00000000..6c94a4bb Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/add_glue-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/add_glue-480.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/add_glue-480.webp new file mode 100644 index 00000000..5028ac26 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/add_glue-480.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/add_glue-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/add_glue-800.webp new file mode 100644 index 
00000000..6c94a4bb Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/add_glue-800.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/add_hate_speech-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/add_hate_speech-1400.webp new file mode 100644 index 00000000..dab9191d Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/add_hate_speech-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/add_hate_speech-480.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/add_hate_speech-480.webp new file mode 100644 index 00000000..92123843 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/add_hate_speech-480.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/add_hate_speech-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/add_hate_speech-800.webp new file mode 100644 index 00000000..dab9191d Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/add_hate_speech-800.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/add_sick-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/add_sick-1400.webp new file mode 100644 index 00000000..fc9e316f Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/add_sick-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/add_sick-480.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/add_sick-480.webp new file mode 100644 index 00000000..c95b79cd Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/add_sick-480.webp differ diff --git 
a/assets/img/2023-11-08-representationengineering-incontextlearning/add_sick-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/add_sick-800.webp new file mode 100644 index 00000000..fc9e316f Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/add_sick-800.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/add_tomato-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/add_tomato-1400.webp new file mode 100644 index 00000000..1ff99568 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/add_tomato-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/add_tomato-480.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/add_tomato-480.webp new file mode 100644 index 00000000..2e682646 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/add_tomato-480.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/add_tomato-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/add_tomato-800.webp new file mode 100644 index 00000000..1ff99568 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/add_tomato-800.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_glue-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_glue-1400.webp new file mode 100644 index 00000000..9751c696 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_glue-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_glue-480.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_glue-480.webp new file mode 
100644 index 00000000..7cc1b574 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_glue-480.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_glue-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_glue-800.webp new file mode 100644 index 00000000..9751c696 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_glue-800.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_hate_speech-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_hate_speech-1400.webp new file mode 100644 index 00000000..f0966752 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_hate_speech-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_hate_speech-480.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_hate_speech-480.webp new file mode 100644 index 00000000..dff00e1d Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_hate_speech-480.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_hate_speech-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_hate_speech-800.webp new file mode 100644 index 00000000..f0966752 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_hate_speech-800.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_sick-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_sick-1400.webp new file mode 100644 index 00000000..b6453e85 Binary files /dev/null and 
b/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_sick-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_sick-480.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_sick-480.webp new file mode 100644 index 00000000..10d0fbbe Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_sick-480.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_sick-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_sick-800.webp new file mode 100644 index 00000000..b6453e85 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_sick-800.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_tomato-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_tomato-1400.webp new file mode 100644 index 00000000..ba2cc84e Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_tomato-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_tomato-480.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_tomato-480.webp new file mode 100644 index 00000000..166d206e Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_tomato-480.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_tomato-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_tomato-800.webp new file mode 100644 index 00000000..ba2cc84e Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_tomato-800.webp differ diff --git 
a/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_ethos_religion-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_ethos_religion-1400.webp new file mode 100644 index 00000000..cdca4765 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_ethos_religion-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_ethos_religion-480.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_ethos_religion-480.webp new file mode 100644 index 00000000..4fc11098 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_ethos_religion-480.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_ethos_religion-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_ethos_religion-800.webp new file mode 100644 index 00000000..cdca4765 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_ethos_religion-800.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_medical_questions_pair-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_medical_questions_pair-1400.webp new file mode 100644 index 00000000..02d55644 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_medical_questions_pair-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_medical_questions_pair-480.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_medical_questions_pair-480.webp new file mode 100644 index 00000000..f28aa1c9 Binary files /dev/null and 
b/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_medical_questions_pair-480.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_medical_questions_pair-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_medical_questions_pair-800.webp new file mode 100644 index 00000000..02d55644 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_medical_questions_pair-800.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_rotten_tomatoes-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_rotten_tomatoes-1400.webp new file mode 100644 index 00000000..cf296edb Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_rotten_tomatoes-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_rotten_tomatoes-480.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_rotten_tomatoes-480.webp new file mode 100644 index 00000000..80137faf Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_rotten_tomatoes-480.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_rotten_tomatoes-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_rotten_tomatoes-800.webp new file mode 100644 index 00000000..cf296edb Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_rotten_tomatoes-800.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_glue-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_glue-1400.webp new file mode 100644 index 00000000..d93cd843 Binary files /dev/null and 
b/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_glue-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_glue-480.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_glue-480.webp new file mode 100644 index 00000000..4a016b8f Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_glue-480.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_glue-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_glue-800.webp new file mode 100644 index 00000000..d93cd843 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_glue-800.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_hate_speech-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_hate_speech-1400.webp new file mode 100644 index 00000000..92a9645e Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_hate_speech-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_hate_speech-480.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_hate_speech-480.webp new file mode 100644 index 00000000..fe26b557 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_hate_speech-480.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_hate_speech-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_hate_speech-800.webp new file mode 100644 index 00000000..92a9645e Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_hate_speech-800.webp 
differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_sick-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_sick-1400.webp new file mode 100644 index 00000000..d37aa719 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_sick-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_sick-480.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_sick-480.webp new file mode 100644 index 00000000..4bed3e57 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_sick-480.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_sick-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_sick-800.webp new file mode 100644 index 00000000..d37aa719 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_sick-800.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_tomato-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_tomato-1400.webp new file mode 100644 index 00000000..356310e5 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_tomato-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_tomato-480.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_tomato-480.webp new file mode 100644 index 00000000..433df00d Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_tomato-480.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_tomato-800.webp 
b/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_tomato-800.webp new file mode 100644 index 00000000..356310e5 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_tomato-800.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/proj_glue-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/proj_glue-1400.webp new file mode 100644 index 00000000..a0324159 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/proj_glue-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/proj_glue-480.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/proj_glue-480.webp new file mode 100644 index 00000000..f59208c8 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/proj_glue-480.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/proj_glue-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/proj_glue-800.webp new file mode 100644 index 00000000..a0324159 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/proj_glue-800.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/proj_hate_speech-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/proj_hate_speech-1400.webp new file mode 100644 index 00000000..e6957815 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/proj_hate_speech-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/proj_hate_speech-480.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/proj_hate_speech-480.webp new file mode 100644 index 00000000..1247b09f Binary files /dev/null and 
b/assets/img/2023-11-08-representationengineering-incontextlearning/proj_hate_speech-480.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/proj_hate_speech-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/proj_hate_speech-800.webp new file mode 100644 index 00000000..e6957815 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/proj_hate_speech-800.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/proj_sick-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/proj_sick-1400.webp new file mode 100644 index 00000000..5bd733ac Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/proj_sick-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/proj_sick-480.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/proj_sick-480.webp new file mode 100644 index 00000000..9a83fd06 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/proj_sick-480.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/proj_sick-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/proj_sick-800.webp new file mode 100644 index 00000000..5bd733ac Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/proj_sick-800.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/proj_tomato-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/proj_tomato-1400.webp new file mode 100644 index 00000000..922a91ff Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/proj_tomato-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/proj_tomato-480.webp 
b/assets/img/2023-11-08-representationengineering-incontextlearning/proj_tomato-480.webp new file mode 100644 index 00000000..371ccb04 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/proj_tomato-480.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/proj_tomato-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/proj_tomato-800.webp new file mode 100644 index 00000000..922a91ff Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/proj_tomato-800.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/random_glue-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/random_glue-1400.webp new file mode 100644 index 00000000..304d4b01 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/random_glue-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/random_glue-480.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/random_glue-480.webp new file mode 100644 index 00000000..4b0fd767 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/random_glue-480.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/random_glue-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/random_glue-800.webp new file mode 100644 index 00000000..304d4b01 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/random_glue-800.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/random_hate_speech-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/random_hate_speech-1400.webp new file mode 100644 index 00000000..991c3dfb Binary files /dev/null and 
b/assets/img/2023-11-08-representationengineering-incontextlearning/random_hate_speech-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/random_hate_speech-480.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/random_hate_speech-480.webp new file mode 100644 index 00000000..5ec2c97b Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/random_hate_speech-480.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/random_hate_speech-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/random_hate_speech-800.webp new file mode 100644 index 00000000..991c3dfb Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/random_hate_speech-800.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/random_sick-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/random_sick-1400.webp new file mode 100644 index 00000000..d42d154b Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/random_sick-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/random_sick-480.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/random_sick-480.webp new file mode 100644 index 00000000..5bbb469c Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/random_sick-480.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/random_sick-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/random_sick-800.webp new file mode 100644 index 00000000..d42d154b Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/random_sick-800.webp differ diff --git 
a/assets/img/2023-11-08-representationengineering-incontextlearning/random_tomato-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/random_tomato-1400.webp new file mode 100644 index 00000000..28b683e3 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/random_tomato-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/random_tomato-480.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/random_tomato-480.webp new file mode 100644 index 00000000..da41da7e Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/random_tomato-480.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/random_tomato-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/random_tomato-800.webp new file mode 100644 index 00000000..28b683e3 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/random_tomato-800.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/rotten_tomatoes_token_level-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/rotten_tomatoes_token_level-1400.webp new file mode 100644 index 00000000..32e5a264 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/rotten_tomatoes_token_level-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/rotten_tomatoes_token_level-480.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/rotten_tomatoes_token_level-480.webp new file mode 100644 index 00000000..df0e6089 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/rotten_tomatoes_token_level-480.webp differ diff --git 
a/assets/img/2023-11-08-representationengineering-incontextlearning/rotten_tomatoes_token_level-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/rotten_tomatoes_token_level-800.webp new file mode 100644 index 00000000..32e5a264 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/rotten_tomatoes_token_level-800.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_data-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_data-1400.webp new file mode 100644 index 00000000..3fb821ac Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_data-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_data-480.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_data-480.webp new file mode 100644 index 00000000..9fd2d0aa Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_data-480.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_data-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_data-800.webp new file mode 100644 index 00000000..3fb821ac Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_data-800.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_layers-1400.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_layers-1400.webp new file mode 100644 index 00000000..35877609 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_layers-1400.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_layers-480.webp 
b/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_layers-480.webp new file mode 100644 index 00000000..fa11f5ab Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_layers-480.webp differ diff --git a/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_layers-800.webp b/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_layers-800.webp new file mode 100644 index 00000000..35877609 Binary files /dev/null and b/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_layers-800.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig10_raw_umap_vs_geneformer_umap_between_type-1400.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig10_raw_umap_vs_geneformer_umap_between_type-1400.webp new file mode 100644 index 00000000..348b847e Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig10_raw_umap_vs_geneformer_umap_between_type-1400.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig10_raw_umap_vs_geneformer_umap_between_type-480.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig10_raw_umap_vs_geneformer_umap_between_type-480.webp new file mode 100644 index 00000000..2146db6f Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig10_raw_umap_vs_geneformer_umap_between_type-480.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig10_raw_umap_vs_geneformer_umap_between_type-800.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig10_raw_umap_vs_geneformer_umap_between_type-800.webp new file mode 100644 index 00000000..348b847e Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig10_raw_umap_vs_geneformer_umap_between_type-800.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig11_SUP_raw_umap_vs_geneformer_raw_within_type-1400.webp 
b/assets/img/2023-11-08-scRNAseq-assumptions/fig11_SUP_raw_umap_vs_geneformer_raw_within_type-1400.webp new file mode 100644 index 00000000..c8446bde Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig11_SUP_raw_umap_vs_geneformer_raw_within_type-1400.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig11_SUP_raw_umap_vs_geneformer_raw_within_type-480.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig11_SUP_raw_umap_vs_geneformer_raw_within_type-480.webp new file mode 100644 index 00000000..d75d1910 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig11_SUP_raw_umap_vs_geneformer_raw_within_type-480.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig11_SUP_raw_umap_vs_geneformer_raw_within_type-800.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig11_SUP_raw_umap_vs_geneformer_raw_within_type-800.webp new file mode 100644 index 00000000..c8446bde Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig11_SUP_raw_umap_vs_geneformer_raw_within_type-800.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig12_SUP_raw_umap_vs_geneformer_raw_between_type-1400.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig12_SUP_raw_umap_vs_geneformer_raw_between_type-1400.webp new file mode 100644 index 00000000..ebd60906 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig12_SUP_raw_umap_vs_geneformer_raw_between_type-1400.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig12_SUP_raw_umap_vs_geneformer_raw_between_type-480.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig12_SUP_raw_umap_vs_geneformer_raw_between_type-480.webp new file mode 100644 index 00000000..548b9d5e Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig12_SUP_raw_umap_vs_geneformer_raw_between_type-480.webp differ diff --git 
a/assets/img/2023-11-08-scRNAseq-assumptions/fig12_SUP_raw_umap_vs_geneformer_raw_between_type-800.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig12_SUP_raw_umap_vs_geneformer_raw_between_type-800.webp new file mode 100644 index 00000000..ebd60906 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig12_SUP_raw_umap_vs_geneformer_raw_between_type-800.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig1_scRNA_seq_overview-1400.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig1_scRNA_seq_overview-1400.webp new file mode 100644 index 00000000..a2ce727b Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig1_scRNA_seq_overview-1400.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig1_scRNA_seq_overview-480.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig1_scRNA_seq_overview-480.webp new file mode 100644 index 00000000..eb5ff935 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig1_scRNA_seq_overview-480.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig1_scRNA_seq_overview-800.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig1_scRNA_seq_overview-800.webp new file mode 100644 index 00000000..a2ce727b Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig1_scRNA_seq_overview-800.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig2_cell2sentence_schematic-1400.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_cell2sentence_schematic-1400.webp new file mode 100644 index 00000000..67b8c631 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_cell2sentence_schematic-1400.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig2_cell2sentence_schematic-480.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_cell2sentence_schematic-480.webp new file mode 100644 index 00000000..5c39c20a Binary files /dev/null and 
b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_cell2sentence_schematic-480.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig2_cell2sentence_schematic-800.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_cell2sentence_schematic-800.webp new file mode 100644 index 00000000..67b8c631 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_cell2sentence_schematic-800.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig2_genePT_schematic-1400.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_genePT_schematic-1400.webp new file mode 100644 index 00000000..ee58b324 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_genePT_schematic-1400.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig2_genePT_schematic-480.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_genePT_schematic-480.webp new file mode 100644 index 00000000..8f675a85 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_genePT_schematic-480.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig2_genePT_schematic-800.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_genePT_schematic-800.webp new file mode 100644 index 00000000..ee58b324 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_genePT_schematic-800.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig2_geneformer_schematic-1400.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_geneformer_schematic-1400.webp new file mode 100644 index 00000000..f8a0de1d Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_geneformer_schematic-1400.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig2_geneformer_schematic-480.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_geneformer_schematic-480.webp new file mode 100644 index 00000000..11c2509a Binary files /dev/null and 
b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_geneformer_schematic-480.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig2_geneformer_schematic-800.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_geneformer_schematic-800.webp new file mode 100644 index 00000000..f8a0de1d Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_geneformer_schematic-800.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig2_rank_value_schematic-1400.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_rank_value_schematic-1400.webp new file mode 100644 index 00000000..6eb8b675 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_rank_value_schematic-1400.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig2_rank_value_schematic-480.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_rank_value_schematic-480.webp new file mode 100644 index 00000000..e34bba6c Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_rank_value_schematic-480.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig2_rank_value_schematic-800.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_rank_value_schematic-800.webp new file mode 100644 index 00000000..6eb8b675 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_rank_value_schematic-800.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scBERT_schematic-1400.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scBERT_schematic-1400.webp new file mode 100644 index 00000000..fbad1c9d Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scBERT_schematic-1400.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scBERT_schematic-480.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scBERT_schematic-480.webp new file mode 100644 index 00000000..d6f7c289 Binary files /dev/null and 
b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scBERT_schematic-480.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scBERT_schematic-800.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scBERT_schematic-800.webp new file mode 100644 index 00000000..fbad1c9d Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scBERT_schematic-800.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scFoundation_schematic-1400.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scFoundation_schematic-1400.webp new file mode 100644 index 00000000..0ee65148 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scFoundation_schematic-1400.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scFoundation_schematic-480.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scFoundation_schematic-480.webp new file mode 100644 index 00000000..a6ae9a8a Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scFoundation_schematic-480.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scFoundation_schematic-800.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scFoundation_schematic-800.webp new file mode 100644 index 00000000..0ee65148 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scFoundation_schematic-800.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scGPT_schematic-1400.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scGPT_schematic-1400.webp new file mode 100644 index 00000000..a9b8e5de Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scGPT_schematic-1400.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scGPT_schematic-480.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scGPT_schematic-480.webp new file mode 100644 index 00000000..1d8332f7 Binary files /dev/null and 
b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scGPT_schematic-480.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scGPT_schematic-800.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scGPT_schematic-800.webp new file mode 100644 index 00000000..a9b8e5de Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scGPT_schematic-800.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig3_cell_type_hist-1400.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig3_cell_type_hist-1400.webp new file mode 100644 index 00000000..9a99d050 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig3_cell_type_hist-1400.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig3_cell_type_hist-480.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig3_cell_type_hist-480.webp new file mode 100644 index 00000000..850249d4 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig3_cell_type_hist-480.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig3_cell_type_hist-800.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig3_cell_type_hist-800.webp new file mode 100644 index 00000000..9a99d050 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig3_cell_type_hist-800.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig4_comparison_schematic-1400.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig4_comparison_schematic-1400.webp new file mode 100644 index 00000000..a4ed35c6 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig4_comparison_schematic-1400.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig4_comparison_schematic-480.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig4_comparison_schematic-480.webp new file mode 100644 index 00000000..793719d5 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig4_comparison_schematic-480.webp 
differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig4_comparison_schematic-800.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig4_comparison_schematic-800.webp new file mode 100644 index 00000000..a4ed35c6 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig4_comparison_schematic-800.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig4_sparsity_full_dataset-1400.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig4_sparsity_full_dataset-1400.webp new file mode 100644 index 00000000..ff25fcf1 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig4_sparsity_full_dataset-1400.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig4_sparsity_full_dataset-480.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig4_sparsity_full_dataset-480.webp new file mode 100644 index 00000000..9f42a20f Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig4_sparsity_full_dataset-480.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig4_sparsity_full_dataset-800.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig4_sparsity_full_dataset-800.webp new file mode 100644 index 00000000..ff25fcf1 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig4_sparsity_full_dataset-800.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig5_sparsity_subset-1400.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig5_sparsity_subset-1400.webp new file mode 100644 index 00000000..d17fce9f Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig5_sparsity_subset-1400.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig5_sparsity_subset-480.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig5_sparsity_subset-480.webp new file mode 100644 index 00000000..606f5e53 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig5_sparsity_subset-480.webp differ diff --git 
a/assets/img/2023-11-08-scRNAseq-assumptions/fig5_sparsity_subset-800.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig5_sparsity_subset-800.webp new file mode 100644 index 00000000..d17fce9f Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig5_sparsity_subset-800.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig6_combined_measure_comparison-1400.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig6_combined_measure_comparison-1400.webp new file mode 100644 index 00000000..6cdfb32d Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig6_combined_measure_comparison-1400.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig6_combined_measure_comparison-480.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig6_combined_measure_comparison-480.webp new file mode 100644 index 00000000..99112386 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig6_combined_measure_comparison-480.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig6_combined_measure_comparison-800.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig6_combined_measure_comparison-800.webp new file mode 100644 index 00000000..6cdfb32d Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig6_combined_measure_comparison-800.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig7_raw_umap_vs_rank_corr_within_type-1400.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig7_raw_umap_vs_rank_corr_within_type-1400.webp new file mode 100644 index 00000000..2c246b3f Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig7_raw_umap_vs_rank_corr_within_type-1400.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig7_raw_umap_vs_rank_corr_within_type-480.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig7_raw_umap_vs_rank_corr_within_type-480.webp new file mode 100644 index 00000000..ad72c2ad Binary files 
/dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig7_raw_umap_vs_rank_corr_within_type-480.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig7_raw_umap_vs_rank_corr_within_type-800.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig7_raw_umap_vs_rank_corr_within_type-800.webp new file mode 100644 index 00000000..2c246b3f Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig7_raw_umap_vs_rank_corr_within_type-800.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig8_raw_umap_vs_rank_corr_between_type-1400.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig8_raw_umap_vs_rank_corr_between_type-1400.webp new file mode 100644 index 00000000..82cea242 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig8_raw_umap_vs_rank_corr_between_type-1400.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig8_raw_umap_vs_rank_corr_between_type-480.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig8_raw_umap_vs_rank_corr_between_type-480.webp new file mode 100644 index 00000000..7dcc60ce Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig8_raw_umap_vs_rank_corr_between_type-480.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig8_raw_umap_vs_rank_corr_between_type-800.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig8_raw_umap_vs_rank_corr_between_type-800.webp new file mode 100644 index 00000000..82cea242 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig8_raw_umap_vs_rank_corr_between_type-800.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig9_raw_umap_vs_geneformer_umap_within_type-1400.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig9_raw_umap_vs_geneformer_umap_within_type-1400.webp new file mode 100644 index 00000000..f423cfe2 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig9_raw_umap_vs_geneformer_umap_within_type-1400.webp differ 
diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig9_raw_umap_vs_geneformer_umap_within_type-480.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig9_raw_umap_vs_geneformer_umap_within_type-480.webp new file mode 100644 index 00000000..09b4002b Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig9_raw_umap_vs_geneformer_umap_within_type-480.webp differ diff --git a/assets/img/2023-11-08-scRNAseq-assumptions/fig9_raw_umap_vs_geneformer_umap_within_type-800.webp b/assets/img/2023-11-08-scRNAseq-assumptions/fig9_raw_umap_vs_geneformer_umap_within_type-800.webp new file mode 100644 index 00000000..f423cfe2 Binary files /dev/null and b/assets/img/2023-11-08-scRNAseq-assumptions/fig9_raw_umap_vs_geneformer_umap_within_type-800.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/10-1400.webp b/assets/img/2023-11-08-sentence-embeddings/10-1400.webp new file mode 100644 index 00000000..ce8225b5 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/10-1400.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/10-480.webp b/assets/img/2023-11-08-sentence-embeddings/10-480.webp new file mode 100644 index 00000000..e890a183 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/10-480.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/10-800.webp b/assets/img/2023-11-08-sentence-embeddings/10-800.webp new file mode 100644 index 00000000..ce8225b5 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/10-800.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/11-1400.webp b/assets/img/2023-11-08-sentence-embeddings/11-1400.webp new file mode 100644 index 00000000..b9410833 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/11-1400.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/11-480.webp b/assets/img/2023-11-08-sentence-embeddings/11-480.webp new file mode 100644 index 00000000..2a916f52 Binary files 
/dev/null and b/assets/img/2023-11-08-sentence-embeddings/11-480.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/11-800.webp b/assets/img/2023-11-08-sentence-embeddings/11-800.webp new file mode 100644 index 00000000..b9410833 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/11-800.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/12-1400.webp b/assets/img/2023-11-08-sentence-embeddings/12-1400.webp new file mode 100644 index 00000000..06b75e0f Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/12-1400.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/12-480.webp b/assets/img/2023-11-08-sentence-embeddings/12-480.webp new file mode 100644 index 00000000..4fb64669 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/12-480.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/12-800.webp b/assets/img/2023-11-08-sentence-embeddings/12-800.webp new file mode 100644 index 00000000..06b75e0f Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/12-800.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/7-1400.webp b/assets/img/2023-11-08-sentence-embeddings/7-1400.webp new file mode 100644 index 00000000..37aa7e8d Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/7-1400.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/7-480.webp b/assets/img/2023-11-08-sentence-embeddings/7-480.webp new file mode 100644 index 00000000..77fdb68d Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/7-480.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/7-800.webp b/assets/img/2023-11-08-sentence-embeddings/7-800.webp new file mode 100644 index 00000000..37aa7e8d Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/7-800.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/8-1400.webp 
b/assets/img/2023-11-08-sentence-embeddings/8-1400.webp new file mode 100644 index 00000000..a2b1e89e Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/8-1400.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/8-480.webp b/assets/img/2023-11-08-sentence-embeddings/8-480.webp new file mode 100644 index 00000000..c09934e6 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/8-480.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/8-800.webp b/assets/img/2023-11-08-sentence-embeddings/8-800.webp new file mode 100644 index 00000000..a2b1e89e Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/8-800.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/9-1400.webp b/assets/img/2023-11-08-sentence-embeddings/9-1400.webp new file mode 100644 index 00000000..dfac01c4 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/9-1400.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/9-480.webp b/assets/img/2023-11-08-sentence-embeddings/9-480.webp new file mode 100644 index 00000000..c4f72887 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/9-480.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/9-800.webp b/assets/img/2023-11-08-sentence-embeddings/9-800.webp new file mode 100644 index 00000000..dfac01c4 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/9-800.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/architecture_diagram-1400.webp b/assets/img/2023-11-08-sentence-embeddings/architecture_diagram-1400.webp new file mode 100644 index 00000000..15f64687 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/architecture_diagram-1400.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/architecture_diagram-480.webp b/assets/img/2023-11-08-sentence-embeddings/architecture_diagram-480.webp new file mode 100644 index 00000000..d2c8729c 
Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/architecture_diagram-480.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/architecture_diagram-800.webp b/assets/img/2023-11-08-sentence-embeddings/architecture_diagram-800.webp new file mode 100644 index 00000000..15f64687 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/architecture_diagram-800.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/bad_model_all_means-1400.webp b/assets/img/2023-11-08-sentence-embeddings/bad_model_all_means-1400.webp new file mode 100644 index 00000000..f362213b Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/bad_model_all_means-1400.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/bad_model_all_means-480.webp b/assets/img/2023-11-08-sentence-embeddings/bad_model_all_means-480.webp new file mode 100644 index 00000000..5dd5bdc8 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/bad_model_all_means-480.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/bad_model_all_means-800.webp b/assets/img/2023-11-08-sentence-embeddings/bad_model_all_means-800.webp new file mode 100644 index 00000000..f362213b Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/bad_model_all_means-800.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/bad_model_full_means-1400.webp b/assets/img/2023-11-08-sentence-embeddings/bad_model_full_means-1400.webp new file mode 100644 index 00000000..fece0adc Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/bad_model_full_means-1400.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/bad_model_full_means-480.webp b/assets/img/2023-11-08-sentence-embeddings/bad_model_full_means-480.webp new file mode 100644 index 00000000..249b6ee0 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/bad_model_full_means-480.webp differ diff --git 
a/assets/img/2023-11-08-sentence-embeddings/bad_model_full_means-800.webp b/assets/img/2023-11-08-sentence-embeddings/bad_model_full_means-800.webp new file mode 100644 index 00000000..fece0adc Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/bad_model_full_means-800.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/bad_model_pareto-1400.webp b/assets/img/2023-11-08-sentence-embeddings/bad_model_pareto-1400.webp new file mode 100644 index 00000000..e5953a91 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/bad_model_pareto-1400.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/bad_model_pareto-480.webp b/assets/img/2023-11-08-sentence-embeddings/bad_model_pareto-480.webp new file mode 100644 index 00000000..a5ad9b91 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/bad_model_pareto-480.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/bad_model_pareto-800.webp b/assets/img/2023-11-08-sentence-embeddings/bad_model_pareto-800.webp new file mode 100644 index 00000000..e5953a91 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/bad_model_pareto-800.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/bad_model_time-1400.webp b/assets/img/2023-11-08-sentence-embeddings/bad_model_time-1400.webp new file mode 100644 index 00000000..aaa174ca Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/bad_model_time-1400.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/bad_model_time-480.webp b/assets/img/2023-11-08-sentence-embeddings/bad_model_time-480.webp new file mode 100644 index 00000000..ab40f8ce Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/bad_model_time-480.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/bad_model_time-800.webp b/assets/img/2023-11-08-sentence-embeddings/bad_model_time-800.webp new file mode 100644 index 00000000..aaa174ca 
Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/bad_model_time-800.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/good_model_nolemma_nopoison_pareto-1400.webp b/assets/img/2023-11-08-sentence-embeddings/good_model_nolemma_nopoison_pareto-1400.webp new file mode 100644 index 00000000..64b00894 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/good_model_nolemma_nopoison_pareto-1400.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/good_model_nolemma_nopoison_pareto-480.webp b/assets/img/2023-11-08-sentence-embeddings/good_model_nolemma_nopoison_pareto-480.webp new file mode 100644 index 00000000..a001ae82 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/good_model_nolemma_nopoison_pareto-480.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/good_model_nolemma_nopoison_pareto-800.webp b/assets/img/2023-11-08-sentence-embeddings/good_model_nolemma_nopoison_pareto-800.webp new file mode 100644 index 00000000..64b00894 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/good_model_nolemma_nopoison_pareto-800.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/good_model_time-1400.webp b/assets/img/2023-11-08-sentence-embeddings/good_model_time-1400.webp new file mode 100644 index 00000000..e190ce82 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/good_model_time-1400.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/good_model_time-480.webp b/assets/img/2023-11-08-sentence-embeddings/good_model_time-480.webp new file mode 100644 index 00000000..502a52ab Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/good_model_time-480.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/good_model_time-800.webp b/assets/img/2023-11-08-sentence-embeddings/good_model_time-800.webp new file mode 100644 index 00000000..e190ce82 Binary files /dev/null and 
b/assets/img/2023-11-08-sentence-embeddings/good_model_time-800.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_all_means-1400.webp b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_all_means-1400.webp new file mode 100644 index 00000000..0c3b49c7 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_all_means-1400.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_all_means-480.webp b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_all_means-480.webp new file mode 100644 index 00000000..a83edc6d Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_all_means-480.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_all_means-800.webp b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_all_means-800.webp new file mode 100644 index 00000000..0c3b49c7 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_all_means-800.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_full_means-1400.webp b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_full_means-1400.webp new file mode 100644 index 00000000..c181c00d Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_full_means-1400.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_full_means-480.webp b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_full_means-480.webp new file mode 100644 index 00000000..6aa180c7 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_full_means-480.webp differ diff --git 
a/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_full_means-800.webp b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_full_means-800.webp new file mode 100644 index 00000000..c181c00d Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_full_means-800.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_pareto-1400.webp b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_pareto-1400.webp new file mode 100644 index 00000000..3d7e2963 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_pareto-1400.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_pareto-480.webp b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_pareto-480.webp new file mode 100644 index 00000000..c3bed1ad Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_pareto-480.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_pareto-800.webp b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_pareto-800.webp new file mode 100644 index 00000000..3d7e2963 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_pareto-800.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_all_means-1400.webp b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_all_means-1400.webp new file mode 100644 index 00000000..205a3800 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_all_means-1400.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_all_means-480.webp 
b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_all_means-480.webp new file mode 100644 index 00000000..04ed6d00 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_all_means-480.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_all_means-800.webp b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_all_means-800.webp new file mode 100644 index 00000000..205a3800 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_all_means-800.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_pareto-1400.webp b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_pareto-1400.webp new file mode 100644 index 00000000..22e19e65 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_pareto-1400.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_pareto-480.webp b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_pareto-480.webp new file mode 100644 index 00000000..af3d8747 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_pareto-480.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_pareto-800.webp b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_pareto-800.webp new file mode 100644 index 00000000..22e19e65 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_pareto-800.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/good_models_yeslemma_yesnopoison_linguisticmath-1400.webp b/assets/img/2023-11-08-sentence-embeddings/good_models_yeslemma_yesnopoison_linguisticmath-1400.webp new file mode 100644 index 00000000..1b125af4 Binary files 
/dev/null and b/assets/img/2023-11-08-sentence-embeddings/good_models_yeslemma_yesnopoison_linguisticmath-1400.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/good_models_yeslemma_yesnopoison_linguisticmath-480.webp b/assets/img/2023-11-08-sentence-embeddings/good_models_yeslemma_yesnopoison_linguisticmath-480.webp new file mode 100644 index 00000000..9ac7a22f Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/good_models_yeslemma_yesnopoison_linguisticmath-480.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/good_models_yeslemma_yesnopoison_linguisticmath-800.webp b/assets/img/2023-11-08-sentence-embeddings/good_models_yeslemma_yesnopoison_linguisticmath-800.webp new file mode 100644 index 00000000..1b125af4 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/good_models_yeslemma_yesnopoison_linguisticmath-800.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/iclr-1400.webp b/assets/img/2023-11-08-sentence-embeddings/iclr-1400.webp new file mode 100644 index 00000000..d56968ba Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/iclr-1400.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/iclr-480.webp b/assets/img/2023-11-08-sentence-embeddings/iclr-480.webp new file mode 100644 index 00000000..c9d42d7e Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/iclr-480.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/iclr-800.webp b/assets/img/2023-11-08-sentence-embeddings/iclr-800.webp new file mode 100644 index 00000000..d56968ba Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/iclr-800.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/response_based_KD-1400.webp b/assets/img/2023-11-08-sentence-embeddings/response_based_KD-1400.webp new file mode 100644 index 00000000..c97772a8 Binary files /dev/null and 
b/assets/img/2023-11-08-sentence-embeddings/response_based_KD-1400.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/response_based_KD-480.webp b/assets/img/2023-11-08-sentence-embeddings/response_based_KD-480.webp new file mode 100644 index 00000000..7d5c8de3 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/response_based_KD-480.webp differ diff --git a/assets/img/2023-11-08-sentence-embeddings/response_based_KD-800.webp b/assets/img/2023-11-08-sentence-embeddings/response_based_KD-800.webp new file mode 100644 index 00000000..c97772a8 Binary files /dev/null and b/assets/img/2023-11-08-sentence-embeddings/response_based_KD-800.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/STAEformer-1400.webp b/assets/img/2023-11-08-spatiotemporal/STAEformer-1400.webp new file mode 100644 index 00000000..63909ef3 Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/STAEformer-1400.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/STAEformer-480.webp b/assets/img/2023-11-08-spatiotemporal/STAEformer-480.webp new file mode 100644 index 00000000..effb3987 Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/STAEformer-480.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/STAEformer-800.webp b/assets/img/2023-11-08-spatiotemporal/STAEformer-800.webp new file mode 100644 index 00000000..63909ef3 Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/STAEformer-800.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/STAEformer_perturbed-1400.webp b/assets/img/2023-11-08-spatiotemporal/STAEformer_perturbed-1400.webp new file mode 100644 index 00000000..46b9e280 Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/STAEformer_perturbed-1400.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/STAEformer_perturbed-480.webp b/assets/img/2023-11-08-spatiotemporal/STAEformer_perturbed-480.webp new file mode 100644 index 00000000..98ee1234 Binary 
files /dev/null and b/assets/img/2023-11-08-spatiotemporal/STAEformer_perturbed-480.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/STAEformer_perturbed-800.webp b/assets/img/2023-11-08-spatiotemporal/STAEformer_perturbed-800.webp new file mode 100644 index 00000000..46b9e280 Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/STAEformer_perturbed-800.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/STAEformer_perturbed_top5-1400.webp b/assets/img/2023-11-08-spatiotemporal/STAEformer_perturbed_top5-1400.webp new file mode 100644 index 00000000..206ca14f Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/STAEformer_perturbed_top5-1400.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/STAEformer_perturbed_top5-480.webp b/assets/img/2023-11-08-spatiotemporal/STAEformer_perturbed_top5-480.webp new file mode 100644 index 00000000..56ad086c Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/STAEformer_perturbed_top5-480.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/STAEformer_perturbed_top5-800.webp b/assets/img/2023-11-08-spatiotemporal/STAEformer_perturbed_top5-800.webp new file mode 100644 index 00000000..206ca14f Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/STAEformer_perturbed_top5-800.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/STAEformer_top5-1400.webp b/assets/img/2023-11-08-spatiotemporal/STAEformer_top5-1400.webp new file mode 100644 index 00000000..9d3c219e Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/STAEformer_top5-1400.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/STAEformer_top5-480.webp b/assets/img/2023-11-08-spatiotemporal/STAEformer_top5-480.webp new file mode 100644 index 00000000..f42944e9 Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/STAEformer_top5-480.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/STAEformer_top5-800.webp 
b/assets/img/2023-11-08-spatiotemporal/STAEformer_top5-800.webp new file mode 100644 index 00000000..9d3c219e Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/STAEformer_top5-800.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/Spacetimeformer-1400.webp b/assets/img/2023-11-08-spatiotemporal/Spacetimeformer-1400.webp new file mode 100644 index 00000000..69d7e36b Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/Spacetimeformer-1400.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/Spacetimeformer-480.webp b/assets/img/2023-11-08-spatiotemporal/Spacetimeformer-480.webp new file mode 100644 index 00000000..8e897353 Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/Spacetimeformer-480.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/Spacetimeformer-800.webp b/assets/img/2023-11-08-spatiotemporal/Spacetimeformer-800.webp new file mode 100644 index 00000000..69d7e36b Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/Spacetimeformer-800.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_perturbed-1400.webp b/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_perturbed-1400.webp new file mode 100644 index 00000000..1998c9dd Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_perturbed-1400.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_perturbed-480.webp b/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_perturbed-480.webp new file mode 100644 index 00000000..44bf8f05 Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_perturbed-480.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_perturbed-800.webp b/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_perturbed-800.webp new file mode 100644 index 00000000..1998c9dd Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_perturbed-800.webp 
differ diff --git a/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_perturbed_top5-1400.webp b/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_perturbed_top5-1400.webp new file mode 100644 index 00000000..bdbbd009 Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_perturbed_top5-1400.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_perturbed_top5-480.webp b/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_perturbed_top5-480.webp new file mode 100644 index 00000000..73514e24 Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_perturbed_top5-480.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_perturbed_top5-800.webp b/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_perturbed_top5-800.webp new file mode 100644 index 00000000..bdbbd009 Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_perturbed_top5-800.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_top5-1400.webp b/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_top5-1400.webp new file mode 100644 index 00000000..f5dba996 Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_top5-1400.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_top5-480.webp b/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_top5-480.webp new file mode 100644 index 00000000..a4f33968 Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_top5-480.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_top5-800.webp b/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_top5-800.webp new file mode 100644 index 00000000..f5dba996 Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_top5-800.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/ideal_tsne-1400.webp 
b/assets/img/2023-11-08-spatiotemporal/ideal_tsne-1400.webp new file mode 100644 index 00000000..bbaa0774 Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/ideal_tsne-1400.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/ideal_tsne-480.webp b/assets/img/2023-11-08-spatiotemporal/ideal_tsne-480.webp new file mode 100644 index 00000000..6e4bd469 Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/ideal_tsne-480.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/ideal_tsne-800.webp b/assets/img/2023-11-08-spatiotemporal/ideal_tsne-800.webp new file mode 100644 index 00000000..bbaa0774 Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/ideal_tsne-800.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/pca_cumulative-1400.webp b/assets/img/2023-11-08-spatiotemporal/pca_cumulative-1400.webp new file mode 100644 index 00000000..277218de Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/pca_cumulative-1400.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/pca_cumulative-480.webp b/assets/img/2023-11-08-spatiotemporal/pca_cumulative-480.webp new file mode 100644 index 00000000..635ac740 Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/pca_cumulative-480.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/pca_cumulative-800.webp b/assets/img/2023-11-08-spatiotemporal/pca_cumulative-800.webp new file mode 100644 index 00000000..277218de Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/pca_cumulative-800.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/pca_heatmap-1400.webp b/assets/img/2023-11-08-spatiotemporal/pca_heatmap-1400.webp new file mode 100644 index 00000000..17da7368 Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/pca_heatmap-1400.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/pca_heatmap-480.webp b/assets/img/2023-11-08-spatiotemporal/pca_heatmap-480.webp new file mode 
100644 index 00000000..521c5407 Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/pca_heatmap-480.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/pca_heatmap-800.webp b/assets/img/2023-11-08-spatiotemporal/pca_heatmap-800.webp new file mode 100644 index 00000000..17da7368 Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/pca_heatmap-800.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/spacetimeformer_architecture-1400.webp b/assets/img/2023-11-08-spatiotemporal/spacetimeformer_architecture-1400.webp new file mode 100644 index 00000000..48c3f2ee Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/spacetimeformer_architecture-1400.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/spacetimeformer_architecture-480.webp b/assets/img/2023-11-08-spatiotemporal/spacetimeformer_architecture-480.webp new file mode 100644 index 00000000..ad8b8743 Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/spacetimeformer_architecture-480.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/spacetimeformer_architecture-800.webp b/assets/img/2023-11-08-spatiotemporal/spacetimeformer_architecture-800.webp new file mode 100644 index 00000000..48c3f2ee Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/spacetimeformer_architecture-800.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/staeformer_architecture-1400.webp b/assets/img/2023-11-08-spatiotemporal/staeformer_architecture-1400.webp new file mode 100644 index 00000000..6a0a1b8f Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/staeformer_architecture-1400.webp differ diff --git a/assets/img/2023-11-08-spatiotemporal/staeformer_architecture-480.webp b/assets/img/2023-11-08-spatiotemporal/staeformer_architecture-480.webp new file mode 100644 index 00000000..db873df0 Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/staeformer_architecture-480.webp differ diff --git 
a/assets/img/2023-11-08-spatiotemporal/staeformer_architecture-800.webp b/assets/img/2023-11-08-spatiotemporal/staeformer_architecture-800.webp new file mode 100644 index 00000000..6a0a1b8f Binary files /dev/null and b/assets/img/2023-11-08-spatiotemporal/staeformer_architecture-800.webp differ diff --git a/assets/img/2023-11-08-suscep/pos_neg_distribution-1400.webp b/assets/img/2023-11-08-suscep/pos_neg_distribution-1400.webp new file mode 100644 index 00000000..a7a4a154 Binary files /dev/null and b/assets/img/2023-11-08-suscep/pos_neg_distribution-1400.webp differ diff --git a/assets/img/2023-11-08-suscep/pos_neg_distribution-480.webp b/assets/img/2023-11-08-suscep/pos_neg_distribution-480.webp new file mode 100644 index 00000000..742156b3 Binary files /dev/null and b/assets/img/2023-11-08-suscep/pos_neg_distribution-480.webp differ diff --git a/assets/img/2023-11-08-suscep/pos_neg_distribution-800.webp b/assets/img/2023-11-08-suscep/pos_neg_distribution-800.webp new file mode 100644 index 00000000..a7a4a154 Binary files /dev/null and b/assets/img/2023-11-08-suscep/pos_neg_distribution-800.webp differ diff --git a/assets/img/2023-11-08-suscep/suscep_model-1400.webp b/assets/img/2023-11-08-suscep/suscep_model-1400.webp new file mode 100644 index 00000000..8083b0d8 Binary files /dev/null and b/assets/img/2023-11-08-suscep/suscep_model-1400.webp differ diff --git a/assets/img/2023-11-08-suscep/suscep_model-480.webp b/assets/img/2023-11-08-suscep/suscep_model-480.webp new file mode 100644 index 00000000..fe375761 Binary files /dev/null and b/assets/img/2023-11-08-suscep/suscep_model-480.webp differ diff --git a/assets/img/2023-11-08-suscep/suscep_model-800.webp b/assets/img/2023-11-08-suscep/suscep_model-800.webp new file mode 100644 index 00000000..8083b0d8 Binary files /dev/null and b/assets/img/2023-11-08-suscep/suscep_model-800.webp differ diff --git a/assets/img/2023-11-08-suscep/usa-1400.webp b/assets/img/2023-11-08-suscep/usa-1400.webp new file mode 100644 
index 00000000..3b0829e2 Binary files /dev/null and b/assets/img/2023-11-08-suscep/usa-1400.webp differ diff --git a/assets/img/2023-11-08-suscep/usa-480.webp b/assets/img/2023-11-08-suscep/usa-480.webp new file mode 100644 index 00000000..2d3b1343 Binary files /dev/null and b/assets/img/2023-11-08-suscep/usa-480.webp differ diff --git a/assets/img/2023-11-08-suscep/usa-800.webp b/assets/img/2023-11-08-suscep/usa-800.webp new file mode 100644 index 00000000..3b0829e2 Binary files /dev/null and b/assets/img/2023-11-08-suscep/usa-800.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/0_43_labeled_styled-1400.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/0_43_labeled_styled-1400.webp new file mode 100644 index 00000000..b56c568c Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/0_43_labeled_styled-1400.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/0_43_labeled_styled-480.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/0_43_labeled_styled-480.webp new file mode 100644 index 00000000..d6c9a7cd Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/0_43_labeled_styled-480.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/0_43_labeled_styled-800.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/0_43_labeled_styled-800.webp new file mode 100644 index 00000000..b56c568c Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/0_43_labeled_styled-800.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/big_arrow-1400.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/big_arrow-1400.webp new file mode 100644 index 00000000..4787e77d Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/big_arrow-1400.webp differ diff --git 
a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/big_arrow-480.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/big_arrow-480.webp new file mode 100644 index 00000000..846afa33 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/big_arrow-480.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/big_arrow-800.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/big_arrow-800.webp new file mode 100644 index 00000000..4787e77d Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/big_arrow-800.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/chat_example-1400.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/chat_example-1400.webp new file mode 100644 index 00000000..cdb3cf69 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/chat_example-1400.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/chat_example-480.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/chat_example-480.webp new file mode 100644 index 00000000..e3565577 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/chat_example-480.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/chat_example-800.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/chat_example-800.webp new file mode 100644 index 00000000..cdb3cf69 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/chat_example-800.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_43_labeled_styled-1400.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_43_labeled_styled-1400.webp new file mode 100644 index 00000000..0bfcb4db Binary files /dev/null and 
b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_43_labeled_styled-1400.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_43_labeled_styled-480.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_43_labeled_styled-480.webp new file mode 100644 index 00000000..e54b2564 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_43_labeled_styled-480.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_43_labeled_styled-800.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_43_labeled_styled-800.webp new file mode 100644 index 00000000..0bfcb4db Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_43_labeled_styled-800.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes1-1400.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes1-1400.webp new file mode 100644 index 00000000..0a5ced9c Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes1-1400.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes1-480.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes1-480.webp new file mode 100644 index 00000000..8ca04c47 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes1-480.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes1-800.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes1-800.webp new file mode 100644 index 00000000..0a5ced9c Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes1-800.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_example_1-1400.webp 
b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_example_1-1400.webp new file mode 100644 index 00000000..0c08fdc6 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_example_1-1400.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_example_1-480.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_example_1-480.webp new file mode 100644 index 00000000..e401fcb2 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_example_1-480.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_example_1-800.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_example_1-800.webp new file mode 100644 index 00000000..0c08fdc6 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_example_1-800.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_explainability_example_1-1400.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_explainability_example_1-1400.webp new file mode 100644 index 00000000..fea85c49 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_explainability_example_1-1400.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_explainability_example_1-480.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_explainability_example_1-480.webp new file mode 100644 index 00000000..a6de121b Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_explainability_example_1-480.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_explainability_example_1-800.webp 
b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_explainability_example_1-800.webp new file mode 100644 index 00000000..fea85c49 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_explainability_example_1-800.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/data_augmentation_pipeline-1400.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/data_augmentation_pipeline-1400.webp new file mode 100644 index 00000000..3b828a5d Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/data_augmentation_pipeline-1400.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/data_augmentation_pipeline-480.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/data_augmentation_pipeline-480.webp new file mode 100644 index 00000000..99ccb4f0 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/data_augmentation_pipeline-480.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/data_augmentation_pipeline-800.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/data_augmentation_pipeline-800.webp new file mode 100644 index 00000000..3b828a5d Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/data_augmentation_pipeline-800.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/experimentation_pipeline-1400.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/experimentation_pipeline-1400.webp new file mode 100644 index 00000000..640d88e7 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/experimentation_pipeline-1400.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/experimentation_pipeline-480.webp 
b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/experimentation_pipeline-480.webp new file mode 100644 index 00000000..75e762e2 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/experimentation_pipeline-480.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/experimentation_pipeline-800.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/experimentation_pipeline-800.webp new file mode 100644 index 00000000..640d88e7 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/experimentation_pipeline-800.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes1-1400.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes1-1400.webp new file mode 100644 index 00000000..d90d4066 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes1-1400.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes1-480.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes1-480.webp new file mode 100644 index 00000000..98c89f28 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes1-480.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes1-800.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes1-800.webp new file mode 100644 index 00000000..d90d4066 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes1-800.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_example_1-1400.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_example_1-1400.webp new file mode 100644 index 00000000..66593213 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_example_1-1400.webp differ diff --git 
a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_example_1-480.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_example_1-480.webp new file mode 100644 index 00000000..9b4565d6 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_example_1-480.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_example_1-800.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_example_1-800.webp new file mode 100644 index 00000000..66593213 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_example_1-800.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_explainability_example_1-1400.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_explainability_example_1-1400.webp new file mode 100644 index 00000000..3d532459 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_explainability_example_1-1400.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_explainability_example_1-480.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_explainability_example_1-480.webp new file mode 100644 index 00000000..7330bc61 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_explainability_example_1-480.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_explainability_example_1-800.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_explainability_example_1-800.webp new file mode 100644 index 00000000..3d532459 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_explainability_example_1-800.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/small_arrow-1400.webp 
b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/small_arrow-1400.webp new file mode 100644 index 00000000..2edc4a92 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/small_arrow-1400.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/small_arrow-480.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/small_arrow-480.webp new file mode 100644 index 00000000..3d36aba2 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/small_arrow-480.webp differ diff --git a/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/small_arrow-800.webp b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/small_arrow-800.webp new file mode 100644 index 00000000..2edc4a92 Binary files /dev/null and b/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/small_arrow-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/10-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/10-1400.webp new file mode 100644 index 00000000..ce8225b5 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/10-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/10-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/10-480.webp new file mode 100644 index 00000000..e890a183 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/10-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/10-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/10-800.webp new file mode 100644 index 00000000..ce8225b5 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/10-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/11-1400.webp 
b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/11-1400.webp new file mode 100644 index 00000000..b9410833 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/11-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/11-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/11-480.webp new file mode 100644 index 00000000..2a916f52 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/11-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/11-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/11-800.webp new file mode 100644 index 00000000..b9410833 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/11-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/12-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/12-1400.webp new file mode 100644 index 00000000..06b75e0f Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/12-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/12-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/12-480.webp new file mode 100644 index 00000000..4fb64669 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/12-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/12-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/12-800.webp new file mode 100644 index 00000000..06b75e0f Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/12-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/7-1400.webp 
b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/7-1400.webp new file mode 100644 index 00000000..37aa7e8d Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/7-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/7-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/7-480.webp new file mode 100644 index 00000000..77fdb68d Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/7-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/7-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/7-800.webp new file mode 100644 index 00000000..37aa7e8d Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/7-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/8-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/8-1400.webp new file mode 100644 index 00000000..a2b1e89e Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/8-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/8-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/8-480.webp new file mode 100644 index 00000000..c09934e6 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/8-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/8-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/8-800.webp new file mode 100644 index 00000000..a2b1e89e Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/8-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/9-1400.webp 
b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/9-1400.webp new file mode 100644 index 00000000..dfac01c4 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/9-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/9-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/9-480.webp new file mode 100644 index 00000000..c4f72887 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/9-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/9-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/9-800.webp new file mode 100644 index 00000000..dfac01c4 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/9-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/ICLR-logo-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/ICLR-logo-1400.webp new file mode 100644 index 00000000..d56968ba Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/ICLR-logo-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/ICLR-logo-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/ICLR-logo-480.webp new file mode 100644 index 00000000..c9d42d7e Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/ICLR-logo-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/ICLR-logo-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/ICLR-logo-800.webp new file mode 100644 index 00000000..d56968ba Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/ICLR-logo-800.webp differ diff --git 
a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/ICLR-logo-dark-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/ICLR-logo-dark-1400.webp new file mode 100644 index 00000000..5ed49089 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/ICLR-logo-dark-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/ICLR-logo-dark-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/ICLR-logo-dark-480.webp new file mode 100644 index 00000000..7f0830c1 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/ICLR-logo-dark-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/ICLR-logo-dark-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/ICLR-logo-dark-800.webp new file mode 100644 index 00000000..5ed49089 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/ICLR-logo-dark-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space cpu-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space cpu-1400.webp new file mode 100644 index 00000000..6af1eb13 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space cpu-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space cpu-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space cpu-480.webp new file mode 100644 index 00000000..5c6c6da0 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space cpu-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space cpu-800.webp 
b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space cpu-800.webp new file mode 100644 index 00000000..6af1eb13 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space cpu-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_cpu_expandednet-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_cpu_expandednet-1400.webp new file mode 100644 index 00000000..af5a6a68 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_cpu_expandednet-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_cpu_expandednet-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_cpu_expandednet-480.webp new file mode 100644 index 00000000..425581fe Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_cpu_expandednet-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_cpu_expandednet-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_cpu_expandednet-800.webp new file mode 100644 index 00000000..af5a6a68 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_cpu_expandednet-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_g-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_g-1400.webp new file mode 100644 index 00000000..1258134a Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_g-1400.webp differ diff --git 
a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_g-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_g-480.webp new file mode 100644 index 00000000..0b887aa9 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_g-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_g-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_g-800.webp new file mode 100644 index 00000000..1258134a Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_g-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_weather-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_weather-1400.webp new file mode 100644 index 00000000..d9419ce6 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_weather-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_weather-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_weather-480.webp new file mode 100644 index 00000000..61a27cfb Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_weather-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_weather-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_weather-800.webp new file mode 100644 index 00000000..d9419ce6 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_weather-800.webp 
differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_g-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_g-1400.webp new file mode 100644 index 00000000..18febef1 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_g-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_g-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_g-480.webp new file mode 100644 index 00000000..aba70264 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_g-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_g-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_g-800.webp new file mode 100644 index 00000000..18febef1 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_g-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_weather-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_weather-1400.webp new file mode 100644 index 00000000..5316fa66 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_weather-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_weather-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_weather-480.webp new file mode 100644 index 00000000..6247a8ce Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_weather-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_weather-800.webp 
b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_weather-800.webp new file mode 100644 index 00000000..5316fa66 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_weather-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/best_baseline_pred_cpu-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/best_baseline_pred_cpu-1400.webp new file mode 100644 index 00000000..f4dd9094 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/best_baseline_pred_cpu-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/best_baseline_pred_cpu-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/best_baseline_pred_cpu-480.webp new file mode 100644 index 00000000..79a7872b Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/best_baseline_pred_cpu-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/best_baseline_pred_cpu-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/best_baseline_pred_cpu-800.webp new file mode 100644 index 00000000..f4dd9094 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/best_baseline_pred_cpu-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_cpu-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_cpu-1400.webp new file mode 100644 index 00000000..a9127563 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_cpu-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_cpu-480.webp 
b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_cpu-480.webp new file mode 100644 index 00000000..7ffcc0d9 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_cpu-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_cpu-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_cpu-800.webp new file mode 100644 index 00000000..a9127563 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_cpu-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_g-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_g-1400.webp new file mode 100644 index 00000000..350f02bb Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_g-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_g-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_g-480.webp new file mode 100644 index 00000000..e09cd915 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_g-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_g-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_g-800.webp new file mode 100644 index 00000000..350f02bb Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_g-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_tdist_cpu-1400.webp 
b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_tdist_cpu-1400.webp new file mode 100644 index 00000000..f7f0ce20 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_tdist_cpu-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_tdist_cpu-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_tdist_cpu-480.webp new file mode 100644 index 00000000..684b7e7f Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_tdist_cpu-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_tdist_cpu-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_tdist_cpu-800.webp new file mode 100644 index 00000000..f7f0ce20 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_tdist_cpu-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_cpu-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_cpu-1400.webp new file mode 100644 index 00000000..a716744b Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_cpu-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_cpu-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_cpu-480.webp new file mode 100644 index 00000000..6b1c5f1a Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_cpu-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_cpu-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_cpu-800.webp new 
file mode 100644 index 00000000..a716744b Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_cpu-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_g-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_g-1400.webp new file mode 100644 index 00000000..2c2abba4 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_g-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_g-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_g-480.webp new file mode 100644 index 00000000..bcd0d35e Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_g-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_g-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_g-800.webp new file mode 100644 index 00000000..2c2abba4 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_g-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_tdist_cpu-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_tdist_cpu-1400.webp new file mode 100644 index 00000000..600734c6 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_tdist_cpu-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_tdist_cpu-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_tdist_cpu-480.webp new file mode 100644 index 00000000..1fea216c Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_tdist_cpu-480.webp differ diff --git 
a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_tdist_cpu-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_tdist_cpu-800.webp new file mode 100644 index 00000000..600734c6 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_tdist_cpu-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_decomp-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_decomp-1400.webp new file mode 100644 index 00000000..3516d109 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_decomp-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_decomp-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_decomp-480.webp new file mode 100644 index 00000000..743ebb23 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_decomp-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_decomp-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_decomp-800.webp new file mode 100644 index 00000000..3516d109 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_decomp-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_dist-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_dist-1400.webp new file mode 100644 index 00000000..feeff4d2 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_dist-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_dist-480.webp 
b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_dist-480.webp new file mode 100644 index 00000000..7aadcf01 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_dist-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_dist-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_dist-800.webp new file mode 100644 index 00000000..feeff4d2 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_dist-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_plot-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_plot-1400.webp new file mode 100644 index 00000000..5257bb0c Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_plot-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_plot-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_plot-480.webp new file mode 100644 index 00000000..70339d91 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_plot-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_plot-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_plot-800.webp new file mode 100644 index 00000000..5257bb0c Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_plot-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/f_score_distribution-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/f_score_distribution-1400.webp new file mode 100644 index 00000000..3b42c241 Binary 
files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/f_score_distribution-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/f_score_distribution-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/f_score_distribution-480.webp new file mode 100644 index 00000000..b179efd5 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/f_score_distribution-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/f_score_distribution-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/f_score_distribution-800.webp new file mode 100644 index 00000000..3b42c241 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/f_score_distribution-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_plot-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_plot-1400.webp new file mode 100644 index 00000000..ecc4d10f Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_plot-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_plot-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_plot-480.webp new file mode 100644 index 00000000..a21a3dc9 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_plot-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_plot-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_plot-800.webp new file mode 100644 index 00000000..ecc4d10f Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_plot-800.webp differ diff --git 
a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_seasonal-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_seasonal-1400.webp new file mode 100644 index 00000000..4d365030 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_seasonal-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_seasonal-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_seasonal-480.webp new file mode 100644 index 00000000..6942244f Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_seasonal-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_seasonal-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_seasonal-800.webp new file mode 100644 index 00000000..4d365030 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_seasonal-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/iclr-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/iclr-1400.webp new file mode 100644 index 00000000..d56968ba Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/iclr-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/iclr-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/iclr-480.webp new file mode 100644 index 00000000..c9d42d7e Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/iclr-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/iclr-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/iclr-800.webp new file mode 100644 index 
00000000..d56968ba Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/iclr-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/laten_space_tdist_g-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/laten_space_tdist_g-1400.webp new file mode 100644 index 00000000..69b3dd10 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/laten_space_tdist_g-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/laten_space_tdist_g-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/laten_space_tdist_g-480.webp new file mode 100644 index 00000000..9c183625 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/laten_space_tdist_g-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/laten_space_tdist_g-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/laten_space_tdist_g-800.webp new file mode 100644 index 00000000..69b3dd10 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/laten_space_tdist_g-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta0-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta0-1400.webp new file mode 100644 index 00000000..6f9274c4 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta0-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta0-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta0-480.webp new file mode 100644 index 00000000..6ca6d293 Binary files /dev/null and 
b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta0-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta0-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta0-800.webp new file mode 100644 index 00000000..6f9274c4 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta0-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_0-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_0-1400.webp new file mode 100644 index 00000000..6f9274c4 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_0-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_0-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_0-480.webp new file mode 100644 index 00000000..6ca6d293 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_0-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_0-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_0-800.webp new file mode 100644 index 00000000..6f9274c4 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_0-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_1-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_1-1400.webp new file mode 100644 index 00000000..8aedd1dd Binary files /dev/null and 
b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_1-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_1-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_1-480.webp new file mode 100644 index 00000000..06baa83b Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_1-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_1-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_1-800.webp new file mode 100644 index 00000000..8aedd1dd Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_1-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_3-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_3-1400.webp new file mode 100644 index 00000000..2d7db18f Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_3-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_3-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_3-480.webp new file mode 100644 index 00000000..10c5125c Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_3-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_3-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_3-800.webp new file mode 100644 index 00000000..2d7db18f Binary files /dev/null and 
b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_3-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_5-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_5-1400.webp new file mode 100644 index 00000000..cacca564 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_5-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_5-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_5-480.webp new file mode 100644 index 00000000..86bdefb9 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_5-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_5-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_5-800.webp new file mode 100644 index 00000000..cacca564 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_5-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_6-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_6-1400.webp new file mode 100644 index 00000000..cfeb7756 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_6-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_6-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_6-480.webp new file mode 100644 index 00000000..66e510dd Binary files /dev/null and 
b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_6-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_6-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_6-800.webp new file mode 100644 index 00000000..cfeb7756 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_6-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_7-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_7-1400.webp new file mode 100644 index 00000000..a93e9ec8 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_7-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_7-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_7-480.webp new file mode 100644 index 00000000..db164516 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_7-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_7-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_7-800.webp new file mode 100644 index 00000000..a93e9ec8 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_7-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_9-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_9-1400.webp new file mode 100644 index 00000000..95fd7620 Binary files /dev/null and 
b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_9-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_9-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_9-480.webp new file mode 100644 index 00000000..4dda204f Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_9-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_9-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_9-800.webp new file mode 100644 index 00000000..95fd7620 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_9-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q0-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q0-1400.webp new file mode 100644 index 00000000..837046ca Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q0-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q0-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q0-480.webp new file mode 100644 index 00000000..5cd57a3a Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q0-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q0-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q0-800.webp new file mode 100644 index 00000000..837046ca Binary files /dev/null and 
b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q0-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q2-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q2-1400.webp new file mode 100644 index 00000000..fe20a813 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q2-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q2-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q2-480.webp new file mode 100644 index 00000000..b302a6ea Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q2-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q2-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q2-800.webp new file mode 100644 index 00000000..fe20a813 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q2-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q4-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q4-1400.webp new file mode 100644 index 00000000..984d4765 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q4-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q4-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q4-480.webp new file mode 100644 index 00000000..b473561f Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q4-480.webp 
differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q4-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q4-800.webp new file mode 100644 index 00000000..984d4765 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q4-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q6-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q6-1400.webp new file mode 100644 index 00000000..640d57db Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q6-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q6-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q6-480.webp new file mode 100644 index 00000000..afe03dd6 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q6-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q6-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q6-800.webp new file mode 100644 index 00000000..640d57db Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q6-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q8-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q8-1400.webp new file mode 100644 index 00000000..149ed395 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q8-1400.webp differ diff --git 
a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q8-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q8-480.webp new file mode 100644 index 00000000..beb30f7f Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q8-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q8-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q8-800.webp new file mode 100644 index 00000000..149ed395 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q8-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q9-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q9-1400.webp new file mode 100644 index 00000000..b7152cf3 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q9-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q9-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q9-480.webp new file mode 100644 index 00000000..d5ed189b Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q9-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q9-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q9-800.webp new file mode 100644 index 00000000..b7152cf3 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q9-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_qbeta-1400.webp 
b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_qbeta-1400.webp new file mode 100644 index 00000000..80cc8182 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_qbeta-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_qbeta-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_qbeta-480.webp new file mode 100644 index 00000000..aa554a66 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_qbeta-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_qbeta-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_qbeta-800.webp new file mode 100644 index 00000000..80cc8182 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_qbeta-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_cpu-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_cpu-1400.webp new file mode 100644 index 00000000..0d4da55b Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_cpu-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_cpu-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_cpu-480.webp new file mode 100644 index 00000000..b19d3705 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_cpu-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_cpu-800.webp 
b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_cpu-800.webp new file mode 100644 index 00000000..0d4da55b Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_cpu-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_g-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_g-1400.webp new file mode 100644 index 00000000..1582266b Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_g-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_g-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_g-480.webp new file mode 100644 index 00000000..15fbb760 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_g-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_g-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_g-800.webp new file mode 100644 index 00000000..1582266b Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_g-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_weather-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_weather-1400.webp new file mode 100644 index 00000000..3edafc1b Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_weather-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_weather-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_weather-480.webp new file mode 
100644 index 00000000..d093c7e6 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_weather-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_weather-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_weather-800.webp new file mode 100644 index 00000000..3edafc1b Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_weather-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_tdist_weather-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_tdist_weather-1400.webp new file mode 100644 index 00000000..8bec4a56 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_tdist_weather-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_tdist_weather-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_tdist_weather-480.webp new file mode 100644 index 00000000..8b1d3691 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_tdist_weather-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_tdist_weather-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_tdist_weather-800.webp new file mode 100644 index 00000000..8bec4a56 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_tdist_weather-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/original_paper_latent-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/original_paper_latent-1400.webp new file mode 100644 index 
00000000..45cd2f25 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/original_paper_latent-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/original_paper_latent-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/original_paper_latent-480.webp new file mode 100644 index 00000000..26089b66 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/original_paper_latent-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/original_paper_latent-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/original_paper_latent-800.webp new file mode 100644 index 00000000..45cd2f25 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/original_paper_latent-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_cpu-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_cpu-1400.webp new file mode 100644 index 00000000..a716744b Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_cpu-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_cpu-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_cpu-480.webp new file mode 100644 index 00000000..6b1c5f1a Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_cpu-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_cpu-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_cpu-800.webp new file mode 100644 index 00000000..a716744b Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_cpu-800.webp differ 
diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_g-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_g-1400.webp new file mode 100644 index 00000000..ca089d6d Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_g-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_g-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_g-480.webp new file mode 100644 index 00000000..31295bc2 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_g-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_g-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_g-800.webp new file mode 100644 index 00000000..ca089d6d Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_g-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_weather-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_weather-1400.webp new file mode 100644 index 00000000..5316fa66 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_weather-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_weather-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_weather-480.webp new file mode 100644 index 00000000..6247a8ce Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_weather-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_weather-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_weather-800.webp 
new file mode 100644 index 00000000..5316fa66 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k12_weather-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_cpu-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_cpu-1400.webp new file mode 100644 index 00000000..685c43fa Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_cpu-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_cpu-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_cpu-480.webp new file mode 100644 index 00000000..d5e57409 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_cpu-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_cpu-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_cpu-800.webp new file mode 100644 index 00000000..685c43fa Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_cpu-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_g-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_g-1400.webp new file mode 100644 index 00000000..2ca21ad0 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_g-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_g-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_g-480.webp new file mode 100644 index 00000000..934e704a Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_g-480.webp differ diff --git 
a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_g-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_g-800.webp new file mode 100644 index 00000000..2ca21ad0 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_g-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_weather-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_weather-1400.webp new file mode 100644 index 00000000..5316fa66 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_weather-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_weather-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_weather-480.webp new file mode 100644 index 00000000..6247a8ce Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_weather-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_weather-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_weather-800.webp new file mode 100644 index 00000000..5316fa66 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k3_weather-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_cpu-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_cpu-1400.webp new file mode 100644 index 00000000..a716744b Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_cpu-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_cpu-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_cpu-480.webp new file 
mode 100644 index 00000000..6b1c5f1a Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_cpu-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_cpu-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_cpu-800.webp new file mode 100644 index 00000000..a716744b Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_cpu-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_g-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_g-1400.webp new file mode 100644 index 00000000..341a7f24 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_g-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_g-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_g-480.webp new file mode 100644 index 00000000..417fdaad Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_g-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_g-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_g-800.webp new file mode 100644 index 00000000..341a7f24 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_g-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_weather-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_weather-1400.webp new file mode 100644 index 00000000..5316fa66 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_weather-1400.webp differ diff --git 
a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_weather-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_weather-480.webp new file mode 100644 index 00000000..6247a8ce Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_weather-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_weather-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_weather-800.webp new file mode 100644 index 00000000..5316fa66 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k6_weather-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_cpu-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_cpu-1400.webp new file mode 100644 index 00000000..a716744b Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_cpu-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_cpu-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_cpu-480.webp new file mode 100644 index 00000000..6b1c5f1a Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_cpu-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_cpu-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_cpu-800.webp new file mode 100644 index 00000000..a716744b Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_cpu-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_g-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_g-1400.webp new file mode 100644 
index 00000000..c3ad5429 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_g-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_g-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_g-480.webp new file mode 100644 index 00000000..239e32b0 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_g-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_g-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_g-800.webp new file mode 100644 index 00000000..c3ad5429 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_g-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_weather-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_weather-1400.webp new file mode 100644 index 00000000..5316fa66 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_weather-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_weather-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_weather-480.webp new file mode 100644 index 00000000..6247a8ce Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_weather-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_weather-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_weather-800.webp new file mode 100644 index 00000000..5316fa66 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_k9_weather-800.webp differ diff --git 
a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_cpu-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_cpu-1400.webp new file mode 100644 index 00000000..fb573815 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_cpu-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_cpu-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_cpu-480.webp new file mode 100644 index 00000000..7f42dc73 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_cpu-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_cpu-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_cpu-800.webp new file mode 100644 index 00000000..fb573815 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_cpu-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_g-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_g-1400.webp new file mode 100644 index 00000000..6c5bc988 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_g-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_g-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_g-480.webp new file mode 100644 index 00000000..b38a7eb0 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_g-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_g-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_g-800.webp new file mode 100644 index 00000000..6c5bc988 Binary files /dev/null and 
b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_g-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_weather-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_weather-1400.webp new file mode 100644 index 00000000..5316fa66 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_weather-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_weather-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_weather-480.webp new file mode 100644 index 00000000..6247a8ce Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_weather-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_weather-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_weather-800.webp new file mode 100644 index 00000000..5316fa66 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_q_weather-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_tdist_g-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_tdist_g-1400.webp new file mode 100644 index 00000000..3952d5c7 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_tdist_g-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_tdist_g-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_tdist_g-480.webp new file mode 100644 index 00000000..52725979 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_tdist_g-480.webp differ diff --git 
a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_tdist_g-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_tdist_g-800.webp new file mode 100644 index 00000000..3952d5c7 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_tdist_g-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_tdist_weather-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_tdist_weather-1400.webp new file mode 100644 index 00000000..5316fa66 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_tdist_weather-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_tdist_weather-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_tdist_weather-480.webp new file mode 100644 index 00000000..6247a8ce Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_tdist_weather-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_tdist_weather-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_tdist_weather-800.webp new file mode 100644 index 00000000..5316fa66 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/pred_tdist_weather-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_decomp-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_decomp-1400.webp new file mode 100644 index 00000000..537a1787 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_decomp-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_decomp-480.webp 
b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_decomp-480.webp new file mode 100644 index 00000000..56b085b7 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_decomp-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_decomp-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_decomp-800.webp new file mode 100644 index 00000000..537a1787 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_decomp-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_dist-1400.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_dist-1400.webp new file mode 100644 index 00000000..e0e56171 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_dist-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_dist-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_dist-480.webp new file mode 100644 index 00000000..db82e409 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_dist-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_dist-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_dist-800.webp new file mode 100644 index 00000000..e0e56171 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_dist-800.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_plot-1400.webp 
b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_plot-1400.webp new file mode 100644 index 00000000..996cc53d Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_plot-1400.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_plot-480.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_plot-480.webp new file mode 100644 index 00000000..0db055c7 Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_plot-480.webp differ diff --git a/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_plot-800.webp b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_plot-800.webp new file mode 100644 index 00000000..996cc53d Binary files /dev/null and b/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_plot-800.webp differ diff --git a/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/PersonalizationGenFrmwrk-1400.webp b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/PersonalizationGenFrmwrk-1400.webp new file mode 100644 index 00000000..d58a712d Binary files /dev/null and b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/PersonalizationGenFrmwrk-1400.webp differ diff --git a/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/PersonalizationGenFrmwrk-480.webp b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/PersonalizationGenFrmwrk-480.webp new file mode 100644 index 00000000..6132d36e Binary files /dev/null and b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/PersonalizationGenFrmwrk-480.webp differ diff --git a/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/PersonalizationGenFrmwrk-800.webp b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/PersonalizationGenFrmwrk-800.webp new file mode 100644 index 
00000000..d58a712d Binary files /dev/null and b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/PersonalizationGenFrmwrk-800.webp differ diff --git a/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/bleu_score-1400.webp b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/bleu_score-1400.webp new file mode 100644 index 00000000..946a9168 Binary files /dev/null and b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/bleu_score-1400.webp differ diff --git a/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/bleu_score-480.webp b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/bleu_score-480.webp new file mode 100644 index 00000000..34a207b2 Binary files /dev/null and b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/bleu_score-480.webp differ diff --git a/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/bleu_score-800.webp b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/bleu_score-800.webp new file mode 100644 index 00000000..946a9168 Binary files /dev/null and b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/bleu_score-800.webp differ diff --git a/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/centroid_example-1400.webp b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/centroid_example-1400.webp new file mode 100644 index 00000000..3108180f Binary files /dev/null and b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/centroid_example-1400.webp differ diff --git a/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/centroid_example-480.webp b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/centroid_example-480.webp new file mode 100644 index 00000000..4847acb9 Binary files /dev/null and b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/centroid_example-480.webp differ diff --git a/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/centroid_example-800.webp 
b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/centroid_example-800.webp new file mode 100644 index 00000000..3108180f Binary files /dev/null and b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/centroid_example-800.webp differ diff --git a/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/gen_structure_overview-1400.webp b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/gen_structure_overview-1400.webp new file mode 100644 index 00000000..e15f0402 Binary files /dev/null and b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/gen_structure_overview-1400.webp differ diff --git a/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/gen_structure_overview-480.webp b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/gen_structure_overview-480.webp new file mode 100644 index 00000000..ef2e7ca3 Binary files /dev/null and b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/gen_structure_overview-480.webp differ diff --git a/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/gen_structure_overview-800.webp b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/gen_structure_overview-800.webp new file mode 100644 index 00000000..e15f0402 Binary files /dev/null and b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/gen_structure_overview-800.webp differ diff --git a/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/rouge_score-1400.webp b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/rouge_score-1400.webp new file mode 100644 index 00000000..59cefa96 Binary files /dev/null and b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/rouge_score-1400.webp differ diff --git a/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/rouge_score-480.webp b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/rouge_score-480.webp new file mode 100644 index 00000000..afc797d5 Binary files /dev/null and 
b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/rouge_score-480.webp differ diff --git a/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/rouge_score-800.webp b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/rouge_score-800.webp new file mode 100644 index 00000000..59cefa96 Binary files /dev/null and b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/rouge_score-800.webp differ diff --git a/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/tsne_dim_reduction_example-1400.webp b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/tsne_dim_reduction_example-1400.webp new file mode 100644 index 00000000..10df9e17 Binary files /dev/null and b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/tsne_dim_reduction_example-1400.webp differ diff --git a/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/tsne_dim_reduction_example-480.webp b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/tsne_dim_reduction_example-480.webp new file mode 100644 index 00000000..389d8e1b Binary files /dev/null and b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/tsne_dim_reduction_example-480.webp differ diff --git a/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/tsne_dim_reduction_example-800.webp b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/tsne_dim_reduction_example-800.webp new file mode 100644 index 00000000..10df9e17 Binary files /dev/null and b/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/tsne_dim_reduction_example-800.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig0-1400.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig0-1400.webp new file mode 100644 index 00000000..fc76a109 Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig0-1400.webp differ diff --git 
a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig0-480.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig0-480.webp new file mode 100644 index 00000000..414327ba Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig0-480.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig0-800.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig0-800.webp new file mode 100644 index 00000000..fc76a109 Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig0-800.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig01-1400.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig01-1400.webp new file mode 100644 index 00000000..6b0f1ded Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig01-1400.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig01-480.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig01-480.webp new file mode 100644 index 00000000..5247858d Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig01-480.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig01-800.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig01-800.webp new file mode 100644 index 00000000..6b0f1ded Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig01-800.webp differ diff --git 
a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig1-1400.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig1-1400.webp new file mode 100644 index 00000000..5fe40fe0 Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig1-1400.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig1-480.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig1-480.webp new file mode 100644 index 00000000..c2505a9e Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig1-480.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig1-800.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig1-800.webp new file mode 100644 index 00000000..5fe40fe0 Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig1-800.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig2-1400.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig2-1400.webp new file mode 100644 index 00000000..570e9b71 Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig2-1400.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig2-480.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig2-480.webp new file mode 100644 index 00000000..7975bd6d Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig2-480.webp differ diff --git 
a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig2-800.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig2-800.webp new file mode 100644 index 00000000..570e9b71 Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig2-800.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig3-1400.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig3-1400.webp new file mode 100644 index 00000000..c71a2bf5 Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig3-1400.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig3-480.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig3-480.webp new file mode 100644 index 00000000..007a855a Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig3-480.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig3-800.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig3-800.webp new file mode 100644 index 00000000..c71a2bf5 Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig3-800.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig4-1400.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig4-1400.webp new file mode 100644 index 00000000..27ddc683 Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig4-1400.webp differ diff --git 
a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig4-480.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig4-480.webp new file mode 100644 index 00000000..125708fc Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig4-480.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig4-800.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig4-800.webp new file mode 100644 index 00000000..27ddc683 Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig4-800.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig5-1400.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig5-1400.webp new file mode 100644 index 00000000..e2a853ec Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig5-1400.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig5-480.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig5-480.webp new file mode 100644 index 00000000..5c7212d2 Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig5-480.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig5-800.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig5-800.webp new file mode 100644 index 00000000..e2a853ec Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig5-800.webp differ diff --git 
a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig6-1400.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig6-1400.webp new file mode 100644 index 00000000..48d40690 Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig6-1400.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig6-480.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig6-480.webp new file mode 100644 index 00000000..28c12196 Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig6-480.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig6-800.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig6-800.webp new file mode 100644 index 00000000..48d40690 Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig6-800.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig7-1400.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig7-1400.webp new file mode 100644 index 00000000..174c4d70 Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig7-1400.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig7-480.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig7-480.webp new file mode 100644 index 00000000..27829141 Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig7-480.webp differ diff --git 
a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig7-800.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig7-800.webp new file mode 100644 index 00000000..174c4d70 Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig7-800.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img1-1400.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img1-1400.webp new file mode 100644 index 00000000..fc76a109 Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img1-1400.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img1-480.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img1-480.webp new file mode 100644 index 00000000..414327ba Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img1-480.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img1-800.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img1-800.webp new file mode 100644 index 00000000..fc76a109 Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img1-800.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img2-1400.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img2-1400.webp new file mode 100644 index 00000000..cafbb245 Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img2-1400.webp differ diff --git 
a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img2-480.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img2-480.webp new file mode 100644 index 00000000..438c4b3f Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img2-480.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img2-800.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img2-800.webp new file mode 100644 index 00000000..cafbb245 Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img2-800.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img3-1400.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img3-1400.webp new file mode 100644 index 00000000..97793033 Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img3-1400.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img3-480.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img3-480.webp new file mode 100644 index 00000000..dc74167e Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img3-480.webp differ diff --git a/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img3-800.webp b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img3-800.webp new file mode 100644 index 00000000..97793033 Binary files /dev/null and b/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/img3-800.webp differ diff --git 
a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_10_exp2matrices-1400.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_10_exp2matrices-1400.webp new file mode 100644 index 00000000..87a1e63f Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_10_exp2matrices-1400.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_10_exp2matrices-480.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_10_exp2matrices-480.webp new file mode 100644 index 00000000..7147b464 Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_10_exp2matrices-480.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_10_exp2matrices-800.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_10_exp2matrices-800.webp new file mode 100644 index 00000000..87a1e63f Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_10_exp2matrices-800.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_11_exp3table-1400.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_11_exp3table-1400.webp new file mode 100644 index 00000000..0f6056d3 Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_11_exp3table-1400.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_11_exp3table-480.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_11_exp3table-480.webp new file mode 100644 index 00000000..caaa9dd3 Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_11_exp3table-480.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_11_exp3table-800.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_11_exp3table-800.webp new file mode 100644 index 
00000000..0f6056d3 Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_11_exp3table-800.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_12_exp3graph-1400.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_12_exp3graph-1400.webp new file mode 100644 index 00000000..3563b259 Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_12_exp3graph-1400.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_12_exp3graph-480.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_12_exp3graph-480.webp new file mode 100644 index 00000000..2a6a0077 Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_12_exp3graph-480.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_12_exp3graph-800.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_12_exp3graph-800.webp new file mode 100644 index 00000000..3563b259 Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_12_exp3graph-800.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_13_exp3extradata-1400.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_13_exp3extradata-1400.webp new file mode 100644 index 00000000..8a0a0bc6 Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_13_exp3extradata-1400.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_13_exp3extradata-480.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_13_exp3extradata-480.webp new file mode 100644 index 00000000..c4c1053d Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_13_exp3extradata-480.webp differ diff --git 
a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_13_exp3extradata-800.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_13_exp3extradata-800.webp new file mode 100644 index 00000000..8a0a0bc6 Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_13_exp3extradata-800.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_1_mask-1400.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_1_mask-1400.webp new file mode 100644 index 00000000..ec74d8cb Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_1_mask-1400.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_1_mask-480.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_1_mask-480.webp new file mode 100644 index 00000000..7545114a Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_1_mask-480.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_1_mask-800.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_1_mask-800.webp new file mode 100644 index 00000000..ec74d8cb Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_1_mask-800.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_2_arch1-1400.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_2_arch1-1400.webp new file mode 100644 index 00000000..5cb8bdfd Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_2_arch1-1400.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_2_arch1-480.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_2_arch1-480.webp new file mode 100644 index 00000000..8d37db8a Binary files /dev/null and 
b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_2_arch1-480.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_2_arch1-800.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_2_arch1-800.webp new file mode 100644 index 00000000..5cb8bdfd Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_2_arch1-800.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_3_arch2-1400.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_3_arch2-1400.webp new file mode 100644 index 00000000..6a78a2bc Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_3_arch2-1400.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_3_arch2-480.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_3_arch2-480.webp new file mode 100644 index 00000000..689e064c Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_3_arch2-480.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_3_arch2-800.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_3_arch2-800.webp new file mode 100644 index 00000000..6a78a2bc Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_3_arch2-800.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_4_map-1400.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_4_map-1400.webp new file mode 100644 index 00000000..2c1c8829 Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_4_map-1400.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_4_map-480.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_4_map-480.webp new file mode 100644 index 00000000..40bbd02b Binary 
files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_4_map-480.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_4_map-800.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_4_map-800.webp new file mode 100644 index 00000000..2c1c8829 Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_4_map-800.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_5_acc-1400.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_5_acc-1400.webp new file mode 100644 index 00000000..30929186 Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_5_acc-1400.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_5_acc-480.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_5_acc-480.webp new file mode 100644 index 00000000..e0a8f3ec Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_5_acc-480.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_5_acc-800.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_5_acc-800.webp new file mode 100644 index 00000000..30929186 Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_5_acc-800.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_6_local_map-1400.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_6_local_map-1400.webp new file mode 100644 index 00000000..4ed9b127 Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_6_local_map-1400.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_6_local_map-480.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_6_local_map-480.webp new file mode 100644 index 
00000000..7a30049e Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_6_local_map-480.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_6_local_map-800.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_6_local_map-800.webp new file mode 100644 index 00000000..4ed9b127 Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_6_local_map-800.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_7_table-1400.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_7_table-1400.webp new file mode 100644 index 00000000..05a7393f Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_7_table-1400.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_7_table-480.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_7_table-480.webp new file mode 100644 index 00000000..3a4cb878 Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_7_table-480.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_7_table-800.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_7_table-800.webp new file mode 100644 index 00000000..05a7393f Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_7_table-800.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_8_exp1table-1400.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_8_exp1table-1400.webp new file mode 100644 index 00000000..19f7c223 Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_8_exp1table-1400.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_8_exp1table-480.webp 
b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_8_exp1table-480.webp new file mode 100644 index 00000000..90ff8fbe Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_8_exp1table-480.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_8_exp1table-800.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_8_exp1table-800.webp new file mode 100644 index 00000000..19f7c223 Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_8_exp1table-800.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_9_exp1graph-1400.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_9_exp1graph-1400.webp new file mode 100644 index 00000000..163bd26f Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_9_exp1graph-1400.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_9_exp1graph-480.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_9_exp1graph-480.webp new file mode 100644 index 00000000..0a50e358 Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_9_exp1graph-480.webp differ diff --git a/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_9_exp1graph-800.webp b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_9_exp1graph-800.webp new file mode 100644 index 00000000..163bd26f Binary files /dev/null and b/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_9_exp1graph-800.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/10SP500_norm-1400.webp b/assets/img/2023-11-09-TransformersAndRNNs/10SP500_norm-1400.webp new file mode 100644 index 00000000..da359df6 Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/10SP500_norm-1400.webp differ diff --git 
a/assets/img/2023-11-09-TransformersAndRNNs/10SP500_norm-480.webp b/assets/img/2023-11-09-TransformersAndRNNs/10SP500_norm-480.webp new file mode 100644 index 00000000..194e264f Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/10SP500_norm-480.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/10SP500_norm-800.webp b/assets/img/2023-11-09-TransformersAndRNNs/10SP500_norm-800.webp new file mode 100644 index 00000000..da359df6 Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/10SP500_norm-800.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/10SP500_pre_norm-1400.webp b/assets/img/2023-11-09-TransformersAndRNNs/10SP500_pre_norm-1400.webp new file mode 100644 index 00000000..2e3eb73b Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/10SP500_pre_norm-1400.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/10SP500_pre_norm-480.webp b/assets/img/2023-11-09-TransformersAndRNNs/10SP500_pre_norm-480.webp new file mode 100644 index 00000000..d0cd5e88 Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/10SP500_pre_norm-480.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/10SP500_pre_norm-800.webp b/assets/img/2023-11-09-TransformersAndRNNs/10SP500_pre_norm-800.webp new file mode 100644 index 00000000..2e3eb73b Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/10SP500_pre_norm-800.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/ECG-1400.webp b/assets/img/2023-11-09-TransformersAndRNNs/ECG-1400.webp new file mode 100644 index 00000000..432f89e6 Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/ECG-1400.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/ECG-480.webp b/assets/img/2023-11-09-TransformersAndRNNs/ECG-480.webp new file mode 100644 index 00000000..eddd521c Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/ECG-480.webp differ 
diff --git a/assets/img/2023-11-09-TransformersAndRNNs/ECG-800.webp b/assets/img/2023-11-09-TransformersAndRNNs/ECG-800.webp new file mode 100644 index 00000000..432f89e6 Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/ECG-800.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/Forcast_yesLSTM-1400.webp b/assets/img/2023-11-09-TransformersAndRNNs/Forcast_yesLSTM-1400.webp new file mode 100644 index 00000000..33e8ba56 Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/Forcast_yesLSTM-1400.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/Forcast_yesLSTM-480.webp b/assets/img/2023-11-09-TransformersAndRNNs/Forcast_yesLSTM-480.webp new file mode 100644 index 00000000..3dd34433 Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/Forcast_yesLSTM-480.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/Forcast_yesLSTM-800.webp b/assets/img/2023-11-09-TransformersAndRNNs/Forcast_yesLSTM-800.webp new file mode 100644 index 00000000..33e8ba56 Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/Forcast_yesLSTM-800.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/Loss-1400.webp b/assets/img/2023-11-09-TransformersAndRNNs/Loss-1400.webp new file mode 100644 index 00000000..39ce8144 Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/Loss-1400.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/Loss-480.webp b/assets/img/2023-11-09-TransformersAndRNNs/Loss-480.webp new file mode 100644 index 00000000..1329297c Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/Loss-480.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/Loss-800.webp b/assets/img/2023-11-09-TransformersAndRNNs/Loss-800.webp new file mode 100644 index 00000000..39ce8144 Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/Loss-800.webp differ diff --git 
a/assets/img/2023-11-09-TransformersAndRNNs/Outlier_performance_yesLSTM-1400.webp b/assets/img/2023-11-09-TransformersAndRNNs/Outlier_performance_yesLSTM-1400.webp new file mode 100644 index 00000000..1b9bc9a9 Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/Outlier_performance_yesLSTM-1400.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/Outlier_performance_yesLSTM-480.webp b/assets/img/2023-11-09-TransformersAndRNNs/Outlier_performance_yesLSTM-480.webp new file mode 100644 index 00000000..89936ade Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/Outlier_performance_yesLSTM-480.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/Outlier_performance_yesLSTM-800.webp b/assets/img/2023-11-09-TransformersAndRNNs/Outlier_performance_yesLSTM-800.webp new file mode 100644 index 00000000..1b9bc9a9 Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/Outlier_performance_yesLSTM-800.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/ROC-1400.webp b/assets/img/2023-11-09-TransformersAndRNNs/ROC-1400.webp new file mode 100644 index 00000000..2b194b80 Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/ROC-1400.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/ROC-480.webp b/assets/img/2023-11-09-TransformersAndRNNs/ROC-480.webp new file mode 100644 index 00000000..508fba18 Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/ROC-480.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/ROC-800.webp b/assets/img/2023-11-09-TransformersAndRNNs/ROC-800.webp new file mode 100644 index 00000000..2b194b80 Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/ROC-800.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/Testing_yesLSTM-1400.webp b/assets/img/2023-11-09-TransformersAndRNNs/Testing_yesLSTM-1400.webp new file mode 100644 index 00000000..ca1b2795 Binary files 
/dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/Testing_yesLSTM-1400.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/Testing_yesLSTM-480.webp b/assets/img/2023-11-09-TransformersAndRNNs/Testing_yesLSTM-480.webp new file mode 100644 index 00000000..56ce9b3e Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/Testing_yesLSTM-480.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/Testing_yesLSTM-800.webp b/assets/img/2023-11-09-TransformersAndRNNs/Testing_yesLSTM-800.webp new file mode 100644 index 00000000..ca1b2795 Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/Testing_yesLSTM-800.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/no_norm_forcast_yesLSTM-1400.webp b/assets/img/2023-11-09-TransformersAndRNNs/no_norm_forcast_yesLSTM-1400.webp new file mode 100644 index 00000000..1333b306 Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/no_norm_forcast_yesLSTM-1400.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/no_norm_forcast_yesLSTM-480.webp b/assets/img/2023-11-09-TransformersAndRNNs/no_norm_forcast_yesLSTM-480.webp new file mode 100644 index 00000000..dd5c6d82 Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/no_norm_forcast_yesLSTM-480.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/no_norm_forcast_yesLSTM-800.webp b/assets/img/2023-11-09-TransformersAndRNNs/no_norm_forcast_yesLSTM-800.webp new file mode 100644 index 00000000..1333b306 Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/no_norm_forcast_yesLSTM-800.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/no_norm_performance_yesLSTM-1400.webp b/assets/img/2023-11-09-TransformersAndRNNs/no_norm_performance_yesLSTM-1400.webp new file mode 100644 index 00000000..3e038fde Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/no_norm_performance_yesLSTM-1400.webp differ diff 
--git a/assets/img/2023-11-09-TransformersAndRNNs/no_norm_performance_yesLSTM-480.webp b/assets/img/2023-11-09-TransformersAndRNNs/no_norm_performance_yesLSTM-480.webp new file mode 100644 index 00000000..f6b12543 Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/no_norm_performance_yesLSTM-480.webp differ diff --git a/assets/img/2023-11-09-TransformersAndRNNs/no_norm_performance_yesLSTM-800.webp b/assets/img/2023-11-09-TransformersAndRNNs/no_norm_performance_yesLSTM-800.webp new file mode 100644 index 00000000..3e038fde Binary files /dev/null and b/assets/img/2023-11-09-TransformersAndRNNs/no_norm_performance_yesLSTM-800.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/C-1400.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/C-1400.webp new file mode 100644 index 00000000..8b2fa215 Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/C-1400.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/C-480.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/C-480.webp new file mode 100644 index 00000000..6bafa0e3 Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/C-480.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/C-800.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/C-800.webp new file mode 100644 index 00000000..8b2fa215 Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/C-800.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/GNN-1400.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/GNN-1400.webp new file mode 100644 index 00000000..a94c0277 Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/GNN-1400.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/GNN-480.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/GNN-480.webp new file mode 100644 index 
00000000..8e354a43 Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/GNN-480.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/GNN-800.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/GNN-800.webp new file mode 100644 index 00000000..a94c0277 Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/GNN-800.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/H-1400.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/H-1400.webp new file mode 100644 index 00000000..9fd4a9eb Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/H-1400.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/H-480.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/H-480.webp new file mode 100644 index 00000000..eb7a20ba Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/H-480.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/H-800.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/H-800.webp new file mode 100644 index 00000000..9fd4a9eb Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/H-800.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/combined-1400.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/combined-1400.webp new file mode 100644 index 00000000..27c7016a Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/combined-1400.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/combined-480.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/combined-480.webp new file mode 100644 index 00000000..624a5c91 Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/combined-480.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/combined-800.webp 
b/assets/img/2023-11-09-adaptive-controller-graph-eom/combined-800.webp new file mode 100644 index 00000000..27c7016a Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/combined-800.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/gnn_plot-1400.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/gnn_plot-1400.webp new file mode 100644 index 00000000..eb4aaf76 Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/gnn_plot-1400.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/gnn_plot-480.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/gnn_plot-480.webp new file mode 100644 index 00000000..bc98b591 Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/gnn_plot-480.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/gnn_plot-800.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/gnn_plot-800.webp new file mode 100644 index 00000000..eb4aaf76 Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/gnn_plot-800.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/linked_motion-1400.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/linked_motion-1400.webp new file mode 100644 index 00000000..51acaea0 Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/linked_motion-1400.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/linked_motion-480.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/linked_motion-480.webp new file mode 100644 index 00000000..a32f1ad5 Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/linked_motion-480.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/linked_motion-800.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/linked_motion-800.webp new file mode 100644 index 
00000000..51acaea0 Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/linked_motion-800.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/lstm-1400.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/lstm-1400.webp new file mode 100644 index 00000000..bdf13419 Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/lstm-1400.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/lstm-480.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/lstm-480.webp new file mode 100644 index 00000000..f796cc50 Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/lstm-480.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/lstm-800.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/lstm-800.webp new file mode 100644 index 00000000..bdf13419 Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/lstm-800.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/nodes-1400.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/nodes-1400.webp new file mode 100644 index 00000000..b90692bd Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/nodes-1400.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/nodes-480.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/nodes-480.webp new file mode 100644 index 00000000..dbbe2563 Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/nodes-480.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/nodes-800.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/nodes-800.webp new file mode 100644 index 00000000..b90692bd Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/nodes-800.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/panda-1400.webp 
b/assets/img/2023-11-09-adaptive-controller-graph-eom/panda-1400.webp new file mode 100644 index 00000000..aa59569e Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/panda-1400.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/panda-480.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/panda-480.webp new file mode 100644 index 00000000..1867273e Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/panda-480.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/panda-800.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/panda-800.webp new file mode 100644 index 00000000..aa59569e Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/panda-800.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/randomConfig-1400.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/randomConfig-1400.webp new file mode 100644 index 00000000..2001efe5 Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/randomConfig-1400.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/randomConfig-480.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/randomConfig-480.webp new file mode 100644 index 00000000..e494503b Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/randomConfig-480.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/randomConfig-800.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/randomConfig-800.webp new file mode 100644 index 00000000..2001efe5 Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/randomConfig-800.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/reaction_forces-1400.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/reaction_forces-1400.webp new file mode 100644 index 00000000..c80754be Binary 
files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/reaction_forces-1400.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/reaction_forces-480.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/reaction_forces-480.webp new file mode 100644 index 00000000..6e07b927 Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/reaction_forces-480.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/reaction_forces-800.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/reaction_forces-800.webp new file mode 100644 index 00000000..c80754be Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/reaction_forces-800.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/transformer_generic-1400.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/transformer_generic-1400.webp new file mode 100644 index 00000000..2e74b0e1 Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/transformer_generic-1400.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/transformer_generic-480.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/transformer_generic-480.webp new file mode 100644 index 00000000..f82bb397 Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/transformer_generic-480.webp differ diff --git a/assets/img/2023-11-09-adaptive-controller-graph-eom/transformer_generic-800.webp b/assets/img/2023-11-09-adaptive-controller-graph-eom/transformer_generic-800.webp new file mode 100644 index 00000000..2e74b0e1 Binary files /dev/null and b/assets/img/2023-11-09-adaptive-controller-graph-eom/transformer_generic-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/12x12mask_input-1400.webp b/assets/img/2023-11-09-autodecoders/12x12mask_input-1400.webp new file mode 100644 index 00000000..b4463c19 Binary files /dev/null and 
b/assets/img/2023-11-09-autodecoders/12x12mask_input-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/12x12mask_input-480.webp b/assets/img/2023-11-09-autodecoders/12x12mask_input-480.webp new file mode 100644 index 00000000..a2b36649 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/12x12mask_input-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/12x12mask_input-800.webp b/assets/img/2023-11-09-autodecoders/12x12mask_input-800.webp new file mode 100644 index 00000000..b4463c19 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/12x12mask_input-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/12x12maskoutput-1400.webp b/assets/img/2023-11-09-autodecoders/12x12maskoutput-1400.webp new file mode 100644 index 00000000..f581a0c5 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/12x12maskoutput-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/12x12maskoutput-480.webp b/assets/img/2023-11-09-autodecoders/12x12maskoutput-480.webp new file mode 100644 index 00000000..120e77d1 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/12x12maskoutput-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/12x12maskoutput-800.webp b/assets/img/2023-11-09-autodecoders/12x12maskoutput-800.webp new file mode 100644 index 00000000..f581a0c5 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/12x12maskoutput-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/16x16mask_input-1400.webp b/assets/img/2023-11-09-autodecoders/16x16mask_input-1400.webp new file mode 100644 index 00000000..500b2d0e Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/16x16mask_input-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/16x16mask_input-480.webp b/assets/img/2023-11-09-autodecoders/16x16mask_input-480.webp new file mode 100644 index 00000000..13689c32 Binary files /dev/null and 
b/assets/img/2023-11-09-autodecoders/16x16mask_input-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/16x16mask_input-800.webp b/assets/img/2023-11-09-autodecoders/16x16mask_input-800.webp new file mode 100644 index 00000000..500b2d0e Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/16x16mask_input-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/16x16mask_output-1400.webp b/assets/img/2023-11-09-autodecoders/16x16mask_output-1400.webp new file mode 100644 index 00000000..f1ba9c30 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/16x16mask_output-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/16x16mask_output-480.webp b/assets/img/2023-11-09-autodecoders/16x16mask_output-480.webp new file mode 100644 index 00000000..41f45b34 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/16x16mask_output-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/16x16mask_output-800.webp b/assets/img/2023-11-09-autodecoders/16x16mask_output-800.webp new file mode 100644 index 00000000..f1ba9c30 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/16x16mask_output-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/20x20mask_input-1400.webp b/assets/img/2023-11-09-autodecoders/20x20mask_input-1400.webp new file mode 100644 index 00000000..3f762543 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/20x20mask_input-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/20x20mask_input-480.webp b/assets/img/2023-11-09-autodecoders/20x20mask_input-480.webp new file mode 100644 index 00000000..50e9cc63 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/20x20mask_input-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/20x20mask_input-800.webp b/assets/img/2023-11-09-autodecoders/20x20mask_input-800.webp new file mode 100644 index 00000000..3f762543 Binary files /dev/null and 
b/assets/img/2023-11-09-autodecoders/20x20mask_input-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/20x20mask_output-1400.webp b/assets/img/2023-11-09-autodecoders/20x20mask_output-1400.webp new file mode 100644 index 00000000..6f8ae611 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/20x20mask_output-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/20x20mask_output-480.webp b/assets/img/2023-11-09-autodecoders/20x20mask_output-480.webp new file mode 100644 index 00000000..ded73352 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/20x20mask_output-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/20x20mask_output-800.webp b/assets/img/2023-11-09-autodecoders/20x20mask_output-800.webp new file mode 100644 index 00000000..6f8ae611 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/20x20mask_output-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/8x8mask_input-1400.webp b/assets/img/2023-11-09-autodecoders/8x8mask_input-1400.webp new file mode 100644 index 00000000..bdffa6b3 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/8x8mask_input-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/8x8mask_input-480.webp b/assets/img/2023-11-09-autodecoders/8x8mask_input-480.webp new file mode 100644 index 00000000..5baef82c Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/8x8mask_input-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/8x8mask_input-800.webp b/assets/img/2023-11-09-autodecoders/8x8mask_input-800.webp new file mode 100644 index 00000000..bdffa6b3 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/8x8mask_input-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/8x8mask_output-1400.webp b/assets/img/2023-11-09-autodecoders/8x8mask_output-1400.webp new file mode 100644 index 00000000..323f489a Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/8x8mask_output-1400.webp 
differ diff --git a/assets/img/2023-11-09-autodecoders/8x8mask_output-480.webp b/assets/img/2023-11-09-autodecoders/8x8mask_output-480.webp new file mode 100644 index 00000000..b0d1cb1c Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/8x8mask_output-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/8x8mask_output-800.webp b/assets/img/2023-11-09-autodecoders/8x8mask_output-800.webp new file mode 100644 index 00000000..323f489a Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/8x8mask_output-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autodecoder_interpolation-1400.webp b/assets/img/2023-11-09-autodecoders/autodecoder_interpolation-1400.webp new file mode 100644 index 00000000..a1c8f2bc Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autodecoder_interpolation-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autodecoder_interpolation-480.webp b/assets/img/2023-11-09-autodecoders/autodecoder_interpolation-480.webp new file mode 100644 index 00000000..5d6103ee Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autodecoder_interpolation-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autodecoder_interpolation-800.webp b/assets/img/2023-11-09-autodecoders/autodecoder_interpolation-800.webp new file mode 100644 index 00000000..a1c8f2bc Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autodecoder_interpolation-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autodecoderloss-1400.webp b/assets/img/2023-11-09-autodecoders/autodecoderloss-1400.webp new file mode 100644 index 00000000..7dad6a8a Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autodecoderloss-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autodecoderloss-480.webp b/assets/img/2023-11-09-autodecoders/autodecoderloss-480.webp new file mode 100644 index 00000000..c6820c67 Binary files /dev/null and 
b/assets/img/2023-11-09-autodecoders/autodecoderloss-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autodecoderloss-800.webp b/assets/img/2023-11-09-autodecoders/autodecoderloss-800.webp new file mode 100644 index 00000000..7dad6a8a Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autodecoderloss-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autodecodersampleoutput-1400.webp b/assets/img/2023-11-09-autodecoders/autodecodersampleoutput-1400.webp new file mode 100644 index 00000000..768bc0fe Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autodecodersampleoutput-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autodecodersampleoutput-480.webp b/assets/img/2023-11-09-autodecoders/autodecodersampleoutput-480.webp new file mode 100644 index 00000000..fc8ce753 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autodecodersampleoutput-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autodecodersampleoutput-800.webp b/assets/img/2023-11-09-autodecoders/autodecodersampleoutput-800.webp new file mode 100644 index 00000000..768bc0fe Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autodecodersampleoutput-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autoencoder_input_4-1400.webp b/assets/img/2023-11-09-autodecoders/autoencoder_input_4-1400.webp new file mode 100644 index 00000000..cbefd78b Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autoencoder_input_4-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autoencoder_input_4-480.webp b/assets/img/2023-11-09-autodecoders/autoencoder_input_4-480.webp new file mode 100644 index 00000000..b477a99d Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autoencoder_input_4-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autoencoder_input_4-800.webp b/assets/img/2023-11-09-autodecoders/autoencoder_input_4-800.webp new file mode 100644 
index 00000000..cbefd78b Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autoencoder_input_4-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autoencoder_input_7-1400.webp b/assets/img/2023-11-09-autodecoders/autoencoder_input_7-1400.webp new file mode 100644 index 00000000..d24ad8ee Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autoencoder_input_7-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autoencoder_input_7-480.webp b/assets/img/2023-11-09-autodecoders/autoencoder_input_7-480.webp new file mode 100644 index 00000000..262ba1ee Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autoencoder_input_7-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autoencoder_input_7-800.webp b/assets/img/2023-11-09-autodecoders/autoencoder_input_7-800.webp new file mode 100644 index 00000000..d24ad8ee Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autoencoder_input_7-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autoencoder_interpolation-1400.webp b/assets/img/2023-11-09-autodecoders/autoencoder_interpolation-1400.webp new file mode 100644 index 00000000..e9635f7f Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autoencoder_interpolation-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autoencoder_interpolation-480.webp b/assets/img/2023-11-09-autodecoders/autoencoder_interpolation-480.webp new file mode 100644 index 00000000..8911ab7c Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autoencoder_interpolation-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autoencoder_interpolation-800.webp b/assets/img/2023-11-09-autodecoders/autoencoder_interpolation-800.webp new file mode 100644 index 00000000..e9635f7f Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autoencoder_interpolation-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autoencoder_output_4-1400.webp 
b/assets/img/2023-11-09-autodecoders/autoencoder_output_4-1400.webp new file mode 100644 index 00000000..4c0edb24 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autoencoder_output_4-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autoencoder_output_4-480.webp b/assets/img/2023-11-09-autodecoders/autoencoder_output_4-480.webp new file mode 100644 index 00000000..c814b82c Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autoencoder_output_4-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autoencoder_output_4-800.webp b/assets/img/2023-11-09-autodecoders/autoencoder_output_4-800.webp new file mode 100644 index 00000000..4c0edb24 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autoencoder_output_4-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autoencoder_output_7-1400.webp b/assets/img/2023-11-09-autodecoders/autoencoder_output_7-1400.webp new file mode 100644 index 00000000..ff3f2781 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autoencoder_output_7-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autoencoder_output_7-480.webp b/assets/img/2023-11-09-autodecoders/autoencoder_output_7-480.webp new file mode 100644 index 00000000..15d1d55e Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autoencoder_output_7-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autoencoder_output_7-800.webp b/assets/img/2023-11-09-autodecoders/autoencoder_output_7-800.webp new file mode 100644 index 00000000..ff3f2781 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autoencoder_output_7-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autoencoder_schematic-1400.webp b/assets/img/2023-11-09-autodecoders/autoencoder_schematic-1400.webp new file mode 100644 index 00000000..2bb87592 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autoencoder_schematic-1400.webp differ diff --git 
a/assets/img/2023-11-09-autodecoders/autoencoder_schematic-480.webp b/assets/img/2023-11-09-autodecoders/autoencoder_schematic-480.webp new file mode 100644 index 00000000..16cc9379 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autoencoder_schematic-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autoencoder_schematic-800.webp b/assets/img/2023-11-09-autodecoders/autoencoder_schematic-800.webp new file mode 100644 index 00000000..2bb87592 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autoencoder_schematic-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autoencoderloss-1400.webp b/assets/img/2023-11-09-autodecoders/autoencoderloss-1400.webp new file mode 100644 index 00000000..570bec0b Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autoencoderloss-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autoencoderloss-480.webp b/assets/img/2023-11-09-autodecoders/autoencoderloss-480.webp new file mode 100644 index 00000000..2d96d1b8 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autoencoderloss-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autoencoderloss-800.webp b/assets/img/2023-11-09-autodecoders/autoencoderloss-800.webp new file mode 100644 index 00000000..570bec0b Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autoencoderloss-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autoencodersampleoutput-1400.webp b/assets/img/2023-11-09-autodecoders/autoencodersampleoutput-1400.webp new file mode 100644 index 00000000..ec929f6e Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autoencodersampleoutput-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autoencodersampleoutput-480.webp b/assets/img/2023-11-09-autodecoders/autoencodersampleoutput-480.webp new file mode 100644 index 00000000..47cc55f9 Binary files /dev/null and 
b/assets/img/2023-11-09-autodecoders/autoencodersampleoutput-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/autoencodersampleoutput-800.webp b/assets/img/2023-11-09-autodecoders/autoencodersampleoutput-800.webp new file mode 100644 index 00000000..ec929f6e Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/autoencodersampleoutput-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/encoderdecoder-1400.webp b/assets/img/2023-11-09-autodecoders/encoderdecoder-1400.webp new file mode 100644 index 00000000..7122717c Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/encoderdecoder-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/encoderdecoder-480.webp b/assets/img/2023-11-09-autodecoders/encoderdecoder-480.webp new file mode 100644 index 00000000..a12464a5 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/encoderdecoder-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/encoderdecoder-800.webp b/assets/img/2023-11-09-autodecoders/encoderdecoder-800.webp new file mode 100644 index 00000000..7122717c Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/encoderdecoder-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/lossfn-1400.webp b/assets/img/2023-11-09-autodecoders/lossfn-1400.webp new file mode 100644 index 00000000..fb5299c9 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/lossfn-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/lossfn-480.webp b/assets/img/2023-11-09-autodecoders/lossfn-480.webp new file mode 100644 index 00000000..3b087bd2 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/lossfn-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/lossfn-800.webp b/assets/img/2023-11-09-autodecoders/lossfn-800.webp new file mode 100644 index 00000000..fb5299c9 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/lossfn-800.webp differ diff --git 
a/assets/img/2023-11-09-autodecoders/progress1-1400.webp b/assets/img/2023-11-09-autodecoders/progress1-1400.webp new file mode 100644 index 00000000..1ac07adf Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/progress1-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/progress1-480.webp b/assets/img/2023-11-09-autodecoders/progress1-480.webp new file mode 100644 index 00000000..e1da17c0 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/progress1-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/progress1-800.webp b/assets/img/2023-11-09-autodecoders/progress1-800.webp new file mode 100644 index 00000000..1ac07adf Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/progress1-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/progress2-1400.webp b/assets/img/2023-11-09-autodecoders/progress2-1400.webp new file mode 100644 index 00000000..87552ebe Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/progress2-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/progress2-480.webp b/assets/img/2023-11-09-autodecoders/progress2-480.webp new file mode 100644 index 00000000..8e038725 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/progress2-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/progress2-800.webp b/assets/img/2023-11-09-autodecoders/progress2-800.webp new file mode 100644 index 00000000..87552ebe Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/progress2-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/progress3-1400.webp b/assets/img/2023-11-09-autodecoders/progress3-1400.webp new file mode 100644 index 00000000..abb2b18c Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/progress3-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/progress3-480.webp b/assets/img/2023-11-09-autodecoders/progress3-480.webp new file mode 100644 index 00000000..7f601412 Binary files 
/dev/null and b/assets/img/2023-11-09-autodecoders/progress3-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/progress3-800.webp b/assets/img/2023-11-09-autodecoders/progress3-800.webp new file mode 100644 index 00000000..abb2b18c Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/progress3-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/progress4-1400.webp b/assets/img/2023-11-09-autodecoders/progress4-1400.webp new file mode 100644 index 00000000..91c5385e Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/progress4-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/progress4-480.webp b/assets/img/2023-11-09-autodecoders/progress4-480.webp new file mode 100644 index 00000000..de2ea739 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/progress4-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/progress4-800.webp b/assets/img/2023-11-09-autodecoders/progress4-800.webp new file mode 100644 index 00000000..91c5385e Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/progress4-800.webp differ diff --git a/assets/img/2023-11-09-autodecoders/tsne_autodecoder-1400.webp b/assets/img/2023-11-09-autodecoders/tsne_autodecoder-1400.webp new file mode 100644 index 00000000..6598b86f Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/tsne_autodecoder-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/tsne_autodecoder-480.webp b/assets/img/2023-11-09-autodecoders/tsne_autodecoder-480.webp new file mode 100644 index 00000000..377f4f62 Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/tsne_autodecoder-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/tsne_autodecoder-800.webp b/assets/img/2023-11-09-autodecoders/tsne_autodecoder-800.webp new file mode 100644 index 00000000..6598b86f Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/tsne_autodecoder-800.webp differ diff --git 
a/assets/img/2023-11-09-autodecoders/tsne_autoencoder-1400.webp b/assets/img/2023-11-09-autodecoders/tsne_autoencoder-1400.webp new file mode 100644 index 00000000..908b098b Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/tsne_autoencoder-1400.webp differ diff --git a/assets/img/2023-11-09-autodecoders/tsne_autoencoder-480.webp b/assets/img/2023-11-09-autodecoders/tsne_autoencoder-480.webp new file mode 100644 index 00000000..8db95abf Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/tsne_autoencoder-480.webp differ diff --git a/assets/img/2023-11-09-autodecoders/tsne_autoencoder-800.webp b/assets/img/2023-11-09-autodecoders/tsne_autoencoder-800.webp new file mode 100644 index 00000000..908b098b Binary files /dev/null and b/assets/img/2023-11-09-autodecoders/tsne_autoencoder-800.webp differ diff --git a/assets/img/2023-11-09-conflict-causality/img1_map-1400.webp b/assets/img/2023-11-09-conflict-causality/img1_map-1400.webp new file mode 100644 index 00000000..4c072ad9 Binary files /dev/null and b/assets/img/2023-11-09-conflict-causality/img1_map-1400.webp differ diff --git a/assets/img/2023-11-09-conflict-causality/img1_map-480.webp b/assets/img/2023-11-09-conflict-causality/img1_map-480.webp new file mode 100644 index 00000000..bfd4ae67 Binary files /dev/null and b/assets/img/2023-11-09-conflict-causality/img1_map-480.webp differ diff --git a/assets/img/2023-11-09-conflict-causality/img1_map-800.webp b/assets/img/2023-11-09-conflict-causality/img1_map-800.webp new file mode 100644 index 00000000..4c072ad9 Binary files /dev/null and b/assets/img/2023-11-09-conflict-causality/img1_map-800.webp differ diff --git a/assets/img/2023-11-09-conflict-causality/img2_parser-1400.webp b/assets/img/2023-11-09-conflict-causality/img2_parser-1400.webp new file mode 100644 index 00000000..d6f0b82f Binary files /dev/null and b/assets/img/2023-11-09-conflict-causality/img2_parser-1400.webp differ diff --git 
a/assets/img/2023-11-09-conflict-causality/img2_parser-480.webp b/assets/img/2023-11-09-conflict-causality/img2_parser-480.webp new file mode 100644 index 00000000..cb9b4ced Binary files /dev/null and b/assets/img/2023-11-09-conflict-causality/img2_parser-480.webp differ diff --git a/assets/img/2023-11-09-conflict-causality/img2_parser-800.webp b/assets/img/2023-11-09-conflict-causality/img2_parser-800.webp new file mode 100644 index 00000000..d6f0b82f Binary files /dev/null and b/assets/img/2023-11-09-conflict-causality/img2_parser-800.webp differ diff --git a/assets/img/2023-11-09-conflict-causality/img3_wordcloud-1400.webp b/assets/img/2023-11-09-conflict-causality/img3_wordcloud-1400.webp new file mode 100644 index 00000000..5ebee939 Binary files /dev/null and b/assets/img/2023-11-09-conflict-causality/img3_wordcloud-1400.webp differ diff --git a/assets/img/2023-11-09-conflict-causality/img3_wordcloud-480.webp b/assets/img/2023-11-09-conflict-causality/img3_wordcloud-480.webp new file mode 100644 index 00000000..99c7cbac Binary files /dev/null and b/assets/img/2023-11-09-conflict-causality/img3_wordcloud-480.webp differ diff --git a/assets/img/2023-11-09-conflict-causality/img3_wordcloud-800.webp b/assets/img/2023-11-09-conflict-causality/img3_wordcloud-800.webp new file mode 100644 index 00000000..5ebee939 Binary files /dev/null and b/assets/img/2023-11-09-conflict-causality/img3_wordcloud-800.webp differ diff --git a/assets/img/2023-11-09-conflict-causality/img4_articlecounts-1400.webp b/assets/img/2023-11-09-conflict-causality/img4_articlecounts-1400.webp new file mode 100644 index 00000000..87eeb154 Binary files /dev/null and b/assets/img/2023-11-09-conflict-causality/img4_articlecounts-1400.webp differ diff --git a/assets/img/2023-11-09-conflict-causality/img4_articlecounts-480.webp b/assets/img/2023-11-09-conflict-causality/img4_articlecounts-480.webp new file mode 100644 index 00000000..5f4ac9f8 Binary files /dev/null and 
b/assets/img/2023-11-09-conflict-causality/img4_articlecounts-480.webp differ diff --git a/assets/img/2023-11-09-conflict-causality/img4_articlecounts-800.webp b/assets/img/2023-11-09-conflict-causality/img4_articlecounts-800.webp new file mode 100644 index 00000000..87eeb154 Binary files /dev/null and b/assets/img/2023-11-09-conflict-causality/img4_articlecounts-800.webp differ diff --git a/assets/img/2023-11-09-conflict-causality/img5_performance-1400.webp b/assets/img/2023-11-09-conflict-causality/img5_performance-1400.webp new file mode 100644 index 00000000..1460c985 Binary files /dev/null and b/assets/img/2023-11-09-conflict-causality/img5_performance-1400.webp differ diff --git a/assets/img/2023-11-09-conflict-causality/img5_performance-480.webp b/assets/img/2023-11-09-conflict-causality/img5_performance-480.webp new file mode 100644 index 00000000..a29ef8cc Binary files /dev/null and b/assets/img/2023-11-09-conflict-causality/img5_performance-480.webp differ diff --git a/assets/img/2023-11-09-conflict-causality/img5_performance-800.webp b/assets/img/2023-11-09-conflict-causality/img5_performance-800.webp new file mode 100644 index 00000000..1460c985 Binary files /dev/null and b/assets/img/2023-11-09-conflict-causality/img5_performance-800.webp differ diff --git a/assets/img/2023-11-09-ddsp-proposal/adsr-1400.webp b/assets/img/2023-11-09-ddsp-proposal/adsr-1400.webp new file mode 100644 index 00000000..89a13127 Binary files /dev/null and b/assets/img/2023-11-09-ddsp-proposal/adsr-1400.webp differ diff --git a/assets/img/2023-11-09-ddsp-proposal/adsr-480.webp b/assets/img/2023-11-09-ddsp-proposal/adsr-480.webp new file mode 100644 index 00000000..108f8626 Binary files /dev/null and b/assets/img/2023-11-09-ddsp-proposal/adsr-480.webp differ diff --git a/assets/img/2023-11-09-ddsp-proposal/adsr-800.webp b/assets/img/2023-11-09-ddsp-proposal/adsr-800.webp new file mode 100644 index 00000000..89a13127 Binary files /dev/null and 
b/assets/img/2023-11-09-ddsp-proposal/adsr-800.webp differ diff --git a/assets/img/2023-11-09-ddsp-proposal/brass_freq-1400.webp b/assets/img/2023-11-09-ddsp-proposal/brass_freq-1400.webp new file mode 100644 index 00000000..b9b68b9f Binary files /dev/null and b/assets/img/2023-11-09-ddsp-proposal/brass_freq-1400.webp differ diff --git a/assets/img/2023-11-09-ddsp-proposal/brass_freq-480.webp b/assets/img/2023-11-09-ddsp-proposal/brass_freq-480.webp new file mode 100644 index 00000000..f01ba221 Binary files /dev/null and b/assets/img/2023-11-09-ddsp-proposal/brass_freq-480.webp differ diff --git a/assets/img/2023-11-09-ddsp-proposal/brass_freq-800.webp b/assets/img/2023-11-09-ddsp-proposal/brass_freq-800.webp new file mode 100644 index 00000000..b9b68b9f Binary files /dev/null and b/assets/img/2023-11-09-ddsp-proposal/brass_freq-800.webp differ diff --git a/assets/img/2023-11-09-ddsp-proposal/guitar_acoustic-1400.webp b/assets/img/2023-11-09-ddsp-proposal/guitar_acoustic-1400.webp new file mode 100644 index 00000000..8eb11276 Binary files /dev/null and b/assets/img/2023-11-09-ddsp-proposal/guitar_acoustic-1400.webp differ diff --git a/assets/img/2023-11-09-ddsp-proposal/guitar_acoustic-480.webp b/assets/img/2023-11-09-ddsp-proposal/guitar_acoustic-480.webp new file mode 100644 index 00000000..2dbfbb7b Binary files /dev/null and b/assets/img/2023-11-09-ddsp-proposal/guitar_acoustic-480.webp differ diff --git a/assets/img/2023-11-09-ddsp-proposal/guitar_acoustic-800.webp b/assets/img/2023-11-09-ddsp-proposal/guitar_acoustic-800.webp new file mode 100644 index 00000000..8eb11276 Binary files /dev/null and b/assets/img/2023-11-09-ddsp-proposal/guitar_acoustic-800.webp differ diff --git a/assets/img/2023-11-09-ddsp-proposal/keyboard_acoustic-1400.webp b/assets/img/2023-11-09-ddsp-proposal/keyboard_acoustic-1400.webp new file mode 100644 index 00000000..f05eb12e Binary files /dev/null and b/assets/img/2023-11-09-ddsp-proposal/keyboard_acoustic-1400.webp differ diff --git 
a/assets/img/2023-11-09-ddsp-proposal/keyboard_acoustic-480.webp b/assets/img/2023-11-09-ddsp-proposal/keyboard_acoustic-480.webp new file mode 100644 index 00000000..4b56f8a9 Binary files /dev/null and b/assets/img/2023-11-09-ddsp-proposal/keyboard_acoustic-480.webp differ diff --git a/assets/img/2023-11-09-ddsp-proposal/keyboard_acoustic-800.webp b/assets/img/2023-11-09-ddsp-proposal/keyboard_acoustic-800.webp new file mode 100644 index 00000000..f05eb12e Binary files /dev/null and b/assets/img/2023-11-09-ddsp-proposal/keyboard_acoustic-800.webp differ diff --git a/assets/img/2023-11-09-ddsp-proposal/sine_freq-1400.webp b/assets/img/2023-11-09-ddsp-proposal/sine_freq-1400.webp new file mode 100644 index 00000000..02d8d074 Binary files /dev/null and b/assets/img/2023-11-09-ddsp-proposal/sine_freq-1400.webp differ diff --git a/assets/img/2023-11-09-ddsp-proposal/sine_freq-480.webp b/assets/img/2023-11-09-ddsp-proposal/sine_freq-480.webp new file mode 100644 index 00000000..0c0b8b04 Binary files /dev/null and b/assets/img/2023-11-09-ddsp-proposal/sine_freq-480.webp differ diff --git a/assets/img/2023-11-09-ddsp-proposal/sine_freq-800.webp b/assets/img/2023-11-09-ddsp-proposal/sine_freq-800.webp new file mode 100644 index 00000000..02d8d074 Binary files /dev/null and b/assets/img/2023-11-09-ddsp-proposal/sine_freq-800.webp differ diff --git a/assets/img/2023-11-09-ddsp-proposal/spectrogram-1400.webp b/assets/img/2023-11-09-ddsp-proposal/spectrogram-1400.webp new file mode 100644 index 00000000..f038ae22 Binary files /dev/null and b/assets/img/2023-11-09-ddsp-proposal/spectrogram-1400.webp differ diff --git a/assets/img/2023-11-09-ddsp-proposal/spectrogram-480.webp b/assets/img/2023-11-09-ddsp-proposal/spectrogram-480.webp new file mode 100644 index 00000000..7a57f791 Binary files /dev/null and b/assets/img/2023-11-09-ddsp-proposal/spectrogram-480.webp differ diff --git a/assets/img/2023-11-09-ddsp-proposal/spectrogram-800.webp 
b/assets/img/2023-11-09-ddsp-proposal/spectrogram-800.webp new file mode 100644 index 00000000..f038ae22 Binary files /dev/null and b/assets/img/2023-11-09-ddsp-proposal/spectrogram-800.webp differ diff --git a/assets/img/2023-11-09-ddsp-proposal/string_acoustic-1400.webp b/assets/img/2023-11-09-ddsp-proposal/string_acoustic-1400.webp new file mode 100644 index 00000000..5ad5fbc5 Binary files /dev/null and b/assets/img/2023-11-09-ddsp-proposal/string_acoustic-1400.webp differ diff --git a/assets/img/2023-11-09-ddsp-proposal/string_acoustic-480.webp b/assets/img/2023-11-09-ddsp-proposal/string_acoustic-480.webp new file mode 100644 index 00000000..18600802 Binary files /dev/null and b/assets/img/2023-11-09-ddsp-proposal/string_acoustic-480.webp differ diff --git a/assets/img/2023-11-09-ddsp-proposal/string_acoustic-800.webp b/assets/img/2023-11-09-ddsp-proposal/string_acoustic-800.webp new file mode 100644 index 00000000..5ad5fbc5 Binary files /dev/null and b/assets/img/2023-11-09-ddsp-proposal/string_acoustic-800.webp differ diff --git a/assets/img/2023-11-09-ddsp-proposal/synth_lead_synthetic-1400.webp b/assets/img/2023-11-09-ddsp-proposal/synth_lead_synthetic-1400.webp new file mode 100644 index 00000000..56b5d6bc Binary files /dev/null and b/assets/img/2023-11-09-ddsp-proposal/synth_lead_synthetic-1400.webp differ diff --git a/assets/img/2023-11-09-ddsp-proposal/synth_lead_synthetic-480.webp b/assets/img/2023-11-09-ddsp-proposal/synth_lead_synthetic-480.webp new file mode 100644 index 00000000..ba58b719 Binary files /dev/null and b/assets/img/2023-11-09-ddsp-proposal/synth_lead_synthetic-480.webp differ diff --git a/assets/img/2023-11-09-ddsp-proposal/synth_lead_synthetic-800.webp b/assets/img/2023-11-09-ddsp-proposal/synth_lead_synthetic-800.webp new file mode 100644 index 00000000..56b5d6bc Binary files /dev/null and b/assets/img/2023-11-09-ddsp-proposal/synth_lead_synthetic-800.webp differ diff --git 
a/assets/img/2023-11-09-deep-connectome-clustering/background_visual-1400.webp b/assets/img/2023-11-09-deep-connectome-clustering/background_visual-1400.webp new file mode 100644 index 00000000..b68ff341 Binary files /dev/null and b/assets/img/2023-11-09-deep-connectome-clustering/background_visual-1400.webp differ diff --git a/assets/img/2023-11-09-deep-connectome-clustering/background_visual-480.webp b/assets/img/2023-11-09-deep-connectome-clustering/background_visual-480.webp new file mode 100644 index 00000000..44538950 Binary files /dev/null and b/assets/img/2023-11-09-deep-connectome-clustering/background_visual-480.webp differ diff --git a/assets/img/2023-11-09-deep-connectome-clustering/background_visual-800.webp b/assets/img/2023-11-09-deep-connectome-clustering/background_visual-800.webp new file mode 100644 index 00000000..b68ff341 Binary files /dev/null and b/assets/img/2023-11-09-deep-connectome-clustering/background_visual-800.webp differ diff --git a/assets/img/2023-11-09-deep-connectome-clustering/clustering-cell-type-1400.webp b/assets/img/2023-11-09-deep-connectome-clustering/clustering-cell-type-1400.webp new file mode 100644 index 00000000..92cc032b Binary files /dev/null and b/assets/img/2023-11-09-deep-connectome-clustering/clustering-cell-type-1400.webp differ diff --git a/assets/img/2023-11-09-deep-connectome-clustering/clustering-cell-type-480.webp b/assets/img/2023-11-09-deep-connectome-clustering/clustering-cell-type-480.webp new file mode 100644 index 00000000..7585be43 Binary files /dev/null and b/assets/img/2023-11-09-deep-connectome-clustering/clustering-cell-type-480.webp differ diff --git a/assets/img/2023-11-09-deep-connectome-clustering/clustering-cell-type-800.webp b/assets/img/2023-11-09-deep-connectome-clustering/clustering-cell-type-800.webp new file mode 100644 index 00000000..92cc032b Binary files /dev/null and b/assets/img/2023-11-09-deep-connectome-clustering/clustering-cell-type-800.webp differ diff --git 
a/assets/img/2023-11-09-deep-connectome-clustering/explore_cluster-1400.webp b/assets/img/2023-11-09-deep-connectome-clustering/explore_cluster-1400.webp new file mode 100644 index 00000000..5e6d4143 Binary files /dev/null and b/assets/img/2023-11-09-deep-connectome-clustering/explore_cluster-1400.webp differ diff --git a/assets/img/2023-11-09-deep-connectome-clustering/explore_cluster-480.webp b/assets/img/2023-11-09-deep-connectome-clustering/explore_cluster-480.webp new file mode 100644 index 00000000..81966dce Binary files /dev/null and b/assets/img/2023-11-09-deep-connectome-clustering/explore_cluster-480.webp differ diff --git a/assets/img/2023-11-09-deep-connectome-clustering/explore_cluster-800.webp b/assets/img/2023-11-09-deep-connectome-clustering/explore_cluster-800.webp new file mode 100644 index 00000000..5e6d4143 Binary files /dev/null and b/assets/img/2023-11-09-deep-connectome-clustering/explore_cluster-800.webp differ diff --git a/assets/img/2023-11-09-deep-connectome-clustering/fruit-fly-connectome-1400.webp b/assets/img/2023-11-09-deep-connectome-clustering/fruit-fly-connectome-1400.webp new file mode 100644 index 00000000..cefc16c9 Binary files /dev/null and b/assets/img/2023-11-09-deep-connectome-clustering/fruit-fly-connectome-1400.webp differ diff --git a/assets/img/2023-11-09-deep-connectome-clustering/fruit-fly-connectome-480.webp b/assets/img/2023-11-09-deep-connectome-clustering/fruit-fly-connectome-480.webp new file mode 100644 index 00000000..df17d69e Binary files /dev/null and b/assets/img/2023-11-09-deep-connectome-clustering/fruit-fly-connectome-480.webp differ diff --git a/assets/img/2023-11-09-deep-connectome-clustering/fruit-fly-connectome-800.webp b/assets/img/2023-11-09-deep-connectome-clustering/fruit-fly-connectome-800.webp new file mode 100644 index 00000000..cefc16c9 Binary files /dev/null and b/assets/img/2023-11-09-deep-connectome-clustering/fruit-fly-connectome-800.webp differ diff --git 
a/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-auc-ap-1400.webp b/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-auc-ap-1400.webp new file mode 100644 index 00000000..18e65d08 Binary files /dev/null and b/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-auc-ap-1400.webp differ diff --git a/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-auc-ap-480.webp b/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-auc-ap-480.webp new file mode 100644 index 00000000..5767953f Binary files /dev/null and b/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-auc-ap-480.webp differ diff --git a/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-auc-ap-800.webp b/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-auc-ap-800.webp new file mode 100644 index 00000000..18e65d08 Binary files /dev/null and b/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-auc-ap-800.webp differ diff --git a/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-task-1400.webp b/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-task-1400.webp new file mode 100644 index 00000000..4ef98572 Binary files /dev/null and b/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-task-1400.webp differ diff --git a/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-task-480.webp b/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-task-480.webp new file mode 100644 index 00000000..29792437 Binary files /dev/null and b/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-task-480.webp differ diff --git a/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-task-800.webp b/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-task-800.webp new file mode 100644 index 00000000..4ef98572 Binary files /dev/null and 
b/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-task-800.webp differ diff --git a/assets/img/2023-11-09-deep-connectome-clustering/vgae-embedding-1400.webp b/assets/img/2023-11-09-deep-connectome-clustering/vgae-embedding-1400.webp new file mode 100644 index 00000000..e4d7b19c Binary files /dev/null and b/assets/img/2023-11-09-deep-connectome-clustering/vgae-embedding-1400.webp differ diff --git a/assets/img/2023-11-09-deep-connectome-clustering/vgae-embedding-480.webp b/assets/img/2023-11-09-deep-connectome-clustering/vgae-embedding-480.webp new file mode 100644 index 00000000..5cd3996e Binary files /dev/null and b/assets/img/2023-11-09-deep-connectome-clustering/vgae-embedding-480.webp differ diff --git a/assets/img/2023-11-09-deep-connectome-clustering/vgae-embedding-800.webp b/assets/img/2023-11-09-deep-connectome-clustering/vgae-embedding-800.webp new file mode 100644 index 00000000..e4d7b19c Binary files /dev/null and b/assets/img/2023-11-09-deep-connectome-clustering/vgae-embedding-800.webp differ diff --git a/assets/img/2023-11-09-dof-visual-place-recognition-satellite/camera_intrinsic_extrinsic-1400.webp b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/camera_intrinsic_extrinsic-1400.webp new file mode 100644 index 00000000..5b347607 Binary files /dev/null and b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/camera_intrinsic_extrinsic-1400.webp differ diff --git a/assets/img/2023-11-09-dof-visual-place-recognition-satellite/camera_intrinsic_extrinsic-480.webp b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/camera_intrinsic_extrinsic-480.webp new file mode 100644 index 00000000..9f702dcb Binary files /dev/null and b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/camera_intrinsic_extrinsic-480.webp differ diff --git a/assets/img/2023-11-09-dof-visual-place-recognition-satellite/camera_intrinsic_extrinsic-800.webp 
b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/camera_intrinsic_extrinsic-800.webp new file mode 100644 index 00000000..5b347607 Binary files /dev/null and b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/camera_intrinsic_extrinsic-800.webp differ diff --git a/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_grid-1400.webp b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_grid-1400.webp new file mode 100644 index 00000000..64ff3b81 Binary files /dev/null and b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_grid-1400.webp differ diff --git a/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_grid-480.webp b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_grid-480.webp new file mode 100644 index 00000000..9cd16a76 Binary files /dev/null and b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_grid-480.webp differ diff --git a/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_grid-800.webp b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_grid-800.webp new file mode 100644 index 00000000..64ff3b81 Binary files /dev/null and b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_grid-800.webp differ diff --git a/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_sample-1400.webp b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_sample-1400.webp new file mode 100644 index 00000000..790ace99 Binary files /dev/null and b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_sample-1400.webp differ diff --git a/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_sample-480.webp b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_sample-480.webp new file mode 100644 index 00000000..be39efd0 Binary files /dev/null and b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_sample-480.webp differ diff --git 
a/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_sample-800.webp b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_sample-800.webp new file mode 100644 index 00000000..790ace99 Binary files /dev/null and b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_sample-800.webp differ diff --git a/assets/img/2023-11-09-dof-visual-place-recognition-satellite/loss_plot-1400.webp b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/loss_plot-1400.webp new file mode 100644 index 00000000..24153d74 Binary files /dev/null and b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/loss_plot-1400.webp differ diff --git a/assets/img/2023-11-09-dof-visual-place-recognition-satellite/loss_plot-480.webp b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/loss_plot-480.webp new file mode 100644 index 00000000..ac011643 Binary files /dev/null and b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/loss_plot-480.webp differ diff --git a/assets/img/2023-11-09-dof-visual-place-recognition-satellite/loss_plot-800.webp b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/loss_plot-800.webp new file mode 100644 index 00000000..24153d74 Binary files /dev/null and b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/loss_plot-800.webp differ diff --git a/assets/img/2023-11-09-dof-visual-place-recognition-satellite/sample_image-1400.webp b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/sample_image-1400.webp new file mode 100644 index 00000000..4689a249 Binary files /dev/null and b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/sample_image-1400.webp differ diff --git a/assets/img/2023-11-09-dof-visual-place-recognition-satellite/sample_image-480.webp b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/sample_image-480.webp new file mode 100644 index 00000000..4a442d95 Binary files /dev/null and 
b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/sample_image-480.webp differ diff --git a/assets/img/2023-11-09-dof-visual-place-recognition-satellite/sample_image-800.webp b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/sample_image-800.webp new file mode 100644 index 00000000..4689a249 Binary files /dev/null and b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/sample_image-800.webp differ diff --git a/assets/img/2023-11-09-dof-visual-place-recognition-satellite/sturges_satellite_vpr-1400.webp b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/sturges_satellite_vpr-1400.webp new file mode 100644 index 00000000..3b689b21 Binary files /dev/null and b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/sturges_satellite_vpr-1400.webp differ diff --git a/assets/img/2023-11-09-dof-visual-place-recognition-satellite/sturges_satellite_vpr-480.webp b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/sturges_satellite_vpr-480.webp new file mode 100644 index 00000000..7f50aaa7 Binary files /dev/null and b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/sturges_satellite_vpr-480.webp differ diff --git a/assets/img/2023-11-09-dof-visual-place-recognition-satellite/sturges_satellite_vpr-800.webp b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/sturges_satellite_vpr-800.webp new file mode 100644 index 00000000..3b689b21 Binary files /dev/null and b/assets/img/2023-11-09-dof-visual-place-recognition-satellite/sturges_satellite_vpr-800.webp differ diff --git a/assets/img/2023-11-09-enforcing-uniformity/ICLR-logo-1400.webp b/assets/img/2023-11-09-enforcing-uniformity/ICLR-logo-1400.webp new file mode 100644 index 00000000..d56968ba Binary files /dev/null and b/assets/img/2023-11-09-enforcing-uniformity/ICLR-logo-1400.webp differ diff --git a/assets/img/2023-11-09-enforcing-uniformity/ICLR-logo-480.webp b/assets/img/2023-11-09-enforcing-uniformity/ICLR-logo-480.webp new file mode 
100644 index 00000000..c9d42d7e Binary files /dev/null and b/assets/img/2023-11-09-enforcing-uniformity/ICLR-logo-480.webp differ diff --git a/assets/img/2023-11-09-enforcing-uniformity/ICLR-logo-800.webp b/assets/img/2023-11-09-enforcing-uniformity/ICLR-logo-800.webp new file mode 100644 index 00000000..d56968ba Binary files /dev/null and b/assets/img/2023-11-09-enforcing-uniformity/ICLR-logo-800.webp differ diff --git a/assets/img/2023-11-09-enforcing-uniformity/ICLR-logo-dark-1400.webp b/assets/img/2023-11-09-enforcing-uniformity/ICLR-logo-dark-1400.webp new file mode 100644 index 00000000..5ed49089 Binary files /dev/null and b/assets/img/2023-11-09-enforcing-uniformity/ICLR-logo-dark-1400.webp differ diff --git a/assets/img/2023-11-09-enforcing-uniformity/ICLR-logo-dark-480.webp b/assets/img/2023-11-09-enforcing-uniformity/ICLR-logo-dark-480.webp new file mode 100644 index 00000000..7f0830c1 Binary files /dev/null and b/assets/img/2023-11-09-enforcing-uniformity/ICLR-logo-dark-480.webp differ diff --git a/assets/img/2023-11-09-enforcing-uniformity/ICLR-logo-dark-800.webp b/assets/img/2023-11-09-enforcing-uniformity/ICLR-logo-dark-800.webp new file mode 100644 index 00000000..5ed49089 Binary files /dev/null and b/assets/img/2023-11-09-enforcing-uniformity/ICLR-logo-dark-800.webp differ diff --git a/assets/img/2023-11-09-enforcing-uniformity/alignment_uniformity-1400.webp b/assets/img/2023-11-09-enforcing-uniformity/alignment_uniformity-1400.webp new file mode 100644 index 00000000..885f1772 Binary files /dev/null and b/assets/img/2023-11-09-enforcing-uniformity/alignment_uniformity-1400.webp differ diff --git a/assets/img/2023-11-09-enforcing-uniformity/alignment_uniformity-480.webp b/assets/img/2023-11-09-enforcing-uniformity/alignment_uniformity-480.webp new file mode 100644 index 00000000..f2b6439c Binary files /dev/null and b/assets/img/2023-11-09-enforcing-uniformity/alignment_uniformity-480.webp differ diff --git 
a/assets/img/2023-11-09-enforcing-uniformity/alignment_uniformity-800.webp b/assets/img/2023-11-09-enforcing-uniformity/alignment_uniformity-800.webp new file mode 100644 index 00000000..885f1772 Binary files /dev/null and b/assets/img/2023-11-09-enforcing-uniformity/alignment_uniformity-800.webp differ diff --git a/assets/img/2023-11-09-enforcing-uniformity/architecture-1400.webp b/assets/img/2023-11-09-enforcing-uniformity/architecture-1400.webp new file mode 100644 index 00000000..f0b51c91 Binary files /dev/null and b/assets/img/2023-11-09-enforcing-uniformity/architecture-1400.webp differ diff --git a/assets/img/2023-11-09-enforcing-uniformity/architecture-480.webp b/assets/img/2023-11-09-enforcing-uniformity/architecture-480.webp new file mode 100644 index 00000000..8b5cf71b Binary files /dev/null and b/assets/img/2023-11-09-enforcing-uniformity/architecture-480.webp differ diff --git a/assets/img/2023-11-09-enforcing-uniformity/architecture-800.webp b/assets/img/2023-11-09-enforcing-uniformity/architecture-800.webp new file mode 100644 index 00000000..f0b51c91 Binary files /dev/null and b/assets/img/2023-11-09-enforcing-uniformity/architecture-800.webp differ diff --git a/assets/img/2023-11-09-enforcing-uniformity/flow-1400.webp b/assets/img/2023-11-09-enforcing-uniformity/flow-1400.webp new file mode 100644 index 00000000..a107b8a2 Binary files /dev/null and b/assets/img/2023-11-09-enforcing-uniformity/flow-1400.webp differ diff --git a/assets/img/2023-11-09-enforcing-uniformity/flow-480.webp b/assets/img/2023-11-09-enforcing-uniformity/flow-480.webp new file mode 100644 index 00000000..06235780 Binary files /dev/null and b/assets/img/2023-11-09-enforcing-uniformity/flow-480.webp differ diff --git a/assets/img/2023-11-09-enforcing-uniformity/flow-800.webp b/assets/img/2023-11-09-enforcing-uniformity/flow-800.webp new file mode 100644 index 00000000..a107b8a2 Binary files /dev/null and b/assets/img/2023-11-09-enforcing-uniformity/flow-800.webp differ diff 
--git a/assets/img/2023-11-09-enforcing-uniformity/poisson-1400.webp b/assets/img/2023-11-09-enforcing-uniformity/poisson-1400.webp new file mode 100644 index 00000000..1655fc44 Binary files /dev/null and b/assets/img/2023-11-09-enforcing-uniformity/poisson-1400.webp differ diff --git a/assets/img/2023-11-09-enforcing-uniformity/poisson-480.webp b/assets/img/2023-11-09-enforcing-uniformity/poisson-480.webp new file mode 100644 index 00000000..d06badb7 Binary files /dev/null and b/assets/img/2023-11-09-enforcing-uniformity/poisson-480.webp differ diff --git a/assets/img/2023-11-09-enforcing-uniformity/poisson-800.webp b/assets/img/2023-11-09-enforcing-uniformity/poisson-800.webp new file mode 100644 index 00000000..1655fc44 Binary files /dev/null and b/assets/img/2023-11-09-enforcing-uniformity/poisson-800.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/AAA_on_off-1400.webp b/assets/img/2023-11-09-eunhae-project/AAA_on_off-1400.webp new file mode 100644 index 00000000..a06c5101 Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/AAA_on_off-1400.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/AAA_on_off-480.webp b/assets/img/2023-11-09-eunhae-project/AAA_on_off-480.webp new file mode 100644 index 00000000..be78501b Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/AAA_on_off-480.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/AAA_on_off-800.webp b/assets/img/2023-11-09-eunhae-project/AAA_on_off-800.webp new file mode 100644 index 00000000..a06c5101 Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/AAA_on_off-800.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/acc_comparison-1400.webp b/assets/img/2023-11-09-eunhae-project/acc_comparison-1400.webp new file mode 100644 index 00000000..d7db6ecc Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/acc_comparison-1400.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/acc_comparison-480.webp 
b/assets/img/2023-11-09-eunhae-project/acc_comparison-480.webp new file mode 100644 index 00000000..30f4eadf Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/acc_comparison-480.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/acc_comparison-800.webp b/assets/img/2023-11-09-eunhae-project/acc_comparison-800.webp new file mode 100644 index 00000000..d7db6ecc Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/acc_comparison-800.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/forgetting_curves-1400.webp b/assets/img/2023-11-09-eunhae-project/forgetting_curves-1400.webp new file mode 100644 index 00000000..c0d66c2a Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/forgetting_curves-1400.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/forgetting_curves-480.webp b/assets/img/2023-11-09-eunhae-project/forgetting_curves-480.webp new file mode 100644 index 00000000..254397d0 Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/forgetting_curves-480.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/forgetting_curves-800.webp b/assets/img/2023-11-09-eunhae-project/forgetting_curves-800.webp new file mode 100644 index 00000000..c0d66c2a Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/forgetting_curves-800.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/forgetting_offline-1400.webp b/assets/img/2023-11-09-eunhae-project/forgetting_offline-1400.webp new file mode 100644 index 00000000..23933208 Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/forgetting_offline-1400.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/forgetting_offline-480.webp b/assets/img/2023-11-09-eunhae-project/forgetting_offline-480.webp new file mode 100644 index 00000000..2f0ab2de Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/forgetting_offline-480.webp differ diff --git 
a/assets/img/2023-11-09-eunhae-project/forgetting_offline-800.webp b/assets/img/2023-11-09-eunhae-project/forgetting_offline-800.webp new file mode 100644 index 00000000..23933208 Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/forgetting_offline-800.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/forgetting_online-1400.webp b/assets/img/2023-11-09-eunhae-project/forgetting_online-1400.webp new file mode 100644 index 00000000..93f29b2e Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/forgetting_online-1400.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/forgetting_online-480.webp b/assets/img/2023-11-09-eunhae-project/forgetting_online-480.webp new file mode 100644 index 00000000..27a4889b Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/forgetting_online-480.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/forgetting_online-800.webp b/assets/img/2023-11-09-eunhae-project/forgetting_online-800.webp new file mode 100644 index 00000000..93f29b2e Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/forgetting_online-800.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/resnets_comparison-1400.webp b/assets/img/2023-11-09-eunhae-project/resnets_comparison-1400.webp new file mode 100644 index 00000000..f5ebfb96 Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/resnets_comparison-1400.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/resnets_comparison-480.webp b/assets/img/2023-11-09-eunhae-project/resnets_comparison-480.webp new file mode 100644 index 00000000..ddc8bf8f Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/resnets_comparison-480.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/resnets_comparison-800.webp b/assets/img/2023-11-09-eunhae-project/resnets_comparison-800.webp new file mode 100644 index 00000000..f5ebfb96 Binary files /dev/null and 
b/assets/img/2023-11-09-eunhae-project/resnets_comparison-800.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/saliency_offline-1400.webp b/assets/img/2023-11-09-eunhae-project/saliency_offline-1400.webp new file mode 100644 index 00000000..27147851 Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/saliency_offline-1400.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/saliency_offline-480.webp b/assets/img/2023-11-09-eunhae-project/saliency_offline-480.webp new file mode 100644 index 00000000..cee72245 Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/saliency_offline-480.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/saliency_offline-800.webp b/assets/img/2023-11-09-eunhae-project/saliency_offline-800.webp new file mode 100644 index 00000000..27147851 Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/saliency_offline-800.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/saliency_online-1400.webp b/assets/img/2023-11-09-eunhae-project/saliency_online-1400.webp new file mode 100644 index 00000000..e2645857 Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/saliency_online-1400.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/saliency_online-480.webp b/assets/img/2023-11-09-eunhae-project/saliency_online-480.webp new file mode 100644 index 00000000..701eaaee Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/saliency_online-480.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/saliency_online-800.webp b/assets/img/2023-11-09-eunhae-project/saliency_online-800.webp new file mode 100644 index 00000000..e2645857 Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/saliency_online-800.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/saliencymap_exp4-1400.webp b/assets/img/2023-11-09-eunhae-project/saliencymap_exp4-1400.webp new file mode 100644 index 00000000..022b7d8b Binary files /dev/null 
and b/assets/img/2023-11-09-eunhae-project/saliencymap_exp4-1400.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/saliencymap_exp4-480.webp b/assets/img/2023-11-09-eunhae-project/saliencymap_exp4-480.webp new file mode 100644 index 00000000..b5d24404 Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/saliencymap_exp4-480.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/saliencymap_exp4-800.webp b/assets/img/2023-11-09-eunhae-project/saliencymap_exp4-800.webp new file mode 100644 index 00000000..022b7d8b Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/saliencymap_exp4-800.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/stream_acc1-1400.webp b/assets/img/2023-11-09-eunhae-project/stream_acc1-1400.webp new file mode 100644 index 00000000..d4bad627 Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/stream_acc1-1400.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/stream_acc1-480.webp b/assets/img/2023-11-09-eunhae-project/stream_acc1-480.webp new file mode 100644 index 00000000..c7a5230c Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/stream_acc1-480.webp differ diff --git a/assets/img/2023-11-09-eunhae-project/stream_acc1-800.webp b/assets/img/2023-11-09-eunhae-project/stream_acc1-800.webp new file mode 100644 index 00000000..d4bad627 Binary files /dev/null and b/assets/img/2023-11-09-eunhae-project/stream_acc1-800.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/baseline1-1400.webp b/assets/img/2023-11-09-generating-robust-networks/baseline1-1400.webp new file mode 100644 index 00000000..41110dca Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/baseline1-1400.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/baseline1-480.webp b/assets/img/2023-11-09-generating-robust-networks/baseline1-480.webp new file mode 100644 index 00000000..920eee95 Binary files /dev/null and 
b/assets/img/2023-11-09-generating-robust-networks/baseline1-480.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/baseline1-800.webp b/assets/img/2023-11-09-generating-robust-networks/baseline1-800.webp new file mode 100644 index 00000000..41110dca Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/baseline1-800.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/baseline2-1400.webp b/assets/img/2023-11-09-generating-robust-networks/baseline2-1400.webp new file mode 100644 index 00000000..cdaceb8f Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/baseline2-1400.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/baseline2-480.webp b/assets/img/2023-11-09-generating-robust-networks/baseline2-480.webp new file mode 100644 index 00000000..102e3924 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/baseline2-480.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/baseline2-800.webp b/assets/img/2023-11-09-generating-robust-networks/baseline2-800.webp new file mode 100644 index 00000000..cdaceb8f Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/baseline2-800.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/comparison-1400.webp b/assets/img/2023-11-09-generating-robust-networks/comparison-1400.webp new file mode 100644 index 00000000..da1c1466 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/comparison-1400.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/comparison-480.webp b/assets/img/2023-11-09-generating-robust-networks/comparison-480.webp new file mode 100644 index 00000000..46df6199 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/comparison-480.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/comparison-800.webp 
b/assets/img/2023-11-09-generating-robust-networks/comparison-800.webp new file mode 100644 index 00000000..da1c1466 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/comparison-800.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/comparison_original-1400.webp b/assets/img/2023-11-09-generating-robust-networks/comparison_original-1400.webp new file mode 100644 index 00000000..6cc13353 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/comparison_original-1400.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/comparison_original-480.webp b/assets/img/2023-11-09-generating-robust-networks/comparison_original-480.webp new file mode 100644 index 00000000..d11f65cd Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/comparison_original-480.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/comparison_original-800.webp b/assets/img/2023-11-09-generating-robust-networks/comparison_original-800.webp new file mode 100644 index 00000000..6cc13353 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/comparison_original-800.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/comparison_perturbed-1400.webp b/assets/img/2023-11-09-generating-robust-networks/comparison_perturbed-1400.webp new file mode 100644 index 00000000..4ad72414 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/comparison_perturbed-1400.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/comparison_perturbed-480.webp b/assets/img/2023-11-09-generating-robust-networks/comparison_perturbed-480.webp new file mode 100644 index 00000000..f8422902 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/comparison_perturbed-480.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/comparison_perturbed-800.webp 
b/assets/img/2023-11-09-generating-robust-networks/comparison_perturbed-800.webp new file mode 100644 index 00000000..4ad72414 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/comparison_perturbed-800.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/final_bar1-1400.webp b/assets/img/2023-11-09-generating-robust-networks/final_bar1-1400.webp new file mode 100644 index 00000000..6de1bdf9 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/final_bar1-1400.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/final_bar1-480.webp b/assets/img/2023-11-09-generating-robust-networks/final_bar1-480.webp new file mode 100644 index 00000000..86a16c2e Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/final_bar1-480.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/final_bar1-800.webp b/assets/img/2023-11-09-generating-robust-networks/final_bar1-800.webp new file mode 100644 index 00000000..6de1bdf9 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/final_bar1-800.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/final_bar2-1400.webp b/assets/img/2023-11-09-generating-robust-networks/final_bar2-1400.webp new file mode 100644 index 00000000..c47cc738 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/final_bar2-1400.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/final_bar2-480.webp b/assets/img/2023-11-09-generating-robust-networks/final_bar2-480.webp new file mode 100644 index 00000000..f4c1f4ac Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/final_bar2-480.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/final_bar2-800.webp b/assets/img/2023-11-09-generating-robust-networks/final_bar2-800.webp new file mode 100644 index 00000000..c47cc738 Binary files /dev/null and 
b/assets/img/2023-11-09-generating-robust-networks/final_bar2-800.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/final_bar3-1400.webp b/assets/img/2023-11-09-generating-robust-networks/final_bar3-1400.webp new file mode 100644 index 00000000..c124aff2 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/final_bar3-1400.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/final_bar3-480.webp b/assets/img/2023-11-09-generating-robust-networks/final_bar3-480.webp new file mode 100644 index 00000000..85bc8c40 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/final_bar3-480.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/final_bar3-800.webp b/assets/img/2023-11-09-generating-robust-networks/final_bar3-800.webp new file mode 100644 index 00000000..c124aff2 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/final_bar3-800.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/final_line1-1400.webp b/assets/img/2023-11-09-generating-robust-networks/final_line1-1400.webp new file mode 100644 index 00000000..177c7e89 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/final_line1-1400.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/final_line1-480.webp b/assets/img/2023-11-09-generating-robust-networks/final_line1-480.webp new file mode 100644 index 00000000..d5ea3d43 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/final_line1-480.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/final_line1-800.webp b/assets/img/2023-11-09-generating-robust-networks/final_line1-800.webp new file mode 100644 index 00000000..177c7e89 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/final_line1-800.webp differ diff --git 
a/assets/img/2023-11-09-generating-robust-networks/final_line2-1400.webp b/assets/img/2023-11-09-generating-robust-networks/final_line2-1400.webp new file mode 100644 index 00000000..22a5a2ee Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/final_line2-1400.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/final_line2-480.webp b/assets/img/2023-11-09-generating-robust-networks/final_line2-480.webp new file mode 100644 index 00000000..3a1fc4ce Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/final_line2-480.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/final_line2-800.webp b/assets/img/2023-11-09-generating-robust-networks/final_line2-800.webp new file mode 100644 index 00000000..22a5a2ee Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/final_line2-800.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/final_line3-1400.webp b/assets/img/2023-11-09-generating-robust-networks/final_line3-1400.webp new file mode 100644 index 00000000..02683ad4 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/final_line3-1400.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/final_line3-480.webp b/assets/img/2023-11-09-generating-robust-networks/final_line3-480.webp new file mode 100644 index 00000000..db9cedf9 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/final_line3-480.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/final_line3-800.webp b/assets/img/2023-11-09-generating-robust-networks/final_line3-800.webp new file mode 100644 index 00000000..02683ad4 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/final_line3-800.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/heatmap_sample-1400.webp b/assets/img/2023-11-09-generating-robust-networks/heatmap_sample-1400.webp 
new file mode 100644 index 00000000..14911b63 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/heatmap_sample-1400.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/heatmap_sample-480.webp b/assets/img/2023-11-09-generating-robust-networks/heatmap_sample-480.webp new file mode 100644 index 00000000..13316313 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/heatmap_sample-480.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/heatmap_sample-800.webp b/assets/img/2023-11-09-generating-robust-networks/heatmap_sample-800.webp new file mode 100644 index 00000000..14911b63 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/heatmap_sample-800.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/noise_examples-1400.webp b/assets/img/2023-11-09-generating-robust-networks/noise_examples-1400.webp new file mode 100644 index 00000000..3f3f7e6f Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/noise_examples-1400.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/noise_examples-480.webp b/assets/img/2023-11-09-generating-robust-networks/noise_examples-480.webp new file mode 100644 index 00000000..dcec03f7 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/noise_examples-480.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/noise_examples-800.webp b/assets/img/2023-11-09-generating-robust-networks/noise_examples-800.webp new file mode 100644 index 00000000..3f3f7e6f Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/noise_examples-800.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/noise_steps-1400.webp b/assets/img/2023-11-09-generating-robust-networks/noise_steps-1400.webp new file mode 100644 index 00000000..e640f9aa Binary files /dev/null and 
b/assets/img/2023-11-09-generating-robust-networks/noise_steps-1400.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/noise_steps-480.webp b/assets/img/2023-11-09-generating-robust-networks/noise_steps-480.webp new file mode 100644 index 00000000..e43a6496 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/noise_steps-480.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/noise_steps-800.webp b/assets/img/2023-11-09-generating-robust-networks/noise_steps-800.webp new file mode 100644 index 00000000..e640f9aa Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/noise_steps-800.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/pixel_examples-1400.webp b/assets/img/2023-11-09-generating-robust-networks/pixel_examples-1400.webp new file mode 100644 index 00000000..531bd3ff Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/pixel_examples-1400.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/pixel_examples-480.webp b/assets/img/2023-11-09-generating-robust-networks/pixel_examples-480.webp new file mode 100644 index 00000000..f34bca81 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/pixel_examples-480.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/pixel_examples-800.webp b/assets/img/2023-11-09-generating-robust-networks/pixel_examples-800.webp new file mode 100644 index 00000000..531bd3ff Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/pixel_examples-800.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/pixel_steps-1400.webp b/assets/img/2023-11-09-generating-robust-networks/pixel_steps-1400.webp new file mode 100644 index 00000000..34550606 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/pixel_steps-1400.webp differ diff --git 
a/assets/img/2023-11-09-generating-robust-networks/pixel_steps-480.webp b/assets/img/2023-11-09-generating-robust-networks/pixel_steps-480.webp new file mode 100644 index 00000000..28c7d06c Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/pixel_steps-480.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/pixel_steps-800.webp b/assets/img/2023-11-09-generating-robust-networks/pixel_steps-800.webp new file mode 100644 index 00000000..34550606 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/pixel_steps-800.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/resnet-1400.webp b/assets/img/2023-11-09-generating-robust-networks/resnet-1400.webp new file mode 100644 index 00000000..d5e93b81 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/resnet-1400.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/resnet-480.webp b/assets/img/2023-11-09-generating-robust-networks/resnet-480.webp new file mode 100644 index 00000000..94f075e6 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/resnet-480.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/resnet-800.webp b/assets/img/2023-11-09-generating-robust-networks/resnet-800.webp new file mode 100644 index 00000000..d5e93b81 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/resnet-800.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/tinyimagenet-1400.webp b/assets/img/2023-11-09-generating-robust-networks/tinyimagenet-1400.webp new file mode 100644 index 00000000..b8dedc10 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/tinyimagenet-1400.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/tinyimagenet-480.webp b/assets/img/2023-11-09-generating-robust-networks/tinyimagenet-480.webp new file mode 100644 index 00000000..6637203c 
Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/tinyimagenet-480.webp differ diff --git a/assets/img/2023-11-09-generating-robust-networks/tinyimagenet-800.webp b/assets/img/2023-11-09-generating-robust-networks/tinyimagenet-800.webp new file mode 100644 index 00000000..b8dedc10 Binary files /dev/null and b/assets/img/2023-11-09-generating-robust-networks/tinyimagenet-800.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/QuickDraw_example-1400.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/QuickDraw_example-1400.webp new file mode 100644 index 00000000..d843a0f1 Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/QuickDraw_example-1400.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/QuickDraw_example-480.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/QuickDraw_example-480.webp new file mode 100644 index 00000000..91a4c5ff Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/QuickDraw_example-480.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/QuickDraw_example-800.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/QuickDraw_example-800.webp new file mode 100644 index 00000000..d843a0f1 Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/QuickDraw_example-800.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/augment_results-1400.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/augment_results-1400.webp new file mode 100644 index 00000000..dc8ec28b Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/augment_results-1400.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/augment_results-480.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/augment_results-480.webp new file mode 100644 index 00000000..97c171c2 Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/augment_results-480.webp differ diff --git 
a/assets/img/2023-11-09-how-cnns-learn-shapes/augment_results-800.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/augment_results-800.webp new file mode 100644 index 00000000..dc8ec28b Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/augment_results-800.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/augment_training-1400.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/augment_training-1400.webp new file mode 100644 index 00000000..4c5d0f9b Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/augment_training-1400.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/augment_training-480.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/augment_training-480.webp new file mode 100644 index 00000000..2772de87 Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/augment_training-480.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/augment_training-800.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/augment_training-800.webp new file mode 100644 index 00000000..4c5d0f9b Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/augment_training-800.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_fragmentation-1400.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_fragmentation-1400.webp new file mode 100644 index 00000000..c7384f60 Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_fragmentation-1400.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_fragmentation-480.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_fragmentation-480.webp new file mode 100644 index 00000000..e8c7966b Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_fragmentation-480.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_fragmentation-800.webp 
b/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_fragmentation-800.webp new file mode 100644 index 00000000..c7384f60 Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_fragmentation-800.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_negative-1400.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_negative-1400.webp new file mode 100644 index 00000000..e28cd6c1 Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_negative-1400.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_negative-480.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_negative-480.webp new file mode 100644 index 00000000..4c62ffd1 Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_negative-480.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_negative-800.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_negative-800.webp new file mode 100644 index 00000000..e28cd6c1 Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_negative-800.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/cnnfeaturevisualization-1400.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/cnnfeaturevisualization-1400.webp new file mode 100644 index 00000000..ed109db5 Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/cnnfeaturevisualization-1400.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/cnnfeaturevisualization-480.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/cnnfeaturevisualization-480.webp new file mode 100644 index 00000000..aaa55892 Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/cnnfeaturevisualization-480.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/cnnfeaturevisualization-800.webp 
b/assets/img/2023-11-09-how-cnns-learn-shapes/cnnfeaturevisualization-800.webp new file mode 100644 index 00000000..ed109db5 Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/cnnfeaturevisualization-800.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/conflictset-1400.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/conflictset-1400.webp new file mode 100644 index 00000000..9ece79ce Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/conflictset-1400.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/conflictset-480.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/conflictset-480.webp new file mode 100644 index 00000000..d29b2a11 Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/conflictset-480.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/conflictset-800.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/conflictset-800.webp new file mode 100644 index 00000000..9ece79ce Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/conflictset-800.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/filter_results-1400.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/filter_results-1400.webp new file mode 100644 index 00000000..074b92a8 Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/filter_results-1400.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/filter_results-480.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/filter_results-480.webp new file mode 100644 index 00000000..63c4de63 Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/filter_results-480.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/filter_results-800.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/filter_results-800.webp new file mode 100644 index 00000000..074b92a8 Binary files /dev/null and 
b/assets/img/2023-11-09-how-cnns-learn-shapes/filter_results-800.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/filter_training-1400.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/filter_training-1400.webp new file mode 100644 index 00000000..6497c0d1 Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/filter_training-1400.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/filter_training-480.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/filter_training-480.webp new file mode 100644 index 00000000..caaf6be6 Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/filter_training-480.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/filter_training-800.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/filter_training-800.webp new file mode 100644 index 00000000..6497c0d1 Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/filter_training-800.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/shapetexture-1400.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/shapetexture-1400.webp new file mode 100644 index 00000000..755f679c Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/shapetexture-1400.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/shapetexture-480.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/shapetexture-480.webp new file mode 100644 index 00000000..c2675694 Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/shapetexture-480.webp differ diff --git a/assets/img/2023-11-09-how-cnns-learn-shapes/shapetexture-800.webp b/assets/img/2023-11-09-how-cnns-learn-shapes/shapetexture-800.webp new file mode 100644 index 00000000..755f679c Binary files /dev/null and b/assets/img/2023-11-09-how-cnns-learn-shapes/shapetexture-800.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/Autoencoder-1400.webp 
b/assets/img/2023-11-09-interpretability-of-toy-tasks/Autoencoder-1400.webp new file mode 100644 index 00000000..8e662ecb Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/Autoencoder-1400.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/Autoencoder-480.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/Autoencoder-480.webp new file mode 100644 index 00000000..48ccf35c Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/Autoencoder-480.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/Autoencoder-800.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/Autoencoder-800.webp new file mode 100644 index 00000000..8e662ecb Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/Autoencoder-800.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_gelu-1400.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_gelu-1400.webp new file mode 100644 index 00000000..4cbdfdfc Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_gelu-1400.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_gelu-480.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_gelu-480.webp new file mode 100644 index 00000000..5278afef Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_gelu-480.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_gelu-800.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_gelu-800.webp new file mode 100644 index 00000000..4cbdfdfc Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_gelu-800.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_relu-1400.webp 
b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_relu-1400.webp new file mode 100644 index 00000000..0747a89e Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_relu-1400.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_relu-480.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_relu-480.webp new file mode 100644 index 00000000..e253f013 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_relu-480.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_relu-800.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_relu-800.webp new file mode 100644 index 00000000..0747a89e Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_relu-800.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_sigmoid-1400.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_sigmoid-1400.webp new file mode 100644 index 00000000..b838f04a Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_sigmoid-1400.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_sigmoid-480.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_sigmoid-480.webp new file mode 100644 index 00000000..1f576ebe Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_sigmoid-480.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_sigmoid-800.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_sigmoid-800.webp new file mode 100644 index 00000000..b838f04a Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_sigmoid-800.webp differ diff --git 
a/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_silu-1400.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_silu-1400.webp new file mode 100644 index 00000000..edabe807 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_silu-1400.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_silu-480.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_silu-480.webp new file mode 100644 index 00000000..51b0bdcf Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_silu-480.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_silu-800.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_silu-800.webp new file mode 100644 index 00000000..edabe807 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_silu-800.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_solu-1400.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_solu-1400.webp new file mode 100644 index 00000000..b35d067b Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_solu-1400.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_solu-480.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_solu-480.webp new file mode 100644 index 00000000..ab36f056 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_solu-480.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_solu-800.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_solu-800.webp new file mode 100644 index 00000000..b35d067b Binary files /dev/null and 
b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_solu-800.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_tanh-1400.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_tanh-1400.webp new file mode 100644 index 00000000..808c5a62 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_tanh-1400.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_tanh-480.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_tanh-480.webp new file mode 100644 index 00000000..d71ffc80 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_tanh-480.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_tanh-800.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_tanh-800.webp new file mode 100644 index 00000000..808c5a62 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_tanh-800.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/feature_visual-1400.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/feature_visual-1400.webp new file mode 100644 index 00000000..bac8e826 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/feature_visual-1400.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/feature_visual-480.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/feature_visual-480.webp new file mode 100644 index 00000000..2dee938a Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/feature_visual-480.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/feature_visual-800.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/feature_visual-800.webp new file mode 100644 index 00000000..bac8e826 Binary files 
/dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/feature_visual-800.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/features_hidden-1400.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/features_hidden-1400.webp new file mode 100644 index 00000000..ee5e9b18 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/features_hidden-1400.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/features_hidden-480.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/features_hidden-480.webp new file mode 100644 index 00000000..56933ddd Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/features_hidden-480.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/features_hidden-800.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/features_hidden-800.webp new file mode 100644 index 00000000..ee5e9b18 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/features_hidden-800.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/grid_plot-1400.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/grid_plot-1400.webp new file mode 100644 index 00000000..d86e8005 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/grid_plot-1400.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/grid_plot-480.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/grid_plot-480.webp new file mode 100644 index 00000000..93b95959 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/grid_plot-480.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/grid_plot-800.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/grid_plot-800.webp new file mode 100644 index 00000000..d86e8005 Binary files /dev/null and 
b/assets/img/2023-11-09-interpretability-of-toy-tasks/grid_plot-800.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batch_50-1400.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batch_50-1400.webp new file mode 100644 index 00000000..756b30ba Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batch_50-1400.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batch_50-480.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batch_50-480.webp new file mode 100644 index 00000000..342a928e Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batch_50-480.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batch_50-800.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batch_50-800.webp new file mode 100644 index 00000000..756b30ba Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batch_50-800.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batch_99-1400.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batch_99-1400.webp new file mode 100644 index 00000000..3789b598 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batch_99-1400.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batch_99-480.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batch_99-480.webp new file mode 100644 index 00000000..c199310f Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batch_99-480.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batch_99-800.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batch_99-800.webp new file mode 100644 index 00000000..3789b598 Binary files /dev/null and 
b/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batch_99-800.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batches-1400.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batches-1400.webp new file mode 100644 index 00000000..f58efbdd Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batches-1400.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batches-480.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batches-480.webp new file mode 100644 index 00000000..2cc418a2 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batches-480.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batches-800.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batches-800.webp new file mode 100644 index 00000000..f58efbdd Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batches-800.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/legend-1400.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/legend-1400.webp new file mode 100644 index 00000000..16501667 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/legend-1400.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/legend-480.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/legend-480.webp new file mode 100644 index 00000000..04b9daf6 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/legend-480.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/legend-800.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/legend-800.webp new file mode 100644 index 00000000..16501667 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/legend-800.webp differ diff --git 
a/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_gelu-1400.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_gelu-1400.webp new file mode 100644 index 00000000..f7771af2 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_gelu-1400.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_gelu-480.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_gelu-480.webp new file mode 100644 index 00000000..07ba7420 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_gelu-480.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_gelu-800.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_gelu-800.webp new file mode 100644 index 00000000..f7771af2 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_gelu-800.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_relu-1400.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_relu-1400.webp new file mode 100644 index 00000000..3a531191 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_relu-1400.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_relu-480.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_relu-480.webp new file mode 100644 index 00000000..3de0c327 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_relu-480.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_relu-800.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_relu-800.webp new file mode 100644 index 00000000..3a531191 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_relu-800.webp differ diff --git 
a/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_sigmoid-1400.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_sigmoid-1400.webp new file mode 100644 index 00000000..854b77c2 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_sigmoid-1400.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_sigmoid-480.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_sigmoid-480.webp new file mode 100644 index 00000000..15efe977 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_sigmoid-480.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_sigmoid-800.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_sigmoid-800.webp new file mode 100644 index 00000000..854b77c2 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_sigmoid-800.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_silu-1400.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_silu-1400.webp new file mode 100644 index 00000000..6e51a34a Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_silu-1400.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_silu-480.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_silu-480.webp new file mode 100644 index 00000000..15ce5dfc Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_silu-480.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_silu-800.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_silu-800.webp new file mode 100644 index 00000000..6e51a34a Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_silu-800.webp differ diff --git 
a/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_solu-1400.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_solu-1400.webp new file mode 100644 index 00000000..af423b58 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_solu-1400.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_solu-480.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_solu-480.webp new file mode 100644 index 00000000..3fad700a Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_solu-480.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_solu-800.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_solu-800.webp new file mode 100644 index 00000000..af423b58 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_solu-800.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_tanh-1400.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_tanh-1400.webp new file mode 100644 index 00000000..5702681a Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_tanh-1400.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_tanh-480.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_tanh-480.webp new file mode 100644 index 00000000..2e6afd7e Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_tanh-480.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_tanh-800.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_tanh-800.webp new file mode 100644 index 00000000..5702681a Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_tanh-800.webp differ diff --git 
a/assets/img/2023-11-09-interpretability-of-toy-tasks/phaseplots-1400.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/phaseplots-1400.webp new file mode 100644 index 00000000..3945d389 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/phaseplots-1400.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/phaseplots-480.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/phaseplots-480.webp new file mode 100644 index 00000000..9cd106ef Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/phaseplots-480.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/phaseplots-800.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/phaseplots-800.webp new file mode 100644 index 00000000..3945d389 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/phaseplots-800.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix-1400.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix-1400.webp new file mode 100644 index 00000000..87ccd75e Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix-1400.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix-480.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix-480.webp new file mode 100644 index 00000000..344b434e Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix-480.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix-800.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix-800.webp new file mode 100644 index 00000000..87ccd75e Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix-800.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix_equation-1400.webp 
b/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix_equation-1400.webp new file mode 100644 index 00000000..365ec451 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix_equation-1400.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix_equation-480.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix_equation-480.webp new file mode 100644 index 00000000..0dd7b62d Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix_equation-480.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix_equation-800.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix_equation-800.webp new file mode 100644 index 00000000..365ec451 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix_equation-800.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix_latex-1400.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix_latex-1400.webp new file mode 100644 index 00000000..5f54f70c Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix_latex-1400.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix_latex-480.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix_latex-480.webp new file mode 100644 index 00000000..dbd83156 Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix_latex-480.webp differ diff --git a/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix_latex-800.webp b/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix_latex-800.webp new file mode 100644 index 00000000..5f54f70c Binary files /dev/null and b/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix_latex-800.webp differ diff --git 
a/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/SAE-1400.webp b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/SAE-1400.webp new file mode 100644 index 00000000..16a2af78 Binary files /dev/null and b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/SAE-1400.webp differ diff --git a/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/SAE-480.webp b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/SAE-480.webp new file mode 100644 index 00000000..edbd69a9 Binary files /dev/null and b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/SAE-480.webp differ diff --git a/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/SAE-800.webp b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/SAE-800.webp new file mode 100644 index 00000000..16a2af78 Binary files /dev/null and b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/SAE-800.webp differ diff --git a/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/autoencoder-1400.webp b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/autoencoder-1400.webp new file mode 100644 index 00000000..22b3b794 Binary files /dev/null and b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/autoencoder-1400.webp differ diff --git a/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/autoencoder-480.webp b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/autoencoder-480.webp new file mode 100644 index 00000000..4d8372d3 Binary files /dev/null and b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/autoencoder-480.webp differ diff --git 
a/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/autoencoder-800.webp b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/autoencoder-800.webp new file mode 100644 index 00000000..22b3b794 Binary files /dev/null and b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/autoencoder-800.webp differ diff --git a/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_diagram-1400.webp b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_diagram-1400.webp new file mode 100644 index 00000000..1c0c57fe Binary files /dev/null and b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_diagram-1400.webp differ diff --git a/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_diagram-480.webp b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_diagram-480.webp new file mode 100644 index 00000000..8011e6a6 Binary files /dev/null and b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_diagram-480.webp differ diff --git a/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_diagram-800.webp b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_diagram-800.webp new file mode 100644 index 00000000..1c0c57fe Binary files /dev/null and b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_diagram-800.webp differ diff --git a/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_weights-1400.webp b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_weights-1400.webp new file mode 100644 index 00000000..76daddc6 Binary files /dev/null and 
b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_weights-1400.webp differ diff --git a/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_weights-480.webp b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_weights-480.webp new file mode 100644 index 00000000..d649497e Binary files /dev/null and b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_weights-480.webp differ diff --git a/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_weights-800.webp b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_weights-800.webp new file mode 100644 index 00000000..76daddc6 Binary files /dev/null and b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_weights-800.webp differ diff --git a/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/layer_weights-1400.webp b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/layer_weights-1400.webp new file mode 100644 index 00000000..991761a1 Binary files /dev/null and b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/layer_weights-1400.webp differ diff --git a/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/layer_weights-480.webp b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/layer_weights-480.webp new file mode 100644 index 00000000..44f2cc1e Binary files /dev/null and b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/layer_weights-480.webp differ diff --git a/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/layer_weights-800.webp b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/layer_weights-800.webp new file mode 100644 index 
00000000..991761a1 Binary files /dev/null and b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/layer_weights-800.webp differ diff --git a/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/superposition-1400.webp b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/superposition-1400.webp new file mode 100644 index 00000000..15c67c39 Binary files /dev/null and b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/superposition-1400.webp differ diff --git a/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/superposition-480.webp b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/superposition-480.webp new file mode 100644 index 00000000..a7d5569f Binary files /dev/null and b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/superposition-480.webp differ diff --git a/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/superposition-800.webp b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/superposition-800.webp new file mode 100644 index 00000000..15c67c39 Binary files /dev/null and b/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/superposition-800.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.05 PM-1400.webp b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.05 PM-1400.webp new file mode 100644 index 00000000..5aa802bd Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.05 PM-1400.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.05 PM-480.webp b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.05 PM-480.webp new file mode 100644 index 00000000..91762046 Binary files /dev/null and 
b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.05 PM-480.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.05 PM-800.webp b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.05 PM-800.webp new file mode 100644 index 00000000..5aa802bd Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.05 PM-800.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.18 PM-1400.webp b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.18 PM-1400.webp new file mode 100644 index 00000000..cefa1ac4 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.18 PM-1400.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.18 PM-480.webp b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.18 PM-480.webp new file mode 100644 index 00000000..328731f1 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.18 PM-480.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.18 PM-800.webp b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.18 PM-800.webp new file mode 100644 index 00000000..cefa1ac4 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.18 PM-800.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.38 PM-1400.webp b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.38 PM-1400.webp new file mode 100644 index 00000000..b3f8e2d2 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.38 PM-1400.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.38 PM-480.webp 
b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.38 PM-480.webp new file mode 100644 index 00000000..8f87c010 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.38 PM-480.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.38 PM-800.webp b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.38 PM-800.webp new file mode 100644 index 00000000..b3f8e2d2 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.38 PM-800.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.45 PM-1400.webp b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.45 PM-1400.webp new file mode 100644 index 00000000..9b4c38f1 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.45 PM-1400.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.45 PM-480.webp b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.45 PM-480.webp new file mode 100644 index 00000000..4a8983dd Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.45 PM-480.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.45 PM-800.webp b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.45 PM-800.webp new file mode 100644 index 00000000..9b4c38f1 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.45 PM-800.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.59 PM-1400.webp b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.59 PM-1400.webp new file mode 100644 index 00000000..ef44a669 Binary files /dev/null and 
b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.59 PM-1400.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.59 PM-480.webp b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.59 PM-480.webp new file mode 100644 index 00000000..d0b9cac9 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.59 PM-480.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.59 PM-800.webp b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.59 PM-800.webp new file mode 100644 index 00000000..ef44a669 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/Screenshot 2023-12-12 at 7.33.59 PM-800.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/adj_mat_top_10_similarity-1400.webp b/assets/img/2023-11-09-molecule_generation/adj_mat_top_10_similarity-1400.webp new file mode 100644 index 00000000..789802b6 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/adj_mat_top_10_similarity-1400.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/adj_mat_top_10_similarity-480.webp b/assets/img/2023-11-09-molecule_generation/adj_mat_top_10_similarity-480.webp new file mode 100644 index 00000000..110e5aab Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/adj_mat_top_10_similarity-480.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/adj_mat_top_10_similarity-800.webp b/assets/img/2023-11-09-molecule_generation/adj_mat_top_10_similarity-800.webp new file mode 100644 index 00000000..789802b6 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/adj_mat_top_10_similarity-800.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/autoencoder_adj_mat_pics-1400.webp b/assets/img/2023-11-09-molecule_generation/autoencoder_adj_mat_pics-1400.webp new file mode 100644 index 
00000000..e1737736 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/autoencoder_adj_mat_pics-1400.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/autoencoder_adj_mat_pics-480.webp b/assets/img/2023-11-09-molecule_generation/autoencoder_adj_mat_pics-480.webp new file mode 100644 index 00000000..2cf248f4 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/autoencoder_adj_mat_pics-480.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/autoencoder_adj_mat_pics-800.webp b/assets/img/2023-11-09-molecule_generation/autoencoder_adj_mat_pics-800.webp new file mode 100644 index 00000000..e1737736 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/autoencoder_adj_mat_pics-800.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/heatmap-1400.webp b/assets/img/2023-11-09-molecule_generation/heatmap-1400.webp new file mode 100644 index 00000000..6f94e836 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/heatmap-1400.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/heatmap-480.webp b/assets/img/2023-11-09-molecule_generation/heatmap-480.webp new file mode 100644 index 00000000..09d7c9a0 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/heatmap-480.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/heatmap-800.webp b/assets/img/2023-11-09-molecule_generation/heatmap-800.webp new file mode 100644 index 00000000..6f94e836 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/heatmap-800.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/heatmatp_rdk_with_title-1400.webp b/assets/img/2023-11-09-molecule_generation/heatmatp_rdk_with_title-1400.webp new file mode 100644 index 00000000..c84741ea Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/heatmatp_rdk_with_title-1400.webp differ diff --git 
a/assets/img/2023-11-09-molecule_generation/heatmatp_rdk_with_title-480.webp b/assets/img/2023-11-09-molecule_generation/heatmatp_rdk_with_title-480.webp new file mode 100644 index 00000000..f49cff62 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/heatmatp_rdk_with_title-480.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/heatmatp_rdk_with_title-800.webp b/assets/img/2023-11-09-molecule_generation/heatmatp_rdk_with_title-800.webp new file mode 100644 index 00000000..c84741ea Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/heatmatp_rdk_with_title-800.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/shorna_1-1400.webp b/assets/img/2023-11-09-molecule_generation/shorna_1-1400.webp new file mode 100644 index 00000000..ad05737a Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/shorna_1-1400.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/shorna_1-480.webp b/assets/img/2023-11-09-molecule_generation/shorna_1-480.webp new file mode 100644 index 00000000..ad4c983a Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/shorna_1-480.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/shorna_1-800.webp b/assets/img/2023-11-09-molecule_generation/shorna_1-800.webp new file mode 100644 index 00000000..ad05737a Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/shorna_1-800.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/shorna_2-1400.webp b/assets/img/2023-11-09-molecule_generation/shorna_2-1400.webp new file mode 100644 index 00000000..afa67a09 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/shorna_2-1400.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/shorna_2-480.webp b/assets/img/2023-11-09-molecule_generation/shorna_2-480.webp new file mode 100644 index 00000000..ec1259d1 Binary files /dev/null and 
b/assets/img/2023-11-09-molecule_generation/shorna_2-480.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/shorna_2-800.webp b/assets/img/2023-11-09-molecule_generation/shorna_2-800.webp new file mode 100644 index 00000000..afa67a09 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/shorna_2-800.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/shorna_3-1400.webp b/assets/img/2023-11-09-molecule_generation/shorna_3-1400.webp new file mode 100644 index 00000000..614a3900 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/shorna_3-1400.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/shorna_3-480.webp b/assets/img/2023-11-09-molecule_generation/shorna_3-480.webp new file mode 100644 index 00000000..4292cd9d Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/shorna_3-480.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/shorna_3-800.webp b/assets/img/2023-11-09-molecule_generation/shorna_3-800.webp new file mode 100644 index 00000000..614a3900 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/shorna_3-800.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/shorna_4-1400.webp b/assets/img/2023-11-09-molecule_generation/shorna_4-1400.webp new file mode 100644 index 00000000..420f110b Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/shorna_4-1400.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/shorna_4-480.webp b/assets/img/2023-11-09-molecule_generation/shorna_4-480.webp new file mode 100644 index 00000000..8c572191 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/shorna_4-480.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/shorna_4-800.webp b/assets/img/2023-11-09-molecule_generation/shorna_4-800.webp new file mode 100644 index 00000000..420f110b Binary files /dev/null and 
b/assets/img/2023-11-09-molecule_generation/shorna_4-800.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/shorna_5-1400.webp b/assets/img/2023-11-09-molecule_generation/shorna_5-1400.webp new file mode 100644 index 00000000..949e4595 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/shorna_5-1400.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/shorna_5-480.webp b/assets/img/2023-11-09-molecule_generation/shorna_5-480.webp new file mode 100644 index 00000000..c5397940 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/shorna_5-480.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/shorna_5-800.webp b/assets/img/2023-11-09-molecule_generation/shorna_5-800.webp new file mode 100644 index 00000000..949e4595 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/shorna_5-800.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/top_10_similarity_chart-1400.webp b/assets/img/2023-11-09-molecule_generation/top_10_similarity_chart-1400.webp new file mode 100644 index 00000000..96857750 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/top_10_similarity_chart-1400.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/top_10_similarity_chart-480.webp b/assets/img/2023-11-09-molecule_generation/top_10_similarity_chart-480.webp new file mode 100644 index 00000000..c7e6d96d Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/top_10_similarity_chart-480.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/top_10_similarity_chart-800.webp b/assets/img/2023-11-09-molecule_generation/top_10_similarity_chart-800.webp new file mode 100644 index 00000000..96857750 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/top_10_similarity_chart-800.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/training_loss_adj_mat_case-1400.webp 
b/assets/img/2023-11-09-molecule_generation/training_loss_adj_mat_case-1400.webp new file mode 100644 index 00000000..ba30f720 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/training_loss_adj_mat_case-1400.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/training_loss_adj_mat_case-480.webp b/assets/img/2023-11-09-molecule_generation/training_loss_adj_mat_case-480.webp new file mode 100644 index 00000000..cc14f441 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/training_loss_adj_mat_case-480.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/training_loss_adj_mat_case-800.webp b/assets/img/2023-11-09-molecule_generation/training_loss_adj_mat_case-800.webp new file mode 100644 index 00000000..ba30f720 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/training_loss_adj_mat_case-800.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/training_loss_fngpts-1400.webp b/assets/img/2023-11-09-molecule_generation/training_loss_fngpts-1400.webp new file mode 100644 index 00000000..5a2642a1 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/training_loss_fngpts-1400.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/training_loss_fngpts-480.webp b/assets/img/2023-11-09-molecule_generation/training_loss_fngpts-480.webp new file mode 100644 index 00000000..7169f8b0 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/training_loss_fngpts-480.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/training_loss_fngpts-800.webp b/assets/img/2023-11-09-molecule_generation/training_loss_fngpts-800.webp new file mode 100644 index 00000000..5a2642a1 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/training_loss_fngpts-800.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/validation_molecule_1_ex-1400.webp 
b/assets/img/2023-11-09-molecule_generation/validation_molecule_1_ex-1400.webp new file mode 100644 index 00000000..7555fa13 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/validation_molecule_1_ex-1400.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/validation_molecule_1_ex-480.webp b/assets/img/2023-11-09-molecule_generation/validation_molecule_1_ex-480.webp new file mode 100644 index 00000000..065bd855 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/validation_molecule_1_ex-480.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/validation_molecule_1_ex-800.webp b/assets/img/2023-11-09-molecule_generation/validation_molecule_1_ex-800.webp new file mode 100644 index 00000000..7555fa13 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/validation_molecule_1_ex-800.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/validation_molecule_2_example-1400.webp b/assets/img/2023-11-09-molecule_generation/validation_molecule_2_example-1400.webp new file mode 100644 index 00000000..a2e329f4 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/validation_molecule_2_example-1400.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/validation_molecule_2_example-480.webp b/assets/img/2023-11-09-molecule_generation/validation_molecule_2_example-480.webp new file mode 100644 index 00000000..31dd1389 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/validation_molecule_2_example-480.webp differ diff --git a/assets/img/2023-11-09-molecule_generation/validation_molecule_2_example-800.webp b/assets/img/2023-11-09-molecule_generation/validation_molecule_2_example-800.webp new file mode 100644 index 00000000..a2e329f4 Binary files /dev/null and b/assets/img/2023-11-09-molecule_generation/validation_molecule_2_example-800.webp differ diff --git 
a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/base-1400.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/base-1400.webp new file mode 100644 index 00000000..51991414 Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/base-1400.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/base-480.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/base-480.webp new file mode 100644 index 00000000..d55506d4 Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/base-480.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/base-800.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/base-800.webp new file mode 100644 index 00000000..51991414 Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/base-800.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/bert-1400.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/bert-1400.webp new file mode 100644 index 00000000..94f0eed3 Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/bert-1400.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/bert-480.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/bert-480.webp new file mode 100644 index 00000000..0ceef2be Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/bert-480.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/bert-800.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/bert-800.webp new file mode 100644 index 
00000000..94f0eed3 Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/bert-800.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/english-1400.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/english-1400.webp new file mode 100644 index 00000000..66eaa344 Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/english-1400.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/english-480.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/english-480.webp new file mode 100644 index 00000000..e2a65540 Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/english-480.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/english-800.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/english-800.webp new file mode 100644 index 00000000..66eaa344 Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/english-800.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/langs-1400.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/langs-1400.webp new file mode 100644 index 00000000..5375d6ff Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/langs-1400.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/langs-480.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/langs-480.webp new file mode 100644 index 00000000..62b5c88e Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/langs-480.webp differ diff --git 
a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/langs-800.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/langs-800.webp new file mode 100644 index 00000000..5375d6ff Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/langs-800.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mlm-1400.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mlm-1400.webp new file mode 100644 index 00000000..c67a7da0 Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mlm-1400.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mlm-480.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mlm-480.webp new file mode 100644 index 00000000..20dfd856 Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mlm-480.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mlm-800.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mlm-800.webp new file mode 100644 index 00000000..c67a7da0 Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mlm-800.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mteb-1400.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mteb-1400.webp new file mode 100644 index 00000000..1d7d9282 Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mteb-1400.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mteb-480.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mteb-480.webp new file mode 100644 index 00000000..faf5932c 
Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mteb-480.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mteb-800.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mteb-800.webp new file mode 100644 index 00000000..1d7d9282 Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mteb-800.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/multi-1400.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/multi-1400.webp new file mode 100644 index 00000000..824675ae Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/multi-1400.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/multi-480.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/multi-480.webp new file mode 100644 index 00000000..4ba37f22 Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/multi-480.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/multi-800.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/multi-800.webp new file mode 100644 index 00000000..824675ae Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/multi-800.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/nsp-1400.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/nsp-1400.webp new file mode 100644 index 00000000..4d9c2ca6 Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/nsp-1400.webp differ diff --git 
a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/nsp-480.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/nsp-480.webp new file mode 100644 index 00000000..0dfdd9eb Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/nsp-480.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/nsp-800.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/nsp-800.webp new file mode 100644 index 00000000..4d9c2ca6 Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/nsp-800.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/openai_embed-1400.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/openai_embed-1400.webp new file mode 100644 index 00000000..c6379cec Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/openai_embed-1400.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/openai_embed-480.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/openai_embed-480.webp new file mode 100644 index 00000000..714b2617 Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/openai_embed-480.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/openai_embed-800.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/openai_embed-800.webp new file mode 100644 index 00000000..c6379cec Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/openai_embed-800.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/sbert-1400.webp 
b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/sbert-1400.webp new file mode 100644 index 00000000..5789841b Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/sbert-1400.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/sbert-480.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/sbert-480.webp new file mode 100644 index 00000000..a81c791f Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/sbert-480.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/sbert-800.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/sbert-800.webp new file mode 100644 index 00000000..5789841b Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/sbert-800.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scale-1400.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scale-1400.webp new file mode 100644 index 00000000..5a5e44db Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scale-1400.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scale-480.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scale-480.webp new file mode 100644 index 00000000..65bf4860 Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scale-480.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scale-800.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scale-800.webp new file mode 100644 index 00000000..5a5e44db Binary files /dev/null and 
b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scale-800.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scaling-1400.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scaling-1400.webp new file mode 100644 index 00000000..eeacc10f Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scaling-1400.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scaling-480.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scaling-480.webp new file mode 100644 index 00000000..7abbf5aa Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scaling-480.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scaling-800.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scaling-800.webp new file mode 100644 index 00000000..eeacc10f Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scaling-800.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/vs_base-1400.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/vs_base-1400.webp new file mode 100644 index 00000000..6607a0ed Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/vs_base-1400.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/vs_base-480.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/vs_base-480.webp new file mode 100644 index 00000000..1920966f Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/vs_base-480.webp differ diff --git 
a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/vs_base-800.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/vs_base-800.webp new file mode 100644 index 00000000..6607a0ed Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/vs_base-800.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/vs_multi-1400.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/vs_multi-1400.webp new file mode 100644 index 00000000..456b4006 Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/vs_multi-1400.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/vs_multi-480.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/vs_multi-480.webp new file mode 100644 index 00000000..525bdb6f Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/vs_multi-480.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/vs_multi-800.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/vs_multi-800.webp new file mode 100644 index 00000000..456b4006 Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/vs_multi-800.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/words-1400.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/words-1400.webp new file mode 100644 index 00000000..bfe91864 Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/words-1400.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/words-480.webp 
b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/words-480.webp new file mode 100644 index 00000000..9e954cb7 Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/words-480.webp differ diff --git a/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/words-800.webp b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/words-800.webp new file mode 100644 index 00000000..bfe91864 Binary files /dev/null and b/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/words-800.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/accuracy-1400.webp b/assets/img/2023-11-09-multimodal-commonsense/accuracy-1400.webp new file mode 100644 index 00000000..94757d55 Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/accuracy-1400.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/accuracy-480.webp b/assets/img/2023-11-09-multimodal-commonsense/accuracy-480.webp new file mode 100644 index 00000000..efbee7b9 Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/accuracy-480.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/accuracy-800.webp b/assets/img/2023-11-09-multimodal-commonsense/accuracy-800.webp new file mode 100644 index 00000000..94757d55 Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/accuracy-800.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/hellaswag-1400.webp b/assets/img/2023-11-09-multimodal-commonsense/hellaswag-1400.webp new file mode 100644 index 00000000..f1025e53 Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/hellaswag-1400.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/hellaswag-480.webp b/assets/img/2023-11-09-multimodal-commonsense/hellaswag-480.webp new file mode 100644 index 00000000..4fcf5268 Binary files /dev/null and 
b/assets/img/2023-11-09-multimodal-commonsense/hellaswag-480.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/hellaswag-800.webp b/assets/img/2023-11-09-multimodal-commonsense/hellaswag-800.webp new file mode 100644 index 00000000..f1025e53 Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/hellaswag-800.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/hellaswag_summary-1400.webp b/assets/img/2023-11-09-multimodal-commonsense/hellaswag_summary-1400.webp new file mode 100644 index 00000000..909ec6b0 Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/hellaswag_summary-1400.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/hellaswag_summary-480.webp b/assets/img/2023-11-09-multimodal-commonsense/hellaswag_summary-480.webp new file mode 100644 index 00000000..a2509c8f Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/hellaswag_summary-480.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/hellaswag_summary-800.webp b/assets/img/2023-11-09-multimodal-commonsense/hellaswag_summary-800.webp new file mode 100644 index 00000000..909ec6b0 Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/hellaswag_summary-800.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/linda_end0-1400.webp b/assets/img/2023-11-09-multimodal-commonsense/linda_end0-1400.webp new file mode 100644 index 00000000..4011b3cb Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/linda_end0-1400.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/linda_end0-480.webp b/assets/img/2023-11-09-multimodal-commonsense/linda_end0-480.webp new file mode 100644 index 00000000..091dde0a Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/linda_end0-480.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/linda_end0-800.webp 
b/assets/img/2023-11-09-multimodal-commonsense/linda_end0-800.webp new file mode 100644 index 00000000..4011b3cb Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/linda_end0-800.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/linda_end1-1400.webp b/assets/img/2023-11-09-multimodal-commonsense/linda_end1-1400.webp new file mode 100644 index 00000000..11022afc Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/linda_end1-1400.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/linda_end1-480.webp b/assets/img/2023-11-09-multimodal-commonsense/linda_end1-480.webp new file mode 100644 index 00000000..a45ec680 Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/linda_end1-480.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/linda_end1-800.webp b/assets/img/2023-11-09-multimodal-commonsense/linda_end1-800.webp new file mode 100644 index 00000000..11022afc Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/linda_end1-800.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/linda_end2-1400.webp b/assets/img/2023-11-09-multimodal-commonsense/linda_end2-1400.webp new file mode 100644 index 00000000..dc08b008 Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/linda_end2-1400.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/linda_end2-480.webp b/assets/img/2023-11-09-multimodal-commonsense/linda_end2-480.webp new file mode 100644 index 00000000..813c3533 Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/linda_end2-480.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/linda_end2-800.webp b/assets/img/2023-11-09-multimodal-commonsense/linda_end2-800.webp new file mode 100644 index 00000000..dc08b008 Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/linda_end2-800.webp differ diff --git 
a/assets/img/2023-11-09-multimodal-commonsense/linda_end3-1400.webp b/assets/img/2023-11-09-multimodal-commonsense/linda_end3-1400.webp new file mode 100644 index 00000000..d89aaedc Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/linda_end3-1400.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/linda_end3-480.webp b/assets/img/2023-11-09-multimodal-commonsense/linda_end3-480.webp new file mode 100644 index 00000000..986f8326 Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/linda_end3-480.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/linda_end3-800.webp b/assets/img/2023-11-09-multimodal-commonsense/linda_end3-800.webp new file mode 100644 index 00000000..d89aaedc Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/linda_end3-800.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/linda_paint0-1400.webp b/assets/img/2023-11-09-multimodal-commonsense/linda_paint0-1400.webp new file mode 100644 index 00000000..975bd8a0 Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/linda_paint0-1400.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/linda_paint0-480.webp b/assets/img/2023-11-09-multimodal-commonsense/linda_paint0-480.webp new file mode 100644 index 00000000..714de9bf Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/linda_paint0-480.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/linda_paint0-800.webp b/assets/img/2023-11-09-multimodal-commonsense/linda_paint0-800.webp new file mode 100644 index 00000000..975bd8a0 Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/linda_paint0-800.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/linda_paint1-1400.webp b/assets/img/2023-11-09-multimodal-commonsense/linda_paint1-1400.webp new file mode 100644 index 00000000..e4997392 Binary files /dev/null and 
b/assets/img/2023-11-09-multimodal-commonsense/linda_paint1-1400.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/linda_paint1-480.webp b/assets/img/2023-11-09-multimodal-commonsense/linda_paint1-480.webp new file mode 100644 index 00000000..5886db9b Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/linda_paint1-480.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/linda_paint1-800.webp b/assets/img/2023-11-09-multimodal-commonsense/linda_paint1-800.webp new file mode 100644 index 00000000..e4997392 Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/linda_paint1-800.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/linda_paint2-1400.webp b/assets/img/2023-11-09-multimodal-commonsense/linda_paint2-1400.webp new file mode 100644 index 00000000..2d8b1cdf Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/linda_paint2-1400.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/linda_paint2-480.webp b/assets/img/2023-11-09-multimodal-commonsense/linda_paint2-480.webp new file mode 100644 index 00000000..28f02b9c Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/linda_paint2-480.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/linda_paint2-800.webp b/assets/img/2023-11-09-multimodal-commonsense/linda_paint2-800.webp new file mode 100644 index 00000000..2d8b1cdf Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/linda_paint2-800.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/linda_paint3-1400.webp b/assets/img/2023-11-09-multimodal-commonsense/linda_paint3-1400.webp new file mode 100644 index 00000000..3db9b426 Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/linda_paint3-1400.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/linda_paint3-480.webp 
b/assets/img/2023-11-09-multimodal-commonsense/linda_paint3-480.webp new file mode 100644 index 00000000..146bf9e8 Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/linda_paint3-480.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/linda_paint3-800.webp b/assets/img/2023-11-09-multimodal-commonsense/linda_paint3-800.webp new file mode 100644 index 00000000..3db9b426 Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/linda_paint3-800.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/piqa-1400.webp b/assets/img/2023-11-09-multimodal-commonsense/piqa-1400.webp new file mode 100644 index 00000000..54f990b8 Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/piqa-1400.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/piqa-480.webp b/assets/img/2023-11-09-multimodal-commonsense/piqa-480.webp new file mode 100644 index 00000000..ae7eda2c Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/piqa-480.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/piqa-800.webp b/assets/img/2023-11-09-multimodal-commonsense/piqa-800.webp new file mode 100644 index 00000000..54f990b8 Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/piqa-800.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/socialiqa-1400.webp b/assets/img/2023-11-09-multimodal-commonsense/socialiqa-1400.webp new file mode 100644 index 00000000..ce74cc9c Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/socialiqa-1400.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/socialiqa-480.webp b/assets/img/2023-11-09-multimodal-commonsense/socialiqa-480.webp new file mode 100644 index 00000000..fbc4276a Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/socialiqa-480.webp differ diff --git a/assets/img/2023-11-09-multimodal-commonsense/socialiqa-800.webp 
b/assets/img/2023-11-09-multimodal-commonsense/socialiqa-800.webp new file mode 100644 index 00000000..ce74cc9c Binary files /dev/null and b/assets/img/2023-11-09-multimodal-commonsense/socialiqa-800.webp differ diff --git a/assets/img/2023-11-09-project-proposal/Fig1-1400.webp b/assets/img/2023-11-09-project-proposal/Fig1-1400.webp new file mode 100644 index 00000000..efee931d Binary files /dev/null and b/assets/img/2023-11-09-project-proposal/Fig1-1400.webp differ diff --git a/assets/img/2023-11-09-project-proposal/Fig1-480.webp b/assets/img/2023-11-09-project-proposal/Fig1-480.webp new file mode 100644 index 00000000..7d0f5cd3 Binary files /dev/null and b/assets/img/2023-11-09-project-proposal/Fig1-480.webp differ diff --git a/assets/img/2023-11-09-project-proposal/Fig1-800.webp b/assets/img/2023-11-09-project-proposal/Fig1-800.webp new file mode 100644 index 00000000..efee931d Binary files /dev/null and b/assets/img/2023-11-09-project-proposal/Fig1-800.webp differ diff --git a/assets/img/2023-11-09-proposal-2/fig1-1400.webp b/assets/img/2023-11-09-proposal-2/fig1-1400.webp new file mode 100644 index 00000000..e26d0ddd Binary files /dev/null and b/assets/img/2023-11-09-proposal-2/fig1-1400.webp differ diff --git a/assets/img/2023-11-09-proposal-2/fig1-480.webp b/assets/img/2023-11-09-proposal-2/fig1-480.webp new file mode 100644 index 00000000..a9a2e7a5 Binary files /dev/null and b/assets/img/2023-11-09-proposal-2/fig1-480.webp differ diff --git a/assets/img/2023-11-09-proposal-2/fig1-800.webp b/assets/img/2023-11-09-proposal-2/fig1-800.webp new file mode 100644 index 00000000..e26d0ddd Binary files /dev/null and b/assets/img/2023-11-09-proposal-2/fig1-800.webp differ diff --git a/assets/img/2023-11-09-proposal-2/fig2-1400.webp b/assets/img/2023-11-09-proposal-2/fig2-1400.webp new file mode 100644 index 00000000..bc7d6f9a Binary files /dev/null and b/assets/img/2023-11-09-proposal-2/fig2-1400.webp differ diff --git 
a/assets/img/2023-11-09-proposal-2/fig2-480.webp b/assets/img/2023-11-09-proposal-2/fig2-480.webp new file mode 100644 index 00000000..8b773d36 Binary files /dev/null and b/assets/img/2023-11-09-proposal-2/fig2-480.webp differ diff --git a/assets/img/2023-11-09-proposal-2/fig2-800.webp b/assets/img/2023-11-09-proposal-2/fig2-800.webp new file mode 100644 index 00000000..bc7d6f9a Binary files /dev/null and b/assets/img/2023-11-09-proposal-2/fig2-800.webp differ diff --git a/assets/img/2023-11-09-proposal-2/fig3-1400.webp b/assets/img/2023-11-09-proposal-2/fig3-1400.webp new file mode 100644 index 00000000..c2618561 Binary files /dev/null and b/assets/img/2023-11-09-proposal-2/fig3-1400.webp differ diff --git a/assets/img/2023-11-09-proposal-2/fig3-480.webp b/assets/img/2023-11-09-proposal-2/fig3-480.webp new file mode 100644 index 00000000..f01d6f94 Binary files /dev/null and b/assets/img/2023-11-09-proposal-2/fig3-480.webp differ diff --git a/assets/img/2023-11-09-proposal-2/fig3-800.webp b/assets/img/2023-11-09-proposal-2/fig3-800.webp new file mode 100644 index 00000000..c2618561 Binary files /dev/null and b/assets/img/2023-11-09-proposal-2/fig3-800.webp differ diff --git a/assets/img/2023-11-09-proposal-2/fig4-1400.webp b/assets/img/2023-11-09-proposal-2/fig4-1400.webp new file mode 100644 index 00000000..09804741 Binary files /dev/null and b/assets/img/2023-11-09-proposal-2/fig4-1400.webp differ diff --git a/assets/img/2023-11-09-proposal-2/fig4-480.webp b/assets/img/2023-11-09-proposal-2/fig4-480.webp new file mode 100644 index 00000000..fc812cce Binary files /dev/null and b/assets/img/2023-11-09-proposal-2/fig4-480.webp differ diff --git a/assets/img/2023-11-09-proposal-2/fig4-800.webp b/assets/img/2023-11-09-proposal-2/fig4-800.webp new file mode 100644 index 00000000..09804741 Binary files /dev/null and b/assets/img/2023-11-09-proposal-2/fig4-800.webp differ diff --git a/assets/img/2023-11-09-proposal-2/fig5-1400.webp 
b/assets/img/2023-11-09-proposal-2/fig5-1400.webp new file mode 100644 index 00000000..b6c2bb5e Binary files /dev/null and b/assets/img/2023-11-09-proposal-2/fig5-1400.webp differ diff --git a/assets/img/2023-11-09-proposal-2/fig5-480.webp b/assets/img/2023-11-09-proposal-2/fig5-480.webp new file mode 100644 index 00000000..f34a6fec Binary files /dev/null and b/assets/img/2023-11-09-proposal-2/fig5-480.webp differ diff --git a/assets/img/2023-11-09-proposal-2/fig5-800.webp b/assets/img/2023-11-09-proposal-2/fig5-800.webp new file mode 100644 index 00000000..b6c2bb5e Binary files /dev/null and b/assets/img/2023-11-09-proposal-2/fig5-800.webp differ diff --git a/assets/img/2023-11-09-proposal/error_vs_size-1400.webp b/assets/img/2023-11-09-proposal/error_vs_size-1400.webp new file mode 100644 index 00000000..994ca85b Binary files /dev/null and b/assets/img/2023-11-09-proposal/error_vs_size-1400.webp differ diff --git a/assets/img/2023-11-09-proposal/error_vs_size-480.webp b/assets/img/2023-11-09-proposal/error_vs_size-480.webp new file mode 100644 index 00000000..fbb21a8c Binary files /dev/null and b/assets/img/2023-11-09-proposal/error_vs_size-480.webp differ diff --git a/assets/img/2023-11-09-proposal/error_vs_size-800.webp b/assets/img/2023-11-09-proposal/error_vs_size-800.webp new file mode 100644 index 00000000..994ca85b Binary files /dev/null and b/assets/img/2023-11-09-proposal/error_vs_size-800.webp differ diff --git a/assets/img/2023-11-09-proposal/gnn_architecture1-1400.webp b/assets/img/2023-11-09-proposal/gnn_architecture1-1400.webp new file mode 100644 index 00000000..5cfa835f Binary files /dev/null and b/assets/img/2023-11-09-proposal/gnn_architecture1-1400.webp differ diff --git a/assets/img/2023-11-09-proposal/gnn_architecture1-480.webp b/assets/img/2023-11-09-proposal/gnn_architecture1-480.webp new file mode 100644 index 00000000..a7b9fcb5 Binary files /dev/null and b/assets/img/2023-11-09-proposal/gnn_architecture1-480.webp differ diff --git 
a/assets/img/2023-11-09-proposal/gnn_architecture1-800.webp b/assets/img/2023-11-09-proposal/gnn_architecture1-800.webp new file mode 100644 index 00000000..5cfa835f Binary files /dev/null and b/assets/img/2023-11-09-proposal/gnn_architecture1-800.webp differ diff --git a/assets/img/2023-11-09-proposal/graph_transformer_architecture2-1400.webp b/assets/img/2023-11-09-proposal/graph_transformer_architecture2-1400.webp new file mode 100644 index 00000000..1d424920 Binary files /dev/null and b/assets/img/2023-11-09-proposal/graph_transformer_architecture2-1400.webp differ diff --git a/assets/img/2023-11-09-proposal/graph_transformer_architecture2-480.webp b/assets/img/2023-11-09-proposal/graph_transformer_architecture2-480.webp new file mode 100644 index 00000000..e7822b68 Binary files /dev/null and b/assets/img/2023-11-09-proposal/graph_transformer_architecture2-480.webp differ diff --git a/assets/img/2023-11-09-proposal/graph_transformer_architecture2-800.webp b/assets/img/2023-11-09-proposal/graph_transformer_architecture2-800.webp new file mode 100644 index 00000000..1d424920 Binary files /dev/null and b/assets/img/2023-11-09-proposal/graph_transformer_architecture2-800.webp differ diff --git a/assets/img/2023-11-09-proposal/message_passing_eqn-1400.webp b/assets/img/2023-11-09-proposal/message_passing_eqn-1400.webp new file mode 100644 index 00000000..55de72c0 Binary files /dev/null and b/assets/img/2023-11-09-proposal/message_passing_eqn-1400.webp differ diff --git a/assets/img/2023-11-09-proposal/message_passing_eqn-480.webp b/assets/img/2023-11-09-proposal/message_passing_eqn-480.webp new file mode 100644 index 00000000..18ee21b2 Binary files /dev/null and b/assets/img/2023-11-09-proposal/message_passing_eqn-480.webp differ diff --git a/assets/img/2023-11-09-proposal/message_passing_eqn-800.webp b/assets/img/2023-11-09-proposal/message_passing_eqn-800.webp new file mode 100644 index 00000000..55de72c0 Binary files /dev/null and 
b/assets/img/2023-11-09-proposal/message_passing_eqn-800.webp differ diff --git a/assets/img/2023-11-09-proposal/problem_definition-1400.webp b/assets/img/2023-11-09-proposal/problem_definition-1400.webp new file mode 100644 index 00000000..106d40d2 Binary files /dev/null and b/assets/img/2023-11-09-proposal/problem_definition-1400.webp differ diff --git a/assets/img/2023-11-09-proposal/problem_definition-480.webp b/assets/img/2023-11-09-proposal/problem_definition-480.webp new file mode 100644 index 00000000..c871aaa1 Binary files /dev/null and b/assets/img/2023-11-09-proposal/problem_definition-480.webp differ diff --git a/assets/img/2023-11-09-proposal/problem_definition-800.webp b/assets/img/2023-11-09-proposal/problem_definition-800.webp new file mode 100644 index 00000000..106d40d2 Binary files /dev/null and b/assets/img/2023-11-09-proposal/problem_definition-800.webp differ diff --git a/assets/img/2023-11-09-proposal/protein-ligand-structure-1400.webp b/assets/img/2023-11-09-proposal/protein-ligand-structure-1400.webp new file mode 100644 index 00000000..01d80c6c Binary files /dev/null and b/assets/img/2023-11-09-proposal/protein-ligand-structure-1400.webp differ diff --git a/assets/img/2023-11-09-proposal/protein-ligand-structure-480.webp b/assets/img/2023-11-09-proposal/protein-ligand-structure-480.webp new file mode 100644 index 00000000..9b4534c4 Binary files /dev/null and b/assets/img/2023-11-09-proposal/protein-ligand-structure-480.webp differ diff --git a/assets/img/2023-11-09-proposal/protein-ligand-structure-800.webp b/assets/img/2023-11-09-proposal/protein-ligand-structure-800.webp new file mode 100644 index 00000000..01d80c6c Binary files /dev/null and b/assets/img/2023-11-09-proposal/protein-ligand-structure-800.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/entanglement-1400.webp b/assets/img/2023-11-09-quantum-gnn/entanglement-1400.webp new file mode 100644 index 00000000..b58f48fb Binary files /dev/null and 
b/assets/img/2023-11-09-quantum-gnn/entanglement-1400.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/entanglement-480.webp b/assets/img/2023-11-09-quantum-gnn/entanglement-480.webp new file mode 100644 index 00000000..d3ee4a72 Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/entanglement-480.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/entanglement-800.webp b/assets/img/2023-11-09-quantum-gnn/entanglement-800.webp new file mode 100644 index 00000000..b58f48fb Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/entanglement-800.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/gnn-1400.webp b/assets/img/2023-11-09-quantum-gnn/gnn-1400.webp new file mode 100644 index 00000000..0bb2c272 Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/gnn-1400.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/gnn-480.webp b/assets/img/2023-11-09-quantum-gnn/gnn-480.webp new file mode 100644 index 00000000..2839c631 Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/gnn-480.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/gnn-800.webp b/assets/img/2023-11-09-quantum-gnn/gnn-800.webp new file mode 100644 index 00000000..0bb2c272 Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/gnn-800.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/heads-1400.webp b/assets/img/2023-11-09-quantum-gnn/heads-1400.webp new file mode 100644 index 00000000..1cedd93a Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/heads-1400.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/heads-480.webp b/assets/img/2023-11-09-quantum-gnn/heads-480.webp new file mode 100644 index 00000000..b047584e Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/heads-480.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/heads-800.webp b/assets/img/2023-11-09-quantum-gnn/heads-800.webp new file mode 100644 index 00000000..1cedd93a Binary files /dev/null and 
b/assets/img/2023-11-09-quantum-gnn/heads-800.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/hidden_dim-1400.webp b/assets/img/2023-11-09-quantum-gnn/hidden_dim-1400.webp new file mode 100644 index 00000000..ae5abc1f Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/hidden_dim-1400.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/hidden_dim-480.webp b/assets/img/2023-11-09-quantum-gnn/hidden_dim-480.webp new file mode 100644 index 00000000..6a274d26 Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/hidden_dim-480.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/hidden_dim-800.webp b/assets/img/2023-11-09-quantum-gnn/hidden_dim-800.webp new file mode 100644 index 00000000..ae5abc1f Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/hidden_dim-800.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/n_layers-1400.webp b/assets/img/2023-11-09-quantum-gnn/n_layers-1400.webp new file mode 100644 index 00000000..fb2f5669 Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/n_layers-1400.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/n_layers-480.webp b/assets/img/2023-11-09-quantum-gnn/n_layers-480.webp new file mode 100644 index 00000000..039046e9 Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/n_layers-480.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/n_layers-800.webp b/assets/img/2023-11-09-quantum-gnn/n_layers-800.webp new file mode 100644 index 00000000..fb2f5669 Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/n_layers-800.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/one_gate_trans-1400.webp b/assets/img/2023-11-09-quantum-gnn/one_gate_trans-1400.webp new file mode 100644 index 00000000..d5c75aa1 Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/one_gate_trans-1400.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/one_gate_trans-480.webp 
b/assets/img/2023-11-09-quantum-gnn/one_gate_trans-480.webp new file mode 100644 index 00000000..e909d71d Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/one_gate_trans-480.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/one_gate_trans-800.webp b/assets/img/2023-11-09-quantum-gnn/one_gate_trans-800.webp new file mode 100644 index 00000000..d5c75aa1 Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/one_gate_trans-800.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/pos_encoding-1400.webp b/assets/img/2023-11-09-quantum-gnn/pos_encoding-1400.webp new file mode 100644 index 00000000..67d12ae6 Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/pos_encoding-1400.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/pos_encoding-480.webp b/assets/img/2023-11-09-quantum-gnn/pos_encoding-480.webp new file mode 100644 index 00000000..602d33d5 Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/pos_encoding-480.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/pos_encoding-800.webp b/assets/img/2023-11-09-quantum-gnn/pos_encoding-800.webp new file mode 100644 index 00000000..67d12ae6 Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/pos_encoding-800.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/random_after-1400.webp b/assets/img/2023-11-09-quantum-gnn/random_after-1400.webp new file mode 100644 index 00000000..a5262007 Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/random_after-1400.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/random_after-480.webp b/assets/img/2023-11-09-quantum-gnn/random_after-480.webp new file mode 100644 index 00000000..a2671ef6 Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/random_after-480.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/random_after-800.webp b/assets/img/2023-11-09-quantum-gnn/random_after-800.webp new file mode 100644 index 00000000..a5262007 Binary files /dev/null 
and b/assets/img/2023-11-09-quantum-gnn/random_after-800.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/random_before-1400.webp b/assets/img/2023-11-09-quantum-gnn/random_before-1400.webp new file mode 100644 index 00000000..27cec82e Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/random_before-1400.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/random_before-480.webp b/assets/img/2023-11-09-quantum-gnn/random_before-480.webp new file mode 100644 index 00000000..29daa774 Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/random_before-480.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/random_before-800.webp b/assets/img/2023-11-09-quantum-gnn/random_before-800.webp new file mode 100644 index 00000000..27cec82e Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/random_before-800.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/random_optimization-1400.webp b/assets/img/2023-11-09-quantum-gnn/random_optimization-1400.webp new file mode 100644 index 00000000..d80fafa1 Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/random_optimization-1400.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/random_optimization-480.webp b/assets/img/2023-11-09-quantum-gnn/random_optimization-480.webp new file mode 100644 index 00000000..68142166 Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/random_optimization-480.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/random_optimization-800.webp b/assets/img/2023-11-09-quantum-gnn/random_optimization-800.webp new file mode 100644 index 00000000..d80fafa1 Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/random_optimization-800.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/random_plot-1400.webp b/assets/img/2023-11-09-quantum-gnn/random_plot-1400.webp new file mode 100644 index 00000000..32656728 Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/random_plot-1400.webp 
differ diff --git a/assets/img/2023-11-09-quantum-gnn/random_plot-480.webp b/assets/img/2023-11-09-quantum-gnn/random_plot-480.webp new file mode 100644 index 00000000..7ae54e15 Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/random_plot-480.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/random_plot-800.webp b/assets/img/2023-11-09-quantum-gnn/random_plot-800.webp new file mode 100644 index 00000000..32656728 Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/random_plot-800.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/three_gate_trans-1400.webp b/assets/img/2023-11-09-quantum-gnn/three_gate_trans-1400.webp new file mode 100644 index 00000000..757dd57c Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/three_gate_trans-1400.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/three_gate_trans-480.webp b/assets/img/2023-11-09-quantum-gnn/three_gate_trans-480.webp new file mode 100644 index 00000000..8f85a1fd Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/three_gate_trans-480.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/three_gate_trans-800.webp b/assets/img/2023-11-09-quantum-gnn/three_gate_trans-800.webp new file mode 100644 index 00000000..757dd57c Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/three_gate_trans-800.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/two_gate_trans-1400.webp b/assets/img/2023-11-09-quantum-gnn/two_gate_trans-1400.webp new file mode 100644 index 00000000..e073806f Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/two_gate_trans-1400.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/two_gate_trans-480.webp b/assets/img/2023-11-09-quantum-gnn/two_gate_trans-480.webp new file mode 100644 index 00000000..34d287ef Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/two_gate_trans-480.webp differ diff --git a/assets/img/2023-11-09-quantum-gnn/two_gate_trans-800.webp 
b/assets/img/2023-11-09-quantum-gnn/two_gate_trans-800.webp new file mode 100644 index 00000000..e073806f Binary files /dev/null and b/assets/img/2023-11-09-quantum-gnn/two_gate_trans-800.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/att-1400.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/att-1400.webp new file mode 100644 index 00000000..12cca1f4 Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/att-1400.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/att-480.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/att-480.webp new file mode 100644 index 00000000..ac78831a Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/att-480.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/att-800.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/att-800.webp new file mode 100644 index 00000000..12cca1f4 Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/att-800.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/averaged-1400.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/averaged-1400.webp new file mode 100644 index 00000000..3568853e Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/averaged-1400.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/averaged-480.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/averaged-480.webp new file mode 100644 index 00000000..9e1bab11 Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/averaged-480.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/averaged-800.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/averaged-800.webp new file mode 100644 index 00000000..3568853e Binary files /dev/null and 
b/assets/img/2023-11-09-structured-physics-loss-diffusion/averaged-800.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/correct-1400.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/correct-1400.webp new file mode 100644 index 00000000..963f848f Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/correct-1400.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/correct-480.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/correct-480.webp new file mode 100644 index 00000000..234918d8 Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/correct-480.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/correct-800.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/correct-800.webp new file mode 100644 index 00000000..963f848f Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/correct-800.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/head13-1400.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/head13-1400.webp new file mode 100644 index 00000000..0edfb5ca Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/head13-1400.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/head13-480.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/head13-480.webp new file mode 100644 index 00000000..9146a81d Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/head13-480.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/head13-800.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/head13-800.webp new file mode 100644 index 00000000..0edfb5ca Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/head13-800.webp differ diff --git 
a/assets/img/2023-11-09-structured-physics-loss-diffusion/incorrect-1400.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/incorrect-1400.webp new file mode 100644 index 00000000..a6666841 Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/incorrect-1400.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/incorrect-480.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/incorrect-480.webp new file mode 100644 index 00000000..a1783ec3 Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/incorrect-480.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/incorrect-800.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/incorrect-800.webp new file mode 100644 index 00000000..a6666841 Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/incorrect-800.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/multiplier-1400.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/multiplier-1400.webp new file mode 100644 index 00000000..3173db3e Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/multiplier-1400.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/multiplier-480.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/multiplier-480.webp new file mode 100644 index 00000000..fd791ece Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/multiplier-480.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/multiplier-800.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/multiplier-800.webp new file mode 100644 index 00000000..3173db3e Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/multiplier-800.webp differ diff --git 
a/assets/img/2023-11-09-structured-physics-loss-diffusion/rnn-1400.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/rnn-1400.webp new file mode 100644 index 00000000..7ecd7b2b Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/rnn-1400.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/rnn-480.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/rnn-480.webp new file mode 100644 index 00000000..a6f8df23 Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/rnn-480.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/rnn-800.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/rnn-800.webp new file mode 100644 index 00000000..7ecd7b2b Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/rnn-800.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/sentence-1400.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/sentence-1400.webp new file mode 100644 index 00000000..691be1e9 Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/sentence-1400.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/sentence-480.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/sentence-480.webp new file mode 100644 index 00000000..52f63b5c Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/sentence-480.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/sentence-800.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/sentence-800.webp new file mode 100644 index 00000000..691be1e9 Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/sentence-800.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/title-1400.webp 
b/assets/img/2023-11-09-structured-physics-loss-diffusion/title-1400.webp new file mode 100644 index 00000000..14fadacb Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/title-1400.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/title-480.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/title-480.webp new file mode 100644 index 00000000..bb7f7e75 Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/title-480.webp differ diff --git a/assets/img/2023-11-09-structured-physics-loss-diffusion/title-800.webp b/assets/img/2023-11-09-structured-physics-loss-diffusion/title-800.webp new file mode 100644 index 00000000..14fadacb Binary files /dev/null and b/assets/img/2023-11-09-structured-physics-loss-diffusion/title-800.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/CNN_architectures-1400.webp b/assets/img/2023-11-09-transfer-resistant-model-training/CNN_architectures-1400.webp new file mode 100644 index 00000000..3f211015 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/CNN_architectures-1400.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/CNN_architectures-480.webp b/assets/img/2023-11-09-transfer-resistant-model-training/CNN_architectures-480.webp new file mode 100644 index 00000000..ba7fbe06 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/CNN_architectures-480.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/CNN_architectures-800.webp b/assets/img/2023-11-09-transfer-resistant-model-training/CNN_architectures-800.webp new file mode 100644 index 00000000..3f211015 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/CNN_architectures-800.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/MAML-1400.webp 
b/assets/img/2023-11-09-transfer-resistant-model-training/MAML-1400.webp new file mode 100644 index 00000000..477c653b Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/MAML-1400.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/MAML-480.webp b/assets/img/2023-11-09-transfer-resistant-model-training/MAML-480.webp new file mode 100644 index 00000000..04b5b357 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/MAML-480.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/MAML-800.webp b/assets/img/2023-11-09-transfer-resistant-model-training/MAML-800.webp new file mode 100644 index 00000000..477c653b Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/MAML-800.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/beneficial_accuracy-1400.webp b/assets/img/2023-11-09-transfer-resistant-model-training/beneficial_accuracy-1400.webp new file mode 100644 index 00000000..15075ae6 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/beneficial_accuracy-1400.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/beneficial_accuracy-480.webp b/assets/img/2023-11-09-transfer-resistant-model-training/beneficial_accuracy-480.webp new file mode 100644 index 00000000..08bd39d3 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/beneficial_accuracy-480.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/beneficial_accuracy-800.webp b/assets/img/2023-11-09-transfer-resistant-model-training/beneficial_accuracy-800.webp new file mode 100644 index 00000000..15075ae6 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/beneficial_accuracy-800.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/digits-1400.webp 
b/assets/img/2023-11-09-transfer-resistant-model-training/digits-1400.webp new file mode 100644 index 00000000..e169a74c Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/digits-1400.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/digits-480.webp b/assets/img/2023-11-09-transfer-resistant-model-training/digits-480.webp new file mode 100644 index 00000000..917e8f30 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/digits-480.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/digits-800.webp b/assets/img/2023-11-09-transfer-resistant-model-training/digits-800.webp new file mode 100644 index 00000000..e169a74c Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/digits-800.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/error_bounds-1400.webp b/assets/img/2023-11-09-transfer-resistant-model-training/error_bounds-1400.webp new file mode 100644 index 00000000..08713c15 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/error_bounds-1400.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/error_bounds-480.webp b/assets/img/2023-11-09-transfer-resistant-model-training/error_bounds-480.webp new file mode 100644 index 00000000..5eb1f3b3 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/error_bounds-480.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/error_bounds-800.webp b/assets/img/2023-11-09-transfer-resistant-model-training/error_bounds-800.webp new file mode 100644 index 00000000..08713c15 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/error_bounds-800.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/evaluation-1400.webp 
b/assets/img/2023-11-09-transfer-resistant-model-training/evaluation-1400.webp new file mode 100644 index 00000000..49cb097c Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/evaluation-1400.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/evaluation-480.webp b/assets/img/2023-11-09-transfer-resistant-model-training/evaluation-480.webp new file mode 100644 index 00000000..7ae43be2 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/evaluation-480.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/evaluation-800.webp b/assets/img/2023-11-09-transfer-resistant-model-training/evaluation-800.webp new file mode 100644 index 00000000..49cb097c Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/evaluation-800.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/machine_unlearning-1400.webp b/assets/img/2023-11-09-transfer-resistant-model-training/machine_unlearning-1400.webp new file mode 100644 index 00000000..32291a2e Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/machine_unlearning-1400.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/machine_unlearning-480.webp b/assets/img/2023-11-09-transfer-resistant-model-training/machine_unlearning-480.webp new file mode 100644 index 00000000..358d9d50 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/machine_unlearning-480.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/machine_unlearning-800.webp b/assets/img/2023-11-09-transfer-resistant-model-training/machine_unlearning-800.webp new file mode 100644 index 00000000..32291a2e Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/machine_unlearning-800.webp differ diff --git 
a/assets/img/2023-11-09-transfer-resistant-model-training/malicious_accuracy-1400.webp b/assets/img/2023-11-09-transfer-resistant-model-training/malicious_accuracy-1400.webp new file mode 100644 index 00000000..0c8587b0 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/malicious_accuracy-1400.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/malicious_accuracy-480.webp b/assets/img/2023-11-09-transfer-resistant-model-training/malicious_accuracy-480.webp new file mode 100644 index 00000000..10f3b02e Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/malicious_accuracy-480.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/malicious_accuracy-800.webp b/assets/img/2023-11-09-transfer-resistant-model-training/malicious_accuracy-800.webp new file mode 100644 index 00000000..0c8587b0 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/malicious_accuracy-800.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/performance-1400.webp b/assets/img/2023-11-09-transfer-resistant-model-training/performance-1400.webp new file mode 100644 index 00000000..86b0f500 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/performance-1400.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/performance-480.webp b/assets/img/2023-11-09-transfer-resistant-model-training/performance-480.webp new file mode 100644 index 00000000..2ac7e8e0 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/performance-480.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/performance-800.webp b/assets/img/2023-11-09-transfer-resistant-model-training/performance-800.webp new file mode 100644 index 00000000..86b0f500 Binary files /dev/null and 
b/assets/img/2023-11-09-transfer-resistant-model-training/performance-800.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/pipeline-1400.webp b/assets/img/2023-11-09-transfer-resistant-model-training/pipeline-1400.webp new file mode 100644 index 00000000..2fe743b7 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/pipeline-1400.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/pipeline-480.webp b/assets/img/2023-11-09-transfer-resistant-model-training/pipeline-480.webp new file mode 100644 index 00000000..fdac344a Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/pipeline-480.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/pipeline-800.webp b/assets/img/2023-11-09-transfer-resistant-model-training/pipeline-800.webp new file mode 100644 index 00000000..2fe743b7 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/pipeline-800.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/reverse_MAML-1400.webp b/assets/img/2023-11-09-transfer-resistant-model-training/reverse_MAML-1400.webp new file mode 100644 index 00000000..9a3b3c73 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/reverse_MAML-1400.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/reverse_MAML-480.webp b/assets/img/2023-11-09-transfer-resistant-model-training/reverse_MAML-480.webp new file mode 100644 index 00000000..03f200eb Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/reverse_MAML-480.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/reverse_MAML-800.webp b/assets/img/2023-11-09-transfer-resistant-model-training/reverse_MAML-800.webp new file mode 100644 index 00000000..9a3b3c73 Binary files /dev/null and 
b/assets/img/2023-11-09-transfer-resistant-model-training/reverse_MAML-800.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/setting-1400.webp b/assets/img/2023-11-09-transfer-resistant-model-training/setting-1400.webp new file mode 100644 index 00000000..c52604a8 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/setting-1400.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/setting-480.webp b/assets/img/2023-11-09-transfer-resistant-model-training/setting-480.webp new file mode 100644 index 00000000..ede5625a Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/setting-480.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/setting-800.webp b/assets/img/2023-11-09-transfer-resistant-model-training/setting-800.webp new file mode 100644 index 00000000..c52604a8 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/setting-800.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/student-1400.webp b/assets/img/2023-11-09-transfer-resistant-model-training/student-1400.webp new file mode 100644 index 00000000..61b2c1c5 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/student-1400.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/student-480.webp b/assets/img/2023-11-09-transfer-resistant-model-training/student-480.webp new file mode 100644 index 00000000..5b70b943 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/student-480.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/student-800.webp b/assets/img/2023-11-09-transfer-resistant-model-training/student-800.webp new file mode 100644 index 00000000..61b2c1c5 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/student-800.webp differ diff --git 
a/assets/img/2023-11-09-transfer-resistant-model-training/student_table-1400.webp b/assets/img/2023-11-09-transfer-resistant-model-training/student_table-1400.webp new file mode 100644 index 00000000..59de2a39 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/student_table-1400.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/student_table-480.webp b/assets/img/2023-11-09-transfer-resistant-model-training/student_table-480.webp new file mode 100644 index 00000000..cb782851 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/student_table-480.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/student_table-800.webp b/assets/img/2023-11-09-transfer-resistant-model-training/student_table-800.webp new file mode 100644 index 00000000..59de2a39 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/student_table-800.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/teacher_student_architecture-1400.webp b/assets/img/2023-11-09-transfer-resistant-model-training/teacher_student_architecture-1400.webp new file mode 100644 index 00000000..02da7b41 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/teacher_student_architecture-1400.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/teacher_student_architecture-480.webp b/assets/img/2023-11-09-transfer-resistant-model-training/teacher_student_architecture-480.webp new file mode 100644 index 00000000..19cac1e8 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/teacher_student_architecture-480.webp differ diff --git a/assets/img/2023-11-09-transfer-resistant-model-training/teacher_student_architecture-800.webp b/assets/img/2023-11-09-transfer-resistant-model-training/teacher_student_architecture-800.webp new file mode 100644 index 
00000000..02da7b41 Binary files /dev/null and b/assets/img/2023-11-09-transfer-resistant-model-training/teacher_student_architecture-800.webp differ diff --git a/assets/img/2023-11-09-transformer-time/1-1400.webp b/assets/img/2023-11-09-transformer-time/1-1400.webp new file mode 100644 index 00000000..4ee82bd3 Binary files /dev/null and b/assets/img/2023-11-09-transformer-time/1-1400.webp differ diff --git a/assets/img/2023-11-09-transformer-time/1-480.webp b/assets/img/2023-11-09-transformer-time/1-480.webp new file mode 100644 index 00000000..2e821cf0 Binary files /dev/null and b/assets/img/2023-11-09-transformer-time/1-480.webp differ diff --git a/assets/img/2023-11-09-transformer-time/1-800.webp b/assets/img/2023-11-09-transformer-time/1-800.webp new file mode 100644 index 00000000..4ee82bd3 Binary files /dev/null and b/assets/img/2023-11-09-transformer-time/1-800.webp differ diff --git a/assets/img/2023-11-09-transformer-time/GRIN-1400.webp b/assets/img/2023-11-09-transformer-time/GRIN-1400.webp new file mode 100644 index 00000000..6c58fd7f Binary files /dev/null and b/assets/img/2023-11-09-transformer-time/GRIN-1400.webp differ diff --git a/assets/img/2023-11-09-transformer-time/GRIN-480.webp b/assets/img/2023-11-09-transformer-time/GRIN-480.webp new file mode 100644 index 00000000..0cf1b9f3 Binary files /dev/null and b/assets/img/2023-11-09-transformer-time/GRIN-480.webp differ diff --git a/assets/img/2023-11-09-transformer-time/GRIN-800.webp b/assets/img/2023-11-09-transformer-time/GRIN-800.webp new file mode 100644 index 00000000..6c58fd7f Binary files /dev/null and b/assets/img/2023-11-09-transformer-time/GRIN-800.webp differ diff --git a/assets/img/2023-11-09-transformer-time/Informer-1400.webp b/assets/img/2023-11-09-transformer-time/Informer-1400.webp new file mode 100644 index 00000000..7f868c10 Binary files /dev/null and b/assets/img/2023-11-09-transformer-time/Informer-1400.webp differ diff --git 
a/assets/img/2023-11-09-transformer-time/Informer-480.webp b/assets/img/2023-11-09-transformer-time/Informer-480.webp new file mode 100644 index 00000000..15dfda53 Binary files /dev/null and b/assets/img/2023-11-09-transformer-time/Informer-480.webp differ diff --git a/assets/img/2023-11-09-transformer-time/Informer-800.webp b/assets/img/2023-11-09-transformer-time/Informer-800.webp new file mode 100644 index 00000000..7f868c10 Binary files /dev/null and b/assets/img/2023-11-09-transformer-time/Informer-800.webp differ diff --git a/assets/img/2023-11-09-transformer-time/PatchTST-1400.webp b/assets/img/2023-11-09-transformer-time/PatchTST-1400.webp new file mode 100644 index 00000000..75379231 Binary files /dev/null and b/assets/img/2023-11-09-transformer-time/PatchTST-1400.webp differ diff --git a/assets/img/2023-11-09-transformer-time/PatchTST-480.webp b/assets/img/2023-11-09-transformer-time/PatchTST-480.webp new file mode 100644 index 00000000..17b2ef3b Binary files /dev/null and b/assets/img/2023-11-09-transformer-time/PatchTST-480.webp differ diff --git a/assets/img/2023-11-09-transformer-time/PatchTST-800.webp b/assets/img/2023-11-09-transformer-time/PatchTST-800.webp new file mode 100644 index 00000000..75379231 Binary files /dev/null and b/assets/img/2023-11-09-transformer-time/PatchTST-800.webp differ diff --git a/assets/img/2023-11-09-transformer-time/test1-1400.webp b/assets/img/2023-11-09-transformer-time/test1-1400.webp new file mode 100644 index 00000000..e0cba6f2 Binary files /dev/null and b/assets/img/2023-11-09-transformer-time/test1-1400.webp differ diff --git a/assets/img/2023-11-09-transformer-time/test1-480.webp b/assets/img/2023-11-09-transformer-time/test1-480.webp new file mode 100644 index 00000000..2e2d3d74 Binary files /dev/null and b/assets/img/2023-11-09-transformer-time/test1-480.webp differ diff --git a/assets/img/2023-11-09-transformer-time/test1-800.webp b/assets/img/2023-11-09-transformer-time/test1-800.webp new file mode 100644 
index 00000000..e0cba6f2 Binary files /dev/null and b/assets/img/2023-11-09-transformer-time/test1-800.webp differ diff --git a/assets/img/2023-11-09-transformer-time/test2-1400.webp b/assets/img/2023-11-09-transformer-time/test2-1400.webp new file mode 100644 index 00000000..29002b8e Binary files /dev/null and b/assets/img/2023-11-09-transformer-time/test2-1400.webp differ diff --git a/assets/img/2023-11-09-transformer-time/test2-480.webp b/assets/img/2023-11-09-transformer-time/test2-480.webp new file mode 100644 index 00000000..e92e4e32 Binary files /dev/null and b/assets/img/2023-11-09-transformer-time/test2-480.webp differ diff --git a/assets/img/2023-11-09-transformer-time/test2-800.webp b/assets/img/2023-11-09-transformer-time/test2-800.webp new file mode 100644 index 00000000..29002b8e Binary files /dev/null and b/assets/img/2023-11-09-transformer-time/test2-800.webp differ diff --git a/assets/img/2023-11-09-transformers-as-gamers/combined-1400.webp b/assets/img/2023-11-09-transformers-as-gamers/combined-1400.webp new file mode 100644 index 00000000..85096b04 Binary files /dev/null and b/assets/img/2023-11-09-transformers-as-gamers/combined-1400.webp differ diff --git a/assets/img/2023-11-09-transformers-as-gamers/combined-480.webp b/assets/img/2023-11-09-transformers-as-gamers/combined-480.webp new file mode 100644 index 00000000..2fe9f136 Binary files /dev/null and b/assets/img/2023-11-09-transformers-as-gamers/combined-480.webp differ diff --git a/assets/img/2023-11-09-transformers-as-gamers/combined-800.webp b/assets/img/2023-11-09-transformers-as-gamers/combined-800.webp new file mode 100644 index 00000000..85096b04 Binary files /dev/null and b/assets/img/2023-11-09-transformers-as-gamers/combined-800.webp differ diff --git a/assets/img/2023-11-09-transformers-as-gamers/data-1400.webp b/assets/img/2023-11-09-transformers-as-gamers/data-1400.webp new file mode 100644 index 00000000..8dd5e1c8 Binary files /dev/null and 
b/assets/img/2023-11-09-transformers-as-gamers/data-1400.webp differ diff --git a/assets/img/2023-11-09-transformers-as-gamers/data-480.webp b/assets/img/2023-11-09-transformers-as-gamers/data-480.webp new file mode 100644 index 00000000..f2a3b296 Binary files /dev/null and b/assets/img/2023-11-09-transformers-as-gamers/data-480.webp differ diff --git a/assets/img/2023-11-09-transformers-as-gamers/data-800.webp b/assets/img/2023-11-09-transformers-as-gamers/data-800.webp new file mode 100644 index 00000000..8dd5e1c8 Binary files /dev/null and b/assets/img/2023-11-09-transformers-as-gamers/data-800.webp differ diff --git a/assets/img/2023-11-09-transformers-as-gamers/deep-1400.webp b/assets/img/2023-11-09-transformers-as-gamers/deep-1400.webp new file mode 100644 index 00000000..f8f9b6dd Binary files /dev/null and b/assets/img/2023-11-09-transformers-as-gamers/deep-1400.webp differ diff --git a/assets/img/2023-11-09-transformers-as-gamers/deep-480.webp b/assets/img/2023-11-09-transformers-as-gamers/deep-480.webp new file mode 100644 index 00000000..ec6ec71d Binary files /dev/null and b/assets/img/2023-11-09-transformers-as-gamers/deep-480.webp differ diff --git a/assets/img/2023-11-09-transformers-as-gamers/deep-800.webp b/assets/img/2023-11-09-transformers-as-gamers/deep-800.webp new file mode 100644 index 00000000..f8f9b6dd Binary files /dev/null and b/assets/img/2023-11-09-transformers-as-gamers/deep-800.webp differ diff --git a/assets/img/2023-11-09-transformers-as-gamers/generals-1400.webp b/assets/img/2023-11-09-transformers-as-gamers/generals-1400.webp new file mode 100644 index 00000000..53ea6b74 Binary files /dev/null and b/assets/img/2023-11-09-transformers-as-gamers/generals-1400.webp differ diff --git a/assets/img/2023-11-09-transformers-as-gamers/generals-480.webp b/assets/img/2023-11-09-transformers-as-gamers/generals-480.webp new file mode 100644 index 00000000..d2980bf2 Binary files /dev/null and 
b/assets/img/2023-11-09-transformers-as-gamers/generals-480.webp differ diff --git a/assets/img/2023-11-09-transformers-as-gamers/generals-800.webp b/assets/img/2023-11-09-transformers-as-gamers/generals-800.webp new file mode 100644 index 00000000..53ea6b74 Binary files /dev/null and b/assets/img/2023-11-09-transformers-as-gamers/generals-800.webp differ diff --git a/assets/img/2023-11-09-transformers-as-gamers/generals_pomdp-1400.webp b/assets/img/2023-11-09-transformers-as-gamers/generals_pomdp-1400.webp new file mode 100644 index 00000000..8464e061 Binary files /dev/null and b/assets/img/2023-11-09-transformers-as-gamers/generals_pomdp-1400.webp differ diff --git a/assets/img/2023-11-09-transformers-as-gamers/generals_pomdp-480.webp b/assets/img/2023-11-09-transformers-as-gamers/generals_pomdp-480.webp new file mode 100644 index 00000000..6951feaa Binary files /dev/null and b/assets/img/2023-11-09-transformers-as-gamers/generals_pomdp-480.webp differ diff --git a/assets/img/2023-11-09-transformers-as-gamers/generals_pomdp-800.webp b/assets/img/2023-11-09-transformers-as-gamers/generals_pomdp-800.webp new file mode 100644 index 00000000..8464e061 Binary files /dev/null and b/assets/img/2023-11-09-transformers-as-gamers/generals_pomdp-800.webp differ diff --git a/assets/img/2023-11-09-transformers-as-gamers/initial-1400.webp b/assets/img/2023-11-09-transformers-as-gamers/initial-1400.webp new file mode 100644 index 00000000..8dd5e1c8 Binary files /dev/null and b/assets/img/2023-11-09-transformers-as-gamers/initial-1400.webp differ diff --git a/assets/img/2023-11-09-transformers-as-gamers/initial-480.webp b/assets/img/2023-11-09-transformers-as-gamers/initial-480.webp new file mode 100644 index 00000000..f2a3b296 Binary files /dev/null and b/assets/img/2023-11-09-transformers-as-gamers/initial-480.webp differ diff --git a/assets/img/2023-11-09-transformers-as-gamers/initial-800.webp b/assets/img/2023-11-09-transformers-as-gamers/initial-800.webp new file mode 
100644 index 00000000..8dd5e1c8 Binary files /dev/null and b/assets/img/2023-11-09-transformers-as-gamers/initial-800.webp differ diff --git a/assets/img/2023-11-09-transformers-as-gamers/more_data-1400.webp b/assets/img/2023-11-09-transformers-as-gamers/more_data-1400.webp new file mode 100644 index 00000000..5b0e6b7c Binary files /dev/null and b/assets/img/2023-11-09-transformers-as-gamers/more_data-1400.webp differ diff --git a/assets/img/2023-11-09-transformers-as-gamers/more_data-480.webp b/assets/img/2023-11-09-transformers-as-gamers/more_data-480.webp new file mode 100644 index 00000000..96aa972b Binary files /dev/null and b/assets/img/2023-11-09-transformers-as-gamers/more_data-480.webp differ diff --git a/assets/img/2023-11-09-transformers-as-gamers/more_data-800.webp b/assets/img/2023-11-09-transformers-as-gamers/more_data-800.webp new file mode 100644 index 00000000..5b0e6b7c Binary files /dev/null and b/assets/img/2023-11-09-transformers-as-gamers/more_data-800.webp differ diff --git a/assets/img/2023-11-09-transformers-as-gamers/squish-1400.webp b/assets/img/2023-11-09-transformers-as-gamers/squish-1400.webp new file mode 100644 index 00000000..2992f97b Binary files /dev/null and b/assets/img/2023-11-09-transformers-as-gamers/squish-1400.webp differ diff --git a/assets/img/2023-11-09-transformers-as-gamers/squish-480.webp b/assets/img/2023-11-09-transformers-as-gamers/squish-480.webp new file mode 100644 index 00000000..1e4d16f2 Binary files /dev/null and b/assets/img/2023-11-09-transformers-as-gamers/squish-480.webp differ diff --git a/assets/img/2023-11-09-transformers-as-gamers/squish-800.webp b/assets/img/2023-11-09-transformers-as-gamers/squish-800.webp new file mode 100644 index 00000000..2992f97b Binary files /dev/null and b/assets/img/2023-11-09-transformers-as-gamers/squish-800.webp differ diff --git a/assets/img/2023-11-09-universal-features/Correlation_Graph_70m-1400.webp 
b/assets/img/2023-11-09-universal-features/Correlation_Graph_70m-1400.webp new file mode 100644 index 00000000..de35c681 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/Correlation_Graph_70m-1400.webp differ diff --git a/assets/img/2023-11-09-universal-features/Correlation_Graph_70m-480.webp b/assets/img/2023-11-09-universal-features/Correlation_Graph_70m-480.webp new file mode 100644 index 00000000..af4ece39 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/Correlation_Graph_70m-480.webp differ diff --git a/assets/img/2023-11-09-universal-features/Correlation_Graph_70m-800.webp b/assets/img/2023-11-09-universal-features/Correlation_Graph_70m-800.webp new file mode 100644 index 00000000..de35c681 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/Correlation_Graph_70m-800.webp differ diff --git a/assets/img/2023-11-09-universal-features/Correlation_graph_160m-1400.webp b/assets/img/2023-11-09-universal-features/Correlation_graph_160m-1400.webp new file mode 100644 index 00000000..031c44fd Binary files /dev/null and b/assets/img/2023-11-09-universal-features/Correlation_graph_160m-1400.webp differ diff --git a/assets/img/2023-11-09-universal-features/Correlation_graph_160m-480.webp b/assets/img/2023-11-09-universal-features/Correlation_graph_160m-480.webp new file mode 100644 index 00000000..73d51668 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/Correlation_graph_160m-480.webp differ diff --git a/assets/img/2023-11-09-universal-features/Correlation_graph_160m-800.webp b/assets/img/2023-11-09-universal-features/Correlation_graph_160m-800.webp new file mode 100644 index 00000000..031c44fd Binary files /dev/null and b/assets/img/2023-11-09-universal-features/Correlation_graph_160m-800.webp differ diff --git a/assets/img/2023-11-09-universal-features/Correlations between 1-8 AEs trained on 3 model sizes-1400.webp b/assets/img/2023-11-09-universal-features/Correlations between 1-8 AEs 
trained on 3 model sizes-1400.webp new file mode 100644 index 00000000..bd730d8f Binary files /dev/null and b/assets/img/2023-11-09-universal-features/Correlations between 1-8 AEs trained on 3 model sizes-1400.webp differ diff --git a/assets/img/2023-11-09-universal-features/Correlations between 1-8 AEs trained on 3 model sizes-480.webp b/assets/img/2023-11-09-universal-features/Correlations between 1-8 AEs trained on 3 model sizes-480.webp new file mode 100644 index 00000000..4c51d68f Binary files /dev/null and b/assets/img/2023-11-09-universal-features/Correlations between 1-8 AEs trained on 3 model sizes-480.webp differ diff --git a/assets/img/2023-11-09-universal-features/Correlations between 1-8 AEs trained on 3 model sizes-800.webp b/assets/img/2023-11-09-universal-features/Correlations between 1-8 AEs trained on 3 model sizes-800.webp new file mode 100644 index 00000000..bd730d8f Binary files /dev/null and b/assets/img/2023-11-09-universal-features/Correlations between 1-8 AEs trained on 3 model sizes-800.webp differ diff --git a/assets/img/2023-11-09-universal-features/Deduped vs non-deduped-1400.webp b/assets/img/2023-11-09-universal-features/Deduped vs non-deduped-1400.webp new file mode 100644 index 00000000..267965d6 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/Deduped vs non-deduped-1400.webp differ diff --git a/assets/img/2023-11-09-universal-features/Deduped vs non-deduped-480.webp b/assets/img/2023-11-09-universal-features/Deduped vs non-deduped-480.webp new file mode 100644 index 00000000..1c38c785 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/Deduped vs non-deduped-480.webp differ diff --git a/assets/img/2023-11-09-universal-features/Deduped vs non-deduped-800.webp b/assets/img/2023-11-09-universal-features/Deduped vs non-deduped-800.webp new file mode 100644 index 00000000..267965d6 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/Deduped vs non-deduped-800.webp differ diff 
--git a/assets/img/2023-11-09-universal-features/Four AE sizes on Pythia-70m MCS-1400.webp b/assets/img/2023-11-09-universal-features/Four AE sizes on Pythia-70m MCS-1400.webp new file mode 100644 index 00000000..2f108939 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/Four AE sizes on Pythia-70m MCS-1400.webp differ diff --git a/assets/img/2023-11-09-universal-features/Four AE sizes on Pythia-70m MCS-480.webp b/assets/img/2023-11-09-universal-features/Four AE sizes on Pythia-70m MCS-480.webp new file mode 100644 index 00000000..362b141f Binary files /dev/null and b/assets/img/2023-11-09-universal-features/Four AE sizes on Pythia-70m MCS-480.webp differ diff --git a/assets/img/2023-11-09-universal-features/Four AE sizes on Pythia-70m MCS-800.webp b/assets/img/2023-11-09-universal-features/Four AE sizes on Pythia-70m MCS-800.webp new file mode 100644 index 00000000..2f108939 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/Four AE sizes on Pythia-70m MCS-800.webp differ diff --git a/assets/img/2023-11-09-universal-features/MCS_graph_160m-1400.webp b/assets/img/2023-11-09-universal-features/MCS_graph_160m-1400.webp new file mode 100644 index 00000000..88644c11 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/MCS_graph_160m-1400.webp differ diff --git a/assets/img/2023-11-09-universal-features/MCS_graph_160m-480.webp b/assets/img/2023-11-09-universal-features/MCS_graph_160m-480.webp new file mode 100644 index 00000000..d6d9af95 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/MCS_graph_160m-480.webp differ diff --git a/assets/img/2023-11-09-universal-features/MCS_graph_160m-800.webp b/assets/img/2023-11-09-universal-features/MCS_graph_160m-800.webp new file mode 100644 index 00000000..88644c11 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/MCS_graph_160m-800.webp differ diff --git a/assets/img/2023-11-09-universal-features/MCS_graph_70m-1400.webp 
b/assets/img/2023-11-09-universal-features/MCS_graph_70m-1400.webp new file mode 100644 index 00000000..7627611d Binary files /dev/null and b/assets/img/2023-11-09-universal-features/MCS_graph_70m-1400.webp differ diff --git a/assets/img/2023-11-09-universal-features/MCS_graph_70m-480.webp b/assets/img/2023-11-09-universal-features/MCS_graph_70m-480.webp new file mode 100644 index 00000000..b3e5c02a Binary files /dev/null and b/assets/img/2023-11-09-universal-features/MCS_graph_70m-480.webp differ diff --git a/assets/img/2023-11-09-universal-features/MCS_graph_70m-800.webp b/assets/img/2023-11-09-universal-features/MCS_graph_70m-800.webp new file mode 100644 index 00000000..7627611d Binary files /dev/null and b/assets/img/2023-11-09-universal-features/MCS_graph_70m-800.webp differ diff --git a/assets/img/2023-11-09-universal-features/W&B Chart 12_12_2023, 11_09_59 PM-1400.webp b/assets/img/2023-11-09-universal-features/W&B Chart 12_12_2023, 11_09_59 PM-1400.webp new file mode 100644 index 00000000..117041ba Binary files /dev/null and b/assets/img/2023-11-09-universal-features/W&B Chart 12_12_2023, 11_09_59 PM-1400.webp differ diff --git a/assets/img/2023-11-09-universal-features/W&B Chart 12_12_2023, 11_09_59 PM-480.webp b/assets/img/2023-11-09-universal-features/W&B Chart 12_12_2023, 11_09_59 PM-480.webp new file mode 100644 index 00000000..fe0083a6 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/W&B Chart 12_12_2023, 11_09_59 PM-480.webp differ diff --git a/assets/img/2023-11-09-universal-features/W&B Chart 12_12_2023, 11_09_59 PM-800.webp b/assets/img/2023-11-09-universal-features/W&B Chart 12_12_2023, 11_09_59 PM-800.webp new file mode 100644 index 00000000..117041ba Binary files /dev/null and b/assets/img/2023-11-09-universal-features/W&B Chart 12_12_2023, 11_09_59 PM-800.webp differ diff --git a/assets/img/2023-11-09-universal-features/frequency_plot-1400.webp b/assets/img/2023-11-09-universal-features/frequency_plot-1400.webp new file 
mode 100644 index 00000000..2b59bb75 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/frequency_plot-1400.webp differ diff --git a/assets/img/2023-11-09-universal-features/frequency_plot-480.webp b/assets/img/2023-11-09-universal-features/frequency_plot-480.webp new file mode 100644 index 00000000..0a1065e5 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/frequency_plot-480.webp differ diff --git a/assets/img/2023-11-09-universal-features/frequency_plot-800.webp b/assets/img/2023-11-09-universal-features/frequency_plot-800.webp new file mode 100644 index 00000000..2b59bb75 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/frequency_plot-800.webp differ diff --git a/assets/img/2023-11-09-universal-features/frequency_plots_by_layer-1400.webp b/assets/img/2023-11-09-universal-features/frequency_plots_by_layer-1400.webp new file mode 100644 index 00000000..96640f67 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/frequency_plots_by_layer-1400.webp differ diff --git a/assets/img/2023-11-09-universal-features/frequency_plots_by_layer-480.webp b/assets/img/2023-11-09-universal-features/frequency_plots_by_layer-480.webp new file mode 100644 index 00000000..71d0bff1 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/frequency_plots_by_layer-480.webp differ diff --git a/assets/img/2023-11-09-universal-features/frequency_plots_by_layer-800.webp b/assets/img/2023-11-09-universal-features/frequency_plots_by_layer-800.webp new file mode 100644 index 00000000..96640f67 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/frequency_plots_by_layer-800.webp differ diff --git a/assets/img/2023-11-09-universal-features/image (2)-1400.webp b/assets/img/2023-11-09-universal-features/image (2)-1400.webp new file mode 100644 index 00000000..a0128f22 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/image (2)-1400.webp differ diff --git 
a/assets/img/2023-11-09-universal-features/image (2)-480.webp b/assets/img/2023-11-09-universal-features/image (2)-480.webp new file mode 100644 index 00000000..88abfc12 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/image (2)-480.webp differ diff --git a/assets/img/2023-11-09-universal-features/image (2)-800.webp b/assets/img/2023-11-09-universal-features/image (2)-800.webp new file mode 100644 index 00000000..a0128f22 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/image (2)-800.webp differ diff --git a/assets/img/2023-11-09-universal-features/image (3)-1400.webp b/assets/img/2023-11-09-universal-features/image (3)-1400.webp new file mode 100644 index 00000000..26f0c2c7 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/image (3)-1400.webp differ diff --git a/assets/img/2023-11-09-universal-features/image (3)-480.webp b/assets/img/2023-11-09-universal-features/image (3)-480.webp new file mode 100644 index 00000000..5b27a023 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/image (3)-480.webp differ diff --git a/assets/img/2023-11-09-universal-features/image (3)-800.webp b/assets/img/2023-11-09-universal-features/image (3)-800.webp new file mode 100644 index 00000000..26f0c2c7 Binary files /dev/null and b/assets/img/2023-11-09-universal-features/image (3)-800.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/attn-1400.webp b/assets/img/2023-11-10-CNN-activation-patching/attn-1400.webp new file mode 100644 index 00000000..aba1e201 Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/attn-1400.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/attn-480.webp b/assets/img/2023-11-10-CNN-activation-patching/attn-480.webp new file mode 100644 index 00000000..9966a9bb Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/attn-480.webp differ diff --git 
a/assets/img/2023-11-10-CNN-activation-patching/attn-800.webp b/assets/img/2023-11-10-CNN-activation-patching/attn-800.webp new file mode 100644 index 00000000..aba1e201 Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/attn-800.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/car-1400.webp b/assets/img/2023-11-10-CNN-activation-patching/car-1400.webp new file mode 100644 index 00000000..737ed1ef Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/car-1400.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/car-480.webp b/assets/img/2023-11-10-CNN-activation-patching/car-480.webp new file mode 100644 index 00000000..93930194 Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/car-480.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/car-800.webp b/assets/img/2023-11-10-CNN-activation-patching/car-800.webp new file mode 100644 index 00000000..737ed1ef Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/car-800.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/deer-1400.webp b/assets/img/2023-11-10-CNN-activation-patching/deer-1400.webp new file mode 100644 index 00000000..10b28c14 Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/deer-1400.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/deer-480.webp b/assets/img/2023-11-10-CNN-activation-patching/deer-480.webp new file mode 100644 index 00000000..2d482c84 Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/deer-480.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/deer-800.webp b/assets/img/2023-11-10-CNN-activation-patching/deer-800.webp new file mode 100644 index 00000000..10b28c14 Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/deer-800.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/eqn-1400.webp 
b/assets/img/2023-11-10-CNN-activation-patching/eqn-1400.webp new file mode 100644 index 00000000..25c2402d Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/eqn-1400.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/eqn-480.webp b/assets/img/2023-11-10-CNN-activation-patching/eqn-480.webp new file mode 100644 index 00000000..151ce902 Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/eqn-480.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/eqn-800.webp b/assets/img/2023-11-10-CNN-activation-patching/eqn-800.webp new file mode 100644 index 00000000..25c2402d Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/eqn-800.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/gray-1400.webp b/assets/img/2023-11-10-CNN-activation-patching/gray-1400.webp new file mode 100644 index 00000000..04a4c78d Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/gray-1400.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/gray-480.webp b/assets/img/2023-11-10-CNN-activation-patching/gray-480.webp new file mode 100644 index 00000000..3dd71cb0 Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/gray-480.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/gray-800.webp b/assets/img/2023-11-10-CNN-activation-patching/gray-800.webp new file mode 100644 index 00000000..04a4c78d Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/gray-800.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/image-1400.webp b/assets/img/2023-11-10-CNN-activation-patching/image-1400.webp new file mode 100644 index 00000000..912d47cb Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/image-1400.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/image-480.webp 
b/assets/img/2023-11-10-CNN-activation-patching/image-480.webp new file mode 100644 index 00000000..656a1e94 Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/image-480.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/image-800.webp b/assets/img/2023-11-10-CNN-activation-patching/image-800.webp new file mode 100644 index 00000000..912d47cb Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/image-800.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/patch-1400.webp b/assets/img/2023-11-10-CNN-activation-patching/patch-1400.webp new file mode 100644 index 00000000..09c084a4 Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/patch-1400.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/patch-480.webp b/assets/img/2023-11-10-CNN-activation-patching/patch-480.webp new file mode 100644 index 00000000..b3522c3a Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/patch-480.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/patch-800.webp b/assets/img/2023-11-10-CNN-activation-patching/patch-800.webp new file mode 100644 index 00000000..09c084a4 Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/patch-800.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/plane-1400.webp b/assets/img/2023-11-10-CNN-activation-patching/plane-1400.webp new file mode 100644 index 00000000..0aa4a74d Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/plane-1400.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/plane-480.webp b/assets/img/2023-11-10-CNN-activation-patching/plane-480.webp new file mode 100644 index 00000000..3dd8176f Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/plane-480.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/plane-800.webp 
b/assets/img/2023-11-10-CNN-activation-patching/plane-800.webp new file mode 100644 index 00000000..0aa4a74d Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/plane-800.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/single-1400.webp b/assets/img/2023-11-10-CNN-activation-patching/single-1400.webp new file mode 100644 index 00000000..323cf6af Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/single-1400.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/single-480.webp b/assets/img/2023-11-10-CNN-activation-patching/single-480.webp new file mode 100644 index 00000000..b97e6c48 Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/single-480.webp differ diff --git a/assets/img/2023-11-10-CNN-activation-patching/single-800.webp b/assets/img/2023-11-10-CNN-activation-patching/single-800.webp new file mode 100644 index 00000000..323cf6af Binary files /dev/null and b/assets/img/2023-11-10-CNN-activation-patching/single-800.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/bases-1400.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/bases-1400.webp new file mode 100644 index 00000000..7d2c12c0 Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/bases-1400.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/bases-480.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/bases-480.webp new file mode 100644 index 00000000..6196e841 Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/bases-480.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/bases-800.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/bases-800.webp new file mode 100644 index 00000000..7d2c12c0 Binary files /dev/null and 
b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/bases-800.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3-1400.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3-1400.webp new file mode 100644 index 00000000..e6dcecd8 Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3-1400.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3-480.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3-480.webp new file mode 100644 index 00000000..7d4bec79 Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3-480.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3-800.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3-800.webp new file mode 100644 index 00000000..e6dcecd8 Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3-800.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3_errors-1400.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3_errors-1400.webp new file mode 100644 index 00000000..ee50dbf0 Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3_errors-1400.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3_errors-480.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3_errors-480.webp new file mode 100644 index 00000000..2bebd209 Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3_errors-480.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3_errors-800.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3_errors-800.webp new file mode 100644 index 00000000..ee50dbf0 Binary files /dev/null and 
b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3_errors-800.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dnn_timing-1400.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dnn_timing-1400.webp new file mode 100644 index 00000000..251f2026 Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dnn_timing-1400.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dnn_timing-480.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dnn_timing-480.webp new file mode 100644 index 00000000..5afa05b3 Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dnn_timing-480.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dnn_timing-800.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dnn_timing-800.webp new file mode 100644 index 00000000..251f2026 Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dnn_timing-800.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/e2erl-1400.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/e2erl-1400.webp new file mode 100644 index 00000000..fbc1fbe7 Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/e2erl-1400.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/e2erl-480.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/e2erl-480.webp new file mode 100644 index 00000000..28dd62b4 Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/e2erl-480.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/e2erl-800.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/e2erl-800.webp new file mode 100644 index 00000000..fbc1fbe7 Binary files /dev/null and 
b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/e2erl-800.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/errors_bars-1400.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/errors_bars-1400.webp new file mode 100644 index 00000000..1fec3473 Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/errors_bars-1400.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/errors_bars-480.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/errors_bars-480.webp new file mode 100644 index 00000000..06bc8cc9 Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/errors_bars-480.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/errors_bars-800.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/errors_bars-800.webp new file mode 100644 index 00000000..1fec3473 Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/errors_bars-800.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/flowchart-1400.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/flowchart-1400.webp new file mode 100644 index 00000000..8639f360 Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/flowchart-1400.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/flowchart-480.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/flowchart-480.webp new file mode 100644 index 00000000..2e90b4fa Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/flowchart-480.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/flowchart-800.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/flowchart-800.webp new file mode 100644 index 00000000..8639f360 
Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/flowchart-800.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/gauge-1400.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/gauge-1400.webp new file mode 100644 index 00000000..92678c13 Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/gauge-1400.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/gauge-480.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/gauge-480.webp new file mode 100644 index 00000000..6e476b98 Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/gauge-480.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/gauge-800.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/gauge-800.webp new file mode 100644 index 00000000..92678c13 Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/gauge-800.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n10m5-1400.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n10m5-1400.webp new file mode 100644 index 00000000..9414e177 Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n10m5-1400.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n10m5-480.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n10m5-480.webp new file mode 100644 index 00000000..10d57f1b Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n10m5-480.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n10m5-800.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n10m5-800.webp new file mode 100644 index 00000000..9414e177 Binary files /dev/null and 
b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n10m5-800.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n4m2_perfect-1400.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n4m2_perfect-1400.webp new file mode 100644 index 00000000..d425e273 Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n4m2_perfect-1400.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n4m2_perfect-480.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n4m2_perfect-480.webp new file mode 100644 index 00000000..72f9f67c Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n4m2_perfect-480.webp differ diff --git a/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n4m2_perfect-800.webp b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n4m2_perfect-800.webp new file mode 100644 index 00000000..d425e273 Binary files /dev/null and b/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n4m2_perfect-800.webp differ diff --git a/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Gelu_100-1400.webp b/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Gelu_100-1400.webp new file mode 100644 index 00000000..c5cc152c Binary files /dev/null and b/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Gelu_100-1400.webp differ diff --git a/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Gelu_100-480.webp b/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Gelu_100-480.webp new file mode 100644 index 00000000..fb96f3b1 Binary files /dev/null and b/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Gelu_100-480.webp differ diff --git a/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Gelu_100-800.webp b/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Gelu_100-800.webp new 
file mode 100644 index 00000000..c5cc152c Binary files /dev/null and b/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Gelu_100-800.webp differ diff --git a/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Gelu_1000-1400.webp b/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Gelu_1000-1400.webp new file mode 100644 index 00000000..c02debdc Binary files /dev/null and b/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Gelu_1000-1400.webp differ diff --git a/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Gelu_1000-480.webp b/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Gelu_1000-480.webp new file mode 100644 index 00000000..5b68e586 Binary files /dev/null and b/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Gelu_1000-480.webp differ diff --git a/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Gelu_1000-800.webp b/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Gelu_1000-800.webp new file mode 100644 index 00000000..c02debdc Binary files /dev/null and b/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Gelu_1000-800.webp differ diff --git a/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Lorenz63-1400.webp b/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Lorenz63-1400.webp new file mode 100644 index 00000000..cc0ad043 Binary files /dev/null and b/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Lorenz63-1400.webp differ diff --git a/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Lorenz63-480.webp b/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Lorenz63-480.webp new file mode 100644 index 00000000..6b53a271 Binary files /dev/null and b/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Lorenz63-480.webp differ diff --git 
a/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Lorenz63-800.webp b/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Lorenz63-800.webp new file mode 100644 index 00000000..cc0ad043 Binary files /dev/null and b/assets/img/2023-11-10-Physics-informed-learning-chaotic-dynamics/Lorenz63-800.webp differ diff --git a/assets/img/2023-11-10-intuitive-physics-cvae/fig1-1400.webp b/assets/img/2023-11-10-intuitive-physics-cvae/fig1-1400.webp new file mode 100644 index 00000000..6b59f86a Binary files /dev/null and b/assets/img/2023-11-10-intuitive-physics-cvae/fig1-1400.webp differ diff --git a/assets/img/2023-11-10-intuitive-physics-cvae/fig1-480.webp b/assets/img/2023-11-10-intuitive-physics-cvae/fig1-480.webp new file mode 100644 index 00000000..01cc7a47 Binary files /dev/null and b/assets/img/2023-11-10-intuitive-physics-cvae/fig1-480.webp differ diff --git a/assets/img/2023-11-10-intuitive-physics-cvae/fig1-800.webp b/assets/img/2023-11-10-intuitive-physics-cvae/fig1-800.webp new file mode 100644 index 00000000..6b59f86a Binary files /dev/null and b/assets/img/2023-11-10-intuitive-physics-cvae/fig1-800.webp differ diff --git a/assets/img/2023-11-10-intuitive-physics-cvae/fig2-1400.webp b/assets/img/2023-11-10-intuitive-physics-cvae/fig2-1400.webp new file mode 100644 index 00000000..83073bc2 Binary files /dev/null and b/assets/img/2023-11-10-intuitive-physics-cvae/fig2-1400.webp differ diff --git a/assets/img/2023-11-10-intuitive-physics-cvae/fig2-480.webp b/assets/img/2023-11-10-intuitive-physics-cvae/fig2-480.webp new file mode 100644 index 00000000..07fa4d0e Binary files /dev/null and b/assets/img/2023-11-10-intuitive-physics-cvae/fig2-480.webp differ diff --git a/assets/img/2023-11-10-intuitive-physics-cvae/fig2-800.webp b/assets/img/2023-11-10-intuitive-physics-cvae/fig2-800.webp new file mode 100644 index 00000000..83073bc2 Binary files /dev/null and b/assets/img/2023-11-10-intuitive-physics-cvae/fig2-800.webp differ diff --git 
a/assets/img/2023-11-10-intuitive-physics-cvae/fig3-1400.webp b/assets/img/2023-11-10-intuitive-physics-cvae/fig3-1400.webp new file mode 100644 index 00000000..57ceed56 Binary files /dev/null and b/assets/img/2023-11-10-intuitive-physics-cvae/fig3-1400.webp differ diff --git a/assets/img/2023-11-10-intuitive-physics-cvae/fig3-480.webp b/assets/img/2023-11-10-intuitive-physics-cvae/fig3-480.webp new file mode 100644 index 00000000..2c3b135f Binary files /dev/null and b/assets/img/2023-11-10-intuitive-physics-cvae/fig3-480.webp differ diff --git a/assets/img/2023-11-10-intuitive-physics-cvae/fig3-800.webp b/assets/img/2023-11-10-intuitive-physics-cvae/fig3-800.webp new file mode 100644 index 00000000..57ceed56 Binary files /dev/null and b/assets/img/2023-11-10-intuitive-physics-cvae/fig3-800.webp differ diff --git a/assets/img/2023-11-10-proposal_JingpengHong/multitask-1400.webp b/assets/img/2023-11-10-proposal_JingpengHong/multitask-1400.webp new file mode 100644 index 00000000..0c0e524e Binary files /dev/null and b/assets/img/2023-11-10-proposal_JingpengHong/multitask-1400.webp differ diff --git a/assets/img/2023-11-10-proposal_JingpengHong/multitask-480.webp b/assets/img/2023-11-10-proposal_JingpengHong/multitask-480.webp new file mode 100644 index 00000000..e949a06a Binary files /dev/null and b/assets/img/2023-11-10-proposal_JingpengHong/multitask-480.webp differ diff --git a/assets/img/2023-11-10-proposal_JingpengHong/multitask-800.webp b/assets/img/2023-11-10-proposal_JingpengHong/multitask-800.webp new file mode 100644 index 00000000..0c0e524e Binary files /dev/null and b/assets/img/2023-11-10-proposal_JingpengHong/multitask-800.webp differ diff --git a/assets/img/2023-11-10-solvent-encoding/ICLR-logo-1400.webp b/assets/img/2023-11-10-solvent-encoding/ICLR-logo-1400.webp new file mode 100644 index 00000000..d56968ba Binary files /dev/null and b/assets/img/2023-11-10-solvent-encoding/ICLR-logo-1400.webp differ diff --git 
a/assets/img/2023-11-10-solvent-encoding/ICLR-logo-480.webp b/assets/img/2023-11-10-solvent-encoding/ICLR-logo-480.webp new file mode 100644 index 00000000..c9d42d7e Binary files /dev/null and b/assets/img/2023-11-10-solvent-encoding/ICLR-logo-480.webp differ diff --git a/assets/img/2023-11-10-solvent-encoding/ICLR-logo-800.webp b/assets/img/2023-11-10-solvent-encoding/ICLR-logo-800.webp new file mode 100644 index 00000000..d56968ba Binary files /dev/null and b/assets/img/2023-11-10-solvent-encoding/ICLR-logo-800.webp differ diff --git a/assets/img/2023-11-10-solvent-encoding/ICLR-logo-dark-1400.webp b/assets/img/2023-11-10-solvent-encoding/ICLR-logo-dark-1400.webp new file mode 100644 index 00000000..5ed49089 Binary files /dev/null and b/assets/img/2023-11-10-solvent-encoding/ICLR-logo-dark-1400.webp differ diff --git a/assets/img/2023-11-10-solvent-encoding/ICLR-logo-dark-480.webp b/assets/img/2023-11-10-solvent-encoding/ICLR-logo-dark-480.webp new file mode 100644 index 00000000..7f0830c1 Binary files /dev/null and b/assets/img/2023-11-10-solvent-encoding/ICLR-logo-dark-480.webp differ diff --git a/assets/img/2023-11-10-solvent-encoding/ICLR-logo-dark-800.webp b/assets/img/2023-11-10-solvent-encoding/ICLR-logo-dark-800.webp new file mode 100644 index 00000000..5ed49089 Binary files /dev/null and b/assets/img/2023-11-10-solvent-encoding/ICLR-logo-dark-800.webp differ diff --git a/assets/img/2023-11-10-spacial-CLIP/CLIP_Negative-1400.webp b/assets/img/2023-11-10-spacial-CLIP/CLIP_Negative-1400.webp new file mode 100644 index 00000000..529f8691 Binary files /dev/null and b/assets/img/2023-11-10-spacial-CLIP/CLIP_Negative-1400.webp differ diff --git a/assets/img/2023-11-10-spacial-CLIP/CLIP_Negative-480.webp b/assets/img/2023-11-10-spacial-CLIP/CLIP_Negative-480.webp new file mode 100644 index 00000000..eb28f3ed Binary files /dev/null and b/assets/img/2023-11-10-spacial-CLIP/CLIP_Negative-480.webp differ diff --git 
a/assets/img/2023-11-10-spacial-CLIP/CLIP_Negative-800.webp b/assets/img/2023-11-10-spacial-CLIP/CLIP_Negative-800.webp new file mode 100644 index 00000000..529f8691 Binary files /dev/null and b/assets/img/2023-11-10-spacial-CLIP/CLIP_Negative-800.webp differ diff --git a/assets/img/2023-11-10-spacial-CLIP/GA0Hn7zaIAAbp84-1400.webp b/assets/img/2023-11-10-spacial-CLIP/GA0Hn7zaIAAbp84-1400.webp new file mode 100644 index 00000000..584a2bf0 Binary files /dev/null and b/assets/img/2023-11-10-spacial-CLIP/GA0Hn7zaIAAbp84-1400.webp differ diff --git a/assets/img/2023-11-10-spacial-CLIP/GA0Hn7zaIAAbp84-480.webp b/assets/img/2023-11-10-spacial-CLIP/GA0Hn7zaIAAbp84-480.webp new file mode 100644 index 00000000..e8773e88 Binary files /dev/null and b/assets/img/2023-11-10-spacial-CLIP/GA0Hn7zaIAAbp84-480.webp differ diff --git a/assets/img/2023-11-10-spacial-CLIP/GA0Hn7zaIAAbp84-800.webp b/assets/img/2023-11-10-spacial-CLIP/GA0Hn7zaIAAbp84-800.webp new file mode 100644 index 00000000..584a2bf0 Binary files /dev/null and b/assets/img/2023-11-10-spacial-CLIP/GA0Hn7zaIAAbp84-800.webp differ diff --git a/assets/img/2023-11-10-spacial-CLIP/GPT-word-order-1400.webp b/assets/img/2023-11-10-spacial-CLIP/GPT-word-order-1400.webp new file mode 100644 index 00000000..63e95a46 Binary files /dev/null and b/assets/img/2023-11-10-spacial-CLIP/GPT-word-order-1400.webp differ diff --git a/assets/img/2023-11-10-spacial-CLIP/GPT-word-order-480.webp b/assets/img/2023-11-10-spacial-CLIP/GPT-word-order-480.webp new file mode 100644 index 00000000..a54315a7 Binary files /dev/null and b/assets/img/2023-11-10-spacial-CLIP/GPT-word-order-480.webp differ diff --git a/assets/img/2023-11-10-spacial-CLIP/GPT-word-order-800.webp b/assets/img/2023-11-10-spacial-CLIP/GPT-word-order-800.webp new file mode 100644 index 00000000..63e95a46 Binary files /dev/null and b/assets/img/2023-11-10-spacial-CLIP/GPT-word-order-800.webp differ diff --git a/assets/img/2023-11-10-spacial-CLIP/Winoground_Lightbulb-1400.webp 
b/assets/img/2023-11-10-spacial-CLIP/Winoground_Lightbulb-1400.webp new file mode 100644 index 00000000..310bd561 Binary files /dev/null and b/assets/img/2023-11-10-spacial-CLIP/Winoground_Lightbulb-1400.webp differ diff --git a/assets/img/2023-11-10-spacial-CLIP/Winoground_Lightbulb-480.webp b/assets/img/2023-11-10-spacial-CLIP/Winoground_Lightbulb-480.webp new file mode 100644 index 00000000..b3df0806 Binary files /dev/null and b/assets/img/2023-11-10-spacial-CLIP/Winoground_Lightbulb-480.webp differ diff --git a/assets/img/2023-11-10-spacial-CLIP/Winoground_Lightbulb-800.webp b/assets/img/2023-11-10-spacial-CLIP/Winoground_Lightbulb-800.webp new file mode 100644 index 00000000..310bd561 Binary files /dev/null and b/assets/img/2023-11-10-spacial-CLIP/Winoground_Lightbulb-800.webp differ diff --git a/assets/img/2023-11-10-spacial-CLIP/winoground_example-1400.webp b/assets/img/2023-11-10-spacial-CLIP/winoground_example-1400.webp new file mode 100644 index 00000000..a138a8af Binary files /dev/null and b/assets/img/2023-11-10-spacial-CLIP/winoground_example-1400.webp differ diff --git a/assets/img/2023-11-10-spacial-CLIP/winoground_example-480.webp b/assets/img/2023-11-10-spacial-CLIP/winoground_example-480.webp new file mode 100644 index 00000000..3bec7a23 Binary files /dev/null and b/assets/img/2023-11-10-spacial-CLIP/winoground_example-480.webp differ diff --git a/assets/img/2023-11-10-spacial-CLIP/winoground_example-800.webp b/assets/img/2023-11-10-spacial-CLIP/winoground_example-800.webp new file mode 100644 index 00000000..a138a8af Binary files /dev/null and b/assets/img/2023-11-10-spacial-CLIP/winoground_example-800.webp differ diff --git a/assets/img/2023-11-10-speech-recognition-proposal/accuracy-by-epoch-1400.webp b/assets/img/2023-11-10-speech-recognition-proposal/accuracy-by-epoch-1400.webp new file mode 100644 index 00000000..04c6efb0 Binary files /dev/null and b/assets/img/2023-11-10-speech-recognition-proposal/accuracy-by-epoch-1400.webp differ diff 
--git a/assets/img/2023-11-10-speech-recognition-proposal/accuracy-by-epoch-480.webp b/assets/img/2023-11-10-speech-recognition-proposal/accuracy-by-epoch-480.webp new file mode 100644 index 00000000..a5df6d13 Binary files /dev/null and b/assets/img/2023-11-10-speech-recognition-proposal/accuracy-by-epoch-480.webp differ diff --git a/assets/img/2023-11-10-speech-recognition-proposal/accuracy-by-epoch-800.webp b/assets/img/2023-11-10-speech-recognition-proposal/accuracy-by-epoch-800.webp new file mode 100644 index 00000000..04c6efb0 Binary files /dev/null and b/assets/img/2023-11-10-speech-recognition-proposal/accuracy-by-epoch-800.webp differ diff --git a/assets/img/2023-11-10-speech-recognition-proposal/background-noise-1400.webp b/assets/img/2023-11-10-speech-recognition-proposal/background-noise-1400.webp new file mode 100644 index 00000000..cd523da7 Binary files /dev/null and b/assets/img/2023-11-10-speech-recognition-proposal/background-noise-1400.webp differ diff --git a/assets/img/2023-11-10-speech-recognition-proposal/background-noise-480.webp b/assets/img/2023-11-10-speech-recognition-proposal/background-noise-480.webp new file mode 100644 index 00000000..dda906cd Binary files /dev/null and b/assets/img/2023-11-10-speech-recognition-proposal/background-noise-480.webp differ diff --git a/assets/img/2023-11-10-speech-recognition-proposal/background-noise-800.webp b/assets/img/2023-11-10-speech-recognition-proposal/background-noise-800.webp new file mode 100644 index 00000000..cd523da7 Binary files /dev/null and b/assets/img/2023-11-10-speech-recognition-proposal/background-noise-800.webp differ diff --git a/assets/img/2023-11-10-speech-recognition-proposal/block-architectures-1400.webp b/assets/img/2023-11-10-speech-recognition-proposal/block-architectures-1400.webp new file mode 100644 index 00000000..bd5a5998 Binary files /dev/null and b/assets/img/2023-11-10-speech-recognition-proposal/block-architectures-1400.webp differ diff --git 
a/assets/img/2023-11-10-speech-recognition-proposal/block-architectures-480.webp b/assets/img/2023-11-10-speech-recognition-proposal/block-architectures-480.webp new file mode 100644 index 00000000..3af9d0ef Binary files /dev/null and b/assets/img/2023-11-10-speech-recognition-proposal/block-architectures-480.webp differ diff --git a/assets/img/2023-11-10-speech-recognition-proposal/block-architectures-800.webp b/assets/img/2023-11-10-speech-recognition-proposal/block-architectures-800.webp new file mode 100644 index 00000000..bd5a5998 Binary files /dev/null and b/assets/img/2023-11-10-speech-recognition-proposal/block-architectures-800.webp differ diff --git a/assets/img/2023-11-10-speech-recognition-proposal/clean-1400.webp b/assets/img/2023-11-10-speech-recognition-proposal/clean-1400.webp new file mode 100644 index 00000000..fc11bdb1 Binary files /dev/null and b/assets/img/2023-11-10-speech-recognition-proposal/clean-1400.webp differ diff --git a/assets/img/2023-11-10-speech-recognition-proposal/clean-480.webp b/assets/img/2023-11-10-speech-recognition-proposal/clean-480.webp new file mode 100644 index 00000000..ccd7b0e2 Binary files /dev/null and b/assets/img/2023-11-10-speech-recognition-proposal/clean-480.webp differ diff --git a/assets/img/2023-11-10-speech-recognition-proposal/clean-800.webp b/assets/img/2023-11-10-speech-recognition-proposal/clean-800.webp new file mode 100644 index 00000000..fc11bdb1 Binary files /dev/null and b/assets/img/2023-11-10-speech-recognition-proposal/clean-800.webp differ diff --git a/assets/img/2023-11-10-speech-recognition-proposal/manipulations-1-1400.webp b/assets/img/2023-11-10-speech-recognition-proposal/manipulations-1-1400.webp new file mode 100644 index 00000000..4959a5c5 Binary files /dev/null and b/assets/img/2023-11-10-speech-recognition-proposal/manipulations-1-1400.webp differ diff --git a/assets/img/2023-11-10-speech-recognition-proposal/manipulations-1-480.webp 
b/assets/img/2023-11-10-speech-recognition-proposal/manipulations-1-480.webp new file mode 100644 index 00000000..d9a06b03 Binary files /dev/null and b/assets/img/2023-11-10-speech-recognition-proposal/manipulations-1-480.webp differ diff --git a/assets/img/2023-11-10-speech-recognition-proposal/manipulations-1-800.webp b/assets/img/2023-11-10-speech-recognition-proposal/manipulations-1-800.webp new file mode 100644 index 00000000..4959a5c5 Binary files /dev/null and b/assets/img/2023-11-10-speech-recognition-proposal/manipulations-1-800.webp differ diff --git a/assets/img/2023-11-10-speech-recognition-proposal/manipulations-2-1400.webp b/assets/img/2023-11-10-speech-recognition-proposal/manipulations-2-1400.webp new file mode 100644 index 00000000..1e5d445d Binary files /dev/null and b/assets/img/2023-11-10-speech-recognition-proposal/manipulations-2-1400.webp differ diff --git a/assets/img/2023-11-10-speech-recognition-proposal/manipulations-2-480.webp b/assets/img/2023-11-10-speech-recognition-proposal/manipulations-2-480.webp new file mode 100644 index 00000000..d53f102d Binary files /dev/null and b/assets/img/2023-11-10-speech-recognition-proposal/manipulations-2-480.webp differ diff --git a/assets/img/2023-11-10-speech-recognition-proposal/manipulations-2-800.webp b/assets/img/2023-11-10-speech-recognition-proposal/manipulations-2-800.webp new file mode 100644 index 00000000..1e5d445d Binary files /dev/null and b/assets/img/2023-11-10-speech-recognition-proposal/manipulations-2-800.webp differ diff --git a/assets/img/2023-11-10-speech-recognition-proposal/network-architectures-1400.webp b/assets/img/2023-11-10-speech-recognition-proposal/network-architectures-1400.webp new file mode 100644 index 00000000..48fdfaeb Binary files /dev/null and b/assets/img/2023-11-10-speech-recognition-proposal/network-architectures-1400.webp differ diff --git a/assets/img/2023-11-10-speech-recognition-proposal/network-architectures-480.webp 
b/assets/img/2023-11-10-speech-recognition-proposal/network-architectures-480.webp new file mode 100644 index 00000000..e21556b2 Binary files /dev/null and b/assets/img/2023-11-10-speech-recognition-proposal/network-architectures-480.webp differ diff --git a/assets/img/2023-11-10-speech-recognition-proposal/network-architectures-800.webp b/assets/img/2023-11-10-speech-recognition-proposal/network-architectures-800.webp new file mode 100644 index 00000000..48fdfaeb Binary files /dev/null and b/assets/img/2023-11-10-speech-recognition-proposal/network-architectures-800.webp differ diff --git a/assets/img/2023-11-10-stable-diffusion-for-obs/many_to_one-1400.webp b/assets/img/2023-11-10-stable-diffusion-for-obs/many_to_one-1400.webp new file mode 100644 index 00000000..fb19979b Binary files /dev/null and b/assets/img/2023-11-10-stable-diffusion-for-obs/many_to_one-1400.webp differ diff --git a/assets/img/2023-11-10-stable-diffusion-for-obs/many_to_one-480.webp b/assets/img/2023-11-10-stable-diffusion-for-obs/many_to_one-480.webp new file mode 100644 index 00000000..b975b3b2 Binary files /dev/null and b/assets/img/2023-11-10-stable-diffusion-for-obs/many_to_one-480.webp differ diff --git a/assets/img/2023-11-10-stable-diffusion-for-obs/many_to_one-800.webp b/assets/img/2023-11-10-stable-diffusion-for-obs/many_to_one-800.webp new file mode 100644 index 00000000..fb19979b Binary files /dev/null and b/assets/img/2023-11-10-stable-diffusion-for-obs/many_to_one-800.webp differ diff --git a/assets/img/2023-11-10-stable-diffusion-for-obs/obs_example-1400.webp b/assets/img/2023-11-10-stable-diffusion-for-obs/obs_example-1400.webp new file mode 100644 index 00000000..2c1294c2 Binary files /dev/null and b/assets/img/2023-11-10-stable-diffusion-for-obs/obs_example-1400.webp differ diff --git a/assets/img/2023-11-10-stable-diffusion-for-obs/obs_example-480.webp b/assets/img/2023-11-10-stable-diffusion-for-obs/obs_example-480.webp new file mode 100644 index 00000000..e6556327 Binary 
files /dev/null and b/assets/img/2023-11-10-stable-diffusion-for-obs/obs_example-480.webp differ diff --git a/assets/img/2023-11-10-stable-diffusion-for-obs/obs_example-800.webp b/assets/img/2023-11-10-stable-diffusion-for-obs/obs_example-800.webp new file mode 100644 index 00000000..2c1294c2 Binary files /dev/null and b/assets/img/2023-11-10-stable-diffusion-for-obs/obs_example-800.webp differ diff --git a/assets/img/2023-11-10-superposition/Autoencoder-1400.webp b/assets/img/2023-11-10-superposition/Autoencoder-1400.webp new file mode 100644 index 00000000..8e662ecb Binary files /dev/null and b/assets/img/2023-11-10-superposition/Autoencoder-1400.webp differ diff --git a/assets/img/2023-11-10-superposition/Autoencoder-480.webp b/assets/img/2023-11-10-superposition/Autoencoder-480.webp new file mode 100644 index 00000000..48ccf35c Binary files /dev/null and b/assets/img/2023-11-10-superposition/Autoencoder-480.webp differ diff --git a/assets/img/2023-11-10-superposition/Autoencoder-800.webp b/assets/img/2023-11-10-superposition/Autoencoder-800.webp new file mode 100644 index 00000000..8e662ecb Binary files /dev/null and b/assets/img/2023-11-10-superposition/Autoencoder-800.webp differ diff --git a/assets/img/2023-11-10-superposition/Sparsity_super_gelu-1400.webp b/assets/img/2023-11-10-superposition/Sparsity_super_gelu-1400.webp new file mode 100644 index 00000000..4cbdfdfc Binary files /dev/null and b/assets/img/2023-11-10-superposition/Sparsity_super_gelu-1400.webp differ diff --git a/assets/img/2023-11-10-superposition/Sparsity_super_gelu-480.webp b/assets/img/2023-11-10-superposition/Sparsity_super_gelu-480.webp new file mode 100644 index 00000000..5278afef Binary files /dev/null and b/assets/img/2023-11-10-superposition/Sparsity_super_gelu-480.webp differ diff --git a/assets/img/2023-11-10-superposition/Sparsity_super_gelu-800.webp b/assets/img/2023-11-10-superposition/Sparsity_super_gelu-800.webp new file mode 100644 index 00000000..4cbdfdfc Binary files 
/dev/null and b/assets/img/2023-11-10-superposition/Sparsity_super_gelu-800.webp differ diff --git a/assets/img/2023-11-10-superposition/Sparsity_super_relu-1400.webp b/assets/img/2023-11-10-superposition/Sparsity_super_relu-1400.webp new file mode 100644 index 00000000..0747a89e Binary files /dev/null and b/assets/img/2023-11-10-superposition/Sparsity_super_relu-1400.webp differ diff --git a/assets/img/2023-11-10-superposition/Sparsity_super_relu-480.webp b/assets/img/2023-11-10-superposition/Sparsity_super_relu-480.webp new file mode 100644 index 00000000..e253f013 Binary files /dev/null and b/assets/img/2023-11-10-superposition/Sparsity_super_relu-480.webp differ diff --git a/assets/img/2023-11-10-superposition/Sparsity_super_relu-800.webp b/assets/img/2023-11-10-superposition/Sparsity_super_relu-800.webp new file mode 100644 index 00000000..0747a89e Binary files /dev/null and b/assets/img/2023-11-10-superposition/Sparsity_super_relu-800.webp differ diff --git a/assets/img/2023-11-10-superposition/Sparsity_super_sigmoid-1400.webp b/assets/img/2023-11-10-superposition/Sparsity_super_sigmoid-1400.webp new file mode 100644 index 00000000..b838f04a Binary files /dev/null and b/assets/img/2023-11-10-superposition/Sparsity_super_sigmoid-1400.webp differ diff --git a/assets/img/2023-11-10-superposition/Sparsity_super_sigmoid-480.webp b/assets/img/2023-11-10-superposition/Sparsity_super_sigmoid-480.webp new file mode 100644 index 00000000..1f576ebe Binary files /dev/null and b/assets/img/2023-11-10-superposition/Sparsity_super_sigmoid-480.webp differ diff --git a/assets/img/2023-11-10-superposition/Sparsity_super_sigmoid-800.webp b/assets/img/2023-11-10-superposition/Sparsity_super_sigmoid-800.webp new file mode 100644 index 00000000..b838f04a Binary files /dev/null and b/assets/img/2023-11-10-superposition/Sparsity_super_sigmoid-800.webp differ diff --git a/assets/img/2023-11-10-superposition/Sparsity_super_silu-1400.webp 
b/assets/img/2023-11-10-superposition/Sparsity_super_silu-1400.webp new file mode 100644 index 00000000..edabe807 Binary files /dev/null and b/assets/img/2023-11-10-superposition/Sparsity_super_silu-1400.webp differ diff --git a/assets/img/2023-11-10-superposition/Sparsity_super_silu-480.webp b/assets/img/2023-11-10-superposition/Sparsity_super_silu-480.webp new file mode 100644 index 00000000..51b0bdcf Binary files /dev/null and b/assets/img/2023-11-10-superposition/Sparsity_super_silu-480.webp differ diff --git a/assets/img/2023-11-10-superposition/Sparsity_super_silu-800.webp b/assets/img/2023-11-10-superposition/Sparsity_super_silu-800.webp new file mode 100644 index 00000000..edabe807 Binary files /dev/null and b/assets/img/2023-11-10-superposition/Sparsity_super_silu-800.webp differ diff --git a/assets/img/2023-11-10-superposition/Sparsity_super_solu-1400.webp b/assets/img/2023-11-10-superposition/Sparsity_super_solu-1400.webp new file mode 100644 index 00000000..b35d067b Binary files /dev/null and b/assets/img/2023-11-10-superposition/Sparsity_super_solu-1400.webp differ diff --git a/assets/img/2023-11-10-superposition/Sparsity_super_solu-480.webp b/assets/img/2023-11-10-superposition/Sparsity_super_solu-480.webp new file mode 100644 index 00000000..ab36f056 Binary files /dev/null and b/assets/img/2023-11-10-superposition/Sparsity_super_solu-480.webp differ diff --git a/assets/img/2023-11-10-superposition/Sparsity_super_solu-800.webp b/assets/img/2023-11-10-superposition/Sparsity_super_solu-800.webp new file mode 100644 index 00000000..b35d067b Binary files /dev/null and b/assets/img/2023-11-10-superposition/Sparsity_super_solu-800.webp differ diff --git a/assets/img/2023-11-10-superposition/Sparsity_super_tanh-1400.webp b/assets/img/2023-11-10-superposition/Sparsity_super_tanh-1400.webp new file mode 100644 index 00000000..808c5a62 Binary files /dev/null and b/assets/img/2023-11-10-superposition/Sparsity_super_tanh-1400.webp differ diff --git 
a/assets/img/2023-11-10-superposition/Sparsity_super_tanh-480.webp b/assets/img/2023-11-10-superposition/Sparsity_super_tanh-480.webp new file mode 100644 index 00000000..d71ffc80 Binary files /dev/null and b/assets/img/2023-11-10-superposition/Sparsity_super_tanh-480.webp differ diff --git a/assets/img/2023-11-10-superposition/Sparsity_super_tanh-800.webp b/assets/img/2023-11-10-superposition/Sparsity_super_tanh-800.webp new file mode 100644 index 00000000..808c5a62 Binary files /dev/null and b/assets/img/2023-11-10-superposition/Sparsity_super_tanh-800.webp differ diff --git a/assets/img/2023-11-10-superposition/feature_visual-1400.webp b/assets/img/2023-11-10-superposition/feature_visual-1400.webp new file mode 100644 index 00000000..bac8e826 Binary files /dev/null and b/assets/img/2023-11-10-superposition/feature_visual-1400.webp differ diff --git a/assets/img/2023-11-10-superposition/feature_visual-480.webp b/assets/img/2023-11-10-superposition/feature_visual-480.webp new file mode 100644 index 00000000..2dee938a Binary files /dev/null and b/assets/img/2023-11-10-superposition/feature_visual-480.webp differ diff --git a/assets/img/2023-11-10-superposition/feature_visual-800.webp b/assets/img/2023-11-10-superposition/feature_visual-800.webp new file mode 100644 index 00000000..bac8e826 Binary files /dev/null and b/assets/img/2023-11-10-superposition/feature_visual-800.webp differ diff --git a/assets/img/2023-11-10-superposition/features_hidden-1400.webp b/assets/img/2023-11-10-superposition/features_hidden-1400.webp new file mode 100644 index 00000000..ee5e9b18 Binary files /dev/null and b/assets/img/2023-11-10-superposition/features_hidden-1400.webp differ diff --git a/assets/img/2023-11-10-superposition/features_hidden-480.webp b/assets/img/2023-11-10-superposition/features_hidden-480.webp new file mode 100644 index 00000000..56933ddd Binary files /dev/null and b/assets/img/2023-11-10-superposition/features_hidden-480.webp differ diff --git 
a/assets/img/2023-11-10-superposition/features_hidden-800.webp b/assets/img/2023-11-10-superposition/features_hidden-800.webp new file mode 100644 index 00000000..ee5e9b18 Binary files /dev/null and b/assets/img/2023-11-10-superposition/features_hidden-800.webp differ diff --git a/assets/img/2023-11-10-superposition/grid_plot-1400.webp b/assets/img/2023-11-10-superposition/grid_plot-1400.webp new file mode 100644 index 00000000..d86e8005 Binary files /dev/null and b/assets/img/2023-11-10-superposition/grid_plot-1400.webp differ diff --git a/assets/img/2023-11-10-superposition/grid_plot-480.webp b/assets/img/2023-11-10-superposition/grid_plot-480.webp new file mode 100644 index 00000000..93b95959 Binary files /dev/null and b/assets/img/2023-11-10-superposition/grid_plot-480.webp differ diff --git a/assets/img/2023-11-10-superposition/grid_plot-800.webp b/assets/img/2023-11-10-superposition/grid_plot-800.webp new file mode 100644 index 00000000..d86e8005 Binary files /dev/null and b/assets/img/2023-11-10-superposition/grid_plot-800.webp differ diff --git a/assets/img/2023-11-10-superposition/input_batch_50-1400.webp b/assets/img/2023-11-10-superposition/input_batch_50-1400.webp new file mode 100644 index 00000000..756b30ba Binary files /dev/null and b/assets/img/2023-11-10-superposition/input_batch_50-1400.webp differ diff --git a/assets/img/2023-11-10-superposition/input_batch_50-480.webp b/assets/img/2023-11-10-superposition/input_batch_50-480.webp new file mode 100644 index 00000000..342a928e Binary files /dev/null and b/assets/img/2023-11-10-superposition/input_batch_50-480.webp differ diff --git a/assets/img/2023-11-10-superposition/input_batch_50-800.webp b/assets/img/2023-11-10-superposition/input_batch_50-800.webp new file mode 100644 index 00000000..756b30ba Binary files /dev/null and b/assets/img/2023-11-10-superposition/input_batch_50-800.webp differ diff --git a/assets/img/2023-11-10-superposition/input_batch_99-1400.webp 
b/assets/img/2023-11-10-superposition/input_batch_99-1400.webp new file mode 100644 index 00000000..3789b598 Binary files /dev/null and b/assets/img/2023-11-10-superposition/input_batch_99-1400.webp differ diff --git a/assets/img/2023-11-10-superposition/input_batch_99-480.webp b/assets/img/2023-11-10-superposition/input_batch_99-480.webp new file mode 100644 index 00000000..c199310f Binary files /dev/null and b/assets/img/2023-11-10-superposition/input_batch_99-480.webp differ diff --git a/assets/img/2023-11-10-superposition/input_batch_99-800.webp b/assets/img/2023-11-10-superposition/input_batch_99-800.webp new file mode 100644 index 00000000..3789b598 Binary files /dev/null and b/assets/img/2023-11-10-superposition/input_batch_99-800.webp differ diff --git a/assets/img/2023-11-10-superposition/input_batches-1400.webp b/assets/img/2023-11-10-superposition/input_batches-1400.webp new file mode 100644 index 00000000..f58efbdd Binary files /dev/null and b/assets/img/2023-11-10-superposition/input_batches-1400.webp differ diff --git a/assets/img/2023-11-10-superposition/input_batches-480.webp b/assets/img/2023-11-10-superposition/input_batches-480.webp new file mode 100644 index 00000000..2cc418a2 Binary files /dev/null and b/assets/img/2023-11-10-superposition/input_batches-480.webp differ diff --git a/assets/img/2023-11-10-superposition/input_batches-800.webp b/assets/img/2023-11-10-superposition/input_batches-800.webp new file mode 100644 index 00000000..f58efbdd Binary files /dev/null and b/assets/img/2023-11-10-superposition/input_batches-800.webp differ diff --git a/assets/img/2023-11-10-superposition/legend-1400.webp b/assets/img/2023-11-10-superposition/legend-1400.webp new file mode 100644 index 00000000..16501667 Binary files /dev/null and b/assets/img/2023-11-10-superposition/legend-1400.webp differ diff --git a/assets/img/2023-11-10-superposition/legend-480.webp b/assets/img/2023-11-10-superposition/legend-480.webp new file mode 100644 index 
00000000..04b9daf6 Binary files /dev/null and b/assets/img/2023-11-10-superposition/legend-480.webp differ diff --git a/assets/img/2023-11-10-superposition/legend-800.webp b/assets/img/2023-11-10-superposition/legend-800.webp new file mode 100644 index 00000000..16501667 Binary files /dev/null and b/assets/img/2023-11-10-superposition/legend-800.webp differ diff --git a/assets/img/2023-11-10-superposition/phase_51_gelu-1400.webp b/assets/img/2023-11-10-superposition/phase_51_gelu-1400.webp new file mode 100644 index 00000000..f7771af2 Binary files /dev/null and b/assets/img/2023-11-10-superposition/phase_51_gelu-1400.webp differ diff --git a/assets/img/2023-11-10-superposition/phase_51_gelu-480.webp b/assets/img/2023-11-10-superposition/phase_51_gelu-480.webp new file mode 100644 index 00000000..07ba7420 Binary files /dev/null and b/assets/img/2023-11-10-superposition/phase_51_gelu-480.webp differ diff --git a/assets/img/2023-11-10-superposition/phase_51_gelu-800.webp b/assets/img/2023-11-10-superposition/phase_51_gelu-800.webp new file mode 100644 index 00000000..f7771af2 Binary files /dev/null and b/assets/img/2023-11-10-superposition/phase_51_gelu-800.webp differ diff --git a/assets/img/2023-11-10-superposition/phase_51_relu-1400.webp b/assets/img/2023-11-10-superposition/phase_51_relu-1400.webp new file mode 100644 index 00000000..3a531191 Binary files /dev/null and b/assets/img/2023-11-10-superposition/phase_51_relu-1400.webp differ diff --git a/assets/img/2023-11-10-superposition/phase_51_relu-480.webp b/assets/img/2023-11-10-superposition/phase_51_relu-480.webp new file mode 100644 index 00000000..3de0c327 Binary files /dev/null and b/assets/img/2023-11-10-superposition/phase_51_relu-480.webp differ diff --git a/assets/img/2023-11-10-superposition/phase_51_relu-800.webp b/assets/img/2023-11-10-superposition/phase_51_relu-800.webp new file mode 100644 index 00000000..3a531191 Binary files /dev/null and 
b/assets/img/2023-11-10-superposition/phase_51_relu-800.webp differ diff --git a/assets/img/2023-11-10-superposition/phase_51_sigmoid-1400.webp b/assets/img/2023-11-10-superposition/phase_51_sigmoid-1400.webp new file mode 100644 index 00000000..854b77c2 Binary files /dev/null and b/assets/img/2023-11-10-superposition/phase_51_sigmoid-1400.webp differ diff --git a/assets/img/2023-11-10-superposition/phase_51_sigmoid-480.webp b/assets/img/2023-11-10-superposition/phase_51_sigmoid-480.webp new file mode 100644 index 00000000..15efe977 Binary files /dev/null and b/assets/img/2023-11-10-superposition/phase_51_sigmoid-480.webp differ diff --git a/assets/img/2023-11-10-superposition/phase_51_sigmoid-800.webp b/assets/img/2023-11-10-superposition/phase_51_sigmoid-800.webp new file mode 100644 index 00000000..854b77c2 Binary files /dev/null and b/assets/img/2023-11-10-superposition/phase_51_sigmoid-800.webp differ diff --git a/assets/img/2023-11-10-superposition/phase_51_silu-1400.webp b/assets/img/2023-11-10-superposition/phase_51_silu-1400.webp new file mode 100644 index 00000000..6e51a34a Binary files /dev/null and b/assets/img/2023-11-10-superposition/phase_51_silu-1400.webp differ diff --git a/assets/img/2023-11-10-superposition/phase_51_silu-480.webp b/assets/img/2023-11-10-superposition/phase_51_silu-480.webp new file mode 100644 index 00000000..15ce5dfc Binary files /dev/null and b/assets/img/2023-11-10-superposition/phase_51_silu-480.webp differ diff --git a/assets/img/2023-11-10-superposition/phase_51_silu-800.webp b/assets/img/2023-11-10-superposition/phase_51_silu-800.webp new file mode 100644 index 00000000..6e51a34a Binary files /dev/null and b/assets/img/2023-11-10-superposition/phase_51_silu-800.webp differ diff --git a/assets/img/2023-11-10-superposition/phase_51_solu-1400.webp b/assets/img/2023-11-10-superposition/phase_51_solu-1400.webp new file mode 100644 index 00000000..af423b58 Binary files /dev/null and 
b/assets/img/2023-11-10-superposition/phase_51_solu-1400.webp differ diff --git a/assets/img/2023-11-10-superposition/phase_51_solu-480.webp b/assets/img/2023-11-10-superposition/phase_51_solu-480.webp new file mode 100644 index 00000000..3fad700a Binary files /dev/null and b/assets/img/2023-11-10-superposition/phase_51_solu-480.webp differ diff --git a/assets/img/2023-11-10-superposition/phase_51_solu-800.webp b/assets/img/2023-11-10-superposition/phase_51_solu-800.webp new file mode 100644 index 00000000..af423b58 Binary files /dev/null and b/assets/img/2023-11-10-superposition/phase_51_solu-800.webp differ diff --git a/assets/img/2023-11-10-superposition/phase_51_tanh-1400.webp b/assets/img/2023-11-10-superposition/phase_51_tanh-1400.webp new file mode 100644 index 00000000..5702681a Binary files /dev/null and b/assets/img/2023-11-10-superposition/phase_51_tanh-1400.webp differ diff --git a/assets/img/2023-11-10-superposition/phase_51_tanh-480.webp b/assets/img/2023-11-10-superposition/phase_51_tanh-480.webp new file mode 100644 index 00000000..2e6afd7e Binary files /dev/null and b/assets/img/2023-11-10-superposition/phase_51_tanh-480.webp differ diff --git a/assets/img/2023-11-10-superposition/phase_51_tanh-800.webp b/assets/img/2023-11-10-superposition/phase_51_tanh-800.webp new file mode 100644 index 00000000..5702681a Binary files /dev/null and b/assets/img/2023-11-10-superposition/phase_51_tanh-800.webp differ diff --git a/assets/img/2023-11-10-superposition/phaseplots-1400.webp b/assets/img/2023-11-10-superposition/phaseplots-1400.webp new file mode 100644 index 00000000..3945d389 Binary files /dev/null and b/assets/img/2023-11-10-superposition/phaseplots-1400.webp differ diff --git a/assets/img/2023-11-10-superposition/phaseplots-480.webp b/assets/img/2023-11-10-superposition/phaseplots-480.webp new file mode 100644 index 00000000..9cd106ef Binary files /dev/null and b/assets/img/2023-11-10-superposition/phaseplots-480.webp differ diff --git 
a/assets/img/2023-11-10-superposition/phaseplots-800.webp b/assets/img/2023-11-10-superposition/phaseplots-800.webp new file mode 100644 index 00000000..3945d389 Binary files /dev/null and b/assets/img/2023-11-10-superposition/phaseplots-800.webp differ diff --git a/assets/img/2023-11-10-superposition/random_matrix-1400.webp b/assets/img/2023-11-10-superposition/random_matrix-1400.webp new file mode 100644 index 00000000..87ccd75e Binary files /dev/null and b/assets/img/2023-11-10-superposition/random_matrix-1400.webp differ diff --git a/assets/img/2023-11-10-superposition/random_matrix-480.webp b/assets/img/2023-11-10-superposition/random_matrix-480.webp new file mode 100644 index 00000000..344b434e Binary files /dev/null and b/assets/img/2023-11-10-superposition/random_matrix-480.webp differ diff --git a/assets/img/2023-11-10-superposition/random_matrix-800.webp b/assets/img/2023-11-10-superposition/random_matrix-800.webp new file mode 100644 index 00000000..87ccd75e Binary files /dev/null and b/assets/img/2023-11-10-superposition/random_matrix-800.webp differ diff --git a/assets/img/2023-11-10-superposition/random_matrix_equation-1400.webp b/assets/img/2023-11-10-superposition/random_matrix_equation-1400.webp new file mode 100644 index 00000000..365ec451 Binary files /dev/null and b/assets/img/2023-11-10-superposition/random_matrix_equation-1400.webp differ diff --git a/assets/img/2023-11-10-superposition/random_matrix_equation-480.webp b/assets/img/2023-11-10-superposition/random_matrix_equation-480.webp new file mode 100644 index 00000000..0dd7b62d Binary files /dev/null and b/assets/img/2023-11-10-superposition/random_matrix_equation-480.webp differ diff --git a/assets/img/2023-11-10-superposition/random_matrix_equation-800.webp b/assets/img/2023-11-10-superposition/random_matrix_equation-800.webp new file mode 100644 index 00000000..365ec451 Binary files /dev/null and b/assets/img/2023-11-10-superposition/random_matrix_equation-800.webp differ diff --git 
a/assets/img/2023-11-10-superposition/random_matrix_latex-1400.webp b/assets/img/2023-11-10-superposition/random_matrix_latex-1400.webp new file mode 100644 index 00000000..5f54f70c Binary files /dev/null and b/assets/img/2023-11-10-superposition/random_matrix_latex-1400.webp differ diff --git a/assets/img/2023-11-10-superposition/random_matrix_latex-480.webp b/assets/img/2023-11-10-superposition/random_matrix_latex-480.webp new file mode 100644 index 00000000..dbd83156 Binary files /dev/null and b/assets/img/2023-11-10-superposition/random_matrix_latex-480.webp differ diff --git a/assets/img/2023-11-10-superposition/random_matrix_latex-800.webp b/assets/img/2023-11-10-superposition/random_matrix_latex-800.webp new file mode 100644 index 00000000..5f54f70c Binary files /dev/null and b/assets/img/2023-11-10-superposition/random_matrix_latex-800.webp differ diff --git a/assets/img/2023-11-10-transformer-elo-prediction/emojidict-triplefit-1400.webp b/assets/img/2023-11-10-transformer-elo-prediction/emojidict-triplefit-1400.webp new file mode 100644 index 00000000..3070899e Binary files /dev/null and b/assets/img/2023-11-10-transformer-elo-prediction/emojidict-triplefit-1400.webp differ diff --git a/assets/img/2023-11-10-transformer-elo-prediction/emojidict-triplefit-480.webp b/assets/img/2023-11-10-transformer-elo-prediction/emojidict-triplefit-480.webp new file mode 100644 index 00000000..67e20e6d Binary files /dev/null and b/assets/img/2023-11-10-transformer-elo-prediction/emojidict-triplefit-480.webp differ diff --git a/assets/img/2023-11-10-transformer-elo-prediction/emojidict-triplefit-800.webp b/assets/img/2023-11-10-transformer-elo-prediction/emojidict-triplefit-800.webp new file mode 100644 index 00000000..3070899e Binary files /dev/null and b/assets/img/2023-11-10-transformer-elo-prediction/emojidict-triplefit-800.webp differ diff --git a/assets/img/2023-11-10-transformer-elo-prediction/emojitweets-transfer-40e-1400.webp 
b/assets/img/2023-11-10-transformer-elo-prediction/emojitweets-transfer-40e-1400.webp new file mode 100644 index 00000000..61a6ece1 Binary files /dev/null and b/assets/img/2023-11-10-transformer-elo-prediction/emojitweets-transfer-40e-1400.webp differ diff --git a/assets/img/2023-11-10-transformer-elo-prediction/emojitweets-transfer-40e-480.webp b/assets/img/2023-11-10-transformer-elo-prediction/emojitweets-transfer-40e-480.webp new file mode 100644 index 00000000..1b1441a6 Binary files /dev/null and b/assets/img/2023-11-10-transformer-elo-prediction/emojitweets-transfer-40e-480.webp differ diff --git a/assets/img/2023-11-10-transformer-elo-prediction/emojitweets-transfer-40e-800.webp b/assets/img/2023-11-10-transformer-elo-prediction/emojitweets-transfer-40e-800.webp new file mode 100644 index 00000000..61a6ece1 Binary files /dev/null and b/assets/img/2023-11-10-transformer-elo-prediction/emojitweets-transfer-40e-800.webp differ diff --git a/assets/img/2023-11-10-transformer-elo-prediction/project-vis-1400.webp b/assets/img/2023-11-10-transformer-elo-prediction/project-vis-1400.webp new file mode 100644 index 00000000..71588bf3 Binary files /dev/null and b/assets/img/2023-11-10-transformer-elo-prediction/project-vis-1400.webp differ diff --git a/assets/img/2023-11-10-transformer-elo-prediction/project-vis-480.webp b/assets/img/2023-11-10-transformer-elo-prediction/project-vis-480.webp new file mode 100644 index 00000000..155d4c5c Binary files /dev/null and b/assets/img/2023-11-10-transformer-elo-prediction/project-vis-480.webp differ diff --git a/assets/img/2023-11-10-transformer-elo-prediction/project-vis-800.webp b/assets/img/2023-11-10-transformer-elo-prediction/project-vis-800.webp new file mode 100644 index 00000000..71588bf3 Binary files /dev/null and b/assets/img/2023-11-10-transformer-elo-prediction/project-vis-800.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/best_result-1400.webp 
b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/best_result-1400.webp new file mode 100644 index 00000000..4f1cb10e Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/best_result-1400.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/best_result-480.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/best_result-480.webp new file mode 100644 index 00000000..5790a0fa Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/best_result-480.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/best_result-800.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/best_result-800.webp new file mode 100644 index 00000000..4f1cb10e Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/best_result-800.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/beta_output-1400.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/beta_output-1400.webp new file mode 100644 index 00000000..1a7320ec Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/beta_output-1400.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/beta_output-480.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/beta_output-480.webp new file mode 100644 index 00000000..dba2fece Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/beta_output-480.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/beta_output-800.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/beta_output-800.webp new file mode 100644 index 00000000..1a7320ec Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/beta_output-800.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/controlbright-1400.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/controlbright-1400.webp new file mode 100644 index 
00000000..2abdf156 Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/controlbright-1400.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/controlbright-480.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/controlbright-480.webp new file mode 100644 index 00000000..978314c8 Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/controlbright-480.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/controlbright-800.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/controlbright-800.webp new file mode 100644 index 00000000..2abdf156 Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/controlbright-800.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/controldim-1400.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/controldim-1400.webp new file mode 100644 index 00000000..6f0cabf6 Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/controldim-1400.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/controldim-480.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/controldim-480.webp new file mode 100644 index 00000000..c8d04312 Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/controldim-480.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/controldim-800.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/controldim-800.webp new file mode 100644 index 00000000..6f0cabf6 Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/controldim-800.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/diffmodeloverview-1400.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/diffmodeloverview-1400.webp new file mode 100644 index 00000000..985deb70 Binary files /dev/null and 
b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/diffmodeloverview-1400.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/diffmodeloverview-480.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/diffmodeloverview-480.webp new file mode 100644 index 00000000..a34eed18 Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/diffmodeloverview-480.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/diffmodeloverview-800.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/diffmodeloverview-800.webp new file mode 100644 index 00000000..985deb70 Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/diffmodeloverview-800.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/dlmodelarch-1400.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/dlmodelarch-1400.webp new file mode 100644 index 00000000..8fd0e5b5 Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/dlmodelarch-1400.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/dlmodelarch-480.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/dlmodelarch-480.webp new file mode 100644 index 00000000..50b22545 Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/dlmodelarch-480.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/dlmodelarch-800.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/dlmodelarch-800.webp new file mode 100644 index 00000000..8fd0e5b5 Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/dlmodelarch-800.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/mseexample-1400.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/mseexample-1400.webp new file mode 100644 index 00000000..45e43778 Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/mseexample-1400.webp 
differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/mseexample-480.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/mseexample-480.webp new file mode 100644 index 00000000..23386fc8 Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/mseexample-480.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/mseexample-800.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/mseexample-800.webp new file mode 100644 index 00000000..45e43778 Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/mseexample-800.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/new-1400.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/new-1400.webp new file mode 100644 index 00000000..1ad7b4d3 Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/new-1400.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/new-480.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/new-480.webp new file mode 100644 index 00000000..29537f4f Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/new-480.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/new-800.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/new-800.webp new file mode 100644 index 00000000..1ad7b4d3 Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/new-800.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/orig-1400.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/orig-1400.webp new file mode 100644 index 00000000..63a1de07 Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/orig-1400.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/orig-480.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/orig-480.webp new file mode 100644 index 00000000..d4987b30 Binary files 
/dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/orig-480.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/orig-800.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/orig-800.webp new file mode 100644 index 00000000..63a1de07 Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/orig-800.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/var_result-1400.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/var_result-1400.webp new file mode 100644 index 00000000..b9c4c7c7 Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/var_result-1400.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/var_result-480.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/var_result-480.webp new file mode 100644 index 00000000..959c1d4d Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/var_result-480.webp differ diff --git a/assets/img/2023-11-11-Tracking-Multiple-Objects.md/var_result-800.webp b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/var_result-800.webp new file mode 100644 index 00000000..b9c4c7c7 Binary files /dev/null and b/assets/img/2023-11-11-Tracking-Multiple-Objects.md/var_result-800.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/DVAEAugmentedLC-1400.webp b/assets/img/2023-11-11-denoisingVAE/DVAEAugmentedLC-1400.webp new file mode 100644 index 00000000..4a3aef1f Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/DVAEAugmentedLC-1400.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/DVAEAugmentedLC-480.webp b/assets/img/2023-11-11-denoisingVAE/DVAEAugmentedLC-480.webp new file mode 100644 index 00000000..91cccf1c Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/DVAEAugmentedLC-480.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/DVAEAugmentedLC-800.webp 
b/assets/img/2023-11-11-denoisingVAE/DVAEAugmentedLC-800.webp new file mode 100644 index 00000000..4a3aef1f Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/DVAEAugmentedLC-800.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/DVAE_LatentSpace-1400.webp b/assets/img/2023-11-11-denoisingVAE/DVAE_LatentSpace-1400.webp new file mode 100644 index 00000000..02bb902d Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/DVAE_LatentSpace-1400.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/DVAE_LatentSpace-480.webp b/assets/img/2023-11-11-denoisingVAE/DVAE_LatentSpace-480.webp new file mode 100644 index 00000000..9f8d76b4 Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/DVAE_LatentSpace-480.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/DVAE_LatentSpace-800.webp b/assets/img/2023-11-11-denoisingVAE/DVAE_LatentSpace-800.webp new file mode 100644 index 00000000..02bb902d Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/DVAE_LatentSpace-800.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/DVAE_data_augmentation-1400.webp b/assets/img/2023-11-11-denoisingVAE/DVAE_data_augmentation-1400.webp new file mode 100644 index 00000000..c513b52c Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/DVAE_data_augmentation-1400.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/DVAE_data_augmentation-480.webp b/assets/img/2023-11-11-denoisingVAE/DVAE_data_augmentation-480.webp new file mode 100644 index 00000000..559f2f13 Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/DVAE_data_augmentation-480.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/DVAE_data_augmentation-800.webp b/assets/img/2023-11-11-denoisingVAE/DVAE_data_augmentation-800.webp new file mode 100644 index 00000000..c513b52c Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/DVAE_data_augmentation-800.webp differ diff --git 
a/assets/img/2023-11-11-denoisingVAE/LimitedDatasetLC-1400.webp b/assets/img/2023-11-11-denoisingVAE/LimitedDatasetLC-1400.webp new file mode 100644 index 00000000..e7ff6e35 Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/LimitedDatasetLC-1400.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/LimitedDatasetLC-480.webp b/assets/img/2023-11-11-denoisingVAE/LimitedDatasetLC-480.webp new file mode 100644 index 00000000..e1e281aa Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/LimitedDatasetLC-480.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/LimitedDatasetLC-800.webp b/assets/img/2023-11-11-denoisingVAE/LimitedDatasetLC-800.webp new file mode 100644 index 00000000..e7ff6e35 Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/LimitedDatasetLC-800.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/TrainingLossOriginal-1400.webp b/assets/img/2023-11-11-denoisingVAE/TrainingLossOriginal-1400.webp new file mode 100644 index 00000000..4b7059d3 Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/TrainingLossOriginal-1400.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/TrainingLossOriginal-480.webp b/assets/img/2023-11-11-denoisingVAE/TrainingLossOriginal-480.webp new file mode 100644 index 00000000..6f033f0a Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/TrainingLossOriginal-480.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/TrainingLossOriginal-800.webp b/assets/img/2023-11-11-denoisingVAE/TrainingLossOriginal-800.webp new file mode 100644 index 00000000..4b7059d3 Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/TrainingLossOriginal-800.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/VAEAugmentedLC-1400.webp b/assets/img/2023-11-11-denoisingVAE/VAEAugmentedLC-1400.webp new file mode 100644 index 00000000..ee4c4b1c Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/VAEAugmentedLC-1400.webp differ diff --git 
a/assets/img/2023-11-11-denoisingVAE/VAEAugmentedLC-480.webp b/assets/img/2023-11-11-denoisingVAE/VAEAugmentedLC-480.webp new file mode 100644 index 00000000..c898e328 Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/VAEAugmentedLC-480.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/VAEAugmentedLC-800.webp b/assets/img/2023-11-11-denoisingVAE/VAEAugmentedLC-800.webp new file mode 100644 index 00000000..ee4c4b1c Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/VAEAugmentedLC-800.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/VAE_LatentSpace-1400.webp b/assets/img/2023-11-11-denoisingVAE/VAE_LatentSpace-1400.webp new file mode 100644 index 00000000..0ef6b257 Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/VAE_LatentSpace-1400.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/VAE_LatentSpace-480.webp b/assets/img/2023-11-11-denoisingVAE/VAE_LatentSpace-480.webp new file mode 100644 index 00000000..68195c63 Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/VAE_LatentSpace-480.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/VAE_LatentSpace-800.webp b/assets/img/2023-11-11-denoisingVAE/VAE_LatentSpace-800.webp new file mode 100644 index 00000000..0ef6b257 Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/VAE_LatentSpace-800.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/VAE_data_augmentation-1400.webp b/assets/img/2023-11-11-denoisingVAE/VAE_data_augmentation-1400.webp new file mode 100644 index 00000000..e5ebdc29 Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/VAE_data_augmentation-1400.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/VAE_data_augmentation-480.webp b/assets/img/2023-11-11-denoisingVAE/VAE_data_augmentation-480.webp new file mode 100644 index 00000000..14a8a0aa Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/VAE_data_augmentation-480.webp differ diff --git 
a/assets/img/2023-11-11-denoisingVAE/VAE_data_augmentation-800.webp b/assets/img/2023-11-11-denoisingVAE/VAE_data_augmentation-800.webp new file mode 100644 index 00000000..e5ebdc29 Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/VAE_data_augmentation-800.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/fashionMNISTSamples-1400.webp b/assets/img/2023-11-11-denoisingVAE/fashionMNISTSamples-1400.webp new file mode 100644 index 00000000..05f7021c Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/fashionMNISTSamples-1400.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/fashionMNISTSamples-480.webp b/assets/img/2023-11-11-denoisingVAE/fashionMNISTSamples-480.webp new file mode 100644 index 00000000..0dcf1124 Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/fashionMNISTSamples-480.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/fashionMNISTSamples-800.webp b/assets/img/2023-11-11-denoisingVAE/fashionMNISTSamples-800.webp new file mode 100644 index 00000000..05f7021c Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/fashionMNISTSamples-800.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/noisyFashionMNISTSamples-1400.webp b/assets/img/2023-11-11-denoisingVAE/noisyFashionMNISTSamples-1400.webp new file mode 100644 index 00000000..aaf07584 Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/noisyFashionMNISTSamples-1400.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/noisyFashionMNISTSamples-480.webp b/assets/img/2023-11-11-denoisingVAE/noisyFashionMNISTSamples-480.webp new file mode 100644 index 00000000..a38cd70d Binary files /dev/null and b/assets/img/2023-11-11-denoisingVAE/noisyFashionMNISTSamples-480.webp differ diff --git a/assets/img/2023-11-11-denoisingVAE/noisyFashionMNISTSamples-800.webp b/assets/img/2023-11-11-denoisingVAE/noisyFashionMNISTSamples-800.webp new file mode 100644 index 00000000..aaf07584 Binary files /dev/null and 
b/assets/img/2023-11-11-denoisingVAE/noisyFashionMNISTSamples-800.webp differ diff --git a/assets/img/2023-11-11-physics-loss/fig1-1400.webp b/assets/img/2023-11-11-physics-loss/fig1-1400.webp new file mode 100644 index 00000000..806ed852 Binary files /dev/null and b/assets/img/2023-11-11-physics-loss/fig1-1400.webp differ diff --git a/assets/img/2023-11-11-physics-loss/fig1-480.webp b/assets/img/2023-11-11-physics-loss/fig1-480.webp new file mode 100644 index 00000000..bc1e8591 Binary files /dev/null and b/assets/img/2023-11-11-physics-loss/fig1-480.webp differ diff --git a/assets/img/2023-11-11-physics-loss/fig1-800.webp b/assets/img/2023-11-11-physics-loss/fig1-800.webp new file mode 100644 index 00000000..806ed852 Binary files /dev/null and b/assets/img/2023-11-11-physics-loss/fig1-800.webp differ diff --git a/assets/img/2023-11-11-physics-loss/fig2-1400.webp b/assets/img/2023-11-11-physics-loss/fig2-1400.webp new file mode 100644 index 00000000..f97e888b Binary files /dev/null and b/assets/img/2023-11-11-physics-loss/fig2-1400.webp differ diff --git a/assets/img/2023-11-11-physics-loss/fig2-480.webp b/assets/img/2023-11-11-physics-loss/fig2-480.webp new file mode 100644 index 00000000..85c05b29 Binary files /dev/null and b/assets/img/2023-11-11-physics-loss/fig2-480.webp differ diff --git a/assets/img/2023-11-11-physics-loss/fig2-800.webp b/assets/img/2023-11-11-physics-loss/fig2-800.webp new file mode 100644 index 00000000..f97e888b Binary files /dev/null and b/assets/img/2023-11-11-physics-loss/fig2-800.webp differ diff --git a/assets/img/2023-11-11-physics-loss/fig3-1400.webp b/assets/img/2023-11-11-physics-loss/fig3-1400.webp new file mode 100644 index 00000000..ede7a422 Binary files /dev/null and b/assets/img/2023-11-11-physics-loss/fig3-1400.webp differ diff --git a/assets/img/2023-11-11-physics-loss/fig3-480.webp b/assets/img/2023-11-11-physics-loss/fig3-480.webp new file mode 100644 index 00000000..1e90f90d Binary files /dev/null and 
b/assets/img/2023-11-11-physics-loss/fig3-480.webp differ diff --git a/assets/img/2023-11-11-physics-loss/fig3-800.webp b/assets/img/2023-11-11-physics-loss/fig3-800.webp new file mode 100644 index 00000000..ede7a422 Binary files /dev/null and b/assets/img/2023-11-11-physics-loss/fig3-800.webp differ diff --git a/assets/img/2023-11-11-physics-loss/fig4-1400.webp b/assets/img/2023-11-11-physics-loss/fig4-1400.webp new file mode 100644 index 00000000..2c318976 Binary files /dev/null and b/assets/img/2023-11-11-physics-loss/fig4-1400.webp differ diff --git a/assets/img/2023-11-11-physics-loss/fig4-480.webp b/assets/img/2023-11-11-physics-loss/fig4-480.webp new file mode 100644 index 00000000..0b2bec79 Binary files /dev/null and b/assets/img/2023-11-11-physics-loss/fig4-480.webp differ diff --git a/assets/img/2023-11-11-physics-loss/fig4-800.webp b/assets/img/2023-11-11-physics-loss/fig4-800.webp new file mode 100644 index 00000000..2c318976 Binary files /dev/null and b/assets/img/2023-11-11-physics-loss/fig4-800.webp differ diff --git a/assets/img/2023-11-11-physics-loss/fig4b-1400.webp b/assets/img/2023-11-11-physics-loss/fig4b-1400.webp new file mode 100644 index 00000000..6eecdc9a Binary files /dev/null and b/assets/img/2023-11-11-physics-loss/fig4b-1400.webp differ diff --git a/assets/img/2023-11-11-physics-loss/fig4b-480.webp b/assets/img/2023-11-11-physics-loss/fig4b-480.webp new file mode 100644 index 00000000..e069b412 Binary files /dev/null and b/assets/img/2023-11-11-physics-loss/fig4b-480.webp differ diff --git a/assets/img/2023-11-11-physics-loss/fig4b-800.webp b/assets/img/2023-11-11-physics-loss/fig4b-800.webp new file mode 100644 index 00000000..6eecdc9a Binary files /dev/null and b/assets/img/2023-11-11-physics-loss/fig4b-800.webp differ diff --git a/assets/img/2023-11-11-physics-loss/fig5a-1400.webp b/assets/img/2023-11-11-physics-loss/fig5a-1400.webp new file mode 100644 index 00000000..bf5944ed Binary files /dev/null and 
b/assets/img/2023-11-11-physics-loss/fig5a-1400.webp differ diff --git a/assets/img/2023-11-11-physics-loss/fig5a-480.webp b/assets/img/2023-11-11-physics-loss/fig5a-480.webp new file mode 100644 index 00000000..660f591e Binary files /dev/null and b/assets/img/2023-11-11-physics-loss/fig5a-480.webp differ diff --git a/assets/img/2023-11-11-physics-loss/fig5a-800.webp b/assets/img/2023-11-11-physics-loss/fig5a-800.webp new file mode 100644 index 00000000..bf5944ed Binary files /dev/null and b/assets/img/2023-11-11-physics-loss/fig5a-800.webp differ diff --git a/assets/img/2023-11-11-physics-loss/fig5b-1400.webp b/assets/img/2023-11-11-physics-loss/fig5b-1400.webp new file mode 100644 index 00000000..cb669227 Binary files /dev/null and b/assets/img/2023-11-11-physics-loss/fig5b-1400.webp differ diff --git a/assets/img/2023-11-11-physics-loss/fig5b-480.webp b/assets/img/2023-11-11-physics-loss/fig5b-480.webp new file mode 100644 index 00000000..93ea5b23 Binary files /dev/null and b/assets/img/2023-11-11-physics-loss/fig5b-480.webp differ diff --git a/assets/img/2023-11-11-physics-loss/fig5b-800.webp b/assets/img/2023-11-11-physics-loss/fig5b-800.webp new file mode 100644 index 00000000..cb669227 Binary files /dev/null and b/assets/img/2023-11-11-physics-loss/fig5b-800.webp differ diff --git a/assets/img/2023-11-11-physics-loss/fig6-1400.webp b/assets/img/2023-11-11-physics-loss/fig6-1400.webp new file mode 100644 index 00000000..e4a9a44c Binary files /dev/null and b/assets/img/2023-11-11-physics-loss/fig6-1400.webp differ diff --git a/assets/img/2023-11-11-physics-loss/fig6-480.webp b/assets/img/2023-11-11-physics-loss/fig6-480.webp new file mode 100644 index 00000000..7cf9268c Binary files /dev/null and b/assets/img/2023-11-11-physics-loss/fig6-480.webp differ diff --git a/assets/img/2023-11-11-physics-loss/fig6-800.webp b/assets/img/2023-11-11-physics-loss/fig6-800.webp new file mode 100644 index 00000000..e4a9a44c Binary files /dev/null and 
b/assets/img/2023-11-11-physics-loss/fig6-800.webp differ diff --git a/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated-1400.webp b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated-1400.webp new file mode 100644 index 00000000..f1cb9f35 Binary files /dev/null and b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated-1400.webp differ diff --git a/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated-480.webp b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated-480.webp new file mode 100644 index 00000000..efe2df36 Binary files /dev/null and b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated-480.webp differ diff --git a/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated-800.webp b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated-800.webp new file mode 100644 index 00000000..f1cb9f35 Binary files /dev/null and b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated-800.webp differ diff --git a/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated_encoder-1400.webp b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated_encoder-1400.webp new file mode 100644 index 00000000..8738a20d Binary files /dev/null and b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated_encoder-1400.webp differ diff --git a/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated_encoder-480.webp b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated_encoder-480.webp new file mode 100644 index 00000000..28406c06 Binary files /dev/null and b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated_encoder-480.webp differ diff --git a/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated_encoder-800.webp b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated_encoder-800.webp new file mode 100644 index 00000000..8738a20d 
Binary files /dev/null and b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated_encoder-800.webp differ diff --git a/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct-1400.webp b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct-1400.webp new file mode 100644 index 00000000..0c3e0101 Binary files /dev/null and b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct-1400.webp differ diff --git a/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct-480.webp b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct-480.webp new file mode 100644 index 00000000..e21dae73 Binary files /dev/null and b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct-480.webp differ diff --git a/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct-800.webp b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct-800.webp new file mode 100644 index 00000000..0c3e0101 Binary files /dev/null and b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct-800.webp differ diff --git a/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct_encoder-1400.webp b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct_encoder-1400.webp new file mode 100644 index 00000000..750b5bc9 Binary files /dev/null and b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct_encoder-1400.webp differ diff --git a/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct_encoder-480.webp b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct_encoder-480.webp new file mode 100644 index 00000000..28a8bdc0 Binary files /dev/null and b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct_encoder-480.webp differ diff --git a/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct_encoder-800.webp b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct_encoder-800.webp new file mode 100644 index 
00000000..750b5bc9 Binary files /dev/null and b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct_encoder-800.webp differ diff --git a/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/errors-1400.webp b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/errors-1400.webp new file mode 100644 index 00000000..9f9c377e Binary files /dev/null and b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/errors-1400.webp differ diff --git a/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/errors-480.webp b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/errors-480.webp new file mode 100644 index 00000000..3a9761ec Binary files /dev/null and b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/errors-480.webp differ diff --git a/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/errors-800.webp b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/errors-800.webp new file mode 100644 index 00000000..9f9c377e Binary files /dev/null and b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/errors-800.webp differ diff --git a/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/flant5errors-1400.webp b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/flant5errors-1400.webp new file mode 100644 index 00000000..e3c5ccb9 Binary files /dev/null and b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/flant5errors-1400.webp differ diff --git a/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/flant5errors-480.webp b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/flant5errors-480.webp new file mode 100644 index 00000000..eada8ab2 Binary files /dev/null and b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/flant5errors-480.webp differ diff --git a/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/flant5errors-800.webp b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/flant5errors-800.webp new file mode 100644 index 00000000..e3c5ccb9 
Binary files /dev/null and b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/flant5errors-800.webp differ diff --git a/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/socialiqaerrors-1400.webp b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/socialiqaerrors-1400.webp new file mode 100644 index 00000000..025d0980 Binary files /dev/null and b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/socialiqaerrors-1400.webp differ diff --git a/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/socialiqaerrors-480.webp b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/socialiqaerrors-480.webp new file mode 100644 index 00000000..f7fc78b6 Binary files /dev/null and b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/socialiqaerrors-480.webp differ diff --git a/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/socialiqaerrors-800.webp b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/socialiqaerrors-800.webp new file mode 100644 index 00000000..025d0980 Binary files /dev/null and b/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/socialiqaerrors-800.webp differ diff --git a/assets/img/2023-11-16-speculative-decoding/Algorithm1-1400.webp b/assets/img/2023-11-16-speculative-decoding/Algorithm1-1400.webp new file mode 100644 index 00000000..ea644e9f Binary files /dev/null and b/assets/img/2023-11-16-speculative-decoding/Algorithm1-1400.webp differ diff --git a/assets/img/2023-11-16-speculative-decoding/Algorithm1-480.webp b/assets/img/2023-11-16-speculative-decoding/Algorithm1-480.webp new file mode 100644 index 00000000..1c032e20 Binary files /dev/null and b/assets/img/2023-11-16-speculative-decoding/Algorithm1-480.webp differ diff --git a/assets/img/2023-11-16-speculative-decoding/Algorithm1-800.webp b/assets/img/2023-11-16-speculative-decoding/Algorithm1-800.webp new file mode 100644 index 00000000..ea644e9f Binary files /dev/null and 
b/assets/img/2023-11-16-speculative-decoding/Algorithm1-800.webp differ diff --git a/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_feature_viz-1400.webp b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_feature_viz-1400.webp new file mode 100644 index 00000000..0ec55fa5 Binary files /dev/null and b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_feature_viz-1400.webp differ diff --git a/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_feature_viz-480.webp b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_feature_viz-480.webp new file mode 100644 index 00000000..4b52abb7 Binary files /dev/null and b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_feature_viz-480.webp differ diff --git a/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_feature_viz-800.webp b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_feature_viz-800.webp new file mode 100644 index 00000000..0ec55fa5 Binary files /dev/null and b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_feature_viz-800.webp differ diff --git a/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_raw-1400.webp b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_raw-1400.webp new file mode 100644 index 00000000..49999501 Binary files /dev/null and 
b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_raw-1400.webp differ diff --git a/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_raw-480.webp b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_raw-480.webp new file mode 100644 index 00000000..942a53d2 Binary files /dev/null and b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_raw-480.webp differ diff --git a/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_raw-800.webp b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_raw-800.webp new file mode 100644 index 00000000..49999501 Binary files /dev/null and b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_raw-800.webp differ diff --git a/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_saliency-1400.webp b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_saliency-1400.webp new file mode 100644 index 00000000..5c58c5b3 Binary files /dev/null and b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_saliency-1400.webp differ diff --git a/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_saliency-480.webp b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_saliency-480.webp new file mode 100644 index 00000000..080057a6 Binary files /dev/null and 
b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_saliency-480.webp differ diff --git a/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_saliency-800.webp b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_saliency-800.webp new file mode 100644 index 00000000..5c58c5b3 Binary files /dev/null and b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/jar_saliency-800.webp differ diff --git a/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/mug_raw-1400.webp b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/mug_raw-1400.webp new file mode 100644 index 00000000..0e024310 Binary files /dev/null and b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/mug_raw-1400.webp differ diff --git a/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/mug_raw-480.webp b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/mug_raw-480.webp new file mode 100644 index 00000000..c728ea51 Binary files /dev/null and b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/mug_raw-480.webp differ diff --git a/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/mug_raw-800.webp b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/mug_raw-800.webp new file mode 100644 index 00000000..0e024310 Binary files /dev/null and 
b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/mug_raw-800.webp differ diff --git a/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/mug_saliency-1400.webp b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/mug_saliency-1400.webp new file mode 100644 index 00000000..29eea45a Binary files /dev/null and b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/mug_saliency-1400.webp differ diff --git a/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/mug_saliency-480.webp b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/mug_saliency-480.webp new file mode 100644 index 00000000..bf2901ef Binary files /dev/null and b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/mug_saliency-480.webp differ diff --git a/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/mug_saliency-800.webp b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/mug_saliency-800.webp new file mode 100644 index 00000000..29eea45a Binary files /dev/null and b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/mug_saliency-800.webp differ diff --git a/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table1-1400.webp b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table1-1400.webp new file mode 100644 index 00000000..e31f08a0 Binary files 
/dev/null and b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table1-1400.webp differ diff --git a/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table1-480.webp b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table1-480.webp new file mode 100644 index 00000000..59148abb Binary files /dev/null and b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table1-480.webp differ diff --git a/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table1-800.webp b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table1-800.webp new file mode 100644 index 00000000..e31f08a0 Binary files /dev/null and b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table1-800.webp differ diff --git a/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table2-1400.webp b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table2-1400.webp new file mode 100644 index 00000000..d273542e Binary files /dev/null and b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table2-1400.webp differ diff --git a/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table2-480.webp b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table2-480.webp new file mode 100644 index 00000000..d74b241c Binary files /dev/null and 
b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table2-480.webp differ diff --git a/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table2-800.webp b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table2-800.webp new file mode 100644 index 00000000..d273542e Binary files /dev/null and b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table2-800.webp differ diff --git a/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table3-1400.webp b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table3-1400.webp new file mode 100644 index 00000000..6fc44c92 Binary files /dev/null and b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table3-1400.webp differ diff --git a/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table3-480.webp b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table3-480.webp new file mode 100644 index 00000000..5b326c80 Binary files /dev/null and b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table3-480.webp differ diff --git a/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table3-800.webp b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table3-800.webp new file mode 100644 index 00000000..6fc44c92 Binary files /dev/null and 
b/assets/img/2023-11-9-alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout/table3-800.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/encoder-1400.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/encoder-1400.webp new file mode 100644 index 00000000..a9e9050a Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/encoder-1400.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/encoder-480.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/encoder-480.webp new file mode 100644 index 00000000..ba031eba Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/encoder-480.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/encoder-800.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/encoder-800.webp new file mode 100644 index 00000000..a9e9050a Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/encoder-800.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-clusters-1400.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-clusters-1400.webp new file mode 100644 index 00000000..7c0c6e57 Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-clusters-1400.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-clusters-480.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-clusters-480.webp new file mode 100644 index 00000000..2d475881 Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-clusters-480.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-clusters-800.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-clusters-800.webp new file mode 100644 index 00000000..7c0c6e57 Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-clusters-800.webp differ diff --git 
a/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-top-movies-1400.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-top-movies-1400.webp new file mode 100644 index 00000000..6797d5ed Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-top-movies-1400.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-top-movies-480.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-top-movies-480.webp new file mode 100644 index 00000000..8bf79b9b Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-top-movies-480.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-top-movies-800.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-top-movies-800.webp new file mode 100644 index 00000000..6797d5ed Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-top-movies-800.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-clusters-1400.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-clusters-1400.webp new file mode 100644 index 00000000..817528be Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-clusters-1400.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-clusters-480.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-clusters-480.webp new file mode 100644 index 00000000..7fb18552 Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-clusters-480.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-clusters-800.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-clusters-800.webp new file mode 100644 index 00000000..817528be Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-clusters-800.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-top-movies-1400.webp 
b/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-top-movies-1400.webp new file mode 100644 index 00000000..eb256c72 Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-top-movies-1400.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-top-movies-480.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-top-movies-480.webp new file mode 100644 index 00000000..8451e2c1 Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-top-movies-480.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-top-movies-800.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-top-movies-800.webp new file mode 100644 index 00000000..eb256c72 Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-top-movies-800.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-clusters-1400.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-clusters-1400.webp new file mode 100644 index 00000000..bf2d4bcc Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-clusters-1400.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-clusters-480.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-clusters-480.webp new file mode 100644 index 00000000..4cc7cfe3 Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-clusters-480.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-clusters-800.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-clusters-800.webp new file mode 100644 index 00000000..bf2d4bcc Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-clusters-800.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-top-movies-1400.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-top-movies-1400.webp new file mode 100644 
index 00000000..de88ca49 Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-top-movies-1400.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-top-movies-480.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-top-movies-480.webp new file mode 100644 index 00000000..617d0dbd Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-top-movies-480.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-top-movies-800.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-top-movies-800.webp new file mode 100644 index 00000000..de88ca49 Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-top-movies-800.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-clusters-1400.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-clusters-1400.webp new file mode 100644 index 00000000..7aac1c89 Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-clusters-1400.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-clusters-480.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-clusters-480.webp new file mode 100644 index 00000000..bddf13e6 Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-clusters-480.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-clusters-800.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-clusters-800.webp new file mode 100644 index 00000000..7aac1c89 Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-clusters-800.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-top-movies-1400.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-top-movies-1400.webp new file mode 100644 index 00000000..8afbea5a Binary files /dev/null and 
b/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-top-movies-1400.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-top-movies-480.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-top-movies-480.webp new file mode 100644 index 00000000..8bbf6833 Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-top-movies-480.webp differ diff --git a/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-top-movies-800.webp b/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-top-movies-800.webp new file mode 100644 index 00000000..8afbea5a Binary files /dev/null and b/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-top-movies-800.webp differ diff --git a/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/2dvs3d-1400.webp b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/2dvs3d-1400.webp new file mode 100644 index 00000000..b5cca9f8 Binary files /dev/null and b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/2dvs3d-1400.webp differ diff --git a/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/2dvs3d-480.webp b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/2dvs3d-480.webp new file mode 100644 index 00000000..391cb62d Binary files /dev/null and b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/2dvs3d-480.webp differ diff --git a/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/2dvs3d-800.webp b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/2dvs3d-800.webp new file mode 100644 index 00000000..b5cca9f8 Binary files /dev/null and b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/2dvs3d-800.webp differ diff --git a/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/Interpolation-1400.webp b/assets/img/2023-12-09-Alive Scene 
Enchanting images with Semantic Embedding/Interpolation-1400.webp new file mode 100644 index 00000000..f68e4499 Binary files /dev/null and b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/Interpolation-1400.webp differ diff --git a/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/Interpolation-480.webp b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/Interpolation-480.webp new file mode 100644 index 00000000..d3b65f4d Binary files /dev/null and b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/Interpolation-480.webp differ diff --git a/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/Interpolation-800.webp b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/Interpolation-800.webp new file mode 100644 index 00000000..f68e4499 Binary files /dev/null and b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/Interpolation-800.webp differ diff --git a/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/Post-curing-1400.webp b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/Post-curing-1400.webp new file mode 100644 index 00000000..95fcc6b7 Binary files /dev/null and b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/Post-curing-1400.webp differ diff --git a/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/Post-curing-480.webp b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/Post-curing-480.webp new file mode 100644 index 00000000..befc7645 Binary files /dev/null and b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/Post-curing-480.webp differ diff --git a/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/Post-curing-800.webp b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic 
Embedding/Post-curing-800.webp new file mode 100644 index 00000000..95fcc6b7 Binary files /dev/null and b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/Post-curing-800.webp differ diff --git a/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/PrbabilisticModel-1400.webp b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/PrbabilisticModel-1400.webp new file mode 100644 index 00000000..f9c7661d Binary files /dev/null and b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/PrbabilisticModel-1400.webp differ diff --git a/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/PrbabilisticModel-480.webp b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/PrbabilisticModel-480.webp new file mode 100644 index 00000000..344e6e09 Binary files /dev/null and b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/PrbabilisticModel-480.webp differ diff --git a/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/PrbabilisticModel-800.webp b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/PrbabilisticModel-800.webp new file mode 100644 index 00000000..f9c7661d Binary files /dev/null and b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/PrbabilisticModel-800.webp differ diff --git a/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/SIREN_DECODER-1400.webp b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/SIREN_DECODER-1400.webp new file mode 100644 index 00000000..65e7ab11 Binary files /dev/null and b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/SIREN_DECODER-1400.webp differ diff --git a/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/SIREN_DECODER-480.webp b/assets/img/2023-12-09-Alive Scene Enchanting images with 
Semantic Embedding/SIREN_DECODER-480.webp new file mode 100644 index 00000000..86f2d0f5 Binary files /dev/null and b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/SIREN_DECODER-480.webp differ diff --git a/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/SIREN_DECODER-800.webp b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/SIREN_DECODER-800.webp new file mode 100644 index 00000000..65e7ab11 Binary files /dev/null and b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/SIREN_DECODER-800.webp differ diff --git a/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/TEMPERATURE-1400.webp b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/TEMPERATURE-1400.webp new file mode 100644 index 00000000..96b0ef27 Binary files /dev/null and b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/TEMPERATURE-1400.webp differ diff --git a/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/TEMPERATURE-480.webp b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/TEMPERATURE-480.webp new file mode 100644 index 00000000..d41e3578 Binary files /dev/null and b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/TEMPERATURE-480.webp differ diff --git a/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/TEMPERATURE-800.webp b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/TEMPERATURE-800.webp new file mode 100644 index 00000000..96b0ef27 Binary files /dev/null and b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/TEMPERATURE-800.webp differ diff --git a/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/pipeline-1400.webp b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/pipeline-1400.webp new file 
mode 100644 index 00000000..ebae4c71 Binary files /dev/null and b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/pipeline-1400.webp differ diff --git a/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/pipeline-480.webp b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/pipeline-480.webp new file mode 100644 index 00000000..bfad1bb0 Binary files /dev/null and b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/pipeline-480.webp differ diff --git a/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/pipeline-800.webp b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/pipeline-800.webp new file mode 100644 index 00000000..ebae4c71 Binary files /dev/null and b/assets/img/2023-12-09-Alive Scene Enchanting images with Semantic Embedding/pipeline-800.webp differ diff --git a/assets/img/2023-12-12-Autoencoder-1400.webp b/assets/img/2023-12-12-Autoencoder-1400.webp new file mode 100644 index 00000000..0edb09be Binary files /dev/null and b/assets/img/2023-12-12-Autoencoder-1400.webp differ diff --git a/assets/img/2023-12-12-Autoencoder-480.webp b/assets/img/2023-12-12-Autoencoder-480.webp new file mode 100644 index 00000000..a3fd5089 Binary files /dev/null and b/assets/img/2023-12-12-Autoencoder-480.webp differ diff --git a/assets/img/2023-12-12-Autoencoder-800.webp b/assets/img/2023-12-12-Autoencoder-800.webp new file mode 100644 index 00000000..0edb09be Binary files /dev/null and b/assets/img/2023-12-12-Autoencoder-800.webp differ diff --git a/assets/img/2023-12-12-Comparison-1400.webp b/assets/img/2023-12-12-Comparison-1400.webp new file mode 100644 index 00000000..03e32a4a Binary files /dev/null and b/assets/img/2023-12-12-Comparison-1400.webp differ diff --git a/assets/img/2023-12-12-Comparison-480.webp b/assets/img/2023-12-12-Comparison-480.webp new file mode 100644 index 00000000..a0f4da08 Binary files 
/dev/null and b/assets/img/2023-12-12-Comparison-480.webp differ diff --git a/assets/img/2023-12-12-Comparison-800.webp b/assets/img/2023-12-12-Comparison-800.webp new file mode 100644 index 00000000..03e32a4a Binary files /dev/null and b/assets/img/2023-12-12-Comparison-800.webp differ diff --git a/assets/img/2023-12-12-FinalArch-1400.webp b/assets/img/2023-12-12-FinalArch-1400.webp new file mode 100644 index 00000000..fb4743b8 Binary files /dev/null and b/assets/img/2023-12-12-FinalArch-1400.webp differ diff --git a/assets/img/2023-12-12-FinalArch-480.webp b/assets/img/2023-12-12-FinalArch-480.webp new file mode 100644 index 00000000..6593a35d Binary files /dev/null and b/assets/img/2023-12-12-FinalArch-480.webp differ diff --git a/assets/img/2023-12-12-FinalArch-800.webp b/assets/img/2023-12-12-FinalArch-800.webp new file mode 100644 index 00000000..fb4743b8 Binary files /dev/null and b/assets/img/2023-12-12-FinalArch-800.webp differ diff --git a/assets/img/2023-12-12-HMN-1400.webp b/assets/img/2023-12-12-HMN-1400.webp new file mode 100644 index 00000000..7551c564 Binary files /dev/null and b/assets/img/2023-12-12-HMN-1400.webp differ diff --git a/assets/img/2023-12-12-HMN-480.webp b/assets/img/2023-12-12-HMN-480.webp new file mode 100644 index 00000000..817237ef Binary files /dev/null and b/assets/img/2023-12-12-HMN-480.webp differ diff --git a/assets/img/2023-12-12-HMN-800.webp b/assets/img/2023-12-12-HMN-800.webp new file mode 100644 index 00000000..7551c564 Binary files /dev/null and b/assets/img/2023-12-12-HMN-800.webp differ diff --git a/assets/img/2023-12-12-Iterated-Representation-Learning/ae-1400.webp b/assets/img/2023-12-12-Iterated-Representation-Learning/ae-1400.webp new file mode 100644 index 00000000..678fbe85 Binary files /dev/null and b/assets/img/2023-12-12-Iterated-Representation-Learning/ae-1400.webp differ diff --git a/assets/img/2023-12-12-Iterated-Representation-Learning/ae-480.webp 
b/assets/img/2023-12-12-Iterated-Representation-Learning/ae-480.webp new file mode 100644 index 00000000..fc484c92 Binary files /dev/null and b/assets/img/2023-12-12-Iterated-Representation-Learning/ae-480.webp differ diff --git a/assets/img/2023-12-12-Iterated-Representation-Learning/ae-800.webp b/assets/img/2023-12-12-Iterated-Representation-Learning/ae-800.webp new file mode 100644 index 00000000..678fbe85 Binary files /dev/null and b/assets/img/2023-12-12-Iterated-Representation-Learning/ae-800.webp differ diff --git a/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_test-1400.webp b/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_test-1400.webp new file mode 100644 index 00000000..5621b999 Binary files /dev/null and b/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_test-1400.webp differ diff --git a/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_test-480.webp b/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_test-480.webp new file mode 100644 index 00000000..77f46b34 Binary files /dev/null and b/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_test-480.webp differ diff --git a/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_test-800.webp b/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_test-800.webp new file mode 100644 index 00000000..5621b999 Binary files /dev/null and b/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_test-800.webp differ diff --git a/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_train-1400.webp b/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_train-1400.webp new file mode 100644 index 00000000..52176fb6 Binary files /dev/null and b/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_train-1400.webp differ diff --git 
a/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_train-480.webp b/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_train-480.webp new file mode 100644 index 00000000..3f227814 Binary files /dev/null and b/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_train-480.webp differ diff --git a/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_train-800.webp b/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_train-800.webp new file mode 100644 index 00000000..52176fb6 Binary files /dev/null and b/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_train-800.webp differ diff --git a/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_logloss-1400.webp b/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_logloss-1400.webp new file mode 100644 index 00000000..dec6ab8a Binary files /dev/null and b/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_logloss-1400.webp differ diff --git a/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_logloss-480.webp b/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_logloss-480.webp new file mode 100644 index 00000000..0de1b181 Binary files /dev/null and b/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_logloss-480.webp differ diff --git a/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_logloss-800.webp b/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_logloss-800.webp new file mode 100644 index 00000000..dec6ab8a Binary files /dev/null and b/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_logloss-800.webp differ diff --git a/assets/img/2023-12-12-Iterated-Representation-Learning/representation-1400.webp b/assets/img/2023-12-12-Iterated-Representation-Learning/representation-1400.webp new file mode 100644 index 00000000..2cc0aeb2 Binary files /dev/null and 
b/assets/img/2023-12-12-Iterated-Representation-Learning/representation-1400.webp differ diff --git a/assets/img/2023-12-12-Iterated-Representation-Learning/representation-480.webp b/assets/img/2023-12-12-Iterated-Representation-Learning/representation-480.webp new file mode 100644 index 00000000..77b94837 Binary files /dev/null and b/assets/img/2023-12-12-Iterated-Representation-Learning/representation-480.webp differ diff --git a/assets/img/2023-12-12-Iterated-Representation-Learning/representation-800.webp b/assets/img/2023-12-12-Iterated-Representation-Learning/representation-800.webp new file mode 100644 index 00000000..2cc0aeb2 Binary files /dev/null and b/assets/img/2023-12-12-Iterated-Representation-Learning/representation-800.webp differ diff --git a/assets/img/2023-12-12-Iterated-Representation-Learning/vae-1400.webp b/assets/img/2023-12-12-Iterated-Representation-Learning/vae-1400.webp new file mode 100644 index 00000000..2717d912 Binary files /dev/null and b/assets/img/2023-12-12-Iterated-Representation-Learning/vae-1400.webp differ diff --git a/assets/img/2023-12-12-Iterated-Representation-Learning/vae-480.webp b/assets/img/2023-12-12-Iterated-Representation-Learning/vae-480.webp new file mode 100644 index 00000000..1ce973f6 Binary files /dev/null and b/assets/img/2023-12-12-Iterated-Representation-Learning/vae-480.webp differ diff --git a/assets/img/2023-12-12-Iterated-Representation-Learning/vae-800.webp b/assets/img/2023-12-12-Iterated-Representation-Learning/vae-800.webp new file mode 100644 index 00000000..2717d912 Binary files /dev/null and b/assets/img/2023-12-12-Iterated-Representation-Learning/vae-800.webp differ diff --git a/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_digits-1400.webp b/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_digits-1400.webp new file mode 100644 index 00000000..095735bd Binary files /dev/null and b/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_digits-1400.webp differ 
diff --git a/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_digits-480.webp b/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_digits-480.webp new file mode 100644 index 00000000..dfe9ba65 Binary files /dev/null and b/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_digits-480.webp differ diff --git a/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_digits-800.webp b/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_digits-800.webp new file mode 100644 index 00000000..095735bd Binary files /dev/null and b/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_digits-800.webp differ diff --git a/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_logloss-1400.webp b/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_logloss-1400.webp new file mode 100644 index 00000000..3ab33ad8 Binary files /dev/null and b/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_logloss-1400.webp differ diff --git a/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_logloss-480.webp b/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_logloss-480.webp new file mode 100644 index 00000000..8ae419d8 Binary files /dev/null and b/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_logloss-480.webp differ diff --git a/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_logloss-800.webp b/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_logloss-800.webp new file mode 100644 index 00000000..3ab33ad8 Binary files /dev/null and b/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_logloss-800.webp differ diff --git a/assets/img/2023-12-12-LinearMode/LMC-1400.webp b/assets/img/2023-12-12-LinearMode/LMC-1400.webp new file mode 100644 index 00000000..82efc042 Binary files /dev/null and b/assets/img/2023-12-12-LinearMode/LMC-1400.webp differ diff --git a/assets/img/2023-12-12-LinearMode/LMC-480.webp 
b/assets/img/2023-12-12-LinearMode/LMC-480.webp new file mode 100644 index 00000000..13e99a33 Binary files /dev/null and b/assets/img/2023-12-12-LinearMode/LMC-480.webp differ diff --git a/assets/img/2023-12-12-LinearMode/LMC-800.webp b/assets/img/2023-12-12-LinearMode/LMC-800.webp new file mode 100644 index 00000000..82efc042 Binary files /dev/null and b/assets/img/2023-12-12-LinearMode/LMC-800.webp differ diff --git a/assets/img/2023-12-12-LinearMode/instability-1400.webp b/assets/img/2023-12-12-LinearMode/instability-1400.webp new file mode 100644 index 00000000..b2ec5532 Binary files /dev/null and b/assets/img/2023-12-12-LinearMode/instability-1400.webp differ diff --git a/assets/img/2023-12-12-LinearMode/instability-480.webp b/assets/img/2023-12-12-LinearMode/instability-480.webp new file mode 100644 index 00000000..c0c11cc3 Binary files /dev/null and b/assets/img/2023-12-12-LinearMode/instability-480.webp differ diff --git a/assets/img/2023-12-12-LinearMode/instability-800.webp b/assets/img/2023-12-12-LinearMode/instability-800.webp new file mode 100644 index 00000000..b2ec5532 Binary files /dev/null and b/assets/img/2023-12-12-LinearMode/instability-800.webp differ diff --git a/assets/img/2023-12-12-LinearMode/interpolation_loss-1400.webp b/assets/img/2023-12-12-LinearMode/interpolation_loss-1400.webp new file mode 100644 index 00000000..f44a76c1 Binary files /dev/null and b/assets/img/2023-12-12-LinearMode/interpolation_loss-1400.webp differ diff --git a/assets/img/2023-12-12-LinearMode/interpolation_loss-480.webp b/assets/img/2023-12-12-LinearMode/interpolation_loss-480.webp new file mode 100644 index 00000000..c4d53f6a Binary files /dev/null and b/assets/img/2023-12-12-LinearMode/interpolation_loss-480.webp differ diff --git a/assets/img/2023-12-12-LinearMode/interpolation_loss-800.webp b/assets/img/2023-12-12-LinearMode/interpolation_loss-800.webp new file mode 100644 index 00000000..f44a76c1 Binary files /dev/null and 
b/assets/img/2023-12-12-LinearMode/interpolation_loss-800.webp differ diff --git a/assets/img/2023-12-12-LinearMode/pruned_weights_as_mask-1400.webp b/assets/img/2023-12-12-LinearMode/pruned_weights_as_mask-1400.webp new file mode 100644 index 00000000..5a242e55 Binary files /dev/null and b/assets/img/2023-12-12-LinearMode/pruned_weights_as_mask-1400.webp differ diff --git a/assets/img/2023-12-12-LinearMode/pruned_weights_as_mask-480.webp b/assets/img/2023-12-12-LinearMode/pruned_weights_as_mask-480.webp new file mode 100644 index 00000000..568e1609 Binary files /dev/null and b/assets/img/2023-12-12-LinearMode/pruned_weights_as_mask-480.webp differ diff --git a/assets/img/2023-12-12-LinearMode/pruned_weights_as_mask-800.webp b/assets/img/2023-12-12-LinearMode/pruned_weights_as_mask-800.webp new file mode 100644 index 00000000..5a242e55 Binary files /dev/null and b/assets/img/2023-12-12-LinearMode/pruned_weights_as_mask-800.webp differ diff --git a/assets/img/2023-12-12-LinearMode/sparsity20percent-1400.webp b/assets/img/2023-12-12-LinearMode/sparsity20percent-1400.webp new file mode 100644 index 00000000..623db246 Binary files /dev/null and b/assets/img/2023-12-12-LinearMode/sparsity20percent-1400.webp differ diff --git a/assets/img/2023-12-12-LinearMode/sparsity20percent-480.webp b/assets/img/2023-12-12-LinearMode/sparsity20percent-480.webp new file mode 100644 index 00000000..610134e3 Binary files /dev/null and b/assets/img/2023-12-12-LinearMode/sparsity20percent-480.webp differ diff --git a/assets/img/2023-12-12-LinearMode/sparsity20percent-800.webp b/assets/img/2023-12-12-LinearMode/sparsity20percent-800.webp new file mode 100644 index 00000000..623db246 Binary files /dev/null and b/assets/img/2023-12-12-LinearMode/sparsity20percent-800.webp differ diff --git a/assets/img/2023-12-12-LinearMode/sparsity5percent-1400.webp b/assets/img/2023-12-12-LinearMode/sparsity5percent-1400.webp new file mode 100644 index 00000000..08398267 Binary files /dev/null and 
b/assets/img/2023-12-12-LinearMode/sparsity5percent-1400.webp differ diff --git a/assets/img/2023-12-12-LinearMode/sparsity5percent-480.webp b/assets/img/2023-12-12-LinearMode/sparsity5percent-480.webp new file mode 100644 index 00000000..06dc62ce Binary files /dev/null and b/assets/img/2023-12-12-LinearMode/sparsity5percent-480.webp differ diff --git a/assets/img/2023-12-12-LinearMode/sparsity5percent-800.webp b/assets/img/2023-12-12-LinearMode/sparsity5percent-800.webp new file mode 100644 index 00000000..08398267 Binary files /dev/null and b/assets/img/2023-12-12-LinearMode/sparsity5percent-800.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/Linear Projection Training Curve-1400.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/Linear Projection Training Curve-1400.webp new file mode 100644 index 00000000..4a1d5eb2 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/Linear Projection Training Curve-1400.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/Linear Projection Training Curve-480.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/Linear Projection Training Curve-480.webp new file mode 100644 index 00000000..3ca821f5 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/Linear Projection Training Curve-480.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/Linear Projection Training Curve-800.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/Linear Projection Training Curve-800.webp new file mode 100644 index 00000000..4a1d5eb2 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/Linear Projection 
Training Curve-800.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/MSE generating on test set-1400.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/MSE generating on test set-1400.webp new file mode 100644 index 00000000..37023810 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/MSE generating on test set-1400.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/MSE generating on test set-480.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/MSE generating on test set-480.webp new file mode 100644 index 00000000..8073c07a Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/MSE generating on test set-480.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/MSE generating on test set-800.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/MSE generating on test set-800.webp new file mode 100644 index 00000000..37023810 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/MSE generating on test set-800.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/MSE projection head-1400.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/MSE projection head-1400.webp new file mode 100644 index 00000000..0f72b6a5 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/MSE projection head-1400.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/MSE projection head-480.webp b/assets/img/2023-12-12-Recovering Latent Variables 
with VAEs despite Training Bias/MSE projection head-480.webp new file mode 100644 index 00000000..012b559e Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/MSE projection head-480.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/MSE projection head-800.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/MSE projection head-800.webp new file mode 100644 index 00000000..0f72b6a5 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/MSE projection head-800.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/approximate MI-1400.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/approximate MI-1400.webp new file mode 100644 index 00000000..31ff4c0c Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/approximate MI-1400.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/approximate MI-480.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/approximate MI-480.webp new file mode 100644 index 00000000..216cbee0 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/approximate MI-480.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/approximate MI-800.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/approximate MI-800.webp new file mode 100644 index 00000000..31ff4c0c Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/approximate MI-800.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with 
VAEs despite Training Bias/beta = 1-1400.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 1-1400.webp new file mode 100644 index 00000000..0326d129 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 1-1400.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 1-480.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 1-480.webp new file mode 100644 index 00000000..fc0f9d04 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 1-480.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 1-800.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 1-800.webp new file mode 100644 index 00000000..0326d129 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 1-800.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 10-1400.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 10-1400.webp new file mode 100644 index 00000000..67e11fb5 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 10-1400.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 10-480.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 10-480.webp new file mode 100644 index 00000000..76a5f4ae Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 10-480.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with 
VAEs despite Training Bias/beta = 10-800.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 10-800.webp new file mode 100644 index 00000000..67e11fb5 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 10-800.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 5-1400.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 5-1400.webp new file mode 100644 index 00000000..bc539fe0 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 5-1400.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 5-480.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 5-480.webp new file mode 100644 index 00000000..aca05810 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 5-480.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 5-800.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 5-800.webp new file mode 100644 index 00000000..bc539fe0 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta = 5-800.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta=1 mu=[0,2] train curves-1400.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta=1 mu=[0,2] train curves-1400.webp new file mode 100644 index 00000000..da5792e9 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta=1 mu=[0,2] train curves-1400.webp differ diff --git 
a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta=1 mu=[0,2] train curves-480.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta=1 mu=[0,2] train curves-480.webp new file mode 100644 index 00000000..50730554 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta=1 mu=[0,2] train curves-480.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta=1 mu=[0,2] train curves-800.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta=1 mu=[0,2] train curves-800.webp new file mode 100644 index 00000000..da5792e9 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta=1 mu=[0,2] train curves-800.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta=1, mu=[2] training curves-1400.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta=1, mu=[2] training curves-1400.webp new file mode 100644 index 00000000..ba8bb057 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta=1, mu=[2] training curves-1400.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta=1, mu=[2] training curves-480.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta=1, mu=[2] training curves-480.webp new file mode 100644 index 00000000..06e92338 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta=1, mu=[2] training curves-480.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta=1, mu=[2] training curves-800.webp 
b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta=1, mu=[2] training curves-800.webp new file mode 100644 index 00000000..ba8bb057 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/beta=1, mu=[2] training curves-800.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example test images-1400.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example test images-1400.webp new file mode 100644 index 00000000..eccd571a Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example test images-1400.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example test images-480.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example test images-480.webp new file mode 100644 index 00000000..06cf5c0a Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example test images-480.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example test images-800.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example test images-800.webp new file mode 100644 index 00000000..eccd571a Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example test images-800.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example training noised-1400.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example training noised-1400.webp new file mode 100644 index 00000000..68f33762 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with 
VAEs despite Training Bias/example training noised-1400.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example training noised-480.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example training noised-480.webp new file mode 100644 index 00000000..5cdc0481 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example training noised-480.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example training noised-800.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example training noised-800.webp new file mode 100644 index 00000000..68f33762 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example training noised-800.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example training-1400.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example training-1400.webp new file mode 100644 index 00000000..7afecb48 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example training-1400.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example training-480.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example training-480.webp new file mode 100644 index 00000000..dee35af7 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example training-480.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example training-800.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training 
Bias/example training-800.webp new file mode 100644 index 00000000..7afecb48 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/example training-800.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/mu=[0] training curves-1400.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/mu=[0] training curves-1400.webp new file mode 100644 index 00000000..181f0e77 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/mu=[0] training curves-1400.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/mu=[0] training curves-480.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/mu=[0] training curves-480.webp new file mode 100644 index 00000000..8e83258f Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/mu=[0] training curves-480.webp differ diff --git a/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/mu=[0] training curves-800.webp b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/mu=[0] training curves-800.webp new file mode 100644 index 00000000..181f0e77 Binary files /dev/null and b/assets/img/2023-12-12-Recovering Latent Variables with VAEs despite Training Bias/mu=[0] training curves-800.webp differ diff --git a/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/female_ex-1400.webp b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/female_ex-1400.webp new file mode 100644 index 00000000..529c427f Binary files /dev/null and b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/female_ex-1400.webp differ diff --git a/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/female_ex-480.webp 
b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/female_ex-480.webp new file mode 100644 index 00000000..f2449b87 Binary files /dev/null and b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/female_ex-480.webp differ diff --git a/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/female_ex-800.webp b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/female_ex-800.webp new file mode 100644 index 00000000..529c427f Binary files /dev/null and b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/female_ex-800.webp differ diff --git a/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/gradient-flows-1400.webp b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/gradient-flows-1400.webp new file mode 100644 index 00000000..525addad Binary files /dev/null and b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/gradient-flows-1400.webp differ diff --git a/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/gradient-flows-480.webp b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/gradient-flows-480.webp new file mode 100644 index 00000000..62bbc43b Binary files /dev/null and b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/gradient-flows-480.webp differ diff --git a/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/gradient-flows-800.webp b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/gradient-flows-800.webp new file mode 100644 index 00000000..525addad Binary files /dev/null and b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/gradient-flows-800.webp differ diff --git a/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/male_ex-1400.webp b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/male_ex-1400.webp new file mode 100644 index 00000000..358e390f Binary files /dev/null and b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/male_ex-1400.webp differ diff --git a/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/male_ex-480.webp 
b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/male_ex-480.webp new file mode 100644 index 00000000..d430b7c6 Binary files /dev/null and b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/male_ex-480.webp differ diff --git a/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/male_ex-800.webp b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/male_ex-800.webp new file mode 100644 index 00000000..358e390f Binary files /dev/null and b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/male_ex-800.webp differ diff --git a/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_adapt-1400.webp b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_adapt-1400.webp new file mode 100644 index 00000000..5041d018 Binary files /dev/null and b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_adapt-1400.webp differ diff --git a/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_adapt-480.webp b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_adapt-480.webp new file mode 100644 index 00000000..eda42a15 Binary files /dev/null and b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_adapt-480.webp differ diff --git a/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_adapt-800.webp b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_adapt-800.webp new file mode 100644 index 00000000..5041d018 Binary files /dev/null and b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_adapt-800.webp differ diff --git a/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_base-1400.webp b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_base-1400.webp new file mode 100644 index 00000000..6fc3195b Binary files /dev/null and b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_base-1400.webp differ diff --git a/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_base-480.webp 
b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_base-480.webp new file mode 100644 index 00000000..ce68402c Binary files /dev/null and b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_base-480.webp differ diff --git a/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_base-800.webp b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_base-800.webp new file mode 100644 index 00000000..6fc3195b Binary files /dev/null and b/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_base-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/Enc_dec-1400.webp b/assets/img/2023-12-12-VIVFormer/Enc_dec-1400.webp new file mode 100644 index 00000000..07b2ab85 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/Enc_dec-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/Enc_dec-480.webp b/assets/img/2023-12-12-VIVFormer/Enc_dec-480.webp new file mode 100644 index 00000000..ffa1ffd0 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/Enc_dec-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/Enc_dec-800.webp b/assets/img/2023-12-12-VIVFormer/Enc_dec-800.webp new file mode 100644 index 00000000..07b2ab85 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/Enc_dec-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/Forecasting_real-1400.webp b/assets/img/2023-12-12-VIVFormer/Forecasting_real-1400.webp new file mode 100644 index 00000000..d848d88e Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/Forecasting_real-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/Forecasting_real-480.webp b/assets/img/2023-12-12-VIVFormer/Forecasting_real-480.webp new file mode 100644 index 00000000..47d5baf8 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/Forecasting_real-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/Forecasting_real-800.webp b/assets/img/2023-12-12-VIVFormer/Forecasting_real-800.webp new file mode 100644 index 
00000000..d848d88e Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/Forecasting_real-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/Forecasting_synthetic-1400.webp b/assets/img/2023-12-12-VIVFormer/Forecasting_synthetic-1400.webp new file mode 100644 index 00000000..f873f0ff Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/Forecasting_synthetic-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/Forecasting_synthetic-480.webp b/assets/img/2023-12-12-VIVFormer/Forecasting_synthetic-480.webp new file mode 100644 index 00000000..561b72ea Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/Forecasting_synthetic-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/Forecasting_synthetic-800.webp b/assets/img/2023-12-12-VIVFormer/Forecasting_synthetic-800.webp new file mode 100644 index 00000000..f873f0ff Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/Forecasting_synthetic-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/GOPR0050_1-1400.webp b/assets/img/2023-12-12-VIVFormer/GOPR0050_1-1400.webp new file mode 100644 index 00000000..9b57f71e Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/GOPR0050_1-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/GOPR0050_1-480.webp b/assets/img/2023-12-12-VIVFormer/GOPR0050_1-480.webp new file mode 100644 index 00000000..c394e9aa Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/GOPR0050_1-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/GOPR0050_1-800.webp b/assets/img/2023-12-12-VIVFormer/GOPR0050_1-800.webp new file mode 100644 index 00000000..9b57f71e Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/GOPR0050_1-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/Image_Processing-1400.webp b/assets/img/2023-12-12-VIVFormer/Image_Processing-1400.webp new file mode 100644 index 00000000..d1739f94 Binary files /dev/null and 
b/assets/img/2023-12-12-VIVFormer/Image_Processing-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/Image_Processing-480.webp b/assets/img/2023-12-12-VIVFormer/Image_Processing-480.webp new file mode 100644 index 00000000..f734799f Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/Image_Processing-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/Image_Processing-800.webp b/assets/img/2023-12-12-VIVFormer/Image_Processing-800.webp new file mode 100644 index 00000000..d1739f94 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/Image_Processing-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/Intro-1400.webp b/assets/img/2023-12-12-VIVFormer/Intro-1400.webp new file mode 100644 index 00000000..75f37f95 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/Intro-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/Intro-480.webp b/assets/img/2023-12-12-VIVFormer/Intro-480.webp new file mode 100644 index 00000000..8b5ede68 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/Intro-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/Intro-800.webp b/assets/img/2023-12-12-VIVFormer/Intro-800.webp new file mode 100644 index 00000000..75f37f95 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/Intro-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/Intro2-1400.webp b/assets/img/2023-12-12-VIVFormer/Intro2-1400.webp new file mode 100644 index 00000000..dbd5f932 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/Intro2-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/Intro2-480.webp b/assets/img/2023-12-12-VIVFormer/Intro2-480.webp new file mode 100644 index 00000000..39103ac5 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/Intro2-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/Intro2-800.webp b/assets/img/2023-12-12-VIVFormer/Intro2-800.webp new file mode 100644 index 00000000..dbd5f932 Binary files /dev/null and 
b/assets/img/2023-12-12-VIVFormer/Intro2-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/RMS_real-1400.webp b/assets/img/2023-12-12-VIVFormer/RMS_real-1400.webp new file mode 100644 index 00000000..78bb5a2a Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/RMS_real-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/RMS_real-480.webp b/assets/img/2023-12-12-VIVFormer/RMS_real-480.webp new file mode 100644 index 00000000..ee25e1fb Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/RMS_real-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/RMS_real-800.webp b/assets/img/2023-12-12-VIVFormer/RMS_real-800.webp new file mode 100644 index 00000000..78bb5a2a Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/RMS_real-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/RMS_synthetic-1400.webp b/assets/img/2023-12-12-VIVFormer/RMS_synthetic-1400.webp new file mode 100644 index 00000000..eff25156 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/RMS_synthetic-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/RMS_synthetic-480.webp b/assets/img/2023-12-12-VIVFormer/RMS_synthetic-480.webp new file mode 100644 index 00000000..e4648a02 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/RMS_synthetic-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/RMS_synthetic-800.webp b/assets/img/2023-12-12-VIVFormer/RMS_synthetic-800.webp new file mode 100644 index 00000000..eff25156 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/RMS_synthetic-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/Transformer_architecture-1400.webp b/assets/img/2023-12-12-VIVFormer/Transformer_architecture-1400.webp new file mode 100644 index 00000000..a3102ef4 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/Transformer_architecture-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/Transformer_architecture-480.webp 
b/assets/img/2023-12-12-VIVFormer/Transformer_architecture-480.webp new file mode 100644 index 00000000..cf30061a Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/Transformer_architecture-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/Transformer_architecture-800.webp b/assets/img/2023-12-12-VIVFormer/Transformer_architecture-800.webp new file mode 100644 index 00000000..a3102ef4 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/Transformer_architecture-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/VAE-1400.webp b/assets/img/2023-12-12-VIVFormer/VAE-1400.webp new file mode 100644 index 00000000..fab067dc Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/VAE-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/VAE-480.webp b/assets/img/2023-12-12-VIVFormer/VAE-480.webp new file mode 100644 index 00000000..a177115b Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/VAE-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/VAE-800.webp b/assets/img/2023-12-12-VIVFormer/VAE-800.webp new file mode 100644 index 00000000..fab067dc Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/VAE-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/VAE_loss-1400.webp b/assets/img/2023-12-12-VIVFormer/VAE_loss-1400.webp new file mode 100644 index 00000000..ebb0b025 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/VAE_loss-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/VAE_loss-480.webp b/assets/img/2023-12-12-VIVFormer/VAE_loss-480.webp new file mode 100644 index 00000000..1e51a9a3 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/VAE_loss-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/VAE_loss-800.webp b/assets/img/2023-12-12-VIVFormer/VAE_loss-800.webp new file mode 100644 index 00000000..ebb0b025 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/VAE_loss-800.webp differ diff --git 
a/assets/img/2023-12-12-VIVFormer/auto_regression_difference_real-1400.webp b/assets/img/2023-12-12-VIVFormer/auto_regression_difference_real-1400.webp new file mode 100644 index 00000000..bb97ec9a Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/auto_regression_difference_real-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/auto_regression_difference_real-480.webp b/assets/img/2023-12-12-VIVFormer/auto_regression_difference_real-480.webp new file mode 100644 index 00000000..1b504556 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/auto_regression_difference_real-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/auto_regression_difference_real-800.webp b/assets/img/2023-12-12-VIVFormer/auto_regression_difference_real-800.webp new file mode 100644 index 00000000..bb97ec9a Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/auto_regression_difference_real-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/auto_regression_forecasting_real-1400.webp b/assets/img/2023-12-12-VIVFormer/auto_regression_forecasting_real-1400.webp new file mode 100644 index 00000000..00305039 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/auto_regression_forecasting_real-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/auto_regression_forecasting_real-480.webp b/assets/img/2023-12-12-VIVFormer/auto_regression_forecasting_real-480.webp new file mode 100644 index 00000000..421ce06f Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/auto_regression_forecasting_real-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/auto_regression_forecasting_real-800.webp b/assets/img/2023-12-12-VIVFormer/auto_regression_forecasting_real-800.webp new file mode 100644 index 00000000..00305039 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/auto_regression_forecasting_real-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/auto_regression_targets_real-1400.webp 
b/assets/img/2023-12-12-VIVFormer/auto_regression_targets_real-1400.webp new file mode 100644 index 00000000..c40d7bf0 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/auto_regression_targets_real-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/auto_regression_targets_real-480.webp b/assets/img/2023-12-12-VIVFormer/auto_regression_targets_real-480.webp new file mode 100644 index 00000000..f7a2a942 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/auto_regression_targets_real-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/auto_regression_targets_real-800.webp b/assets/img/2023-12-12-VIVFormer/auto_regression_targets_real-800.webp new file mode 100644 index 00000000..c40d7bf0 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/auto_regression_targets_real-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/difference_real-1400.webp b/assets/img/2023-12-12-VIVFormer/difference_real-1400.webp new file mode 100644 index 00000000..4c0c4539 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/difference_real-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/difference_real-480.webp b/assets/img/2023-12-12-VIVFormer/difference_real-480.webp new file mode 100644 index 00000000..28de8bf5 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/difference_real-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/difference_real-800.webp b/assets/img/2023-12-12-VIVFormer/difference_real-800.webp new file mode 100644 index 00000000..4c0c4539 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/difference_real-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/difference_synthetic-1400.webp b/assets/img/2023-12-12-VIVFormer/difference_synthetic-1400.webp new file mode 100644 index 00000000..93be03b0 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/difference_synthetic-1400.webp differ diff --git 
a/assets/img/2023-12-12-VIVFormer/difference_synthetic-480.webp b/assets/img/2023-12-12-VIVFormer/difference_synthetic-480.webp new file mode 100644 index 00000000..8dc7085f Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/difference_synthetic-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/difference_synthetic-800.webp b/assets/img/2023-12-12-VIVFormer/difference_synthetic-800.webp new file mode 100644 index 00000000..93be03b0 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/difference_synthetic-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/enc-1400.webp b/assets/img/2023-12-12-VIVFormer/enc-1400.webp new file mode 100644 index 00000000..7ede1fb6 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/enc-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/enc-480.webp b/assets/img/2023-12-12-VIVFormer/enc-480.webp new file mode 100644 index 00000000..83be1f9c Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/enc-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/enc-800.webp b/assets/img/2023-12-12-VIVFormer/enc-800.webp new file mode 100644 index 00000000..7ede1fb6 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/enc-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/ffn_module-1400.webp b/assets/img/2023-12-12-VIVFormer/ffn_module-1400.webp new file mode 100644 index 00000000..aee12260 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/ffn_module-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/ffn_module-480.webp b/assets/img/2023-12-12-VIVFormer/ffn_module-480.webp new file mode 100644 index 00000000..16e1c398 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/ffn_module-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/ffn_module-800.webp b/assets/img/2023-12-12-VIVFormer/ffn_module-800.webp new file mode 100644 index 00000000..aee12260 Binary files /dev/null and 
b/assets/img/2023-12-12-VIVFormer/ffn_module-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/full_schema_experiment-1400.webp b/assets/img/2023-12-12-VIVFormer/full_schema_experiment-1400.webp new file mode 100644 index 00000000..bfeee6b5 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/full_schema_experiment-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/full_schema_experiment-480.webp b/assets/img/2023-12-12-VIVFormer/full_schema_experiment-480.webp new file mode 100644 index 00000000..b030cfbf Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/full_schema_experiment-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/full_schema_experiment-800.webp b/assets/img/2023-12-12-VIVFormer/full_schema_experiment-800.webp new file mode 100644 index 00000000..bfeee6b5 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/full_schema_experiment-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/generated_VAE_1-1400.webp b/assets/img/2023-12-12-VIVFormer/generated_VAE_1-1400.webp new file mode 100644 index 00000000..6026603d Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/generated_VAE_1-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/generated_VAE_1-480.webp b/assets/img/2023-12-12-VIVFormer/generated_VAE_1-480.webp new file mode 100644 index 00000000..f5de1459 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/generated_VAE_1-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/generated_VAE_1-800.webp b/assets/img/2023-12-12-VIVFormer/generated_VAE_1-800.webp new file mode 100644 index 00000000..6026603d Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/generated_VAE_1-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/generated_VAE_2-1400.webp b/assets/img/2023-12-12-VIVFormer/generated_VAE_2-1400.webp new file mode 100644 index 00000000..1d15b414 Binary files /dev/null and 
b/assets/img/2023-12-12-VIVFormer/generated_VAE_2-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/generated_VAE_2-480.webp b/assets/img/2023-12-12-VIVFormer/generated_VAE_2-480.webp new file mode 100644 index 00000000..3c3df6e4 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/generated_VAE_2-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/generated_VAE_2-800.webp b/assets/img/2023-12-12-VIVFormer/generated_VAE_2-800.webp new file mode 100644 index 00000000..1d15b414 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/generated_VAE_2-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/generated_VAE_3-1400.webp b/assets/img/2023-12-12-VIVFormer/generated_VAE_3-1400.webp new file mode 100644 index 00000000..916ea2d5 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/generated_VAE_3-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/generated_VAE_3-480.webp b/assets/img/2023-12-12-VIVFormer/generated_VAE_3-480.webp new file mode 100644 index 00000000..9ed2f24f Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/generated_VAE_3-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/generated_VAE_3-800.webp b/assets/img/2023-12-12-VIVFormer/generated_VAE_3-800.webp new file mode 100644 index 00000000..916ea2d5 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/generated_VAE_3-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/generated_VAE_4-1400.webp b/assets/img/2023-12-12-VIVFormer/generated_VAE_4-1400.webp new file mode 100644 index 00000000..26242d0d Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/generated_VAE_4-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/generated_VAE_4-480.webp b/assets/img/2023-12-12-VIVFormer/generated_VAE_4-480.webp new file mode 100644 index 00000000..6825397e Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/generated_VAE_4-480.webp differ diff --git 
a/assets/img/2023-12-12-VIVFormer/generated_VAE_4-800.webp b/assets/img/2023-12-12-VIVFormer/generated_VAE_4-800.webp new file mode 100644 index 00000000..26242d0d Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/generated_VAE_4-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/interpolated_viv-1400.webp b/assets/img/2023-12-12-VIVFormer/interpolated_viv-1400.webp new file mode 100644 index 00000000..39473f63 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/interpolated_viv-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/interpolated_viv-480.webp b/assets/img/2023-12-12-VIVFormer/interpolated_viv-480.webp new file mode 100644 index 00000000..c5e17b35 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/interpolated_viv-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/interpolated_viv-800.webp b/assets/img/2023-12-12-VIVFormer/interpolated_viv-800.webp new file mode 100644 index 00000000..39473f63 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/interpolated_viv-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/mh_attn_module-1400.webp b/assets/img/2023-12-12-VIVFormer/mh_attn_module-1400.webp new file mode 100644 index 00000000..76afe179 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/mh_attn_module-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/mh_attn_module-480.webp b/assets/img/2023-12-12-VIVFormer/mh_attn_module-480.webp new file mode 100644 index 00000000..f252e582 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/mh_attn_module-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/mh_attn_module-800.webp b/assets/img/2023-12-12-VIVFormer/mh_attn_module-800.webp new file mode 100644 index 00000000..76afe179 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/mh_attn_module-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/pixelated_viv-1400.webp b/assets/img/2023-12-12-VIVFormer/pixelated_viv-1400.webp 
new file mode 100644 index 00000000..88fa3cd0 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/pixelated_viv-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/pixelated_viv-480.webp b/assets/img/2023-12-12-VIVFormer/pixelated_viv-480.webp new file mode 100644 index 00000000..56241720 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/pixelated_viv-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/pixelated_viv-800.webp b/assets/img/2023-12-12-VIVFormer/pixelated_viv-800.webp new file mode 100644 index 00000000..88fa3cd0 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/pixelated_viv-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/real_data_VIVformer_loss-1400.webp b/assets/img/2023-12-12-VIVFormer/real_data_VIVformer_loss-1400.webp new file mode 100644 index 00000000..b5fe591e Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/real_data_VIVformer_loss-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/real_data_VIVformer_loss-480.webp b/assets/img/2023-12-12-VIVFormer/real_data_VIVformer_loss-480.webp new file mode 100644 index 00000000..1e23bbc4 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/real_data_VIVformer_loss-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/real_data_VIVformer_loss-800.webp b/assets/img/2023-12-12-VIVFormer/real_data_VIVformer_loss-800.webp new file mode 100644 index 00000000..b5fe591e Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/real_data_VIVformer_loss-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/real_viv_1-1400.webp b/assets/img/2023-12-12-VIVFormer/real_viv_1-1400.webp new file mode 100644 index 00000000..bf91ef50 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/real_viv_1-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/real_viv_1-480.webp b/assets/img/2023-12-12-VIVFormer/real_viv_1-480.webp new file mode 100644 index 00000000..7ff4fc35 Binary files /dev/null and 
b/assets/img/2023-12-12-VIVFormer/real_viv_1-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/real_viv_1-800.webp b/assets/img/2023-12-12-VIVFormer/real_viv_1-800.webp new file mode 100644 index 00000000..bf91ef50 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/real_viv_1-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/real_viv_2-1400.webp b/assets/img/2023-12-12-VIVFormer/real_viv_2-1400.webp new file mode 100644 index 00000000..eafa24eb Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/real_viv_2-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/real_viv_2-480.webp b/assets/img/2023-12-12-VIVFormer/real_viv_2-480.webp new file mode 100644 index 00000000..0a1e000e Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/real_viv_2-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/real_viv_2-800.webp b/assets/img/2023-12-12-VIVFormer/real_viv_2-800.webp new file mode 100644 index 00000000..eafa24eb Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/real_viv_2-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/real_viv_3-1400.webp b/assets/img/2023-12-12-VIVFormer/real_viv_3-1400.webp new file mode 100644 index 00000000..7f27b603 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/real_viv_3-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/real_viv_3-480.webp b/assets/img/2023-12-12-VIVFormer/real_viv_3-480.webp new file mode 100644 index 00000000..d4817bab Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/real_viv_3-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/real_viv_3-800.webp b/assets/img/2023-12-12-VIVFormer/real_viv_3-800.webp new file mode 100644 index 00000000..7f27b603 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/real_viv_3-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/res_attn_module-1400.webp b/assets/img/2023-12-12-VIVFormer/res_attn_module-1400.webp new file mode 100644 index 
00000000..7b1e18f9 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/res_attn_module-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/res_attn_module-480.webp b/assets/img/2023-12-12-VIVFormer/res_attn_module-480.webp new file mode 100644 index 00000000..c6ffc293 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/res_attn_module-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/res_attn_module-800.webp b/assets/img/2023-12-12-VIVFormer/res_attn_module-800.webp new file mode 100644 index 00000000..7b1e18f9 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/res_attn_module-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/synthetic_VIVformer_loss-1400.webp b/assets/img/2023-12-12-VIVFormer/synthetic_VIVformer_loss-1400.webp new file mode 100644 index 00000000..cf493fa5 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/synthetic_VIVformer_loss-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/synthetic_VIVformer_loss-480.webp b/assets/img/2023-12-12-VIVFormer/synthetic_VIVformer_loss-480.webp new file mode 100644 index 00000000..6b212c44 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/synthetic_VIVformer_loss-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/synthetic_VIVformer_loss-800.webp b/assets/img/2023-12-12-VIVFormer/synthetic_VIVformer_loss-800.webp new file mode 100644 index 00000000..cf493fa5 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/synthetic_VIVformer_loss-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/targets_real-1400.webp b/assets/img/2023-12-12-VIVFormer/targets_real-1400.webp new file mode 100644 index 00000000..8660ffd3 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/targets_real-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/targets_real-480.webp b/assets/img/2023-12-12-VIVFormer/targets_real-480.webp new file mode 100644 index 00000000..cea09bef Binary files /dev/null and 
b/assets/img/2023-12-12-VIVFormer/targets_real-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/targets_real-800.webp b/assets/img/2023-12-12-VIVFormer/targets_real-800.webp new file mode 100644 index 00000000..8660ffd3 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/targets_real-800.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/targets_synthetic-1400.webp b/assets/img/2023-12-12-VIVFormer/targets_synthetic-1400.webp new file mode 100644 index 00000000..5e3081b8 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/targets_synthetic-1400.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/targets_synthetic-480.webp b/assets/img/2023-12-12-VIVFormer/targets_synthetic-480.webp new file mode 100644 index 00000000..6ad6ce09 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/targets_synthetic-480.webp differ diff --git a/assets/img/2023-12-12-VIVFormer/targets_synthetic-800.webp b/assets/img/2023-12-12-VIVFormer/targets_synthetic-800.webp new file mode 100644 index 00000000..5e3081b8 Binary files /dev/null and b/assets/img/2023-12-12-VIVFormer/targets_synthetic-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_bl_norms-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_bl_norms-1400.webp new file mode 100644 index 00000000..7a631d49 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_bl_norms-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_bl_norms-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_bl_norms-480.webp new file mode 100644 index 00000000..db2c84f9 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_bl_norms-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_bl_norms-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_bl_norms-800.webp new file mode 100644 index 00000000..7a631d49 Binary 
files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_bl_norms-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_frob_norms-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_frob_norms-1400.webp new file mode 100644 index 00000000..dcbfeb6c Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_frob_norms-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_frob_norms-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_frob_norms-480.webp new file mode 100644 index 00000000..fab2a48c Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_frob_norms-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_frob_norms-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_frob_norms-800.webp new file mode 100644 index 00000000..dcbfeb6c Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_frob_norms-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_spec_norms-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_spec_norms-1400.webp new file mode 100644 index 00000000..91caa111 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_spec_norms-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_spec_norms-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_spec_norms-480.webp new file mode 100644 index 00000000..1c9cf492 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_spec_norms-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_spec_norms-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_spec_norms-800.webp new file mode 100644 index 00000000..91caa111 Binary files /dev/null and 
b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_spec_norms-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_acc-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_acc-1400.webp new file mode 100644 index 00000000..1e9c5738 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_acc-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_acc-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_acc-480.webp new file mode 100644 index 00000000..32716ddf Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_acc-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_acc-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_acc-800.webp new file mode 100644 index 00000000..1e9c5738 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_acc-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_loss-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_loss-1400.webp new file mode 100644 index 00000000..f5b13112 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_loss-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_loss-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_loss-480.webp new file mode 100644 index 00000000..226a40d8 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_loss-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_loss-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_loss-800.webp new file mode 100644 index 00000000..f5b13112 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_loss-800.webp differ diff --git 
a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_acc-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_acc-1400.webp new file mode 100644 index 00000000..4ab2e308 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_acc-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_acc-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_acc-480.webp new file mode 100644 index 00000000..5154af93 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_acc-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_acc-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_acc-800.webp new file mode 100644 index 00000000..4ab2e308 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_acc-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_loss-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_loss-1400.webp new file mode 100644 index 00000000..fc7e4513 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_loss-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_loss-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_loss-480.webp new file mode 100644 index 00000000..6fdf33fd Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_loss-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_loss-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_loss-800.webp new file mode 100644 index 00000000..fc7e4513 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_loss-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_acc-1400.webp 
b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_acc-1400.webp new file mode 100644 index 00000000..55f46bac Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_acc-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_acc-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_acc-480.webp new file mode 100644 index 00000000..ae76098e Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_acc-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_acc-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_acc-800.webp new file mode 100644 index 00000000..55f46bac Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_acc-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_loss-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_loss-1400.webp new file mode 100644 index 00000000..05a76918 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_loss-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_loss-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_loss-480.webp new file mode 100644 index 00000000..6275a59f Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_loss-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_loss-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_loss-800.webp new file mode 100644 index 00000000..05a76918 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_loss-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_acc_sum-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_acc_sum-1400.webp new file 
mode 100644 index 00000000..bdd11ecd Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_acc_sum-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_acc_sum-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_acc_sum-480.webp new file mode 100644 index 00000000..c8aba405 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_acc_sum-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_acc_sum-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_acc_sum-800.webp new file mode 100644 index 00000000..bdd11ecd Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_acc_sum-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_loss_sum-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_loss_sum-1400.webp new file mode 100644 index 00000000..6dd73e7c Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_loss_sum-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_loss_sum-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_loss_sum-480.webp new file mode 100644 index 00000000..5f545a26 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_loss_sum-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_loss_sum-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_loss_sum-800.webp new file mode 100644 index 00000000..6dd73e7c Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_loss_sum-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_acc_sum-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_acc_sum-1400.webp new file mode 100644 index 00000000..fceaa7db Binary files 
/dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_acc_sum-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_acc_sum-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_acc_sum-480.webp new file mode 100644 index 00000000..d6ec4d6c Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_acc_sum-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_acc_sum-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_acc_sum-800.webp new file mode 100644 index 00000000..fceaa7db Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_acc_sum-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_loss_sum-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_loss_sum-1400.webp new file mode 100644 index 00000000..172b00ef Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_loss_sum-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_loss_sum-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_loss_sum-480.webp new file mode 100644 index 00000000..9a1fc825 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_loss_sum-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_loss_sum-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_loss_sum-800.webp new file mode 100644 index 00000000..172b00ef Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_loss_sum-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_bl_norms-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_bl_norms-1400.webp new file mode 100644 index 00000000..e22aa792 Binary files /dev/null and 
b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_bl_norms-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_bl_norms-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_bl_norms-480.webp new file mode 100644 index 00000000..e23dfb02 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_bl_norms-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_bl_norms-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_bl_norms-800.webp new file mode 100644 index 00000000..e22aa792 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_bl_norms-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_frob_norms-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_frob_norms-1400.webp new file mode 100644 index 00000000..f981b55e Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_frob_norms-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_frob_norms-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_frob_norms-480.webp new file mode 100644 index 00000000..8a5cd0ee Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_frob_norms-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_frob_norms-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_frob_norms-800.webp new file mode 100644 index 00000000..f981b55e Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_frob_norms-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_spec_norms-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_spec_norms-1400.webp new file mode 100644 index 00000000..bc2de4c5 Binary files /dev/null and 
b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_spec_norms-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_spec_norms-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_spec_norms-480.webp new file mode 100644 index 00000000..3ba6b9d6 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_spec_norms-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_spec_norms-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_spec_norms-800.webp new file mode 100644 index 00000000..bc2de4c5 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_spec_norms-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_bl_acc-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_bl_acc-1400.webp new file mode 100644 index 00000000..fb3b75b2 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_bl_acc-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_bl_acc-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_bl_acc-480.webp new file mode 100644 index 00000000..32ea090f Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_bl_acc-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_bl_acc-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_bl_acc-800.webp new file mode 100644 index 00000000..fb3b75b2 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_bl_acc-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_bl_loss-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_bl_loss-1400.webp new file mode 100644 index 00000000..2592a50b Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_bl_loss-1400.webp differ diff --git 
a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_bl_loss-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_bl_loss-480.webp new file mode 100644 index 00000000..1e923f65 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_bl_loss-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_bl_loss-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_bl_loss-800.webp new file mode 100644 index 00000000..2592a50b Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_bl_loss-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_acc-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_acc-1400.webp new file mode 100644 index 00000000..f1f53a66 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_acc-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_acc-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_acc-480.webp new file mode 100644 index 00000000..81c0575c Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_acc-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_acc-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_acc-800.webp new file mode 100644 index 00000000..f1f53a66 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_acc-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_loss-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_loss-1400.webp new file mode 100644 index 00000000..709871a1 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_loss-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_loss-480.webp 
b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_loss-480.webp new file mode 100644 index 00000000..9af70e9f Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_loss-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_loss-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_loss-800.webp new file mode 100644 index 00000000..709871a1 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_loss-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_acc-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_acc-1400.webp new file mode 100644 index 00000000..26fa3535 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_acc-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_acc-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_acc-480.webp new file mode 100644 index 00000000..eb331b53 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_acc-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_acc-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_acc-800.webp new file mode 100644 index 00000000..26fa3535 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_acc-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_loss-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_loss-1400.webp new file mode 100644 index 00000000..8713adbc Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_loss-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_loss-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_loss-480.webp new file 
mode 100644 index 00000000..f0ba9b80 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_loss-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_loss-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_loss-800.webp new file mode 100644 index 00000000..8713adbc Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_loss-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_acc_sum-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_acc_sum-1400.webp new file mode 100644 index 00000000..1fba421f Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_acc_sum-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_acc_sum-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_acc_sum-480.webp new file mode 100644 index 00000000..d357fcf8 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_acc_sum-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_acc_sum-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_acc_sum-800.webp new file mode 100644 index 00000000..1fba421f Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_acc_sum-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_loss_sum-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_loss_sum-1400.webp new file mode 100644 index 00000000..bb9cac12 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_loss_sum-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_loss_sum-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_loss_sum-480.webp new file mode 100644 index 00000000..f2b02c42 Binary files /dev/null 
and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_loss_sum-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_loss_sum-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_loss_sum-800.webp new file mode 100644 index 00000000..bb9cac12 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_loss_sum-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_acc_sum-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_acc_sum-1400.webp new file mode 100644 index 00000000..b400848e Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_acc_sum-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_acc_sum-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_acc_sum-480.webp new file mode 100644 index 00000000..76f98247 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_acc_sum-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_acc_sum-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_acc_sum-800.webp new file mode 100644 index 00000000..b400848e Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_acc_sum-800.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_loss_sum-1400.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_loss_sum-1400.webp new file mode 100644 index 00000000..58d9e200 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_loss_sum-1400.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_loss_sum-480.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_loss_sum-480.webp new file mode 100644 index 00000000..a5d38fab Binary files /dev/null and 
b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_loss_sum-480.webp differ diff --git a/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_loss_sum-800.webp b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_loss_sum-800.webp new file mode 100644 index 00000000..58d9e200 Binary files /dev/null and b/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_loss_sum-800.webp differ diff --git a/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/approach-1400.webp b/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/approach-1400.webp new file mode 100644 index 00000000..e780ae22 Binary files /dev/null and b/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/approach-1400.webp differ diff --git a/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/approach-480.webp b/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/approach-480.webp new file mode 100644 index 00000000..772e6f1a Binary files /dev/null and b/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/approach-480.webp differ diff --git a/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/approach-800.webp b/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/approach-800.webp new file mode 100644 index 00000000..e780ae22 Binary files /dev/null and b/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/approach-800.webp differ diff --git a/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic3-1400.webp b/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic3-1400.webp new file mode 100644 index 00000000..155eb667 Binary files /dev/null and b/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic3-1400.webp differ diff --git 
a/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic3-480.webp b/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic3-480.webp new file mode 100644 index 00000000..17423029 Binary files /dev/null and b/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic3-480.webp differ diff --git a/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic3-800.webp b/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic3-800.webp new file mode 100644 index 00000000..155eb667 Binary files /dev/null and b/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic3-800.webp differ diff --git a/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic4-1400.webp b/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic4-1400.webp new file mode 100644 index 00000000..b1009463 Binary files /dev/null and b/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic4-1400.webp differ diff --git a/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic4-480.webp b/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic4-480.webp new file mode 100644 index 00000000..d1eaa078 Binary files /dev/null and b/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic4-480.webp differ diff --git a/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic4-800.webp b/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic4-800.webp new file mode 100644 index 00000000..b1009463 Binary files /dev/null and b/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic4-800.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/Image (2)-1400.webp 
b/assets/img/2023-12-12-contrastivediffusion-image2video/Image (2)-1400.webp new file mode 100644 index 00000000..d823ef0d Binary files /dev/null and b/assets/img/2023-12-12-contrastivediffusion-image2video/Image (2)-1400.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/Image (2)-480.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/Image (2)-480.webp new file mode 100644 index 00000000..02551a2d Binary files /dev/null and b/assets/img/2023-12-12-contrastivediffusion-image2video/Image (2)-480.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/Image (2)-800.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/Image (2)-800.webp new file mode 100644 index 00000000..d823ef0d Binary files /dev/null and b/assets/img/2023-12-12-contrastivediffusion-image2video/Image (2)-800.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/Image (3)-1400.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/Image (3)-1400.webp new file mode 100644 index 00000000..7ec61df0 Binary files /dev/null and b/assets/img/2023-12-12-contrastivediffusion-image2video/Image (3)-1400.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/Image (3)-480.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/Image (3)-480.webp new file mode 100644 index 00000000..32474629 Binary files /dev/null and b/assets/img/2023-12-12-contrastivediffusion-image2video/Image (3)-480.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/Image (3)-800.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/Image (3)-800.webp new file mode 100644 index 00000000..7ec61df0 Binary files /dev/null and b/assets/img/2023-12-12-contrastivediffusion-image2video/Image (3)-800.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/Image (4)-1400.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/Image (4)-1400.webp 
new file mode 100644 index 00000000..19622303 Binary files /dev/null and b/assets/img/2023-12-12-contrastivediffusion-image2video/Image (4)-1400.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/Image (4)-480.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/Image (4)-480.webp new file mode 100644 index 00000000..4c847af7 Binary files /dev/null and b/assets/img/2023-12-12-contrastivediffusion-image2video/Image (4)-480.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/Image (4)-800.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/Image (4)-800.webp new file mode 100644 index 00000000..19622303 Binary files /dev/null and b/assets/img/2023-12-12-contrastivediffusion-image2video/Image (4)-800.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/Image-1400.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/Image-1400.webp new file mode 100644 index 00000000..f7bcffc1 Binary files /dev/null and b/assets/img/2023-12-12-contrastivediffusion-image2video/Image-1400.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/Image-480.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/Image-480.webp new file mode 100644 index 00000000..302389b0 Binary files /dev/null and b/assets/img/2023-12-12-contrastivediffusion-image2video/Image-480.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/Image-800.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/Image-800.webp new file mode 100644 index 00000000..f7bcffc1 Binary files /dev/null and b/assets/img/2023-12-12-contrastivediffusion-image2video/Image-800.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/P2P_data-1400.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/P2P_data-1400.webp new file mode 100644 index 00000000..1b1213d0 Binary files /dev/null and 
b/assets/img/2023-12-12-contrastivediffusion-image2video/P2P_data-1400.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/P2P_data-480.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/P2P_data-480.webp new file mode 100644 index 00000000..bdfccc66 Binary files /dev/null and b/assets/img/2023-12-12-contrastivediffusion-image2video/P2P_data-480.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/P2P_data-800.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/P2P_data-800.webp new file mode 100644 index 00000000..1b1213d0 Binary files /dev/null and b/assets/img/2023-12-12-contrastivediffusion-image2video/P2P_data-800.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/contrastiveshapes-1400.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/contrastiveshapes-1400.webp new file mode 100644 index 00000000..60acffa5 Binary files /dev/null and b/assets/img/2023-12-12-contrastivediffusion-image2video/contrastiveshapes-1400.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/contrastiveshapes-480.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/contrastiveshapes-480.webp new file mode 100644 index 00000000..8da9fbd5 Binary files /dev/null and b/assets/img/2023-12-12-contrastivediffusion-image2video/contrastiveshapes-480.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/contrastiveshapes-800.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/contrastiveshapes-800.webp new file mode 100644 index 00000000..60acffa5 Binary files /dev/null and b/assets/img/2023-12-12-contrastivediffusion-image2video/contrastiveshapes-800.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/sample_images-1400.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/sample_images-1400.webp new file mode 100644 index 00000000..df5b1b59 Binary files /dev/null and 
b/assets/img/2023-12-12-contrastivediffusion-image2video/sample_images-1400.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/sample_images-480.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/sample_images-480.webp new file mode 100644 index 00000000..3bd5b280 Binary files /dev/null and b/assets/img/2023-12-12-contrastivediffusion-image2video/sample_images-480.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/sample_images-800.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/sample_images-800.webp new file mode 100644 index 00000000..df5b1b59 Binary files /dev/null and b/assets/img/2023-12-12-contrastivediffusion-image2video/sample_images-800.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/sample_videos-1400.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/sample_videos-1400.webp new file mode 100644 index 00000000..b80f6f5c Binary files /dev/null and b/assets/img/2023-12-12-contrastivediffusion-image2video/sample_videos-1400.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/sample_videos-480.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/sample_videos-480.webp new file mode 100644 index 00000000..c2982866 Binary files /dev/null and b/assets/img/2023-12-12-contrastivediffusion-image2video/sample_videos-480.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/sample_videos-800.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/sample_videos-800.webp new file mode 100644 index 00000000..b80f6f5c Binary files /dev/null and b/assets/img/2023-12-12-contrastivediffusion-image2video/sample_videos-800.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/webvid_processed-1400.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/webvid_processed-1400.webp new file mode 100644 index 00000000..45ed78e0 Binary files /dev/null and 
b/assets/img/2023-12-12-contrastivediffusion-image2video/webvid_processed-1400.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/webvid_processed-480.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/webvid_processed-480.webp new file mode 100644 index 00000000..cbe9ee67 Binary files /dev/null and b/assets/img/2023-12-12-contrastivediffusion-image2video/webvid_processed-480.webp differ diff --git a/assets/img/2023-12-12-contrastivediffusion-image2video/webvid_processed-800.webp b/assets/img/2023-12-12-contrastivediffusion-image2video/webvid_processed-800.webp new file mode 100644 index 00000000..45ed78e0 Binary files /dev/null and b/assets/img/2023-12-12-contrastivediffusion-image2video/webvid_processed-800.webp differ diff --git a/assets/img/2023-12-12-elephantfish-model/figure 1-1400.webp b/assets/img/2023-12-12-elephantfish-model/figure 1-1400.webp new file mode 100644 index 00000000..e8e32794 Binary files /dev/null and b/assets/img/2023-12-12-elephantfish-model/figure 1-1400.webp differ diff --git a/assets/img/2023-12-12-elephantfish-model/figure 1-480.webp b/assets/img/2023-12-12-elephantfish-model/figure 1-480.webp new file mode 100644 index 00000000..516fad87 Binary files /dev/null and b/assets/img/2023-12-12-elephantfish-model/figure 1-480.webp differ diff --git a/assets/img/2023-12-12-elephantfish-model/figure 1-800.webp b/assets/img/2023-12-12-elephantfish-model/figure 1-800.webp new file mode 100644 index 00000000..e8e32794 Binary files /dev/null and b/assets/img/2023-12-12-elephantfish-model/figure 1-800.webp differ diff --git a/assets/img/2023-12-12-elephantfish-model/figure 2 -- comparison epoch 10-1400.webp b/assets/img/2023-12-12-elephantfish-model/figure 2 -- comparison epoch 10-1400.webp new file mode 100644 index 00000000..899a4eca Binary files /dev/null and b/assets/img/2023-12-12-elephantfish-model/figure 2 -- comparison epoch 10-1400.webp differ diff --git 
a/assets/img/2023-12-12-elephantfish-model/figure 2 -- comparison epoch 10-480.webp b/assets/img/2023-12-12-elephantfish-model/figure 2 -- comparison epoch 10-480.webp new file mode 100644 index 00000000..7f7f6e5f Binary files /dev/null and b/assets/img/2023-12-12-elephantfish-model/figure 2 -- comparison epoch 10-480.webp differ diff --git a/assets/img/2023-12-12-elephantfish-model/figure 2 -- comparison epoch 10-800.webp b/assets/img/2023-12-12-elephantfish-model/figure 2 -- comparison epoch 10-800.webp new file mode 100644 index 00000000..899a4eca Binary files /dev/null and b/assets/img/2023-12-12-elephantfish-model/figure 2 -- comparison epoch 10-800.webp differ diff --git a/assets/img/2023-12-12-elephantfish-model/figure 3 feature distributions-1400.webp b/assets/img/2023-12-12-elephantfish-model/figure 3 feature distributions-1400.webp new file mode 100644 index 00000000..5a329300 Binary files /dev/null and b/assets/img/2023-12-12-elephantfish-model/figure 3 feature distributions-1400.webp differ diff --git a/assets/img/2023-12-12-elephantfish-model/figure 3 feature distributions-480.webp b/assets/img/2023-12-12-elephantfish-model/figure 3 feature distributions-480.webp new file mode 100644 index 00000000..b3710301 Binary files /dev/null and b/assets/img/2023-12-12-elephantfish-model/figure 3 feature distributions-480.webp differ diff --git a/assets/img/2023-12-12-elephantfish-model/figure 3 feature distributions-800.webp b/assets/img/2023-12-12-elephantfish-model/figure 3 feature distributions-800.webp new file mode 100644 index 00000000..5a329300 Binary files /dev/null and b/assets/img/2023-12-12-elephantfish-model/figure 3 feature distributions-800.webp differ diff --git a/assets/img/2023-12-12-elephantfish-model/figure 4 feature conclusions heatmap-1400.webp b/assets/img/2023-12-12-elephantfish-model/figure 4 feature conclusions heatmap-1400.webp new file mode 100644 index 00000000..197f6bed Binary files /dev/null and 
b/assets/img/2023-12-12-elephantfish-model/figure 4 feature conclusions heatmap-1400.webp differ diff --git a/assets/img/2023-12-12-elephantfish-model/figure 4 feature conclusions heatmap-480.webp b/assets/img/2023-12-12-elephantfish-model/figure 4 feature conclusions heatmap-480.webp new file mode 100644 index 00000000..eeb7ea2f Binary files /dev/null and b/assets/img/2023-12-12-elephantfish-model/figure 4 feature conclusions heatmap-480.webp differ diff --git a/assets/img/2023-12-12-elephantfish-model/figure 4 feature conclusions heatmap-800.webp b/assets/img/2023-12-12-elephantfish-model/figure 4 feature conclusions heatmap-800.webp new file mode 100644 index 00000000..197f6bed Binary files /dev/null and b/assets/img/2023-12-12-elephantfish-model/figure 4 feature conclusions heatmap-800.webp differ diff --git a/assets/img/2023-12-12-forbidden-facts/plot1-1400.webp b/assets/img/2023-12-12-forbidden-facts/plot1-1400.webp new file mode 100644 index 00000000..6eed2378 Binary files /dev/null and b/assets/img/2023-12-12-forbidden-facts/plot1-1400.webp differ diff --git a/assets/img/2023-12-12-forbidden-facts/plot1-480.webp b/assets/img/2023-12-12-forbidden-facts/plot1-480.webp new file mode 100644 index 00000000..e61d5fee Binary files /dev/null and b/assets/img/2023-12-12-forbidden-facts/plot1-480.webp differ diff --git a/assets/img/2023-12-12-forbidden-facts/plot1-800.webp b/assets/img/2023-12-12-forbidden-facts/plot1-800.webp new file mode 100644 index 00000000..6eed2378 Binary files /dev/null and b/assets/img/2023-12-12-forbidden-facts/plot1-800.webp differ diff --git a/assets/img/2023-12-12-forbidden-facts/plot2-1400.webp b/assets/img/2023-12-12-forbidden-facts/plot2-1400.webp new file mode 100644 index 00000000..84073879 Binary files /dev/null and b/assets/img/2023-12-12-forbidden-facts/plot2-1400.webp differ diff --git a/assets/img/2023-12-12-forbidden-facts/plot2-480.webp b/assets/img/2023-12-12-forbidden-facts/plot2-480.webp new file mode 100644 index 
00000000..c0692ba3 Binary files /dev/null and b/assets/img/2023-12-12-forbidden-facts/plot2-480.webp differ diff --git a/assets/img/2023-12-12-forbidden-facts/plot2-800.webp b/assets/img/2023-12-12-forbidden-facts/plot2-800.webp new file mode 100644 index 00000000..84073879 Binary files /dev/null and b/assets/img/2023-12-12-forbidden-facts/plot2-800.webp differ diff --git a/assets/img/2023-12-12-forbidden-facts/plot3-1400.webp b/assets/img/2023-12-12-forbidden-facts/plot3-1400.webp new file mode 100644 index 00000000..7c2ba4ab Binary files /dev/null and b/assets/img/2023-12-12-forbidden-facts/plot3-1400.webp differ diff --git a/assets/img/2023-12-12-forbidden-facts/plot3-480.webp b/assets/img/2023-12-12-forbidden-facts/plot3-480.webp new file mode 100644 index 00000000..4775680c Binary files /dev/null and b/assets/img/2023-12-12-forbidden-facts/plot3-480.webp differ diff --git a/assets/img/2023-12-12-forbidden-facts/plot3-800.webp b/assets/img/2023-12-12-forbidden-facts/plot3-800.webp new file mode 100644 index 00000000..7c2ba4ab Binary files /dev/null and b/assets/img/2023-12-12-forbidden-facts/plot3-800.webp differ diff --git a/assets/img/2023-12-12-forbidden-facts/plot4-1400.webp b/assets/img/2023-12-12-forbidden-facts/plot4-1400.webp new file mode 100644 index 00000000..dd31d293 Binary files /dev/null and b/assets/img/2023-12-12-forbidden-facts/plot4-1400.webp differ diff --git a/assets/img/2023-12-12-forbidden-facts/plot4-480.webp b/assets/img/2023-12-12-forbidden-facts/plot4-480.webp new file mode 100644 index 00000000..9d130c0f Binary files /dev/null and b/assets/img/2023-12-12-forbidden-facts/plot4-480.webp differ diff --git a/assets/img/2023-12-12-forbidden-facts/plot4-800.webp b/assets/img/2023-12-12-forbidden-facts/plot4-800.webp new file mode 100644 index 00000000..dd31d293 Binary files /dev/null and b/assets/img/2023-12-12-forbidden-facts/plot4-800.webp differ diff --git a/assets/img/2023-12-12-forbidden-facts/plot5-1400.webp 
b/assets/img/2023-12-12-forbidden-facts/plot5-1400.webp new file mode 100644 index 00000000..22a19c0b Binary files /dev/null and b/assets/img/2023-12-12-forbidden-facts/plot5-1400.webp differ diff --git a/assets/img/2023-12-12-forbidden-facts/plot5-480.webp b/assets/img/2023-12-12-forbidden-facts/plot5-480.webp new file mode 100644 index 00000000..ec6a342f Binary files /dev/null and b/assets/img/2023-12-12-forbidden-facts/plot5-480.webp differ diff --git a/assets/img/2023-12-12-forbidden-facts/plot5-800.webp b/assets/img/2023-12-12-forbidden-facts/plot5-800.webp new file mode 100644 index 00000000..22a19c0b Binary files /dev/null and b/assets/img/2023-12-12-forbidden-facts/plot5-800.webp differ diff --git a/assets/img/2023-12-12-forbidden-facts/plot6-1400.webp b/assets/img/2023-12-12-forbidden-facts/plot6-1400.webp new file mode 100644 index 00000000..ec401d93 Binary files /dev/null and b/assets/img/2023-12-12-forbidden-facts/plot6-1400.webp differ diff --git a/assets/img/2023-12-12-forbidden-facts/plot6-480.webp b/assets/img/2023-12-12-forbidden-facts/plot6-480.webp new file mode 100644 index 00000000..9d66c9b5 Binary files /dev/null and b/assets/img/2023-12-12-forbidden-facts/plot6-480.webp differ diff --git a/assets/img/2023-12-12-forbidden-facts/plot6-800.webp b/assets/img/2023-12-12-forbidden-facts/plot6-800.webp new file mode 100644 index 00000000..ec401d93 Binary files /dev/null and b/assets/img/2023-12-12-forbidden-facts/plot6-800.webp differ diff --git a/assets/img/2023-12-12-geneformer-project/rough_structure-1400.webp b/assets/img/2023-12-12-geneformer-project/rough_structure-1400.webp new file mode 100644 index 00000000..bf5dbec3 Binary files /dev/null and b/assets/img/2023-12-12-geneformer-project/rough_structure-1400.webp differ diff --git a/assets/img/2023-12-12-geneformer-project/rough_structure-480.webp b/assets/img/2023-12-12-geneformer-project/rough_structure-480.webp new file mode 100644 index 00000000..7b5cc55d Binary files /dev/null and 
b/assets/img/2023-12-12-geneformer-project/rough_structure-480.webp differ diff --git a/assets/img/2023-12-12-geneformer-project/rough_structure-800.webp b/assets/img/2023-12-12-geneformer-project/rough_structure-800.webp new file mode 100644 index 00000000..bf5dbec3 Binary files /dev/null and b/assets/img/2023-12-12-geneformer-project/rough_structure-800.webp differ diff --git a/assets/img/2023-12-12-overparameterization/Fig1-1400.webp b/assets/img/2023-12-12-overparameterization/Fig1-1400.webp new file mode 100644 index 00000000..9ee0f324 Binary files /dev/null and b/assets/img/2023-12-12-overparameterization/Fig1-1400.webp differ diff --git a/assets/img/2023-12-12-overparameterization/Fig1-480.webp b/assets/img/2023-12-12-overparameterization/Fig1-480.webp new file mode 100644 index 00000000..6994a978 Binary files /dev/null and b/assets/img/2023-12-12-overparameterization/Fig1-480.webp differ diff --git a/assets/img/2023-12-12-overparameterization/Fig1-800.webp b/assets/img/2023-12-12-overparameterization/Fig1-800.webp new file mode 100644 index 00000000..9ee0f324 Binary files /dev/null and b/assets/img/2023-12-12-overparameterization/Fig1-800.webp differ diff --git a/assets/img/2023-12-12-overparameterization/Fig2-1400.webp b/assets/img/2023-12-12-overparameterization/Fig2-1400.webp new file mode 100644 index 00000000..6ff5fe71 Binary files /dev/null and b/assets/img/2023-12-12-overparameterization/Fig2-1400.webp differ diff --git a/assets/img/2023-12-12-overparameterization/Fig2-480.webp b/assets/img/2023-12-12-overparameterization/Fig2-480.webp new file mode 100644 index 00000000..59feda07 Binary files /dev/null and b/assets/img/2023-12-12-overparameterization/Fig2-480.webp differ diff --git a/assets/img/2023-12-12-overparameterization/Fig2-800.webp b/assets/img/2023-12-12-overparameterization/Fig2-800.webp new file mode 100644 index 00000000..6ff5fe71 Binary files /dev/null and b/assets/img/2023-12-12-overparameterization/Fig2-800.webp differ diff --git 
a/assets/img/2023-12-12-overparameterization/Fig3-1400.webp b/assets/img/2023-12-12-overparameterization/Fig3-1400.webp new file mode 100644 index 00000000..4e21de43 Binary files /dev/null and b/assets/img/2023-12-12-overparameterization/Fig3-1400.webp differ diff --git a/assets/img/2023-12-12-overparameterization/Fig3-480.webp b/assets/img/2023-12-12-overparameterization/Fig3-480.webp new file mode 100644 index 00000000..5909baca Binary files /dev/null and b/assets/img/2023-12-12-overparameterization/Fig3-480.webp differ diff --git a/assets/img/2023-12-12-overparameterization/Fig3-800.webp b/assets/img/2023-12-12-overparameterization/Fig3-800.webp new file mode 100644 index 00000000..4e21de43 Binary files /dev/null and b/assets/img/2023-12-12-overparameterization/Fig3-800.webp differ diff --git a/assets/img/2023-12-12-overparameterization/Fig4-1400.webp b/assets/img/2023-12-12-overparameterization/Fig4-1400.webp new file mode 100644 index 00000000..a0b5b8e6 Binary files /dev/null and b/assets/img/2023-12-12-overparameterization/Fig4-1400.webp differ diff --git a/assets/img/2023-12-12-overparameterization/Fig4-480.webp b/assets/img/2023-12-12-overparameterization/Fig4-480.webp new file mode 100644 index 00000000..34f3f6e5 Binary files /dev/null and b/assets/img/2023-12-12-overparameterization/Fig4-480.webp differ diff --git a/assets/img/2023-12-12-overparameterization/Fig4-800.webp b/assets/img/2023-12-12-overparameterization/Fig4-800.webp new file mode 100644 index 00000000..a0b5b8e6 Binary files /dev/null and b/assets/img/2023-12-12-overparameterization/Fig4-800.webp differ diff --git a/assets/img/2023-12-12-physics_loss/fig1-1400.webp b/assets/img/2023-12-12-physics_loss/fig1-1400.webp new file mode 100644 index 00000000..806ed852 Binary files /dev/null and b/assets/img/2023-12-12-physics_loss/fig1-1400.webp differ diff --git a/assets/img/2023-12-12-physics_loss/fig1-480.webp b/assets/img/2023-12-12-physics_loss/fig1-480.webp new file mode 100644 index 
00000000..bc1e8591 Binary files /dev/null and b/assets/img/2023-12-12-physics_loss/fig1-480.webp differ diff --git a/assets/img/2023-12-12-physics_loss/fig1-800.webp b/assets/img/2023-12-12-physics_loss/fig1-800.webp new file mode 100644 index 00000000..806ed852 Binary files /dev/null and b/assets/img/2023-12-12-physics_loss/fig1-800.webp differ diff --git a/assets/img/2023-12-12-physics_loss/fig2-1400.webp b/assets/img/2023-12-12-physics_loss/fig2-1400.webp new file mode 100644 index 00000000..f97e888b Binary files /dev/null and b/assets/img/2023-12-12-physics_loss/fig2-1400.webp differ diff --git a/assets/img/2023-12-12-physics_loss/fig2-480.webp b/assets/img/2023-12-12-physics_loss/fig2-480.webp new file mode 100644 index 00000000..85c05b29 Binary files /dev/null and b/assets/img/2023-12-12-physics_loss/fig2-480.webp differ diff --git a/assets/img/2023-12-12-physics_loss/fig2-800.webp b/assets/img/2023-12-12-physics_loss/fig2-800.webp new file mode 100644 index 00000000..f97e888b Binary files /dev/null and b/assets/img/2023-12-12-physics_loss/fig2-800.webp differ diff --git a/assets/img/2023-12-12-physics_loss/fig3-1400.webp b/assets/img/2023-12-12-physics_loss/fig3-1400.webp new file mode 100644 index 00000000..ede7a422 Binary files /dev/null and b/assets/img/2023-12-12-physics_loss/fig3-1400.webp differ diff --git a/assets/img/2023-12-12-physics_loss/fig3-480.webp b/assets/img/2023-12-12-physics_loss/fig3-480.webp new file mode 100644 index 00000000..1e90f90d Binary files /dev/null and b/assets/img/2023-12-12-physics_loss/fig3-480.webp differ diff --git a/assets/img/2023-12-12-physics_loss/fig3-800.webp b/assets/img/2023-12-12-physics_loss/fig3-800.webp new file mode 100644 index 00000000..ede7a422 Binary files /dev/null and b/assets/img/2023-12-12-physics_loss/fig3-800.webp differ diff --git a/assets/img/2023-12-12-physics_loss/fig4-1400.webp b/assets/img/2023-12-12-physics_loss/fig4-1400.webp new file mode 100644 index 00000000..2c318976 Binary files 
/dev/null and b/assets/img/2023-12-12-physics_loss/fig4-1400.webp differ diff --git a/assets/img/2023-12-12-physics_loss/fig4-480.webp b/assets/img/2023-12-12-physics_loss/fig4-480.webp new file mode 100644 index 00000000..0b2bec79 Binary files /dev/null and b/assets/img/2023-12-12-physics_loss/fig4-480.webp differ diff --git a/assets/img/2023-12-12-physics_loss/fig4-800.webp b/assets/img/2023-12-12-physics_loss/fig4-800.webp new file mode 100644 index 00000000..2c318976 Binary files /dev/null and b/assets/img/2023-12-12-physics_loss/fig4-800.webp differ diff --git a/assets/img/2023-12-12-physics_loss/fig4b-1400.webp b/assets/img/2023-12-12-physics_loss/fig4b-1400.webp new file mode 100644 index 00000000..6eecdc9a Binary files /dev/null and b/assets/img/2023-12-12-physics_loss/fig4b-1400.webp differ diff --git a/assets/img/2023-12-12-physics_loss/fig4b-480.webp b/assets/img/2023-12-12-physics_loss/fig4b-480.webp new file mode 100644 index 00000000..e069b412 Binary files /dev/null and b/assets/img/2023-12-12-physics_loss/fig4b-480.webp differ diff --git a/assets/img/2023-12-12-physics_loss/fig4b-800.webp b/assets/img/2023-12-12-physics_loss/fig4b-800.webp new file mode 100644 index 00000000..6eecdc9a Binary files /dev/null and b/assets/img/2023-12-12-physics_loss/fig4b-800.webp differ diff --git a/assets/img/2023-12-12-physics_loss/fig5a-1400.webp b/assets/img/2023-12-12-physics_loss/fig5a-1400.webp new file mode 100644 index 00000000..bf5944ed Binary files /dev/null and b/assets/img/2023-12-12-physics_loss/fig5a-1400.webp differ diff --git a/assets/img/2023-12-12-physics_loss/fig5a-480.webp b/assets/img/2023-12-12-physics_loss/fig5a-480.webp new file mode 100644 index 00000000..660f591e Binary files /dev/null and b/assets/img/2023-12-12-physics_loss/fig5a-480.webp differ diff --git a/assets/img/2023-12-12-physics_loss/fig5a-800.webp b/assets/img/2023-12-12-physics_loss/fig5a-800.webp new file mode 100644 index 00000000..bf5944ed Binary files /dev/null and 
b/assets/img/2023-12-12-physics_loss/fig5a-800.webp differ diff --git a/assets/img/2023-12-12-physics_loss/fig5b-1400.webp b/assets/img/2023-12-12-physics_loss/fig5b-1400.webp new file mode 100644 index 00000000..cb669227 Binary files /dev/null and b/assets/img/2023-12-12-physics_loss/fig5b-1400.webp differ diff --git a/assets/img/2023-12-12-physics_loss/fig5b-480.webp b/assets/img/2023-12-12-physics_loss/fig5b-480.webp new file mode 100644 index 00000000..93ea5b23 Binary files /dev/null and b/assets/img/2023-12-12-physics_loss/fig5b-480.webp differ diff --git a/assets/img/2023-12-12-physics_loss/fig5b-800.webp b/assets/img/2023-12-12-physics_loss/fig5b-800.webp new file mode 100644 index 00000000..cb669227 Binary files /dev/null and b/assets/img/2023-12-12-physics_loss/fig5b-800.webp differ diff --git a/assets/img/2023-12-12-physics_loss/fig6-1400.webp b/assets/img/2023-12-12-physics_loss/fig6-1400.webp new file mode 100644 index 00000000..e4a9a44c Binary files /dev/null and b/assets/img/2023-12-12-physics_loss/fig6-1400.webp differ diff --git a/assets/img/2023-12-12-physics_loss/fig6-480.webp b/assets/img/2023-12-12-physics_loss/fig6-480.webp new file mode 100644 index 00000000..7cf9268c Binary files /dev/null and b/assets/img/2023-12-12-physics_loss/fig6-480.webp differ diff --git a/assets/img/2023-12-12-physics_loss/fig6-800.webp b/assets/img/2023-12-12-physics_loss/fig6-800.webp new file mode 100644 index 00000000..e4a9a44c Binary files /dev/null and b/assets/img/2023-12-12-physics_loss/fig6-800.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/block-1400.webp b/assets/img/2023-12-12-projected-fff-networks/block-1400.webp new file mode 100644 index 00000000..359b55b6 Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/block-1400.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/block-480.webp b/assets/img/2023-12-12-projected-fff-networks/block-480.webp new file mode 100644 index 00000000..a424af4c 
Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/block-480.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/block-800.webp b/assets/img/2023-12-12-projected-fff-networks/block-800.webp new file mode 100644 index 00000000..359b55b6 Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/block-800.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/distil-1400.webp b/assets/img/2023-12-12-projected-fff-networks/distil-1400.webp new file mode 100644 index 00000000..48ca4e1e Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/distil-1400.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/distil-480.webp b/assets/img/2023-12-12-projected-fff-networks/distil-480.webp new file mode 100644 index 00000000..8c60493b Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/distil-480.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/distil-800.webp b/assets/img/2023-12-12-projected-fff-networks/distil-800.webp new file mode 100644 index 00000000..48ca4e1e Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/distil-800.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/gauss-1400.webp b/assets/img/2023-12-12-projected-fff-networks/gauss-1400.webp new file mode 100644 index 00000000..abf4bdd7 Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/gauss-1400.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/gauss-480.webp b/assets/img/2023-12-12-projected-fff-networks/gauss-480.webp new file mode 100644 index 00000000..57759fc0 Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/gauss-480.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/gauss-800.webp b/assets/img/2023-12-12-projected-fff-networks/gauss-800.webp new file mode 100644 index 00000000..abf4bdd7 Binary files /dev/null and 
b/assets/img/2023-12-12-projected-fff-networks/gauss-800.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/la_100-1400.webp b/assets/img/2023-12-12-projected-fff-networks/la_100-1400.webp new file mode 100644 index 00000000..3fe0320f Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/la_100-1400.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/la_100-480.webp b/assets/img/2023-12-12-projected-fff-networks/la_100-480.webp new file mode 100644 index 00000000..c15bf2c8 Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/la_100-480.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/la_100-800.webp b/assets/img/2023-12-12-projected-fff-networks/la_100-800.webp new file mode 100644 index 00000000..3fe0320f Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/la_100-800.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/la_100_bad-1400.webp b/assets/img/2023-12-12-projected-fff-networks/la_100_bad-1400.webp new file mode 100644 index 00000000..6f72df24 Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/la_100_bad-1400.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/la_100_bad-480.webp b/assets/img/2023-12-12-projected-fff-networks/la_100_bad-480.webp new file mode 100644 index 00000000..c8de12ac Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/la_100_bad-480.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/la_100_bad-800.webp b/assets/img/2023-12-12-projected-fff-networks/la_100_bad-800.webp new file mode 100644 index 00000000..6f72df24 Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/la_100_bad-800.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/mnist-1400.webp b/assets/img/2023-12-12-projected-fff-networks/mnist-1400.webp new file mode 100644 index 00000000..7071285c Binary files /dev/null 
and b/assets/img/2023-12-12-projected-fff-networks/mnist-1400.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/mnist-480.webp b/assets/img/2023-12-12-projected-fff-networks/mnist-480.webp new file mode 100644 index 00000000..c91239af Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/mnist-480.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/mnist-800.webp b/assets/img/2023-12-12-projected-fff-networks/mnist-800.webp new file mode 100644 index 00000000..7071285c Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/mnist-800.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/non-distil-1400.webp b/assets/img/2023-12-12-projected-fff-networks/non-distil-1400.webp new file mode 100644 index 00000000..2f9dd99c Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/non-distil-1400.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/non-distil-480.webp b/assets/img/2023-12-12-projected-fff-networks/non-distil-480.webp new file mode 100644 index 00000000..7f33b81a Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/non-distil-480.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/non-distil-800.webp b/assets/img/2023-12-12-projected-fff-networks/non-distil-800.webp new file mode 100644 index 00000000..2f9dd99c Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/non-distil-800.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/non_block-1400.webp b/assets/img/2023-12-12-projected-fff-networks/non_block-1400.webp new file mode 100644 index 00000000..31816cd5 Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/non_block-1400.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/non_block-480.webp b/assets/img/2023-12-12-projected-fff-networks/non_block-480.webp new file mode 100644 index 00000000..e401a540 Binary 
files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/non_block-480.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/non_block-800.webp b/assets/img/2023-12-12-projected-fff-networks/non_block-800.webp new file mode 100644 index 00000000..31816cd5 Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/non_block-800.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/non_random-1400.webp b/assets/img/2023-12-12-projected-fff-networks/non_random-1400.webp new file mode 100644 index 00000000..8185c164 Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/non_random-1400.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/non_random-480.webp b/assets/img/2023-12-12-projected-fff-networks/non_random-480.webp new file mode 100644 index 00000000..016081fc Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/non_random-480.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/non_random-800.webp b/assets/img/2023-12-12-projected-fff-networks/non_random-800.webp new file mode 100644 index 00000000..8185c164 Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/non_random-800.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/random-1400.webp b/assets/img/2023-12-12-projected-fff-networks/random-1400.webp new file mode 100644 index 00000000..969a6493 Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/random-1400.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/random-480.webp b/assets/img/2023-12-12-projected-fff-networks/random-480.webp new file mode 100644 index 00000000..387a28f8 Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/random-480.webp differ diff --git a/assets/img/2023-12-12-projected-fff-networks/random-800.webp b/assets/img/2023-12-12-projected-fff-networks/random-800.webp new file mode 100644 index 
00000000..969a6493 Binary files /dev/null and b/assets/img/2023-12-12-projected-fff-networks/random-800.webp differ diff --git a/assets/img/2023-12-12-scRNA-GNNS/degree-1400.webp b/assets/img/2023-12-12-scRNA-GNNS/degree-1400.webp new file mode 100644 index 00000000..5223ae15 Binary files /dev/null and b/assets/img/2023-12-12-scRNA-GNNS/degree-1400.webp differ diff --git a/assets/img/2023-12-12-scRNA-GNNS/degree-480.webp b/assets/img/2023-12-12-scRNA-GNNS/degree-480.webp new file mode 100644 index 00000000..d04684b5 Binary files /dev/null and b/assets/img/2023-12-12-scRNA-GNNS/degree-480.webp differ diff --git a/assets/img/2023-12-12-scRNA-GNNS/degree-800.webp b/assets/img/2023-12-12-scRNA-GNNS/degree-800.webp new file mode 100644 index 00000000..5223ae15 Binary files /dev/null and b/assets/img/2023-12-12-scRNA-GNNS/degree-800.webp differ diff --git a/assets/img/2023-12-12-scRNA-GNNS/embedding24-1400.webp b/assets/img/2023-12-12-scRNA-GNNS/embedding24-1400.webp new file mode 100644 index 00000000..e8b387e0 Binary files /dev/null and b/assets/img/2023-12-12-scRNA-GNNS/embedding24-1400.webp differ diff --git a/assets/img/2023-12-12-scRNA-GNNS/embedding24-480.webp b/assets/img/2023-12-12-scRNA-GNNS/embedding24-480.webp new file mode 100644 index 00000000..f789d0dd Binary files /dev/null and b/assets/img/2023-12-12-scRNA-GNNS/embedding24-480.webp differ diff --git a/assets/img/2023-12-12-scRNA-GNNS/embedding24-800.webp b/assets/img/2023-12-12-scRNA-GNNS/embedding24-800.webp new file mode 100644 index 00000000..e8b387e0 Binary files /dev/null and b/assets/img/2023-12-12-scRNA-GNNS/embedding24-800.webp differ diff --git a/assets/img/2023-12-12-scRNA-GNNS/embedding5-1400.webp b/assets/img/2023-12-12-scRNA-GNNS/embedding5-1400.webp new file mode 100644 index 00000000..c9cfd426 Binary files /dev/null and b/assets/img/2023-12-12-scRNA-GNNS/embedding5-1400.webp differ diff --git a/assets/img/2023-12-12-scRNA-GNNS/embedding5-480.webp 
b/assets/img/2023-12-12-scRNA-GNNS/embedding5-480.webp new file mode 100644 index 00000000..fff69698 Binary files /dev/null and b/assets/img/2023-12-12-scRNA-GNNS/embedding5-480.webp differ diff --git a/assets/img/2023-12-12-scRNA-GNNS/embedding5-800.webp b/assets/img/2023-12-12-scRNA-GNNS/embedding5-800.webp new file mode 100644 index 00000000..c9cfd426 Binary files /dev/null and b/assets/img/2023-12-12-scRNA-GNNS/embedding5-800.webp differ diff --git a/assets/img/2023-12-12-scRNA-GNNS/pca_louvainclusters-1400.webp b/assets/img/2023-12-12-scRNA-GNNS/pca_louvainclusters-1400.webp new file mode 100644 index 00000000..66e8ff96 Binary files /dev/null and b/assets/img/2023-12-12-scRNA-GNNS/pca_louvainclusters-1400.webp differ diff --git a/assets/img/2023-12-12-scRNA-GNNS/pca_louvainclusters-480.webp b/assets/img/2023-12-12-scRNA-GNNS/pca_louvainclusters-480.webp new file mode 100644 index 00000000..e04a9445 Binary files /dev/null and b/assets/img/2023-12-12-scRNA-GNNS/pca_louvainclusters-480.webp differ diff --git a/assets/img/2023-12-12-scRNA-GNNS/pca_louvainclusters-800.webp b/assets/img/2023-12-12-scRNA-GNNS/pca_louvainclusters-800.webp new file mode 100644 index 00000000..66e8ff96 Binary files /dev/null and b/assets/img/2023-12-12-scRNA-GNNS/pca_louvainclusters-800.webp differ diff --git a/assets/img/2023-12-12-scRNA-GNNS/scGNNclusters-1400.webp b/assets/img/2023-12-12-scRNA-GNNS/scGNNclusters-1400.webp new file mode 100644 index 00000000..0e6a6ee5 Binary files /dev/null and b/assets/img/2023-12-12-scRNA-GNNS/scGNNclusters-1400.webp differ diff --git a/assets/img/2023-12-12-scRNA-GNNS/scGNNclusters-480.webp b/assets/img/2023-12-12-scRNA-GNNS/scGNNclusters-480.webp new file mode 100644 index 00000000..68abb453 Binary files /dev/null and b/assets/img/2023-12-12-scRNA-GNNS/scGNNclusters-480.webp differ diff --git a/assets/img/2023-12-12-scRNA-GNNS/scGNNclusters-800.webp b/assets/img/2023-12-12-scRNA-GNNS/scGNNclusters-800.webp new file mode 100644 index 
00000000..0e6a6ee5 Binary files /dev/null and b/assets/img/2023-12-12-scRNA-GNNS/scGNNclusters-800.webp differ diff --git a/assets/img/2023-12-12-solvent-encoding/.DS_Store b/assets/img/2023-12-12-solvent-encoding/.DS_Store deleted file mode 100644 index 7414b106..00000000 Binary files a/assets/img/2023-12-12-solvent-encoding/.DS_Store and /dev/null differ diff --git a/assets/img/2023-12-12-solvent-encoding/PharmHGT-1400.webp b/assets/img/2023-12-12-solvent-encoding/PharmHGT-1400.webp new file mode 100644 index 00000000..2a79b8c7 Binary files /dev/null and b/assets/img/2023-12-12-solvent-encoding/PharmHGT-1400.webp differ diff --git a/assets/img/2023-12-12-solvent-encoding/PharmHGT-480.webp b/assets/img/2023-12-12-solvent-encoding/PharmHGT-480.webp new file mode 100644 index 00000000..030c4671 Binary files /dev/null and b/assets/img/2023-12-12-solvent-encoding/PharmHGT-480.webp differ diff --git a/assets/img/2023-12-12-solvent-encoding/PharmHGT-800.webp b/assets/img/2023-12-12-solvent-encoding/PharmHGT-800.webp new file mode 100644 index 00000000..2a79b8c7 Binary files /dev/null and b/assets/img/2023-12-12-solvent-encoding/PharmHGT-800.webp differ diff --git a/assets/img/2023-12-12-solvent-encoding/chemprop-1400.webp b/assets/img/2023-12-12-solvent-encoding/chemprop-1400.webp new file mode 100644 index 00000000..8981e25e Binary files /dev/null and b/assets/img/2023-12-12-solvent-encoding/chemprop-1400.webp differ diff --git a/assets/img/2023-12-12-solvent-encoding/chemprop-480.webp b/assets/img/2023-12-12-solvent-encoding/chemprop-480.webp new file mode 100644 index 00000000..e81b8dda Binary files /dev/null and b/assets/img/2023-12-12-solvent-encoding/chemprop-480.webp differ diff --git a/assets/img/2023-12-12-solvent-encoding/chemprop-800.webp b/assets/img/2023-12-12-solvent-encoding/chemprop-800.webp new file mode 100644 index 00000000..8981e25e Binary files /dev/null and b/assets/img/2023-12-12-solvent-encoding/chemprop-800.webp differ diff --git 
a/assets/img/2023-12-12-solvent-encoding/graph-1400.webp b/assets/img/2023-12-12-solvent-encoding/graph-1400.webp new file mode 100644 index 00000000..7309c0e0 Binary files /dev/null and b/assets/img/2023-12-12-solvent-encoding/graph-1400.webp differ diff --git a/assets/img/2023-12-12-solvent-encoding/graph-480.webp b/assets/img/2023-12-12-solvent-encoding/graph-480.webp new file mode 100644 index 00000000..6168ddbb Binary files /dev/null and b/assets/img/2023-12-12-solvent-encoding/graph-480.webp differ diff --git a/assets/img/2023-12-12-solvent-encoding/graph-800.webp b/assets/img/2023-12-12-solvent-encoding/graph-800.webp new file mode 100644 index 00000000..7309c0e0 Binary files /dev/null and b/assets/img/2023-12-12-solvent-encoding/graph-800.webp differ diff --git a/assets/img/2023-12-12-solvent-encoding/solvent_encoding_MAE-1400.webp b/assets/img/2023-12-12-solvent-encoding/solvent_encoding_MAE-1400.webp new file mode 100644 index 00000000..565d305b Binary files /dev/null and b/assets/img/2023-12-12-solvent-encoding/solvent_encoding_MAE-1400.webp differ diff --git a/assets/img/2023-12-12-solvent-encoding/solvent_encoding_MAE-480.webp b/assets/img/2023-12-12-solvent-encoding/solvent_encoding_MAE-480.webp new file mode 100644 index 00000000..f3f7ff8f Binary files /dev/null and b/assets/img/2023-12-12-solvent-encoding/solvent_encoding_MAE-480.webp differ diff --git a/assets/img/2023-12-12-solvent-encoding/solvent_encoding_MAE-800.webp b/assets/img/2023-12-12-solvent-encoding/solvent_encoding_MAE-800.webp new file mode 100644 index 00000000..565d305b Binary files /dev/null and b/assets/img/2023-12-12-solvent-encoding/solvent_encoding_MAE-800.webp differ diff --git a/assets/img/2023-12-12-solvent-encoding/solvent_encoding_PharmHGT_train-1400.webp b/assets/img/2023-12-12-solvent-encoding/solvent_encoding_PharmHGT_train-1400.webp new file mode 100644 index 00000000..27abbb48 Binary files /dev/null and 
b/assets/img/2023-12-12-solvent-encoding/solvent_encoding_PharmHGT_train-1400.webp differ diff --git a/assets/img/2023-12-12-solvent-encoding/solvent_encoding_PharmHGT_train-480.webp b/assets/img/2023-12-12-solvent-encoding/solvent_encoding_PharmHGT_train-480.webp new file mode 100644 index 00000000..0efa3c57 Binary files /dev/null and b/assets/img/2023-12-12-solvent-encoding/solvent_encoding_PharmHGT_train-480.webp differ diff --git a/assets/img/2023-12-12-solvent-encoding/solvent_encoding_PharmHGT_train-800.webp b/assets/img/2023-12-12-solvent-encoding/solvent_encoding_PharmHGT_train-800.webp new file mode 100644 index 00000000..27abbb48 Binary files /dev/null and b/assets/img/2023-12-12-solvent-encoding/solvent_encoding_PharmHGT_train-800.webp differ diff --git a/assets/img/2023-12-12-solvent-encoding/solvent_encoding_R2-1400.webp b/assets/img/2023-12-12-solvent-encoding/solvent_encoding_R2-1400.webp new file mode 100644 index 00000000..ecf0983e Binary files /dev/null and b/assets/img/2023-12-12-solvent-encoding/solvent_encoding_R2-1400.webp differ diff --git a/assets/img/2023-12-12-solvent-encoding/solvent_encoding_R2-480.webp b/assets/img/2023-12-12-solvent-encoding/solvent_encoding_R2-480.webp new file mode 100644 index 00000000..b39dc65d Binary files /dev/null and b/assets/img/2023-12-12-solvent-encoding/solvent_encoding_R2-480.webp differ diff --git a/assets/img/2023-12-12-solvent-encoding/solvent_encoding_R2-800.webp b/assets/img/2023-12-12-solvent-encoding/solvent_encoding_R2-800.webp new file mode 100644 index 00000000..ecf0983e Binary files /dev/null and b/assets/img/2023-12-12-solvent-encoding/solvent_encoding_R2-800.webp differ diff --git a/assets/img/2023-12-12-solvent-encoding/solvent_encoding_RMSE-1400.webp b/assets/img/2023-12-12-solvent-encoding/solvent_encoding_RMSE-1400.webp new file mode 100644 index 00000000..32e93586 Binary files /dev/null and b/assets/img/2023-12-12-solvent-encoding/solvent_encoding_RMSE-1400.webp differ diff --git 
a/assets/img/2023-12-12-solvent-encoding/solvent_encoding_RMSE-480.webp b/assets/img/2023-12-12-solvent-encoding/solvent_encoding_RMSE-480.webp new file mode 100644 index 00000000..9a397c3b Binary files /dev/null and b/assets/img/2023-12-12-solvent-encoding/solvent_encoding_RMSE-480.webp differ diff --git a/assets/img/2023-12-12-solvent-encoding/solvent_encoding_RMSE-800.webp b/assets/img/2023-12-12-solvent-encoding/solvent_encoding_RMSE-800.webp new file mode 100644 index 00000000..32e93586 Binary files /dev/null and b/assets/img/2023-12-12-solvent-encoding/solvent_encoding_RMSE-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/dataset_size_research_fig-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/dataset_size_research_fig-1400.webp new file mode 100644 index 00000000..8673a329 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/dataset_size_research_fig-1400.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/dataset_size_research_fig-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/dataset_size_research_fig-480.webp new file mode 100644 index 00000000..7b6eecad Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/dataset_size_research_fig-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/dataset_size_research_fig-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/dataset_size_research_fig-800.webp new file mode 100644 index 00000000..8673a329 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/dataset_size_research_fig-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/energy_dataset_split-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/energy_dataset_split-1400.webp new file mode 100644 index 00000000..fbfe2d1e Binary files /dev/null and 
b/assets/img/2023-12-12-time-series-lstm-transformer/energy_dataset_split-1400.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/energy_dataset_split-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/energy_dataset_split-480.webp new file mode 100644 index 00000000..88605d08 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/energy_dataset_split-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/energy_dataset_split-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/energy_dataset_split-800.webp new file mode 100644 index 00000000..fbfe2d1e Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/energy_dataset_split-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_0001-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_0001-1400.webp new file mode 100644 index 00000000..ecc21f26 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_0001-1400.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_0001-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_0001-480.webp new file mode 100644 index 00000000..62f0817e Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_0001-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_0001-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_0001-800.webp new file mode 100644 index 00000000..ecc21f26 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_0001-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_002-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_002-1400.webp new file mode 100644 index 00000000..41aefc55 Binary files /dev/null and 
b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_002-1400.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_002-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_002-480.webp new file mode 100644 index 00000000..b66b43aa Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_002-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_002-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_002-800.webp new file mode 100644 index 00000000..41aefc55 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_002-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_005-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_005-1400.webp new file mode 100644 index 00000000..ff272e98 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_005-1400.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_005-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_005-480.webp new file mode 100644 index 00000000..d092ab1b Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_005-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_005-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_005-800.webp new file mode 100644 index 00000000..ff272e98 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_005-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_01-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_01-1400.webp new file mode 100644 index 00000000..a6c6bd65 Binary files /dev/null and 
b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_01-1400.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_01-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_01-480.webp new file mode 100644 index 00000000..0780e2f3 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_01-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_01-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_01-800.webp new file mode 100644 index 00000000..a6c6bd65 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_01-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_10-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_10-1400.webp new file mode 100644 index 00000000..ff4410a0 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_10-1400.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_10-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_10-480.webp new file mode 100644 index 00000000..da3222eb Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_10-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_10-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_10-800.webp new file mode 100644 index 00000000..ff4410a0 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_10-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_100-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_100-1400.webp new file mode 100644 index 00000000..f25ab3b9 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_100-1400.webp differ diff 
--git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_100-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_100-480.webp new file mode 100644 index 00000000..9d5a46a8 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_100-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_100-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_100-800.webp new file mode 100644 index 00000000..f25ab3b9 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_100-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_20-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_20-1400.webp new file mode 100644 index 00000000..179e2afa Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_20-1400.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_20-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_20-480.webp new file mode 100644 index 00000000..6a5969d0 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_20-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_20-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_20-800.webp new file mode 100644 index 00000000..179e2afa Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_20-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_50-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_50-1400.webp new file mode 100644 index 00000000..456eadba Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_50-1400.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_50-480.webp 
b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_50-480.webp new file mode 100644 index 00000000..50156283 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_50-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_50-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_50-800.webp new file mode 100644 index 00000000..456eadba Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_50-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_80-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_80-1400.webp new file mode 100644 index 00000000..4be8c465 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_80-1400.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_80-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_80-480.webp new file mode 100644 index 00000000..ac14c3d9 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_80-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_80-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_80-800.webp new file mode 100644 index 00000000..4be8c465 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_80-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_trans_dataset_size_res-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_trans_dataset_size_res-1400.webp new file mode 100644 index 00000000..42878fcc Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_trans_dataset_size_res-1400.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_trans_dataset_size_res-480.webp 
b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_trans_dataset_size_res-480.webp new file mode 100644 index 00000000..771f191f Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_trans_dataset_size_res-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/lstm_trans_dataset_size_res-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_trans_dataset_size_res-800.webp new file mode 100644 index 00000000..42878fcc Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/lstm_trans_dataset_size_res-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_0001-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_0001-1400.webp new file mode 100644 index 00000000..185e917e Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_0001-1400.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_0001-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_0001-480.webp new file mode 100644 index 00000000..4f0c1bb1 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_0001-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_0001-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_0001-800.webp new file mode 100644 index 00000000..185e917e Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_0001-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_001-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_001-1400.webp new file mode 100644 index 00000000..21cb151a Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_001-1400.webp differ diff --git 
a/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_001-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_001-480.webp new file mode 100644 index 00000000..7b1d2b52 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_001-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_001-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_001-800.webp new file mode 100644 index 00000000..21cb151a Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_001-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_003-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_003-1400.webp new file mode 100644 index 00000000..095dda99 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_003-1400.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_003-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_003-480.webp new file mode 100644 index 00000000..0da0a4d9 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_003-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_003-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_003-800.webp new file mode 100644 index 00000000..095dda99 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_003-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_008-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_008-1400.webp new file mode 100644 index 00000000..6584738e Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_008-1400.webp differ 
diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_008-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_008-480.webp new file mode 100644 index 00000000..73c9c1f5 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_008-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_008-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_008-800.webp new file mode 100644 index 00000000..6584738e Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_008-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/noisy_loss-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/noisy_loss-1400.webp new file mode 100644 index 00000000..f5c02d99 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/noisy_loss-1400.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/noisy_loss-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/noisy_loss-480.webp new file mode 100644 index 00000000..04be55c8 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/noisy_loss-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/noisy_loss-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/noisy_loss-800.webp new file mode 100644 index 00000000..f5c02d99 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/noisy_loss-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/prediction_size_loss-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/prediction_size_loss-1400.webp new file mode 100644 index 00000000..50976702 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/prediction_size_loss-1400.webp differ diff --git 
a/assets/img/2023-12-12-time-series-lstm-transformer/prediction_size_loss-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/prediction_size_loss-480.webp new file mode 100644 index 00000000..557d3671 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/prediction_size_loss-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/prediction_size_loss-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/prediction_size_loss-800.webp new file mode 100644 index 00000000..50976702 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/prediction_size_loss-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/rnn-lstm-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/rnn-lstm-1400.webp new file mode 100644 index 00000000..7ecee8ed Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/rnn-lstm-1400.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/rnn-lstm-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/rnn-lstm-480.webp new file mode 100644 index 00000000..e168ed64 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/rnn-lstm-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/rnn-lstm-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/rnn-lstm-800.webp new file mode 100644 index 00000000..7ecee8ed Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/rnn-lstm-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/test_set_pred_40-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/test_set_pred_40-1400.webp new file mode 100644 index 00000000..5f24b3c1 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/test_set_pred_40-1400.webp differ diff --git 
a/assets/img/2023-12-12-time-series-lstm-transformer/test_set_pred_40-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/test_set_pred_40-480.webp new file mode 100644 index 00000000..e4f01f3e Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/test_set_pred_40-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/test_set_pred_40-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/test_set_pred_40-800.webp new file mode 100644 index 00000000..5f24b3c1 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/test_set_pred_40-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_arch-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_arch-1400.webp new file mode 100644 index 00000000..42475fc2 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_arch-1400.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_arch-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_arch-480.webp new file mode 100644 index 00000000..6037793d Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_arch-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_arch-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_arch-800.webp new file mode 100644 index 00000000..42475fc2 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_arch-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_0001-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_0001-1400.webp new file mode 100644 index 00000000..b3741aba Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_0001-1400.webp differ diff --git 
a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_0001-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_0001-480.webp new file mode 100644 index 00000000..b075c25e Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_0001-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_0001-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_0001-800.webp new file mode 100644 index 00000000..b3741aba Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_0001-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_002-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_002-1400.webp new file mode 100644 index 00000000..81c95fa4 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_002-1400.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_002-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_002-480.webp new file mode 100644 index 00000000..de5ef6d2 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_002-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_002-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_002-800.webp new file mode 100644 index 00000000..81c95fa4 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_002-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_005-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_005-1400.webp new file mode 100644 index 00000000..1a562a9e Binary files /dev/null and 
b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_005-1400.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_005-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_005-480.webp new file mode 100644 index 00000000..b50a2cf6 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_005-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_005-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_005-800.webp new file mode 100644 index 00000000..1a562a9e Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_005-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_01-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_01-1400.webp new file mode 100644 index 00000000..ed2633b7 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_01-1400.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_01-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_01-480.webp new file mode 100644 index 00000000..ff118f3c Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_01-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_01-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_01-800.webp new file mode 100644 index 00000000..ed2633b7 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_01-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_10-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_10-1400.webp new file mode 100644 
index 00000000..e95ace77 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_10-1400.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_10-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_10-480.webp new file mode 100644 index 00000000..d50cb7fe Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_10-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_10-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_10-800.webp new file mode 100644 index 00000000..e95ace77 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_10-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_100-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_100-1400.webp new file mode 100644 index 00000000..3326752f Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_100-1400.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_100-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_100-480.webp new file mode 100644 index 00000000..07110130 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_100-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_100-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_100-800.webp new file mode 100644 index 00000000..3326752f Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_100-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_20-1400.webp 
b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_20-1400.webp new file mode 100644 index 00000000..32627f3c Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_20-1400.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_20-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_20-480.webp new file mode 100644 index 00000000..1bf7c03b Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_20-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_20-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_20-800.webp new file mode 100644 index 00000000..32627f3c Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_20-800.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_50-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_50-1400.webp new file mode 100644 index 00000000..be9b64bb Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_50-1400.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_50-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_50-480.webp new file mode 100644 index 00000000..0f6587fc Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_50-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_50-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_50-800.webp new file mode 100644 index 00000000..be9b64bb Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_50-800.webp differ diff --git 
a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_80-1400.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_80-1400.webp new file mode 100644 index 00000000..953e77b0 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_80-1400.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_80-480.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_80-480.webp new file mode 100644 index 00000000..c600a68f Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_80-480.webp differ diff --git a/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_80-800.webp b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_80-800.webp new file mode 100644 index 00000000..953e77b0 Binary files /dev/null and b/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_80-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_mc-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_mc-1400.webp new file mode 100644 index 00000000..1f211a44 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_mc-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_mc-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_mc-480.webp new file mode 100644 index 00000000..4882cff1 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_mc-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_mc-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_mc-800.webp new file mode 100644 index 00000000..1f211a44 Binary files /dev/null and 
b/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_mc-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_swag-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_swag-1400.webp new file mode 100644 index 00000000..49740a2e Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_swag-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_swag-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_swag-480.webp new file mode 100644 index 00000000..3159e56e Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_swag-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_swag-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_swag-800.webp new file mode 100644 index 00000000..49740a2e Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_swag-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_full_swag-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_full_swag-1400.webp new file mode 100644 index 00000000..db4d0944 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_full_swag-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_full_swag-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_full_swag-480.webp new file mode 100644 index 00000000..62e4ae0d Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_full_swag-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_full_swag-800.webp 
b/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_full_swag-800.webp new file mode 100644 index 00000000..db4d0944 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_full_swag-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_id_swag-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_id_swag-1400.webp new file mode 100644 index 00000000..93d0a319 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_id_swag-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_id_swag-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_id_swag-480.webp new file mode 100644 index 00000000..d6cc004b Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_id_swag-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_id_swag-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_id_swag-800.webp new file mode 100644 index 00000000..93d0a319 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_id_swag-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_ood_swag-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_ood_swag-1400.webp new file mode 100644 index 00000000..616553dc Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_ood_swag-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_ood_swag-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_ood_swag-480.webp new file mode 100644 index 00000000..1a9d9570 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_ood_swag-480.webp differ diff --git 
a/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_ood_swag-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_ood_swag-800.webp new file mode 100644 index 00000000..616553dc Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_ood_swag-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_scwag-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_scwag-1400.webp new file mode 100644 index 00000000..b6c77c23 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_scwag-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_scwag-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_scwag-480.webp new file mode 100644 index 00000000..5dac883b Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_scwag-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_scwag-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_scwag-800.webp new file mode 100644 index 00000000..b6c77c23 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_scwag-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_full_swag-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_full_swag-1400.webp new file mode 100644 index 00000000..9b4596a6 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_full_swag-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_full_swag-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_full_swag-480.webp new file mode 100644 index 00000000..e6512944 Binary files /dev/null and 
b/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_full_swag-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_full_swag-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_full_swag-800.webp new file mode 100644 index 00000000..9b4596a6 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_full_swag-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_id_swag-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_id_swag-1400.webp new file mode 100644 index 00000000..4e3f3266 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_id_swag-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_id_swag-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_id_swag-480.webp new file mode 100644 index 00000000..74a2f922 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_id_swag-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_id_swag-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_id_swag-800.webp new file mode 100644 index 00000000..4e3f3266 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_id_swag-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_ood_swag-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_ood_swag-1400.webp new file mode 100644 index 00000000..1a1786c9 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_ood_swag-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_ood_swag-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_ood_swag-480.webp new file mode 100644 index 
00000000..acafde50 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_ood_swag-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_ood_swag-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_ood_swag-800.webp new file mode 100644 index 00000000..1a1786c9 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_ood_swag-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_scwag-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_scwag-1400.webp new file mode 100644 index 00000000..8a21a513 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_scwag-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_scwag-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_scwag-480.webp new file mode 100644 index 00000000..4d3688ab Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_scwag-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_scwag-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_scwag-800.webp new file mode 100644 index 00000000..8a21a513 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_scwag-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_scwag_correlations-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_scwag_correlations-1400.webp new file mode 100644 index 00000000..023b9d6c Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_scwag_correlations-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_scwag_correlations-480.webp 
b/assets/img/2023-12-12-uncertainty-detection-project/cifar_scwag_correlations-480.webp new file mode 100644 index 00000000..9befcecc Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_scwag_correlations-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/cifar_scwag_correlations-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/cifar_scwag_correlations-800.webp new file mode 100644 index 00000000..023b9d6c Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/cifar_scwag_correlations-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/kl_cifar_violin-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/kl_cifar_violin-1400.webp new file mode 100644 index 00000000..69680552 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/kl_cifar_violin-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/kl_cifar_violin-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/kl_cifar_violin-480.webp new file mode 100644 index 00000000..c34042be Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/kl_cifar_violin-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/kl_cifar_violin-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/kl_cifar_violin-800.webp new file mode 100644 index 00000000..69680552 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/kl_cifar_violin-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/kl_mnist_violin-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/kl_mnist_violin-1400.webp new file mode 100644 index 00000000..9f92ada5 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/kl_mnist_violin-1400.webp differ diff --git 
a/assets/img/2023-12-12-uncertainty-detection-project/kl_mnist_violin-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/kl_mnist_violin-480.webp new file mode 100644 index 00000000..571feff4 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/kl_mnist_violin-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/kl_mnist_violin-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/kl_mnist_violin-800.webp new file mode 100644 index 00000000..9f92ada5 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/kl_mnist_violin-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_mc-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_mc-1400.webp new file mode 100644 index 00000000..0273a387 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_mc-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_mc-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_mc-480.webp new file mode 100644 index 00000000..7cee6059 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_mc-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_mc-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_mc-800.webp new file mode 100644 index 00000000..0273a387 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_mc-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_swag-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_swag-1400.webp new file mode 100644 index 00000000..7ab617dc Binary files /dev/null and 
b/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_swag-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_swag-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_swag-480.webp new file mode 100644 index 00000000..c540b7a9 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_swag-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_swag-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_swag-800.webp new file mode 100644 index 00000000..7ab617dc Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_swag-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_full_swag-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_full_swag-1400.webp new file mode 100644 index 00000000..6bba2eac Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_full_swag-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_full_swag-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_full_swag-480.webp new file mode 100644 index 00000000..d9752fe9 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_full_swag-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_full_swag-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_full_swag-800.webp new file mode 100644 index 00000000..6bba2eac Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_full_swag-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_id_swag-1400.webp 
b/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_id_swag-1400.webp new file mode 100644 index 00000000..7264a2d2 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_id_swag-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_id_swag-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_id_swag-480.webp new file mode 100644 index 00000000..7d3098e1 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_id_swag-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_id_swag-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_id_swag-800.webp new file mode 100644 index 00000000..7264a2d2 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_id_swag-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_ood_swag-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_ood_swag-1400.webp new file mode 100644 index 00000000..7108ba54 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_ood_swag-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_ood_swag-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_ood_swag-480.webp new file mode 100644 index 00000000..452020f9 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_ood_swag-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_ood_swag-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_ood_swag-800.webp new file mode 100644 index 00000000..7108ba54 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_ood_swag-800.webp differ diff --git 
a/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_scwag-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_scwag-1400.webp new file mode 100644 index 00000000..49f81f26 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_scwag-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_scwag-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_scwag-480.webp new file mode 100644 index 00000000..3e310d41 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_scwag-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_scwag-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_scwag-800.webp new file mode 100644 index 00000000..49f81f26 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_scwag-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_full_swag-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_full_swag-1400.webp new file mode 100644 index 00000000..54338628 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_full_swag-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_full_swag-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_full_swag-480.webp new file mode 100644 index 00000000..c56a6df9 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_full_swag-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_full_swag-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_full_swag-800.webp new file mode 100644 index 00000000..54338628 Binary files /dev/null and 
b/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_full_swag-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_id_swag-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_id_swag-1400.webp new file mode 100644 index 00000000..2e897259 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_id_swag-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_id_swag-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_id_swag-480.webp new file mode 100644 index 00000000..639902ec Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_id_swag-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_id_swag-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_id_swag-800.webp new file mode 100644 index 00000000..2e897259 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_id_swag-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_ood_swag-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_ood_swag-1400.webp new file mode 100644 index 00000000..7a27a54f Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_ood_swag-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_ood_swag-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_ood_swag-480.webp new file mode 100644 index 00000000..8430e287 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_ood_swag-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_ood_swag-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_ood_swag-800.webp new file mode 100644 index 
00000000..7a27a54f Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_ood_swag-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_scwag-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_scwag-1400.webp new file mode 100644 index 00000000..8705571e Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_scwag-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_scwag-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_scwag-480.webp new file mode 100644 index 00000000..073e2028 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_scwag-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_scwag-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_scwag-800.webp new file mode 100644 index 00000000..8705571e Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_scwag-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_scwag_correlations-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_scwag_correlations-1400.webp new file mode 100644 index 00000000..03eca81e Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_scwag_correlations-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_scwag_correlations-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/mnist_scwag_correlations-480.webp new file mode 100644 index 00000000..58b7dd4f Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_scwag_correlations-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/mnist_scwag_correlations-800.webp 
b/assets/img/2023-12-12-uncertainty-detection-project/mnist_scwag_correlations-800.webp new file mode 100644 index 00000000..03eca81e Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/mnist_scwag_correlations-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/scod_algo-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/scod_algo-1400.webp new file mode 100644 index 00000000..9de6f05e Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/scod_algo-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/scod_algo-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/scod_algo-480.webp new file mode 100644 index 00000000..f3f8b7ec Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/scod_algo-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/scod_algo-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/scod_algo-800.webp new file mode 100644 index 00000000..9de6f05e Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/scod_algo-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/scod_cifar_violin-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/scod_cifar_violin-1400.webp new file mode 100644 index 00000000..b97d25be Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/scod_cifar_violin-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/scod_cifar_violin-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/scod_cifar_violin-480.webp new file mode 100644 index 00000000..ae9f9654 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/scod_cifar_violin-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/scod_cifar_violin-800.webp 
b/assets/img/2023-12-12-uncertainty-detection-project/scod_cifar_violin-800.webp new file mode 100644 index 00000000..b97d25be Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/scod_cifar_violin-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/scod_correlations-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/scod_correlations-1400.webp new file mode 100644 index 00000000..622f64e7 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/scod_correlations-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/scod_correlations-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/scod_correlations-480.webp new file mode 100644 index 00000000..cf2698b3 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/scod_correlations-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/scod_correlations-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/scod_correlations-800.webp new file mode 100644 index 00000000..622f64e7 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/scod_correlations-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/scod_mnist_violin-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/scod_mnist_violin-1400.webp new file mode 100644 index 00000000..7349d1a0 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/scod_mnist_violin-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/scod_mnist_violin-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/scod_mnist_violin-480.webp new file mode 100644 index 00000000..e78a50f0 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/scod_mnist_violin-480.webp differ diff --git 
a/assets/img/2023-12-12-uncertainty-detection-project/scod_mnist_violin-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/scod_mnist_violin-800.webp new file mode 100644 index 00000000..7349d1a0 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/scod_mnist_violin-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/scodv2_cifar_violin-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/scodv2_cifar_violin-1400.webp new file mode 100644 index 00000000..9cad271b Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/scodv2_cifar_violin-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/scodv2_cifar_violin-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/scodv2_cifar_violin-480.webp new file mode 100644 index 00000000..77df330c Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/scodv2_cifar_violin-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/scodv2_cifar_violin-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/scodv2_cifar_violin-800.webp new file mode 100644 index 00000000..9cad271b Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/scodv2_cifar_violin-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/scodv2_mnist_violin-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/scodv2_mnist_violin-1400.webp new file mode 100644 index 00000000..0f46d46d Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/scodv2_mnist_violin-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/scodv2_mnist_violin-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/scodv2_mnist_violin-480.webp new file mode 100644 index 00000000..44d00dd3 Binary files /dev/null and 
b/assets/img/2023-12-12-uncertainty-detection-project/scodv2_mnist_violin-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/scodv2_mnist_violin-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/scodv2_mnist_violin-800.webp new file mode 100644 index 00000000..0f46d46d Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/scodv2_mnist_violin-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/violin_cifar_swag-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/violin_cifar_swag-1400.webp new file mode 100644 index 00000000..c4abc6c8 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/violin_cifar_swag-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/violin_cifar_swag-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/violin_cifar_swag-480.webp new file mode 100644 index 00000000..f6743162 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/violin_cifar_swag-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/violin_cifar_swag-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/violin_cifar_swag-800.webp new file mode 100644 index 00000000..c4abc6c8 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/violin_cifar_swag-800.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/violin_mnist_swag-1400.webp b/assets/img/2023-12-12-uncertainty-detection-project/violin_mnist_swag-1400.webp new file mode 100644 index 00000000..52163d74 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/violin_mnist_swag-1400.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/violin_mnist_swag-480.webp b/assets/img/2023-12-12-uncertainty-detection-project/violin_mnist_swag-480.webp new file mode 100644 index 00000000..ecb45d51 Binary 
files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/violin_mnist_swag-480.webp differ diff --git a/assets/img/2023-12-12-uncertainty-detection-project/violin_mnist_swag-800.webp b/assets/img/2023-12-12-uncertainty-detection-project/violin_mnist_swag-800.webp new file mode 100644 index 00000000..52163d74 Binary files /dev/null and b/assets/img/2023-12-12-uncertainty-detection-project/violin_mnist_swag-800.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/bios-1400.webp b/assets/img/2023-12-12-watermarked-llms/bios-1400.webp new file mode 100644 index 00000000..d6183198 Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/bios-1400.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/bios-480.webp b/assets/img/2023-12-12-watermarked-llms/bios-480.webp new file mode 100644 index 00000000..176f3b63 Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/bios-480.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/bios-800.webp b/assets/img/2023-12-12-watermarked-llms/bios-800.webp new file mode 100644 index 00000000..d6183198 Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/bios-800.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/evaluation-prompt-1400.webp b/assets/img/2023-12-12-watermarked-llms/evaluation-prompt-1400.webp new file mode 100644 index 00000000..991dcd3b Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/evaluation-prompt-1400.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/evaluation-prompt-480.webp b/assets/img/2023-12-12-watermarked-llms/evaluation-prompt-480.webp new file mode 100644 index 00000000..eaf6a5de Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/evaluation-prompt-480.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/evaluation-prompt-800.webp b/assets/img/2023-12-12-watermarked-llms/evaluation-prompt-800.webp new file mode 100644 index 00000000..991dcd3b 
Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/evaluation-prompt-800.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/gamma-delta-1400.webp b/assets/img/2023-12-12-watermarked-llms/gamma-delta-1400.webp new file mode 100644 index 00000000..128651ef Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/gamma-delta-1400.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/gamma-delta-480.webp b/assets/img/2023-12-12-watermarked-llms/gamma-delta-480.webp new file mode 100644 index 00000000..5252ce9b Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/gamma-delta-480.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/gamma-delta-800.webp b/assets/img/2023-12-12-watermarked-llms/gamma-delta-800.webp new file mode 100644 index 00000000..128651ef Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/gamma-delta-800.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/generation-prompt-1400.webp b/assets/img/2023-12-12-watermarked-llms/generation-prompt-1400.webp new file mode 100644 index 00000000..17884e6f Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/generation-prompt-1400.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/generation-prompt-480.webp b/assets/img/2023-12-12-watermarked-llms/generation-prompt-480.webp new file mode 100644 index 00000000..95767d75 Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/generation-prompt-480.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/generation-prompt-800.webp b/assets/img/2023-12-12-watermarked-llms/generation-prompt-800.webp new file mode 100644 index 00000000..17884e6f Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/generation-prompt-800.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/nelson-mandela-1400.webp b/assets/img/2023-12-12-watermarked-llms/nelson-mandela-1400.webp new file mode 100644 index 
00000000..2a1bef25 Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/nelson-mandela-1400.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/nelson-mandela-480.webp b/assets/img/2023-12-12-watermarked-llms/nelson-mandela-480.webp new file mode 100644 index 00000000..b453741e Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/nelson-mandela-480.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/nelson-mandela-800.webp b/assets/img/2023-12-12-watermarked-llms/nelson-mandela-800.webp new file mode 100644 index 00000000..2a1bef25 Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/nelson-mandela-800.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/reddit-comment-1400.webp b/assets/img/2023-12-12-watermarked-llms/reddit-comment-1400.webp new file mode 100644 index 00000000..0537983f Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/reddit-comment-1400.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/reddit-comment-480.webp b/assets/img/2023-12-12-watermarked-llms/reddit-comment-480.webp new file mode 100644 index 00000000..39f3b856 Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/reddit-comment-480.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/reddit-comment-800.webp b/assets/img/2023-12-12-watermarked-llms/reddit-comment-800.webp new file mode 100644 index 00000000..0537983f Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/reddit-comment-800.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/reddit-post-1400.webp b/assets/img/2023-12-12-watermarked-llms/reddit-post-1400.webp new file mode 100644 index 00000000..2528df22 Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/reddit-post-1400.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/reddit-post-480.webp b/assets/img/2023-12-12-watermarked-llms/reddit-post-480.webp new file mode 100644 index 
00000000..13448b52 Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/reddit-post-480.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/reddit-post-800.webp b/assets/img/2023-12-12-watermarked-llms/reddit-post-800.webp new file mode 100644 index 00000000..2528df22 Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/reddit-post-800.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/repetitions-1400.webp b/assets/img/2023-12-12-watermarked-llms/repetitions-1400.webp new file mode 100644 index 00000000..556282fd Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/repetitions-1400.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/repetitions-480.webp b/assets/img/2023-12-12-watermarked-llms/repetitions-480.webp new file mode 100644 index 00000000..2720e264 Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/repetitions-480.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/repetitions-800.webp b/assets/img/2023-12-12-watermarked-llms/repetitions-800.webp new file mode 100644 index 00000000..556282fd Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/repetitions-800.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/stats-1400.webp b/assets/img/2023-12-12-watermarked-llms/stats-1400.webp new file mode 100644 index 00000000..0238c882 Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/stats-1400.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/stats-480.webp b/assets/img/2023-12-12-watermarked-llms/stats-480.webp new file mode 100644 index 00000000..bbd02005 Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/stats-480.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/stats-800.webp b/assets/img/2023-12-12-watermarked-llms/stats-800.webp new file mode 100644 index 00000000..0238c882 Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/stats-800.webp 
differ diff --git a/assets/img/2023-12-12-watermarked-llms/watermark-scheme-1400.webp b/assets/img/2023-12-12-watermarked-llms/watermark-scheme-1400.webp new file mode 100644 index 00000000..2267a0be Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/watermark-scheme-1400.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/watermark-scheme-480.webp b/assets/img/2023-12-12-watermarked-llms/watermark-scheme-480.webp new file mode 100644 index 00000000..3b022a18 Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/watermark-scheme-480.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/watermark-scheme-800.webp b/assets/img/2023-12-12-watermarked-llms/watermark-scheme-800.webp new file mode 100644 index 00000000..2267a0be Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/watermark-scheme-800.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/word-sneak-1400.webp b/assets/img/2023-12-12-watermarked-llms/word-sneak-1400.webp new file mode 100644 index 00000000..2fa339cb Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/word-sneak-1400.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/word-sneak-480.webp b/assets/img/2023-12-12-watermarked-llms/word-sneak-480.webp new file mode 100644 index 00000000..7058fdcf Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/word-sneak-480.webp differ diff --git a/assets/img/2023-12-12-watermarked-llms/word-sneak-800.webp b/assets/img/2023-12-12-watermarked-llms/word-sneak-800.webp new file mode 100644 index 00000000..2fa339cb Binary files /dev/null and b/assets/img/2023-12-12-watermarked-llms/word-sneak-800.webp differ diff --git a/assets/img/Figure2-1400.webp b/assets/img/Figure2-1400.webp new file mode 100644 index 00000000..e2a34f4f Binary files /dev/null and b/assets/img/Figure2-1400.webp differ diff --git a/assets/img/Figure2-480.webp b/assets/img/Figure2-480.webp new file mode 100644 index 
00000000..361771f9 Binary files /dev/null and b/assets/img/Figure2-480.webp differ diff --git a/assets/img/Figure2-800.webp b/assets/img/Figure2-800.webp new file mode 100644 index 00000000..e2a34f4f Binary files /dev/null and b/assets/img/Figure2-800.webp differ diff --git a/assets/img/ICLR-logo-1400.webp b/assets/img/ICLR-logo-1400.webp new file mode 100644 index 00000000..d56968ba Binary files /dev/null and b/assets/img/ICLR-logo-1400.webp differ diff --git a/assets/img/ICLR-logo-480.webp b/assets/img/ICLR-logo-480.webp new file mode 100644 index 00000000..c9d42d7e Binary files /dev/null and b/assets/img/ICLR-logo-480.webp differ diff --git a/assets/img/ICLR-logo-800.webp b/assets/img/ICLR-logo-800.webp new file mode 100644 index 00000000..d56968ba Binary files /dev/null and b/assets/img/ICLR-logo-800.webp differ diff --git a/assets/img/ICLR-logo-dark-1400.webp b/assets/img/ICLR-logo-dark-1400.webp new file mode 100644 index 00000000..5ed49089 Binary files /dev/null and b/assets/img/ICLR-logo-dark-1400.webp differ diff --git a/assets/img/ICLR-logo-dark-480.webp b/assets/img/ICLR-logo-dark-480.webp new file mode 100644 index 00000000..7f0830c1 Binary files /dev/null and b/assets/img/ICLR-logo-dark-480.webp differ diff --git a/assets/img/ICLR-logo-dark-800.webp b/assets/img/ICLR-logo-dark-800.webp new file mode 100644 index 00000000..5ed49089 Binary files /dev/null and b/assets/img/ICLR-logo-dark-800.webp differ diff --git a/assets/img/PrimeKG-1400.webp b/assets/img/PrimeKG-1400.webp new file mode 100644 index 00000000..6fade774 Binary files /dev/null and b/assets/img/PrimeKG-1400.webp differ diff --git a/assets/img/PrimeKG-480.webp b/assets/img/PrimeKG-480.webp new file mode 100644 index 00000000..44127234 Binary files /dev/null and b/assets/img/PrimeKG-480.webp differ diff --git a/assets/img/PrimeKG-800.webp b/assets/img/PrimeKG-800.webp new file mode 100644 index 00000000..6fade774 Binary files /dev/null and b/assets/img/PrimeKG-800.webp differ diff --git 
a/assets/img/Testing_output_dim-1400.webp b/assets/img/Testing_output_dim-1400.webp new file mode 100644 index 00000000..febb5076 Binary files /dev/null and b/assets/img/Testing_output_dim-1400.webp differ diff --git a/assets/img/Testing_output_dim-480.webp b/assets/img/Testing_output_dim-480.webp new file mode 100644 index 00000000..734cd280 Binary files /dev/null and b/assets/img/Testing_output_dim-480.webp differ diff --git a/assets/img/Testing_output_dim-800.webp b/assets/img/Testing_output_dim-800.webp new file mode 100644 index 00000000..febb5076 Binary files /dev/null and b/assets/img/Testing_output_dim-800.webp differ diff --git a/assets/img/bar-chart-1400.webp b/assets/img/bar-chart-1400.webp new file mode 100644 index 00000000..bd929381 Binary files /dev/null and b/assets/img/bar-chart-1400.webp differ diff --git a/assets/img/bar-chart-480.webp b/assets/img/bar-chart-480.webp new file mode 100644 index 00000000..0f4a0bd0 Binary files /dev/null and b/assets/img/bar-chart-480.webp differ diff --git a/assets/img/bar-chart-800.webp b/assets/img/bar-chart-800.webp new file mode 100644 index 00000000..bd929381 Binary files /dev/null and b/assets/img/bar-chart-800.webp differ diff --git a/assets/img/cluster_esm-1400.webp b/assets/img/cluster_esm-1400.webp new file mode 100644 index 00000000..46f19be7 Binary files /dev/null and b/assets/img/cluster_esm-1400.webp differ diff --git a/assets/img/cluster_esm-480.webp b/assets/img/cluster_esm-480.webp new file mode 100644 index 00000000..2ff7fc9e Binary files /dev/null and b/assets/img/cluster_esm-480.webp differ diff --git a/assets/img/cluster_esm-800.webp b/assets/img/cluster_esm-800.webp new file mode 100644 index 00000000..46f19be7 Binary files /dev/null and b/assets/img/cluster_esm-800.webp differ diff --git a/assets/img/cluster_func-1400.webp b/assets/img/cluster_func-1400.webp new file mode 100644 index 00000000..a839ce55 Binary files /dev/null and b/assets/img/cluster_func-1400.webp differ diff --git 
a/assets/img/cluster_func-480.webp b/assets/img/cluster_func-480.webp new file mode 100644 index 00000000..e5c0fae8 Binary files /dev/null and b/assets/img/cluster_func-480.webp differ diff --git a/assets/img/cluster_func-800.webp b/assets/img/cluster_func-800.webp new file mode 100644 index 00000000..a839ce55 Binary files /dev/null and b/assets/img/cluster_func-800.webp differ diff --git a/assets/img/cluster_init-1400.webp b/assets/img/cluster_init-1400.webp new file mode 100644 index 00000000..040a91ff Binary files /dev/null and b/assets/img/cluster_init-1400.webp differ diff --git a/assets/img/cluster_init-480.webp b/assets/img/cluster_init-480.webp new file mode 100644 index 00000000..520cd18e Binary files /dev/null and b/assets/img/cluster_init-480.webp differ diff --git a/assets/img/cluster_init-800.webp b/assets/img/cluster_init-800.webp new file mode 100644 index 00000000..040a91ff Binary files /dev/null and b/assets/img/cluster_init-800.webp differ diff --git a/assets/img/cosine-similarities-1400.webp b/assets/img/cosine-similarities-1400.webp new file mode 100644 index 00000000..61b88a34 Binary files /dev/null and b/assets/img/cosine-similarities-1400.webp differ diff --git a/assets/img/cosine-similarities-480.webp b/assets/img/cosine-similarities-480.webp new file mode 100644 index 00000000..7ea63d18 Binary files /dev/null and b/assets/img/cosine-similarities-480.webp differ diff --git a/assets/img/cosine-similarities-800.webp b/assets/img/cosine-similarities-800.webp new file mode 100644 index 00000000..61b88a34 Binary files /dev/null and b/assets/img/cosine-similarities-800.webp differ diff --git a/assets/img/dimensions-1400.webp b/assets/img/dimensions-1400.webp new file mode 100644 index 00000000..d4b18b59 Binary files /dev/null and b/assets/img/dimensions-1400.webp differ diff --git a/assets/img/dimensions-480.webp b/assets/img/dimensions-480.webp new file mode 100644 index 00000000..e8f913e0 Binary files /dev/null and 
b/assets/img/dimensions-480.webp differ diff --git a/assets/img/dimensions-800.webp b/assets/img/dimensions-800.webp new file mode 100644 index 00000000..d4b18b59 Binary files /dev/null and b/assets/img/dimensions-800.webp differ diff --git a/assets/img/evaluation-1400.webp b/assets/img/evaluation-1400.webp new file mode 100644 index 00000000..fe90cfca Binary files /dev/null and b/assets/img/evaluation-1400.webp differ diff --git a/assets/img/evaluation-480.webp b/assets/img/evaluation-480.webp new file mode 100644 index 00000000..8390d363 Binary files /dev/null and b/assets/img/evaluation-480.webp differ diff --git a/assets/img/evaluation-800.webp b/assets/img/evaluation-800.webp new file mode 100644 index 00000000..fe90cfca Binary files /dev/null and b/assets/img/evaluation-800.webp differ diff --git a/assets/img/image-examples-1400.webp b/assets/img/image-examples-1400.webp new file mode 100644 index 00000000..7cd19607 Binary files /dev/null and b/assets/img/image-examples-1400.webp differ diff --git a/assets/img/image-examples-480.webp b/assets/img/image-examples-480.webp new file mode 100644 index 00000000..0ae09d49 Binary files /dev/null and b/assets/img/image-examples-480.webp differ diff --git a/assets/img/image-examples-800.webp b/assets/img/image-examples-800.webp new file mode 100644 index 00000000..7cd19607 Binary files /dev/null and b/assets/img/image-examples-800.webp differ diff --git a/assets/img/img/2023-11-09-transformer_time/1-1400.webp b/assets/img/img/2023-11-09-transformer_time/1-1400.webp new file mode 100644 index 00000000..4ee82bd3 Binary files /dev/null and b/assets/img/img/2023-11-09-transformer_time/1-1400.webp differ diff --git a/assets/img/img/2023-11-09-transformer_time/1-480.webp b/assets/img/img/2023-11-09-transformer_time/1-480.webp new file mode 100644 index 00000000..2e821cf0 Binary files /dev/null and b/assets/img/img/2023-11-09-transformer_time/1-480.webp differ diff --git a/assets/img/img/2023-11-09-transformer_time/1-800.webp 
b/assets/img/img/2023-11-09-transformer_time/1-800.webp new file mode 100644 index 00000000..4ee82bd3 Binary files /dev/null and b/assets/img/img/2023-11-09-transformer_time/1-800.webp differ diff --git a/assets/img/img/2023-11-09-transformer_time/GRIN-1400.webp b/assets/img/img/2023-11-09-transformer_time/GRIN-1400.webp new file mode 100644 index 00000000..6c58fd7f Binary files /dev/null and b/assets/img/img/2023-11-09-transformer_time/GRIN-1400.webp differ diff --git a/assets/img/img/2023-11-09-transformer_time/GRIN-480.webp b/assets/img/img/2023-11-09-transformer_time/GRIN-480.webp new file mode 100644 index 00000000..0cf1b9f3 Binary files /dev/null and b/assets/img/img/2023-11-09-transformer_time/GRIN-480.webp differ diff --git a/assets/img/img/2023-11-09-transformer_time/GRIN-800.webp b/assets/img/img/2023-11-09-transformer_time/GRIN-800.webp new file mode 100644 index 00000000..6c58fd7f Binary files /dev/null and b/assets/img/img/2023-11-09-transformer_time/GRIN-800.webp differ diff --git a/assets/img/img/2023-11-09-transformer_time/Informer-1400.webp b/assets/img/img/2023-11-09-transformer_time/Informer-1400.webp new file mode 100644 index 00000000..7f868c10 Binary files /dev/null and b/assets/img/img/2023-11-09-transformer_time/Informer-1400.webp differ diff --git a/assets/img/img/2023-11-09-transformer_time/Informer-480.webp b/assets/img/img/2023-11-09-transformer_time/Informer-480.webp new file mode 100644 index 00000000..15dfda53 Binary files /dev/null and b/assets/img/img/2023-11-09-transformer_time/Informer-480.webp differ diff --git a/assets/img/img/2023-11-09-transformer_time/Informer-800.webp b/assets/img/img/2023-11-09-transformer_time/Informer-800.webp new file mode 100644 index 00000000..7f868c10 Binary files /dev/null and b/assets/img/img/2023-11-09-transformer_time/Informer-800.webp differ diff --git a/assets/img/img/2023-11-09-transformer_time/PatchTST-1400.webp b/assets/img/img/2023-11-09-transformer_time/PatchTST-1400.webp new file mode 
100644 index 00000000..75379231 Binary files /dev/null and b/assets/img/img/2023-11-09-transformer_time/PatchTST-1400.webp differ diff --git a/assets/img/img/2023-11-09-transformer_time/PatchTST-480.webp b/assets/img/img/2023-11-09-transformer_time/PatchTST-480.webp new file mode 100644 index 00000000..17b2ef3b Binary files /dev/null and b/assets/img/img/2023-11-09-transformer_time/PatchTST-480.webp differ diff --git a/assets/img/img/2023-11-09-transformer_time/PatchTST-800.webp b/assets/img/img/2023-11-09-transformer_time/PatchTST-800.webp new file mode 100644 index 00000000..75379231 Binary files /dev/null and b/assets/img/img/2023-11-09-transformer_time/PatchTST-800.webp differ diff --git a/assets/img/img/2023-11-09-transformer_time/test1-1400.webp b/assets/img/img/2023-11-09-transformer_time/test1-1400.webp new file mode 100644 index 00000000..e0cba6f2 Binary files /dev/null and b/assets/img/img/2023-11-09-transformer_time/test1-1400.webp differ diff --git a/assets/img/img/2023-11-09-transformer_time/test1-480.webp b/assets/img/img/2023-11-09-transformer_time/test1-480.webp new file mode 100644 index 00000000..2e2d3d74 Binary files /dev/null and b/assets/img/img/2023-11-09-transformer_time/test1-480.webp differ diff --git a/assets/img/img/2023-11-09-transformer_time/test1-800.webp b/assets/img/img/2023-11-09-transformer_time/test1-800.webp new file mode 100644 index 00000000..e0cba6f2 Binary files /dev/null and b/assets/img/img/2023-11-09-transformer_time/test1-800.webp differ diff --git a/assets/img/img/2023-11-09-transformer_time/test2-1400.webp b/assets/img/img/2023-11-09-transformer_time/test2-1400.webp new file mode 100644 index 00000000..29002b8e Binary files /dev/null and b/assets/img/img/2023-11-09-transformer_time/test2-1400.webp differ diff --git a/assets/img/img/2023-11-09-transformer_time/test2-480.webp b/assets/img/img/2023-11-09-transformer_time/test2-480.webp new file mode 100644 index 00000000..e92e4e32 Binary files /dev/null and 
b/assets/img/img/2023-11-09-transformer_time/test2-480.webp differ diff --git a/assets/img/img/2023-11-09-transformer_time/test2-800.webp b/assets/img/img/2023-11-09-transformer_time/test2-800.webp new file mode 100644 index 00000000..29002b8e Binary files /dev/null and b/assets/img/img/2023-11-09-transformer_time/test2-800.webp differ diff --git a/assets/img/img1-1400.webp b/assets/img/img1-1400.webp new file mode 100644 index 00000000..b901d1a1 Binary files /dev/null and b/assets/img/img1-1400.webp differ diff --git a/assets/img/img1-480.webp b/assets/img/img1-480.webp new file mode 100644 index 00000000..01dc3ba9 Binary files /dev/null and b/assets/img/img1-480.webp differ diff --git a/assets/img/img1-800.webp b/assets/img/img1-800.webp new file mode 100644 index 00000000..b901d1a1 Binary files /dev/null and b/assets/img/img1-800.webp differ diff --git a/assets/img/img2-1400.webp b/assets/img/img2-1400.webp new file mode 100644 index 00000000..867df317 Binary files /dev/null and b/assets/img/img2-1400.webp differ diff --git a/assets/img/img2-480.webp b/assets/img/img2-480.webp new file mode 100644 index 00000000..0bff06d3 Binary files /dev/null and b/assets/img/img2-480.webp differ diff --git a/assets/img/img2-800.webp b/assets/img/img2-800.webp new file mode 100644 index 00000000..867df317 Binary files /dev/null and b/assets/img/img2-800.webp differ diff --git a/assets/img/img3-1400.webp b/assets/img/img3-1400.webp new file mode 100644 index 00000000..4f8721b6 Binary files /dev/null and b/assets/img/img3-1400.webp differ diff --git a/assets/img/img3-480.webp b/assets/img/img3-480.webp new file mode 100644 index 00000000..f67c3c97 Binary files /dev/null and b/assets/img/img3-480.webp differ diff --git a/assets/img/img3-800.webp b/assets/img/img3-800.webp new file mode 100644 index 00000000..4f8721b6 Binary files /dev/null and b/assets/img/img3-800.webp differ diff --git a/assets/img/img4-1400.webp b/assets/img/img4-1400.webp new file mode 100644 index 
00000000..5a60ce56 Binary files /dev/null and b/assets/img/img4-1400.webp differ diff --git a/assets/img/img4-480.webp b/assets/img/img4-480.webp new file mode 100644 index 00000000..3ed7b4be Binary files /dev/null and b/assets/img/img4-480.webp differ diff --git a/assets/img/img4-800.webp b/assets/img/img4-800.webp new file mode 100644 index 00000000..5a60ce56 Binary files /dev/null and b/assets/img/img4-800.webp differ diff --git a/assets/img/img5-1400.webp b/assets/img/img5-1400.webp new file mode 100644 index 00000000..0e0f211d Binary files /dev/null and b/assets/img/img5-1400.webp differ diff --git a/assets/img/img5-480.webp b/assets/img/img5-480.webp new file mode 100644 index 00000000..faf0ddb1 Binary files /dev/null and b/assets/img/img5-480.webp differ diff --git a/assets/img/img5-800.webp b/assets/img/img5-800.webp new file mode 100644 index 00000000..0e0f211d Binary files /dev/null and b/assets/img/img5-800.webp differ diff --git a/assets/img/img6-1400.webp b/assets/img/img6-1400.webp new file mode 100644 index 00000000..a5a1b337 Binary files /dev/null and b/assets/img/img6-1400.webp differ diff --git a/assets/img/img6-480.webp b/assets/img/img6-480.webp new file mode 100644 index 00000000..112f2e1c Binary files /dev/null and b/assets/img/img6-480.webp differ diff --git a/assets/img/img6-800.webp b/assets/img/img6-800.webp new file mode 100644 index 00000000..a5a1b337 Binary files /dev/null and b/assets/img/img6-800.webp differ diff --git a/assets/img/init_cinit-1400.webp b/assets/img/init_cinit-1400.webp new file mode 100644 index 00000000..61c49092 Binary files /dev/null and b/assets/img/init_cinit-1400.webp differ diff --git a/assets/img/init_cinit-480.webp b/assets/img/init_cinit-480.webp new file mode 100644 index 00000000..8c2955c4 Binary files /dev/null and b/assets/img/init_cinit-480.webp differ diff --git a/assets/img/init_cinit-800.webp b/assets/img/init_cinit-800.webp new file mode 100644 index 00000000..61c49092 Binary files /dev/null and 
b/assets/img/init_cinit-800.webp differ diff --git a/assets/img/organizers/cg-1400.webp b/assets/img/organizers/cg-1400.webp new file mode 100644 index 00000000..7d4f4383 Binary files /dev/null and b/assets/img/organizers/cg-1400.webp differ diff --git a/assets/img/organizers/cg-480.webp b/assets/img/organizers/cg-480.webp new file mode 100644 index 00000000..c4497e86 Binary files /dev/null and b/assets/img/organizers/cg-480.webp differ diff --git a/assets/img/organizers/cg-800.webp b/assets/img/organizers/cg-800.webp new file mode 100644 index 00000000..7d4f4383 Binary files /dev/null and b/assets/img/organizers/cg-800.webp differ diff --git a/assets/img/organizers/cv-1400.webp b/assets/img/organizers/cv-1400.webp new file mode 100644 index 00000000..3967f400 Binary files /dev/null and b/assets/img/organizers/cv-1400.webp differ diff --git a/assets/img/organizers/cv-480.webp b/assets/img/organizers/cv-480.webp new file mode 100644 index 00000000..8e5721ba Binary files /dev/null and b/assets/img/organizers/cv-480.webp differ diff --git a/assets/img/organizers/cv-800.webp b/assets/img/organizers/cv-800.webp new file mode 100644 index 00000000..3967f400 Binary files /dev/null and b/assets/img/organizers/cv-800.webp differ diff --git a/assets/img/organizers/dd-1400.webp b/assets/img/organizers/dd-1400.webp new file mode 100644 index 00000000..b63f6c49 Binary files /dev/null and b/assets/img/organizers/dd-1400.webp differ diff --git a/assets/img/organizers/dd-480.webp b/assets/img/organizers/dd-480.webp new file mode 100644 index 00000000..57f80658 Binary files /dev/null and b/assets/img/organizers/dd-480.webp differ diff --git a/assets/img/organizers/dd-800.webp b/assets/img/organizers/dd-800.webp new file mode 100644 index 00000000..b63f6c49 Binary files /dev/null and b/assets/img/organizers/dd-800.webp differ diff --git a/assets/img/organizers/gg-1400.webp b/assets/img/organizers/gg-1400.webp new file mode 100644 index 00000000..4a8c5bd4 Binary files /dev/null and 
b/assets/img/organizers/gg-1400.webp differ diff --git a/assets/img/organizers/gg-480.webp b/assets/img/organizers/gg-480.webp new file mode 100644 index 00000000..ca12493c Binary files /dev/null and b/assets/img/organizers/gg-480.webp differ diff --git a/assets/img/organizers/gg-800.webp b/assets/img/organizers/gg-800.webp new file mode 100644 index 00000000..4a8c5bd4 Binary files /dev/null and b/assets/img/organizers/gg-800.webp differ diff --git a/assets/img/organizers/jb-1400.webp b/assets/img/organizers/jb-1400.webp new file mode 100644 index 00000000..805ff806 Binary files /dev/null and b/assets/img/organizers/jb-1400.webp differ diff --git a/assets/img/organizers/jb-480.webp b/assets/img/organizers/jb-480.webp new file mode 100644 index 00000000..61136b49 Binary files /dev/null and b/assets/img/organizers/jb-480.webp differ diff --git a/assets/img/organizers/jb-800.webp b/assets/img/organizers/jb-800.webp new file mode 100644 index 00000000..805ff806 Binary files /dev/null and b/assets/img/organizers/jb-800.webp differ diff --git a/assets/img/organizers/sb-1400.webp b/assets/img/organizers/sb-1400.webp new file mode 100644 index 00000000..a7f4c274 Binary files /dev/null and b/assets/img/organizers/sb-1400.webp differ diff --git a/assets/img/organizers/sb-480.webp b/assets/img/organizers/sb-480.webp new file mode 100644 index 00000000..f3f51102 Binary files /dev/null and b/assets/img/organizers/sb-480.webp differ diff --git a/assets/img/organizers/sb-800.webp b/assets/img/organizers/sb-800.webp new file mode 100644 index 00000000..a7f4c274 Binary files /dev/null and b/assets/img/organizers/sb-800.webp differ diff --git a/assets/img/pddl-explained-1400.webp b/assets/img/pddl-explained-1400.webp new file mode 100644 index 00000000..2a50a77e Binary files /dev/null and b/assets/img/pddl-explained-1400.webp differ diff --git a/assets/img/pddl-explained-480.webp b/assets/img/pddl-explained-480.webp new file mode 100644 index 00000000..816c6ac9 Binary files 
/dev/null and b/assets/img/pddl-explained-480.webp differ diff --git a/assets/img/pddl-explained-800.webp b/assets/img/pddl-explained-800.webp new file mode 100644 index 00000000..2a50a77e Binary files /dev/null and b/assets/img/pddl-explained-800.webp differ diff --git a/assets/img/predicate-example-1400.webp b/assets/img/predicate-example-1400.webp new file mode 100644 index 00000000..7d5a63b2 Binary files /dev/null and b/assets/img/predicate-example-1400.webp differ diff --git a/assets/img/predicate-example-480.webp b/assets/img/predicate-example-480.webp new file mode 100644 index 00000000..21639723 Binary files /dev/null and b/assets/img/predicate-example-480.webp differ diff --git a/assets/img/predicate-example-800.webp b/assets/img/predicate-example-800.webp new file mode 100644 index 00000000..7d5a63b2 Binary files /dev/null and b/assets/img/predicate-example-800.webp differ diff --git a/assets/js/common.js b/assets/js/common.js index fc2f8f56..521235d2 100644 --- a/assets/js/common.js +++ b/assets/js/common.js @@ -1,9 +1 @@ -$(document).ready(function() { - $('a.abstract').click(function() { - $(this).parent().parent().find(".abstract.hidden").toggleClass('open'); - }); - $('a.bibtex').click(function() { - $(this).parent().parent().find(".bibtex.hidden").toggleClass('open'); - }); - $('a').removeClass('waves-effect waves-light'); -}); +$(document).ready(function(){$("a.abstract").click(function(){$(this).parent().parent().find(".abstract.hidden").toggleClass("open")}),$("a.bibtex").click(function(){$(this).parent().parent().find(".bibtex.hidden").toggleClass("open")}),$("a").removeClass("waves-effect waves-light")}); \ No newline at end of file diff --git a/assets/js/dark_mode.js b/assets/js/dark_mode.js index 863b273f..26312e44 100644 --- a/assets/js/dark_mode.js +++ b/assets/js/dark_mode.js @@ -1,8 +1 @@ -document.addEventListener('DOMContentLoaded', function() { - const mode_toggle = document.getElementById("light-toggle"); - - 
mode_toggle.addEventListener("click", function() { - toggleTheme(localStorage.getItem("theme")); - }); -}); - +document.addEventListener("DOMContentLoaded",function(){document.getElementById("light-toggle").addEventListener("click",function(){toggleTheme(localStorage.getItem("theme"))})}); \ No newline at end of file diff --git a/assets/js/distillpub/overrides.js b/assets/js/distillpub/overrides.js index 65c636e0..066b8efa 100644 --- a/assets/js/distillpub/overrides.js +++ b/assets/js/distillpub/overrides.js @@ -1,24 +1 @@ -$(document).ready(function() { - // Override styles of the footnotes. - document.querySelectorAll("d-footnote").forEach(function(footnote) { - footnote.shadowRoot.querySelector("sup > span") - .setAttribute("style", "color: var(--global-theme-color);"); - footnote.shadowRoot.querySelector("d-hover-box").shadowRoot.querySelector("style").sheet - .insertRule(".panel {background-color: var(--global-bg-color) !important;}"); - footnote.shadowRoot.querySelector("d-hover-box").shadowRoot.querySelector("style").sheet - .insertRule(".panel {border-color: var(--global-divider-color) !important;}"); - }); - // Override styles of the citations. 
- document.querySelectorAll("d-cite").forEach(function(cite) { - cite.shadowRoot.querySelector("div > span") - .setAttribute("style", "color: var(--global-theme-color);"); - cite.shadowRoot.querySelector("style").sheet - .insertRule("ul li a {color: var(--global-text-color) !important; text-decoration: none;}"); - cite.shadowRoot.querySelector("style").sheet - .insertRule("ul li a:hover {color: var(--global-theme-color) !important;}"); - cite.shadowRoot.querySelector("d-hover-box").shadowRoot.querySelector("style").sheet - .insertRule(".panel {background-color: var(--global-bg-color) !important;}"); - cite.shadowRoot.querySelector("d-hover-box").shadowRoot.querySelector("style").sheet - .insertRule(".panel {border-color: var(--global-divider-color) !important;}"); - }); -}) \ No newline at end of file +$(document).ready(function(){document.querySelectorAll("d-footnote").forEach(function(o){o.shadowRoot.querySelector("sup > span").setAttribute("style","color: var(--global-theme-color);"),o.shadowRoot.querySelector("d-hover-box").shadowRoot.querySelector("style").sheet.insertRule(".panel {background-color: var(--global-bg-color) !important;}"),o.shadowRoot.querySelector("d-hover-box").shadowRoot.querySelector("style").sheet.insertRule(".panel {border-color: var(--global-divider-color) !important;}")}),document.querySelectorAll("d-cite").forEach(function(o){o.shadowRoot.querySelector("div > span").setAttribute("style","color: var(--global-theme-color);"),o.shadowRoot.querySelector("style").sheet.insertRule("ul li a {color: var(--global-text-color) !important; text-decoration: none;}"),o.shadowRoot.querySelector("style").sheet.insertRule("ul li a:hover {color: var(--global-theme-color) !important;}"),o.shadowRoot.querySelector("d-hover-box").shadowRoot.querySelector("style").sheet.insertRule(".panel {background-color: var(--global-bg-color) !important;}"),o.shadowRoot.querySelector("d-hover-box").shadowRoot.querySelector("style").sheet.insertRule(".panel {border-color: 
var(--global-divider-color) !important;}")})}); \ No newline at end of file diff --git a/assets/js/distillpub/template.v2.js b/assets/js/distillpub/template.v2.js index b0f94f0b..a05e82bb 100644 --- a/assets/js/distillpub/template.v2.js +++ b/assets/js/distillpub/template.v2.js @@ -1,9247 +1,67 @@ -(function (factory) { - typeof define === 'function' && define.amd ? define(factory) : - factory(); -}((function () { 'use strict'; - - // Copyright 2018 The Distill Template Authors - // - // Licensed under the Apache License, Version 2.0 (the "License"); - // you may not use this file except in compliance with the License. - // You may obtain a copy of the License at - // - // http://www.apache.org/licenses/LICENSE-2.0 - // - // Unless required by applicable law or agreed to in writing, software - // distributed under the License is distributed on an "AS IS" BASIS, - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - // See the License for the specific language governing permissions and - // limitations under the License. - - const days = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']; - const months = ['Jan.', 'Feb.', 'March', 'April', 'May', 'June', 'July', 'Aug.', 'Sept.', 'Oct.', 'Nov.', 'Dec.']; - const zeroPad = n => n < 10 ? '0' + n : n; - - const RFC = function(date) { - const day = days[date.getDay()].substring(0, 3); - const paddedDate = zeroPad(date.getDate()); - const month = months[date.getMonth()].substring(0,3); - const year = date.getFullYear().toString(); - const hours = date.getUTCHours().toString(); - const minutes = date.getUTCMinutes().toString(); - const seconds = date.getUTCSeconds().toString(); - return `${day}, ${paddedDate} ${month} ${year} ${hours}:${minutes}:${seconds} Z`; - }; - - const objectFromMap = function(map) { - const object = Array.from(map).reduce((object, [key, value]) => ( - Object.assign(object, { [key]: value }) // Be careful! 
Maps can have non-String keys; object literals can't. - ), {}); - return object; - }; - - const mapFromObject = function(object) { - const map = new Map(); - for (var property in object) { - if (object.hasOwnProperty(property)) { - map.set(property, object[property]); - } - } - return map; - }; - - class Author { - - // constructor(name='', personalURL='', affiliation='', affiliationURL='') { - // this.name = name; // 'Chris Olah' - // this.personalURL = personalURL; // 'https://colah.github.io' - // this.affiliation = affiliation; // 'Google Brain' - // this.affiliationURL = affiliationURL; // 'https://g.co/brain' - // } - - constructor(object) { - this.name = object.author; // 'Chris Olah' - this.personalURL = object.authorURL; // 'https://colah.github.io' - this.affiliation = object.affiliation; // 'Google Brain' - this.affiliationURL = object.affiliationURL; // 'https://g.co/brain' - this.affiliations = object.affiliations || []; // new-style affiliations - } - - // 'Chris' - get firstName() { - const names = this.name.split(' '); - return names.slice(0, names.length - 1).join(' '); - } - - // 'Olah' - get lastName() { - const names = this.name.split(' '); - return names[names.length -1]; - } - } - - function mergeFromYMLFrontmatter(target, source) { - target.title = source.title; - if (source.published) { - if (source.published instanceof Date) { - target.publishedDate = source.published; - } else if (source.published.constructor === String) { - target.publishedDate = new Date(source.published); - } - } - if (source.publishedDate) { - if (source.publishedDate instanceof Date) { - target.publishedDate = source.publishedDate; - } else if (source.publishedDate.constructor === String) { - target.publishedDate = new Date(source.publishedDate); - } else { - console.error('Don\'t know what to do with published date: ' + source.publishedDate); - } - } - target.description = source.description; - target.authors = source.authors.map( (authorObject) => new 
Author(authorObject)); - target.katex = source.katex; - target.password = source.password; - if (source.doi) { - target.doi = source.doi; - } - } - - class FrontMatter { - constructor() { - this.title = 'unnamed article'; // 'Attention and Augmented Recurrent Neural Networks' - this.description = ''; // 'A visual overview of neural attention...' - this.authors = []; // Array of Author(s) - - this.bibliography = new Map(); - this.bibliographyParsed = false; - // { - // 'gregor2015draw': { - // 'title': 'DRAW: A recurrent neural network for image generation', - // 'author': 'Gregor, Karol and Danihelka, Ivo and Graves, Alex and Rezende, Danilo Jimenez and Wierstra, Daan', - // 'journal': 'arXiv preprint arXiv:1502.04623', - // 'year': '2015', - // 'url': 'https://arxiv.org/pdf/1502.04623.pdf', - // 'type': 'article' - // }, - // } - - // Citation keys should be listed in the order that they are appear in the document. - // Each key refers to a key in the bibliography dictionary. - this.citations = []; // [ 'gregor2015draw', 'mercier2011humans' ] - this.citationsCollected = false; - - // - // Assigned from posts.csv - // - - // publishedDate: 2016-09-08T07:00:00.000Z, - // tags: [ 'rnn' ], - // distillPath: '2016/augmented-rnns', - // githubPath: 'distillpub/post--augmented-rnns', - // doiSuffix: 1, - - // - // Assigned from journal - // - this.journal = {}; - // journal: { - // 'title': 'Distill', - // 'full_title': 'Distill', - // 'abbrev_title': 'Distill', - // 'url': 'http://distill.pub', - // 'doi': '10.23915/distill', - // 'publisherName': 'Distill Working Group', - // 'publisherEmail': 'admin@distill.pub', - // 'issn': '2476-0757', - // 'editors': [...], - // 'committee': [...] 
- // } - // volume: 1, - // issue: 9, - - this.katex = {}; - - // - // Assigned from publishing process - // - - // githubCompareUpdatesUrl: 'https://github.com/distillpub/post--augmented-rnns/compare/1596e094d8943d2dc0ea445d92071129c6419c59...3bd9209e0c24d020f87cf6152dcecc6017cbc193', - // updatedDate: 2017-03-21T07:13:16.000Z, - // doi: '10.23915/distill.00001', - this.doi = undefined; - this.publishedDate = undefined; - } - - // Example: - // title: Demo Title Attention and Augmented Recurrent Neural Networks - // published: Jan 10, 2017 - // authors: - // - Chris Olah: - // - Shan Carter: http://shancarter.com - // affiliations: - // - Google Brain: - // - Google Brain: http://g.co/brain - - // - // Computed Properties - // - - // 'http://distill.pub/2016/augmented-rnns', - set url(value) { - this._url = value; - } - get url() { - if (this._url) { - return this._url; - } else if (this.distillPath && this.journal.url) { - return this.journal.url + '/' + this.distillPath; - } else if (this.journal.url) { - return this.journal.url; - } - } - - // 'https://github.com/distillpub/post--augmented-rnns', - get githubUrl() { - if (this.githubPath) { - return 'https://github.com/' + this.githubPath; - } else { - return undefined; - } - } - - // TODO resolve differences in naming of URL/Url/url. - // 'http://distill.pub/2016/augmented-rnns/thumbnail.jpg', - set previewURL(value) { - this._previewURL = value; - } - get previewURL() { - return this._previewURL ? 
this._previewURL : this.url + '/thumbnail.jpg'; - } - - // 'Thu, 08 Sep 2016 00:00:00 -0700', - get publishedDateRFC() { - return RFC(this.publishedDate); - } - - // 'Thu, 08 Sep 2016 00:00:00 -0700', - get updatedDateRFC() { - return RFC(this.updatedDate); - } - - // 2016, - get publishedYear() { - return this.publishedDate.getFullYear(); - } - - // 'Sept', - get publishedMonth() { - return months[this.publishedDate.getMonth()]; - } - - // 8, - get publishedDay() { - return this.publishedDate.getDate(); - } - - // '09', - get publishedMonthPadded() { - return zeroPad(this.publishedDate.getMonth() + 1); - } - - // '08', - get publishedDayPadded() { - return zeroPad(this.publishedDate.getDate()); - } - - get publishedISODateOnly() { - return this.publishedDate.toISOString().split('T')[0]; - } - - get volume() { - const volume = this.publishedYear - 2015; - if (volume < 1) { - throw new Error('Invalid publish date detected during computing volume'); - } - return volume; - } - - get issue() { - return this.publishedDate.getMonth() + 1; - } - - // 'Olah & Carter', - get concatenatedAuthors() { - if (this.authors.length > 2) { - return this.authors[0].lastName + ', et al.'; - } else if (this.authors.length === 2) { - return this.authors[0].lastName + ' & ' + this.authors[1].lastName; - } else if (this.authors.length === 1) { - return this.authors[0].lastName; - } - } - - // 'Olah, Chris and Carter, Shan', - get bibtexAuthors() { - return this.authors.map(author => { - return author.lastName + ', ' + author.firstName; - }).join(' and '); - } - - // 'olah2016attention' - get slug() { - let slug = ''; - if (this.authors.length) { - slug += this.authors[0].lastName.toLowerCase(); - slug += this.publishedYear; - slug += this.title.split(' ')[0].toLowerCase(); - } - return slug || 'Untitled'; - } - - get bibliographyEntries() { - return new Map(this.citations.map( citationKey => { - const entry = this.bibliography.get(citationKey); - return [citationKey, entry]; - })); - } - 
- set bibliography(bibliography) { - if (bibliography instanceof Map) { - this._bibliography = bibliography; - } else if (typeof bibliography === 'object') { - this._bibliography = mapFromObject(bibliography); - } - } - - get bibliography() { - return this._bibliography; - } - - static fromObject(source) { - const frontMatter = new FrontMatter(); - Object.assign(frontMatter, source); - return frontMatter; - } - - assignToObject(target) { - Object.assign(target, this); - target.bibliography = objectFromMap(this.bibliographyEntries); - target.url = this.url; - target.doi = this.doi; - target.githubUrl = this.githubUrl; - target.previewURL = this.previewURL; - if (this.publishedDate) { - target.volume = this.volume; - target.issue = this.issue; - target.publishedDateRFC = this.publishedDateRFC; - target.publishedYear = this.publishedYear; - target.publishedMonth = this.publishedMonth; - target.publishedDay = this.publishedDay; - target.publishedMonthPadded = this.publishedMonthPadded; - target.publishedDayPadded = this.publishedDayPadded; - } - if (this.updatedDate) { - target.updatedDateRFC = this.updatedDateRFC; - } - target.concatenatedAuthors = this.concatenatedAuthors; - target.bibtexAuthors = this.bibtexAuthors; - target.slug = this.slug; - } - - } - - // Copyright 2018 The Distill Template Authors - // - // Licensed under the Apache License, Version 2.0 (the "License"); - // you may not use this file except in compliance with the License. - // You may obtain a copy of the License at - // - // http://www.apache.org/licenses/LICENSE-2.0 - // - // Unless required by applicable law or agreed to in writing, software - // distributed under the License is distributed on an "AS IS" BASIS, - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - // See the License for the specific language governing permissions and - // limitations under the License. 
- - const Mutating = (superclass) => { - return class extends superclass { - - constructor() { - super(); - - // set up mutation observer - const options = {childList: true, characterData: true, subtree: true}; - const observer = new MutationObserver( () => { - observer.disconnect(); - this.renderIfPossible(); - observer.observe(this, options); - }); - - // ...and listen for changes - observer.observe(this, options); - } - - connectedCallback() { - super.connectedCallback(); - - this.renderIfPossible(); - } - - // potential TODO: check if this is enough for all our usecases - // maybe provide a custom function to tell if we have enough information to render - renderIfPossible() { - if (this.textContent && this.root) { - this.renderContent(); - } - } - - renderContent() { - console.error(`Your class ${this.constructor.name} must provide a custom renderContent() method!` ); - } - - }; // end class - }; // end mixin function - - // Copyright 2018 The Distill Template Authors - // - // Licensed under the Apache License, Version 2.0 (the "License"); - // you may not use this file except in compliance with the License. - // You may obtain a copy of the License at - // - // http://www.apache.org/licenses/LICENSE-2.0 - // - // Unless required by applicable law or agreed to in writing, software - // distributed under the License is distributed on an "AS IS" BASIS, - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - // See the License for the specific language governing permissions and - // limitations under the License. 
- - /*global ShadyCSS*/ - - const Template = (name, templateString, useShadow = true) => { - - return (superclass) => { - - const template = document.createElement('template'); - template.innerHTML = templateString; - - if (useShadow && 'ShadyCSS' in window) { - ShadyCSS.prepareTemplate(template, name); - } - - return class extends superclass { - - static get is() { return name; } - - constructor() { - super(); - - this.clone = document.importNode(template.content, true); - if (useShadow) { - this.attachShadow({mode: 'open'}); - this.shadowRoot.appendChild(this.clone); - } - } - - connectedCallback() { - if (this.hasAttribute('distill-prerendered')) { - return; - } - if (useShadow) { - if ('ShadyCSS' in window) { - ShadyCSS.styleElement(this); - } - } else { - this.insertBefore(this.clone, this.firstChild); - } - } - - get root() { - if (useShadow) { - return this.shadowRoot; - } else { - return this; - } - } - - /* TODO: Are we using these? Should we even? */ - $(query) { - return this.root.querySelector(query); - } - - $$(query) { - return this.root.querySelectorAll(query); - } - }; - }; - }; - - var math = "/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nspan.katex-display {\n text-align: left;\n padding: 8px 0 8px 0;\n margin: 0.5em 0 0.5em 1em;\n}\n\nspan.katex {\n -webkit-font-smoothing: antialiased;\n color: rgba(0, 0, 0, 0.8);\n font-size: 1.18em;\n}\n"; - - // Copyright 2018 The Distill 
Template Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This is a straight concatenation of code from KaTeX's contrib folder,
// but we aren't using some of their helpers that don't work well outside a browser environment.

/*global katex */

// Scans `text` from `startIndex` looking for the closing `delimiter`.
// A backslash escapes the following character, and delimiters nested inside
// `{...}` braces are ignored (braceLevel tracks depth). Returns the index of
// the first character of the closing delimiter, or -1 if none is found.
const findEndOfMath = function(delimiter, text, startIndex) {
  // Adapted from
  // https://github.com/Khan/perseus/blob/master/src/perseus-markdown.jsx
  let index = startIndex;
  let braceLevel = 0;

  const delimLength = delimiter.length;

  while (index < text.length) {
    const character = text[index];

    if (
      braceLevel <= 0 &&
      text.slice(index, index + delimLength) === delimiter
    ) {
      return index;
    } else if (character === "\\") {
      // skip the escaped character so an escaped delimiter is not matched
      index++;
    } else if (character === "{") {
      braceLevel++;
    } else if (character === "}") {
      braceLevel--;
    }

    index++;
  }

  return -1;
};

// Splits every "text" node in `startData` into alternating "text" and "math"
// nodes using one delimiter pair. Math nodes carry:
//   data    — the contents between the delimiters,
//   rawData — the contents including the delimiters (used as fallback text),
//   display — whether this delimiter pair denotes display-mode math.
// Non-"text" nodes (already-extracted math) pass through unchanged.
const splitAtDelimiters = function(startData, leftDelim, rightDelim, display) {
  const finalData = [];

  for (let i = 0; i < startData.length; i++) {
    if (startData[i].type === "text") {
      const text = startData[i].data;

      let lookingForLeft = true;
      let currIndex = 0;
      let nextIndex;

      nextIndex = text.indexOf(leftDelim);
      if (nextIndex !== -1) {
        currIndex = nextIndex;
        finalData.push({
          type: "text",
          data: text.slice(0, currIndex)
        });
        lookingForLeft = false;
      }

      while (true) {
        // eslint-disable-line no-constant-condition
        if (lookingForLeft) {
          nextIndex = text.indexOf(leftDelim, currIndex);
          if (nextIndex === -1) {
            break;
          }

          finalData.push({
            type: "text",
            data: text.slice(currIndex, nextIndex)
          });

          currIndex = nextIndex;
        } else {
          nextIndex = findEndOfMath(
            rightDelim,
            text,
            currIndex + leftDelim.length
          );
          if (nextIndex === -1) {
            // unmatched left delimiter: the tail is emitted as text below
            break;
          }

          finalData.push({
            type: "math",
            data: text.slice(currIndex + leftDelim.length, nextIndex),
            rawData: text.slice(currIndex, nextIndex + rightDelim.length),
            display: display
          });

          currIndex = nextIndex + rightDelim.length;
        }

        lookingForLeft = !lookingForLeft;
      }

      // remaining text after the last complete math span
      finalData.push({
        type: "text",
        data: text.slice(currIndex)
      });
    } else {
      finalData.push(startData[i]);
    }
  }

  return finalData;
};

// Applies splitAtDelimiters once per configured delimiter pair, starting from
// a single "text" node holding the whole input string.
const splitWithDelimiters = function(text, delimiters) {
  let data = [{ type: "text", data: text }];
  for (let i = 0; i < delimiters.length; i++) {
    const delimiter = delimiters[i];
    data = splitAtDelimiters(
      data,
      delimiter.left,
      delimiter.right,
      delimiter.display || false
    );
  }
  return data;
};

/* Note: optionsCopy is mutated by this method. If it is ever exposed in the
 * API, we should copy it before mutating.
- */ - const renderMathInText = function(text, optionsCopy) { - const data = splitWithDelimiters(text, optionsCopy.delimiters); - const fragment = document.createDocumentFragment(); - - for (let i = 0; i < data.length; i++) { - if (data[i].type === "text") { - fragment.appendChild(document.createTextNode(data[i].data)); - } else { - const tag = document.createElement("d-math"); - const math = data[i].data; - // Override any display mode defined in the settings with that - // defined by the text itself - optionsCopy.displayMode = data[i].display; - try { - tag.textContent = math; - if (optionsCopy.displayMode) { - tag.setAttribute("block", ""); - } - } catch (e) { - if (!(e instanceof katex.ParseError)) { - throw e; - } - optionsCopy.errorCallback( - "KaTeX auto-render: Failed to parse `" + data[i].data + "` with ", - e - ); - fragment.appendChild(document.createTextNode(data[i].rawData)); - continue; - } - fragment.appendChild(tag); - } - } - - return fragment; - }; - - const renderElem = function(elem, optionsCopy) { - for (let i = 0; i < elem.childNodes.length; i++) { - const childNode = elem.childNodes[i]; - if (childNode.nodeType === 3) { - // Text node - const text = childNode.textContent; - if (optionsCopy.mightHaveMath(text)) { - const frag = renderMathInText(text, optionsCopy); - i += frag.childNodes.length - 1; - elem.replaceChild(frag, childNode); - } - } else if (childNode.nodeType === 1) { - // Element node - const shouldRender = - optionsCopy.ignoredTags.indexOf(childNode.nodeName.toLowerCase()) === - -1; - - if (shouldRender) { - renderElem(childNode, optionsCopy); - } - } - // Otherwise, it's something else, and ignore it. 
- } - }; - - const defaultAutoRenderOptions = { - delimiters: [ - { left: "$$", right: "$$", display: true }, - { left: "\\[", right: "\\]", display: true }, - { left: "\\(", right: "\\)", display: false } - // LaTeX uses this, but it ruins the display of normal `$` in text: - // {left: '$', right: '$', display: false}, - ], - - ignoredTags: [ - "script", - "noscript", - "style", - "textarea", - "pre", - "code", - "svg" - ], - - errorCallback: function(msg, err) { - console.error(msg, err); - } - }; - - const renderMathInElement = function(elem, options) { - if (!elem) { - throw new Error("No element provided to render"); - } - - const optionsCopy = Object.assign({}, defaultAutoRenderOptions, options); - const delimiterStrings = optionsCopy.delimiters.flatMap(d => [ - d.left, - d.right - ]); - const mightHaveMath = text => - delimiterStrings.some(d => text.indexOf(d) !== -1); - optionsCopy.mightHaveMath = mightHaveMath; - renderElem(elem, optionsCopy); - }; - - // Copyright 2018 The Distill Template Authors - - const katexJSURL = 'https://distill.pub/third-party/katex/katex.min.js'; - const katexCSSTag = ''; - - const T = Template('d-math', ` -${katexCSSTag} - - -`); - - // DMath, not Math, because that would conflict with the JS built-in - class DMath extends Mutating(T(HTMLElement)) { - - static set katexOptions(options) { - DMath._katexOptions = options; - if (DMath.katexOptions.delimiters) { - if (!DMath.katexAdded) { - DMath.addKatex(); - } else { - DMath.katexLoadedCallback(); - } - } - } - - static get katexOptions() { - if (!DMath._katexOptions) { - DMath._katexOptions = { - delimiters: [ { 'left':'$$', 'right':'$$', 'display': false } ] - }; - } - return DMath._katexOptions; - } - - static katexLoadedCallback() { - // render all d-math tags - const mathTags = document.querySelectorAll('d-math'); - for (const mathTag of mathTags) { - mathTag.renderContent(); - } - // transform inline delimited math to d-math tags - if (DMath.katexOptions.delimiters) { - 
renderMathInElement(document.body, DMath.katexOptions); - } - } - - static addKatex() { - // css tag can use this convenience function - document.head.insertAdjacentHTML('beforeend', katexCSSTag); - // script tag has to be created to work properly - const scriptTag = document.createElement('script'); - scriptTag.src = katexJSURL; - scriptTag.async = true; - scriptTag.onload = DMath.katexLoadedCallback; - scriptTag.crossorigin = 'anonymous'; - document.head.appendChild(scriptTag); - - DMath.katexAdded = true; - } - - get options() { - const localOptions = { displayMode: this.hasAttribute('block') }; - return Object.assign(localOptions, DMath.katexOptions); - } - - connectedCallback() { - super.connectedCallback(); - if (!DMath.katexAdded) { - DMath.addKatex(); - } - } - - renderContent() { - if (typeof katex !== 'undefined') { - const container = this.root.querySelector('#katex-container'); - katex.render(this.textContent, container, this.options); - } - } - - } - - DMath.katexAdded = false; - DMath.inlineMathRendered = false; - window.DMath = DMath; // TODO: check if this can be removed, or if we should expose a distill global - - // Copyright 2018 The Distill Template Authors - // - // Licensed under the Apache License, Version 2.0 (the "License"); - // you may not use this file except in compliance with the License. - // You may obtain a copy of the License at - // - // http://www.apache.org/licenses/LICENSE-2.0 - // - // Unless required by applicable law or agreed to in writing, software - // distributed under the License is distributed on an "AS IS" BASIS, - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - // See the License for the specific language governing permissions and - // limitations under the License. 

// Collects the set of citation keys used by all <d-cite> elements in `dom`.
// Each tag's `key` (or legacy `bibtex-key`) attribute may hold a
// comma-separated list; keys are trimmed and de-duplicated. Returns an array
// of unique keys in first-appearance order.
function collect_citations(dom = document) {
  const citations = new Set();
  const citeTags = dom.querySelectorAll("d-cite");
  for (const tag of citeTags) {
    const keyString = tag.getAttribute("key") || tag.getAttribute("bibtex-key");
    const keys = keyString.split(",").map(k => k.trim());
    for (const key of keys) {
      citations.add(key);
    }
  }
  return [...citations];
}

// Formats a BibTeX author field ("A and B and C") via `template`, which may
// contain ${F} (first names), ${L} (last name) and ${I} (initials, dotted).
// Names may be "Last, First" or "First ... Last" form. Authors are joined
// with `sep`, using `finalSep` (if given) before the last author.
// NOTE(review): if the template uses ${F} but a name has no first names,
// the literal string "undefined" is substituted — confirm intent upstream.
function author_string(ent, template, sep, finalSep) {
  if (ent.author == null) {
    return "";
  }
  var names = ent.author.split(" and ");
  let name_strings = names.map(name => {
    name = name.trim();
    // `var` declarations below are deliberately function-scoped so the
    // branches can share `last`/`firsts`.
    if (name.indexOf(",") != -1) {
      var last = name.split(",")[0].trim();
      var firsts = name.split(",")[1];
    } else if (name.indexOf(" ") != -1) {
      var last = name
        .split(" ")
        .slice(-1)[0]
        .trim();
      var firsts = name
        .split(" ")
        .slice(0, -1)
        .join(" ");
    } else {
      // single token: treat it as the last name
      var last = name.trim();
    }
    var initials = "";
    if (firsts != undefined) {
      initials = firsts
        .trim()
        .split(" ")
        .map(s => s.trim()[0]);
      initials = initials.join(".") + ".";
    }
    return template
      .replace("${F}", firsts)
      .replace("${L}", last)
      .replace("${I}", initials)
      .trim(); // in case one of first or last was empty
  });
  if (names.length > 1) {
    var str = name_strings.slice(0, names.length - 1).join(sep);
    str += (finalSep || sep) + name_strings[names.length - 1];
    return str;
  } else {
    return name_strings[0];
  }
}

// Builds the venue part of a citation: journal/booktitle, optional volume
// and issue, optional pages, then optional publisher (always followed by a
// period).
function venue_string(ent) {
  var cite = ent.journal || ent.booktitle || "";
  if ("volume" in ent) {
    var issue = ent.issue || ent.number;
    issue = issue != undefined ? "(" + issue + ")" : "";
    cite += ", Vol " + ent.volume + issue;
  }
  if ("pages" in ent) {
    cite += ", pp. " + ent.pages;
  }
  if (cite != "") cite += ". ";
  if ("publisher" in ent) {
    cite += ent.publisher;
    if (cite[cite.length - 1] != ".") cite += ".";
  }
  return cite;
}

// Builds a "[PDF]"/"[HTML]"/"[link]" suffix for an entry with a `url` field;
// arXiv abstract URLs are rewritten to their PDF URL first.
// NOTE(review): the surrounding <a href> markup appears to have been
// stripped from these template literals during extraction — restore from
// the upstream distill template source.
function link_string(ent) {
  if ("url" in ent) {
    var url = ent.url;
    var arxiv_match = /arxiv\.org\/abs\/([0-9\.]*)/.exec(url);
    if (arxiv_match != null) {
      url = `http://arxiv.org/pdf/${arxiv_match[1]}.pdf`;
    }

    if (url.slice(-4) == ".pdf") {
      var label = "PDF";
    } else if (url.slice(-5) == ".html") {
      var label = "HTML";
    }
    return `  [${label || "link"}]`;
  } /* else if ("doi" in ent){
    return `  [DOI]`;
  }*/ else {
    return "";
  }
}

// Builds a " DOI: ..." suffix for entries that carry a `doi` field.
// NOTE(review): the `new_line` branch looks like it originally emitted a
// <br/> tag that was stripped during extraction — confirm upstream.
function doi_string(ent, new_line) {
  if ("doi" in ent) {
    return `${new_line ? "" : ""} DOI: ${ent.doi}`;
  } else {
    return "";
  }
}

// Wraps the entry title for display.
// NOTE(review): surrounding markup appears stripped here as well.
function title_string(ent) {
  return '' + ent.title + " ";
}

// Full bibliography entry: title, link, authors, year/date, venue, DOI.
// Returns "?" for a missing entry so broken keys are visible in the output.
function bibliography_cite(ent, fancy) {
  if (ent) {
    var cite = title_string(ent);
    cite += link_string(ent) + "";
    if (ent.author) {
      cite += author_string(ent, "${L}, ${I}", ", ", " and ");
      if (ent.year || ent.date) {
        cite += ", ";
      }
    }
    if (ent.year || ent.date) {
      cite += (ent.year || ent.date) + ". ";
    } else {
      cite += ". ";
    }
    cite += venue_string(ent);
    cite += doi_string(ent);
    return cite;
    /*var cite = author_string(ent, "${L}, ${I}", ", ", " and ");
    if (ent.year || ent.date){
      cite += ", " + (ent.year || ent.date) + ". "
    } else {
      cite += ". "
    }
    cite += "" + ent.title + ". ";
    cite += venue_string(ent);
    cite += doi_string(ent);
    cite += link_string(ent);
    return cite*/
  } else {
    return "?";
  }
}

// Compact citation used for hover previews: title + link, then authors and
// venue, broken onto two parts when the combined string is long.
// NOTE(review): `ent.year` is concatenated unguarded — a missing year shows
// as "undefined"; markup also appears stripped from these strings.
function hover_cite(ent) {
  if (ent) {
    var cite = "";
    cite += "" + ent.title + "";
    cite += link_string(ent);
    cite += "";

    var a_str = author_string(ent, "${I} ${L}", ", ") + ".";
    var v_str =
      venue_string(ent).trim() + " " + ent.year + ". " + doi_string(ent, true);

    if ((a_str + v_str).length < Math.min(40, ent.title.length)) {
      cite += a_str + " " + v_str;
    } else {
      cite += a_str + "" + v_str;
    }
    return cite;
  } else {
    return "?";
  }
}

// True once the DOM is parsed (readyState "interactive" or "complete").
function domContentLoaded() {
  return ['interactive', 'complete'].indexOf(document.readyState) !== -1;
}

// Copyright 2018 The Distill Template Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

function _moveLegacyAffiliationFormatIntoArray(frontMatter) {
  // authors used to have properties "affiliation" and "affiliationURL".
  // We now encourage using an array for affiliations containing objects with
  // properties "name" and "url".
  // Migrate each author in place; new-style data wins if both are present.
  for (let author of frontMatter.authors) {
    const hasOldStyle = Boolean(author.affiliation);
    const hasNewStyle = Boolean(author.affiliations);
    if (!hasOldStyle) continue;
    if (hasNewStyle) {
      console.warn(`Author ${author.author} has both old-style ("affiliation" & "affiliationURL") and new style ("affiliations") affiliation information!`);
    } else {
      let newAffiliation = {
        "name": author.affiliation
      };
      if (author.affiliationURL) newAffiliation.url = author.affiliationURL;
      author.affiliations = [newAffiliation];
    }
  }
  return frontMatter
}

// Parses the JSON front-matter <script> child of a <d-front-matter> element
// and migrates legacy affiliation fields. Returns {} when no usable data is
// found (an error is logged in that case).
// NOTE(review): `type.split` throws if the script tag has no `type`
// attribute (getAttribute returns null) — confirm whether a guard is wanted.
function parseFrontmatter(element) {
  const scriptTag = element.firstElementChild;
  if (scriptTag) {
    const type = scriptTag.getAttribute('type');
    if (type.split('/')[1] == 'json') {
      const content = scriptTag.textContent;
      const parsed = JSON.parse(content);
      return _moveLegacyAffiliationFormatIntoArray(parsed);
    } else {
      console.error('Distill only supports JSON frontmatter tags anymore; no more YAML.');
    }
  } else {
    console.error('You added a frontmatter tag but did not provide a script tag with front matter data in it. Please take a look at our templates.');
  }
  return {};
}

// Custom element <d-front-matter>: watches its subtree for changes to the
// embedded front-matter script and broadcasts parsed data via a
// document-level 'onFrontMatterChanged' CustomEvent.
class FrontMatter$1 extends HTMLElement {

  static get is() { return 'd-front-matter'; }

  constructor() {
    super();

    const options = {childList: true, characterData: true, subtree: true};
    const observer = new MutationObserver( (entries) => {
      for (const entry of entries) {
        if (entry.target.nodeName === 'SCRIPT' || entry.type === 'characterData') {
          const data = parseFrontmatter(this);
          this.notify(data);
        }
      }
    });
    observer.observe(this, options);
  }

  // Dispatches the parsed front-matter on `document` so the Controller can
  // react without holding a reference to this element.
  notify(data) {
    const options = { detail: data, bubbles: true };
    const event = new CustomEvent('onFrontMatterChanged', options);
    document.dispatchEvent(event);
  }

}

// Copyright 2018 The Distill Template Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// no appendix -> add appendix
// title in front, no h1 -> add it
// no title in front, h1 -> read and put into frontMatter
// footnote -> footnote list
// break up bib
// if citation, no bib-list -> add citation-list

// if authors, no byline -> add byline

// Inserts the optional structural components (byline, title, interstitial,
// appendix, footnote list, citation list) that an article may omit.
function optionalComponents(dom, data) {
  const body = dom.body;
  const article = body.querySelector('d-article');

  // If we don't have an article tag, something weird is going on—giving up.
- if (!article) { - console.warn('No d-article tag found; skipping adding optional components!'); - return; - } - - let byline = dom.querySelector('d-byline'); - if (!byline) { - if (data.authors) { - byline = dom.createElement('d-byline'); - body.insertBefore(byline, article); - } else { - console.warn('No authors found in front matter; please add them before submission!'); - } - } - - let title = dom.querySelector('d-title'); - if (!title) { - title = dom.createElement('d-title'); - body.insertBefore(title, byline); - } - - let h1 = title.querySelector('h1'); - if (!h1) { - h1 = dom.createElement('h1'); - h1.textContent = data.title; - title.insertBefore(h1, title.firstChild); - } - - const hasPassword = typeof data.password !== 'undefined'; - let interstitial = body.querySelector('d-interstitial'); - if (hasPassword && !interstitial) { - const inBrowser = typeof window !== 'undefined'; - const onLocalhost = inBrowser && window.location.hostname.includes('localhost'); - if (!inBrowser || !onLocalhost) { - interstitial = dom.createElement('d-interstitial'); - interstitial.password = data.password; - body.insertBefore(interstitial, body.firstChild); - } - } else if (!hasPassword && interstitial) { - interstitial.parentElement.removeChild(this); - } - - let appendix = dom.querySelector('d-appendix'); - if (!appendix) { - appendix = dom.createElement('d-appendix'); - dom.body.appendChild(appendix); - } - - let footnoteList = dom.querySelector('d-footnote-list'); - if (!footnoteList) { - footnoteList = dom.createElement('d-footnote-list'); - appendix.appendChild(footnoteList); - } - - let citationList = dom.querySelector('d-citation-list'); - if (!citationList) { - citationList = dom.createElement('d-citation-list'); - appendix.appendChild(citationList); - } - - } - - // Copyright 2018 The Distill Template Authors - - const frontMatter = new FrontMatter(); - - const Controller = { - frontMatter: frontMatter, - waitingOn: { - bibliography: [], - citations: [] - }, - 
listeners: { - onCiteKeyCreated(event) { - const [citeTag, keys] = event.detail; - - // ensure we have citations - if (!frontMatter.citationsCollected) { - // console.debug('onCiteKeyCreated, but unresolved dependency ("citations"). Enqueing.'); - Controller.waitingOn.citations.push(() => - Controller.listeners.onCiteKeyCreated(event) - ); - return; - } - - // ensure we have a loaded bibliography - if (!frontMatter.bibliographyParsed) { - // console.debug('onCiteKeyCreated, but unresolved dependency ("bibliography"). Enqueing.'); - Controller.waitingOn.bibliography.push(() => - Controller.listeners.onCiteKeyCreated(event) - ); - return; - } - - const numbers = keys.map(key => frontMatter.citations.indexOf(key)); - citeTag.numbers = numbers; - const entries = keys.map(key => frontMatter.bibliography.get(key)); - citeTag.entries = entries; - }, - - onCiteKeyChanged() { - // const [citeTag, keys] = event.detail; - - // update citations - frontMatter.citations = collect_citations(); - frontMatter.citationsCollected = true; - for (const waitingCallback of Controller.waitingOn.citations.slice()) { - waitingCallback(); - } - - // update bibliography - const citationListTag = document.querySelector("d-citation-list"); - const bibliographyEntries = new Map( - frontMatter.citations.map(citationKey => { - return [citationKey, frontMatter.bibliography.get(citationKey)]; - }) - ); - citationListTag.citations = bibliographyEntries; - - const citeTags = document.querySelectorAll("d-cite"); - for (const citeTag of citeTags) { - console.log(citeTag); - const keys = citeTag.keys; - const numbers = keys.map(key => frontMatter.citations.indexOf(key)); - citeTag.numbers = numbers; - const entries = keys.map(key => frontMatter.bibliography.get(key)); - citeTag.entries = entries; - } - }, - - onCiteKeyRemoved(event) { - Controller.listeners.onCiteKeyChanged(event); - }, - - onBibliographyChanged(event) { - const citationListTag = document.querySelector("d-citation-list"); - - const 
bibliography = event.detail; - - frontMatter.bibliography = bibliography; - frontMatter.bibliographyParsed = true; - for (const waitingCallback of Controller.waitingOn.bibliography.slice()) { - waitingCallback(); - } - - // ensure we have citations - if (!frontMatter.citationsCollected) { - Controller.waitingOn.citations.push(function() { - Controller.listeners.onBibliographyChanged({ - target: event.target, - detail: event.detail - }); - }); - return; - } - - if (citationListTag.hasAttribute("distill-prerendered")) { - console.debug("Citation list was prerendered; not updating it."); - } else { - const entries = new Map( - frontMatter.citations.map(citationKey => { - return [citationKey, frontMatter.bibliography.get(citationKey)]; - }) - ); - citationListTag.citations = entries; - } - }, - - onFootnoteChanged() { - // const footnote = event.detail; - //TODO: optimize to only update current footnote - const footnotesList = document.querySelector("d-footnote-list"); - if (footnotesList) { - const footnotes = document.querySelectorAll("d-footnote"); - footnotesList.footnotes = footnotes; - } - }, - - onFrontMatterChanged(event) { - const data = event.detail; - mergeFromYMLFrontmatter(frontMatter, data); - - const interstitial = document.querySelector("d-interstitial"); - if (interstitial) { - if (typeof frontMatter.password !== "undefined") { - interstitial.password = frontMatter.password; - } else { - interstitial.parentElement.removeChild(interstitial); - } - } - - const prerendered = document.body.hasAttribute("distill-prerendered"); - if (!prerendered && domContentLoaded()) { - optionalComponents(document, frontMatter); - - const appendix = document.querySelector("distill-appendix"); - if (appendix) { - appendix.frontMatter = frontMatter; - } - - const byline = document.querySelector("d-byline"); - if (byline) { - byline.frontMatter = frontMatter; - } - - if (data.katex) { - DMath.katexOptions = data.katex; - } - } - }, - - DOMContentLoaded() { - if 
(Controller.loaded) { - console.warn( - "Controller received DOMContentLoaded but was already loaded!" - ); - return; - } else if (!domContentLoaded()) { - console.warn( - "Controller received DOMContentLoaded at document.readyState: " + - document.readyState + - "!" - ); - return; - } else { - Controller.loaded = true; - console.debug("Runlevel 4: Controller running DOMContentLoaded"); - } - - const frontMatterTag = document.querySelector("d-front-matter"); - if (frontMatterTag) { - const data = parseFrontmatter(frontMatterTag); - Controller.listeners.onFrontMatterChanged({ detail: data }); - } - - // Resolving "citations" dependency due to initial DOM load - frontMatter.citations = collect_citations(); - frontMatter.citationsCollected = true; - for (const waitingCallback of Controller.waitingOn.citations.slice()) { - waitingCallback(); - } - - if (frontMatter.bibliographyParsed) { - for (const waitingCallback of Controller.waitingOn.bibliography.slice()) { - waitingCallback(); - } - } - - const footnotesList = document.querySelector("d-footnote-list"); - if (footnotesList) { - const footnotes = document.querySelectorAll("d-footnote"); - footnotesList.footnotes = footnotes; - } - } - } // listeners - }; // Controller - - var base = "/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nhtml {\n font-size: 14px;\n\tline-height: 1.6em;\n /* font-family: \"Libre Franklin\", \"Helvetica Neue\", 
sans-serif; */\n font-family: -apple-system, BlinkMacSystemFont, \"Segoe UI\", Roboto, Oxygen, Ubuntu, Cantarell, \"Fira Sans\", \"Droid Sans\", \"Helvetica Neue\", Arial, sans-serif;\n /*, \"Apple Color Emoji\", \"Segoe UI Emoji\", \"Segoe UI Symbol\";*/\n text-size-adjust: 100%;\n -ms-text-size-adjust: 100%;\n -webkit-text-size-adjust: 100%;\n}\n\n@media(min-width: 768px) {\n html {\n font-size: 16px;\n }\n}\n\nbody {\n margin: 0;\n}\n\na {\n color: #004276;\n}\n\nfigure {\n margin: 0;\n}\n\ntable {\n\tborder-collapse: collapse;\n\tborder-spacing: 0;\n}\n\ntable th {\n\ttext-align: left;\n}\n\ntable thead {\n border-bottom: 1px solid rgba(0, 0, 0, 0.05);\n}\n\ntable thead th {\n padding-bottom: 0.5em;\n}\n\ntable tbody :first-child td {\n padding-top: 0.5em;\n}\n\npre {\n overflow: auto;\n max-width: 100%;\n}\n\np {\n margin-top: 0;\n margin-bottom: 1em;\n}\n\nsup, sub {\n vertical-align: baseline;\n position: relative;\n top: -0.4em;\n line-height: 1em;\n}\n\nsub {\n top: 0.4em;\n}\n\n.kicker,\n.marker {\n font-size: 15px;\n font-weight: 600;\n color: rgba(0, 0, 0, 0.5);\n}\n\n\n/* Headline */\n\n@media(min-width: 1024px) {\n d-title h1 span {\n display: block;\n }\n}\n\n/* Figure */\n\nfigure {\n position: relative;\n margin-bottom: 2.5em;\n margin-top: 1.5em;\n}\n\nfigcaption+figure {\n\n}\n\nfigure img {\n width: 100%;\n}\n\nfigure svg text,\nfigure svg tspan {\n}\n\nfigcaption,\n.figcaption {\n color: rgba(0, 0, 0, 0.6);\n font-size: 12px;\n line-height: 1.5em;\n}\n\n@media(min-width: 1024px) {\nfigcaption,\n.figcaption {\n font-size: 13px;\n }\n}\n\nfigure.external img {\n background: white;\n border: 1px solid rgba(0, 0, 0, 0.1);\n box-shadow: 0 1px 8px rgba(0, 0, 0, 0.1);\n padding: 18px;\n box-sizing: border-box;\n}\n\nfigcaption a {\n color: rgba(0, 0, 0, 0.6);\n}\n\nfigcaption b,\nfigcaption strong, {\n font-weight: 600;\n color: rgba(0, 0, 0, 1.0);\n}\n";

// Grid-layout stylesheet shared by the distill components, stored as a JS
// string (injected into the page at runtime elsewhere in this bundle).
var layout = "/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n@supports not (display: grid) {\n .base-grid,\n distill-header,\n d-title,\n d-abstract,\n d-article,\n d-appendix,\n distill-appendix,\n d-byline,\n d-footnote-list,\n d-citation-list,\n distill-footer {\n display: block;\n padding: 8px;\n }\n}\n\n.base-grid,\ndistill-header,\nd-title,\nd-abstract,\nd-article,\nd-appendix,\ndistill-appendix,\nd-byline,\nd-footnote-list,\nd-citation-list,\ndistill-footer {\n display: grid;\n justify-items: stretch;\n grid-template-columns: [screen-start] 8px [page-start kicker-start text-start gutter-start middle-start] 1fr 1fr 1fr 1fr 1fr 1fr 1fr 1fr [text-end page-end gutter-end kicker-end middle-end] 8px [screen-end];\n grid-column-gap: 8px;\n}\n\n.grid {\n display: grid;\n grid-column-gap: 8px;\n}\n\n@media(min-width: 768px) {\n .base-grid,\n distill-header,\n d-title,\n d-abstract,\n d-article,\n d-appendix,\n distill-appendix,\n d-byline,\n d-footnote-list,\n d-citation-list,\n distill-footer {\n grid-template-columns: [screen-start] 1fr [page-start kicker-start middle-start text-start] 45px 45px 45px 45px 45px 45px 45px 45px [ kicker-end text-end gutter-start] 45px [middle-end] 45px [page-end gutter-end] 1fr [screen-end];\n grid-column-gap: 16px;\n }\n\n .grid {\n grid-column-gap: 16px;\n }\n}\n\n@media(min-width: 1000px) {\n .base-grid,\n distill-header,\n d-title,\n d-abstract,\n d-article,\n d-appendix,\n distill-appendix,\n d-byline,\n d-footnote-list,\n d-citation-list,\n distill-footer {\n grid-template-columns: [screen-start] 1fr [page-start kicker-start] 50px [middle-start] 50px [text-start kicker-end] 50px 50px 50px 50px 50px 50px 50px 50px [text-end gutter-start] 50px [middle-end] 50px [page-end gutter-end] 1fr [screen-end];\n grid-column-gap: 16px;\n }\n\n .grid {\n grid-column-gap: 16px;\n }\n}\n\n@media(min-width: 1180px) {\n .base-grid,\n distill-header,\n d-title,\n d-abstract,\n d-article,\n d-appendix,\n distill-appendix,\n d-byline,\n d-footnote-list,\n d-citation-list,\n distill-footer {\n grid-template-columns: [screen-start] 1fr [page-start kicker-start] 60px [middle-start] 60px [text-start kicker-end] 60px 60px 60px 60px 60px 60px 60px 60px [text-end gutter-start] 60px [middle-end] 60px [page-end gutter-end] 1fr [screen-end];\n grid-column-gap: 32px;\n }\n\n .grid {\n grid-column-gap: 32px;\n }\n}\n\n\n\n\n.base-grid {\n grid-column: screen;\n}\n\n/* .l-body,\nd-article > * {\n grid-column: text;\n}\n\n.l-page,\nd-title > *,\nd-figure {\n grid-column: page;\n} */\n\n.l-gutter {\n grid-column: gutter;\n}\n\n.l-text,\n.l-body {\n grid-column: text;\n}\n\n.l-page {\n grid-column: page;\n}\n\n.l-body-outset {\n grid-column: middle;\n}\n\n.l-page-outset {\n grid-column: page;\n}\n\n.l-screen {\n grid-column: screen;\n}\n\n.l-screen-inset {\n grid-column: screen;\n padding-left: 16px;\n padding-left: 16px;\n}\n\n\n/* Aside */\n\nd-article aside {\n grid-column: gutter;\n font-size: 12px;\n line-height: 1.6em;\n color: rgba(0, 0, 0, 0.6)\n}\n\n@media(min-width: 768px) {\n aside {\n grid-column: gutter;\n }\n\n .side {\n grid-column: gutter;\n }\n}\n";

// Print-media stylesheet (page size, page-break rules, header/footer hiding).
var print = "/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n@media print {\n\n @page {\n size: 8in 11in;\n @bottom-right {\n content: counter(page) \" of \" counter(pages);\n }\n }\n\n html {\n /* no general margins -- CSS Grid takes care of those */\n }\n\n p, code {\n page-break-inside: avoid;\n }\n\n h2, h3 {\n page-break-after: avoid;\n }\n\n d-header {\n visibility: hidden;\n }\n\n d-footer {\n display: none!important;\n }\n\n}\n";

// Styles for the <d-byline> author/affiliation block.
var byline = "/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nd-byline {\n contain: style;\n overflow: hidden;\n border-top: 1px solid rgba(0, 0, 0, 0.1);\n font-size: 0.8rem;\n line-height: 1.8em;\n padding: 1.5rem 0;\n min-height: 1.8em;\n}\n\n\nd-byline .byline {\n grid-template-columns: 1fr 1fr;\n grid-column: text;\n}\n\n@media(min-width: 768px) {\n d-byline .byline {\n grid-template-columns: 1fr 1fr 1fr 1fr;\n }\n}\n\nd-byline .authors-affiliations {\n grid-column-end: span 2;\n grid-template-columns: 1fr 1fr;\n margin-bottom: 1em;\n}\n\n@media(min-width: 768px) {\n d-byline .authors-affiliations {\n margin-bottom: 0;\n }\n}\n\nd-byline h3 {\n font-size: 0.6rem;\n font-weight: 400;\n color: rgba(0, 0, 0, 0.5);\n margin: 0;\n text-transform: uppercase;\n}\n\nd-byline p {\n margin: 0;\n}\n\nd-byline a,\nd-article d-byline a {\n color: rgba(0, 0, 0, 0.8);\n text-decoration: none;\n border-bottom: none;\n}\n\nd-article d-byline a:hover {\n text-decoration: underline;\n border-bottom: none;\n}\n\nd-byline p.author {\n font-weight: 500;\n}\n\nd-byline .affiliations {\n\n}\n";

// Main <d-article> body typography, headings, tables, code/math blocks.
var article = "/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nd-article {\n contain: layout style;\n overflow-x: hidden;\n border-top: 1px solid rgba(0, 0, 0, 0.1);\n padding-top: 2rem;\n color: rgba(0, 0, 0, 0.8);\n}\n\nd-article > * {\n grid-column: text;\n}\n\n@media(min-width: 768px) {\n d-article {\n font-size: 16px;\n }\n}\n\n@media(min-width: 1024px) {\n d-article {\n font-size: 1.06rem;\n line-height: 1.7em;\n }\n}\n\n\n/* H2 */\n\n\nd-article .marker {\n text-decoration: none;\n border: none;\n counter-reset: section;\n grid-column: kicker;\n line-height: 1.7em;\n}\n\nd-article .marker:hover {\n border: none;\n}\n\nd-article .marker span {\n padding: 0 3px 4px;\n border-bottom: 1px solid rgba(0, 0, 0, 0.2);\n position: relative;\n top: 4px;\n}\n\nd-article .marker:hover span {\n color: rgba(0, 0, 0, 0.7);\n border-bottom: 1px solid rgba(0, 0, 0, 0.7);\n}\n\nd-article h2 {\n font-weight: 600;\n font-size: 24px;\n line-height: 1.25em;\n margin: 2rem 0 1.5rem 0;\n border-bottom: 1px solid rgba(0, 0, 0, 0.1);\n padding-bottom: 1rem;\n}\n\n@media(min-width: 1024px) {\n d-article h2 {\n font-size: 36px;\n }\n}\n\n/* H3 */\n\nd-article h3 {\n font-weight: 700;\n font-size: 18px;\n line-height: 1.4em;\n margin-bottom: 1em;\n margin-top: 2em;\n}\n\n@media(min-width: 1024px) {\n d-article h3 {\n font-size: 20px;\n }\n}\n\n/* H4 */\n\nd-article h4 {\n font-weight: 600;\n text-transform: uppercase;\n font-size: 14px;\n line-height: 1.4em;\n}\n\nd-article a {\n color: inherit;\n}\n\nd-article p,\nd-article ul,\nd-article ol,\nd-article blockquote {\n margin-top: 0;\n margin-bottom: 1em;\n margin-left: 0;\n margin-right: 0;\n}\n\nd-article blockquote {\n border-left: 2px solid rgba(0, 0, 0, 0.2);\n padding-left: 2em;\n font-style: italic;\n color: rgba(0, 0, 0, 0.6);\n}\n\nd-article a {\n border-bottom: 1px solid rgba(0, 0, 0, 0.4);\n text-decoration: none;\n}\n\nd-article a:hover {\n border-bottom: 1px solid rgba(0, 0, 0, 0.8);\n}\n\nd-article .link {\n text-decoration: underline;\n cursor: pointer;\n}\n\nd-article ul,\nd-article ol {\n padding-left: 24px;\n}\n\nd-article li {\n margin-bottom: 1em;\n margin-left: 0;\n padding-left: 0;\n}\n\nd-article li:last-child {\n margin-bottom: 0;\n}\n\nd-article pre {\n font-size: 14px;\n margin-bottom: 20px;\n}\n\nd-article hr {\n grid-column: screen;\n width: 100%;\n border: none;\n border-bottom: 1px solid rgba(0, 0, 0, 0.1);\n margin-top: 60px;\n margin-bottom: 60px;\n}\n\nd-article section {\n margin-top: 60px;\n margin-bottom: 60px;\n}\n\nd-article span.equation-mimic {\n font-family: georgia;\n font-size: 115%;\n font-style: italic;\n}\n\nd-article > d-code,\nd-article section > d-code {\n display: block;\n}\n\nd-article > d-math[block],\nd-article section > d-math[block] {\n display: block;\n}\n\n@media (max-width: 768px) {\n d-article > d-code,\n d-article section > d-code,\n d-article > d-math[block],\n d-article section > d-math[block] {\n overflow-x: scroll;\n -ms-overflow-style: none; // IE 10+\n overflow: -moz-scrollbars-none; // Firefox\n }\n\n d-article > d-code::-webkit-scrollbar,\n d-article section > d-code::-webkit-scrollbar,\n d-article > d-math[block]::-webkit-scrollbar,\n d-article section > d-math[block]::-webkit-scrollbar {\n display: none; // Safari and Chrome\n }\n}\n\nd-article .citation {\n color: #668;\n cursor: pointer;\n}\n\nd-include {\n width: auto;\n display: block;\n}\n\nd-figure {\n contain: layout style;\n}\n\n/* KaTeX */\n\n.katex, .katex-prerendered {\n contain: style;\n display: inline-block;\n}\n\n/* Tables */\n\nd-article table {\n border-collapse: collapse;\n margin-bottom: 1.5rem;\n border-bottom: 1px solid rgba(0, 0, 0, 0.2);\n}\n\nd-article table th {\n border-bottom: 1px solid rgba(0, 0, 0, 0.2);\n}\n\nd-article table td {\n border-bottom: 1px solid rgba(0, 0, 0, 0.05);\n}\n\nd-article table tr:last-of-type td {\n border-bottom: none;\n}\n\nd-article table th,\nd-article table td {\n font-size: 15px;\n padding: 2px 8px;\n}\n\nd-article table tbody :first-child td {\n padding-top: 2px;\n}\n";

// Styles for the <d-title> header block (declaration continues beyond this
// chunk; the string is intentionally left open here).
var title = "/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nd-title {\n padding: 2rem 0 1.5rem;\n contain: layout style;\n overflow-x: hidden;\n}\n\n@media(min-width: 768px) {\n d-title {\n padding: 4rem 0 1.5rem;\n }\n}\n\nd-title h1 {\n grid-column: text;\n font-size: 40px;\n font-weight: 700;\n line-height: 1.1em;\n margin: 0 0 0.5rem;\n}\n\n@media(min-width: 768px) {\n d-title h1 {\n font-size: 50px;\n }\n}\n\nd-title 
p {\n font-weight: 300;\n font-size: 1.2rem;\n line-height: 1.55em;\n grid-column: text;\n}\n\nd-title .status {\n margin-top: 0px;\n font-size: 12px;\n color: #009688;\n opacity: 0.8;\n grid-column: kicker;\n}\n\nd-title .status span {\n line-height: 1;\n display: inline-block;\n padding: 6px 0;\n border-bottom: 1px solid #80cbc4;\n font-size: 11px;\n text-transform: uppercase;\n}\n"; - - // Copyright 2018 The Distill Template Authors - - const styles = base + layout + title + byline + article + math + print; - - function makeStyleTag(dom) { - - const styleTagId = 'distill-prerendered-styles'; - const prerenderedTag = dom.getElementById(styleTagId); - if (!prerenderedTag) { - const styleTag = dom.createElement('style'); - styleTag.id = styleTagId; - styleTag.type = 'text/css'; - const cssTextTag = dom.createTextNode(styles); - styleTag.appendChild(cssTextTag); - const firstScriptTag = dom.head.querySelector('script'); - dom.head.insertBefore(styleTag, firstScriptTag); - } - - } - - // Copyright 2018 The Distill Template Authors - // - // Licensed under the Apache License, Version 2.0 (the "License"); - // you may not use this file except in compliance with the License. - // You may obtain a copy of the License at - // - // http://www.apache.org/licenses/LICENSE-2.0 - // - // Unless required by applicable law or agreed to in writing, software - // distributed under the License is distributed on an "AS IS" BASIS, - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - // See the License for the specific language governing permissions and - // limitations under the License. 
- - function addPolyfill(polyfill, polyfillLoadedCallback) { - console.debug('Runlevel 0: Polyfill required: ' + polyfill.name); - const script = document.createElement('script'); - script.src = polyfill.url; - script.async = false; - if (polyfillLoadedCallback) { - script.onload = function() { polyfillLoadedCallback(polyfill); }; - } - script.onerror = function() { - new Error('Runlevel 0: Polyfills failed to load script ' + polyfill.name); - }; - document.head.appendChild(script); - } - - const polyfills = [ - { - name: 'WebComponents', - support: function() { - return 'customElements' in window && - 'attachShadow' in Element.prototype && - 'getRootNode' in Element.prototype && - 'content' in document.createElement('template') && - 'Promise' in window && - 'from' in Array; - }, - url: 'https://distill.pub/third-party/polyfills/webcomponents-lite.js' - }, { - name: 'IntersectionObserver', - support: function() { - return 'IntersectionObserver' in window && - 'IntersectionObserverEntry' in window; - }, - url: 'https://distill.pub/third-party/polyfills/intersection-observer.js' - }, - ]; - - class Polyfills { - - static browserSupportsAllFeatures() { - return polyfills.every((poly) => poly.support()); - } - - static load(callback) { - // Define an intermediate callback that checks if all is loaded. 
- const polyfillLoaded = function(polyfill) { - polyfill.loaded = true; - console.debug('Runlevel 0: Polyfill has finished loading: ' + polyfill.name); - // console.debug(window[polyfill.name]); - if (Polyfills.neededPolyfills.every((poly) => poly.loaded)) { - console.debug('Runlevel 0: All required polyfills have finished loading.'); - console.debug('Runlevel 0->1.'); - window.distillRunlevel = 1; - callback(); - } - }; - // Add polyfill script tags - for (const polyfill of Polyfills.neededPolyfills) { - addPolyfill(polyfill, polyfillLoaded); - } - } - - static get neededPolyfills() { - if (!Polyfills._neededPolyfills) { - Polyfills._neededPolyfills = polyfills.filter((poly) => !poly.support()); - } - return Polyfills._neededPolyfills; - } - } - - // Copyright 2018 The Distill Template Authors - // - // Licensed under the Apache License, Version 2.0 (the "License"); - // you may not use this file except in compliance with the License. - // You may obtain a copy of the License at - // - // http://www.apache.org/licenses/LICENSE-2.0 - // - // Unless required by applicable law or agreed to in writing, software - // distributed under the License is distributed on an "AS IS" BASIS, - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - // See the License for the specific language governing permissions and - // limitations under the License. 
- - // const marginSmall = 16; - // const marginLarge = 3 * marginSmall; - // const margin = marginSmall + marginLarge; - // const gutter = marginSmall; - // const outsetAmount = margin / 2; - // const numCols = 4; - // const numGutters = numCols - 1; - // const columnWidth = (768 - 2 * marginLarge - numGutters * gutter) / numCols; - // - // const screenwidth = 768; - // const pageWidth = screenwidth - 2 * marginLarge; - // const bodyWidth = pageWidth - columnWidth - gutter; - - function body(selector) { - return `${selector} { - grid-column: left / text; - } - `; - } - - // Copyright 2018 The Distill Template Authors - - const T$1 = Template('d-abstract', ` - - - -`); - - class Abstract extends T$1(HTMLElement) { - - } - - // Copyright 2018 The Distill Template Authors - - const T$2 = Template('d-appendix', ` - - -`, false); - - class Appendix extends T$2(HTMLElement) { - - } - - // Copyright 2018 The Distill Template Authors - // - // Licensed under the Apache License, Version 2.0 (the "License"); - // you may not use this file except in compliance with the License. - // You may obtain a copy of the License at - // - // http://www.apache.org/licenses/LICENSE-2.0 - // - // Unless required by applicable law or agreed to in writing, software - // distributed under the License is distributed on an "AS IS" BASIS, - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - // See the License for the specific language governing permissions and - // limitations under the License. - - // import { Template } from '../mixins/template'; - // import { Controller } from '../controller'; - - const isOnlyWhitespace = /^\s*$/; - - class Article extends HTMLElement { - static get is() { return 'd-article'; } - - constructor() { - super(); - - new MutationObserver( (mutations) => { - for (const mutation of mutations) { - for (const addedNode of mutation.addedNodes) { - switch (addedNode.nodeName) { - case '#text': { // usually text nodes are only linebreaks. 
- const text = addedNode.nodeValue; - if (!isOnlyWhitespace.test(text)) { - console.warn('Use of unwrapped text in distill articles is discouraged as it breaks layout! Please wrap any text in a or

tag. We found the following text: ' + text); - const wrapper = document.createElement('span'); - wrapper.innerHTML = addedNode.nodeValue; - addedNode.parentNode.insertBefore(wrapper, addedNode); - addedNode.parentNode.removeChild(addedNode); - } - } break; - } - } - } - }).observe(this, {childList: true}); - } - - } - - var commonjsGlobal = typeof globalThis !== 'undefined' ? globalThis : typeof window !== 'undefined' ? window : typeof global !== 'undefined' ? global : typeof self !== 'undefined' ? self : {}; - - function createCommonjsModule(fn, module) { - return module = { exports: {} }, fn(module, module.exports), module.exports; - } - - var bibtexParse = createCommonjsModule(function (module, exports) { - /* start bibtexParse 0.0.22 */ - - //Original work by Henrik Muehe (c) 2010 - // - //CommonJS port by Mikola Lysenko 2013 - // - //Port to Browser lib by ORCID / RCPETERS - // - //Issues: - //no comment handling within strings - //no string concatenation - //no variable values yet - //Grammar implemented here: - //bibtex -> (string | preamble | comment | entry)*; - //string -> '@STRING' '{' key_equals_value '}'; - //preamble -> '@PREAMBLE' '{' value '}'; - //comment -> '@COMMENT' '{' value '}'; - //entry -> '@' key '{' key ',' key_value_list '}'; - //key_value_list -> key_equals_value (',' key_equals_value)*; - //key_equals_value -> key '=' value; - //value -> value_quotes | value_braces | key; - //value_quotes -> '"' .*? '"'; // not quite - //value_braces -> '{' .*? 
'"'; // not quite - (function(exports) { - - function BibtexParser() { - - this.months = ["jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec"]; - this.notKey = [',','{','}',' ','=']; - this.pos = 0; - this.input = ""; - this.entries = new Array(); - - this.currentEntry = ""; - - this.setInput = function(t) { - this.input = t; - }; - - this.getEntries = function() { - return this.entries; - }; - - this.isWhitespace = function(s) { - return (s == ' ' || s == '\r' || s == '\t' || s == '\n'); - }; - - this.match = function(s, canCommentOut) { - if (canCommentOut == undefined || canCommentOut == null) - canCommentOut = true; - this.skipWhitespace(canCommentOut); - if (this.input.substring(this.pos, this.pos + s.length) == s) { - this.pos += s.length; - } else { - throw "Token mismatch, expected " + s + ", found " - + this.input.substring(this.pos); - } this.skipWhitespace(canCommentOut); - }; - - this.tryMatch = function(s, canCommentOut) { - if (canCommentOut == undefined || canCommentOut == null) - canCommentOut = true; - this.skipWhitespace(canCommentOut); - if (this.input.substring(this.pos, this.pos + s.length) == s) { - return true; - } else { - return false; - } }; - - /* when search for a match all text can be ignored, not just white space */ - this.matchAt = function() { - while (this.input.length > this.pos && this.input[this.pos] != '@') { - this.pos++; - } - if (this.input[this.pos] == '@') { - return true; - } return false; - }; - - this.skipWhitespace = function(canCommentOut) { - while (this.isWhitespace(this.input[this.pos])) { - this.pos++; - } if (this.input[this.pos] == "%" && canCommentOut == true) { - while (this.input[this.pos] != "\n") { - this.pos++; - } this.skipWhitespace(canCommentOut); - } }; - - this.value_braces = function() { - var bracecount = 0; - this.match("{", false); - var start = this.pos; - var escaped = false; - while (true) { - if (!escaped) { - if (this.input[this.pos] == '}') { - if (bracecount > 
0) { - bracecount--; - } else { - var end = this.pos; - this.match("}", false); - return this.input.substring(start, end); - } } else if (this.input[this.pos] == '{') { - bracecount++; - } else if (this.pos >= this.input.length - 1) { - throw "Unterminated value"; - } } if (this.input[this.pos] == '\\' && escaped == false) - escaped = true; - else - escaped = false; - this.pos++; - } }; - - this.value_comment = function() { - var str = ''; - var brcktCnt = 0; - while (!(this.tryMatch("}", false) && brcktCnt == 0)) { - str = str + this.input[this.pos]; - if (this.input[this.pos] == '{') - brcktCnt++; - if (this.input[this.pos] == '}') - brcktCnt--; - if (this.pos >= this.input.length - 1) { - throw "Unterminated value:" + this.input.substring(start); - } this.pos++; - } return str; - }; - - this.value_quotes = function() { - this.match('"', false); - var start = this.pos; - var escaped = false; - while (true) { - if (!escaped) { - if (this.input[this.pos] == '"') { - var end = this.pos; - this.match('"', false); - return this.input.substring(start, end); - } else if (this.pos >= this.input.length - 1) { - throw "Unterminated value:" + this.input.substring(start); - } } - if (this.input[this.pos] == '\\' && escaped == false) - escaped = true; - else - escaped = false; - this.pos++; - } }; - - this.single_value = function() { - var start = this.pos; - if (this.tryMatch("{")) { - return this.value_braces(); - } else if (this.tryMatch('"')) { - return this.value_quotes(); - } else { - var k = this.key(); - if (k.match("^[0-9]+$")) - return k; - else if (this.months.indexOf(k.toLowerCase()) >= 0) - return k.toLowerCase(); - else - throw "Value expected:" + this.input.substring(start) + ' for key: ' + k; - - } }; - - this.value = function() { - var values = []; - values.push(this.single_value()); - while (this.tryMatch("#")) { - this.match("#"); - values.push(this.single_value()); - } return values.join(""); - }; - - this.key = function() { - var start = this.pos; - while 
(true) { - if (this.pos >= this.input.length) { - throw "Runaway key"; - } // а-яА-Я is Cyrillic - //console.log(this.input[this.pos]); - if (this.notKey.indexOf(this.input[this.pos]) >= 0) { - return this.input.substring(start, this.pos); - } else { - this.pos++; - - } } }; - - this.key_equals_value = function() { - var key = this.key(); - if (this.tryMatch("=")) { - this.match("="); - var val = this.value(); - return [ key, val ]; - } else { - throw "... = value expected, equals sign missing:" - + this.input.substring(this.pos); - } }; - - this.key_value_list = function() { - var kv = this.key_equals_value(); - this.currentEntry['entryTags'] = {}; - this.currentEntry['entryTags'][kv[0]] = kv[1]; - while (this.tryMatch(",")) { - this.match(","); - // fixes problems with commas at the end of a list - if (this.tryMatch("}")) { - break; - } - kv = this.key_equals_value(); - this.currentEntry['entryTags'][kv[0]] = kv[1]; - } }; - - this.entry_body = function(d) { - this.currentEntry = {}; - this.currentEntry['citationKey'] = this.key(); - this.currentEntry['entryType'] = d.substring(1); - this.match(","); - this.key_value_list(); - this.entries.push(this.currentEntry); - }; - - this.directive = function() { - this.match("@"); - return "@" + this.key(); - }; - - this.preamble = function() { - this.currentEntry = {}; - this.currentEntry['entryType'] = 'PREAMBLE'; - this.currentEntry['entry'] = this.value_comment(); - this.entries.push(this.currentEntry); - }; - - this.comment = function() { - this.currentEntry = {}; - this.currentEntry['entryType'] = 'COMMENT'; - this.currentEntry['entry'] = this.value_comment(); - this.entries.push(this.currentEntry); - }; - - this.entry = function(d) { - this.entry_body(d); - }; - - this.bibtex = function() { - while (this.matchAt()) { - var d = this.directive(); - this.match("{"); - if (d == "@STRING") { - this.string(); - } else if (d == "@PREAMBLE") { - this.preamble(); - } else if (d == "@COMMENT") { - this.comment(); - } else { - 
this.entry(d); - } - this.match("}"); - } }; - } - exports.toJSON = function(bibtex) { - var b = new BibtexParser(); - b.setInput(bibtex); - b.bibtex(); - return b.entries; - }; - - /* added during hackathon don't hate on me */ - exports.toBibtex = function(json) { - var out = ''; - for ( var i in json) { - out += "@" + json[i].entryType; - out += '{'; - if (json[i].citationKey) - out += json[i].citationKey + ', '; - if (json[i].entry) - out += json[i].entry ; - if (json[i].entryTags) { - var tags = ''; - for (var jdx in json[i].entryTags) { - if (tags.length != 0) - tags += ', '; - tags += jdx + '= {' + json[i].entryTags[jdx] + '}'; - } - out += tags; - } - out += '}\n\n'; - } - return out; - - }; - - })( exports); - - /* end bibtexParse */ - }); - - // Copyright 2018 The Distill Template Authors - - function normalizeTag(string) { - return string - .replace(/[\t\n ]+/g, ' ') - .replace(/{\\["^`.'acu~Hvs]( )?([a-zA-Z])}/g, (full, x, char) => char) - .replace(/{\\([a-zA-Z])}/g, (full, char) => char); - } - - function parseBibtex(bibtex) { - const bibliography = new Map(); - const parsedEntries = bibtexParse.toJSON(bibtex); - for (const entry of parsedEntries) { - // normalize tags; note entryTags is an object, not Map - for (const [key, value] of Object.entries(entry.entryTags)) { - entry.entryTags[key.toLowerCase()] = normalizeTag(value); - } - entry.entryTags.type = entry.entryType; - // add to bibliography - bibliography.set(entry.citationKey, entry.entryTags); - } - return bibliography; - } - - function serializeFrontmatterToBibtex(frontMatter) { - return `@article{${frontMatter.slug}, - author = {${frontMatter.bibtexAuthors}}, - title = {${frontMatter.title}}, - journal = {${frontMatter.journal.title}}, - year = {${frontMatter.publishedYear}}, - note = {${frontMatter.url}}, - doi = {${frontMatter.doi}} -}`; - } - - // Copyright 2018 The Distill Template Authors - - class Bibliography extends HTMLElement { - - static get is() { return 'd-bibliography'; } - - 
constructor() { - super(); - - // set up mutation observer - const options = {childList: true, characterData: true, subtree: true}; - const observer = new MutationObserver( (entries) => { - for (const entry of entries) { - if (entry.target.nodeName === 'SCRIPT' || entry.type === 'characterData') { - this.parseIfPossible(); - } - } - }); - observer.observe(this, options); - } - - connectedCallback() { - requestAnimationFrame(() => { - this.parseIfPossible(); - }); - } - - parseIfPossible() { - const scriptTag = this.querySelector('script'); - if (!scriptTag) return; - if (scriptTag.type == 'text/bibtex') { - const newBibtex = scriptTag.textContent; - if (this.bibtex !== newBibtex) { - this.bibtex = newBibtex; - const bibliography = parseBibtex(this.bibtex); - this.notify(bibliography); - } - } else if (scriptTag.type == 'text/json') { - const bibliography = new Map(JSON.parse(scriptTag.textContent)); - this.notify(bibliography); - } else { - console.warn('Unsupported bibliography script tag type: ' + scriptTag.type); - } - } - - notify(bibliography) { - const options = { detail: bibliography, bubbles: true }; - const event = new CustomEvent('onBibliographyChanged', options); - this.dispatchEvent(event); - } - - /* observe 'src' attribute */ - - static get observedAttributes() { - return ['src']; - } - - receivedBibtex(event) { - const bibliography = parseBibtex(event.target.response); - this.notify(bibliography); - } - - attributeChangedCallback(name, oldValue, newValue) { - var oReq = new XMLHttpRequest(); - oReq.onload = (e) => this.receivedBibtex(e); - oReq.onerror = () => console.warn(`Could not load Bibtex! (tried ${newValue})`); - oReq.responseType = 'text'; - oReq.open('GET', newValue, true); - oReq.send(); - } - - - } - - // Copyright 2018 The Distill Template Authors - // - // Licensed under the Apache License, Version 2.0 (the "License"); - // you may not use this file except in compliance with the License. 
- // You may obtain a copy of the License at - // - // http://www.apache.org/licenses/LICENSE-2.0 - // - // Unless required by applicable law or agreed to in writing, software - // distributed under the License is distributed on an "AS IS" BASIS, - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - // See the License for the specific language governing permissions and - // limitations under the License. - - // import style from '../styles/d-byline.css'; - - function bylineTemplate(frontMatter) { - return ` -

-`; - } - - class Byline extends HTMLElement { - - static get is() { return 'd-byline'; } - - set frontMatter(frontMatter) { - this.innerHTML = bylineTemplate(frontMatter); - } - - } - - // Copyright 2018 The Distill Template Authors - - const T$3 = Template( - "d-cite", - ` - - - - -
- -
-` - ); - - class Cite extends T$3(HTMLElement) { - /* Lifecycle */ - constructor() { - super(); - this._numbers = []; - this._entries = []; - } - - connectedCallback() { - this.outerSpan = this.root.querySelector("#citation-"); - this.innerSpan = this.root.querySelector(".citation-number"); - this.hoverBox = this.root.querySelector("d-hover-box"); - window.customElements.whenDefined("d-hover-box").then(() => { - this.hoverBox.listen(this); - }); - // in case this component got connected after values were set - if (this.numbers) { - this.displayNumbers(this.numbers); - } - if (this.entries) { - this.displayEntries(this.entries); - } - } - - //TODO This causes an infinite loop on firefox with polyfills. - // This is only needed for interactive editing so no priority. - // disconnectedCallback() { - // const options = { detail: [this, this.keys], bubbles: true }; - // const event = new CustomEvent('onCiteKeyRemoved', options); - // document.dispatchEvent(event); - // } - - /* observe 'key' attribute */ - - static get observedAttributes() { - return ["key", "bibtex-key"]; - } - - attributeChangedCallback(name, oldValue, newValue) { - const eventName = oldValue ? "onCiteKeyChanged" : "onCiteKeyCreated"; - const keys = newValue.split(",").map(k => k.trim()); - const options = { detail: [this, keys], bubbles: true }; - const event = new CustomEvent(eventName, options); - document.dispatchEvent(event); - } - - set key(value) { - this.setAttribute("key", value); - } - - get key() { - return this.getAttribute("key") || this.getAttribute("bibtex-key"); - } - - get keys() { - const result = this.key.split(","); - console.log(result); - return result; - } - - /* Setters & Rendering */ - - set numbers(numbers) { - this._numbers = numbers; - this.displayNumbers(numbers); - } - - get numbers() { - return this._numbers; - } - - displayNumbers(numbers) { - if (!this.innerSpan) return; - const numberStrings = numbers.map(index => { - return index == -1 ? "?" 
: index + 1 + ""; - }); - const textContent = "[" + numberStrings.join(", ") + "]"; - this.innerSpan.textContent = textContent; - } - - set entries(entries) { - this._entries = entries; - this.displayEntries(entries); - } - - get entries() { - return this._entries; - } - - displayEntries(entries) { - if (!this.hoverBox) return; - this.hoverBox.innerHTML = `
    - ${entries - .map(hover_cite) - .map(html => `
  • ${html}
  • `) - .join("\n")} -
`; - } - } - - // Copyright 2018 The Distill Template Authors - - const styles$1 = ` -d-citation-list { - contain: style; -} - -d-citation-list .references { - grid-column: text; -} - -d-citation-list .references .title { - font-weight: 500; -} -`; - - function renderCitationList(element, entries, dom=document) { - if (entries.size > 0) { - element.style.display = ''; - let list = element.querySelector('.references'); - if (list) { - list.innerHTML = ''; - } else { - const stylesTag = dom.createElement('style'); - stylesTag.innerHTML = styles$1; - element.appendChild(stylesTag); - - const heading = dom.createElement('h3'); - heading.id = 'references'; - heading.textContent = 'References'; - element.appendChild(heading); - - list = dom.createElement('ol'); - list.id = 'references-list'; - list.className = 'references'; - element.appendChild(list); - } - - for (const [key, entry] of entries) { - const listItem = dom.createElement('li'); - listItem.id = key; - listItem.innerHTML = bibliography_cite(entry); - list.appendChild(listItem); - } - } else { - element.style.display = 'none'; - } - } - - class CitationList extends HTMLElement { - - static get is() { return 'd-citation-list'; } - - connectedCallback() { - if (!this.hasAttribute('distill-prerendered')) { - this.style.display = 'none'; - } - } - - set citations(citations) { - renderCitationList(this, citations); - } - - } - - var prism = createCommonjsModule(function (module) { - /* ********************************************** - Begin prism-core.js - ********************************************** */ - - var _self = (typeof window !== 'undefined') - ? window // if in browser - : ( - (typeof WorkerGlobalScope !== 'undefined' && self instanceof WorkerGlobalScope) - ? 
self // if in worker - : {} // if in node js - ); - - /** - * Prism: Lightweight, robust, elegant syntax highlighting - * MIT license http://www.opensource.org/licenses/mit-license.php/ - * @author Lea Verou http://lea.verou.me - */ - - var Prism = (function (_self){ - - // Private helper vars - var lang = /\blang(?:uage)?-([\w-]+)\b/i; - var uniqueId = 0; - - - var _ = { - manual: _self.Prism && _self.Prism.manual, - disableWorkerMessageHandler: _self.Prism && _self.Prism.disableWorkerMessageHandler, - util: { - encode: function encode(tokens) { - if (tokens instanceof Token) { - return new Token(tokens.type, encode(tokens.content), tokens.alias); - } else if (Array.isArray(tokens)) { - return tokens.map(encode); - } else { - return tokens.replace(/&/g, '&').replace(/' + env.content + ''; - }; - - /** - * @param {string} text - * @param {LinkedList} tokenList - * @param {any} grammar - * @param {LinkedListNode} startNode - * @param {number} startPos - * @param {boolean} [oneshot=false] - * @param {string} [target] - */ - function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, target) { - for (var token in grammar) { - if (!grammar.hasOwnProperty(token) || !grammar[token]) { - continue; - } - - var patterns = grammar[token]; - patterns = Array.isArray(patterns) ? 
patterns : [patterns]; - - for (var j = 0; j < patterns.length; ++j) { - if (target && target == token + ',' + j) { - return; - } - - var pattern = patterns[j], - inside = pattern.inside, - lookbehind = !!pattern.lookbehind, - greedy = !!pattern.greedy, - lookbehindLength = 0, - alias = pattern.alias; - - if (greedy && !pattern.pattern.global) { - // Without the global flag, lastIndex won't work - var flags = pattern.pattern.toString().match(/[imsuy]*$/)[0]; - pattern.pattern = RegExp(pattern.pattern.source, flags + 'g'); - } - - pattern = pattern.pattern || pattern; - - for ( // iterate the token list and keep track of the current token/string position - var currentNode = startNode.next, pos = startPos; - currentNode !== tokenList.tail; - pos += currentNode.value.length, currentNode = currentNode.next - ) { - - var str = currentNode.value; - - if (tokenList.length > text.length) { - // Something went terribly wrong, ABORT, ABORT! - return; - } - - if (str instanceof Token) { - continue; - } - - var removeCount = 1; // this is the to parameter of removeBetween - - if (greedy && currentNode != tokenList.tail.prev) { - pattern.lastIndex = pos; - var match = pattern.exec(text); - if (!match) { - break; - } - - var from = match.index + (lookbehind && match[1] ? 
match[1].length : 0); - var to = match.index + match[0].length; - var p = pos; - - // find the node that contains the match - p += currentNode.value.length; - while (from >= p) { - currentNode = currentNode.next; - p += currentNode.value.length; - } - // adjust pos (and p) - p -= currentNode.value.length; - pos = p; - - // the current node is a Token, then the match starts inside another Token, which is invalid - if (currentNode.value instanceof Token) { - continue; - } - - // find the last node which is affected by this match - for ( - var k = currentNode; - k !== tokenList.tail && (p < to || (typeof k.value === 'string' && !k.prev.value.greedy)); - k = k.next - ) { - removeCount++; - p += k.value.length; - } - removeCount--; - - // replace with the new match - str = text.slice(pos, p); - match.index -= pos; - } else { - pattern.lastIndex = 0; - - var match = pattern.exec(str); - } - - if (!match) { - if (oneshot) { - break; - } - - continue; - } - - if (lookbehind) { - lookbehindLength = match[1] ? match[1].length : 0; - } - - var from = match.index + lookbehindLength, - match = match[0].slice(lookbehindLength), - to = from + match.length, - before = str.slice(0, from), - after = str.slice(to); - - var removeFrom = currentNode.prev; - - if (before) { - removeFrom = addAfter(tokenList, removeFrom, before); - pos += before.length; - } - - removeRange(tokenList, removeFrom, removeCount); - - var wrapped = new Token(token, inside ? _.tokenize(match, inside) : match, alias, match, greedy); - currentNode = addAfter(tokenList, removeFrom, wrapped); - - if (after) { - addAfter(tokenList, currentNode, after); - } - - - if (removeCount > 1) - matchGrammar(text, tokenList, grammar, currentNode.prev, pos, true, token + ',' + j); - - if (oneshot) - break; - } - } - } - } - - /** - * @typedef LinkedListNode - * @property {T} value - * @property {LinkedListNode | null} prev The previous node. - * @property {LinkedListNode | null} next The next node. 
- * @template T - */ - - /** - * @template T - */ - function LinkedList() { - /** @type {LinkedListNode} */ - var head = { value: null, prev: null, next: null }; - /** @type {LinkedListNode} */ - var tail = { value: null, prev: head, next: null }; - head.next = tail; - - /** @type {LinkedListNode} */ - this.head = head; - /** @type {LinkedListNode} */ - this.tail = tail; - this.length = 0; - } - - /** - * Adds a new node with the given value to the list. - * @param {LinkedList} list - * @param {LinkedListNode} node - * @param {T} value - * @returns {LinkedListNode} The added node. - * @template T - */ - function addAfter(list, node, value) { - // assumes that node != list.tail && values.length >= 0 - var next = node.next; - - var newNode = { value: value, prev: node, next: next }; - node.next = newNode; - next.prev = newNode; - list.length++; - - return newNode; - } - /** - * Removes `count` nodes after the given node. The given node will not be removed. - * @param {LinkedList} list - * @param {LinkedListNode} node - * @param {number} count - * @template T - */ - function removeRange(list, node, count) { - var next = node.next; - for (var i = 0; i < count && next !== list.tail; i++) { - next = next.next; - } - node.next = next; - next.prev = node; - list.length -= i; - } - /** - * @param {LinkedList} list - * @returns {T[]} - * @template T - */ - function toArray(list) { - var array = []; - var node = list.head.next; - while (node !== list.tail) { - array.push(node.value); - node = node.next; - } - return array; - } - - - if (!_self.document) { - if (!_self.addEventListener) { - // in Node.js - return _; - } - - if (!_.disableWorkerMessageHandler) { - // In worker - _self.addEventListener('message', function (evt) { - var message = JSON.parse(evt.data), - lang = message.language, - code = message.code, - immediateClose = message.immediateClose; - - _self.postMessage(_.highlight(code, _.languages[lang], lang)); - if (immediateClose) { - _self.close(); - } - }, 
false); - } - - return _; - } - - //Get current script and highlight - var script = _.util.currentScript(); - - if (script) { - _.filename = script.src; - - if (script.hasAttribute('data-manual')) { - _.manual = true; - } - } - - function highlightAutomaticallyCallback() { - if (!_.manual) { - _.highlightAll(); - } - } - - if (!_.manual) { - // If the document state is "loading", then we'll use DOMContentLoaded. - // If the document state is "interactive" and the prism.js script is deferred, then we'll also use the - // DOMContentLoaded event because there might be some plugins or languages which have also been deferred and they - // might take longer one animation frame to execute which can create a race condition where only some plugins have - // been loaded when Prism.highlightAll() is executed, depending on how fast resources are loaded. - // See https://github.com/PrismJS/prism/issues/2102 - var readyState = document.readyState; - if (readyState === 'loading' || readyState === 'interactive' && script && script.defer) { - document.addEventListener('DOMContentLoaded', highlightAutomaticallyCallback); - } else { - if (window.requestAnimationFrame) { - window.requestAnimationFrame(highlightAutomaticallyCallback); - } else { - window.setTimeout(highlightAutomaticallyCallback, 16); - } - } - } - - return _; - - })(_self); - - if ( module.exports) { - module.exports = Prism; - } - - // hack for components to work correctly in node.js - if (typeof commonjsGlobal !== 'undefined') { - commonjsGlobal.Prism = Prism; - } - - - /* ********************************************** - Begin prism-markup.js - ********************************************** */ - - Prism.languages.markup = { - 'comment': //, - 'prolog': /<\?[\s\S]+?\?>/, - 'doctype': { - pattern: /"'[\]]|"[^"]*"|'[^']*')+(?:\[(?:(?!)*\]\s*)?>/i, - greedy: true - }, - 'cdata': //i, - 'tag': { - pattern: 
/<\/?(?!\d)[^\s>\/=$<%]+(?:\s(?:\s*[^\s>\/=]+(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s'">=]+(?=[\s>]))|(?=[\s/>])))+)?\s*\/?>/i, - greedy: true, - inside: { - 'tag': { - pattern: /^<\/?[^\s>\/]+/i, - inside: { - 'punctuation': /^<\/?/, - 'namespace': /^[^\s>\/:]+:/ - } - }, - 'attr-value': { - pattern: /=\s*(?:"[^"]*"|'[^']*'|[^\s'">=]+)/i, - inside: { - 'punctuation': [ - /^=/, - { - pattern: /^(\s*)["']|["']$/, - lookbehind: true - } - ] - } - }, - 'punctuation': /\/?>/, - 'attr-name': { - pattern: /[^\s>\/]+/, - inside: { - 'namespace': /^[^\s>\/:]+:/ - } - } - - } - }, - 'entity': /&#?[\da-z]{1,8};/i - }; - - Prism.languages.markup['tag'].inside['attr-value'].inside['entity'] = - Prism.languages.markup['entity']; - - // Plugin to make entity title show the real entity, idea by Roman Komarov - Prism.hooks.add('wrap', function(env) { - - if (env.type === 'entity') { - env.attributes['title'] = env.content.replace(/&/, '&'); - } - }); - - Object.defineProperty(Prism.languages.markup.tag, 'addInlined', { - /** - * Adds an inlined language to markup. 
- * - * An example of an inlined language is CSS with ` - - - -`); - - class Code extends Mutating(T$4(HTMLElement)) { - - renderContent() { - - // check if language can be highlighted - this.languageName = this.getAttribute('language'); - if (!this.languageName) { - console.warn('You need to provide a language attribute to your block to let us know how to highlight your code; e.g.:\n zeros = np.zeros(shape).'); - return; - } - const language = prism.languages[this.languageName]; - if (language == undefined) { - console.warn(`Distill does not yet support highlighting your code block in "${this.languageName}'.`); - return; - } - - let content = this.textContent; - const codeTag = this.shadowRoot.querySelector('#code-container'); - - if (this.hasAttribute('block')) { - // normalize the tab indents - content = content.replace(/\n/, ''); - const tabs = content.match(/\s*/); - content = content.replace(new RegExp('\n' + tabs, 'g'), '\n'); - content = content.trim(); - // wrap code block in pre tag if needed - if (codeTag.parentNode instanceof ShadowRoot) { - const preTag = document.createElement('pre'); - this.shadowRoot.removeChild(codeTag); - preTag.appendChild(codeTag); - this.shadowRoot.appendChild(preTag); - } - - } - - codeTag.className = `language-${this.languageName}`; - codeTag.innerHTML = prism.highlight(content, language); - } - - } - - // Copyright 2018 The Distill Template Authors - - const T$5 = Template('d-footnote', ` - - - -
- -
-
- - - - - -`); - - class Footnote extends T$5(HTMLElement) { - - constructor() { - super(); - - const options = {childList: true, characterData: true, subtree: true}; - const observer = new MutationObserver(this.notify); - observer.observe(this, options); - } - - notify() { - const options = { detail: this, bubbles: true }; - const event = new CustomEvent('onFootnoteChanged', options); - document.dispatchEvent(event); - } - - connectedCallback() { - // listen and notify about changes to slotted content - // const slot = this.shadowRoot.querySelector('#slot'); - // console.warn(slot.textContent); - // slot.addEventListener('slotchange', this.notify); - this.hoverBox = this.root.querySelector('d-hover-box'); - window.customElements.whenDefined('d-hover-box').then(() => { - this.hoverBox.listen(this); - }); - // create numeric ID - Footnote.currentFootnoteId += 1; - const IdString = Footnote.currentFootnoteId.toString(); - this.root.host.id = 'd-footnote-' + IdString; - - // set up hidden hover box - const id = 'dt-fn-hover-box-' + IdString; - this.hoverBox.id = id; - - // set up visible footnote marker - const span = this.root.querySelector('#fn-'); - span.setAttribute('id', 'fn-' + IdString); - span.setAttribute('data-hover-ref', id); - span.textContent = IdString; - } - - } - - Footnote.currentFootnoteId = 0; - - // Copyright 2018 The Distill Template Authors - - const T$6 = Template('d-footnote-list', ` - - -

Footnotes

-
    -`, false); - - class FootnoteList extends T$6(HTMLElement) { - - connectedCallback() { - super.connectedCallback(); - - this.list = this.root.querySelector('ol'); - // footnotes list is initially hidden - this.root.style.display = 'none'; - // look through document and register existing footnotes - // Store.subscribeTo('footnotes', (footnote) => { - // this.renderFootnote(footnote); - // }); - } - - // TODO: could optimize this to accept individual footnotes? - set footnotes(footnotes) { - this.list.innerHTML = ''; - if (footnotes.length) { - // ensure footnote list is visible - this.root.style.display = ''; - - for (const footnote of footnotes) { - // construct and append list item to show footnote - const listItem = document.createElement('li'); - listItem.id = footnote.id + '-listing'; - listItem.innerHTML = footnote.innerHTML; - - const backlink = document.createElement('a'); - backlink.setAttribute('class', 'footnote-backlink'); - backlink.textContent = '[↩]'; - backlink.href = '#' + footnote.id; - - listItem.appendChild(backlink); - this.list.appendChild(listItem); - } - } else { - // ensure footnote list is invisible - this.root.style.display = 'none'; - } - } - - } - - // Copyright 2018 The Distill Template Authors - - const T$7 = Template('d-hover-box', ` - - -
    -
    - -
    -
    -`); - - class HoverBox extends T$7(HTMLElement) { - - constructor() { - super(); - } - - connectedCallback() { - - } - - listen(element) { - // console.log(element) - this.bindDivEvents(this); - this.bindTriggerEvents(element); - // this.style.display = "block"; - } - - bindDivEvents(element) { - // For mice, same behavior as hovering on links - element.addEventListener('mouseover', () => { - if (!this.visible) this.showAtNode(element); - this.stopTimeout(); - }); - element.addEventListener('mouseout', () => { - this.extendTimeout(500); - }); - // Don't trigger body touchstart event when touching within box - element.addEventListener('touchstart', (event) => { - event.stopPropagation(); - }, {passive: true}); - // Close box when touching outside box - document.body.addEventListener('touchstart', () => { - this.hide(); - }, {passive: true}); - } - - bindTriggerEvents(node) { - node.addEventListener('mouseover', () => { - if (!this.visible) { - this.showAtNode(node); - } - this.stopTimeout(); - }); - - node.addEventListener('mouseout', () => { - this.extendTimeout(300); - }); - - node.addEventListener('touchstart', (event) => { - if (this.visible) { - this.hide(); - } else { - this.showAtNode(node); - } - // Don't trigger body touchstart event when touching link - event.stopPropagation(); - }, {passive: true}); - } - - show(position) { - this.visible = true; - this.style.display = 'block'; - // 10px extra offset from element - this.style.top = Math.round(position[1] + 10) + 'px'; - } - - showAtNode(node) { - // https://developer.mozilla.org/en-US/docs/Web/API/HTMLElement/offsetTop - const bbox = node.getBoundingClientRect(); - this.show([node.offsetLeft + bbox.width, node.offsetTop + bbox.height]); - } - - hide() { - this.visible = false; - this.style.display = 'none'; - this.stopTimeout(); - } - - stopTimeout() { - if (this.timeout) { - clearTimeout(this.timeout); - } - } - - extendTimeout(time) { - this.stopTimeout(); - this.timeout = setTimeout(() => { - 
this.hide(); - }, time); - } - - } - - // Copyright 2018 The Distill Template Authors - // - // Licensed under the Apache License, Version 2.0 (the "License"); - // you may not use this file except in compliance with the License. - // You may obtain a copy of the License at - // - // http://www.apache.org/licenses/LICENSE-2.0 - // - // Unless required by applicable law or agreed to in writing, software - // distributed under the License is distributed on an "AS IS" BASIS, - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - // See the License for the specific language governing permissions and - // limitations under the License. - - class Title extends HTMLElement { - static get is() { return 'd-title'; } - } - - // Copyright 2018 The Distill Template Authors - - const T$8 = Template('d-references', ` - -`, false); - - class References extends T$8(HTMLElement) { - - } - - // Copyright 2018 The Distill Template Authors - // - // Licensed under the Apache License, Version 2.0 (the "License"); - // you may not use this file except in compliance with the License. - // You may obtain a copy of the License at - // - // http://www.apache.org/licenses/LICENSE-2.0 - // - // Unless required by applicable law or agreed to in writing, software - // distributed under the License is distributed on an "AS IS" BASIS, - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - // See the License for the specific language governing permissions and - // limitations under the License. - - class TOC extends HTMLElement { - - static get is() { return 'd-toc'; } - - connectedCallback() { - if (!this.getAttribute('prerendered')) { - window.onload = () => { - const article = document.querySelector('d-article'); - const headings = article.querySelectorAll('h2, h3'); - renderTOC(this, headings); - }; - } - } - - } - - function renderTOC(element, headings) { - - let ToC =` - - -

    Table of contents

    -
      `; - - for (const el of headings) { - // should element be included in TOC? - const isInTitle = el.parentElement.tagName == 'D-TITLE'; - const isException = el.getAttribute('no-toc'); - if (isInTitle || isException) continue; - // create TOC entry - const title = el.textContent; - const link = '#' + el.getAttribute('id'); - - let newLine = '
    • ' + '' + title + '' + '
    • '; - if (el.tagName == 'H3') { - newLine = '
        ' + newLine + '
      '; - } else { - newLine += '
      '; - } - ToC += newLine; - - } - - ToC += '
    '; - element.innerHTML = ToC; - } - - // Copyright 2018 The Distill Template Authors - // - // Licensed under the Apache License, Version 2.0 (the "License"); - // you may not use this file except in compliance with the License. - // You may obtain a copy of the License at - // - // http://www.apache.org/licenses/LICENSE-2.0 - // - // Unless required by applicable law or agreed to in writing, software - // distributed under the License is distributed on an "AS IS" BASIS, - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - // See the License for the specific language governing permissions and - // limitations under the License. - - // Figure - // - // d-figure provides a state-machine of visibility events: - // - // scroll out of view - // +----------------+ - // *do work here* | | - // +----------------+ +-+---------+ +-v---------+ - // | ready +----> onscreen | | offscreen | - // +----------------+ +---------^-+ +---------+-+ - // | | - // +----------------+ - // scroll into view - // - - class Figure extends HTMLElement { - - static get is() { return 'd-figure'; } - - static get readyQueue() { - if (!Figure._readyQueue) { - Figure._readyQueue = []; - } - return Figure._readyQueue; - } - - static addToReadyQueue(figure) { - if (Figure.readyQueue.indexOf(figure) === -1) { - Figure.readyQueue.push(figure); - Figure.runReadyQueue(); - } - } - - static runReadyQueue() { - // console.log("Checking to run readyQueue, length: " + Figure.readyQueue.length + ", scrolling: " + Figure.isScrolling); - // if (Figure.isScrolling) return; - // console.log("Running ready Queue"); - const figure = Figure.readyQueue - .sort((a,b) => a._seenOnScreen - b._seenOnScreen ) - .filter((figure) => !figure._ready) - .pop(); - if (figure) { - figure.ready(); - requestAnimationFrame(Figure.runReadyQueue); - } - - } - - constructor() { - super(); - // debugger - this._ready = false; - this._onscreen = false; - this._offscreen = true; - } - - connectedCallback() 
{ - this.loadsWhileScrolling = this.hasAttribute('loadsWhileScrolling'); - Figure.marginObserver.observe(this); - Figure.directObserver.observe(this); - } - - disconnectedCallback() { - Figure.marginObserver.unobserve(this); - Figure.directObserver.unobserve(this); - } - - // We use two separate observers: - // One with an extra 1000px margin to warn if the viewpoint gets close, - // And one for the actual on/off screen events - - static get marginObserver() { - if (!Figure._marginObserver) { - // if (!('IntersectionObserver' in window)) { - // throw new Error('no interscetionobbserver!'); - // } - const viewportHeight = window.innerHeight; - const margin = Math.floor(2 * viewportHeight); - const options = {rootMargin: margin + 'px 0px ' + margin + 'px 0px', threshold: 0.01}; - const callback = Figure.didObserveMarginIntersection; - const observer = new IntersectionObserver(callback, options); - Figure._marginObserver = observer; - } - return Figure._marginObserver; - } - - static didObserveMarginIntersection(entries) { - for (const entry of entries) { - const figure = entry.target; - if (entry.isIntersecting && !figure._ready) { - Figure.addToReadyQueue(figure); - } - } - } - - static get directObserver() { - if (!Figure._directObserver) { - Figure._directObserver = new IntersectionObserver( - Figure.didObserveDirectIntersection, { - rootMargin: '0px', threshold: [0, 1.0], - } - ); - } - return Figure._directObserver; - } - - static didObserveDirectIntersection(entries) { - for (const entry of entries) { - const figure = entry.target; - if (entry.isIntersecting) { - figure._seenOnScreen = new Date(); - // if (!figure._ready) { figure.ready(); } - if (figure._offscreen) { figure.onscreen(); } - } else { - if (figure._onscreen) { figure.offscreen(); } - } - } - } - - // Notify listeners that registered late, too: - - addEventListener(eventName, callback) { - super.addEventListener(eventName, callback); - // if we had already dispatched something while presumingly no 
one was listening, we do so again - // debugger - if (eventName === 'ready') { - if (Figure.readyQueue.indexOf(this) !== -1) { - this._ready = false; - Figure.runReadyQueue(); - } - } - if (eventName === 'onscreen') { - this.onscreen(); - } - } - - // Custom Events - - ready() { - // debugger - this._ready = true; - Figure.marginObserver.unobserve(this); - const event = new CustomEvent('ready'); - this.dispatchEvent(event); - } - - onscreen() { - this._onscreen = true; - this._offscreen = false; - const event = new CustomEvent('onscreen'); - this.dispatchEvent(event); - } - - offscreen() { - this._onscreen = false; - this._offscreen = true; - const event = new CustomEvent('offscreen'); - this.dispatchEvent(event); - } - - } - - if (typeof window !== 'undefined') { - - Figure.isScrolling = false; - let timeout; - const resetTimer = () => { - Figure.isScrolling = true; - clearTimeout(timeout); - timeout = setTimeout(() => { - Figure.isScrolling = false; - Figure.runReadyQueue(); - }, 500); - }; - window.addEventListener('scroll', resetTimer, true); - - } - - // Copyright 2018 The Distill Template Authors - - // This overlay is not secure. - // It is only meant as a social deterrent. - - const productionHostname = 'distill.pub'; - const T$9 = Template('d-interstitial', ` - - -
    -
    -

    This article is in review.

    -

    Do not share this URL or the contents of this article. Thank you!

    - -

    Enter the password we shared with you as part of the review process to view the article.

    -
    -
    -`); - - class Interstitial extends T$9(HTMLElement) { - - connectedCallback() { - if (this.shouldRemoveSelf()) { - this.parentElement.removeChild(this); - } else { - const passwordInput = this.root.querySelector('#interstitial-password-input'); - passwordInput.oninput = (event) => this.passwordChanged(event); - } - } - - passwordChanged(event) { - const entered = event.target.value; - if (entered === this.password) { - console.log('Correct password entered.'); - this.parentElement.removeChild(this); - if (typeof(Storage) !== 'undefined') { - console.log('Saved that correct password was entered.'); - localStorage.setItem(this.localStorageIdentifier(), 'true'); - } - } - } - - shouldRemoveSelf() { - // should never be visible in production - if (window && window.location.hostname === productionHostname) { - console.warn('Interstitial found on production, hiding it.'); - return true - } - // should only have to enter password once - if (typeof(Storage) !== 'undefined') { - if (localStorage.getItem(this.localStorageIdentifier()) === 'true') { - console.log('Loaded that correct password was entered before; skipping interstitial.'); - return true; - } - } - // otherwise, leave visible - return false; - } - - localStorageIdentifier() { - const prefix = 'distill-drafts'; - const suffix = 'interstitial-password-correct'; - return prefix + (window ? window.location.pathname : '-') + suffix - } - - } - - function ascending(a, b) { - return a < b ? -1 : a > b ? 1 : a >= b ? 
0 : NaN; - } - - function bisector(compare) { - if (compare.length === 1) compare = ascendingComparator(compare); - return { - left: function(a, x, lo, hi) { - if (lo == null) lo = 0; - if (hi == null) hi = a.length; - while (lo < hi) { - var mid = lo + hi >>> 1; - if (compare(a[mid], x) < 0) lo = mid + 1; - else hi = mid; - } - return lo; - }, - right: function(a, x, lo, hi) { - if (lo == null) lo = 0; - if (hi == null) hi = a.length; - while (lo < hi) { - var mid = lo + hi >>> 1; - if (compare(a[mid], x) > 0) hi = mid; - else lo = mid + 1; - } - return lo; - } - }; - } - - function ascendingComparator(f) { - return function(d, x) { - return ascending(f(d), x); - }; - } - - var ascendingBisect = bisector(ascending); - var bisectRight = ascendingBisect.right; - - function range(start, stop, step) { - start = +start, stop = +stop, step = (n = arguments.length) < 2 ? (stop = start, start = 0, 1) : n < 3 ? 1 : +step; - - var i = -1, - n = Math.max(0, Math.ceil((stop - start) / step)) | 0, - range = new Array(n); - - while (++i < n) { - range[i] = start + i * step; - } - - return range; - } - - var e10 = Math.sqrt(50), - e5 = Math.sqrt(10), - e2 = Math.sqrt(2); - - function ticks(start, stop, count) { - var reverse, - i = -1, - n, - ticks, - step; - - stop = +stop, start = +start, count = +count; - if (start === stop && count > 0) return [start]; - if (reverse = stop < start) n = start, start = stop, stop = n; - if ((step = tickIncrement(start, stop, count)) === 0 || !isFinite(step)) return []; - - if (step > 0) { - start = Math.ceil(start / step); - stop = Math.floor(stop / step); - ticks = new Array(n = Math.ceil(stop - start + 1)); - while (++i < n) ticks[i] = (start + i) * step; - } else { - start = Math.floor(start * step); - stop = Math.ceil(stop * step); - ticks = new Array(n = Math.ceil(start - stop + 1)); - while (++i < n) ticks[i] = (start - i) / step; - } - - if (reverse) ticks.reverse(); - - return ticks; - } - - function tickIncrement(start, stop, count) { 
- var step = (stop - start) / Math.max(0, count), - power = Math.floor(Math.log(step) / Math.LN10), - error = step / Math.pow(10, power); - return power >= 0 - ? (error >= e10 ? 10 : error >= e5 ? 5 : error >= e2 ? 2 : 1) * Math.pow(10, power) - : -Math.pow(10, -power) / (error >= e10 ? 10 : error >= e5 ? 5 : error >= e2 ? 2 : 1); - } - - function tickStep(start, stop, count) { - var step0 = Math.abs(stop - start) / Math.max(0, count), - step1 = Math.pow(10, Math.floor(Math.log(step0) / Math.LN10)), - error = step0 / step1; - if (error >= e10) step1 *= 10; - else if (error >= e5) step1 *= 5; - else if (error >= e2) step1 *= 2; - return stop < start ? -step1 : step1; - } - - function initRange(domain, range) { - switch (arguments.length) { - case 0: break; - case 1: this.range(domain); break; - default: this.range(range).domain(domain); break; - } - return this; - } - - function define(constructor, factory, prototype) { - constructor.prototype = factory.prototype = prototype; - prototype.constructor = constructor; - } - - function extend(parent, definition) { - var prototype = Object.create(parent.prototype); - for (var key in definition) prototype[key] = definition[key]; - return prototype; - } - - function Color() {} - - var darker = 0.7; - var brighter = 1 / darker; - - var reI = "\\s*([+-]?\\d+)\\s*", - reN = "\\s*([+-]?\\d*\\.?\\d+(?:[eE][+-]?\\d+)?)\\s*", - reP = "\\s*([+-]?\\d*\\.?\\d+(?:[eE][+-]?\\d+)?)%\\s*", - reHex = /^#([0-9a-f]{3,8})$/, - reRgbInteger = new RegExp("^rgb\\(" + [reI, reI, reI] + "\\)$"), - reRgbPercent = new RegExp("^rgb\\(" + [reP, reP, reP] + "\\)$"), - reRgbaInteger = new RegExp("^rgba\\(" + [reI, reI, reI, reN] + "\\)$"), - reRgbaPercent = new RegExp("^rgba\\(" + [reP, reP, reP, reN] + "\\)$"), - reHslPercent = new RegExp("^hsl\\(" + [reN, reP, reP] + "\\)$"), - reHslaPercent = new RegExp("^hsla\\(" + [reN, reP, reP, reN] + "\\)$"); - - var named = { - aliceblue: 0xf0f8ff, - antiquewhite: 0xfaebd7, - aqua: 0x00ffff, - aquamarine: 
0x7fffd4, - azure: 0xf0ffff, - beige: 0xf5f5dc, - bisque: 0xffe4c4, - black: 0x000000, - blanchedalmond: 0xffebcd, - blue: 0x0000ff, - blueviolet: 0x8a2be2, - brown: 0xa52a2a, - burlywood: 0xdeb887, - cadetblue: 0x5f9ea0, - chartreuse: 0x7fff00, - chocolate: 0xd2691e, - coral: 0xff7f50, - cornflowerblue: 0x6495ed, - cornsilk: 0xfff8dc, - crimson: 0xdc143c, - cyan: 0x00ffff, - darkblue: 0x00008b, - darkcyan: 0x008b8b, - darkgoldenrod: 0xb8860b, - darkgray: 0xa9a9a9, - darkgreen: 0x006400, - darkgrey: 0xa9a9a9, - darkkhaki: 0xbdb76b, - darkmagenta: 0x8b008b, - darkolivegreen: 0x556b2f, - darkorange: 0xff8c00, - darkorchid: 0x9932cc, - darkred: 0x8b0000, - darksalmon: 0xe9967a, - darkseagreen: 0x8fbc8f, - darkslateblue: 0x483d8b, - darkslategray: 0x2f4f4f, - darkslategrey: 0x2f4f4f, - darkturquoise: 0x00ced1, - darkviolet: 0x9400d3, - deeppink: 0xff1493, - deepskyblue: 0x00bfff, - dimgray: 0x696969, - dimgrey: 0x696969, - dodgerblue: 0x1e90ff, - firebrick: 0xb22222, - floralwhite: 0xfffaf0, - forestgreen: 0x228b22, - fuchsia: 0xff00ff, - gainsboro: 0xdcdcdc, - ghostwhite: 0xf8f8ff, - gold: 0xffd700, - goldenrod: 0xdaa520, - gray: 0x808080, - green: 0x008000, - greenyellow: 0xadff2f, - grey: 0x808080, - honeydew: 0xf0fff0, - hotpink: 0xff69b4, - indianred: 0xcd5c5c, - indigo: 0x4b0082, - ivory: 0xfffff0, - khaki: 0xf0e68c, - lavender: 0xe6e6fa, - lavenderblush: 0xfff0f5, - lawngreen: 0x7cfc00, - lemonchiffon: 0xfffacd, - lightblue: 0xadd8e6, - lightcoral: 0xf08080, - lightcyan: 0xe0ffff, - lightgoldenrodyellow: 0xfafad2, - lightgray: 0xd3d3d3, - lightgreen: 0x90ee90, - lightgrey: 0xd3d3d3, - lightpink: 0xffb6c1, - lightsalmon: 0xffa07a, - lightseagreen: 0x20b2aa, - lightskyblue: 0x87cefa, - lightslategray: 0x778899, - lightslategrey: 0x778899, - lightsteelblue: 0xb0c4de, - lightyellow: 0xffffe0, - lime: 0x00ff00, - limegreen: 0x32cd32, - linen: 0xfaf0e6, - magenta: 0xff00ff, - maroon: 0x800000, - mediumaquamarine: 0x66cdaa, - mediumblue: 0x0000cd, - mediumorchid: 
0xba55d3, - mediumpurple: 0x9370db, - mediumseagreen: 0x3cb371, - mediumslateblue: 0x7b68ee, - mediumspringgreen: 0x00fa9a, - mediumturquoise: 0x48d1cc, - mediumvioletred: 0xc71585, - midnightblue: 0x191970, - mintcream: 0xf5fffa, - mistyrose: 0xffe4e1, - moccasin: 0xffe4b5, - navajowhite: 0xffdead, - navy: 0x000080, - oldlace: 0xfdf5e6, - olive: 0x808000, - olivedrab: 0x6b8e23, - orange: 0xffa500, - orangered: 0xff4500, - orchid: 0xda70d6, - palegoldenrod: 0xeee8aa, - palegreen: 0x98fb98, - paleturquoise: 0xafeeee, - palevioletred: 0xdb7093, - papayawhip: 0xffefd5, - peachpuff: 0xffdab9, - peru: 0xcd853f, - pink: 0xffc0cb, - plum: 0xdda0dd, - powderblue: 0xb0e0e6, - purple: 0x800080, - rebeccapurple: 0x663399, - red: 0xff0000, - rosybrown: 0xbc8f8f, - royalblue: 0x4169e1, - saddlebrown: 0x8b4513, - salmon: 0xfa8072, - sandybrown: 0xf4a460, - seagreen: 0x2e8b57, - seashell: 0xfff5ee, - sienna: 0xa0522d, - silver: 0xc0c0c0, - skyblue: 0x87ceeb, - slateblue: 0x6a5acd, - slategray: 0x708090, - slategrey: 0x708090, - snow: 0xfffafa, - springgreen: 0x00ff7f, - steelblue: 0x4682b4, - tan: 0xd2b48c, - teal: 0x008080, - thistle: 0xd8bfd8, - tomato: 0xff6347, - turquoise: 0x40e0d0, - violet: 0xee82ee, - wheat: 0xf5deb3, - white: 0xffffff, - whitesmoke: 0xf5f5f5, - yellow: 0xffff00, - yellowgreen: 0x9acd32 - }; - - define(Color, color, { - copy: function(channels) { - return Object.assign(new this.constructor, this, channels); - }, - displayable: function() { - return this.rgb().displayable(); - }, - hex: color_formatHex, // Deprecated! Use color.formatHex. 
- formatHex: color_formatHex, - formatHsl: color_formatHsl, - formatRgb: color_formatRgb, - toString: color_formatRgb - }); - - function color_formatHex() { - return this.rgb().formatHex(); - } - - function color_formatHsl() { - return hslConvert(this).formatHsl(); - } - - function color_formatRgb() { - return this.rgb().formatRgb(); - } - - function color(format) { - var m, l; - format = (format + "").trim().toLowerCase(); - return (m = reHex.exec(format)) ? (l = m[1].length, m = parseInt(m[1], 16), l === 6 ? rgbn(m) // #ff0000 - : l === 3 ? new Rgb((m >> 8 & 0xf) | (m >> 4 & 0xf0), (m >> 4 & 0xf) | (m & 0xf0), ((m & 0xf) << 4) | (m & 0xf), 1) // #f00 - : l === 8 ? rgba(m >> 24 & 0xff, m >> 16 & 0xff, m >> 8 & 0xff, (m & 0xff) / 0xff) // #ff000000 - : l === 4 ? rgba((m >> 12 & 0xf) | (m >> 8 & 0xf0), (m >> 8 & 0xf) | (m >> 4 & 0xf0), (m >> 4 & 0xf) | (m & 0xf0), (((m & 0xf) << 4) | (m & 0xf)) / 0xff) // #f000 - : null) // invalid hex - : (m = reRgbInteger.exec(format)) ? new Rgb(m[1], m[2], m[3], 1) // rgb(255, 0, 0) - : (m = reRgbPercent.exec(format)) ? new Rgb(m[1] * 255 / 100, m[2] * 255 / 100, m[3] * 255 / 100, 1) // rgb(100%, 0%, 0%) - : (m = reRgbaInteger.exec(format)) ? rgba(m[1], m[2], m[3], m[4]) // rgba(255, 0, 0, 1) - : (m = reRgbaPercent.exec(format)) ? rgba(m[1] * 255 / 100, m[2] * 255 / 100, m[3] * 255 / 100, m[4]) // rgb(100%, 0%, 0%, 1) - : (m = reHslPercent.exec(format)) ? hsla(m[1], m[2] / 100, m[3] / 100, 1) // hsl(120, 50%, 50%) - : (m = reHslaPercent.exec(format)) ? hsla(m[1], m[2] / 100, m[3] / 100, m[4]) // hsla(120, 50%, 50%, 1) - : named.hasOwnProperty(format) ? rgbn(named[format]) // eslint-disable-line no-prototype-builtins - : format === "transparent" ? 
new Rgb(NaN, NaN, NaN, 0) - : null; - } - - function rgbn(n) { - return new Rgb(n >> 16 & 0xff, n >> 8 & 0xff, n & 0xff, 1); - } - - function rgba(r, g, b, a) { - if (a <= 0) r = g = b = NaN; - return new Rgb(r, g, b, a); - } - - function rgbConvert(o) { - if (!(o instanceof Color)) o = color(o); - if (!o) return new Rgb; - o = o.rgb(); - return new Rgb(o.r, o.g, o.b, o.opacity); - } - - function rgb(r, g, b, opacity) { - return arguments.length === 1 ? rgbConvert(r) : new Rgb(r, g, b, opacity == null ? 1 : opacity); - } - - function Rgb(r, g, b, opacity) { - this.r = +r; - this.g = +g; - this.b = +b; - this.opacity = +opacity; - } - - define(Rgb, rgb, extend(Color, { - brighter: function(k) { - k = k == null ? brighter : Math.pow(brighter, k); - return new Rgb(this.r * k, this.g * k, this.b * k, this.opacity); - }, - darker: function(k) { - k = k == null ? darker : Math.pow(darker, k); - return new Rgb(this.r * k, this.g * k, this.b * k, this.opacity); - }, - rgb: function() { - return this; - }, - displayable: function() { - return (-0.5 <= this.r && this.r < 255.5) - && (-0.5 <= this.g && this.g < 255.5) - && (-0.5 <= this.b && this.b < 255.5) - && (0 <= this.opacity && this.opacity <= 1); - }, - hex: rgb_formatHex, // Deprecated! Use color.formatHex. - formatHex: rgb_formatHex, - formatRgb: rgb_formatRgb, - toString: rgb_formatRgb - })); - - function rgb_formatHex() { - return "#" + hex(this.r) + hex(this.g) + hex(this.b); - } - - function rgb_formatRgb() { - var a = this.opacity; a = isNaN(a) ? 1 : Math.max(0, Math.min(1, a)); - return (a === 1 ? "rgb(" : "rgba(") - + Math.max(0, Math.min(255, Math.round(this.r) || 0)) + ", " - + Math.max(0, Math.min(255, Math.round(this.g) || 0)) + ", " - + Math.max(0, Math.min(255, Math.round(this.b) || 0)) - + (a === 1 ? ")" : ", " + a + ")"); - } - - function hex(value) { - value = Math.max(0, Math.min(255, Math.round(value) || 0)); - return (value < 16 ? 
"0" : "") + value.toString(16); - } - - function hsla(h, s, l, a) { - if (a <= 0) h = s = l = NaN; - else if (l <= 0 || l >= 1) h = s = NaN; - else if (s <= 0) h = NaN; - return new Hsl(h, s, l, a); - } - - function hslConvert(o) { - if (o instanceof Hsl) return new Hsl(o.h, o.s, o.l, o.opacity); - if (!(o instanceof Color)) o = color(o); - if (!o) return new Hsl; - if (o instanceof Hsl) return o; - o = o.rgb(); - var r = o.r / 255, - g = o.g / 255, - b = o.b / 255, - min = Math.min(r, g, b), - max = Math.max(r, g, b), - h = NaN, - s = max - min, - l = (max + min) / 2; - if (s) { - if (r === max) h = (g - b) / s + (g < b) * 6; - else if (g === max) h = (b - r) / s + 2; - else h = (r - g) / s + 4; - s /= l < 0.5 ? max + min : 2 - max - min; - h *= 60; - } else { - s = l > 0 && l < 1 ? 0 : h; - } - return new Hsl(h, s, l, o.opacity); - } - - function hsl(h, s, l, opacity) { - return arguments.length === 1 ? hslConvert(h) : new Hsl(h, s, l, opacity == null ? 1 : opacity); - } - - function Hsl(h, s, l, opacity) { - this.h = +h; - this.s = +s; - this.l = +l; - this.opacity = +opacity; - } - - define(Hsl, hsl, extend(Color, { - brighter: function(k) { - k = k == null ? brighter : Math.pow(brighter, k); - return new Hsl(this.h, this.s, this.l * k, this.opacity); - }, - darker: function(k) { - k = k == null ? darker : Math.pow(darker, k); - return new Hsl(this.h, this.s, this.l * k, this.opacity); - }, - rgb: function() { - var h = this.h % 360 + (this.h < 0) * 360, - s = isNaN(h) || isNaN(this.s) ? 0 : this.s, - l = this.l, - m2 = l + (l < 0.5 ? l : 1 - l) * s, - m1 = 2 * l - m2; - return new Rgb( - hsl2rgb(h >= 240 ? h - 240 : h + 120, m1, m2), - hsl2rgb(h, m1, m2), - hsl2rgb(h < 120 ? h + 240 : h - 120, m1, m2), - this.opacity - ); - }, - displayable: function() { - return (0 <= this.s && this.s <= 1 || isNaN(this.s)) - && (0 <= this.l && this.l <= 1) - && (0 <= this.opacity && this.opacity <= 1); - }, - formatHsl: function() { - var a = this.opacity; a = isNaN(a) ? 
1 : Math.max(0, Math.min(1, a)); - return (a === 1 ? "hsl(" : "hsla(") - + (this.h || 0) + ", " - + (this.s || 0) * 100 + "%, " - + (this.l || 0) * 100 + "%" - + (a === 1 ? ")" : ", " + a + ")"); - } - })); - - /* From FvD 13.37, CSS Color Module Level 3 */ - function hsl2rgb(h, m1, m2) { - return (h < 60 ? m1 + (m2 - m1) * h / 60 - : h < 180 ? m2 - : h < 240 ? m1 + (m2 - m1) * (240 - h) / 60 - : m1) * 255; - } - - var deg2rad = Math.PI / 180; - var rad2deg = 180 / Math.PI; - - // https://observablehq.com/@mbostock/lab-and-rgb - var K = 18, - Xn = 0.96422, - Yn = 1, - Zn = 0.82521, - t0 = 4 / 29, - t1 = 6 / 29, - t2 = 3 * t1 * t1, - t3 = t1 * t1 * t1; - - function labConvert(o) { - if (o instanceof Lab) return new Lab(o.l, o.a, o.b, o.opacity); - if (o instanceof Hcl) return hcl2lab(o); - if (!(o instanceof Rgb)) o = rgbConvert(o); - var r = rgb2lrgb(o.r), - g = rgb2lrgb(o.g), - b = rgb2lrgb(o.b), - y = xyz2lab((0.2225045 * r + 0.7168786 * g + 0.0606169 * b) / Yn), x, z; - if (r === g && g === b) x = z = y; else { - x = xyz2lab((0.4360747 * r + 0.3850649 * g + 0.1430804 * b) / Xn); - z = xyz2lab((0.0139322 * r + 0.0971045 * g + 0.7141733 * b) / Zn); - } - return new Lab(116 * y - 16, 500 * (x - y), 200 * (y - z), o.opacity); - } - - function lab(l, a, b, opacity) { - return arguments.length === 1 ? labConvert(l) : new Lab(l, a, b, opacity == null ? 1 : opacity); - } - - function Lab(l, a, b, opacity) { - this.l = +l; - this.a = +a; - this.b = +b; - this.opacity = +opacity; - } - - define(Lab, lab, extend(Color, { - brighter: function(k) { - return new Lab(this.l + K * (k == null ? 1 : k), this.a, this.b, this.opacity); - }, - darker: function(k) { - return new Lab(this.l - K * (k == null ? 1 : k), this.a, this.b, this.opacity); - }, - rgb: function() { - var y = (this.l + 16) / 116, - x = isNaN(this.a) ? y : y + this.a / 500, - z = isNaN(this.b) ? 
y : y - this.b / 200; - x = Xn * lab2xyz(x); - y = Yn * lab2xyz(y); - z = Zn * lab2xyz(z); - return new Rgb( - lrgb2rgb( 3.1338561 * x - 1.6168667 * y - 0.4906146 * z), - lrgb2rgb(-0.9787684 * x + 1.9161415 * y + 0.0334540 * z), - lrgb2rgb( 0.0719453 * x - 0.2289914 * y + 1.4052427 * z), - this.opacity - ); - } - })); - - function xyz2lab(t) { - return t > t3 ? Math.pow(t, 1 / 3) : t / t2 + t0; - } - - function lab2xyz(t) { - return t > t1 ? t * t * t : t2 * (t - t0); - } - - function lrgb2rgb(x) { - return 255 * (x <= 0.0031308 ? 12.92 * x : 1.055 * Math.pow(x, 1 / 2.4) - 0.055); - } - - function rgb2lrgb(x) { - return (x /= 255) <= 0.04045 ? x / 12.92 : Math.pow((x + 0.055) / 1.055, 2.4); - } - - function hclConvert(o) { - if (o instanceof Hcl) return new Hcl(o.h, o.c, o.l, o.opacity); - if (!(o instanceof Lab)) o = labConvert(o); - if (o.a === 0 && o.b === 0) return new Hcl(NaN, 0 < o.l && o.l < 100 ? 0 : NaN, o.l, o.opacity); - var h = Math.atan2(o.b, o.a) * rad2deg; - return new Hcl(h < 0 ? h + 360 : h, Math.sqrt(o.a * o.a + o.b * o.b), o.l, o.opacity); - } - - function hcl(h, c, l, opacity) { - return arguments.length === 1 ? hclConvert(h) : new Hcl(h, c, l, opacity == null ? 1 : opacity); - } - - function Hcl(h, c, l, opacity) { - this.h = +h; - this.c = +c; - this.l = +l; - this.opacity = +opacity; - } - - function hcl2lab(o) { - if (isNaN(o.h)) return new Lab(o.l, 0, 0, o.opacity); - var h = o.h * deg2rad; - return new Lab(o.l, Math.cos(h) * o.c, Math.sin(h) * o.c, o.opacity); - } - - define(Hcl, hcl, extend(Color, { - brighter: function(k) { - return new Hcl(this.h, this.c, this.l + K * (k == null ? 1 : k), this.opacity); - }, - darker: function(k) { - return new Hcl(this.h, this.c, this.l - K * (k == null ? 
1 : k), this.opacity); - }, - rgb: function() { - return hcl2lab(this).rgb(); - } - })); - - var A = -0.14861, - B = +1.78277, - C = -0.29227, - D = -0.90649, - E = +1.97294, - ED = E * D, - EB = E * B, - BC_DA = B * C - D * A; - - function cubehelixConvert(o) { - if (o instanceof Cubehelix) return new Cubehelix(o.h, o.s, o.l, o.opacity); - if (!(o instanceof Rgb)) o = rgbConvert(o); - var r = o.r / 255, - g = o.g / 255, - b = o.b / 255, - l = (BC_DA * b + ED * r - EB * g) / (BC_DA + ED - EB), - bl = b - l, - k = (E * (g - l) - C * bl) / D, - s = Math.sqrt(k * k + bl * bl) / (E * l * (1 - l)), // NaN if l=0 or l=1 - h = s ? Math.atan2(k, bl) * rad2deg - 120 : NaN; - return new Cubehelix(h < 0 ? h + 360 : h, s, l, o.opacity); - } - - function cubehelix(h, s, l, opacity) { - return arguments.length === 1 ? cubehelixConvert(h) : new Cubehelix(h, s, l, opacity == null ? 1 : opacity); - } - - function Cubehelix(h, s, l, opacity) { - this.h = +h; - this.s = +s; - this.l = +l; - this.opacity = +opacity; - } - - define(Cubehelix, cubehelix, extend(Color, { - brighter: function(k) { - k = k == null ? brighter : Math.pow(brighter, k); - return new Cubehelix(this.h, this.s, this.l * k, this.opacity); - }, - darker: function(k) { - k = k == null ? darker : Math.pow(darker, k); - return new Cubehelix(this.h, this.s, this.l * k, this.opacity); - }, - rgb: function() { - var h = isNaN(this.h) ? 0 : (this.h + 120) * deg2rad, - l = +this.l, - a = isNaN(this.s) ? 
0 : this.s * l * (1 - l), - cosh = Math.cos(h), - sinh = Math.sin(h); - return new Rgb( - 255 * (l + a * (A * cosh + B * sinh)), - 255 * (l + a * (C * cosh + D * sinh)), - 255 * (l + a * (E * cosh)), - this.opacity - ); - } - })); - - function constant(x) { - return function() { - return x; - }; - } - - function linear(a, d) { - return function(t) { - return a + t * d; - }; - } - - function exponential(a, b, y) { - return a = Math.pow(a, y), b = Math.pow(b, y) - a, y = 1 / y, function(t) { - return Math.pow(a + t * b, y); - }; - } - - function gamma(y) { - return (y = +y) === 1 ? nogamma : function(a, b) { - return b - a ? exponential(a, b, y) : constant(isNaN(a) ? b : a); - }; - } - - function nogamma(a, b) { - var d = b - a; - return d ? linear(a, d) : constant(isNaN(a) ? b : a); - } - - var rgb$1 = (function rgbGamma(y) { - var color = gamma(y); - - function rgb$1(start, end) { - var r = color((start = rgb(start)).r, (end = rgb(end)).r), - g = color(start.g, end.g), - b = color(start.b, end.b), - opacity = nogamma(start.opacity, end.opacity); - return function(t) { - start.r = r(t); - start.g = g(t); - start.b = b(t); - start.opacity = opacity(t); - return start + ""; - }; - } - - rgb$1.gamma = rgbGamma; - - return rgb$1; - })(1); - - function numberArray(a, b) { - if (!b) b = []; - var n = a ? Math.min(b.length, a.length) : 0, - c = b.slice(), - i; - return function(t) { - for (i = 0; i < n; ++i) c[i] = a[i] * (1 - t) + b[i] * t; - return c; - }; - } - - function isNumberArray(x) { - return ArrayBuffer.isView(x) && !(x instanceof DataView); - } - - function genericArray(a, b) { - var nb = b ? b.length : 0, - na = a ? 
Math.min(nb, a.length) : 0, - x = new Array(na), - c = new Array(nb), - i; - - for (i = 0; i < na; ++i) x[i] = interpolate(a[i], b[i]); - for (; i < nb; ++i) c[i] = b[i]; - - return function(t) { - for (i = 0; i < na; ++i) c[i] = x[i](t); - return c; - }; - } - - function date(a, b) { - var d = new Date; - return a = +a, b = +b, function(t) { - return d.setTime(a * (1 - t) + b * t), d; - }; - } - - function interpolateNumber(a, b) { - return a = +a, b = +b, function(t) { - return a * (1 - t) + b * t; - }; - } - - function object(a, b) { - var i = {}, - c = {}, - k; - - if (a === null || typeof a !== "object") a = {}; - if (b === null || typeof b !== "object") b = {}; - - for (k in b) { - if (k in a) { - i[k] = interpolate(a[k], b[k]); - } else { - c[k] = b[k]; - } - } - - return function(t) { - for (k in i) c[k] = i[k](t); - return c; - }; - } - - var reA = /[-+]?(?:\d+\.?\d*|\.?\d+)(?:[eE][-+]?\d+)?/g, - reB = new RegExp(reA.source, "g"); - - function zero(b) { - return function() { - return b; - }; - } - - function one(b) { - return function(t) { - return b(t) + ""; - }; - } - - function string(a, b) { - var bi = reA.lastIndex = reB.lastIndex = 0, // scan index for next number in b - am, // current match in a - bm, // current match in b - bs, // string preceding current number in b, if any - i = -1, // index in s - s = [], // string constants and placeholders - q = []; // number interpolators - - // Coerce inputs to strings. - a = a + "", b = b + ""; - - // Interpolate pairs of numbers in a & b. 
- while ((am = reA.exec(a)) - && (bm = reB.exec(b))) { - if ((bs = bm.index) > bi) { // a string precedes the next number in b - bs = b.slice(bi, bs); - if (s[i]) s[i] += bs; // coalesce with previous string - else s[++i] = bs; - } - if ((am = am[0]) === (bm = bm[0])) { // numbers in a & b match - if (s[i]) s[i] += bm; // coalesce with previous string - else s[++i] = bm; - } else { // interpolate non-matching numbers - s[++i] = null; - q.push({i: i, x: interpolateNumber(am, bm)}); - } - bi = reB.lastIndex; - } - - // Add remains of b. - if (bi < b.length) { - bs = b.slice(bi); - if (s[i]) s[i] += bs; // coalesce with previous string - else s[++i] = bs; - } - - // Special optimization for only a single match. - // Otherwise, interpolate each of the numbers and rejoin the string. - return s.length < 2 ? (q[0] - ? one(q[0].x) - : zero(b)) - : (b = q.length, function(t) { - for (var i = 0, o; i < b; ++i) s[(o = q[i]).i] = o.x(t); - return s.join(""); - }); - } - - function interpolate(a, b) { - var t = typeof b, c; - return b == null || t === "boolean" ? constant(b) - : (t === "number" ? interpolateNumber - : t === "string" ? ((c = color(b)) ? (b = c, rgb$1) : string) - : b instanceof color ? rgb$1 - : b instanceof Date ? date - : isNumberArray(b) ? numberArray - : Array.isArray(b) ? genericArray - : typeof b.valueOf !== "function" && typeof b.toString !== "function" || isNaN(b) ? object - : interpolateNumber)(a, b); - } - - function interpolateRound(a, b) { - return a = +a, b = +b, function(t) { - return Math.round(a * (1 - t) + b * t); - }; - } - - function constant$1(x) { - return function() { - return x; - }; - } - - function number(x) { - return +x; - } - - var unit = [0, 1]; - - function identity(x) { - return x; - } - - function normalize(a, b) { - return (b -= (a = +a)) - ? function(x) { return (x - a) / b; } - : constant$1(isNaN(b) ? 
NaN : 0.5); - } - - function clamper(a, b) { - var t; - if (a > b) t = a, a = b, b = t; - return function(x) { return Math.max(a, Math.min(b, x)); }; - } - - // normalize(a, b)(x) takes a domain value x in [a,b] and returns the corresponding parameter t in [0,1]. - // interpolate(a, b)(t) takes a parameter t in [0,1] and returns the corresponding range value x in [a,b]. - function bimap(domain, range, interpolate) { - var d0 = domain[0], d1 = domain[1], r0 = range[0], r1 = range[1]; - if (d1 < d0) d0 = normalize(d1, d0), r0 = interpolate(r1, r0); - else d0 = normalize(d0, d1), r0 = interpolate(r0, r1); - return function(x) { return r0(d0(x)); }; - } - - function polymap(domain, range, interpolate) { - var j = Math.min(domain.length, range.length) - 1, - d = new Array(j), - r = new Array(j), - i = -1; - - // Reverse descending domains. - if (domain[j] < domain[0]) { - domain = domain.slice().reverse(); - range = range.slice().reverse(); - } - - while (++i < j) { - d[i] = normalize(domain[i], domain[i + 1]); - r[i] = interpolate(range[i], range[i + 1]); - } - - return function(x) { - var i = bisectRight(domain, x, 1, j) - 1; - return r[i](d[i](x)); - }; - } - - function copy(source, target) { - return target - .domain(source.domain()) - .range(source.range()) - .interpolate(source.interpolate()) - .clamp(source.clamp()) - .unknown(source.unknown()); - } - - function transformer() { - var domain = unit, - range = unit, - interpolate$1 = interpolate, - transform, - untransform, - unknown, - clamp = identity, - piecewise, - output, - input; - - function rescale() { - var n = Math.min(domain.length, range.length); - if (clamp !== identity) clamp = clamper(domain[0], domain[n - 1]); - piecewise = n > 2 ? polymap : bimap; - output = input = null; - return scale; - } - - function scale(x) { - return isNaN(x = +x) ? 
unknown : (output || (output = piecewise(domain.map(transform), range, interpolate$1)))(transform(clamp(x))); - } - - scale.invert = function(y) { - return clamp(untransform((input || (input = piecewise(range, domain.map(transform), interpolateNumber)))(y))); - }; - - scale.domain = function(_) { - return arguments.length ? (domain = Array.from(_, number), rescale()) : domain.slice(); - }; - - scale.range = function(_) { - return arguments.length ? (range = Array.from(_), rescale()) : range.slice(); - }; - - scale.rangeRound = function(_) { - return range = Array.from(_), interpolate$1 = interpolateRound, rescale(); - }; - - scale.clamp = function(_) { - return arguments.length ? (clamp = _ ? true : identity, rescale()) : clamp !== identity; - }; - - scale.interpolate = function(_) { - return arguments.length ? (interpolate$1 = _, rescale()) : interpolate$1; - }; - - scale.unknown = function(_) { - return arguments.length ? (unknown = _, scale) : unknown; - }; - - return function(t, u) { - transform = t, untransform = u; - return rescale(); - }; - } - - function continuous() { - return transformer()(identity, identity); - } - - // Computes the decimal coefficient and exponent of the specified number x with - // significant digits p, where x is positive and p is in [1, 21] or undefined. - // For example, formatDecimal(1.23) returns ["123", 0]. - function formatDecimal(x, p) { - if ((i = (x = p ? x.toExponential(p - 1) : x.toExponential()).indexOf("e")) < 0) return null; // NaN, ±Infinity - var i, coefficient = x.slice(0, i); - - // The string returned by toExponential either has the form \d\.\d+e[-+]\d+ - // (e.g., 1.2e+3) or the form \de[-+]\d+ (e.g., 1e+3). - return [ - coefficient.length > 1 ? coefficient[0] + coefficient.slice(2) : coefficient, - +x.slice(i + 1) - ]; - } - - function exponent(x) { - return x = formatDecimal(Math.abs(x)), x ? 
x[1] : NaN; - } - - function formatGroup(grouping, thousands) { - return function(value, width) { - var i = value.length, - t = [], - j = 0, - g = grouping[0], - length = 0; - - while (i > 0 && g > 0) { - if (length + g + 1 > width) g = Math.max(1, width - length); - t.push(value.substring(i -= g, i + g)); - if ((length += g + 1) > width) break; - g = grouping[j = (j + 1) % grouping.length]; - } - - return t.reverse().join(thousands); - }; - } - - function formatNumerals(numerals) { - return function(value) { - return value.replace(/[0-9]/g, function(i) { - return numerals[+i]; - }); - }; - } - - // [[fill]align][sign][symbol][0][width][,][.precision][~][type] - var re = /^(?:(.)?([<>=^]))?([+\-( ])?([$#])?(0)?(\d+)?(,)?(\.\d+)?(~)?([a-z%])?$/i; - - function formatSpecifier(specifier) { - if (!(match = re.exec(specifier))) throw new Error("invalid format: " + specifier); - var match; - return new FormatSpecifier({ - fill: match[1], - align: match[2], - sign: match[3], - symbol: match[4], - zero: match[5], - width: match[6], - comma: match[7], - precision: match[8] && match[8].slice(1), - trim: match[9], - type: match[10] - }); - } - - formatSpecifier.prototype = FormatSpecifier.prototype; // instanceof - - function FormatSpecifier(specifier) { - this.fill = specifier.fill === undefined ? " " : specifier.fill + ""; - this.align = specifier.align === undefined ? ">" : specifier.align + ""; - this.sign = specifier.sign === undefined ? "-" : specifier.sign + ""; - this.symbol = specifier.symbol === undefined ? "" : specifier.symbol + ""; - this.zero = !!specifier.zero; - this.width = specifier.width === undefined ? undefined : +specifier.width; - this.comma = !!specifier.comma; - this.precision = specifier.precision === undefined ? undefined : +specifier.precision; - this.trim = !!specifier.trim; - this.type = specifier.type === undefined ? 
"" : specifier.type + ""; - } - - FormatSpecifier.prototype.toString = function() { - return this.fill - + this.align - + this.sign - + this.symbol - + (this.zero ? "0" : "") - + (this.width === undefined ? "" : Math.max(1, this.width | 0)) - + (this.comma ? "," : "") - + (this.precision === undefined ? "" : "." + Math.max(0, this.precision | 0)) - + (this.trim ? "~" : "") - + this.type; - }; - - // Trims insignificant zeros, e.g., replaces 1.2000k with 1.2k. - function formatTrim(s) { - out: for (var n = s.length, i = 1, i0 = -1, i1; i < n; ++i) { - switch (s[i]) { - case ".": i0 = i1 = i; break; - case "0": if (i0 === 0) i0 = i; i1 = i; break; - default: if (!+s[i]) break out; if (i0 > 0) i0 = 0; break; - } - } - return i0 > 0 ? s.slice(0, i0) + s.slice(i1 + 1) : s; - } - - var prefixExponent; - - function formatPrefixAuto(x, p) { - var d = formatDecimal(x, p); - if (!d) return x + ""; - var coefficient = d[0], - exponent = d[1], - i = exponent - (prefixExponent = Math.max(-8, Math.min(8, Math.floor(exponent / 3))) * 3) + 1, - n = coefficient.length; - return i === n ? coefficient - : i > n ? coefficient + new Array(i - n + 1).join("0") - : i > 0 ? coefficient.slice(0, i) + "." + coefficient.slice(i) - : "0." + new Array(1 - i).join("0") + formatDecimal(x, Math.max(0, p + i - 1))[0]; // less than 1y! - } - - function formatRounded(x, p) { - var d = formatDecimal(x, p); - if (!d) return x + ""; - var coefficient = d[0], - exponent = d[1]; - return exponent < 0 ? "0." + new Array(-exponent).join("0") + coefficient - : coefficient.length > exponent + 1 ? coefficient.slice(0, exponent + 1) + "." 
+ coefficient.slice(exponent + 1) - : coefficient + new Array(exponent - coefficient.length + 2).join("0"); - } - - var formatTypes = { - "%": function(x, p) { return (x * 100).toFixed(p); }, - "b": function(x) { return Math.round(x).toString(2); }, - "c": function(x) { return x + ""; }, - "d": function(x) { return Math.round(x).toString(10); }, - "e": function(x, p) { return x.toExponential(p); }, - "f": function(x, p) { return x.toFixed(p); }, - "g": function(x, p) { return x.toPrecision(p); }, - "o": function(x) { return Math.round(x).toString(8); }, - "p": function(x, p) { return formatRounded(x * 100, p); }, - "r": formatRounded, - "s": formatPrefixAuto, - "X": function(x) { return Math.round(x).toString(16).toUpperCase(); }, - "x": function(x) { return Math.round(x).toString(16); } - }; - - function identity$1(x) { - return x; - } - - var map = Array.prototype.map, - prefixes = ["y","z","a","f","p","n","µ","m","","k","M","G","T","P","E","Z","Y"]; - - function formatLocale(locale) { - var group = locale.grouping === undefined || locale.thousands === undefined ? identity$1 : formatGroup(map.call(locale.grouping, Number), locale.thousands + ""), - currencyPrefix = locale.currency === undefined ? "" : locale.currency[0] + "", - currencySuffix = locale.currency === undefined ? "" : locale.currency[1] + "", - decimal = locale.decimal === undefined ? "." : locale.decimal + "", - numerals = locale.numerals === undefined ? identity$1 : formatNumerals(map.call(locale.numerals, String)), - percent = locale.percent === undefined ? "%" : locale.percent + "", - minus = locale.minus === undefined ? "-" : locale.minus + "", - nan = locale.nan === undefined ? 
"NaN" : locale.nan + ""; - - function newFormat(specifier) { - specifier = formatSpecifier(specifier); - - var fill = specifier.fill, - align = specifier.align, - sign = specifier.sign, - symbol = specifier.symbol, - zero = specifier.zero, - width = specifier.width, - comma = specifier.comma, - precision = specifier.precision, - trim = specifier.trim, - type = specifier.type; - - // The "n" type is an alias for ",g". - if (type === "n") comma = true, type = "g"; - - // The "" type, and any invalid type, is an alias for ".12~g". - else if (!formatTypes[type]) precision === undefined && (precision = 12), trim = true, type = "g"; - - // If zero fill is specified, padding goes after sign and before digits. - if (zero || (fill === "0" && align === "=")) zero = true, fill = "0", align = "="; - - // Compute the prefix and suffix. - // For SI-prefix, the suffix is lazily computed. - var prefix = symbol === "$" ? currencyPrefix : symbol === "#" && /[boxX]/.test(type) ? "0" + type.toLowerCase() : "", - suffix = symbol === "$" ? currencySuffix : /[%p]/.test(type) ? percent : ""; - - // What format function should we use? - // Is this an integer type? - // Can this type generate exponential notation? - var formatType = formatTypes[type], - maybeSuffix = /[defgprs%]/.test(type); - - // Set the default precision if not specified, - // or clamp the specified precision to the supported range. - // For significant precision, it must be in [1, 21]. - // For fixed precision, it must be in [0, 20]. - precision = precision === undefined ? 6 - : /[gprs]/.test(type) ? Math.max(1, Math.min(21, precision)) - : Math.max(0, Math.min(20, precision)); - - function format(value) { - var valuePrefix = prefix, - valueSuffix = suffix, - i, n, c; - - if (type === "c") { - valueSuffix = formatType(value) + valueSuffix; - value = ""; - } else { - value = +value; - - // Determine the sign. -0 is not less than 0, but 1 / -0 is! 
- var valueNegative = value < 0 || 1 / value < 0; - - // Perform the initial formatting. - value = isNaN(value) ? nan : formatType(Math.abs(value), precision); - - // Trim insignificant zeros. - if (trim) value = formatTrim(value); - - // If a negative value rounds to zero after formatting, and no explicit positive sign is requested, hide the sign. - if (valueNegative && +value === 0 && sign !== "+") valueNegative = false; - - // Compute the prefix and suffix. - valuePrefix = (valueNegative ? (sign === "(" ? sign : minus) : sign === "-" || sign === "(" ? "" : sign) + valuePrefix; - valueSuffix = (type === "s" ? prefixes[8 + prefixExponent / 3] : "") + valueSuffix + (valueNegative && sign === "(" ? ")" : ""); - - // Break the formatted value into the integer “value” part that can be - // grouped, and fractional or exponential “suffix” part that is not. - if (maybeSuffix) { - i = -1, n = value.length; - while (++i < n) { - if (c = value.charCodeAt(i), 48 > c || c > 57) { - valueSuffix = (c === 46 ? decimal + value.slice(i + 1) : value.slice(i)) + valueSuffix; - value = value.slice(0, i); - break; - } - } - } - } - - // If the fill character is not "0", grouping is applied before padding. - if (comma && !zero) value = group(value, Infinity); - - // Compute the padding. - var length = valuePrefix.length + value.length + valueSuffix.length, - padding = length < width ? new Array(width - length + 1).join(fill) : ""; - - // If the fill character is "0", grouping is applied after padding. - if (comma && zero) value = group(padding + value, padding.length ? width - valueSuffix.length : Infinity), padding = ""; - - // Reconstruct the final output based on the desired alignment. 
- switch (align) { - case "<": value = valuePrefix + value + valueSuffix + padding; break; - case "=": value = valuePrefix + padding + value + valueSuffix; break; - case "^": value = padding.slice(0, length = padding.length >> 1) + valuePrefix + value + valueSuffix + padding.slice(length); break; - default: value = padding + valuePrefix + value + valueSuffix; break; - } - - return numerals(value); - } - - format.toString = function() { - return specifier + ""; - }; - - return format; - } - - function formatPrefix(specifier, value) { - var f = newFormat((specifier = formatSpecifier(specifier), specifier.type = "f", specifier)), - e = Math.max(-8, Math.min(8, Math.floor(exponent(value) / 3))) * 3, - k = Math.pow(10, -e), - prefix = prefixes[8 + e / 3]; - return function(value) { - return f(k * value) + prefix; - }; - } - - return { - format: newFormat, - formatPrefix: formatPrefix - }; - } - - var locale; - var format; - var formatPrefix; - - defaultLocale({ - decimal: ".", - thousands: ",", - grouping: [3], - currency: ["$", ""], - minus: "-" - }); - - function defaultLocale(definition) { - locale = formatLocale(definition); - format = locale.format; - formatPrefix = locale.formatPrefix; - return locale; - } - - function precisionFixed(step) { - return Math.max(0, -exponent(Math.abs(step))); - } - - function precisionPrefix(step, value) { - return Math.max(0, Math.max(-8, Math.min(8, Math.floor(exponent(value) / 3))) * 3 - exponent(Math.abs(step))); - } - - function precisionRound(step, max) { - step = Math.abs(step), max = Math.abs(max) - step; - return Math.max(0, exponent(max) - exponent(step)) + 1; - } - - function tickFormat(start, stop, count, specifier) { - var step = tickStep(start, stop, count), - precision; - specifier = formatSpecifier(specifier == null ? 
",f" : specifier); - switch (specifier.type) { - case "s": { - var value = Math.max(Math.abs(start), Math.abs(stop)); - if (specifier.precision == null && !isNaN(precision = precisionPrefix(step, value))) specifier.precision = precision; - return formatPrefix(specifier, value); - } - case "": - case "e": - case "g": - case "p": - case "r": { - if (specifier.precision == null && !isNaN(precision = precisionRound(step, Math.max(Math.abs(start), Math.abs(stop))))) specifier.precision = precision - (specifier.type === "e"); - break; - } - case "f": - case "%": { - if (specifier.precision == null && !isNaN(precision = precisionFixed(step))) specifier.precision = precision - (specifier.type === "%") * 2; - break; - } - } - return format(specifier); - } - - function linearish(scale) { - var domain = scale.domain; - - scale.ticks = function(count) { - var d = domain(); - return ticks(d[0], d[d.length - 1], count == null ? 10 : count); - }; - - scale.tickFormat = function(count, specifier) { - var d = domain(); - return tickFormat(d[0], d[d.length - 1], count == null ? 
10 : count, specifier); - }; - - scale.nice = function(count) { - if (count == null) count = 10; - - var d = domain(), - i0 = 0, - i1 = d.length - 1, - start = d[i0], - stop = d[i1], - step; - - if (stop < start) { - step = start, start = stop, stop = step; - step = i0, i0 = i1, i1 = step; - } - - step = tickIncrement(start, stop, count); - - if (step > 0) { - start = Math.floor(start / step) * step; - stop = Math.ceil(stop / step) * step; - step = tickIncrement(start, stop, count); - } else if (step < 0) { - start = Math.ceil(start * step) / step; - stop = Math.floor(stop * step) / step; - step = tickIncrement(start, stop, count); - } - - if (step > 0) { - d[i0] = Math.floor(start / step) * step; - d[i1] = Math.ceil(stop / step) * step; - domain(d); - } else if (step < 0) { - d[i0] = Math.ceil(start * step) / step; - d[i1] = Math.floor(stop * step) / step; - domain(d); - } - - return scale; - }; - - return scale; - } - - function linear$1() { - var scale = continuous(); - - scale.copy = function() { - return copy(scale, linear$1()); - }; - - initRange.apply(scale, arguments); - - return linearish(scale); - } - - var t0$1 = new Date, - t1$1 = new Date; - - function newInterval(floori, offseti, count, field) { - - function interval(date) { - return floori(date = arguments.length === 0 ? new Date : new Date(+date)), date; - } - - interval.floor = function(date) { - return floori(date = new Date(+date)), date; - }; - - interval.ceil = function(date) { - return floori(date = new Date(date - 1)), offseti(date, 1), floori(date), date; - }; - - interval.round = function(date) { - var d0 = interval(date), - d1 = interval.ceil(date); - return date - d0 < d1 - date ? d0 : d1; - }; - - interval.offset = function(date, step) { - return offseti(date = new Date(+date), step == null ? 1 : Math.floor(step)), date; - }; - - interval.range = function(start, stop, step) { - var range = [], previous; - start = interval.ceil(start); - step = step == null ? 
1 : Math.floor(step); - if (!(start < stop) || !(step > 0)) return range; // also handles Invalid Date - do range.push(previous = new Date(+start)), offseti(start, step), floori(start); - while (previous < start && start < stop); - return range; - }; - - interval.filter = function(test) { - return newInterval(function(date) { - if (date >= date) while (floori(date), !test(date)) date.setTime(date - 1); - }, function(date, step) { - if (date >= date) { - if (step < 0) while (++step <= 0) { - while (offseti(date, -1), !test(date)) {} // eslint-disable-line no-empty - } else while (--step >= 0) { - while (offseti(date, +1), !test(date)) {} // eslint-disable-line no-empty - } - } - }); - }; - - if (count) { - interval.count = function(start, end) { - t0$1.setTime(+start), t1$1.setTime(+end); - floori(t0$1), floori(t1$1); - return Math.floor(count(t0$1, t1$1)); - }; - - interval.every = function(step) { - step = Math.floor(step); - return !isFinite(step) || !(step > 0) ? null - : !(step > 1) ? interval - : interval.filter(field - ? function(d) { return field(d) % step === 0; } - : function(d) { return interval.count(0, d) % step === 0; }); - }; - } - - return interval; - } - - var millisecond = newInterval(function() { - // noop - }, function(date, step) { - date.setTime(+date + step); - }, function(start, end) { - return end - start; - }); - - // An optimized implementation for this simple case. 
- millisecond.every = function(k) { - k = Math.floor(k); - if (!isFinite(k) || !(k > 0)) return null; - if (!(k > 1)) return millisecond; - return newInterval(function(date) { - date.setTime(Math.floor(date / k) * k); - }, function(date, step) { - date.setTime(+date + step * k); - }, function(start, end) { - return (end - start) / k; - }); - }; - - var durationSecond = 1e3; - var durationMinute = 6e4; - var durationHour = 36e5; - var durationDay = 864e5; - var durationWeek = 6048e5; - - var second = newInterval(function(date) { - date.setTime(date - date.getMilliseconds()); - }, function(date, step) { - date.setTime(+date + step * durationSecond); - }, function(start, end) { - return (end - start) / durationSecond; - }, function(date) { - return date.getUTCSeconds(); - }); - - var minute = newInterval(function(date) { - date.setTime(date - date.getMilliseconds() - date.getSeconds() * durationSecond); - }, function(date, step) { - date.setTime(+date + step * durationMinute); - }, function(start, end) { - return (end - start) / durationMinute; - }, function(date) { - return date.getMinutes(); - }); - - var hour = newInterval(function(date) { - date.setTime(date - date.getMilliseconds() - date.getSeconds() * durationSecond - date.getMinutes() * durationMinute); - }, function(date, step) { - date.setTime(+date + step * durationHour); - }, function(start, end) { - return (end - start) / durationHour; - }, function(date) { - return date.getHours(); - }); - - var day = newInterval(function(date) { - date.setHours(0, 0, 0, 0); - }, function(date, step) { - date.setDate(date.getDate() + step); - }, function(start, end) { - return (end - start - (end.getTimezoneOffset() - start.getTimezoneOffset()) * durationMinute) / durationDay; - }, function(date) { - return date.getDate() - 1; - }); - - function weekday(i) { - return newInterval(function(date) { - date.setDate(date.getDate() - (date.getDay() + 7 - i) % 7); - date.setHours(0, 0, 0, 0); - }, function(date, step) { - 
date.setDate(date.getDate() + step * 7); - }, function(start, end) { - return (end - start - (end.getTimezoneOffset() - start.getTimezoneOffset()) * durationMinute) / durationWeek; - }); - } - - var sunday = weekday(0); - var monday = weekday(1); - var tuesday = weekday(2); - var wednesday = weekday(3); - var thursday = weekday(4); - var friday = weekday(5); - var saturday = weekday(6); - - var month = newInterval(function(date) { - date.setDate(1); - date.setHours(0, 0, 0, 0); - }, function(date, step) { - date.setMonth(date.getMonth() + step); - }, function(start, end) { - return end.getMonth() - start.getMonth() + (end.getFullYear() - start.getFullYear()) * 12; - }, function(date) { - return date.getMonth(); - }); - - var year = newInterval(function(date) { - date.setMonth(0, 1); - date.setHours(0, 0, 0, 0); - }, function(date, step) { - date.setFullYear(date.getFullYear() + step); - }, function(start, end) { - return end.getFullYear() - start.getFullYear(); - }, function(date) { - return date.getFullYear(); - }); - - // An optimized implementation for this simple case. - year.every = function(k) { - return !isFinite(k = Math.floor(k)) || !(k > 0) ? 
null : newInterval(function(date) { - date.setFullYear(Math.floor(date.getFullYear() / k) * k); - date.setMonth(0, 1); - date.setHours(0, 0, 0, 0); - }, function(date, step) { - date.setFullYear(date.getFullYear() + step * k); - }); - }; - - var utcMinute = newInterval(function(date) { - date.setUTCSeconds(0, 0); - }, function(date, step) { - date.setTime(+date + step * durationMinute); - }, function(start, end) { - return (end - start) / durationMinute; - }, function(date) { - return date.getUTCMinutes(); - }); - - var utcHour = newInterval(function(date) { - date.setUTCMinutes(0, 0, 0); - }, function(date, step) { - date.setTime(+date + step * durationHour); - }, function(start, end) { - return (end - start) / durationHour; - }, function(date) { - return date.getUTCHours(); - }); - - var utcDay = newInterval(function(date) { - date.setUTCHours(0, 0, 0, 0); - }, function(date, step) { - date.setUTCDate(date.getUTCDate() + step); - }, function(start, end) { - return (end - start) / durationDay; - }, function(date) { - return date.getUTCDate() - 1; - }); - - function utcWeekday(i) { - return newInterval(function(date) { - date.setUTCDate(date.getUTCDate() - (date.getUTCDay() + 7 - i) % 7); - date.setUTCHours(0, 0, 0, 0); - }, function(date, step) { - date.setUTCDate(date.getUTCDate() + step * 7); - }, function(start, end) { - return (end - start) / durationWeek; - }); - } - - var utcSunday = utcWeekday(0); - var utcMonday = utcWeekday(1); - var utcTuesday = utcWeekday(2); - var utcWednesday = utcWeekday(3); - var utcThursday = utcWeekday(4); - var utcFriday = utcWeekday(5); - var utcSaturday = utcWeekday(6); - - var utcMonth = newInterval(function(date) { - date.setUTCDate(1); - date.setUTCHours(0, 0, 0, 0); - }, function(date, step) { - date.setUTCMonth(date.getUTCMonth() + step); - }, function(start, end) { - return end.getUTCMonth() - start.getUTCMonth() + (end.getUTCFullYear() - start.getUTCFullYear()) * 12; - }, function(date) { - return date.getUTCMonth(); - 
}); - - var utcYear = newInterval(function(date) { - date.setUTCMonth(0, 1); - date.setUTCHours(0, 0, 0, 0); - }, function(date, step) { - date.setUTCFullYear(date.getUTCFullYear() + step); - }, function(start, end) { - return end.getUTCFullYear() - start.getUTCFullYear(); - }, function(date) { - return date.getUTCFullYear(); - }); - - // An optimized implementation for this simple case. - utcYear.every = function(k) { - return !isFinite(k = Math.floor(k)) || !(k > 0) ? null : newInterval(function(date) { - date.setUTCFullYear(Math.floor(date.getUTCFullYear() / k) * k); - date.setUTCMonth(0, 1); - date.setUTCHours(0, 0, 0, 0); - }, function(date, step) { - date.setUTCFullYear(date.getUTCFullYear() + step * k); - }); - }; - - function localDate(d) { - if (0 <= d.y && d.y < 100) { - var date = new Date(-1, d.m, d.d, d.H, d.M, d.S, d.L); - date.setFullYear(d.y); - return date; - } - return new Date(d.y, d.m, d.d, d.H, d.M, d.S, d.L); - } - - function utcDate(d) { - if (0 <= d.y && d.y < 100) { - var date = new Date(Date.UTC(-1, d.m, d.d, d.H, d.M, d.S, d.L)); - date.setUTCFullYear(d.y); - return date; - } - return new Date(Date.UTC(d.y, d.m, d.d, d.H, d.M, d.S, d.L)); - } - - function newDate(y, m, d) { - return {y: y, m: m, d: d, H: 0, M: 0, S: 0, L: 0}; - } - - function formatLocale$1(locale) { - var locale_dateTime = locale.dateTime, - locale_date = locale.date, - locale_time = locale.time, - locale_periods = locale.periods, - locale_weekdays = locale.days, - locale_shortWeekdays = locale.shortDays, - locale_months = locale.months, - locale_shortMonths = locale.shortMonths; - - var periodRe = formatRe(locale_periods), - periodLookup = formatLookup(locale_periods), - weekdayRe = formatRe(locale_weekdays), - weekdayLookup = formatLookup(locale_weekdays), - shortWeekdayRe = formatRe(locale_shortWeekdays), - shortWeekdayLookup = formatLookup(locale_shortWeekdays), - monthRe = formatRe(locale_months), - monthLookup = formatLookup(locale_months), - shortMonthRe = 
formatRe(locale_shortMonths), - shortMonthLookup = formatLookup(locale_shortMonths); - - var formats = { - "a": formatShortWeekday, - "A": formatWeekday, - "b": formatShortMonth, - "B": formatMonth, - "c": null, - "d": formatDayOfMonth, - "e": formatDayOfMonth, - "f": formatMicroseconds, - "H": formatHour24, - "I": formatHour12, - "j": formatDayOfYear, - "L": formatMilliseconds, - "m": formatMonthNumber, - "M": formatMinutes, - "p": formatPeriod, - "q": formatQuarter, - "Q": formatUnixTimestamp, - "s": formatUnixTimestampSeconds, - "S": formatSeconds, - "u": formatWeekdayNumberMonday, - "U": formatWeekNumberSunday, - "V": formatWeekNumberISO, - "w": formatWeekdayNumberSunday, - "W": formatWeekNumberMonday, - "x": null, - "X": null, - "y": formatYear, - "Y": formatFullYear, - "Z": formatZone, - "%": formatLiteralPercent - }; - - var utcFormats = { - "a": formatUTCShortWeekday, - "A": formatUTCWeekday, - "b": formatUTCShortMonth, - "B": formatUTCMonth, - "c": null, - "d": formatUTCDayOfMonth, - "e": formatUTCDayOfMonth, - "f": formatUTCMicroseconds, - "H": formatUTCHour24, - "I": formatUTCHour12, - "j": formatUTCDayOfYear, - "L": formatUTCMilliseconds, - "m": formatUTCMonthNumber, - "M": formatUTCMinutes, - "p": formatUTCPeriod, - "q": formatUTCQuarter, - "Q": formatUnixTimestamp, - "s": formatUnixTimestampSeconds, - "S": formatUTCSeconds, - "u": formatUTCWeekdayNumberMonday, - "U": formatUTCWeekNumberSunday, - "V": formatUTCWeekNumberISO, - "w": formatUTCWeekdayNumberSunday, - "W": formatUTCWeekNumberMonday, - "x": null, - "X": null, - "y": formatUTCYear, - "Y": formatUTCFullYear, - "Z": formatUTCZone, - "%": formatLiteralPercent - }; - - var parses = { - "a": parseShortWeekday, - "A": parseWeekday, - "b": parseShortMonth, - "B": parseMonth, - "c": parseLocaleDateTime, - "d": parseDayOfMonth, - "e": parseDayOfMonth, - "f": parseMicroseconds, - "H": parseHour24, - "I": parseHour24, - "j": parseDayOfYear, - "L": parseMilliseconds, - "m": parseMonthNumber, - "M": 
parseMinutes, - "p": parsePeriod, - "q": parseQuarter, - "Q": parseUnixTimestamp, - "s": parseUnixTimestampSeconds, - "S": parseSeconds, - "u": parseWeekdayNumberMonday, - "U": parseWeekNumberSunday, - "V": parseWeekNumberISO, - "w": parseWeekdayNumberSunday, - "W": parseWeekNumberMonday, - "x": parseLocaleDate, - "X": parseLocaleTime, - "y": parseYear, - "Y": parseFullYear, - "Z": parseZone, - "%": parseLiteralPercent - }; - - // These recursive directive definitions must be deferred. - formats.x = newFormat(locale_date, formats); - formats.X = newFormat(locale_time, formats); - formats.c = newFormat(locale_dateTime, formats); - utcFormats.x = newFormat(locale_date, utcFormats); - utcFormats.X = newFormat(locale_time, utcFormats); - utcFormats.c = newFormat(locale_dateTime, utcFormats); - - function newFormat(specifier, formats) { - return function(date) { - var string = [], - i = -1, - j = 0, - n = specifier.length, - c, - pad, - format; - - if (!(date instanceof Date)) date = new Date(+date); - - while (++i < n) { - if (specifier.charCodeAt(i) === 37) { - string.push(specifier.slice(j, i)); - if ((pad = pads[c = specifier.charAt(++i)]) != null) c = specifier.charAt(++i); - else pad = c === "e" ? " " : "0"; - if (format = formats[c]) c = format(date, pad); - string.push(c); - j = i + 1; - } - } - - string.push(specifier.slice(j, i)); - return string.join(""); - }; - } - - function newParse(specifier, Z) { - return function(string) { - var d = newDate(1900, undefined, 1), - i = parseSpecifier(d, specifier, string += "", 0), - week, day$1; - if (i != string.length) return null; - - // If a UNIX timestamp is specified, return it. - if ("Q" in d) return new Date(d.Q); - if ("s" in d) return new Date(d.s * 1000 + ("L" in d ? d.L : 0)); - - // If this is utcParse, never use the local timezone. - if (Z && !("Z" in d)) d.Z = 0; - - // The am-pm flag is 0 for AM, and 1 for PM. 
- if ("p" in d) d.H = d.H % 12 + d.p * 12; - - // If the month was not specified, inherit from the quarter. - if (d.m === undefined) d.m = "q" in d ? d.q : 0; - - // Convert day-of-week and week-of-year to day-of-year. - if ("V" in d) { - if (d.V < 1 || d.V > 53) return null; - if (!("w" in d)) d.w = 1; - if ("Z" in d) { - week = utcDate(newDate(d.y, 0, 1)), day$1 = week.getUTCDay(); - week = day$1 > 4 || day$1 === 0 ? utcMonday.ceil(week) : utcMonday(week); - week = utcDay.offset(week, (d.V - 1) * 7); - d.y = week.getUTCFullYear(); - d.m = week.getUTCMonth(); - d.d = week.getUTCDate() + (d.w + 6) % 7; - } else { - week = localDate(newDate(d.y, 0, 1)), day$1 = week.getDay(); - week = day$1 > 4 || day$1 === 0 ? monday.ceil(week) : monday(week); - week = day.offset(week, (d.V - 1) * 7); - d.y = week.getFullYear(); - d.m = week.getMonth(); - d.d = week.getDate() + (d.w + 6) % 7; - } - } else if ("W" in d || "U" in d) { - if (!("w" in d)) d.w = "u" in d ? d.u % 7 : "W" in d ? 1 : 0; - day$1 = "Z" in d ? utcDate(newDate(d.y, 0, 1)).getUTCDay() : localDate(newDate(d.y, 0, 1)).getDay(); - d.m = 0; - d.d = "W" in d ? (d.w + 6) % 7 + d.W * 7 - (day$1 + 5) % 7 : d.w + d.U * 7 - (day$1 + 6) % 7; - } - - // If a time zone is specified, all fields are interpreted as UTC and then - // offset according to the specified time zone. - if ("Z" in d) { - d.H += d.Z / 100 | 0; - d.M += d.Z % 100; - return utcDate(d); - } - - // Otherwise, all fields are in local time. - return localDate(d); - }; - } - - function parseSpecifier(d, specifier, string, j) { - var i = 0, - n = specifier.length, - m = string.length, - c, - parse; - - while (i < n) { - if (j >= m) return -1; - c = specifier.charCodeAt(i++); - if (c === 37) { - c = specifier.charAt(i++); - parse = parses[c in pads ? 
specifier.charAt(i++) : c]; - if (!parse || ((j = parse(d, string, j)) < 0)) return -1; - } else if (c != string.charCodeAt(j++)) { - return -1; - } - } - - return j; - } - - function parsePeriod(d, string, i) { - var n = periodRe.exec(string.slice(i)); - return n ? (d.p = periodLookup[n[0].toLowerCase()], i + n[0].length) : -1; - } - - function parseShortWeekday(d, string, i) { - var n = shortWeekdayRe.exec(string.slice(i)); - return n ? (d.w = shortWeekdayLookup[n[0].toLowerCase()], i + n[0].length) : -1; - } - - function parseWeekday(d, string, i) { - var n = weekdayRe.exec(string.slice(i)); - return n ? (d.w = weekdayLookup[n[0].toLowerCase()], i + n[0].length) : -1; - } - - function parseShortMonth(d, string, i) { - var n = shortMonthRe.exec(string.slice(i)); - return n ? (d.m = shortMonthLookup[n[0].toLowerCase()], i + n[0].length) : -1; - } - - function parseMonth(d, string, i) { - var n = monthRe.exec(string.slice(i)); - return n ? (d.m = monthLookup[n[0].toLowerCase()], i + n[0].length) : -1; - } - - function parseLocaleDateTime(d, string, i) { - return parseSpecifier(d, locale_dateTime, string, i); - } - - function parseLocaleDate(d, string, i) { - return parseSpecifier(d, locale_date, string, i); - } - - function parseLocaleTime(d, string, i) { - return parseSpecifier(d, locale_time, string, i); - } - - function formatShortWeekday(d) { - return locale_shortWeekdays[d.getDay()]; - } - - function formatWeekday(d) { - return locale_weekdays[d.getDay()]; - } - - function formatShortMonth(d) { - return locale_shortMonths[d.getMonth()]; - } - - function formatMonth(d) { - return locale_months[d.getMonth()]; - } - - function formatPeriod(d) { - return locale_periods[+(d.getHours() >= 12)]; - } - - function formatQuarter(d) { - return 1 + ~~(d.getMonth() / 3); - } - - function formatUTCShortWeekday(d) { - return locale_shortWeekdays[d.getUTCDay()]; - } - - function formatUTCWeekday(d) { - return locale_weekdays[d.getUTCDay()]; - } - - function 
formatUTCShortMonth(d) { - return locale_shortMonths[d.getUTCMonth()]; - } - - function formatUTCMonth(d) { - return locale_months[d.getUTCMonth()]; - } - - function formatUTCPeriod(d) { - return locale_periods[+(d.getUTCHours() >= 12)]; - } - - function formatUTCQuarter(d) { - return 1 + ~~(d.getUTCMonth() / 3); - } - - return { - format: function(specifier) { - var f = newFormat(specifier += "", formats); - f.toString = function() { return specifier; }; - return f; - }, - parse: function(specifier) { - var p = newParse(specifier += "", false); - p.toString = function() { return specifier; }; - return p; - }, - utcFormat: function(specifier) { - var f = newFormat(specifier += "", utcFormats); - f.toString = function() { return specifier; }; - return f; - }, - utcParse: function(specifier) { - var p = newParse(specifier += "", true); - p.toString = function() { return specifier; }; - return p; - } - }; - } - - var pads = {"-": "", "_": " ", "0": "0"}, - numberRe = /^\s*\d+/, // note: ignores next directive - percentRe = /^%/, - requoteRe = /[\\^$*+?|[\]().{}]/g; - - function pad(value, fill, width) { - var sign = value < 0 ? "-" : "", - string = (sign ? -value : value) + "", - length = string.length; - return sign + (length < width ? new Array(width - length + 1).join(fill) + string : string); - } - - function requote(s) { - return s.replace(requoteRe, "\\$&"); - } - - function formatRe(names) { - return new RegExp("^(?:" + names.map(requote).join("|") + ")", "i"); - } - - function formatLookup(names) { - var map = {}, i = -1, n = names.length; - while (++i < n) map[names[i].toLowerCase()] = i; - return map; - } - - function parseWeekdayNumberSunday(d, string, i) { - var n = numberRe.exec(string.slice(i, i + 1)); - return n ? (d.w = +n[0], i + n[0].length) : -1; - } - - function parseWeekdayNumberMonday(d, string, i) { - var n = numberRe.exec(string.slice(i, i + 1)); - return n ? 
(d.u = +n[0], i + n[0].length) : -1; - } - - function parseWeekNumberSunday(d, string, i) { - var n = numberRe.exec(string.slice(i, i + 2)); - return n ? (d.U = +n[0], i + n[0].length) : -1; - } - - function parseWeekNumberISO(d, string, i) { - var n = numberRe.exec(string.slice(i, i + 2)); - return n ? (d.V = +n[0], i + n[0].length) : -1; - } - - function parseWeekNumberMonday(d, string, i) { - var n = numberRe.exec(string.slice(i, i + 2)); - return n ? (d.W = +n[0], i + n[0].length) : -1; - } - - function parseFullYear(d, string, i) { - var n = numberRe.exec(string.slice(i, i + 4)); - return n ? (d.y = +n[0], i + n[0].length) : -1; - } - - function parseYear(d, string, i) { - var n = numberRe.exec(string.slice(i, i + 2)); - return n ? (d.y = +n[0] + (+n[0] > 68 ? 1900 : 2000), i + n[0].length) : -1; - } - - function parseZone(d, string, i) { - var n = /^(Z)|([+-]\d\d)(?::?(\d\d))?/.exec(string.slice(i, i + 6)); - return n ? (d.Z = n[1] ? 0 : -(n[2] + (n[3] || "00")), i + n[0].length) : -1; - } - - function parseQuarter(d, string, i) { - var n = numberRe.exec(string.slice(i, i + 1)); - return n ? (d.q = n[0] * 3 - 3, i + n[0].length) : -1; - } - - function parseMonthNumber(d, string, i) { - var n = numberRe.exec(string.slice(i, i + 2)); - return n ? (d.m = n[0] - 1, i + n[0].length) : -1; - } - - function parseDayOfMonth(d, string, i) { - var n = numberRe.exec(string.slice(i, i + 2)); - return n ? (d.d = +n[0], i + n[0].length) : -1; - } - - function parseDayOfYear(d, string, i) { - var n = numberRe.exec(string.slice(i, i + 3)); - return n ? (d.m = 0, d.d = +n[0], i + n[0].length) : -1; - } - - function parseHour24(d, string, i) { - var n = numberRe.exec(string.slice(i, i + 2)); - return n ? (d.H = +n[0], i + n[0].length) : -1; - } - - function parseMinutes(d, string, i) { - var n = numberRe.exec(string.slice(i, i + 2)); - return n ? 
(d.M = +n[0], i + n[0].length) : -1; - } - - function parseSeconds(d, string, i) { - var n = numberRe.exec(string.slice(i, i + 2)); - return n ? (d.S = +n[0], i + n[0].length) : -1; - } - - function parseMilliseconds(d, string, i) { - var n = numberRe.exec(string.slice(i, i + 3)); - return n ? (d.L = +n[0], i + n[0].length) : -1; - } - - function parseMicroseconds(d, string, i) { - var n = numberRe.exec(string.slice(i, i + 6)); - return n ? (d.L = Math.floor(n[0] / 1000), i + n[0].length) : -1; - } - - function parseLiteralPercent(d, string, i) { - var n = percentRe.exec(string.slice(i, i + 1)); - return n ? i + n[0].length : -1; - } - - function parseUnixTimestamp(d, string, i) { - var n = numberRe.exec(string.slice(i)); - return n ? (d.Q = +n[0], i + n[0].length) : -1; - } - - function parseUnixTimestampSeconds(d, string, i) { - var n = numberRe.exec(string.slice(i)); - return n ? (d.s = +n[0], i + n[0].length) : -1; - } - - function formatDayOfMonth(d, p) { - return pad(d.getDate(), p, 2); - } - - function formatHour24(d, p) { - return pad(d.getHours(), p, 2); - } - - function formatHour12(d, p) { - return pad(d.getHours() % 12 || 12, p, 2); - } - - function formatDayOfYear(d, p) { - return pad(1 + day.count(year(d), d), p, 3); - } - - function formatMilliseconds(d, p) { - return pad(d.getMilliseconds(), p, 3); - } - - function formatMicroseconds(d, p) { - return formatMilliseconds(d, p) + "000"; - } - - function formatMonthNumber(d, p) { - return pad(d.getMonth() + 1, p, 2); - } - - function formatMinutes(d, p) { - return pad(d.getMinutes(), p, 2); - } - - function formatSeconds(d, p) { - return pad(d.getSeconds(), p, 2); - } - - function formatWeekdayNumberMonday(d) { - var day = d.getDay(); - return day === 0 ? 7 : day; - } - - function formatWeekNumberSunday(d, p) { - return pad(sunday.count(year(d) - 1, d), p, 2); - } - - function formatWeekNumberISO(d, p) { - var day = d.getDay(); - d = (day >= 4 || day === 0) ? 
thursday(d) : thursday.ceil(d); - return pad(thursday.count(year(d), d) + (year(d).getDay() === 4), p, 2); - } - - function formatWeekdayNumberSunday(d) { - return d.getDay(); - } - - function formatWeekNumberMonday(d, p) { - return pad(monday.count(year(d) - 1, d), p, 2); - } - - function formatYear(d, p) { - return pad(d.getFullYear() % 100, p, 2); - } - - function formatFullYear(d, p) { - return pad(d.getFullYear() % 10000, p, 4); - } - - function formatZone(d) { - var z = d.getTimezoneOffset(); - return (z > 0 ? "-" : (z *= -1, "+")) - + pad(z / 60 | 0, "0", 2) - + pad(z % 60, "0", 2); - } - - function formatUTCDayOfMonth(d, p) { - return pad(d.getUTCDate(), p, 2); - } - - function formatUTCHour24(d, p) { - return pad(d.getUTCHours(), p, 2); - } - - function formatUTCHour12(d, p) { - return pad(d.getUTCHours() % 12 || 12, p, 2); - } - - function formatUTCDayOfYear(d, p) { - return pad(1 + utcDay.count(utcYear(d), d), p, 3); - } - - function formatUTCMilliseconds(d, p) { - return pad(d.getUTCMilliseconds(), p, 3); - } - - function formatUTCMicroseconds(d, p) { - return formatUTCMilliseconds(d, p) + "000"; - } - - function formatUTCMonthNumber(d, p) { - return pad(d.getUTCMonth() + 1, p, 2); - } - - function formatUTCMinutes(d, p) { - return pad(d.getUTCMinutes(), p, 2); - } - - function formatUTCSeconds(d, p) { - return pad(d.getUTCSeconds(), p, 2); - } - - function formatUTCWeekdayNumberMonday(d) { - var dow = d.getUTCDay(); - return dow === 0 ? 7 : dow; - } - - function formatUTCWeekNumberSunday(d, p) { - return pad(utcSunday.count(utcYear(d) - 1, d), p, 2); - } - - function formatUTCWeekNumberISO(d, p) { - var day = d.getUTCDay(); - d = (day >= 4 || day === 0) ? 
utcThursday(d) : utcThursday.ceil(d); - return pad(utcThursday.count(utcYear(d), d) + (utcYear(d).getUTCDay() === 4), p, 2); - } - - function formatUTCWeekdayNumberSunday(d) { - return d.getUTCDay(); - } - - function formatUTCWeekNumberMonday(d, p) { - return pad(utcMonday.count(utcYear(d) - 1, d), p, 2); - } - - function formatUTCYear(d, p) { - return pad(d.getUTCFullYear() % 100, p, 2); - } - - function formatUTCFullYear(d, p) { - return pad(d.getUTCFullYear() % 10000, p, 4); - } - - function formatUTCZone() { - return "+0000"; - } - - function formatLiteralPercent() { - return "%"; - } - - function formatUnixTimestamp(d) { - return +d; - } - - function formatUnixTimestampSeconds(d) { - return Math.floor(+d / 1000); - } - - var locale$1; - var timeFormat; - var timeParse; - var utcFormat; - var utcParse; - - defaultLocale$1({ - dateTime: "%x, %X", - date: "%-m/%-d/%Y", - time: "%-I:%M:%S %p", - periods: ["AM", "PM"], - days: ["Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"], - shortDays: ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"], - months: ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"], - shortMonths: ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] - }); - - function defaultLocale$1(definition) { - locale$1 = formatLocale$1(definition); - timeFormat = locale$1.format; - timeParse = locale$1.parse; - utcFormat = locale$1.utcFormat; - utcParse = locale$1.utcParse; - return locale$1; - } - - var isoSpecifier = "%Y-%m-%dT%H:%M:%S.%LZ"; - - function formatIsoNative(date) { - return date.toISOString(); - } - - var formatIso = Date.prototype.toISOString - ? formatIsoNative - : utcFormat(isoSpecifier); - - function parseIsoNative(string) { - var date = new Date(string); - return isNaN(date) ? null : date; - } - - var parseIso = +new Date("2000-01-01T00:00:00.000Z") - ? 
parseIsoNative - : utcParse(isoSpecifier); - - var noop = {value: function() {}}; - - function dispatch() { - for (var i = 0, n = arguments.length, _ = {}, t; i < n; ++i) { - if (!(t = arguments[i] + "") || (t in _) || /[\s.]/.test(t)) throw new Error("illegal type: " + t); - _[t] = []; - } - return new Dispatch(_); - } - - function Dispatch(_) { - this._ = _; - } - - function parseTypenames(typenames, types) { - return typenames.trim().split(/^|\s+/).map(function(t) { - var name = "", i = t.indexOf("."); - if (i >= 0) name = t.slice(i + 1), t = t.slice(0, i); - if (t && !types.hasOwnProperty(t)) throw new Error("unknown type: " + t); - return {type: t, name: name}; - }); - } - - Dispatch.prototype = dispatch.prototype = { - constructor: Dispatch, - on: function(typename, callback) { - var _ = this._, - T = parseTypenames(typename + "", _), - t, - i = -1, - n = T.length; - - // If no callback was specified, return the callback of the given type and name. - if (arguments.length < 2) { - while (++i < n) if ((t = (typename = T[i]).type) && (t = get(_[t], typename.name))) return t; - return; - } - - // If a type was specified, set the callback for the given type and name. - // Otherwise, if a null callback was specified, remove callbacks of the given name. 
- if (callback != null && typeof callback !== "function") throw new Error("invalid callback: " + callback); - while (++i < n) { - if (t = (typename = T[i]).type) _[t] = set(_[t], typename.name, callback); - else if (callback == null) for (t in _) _[t] = set(_[t], typename.name, null); - } - - return this; - }, - copy: function() { - var copy = {}, _ = this._; - for (var t in _) copy[t] = _[t].slice(); - return new Dispatch(copy); - }, - call: function(type, that) { - if ((n = arguments.length - 2) > 0) for (var args = new Array(n), i = 0, n, t; i < n; ++i) args[i] = arguments[i + 2]; - if (!this._.hasOwnProperty(type)) throw new Error("unknown type: " + type); - for (t = this._[type], i = 0, n = t.length; i < n; ++i) t[i].value.apply(that, args); - }, - apply: function(type, that, args) { - if (!this._.hasOwnProperty(type)) throw new Error("unknown type: " + type); - for (var t = this._[type], i = 0, n = t.length; i < n; ++i) t[i].value.apply(that, args); - } - }; - - function get(type, name) { - for (var i = 0, n = type.length, c; i < n; ++i) { - if ((c = type[i]).name === name) { - return c.value; - } - } - } - - function set(type, name, callback) { - for (var i = 0, n = type.length; i < n; ++i) { - if (type[i].name === name) { - type[i] = noop, type = type.slice(0, i).concat(type.slice(i + 1)); - break; - } - } - if (callback != null) type.push({name: name, value: callback}); - return type; - } - - var xhtml = "http://www.w3.org/1999/xhtml"; - - var namespaces = { - svg: "http://www.w3.org/2000/svg", - xhtml: xhtml, - xlink: "http://www.w3.org/1999/xlink", - xml: "http://www.w3.org/XML/1998/namespace", - xmlns: "http://www.w3.org/2000/xmlns/" - }; - - function namespace(name) { - var prefix = name += "", i = prefix.indexOf(":"); - if (i >= 0 && (prefix = name.slice(0, i)) !== "xmlns") name = name.slice(i + 1); - return namespaces.hasOwnProperty(prefix) ? 
{space: namespaces[prefix], local: name} : name; - } - - function creatorInherit(name) { - return function() { - var document = this.ownerDocument, - uri = this.namespaceURI; - return uri === xhtml && document.documentElement.namespaceURI === xhtml - ? document.createElement(name) - : document.createElementNS(uri, name); - }; - } - - function creatorFixed(fullname) { - return function() { - return this.ownerDocument.createElementNS(fullname.space, fullname.local); - }; - } - - function creator(name) { - var fullname = namespace(name); - return (fullname.local - ? creatorFixed - : creatorInherit)(fullname); - } - - function none() {} - - function selector(selector) { - return selector == null ? none : function() { - return this.querySelector(selector); - }; - } - - function selection_select(select) { - if (typeof select !== "function") select = selector(select); - - for (var groups = this._groups, m = groups.length, subgroups = new Array(m), j = 0; j < m; ++j) { - for (var group = groups[j], n = group.length, subgroup = subgroups[j] = new Array(n), node, subnode, i = 0; i < n; ++i) { - if ((node = group[i]) && (subnode = select.call(node, node.__data__, i, group))) { - if ("__data__" in node) subnode.__data__ = node.__data__; - subgroup[i] = subnode; - } - } - } - - return new Selection(subgroups, this._parents); - } - - function empty() { - return []; - } - - function selectorAll(selector) { - return selector == null ? 
empty : function() { - return this.querySelectorAll(selector); - }; - } - - function selection_selectAll(select) { - if (typeof select !== "function") select = selectorAll(select); - - for (var groups = this._groups, m = groups.length, subgroups = [], parents = [], j = 0; j < m; ++j) { - for (var group = groups[j], n = group.length, node, i = 0; i < n; ++i) { - if (node = group[i]) { - subgroups.push(select.call(node, node.__data__, i, group)); - parents.push(node); - } - } - } - - return new Selection(subgroups, parents); - } - - function matcher(selector) { - return function() { - return this.matches(selector); - }; - } - - function selection_filter(match) { - if (typeof match !== "function") match = matcher(match); - - for (var groups = this._groups, m = groups.length, subgroups = new Array(m), j = 0; j < m; ++j) { - for (var group = groups[j], n = group.length, subgroup = subgroups[j] = [], node, i = 0; i < n; ++i) { - if ((node = group[i]) && match.call(node, node.__data__, i, group)) { - subgroup.push(node); - } - } - } - - return new Selection(subgroups, this._parents); - } - - function sparse(update) { - return new Array(update.length); - } - - function selection_enter() { - return new Selection(this._enter || this._groups.map(sparse), this._parents); - } - - function EnterNode(parent, datum) { - this.ownerDocument = parent.ownerDocument; - this.namespaceURI = parent.namespaceURI; - this._next = null; - this._parent = parent; - this.__data__ = datum; - } - - EnterNode.prototype = { - constructor: EnterNode, - appendChild: function(child) { return this._parent.insertBefore(child, this._next); }, - insertBefore: function(child, next) { return this._parent.insertBefore(child, next); }, - querySelector: function(selector) { return this._parent.querySelector(selector); }, - querySelectorAll: function(selector) { return this._parent.querySelectorAll(selector); } - }; - - function constant$2(x) { - return function() { - return x; - }; - } - - var keyPrefix = "$"; 
// Protect against keys like “__proto__”. - - function bindIndex(parent, group, enter, update, exit, data) { - var i = 0, - node, - groupLength = group.length, - dataLength = data.length; - - // Put any non-null nodes that fit into update. - // Put any null nodes into enter. - // Put any remaining data into enter. - for (; i < dataLength; ++i) { - if (node = group[i]) { - node.__data__ = data[i]; - update[i] = node; - } else { - enter[i] = new EnterNode(parent, data[i]); - } - } - - // Put any non-null nodes that don’t fit into exit. - for (; i < groupLength; ++i) { - if (node = group[i]) { - exit[i] = node; - } - } - } - - function bindKey(parent, group, enter, update, exit, data, key) { - var i, - node, - nodeByKeyValue = {}, - groupLength = group.length, - dataLength = data.length, - keyValues = new Array(groupLength), - keyValue; - - // Compute the key for each node. - // If multiple nodes have the same key, the duplicates are added to exit. - for (i = 0; i < groupLength; ++i) { - if (node = group[i]) { - keyValues[i] = keyValue = keyPrefix + key.call(node, node.__data__, i, group); - if (keyValue in nodeByKeyValue) { - exit[i] = node; - } else { - nodeByKeyValue[keyValue] = node; - } - } - } - - // Compute the key for each datum. - // If there a node associated with this key, join and add it to update. - // If there is not (or the key is a duplicate), add it to enter. - for (i = 0; i < dataLength; ++i) { - keyValue = keyPrefix + key.call(parent, data[i], i, data); - if (node = nodeByKeyValue[keyValue]) { - update[i] = node; - node.__data__ = data[i]; - nodeByKeyValue[keyValue] = null; - } else { - enter[i] = new EnterNode(parent, data[i]); - } - } - - // Add any remaining nodes that were not bound to data to exit. 
- for (i = 0; i < groupLength; ++i) { - if ((node = group[i]) && (nodeByKeyValue[keyValues[i]] === node)) { - exit[i] = node; - } - } - } - - function selection_data(value, key) { - if (!value) { - data = new Array(this.size()), j = -1; - this.each(function(d) { data[++j] = d; }); - return data; - } - - var bind = key ? bindKey : bindIndex, - parents = this._parents, - groups = this._groups; - - if (typeof value !== "function") value = constant$2(value); - - for (var m = groups.length, update = new Array(m), enter = new Array(m), exit = new Array(m), j = 0; j < m; ++j) { - var parent = parents[j], - group = groups[j], - groupLength = group.length, - data = value.call(parent, parent && parent.__data__, j, parents), - dataLength = data.length, - enterGroup = enter[j] = new Array(dataLength), - updateGroup = update[j] = new Array(dataLength), - exitGroup = exit[j] = new Array(groupLength); - - bind(parent, group, enterGroup, updateGroup, exitGroup, data, key); - - // Now connect the enter nodes to their following update node, such that - // appendChild can insert the materialized enter node before this node, - // rather than at the end of the parent node. - for (var i0 = 0, i1 = 0, previous, next; i0 < dataLength; ++i0) { - if (previous = enterGroup[i0]) { - if (i0 >= i1) i1 = i0 + 1; - while (!(next = updateGroup[i1]) && ++i1 < dataLength); - previous._next = next || null; - } - } - } - - update = new Selection(update, parents); - update._enter = enter; - update._exit = exit; - return update; - } - - function selection_exit() { - return new Selection(this._exit || this._groups.map(sparse), this._parents); - } - - function selection_join(onenter, onupdate, onexit) { - var enter = this.enter(), update = this, exit = this.exit(); - enter = typeof onenter === "function" ? onenter(enter) : enter.append(onenter + ""); - if (onupdate != null) update = onupdate(update); - if (onexit == null) exit.remove(); else onexit(exit); - return enter && update ? 
enter.merge(update).order() : update; - } - - function selection_merge(selection) { - - for (var groups0 = this._groups, groups1 = selection._groups, m0 = groups0.length, m1 = groups1.length, m = Math.min(m0, m1), merges = new Array(m0), j = 0; j < m; ++j) { - for (var group0 = groups0[j], group1 = groups1[j], n = group0.length, merge = merges[j] = new Array(n), node, i = 0; i < n; ++i) { - if (node = group0[i] || group1[i]) { - merge[i] = node; - } - } - } - - for (; j < m0; ++j) { - merges[j] = groups0[j]; - } - - return new Selection(merges, this._parents); - } - - function selection_order() { - - for (var groups = this._groups, j = -1, m = groups.length; ++j < m;) { - for (var group = groups[j], i = group.length - 1, next = group[i], node; --i >= 0;) { - if (node = group[i]) { - if (next && node.compareDocumentPosition(next) ^ 4) next.parentNode.insertBefore(node, next); - next = node; - } - } - } - - return this; - } - - function selection_sort(compare) { - if (!compare) compare = ascending$1; - - function compareNode(a, b) { - return a && b ? compare(a.__data__, b.__data__) : !a - !b; - } - - for (var groups = this._groups, m = groups.length, sortgroups = new Array(m), j = 0; j < m; ++j) { - for (var group = groups[j], n = group.length, sortgroup = sortgroups[j] = new Array(n), node, i = 0; i < n; ++i) { - if (node = group[i]) { - sortgroup[i] = node; - } - } - sortgroup.sort(compareNode); - } - - return new Selection(sortgroups, this._parents).order(); - } - - function ascending$1(a, b) { - return a < b ? -1 : a > b ? 1 : a >= b ? 
0 : NaN; - } - - function selection_call() { - var callback = arguments[0]; - arguments[0] = this; - callback.apply(null, arguments); - return this; - } - - function selection_nodes() { - var nodes = new Array(this.size()), i = -1; - this.each(function() { nodes[++i] = this; }); - return nodes; - } - - function selection_node() { - - for (var groups = this._groups, j = 0, m = groups.length; j < m; ++j) { - for (var group = groups[j], i = 0, n = group.length; i < n; ++i) { - var node = group[i]; - if (node) return node; - } - } - - return null; - } - - function selection_size() { - var size = 0; - this.each(function() { ++size; }); - return size; - } - - function selection_empty() { - return !this.node(); - } - - function selection_each(callback) { - - for (var groups = this._groups, j = 0, m = groups.length; j < m; ++j) { - for (var group = groups[j], i = 0, n = group.length, node; i < n; ++i) { - if (node = group[i]) callback.call(node, node.__data__, i, group); - } - } - - return this; - } - - function attrRemove(name) { - return function() { - this.removeAttribute(name); - }; - } - - function attrRemoveNS(fullname) { - return function() { - this.removeAttributeNS(fullname.space, fullname.local); - }; - } - - function attrConstant(name, value) { - return function() { - this.setAttribute(name, value); - }; - } - - function attrConstantNS(fullname, value) { - return function() { - this.setAttributeNS(fullname.space, fullname.local, value); - }; - } - - function attrFunction(name, value) { - return function() { - var v = value.apply(this, arguments); - if (v == null) this.removeAttribute(name); - else this.setAttribute(name, v); - }; - } - - function attrFunctionNS(fullname, value) { - return function() { - var v = value.apply(this, arguments); - if (v == null) this.removeAttributeNS(fullname.space, fullname.local); - else this.setAttributeNS(fullname.space, fullname.local, v); - }; - } - - function selection_attr(name, value) { - var fullname = namespace(name); - - 
if (arguments.length < 2) { - var node = this.node(); - return fullname.local - ? node.getAttributeNS(fullname.space, fullname.local) - : node.getAttribute(fullname); - } - - return this.each((value == null - ? (fullname.local ? attrRemoveNS : attrRemove) : (typeof value === "function" - ? (fullname.local ? attrFunctionNS : attrFunction) - : (fullname.local ? attrConstantNS : attrConstant)))(fullname, value)); - } - - function defaultView(node) { - return (node.ownerDocument && node.ownerDocument.defaultView) // node is a Node - || (node.document && node) // node is a Window - || node.defaultView; // node is a Document - } - - function styleRemove(name) { - return function() { - this.style.removeProperty(name); - }; - } - - function styleConstant(name, value, priority) { - return function() { - this.style.setProperty(name, value, priority); - }; - } - - function styleFunction(name, value, priority) { - return function() { - var v = value.apply(this, arguments); - if (v == null) this.style.removeProperty(name); - else this.style.setProperty(name, v, priority); - }; - } - - function selection_style(name, value, priority) { - return arguments.length > 1 - ? this.each((value == null - ? styleRemove : typeof value === "function" - ? styleFunction - : styleConstant)(name, value, priority == null ? "" : priority)) - : styleValue(this.node(), name); - } - - function styleValue(node, name) { - return node.style.getPropertyValue(name) - || defaultView(node).getComputedStyle(node, null).getPropertyValue(name); - } - - function propertyRemove(name) { - return function() { - delete this[name]; - }; - } - - function propertyConstant(name, value) { - return function() { - this[name] = value; - }; - } - - function propertyFunction(name, value) { - return function() { - var v = value.apply(this, arguments); - if (v == null) delete this[name]; - else this[name] = v; - }; - } - - function selection_property(name, value) { - return arguments.length > 1 - ? this.each((value == null - ? 
propertyRemove : typeof value === "function" - ? propertyFunction - : propertyConstant)(name, value)) - : this.node()[name]; - } - - function classArray(string) { - return string.trim().split(/^|\s+/); - } - - function classList(node) { - return node.classList || new ClassList(node); - } - - function ClassList(node) { - this._node = node; - this._names = classArray(node.getAttribute("class") || ""); - } - - ClassList.prototype = { - add: function(name) { - var i = this._names.indexOf(name); - if (i < 0) { - this._names.push(name); - this._node.setAttribute("class", this._names.join(" ")); - } - }, - remove: function(name) { - var i = this._names.indexOf(name); - if (i >= 0) { - this._names.splice(i, 1); - this._node.setAttribute("class", this._names.join(" ")); - } - }, - contains: function(name) { - return this._names.indexOf(name) >= 0; - } - }; - - function classedAdd(node, names) { - var list = classList(node), i = -1, n = names.length; - while (++i < n) list.add(names[i]); - } - - function classedRemove(node, names) { - var list = classList(node), i = -1, n = names.length; - while (++i < n) list.remove(names[i]); - } - - function classedTrue(names) { - return function() { - classedAdd(this, names); - }; - } - - function classedFalse(names) { - return function() { - classedRemove(this, names); - }; - } - - function classedFunction(names, value) { - return function() { - (value.apply(this, arguments) ? classedAdd : classedRemove)(this, names); - }; - } - - function selection_classed(name, value) { - var names = classArray(name + ""); - - if (arguments.length < 2) { - var list = classList(this.node()), i = -1, n = names.length; - while (++i < n) if (!list.contains(names[i])) return false; - return true; - } - - return this.each((typeof value === "function" - ? classedFunction : value - ? 
classedTrue - : classedFalse)(names, value)); - } - - function textRemove() { - this.textContent = ""; - } - - function textConstant(value) { - return function() { - this.textContent = value; - }; - } - - function textFunction(value) { - return function() { - var v = value.apply(this, arguments); - this.textContent = v == null ? "" : v; - }; - } - - function selection_text(value) { - return arguments.length - ? this.each(value == null - ? textRemove : (typeof value === "function" - ? textFunction - : textConstant)(value)) - : this.node().textContent; - } - - function htmlRemove() { - this.innerHTML = ""; - } - - function htmlConstant(value) { - return function() { - this.innerHTML = value; - }; - } - - function htmlFunction(value) { - return function() { - var v = value.apply(this, arguments); - this.innerHTML = v == null ? "" : v; - }; - } - - function selection_html(value) { - return arguments.length - ? this.each(value == null - ? htmlRemove : (typeof value === "function" - ? htmlFunction - : htmlConstant)(value)) - : this.node().innerHTML; - } - - function raise() { - if (this.nextSibling) this.parentNode.appendChild(this); - } - - function selection_raise() { - return this.each(raise); - } - - function lower() { - if (this.previousSibling) this.parentNode.insertBefore(this, this.parentNode.firstChild); - } - - function selection_lower() { - return this.each(lower); - } - - function selection_append(name) { - var create = typeof name === "function" ? name : creator(name); - return this.select(function() { - return this.appendChild(create.apply(this, arguments)); - }); - } - - function constantNull() { - return null; - } - - function selection_insert(name, before) { - var create = typeof name === "function" ? name : creator(name), - select = before == null ? constantNull : typeof before === "function" ? 
before : selector(before); - return this.select(function() { - return this.insertBefore(create.apply(this, arguments), select.apply(this, arguments) || null); - }); - } - - function remove() { - var parent = this.parentNode; - if (parent) parent.removeChild(this); - } - - function selection_remove() { - return this.each(remove); - } - - function selection_cloneShallow() { - var clone = this.cloneNode(false), parent = this.parentNode; - return parent ? parent.insertBefore(clone, this.nextSibling) : clone; - } - - function selection_cloneDeep() { - var clone = this.cloneNode(true), parent = this.parentNode; - return parent ? parent.insertBefore(clone, this.nextSibling) : clone; - } - - function selection_clone(deep) { - return this.select(deep ? selection_cloneDeep : selection_cloneShallow); - } - - function selection_datum(value) { - return arguments.length - ? this.property("__data__", value) - : this.node().__data__; - } - - var filterEvents = {}; - - var event = null; - - if (typeof document !== "undefined") { - var element = document.documentElement; - if (!("onmouseenter" in element)) { - filterEvents = {mouseenter: "mouseover", mouseleave: "mouseout"}; - } - } - - function filterContextListener(listener, index, group) { - listener = contextListener(listener, index, group); - return function(event) { - var related = event.relatedTarget; - if (!related || (related !== this && !(related.compareDocumentPosition(this) & 8))) { - listener.call(this, event); - } - }; - } - - function contextListener(listener, index, group) { - return function(event1) { - var event0 = event; // Events can be reentrant (e.g., focus). 
- event = event1; - try { - listener.call(this, this.__data__, index, group); - } finally { - event = event0; - } - }; - } - - function parseTypenames$1(typenames) { - return typenames.trim().split(/^|\s+/).map(function(t) { - var name = "", i = t.indexOf("."); - if (i >= 0) name = t.slice(i + 1), t = t.slice(0, i); - return {type: t, name: name}; - }); - } - - function onRemove(typename) { - return function() { - var on = this.__on; - if (!on) return; - for (var j = 0, i = -1, m = on.length, o; j < m; ++j) { - if (o = on[j], (!typename.type || o.type === typename.type) && o.name === typename.name) { - this.removeEventListener(o.type, o.listener, o.capture); - } else { - on[++i] = o; - } - } - if (++i) on.length = i; - else delete this.__on; - }; - } - - function onAdd(typename, value, capture) { - var wrap = filterEvents.hasOwnProperty(typename.type) ? filterContextListener : contextListener; - return function(d, i, group) { - var on = this.__on, o, listener = wrap(value, i, group); - if (on) for (var j = 0, m = on.length; j < m; ++j) { - if ((o = on[j]).type === typename.type && o.name === typename.name) { - this.removeEventListener(o.type, o.listener, o.capture); - this.addEventListener(o.type, o.listener = listener, o.capture = capture); - o.value = value; - return; - } - } - this.addEventListener(typename.type, listener, capture); - o = {type: typename.type, name: typename.name, value: value, listener: listener, capture: capture}; - if (!on) this.__on = [o]; - else on.push(o); - }; - } - - function selection_on(typename, value, capture) { - var typenames = parseTypenames$1(typename + ""), i, n = typenames.length, t; - - if (arguments.length < 2) { - var on = this.node().__on; - if (on) for (var j = 0, m = on.length, o; j < m; ++j) { - for (i = 0, o = on[j]; i < n; ++i) { - if ((t = typenames[i]).type === o.type && t.name === o.name) { - return o.value; - } - } - } - return; - } - - on = value ? 
onAdd : onRemove; - if (capture == null) capture = false; - for (i = 0; i < n; ++i) this.each(on(typenames[i], value, capture)); - return this; - } - - function customEvent(event1, listener, that, args) { - var event0 = event; - event1.sourceEvent = event; - event = event1; - try { - return listener.apply(that, args); - } finally { - event = event0; - } - } - - function dispatchEvent(node, type, params) { - var window = defaultView(node), - event = window.CustomEvent; - - if (typeof event === "function") { - event = new event(type, params); - } else { - event = window.document.createEvent("Event"); - if (params) event.initEvent(type, params.bubbles, params.cancelable), event.detail = params.detail; - else event.initEvent(type, false, false); - } - - node.dispatchEvent(event); - } - - function dispatchConstant(type, params) { - return function() { - return dispatchEvent(this, type, params); - }; - } - - function dispatchFunction(type, params) { - return function() { - return dispatchEvent(this, type, params.apply(this, arguments)); - }; - } - - function selection_dispatch(type, params) { - return this.each((typeof params === "function" - ? 
dispatchFunction - : dispatchConstant)(type, params)); - } - - var root = [null]; - - function Selection(groups, parents) { - this._groups = groups; - this._parents = parents; - } - - function selection() { - return new Selection([[document.documentElement]], root); - } - - Selection.prototype = selection.prototype = { - constructor: Selection, - select: selection_select, - selectAll: selection_selectAll, - filter: selection_filter, - data: selection_data, - enter: selection_enter, - exit: selection_exit, - join: selection_join, - merge: selection_merge, - order: selection_order, - sort: selection_sort, - call: selection_call, - nodes: selection_nodes, - node: selection_node, - size: selection_size, - empty: selection_empty, - each: selection_each, - attr: selection_attr, - style: selection_style, - property: selection_property, - classed: selection_classed, - text: selection_text, - html: selection_html, - raise: selection_raise, - lower: selection_lower, - append: selection_append, - insert: selection_insert, - remove: selection_remove, - clone: selection_clone, - datum: selection_datum, - on: selection_on, - dispatch: selection_dispatch - }; - - function select(selector) { - return typeof selector === "string" - ? 
new Selection([[document.querySelector(selector)]], [document.documentElement]) - : new Selection([[selector]], root); - } - - function sourceEvent() { - var current = event, source; - while (source = current.sourceEvent) current = source; - return current; - } - - function point(node, event) { - var svg = node.ownerSVGElement || node; - - if (svg.createSVGPoint) { - var point = svg.createSVGPoint(); - point.x = event.clientX, point.y = event.clientY; - point = point.matrixTransform(node.getScreenCTM().inverse()); - return [point.x, point.y]; - } - - var rect = node.getBoundingClientRect(); - return [event.clientX - rect.left - node.clientLeft, event.clientY - rect.top - node.clientTop]; - } - - function mouse(node) { - var event = sourceEvent(); - if (event.changedTouches) event = event.changedTouches[0]; - return point(node, event); - } - - function touch(node, touches, identifier) { - if (arguments.length < 3) identifier = touches, touches = sourceEvent().changedTouches; - - for (var i = 0, n = touches ? 
touches.length : 0, touch; i < n; ++i) { - if ((touch = touches[i]).identifier === identifier) { - return point(node, touch); - } - } - - return null; - } - - function nopropagation() { - event.stopImmediatePropagation(); - } - - function noevent() { - event.preventDefault(); - event.stopImmediatePropagation(); - } - - function nodrag(view) { - var root = view.document.documentElement, - selection = select(view).on("dragstart.drag", noevent, true); - if ("onselectstart" in root) { - selection.on("selectstart.drag", noevent, true); - } else { - root.__noselect = root.style.MozUserSelect; - root.style.MozUserSelect = "none"; - } - } - - function yesdrag(view, noclick) { - var root = view.document.documentElement, - selection = select(view).on("dragstart.drag", null); - if (noclick) { - selection.on("click.drag", noevent, true); - setTimeout(function() { selection.on("click.drag", null); }, 0); - } - if ("onselectstart" in root) { - selection.on("selectstart.drag", null); - } else { - root.style.MozUserSelect = root.__noselect; - delete root.__noselect; - } - } - - function constant$3(x) { - return function() { - return x; - }; - } - - function DragEvent(target, type, subject, id, active, x, y, dx, dy, dispatch) { - this.target = target; - this.type = type; - this.subject = subject; - this.identifier = id; - this.active = active; - this.x = x; - this.y = y; - this.dx = dx; - this.dy = dy; - this._ = dispatch; - } - - DragEvent.prototype.on = function() { - var value = this._.on.apply(this._, arguments); - return value === this._ ? this : value; - }; - - // Ignore right-click, since that should open the context menu. - function defaultFilter() { - return !event.ctrlKey && !event.button; - } - - function defaultContainer() { - return this.parentNode; - } - - function defaultSubject(d) { - return d == null ? 
{x: event.x, y: event.y} : d; - } - - function defaultTouchable() { - return navigator.maxTouchPoints || ("ontouchstart" in this); - } - - function drag() { - var filter = defaultFilter, - container = defaultContainer, - subject = defaultSubject, - touchable = defaultTouchable, - gestures = {}, - listeners = dispatch("start", "drag", "end"), - active = 0, - mousedownx, - mousedowny, - mousemoving, - touchending, - clickDistance2 = 0; - - function drag(selection) { - selection - .on("mousedown.drag", mousedowned) - .filter(touchable) - .on("touchstart.drag", touchstarted) - .on("touchmove.drag", touchmoved) - .on("touchend.drag touchcancel.drag", touchended) - .style("touch-action", "none") - .style("-webkit-tap-highlight-color", "rgba(0,0,0,0)"); - } - - function mousedowned() { - if (touchending || !filter.apply(this, arguments)) return; - var gesture = beforestart("mouse", container.apply(this, arguments), mouse, this, arguments); - if (!gesture) return; - select(event.view).on("mousemove.drag", mousemoved, true).on("mouseup.drag", mouseupped, true); - nodrag(event.view); - nopropagation(); - mousemoving = false; - mousedownx = event.clientX; - mousedowny = event.clientY; - gesture("start"); - } - - function mousemoved() { - noevent(); - if (!mousemoving) { - var dx = event.clientX - mousedownx, dy = event.clientY - mousedowny; - mousemoving = dx * dx + dy * dy > clickDistance2; - } - gestures.mouse("drag"); - } - - function mouseupped() { - select(event.view).on("mousemove.drag mouseup.drag", null); - yesdrag(event.view, mousemoving); - noevent(); - gestures.mouse("end"); - } - - function touchstarted() { - if (!filter.apply(this, arguments)) return; - var touches = event.changedTouches, - c = container.apply(this, arguments), - n = touches.length, i, gesture; - - for (i = 0; i < n; ++i) { - if (gesture = beforestart(touches[i].identifier, c, touch, this, arguments)) { - nopropagation(); - gesture("start"); - } - } - } - - function touchmoved() { - var touches = 
event.changedTouches, - n = touches.length, i, gesture; - - for (i = 0; i < n; ++i) { - if (gesture = gestures[touches[i].identifier]) { - noevent(); - gesture("drag"); - } - } - } - - function touchended() { - var touches = event.changedTouches, - n = touches.length, i, gesture; - - if (touchending) clearTimeout(touchending); - touchending = setTimeout(function() { touchending = null; }, 500); // Ghost clicks are delayed! - for (i = 0; i < n; ++i) { - if (gesture = gestures[touches[i].identifier]) { - nopropagation(); - gesture("end"); - } - } - } - - function beforestart(id, container, point, that, args) { - var p = point(container, id), s, dx, dy, - sublisteners = listeners.copy(); - - if (!customEvent(new DragEvent(drag, "beforestart", s, id, active, p[0], p[1], 0, 0, sublisteners), function() { - if ((event.subject = s = subject.apply(that, args)) == null) return false; - dx = s.x - p[0] || 0; - dy = s.y - p[1] || 0; - return true; - })) return; - - return function gesture(type) { - var p0 = p, n; - switch (type) { - case "start": gestures[id] = gesture, n = active++; break; - case "end": delete gestures[id], --active; // nobreak - case "drag": p = point(container, id), n = active; break; - } - customEvent(new DragEvent(drag, type, s, id, n, p[0] + dx, p[1] + dy, p[0] - p0[0], p[1] - p0[1], sublisteners), sublisteners.apply, sublisteners, [type, that, args]); - }; - } - - drag.filter = function(_) { - return arguments.length ? (filter = typeof _ === "function" ? _ : constant$3(!!_), drag) : filter; - }; - - drag.container = function(_) { - return arguments.length ? (container = typeof _ === "function" ? _ : constant$3(_), drag) : container; - }; - - drag.subject = function(_) { - return arguments.length ? (subject = typeof _ === "function" ? _ : constant$3(_), drag) : subject; - }; - - drag.touchable = function(_) { - return arguments.length ? (touchable = typeof _ === "function" ? 
_ : constant$3(!!_), drag) : touchable; - }; - - drag.on = function() { - var value = listeners.on.apply(listeners, arguments); - return value === listeners ? drag : value; - }; - - drag.clickDistance = function(_) { - return arguments.length ? (clickDistance2 = (_ = +_) * _, drag) : Math.sqrt(clickDistance2); - }; - - return drag; - } - - // Copyright 2018 The Distill Template Authors - - const T$a = Template('d-slider', ` - - -
    -
    -
    -
    -
    -
    -
    -
    -
    -`); - - // ARIA - // If the slider has a visible label, it is referenced by aria-labelledby on the slider element. Otherwise, the slider element has a label provided by aria-label. - // If the slider is vertically oriented, it has aria-orientation set to vertical. The default value of aria-orientation for a slider is horizontal. - - const keyCodes = { - left: 37, - up: 38, - right: 39, - down: 40, - pageUp: 33, - pageDown: 34, - end: 35, - home: 36 - }; - - class Slider extends T$a(HTMLElement) { - - - connectedCallback() { - this.connected = true; - this.setAttribute('role', 'slider'); - // Makes the element tab-able. - if (!this.hasAttribute('tabindex')) { this.setAttribute('tabindex', 0); } - - // Keeps track of keyboard vs. mouse interactions for focus rings - this.mouseEvent = false; - - // Handles to shadow DOM elements - this.knob = this.root.querySelector('.knob-container'); - this.background = this.root.querySelector('.background'); - this.trackFill = this.root.querySelector('.track-fill'); - this.track = this.root.querySelector('.track'); - - // Default values for attributes - this.min = this.min ? this.min : 0; - this.max = this.max ? this.max : 100; - this.scale = linear$1().domain([this.min, this.max]).range([0, 1]).clamp(true); - - this.origin = this.origin !== undefined ? this.origin : this.min; - this.step = this.step ? this.step : 1; - this.update(this.value ? this.value : 0); - - this.ticks = this.ticks ? 
this.ticks : false; - this.renderTicks(); - - this.drag = drag() - .container(this.background) - .on('start', () => { - this.mouseEvent = true; - this.background.classList.add('mousedown'); - this.changeValue = this.value; - this.dragUpdate(); - }) - .on('drag', () => { - this.dragUpdate(); - }) - .on('end', () => { - this.mouseEvent = false; - this.background.classList.remove('mousedown'); - this.dragUpdate(); - if (this.changeValue !== this.value) this.dispatchChange(); - this.changeValue = this.value; - }); - this.drag(select(this.background)); - - this.addEventListener('focusin', () => { - if(!this.mouseEvent) { - this.background.classList.add('focus'); - } - }); - this.addEventListener('focusout', () => { - this.background.classList.remove('focus'); - }); - this.addEventListener('keydown', this.onKeyDown); - - } - - static get observedAttributes() {return ['min', 'max', 'value', 'step', 'ticks', 'origin', 'tickValues', 'tickLabels']; } - - attributeChangedCallback(attr, oldValue, newValue) { - if (isNaN(newValue) || newValue === undefined || newValue === null) return; - if (attr == 'min') { - this.min = +newValue; - this.setAttribute('aria-valuemin', this.min); - } - if (attr == 'max') { - this.max = +newValue; - this.setAttribute('aria-valuemax', this.max); - } - if (attr == 'value') { - this.update(+newValue); - } - if (attr == 'origin') { - this.origin = +newValue; - // this.update(this.value); - } - if (attr == 'step') { - if (newValue > 0) { - this.step = +newValue; - } - } - if (attr == 'ticks') { - this.ticks = (newValue === '' ? 
true : newValue); - } - } - - onKeyDown(event) { - this.changeValue = this.value; - let stopPropagation = false; - switch (event.keyCode) { - case keyCodes.left: - case keyCodes.down: - this.update(this.value - this.step); - stopPropagation = true; - break; - case keyCodes.right: - case keyCodes.up: - this.update(this.value + this.step); - stopPropagation = true; - break; - case keyCodes.pageUp: - this.update(this.value + this.step * 10); - stopPropagation = true; - break; - - case keyCodes.pageDown: - this.update(this.value + this.step * 10); - stopPropagation = true; - break; - case keyCodes.home: - this.update(this.min); - stopPropagation = true; - break; - case keyCodes.end: - this.update(this.max); - stopPropagation = true; - break; - } - if (stopPropagation) { - this.background.classList.add('focus'); - event.preventDefault(); - event.stopPropagation(); - if (this.changeValue !== this.value) this.dispatchChange(); - } - } - - validateValueRange(min, max, value) { - return Math.max(Math.min(max, value), min); - } - - quantizeValue(value, step) { - return Math.round(value / step) * step; - } - - dragUpdate() { - const bbox = this.background.getBoundingClientRect(); - const x = event.x; - const width = bbox.width; - this.update(this.scale.invert(x / width)); - } - - update(value) { - let v = value; - if (this.step !== 'any') { - v = this.quantizeValue(value, this.step); - } - v = this.validateValueRange(this.min, this.max, v); - if (this.connected) { - this.knob.style.left = this.scale(v) * 100 + '%'; - this.trackFill.style.width = this.scale(this.min + Math.abs(v - this.origin)) * 100 + '%'; - this.trackFill.style.left = this.scale(Math.min(v, this.origin)) * 100 + '%'; - } - if (this.value !== v) { - this.value = v; - this.setAttribute('aria-valuenow', this.value); - this.dispatchInput(); - } - } - - // Dispatches only on a committed change (basically only on mouseup). 
- dispatchChange() { - const e = new Event('change'); - this.dispatchEvent(e, {}); - } - - // Dispatches on each value change. - dispatchInput() { - const e = new Event('input'); - this.dispatchEvent(e, {}); - } - - renderTicks() { - const ticksContainer = this.root.querySelector('.ticks'); - if (this.ticks !== false) { - let tickData = []; - if (this.ticks > 0) { - tickData = this.scale.ticks(this.ticks); - } else if (this.step === 'any') { - tickData = this.scale.ticks(); - } else { - tickData = range(this.min, this.max + 1e-6, this.step); - } - tickData.forEach(d => { - const tick = document.createElement('div'); - tick.classList.add('tick'); - tick.style.left = this.scale(d) * 100 + '%'; - ticksContainer.appendChild(tick); - }); - } else { - ticksContainer.style.display = 'none'; - } - } - } - - var logo = "\n \n\n"; - - const headerTemplate = ` - - -`; - - // Copyright 2018 The Distill Template Authors - - const T$b = Template('distill-header', headerTemplate, false); - - class DistillHeader extends T$b(HTMLElement) { - - } - - // Copyright 2018 The Distill Template Authors - - const styles$2 = ` - -`; - - function appendixTemplate(frontMatter) { - let html = styles$2; - - if (typeof frontMatter.githubUrl !== 'undefined') { - html += ` -

    Updates and Corrections

    -

    `; - if (frontMatter.githubCompareUpdatesUrl) { - html += `View all changes to this article since it was first published.`; - } - html += ` - If you see mistakes or want to suggest changes, please create an issue on GitHub.

    - `; - } - - const journal = frontMatter.journal; - if (typeof journal !== 'undefined' && journal.title === 'Distill') { - html += ` -

    Reuse

    -

    Diagrams and text are licensed under Creative Commons Attribution CC-BY 4.0 with the source available on GitHub, unless noted otherwise. The figures that have been reused from other sources don’t fall under this license and can be recognized by a note in their caption: “Figure from …”.

    - `; - } - - if (typeof frontMatter.publishedDate !== 'undefined') { - html += ` -

    Citation

    -

    For attribution in academic contexts, please cite this work as

    -
    ${frontMatter.concatenatedAuthors}, "${frontMatter.title}", Distill, ${frontMatter.publishedYear}.
    -

    BibTeX citation

    -
    ${serializeFrontmatterToBibtex(frontMatter)}
    - `; - } - - return html; - } - - class DistillAppendix extends HTMLElement { - - static get is() { return 'distill-appendix'; } - - set frontMatter(frontMatter) { - this.innerHTML = appendixTemplate(frontMatter); - } - - } - - const footerTemplate = ` - - - - -`; - - // Copyright 2018 The Distill Template Authors - - const T$c = Template('distill-footer', footerTemplate); - - class DistillFooter extends T$c(HTMLElement) { - - } - - // Copyright 2018 The Distill Template Authors - - let templateIsLoading = false; - let runlevel = 0; - const initialize = function() { - if (window.distill.runlevel < 1) { - throw new Error("Insufficient Runlevel for Distill Template!"); - } - - /* 1. Flag that we're being loaded */ - if ("distill" in window && window.distill.templateIsLoading) { - throw new Error( - "Runlevel 1: Distill Template is getting loaded more than once, aborting!" - ); - } else { - window.distill.templateIsLoading = true; - console.debug("Runlevel 1: Distill Template has started loading."); - } - - /* 2. Add styles if they weren't added during prerendering */ - makeStyleTag(document); - console.debug("Runlevel 1: Static Distill styles have been added."); - console.debug("Runlevel 1->2."); - window.distill.runlevel += 1; - - /* 3. Register Controller listener functions */ - /* Needs to happen before components to their connected callbacks have a controller to talk to. */ - for (const [functionName, callback] of Object.entries(Controller.listeners)) { - if (typeof callback === "function") { - document.addEventListener(functionName, callback); - } else { - console.error("Runlevel 2: Controller listeners need to be functions!"); - } - } - console.debug("Runlevel 2: We can now listen to controller events."); - console.debug("Runlevel 2->3."); - window.distill.runlevel += 1; - - /* 4. 
Register components */ - const components = [ - Abstract, Appendix, Article, Bibliography, Byline, Cite, CitationList, Code, - Footnote, FootnoteList, FrontMatter$1, HoverBox, Title, DMath, References, TOC, Figure, - Slider, Interstitial - ]; - - const distillComponents = [DistillHeader, DistillAppendix, DistillFooter]; - - if (window.distill.runlevel < 2) { - throw new Error("Insufficient Runlevel for adding custom elements!"); - } - const allComponents = components.concat(distillComponents); - for (const component of allComponents) { - console.debug("Runlevel 2: Registering custom element: " + component.is); - customElements.define(component.is, component); - } - - console.debug( - "Runlevel 3: Distill Template finished registering custom elements." - ); - console.debug("Runlevel 3->4."); - window.distill.runlevel += 1; - - // If template was added after DOMContentLoaded we may have missed that event. - // Controller will check for that case, so trigger the event explicitly: - if (domContentLoaded()) { - Controller.listeners.DOMContentLoaded(); - } - - console.debug("Runlevel 4: Distill Template initialisation complete."); - window.distill.templateIsLoading = false; - window.distill.templateHasLoaded = true; - }; - - window.distill = { runlevel, initialize, templateIsLoading }; - - /* 0. 
Check browser feature support; synchronously polyfill if needed */ - if (Polyfills.browserSupportsAllFeatures()) { - console.debug("Runlevel 0: No need for polyfills."); - console.debug("Runlevel 0->1."); - window.distill.runlevel += 1; - window.distill.initialize(); - } else { - console.debug("Runlevel 0: Distill Template is loading polyfills."); - Polyfills.load(window.distill.initialize); - } - -}))); -//# sourceMappingURL=template.v2.js.map +!function(n){"function"==typeof define&&define.amd?define(n):n()}(function(){"use strict"; +// Copyright 2018 The Distill Template Authors +function n(n,t){n.title=t.title,t.published&&(t.published instanceof Date?n.publishedDate=t.published:t.published.constructor===String&&(n.publishedDate=new Date(t.published))),t.publishedDate&&(t.publishedDate instanceof Date?n.publishedDate=t.publishedDate:t.publishedDate.constructor===String?n.publishedDate=new Date(t.publishedDate):console.error("Don't know what to do with published date: "+t.publishedDate)),n.description=t.description,n.authors=t.authors.map(n=>new Nr(n)),n.katex=t.katex,n.password=t.password,t.doi&&(n.doi=t.doi)} +// Copyright 2018 The Distill Template Authors +function t(n=document){const t=new Set,e=n.querySelectorAll("d-cite");for(const n of e){const e=(n.getAttribute("key")||n.getAttribute("bibtex-key")).split(",").map(n=>n.trim());for(const n of e)t.add(n)}return[...t]}function e(n,t,e,i){if(null==n.author)return"";var r=n.author.split(" and ");let o=r.map(n=>{if(-1!=(n=n.trim()).indexOf(","))var e=n.split(",")[0].trim(),i=n.split(",")[1];else if(-1!=n.indexOf(" "))e=n.split(" ").slice(-1)[0].trim(),i=n.split(" ").slice(0,-1).join(" ");else e=n.trim();var r="";return i!=undefined&&(r=(r=i.trim().split(" ").map(n=>n.trim()[0])).join(".")+"."),t.replace("${F}",i).replace("${L}",e).replace("${I}",r).trim()});if(r.length>1){var a=o.slice(0,r.length-1).join(e);return a+=(i||e)+o[r.length-1]}return o[0]}function i(n){var t=n.journal||n.booktitle||"";if("volume"in 
n){var e=n.issue||n.number;e=e!=undefined?"("+e+")":"",t+=", Vol "+n.volume+e}return"pages"in n&&(t+=", pp. "+n.pages),""!=t&&(t+=". "),"publisher"in n&&"."!=(t+=n.publisher)[t.length-1]&&(t+="."),t}function r(n){if("url"in n){var t=n.url,e=/arxiv\.org\/abs\/([0-9\.]*)/.exec(t);if(null!=e&&(t=`http://arxiv.org/pdf/${e[1]}.pdf`),".pdf"==t.slice(-4))var i="PDF";else if(".html"==t.slice(-5))i="HTML";return`  [${i||"link"}]`}return""}function o(n,t){return"doi"in n?`${t?"
    ":""} DOI: ${n.doi}`:""}function a(n){return''+n.title+" "}function s(n){if(n){var t=a(n);return t+=r(n)+"
    ",n.author&&(t+=e(n,"${L}, ${I}",", "," and "),(n.year||n.date)&&(t+=", ")),n.year||n.date?t+=(n.year||n.date)+". ":t+=". ",t+=i(n),t+=o(n)}return"?"}function l(n){if(n){var t="";t+=""+n.title+"",t+=r(n),t+="
    ";var a=e(n,"${I} ${L}",", ")+".",s=i(n).trim()+" "+n.year+". "+o(n,!0);return(a+s).length"+s,t}return"?"}function u(){return-1!==["interactive","complete"].indexOf(document.readyState)} +// Copyright 2018 The Distill Template Authors +function c(n){for(let t of n.authors){const n=Boolean(t.affiliation),e=Boolean(t.affiliations);if(n)if(e)console.warn(`Author ${t.author} has both old-style ("affiliation" & "affiliationURL") and new style ("affiliations") affiliation information!`);else{let n={name:t.affiliation};t.affiliationURL&&(n.url=t.affiliationURL),t.affiliations=[n]}}return n}function d(n){const t=n.firstElementChild;if(t){if("json"==t.getAttribute("type").split("/")[1]){const n=t.textContent;return c(JSON.parse(n))}console.error("Distill only supports JSON frontmatter tags anymore; no more YAML.")}else console.error("You added a frontmatter tag but did not provide a script tag with front matter data in it. Please take a look at our templates.");return{}} +// Copyright 2018 The Distill Template Authors +function h(n,t){const e=n.body,i=e.querySelector("d-article");if(!i)return void console.warn("No d-article tag found; skipping adding optional components!");let r=n.querySelector("d-byline");r||(t.authors?(r=n.createElement("d-byline"),e.insertBefore(r,i)):console.warn("No authors found in front matter; please add them before submission!"));let o=n.querySelector("d-title");o||(o=n.createElement("d-title"),e.insertBefore(o,r));let a=o.querySelector("h1");a||((a=n.createElement("h1")).textContent=t.title,o.insertBefore(a,o.firstChild));const s="undefined"!=typeof t.password;let l=e.querySelector("d-interstitial");if(s&&!l){const i="undefined"!=typeof window,r=i&&window.location.hostname.includes("localhost");i&&r||((l=n.createElement("d-interstitial")).password=t.password,e.insertBefore(l,e.firstChild))}else!s&&l&&l.parentElement.removeChild(this);let u=n.querySelector("d-appendix");u||(u=n.createElement("d-appendix"),n.body.appendChild(u));let 
c=n.querySelector("d-footnote-list");c||(c=n.createElement("d-footnote-list"),u.appendChild(c));let d=n.querySelector("d-citation-list");d||(d=n.createElement("d-citation-list"),u.appendChild(d))} +// Copyright 2018 The Distill Template Authors +function p(n){const t="distill-prerendered-styles";if(!n.getElementById(t)){const e=n.createElement("style");e.id=t,e.type="text/css";const i=n.createTextNode(Kr);e.appendChild(i);const r=n.head.querySelector("script");n.head.insertBefore(e,r)}} +// Copyright 2018 The Distill Template Authors +function f(n,t){console.debug("Runlevel 0: Polyfill required: "+n.name);const e=document.createElement("script");e.src=n.url,e.async=!1,t&&(e.onload=function(){t(n)}),e.onerror=function(){new Error("Runlevel 0: Polyfills failed to load script "+n.name)},document.head.appendChild(e)} +// Copyright 2018 The Distill Template Authors +function g(n){return`${n} {\n grid-column: left / text;\n }\n `} +// Copyright 2018 The Distill Template Authors +function m(n,t){return n(t={exports:{}},t.exports),t.exports} +// Copyright 2018 The Distill Template Authors +function b(n){return n.replace(/[\t\n ]+/g," ").replace(/{\\["^`.'acu~Hvs]( )?([a-zA-Z])}/g,(n,t,e)=>e).replace(/{\\([a-zA-Z])}/g,(n,t)=>t)}function y(n){const t=new Map,e=oo.toJSON(n);for(const n of e){for(const[t,e]of Object.entries(n.entryTags))n.entryTags[t.toLowerCase()]=b(e);n.entryTags.type=n.entryType,t.set(n.citationKey,n.entryTags)}return t}function v(n){return`@article{${n.slug},\n author = {${n.bibtexAuthors}},\n title = {${n.title}},\n journal = {${n.journal.title}},\n year = {${n.publishedYear}},\n note = {${n.url}},\n doi = {${n.doi}}\n}`} +// Copyright 2018 The Distill Template Authors +// Copyright 2018 The Distill Template Authors +function w(n){return`\n \n`}function x(n,t,e=document){if(t.size>0){n.style.display="";let i=n.querySelector(".references");if(i)i.innerHTML="";else{const t=e.createElement("style");t.innerHTML=co,n.appendChild(t);const 
r=e.createElement("h3");r.id="references",r.textContent="References",n.appendChild(r),(i=e.createElement("ol")).id="references-list",i.className="references",n.appendChild(i)}for(const[n,r]of t){const t=e.createElement("li");t.id=n,t.innerHTML=s(r),i.appendChild(t)}}else n.style.display="none"}function k(n,t){let e='\n \n \n

    Table of contents

    \n
      ';for(const n of t){const t="D-TITLE"==n.parentElement.tagName,i=n.getAttribute("no-toc");if(t||i)continue;const r=n.textContent;let o='
    • '+r+"
    • ";"H3"==n.tagName?o="
        "+o+"
      ":o+="
      ",e+=o}e+="
    ",n.innerHTML=e} +// Copyright 2018 The Distill Template Authors +function S(n,t){return nt?1:n>=t?0:NaN}function M(n){return 1===n.length&&(n=T(n)),{left:function(t,e,i,r){for(null==i&&(i=0),null==r&&(r=t.length);i>>1;n(t[o],e)<0?i=o+1:r=o}return i},right:function(t,e,i,r){for(null==i&&(i=0),null==r&&(r=t.length);i>>1;n(t[o],e)>0?r=o:i=o+1}return i}}}function T(n){return function(t,e){return S(n(t),e)}}function _(n,t,e){n=+n,t=+t,e=(r=arguments.length)<2?(t=n,n=0,1):r<3?1:+e;for(var i=-1,r=0|Math.max(0,Math.ceil((t-n)/e)),o=new Array(r);++i0)return[n];if((i=t0)for(n=Math.ceil(n/a),t=Math.floor(t/a),o=new Array(r=Math.ceil(t-n+1));++s=0?(o>=Lo?10:o>=Do?5:o>=Oo?2:1)*Math.pow(10,r):-Math.pow(10,-r)/(o>=Lo?10:o>=Do?5:o>=Oo?2:1)}function E(n,t,e){var i=Math.abs(t-n)/Math.max(0,e),r=Math.pow(10,Math.floor(Math.log(i)/Math.LN10)),o=i/r;return o>=Lo?r*=10:o>=Do?r*=5:o>=Oo&&(r*=2),t>8&15|t>>4&240,t>>4&15|240&t,(15&t)<<4|15&t,1):8===e?P(t>>24&255,t>>16&255,t>>8&255,(255&t)/255):4===e?P(t>>12&15|t>>8&240,t>>8&15|t>>4&240,t>>4&15|240&t,((15&t)<<4|15&t)/255):null):(t=Ho.exec(n))?new q(t[1],t[2],t[3],1):(t=zo.exec(n))?new q(255*t[1]/100,255*t[2]/100,255*t[3]/100,1):(t=qo.exec(n))?P(t[1],t[2],t[3],t[4]):(t=jo.exec(n))?P(255*t[1]/100,255*t[2]/100,255*t[3]/100,t[4]):(t=Bo.exec(n))?W(t[1],t[2]/100,t[3]/100,1):(t=Yo.exec(n))?W(t[1],t[2]/100,t[3]/100,t[4]):Wo.hasOwnProperty(n)?$(Wo[n]):"transparent"===n?new q(NaN,NaN,NaN,0):null}function $(n){return new q(n>>16&255,n>>8&255,255&n,1)}function P(n,t,e,i){return i<=0&&(n=t=e=NaN),new q(n,t,e,i)}function H(n){return n instanceof O||(n=U(n)),n?new q((n=n.rgb()).r,n.g,n.b,n.opacity):new q}function z(n,t,e,i){return 1===arguments.length?H(n):new q(n,t,e,null==i?1:i)}function q(n,t,e,i){this.r=+n,this.g=+t,this.b=+e,this.opacity=+i}function j(){return"#"+Y(this.r)+Y(this.g)+Y(this.b)}function B(){var n=this.opacity;return(1===(n=isNaN(n)?1:Math.max(0,Math.min(1,n)))?"rgb(":"rgba(")+Math.max(0,Math.min(255,Math.round(this.r)||0))+", 
"+Math.max(0,Math.min(255,Math.round(this.g)||0))+", "+Math.max(0,Math.min(255,Math.round(this.b)||0))+(1===n?")":", "+n+")")}function Y(n){return((n=Math.max(0,Math.min(255,Math.round(n)||0)))<16?"0":"")+n.toString(16)}function W(n,t,e,i){return i<=0?n=t=e=NaN:e<=0||e>=1?n=t=NaN:t<=0&&(n=NaN),new K(n,t,e,i)}function G(n){if(n instanceof K)return new K(n.h,n.s,n.l,n.opacity);if(n instanceof O||(n=U(n)),!n)return new K;if(n instanceof K)return n;var t=(n=n.rgb()).r/255,e=n.g/255,i=n.b/255,r=Math.min(t,e,i),o=Math.max(t,e,i),a=NaN,s=o-r,l=(o+r)/2;return s?(a=t===o?(e-i)/s+6*(e0&&l<1?0:a,new K(a,s,l,n.opacity)}function V(n,t,e,i){return 1===arguments.length?G(n):new K(n,t,e,null==i?1:i)}function K(n,t,e,i){this.h=+n,this.s=+t,this.l=+e,this.opacity=+i}function X(n,t,e){return 255*(n<60?t+(e-t)*n/60:n<180?e:n<240?t+(e-t)*(240-n)/60:t)}function Z(n){if(n instanceof J)return new J(n.l,n.a,n.b,n.opacity);if(n instanceof sn)return ln(n);n instanceof q||(n=H(n));var t,e,i=rn(n.r),r=rn(n.g),o=rn(n.b),a=nn((.2225045*i+.7168786*r+.0606169*o)/Zo);return i===r&&r===o?t=e=a:(t=nn((.4360747*i+.3850649*r+.1430804*o)/Xo),e=nn((.0139322*i+.0971045*r+.7141733*o)/Qo)),new J(116*a-16,500*(t-a),200*(a-e),n.opacity)}function Q(n,t,e,i){return 1===arguments.length?Z(n):new J(n,t,e,null==i?1:i)}function J(n,t,e,i){this.l=+n,this.a=+t,this.b=+e,this.opacity=+i}function nn(n){return n>ea?Math.pow(n,1/3):n/ta+Jo}function tn(n){return n>na?n*n*n:ta*(n-Jo)}function en(n){return 255*(n<=.0031308?12.92*n:1.055*Math.pow(n,1/2.4)-.055)}function rn(n){return(n/=255)<=.04045?n/12.92:Math.pow((n+.055)/1.055,2.4)}function on(n){if(n instanceof sn)return new sn(n.h,n.c,n.l,n.opacity);if(n instanceof J||(n=Z(n)),0===n.a&&0===n.b)return new sn(NaN,0o&&(r=t.slice(o,r),s[a]?s[a]+=r:s[++a]=r),(e=e[0])===(i=i[0])?s[a]?s[a]+=i:s[++a]=i:(s[++a]=null,l.push({i:a,x:xn(e,i)})),o=fa.lastIndex;return ot&&(e=n,n=t,t=e),function(e){return Math.max(n,Math.min(t,e))}}function On(n,t,e){var 
i=n[0],r=n[1],o=t[0],a=t[1];return r2?In:On,a=s=null,t}function t(n){return isNaN(n=+n)?r:(a||(a=o(l.map(e),u,c)))(e(d(n)))}var e,i,r,o,a,s,l=ga,u=ga,c=_n,d=Nn;return t.invert=function(n){return d(i((s||(s=o(u,l.map(e),xn)))(n)))},t.domain=function(t){return arguments.length?(l=Array.from(t,En),n()):l.slice()},t.range=function(t){return arguments.length?(u=Array.from(t),n()):u.slice()},t.rangeRound=function(t){return u=Array.from(t),c=Cn,n()},t.clamp=function(t){return arguments.length?(d=!!t||Nn,n()):d!==Nn},t.interpolate=function(t){return arguments.length?(c=t,n()):c},t.unknown=function(n){return arguments.length?(r=n,t):r},function(t,r){return e=t,i=r,n()}}function Un(){return Rn()(Nn,Nn)}function $n(n,t){if((e=(n=t?n.toExponential(t-1):n.toExponential()).indexOf("e"))<0)return null;var e,i=n.slice(0,e);return[i.length>1?i[0]+i.slice(2):i,+n.slice(e+1)]}function Pn(n){return(n=$n(Math.abs(n)))?n[1]:NaN}function Hn(n,t){return function(e,i){for(var r=e.length,o=[],a=0,s=n[0],l=0;r>0&&s>0&&(l+s+1>i&&(s=Math.max(1,i-l)),o.push(e.substring(r-=s,r+s)),!((l+=s+1)>i));)s=n[a=(a+1)%n.length];return o.reverse().join(t)}}function zn(n){return function(t){return t.replace(/[0-9]/g,function(t){return n[+t]})}}function qn(n){if(!(t=ma.exec(n)))throw new Error("invalid format: "+n);var t;return new jn({fill:t[1],align:t[2],sign:t[3],symbol:t[4],zero:t[5],width:t[6],comma:t[7],precision:t[8]&&t[8].slice(1),trim:t[9],type:t[10]})}function jn(n){this.fill=n.fill===undefined?" 
":n.fill+"",this.align=n.align===undefined?">":n.align+"",this.sign=n.sign===undefined?"-":n.sign+"",this.symbol=n.symbol===undefined?"":n.symbol+"",this.zero=!!n.zero,this.width=n.width===undefined?undefined:+n.width,this.comma=!!n.comma,this.precision=n.precision===undefined?undefined:+n.precision,this.trim=!!n.trim,this.type=n.type===undefined?"":n.type+""}function Bn(n){n:for(var t,e=n.length,i=1,r=-1;i0&&(r=0)}return r>0?n.slice(0,r)+n.slice(t+1):n}function Yn(n,t){var e=$n(n,t);if(!e)return n+"";var i=e[0],r=e[1],o=r-(da=3*Math.max(-8,Math.min(8,Math.floor(r/3))))+1,a=i.length;return o===a?i:o>a?i+new Array(o-a+1).join("0"):o>0?i.slice(0,o)+"."+i.slice(o):"0."+new Array(1-o).join("0")+$n(n,Math.max(0,t+o-1))[0]}function Wn(n,t){var e=$n(n,t);if(!e)return n+"";var i=e[0],r=e[1];return r<0?"0."+new Array(-r).join("0")+i:i.length>r+1?i.slice(0,r+1)+"."+i.slice(r+1):i+new Array(r-i.length+2).join("0")}function Gn(n){return n}function Vn(n){function t(n){function t(n){var t,r,o,l=w,p=x;if("c"===v)p=k(n)+p,n="";else{var M=(n=+n)<0||1/n<0;if(n=isNaN(n)?c:k(Math.abs(n),b),y&&(n=Bn(n)),M&&0==+n&&"+"!==h&&(M=!1),l=(M?"("===h?h:u:"-"===h||"("===h?"":h)+l,p=("s"===v?ka[8+da/3]:"")+p+(M&&"("===h?")":""),S)for(t=-1,r=n.length;++t(o=n.charCodeAt(t))||o>57){p=(46===o?a+n.slice(t+1):n.slice(t))+p,n=n.slice(0,t);break}}m&&!f&&(n=i(n,Infinity));var T=l.length+n.length+p.length,_=T>1)+l+n+p+_.slice(T);break;default:n=_+l+n+p}return s(n)}var e=(n=qn(n)).fill,d=n.align,h=n.sign,p=n.symbol,f=n.zero,g=n.width,m=n.comma,b=n.precision,y=n.trim,v=n.type;"n"===v?(m=!0,v="g"):wa[v]||(b===undefined&&(b=12),y=!0,v="g"),(f||"0"===e&&"="===d)&&(f=!0,e="0",d="=");var w="$"===p?r:"#"===p&&/[boxX]/.test(v)?"0"+v.toLowerCase():"",x="$"===p?o:/[%p]/.test(v)?l:"",k=wa[v],S=/[defgprs%]/.test(v);return b=b===undefined?6:/[gprs]/.test(v)?Math.max(1,Math.min(21,b)):Math.max(0,Math.min(20,b)),t.toString=function(){return n+""},t}function e(n,e){var 
i=t(((n=qn(n)).type="f",n)),r=3*Math.max(-8,Math.min(8,Math.floor(Pn(e)/3))),o=Math.pow(10,-r),a=ka[8+r/3];return function(n){return i(o*n)+a}}var i=n.grouping===undefined||n.thousands===undefined?Gn:Hn(xa.call(n.grouping,Number),n.thousands+""),r=n.currency===undefined?"":n.currency[0]+"",o=n.currency===undefined?"":n.currency[1]+"",a=n.decimal===undefined?".":n.decimal+"",s=n.numerals===undefined?Gn:zn(xa.call(n.numerals,String)),l=n.percent===undefined?"%":n.percent+"",u=n.minus===undefined?"-":n.minus+"",c=n.nan===undefined?"NaN":n.nan+"";return{format:t,formatPrefix:e}}function Kn(n){return ba=Vn(n),ya=ba.format,va=ba.formatPrefix,ba}function Xn(n){return Math.max(0,-Pn(Math.abs(n)))}function Zn(n,t){return Math.max(0,3*Math.max(-8,Math.min(8,Math.floor(Pn(t)/3)))-Pn(Math.abs(n)))}function Qn(n,t){return n=Math.abs(n),t=Math.abs(t)-n,Math.max(0,Pn(t)-Pn(n))+1}function Jn(n,t,e,i){var r,o=E(n,t,e);switch((i=qn(null==i?",f":i)).type){case"s":var a=Math.max(Math.abs(n),Math.abs(t));return null!=i.precision||isNaN(r=Zn(o,a))||(i.precision=r),va(i,a);case"":case"e":case"g":case"p":case"r":null!=i.precision||isNaN(r=Qn(o,Math.max(Math.abs(n),Math.abs(t))))||(i.precision=r-("e"===i.type));break;case"f":case"%":null!=i.precision||isNaN(r=Xn(o))||(i.precision=r-2*("%"===i.type))}return ya(i)}function nt(n){var t=n.domain;return n.ticks=function(n){var e=t();return C(e[0],e[e.length-1],null==n?10:n)},n.tickFormat=function(n,e){var i=t();return Jn(i[0],i[i.length-1],null==n?10:n,e)},n.nice=function(e){null==e&&(e=10);var i,r=t(),o=0,a=r.length-1,s=r[o],l=r[a];return l0?i=A(s=Math.floor(s/i)*i,l=Math.ceil(l/i)*i,e):i<0&&(i=A(s=Math.ceil(s*i)/i,l=Math.floor(l*i)/i,e)),i>0?(r[o]=Math.floor(s/i)*i,r[a]=Math.ceil(l/i)*i,t(r)):i<0&&(r[o]=Math.ceil(s*i)/i,r[a]=Math.floor(l*i)/i,t(r)),n},n}function tt(){var n=Un();return n.copy=function(){return Fn(n,tt())},N.apply(n,arguments),nt(n)}function et(n,t,e,i){function r(t){return n(t=0===arguments.length?new Date:new 
Date(+t)),t}return r.floor=function(t){return n(t=new Date(+t)),t},r.ceil=function(e){return n(e=new Date(e-1)),t(e,1),n(e),e},r.round=function(n){var t=r(n),e=r.ceil(n);return n-t0))return s;do{s.push(a=new Date(+e)),t(e,o),n(e)}while(a=t)for(;n(t),!e(t);)t.setTime(t-1)},function(n,i){if(n>=n)if(i<0)for(;++i<=0;)for(;t(n,-1),!e(n););else for(;--i>=0;)for(;t(n,1),!e(n););})},e&&(r.count=function(t,i){return Sa.setTime(+t),Ma.setTime(+i),n(Sa),n(Ma),Math.floor(e(Sa,Ma))},r.every=function(n){return n=Math.floor(n),isFinite(n)&&n>0?n>1?r.filter(i?function(t){return i(t)%n==0}:function(t){return r.count(0,t)%n==0}):r:null}),r}function it(n){return et(function(t){t.setDate(t.getDate()-(t.getDay()+7-n)%7),t.setHours(0,0,0,0)},function(n,t){n.setDate(n.getDate()+7*t)},function(n,t){return(t-n-(t.getTimezoneOffset()-n.getTimezoneOffset())*Ca)/Na})}function rt(n){return et(function(t){t.setUTCDate(t.getUTCDate()-(t.getUTCDay()+7-n)%7),t.setUTCHours(0,0,0,0)},function(n,t){n.setUTCDate(n.getUTCDate()+7*t)},function(n,t){return(t-n)/Na})}function ot(n){if(0<=n.y&&n.y<100){var t=new Date(-1,n.m,n.d,n.H,n.M,n.S,n.L);return t.setFullYear(n.y),t}return new Date(n.y,n.m,n.d,n.H,n.M,n.S,n.L)}function at(n){if(0<=n.y&&n.y<100){var t=new Date(Date.UTC(-1,n.m,n.d,n.H,n.M,n.S,n.L));return t.setUTCFullYear(n.y),t}return new Date(Date.UTC(n.y,n.m,n.d,n.H,n.M,n.S,n.L))}function st(n,t,e){return{y:n,m:t,d:e,H:0,M:0,S:0,L:0}}function lt(n){function t(n,t){return function(e){var i,r,o,a=[],s=-1,l=0,u=n.length;for(e instanceof Date||(e=new Date(+e));++s53)return null;"w"in a||(a.w=1),"Z"in a?(r=(o=(r=at(st(a.y,0,1))).getUTCDay())>4||0===o?$a.ceil(r):$a(r),r=Ra.offset(r,7*(a.V-1)),a.y=r.getUTCFullYear(),a.m=r.getUTCMonth(),a.d=r.getUTCDate()+(a.w+6)%7):(r=(o=(r=ot(st(a.y,0,1))).getDay())>4||0===o?Oa.ceil(r):Oa(r),r=La.offset(r,7*(a.V-1)),a.y=r.getFullYear(),a.m=r.getMonth(),a.d=r.getDate()+(a.w+6)%7)}else("W"in a||"U"in a)&&("w"in a||(a.w="u"in a?a.u%7:"W"in a?1:0),o="Z"in 
a?at(st(a.y,0,1)).getUTCDay():ot(st(a.y,0,1)).getDay(),a.m=0,a.d="W"in a?(a.w+6)%7+7*a.W-(o+5)%7:a.w+7*a.U-(o+6)%7);return"Z"in a?(a.H+=a.Z/100|0,a.M+=a.Z%100,at(a)):ot(a)}}function i(n,t,e,i){for(var r,o,a=0,s=t.length,l=e.length;a=l)return-1;if(37===(r=t.charCodeAt(a++))){if(r=t.charAt(a++),!(o=B[r in Ba?t.charAt(a++):r])||(i=o(n,e,i))<0)return-1}else if(r!=e.charCodeAt(i++))return-1}return i}function r(n,t,e){var i=D.exec(t.slice(e));return i?(n.p=O[i[0].toLowerCase()],e+i[0].length):-1}function o(n,t,e){var i=R.exec(t.slice(e));return i?(n.w=U[i[0].toLowerCase()],e+i[0].length):-1}function a(n,t,e){var i=I.exec(t.slice(e));return i?(n.w=F[i[0].toLowerCase()],e+i[0].length):-1}function s(n,t,e){var i=H.exec(t.slice(e));return i?(n.m=z[i[0].toLowerCase()],e+i[0].length):-1}function l(n,t,e){var i=$.exec(t.slice(e));return i?(n.m=P[i[0].toLowerCase()],e+i[0].length):-1}function u(n,t,e){return i(n,M,t,e)}function c(n,t,e){return i(n,T,t,e)}function d(n,t,e){return i(n,_,t,e)}function h(n){return E[n.getDay()]}function p(n){return A[n.getDay()]}function f(n){return L[n.getMonth()]}function g(n){return N[n.getMonth()]}function m(n){return C[+(n.getHours()>=12)]}function b(n){return 1+~~(n.getMonth()/3)}function y(n){return E[n.getUTCDay()]}function v(n){return A[n.getUTCDay()]}function w(n){return L[n.getUTCMonth()]}function x(n){return N[n.getUTCMonth()]}function k(n){return C[+(n.getUTCHours()>=12)]}function S(n){return 1+~~(n.getUTCMonth()/3)}var 
M=n.dateTime,T=n.date,_=n.time,C=n.periods,A=n.days,E=n.shortDays,N=n.months,L=n.shortMonths,D=dt(C),O=ht(C),I=dt(A),F=ht(A),R=dt(E),U=ht(E),$=dt(N),P=ht(N),H=dt(L),z=ht(L),q={a:h,A:p,b:f,B:g,c:null,d:Ot,e:Ot,f:$t,H:It,I:Ft,j:Rt,L:Ut,m:Pt,M:Ht,p:m,q:b,Q:fe,s:ge,S:zt,u:qt,U:jt,V:Bt,w:Yt,W:Wt,x:null,X:null,y:Gt,Y:Vt,Z:Kt,"%":pe},j={a:y,A:v,b:w,B:x,c:null,d:Xt,e:Xt,f:te,H:Zt,I:Qt,j:Jt,L:ne,m:ee,M:ie,p:k,q:S,Q:fe,s:ge,S:re,u:oe,U:ae,V:se,w:le,W:ue,x:null,X:null,y:ce,Y:de,Z:he,"%":pe},B={a:o,A:a,b:s,B:l,c:u,d:St,e:St,f:Et,H:Tt,I:Tt,j:Mt,L:At,m:kt,M:_t,p:r,q:xt,Q:Lt,s:Dt,S:Ct,u:ft,U:gt,V:mt,w:pt,W:bt,x:c,X:d,y:vt,Y:yt,Z:wt,"%":Nt};return q.x=t(T,q),q.X=t(_,q),q.c=t(M,q),j.x=t(T,j),j.X=t(_,j),j.c=t(M,j),{format:function(n){var e=t(n+="",q);return e.toString=function(){return n},e},parse:function(n){var t=e(n+="",!1);return t.toString=function(){return n},t},utcFormat:function(n){var e=t(n+="",j);return e.toString=function(){return n},e},utcParse:function(n){var t=e(n+="",!0);return t.toString=function(){return n},t}}}function ut(n,t,e){var i=n<0?"-":"",r=(i?-n:n)+"",o=r.length;return i+(o68?1900:2e3),e+i[0].length):-1}function wt(n,t,e){var i=/^(Z)|([+-]\d\d)(?::?(\d\d))?/.exec(t.slice(e,e+6));return i?(n.Z=i[1]?0:-(i[2]+(i[3]||"00")),e+i[0].length):-1}function xt(n,t,e){var i=Ya.exec(t.slice(e,e+1));return i?(n.q=3*i[0]-3,e+i[0].length):-1}function kt(n,t,e){var i=Ya.exec(t.slice(e,e+2));return i?(n.m=i[0]-1,e+i[0].length):-1}function St(n,t,e){var i=Ya.exec(t.slice(e,e+2));return i?(n.d=+i[0],e+i[0].length):-1}function Mt(n,t,e){var i=Ya.exec(t.slice(e,e+3));return i?(n.m=0,n.d=+i[0],e+i[0].length):-1}function Tt(n,t,e){var i=Ya.exec(t.slice(e,e+2));return i?(n.H=+i[0],e+i[0].length):-1}function _t(n,t,e){var i=Ya.exec(t.slice(e,e+2));return i?(n.M=+i[0],e+i[0].length):-1}function Ct(n,t,e){var i=Ya.exec(t.slice(e,e+2));return i?(n.S=+i[0],e+i[0].length):-1}function At(n,t,e){var i=Ya.exec(t.slice(e,e+3));return i?(n.L=+i[0],e+i[0].length):-1}function Et(n,t,e){var 
i=Ya.exec(t.slice(e,e+6));return i?(n.L=Math.floor(i[0]/1e3),e+i[0].length):-1}function Nt(n,t,e){var i=Wa.exec(t.slice(e,e+1));return i?e+i[0].length:-1}function Lt(n,t,e){var i=Ya.exec(t.slice(e));return i?(n.Q=+i[0],e+i[0].length):-1}function Dt(n,t,e){var i=Ya.exec(t.slice(e));return i?(n.s=+i[0],e+i[0].length):-1}function Ot(n,t){return ut(n.getDate(),t,2)}function It(n,t){return ut(n.getHours(),t,2)}function Ft(n,t){return ut(n.getHours()%12||12,t,2)}function Rt(n,t){return ut(1+La.count(Fa(n),n),t,3)}function Ut(n,t){return ut(n.getMilliseconds(),t,3)}function $t(n,t){return Ut(n,t)+"000"}function Pt(n,t){return ut(n.getMonth()+1,t,2)}function Ht(n,t){return ut(n.getMinutes(),t,2)}function zt(n,t){return ut(n.getSeconds(),t,2)}function qt(n){var t=n.getDay();return 0===t?7:t}function jt(n,t){return ut(Da.count(Fa(n)-1,n),t,2)}function Bt(n,t){var e=n.getDay();return n=e>=4||0===e?Ia(n):Ia.ceil(n),ut(Ia.count(Fa(n),n)+(4===Fa(n).getDay()),t,2)}function Yt(n){return n.getDay()}function Wt(n,t){return ut(Oa.count(Fa(n)-1,n),t,2)}function Gt(n,t){return ut(n.getFullYear()%100,t,2)}function Vt(n,t){return ut(n.getFullYear()%1e4,t,4)}function Kt(n){var t=n.getTimezoneOffset();return(t>0?"-":(t*=-1,"+"))+ut(t/60|0,"0",2)+ut(t%60,"0",2)}function Xt(n,t){return ut(n.getUTCDate(),t,2)}function Zt(n,t){return ut(n.getUTCHours(),t,2)}function Qt(n,t){return ut(n.getUTCHours()%12||12,t,2)}function Jt(n,t){return ut(1+Ra.count(Ha(n),n),t,3)}function ne(n,t){return ut(n.getUTCMilliseconds(),t,3)}function te(n,t){return ne(n,t)+"000"}function ee(n,t){return ut(n.getUTCMonth()+1,t,2)}function ie(n,t){return ut(n.getUTCMinutes(),t,2)}function re(n,t){return ut(n.getUTCSeconds(),t,2)}function oe(n){var t=n.getUTCDay();return 0===t?7:t}function ae(n,t){return ut(Ua.count(Ha(n)-1,n),t,2)}function se(n,t){var e=n.getUTCDay();return n=e>=4||0===e?Pa(n):Pa.ceil(n),ut(Pa.count(Ha(n),n)+(4===Ha(n).getUTCDay()),t,2)}function le(n){return n.getUTCDay()}function ue(n,t){return 
ut($a.count(Ha(n)-1,n),t,2)}function ce(n,t){return ut(n.getUTCFullYear()%100,t,2)}function de(n,t){return ut(n.getUTCFullYear()%1e4,t,4)}function he(){return"+0000"}function pe(){return"%"}function fe(n){return+n}function ge(n){return Math.floor(+n/1e3)}function me(n){return za=lt(n),za.format,za.parse,qa=za.utcFormat,ja=za.utcParse,za}function be(n){return n.toISOString()}function ye(n){var t=new Date(n);return isNaN(t)?null:t}function ve(){for(var n,t=0,e=arguments.length,i={};t=0&&(e=n.slice(i+1),n=n.slice(0,i)),n&&!t.hasOwnProperty(n))throw new Error("unknown type: "+n);return{type:n,name:e}})}function ke(n,t){for(var e,i=0,r=n.length;i=0&&"xmlns"!==(t=n.slice(0,e))&&(n=n.slice(e+1)),Za.hasOwnProperty(t)?{space:Za[t],local:n}:n}function Te(n){return function(){var t=this.ownerDocument,e=this.namespaceURI;return e===Xa&&t.documentElement.namespaceURI===Xa?t.createElement(n):t.createElementNS(e,n)}}function _e(n){return function(){return this.ownerDocument.createElementNS(n.space,n.local)}}function Ce(n){var t=Me(n);return(t.local?_e:Te)(t)}function Ae(){}function Ee(n){return null==n?Ae:function(){return this.querySelector(n)}}function Ne(n){"function"!=typeof n&&(n=Ee(n));for(var t=this._groups,e=t.length,i=new Array(e),r=0;r=w&&(w=v+1);!(y=m[w])&&++w=0;)(i=r[o])&&(a&&4^i.compareDocumentPosition(a)&&a.parentNode.insertBefore(i,a),a=i);return this}function Ge(n){function t(t,e){return t&&e?n(t.__data__,e.__data__):!t-!e}n||(n=Ve);for(var e=this._groups,i=e.length,r=new Array(i),o=0;ot?1:n>=t?0:NaN}function Ke(){var n=arguments[0];return arguments[0]=this,n.apply(null,arguments),this}function Xe(){var n=new Array(this.size()),t=-1;return this.each(function(){n[++t]=this}),n}function Ze(){for(var n=this._groups,t=0,e=n.length;t1?this.each((null==t?ui:"function"==typeof t?di:ci)(n,t,null==e?"":e)):pi(this.node(),n)}function pi(n,t){return n.style.getPropertyValue(t)||li(n).getComputedStyle(n,null).getPropertyValue(t)}function fi(n){return function(){delete 
this[n]}}function gi(n,t){return function(){this[n]=t}}function mi(n,t){return function(){var e=t.apply(this,arguments);null==e?delete this[n]:this[n]=e}}function bi(n,t){return arguments.length>1?this.each((null==t?fi:"function"==typeof t?mi:gi)(n,t)):this.node()[n]}function yi(n){return n.trim().split(/^|\s+/)}function vi(n){return n.classList||new wi(n)}function wi(n){this._node=n,this._names=yi(n.getAttribute("class")||"")}function xi(n,t){for(var e=vi(n),i=-1,r=t.length;++i=0&&(t=n.slice(e+1),n=n.slice(0,e)),{type:n,name:t}})}function Zi(n){return function(){var t=this.__on;if(t){for(var e,i=0,r=-1,o=t.length;iv}m.mouse("drag")}function i(){sr(ns.view).on("mousemove.drag mouseup.drag",null),gr(ns.view,c),pr(),m.mouse("end")}function r(){if(h.apply(this,arguments)){var n,t,e=ns.changedTouches,i=p.apply(this,arguments),r=e.length;for(n=0;nView all changes to this article since it was first published.`),t+=`\n If you see mistakes or want to suggest changes, please create an issue on GitHub.

    \n `);const e=n.journal;return void 0!==e&&"Distill"===e.title&&(t+=`\n

    Reuse

    \n

    Diagrams and text are licensed under Creative Commons Attribution CC-BY 4.0 with the source available on GitHub, unless noted otherwise. The figures that have been reused from other sources don\u2019t fall under this license and can be recognized by a note in their caption: \u201cFigure from \u2026\u201d.

    \n `),"undefined"!=typeof n.publishedDate&&(t+=`\n

    Citation

    \n

    For attribution in academic contexts, please cite this work as

    \n
    ${n.concatenatedAuthors}, "${n.title}", Distill, ${n.publishedYear}.
    \n

    BibTeX citation

    \n
    ${v(n)}
    \n `),t}const Mr=["Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday"],Tr=["Jan.","Feb.","March","April","May","June","July","Aug.","Sept.","Oct.","Nov.","Dec."],_r=n=>n<10?"0"+n:n,Cr=function(n){return`${Mr[n.getDay()].substring(0,3)}, ${_r(n.getDate())} ${Tr[n.getMonth()].substring(0,3)} ${n.getFullYear().toString()} ${n.getUTCHours().toString()}:${n.getUTCMinutes().toString()}:${n.getUTCSeconds().toString()} Z`},Ar=function(n){return Array.from(n).reduce((n,[t,e])=>Object.assign(n,{[t]:e}),{})},Er=function(n){const t=new Map;for(var e in n)n.hasOwnProperty(e)&&t.set(e,n[e]);return t};class Nr{constructor(n){this.name=n.author,this.personalURL=n.authorURL,this.affiliation=n.affiliation,this.affiliationURL=n.affiliationURL,this.affiliations=n.affiliations||[]}get firstName(){const n=this.name.split(" ");return n.slice(0,n.length-1).join(" ")}get lastName(){const n=this.name.split(" ");return n[n.length-1]}}class Lr{constructor(){this.title="unnamed article",this.description="",this.authors=[],this.bibliography=new Map,this.bibliographyParsed=!1,this.citations=[],this.citationsCollected=!1,this.journal={},this.katex={},this.doi=undefined,this.publishedDate=undefined}set url(n){this._url=n}get url(){return this._url?this._url:this.distillPath&&this.journal.url?this.journal.url+"/"+this.distillPath:this.journal.url?this.journal.url:void 0}get githubUrl(){return this.githubPath?"https://github.com/"+this.githubPath:undefined}set previewURL(n){this._previewURL=n}get previewURL(){return this._previewURL?this._previewURL:this.url+"/thumbnail.jpg"}get publishedDateRFC(){return Cr(this.publishedDate)}get updatedDateRFC(){return Cr(this.updatedDate)}get publishedYear(){return this.publishedDate.getFullYear()}get publishedMonth(){return Tr[this.publishedDate.getMonth()]}get publishedDay(){return this.publishedDate.getDate()}get publishedMonthPadded(){return _r(this.publishedDate.getMonth()+1)}get publishedDayPadded(){return 
_r(this.publishedDate.getDate())}get publishedISODateOnly(){return this.publishedDate.toISOString().split("T")[0]}get volume(){const n=this.publishedYear-2015;if(n<1)throw new Error("Invalid publish date detected during computing volume");return n}get issue(){return this.publishedDate.getMonth()+1}get concatenatedAuthors(){return this.authors.length>2?this.authors[0].lastName+", et al.":2===this.authors.length?this.authors[0].lastName+" & "+this.authors[1].lastName:1===this.authors.length?this.authors[0].lastName:void 0}get bibtexAuthors(){return this.authors.map(n=>n.lastName+", "+n.firstName).join(" and ")}get slug(){let n="";return this.authors.length&&(n+=this.authors[0].lastName.toLowerCase(),n+=this.publishedYear,n+=this.title.split(" ")[0].toLowerCase()),n||"Untitled"}get bibliographyEntries(){return new Map(this.citations.map(n=>{return[n,this.bibliography.get(n)]}))}set bibliography(n){n instanceof Map?this._bibliography=n:"object"==typeof n&&(this._bibliography=Er(n))}get bibliography(){return this._bibliography}static fromObject(n){const t=new Lr;return Object.assign(t,n),t}assignToObject(n){Object.assign(n,this),n.bibliography=Ar(this.bibliographyEntries),n.url=this.url,n.doi=this.doi,n.githubUrl=this.githubUrl,n.previewURL=this.previewURL,this.publishedDate&&(n.volume=this.volume,n.issue=this.issue,n.publishedDateRFC=this.publishedDateRFC,n.publishedYear=this.publishedYear,n.publishedMonth=this.publishedMonth,n.publishedDay=this.publishedDay,n.publishedMonthPadded=this.publishedMonthPadded,n.publishedDayPadded=this.publishedDayPadded),this.updatedDate&&(n.updatedDateRFC=this.updatedDateRFC),n.concatenatedAuthors=this.concatenatedAuthors,n.bibtexAuthors=this.bibtexAuthors,n.slug=this.slug}} +// Copyright 2018 The Distill Template Authors +const Dr=n=>(class extends n{constructor(){super();const n={childList:!0,characterData:!0,subtree:!0},t=new 
MutationObserver(()=>{t.disconnect(),this.renderIfPossible(),t.observe(this,n)});t.observe(this,n)}connectedCallback(){super.connectedCallback(),this.renderIfPossible()}renderIfPossible(){this.textContent&&this.root&&this.renderContent()}renderContent(){console.error(`Your class ${this.constructor.name} must provide a custom renderContent() method!`)}}),Or=(n,t,e=!0)=>i=>{const r=document.createElement("template");return r.innerHTML=t,e&&"ShadyCSS"in window&&ShadyCSS.prepareTemplate(r,n),class extends i{static get is(){return n}constructor(){super(),this.clone=document.importNode(r.content,!0),e&&(this.attachShadow({mode:"open"}),this.shadowRoot.appendChild(this.clone))}connectedCallback(){this.hasAttribute("distill-prerendered")||(e?"ShadyCSS"in window&&ShadyCSS.styleElement(this):this.insertBefore(this.clone,this.firstChild))}get root(){return e?this.shadowRoot:this}$(n){return this.root.querySelector(n)}$$(n){return this.root.querySelectorAll(n)}}}; +// Copyright 2018 The Distill Template Authors +var Ir='/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the "License");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an "AS IS" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nspan.katex-display {\n text-align: left;\n padding: 8px 0 8px 0;\n margin: 0.5em 0 0.5em 1em;\n}\n\nspan.katex {\n -webkit-font-smoothing: antialiased;\n color: rgba(0, 0, 0, 0.8);\n font-size: 1.18em;\n}\n'; +// Copyright 2018 The Distill Template Authors +const Fr=function(n,t,e){let i=e,r=0;const 
o=n.length;for(;i[n.left,n.right]),r=n=>i.some(t=>-1!==n.indexOf(t));e.mightHaveMath=r,Pr(n,e)},qr="https://distill.pub/third-party/katex/katex.min.js",jr='',Br=Or("d-math",`\n${jr}\n\n\n`);class Yr extends(Dr(Br(HTMLElement))){static set katexOptions(n){Yr._katexOptions=n,Yr.katexOptions.delimiters&&(Yr.katexAdded?Yr.katexLoadedCallback():Yr.addKatex())}static get katexOptions(){return Yr._katexOptions||(Yr._katexOptions={delimiters:[{left:"$$",right:"$$",display:!1}]}),Yr._katexOptions}static katexLoadedCallback(){const n=document.querySelectorAll("d-math");for(const t of n)t.renderContent();Yr.katexOptions.delimiters&&zr(document.body,Yr.katexOptions)}static addKatex(){document.head.insertAdjacentHTML("beforeend",jr);const n=document.createElement("script");n.src=qr,n.async=!0,n.onload=Yr.katexLoadedCallback,n.crossorigin="anonymous",document.head.appendChild(n),Yr.katexAdded=!0}get options(){const n={displayMode:this.hasAttribute("block")};return Object.assign(n,Yr.katexOptions)}connectedCallback(){super.connectedCallback(),Yr.katexAdded||Yr.addKatex()}renderContent(){if("undefined"!=typeof katex){const n=this.root.querySelector("#katex-container");katex.render(this.textContent,n,this.options)}}}Yr.katexAdded=!1,Yr.inlineMathRendered=!1,window.DMath=Yr;class Wr extends HTMLElement{static get is(){return"d-front-matter"}constructor(){super();const n={childList:!0,characterData:!0,subtree:!0};new MutationObserver(n=>{for(const t of n)if("SCRIPT"===t.target.nodeName||"characterData"===t.type){const n=d(this);this.notify(n)}}).observe(this,n)}notify(n){const t=new CustomEvent("onFrontMatterChanged",{detail:n,bubbles:!0});document.dispatchEvent(t)}}const Gr=new Lr,Vr={frontMatter:Gr,waitingOn:{bibliography:[],citations:[]},listeners:{onCiteKeyCreated(n){const[t,e]=n.detail;if(!Gr.citationsCollected)return void Vr.waitingOn.citations.push(()=>Vr.listeners.onCiteKeyCreated(n));if(!Gr.bibliographyParsed)return void 
Vr.waitingOn.bibliography.push(()=>Vr.listeners.onCiteKeyCreated(n));const i=e.map(n=>Gr.citations.indexOf(n));t.numbers=i;const r=e.map(n=>Gr.bibliography.get(n));t.entries=r},onCiteKeyChanged(){Gr.citations=t(),Gr.citationsCollected=!0;for(const n of Vr.waitingOn.citations.slice())n();const n=document.querySelector("d-citation-list"),e=new Map(Gr.citations.map(n=>[n,Gr.bibliography.get(n)]));n.citations=e;const i=document.querySelectorAll("d-cite");for(const n of i){console.log(n);const t=n.keys,e=t.map(n=>Gr.citations.indexOf(n));n.numbers=e;const i=t.map(n=>Gr.bibliography.get(n));n.entries=i}},onCiteKeyRemoved(n){Vr.listeners.onCiteKeyChanged(n)},onBibliographyChanged(n){const t=document.querySelector("d-citation-list"),e=n.detail;Gr.bibliography=e,Gr.bibliographyParsed=!0;for(const n of Vr.waitingOn.bibliography.slice())n();if(Gr.citationsCollected)if(t.hasAttribute("distill-prerendered"))console.debug("Citation list was prerendered; not updating it.");else{const n=new Map(Gr.citations.map(n=>[n,Gr.bibliography.get(n)]));t.citations=n}else Vr.waitingOn.citations.push(function(){Vr.listeners.onBibliographyChanged({target:n.target,detail:n.detail})})},onFootnoteChanged(){const n=document.querySelector("d-footnote-list");if(n){const t=document.querySelectorAll("d-footnote");n.footnotes=t}},onFrontMatterChanged(t){const e=t.detail;n(Gr,e);const i=document.querySelector("d-interstitial");if(i&&("undefined"!=typeof Gr.password?i.password=Gr.password:i.parentElement.removeChild(i)),!document.body.hasAttribute("distill-prerendered")&&u()){h(document,Gr);const n=document.querySelector("distill-appendix");n&&(n.frontMatter=Gr);const t=document.querySelector("d-byline");t&&(t.frontMatter=Gr),e.katex&&(Yr.katexOptions=e.katex)}},DOMContentLoaded(){if(Vr.loaded)return void console.warn("Controller received DOMContentLoaded but was already loaded!");if(!u())return void console.warn("Controller received DOMContentLoaded at document.readyState: 
"+document.readyState+"!");Vr.loaded=!0,console.debug("Runlevel 4: Controller running DOMContentLoaded");const n=document.querySelector("d-front-matter");if(n){const t=d(n);Vr.listeners.onFrontMatterChanged({detail:t})}Gr.citations=t(),Gr.citationsCollected=!0;for(const n of Vr.waitingOn.citations.slice())n();if(Gr.bibliographyParsed)for(const n of Vr.waitingOn.bibliography.slice())n();const e=document.querySelector("d-footnote-list");if(e){const n=document.querySelectorAll("d-footnote");e.footnotes=n}}}}; +// Copyright 2018 The Distill Template Authors +const Kr='/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the "License");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an "AS IS" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nhtml {\n font-size: 14px;\n\tline-height: 1.6em;\n /* font-family: "Libre Franklin", "Helvetica Neue", sans-serif; */\n font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen, Ubuntu, Cantarell, "Fira Sans", "Droid Sans", "Helvetica Neue", Arial, sans-serif;\n /*, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol";*/\n text-size-adjust: 100%;\n -ms-text-size-adjust: 100%;\n -webkit-text-size-adjust: 100%;\n}\n\n@media(min-width: 768px) {\n html {\n font-size: 16px;\n }\n}\n\nbody {\n margin: 0;\n}\n\na {\n color: #004276;\n}\n\nfigure {\n margin: 0;\n}\n\ntable {\n\tborder-collapse: collapse;\n\tborder-spacing: 0;\n}\n\ntable th {\n\ttext-align: left;\n}\n\ntable thead {\n border-bottom: 1px solid rgba(0, 0, 0, 0.05);\n}\n\ntable thead th {\n padding-bottom: 
0.5em;\n}\n\ntable tbody :first-child td {\n padding-top: 0.5em;\n}\n\npre {\n overflow: auto;\n max-width: 100%;\n}\n\np {\n margin-top: 0;\n margin-bottom: 1em;\n}\n\nsup, sub {\n vertical-align: baseline;\n position: relative;\n top: -0.4em;\n line-height: 1em;\n}\n\nsub {\n top: 0.4em;\n}\n\n.kicker,\n.marker {\n font-size: 15px;\n font-weight: 600;\n color: rgba(0, 0, 0, 0.5);\n}\n\n\n/* Headline */\n\n@media(min-width: 1024px) {\n d-title h1 span {\n display: block;\n }\n}\n\n/* Figure */\n\nfigure {\n position: relative;\n margin-bottom: 2.5em;\n margin-top: 1.5em;\n}\n\nfigcaption+figure {\n\n}\n\nfigure img {\n width: 100%;\n}\n\nfigure svg text,\nfigure svg tspan {\n}\n\nfigcaption,\n.figcaption {\n color: rgba(0, 0, 0, 0.6);\n font-size: 12px;\n line-height: 1.5em;\n}\n\n@media(min-width: 1024px) {\nfigcaption,\n.figcaption {\n font-size: 13px;\n }\n}\n\nfigure.external img {\n background: white;\n border: 1px solid rgba(0, 0, 0, 0.1);\n box-shadow: 0 1px 8px rgba(0, 0, 0, 0.1);\n padding: 18px;\n box-sizing: border-box;\n}\n\nfigcaption a {\n color: rgba(0, 0, 0, 0.6);\n}\n\nfigcaption b,\nfigcaption strong, {\n font-weight: 600;\n color: rgba(0, 0, 0, 1.0);\n}\n'+'/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the "License");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an "AS IS" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n@supports not (display: grid) {\n .base-grid,\n distill-header,\n d-title,\n d-abstract,\n d-article,\n d-appendix,\n distill-appendix,\n d-byline,\n d-footnote-list,\n 
d-citation-list,\n distill-footer {\n display: block;\n padding: 8px;\n }\n}\n\n.base-grid,\ndistill-header,\nd-title,\nd-abstract,\nd-article,\nd-appendix,\ndistill-appendix,\nd-byline,\nd-footnote-list,\nd-citation-list,\ndistill-footer {\n display: grid;\n justify-items: stretch;\n grid-template-columns: [screen-start] 8px [page-start kicker-start text-start gutter-start middle-start] 1fr 1fr 1fr 1fr 1fr 1fr 1fr 1fr [text-end page-end gutter-end kicker-end middle-end] 8px [screen-end];\n grid-column-gap: 8px;\n}\n\n.grid {\n display: grid;\n grid-column-gap: 8px;\n}\n\n@media(min-width: 768px) {\n .base-grid,\n distill-header,\n d-title,\n d-abstract,\n d-article,\n d-appendix,\n distill-appendix,\n d-byline,\n d-footnote-list,\n d-citation-list,\n distill-footer {\n grid-template-columns: [screen-start] 1fr [page-start kicker-start middle-start text-start] 45px 45px 45px 45px 45px 45px 45px 45px [ kicker-end text-end gutter-start] 45px [middle-end] 45px [page-end gutter-end] 1fr [screen-end];\n grid-column-gap: 16px;\n }\n\n .grid {\n grid-column-gap: 16px;\n }\n}\n\n@media(min-width: 1000px) {\n .base-grid,\n distill-header,\n d-title,\n d-abstract,\n d-article,\n d-appendix,\n distill-appendix,\n d-byline,\n d-footnote-list,\n d-citation-list,\n distill-footer {\n grid-template-columns: [screen-start] 1fr [page-start kicker-start] 50px [middle-start] 50px [text-start kicker-end] 50px 50px 50px 50px 50px 50px 50px 50px [text-end gutter-start] 50px [middle-end] 50px [page-end gutter-end] 1fr [screen-end];\n grid-column-gap: 16px;\n }\n\n .grid {\n grid-column-gap: 16px;\n }\n}\n\n@media(min-width: 1180px) {\n .base-grid,\n distill-header,\n d-title,\n d-abstract,\n d-article,\n d-appendix,\n distill-appendix,\n d-byline,\n d-footnote-list,\n d-citation-list,\n distill-footer {\n grid-template-columns: [screen-start] 1fr [page-start kicker-start] 60px [middle-start] 60px [text-start kicker-end] 60px 60px 60px 60px 60px 60px 60px 60px [text-end gutter-start] 60px 
[middle-end] 60px [page-end gutter-end] 1fr [screen-end];\n grid-column-gap: 32px;\n }\n\n .grid {\n grid-column-gap: 32px;\n }\n}\n\n\n\n\n.base-grid {\n grid-column: screen;\n}\n\n/* .l-body,\nd-article > * {\n grid-column: text;\n}\n\n.l-page,\nd-title > *,\nd-figure {\n grid-column: page;\n} */\n\n.l-gutter {\n grid-column: gutter;\n}\n\n.l-text,\n.l-body {\n grid-column: text;\n}\n\n.l-page {\n grid-column: page;\n}\n\n.l-body-outset {\n grid-column: middle;\n}\n\n.l-page-outset {\n grid-column: page;\n}\n\n.l-screen {\n grid-column: screen;\n}\n\n.l-screen-inset {\n grid-column: screen;\n padding-left: 16px;\n padding-left: 16px;\n}\n\n\n/* Aside */\n\nd-article aside {\n grid-column: gutter;\n font-size: 12px;\n line-height: 1.6em;\n color: rgba(0, 0, 0, 0.6)\n}\n\n@media(min-width: 768px) {\n aside {\n grid-column: gutter;\n }\n\n .side {\n grid-column: gutter;\n }\n}\n'+'/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the "License");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an "AS IS" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nd-title {\n padding: 2rem 0 1.5rem;\n contain: layout style;\n overflow-x: hidden;\n}\n\n@media(min-width: 768px) {\n d-title {\n padding: 4rem 0 1.5rem;\n }\n}\n\nd-title h1 {\n grid-column: text;\n font-size: 40px;\n font-weight: 700;\n line-height: 1.1em;\n margin: 0 0 0.5rem;\n}\n\n@media(min-width: 768px) {\n d-title h1 {\n font-size: 50px;\n }\n}\n\nd-title p {\n font-weight: 300;\n font-size: 1.2rem;\n line-height: 1.55em;\n grid-column: text;\n}\n\nd-title .status {\n 
margin-top: 0px;\n font-size: 12px;\n color: #009688;\n opacity: 0.8;\n grid-column: kicker;\n}\n\nd-title .status span {\n line-height: 1;\n display: inline-block;\n padding: 6px 0;\n border-bottom: 1px solid #80cbc4;\n font-size: 11px;\n text-transform: uppercase;\n}\n'+'/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the "License");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an "AS IS" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nd-byline {\n contain: style;\n overflow: hidden;\n border-top: 1px solid rgba(0, 0, 0, 0.1);\n font-size: 0.8rem;\n line-height: 1.8em;\n padding: 1.5rem 0;\n min-height: 1.8em;\n}\n\n\nd-byline .byline {\n grid-template-columns: 1fr 1fr;\n grid-column: text;\n}\n\n@media(min-width: 768px) {\n d-byline .byline {\n grid-template-columns: 1fr 1fr 1fr 1fr;\n }\n}\n\nd-byline .authors-affiliations {\n grid-column-end: span 2;\n grid-template-columns: 1fr 1fr;\n margin-bottom: 1em;\n}\n\n@media(min-width: 768px) {\n d-byline .authors-affiliations {\n margin-bottom: 0;\n }\n}\n\nd-byline h3 {\n font-size: 0.6rem;\n font-weight: 400;\n color: rgba(0, 0, 0, 0.5);\n margin: 0;\n text-transform: uppercase;\n}\n\nd-byline p {\n margin: 0;\n}\n\nd-byline a,\nd-article d-byline a {\n color: rgba(0, 0, 0, 0.8);\n text-decoration: none;\n border-bottom: none;\n}\n\nd-article d-byline a:hover {\n text-decoration: underline;\n border-bottom: none;\n}\n\nd-byline p.author {\n font-weight: 500;\n}\n\nd-byline .affiliations {\n\n}\n'+'/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed 
under the Apache License, Version 2.0 (the "License");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an "AS IS" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nd-article {\n contain: layout style;\n overflow-x: hidden;\n border-top: 1px solid rgba(0, 0, 0, 0.1);\n padding-top: 2rem;\n color: rgba(0, 0, 0, 0.8);\n}\n\nd-article > * {\n grid-column: text;\n}\n\n@media(min-width: 768px) {\n d-article {\n font-size: 16px;\n }\n}\n\n@media(min-width: 1024px) {\n d-article {\n font-size: 1.06rem;\n line-height: 1.7em;\n }\n}\n\n\n/* H2 */\n\n\nd-article .marker {\n text-decoration: none;\n border: none;\n counter-reset: section;\n grid-column: kicker;\n line-height: 1.7em;\n}\n\nd-article .marker:hover {\n border: none;\n}\n\nd-article .marker span {\n padding: 0 3px 4px;\n border-bottom: 1px solid rgba(0, 0, 0, 0.2);\n position: relative;\n top: 4px;\n}\n\nd-article .marker:hover span {\n color: rgba(0, 0, 0, 0.7);\n border-bottom: 1px solid rgba(0, 0, 0, 0.7);\n}\n\nd-article h2 {\n font-weight: 600;\n font-size: 24px;\n line-height: 1.25em;\n margin: 2rem 0 1.5rem 0;\n border-bottom: 1px solid rgba(0, 0, 0, 0.1);\n padding-bottom: 1rem;\n}\n\n@media(min-width: 1024px) {\n d-article h2 {\n font-size: 36px;\n }\n}\n\n/* H3 */\n\nd-article h3 {\n font-weight: 700;\n font-size: 18px;\n line-height: 1.4em;\n margin-bottom: 1em;\n margin-top: 2em;\n}\n\n@media(min-width: 1024px) {\n d-article h3 {\n font-size: 20px;\n }\n}\n\n/* H4 */\n\nd-article h4 {\n font-weight: 600;\n text-transform: uppercase;\n font-size: 14px;\n line-height: 1.4em;\n}\n\nd-article a {\n color: 
inherit;\n}\n\nd-article p,\nd-article ul,\nd-article ol,\nd-article blockquote {\n margin-top: 0;\n margin-bottom: 1em;\n margin-left: 0;\n margin-right: 0;\n}\n\nd-article blockquote {\n border-left: 2px solid rgba(0, 0, 0, 0.2);\n padding-left: 2em;\n font-style: italic;\n color: rgba(0, 0, 0, 0.6);\n}\n\nd-article a {\n border-bottom: 1px solid rgba(0, 0, 0, 0.4);\n text-decoration: none;\n}\n\nd-article a:hover {\n border-bottom: 1px solid rgba(0, 0, 0, 0.8);\n}\n\nd-article .link {\n text-decoration: underline;\n cursor: pointer;\n}\n\nd-article ul,\nd-article ol {\n padding-left: 24px;\n}\n\nd-article li {\n margin-bottom: 1em;\n margin-left: 0;\n padding-left: 0;\n}\n\nd-article li:last-child {\n margin-bottom: 0;\n}\n\nd-article pre {\n font-size: 14px;\n margin-bottom: 20px;\n}\n\nd-article hr {\n grid-column: screen;\n width: 100%;\n border: none;\n border-bottom: 1px solid rgba(0, 0, 0, 0.1);\n margin-top: 60px;\n margin-bottom: 60px;\n}\n\nd-article section {\n margin-top: 60px;\n margin-bottom: 60px;\n}\n\nd-article span.equation-mimic {\n font-family: georgia;\n font-size: 115%;\n font-style: italic;\n}\n\nd-article > d-code,\nd-article section > d-code {\n display: block;\n}\n\nd-article > d-math[block],\nd-article section > d-math[block] {\n display: block;\n}\n\n@media (max-width: 768px) {\n d-article > d-code,\n d-article section > d-code,\n d-article > d-math[block],\n d-article section > d-math[block] {\n overflow-x: scroll;\n -ms-overflow-style: none; // IE 10+\n overflow: -moz-scrollbars-none; // Firefox\n }\n\n d-article > d-code::-webkit-scrollbar,\n d-article section > d-code::-webkit-scrollbar,\n d-article > d-math[block]::-webkit-scrollbar,\n d-article section > d-math[block]::-webkit-scrollbar {\n display: none; // Safari and Chrome\n }\n}\n\nd-article .citation {\n color: #668;\n cursor: pointer;\n}\n\nd-include {\n width: auto;\n display: block;\n}\n\nd-figure {\n contain: layout style;\n}\n\n/* KaTeX */\n\n.katex, .katex-prerendered 
{\n contain: style;\n display: inline-block;\n}\n\n/* Tables */\n\nd-article table {\n border-collapse: collapse;\n margin-bottom: 1.5rem;\n border-bottom: 1px solid rgba(0, 0, 0, 0.2);\n}\n\nd-article table th {\n border-bottom: 1px solid rgba(0, 0, 0, 0.2);\n}\n\nd-article table td {\n border-bottom: 1px solid rgba(0, 0, 0, 0.05);\n}\n\nd-article table tr:last-of-type td {\n border-bottom: none;\n}\n\nd-article table th,\nd-article table td {\n font-size: 15px;\n padding: 2px 8px;\n}\n\nd-article table tbody :first-child td {\n padding-top: 2px;\n}\n'+Ir+'/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the "License");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an "AS IS" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n@media print {\n\n @page {\n size: 8in 11in;\n @bottom-right {\n content: counter(page) " of " counter(pages);\n }\n }\n\n html {\n /* no general margins -- CSS Grid takes care of those */\n }\n\n p, code {\n page-break-inside: avoid;\n }\n\n h2, h3 {\n page-break-after: avoid;\n }\n\n d-header {\n visibility: hidden;\n }\n\n d-footer {\n display: none!important;\n }\n\n}\n',Xr=[{name:"WebComponents",support:function(){return"customElements"in window&&"attachShadow"in Element.prototype&&"getRootNode"in Element.prototype&&"content"in document.createElement("template")&&"Promise"in window&&"from"in Array},url:"https://distill.pub/third-party/polyfills/webcomponents-lite.js"},{name:"IntersectionObserver",support:function(){return"IntersectionObserver"in window&&"IntersectionObserverEntry"in 
window},url:"https://distill.pub/third-party/polyfills/intersection-observer.js"}];class Zr{static browserSupportsAllFeatures(){return Xr.every(n=>n.support())}static load(n){const t=function(t){t.loaded=!0,console.debug("Runlevel 0: Polyfill has finished loading: "+t.name),Zr.neededPolyfills.every(n=>n.loaded)&&(console.debug("Runlevel 0: All required polyfills have finished loading."),console.debug("Runlevel 0->1."),window.distillRunlevel=1,n())};for(const n of Zr.neededPolyfills)f(n,t)}static get neededPolyfills(){return Zr._neededPolyfills||(Zr._neededPolyfills=Xr.filter(n=>!n.support())),Zr._neededPolyfills}}const Qr=Or("d-abstract",`\n\n\n\n`);class Jr extends(Qr(HTMLElement)){} +// Copyright 2018 The Distill Template Authors +const no=Or("d-appendix","\n\n\n",!1);class to extends(no(HTMLElement)){} +// Copyright 2018 The Distill Template Authors +const eo=/^\s*$/;class io extends HTMLElement{static get is(){return"d-article"}constructor(){super(),new MutationObserver(n=>{for(const t of n)for(const n of t.addedNodes)switch(n.nodeName){case"#text":{const t=n.nodeValue;if(!eo.test(t)){console.warn("Use of unwrapped text in distill articles is discouraged as it breaks layout! Please wrap any text in a or

    tag. We found the following text: "+t);const e=document.createElement("span");e.innerHTML=n.nodeValue,n.parentNode.insertBefore(e,n),n.parentNode.removeChild(n)}}}}).observe(this,{childList:!0})}}var ro="undefined"!=typeof globalThis?globalThis:"undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:{},oo=m(function(n,t){!function(n){function t(){this.months=["jan","feb","mar","apr","may","jun","jul","aug","sep","oct","nov","dec"],this.notKey=[",","{","}"," ","="],this.pos=0,this.input="",this.entries=new Array,this.currentEntry="",this.setInput=function(n){this.input=n},this.getEntries=function(){return this.entries},this.isWhitespace=function(n){return" "==n||"\r"==n||"\t"==n||"\n"==n},this.match=function(n,t){if(t!=undefined&&null!=t||(t=!0),this.skipWhitespace(t),this.input.substring(this.pos,this.pos+n.length)!=n)throw"Token mismatch, expected "+n+", found "+this.input.substring(this.pos);this.pos+=n.length,this.skipWhitespace(t)},this.tryMatch=function(n,t){return t!=undefined&&null!=t||(t=!0),this.skipWhitespace(t),this.input.substring(this.pos,this.pos+n.length)==n},this.matchAt=function(){for(;this.input.length>this.pos&&"@"!=this.input[this.pos];)this.pos++;return"@"==this.input[this.pos]},this.skipWhitespace=function(n){for(;this.isWhitespace(this.input[this.pos]);)this.pos++;if("%"==this.input[this.pos]&&1==n){for(;"\n"!=this.input[this.pos];)this.pos++;this.skipWhitespace(n)}},this.value_braces=function(){var n=0;this.match("{",!1);for(var t=this.pos,e=!1;;){if(!e)if("}"==this.input[this.pos]){if(!(n>0)){var i=this.pos;return this.match("}",!1),this.input.substring(t,i)}n--}else if("{"==this.input[this.pos])n++;else if(this.pos>=this.input.length-1)throw"Unterminated value";e="\\"==this.input[this.pos]&&0==e,this.pos++}},this.value_comment=function(){for(var 
n="",t=0;!this.tryMatch("}",!1)||0!=t;){if(n+=this.input[this.pos],"{"==this.input[this.pos]&&t++,"}"==this.input[this.pos]&&t--,this.pos>=this.input.length-1)throw"Unterminated value:"+this.input.substring(start);this.pos++}return n},this.value_quotes=function(){this.match('"',!1);for(var n=this.pos,t=!1;;){if(!t){if('"'==this.input[this.pos]){var e=this.pos;return this.match('"',!1),this.input.substring(n,e)}if(this.pos>=this.input.length-1)throw"Unterminated value:"+this.input.substring(n)}t="\\"==this.input[this.pos]&&0==t,this.pos++}},this.single_value=function(){var n=this.pos;if(this.tryMatch("{"))return this.value_braces();if(this.tryMatch('"'))return this.value_quotes();var t=this.key();if(t.match("^[0-9]+$"))return t;if(this.months.indexOf(t.toLowerCase())>=0)return t.toLowerCase();throw"Value expected:"+this.input.substring(n)+" for key: "+t},this.value=function(){var n=[];for(n.push(this.single_value());this.tryMatch("#");)this.match("#"),n.push(this.single_value());return n.join("")},this.key=function(){for(var n=this.pos;;){if(this.pos>=this.input.length)throw"Runaway key";if(this.notKey.indexOf(this.input[this.pos])>=0)return this.input.substring(n,this.pos);this.pos++}},this.key_equals_value=function(){var n=this.key();if(this.tryMatch("="))return this.match("="),[n,this.value()];throw"... 
= value expected, equals sign missing:"+this.input.substring(this.pos)},this.key_value_list=function(){var n=this.key_equals_value();for(this.currentEntry.entryTags={},this.currentEntry.entryTags[n[0]]=n[1];this.tryMatch(",")&&(this.match(","),!this.tryMatch("}"));)n=this.key_equals_value(),this.currentEntry.entryTags[n[0]]=n[1]},this.entry_body=function(n){this.currentEntry={},this.currentEntry.citationKey=this.key(),this.currentEntry.entryType=n.substring(1),this.match(","),this.key_value_list(),this.entries.push(this.currentEntry)},this.directive=function(){return this.match("@"),"@"+this.key()},this.preamble=function(){this.currentEntry={},this.currentEntry.entryType="PREAMBLE",this.currentEntry.entry=this.value_comment(),this.entries.push(this.currentEntry)},this.comment=function(){this.currentEntry={},this.currentEntry.entryType="COMMENT",this.currentEntry.entry=this.value_comment(),this.entries.push(this.currentEntry)},this.entry=function(n){this.entry_body(n)},this.bibtex=function(){for(;this.matchAt();){var n=this.directive();this.match("{"),"@STRING"==n?this.string():"@PREAMBLE"==n?this.preamble():"@COMMENT"==n?this.comment():this.entry(n),this.match("}")}}}n.toJSON=function(n){var e=new t;return e.setInput(n),e.bibtex(),e.entries},n.toBibtex=function(n){var t="";for(var e in n){if(t+="@"+n[e].entryType,t+="{",n[e].citationKey&&(t+=n[e].citationKey+", "),n[e].entry&&(t+=n[e].entry),n[e].entryTags){var i="";for(var r in n[e].entryTags)0!=i.length&&(i+=", "),i+=r+"= {"+n[e].entryTags[r]+"}";t+=i}t+="}\n\n"}return t}}(t)});class ao extends HTMLElement{static get is(){return"d-bibliography"}constructor(){super();const n={childList:!0,characterData:!0,subtree:!0};new MutationObserver(n=>{for(const t of n)"SCRIPT"!==t.target.nodeName&&"characterData"!==t.type||this.parseIfPossible()}).observe(this,n)}connectedCallback(){requestAnimationFrame(()=>{this.parseIfPossible()})}parseIfPossible(){const n=this.querySelector("script");if(n)if("text/bibtex"==n.type){const 
t=n.textContent;if(this.bibtex!==t){this.bibtex=t;const n=y(this.bibtex);this.notify(n)}}else if("text/json"==n.type){const t=new Map(JSON.parse(n.textContent));this.notify(t)}else console.warn("Unsupported bibliography script tag type: "+n.type)}notify(n){const t=new CustomEvent("onBibliographyChanged",{detail:n,bubbles:!0});this.dispatchEvent(t)}static get observedAttributes(){return["src"]}receivedBibtex(n){const t=y(n.target.response);this.notify(t)}attributeChangedCallback(n,t,e){var i=new XMLHttpRequest;i.onload=(n=>this.receivedBibtex(n)),i.onerror=(()=>console.warn(`Could not load Bibtex! (tried ${e})`)),i.responseType="text",i.open("GET",e,!0),i.send()}}class so extends HTMLElement{static get is(){return"d-byline"}set frontMatter(n){this.innerHTML=w(n)}} +// Copyright 2018 The Distill Template Authors +const lo=Or("d-cite",'\n\n\n\n\n

    \n \n
    \n');class uo extends(lo(HTMLElement)){constructor(){super(),this._numbers=[],this._entries=[]}connectedCallback(){this.outerSpan=this.root.querySelector("#citation-"),this.innerSpan=this.root.querySelector(".citation-number"),this.hoverBox=this.root.querySelector("d-hover-box"),window.customElements.whenDefined("d-hover-box").then(()=>{this.hoverBox.listen(this)}),this.numbers&&this.displayNumbers(this.numbers),this.entries&&this.displayEntries(this.entries)}static get observedAttributes(){return["key","bibtex-key"]}attributeChangedCallback(n,t,e){const i=t?"onCiteKeyChanged":"onCiteKeyCreated",r=e.split(",").map(n=>n.trim()),o=new CustomEvent(i,{detail:[this,r],bubbles:!0});document.dispatchEvent(o)}set key(n){this.setAttribute("key",n)}get key(){return this.getAttribute("key")||this.getAttribute("bibtex-key")}get keys(){const n=this.key.split(",");return console.log(n),n}set numbers(n){this._numbers=n,this.displayNumbers(n)}get numbers(){return this._numbers}displayNumbers(n){if(!this.innerSpan)return;const t="["+n.map(n=>-1==n?"?":n+1+"").join(", ")+"]";this.innerSpan.textContent=t}set entries(n){this._entries=n,this.displayEntries(n)}get entries(){return this._entries}displayEntries(n){this.hoverBox&&(this.hoverBox.innerHTML=`
      \n ${n.map(l).map(n=>`
    • ${n}
    • `).join("\n")}\n
    `)}} +// Copyright 2018 The Distill Template Authors +const co="\nd-citation-list {\n contain: style;\n}\n\nd-citation-list .references {\n grid-column: text;\n}\n\nd-citation-list .references .title {\n font-weight: 500;\n}\n";class ho extends HTMLElement{static get is(){return"d-citation-list"}connectedCallback(){this.hasAttribute("distill-prerendered")||(this.style.display="none")}set citations(n){x(this,n)}}var po=m(function(n){var t=function(n){function t(n,t,e,i,r){this.type=n,this.content=t,this.alias=e,this.length=0|(i||"").length,this.greedy=!!r}function e(n,i,a,s,l,u,d){for(var h in a)if(a.hasOwnProperty(h)&&a[h]){var p=a[h];p=Array.isArray(p)?p:[p];for(var f=0;fn.length)return;if(!(M instanceof t)){var T=1;if(y&&k!=i.tail.prev){if(g.lastIndex=S,!(N=g.exec(n)))break;var _=N.index+(b&&N[1]?N[1].length:0),C=N.index+N[0].length,A=S;for(A+=k.value.length;_>=A;)A+=(k=k.next).value.length;if(S=A-=k.value.length,k.value instanceof t)continue;for(var E=k;E!==i.tail&&(A1&&e(n,i,a,k.prev,S,!0,h+","+f),u)break}else if(u)break}}}}}function i(){var n={value:null,prev:null,next:null},t={value:null,prev:n,next:null};n.next=t,this.head=n,this.tail=t,this.length=0}function r(n,t,e){var i=t.next,r={value:e,prev:t,next:i};return t.next=r,i.prev=r,n.length++,r}function o(n,t,e){for(var i=t.next,r=0;r"+i.content+""},!n.document)return n.addEventListener?(c.disableWorkerMessageHandler||n.addEventListener("message",function(t){var e=JSON.parse(t.data),i=e.language,r=e.code,o=e.immediateClose;n.postMessage(c.highlight(r,c.languages[i],i)),o&&n.close()},!1),c):c;var d=c.util.currentScript();if(d&&(c.filename=d.src,d.hasAttribute("data-manual")&&(c.manual=!0)),!c.manual){var h=document.readyState;"loading"===h||"interactive"===h&&d&&d.defer?document.addEventListener("DOMContentLoaded",s):window.requestAnimationFrame?window.requestAnimationFrame(s):window.setTimeout(s,16)}return c}("undefined"!=typeof window?window:"undefined"!=typeof WorkerGlobalScope&&self instanceof 
WorkerGlobalScope?self:{});n.exports&&(n.exports=t),void 0!==ro&&(ro.Prism=t),t.languages.markup={comment://,prolog:/<\?[\s\S]+?\?>/,doctype:{pattern:/"'[\]]|"[^"]*"|'[^']*')+(?:\[(?:(?!)*\]\s*)?>/i,greedy:!0},cdata://i,tag:{pattern:/<\/?(?!\d)[^\s>\/=$<%]+(?:\s(?:\s*[^\s>\/=]+(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s'">=]+(?=[\s>]))|(?=[\s/>])))+)?\s*\/?>/i,greedy:!0,inside:{tag:{pattern:/^<\/?[^\s>\/]+/i,inside:{punctuation:/^<\/?/,namespace:/^[^\s>\/:]+:/}},"attr-value":{pattern:/=\s*(?:"[^"]*"|'[^']*'|[^\s'">=]+)/i,inside:{punctuation:[/^=/,{pattern:/^(\s*)["']|["']$/,lookbehind:!0}]}},punctuation:/\/?>/,"attr-name":{pattern:/[^\s>\/]+/,inside:{namespace:/^[^\s>\/:]+:/}}}},entity:/&#?[\da-z]{1,8};/i},t.languages.markup.tag.inside["attr-value"].inside.entity=t.languages.markup.entity,t.hooks.add("wrap",function(n){"entity"===n.type&&(n.attributes.title=n.content.replace(/&/,"&"))}),Object.defineProperty(t.languages.markup.tag,"addInlined",{value:function(n,e){var i={};i["language-"+e]={pattern:/(^$)/i,lookbehind:!0,inside:t.languages[e]},i.cdata=/^$/i;var r={"included-cdata":{pattern://i,inside:i}};r["language-"+e]={pattern:/[\s\S]+/,inside:t.languages[e]};var o={};o[n]={pattern:RegExp(/(<__[\s\S]*?>)(?:\s*|[\s\S])*?(?=<\/__>)/.source.replace(/__/g,function(){return n}),"i"),lookbehind:!0,greedy:!0,inside:r},t.languages.insertBefore("markup","cdata",o)}}),t.languages.xml=t.languages.extend("markup",{}),t.languages.html=t.languages.markup,t.languages.mathml=t.languages.markup,t.languages.svg=t.languages.markup,function(n){var 
t=/("|')(?:\\(?:\r\n|[\s\S])|(?!\1)[^\\\r\n])*\1/;n.languages.css={comment:/\/\*[\s\S]*?\*\//,atrule:{pattern:/@[\w-]+[\s\S]*?(?:;|(?=\s*\{))/,inside:{rule:/^@[\w-]+/,"selector-function-argument":{pattern:/(\bselector\s*\((?!\s*\))\s*)(?:[^()]|\((?:[^()]|\([^()]*\))*\))+?(?=\s*\))/,lookbehind:!0,alias:"selector"}}},url:{pattern:RegExp("url\\((?:"+t.source+"|[^\n\r()]*)\\)","i"),greedy:!0,inside:{"function":/^url/i,punctuation:/^\(|\)$/}},selector:RegExp("[^{}\\s](?:[^{};\"']|"+t.source+")*?(?=\\s*\\{)"),string:{pattern:t,greedy:!0},property:/[-_a-z\xA0-\uFFFF][-\w\xA0-\uFFFF]*(?=\s*:)/i,important:/!important\b/i,"function":/[-a-z0-9]+(?=\()/i,punctuation:/[(){};:,]/},n.languages.css.atrule.inside.rest=n.languages.css;var e=n.languages.markup;e&&(e.tag.addInlined("style","css"),n.languages.insertBefore("inside","attr-value",{"style-attr":{pattern:/\s*style=("|')(?:\\[\s\S]|(?!\1)[^\\])*\1/i,inside:{"attr-name":{pattern:/^\s*style/i,inside:e.tag.inside},punctuation:/^\s*=\s*['"]|['"]\s*$/,"attr-value":{pattern:/.+/i,inside:n.languages.css}},alias:"language-css"}},e.tag))}(t),t.languages.clike={comment:[{pattern:/(^|[^\\])\/\*[\s\S]*?(?:\*\/|$)/,lookbehind:!0},{pattern:/(^|[^\\:])\/\/.*/,lookbehind:!0,greedy:!0}],string:{pattern:/(["'])(?:\\(?:\r\n|[\s\S])|(?!\1)[^\\\r\n])*\1/,greedy:!0},"class-name":{pattern:/(\b(?:class|interface|extends|implements|trait|instanceof|new)\s+|\bcatch\s+\()[\w.\\]+/i,lookbehind:!0,inside:{punctuation:/[.\\]/}},keyword:/\b(?:if|else|while|do|for|return|in|instanceof|function|new|try|throw|catch|finally|null|break|continue)\b/,boolean:/\b(?:true|false)\b/,"function":/\w+(?=\()/,number:/\b0x[\da-f]+\b|(?:\b\d+\.?\d*|\B\.\d+)(?:e[+-]?\d+)?/i,operator:/[<>]=?|[!=]=?=?|--?|\+\+?|&&?|\|\|?|[?*/~^%]/,punctuation:/[{}[\];(),.:]/},t.languages.javascript=t.languages.extend("clike",{"class-name":[t.languages.clike["class-name"],{pattern:/(^|[^$\w\xA0-\uFFFF])[_$A-Z\xA0-\uFFFF][$\w\xA0-\uFFFF]*(?=\.(?:prototype|constructor))/,lookbehind:!0}],keyword:
[{pattern:/((?:^|})\s*)(?:catch|finally)\b/,lookbehind:!0},{pattern:/(^|[^.]|\.\.\.\s*)\b(?:as|async(?=\s*(?:function\b|\(|[$\w\xA0-\uFFFF]|$))|await|break|case|class|const|continue|debugger|default|delete|do|else|enum|export|extends|for|from|function|get|if|implements|import|in|instanceof|interface|let|new|null|of|package|private|protected|public|return|set|static|super|switch|this|throw|try|typeof|undefined|var|void|while|with|yield)\b/,lookbehind:!0}],number:/\b(?:(?:0[xX](?:[\dA-Fa-f](?:_[\dA-Fa-f])?)+|0[bB](?:[01](?:_[01])?)+|0[oO](?:[0-7](?:_[0-7])?)+)n?|(?:\d(?:_\d)?)+n|NaN|Infinity)\b|(?:\b(?:\d(?:_\d)?)+\.?(?:\d(?:_\d)?)*|\B\.(?:\d(?:_\d)?)+)(?:[Ee][+-]?(?:\d(?:_\d)?)+)?/,"function":/#?[_$a-zA-Z\xA0-\uFFFF][$\w\xA0-\uFFFF]*(?=\s*(?:\.\s*(?:apply|bind|call)\s*)?\()/,operator:/--|\+\+|\*\*=?|=>|&&|\|\||[!=]==|<<=?|>>>?=?|[-+*/%&|^!=<>]=?|\.{3}|\?[.?]?|[~:]/}),t.languages.javascript["class-name"][0].pattern=/(\b(?:class|interface|extends|implements|instanceof|new)\s+)[\w.\\]+/,t.languages.insertBefore("javascript","keyword",{regex:{pattern:/((?:^|[^$\w\xA0-\uFFFF."'\])\s])\s*)\/(?:\[(?:[^\]\\\r\n]|\\.)*]|\\.|[^/\\\[\r\n])+\/[gimyus]{0,6}(?=(?:\s|\/\*[\s\S]*?\*\/)*(?:$|[\r\n,.;:})\]]|\/\/))/,lookbehind:!0,greedy:!0},"function-variable":{pattern:/#?[_$a-zA-Z\xA0-\uFFFF][$\w\xA0-\uFFFF]*(?=\s*[=:]\s*(?:async\s*)?(?:\bfunction\b|(?:\((?:[^()]|\([^()]*\))*\)|[_$a-zA-Z\xA0-\uFFFF][$\w\xA0-\uFFFF]*)\s*=>))/,alias:"function"},parameter:[{pattern:/(function(?:\s+[_$A-Za-z\xA0-\uFFFF][$\w\xA0-\uFFFF]*)?\s*\(\s*)(?!\s)(?:[^()]|\([^()]*\))+?(?=\s*\))/,lookbehind:!0,inside:t.languages.javascript},{pattern:/[_$a-z\xA0-\uFFFF][$\w\xA0-\uFFFF]*(?=\s*=>)/i,inside:t.languages.javascript},{pattern:/(\(\s*)(?!\s)(?:[^()]|\([^()]*\))+?(?=\s*\)\s*=>)/,lookbehind:!0,inside:t.languages.javascript},{pattern:/((?:\b|\s|^)(?!(?:as|async|await|break|case|catch|class|const|continue|debugger|default|delete|do|else|enum|export|extends|finally|for|from|function|get|if|implements|import|in|in
stanceof|interface|let|new|null|of|package|private|protected|public|return|set|static|super|switch|this|throw|try|typeof|undefined|var|void|while|with|yield)(?![$\w\xA0-\uFFFF]))(?:[_$A-Za-z\xA0-\uFFFF][$\w\xA0-\uFFFF]*\s*)\(\s*)(?!\s)(?:[^()]|\([^()]*\))+?(?=\s*\)\s*\{)/,lookbehind:!0,inside:t.languages.javascript}],constant:/\b[A-Z](?:[A-Z_]|\dx?)*\b/}),t.languages.insertBefore("javascript","string",{"template-string":{pattern:/`(?:\\[\s\S]|\${(?:[^{}]|{(?:[^{}]|{[^}]*})*})+}|(?!\${)[^\\`])*`/,greedy:!0,inside:{"template-punctuation":{pattern:/^`|`$/,alias:"string"},interpolation:{pattern:/((?:^|[^\\])(?:\\{2})*)\${(?:[^{}]|{(?:[^{}]|{[^}]*})*})+}/,lookbehind:!0,inside:{"interpolation-punctuation":{pattern:/^\${|}$/,alias:"punctuation"},rest:t.languages.javascript}},string:/[\s\S]+/}}}),t.languages.markup&&t.languages.markup.tag.addInlined("script","javascript"),t.languages.js=t.languages.javascript,"undefined"!=typeof self&&self.Prism&&self.document&&document.querySelector&&(self.Prism.fileHighlight=function(n){n=n||document;var e={js:"javascript",py:"python",rb:"ruby",ps1:"powershell",psm1:"powershell",sh:"bash",bat:"batch",h:"c",tex:"latex"};Array.prototype.slice.call(n.querySelectorAll("pre[data-src]")).forEach(function(n){if(!n.hasAttribute("data-src-loaded")){for(var i,r=n.getAttribute("data-src"),o=n,a=/\blang(?:uage)?-([\w-]+)\b/i;o&&!a.test(o.className);)o=o.parentNode;if(o&&(i=(n.className.match(a)||[,""])[1]),!i){var s=(r.match(/\.(\w+)$/)||[,""])[1];i=e[s]||s}var l=document.createElement("code");l.className="language-"+i,n.textContent="",l.textContent="Loading\u2026",n.appendChild(l);var u=new XMLHttpRequest;u.open("GET",r,!0),u.onreadystatechange=function(){4==u.readyState&&(u.status<400&&u.responseText?(l.textContent=u.responseText,t.highlightElement(l),n.setAttribute("data-src-loaded","")):u.status>=400?l.textContent="\u2716 Error "+u.status+" while fetching file: "+u.statusText:l.textContent="\u2716 Error: File does not exist or is 
empty")},u.send(null)}})},document.addEventListener("DOMContentLoaded",function(){self.Prism.fileHighlight()}))});Prism.languages.python={comment:{pattern:/(^|[^\\])#.*/,lookbehind:!0},"string-interpolation":{pattern:/(?:f|rf|fr)(?:("""|''')[\s\S]+?\1|("|')(?:\\.|(?!\2)[^\\\r\n])*\2)/i,greedy:!0,inside:{interpolation:{pattern:/((?:^|[^{])(?:{{)*){(?!{)(?:[^{}]|{(?!{)(?:[^{}]|{(?!{)(?:[^{}])+})+})+}/,lookbehind:!0,inside:{"format-spec":{pattern:/(:)[^:(){}]+(?=}$)/,lookbehind:!0},"conversion-option":{pattern:/![sra](?=[:}]$)/,alias:"punctuation"},rest:null}},string:/[\s\S]+/}},"triple-quoted-string":{pattern:/(?:[rub]|rb|br)?("""|''')[\s\S]+?\1/i,greedy:!0,alias:"string"},string:{pattern:/(?:[rub]|rb|br)?("|')(?:\\.|(?!\1)[^\\\r\n])*\1/i,greedy:!0},"function":{pattern:/((?:^|\s)def[ \t]+)[a-zA-Z_]\w*(?=\s*\()/g,lookbehind:!0},"class-name":{pattern:/(\bclass\s+)\w+/i,lookbehind:!0},decorator:{pattern:/(^\s*)@\w+(?:\.\w+)*/im,lookbehind:!0,alias:["annotation","punctuation"],inside:{punctuation:/\./}},keyword:/\b(?:and|as|assert|async|await|break|class|continue|def|del|elif|else|except|exec|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|print|raise|return|try|while|with|yield)\b/,builtin:/\b(?:__import__|abs|all|any|apply|ascii|basestring|bin|bool|buffer|bytearray|bytes|callable|chr|classmethod|cmp|coerce|compile|complex|delattr|dict|dir|divmod|enumerate|eval|execfile|file|filter|float|format|frozenset|getattr|globals|hasattr|hash|help|hex|id|input|int|intern|isinstance|issubclass|iter|len|list|locals|long|map|max|memoryview|min|next|object|oct|open|ord|pow|property|range|raw_input|reduce|reload|repr|reversed|round|set|setattr|slice|sorted|staticmethod|str|sum|super|tuple|type|unichr|unicode|vars|xrange|zip)\b/,boolean:/\b(?:True|False|None)\b/,number:/(?:\b(?=\d)|\B(?=\.))(?:0[bo])?(?:(?:\d|0x[\da-f])[\da-f]*\.?\d*|\.\d+)(?:e[+-]?\d+)?j?\b/i,operator:/[-+%=]=?|!=|\*\*?=?|\/\/?=?|<[<=>]?|>[=>]?|[&|^~]/,punctuation:/[{}[\];(),.:]/},Prism.languages.py
thon["string-interpolation"].inside.interpolation.inside.rest=Prism.languages.python,Prism.languages.py=Prism.languages.python,Prism.languages.clike={comment:[{pattern:/(^|[^\\])\/\*[\s\S]*?(?:\*\/|$)/,lookbehind:!0},{pattern:/(^|[^\\:])\/\/.*/,lookbehind:!0,greedy:!0}],string:{pattern:/(["'])(?:\\(?:\r\n|[\s\S])|(?!\1)[^\\\r\n])*\1/,greedy:!0},"class-name":{pattern:/(\b(?:class|interface|extends|implements|trait|instanceof|new)\s+|\bcatch\s+\()[\w.\\]+/i,lookbehind:!0,inside:{punctuation:/[.\\]/}},keyword:/\b(?:if|else|while|do|for|return|in|instanceof|function|new|try|throw|catch|finally|null|break|continue)\b/,boolean:/\b(?:true|false)\b/,"function":/\w+(?=\()/,number:/\b0x[\da-f]+\b|(?:\b\d+\.?\d*|\B\.\d+)(?:e[+-]?\d+)?/i,operator:/[<>]=?|[!=]=?=?|--?|\+\+?|&&?|\|\|?|[?*/~^%]/,punctuation:/[{}[\];(),.:]/},Prism.languages.lua={comment:/^#!.+|--(?:\[(=*)\[[\s\S]*?\]\1\]|.*)/m,string:{pattern:/(["'])(?:(?!\1)[^\\\r\n]|\\z(?:\r\n|\s)|\\(?:\r\n|[\s\S]))*\1|\[(=*)\[[\s\S]*?\]\2\]/,greedy:!0},number:/\b0x[a-f\d]+\.?[a-f\d]*(?:p[+-]?\d+)?\b|\b\d+(?:\.\B|\.?\d*(?:e[+-]?\d+)?\b)|\B\.\d+(?:e[+-]?\d+)?\b/i,keyword:/\b(?:and|break|do|else|elseif|end|false|for|function|goto|if|in|local|nil|not|or|repeat|return|then|true|until|while)\b/,"function":/(?!\d)\w+(?=\s*(?:[({]))/,operator:[/[-+*%^&|#]|\/\/?|<[<=]?|>[>=]?|[=~]=?/,{pattern:/(^|[^.])\.\.(?!\.)/,lookbehind:!0}],punctuation:/[\[\](){},;]|\.+|:+/},function(n){var 
t="\\b(?:BASH|BASHOPTS|BASH_ALIASES|BASH_ARGC|BASH_ARGV|BASH_CMDS|BASH_COMPLETION_COMPAT_DIR|BASH_LINENO|BASH_REMATCH|BASH_SOURCE|BASH_VERSINFO|BASH_VERSION|COLORTERM|COLUMNS|COMP_WORDBREAKS|DBUS_SESSION_BUS_ADDRESS|DEFAULTS_PATH|DESKTOP_SESSION|DIRSTACK|DISPLAY|EUID|GDMSESSION|GDM_LANG|GNOME_KEYRING_CONTROL|GNOME_KEYRING_PID|GPG_AGENT_INFO|GROUPS|HISTCONTROL|HISTFILE|HISTFILESIZE|HISTSIZE|HOME|HOSTNAME|HOSTTYPE|IFS|INSTANCE|JOB|LANG|LANGUAGE|LC_ADDRESS|LC_ALL|LC_IDENTIFICATION|LC_MEASUREMENT|LC_MONETARY|LC_NAME|LC_NUMERIC|LC_PAPER|LC_TELEPHONE|LC_TIME|LESSCLOSE|LESSOPEN|LINES|LOGNAME|LS_COLORS|MACHTYPE|MAILCHECK|MANDATORY_PATH|NO_AT_BRIDGE|OLDPWD|OPTERR|OPTIND|ORBIT_SOCKETDIR|OSTYPE|PAPERSIZE|PATH|PIPESTATUS|PPID|PS1|PS2|PS3|PS4|PWD|RANDOM|REPLY|SECONDS|SELINUX_INIT|SESSION|SESSIONTYPE|SESSION_MANAGER|SHELL|SHELLOPTS|SHLVL|SSH_AUTH_SOCK|TERM|UID|UPSTART_EVENTS|UPSTART_INSTANCE|UPSTART_JOB|UPSTART_SESSION|USER|WINDOWID|XAUTHORITY|XDG_CONFIG_DIRS|XDG_CURRENT_DESKTOP|XDG_DATA_DIRS|XDG_GREETER_DATA_DIR|XDG_MENU_PREFIX|XDG_RUNTIME_DIR|XDG_SEAT|XDG_SEAT_PATH|XDG_SESSION_DESKTOP|XDG_SESSION_ID|XDG_SESSION_PATH|XDG_SESSION_TYPE|XDG_VTNR|XMODIFIERS)\\b",e={environment:{pattern:RegExp("\\$"+t),alias:"constant"},variable:[{pattern:/\$?\(\([\s\S]+?\)\)/,greedy:!0,inside:{variable:[{pattern:/(^\$\(\([\s\S]+)\)\)/,lookbehind:!0},/^\$\(\(/],number:/\b0x[\dA-Fa-f]+\b|(?:\b\d+\.?\d*|\B\.\d+)(?:[Ee]-?\d+)?/,operator:/--?|-=|\+\+?|\+=|!=?|~|\*\*?|\*=|\/=?|%=?|<<=?|>>=?|<=?|>=?|==?|&&?|&=|\^=?|\|\|?|\|=|\?|:/,punctuation:/\(\(?|\)\)?|,|;/}},{pattern:/\$\((?:\([^)]+\)|[^()])+\)|`[^`]+`/,greedy:!0,inside:{variable:/^\$\(|^`|\)$|`$/}},{pattern:/\$\{[^}]+\}/,greedy:!0,inside:{operator:/:[-=?+]?|[!\/]|##?|%%?|\^\^?|,,?/,punctuation:/[\[\]]/,environment:{pattern:RegExp("(\\{)"+t),lookbehind:!0,alias:"constant"}}},/\$(?:\w+|[#?*!@$])/],entity:/\\(?:[abceEfnrtv\\"]|O?[0-7]{1,3}|x[0-9a-fA-F]{1,2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})/};n.languages.bash={shebang:{pattern:/^#!\s*\/.*/,alias:"importan
t"},comment:{pattern:/(^|[^"{\\$])#.*/,lookbehind:!0},"function-name":[{pattern:/(\bfunction\s+)\w+(?=(?:\s*\(?:\s*\))?\s*\{)/,lookbehind:!0,alias:"function"},{pattern:/\b\w+(?=\s*\(\s*\)\s*\{)/,alias:"function"}],"for-or-select":{pattern:/(\b(?:for|select)\s+)\w+(?=\s+in\s)/,alias:"variable",lookbehind:!0},"assign-left":{pattern:/(^|[\s;|&]|[<>]\()\w+(?=\+?=)/,inside:{environment:{pattern:RegExp("(^|[\\s;|&]|[<>]\\()"+t),lookbehind:!0,alias:"constant"}},alias:"variable",lookbehind:!0},string:[{pattern:/((?:^|[^<])<<-?\s*)(\w+?)\s*(?:\r?\n|\r)[\s\S]*?(?:\r?\n|\r)\2/,lookbehind:!0,greedy:!0,inside:e},{pattern:/((?:^|[^<])<<-?\s*)(["'])(\w+)\2\s*(?:\r?\n|\r)[\s\S]*?(?:\r?\n|\r)\3/,lookbehind:!0,greedy:!0},{pattern:/(^|[^\\](?:\\\\)*)(["'])(?:\\[\s\S]|\$\([^)]+\)|`[^`]+`|(?!\2)[^\\])*\2/,lookbehind:!0,greedy:!0,inside:e}],environment:{pattern:RegExp("\\$?"+t),alias:"constant"},variable:e.variable,"function":{pattern:/(^|[\s;|&]|[<>]\()(?:add|apropos|apt|aptitude|apt-cache|apt-get|aspell|automysqlbackup|awk|basename|bash|bc|bconsole|bg|bzip2|cal|cat|cfdisk|chgrp|chkconfig|chmod|chown|chroot|cksum|clear|cmp|column|comm|cp|cron|crontab|csplit|curl|cut|date|dc|dd|ddrescue|debootstrap|df|diff|diff3|dig|dir|dircolors|dirname|dirs|dmesg|du|egrep|eject|env|ethtool|expand|expect|expr|fdformat|fdisk|fg|fgrep|file|find|fmt|fold|format|free|fsck|ftp|fuser|gawk|git|gparted|grep|groupadd|groupdel|groupmod|groups|grub-mkconfig|gzip|halt|head|hg|history|host|hostname|htop|iconv|id|ifconfig|ifdown|ifup|import|install|ip|jobs|join|kill|killall|less|link|ln|locate|logname|logrotate|look|lpc|lpr|lprint|lprintd|lprintq|lprm|ls|lsof|lynx|make|man|mc|mdadm|mkconfig|mkdir|mke2fs|mkfifo|mkfs|mkisofs|mknod|mkswap|mmv|more|most|mount|mtools|mtr|mutt|mv|nano|nc|netstat|nice|nl|nohup|notify-send|npm|nslookup|op|open|parted|passwd|paste|pathchk|ping|pkill|pnpm|popd|pr|printcap|printenv|ps|pushd|pv|quota|quotacheck|quotactl|ram|rar|rcp|reboot|remsync|rename|renice|rev|rm|rmdir|rpm|rsync|scp|screen|s
diff|sed|sendmail|seq|service|sftp|sh|shellcheck|shuf|shutdown|sleep|slocate|sort|split|ssh|stat|strace|su|sudo|sum|suspend|swapon|sync|tac|tail|tar|tee|time|timeout|top|touch|tr|traceroute|tsort|tty|umount|uname|unexpand|uniq|units|unrar|unshar|unzip|update-grub|uptime|useradd|userdel|usermod|users|uudecode|uuencode|v|vdir|vi|vim|virsh|vmstat|wait|watch|wc|wget|whereis|which|who|whoami|write|xargs|xdg-open|yarn|yes|zenity|zip|zsh|zypper)(?=$|[)\s;|&])/,lookbehind:!0},keyword:{pattern:/(^|[\s;|&]|[<>]\()(?:if|then|else|elif|fi|for|while|in|case|esac|function|select|do|done|until)(?=$|[)\s;|&])/,lookbehind:!0},builtin:{pattern:/(^|[\s;|&]|[<>]\()(?:\.|:|break|cd|continue|eval|exec|exit|export|getopts|hash|pwd|readonly|return|shift|test|times|trap|umask|unset|alias|bind|builtin|caller|command|declare|echo|enable|help|let|local|logout|mapfile|printf|read|readarray|source|type|typeset|ulimit|unalias|set|shopt)(?=$|[)\s;|&])/,lookbehind:!0,alias:"class-name"},boolean:{pattern:/(^|[\s;|&]|[<>]\()(?:true|false)(?=$|[)\s;|&])/,lookbehind:!0},"file-descriptor":{pattern:/\B&\d\b/,alias:"important"},operator:{pattern:/\d?<>|>\||\+=|==?|!=?|=~|<<[<-]?|[&\d]?>>|\d?[<>]&?|&[>&]?|\|[&|]?|<=?|>=?/,inside:{"file-descriptor":{pattern:/^\d/,alias:"important"}}},punctuation:/\$?\(\(?|\)\)?|\.\.|[{}[\];\\]/,number:{pattern:/(^|\s)(?:[1-9]\d*|0)(?:[.,]\d+)?\b/,lookbehind:!0}};for(var i=["comment","function-name","for-or-select","assign-left","string","environment","function","keyword","builtin","boolean","file-descriptor","operator","punctuation","number"],r=e.variable[1].inside,o=0;o(?:>=?|=)?|<(?:<=?|=|-)?|:=|\.\.\./,number:/(?:\b0x[a-f\d]+|(?:\b\d+\.?\d*|\B\.\d+)(?:e[-+]?\d+)?)i?/i,string:{pattern:/(["'`])(?:\\[\s\S]|(?!\1)[^\\])*\1/,greedy:!0}}),delete Prism.languages.go["class-name"],function(n){function t(n,t){return n=n.replace(//g,function(){return e}),t&&(n=n+"|"+n.replace(/_/g,"\\*")),RegExp(/((?:^|[^\\])(?:\\{2})*)/.source+"(?:"+n+")")}var 
e=/(?:\\.|[^\\\n\r]|(?:\n|\r\n?)(?!\n|\r\n?))/.source,i=/(?:\\.|``.+?``|`[^`\r\n]+`|[^\\|\r\n`])+/.source,r=/\|?__(?:\|__)+\|?(?:(?:\n|\r\n?)|$)/.source.replace(/__/g,function(){return i}),o=/\|?[ \t]*:?-{3,}:?[ \t]*(?:\|[ \t]*:?-{3,}:?[ \t]*)+\|?(?:\n|\r\n?)/.source;n.languages.markdown=n.languages.extend("markup",{}),n.languages.insertBefore("markdown","prolog",{blockquote:{pattern:/^>(?:[\t ]*>)*/m,alias:"punctuation"},table:{pattern:RegExp("^"+r+o+"(?:"+r+")*","m"),inside:{"table-data-rows":{pattern:RegExp("^("+r+o+")(?:"+r+")*$"),lookbehind:!0,inside:{"table-data":{pattern:RegExp(i),inside:n.languages.markdown},punctuation:/\|/}},"table-line":{pattern:RegExp("^("+r+")"+o+"$"),lookbehind:!0,inside:{punctuation:/\||:?-{3,}:?/}},"table-header-row":{pattern:RegExp("^"+r+"$"),inside:{"table-header":{pattern:RegExp(i),alias:"important",inside:n.languages.markdown},punctuation:/\|/}}}},code:[{pattern:/((?:^|\n)[ \t]*\n|(?:^|\r\n?)[ \t]*\r\n?)(?: {4}|\t).+(?:(?:\n|\r\n?)(?: {4}|\t).+)*/,lookbehind:!0,alias:"keyword"},{pattern:/``.+?``|`[^`\r\n]+`/,alias:"keyword"},{pattern:/^```[\s\S]*?^```$/m,greedy:!0,inside:{"code-block":{pattern:/^(```.*(?:\n|\r\n?))[\s\S]+?(?=(?:\n|\r\n?)^```$)/m,lookbehind:!0},"code-language":{pattern:/^(```).+/,lookbehind:!0},punctuation:/```/}}],title:[{pattern:/\S.*(?:\n|\r\n?)(?:==+|--+)(?=[ \t]*$)/m,alias:"important",inside:{punctuation:/==+$|--+$/}},{pattern:/(^\s*)#+.+/m,lookbehind:!0,alias:"important",inside:{punctuation:/^#+|#+$/}}],hr:{pattern:/(^\s*)([*-])(?:[\t ]*\2){2,}(?=\s*$)/m,lookbehind:!0,alias:"punctuation"},list:{pattern:/(^\s*)(?:[*+-]|\d+\.)(?=[\t ].)/m,lookbehind:!0,alias:"punctuation"},"url-reference":{pattern:/!?\[[^\]]+\]:[\t ]+(?:\S+|<(?:\\.|[^>\\])+>)(?:[\t 
]+(?:"(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*'|\((?:\\.|[^)\\])*\)))?/,inside:{variable:{pattern:/^(!?\[)[^\]]+/,lookbehind:!0},string:/(?:"(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*'|\((?:\\.|[^)\\])*\))$/,punctuation:/^[\[\]!:]|[<>]/},alias:"url"},bold:{pattern:t(/__(?:(?!_)|_(?:(?!_))+_)+__/.source,!0),lookbehind:!0,greedy:!0,inside:{content:{pattern:/(^..)[\s\S]+(?=..$)/,lookbehind:!0,inside:{}},punctuation:/\*\*|__/}},italic:{pattern:t(/_(?:(?!_)|__(?:(?!_))+__)+_/.source,!0),lookbehind:!0,greedy:!0,inside:{content:{pattern:/(^.)[\s\S]+(?=.$)/,lookbehind:!0,inside:{}},punctuation:/[*_]/}},strike:{pattern:t(/(~~?)(?:(?!~))+?\2/.source,!1),lookbehind:!0,greedy:!0,inside:{content:{pattern:/(^~~?)[\s\S]+(?=\1$)/,lookbehind:!0,inside:{}},punctuation:/~~?/}},url:{pattern:t(/!?\[(?:(?!\]))+\](?:\([^\s)]+(?:[\t ]+"(?:\\.|[^"\\])*")?\)| ?\[(?:(?!\]))+\])/.source,!1),lookbehind:!0,greedy:!0,inside:{variable:{pattern:/(\[)[^\]]+(?=\]$)/,lookbehind:!0},content:{pattern:/(^!?\[)[^\]]+(?=\])/,lookbehind:!0,inside:{}},string:{pattern:/"(?:\\.|[^"\\])*"(?=\)$)/}}}}),["url","bold","italic","strike"].forEach(function(t){["url","bold","italic","strike"].forEach(function(e){t!==e&&(n.languages.markdown[t].inside.content.inside[e]=n.languages.markdown[e])})}),n.hooks.add("after-tokenize",function(n){function t(n){if(n&&"string"!=typeof n)for(var e=0,i=n.length;e]?|<(?:<=?|[=:])?|>(?:=|>>?=?)?|==?=?|[~\u2260\u2264\u2265]/,punctuation:/[{}[\];(),.:]/,constant:/\b(?:(?:NaN|Inf)(?:16|32|64)?)\b/}; +// Copyright 2018 The Distill Template Authors +const fo=Or("d-code",`\n\n\n\n\n`);class go extends(Dr(fo(HTMLElement))){renderContent(){if(this.languageName=this.getAttribute("language"),!this.languageName)return void console.warn('You need to provide a language attribute to your block to let us know how to highlight your code; e.g.:\n zeros = np.zeros(shape).');const n=po.languages[this.languageName];if(n==undefined)return void console.warn(`Distill does not yet support highlighting your code block in 
"${this.languageName}'.`);let t=this.textContent;const e=this.shadowRoot.querySelector("#code-container");if(this.hasAttribute("block")){const n=(t=t.replace(/\n/,"")).match(/\s*/);if(t=(t=t.replace(new RegExp("\n"+n,"g"),"\n")).trim(),e.parentNode instanceof ShadowRoot){const n=document.createElement("pre");this.shadowRoot.removeChild(e),n.appendChild(e),this.shadowRoot.appendChild(n)}}e.className=`language-${this.languageName}`,e.innerHTML=po.highlight(t,n)}} +// Copyright 2018 The Distill Template Authors +const mo=Or("d-footnote",'\n\n\n\n
    \n \n
    \n
    \n\n\n \n\n\n');class bo extends(mo(HTMLElement)){constructor(){super();const n={childList:!0,characterData:!0,subtree:!0};new MutationObserver(this.notify).observe(this,n)}notify(){const n=new CustomEvent("onFootnoteChanged",{detail:this,bubbles:!0});document.dispatchEvent(n)}connectedCallback(){this.hoverBox=this.root.querySelector("d-hover-box"),window.customElements.whenDefined("d-hover-box").then(()=>{this.hoverBox.listen(this)}),bo.currentFootnoteId+=1;const n=bo.currentFootnoteId.toString();this.root.host.id="d-footnote-"+n;const t="dt-fn-hover-box-"+n;this.hoverBox.id=t;const e=this.root.querySelector("#fn-");e.setAttribute("id","fn-"+n),e.setAttribute("data-hover-ref",t),e.textContent=n}}bo.currentFootnoteId=0; +// Copyright 2018 The Distill Template Authors +const yo=Or("d-footnote-list","\n\n\n

    Footnotes

    \n
      \n",!1);class vo extends(yo(HTMLElement)){connectedCallback(){super.connectedCallback(),this.list=this.root.querySelector("ol"),this.root.style.display="none"}set footnotes(n){if(this.list.innerHTML="",n.length){this.root.style.display="";for(const t of n){const n=document.createElement("li");n.id=t.id+"-listing",n.innerHTML=t.innerHTML;const e=document.createElement("a");e.setAttribute("class","footnote-backlink"),e.textContent="[\u21a9]",e.href="#"+t.id,n.appendChild(e),this.list.appendChild(n)}}else this.root.style.display="none"}} +// Copyright 2018 The Distill Template Authors +const wo=Or("d-hover-box",'\n\n\n
      \n
      \n \n
      \n
      \n');class xo extends(wo(HTMLElement)){constructor(){super()}connectedCallback(){}listen(n){this.bindDivEvents(this),this.bindTriggerEvents(n)}bindDivEvents(n){n.addEventListener("mouseover",()=>{this.visible||this.showAtNode(n),this.stopTimeout()}),n.addEventListener("mouseout",()=>{this.extendTimeout(500)}),n.addEventListener("touchstart",n=>{n.stopPropagation()},{passive:!0}),document.body.addEventListener("touchstart",()=>{this.hide()},{passive:!0})}bindTriggerEvents(n){n.addEventListener("mouseover",()=>{this.visible||this.showAtNode(n),this.stopTimeout()}),n.addEventListener("mouseout",()=>{this.extendTimeout(300)}),n.addEventListener("touchstart",t=>{this.visible?this.hide():this.showAtNode(n),t.stopPropagation()},{passive:!0})}show(n){this.visible=!0,this.style.display="block",this.style.top=Math.round(n[1]+10)+"px"}showAtNode(n){const t=n.getBoundingClientRect();this.show([n.offsetLeft+t.width,n.offsetTop+t.height])}hide(){this.visible=!1,this.style.display="none",this.stopTimeout()}stopTimeout(){this.timeout&&clearTimeout(this.timeout)}extendTimeout(n){this.stopTimeout(),this.timeout=setTimeout(()=>{this.hide()},n)}} +// Copyright 2018 The Distill Template Authors +class ko extends HTMLElement{static get is(){return"d-title"}} +// Copyright 2018 The Distill Template Authors +const So=Or("d-references","\n\n",!1);class Mo extends(So(HTMLElement)){} +// Copyright 2018 The Distill Template Authors +class To extends HTMLElement{static get is(){return"d-toc"}connectedCallback(){this.getAttribute("prerendered")||(window.onload=(()=>{k(this,document.querySelector("d-article").querySelectorAll("h2, h3"))}))}}class _o extends HTMLElement{static get is(){return"d-figure"}static get readyQueue(){return _o._readyQueue||(_o._readyQueue=[]),_o._readyQueue}static addToReadyQueue(n){-1===_o.readyQueue.indexOf(n)&&(_o.readyQueue.push(n),_o.runReadyQueue())}static runReadyQueue(){const 
n=_o.readyQueue.sort((n,t)=>n._seenOnScreen-t._seenOnScreen).filter(n=>!n._ready).pop();n&&(n.ready(),requestAnimationFrame(_o.runReadyQueue))}constructor(){super(),this._ready=!1,this._onscreen=!1,this._offscreen=!0}connectedCallback(){this.loadsWhileScrolling=this.hasAttribute("loadsWhileScrolling"),_o.marginObserver.observe(this),_o.directObserver.observe(this)}disconnectedCallback(){_o.marginObserver.unobserve(this),_o.directObserver.unobserve(this)}static get marginObserver(){if(!_o._marginObserver){const n=window.innerHeight,t=Math.floor(2*n),e={rootMargin:t+"px 0px "+t+"px 0px",threshold:.01},i=_o.didObserveMarginIntersection,r=new IntersectionObserver(i,e);_o._marginObserver=r}return _o._marginObserver}static didObserveMarginIntersection(n){for(const t of n){const n=t.target;t.isIntersecting&&!n._ready&&_o.addToReadyQueue(n)}}static get directObserver(){return _o._directObserver||(_o._directObserver=new IntersectionObserver(_o.didObserveDirectIntersection,{rootMargin:"0px",threshold:[0,1]})),_o._directObserver}static didObserveDirectIntersection(n){for(const t of n){const n=t.target;t.isIntersecting?(n._seenOnScreen=new Date,n._offscreen&&n.onscreen()):n._onscreen&&n.offscreen()}}addEventListener(n,t){super.addEventListener(n,t),"ready"===n&&-1!==_o.readyQueue.indexOf(this)&&(this._ready=!1,_o.runReadyQueue()),"onscreen"===n&&this.onscreen()}ready(){this._ready=!0,_o.marginObserver.unobserve(this);const n=new CustomEvent("ready");this.dispatchEvent(n)}onscreen(){this._onscreen=!0,this._offscreen=!1;const n=new CustomEvent("onscreen");this.dispatchEvent(n)}offscreen(){this._onscreen=!1,this._offscreen=!0;const n=new CustomEvent("offscreen");this.dispatchEvent(n)}}if("undefined"!=typeof window){let n;_o.isScrolling=!1;const t=()=>{_o.isScrolling=!0,clearTimeout(n),n=setTimeout(()=>{_o.isScrolling=!1,_o.runReadyQueue()},500)};window.addEventListener("scroll",t,!0)} +// Copyright 2018 The Distill Template Authors +const 
Co="distill.pub",Ao=Or("d-interstitial",'\n\n\n
      \n
      \n

      This article is in review.

      \n

      Do not share this URL or the contents of this article. Thank you!

      \n \n

      Enter the password we shared with you as part of the review process to view the article.

      \n
      \n
      \n');class Eo extends(Ao(HTMLElement)){connectedCallback(){if(this.shouldRemoveSelf())this.parentElement.removeChild(this);else{this.root.querySelector("#interstitial-password-input").oninput=(n=>this.passwordChanged(n))}}passwordChanged(n){n.target.value===this.password&&(console.log("Correct password entered."),this.parentElement.removeChild(this),"undefined"!=typeof Storage&&(console.log("Saved that correct password was entered."),localStorage.setItem(this.localStorageIdentifier(),"true")))}shouldRemoveSelf(){return window&&window.location.hostname===Co?(console.warn("Interstitial found on production, hiding it."),!0):"undefined"!=typeof Storage&&"true"===localStorage.getItem(this.localStorageIdentifier())&&(console.log("Loaded that correct password was entered before; skipping interstitial."),!0)}localStorageIdentifier(){const n="interstitial-password-correct";return"distill-drafts"+(window?window.location.pathname:"-")+n}}var No=M(S).right,Lo=Math.sqrt(50),Do=Math.sqrt(10),Oo=Math.sqrt(2),Io=.7,Fo=1/Io,Ro="\\s*([+-]?\\d+)\\s*",Uo="\\s*([+-]?\\d*\\.?\\d+(?:[eE][+-]?\\d+)?)\\s*",$o="\\s*([+-]?\\d*\\.?\\d+(?:[eE][+-]?\\d+)?)%\\s*",Po=/^#([0-9a-f]{3,8})$/,Ho=new RegExp("^rgb\\("+[Ro,Ro,Ro]+"\\)$"),zo=new RegExp("^rgb\\("+[$o,$o,$o]+"\\)$"),qo=new RegExp("^rgba\\("+[Ro,Ro,Ro,Uo]+"\\)$"),jo=new RegExp("^rgba\\("+[$o,$o,$o,Uo]+"\\)$"),Bo=new RegExp("^hsl\\("+[Uo,$o,$o]+"\\)$"),Yo=new 
RegExp("^hsla\\("+[Uo,$o,$o,Uo]+"\\)$"),Wo={aliceblue:15792383,antiquewhite:16444375,aqua:65535,aquamarine:8388564,azure:15794175,beige:16119260,bisque:16770244,black:0,blanchedalmond:16772045,blue:255,blueviolet:9055202,brown:10824234,burlywood:14596231,cadetblue:6266528,chartreuse:8388352,chocolate:13789470,coral:16744272,cornflowerblue:6591981,cornsilk:16775388,crimson:14423100,cyan:65535,darkblue:139,darkcyan:35723,darkgoldenrod:12092939,darkgray:11119017,darkgreen:25600,darkgrey:11119017,darkkhaki:12433259,darkmagenta:9109643,darkolivegreen:5597999,darkorange:16747520,darkorchid:10040012,darkred:9109504,darksalmon:15308410,darkseagreen:9419919,darkslateblue:4734347,darkslategray:3100495,darkslategrey:3100495,darkturquoise:52945,darkviolet:9699539,deeppink:16716947,deepskyblue:49151,dimgray:6908265,dimgrey:6908265,dodgerblue:2003199,firebrick:11674146,floralwhite:16775920,forestgreen:2263842,fuchsia:16711935,gainsboro:14474460,ghostwhite:16316671,gold:16766720,goldenrod:14329120,gray:8421504,green:32768,greenyellow:11403055,grey:8421504,honeydew:15794160,hotpink:16738740,indianred:13458524,indigo:4915330,ivory:16777200,khaki:15787660,lavender:15132410,lavenderblush:16773365,lawngreen:8190976,lemonchiffon:16775885,lightblue:11393254,lightcoral:15761536,lightcyan:14745599,lightgoldenrodyellow:16448210,lightgray:13882323,lightgreen:9498256,lightgrey:13882323,lightpink:16758465,lightsalmon:16752762,lightseagreen:2142890,lightskyblue:8900346,lightslategray:7833753,lightslategrey:7833753,lightsteelblue:11584734,lightyellow:16777184,lime:65280,limegreen:3329330,linen:16445670,magenta:16711935,maroon:8388608,mediumaquamarine:6737322,mediumblue:205,mediumorchid:12211667,mediumpurple:9662683,mediumseagreen:3978097,mediumslateblue:8087790,mediumspringgreen:64154,mediumturquoise:4772300,mediumvioletred:13047173,midnightblue:1644912,mintcream:16121850,mistyrose:16770273,moccasin:16770229,navajowhite:16768685,navy:128,oldlace:16643558,olive:8421376,olivedrab:7048739,orange:16
753920,orangered:16729344,orchid:14315734,palegoldenrod:15657130,palegreen:10025880,paleturquoise:11529966,palevioletred:14381203,papayawhip:16773077,peachpuff:16767673,peru:13468991,pink:16761035,plum:14524637,powderblue:11591910,purple:8388736,rebeccapurple:6697881,red:16711680,rosybrown:12357519,royalblue:4286945,saddlebrown:9127187,salmon:16416882,sandybrown:16032864,seagreen:3050327,seashell:16774638,sienna:10506797,silver:12632256,skyblue:8900331,slateblue:6970061,slategray:7372944,slategrey:7372944,snow:16775930,springgreen:65407,steelblue:4620980,tan:13808780,teal:32896,thistle:14204888,tomato:16737095,turquoise:4251856,violet:15631086,wheat:16113331,white:16777215,whitesmoke:16119285,yellow:16776960,yellowgreen:10145074};L(O,U,{copy:function(n){return Object.assign(new this.constructor,this,n)},displayable:function(){return this.rgb().displayable()},hex:I,formatHex:I,formatHsl:F,formatRgb:R,toString:R}),L(q,z,D(O,{brighter:function(n){return n=null==n?Fo:Math.pow(Fo,n),new q(this.r*n,this.g*n,this.b*n,this.opacity)},darker:function(n){return n=null==n?Io:Math.pow(Io,n),new q(this.r*n,this.g*n,this.b*n,this.opacity)},rgb:function(){return this},displayable:function(){return-.5<=this.r&&this.r<255.5&&-.5<=this.g&&this.g<255.5&&-.5<=this.b&&this.b<255.5&&0<=this.opacity&&this.opacity<=1},hex:j,formatHex:j,formatRgb:B,toString:B})),L(K,V,D(O,{brighter:function(n){return n=null==n?Fo:Math.pow(Fo,n),new K(this.h,this.s,this.l*n,this.opacity)},darker:function(n){return n=null==n?Io:Math.pow(Io,n),new K(this.h,this.s,this.l*n,this.opacity)},rgb:function(){var n=this.h%360+360*(this.h<0),t=isNaN(n)||isNaN(this.s)?0:this.s,e=this.l,i=e+(e<.5?e:1-e)*t,r=2*e-i;return new q(X(n>=240?n-240:n+120,r,i),X(n,r,i),X(n<120?n+240:n-120,r,i),this.opacity)},displayable:function(){return(0<=this.s&&this.s<=1||isNaN(this.s))&&0<=this.l&&this.l<=1&&0<=this.opacity&&this.opacity<=1},formatHsl:function(){var 
n=this.opacity;return(1===(n=isNaN(n)?1:Math.max(0,Math.min(1,n)))?"hsl(":"hsla(")+(this.h||0)+", "+100*(this.s||0)+"%, "+100*(this.l||0)+"%"+(1===n?")":", "+n+")")}}));var Go=Math.PI/180,Vo=180/Math.PI,Ko=18,Xo=.96422,Zo=1,Qo=.82521,Jo=4/29,na=6/29,ta=3*na*na,ea=na*na*na;L(J,Q,D(O,{brighter:function(n){return new J(this.l+Ko*(null==n?1:n),this.a,this.b,this.opacity)},darker:function(n){return new J(this.l-Ko*(null==n?1:n),this.a,this.b,this.opacity)},rgb:function(){var n=(this.l+16)/116,t=isNaN(this.a)?n:n+this.a/500,e=isNaN(this.b)?n:n-this.b/200;return new q(en(3.1338561*(t=Xo*tn(t))-1.6168667*(n=Zo*tn(n))-.4906146*(e=Qo*tn(e))),en(-.9787684*t+1.9161415*n+.033454*e),en(.0719453*t-.2289914*n+1.4052427*e),this.opacity)}})),L(sn,an,D(O,{brighter:function(n){return new sn(this.h,this.c,this.l+Ko*(null==n?1:n),this.opacity)},darker:function(n){return new sn(this.h,this.c,this.l-Ko*(null==n?1:n),this.opacity)},rgb:function(){return ln(this).rgb()}}));var ia=-.14861,ra=1.78277,oa=-.29227,aa=-.90649,sa=1.97294,la=sa*aa,ua=sa*ra,ca=ra*oa-aa*ia;L(dn,cn,D(O,{brighter:function(n){return n=null==n?Fo:Math.pow(Fo,n),new dn(this.h,this.s,this.l*n,this.opacity)},darker:function(n){return n=null==n?Io:Math.pow(Io,n),new dn(this.h,this.s,this.l*n,this.opacity)},rgb:function(){var n=isNaN(this.h)?0:(this.h+120)*Go,t=+this.l,e=isNaN(this.s)?0:this.s*t*(1-t),i=Math.cos(n),r=Math.sin(n);return new q(255*(t+e*(ia*i+ra*r)),255*(t+e*(oa*i+aa*r)),255*(t+e*(sa*i)),this.opacity)}}));var da,ha=function gs(n){function t(n,t){var i=e((n=z(n)).r,(t=z(t)).r),r=e(n.g,t.g),o=e(n.b,t.b),a=mn(n.opacity,t.opacity);return function(t){return n.r=i(t),n.g=r(t),n.b=o(t),n.opacity=a(t),n+""}}var e=gn(n);return t.gamma=gs,t}(1),pa=/[-+]?(?:\d+\.?\d*|\.?\d+)(?:[eE][-+]?\d+)?/g,fa=new RegExp(pa.source,"g"),ga=[0,1],ma=/^(?:(.)?([<>=^]))?([+\-( ])?([$#])?(0)?(\d+)?(,)?(\.\d+)?(~)?([a-z%])?$/i;qn.prototype=jn.prototype,jn.prototype.toString=function(){return 
this.fill+this.align+this.sign+this.symbol+(this.zero?"0":"")+(this.width===undefined?"":Math.max(1,0|this.width))+(this.comma?",":"")+(this.precision===undefined?"":"."+Math.max(0,0|this.precision))+(this.trim?"~":"")+this.type};var ba,ya,va,wa={"%":function(n,t){return(100*n).toFixed(t)},b:function(n){return Math.round(n).toString(2)},c:function(n){return n+""},d:function(n){return Math.round(n).toString(10)},e:function(n,t){return n.toExponential(t)},f:function(n,t){return n.toFixed(t)},g:function(n,t){return n.toPrecision(t)},o:function(n){return Math.round(n).toString(8)},p:function(n,t){return Wn(100*n,t)},r:Wn,s:Yn,X:function(n){return Math.round(n).toString(16).toUpperCase()},x:function(n){return Math.round(n).toString(16)}},xa=Array.prototype.map,ka=["y","z","a","f","p","n","\xb5","m","","k","M","G","T","P","E","Z","Y"];Kn({decimal:".",thousands:",",grouping:[3],currency:["$",""],minus:"-"});var Sa=new Date,Ma=new Date,Ta=et(function(){},function(n,t){n.setTime(+n+t)},function(n,t){return t-n});Ta.every=function(n){return n=Math.floor(n),isFinite(n)&&n>0?n>1?et(function(t){t.setTime(Math.floor(t/n)*n)},function(t,e){t.setTime(+t+e*n)},function(t,e){return(e-t)/n}):Ta:null};var _a=1e3,Ca=6e4,Aa=36e5,Ea=864e5,Na=6048e5,La=(et(function(n){n.setTime(n-n.getMilliseconds())},function(n,t){n.setTime(+n+t*_a)},function(n,t){return(t-n)/_a},function(n){return n.getUTCSeconds()}),et(function(n){n.setTime(n-n.getMilliseconds()-n.getSeconds()*_a)},function(n,t){n.setTime(+n+t*Ca)},function(n,t){return(t-n)/Ca},function(n){return n.getMinutes()}),et(function(n){n.setTime(n-n.getMilliseconds()-n.getSeconds()*_a-n.getMinutes()*Ca)},function(n,t){n.setTime(+n+t*Aa)},function(n,t){return(t-n)/Aa},function(n){return n.getHours()}),et(function(n){n.setHours(0,0,0,0)},function(n,t){n.setDate(n.getDate()+t)},function(n,t){return(t-n-(t.getTimezoneOffset()-n.getTimezoneOffset())*Ca)/Ea},function(n){return 
n.getDate()-1})),Da=it(0),Oa=it(1),Ia=(it(2),it(3),it(4)),Fa=(it(5),it(6),et(function(n){n.setDate(1),n.setHours(0,0,0,0)},function(n,t){n.setMonth(n.getMonth()+t)},function(n,t){return t.getMonth()-n.getMonth()+12*(t.getFullYear()-n.getFullYear())},function(n){return n.getMonth()}),et(function(n){n.setMonth(0,1),n.setHours(0,0,0,0)},function(n,t){n.setFullYear(n.getFullYear()+t)},function(n,t){return t.getFullYear()-n.getFullYear()},function(n){return n.getFullYear()}));Fa.every=function(n){return isFinite(n=Math.floor(n))&&n>0?et(function(t){t.setFullYear(Math.floor(t.getFullYear()/n)*n),t.setMonth(0,1),t.setHours(0,0,0,0)},function(t,e){t.setFullYear(t.getFullYear()+e*n)}):null};et(function(n){n.setUTCSeconds(0,0)},function(n,t){n.setTime(+n+t*Ca)},function(n,t){return(t-n)/Ca},function(n){return n.getUTCMinutes()}),et(function(n){n.setUTCMinutes(0,0,0)},function(n,t){n.setTime(+n+t*Aa)},function(n,t){return(t-n)/Aa},function(n){return n.getUTCHours()});var Ra=et(function(n){n.setUTCHours(0,0,0,0)},function(n,t){n.setUTCDate(n.getUTCDate()+t)},function(n,t){return(t-n)/Ea},function(n){return n.getUTCDate()-1}),Ua=rt(0),$a=rt(1),Pa=(rt(2),rt(3),rt(4)),Ha=(rt(5),rt(6),et(function(n){n.setUTCDate(1),n.setUTCHours(0,0,0,0)},function(n,t){n.setUTCMonth(n.getUTCMonth()+t)},function(n,t){return t.getUTCMonth()-n.getUTCMonth()+12*(t.getUTCFullYear()-n.getUTCFullYear())},function(n){return n.getUTCMonth()}),et(function(n){n.setUTCMonth(0,1),n.setUTCHours(0,0,0,0)},function(n,t){n.setUTCFullYear(n.getUTCFullYear()+t)},function(n,t){return t.getUTCFullYear()-n.getUTCFullYear()},function(n){return n.getUTCFullYear()}));Ha.every=function(n){return isFinite(n=Math.floor(n))&&n>0?et(function(t){t.setUTCFullYear(Math.floor(t.getUTCFullYear()/n)*n),t.setUTCMonth(0,1),t.setUTCHours(0,0,0,0)},function(t,e){t.setUTCFullYear(t.getUTCFullYear()+e*n)}):null};var za,qa,ja,Ba={"-":"",_:" ",0:"0"},Ya=/^\s*\d+/,Wa=/^%/,Ga=/[\\^$*+?|[\]().{}]/g;me({dateTime:"%x, 
%X",date:"%-m/%-d/%Y",time:"%-I:%M:%S %p",periods:["AM","PM"],days:["Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday"],shortDays:["Sun","Mon","Tue","Wed","Thu","Fri","Sat"],months:["January","February","March","April","May","June","July","August","September","October","November","December"],shortMonths:["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]});var Va="%Y-%m-%dT%H:%M:%S.%LZ",Ka=(Date.prototype.toISOString||qa(Va),+new Date("2000-01-01T00:00:00.000Z")||ja(Va),{value:function(){}});we.prototype=ve.prototype={constructor:we,on:function(n,t){var e,i=this._,r=xe(n+"",i),o=-1,a=r.length;if(!(arguments.length<2)){if(null!=t&&"function"!=typeof t)throw new Error("invalid callback: "+t);for(;++o0)for(var e,i,r=new Array(e),o=0;o=0&&(this._names.splice(t,1),this._node.setAttribute("class",this._names.join(" ")))},contains:function(n){return this._names.indexOf(n)>=0}};var Ja={},ns=null;"undefined"!=typeof document&&("onmouseenter"in document.documentElement||(Ja={mouseenter:"mouseover",mouseleave:"mouseout"}));var ts=[null];or.prototype=ar.prototype={constructor:or,select:Ne,selectAll:Oe,filter:Fe,data:qe,enter:Ue,exit:je,join:Be,merge:Ye,order:We,sort:Ge,call:Ke,nodes:Xe,node:Ze,size:Qe,empty:Je,each:ni,attr:si,style:hi,property:bi,classed:_i,text:Ni,html:Ii,raise:Ri,lower:$i,append:Pi,insert:zi,remove:ji,clone:Wi,datum:Gi,on:Ji,dispatch:rr},br.prototype.on=function(){var n=this._.on.apply(this._,arguments);return n===this._?this:n};const es=Or("d-slider","\n\n\n
      \n
      \n
      \n
      \n
      \n
      \n
      \n
      \n
      \n"),is={left:37,up:38,right:39,down:40,pageUp:33,pageDown:34,end:35,home:36};class rs extends(es(HTMLElement)){connectedCallback(){this.connected=!0,this.setAttribute("role","slider"),this.hasAttribute("tabindex")||this.setAttribute("tabindex",0),this.mouseEvent=!1,this.knob=this.root.querySelector(".knob-container"),this.background=this.root.querySelector(".background"),this.trackFill=this.root.querySelector(".track-fill"),this.track=this.root.querySelector(".track"),this.min=this.min?this.min:0,this.max=this.max?this.max:100,this.scale=tt().domain([this.min,this.max]).range([0,1]).clamp(!0),this.origin=this.origin!==undefined?this.origin:this.min,this.step=this.step?this.step:1,this.update(this.value?this.value:0),this.ticks=!!this.ticks&&this.ticks,this.renderTicks(),this.drag=kr().container(this.background).on("start",()=>{this.mouseEvent=!0,this.background.classList.add("mousedown"),this.changeValue=this.value,this.dragUpdate()}).on("drag",()=>{this.dragUpdate()}).on("end",()=>{this.mouseEvent=!1,this.background.classList.remove("mousedown"),this.dragUpdate(),this.changeValue!==this.value&&this.dispatchChange(),this.changeValue=this.value}),this.drag(sr(this.background)),this.addEventListener("focusin",()=>{this.mouseEvent||this.background.classList.add("focus")}),this.addEventListener("focusout",()=>{this.background.classList.remove("focus")}),this.addEventListener("keydown",this.onKeyDown)}static get observedAttributes(){return["min","max","value","step","ticks","origin","tickValues","tickLabels"]}attributeChangedCallback(n,t,e){isNaN(e)||e===undefined||null===e||("min"==n&&(this.min=+e,this.setAttribute("aria-valuemin",this.min)),"max"==n&&(this.max=+e,this.setAttribute("aria-valuemax",this.max)),"value"==n&&this.update(+e),"origin"==n&&(this.origin=+e),"step"==n&&e>0&&(this.step=+e),"ticks"==n&&(this.ticks=""===e||e))}onKeyDown(n){this.changeValue=this.value;let t=!1;switch(n.keyCode){case is.left:case 
is.down:this.update(this.value-this.step),t=!0;break;case is.right:case is.up:this.update(this.value+this.step),t=!0;break;case is.pageUp:case is.pageDown:this.update(this.value+10*this.step),t=!0;break;case is.home:this.update(this.min),t=!0;break;case is.end:this.update(this.max),t=!0}t&&(this.background.classList.add("focus"),n.preventDefault(),n.stopPropagation(),this.changeValue!==this.value&&this.dispatchChange())}validateValueRange(n,t,e){return Math.max(Math.min(t,e),n)}quantizeValue(n,t){return Math.round(n/t)*t}dragUpdate(){const n=this.background.getBoundingClientRect(),t=ns.x,e=n.width;this.update(this.scale.invert(t/e))}update(n){let t=n;"any"!==this.step&&(t=this.quantizeValue(n,this.step)),t=this.validateValueRange(this.min,this.max,t),this.connected&&(this.knob.style.left=100*this.scale(t)+"%",this.trackFill.style.width=100*this.scale(this.min+Math.abs(t-this.origin))+"%",this.trackFill.style.left=100*this.scale(Math.min(t,this.origin))+"%"),this.value!==t&&(this.value=t,this.setAttribute("aria-valuenow",this.value),this.dispatchInput())}dispatchChange(){const n=new Event("change");this.dispatchEvent(n,{})}dispatchInput(){const n=new Event("input");this.dispatchEvent(n,{})}renderTicks(){const n=this.root.querySelector(".ticks");if(!1!==this.ticks){let t=[];(t=this.ticks>0?this.scale.ticks(this.ticks):"any"===this.step?this.scale.ticks():_(this.min,this.max+1e-6,this.step)).forEach(t=>{const e=document.createElement("div");e.classList.add("tick"),e.style.left=100*this.scale(t)+"%",n.appendChild(e)})}else n.style.display="none"}}var os='\n \n\n';const as=Or("distill-header",`\n\n\n`,!1); +// Copyright 2018 The Distill Template Authors +class ss extends(as(HTMLElement)){} +// Copyright 2018 The Distill Template Authors +const ls="\n\n";class us extends HTMLElement{static get is(){return"distill-appendix"}set frontMatter(n){this.innerHTML=Sr(n)}}const cs=Or("distill-footer",`\n\n\n\n\n`); +// Copyright 2018 The Distill Template Authors +class ds 
extends(cs(HTMLElement)){} +// Copyright 2018 The Distill Template Authors +let hs=!1,ps=0;const fs=function(){if(window.distill.runlevel<1)throw new Error("Insufficient Runlevel for Distill Template!");if("distill"in window&&window.distill.templateIsLoading)throw new Error("Runlevel 1: Distill Template is getting loaded more than once, aborting!");window.distill.templateIsLoading=!0,console.debug("Runlevel 1: Distill Template has started loading."),p(document),console.debug("Runlevel 1: Static Distill styles have been added."),console.debug("Runlevel 1->2."),window.distill.runlevel+=1;for(const[n,t]of Object.entries(Vr.listeners))"function"==typeof t?document.addEventListener(n,t):console.error("Runlevel 2: Controller listeners need to be functions!");console.debug("Runlevel 2: We can now listen to controller events."),console.debug("Runlevel 2->3."),window.distill.runlevel+=1;const n=[Jr,to,io,ao,so,uo,ho,go,bo,vo,Wr,xo,ko,Yr,Mo,To,_o,rs,Eo],t=[ss,us,ds];if(window.distill.runlevel<2)throw new Error("Insufficient Runlevel for adding custom elements!");const e=n.concat(t);for(const n of e)console.debug("Runlevel 2: Registering custom element: "+n.is),customElements.define(n.is,n);console.debug("Runlevel 3: Distill Template finished registering custom elements."),console.debug("Runlevel 3->4."),window.distill.runlevel+=1,u()&&Vr.listeners.DOMContentLoaded(),console.debug("Runlevel 4: Distill Template initialisation complete."),window.distill.templateIsLoading=!1,window.distill.templateHasLoaded=!0};window.distill={runlevel:ps,initialize:fs,templateIsLoading:hs},Zr.browserSupportsAllFeatures()?(console.debug("Runlevel 0: No need for polyfills."),console.debug("Runlevel 0->1."),window.distill.runlevel+=1,window.distill.initialize()):(console.debug("Runlevel 0: Distill Template is loading polyfills."),Zr.load(window.distill.initialize))}); \ No newline at end of file diff --git a/assets/js/distillpub/transforms.v2.js b/assets/js/distillpub/transforms.v2.js index 
5fbba910..41d3b7d3 100644 --- a/assets/js/distillpub/transforms.v2.js +++ b/assets/js/distillpub/transforms.v2.js @@ -1,13185 +1,75 @@ -(function (global, factory) { - typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports, require('fs')) : - typeof define === 'function' && define.amd ? define(['exports', 'fs'], factory) : - (global = global || self, factory(global.dl = {}, global.fs)); -}(this, (function (exports, fs) { 'use strict'; - - fs = fs && Object.prototype.hasOwnProperty.call(fs, 'default') ? fs['default'] : fs; - - // Copyright 2018 The Distill Template Authors - // - // Licensed under the Apache License, Version 2.0 (the "License"); - // you may not use this file except in compliance with the License. - // You may obtain a copy of the License at - // - // http://www.apache.org/licenses/LICENSE-2.0 - // - // Unless required by applicable law or agreed to in writing, software - // distributed under the License is distributed on an "AS IS" BASIS, - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - // See the License for the specific language governing permissions and - // limitations under the License. - - const days = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']; - const months = ['Jan.', 'Feb.', 'March', 'April', 'May', 'June', 'July', 'Aug.', 'Sept.', 'Oct.', 'Nov.', 'Dec.']; - const zeroPad = n => n < 10 ? 
'0' + n : n; - - const RFC = function(date) { - const day = days[date.getDay()].substring(0, 3); - const paddedDate = zeroPad(date.getDate()); - const month = months[date.getMonth()].substring(0,3); - const year = date.getFullYear().toString(); - const hours = date.getUTCHours().toString(); - const minutes = date.getUTCMinutes().toString(); - const seconds = date.getUTCSeconds().toString(); - return `${day}, ${paddedDate} ${month} ${year} ${hours}:${minutes}:${seconds} Z`; - }; - - const objectFromMap = function(map) { - const object = Array.from(map).reduce((object, [key, value]) => ( - Object.assign(object, { [key]: value }) // Be careful! Maps can have non-String keys; object literals can't. - ), {}); - return object; - }; - - const mapFromObject = function(object) { - const map = new Map(); - for (var property in object) { - if (object.hasOwnProperty(property)) { - map.set(property, object[property]); - } - } - return map; - }; - - class Author { - - // constructor(name='', personalURL='', affiliation='', affiliationURL='') { - // this.name = name; // 'Chris Olah' - // this.personalURL = personalURL; // 'https://colah.github.io' - // this.affiliation = affiliation; // 'Google Brain' - // this.affiliationURL = affiliationURL; // 'https://g.co/brain' - // } - - constructor(object) { - this.name = object.author; // 'Chris Olah' - this.personalURL = object.authorURL; // 'https://colah.github.io' - this.affiliation = object.affiliation; // 'Google Brain' - this.affiliationURL = object.affiliationURL; // 'https://g.co/brain' - this.affiliations = object.affiliations || []; // new-style affiliations - } - - // 'Chris' - get firstName() { - const names = this.name.split(' '); - return names.slice(0, names.length - 1).join(' '); - } - - // 'Olah' - get lastName() { - const names = this.name.split(' '); - return names[names.length -1]; - } - } - - function mergeFromYMLFrontmatter(target, source) { - target.title = source.title; - if (source.published) { - if 
(source.published instanceof Date) { - target.publishedDate = source.published; - } else if (source.published.constructor === String) { - target.publishedDate = new Date(source.published); - } - } - if (source.publishedDate) { - if (source.publishedDate instanceof Date) { - target.publishedDate = source.publishedDate; - } else if (source.publishedDate.constructor === String) { - target.publishedDate = new Date(source.publishedDate); - } else { - console.error('Don\'t know what to do with published date: ' + source.publishedDate); - } - } - target.description = source.description; - target.authors = source.authors.map( (authorObject) => new Author(authorObject)); - target.katex = source.katex; - target.password = source.password; - if (source.doi) { - target.doi = source.doi; - } - } - - class FrontMatter { - constructor() { - this.title = 'unnamed article'; // 'Attention and Augmented Recurrent Neural Networks' - this.description = ''; // 'A visual overview of neural attention...' - this.authors = []; // Array of Author(s) - - this.bibliography = new Map(); - this.bibliographyParsed = false; - // { - // 'gregor2015draw': { - // 'title': 'DRAW: A recurrent neural network for image generation', - // 'author': 'Gregor, Karol and Danihelka, Ivo and Graves, Alex and Rezende, Danilo Jimenez and Wierstra, Daan', - // 'journal': 'arXiv preprint arXiv:1502.04623', - // 'year': '2015', - // 'url': 'https://arxiv.org/pdf/1502.04623.pdf', - // 'type': 'article' - // }, - // } - - // Citation keys should be listed in the order that they are appear in the document. - // Each key refers to a key in the bibliography dictionary. 
- this.citations = []; // [ 'gregor2015draw', 'mercier2011humans' ] - this.citationsCollected = false; - - // - // Assigned from posts.csv - // - - // publishedDate: 2016-09-08T07:00:00.000Z, - // tags: [ 'rnn' ], - // distillPath: '2016/augmented-rnns', - // githubPath: 'distillpub/post--augmented-rnns', - // doiSuffix: 1, - - // - // Assigned from journal - // - this.journal = {}; - // journal: { - // 'title': 'Distill', - // 'full_title': 'Distill', - // 'abbrev_title': 'Distill', - // 'url': 'http://distill.pub', - // 'doi': '10.23915/distill', - // 'publisherName': 'Distill Working Group', - // 'publisherEmail': 'admin@distill.pub', - // 'issn': '2476-0757', - // 'editors': [...], - // 'committee': [...] - // } - // volume: 1, - // issue: 9, - - this.katex = {}; - - // - // Assigned from publishing process - // - - // githubCompareUpdatesUrl: 'https://github.com/distillpub/post--augmented-rnns/compare/1596e094d8943d2dc0ea445d92071129c6419c59...3bd9209e0c24d020f87cf6152dcecc6017cbc193', - // updatedDate: 2017-03-21T07:13:16.000Z, - // doi: '10.23915/distill.00001', - this.doi = undefined; - this.publishedDate = undefined; - } - - // Example: - // title: Demo Title Attention and Augmented Recurrent Neural Networks - // published: Jan 10, 2017 - // authors: - // - Chris Olah: - // - Shan Carter: http://shancarter.com - // affiliations: - // - Google Brain: - // - Google Brain: http://g.co/brain - - // - // Computed Properties - // - - // 'http://distill.pub/2016/augmented-rnns', - set url(value) { - this._url = value; - } - get url() { - if (this._url) { - return this._url; - } else if (this.distillPath && this.journal.url) { - return this.journal.url + '/' + this.distillPath; - } else if (this.journal.url) { - return this.journal.url; - } - } - - // 'https://github.com/distillpub/post--augmented-rnns', - get githubUrl() { - if (this.githubPath) { - return 'https://github.com/' + this.githubPath; - } else { - return undefined; - } - } - - // TODO resolve 
differences in naming of URL/Url/url. - // 'http://distill.pub/2016/augmented-rnns/thumbnail.jpg', - set previewURL(value) { - this._previewURL = value; - } - get previewURL() { - return this._previewURL ? this._previewURL : this.url + '/thumbnail.jpg'; - } - - // 'Thu, 08 Sep 2016 00:00:00 -0700', - get publishedDateRFC() { - return RFC(this.publishedDate); - } - - // 'Thu, 08 Sep 2016 00:00:00 -0700', - get updatedDateRFC() { - return RFC(this.updatedDate); - } - - // 2016, - get publishedYear() { - return this.publishedDate.getFullYear(); - } - - // 'Sept', - get publishedMonth() { - return months[this.publishedDate.getMonth()]; - } - - // 8, - get publishedDay() { - return this.publishedDate.getDate(); - } - - // '09', - get publishedMonthPadded() { - return zeroPad(this.publishedDate.getMonth() + 1); - } - - // '08', - get publishedDayPadded() { - return zeroPad(this.publishedDate.getDate()); - } - - get publishedISODateOnly() { - return this.publishedDate.toISOString().split('T')[0]; - } - - get volume() { - const volume = this.publishedYear - 2015; - if (volume < 1) { - throw new Error('Invalid publish date detected during computing volume'); - } - return volume; - } - - get issue() { - return this.publishedDate.getMonth() + 1; - } - - // 'Olah & Carter', - get concatenatedAuthors() { - if (this.authors.length > 2) { - return this.authors[0].lastName + ', et al.'; - } else if (this.authors.length === 2) { - return this.authors[0].lastName + ' & ' + this.authors[1].lastName; - } else if (this.authors.length === 1) { - return this.authors[0].lastName; - } - } - - // 'Olah, Chris and Carter, Shan', - get bibtexAuthors() { - return this.authors.map(author => { - return author.lastName + ', ' + author.firstName; - }).join(' and '); - } - - // 'olah2016attention' - get slug() { - let slug = ''; - if (this.authors.length) { - slug += this.authors[0].lastName.toLowerCase(); - slug += this.publishedYear; - slug += this.title.split(' ')[0].toLowerCase(); - } - return 
slug || 'Untitled'; - } - - get bibliographyEntries() { - return new Map(this.citations.map( citationKey => { - const entry = this.bibliography.get(citationKey); - return [citationKey, entry]; - })); - } - - set bibliography(bibliography) { - if (bibliography instanceof Map) { - this._bibliography = bibliography; - } else if (typeof bibliography === 'object') { - this._bibliography = mapFromObject(bibliography); - } - } - - get bibliography() { - return this._bibliography; - } - - static fromObject(source) { - const frontMatter = new FrontMatter(); - Object.assign(frontMatter, source); - return frontMatter; - } - - assignToObject(target) { - Object.assign(target, this); - target.bibliography = objectFromMap(this.bibliographyEntries); - target.url = this.url; - target.doi = this.doi; - target.githubUrl = this.githubUrl; - target.previewURL = this.previewURL; - if (this.publishedDate) { - target.volume = this.volume; - target.issue = this.issue; - target.publishedDateRFC = this.publishedDateRFC; - target.publishedYear = this.publishedYear; - target.publishedMonth = this.publishedMonth; - target.publishedDay = this.publishedDay; - target.publishedMonthPadded = this.publishedMonthPadded; - target.publishedDayPadded = this.publishedDayPadded; - } - if (this.updatedDate) { - target.updatedDateRFC = this.updatedDateRFC; - } - target.concatenatedAuthors = this.concatenatedAuthors; - target.bibtexAuthors = this.bibtexAuthors; - target.slug = this.slug; - } - - } - - // Copyright 2018 The Distill Template Authors - // - // Licensed under the Apache License, Version 2.0 (the "License"); - // you may not use this file except in compliance with the License. - // You may obtain a copy of the License at - // - // http://www.apache.org/licenses/LICENSE-2.0 - // - // Unless required by applicable law or agreed to in writing, software - // distributed under the License is distributed on an "AS IS" BASIS, - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- // See the License for the specific language governing permissions and - // limitations under the License. - - function _moveLegacyAffiliationFormatIntoArray(frontMatter) { - // authors used to have propoerties "affiliation" and "affiliationURL". - // We now encourage using an array for affiliations containing objects with - // properties "name" and "url". - for (let author of frontMatter.authors) { - const hasOldStyle = Boolean(author.affiliation); - const hasNewStyle = Boolean(author.affiliations); - if (!hasOldStyle) continue; - if (hasNewStyle) { - console.warn(`Author ${author.author} has both old-style ("affiliation" & "affiliationURL") and new style ("affiliations") affiliation information!`); - } else { - let newAffiliation = { - "name": author.affiliation - }; - if (author.affiliationURL) newAffiliation.url = author.affiliationURL; - author.affiliations = [newAffiliation]; - } - } - return frontMatter - } - - function parseFrontmatter(element) { - const scriptTag = element.firstElementChild; - if (scriptTag) { - const type = scriptTag.getAttribute('type'); - if (type.split('/')[1] == 'json') { - const content = scriptTag.textContent; - const parsed = JSON.parse(content); - return _moveLegacyAffiliationFormatIntoArray(parsed); - } else { - console.error('Distill only supports JSON frontmatter tags anymore; no more YAML.'); - } - } else { - console.error('You added a frontmatter tag but did not provide a script tag with front matter data in it. 
Please take a look at our templates.'); - } - return {}; - } - - // Copyright 2018 The Distill Template Authors - - function ExtractFrontmatter(dom, data) { - const frontMatterTag = dom.querySelector('d-front-matter'); - if (!frontMatterTag) { - console.warn('No front matter tag found!'); - return; - } - const extractedData = parseFrontmatter(frontMatterTag); - mergeFromYMLFrontmatter(data, extractedData); - } - - function commonjsRequire () { - throw new Error('Dynamic requires are not currently supported by rollup-plugin-commonjs'); - } - - function unwrapExports (x) { - return x && x.__esModule && Object.prototype.hasOwnProperty.call(x, 'default') ? x['default'] : x; - } - - function createCommonjsModule(fn, module) { - return module = { exports: {} }, fn(module, module.exports), module.exports; - } - - var bibtexParse = createCommonjsModule(function (module, exports) { - /* start bibtexParse 0.0.22 */ - - //Original work by Henrik Muehe (c) 2010 - // - //CommonJS port by Mikola Lysenko 2013 - // - //Port to Browser lib by ORCID / RCPETERS - // - //Issues: - //no comment handling within strings - //no string concatenation - //no variable values yet - //Grammar implemented here: - //bibtex -> (string | preamble | comment | entry)*; - //string -> '@STRING' '{' key_equals_value '}'; - //preamble -> '@PREAMBLE' '{' value '}'; - //comment -> '@COMMENT' '{' value '}'; - //entry -> '@' key '{' key ',' key_value_list '}'; - //key_value_list -> key_equals_value (',' key_equals_value)*; - //key_equals_value -> key '=' value; - //value -> value_quotes | value_braces | key; - //value_quotes -> '"' .*? '"'; // not quite - //value_braces -> '{' .*? 
'"'; // not quite - (function(exports) { - - function BibtexParser() { - - this.months = ["jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec"]; - this.notKey = [',','{','}',' ','=']; - this.pos = 0; - this.input = ""; - this.entries = new Array(); - - this.currentEntry = ""; - - this.setInput = function(t) { - this.input = t; - }; - - this.getEntries = function() { - return this.entries; - }; - - this.isWhitespace = function(s) { - return (s == ' ' || s == '\r' || s == '\t' || s == '\n'); - }; - - this.match = function(s, canCommentOut) { - if (canCommentOut == undefined || canCommentOut == null) - canCommentOut = true; - this.skipWhitespace(canCommentOut); - if (this.input.substring(this.pos, this.pos + s.length) == s) { - this.pos += s.length; - } else { - throw "Token mismatch, expected " + s + ", found " - + this.input.substring(this.pos); - } this.skipWhitespace(canCommentOut); - }; - - this.tryMatch = function(s, canCommentOut) { - if (canCommentOut == undefined || canCommentOut == null) - canCommentOut = true; - this.skipWhitespace(canCommentOut); - if (this.input.substring(this.pos, this.pos + s.length) == s) { - return true; - } else { - return false; - } }; - - /* when search for a match all text can be ignored, not just white space */ - this.matchAt = function() { - while (this.input.length > this.pos && this.input[this.pos] != '@') { - this.pos++; - } - if (this.input[this.pos] == '@') { - return true; - } return false; - }; - - this.skipWhitespace = function(canCommentOut) { - while (this.isWhitespace(this.input[this.pos])) { - this.pos++; - } if (this.input[this.pos] == "%" && canCommentOut == true) { - while (this.input[this.pos] != "\n") { - this.pos++; - } this.skipWhitespace(canCommentOut); - } }; - - this.value_braces = function() { - var bracecount = 0; - this.match("{", false); - var start = this.pos; - var escaped = false; - while (true) { - if (!escaped) { - if (this.input[this.pos] == '}') { - if (bracecount > 
0) { - bracecount--; - } else { - var end = this.pos; - this.match("}", false); - return this.input.substring(start, end); - } } else if (this.input[this.pos] == '{') { - bracecount++; - } else if (this.pos >= this.input.length - 1) { - throw "Unterminated value"; - } } if (this.input[this.pos] == '\\' && escaped == false) - escaped = true; - else - escaped = false; - this.pos++; - } }; - - this.value_comment = function() { - var str = ''; - var brcktCnt = 0; - while (!(this.tryMatch("}", false) && brcktCnt == 0)) { - str = str + this.input[this.pos]; - if (this.input[this.pos] == '{') - brcktCnt++; - if (this.input[this.pos] == '}') - brcktCnt--; - if (this.pos >= this.input.length - 1) { - throw "Unterminated value:" + this.input.substring(start); - } this.pos++; - } return str; - }; - - this.value_quotes = function() { - this.match('"', false); - var start = this.pos; - var escaped = false; - while (true) { - if (!escaped) { - if (this.input[this.pos] == '"') { - var end = this.pos; - this.match('"', false); - return this.input.substring(start, end); - } else if (this.pos >= this.input.length - 1) { - throw "Unterminated value:" + this.input.substring(start); - } } - if (this.input[this.pos] == '\\' && escaped == false) - escaped = true; - else - escaped = false; - this.pos++; - } }; - - this.single_value = function() { - var start = this.pos; - if (this.tryMatch("{")) { - return this.value_braces(); - } else if (this.tryMatch('"')) { - return this.value_quotes(); - } else { - var k = this.key(); - if (k.match("^[0-9]+$")) - return k; - else if (this.months.indexOf(k.toLowerCase()) >= 0) - return k.toLowerCase(); - else - throw "Value expected:" + this.input.substring(start) + ' for key: ' + k; - - } }; - - this.value = function() { - var values = []; - values.push(this.single_value()); - while (this.tryMatch("#")) { - this.match("#"); - values.push(this.single_value()); - } return values.join(""); - }; - - this.key = function() { - var start = this.pos; - while 
(true) { - if (this.pos >= this.input.length) { - throw "Runaway key"; - } // а-яА-Я is Cyrillic - //console.log(this.input[this.pos]); - if (this.notKey.indexOf(this.input[this.pos]) >= 0) { - return this.input.substring(start, this.pos); - } else { - this.pos++; - - } } }; - - this.key_equals_value = function() { - var key = this.key(); - if (this.tryMatch("=")) { - this.match("="); - var val = this.value(); - return [ key, val ]; - } else { - throw "... = value expected, equals sign missing:" - + this.input.substring(this.pos); - } }; - - this.key_value_list = function() { - var kv = this.key_equals_value(); - this.currentEntry['entryTags'] = {}; - this.currentEntry['entryTags'][kv[0]] = kv[1]; - while (this.tryMatch(",")) { - this.match(","); - // fixes problems with commas at the end of a list - if (this.tryMatch("}")) { - break; - } - kv = this.key_equals_value(); - this.currentEntry['entryTags'][kv[0]] = kv[1]; - } }; - - this.entry_body = function(d) { - this.currentEntry = {}; - this.currentEntry['citationKey'] = this.key(); - this.currentEntry['entryType'] = d.substring(1); - this.match(","); - this.key_value_list(); - this.entries.push(this.currentEntry); - }; - - this.directive = function() { - this.match("@"); - return "@" + this.key(); - }; - - this.preamble = function() { - this.currentEntry = {}; - this.currentEntry['entryType'] = 'PREAMBLE'; - this.currentEntry['entry'] = this.value_comment(); - this.entries.push(this.currentEntry); - }; - - this.comment = function() { - this.currentEntry = {}; - this.currentEntry['entryType'] = 'COMMENT'; - this.currentEntry['entry'] = this.value_comment(); - this.entries.push(this.currentEntry); - }; - - this.entry = function(d) { - this.entry_body(d); - }; - - this.bibtex = function() { - while (this.matchAt()) { - var d = this.directive(); - this.match("{"); - if (d == "@STRING") { - this.string(); - } else if (d == "@PREAMBLE") { - this.preamble(); - } else if (d == "@COMMENT") { - this.comment(); - } else { - 
this.entry(d); - } - this.match("}"); - } }; - } - exports.toJSON = function(bibtex) { - var b = new BibtexParser(); - b.setInput(bibtex); - b.bibtex(); - return b.entries; - }; - - /* added during hackathon don't hate on me */ - exports.toBibtex = function(json) { - var out = ''; - for ( var i in json) { - out += "@" + json[i].entryType; - out += '{'; - if (json[i].citationKey) - out += json[i].citationKey + ', '; - if (json[i].entry) - out += json[i].entry ; - if (json[i].entryTags) { - var tags = ''; - for (var jdx in json[i].entryTags) { - if (tags.length != 0) - tags += ', '; - tags += jdx + '= {' + json[i].entryTags[jdx] + '}'; - } - out += tags; - } - out += '}\n\n'; - } - return out; - - }; - - })( exports); - - /* end bibtexParse */ - }); - - // Copyright 2018 The Distill Template Authors - - function normalizeTag(string) { - return string - .replace(/[\t\n ]+/g, ' ') - .replace(/{\\["^`.'acu~Hvs]( )?([a-zA-Z])}/g, (full, x, char) => char) - .replace(/{\\([a-zA-Z])}/g, (full, char) => char); - } - - function parseBibtex(bibtex) { - const bibliography = new Map(); - const parsedEntries = bibtexParse.toJSON(bibtex); - for (const entry of parsedEntries) { - // normalize tags; note entryTags is an object, not Map - for (const [key, value] of Object.entries(entry.entryTags)) { - entry.entryTags[key.toLowerCase()] = normalizeTag(value); - } - entry.entryTags.type = entry.entryType; - // add to bibliography - bibliography.set(entry.citationKey, entry.entryTags); - } - return bibliography; - } - - function serializeFrontmatterToBibtex(frontMatter) { - return `@article{${frontMatter.slug}, - author = {${frontMatter.bibtexAuthors}}, - title = {${frontMatter.title}}, - journal = {${frontMatter.journal.title}}, - year = {${frontMatter.publishedYear}}, - note = {${frontMatter.url}}, - doi = {${frontMatter.doi}} -}`; - } - - // Copyright 2018 The Distill Template Authors - - function parseBibliography(element) { - const scriptTag = element.firstElementChild; - if 
(scriptTag && scriptTag.tagName === 'SCRIPT') { - if (scriptTag.type == 'text/bibtex') { - const bibtex = element.firstElementChild.textContent; - return parseBibtex(bibtex); - } else if (scriptTag.type == 'text/json') { - return new Map(JSON.parse(scriptTag.textContent)); - } else { - console.warn('Unsupported bibliography script tag type: ' + scriptTag.type); - } - } else { - console.warn('Bibliography did not have any script tag.'); - } - } - - // Copyright 2018 The Distill Template Authors - - function ExtractBibliography(dom, data) { - const bibliographyTag = dom.querySelector('d-bibliography'); - if (!bibliographyTag) { - console.warn('No bibliography tag found!'); - return; - } - - const src = bibliographyTag.getAttribute('src'); - if (src) { - const path = data.inputDirectory + '/' + src; - const text = fs.readFileSync(path, 'utf-8'); - const bibliography = parseBibtex(text); - const scriptTag = dom.createElement('script'); - scriptTag.type = 'text/json'; - scriptTag.textContent = JSON.stringify([...bibliography]); - bibliographyTag.appendChild(scriptTag); - bibliographyTag.removeAttribute('src'); - } - - data.bibliography = parseBibliography(bibliographyTag); - } - - // Copyright 2018 The Distill Template Authors - // - // Licensed under the Apache License, Version 2.0 (the "License"); - // you may not use this file except in compliance with the License. - // You may obtain a copy of the License at - // - // http://www.apache.org/licenses/LICENSE-2.0 - // - // Unless required by applicable law or agreed to in writing, software - // distributed under the License is distributed on an "AS IS" BASIS, - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - // See the License for the specific language governing permissions and - // limitations under the License. 
- - function collect_citations(dom = document) { - const citations = new Set(); - const citeTags = dom.querySelectorAll("d-cite"); - for (const tag of citeTags) { - const keyString = tag.getAttribute("key") || tag.getAttribute("bibtex-key"); - const keys = keyString.split(",").map(k => k.trim()); - for (const key of keys) { - citations.add(key); - } - } - return [...citations]; - } - - function author_string(ent, template, sep, finalSep) { - if (ent.author == null) { - return ""; - } - var names = ent.author.split(" and "); - let name_strings = names.map(name => { - name = name.trim(); - if (name.indexOf(",") != -1) { - var last = name.split(",")[0].trim(); - var firsts = name.split(",")[1]; - } else if (name.indexOf(" ") != -1) { - var last = name - .split(" ") - .slice(-1)[0] - .trim(); - var firsts = name - .split(" ") - .slice(0, -1) - .join(" "); - } else { - var last = name.trim(); - } - var initials = ""; - if (firsts != undefined) { - initials = firsts - .trim() - .split(" ") - .map(s => s.trim()[0]); - initials = initials.join(".") + "."; - } - return template - .replace("${F}", firsts) - .replace("${L}", last) - .replace("${I}", initials) - .trim(); // in case one of first or last was empty - }); - if (names.length > 1) { - var str = name_strings.slice(0, names.length - 1).join(sep); - str += (finalSep || sep) + name_strings[names.length - 1]; - return str; - } else { - return name_strings[0]; - } - } - - function venue_string(ent) { - var cite = ent.journal || ent.booktitle || ""; - if ("volume" in ent) { - var issue = ent.issue || ent.number; - issue = issue != undefined ? "(" + issue + ")" : ""; - cite += ", Vol " + ent.volume + issue; - } - if ("pages" in ent) { - cite += ", pp. " + ent.pages; - } - if (cite != "") cite += ". 
"; - if ("publisher" in ent) { - cite += ent.publisher; - if (cite[cite.length - 1] != ".") cite += "."; - } - return cite; - } - - function link_string(ent) { - if ("url" in ent) { - var url = ent.url; - var arxiv_match = /arxiv\.org\/abs\/([0-9\.]*)/.exec(url); - if (arxiv_match != null) { - url = `http://arxiv.org/pdf/${arxiv_match[1]}.pdf`; - } - - if (url.slice(-4) == ".pdf") { - var label = "PDF"; - } else if (url.slice(-5) == ".html") { - var label = "HTML"; - } - return `  [${label || "link"}]`; - } /* else if ("doi" in ent){ - return `  [DOI]`; - }*/ else { - return ""; - } - } - function doi_string(ent, new_line) { - if ("doi" in ent) { - return `${new_line ? "
      " : ""} DOI: ${ent.doi}`; - } else { - return ""; - } - } - - function title_string(ent) { - return '' + ent.title + " "; - } - - function bibliography_cite(ent, fancy) { - if (ent) { - var cite = title_string(ent); - cite += link_string(ent) + "
      "; - if (ent.author) { - cite += author_string(ent, "${L}, ${I}", ", ", " and "); - if (ent.year || ent.date) { - cite += ", "; - } - } - if (ent.year || ent.date) { - cite += (ent.year || ent.date) + ". "; - } else { - cite += ". "; - } - cite += venue_string(ent); - cite += doi_string(ent); - return cite; - /*var cite = author_string(ent, "${L}, ${I}", ", ", " and "); - if (ent.year || ent.date){ - cite += ", " + (ent.year || ent.date) + ". " - } else { - cite += ". " - } - cite += "" + ent.title + ". "; - cite += venue_string(ent); - cite += doi_string(ent); - cite += link_string(ent); - return cite*/ - } else { - return "?"; - } - } - - // Copyright 2018 The Distill Template Authors - - function ExtractCitations(dom, data) { - const citations = new Set(data.citations); - const newCitations = collect_citations(dom); - for (const citation of newCitations) { - citations.add(citation); - } - data.citations = Array.from(citations); - } - - // Copyright 2018 The Distill Template Authors - // - // Licensed under the Apache License, Version 2.0 (the "License"); - // you may not use this file except in compliance with the License. - // You may obtain a copy of the License at - // - // http://www.apache.org/licenses/LICENSE-2.0 - // - // Unless required by applicable law or agreed to in writing, software - // distributed under the License is distributed on an "AS IS" BASIS, - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - // See the License for the specific language governing permissions and - // limitations under the License. 
- - function HTML(dom) { - - const head = dom.querySelector('head'); - - // set language to 'en' - if (!dom.querySelector('html').getAttribute('lang')) { - dom.querySelector('html').setAttribute('lang', 'en'); - } - - // set charset to 'utf-8' - if (!dom.querySelector('meta[charset]')) { - const meta = dom.createElement('meta'); - meta.setAttribute('charset', 'utf-8'); - head.appendChild(meta); - } - - // set viewport - if (!dom.querySelector('meta[name=viewport]')) { - const meta = dom.createElement('meta'); - meta.setAttribute('name', 'viewport'); - meta.setAttribute('content', 'width=device-width, initial-scale=1'); - head.appendChild(meta); - } - } - - // Copyright 2018 The Distill Template Authors - // - // Licensed under the Apache License, Version 2.0 (the "License"); - // you may not use this file except in compliance with the License. - // You may obtain a copy of the License at - // - // http://www.apache.org/licenses/LICENSE-2.0 - // - // Unless required by applicable law or agreed to in writing, software - // distributed under the License is distributed on an "AS IS" BASIS, - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - // See the License for the specific language governing permissions and - // limitations under the License. - - // import style from '../styles/d-byline.css'; - - function bylineTemplate(frontMatter) { - return ` - -`; - } - - // Copyright 2018 The Distill Template Authors - - function Byline(dom, data) { - const byline = dom.querySelector('d-byline'); - if (byline) { - byline.innerHTML = bylineTemplate(data); - } - } - - // Copyright 2018 The Distill Template Authors - // - // Licensed under the Apache License, Version 2.0 (the "License"); - // you may not use this file except in compliance with the License. 
- // You may obtain a copy of the License at - // - // http://www.apache.org/licenses/LICENSE-2.0 - // - // Unless required by applicable law or agreed to in writing, software - // distributed under the License is distributed on an "AS IS" BASIS, - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - // See the License for the specific language governing permissions and - // limitations under the License. - - // no appendix -> add appendix - // title in front, no h1 -> add it - // no title in front, h1 -> read and put into frontMatter - // footnote -> footnote list - // break up bib - // if citation, no bib-list -> add citation-list - - // if authors, no byline -> add byline - - function OptionalComponents(dom, data) { - const body = dom.body; - const article = body.querySelector('d-article'); - - // If we don't have an article tag, something weird is going on—giving up. - if (!article) { - console.warn('No d-article tag found; skipping adding optional components!'); - return; - } - - let byline = dom.querySelector('d-byline'); - if (!byline) { - if (data.authors) { - byline = dom.createElement('d-byline'); - body.insertBefore(byline, article); - } else { - console.warn('No authors found in front matter; please add them before submission!'); - } - } - - let title = dom.querySelector('d-title'); - if (!title) { - title = dom.createElement('d-title'); - body.insertBefore(title, byline); - } - - let h1 = title.querySelector('h1'); - if (!h1) { - h1 = dom.createElement('h1'); - h1.textContent = data.title; - title.insertBefore(h1, title.firstChild); - } - - const hasPassword = typeof data.password !== 'undefined'; - let interstitial = body.querySelector('d-interstitial'); - if (hasPassword && !interstitial) { - const inBrowser = typeof window !== 'undefined'; - const onLocalhost = inBrowser && window.location.hostname.includes('localhost'); - if (!inBrowser || !onLocalhost) { - interstitial = dom.createElement('d-interstitial'); - 
interstitial.password = data.password; - body.insertBefore(interstitial, body.firstChild); - } - } else if (!hasPassword && interstitial) { - interstitial.parentElement.removeChild(this); - } - - let appendix = dom.querySelector('d-appendix'); - if (!appendix) { - appendix = dom.createElement('d-appendix'); - dom.body.appendChild(appendix); - } - - let footnoteList = dom.querySelector('d-footnote-list'); - if (!footnoteList) { - footnoteList = dom.createElement('d-footnote-list'); - appendix.appendChild(footnoteList); - } - - let citationList = dom.querySelector('d-citation-list'); - if (!citationList) { - citationList = dom.createElement('d-citation-list'); - appendix.appendChild(citationList); - } - - } - - var katex$1 = createCommonjsModule(function (module, exports) { - (function(f){{module.exports=f();}})(function(){return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof commonjsRequire=="function"&&commonjsRequire;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r);}return n[o].exports}var i=typeof commonjsRequire=="function"&&commonjsRequire;for(var o=0;o= 0; --i) { - tok = expansion[i]; - if (tok.text === "#") { - if (i === 0) { - throw new _ParseError2.default("Incomplete placeholder at end of macro body", tok); - } - tok = expansion[--i]; // next token on stack - if (tok.text === "#") { - // ## → # - expansion.splice(i + 1, 1); // drop first # - } else if (/^[1-9]$/.test(tok.text)) { - // expansion.splice(i, 2, arg[0], arg[1], …) - // to replace placeholder with the indicated argument. 
- // TODO: use spread once we move to ES2015 - expansion.splice.apply(expansion, [i, 2].concat(args[tok.text - 1])); - } else { - throw new _ParseError2.default("Not a valid argument number", tok); - } - } - } - } - this.stack = this.stack.concat(expansion); - } - } - }, { - key: "get", - value: function get(ignoreSpace) { - this.discardedWhiteSpace = []; - var token = this.nextToken(); - if (ignoreSpace) { - while (token.text === " ") { - this.discardedWhiteSpace.push(token); - token = this.nextToken(); - } - } - return token; - } - - /** - * Undo the effect of the preceding call to the get method. - * A call to this method MUST be immediately preceded and immediately followed - * by a call to get. Only used during mode switching, i.e. after one token - * was got in the old mode but should get got again in a new mode - * with possibly different whitespace handling. - */ - - }, { - key: "unget", - value: function unget(token) { - this.stack.push(token); - while (this.discardedWhiteSpace.length !== 0) { - this.stack.push(this.discardedWhiteSpace.pop()); - } - } - }]); - return MacroExpander; - }(); - - module.exports = MacroExpander; - - },{"./Lexer":26,"./ParseError":29,"./macros":44,"babel-runtime/helpers/classCallCheck":4,"babel-runtime/helpers/createClass":5,"object-assign":25}],28:[function(require,module,exports){ - - var _classCallCheck2 = require("babel-runtime/helpers/classCallCheck"); - - var _classCallCheck3 = _interopRequireDefault(_classCallCheck2); - - var _createClass2 = require("babel-runtime/helpers/createClass"); - - var _createClass3 = _interopRequireDefault(_createClass2); - - var _fontMetrics2 = require("./fontMetrics"); - - var _fontMetrics3 = _interopRequireDefault(_fontMetrics2); - - function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - - var BASESIZE = 6; /** - * This file contains information about the options that the Parser carries - * around with it while parsing. 
Data is held in an `Options` object, and when - * recursing, a new `Options` object can be created with the `.with*` and - * `.reset` functions. - */ - - var sizeStyleMap = [ - // Each element contains [textsize, scriptsize, scriptscriptsize]. - // The size mappings are taken from TeX with \normalsize=10pt. - [1, 1, 1], // size1: [5, 5, 5] \tiny - [2, 1, 1], // size2: [6, 5, 5] - [3, 1, 1], // size3: [7, 5, 5] \scriptsize - [4, 2, 1], // size4: [8, 6, 5] \footnotesize - [5, 2, 1], // size5: [9, 6, 5] \small - [6, 3, 1], // size6: [10, 7, 5] \normalsize - [7, 4, 2], // size7: [12, 8, 6] \large - [8, 6, 3], // size8: [14.4, 10, 7] \Large - [9, 7, 6], // size9: [17.28, 12, 10] \LARGE - [10, 8, 7], // size10: [20.74, 14.4, 12] \huge - [11, 10, 9]]; - - var sizeMultipliers = [ - // fontMetrics.js:getFontMetrics also uses size indexes, so if - // you change size indexes, change that function. - 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.44, 1.728, 2.074, 2.488]; - - var sizeAtStyle = function sizeAtStyle(size, style) { - return style.size < 2 ? size : sizeStyleMap[size - 1][style.size - 1]; - }; - - /** - * This is the main options class. It contains the current style, size, color, - * and font. - * - * Options objects should not be modified. To create a new Options with - * different properties, call a `.having*` method. - */ - - var Options = function () { - function Options(data) { - (0, _classCallCheck3.default)(this, Options); - - this.style = data.style; - this.color = data.color; - this.size = data.size || BASESIZE; - this.textSize = data.textSize || this.size; - this.phantom = data.phantom; - this.font = data.font; - this.sizeMultiplier = sizeMultipliers[this.size - 1]; - this._fontMetrics = null; - } - - /** - * Returns a new options object with the same properties as "this". Properties - * from "extension" will be copied to the new options object. 
- */ - - - (0, _createClass3.default)(Options, [{ - key: "extend", - value: function extend(extension) { - var data = { - style: this.style, - size: this.size, - textSize: this.textSize, - color: this.color, - phantom: this.phantom, - font: this.font - }; - - for (var key in extension) { - if (extension.hasOwnProperty(key)) { - data[key] = extension[key]; - } - } - - return new Options(data); - } - - /** - * Return an options object with the given style. If `this.style === style`, - * returns `this`. - */ - - }, { - key: "havingStyle", - value: function havingStyle(style) { - if (this.style === style) { - return this; - } else { - return this.extend({ - style: style, - size: sizeAtStyle(this.textSize, style) - }); - } - } - - /** - * Return an options object with a cramped version of the current style. If - * the current style is cramped, returns `this`. - */ - - }, { - key: "havingCrampedStyle", - value: function havingCrampedStyle() { - return this.havingStyle(this.style.cramp()); - } - - /** - * Return an options object with the given size and in at least `\textstyle`. - * Returns `this` if appropriate. - */ - - }, { - key: "havingSize", - value: function havingSize(size) { - if (this.size === size && this.textSize === size) { - return this; - } else { - return this.extend({ - style: this.style.text(), - size: size, - textSize: size - }); - } - } - - /** - * Like `this.havingSize(BASESIZE).havingStyle(style)`. If `style` is omitted, - * changes to at least `\textstyle`. - */ - - }, { - key: "havingBaseStyle", - value: function havingBaseStyle(style) { - style = style || this.style.text(); - var wantSize = sizeAtStyle(BASESIZE, style); - if (this.size === wantSize && this.textSize === BASESIZE && this.style === style) { - return this; - } else { - return this.extend({ - style: style, - size: wantSize, - baseSize: BASESIZE - }); - } - } - - /** - * Create a new options object with the given color. 
- */ - - }, { - key: "withColor", - value: function withColor(color) { - return this.extend({ - color: color - }); - } - - /** - * Create a new options object with "phantom" set to true. - */ - - }, { - key: "withPhantom", - value: function withPhantom() { - return this.extend({ - phantom: true - }); - } - - /** - * Create a new options objects with the give font. - */ - - }, { - key: "withFont", - value: function withFont(font) { - return this.extend({ - font: font || this.font - }); - } - - /** - * Return the CSS sizing classes required to switch from enclosing options - * `oldOptions` to `this`. Returns an array of classes. - */ - - }, { - key: "sizingClasses", - value: function sizingClasses(oldOptions) { - if (oldOptions.size !== this.size) { - return ["sizing", "reset-size" + oldOptions.size, "size" + this.size]; - } else { - return []; - } - } - - /** - * Return the CSS sizing classes required to switch to the base size. Like - * `this.havingSize(BASESIZE).sizingClasses(this)`. - */ - - }, { - key: "baseSizingClasses", - value: function baseSizingClasses() { - if (this.size !== BASESIZE) { - return ["sizing", "reset-size" + this.size, "size" + BASESIZE]; - } else { - return []; - } - } - - /** - * Return the font metrics for this size. - */ - - }, { - key: "fontMetrics", - value: function fontMetrics() { - if (!this._fontMetrics) { - this._fontMetrics = _fontMetrics3.default.getFontMetrics(this.size); - } - return this._fontMetrics; - } - - /** - * A map of color names to CSS colors. - * TODO(emily): Remove this when we have real macros - */ - - }, { - key: "getColor", - - - /** - * Gets the CSS color of the current options object, accounting for the - * `colorMap`. - */ - value: function getColor() { - if (this.phantom) { - return "transparent"; - } else { - return Options.colorMap[this.color] || this.color; - } - } - }]); - return Options; - }(); - - /** - * The base size index. 
- */ - - - Options.colorMap = { - "katex-blue": "#6495ed", - "katex-orange": "#ffa500", - "katex-pink": "#ff00af", - "katex-red": "#df0030", - "katex-green": "#28ae7b", - "katex-gray": "gray", - "katex-purple": "#9d38bd", - "katex-blueA": "#ccfaff", - "katex-blueB": "#80f6ff", - "katex-blueC": "#63d9ea", - "katex-blueD": "#11accd", - "katex-blueE": "#0c7f99", - "katex-tealA": "#94fff5", - "katex-tealB": "#26edd5", - "katex-tealC": "#01d1c1", - "katex-tealD": "#01a995", - "katex-tealE": "#208170", - "katex-greenA": "#b6ffb0", - "katex-greenB": "#8af281", - "katex-greenC": "#74cf70", - "katex-greenD": "#1fab54", - "katex-greenE": "#0d923f", - "katex-goldA": "#ffd0a9", - "katex-goldB": "#ffbb71", - "katex-goldC": "#ff9c39", - "katex-goldD": "#e07d10", - "katex-goldE": "#a75a05", - "katex-redA": "#fca9a9", - "katex-redB": "#ff8482", - "katex-redC": "#f9685d", - "katex-redD": "#e84d39", - "katex-redE": "#bc2612", - "katex-maroonA": "#ffbde0", - "katex-maroonB": "#ff92c6", - "katex-maroonC": "#ed5fa6", - "katex-maroonD": "#ca337c", - "katex-maroonE": "#9e034e", - "katex-purpleA": "#ddd7ff", - "katex-purpleB": "#c6b9fc", - "katex-purpleC": "#aa87ff", - "katex-purpleD": "#7854ab", - "katex-purpleE": "#543b78", - "katex-mintA": "#f5f9e8", - "katex-mintB": "#edf2df", - "katex-mintC": "#e0e5cc", - "katex-grayA": "#f6f7f7", - "katex-grayB": "#f0f1f2", - "katex-grayC": "#e3e5e6", - "katex-grayD": "#d6d8da", - "katex-grayE": "#babec2", - "katex-grayF": "#888d93", - "katex-grayG": "#626569", - "katex-grayH": "#3b3e40", - "katex-grayI": "#21242c", - "katex-kaBlue": "#314453", - "katex-kaGreen": "#71B307" - }; - Options.BASESIZE = BASESIZE; - - module.exports = Options; - - },{"./fontMetrics":41,"babel-runtime/helpers/classCallCheck":4,"babel-runtime/helpers/createClass":5}],29:[function(require,module,exports){ - - var _classCallCheck2 = require("babel-runtime/helpers/classCallCheck"); - - var _classCallCheck3 = _interopRequireDefault(_classCallCheck2); - - function 
_interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - - /** - * This is the ParseError class, which is the main error thrown by KaTeX - * functions when something has gone wrong. This is used to distinguish internal - * errors from errors in the expression that the user provided. - * - * If possible, a caller should provide a Token or ParseNode with information - * about where in the source string the problem occurred. - * - * @param {string} message The error message - * @param {(Token|ParseNode)=} token An object providing position information - */ - var ParseError = function ParseError(message, token) { - (0, _classCallCheck3.default)(this, ParseError); - - var error = "KaTeX parse error: " + message; - var start = void 0; - var end = void 0; - - if (token && token.lexer && token.start <= token.end) { - // If we have the input and a position, make the error a bit fancier - - // Get the input - var input = token.lexer.input; - - // Prepend some information - start = token.start; - end = token.end; - if (start === input.length) { - error += " at end of input: "; - } else { - error += " at position " + (start + 1) + ": "; - } - - // Underline token in question using combining underscores - var underlined = input.slice(start, end).replace(/[^]/g, "$&\u0332"); - - // Extract some context from the input and add it to the error - var left = void 0; - if (start > 15) { - left = "…" + input.slice(start - 15, start); - } else { - left = input.slice(0, start); - } - var right = void 0; - if (end + 15 < input.length) { - right = input.slice(end, end + 15) + "…"; - } else { - right = input.slice(end); - } - error += left + underlined + right; - } - - // Some hackery to make ParseError a prototype of Error - // See http://stackoverflow.com/a/8460753 - var self = new Error(error); - self.name = "ParseError"; - self.__proto__ = ParseError.prototype; - - self.position = start; - return self; - }; - - // More hackery - - - 
ParseError.prototype.__proto__ = Error.prototype; - - module.exports = ParseError; - - },{"babel-runtime/helpers/classCallCheck":4}],30:[function(require,module,exports){ - - Object.defineProperty(exports, "__esModule", { - value: true - }); - - var _classCallCheck2 = require("babel-runtime/helpers/classCallCheck"); - - var _classCallCheck3 = _interopRequireDefault(_classCallCheck2); - - function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - - /** - * The resulting parse tree nodes of the parse tree. - * - * It is possible to provide position information, so that a ParseNode can - * fulfil a role similar to a Token in error reporting. - * For details on the corresponding properties see Token constructor. - * Providing such information can lead to better error reporting. - * - * @param {string} type type of node, like e.g. "ordgroup" - * @param {?object} value type-specific representation of the node - * @param {string} mode parse mode in action for this node, - * "math" or "text" - * @param {Token=} firstToken first token of the input for this node, - * will omit position information if unset - * @param {Token=} lastToken last token of the input for this node, - * will default to firstToken if unset - */ - var ParseNode = function ParseNode(type, value, mode, firstToken, lastToken) { - (0, _classCallCheck3.default)(this, ParseNode); - - this.type = type; - this.value = value; - this.mode = mode; - if (firstToken && (!lastToken || lastToken.lexer === firstToken.lexer)) { - this.lexer = firstToken.lexer; - this.start = firstToken.start; - this.end = (lastToken || firstToken).end; - } - }; - - exports.default = ParseNode; - - },{"babel-runtime/helpers/classCallCheck":4}],31:[function(require,module,exports){ - - var _classCallCheck2 = require("babel-runtime/helpers/classCallCheck"); - - var _classCallCheck3 = _interopRequireDefault(_classCallCheck2); - - var _createClass2 = require("babel-runtime/helpers/createClass"); - - 
var _createClass3 = _interopRequireDefault(_createClass2); - - var _functions = require("./functions"); - - var _functions2 = _interopRequireDefault(_functions); - - var _environments = require("./environments"); - - var _environments2 = _interopRequireDefault(_environments); - - var _MacroExpander = require("./MacroExpander"); - - var _MacroExpander2 = _interopRequireDefault(_MacroExpander); - - var _symbols = require("./symbols"); - - var _symbols2 = _interopRequireDefault(_symbols); - - var _utils = require("./utils"); - - var _utils2 = _interopRequireDefault(_utils); - - var _units = require("./units"); - - var _units2 = _interopRequireDefault(_units); - - var _unicodeRegexes = require("./unicodeRegexes"); - - var _ParseNode = require("./ParseNode"); - - var _ParseNode2 = _interopRequireDefault(_ParseNode); - - var _ParseError = require("./ParseError"); - - var _ParseError2 = _interopRequireDefault(_ParseError); - - function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - - /** - * This file contains the parser used to parse out a TeX expression from the - * input. Since TeX isn't context-free, standard parsers don't work particularly - * well. - * - * The strategy of this parser is as such: - * - * The main functions (the `.parse...` ones) take a position in the current - * parse string to parse tokens from. The lexer (found in Lexer.js, stored at - * this.lexer) also supports pulling out tokens at arbitrary places. When - * individual tokens are needed at a position, the lexer is called to pull out a - * token, which is then used. - * - * The parser has a property called "mode" indicating the mode that - * the parser is currently in. Currently it has to be one of "math" or - * "text", which denotes whether the current environment is a math-y - * one or a text-y one (e.g. inside \text). Currently, this serves to - * limit the functions which can be used in text mode. 
- * - * The main functions then return an object which contains the useful data that - * was parsed at its given point, and a new position at the end of the parsed - * data. The main functions can call each other and continue the parsing by - * using the returned position as a new starting point. - * - * There are also extra `.handle...` functions, which pull out some reused - * functionality into self-contained functions. - * - * The earlier functions return ParseNodes. - * The later functions (which are called deeper in the parse) sometimes return - * ParseFuncOrArgument, which contain a ParseNode as well as some data about - * whether the parsed object is a function which is missing some arguments, or a - * standalone object which can be used as an argument to another function. - */ - - /** - * An initial function (without its arguments), or an argument to a function. - * The `result` argument should be a ParseNode. - */ - function ParseFuncOrArgument(result, isFunction, token) { - this.result = result; - // Is this a function (i.e. is it something defined in functions.js)? - this.isFunction = isFunction; - this.token = token; - } /* eslint no-constant-condition:0 */ - - var Parser = function () { - function Parser(input, settings) { - (0, _classCallCheck3.default)(this, Parser); - - // Create a new macro expander (gullet) and (indirectly via that) also a - // new lexer (mouth) for this parser (stomach, in the language of TeX) - this.gullet = new _MacroExpander2.default(input, settings.macros); - // Use old \color behavior (same as LaTeX's \textcolor) if requested. - // We do this after the macros object has been copied by MacroExpander. 
- if (settings.colorIsTextColor) { - this.gullet.macros["\\color"] = "\\textcolor"; - } - // Store the settings for use in parsing - this.settings = settings; - // Count leftright depth (for \middle errors) - this.leftrightDepth = 0; - } - - /** - * Checks a result to make sure it has the right type, and throws an - * appropriate error otherwise. - * - * @param {boolean=} consume whether to consume the expected token, - * defaults to true - */ - - - (0, _createClass3.default)(Parser, [{ - key: "expect", - value: function expect(text, consume) { - if (this.nextToken.text !== text) { - throw new _ParseError2.default("Expected '" + text + "', got '" + this.nextToken.text + "'", this.nextToken); - } - if (consume !== false) { - this.consume(); - } - } - - /** - * Considers the current look ahead token as consumed, - * and fetches the one after that as the new look ahead. - */ - - }, { - key: "consume", - value: function consume() { - this.nextToken = this.gullet.get(this.mode === "math"); - } - }, { - key: "switchMode", - value: function switchMode(newMode) { - this.gullet.unget(this.nextToken); - this.mode = newMode; - this.consume(); - } - - /** - * Main parsing function, which parses an entire input. - * - * @return {?Array.} - */ - - }, { - key: "parse", - value: function parse() { - // Try to parse the input - this.mode = "math"; - this.consume(); - var parse = this.parseInput(); - return parse; - } - - /** - * Parses an entire input tree. - */ - - }, { - key: "parseInput", - value: function parseInput() { - // Parse an expression - var expression = this.parseExpression(false); - // If we succeeded, make sure there's an EOF at the end - this.expect("EOF", false); - return expression; - } - }, { - key: "parseExpression", - - - /** - * Parses an "expression", which is a list of atoms. - * - * @param {boolean} breakOnInfix Should the parsing stop when we hit infix - * nodes? This happens when functions have higher precendence - * than infix nodes in implicit parses. 
- * - * @param {?string} breakOnTokenText The text of the token that the expression - * should end with, or `null` if something else should end the - * expression. - * - * @return {ParseNode} - */ - value: function parseExpression(breakOnInfix, breakOnTokenText) { - var body = []; - // Keep adding atoms to the body until we can't parse any more atoms (either - // we reached the end, a }, or a \right) - while (true) { - var lex = this.nextToken; - if (Parser.endOfExpression.indexOf(lex.text) !== -1) { - break; - } - if (breakOnTokenText && lex.text === breakOnTokenText) { - break; - } - if (breakOnInfix && _functions2.default[lex.text] && _functions2.default[lex.text].infix) { - break; - } - var atom = this.parseAtom(); - if (!atom) { - if (!this.settings.throwOnError && lex.text[0] === "\\") { - var errorNode = this.handleUnsupportedCmd(); - body.push(errorNode); - continue; - } - - break; - } - body.push(atom); - } - return this.handleInfixNodes(body); - } - - /** - * Rewrites infix operators such as \over with corresponding commands such - * as \frac. - * - * There can only be one infix operator per group. If there's more than one - * then the expression is ambiguous. This can be resolved by adding {}. 
- * - * @returns {Array} - */ - - }, { - key: "handleInfixNodes", - value: function handleInfixNodes(body) { - var overIndex = -1; - var funcName = void 0; - - for (var i = 0; i < body.length; i++) { - var node = body[i]; - if (node.type === "infix") { - if (overIndex !== -1) { - throw new _ParseError2.default("only one infix operator per group", node.value.token); - } - overIndex = i; - funcName = node.value.replaceWith; - } - } - - if (overIndex !== -1) { - var numerNode = void 0; - var denomNode = void 0; - - var numerBody = body.slice(0, overIndex); - var denomBody = body.slice(overIndex + 1); - - if (numerBody.length === 1 && numerBody[0].type === "ordgroup") { - numerNode = numerBody[0]; - } else { - numerNode = new _ParseNode2.default("ordgroup", numerBody, this.mode); - } - - if (denomBody.length === 1 && denomBody[0].type === "ordgroup") { - denomNode = denomBody[0]; - } else { - denomNode = new _ParseNode2.default("ordgroup", denomBody, this.mode); - } - - var value = this.callFunction(funcName, [numerNode, denomNode], null); - return [new _ParseNode2.default(value.type, value, this.mode)]; - } else { - return body; - } - } - - // The greediness of a superscript or subscript - - }, { - key: "handleSupSubscript", - - - /** - * Handle a subscript or superscript with nice errors. 
- */ - value: function handleSupSubscript(name) { - var symbolToken = this.nextToken; - var symbol = symbolToken.text; - this.consume(); - var group = this.parseGroup(); - - if (!group) { - if (!this.settings.throwOnError && this.nextToken.text[0] === "\\") { - return this.handleUnsupportedCmd(); - } else { - throw new _ParseError2.default("Expected group after '" + symbol + "'", symbolToken); - } - } else if (group.isFunction) { - // ^ and _ have a greediness, so handle interactions with functions' - // greediness - var funcGreediness = _functions2.default[group.result].greediness; - if (funcGreediness > Parser.SUPSUB_GREEDINESS) { - return this.parseFunction(group); - } else { - throw new _ParseError2.default("Got function '" + group.result + "' with no arguments " + "as " + name, symbolToken); - } - } else { - return group.result; - } - } - - /** - * Converts the textual input of an unsupported command into a text node - * contained within a color node whose color is determined by errorColor - */ - - }, { - key: "handleUnsupportedCmd", - value: function handleUnsupportedCmd() { - var text = this.nextToken.text; - var textordArray = []; - - for (var i = 0; i < text.length; i++) { - textordArray.push(new _ParseNode2.default("textord", text[i], "text")); - } - - var textNode = new _ParseNode2.default("text", { - body: textordArray, - type: "text" - }, this.mode); - - var colorNode = new _ParseNode2.default("color", { - color: this.settings.errorColor, - value: [textNode], - type: "color" - }, this.mode); - - this.consume(); - return colorNode; - } - - /** - * Parses a group with optional super/subscripts. - * - * @return {?ParseNode} - */ - - }, { - key: "parseAtom", - value: function parseAtom() { - // The body of an atom is an implicit group, so that things like - // \left(x\right)^2 work correctly. 
- var base = this.parseImplicitGroup(); - - // In text mode, we don't have superscripts or subscripts - if (this.mode === "text") { - return base; - } - - // Note that base may be empty (i.e. null) at this point. - - var superscript = void 0; - var subscript = void 0; - while (true) { - // Lex the first token - var lex = this.nextToken; - - if (lex.text === "\\limits" || lex.text === "\\nolimits") { - // We got a limit control - if (!base || base.type !== "op") { - throw new _ParseError2.default("Limit controls must follow a math operator", lex); - } else { - var limits = lex.text === "\\limits"; - base.value.limits = limits; - base.value.alwaysHandleSupSub = true; - } - this.consume(); - } else if (lex.text === "^") { - // We got a superscript start - if (superscript) { - throw new _ParseError2.default("Double superscript", lex); - } - superscript = this.handleSupSubscript("superscript"); - } else if (lex.text === "_") { - // We got a subscript start - if (subscript) { - throw new _ParseError2.default("Double subscript", lex); - } - subscript = this.handleSupSubscript("subscript"); - } else if (lex.text === "'") { - // We got a prime - if (superscript) { - throw new _ParseError2.default("Double superscript", lex); - } - var prime = new _ParseNode2.default("textord", "\\prime", this.mode); - - // Many primes can be grouped together, so we handle this here - var primes = [prime]; - this.consume(); - // Keep lexing tokens until we get something that's not a prime - while (this.nextToken.text === "'") { - // For each one, add another prime to the list - primes.push(prime); - this.consume(); - } - // If there's a superscript following the primes, combine that - // superscript in with the primes. 
- if (this.nextToken.text === "^") { - primes.push(this.handleSupSubscript("superscript")); - } - // Put everything into an ordgroup as the superscript - superscript = new _ParseNode2.default("ordgroup", primes, this.mode); - } else { - // If it wasn't ^, _, or ', stop parsing super/subscripts - break; - } - } - - if (superscript || subscript) { - // If we got either a superscript or subscript, create a supsub - return new _ParseNode2.default("supsub", { - base: base, - sup: superscript, - sub: subscript - }, this.mode); - } else { - // Otherwise return the original body - return base; - } - } - - // A list of the size-changing functions, for use in parseImplicitGroup - - - // A list of the style-changing functions, for use in parseImplicitGroup - - - // Old font functions - - }, { - key: "parseImplicitGroup", - - - /** - * Parses an implicit group, which is a group that starts at the end of a - * specified, and ends right before a higher explicit group ends, or at EOL. It - * is used for functions that appear to affect the current style, like \Large or - * \textrm, where instead of keeping a style we just pretend that there is an - * implicit grouping after it until the end of the group. E.g. - * small text {\Large large text} small text again - * It is also used for \left and \right to get the correct grouping. 
- * - * @return {?ParseNode} - */ - value: function parseImplicitGroup() { - var start = this.parseSymbol(); - - if (start == null) { - // If we didn't get anything we handle, fall back to parseFunction - return this.parseFunction(); - } - - var func = start.result; - - if (func === "\\left") { - // If we see a left: - // Parse the entire left function (including the delimiter) - var left = this.parseFunction(start); - // Parse out the implicit body - ++this.leftrightDepth; - var body = this.parseExpression(false); - --this.leftrightDepth; - // Check the next token - this.expect("\\right", false); - var right = this.parseFunction(); - return new _ParseNode2.default("leftright", { - body: body, - left: left.value.value, - right: right.value.value - }, this.mode); - } else if (func === "\\begin") { - // begin...end is similar to left...right - var begin = this.parseFunction(start); - var envName = begin.value.name; - if (!_environments2.default.hasOwnProperty(envName)) { - throw new _ParseError2.default("No such environment: " + envName, begin.value.nameGroup); - } - // Build the environment object. Arguments and other information will - // be made available to the begin and end methods using properties. 
- var env = _environments2.default[envName]; - var args = this.parseArguments("\\begin{" + envName + "}", env); - var context = { - mode: this.mode, - envName: envName, - parser: this, - positions: args.pop() - }; - var result = env.handler(context, args); - this.expect("\\end", false); - var endNameToken = this.nextToken; - var end = this.parseFunction(); - if (end.value.name !== envName) { - throw new _ParseError2.default("Mismatch: \\begin{" + envName + "} matched " + "by \\end{" + end.value.name + "}", endNameToken); - } - result.position = end.position; - return result; - } else if (_utils2.default.contains(Parser.sizeFuncs, func)) { - // If we see a sizing function, parse out the implicit body - this.consumeSpaces(); - var _body = this.parseExpression(false); - return new _ParseNode2.default("sizing", { - // Figure out what size to use based on the list of functions above - size: _utils2.default.indexOf(Parser.sizeFuncs, func) + 1, - value: _body - }, this.mode); - } else if (_utils2.default.contains(Parser.styleFuncs, func)) { - // If we see a styling function, parse out the implicit body - this.consumeSpaces(); - var _body2 = this.parseExpression(true); - return new _ParseNode2.default("styling", { - // Figure out what style to use by pulling out the style from - // the function name - style: func.slice(1, func.length - 5), - value: _body2 - }, this.mode); - } else if (func in Parser.oldFontFuncs) { - var style = Parser.oldFontFuncs[func]; - // If we see an old font function, parse out the implicit body - this.consumeSpaces(); - var _body3 = this.parseExpression(true); - if (style.slice(0, 4) === 'text') { - return new _ParseNode2.default("text", { - style: style, - body: new _ParseNode2.default("ordgroup", _body3, this.mode) - }, this.mode); - } else { - return new _ParseNode2.default("font", { - font: style, - body: new _ParseNode2.default("ordgroup", _body3, this.mode) - }, this.mode); - } - } else if (func === "\\color") { - // If we see a styling 
function, parse out the implicit body - var color = this.parseColorGroup(false); - if (!color) { - throw new _ParseError2.default("\\color not followed by color"); - } - var _body4 = this.parseExpression(true); - return new _ParseNode2.default("color", { - type: "color", - color: color.result.value, - value: _body4 - }, this.mode); - } else if (func === "$") { - if (this.mode === "math") { - throw new _ParseError2.default("$ within math mode"); - } - this.consume(); - var outerMode = this.mode; - this.switchMode("math"); - var _body5 = this.parseExpression(false, "$"); - this.expect("$", true); - this.switchMode(outerMode); - return new _ParseNode2.default("styling", { - style: "text", - value: _body5 - }, "math"); - } else { - // Defer to parseFunction if it's not a function we handle - return this.parseFunction(start); - } - } - - /** - * Parses an entire function, including its base and all of its arguments. - * The base might either have been parsed already, in which case - * it is provided as an argument, or it's the next group in the input. 
- * - * @param {ParseFuncOrArgument=} baseGroup optional as described above - * @return {?ParseNode} - */ - - }, { - key: "parseFunction", - value: function parseFunction(baseGroup) { - if (!baseGroup) { - baseGroup = this.parseGroup(); - } - - if (baseGroup) { - if (baseGroup.isFunction) { - var func = baseGroup.result; - var funcData = _functions2.default[func]; - if (this.mode === "text" && !funcData.allowedInText) { - throw new _ParseError2.default("Can't use function '" + func + "' in text mode", baseGroup.token); - } else if (this.mode === "math" && funcData.allowedInMath === false) { - throw new _ParseError2.default("Can't use function '" + func + "' in math mode", baseGroup.token); - } - - var args = this.parseArguments(func, funcData); - var token = baseGroup.token; - var result = this.callFunction(func, args, args.pop(), token); - return new _ParseNode2.default(result.type, result, this.mode); - } else { - return baseGroup.result; - } - } else { - return null; - } - } - - /** - * Call a function handler with a suitable context and arguments. 
- */ - - }, { - key: "callFunction", - value: function callFunction(name, args, positions, token) { - var context = { - funcName: name, - parser: this, - positions: positions, - token: token - }; - return _functions2.default[name].handler(context, args); - } - - /** - * Parses the arguments of a function or environment - * - * @param {string} func "\name" or "\begin{name}" - * @param {{numArgs:number,numOptionalArgs:number|undefined}} funcData - * @return the array of arguments, with the list of positions as last element - */ - - }, { - key: "parseArguments", - value: function parseArguments(func, funcData) { - var totalArgs = funcData.numArgs + funcData.numOptionalArgs; - if (totalArgs === 0) { - return [[this.pos]]; - } - - var baseGreediness = funcData.greediness; - var positions = [this.pos]; - var args = []; - - for (var i = 0; i < totalArgs; i++) { - var nextToken = this.nextToken; - var argType = funcData.argTypes && funcData.argTypes[i]; - var arg = void 0; - if (i < funcData.numOptionalArgs) { - if (argType) { - arg = this.parseGroupOfType(argType, true); - } else { - arg = this.parseGroup(true); - } - if (!arg) { - args.push(null); - positions.push(this.pos); - continue; - } - } else { - if (argType) { - arg = this.parseGroupOfType(argType); - } else { - arg = this.parseGroup(); - } - if (!arg) { - if (!this.settings.throwOnError && this.nextToken.text[0] === "\\") { - arg = new ParseFuncOrArgument(this.handleUnsupportedCmd(this.nextToken.text), false); - } else { - throw new _ParseError2.default("Expected group after '" + func + "'", nextToken); - } - } - } - var argNode = void 0; - if (arg.isFunction) { - var argGreediness = _functions2.default[arg.result].greediness; - if (argGreediness > baseGreediness) { - argNode = this.parseFunction(arg); - } else { - throw new _ParseError2.default("Got function '" + arg.result + "' as " + "argument to '" + func + "'", nextToken); - } - } else { - argNode = arg.result; - } - args.push(argNode); - 
positions.push(this.pos); - } - - args.push(positions); - - return args; - } - - /** - * Parses a group when the mode is changing. - * - * @return {?ParseFuncOrArgument} - */ - - }, { - key: "parseGroupOfType", - value: function parseGroupOfType(innerMode, optional) { - var outerMode = this.mode; - // Handle `original` argTypes - if (innerMode === "original") { - innerMode = outerMode; - } - - if (innerMode === "color") { - return this.parseColorGroup(optional); - } - if (innerMode === "size") { - return this.parseSizeGroup(optional); - } - - this.switchMode(innerMode); - if (innerMode === "text") { - // text mode is special because it should ignore the whitespace before - // it - this.consumeSpaces(); - } - // By the time we get here, innerMode is one of "text" or "math". - // We switch the mode of the parser, recurse, then restore the old mode. - var res = this.parseGroup(optional); - this.switchMode(outerMode); - return res; - } - }, { - key: "consumeSpaces", - value: function consumeSpaces() { - while (this.nextToken.text === " ") { - this.consume(); - } - } - - /** - * Parses a group, essentially returning the string formed by the - * brace-enclosed tokens plus some position information. - * - * @param {string} modeName Used to describe the mode in error messages - * @param {boolean=} optional Whether the group is optional or required - */ - - }, { - key: "parseStringGroup", - value: function parseStringGroup(modeName, optional) { - if (optional && this.nextToken.text !== "[") { - return null; - } - var outerMode = this.mode; - this.mode = "text"; - this.expect(optional ? "[" : "{"); - var str = ""; - var firstToken = this.nextToken; - var lastToken = firstToken; - while (this.nextToken.text !== (optional ? 
"]" : "}")) { - if (this.nextToken.text === "EOF") { - throw new _ParseError2.default("Unexpected end of input in " + modeName, firstToken.range(this.nextToken, str)); - } - lastToken = this.nextToken; - str += lastToken.text; - this.consume(); - } - this.mode = outerMode; - this.expect(optional ? "]" : "}"); - return firstToken.range(lastToken, str); - } - - /** - * Parses a regex-delimited group: the largest sequence of tokens - * whose concatenated strings match `regex`. Returns the string - * formed by the tokens plus some position information. - * - * @param {RegExp} regex - * @param {string} modeName Used to describe the mode in error messages - */ - - }, { - key: "parseRegexGroup", - value: function parseRegexGroup(regex, modeName) { - var outerMode = this.mode; - this.mode = "text"; - var firstToken = this.nextToken; - var lastToken = firstToken; - var str = ""; - while (this.nextToken.text !== "EOF" && regex.test(str + this.nextToken.text)) { - lastToken = this.nextToken; - str += lastToken.text; - this.consume(); - } - if (str === "") { - throw new _ParseError2.default("Invalid " + modeName + ": '" + firstToken.text + "'", firstToken); - } - this.mode = outerMode; - return firstToken.range(lastToken, str); - } - - /** - * Parses a color description. - */ - - }, { - key: "parseColorGroup", - value: function parseColorGroup(optional) { - var res = this.parseStringGroup("color", optional); - if (!res) { - return null; - } - var match = /^(#[a-z0-9]+|[a-z]+)$/i.exec(res.text); - if (!match) { - throw new _ParseError2.default("Invalid color: '" + res.text + "'", res); - } - return new ParseFuncOrArgument(new _ParseNode2.default("color", match[0], this.mode), false); - } - - /** - * Parses a size specification, consisting of magnitude and unit. - */ - - }, { - key: "parseSizeGroup", - value: function parseSizeGroup(optional) { - var res = void 0; - if (!optional && this.nextToken.text !== "{") { - res = this.parseRegexGroup(/^[-+]? 
*(?:$|\d+|\d+\.\d*|\.\d*) *[a-z]{0,2} *$/, "size"); - } else { - res = this.parseStringGroup("size", optional); - } - if (!res) { - return null; - } - var match = /([-+]?) *(\d+(?:\.\d*)?|\.\d+) *([a-z]{2})/.exec(res.text); - if (!match) { - throw new _ParseError2.default("Invalid size: '" + res.text + "'", res); - } - var data = { - number: +(match[1] + match[2]), // sign + magnitude, cast to number - unit: match[3] - }; - if (!_units2.default.validUnit(data)) { - throw new _ParseError2.default("Invalid unit: '" + data.unit + "'", res); - } - return new ParseFuncOrArgument(new _ParseNode2.default("size", data, this.mode), false); - } - - /** - * If the argument is false or absent, this parses an ordinary group, - * which is either a single nucleus (like "x") or an expression - * in braces (like "{x+y}"). - * If the argument is true, it parses either a bracket-delimited expression - * (like "[x+y]") or returns null to indicate the absence of a - * bracket-enclosed group. - * - * @param {boolean=} optional Whether the group is optional or required - * @return {?ParseFuncOrArgument} - */ - - }, { - key: "parseGroup", - value: function parseGroup(optional) { - var firstToken = this.nextToken; - // Try to parse an open brace - if (this.nextToken.text === (optional ? "[" : "{")) { - // If we get a brace, parse an expression - this.consume(); - var expression = this.parseExpression(false, optional ? "]" : null); - var lastToken = this.nextToken; - // Make sure we get a close brace - this.expect(optional ? "]" : "}"); - if (this.mode === "text") { - this.formLigatures(expression); - } - return new ParseFuncOrArgument(new _ParseNode2.default("ordgroup", expression, this.mode, firstToken, lastToken), false); - } else { - // Otherwise, just return a nucleus, or nothing for an optional group - return optional ? null : this.parseSymbol(); - } - } - - /** - * Form ligature-like combinations of characters for text mode. - * This includes inputs like "--", "---", "``" and "''". 
- * The result will simply replace multiple textord nodes with a single - * character in each value by a single textord node having multiple - * characters in its value. The representation is still ASCII source. - * - * @param {Array.} group the nodes of this group, - * list will be moified in place - */ - - }, { - key: "formLigatures", - value: function formLigatures(group) { - var n = group.length - 1; - for (var i = 0; i < n; ++i) { - var a = group[i]; - var v = a.value; - if (v === "-" && group[i + 1].value === "-") { - if (i + 1 < n && group[i + 2].value === "-") { - group.splice(i, 3, new _ParseNode2.default("textord", "---", "text", a, group[i + 2])); - n -= 2; - } else { - group.splice(i, 2, new _ParseNode2.default("textord", "--", "text", a, group[i + 1])); - n -= 1; - } - } - if ((v === "'" || v === "`") && group[i + 1].value === v) { - group.splice(i, 2, new _ParseNode2.default("textord", v + v, "text", a, group[i + 1])); - n -= 1; - } - } - } - - /** - * Parse a single symbol out of the string. Here, we handle both the functions - * we have defined, as well as the single character symbols - * - * @return {?ParseFuncOrArgument} - */ - - }, { - key: "parseSymbol", - value: function parseSymbol() { - var nucleus = this.nextToken; - - if (_functions2.default[nucleus.text]) { - this.consume(); - // If there exists a function with this name, we return the function and - // say that it is a function. 
- return new ParseFuncOrArgument(nucleus.text, true, nucleus); - } else if (_symbols2.default[this.mode][nucleus.text]) { - this.consume(); - // Otherwise if this is a no-argument function, find the type it - // corresponds to in the symbols map - return new ParseFuncOrArgument(new _ParseNode2.default(_symbols2.default[this.mode][nucleus.text].group, nucleus.text, this.mode, nucleus), false, nucleus); - } else if (this.mode === "text" && _unicodeRegexes.cjkRegex.test(nucleus.text)) { - this.consume(); - return new ParseFuncOrArgument(new _ParseNode2.default("textord", nucleus.text, this.mode, nucleus), false, nucleus); - } else if (nucleus.text === "$") { - return new ParseFuncOrArgument(nucleus.text, false, nucleus); - } else { - return null; - } - } - }]); - return Parser; - }(); - - Parser.endOfExpression = ["}", "\\end", "\\right", "&", "\\\\", "\\cr"]; - Parser.SUPSUB_GREEDINESS = 1; - Parser.sizeFuncs = ["\\tiny", "\\sixptsize", "\\scriptsize", "\\footnotesize", "\\small", "\\normalsize", "\\large", "\\Large", "\\LARGE", "\\huge", "\\Huge"]; - Parser.styleFuncs = ["\\displaystyle", "\\textstyle", "\\scriptstyle", "\\scriptscriptstyle"]; - Parser.oldFontFuncs = { - "\\rm": "mathrm", - "\\sf": "mathsf", - "\\tt": "mathtt", - "\\bf": "mathbf", - "\\it": "mathit" - }; - - - Parser.prototype.ParseNode = _ParseNode2.default; - - module.exports = Parser; - - },{"./MacroExpander":27,"./ParseError":29,"./ParseNode":30,"./environments":40,"./functions":43,"./symbols":48,"./unicodeRegexes":49,"./units":50,"./utils":51,"babel-runtime/helpers/classCallCheck":4,"babel-runtime/helpers/createClass":5}],32:[function(require,module,exports){ - - var _classCallCheck2 = require("babel-runtime/helpers/classCallCheck"); - - var _classCallCheck3 = _interopRequireDefault(_classCallCheck2); - - var _utils = require("./utils"); - - var _utils2 = _interopRequireDefault(_utils); - - function _interopRequireDefault(obj) { return obj && obj.__esModule ? 
obj : { default: obj }; } - - /** - * The main Settings object - * - * The current options stored are: - * - displayMode: Whether the expression should be typeset as inline math - * (false, the default), meaning that the math starts in - * \textstyle and is placed in an inline-block); or as display - * math (true), meaning that the math starts in \displaystyle - * and is placed in a block with vertical margin. - */ - var Settings = function Settings(options) { - (0, _classCallCheck3.default)(this, Settings); - - // allow null options - options = options || {}; - this.displayMode = _utils2.default.deflt(options.displayMode, false); - this.throwOnError = _utils2.default.deflt(options.throwOnError, true); - this.errorColor = _utils2.default.deflt(options.errorColor, "#cc0000"); - this.macros = options.macros || {}; - this.colorIsTextColor = _utils2.default.deflt(options.colorIsTextColor, false); - }; /** - * This is a module for storing settings passed into KaTeX. It correctly handles - * default settings. - */ - - module.exports = Settings; - - },{"./utils":51,"babel-runtime/helpers/classCallCheck":4}],33:[function(require,module,exports){ - - var _classCallCheck2 = require("babel-runtime/helpers/classCallCheck"); - - var _classCallCheck3 = _interopRequireDefault(_classCallCheck2); - - var _createClass2 = require("babel-runtime/helpers/createClass"); - - var _createClass3 = _interopRequireDefault(_createClass2); - - function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - - /** - * This file contains information and classes for the various kinds of styles - * used in TeX. It provides a generic `Style` class, which holds information - * about a specific style. It then provides instances of all the different kinds - * of styles possible, and provides functions to move between them and get - * information about them. - */ - - /** - * The main style class. 
Contains a unique id for the style, a size (which is - * the same for cramped and uncramped version of a style), and a cramped flag. - */ - var Style = function () { - function Style(id, size, cramped) { - (0, _classCallCheck3.default)(this, Style); - - this.id = id; - this.size = size; - this.cramped = cramped; - } - - /** - * Get the style of a superscript given a base in the current style. - */ - - - (0, _createClass3.default)(Style, [{ - key: "sup", - value: function sup() { - return styles[_sup[this.id]]; - } - - /** - * Get the style of a subscript given a base in the current style. - */ - - }, { - key: "sub", - value: function sub() { - return styles[_sub[this.id]]; - } - - /** - * Get the style of a fraction numerator given the fraction in the current - * style. - */ - - }, { - key: "fracNum", - value: function fracNum() { - return styles[_fracNum[this.id]]; - } - - /** - * Get the style of a fraction denominator given the fraction in the current - * style. - */ - - }, { - key: "fracDen", - value: function fracDen() { - return styles[_fracDen[this.id]]; - } - - /** - * Get the cramped version of a style (in particular, cramping a cramped style - * doesn't change the style). - */ - - }, { - key: "cramp", - value: function cramp() { - return styles[_cramp[this.id]]; - } - - /** - * Get a text or display version of this style. 
- */ - - }, { - key: "text", - value: function text() { - return styles[_text[this.id]]; - } - - /** - * Return if this style is tightly spaced (scriptstyle/scriptscriptstyle) - */ - - }, { - key: "isTight", - value: function isTight() { - return this.size >= 2; - } - }]); - return Style; - }(); - - // IDs of the different styles - - - var D = 0; - var Dc = 1; - var T = 2; - var Tc = 3; - var S = 4; - var Sc = 5; - var SS = 6; - var SSc = 7; - - // Instances of the different styles - var styles = [new Style(D, 0, false), new Style(Dc, 0, true), new Style(T, 1, false), new Style(Tc, 1, true), new Style(S, 2, false), new Style(Sc, 2, true), new Style(SS, 3, false), new Style(SSc, 3, true)]; - - // Lookup tables for switching from one style to another - var _sup = [S, Sc, S, Sc, SS, SSc, SS, SSc]; - var _sub = [Sc, Sc, Sc, Sc, SSc, SSc, SSc, SSc]; - var _fracNum = [T, Tc, S, Sc, SS, SSc, SS, SSc]; - var _fracDen = [Tc, Tc, Sc, Sc, SSc, SSc, SSc, SSc]; - var _cramp = [Dc, Dc, Tc, Tc, Sc, Sc, SSc, SSc]; - var _text = [D, Dc, T, Tc, T, Tc, T, Tc]; - - // We only export some of the styles. Also, we don't export the `Style` class so - // no more styles can be generated. - module.exports = { - DISPLAY: styles[D], - TEXT: styles[T], - SCRIPT: styles[S], - SCRIPTSCRIPT: styles[SS] - }; - - },{"babel-runtime/helpers/classCallCheck":4,"babel-runtime/helpers/createClass":5}],34:[function(require,module,exports){ - - var _domTree = require("./domTree"); - - var _domTree2 = _interopRequireDefault(_domTree); - - var _fontMetrics = require("./fontMetrics"); - - var _fontMetrics2 = _interopRequireDefault(_fontMetrics); - - var _symbols = require("./symbols"); - - var _symbols2 = _interopRequireDefault(_symbols); - - var _utils = require("./utils"); - - var _utils2 = _interopRequireDefault(_utils); - - function _interopRequireDefault(obj) { return obj && obj.__esModule ? 
obj : { default: obj }; } - - // The following have to be loaded from Main-Italic font, using class mainit - /* eslint no-console:0 */ - /** - * This module contains general functions that can be used for building - * different kinds of domTree nodes in a consistent manner. - */ - - var mainitLetters = ["\\imath", // dotless i - "\\jmath", // dotless j - "\\pounds"]; - - /** - * Looks up the given symbol in fontMetrics, after applying any symbol - * replacements defined in symbol.js - */ - var lookupSymbol = function lookupSymbol(value, fontFamily, mode) { - // Replace the value with its replaced value from symbol.js - if (_symbols2.default[mode][value] && _symbols2.default[mode][value].replace) { - value = _symbols2.default[mode][value].replace; - } - return { - value: value, - metrics: _fontMetrics2.default.getCharacterMetrics(value, fontFamily) - }; - }; - - /** - * Makes a symbolNode after translation via the list of symbols in symbols.js. - * Correctly pulls out metrics for the character, and optionally takes a list of - * classes to be attached to the node. - * - * TODO: make argument order closer to makeSpan - * TODO: add a separate argument for math class (e.g. `mop`, `mbin`), which - * should if present come first in `classes`. 
- */ - var makeSymbol = function makeSymbol(value, fontFamily, mode, options, classes) { - var lookup = lookupSymbol(value, fontFamily, mode); - var metrics = lookup.metrics; - value = lookup.value; - - var symbolNode = void 0; - if (metrics) { - var italic = metrics.italic; - if (mode === "text") { - italic = 0; - } - symbolNode = new _domTree2.default.symbolNode(value, metrics.height, metrics.depth, italic, metrics.skew, classes); - } else { - // TODO(emily): Figure out a good way to only print this in development - typeof console !== "undefined" && console.warn("No character metrics for '" + value + "' in style '" + fontFamily + "'"); - symbolNode = new _domTree2.default.symbolNode(value, 0, 0, 0, 0, classes); - } - - if (options) { - symbolNode.maxFontSize = options.sizeMultiplier; - if (options.style.isTight()) { - symbolNode.classes.push("mtight"); - } - if (options.getColor()) { - symbolNode.style.color = options.getColor(); - } - } - - return symbolNode; - }; - - /** - * Makes a symbol in Main-Regular or AMS-Regular. - * Used for rel, bin, open, close, inner, and punct. - */ - var mathsym = function mathsym(value, mode, options, classes) { - // Decide what font to render the symbol in by its entry in the symbols - // table. - // Have a special case for when the value = \ because the \ is used as a - // textord in unsupported command errors but cannot be parsed as a regular - // text ordinal and is therefore not present as a symbol in the symbols - // table for text - if (value === "\\" || _symbols2.default[mode][value].font === "main") { - return makeSymbol(value, "Main-Regular", mode, options, classes); - } else { - return makeSymbol(value, "AMS-Regular", mode, options, classes.concat(["amsrm"])); - } - }; - - /** - * Makes a symbol in the default font for mathords and textords. 
- */ - var mathDefault = function mathDefault(value, mode, options, classes, type) { - if (type === "mathord") { - var fontLookup = mathit(value); - return makeSymbol(value, fontLookup.fontName, mode, options, classes.concat([fontLookup.fontClass])); - } else if (type === "textord") { - var font = _symbols2.default[mode][value] && _symbols2.default[mode][value].font; - if (font === "ams") { - return makeSymbol(value, "AMS-Regular", mode, options, classes.concat(["amsrm"])); - } else { - // if (font === "main") { - return makeSymbol(value, "Main-Regular", mode, options, classes.concat(["mathrm"])); - } - } else { - throw new Error("unexpected type: " + type + " in mathDefault"); - } - }; - - /** - * Determines which of the two font names (Main-Italic and Math-Italic) and - * corresponding style tags (mainit or mathit) to use for font "mathit", - * depending on the symbol. Use this function instead of fontMap for font - * "mathit". - */ - var mathit = function mathit(value, mode, options, classes) { - if (/[0-9]/.test(value.charAt(0)) || - // glyphs for \imath and \jmath do not exist in Math-Italic so we - // need to use Main-Italic instead - _utils2.default.contains(mainitLetters, value)) { - return { - fontName: "Main-Italic", - fontClass: "mainit" - }; - } else { - return { - fontName: "Math-Italic", - fontClass: "mathit" - }; - } - }; - - /** - * Makes either a mathord or textord in the correct font and color. 
- */ - var makeOrd = function makeOrd(group, options, type) { - var mode = group.mode; - var value = group.value; - - var classes = ["mord"]; - - var font = options.font; - if (font) { - var fontLookup = void 0; - if (font === "mathit" || _utils2.default.contains(mainitLetters, value)) { - fontLookup = mathit(value); - } else { - fontLookup = fontMap[font]; - } - if (lookupSymbol(value, fontLookup.fontName, mode).metrics) { - return makeSymbol(value, fontLookup.fontName, mode, options, classes.concat([fontLookup.fontClass || font])); - } else { - return mathDefault(value, mode, options, classes, type); - } - } else { - return mathDefault(value, mode, options, classes, type); - } - }; - - /** - * Calculate the height, depth, and maxFontSize of an element based on its - * children. - */ - var sizeElementFromChildren = function sizeElementFromChildren(elem) { - var height = 0; - var depth = 0; - var maxFontSize = 0; - - if (elem.children) { - for (var i = 0; i < elem.children.length; i++) { - if (elem.children[i].height > height) { - height = elem.children[i].height; - } - if (elem.children[i].depth > depth) { - depth = elem.children[i].depth; - } - if (elem.children[i].maxFontSize > maxFontSize) { - maxFontSize = elem.children[i].maxFontSize; - } - } - } - - elem.height = height; - elem.depth = depth; - elem.maxFontSize = maxFontSize; - }; - - /** - * Makes a span with the given list of classes, list of children, and options. - * - * TODO: Ensure that `options` is always provided (currently some call sites - * don't pass it). - * TODO: add a separate argument for math class (e.g. `mop`, `mbin`), which - * should if present come first in `classes`. - */ - var makeSpan = function makeSpan(classes, children, options) { - var span = new _domTree2.default.span(classes, children, options); - - sizeElementFromChildren(span); - - return span; - }; - - /** - * Prepends the given children to the given span, updating height, depth, and - * maxFontSize. 
- */ - var prependChildren = function prependChildren(span, children) { - span.children = children.concat(span.children); - - sizeElementFromChildren(span); - }; - - /** - * Makes a document fragment with the given list of children. - */ - var makeFragment = function makeFragment(children) { - var fragment = new _domTree2.default.documentFragment(children); - - sizeElementFromChildren(fragment); - - return fragment; - }; - - /** - * Makes a vertical list by stacking elements and kerns on top of each other. - * Allows for many different ways of specifying the positioning method. - * - * Arguments: - * - children: A list of child or kern nodes to be stacked on top of each other - * (i.e. the first element will be at the bottom, and the last at - * the top). Element nodes are specified as - * {type: "elem", elem: node} - * while kern nodes are specified as - * {type: "kern", size: size} - * - positionType: The method by which the vlist should be positioned. Valid - * values are: - * - "individualShift": The children list only contains elem - * nodes, and each node contains an extra - * "shift" value of how much it should be - * shifted (note that shifting is always - * moving downwards). positionData is - * ignored. - * - "top": The positionData specifies the topmost point of - * the vlist (note this is expected to be a height, - * so positive values move up) - * - "bottom": The positionData specifies the bottommost point - * of the vlist (note this is expected to be a - * depth, so positive values move down - * - "shift": The vlist will be positioned such that its - * baseline is positionData away from the baseline - * of the first child. Positive values move - * downwards. - * - "firstBaseline": The vlist will be positioned such that - * its baseline is aligned with the - * baseline of the first child. - * positionData is ignored. 
(this is - * equivalent to "shift" with - * positionData=0) - * - positionData: Data used in different ways depending on positionType - * - options: An Options object - * - */ - var makeVList = function makeVList(children, positionType, positionData, options) { - var depth = void 0; - var currPos = void 0; - var i = void 0; - if (positionType === "individualShift") { - var oldChildren = children; - children = [oldChildren[0]]; - - // Add in kerns to the list of children to get each element to be - // shifted to the correct specified shift - depth = -oldChildren[0].shift - oldChildren[0].elem.depth; - currPos = depth; - for (i = 1; i < oldChildren.length; i++) { - var diff = -oldChildren[i].shift - currPos - oldChildren[i].elem.depth; - var size = diff - (oldChildren[i - 1].elem.height + oldChildren[i - 1].elem.depth); - - currPos = currPos + diff; - - children.push({ type: "kern", size: size }); - children.push(oldChildren[i]); - } - } else if (positionType === "top") { - // We always start at the bottom, so calculate the bottom by adding up - // all the sizes - var bottom = positionData; - for (i = 0; i < children.length; i++) { - if (children[i].type === "kern") { - bottom -= children[i].size; - } else { - bottom -= children[i].elem.height + children[i].elem.depth; - } - } - depth = bottom; - } else if (positionType === "bottom") { - depth = -positionData; - } else if (positionType === "shift") { - depth = -children[0].elem.depth - positionData; - } else if (positionType === "firstBaseline") { - depth = -children[0].elem.depth; - } else { - depth = 0; - } - - // Create a strut that is taller than any list item. The strut is added to - // each item, where it will determine the item's baseline. Since it has - // `overflow:hidden`, the strut's top edge will sit on the item's line box's - // top edge and the strut's bottom edge will sit on the item's baseline, - // with no additional line-height spacing. 
This allows the item baseline to - // be positioned precisely without worrying about font ascent and - // line-height. - var pstrutSize = 0; - for (i = 0; i < children.length; i++) { - if (children[i].type === "elem") { - var child = children[i].elem; - pstrutSize = Math.max(pstrutSize, child.maxFontSize, child.height); - } - } - pstrutSize += 2; - var pstrut = makeSpan(["pstrut"], []); - pstrut.style.height = pstrutSize + "em"; - - // Create a new list of actual children at the correct offsets - var realChildren = []; - var minPos = depth; - var maxPos = depth; - currPos = depth; - for (i = 0; i < children.length; i++) { - if (children[i].type === "kern") { - currPos += children[i].size; - } else { - var _child = children[i].elem; - - var childWrap = makeSpan([], [pstrut, _child]); - childWrap.style.top = -pstrutSize - currPos - _child.depth + "em"; - if (children[i].marginLeft) { - childWrap.style.marginLeft = children[i].marginLeft; - } - if (children[i].marginRight) { - childWrap.style.marginRight = children[i].marginRight; - } - - realChildren.push(childWrap); - currPos += _child.height + _child.depth; - } - minPos = Math.min(minPos, currPos); - maxPos = Math.max(maxPos, currPos); - } - - // The vlist contents go in a table-cell with `vertical-align:bottom`. - // This cell's bottom edge will determine the containing table's baseline - // without overly expanding the containing line-box. - var vlist = makeSpan(["vlist"], realChildren); - vlist.style.height = maxPos + "em"; - - // A second row is used if necessary to represent the vlist's depth. - var rows = void 0; - if (minPos < 0) { - var depthStrut = makeSpan(["vlist"], []); - depthStrut.style.height = -minPos + "em"; - - // Safari wants the first row to have inline content; otherwise it - // puts the bottom of the *second* row on the baseline. 
- var topStrut = makeSpan(["vlist-s"], [new _domTree2.default.symbolNode("\u200B")]); - - rows = [makeSpan(["vlist-r"], [vlist, topStrut]), makeSpan(["vlist-r"], [depthStrut])]; - } else { - rows = [makeSpan(["vlist-r"], [vlist])]; - } - - var vtable = makeSpan(["vlist-t"], rows); - if (rows.length === 2) { - vtable.classes.push("vlist-t2"); - } - vtable.height = maxPos; - vtable.depth = -minPos; - return vtable; - }; - - // A map of spacing functions to their attributes, like size and corresponding - // CSS class - var spacingFunctions = { - "\\qquad": { - size: "2em", - className: "qquad" - }, - "\\quad": { - size: "1em", - className: "quad" - }, - "\\enspace": { - size: "0.5em", - className: "enspace" - }, - "\\;": { - size: "0.277778em", - className: "thickspace" - }, - "\\:": { - size: "0.22222em", - className: "mediumspace" - }, - "\\,": { - size: "0.16667em", - className: "thinspace" - }, - "\\!": { - size: "-0.16667em", - className: "negativethinspace" - } - }; - - /** - * Maps TeX font commands to objects containing: - * - variant: string used for "mathvariant" attribute in buildMathML.js - * - fontName: the "style" parameter to fontMetrics.getCharacterMetrics - */ - // A map between tex font commands an MathML mathvariant attribute values - var fontMap = { - // styles - "mathbf": { - variant: "bold", - fontName: "Main-Bold" - }, - "mathrm": { - variant: "normal", - fontName: "Main-Regular" - }, - "textit": { - variant: "italic", - fontName: "Main-Italic" - }, - - // "mathit" is missing because it requires the use of two fonts: Main-Italic - // and Math-Italic. This is handled by a special case in makeOrd which ends - // up calling mathit. 
- - // families - "mathbb": { - variant: "double-struck", - fontName: "AMS-Regular" - }, - "mathcal": { - variant: "script", - fontName: "Caligraphic-Regular" - }, - "mathfrak": { - variant: "fraktur", - fontName: "Fraktur-Regular" - }, - "mathscr": { - variant: "script", - fontName: "Script-Regular" - }, - "mathsf": { - variant: "sans-serif", - fontName: "SansSerif-Regular" - }, - "mathtt": { - variant: "monospace", - fontName: "Typewriter-Regular" - } - }; - - module.exports = { - fontMap: fontMap, - makeSymbol: makeSymbol, - mathsym: mathsym, - makeSpan: makeSpan, - makeFragment: makeFragment, - makeVList: makeVList, - makeOrd: makeOrd, - prependChildren: prependChildren, - spacingFunctions: spacingFunctions - }; - - },{"./domTree":39,"./fontMetrics":41,"./symbols":48,"./utils":51}],35:[function(require,module,exports){ - - var _stringify = require("babel-runtime/core-js/json/stringify"); - - var _stringify2 = _interopRequireDefault(_stringify); - - var _ParseError = require("./ParseError"); - - var _ParseError2 = _interopRequireDefault(_ParseError); - - var _Style = require("./Style"); - - var _Style2 = _interopRequireDefault(_Style); - - var _buildCommon = require("./buildCommon"); - - var _buildCommon2 = _interopRequireDefault(_buildCommon); - - var _delimiter = require("./delimiter"); - - var _delimiter2 = _interopRequireDefault(_delimiter); - - var _domTree = require("./domTree"); - - var _domTree2 = _interopRequireDefault(_domTree); - - var _units = require("./units"); - - var _units2 = _interopRequireDefault(_units); - - var _utils = require("./utils"); - - var _utils2 = _interopRequireDefault(_utils); - - var _stretchy = require("./stretchy"); - - var _stretchy2 = _interopRequireDefault(_stretchy); - - function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - - /* eslint no-console:0 */ - /** - * This file does the main work of building a domTree structure from a parse - * tree. 
The entry point is the `buildHTML` function, which takes a parse tree. - * Then, the buildExpression, buildGroup, and various groupTypes functions are - * called, to produce a final HTML tree. - */ - - var isSpace = function isSpace(node) { - return node instanceof _domTree2.default.span && node.classes[0] === "mspace"; - }; - - // Binary atoms (first class `mbin`) change into ordinary atoms (`mord`) - // depending on their surroundings. See TeXbook pg. 442-446, Rules 5 and 6, - // and the text before Rule 19. - var isBin = function isBin(node) { - return node && node.classes[0] === "mbin"; - }; - - var isBinLeftCanceller = function isBinLeftCanceller(node, isRealGroup) { - // TODO: This code assumes that a node's math class is the first element - // of its `classes` array. A later cleanup should ensure this, for - // instance by changing the signature of `makeSpan`. - if (node) { - return _utils2.default.contains(["mbin", "mopen", "mrel", "mop", "mpunct"], node.classes[0]); - } else { - return isRealGroup; - } - }; - - var isBinRightCanceller = function isBinRightCanceller(node, isRealGroup) { - if (node) { - return _utils2.default.contains(["mrel", "mclose", "mpunct"], node.classes[0]); - } else { - return isRealGroup; - } - }; - - /** - * Splice out any spaces from `children` starting at position `i`, and return - * the spliced-out array. Returns null if `children[i]` does not exist or is not - * a space. - */ - var spliceSpaces = function spliceSpaces(children, i) { - var j = i; - while (j < children.length && isSpace(children[j])) { - j++; - } - if (j === i) { - return null; - } else { - return children.splice(i, j - i); - } - }; - - /** - * Take a list of nodes, build them in order, and return a list of the built - * nodes. documentFragments are flattened into their contents, so the - * returned list contains no fragments. `isRealGroup` is true if `expression` - * is a real group (no atoms will be added on either side), as opposed to - * a partial group (e.g. 
one created by \color). - */ - var buildExpression = function buildExpression(expression, options, isRealGroup) { - // Parse expressions into `groups`. - var groups = []; - for (var i = 0; i < expression.length; i++) { - var group = expression[i]; - var output = buildGroup(group, options); - if (output instanceof _domTree2.default.documentFragment) { - Array.prototype.push.apply(groups, output.children); - } else { - groups.push(output); - } - } - // At this point `groups` consists entirely of `symbolNode`s and `span`s. - - // Explicit spaces (e.g., \;, \,) should be ignored with respect to atom - // spacing (e.g., "add thick space between mord and mrel"). Since CSS - // adjacency rules implement atom spacing, spaces should be invisible to - // CSS. So we splice them out of `groups` and into the atoms themselves. - for (var _i = 0; _i < groups.length; _i++) { - var spaces = spliceSpaces(groups, _i); - if (spaces) { - // Splicing of spaces may have removed all remaining groups. - if (_i < groups.length) { - // If there is a following group, move space within it. - if (groups[_i] instanceof _domTree2.default.symbolNode) { - groups[_i] = (0, _buildCommon.makeSpan)([].concat(groups[_i].classes), [groups[_i]]); - } - _buildCommon2.default.prependChildren(groups[_i], spaces); - } else { - // Otherwise, put any spaces back at the end of the groups. - Array.prototype.push.apply(groups, spaces); - break; - } - } - } - - // Binary operators change to ordinary symbols in some contexts. - for (var _i2 = 0; _i2 < groups.length; _i2++) { - if (isBin(groups[_i2]) && (isBinLeftCanceller(groups[_i2 - 1], isRealGroup) || isBinRightCanceller(groups[_i2 + 1], isRealGroup))) { - groups[_i2].classes[0] = "mord"; - } - } - - // Process \\not commands within the group. - // TODO(kevinb): Handle multiple \\not commands in a row. - // TODO(kevinb): Handle \\not{abc} correctly. The \\not should appear over - // the 'a' instead of the 'c'. 
- for (var _i3 = 0; _i3 < groups.length; _i3++) { - if (groups[_i3].value === "\u0338" && _i3 + 1 < groups.length) { - var children = groups.slice(_i3, _i3 + 2); - - children[0].classes = ["mainrm"]; - // \u0338 is a combining glyph so we could reorder the children so - // that it comes after the other glyph. This works correctly on - // most browsers except for Safari. Instead we absolutely position - // the glyph and set its right side to match that of the other - // glyph which is visually equivalent. - children[0].style.position = "absolute"; - children[0].style.right = "0"; - - // Copy the classes from the second glyph to the new container. - // This is so it behaves the same as though there was no \\not. - var classes = groups[_i3 + 1].classes; - var container = (0, _buildCommon.makeSpan)(classes, children); - - // LaTeX adds a space between ords separated by a \\not. - if (classes.indexOf("mord") !== -1) { - // \glue(\thickmuskip) 2.77771 plus 2.77771 - container.style.paddingLeft = "0.277771em"; - } - - // Ensure that the \u0338 is positioned relative to the container. - container.style.position = "relative"; - groups.splice(_i3, 2, container); - } - } - - return groups; - }; - - // Return math atom class (mclass) of a domTree. - var getTypeOfDomTree = function getTypeOfDomTree(node) { - if (node instanceof _domTree2.default.documentFragment) { - if (node.children.length) { - return getTypeOfDomTree(node.children[node.children.length - 1]); - } - } else { - if (_utils2.default.contains(["mord", "mop", "mbin", "mrel", "mopen", "mclose", "mpunct", "minner"], node.classes[0])) { - return node.classes[0]; - } - } - return null; - }; - - /** - * Sometimes, groups perform special rules when they have superscripts or - * subscripts attached to them. This function lets the `supsub` group know that - * its inner element should handle the superscripts and subscripts instead of - * handling them itself. 
- */ - var shouldHandleSupSub = function shouldHandleSupSub(group, options) { - if (!group.value.base) { - return false; - } else { - var base = group.value.base; - if (base.type === "op") { - // Operators handle supsubs differently when they have limits - // (e.g. `\displaystyle\sum_2^3`) - return base.value.limits && (options.style.size === _Style2.default.DISPLAY.size || base.value.alwaysHandleSupSub); - } else if (base.type === "accent") { - return isCharacterBox(base.value.base); - } else if (base.type === "horizBrace") { - var isSup = group.value.sub ? false : true; - return isSup === base.value.isOver; - } else { - return null; - } - } - }; - - /** - * Sometimes we want to pull out the innermost element of a group. In most - * cases, this will just be the group itself, but when ordgroups and colors have - * a single element, we want to pull that out. - */ - var getBaseElem = function getBaseElem(group) { - if (!group) { - return false; - } else if (group.type === "ordgroup") { - if (group.value.length === 1) { - return getBaseElem(group.value[0]); - } else { - return group; - } - } else if (group.type === "color") { - if (group.value.value.length === 1) { - return getBaseElem(group.value.value[0]); - } else { - return group; - } - } else if (group.type === "font") { - return getBaseElem(group.value.body); - } else { - return group; - } - }; - - /** - * TeXbook algorithms often reference "character boxes", which are simply groups - * with a single character in them. To decide if something is a character box, - * we find its innermost group, and see if it is a single character. 
- */ - var isCharacterBox = function isCharacterBox(group) { - var baseElem = getBaseElem(group); - - // These are all they types of groups which hold single characters - return baseElem.type === "mathord" || baseElem.type === "textord" || baseElem.type === "bin" || baseElem.type === "rel" || baseElem.type === "inner" || baseElem.type === "open" || baseElem.type === "close" || baseElem.type === "punct"; - }; - - var makeNullDelimiter = function makeNullDelimiter(options, classes) { - var moreClasses = ["nulldelimiter"].concat(options.baseSizingClasses()); - return (0, _buildCommon.makeSpan)(classes.concat(moreClasses)); - }; - - /** - * This is a map of group types to the function used to handle that type. - * Simpler types come at the beginning, while complicated types come afterwards. - */ - var groupTypes = {}; - - groupTypes.mathord = function (group, options) { - return _buildCommon2.default.makeOrd(group, options, "mathord"); - }; - - groupTypes.textord = function (group, options) { - return _buildCommon2.default.makeOrd(group, options, "textord"); - }; - - groupTypes.bin = function (group, options) { - return _buildCommon2.default.mathsym(group.value, group.mode, options, ["mbin"]); - }; - - groupTypes.rel = function (group, options) { - return _buildCommon2.default.mathsym(group.value, group.mode, options, ["mrel"]); - }; - - groupTypes.open = function (group, options) { - return _buildCommon2.default.mathsym(group.value, group.mode, options, ["mopen"]); - }; - - groupTypes.close = function (group, options) { - return _buildCommon2.default.mathsym(group.value, group.mode, options, ["mclose"]); - }; - - groupTypes.inner = function (group, options) { - return _buildCommon2.default.mathsym(group.value, group.mode, options, ["minner"]); - }; - - groupTypes.punct = function (group, options) { - return _buildCommon2.default.mathsym(group.value, group.mode, options, ["mpunct"]); - }; - - groupTypes.ordgroup = function (group, options) { - return (0, 
_buildCommon.makeSpan)(["mord"], buildExpression(group.value, options, true), options); - }; - - groupTypes.text = function (group, options) { - var newOptions = options.withFont(group.value.style); - var inner = buildExpression(group.value.body, newOptions, true); - for (var i = 0; i < inner.length - 1; i++) { - if (inner[i].tryCombine(inner[i + 1])) { - inner.splice(i + 1, 1); - i--; - } - } - return (0, _buildCommon.makeSpan)(["mord", "text"], inner, newOptions); - }; - - groupTypes.color = function (group, options) { - var elements = buildExpression(group.value.value, options.withColor(group.value.color), false); - - // \color isn't supposed to affect the type of the elements it contains. - // To accomplish this, we wrap the results in a fragment, so the inner - // elements will be able to directly interact with their neighbors. For - // example, `\color{red}{2 +} 3` has the same spacing as `2 + 3` - return new _buildCommon2.default.makeFragment(elements); - }; - - groupTypes.supsub = function (group, options) { - // Superscript and subscripts are handled in the TeXbook on page - // 445-446, rules 18(a-f). - - // Here is where we defer to the inner group if it should handle - // superscripts and subscripts itself. 
- if (shouldHandleSupSub(group, options)) { - return groupTypes[group.value.base.type](group, options); - } - - var base = buildGroup(group.value.base, options); - var supm = void 0; - var subm = void 0; - - var metrics = options.fontMetrics(); - var newOptions = void 0; - - // Rule 18a - var supShift = 0; - var subShift = 0; - - if (group.value.sup) { - newOptions = options.havingStyle(options.style.sup()); - supm = buildGroup(group.value.sup, newOptions, options); - if (!isCharacterBox(group.value.base)) { - supShift = base.height - newOptions.fontMetrics().supDrop * newOptions.sizeMultiplier / options.sizeMultiplier; - } - } - - if (group.value.sub) { - newOptions = options.havingStyle(options.style.sub()); - subm = buildGroup(group.value.sub, newOptions, options); - if (!isCharacterBox(group.value.base)) { - subShift = base.depth + newOptions.fontMetrics().subDrop * newOptions.sizeMultiplier / options.sizeMultiplier; - } - } - - // Rule 18c - var minSupShift = void 0; - if (options.style === _Style2.default.DISPLAY) { - minSupShift = metrics.sup1; - } else if (options.style.cramped) { - minSupShift = metrics.sup3; - } else { - minSupShift = metrics.sup2; - } - - // scriptspace is a font-size-independent size, so scale it - // appropriately - var multiplier = options.sizeMultiplier; - var scriptspace = 0.5 / metrics.ptPerEm / multiplier + "em"; - - var supsub = void 0; - if (!group.value.sup) { - // Rule 18b - subShift = Math.max(subShift, metrics.sub1, subm.height - 0.8 * metrics.xHeight); - - var vlistElem = [{ type: "elem", elem: subm, marginRight: scriptspace }]; - // Subscripts shouldn't be shifted by the base's italic correction. - // Account for that by shifting the subscript back the appropriate - // amount. Note we only do this when the base is a single symbol. 
- if (base instanceof _domTree2.default.symbolNode) { - vlistElem[0].marginLeft = -base.italic + "em"; - } - - supsub = _buildCommon2.default.makeVList(vlistElem, "shift", subShift, options); - } else if (!group.value.sub) { - // Rule 18c, d - supShift = Math.max(supShift, minSupShift, supm.depth + 0.25 * metrics.xHeight); - - supsub = _buildCommon2.default.makeVList([{ type: "elem", elem: supm, marginRight: scriptspace }], "shift", -supShift, options); - } else { - supShift = Math.max(supShift, minSupShift, supm.depth + 0.25 * metrics.xHeight); - subShift = Math.max(subShift, metrics.sub2); - - var ruleWidth = metrics.defaultRuleThickness; - - // Rule 18e - if (supShift - supm.depth - (subm.height - subShift) < 4 * ruleWidth) { - subShift = 4 * ruleWidth - (supShift - supm.depth) + subm.height; - var psi = 0.8 * metrics.xHeight - (supShift - supm.depth); - if (psi > 0) { - supShift += psi; - subShift -= psi; - } - } - - var _vlistElem = [{ type: "elem", elem: subm, shift: subShift, marginRight: scriptspace }, { type: "elem", elem: supm, shift: -supShift, marginRight: scriptspace }]; - // See comment above about subscripts not being shifted - if (base instanceof _domTree2.default.symbolNode) { - _vlistElem[0].marginLeft = -base.italic + "em"; - } - - supsub = _buildCommon2.default.makeVList(_vlistElem, "individualShift", null, options); - } - - // We ensure to wrap the supsub vlist in a span.msupsub to reset text-align - var mclass = getTypeOfDomTree(base) || "mord"; - return (0, _buildCommon.makeSpan)([mclass], [base, (0, _buildCommon.makeSpan)(["msupsub"], [supsub])], options); - }; - - groupTypes.genfrac = function (group, options) { - // Fractions are handled in the TeXbook on pages 444-445, rules 15(a-e). 
- // Figure out what style this fraction should be in based on the - // function used - var style = options.style; - if (group.value.size === "display") { - style = _Style2.default.DISPLAY; - } else if (group.value.size === "text") { - style = _Style2.default.TEXT; - } - - var nstyle = style.fracNum(); - var dstyle = style.fracDen(); - var newOptions = void 0; - - newOptions = options.havingStyle(nstyle); - var numerm = buildGroup(group.value.numer, newOptions, options); - - newOptions = options.havingStyle(dstyle); - var denomm = buildGroup(group.value.denom, newOptions, options); - - var rule = void 0; - var ruleWidth = void 0; - var ruleSpacing = void 0; - if (group.value.hasBarLine) { - rule = makeLineSpan("frac-line", options); - ruleWidth = rule.height; - ruleSpacing = rule.height; - } else { - rule = null; - ruleWidth = 0; - ruleSpacing = options.fontMetrics().defaultRuleThickness; - } - - // Rule 15b - var numShift = void 0; - var clearance = void 0; - var denomShift = void 0; - if (style.size === _Style2.default.DISPLAY.size) { - numShift = options.fontMetrics().num1; - if (ruleWidth > 0) { - clearance = 3 * ruleSpacing; - } else { - clearance = 7 * ruleSpacing; - } - denomShift = options.fontMetrics().denom1; - } else { - if (ruleWidth > 0) { - numShift = options.fontMetrics().num2; - clearance = ruleSpacing; - } else { - numShift = options.fontMetrics().num3; - clearance = 3 * ruleSpacing; - } - denomShift = options.fontMetrics().denom2; - } - - var frac = void 0; - if (ruleWidth === 0) { - // Rule 15c - var candidateClearance = numShift - numerm.depth - (denomm.height - denomShift); - if (candidateClearance < clearance) { - numShift += 0.5 * (clearance - candidateClearance); - denomShift += 0.5 * (clearance - candidateClearance); - } - - frac = _buildCommon2.default.makeVList([{ type: "elem", elem: denomm, shift: denomShift }, { type: "elem", elem: numerm, shift: -numShift }], "individualShift", null, options); - } else { - // Rule 15d - var axisHeight 
= options.fontMetrics().axisHeight; - - if (numShift - numerm.depth - (axisHeight + 0.5 * ruleWidth) < clearance) { - numShift += clearance - (numShift - numerm.depth - (axisHeight + 0.5 * ruleWidth)); - } - - if (axisHeight - 0.5 * ruleWidth - (denomm.height - denomShift) < clearance) { - denomShift += clearance - (axisHeight - 0.5 * ruleWidth - (denomm.height - denomShift)); - } - - var midShift = -(axisHeight - 0.5 * ruleWidth); - - frac = _buildCommon2.default.makeVList([{ type: "elem", elem: denomm, shift: denomShift }, { type: "elem", elem: rule, shift: midShift }, { type: "elem", elem: numerm, shift: -numShift }], "individualShift", null, options); - } - - // Since we manually change the style sometimes (with \dfrac or \tfrac), - // account for the possible size change here. - newOptions = options.havingStyle(style); - frac.height *= newOptions.sizeMultiplier / options.sizeMultiplier; - frac.depth *= newOptions.sizeMultiplier / options.sizeMultiplier; - - // Rule 15e - var delimSize = void 0; - if (style.size === _Style2.default.DISPLAY.size) { - delimSize = options.fontMetrics().delim1; - } else { - delimSize = options.fontMetrics().delim2; - } - - var leftDelim = void 0; - var rightDelim = void 0; - if (group.value.leftDelim == null) { - leftDelim = makeNullDelimiter(options, ["mopen"]); - } else { - leftDelim = _delimiter2.default.customSizedDelim(group.value.leftDelim, delimSize, true, options.havingStyle(style), group.mode, ["mopen"]); - } - if (group.value.rightDelim == null) { - rightDelim = makeNullDelimiter(options, ["mclose"]); - } else { - rightDelim = _delimiter2.default.customSizedDelim(group.value.rightDelim, delimSize, true, options.havingStyle(style), group.mode, ["mclose"]); - } - - return (0, _buildCommon.makeSpan)(["mord"].concat(newOptions.sizingClasses(options)), [leftDelim, (0, _buildCommon.makeSpan)(["mfrac"], [frac]), rightDelim], options); - }; - - groupTypes.array = function (group, options) { - var r = void 0; - var c = void 0; - 
var nr = group.value.body.length; - var nc = 0; - var body = new Array(nr); - - // Horizontal spacing - var pt = 1 / options.fontMetrics().ptPerEm; - var arraycolsep = 5 * pt; // \arraycolsep in article.cls - - // Vertical spacing - var baselineskip = 12 * pt; // see size10.clo - // Default \jot from ltmath.dtx - // TODO(edemaine): allow overriding \jot via \setlength (#687) - var jot = 3 * pt; - // Default \arraystretch from lttab.dtx - // TODO(gagern): may get redefined once we have user-defined macros - var arraystretch = _utils2.default.deflt(group.value.arraystretch, 1); - var arrayskip = arraystretch * baselineskip; - var arstrutHeight = 0.7 * arrayskip; // \strutbox in ltfsstrc.dtx and - var arstrutDepth = 0.3 * arrayskip; // \@arstrutbox in lttab.dtx - - var totalHeight = 0; - for (r = 0; r < group.value.body.length; ++r) { - var inrow = group.value.body[r]; - var height = arstrutHeight; // \@array adds an \@arstrut - var depth = arstrutDepth; // to each tow (via the template) - - if (nc < inrow.length) { - nc = inrow.length; - } - - var outrow = new Array(inrow.length); - for (c = 0; c < inrow.length; ++c) { - var elt = buildGroup(inrow[c], options); - if (depth < elt.depth) { - depth = elt.depth; - } - if (height < elt.height) { - height = elt.height; - } - outrow[c] = elt; - } - - var gap = 0; - if (group.value.rowGaps[r]) { - gap = _units2.default.calculateSize(group.value.rowGaps[r].value, options); - if (gap > 0) { - // \@argarraycr - gap += arstrutDepth; - if (depth < gap) { - depth = gap; // \@xargarraycr - } - gap = 0; - } - } - // In AMS multiline environments such as aligned and gathered, rows - // correspond to lines that have additional \jot added to the - // \baselineskip via \openup. 
- if (group.value.addJot) { - depth += jot; - } - - outrow.height = height; - outrow.depth = depth; - totalHeight += height; - outrow.pos = totalHeight; - totalHeight += depth + gap; // \@yargarraycr - body[r] = outrow; - } - - var offset = totalHeight / 2 + options.fontMetrics().axisHeight; - var colDescriptions = group.value.cols || []; - var cols = []; - var colSep = void 0; - var colDescrNum = void 0; - for (c = 0, colDescrNum = 0; - // Continue while either there are more columns or more column - // descriptions, so trailing separators don't get lost. - c < nc || colDescrNum < colDescriptions.length; ++c, ++colDescrNum) { - - var colDescr = colDescriptions[colDescrNum] || {}; - - var firstSeparator = true; - while (colDescr.type === "separator") { - // If there is more than one separator in a row, add a space - // between them. - if (!firstSeparator) { - colSep = (0, _buildCommon.makeSpan)(["arraycolsep"], []); - colSep.style.width = options.fontMetrics().doubleRuleSep + "em"; - cols.push(colSep); - } - - if (colDescr.separator === "|") { - var separator = (0, _buildCommon.makeSpan)(["vertical-separator"], []); - separator.style.height = totalHeight + "em"; - separator.style.verticalAlign = -(totalHeight - offset) + "em"; - - cols.push(separator); - } else { - throw new _ParseError2.default("Invalid separator type: " + colDescr.separator); - } - - colDescrNum++; - colDescr = colDescriptions[colDescrNum] || {}; - firstSeparator = false; - } - - if (c >= nc) { - continue; - } - - var sepwidth = void 0; - if (c > 0 || group.value.hskipBeforeAndAfter) { - sepwidth = _utils2.default.deflt(colDescr.pregap, arraycolsep); - if (sepwidth !== 0) { - colSep = (0, _buildCommon.makeSpan)(["arraycolsep"], []); - colSep.style.width = sepwidth + "em"; - cols.push(colSep); - } - } - - var col = []; - for (r = 0; r < nr; ++r) { - var row = body[r]; - var elem = row[c]; - if (!elem) { - continue; - } - var shift = row.pos - offset; - elem.depth = row.depth; - elem.height = 
row.height; - col.push({ type: "elem", elem: elem, shift: shift }); - } - - col = _buildCommon2.default.makeVList(col, "individualShift", null, options); - col = (0, _buildCommon.makeSpan)(["col-align-" + (colDescr.align || "c")], [col]); - cols.push(col); - - if (c < nc - 1 || group.value.hskipBeforeAndAfter) { - sepwidth = _utils2.default.deflt(colDescr.postgap, arraycolsep); - if (sepwidth !== 0) { - colSep = (0, _buildCommon.makeSpan)(["arraycolsep"], []); - colSep.style.width = sepwidth + "em"; - cols.push(colSep); - } - } - } - body = (0, _buildCommon.makeSpan)(["mtable"], cols); - return (0, _buildCommon.makeSpan)(["mord"], [body], options); - }; - - groupTypes.spacing = function (group, options) { - if (group.value === "\\ " || group.value === "\\space" || group.value === " " || group.value === "~") { - // Spaces are generated by adding an actual space. Each of these - // things has an entry in the symbols table, so these will be turned - // into appropriate outputs. - if (group.mode === "text") { - return _buildCommon2.default.makeOrd(group, options, "textord"); - } else { - return (0, _buildCommon.makeSpan)(["mspace"], [_buildCommon2.default.mathsym(group.value, group.mode, options)], options); - } - } else { - // Other kinds of spaces are of arbitrary width. We use CSS to - // generate these. 
- return (0, _buildCommon.makeSpan)(["mspace", _buildCommon2.default.spacingFunctions[group.value].className], [], options); - } - }; - - groupTypes.llap = function (group, options) { - var inner = (0, _buildCommon.makeSpan)(["inner"], [buildGroup(group.value.body, options)]); - var fix = (0, _buildCommon.makeSpan)(["fix"], []); - return (0, _buildCommon.makeSpan)(["mord", "llap"], [inner, fix], options); - }; - - groupTypes.rlap = function (group, options) { - var inner = (0, _buildCommon.makeSpan)(["inner"], [buildGroup(group.value.body, options)]); - var fix = (0, _buildCommon.makeSpan)(["fix"], []); - return (0, _buildCommon.makeSpan)(["mord", "rlap"], [inner, fix], options); - }; - - groupTypes.op = function (group, options) { - // Operators are handled in the TeXbook pg. 443-444, rule 13(a). - var supGroup = void 0; - var subGroup = void 0; - var hasLimits = false; - if (group.type === "supsub") { - // If we have limits, supsub will pass us its group to handle. Pull - // out the superscript and subscript and set the group to the op in - // its base. - supGroup = group.value.sup; - subGroup = group.value.sub; - group = group.value.base; - hasLimits = true; - } - - var style = options.style; - - // Most operators have a large successor symbol, but these don't. - var noSuccessor = ["\\smallint"]; - - var large = false; - if (style.size === _Style2.default.DISPLAY.size && group.value.symbol && !_utils2.default.contains(noSuccessor, group.value.body)) { - - // Most symbol operators get larger in displaystyle (rule 13) - large = true; - } - - var base = void 0; - if (group.value.symbol) { - // If this is a symbol, create the symbol. - var fontName = large ? "Size2-Regular" : "Size1-Regular"; - base = _buildCommon2.default.makeSymbol(group.value.body, fontName, "math", options, ["mop", "op-symbol", large ? "large-op" : "small-op"]); - } else if (group.value.value) { - // If this is a list, compose that list. 
- var inner = buildExpression(group.value.value, options, true); - if (inner.length === 1 && inner[0] instanceof _domTree2.default.symbolNode) { - base = inner[0]; - base.classes[0] = "mop"; // replace old mclass - } else { - base = (0, _buildCommon.makeSpan)(["mop"], inner, options); - } - } else { - // Otherwise, this is a text operator. Build the text from the - // operator's name. - // TODO(emily): Add a space in the middle of some of these - // operators, like \limsup - var output = []; - for (var i = 1; i < group.value.body.length; i++) { - output.push(_buildCommon2.default.mathsym(group.value.body[i], group.mode)); - } - base = (0, _buildCommon.makeSpan)(["mop"], output, options); - } - - // If content of op is a single symbol, shift it vertically. - var baseShift = 0; - var slant = 0; - if (base instanceof _domTree2.default.symbolNode) { - // Shift the symbol so its center lies on the axis (rule 13). It - // appears that our fonts have the centers of the symbols already - // almost on the axis, so these numbers are very small. Note we - // don't actually apply this here, but instead it is used either in - // the vlist creation or separately when there are no limits. - baseShift = (base.height - base.depth) / 2 - options.fontMetrics().axisHeight; - - // The slant of the symbol is just its italic correction. - slant = base.italic; - } - - if (hasLimits) { - // IE 8 clips \int if it is in a display: inline-block. We wrap it - // in a new span so it is an inline, and works. - base = (0, _buildCommon.makeSpan)([], [base]); - - var supm = void 0; - var supKern = void 0; - var subm = void 0; - var subKern = void 0; - var newOptions = void 0; - // We manually have to handle the superscripts and subscripts. This, - // aside from the kern calculations, is copied from supsub. 
- if (supGroup) { - newOptions = options.havingStyle(style.sup()); - supm = buildGroup(supGroup, newOptions, options); - - supKern = Math.max(options.fontMetrics().bigOpSpacing1, options.fontMetrics().bigOpSpacing3 - supm.depth); - } - - if (subGroup) { - newOptions = options.havingStyle(style.sub()); - subm = buildGroup(subGroup, newOptions, options); - - subKern = Math.max(options.fontMetrics().bigOpSpacing2, options.fontMetrics().bigOpSpacing4 - subm.height); - } - - // Build the final group as a vlist of the possible subscript, base, - // and possible superscript. - var finalGroup = void 0; - var top = void 0; - var bottom = void 0; - if (!supGroup) { - top = base.height - baseShift; - - // Shift the limits by the slant of the symbol. Note - // that we are supposed to shift the limits by 1/2 of the slant, - // but since we are centering the limits adding a full slant of - // margin will shift by 1/2 that. - finalGroup = _buildCommon2.default.makeVList([{ type: "kern", size: options.fontMetrics().bigOpSpacing5 }, { type: "elem", elem: subm, marginLeft: -slant + "em" }, { type: "kern", size: subKern }, { type: "elem", elem: base }], "top", top, options); - } else if (!subGroup) { - bottom = base.depth + baseShift; - - finalGroup = _buildCommon2.default.makeVList([{ type: "elem", elem: base }, { type: "kern", size: supKern }, { type: "elem", elem: supm, marginLeft: slant + "em" }, { type: "kern", size: options.fontMetrics().bigOpSpacing5 }], "bottom", bottom, options); - } else if (!supGroup && !subGroup) { - // This case probably shouldn't occur (this would mean the - // supsub was sending us a group with no superscript or - // subscript) but be safe. 
- return base; - } else { - bottom = options.fontMetrics().bigOpSpacing5 + subm.height + subm.depth + subKern + base.depth + baseShift; - - finalGroup = _buildCommon2.default.makeVList([{ type: "kern", size: options.fontMetrics().bigOpSpacing5 }, { type: "elem", elem: subm, marginLeft: -slant + "em" }, { type: "kern", size: subKern }, { type: "elem", elem: base }, { type: "kern", size: supKern }, { type: "elem", elem: supm, marginLeft: slant + "em" }, { type: "kern", size: options.fontMetrics().bigOpSpacing5 }], "bottom", bottom, options); - } - - return (0, _buildCommon.makeSpan)(["mop", "op-limits"], [finalGroup], options); - } else { - if (baseShift) { - base.style.position = "relative"; - base.style.top = baseShift + "em"; - } - - return base; - } - }; - - groupTypes.mod = function (group, options) { - var inner = []; - - if (group.value.modType === "bmod") { - // “\nonscript\mskip-\medmuskip\mkern5mu” - if (!options.style.isTight()) { - inner.push((0, _buildCommon.makeSpan)(["mspace", "negativemediumspace"], [], options)); - } - inner.push((0, _buildCommon.makeSpan)(["mspace", "thickspace"], [], options)); - } else if (options.style.size === _Style2.default.DISPLAY.size) { - inner.push((0, _buildCommon.makeSpan)(["mspace", "quad"], [], options)); - } else if (group.value.modType === "mod") { - inner.push((0, _buildCommon.makeSpan)(["mspace", "twelvemuspace"], [], options)); - } else { - inner.push((0, _buildCommon.makeSpan)(["mspace", "eightmuspace"], [], options)); - } - - if (group.value.modType === "pod" || group.value.modType === "pmod") { - inner.push(_buildCommon2.default.mathsym("(", group.mode)); - } - - if (group.value.modType !== "pod") { - var modInner = [_buildCommon2.default.mathsym("m", group.mode), _buildCommon2.default.mathsym("o", group.mode), _buildCommon2.default.mathsym("d", group.mode)]; - if (group.value.modType === "bmod") { - inner.push((0, _buildCommon.makeSpan)(["mbin"], modInner, options)); - // “\mkern5mu\nonscript\mskip-\medmuskip” 
- inner.push((0, _buildCommon.makeSpan)(["mspace", "thickspace"], [], options)); - if (!options.style.isTight()) { - inner.push((0, _buildCommon.makeSpan)(["mspace", "negativemediumspace"], [], options)); - } - } else { - Array.prototype.push.apply(inner, modInner); - inner.push((0, _buildCommon.makeSpan)(["mspace", "sixmuspace"], [], options)); - } - } - - if (group.value.value) { - Array.prototype.push.apply(inner, buildExpression(group.value.value, options, false)); - } - - if (group.value.modType === "pod" || group.value.modType === "pmod") { - inner.push(_buildCommon2.default.mathsym(")", group.mode)); - } - - return _buildCommon2.default.makeFragment(inner); - }; - - groupTypes.katex = function (group, options) { - // The KaTeX logo. The offsets for the K and a were chosen to look - // good, but the offsets for the T, E, and X were taken from the - // definition of \TeX in TeX (see TeXbook pg. 356) - var k = (0, _buildCommon.makeSpan)(["k"], [_buildCommon2.default.mathsym("K", group.mode)], options); - var a = (0, _buildCommon.makeSpan)(["a"], [_buildCommon2.default.mathsym("A", group.mode)], options); - - a.height = (a.height + 0.2) * 0.75; - a.depth = (a.height - 0.2) * 0.75; - - var t = (0, _buildCommon.makeSpan)(["t"], [_buildCommon2.default.mathsym("T", group.mode)], options); - var e = (0, _buildCommon.makeSpan)(["e"], [_buildCommon2.default.mathsym("E", group.mode)], options); - - e.height = e.height - 0.2155; - e.depth = e.depth + 0.2155; - - var x = (0, _buildCommon.makeSpan)(["x"], [_buildCommon2.default.mathsym("X", group.mode)], options); - - return (0, _buildCommon.makeSpan)(["mord", "katex-logo"], [k, a, t, e, x], options); - }; - - var makeLineSpan = function makeLineSpan(className, options, thickness) { - var line = (0, _buildCommon.makeSpan)([className], [], options); - line.height = thickness || options.fontMetrics().defaultRuleThickness; - line.style.borderBottomWidth = line.height + "em"; - line.maxFontSize = 1.0; - return line; - }; - - 
groupTypes.overline = function (group, options) { - // Overlines are handled in the TeXbook pg 443, Rule 9. - - // Build the inner group in the cramped style. - var innerGroup = buildGroup(group.value.body, options.havingCrampedStyle()); - - // Create the line above the body - var line = makeLineSpan("overline-line", options); - - // Generate the vlist, with the appropriate kerns - var vlist = _buildCommon2.default.makeVList([{ type: "elem", elem: innerGroup }, { type: "kern", size: 3 * line.height }, { type: "elem", elem: line }, { type: "kern", size: line.height }], "firstBaseline", null, options); - - return (0, _buildCommon.makeSpan)(["mord", "overline"], [vlist], options); - }; - - groupTypes.underline = function (group, options) { - // Underlines are handled in the TeXbook pg 443, Rule 10. - // Build the inner group. - var innerGroup = buildGroup(group.value.body, options); - - // Create the line above the body - var line = makeLineSpan("underline-line", options); - - // Generate the vlist, with the appropriate kerns - var vlist = _buildCommon2.default.makeVList([{ type: "kern", size: line.height }, { type: "elem", elem: line }, { type: "kern", size: 3 * line.height }, { type: "elem", elem: innerGroup }], "top", innerGroup.height, options); - - return (0, _buildCommon.makeSpan)(["mord", "underline"], [vlist], options); - }; - - groupTypes.sqrt = function (group, options) { - // Square roots are handled in the TeXbook pg. 443, Rule 11. - - // First, we do the same steps as in overline to build the inner group - // and line - var inner = buildGroup(group.value.body, options.havingCrampedStyle()); - - // Some groups can return document fragments. Handle those by wrapping - // them in a span. 
- if (inner instanceof _domTree2.default.documentFragment) { - inner = (0, _buildCommon.makeSpan)([], [inner], options); - } - - // Calculate the minimum size for the \surd delimiter - var metrics = options.fontMetrics(); - var theta = metrics.defaultRuleThickness; - - var phi = theta; - if (options.style.id < _Style2.default.TEXT.id) { - phi = options.fontMetrics().xHeight; - } - - // Calculate the clearance between the body and line - var lineClearance = theta + phi / 4; - - var minDelimiterHeight = (inner.height + inner.depth + lineClearance + theta) * options.sizeMultiplier; - - // Create a sqrt SVG of the required minimum size - var img = _delimiter2.default.customSizedDelim("\\surd", minDelimiterHeight, false, options, group.mode); - - // Calculate the actual line width. - // This actually should depend on the chosen font -- e.g. \boldmath - // should use the thicker surd symbols from e.g. KaTeX_Main-Bold, and - // have thicker rules. - var ruleWidth = options.fontMetrics().sqrtRuleThickness * img.sizeMultiplier; - - var delimDepth = img.height - ruleWidth; - - // Adjust the clearance based on the delimiter size - if (delimDepth > inner.height + inner.depth + lineClearance) { - lineClearance = (lineClearance + delimDepth - inner.height - inner.depth) / 2; - } - - // Shift the sqrt image - var imgShift = img.height - inner.height - lineClearance - ruleWidth; - - // We add a special case here, because even when `inner` is empty, we - // still get a line. So, we use a simple heuristic to decide if we - // should omit the body entirely. (note this doesn't work for something - // like `\sqrt{\rlap{x}}`, but if someone is doing that they deserve for - // it not to work. - var body = void 0; - if (inner.height === 0 && inner.depth === 0) { - body = (0, _buildCommon.makeSpan)(); - } else { - inner.style.paddingLeft = img.surdWidth + "em"; - - // Overlay the image and the argument. 
- body = _buildCommon2.default.makeVList([{ type: "elem", elem: inner }, { type: "kern", size: -(inner.height + imgShift) }, { type: "elem", elem: img }, { type: "kern", size: ruleWidth }], "firstBaseline", null, options); - body.children[0].children[0].classes.push("svg-align"); - } - - if (!group.value.index) { - return (0, _buildCommon.makeSpan)(["mord", "sqrt"], [body], options); - } else { - // Handle the optional root index - - // The index is always in scriptscript style - var newOptions = options.havingStyle(_Style2.default.SCRIPTSCRIPT); - var rootm = buildGroup(group.value.index, newOptions, options); - - // The amount the index is shifted by. This is taken from the TeX - // source, in the definition of `\r@@t`. - var toShift = 0.6 * (body.height - body.depth); - - // Build a VList with the superscript shifted up correctly - var rootVList = _buildCommon2.default.makeVList([{ type: "elem", elem: rootm }], "shift", -toShift, options); - // Add a class surrounding it so we can add on the appropriate - // kerning - var rootVListWrap = (0, _buildCommon.makeSpan)(["root"], [rootVList]); - - return (0, _buildCommon.makeSpan)(["mord", "sqrt"], [rootVListWrap, body], options); - } - }; - - function sizingGroup(value, options, baseOptions) { - var inner = buildExpression(value, options, false); - var multiplier = options.sizeMultiplier / baseOptions.sizeMultiplier; - - // Add size-resetting classes to the inner list and set maxFontSize - // manually. Handle nested size changes. - for (var i = 0; i < inner.length; i++) { - var pos = _utils2.default.indexOf(inner[i].classes, "sizing"); - if (pos < 0) { - Array.prototype.push.apply(inner[i].classes, options.sizingClasses(baseOptions)); - } else if (inner[i].classes[pos + 1] === "reset-size" + options.size) { - // This is a nested size change: e.g., inner[i] is the "b" in - // `\Huge a \small b`. Override the old size (the `reset-` class) - // but not the new size. 
- inner[i].classes[pos + 1] = "reset-size" + baseOptions.size; - } - - inner[i].height *= multiplier; - inner[i].depth *= multiplier; - } - - return _buildCommon2.default.makeFragment(inner); - } - - groupTypes.sizing = function (group, options) { - // Handle sizing operators like \Huge. Real TeX doesn't actually allow - // these functions inside of math expressions, so we do some special - // handling. - var newOptions = options.havingSize(group.value.size); - return sizingGroup(group.value.value, newOptions, options); - }; - - groupTypes.styling = function (group, options) { - // Style changes are handled in the TeXbook on pg. 442, Rule 3. - - // Figure out what style we're changing to. - var styleMap = { - "display": _Style2.default.DISPLAY, - "text": _Style2.default.TEXT, - "script": _Style2.default.SCRIPT, - "scriptscript": _Style2.default.SCRIPTSCRIPT - }; - - var newStyle = styleMap[group.value.style]; - var newOptions = options.havingStyle(newStyle); - return sizingGroup(group.value.value, newOptions, options); - }; - - groupTypes.font = function (group, options) { - var font = group.value.font; - return buildGroup(group.value.body, options.withFont(font)); - }; - - groupTypes.delimsizing = function (group, options) { - var delim = group.value.value; - - if (delim === ".") { - // Empty delimiters still count as elements, even though they don't - // show anything. - return (0, _buildCommon.makeSpan)([group.value.mclass]); - } - - // Use delimiter.sizedDelim to generate the delimiter. 
- return _delimiter2.default.sizedDelim(delim, group.value.size, options, group.mode, [group.value.mclass]); - }; - - groupTypes.leftright = function (group, options) { - // Build the inner expression - var inner = buildExpression(group.value.body, options, true); - - var innerHeight = 0; - var innerDepth = 0; - var hadMiddle = false; - - // Calculate its height and depth - for (var i = 0; i < inner.length; i++) { - if (inner[i].isMiddle) { - hadMiddle = true; - } else { - innerHeight = Math.max(inner[i].height, innerHeight); - innerDepth = Math.max(inner[i].depth, innerDepth); - } - } - - // The size of delimiters is the same, regardless of what style we are - // in. Thus, to correctly calculate the size of delimiter we need around - // a group, we scale down the inner size based on the size. - innerHeight *= options.sizeMultiplier; - innerDepth *= options.sizeMultiplier; - - var leftDelim = void 0; - if (group.value.left === ".") { - // Empty delimiters in \left and \right make null delimiter spaces. - leftDelim = makeNullDelimiter(options, ["mopen"]); - } else { - // Otherwise, use leftRightDelim to generate the correct sized - // delimiter. 
- leftDelim = _delimiter2.default.leftRightDelim(group.value.left, innerHeight, innerDepth, options, group.mode, ["mopen"]); - } - // Add it to the beginning of the expression - inner.unshift(leftDelim); - - // Handle middle delimiters - if (hadMiddle) { - for (var _i4 = 1; _i4 < inner.length; _i4++) { - var middleDelim = inner[_i4]; - if (middleDelim.isMiddle) { - // Apply the options that were active when \middle was called - inner[_i4] = _delimiter2.default.leftRightDelim(middleDelim.isMiddle.value, innerHeight, innerDepth, middleDelim.isMiddle.options, group.mode, []); - // Add back spaces shifted into the delimiter - var spaces = spliceSpaces(middleDelim.children, 0); - if (spaces) { - _buildCommon2.default.prependChildren(inner[_i4], spaces); - } - } - } - } - - var rightDelim = void 0; - // Same for the right delimiter - if (group.value.right === ".") { - rightDelim = makeNullDelimiter(options, ["mclose"]); - } else { - rightDelim = _delimiter2.default.leftRightDelim(group.value.right, innerHeight, innerDepth, options, group.mode, ["mclose"]); - } - // Add it to the end of the expression. 
- inner.push(rightDelim); - - return (0, _buildCommon.makeSpan)(["minner"], inner, options); - }; - - groupTypes.middle = function (group, options) { - var middleDelim = void 0; - if (group.value.value === ".") { - middleDelim = makeNullDelimiter(options, []); - } else { - middleDelim = _delimiter2.default.sizedDelim(group.value.value, 1, options, group.mode, []); - middleDelim.isMiddle = { value: group.value.value, options: options }; - } - return middleDelim; - }; - - groupTypes.rule = function (group, options) { - // Make an empty span for the rule - var rule = (0, _buildCommon.makeSpan)(["mord", "rule"], [], options); - - // Calculate the shift, width, and height of the rule, and account for units - var shift = 0; - if (group.value.shift) { - shift = _units2.default.calculateSize(group.value.shift, options); - } - - var width = _units2.default.calculateSize(group.value.width, options); - var height = _units2.default.calculateSize(group.value.height, options); - - // Style the rule to the right size - rule.style.borderRightWidth = width + "em"; - rule.style.borderTopWidth = height + "em"; - rule.style.bottom = shift + "em"; - - // Record the height and width - rule.width = width; - rule.height = height + shift; - rule.depth = -shift; - // Font size is the number large enough that the browser will - // reserve at least `absHeight` space above the baseline. - // The 1.125 factor was empirically determined - rule.maxFontSize = height * 1.125 * options.sizeMultiplier; - - return rule; - }; - - groupTypes.kern = function (group, options) { - // Make an empty span for the rule - var rule = (0, _buildCommon.makeSpan)(["mord", "rule"], [], options); - - if (group.value.dimension) { - var dimension = _units2.default.calculateSize(group.value.dimension, options); - rule.style.marginLeft = dimension + "em"; - } - - return rule; - }; - - groupTypes.accent = function (group, options) { - // Accents are handled in the TeXbook pg. 443, rule 12. 
- var base = group.value.base; - - var supsubGroup = void 0; - if (group.type === "supsub") { - // If our base is a character box, and we have superscripts and - // subscripts, the supsub will defer to us. In particular, we want - // to attach the superscripts and subscripts to the inner body (so - // that the position of the superscripts and subscripts won't be - // affected by the height of the accent). We accomplish this by - // sticking the base of the accent into the base of the supsub, and - // rendering that, while keeping track of where the accent is. - - // The supsub group is the group that was passed in - var supsub = group; - // The real accent group is the base of the supsub group - group = supsub.value.base; - // The character box is the base of the accent group - base = group.value.base; - // Stick the character box into the base of the supsub group - supsub.value.base = base; - - // Rerender the supsub group with its new base, and store that - // result. - supsubGroup = buildGroup(supsub, options); - } - - // Build the base group - var body = buildGroup(base, options.havingCrampedStyle()); - - // Does the accent need to shift for the skew of a character? - var mustShift = group.value.isShifty && isCharacterBox(base); - - // Calculate the skew of the accent. This is based on the line "If the - // nucleus is not a single character, let s = 0; otherwise set s to the - // kern amount for the nucleus followed by the \skewchar of its font." - // Note that our skew metrics are just the kern between each character - // and the skewchar. - var skew = 0; - if (mustShift) { - // If the base is a character box, then we want the skew of the - // innermost character. To do that, we find the innermost character: - var baseChar = getBaseElem(base); - // Then, we render its group to get the symbol inside it - var baseGroup = buildGroup(baseChar, options.havingCrampedStyle()); - // Finally, we pull the skew off of the symbol. 
- skew = baseGroup.skew; - // Note that we now throw away baseGroup, because the layers we - // removed with getBaseElem might contain things like \color which - // we can't get rid of. - // TODO(emily): Find a better way to get the skew - } - - // calculate the amount of space between the body and the accent - var clearance = Math.min(body.height, options.fontMetrics().xHeight); - - // Build the accent - var accentBody = void 0; - if (!group.value.isStretchy) { - var accent = _buildCommon2.default.makeSymbol(group.value.label, "Main-Regular", group.mode, options); - // Remove the italic correction of the accent, because it only serves to - // shift the accent over to a place we don't want. - accent.italic = 0; - - // The \vec character that the fonts use is a combining character, and - // thus shows up much too far to the left. To account for this, we add a - // specific class which shifts the accent over to where we want it. - // TODO(emily): Fix this in a better way, like by changing the font - // Similarly, text accent \H is a combining character and - // requires a different adjustment. - var accentClass = null; - if (group.value.label === "\\vec") { - accentClass = "accent-vec"; - } else if (group.value.label === '\\H') { - accentClass = "accent-hungarian"; - } - - accentBody = (0, _buildCommon.makeSpan)([], [accent]); - accentBody = (0, _buildCommon.makeSpan)(["accent-body", accentClass], [accentBody]); - - // Shift the accent over by the skew. Note we shift by twice the skew - // because we are centering the accent, so by adding 2*skew to the left, - // we shift it to the right by 1*skew. 
- accentBody.style.marginLeft = 2 * skew + "em"; - - accentBody = _buildCommon2.default.makeVList([{ type: "elem", elem: body }, { type: "kern", size: -clearance }, { type: "elem", elem: accentBody }], "firstBaseline", null, options); - } else { - accentBody = _stretchy2.default.svgSpan(group, options); - - accentBody = _buildCommon2.default.makeVList([{ type: "elem", elem: body }, { type: "elem", elem: accentBody }], "firstBaseline", null, options); - - var styleSpan = accentBody.children[0].children[0].children[1]; - styleSpan.classes.push("svg-align"); // text-align: left; - if (skew > 0) { - // Shorten the accent and nudge it to the right. - styleSpan.style.width = "calc(100% - " + 2 * skew + "em)"; - styleSpan.style.marginLeft = 2 * skew + "em"; - } - } - - var accentWrap = (0, _buildCommon.makeSpan)(["mord", "accent"], [accentBody], options); - - if (supsubGroup) { - // Here, we replace the "base" child of the supsub with our newly - // generated accent. - supsubGroup.children[0] = accentWrap; - - // Since we don't rerun the height calculation after replacing the - // accent, we manually recalculate height. - supsubGroup.height = Math.max(accentWrap.height, supsubGroup.height); - - // Accents should always be ords, even when their innards are not. - supsubGroup.classes[0] = "mord"; - - return supsubGroup; - } else { - return accentWrap; - } - }; - - groupTypes.horizBrace = function (group, options) { - var style = options.style; - - var hasSupSub = group.type === "supsub"; - var supSubGroup = void 0; - var newOptions = void 0; - if (hasSupSub) { - // Ref: LaTeX source2e: }}}}\limits} - // i.e. LaTeX treats the brace similar to an op and passes it - // with \limits, so we need to assign supsub style. 
- if (group.value.sup) { - newOptions = options.havingStyle(style.sup()); - supSubGroup = buildGroup(group.value.sup, newOptions, options); - } else { - newOptions = options.havingStyle(style.sub()); - supSubGroup = buildGroup(group.value.sub, newOptions, options); - } - group = group.value.base; - } - - // Build the base group - var body = buildGroup(group.value.base, options.havingBaseStyle(_Style2.default.DISPLAY)); - - // Create the stretchy element - var braceBody = _stretchy2.default.svgSpan(group, options); - - // Generate the vlist, with the appropriate kerns ┏━━━━━━━━┓ - // This first vlist contains the subject matter and the brace: equation - var vlist = void 0; - if (group.value.isOver) { - vlist = _buildCommon2.default.makeVList([{ type: "elem", elem: body }, { type: "kern", size: 0.1 }, { type: "elem", elem: braceBody }], "firstBaseline", null, options); - vlist.children[0].children[0].children[1].classes.push("svg-align"); - } else { - vlist = _buildCommon2.default.makeVList([{ type: "elem", elem: braceBody }, { type: "kern", size: 0.1 }, { type: "elem", elem: body }], "bottom", body.depth + 0.1 + braceBody.height, options); - vlist.children[0].children[0].children[0].classes.push("svg-align"); - } - - if (hasSupSub) { - // In order to write the supsub, wrap the first vlist in another vlist: - // They can't all go in the same vlist, because the note might be wider - // than the equation. We want the equation to control the brace width. - - // note long note long note - // ┏━━━━━━━━┓ or ┏━━━┓ not ┏━━━━━━━━━┓ - // equation eqn eqn - - var vSpan = (0, _buildCommon.makeSpan)(["mord", group.value.isOver ? 
"mover" : "munder"], [vlist], options); - - if (group.value.isOver) { - vlist = _buildCommon2.default.makeVList([{ type: "elem", elem: vSpan }, { type: "kern", size: 0.2 }, { type: "elem", elem: supSubGroup }], "firstBaseline", null, options); - } else { - vlist = _buildCommon2.default.makeVList([{ type: "elem", elem: supSubGroup }, { type: "kern", size: 0.2 }, { type: "elem", elem: vSpan }], "bottom", vSpan.depth + 0.2 + supSubGroup.height, options); - } - } - - return (0, _buildCommon.makeSpan)(["mord", group.value.isOver ? "mover" : "munder"], [vlist], options); - }; - - groupTypes.accentUnder = function (group, options) { - // Treat under accents much like underlines. - var innerGroup = buildGroup(group.value.body, options); - - var accentBody = _stretchy2.default.svgSpan(group, options); - var kern = /tilde/.test(group.value.label) ? 0.12 : 0; - - // Generate the vlist, with the appropriate kerns - var vlist = _buildCommon2.default.makeVList([{ type: "elem", elem: accentBody }, { type: "kern", size: kern }, { type: "elem", elem: innerGroup }], "bottom", accentBody.height + kern, options); - - vlist.children[0].children[0].children[0].classes.push("svg-align"); - - return (0, _buildCommon.makeSpan)(["mord", "accentunder"], [vlist], options); - }; - - groupTypes.enclose = function (group, options) { - // \cancel, \bcancel, \xcancel, \sout, \fbox - var inner = buildGroup(group.value.body, options); - - var label = group.value.label.substr(1); - var scale = options.sizeMultiplier; - var img = void 0; - var pad = 0; - var imgShift = 0; - - if (label === "sout") { - img = (0, _buildCommon.makeSpan)(["stretchy", "sout"]); - img.height = options.fontMetrics().defaultRuleThickness / scale; - imgShift = -0.5 * options.fontMetrics().xHeight; - } else { - // Add horizontal padding - inner.classes.push(label === "fbox" ? 
"boxpad" : "cancel-pad"); - - // Add vertical padding - var isCharBox = isCharacterBox(group.value.body); - // ref: LaTeX source2e: \fboxsep = 3pt; \fboxrule = .4pt - // ref: cancel package: \advance\totalheight2\p@ % "+2" - pad = label === "fbox" ? 0.34 : isCharBox ? 0.2 : 0; - imgShift = inner.depth + pad; - - img = _stretchy2.default.encloseSpan(inner, label, pad, options); - } - - var vlist = _buildCommon2.default.makeVList([{ type: "elem", elem: inner, shift: 0 }, { type: "elem", elem: img, shift: imgShift }], "individualShift", null, options); - - if (label !== "fbox") { - vlist.children[0].children[0].children[1].classes.push("svg-align"); - } - - if (/cancel/.test(label)) { - // cancel does not create horiz space for its line extension. - // That is, not when adjacent to a mord. - return (0, _buildCommon.makeSpan)(["mord", "cancel-lap"], [vlist], options); - } else { - return (0, _buildCommon.makeSpan)(["mord"], [vlist], options); - } - }; - - groupTypes.xArrow = function (group, options) { - var style = options.style; - - // Build the argument groups in the appropriate style. - // Ref: amsmath.dtx: \hbox{$\scriptstyle\mkern#3mu{#6}\mkern#4mu$}% - - var newOptions = options.havingStyle(style.sup()); - var upperGroup = buildGroup(group.value.body, newOptions, options); - upperGroup.classes.push("x-arrow-pad"); - - var lowerGroup = void 0; - if (group.value.below) { - // Build the lower group - newOptions = options.havingStyle(style.sub()); - lowerGroup = buildGroup(group.value.below, newOptions, options); - lowerGroup.classes.push("x-arrow-pad"); - } - - var arrowBody = _stretchy2.default.svgSpan(group, options); - - var arrowShift = -options.fontMetrics().axisHeight + arrowBody.depth; - var upperShift = -options.fontMetrics().axisHeight - arrowBody.height - 0.111; // 2 mu. 
Ref: amsmath.dtx: #7\if0#2\else\mkern#2mu\fi - - // Generate the vlist - var vlist = void 0; - if (group.value.below) { - var lowerShift = -options.fontMetrics().axisHeight + lowerGroup.height + arrowBody.height + 0.111; - vlist = _buildCommon2.default.makeVList([{ type: "elem", elem: upperGroup, shift: upperShift }, { type: "elem", elem: arrowBody, shift: arrowShift }, { type: "elem", elem: lowerGroup, shift: lowerShift }], "individualShift", null, options); - } else { - vlist = _buildCommon2.default.makeVList([{ type: "elem", elem: upperGroup, shift: upperShift }, { type: "elem", elem: arrowBody, shift: arrowShift }], "individualShift", null, options); - } - - vlist.children[0].children[0].children[1].classes.push("svg-align"); - - return (0, _buildCommon.makeSpan)(["mrel", "x-arrow"], [vlist], options); - }; - - groupTypes.phantom = function (group, options) { - var elements = buildExpression(group.value.value, options.withPhantom(), false); - - // \phantom isn't supposed to affect the elements it contains. - // See "color" for more details. - return new _buildCommon2.default.makeFragment(elements); - }; - - groupTypes.mclass = function (group, options) { - var elements = buildExpression(group.value.value, options, true); - - return (0, _buildCommon.makeSpan)([group.value.mclass], elements, options); - }; - - /** - * buildGroup is the function that takes a group and calls the correct groupType - * function for it. It also handles the interaction of size and style changes - * between parents and children. - */ - var buildGroup = function buildGroup(group, options, baseOptions) { - if (!group) { - return (0, _buildCommon.makeSpan)(); - } - - if (groupTypes[group.type]) { - // Call the groupTypes function - var groupNode = groupTypes[group.type](group, options); - - // If the size changed between the parent and the current group, account - // for that size difference. 
- if (baseOptions && options.size !== baseOptions.size) { - groupNode = (0, _buildCommon.makeSpan)(options.sizingClasses(baseOptions), [groupNode], options); - - var multiplier = options.sizeMultiplier / baseOptions.sizeMultiplier; - - groupNode.height *= multiplier; - groupNode.depth *= multiplier; - } - - return groupNode; - } else { - throw new _ParseError2.default("Got group of unknown type: '" + group.type + "'"); - } - }; - - /** - * Take an entire parse tree, and build it into an appropriate set of HTML - * nodes. - */ - var buildHTML = function buildHTML(tree, options) { - // buildExpression is destructive, so we need to make a clone - // of the incoming tree so that it isn't accidentally changed - tree = JSON.parse((0, _stringify2.default)(tree)); - - // Build the expression contained in the tree - var expression = buildExpression(tree, options, true); - var body = (0, _buildCommon.makeSpan)(["base"], expression, options); - - // Add struts, which ensure that the top of the HTML element falls at the - // height of the expression, and the bottom of the HTML element falls at the - // depth of the expression. 
- var topStrut = (0, _buildCommon.makeSpan)(["strut"]); - var bottomStrut = (0, _buildCommon.makeSpan)(["strut", "bottom"]); - - topStrut.style.height = body.height + "em"; - bottomStrut.style.height = body.height + body.depth + "em"; - // We'd like to use `vertical-align: top` but in IE 9 this lowers the - // baseline of the box to the bottom of this strut (instead staying in the - // normal place) so we use an absolute value for vertical-align instead - bottomStrut.style.verticalAlign = -body.depth + "em"; - - // Wrap the struts and body together - var htmlNode = (0, _buildCommon.makeSpan)(["katex-html"], [topStrut, bottomStrut, body]); - - htmlNode.setAttribute("aria-hidden", "true"); - - return htmlNode; - }; - - module.exports = buildHTML; - - },{"./ParseError":29,"./Style":33,"./buildCommon":34,"./delimiter":38,"./domTree":39,"./stretchy":47,"./units":50,"./utils":51,"babel-runtime/core-js/json/stringify":2}],36:[function(require,module,exports){ - - var _buildCommon = require("./buildCommon"); - - var _buildCommon2 = _interopRequireDefault(_buildCommon); - - var _fontMetrics = require("./fontMetrics"); - - var _fontMetrics2 = _interopRequireDefault(_fontMetrics); - - var _mathMLTree = require("./mathMLTree"); - - var _mathMLTree2 = _interopRequireDefault(_mathMLTree); - - var _ParseError = require("./ParseError"); - - var _ParseError2 = _interopRequireDefault(_ParseError); - - var _Style = require("./Style"); - - var _Style2 = _interopRequireDefault(_Style); - - var _symbols = require("./symbols"); - - var _symbols2 = _interopRequireDefault(_symbols); - - var _utils = require("./utils"); - - var _utils2 = _interopRequireDefault(_utils); - - var _stretchy = require("./stretchy"); - - var _stretchy2 = _interopRequireDefault(_stretchy); - - function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - - /** - * Takes a symbol and converts it into a MathML text node after performing - * optional replacement from symbols.js. 
- */ - /** - * This file converts a parse tree into a cooresponding MathML tree. The main - * entry point is the `buildMathML` function, which takes a parse tree from the - * parser. - */ - - var makeText = function makeText(text, mode) { - if (_symbols2.default[mode][text] && _symbols2.default[mode][text].replace) { - text = _symbols2.default[mode][text].replace; - } - - return new _mathMLTree2.default.TextNode(text); - }; - - /** - * Returns the math variant as a string or null if none is required. - */ - var getVariant = function getVariant(group, options) { - var font = options.font; - if (!font) { - return null; - } - - var mode = group.mode; - if (font === "mathit") { - return "italic"; - } - - var value = group.value; - if (_utils2.default.contains(["\\imath", "\\jmath"], value)) { - return null; - } - - if (_symbols2.default[mode][value] && _symbols2.default[mode][value].replace) { - value = _symbols2.default[mode][value].replace; - } - - var fontName = _buildCommon.fontMap[font].fontName; - if (_fontMetrics2.default.getCharacterMetrics(value, fontName)) { - return _buildCommon.fontMap[options.font].variant; - } - - return null; - }; - - /** - * Functions for handling the different types of groups found in the parse - * tree. Each function should take a parse group and return a MathML node. 
- */ - var groupTypes = {}; - - var defaultVariant = { - "mi": "italic", - "mn": "normal", - "mtext": "normal" - }; - - groupTypes.mathord = function (group, options) { - var node = new _mathMLTree2.default.MathNode("mi", [makeText(group.value, group.mode)]); - - var variant = getVariant(group, options) || "italic"; - if (variant !== defaultVariant[node.type]) { - node.setAttribute("mathvariant", variant); - } - return node; - }; - - groupTypes.textord = function (group, options) { - var text = makeText(group.value, group.mode); - - var variant = getVariant(group, options) || "normal"; - - var node = void 0; - if (group.mode === 'text') { - node = new _mathMLTree2.default.MathNode("mtext", [text]); - } else if (/[0-9]/.test(group.value)) { - // TODO(kevinb) merge adjacent nodes - // do it as a post processing step - node = new _mathMLTree2.default.MathNode("mn", [text]); - } else if (group.value === "\\prime") { - node = new _mathMLTree2.default.MathNode("mo", [text]); - } else { - node = new _mathMLTree2.default.MathNode("mi", [text]); - } - if (variant !== defaultVariant[node.type]) { - node.setAttribute("mathvariant", variant); - } - - return node; - }; - - groupTypes.bin = function (group) { - var node = new _mathMLTree2.default.MathNode("mo", [makeText(group.value, group.mode)]); - - return node; - }; - - groupTypes.rel = function (group) { - var node = new _mathMLTree2.default.MathNode("mo", [makeText(group.value, group.mode)]); - - return node; - }; - - groupTypes.open = function (group) { - var node = new _mathMLTree2.default.MathNode("mo", [makeText(group.value, group.mode)]); - - return node; - }; - - groupTypes.close = function (group) { - var node = new _mathMLTree2.default.MathNode("mo", [makeText(group.value, group.mode)]); - - return node; - }; - - groupTypes.inner = function (group) { - var node = new _mathMLTree2.default.MathNode("mo", [makeText(group.value, group.mode)]); - - return node; - }; - - groupTypes.punct = function (group) { - var node = 
new _mathMLTree2.default.MathNode("mo", [makeText(group.value, group.mode)]); - - node.setAttribute("separator", "true"); - - return node; - }; - - groupTypes.ordgroup = function (group, options) { - var inner = buildExpression(group.value, options); - - var node = new _mathMLTree2.default.MathNode("mrow", inner); - - return node; - }; - - groupTypes.text = function (group, options) { - var body = group.value.body; - - // Convert each element of the body into MathML, and combine consecutive - // outputs into a single tag. In this way, we don't - // nest non-text items (e.g., $nested-math$) within an . - var inner = []; - var currentText = null; - for (var i = 0; i < body.length; i++) { - var _group = buildGroup(body[i], options); - if (_group.type === 'mtext' && currentText != null) { - Array.prototype.push.apply(currentText.children, _group.children); - } else { - inner.push(_group); - if (_group.type === 'mtext') { - currentText = _group; - } - } - } - - // If there is a single tag in the end (presumably ), - // just return it. Otherwise, wrap them in an . - if (inner.length === 1) { - return inner[0]; - } else { - return new _mathMLTree2.default.MathNode("mrow", inner); - } - }; - - groupTypes.color = function (group, options) { - var inner = buildExpression(group.value.value, options); - - var node = new _mathMLTree2.default.MathNode("mstyle", inner); - - node.setAttribute("mathcolor", group.value.color); - - return node; - }; - - groupTypes.supsub = function (group, options) { - // Is the inner group a relevant horizonal brace? - var isBrace = false; - var isOver = void 0; - var isSup = void 0; - if (group.value.base) { - if (group.value.base.value.type === "horizBrace") { - isSup = group.value.sup ? 
true : false; - if (isSup === group.value.base.value.isOver) { - isBrace = true; - isOver = group.value.base.value.isOver; - } - } - } - - var removeUnnecessaryRow = true; - var children = [buildGroup(group.value.base, options, removeUnnecessaryRow)]; - - if (group.value.sub) { - children.push(buildGroup(group.value.sub, options, removeUnnecessaryRow)); - } - - if (group.value.sup) { - children.push(buildGroup(group.value.sup, options, removeUnnecessaryRow)); - } - - var nodeType = void 0; - if (isBrace) { - nodeType = isOver ? "mover" : "munder"; - } else if (!group.value.sub) { - nodeType = "msup"; - } else if (!group.value.sup) { - nodeType = "msub"; - } else { - var base = group.value.base; - if (base && base.value.limits && options.style === _Style2.default.DISPLAY) { - nodeType = "munderover"; - } else { - nodeType = "msubsup"; - } - } - - var node = new _mathMLTree2.default.MathNode(nodeType, children); - - return node; - }; - - groupTypes.genfrac = function (group, options) { - var node = new _mathMLTree2.default.MathNode("mfrac", [buildGroup(group.value.numer, options), buildGroup(group.value.denom, options)]); - - if (!group.value.hasBarLine) { - node.setAttribute("linethickness", "0px"); - } - - if (group.value.leftDelim != null || group.value.rightDelim != null) { - var withDelims = []; - - if (group.value.leftDelim != null) { - var leftOp = new _mathMLTree2.default.MathNode("mo", [new _mathMLTree2.default.TextNode(group.value.leftDelim)]); - - leftOp.setAttribute("fence", "true"); - - withDelims.push(leftOp); - } - - withDelims.push(node); - - if (group.value.rightDelim != null) { - var rightOp = new _mathMLTree2.default.MathNode("mo", [new _mathMLTree2.default.TextNode(group.value.rightDelim)]); - - rightOp.setAttribute("fence", "true"); - - withDelims.push(rightOp); - } - - var outerNode = new _mathMLTree2.default.MathNode("mrow", withDelims); - - return outerNode; - } - - return node; - }; - - groupTypes.array = function (group, options) { - return 
new _mathMLTree2.default.MathNode("mtable", group.value.body.map(function (row) { - return new _mathMLTree2.default.MathNode("mtr", row.map(function (cell) { - return new _mathMLTree2.default.MathNode("mtd", [buildGroup(cell, options)]); - })); - })); - }; - - groupTypes.sqrt = function (group, options) { - var node = void 0; - if (group.value.index) { - node = new _mathMLTree2.default.MathNode("mroot", [buildGroup(group.value.body, options), buildGroup(group.value.index, options)]); - } else { - node = new _mathMLTree2.default.MathNode("msqrt", [buildGroup(group.value.body, options)]); - } - - return node; - }; - - groupTypes.leftright = function (group, options) { - var inner = buildExpression(group.value.body, options); - - if (group.value.left !== ".") { - var leftNode = new _mathMLTree2.default.MathNode("mo", [makeText(group.value.left, group.mode)]); - - leftNode.setAttribute("fence", "true"); - - inner.unshift(leftNode); - } - - if (group.value.right !== ".") { - var rightNode = new _mathMLTree2.default.MathNode("mo", [makeText(group.value.right, group.mode)]); - - rightNode.setAttribute("fence", "true"); - - inner.push(rightNode); - } - - var outerNode = new _mathMLTree2.default.MathNode("mrow", inner); - - return outerNode; - }; - - groupTypes.middle = function (group, options) { - var middleNode = new _mathMLTree2.default.MathNode("mo", [makeText(group.value.middle, group.mode)]); - middleNode.setAttribute("fence", "true"); - return middleNode; - }; - - groupTypes.accent = function (group, options) { - var accentNode = void 0; - if (group.value.isStretchy) { - accentNode = _stretchy2.default.mathMLnode(group.value.label); - } else { - accentNode = new _mathMLTree2.default.MathNode("mo", [makeText(group.value.label, group.mode)]); - } - - var node = new _mathMLTree2.default.MathNode("mover", [buildGroup(group.value.base, options), accentNode]); - - node.setAttribute("accent", "true"); - - return node; - }; - - groupTypes.spacing = function (group) { - var 
node = void 0; - - if (group.value === "\\ " || group.value === "\\space" || group.value === " " || group.value === "~") { - node = new _mathMLTree2.default.MathNode("mtext", [new _mathMLTree2.default.TextNode("\xA0")]); - } else { - node = new _mathMLTree2.default.MathNode("mspace"); - - node.setAttribute("width", _buildCommon2.default.spacingFunctions[group.value].size); - } - - return node; - }; - - groupTypes.op = function (group, options) { - var node = void 0; - - // TODO(emily): handle big operators using the `largeop` attribute - - if (group.value.symbol) { - // This is a symbol. Just add the symbol. - node = new _mathMLTree2.default.MathNode("mo", [makeText(group.value.body, group.mode)]); - } else if (group.value.value) { - // This is an operator with children. Add them. - node = new _mathMLTree2.default.MathNode("mo", buildExpression(group.value.value, options)); - } else { - // This is a text operator. Add all of the characters from the - // operator's name. - // TODO(emily): Add a space in the middle of some of these - // operators, like \limsup. 
- node = new _mathMLTree2.default.MathNode("mi", [new _mathMLTree2.default.TextNode(group.value.body.slice(1))]); - } - - return node; - }; - - groupTypes.mod = function (group, options) { - var inner = []; - - if (group.value.modType === "pod" || group.value.modType === "pmod") { - inner.push(new _mathMLTree2.default.MathNode("mo", [makeText("(", group.mode)])); - } - if (group.value.modType !== "pod") { - inner.push(new _mathMLTree2.default.MathNode("mo", [makeText("mod", group.mode)])); - } - if (group.value.value) { - var space = new _mathMLTree2.default.MathNode("mspace"); - space.setAttribute("width", "0.333333em"); - inner.push(space); - inner = inner.concat(buildExpression(group.value.value, options)); - } - if (group.value.modType === "pod" || group.value.modType === "pmod") { - inner.push(new _mathMLTree2.default.MathNode("mo", [makeText(")", group.mode)])); - } - - return new _mathMLTree2.default.MathNode("mo", inner); - }; - - groupTypes.katex = function (group) { - var node = new _mathMLTree2.default.MathNode("mtext", [new _mathMLTree2.default.TextNode("KaTeX")]); - - return node; - }; - - groupTypes.font = function (group, options) { - var font = group.value.font; - return buildGroup(group.value.body, options.withFont(font)); - }; - - groupTypes.delimsizing = function (group) { - var children = []; - - if (group.value.value !== ".") { - children.push(makeText(group.value.value, group.mode)); - } - - var node = new _mathMLTree2.default.MathNode("mo", children); - - if (group.value.mclass === "mopen" || group.value.mclass === "mclose") { - // Only some of the delimsizing functions act as fences, and they - // return "mopen" or "mclose" mclass. - node.setAttribute("fence", "true"); - } else { - // Explicitly disable fencing if it's not a fence, to override the - // defaults. - node.setAttribute("fence", "false"); - } - - return node; - }; - - groupTypes.styling = function (group, options) { - // Figure out what style we're changing to. 
- // TODO(kevinb): dedupe this with buildHTML.js - // This will be easier of handling of styling nodes is in the same file. - var styleMap = { - "display": _Style2.default.DISPLAY, - "text": _Style2.default.TEXT, - "script": _Style2.default.SCRIPT, - "scriptscript": _Style2.default.SCRIPTSCRIPT - }; - - var newStyle = styleMap[group.value.style]; - var newOptions = options.havingStyle(newStyle); - - var inner = buildExpression(group.value.value, newOptions); - - var node = new _mathMLTree2.default.MathNode("mstyle", inner); - - var styleAttributes = { - "display": ["0", "true"], - "text": ["0", "false"], - "script": ["1", "false"], - "scriptscript": ["2", "false"] - }; - - var attr = styleAttributes[group.value.style]; - - node.setAttribute("scriptlevel", attr[0]); - node.setAttribute("displaystyle", attr[1]); - - return node; - }; - - groupTypes.sizing = function (group, options) { - var newOptions = options.havingSize(group.value.size); - var inner = buildExpression(group.value.value, newOptions); - - var node = new _mathMLTree2.default.MathNode("mstyle", inner); - - // TODO(emily): This doesn't produce the correct size for nested size - // changes, because we don't keep state of what style we're currently - // in, so we can't reset the size to normal before changing it. Now - // that we're passing an options parameter we should be able to fix - // this. 
- node.setAttribute("mathsize", newOptions.sizeMultiplier + "em"); - - return node; - }; - - groupTypes.overline = function (group, options) { - var operator = new _mathMLTree2.default.MathNode("mo", [new _mathMLTree2.default.TextNode("\u203E")]); - operator.setAttribute("stretchy", "true"); - - var node = new _mathMLTree2.default.MathNode("mover", [buildGroup(group.value.body, options), operator]); - node.setAttribute("accent", "true"); - - return node; - }; - - groupTypes.underline = function (group, options) { - var operator = new _mathMLTree2.default.MathNode("mo", [new _mathMLTree2.default.TextNode("\u203E")]); - operator.setAttribute("stretchy", "true"); - - var node = new _mathMLTree2.default.MathNode("munder", [buildGroup(group.value.body, options), operator]); - node.setAttribute("accentunder", "true"); - - return node; - }; - - groupTypes.accentUnder = function (group, options) { - var accentNode = _stretchy2.default.mathMLnode(group.value.label); - var node = new _mathMLTree2.default.MathNode("munder", [buildGroup(group.value.body, options), accentNode]); - node.setAttribute("accentunder", "true"); - return node; - }; - - groupTypes.enclose = function (group, options) { - var node = new _mathMLTree2.default.MathNode("menclose", [buildGroup(group.value.body, options)]); - var notation = ""; - switch (group.value.label) { - case "\\bcancel": - notation = "downdiagonalstrike"; - break; - case "\\sout": - notation = "horizontalstrike"; - break; - case "\\fbox": - notation = "box"; - break; - default: - notation = "updiagonalstrike"; - } - node.setAttribute("notation", notation); - return node; - }; - - groupTypes.horizBrace = function (group, options) { - var accentNode = _stretchy2.default.mathMLnode(group.value.label); - return new _mathMLTree2.default.MathNode(group.value.isOver ? 
"mover" : "munder", [buildGroup(group.value.base, options), accentNode]); - }; - - groupTypes.xArrow = function (group, options) { - var arrowNode = _stretchy2.default.mathMLnode(group.value.label); - var node = void 0; - var lowerNode = void 0; - - if (group.value.body) { - var upperNode = buildGroup(group.value.body, options); - if (group.value.below) { - lowerNode = buildGroup(group.value.below, options); - node = new _mathMLTree2.default.MathNode("munderover", [arrowNode, lowerNode, upperNode]); - } else { - node = new _mathMLTree2.default.MathNode("mover", [arrowNode, upperNode]); - } - } else if (group.value.below) { - lowerNode = buildGroup(group.value.below, options); - node = new _mathMLTree2.default.MathNode("munder", [arrowNode, lowerNode]); - } else { - node = new _mathMLTree2.default.MathNode("mover", [arrowNode]); - } - return node; - }; - - groupTypes.rule = function (group) { - // TODO(emily): Figure out if there's an actual way to draw black boxes - // in MathML. - var node = new _mathMLTree2.default.MathNode("mrow"); - - return node; - }; - - groupTypes.kern = function (group) { - // TODO(kevin): Figure out if there's a way to add space in MathML - var node = new _mathMLTree2.default.MathNode("mrow"); - - return node; - }; - - groupTypes.llap = function (group, options) { - var node = new _mathMLTree2.default.MathNode("mpadded", [buildGroup(group.value.body, options)]); - - node.setAttribute("lspace", "-1width"); - node.setAttribute("width", "0px"); - - return node; - }; - - groupTypes.rlap = function (group, options) { - var node = new _mathMLTree2.default.MathNode("mpadded", [buildGroup(group.value.body, options)]); - - node.setAttribute("width", "0px"); - - return node; - }; - - groupTypes.phantom = function (group, options) { - var inner = buildExpression(group.value.value, options); - return new _mathMLTree2.default.MathNode("mphantom", inner); - }; - - groupTypes.mclass = function (group, options) { - var inner = 
buildExpression(group.value.value, options); - return new _mathMLTree2.default.MathNode("mstyle", inner); - }; - - /** - * Takes a list of nodes, builds them, and returns a list of the generated - * MathML nodes. A little simpler than the HTML version because we don't do any - * previous-node handling. - */ - var buildExpression = function buildExpression(expression, options) { - var groups = []; - for (var i = 0; i < expression.length; i++) { - var group = expression[i]; - groups.push(buildGroup(group, options)); - } - - // TODO(kevinb): combine \\not with mrels and mords - - return groups; - }; - - /** - * Takes a group from the parser and calls the appropriate groupTypes function - * on it to produce a MathML node. - */ - // TODO(kevinb): determine if removeUnnecessaryRow should always be true - var buildGroup = function buildGroup(group, options) { - var removeUnnecessaryRow = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false; - - if (!group) { - return new _mathMLTree2.default.MathNode("mrow"); - } - - if (groupTypes[group.type]) { - // Call the groupTypes function - var result = groupTypes[group.type](group, options); - if (removeUnnecessaryRow) { - if (result.type === "mrow" && result.children.length === 1) { - return result.children[0]; - } - } - return result; - } else { - throw new _ParseError2.default("Got group of unknown type: '" + group.type + "'"); - } - }; - - /** - * Takes a full parse tree and settings and builds a MathML representation of - * it. In particular, we put the elements from building the parse tree into a - * tag so we can also include that TeX source as an annotation. - * - * Note that we actually return a domTree element with a `` inside it so - * we can do appropriate styling. - */ - var buildMathML = function buildMathML(tree, texExpression, options) { - var expression = buildExpression(tree, options); - - // Wrap up the expression in an mrow so it is presented in the semantics - // tag correctly. 
- var wrapper = new _mathMLTree2.default.MathNode("mrow", expression); - - // Build a TeX annotation of the source - var annotation = new _mathMLTree2.default.MathNode("annotation", [new _mathMLTree2.default.TextNode(texExpression)]); - - annotation.setAttribute("encoding", "application/x-tex"); - - var semantics = new _mathMLTree2.default.MathNode("semantics", [wrapper, annotation]); - - var math = new _mathMLTree2.default.MathNode("math", [semantics]); - - // You can't style nodes, so we wrap the node in a span. - return (0, _buildCommon.makeSpan)(["katex-mathml"], [math]); - }; - - module.exports = buildMathML; - - },{"./ParseError":29,"./Style":33,"./buildCommon":34,"./fontMetrics":41,"./mathMLTree":45,"./stretchy":47,"./symbols":48,"./utils":51}],37:[function(require,module,exports){ - - var _buildHTML = require("./buildHTML"); - - var _buildHTML2 = _interopRequireDefault(_buildHTML); - - var _buildMathML = require("./buildMathML"); - - var _buildMathML2 = _interopRequireDefault(_buildMathML); - - var _buildCommon = require("./buildCommon"); - - var _Options = require("./Options"); - - var _Options2 = _interopRequireDefault(_Options); - - var _Settings = require("./Settings"); - - var _Settings2 = _interopRequireDefault(_Settings); - - var _Style = require("./Style"); - - var _Style2 = _interopRequireDefault(_Style); - - function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - - var buildTree = function buildTree(tree, expression, settings) { - settings = settings || new _Settings2.default({}); - - var startStyle = _Style2.default.TEXT; - if (settings.displayMode) { - startStyle = _Style2.default.DISPLAY; - } - - // Setup the default options - var options = new _Options2.default({ - style: startStyle - }); - - // `buildHTML` sometimes messes with the parse tree (like turning bins -> - // ords), so we build the MathML version first. 
- var mathMLNode = (0, _buildMathML2.default)(tree, expression, options); - var htmlNode = (0, _buildHTML2.default)(tree, options); - - var katexNode = (0, _buildCommon.makeSpan)(["katex"], [mathMLNode, htmlNode]); - - if (settings.displayMode) { - return (0, _buildCommon.makeSpan)(["katex-display"], [katexNode]); - } else { - return katexNode; - } - }; - - module.exports = buildTree; - - },{"./Options":28,"./Settings":32,"./Style":33,"./buildCommon":34,"./buildHTML":35,"./buildMathML":36}],38:[function(require,module,exports){ - - var _ParseError = require("./ParseError"); - - var _ParseError2 = _interopRequireDefault(_ParseError); - - var _Style = require("./Style"); - - var _Style2 = _interopRequireDefault(_Style); - - var _buildCommon = require("./buildCommon"); - - var _buildCommon2 = _interopRequireDefault(_buildCommon); - - var _fontMetrics = require("./fontMetrics"); - - var _fontMetrics2 = _interopRequireDefault(_fontMetrics); - - var _symbols = require("./symbols"); - - var _symbols2 = _interopRequireDefault(_symbols); - - var _utils = require("./utils"); - - var _utils2 = _interopRequireDefault(_utils); - - function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - - /** - * Get the metrics for a given symbol and font, after transformation (i.e. - * after following replacement from symbols.js) - */ - /** - * This file deals with creating delimiters of various sizes. The TeXbook - * discusses these routines on page 441-442, in the "Another subroutine sets box - * x to a specified variable delimiter" paragraph. - * - * There are three main routines here. `makeSmallDelim` makes a delimiter in the - * normal font, but in either text, script, or scriptscript style. - * `makeLargeDelim` makes a delimiter in textstyle, but in one of the Size1, - * Size2, Size3, or Size4 fonts. `makeStackedDelim` makes a delimiter out of - * smaller pieces that are stacked on top of one another. 
- * - * The functions take a parameter `center`, which determines if the delimiter - * should be centered around the axis. - * - * Then, there are three exposed functions. `sizedDelim` makes a delimiter in - * one of the given sizes. This is used for things like `\bigl`. - * `customSizedDelim` makes a delimiter with a given total height+depth. It is - * called in places like `\sqrt`. `leftRightDelim` makes an appropriate - * delimiter which surrounds an expression of a given height an depth. It is - * used in `\left` and `\right`. - */ - - var getMetrics = function getMetrics(symbol, font) { - if (_symbols2.default.math[symbol] && _symbols2.default.math[symbol].replace) { - return _fontMetrics2.default.getCharacterMetrics(_symbols2.default.math[symbol].replace, font); - } else { - return _fontMetrics2.default.getCharacterMetrics(symbol, font); - } - }; - - /** - * Puts a delimiter span in a given style, and adds appropriate height, depth, - * and maxFontSizes. - */ - var styleWrap = function styleWrap(delim, toStyle, options, classes) { - var newOptions = options.havingBaseStyle(toStyle); - - var span = (0, _buildCommon.makeSpan)((classes || []).concat(newOptions.sizingClasses(options)), [delim], options); - - span.delimSizeMultiplier = newOptions.sizeMultiplier / options.sizeMultiplier; - span.height *= span.delimSizeMultiplier; - span.depth *= span.delimSizeMultiplier; - span.maxFontSize = newOptions.sizeMultiplier; - - return span; - }; - - var centerSpan = function centerSpan(span, options, style) { - var newOptions = options.havingBaseStyle(style); - var shift = (1 - options.sizeMultiplier / newOptions.sizeMultiplier) * options.fontMetrics().axisHeight; - - span.classes.push("delimcenter"); - span.style.top = shift + "em"; - span.height -= shift; - span.depth += shift; - }; - - /** - * Makes a small delimiter. This is a delimiter that comes in the Main-Regular - * font, but is restyled to either be in textstyle, scriptstyle, or - * scriptscriptstyle. 
- */ - var makeSmallDelim = function makeSmallDelim(delim, style, center, options, mode, classes) { - var text = _buildCommon2.default.makeSymbol(delim, "Main-Regular", mode, options); - var span = styleWrap(text, style, options, classes); - if (center) { - centerSpan(span, options, style); - } - return span; - }; - - /** - * Builds a symbol in the given font size (note size is an integer) - */ - var mathrmSize = function mathrmSize(value, size, mode, options) { - return _buildCommon2.default.makeSymbol(value, "Size" + size + "-Regular", mode, options); - }; - - /** - * Makes a large delimiter. This is a delimiter that comes in the Size1, Size2, - * Size3, or Size4 fonts. It is always rendered in textstyle. - */ - var makeLargeDelim = function makeLargeDelim(delim, size, center, options, mode, classes) { - var inner = mathrmSize(delim, size, mode, options); - var span = styleWrap((0, _buildCommon.makeSpan)(["delimsizing", "size" + size], [inner], options), _Style2.default.TEXT, options, classes); - if (center) { - centerSpan(span, options, _Style2.default.TEXT); - } - return span; - }; - - /** - * Make an inner span with the given offset and in the given font. This is used - * in `makeStackedDelim` to make the stacking pieces for the delimiter. - */ - var makeInner = function makeInner(symbol, font, mode) { - var sizeClass = void 0; - // Apply the correct CSS class to choose the right font. - if (font === "Size1-Regular") { - sizeClass = "delim-size1"; - } else if (font === "Size4-Regular") { - sizeClass = "delim-size4"; - } - - var inner = (0, _buildCommon.makeSpan)(["delimsizinginner", sizeClass], [(0, _buildCommon.makeSpan)([], [_buildCommon2.default.makeSymbol(symbol, font, mode)])]); - - // Since this will be passed into `makeVList` in the end, wrap the element - // in the appropriate tag that VList uses. - return { type: "elem", elem: inner }; - }; - - /** - * Make a stacked delimiter out of a given delimiter, with the total height at - * least `heightTotal`. 
This routine is mentioned on page 442 of the TeXbook. - */ - var makeStackedDelim = function makeStackedDelim(delim, heightTotal, center, options, mode, classes) { - // There are four parts, the top, an optional middle, a repeated part, and a - // bottom. - var top = void 0; - var middle = void 0; - var repeat = void 0; - var bottom = void 0; - top = repeat = bottom = delim; - middle = null; - // Also keep track of what font the delimiters are in - var font = "Size1-Regular"; - - // We set the parts and font based on the symbol. Note that we use - // '\u23d0' instead of '|' and '\u2016' instead of '\\|' for the - // repeats of the arrows - if (delim === "\\uparrow") { - repeat = bottom = "\u23D0"; - } else if (delim === "\\Uparrow") { - repeat = bottom = "\u2016"; - } else if (delim === "\\downarrow") { - top = repeat = "\u23D0"; - } else if (delim === "\\Downarrow") { - top = repeat = "\u2016"; - } else if (delim === "\\updownarrow") { - top = "\\uparrow"; - repeat = "\u23D0"; - bottom = "\\downarrow"; - } else if (delim === "\\Updownarrow") { - top = "\\Uparrow"; - repeat = "\u2016"; - bottom = "\\Downarrow"; - } else if (delim === "[" || delim === "\\lbrack") { - top = "\u23A1"; - repeat = "\u23A2"; - bottom = "\u23A3"; - font = "Size4-Regular"; - } else if (delim === "]" || delim === "\\rbrack") { - top = "\u23A4"; - repeat = "\u23A5"; - bottom = "\u23A6"; - font = "Size4-Regular"; - } else if (delim === "\\lfloor") { - repeat = top = "\u23A2"; - bottom = "\u23A3"; - font = "Size4-Regular"; - } else if (delim === "\\lceil") { - top = "\u23A1"; - repeat = bottom = "\u23A2"; - font = "Size4-Regular"; - } else if (delim === "\\rfloor") { - repeat = top = "\u23A5"; - bottom = "\u23A6"; - font = "Size4-Regular"; - } else if (delim === "\\rceil") { - top = "\u23A4"; - repeat = bottom = "\u23A5"; - font = "Size4-Regular"; - } else if (delim === "(") { - top = "\u239B"; - repeat = "\u239C"; - bottom = "\u239D"; - font = "Size4-Regular"; - } else if (delim === ")") { - 
top = "\u239E"; - repeat = "\u239F"; - bottom = "\u23A0"; - font = "Size4-Regular"; - } else if (delim === "\\{" || delim === "\\lbrace") { - top = "\u23A7"; - middle = "\u23A8"; - bottom = "\u23A9"; - repeat = "\u23AA"; - font = "Size4-Regular"; - } else if (delim === "\\}" || delim === "\\rbrace") { - top = "\u23AB"; - middle = "\u23AC"; - bottom = "\u23AD"; - repeat = "\u23AA"; - font = "Size4-Regular"; - } else if (delim === "\\lgroup") { - top = "\u23A7"; - bottom = "\u23A9"; - repeat = "\u23AA"; - font = "Size4-Regular"; - } else if (delim === "\\rgroup") { - top = "\u23AB"; - bottom = "\u23AD"; - repeat = "\u23AA"; - font = "Size4-Regular"; - } else if (delim === "\\lmoustache") { - top = "\u23A7"; - bottom = "\u23AD"; - repeat = "\u23AA"; - font = "Size4-Regular"; - } else if (delim === "\\rmoustache") { - top = "\u23AB"; - bottom = "\u23A9"; - repeat = "\u23AA"; - font = "Size4-Regular"; - } - - // Get the metrics of the four sections - var topMetrics = getMetrics(top, font); - var topHeightTotal = topMetrics.height + topMetrics.depth; - var repeatMetrics = getMetrics(repeat, font); - var repeatHeightTotal = repeatMetrics.height + repeatMetrics.depth; - var bottomMetrics = getMetrics(bottom, font); - var bottomHeightTotal = bottomMetrics.height + bottomMetrics.depth; - var middleHeightTotal = 0; - var middleFactor = 1; - if (middle !== null) { - var middleMetrics = getMetrics(middle, font); - middleHeightTotal = middleMetrics.height + middleMetrics.depth; - middleFactor = 2; // repeat symmetrically above and below middle - } - - // Calcuate the minimal height that the delimiter can have. - // It is at least the size of the top, bottom, and optional middle combined. 
- var minHeight = topHeightTotal + bottomHeightTotal + middleHeightTotal; - - // Compute the number of copies of the repeat symbol we will need - var repeatCount = Math.ceil((heightTotal - minHeight) / (middleFactor * repeatHeightTotal)); - - // Compute the total height of the delimiter including all the symbols - var realHeightTotal = minHeight + repeatCount * middleFactor * repeatHeightTotal; - - // The center of the delimiter is placed at the center of the axis. Note - // that in this context, "center" means that the delimiter should be - // centered around the axis in the current style, while normally it is - // centered around the axis in textstyle. - var axisHeight = options.fontMetrics().axisHeight; - if (center) { - axisHeight *= options.sizeMultiplier; - } - // Calculate the depth - var depth = realHeightTotal / 2 - axisHeight; - - // Now, we start building the pieces that will go into the vlist - - // Keep a list of the inner pieces - var inners = []; - - // Add the bottom symbol - inners.push(makeInner(bottom, font, mode)); - - if (middle === null) { - // Add that many symbols - for (var i = 0; i < repeatCount; i++) { - inners.push(makeInner(repeat, font, mode)); - } - } else { - // When there is a middle bit, we need the middle part and two repeated - // sections - for (var _i = 0; _i < repeatCount; _i++) { - inners.push(makeInner(repeat, font, mode)); - } - inners.push(makeInner(middle, font, mode)); - for (var _i2 = 0; _i2 < repeatCount; _i2++) { - inners.push(makeInner(repeat, font, mode)); - } - } - - // Add the top symbol - inners.push(makeInner(top, font, mode)); - - // Finally, build the vlist - var newOptions = options.havingBaseStyle(_Style2.default.TEXT); - var inner = _buildCommon2.default.makeVList(inners, "bottom", depth, newOptions); - - return styleWrap((0, _buildCommon.makeSpan)(["delimsizing", "mult"], [inner], newOptions), _Style2.default.TEXT, options, classes); - }; - - var sqrtInnerSVG = { - // The main path geometry is from glyph 
U221A in the font KaTeX Main - main: "", - - // size1 is from glyph U221A in the font KaTeX_Size1-Regular - 1: "", - - // size2 is from glyph U221A in the font KaTeX_Size2-Regular - 2: "", - - // size3 is from glyph U221A in the font KaTeX_Size3-Regular - 3: "", - - // size4 is from glyph U221A in the font KaTeX_Size4-Regular - 4: "", - - // tall is from glyph U23B7 in the font KaTeX_Size4-Regular - tall: "l-4 4-4 4c-.667.667-2 1.5-4 2.5s-4.167 1.833-6.5 2.5-5.5 1-9.5 1h\n-12l-28-84c-16.667-52-96.667 -294.333-240-727l-212 -643 -85 170c-4-3.333-8.333\n-7.667-13 -13l-13-13l77-155 77-156c66 199.333 139 419.667 219 661 l218 661z\nM702 0H400000v40H742z'/>" - }; - - var sqrtSpan = function sqrtSpan(height, delim, options) { - // Create a span containing an SVG image of a sqrt symbol. - var span = _buildCommon2.default.makeSpan([], [], options); - var sizeMultiplier = options.sizeMultiplier; // default - - if (delim.type === "small") { - // Get an SVG that is derived from glyph U+221A in font KaTeX-Main. - var newOptions = options.havingBaseStyle(delim.style); - sizeMultiplier = newOptions.sizeMultiplier / options.sizeMultiplier; - - span.height = 1 * sizeMultiplier; - span.style.height = span.height + "em"; - span.surdWidth = 0.833 * sizeMultiplier; // from the font. - //In the font, the glyph is 1000 units tall. The font scale is 1:1000. - - span.innerHTML = "\n " + sqrtInnerSVG['main'] + ""; - } else if (delim.type === "large") { - // These SVGs come from fonts: KaTeX_Size1, _Size2, etc. - // Get sqrt height from font data - span.height = sizeToMaxHeight[delim.size] / sizeMultiplier; - span.style.height = span.height + "em"; - span.surdWidth = 1.0 / sizeMultiplier; // from the font - - span.innerHTML = "\n " + sqrtInnerSVG[delim.size] + ""; - } else { - // Tall sqrt. In TeX, this would be stacked using multiple glyphs. - // We'll use a single SVG to accomplish the same thing. 
- span.height = height / sizeMultiplier; - span.style.height = span.height + "em"; - span.surdWidth = 1.056 / sizeMultiplier; - var viewBoxHeight = Math.floor(span.height * 1000); // scale = 1:1000 - var vertSegment = viewBoxHeight - 54; - - // This \sqrt is customized in both height and width. We set the - // height now. Then CSS will stretch the image to the correct width. - // This SVG path comes from glyph U+23B7, font KaTeX_Size4-Regular. - span.innerHTML = "\n \n "; - } - - span.sizeMultiplier = sizeMultiplier; - - return span; - }; - - // There are three kinds of delimiters, delimiters that stack when they become - // too large - var stackLargeDelimiters = ["(", ")", "[", "\\lbrack", "]", "\\rbrack", "\\{", "\\lbrace", "\\}", "\\rbrace", "\\lfloor", "\\rfloor", "\\lceil", "\\rceil", "\\surd"]; - - // delimiters that always stack - var stackAlwaysDelimiters = ["\\uparrow", "\\downarrow", "\\updownarrow", "\\Uparrow", "\\Downarrow", "\\Updownarrow", "|", "\\|", "\\vert", "\\Vert", "\\lvert", "\\rvert", "\\lVert", "\\rVert", "\\lgroup", "\\rgroup", "\\lmoustache", "\\rmoustache"]; - - // and delimiters that never stack - var stackNeverDelimiters = ["<", ">", "\\langle", "\\rangle", "/", "\\backslash", "\\lt", "\\gt"]; - - // Metrics of the different sizes. Found by looking at TeX's output of - // $\bigl| // \Bigl| \biggl| \Biggl| \showlists$ - // Used to create stacked delimiters of appropriate sizes in makeSizedDelim. - var sizeToMaxHeight = [0, 1.2, 1.8, 2.4, 3.0]; - - /** - * Used to create a delimiter of a specific size, where `size` is 1, 2, 3, or 4. - */ - var makeSizedDelim = function makeSizedDelim(delim, size, options, mode, classes) { - // < and > turn into \langle and \rangle in delimiters - if (delim === "<" || delim === "\\lt") { - delim = "\\langle"; - } else if (delim === ">" || delim === "\\gt") { - delim = "\\rangle"; - } - - // Sized delimiters are never centered. 
- if (_utils2.default.contains(stackLargeDelimiters, delim) || _utils2.default.contains(stackNeverDelimiters, delim)) { - return makeLargeDelim(delim, size, false, options, mode, classes); - } else if (_utils2.default.contains(stackAlwaysDelimiters, delim)) { - return makeStackedDelim(delim, sizeToMaxHeight[size], false, options, mode, classes); - } else { - throw new _ParseError2.default("Illegal delimiter: '" + delim + "'"); - } - }; - - /** - * There are three different sequences of delimiter sizes that the delimiters - * follow depending on the kind of delimiter. This is used when creating custom - * sized delimiters to decide whether to create a small, large, or stacked - * delimiter. - * - * In real TeX, these sequences aren't explicitly defined, but are instead - * defined inside the font metrics. Since there are only three sequences that - * are possible for the delimiters that TeX defines, it is easier to just encode - * them explicitly here. - */ - - // Delimiters that never stack try small delimiters and large delimiters only - var stackNeverDelimiterSequence = [{ type: "small", style: _Style2.default.SCRIPTSCRIPT }, { type: "small", style: _Style2.default.SCRIPT }, { type: "small", style: _Style2.default.TEXT }, { type: "large", size: 1 }, { type: "large", size: 2 }, { type: "large", size: 3 }, { type: "large", size: 4 }]; - - // Delimiters that always stack try the small delimiters first, then stack - var stackAlwaysDelimiterSequence = [{ type: "small", style: _Style2.default.SCRIPTSCRIPT }, { type: "small", style: _Style2.default.SCRIPT }, { type: "small", style: _Style2.default.TEXT }, { type: "stack" }]; - - // Delimiters that stack when large try the small and then large delimiters, and - // stack afterwards - var stackLargeDelimiterSequence = [{ type: "small", style: _Style2.default.SCRIPTSCRIPT }, { type: "small", style: _Style2.default.SCRIPT }, { type: "small", style: _Style2.default.TEXT }, { type: "large", size: 1 }, { type: "large", size: 2 
}, { type: "large", size: 3 }, { type: "large", size: 4 }, { type: "stack" }]; - - /** - * Get the font used in a delimiter based on what kind of delimiter it is. - */ - var delimTypeToFont = function delimTypeToFont(type) { - if (type.type === "small") { - return "Main-Regular"; - } else if (type.type === "large") { - return "Size" + type.size + "-Regular"; - } else if (type.type === "stack") { - return "Size4-Regular"; - } - }; - - /** - * Traverse a sequence of types of delimiters to decide what kind of delimiter - * should be used to create a delimiter of the given height+depth. - */ - var traverseSequence = function traverseSequence(delim, height, sequence, options) { - // Here, we choose the index we should start at in the sequences. In smaller - // sizes (which correspond to larger numbers in style.size) we start earlier - // in the sequence. Thus, scriptscript starts at index 3-3=0, script starts - // at index 3-2=1, text starts at 3-1=2, and display starts at min(2,3-0)=2 - var start = Math.min(2, 3 - options.style.size); - for (var i = start; i < sequence.length; i++) { - if (sequence[i].type === "stack") { - // This is always the last delimiter, so we just break the loop now. - break; - } - - var metrics = getMetrics(delim, delimTypeToFont(sequence[i])); - var heightDepth = metrics.height + metrics.depth; - - // Small delimiters are scaled down versions of the same font, so we - // account for the style change size. - - if (sequence[i].type === "small") { - var newOptions = options.havingBaseStyle(sequence[i].style); - heightDepth *= newOptions.sizeMultiplier; - } - - // Check if the delimiter at this size works for the given height. - if (heightDepth > height) { - return sequence[i]; - } - } - - // If we reached the end of the sequence, return the last sequence element. - return sequence[sequence.length - 1]; - }; - - /** - * Make a delimiter of a given height+depth, with optional centering. 
Here, we - * traverse the sequences, and create a delimiter that the sequence tells us to. - */ - var makeCustomSizedDelim = function makeCustomSizedDelim(delim, height, center, options, mode, classes) { - if (delim === "<" || delim === "\\lt") { - delim = "\\langle"; - } else if (delim === ">" || delim === "\\gt") { - delim = "\\rangle"; - } - - // Decide what sequence to use - var sequence = void 0; - if (_utils2.default.contains(stackNeverDelimiters, delim)) { - sequence = stackNeverDelimiterSequence; - } else if (_utils2.default.contains(stackLargeDelimiters, delim)) { - sequence = stackLargeDelimiterSequence; - } else { - sequence = stackAlwaysDelimiterSequence; - } - - // Look through the sequence - var delimType = traverseSequence(delim, height, sequence, options); - - if (delim === "\\surd") { - // Get an SVG image for - return sqrtSpan(height, delimType, options); - } else { - // Get the delimiter from font glyphs. - // Depending on the sequence element we decided on, call the - // appropriate function. - if (delimType.type === "small") { - return makeSmallDelim(delim, delimType.style, center, options, mode, classes); - } else if (delimType.type === "large") { - return makeLargeDelim(delim, delimType.size, center, options, mode, classes); - } else if (delimType.type === "stack") { - return makeStackedDelim(delim, height, center, options, mode, classes); - } - } - }; - - /** - * Make a delimiter for use with `\left` and `\right`, given a height and depth - * of an expression that the delimiters surround. 
- */ - var makeLeftRightDelim = function makeLeftRightDelim(delim, height, depth, options, mode, classes) { - // We always center \left/\right delimiters, so the axis is always shifted - var axisHeight = options.fontMetrics().axisHeight * options.sizeMultiplier; - - // Taken from TeX source, tex.web, function make_left_right - var delimiterFactor = 901; - var delimiterExtend = 5.0 / options.fontMetrics().ptPerEm; - - var maxDistFromAxis = Math.max(height - axisHeight, depth + axisHeight); - - var totalHeight = Math.max( - // In real TeX, calculations are done using integral values which are - // 65536 per pt, or 655360 per em. So, the division here truncates in - // TeX but doesn't here, producing different results. If we wanted to - // exactly match TeX's calculation, we could do - // Math.floor(655360 * maxDistFromAxis / 500) * - // delimiterFactor / 655360 - // (To see the difference, compare - // x^{x^{\left(\rule{0.1em}{0.68em}\right)}} - // in TeX and KaTeX) - maxDistFromAxis / 500 * delimiterFactor, 2 * maxDistFromAxis - delimiterExtend); - - // Finally, we defer to `makeCustomSizedDelim` with our calculated total - // height - return makeCustomSizedDelim(delim, totalHeight, true, options, mode, classes); - }; - - module.exports = { - sizedDelim: makeSizedDelim, - customSizedDelim: makeCustomSizedDelim, - leftRightDelim: makeLeftRightDelim - }; - - },{"./ParseError":29,"./Style":33,"./buildCommon":34,"./fontMetrics":41,"./symbols":48,"./utils":51}],39:[function(require,module,exports){ - - var _classCallCheck2 = require("babel-runtime/helpers/classCallCheck"); - - var _classCallCheck3 = _interopRequireDefault(_classCallCheck2); - - var _createClass2 = require("babel-runtime/helpers/createClass"); - - var _createClass3 = _interopRequireDefault(_createClass2); - - var _unicodeRegexes = require("./unicodeRegexes"); - - var _unicodeRegexes2 = _interopRequireDefault(_unicodeRegexes); - - var _utils = require("./utils"); - - var _utils2 = 
_interopRequireDefault(_utils); - - function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - - /** - * Create an HTML className based on a list of classes. In addition to joining - * with spaces, we also remove null or empty classes. - */ - /** - * These objects store the data about the DOM nodes we create, as well as some - * extra data. They can then be transformed into real DOM nodes with the - * `toNode` function or HTML markup using `toMarkup`. They are useful for both - * storing extra properties on the nodes, as well as providing a way to easily - * work with the DOM. - * - * Similar functions for working with MathML nodes exist in mathMLTree.js. - */ - var createClass = function createClass(classes) { - classes = classes.slice(); - for (var i = classes.length - 1; i >= 0; i--) { - if (!classes[i]) { - classes.splice(i, 1); - } - } - - return classes.join(" "); - }; - - /** - * This node represents a span node, with a className, a list of children, and - * an inline style. It also contains information about its height, depth, and - * maxFontSize. - */ - - var span = function () { - function span(classes, children, options) { - (0, _classCallCheck3.default)(this, span); - - this.classes = classes || []; - this.children = children || []; - this.height = 0; - this.depth = 0; - this.maxFontSize = 0; - this.style = {}; - this.attributes = {}; - this.innerHTML; // used for inline SVG code. - if (options) { - if (options.style.isTight()) { - this.classes.push("mtight"); - } - if (options.getColor()) { - this.style.color = options.getColor(); - } - } - } - - /** - * Sets an arbitrary attribute on the span. Warning: use this wisely. Not all - * browsers support attributes the same, and having too many custom attributes - * is probably bad. 
- */ - - - (0, _createClass3.default)(span, [{ - key: "setAttribute", - value: function setAttribute(attribute, value) { - this.attributes[attribute] = value; - } - }, { - key: "tryCombine", - value: function tryCombine(sibling) { - return false; - } - - /** - * Convert the span into an HTML node - */ - - }, { - key: "toNode", - value: function toNode() { - var span = document.createElement("span"); - - // Apply the class - span.className = createClass(this.classes); - - // Apply inline styles - for (var style in this.style) { - if (Object.prototype.hasOwnProperty.call(this.style, style)) { - span.style[style] = this.style[style]; - } - } - - // Apply attributes - for (var attr in this.attributes) { - if (Object.prototype.hasOwnProperty.call(this.attributes, attr)) { - span.setAttribute(attr, this.attributes[attr]); - } - } - - if (this.innerHTML) { - span.innerHTML = this.innerHTML; - } - - // Append the children, also as HTML nodes - for (var i = 0; i < this.children.length; i++) { - span.appendChild(this.children[i].toNode()); - } - - return span; - } - - /** - * Convert the span into an HTML markup string - */ - - }, { - key: "toMarkup", - value: function toMarkup() { - var markup = " 0 || createClass(this.classes) !== createClass(sibling.classes) || this.skew !== sibling.skew || this.maxFontSize !== sibling.maxFontSize) { - return false; - } - for (var style in this.style) { - if (this.style.hasOwnProperty(style) && this.style[style] !== sibling.style[style]) { - return false; - } - } - for (var _style in sibling.style) { - if (sibling.style.hasOwnProperty(_style) && this.style[_style] !== sibling.style[_style]) { - return false; - } - } - this.value += sibling.value; - this.height = Math.max(this.height, sibling.height); - this.depth = Math.max(this.depth, sibling.depth); - this.italic = sibling.italic; - return true; - } - - /** - * Creates a text node or span from a symbol node. Note that a span is only - * created if it is needed. 
- */ - - }, { - key: "toNode", - value: function toNode() { - var node = document.createTextNode(this.value); - var span = null; - - if (this.italic > 0) { - span = document.createElement("span"); - span.style.marginRight = this.italic + "em"; - } - - if (this.classes.length > 0) { - span = span || document.createElement("span"); - span.className = createClass(this.classes); - } - - for (var style in this.style) { - if (this.style.hasOwnProperty(style)) { - span = span || document.createElement("span"); - span.style[style] = this.style[style]; - } - } - - if (span) { - span.appendChild(node); - return span; - } else { - return node; - } - } - - /** - * Creates markup for a symbol node. - */ - - }, { - key: "toMarkup", - value: function toMarkup() { - // TODO(alpert): More duplication than I'd like from - // span.prototype.toMarkup and symbolNode.prototype.toNode... - var needsSpan = false; - - var markup = " 0) { - styles += "margin-right:" + this.italic + "em;"; - } - for (var style in this.style) { - if (this.style.hasOwnProperty(style)) { - styles += _utils2.default.hyphenate(style) + ":" + this.style[style] + ";"; - } - } - - if (styles) { - needsSpan = true; - markup += " style=\"" + _utils2.default.escape(styles) + "\""; - } - - var escaped = _utils2.default.escape(this.value); - if (needsSpan) { - markup += ">"; - markup += escaped; - markup += "
      "; - return markup; - } else { - return escaped; - } - } - }]); - return symbolNode; - }(); - - module.exports = { - span: span, - documentFragment: documentFragment, - symbolNode: symbolNode - }; - - },{"./unicodeRegexes":49,"./utils":51,"babel-runtime/helpers/classCallCheck":4,"babel-runtime/helpers/createClass":5}],40:[function(require,module,exports){ - - var _ParseNode = require("./ParseNode"); - - var _ParseNode2 = _interopRequireDefault(_ParseNode); - - var _ParseError = require("./ParseError"); - - var _ParseError2 = _interopRequireDefault(_ParseError); - - function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - - /** - * Parse the body of the environment, with rows delimited by \\ and - * columns delimited by &, and create a nested list in row-major order - * with one group per cell. If given an optional argument style - * ("text", "display", etc.), then each cell is cast into that style. - */ - /* eslint no-constant-condition:0 */ - function parseArray(parser, result, style) { - var row = []; - var body = [row]; - var rowGaps = []; - while (true) { - var cell = parser.parseExpression(false, null); - cell = new _ParseNode2.default("ordgroup", cell, parser.mode); - if (style) { - cell = new _ParseNode2.default("styling", { - style: style, - value: [cell] - }, parser.mode); - } - row.push(cell); - var next = parser.nextToken.text; - if (next === "&") { - parser.consume(); - } else if (next === "\\end") { - break; - } else if (next === "\\\\" || next === "\\cr") { - var cr = parser.parseFunction(); - rowGaps.push(cr.value.size); - row = []; - body.push(row); - } else { - throw new _ParseError2.default("Expected & or \\\\ or \\end", parser.nextToken); - } - } - result.body = body; - result.rowGaps = rowGaps; - return new _ParseNode2.default(result.type, result, parser.mode); - } - - /* - * An environment definition is very similar to a function definition: - * it is declared with a name or a list of names, a set 
of properties - * and a handler containing the actual implementation. - * - * The properties include: - * - numArgs: The number of arguments after the \begin{name} function. - * - argTypes: (optional) Just like for a function - * - allowedInText: (optional) Whether or not the environment is allowed inside - * text mode (default false) (not enforced yet) - * - numOptionalArgs: (optional) Just like for a function - * A bare number instead of that object indicates the numArgs value. - * - * The handler function will receive two arguments - * - context: information and references provided by the parser - * - args: an array of arguments passed to \begin{name} - * The context contains the following properties: - * - envName: the name of the environment, one of the listed names. - * - parser: the parser object - * - lexer: the lexer object - * - positions: the positions associated with these arguments from args. - * The handler must return a ParseResult. - */ - function defineEnvironment(names, props, handler) { - if (typeof names === "string") { - names = [names]; - } - if (typeof props === "number") { - props = { numArgs: props }; - } - // Set default values of environments - var data = { - numArgs: props.numArgs || 0, - argTypes: props.argTypes, - greediness: 1, - allowedInText: !!props.allowedInText, - numOptionalArgs: props.numOptionalArgs || 0, - handler: handler - }; - for (var i = 0; i < names.length; ++i) { - module.exports[names[i]] = data; - } - } - - // Decides on a style for cells in an array according to whether the given - // environment name starts with the letter 'd'. - function dCellStyle(envName) { - if (envName.substr(0, 1) === "d") { - return "display"; - } else { - return "text"; - } - } - - // Arrays are part of LaTeX, defined in lttab.dtx so its documentation - // is part of the source2e.pdf file of LaTeX2e source documentation. - // {darray} is an {array} environment where cells are set in \displaystyle, - // as defined in nccmath.sty. 
- defineEnvironment(["array", "darray"], { - numArgs: 1 - }, function (context, args) { - var colalign = args[0]; - colalign = colalign.value.map ? colalign.value : [colalign]; - var cols = colalign.map(function (node) { - var ca = node.value; - if ("lcr".indexOf(ca) !== -1) { - return { - type: "align", - align: ca - }; - } else if (ca === "|") { - return { - type: "separator", - separator: "|" - }; - } - throw new _ParseError2.default("Unknown column alignment: " + node.value, node); - }); - var res = { - type: "array", - cols: cols, - hskipBeforeAndAfter: true }; - res = parseArray(context.parser, res, dCellStyle(context.envName)); - return res; - }); - - // The matrix environments of amsmath builds on the array environment - // of LaTeX, which is discussed above. - defineEnvironment(["matrix", "pmatrix", "bmatrix", "Bmatrix", "vmatrix", "Vmatrix"], {}, function (context) { - var delimiters = { - "matrix": null, - "pmatrix": ["(", ")"], - "bmatrix": ["[", "]"], - "Bmatrix": ["\\{", "\\}"], - "vmatrix": ["|", "|"], - "Vmatrix": ["\\Vert", "\\Vert"] - }[context.envName]; - var res = { - type: "array", - hskipBeforeAndAfter: false }; - res = parseArray(context.parser, res, dCellStyle(context.envName)); - if (delimiters) { - res = new _ParseNode2.default("leftright", { - body: [res], - left: delimiters[0], - right: delimiters[1] - }, context.mode); - } - return res; - }); - - // A cases environment (in amsmath.sty) is almost equivalent to - // \def\arraystretch{1.2}% - // \left\{\begin{array}{@{}l@{\quad}l@{}} … \end{array}\right. - // {dcases} is a {cases} environment where cells are set in \displaystyle, - // as defined in mathtools.sty. - defineEnvironment(["cases", "dcases"], {}, function (context) { - var res = { - type: "array", - arraystretch: 1.2, - cols: [{ - type: "align", - align: "l", - pregap: 0, - // TODO(kevinb) get the current style. - // For now we use the metrics for TEXT style which is what we were - // doing before. 
Before attempting to get the current style we - // should look at TeX's behavior especially for \over and matrices. - postgap: 1.0 }, { - type: "align", - align: "l", - pregap: 0, - postgap: 0 - }] - }; - res = parseArray(context.parser, res, dCellStyle(context.envName)); - res = new _ParseNode2.default("leftright", { - body: [res], - left: "\\{", - right: "." - }, context.mode); - return res; - }); - - // An aligned environment is like the align* environment - // except it operates within math mode. - // Note that we assume \nomallineskiplimit to be zero, - // so that \strut@ is the same as \strut. - defineEnvironment("aligned", {}, function (context) { - var res = { - type: "array", - cols: [], - addJot: true - }; - res = parseArray(context.parser, res, "display"); - // Count number of columns = maximum number of cells in each row. - // At the same time, prepend empty group {} at beginning of every second - // cell in each row (starting with second cell) so that operators become - // binary. This behavior is implemented in amsmath's \start@aligned. - var emptyGroup = new _ParseNode2.default("ordgroup", [], context.mode); - var numCols = 0; - res.value.body.forEach(function (row) { - for (var i = 1; i < row.length; i += 2) { - // Modify ordgroup node within styling node - var ordgroup = row[i].value.value[0]; - ordgroup.value.unshift(emptyGroup); - } - if (numCols < row.length) { - numCols = row.length; - } - }); - for (var i = 0; i < numCols; ++i) { - var align = "r"; - var pregap = 0; - if (i % 2 === 1) { - align = "l"; - } else if (i > 0) { - pregap = 2; // one \qquad between columns - } - res.value.cols[i] = { - type: "align", - align: align, - pregap: pregap, - postgap: 0 - }; - } - return res; - }); - - // A gathered environment is like an array environment with one centered - // column, but where rows are considered lines so get \jot line spacing - // and contents are set in \displaystyle. 
- defineEnvironment("gathered", {}, function (context) { - var res = { - type: "array", - cols: [{ - type: "align", - align: "c" - }], - addJot: true - }; - res = parseArray(context.parser, res, "display"); - return res; - }); - - },{"./ParseError":29,"./ParseNode":30}],41:[function(require,module,exports){ - - var _unicodeRegexes = require("./unicodeRegexes"); - - var _fontMetricsData = require("./fontMetricsData"); - - var _fontMetricsData2 = _interopRequireDefault(_fontMetricsData); - - function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - - /** - * This file contains metrics regarding fonts and individual symbols. The sigma - * and xi variables, as well as the metricMap map contain data extracted from - * TeX, TeX font metrics, and the TTF files. These data are then exposed via the - * `metrics` variable and the getCharacterMetrics function. - */ - - // In TeX, there are actually three sets of dimensions, one for each of - // textstyle (size index 5 and higher: >=9pt), scriptstyle (size index 3 and 4: - // 7-8pt), and scriptscriptstyle (size index 1 and 2: 5-6pt). These are - // provided in the the arrays below, in that order. - // - // The font metrics are stored in fonts cmsy10, cmsy7, and cmsy5 respsectively. - // This was determined by running the following script: - // - // latex -interaction=nonstopmode \ - // '\documentclass{article}\usepackage{amsmath}\begin{document}' \ - // '$a$ \expandafter\show\the\textfont2' \ - // '\expandafter\show\the\scriptfont2' \ - // '\expandafter\show\the\scriptscriptfont2' \ - // '\stop' - // - // The metrics themselves were retreived using the following commands: - // - // tftopl cmsy10 - // tftopl cmsy7 - // tftopl cmsy5 - // - // The output of each of these commands is quite lengthy. The only part we - // care about is the FONTDIMEN section. Each value is measured in EMs. 
- var sigmasAndXis = { - slant: [0.250, 0.250, 0.250], // sigma1 - space: [0.000, 0.000, 0.000], // sigma2 - stretch: [0.000, 0.000, 0.000], // sigma3 - shrink: [0.000, 0.000, 0.000], // sigma4 - xHeight: [0.431, 0.431, 0.431], // sigma5 - quad: [1.000, 1.171, 1.472], // sigma6 - extraSpace: [0.000, 0.000, 0.000], // sigma7 - num1: [0.677, 0.732, 0.925], // sigma8 - num2: [0.394, 0.384, 0.387], // sigma9 - num3: [0.444, 0.471, 0.504], // sigma10 - denom1: [0.686, 0.752, 1.025], // sigma11 - denom2: [0.345, 0.344, 0.532], // sigma12 - sup1: [0.413, 0.503, 0.504], // sigma13 - sup2: [0.363, 0.431, 0.404], // sigma14 - sup3: [0.289, 0.286, 0.294], // sigma15 - sub1: [0.150, 0.143, 0.200], // sigma16 - sub2: [0.247, 0.286, 0.400], // sigma17 - supDrop: [0.386, 0.353, 0.494], // sigma18 - subDrop: [0.050, 0.071, 0.100], // sigma19 - delim1: [2.390, 1.700, 1.980], // sigma20 - delim2: [1.010, 1.157, 1.420], // sigma21 - axisHeight: [0.250, 0.250, 0.250], // sigma22 - - // These font metrics are extracted from TeX by using tftopl on cmex10.tfm; - // they correspond to the font parameters of the extension fonts (family 3). - // See the TeXbook, page 441. In AMSTeX, the extension fonts scale; to - // match cmex7, we'd use cmex7.tfm values for script and scriptscript - // values. - defaultRuleThickness: [0.04, 0.049, 0.049], // xi8; cmex7: 0.049 - bigOpSpacing1: [0.111, 0.111, 0.111], // xi9 - bigOpSpacing2: [0.166, 0.166, 0.166], // xi10 - bigOpSpacing3: [0.2, 0.2, 0.2], // xi11 - bigOpSpacing4: [0.6, 0.611, 0.611], // xi12; cmex7: 0.611 - bigOpSpacing5: [0.1, 0.143, 0.143], // xi13; cmex7: 0.143 - - // The \sqrt rule width is taken from the height of the surd character. - // Since we use the same font at all sizes, this thickness doesn't scale. - sqrtRuleThickness: [0.04, 0.04, 0.04], - - // This value determines how large a pt is, for metrics which are defined - // in terms of pts. 
- // This value is also used in katex.less; if you change it make sure the - // values match. - ptPerEm: [10.0, 10.0, 10.0], - - // The space between adjacent `|` columns in an array definition. From - // `\showthe\doublerulesep` in LaTeX. Equals 2.0 / ptPerEm. - doubleRuleSep: [0.2, 0.2, 0.2] - }; - - // This map contains a mapping from font name and character code to character - // metrics, including height, depth, italic correction, and skew (kern from the - // character to the corresponding \skewchar) - // This map is generated via `make metrics`. It should not be changed manually. - - - // These are very rough approximations. We default to Times New Roman which - // should have Latin-1 and Cyrillic characters, but may not depending on the - // operating system. The metrics do not account for extra height from the - // accents. In the case of Cyrillic characters which have both ascenders and - // descenders we prefer approximations with ascenders, primarily to prevent - // the fraction bar or root line from intersecting the glyph. - // TODO(kevinb) allow union of multiple glyph metrics for better accuracy. 
- var extraCharacterMap = { - // Latin-1 - 'À': 'A', - 'Á': 'A', - 'Â': 'A', - 'Ã': 'A', - 'Ä': 'A', - 'Å': 'A', - 'Æ': 'A', - 'Ç': 'C', - 'È': 'E', - 'É': 'E', - 'Ê': 'E', - 'Ë': 'E', - 'Ì': 'I', - 'Í': 'I', - 'Î': 'I', - 'Ï': 'I', - 'Ð': 'D', - 'Ñ': 'N', - 'Ò': 'O', - 'Ó': 'O', - 'Ô': 'O', - 'Õ': 'O', - 'Ö': 'O', - 'Ø': 'O', - 'Ù': 'U', - 'Ú': 'U', - 'Û': 'U', - 'Ü': 'U', - 'Ý': 'Y', - 'Þ': 'o', - 'ß': 'B', - 'à': 'a', - 'á': 'a', - 'â': 'a', - 'ã': 'a', - 'ä': 'a', - 'å': 'a', - 'æ': 'a', - 'ç': 'c', - 'è': 'e', - 'é': 'e', - 'ê': 'e', - 'ë': 'e', - 'ì': 'i', - 'í': 'i', - 'î': 'i', - 'ï': 'i', - 'ð': 'd', - 'ñ': 'n', - 'ò': 'o', - 'ó': 'o', - 'ô': 'o', - 'õ': 'o', - 'ö': 'o', - 'ø': 'o', - 'ù': 'u', - 'ú': 'u', - 'û': 'u', - 'ü': 'u', - 'ý': 'y', - 'þ': 'o', - 'ÿ': 'y', - - // Cyrillic - 'А': 'A', - 'Б': 'B', - 'В': 'B', - 'Г': 'F', - 'Д': 'A', - 'Е': 'E', - 'Ж': 'K', - 'З': '3', - 'И': 'N', - 'Й': 'N', - 'К': 'K', - 'Л': 'N', - 'М': 'M', - 'Н': 'H', - 'О': 'O', - 'П': 'N', - 'Р': 'P', - 'С': 'C', - 'Т': 'T', - 'У': 'y', - 'Ф': 'O', - 'Х': 'X', - 'Ц': 'U', - 'Ч': 'h', - 'Ш': 'W', - 'Щ': 'W', - 'Ъ': 'B', - 'Ы': 'X', - 'Ь': 'B', - 'Э': '3', - 'Ю': 'X', - 'Я': 'R', - 'а': 'a', - 'б': 'b', - 'в': 'a', - 'г': 'r', - 'д': 'y', - 'е': 'e', - 'ж': 'm', - 'з': 'e', - 'и': 'n', - 'й': 'n', - 'к': 'n', - 'л': 'n', - 'м': 'm', - 'н': 'n', - 'о': 'o', - 'п': 'n', - 'р': 'p', - 'с': 'c', - 'т': 'o', - 'у': 'y', - 'ф': 'b', - 'х': 'x', - 'ц': 'n', - 'ч': 'n', - 'ш': 'w', - 'щ': 'w', - 'ъ': 'a', - 'ы': 'm', - 'ь': 'a', - 'э': 'e', - 'ю': 'm', - 'я': 'r' - }; - - /** - * This function is a convenience function for looking up information in the - * metricMap table. It takes a character as a string, and a style. - * - * Note: the `width` property may be undefined if fontMetricsData.js wasn't - * built using `Make extended_metrics`. 
- */ - var getCharacterMetrics = function getCharacterMetrics(character, style) { - var ch = character.charCodeAt(0); - if (character[0] in extraCharacterMap) { - ch = extraCharacterMap[character[0]].charCodeAt(0); - } else if (_unicodeRegexes.cjkRegex.test(character[0])) { - ch = 'M'.charCodeAt(0); - } - var metrics = _fontMetricsData2.default[style][ch]; - if (metrics) { - return { - depth: metrics[0], - height: metrics[1], - italic: metrics[2], - skew: metrics[3], - width: metrics[4] - }; - } - }; - - var fontMetricsBySizeIndex = {}; - - /** - * Get the font metrics for a given size. - */ - var getFontMetrics = function getFontMetrics(size) { - var sizeIndex = void 0; - if (size >= 5) { - sizeIndex = 0; - } else if (size >= 3) { - sizeIndex = 1; - } else { - sizeIndex = 2; - } - if (!fontMetricsBySizeIndex[sizeIndex]) { - var metrics = fontMetricsBySizeIndex[sizeIndex] = {}; - for (var key in sigmasAndXis) { - if (sigmasAndXis.hasOwnProperty(key)) { - metrics[key] = sigmasAndXis[key][sizeIndex]; - } - } - metrics.cssEmPerMu = metrics.quad / 18; - } - return fontMetricsBySizeIndex[sizeIndex]; - }; - - module.exports = { - getFontMetrics: getFontMetrics, - getCharacterMetrics: getCharacterMetrics - }; - - },{"./fontMetricsData":42,"./unicodeRegexes":49}],42:[function(require,module,exports){ - - module.exports = { - "AMS-Regular": { - "65": [0, 0.68889, 0, 0], - "66": [0, 0.68889, 0, 0], - "67": [0, 0.68889, 0, 0], - "68": [0, 0.68889, 0, 0], - "69": [0, 0.68889, 0, 0], - "70": [0, 0.68889, 0, 0], - "71": [0, 0.68889, 0, 0], - "72": [0, 0.68889, 0, 0], - "73": [0, 0.68889, 0, 0], - "74": [0.16667, 0.68889, 0, 0], - "75": [0, 0.68889, 0, 0], - "76": [0, 0.68889, 0, 0], - "77": [0, 0.68889, 0, 0], - "78": [0, 0.68889, 0, 0], - "79": [0.16667, 0.68889, 0, 0], - "80": [0, 0.68889, 0, 0], - "81": [0.16667, 0.68889, 0, 0], - "82": [0, 0.68889, 0, 0], - "83": [0, 0.68889, 0, 0], - "84": [0, 0.68889, 0, 0], - "85": [0, 0.68889, 0, 0], - "86": [0, 0.68889, 0, 0], - "87": 
[0, 0.68889, 0, 0], - "88": [0, 0.68889, 0, 0], - "89": [0, 0.68889, 0, 0], - "90": [0, 0.68889, 0, 0], - "107": [0, 0.68889, 0, 0], - "165": [0, 0.675, 0.025, 0], - "174": [0.15559, 0.69224, 0, 0], - "240": [0, 0.68889, 0, 0], - "295": [0, 0.68889, 0, 0], - "710": [0, 0.825, 0, 0], - "732": [0, 0.9, 0, 0], - "770": [0, 0.825, 0, 0], - "771": [0, 0.9, 0, 0], - "989": [0.08167, 0.58167, 0, 0], - "1008": [0, 0.43056, 0.04028, 0], - "8245": [0, 0.54986, 0, 0], - "8463": [0, 0.68889, 0, 0], - "8487": [0, 0.68889, 0, 0], - "8498": [0, 0.68889, 0, 0], - "8502": [0, 0.68889, 0, 0], - "8503": [0, 0.68889, 0, 0], - "8504": [0, 0.68889, 0, 0], - "8513": [0, 0.68889, 0, 0], - "8592": [-0.03598, 0.46402, 0, 0], - "8594": [-0.03598, 0.46402, 0, 0], - "8602": [-0.13313, 0.36687, 0, 0], - "8603": [-0.13313, 0.36687, 0, 0], - "8606": [0.01354, 0.52239, 0, 0], - "8608": [0.01354, 0.52239, 0, 0], - "8610": [0.01354, 0.52239, 0, 0], - "8611": [0.01354, 0.52239, 0, 0], - "8619": [0, 0.54986, 0, 0], - "8620": [0, 0.54986, 0, 0], - "8621": [-0.13313, 0.37788, 0, 0], - "8622": [-0.13313, 0.36687, 0, 0], - "8624": [0, 0.69224, 0, 0], - "8625": [0, 0.69224, 0, 0], - "8630": [0, 0.43056, 0, 0], - "8631": [0, 0.43056, 0, 0], - "8634": [0.08198, 0.58198, 0, 0], - "8635": [0.08198, 0.58198, 0, 0], - "8638": [0.19444, 0.69224, 0, 0], - "8639": [0.19444, 0.69224, 0, 0], - "8642": [0.19444, 0.69224, 0, 0], - "8643": [0.19444, 0.69224, 0, 0], - "8644": [0.1808, 0.675, 0, 0], - "8646": [0.1808, 0.675, 0, 0], - "8647": [0.1808, 0.675, 0, 0], - "8648": [0.19444, 0.69224, 0, 0], - "8649": [0.1808, 0.675, 0, 0], - "8650": [0.19444, 0.69224, 0, 0], - "8651": [0.01354, 0.52239, 0, 0], - "8652": [0.01354, 0.52239, 0, 0], - "8653": [-0.13313, 0.36687, 0, 0], - "8654": [-0.13313, 0.36687, 0, 0], - "8655": [-0.13313, 0.36687, 0, 0], - "8666": [0.13667, 0.63667, 0, 0], - "8667": [0.13667, 0.63667, 0, 0], - "8669": [-0.13313, 0.37788, 0, 0], - "8672": [-0.064, 0.437, 0, 0], - "8674": [-0.064, 0.437, 0, 0], - 
"8705": [0, 0.825, 0, 0], - "8708": [0, 0.68889, 0, 0], - "8709": [0.08167, 0.58167, 0, 0], - "8717": [0, 0.43056, 0, 0], - "8722": [-0.03598, 0.46402, 0, 0], - "8724": [0.08198, 0.69224, 0, 0], - "8726": [0.08167, 0.58167, 0, 0], - "8733": [0, 0.69224, 0, 0], - "8736": [0, 0.69224, 0, 0], - "8737": [0, 0.69224, 0, 0], - "8738": [0.03517, 0.52239, 0, 0], - "8739": [0.08167, 0.58167, 0, 0], - "8740": [0.25142, 0.74111, 0, 0], - "8741": [0.08167, 0.58167, 0, 0], - "8742": [0.25142, 0.74111, 0, 0], - "8756": [0, 0.69224, 0, 0], - "8757": [0, 0.69224, 0, 0], - "8764": [-0.13313, 0.36687, 0, 0], - "8765": [-0.13313, 0.37788, 0, 0], - "8769": [-0.13313, 0.36687, 0, 0], - "8770": [-0.03625, 0.46375, 0, 0], - "8774": [0.30274, 0.79383, 0, 0], - "8776": [-0.01688, 0.48312, 0, 0], - "8778": [0.08167, 0.58167, 0, 0], - "8782": [0.06062, 0.54986, 0, 0], - "8783": [0.06062, 0.54986, 0, 0], - "8785": [0.08198, 0.58198, 0, 0], - "8786": [0.08198, 0.58198, 0, 0], - "8787": [0.08198, 0.58198, 0, 0], - "8790": [0, 0.69224, 0, 0], - "8791": [0.22958, 0.72958, 0, 0], - "8796": [0.08198, 0.91667, 0, 0], - "8806": [0.25583, 0.75583, 0, 0], - "8807": [0.25583, 0.75583, 0, 0], - "8808": [0.25142, 0.75726, 0, 0], - "8809": [0.25142, 0.75726, 0, 0], - "8812": [0.25583, 0.75583, 0, 0], - "8814": [0.20576, 0.70576, 0, 0], - "8815": [0.20576, 0.70576, 0, 0], - "8816": [0.30274, 0.79383, 0, 0], - "8817": [0.30274, 0.79383, 0, 0], - "8818": [0.22958, 0.72958, 0, 0], - "8819": [0.22958, 0.72958, 0, 0], - "8822": [0.1808, 0.675, 0, 0], - "8823": [0.1808, 0.675, 0, 0], - "8828": [0.13667, 0.63667, 0, 0], - "8829": [0.13667, 0.63667, 0, 0], - "8830": [0.22958, 0.72958, 0, 0], - "8831": [0.22958, 0.72958, 0, 0], - "8832": [0.20576, 0.70576, 0, 0], - "8833": [0.20576, 0.70576, 0, 0], - "8840": [0.30274, 0.79383, 0, 0], - "8841": [0.30274, 0.79383, 0, 0], - "8842": [0.13597, 0.63597, 0, 0], - "8843": [0.13597, 0.63597, 0, 0], - "8847": [0.03517, 0.54986, 0, 0], - "8848": [0.03517, 0.54986, 0, 0], - 
"8858": [0.08198, 0.58198, 0, 0], - "8859": [0.08198, 0.58198, 0, 0], - "8861": [0.08198, 0.58198, 0, 0], - "8862": [0, 0.675, 0, 0], - "8863": [0, 0.675, 0, 0], - "8864": [0, 0.675, 0, 0], - "8865": [0, 0.675, 0, 0], - "8872": [0, 0.69224, 0, 0], - "8873": [0, 0.69224, 0, 0], - "8874": [0, 0.69224, 0, 0], - "8876": [0, 0.68889, 0, 0], - "8877": [0, 0.68889, 0, 0], - "8878": [0, 0.68889, 0, 0], - "8879": [0, 0.68889, 0, 0], - "8882": [0.03517, 0.54986, 0, 0], - "8883": [0.03517, 0.54986, 0, 0], - "8884": [0.13667, 0.63667, 0, 0], - "8885": [0.13667, 0.63667, 0, 0], - "8888": [0, 0.54986, 0, 0], - "8890": [0.19444, 0.43056, 0, 0], - "8891": [0.19444, 0.69224, 0, 0], - "8892": [0.19444, 0.69224, 0, 0], - "8901": [0, 0.54986, 0, 0], - "8903": [0.08167, 0.58167, 0, 0], - "8905": [0.08167, 0.58167, 0, 0], - "8906": [0.08167, 0.58167, 0, 0], - "8907": [0, 0.69224, 0, 0], - "8908": [0, 0.69224, 0, 0], - "8909": [-0.03598, 0.46402, 0, 0], - "8910": [0, 0.54986, 0, 0], - "8911": [0, 0.54986, 0, 0], - "8912": [0.03517, 0.54986, 0, 0], - "8913": [0.03517, 0.54986, 0, 0], - "8914": [0, 0.54986, 0, 0], - "8915": [0, 0.54986, 0, 0], - "8916": [0, 0.69224, 0, 0], - "8918": [0.0391, 0.5391, 0, 0], - "8919": [0.0391, 0.5391, 0, 0], - "8920": [0.03517, 0.54986, 0, 0], - "8921": [0.03517, 0.54986, 0, 0], - "8922": [0.38569, 0.88569, 0, 0], - "8923": [0.38569, 0.88569, 0, 0], - "8926": [0.13667, 0.63667, 0, 0], - "8927": [0.13667, 0.63667, 0, 0], - "8928": [0.30274, 0.79383, 0, 0], - "8929": [0.30274, 0.79383, 0, 0], - "8934": [0.23222, 0.74111, 0, 0], - "8935": [0.23222, 0.74111, 0, 0], - "8936": [0.23222, 0.74111, 0, 0], - "8937": [0.23222, 0.74111, 0, 0], - "8938": [0.20576, 0.70576, 0, 0], - "8939": [0.20576, 0.70576, 0, 0], - "8940": [0.30274, 0.79383, 0, 0], - "8941": [0.30274, 0.79383, 0, 0], - "8994": [0.19444, 0.69224, 0, 0], - "8995": [0.19444, 0.69224, 0, 0], - "9416": [0.15559, 0.69224, 0, 0], - "9484": [0, 0.69224, 0, 0], - "9488": [0, 0.69224, 0, 0], - "9492": [0, 
0.37788, 0, 0], - "9496": [0, 0.37788, 0, 0], - "9585": [0.19444, 0.68889, 0, 0], - "9586": [0.19444, 0.74111, 0, 0], - "9632": [0, 0.675, 0, 0], - "9633": [0, 0.675, 0, 0], - "9650": [0, 0.54986, 0, 0], - "9651": [0, 0.54986, 0, 0], - "9654": [0.03517, 0.54986, 0, 0], - "9660": [0, 0.54986, 0, 0], - "9661": [0, 0.54986, 0, 0], - "9664": [0.03517, 0.54986, 0, 0], - "9674": [0.11111, 0.69224, 0, 0], - "9733": [0.19444, 0.69224, 0, 0], - "10003": [0, 0.69224, 0, 0], - "10016": [0, 0.69224, 0, 0], - "10731": [0.11111, 0.69224, 0, 0], - "10846": [0.19444, 0.75583, 0, 0], - "10877": [0.13667, 0.63667, 0, 0], - "10878": [0.13667, 0.63667, 0, 0], - "10885": [0.25583, 0.75583, 0, 0], - "10886": [0.25583, 0.75583, 0, 0], - "10887": [0.13597, 0.63597, 0, 0], - "10888": [0.13597, 0.63597, 0, 0], - "10889": [0.26167, 0.75726, 0, 0], - "10890": [0.26167, 0.75726, 0, 0], - "10891": [0.48256, 0.98256, 0, 0], - "10892": [0.48256, 0.98256, 0, 0], - "10901": [0.13667, 0.63667, 0, 0], - "10902": [0.13667, 0.63667, 0, 0], - "10933": [0.25142, 0.75726, 0, 0], - "10934": [0.25142, 0.75726, 0, 0], - "10935": [0.26167, 0.75726, 0, 0], - "10936": [0.26167, 0.75726, 0, 0], - "10937": [0.26167, 0.75726, 0, 0], - "10938": [0.26167, 0.75726, 0, 0], - "10949": [0.25583, 0.75583, 0, 0], - "10950": [0.25583, 0.75583, 0, 0], - "10955": [0.28481, 0.79383, 0, 0], - "10956": [0.28481, 0.79383, 0, 0], - "57350": [0.08167, 0.58167, 0, 0], - "57351": [0.08167, 0.58167, 0, 0], - "57352": [0.08167, 0.58167, 0, 0], - "57353": [0, 0.43056, 0.04028, 0], - "57356": [0.25142, 0.75726, 0, 0], - "57357": [0.25142, 0.75726, 0, 0], - "57358": [0.41951, 0.91951, 0, 0], - "57359": [0.30274, 0.79383, 0, 0], - "57360": [0.30274, 0.79383, 0, 0], - "57361": [0.41951, 0.91951, 0, 0], - "57366": [0.25142, 0.75726, 0, 0], - "57367": [0.25142, 0.75726, 0, 0], - "57368": [0.25142, 0.75726, 0, 0], - "57369": [0.25142, 0.75726, 0, 0], - "57370": [0.13597, 0.63597, 0, 0], - "57371": [0.13597, 0.63597, 0, 0] - }, - 
"Caligraphic-Regular": { - "48": [0, 0.43056, 0, 0], - "49": [0, 0.43056, 0, 0], - "50": [0, 0.43056, 0, 0], - "51": [0.19444, 0.43056, 0, 0], - "52": [0.19444, 0.43056, 0, 0], - "53": [0.19444, 0.43056, 0, 0], - "54": [0, 0.64444, 0, 0], - "55": [0.19444, 0.43056, 0, 0], - "56": [0, 0.64444, 0, 0], - "57": [0.19444, 0.43056, 0, 0], - "65": [0, 0.68333, 0, 0.19445], - "66": [0, 0.68333, 0.03041, 0.13889], - "67": [0, 0.68333, 0.05834, 0.13889], - "68": [0, 0.68333, 0.02778, 0.08334], - "69": [0, 0.68333, 0.08944, 0.11111], - "70": [0, 0.68333, 0.09931, 0.11111], - "71": [0.09722, 0.68333, 0.0593, 0.11111], - "72": [0, 0.68333, 0.00965, 0.11111], - "73": [0, 0.68333, 0.07382, 0], - "74": [0.09722, 0.68333, 0.18472, 0.16667], - "75": [0, 0.68333, 0.01445, 0.05556], - "76": [0, 0.68333, 0, 0.13889], - "77": [0, 0.68333, 0, 0.13889], - "78": [0, 0.68333, 0.14736, 0.08334], - "79": [0, 0.68333, 0.02778, 0.11111], - "80": [0, 0.68333, 0.08222, 0.08334], - "81": [0.09722, 0.68333, 0, 0.11111], - "82": [0, 0.68333, 0, 0.08334], - "83": [0, 0.68333, 0.075, 0.13889], - "84": [0, 0.68333, 0.25417, 0], - "85": [0, 0.68333, 0.09931, 0.08334], - "86": [0, 0.68333, 0.08222, 0], - "87": [0, 0.68333, 0.08222, 0.08334], - "88": [0, 0.68333, 0.14643, 0.13889], - "89": [0.09722, 0.68333, 0.08222, 0.08334], - "90": [0, 0.68333, 0.07944, 0.13889] - }, - "Fraktur-Regular": { - "33": [0, 0.69141, 0, 0], - "34": [0, 0.69141, 0, 0], - "38": [0, 0.69141, 0, 0], - "39": [0, 0.69141, 0, 0], - "40": [0.24982, 0.74947, 0, 0], - "41": [0.24982, 0.74947, 0, 0], - "42": [0, 0.62119, 0, 0], - "43": [0.08319, 0.58283, 0, 0], - "44": [0, 0.10803, 0, 0], - "45": [0.08319, 0.58283, 0, 0], - "46": [0, 0.10803, 0, 0], - "47": [0.24982, 0.74947, 0, 0], - "48": [0, 0.47534, 0, 0], - "49": [0, 0.47534, 0, 0], - "50": [0, 0.47534, 0, 0], - "51": [0.18906, 0.47534, 0, 0], - "52": [0.18906, 0.47534, 0, 0], - "53": [0.18906, 0.47534, 0, 0], - "54": [0, 0.69141, 0, 0], - "55": [0.18906, 0.47534, 0, 0], - "56": 
[0, 0.69141, 0, 0], - "57": [0.18906, 0.47534, 0, 0], - "58": [0, 0.47534, 0, 0], - "59": [0.12604, 0.47534, 0, 0], - "61": [-0.13099, 0.36866, 0, 0], - "63": [0, 0.69141, 0, 0], - "65": [0, 0.69141, 0, 0], - "66": [0, 0.69141, 0, 0], - "67": [0, 0.69141, 0, 0], - "68": [0, 0.69141, 0, 0], - "69": [0, 0.69141, 0, 0], - "70": [0.12604, 0.69141, 0, 0], - "71": [0, 0.69141, 0, 0], - "72": [0.06302, 0.69141, 0, 0], - "73": [0, 0.69141, 0, 0], - "74": [0.12604, 0.69141, 0, 0], - "75": [0, 0.69141, 0, 0], - "76": [0, 0.69141, 0, 0], - "77": [0, 0.69141, 0, 0], - "78": [0, 0.69141, 0, 0], - "79": [0, 0.69141, 0, 0], - "80": [0.18906, 0.69141, 0, 0], - "81": [0.03781, 0.69141, 0, 0], - "82": [0, 0.69141, 0, 0], - "83": [0, 0.69141, 0, 0], - "84": [0, 0.69141, 0, 0], - "85": [0, 0.69141, 0, 0], - "86": [0, 0.69141, 0, 0], - "87": [0, 0.69141, 0, 0], - "88": [0, 0.69141, 0, 0], - "89": [0.18906, 0.69141, 0, 0], - "90": [0.12604, 0.69141, 0, 0], - "91": [0.24982, 0.74947, 0, 0], - "93": [0.24982, 0.74947, 0, 0], - "94": [0, 0.69141, 0, 0], - "97": [0, 0.47534, 0, 0], - "98": [0, 0.69141, 0, 0], - "99": [0, 0.47534, 0, 0], - "100": [0, 0.62119, 0, 0], - "101": [0, 0.47534, 0, 0], - "102": [0.18906, 0.69141, 0, 0], - "103": [0.18906, 0.47534, 0, 0], - "104": [0.18906, 0.69141, 0, 0], - "105": [0, 0.69141, 0, 0], - "106": [0, 0.69141, 0, 0], - "107": [0, 0.69141, 0, 0], - "108": [0, 0.69141, 0, 0], - "109": [0, 0.47534, 0, 0], - "110": [0, 0.47534, 0, 0], - "111": [0, 0.47534, 0, 0], - "112": [0.18906, 0.52396, 0, 0], - "113": [0.18906, 0.47534, 0, 0], - "114": [0, 0.47534, 0, 0], - "115": [0, 0.47534, 0, 0], - "116": [0, 0.62119, 0, 0], - "117": [0, 0.47534, 0, 0], - "118": [0, 0.52396, 0, 0], - "119": [0, 0.52396, 0, 0], - "120": [0.18906, 0.47534, 0, 0], - "121": [0.18906, 0.47534, 0, 0], - "122": [0.18906, 0.47534, 0, 0], - "8216": [0, 0.69141, 0, 0], - "8217": [0, 0.69141, 0, 0], - "58112": [0, 0.62119, 0, 0], - "58113": [0, 0.62119, 0, 0], - "58114": [0.18906, 0.69141, 0, 
0], - "58115": [0.18906, 0.69141, 0, 0], - "58116": [0.18906, 0.47534, 0, 0], - "58117": [0, 0.69141, 0, 0], - "58118": [0, 0.62119, 0, 0], - "58119": [0, 0.47534, 0, 0] - }, - "Main-Bold": { - "33": [0, 0.69444, 0, 0], - "34": [0, 0.69444, 0, 0], - "35": [0.19444, 0.69444, 0, 0], - "36": [0.05556, 0.75, 0, 0], - "37": [0.05556, 0.75, 0, 0], - "38": [0, 0.69444, 0, 0], - "39": [0, 0.69444, 0, 0], - "40": [0.25, 0.75, 0, 0], - "41": [0.25, 0.75, 0, 0], - "42": [0, 0.75, 0, 0], - "43": [0.13333, 0.63333, 0, 0], - "44": [0.19444, 0.15556, 0, 0], - "45": [0, 0.44444, 0, 0], - "46": [0, 0.15556, 0, 0], - "47": [0.25, 0.75, 0, 0], - "48": [0, 0.64444, 0, 0], - "49": [0, 0.64444, 0, 0], - "50": [0, 0.64444, 0, 0], - "51": [0, 0.64444, 0, 0], - "52": [0, 0.64444, 0, 0], - "53": [0, 0.64444, 0, 0], - "54": [0, 0.64444, 0, 0], - "55": [0, 0.64444, 0, 0], - "56": [0, 0.64444, 0, 0], - "57": [0, 0.64444, 0, 0], - "58": [0, 0.44444, 0, 0], - "59": [0.19444, 0.44444, 0, 0], - "60": [0.08556, 0.58556, 0, 0], - "61": [-0.10889, 0.39111, 0, 0], - "62": [0.08556, 0.58556, 0, 0], - "63": [0, 0.69444, 0, 0], - "64": [0, 0.69444, 0, 0], - "65": [0, 0.68611, 0, 0], - "66": [0, 0.68611, 0, 0], - "67": [0, 0.68611, 0, 0], - "68": [0, 0.68611, 0, 0], - "69": [0, 0.68611, 0, 0], - "70": [0, 0.68611, 0, 0], - "71": [0, 0.68611, 0, 0], - "72": [0, 0.68611, 0, 0], - "73": [0, 0.68611, 0, 0], - "74": [0, 0.68611, 0, 0], - "75": [0, 0.68611, 0, 0], - "76": [0, 0.68611, 0, 0], - "77": [0, 0.68611, 0, 0], - "78": [0, 0.68611, 0, 0], - "79": [0, 0.68611, 0, 0], - "80": [0, 0.68611, 0, 0], - "81": [0.19444, 0.68611, 0, 0], - "82": [0, 0.68611, 0, 0], - "83": [0, 0.68611, 0, 0], - "84": [0, 0.68611, 0, 0], - "85": [0, 0.68611, 0, 0], - "86": [0, 0.68611, 0.01597, 0], - "87": [0, 0.68611, 0.01597, 0], - "88": [0, 0.68611, 0, 0], - "89": [0, 0.68611, 0.02875, 0], - "90": [0, 0.68611, 0, 0], - "91": [0.25, 0.75, 0, 0], - "92": [0.25, 0.75, 0, 0], - "93": [0.25, 0.75, 0, 0], - "94": [0, 0.69444, 0, 0], - 
"95": [0.31, 0.13444, 0.03194, 0], - "96": [0, 0.69444, 0, 0], - "97": [0, 0.44444, 0, 0], - "98": [0, 0.69444, 0, 0], - "99": [0, 0.44444, 0, 0], - "100": [0, 0.69444, 0, 0], - "101": [0, 0.44444, 0, 0], - "102": [0, 0.69444, 0.10903, 0], - "103": [0.19444, 0.44444, 0.01597, 0], - "104": [0, 0.69444, 0, 0], - "105": [0, 0.69444, 0, 0], - "106": [0.19444, 0.69444, 0, 0], - "107": [0, 0.69444, 0, 0], - "108": [0, 0.69444, 0, 0], - "109": [0, 0.44444, 0, 0], - "110": [0, 0.44444, 0, 0], - "111": [0, 0.44444, 0, 0], - "112": [0.19444, 0.44444, 0, 0], - "113": [0.19444, 0.44444, 0, 0], - "114": [0, 0.44444, 0, 0], - "115": [0, 0.44444, 0, 0], - "116": [0, 0.63492, 0, 0], - "117": [0, 0.44444, 0, 0], - "118": [0, 0.44444, 0.01597, 0], - "119": [0, 0.44444, 0.01597, 0], - "120": [0, 0.44444, 0, 0], - "121": [0.19444, 0.44444, 0.01597, 0], - "122": [0, 0.44444, 0, 0], - "123": [0.25, 0.75, 0, 0], - "124": [0.25, 0.75, 0, 0], - "125": [0.25, 0.75, 0, 0], - "126": [0.35, 0.34444, 0, 0], - "168": [0, 0.69444, 0, 0], - "172": [0, 0.44444, 0, 0], - "175": [0, 0.59611, 0, 0], - "176": [0, 0.69444, 0, 0], - "177": [0.13333, 0.63333, 0, 0], - "180": [0, 0.69444, 0, 0], - "215": [0.13333, 0.63333, 0, 0], - "247": [0.13333, 0.63333, 0, 0], - "305": [0, 0.44444, 0, 0], - "567": [0.19444, 0.44444, 0, 0], - "710": [0, 0.69444, 0, 0], - "711": [0, 0.63194, 0, 0], - "713": [0, 0.59611, 0, 0], - "714": [0, 0.69444, 0, 0], - "715": [0, 0.69444, 0, 0], - "728": [0, 0.69444, 0, 0], - "729": [0, 0.69444, 0, 0], - "730": [0, 0.69444, 0, 0], - "732": [0, 0.69444, 0, 0], - "768": [0, 0.69444, 0, 0], - "769": [0, 0.69444, 0, 0], - "770": [0, 0.69444, 0, 0], - "771": [0, 0.69444, 0, 0], - "772": [0, 0.59611, 0, 0], - "774": [0, 0.69444, 0, 0], - "775": [0, 0.69444, 0, 0], - "776": [0, 0.69444, 0, 0], - "778": [0, 0.69444, 0, 0], - "779": [0, 0.69444, 0, 0], - "780": [0, 0.63194, 0, 0], - "824": [0.19444, 0.69444, 0, 0], - "915": [0, 0.68611, 0, 0], - "916": [0, 0.68611, 0, 0], - "920": [0, 
0.68611, 0, 0], - "923": [0, 0.68611, 0, 0], - "926": [0, 0.68611, 0, 0], - "928": [0, 0.68611, 0, 0], - "931": [0, 0.68611, 0, 0], - "933": [0, 0.68611, 0, 0], - "934": [0, 0.68611, 0, 0], - "936": [0, 0.68611, 0, 0], - "937": [0, 0.68611, 0, 0], - "8211": [0, 0.44444, 0.03194, 0], - "8212": [0, 0.44444, 0.03194, 0], - "8216": [0, 0.69444, 0, 0], - "8217": [0, 0.69444, 0, 0], - "8220": [0, 0.69444, 0, 0], - "8221": [0, 0.69444, 0, 0], - "8224": [0.19444, 0.69444, 0, 0], - "8225": [0.19444, 0.69444, 0, 0], - "8242": [0, 0.55556, 0, 0], - "8407": [0, 0.72444, 0.15486, 0], - "8463": [0, 0.69444, 0, 0], - "8465": [0, 0.69444, 0, 0], - "8467": [0, 0.69444, 0, 0], - "8472": [0.19444, 0.44444, 0, 0], - "8476": [0, 0.69444, 0, 0], - "8501": [0, 0.69444, 0, 0], - "8592": [-0.10889, 0.39111, 0, 0], - "8593": [0.19444, 0.69444, 0, 0], - "8594": [-0.10889, 0.39111, 0, 0], - "8595": [0.19444, 0.69444, 0, 0], - "8596": [-0.10889, 0.39111, 0, 0], - "8597": [0.25, 0.75, 0, 0], - "8598": [0.19444, 0.69444, 0, 0], - "8599": [0.19444, 0.69444, 0, 0], - "8600": [0.19444, 0.69444, 0, 0], - "8601": [0.19444, 0.69444, 0, 0], - "8636": [-0.10889, 0.39111, 0, 0], - "8637": [-0.10889, 0.39111, 0, 0], - "8640": [-0.10889, 0.39111, 0, 0], - "8641": [-0.10889, 0.39111, 0, 0], - "8656": [-0.10889, 0.39111, 0, 0], - "8657": [0.19444, 0.69444, 0, 0], - "8658": [-0.10889, 0.39111, 0, 0], - "8659": [0.19444, 0.69444, 0, 0], - "8660": [-0.10889, 0.39111, 0, 0], - "8661": [0.25, 0.75, 0, 0], - "8704": [0, 0.69444, 0, 0], - "8706": [0, 0.69444, 0.06389, 0], - "8707": [0, 0.69444, 0, 0], - "8709": [0.05556, 0.75, 0, 0], - "8711": [0, 0.68611, 0, 0], - "8712": [0.08556, 0.58556, 0, 0], - "8715": [0.08556, 0.58556, 0, 0], - "8722": [0.13333, 0.63333, 0, 0], - "8723": [0.13333, 0.63333, 0, 0], - "8725": [0.25, 0.75, 0, 0], - "8726": [0.25, 0.75, 0, 0], - "8727": [-0.02778, 0.47222, 0, 0], - "8728": [-0.02639, 0.47361, 0, 0], - "8729": [-0.02639, 0.47361, 0, 0], - "8730": [0.18, 0.82, 0, 0], - "8733": [0, 
0.44444, 0, 0], - "8734": [0, 0.44444, 0, 0], - "8736": [0, 0.69224, 0, 0], - "8739": [0.25, 0.75, 0, 0], - "8741": [0.25, 0.75, 0, 0], - "8743": [0, 0.55556, 0, 0], - "8744": [0, 0.55556, 0, 0], - "8745": [0, 0.55556, 0, 0], - "8746": [0, 0.55556, 0, 0], - "8747": [0.19444, 0.69444, 0.12778, 0], - "8764": [-0.10889, 0.39111, 0, 0], - "8768": [0.19444, 0.69444, 0, 0], - "8771": [0.00222, 0.50222, 0, 0], - "8776": [0.02444, 0.52444, 0, 0], - "8781": [0.00222, 0.50222, 0, 0], - "8801": [0.00222, 0.50222, 0, 0], - "8804": [0.19667, 0.69667, 0, 0], - "8805": [0.19667, 0.69667, 0, 0], - "8810": [0.08556, 0.58556, 0, 0], - "8811": [0.08556, 0.58556, 0, 0], - "8826": [0.08556, 0.58556, 0, 0], - "8827": [0.08556, 0.58556, 0, 0], - "8834": [0.08556, 0.58556, 0, 0], - "8835": [0.08556, 0.58556, 0, 0], - "8838": [0.19667, 0.69667, 0, 0], - "8839": [0.19667, 0.69667, 0, 0], - "8846": [0, 0.55556, 0, 0], - "8849": [0.19667, 0.69667, 0, 0], - "8850": [0.19667, 0.69667, 0, 0], - "8851": [0, 0.55556, 0, 0], - "8852": [0, 0.55556, 0, 0], - "8853": [0.13333, 0.63333, 0, 0], - "8854": [0.13333, 0.63333, 0, 0], - "8855": [0.13333, 0.63333, 0, 0], - "8856": [0.13333, 0.63333, 0, 0], - "8857": [0.13333, 0.63333, 0, 0], - "8866": [0, 0.69444, 0, 0], - "8867": [0, 0.69444, 0, 0], - "8868": [0, 0.69444, 0, 0], - "8869": [0, 0.69444, 0, 0], - "8900": [-0.02639, 0.47361, 0, 0], - "8901": [-0.02639, 0.47361, 0, 0], - "8902": [-0.02778, 0.47222, 0, 0], - "8968": [0.25, 0.75, 0, 0], - "8969": [0.25, 0.75, 0, 0], - "8970": [0.25, 0.75, 0, 0], - "8971": [0.25, 0.75, 0, 0], - "8994": [-0.13889, 0.36111, 0, 0], - "8995": [-0.13889, 0.36111, 0, 0], - "9651": [0.19444, 0.69444, 0, 0], - "9657": [-0.02778, 0.47222, 0, 0], - "9661": [0.19444, 0.69444, 0, 0], - "9667": [-0.02778, 0.47222, 0, 0], - "9711": [0.19444, 0.69444, 0, 0], - "9824": [0.12963, 0.69444, 0, 0], - "9825": [0.12963, 0.69444, 0, 0], - "9826": [0.12963, 0.69444, 0, 0], - "9827": [0.12963, 0.69444, 0, 0], - "9837": [0, 0.75, 0, 0], - 
"9838": [0.19444, 0.69444, 0, 0], - "9839": [0.19444, 0.69444, 0, 0], - "10216": [0.25, 0.75, 0, 0], - "10217": [0.25, 0.75, 0, 0], - "10815": [0, 0.68611, 0, 0], - "10927": [0.19667, 0.69667, 0, 0], - "10928": [0.19667, 0.69667, 0, 0] - }, - "Main-Italic": { - "33": [0, 0.69444, 0.12417, 0], - "34": [0, 0.69444, 0.06961, 0], - "35": [0.19444, 0.69444, 0.06616, 0], - "37": [0.05556, 0.75, 0.13639, 0], - "38": [0, 0.69444, 0.09694, 0], - "39": [0, 0.69444, 0.12417, 0], - "40": [0.25, 0.75, 0.16194, 0], - "41": [0.25, 0.75, 0.03694, 0], - "42": [0, 0.75, 0.14917, 0], - "43": [0.05667, 0.56167, 0.03694, 0], - "44": [0.19444, 0.10556, 0, 0], - "45": [0, 0.43056, 0.02826, 0], - "46": [0, 0.10556, 0, 0], - "47": [0.25, 0.75, 0.16194, 0], - "48": [0, 0.64444, 0.13556, 0], - "49": [0, 0.64444, 0.13556, 0], - "50": [0, 0.64444, 0.13556, 0], - "51": [0, 0.64444, 0.13556, 0], - "52": [0.19444, 0.64444, 0.13556, 0], - "53": [0, 0.64444, 0.13556, 0], - "54": [0, 0.64444, 0.13556, 0], - "55": [0.19444, 0.64444, 0.13556, 0], - "56": [0, 0.64444, 0.13556, 0], - "57": [0, 0.64444, 0.13556, 0], - "58": [0, 0.43056, 0.0582, 0], - "59": [0.19444, 0.43056, 0.0582, 0], - "61": [-0.13313, 0.36687, 0.06616, 0], - "63": [0, 0.69444, 0.1225, 0], - "64": [0, 0.69444, 0.09597, 0], - "65": [0, 0.68333, 0, 0], - "66": [0, 0.68333, 0.10257, 0], - "67": [0, 0.68333, 0.14528, 0], - "68": [0, 0.68333, 0.09403, 0], - "69": [0, 0.68333, 0.12028, 0], - "70": [0, 0.68333, 0.13305, 0], - "71": [0, 0.68333, 0.08722, 0], - "72": [0, 0.68333, 0.16389, 0], - "73": [0, 0.68333, 0.15806, 0], - "74": [0, 0.68333, 0.14028, 0], - "75": [0, 0.68333, 0.14528, 0], - "76": [0, 0.68333, 0, 0], - "77": [0, 0.68333, 0.16389, 0], - "78": [0, 0.68333, 0.16389, 0], - "79": [0, 0.68333, 0.09403, 0], - "80": [0, 0.68333, 0.10257, 0], - "81": [0.19444, 0.68333, 0.09403, 0], - "82": [0, 0.68333, 0.03868, 0], - "83": [0, 0.68333, 0.11972, 0], - "84": [0, 0.68333, 0.13305, 0], - "85": [0, 0.68333, 0.16389, 0], - "86": [0, 
0.68333, 0.18361, 0], - "87": [0, 0.68333, 0.18361, 0], - "88": [0, 0.68333, 0.15806, 0], - "89": [0, 0.68333, 0.19383, 0], - "90": [0, 0.68333, 0.14528, 0], - "91": [0.25, 0.75, 0.1875, 0], - "93": [0.25, 0.75, 0.10528, 0], - "94": [0, 0.69444, 0.06646, 0], - "95": [0.31, 0.12056, 0.09208, 0], - "97": [0, 0.43056, 0.07671, 0], - "98": [0, 0.69444, 0.06312, 0], - "99": [0, 0.43056, 0.05653, 0], - "100": [0, 0.69444, 0.10333, 0], - "101": [0, 0.43056, 0.07514, 0], - "102": [0.19444, 0.69444, 0.21194, 0], - "103": [0.19444, 0.43056, 0.08847, 0], - "104": [0, 0.69444, 0.07671, 0], - "105": [0, 0.65536, 0.1019, 0], - "106": [0.19444, 0.65536, 0.14467, 0], - "107": [0, 0.69444, 0.10764, 0], - "108": [0, 0.69444, 0.10333, 0], - "109": [0, 0.43056, 0.07671, 0], - "110": [0, 0.43056, 0.07671, 0], - "111": [0, 0.43056, 0.06312, 0], - "112": [0.19444, 0.43056, 0.06312, 0], - "113": [0.19444, 0.43056, 0.08847, 0], - "114": [0, 0.43056, 0.10764, 0], - "115": [0, 0.43056, 0.08208, 0], - "116": [0, 0.61508, 0.09486, 0], - "117": [0, 0.43056, 0.07671, 0], - "118": [0, 0.43056, 0.10764, 0], - "119": [0, 0.43056, 0.10764, 0], - "120": [0, 0.43056, 0.12042, 0], - "121": [0.19444, 0.43056, 0.08847, 0], - "122": [0, 0.43056, 0.12292, 0], - "126": [0.35, 0.31786, 0.11585, 0], - "163": [0, 0.69444, 0, 0], - "305": [0, 0.43056, 0, 0.02778], - "567": [0.19444, 0.43056, 0, 0.08334], - "768": [0, 0.69444, 0, 0], - "769": [0, 0.69444, 0.09694, 0], - "770": [0, 0.69444, 0.06646, 0], - "771": [0, 0.66786, 0.11585, 0], - "772": [0, 0.56167, 0.10333, 0], - "774": [0, 0.69444, 0.10806, 0], - "775": [0, 0.66786, 0.11752, 0], - "776": [0, 0.66786, 0.10474, 0], - "778": [0, 0.69444, 0, 0], - "779": [0, 0.69444, 0.1225, 0], - "780": [0, 0.62847, 0.08295, 0], - "915": [0, 0.68333, 0.13305, 0], - "916": [0, 0.68333, 0, 0], - "920": [0, 0.68333, 0.09403, 0], - "923": [0, 0.68333, 0, 0], - "926": [0, 0.68333, 0.15294, 0], - "928": [0, 0.68333, 0.16389, 0], - "931": [0, 0.68333, 0.12028, 0], - "933": [0, 
0.68333, 0.11111, 0], - "934": [0, 0.68333, 0.05986, 0], - "936": [0, 0.68333, 0.11111, 0], - "937": [0, 0.68333, 0.10257, 0], - "8211": [0, 0.43056, 0.09208, 0], - "8212": [0, 0.43056, 0.09208, 0], - "8216": [0, 0.69444, 0.12417, 0], - "8217": [0, 0.69444, 0.12417, 0], - "8220": [0, 0.69444, 0.1685, 0], - "8221": [0, 0.69444, 0.06961, 0], - "8463": [0, 0.68889, 0, 0] - }, - "Main-Regular": { - "32": [0, 0, 0, 0], - "33": [0, 0.69444, 0, 0], - "34": [0, 0.69444, 0, 0], - "35": [0.19444, 0.69444, 0, 0], - "36": [0.05556, 0.75, 0, 0], - "37": [0.05556, 0.75, 0, 0], - "38": [0, 0.69444, 0, 0], - "39": [0, 0.69444, 0, 0], - "40": [0.25, 0.75, 0, 0], - "41": [0.25, 0.75, 0, 0], - "42": [0, 0.75, 0, 0], - "43": [0.08333, 0.58333, 0, 0], - "44": [0.19444, 0.10556, 0, 0], - "45": [0, 0.43056, 0, 0], - "46": [0, 0.10556, 0, 0], - "47": [0.25, 0.75, 0, 0], - "48": [0, 0.64444, 0, 0], - "49": [0, 0.64444, 0, 0], - "50": [0, 0.64444, 0, 0], - "51": [0, 0.64444, 0, 0], - "52": [0, 0.64444, 0, 0], - "53": [0, 0.64444, 0, 0], - "54": [0, 0.64444, 0, 0], - "55": [0, 0.64444, 0, 0], - "56": [0, 0.64444, 0, 0], - "57": [0, 0.64444, 0, 0], - "58": [0, 0.43056, 0, 0], - "59": [0.19444, 0.43056, 0, 0], - "60": [0.0391, 0.5391, 0, 0], - "61": [-0.13313, 0.36687, 0, 0], - "62": [0.0391, 0.5391, 0, 0], - "63": [0, 0.69444, 0, 0], - "64": [0, 0.69444, 0, 0], - "65": [0, 0.68333, 0, 0], - "66": [0, 0.68333, 0, 0], - "67": [0, 0.68333, 0, 0], - "68": [0, 0.68333, 0, 0], - "69": [0, 0.68333, 0, 0], - "70": [0, 0.68333, 0, 0], - "71": [0, 0.68333, 0, 0], - "72": [0, 0.68333, 0, 0], - "73": [0, 0.68333, 0, 0], - "74": [0, 0.68333, 0, 0], - "75": [0, 0.68333, 0, 0], - "76": [0, 0.68333, 0, 0], - "77": [0, 0.68333, 0, 0], - "78": [0, 0.68333, 0, 0], - "79": [0, 0.68333, 0, 0], - "80": [0, 0.68333, 0, 0], - "81": [0.19444, 0.68333, 0, 0], - "82": [0, 0.68333, 0, 0], - "83": [0, 0.68333, 0, 0], - "84": [0, 0.68333, 0, 0], - "85": [0, 0.68333, 0, 0], - "86": [0, 0.68333, 0.01389, 0], - "87": [0, 
0.68333, 0.01389, 0], - "88": [0, 0.68333, 0, 0], - "89": [0, 0.68333, 0.025, 0], - "90": [0, 0.68333, 0, 0], - "91": [0.25, 0.75, 0, 0], - "92": [0.25, 0.75, 0, 0], - "93": [0.25, 0.75, 0, 0], - "94": [0, 0.69444, 0, 0], - "95": [0.31, 0.12056, 0.02778, 0], - "96": [0, 0.69444, 0, 0], - "97": [0, 0.43056, 0, 0], - "98": [0, 0.69444, 0, 0], - "99": [0, 0.43056, 0, 0], - "100": [0, 0.69444, 0, 0], - "101": [0, 0.43056, 0, 0], - "102": [0, 0.69444, 0.07778, 0], - "103": [0.19444, 0.43056, 0.01389, 0], - "104": [0, 0.69444, 0, 0], - "105": [0, 0.66786, 0, 0], - "106": [0.19444, 0.66786, 0, 0], - "107": [0, 0.69444, 0, 0], - "108": [0, 0.69444, 0, 0], - "109": [0, 0.43056, 0, 0], - "110": [0, 0.43056, 0, 0], - "111": [0, 0.43056, 0, 0], - "112": [0.19444, 0.43056, 0, 0], - "113": [0.19444, 0.43056, 0, 0], - "114": [0, 0.43056, 0, 0], - "115": [0, 0.43056, 0, 0], - "116": [0, 0.61508, 0, 0], - "117": [0, 0.43056, 0, 0], - "118": [0, 0.43056, 0.01389, 0], - "119": [0, 0.43056, 0.01389, 0], - "120": [0, 0.43056, 0, 0], - "121": [0.19444, 0.43056, 0.01389, 0], - "122": [0, 0.43056, 0, 0], - "123": [0.25, 0.75, 0, 0], - "124": [0.25, 0.75, 0, 0], - "125": [0.25, 0.75, 0, 0], - "126": [0.35, 0.31786, 0, 0], - "160": [0, 0, 0, 0], - "168": [0, 0.66786, 0, 0], - "172": [0, 0.43056, 0, 0], - "175": [0, 0.56778, 0, 0], - "176": [0, 0.69444, 0, 0], - "177": [0.08333, 0.58333, 0, 0], - "180": [0, 0.69444, 0, 0], - "215": [0.08333, 0.58333, 0, 0], - "247": [0.08333, 0.58333, 0, 0], - "305": [0, 0.43056, 0, 0], - "567": [0.19444, 0.43056, 0, 0], - "710": [0, 0.69444, 0, 0], - "711": [0, 0.62847, 0, 0], - "713": [0, 0.56778, 0, 0], - "714": [0, 0.69444, 0, 0], - "715": [0, 0.69444, 0, 0], - "728": [0, 0.69444, 0, 0], - "729": [0, 0.66786, 0, 0], - "730": [0, 0.69444, 0, 0], - "732": [0, 0.66786, 0, 0], - "768": [0, 0.69444, 0, 0], - "769": [0, 0.69444, 0, 0], - "770": [0, 0.69444, 0, 0], - "771": [0, 0.66786, 0, 0], - "772": [0, 0.56778, 0, 0], - "774": [0, 0.69444, 0, 0], - "775": 
[0, 0.66786, 0, 0], - "776": [0, 0.66786, 0, 0], - "778": [0, 0.69444, 0, 0], - "779": [0, 0.69444, 0, 0], - "780": [0, 0.62847, 0, 0], - "824": [0.19444, 0.69444, 0, 0], - "915": [0, 0.68333, 0, 0], - "916": [0, 0.68333, 0, 0], - "920": [0, 0.68333, 0, 0], - "923": [0, 0.68333, 0, 0], - "926": [0, 0.68333, 0, 0], - "928": [0, 0.68333, 0, 0], - "931": [0, 0.68333, 0, 0], - "933": [0, 0.68333, 0, 0], - "934": [0, 0.68333, 0, 0], - "936": [0, 0.68333, 0, 0], - "937": [0, 0.68333, 0, 0], - "8211": [0, 0.43056, 0.02778, 0], - "8212": [0, 0.43056, 0.02778, 0], - "8216": [0, 0.69444, 0, 0], - "8217": [0, 0.69444, 0, 0], - "8220": [0, 0.69444, 0, 0], - "8221": [0, 0.69444, 0, 0], - "8224": [0.19444, 0.69444, 0, 0], - "8225": [0.19444, 0.69444, 0, 0], - "8230": [0, 0.12, 0, 0], - "8242": [0, 0.55556, 0, 0], - "8407": [0, 0.71444, 0.15382, 0], - "8463": [0, 0.68889, 0, 0], - "8465": [0, 0.69444, 0, 0], - "8467": [0, 0.69444, 0, 0.11111], - "8472": [0.19444, 0.43056, 0, 0.11111], - "8476": [0, 0.69444, 0, 0], - "8501": [0, 0.69444, 0, 0], - "8592": [-0.13313, 0.36687, 0, 0], - "8593": [0.19444, 0.69444, 0, 0], - "8594": [-0.13313, 0.36687, 0, 0], - "8595": [0.19444, 0.69444, 0, 0], - "8596": [-0.13313, 0.36687, 0, 0], - "8597": [0.25, 0.75, 0, 0], - "8598": [0.19444, 0.69444, 0, 0], - "8599": [0.19444, 0.69444, 0, 0], - "8600": [0.19444, 0.69444, 0, 0], - "8601": [0.19444, 0.69444, 0, 0], - "8614": [0.011, 0.511, 0, 0], - "8617": [0.011, 0.511, 0, 0], - "8618": [0.011, 0.511, 0, 0], - "8636": [-0.13313, 0.36687, 0, 0], - "8637": [-0.13313, 0.36687, 0, 0], - "8640": [-0.13313, 0.36687, 0, 0], - "8641": [-0.13313, 0.36687, 0, 0], - "8652": [0.011, 0.671, 0, 0], - "8656": [-0.13313, 0.36687, 0, 0], - "8657": [0.19444, 0.69444, 0, 0], - "8658": [-0.13313, 0.36687, 0, 0], - "8659": [0.19444, 0.69444, 0, 0], - "8660": [-0.13313, 0.36687, 0, 0], - "8661": [0.25, 0.75, 0, 0], - "8704": [0, 0.69444, 0, 0], - "8706": [0, 0.69444, 0.05556, 0.08334], - "8707": [0, 0.69444, 0, 0], - 
"8709": [0.05556, 0.75, 0, 0], - "8711": [0, 0.68333, 0, 0], - "8712": [0.0391, 0.5391, 0, 0], - "8715": [0.0391, 0.5391, 0, 0], - "8722": [0.08333, 0.58333, 0, 0], - "8723": [0.08333, 0.58333, 0, 0], - "8725": [0.25, 0.75, 0, 0], - "8726": [0.25, 0.75, 0, 0], - "8727": [-0.03472, 0.46528, 0, 0], - "8728": [-0.05555, 0.44445, 0, 0], - "8729": [-0.05555, 0.44445, 0, 0], - "8730": [0.2, 0.8, 0, 0], - "8733": [0, 0.43056, 0, 0], - "8734": [0, 0.43056, 0, 0], - "8736": [0, 0.69224, 0, 0], - "8739": [0.25, 0.75, 0, 0], - "8741": [0.25, 0.75, 0, 0], - "8743": [0, 0.55556, 0, 0], - "8744": [0, 0.55556, 0, 0], - "8745": [0, 0.55556, 0, 0], - "8746": [0, 0.55556, 0, 0], - "8747": [0.19444, 0.69444, 0.11111, 0], - "8764": [-0.13313, 0.36687, 0, 0], - "8768": [0.19444, 0.69444, 0, 0], - "8771": [-0.03625, 0.46375, 0, 0], - "8773": [-0.022, 0.589, 0, 0], - "8776": [-0.01688, 0.48312, 0, 0], - "8781": [-0.03625, 0.46375, 0, 0], - "8784": [-0.133, 0.67, 0, 0], - "8800": [0.215, 0.716, 0, 0], - "8801": [-0.03625, 0.46375, 0, 0], - "8804": [0.13597, 0.63597, 0, 0], - "8805": [0.13597, 0.63597, 0, 0], - "8810": [0.0391, 0.5391, 0, 0], - "8811": [0.0391, 0.5391, 0, 0], - "8826": [0.0391, 0.5391, 0, 0], - "8827": [0.0391, 0.5391, 0, 0], - "8834": [0.0391, 0.5391, 0, 0], - "8835": [0.0391, 0.5391, 0, 0], - "8838": [0.13597, 0.63597, 0, 0], - "8839": [0.13597, 0.63597, 0, 0], - "8846": [0, 0.55556, 0, 0], - "8849": [0.13597, 0.63597, 0, 0], - "8850": [0.13597, 0.63597, 0, 0], - "8851": [0, 0.55556, 0, 0], - "8852": [0, 0.55556, 0, 0], - "8853": [0.08333, 0.58333, 0, 0], - "8854": [0.08333, 0.58333, 0, 0], - "8855": [0.08333, 0.58333, 0, 0], - "8856": [0.08333, 0.58333, 0, 0], - "8857": [0.08333, 0.58333, 0, 0], - "8866": [0, 0.69444, 0, 0], - "8867": [0, 0.69444, 0, 0], - "8868": [0, 0.69444, 0, 0], - "8869": [0, 0.69444, 0, 0], - "8872": [0.249, 0.75, 0, 0], - "8900": [-0.05555, 0.44445, 0, 0], - "8901": [-0.05555, 0.44445, 0, 0], - "8902": [-0.03472, 0.46528, 0, 0], - "8904": [0.005, 
0.505, 0, 0], - "8942": [0.03, 0.9, 0, 0], - "8943": [-0.19, 0.31, 0, 0], - "8945": [-0.1, 0.82, 0, 0], - "8968": [0.25, 0.75, 0, 0], - "8969": [0.25, 0.75, 0, 0], - "8970": [0.25, 0.75, 0, 0], - "8971": [0.25, 0.75, 0, 0], - "8994": [-0.14236, 0.35764, 0, 0], - "8995": [-0.14236, 0.35764, 0, 0], - "9136": [0.244, 0.744, 0, 0], - "9137": [0.244, 0.744, 0, 0], - "9651": [0.19444, 0.69444, 0, 0], - "9657": [-0.03472, 0.46528, 0, 0], - "9661": [0.19444, 0.69444, 0, 0], - "9667": [-0.03472, 0.46528, 0, 0], - "9711": [0.19444, 0.69444, 0, 0], - "9824": [0.12963, 0.69444, 0, 0], - "9825": [0.12963, 0.69444, 0, 0], - "9826": [0.12963, 0.69444, 0, 0], - "9827": [0.12963, 0.69444, 0, 0], - "9837": [0, 0.75, 0, 0], - "9838": [0.19444, 0.69444, 0, 0], - "9839": [0.19444, 0.69444, 0, 0], - "10216": [0.25, 0.75, 0, 0], - "10217": [0.25, 0.75, 0, 0], - "10222": [0.244, 0.744, 0, 0], - "10223": [0.244, 0.744, 0, 0], - "10229": [0.011, 0.511, 0, 0], - "10230": [0.011, 0.511, 0, 0], - "10231": [0.011, 0.511, 0, 0], - "10232": [0.024, 0.525, 0, 0], - "10233": [0.024, 0.525, 0, 0], - "10234": [0.024, 0.525, 0, 0], - "10236": [0.011, 0.511, 0, 0], - "10815": [0, 0.68333, 0, 0], - "10927": [0.13597, 0.63597, 0, 0], - "10928": [0.13597, 0.63597, 0, 0] - }, - "Math-BoldItalic": { - "47": [0.19444, 0.69444, 0, 0], - "65": [0, 0.68611, 0, 0], - "66": [0, 0.68611, 0.04835, 0], - "67": [0, 0.68611, 0.06979, 0], - "68": [0, 0.68611, 0.03194, 0], - "69": [0, 0.68611, 0.05451, 0], - "70": [0, 0.68611, 0.15972, 0], - "71": [0, 0.68611, 0, 0], - "72": [0, 0.68611, 0.08229, 0], - "73": [0, 0.68611, 0.07778, 0], - "74": [0, 0.68611, 0.10069, 0], - "75": [0, 0.68611, 0.06979, 0], - "76": [0, 0.68611, 0, 0], - "77": [0, 0.68611, 0.11424, 0], - "78": [0, 0.68611, 0.11424, 0], - "79": [0, 0.68611, 0.03194, 0], - "80": [0, 0.68611, 0.15972, 0], - "81": [0.19444, 0.68611, 0, 0], - "82": [0, 0.68611, 0.00421, 0], - "83": [0, 0.68611, 0.05382, 0], - "84": [0, 0.68611, 0.15972, 0], - "85": [0, 0.68611, 
0.11424, 0], - "86": [0, 0.68611, 0.25555, 0], - "87": [0, 0.68611, 0.15972, 0], - "88": [0, 0.68611, 0.07778, 0], - "89": [0, 0.68611, 0.25555, 0], - "90": [0, 0.68611, 0.06979, 0], - "97": [0, 0.44444, 0, 0], - "98": [0, 0.69444, 0, 0], - "99": [0, 0.44444, 0, 0], - "100": [0, 0.69444, 0, 0], - "101": [0, 0.44444, 0, 0], - "102": [0.19444, 0.69444, 0.11042, 0], - "103": [0.19444, 0.44444, 0.03704, 0], - "104": [0, 0.69444, 0, 0], - "105": [0, 0.69326, 0, 0], - "106": [0.19444, 0.69326, 0.0622, 0], - "107": [0, 0.69444, 0.01852, 0], - "108": [0, 0.69444, 0.0088, 0], - "109": [0, 0.44444, 0, 0], - "110": [0, 0.44444, 0, 0], - "111": [0, 0.44444, 0, 0], - "112": [0.19444, 0.44444, 0, 0], - "113": [0.19444, 0.44444, 0.03704, 0], - "114": [0, 0.44444, 0.03194, 0], - "115": [0, 0.44444, 0, 0], - "116": [0, 0.63492, 0, 0], - "117": [0, 0.44444, 0, 0], - "118": [0, 0.44444, 0.03704, 0], - "119": [0, 0.44444, 0.02778, 0], - "120": [0, 0.44444, 0, 0], - "121": [0.19444, 0.44444, 0.03704, 0], - "122": [0, 0.44444, 0.04213, 0], - "915": [0, 0.68611, 0.15972, 0], - "916": [0, 0.68611, 0, 0], - "920": [0, 0.68611, 0.03194, 0], - "923": [0, 0.68611, 0, 0], - "926": [0, 0.68611, 0.07458, 0], - "928": [0, 0.68611, 0.08229, 0], - "931": [0, 0.68611, 0.05451, 0], - "933": [0, 0.68611, 0.15972, 0], - "934": [0, 0.68611, 0, 0], - "936": [0, 0.68611, 0.11653, 0], - "937": [0, 0.68611, 0.04835, 0], - "945": [0, 0.44444, 0, 0], - "946": [0.19444, 0.69444, 0.03403, 0], - "947": [0.19444, 0.44444, 0.06389, 0], - "948": [0, 0.69444, 0.03819, 0], - "949": [0, 0.44444, 0, 0], - "950": [0.19444, 0.69444, 0.06215, 0], - "951": [0.19444, 0.44444, 0.03704, 0], - "952": [0, 0.69444, 0.03194, 0], - "953": [0, 0.44444, 0, 0], - "954": [0, 0.44444, 0, 0], - "955": [0, 0.69444, 0, 0], - "956": [0.19444, 0.44444, 0, 0], - "957": [0, 0.44444, 0.06898, 0], - "958": [0.19444, 0.69444, 0.03021, 0], - "959": [0, 0.44444, 0, 0], - "960": [0, 0.44444, 0.03704, 0], - "961": [0.19444, 0.44444, 0, 0], - "962": 
[0.09722, 0.44444, 0.07917, 0], - "963": [0, 0.44444, 0.03704, 0], - "964": [0, 0.44444, 0.13472, 0], - "965": [0, 0.44444, 0.03704, 0], - "966": [0.19444, 0.44444, 0, 0], - "967": [0.19444, 0.44444, 0, 0], - "968": [0.19444, 0.69444, 0.03704, 0], - "969": [0, 0.44444, 0.03704, 0], - "977": [0, 0.69444, 0, 0], - "981": [0.19444, 0.69444, 0, 0], - "982": [0, 0.44444, 0.03194, 0], - "1009": [0.19444, 0.44444, 0, 0], - "1013": [0, 0.44444, 0, 0] - }, - "Math-Italic": { - "47": [0.19444, 0.69444, 0, 0], - "65": [0, 0.68333, 0, 0.13889], - "66": [0, 0.68333, 0.05017, 0.08334], - "67": [0, 0.68333, 0.07153, 0.08334], - "68": [0, 0.68333, 0.02778, 0.05556], - "69": [0, 0.68333, 0.05764, 0.08334], - "70": [0, 0.68333, 0.13889, 0.08334], - "71": [0, 0.68333, 0, 0.08334], - "72": [0, 0.68333, 0.08125, 0.05556], - "73": [0, 0.68333, 0.07847, 0.11111], - "74": [0, 0.68333, 0.09618, 0.16667], - "75": [0, 0.68333, 0.07153, 0.05556], - "76": [0, 0.68333, 0, 0.02778], - "77": [0, 0.68333, 0.10903, 0.08334], - "78": [0, 0.68333, 0.10903, 0.08334], - "79": [0, 0.68333, 0.02778, 0.08334], - "80": [0, 0.68333, 0.13889, 0.08334], - "81": [0.19444, 0.68333, 0, 0.08334], - "82": [0, 0.68333, 0.00773, 0.08334], - "83": [0, 0.68333, 0.05764, 0.08334], - "84": [0, 0.68333, 0.13889, 0.08334], - "85": [0, 0.68333, 0.10903, 0.02778], - "86": [0, 0.68333, 0.22222, 0], - "87": [0, 0.68333, 0.13889, 0], - "88": [0, 0.68333, 0.07847, 0.08334], - "89": [0, 0.68333, 0.22222, 0], - "90": [0, 0.68333, 0.07153, 0.08334], - "97": [0, 0.43056, 0, 0], - "98": [0, 0.69444, 0, 0], - "99": [0, 0.43056, 0, 0.05556], - "100": [0, 0.69444, 0, 0.16667], - "101": [0, 0.43056, 0, 0.05556], - "102": [0.19444, 0.69444, 0.10764, 0.16667], - "103": [0.19444, 0.43056, 0.03588, 0.02778], - "104": [0, 0.69444, 0, 0], - "105": [0, 0.65952, 0, 0], - "106": [0.19444, 0.65952, 0.05724, 0], - "107": [0, 0.69444, 0.03148, 0], - "108": [0, 0.69444, 0.01968, 0.08334], - "109": [0, 0.43056, 0, 0], - "110": [0, 0.43056, 0, 0], - 
"111": [0, 0.43056, 0, 0.05556], - "112": [0.19444, 0.43056, 0, 0.08334], - "113": [0.19444, 0.43056, 0.03588, 0.08334], - "114": [0, 0.43056, 0.02778, 0.05556], - "115": [0, 0.43056, 0, 0.05556], - "116": [0, 0.61508, 0, 0.08334], - "117": [0, 0.43056, 0, 0.02778], - "118": [0, 0.43056, 0.03588, 0.02778], - "119": [0, 0.43056, 0.02691, 0.08334], - "120": [0, 0.43056, 0, 0.02778], - "121": [0.19444, 0.43056, 0.03588, 0.05556], - "122": [0, 0.43056, 0.04398, 0.05556], - "915": [0, 0.68333, 0.13889, 0.08334], - "916": [0, 0.68333, 0, 0.16667], - "920": [0, 0.68333, 0.02778, 0.08334], - "923": [0, 0.68333, 0, 0.16667], - "926": [0, 0.68333, 0.07569, 0.08334], - "928": [0, 0.68333, 0.08125, 0.05556], - "931": [0, 0.68333, 0.05764, 0.08334], - "933": [0, 0.68333, 0.13889, 0.05556], - "934": [0, 0.68333, 0, 0.08334], - "936": [0, 0.68333, 0.11, 0.05556], - "937": [0, 0.68333, 0.05017, 0.08334], - "945": [0, 0.43056, 0.0037, 0.02778], - "946": [0.19444, 0.69444, 0.05278, 0.08334], - "947": [0.19444, 0.43056, 0.05556, 0], - "948": [0, 0.69444, 0.03785, 0.05556], - "949": [0, 0.43056, 0, 0.08334], - "950": [0.19444, 0.69444, 0.07378, 0.08334], - "951": [0.19444, 0.43056, 0.03588, 0.05556], - "952": [0, 0.69444, 0.02778, 0.08334], - "953": [0, 0.43056, 0, 0.05556], - "954": [0, 0.43056, 0, 0], - "955": [0, 0.69444, 0, 0], - "956": [0.19444, 0.43056, 0, 0.02778], - "957": [0, 0.43056, 0.06366, 0.02778], - "958": [0.19444, 0.69444, 0.04601, 0.11111], - "959": [0, 0.43056, 0, 0.05556], - "960": [0, 0.43056, 0.03588, 0], - "961": [0.19444, 0.43056, 0, 0.08334], - "962": [0.09722, 0.43056, 0.07986, 0.08334], - "963": [0, 0.43056, 0.03588, 0], - "964": [0, 0.43056, 0.1132, 0.02778], - "965": [0, 0.43056, 0.03588, 0.02778], - "966": [0.19444, 0.43056, 0, 0.08334], - "967": [0.19444, 0.43056, 0, 0.05556], - "968": [0.19444, 0.69444, 0.03588, 0.11111], - "969": [0, 0.43056, 0.03588, 0], - "977": [0, 0.69444, 0, 0.08334], - "981": [0.19444, 0.69444, 0, 0.08334], - "982": [0, 0.43056, 
0.02778, 0], - "1009": [0.19444, 0.43056, 0, 0.08334], - "1013": [0, 0.43056, 0, 0.05556] - }, - "Math-Regular": { - "65": [0, 0.68333, 0, 0.13889], - "66": [0, 0.68333, 0.05017, 0.08334], - "67": [0, 0.68333, 0.07153, 0.08334], - "68": [0, 0.68333, 0.02778, 0.05556], - "69": [0, 0.68333, 0.05764, 0.08334], - "70": [0, 0.68333, 0.13889, 0.08334], - "71": [0, 0.68333, 0, 0.08334], - "72": [0, 0.68333, 0.08125, 0.05556], - "73": [0, 0.68333, 0.07847, 0.11111], - "74": [0, 0.68333, 0.09618, 0.16667], - "75": [0, 0.68333, 0.07153, 0.05556], - "76": [0, 0.68333, 0, 0.02778], - "77": [0, 0.68333, 0.10903, 0.08334], - "78": [0, 0.68333, 0.10903, 0.08334], - "79": [0, 0.68333, 0.02778, 0.08334], - "80": [0, 0.68333, 0.13889, 0.08334], - "81": [0.19444, 0.68333, 0, 0.08334], - "82": [0, 0.68333, 0.00773, 0.08334], - "83": [0, 0.68333, 0.05764, 0.08334], - "84": [0, 0.68333, 0.13889, 0.08334], - "85": [0, 0.68333, 0.10903, 0.02778], - "86": [0, 0.68333, 0.22222, 0], - "87": [0, 0.68333, 0.13889, 0], - "88": [0, 0.68333, 0.07847, 0.08334], - "89": [0, 0.68333, 0.22222, 0], - "90": [0, 0.68333, 0.07153, 0.08334], - "97": [0, 0.43056, 0, 0], - "98": [0, 0.69444, 0, 0], - "99": [0, 0.43056, 0, 0.05556], - "100": [0, 0.69444, 0, 0.16667], - "101": [0, 0.43056, 0, 0.05556], - "102": [0.19444, 0.69444, 0.10764, 0.16667], - "103": [0.19444, 0.43056, 0.03588, 0.02778], - "104": [0, 0.69444, 0, 0], - "105": [0, 0.65952, 0, 0], - "106": [0.19444, 0.65952, 0.05724, 0], - "107": [0, 0.69444, 0.03148, 0], - "108": [0, 0.69444, 0.01968, 0.08334], - "109": [0, 0.43056, 0, 0], - "110": [0, 0.43056, 0, 0], - "111": [0, 0.43056, 0, 0.05556], - "112": [0.19444, 0.43056, 0, 0.08334], - "113": [0.19444, 0.43056, 0.03588, 0.08334], - "114": [0, 0.43056, 0.02778, 0.05556], - "115": [0, 0.43056, 0, 0.05556], - "116": [0, 0.61508, 0, 0.08334], - "117": [0, 0.43056, 0, 0.02778], - "118": [0, 0.43056, 0.03588, 0.02778], - "119": [0, 0.43056, 0.02691, 0.08334], - "120": [0, 0.43056, 0, 0.02778], - 
"121": [0.19444, 0.43056, 0.03588, 0.05556], - "122": [0, 0.43056, 0.04398, 0.05556], - "915": [0, 0.68333, 0.13889, 0.08334], - "916": [0, 0.68333, 0, 0.16667], - "920": [0, 0.68333, 0.02778, 0.08334], - "923": [0, 0.68333, 0, 0.16667], - "926": [0, 0.68333, 0.07569, 0.08334], - "928": [0, 0.68333, 0.08125, 0.05556], - "931": [0, 0.68333, 0.05764, 0.08334], - "933": [0, 0.68333, 0.13889, 0.05556], - "934": [0, 0.68333, 0, 0.08334], - "936": [0, 0.68333, 0.11, 0.05556], - "937": [0, 0.68333, 0.05017, 0.08334], - "945": [0, 0.43056, 0.0037, 0.02778], - "946": [0.19444, 0.69444, 0.05278, 0.08334], - "947": [0.19444, 0.43056, 0.05556, 0], - "948": [0, 0.69444, 0.03785, 0.05556], - "949": [0, 0.43056, 0, 0.08334], - "950": [0.19444, 0.69444, 0.07378, 0.08334], - "951": [0.19444, 0.43056, 0.03588, 0.05556], - "952": [0, 0.69444, 0.02778, 0.08334], - "953": [0, 0.43056, 0, 0.05556], - "954": [0, 0.43056, 0, 0], - "955": [0, 0.69444, 0, 0], - "956": [0.19444, 0.43056, 0, 0.02778], - "957": [0, 0.43056, 0.06366, 0.02778], - "958": [0.19444, 0.69444, 0.04601, 0.11111], - "959": [0, 0.43056, 0, 0.05556], - "960": [0, 0.43056, 0.03588, 0], - "961": [0.19444, 0.43056, 0, 0.08334], - "962": [0.09722, 0.43056, 0.07986, 0.08334], - "963": [0, 0.43056, 0.03588, 0], - "964": [0, 0.43056, 0.1132, 0.02778], - "965": [0, 0.43056, 0.03588, 0.02778], - "966": [0.19444, 0.43056, 0, 0.08334], - "967": [0.19444, 0.43056, 0, 0.05556], - "968": [0.19444, 0.69444, 0.03588, 0.11111], - "969": [0, 0.43056, 0.03588, 0], - "977": [0, 0.69444, 0, 0.08334], - "981": [0.19444, 0.69444, 0, 0.08334], - "982": [0, 0.43056, 0.02778, 0], - "1009": [0.19444, 0.43056, 0, 0.08334], - "1013": [0, 0.43056, 0, 0.05556] - }, - "SansSerif-Regular": { - "33": [0, 0.69444, 0, 0], - "34": [0, 0.69444, 0, 0], - "35": [0.19444, 0.69444, 0, 0], - "36": [0.05556, 0.75, 0, 0], - "37": [0.05556, 0.75, 0, 0], - "38": [0, 0.69444, 0, 0], - "39": [0, 0.69444, 0, 0], - "40": [0.25, 0.75, 0, 0], - "41": [0.25, 0.75, 0, 0], - 
"42": [0, 0.75, 0, 0], - "43": [0.08333, 0.58333, 0, 0], - "44": [0.125, 0.08333, 0, 0], - "45": [0, 0.44444, 0, 0], - "46": [0, 0.08333, 0, 0], - "47": [0.25, 0.75, 0, 0], - "48": [0, 0.65556, 0, 0], - "49": [0, 0.65556, 0, 0], - "50": [0, 0.65556, 0, 0], - "51": [0, 0.65556, 0, 0], - "52": [0, 0.65556, 0, 0], - "53": [0, 0.65556, 0, 0], - "54": [0, 0.65556, 0, 0], - "55": [0, 0.65556, 0, 0], - "56": [0, 0.65556, 0, 0], - "57": [0, 0.65556, 0, 0], - "58": [0, 0.44444, 0, 0], - "59": [0.125, 0.44444, 0, 0], - "61": [-0.13, 0.37, 0, 0], - "63": [0, 0.69444, 0, 0], - "64": [0, 0.69444, 0, 0], - "65": [0, 0.69444, 0, 0], - "66": [0, 0.69444, 0, 0], - "67": [0, 0.69444, 0, 0], - "68": [0, 0.69444, 0, 0], - "69": [0, 0.69444, 0, 0], - "70": [0, 0.69444, 0, 0], - "71": [0, 0.69444, 0, 0], - "72": [0, 0.69444, 0, 0], - "73": [0, 0.69444, 0, 0], - "74": [0, 0.69444, 0, 0], - "75": [0, 0.69444, 0, 0], - "76": [0, 0.69444, 0, 0], - "77": [0, 0.69444, 0, 0], - "78": [0, 0.69444, 0, 0], - "79": [0, 0.69444, 0, 0], - "80": [0, 0.69444, 0, 0], - "81": [0.125, 0.69444, 0, 0], - "82": [0, 0.69444, 0, 0], - "83": [0, 0.69444, 0, 0], - "84": [0, 0.69444, 0, 0], - "85": [0, 0.69444, 0, 0], - "86": [0, 0.69444, 0.01389, 0], - "87": [0, 0.69444, 0.01389, 0], - "88": [0, 0.69444, 0, 0], - "89": [0, 0.69444, 0.025, 0], - "90": [0, 0.69444, 0, 0], - "91": [0.25, 0.75, 0, 0], - "93": [0.25, 0.75, 0, 0], - "94": [0, 0.69444, 0, 0], - "95": [0.35, 0.09444, 0.02778, 0], - "97": [0, 0.44444, 0, 0], - "98": [0, 0.69444, 0, 0], - "99": [0, 0.44444, 0, 0], - "100": [0, 0.69444, 0, 0], - "101": [0, 0.44444, 0, 0], - "102": [0, 0.69444, 0.06944, 0], - "103": [0.19444, 0.44444, 0.01389, 0], - "104": [0, 0.69444, 0, 0], - "105": [0, 0.67937, 0, 0], - "106": [0.19444, 0.67937, 0, 0], - "107": [0, 0.69444, 0, 0], - "108": [0, 0.69444, 0, 0], - "109": [0, 0.44444, 0, 0], - "110": [0, 0.44444, 0, 0], - "111": [0, 0.44444, 0, 0], - "112": [0.19444, 0.44444, 0, 0], - "113": [0.19444, 0.44444, 0, 0], - 
"114": [0, 0.44444, 0.01389, 0], - "115": [0, 0.44444, 0, 0], - "116": [0, 0.57143, 0, 0], - "117": [0, 0.44444, 0, 0], - "118": [0, 0.44444, 0.01389, 0], - "119": [0, 0.44444, 0.01389, 0], - "120": [0, 0.44444, 0, 0], - "121": [0.19444, 0.44444, 0.01389, 0], - "122": [0, 0.44444, 0, 0], - "126": [0.35, 0.32659, 0, 0], - "305": [0, 0.44444, 0, 0], - "567": [0.19444, 0.44444, 0, 0], - "768": [0, 0.69444, 0, 0], - "769": [0, 0.69444, 0, 0], - "770": [0, 0.69444, 0, 0], - "771": [0, 0.67659, 0, 0], - "772": [0, 0.60889, 0, 0], - "774": [0, 0.69444, 0, 0], - "775": [0, 0.67937, 0, 0], - "776": [0, 0.67937, 0, 0], - "778": [0, 0.69444, 0, 0], - "779": [0, 0.69444, 0, 0], - "780": [0, 0.63194, 0, 0], - "915": [0, 0.69444, 0, 0], - "916": [0, 0.69444, 0, 0], - "920": [0, 0.69444, 0, 0], - "923": [0, 0.69444, 0, 0], - "926": [0, 0.69444, 0, 0], - "928": [0, 0.69444, 0, 0], - "931": [0, 0.69444, 0, 0], - "933": [0, 0.69444, 0, 0], - "934": [0, 0.69444, 0, 0], - "936": [0, 0.69444, 0, 0], - "937": [0, 0.69444, 0, 0], - "8211": [0, 0.44444, 0.02778, 0], - "8212": [0, 0.44444, 0.02778, 0], - "8216": [0, 0.69444, 0, 0], - "8217": [0, 0.69444, 0, 0], - "8220": [0, 0.69444, 0, 0], - "8221": [0, 0.69444, 0, 0] - }, - "Script-Regular": { - "65": [0, 0.7, 0.22925, 0], - "66": [0, 0.7, 0.04087, 0], - "67": [0, 0.7, 0.1689, 0], - "68": [0, 0.7, 0.09371, 0], - "69": [0, 0.7, 0.18583, 0], - "70": [0, 0.7, 0.13634, 0], - "71": [0, 0.7, 0.17322, 0], - "72": [0, 0.7, 0.29694, 0], - "73": [0, 0.7, 0.19189, 0], - "74": [0.27778, 0.7, 0.19189, 0], - "75": [0, 0.7, 0.31259, 0], - "76": [0, 0.7, 0.19189, 0], - "77": [0, 0.7, 0.15981, 0], - "78": [0, 0.7, 0.3525, 0], - "79": [0, 0.7, 0.08078, 0], - "80": [0, 0.7, 0.08078, 0], - "81": [0, 0.7, 0.03305, 0], - "82": [0, 0.7, 0.06259, 0], - "83": [0, 0.7, 0.19189, 0], - "84": [0, 0.7, 0.29087, 0], - "85": [0, 0.7, 0.25815, 0], - "86": [0, 0.7, 0.27523, 0], - "87": [0, 0.7, 0.27523, 0], - "88": [0, 0.7, 0.26006, 0], - "89": [0, 0.7, 0.2939, 0], - 
"90": [0, 0.7, 0.24037, 0] - }, - "Size1-Regular": { - "40": [0.35001, 0.85, 0, 0], - "41": [0.35001, 0.85, 0, 0], - "47": [0.35001, 0.85, 0, 0], - "91": [0.35001, 0.85, 0, 0], - "92": [0.35001, 0.85, 0, 0], - "93": [0.35001, 0.85, 0, 0], - "123": [0.35001, 0.85, 0, 0], - "125": [0.35001, 0.85, 0, 0], - "710": [0, 0.72222, 0, 0], - "732": [0, 0.72222, 0, 0], - "770": [0, 0.72222, 0, 0], - "771": [0, 0.72222, 0, 0], - "8214": [-0.00099, 0.601, 0, 0], - "8593": [1e-05, 0.6, 0, 0], - "8595": [1e-05, 0.6, 0, 0], - "8657": [1e-05, 0.6, 0, 0], - "8659": [1e-05, 0.6, 0, 0], - "8719": [0.25001, 0.75, 0, 0], - "8720": [0.25001, 0.75, 0, 0], - "8721": [0.25001, 0.75, 0, 0], - "8730": [0.35001, 0.85, 0, 0], - "8739": [-0.00599, 0.606, 0, 0], - "8741": [-0.00599, 0.606, 0, 0], - "8747": [0.30612, 0.805, 0.19445, 0], - "8748": [0.306, 0.805, 0.19445, 0], - "8749": [0.306, 0.805, 0.19445, 0], - "8750": [0.30612, 0.805, 0.19445, 0], - "8896": [0.25001, 0.75, 0, 0], - "8897": [0.25001, 0.75, 0, 0], - "8898": [0.25001, 0.75, 0, 0], - "8899": [0.25001, 0.75, 0, 0], - "8968": [0.35001, 0.85, 0, 0], - "8969": [0.35001, 0.85, 0, 0], - "8970": [0.35001, 0.85, 0, 0], - "8971": [0.35001, 0.85, 0, 0], - "9168": [-0.00099, 0.601, 0, 0], - "10216": [0.35001, 0.85, 0, 0], - "10217": [0.35001, 0.85, 0, 0], - "10752": [0.25001, 0.75, 0, 0], - "10753": [0.25001, 0.75, 0, 0], - "10754": [0.25001, 0.75, 0, 0], - "10756": [0.25001, 0.75, 0, 0], - "10758": [0.25001, 0.75, 0, 0] - }, - "Size2-Regular": { - "40": [0.65002, 1.15, 0, 0], - "41": [0.65002, 1.15, 0, 0], - "47": [0.65002, 1.15, 0, 0], - "91": [0.65002, 1.15, 0, 0], - "92": [0.65002, 1.15, 0, 0], - "93": [0.65002, 1.15, 0, 0], - "123": [0.65002, 1.15, 0, 0], - "125": [0.65002, 1.15, 0, 0], - "710": [0, 0.75, 0, 0], - "732": [0, 0.75, 0, 0], - "770": [0, 0.75, 0, 0], - "771": [0, 0.75, 0, 0], - "8719": [0.55001, 1.05, 0, 0], - "8720": [0.55001, 1.05, 0, 0], - "8721": [0.55001, 1.05, 0, 0], - "8730": [0.65002, 1.15, 0, 0], - "8747": [0.86225, 
1.36, 0.44445, 0], - "8748": [0.862, 1.36, 0.44445, 0], - "8749": [0.862, 1.36, 0.44445, 0], - "8750": [0.86225, 1.36, 0.44445, 0], - "8896": [0.55001, 1.05, 0, 0], - "8897": [0.55001, 1.05, 0, 0], - "8898": [0.55001, 1.05, 0, 0], - "8899": [0.55001, 1.05, 0, 0], - "8968": [0.65002, 1.15, 0, 0], - "8969": [0.65002, 1.15, 0, 0], - "8970": [0.65002, 1.15, 0, 0], - "8971": [0.65002, 1.15, 0, 0], - "10216": [0.65002, 1.15, 0, 0], - "10217": [0.65002, 1.15, 0, 0], - "10752": [0.55001, 1.05, 0, 0], - "10753": [0.55001, 1.05, 0, 0], - "10754": [0.55001, 1.05, 0, 0], - "10756": [0.55001, 1.05, 0, 0], - "10758": [0.55001, 1.05, 0, 0] - }, - "Size3-Regular": { - "40": [0.95003, 1.45, 0, 0], - "41": [0.95003, 1.45, 0, 0], - "47": [0.95003, 1.45, 0, 0], - "91": [0.95003, 1.45, 0, 0], - "92": [0.95003, 1.45, 0, 0], - "93": [0.95003, 1.45, 0, 0], - "123": [0.95003, 1.45, 0, 0], - "125": [0.95003, 1.45, 0, 0], - "710": [0, 0.75, 0, 0], - "732": [0, 0.75, 0, 0], - "770": [0, 0.75, 0, 0], - "771": [0, 0.75, 0, 0], - "8730": [0.95003, 1.45, 0, 0], - "8968": [0.95003, 1.45, 0, 0], - "8969": [0.95003, 1.45, 0, 0], - "8970": [0.95003, 1.45, 0, 0], - "8971": [0.95003, 1.45, 0, 0], - "10216": [0.95003, 1.45, 0, 0], - "10217": [0.95003, 1.45, 0, 0] - }, - "Size4-Regular": { - "40": [1.25003, 1.75, 0, 0], - "41": [1.25003, 1.75, 0, 0], - "47": [1.25003, 1.75, 0, 0], - "91": [1.25003, 1.75, 0, 0], - "92": [1.25003, 1.75, 0, 0], - "93": [1.25003, 1.75, 0, 0], - "123": [1.25003, 1.75, 0, 0], - "125": [1.25003, 1.75, 0, 0], - "710": [0, 0.825, 0, 0], - "732": [0, 0.825, 0, 0], - "770": [0, 0.825, 0, 0], - "771": [0, 0.825, 0, 0], - "8730": [1.25003, 1.75, 0, 0], - "8968": [1.25003, 1.75, 0, 0], - "8969": [1.25003, 1.75, 0, 0], - "8970": [1.25003, 1.75, 0, 0], - "8971": [1.25003, 1.75, 0, 0], - "9115": [0.64502, 1.155, 0, 0], - "9116": [1e-05, 0.6, 0, 0], - "9117": [0.64502, 1.155, 0, 0], - "9118": [0.64502, 1.155, 0, 0], - "9119": [1e-05, 0.6, 0, 0], - "9120": [0.64502, 1.155, 0, 0], - "9121": 
[0.64502, 1.155, 0, 0], - "9122": [-0.00099, 0.601, 0, 0], - "9123": [0.64502, 1.155, 0, 0], - "9124": [0.64502, 1.155, 0, 0], - "9125": [-0.00099, 0.601, 0, 0], - "9126": [0.64502, 1.155, 0, 0], - "9127": [1e-05, 0.9, 0, 0], - "9128": [0.65002, 1.15, 0, 0], - "9129": [0.90001, 0, 0, 0], - "9130": [0, 0.3, 0, 0], - "9131": [1e-05, 0.9, 0, 0], - "9132": [0.65002, 1.15, 0, 0], - "9133": [0.90001, 0, 0, 0], - "9143": [0.88502, 0.915, 0, 0], - "10216": [1.25003, 1.75, 0, 0], - "10217": [1.25003, 1.75, 0, 0], - "57344": [-0.00499, 0.605, 0, 0], - "57345": [-0.00499, 0.605, 0, 0], - "57680": [0, 0.12, 0, 0], - "57681": [0, 0.12, 0, 0], - "57682": [0, 0.12, 0, 0], - "57683": [0, 0.12, 0, 0] - }, - "Typewriter-Regular": { - "33": [0, 0.61111, 0, 0], - "34": [0, 0.61111, 0, 0], - "35": [0, 0.61111, 0, 0], - "36": [0.08333, 0.69444, 0, 0], - "37": [0.08333, 0.69444, 0, 0], - "38": [0, 0.61111, 0, 0], - "39": [0, 0.61111, 0, 0], - "40": [0.08333, 0.69444, 0, 0], - "41": [0.08333, 0.69444, 0, 0], - "42": [0, 0.52083, 0, 0], - "43": [-0.08056, 0.53055, 0, 0], - "44": [0.13889, 0.125, 0, 0], - "45": [-0.08056, 0.53055, 0, 0], - "46": [0, 0.125, 0, 0], - "47": [0.08333, 0.69444, 0, 0], - "48": [0, 0.61111, 0, 0], - "49": [0, 0.61111, 0, 0], - "50": [0, 0.61111, 0, 0], - "51": [0, 0.61111, 0, 0], - "52": [0, 0.61111, 0, 0], - "53": [0, 0.61111, 0, 0], - "54": [0, 0.61111, 0, 0], - "55": [0, 0.61111, 0, 0], - "56": [0, 0.61111, 0, 0], - "57": [0, 0.61111, 0, 0], - "58": [0, 0.43056, 0, 0], - "59": [0.13889, 0.43056, 0, 0], - "60": [-0.05556, 0.55556, 0, 0], - "61": [-0.19549, 0.41562, 0, 0], - "62": [-0.05556, 0.55556, 0, 0], - "63": [0, 0.61111, 0, 0], - "64": [0, 0.61111, 0, 0], - "65": [0, 0.61111, 0, 0], - "66": [0, 0.61111, 0, 0], - "67": [0, 0.61111, 0, 0], - "68": [0, 0.61111, 0, 0], - "69": [0, 0.61111, 0, 0], - "70": [0, 0.61111, 0, 0], - "71": [0, 0.61111, 0, 0], - "72": [0, 0.61111, 0, 0], - "73": [0, 0.61111, 0, 0], - "74": [0, 0.61111, 0, 0], - "75": [0, 0.61111, 0, 
0], - "76": [0, 0.61111, 0, 0], - "77": [0, 0.61111, 0, 0], - "78": [0, 0.61111, 0, 0], - "79": [0, 0.61111, 0, 0], - "80": [0, 0.61111, 0, 0], - "81": [0.13889, 0.61111, 0, 0], - "82": [0, 0.61111, 0, 0], - "83": [0, 0.61111, 0, 0], - "84": [0, 0.61111, 0, 0], - "85": [0, 0.61111, 0, 0], - "86": [0, 0.61111, 0, 0], - "87": [0, 0.61111, 0, 0], - "88": [0, 0.61111, 0, 0], - "89": [0, 0.61111, 0, 0], - "90": [0, 0.61111, 0, 0], - "91": [0.08333, 0.69444, 0, 0], - "92": [0.08333, 0.69444, 0, 0], - "93": [0.08333, 0.69444, 0, 0], - "94": [0, 0.61111, 0, 0], - "95": [0.09514, 0, 0, 0], - "96": [0, 0.61111, 0, 0], - "97": [0, 0.43056, 0, 0], - "98": [0, 0.61111, 0, 0], - "99": [0, 0.43056, 0, 0], - "100": [0, 0.61111, 0, 0], - "101": [0, 0.43056, 0, 0], - "102": [0, 0.61111, 0, 0], - "103": [0.22222, 0.43056, 0, 0], - "104": [0, 0.61111, 0, 0], - "105": [0, 0.61111, 0, 0], - "106": [0.22222, 0.61111, 0, 0], - "107": [0, 0.61111, 0, 0], - "108": [0, 0.61111, 0, 0], - "109": [0, 0.43056, 0, 0], - "110": [0, 0.43056, 0, 0], - "111": [0, 0.43056, 0, 0], - "112": [0.22222, 0.43056, 0, 0], - "113": [0.22222, 0.43056, 0, 0], - "114": [0, 0.43056, 0, 0], - "115": [0, 0.43056, 0, 0], - "116": [0, 0.55358, 0, 0], - "117": [0, 0.43056, 0, 0], - "118": [0, 0.43056, 0, 0], - "119": [0, 0.43056, 0, 0], - "120": [0, 0.43056, 0, 0], - "121": [0.22222, 0.43056, 0, 0], - "122": [0, 0.43056, 0, 0], - "123": [0.08333, 0.69444, 0, 0], - "124": [0.08333, 0.69444, 0, 0], - "125": [0.08333, 0.69444, 0, 0], - "126": [0, 0.61111, 0, 0], - "127": [0, 0.61111, 0, 0], - "305": [0, 0.43056, 0, 0], - "567": [0.22222, 0.43056, 0, 0], - "768": [0, 0.61111, 0, 0], - "769": [0, 0.61111, 0, 0], - "770": [0, 0.61111, 0, 0], - "771": [0, 0.61111, 0, 0], - "772": [0, 0.56555, 0, 0], - "774": [0, 0.61111, 0, 0], - "776": [0, 0.61111, 0, 0], - "778": [0, 0.61111, 0, 0], - "780": [0, 0.56597, 0, 0], - "915": [0, 0.61111, 0, 0], - "916": [0, 0.61111, 0, 0], - "920": [0, 0.61111, 0, 0], - "923": [0, 0.61111, 0, 
0], - "926": [0, 0.61111, 0, 0], - "928": [0, 0.61111, 0, 0], - "931": [0, 0.61111, 0, 0], - "933": [0, 0.61111, 0, 0], - "934": [0, 0.61111, 0, 0], - "936": [0, 0.61111, 0, 0], - "937": [0, 0.61111, 0, 0], - "2018": [0, 0.61111, 0, 0], - "2019": [0, 0.61111, 0, 0], - "8242": [0, 0.61111, 0, 0] - } - }; - - },{}],43:[function(require,module,exports){ - - var _utils = require("./utils"); - - var _utils2 = _interopRequireDefault(_utils); - - var _ParseError = require("./ParseError"); - - var _ParseError2 = _interopRequireDefault(_ParseError); - - var _ParseNode = require("./ParseNode"); - - var _ParseNode2 = _interopRequireDefault(_ParseNode); - - function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - - /* This file contains a list of functions that we parse, identified by - * the calls to defineFunction. - * - * The first argument to defineFunction is a single name or a list of names. - * All functions named in such a list will share a single implementation. - * - * Each declared function can have associated properties, which - * include the following: - * - * - numArgs: The number of arguments the function takes. - * If this is the only property, it can be passed as a number - * instead of an element of a properties object. - * - argTypes: (optional) An array corresponding to each argument of the - * function, giving the type of argument that should be parsed. Its - * length should be equal to `numArgs + numOptionalArgs`. Valid - * types: - * - "size": A size-like thing, such as "1em" or "5ex" - * - "color": An html color, like "#abc" or "blue" - * - "original": The same type as the environment that the - * function being parsed is in (e.g. used for the - * bodies of functions like \textcolor where the - * first argument is special and the second - * argument is parsed normally) - * Other possible types (probably shouldn't be used) - * - "text": Text-like (e.g. 
\text) - * - "math": Normal math - * If undefined, this will be treated as an appropriate length - * array of "original" strings - * - greediness: (optional) The greediness of the function to use ungrouped - * arguments. - * - * E.g. if you have an expression - * \sqrt \frac 1 2 - * since \frac has greediness=2 vs \sqrt's greediness=1, \frac - * will use the two arguments '1' and '2' as its two arguments, - * then that whole function will be used as the argument to - * \sqrt. On the other hand, the expressions - * \frac \frac 1 2 3 - * and - * \frac \sqrt 1 2 - * will fail because \frac and \frac have equal greediness - * and \sqrt has a lower greediness than \frac respectively. To - * make these parse, we would have to change them to: - * \frac {\frac 1 2} 3 - * and - * \frac {\sqrt 1} 2 - * - * The default value is `1` - * - allowedInText: (optional) Whether or not the function is allowed inside - * text mode (default false) - * - numOptionalArgs: (optional) The number of optional arguments the function - * should parse. If the optional arguments aren't found, - * `null` will be passed to the handler in their place. - * (default 0) - * - infix: (optional) Must be true if the function is an infix operator. - * - * The last argument is that implementation, the handler for the function(s). - * It is called to handle these functions and their arguments. - * It receives two arguments: - * - context contains information and references provided by the parser - * - args is an array of arguments obtained from TeX input - * The context contains the following properties: - * - funcName: the text (i.e. name) of the function, including \ - * - parser: the parser object - * - lexer: the lexer object - * - positions: the positions in the overall string of the function - * and the arguments. - * The latter three should only be used to produce error messages. - * - * The function should return an object with the following keys: - * - type: The type of element that this is. 
This is then used in - * buildHTML/buildMathML to determine which function - * should be called to build this node into a DOM node - * Any other data can be added to the object, which will be passed - * in to the function in buildHTML/buildMathML as `group.value`. - */ - - function defineFunction(names, props, handler) { - if (typeof names === "string") { - names = [names]; - } - if (typeof props === "number") { - props = { numArgs: props }; - } - // Set default values of functions - var data = { - numArgs: props.numArgs, - argTypes: props.argTypes, - greediness: props.greediness === undefined ? 1 : props.greediness, - allowedInText: !!props.allowedInText, - allowedInMath: props.allowedInMath, - numOptionalArgs: props.numOptionalArgs || 0, - infix: !!props.infix, - handler: handler - }; - for (var i = 0; i < names.length; ++i) { - module.exports[names[i]] = data; - } - } - - // Since the corresponding buildHTML/buildMathML function expects a - // list of elements, we normalize for different kinds of arguments - var ordargument = function ordargument(arg) { - if (arg.type === "ordgroup") { - return arg.value; - } else { - return [arg]; - } - }; - - // A normal square root - defineFunction("\\sqrt", { - numArgs: 1, - numOptionalArgs: 1 - }, function (context, args) { - var index = args[0]; - var body = args[1]; - return { - type: "sqrt", - body: body, - index: index - }; - }); - - // Non-mathy text, possibly in a font - var textFunctionStyles = { - "\\text": undefined, "\\textrm": "mathrm", "\\textsf": "mathsf", - "\\texttt": "mathtt", "\\textnormal": "mathrm", "\\textbf": "mathbf", - "\\textit": "textit" - }; - - defineFunction(["\\text", "\\textrm", "\\textsf", "\\texttt", "\\textnormal", "\\textbf", "\\textit"], { - numArgs: 1, - argTypes: ["text"], - greediness: 2, - allowedInText: true - }, function (context, args) { - var body = args[0]; - return { - type: "text", - body: ordargument(body), - style: textFunctionStyles[context.funcName] - }; - }); - - // A 
two-argument custom color - defineFunction("\\textcolor", { - numArgs: 2, - allowedInText: true, - greediness: 3, - argTypes: ["color", "original"] - }, function (context, args) { - var color = args[0]; - var body = args[1]; - return { - type: "color", - color: color.value, - value: ordargument(body) - }; - }); - - // \color is handled in Parser.js's parseImplicitGroup - defineFunction("\\color", { - numArgs: 1, - allowedInText: true, - greediness: 3, - argTypes: ["color"] - }, null); - - // An overline - defineFunction("\\overline", { - numArgs: 1 - }, function (context, args) { - var body = args[0]; - return { - type: "overline", - body: body - }; - }); - - // An underline - defineFunction("\\underline", { - numArgs: 1 - }, function (context, args) { - var body = args[0]; - return { - type: "underline", - body: body - }; - }); - - // A box of the width and height - defineFunction("\\rule", { - numArgs: 2, - numOptionalArgs: 1, - argTypes: ["size", "size", "size"] - }, function (context, args) { - var shift = args[0]; - var width = args[1]; - var height = args[2]; - return { - type: "rule", - shift: shift && shift.value, - width: width.value, - height: height.value - }; - }); - - // TODO: In TeX, \mkern only accepts mu-units, and \kern does not accept - // mu-units. In current KaTeX we relax this; both commands accept any unit. 
- defineFunction(["\\kern", "\\mkern"], { - numArgs: 1, - argTypes: ["size"] - }, function (context, args) { - return { - type: "kern", - dimension: args[0].value - }; - }); - - // A KaTeX logo - defineFunction("\\KaTeX", { - numArgs: 0 - }, function (context) { - return { - type: "katex" - }; - }); - - defineFunction("\\phantom", { - numArgs: 1 - }, function (context, args) { - var body = args[0]; - return { - type: "phantom", - value: ordargument(body) - }; - }); - - // Math class commands except \mathop - defineFunction(["\\mathord", "\\mathbin", "\\mathrel", "\\mathopen", "\\mathclose", "\\mathpunct", "\\mathinner"], { - numArgs: 1 - }, function (context, args) { - var body = args[0]; - return { - type: "mclass", - mclass: "m" + context.funcName.substr(5), - value: ordargument(body) - }; - }); - - // Build a relation by placing one symbol on top of another - defineFunction("\\stackrel", { - numArgs: 2 - }, function (context, args) { - var top = args[0]; - var bottom = args[1]; - - var bottomop = new _ParseNode2.default("op", { - type: "op", - limits: true, - alwaysHandleSupSub: true, - symbol: false, - value: ordargument(bottom) - }, bottom.mode); - - var supsub = new _ParseNode2.default("supsub", { - base: bottomop, - sup: top, - sub: null - }, top.mode); - - return { - type: "mclass", - mclass: "mrel", - value: [supsub] - }; - }); - - // \mod-type functions - defineFunction("\\bmod", { - numArgs: 0 - }, function (context, args) { - return { - type: "mod", - modType: "bmod", - value: null - }; - }); - - defineFunction(["\\pod", "\\pmod", "\\mod"], { - numArgs: 1 - }, function (context, args) { - var body = args[0]; - return { - type: "mod", - modType: context.funcName.substr(1), - value: ordargument(body) - }; - }); - - // Extra data needed for the delimiter handler down below - var delimiterSizes = { - "\\bigl": { mclass: "mopen", size: 1 }, - "\\Bigl": { mclass: "mopen", size: 2 }, - "\\biggl": { mclass: "mopen", size: 3 }, - "\\Biggl": { mclass: "mopen", 
size: 4 }, - "\\bigr": { mclass: "mclose", size: 1 }, - "\\Bigr": { mclass: "mclose", size: 2 }, - "\\biggr": { mclass: "mclose", size: 3 }, - "\\Biggr": { mclass: "mclose", size: 4 }, - "\\bigm": { mclass: "mrel", size: 1 }, - "\\Bigm": { mclass: "mrel", size: 2 }, - "\\biggm": { mclass: "mrel", size: 3 }, - "\\Biggm": { mclass: "mrel", size: 4 }, - "\\big": { mclass: "mord", size: 1 }, - "\\Big": { mclass: "mord", size: 2 }, - "\\bigg": { mclass: "mord", size: 3 }, - "\\Bigg": { mclass: "mord", size: 4 } - }; - - var delimiters = ["(", ")", "[", "\\lbrack", "]", "\\rbrack", "\\{", "\\lbrace", "\\}", "\\rbrace", "\\lfloor", "\\rfloor", "\\lceil", "\\rceil", "<", ">", "\\langle", "\\rangle", "\\lt", "\\gt", "\\lvert", "\\rvert", "\\lVert", "\\rVert", "\\lgroup", "\\rgroup", "\\lmoustache", "\\rmoustache", "/", "\\backslash", "|", "\\vert", "\\|", "\\Vert", "\\uparrow", "\\Uparrow", "\\downarrow", "\\Downarrow", "\\updownarrow", "\\Updownarrow", "."]; - - var fontAliases = { - "\\Bbb": "\\mathbb", - "\\bold": "\\mathbf", - "\\frak": "\\mathfrak" - }; - - // Single-argument color functions - defineFunction(["\\blue", "\\orange", "\\pink", "\\red", "\\green", "\\gray", "\\purple", "\\blueA", "\\blueB", "\\blueC", "\\blueD", "\\blueE", "\\tealA", "\\tealB", "\\tealC", "\\tealD", "\\tealE", "\\greenA", "\\greenB", "\\greenC", "\\greenD", "\\greenE", "\\goldA", "\\goldB", "\\goldC", "\\goldD", "\\goldE", "\\redA", "\\redB", "\\redC", "\\redD", "\\redE", "\\maroonA", "\\maroonB", "\\maroonC", "\\maroonD", "\\maroonE", "\\purpleA", "\\purpleB", "\\purpleC", "\\purpleD", "\\purpleE", "\\mintA", "\\mintB", "\\mintC", "\\grayA", "\\grayB", "\\grayC", "\\grayD", "\\grayE", "\\grayF", "\\grayG", "\\grayH", "\\grayI", "\\kaBlue", "\\kaGreen"], { - numArgs: 1, - allowedInText: true, - greediness: 3 - }, function (context, args) { - var body = args[0]; - return { - type: "color", - color: "katex-" + context.funcName.slice(1), - value: ordargument(body) - }; - }); - - // There are 
2 flags for operators; whether they produce limits in - // displaystyle, and whether they are symbols and should grow in - // displaystyle. These four groups cover the four possible choices. - - // No limits, not symbols - defineFunction(["\\arcsin", "\\arccos", "\\arctan", "\\arctg", "\\arcctg", "\\arg", "\\ch", "\\cos", "\\cosec", "\\cosh", "\\cot", "\\cotg", "\\coth", "\\csc", "\\ctg", "\\cth", "\\deg", "\\dim", "\\exp", "\\hom", "\\ker", "\\lg", "\\ln", "\\log", "\\sec", "\\sin", "\\sinh", "\\sh", "\\tan", "\\tanh", "\\tg", "\\th"], { - numArgs: 0 - }, function (context) { - return { - type: "op", - limits: false, - symbol: false, - body: context.funcName - }; - }); - - // Limits, not symbols - defineFunction(["\\det", "\\gcd", "\\inf", "\\lim", "\\liminf", "\\limsup", "\\max", "\\min", "\\Pr", "\\sup"], { - numArgs: 0 - }, function (context) { - return { - type: "op", - limits: true, - symbol: false, - body: context.funcName - }; - }); - - // No limits, symbols - defineFunction(["\\int", "\\iint", "\\iiint", "\\oint"], { - numArgs: 0 - }, function (context) { - return { - type: "op", - limits: false, - symbol: true, - body: context.funcName - }; - }); - - // Limits, symbols - defineFunction(["\\coprod", "\\bigvee", "\\bigwedge", "\\biguplus", "\\bigcap", "\\bigcup", "\\intop", "\\prod", "\\sum", "\\bigotimes", "\\bigoplus", "\\bigodot", "\\bigsqcup", "\\smallint"], { - numArgs: 0 - }, function (context) { - return { - type: "op", - limits: true, - symbol: true, - body: context.funcName - }; - }); - - // \mathop class command - defineFunction("\\mathop", { - numArgs: 1 - }, function (context, args) { - var body = args[0]; - return { - type: "op", - limits: false, - symbol: false, - value: ordargument(body) - }; - }); - - // Fractions - defineFunction(["\\dfrac", "\\frac", "\\tfrac", "\\dbinom", "\\binom", "\\tbinom", "\\\\atopfrac"], { - numArgs: 2, - greediness: 2 - }, function (context, args) { - var numer = args[0]; - var denom = args[1]; - var hasBarLine = 
void 0; - var leftDelim = null; - var rightDelim = null; - var size = "auto"; - - switch (context.funcName) { - case "\\dfrac": - case "\\frac": - case "\\tfrac": - hasBarLine = true; - break; - case "\\\\atopfrac": - hasBarLine = false; - break; - case "\\dbinom": - case "\\binom": - case "\\tbinom": - hasBarLine = false; - leftDelim = "("; - rightDelim = ")"; - break; - default: - throw new Error("Unrecognized genfrac command"); - } - - switch (context.funcName) { - case "\\dfrac": - case "\\dbinom": - size = "display"; - break; - case "\\tfrac": - case "\\tbinom": - size = "text"; - break; - } - - return { - type: "genfrac", - numer: numer, - denom: denom, - hasBarLine: hasBarLine, - leftDelim: leftDelim, - rightDelim: rightDelim, - size: size - }; - }); - - // Left and right overlap functions - defineFunction(["\\llap", "\\rlap"], { - numArgs: 1, - allowedInText: true - }, function (context, args) { - var body = args[0]; - return { - type: context.funcName.slice(1), - body: body - }; - }); - - // Delimiter functions - var checkDelimiter = function checkDelimiter(delim, context) { - if (_utils2.default.contains(delimiters, delim.value)) { - return delim; - } else { - throw new _ParseError2.default("Invalid delimiter: '" + delim.value + "' after '" + context.funcName + "'", delim); - } - }; - - defineFunction(["\\bigl", "\\Bigl", "\\biggl", "\\Biggl", "\\bigr", "\\Bigr", "\\biggr", "\\Biggr", "\\bigm", "\\Bigm", "\\biggm", "\\Biggm", "\\big", "\\Big", "\\bigg", "\\Bigg"], { - numArgs: 1 - }, function (context, args) { - var delim = checkDelimiter(args[0], context); - - return { - type: "delimsizing", - size: delimiterSizes[context.funcName].size, - mclass: delimiterSizes[context.funcName].mclass, - value: delim.value - }; - }); - - defineFunction(["\\left", "\\right"], { - numArgs: 1 - }, function (context, args) { - var delim = checkDelimiter(args[0], context); - - // \left and \right are caught somewhere in Parser.js, which is - // why this data doesn't match 
what is in buildHTML. - return { - type: "leftright", - value: delim.value - }; - }); - - defineFunction("\\middle", { - numArgs: 1 - }, function (context, args) { - var delim = checkDelimiter(args[0], context); - if (!context.parser.leftrightDepth) { - throw new _ParseError2.default("\\middle without preceding \\left", delim); - } - - return { - type: "middle", - value: delim.value - }; - }); - - // Sizing functions (handled in Parser.js explicitly, hence no handler) - defineFunction(["\\tiny", "\\scriptsize", "\\footnotesize", "\\small", "\\normalsize", "\\large", "\\Large", "\\LARGE", "\\huge", "\\Huge"], 0, null); - - // Style changing functions (handled in Parser.js explicitly, hence no - // handler) - defineFunction(["\\displaystyle", "\\textstyle", "\\scriptstyle", "\\scriptscriptstyle"], 0, null); - - // Old font changing functions - defineFunction(["\\rm", "\\sf", "\\tt", "\\bf", "\\it"], 0, null); - - defineFunction([ - // styles - "\\mathrm", "\\mathit", "\\mathbf", - - // families - "\\mathbb", "\\mathcal", "\\mathfrak", "\\mathscr", "\\mathsf", "\\mathtt", - - // aliases - "\\Bbb", "\\bold", "\\frak"], { - numArgs: 1, - greediness: 2 - }, function (context, args) { - var body = args[0]; - var func = context.funcName; - if (func in fontAliases) { - func = fontAliases[func]; - } - return { - type: "font", - font: func.slice(1), - body: body - }; - }); - - // Accents - defineFunction(["\\acute", "\\grave", "\\ddot", "\\tilde", "\\bar", "\\breve", "\\check", "\\hat", "\\vec", "\\dot", "\\widehat", "\\widetilde", "\\overrightarrow", "\\overleftarrow", "\\Overrightarrow", "\\overleftrightarrow", "\\overgroup", "\\overlinesegment", "\\overleftharpoon", "\\overrightharpoon"], { - numArgs: 1 - }, function (context, args) { - var base = args[0]; - - var isStretchy = !_utils2.default.contains(["\\acute", "\\grave", "\\ddot", "\\tilde", "\\bar", "\\breve", "\\check", "\\hat", "\\vec", "\\dot"], context.funcName); - - var isShifty = !isStretchy || 
_utils2.default.contains(["\\widehat", "\\widetilde"], context.funcName); - - return { - type: "accent", - label: context.funcName, - isStretchy: isStretchy, - isShifty: isShifty, - value: ordargument(base), - base: base - }; - }); - - // Text-mode accents - defineFunction(["\\'", "\\`", "\\^", "\\~", "\\=", "\\u", "\\.", '\\"', "\\r", "\\H", "\\v"], { - numArgs: 1, - allowedInText: true, - allowedInMath: false - }, function (context, args) { - var base = args[0]; - - return { - type: "accent", - label: context.funcName, - isStretchy: false, - isShifty: true, - value: ordargument(base), - base: base - }; - }); - - // Horizontal stretchy braces - defineFunction(["\\overbrace", "\\underbrace"], { - numArgs: 1 - }, function (context, args) { - var base = args[0]; - return { - type: "horizBrace", - label: context.funcName, - isOver: /^\\over/.test(context.funcName), - base: base - }; - }); - - // Stretchy accents under the body - defineFunction(["\\underleftarrow", "\\underrightarrow", "\\underleftrightarrow", "\\undergroup", "\\underlinesegment", "\\undertilde"], { - numArgs: 1 - }, function (context, args) { - var body = args[0]; - return { - type: "accentUnder", - label: context.funcName, - value: ordargument(body), - body: body - }; - }); - - // Stretchy arrows with an optional argument - defineFunction(["\\xleftarrow", "\\xrightarrow", "\\xLeftarrow", "\\xRightarrow", "\\xleftrightarrow", "\\xLeftrightarrow", "\\xhookleftarrow", "\\xhookrightarrow", "\\xmapsto", "\\xrightharpoondown", "\\xrightharpoonup", "\\xleftharpoondown", "\\xleftharpoonup", "\\xrightleftharpoons", "\\xleftrightharpoons", "\\xLongequal", "\\xtwoheadrightarrow", "\\xtwoheadleftarrow", "\\xLongequal", "\\xtofrom"], { - numArgs: 1, - numOptionalArgs: 1 - }, function (context, args) { - var below = args[0]; - var body = args[1]; - return { - type: "xArrow", // x for extensible - label: context.funcName, - body: body, - below: below - }; - }); - - // enclose - defineFunction(["\\cancel", 
"\\bcancel", "\\xcancel", "\\sout", "\\fbox"], { - numArgs: 1 - }, function (context, args) { - var body = args[0]; - return { - type: "enclose", - label: context.funcName, - body: body - }; - }); - - // Infix generalized fractions - defineFunction(["\\over", "\\choose", "\\atop"], { - numArgs: 0, - infix: true - }, function (context) { - var replaceWith = void 0; - switch (context.funcName) { - case "\\over": - replaceWith = "\\frac"; - break; - case "\\choose": - replaceWith = "\\binom"; - break; - case "\\atop": - replaceWith = "\\\\atopfrac"; - break; - default: - throw new Error("Unrecognized infix genfrac command"); - } - return { - type: "infix", - replaceWith: replaceWith, - token: context.token - }; - }); - - // Row breaks for aligned data - defineFunction(["\\\\", "\\cr"], { - numArgs: 0, - numOptionalArgs: 1, - argTypes: ["size"] - }, function (context, args) { - var size = args[0]; - return { - type: "cr", - size: size - }; - }); - - // Environment delimiters - defineFunction(["\\begin", "\\end"], { - numArgs: 1, - argTypes: ["text"] - }, function (context, args) { - var nameGroup = args[0]; - if (nameGroup.type !== "ordgroup") { - throw new _ParseError2.default("Invalid environment name", nameGroup); - } - var name = ""; - for (var i = 0; i < nameGroup.value.length; ++i) { - name += nameGroup.value[i].value; - } - return { - type: "environment", - name: name, - nameGroup: nameGroup - }; - }); - - },{"./ParseError":29,"./ParseNode":30,"./utils":51}],44:[function(require,module,exports){ - - /** - * Predefined macros for KaTeX. - * This can be used to define some commands in terms of others. - */ - - // This function might one day accept additional argument and do more things. 
- function defineMacro(name, body) { - module.exports[name] = body; - } - - ////////////////////////////////////////////////////////////////////// - // basics - defineMacro("\\bgroup", "{"); - defineMacro("\\egroup", "}"); - defineMacro("\\begingroup", "{"); - defineMacro("\\endgroup", "}"); - - // We don't distinguish between math and nonmath kerns. - // (In TeX, the mu unit works only with \mkern.) - defineMacro("\\mkern", "\\kern"); - - ////////////////////////////////////////////////////////////////////// - // amsmath.sty - - // \def\overset#1#2{\binrel@{#2}\binrel@@{\mathop{\kern\z@#2}\limits^{#1}}} - defineMacro("\\overset", "\\mathop{#2}\\limits^{#1}"); - defineMacro("\\underset", "\\mathop{#2}\\limits_{#1}"); - - // \newcommand{\boxed}[1]{\fbox{\m@th$\displaystyle#1$}} - defineMacro("\\boxed", "\\fbox{\\displaystyle{#1}}"); - - //TODO: When implementing \dots, should ideally add the \DOTSB indicator - // into the macro, to indicate these are binary operators. - // \def\iff{\DOTSB\;\Longleftrightarrow\;} - // \def\implies{\DOTSB\;\Longrightarrow\;} - // \def\impliedby{\DOTSB\;\Longleftarrow\;} - defineMacro("\\iff", "\\;\\Longleftrightarrow\\;"); - defineMacro("\\implies", "\\;\\Longrightarrow\\;"); - defineMacro("\\impliedby", "\\;\\Longleftarrow\\;"); - - ////////////////////////////////////////////////////////////////////// - // mathtools.sty - - //\providecommand\ordinarycolon{:} - defineMacro("\\ordinarycolon", ":"); - //\def\vcentcolon{\mathrel{\mathop\ordinarycolon}} - //TODO(edemaine): Not yet centered. 
Fix via \raisebox or #726 - defineMacro("\\vcentcolon", "\\mathrel{\\mathop\\ordinarycolon}"); - // \providecommand*\dblcolon{\vcentcolon\mathrel{\mkern-.9mu}\vcentcolon} - defineMacro("\\dblcolon", "\\vcentcolon\\mathrel{\\mkern-.9mu}\\vcentcolon"); - // \providecommand*\coloneqq{\vcentcolon\mathrel{\mkern-1.2mu}=} - defineMacro("\\coloneqq", "\\vcentcolon\\mathrel{\\mkern-1.2mu}="); - // \providecommand*\Coloneqq{\dblcolon\mathrel{\mkern-1.2mu}=} - defineMacro("\\Coloneqq", "\\dblcolon\\mathrel{\\mkern-1.2mu}="); - // \providecommand*\coloneq{\vcentcolon\mathrel{\mkern-1.2mu}\mathrel{-}} - defineMacro("\\coloneq", "\\vcentcolon\\mathrel{\\mkern-1.2mu}\\mathrel{-}"); - // \providecommand*\Coloneq{\dblcolon\mathrel{\mkern-1.2mu}\mathrel{-}} - defineMacro("\\Coloneq", "\\dblcolon\\mathrel{\\mkern-1.2mu}\\mathrel{-}"); - // \providecommand*\eqqcolon{=\mathrel{\mkern-1.2mu}\vcentcolon} - defineMacro("\\eqqcolon", "=\\mathrel{\\mkern-1.2mu}\\vcentcolon"); - // \providecommand*\Eqqcolon{=\mathrel{\mkern-1.2mu}\dblcolon} - defineMacro("\\Eqqcolon", "=\\mathrel{\\mkern-1.2mu}\\dblcolon"); - // \providecommand*\eqcolon{\mathrel{-}\mathrel{\mkern-1.2mu}\vcentcolon} - defineMacro("\\eqcolon", "\\mathrel{-}\\mathrel{\\mkern-1.2mu}\\vcentcolon"); - // \providecommand*\Eqcolon{\mathrel{-}\mathrel{\mkern-1.2mu}\dblcolon} - defineMacro("\\Eqcolon", "\\mathrel{-}\\mathrel{\\mkern-1.2mu}\\dblcolon"); - // \providecommand*\colonapprox{\vcentcolon\mathrel{\mkern-1.2mu}\approx} - defineMacro("\\colonapprox", "\\vcentcolon\\mathrel{\\mkern-1.2mu}\\approx"); - // \providecommand*\Colonapprox{\dblcolon\mathrel{\mkern-1.2mu}\approx} - defineMacro("\\Colonapprox", "\\dblcolon\\mathrel{\\mkern-1.2mu}\\approx"); - // \providecommand*\colonsim{\vcentcolon\mathrel{\mkern-1.2mu}\sim} - defineMacro("\\colonsim", "\\vcentcolon\\mathrel{\\mkern-1.2mu}\\sim"); - // \providecommand*\Colonsim{\dblcolon\mathrel{\mkern-1.2mu}\sim} - defineMacro("\\Colonsim", "\\dblcolon\\mathrel{\\mkern-1.2mu}\\sim"); 
- - ////////////////////////////////////////////////////////////////////// - // colonequals.sty - - // Alternate names for mathtools's macros: - defineMacro("\\ratio", "\\vcentcolon"); - defineMacro("\\coloncolon", "\\dblcolon"); - defineMacro("\\colonequals", "\\coloneqq"); - defineMacro("\\coloncolonequals", "\\Coloneqq"); - defineMacro("\\equalscolon", "\\eqqcolon"); - defineMacro("\\equalscoloncolon", "\\Eqqcolon"); - defineMacro("\\colonminus", "\\coloneq"); - defineMacro("\\coloncolonminus", "\\Coloneq"); - defineMacro("\\minuscolon", "\\eqcolon"); - defineMacro("\\minuscoloncolon", "\\Eqcolon"); - // \colonapprox name is same in mathtools and colonequals. - defineMacro("\\coloncolonapprox", "\\Colonapprox"); - // \colonsim name is same in mathtools and colonequals. - defineMacro("\\coloncolonsim", "\\Colonsim"); - - // Additional macros, implemented by analogy with mathtools definitions: - defineMacro("\\simcolon", "\\sim\\mathrel{\\mkern-1.2mu}\\vcentcolon"); - defineMacro("\\simcoloncolon", "\\sim\\mathrel{\\mkern-1.2mu}\\dblcolon"); - defineMacro("\\approxcolon", "\\approx\\mathrel{\\mkern-1.2mu}\\vcentcolon"); - defineMacro("\\approxcoloncolon", "\\approx\\mathrel{\\mkern-1.2mu}\\dblcolon"); - - },{}],45:[function(require,module,exports){ - - var _classCallCheck2 = require("babel-runtime/helpers/classCallCheck"); - - var _classCallCheck3 = _interopRequireDefault(_classCallCheck2); - - var _createClass2 = require("babel-runtime/helpers/createClass"); - - var _createClass3 = _interopRequireDefault(_createClass2); - - var _utils = require("./utils"); - - var _utils2 = _interopRequireDefault(_utils); - - function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - - /** - * This node represents a general purpose MathML node of any type. The - * constructor requires the type of node to create (for example, `"mo"` or - * `"mspace"`, corresponding to `` and `` tags). 
- */ - var MathNode = function () { - function MathNode(type, children) { - (0, _classCallCheck3.default)(this, MathNode); - - this.type = type; - this.attributes = {}; - this.children = children || []; - } - - /** - * Sets an attribute on a MathML node. MathML depends on attributes to convey a - * semantic content, so this is used heavily. - */ - - - (0, _createClass3.default)(MathNode, [{ - key: "setAttribute", - value: function setAttribute(name, value) { - this.attributes[name] = value; - } - - /** - * Converts the math node into a MathML-namespaced DOM element. - */ - - }, { - key: "toNode", - value: function toNode() { - var node = document.createElementNS("http://www.w3.org/1998/Math/MathML", this.type); - - for (var attr in this.attributes) { - if (Object.prototype.hasOwnProperty.call(this.attributes, attr)) { - node.setAttribute(attr, this.attributes[attr]); - } - } - - for (var i = 0; i < this.children.length; i++) { - node.appendChild(this.children[i].toNode()); - } - - return node; - } - - /** - * Converts the math node into an HTML markup string. - */ - - }, { - key: "toMarkup", - value: function toMarkup() { - var markup = "<" + this.type; - - // Add the attributes - for (var attr in this.attributes) { - if (Object.prototype.hasOwnProperty.call(this.attributes, attr)) { - markup += " " + attr + "=\""; - markup += _utils2.default.escape(this.attributes[attr]); - markup += "\""; - } - } - - markup += ">"; - - for (var i = 0; i < this.children.length; i++) { - markup += this.children[i].toMarkup(); - } - - markup += ""; - - return markup; - } - }]); - return MathNode; - }(); - - /** - * This node represents a piece of text. - */ - /** - * These objects store data about MathML nodes. This is the MathML equivalent - * of the types in domTree.js. Since MathML handles its own rendering, and - * since we're mainly using MathML to improve accessibility, we don't manage - * any of the styling state that the plain DOM nodes do. 
- * - * The `toNode` and `toMarkup` functions work simlarly to how they do in - * domTree.js, creating namespaced DOM nodes and HTML text markup respectively. - */ - - var TextNode = function () { - function TextNode(text) { - (0, _classCallCheck3.default)(this, TextNode); - - this.text = text; - } - - /** - * Converts the text node into a DOM text node. - */ - - - (0, _createClass3.default)(TextNode, [{ - key: "toNode", - value: function toNode() { - return document.createTextNode(this.text); - } - - /** - * Converts the text node into HTML markup (which is just the text itself). - */ - - }, { - key: "toMarkup", - value: function toMarkup() { - return _utils2.default.escape(this.text); - } - }]); - return TextNode; - }(); - - module.exports = { - MathNode: MathNode, - TextNode: TextNode - }; - - },{"./utils":51,"babel-runtime/helpers/classCallCheck":4,"babel-runtime/helpers/createClass":5}],46:[function(require,module,exports){ - - var _Parser = require('./Parser'); - - var _Parser2 = _interopRequireDefault(_Parser); - - function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - - /** - * Parses an expression using a Parser, then returns the parsed result. - */ - var parseTree = function parseTree(toParse, settings) { - if (!(typeof toParse === 'string' || toParse instanceof String)) { - throw new TypeError('KaTeX can only parse string typed expression'); - } - var parser = new _Parser2.default(toParse, settings); - - return parser.parse(); - }; /** - * Provides a single function for parsing an expression using a Parser - * TODO(emily): Remove this - */ - - module.exports = parseTree; - - },{"./Parser":31}],47:[function(require,module,exports){ - - /** - * This file provides support to buildMathML.js and buildHTML.js - * for stretchy wide elements rendered from SVG files - * and other CSS trickery. 
- */ - - var buildCommon = require("./buildCommon"); - var mathMLTree = require("./mathMLTree"); - var utils = require("./utils"); - - var stretchyCodePoint = { - widehat: "^", - widetilde: "~", - undertilde: "~", - overleftarrow: "\u2190", - underleftarrow: "\u2190", - xleftarrow: "\u2190", - overrightarrow: "\u2192", - underrightarrow: "\u2192", - xrightarrow: "\u2192", - underbrace: "\u23B5", - overbrace: "\u23DE", - overleftrightarrow: "\u2194", - underleftrightarrow: "\u2194", - xleftrightarrow: "\u2194", - Overrightarrow: "\u21D2", - xRightarrow: "\u21D2", - overleftharpoon: "\u21BC", - xleftharpoonup: "\u21BC", - overrightharpoon: "\u21C0", - xrightharpoonup: "\u21C0", - xLeftarrow: "\u21D0", - xLeftrightarrow: "\u21D4", - xhookleftarrow: "\u21A9", - xhookrightarrow: "\u21AA", - xmapsto: "\u21A6", - xrightharpoondown: "\u21C1", - xleftharpoondown: "\u21BD", - xrightleftharpoons: "\u21CC", - xleftrightharpoons: "\u21CB", - xtwoheadleftarrow: "\u219E", - xtwoheadrightarrow: "\u21A0", - xLongequal: "=", - xtofrom: "\u21C4" - }; - - var mathMLnode = function mathMLnode(label) { - var node = new mathMLTree.MathNode("mo", [new mathMLTree.TextNode(stretchyCodePoint[label.substr(1)])]); - node.setAttribute("stretchy", "true"); - return node; - }; - - // In the katexImagesData object just below, the dimensions all - // correspond to path geometry inside the relevant SVG. - // For example, \rightarrow uses the same arrowhead as glyph U+2192 - // from the KaTeX Main font. The scaling factor is 1000. - // That is, inside the font, that arrowhead is 522 units tall, which - // corresponds to 0.522 em inside the document. - // And for extensible arrows, we split that distance around the math axis. 
- - var katexImagesData = { - // height, depth, imageName, minWidth - overleftarrow: [0.522, 0, "leftarrow", 0.5], - underleftarrow: [0.522, 0, "leftarrow", 0.5], - xleftarrow: [0.261, 0.261, "leftarrow", 0.783], - overrightarrow: [0.522, 0, "rightarrow", 0.5], - underrightarrow: [0.522, 0, "rightarrow", 0.5], - xrightarrow: [0.261, 0.261, "rightarrow", 0.783], - overbrace: [0.548, 0, "overbrace", 1.6], - underbrace: [0.548, 0, "underbrace", 1.6], - overleftrightarrow: [0.522, 0, "leftrightarrow", 0.5], - underleftrightarrow: [0.522, 0, "leftrightarrow", 0.5], - xleftrightarrow: [0.261, 0.261, "leftrightarrow", 0.783], - Overrightarrow: [0.56, 0, "doublerightarrow", 0.5], - xLeftarrow: [0.28, 0.28, "doubleleftarrow", 0.783], - xRightarrow: [0.28, 0.28, "doublerightarrow", 0.783], - xLeftrightarrow: [0.28, 0.28, "doubleleftrightarrow", 0.955], - overleftharpoon: [0.522, 0, "leftharpoon", 0.5], - overrightharpoon: [0.522, 0, "rightharpoon", 0.5], - xleftharpoonup: [0.261, 0.261, "leftharpoon", 0.783], - xrightharpoonup: [0.261, 0.261, "rightharpoon", 0.783], - xhookleftarrow: [0.261, 0.261, "hookleftarrow", 0.87], - xhookrightarrow: [0.261, 0.261, "hookrightarrow", 0.87], - overlinesegment: [0.414, 0, "linesegment", 0.5], - underlinesegment: [0.414, 0, "linesegment", 0.5], - xmapsto: [0.261, 0.261, "mapsto", 0.783], - xrightharpoondown: [0.261, 0.261, "rightharpoondown", 0.783], - xleftharpoondown: [0.261, 0.261, "leftharpoondown", 0.783], - xrightleftharpoons: [0.358, 0.358, "rightleftharpoons", 0.716], - xleftrightharpoons: [0.358, 0.358, "leftrightharpoons", 0.716], - overgroup: [0.342, 0, "overgroup", 0.87], - undergroup: [0.342, 0, "undergroup", 0.87], - xtwoheadleftarrow: [0.167, 0.167, "twoheadleftarrow", 0.86], - xtwoheadrightarrow: [0.167, 0.167, "twoheadrightarrow", 0.86], - xLongequal: [0.167, 0.167, "longequal", 0.5], - xtofrom: [0.264, 0.264, "tofrom", 0.86] - }; - - // Many of the KaTeX SVG images have been adapted from glyphs in KaTeX fonts. 
- // Copyright (c) 2009-2010, Design Science, Inc. () - // Copyright (c) 2014-2017 Khan Academy () - // Licensed under the SIL Open Font License, Version 1.1. - // See \nhttp://scripts.sil.org/OFL - - // Nested SVGs - // Many of the KaTeX SVG images contain a nested SVG. This is done to - // achieve a stretchy image while avoiding distortion of arrowheads or - // brace corners. - - // The inner SVG typically contains a very long (400 em) arrow. - - // The outer SVG acts like a window that exposes only part of the inner SVG. - // The outer SVG will grow or shrink to match the dimensions set by CSS. - - // The inner SVG always has a longer, thinner aspect ratio than the outer - // SVG. After the inner SVG fills 100% of the height of the outer SVG, - // there is a long arrow shaft left over. That left-over shaft is not shown. - // Instead, it is sliced off because the inner SVG is set to - // "preserveAspectRatio='... slice'". - - // Thus, the reader sees an arrow that matches the subject matter width - // without distortion. - - // Some functions, such as \cancel, need to vary their aspect ratio. These - // functions do not get the nested SVG treatment. - - // Second Brush Stroke - // Low resolution monitors struggle to display images in fine detail. - // So browsers apply anti-aliasing. A long straight arrow shaft therefore - // will sometimes appear as if it has a blurred edge. - - // To mitigate this, these SVG files contain a second "brush-stroke" on the - // arrow shafts. That is, a second long thin rectangular SVG path has been - // written directly on top of each arrow shaft. This reinforcement causes - // some of the screen pixels to display as black instead of the anti-aliased - // gray pixel that a single path would generate. So we get arrow shafts - // whose edges appear to be sharper. 
- - var svgPath = { - doubleleftarrow: "", - - doublerightarrow: "", - - leftarrow: "", - - rightarrow: "" - }; - - var innerSVG = { - // Since bcancel's SVG is inline and it omits the viewBox attribute, - // it's stroke-width will not vary with span area. - bcancel: "", - - cancel: "", - - // The doubleleftarrow geometry is from glyph U+21D0 in the font KaTeX Main - doubleleftarrow: ">" + svgPath["doubleleftarrow"] + "", - - // doubleleftrightarrow is from glyph U+21D4 in font KaTeX Main - doubleleftrightarrow: ">" + svgPath["doubleleftarrow"] + "\n" + svgPath["doublerightarrow"] + "", - - // doublerightarrow is from glyph U+21D2 in font KaTeX Main - doublerightarrow: ">" + svgPath["doublerightarrow"] + "", - - // hookleftarrow is from glyph U+21A9 in font KaTeX Main - hookleftarrow: ">" + svgPath["leftarrow"] + "\n", - - // hookrightarrow is from glyph U+21AA in font KaTeX Main - hookrightarrow: ">" + svgPath["rightarrow"] + "", - - // leftarrow is from glyph U+2190 in font KaTeX Main - leftarrow: ">" + svgPath["leftarrow"] + "", - - // leftharpoon is from glyph U+21BD in font KaTeX Main - leftharpoon: ">", - - // leftharpoondown is from glyph U+21BD in font KaTeX Main - leftharpoondown: ">", - - // leftrightarrow is from glyph U+2194 in font KaTeX Main - leftrightarrow: ">" + svgPath["leftarrow"] + "\n" + svgPath["rightarrow"] + "", - - // leftrightharpoons is from glyphs U+21BC/21B1 in font KaTeX Main - leftrightharpoons: ">\n", - - linesegment: ">\n", - - longequal: " viewBox='0 0 100 334' preserveAspectRatio='none'>\n", - - // mapsto is from glyph U+21A6 in font KaTeX Main - mapsto: ">" + svgPath["rightarrow"] + "", - - // overbrace is from glyphs U+23A9/23A8/23A7 in font KaTeX_Size4-Regular - overbrace: ">\n", - - // overgroup is from the MnSymbol package (public domain) - overgroup: ">", - - // rightarrow is from glyph U+2192 in font KaTeX Main - rightarrow: ">" + svgPath["rightarrow"] + "", - - // rightharpoon is from glyph U+21C0 in font KaTeX Main - 
rightharpoon: ">", - - // rightharpoondown is from glyph U+21C1 in font KaTeX Main - rightharpoondown: ">", - - // rightleftharpoons is from glyph U+21CC in font KaTeX Main - rightleftharpoons: ">", - - // tilde1 is a modified version of a glyph from the MnSymbol package - tilde1: " viewBox='0 0 600 260' preserveAspectRatio='none'>\n", - - // Ditto tilde2, tilde3, and tilde 4 - tilde2: " viewBox='0 0 1033 286' preserveAspectRatio='none'>\n", - - tilde3: " viewBox='0 0 2339 306' preserveAspectRatio='none'>\n", - - tilde4: " viewBox='0 0 2340 312' preserveAspectRatio='none'>\n", - - // tofrom is from glyph U+21C4 in font KaTeX AMS Regular - tofrom: ">", - - // twoheadleftarrow is from glyph U+219E in font KaTeX AMS Regular - twoheadleftarrow: ">\n", - - // twoheadrightarrow is from glyph U+21A0 in font KaTeX AMS Regular - twoheadrightarrow: ">\n", - - // underbrace is from glyphs U+23A9/23A8/23A7 in font KaTeX_Size4-Regular - underbrace: ">\n", - - // undergroup is from the MnSymbol package (public domain) - undergroup: ">", - - // widehat1 is a modified version of a glyph from the MnSymbol package - widehat1: " viewBox='0 0 1062 239' preserveAspectRatio='none'>\n", - - // Ditto widehat2, widehat3, and widehat4 - widehat2: " viewBox='0 0 2364 300' preserveAspectRatio='none'>\n", - - widehat3: " viewBox='0 0 2364 360' preserveAspectRatio='none'>\n", - - widehat4: " viewBox='0 0 2364 420' preserveAspectRatio='none'>\n", - - xcancel: "\n" - }; - - var svgSpan = function svgSpan(group, options) { - // Create a span with inline SVG for the element. - var label = group.value.label.substr(1); - var height = 0; - var depth = 0; - var imageName = ""; - var minWidth = 0; - - if (utils.contains(["widehat", "widetilde", "undertilde"], label)) { - // There are four SVG images available for each function. - // Choose a taller image when there are more characters. - var numChars = group.value.value.length; - if (numChars > 5) { - height = 0.312; - imageName = (label === "widehat" ? 
"widehat" : "tilde") + "4"; - } else { - var imgIndex = [1, 1, 2, 2, 3, 3][numChars]; - if (label === "widehat") { - height = [0, 0.24, 0.30, 0.30, 0.36, 0.36][numChars]; - imageName = "widehat" + imgIndex; - } else { - height = [0, 0.26, 0.30, 0.30, 0.34, 0.34][numChars]; - imageName = "tilde" + imgIndex; - } - } - } else { - var imgData = katexImagesData[label]; - height = imgData[0]; - depth = imgData[1]; - imageName = imgData[2]; - minWidth = imgData[3]; - } - - var span = buildCommon.makeSpan([], [], options); - span.height = height; - span.depth = depth; - var totalHeight = height + depth; - span.style.height = totalHeight + "em"; - if (minWidth > 0) { - span.style.minWidth = minWidth + "em"; - } - - span.innerHTML = ""; - - return span; - }; - - var encloseSpan = function encloseSpan(inner, label, pad, options) { - // Return an image span for \cancel, \bcancel, \xcancel, or \fbox - var img = void 0; - var totalHeight = inner.height + inner.depth + 2 * pad; - - if (label === "fbox") { - img = buildCommon.makeSpan(["stretchy", label], [], options); - if (options.color) { - img.style.borderColor = options.getColor(); - } - } else { - img = buildCommon.makeSpan([], [], options); - img.innerHTML = "" + innerSVG[label] + ""; - } - - img.height = totalHeight; - img.style.height = totalHeight + "em"; - - return img; - }; - - module.exports = { - encloseSpan: encloseSpan, - mathMLnode: mathMLnode, - svgSpan: svgSpan - }; - - },{"./buildCommon":34,"./mathMLTree":45,"./utils":51}],48:[function(require,module,exports){ - - /** - * This file holds a list of all no-argument functions and single-character - * symbols (like 'a' or ';'). - * - * For each of the symbols, there are three properties they can have: - * - font (required): the font to be used for this symbol. Either "main" (the - normal font), or "ams" (the ams fonts). - * - group (required): the ParseNode group type the symbol should have (i.e. - "textord", "mathord", etc). 
- See https://github.com/Khan/KaTeX/wiki/Examining-TeX#group-types - * - replace: the character that this symbol or function should be - * replaced with (i.e. "\phi" has a replace value of "\u03d5", the phi - * character in the main font). - * - * The outermost map in the table indicates what mode the symbols should be - * accepted in (e.g. "math" or "text"). - */ - - module.exports = { - math: {}, - text: {} - }; - - function defineSymbol(mode, font, group, replace, name, acceptUnicodeChar) { - module.exports[mode][name] = { - font: font, - group: group, - replace: replace - }; - - if (acceptUnicodeChar) { - module.exports[mode][replace] = module.exports[mode][name]; - } - } - - // Some abbreviations for commonly used strings. - // This helps minify the code, and also spotting typos using jshint. - - // modes: - var math = "math"; - var text = "text"; - - // fonts: - var main = "main"; - var ams = "ams"; - - // groups: - var accent = "accent"; - var bin = "bin"; - var close = "close"; - var inner = "inner"; - var mathord = "mathord"; - var op = "op"; - var open = "open"; - var punct = "punct"; - var rel = "rel"; - var spacing = "spacing"; - var textord = "textord"; - - // Now comes the symbol table - - // Relation Symbols - defineSymbol(math, main, rel, "\u2261", "\\equiv"); - defineSymbol(math, main, rel, "\u227A", "\\prec"); - defineSymbol(math, main, rel, "\u227B", "\\succ"); - defineSymbol(math, main, rel, "\u223C", "\\sim"); - defineSymbol(math, main, rel, "\u22A5", "\\perp"); - defineSymbol(math, main, rel, "\u2AAF", "\\preceq"); - defineSymbol(math, main, rel, "\u2AB0", "\\succeq"); - defineSymbol(math, main, rel, "\u2243", "\\simeq"); - defineSymbol(math, main, rel, "\u2223", "\\mid"); - defineSymbol(math, main, rel, "\u226A", "\\ll"); - defineSymbol(math, main, rel, "\u226B", "\\gg"); - defineSymbol(math, main, rel, "\u224D", "\\asymp"); - defineSymbol(math, main, rel, "\u2225", "\\parallel"); - defineSymbol(math, main, rel, "\u22C8", "\\bowtie"); - 
defineSymbol(math, main, rel, "\u2323", "\\smile"); - defineSymbol(math, main, rel, "\u2291", "\\sqsubseteq"); - defineSymbol(math, main, rel, "\u2292", "\\sqsupseteq"); - defineSymbol(math, main, rel, "\u2250", "\\doteq"); - defineSymbol(math, main, rel, "\u2322", "\\frown"); - defineSymbol(math, main, rel, "\u220B", "\\ni"); - defineSymbol(math, main, rel, "\u221D", "\\propto"); - defineSymbol(math, main, rel, "\u22A2", "\\vdash"); - defineSymbol(math, main, rel, "\u22A3", "\\dashv"); - defineSymbol(math, main, rel, "\u220B", "\\owns"); - - // Punctuation - defineSymbol(math, main, punct, ".", "\\ldotp"); - defineSymbol(math, main, punct, "\u22C5", "\\cdotp"); - - // Misc Symbols - defineSymbol(math, main, textord, "#", "\\#"); - defineSymbol(text, main, textord, "#", "\\#"); - defineSymbol(math, main, textord, "&", "\\&"); - defineSymbol(text, main, textord, "&", "\\&"); - defineSymbol(math, main, textord, "\u2135", "\\aleph"); - defineSymbol(math, main, textord, "\u2200", "\\forall"); - defineSymbol(math, main, textord, "\u210F", "\\hbar"); - defineSymbol(math, main, textord, "\u2203", "\\exists"); - defineSymbol(math, main, textord, "\u2207", "\\nabla"); - defineSymbol(math, main, textord, "\u266D", "\\flat"); - defineSymbol(math, main, textord, "\u2113", "\\ell"); - defineSymbol(math, main, textord, "\u266E", "\\natural"); - defineSymbol(math, main, textord, "\u2663", "\\clubsuit"); - defineSymbol(math, main, textord, "\u2118", "\\wp"); - defineSymbol(math, main, textord, "\u266F", "\\sharp"); - defineSymbol(math, main, textord, "\u2662", "\\diamondsuit"); - defineSymbol(math, main, textord, "\u211C", "\\Re"); - defineSymbol(math, main, textord, "\u2661", "\\heartsuit"); - defineSymbol(math, main, textord, "\u2111", "\\Im"); - defineSymbol(math, main, textord, "\u2660", "\\spadesuit"); - - // Math and Text - defineSymbol(math, main, textord, "\u2020", "\\dag"); - defineSymbol(text, main, textord, "\u2020", "\\dag"); - defineSymbol(text, main, textord, 
"\u2020", "\\textdagger"); - defineSymbol(math, main, textord, "\u2021", "\\ddag"); - defineSymbol(text, main, textord, "\u2021", "\\ddag"); - defineSymbol(text, main, textord, "\u2020", "\\textdaggerdbl"); - - // Large Delimiters - defineSymbol(math, main, close, "\u23B1", "\\rmoustache"); - defineSymbol(math, main, open, "\u23B0", "\\lmoustache"); - defineSymbol(math, main, close, "\u27EF", "\\rgroup"); - defineSymbol(math, main, open, "\u27EE", "\\lgroup"); - - // Binary Operators - defineSymbol(math, main, bin, "\u2213", "\\mp"); - defineSymbol(math, main, bin, "\u2296", "\\ominus"); - defineSymbol(math, main, bin, "\u228E", "\\uplus"); - defineSymbol(math, main, bin, "\u2293", "\\sqcap"); - defineSymbol(math, main, bin, "\u2217", "\\ast"); - defineSymbol(math, main, bin, "\u2294", "\\sqcup"); - defineSymbol(math, main, bin, "\u25EF", "\\bigcirc"); - defineSymbol(math, main, bin, "\u2219", "\\bullet"); - defineSymbol(math, main, bin, "\u2021", "\\ddagger"); - defineSymbol(math, main, bin, "\u2240", "\\wr"); - defineSymbol(math, main, bin, "\u2A3F", "\\amalg"); - - // Arrow Symbols - defineSymbol(math, main, rel, "\u27F5", "\\longleftarrow"); - defineSymbol(math, main, rel, "\u21D0", "\\Leftarrow"); - defineSymbol(math, main, rel, "\u27F8", "\\Longleftarrow"); - defineSymbol(math, main, rel, "\u27F6", "\\longrightarrow"); - defineSymbol(math, main, rel, "\u21D2", "\\Rightarrow"); - defineSymbol(math, main, rel, "\u27F9", "\\Longrightarrow"); - defineSymbol(math, main, rel, "\u2194", "\\leftrightarrow"); - defineSymbol(math, main, rel, "\u27F7", "\\longleftrightarrow"); - defineSymbol(math, main, rel, "\u21D4", "\\Leftrightarrow"); - defineSymbol(math, main, rel, "\u27FA", "\\Longleftrightarrow"); - defineSymbol(math, main, rel, "\u21A6", "\\mapsto"); - defineSymbol(math, main, rel, "\u27FC", "\\longmapsto"); - defineSymbol(math, main, rel, "\u2197", "\\nearrow"); - defineSymbol(math, main, rel, "\u21A9", "\\hookleftarrow"); - defineSymbol(math, main, rel, 
"\u21AA", "\\hookrightarrow"); - defineSymbol(math, main, rel, "\u2198", "\\searrow"); - defineSymbol(math, main, rel, "\u21BC", "\\leftharpoonup"); - defineSymbol(math, main, rel, "\u21C0", "\\rightharpoonup"); - defineSymbol(math, main, rel, "\u2199", "\\swarrow"); - defineSymbol(math, main, rel, "\u21BD", "\\leftharpoondown"); - defineSymbol(math, main, rel, "\u21C1", "\\rightharpoondown"); - defineSymbol(math, main, rel, "\u2196", "\\nwarrow"); - defineSymbol(math, main, rel, "\u21CC", "\\rightleftharpoons"); - - // AMS Negated Binary Relations - defineSymbol(math, ams, rel, "\u226E", "\\nless"); - defineSymbol(math, ams, rel, "\uE010", "\\nleqslant"); - defineSymbol(math, ams, rel, "\uE011", "\\nleqq"); - defineSymbol(math, ams, rel, "\u2A87", "\\lneq"); - defineSymbol(math, ams, rel, "\u2268", "\\lneqq"); - defineSymbol(math, ams, rel, "\uE00C", "\\lvertneqq"); - defineSymbol(math, ams, rel, "\u22E6", "\\lnsim"); - defineSymbol(math, ams, rel, "\u2A89", "\\lnapprox"); - defineSymbol(math, ams, rel, "\u2280", "\\nprec"); - defineSymbol(math, ams, rel, "\u22E0", "\\npreceq"); - defineSymbol(math, ams, rel, "\u22E8", "\\precnsim"); - defineSymbol(math, ams, rel, "\u2AB9", "\\precnapprox"); - defineSymbol(math, ams, rel, "\u2241", "\\nsim"); - defineSymbol(math, ams, rel, "\uE006", "\\nshortmid"); - defineSymbol(math, ams, rel, "\u2224", "\\nmid"); - defineSymbol(math, ams, rel, "\u22AC", "\\nvdash"); - defineSymbol(math, ams, rel, "\u22AD", "\\nvDash"); - defineSymbol(math, ams, rel, "\u22EA", "\\ntriangleleft"); - defineSymbol(math, ams, rel, "\u22EC", "\\ntrianglelefteq"); - defineSymbol(math, ams, rel, "\u228A", "\\subsetneq"); - defineSymbol(math, ams, rel, "\uE01A", "\\varsubsetneq"); - defineSymbol(math, ams, rel, "\u2ACB", "\\subsetneqq"); - defineSymbol(math, ams, rel, "\uE017", "\\varsubsetneqq"); - defineSymbol(math, ams, rel, "\u226F", "\\ngtr"); - defineSymbol(math, ams, rel, "\uE00F", "\\ngeqslant"); - defineSymbol(math, ams, rel, "\uE00E", 
"\\ngeqq"); - defineSymbol(math, ams, rel, "\u2A88", "\\gneq"); - defineSymbol(math, ams, rel, "\u2269", "\\gneqq"); - defineSymbol(math, ams, rel, "\uE00D", "\\gvertneqq"); - defineSymbol(math, ams, rel, "\u22E7", "\\gnsim"); - defineSymbol(math, ams, rel, "\u2A8A", "\\gnapprox"); - defineSymbol(math, ams, rel, "\u2281", "\\nsucc"); - defineSymbol(math, ams, rel, "\u22E1", "\\nsucceq"); - defineSymbol(math, ams, rel, "\u22E9", "\\succnsim"); - defineSymbol(math, ams, rel, "\u2ABA", "\\succnapprox"); - defineSymbol(math, ams, rel, "\u2246", "\\ncong"); - defineSymbol(math, ams, rel, "\uE007", "\\nshortparallel"); - defineSymbol(math, ams, rel, "\u2226", "\\nparallel"); - defineSymbol(math, ams, rel, "\u22AF", "\\nVDash"); - defineSymbol(math, ams, rel, "\u22EB", "\\ntriangleright"); - defineSymbol(math, ams, rel, "\u22ED", "\\ntrianglerighteq"); - defineSymbol(math, ams, rel, "\uE018", "\\nsupseteqq"); - defineSymbol(math, ams, rel, "\u228B", "\\supsetneq"); - defineSymbol(math, ams, rel, "\uE01B", "\\varsupsetneq"); - defineSymbol(math, ams, rel, "\u2ACC", "\\supsetneqq"); - defineSymbol(math, ams, rel, "\uE019", "\\varsupsetneqq"); - defineSymbol(math, ams, rel, "\u22AE", "\\nVdash"); - defineSymbol(math, ams, rel, "\u2AB5", "\\precneqq"); - defineSymbol(math, ams, rel, "\u2AB6", "\\succneqq"); - defineSymbol(math, ams, rel, "\uE016", "\\nsubseteqq"); - defineSymbol(math, ams, bin, "\u22B4", "\\unlhd"); - defineSymbol(math, ams, bin, "\u22B5", "\\unrhd"); - - // AMS Negated Arrows - defineSymbol(math, ams, rel, "\u219A", "\\nleftarrow"); - defineSymbol(math, ams, rel, "\u219B", "\\nrightarrow"); - defineSymbol(math, ams, rel, "\u21CD", "\\nLeftarrow"); - defineSymbol(math, ams, rel, "\u21CF", "\\nRightarrow"); - defineSymbol(math, ams, rel, "\u21AE", "\\nleftrightarrow"); - defineSymbol(math, ams, rel, "\u21CE", "\\nLeftrightarrow"); - - // AMS Misc - defineSymbol(math, ams, rel, "\u25B3", "\\vartriangle"); - defineSymbol(math, ams, textord, "\u210F", 
"\\hslash"); - defineSymbol(math, ams, textord, "\u25BD", "\\triangledown"); - defineSymbol(math, ams, textord, "\u25CA", "\\lozenge"); - defineSymbol(math, ams, textord, "\u24C8", "\\circledS"); - defineSymbol(math, ams, textord, "\xAE", "\\circledR"); - defineSymbol(text, ams, textord, "\xAE", "\\circledR"); - defineSymbol(math, ams, textord, "\u2221", "\\measuredangle"); - defineSymbol(math, ams, textord, "\u2204", "\\nexists"); - defineSymbol(math, ams, textord, "\u2127", "\\mho"); - defineSymbol(math, ams, textord, "\u2132", "\\Finv"); - defineSymbol(math, ams, textord, "\u2141", "\\Game"); - defineSymbol(math, ams, textord, "k", "\\Bbbk"); - defineSymbol(math, ams, textord, "\u2035", "\\backprime"); - defineSymbol(math, ams, textord, "\u25B2", "\\blacktriangle"); - defineSymbol(math, ams, textord, "\u25BC", "\\blacktriangledown"); - defineSymbol(math, ams, textord, "\u25A0", "\\blacksquare"); - defineSymbol(math, ams, textord, "\u29EB", "\\blacklozenge"); - defineSymbol(math, ams, textord, "\u2605", "\\bigstar"); - defineSymbol(math, ams, textord, "\u2222", "\\sphericalangle"); - defineSymbol(math, ams, textord, "\u2201", "\\complement"); - defineSymbol(math, ams, textord, "\xF0", "\\eth"); - defineSymbol(math, ams, textord, "\u2571", "\\diagup"); - defineSymbol(math, ams, textord, "\u2572", "\\diagdown"); - defineSymbol(math, ams, textord, "\u25A1", "\\square"); - defineSymbol(math, ams, textord, "\u25A1", "\\Box"); - defineSymbol(math, ams, textord, "\u25CA", "\\Diamond"); - defineSymbol(math, ams, textord, "\xA5", "\\yen"); - defineSymbol(math, ams, textord, "\u2713", "\\checkmark"); - defineSymbol(text, ams, textord, "\u2713", "\\checkmark"); - - // AMS Hebrew - defineSymbol(math, ams, textord, "\u2136", "\\beth"); - defineSymbol(math, ams, textord, "\u2138", "\\daleth"); - defineSymbol(math, ams, textord, "\u2137", "\\gimel"); - - // AMS Greek - defineSymbol(math, ams, textord, "\u03DD", "\\digamma"); - defineSymbol(math, ams, textord, "\u03F0", 
"\\varkappa"); - - // AMS Delimiters - defineSymbol(math, ams, open, "\u250C", "\\ulcorner"); - defineSymbol(math, ams, close, "\u2510", "\\urcorner"); - defineSymbol(math, ams, open, "\u2514", "\\llcorner"); - defineSymbol(math, ams, close, "\u2518", "\\lrcorner"); - - // AMS Binary Relations - defineSymbol(math, ams, rel, "\u2266", "\\leqq"); - defineSymbol(math, ams, rel, "\u2A7D", "\\leqslant"); - defineSymbol(math, ams, rel, "\u2A95", "\\eqslantless"); - defineSymbol(math, ams, rel, "\u2272", "\\lesssim"); - defineSymbol(math, ams, rel, "\u2A85", "\\lessapprox"); - defineSymbol(math, ams, rel, "\u224A", "\\approxeq"); - defineSymbol(math, ams, bin, "\u22D6", "\\lessdot"); - defineSymbol(math, ams, rel, "\u22D8", "\\lll"); - defineSymbol(math, ams, rel, "\u2276", "\\lessgtr"); - defineSymbol(math, ams, rel, "\u22DA", "\\lesseqgtr"); - defineSymbol(math, ams, rel, "\u2A8B", "\\lesseqqgtr"); - defineSymbol(math, ams, rel, "\u2251", "\\doteqdot"); - defineSymbol(math, ams, rel, "\u2253", "\\risingdotseq"); - defineSymbol(math, ams, rel, "\u2252", "\\fallingdotseq"); - defineSymbol(math, ams, rel, "\u223D", "\\backsim"); - defineSymbol(math, ams, rel, "\u22CD", "\\backsimeq"); - defineSymbol(math, ams, rel, "\u2AC5", "\\subseteqq"); - defineSymbol(math, ams, rel, "\u22D0", "\\Subset"); - defineSymbol(math, ams, rel, "\u228F", "\\sqsubset"); - defineSymbol(math, ams, rel, "\u227C", "\\preccurlyeq"); - defineSymbol(math, ams, rel, "\u22DE", "\\curlyeqprec"); - defineSymbol(math, ams, rel, "\u227E", "\\precsim"); - defineSymbol(math, ams, rel, "\u2AB7", "\\precapprox"); - defineSymbol(math, ams, rel, "\u22B2", "\\vartriangleleft"); - defineSymbol(math, ams, rel, "\u22B4", "\\trianglelefteq"); - defineSymbol(math, ams, rel, "\u22A8", "\\vDash"); - defineSymbol(math, ams, rel, "\u22AA", "\\Vvdash"); - defineSymbol(math, ams, rel, "\u2323", "\\smallsmile"); - defineSymbol(math, ams, rel, "\u2322", "\\smallfrown"); - defineSymbol(math, ams, rel, "\u224F", "\\bumpeq"); - 
defineSymbol(math, ams, rel, "\u224E", "\\Bumpeq"); - defineSymbol(math, ams, rel, "\u2267", "\\geqq"); - defineSymbol(math, ams, rel, "\u2A7E", "\\geqslant"); - defineSymbol(math, ams, rel, "\u2A96", "\\eqslantgtr"); - defineSymbol(math, ams, rel, "\u2273", "\\gtrsim"); - defineSymbol(math, ams, rel, "\u2A86", "\\gtrapprox"); - defineSymbol(math, ams, bin, "\u22D7", "\\gtrdot"); - defineSymbol(math, ams, rel, "\u22D9", "\\ggg"); - defineSymbol(math, ams, rel, "\u2277", "\\gtrless"); - defineSymbol(math, ams, rel, "\u22DB", "\\gtreqless"); - defineSymbol(math, ams, rel, "\u2A8C", "\\gtreqqless"); - defineSymbol(math, ams, rel, "\u2256", "\\eqcirc"); - defineSymbol(math, ams, rel, "\u2257", "\\circeq"); - defineSymbol(math, ams, rel, "\u225C", "\\triangleq"); - defineSymbol(math, ams, rel, "\u223C", "\\thicksim"); - defineSymbol(math, ams, rel, "\u2248", "\\thickapprox"); - defineSymbol(math, ams, rel, "\u2AC6", "\\supseteqq"); - defineSymbol(math, ams, rel, "\u22D1", "\\Supset"); - defineSymbol(math, ams, rel, "\u2290", "\\sqsupset"); - defineSymbol(math, ams, rel, "\u227D", "\\succcurlyeq"); - defineSymbol(math, ams, rel, "\u22DF", "\\curlyeqsucc"); - defineSymbol(math, ams, rel, "\u227F", "\\succsim"); - defineSymbol(math, ams, rel, "\u2AB8", "\\succapprox"); - defineSymbol(math, ams, rel, "\u22B3", "\\vartriangleright"); - defineSymbol(math, ams, rel, "\u22B5", "\\trianglerighteq"); - defineSymbol(math, ams, rel, "\u22A9", "\\Vdash"); - defineSymbol(math, ams, rel, "\u2223", "\\shortmid"); - defineSymbol(math, ams, rel, "\u2225", "\\shortparallel"); - defineSymbol(math, ams, rel, "\u226C", "\\between"); - defineSymbol(math, ams, rel, "\u22D4", "\\pitchfork"); - defineSymbol(math, ams, rel, "\u221D", "\\varpropto"); - defineSymbol(math, ams, rel, "\u25C0", "\\blacktriangleleft"); - defineSymbol(math, ams, rel, "\u2234", "\\therefore"); - defineSymbol(math, ams, rel, "\u220D", "\\backepsilon"); - defineSymbol(math, ams, rel, "\u25B6", "\\blacktriangleright"); - 
defineSymbol(math, ams, rel, "\u2235", "\\because"); - defineSymbol(math, ams, rel, "\u22D8", "\\llless"); - defineSymbol(math, ams, rel, "\u22D9", "\\gggtr"); - defineSymbol(math, ams, bin, "\u22B2", "\\lhd"); - defineSymbol(math, ams, bin, "\u22B3", "\\rhd"); - defineSymbol(math, ams, rel, "\u2242", "\\eqsim"); - defineSymbol(math, main, rel, "\u22C8", "\\Join"); - defineSymbol(math, ams, rel, "\u2251", "\\Doteq"); - - // AMS Binary Operators - defineSymbol(math, ams, bin, "\u2214", "\\dotplus"); - defineSymbol(math, ams, bin, "\u2216", "\\smallsetminus"); - defineSymbol(math, ams, bin, "\u22D2", "\\Cap"); - defineSymbol(math, ams, bin, "\u22D3", "\\Cup"); - defineSymbol(math, ams, bin, "\u2A5E", "\\doublebarwedge"); - defineSymbol(math, ams, bin, "\u229F", "\\boxminus"); - defineSymbol(math, ams, bin, "\u229E", "\\boxplus"); - defineSymbol(math, ams, bin, "\u22C7", "\\divideontimes"); - defineSymbol(math, ams, bin, "\u22C9", "\\ltimes"); - defineSymbol(math, ams, bin, "\u22CA", "\\rtimes"); - defineSymbol(math, ams, bin, "\u22CB", "\\leftthreetimes"); - defineSymbol(math, ams, bin, "\u22CC", "\\rightthreetimes"); - defineSymbol(math, ams, bin, "\u22CF", "\\curlywedge"); - defineSymbol(math, ams, bin, "\u22CE", "\\curlyvee"); - defineSymbol(math, ams, bin, "\u229D", "\\circleddash"); - defineSymbol(math, ams, bin, "\u229B", "\\circledast"); - defineSymbol(math, ams, bin, "\u22C5", "\\centerdot"); - defineSymbol(math, ams, bin, "\u22BA", "\\intercal"); - defineSymbol(math, ams, bin, "\u22D2", "\\doublecap"); - defineSymbol(math, ams, bin, "\u22D3", "\\doublecup"); - defineSymbol(math, ams, bin, "\u22A0", "\\boxtimes"); - - // AMS Arrows - defineSymbol(math, ams, rel, "\u21E2", "\\dashrightarrow"); - defineSymbol(math, ams, rel, "\u21E0", "\\dashleftarrow"); - defineSymbol(math, ams, rel, "\u21C7", "\\leftleftarrows"); - defineSymbol(math, ams, rel, "\u21C6", "\\leftrightarrows"); - defineSymbol(math, ams, rel, "\u21DA", "\\Lleftarrow"); - defineSymbol(math, ams, 
rel, "\u219E", "\\twoheadleftarrow"); - defineSymbol(math, ams, rel, "\u21A2", "\\leftarrowtail"); - defineSymbol(math, ams, rel, "\u21AB", "\\looparrowleft"); - defineSymbol(math, ams, rel, "\u21CB", "\\leftrightharpoons"); - defineSymbol(math, ams, rel, "\u21B6", "\\curvearrowleft"); - defineSymbol(math, ams, rel, "\u21BA", "\\circlearrowleft"); - defineSymbol(math, ams, rel, "\u21B0", "\\Lsh"); - defineSymbol(math, ams, rel, "\u21C8", "\\upuparrows"); - defineSymbol(math, ams, rel, "\u21BF", "\\upharpoonleft"); - defineSymbol(math, ams, rel, "\u21C3", "\\downharpoonleft"); - defineSymbol(math, ams, rel, "\u22B8", "\\multimap"); - defineSymbol(math, ams, rel, "\u21AD", "\\leftrightsquigarrow"); - defineSymbol(math, ams, rel, "\u21C9", "\\rightrightarrows"); - defineSymbol(math, ams, rel, "\u21C4", "\\rightleftarrows"); - defineSymbol(math, ams, rel, "\u21A0", "\\twoheadrightarrow"); - defineSymbol(math, ams, rel, "\u21A3", "\\rightarrowtail"); - defineSymbol(math, ams, rel, "\u21AC", "\\looparrowright"); - defineSymbol(math, ams, rel, "\u21B7", "\\curvearrowright"); - defineSymbol(math, ams, rel, "\u21BB", "\\circlearrowright"); - defineSymbol(math, ams, rel, "\u21B1", "\\Rsh"); - defineSymbol(math, ams, rel, "\u21CA", "\\downdownarrows"); - defineSymbol(math, ams, rel, "\u21BE", "\\upharpoonright"); - defineSymbol(math, ams, rel, "\u21C2", "\\downharpoonright"); - defineSymbol(math, ams, rel, "\u21DD", "\\rightsquigarrow"); - defineSymbol(math, ams, rel, "\u21DD", "\\leadsto"); - defineSymbol(math, ams, rel, "\u21DB", "\\Rrightarrow"); - defineSymbol(math, ams, rel, "\u21BE", "\\restriction"); - - defineSymbol(math, main, textord, "\u2018", "`"); - defineSymbol(math, main, textord, "$", "\\$"); - defineSymbol(text, main, textord, "$", "\\$"); - defineSymbol(text, main, textord, "$", "\\textdollar"); - defineSymbol(math, main, textord, "%", "\\%"); - defineSymbol(text, main, textord, "%", "\\%"); - defineSymbol(math, main, textord, "_", "\\_"); - 
defineSymbol(text, main, textord, "_", "\\_"); - defineSymbol(text, main, textord, "_", "\\textunderscore"); - defineSymbol(math, main, textord, "\u2220", "\\angle"); - defineSymbol(math, main, textord, "\u221E", "\\infty"); - defineSymbol(math, main, textord, "\u2032", "\\prime"); - defineSymbol(math, main, textord, "\u25B3", "\\triangle"); - defineSymbol(math, main, textord, "\u0393", "\\Gamma", true); - defineSymbol(math, main, textord, "\u0394", "\\Delta", true); - defineSymbol(math, main, textord, "\u0398", "\\Theta", true); - defineSymbol(math, main, textord, "\u039B", "\\Lambda", true); - defineSymbol(math, main, textord, "\u039E", "\\Xi", true); - defineSymbol(math, main, textord, "\u03A0", "\\Pi", true); - defineSymbol(math, main, textord, "\u03A3", "\\Sigma", true); - defineSymbol(math, main, textord, "\u03A5", "\\Upsilon", true); - defineSymbol(math, main, textord, "\u03A6", "\\Phi", true); - defineSymbol(math, main, textord, "\u03A8", "\\Psi", true); - defineSymbol(math, main, textord, "\u03A9", "\\Omega", true); - defineSymbol(math, main, textord, "\xAC", "\\neg"); - defineSymbol(math, main, textord, "\xAC", "\\lnot"); - defineSymbol(math, main, textord, "\u22A4", "\\top"); - defineSymbol(math, main, textord, "\u22A5", "\\bot"); - defineSymbol(math, main, textord, "\u2205", "\\emptyset"); - defineSymbol(math, ams, textord, "\u2205", "\\varnothing"); - defineSymbol(math, main, mathord, "\u03B1", "\\alpha", true); - defineSymbol(math, main, mathord, "\u03B2", "\\beta", true); - defineSymbol(math, main, mathord, "\u03B3", "\\gamma", true); - defineSymbol(math, main, mathord, "\u03B4", "\\delta", true); - defineSymbol(math, main, mathord, "\u03F5", "\\epsilon", true); - defineSymbol(math, main, mathord, "\u03B6", "\\zeta", true); - defineSymbol(math, main, mathord, "\u03B7", "\\eta", true); - defineSymbol(math, main, mathord, "\u03B8", "\\theta", true); - defineSymbol(math, main, mathord, "\u03B9", "\\iota", true); - defineSymbol(math, main, mathord, 
"\u03BA", "\\kappa", true); - defineSymbol(math, main, mathord, "\u03BB", "\\lambda", true); - defineSymbol(math, main, mathord, "\u03BC", "\\mu", true); - defineSymbol(math, main, mathord, "\u03BD", "\\nu", true); - defineSymbol(math, main, mathord, "\u03BE", "\\xi", true); - defineSymbol(math, main, mathord, "\u03BF", "\\omicron", true); - defineSymbol(math, main, mathord, "\u03C0", "\\pi", true); - defineSymbol(math, main, mathord, "\u03C1", "\\rho", true); - defineSymbol(math, main, mathord, "\u03C3", "\\sigma", true); - defineSymbol(math, main, mathord, "\u03C4", "\\tau", true); - defineSymbol(math, main, mathord, "\u03C5", "\\upsilon", true); - defineSymbol(math, main, mathord, "\u03D5", "\\phi", true); - defineSymbol(math, main, mathord, "\u03C7", "\\chi", true); - defineSymbol(math, main, mathord, "\u03C8", "\\psi", true); - defineSymbol(math, main, mathord, "\u03C9", "\\omega", true); - defineSymbol(math, main, mathord, "\u03B5", "\\varepsilon", true); - defineSymbol(math, main, mathord, "\u03D1", "\\vartheta", true); - defineSymbol(math, main, mathord, "\u03D6", "\\varpi", true); - defineSymbol(math, main, mathord, "\u03F1", "\\varrho", true); - defineSymbol(math, main, mathord, "\u03C2", "\\varsigma", true); - defineSymbol(math, main, mathord, "\u03C6", "\\varphi", true); - defineSymbol(math, main, bin, "\u2217", "*"); - defineSymbol(math, main, bin, "+", "+"); - defineSymbol(math, main, bin, "\u2212", "-"); - defineSymbol(math, main, bin, "\u22C5", "\\cdot"); - defineSymbol(math, main, bin, "\u2218", "\\circ"); - defineSymbol(math, main, bin, "\xF7", "\\div"); - defineSymbol(math, main, bin, "\xB1", "\\pm"); - defineSymbol(math, main, bin, "\xD7", "\\times"); - defineSymbol(math, main, bin, "\u2229", "\\cap"); - defineSymbol(math, main, bin, "\u222A", "\\cup"); - defineSymbol(math, main, bin, "\u2216", "\\setminus"); - defineSymbol(math, main, bin, "\u2227", "\\land"); - defineSymbol(math, main, bin, "\u2228", "\\lor"); - defineSymbol(math, main, bin, 
"\u2227", "\\wedge"); - defineSymbol(math, main, bin, "\u2228", "\\vee"); - defineSymbol(math, main, textord, "\u221A", "\\surd"); - defineSymbol(math, main, open, "(", "("); - defineSymbol(math, main, open, "[", "["); - defineSymbol(math, main, open, "\u27E8", "\\langle"); - defineSymbol(math, main, open, "\u2223", "\\lvert"); - defineSymbol(math, main, open, "\u2225", "\\lVert"); - defineSymbol(math, main, close, ")", ")"); - defineSymbol(math, main, close, "]", "]"); - defineSymbol(math, main, close, "?", "?"); - defineSymbol(math, main, close, "!", "!"); - defineSymbol(math, main, close, "\u27E9", "\\rangle"); - defineSymbol(math, main, close, "\u2223", "\\rvert"); - defineSymbol(math, main, close, "\u2225", "\\rVert"); - defineSymbol(math, main, rel, "=", "="); - defineSymbol(math, main, rel, "<", "<"); - defineSymbol(math, main, rel, ">", ">"); - defineSymbol(math, main, rel, ":", ":"); - defineSymbol(math, main, rel, "\u2248", "\\approx"); - defineSymbol(math, main, rel, "\u2245", "\\cong"); - defineSymbol(math, main, rel, "\u2265", "\\ge"); - defineSymbol(math, main, rel, "\u2265", "\\geq"); - defineSymbol(math, main, rel, "\u2190", "\\gets"); - defineSymbol(math, main, rel, ">", "\\gt"); - defineSymbol(math, main, rel, "\u2208", "\\in"); - defineSymbol(math, main, rel, "\u2209", "\\notin"); - defineSymbol(math, main, rel, "\u0338", "\\not"); - defineSymbol(math, main, rel, "\u2282", "\\subset"); - defineSymbol(math, main, rel, "\u2283", "\\supset"); - defineSymbol(math, main, rel, "\u2286", "\\subseteq"); - defineSymbol(math, main, rel, "\u2287", "\\supseteq"); - defineSymbol(math, ams, rel, "\u2288", "\\nsubseteq"); - defineSymbol(math, ams, rel, "\u2289", "\\nsupseteq"); - defineSymbol(math, main, rel, "\u22A8", "\\models"); - defineSymbol(math, main, rel, "\u2190", "\\leftarrow"); - defineSymbol(math, main, rel, "\u2264", "\\le"); - defineSymbol(math, main, rel, "\u2264", "\\leq"); - defineSymbol(math, main, rel, "<", "\\lt"); - defineSymbol(math, main, 
rel, "\u2260", "\\ne"); - defineSymbol(math, main, rel, "\u2260", "\\neq"); - defineSymbol(math, main, rel, "\u2192", "\\rightarrow"); - defineSymbol(math, main, rel, "\u2192", "\\to"); - defineSymbol(math, ams, rel, "\u2271", "\\ngeq"); - defineSymbol(math, ams, rel, "\u2270", "\\nleq"); - defineSymbol(math, main, spacing, null, "\\!"); - defineSymbol(math, main, spacing, "\xA0", "\\ "); - defineSymbol(math, main, spacing, "\xA0", "~"); - defineSymbol(math, main, spacing, null, "\\,"); - defineSymbol(math, main, spacing, null, "\\:"); - defineSymbol(math, main, spacing, null, "\\;"); - defineSymbol(math, main, spacing, null, "\\enspace"); - defineSymbol(math, main, spacing, null, "\\qquad"); - defineSymbol(math, main, spacing, null, "\\quad"); - defineSymbol(math, main, spacing, "\xA0", "\\space"); - defineSymbol(math, main, punct, ",", ","); - defineSymbol(math, main, punct, ";", ";"); - defineSymbol(math, main, punct, ":", "\\colon"); - defineSymbol(math, ams, bin, "\u22BC", "\\barwedge"); - defineSymbol(math, ams, bin, "\u22BB", "\\veebar"); - defineSymbol(math, main, bin, "\u2299", "\\odot"); - defineSymbol(math, main, bin, "\u2295", "\\oplus"); - defineSymbol(math, main, bin, "\u2297", "\\otimes"); - defineSymbol(math, main, textord, "\u2202", "\\partial"); - defineSymbol(math, main, bin, "\u2298", "\\oslash"); - defineSymbol(math, ams, bin, "\u229A", "\\circledcirc"); - defineSymbol(math, ams, bin, "\u22A1", "\\boxdot"); - defineSymbol(math, main, bin, "\u25B3", "\\bigtriangleup"); - defineSymbol(math, main, bin, "\u25BD", "\\bigtriangledown"); - defineSymbol(math, main, bin, "\u2020", "\\dagger"); - defineSymbol(math, main, bin, "\u22C4", "\\diamond"); - defineSymbol(math, main, bin, "\u22C6", "\\star"); - defineSymbol(math, main, bin, "\u25C3", "\\triangleleft"); - defineSymbol(math, main, bin, "\u25B9", "\\triangleright"); - defineSymbol(math, main, open, "{", "\\{"); - defineSymbol(text, main, textord, "{", "\\{"); - defineSymbol(text, main, textord, 
"{", "\\textbraceleft"); - defineSymbol(math, main, close, "}", "\\}"); - defineSymbol(text, main, textord, "}", "\\}"); - defineSymbol(text, main, textord, "}", "\\textbraceright"); - defineSymbol(math, main, open, "{", "\\lbrace"); - defineSymbol(math, main, close, "}", "\\rbrace"); - defineSymbol(math, main, open, "[", "\\lbrack"); - defineSymbol(math, main, close, "]", "\\rbrack"); - defineSymbol(text, main, textord, "<", "\\textless"); // in T1 fontenc - defineSymbol(text, main, textord, ">", "\\textgreater"); // in T1 fontenc - defineSymbol(math, main, open, "\u230A", "\\lfloor"); - defineSymbol(math, main, close, "\u230B", "\\rfloor"); - defineSymbol(math, main, open, "\u2308", "\\lceil"); - defineSymbol(math, main, close, "\u2309", "\\rceil"); - defineSymbol(math, main, textord, "\\", "\\backslash"); - defineSymbol(math, main, textord, "\u2223", "|"); - defineSymbol(math, main, textord, "\u2223", "\\vert"); - defineSymbol(text, main, textord, "|", "\\textbar"); // in T1 fontenc - defineSymbol(math, main, textord, "\u2225", "\\|"); - defineSymbol(math, main, textord, "\u2225", "\\Vert"); - defineSymbol(text, main, textord, "\u2225", "\\textbardbl"); - defineSymbol(math, main, rel, "\u2191", "\\uparrow"); - defineSymbol(math, main, rel, "\u21D1", "\\Uparrow"); - defineSymbol(math, main, rel, "\u2193", "\\downarrow"); - defineSymbol(math, main, rel, "\u21D3", "\\Downarrow"); - defineSymbol(math, main, rel, "\u2195", "\\updownarrow"); - defineSymbol(math, main, rel, "\u21D5", "\\Updownarrow"); - defineSymbol(math, main, op, "\u2210", "\\coprod"); - defineSymbol(math, main, op, "\u22C1", "\\bigvee"); - defineSymbol(math, main, op, "\u22C0", "\\bigwedge"); - defineSymbol(math, main, op, "\u2A04", "\\biguplus"); - defineSymbol(math, main, op, "\u22C2", "\\bigcap"); - defineSymbol(math, main, op, "\u22C3", "\\bigcup"); - defineSymbol(math, main, op, "\u222B", "\\int"); - defineSymbol(math, main, op, "\u222B", "\\intop"); - defineSymbol(math, main, op, "\u222C", 
"\\iint"); - defineSymbol(math, main, op, "\u222D", "\\iiint"); - defineSymbol(math, main, op, "\u220F", "\\prod"); - defineSymbol(math, main, op, "\u2211", "\\sum"); - defineSymbol(math, main, op, "\u2A02", "\\bigotimes"); - defineSymbol(math, main, op, "\u2A01", "\\bigoplus"); - defineSymbol(math, main, op, "\u2A00", "\\bigodot"); - defineSymbol(math, main, op, "\u222E", "\\oint"); - defineSymbol(math, main, op, "\u2A06", "\\bigsqcup"); - defineSymbol(math, main, op, "\u222B", "\\smallint"); - defineSymbol(text, main, inner, "\u2026", "\\textellipsis"); - defineSymbol(math, main, inner, "\u2026", "\\mathellipsis"); - defineSymbol(text, main, inner, "\u2026", "\\ldots", true); - defineSymbol(math, main, inner, "\u2026", "\\ldots", true); - defineSymbol(math, main, inner, "\u22EF", "\\cdots", true); - defineSymbol(math, main, inner, "\u22F1", "\\ddots", true); - defineSymbol(math, main, textord, "\u22EE", "\\vdots", true); - defineSymbol(math, main, accent, "\xB4", "\\acute"); - defineSymbol(math, main, accent, "`", "\\grave"); - defineSymbol(math, main, accent, "\xA8", "\\ddot"); - defineSymbol(math, main, accent, "~", "\\tilde"); - defineSymbol(math, main, accent, "\xAF", "\\bar"); - defineSymbol(math, main, accent, "\u02D8", "\\breve"); - defineSymbol(math, main, accent, "\u02C7", "\\check"); - defineSymbol(math, main, accent, "^", "\\hat"); - defineSymbol(math, main, accent, "\u20D7", "\\vec"); - defineSymbol(math, main, accent, "\u02D9", "\\dot"); - defineSymbol(math, main, mathord, "\u0131", "\\imath"); - defineSymbol(math, main, mathord, "\u0237", "\\jmath"); - defineSymbol(text, main, accent, "\u02CA", "\\'"); // acute - defineSymbol(text, main, accent, "\u02CB", "\\`"); // grave - defineSymbol(text, main, accent, "\u02C6", "\\^"); // circumflex - defineSymbol(text, main, accent, "\u02DC", "\\~"); // tilde - defineSymbol(text, main, accent, "\u02C9", "\\="); // macron - defineSymbol(text, main, accent, "\u02D8", "\\u"); // breve - defineSymbol(text, main, 
accent, "\u02D9", "\\."); // dot above - defineSymbol(text, main, accent, "\u02DA", "\\r"); // ring above - defineSymbol(text, main, accent, "\u02C7", "\\v"); // caron - defineSymbol(text, main, accent, "\xA8", '\\"'); // diaresis - defineSymbol(text, main, accent, "\u030B", "\\H"); // double acute - - defineSymbol(text, main, textord, "\u2013", "--"); - defineSymbol(text, main, textord, "\u2013", "\\textendash"); - defineSymbol(text, main, textord, "\u2014", "---"); - defineSymbol(text, main, textord, "\u2014", "\\textemdash"); - defineSymbol(text, main, textord, "\u2018", "`"); - defineSymbol(text, main, textord, "\u2018", "\\textquoteleft"); - defineSymbol(text, main, textord, "\u2019", "'"); - defineSymbol(text, main, textord, "\u2019", "\\textquoteright"); - defineSymbol(text, main, textord, "\u201C", "``"); - defineSymbol(text, main, textord, "\u201C", "\\textquotedblleft"); - defineSymbol(text, main, textord, "\u201D", "''"); - defineSymbol(text, main, textord, "\u201D", "\\textquotedblright"); - defineSymbol(math, main, textord, "\xB0", "\\degree"); - defineSymbol(text, main, textord, "\xB0", "\\degree"); - // TODO: In LaTeX, \pounds can generate a different character in text and math - // mode, but among our fonts, only Main-Italic defines this character "163". - defineSymbol(math, main, mathord, "\xA3", "\\pounds"); - defineSymbol(math, main, mathord, "\xA3", "\\mathsterling"); - defineSymbol(text, main, mathord, "\xA3", "\\pounds"); - defineSymbol(text, main, mathord, "\xA3", "\\textsterling"); - defineSymbol(math, ams, textord, "\u2720", "\\maltese"); - defineSymbol(text, ams, textord, "\u2720", "\\maltese"); - - defineSymbol(text, main, spacing, "\xA0", "\\ "); - defineSymbol(text, main, spacing, "\xA0", " "); - defineSymbol(text, main, spacing, "\xA0", "~"); - - // There are lots of symbols which are the same, so we add them in afterwards. 
- - // All of these are textords in math mode - var mathTextSymbols = "0123456789/@.\""; - for (var i = 0; i < mathTextSymbols.length; i++) { - var ch = mathTextSymbols.charAt(i); - defineSymbol(math, main, textord, ch, ch); - } - - // All of these are textords in text mode - var textSymbols = "0123456789!@*()-=+[]<>|\";:?/.,"; - for (var _i = 0; _i < textSymbols.length; _i++) { - var _ch = textSymbols.charAt(_i); - defineSymbol(text, main, textord, _ch, _ch); - } - - // All of these are textords in text mode, and mathords in math mode - var letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; - for (var _i2 = 0; _i2 < letters.length; _i2++) { - var _ch2 = letters.charAt(_i2); - defineSymbol(math, main, mathord, _ch2, _ch2); - defineSymbol(text, main, textord, _ch2, _ch2); - } - - // Latin-1 letters - for (var _i3 = 0x00C0; _i3 <= 0x00D6; _i3++) { - var _ch3 = String.fromCharCode(_i3); - defineSymbol(math, main, mathord, _ch3, _ch3); - defineSymbol(text, main, textord, _ch3, _ch3); - } - - for (var _i4 = 0x00D8; _i4 <= 0x00F6; _i4++) { - var _ch4 = String.fromCharCode(_i4); - defineSymbol(math, main, mathord, _ch4, _ch4); - defineSymbol(text, main, textord, _ch4, _ch4); - } - - for (var _i5 = 0x00F8; _i5 <= 0x00FF; _i5++) { - var _ch5 = String.fromCharCode(_i5); - defineSymbol(math, main, mathord, _ch5, _ch5); - defineSymbol(text, main, textord, _ch5, _ch5); - } - - // Cyrillic - for (var _i6 = 0x0410; _i6 <= 0x044F; _i6++) { - var _ch6 = String.fromCharCode(_i6); - defineSymbol(text, main, textord, _ch6, _ch6); - } - - // Unicode versions of existing characters - defineSymbol(text, main, textord, "\u2013", "–"); - defineSymbol(text, main, textord, "\u2014", "—"); - defineSymbol(text, main, textord, "\u2018", "‘"); - defineSymbol(text, main, textord, "\u2019", "’"); - defineSymbol(text, main, textord, "\u201C", "“"); - defineSymbol(text, main, textord, "\u201D", "”"); - - },{}],49:[function(require,module,exports){ - - var hangulRegex = 
/[\uAC00-\uD7AF]/; - - // This regex combines - // - CJK symbols and punctuation: [\u3000-\u303F] - // - Hiragana: [\u3040-\u309F] - // - Katakana: [\u30A0-\u30FF] - // - CJK ideograms: [\u4E00-\u9FAF] - // - Hangul syllables: [\uAC00-\uD7AF] - // - Fullwidth punctuation: [\uFF00-\uFF60] - // Notably missing are halfwidth Katakana and Romanji glyphs. - var cjkRegex = /[\u3000-\u30FF\u4E00-\u9FAF\uAC00-\uD7AF\uFF00-\uFF60]/; - - module.exports = { - cjkRegex: cjkRegex, - hangulRegex: hangulRegex - }; - - },{}],50:[function(require,module,exports){ - - var _ParseError = require("./ParseError"); - - var _ParseError2 = _interopRequireDefault(_ParseError); - - function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - - // This table gives the number of TeX pts in one of each *absolute* TeX unit. - // Thus, multiplying a length by this number converts the length from units - // into pts. Dividing the result by ptPerEm gives the number of ems - // *assuming* a font size of ptPerEm (normal size, normal style). - var ptPerUnit = { - // https://en.wikibooks.org/wiki/LaTeX/Lengths and - // https://tex.stackexchange.com/a/8263 - "pt": 1, // TeX point - "mm": 7227 / 2540, // millimeter - "cm": 7227 / 254, // centimeter - "in": 72.27, // inch - "bp": 803 / 800, // big (PostScript) points - "pc": 12, // pica - "dd": 1238 / 1157, // didot - "cc": 14856 / 1157, // cicero (12 didot) - "nd": 685 / 642, // new didot - "nc": 1370 / 107, // new cicero (12 new didot) - "sp": 1 / 65536, // scaled point (TeX's internal smallest unit) - // https://tex.stackexchange.com/a/41371 - "px": 803 / 800 }; - - // Dictionary of relative units, for fast validity testing. - /* eslint no-console:0 */ - - /** - * This file does conversion between units. In particular, it provides - * calculateSize to convert other units into ems. 
- */ - - var relativeUnit = { - "ex": true, - "em": true, - "mu": true - }; - - /** - * Determine whether the specified unit (either a string defining the unit - * or a "size" parse node containing a unit field) is valid. - */ - var validUnit = function validUnit(unit) { - if (unit.unit) { - unit = unit.unit; - } - return unit in ptPerUnit || unit in relativeUnit || unit === "ex"; - }; - - /* - * Convert a "size" parse node (with numeric "number" and string "unit" fields, - * as parsed by functions.js argType "size") into a CSS em value for the - * current style/scale. `options` gives the current options. - */ - var calculateSize = function calculateSize(sizeValue, options) { - var scale = void 0; - if (sizeValue.unit in ptPerUnit) { - // Absolute units - scale = ptPerUnit[sizeValue.unit] // Convert unit to pt - / options.fontMetrics().ptPerEm // Convert pt to CSS em - / options.sizeMultiplier; // Unscale to make absolute units - } else if (sizeValue.unit === "mu") { - // `mu` units scale with scriptstyle/scriptscriptstyle. - scale = options.fontMetrics().cssEmPerMu; - } else { - // Other relative units always refer to the *textstyle* font - // in the current size. - var unitOptions = void 0; - if (options.style.isTight()) { - // isTight() means current style is script/scriptscript. - unitOptions = options.havingStyle(options.style.text()); - } else { - unitOptions = options; - } - // TODO: In TeX these units are relative to the quad of the current - // *text* font, e.g. cmr10. KaTeX instead uses values from the - // comparably-sized *Computer Modern symbol* font. At 10pt, these - // match. At 7pt and 5pt, they differ: cmr7=1.138894, cmsy7=1.170641; - // cmr5=1.361133, cmsy5=1.472241. Consider $\scriptsize a\kern1emb$. - // TeX \showlists shows a kern of 1.13889 * fontsize; - // KaTeX shows a kern of 1.171 * fontsize. 
- if (sizeValue.unit === "ex") { - scale = unitOptions.fontMetrics().xHeight; - } else if (sizeValue.unit === "em") { - scale = unitOptions.fontMetrics().quad; - } else { - throw new _ParseError2.default("Invalid unit: '" + sizeValue.unit + "'"); - } - if (unitOptions !== options) { - scale *= unitOptions.sizeMultiplier / options.sizeMultiplier; - } - } - return sizeValue.number * scale; - }; - - module.exports = { - validUnit: validUnit, - calculateSize: calculateSize - }; - - },{"./ParseError":29}],51:[function(require,module,exports){ - - /** - * This file contains a list of utility functions which are useful in other - * files. - */ - - /** - * Provide an `indexOf` function which works in IE8, but defers to native if - * possible. - */ - var nativeIndexOf = Array.prototype.indexOf; - var indexOf = function indexOf(list, elem) { - if (list == null) { - return -1; - } - if (nativeIndexOf && list.indexOf === nativeIndexOf) { - return list.indexOf(elem); - } - var l = list.length; - for (var i = 0; i < l; i++) { - if (list[i] === elem) { - return i; - } - } - return -1; - }; - - /** - * Return whether an element is contained in a list - */ - var contains = function contains(list, elem) { - return indexOf(list, elem) !== -1; - }; - - /** - * Provide a default value if a setting is undefined - */ - var deflt = function deflt(setting, defaultIfUndefined) { - return setting === undefined ? defaultIfUndefined : setting; - }; - - // hyphenate and escape adapted from Facebook's React under Apache 2 license - - var uppercase = /([A-Z])/g; - var hyphenate = function hyphenate(str) { - return str.replace(uppercase, "-$1").toLowerCase(); - }; - - var ESCAPE_LOOKUP = { - "&": "&", - ">": ">", - "<": "<", - "\"": """, - "'": "'" - }; - - var ESCAPE_REGEX = /[&><"']/g; - - function escaper(match) { - return ESCAPE_LOOKUP[match]; - } - - /** - * Escapes text to prevent scripting attacks. - * - * @param {*} text Text value to escape. - * @return {string} An escaped string. 
- */ - function escape(text) { - return ("" + text).replace(ESCAPE_REGEX, escaper); - } - - /** - * A function to set the text content of a DOM element in all supported - * browsers. Note that we don't define this if there is no document. - */ - var setTextContent = void 0; - if (typeof document !== "undefined") { - var testNode = document.createElement("span"); - if ("textContent" in testNode) { - setTextContent = function setTextContent(node, text) { - node.textContent = text; - }; - } else { - setTextContent = function setTextContent(node, text) { - node.innerText = text; - }; - } - } - - /** - * A function to clear a node. - */ - function clearNode(node) { - setTextContent(node, ""); - } - - module.exports = { - contains: contains, - deflt: deflt, - escape: escape, - hyphenate: hyphenate, - indexOf: indexOf, - setTextContent: setTextContent, - clearNode: clearNode - }; - - },{}]},{},[1])(1) - }); - }); - - var katex$2 = unwrapExports(katex$1); - - // Copyright 2018 The Distill Template Authors - // - // Licensed under the Apache License, Version 2.0 (the "License"); - // you may not use this file except in compliance with the License. - // You may obtain a copy of the License at - // - // http://www.apache.org/licenses/LICENSE-2.0 - // - // Unless required by applicable law or agreed to in writing, software - // distributed under the License is distributed on an "AS IS" BASIS, - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - // See the License for the specific language governing permissions and - // limitations under the License. - - // This is a straight concatenation of code from KaTeX's contrib folder, - // but we aren't using some of their helpers that don't work well outside a browser environment. 
- - /*global katex */ - - const findEndOfMath = function(delimiter, text, startIndex) { - // Adapted from - // https://github.com/Khan/perseus/blob/master/src/perseus-markdown.jsx - let index = startIndex; - let braceLevel = 0; - - const delimLength = delimiter.length; - - while (index < text.length) { - const character = text[index]; - - if ( - braceLevel <= 0 && - text.slice(index, index + delimLength) === delimiter - ) { - return index; - } else if (character === "\\") { - index++; - } else if (character === "{") { - braceLevel++; - } else if (character === "}") { - braceLevel--; - } - - index++; - } - - return -1; - }; - - const splitAtDelimiters = function(startData, leftDelim, rightDelim, display) { - const finalData = []; - - for (let i = 0; i < startData.length; i++) { - if (startData[i].type === "text") { - const text = startData[i].data; - - let lookingForLeft = true; - let currIndex = 0; - let nextIndex; - - nextIndex = text.indexOf(leftDelim); - if (nextIndex !== -1) { - currIndex = nextIndex; - finalData.push({ - type: "text", - data: text.slice(0, currIndex) - }); - lookingForLeft = false; - } - - while (true) { - // eslint-disable-line no-constant-condition - if (lookingForLeft) { - nextIndex = text.indexOf(leftDelim, currIndex); - if (nextIndex === -1) { - break; - } - - finalData.push({ - type: "text", - data: text.slice(currIndex, nextIndex) - }); - - currIndex = nextIndex; - } else { - nextIndex = findEndOfMath( - rightDelim, - text, - currIndex + leftDelim.length - ); - if (nextIndex === -1) { - break; - } - - finalData.push({ - type: "math", - data: text.slice(currIndex + leftDelim.length, nextIndex), - rawData: text.slice(currIndex, nextIndex + rightDelim.length), - display: display - }); - - currIndex = nextIndex + rightDelim.length; - } - - lookingForLeft = !lookingForLeft; - } - - finalData.push({ - type: "text", - data: text.slice(currIndex) - }); - } else { - finalData.push(startData[i]); - } - } - - return finalData; - }; - - const 
splitWithDelimiters = function(text, delimiters) { - let data = [{ type: "text", data: text }]; - for (let i = 0; i < delimiters.length; i++) { - const delimiter = delimiters[i]; - data = splitAtDelimiters( - data, - delimiter.left, - delimiter.right, - delimiter.display || false - ); - } - return data; - }; - - /* Note: optionsCopy is mutated by this method. If it is ever exposed in the - * API, we should copy it before mutating. - */ - const renderMathInText = function(text, optionsCopy) { - const data = splitWithDelimiters(text, optionsCopy.delimiters); - const fragment = document.createDocumentFragment(); - - for (let i = 0; i < data.length; i++) { - if (data[i].type === "text") { - fragment.appendChild(document.createTextNode(data[i].data)); - } else { - const tag = document.createElement("d-math"); - const math = data[i].data; - // Override any display mode defined in the settings with that - // defined by the text itself - optionsCopy.displayMode = data[i].display; - try { - tag.textContent = math; - if (optionsCopy.displayMode) { - tag.setAttribute("block", ""); - } - } catch (e) { - if (!(e instanceof katex.ParseError)) { - throw e; - } - optionsCopy.errorCallback( - "KaTeX auto-render: Failed to parse `" + data[i].data + "` with ", - e - ); - fragment.appendChild(document.createTextNode(data[i].rawData)); - continue; - } - fragment.appendChild(tag); - } - } - - return fragment; - }; - - const renderElem = function(elem, optionsCopy) { - for (let i = 0; i < elem.childNodes.length; i++) { - const childNode = elem.childNodes[i]; - if (childNode.nodeType === 3) { - // Text node - const text = childNode.textContent; - if (optionsCopy.mightHaveMath(text)) { - const frag = renderMathInText(text, optionsCopy); - i += frag.childNodes.length - 1; - elem.replaceChild(frag, childNode); - } - } else if (childNode.nodeType === 1) { - // Element node - const shouldRender = - optionsCopy.ignoredTags.indexOf(childNode.nodeName.toLowerCase()) === - -1; - - if 
(shouldRender) { - renderElem(childNode, optionsCopy); - } - } - // Otherwise, it's something else, and ignore it. - } - }; - - const defaultAutoRenderOptions = { - delimiters: [ - { left: "$$", right: "$$", display: true }, - { left: "\\[", right: "\\]", display: true }, - { left: "\\(", right: "\\)", display: false } - // LaTeX uses this, but it ruins the display of normal `$` in text: - // {left: '$', right: '$', display: false}, - ], - - ignoredTags: [ - "script", - "noscript", - "style", - "textarea", - "pre", - "code", - "svg" - ], - - errorCallback: function(msg, err) { - console.error(msg, err); - } - }; - - const renderMathInElement = function(elem, options) { - if (!elem) { - throw new Error("No element provided to render"); - } - - const optionsCopy = Object.assign({}, defaultAutoRenderOptions, options); - const delimiterStrings = optionsCopy.delimiters.flatMap(d => [ - d.left, - d.right - ]); - const mightHaveMath = text => - delimiterStrings.some(d => text.indexOf(d) !== -1); - optionsCopy.mightHaveMath = mightHaveMath; - renderElem(elem, optionsCopy); - }; - - // Copyright 2018 The Distill Template Authors - - function Mathematics(dom, data) { - let needsCSS = false; - const body = dom.querySelector('body'); - - if (!body) { - console.warn("No body tag found!"); - return; - } - - if (data.katex && data.katex.delimiters) { - global.document = dom; - renderMathInElement(body, data.katex); - } - - // render d-math tags - const mathTags = body.querySelectorAll('d-math'); - if (mathTags.length > 0) { - needsCSS = true; - console.warn(`Prerendering ${mathTags.length} math tags...`); - for (const mathTag of mathTags) { - const localOptions = { displayMode: mathTag.hasAttribute('block') }; - const options = Object.assign(localOptions, data.katex); - const html = katex$2.renderToString(mathTag.textContent, options); - const container = dom.createElement('span'); - container.innerHTML = html; - mathTag.parentElement.insertBefore(container, mathTag); - 
mathTag.parentElement.removeChild(mathTag); - } - } - - if (needsCSS) { - const katexCSSTag = ''; - dom.head.insertAdjacentHTML('beforeend', katexCSSTag); - } - - } - - var favicon = "iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAYAAACqaXHeAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAA99JREFUeNrsG4t1ozDMzQSM4A2ODUonKBucN2hugtIJ6E1AboLcBiQTkJsANiAb9OCd/OpzMWBJBl5TvaeXPiiyJetry0J8wW3D3QpjRh3GjneXDq+fSQA9s2mH9x3KDhN4foJfCb8N/Jrv+2fnDn8vLRQOplWHVYdvHZYdZsBcZP1vBmh/n8DzEmhUQDPaOuP9pFuY+JwJHwHnCLQE2tnWBGEyXozY9xCUgHMhhjE2I4heVWtgIkZ83wL6Qgxj1obfWBxymPwe+b00BCCRNPbwfb60yleAkkBHGT5AEehIYz7eJrFDMF9CvH4wwhcGHiHMneFvLDQwlwvMLQq58trRcYBWfYn0A0OgHWQUSu25mE+BnoYKnnEJoeIWAifzOv7vLWd2ZKRfWAIme3tOiUaQ3UnLkb0xj1FxRIeEGKaGIHOs9nEgLaaA9i0JRYo1Ic67wJW86KSKE/ZAM8KuVMk8ITVhmxUxJ3Cl2xlm9Vtkeju1+mpCQNxaEGNCY8bs9X2YqwNoQeGjBWut/ma0QAWy/TqAsHx9wSya3I5IRxOfTC+leG+kA/4vSeEcGBtNUN6byhu3+keEZCQJUNh8MAO7HL6H8pQLnsW/Hd4T4lv93TPjfM7A46iEEqbB5EDOvwYNW6tGNZzT/o+CZ6sqZ6wUtR/wf7mi/VL8iNciT6rHih48Y55b4nKCHJCCzb4y0nwFmin3ZEMIoLfZF8F7nncFmvnWBaBj7CGAYA/WGJsUwHdYqVDwAmNsUgAx4CGgAA7GOOxADYOFWOaIKifuVYzmOpREqA21Mo7aPsgiY1PhOMAmxtR+AUbYH3Id2wc0SAFIQTsn9IUGWR8k9jx3vtXSiAacFxTAGakBk9UudkNECd6jLe+6HrshshvIuC6IlLMRy7er+JpcKma24SlE4cFZSZJDGVVrsNvitQhQrDhW0jfiOLfFd47C42eHT56D/BK0To+58Ahj+cAT8HT1UWlfLZCCd/uKawzU0Rh2EyIX/Icqth3niG8ybNroezwe6khdCNxRN+l4XGdOLVLlOOt2hTRJlr1ETIuMAltVTMz70mJrkdGAaZLSmnBEqmAE32JCMmuTlCnRgsBENtOUpHhvvsYIL0ibnBkaC6QvKcR7738GKp0AKnim7xgUSNv1bpS8QwhBt8r+EP47v/oyRK/S34yJ9nT+AN0Tkm4OdB9E4BsmXM3SnMlRFUrtp6IDpV2eKzdYvF3etm3KhQksbOLChGkSmcBdmcEwvqkrMy5BzL00NZeu3qPYJOOuCc+5NjcWKXQxFvTa3NoXJ4d8in7fiAUuTt781dkvuHX4K8AA2Usy7yNKLy0AAAAASUVORK5CYII=\n"; - - /*! - * escape-html - * Copyright(c) 2012-2013 TJ Holowaychuk - * Copyright(c) 2015 Andreas Lubbe - * Copyright(c) 2015 Tiancheng "Timothy" Gu - * MIT Licensed - */ - - /** - * Module variables. - * @private - */ - - var matchHtmlRegExp = /["'&<>]/; - - /** - * Module exports. 
- * @public - */ - - var escapeHtml_1 = escapeHtml; - - /** - * Escape special characters in the given string of html. - * - * @param {string} string The string to escape for inserting into HTML - * @return {string} - * @public - */ - - function escapeHtml(string) { - var str = '' + string; - var match = matchHtmlRegExp.exec(str); - - if (!match) { - return str; - } - - var escape; - var html = ''; - var index = 0; - var lastIndex = 0; - - for (index = match.index; index < str.length; index++) { - switch (str.charCodeAt(index)) { - case 34: // " - escape = '"'; - break; - case 38: // & - escape = '&'; - break; - case 39: // ' - escape = '''; - break; - case 60: // < - escape = '<'; - break; - case 62: // > - escape = '>'; - break; - default: - continue; - } - - if (lastIndex !== index) { - html += str.substring(lastIndex, index); - } - - lastIndex = index + 1; - html += escape; - } - - return lastIndex !== index - ? html + str.substring(lastIndex, index) - : html; - } - - // Copyright 2018 The Distill Template Authors - - function Meta(dom, data) { - let head = dom.querySelector('head'); - let appendHead = html => appendHtml(head, html); - - function meta(name, content, force) { - if (content || force) - appendHead(` \n`); - } - - appendHead(` - - - - `); - - if (data.title) { - appendHead(` - ${escapeHtml_1(data.title)} - `); - } - - if (data.url) { - appendHead(` - - `); - } - - - if (data.publishedDate){ - appendHead(` - - - - - `); - } - - if (data.updatedDate) { - appendHead(` - - `); - } - - (data.authors || []).forEach((a) => { - appendHtml(head, ` - `); - }); - - appendHead(` - - - - - - - - - `); - - appendHead(` - - - - - - - - - `); - - // if this is a proprer article, generate Google Scholar meta data - if (data.doiSuffix){ - appendHead(` - \n`); - - meta('citation_title', data.title); - meta('citation_fulltext_html_url', data.url); - meta('citation_volume', data.volume); - meta('citation_issue', data.issue); - meta('citation_firstpage', data.doiSuffix 
? `e${data.doiSuffix}` : undefined); - meta('citation_doi', data.doi); - - let journal = data.journal || {}; - meta('citation_journal_title', journal.full_title || journal.title); - meta('citation_journal_abbrev', journal.abbrev_title); - meta('citation_issn', journal.issn); - meta('citation_publisher', journal.publisher); - meta('citation_fulltext_world_readable', '', true); - - if (data.publishedDate){ - meta('citation_online_date', `${data.publishedYear}/${data.publishedMonthPadded}/${data.publishedDayPadded}`); - meta('citation_publication_date', `${data.publishedYear}/${data.publishedMonthPadded}/${data.publishedDayPadded}`); - } - - (data.authors || []).forEach((a) => { - meta('citation_author', `${a.lastName}, ${a.firstName}`); - meta('citation_author_institution', a.affiliation); - }); - } else { - console.warn('No DOI suffix in data; not adding citation meta tags!'); - } - - if (data.citations) { - data.citations.forEach(key => { - if (data.bibliography && data.bibliography.has(key)) { - const entry = data.bibliography.get(key); - meta('citation_reference', citation_meta_content(entry) ); - } else { - console.warn('No bibliography data found for ' + key); - } - }); - } else { - console.warn('No citations found; not adding any references meta tags!'); - } - } - - function appendHtml(el, html) { - el.innerHTML += html; - } - - function citation_meta_content(ref){ - var content = `citation_title=${ref.title};`; - - if (ref.author && ref.author !== '') { - ref.author.split(' and ').forEach(name => { - name = name.trim(); - let last, firsts; - if (name.indexOf(',') != -1){ - last = name.split(',')[0].trim(); - firsts = name.split(',')[1].trim(); - } else { - last = name.split(' ').slice(-1)[0].trim(); - firsts = name.split(' ').slice(0,-1).join(' '); - } - content += `citation_author=${firsts} ${last};`; - }); - } - - if ('year' in ref) { - content += `citation_publication_date=${ref.year};`; - } - - // Special test for arxiv - let arxiv_id_search = 
/https?:\/\/arxiv\.org\/pdf\/([0-9]*\.[0-9]*)\.pdf/.exec(ref.url); - arxiv_id_search = arxiv_id_search || /https?:\/\/arxiv\.org\/abs\/([0-9]*\.[0-9]*)/.exec(ref.url); - arxiv_id_search = arxiv_id_search || /arXiv preprint arXiv:([0-9]*\.[0-9]*)/.exec(ref.journal); - if (arxiv_id_search && arxiv_id_search[1]){ - content += `citation_arxiv_id=${arxiv_id_search[1]};`; - return content; // arXiv is not considered a journal, so we don't need journal/volume/issue - } - if ('journal' in ref){ - content += `citation_journal_title=${escapeHtml_1(ref.journal)};`; - } - if ('volume' in ref) { - content += `citation_volume=${escapeHtml_1(ref.volume)};`; - } - if ('issue' in ref || 'number' in ref){ - content += `citation_number=${escapeHtml_1(ref.issue || ref.number)};`; - } - return content; - } - - var base = "/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nhtml {\n font-size: 14px;\n\tline-height: 1.6em;\n /* font-family: \"Libre Franklin\", \"Helvetica Neue\", sans-serif; */\n font-family: -apple-system, BlinkMacSystemFont, \"Segoe UI\", Roboto, Oxygen, Ubuntu, Cantarell, \"Fira Sans\", \"Droid Sans\", \"Helvetica Neue\", Arial, sans-serif;\n /*, \"Apple Color Emoji\", \"Segoe UI Emoji\", \"Segoe UI Symbol\";*/\n text-size-adjust: 100%;\n -ms-text-size-adjust: 100%;\n -webkit-text-size-adjust: 100%;\n}\n\n@media(min-width: 768px) {\n html {\n font-size: 16px;\n }\n}\n\nbody {\n margin: 0;\n}\n\na {\n 
color: #004276;\n}\n\nfigure {\n margin: 0;\n}\n\ntable {\n\tborder-collapse: collapse;\n\tborder-spacing: 0;\n}\n\ntable th {\n\ttext-align: left;\n}\n\ntable thead {\n border-bottom: 1px solid rgba(0, 0, 0, 0.05);\n}\n\ntable thead th {\n padding-bottom: 0.5em;\n}\n\ntable tbody :first-child td {\n padding-top: 0.5em;\n}\n\npre {\n overflow: auto;\n max-width: 100%;\n}\n\np {\n margin-top: 0;\n margin-bottom: 1em;\n}\n\nsup, sub {\n vertical-align: baseline;\n position: relative;\n top: -0.4em;\n line-height: 1em;\n}\n\nsub {\n top: 0.4em;\n}\n\n.kicker,\n.marker {\n font-size: 15px;\n font-weight: 600;\n color: rgba(0, 0, 0, 0.5);\n}\n\n\n/* Headline */\n\n@media(min-width: 1024px) {\n d-title h1 span {\n display: block;\n }\n}\n\n/* Figure */\n\nfigure {\n position: relative;\n margin-bottom: 2.5em;\n margin-top: 1.5em;\n}\n\nfigcaption+figure {\n\n}\n\nfigure img {\n width: 100%;\n}\n\nfigure svg text,\nfigure svg tspan {\n}\n\nfigcaption,\n.figcaption {\n color: rgba(0, 0, 0, 0.6);\n font-size: 12px;\n line-height: 1.5em;\n}\n\n@media(min-width: 1024px) {\nfigcaption,\n.figcaption {\n font-size: 13px;\n }\n}\n\nfigure.external img {\n background: white;\n border: 1px solid rgba(0, 0, 0, 0.1);\n box-shadow: 0 1px 8px rgba(0, 0, 0, 0.1);\n padding: 18px;\n box-sizing: border-box;\n}\n\nfigcaption a {\n color: rgba(0, 0, 0, 0.6);\n}\n\nfigcaption b,\nfigcaption strong, {\n font-weight: 600;\n color: rgba(0, 0, 0, 1.0);\n}\n"; - - var layout = "/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the 
License for the specific language governing permissions and\n * limitations under the License.\n */\n\n@supports not (display: grid) {\n .base-grid,\n distill-header,\n d-title,\n d-abstract,\n d-article,\n d-appendix,\n distill-appendix,\n d-byline,\n d-footnote-list,\n d-citation-list,\n distill-footer {\n display: block;\n padding: 8px;\n }\n}\n\n.base-grid,\ndistill-header,\nd-title,\nd-abstract,\nd-article,\nd-appendix,\ndistill-appendix,\nd-byline,\nd-footnote-list,\nd-citation-list,\ndistill-footer {\n display: grid;\n justify-items: stretch;\n grid-template-columns: [screen-start] 8px [page-start kicker-start text-start gutter-start middle-start] 1fr 1fr 1fr 1fr 1fr 1fr 1fr 1fr [text-end page-end gutter-end kicker-end middle-end] 8px [screen-end];\n grid-column-gap: 8px;\n}\n\n.grid {\n display: grid;\n grid-column-gap: 8px;\n}\n\n@media(min-width: 768px) {\n .base-grid,\n distill-header,\n d-title,\n d-abstract,\n d-article,\n d-appendix,\n distill-appendix,\n d-byline,\n d-footnote-list,\n d-citation-list,\n distill-footer {\n grid-template-columns: [screen-start] 1fr [page-start kicker-start middle-start text-start] 45px 45px 45px 45px 45px 45px 45px 45px [ kicker-end text-end gutter-start] 45px [middle-end] 45px [page-end gutter-end] 1fr [screen-end];\n grid-column-gap: 16px;\n }\n\n .grid {\n grid-column-gap: 16px;\n }\n}\n\n@media(min-width: 1000px) {\n .base-grid,\n distill-header,\n d-title,\n d-abstract,\n d-article,\n d-appendix,\n distill-appendix,\n d-byline,\n d-footnote-list,\n d-citation-list,\n distill-footer {\n grid-template-columns: [screen-start] 1fr [page-start kicker-start] 50px [middle-start] 50px [text-start kicker-end] 50px 50px 50px 50px 50px 50px 50px 50px [text-end gutter-start] 50px [middle-end] 50px [page-end gutter-end] 1fr [screen-end];\n grid-column-gap: 16px;\n }\n\n .grid {\n grid-column-gap: 16px;\n }\n}\n\n@media(min-width: 1180px) {\n .base-grid,\n distill-header,\n d-title,\n d-abstract,\n d-article,\n d-appendix,\n 
distill-appendix,\n d-byline,\n d-footnote-list,\n d-citation-list,\n distill-footer {\n grid-template-columns: [screen-start] 1fr [page-start kicker-start] 60px [middle-start] 60px [text-start kicker-end] 60px 60px 60px 60px 60px 60px 60px 60px [text-end gutter-start] 60px [middle-end] 60px [page-end gutter-end] 1fr [screen-end];\n grid-column-gap: 32px;\n }\n\n .grid {\n grid-column-gap: 32px;\n }\n}\n\n\n\n\n.base-grid {\n grid-column: screen;\n}\n\n/* .l-body,\nd-article > * {\n grid-column: text;\n}\n\n.l-page,\nd-title > *,\nd-figure {\n grid-column: page;\n} */\n\n.l-gutter {\n grid-column: gutter;\n}\n\n.l-text,\n.l-body {\n grid-column: text;\n}\n\n.l-page {\n grid-column: page;\n}\n\n.l-body-outset {\n grid-column: middle;\n}\n\n.l-page-outset {\n grid-column: page;\n}\n\n.l-screen {\n grid-column: screen;\n}\n\n.l-screen-inset {\n grid-column: screen;\n padding-left: 16px;\n padding-left: 16px;\n}\n\n\n/* Aside */\n\nd-article aside {\n grid-column: gutter;\n font-size: 12px;\n line-height: 1.6em;\n color: rgba(0, 0, 0, 0.6)\n}\n\n@media(min-width: 768px) {\n aside {\n grid-column: gutter;\n }\n\n .side {\n grid-column: gutter;\n }\n}\n"; - - var print = "/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n@media print {\n\n @page {\n size: 8in 11in;\n @bottom-right {\n content: counter(page) \" of \" counter(pages);\n }\n }\n\n html {\n /* no general margins -- CSS Grid takes care of 
those */\n }\n\n p, code {\n page-break-inside: avoid;\n }\n\n h2, h3 {\n page-break-after: avoid;\n }\n\n d-header {\n visibility: hidden;\n }\n\n d-footer {\n display: none!important;\n }\n\n}\n"; - - var byline = "/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nd-byline {\n contain: style;\n overflow: hidden;\n border-top: 1px solid rgba(0, 0, 0, 0.1);\n font-size: 0.8rem;\n line-height: 1.8em;\n padding: 1.5rem 0;\n min-height: 1.8em;\n}\n\n\nd-byline .byline {\n grid-template-columns: 1fr 1fr;\n grid-column: text;\n}\n\n@media(min-width: 768px) {\n d-byline .byline {\n grid-template-columns: 1fr 1fr 1fr 1fr;\n }\n}\n\nd-byline .authors-affiliations {\n grid-column-end: span 2;\n grid-template-columns: 1fr 1fr;\n margin-bottom: 1em;\n}\n\n@media(min-width: 768px) {\n d-byline .authors-affiliations {\n margin-bottom: 0;\n }\n}\n\nd-byline h3 {\n font-size: 0.6rem;\n font-weight: 400;\n color: rgba(0, 0, 0, 0.5);\n margin: 0;\n text-transform: uppercase;\n}\n\nd-byline p {\n margin: 0;\n}\n\nd-byline a,\nd-article d-byline a {\n color: rgba(0, 0, 0, 0.8);\n text-decoration: none;\n border-bottom: none;\n}\n\nd-article d-byline a:hover {\n text-decoration: underline;\n border-bottom: none;\n}\n\nd-byline p.author {\n font-weight: 500;\n}\n\nd-byline .affiliations {\n\n}\n"; - - var article = "/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 
2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nd-article {\n contain: layout style;\n overflow-x: hidden;\n border-top: 1px solid rgba(0, 0, 0, 0.1);\n padding-top: 2rem;\n color: rgba(0, 0, 0, 0.8);\n}\n\nd-article > * {\n grid-column: text;\n}\n\n@media(min-width: 768px) {\n d-article {\n font-size: 16px;\n }\n}\n\n@media(min-width: 1024px) {\n d-article {\n font-size: 1.06rem;\n line-height: 1.7em;\n }\n}\n\n\n/* H2 */\n\n\nd-article .marker {\n text-decoration: none;\n border: none;\n counter-reset: section;\n grid-column: kicker;\n line-height: 1.7em;\n}\n\nd-article .marker:hover {\n border: none;\n}\n\nd-article .marker span {\n padding: 0 3px 4px;\n border-bottom: 1px solid rgba(0, 0, 0, 0.2);\n position: relative;\n top: 4px;\n}\n\nd-article .marker:hover span {\n color: rgba(0, 0, 0, 0.7);\n border-bottom: 1px solid rgba(0, 0, 0, 0.7);\n}\n\nd-article h2 {\n font-weight: 600;\n font-size: 24px;\n line-height: 1.25em;\n margin: 2rem 0 1.5rem 0;\n border-bottom: 1px solid rgba(0, 0, 0, 0.1);\n padding-bottom: 1rem;\n}\n\n@media(min-width: 1024px) {\n d-article h2 {\n font-size: 36px;\n }\n}\n\n/* H3 */\n\nd-article h3 {\n font-weight: 700;\n font-size: 18px;\n line-height: 1.4em;\n margin-bottom: 1em;\n margin-top: 2em;\n}\n\n@media(min-width: 1024px) {\n d-article h3 {\n font-size: 20px;\n }\n}\n\n/* H4 */\n\nd-article h4 {\n font-weight: 600;\n text-transform: uppercase;\n font-size: 14px;\n line-height: 1.4em;\n}\n\nd-article a {\n color: inherit;\n}\n\nd-article p,\nd-article 
ul,\nd-article ol,\nd-article blockquote {\n margin-top: 0;\n margin-bottom: 1em;\n margin-left: 0;\n margin-right: 0;\n}\n\nd-article blockquote {\n border-left: 2px solid rgba(0, 0, 0, 0.2);\n padding-left: 2em;\n font-style: italic;\n color: rgba(0, 0, 0, 0.6);\n}\n\nd-article a {\n border-bottom: 1px solid rgba(0, 0, 0, 0.4);\n text-decoration: none;\n}\n\nd-article a:hover {\n border-bottom: 1px solid rgba(0, 0, 0, 0.8);\n}\n\nd-article .link {\n text-decoration: underline;\n cursor: pointer;\n}\n\nd-article ul,\nd-article ol {\n padding-left: 24px;\n}\n\nd-article li {\n margin-bottom: 1em;\n margin-left: 0;\n padding-left: 0;\n}\n\nd-article li:last-child {\n margin-bottom: 0;\n}\n\nd-article pre {\n font-size: 14px;\n margin-bottom: 20px;\n}\n\nd-article hr {\n grid-column: screen;\n width: 100%;\n border: none;\n border-bottom: 1px solid rgba(0, 0, 0, 0.1);\n margin-top: 60px;\n margin-bottom: 60px;\n}\n\nd-article section {\n margin-top: 60px;\n margin-bottom: 60px;\n}\n\nd-article span.equation-mimic {\n font-family: georgia;\n font-size: 115%;\n font-style: italic;\n}\n\nd-article > d-code,\nd-article section > d-code {\n display: block;\n}\n\nd-article > d-math[block],\nd-article section > d-math[block] {\n display: block;\n}\n\n@media (max-width: 768px) {\n d-article > d-code,\n d-article section > d-code,\n d-article > d-math[block],\n d-article section > d-math[block] {\n overflow-x: scroll;\n -ms-overflow-style: none; // IE 10+\n overflow: -moz-scrollbars-none; // Firefox\n }\n\n d-article > d-code::-webkit-scrollbar,\n d-article section > d-code::-webkit-scrollbar,\n d-article > d-math[block]::-webkit-scrollbar,\n d-article section > d-math[block]::-webkit-scrollbar {\n display: none; // Safari and Chrome\n }\n}\n\nd-article .citation {\n color: #668;\n cursor: pointer;\n}\n\nd-include {\n width: auto;\n display: block;\n}\n\nd-figure {\n contain: layout style;\n}\n\n/* KaTeX */\n\n.katex, .katex-prerendered {\n contain: style;\n display: 
inline-block;\n}\n\n/* Tables */\n\nd-article table {\n border-collapse: collapse;\n margin-bottom: 1.5rem;\n border-bottom: 1px solid rgba(0, 0, 0, 0.2);\n}\n\nd-article table th {\n border-bottom: 1px solid rgba(0, 0, 0, 0.2);\n}\n\nd-article table td {\n border-bottom: 1px solid rgba(0, 0, 0, 0.05);\n}\n\nd-article table tr:last-of-type td {\n border-bottom: none;\n}\n\nd-article table th,\nd-article table td {\n font-size: 15px;\n padding: 2px 8px;\n}\n\nd-article table tbody :first-child td {\n padding-top: 2px;\n}\n"; - - var title = "/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nd-title {\n padding: 2rem 0 1.5rem;\n contain: layout style;\n overflow-x: hidden;\n}\n\n@media(min-width: 768px) {\n d-title {\n padding: 4rem 0 1.5rem;\n }\n}\n\nd-title h1 {\n grid-column: text;\n font-size: 40px;\n font-weight: 700;\n line-height: 1.1em;\n margin: 0 0 0.5rem;\n}\n\n@media(min-width: 768px) {\n d-title h1 {\n font-size: 50px;\n }\n}\n\nd-title p {\n font-weight: 300;\n font-size: 1.2rem;\n line-height: 1.55em;\n grid-column: text;\n}\n\nd-title .status {\n margin-top: 0px;\n font-size: 12px;\n color: #009688;\n opacity: 0.8;\n grid-column: kicker;\n}\n\nd-title .status span {\n line-height: 1;\n display: inline-block;\n padding: 6px 0;\n border-bottom: 1px solid #80cbc4;\n font-size: 11px;\n text-transform: uppercase;\n}\n"; - - var math = "/*\n * Copyright 2018 The Distill Template Authors\n 
*\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nspan.katex-display {\n text-align: left;\n padding: 8px 0 8px 0;\n margin: 0.5em 0 0.5em 1em;\n}\n\nspan.katex {\n -webkit-font-smoothing: antialiased;\n color: rgba(0, 0, 0, 0.8);\n font-size: 1.18em;\n}\n"; - - // Copyright 2018 The Distill Template Authors - - const styles = base + layout + title + byline + article + math + print; - - function makeStyleTag(dom) { - - const styleTagId = 'distill-prerendered-styles'; - const prerenderedTag = dom.getElementById(styleTagId); - if (!prerenderedTag) { - const styleTag = dom.createElement('style'); - styleTag.id = styleTagId; - styleTag.type = 'text/css'; - const cssTextTag = dom.createTextNode(styles); - styleTag.appendChild(cssTextTag); - const firstScriptTag = dom.head.querySelector('script'); - dom.head.insertBefore(styleTag, firstScriptTag); - } - - } - - // Copyright 2018 The Distill Template Authors - - function renderTOC(element, headings) { - - let ToC =` - - -

      Table of contents

      -
        `; - - for (const el of headings) { - // should element be included in TOC? - const isInTitle = el.parentElement.tagName == 'D-TITLE'; - const isException = el.getAttribute('no-toc'); - if (isInTitle || isException) continue; - // create TOC entry - const title = el.textContent; - const link = '#' + el.getAttribute('id'); - - let newLine = '
      • ' + '' + title + '' + '
      • '; - if (el.tagName == 'H3') { - newLine = '
          ' + newLine + '
        '; - } else { - newLine += '
        '; - } - ToC += newLine; - - } - - ToC += '
      '; - element.innerHTML = ToC; - } - - // Copyright 2018 The Distill Template Authors - - function TOC(dom) { - const article = dom.querySelector('d-article'); - const toc = dom.querySelector('d-toc'); - if (toc) { - const headings = article.querySelectorAll('h2, h3'); - renderTOC(toc, headings); - toc.setAttribute('prerendered', 'true'); - } - } - - // Copyright 2018 The Distill Template Authors - // - // Licensed under the Apache License, Version 2.0 (the "License"); - // you may not use this file except in compliance with the License. - // You may obtain a copy of the License at - // - // http://www.apache.org/licenses/LICENSE-2.0 - // - // Unless required by applicable law or agreed to in writing, software - // distributed under the License is distributed on an "AS IS" BASIS, - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - // See the License for the specific language governing permissions and - // limitations under the License. - - function Typeset(dom) { - - var textNodes = dom.createTreeWalker( - dom.body, - dom.defaultView.NodeFilter.SHOW_TEXT - ); - while (textNodes.nextNode()) { - var n = textNodes.currentNode, - text = n.nodeValue; - if (text && acceptNode(n)) { - text = quotes(text); - text = punctuation(text); - // TODO: Add back support for ligatures once their uppercased versions don't hang Chrome search anymore - // see: https://bugs.chromium.org/p/chromium/issues/detail?id=862648 - // text = ligatures(text); - n.nodeValue = text; - } - } - } - - // 2018-07-11 shancarter@ and ludwigschubert@ no longer know what this was meant to accomplish - // if it was trying to not replace text in any child nodes of those listed here, - // then it does not accomplish that. - function acceptNode(node) { - var parent = node.parentElement; - var isMath = (parent && parent.getAttribute && parent.getAttribute('class')) ? 
parent.getAttribute('class').includes('katex') || parent.getAttribute('class').includes('MathJax') : false; - return parent && - parent.nodeName !== 'SCRIPT' && - parent.nodeName !== 'STYLE' && - parent.nodeName !== 'CODE' && - parent.nodeName !== 'PRE' && - parent.nodeName !== 'SPAN' && - parent.nodeName !== 'D-HEADER' && - parent.nodeName !== 'D-BYLINE' && - parent.nodeName !== 'D-MATH' && - parent.nodeName !== 'D-CODE' && - parent.nodeName !== 'D-BIBLIOGRAPHY' && - parent.nodeName !== 'D-FOOTER' && - parent.nodeName !== 'D-APPENDIX' && - parent.nodeName !== 'D-FRONTMATTER' && - parent.nodeName !== 'D-TOC' && - parent.nodeType !== 8 && //comment nodes - !isMath; - } - - - /*! - * typeset - Typesetting for the web - * @version v0.1.6 - * @link https://github.com/davidmerfield/Typeset.js - * @author David Merfield - */ - // which has a CC0 license - // http://creativecommons.org/publicdomain/zero/1.0/ - - - function punctuation(text){ - - // Dashes - text = text.replace(/--/g, '\u2014'); - text = text.replace(/\s*\u2014\s*/g,'\u2009\u2014\u2009'); //this has thin spaces - - // Elipses - text = text.replace(/\.\.\./g,'…'); - - // Nbsp for punc with spaces - var NBSP = '\u00a0'; - var NBSP_PUNCTUATION_START = /([«¿¡]) /g; - var NBSP_PUNCTUATION_END = / ([!?:;.,‽»])/g; - - text = text.replace(NBSP_PUNCTUATION_START, '$1' + NBSP); - text = text.replace(NBSP_PUNCTUATION_END, NBSP + '$1'); - - return text; - } - - function quotes(text) { - - text = text - .replace(/(\W|^)"([^\s!?:;.,‽»])/g, '$1\u201c$2') // beginning " - .replace(/(\u201c[^"]*)"([^"]*$|[^\u201c"]*\u201c)/g, '$1\u201d$2') // ending " - .replace(/([^0-9])"/g,'$1\u201d') // remaining " at end of word - .replace(/(\W|^)'(\S)/g, '$1\u2018$2') // beginning ' - .replace(/([a-z])'([a-z])/ig, '$1\u2019$2') // conjunction's possession - .replace(/((\u2018[^']*)|[a-z])'([^0-9]|$)/ig, '$1\u2019$3') // ending ' - .replace(/(\u2018)([0-9]{2}[^\u2019]*)(\u2018([^0-9]|$)|$|\u2019[a-z])/ig, '\u2019$2$3') // abbrev. 
years like '93 - .replace(/(\B|^)\u2018(?=([^\u2019]*\u2019\b)*([^\u2019\u2018]*\W[\u2019\u2018]\b|[^\u2019\u2018]*$))/ig, '$1\u2019') // backwards apostrophe - .replace(/'''/g, '\u2034') // triple prime - .replace(/("|'')/g, '\u2033') // double prime - .replace(/'/g, '\u2032'); - - // Allow escaped quotes - text = text.replace(/\\“/, '"'); - text = text.replace(/\\”/, '"'); - text = text.replace(/\\’/, '\''); - text = text.replace(/\\‘/, '\''); - - return text; - } - - // Copyright 2018 The Distill Template Authors - - // const template = ` - // if ('IntersectionObserver' in window && - // 'IntersectionObserverEntry' in window && - // 'intersectionRatio' in IntersectionObserverEntry.prototype) { - // // Platform supports IntersectionObserver natively! :-) - // if (!('isIntersecting' in IntersectionObserverEntry.prototype)) { - // Object.defineProperty(IntersectionObserverEntry.prototype, - // 'isIntersecting', { - // get: function () { - // return this.intersectionRatio > 0; - // } - // }); - // } - // } else { - // // Platform does not support webcomponents--loading polyfills synchronously. - // const scriptTag = document.createElement('script'); - // scriptTag.src = '${intersectionObserverPath}'; - // scriptTag.async = false; - // document.currentScript.parentNode.insertBefore(scriptTag, document.currentScript.nextSibling); - // } - // - // if ('registerElement' in document && - // 'import' in document.createElement('link') && - // 'content' in document.createElement('template')) { - // // Platform supports webcomponents natively! :-) - // } else { - // // Platform does not support webcomponents--loading polyfills synchronously. 
- // const scriptTag = document.createElement('script'); - // scriptTag.src = '${webcomponentPath}'; - // scriptTag.async = false; - // document.currentScript.parentNode.insertBefore(scriptTag, document.currentScript.nextSibling); - // } - // - // - // `; - - - const addBackIn = ` -window.addEventListener('WebComponentsReady', function() { - console.warn('WebComponentsReady'); - const loaderTag = document.createElement('script'); - loaderTag.src = 'https://distill.pub/template.v2.js'; - document.head.insertBefore(loaderTag, document.head.firstChild); -}); -`; - - function render(dom) { - // pull out template script tag - const templateTag = dom.querySelector('script[src*="template.v2.js"]'); - if (templateTag) { - templateTag.parentNode.removeChild(templateTag); - } else { - console.debug('FYI: Did not find template tag when trying to remove it. You may not have added it. Be aware that our polyfills will add it.'); - } - - // add loader - const loaderTag = dom.createElement('script'); - loaderTag.src = 'https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.0.17/webcomponents-loader.js'; - dom.head.insertBefore(loaderTag, dom.head.firstChild); - - // add loader event listener to add tempalrte back in - const addTag = dom.createElement('script'); - addTag.innerHTML = addBackIn; - dom.head.insertBefore(addTag, dom.head.firstChild); - - - // create polyfill script tag - // const polyfillScriptTag = dom.createElement('script'); - // polyfillScriptTag.innerHTML = template; - // polyfillScriptTag.id = 'polyfills'; - - // insert at appropriate position--before any other script tag - // const firstScriptTag = dom.head.querySelector('script'); - // dom.head.insertBefore(polyfillScriptTag, firstScriptTag); - } - - // Copyright 2018 The Distill Template Authors - - const styles$1 = ` -d-citation-list { - contain: style; -} - -d-citation-list .references { - grid-column: text; -} - -d-citation-list .references .title { - font-weight: 500; -} -`; - - function 
renderCitationList(element, entries, dom=document) { - if (entries.size > 0) { - element.style.display = ''; - let list = element.querySelector('.references'); - if (list) { - list.innerHTML = ''; - } else { - const stylesTag = dom.createElement('style'); - stylesTag.innerHTML = styles$1; - element.appendChild(stylesTag); - - const heading = dom.createElement('h3'); - heading.id = 'references'; - heading.textContent = 'References'; - element.appendChild(heading); - - list = dom.createElement('ol'); - list.id = 'references-list'; - list.className = 'references'; - element.appendChild(list); - } - - for (const [key, entry] of entries) { - const listItem = dom.createElement('li'); - listItem.id = key; - listItem.innerHTML = bibliography_cite(entry); - list.appendChild(listItem); - } - } else { - element.style.display = 'none'; - } - } - - // Copyright 2018 The Distill Template Authors - - function CitationList(dom, data) { - const citationListTag = dom.querySelector('d-citation-list'); - if (citationListTag) { - const entries = new Map(data.citations.map( citationKey => { - return [citationKey, data.bibliography.get(citationKey)]; - })); - renderCitationList(citationListTag, entries, dom); - citationListTag.setAttribute('distill-prerendered', 'true'); - } - } - - // Copyright 2018 The Distill Template Authors - // - // Licensed under the Apache License, Version 2.0 (the "License"); - // you may not use this file except in compliance with the License. - // You may obtain a copy of the License at - // - // http://www.apache.org/licenses/LICENSE-2.0 - // - // Unless required by applicable law or agreed to in writing, software - // distributed under the License is distributed on an "AS IS" BASIS, - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - // See the License for the specific language governing permissions and - // limitations under the License. - - /* - Try to only reorder things that MAY be user defined. 
- Try to use templates etc to define the order of our own tags. - */ - - function render$1(dom) { - const head = dom.head; - - const metaIE = head.querySelector('meta[http-equiv]'); - head.insertBefore(metaIE, head.firstChild); - - const metaViewport = head.querySelector('meta[name=viewport]'); - head.insertBefore(metaViewport, head.firstChild); - - const metaCharset = head.querySelector('meta[charset]'); - head.insertBefore(metaCharset, head.firstChild); - } - - var logo = "\n \n\n"; - - const headerTemplate = ` - - -`; - - // Copyright 2018 The Distill Template Authors - - function DistillHeader(dom, data) { - const headerTag = dom.querySelector('distill-header'); - if (!headerTag) { - const header = dom.createElement('distill-header'); - header.innerHTML = headerTemplate; - header.setAttribute('distill-prerendered', ""); - const body = dom.querySelector('body'); - body.insertBefore(header, body.firstChild); - } - } - - // Copyright 2018 The Distill Template Authors - - const styles$2 = ` - -`; - - function appendixTemplate(frontMatter) { - let html = styles$2; - - if (typeof frontMatter.githubUrl !== 'undefined') { - html += ` -

      Updates and Corrections

      -

      `; - if (frontMatter.githubCompareUpdatesUrl) { - html += `View all changes to this article since it was first published.`; - } - html += ` - If you see mistakes or want to suggest changes, please create an issue on GitHub.

      - `; - } - - const journal = frontMatter.journal; - if (typeof journal !== 'undefined' && journal.title === 'Distill') { - html += ` -

      Reuse

      -

      Diagrams and text are licensed under Creative Commons Attribution CC-BY 4.0 with the source available on GitHub, unless noted otherwise. The figures that have been reused from other sources don’t fall under this license and can be recognized by a note in their caption: “Figure from …”.

      - `; - } - - if (typeof frontMatter.publishedDate !== 'undefined') { - html += ` -

      Citation

      -

      For attribution in academic contexts, please cite this work as

      -
      ${frontMatter.concatenatedAuthors}, "${frontMatter.title}", Distill, ${frontMatter.publishedYear}.
      -

      BibTeX citation

      -
      ${serializeFrontmatterToBibtex(frontMatter)}
      - `; - } - - return html; - } - - // Copyright 2018 The Distill Template Authors - - function DistillAppendix(dom, data) { - - const appendixTag = dom.querySelector('d-appendix'); - if (!appendixTag) { - console.warn('No appendix tag found!'); - return; - } - const distillAppendixTag = appendixTag.querySelector('distill-appendix'); - if (!distillAppendixTag) { - const distillAppendix = dom.createElement('distill-appendix'); - appendixTag.appendChild(distillAppendix); - distillAppendix.innerHTML = appendixTemplate(data); - } - - } - - const footerTemplate = ` - - - - -`; - - // Copyright 2018 The Distill Template Authors - - function DistillFooter(dom) { - const footerTag = dom.querySelector('distill-footer'); - if(!footerTag) { - const footer = dom.createElement('distill-footer'); - footer.innerHTML = footerTemplate; - const body = dom.querySelector('body'); - body.appendChild(footer); - } - } - - // Copyright 2018 The Distill Template Authors - - const extractors = new Map([ - ['ExtractFrontmatter', ExtractFrontmatter], - ['ExtractBibliography', ExtractBibliography], - ['ExtractCitations', ExtractCitations], - ]); - - const transforms = new Map([ - ['HTML', HTML], - ['makeStyleTag', makeStyleTag], - ['OptionalComponents', OptionalComponents], - ['TOC', TOC], - ['Byline', Byline], - ['Mathematics', Mathematics], - ['Meta', Meta], - ['Typeset', Typeset], - ['Polyfills', render], - ['CitationList', CitationList], - ['Reorder', render$1] // keep last - ]); - - const distillTransforms = new Map([ - ['DistillHeader', DistillHeader], - ['DistillAppendix', DistillAppendix], - ['DistillFooter', DistillFooter], - ]); - - /* Exported functions */ - - function render$2(dom, data, verbose=true) { - let frontMatter; - if (data instanceof FrontMatter) { - frontMatter = data; - } else { - frontMatter = FrontMatter.fromObject(data); - } - // first, we collect static data from the dom - for (const [name, extract] of extractors.entries()) { - if (verbose) console.warn('Running 
extractor: ' + name); - extract(dom, frontMatter, verbose); - } - // secondly we use it to transform parts of the dom - for (const [name, transform] of transforms.entries()) { - if (verbose) console.warn('Running transform: ' + name); - // console.warn('Running transform: ', transform); - transform(dom, frontMatter, verbose); - } - dom.body.setAttribute('distill-prerendered', ''); - // the function calling us can now use the transformed dom and filled data object - if (data instanceof FrontMatter) ; else { - frontMatter.assignToObject(data); - } - } - - function distillify(dom, data, verbose=true) { - // thirdly, we can use these additional transforms when publishing on the Distill website - for (const [name, transform] of distillTransforms.entries()) { - if (verbose) console.warn('Running distillify: ', name); - transform(dom, data, verbose); - } - } - - function usesTemplateV2(dom) { - const tags = dom.querySelectorAll('script'); - let usesV2 = undefined; - for (const tag of tags) { - const src = tag.src; - if (src.includes('template.v1.js')) { - usesV2 = false; - } else if (src.includes('template.v2.js')) { - usesV2 = true; - } else if (src.includes('template.')) { - throw new Error('Uses distill template, but unknown version?!'); - } - } - - if (usesV2 === undefined) { - throw new Error('Does not seem to use Distill template at all.'); - } else { - return usesV2; - } - } - - const testing = { - extractors: extractors, - transforms: transforms, - distillTransforms: distillTransforms - }; - - exports.FrontMatter = FrontMatter; - exports.distillify = distillify; - exports.render = render$2; - exports.testing = testing; - exports.usesTemplateV2 = usesTemplateV2; - - Object.defineProperty(exports, '__esModule', { value: true }); - -}))); -//# sourceMappingURL=transforms.v2.js.map +!function(e,t){"object"==typeof exports&&"undefined"!=typeof module?t(exports,require("fs")):"function"==typeof 
define&&define.amd?define(["exports","fs"],t):t((e=e||self).dl={},e.fs)}(this,function(e,t){"use strict";function n(e,t){e.title=t.title,t.published&&(t.published instanceof Date?e.publishedDate=t.published:t.published.constructor===String&&(e.publishedDate=new Date(t.published))),t.publishedDate&&(t.publishedDate instanceof Date?e.publishedDate=t.publishedDate:t.publishedDate.constructor===String?e.publishedDate=new Date(t.publishedDate):console.error("Don't know what to do with published date: "+t.publishedDate)),e.description=t.description,e.authors=t.authors.map(e=>new te(e)),e.katex=t.katex,e.password=t.password,t.doi&&(e.doi=t.doi)} +// Copyright 2018 The Distill Template Authors +function r(e){for(let t of e.authors){const e=Boolean(t.affiliation),n=Boolean(t.affiliations);if(e)if(n)console.warn(`Author ${t.author} has both old-style ("affiliation" & "affiliationURL") and new style ("affiliations") affiliation information!`);else{let e={name:t.affiliation};t.affiliationURL&&(e.url=t.affiliationURL),t.affiliations=[e]}}return e}function i(e){const t=e.firstElementChild;if(t){if("json"==t.getAttribute("type").split("/")[1]){const e=t.textContent;return r(JSON.parse(e))}console.error("Distill only supports JSON frontmatter tags anymore; no more YAML.")}else console.error("You added a frontmatter tag but did not provide a script tag with front matter data in it. 
Please take a look at our templates.");return{}} +// Copyright 2018 The Distill Template Authors +function a(e,t){const r=e.querySelector("d-front-matter");r?n(t,i(r)):console.warn("No front matter tag found!")}function o(){throw new Error("Dynamic requires are not currently supported by rollup-plugin-commonjs")}function s(e){return e&&e.__esModule&&Object.prototype.hasOwnProperty.call(e,"default")?e["default"]:e}function l(e,t){return e(t={exports:{}},t.exports),t.exports} +// Copyright 2018 The Distill Template Authors +function u(e){return e.replace(/[\t\n ]+/g," ").replace(/{\\["^`.'acu~Hvs]( )?([a-zA-Z])}/g,(e,t,n)=>n).replace(/{\\([a-zA-Z])}/g,(e,t)=>t)}function d(e){const t=new Map,n=re.toJSON(e);for(const e of n){for(const[t,n]of Object.entries(e.entryTags))e.entryTags[t.toLowerCase()]=u(n);e.entryTags.type=e.entryType,t.set(e.citationKey,e.entryTags)}return t}function c(e){return`@article{${e.slug},\n author = {${e.bibtexAuthors}},\n title = {${e.title}},\n journal = {${e.journal.title}},\n year = {${e.publishedYear}},\n note = {${e.url}},\n doi = {${e.doi}}\n}`} +// Copyright 2018 The Distill Template Authors +function h(e){const t=e.firstElementChild;if(t&&"SCRIPT"===t.tagName){if("text/bibtex"==t.type){return d(e.firstElementChild.textContent)}if("text/json"==t.type)return new Map(JSON.parse(t.textContent));console.warn("Unsupported bibliography script tag type: "+t.type)}else console.warn("Bibliography did not have any script tag.")} +// Copyright 2018 The Distill Template Authors +function p(e,n){const r=e.querySelector("d-bibliography");if(!r)return void console.warn("No bibliography tag found!");const i=r.getAttribute("src");if(i){const a=n.inputDirectory+"/"+i,o=d(t.readFileSync(a,"utf-8")),s=e.createElement("script");s.type="text/json",s.textContent=JSON.stringify([...o]),r.appendChild(s),r.removeAttribute("src")}n.bibliography=h(r)} +// Copyright 2018 The Distill Template Authors +function f(e=document){const t=new 
Set,n=e.querySelectorAll("d-cite");for(const e of n){const n=(e.getAttribute("key")||e.getAttribute("bibtex-key")).split(",").map(e=>e.trim());for(const e of n)t.add(e)}return[...t]}function m(e,t,n,r){if(null==e.author)return"";var i=e.author.split(" and ");let a=i.map(e=>{if(-1!=(e=e.trim()).indexOf(","))var n=e.split(",")[0].trim(),r=e.split(",")[1];else if(-1!=e.indexOf(" "))n=e.split(" ").slice(-1)[0].trim(),r=e.split(" ").slice(0,-1).join(" ");else n=e.trim();var i="";return r!=undefined&&(i=(i=r.trim().split(" ").map(e=>e.trim()[0])).join(".")+"."),t.replace("${F}",r).replace("${L}",n).replace("${I}",i).trim()});if(i.length>1){var o=a.slice(0,i.length-1).join(n);return o+=(r||n)+a[i.length-1]}return a[0]}function g(e){var t=e.journal||e.booktitle||"";if("volume"in e){var n=e.issue||e.number;n=n!=undefined?"("+n+")":"",t+=", Vol "+e.volume+n}return"pages"in e&&(t+=", pp. "+e.pages),""!=t&&(t+=". "),"publisher"in e&&"."!=(t+=e.publisher)[t.length-1]&&(t+="."),t}function v(e){if("url"in e){var t=e.url,n=/arxiv\.org\/abs\/([0-9\.]*)/.exec(t);if(null!=n&&(t=`http://arxiv.org/pdf/${n[1]}.pdf`),".pdf"==t.slice(-4))var r="PDF";else if(".html"==t.slice(-5))r="HTML";return`  [${r||"link"}]`}return""}function b(e,t){return"doi"in e?`${t?"
      ":""} DOI: ${e.doi}`:""}function y(e){return''+e.title+" "}function x(e){if(e){var t=y(e);return t+=v(e)+"
      ",e.author&&(t+=m(e,"${L}, ${I}",", "," and "),(e.year||e.date)&&(t+=", ")),e.year||e.date?t+=(e.year||e.date)+". ":t+=". ",t+=g(e),t+=b(e)}return"?"} +// Copyright 2018 The Distill Template Authors +function w(e,t){const n=new Set(t.citations),r=f(e);for(const e of r)n.add(e);t.citations=Array.from(n)} +// Copyright 2018 The Distill Template Authors +function k(e){const t=e.querySelector("head");if(e.querySelector("html").getAttribute("lang")||e.querySelector("html").setAttribute("lang","en"),!e.querySelector("meta[charset]")){const n=e.createElement("meta");n.setAttribute("charset","utf-8"),t.appendChild(n)}if(!e.querySelector("meta[name=viewport]")){const n=e.createElement("meta");n.setAttribute("name","viewport"),n.setAttribute("content","width=device-width, initial-scale=1"),t.appendChild(n)}} +// Copyright 2018 The Distill Template Authors +function M(e){return`\n \n`} +// Copyright 2018 The Distill Template Authors +function S(e,t){const n=e.querySelector("d-byline");n&&(n.innerHTML=M(t))} +// Copyright 2018 The Distill Template Authors +function z(e,t){const n=e.body,r=n.querySelector("d-article");if(!r)return void console.warn("No d-article tag found; skipping adding optional components!");let i=e.querySelector("d-byline");i||(t.authors?(i=e.createElement("d-byline"),n.insertBefore(i,r)):console.warn("No authors found in front matter; please add them before submission!"));let a=e.querySelector("d-title");a||(a=e.createElement("d-title"),n.insertBefore(a,i));let o=a.querySelector("h1");o||((o=e.createElement("h1")).textContent=t.title,a.insertBefore(o,a.firstChild));const s="undefined"!=typeof t.password;let l=n.querySelector("d-interstitial");if(s&&!l){const r="undefined"!=typeof window,i=r&&window.location.hostname.includes("localhost");r&&i||((l=e.createElement("d-interstitial")).password=t.password,n.insertBefore(l,n.firstChild))}else!s&&l&&l.parentElement.removeChild(this);let 
u=e.querySelector("d-appendix");u||(u=e.createElement("d-appendix"),e.body.appendChild(u));let d=e.querySelector("d-footnote-list");d||(d=e.createElement("d-footnote-list"),u.appendChild(d));let c=e.querySelector("d-citation-list");c||(c=e.createElement("d-citation-list"),u.appendChild(c))} +// Copyright 2018 The Distill Template Authors +function A(e,t){let n=!1;const r=e.querySelector("body");if(!r)return void console.warn("No body tag found!");t.katex&&t.katex.delimiters&&(global.document=e,ce(r,t.katex));const i=r.querySelectorAll("d-math");if(i.length>0){n=!0,console.warn(`Prerendering ${i.length} math tags...`);for(const n of i){const r={displayMode:n.hasAttribute("block")},i=Object.assign(r,t.katex),a=ie.renderToString(n.textContent,i),o=e.createElement("span");o.innerHTML=a,n.parentElement.insertBefore(o,n),n.parentElement.removeChild(n)}}if(n){const t='';e.head.insertAdjacentHTML("beforeend",t)}}function C(e){var t,n=""+e,r=pe.exec(n);if(!r)return n;var i="",a=0,o=0;for(a=r.index;a\n`)}let r=e.querySelector("head"),i=e=>N(r,e);if(i(`\n \n \n \n `),t.title&&i(`\n ${fe(t.title)}\n `),t.url&&i(`\n \n `),t.publishedDate&&i(`\n \n \n \n \n `),t.updatedDate&&i(`\n \n `),(t.authors||[]).forEach(e=>{N(r,`\n `)}),i(`\n \n \n \n \n \n \n \n \n `),i(`\n \n \n \n \n \n \n \n \n `),t.doiSuffix){i("\n \n"),n("citation_title",t.title),n("citation_fulltext_html_url",t.url),n("citation_volume",t.volume),n("citation_issue",t.issue),n("citation_firstpage",t.doiSuffix?`e${t.doiSuffix}`:undefined),n("citation_doi",t.doi);let 
e=t.journal||{};n("citation_journal_title",e.full_title||e.title),n("citation_journal_abbrev",e.abbrev_title),n("citation_issn",e.issn),n("citation_publisher",e.publisher),n("citation_fulltext_world_readable","",!0),t.publishedDate&&(n("citation_online_date",`${t.publishedYear}/${t.publishedMonthPadded}/${t.publishedDayPadded}`),n("citation_publication_date",`${t.publishedYear}/${t.publishedMonthPadded}/${t.publishedDayPadded}`)),(t.authors||[]).forEach(e=>{n("citation_author",`${e.lastName}, ${e.firstName}`),n("citation_author_institution",e.affiliation)})}else console.warn("No DOI suffix in data; not adding citation meta tags!");t.citations?t.citations.forEach(e=>{if(t.bibliography&&t.bibliography.has(e)){n("citation_reference",E(t.bibliography.get(e)))}else console.warn("No bibliography data found for "+e)}):console.warn("No citations found; not adding any references meta tags!")}function N(e,t){e.innerHTML+=t}function E(e){var t=`citation_title=${e.title};`;e.author&&""!==e.author&&e.author.split(" and ").forEach(e=>{let n,r;-1!=(e=e.trim()).indexOf(",")?(n=e.split(",")[0].trim(),r=e.split(",")[1].trim()):(n=e.split(" ").slice(-1)[0].trim(),r=e.split(" ").slice(0,-1).join(" ")),t+=`citation_author=${r} ${n};`}),"year"in e&&(t+=`citation_publication_date=${e.year};`);let n=/https?:\/\/arxiv\.org\/pdf\/([0-9]*\.[0-9]*)\.pdf/.exec(e.url);return(n=(n=n||/https?:\/\/arxiv\.org\/abs\/([0-9]*\.[0-9]*)/.exec(e.url))||/arXiv preprint arXiv:([0-9]*\.[0-9]*)/.exec(e.journal))&&n[1]?t+=`citation_arxiv_id=${n[1]};`:("journal"in e&&(t+=`citation_journal_title=${fe(e.journal)};`),"volume"in e&&(t+=`citation_volume=${fe(e.volume)};`),("issue"in e||"number"in e)&&(t+=`citation_number=${fe(e.issue||e.number)};`),t)}function R(e){const t="distill-prerendered-styles";if(!e.getElementById(t)){const n=e.createElement("style");n.id=t,n.type="text/css";const r=e.createTextNode(me);n.appendChild(r);const i=e.head.querySelector("script");e.head.insertBefore(n,i)}} +// Copyright 2018 The 
Distill Template Authors +function L(e,t){let n='\n \n \n

      Table of contents

      \n
        ';for(const e of t){const t="D-TITLE"==e.parentElement.tagName,r=e.getAttribute("no-toc");if(t||r)continue;const i=e.textContent;let a='
      • '+i+"
      • ";"H3"==e.tagName?a="
          "+a+"
        ":a+="
        ",n+=a}n+="
      ",e.innerHTML=n} +// Copyright 2018 The Distill Template Authors +function O(e){const t=e.querySelector("d-article"),n=e.querySelector("d-toc");if(n){L(n,t.querySelectorAll("h2, h3")),n.setAttribute("prerendered","true")}} +// Copyright 2018 The Distill Template Authors +function q(e){for(var t=e.createTreeWalker(e.body,e.defaultView.NodeFilter.SHOW_TEXT);t.nextNode();){var n=t.currentNode,r=n.nodeValue;r&&_(n)&&(r=D(r=B(r)),n.nodeValue=r)}}function _(e){var t=e.parentElement,n=!!(t&&t.getAttribute&&t.getAttribute("class"))&&(t.getAttribute("class").includes("katex")||t.getAttribute("class").includes("MathJax"));return t&&"SCRIPT"!==t.nodeName&&"STYLE"!==t.nodeName&&"CODE"!==t.nodeName&&"PRE"!==t.nodeName&&"SPAN"!==t.nodeName&&"D-HEADER"!==t.nodeName&&"D-BYLINE"!==t.nodeName&&"D-MATH"!==t.nodeName&&"D-CODE"!==t.nodeName&&"D-BIBLIOGRAPHY"!==t.nodeName&&"D-FOOTER"!==t.nodeName&&"D-APPENDIX"!==t.nodeName&&"D-FRONTMATTER"!==t.nodeName&&"D-TOC"!==t.nodeName&&8!==t.nodeType&&!n} +/*! + * typeset - Typesetting for the web + * @version v0.1.6 + * @link https://github.com/davidmerfield/Typeset.js + * @author David Merfield + */function D(e){var t="\xa0",n=/([\xab\xbf\xa1]) /g,r=/ ([!?:;.,\u203d\xbb])/g;return e=(e=(e=(e=(e=e.replace(/--/g,"\u2014")).replace(/\s*\u2014\s*/g,"\u2009\u2014\u2009")).replace(/\.\.\./g,"\u2026")).replace(n,"$1"+t)).replace(r,t+"$1")}function B(e){return 
e=(e=(e=(e=(e=e.replace(/(\W|^)"([^\s!?:;.,\u203d\xbb])/g,"$1\u201c$2").replace(/(\u201c[^"]*)"([^"]*$|[^\u201c"]*\u201c)/g,"$1\u201d$2").replace(/([^0-9])"/g,"$1\u201d").replace(/(\W|^)'(\S)/g,"$1\u2018$2").replace(/([a-z])'([a-z])/gi,"$1\u2019$2").replace(/((\u2018[^']*)|[a-z])'([^0-9]|$)/gi,"$1\u2019$3").replace(/(\u2018)([0-9]{2}[^\u2019]*)(\u2018([^0-9]|$)|$|\u2019[a-z])/gi,"\u2019$2$3").replace(/(\B|^)\u2018(?=([^\u2019]*\u2019\b)*([^\u2019\u2018]*\W[\u2019\u2018]\b|[^\u2019\u2018]*$))/gi,"$1\u2019").replace(/'''/g,"\u2034").replace(/("|'')/g,"\u2033").replace(/'/g,"\u2032")).replace(/\\\u201c/,'"')).replace(/\\\u201d/,'"')).replace(/\\\u2019/,"'")).replace(/\\\u2018/,"'")} +// Copyright 2018 The Distill Template Authors +function I(e){const t=e.querySelector('script[src*="template.v2.js"]');t?t.parentNode.removeChild(t):console.debug("FYI: Did not find template tag when trying to remove it. You may not have added it. Be aware that our polyfills will add it.");const n=e.createElement("script");n.src="https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.0.17/webcomponents-loader.js",e.head.insertBefore(n,e.head.firstChild);const r=e.createElement("script");r.innerHTML=ge,e.head.insertBefore(r,e.head.firstChild)} +// Copyright 2018 The Distill Template Authors +function H(e,t,n=document){if(t.size>0){e.style.display="";let r=e.querySelector(".references");if(r)r.innerHTML="";else{const t=n.createElement("style");t.innerHTML=ve,e.appendChild(t);const i=n.createElement("h3");i.id="references",i.textContent="References",e.appendChild(i),(r=n.createElement("ol")).id="references-list",r.className="references",e.appendChild(r)}for(const[e,i]of t){const t=n.createElement("li");t.id=e,t.innerHTML=x(i),r.appendChild(t)}}else e.style.display="none"} +// Copyright 2018 The Distill Template Authors +function P(e,t){const n=e.querySelector("d-citation-list");if(n){H(n,new 
Map(t.citations.map(e=>[e,t.bibliography.get(e)])),e),n.setAttribute("distill-prerendered","true")}} +// Copyright 2018 The Distill Template Authors +function j(e){const t=e.head,n=t.querySelector("meta[http-equiv]");t.insertBefore(n,t.firstChild);const r=t.querySelector("meta[name=viewport]");t.insertBefore(r,t.firstChild);const i=t.querySelector("meta[charset]");t.insertBefore(i,t.firstChild)} +// Copyright 2018 The Distill Template Authors +function F(e){if(!e.querySelector("distill-header")){const t=e.createElement("distill-header");t.innerHTML=ye,t.setAttribute("distill-prerendered","");const n=e.querySelector("body");n.insertBefore(t,n.firstChild)}} +// Copyright 2018 The Distill Template Authors +function $(e){let t=xe;"undefined"!=typeof e.githubUrl&&(t+='\n

      Updates and Corrections

      \n

      ',e.githubCompareUpdatesUrl&&(t+=`View all changes to this article since it was first published.`),t+=`\n If you see mistakes or want to suggest changes, please create an issue on GitHub.

      \n `);const n=e.journal;return void 0!==n&&"Distill"===n.title&&(t+=`\n

      Reuse

      \n

      Diagrams and text are licensed under Creative Commons Attribution CC-BY 4.0 with the source available on GitHub, unless noted otherwise. The figures that have been reused from other sources don\u2019t fall under this license and can be recognized by a note in their caption: \u201cFigure from \u2026\u201d.

      \n `),"undefined"!=typeof e.publishedDate&&(t+=`\n

      Citation

      \n

      For attribution in academic contexts, please cite this work as

      \n
      ${e.concatenatedAuthors}, "${e.title}", Distill, ${e.publishedYear}.
      \n

      BibTeX citation

      \n
      ${c(e)}
      \n `),t} +// Copyright 2018 The Distill Template Authors +function U(e,t){const n=e.querySelector("d-appendix");if(n){if(!n.querySelector("distill-appendix")){const r=e.createElement("distill-appendix");n.appendChild(r),r.innerHTML=$(t)}}else console.warn("No appendix tag found!")} +// Copyright 2018 The Distill Template Authors +function Y(e){if(!e.querySelector("distill-footer")){const t=e.createElement("distill-footer");t.innerHTML=we,e.querySelector("body").appendChild(t)}} +// Copyright 2018 The Distill Template Authors +function V(e,t,n=!0){let r;r=t instanceof ne?t:ne.fromObject(t);for(const[t,i]of ke.entries())n&&console.warn("Running extractor: "+t),i(e,r,n);for(const[t,i]of Me.entries())n&&console.warn("Running transform: "+t),i(e,r,n);e.body.setAttribute("distill-prerendered",""),t instanceof ne||r.assignToObject(t)}function G(e,t,n=!0){for(const[r,i]of Se.entries())n&&console.warn("Running distillify: ",r),i(e,t,n)}function W(e){const t=e.querySelectorAll("script");let n=undefined;for(const e of t){const t=e.src;if(t.includes("template.v1.js"))n=!1;else if(t.includes("template.v2.js"))n=!0;else if(t.includes("template."))throw new Error("Uses distill template, but unknown version?!")}if(n===undefined)throw new Error("Does not seem to use Distill template at all.");return n}t=t&&Object.prototype.hasOwnProperty.call(t,"default")?t["default"]:t; +// Copyright 2018 The Distill Template Authors +const K=["Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday"],J=["Jan.","Feb.","March","April","May","June","July","Aug.","Sept.","Oct.","Nov.","Dec."],X=e=>e<10?"0"+e:e,Z=function(e){return`${K[e.getDay()].substring(0,3)}, ${X(e.getDate())} ${J[e.getMonth()].substring(0,3)} ${e.getFullYear().toString()} ${e.getUTCHours().toString()}:${e.getUTCMinutes().toString()}:${e.getUTCSeconds().toString()} Z`},Q=function(e){return Array.from(e).reduce((e,[t,n])=>Object.assign(e,{[t]:n}),{})},ee=function(e){const t=new Map;for(var n in 
e)e.hasOwnProperty(n)&&t.set(n,e[n]);return t};class te{constructor(e){this.name=e.author,this.personalURL=e.authorURL,this.affiliation=e.affiliation,this.affiliationURL=e.affiliationURL,this.affiliations=e.affiliations||[]}get firstName(){const e=this.name.split(" ");return e.slice(0,e.length-1).join(" ")}get lastName(){const e=this.name.split(" ");return e[e.length-1]}}class ne{constructor(){this.title="unnamed article",this.description="",this.authors=[],this.bibliography=new Map,this.bibliographyParsed=!1,this.citations=[],this.citationsCollected=!1,this.journal={},this.katex={},this.doi=undefined,this.publishedDate=undefined}set url(e){this._url=e}get url(){return this._url?this._url:this.distillPath&&this.journal.url?this.journal.url+"/"+this.distillPath:this.journal.url?this.journal.url:void 0}get githubUrl(){return this.githubPath?"https://github.com/"+this.githubPath:undefined}set previewURL(e){this._previewURL=e}get previewURL(){return this._previewURL?this._previewURL:this.url+"/thumbnail.jpg"}get publishedDateRFC(){return Z(this.publishedDate)}get updatedDateRFC(){return Z(this.updatedDate)}get publishedYear(){return this.publishedDate.getFullYear()}get publishedMonth(){return J[this.publishedDate.getMonth()]}get publishedDay(){return this.publishedDate.getDate()}get publishedMonthPadded(){return X(this.publishedDate.getMonth()+1)}get publishedDayPadded(){return X(this.publishedDate.getDate())}get publishedISODateOnly(){return this.publishedDate.toISOString().split("T")[0]}get volume(){const e=this.publishedYear-2015;if(e<1)throw new Error("Invalid publish date detected during computing volume");return e}get issue(){return this.publishedDate.getMonth()+1}get concatenatedAuthors(){return this.authors.length>2?this.authors[0].lastName+", et al.":2===this.authors.length?this.authors[0].lastName+" & "+this.authors[1].lastName:1===this.authors.length?this.authors[0].lastName:void 0}get bibtexAuthors(){return this.authors.map(e=>e.lastName+", 
"+e.firstName).join(" and ")}get slug(){let e="";return this.authors.length&&(e+=this.authors[0].lastName.toLowerCase(),e+=this.publishedYear,e+=this.title.split(" ")[0].toLowerCase()),e||"Untitled"}get bibliographyEntries(){return new Map(this.citations.map(e=>{return[e,this.bibliography.get(e)]}))}set bibliography(e){e instanceof Map?this._bibliography=e:"object"==typeof e&&(this._bibliography=ee(e))}get bibliography(){return this._bibliography}static fromObject(e){const t=new ne;return Object.assign(t,e),t}assignToObject(e){Object.assign(e,this),e.bibliography=Q(this.bibliographyEntries),e.url=this.url,e.doi=this.doi,e.githubUrl=this.githubUrl,e.previewURL=this.previewURL,this.publishedDate&&(e.volume=this.volume,e.issue=this.issue,e.publishedDateRFC=this.publishedDateRFC,e.publishedYear=this.publishedYear,e.publishedMonth=this.publishedMonth,e.publishedDay=this.publishedDay,e.publishedMonthPadded=this.publishedMonthPadded,e.publishedDayPadded=this.publishedDayPadded),this.updatedDate&&(e.updatedDateRFC=this.updatedDateRFC),e.concatenatedAuthors=this.concatenatedAuthors,e.bibtexAuthors=this.bibtexAuthors,e.slug=this.slug}}var re=l(function(e,t){!function(e){function t(){this.months=["jan","feb","mar","apr","may","jun","jul","aug","sep","oct","nov","dec"],this.notKey=[",","{","}"," ","="],this.pos=0,this.input="",this.entries=new Array,this.currentEntry="",this.setInput=function(e){this.input=e},this.getEntries=function(){return this.entries},this.isWhitespace=function(e){return" "==e||"\r"==e||"\t"==e||"\n"==e},this.match=function(e,t){if(t!=undefined&&null!=t||(t=!0),this.skipWhitespace(t),this.input.substring(this.pos,this.pos+e.length)!=e)throw"Token mismatch, expected "+e+", found "+this.input.substring(this.pos);this.pos+=e.length,this.skipWhitespace(t)},this.tryMatch=function(e,t){return 
t!=undefined&&null!=t||(t=!0),this.skipWhitespace(t),this.input.substring(this.pos,this.pos+e.length)==e},this.matchAt=function(){for(;this.input.length>this.pos&&"@"!=this.input[this.pos];)this.pos++;return"@"==this.input[this.pos]},this.skipWhitespace=function(e){for(;this.isWhitespace(this.input[this.pos]);)this.pos++;if("%"==this.input[this.pos]&&1==e){for(;"\n"!=this.input[this.pos];)this.pos++;this.skipWhitespace(e)}},this.value_braces=function(){var e=0;this.match("{",!1);for(var t=this.pos,n=!1;;){if(!n)if("}"==this.input[this.pos]){if(!(e>0)){var r=this.pos;return this.match("}",!1),this.input.substring(t,r)}e--}else if("{"==this.input[this.pos])e++;else if(this.pos>=this.input.length-1)throw"Unterminated value";n="\\"==this.input[this.pos]&&0==n,this.pos++}},this.value_comment=function(){for(var e="",t=0;!this.tryMatch("}",!1)||0!=t;){if(e+=this.input[this.pos],"{"==this.input[this.pos]&&t++,"}"==this.input[this.pos]&&t--,this.pos>=this.input.length-1)throw"Unterminated value:"+this.input.substring(start);this.pos++}return e},this.value_quotes=function(){this.match('"',!1);for(var e=this.pos,t=!1;;){if(!t){if('"'==this.input[this.pos]){var n=this.pos;return this.match('"',!1),this.input.substring(e,n)}if(this.pos>=this.input.length-1)throw"Unterminated value:"+this.input.substring(e)}t="\\"==this.input[this.pos]&&0==t,this.pos++}},this.single_value=function(){var e=this.pos;if(this.tryMatch("{"))return this.value_braces();if(this.tryMatch('"'))return this.value_quotes();var t=this.key();if(t.match("^[0-9]+$"))return t;if(this.months.indexOf(t.toLowerCase())>=0)return t.toLowerCase();throw"Value expected:"+this.input.substring(e)+" for key: "+t},this.value=function(){var e=[];for(e.push(this.single_value());this.tryMatch("#");)this.match("#"),e.push(this.single_value());return e.join("")},this.key=function(){for(var e=this.pos;;){if(this.pos>=this.input.length)throw"Runaway key";if(this.notKey.indexOf(this.input[this.pos])>=0)return 
this.input.substring(e,this.pos);this.pos++}},this.key_equals_value=function(){var e=this.key();if(this.tryMatch("="))return this.match("="),[e,this.value()];throw"... = value expected, equals sign missing:"+this.input.substring(this.pos)},this.key_value_list=function(){var e=this.key_equals_value();for(this.currentEntry.entryTags={},this.currentEntry.entryTags[e[0]]=e[1];this.tryMatch(",")&&(this.match(","),!this.tryMatch("}"));)e=this.key_equals_value(),this.currentEntry.entryTags[e[0]]=e[1]},this.entry_body=function(e){this.currentEntry={},this.currentEntry.citationKey=this.key(),this.currentEntry.entryType=e.substring(1),this.match(","),this.key_value_list(),this.entries.push(this.currentEntry)},this.directive=function(){return this.match("@"),"@"+this.key()},this.preamble=function(){this.currentEntry={},this.currentEntry.entryType="PREAMBLE",this.currentEntry.entry=this.value_comment(),this.entries.push(this.currentEntry)},this.comment=function(){this.currentEntry={},this.currentEntry.entryType="COMMENT",this.currentEntry.entry=this.value_comment(),this.entries.push(this.currentEntry)},this.entry=function(e){this.entry_body(e)},this.bibtex=function(){for(;this.matchAt();){var e=this.directive();this.match("{"),"@STRING"==e?this.string():"@PREAMBLE"==e?this.preamble():"@COMMENT"==e?this.comment():this.entry(e),this.match("}")}}}e.toJSON=function(e){var n=new t;return n.setInput(e),n.bibtex(),n.entries},e.toBibtex=function(e){var t="";for(var n in e){if(t+="@"+e[n].entryType,t+="{",e[n].citationKey&&(t+=e[n].citationKey+", "),e[n].entry&&(t+=e[n].entry),e[n].entryTags){var r="";for(var i in e[n].entryTags)0!=r.length&&(r+=", "),r+=i+"= {"+e[n].entryTags[i]+"}";t+=r}t+="}\n\n"}return t}}(t)}),ie=s(l(function(e){var t;t=function(){return function e(t,n,r){function i(s,l){if(!n[s]){if(!t[s]){var u="function"==typeof o&&o;if(!l&&u)return u(s,!0);if(a)return a(s,!0);var d=new Error("Cannot find module '"+s+"'");throw d.code="MODULE_NOT_FOUND",d}var 
c=n[s]={exports:{}};t[s][0].call(c.exports,function(e){var n=t[s][1][e];return i(n||e)},c,c.exports,e,t,n,r)}return n[s].exports}for(var a="function"==typeof o&&o,s=0;s=0;--d)if("#"===(n=r[d]).text){if(0===d)throw new s["default"]("Incomplete placeholder at end of macro body",n);if("#"===(n=r[--d]).text)r.splice(d+1,1);else{if(!/^[1-9]$/.test(n.text))throw new s["default"]("Not a valid argument number",n);r.splice.apply(r,[d,2].concat(u[n.text-1]))}}}this.stack=this.stack.concat(r)}}},{key:"get",value:function(e){this.discardedWhiteSpace=[];var t=this.nextToken();if(e)for(;" "===t.text;)this.discardedWhiteSpace.push(t),t=this.nextToken();return t}},{key:"unget",value:function(e){for(this.stack.push(e);0!==this.discardedWhiteSpace.length;)this.stack.push(this.discardedWhiteSpace.pop())}}]),e}();t.exports=u},{"./Lexer":26,"./ParseError":29,"./macros":44,"babel-runtime/helpers/classCallCheck":4,"babel-runtime/helpers/createClass":5,"object-assign":25}],28:[function(e,t){function n(e){return e&&e.__esModule?e:{"default":e}}var r=n(e("babel-runtime/helpers/classCallCheck")),i=n(e("babel-runtime/helpers/createClass")),a=n(e("./fontMetrics")),o=6,s=[[1,1,1],[2,1,1],[3,1,1],[4,2,1],[5,2,1],[6,3,1],[7,4,2],[8,6,3],[9,7,6],[10,8,7],[11,10,9]],l=[.5,.6,.7,.8,.9,1,1.2,1.44,1.728,2.074,2.488],u=function(e,t){return t.size<2?e:s[e-1][t.size-1]},d=function(){function e(t){(0,r["default"])(this,e),this.style=t.style,this.color=t.color,this.size=t.size||o,this.textSize=t.textSize||this.size,this.phantom=t.phantom,this.font=t.font,this.sizeMultiplier=l[this.size-1],this._fontMetrics=null}return(0,i["default"])(e,[{key:"extend",value:function(t){var n={style:this.style,size:this.size,textSize:this.textSize,color:this.color,phantom:this.phantom,font:this.font};for(var r in t)t.hasOwnProperty(r)&&(n[r]=t[r]);return new e(n)}},{key:"havingStyle",value:function(e){return this.style===e?this:this.extend({style:e,size:u(this.textSize,e)})}},{key:"havingCrampedStyle",value:function(){return 
this.havingStyle(this.style.cramp())}},{key:"havingSize",value:function(e){return this.size===e&&this.textSize===e?this:this.extend({style:this.style.text(),size:e,textSize:e})}},{key:"havingBaseStyle",value:function(e){e=e||this.style.text();var t=u(o,e);return this.size===t&&this.textSize===o&&this.style===e?this:this.extend({style:e,size:t,baseSize:o})}},{key:"withColor",value:function(e){return this.extend({color:e})}},{key:"withPhantom",value:function(){return this.extend({phantom:!0})}},{key:"withFont",value:function(e){return this.extend({font:e||this.font})}},{key:"sizingClasses",value:function(e){return e.size!==this.size?["sizing","reset-size"+e.size,"size"+this.size]:[]}},{key:"baseSizingClasses",value:function(){return this.size!==o?["sizing","reset-size"+this.size,"size"+o]:[]}},{key:"fontMetrics",value:function(){return this._fontMetrics||(this._fontMetrics=a["default"].getFontMetrics(this.size)),this._fontMetrics}},{key:"getColor",value:function(){return this.phantom?"transparent":e.colorMap[this.color]||this.color}}]),e}();d.colorMap={"katex-blue":"#6495ed","katex-orange":"#ffa500","katex-pink":"#ff00af","katex-red":"#df0030","katex-green":"#28ae7b","katex-gray":"gray","katex-purple":"#9d38bd","katex-blueA":"#ccfaff","katex-blueB":"#80f6ff","katex-blueC":"#63d9ea","katex-blueD":"#11accd","katex-blueE":"#0c7f99","katex-tealA":"#94fff5","katex-tealB":"#26edd5","katex-tealC":"#01d1c1","katex-tealD":"#01a995","katex-tealE":"#208170","katex-greenA":"#b6ffb0","katex-greenB":"#8af281","katex-greenC":"#74cf70","katex-greenD":"#1fab54","katex-greenE":"#0d923f","katex-goldA":"#ffd0a9","katex-goldB":"#ffbb71","katex-goldC":"#ff9c39","katex-goldD":"#e07d10","katex-goldE":"#a75a05","katex-redA":"#fca9a9","katex-redB":"#ff8482","katex-redC":"#f9685d","katex-redD":"#e84d39","katex-redE":"#bc2612","katex-maroonA":"#ffbde0","katex-maroonB":"#ff92c6","katex-maroonC":"#ed5fa6","katex-maroonD":"#ca337c","katex-maroonE":"#9e034e","katex-purpleA":"#ddd7ff","katex-purpleB"
:"#c6b9fc","katex-purpleC":"#aa87ff","katex-purpleD":"#7854ab","katex-purpleE":"#543b78","katex-mintA":"#f5f9e8","katex-mintB":"#edf2df","katex-mintC":"#e0e5cc","katex-grayA":"#f6f7f7","katex-grayB":"#f0f1f2","katex-grayC":"#e3e5e6","katex-grayD":"#d6d8da","katex-grayE":"#babec2","katex-grayF":"#888d93","katex-grayG":"#626569","katex-grayH":"#3b3e40","katex-grayI":"#21242c","katex-kaBlue":"#314453","katex-kaGreen":"#71B307"},d.BASESIZE=o,t.exports=d},{"./fontMetrics":41,"babel-runtime/helpers/classCallCheck":4,"babel-runtime/helpers/createClass":5}],29:[function(e,t){function n(e){return e&&e.__esModule?e:{"default":e}}var r=n(e("babel-runtime/helpers/classCallCheck")),i=function a(e,t){(0,r["default"])(this,a);var n="KaTeX parse error: "+e,i=void 0,o=void 0;if(t&&t.lexer&&t.start<=t.end){var s=t.lexer.input;i=t.start,o=t.end,i===s.length?n+=" at end of input: ":n+=" at position "+(i+1)+": ";var l=s.slice(i,o).replace(/[^]/g,"$&\u0332");n+=(i>15?"\u2026"+s.slice(i-15,i):s.slice(0,i))+l+(o+15e.SUPSUB_GREEDINESS)return this.parseFunction(i);throw new f["default"]("Got function '"+i.result+"' with no arguments as "+t,n)}return i.result}if(this.settings.throwOnError||"\\"!==this.nextToken.text[0])throw new f["default"]("Expected group after '"+r+"'",n);return this.handleUnsupportedCmd()}},{key:"handleUnsupportedCmd",value:function(){for(var e=this.nextToken.text,t=[],n=0;ni))throw new f["default"]("Got function '"+c.result+"' as argument to '"+e+"'",u);h=this.parseFunction(c)}else h=c.result;s.push(h),a.push(this.pos)}return s.push(a),s}},{key:"parseGroupOfType",value:function(e,t){var n=this.mode;if("original"===e&&(e=n),"color"===e)return this.parseColorGroup(t);if("size"===e)return this.parseSizeGroup(t);this.switchMode(e),"text"===e&&this.consumeSpaces();var r=this.parseGroup(t);return this.switchMode(n),r}},{key:"consumeSpaces",value:function(){for(;" 
"===this.nextToken.text;)this.consume()}},{key:"parseStringGroup",value:function(e,t){if(t&&"["!==this.nextToken.text)return null;var n=this.mode;this.mode="text",this.expect(t?"[":"{");for(var r="",i=this.nextToken,a=i;this.nextToken.text!==(t?"]":"}");){if("EOF"===this.nextToken.text)throw new f["default"]("Unexpected end of input in "+e,i.range(this.nextToken,r));r+=(a=this.nextToken).text,this.consume()}return this.mode=n,this.expect(t?"]":"}"),i.range(a,r)}},{key:"parseRegexGroup",value:function(e,t){var n=this.mode;this.mode="text";for(var r=this.nextToken,i=r,a="";"EOF"!==this.nextToken.text&&e.test(a+this.nextToken.text);)a+=(i=this.nextToken).text,this.consume();if(""===a)throw new f["default"]("Invalid "+t+": '"+r.text+"'",r);return this.mode=n,r.range(i,a)}},{key:"parseColorGroup",value:function(e){var t=this.parseStringGroup("color",e);if(!t)return null;var n=/^(#[a-z0-9]+|[a-z]+)$/i.exec(t.text);if(!n)throw new f["default"]("Invalid color: '"+t.text+"'",t) +;return new r(new p["default"]("color",n[0],this.mode),!1)}},{key:"parseSizeGroup",value:function(e){var t=void 0;if(!(t=e||"{"===this.nextToken.text?this.parseStringGroup("size",e):this.parseRegexGroup(/^[-+]? *(?:$|\d+|\d+\.\d*|\.\d*) *[a-z]{0,2} *$/,"size")))return null;var n=/([-+]?) 
*(\d+(?:\.\d*)?|\.\d+) *([a-z]{2})/.exec(t.text);if(!n)throw new f["default"]("Invalid size: '"+t.text+"'",t);var i={number:+(n[1]+n[2]),unit:n[3]};if(!c["default"].validUnit(i))throw new f["default"]("Invalid unit: '"+i.unit+"'",t);return new r(new p["default"]("size",i,this.mode),!1)}},{key:"parseGroup",value:function(e){var t=this.nextToken;if(this.nextToken.text===(e?"[":"{")){this.consume();var n=this.parseExpression(!1,e?"]":null),i=this.nextToken;return this.expect(e?"]":"}"),"text"===this.mode&&this.formLigatures(n),new r(new p["default"]("ordgroup",n,this.mode,t,i),!1)}return e?null:this.parseSymbol()}},{key:"formLigatures",value:function(e){for(var t=e.length-1,n=0;n=2}}]),e}(),o=0,s=1,l=2,u=3,d=4,c=5,h=6,p=7,f=[new a(o,0,!1),new a(s,0,!0),new a(l,1,!1),new a(u,1,!0),new a(d,2,!1),new a(c,2,!0),new a(h,3,!1),new a(p,3,!0)],m=[d,c,d,c,h,p,h,p],g=[c,c,c,c,p,p,p,p],v=[l,u,d,c,h,p,h,p],b=[u,u,c,c,p,p,p,p],y=[s,s,u,u,c,c,p,p],x=[o,s,l,u,l,u,l,u];t.exports={DISPLAY:f[o],TEXT:f[l],SCRIPT:f[d],SCRIPTSCRIPT:f[h]}},{"babel-runtime/helpers/classCallCheck":4,"babel-runtime/helpers/createClass":5}],34:[function(e,t){function n(e){return e&&e.__esModule?e:{"default":e}}var r=n(e("./domTree")),i=n(e("./fontMetrics")),a=n(e("./symbols")),o=n(e("./utils")),s=["\\imath","\\jmath","\\pounds"],l=function(e,t,n){return a["default"][n][e]&&a["default"][n][e].replace&&(e=a["default"][n][e].replace),{value:e,metrics:i["default"].getCharacterMetrics(e,t)}},u=function(e,t,n,i,a){var o=l(e,t,n),s=o.metrics;e=o.value;var u=void 0;if(s){var d=s.italic;"text"===n&&(d=0),u=new r["default"].symbolNode(e,s.height,s.depth,d,s.skew,a)}else"undefined"!=typeof console&&console.warn("No character metrics for '"+e+"' in style '"+t+"'"),u=new r["default"].symbolNode(e,0,0,0,0,a);return 
i&&(u.maxFontSize=i.sizeMultiplier,i.style.isTight()&&u.classes.push("mtight"),i.getColor()&&(u.style.color=i.getColor())),u},d=function(e,t,n,r){return"\\"===e||"main"===a["default"][t][e].font?u(e,"Main-Regular",t,n,r):u(e,"AMS-Regular",t,n,r.concat(["amsrm"]))},c=function(e,t,n,r,i){if("mathord"===i){var o=h(e);return u(e,o.fontName,t,n,r.concat([o.fontClass]))}if("textord"===i)return"ams"===(a["default"][t][e]&&a["default"][t][e].font)?u(e,"AMS-Regular",t,n,r.concat(["amsrm"])):u(e,"Main-Regular",t,n,r.concat(["mathrm"]));throw new Error("unexpected type: "+i+" in mathDefault")},h=function(e){return/[0-9]/.test(e.charAt(0))||o["default"].contains(s,e)?{fontName:"Main-Italic",fontClass:"mainit"}:{fontName:"Math-Italic",fontClass:"mathit"}},p=function(e,t,n){var r=e.mode,i=e.value,a=["mord"],d=t.font;if(d){var p=void 0;return p="mathit"===d||o["default"].contains(s,i)?h(i):x[d],l(i,p.fontName,r).metrics?u(i,p.fontName,r,t,a.concat([p.fontClass||d])):c(i,r,t,a,n)}return c(i,r,t,a,n)},f=function(e){var t=0,n=0,r=0;if(e.children)for(var i=0;it&&(t=e.children[i].height),e.children[i].depth>n&&(n=e.children[i].depth),e.children[i].maxFontSize>r&&(r=e.children[i].maxFontSize);e.height=t,e.depth=n,e.maxFontSize=r},m=function(e,t,n){var i=new r["default"].span(e,t,n);return f(i),i},g=function(e,t){e.children=t.concat(e.children),f(e)},v=function(e){var t=new r["default"].documentFragment(e);return f(t),t},b=function(e,t,n){var i=void 0,a=void 0,o=void 0;if("individualShift"===t){var s=e;for(e=[s[0]],a=i=-s[0].shift-s[0].elem.depth,o=1;o0&&(c+=b,h-=b)}var y=[{type:"elem",elem:i,shift:h,marginRight:m},{type:"elem",elem:r,shift:-c,marginRight:m}];n instanceof d["default"].symbolNode&&(y[0].marginLeft=-n.italic+"em"),g=l["default"].makeVList(y,"individualShift",null,t)}else c=Math.max(c,p,r.depth+.25*a.xHeight),g=l["default"].makeVList([{type:"elem",elem:r,marginRight:m}],"shift",-c,t);else{h=Math.max(h,a.sub1,i.height-.8*a.xHeight);var 
k=[{type:"elem",elem:i,marginRight:m}];n instanceof d["default"].symbolNode&&(k[0].marginLeft=-n.italic+"em"),g=l["default"].makeVList(k,"shift",h,t)}var S=x(n)||"mord";return(0,s.makeSpan)([S],[n,(0,s.makeSpan)(["msupsub"],[g])],t)},genfrac:function(e,t){var n=t.style;"display"===e.value.size?n=o["default"].DISPLAY:"text"===e.value.size&&(n=o["default"].TEXT);var r=n.fracNum(),i=n.fracDen(),a=void 0;a=t.havingStyle(r);var d=C(e.value.numer,a,t);a=t.havingStyle(i);var c=C(e.value.denom,a,t),h=void 0,p=void 0,f=void 0;e.value.hasBarLine?(p=(h=A("frac-line",t)).height,f=h.height):(h=null,p=0,f=t.fontMetrics().defaultRuleThickness);var m=void 0,g=void 0,v=void 0;n.size===o["default"].DISPLAY.size?(m=t.fontMetrics().num1,g=p>0?3*f:7*f,v=t.fontMetrics().denom1):(p>0?(m=t.fontMetrics().num2,g=f):(m=t.fontMetrics().num3,g=3*f),v=t.fontMetrics().denom2);var b=void 0;if(0===p){var y=m-d.depth-(c.height-v);y0&&(k<(z+=b)&&(k=z),z=0),e.value.addJot&&(k+=m),M.height=w,M.depth=k,y+=w,M.pos=y,y+=k+z,u[n]=M}var A=y/2+t.fontMetrics().axisHeight,T=e.value.cols||[],N=[],E=void 0,R=void 0;for(r=0,R=0;r=o)){var _=void 0;(r>0||e.value.hskipBeforeAndAfter)&&0!==(_=h["default"].deflt(L.pregap,p))&&((E=(0,s.makeSpan)(["arraycolsep"],[])).style.width=_+"em",N.push(E));var D=[];for(n=0;nn.height+n.depth+a&&(a=(a+f-n.height-n.depth)/2);var m=h.height-n.height-a-p,g=void 0;if(0===n.height&&0===n.depth?g=(0,s.makeSpan)():(n.style.paddingLeft=h.surdWidth+"em",(g=l["default"].makeVList([{type:"elem",elem:n},{type:"kern",size:-(n.height+m)},{type:"elem",elem:h},{type:"kern",size:p}],"firstBaseline",null,t)).children[0].children[0].classes.push("svg-align")),e.value.index){var v=t.havingStyle(o["default"].SCRIPTSCRIPT),b=C(e.value.index,v,t),y=.6*(g.height-g.depth),x=l["default"].makeVList([{type:"elem",elem:b}],"shift",-y,t),w=(0,s.makeSpan)(["root"],[x]);return(0,s.makeSpan)(["mord","sqrt"],[w,g],t)}return(0,s.makeSpan)(["mord","sqrt"],[g],t)},z.sizing=function(e,t){var 
n=t.havingSize(e.value.size);return r(e.value.value,n,t)},z.styling=function(e,t){var n={display:o["default"].DISPLAY,text:o["default"].TEXT,script:o["default"].SCRIPT,scriptscript:o["default"].SCRIPTSCRIPT}[e.value.style],i=t.havingStyle(n);return r(e.value.value,i,t)},z.font=function(e,t){var n=e.value.font;return C(e.value.body,t.withFont(n))},z.delimsizing=function(e,t){var n=e.value.value;return"."===n?(0,s.makeSpan)([e.value.mclass]):u["default"].sizedDelim(n,e.value.size,t,e.mode,[e.value.mclass])},z.leftright=function(e,t){for(var n=y(e.value.body,t,!0),r=0,i=0,a=!1,o=0;o0&&(h.style.width="calc(100% - "+2*o+"em)",h.style.marginLeft=2*o+"em")}else{var f=l["default"].makeSymbol(e.value.label,"Main-Regular",e.mode,t);f.italic=0;var m=null;"\\vec"===e.value.label?m="accent-vec":"\\H"===e.value.label&&(m="accent-hungarian"),c=(0,s.makeSpan)([],[f]),(c=(0,s.makeSpan)(["accent-body",m],[c])).style.marginLeft=2*o+"em",c=l["default"].makeVList([{type:"elem",elem:a},{type:"kern",size:-d},{type:"elem",elem:c}],"firstBaseline",null,t)}var g=(0,s.makeSpan)(["mord","accent"],[c],t);return r?(r.children[0]=g,r.height=Math.max(g.height,r.height),r.classes[0]="mord",r):g},z.horizBrace=function(e,t){var n=t.style,r="supsub"===e.type,i=void 0,a=void 0;r&&(e.value.sup?(a=t.havingStyle(n.sup()),i=C(e.value.sup,a,t)):(a=t.havingStyle(n.sub()),i=C(e.value.sub,a,t)),e=e.value.base);var u=C(e.value.base,t.havingBaseStyle(o["default"].DISPLAY)),d=p["default"].svgSpan(e,t),c=void 0;if(e.value.isOver?(c=l["default"].makeVList([{type:"elem",elem:u},{type:"kern",size:.1},{type:"elem",elem:d}],"firstBaseline",null,t)).children[0].children[0].children[1].classes.push("svg-align"):(c=l["default"].makeVList([{type:"elem",elem:d},{type:"kern",size:.1},{type:"elem",elem:u}],"bottom",u.depth+.1+d.height,t)).children[0].children[0].children[0].classes.push("svg-align"),r){var 
h=(0,s.makeSpan)(["mord",e.value.isOver?"mover":"munder"],[c],t);c=e.value.isOver?l["default"].makeVList([{type:"elem",elem:h},{type:"kern",size:.2},{type:"elem",elem:i}],"firstBaseline",null,t):l["default"].makeVList([{type:"elem",elem:i},{type:"kern",size:.2},{type:"elem",elem:h}],"bottom",h.depth+.2+i.height,t)}return(0,s.makeSpan)(["mord",e.value.isOver?"mover":"munder"],[c],t)},z.accentUnder=function(e,t){var n=C(e.value.body,t),r=p["default"].svgSpan(e,t),i=/tilde/.test(e.value.label)?.12:0,a=l["default"].makeVList([{type:"elem",elem:r},{type:"kern",size:i},{type:"elem",elem:n}],"bottom",r.height+i,t);return a.children[0].children[0].children[0].classes.push("svg-align"),(0,s.makeSpan)(["mord","accentunder"],[a],t)},z.enclose=function(e,t){var n=C(e.value.body,t),r=e.value.label.substr(1),i=t.sizeMultiplier,a=void 0,o=0,u=0;if("sout"===r)(a=(0,s.makeSpan)(["stretchy","sout"])).height=t.fontMetrics().defaultRuleThickness/i,u=-.5*t.fontMetrics().xHeight;else{n.classes.push("fbox"===r?"boxpad":"cancel-pad");var d=M(e.value.body);o="fbox"===r?.34:d?.2:0,u=n.depth+o,a=p["default"].encloseSpan(n,r,o,t)}var c=l["default"].makeVList([{type:"elem",elem:n,shift:0},{type:"elem",elem:a,shift:u}],"individualShift",null,t);return"fbox"!==r&&c.children[0].children[0].children[1].classes.push("svg-align"),/cancel/.test(r)?(0,s.makeSpan)(["mord","cancel-lap"],[c],t):(0,s.makeSpan)(["mord"],[c],t)},z.xArrow=function(e,t){var n=t.style,r=t.havingStyle(n.sup()),i=C(e.value.body,r,t);i.classes.push("x-arrow-pad");var a=void 0;e.value.below&&(r=t.havingStyle(n.sub()),(a=C(e.value.below,r,t)).classes.push("x-arrow-pad"));var o=p["default"].svgSpan(e,t),u=-t.fontMetrics().axisHeight+o.depth,d=-t.fontMetrics().axisHeight-o.height-.111,c=void 0;if(e.value.below){var h=-t.fontMetrics().axisHeight+a.height+o.height+.111;c=l["default"].makeVList([{type:"elem",elem:i,shift:d},{type:"elem",elem:o,shift:u},{type:"elem",elem:a,shift:h}],"individualShift",null,t)}else 
c=l["default"].makeVList([{type:"elem",elem:i,shift:d},{type:"elem",elem:o,shift:u}],"individualShift",null,t);return c.children[0].children[0].children[1].classes.push("svg-align"),(0,s.makeSpan)(["mrel","x-arrow"],[c],t)},z.phantom=function(e,t){var n=y(e.value.value,t.withPhantom(),!1);return new l["default"].makeFragment(n)},z.mclass=function(e,t){var n=y(e.value.value,t,!0);return(0,s.makeSpan)([e.value.mclass],n,t)};var C=function(e,t,n){if(!e)return(0,s.makeSpan)();if(z[e.type]){var r=z[e.type](e,t);if(n&&t.size!==n.size){r=(0,s.makeSpan)(t.sizingClasses(n),[r],t);var i=t.sizeMultiplier/n.sizeMultiplier;r.height*=i,r.depth*=i}return r}throw new a["default"]("Got group of unknown type: '"+e.type+"'")},T=function(e,t){e=JSON.parse((0,i["default"])(e));var n=y(e,t,!0),r=(0,s.makeSpan)(["base"],n,t),a=(0,s.makeSpan)(["strut"]),o=(0,s.makeSpan)(["strut","bottom"]);a.style.height=r.height+"em",o.style.height=r.height+r.depth+"em",o.style.verticalAlign=-r.depth+"em";var l=(0,s.makeSpan)(["katex-html"],[a,o,r]);return l.setAttribute("aria-hidden","true"),l};t.exports=T},{"./ParseError":29,"./Style":33,"./buildCommon":34,"./delimiter":38,"./domTree":39,"./stretchy":47,"./units":50,"./utils":51,"babel-runtime/core-js/json/stringify":2}],36:[function(e,t){function n(e){return e&&e.__esModule?e:{"default":e}}var r=e("./buildCommon"),i=n(r),a=n(e("./fontMetrics")),o=n(e("./mathMLTree")),s=n(e("./ParseError")),l=n(e("./Style")),u=n(e("./symbols")),d=n(e("./utils")),c=n(e("./stretchy")),h=function(e,t){return u["default"][t][e]&&u["default"][t][e].replace&&(e=u["default"][t][e].replace),new o["default"].TextNode(e)},p=function(e,t){var n=t.font;if(!n)return null;var i=e.mode;if("mathit"===n)return"italic";var o=e.value;if(d["default"].contains(["\\imath","\\jmath"],o))return null;u["default"][i][o]&&u["default"][i][o].replace&&(o=u["default"][i][o].replace);var s=r.fontMap[n].fontName;return 
a["default"].getCharacterMetrics(o,s)?r.fontMap[t.font].variant:null},f={},m={mi:"italic",mn:"normal",mtext:"normal"};f.mathord=function(e,t){var n=new o["default"].MathNode("mi",[h(e.value,e.mode)]),r=p(e,t)||"italic";return r!==m[n.type]&&n.setAttribute("mathvariant",r),n},f.textord=function(e,t){var n=h(e.value,e.mode),r=p(e,t)||"normal",i=void 0;return i="text"===e.mode?new o["default"].MathNode("mtext",[n]):/[0-9]/.test(e.value)?new o["default"].MathNode("mn",[n]):"\\prime"===e.value?new o["default"].MathNode("mo",[n]):new o["default"].MathNode("mi",[n]),r!==m[i.type]&&i.setAttribute("mathvariant",r),i},f.bin=function(e){return new o["default"].MathNode("mo",[h(e.value,e.mode)])},f.rel=function(e){return new o["default"].MathNode("mo",[h(e.value,e.mode)])},f.open=function(e){return new o["default"].MathNode("mo",[h(e.value,e.mode)])},f.close=function(e){return new o["default"].MathNode("mo",[h(e.value,e.mode)])},f.inner=function(e){return new o["default"].MathNode("mo",[h(e.value,e.mode)])},f.punct=function(e){var t=new o["default"].MathNode("mo",[h(e.value,e.mode)]);return t.setAttribute("separator","true"),t},f.ordgroup=function(e,t){var n=g(e.value,t);return new o["default"].MathNode("mrow",n)},f.text=function(e,t){for(var n=e.value.body,r=[],i=null,a=0;a2&&arguments[2]!==undefined&&arguments[2];if(!e)return new o["default"].MathNode("mrow");if(f[e.type]){var r=f[e.type](e,t);return n&&"mrow"===r.type&&1===r.children.length?r.children[0]:r}throw new s["default"]("Got group of unknown type: '"+e.type+"'")},b=function(e,t,n){var i=g(e,n),a=new o["default"].MathNode("mrow",i),s=new o["default"].MathNode("annotation",[new o["default"].TextNode(t)]);s.setAttribute("encoding","application/x-tex");var l=new o["default"].MathNode("semantics",[a,s]),u=new 
o["default"].MathNode("math",[l]);return(0,r.makeSpan)(["katex-mathml"],[u])};t.exports=b},{"./ParseError":29,"./Style":33,"./buildCommon":34,"./fontMetrics":41,"./mathMLTree":45,"./stretchy":47,"./symbols":48,"./utils":51}],37:[function(e,t){function n(e){return e&&e.__esModule?e:{"default":e}}var r=n(e("./buildHTML")),i=n(e("./buildMathML")),a=e("./buildCommon"),o=n(e("./Options")),s=n(e("./Settings")),l=n(e("./Style")),u=function(e,t,n){n=n||new s["default"]({});var u=l["default"].TEXT;n.displayMode&&(u=l["default"].DISPLAY);var d=new o["default"]({style:u}),c=(0,i["default"])(e,t,d),h=(0,r["default"])(e,d),p=(0,a.makeSpan)(["katex"],[c,h]);return n.displayMode?(0,a.makeSpan)(["katex-display"],[p]):p};t.exports=u},{"./Options":28,"./Settings":32,"./Style":33,"./buildCommon":34,"./buildHTML":35,"./buildMathML":36}],38:[function(e,t){function n(e){return e&&e.__esModule?e:{"default":e}}var r=n(e("./ParseError")),i=n(e("./Style")),a=e("./buildCommon"),o=n(a),s=n(e("./fontMetrics")),l=n(e("./symbols")),u=n(e("./utils")),d=function(e,t){return l["default"].math[e]&&l["default"].math[e].replace?s["default"].getCharacterMetrics(l["default"].math[e].replace,t):s["default"].getCharacterMetrics(e,t)},c=function(e,t,n,r){var i=n.havingBaseStyle(t),o=(0,a.makeSpan)((r||[]).concat(i.sizingClasses(n)),[e],n);return o.delimSizeMultiplier=i.sizeMultiplier/n.sizeMultiplier,o.height*=o.delimSizeMultiplier,o.depth*=o.delimSizeMultiplier,o.maxFontSize=i.sizeMultiplier,o},h=function(e,t,n){var r=t.havingBaseStyle(n),i=(1-t.sizeMultiplier/r.sizeMultiplier)*t.fontMetrics().axisHeight;e.classes.push("delimcenter"),e.style.top=i+"em",e.height-=i,e.depth+=i},p=function(e,t,n,r,i,a){var s=o["default"].makeSymbol(e,"Main-Regular",i,r),l=c(s,t,r,a);return n&&h(l,r,t),l},f=function(e,t,n,r){return o["default"].makeSymbol(e,"Size"+t+"-Regular",n,r)},m=function(e,t,n,r,o,s){var l=f(e,t,o,r),u=c((0,a.makeSpan)(["delimsizing","size"+t],[l],r),i["default"].TEXT,r,s);return 
n&&h(u,r,i["default"].TEXT),u},g=function(e,t,n){var r=void 0;return"Size1-Regular"===t?r="delim-size1":"Size4-Regular"===t&&(r="delim-size4"),{type:"elem",elem:(0,a.makeSpan)(["delimsizinginner",r],[(0,a.makeSpan)([],[o["default"].makeSymbol(e,t,n)])])}},v=function(e,t,n,r,s,l){var u=void 0,h=void 0,p=void 0,f=void 0;u=p=f=e,h=null;var m="Size1-Regular";"\\uparrow"===e?p=f="\u23d0":"\\Uparrow"===e?p=f="\u2016":"\\downarrow"===e?u=p="\u23d0":"\\Downarrow"===e?u=p="\u2016":"\\updownarrow"===e?(u="\\uparrow",p="\u23d0",f="\\downarrow"):"\\Updownarrow"===e?(u="\\Uparrow",p="\u2016",f="\\Downarrow"):"["===e||"\\lbrack"===e?(u="\u23a1",p="\u23a2",f="\u23a3",m="Size4-Regular"):"]"===e||"\\rbrack"===e?(u="\u23a4",p="\u23a5",f="\u23a6",m="Size4-Regular"):"\\lfloor"===e?(p=u="\u23a2",f="\u23a3",m="Size4-Regular"):"\\lceil"===e?(u="\u23a1",p=f="\u23a2",m="Size4-Regular"):"\\rfloor"===e?(p=u="\u23a5",f="\u23a6",m="Size4-Regular"):"\\rceil"===e?(u="\u23a4",p=f="\u23a5",m="Size4-Regular"):"("===e?(u="\u239b",p="\u239c",f="\u239d",m="Size4-Regular"):")"===e?(u="\u239e",p="\u239f",f="\u23a0",m="Size4-Regular"):"\\{"===e||"\\lbrace"===e?(u="\u23a7",h="\u23a8",f="\u23a9",p="\u23aa",m="Size4-Regular"):"\\}"===e||"\\rbrace"===e?(u="\u23ab",h="\u23ac",f="\u23ad",p="\u23aa",m="Size4-Regular"):"\\lgroup"===e?(u="\u23a7",f="\u23a9",p="\u23aa",m="Size4-Regular"):"\\rgroup"===e?(u="\u23ab",f="\u23ad",p="\u23aa",m="Size4-Regular"):"\\lmoustache"===e?(u="\u23a7",f="\u23ad",p="\u23aa",m="Size4-Regular"):"\\rmoustache"===e&&(u="\u23ab",f="\u23a9",p="\u23aa",m="Size4-Regular");var v=d(u,m),b=v.height+v.depth,y=d(p,m),x=y.height+y.depth,w=d(f,m),k=w.height+w.depth,M=0,S=1;if(null!==h){var z=d(h,m);M=z.height+z.depth,S=2}var A=b+k+M,C=Math.ceil((t-A)/(S*x)),T=A+C*S*x,N=r.fontMetrics().axisHeight;n&&(N*=r.sizeMultiplier);var E=T/2-N,R=[];if(R.push(g(f,m,s)),null===h)for(var L=0;L",1:"",2:"",3:"",4:"",tall:"l-4 4-4 4c-.667.667-2 1.5-4 2.5s-4.167 1.833-6.5 2.5-5.5 1-9.5 
1h\n-12l-28-84c-16.667-52-96.667 -294.333-240-727l-212 -643 -85 170c-4-3.333-8.333\n-7.667-13 -13l-13-13l77-155 77-156c66 199.333 139 419.667 219 661 l218 661z\nM702 0H400000v40H742z'/>"},y=function(e,t,n){var r=o["default"].makeSpan([],[],n),i=n.sizeMultiplier;if("small"===t.type)i=n.havingBaseStyle(t.style).sizeMultiplier/n.sizeMultiplier,r.height=1*i,r.style.height=r.height+"em",r.surdWidth=.833*i,r.innerHTML="\n "+b.main+"";else if("large"===t.type)r.height=M[t.size]/i,r.style.height=r.height+"em",r.surdWidth=1/i,r.innerHTML='\n '+b[t.size]+"";else{r.height=e/i,r.style.height=r.height+"em",r.surdWidth=1.056/i;var a=Math.floor(1e3*r.height),s=a-54;r.innerHTML="\n \n t)return n[i]}return n[n.length-1]},E=function(e,t,n,r,i,a){"<"===e||"\\lt"===e?e="\\langle":">"!==e&&"\\gt"!==e||(e="\\rangle");var o=void 0;o=u["default"].contains(k,e)?z:u["default"].contains(x,e)?C:A;var s=N(e,t,o,r);return"\\surd"===e?y(t,s,r):"small"===s.type?p(e,s.style,n,r,i,a):"large"===s.type?m(e,s.size,n,r,i,a):"stack"===s.type?v(e,t,n,r,i,a):void 0},R=function(e,t,n,r,i,a){var o=r.fontMetrics().axisHeight*r.sizeMultiplier,s=901,l=5/r.fontMetrics().ptPerEm,u=Math.max(t-o,n+o),d=Math.max(u/500*s,2*u-l);return E(e,d,!0,r,i,a)};t.exports={sizedDelim:S,customSizedDelim:E,leftRightDelim:R}},{"./ParseError":29,"./Style":33,"./buildCommon":34,"./fontMetrics":41,"./symbols":48,"./utils":51}],39:[function(e,t){function n(e){return e&&e.__esModule?e:{"default":e}}var r=n(e("babel-runtime/helpers/classCallCheck")),i=n(e("babel-runtime/helpers/createClass")),a=n(e("./unicodeRegexes")),o=n(e("./utils")),s=function(e){for(var t=(e=e.slice()).length-1;t>=0;t--)e[t]||e.splice(t,1);return e.join(" ")},l=function(){function 
e(t,n,i){(0,r["default"])(this,e),this.classes=t||[],this.children=n||[],this.height=0,this.depth=0,this.maxFontSize=0,this.style={},this.attributes={},this.innerHTML,i&&(i.style.isTight()&&this.classes.push("mtight"),i.getColor()&&(this.style.color=i.getColor()))}return(0,i["default"])(e,[{key:"setAttribute",value:function(e,t){this.attributes[e]=t}},{key:"tryCombine",value:function(){return!1}},{key:"toNode",value:function(){var e=document.createElement("span");for(var t in e.className=s(this.classes),this.style)Object.prototype.hasOwnProperty.call(this.style,t)&&(e.style[t]=this.style[t]);for(var n in this.attributes)Object.prototype.hasOwnProperty.call(this.attributes,n)&&e.setAttribute(n,this.attributes[n]);this.innerHTML&&(e.innerHTML=this.innerHTML);for(var r=0;r0||s(this.classes)!==s(t.classes)||this.skew!==t.skew||this.maxFontSize!==t.maxFontSize)return!1;for(var n in this.style)if(this.style.hasOwnProperty(n)&&this.style[n]!==t.style[n])return!1;for(var r in t.style)if(t.style.hasOwnProperty(r)&&this.style[r]!==t.style[r])return!1;return this.value+=t.value,this.height=Math.max(this.height,t.height),this.depth=Math.max(this.depth,t.depth),this.italic=t.italic,!0}},{key:"toNode",value:function(){var e=document.createTextNode(this.value),t=null;for(var n in this.italic>0&&((t=document.createElement("span")).style.marginRight=this.italic+"em"),this.classes.length>0&&((t=t||document.createElement("span")).className=s(this.classes)),this.style)this.style.hasOwnProperty(n)&&((t=t||document.createElement("span")).style[n]=this.style[n]);return t?(t.appendChild(e),t):e}},{key:"toMarkup",value:function(){var e=!1,t="0&&(n+="margin-right:"+this.italic+"em;"),this.style)this.style.hasOwnProperty(r)&&(n+=o["default"].hyphenate(r)+":"+this.style[r]+";");n&&(e=!0,t+=' style="'+o["default"].escape(n)+'"');var i=o["default"].escape(this.value);return e?(t+=">",t+=i,t+="
      "):i}}]),e}();t.exports={span:l,documentFragment:u,symbolNode:c}},{"./unicodeRegexes":49,"./utils":51,"babel-runtime/helpers/classCallCheck":4,"babel-runtime/helpers/createClass":5}],40:[function(e,t){function n(e){return e&&e.__esModule?e:{"default":e}}function r(e,t,n){for(var r=[],i=[r],a=[];;){var l=e.parseExpression(!1,null);l=new o["default"]("ordgroup",l,e.mode),n&&(l=new o["default"]("styling",{style:n,value:[l]},e.mode)),r.push(l);var u=e.nextToken.text;if("&"===u)e.consume();else{if("\\end"===u)break;if("\\\\"!==u&&"\\cr"!==u)throw new s["default"]("Expected & or \\\\ or \\end",e.nextToken);var d=e.parseFunction();a.push(d.value.size),r=[],i.push(r)}}return t.body=i,t.rowGaps=a,new o["default"](t.type,t,e.mode)}function i(e,n,r){"string"==typeof e&&(e=[e]),"number"==typeof n&&(n={numArgs:n});for(var i={numArgs:n.numArgs||0,argTypes:n.argTypes,greediness:1,allowedInText:!!n.allowedInText,numOptionalArgs:n.numOptionalArgs||0,handler:r},a=0;a0&&(l=2),t.value.cols[a]={type:"align",align:s,pregap:l,postgap:0}}return t}),i("gathered",{},function(e){var t={type:"array",cols:[{type:"align",align:"c"}],addJot:!0};return t=r(e.parser,t,"display")})},{"./ParseError":29,"./ParseNode":30}],41:[function(e,t){function n(e){return e&&e.__esModule?e:{"default":e}}var 
r=e("./unicodeRegexes"),i=n(e("./fontMetricsData")),a={slant:[.25,.25,.25],space:[0,0,0],stretch:[0,0,0],shrink:[0,0,0],xHeight:[.431,.431,.431],quad:[1,1.171,1.472],extraSpace:[0,0,0],num1:[.677,.732,.925],num2:[.394,.384,.387],num3:[.444,.471,.504],denom1:[.686,.752,1.025],denom2:[.345,.344,.532],sup1:[.413,.503,.504],sup2:[.363,.431,.404],sup3:[.289,.286,.294],sub1:[.15,.143,.2],sub2:[.247,.286,.4],supDrop:[.386,.353,.494],subDrop:[.05,.071,.1],delim1:[2.39,1.7,1.98],delim2:[1.01,1.157,1.42],axisHeight:[.25,.25,.25],defaultRuleThickness:[.04,.049,.049],bigOpSpacing1:[.111,.111,.111],bigOpSpacing2:[.166,.166,.166],bigOpSpacing3:[.2,.2,.2],bigOpSpacing4:[.6,.611,.611],bigOpSpacing5:[.1,.143,.143],sqrtRuleThickness:[.04,.04,.04],ptPerEm:[10,10,10],doubleRuleSep:[.2,.2,.2]},o={"\xc0":"A","\xc1":"A","\xc2":"A","\xc3":"A","\xc4":"A","\xc5":"A","\xc6":"A","\xc7":"C","\xc8":"E","\xc9":"E","\xca":"E","\xcb":"E","\xcc":"I","\xcd":"I","\xce":"I","\xcf":"I","\xd0":"D","\xd1":"N","\xd2":"O","\xd3":"O","\xd4":"O","\xd5":"O","\xd6":"O","\xd8":"O","\xd9":"U","\xda":"U","\xdb":"U","\xdc":"U","\xdd":"Y","\xde":"o","\xdf":"B","\xe0":"a","\xe1":"a","\xe2":"a","\xe3":"a","\xe4":"a","\xe5":"a","\xe6":"a","\xe7":"c","\xe8":"e","\xe9":"e","\xea":"e","\xeb":"e","\xec":"i","\xed":"i","\xee":"i","\xef":"i","\xf0":"d","\xf1":"n","\xf2":"o","\xf3":"o","\xf4":"o","\xf5":"o","\xf6":"o","\xf8":"o","\xf9":"u","\xfa":"u","\xfb":"u","\xfc":"u","\xfd":"y","\xfe":"o","\xff":"y","\u0410":"A","\u0411":"B","\u0412":"B","\u0413":"F","\u0414":"A","\u0415":"E","\u0416":"K","\u0417":"3","\u0418":"N","\u0419":"N","\u041a":"K","\u041b":"N","\u041c":"M","\u041d":"H","\u041e":"O","\u041f":"N","\u0420":"P","\u0421":"C","\u0422":"T","\u0423":"y","\u0424":"O","\u0425":"X","\u0426":"U","\u0427":"h","\u0428":"W","\u0429":"W","\u042a":"B","\u042b":"X","\u042c":"B","\u042d":"3","\u042e":"X","\u042f":"R","\u0430":"a","\u0431":"b","\u0432":"a","\u0433":"r","\u0434":"y","\u0435":"e","\u0436":"m","\u0437":"e","\u0438":"n
","\u0439":"n","\u043a":"n","\u043b":"n","\u043c":"m","\u043d":"n","\u043e":"o","\u043f":"n","\u0440":"p","\u0441":"c","\u0442":"o","\u0443":"y","\u0444":"b","\u0445":"x","\u0446":"n","\u0447":"n","\u0448":"w","\u0449":"w","\u044a":"a","\u044b":"m","\u044c":"a","\u044d":"e","\u044e":"m","\u044f":"r"},s=function(e,t){var n=e.charCodeAt(0);e[0]in o?n=o[e[0]].charCodeAt(0):r.cjkRegex.test(e[0])&&(n="M".charCodeAt(0));var a=i["default"][t][n];if(a)return{depth:a[0],height:a[1],italic:a[2],skew:a[3],width:a[4]}},l={},u=function(e){var t=void 0;if(!l[t=e>=5?0:e>=3?1:2]){var n=l[t]={};for(var r in a)a.hasOwnProperty(r)&&(n[r]=a[r][t]);n.cssEmPerMu=n.quad/18}return l[t]};t.exports={getFontMetrics:u,getCharacterMetrics:s}},{"./fontMetricsData":42,"./unicodeRegexes":49}],42:[function(e,t){t.exports={"AMS-Regular":{65:[0,.68889,0,0],66:[0,.68889,0,0],67:[0,.68889,0,0],68:[0,.68889,0,0],69:[0,.68889,0,0],70:[0,.68889,0,0],71:[0,.68889,0,0],72:[0,.68889,0,0],73:[0,.68889,0,0],74:[.16667,.68889,0,0],75:[0,.68889,0,0],76:[0,.68889,0,0],77:[0,.68889,0,0],78:[0,.68889,0,0],79:[.16667,.68889,0,0],80:[0,.68889,0,0],81:[.16667,.68889,0,0],82:[0,.68889,0,0],83:[0,.68889,0,0],84:[0,.68889,0,0],85:[0,.68889,0,0],86:[0,.68889,0,0],87:[0,.68889,0,0],88:[0,.68889,0,0],89:[0,.68889,0,0],90:[0,.68889,0,0],107:[0,.68889,0,0],165:[0,.675,.025,0],174:[.15559,.69224,0,0],240:[0,.68889,0,0],295:[0,.68889,0,0],710:[0,.825,0,0],732:[0,.9,0,0],770:[0,.825,0,0],771:[0,.9,0,0],989:[.08167,.58167,0,0],1008:[0,.43056,.04028,0],8245:[0,.54986,0,0],8463:[0,.68889,0,0],8487:[0,.68889,0,0],8498:[0,.68889,0,0],8502:[0,.68889,0,0],8503:[0,.68889,0,0],8504:[0,.68889,0,0],8513:[0,.68889,0,0],8592:[-.03598,.46402,0,0],8594:[-.03598,.46402,0,0],8602:[-.13313,.36687,0,0],8603:[-.13313,.36687,0,0],8606:[.01354,.52239,0,0],8608:[.01354,.52239,0,0],8610:[.01354,.52239,0,0],8611:[.01354,.52239,0,0],8619:[0,.54986,0,0],8620:[0,.54986,0,0],8621:[-.13313,.37788,0,0],8622:[-.13313,.36687,0,0],8624:[0,.69224,0,0],8625:[0,.69
224,0,0],8630:[0,.43056,0,0],8631:[0,.43056,0,0],8634:[.08198,.58198,0,0],8635:[.08198,.58198,0,0],8638:[.19444,.69224,0,0],8639:[.19444,.69224,0,0],8642:[.19444,.69224,0,0],8643:[.19444,.69224,0,0],8644:[.1808,.675,0,0],8646:[.1808,.675,0,0],8647:[.1808,.675,0,0],8648:[.19444,.69224,0,0],8649:[.1808,.675,0,0],8650:[.19444,.69224,0,0],8651:[.01354,.52239,0,0],8652:[.01354,.52239,0,0],8653:[-.13313,.36687,0,0],8654:[-.13313,.36687,0,0],8655:[-.13313,.36687,0,0],8666:[.13667,.63667,0,0],8667:[.13667,.63667,0,0],8669:[-.13313,.37788,0,0],8672:[-.064,.437,0,0],8674:[-.064,.437,0,0],8705:[0,.825,0,0],8708:[0,.68889,0,0],8709:[.08167,.58167,0,0],8717:[0,.43056,0,0],8722:[-.03598,.46402,0,0],8724:[.08198,.69224,0,0],8726:[.08167,.58167,0,0],8733:[0,.69224,0,0],8736:[0,.69224,0,0],8737:[0,.69224,0,0],8738:[.03517,.52239,0,0],8739:[.08167,.58167,0,0],8740:[.25142,.74111,0,0],8741:[.08167,.58167,0,0],8742:[.25142,.74111,0,0],8756:[0,.69224,0,0],8757:[0,.69224,0,0],8764:[-.13313,.36687,0,0],8765:[-.13313,.37788,0,0],8769:[-.13313,.36687,0,0],8770:[-.03625,.46375,0,0],8774:[.30274,.79383,0,0],8776:[-.01688,.48312,0,0],8778:[.08167,.58167,0,0],8782:[.06062,.54986,0,0],8783:[.06062,.54986,0,0],8785:[.08198,.58198,0,0],8786:[.08198,.58198,0,0],8787:[.08198,.58198,0,0],8790:[0,.69224,0,0],8791:[.22958,.72958,0,0],8796:[.08198,.91667,0,0],8806:[.25583,.75583,0,0],8807:[.25583,.75583,0,0],8808:[.25142,.75726,0,0],8809:[.25142,.75726,0,0],8812:[.25583,.75583,0,0],8814:[.20576,.70576,0,0],8815:[.20576,.70576,0,0],8816:[.30274,.79383,0,0],8817:[.30274,.79383,0,0],8818:[.22958,.72958,0,0],8819:[.22958,.72958,0,0],8822:[.1808,.675,0,0],8823:[.1808,.675,0,0],8828:[.13667,.63667,0,0],8829:[.13667,.63667,0,0],8830:[.22958,.72958,0,0],8831:[.22958,.72958,0,0],8832:[.20576,.70576,0,0],8833:[.20576,.70576,0,0],8840:[.30274,.79383,0,0],8841:[.30274,.79383,0,0],8842:[.13597,.63597,0,0],8843:[.13597,.63597,0,0],8847:[.03517,.54986,0,0],8848:[.03517,.54986,0,0],8858:[.08198,.58198,0,0],8859:[.08198
,.58198,0,0],8861:[.08198,.58198,0,0],8862:[0,.675,0,0],8863:[0,.675,0,0],8864:[0,.675,0,0],8865:[0,.675,0,0],8872:[0,.69224,0,0],8873:[0,.69224,0,0],8874:[0,.69224,0,0],8876:[0,.68889,0,0],8877:[0,.68889,0,0],8878:[0,.68889,0,0],8879:[0,.68889,0,0],8882:[.03517,.54986,0,0],8883:[.03517,.54986,0,0],8884:[.13667,.63667,0,0],8885:[.13667,.63667,0,0],8888:[0,.54986,0,0],8890:[.19444,.43056,0,0],8891:[.19444,.69224,0,0],8892:[.19444,.69224,0,0],8901:[0,.54986,0,0],8903:[.08167,.58167,0,0],8905:[.08167,.58167,0,0],8906:[.08167,.58167,0,0],8907:[0,.69224,0,0],8908:[0,.69224,0,0],8909:[-.03598,.46402,0,0],8910:[0,.54986,0,0],8911:[0,.54986,0,0],8912:[.03517,.54986,0,0],8913:[.03517,.54986,0,0],8914:[0,.54986,0,0],8915:[0,.54986,0,0],8916:[0,.69224,0,0],8918:[.0391,.5391,0,0],8919:[.0391,.5391,0,0],8920:[.03517,.54986,0,0],8921:[.03517,.54986,0,0],8922:[.38569,.88569,0,0],8923:[.38569,.88569,0,0],8926:[.13667,.63667,0,0],8927:[.13667,.63667,0,0],8928:[.30274,.79383,0,0],8929:[.30274,.79383,0,0],8934:[.23222,.74111,0,0],8935:[.23222,.74111,0,0],8936:[.23222,.74111,0,0],8937:[.23222,.74111,0,0],8938:[.20576,.70576,0,0],8939:[.20576,.70576,0,0],8940:[.30274,.79383,0,0],8941:[.30274,.79383,0,0],8994:[.19444,.69224,0,0],8995:[.19444,.69224,0,0],9416:[.15559,.69224,0,0],9484:[0,.69224,0,0],9488:[0,.69224,0,0],9492:[0,.37788,0,0],9496:[0,.37788,0,0],9585:[.19444,.68889,0,0],9586:[.19444,.74111,0,0],9632:[0,.675,0,0],9633:[0,.675,0,0],9650:[0,.54986,0,0],9651:[0,.54986,0,0],9654:[.03517,.54986,0,0],9660:[0,.54986,0,0],9661:[0,.54986,0,0],9664:[.03517,.54986,0,0],9674:[.11111,.69224,0,0],9733:[.19444,.69224,0,0],10003:[0,.69224,0,0],10016:[0,.69224,0,0],10731:[.11111,.69224,0,0],10846:[.19444,.75583,0,0],10877:[.13667,.63667,0,0],10878:[.13667,.63667,0,0],10885:[.25583,.75583,0,0],10886:[.25583,.75583,0,0],10887:[.13597,.63597,0,0],10888:[.13597,.63597,0,0],10889:[.26167,.75726,0,0],10890:[.26167,.75726,0,0],10891:[.48256,.98256,0,0],10892:[.48256,.98256,0,0],10901:[.13667,.63667,0,
0],10902:[.13667,.63667,0,0],10933:[.25142,.75726,0,0],10934:[.25142,.75726,0,0],10935:[.26167,.75726,0,0],10936:[.26167,.75726,0,0],10937:[.26167,.75726,0,0],10938:[.26167,.75726,0,0],10949:[.25583,.75583,0,0],10950:[.25583,.75583,0,0],10955:[.28481,.79383,0,0],10956:[.28481,.79383,0,0],57350:[.08167,.58167,0,0],57351:[.08167,.58167,0,0],57352:[.08167,.58167,0,0],57353:[0,.43056,.04028,0],57356:[.25142,.75726,0,0],57357:[.25142,.75726,0,0],57358:[.41951,.91951,0,0],57359:[.30274,.79383,0,0],57360:[.30274,.79383,0,0],57361:[.41951,.91951,0,0],57366:[.25142,.75726,0,0],57367:[.25142,.75726,0,0],57368:[.25142,.75726,0,0],57369:[.25142,.75726,0,0],57370:[.13597,.63597,0,0],57371:[.13597,.63597,0,0]},"Caligraphic-Regular":{48:[0,.43056,0,0],49:[0,.43056,0,0],50:[0,.43056,0,0],51:[.19444,.43056,0,0],52:[.19444,.43056,0,0],53:[.19444,.43056,0,0],54:[0,.64444,0,0],55:[.19444,.43056,0,0],56:[0,.64444,0,0],57:[.19444,.43056,0,0],65:[0,.68333,0,.19445],66:[0,.68333,.03041,.13889],67:[0,.68333,.05834,.13889], 
+68:[0,.68333,.02778,.08334],69:[0,.68333,.08944,.11111],70:[0,.68333,.09931,.11111],71:[.09722,.68333,.0593,.11111],72:[0,.68333,.00965,.11111],73:[0,.68333,.07382,0],74:[.09722,.68333,.18472,.16667],75:[0,.68333,.01445,.05556],76:[0,.68333,0,.13889],77:[0,.68333,0,.13889],78:[0,.68333,.14736,.08334],79:[0,.68333,.02778,.11111],80:[0,.68333,.08222,.08334],81:[.09722,.68333,0,.11111],82:[0,.68333,0,.08334],83:[0,.68333,.075,.13889],84:[0,.68333,.25417,0],85:[0,.68333,.09931,.08334],86:[0,.68333,.08222,0],87:[0,.68333,.08222,.08334],88:[0,.68333,.14643,.13889],89:[.09722,.68333,.08222,.08334],90:[0,.68333,.07944,.13889]},"Fraktur-Regular":{33:[0,.69141,0,0],34:[0,.69141,0,0],38:[0,.69141,0,0],39:[0,.69141,0,0],40:[.24982,.74947,0,0],41:[.24982,.74947,0,0],42:[0,.62119,0,0],43:[.08319,.58283,0,0],44:[0,.10803,0,0],45:[.08319,.58283,0,0],46:[0,.10803,0,0],47:[.24982,.74947,0,0],48:[0,.47534,0,0],49:[0,.47534,0,0],50:[0,.47534,0,0],51:[.18906,.47534,0,0],52:[.18906,.47534,0,0],53:[.18906,.47534,0,0],54:[0,.69141,0,0],55:[.18906,.47534,0,0],56:[0,.69141,0,0],57:[.18906,.47534,0,0],58:[0,.47534,0,0],59:[.12604,.47534,0,0],61:[-.13099,.36866,0,0],63:[0,.69141,0,0],65:[0,.69141,0,0],66:[0,.69141,0,0],67:[0,.69141,0,0],68:[0,.69141,0,0],69:[0,.69141,0,0],70:[.12604,.69141,0,0],71:[0,.69141,0,0],72:[.06302,.69141,0,0],73:[0,.69141,0,0],74:[.12604,.69141,0,0],75:[0,.69141,0,0],76:[0,.69141,0,0],77:[0,.69141,0,0],78:[0,.69141,0,0],79:[0,.69141,0,0],80:[.18906,.69141,0,0],81:[.03781,.69141,0,0],82:[0,.69141,0,0],83:[0,.69141,0,0],84:[0,.69141,0,0],85:[0,.69141,0,0],86:[0,.69141,0,0],87:[0,.69141,0,0],88:[0,.69141,0,0],89:[.18906,.69141,0,0],90:[.12604,.69141,0,0],91:[.24982,.74947,0,0],93:[.24982,.74947,0,0],94:[0,.69141,0,0],97:[0,.47534,0,0],98:[0,.69141,0,0],99:[0,.47534,0,0],100:[0,.62119,0,0],101:[0,.47534,0,0],102:[.18906,.69141,0,0],103:[.18906,.47534,0,0],104:[.18906,.69141,0,0],105:[0,.69141,0,0],106:[0,.69141,0,0],107:[0,.69141,0,0],108:[0,.69141,0,0],109:[0,.47534,0,0
],110:[0,.47534,0,0],111:[0,.47534,0,0],112:[.18906,.52396,0,0],113:[.18906,.47534,0,0],114:[0,.47534,0,0],115:[0,.47534,0,0],116:[0,.62119,0,0],117:[0,.47534,0,0],118:[0,.52396,0,0],119:[0,.52396,0,0],120:[.18906,.47534,0,0],121:[.18906,.47534,0,0],122:[.18906,.47534,0,0],8216:[0,.69141,0,0],8217:[0,.69141,0,0],58112:[0,.62119,0,0],58113:[0,.62119,0,0],58114:[.18906,.69141,0,0],58115:[.18906,.69141,0,0],58116:[.18906,.47534,0,0],58117:[0,.69141,0,0],58118:[0,.62119,0,0],58119:[0,.47534,0,0]},"Main-Bold":{33:[0,.69444,0,0],34:[0,.69444,0,0],35:[.19444,.69444,0,0],36:[.05556,.75,0,0],37:[.05556,.75,0,0],38:[0,.69444,0,0],39:[0,.69444,0,0],40:[.25,.75,0,0],41:[.25,.75,0,0],42:[0,.75,0,0],43:[.13333,.63333,0,0],44:[.19444,.15556,0,0],45:[0,.44444,0,0],46:[0,.15556,0,0],47:[.25,.75,0,0],48:[0,.64444,0,0],49:[0,.64444,0,0],50:[0,.64444,0,0],51:[0,.64444,0,0],52:[0,.64444,0,0],53:[0,.64444,0,0],54:[0,.64444,0,0],55:[0,.64444,0,0],56:[0,.64444,0,0],57:[0,.64444,0,0],58:[0,.44444,0,0],59:[.19444,.44444,0,0],60:[.08556,.58556,0,0],61:[-.10889,.39111,0,0],62:[.08556,.58556,0,0],63:[0,.69444,0,0],64:[0,.69444,0,0],65:[0,.68611,0,0],66:[0,.68611,0,0],67:[0,.68611,0,0],68:[0,.68611,0,0],69:[0,.68611,0,0],70:[0,.68611,0,0],71:[0,.68611,0,0],72:[0,.68611,0,0],73:[0,.68611,0,0],74:[0,.68611,0,0],75:[0,.68611,0,0],76:[0,.68611,0,0],77:[0,.68611,0,0],78:[0,.68611,0,0],79:[0,.68611,0,0],80:[0,.68611,0,0],81:[.19444,.68611,0,0],82:[0,.68611,0,0],83:[0,.68611,0,0],84:[0,.68611,0,0],85:[0,.68611,0,0],86:[0,.68611,.01597,0],87:[0,.68611,.01597,0],88:[0,.68611,0,0],89:[0,.68611,.02875,0],90:[0,.68611,0,0],91:[.25,.75,0,0],92:[.25,.75,0,0],93:[.25,.75,0,0],94:[0,.69444,0,0],95:[.31,.13444,.03194,0],96:[0,.69444,0,0],97:[0,.44444,0,0],98:[0,.69444,0,0],99:[0,.44444,0,0],100:[0,.69444,0,0],101:[0,.44444,0,0],102:[0,.69444,.10903,0],103:[.19444,.44444,.01597,0],104:[0,.69444,0,0],105:[0,.69444,0,0],106:[.19444,.69444,0,0],107:[0,.69444,0,0],108:[0,.69444,0,0],109:[0,.44444,0,0],110:[0,.44444,0
,0],111:[0,.44444,0,0],112:[.19444,.44444,0,0],113:[.19444,.44444,0,0],114:[0,.44444,0,0],115:[0,.44444,0,0],116:[0,.63492,0,0],117:[0,.44444,0,0],118:[0,.44444,.01597,0],119:[0,.44444,.01597,0],120:[0,.44444,0,0],121:[.19444,.44444,.01597,0],122:[0,.44444,0,0],123:[.25,.75,0,0],124:[.25,.75,0,0],125:[.25,.75,0,0],126:[.35,.34444,0,0],168:[0,.69444,0,0],172:[0,.44444,0,0],175:[0,.59611,0,0],176:[0,.69444,0,0],177:[.13333,.63333,0,0],180:[0,.69444,0,0],215:[.13333,.63333,0,0],247:[.13333,.63333,0,0],305:[0,.44444,0,0],567:[.19444,.44444,0,0],710:[0,.69444,0,0],711:[0,.63194,0,0],713:[0,.59611,0,0],714:[0,.69444,0,0],715:[0,.69444,0,0],728:[0,.69444,0,0],729:[0,.69444,0,0],730:[0,.69444,0,0],732:[0,.69444,0,0],768:[0,.69444,0,0],769:[0,.69444,0,0],770:[0,.69444,0,0],771:[0,.69444,0,0],772:[0,.59611,0,0],774:[0,.69444,0,0],775:[0,.69444,0,0],776:[0,.69444,0,0],778:[0,.69444,0,0],779:[0,.69444,0,0],780:[0,.63194,0,0],824:[.19444,.69444,0,0],915:[0,.68611,0,0],916:[0,.68611,0,0],920:[0,.68611,0,0],923:[0,.68611,0,0],926:[0,.68611,0,0],928:[0,.68611,0,0],931:[0,.68611,0,0],933:[0,.68611,0,0],934:[0,.68611,0,0],936:[0,.68611,0,0],937:[0,.68611,0,0],8211:[0,.44444,.03194,0],8212:[0,.44444,.03194,0],8216:[0,.69444,0,0],8217:[0,.69444,0,0],8220:[0,.69444,0,0],8221:[0,.69444,0,0],8224:[.19444,.69444,0,0],8225:[.19444,.69444,0,0],8242:[0,.55556,0,0],8407:[0,.72444,.15486,0],8463:[0,.69444,0,0],8465:[0,.69444,0,0],8467:[0,.69444,0,0],8472:[.19444,.44444,0,0],8476:[0,.69444,0,0],8501:[0,.69444,0,0],8592:[-.10889,.39111,0,0],8593:[.19444,.69444,0,0],8594:[-.10889,.39111,0,0],8595:[.19444,.69444,0,0],8596:[-.10889,.39111,0,0],8597:[.25,.75,0,0],8598:[.19444,.69444,0,0],8599:[.19444,.69444,0,0],8600:[.19444,.69444,0,0],8601:[.19444,.69444,0,0],8636:[-.10889,.39111,0,0],8637:[-.10889,.39111,0,0],8640:[-.10889,.39111,0,0],8641:[-.10889,.39111,0,0],8656:[-.10889,.39111,0,0],8657:[.19444,.69444,0,0],8658:[-.10889,.39111,0,0],8659:[.19444,.69444,0,0],8660:[-.10889,.39111,0,0],8661:[.25,.
75,0,0],8704:[0,.69444,0,0],8706:[0,.69444,.06389,0],8707:[0,.69444,0,0],8709:[.05556,.75,0,0],8711:[0,.68611,0,0],8712:[.08556,.58556,0,0],8715:[.08556,.58556,0,0],8722:[.13333,.63333,0,0],8723:[.13333,.63333,0,0],8725:[.25,.75,0,0],8726:[.25,.75,0,0],8727:[-.02778,.47222,0,0],8728:[-.02639,.47361,0,0],8729:[-.02639,.47361,0,0],8730:[.18,.82,0,0],8733:[0,.44444,0,0],8734:[0,.44444,0,0],8736:[0,.69224,0,0],8739:[.25,.75,0,0],8741:[.25,.75,0,0],8743:[0,.55556,0,0],8744:[0,.55556,0,0],8745:[0,.55556,0,0],8746:[0,.55556,0,0],8747:[.19444,.69444,.12778,0],8764:[-.10889,.39111,0,0],8768:[.19444,.69444,0,0],8771:[.00222,.50222,0,0],8776:[.02444,.52444,0,0],8781:[.00222,.50222,0,0],8801:[.00222,.50222,0,0],8804:[.19667,.69667,0,0],8805:[.19667,.69667,0,0],8810:[.08556,.58556,0,0],8811:[.08556,.58556,0,0],8826:[.08556,.58556,0,0],8827:[.08556,.58556,0,0],8834:[.08556,.58556,0,0],8835:[.08556,.58556,0,0],8838:[.19667,.69667,0,0],8839:[.19667,.69667,0,0],8846:[0,.55556,0,0],8849:[.19667,.69667,0,0],8850:[.19667,.69667,0,0],8851:[0,.55556,0,0],8852:[0,.55556,0,0],8853:[.13333,.63333,0,0],8854:[.13333,.63333,0,0],8855:[.13333,.63333,0,0],8856:[.13333,.63333,0,0],8857:[.13333,.63333,0,0],8866:[0,.69444,0,0],8867:[0,.69444,0,0],8868:[0,.69444,0,0],8869:[0,.69444,0,0],8900:[-.02639,.47361,0,0],8901:[-.02639,.47361,0,0],8902:[-.02778,.47222,0,0],8968:[.25,.75,0,0],8969:[.25,.75,0,0],8970:[.25,.75,0,0],8971:[.25,.75,0,0],8994:[-.13889,.36111,0,0],8995:[-.13889,.36111,0,0],9651:[.19444,.69444,0,0],9657:[-.02778,.47222,0,0],9661:[.19444,.69444,0,0],9667:[-.02778,.47222,0,0],9711:[.19444,.69444,0,0],9824:[.12963,.69444,0,0],9825:[.12963,.69444,0,0],9826:[.12963,.69444,0,0],9827:[.12963,.69444,0,0],9837:[0,.75,0,0],9838:[.19444,.69444,0,0],9839:[.19444,.69444,0,0],10216:[.25,.75,0,0],10217:[.25,.75,0,0],10815:[0,.68611,0,0],10927:[.19667,.69667,0,0],10928:[.19667,.69667,0,0]},"Main-Italic":{33:[0,.69444,.12417,0],34:[0,.69444,.06961,0],35:[.19444,.69444,.06616,0],37:[.05556,.75,.13639,0
],38:[0,.69444,.09694,0],39:[0,.69444,.12417,0],40:[.25,.75,.16194,0],41:[.25,.75,.03694,0],42:[0,.75,.14917,0],43:[.05667,.56167,.03694,0],44:[.19444,.10556,0,0],45:[0,.43056,.02826,0],46:[0,.10556,0,0],47:[.25,.75,.16194,0],48:[0,.64444,.13556,0],49:[0,.64444,.13556,0],50:[0,.64444,.13556,0],51:[0,.64444,.13556,0],52:[.19444,.64444,.13556,0],53:[0,.64444,.13556,0],54:[0,.64444,.13556,0],55:[.19444,.64444,.13556,0],56:[0,.64444,.13556,0],57:[0,.64444,.13556,0],58:[0,.43056,.0582,0],59:[.19444,.43056,.0582,0],61:[-.13313,.36687,.06616,0],63:[0,.69444,.1225,0],64:[0,.69444,.09597,0],65:[0,.68333,0,0],66:[0,.68333,.10257,0],67:[0,.68333,.14528,0],68:[0,.68333,.09403,0],69:[0,.68333,.12028,0],70:[0,.68333,.13305,0],71:[0,.68333,.08722,0],72:[0,.68333,.16389,0],73:[0,.68333,.15806,0],74:[0,.68333,.14028,0],75:[0,.68333,.14528,0],76:[0,.68333,0,0],77:[0,.68333,.16389,0],78:[0,.68333,.16389,0],79:[0,.68333,.09403,0],80:[0,.68333,.10257,0],81:[.19444,.68333,.09403,0],82:[0,.68333,.03868,0],83:[0,.68333,.11972,0],84:[0,.68333,.13305,0],85:[0,.68333,.16389,0],86:[0,.68333,.18361,0],87:[0,.68333,.18361,0],88:[0,.68333,.15806,0],89:[0,.68333,.19383,0],90:[0,.68333,.14528,0],91:[.25,.75,.1875,0],93:[.25,.75,.10528,0],94:[0,.69444,.06646,0],95:[.31,.12056,.09208,0],97:[0,.43056,.07671,0],98:[0,.69444,.06312,0],99:[0,.43056,.05653,0],100:[0,.69444,.10333,0],101:[0,.43056,.07514,0],102:[.19444,.69444,.21194,0],103:[.19444,.43056,.08847,0],104:[0,.69444,.07671,0],105:[0,.65536,.1019,0],106:[.19444,.65536,.14467,0],107:[0,.69444,.10764,0],108:[0,.69444,.10333,0],109:[0,.43056,.07671,0],110:[0,.43056,.07671,0],111:[0,.43056,.06312,0],112:[.19444,.43056,.06312,0],113:[.19444,.43056,.08847,0],114:[0,.43056,.10764,0],115:[0,.43056,.08208,0],116:[0,.61508,.09486,0],117:[0,.43056,.07671,0],118:[0,.43056,.10764,0],119:[0,.43056,.10764,0],120:[0,.43056,.12042,0],121:[.19444,.43056,.08847,0],122:[0,.43056,.12292,0],126:[.35,.31786,.11585,0],163:[0,.69444,0,0],305:[0,.43056,0,.02778],567:[.19
444,.43056,0,.08334],768:[0,.69444,0,0],769:[0,.69444,.09694,0],770:[0,.69444,.06646,0],771:[0,.66786,.11585,0],772:[0,.56167,.10333,0],774:[0,.69444,.10806,0],775:[0,.66786,.11752,0],776:[0,.66786,.10474,0],778:[0,.69444,0,0],779:[0,.69444,.1225,0],780:[0,.62847,.08295,0],915:[0,.68333,.13305,0],916:[0,.68333,0,0],920:[0,.68333,.09403,0],923:[0,.68333,0,0],926:[0,.68333,.15294,0],928:[0,.68333,.16389,0],931:[0,.68333,.12028,0],933:[0,.68333,.11111,0],934:[0,.68333,.05986,0],936:[0,.68333,.11111,0],937:[0,.68333,.10257,0],8211:[0,.43056,.09208,0],8212:[0,.43056,.09208,0],8216:[0,.69444,.12417,0],8217:[0,.69444,.12417,0],8220:[0,.69444,.1685,0],8221:[0,.69444,.06961,0],8463:[0,.68889,0,0]},"Main-Regular":{32:[0,0,0,0],33:[0,.69444,0,0],34:[0,.69444,0,0],35:[.19444,.69444,0,0],36:[.05556,.75,0,0],37:[.05556,.75,0,0],38:[0,.69444,0,0],39:[0,.69444,0,0],40:[.25,.75,0,0],41:[.25,.75,0,0],42:[0,.75,0,0],43:[.08333,.58333,0,0],44:[.19444,.10556,0,0],45:[0,.43056,0,0],46:[0,.10556,0,0],47:[.25,.75,0,0],48:[0,.64444,0,0],49:[0,.64444,0,0],50:[0,.64444,0,0],51:[0,.64444,0,0],52:[0,.64444,0,0],53:[0,.64444,0,0],54:[0,.64444,0,0],55:[0,.64444,0,0],56:[0,.64444,0,0],57:[0,.64444,0,0],58:[0,.43056,0,0],59:[.19444,.43056,0,0],60:[.0391,.5391,0,0],61:[-.13313,.36687,0,0],62:[.0391,.5391,0,0],63:[0,.69444,0,0],64:[0,.69444,0,0],65:[0,.68333,0,0],66:[0,.68333,0,0],67:[0,.68333,0,0],68:[0,.68333,0,0],69:[0,.68333,0,0],70:[0,.68333,0,0],71:[0,.68333,0,0],72:[0,.68333,0,0],73:[0,.68333,0,0],74:[0,.68333,0,0],75:[0,.68333,0,0],76:[0,.68333,0,0],77:[0,.68333,0,0],78:[0,.68333,0,0],79:[0,.68333,0,0],80:[0,.68333,0,0],81:[.19444,.68333,0,0],82:[0,.68333,0,0],83:[0,.68333,0,0],84:[0,.68333,0,0],85:[0,.68333,0,0],86:[0,.68333,.01389,0],87:[0,.68333,.01389,0],88:[0,.68333,0,0],89:[0,.68333,.025,0],90:[0,.68333,0,0],91:[.25,.75,0,0],92:[.25,.75,0,0],93:[.25,.75,0,0],94:[0,.69444,0,0],95:[.31,.12056,.02778,0],96:[0,.69444,0,0],97:[0,.43056,0,0],98:[0,.69444,0,0],99:[0,.43056,0,0],100:[0,.69444,0
,0],101:[0,.43056,0,0],102:[0,.69444,.07778,0],103:[.19444,.43056,.01389,0],104:[0,.69444,0,0],105:[0,.66786,0,0],106:[.19444,.66786,0,0],107:[0,.69444,0,0],108:[0,.69444,0,0],109:[0,.43056,0,0],110:[0,.43056,0,0],111:[0,.43056,0,0],112:[.19444,.43056,0,0],113:[.19444,.43056,0,0],114:[0,.43056,0,0],115:[0,.43056,0,0],116:[0,.61508,0,0],117:[0,.43056,0,0],118:[0,.43056,.01389,0],119:[0,.43056,.01389,0],120:[0,.43056,0,0],121:[.19444,.43056,.01389,0],122:[0,.43056,0,0],123:[.25,.75,0,0],124:[.25,.75,0,0],125:[.25,.75,0,0],126:[.35,.31786,0,0],160:[0,0,0,0],168:[0,.66786,0,0],172:[0,.43056,0,0],175:[0,.56778,0,0],176:[0,.69444,0,0],177:[.08333,.58333,0,0],180:[0,.69444,0,0],215:[.08333,.58333,0,0],247:[.08333,.58333,0,0],305:[0,.43056,0,0],567:[.19444,.43056,0,0],710:[0,.69444,0,0],711:[0,.62847,0,0],713:[0,.56778,0,0],714:[0,.69444,0,0],715:[0,.69444,0,0],728:[0,.69444,0,0],729:[0,.66786,0,0],730:[0,.69444,0,0],732:[0,.66786,0,0],768:[0,.69444,0,0],769:[0,.69444,0,0],770:[0,.69444,0,0],771:[0,.66786,0,0],772:[0,.56778,0,0],774:[0,.69444,0,0],775:[0,.66786,0,0],776:[0,.66786,0,0],778:[0,.69444,0,0],779:[0,.69444,0,0],780:[0,.62847,0,0],824:[.19444,.69444,0,0],915:[0,.68333,0,0],916:[0,.68333,0,0],920:[0,.68333,0,0],923:[0,.68333,0,0],926:[0,.68333,0,0],928:[0,.68333,0,0],931:[0,.68333,0,0],933:[0,.68333,0,0],934:[0,.68333,0,0],936:[0,.68333,0,0],937:[0,.68333,0,0],8211:[0,.43056,.02778,0],8212:[0,.43056,.02778,0],8216:[0,.69444,0,0],8217:[0,.69444,0,0],8220:[0,.69444,0,0],8221:[0,.69444,0,0],8224:[.19444,.69444,0,0],8225:[.19444,.69444,0,0],8230:[0,.12,0,0],8242:[0,.55556,0,0],8407:[0,.71444,.15382,0],8463:[0,.68889,0,0],8465:[0,.69444,0,0],8467:[0,.69444,0,.11111],8472:[.19444,.43056,0,.11111],8476:[0,.69444,0,0],8501:[0,.69444,0,0],8592:[-.13313,.36687,0,0],8593:[.19444,.69444,0,0],8594:[-.13313,.36687,0,0],8595:[.19444,.69444,0,0],8596:[-.13313,.36687,0,0],8597:[.25,.75,0,0],8598:[.19444,.69444,0,0],8599:[.19444,.69444,0,0],8600:[.19444,.69444,0,0],8601:[.19444,.694
44,0,0],8614:[.011,.511,0,0],8617:[.011,.511,0,0],8618:[.011,.511,0,0],8636:[-.13313,.36687,0,0],8637:[-.13313,.36687,0,0],8640:[-.13313,.36687,0,0],8641:[-.13313,.36687,0,0],8652:[.011,.671,0,0],8656:[-.13313,.36687,0,0],8657:[.19444,.69444,0,0],8658:[-.13313,.36687,0,0],8659:[.19444,.69444,0,0],8660:[-.13313,.36687,0,0],8661:[.25,.75,0,0],8704:[0,.69444,0,0],8706:[0,.69444,.05556,.08334],8707:[0,.69444,0,0],8709:[.05556,.75,0,0],8711:[0,.68333,0,0],8712:[.0391,.5391,0,0],8715:[.0391,.5391,0,0],8722:[.08333,.58333,0,0],8723:[.08333,.58333,0,0],8725:[.25,.75,0,0],8726:[.25,.75,0,0],8727:[-.03472,.46528,0,0],8728:[-.05555,.44445,0,0],8729:[-.05555,.44445,0,0],8730:[.2,.8,0,0],8733:[0,.43056,0,0],8734:[0,.43056,0,0],8736:[0,.69224,0,0],8739:[.25,.75,0,0],8741:[.25,.75,0,0],8743:[0,.55556,0,0],8744:[0,.55556,0,0],8745:[0,.55556,0,0],8746:[0,.55556,0,0],8747:[.19444,.69444,.11111,0],8764:[-.13313,.36687,0,0],8768:[.19444,.69444,0,0],8771:[-.03625,.46375,0,0],8773:[-.022,.589,0,0],8776:[-.01688,.48312,0,0],8781:[-.03625,.46375,0,0],8784:[-.133,.67,0,0],8800:[.215,.716,0,0],8801:[-.03625,.46375,0,0],8804:[.13597,.63597,0,0],8805:[.13597,.63597,0,0],8810:[.0391,.5391,0,0],8811:[.0391,.5391,0,0],8826:[.0391,.5391,0,0],8827:[.0391,.5391,0,0],8834:[.0391,.5391,0,0],8835:[.0391,.5391,0,0],8838:[.13597,.63597,0,0],8839:[.13597,.63597,0,0],8846:[0,.55556,0,0],8849:[.13597,.63597,0,0],8850:[.13597,.63597,0,0],8851:[0,.55556,0,0],8852:[0,.55556,0,0],8853:[.08333,.58333,0,0],8854:[.08333,.58333,0,0],8855:[.08333,.58333,0,0],8856:[.08333,.58333,0,0],8857:[.08333,.58333,0,0],8866:[0,.69444,0,0],8867:[0,.69444,0,0],8868:[0,.69444,0,0],8869:[0,.69444,0,0],8872:[.249,.75,0,0],8900:[-.05555,.44445,0,0],8901:[-.05555,.44445,0,0],8902:[-.03472,.46528,0,0],8904:[.005,.505,0,0],8942:[.03,.9,0,0],8943:[-.19,.31,0,0],8945:[-.1,.82,0,0],8968:[.25,.75,0,0],8969:[.25,.75,0,0],8970:[.25,.75,0,0],8971:[.25,.75,0,0],8994:[-.14236,.35764,0,0],8995:[-.14236,.35764,0,0],9136:[.244,.744,0,0],9137:[.244,
.744,0,0],9651:[.19444,.69444,0,0],9657:[-.03472,.46528,0,0],9661:[.19444,.69444,0,0],9667:[-.03472,.46528,0,0],9711:[.19444,.69444,0,0],9824:[.12963,.69444,0,0],9825:[.12963,.69444,0,0],9826:[.12963,.69444,0,0],9827:[.12963,.69444,0,0],9837:[0,.75,0,0],9838:[.19444,.69444,0,0],9839:[.19444,.69444,0,0],10216:[.25,.75,0,0],10217:[.25,.75,0,0],10222:[.244,.744,0,0],10223:[.244,.744,0,0],10229:[.011,.511,0,0],10230:[.011,.511,0,0],10231:[.011,.511,0,0],10232:[.024,.525,0,0],10233:[.024,.525,0,0],10234:[.024,.525,0,0],10236:[.011,.511,0,0],10815:[0,.68333,0,0],10927:[.13597,.63597,0,0],10928:[.13597,.63597,0,0]},"Math-BoldItalic":{47:[.19444,.69444,0,0],65:[0,.68611,0,0],66:[0,.68611,.04835,0],67:[0,.68611,.06979,0],68:[0,.68611,.03194,0],69:[0,.68611,.05451,0],70:[0,.68611,.15972,0],71:[0,.68611,0,0],72:[0,.68611,.08229,0],73:[0,.68611,.07778,0],74:[0,.68611,.10069,0],75:[0,.68611,.06979,0],76:[0,.68611,0,0],77:[0,.68611,.11424,0],78:[0,.68611,.11424,0],79:[0,.68611,.03194,0],80:[0,.68611,.15972,0],81:[.19444,.68611,0,0],82:[0,.68611,.00421,0],83:[0,.68611,.05382,0],84:[0,.68611,.15972,0],85:[0,.68611,.11424,0],86:[0,.68611,.25555,0],87:[0,.68611,.15972,0],88:[0,.68611,.07778,0],89:[0,.68611,.25555,0],90:[0,.68611,.06979,0],97:[0,.44444,0,0],98:[0,.69444,0,0],99:[0,.44444,0,0],100:[0,.69444,0,0],101:[0,.44444,0,0],102:[.19444,.69444,.11042,0],103:[.19444,.44444,.03704,0],104:[0,.69444,0,0],105:[0,.69326,0,0],106:[.19444,.69326,.0622,0],107:[0,.69444,.01852,0],108:[0,.69444,.0088,0],109:[0,.44444,0,0],110:[0,.44444,0,0],111:[0,.44444,0,0],112:[.19444,.44444,0,0],113:[.19444,.44444,.03704,0],114:[0,.44444,.03194,0],115:[0,.44444,0,0],116:[0,.63492,0,0],117:[0,.44444,0,0],118:[0,.44444,.03704,0],119:[0,.44444,.02778,0],120:[0,.44444,0,0],121:[.19444,.44444,.03704,0],122:[0,.44444,.04213,0],915:[0,.68611,.15972,0],916:[0,.68611,0,0],920:[0,.68611,.03194,0],923:[0,.68611,0,0],926:[0,.68611,.07458,0],928:[0,.68611,.08229,0],931:[0,.68611,.05451,0],933:[0,.68611,.15972,0],934
:[0,.68611,0,0],936:[0,.68611,.11653,0],937:[0,.68611,.04835,0],945:[0,.44444,0,0],946:[.19444,.69444,.03403,0],947:[.19444,.44444,.06389,0],948:[0,.69444,.03819,0],949:[0,.44444,0,0],950:[.19444,.69444,.06215,0],951:[.19444,.44444,.03704,0],952:[0,.69444,.03194,0],953:[0,.44444,0,0],954:[0,.44444,0,0],955:[0,.69444,0,0],956:[.19444,.44444,0,0],957:[0,.44444,.06898,0],958:[.19444,.69444,.03021,0],959:[0,.44444,0,0],960:[0,.44444,.03704,0],961:[.19444,.44444,0,0],962:[.09722,.44444,.07917,0],963:[0,.44444,.03704,0],964:[0,.44444,.13472,0],965:[0,.44444,.03704,0],966:[.19444,.44444,0,0],967:[.19444,.44444,0,0],968:[.19444,.69444,.03704,0],969:[0,.44444,.03704,0],977:[0,.69444,0,0],981:[.19444,.69444,0,0],982:[0,.44444,.03194,0],1009:[.19444,.44444,0,0],1013:[0,.44444,0,0]},"Math-Italic":{47:[.19444,.69444,0,0],65:[0,.68333,0,.13889],66:[0,.68333,.05017,.08334],67:[0,.68333,.07153,.08334],68:[0,.68333,.02778,.05556],69:[0,.68333,.05764,.08334],70:[0,.68333,.13889,.08334],71:[0,.68333,0,.08334],72:[0,.68333,.08125,.05556],73:[0,.68333,.07847,.11111],74:[0,.68333,.09618,.16667],75:[0,.68333,.07153,.05556],76:[0,.68333,0,.02778],77:[0,.68333,.10903,.08334],78:[0,.68333,.10903,.08334],79:[0,.68333,.02778,.08334],80:[0,.68333,.13889,.08334],81:[.19444,.68333,0,.08334],82:[0,.68333,.00773,.08334],83:[0,.68333,.05764,.08334],84:[0,.68333,.13889,.08334],85:[0,.68333,.10903,.02778],86:[0,.68333,.22222,0],87:[0,.68333,.13889,0],88:[0,.68333,.07847,.08334],89:[0,.68333,.22222,0],90:[0,.68333,.07153,.08334],97:[0,.43056,0,0],98:[0,.69444,0,0],99:[0,.43056,0,.05556],100:[0,.69444,0,.16667],101:[0,.43056,0,.05556],102:[.19444,.69444,.10764,.16667],103:[.19444,.43056,.03588,.02778],104:[0,.69444,0,0],105:[0,.65952,0,0],106:[.19444,.65952,.05724,0],107:[0,.69444,.03148,0],108:[0,.69444,.01968,.08334],109:[0,.43056,0,0],110:[0,.43056,0,0],111:[0,.43056,0,.05556],112:[.19444,.43056,0,.08334],113:[.19444,.43056,.03588,.08334],114:[0,.43056,.02778,.05556],115:[0,.43056,0,.05556],116:[0,.6
1508,0,.08334],117:[0,.43056,0,.02778],118:[0,.43056,.03588,.02778],119:[0,.43056,.02691,.08334],120:[0,.43056,0,.02778],121:[.19444,.43056,.03588,.05556],122:[0,.43056,.04398,.05556],915:[0,.68333,.13889,.08334],916:[0,.68333,0,.16667],920:[0,.68333,.02778,.08334],923:[0,.68333,0,.16667],926:[0,.68333,.07569,.08334],928:[0,.68333,.08125,.05556],931:[0,.68333,.05764,.08334],933:[0,.68333,.13889,.05556],934:[0,.68333,0,.08334],936:[0,.68333,.11,.05556],937:[0,.68333,.05017,.08334],945:[0,.43056,.0037,.02778],946:[.19444,.69444,.05278,.08334],947:[.19444,.43056,.05556,0],948:[0,.69444,.03785,.05556],949:[0,.43056,0,.08334],950:[.19444,.69444,.07378,.08334],951:[.19444,.43056,.03588,.05556],952:[0,.69444,.02778,.08334],953:[0,.43056,0,.05556],954:[0,.43056,0,0],955:[0,.69444,0,0],956:[.19444,.43056,0,.02778],957:[0,.43056,.06366,.02778],958:[.19444,.69444,.04601,.11111],959:[0,.43056,0,.05556],960:[0,.43056,.03588,0],961:[.19444,.43056,0,.08334],962:[.09722,.43056,.07986,.08334],963:[0,.43056,.03588,0],964:[0,.43056,.1132,.02778],965:[0,.43056,.03588,.02778],966:[.19444,.43056,0,.08334],967:[.19444,.43056,0,.05556],968:[.19444,.69444,.03588,.11111],969:[0,.43056,.03588,0],977:[0,.69444,0,.08334],981:[.19444,.69444,0,.08334],982:[0,.43056,.02778,0],1009:[.19444,.43056,0,.08334],1013:[0,.43056,0,.05556]},"Math-Regular":{65:[0,.68333,0,.13889],66:[0,.68333,.05017,.08334],67:[0,.68333,.07153,.08334],68:[0,.68333,.02778,.05556],69:[0,.68333,.05764,.08334],70:[0,.68333,.13889,.08334],71:[0,.68333,0,.08334],72:[0,.68333,.08125,.05556],73:[0,.68333,.07847,.11111],74:[0,.68333,.09618,.16667],75:[0,.68333,.07153,.05556],76:[0,.68333,0,.02778],77:[0,.68333,.10903,.08334],78:[0,.68333,.10903,.08334],79:[0,.68333,.02778,.08334],80:[0,.68333,.13889,.08334],81:[.19444,.68333,0,.08334],82:[0,.68333,.00773,.08334],83:[0,.68333,.05764,.08334],84:[0,.68333,.13889,.08334],85:[0,.68333,.10903,.02778],86:[0,.68333,.22222,0],87:[0,.68333,.13889,0],88:[0,.68333,.07847,.08334],89:[0,.68333,.22
222,0],90:[0,.68333,.07153,.08334],97:[0,.43056,0,0],98:[0,.69444,0,0],99:[0,.43056,0,.05556],100:[0,.69444,0,.16667],101:[0,.43056,0,.05556],102:[.19444,.69444,.10764,.16667],103:[.19444,.43056,.03588,.02778],104:[0,.69444,0,0],105:[0,.65952,0,0],106:[.19444,.65952,.05724,0],107:[0,.69444,.03148,0],108:[0,.69444,.01968,.08334],109:[0,.43056,0,0],110:[0,.43056,0,0],111:[0,.43056,0,.05556],112:[.19444,.43056,0,.08334],113:[.19444,.43056,.03588,.08334],114:[0,.43056,.02778,.05556],115:[0,.43056,0,.05556],116:[0,.61508,0,.08334],117:[0,.43056,0,.02778],118:[0,.43056,.03588,.02778],119:[0,.43056,.02691,.08334],120:[0,.43056,0,.02778],121:[.19444,.43056,.03588,.05556],122:[0,.43056,.04398,.05556],915:[0,.68333,.13889,.08334],916:[0,.68333,0,.16667],920:[0,.68333,.02778,.08334],923:[0,.68333,0,.16667],926:[0,.68333,.07569,.08334],928:[0,.68333,.08125,.05556],931:[0,.68333,.05764,.08334],933:[0,.68333,.13889,.05556],934:[0,.68333,0,.08334],936:[0,.68333,.11,.05556],937:[0,.68333,.05017,.08334],945:[0,.43056,.0037,.02778],946:[.19444,.69444,.05278,.08334],947:[.19444,.43056,.05556,0],948:[0,.69444,.03785,.05556],949:[0,.43056,0,.08334],950:[.19444,.69444,.07378,.08334],951:[.19444,.43056,.03588,.05556],952:[0,.69444,.02778,.08334],953:[0,.43056,0,.05556],954:[0,.43056,0,0],955:[0,.69444,0,0],956:[.19444,.43056,0,.02778],957:[0,.43056,.06366,.02778],958:[.19444,.69444,.04601,.11111],959:[0,.43056,0,.05556],960:[0,.43056,.03588,0],961:[.19444,.43056,0,.08334],962:[.09722,.43056,.07986,.08334],963:[0,.43056,.03588,0],964:[0,.43056,.1132,.02778],965:[0,.43056,.03588,.02778],966:[.19444,.43056,0,.08334],967:[.19444,.43056,0,.05556],968:[.19444,.69444,.03588,.11111],969:[0,.43056,.03588,0],977:[0,.69444,0,.08334],981:[.19444,.69444,0,.08334],982:[0,.43056,.02778,0],1009:[.19444,.43056,0,.08334],1013:[0,.43056,0,.05556]},"SansSerif-Regular":{33:[0,.69444,0,0],34:[0,.69444,0,0],35:[.19444,.69444,0,0],36:[.05556,.75,0,0],37:[.05556,.75,0,0],38:[0,.69444,0,0],39:[0,.69444,0,0],40:[.2
5,.75,0,0],41:[.25,.75,0,0],42:[0,.75,0,0],43:[.08333,.58333,0,0],44:[.125,.08333,0,0],45:[0,.44444,0,0],46:[0,.08333,0,0],47:[.25,.75,0,0],48:[0,.65556,0,0],49:[0,.65556,0,0],50:[0,.65556,0,0],51:[0,.65556,0,0],52:[0,.65556,0,0],53:[0,.65556,0,0],54:[0,.65556,0,0],55:[0,.65556,0,0],56:[0,.65556,0,0],57:[0,.65556,0,0],58:[0,.44444,0,0],59:[.125,.44444,0,0],61:[-.13,.37,0,0],63:[0,.69444,0,0],64:[0,.69444,0,0],65:[0,.69444,0,0],66:[0,.69444,0,0],67:[0,.69444,0,0],68:[0,.69444,0,0],69:[0,.69444,0,0],70:[0,.69444,0,0],71:[0,.69444,0,0],72:[0,.69444,0,0],73:[0,.69444,0,0],74:[0,.69444,0,0],75:[0,.69444,0,0],76:[0,.69444,0,0],77:[0,.69444,0,0],78:[0,.69444,0,0],79:[0,.69444,0,0],80:[0,.69444,0,0],81:[.125,.69444,0,0],82:[0,.69444,0,0],83:[0,.69444,0,0],84:[0,.69444,0,0],85:[0,.69444,0,0],86:[0,.69444,.01389,0],87:[0,.69444,.01389,0],88:[0,.69444,0,0],89:[0,.69444,.025,0],90:[0,.69444,0,0],91:[.25,.75,0,0],93:[.25,.75,0,0],94:[0,.69444,0,0],95:[.35,.09444,.02778,0],97:[0,.44444,0,0],98:[0,.69444,0,0],99:[0,.44444,0,0],100:[0,.69444,0,0],101:[0,.44444,0,0],102:[0,.69444,.06944,0],103:[.19444,.44444,.01389,0],104:[0,.69444,0,0],105:[0,.67937,0,0],106:[.19444,.67937,0,0],107:[0,.69444,0,0],108:[0,.69444,0,0],109:[0,.44444,0,0],110:[0,.44444,0,0],111:[0,.44444,0,0],112:[.19444,.44444,0,0],113:[.19444,.44444,0,0],114:[0,.44444,.01389,0],115:[0,.44444,0,0],116:[0,.57143,0,0],117:[0,.44444,0,0],118:[0,.44444,.01389,0],119:[0,.44444,.01389,0],120:[0,.44444,0,0],121:[.19444,.44444,.01389,0],122:[0,.44444,0,0],126:[.35,.32659,0,0],305:[0,.44444,0,0],567:[.19444,.44444,0,0],768:[0,.69444,0,0],769:[0,.69444,0,0],770:[0,.69444,0,0],771:[0,.67659,0,0],772:[0,.60889,0,0],774:[0,.69444,0,0],775:[0,.67937,0,0],776:[0,.67937,0,0],778:[0,.69444,0,0],779:[0,.69444,0,0],780:[0,.63194,0,0],915:[0,.69444,0,0],916:[0,.69444,0,0],920:[0,.69444,0,0],923:[0,.69444,0,0],926:[0,.69444,0,0],928:[0,.69444,0,0],931:[0,.69444,0,0],933:[0,.69444,0,0],934:[0,.69444,0,0],936:[0,.69444,0,0],937:[0,.69444,0,0
],8211:[0,.44444,.02778,0],8212:[0,.44444,.02778,0],8216:[0,.69444,0,0],8217:[0,.69444,0,0],8220:[0,.69444,0,0],8221:[0,.69444,0,0]},"Script-Regular":{65:[0,.7,.22925,0],66:[0,.7,.04087,0],67:[0,.7,.1689,0],68:[0,.7,.09371,0],69:[0,.7,.18583,0],70:[0,.7,.13634,0],71:[0,.7,.17322,0],72:[0,.7,.29694,0],73:[0,.7,.19189,0],74:[.27778,.7,.19189,0],75:[0,.7,.31259,0],76:[0,.7,.19189,0],77:[0,.7,.15981,0],78:[0,.7,.3525,0],79:[0,.7,.08078,0],80:[0,.7,.08078,0],81:[0,.7,.03305,0],82:[0,.7,.06259,0],83:[0,.7,.19189,0],84:[0,.7,.29087,0],85:[0,.7,.25815,0],86:[0,.7,.27523,0],87:[0,.7,.27523,0],88:[0,.7,.26006,0],89:[0,.7,.2939,0],90:[0,.7,.24037,0]},"Size1-Regular":{40:[.35001,.85,0,0],41:[.35001,.85,0,0],47:[.35001,.85,0,0],91:[.35001,.85,0,0],92:[.35001,.85,0,0],93:[.35001,.85,0,0],123:[.35001,.85,0,0],125:[.35001,.85,0,0],710:[0,.72222,0,0],732:[0,.72222,0,0],770:[0,.72222,0,0],771:[0,.72222,0,0],8214:[-99e-5,.601,0,0],8593:[1e-5,.6,0,0],8595:[1e-5,.6,0,0],8657:[1e-5,.6,0,0],8659:[1e-5,.6,0,0],8719:[.25001,.75,0,0],8720:[.25001,.75,0,0],8721:[.25001,.75,0,0],8730:[.35001,.85,0,0],8739:[-.00599,.606,0,0],8741:[-.00599,.606,0,0],8747:[.30612,.805,.19445,0],8748:[.306,.805,.19445,0],8749:[.306,.805,.19445,0],8750:[.30612,.805,.19445,0],8896:[.25001,.75,0,0],8897:[.25001,.75,0,0],8898:[.25001,.75,0,0],8899:[.25001,.75,0,0],8968:[.35001,.85,0,0],8969:[.35001,.85,0,0],8970:[.35001,.85,0,0],8971:[.35001,.85,0,0],9168:[-99e-5,.601,0,0],10216:[.35001,.85,0,0],10217:[.35001,.85,0,0],10752:[.25001,.75,0,0],10753:[.25001,.75,0,0],10754:[.25001,.75,0,0],10756:[.25001,.75,0,0],10758:[.25001,.75,0,0]},"Size2-Regular":{40:[.65002,1.15,0,0],41:[.65002,1.15,0,0],47:[.65002,1.15,0,0],91:[.65002,1.15,0,0],92:[.65002,1.15,0,0],93:[.65002,1.15,0,0],123:[.65002,1.15,0,0],125:[.65002,1.15,0,0],710:[0,.75,0,0],732:[0,.75,0,0],770:[0,.75,0,0],771:[0,.75,0,0],8719:[.55001,1.05,0,0],8720:[.55001,1.05,0,0],8721:[.55001,1.05,0,0],8730:[.65002,1.15,0,0],8747:[.86225,1.36,.44445,0],8748:[.862,1.36,.44445
,0],8749:[.862,1.36,.44445,0],8750:[.86225,1.36,.44445,0],8896:[.55001,1.05,0,0],8897:[.55001,1.05,0,0],8898:[.55001,1.05,0,0],8899:[.55001,1.05,0,0],8968:[.65002,1.15,0,0],8969:[.65002,1.15,0,0],8970:[.65002,1.15,0,0],8971:[.65002,1.15,0,0],10216:[.65002,1.15,0,0],10217:[.65002,1.15,0,0],10752:[.55001,1.05,0,0],10753:[.55001,1.05,0,0],10754:[.55001,1.05,0,0],10756:[.55001,1.05,0,0],10758:[.55001,1.05,0,0]},"Size3-Regular":{40:[.95003,1.45,0,0],41:[.95003,1.45,0,0],47:[.95003,1.45,0,0],91:[.95003,1.45,0,0],92:[.95003,1.45,0,0],93:[.95003,1.45,0,0],123:[.95003,1.45,0,0],125:[.95003,1.45,0,0],710:[0,.75,0,0],732:[0,.75,0,0],770:[0,.75,0,0],771:[0,.75,0,0],8730:[.95003,1.45,0,0],8968:[.95003,1.45,0,0],8969:[.95003,1.45,0,0],8970:[.95003,1.45,0,0],8971:[.95003,1.45,0,0],10216:[.95003,1.45,0,0],10217:[.95003,1.45,0,0]},"Size4-Regular":{40:[1.25003,1.75,0,0],41:[1.25003,1.75,0,0],47:[1.25003,1.75,0,0],91:[1.25003,1.75,0,0],92:[1.25003,1.75,0,0],93:[1.25003,1.75,0,0],123:[1.25003,1.75,0,0],125:[1.25003,1.75,0,0],710:[0,.825,0,0],732:[0,.825,0,0],770:[0,.825,0,0],771:[0,.825,0,0],8730:[1.25003,1.75,0,0],8968:[1.25003,1.75,0,0],8969:[1.25003,1.75,0,0],8970:[1.25003,1.75,0,0],8971:[1.25003,1.75,0,0],9115:[.64502,1.155,0,0],9116:[1e-5,.6,0,0],9117:[.64502,1.155,0,0],9118:[.64502,1.155,0,0],9119:[1e-5,.6,0,0],9120:[.64502,1.155,0,0],9121:[.64502,1.155,0,0],9122:[-99e-5,.601,0,0],9123:[.64502,1.155,0,0],9124:[.64502,1.155,0,0],9125:[-99e-5,.601,0,0],9126:[.64502,1.155,0,0],9127:[1e-5,.9,0,0],9128:[.65002,1.15,0,0],9129:[.90001,0,0,0],9130:[0,.3,0,0],9131:[1e-5,.9,0,0],9132:[.65002,1.15,0,0],9133:[.90001,0,0,0],9143:[.88502,.915,0,0],10216:[1.25003,1.75,0,0],10217:[1.25003,1.75,0,0],57344:[-.00499,.605,0,0],57345:[-.00499,.605,0,0],57680:[0,.12,0,0],57681:[0,.12,0,0],57682:[0,.12,0,0],57683:[0,.12,0,0]},"Typewriter-Regular":{33:[0,.61111,0,0],34:[0,.61111,0,0],35:[0,.61111,0,0],36:[.08333,.69444,0,0],37:[.08333,.69444,0,0],38:[0,.61111,0,0],39:[0,.61111,0,0],40:[.08333,.69444,0,0
],41:[.08333,.69444,0,0],42:[0,.52083,0,0],43:[-.08056,.53055,0,0],44:[.13889,.125,0,0],45:[-.08056,.53055,0,0],46:[0,.125,0,0],47:[.08333,.69444,0,0],48:[0,.61111,0,0],49:[0,.61111,0,0],50:[0,.61111,0,0],51:[0,.61111,0,0],52:[0,.61111,0,0],53:[0,.61111,0,0],54:[0,.61111,0,0],55:[0,.61111,0,0],56:[0,.61111,0,0],57:[0,.61111,0,0],58:[0,.43056,0,0],59:[.13889,.43056,0,0],60:[-.05556,.55556,0,0],61:[-.19549,.41562,0,0],62:[-.05556,.55556,0,0],63:[0,.61111,0,0],64:[0,.61111,0,0],65:[0,.61111,0,0],66:[0,.61111,0,0],67:[0,.61111,0,0],68:[0,.61111,0,0],69:[0,.61111,0,0],70:[0,.61111,0,0],71:[0,.61111,0,0],72:[0,.61111,0,0],73:[0,.61111,0,0],74:[0,.61111,0,0],75:[0,.61111,0,0],76:[0,.61111,0,0],77:[0,.61111,0,0],78:[0,.61111,0,0],79:[0,.61111,0,0],80:[0,.61111,0,0],81:[.13889,.61111,0,0],82:[0,.61111,0,0],83:[0,.61111,0,0],84:[0,.61111,0,0],85:[0,.61111,0,0],86:[0,.61111,0,0],87:[0,.61111,0,0],88:[0,.61111,0,0],89:[0,.61111,0,0],90:[0,.61111,0,0],91:[.08333,.69444,0,0],92:[.08333,.69444,0,0],93:[.08333,.69444,0,0],94:[0,.61111,0,0],95:[.09514,0,0,0],96:[0,.61111,0,0],97:[0,.43056,0,0],98:[0,.61111,0,0],99:[0,.43056,0,0],100:[0,.61111,0,0],101:[0,.43056,0,0],102:[0,.61111,0,0],103:[.22222,.43056,0,0],104:[0,.61111,0,0],105:[0,.61111,0,0],106:[.22222,.61111,0,0],107:[0,.61111,0,0],108:[0,.61111,0,0],109:[0,.43056,0,0],110:[0,.43056,0,0],111:[0,.43056,0,0],112:[.22222,.43056,0,0],113:[.22222,.43056,0,0],114:[0,.43056,0,0],115:[0,.43056,0,0],116:[0,.55358,0,0],117:[0,.43056,0,0],118:[0,.43056,0,0],119:[0,.43056,0,0],120:[0,.43056,0,0],121:[.22222,.43056,0,0],122:[0,.43056,0,0],123:[.08333,.69444,0,0],124:[.08333,.69444,0,0],125:[.08333,.69444,0,0],126:[0,.61111,0,0],127:[0,.61111,0,0],305:[0,.43056,0,0],567:[.22222,.43056,0,0],768:[0,.61111,0,0],769:[0,.61111,0,0],770:[0,.61111,0,0],771:[0,.61111,0,0],772:[0,.56555,0,0],774:[0,.61111,0,0],776:[0,.61111,0,0],778:[0,.61111,0,0],780:[0,.56597,0,0],915:[0,.61111,0,0],916:[0,.61111,0,0],920:[0,.61111,0,0],923:[0,.61111,0,0],926:[0,.
61111,0,0],928:[0,.61111,0,0],931:[0,.61111,0,0],933:[0,.61111,0,0],934:[0,.61111,0,0],936:[0,.61111,0,0],937:[0,.61111,0,0],2018:[0,.61111,0,0],2019:[0,.61111,0,0],8242:[0,.61111,0,0]}}},{}],43:[function(e,t){function n(e){return e&&e.__esModule?e:{"default":e}}function r(e,n,r){"string"==typeof e&&(e=[e]),"number"==typeof n&&(n={numArgs:n});for(var i={numArgs:n.numArgs,argTypes:n.argTypes,greediness:n.greediness===undefined?1:n.greediness,allowedInText:!!n.allowedInText,allowedInMath:n.allowedInMath,numOptionalArgs:n.numOptionalArgs||0,infix:!!n.infix,handler:r},a=0;a","\\langle","\\rangle","\\lt","\\gt","\\lvert","\\rvert","\\lVert","\\rVert","\\lgroup","\\rgroup","\\lmoustache","\\rmoustache","/","\\backslash","|","\\vert","\\|","\\Vert","\\uparrow","\\Uparrow","\\downarrow","\\Downarrow","\\updownarrow","\\Updownarrow","."],c={"\\Bbb":"\\mathbb","\\bold":"\\mathbf","\\frak":"\\mathfrak"};r(["\\blue","\\orange","\\pink","\\red","\\green","\\gray","\\purple","\\blueA","\\blueB","\\blueC","\\blueD","\\blueE","\\tealA","\\tealB","\\tealC","\\tealD","\\tealE","\\greenA","\\greenB","\\greenC","\\greenD","\\greenE","\\goldA","\\goldB","\\goldC","\\goldD","\\goldE","\\redA","\\redB","\\redC","\\redD","\\redE","\\maroonA","\\maroonB","\\maroonC","\\maroonD","\\maroonE","\\purpleA","\\purpleB","\\purpleC","\\purpleD","\\purpleE","\\mintA","\\mintB","\\mintC","\\grayA","\\grayB","\\grayC","\\grayD","\\grayE","\\grayF","\\grayG","\\grayH","\\grayI","\\kaBlue","\\kaGreen"],{numArgs:1,allowedInText:!0,greediness:3},function(e,t){var 
n=t[0];return{type:"color",color:"katex-"+e.funcName.slice(1),value:s(n)}}),r(["\\arcsin","\\arccos","\\arctan","\\arctg","\\arcctg","\\arg","\\ch","\\cos","\\cosec","\\cosh","\\cot","\\cotg","\\coth","\\csc","\\ctg","\\cth","\\deg","\\dim","\\exp","\\hom","\\ker","\\lg","\\ln","\\log","\\sec","\\sin","\\sinh","\\sh","\\tan","\\tanh","\\tg","\\th"],{numArgs:0},function(e){return{type:"op",limits:!1,symbol:!1,body:e.funcName}}),r(["\\det","\\gcd","\\inf","\\lim","\\liminf","\\limsup","\\max","\\min","\\Pr","\\sup"],{numArgs:0},function(e){return{type:"op",limits:!0,symbol:!1,body:e.funcName}}),r(["\\int","\\iint","\\iiint","\\oint"],{numArgs:0},function(e){return{type:"op",limits:!1,symbol:!0,body:e.funcName}}),r(["\\coprod","\\bigvee","\\bigwedge","\\biguplus","\\bigcap","\\bigcup","\\intop","\\prod","\\sum","\\bigotimes","\\bigoplus","\\bigodot","\\bigsqcup","\\smallint"],{numArgs:0},function(e){return{type:"op",limits:!0,symbol:!0,body:e.funcName}}),r("\\mathop",{numArgs:1},function(e,t){var n=t[0];return{type:"op",limits:!1,symbol:!1,value:s(n)}}),r(["\\dfrac","\\frac","\\tfrac","\\dbinom","\\binom","\\tbinom","\\\\atopfrac"],{numArgs:2,greediness:2},function(e,t){var n=t[0],r=t[1],i=void 0,a=null,o=null,s="auto";switch(e.funcName){case"\\dfrac":case"\\frac":case"\\tfrac":i=!0;break;case"\\\\atopfrac":i=!1;break;case"\\dbinom":case"\\binom":case"\\tbinom":i=!1,a="(",o=")";break;default:throw new Error("Unrecognized genfrac command")}switch(e.funcName){case"\\dfrac":case"\\dbinom":s="display";break;case"\\tfrac":case"\\tbinom":s="text"}return{type:"genfrac",numer:n,denom:r,hasBarLine:i,leftDelim:a,rightDelim:o,size:s}}),r(["\\llap","\\rlap"],{numArgs:1,allowedInText:!0},function(e,t){var n=t[0];return{type:e.funcName.slice(1),body:n}});var h=function(e,t){if(i["default"].contains(d,e.value))return e;throw new a["default"]("Invalid delimiter: '"+e.value+"' after 
'"+t.funcName+"'",e)};r(["\\bigl","\\Bigl","\\biggl","\\Biggl","\\bigr","\\Bigr","\\biggr","\\Biggr","\\bigm","\\Bigm","\\biggm","\\Biggm","\\big","\\Big","\\bigg","\\Bigg"],{numArgs:1},function(e,t){var n=h(t[0],e);return{type:"delimsizing",size:u[e.funcName].size,mclass:u[e.funcName].mclass,value:n.value}}),r(["\\left","\\right"],{numArgs:1},function(e,t){return{type:"leftright",value:h(t[0],e).value}}),r("\\middle",{numArgs:1},function(e,t){var n=h(t[0],e);if(!e.parser.leftrightDepth)throw new a["default"]("\\middle without preceding \\left",n);return{type:"middle",value:n.value}}),r(["\\tiny","\\scriptsize","\\footnotesize","\\small","\\normalsize","\\large","\\Large","\\LARGE","\\huge","\\Huge"],0,null),r(["\\displaystyle","\\textstyle","\\scriptstyle","\\scriptscriptstyle"],0,null),r(["\\rm","\\sf","\\tt","\\bf","\\it"],0,null),r(["\\mathrm","\\mathit","\\mathbf","\\mathbb","\\mathcal","\\mathfrak","\\mathscr","\\mathsf","\\mathtt","\\Bbb","\\bold","\\frak"],{numArgs:1,greediness:2},function(e,t){var n=t[0],r=e.funcName;return r in c&&(r=c[r]),{type:"font",font:r.slice(1),body:n}}),r(["\\acute","\\grave","\\ddot","\\tilde","\\bar","\\breve","\\check","\\hat","\\vec","\\dot","\\widehat","\\widetilde","\\overrightarrow","\\overleftarrow","\\Overrightarrow","\\overleftrightarrow","\\overgroup","\\overlinesegment","\\overleftharpoon","\\overrightharpoon"],{numArgs:1},function(e,t){var n=t[0],r=!i["default"].contains(["\\acute","\\grave","\\ddot","\\tilde","\\bar","\\breve","\\check","\\hat","\\vec","\\dot"],e.funcName),a=!r||i["default"].contains(["\\widehat","\\widetilde"],e.funcName);return{type:"accent",label:e.funcName,isStretchy:r,isShifty:a,value:s(n),base:n}}),r(["\\'","\\`","\\^","\\~","\\=","\\u","\\.",'\\"',"\\r","\\H","\\v"],{numArgs:1,allowedInText:!0,allowedInMath:!1},function(e,t){var n=t[0];return{type:"accent",label:e.funcName,isStretchy:!1,isShifty:!0,value:s(n),base:n}}),r(["\\overbrace","\\underbrace"],{numArgs:1},function(e,t){var 
n=t[0];return{type:"horizBrace",label:e.funcName,isOver:/^\\over/.test(e.funcName),base:n}}),r(["\\underleftarrow","\\underrightarrow","\\underleftrightarrow","\\undergroup","\\underlinesegment","\\undertilde"],{numArgs:1},function(e,t){var n=t[0];return{type:"accentUnder",label:e.funcName,value:s(n),body:n}}),r(["\\xleftarrow","\\xrightarrow","\\xLeftarrow","\\xRightarrow","\\xleftrightarrow","\\xLeftrightarrow","\\xhookleftarrow","\\xhookrightarrow","\\xmapsto","\\xrightharpoondown","\\xrightharpoonup","\\xleftharpoondown","\\xleftharpoonup","\\xrightleftharpoons","\\xleftrightharpoons","\\xLongequal","\\xtwoheadrightarrow","\\xtwoheadleftarrow","\\xLongequal","\\xtofrom"],{numArgs:1,numOptionalArgs:1},function(e,t){var n=t[0],r=t[1];return{type:"xArrow",label:e.funcName,body:r,below:n}}),r(["\\cancel","\\bcancel","\\xcancel","\\sout","\\fbox"],{numArgs:1},function(e,t){var n=t[0];return{type:"enclose",label:e.funcName,body:n}}),r(["\\over","\\choose","\\atop"],{numArgs:0,infix:!0},function(e){var t=void 0;switch(e.funcName){case"\\over":t="\\frac";break;case"\\choose":t="\\binom";break;case"\\atop":t="\\\\atopfrac";break;default:throw new Error("Unrecognized infix genfrac command")}return{type:"infix",replaceWith:t,token:e.token}}),r(["\\\\","\\cr"],{numArgs:0,numOptionalArgs:1,argTypes:["size"]},function(e,t){return{type:"cr",size:t[0]}}),r(["\\begin","\\end"],{numArgs:1,argTypes:["text"]},function(e,t){var n=t[0];if("ordgroup"!==n.type)throw new a["default"]("Invalid environment name",n);for(var r="",i=0;i"}}]),e}(),s=function(){function e(t){(0,r["default"])(this,e),this.text=t}return(0,i["default"])(e,[{key:"toNode",value:function(){return document.createTextNode(this.text)}},{key:"toMarkup",value:function(){return a["default"].escape(this.text)}}]),e}();t.exports={MathNode:o,TextNode:s}},{"./utils":51,"babel-runtime/helpers/classCallCheck":4,"babel-runtime/helpers/createClass":5}],46:[function(e,t){function n(e){return e&&e.__esModule?e:{"default":e}}var 
r=n(e("./Parser")),i=function(e,t){if(!("string"==typeof e||e instanceof String))throw new TypeError("KaTeX can only parse string typed expression");return new r["default"](e,t).parse()};t.exports=i},{"./Parser":31}],47:[function(e,t){var n=e("./buildCommon"),r=e("./mathMLTree"),i=e("./utils"),a={widehat:"^",widetilde:"~",undertilde:"~",overleftarrow:"\u2190",underleftarrow:"\u2190",xleftarrow:"\u2190",overrightarrow:"\u2192",underrightarrow:"\u2192",xrightarrow:"\u2192",underbrace:"\u23b5",overbrace:"\u23de",overleftrightarrow:"\u2194",underleftrightarrow:"\u2194",xleftrightarrow:"\u2194",Overrightarrow:"\u21d2",xRightarrow:"\u21d2",overleftharpoon:"\u21bc",xleftharpoonup:"\u21bc",overrightharpoon:"\u21c0",xrightharpoonup:"\u21c0",xLeftarrow:"\u21d0",xLeftrightarrow:"\u21d4",xhookleftarrow:"\u21a9",xhookrightarrow:"\u21aa",xmapsto:"\u21a6",xrightharpoondown:"\u21c1",xleftharpoondown:"\u21bd",xrightleftharpoons:"\u21cc",xleftrightharpoons:"\u21cb",xtwoheadleftarrow:"\u219e",xtwoheadrightarrow:"\u21a0",xLongequal:"=",xtofrom:"\u21c4"},o=function(e){var t=new r.MathNode("mo",[new r.TextNode(a[e.substr(1)])]);return 
t.setAttribute("stretchy","true"),t},s={overleftarrow:[.522,0,"leftarrow",.5],underleftarrow:[.522,0,"leftarrow",.5],xleftarrow:[.261,.261,"leftarrow",.783],overrightarrow:[.522,0,"rightarrow",.5],underrightarrow:[.522,0,"rightarrow",.5],xrightarrow:[.261,.261,"rightarrow",.783],overbrace:[.548,0,"overbrace",1.6],underbrace:[.548,0,"underbrace",1.6],overleftrightarrow:[.522,0,"leftrightarrow",.5],underleftrightarrow:[.522,0,"leftrightarrow",.5],xleftrightarrow:[.261,.261,"leftrightarrow",.783],Overrightarrow:[.56,0,"doublerightarrow",.5],xLeftarrow:[.28,.28,"doubleleftarrow",.783],xRightarrow:[.28,.28,"doublerightarrow",.783],xLeftrightarrow:[.28,.28,"doubleleftrightarrow",.955],overleftharpoon:[.522,0,"leftharpoon",.5],overrightharpoon:[.522,0,"rightharpoon",.5],xleftharpoonup:[.261,.261,"leftharpoon",.783],xrightharpoonup:[.261,.261,"rightharpoon",.783],xhookleftarrow:[.261,.261,"hookleftarrow",.87],xhookrightarrow:[.261,.261,"hookrightarrow",.87],overlinesegment:[.414,0,"linesegment",.5],underlinesegment:[.414,0,"linesegment",.5],xmapsto:[.261,.261,"mapsto",.783],xrightharpoondown:[.261,.261,"rightharpoondown",.783],xleftharpoondown:[.261,.261,"leftharpoondown",.783],xrightleftharpoons:[.358,.358,"rightleftharpoons",.716],xleftrightharpoons:[.358,.358,"leftrightharpoons",.716],overgroup:[.342,0,"overgroup",.87],undergroup:[.342,0,"undergroup",.87],xtwoheadleftarrow:[.167,.167,"twoheadleftarrow",.86],xtwoheadrightarrow:[.167,.167,"twoheadrightarrow",.86],xLongequal:[.167,.167,"longequal",.5],xtofrom:[.264,.264,"tofrom",.86]},l={doubleleftarrow:"",doublerightarrow:"",leftarrow:"",rightarrow:""},u={bcancel:"",cancel:"",doubleleftarrow:">"+l.doubleleftarrow+"",doubleleftrightarrow:">"+l.doubleleftarrow+"\n"+l.doublerightarrow+"",doublerightarrow:">"+l.doublerightarrow+"",hookleftarrow:">"+l.leftarrow+"\n",hookrightarrow:">"+l.rightarrow+"",leftarrow:">"+l.leftarrow+"",leftharpoon:">",leftharpoondown:">",leftrightarrow:">"+l.leftarrow+"\n"+l.rightarrow+"",leftrighthar
poons:">\n",linesegment:">\n",longequal:" viewBox='0 0 100 334' preserveAspectRatio='none'>\n",mapsto:">"+l.rightarrow+"",overbrace:">\n",overgroup:">",rightarrow:">"+l.rightarrow+"",rightharpoon:">",rightharpoondown:">",rightleftharpoons:">",tilde1:" viewBox='0 0 600 260' preserveAspectRatio='none'>\n",tilde2:" viewBox='0 0 1033 286' preserveAspectRatio='none'>\n",tilde3:" viewBox='0 0 2339 306' preserveAspectRatio='none'>\n",tilde4:" viewBox='0 0 2340 312' preserveAspectRatio='none'>\n",tofrom:">",twoheadleftarrow:">\n",twoheadrightarrow:">\n",underbrace:">\n",undergroup:">",widehat1:" viewBox='0 0 1062 239' preserveAspectRatio='none'>\n",widehat2:" viewBox='0 0 2364 300' preserveAspectRatio='none'>\n",widehat3:" viewBox='0 0 2364 360' preserveAspectRatio='none'>\n",widehat4:" viewBox='0 0 2364 420' preserveAspectRatio='none'>\n",xcancel:"\n"},d=function(e,t){var r=e.value.label.substr(1),a=0,o=0,l="",d=0;if(i.contains(["widehat","widetilde","undertilde"],r)){var c=e.value.value.length;if(c>5)a=.312,l=("widehat"===r?"widehat":"tilde")+"4";else{var h=[1,1,2,2,3,3][c];"widehat"===r?(a=[0,.24,.3,.3,.36,.36][c],l="widehat"+h):(a=[0,.26,.3,.3,.34,.34][c],l="tilde"+h)}}else{var p=s[r];a=p[0],o=p[1],l=p[2],d=p[3]}var f=n.makeSpan([],[],t);f.height=a,f.depth=o;var m=a+o;return f.style.height=m+"em",d>0&&(f.style.minWidth=d+"em"),f.innerHTML="",f},c=function(e,t,r,i){var a=void 0,o=e.height+e.depth+2*r;return"fbox"===t?(a=n.makeSpan(["stretchy",t],[],i),i.color&&(a.style.borderColor=i.getColor())):(a=n.makeSpan([],[],i)).innerHTML=""+u[t]+"",a.height=o,a.style.height=o+"em",a};t.exports={encloseSpan:c,mathMLnode:o,svgSpan:d}},{"./buildCommon":34,"./mathMLTree":45,"./utils":51}],48:[function(e,t){function n(e,n,r,i,a,o){t.exports[e][a]={font:n,group:r,replace:i},o&&(t.exports[e][i]=t.exports[e][a])}t.exports={math:{},text:{}};var 
r="math",i="text",a="main",o="ams",s="accent",l="bin",u="close",d="inner",c="mathord",h="op",p="open",f="punct",m="rel",g="spacing",v="textord";n(r,a,m,"\u2261","\\equiv"),n(r,a,m,"\u227a","\\prec"),n(r,a,m,"\u227b","\\succ"),n(r,a,m,"\u223c","\\sim"),n(r,a,m,"\u22a5","\\perp"),n(r,a,m,"\u2aaf","\\preceq"),n(r,a,m,"\u2ab0","\\succeq"),n(r,a,m,"\u2243","\\simeq"),n(r,a,m,"\u2223","\\mid"),n(r,a,m,"\u226a","\\ll"),n(r,a,m,"\u226b","\\gg"),n(r,a,m,"\u224d","\\asymp"),n(r,a,m,"\u2225","\\parallel"),n(r,a,m,"\u22c8","\\bowtie"),n(r,a,m,"\u2323","\\smile"),n(r,a,m,"\u2291","\\sqsubseteq"),n(r,a,m,"\u2292","\\sqsupseteq"),n(r,a,m,"\u2250","\\doteq"),n(r,a,m,"\u2322","\\frown"),n(r,a,m,"\u220b","\\ni"),n(r,a,m,"\u221d","\\propto"),n(r,a,m,"\u22a2","\\vdash"),n(r,a,m,"\u22a3","\\dashv"), +n(r,a,m,"\u220b","\\owns"),n(r,a,f,".","\\ldotp"),n(r,a,f,"\u22c5","\\cdotp"),n(r,a,v,"#","\\#"),n(i,a,v,"#","\\#"),n(r,a,v,"&","\\&"),n(i,a,v,"&","\\&"),n(r,a,v,"\u2135","\\aleph"),n(r,a,v,"\u2200","\\forall"),n(r,a,v,"\u210f","\\hbar"),n(r,a,v,"\u2203","\\exists"),n(r,a,v,"\u2207","\\nabla"),n(r,a,v,"\u266d","\\flat"),n(r,a,v,"\u2113","\\ell"),n(r,a,v,"\u266e","\\natural"),n(r,a,v,"\u2663","\\clubsuit"),n(r,a,v,"\u2118","\\wp"),n(r,a,v,"\u266f","\\sharp"),n(r,a,v,"\u2662","\\diamondsuit"),n(r,a,v,"\u211c","\\Re"),n(r,a,v,"\u2661","\\heartsuit"),n(r,a,v,"\u2111","\\Im"),n(r,a,v,"\u2660","\\spadesuit"),n(r,a,v,"\u2020","\\dag"),n(i,a,v,"\u2020","\\dag"),n(i,a,v,"\u2020","\\textdagger"),n(r,a,v,"\u2021","\\ddag"),n(i,a,v,"\u2021","\\ddag"),n(i,a,v,"\u2020","\\textdaggerdbl"),n(r,a,u,"\u23b1","\\rmoustache"),n(r,a,p,"\u23b0","\\lmoustache"),n(r,a,u,"\u27ef","\\rgroup"),n(r,a,p,"\u27ee","\\lgroup"),n(r,a,l,"\u2213","\\mp"),n(r,a,l,"\u2296","\\ominus"),n(r,a,l,"\u228e","\\uplus"),n(r,a,l,"\u2293","\\sqcap"),n(r,a,l,"\u2217","\\ast"),n(r,a,l,"\u2294","\\sqcup"),n(r,a,l,"\u25ef","\\bigcirc"),n(r,a,l,"\u2219","\\bullet"),n(r,a,l,"\u2021","\\ddagger"),n(r,a,l,"\u2240","\\wr"),n(r,a,l,"\u2a3f","\\am
alg"),n(r,a,m,"\u27f5","\\longleftarrow"),n(r,a,m,"\u21d0","\\Leftarrow"),n(r,a,m,"\u27f8","\\Longleftarrow"),n(r,a,m,"\u27f6","\\longrightarrow"),n(r,a,m,"\u21d2","\\Rightarrow"),n(r,a,m,"\u27f9","\\Longrightarrow"),n(r,a,m,"\u2194","\\leftrightarrow"),n(r,a,m,"\u27f7","\\longleftrightarrow"),n(r,a,m,"\u21d4","\\Leftrightarrow"),n(r,a,m,"\u27fa","\\Longleftrightarrow"),n(r,a,m,"\u21a6","\\mapsto"),n(r,a,m,"\u27fc","\\longmapsto"),n(r,a,m,"\u2197","\\nearrow"),n(r,a,m,"\u21a9","\\hookleftarrow"),n(r,a,m,"\u21aa","\\hookrightarrow"),n(r,a,m,"\u2198","\\searrow"),n(r,a,m,"\u21bc","\\leftharpoonup"),n(r,a,m,"\u21c0","\\rightharpoonup"),n(r,a,m,"\u2199","\\swarrow"),n(r,a,m,"\u21bd","\\leftharpoondown"),n(r,a,m,"\u21c1","\\rightharpoondown"),n(r,a,m,"\u2196","\\nwarrow"),n(r,a,m,"\u21cc","\\rightleftharpoons"),n(r,o,m,"\u226e","\\nless"),n(r,o,m,"\ue010","\\nleqslant"),n(r,o,m,"\ue011","\\nleqq"),n(r,o,m,"\u2a87","\\lneq"),n(r,o,m,"\u2268","\\lneqq"),n(r,o,m,"\ue00c","\\lvertneqq"),n(r,o,m,"\u22e6","\\lnsim"),n(r,o,m,"\u2a89","\\lnapprox"),n(r,o,m,"\u2280","\\nprec"),n(r,o,m,"\u22e0","\\npreceq"),n(r,o,m,"\u22e8","\\precnsim"),n(r,o,m,"\u2ab9","\\precnapprox"),n(r,o,m,"\u2241","\\nsim"),n(r,o,m,"\ue006","\\nshortmid"),n(r,o,m,"\u2224","\\nmid"),n(r,o,m,"\u22ac","\\nvdash"),n(r,o,m,"\u22ad","\\nvDash"),n(r,o,m,"\u22ea","\\ntriangleleft"),n(r,o,m,"\u22ec","\\ntrianglelefteq"),n(r,o,m,"\u228a","\\subsetneq"),n(r,o,m,"\ue01a","\\varsubsetneq"),n(r,o,m,"\u2acb","\\subsetneqq"),n(r,o,m,"\ue017","\\varsubsetneqq"),n(r,o,m,"\u226f","\\ngtr"),n(r,o,m,"\ue00f","\\ngeqslant"),n(r,o,m,"\ue00e","\\ngeqq"),n(r,o,m,"\u2a88","\\gneq"),n(r,o,m,"\u2269","\\gneqq"),n(r,o,m,"\ue00d","\\gvertneqq"),n(r,o,m,"\u22e7","\\gnsim"),n(r,o,m,"\u2a8a","\\gnapprox"),n(r,o,m,"\u2281","\\nsucc"),n(r,o,m,"\u22e1","\\nsucceq"),n(r,o,m,"\u22e9","\\succnsim"),n(r,o,m,"\u2aba","\\succnapprox"),n(r,o,m,"\u2246","\\ncong"),n(r,o,m,"\ue007","\\nshortparallel"),n(r,o,m,"\u2226","\\nparallel"),n(r,o,m,"\u22af","
\\nVDash"),n(r,o,m,"\u22eb","\\ntriangleright"),n(r,o,m,"\u22ed","\\ntrianglerighteq"),n(r,o,m,"\ue018","\\nsupseteqq"),n(r,o,m,"\u228b","\\supsetneq"),n(r,o,m,"\ue01b","\\varsupsetneq"),n(r,o,m,"\u2acc","\\supsetneqq"),n(r,o,m,"\ue019","\\varsupsetneqq"),n(r,o,m,"\u22ae","\\nVdash"),n(r,o,m,"\u2ab5","\\precneqq"),n(r,o,m,"\u2ab6","\\succneqq"),n(r,o,m,"\ue016","\\nsubseteqq"),n(r,o,l,"\u22b4","\\unlhd"),n(r,o,l,"\u22b5","\\unrhd"),n(r,o,m,"\u219a","\\nleftarrow"),n(r,o,m,"\u219b","\\nrightarrow"),n(r,o,m,"\u21cd","\\nLeftarrow"),n(r,o,m,"\u21cf","\\nRightarrow"),n(r,o,m,"\u21ae","\\nleftrightarrow"),n(r,o,m,"\u21ce","\\nLeftrightarrow"),n(r,o,m,"\u25b3","\\vartriangle"),n(r,o,v,"\u210f","\\hslash"),n(r,o,v,"\u25bd","\\triangledown"),n(r,o,v,"\u25ca","\\lozenge"),n(r,o,v,"\u24c8","\\circledS"),n(r,o,v,"\xae","\\circledR"),n(i,o,v,"\xae","\\circledR"),n(r,o,v,"\u2221","\\measuredangle"),n(r,o,v,"\u2204","\\nexists"),n(r,o,v,"\u2127","\\mho"),n(r,o,v,"\u2132","\\Finv"),n(r,o,v,"\u2141","\\Game"),n(r,o,v,"k","\\Bbbk"),n(r,o,v,"\u2035","\\backprime"),n(r,o,v,"\u25b2","\\blacktriangle"),n(r,o,v,"\u25bc","\\blacktriangledown"),n(r,o,v,"\u25a0","\\blacksquare"),n(r,o,v,"\u29eb","\\blacklozenge"),n(r,o,v,"\u2605","\\bigstar"),n(r,o,v,"\u2222","\\sphericalangle"),n(r,o,v,"\u2201","\\complement"),n(r,o,v,"\xf0","\\eth"),n(r,o,v,"\u2571","\\diagup"),n(r,o,v,"\u2572","\\diagdown"),n(r,o,v,"\u25a1","\\square"),n(r,o,v,"\u25a1","\\Box"),n(r,o,v,"\u25ca","\\Diamond"),n(r,o,v,"\xa5","\\yen"),n(r,o,v,"\u2713","\\checkmark"),n(i,o,v,"\u2713","\\checkmark"),n(r,o,v,"\u2136","\\beth"),n(r,o,v,"\u2138","\\daleth"),n(r,o,v,"\u2137","\\gimel"),n(r,o,v,"\u03dd","\\digamma"),n(r,o,v,"\u03f0","\\varkappa"),n(r,o,p,"\u250c","\\ulcorner"),n(r,o,u,"\u2510","\\urcorner"),n(r,o,p,"\u2514","\\llcorner"),n(r,o,u,"\u2518","\\lrcorner"),n(r,o,m,"\u2266","\\leqq"),n(r,o,m,"\u2a7d","\\leqslant"),n(r,o,m,"\u2a95","\\eqslantless"),n(r,o,m,"\u2272","\\lesssim"),n(r,o,m,"\u2a85","\\lessapprox"),n(r,o,m,"\u
224a","\\approxeq"),n(r,o,l,"\u22d6","\\lessdot"),n(r,o,m,"\u22d8","\\lll"),n(r,o,m,"\u2276","\\lessgtr"),n(r,o,m,"\u22da","\\lesseqgtr"),n(r,o,m,"\u2a8b","\\lesseqqgtr"),n(r,o,m,"\u2251","\\doteqdot"),n(r,o,m,"\u2253","\\risingdotseq"),n(r,o,m,"\u2252","\\fallingdotseq"),n(r,o,m,"\u223d","\\backsim"),n(r,o,m,"\u22cd","\\backsimeq"),n(r,o,m,"\u2ac5","\\subseteqq"),n(r,o,m,"\u22d0","\\Subset"),n(r,o,m,"\u228f","\\sqsubset"),n(r,o,m,"\u227c","\\preccurlyeq"),n(r,o,m,"\u22de","\\curlyeqprec"),n(r,o,m,"\u227e","\\precsim"),n(r,o,m,"\u2ab7","\\precapprox"),n(r,o,m,"\u22b2","\\vartriangleleft"),n(r,o,m,"\u22b4","\\trianglelefteq"),n(r,o,m,"\u22a8","\\vDash"),n(r,o,m,"\u22aa","\\Vvdash"),n(r,o,m,"\u2323","\\smallsmile"),n(r,o,m,"\u2322","\\smallfrown"),n(r,o,m,"\u224f","\\bumpeq"),n(r,o,m,"\u224e","\\Bumpeq"),n(r,o,m,"\u2267","\\geqq"),n(r,o,m,"\u2a7e","\\geqslant"),n(r,o,m,"\u2a96","\\eqslantgtr"),n(r,o,m,"\u2273","\\gtrsim"),n(r,o,m,"\u2a86","\\gtrapprox"),n(r,o,l,"\u22d7","\\gtrdot"),n(r,o,m,"\u22d9","\\ggg"),n(r,o,m,"\u2277","\\gtrless"),n(r,o,m,"\u22db","\\gtreqless"),n(r,o,m,"\u2a8c","\\gtreqqless"),n(r,o,m,"\u2256","\\eqcirc"),n(r,o,m,"\u2257","\\circeq"),n(r,o,m,"\u225c","\\triangleq"),n(r,o,m,"\u223c","\\thicksim"),n(r,o,m,"\u2248","\\thickapprox"),n(r,o,m,"\u2ac6","\\supseteqq"),n(r,o,m,"\u22d1","\\Supset"),n(r,o,m,"\u2290","\\sqsupset"),n(r,o,m,"\u227d","\\succcurlyeq"),n(r,o,m,"\u22df","\\curlyeqsucc"),n(r,o,m,"\u227f","\\succsim"),n(r,o,m,"\u2ab8","\\succapprox"),n(r,o,m,"\u22b3","\\vartriangleright"),n(r,o,m,"\u22b5","\\trianglerighteq"),n(r,o,m,"\u22a9","\\Vdash"),n(r,o,m,"\u2223","\\shortmid"),n(r,o,m,"\u2225","\\shortparallel"),n(r,o,m,"\u226c","\\between"),n(r,o,m,"\u22d4","\\pitchfork"),n(r,o,m,"\u221d","\\varpropto"),n(r,o,m,"\u25c0","\\blacktriangleleft"),n(r,o,m,"\u2234","\\therefore"),n(r,o,m,"\u220d","\\backepsilon"),n(r,o,m,"\u25b6","\\blacktriangleright"),n(r,o,m,"\u2235","\\because"),n(r,o,m,"\u22d8","\\llless"),n(r,o,m,"\u22d9","\\gggtr"),n(r,o,
l,"\u22b2","\\lhd"),n(r,o,l,"\u22b3","\\rhd"),n(r,o,m,"\u2242","\\eqsim"),n(r,a,m,"\u22c8","\\Join"),n(r,o,m,"\u2251","\\Doteq"),n(r,o,l,"\u2214","\\dotplus"),n(r,o,l,"\u2216","\\smallsetminus"),n(r,o,l,"\u22d2","\\Cap"),n(r,o,l,"\u22d3","\\Cup"),n(r,o,l,"\u2a5e","\\doublebarwedge"),n(r,o,l,"\u229f","\\boxminus"),n(r,o,l,"\u229e","\\boxplus"),n(r,o,l,"\u22c7","\\divideontimes"),n(r,o,l,"\u22c9","\\ltimes"),n(r,o,l,"\u22ca","\\rtimes"),n(r,o,l,"\u22cb","\\leftthreetimes"),n(r,o,l,"\u22cc","\\rightthreetimes"),n(r,o,l,"\u22cf","\\curlywedge"),n(r,o,l,"\u22ce","\\curlyvee"),n(r,o,l,"\u229d","\\circleddash"),n(r,o,l,"\u229b","\\circledast"),n(r,o,l,"\u22c5","\\centerdot"),n(r,o,l,"\u22ba","\\intercal"),n(r,o,l,"\u22d2","\\doublecap"),n(r,o,l,"\u22d3","\\doublecup"),n(r,o,l,"\u22a0","\\boxtimes"),n(r,o,m,"\u21e2","\\dashrightarrow"),n(r,o,m,"\u21e0","\\dashleftarrow"),n(r,o,m,"\u21c7","\\leftleftarrows"),n(r,o,m,"\u21c6","\\leftrightarrows"),n(r,o,m,"\u21da","\\Lleftarrow"),n(r,o,m,"\u219e","\\twoheadleftarrow"),n(r,o,m,"\u21a2","\\leftarrowtail"),n(r,o,m,"\u21ab","\\looparrowleft"),n(r,o,m,"\u21cb","\\leftrightharpoons"),n(r,o,m,"\u21b6","\\curvearrowleft"),n(r,o,m,"\u21ba","\\circlearrowleft"),n(r,o,m,"\u21b0","\\Lsh"),n(r,o,m,"\u21c8","\\upuparrows"),n(r,o,m,"\u21bf","\\upharpoonleft"),n(r,o,m,"\u21c3","\\downharpoonleft"),n(r,o,m,"\u22b8","\\multimap"),n(r,o,m,"\u21ad","\\leftrightsquigarrow"),n(r,o,m,"\u21c9","\\rightrightarrows"),n(r,o,m,"\u21c4","\\rightleftarrows"),n(r,o,m,"\u21a0","\\twoheadrightarrow"),n(r,o,m,"\u21a3","\\rightarrowtail"),n(r,o,m,"\u21ac","\\looparrowright"),n(r,o,m,"\u21b7","\\curvearrowright"),n(r,o,m,"\u21bb","\\circlearrowright"),n(r,o,m,"\u21b1","\\Rsh"),n(r,o,m,"\u21ca","\\downdownarrows"),n(r,o,m,"\u21be","\\upharpoonright"),n(r,o,m,"\u21c2","\\downharpoonright"),n(r,o,m,"\u21dd","\\rightsquigarrow"),n(r,o,m,"\u21dd","\\leadsto"),n(r,o,m,"\u21db","\\Rrightarrow"),n(r,o,m,"\u21be","\\restriction"),n(r,a,v,"\u2018","`"),n(r,a,v,"$","\\$"),
n(i,a,v,"$","\\$"),n(i,a,v,"$","\\textdollar"),n(r,a,v,"%","\\%"),n(i,a,v,"%","\\%"),n(r,a,v,"_","\\_"),n(i,a,v,"_","\\_"),n(i,a,v,"_","\\textunderscore"),n(r,a,v,"\u2220","\\angle"),n(r,a,v,"\u221e","\\infty"),n(r,a,v,"\u2032","\\prime"),n(r,a,v,"\u25b3","\\triangle"),n(r,a,v,"\u0393","\\Gamma",!0),n(r,a,v,"\u0394","\\Delta",!0),n(r,a,v,"\u0398","\\Theta",!0),n(r,a,v,"\u039b","\\Lambda",!0),n(r,a,v,"\u039e","\\Xi",!0),n(r,a,v,"\u03a0","\\Pi",!0),n(r,a,v,"\u03a3","\\Sigma",!0),n(r,a,v,"\u03a5","\\Upsilon",!0),n(r,a,v,"\u03a6","\\Phi",!0),n(r,a,v,"\u03a8","\\Psi",!0),n(r,a,v,"\u03a9","\\Omega",!0),n(r,a,v,"\xac","\\neg"),n(r,a,v,"\xac","\\lnot"),n(r,a,v,"\u22a4","\\top"),n(r,a,v,"\u22a5","\\bot"),n(r,a,v,"\u2205","\\emptyset"),n(r,o,v,"\u2205","\\varnothing"),n(r,a,c,"\u03b1","\\alpha",!0),n(r,a,c,"\u03b2","\\beta",!0),n(r,a,c,"\u03b3","\\gamma",!0),n(r,a,c,"\u03b4","\\delta",!0),n(r,a,c,"\u03f5","\\epsilon",!0),n(r,a,c,"\u03b6","\\zeta",!0),n(r,a,c,"\u03b7","\\eta",!0),n(r,a,c,"\u03b8","\\theta",!0),n(r,a,c,"\u03b9","\\iota",!0),n(r,a,c,"\u03ba","\\kappa",!0),n(r,a,c,"\u03bb","\\lambda",!0),n(r,a,c,"\u03bc","\\mu",!0),n(r,a,c,"\u03bd","\\nu",!0),n(r,a,c,"\u03be","\\xi",!0),n(r,a,c,"\u03bf","\\omicron",!0),n(r,a,c,"\u03c0","\\pi",!0),n(r,a,c,"\u03c1","\\rho",!0),n(r,a,c,"\u03c3","\\sigma",!0),n(r,a,c,"\u03c4","\\tau",!0),n(r,a,c,"\u03c5","\\upsilon",!0),n(r,a,c,"\u03d5","\\phi",!0),n(r,a,c,"\u03c7","\\chi",!0),n(r,a,c,"\u03c8","\\psi",!0),n(r,a,c,"\u03c9","\\omega",!0),n(r,a,c,"\u03b5","\\varepsilon",!0),n(r,a,c,"\u03d1","\\vartheta",!0),n(r,a,c,"\u03d6","\\varpi",!0),n(r,a,c,"\u03f1","\\varrho",!0),n(r,a,c,"\u03c2","\\varsigma",!0),n(r,a,c,"\u03c6","\\varphi",!0),n(r,a,l,"\u2217","*"),n(r,a,l,"+","+"),n(r,a,l,"\u2212","-"),n(r,a,l,"\u22c5","\\cdot"),n(r,a,l,"\u2218","\\circ"),n(r,a,l,"\xf7","\\div"),n(r,a,l,"\xb1","\\pm"),n(r,a,l,"\xd7","\\times"),n(r,a,l,"\u2229","\\cap"),n(r,a,l,"\u222a","\\cup"),n(r,a,l,"\u2216","\\setminus"),n(r,a,l,"\u2227","\\land"),n(r,a,l,"\
u2228","\\lor"),n(r,a,l,"\u2227","\\wedge"),n(r,a,l,"\u2228","\\vee"),n(r,a,v,"\u221a","\\surd"),n(r,a,p,"(","("),n(r,a,p,"[","["),n(r,a,p,"\u27e8","\\langle"),n(r,a,p,"\u2223","\\lvert"),n(r,a,p,"\u2225","\\lVert"),n(r,a,u,")",")"),n(r,a,u,"]","]"),n(r,a,u,"?","?"),n(r,a,u,"!","!"),n(r,a,u,"\u27e9","\\rangle"),n(r,a,u,"\u2223","\\rvert"),n(r,a,u,"\u2225","\\rVert"),n(r,a,m,"=","="),n(r,a,m,"<","<"),n(r,a,m,">",">"),n(r,a,m,":",":"),n(r,a,m,"\u2248","\\approx"),n(r,a,m,"\u2245","\\cong"),n(r,a,m,"\u2265","\\ge"),n(r,a,m,"\u2265","\\geq"),n(r,a,m,"\u2190","\\gets"),n(r,a,m,">","\\gt"),n(r,a,m,"\u2208","\\in"),n(r,a,m,"\u2209","\\notin"),n(r,a,m,"\u0338","\\not"),n(r,a,m,"\u2282","\\subset"),n(r,a,m,"\u2283","\\supset"),n(r,a,m,"\u2286","\\subseteq"),n(r,a,m,"\u2287","\\supseteq"),n(r,o,m,"\u2288","\\nsubseteq"),n(r,o,m,"\u2289","\\nsupseteq"),n(r,a,m,"\u22a8","\\models"),n(r,a,m,"\u2190","\\leftarrow"),n(r,a,m,"\u2264","\\le"),n(r,a,m,"\u2264","\\leq"),n(r,a,m,"<","\\lt"),n(r,a,m,"\u2260","\\ne"),n(r,a,m,"\u2260","\\neq"),n(r,a,m,"\u2192","\\rightarrow"),n(r,a,m,"\u2192","\\to"),n(r,o,m,"\u2271","\\ngeq"),n(r,o,m,"\u2270","\\nleq"),n(r,a,g,null,"\\!"),n(r,a,g,"\xa0","\\ 
"),n(r,a,g,"\xa0","~"),n(r,a,g,null,"\\,"),n(r,a,g,null,"\\:"),n(r,a,g,null,"\\;"),n(r,a,g,null,"\\enspace"),n(r,a,g,null,"\\qquad"),n(r,a,g,null,"\\quad"),n(r,a,g,"\xa0","\\space"),n(r,a,f,",",","),n(r,a,f,";",";"),n(r,a,f,":","\\colon"),n(r,o,l,"\u22bc","\\barwedge"),n(r,o,l,"\u22bb","\\veebar"),n(r,a,l,"\u2299","\\odot"),n(r,a,l,"\u2295","\\oplus"),n(r,a,l,"\u2297","\\otimes"),n(r,a,v,"\u2202","\\partial"),n(r,a,l,"\u2298","\\oslash"),n(r,o,l,"\u229a","\\circledcirc"),n(r,o,l,"\u22a1","\\boxdot"),n(r,a,l,"\u25b3","\\bigtriangleup"),n(r,a,l,"\u25bd","\\bigtriangledown"),n(r,a,l,"\u2020","\\dagger"),n(r,a,l,"\u22c4","\\diamond"),n(r,a,l,"\u22c6","\\star"),n(r,a,l,"\u25c3","\\triangleleft"),n(r,a,l,"\u25b9","\\triangleright"),n(r,a,p,"{","\\{"),n(i,a,v,"{","\\{"),n(i,a,v,"{","\\textbraceleft"),n(r,a,u,"}","\\}"),n(i,a,v,"}","\\}"),n(i,a,v,"}","\\textbraceright"),n(r,a,p,"{","\\lbrace"),n(r,a,u,"}","\\rbrace"),n(r,a,p,"[","\\lbrack"),n(r,a,u,"]","\\rbrack"),n(i,a,v,"<","\\textless"),n(i,a,v,">","\\textgreater"),n(r,a,p,"\u230a","\\lfloor"),n(r,a,u,"\u230b","\\rfloor"),n(r,a,p,"\u2308","\\lceil"),n(r,a,u,"\u2309","\\rceil"),n(r,a,v,"\\","\\backslash"),n(r,a,v,"\u2223","|"),n(r,a,v,"\u2223","\\vert"),n(i,a,v,"|","\\textbar"),n(r,a,v,"\u2225","\\|"),n(r,a,v,"\u2225","\\Vert"),n(i,a,v,"\u2225","\\textbardbl"),n(r,a,m,"\u2191","\\uparrow"),n(r,a,m,"\u21d1","\\Uparrow"),n(r,a,m,"\u2193","\\downarrow"),n(r,a,m,"\u21d3","\\Downarrow"),n(r,a,m,"\u2195","\\updownarrow"),n(r,a,m,"\u21d5","\\Updownarrow"),n(r,a,h,"\u2210","\\coprod"),n(r,a,h,"\u22c1","\\bigvee"),n(r,a,h,"\u22c0","\\bigwedge"),n(r,a,h,"\u2a04","\\biguplus"),n(r,a,h,"\u22c2","\\bigcap"),n(r,a,h,"\u22c3","\\bigcup"),n(r,a,h,"\u222b","\\int"),n(r,a,h,"\u222b","\\intop"),n(r,a,h,"\u222c","\\iint"),n(r,a,h,"\u222d","\\iiint"),n(r,a,h,"\u220f","\\prod"),n(r,a,h,"\u2211","\\sum"),n(r,a,h,"\u2a02","\\bigotimes"),n(r,a,h,"\u2a01","\\bigoplus"),n(r,a,h,"\u2a00","\\bigodot"),n(r,a,h,"\u222e","\\oint"),n(r,a,h,"\u2a06","\\bi
gsqcup"),n(r,a,h,"\u222b","\\smallint"),n(i,a,d,"\u2026","\\textellipsis"),n(r,a,d,"\u2026","\\mathellipsis"),n(i,a,d,"\u2026","\\ldots",!0),n(r,a,d,"\u2026","\\ldots",!0),n(r,a,d,"\u22ef","\\cdots",!0),n(r,a,d,"\u22f1","\\ddots",!0),n(r,a,v,"\u22ee","\\vdots",!0),n(r,a,s,"\xb4","\\acute"),n(r,a,s,"`","\\grave"),n(r,a,s,"\xa8","\\ddot"),n(r,a,s,"~","\\tilde"),n(r,a,s,"\xaf","\\bar"),n(r,a,s,"\u02d8","\\breve"),n(r,a,s,"\u02c7","\\check"),n(r,a,s,"^","\\hat"),n(r,a,s,"\u20d7","\\vec"),n(r,a,s,"\u02d9","\\dot"),n(r,a,c,"\u0131","\\imath"),n(r,a,c,"\u0237","\\jmath"),n(i,a,s,"\u02ca","\\'"),n(i,a,s,"\u02cb","\\`"),n(i,a,s,"\u02c6","\\^"),n(i,a,s,"\u02dc","\\~"),n(i,a,s,"\u02c9","\\="),n(i,a,s,"\u02d8","\\u"),n(i,a,s,"\u02d9","\\."),n(i,a,s,"\u02da","\\r"),n(i,a,s,"\u02c7","\\v"),n(i,a,s,"\xa8",'\\"'),n(i,a,s,"\u030b","\\H"),n(i,a,v,"\u2013","--"),n(i,a,v,"\u2013","\\textendash"),n(i,a,v,"\u2014","---"),n(i,a,v,"\u2014","\\textemdash"),n(i,a,v,"\u2018","`"),n(i,a,v,"\u2018","\\textquoteleft"),n(i,a,v,"\u2019","'"),n(i,a,v,"\u2019","\\textquoteright"),n(i,a,v,"\u201c","``"),n(i,a,v,"\u201c","\\textquotedblleft"),n(i,a,v,"\u201d","''"),n(i,a,v,"\u201d","\\textquotedblright"),n(r,a,v,"\xb0","\\degree"),n(i,a,v,"\xb0","\\degree"),n(r,a,c,"\xa3","\\pounds"),n(r,a,c,"\xa3","\\mathsterling"),n(i,a,c,"\xa3","\\pounds"),n(i,a,c,"\xa3","\\textsterling"),n(r,o,v,"\u2720","\\maltese"),n(i,o,v,"\u2720","\\maltese"),n(i,a,g,"\xa0","\\ "),n(i,a,g,"\xa0"," "),n(i,a,g,"\xa0","~");for(var b='0123456789/@."',y=0;y":">","<":"<",'"':""","'":"'"},h=/[&><"']/g,p=void 0;if("undefined"!=typeof document){var f=document.createElement("span");p="textContent"in f?function(e,t){e.textContent=t}:function(e,t){e.innerText=t}}t.exports={contains:s,deflt:l,escape:r,hyphenate:d,indexOf:o,setTextContent:p,clearNode:i}},{}]},{},[1])(1)},e.exports=t()})); +// Copyright 2018 The Distill Template Authors +const ae=function(e,t,n){let r=n,i=0;const 
a=e.length;for(;r[e.left,e.right]),i=e=>r.some(t=>-1!==e.indexOf(t));n.mightHaveMath=i,ue(e,n)};var he="iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAYAAACqaXHeAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAA99JREFUeNrsG4t1ozDMzQSM4A2ODUonKBucN2hugtIJ6E1AboLcBiQTkJsANiAb9OCd/OpzMWBJBl5TvaeXPiiyJetry0J8wW3D3QpjRh3GjneXDq+fSQA9s2mH9x3KDhN4foJfCb8N/Jrv+2fnDn8vLRQOplWHVYdvHZYdZsBcZP1vBmh/n8DzEmhUQDPaOuP9pFuY+JwJHwHnCLQE2tnWBGEyXozY9xCUgHMhhjE2I4heVWtgIkZ83wL6Qgxj1obfWBxymPwe+b00BCCRNPbwfb60yleAkkBHGT5AEehIYz7eJrFDMF9CvH4wwhcGHiHMneFvLDQwlwvMLQq58trRcYBWfYn0A0OgHWQUSu25mE+BnoYKnnEJoeIWAifzOv7vLWd2ZKRfWAIme3tOiUaQ3UnLkb0xj1FxRIeEGKaGIHOs9nEgLaaA9i0JRYo1Ic67wJW86KSKE/ZAM8KuVMk8ITVhmxUxJ3Cl2xlm9Vtkeju1+mpCQNxaEGNCY8bs9X2YqwNoQeGjBWut/ma0QAWy/TqAsHx9wSya3I5IRxOfTC+leG+kA/4vSeEcGBtNUN6byhu3+keEZCQJUNh8MAO7HL6H8pQLnsW/Hd4T4lv93TPjfM7A46iEEqbB5EDOvwYNW6tGNZzT/o+CZ6sqZ6wUtR/wf7mi/VL8iNciT6rHih48Y55b4nKCHJCCzb4y0nwFmin3ZEMIoLfZF8F7nncFmvnWBaBj7CGAYA/WGJsUwHdYqVDwAmNsUgAx4CGgAA7GOOxADYOFWOaIKifuVYzmOpREqA21Mo7aPsgiY1PhOMAmxtR+AUbYH3Id2wc0SAFIQTsn9IUGWR8k9jx3vtXSiAacFxTAGakBk9UudkNECd6jLe+6HrshshvIuC6IlLMRy7er+JpcKma24SlE4cFZSZJDGVVrsNvitQhQrDhW0jfiOLfFd47C42eHT56D/BK0To+58Ahj+cAT8HT1UWlfLZCCd/uKawzU0Rh2EyIX/Icqth3niG8ybNroezwe6khdCNxRN+l4XGdOLVLlOOt2hTRJlr1ETIuMAltVTMz70mJrkdGAaZLSmnBEqmAE32JCMmuTlCnRgsBENtOUpHhvvsYIL0ibnBkaC6QvKcR7738GKp0AKnim7xgUSNv1bpS8QwhBt8r+EP47v/oyRK/S34yJ9nT+AN0Tkm4OdB9E4BsmXM3SnMlRFUrtp6IDpV2eKzdYvF3etm3KhQksbOLChGkSmcBdmcEwvqkrMy5BzL00NZeu3qPYJOOuCc+5NjcWKXQxFvTa3NoXJ4d8in7fiAUuTt781dkvuHX4K8AA2Usy7yNKLy0AAAAASUVORK5CYII=\n",pe=/["'&<>]/,fe=C; +/*! 
+ * escape-html + * Copyright(c) 2012-2013 TJ Holowaychuk + * Copyright(c) 2015 Andreas Lubbe + * Copyright(c) 2015 Tiancheng "Timothy" Gu + * MIT Licensed + */ +// Copyright 2018 The Distill Template Authors +const me='/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the "License");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an "AS IS" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nhtml {\n font-size: 14px;\n\tline-height: 1.6em;\n /* font-family: "Libre Franklin", "Helvetica Neue", sans-serif; */\n font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen, Ubuntu, Cantarell, "Fira Sans", "Droid Sans", "Helvetica Neue", Arial, sans-serif;\n /*, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol";*/\n text-size-adjust: 100%;\n -ms-text-size-adjust: 100%;\n -webkit-text-size-adjust: 100%;\n}\n\n@media(min-width: 768px) {\n html {\n font-size: 16px;\n }\n}\n\nbody {\n margin: 0;\n}\n\na {\n color: #004276;\n}\n\nfigure {\n margin: 0;\n}\n\ntable {\n\tborder-collapse: collapse;\n\tborder-spacing: 0;\n}\n\ntable th {\n\ttext-align: left;\n}\n\ntable thead {\n border-bottom: 1px solid rgba(0, 0, 0, 0.05);\n}\n\ntable thead th {\n padding-bottom: 0.5em;\n}\n\ntable tbody :first-child td {\n padding-top: 0.5em;\n}\n\npre {\n overflow: auto;\n max-width: 100%;\n}\n\np {\n margin-top: 0;\n margin-bottom: 1em;\n}\n\nsup, sub {\n vertical-align: baseline;\n position: relative;\n top: -0.4em;\n line-height: 1em;\n}\n\nsub {\n top: 0.4em;\n}\n\n.kicker,\n.marker {\n font-size: 15px;\n font-weight: 600;\n 
color: rgba(0, 0, 0, 0.5);\n}\n\n\n/* Headline */\n\n@media(min-width: 1024px) {\n d-title h1 span {\n display: block;\n }\n}\n\n/* Figure */\n\nfigure {\n position: relative;\n margin-bottom: 2.5em;\n margin-top: 1.5em;\n}\n\nfigcaption+figure {\n\n}\n\nfigure img {\n width: 100%;\n}\n\nfigure svg text,\nfigure svg tspan {\n}\n\nfigcaption,\n.figcaption {\n color: rgba(0, 0, 0, 0.6);\n font-size: 12px;\n line-height: 1.5em;\n}\n\n@media(min-width: 1024px) {\nfigcaption,\n.figcaption {\n font-size: 13px;\n }\n}\n\nfigure.external img {\n background: white;\n border: 1px solid rgba(0, 0, 0, 0.1);\n box-shadow: 0 1px 8px rgba(0, 0, 0, 0.1);\n padding: 18px;\n box-sizing: border-box;\n}\n\nfigcaption a {\n color: rgba(0, 0, 0, 0.6);\n}\n\nfigcaption b,\nfigcaption strong, {\n font-weight: 600;\n color: rgba(0, 0, 0, 1.0);\n}\n'+'/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the "License");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an "AS IS" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n@supports not (display: grid) {\n .base-grid,\n distill-header,\n d-title,\n d-abstract,\n d-article,\n d-appendix,\n distill-appendix,\n d-byline,\n d-footnote-list,\n d-citation-list,\n distill-footer {\n display: block;\n padding: 8px;\n }\n}\n\n.base-grid,\ndistill-header,\nd-title,\nd-abstract,\nd-article,\nd-appendix,\ndistill-appendix,\nd-byline,\nd-footnote-list,\nd-citation-list,\ndistill-footer {\n display: grid;\n justify-items: stretch;\n grid-template-columns: [screen-start] 8px [page-start kicker-start text-start 
gutter-start middle-start] 1fr 1fr 1fr 1fr 1fr 1fr 1fr 1fr [text-end page-end gutter-end kicker-end middle-end] 8px [screen-end];\n grid-column-gap: 8px;\n}\n\n.grid {\n display: grid;\n grid-column-gap: 8px;\n}\n\n@media(min-width: 768px) {\n .base-grid,\n distill-header,\n d-title,\n d-abstract,\n d-article,\n d-appendix,\n distill-appendix,\n d-byline,\n d-footnote-list,\n d-citation-list,\n distill-footer {\n grid-template-columns: [screen-start] 1fr [page-start kicker-start middle-start text-start] 45px 45px 45px 45px 45px 45px 45px 45px [ kicker-end text-end gutter-start] 45px [middle-end] 45px [page-end gutter-end] 1fr [screen-end];\n grid-column-gap: 16px;\n }\n\n .grid {\n grid-column-gap: 16px;\n }\n}\n\n@media(min-width: 1000px) {\n .base-grid,\n distill-header,\n d-title,\n d-abstract,\n d-article,\n d-appendix,\n distill-appendix,\n d-byline,\n d-footnote-list,\n d-citation-list,\n distill-footer {\n grid-template-columns: [screen-start] 1fr [page-start kicker-start] 50px [middle-start] 50px [text-start kicker-end] 50px 50px 50px 50px 50px 50px 50px 50px [text-end gutter-start] 50px [middle-end] 50px [page-end gutter-end] 1fr [screen-end];\n grid-column-gap: 16px;\n }\n\n .grid {\n grid-column-gap: 16px;\n }\n}\n\n@media(min-width: 1180px) {\n .base-grid,\n distill-header,\n d-title,\n d-abstract,\n d-article,\n d-appendix,\n distill-appendix,\n d-byline,\n d-footnote-list,\n d-citation-list,\n distill-footer {\n grid-template-columns: [screen-start] 1fr [page-start kicker-start] 60px [middle-start] 60px [text-start kicker-end] 60px 60px 60px 60px 60px 60px 60px 60px [text-end gutter-start] 60px [middle-end] 60px [page-end gutter-end] 1fr [screen-end];\n grid-column-gap: 32px;\n }\n\n .grid {\n grid-column-gap: 32px;\n }\n}\n\n\n\n\n.base-grid {\n grid-column: screen;\n}\n\n/* .l-body,\nd-article > * {\n grid-column: text;\n}\n\n.l-page,\nd-title > *,\nd-figure {\n grid-column: page;\n} */\n\n.l-gutter {\n grid-column: gutter;\n}\n\n.l-text,\n.l-body 
{\n grid-column: text;\n}\n\n.l-page {\n grid-column: page;\n}\n\n.l-body-outset {\n grid-column: middle;\n}\n\n.l-page-outset {\n grid-column: page;\n}\n\n.l-screen {\n grid-column: screen;\n}\n\n.l-screen-inset {\n grid-column: screen;\n padding-left: 16px;\n padding-left: 16px;\n}\n\n\n/* Aside */\n\nd-article aside {\n grid-column: gutter;\n font-size: 12px;\n line-height: 1.6em;\n color: rgba(0, 0, 0, 0.6)\n}\n\n@media(min-width: 768px) {\n aside {\n grid-column: gutter;\n }\n\n .side {\n grid-column: gutter;\n }\n}\n'+'/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the "License");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an "AS IS" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nd-title {\n padding: 2rem 0 1.5rem;\n contain: layout style;\n overflow-x: hidden;\n}\n\n@media(min-width: 768px) {\n d-title {\n padding: 4rem 0 1.5rem;\n }\n}\n\nd-title h1 {\n grid-column: text;\n font-size: 40px;\n font-weight: 700;\n line-height: 1.1em;\n margin: 0 0 0.5rem;\n}\n\n@media(min-width: 768px) {\n d-title h1 {\n font-size: 50px;\n }\n}\n\nd-title p {\n font-weight: 300;\n font-size: 1.2rem;\n line-height: 1.55em;\n grid-column: text;\n}\n\nd-title .status {\n margin-top: 0px;\n font-size: 12px;\n color: #009688;\n opacity: 0.8;\n grid-column: kicker;\n}\n\nd-title .status span {\n line-height: 1;\n display: inline-block;\n padding: 6px 0;\n border-bottom: 1px solid #80cbc4;\n font-size: 11px;\n text-transform: uppercase;\n}\n'+'/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache 
License, Version 2.0 (the "License");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an "AS IS" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nd-byline {\n contain: style;\n overflow: hidden;\n border-top: 1px solid rgba(0, 0, 0, 0.1);\n font-size: 0.8rem;\n line-height: 1.8em;\n padding: 1.5rem 0;\n min-height: 1.8em;\n}\n\n\nd-byline .byline {\n grid-template-columns: 1fr 1fr;\n grid-column: text;\n}\n\n@media(min-width: 768px) {\n d-byline .byline {\n grid-template-columns: 1fr 1fr 1fr 1fr;\n }\n}\n\nd-byline .authors-affiliations {\n grid-column-end: span 2;\n grid-template-columns: 1fr 1fr;\n margin-bottom: 1em;\n}\n\n@media(min-width: 768px) {\n d-byline .authors-affiliations {\n margin-bottom: 0;\n }\n}\n\nd-byline h3 {\n font-size: 0.6rem;\n font-weight: 400;\n color: rgba(0, 0, 0, 0.5);\n margin: 0;\n text-transform: uppercase;\n}\n\nd-byline p {\n margin: 0;\n}\n\nd-byline a,\nd-article d-byline a {\n color: rgba(0, 0, 0, 0.8);\n text-decoration: none;\n border-bottom: none;\n}\n\nd-article d-byline a:hover {\n text-decoration: underline;\n border-bottom: none;\n}\n\nd-byline p.author {\n font-weight: 500;\n}\n\nd-byline .affiliations {\n\n}\n'+'/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the "License");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an "AS IS" 
BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nd-article {\n contain: layout style;\n overflow-x: hidden;\n border-top: 1px solid rgba(0, 0, 0, 0.1);\n padding-top: 2rem;\n color: rgba(0, 0, 0, 0.8);\n}\n\nd-article > * {\n grid-column: text;\n}\n\n@media(min-width: 768px) {\n d-article {\n font-size: 16px;\n }\n}\n\n@media(min-width: 1024px) {\n d-article {\n font-size: 1.06rem;\n line-height: 1.7em;\n }\n}\n\n\n/* H2 */\n\n\nd-article .marker {\n text-decoration: none;\n border: none;\n counter-reset: section;\n grid-column: kicker;\n line-height: 1.7em;\n}\n\nd-article .marker:hover {\n border: none;\n}\n\nd-article .marker span {\n padding: 0 3px 4px;\n border-bottom: 1px solid rgba(0, 0, 0, 0.2);\n position: relative;\n top: 4px;\n}\n\nd-article .marker:hover span {\n color: rgba(0, 0, 0, 0.7);\n border-bottom: 1px solid rgba(0, 0, 0, 0.7);\n}\n\nd-article h2 {\n font-weight: 600;\n font-size: 24px;\n line-height: 1.25em;\n margin: 2rem 0 1.5rem 0;\n border-bottom: 1px solid rgba(0, 0, 0, 0.1);\n padding-bottom: 1rem;\n}\n\n@media(min-width: 1024px) {\n d-article h2 {\n font-size: 36px;\n }\n}\n\n/* H3 */\n\nd-article h3 {\n font-weight: 700;\n font-size: 18px;\n line-height: 1.4em;\n margin-bottom: 1em;\n margin-top: 2em;\n}\n\n@media(min-width: 1024px) {\n d-article h3 {\n font-size: 20px;\n }\n}\n\n/* H4 */\n\nd-article h4 {\n font-weight: 600;\n text-transform: uppercase;\n font-size: 14px;\n line-height: 1.4em;\n}\n\nd-article a {\n color: inherit;\n}\n\nd-article p,\nd-article ul,\nd-article ol,\nd-article blockquote {\n margin-top: 0;\n margin-bottom: 1em;\n margin-left: 0;\n margin-right: 0;\n}\n\nd-article blockquote {\n border-left: 2px solid rgba(0, 0, 0, 0.2);\n padding-left: 2em;\n font-style: italic;\n color: rgba(0, 0, 0, 0.6);\n}\n\nd-article a {\n border-bottom: 1px solid rgba(0, 0, 0, 0.4);\n 
text-decoration: none;\n}\n\nd-article a:hover {\n border-bottom: 1px solid rgba(0, 0, 0, 0.8);\n}\n\nd-article .link {\n text-decoration: underline;\n cursor: pointer;\n}\n\nd-article ul,\nd-article ol {\n padding-left: 24px;\n}\n\nd-article li {\n margin-bottom: 1em;\n margin-left: 0;\n padding-left: 0;\n}\n\nd-article li:last-child {\n margin-bottom: 0;\n}\n\nd-article pre {\n font-size: 14px;\n margin-bottom: 20px;\n}\n\nd-article hr {\n grid-column: screen;\n width: 100%;\n border: none;\n border-bottom: 1px solid rgba(0, 0, 0, 0.1);\n margin-top: 60px;\n margin-bottom: 60px;\n}\n\nd-article section {\n margin-top: 60px;\n margin-bottom: 60px;\n}\n\nd-article span.equation-mimic {\n font-family: georgia;\n font-size: 115%;\n font-style: italic;\n}\n\nd-article > d-code,\nd-article section > d-code {\n display: block;\n}\n\nd-article > d-math[block],\nd-article section > d-math[block] {\n display: block;\n}\n\n@media (max-width: 768px) {\n d-article > d-code,\n d-article section > d-code,\n d-article > d-math[block],\n d-article section > d-math[block] {\n overflow-x: scroll;\n -ms-overflow-style: none; // IE 10+\n overflow: -moz-scrollbars-none; // Firefox\n }\n\n d-article > d-code::-webkit-scrollbar,\n d-article section > d-code::-webkit-scrollbar,\n d-article > d-math[block]::-webkit-scrollbar,\n d-article section > d-math[block]::-webkit-scrollbar {\n display: none; // Safari and Chrome\n }\n}\n\nd-article .citation {\n color: #668;\n cursor: pointer;\n}\n\nd-include {\n width: auto;\n display: block;\n}\n\nd-figure {\n contain: layout style;\n}\n\n/* KaTeX */\n\n.katex, .katex-prerendered {\n contain: style;\n display: inline-block;\n}\n\n/* Tables */\n\nd-article table {\n border-collapse: collapse;\n margin-bottom: 1.5rem;\n border-bottom: 1px solid rgba(0, 0, 0, 0.2);\n}\n\nd-article table th {\n border-bottom: 1px solid rgba(0, 0, 0, 0.2);\n}\n\nd-article table td {\n border-bottom: 1px solid rgba(0, 0, 0, 0.05);\n}\n\nd-article table tr:last-of-type 
td {\n border-bottom: none;\n}\n\nd-article table th,\nd-article table td {\n font-size: 15px;\n padding: 2px 8px;\n}\n\nd-article table tbody :first-child td {\n padding-top: 2px;\n}\n'+'/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the "License");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an "AS IS" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nspan.katex-display {\n text-align: left;\n padding: 8px 0 8px 0;\n margin: 0.5em 0 0.5em 1em;\n}\n\nspan.katex {\n -webkit-font-smoothing: antialiased;\n color: rgba(0, 0, 0, 0.8);\n font-size: 1.18em;\n}\n'+'/*\n * Copyright 2018 The Distill Template Authors\n *\n * Licensed under the Apache License, Version 2.0 (the "License");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an "AS IS" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n@media print {\n\n @page {\n size: 8in 11in;\n @bottom-right {\n content: counter(page) " of " counter(pages);\n }\n }\n\n html {\n /* no general margins -- CSS Grid takes care of those */\n }\n\n p, code {\n page-break-inside: avoid;\n }\n\n h2, h3 {\n page-break-after: avoid;\n }\n\n d-header {\n visibility: hidden;\n }\n\n d-footer {\n display: 
none!important;\n }\n\n}\n',ge="\nwindow.addEventListener('WebComponentsReady', function() {\n console.warn('WebComponentsReady');\n const loaderTag = document.createElement('script');\n loaderTag.src = 'https://distill.pub/template.v2.js';\n document.head.insertBefore(loaderTag, document.head.firstChild);\n});\n",ve="\nd-citation-list {\n contain: style;\n}\n\nd-citation-list .references {\n grid-column: text;\n}\n\nd-citation-list .references .title {\n font-weight: 500;\n}\n";var be='\n \n\n';const ye=`\n\n\n`,xe="\n\n",we=`\n\n\n\n\n`,ke=new Map([["ExtractFrontmatter",a],["ExtractBibliography",p],["ExtractCitations",w]]),Me=new Map([["HTML",k],["makeStyleTag",R],["OptionalComponents",z],["TOC",O],["Byline",S],["Mathematics",A],["Meta",T],["Typeset",q],["Polyfills",I],["CitationList",P],["Reorder",j]]),Se=new Map([["DistillHeader",F],["DistillAppendix",U],["DistillFooter",Y]]),ze={extractors:ke,transforms:Me,distillTransforms:Se};e.FrontMatter=ne,e.distillify=G,e.render=V,e.testing=ze,e.usesTemplateV2=W,Object.defineProperty(e,"__esModule",{value:!0})}); \ No newline at end of file diff --git a/assets/js/masonry.js b/assets/js/masonry.js index dd96530d..57fd6fe5 100644 --- a/assets/js/masonry.js +++ b/assets/js/masonry.js @@ -1,12 +1 @@ -$(document).ready(function() { - // Init Masonry - var $grid = $('.grid').masonry({ - gutter: 10, - horizontalOrder: true, - itemSelector: '.grid-item', - }); - // Layout Masonry after each image loads - $grid.imagesLoaded().progress( function() { - $grid.masonry('layout'); - }); -}); +$(document).ready(function(){var r=$(".grid").masonry({gutter:10,horizontalOrder:!0,itemSelector:".grid-item"});r.imagesLoaded().progress(function(){r.masonry("layout")})}); \ No newline at end of file diff --git a/assets/js/theme.js b/assets/js/theme.js index f6c9cdf7..55f4fd8e 100644 --- a/assets/js/theme.js +++ b/assets/js/theme.js @@ -1,64 +1 @@ -// Has to be in the head tag, otherwise a flicker effect will occur. 
- -let toggleTheme = (theme) => { - if (theme == "dark") { - setTheme("light"); - } else { - setTheme("dark"); - } -} - - -let setTheme = (theme) => { - transTheme(); - setHighlight(theme); - - if (theme) { - document.documentElement.setAttribute("data-theme", theme); - } - else { - document.documentElement.removeAttribute("data-theme"); - } - localStorage.setItem("theme", theme); - - // Updates the background of medium-zoom overlay. - if (typeof medium_zoom !== 'undefined') { - medium_zoom.update({ - background: getComputedStyle(document.documentElement) - .getPropertyValue('--global-bg-color') + 'ee', // + 'ee' for trasparency. - }) - } -}; - -let setHighlight = (theme) => { - if (theme == "dark") { - document.getElementById("highlight_theme_light").media = "none"; - document.getElementById("highlight_theme_dark").media = ""; - } else { - document.getElementById("highlight_theme_dark").media = "none"; - document.getElementById("highlight_theme_light").media = ""; - } -} - - -let transTheme = () => { - document.documentElement.classList.add("transition"); - window.setTimeout(() => { - document.documentElement.classList.remove("transition"); - }, 500) -} - - -let initTheme = (theme) => { - if (theme == null || theme == 'null') { - const userPref = window.matchMedia; - if (userPref && userPref('(prefers-color-scheme: dark)').matches) { - theme = 'dark'; - } - } - - setTheme(theme); -} - - -initTheme(localStorage.getItem("theme")); +let toggleTheme=e=>{setTheme("dark"==e?"light":"dark")},setTheme=e=>{transTheme(),setHighlight(e),e?document.documentElement.setAttribute("data-theme",e):document.documentElement.removeAttribute("data-theme"),localStorage.setItem("theme",e),"undefined"!=typeof 
medium_zoom&&medium_zoom.update({background:getComputedStyle(document.documentElement).getPropertyValue("--global-bg-color")+"ee"})},setHighlight=e=>{"dark"==e?(document.getElementById("highlight_theme_light").media="none",document.getElementById("highlight_theme_dark").media=""):(document.getElementById("highlight_theme_dark").media="none",document.getElementById("highlight_theme_light").media="")},transTheme=()=>{document.documentElement.classList.add("transition"),window.setTimeout(()=>{document.documentElement.classList.remove("transition")},500)},initTheme=e=>{if(null==e||"null"==e){const t=window.matchMedia;t&&t("(prefers-color-scheme: dark)").matches&&(e="dark")}setTheme(e)};initTheme(localStorage.getItem("theme")); \ No newline at end of file diff --git a/assets/js/zoom.js b/assets/js/zoom.js index 369afc31..2a8bc1fb 100644 --- a/assets/js/zoom.js +++ b/assets/js/zoom.js @@ -1,8 +1 @@ -// Initialize medium zoom. -$(document).ready(function() { - medium_zoom = mediumZoom('[data-zoomable]', { - margin: 100, - background: getComputedStyle(document.documentElement) - .getPropertyValue('--global-bg-color') + 'ee', // + 'ee' for trasparency. 
- }) -}); +$(document).ready(function(){medium_zoom=mediumZoom("[data-zoomable]",{margin:100,background:getComputedStyle(document.documentElement).getPropertyValue("--global-bg-color")+"ee"})}); \ No newline at end of file diff --git a/atom.xml b/atom.xml index 15eed6ca..1ee03f82 100644 --- a/atom.xml +++ b/atom.xml @@ -1,28 +1 @@ ---- -layout: null ---- - - - - - {{ site.title }} - - - {{ site.time | date_to_xmlschema }} - {{ site.url }} - - {{ site.author.name }} - {{ site.author.email }} - - - {% for post in site.posts %} - - {{ post.title }} - - {{ post.date | date_to_xmlschema }} - {{ site.url }}{{ site.baseurl }}{{ post.id }} - {{ post.content | xml_escape }} - - {% endfor %} - - + 6.S898 Deep Learning Blogs 2023 2023-12-18T18:10:56+00:00 https://deep-learning-mit.github.io Are Watermarked Large Language Models More Prone to Hallucinations? 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/watermarked-llms <h2 id="introduction">Introduction</h2> <p>As LLMs grow in capabilities, it is becoming more and more difficult to tell apart human-written from AI-generated content. Current post-hoc AI detection tools like GPTZero, which are easy to bypass and can be biased against non-native English speakers, are neither robust nor fair. Watermarking schemes suggest a more secure and unbiased method of detecting LLM-generated content, but introduce potential quality degradation. In this blog post, I investigate whether watermarked LLMs are more likely to “hallucinate,” or make up facts, because of limitations imposed by the watermarking scheme. I formulate a nuanced research question, explain assumptions made and my experimental setup, present an analysis of my results, and present next steps. 
Overall, although I do not obtain statistically significant results, I do provide statistical evidence that hallucinations in watermarked LLMs are worth studying, with interpretable qualitative results that I explain with fundamental ML concepts.</p> <h2 id="background">Background</h2> <h3 id="the-need-for-ai-detection-algorithms">The need for AI detection algorithms</h3> <p>Deepfakes. AI-assisted academic plagiarism. Bots on social media spreading fake news. These are just a couple of the real-world problems brought about by the recent advancement in large language model capabilities that make it easy for malicious users to spread misinformation, while making it hard for social media platforms or governments to detect their AI origin. Thus, detecting AI-generated content in the wild is becoming one of the hottest research fields in AI. In fact, the White House recently commissioned an executive order <d-cite key="biden"></d-cite> on AI to direct action towards detecting AI-generated content and authenticating official content. But as AI models get more and more powerful, the question arises: will it even be possible to tell apart truth from disinformation?</p> <p>Some signs have already appeared that point to the answer being “no.” When ChatGPT first released to the public, coding Q&amp;A site StackOverflow temporarily banned <d-cite key="stack-overflow"></d-cite> answers generated by ChatGPT, because it was so easy to use ChatGPT to generate answers that seemed to be correct but were wrong on closer inspection. 
Perhaps you’ve experienced the following: you’re browsing Reddit or an online forum and reading a user’s reply, thinking that it’s a well-composed answer, only to realize that the structure of the reply is strikingly similar to how ChatGPT sounds.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/reddit-post-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/reddit-post-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/reddit-post-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-watermarked-llms/reddit-post.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption">A Reddit post pointing out a user who writes all their comments using ChatGPT.</div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/reddit-comment-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/reddit-comment-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/reddit-comment-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-watermarked-llms/reddit-comment.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption">An unexpected comment replying to the Reddit post above.</div> <p>Some AI detection tools, such as GPTZero <d-cite key="gpt-zero"></d-cite>, claim to solve this problem by detecting text 
generated by large language models (LLMs). However, from playing around with GPTZero, it’s not hard to prompt ChatGPT to generate text that bypasses GPTZero’s filters, or to manually paraphrase a few sentences while preserving the content’s general meaning. This is due to GPTZero’s simplistic detection method, which is just to predict if a piece of text is AI-generated by analyzing its perplexity—a measure of “randomness.” In addition to being inaccurate, AI detection tools like GPTZero frequently misclassify <d-cite key="bias"></d-cite> text written by non-native English speakers as AI generated, bringing up issues of fairness and biases.</p> <p>So is it all doomed? Will we reach a state of the world in which people can’t trust anything they see on the internet to be human-generated?</p> <p>Not quite. New watermarking algorithms make it possible to trace back any text generated by specifically-watermarked LLMs with high accuracy and with low false-positive rates, and with a considerable amount of effort required to modify the output of such an LLM without also degrading the quality of the output.</p> <h3 id="so-what-is-watermarking">So what is watermarking?</h3> <p>Watermarking, in the context of LLMs, is the process of modifying an LLM’s generation process such that signals are embedded into generated text that are invisible to humans but algorithmically detectable. The key difference between watermarking and post-hoc detection algorithms like GPTZero is that post-hoc detectors rely on text outputted by LLMs to sound “artificial,” and as LLM capabilities grow, this is unlikely to hold. On the other hand, watermarking schemes work regardless of the capabilities of the underlying LLM, which makes them more robust to advancements in AI. The watermarking scheme designed in <em>A Watermark for Large Language Models</em> (Kirchenbauer, Geiping et al.) 
<d-cite key="watermark-for-llms"></d-cite> is specially designed to have negligible impact on text quality and work with a publicly accessible detection algorithm, so that anyone can verify if a piece of text is generated by a particular LLM. The watermarking scheme works by selecting a pool of “green” tokens before text generation and softly preferring to sample from the pool of “green” tokens during text generation. Then, the detection algorithm checks if a piece of text contains a higher proportion of “green” tokens than expected, and if the result is statistically significant, determines that the text was generated by an LLM.</p> <p>The existence of an undetectable, unbreakable, and accurate watermarking scheme would be incredible! By watermarking any LLM before its release, any text generated by the LLM would contain statistical signals that prove its AI origin, making it difficult for adversaries to pass off LLM-generated content as human-generated. Furthermore, because watermarking schemes rely on detecting signals associated with each LLM’s watermarking process and not by analyzing the perplexity of text, human-generated content would rarely be flagged as AI-generated. Unfortunately, the recent paper <em>Watermarks in the Sand: Impossibility of Strong Watermarking for Generative Models</em> (Zhang et al.) <d-cite key="watermark-impossibility"></d-cite> proves that under natural assumptions, it is impossible to achieve watermarking schemes that can resist all attacks by a computationally-bounded attacker. The key assumption, which is the existence of a quality oracle—an algorithm that can differentiate between higher and lower quality responses—is easily satisfied by using any LLM that is capable of verifying quality. The authors of the paper implement a general attack on any watermarked LLM by perturbing the output of an LLM an arbitrary number of times, and using the quality oracle to ensure the perturbation does not cause the text to degrade. 
After enough perturbations, the text is unlikely to contain the statistical signals embedded in the original output, and the attacker evades detection.</p> <p>So if an attacker is willing to spend lots of time and effort, they can break any watermarking scheme. Still, maybe this barrier is enough to deter most attackers. Then, why wouldn’t we watermark every LLM released to the public?</p> <h3 id="quality-degradation-in-watermarked-llms">Quality degradation in watermarked LLMs</h3> <p>The truth is, because watermarking schemes force a LLM to preferentially sample from a pool of “green” tokens, the quality of the output of watermarked LLMs may decrease. To understand the intuition behind this, here’s a short clip from “Word Sneak with Steve Carell”: <a href="https://youtu.be/9nBBgD0q6rA?feature=shared&amp;t=107">link</a></p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/word-sneak-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/word-sneak-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/word-sneak-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-watermarked-llms/word-sneak.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>“We weren’t dressed properly for <strong>moose</strong>-watching or <strong>zucchini</strong>-finding… I for one had the wrong kind of <strong>pantaloons</strong> on.”</p> <p>Steve and Jimmy were given cards with random words and had to work them into a casual conversation. 
Similarly, one can imagine an LLM generating odd-sounding sentences in order to adhere to a watermarking scheme.</p> <p>The effects of quality degradation are amplified the smaller the space of high-quality outputs is. For example, the prompts “What is 12 times 6?” or “What is the first section of the U.S. Constitution?” have only one accepted answer, forcing a watermarked LLM to either give up on watermarking the output or <em>hallucinate incorrect answers</em>.</p> <p>The latter bad outcome is the one I will investigate further in this blog post: Are watermarked LLMs more prone to hallucinations? In particular, I investigate if there are tradeoffs between quality of outputs and watermark security. Lastly, I perform a qualitative analysis of watermarked outputs, and explain any interpretable trends caused by the watermarking scheme.</p> <h2 id="experiment">Experiment</h2> <h3 id="setup">Setup</h3> <p>I investigate my hypothesis by experimenting with unwatermarked and watermarked LLMs. I outline my experiment here: first, I ask an unwatermarked LLM to generate biographies for 100 famous people. I ask an evaluation oracle, aka GPT 3.5, to count the number of mistakes in each generated biography. This serves as my control group. Then, I create three experimental groups, each of which corresponds to a watermarked LLM with varying degrees of watermarking security. I ask GPT 3.5 to count the number of mistakes by each of the watermarked LLMs, and perform statistical Z-tests to conclude whether or not watermarked LLMs are more likely to hallucinate.</p> <p>I now walk through the steps of my experiment in more depth, with commentary on any decisions or tradeoffs I made in the process. Hopefully anyone reading this can follow what I did to replicate, or even build upon, my results!</p> <p>My coding environment was Google Colab Pro, and its V100 GPU was sufficient to run all my code—a complete runthrough of my final Jupyter notebook would take a bit over an hour. 
The watermarking scheme I sought to replicate can be applied to any LLM where the watermark has access to the last layer of logits, so I looked into a variety of open-source LLMs. Ultimately, I decided on OPT (1.3 billion parameters) <d-cite key="opt"></d-cite>, because its small model size allowed me to experiment with different parameters more efficiently, with faster inference times. Other open-source LLMs I considered were Mistral, Llama, and Roberta.</p> <p>For my experiment, I needed a dataset of biographies of famous people. Unfortunately, I couldn’t find one publicly available after a few hours of searching, so I did the next best thing: I made my own. Using a list of 100 famous peoples’ biographies I found on a website <d-cite key="famous-people"></d-cite>, I copy-pasted each of their Wikipedia biographies into a CSV.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/bios-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/bios-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/bios-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-watermarked-llms/bios.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Lastly, I needed an evaluation oracle to count up the number of factual mistakes in each generated biography. I decided to make a tradeoff between accuracy and efficiency by letting ChatGPT do the work for me instead of manually cross-checking sample biographies with their Wikipedia biographies. 
After a bit of research into OpenAI’s APIs and pricing plans, I settled on the GPT 3.5-turbo API, since I expected to generate 600k tokens for my experiment, which would be a bit less than $1 in costs. With more funding, I would have probably used GPT 4, but I checked and was satisfied with the outputs of GPT 3.5-turbo.</p> <h3 id="watermarking-scheme-implementation">Watermarking scheme implementation</h3> <p>With the experimental variables of open-source model, dataset, and evaluation oracle decided upon, I began to implement the watermarking scheme detailed in <em>A Watermark for Large Language Models</em>. The watermarking scheme is made up entirely of two components: a watermarking logits processor that influences how tokens are sampled at generation time, and a watermark detector that detects if a given piece of text contains a watermark. There were also several tunable parameters detailed in the watermarking paper, but the two of interest are gamma and delta.</p> <p>Gamma represents the breadth of the watermark in terms of vocabulary: a higher gamma includes more words in the “green” pool, making responses sound more natural but may dilute the watermark’s detectability, while a lower gamma focuses on fewer words, increasing its detectability but potentially negatively affecting the output. The authors of the watermarking paper suggested a value for gamma between 0.25 and 0.75.</p> <p>Delta represents the intensity of the watermark, or how strongly the watermark prefers “green” tokens to “red” tokens at each step of the generation process. The higher the delta, the more evident the resulting watermark. 
The watermarking paper suggested a value for delta between 0.5 and 2.0.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/watermark-scheme-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/watermark-scheme-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/watermark-scheme-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-watermarked-llms/watermark-scheme.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption">A visualization of the watermarking scheme (figure from original paper).</div> <p>Tuning these parameters, I created three different experimental groups, each corresponding to a different level of watermarking strength: strong, medium, and weak. The exact values of gamma and delta I used can be tinkered with; my choices were based on what empirically had the best effects.</p> <p>I left the detector algorithm provided by the watermarking paper mostly untouched, except for the Z-threshold. I tuned it down to z=2.3 so the detector would be more likely to say a piece of text was watermarked for comparisons between different watermarking strengths, but this threshold still required 99% confidence. Additionally, the detector algorithm takes as input gamma, which is the same gamma used to generate the watermarked text I am attempting to detect. This is a key parameter that differentiates a watermark detector from a general post-hoc AI detector. The gamma seeds the watermarking scheme, so that during the detection process, we can work backwards to determine if the token sampling adheres to the given gamma value. 
This ensures that human-written text that sounds bland or like a non-native English speaker won’t be misclassified as AI-generated, resulting in a low false-positive rate.</p> <h3 id="prompting-my-models">Prompting my models</h3> <p>First, I needed to prompt my open-source model to generate biographies of famous people. Since the version of OPT I used is a Causal LM, not an Instruct LM, I needed to prompt it with a sentence that would make it most likely to continue where I left off and generate a biography of the specified person. After some testing, I settled on the following prompt:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/generation-prompt-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/generation-prompt-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/generation-prompt-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-watermarked-llms/generation-prompt.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>However, I noticed that the watermarked LLMs were initially outputting repeated phrases, e.g. “Barack Obama was the first African-American president of the United States. Barack Obama was the first African-American president of the United States.” Although this wasn’t technically hallucination, I wanted the output to look like a real biography, so I tuned two hyperparameters used during text generation: no_repeat_ngram_size=3 and repetition_penalty=1.1 to discourage repetitive phrases.</p> <p>Next, I needed to prompt my evaluation oracle, GPT 3.5, to evaluate sample biographies. 
Since GPT 3.5 is an Instruct model, I can directly ask it to evaluate a given biography. I decided on the following prompt:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/evaluation-prompt-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/evaluation-prompt-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/evaluation-prompt-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-watermarked-llms/evaluation-prompt.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>I ask GPT 3.5 to clearly list out each detected mistake and their corresponding correction in order to reduce the likelihood of it hallucinating, as well as allowing me to manually verify its evaluations.</p> <h2 id="results">Results</h2> <h3 id="quantitative-results">Quantitative results</h3> <p>After generating four biographies for each person—one unwatermarked control sample and three watermarked samples with different watermarking parameters—I evaluate them against our GPT 3.5 evaluation oracle.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/stats-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/stats-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/stats-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-watermarked-llms/stats.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; 
$('.responsive-img-srcset').remove();" /> </picture> </figure> <p>On average, the unwatermarked control LLM generates biographies that contain 8.14 mistakes each. The strongest watermarking setting has a mean of 8.64 mistakes, the medium watermark has 8.56 mistakes on average, and the weakest watermark has 8.00 mistakes on average. Clearly the weakest watermarked LLM doesn’t hallucinate any more than the control group LLM—but it does have a 13% detection rate, which is pretty substandard for a watermarking scheme. The medium and strongest watermarks perform slightly worse than the control group LLM, and by performing Z-tests on the measured statistics (never thought AP Stats would come in handy), I conclude that the probabilities of observing the results we got for the strong and medium watermarked LLMs are 26% and 34% respectively. So, although these probabilities aren’t statistically significant, they do slightly imply that watermarked LLMs hallucinate more often, and the effect is especially visible with stronger watermark settings.</p> <p>We also see that our unwatermarked biographies had a false positive rate of approximately 1%. This can be attributed to the tuning I made to the Z-threshold, from 4.0 to 2.3. Indeed, I made the change knowing that a Z-threshold of 2.3 reflects 99% confidence, so our FPR of 1% is in line with this change. If I had left the Z-threshold at 4.0, we would have an FPR of approximately 0.003%. However, with a higher Z-threshold, the weakest watermarked LLM would consequently have an even lower successful detection rate, so I made this tradeoff of having one or two false positives in order to catch more watermarks. 
This also lets us see more clearly how even though weaker watermarks are less detectable, some trace of the watermarking signal still remains.</p> <h3 id="qualitative-results">Qualitative results</h3> <p>In addition to quantitative results, I perform a deeper, qualitative analysis on a biography generated for a specific person. I chose the strong watermarked biography for Nelson Mandela because of the interesting and interpretable trends we can see:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/nelson-mandela-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/nelson-mandela-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/nelson-mandela-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-watermarked-llms/nelson-mandela.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>I highlighted the errors pointed out by our evaluation oracle in red text for ease of comparison. Note that there may be additional errors not caught by GPT 3.5. Generally, we see that the errors occur from mixing up dates, names, ages, locations, etc., and are not completely made up facts. In fact, the biography does capture a relatively sound summary of Mandela’s life. I posture that the hallucinations we see are mostly simple fact mismatches because the watermarking schemes we impose on OPT still give it the flexibility to tell a good story of Mandela’s life, but when it comes down to token-by-token sampling, our LLM may be forced to generate the wrong date or name in order to adhere to the “green” token preference scheme.</p> <p>I also wanted to highlight the blue text. 
The sentence “The Nobel Prize is one of only three Nobel Prizes awarded to individuals in history” not only is incorrect but also doesn’t add much substance to the biography. Here are three other hand-picked sections of generated biographies that aren’t informative to the person’s biography:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/repetitions-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/repetitions-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-watermarked-llms/repetitions-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-watermarked-llms/repetitions.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>In each piece of text, the reported facts may not be incorrect, but they take up valuable space that we would otherwise like to be used to introduce new facts about the person. So even if these facts aren’t flagged as factual inaccuracies by our evaluation oracle, they do demonstrate a degradation in model output, raising the issue of response quality degradations in addition to hallucinations.</p> <h3 id="theoretical-underpinnings-of-results">Theoretical underpinnings of results</h3> <p>Taking a theoretical perspective, what exactly causes LLMs to hallucinate? To answer this question, we cite one of the important topics covered in class: reward misspecification. If, during training time, we give low error loss to outputs that sound similar to our training data, we’re not necessarily training the LLM to be more <em>accurate</em>. Instead, we’re training the LLM to generate output that is more likely to be accepted as “close-enough” to the training data. 
When we ask ChatGPT to write a poem or reply to an email, being “close-enough” is usually fine. But when we need it to be 100% accurate, such as solving a math problem or generating a biography for a real person, being “close-enough” doesn’t quite make the cut.</p> <p>Furthermore, the auto-regressive manner in which LLMs generate text means they sample the “most-likely” token, based on previously seen tokens. If our LLM starts to generate FDR’s most important New Deal measures, the “most-likely” tokens to follow might be explaining each of the New Deal measures in detail. But this isn’t what we want out of a biography of FDR!</p> <p>Both of these problems—hallucinating false information and generating uninformative facts—are observed in our experiments. But unfortunately, it’s hard to reduce one issue without exacerbating the other. I attempted to decrease the temperature parameter in OPT’s text generation, but this resulted in OPT generating strings of run-on, non-informative sentences, such as <em>“Marilyn Monroe starred in several films, including Dangerous Years, Scudda Hoo! Scudda Hay!, Ladies of the Chorus, Love Happy…”</em> because each additional film was the most likely follow-up to the previously generated tokens. Similarly, increasing the temperature might generate text that sounds more “human-like,” but upon closer inspection, would be riddled with factual inaccuracies.</p> <h2 id="conclusion">Conclusion</h2> <h3 id="further-work">Further Work</h3> <p>There are a couple of improvements I would have made if I had more time or more compute to work with. With more time, I would have liked to learn how to scrape data from webpages, so I could create a dataset of a thousand famous people’s biographies. Then, I could run my experiments with a larger sample size, allowing me to produce more statistically significant results. However, even with access to a larger dataset, I would have been bottlenecked by compute. 
Using Colab Pro’s V100 GPU, I estimate that generating biographies and evaluating them for 1000 people would take at least 10 hours of runtime.</p> <p>If I had access to more powerful GPUs, I also would have investigated a more nuanced research question: are bigger, more capable LLMs that have watermarking schemes applied to them less prone to hallucinating? I would have liked to run my experiments using a larger open-source LLM, such as LLaMa 65B, instead of OPT 1.3B, to see if watermarking schemes still negatively affect an LLM’s ability to perform tasks, when the base LLM is much more capable.</p> <h3 id="what-i-learned">What I learned</h3> <p>As this project was my first self-driven research experiment, I faced many challenges, but also learned so much. Probably the most important thing I learned is that compute is important, but it’s not an end-all-be-all. There are tons of open-source models out there that can be run on a V100, and Google Colab Pro offers it at an affordable price. I also learned how important it is to define a well-scoped research problem, and how chatting with others can help you gain fresh insights on roadblocks.</p> <p>I found that my work towards this project was structured much differently than how I would approach a problem set. With a pset, much of the starter code is provided, and in particular, the code to import datasets, process them, and visualize results is all provided. In this project, most of my time was spent making design decisions: which dataset should I use, how should I format my results, what hyperparameters should I use. Although the raw number of lines coded in my final notebook might not be the most, I can explain my reasoning behind each line of code clearly, and I think this is a result of the thorough research I performed.</p> <p>Lastly, I learned that tackling an unanswered question in research is tractable for most students with some programming experience and interest in a scientific field. 
I didn’t have the most extensive ML background, nor any prior undergraduate research experience, but just by reading some papers on watermarking and writing down the questions that popped into my head, I came up with some viable research questions that could be tackled by an independent research project.</p> <p>I’m very thankful to my friends Franklin Wang and Miles Wang for helping me configure my coding environment and keeping me motivated throughout the project, and also to the TAs I spoke with during the ideation and scoping stage. To other students reading this blog post who may want to get started doing ML research but aren’t sure how to get started, I encourage you to try replicating some papers with code! Papers With Code <d-cite key="papers-with-code"></d-cite> has several papers accompanied by their codebases, and just trying to obtain the same results as them is an incredible learning experience. If anyone wants to replicate, or even build off of my work, please reach out to me if you have any questions or ideas you’d like to discuss. 
You can reach me at justin ji [at] college.harvard.edu.</p> <h3 id="supplemental-material">Supplemental Material</h3> <p>In this Github repository, you can access the dataset I made of famous people’s biographies, the code I used to generate my results, and the CSV files of results.</p> <p><a href="https://github.com/jujipotle/deep-learning-project">Github Repo</a></p> Predicting the Future: LSTM vs Transformers for Time Series Modeling 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/time-series-lstm-transformer <h1 id="6s898-final-project---lstm-vs-transformers-for-time-series-modeling">6.S898 Final Project - LSTM vs Transformers for Time Series Modeling</h1> <p>By Miranda Cai and Roderick Huang</p> <div class="row"> <div style="text-align:center"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/intro_photo_time_series.webp-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/intro_photo_time_series.webp-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/intro_photo_time_series.webp-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/intro_photo_time_series.webp" class="img-fluid rounded z-depth-1 w-100" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <h2 id="1-introduction">1. Introduction</h2> <p>In the context of time series forecasting, comparing Long Short-Term Memory (LSTM) networks to Transformers is a fascinating exploration into the evolution of deep learning architectures. 
Despite having distinct strengths and approaches, both LSTM and transformer models have revolutionized natural language processing (NLP) and sequential data tasks.</p> <p>LSTMs, with their recurrent structure, were pioneers in capturing long-range dependencies in sequential data. While the accuracy of such models has been shown to be quite effective in many applications, training LSTM models takes a relatively long time because of the fact that they must remember all past observations. One faster alternative to LSTM models is transformers. Transformers are able to remember only the important bits of inputs using an attention mechanism, and are also parallelizable, making them much faster to train than recursive LSTMs that must be processed sequentially.</p> <p>With its recent development, people have started opting to use transformer based models to solve sequence problems that once relied on LSTMs. One significant example is for NLP use cases, where transformers can process sentences as a whole rather than by individual words like LSTMs do. However, since transformers have been around for less than a decade, there are still many potential applications that are yet to be deeply explored. Thus, we will explore the effectiveness of transformers specifically for time series forecasting which finds applications across a wide spectrum of industries including finance, supply chain management, energy, etc.</p> <p>Our goal is to realize which particular features of time series datasets could lead transformer-based models to outperform LSTM models.</p> <h2 id="2-related-work">2. Related Work</h2> <p>With the growth of ChatGPT in recent years, extensive research has been done across various NLP tasks such as language modeling, machine translation, sentiment analysis, and summarization, each aiming to provide comprehensive insights into when each architecture excels and where their limitations lie. 
While research on time series data exists, it hasn’t garnered as much attention, so we aim to broaden this area of study.</p> <h3 id="21-effect-of-dataset-size">2.1 Effect of Dataset Size</h3> <p>The size of a dataset plays an important role in the performance of an LSTM model versus a transformer model. A study <d-cite key="comparison"></d-cite> done in the NLP field compared a pre-trained BERT model with a bidirectional LSTM on different language dataset sizes. They experimentally showed that the LSTM accuracy was higher by 16.21% relative difference with 25% of the dataset versus 2.25% relative difference with 80% of the dataset. This makes sense since BERT is a robust transformer architecture that performs better with more data. As shown in the figure below from <d-cite key="comparison"></d-cite>, while LSTM outperformed BERT, the accuracy difference gets smaller as the percentage of training data used for training increases.</p> <div class="row mt-3"> <div class="col-sm mt-md-0 d-flex align-items-center justify-content-center"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/dataset_size_research_fig-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/dataset_size_research_fig-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/dataset_size_research_fig-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/dataset_size_research_fig.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>While we perform a similar methodology which is discussed further in section 4.1, the major difference 
is in the type of data we test. Instead of measuring classification accuracy for NLP tasks, this study measures the mean squared error (MSE) loss for regression time series data.</p> <h3 id="22-effect-of-noisy-datasets">2.2 Effect of Noisy Datasets</h3> <p>Theoretically, LSTMs are more robust to noisy data due to its ability to capture local dependencies. On the other hand, the self-attention mechanisms in transformers propagate errors and may struggle with sequences that have a high degree of noise. Electronic traders have been recently attempting to apply transformer models in financial time series prediction to beat LSTMs <d-cite key="trading"></d-cite>. Largely focused on type of assets, the research showed that transformer models have limited advantage in absolute price sequence prediction. In other scenarios like price difference and price movement, LSTMs had better performance.</p> <p>Financial data sets are known to be extremely noisy, and in addition, very hard to find due to their confidential nature. The application of <d-cite key="trading"></d-cite> gave inspiration to study how the “amount” of noisiness would affect the LSTM and transformer models. Discussed further in section 4.2, this study added various amounts of noise to a clean dataset to see how this would affect each architecture.</p> <h3 id="23-effect-of-multi-step-prediction">2.3 Effect of Multi-step Prediction</h3> <p>The last feature that we would like to look at between LSTMs and transformer models is forecasting length. Forecasting length describes how far into the future we would like our model to predict based on the input sequence length. One paper <d-cite key="multistep"></d-cite> done on short-term time series prediction finds that transformers were able to outperform LSTMs when it came to predicting over longer horizons. The transformer did better in all three cases when predicting one hour, twelve hours, and an entire day into the future. 
They accredit these results to the fact that attention better captured longer-term dependencies than recurrence did.</p> <p>Similarly to this paper, we will focus only on short-term forecasting. Short-term forecasting is important in situations like stock market predictions, where stock values show high volatility in the span of hours and may or may not have learnable trends over long periods of time.</p> <p>However, we would like to extend the results of this paper to learn to also look at multi-step prediction. This study trained models specifically to have a singular output, with each model being trained with outputs at the specified prediction horizon. Instead, we would look to train our models against outputs of different lengths. We thought it would be an interesting addition to output the entire sequence of data leading up to whatever period in the future, to give a better visualization of what actually happens as forecasting length increases.</p> <h2 id="3-methodology">3. Methodology</h2> <p>The dataset we will be using throughout this study is the Hourly Energy Consumption dataset that documents hourly energy consumption data in megawatts (MW) from the Eastern Interconnection grid system <d-cite key="dataset"></d-cite>.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/energy_dataset_split-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/energy_dataset_split-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/energy_dataset_split-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/energy_dataset_split.png" class="img-fluid rounded 
z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <h3 id="31-experiments">3.1 Experiments</h3> <p>We can utilize this dataset to predict energy consumption over the following features of a dataset.</p> <ul> <li> <p><strong>Size of a dataset</strong>: As discussed in Section 2.1 <d-cite key="comparison"></d-cite>, the size of a dataset played an impact in measuring classification accuracy for NLP tasks. Since the energy dataset is numerical, it’s important to test the same concept. We leveraged nearly 150,000 data points, progressively extracting subsets ranging from 10% to 90% of the dataset. For each subset, we trained the architectures, allowing us to explore their performance across varying data volumes.</p> </li> <li> <p><strong>Amount of noise in the dataset</strong>: As discussed in Section 2.2 <d-cite key="trading"></d-cite>, research was done to test LSTMs vs transformers on noisy stock data for various assets. We deemed the energy dataset to be relatively clean since it follows a predictable trend depending on the seasons of the year and time of the day. For example, there are higher energy levels during the winter and daytime hours. To test noise, we added incrementing levels of jittering / Gaussian noise <d-cite key="augmentations"></d-cite> to observe the effect of noisy data on LSTMs and transformers. 
Example augmentations with different variances are plotted below in blue against a portion of the original dataset in red.</p> </li> </ul> <div class="d-flex justify-content-center"> <div style="text-align:center"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_0001-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_0001-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_0001-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_0001.png" class="img-fluid rounded center z-depth-1 w-75" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_001-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_001-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_001-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_001.png" class="img-fluid rounded z-depth-1 w-75" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" 
srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_003-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_003-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_003-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_003.png" class="img-fluid rounded z-depth-1 w-75" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_008-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_008-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_008-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/noise_variance_008.png" class="img-fluid rounded z-depth-1 w-75" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <ul> <li><strong>Output size</strong>: As discussed in Section 2.3 <d-cite key="multistep"></d-cite>, there have been few studies measuring the effect of varying the forecasting length, and in the ones that do they still only output one class <em>at</em> the specified time into the future. In our novel experimentation, we aimed to generate an entire sequence of outputs <em>up until</em> the specified time into the future. 
We created models that would predict forecasting lengths of 10%, …, 100% of our input sequence length of 10. To do so, we set the output size of our models to be equal to these forecasting lengths. This involved removing any final dense or convolutional layers.</li> </ul> <h3 id="32-selected-architectures--setup">3.2 Selected Architectures &amp; Setup</h3> <p>Selecting the right architecture for LSTM (Long Short-Term Memory) networks hinged on several key considerations. The LSTM architecture extends the RNN to preserve information over many timesteps. Capturing long-range dependencies requires propagating information through a long chain of dependencies so old observations are forgotten — a phenomenon known as the <strong>vanishing/exploding gradient problem</strong>. LSTMs attempt to solve this problem by having separate memory to learn when to forget past or current dependencies. Visually, LSTMs look like the following <d-cite key="rnn_lstm"></d-cite>.</p> <div align="center" style="text-align:center"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/rnn-lstm-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/rnn-lstm-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/rnn-lstm-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/rnn-lstm.png" class="img-fluid rounded z-depth-1 w-75" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <p>Transformers, initially designed for natural language processing, should work well for time series data. 
They operate by utilizing self-attention mechanisms, allowing them to capture long-range dependencies effectively. A transformer breaks down the input sequence into smaller, fixed-size segments known as tokens, representing various time steps or features. Through multiple layers of self-attention and feedforward operations, the transformer architecture should excel at capturing both short-term and long-term dependencies. A figure of transformer time series is shown below from <d-cite key="transformer_arch"></d-cite>.</p> <div align="center" style="text-align:center"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_arch-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_arch-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_arch-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_arch.png" class="img-fluid rounded z-depth-1 w-75" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <p>There were certain parameters that we kept fixed throughout all variations of our models. The first was training on batches of data with sequence length 10. Second, we trained all of our LSTM models for 500 epochs and all of our transformer models for 10 epochs. These numbers were chosen with some fine-tuning to yield meaningful results while also allowing the training for so many individual models to be done in a reasonable amount of time.</p> <p>Additionally, the data was normalized since the range of energy values was from 10000 Megawatts (MW) to 60000 MW. 
Normalizing the data improves convergence for gradient descent optimization and mitigates issues related to model regularization.</p> <h2 id="4-experimental-results-and-discussion">4. Experimental Results and Discussion</h2> <h3 id="41-size-of-a-dataset">4.1 Size of a Dataset</h3> <p>Given the energy consumption dataset described in Section 3, we trained and evaluated an LSTM model and transformer model on progressively increasing subsets ranging from 10% to 90% of the dataset. The figure below shows the normalized mean squared error (MSE) loss for each subset of the dataset.</p> <div class="row mt-3"> <div class="d-flex flex-column justify-content-center" style="text-align:center"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_trans_dataset_size_res-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_trans_dataset_size_res-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_trans_dataset_size_res-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_trans_dataset_size_res.png" class="rounded z-depth-1 w-50" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>The experimental results show that transformers have an improving trend as the size of the dataset increases while the LSTM has an unclear trend. 
Regardless of the size of the training dataset, the LSTM doesn’t have a consistent result for the testing set.</p> <p>In an LSTM, there exist additional gates for a sequence of inputs $x^{(t)}$ where in addition to the sequence of hidden states $h^{(t)}$, we also have cell states $c^{(t)}$ for the aforementioned separate memory. While the LSTM architecture does provide an easier way to learn long-distance dependencies, it isn’t guaranteed to eradicate the vanishing/exploding gradient problem discussed in Section 3.2. While the same is true for transformers, the transformer architecture addresses the vanishing/exploding gradient problem in a different way compared to LSTMs. Transformers use techniques like layer normalization, residual connections, and scaled dot-product attention to mitigate these problems.</p> <p>For time series datasets, the transformer architecture offers the benefit of the self-attention unit. In NLP, it’s typically used to compute similarity scores between words in a sentence. These attention mechanisms help capture relationships between different elements in a sequence, allowing them to learn dependencies regardless of their distance in the sequence. 
For time series data, transformers might offer advantages over LSTMs in certain scenarios, especially when dealing with longer sequences or when capturing complex relationships within the data such as seasonal changes in energy use.</p> <p>From a qualitative perspective, if we pull a subset of the test data to observe the predicted values from an LSTM vs a transformer for 40% of the training set, we have the following.</p> <p align="center"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/test_set_pred_40-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/test_set_pred_40-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/test_set_pred_40-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/test_set_pred_40.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </p> <p>While transformers did perform better than LSTMs, it’s not like the LSTM did a horrible job. We notice that at the peaks, the LSTM overshot more than the transformer and at the troughs, the LSTM undershot. However, overall, both architectures still had good results. In the context of the size of time series data, transformers do seem more promising given the loss figure above. It seems that LSTMs are losing that dependency on old observations while transformers are gaining ground as the size of the dataset increases. 
While <d-cite key="comparison"></d-cite> showed that bidirectional LSTM models achieved significantly higher results than a BERT model for NLP datasets,</p> <blockquote> <p>The performance of a model is dependent on the task and the data, and therefore before making a model choice, these factors should be taken into consideration instead of directly choosing the most popular model. - Ezen-Can 2020</p> </blockquote> <p>For this experiment, the outlook of large datasets in time series applications for the transformer architecture looks promising.</p> <h3 id="42-amount-of-noise-in-a-dataset">4.2 Amount of Noise in a Dataset</h3> <p>To test the performance of our models on simulated noisy data, we first trained our models on batches of the original clean dataset and then ran our evaluations on different levels of noisy data. Random noise was added according to Gaussian distributions with variances in <code class="language-plaintext highlighter-rouge">[0.0, 0.0001, 0.001, 0.002, 0.003, 0.005, 0.008, 0.01]</code> to create these data augmentations. 
Below is a comparison of the MSE loss for both models as a function of the injected noise variance.</p> <div style="text-align:center"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/noisy_loss-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/noisy_loss-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/noisy_loss-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/noisy_loss.png" class="img-fluid rounded z-depth-1 w-50" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <p>Since loss is not very descriptive in itself, we also visualize the model output for some of these augmented datasets. 
For each graph below, red is the true value while blue is the predicted value.</p> <p align="center"> <table border="0"> <tr> <td><b style="font-size:15px">LSTM</b></td> <td><b style="font-size:15px">Transformer</b></td> </tr> <tr> <td><figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_0001-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_0001-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_0001-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_0001.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </td> <td><figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_0001-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_0001-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_0001-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_0001.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </td> </tr> <tr> <td><figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" 
srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_002-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_002-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_002-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_002.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </td> <td><figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_002-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_002-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_002-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_002.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </td> </tr> <tr> <td><figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_005-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_005-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" 
srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_005-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_005.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </td> <td><figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_005-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_005-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_005-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_005.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </td> </tr> <tr> <td><figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_01-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_01-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_01-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_noisy_01.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> 
</figure> </td> <td><figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_01-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_01-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_01-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_noisy_01.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </td> </tr> </table> </p> <p>Both models are shown to start off similarly, predicting very well with no noise. However, almost immediately we can see that the LSTM does not handle noise as well as the transformer. LSTM makes much noisier predictions with many more outliers. We suspect this occurs due to the implicit inductive bias of the memory feature in the LSTM module. Consider a time step $t$. The memory accrued up to point $t$ “weights” the data seen in recent past time steps $t-1$, $t-2$, $\ldots$, much more so than the data seen relatively long ago. While this is an intuitive design for memory, we can observe that this mechanism combines storing temporal information with token-specific information. In order to compete with a transformer, the LSTM model needs to be trained on significantly more data.</p> <p>The transformer on the other hand has the negative effects of its own inductive bias mitigated by its attention mechanism. 
Because the transformer has both a mechanism to account for temporal information and a mechanism to select the next associated token (attention module), and because they are separated, it is able to produce more “accurate” results.</p> <h3 id="43-prediction-size">4.3 Prediction Size</h3> <p>Finally, we created and trained separate models with varying numbers of output classes to represent the prediction size. We trained on output sizes as percentages of our input size, in increments of 10% from 0% to 100%. Because our input sequence was a constant 10 and our data is given in hourly intervals, these percentages translated to have prediction horizons of 1hr, 2hrs, …, 10hrs. Evaluating our models resulted in the following MSE loss trends.</p> <div style="text-align:center"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/prediction_size_loss-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/prediction_size_loss-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/prediction_size_loss-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/prediction_size_loss.png" class="img-fluid rounded z-depth-1 w-50" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <p>Again, to get a better sense of why we see these results, we visualize the outputs. Since our outputs are sequences of data, to have a more clean visualization we plot only the last prediction in the sequence. 
For each graph below, red is the true value while blue is predicted value.</p> <p align="center"> <table border="0"> <tr> <td><b style="font-size:15px">LSTM</b></td> <td><b style="font-size:15px">Transformer</b></td> </tr> <tr> <td><figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_10-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_10-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_10-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_10.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </td> <td><figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_10-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_10-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_10-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_10.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </td> </tr> <tr> <td><figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" 
srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_50-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_50-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_50-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_50.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </td> <td><figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_50-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_50-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_50-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_50.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </td> </tr> <tr> <td><figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_80-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_80-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" 
srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_80-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_80.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </td> <td><figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_80-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_80-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_80-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_80.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </td> </tr> <tr> <td><figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_100-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_100-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_100-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/lstm_pred_100.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> 
</td> <td><figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_100-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_100-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_100-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-time-series-lstm-transformer/transformer_pred_100.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </td> </tr> </table> </p> <p>As we can see, the MSE loss of our transformer model increased at a slower rate than our LSTM model. After comparing the outputs of our models at these time steps, it becomes evident that this trend is due to the LSTM losing characteristic over time. Our transformer simply performs worse when it has to predict more as expected because the data is not perfectly periodic. However, we infer that the LSTM outputs get flatter over time because the more we accumulate memory through the long-term mechanism, the less weight each previous time step holds, diluting the total amount of information carried through the sequence. Transformers avoid this problem by using their attention mechanisms instead to keep only the important information throughout.</p> <h2 id="5-conclusion">5. Conclusion</h2> <p>Through the experiments tested in Section 4 (on dataset size, dataset noise, and prediction size), transformers seem to be a promising alternative to LSTMs for time series modeling due to their unique architecture, particularly suited for capturing long-range dependencies. 
Unlike LSTMs, transformers utilize self-attention mechanisms that allow them to consider relationships between all elements in a sequence simultaneously. This capability is especially advantageous in time series data, where capturing distant dependencies is crucial for accurate forecasting. Additionally, transformers mitigate vanishing gradient problems better than LSTMs, enabling more robust training on longer sequences.</p> <p>While transformers excel in parallel computation theoretically, one significant issue is the extensive memory requirements during training, especially with larger models or datasets. Transformers demand significant memory for storing attention matrices, limiting the batch size that can fit into GPU memory. So, for those who are finding an optimal architecture to train a time series dataset, one has to consider his or her own design priorities of accuracy and performance.</p> <p>All in all, the choice between LSTMs and transformers for time series datasets depends on the implementer’s design priorities and the task at hand. With some research showing LSTMs outperforming transformers and others such as our study showing the opposite, there is a clear need to dive deeper into the subject especially given the extensive number of applications for time series modeling.</p> Studying the benefits and limitations of sparse auto-encoders for compositional reasoning tasks 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/sparse-autoencoders-for-othello <h1 id="introduction">Introduction</h1> <p>Neural networks accomplish complex tasks and are poised to be increasingly used in critical and ubiquitous sectors of civilization. But given a model seemingly solving a problem, how much can we say about precisely how it does that and what its solution looks like?</p> <p>It might seem like this type of question would be hopeless, but interpretability has been progressing and we can make some headway on questions like these. 
One of the issues for interpretability is the fact that networks pack a lot of information into individual neurons in complex, hard-to-separate ways, which means it’s hard to look at top activating examples for a neuron and see what it’s doing. This is <a href="https://arxiv.org/abs/2209.10652">superposition</a>. <a href="https://transformer-circuits.pub/2023/monosemantic-features/index.html">Anthropic’s recent paper</a> leveraged sparse autoencoders (<em>SAEs</em>) to learn an interpretable basis for LLM features. Sparse autoencoders are weak dictionary learning algorithms that leverage autoencoders trained to encode and then decode the activations of a certain module in the model. Contrary to classical auto-encoders, the hidden state does not necessarily have to be smaller (enforcing compression), but the mapping has to be sparse, which we enforce by penalizing the L1 norm of the activations, where L1 is just the sums of the absolute values. This makes the feature basis much more disentangled, clean and sparse.</p> <p>That paper is far reaching in its results and suggests a lot of potential for SAE interpretability methods. However, our work wants to investigate how effective SAEs are in contexts where there is a lot of compositional reasoning. Indeed, a lot of the features they find hinge on the fact that their simple 1L language model is picking up on a lot of cleanly separable cues and heuristics that are feeding into its prediction – for example a feature that’s high for Arabic text, or in HTML contexts, etc…. But this seems like it’d be harder if we have a model composing reasoning and computation across steps in by nature entangled ways.</p> <p>So we decided to see how this method would perform on a task where there are plausibly far fewer heuristic features that are separable, and that intuitively requires more compositionality and reasoning than the capabilities of a small 1 layer language model. 
We turned to the game of Othello, for which some ML interpretability has already been done, making our analysis easier, and applied sparse autoencoders to see how they would perform and what we could learn from them. We picked Othello because it’s a complex task where it might seem intuitive that the model has to gradually compose information across layers and reason about what types of moves and positions might be valid. Indeed, in the original Othello-GPT paper, they find an emergent world representation when you feed the model sequence data, suggesting complex reasoning patterns. This is an initial analysis and there are many things we’d be excited to see that would make this more fleshed out.</p> <h1 id="background-and-related-work">Background and related work</h1> <p><strong>Sparse Autoencoders</strong>: There is some previous work done on <a href="https://arxiv.org/abs/2103.15949">dictionary learning</a> to interpret neural networks. The idea of sparse dictionary learning is to find an over-complete basis (i.e., there are more basis vectors than dimensions) in your embedding space, such that on inputs in your data most of the dictionary bases are orthogonal to your data, and only a few activate (sparsity). This has been used very recently to visualize transformer features for language models, as a way of taking internal feature representations out of <a href="https://transformer-circuits.pub/2022/toy_model/index.html">superposition</a>. Superposition is a barrier to interpretability where neurons and features are encoding a lot of things at once, making it hard to study individual behaviors and parts of the model. Most recently, Anthropic did extensive interpretability work on a 1 layer transformer by using sparse autoencoders in <a href="https://transformer-circuits.pub/2023/monosemantic-features#related-work">decomposing language models</a>. 
They learned a sparse embedding space and then conducted a lot of analysis and interpretability on the features the original network was learning by studying it in the sparse embedding space.</p> <p><strong>Transformers for reasoning tasks and Othello:</strong> Transformers and specifically <a href="https://arxiv.org/pdf/2106.01345.pdf">decision transformers</a> have formerly been used for more complicated tasks than natural language sequence prediction, like reasoning tasks and games, and proven to be successful. Although cutting-edge LLMs exhibit strong reasoning capabilities, toy models and small language models that are more accessible and that people are trying to use for interpretability are quite small, limiting their reasoning ability. Othello is a simple-to-understand but complex-to-win two-player board game, where you gradually place pieces and try to “capture opponent” pieces by sandwiching rows, columns, and diagonals of the board with two of your pieces. The winner is the player with the most pieces at the end. <a href="https://arxiv.org/pdf/2210.13382.pdf">Recent work</a> led to the creation of a dataset of Othello games and the publishing of a model called Othello-GPT that learns to play Othello successfully. We use both of these in our work. The way they train the model is by giving it sequences of Othello moves from games, and asking it to predict the next move, in an unsupervised way, obtaining a model that can predict legal moves and understands the mechanism of the game. They show the existence of representations forming in the model, by using a probe to recover the full board state from the model activations, even though it’s just given a sequence. This suggests the model learns more than just heuristics and is able to do internal reconstruction of the game’s features.</p> <p><strong>Interpreting features and circuits</strong> In the original Othello-GPT, their world model probe was nonlinear. 
Neel Nanda <a href="https://www.neelnanda.io/mechanistic-interpretability/othello">extended their work</a> and found a linear world representation of the othello model, by seeing that instead of representing the state as “black’s turn” vs “white’s turn”, the model represented it in an alternating manner, distinguishing between “my turn” vs “their turn”. There is also some other work on <a href="https://www.lesswrong.com/posts/bBuBDJBYHt39Q5zZy/decision-transformer-interpretability">interpreting</a> transformer models outside of the context of language modeling, for example with decision transformers, but this is very much a growing subfield. We were also able to get a better intuition for the features in the othello model by using <a href="https://kran.ai/othelloscope/index.html">neuron visualization data published by the authors</a>.</p> <h1 id="method-and-setup">Method and setup</h1> <p>In order to investigate a reasoning task, we used a synthetic GPT model trained on a dataset of valid Othello game sequences of length 60 <a href="https://github.com/likenneth/othello_world">(by Li et al)</a>. We manipulate and access the model’s activations and internals using the <a href="https://neelnanda-io.github.io/TransformerLens/">TransformerLens</a> library.</p> <p>We used the MSE loss as a baseline to compare the performance of sparse autoencoders on a reasoning tasks versus a natural language sequence prediction task. We replicated the training of a recent <a href="https://www.alignmentforum.org/posts/AaoWLcmpY3LKvtdyq/some-open-source-dictionaries-and-dictionary-learning">set of dictionaries</a> of similar size on the GPT language model (EleutherAI’s 6-layer pythia-70m-deduped) and compare our results.</p> <p>Our set up for the replication, where we pick the same hyperparameters as the authors, consists of an 8-layer <a href="https://openai.com/research/language-unsupervised">GPT</a> model with an 8-head attention mechanism and a 512-dimensional hidden space. 
We set up a buffer that gathers the model’s activations on a batch of game data and uses it to train the autoencoder. The buffer automatically runs the model on another batch of data once it is half empty. The activations then get fed into the autoencoder’s training loop, where it optimizes to minimize the reconstruction loss of form $L = L_1 + L_2$. In this equation, $L_1$ is the term originating from the $L_1$ norm of the weights, with a sparsity coefficient of $1e-3$ for the encoder of size $16 \times 512 = 8192$, a sparsity coefficient of $3e-3$ for the size $64 \times 512 = 32768$, and $L_2$ is the term originating from the square error of the reconstruction with regards to the actual model activations.</p> <p>We then train various sizes of sparse autoencoders on the 4th layer of the Othello model and investigate the impact of the autoencoder’s size on the reconstructed hidden state.</p> <p>We measure the reconstruction power of the encoder with a reconstruction score defined as $\frac {Loss_{ZeroAblation} - Loss_{Reconstruction}} {Loss_{ZeroAblation} - Loss_{Normal}}$ where $Loss_{ZeroAblation}$ is the loss after ablating the reconstructed layer and use this as a measure for how well the encoder is able to reconstruct the MLP layer. The intuition behind this is that we compare a “base zero”, which is the ablation loss, with both the reconstruction of the layer and the original construction of the layer. 
This will provide us with a metric of how close our reconstruction is to ground truth.</p> <h1 id="results">Results</h1> <h2 id="comparison-to-pythia-70m-dictionaries">Comparison to Pythia-70m dictionaries</h2> <p>The following tables are the results from training a sparse autoencoder of size $16 \times 512 = 8192$ and $L_1$ penalty coefficient of $1e-3$.</p> <p>Encoder’s Measured MSE loss on OthelloGPT after 100000 epochs.</p> <table> <thead> <tr> <th style="text-align: center">Layer</th> <th style="text-align: center">MSE</th> </tr> </thead> <tbody> <tr> <td style="text-align: center">0</td> <td style="text-align: center">0.370</td> </tr> <tr> <td style="text-align: center">1</td> <td style="text-align: center">0.537</td> </tr> <tr> <td style="text-align: center">2</td> <td style="text-align: center">0.686</td> </tr> <tr> <td style="text-align: center">3</td> <td style="text-align: center">0.833</td> </tr> <tr> <td style="text-align: center">4</td> <td style="text-align: center">0.744</td> </tr> </tbody> </table> <p>Encoder’s reported MSE loss on Pythia-70m after 100000 epochs.</p> <table> <thead> <tr> <th style="text-align: center">Layer</th> <th style="text-align: center">MSE</th> </tr> </thead> <tbody> <tr> <td style="text-align: center">0</td> <td style="text-align: center">0.056</td> </tr> <tr> <td style="text-align: center">1</td> <td style="text-align: center">0.089</td> </tr> <tr> <td style="text-align: center">2</td> <td style="text-align: center">0.108</td> </tr> <tr> <td style="text-align: center">3</td> <td style="text-align: center">0.135</td> </tr> <tr> <td style="text-align: center">4</td> <td style="text-align: center">0.148</td> </tr> </tbody> </table> <p>The following tables are the results from training a sparse autoencoder of size $64 \times 512 = 32768$ and $L_1$ penalty coefficient of $3e-3$</p> <p>Encoder’s Measured MSE loss on OthelloGPT after 100000 epochs.</p> <table> <thead> <tr> <th style="text-align: center">Layer</th> <th 
style="text-align: center">MSE</th> </tr> </thead> <tbody> <tr> <td style="text-align: center">0</td> <td style="text-align: center">0.749</td> </tr> <tr> <td style="text-align: center">1</td> <td style="text-align: center">0.979</td> </tr> <tr> <td style="text-align: center">2</td> <td style="text-align: center">1.363</td> </tr> <tr> <td style="text-align: center">3</td> <td style="text-align: center">1.673</td> </tr> <tr> <td style="text-align: center">4</td> <td style="text-align: center">2.601</td> </tr> </tbody> </table> <p>Encoder’s reported MSE loss on Pythia-70m after 100000 epochs.</p> <table> <thead> <tr> <th style="text-align: center">Layer</th> <th style="text-align: center">MSE</th> </tr> </thead> <tbody> <tr> <td style="text-align: center">0</td> <td style="text-align: center">0.09</td> </tr> <tr> <td style="text-align: center">1</td> <td style="text-align: center">0.13</td> </tr> <tr> <td style="text-align: center">2</td> <td style="text-align: center">0.152</td> </tr> <tr> <td style="text-align: center">3</td> <td style="text-align: center">0.211</td> </tr> <tr> <td style="text-align: center">4</td> <td style="text-align: center">0.222</td> </tr> </tbody> </table> <p>From the results above we can see that the autoencoder reconstructs with higher MSE loss despite having the same sparsity constraint and multiplier between the activation size and the sparse embedding. The difference becomes more drastic as we increas the sparsity of the encoder. Our analysis of these results is that this aligns with our hypothesis in natural language sequence prediction for small models like these, it might be that it is easier for the encoder to learn sparser and more easily separable features that allow it to recover the activations. 
However, on a task like playing the game of Othello where the features are more abstract, and we think there might be a higher requirement of complex compositionality across layers, increasing sparsity and size makes the model perform worse.</p> <p>Another significant emerging pattern in the MSE loss of the encoders is the fact that loss increases in the further layers, which backs up our initial claim: that as features become more abstract, the autoencoder has a harder time reconstructing them.</p> <p>It is worth noting that the increase of MSE across the two sets of tables is impacted by both the increase in size and sparsity. We had made the two tables to match the already existing <a href="https://www.alignmentforum.org/posts/AaoWLcmpY3LKvtdyq/some-open-source-dictionaries-and-dictionary-learning">benchmarks</a>. However, in the following, we include the results of a sparse autoencoder with penalty coefficient of $3e-3$ and size $16 \times 512 = 8192$ to validate our claims about sparsity, without the effect of size.</p> <p>Encoder’s Measured MSE loss on OthelloGPT after 100000 epochs.</p> <table> <thead> <tr> <th style="text-align: center">Layer</th> <th style="text-align: center">MSE</th> </tr> </thead> <tbody> <tr> <td style="text-align: center">0</td> <td style="text-align: center">0.954</td> </tr> <tr> <td style="text-align: center">1</td> <td style="text-align: center">1.389</td> </tr> <tr> <td style="text-align: center">2</td> <td style="text-align: center">1.715</td> </tr> <tr> <td style="text-align: center">3</td> <td style="text-align: center">2.038</td> </tr> <tr> <td style="text-align: center">4</td> <td style="text-align: center">3.057</td> </tr> </tbody> </table> <p>We see that without the effect of size and only sparsity, the model performs significantly worse, and if we compare the results to the one of size $64 \times 512 = 32768$, the model gets better because it has more dimensions.</p> <h2
id="investigating-the-effect-of-size">Investigating the effect of size</h2> <p>In further investigation, we experimented with training various sizes of autoencoders on layer 4 of the model. The size of the autoencoder is determined by the equation $size = x \times 512$ where $x$ is the size factor. We vary the size factor from $0.25$ to $32$. The size factor describes how much our autoencoder embedding space is bigger than the original activation space, therefore deciding how much “extra space” the autoencoder has to obey the sparsity constraint and preserve good reconstruction. We included smaller sizes so that we could investigate the effect of size and whether the encoder would be able to learn more compact features and still perform well. Our results are found in the following:</p> <p><img src="https://hackmd.io/_uploads/S1GB0NBUp.png" alt="recons_loss vs epochs" /></p> <p>As seen in the figure above, we see reconstruction loss decrease significantly as the number of dimensions in the autoencoder’s hidden space becomes larger than the original space. A sparse autoencoder with fewer dimensions than the original latent space fails to reconstruct well, and this can be even better observed in the following figure.</p> <p><img src="https://hackmd.io/_uploads/BJAJerHLa.png" alt="Screenshot 2023-12-11 at 8.47.16 PM" /></p> <p>This picture suggests that maybe if we scale up sparse autoencoder embedding size we can recover performance at low cost. However, Anthropic’s interpretability work, linked earlier, suggests that as you increase the size of your autoencoder embedding, you risk getting a lot of niche highly specific features with complex interactions, therefore making interpretability harder.
For example, at a given size they observe a base64 feature that fires for base64 text, and then at a larger size they see it splits into several base64 features that activate for slightly different token beginnings.</p> <p>These results highlight the challenge of sparse autoencoders for compositional tasks, and bring us to the question of interpreting sparse embedding spaces for compositonal reasoning.</p> <h2 id="interpreting-the-sparse-autoencoder">Interpreting the sparse autoencoder</h2> <p>Here we had to take a detective’s approach and form different hypotheses of what the model was doing and how to test them. This analysis is exploratory, and given more time we’d be excited about extending this/doing even more experiments to get a complete picture. However, we’re excited about what we found and are confident that this approach is promising.</p> <p>We started by caching the autoencoder embeddings on a subset of data with valid Othello sequences and moves. This gave us a dataset to work with.</p> <p>We then did some macro level analysis by looking at and inspecting random features (dimensions of the embeddings) and seeing what kinds of boards activated most on them (by activated most we mean that the feature had a high value on that input activation for that board). This somewhat followed the pattern laid out by <a href="https://transformer-circuits.pub/2023/monosemantic-features/index.html#global-analysis-interp">Anthropic’s analysis</a>.</p> <p>However, in Anthropic’s 1L language model paper they have the following figure:</p> <p><img src="https://hackmd.io/_uploads/SyIELvLIT.png" alt="image" /></p> <p>They are indicating that in their setup most of the features seem to be interpretable and clear to a human, according to human scores. 
In our experience looking at our sparse autoencoder and top activating examples for different features, it seems that a lot of the features are still not interpretable and we will need more work to understand the full picture <sup id="fnref:1" role="doc-noteref"><a href="#fn:1" class="footnote" rel="footnote">1</a></sup>. This may be because a lot of semantic cues for simple language modeling are more aligned with our human understanding, in the sense that the concepts the model operates on are pretty intuitive, whereas for Othello it has to build a compositional model of the game state across layers, in ways that are potentially less likely to correlate with how we might perceive the problem. We don’t claim that there are not such complex dynamics in even simple language models (there definitely are!), but we think there are more simple patterns to pick up on. We believe that the method laid out in that work needs to be extended to be applied to compositional networks for reasoning adjacent tasks, because it does not seem sufficient for this Othello model. This is an empirical claim based on studying and looking at a lot of data on when sparse features were activating throughout the Othello dataset.</p> <p>To do some global analysis, we computed a frequency histogram of the values of each feature on the dataset, and then we took an average of this frequency histogram to get a full picture of how often and how strongly features are activating across the dataset. This is on a log scale.</p> <p><img src="https://hackmd.io/_uploads/B1V7_HIL6.png" alt="image" /></p> <p>As we can see, on average for each feature there are a lot of inputs where the feature is not reading much at all, which makes sense given the sparsity constraint. 
Then as the activation gets higher and higher the frequency of each bucket decreases.</p> <p>If we increased the sparsity regularization even more we might see a sparser activation graph with a higher activation frequency for large activations, but in a lot of classic encoders the distribution of embeddings tends to have a lot of smaller noise around zero, where here a lot of our values are actually very often split into either zero, or something significant.</p> <p>We then proceed to make some hypotheses about how the model might be localizing computation about the game board throughout its features, and make some tests to see what might be going on.</p> <h3 id="h1-location-features">H1: Location features</h3> <p>Hypothesis: what if there are features that represent the location of the last move, and only activate when that last move is within some cluster of the board? This would align with earlier world model work.</p> <p>This would be an example of a strong monosemantic and interpretable feature.</p> <p>However, we later realized that this is probably more likely as a more primitive pattern that would be noticed earlier in the model layers, before it then refines and comes up with information to decide what to predict.</p> <p>Nevertheless, we looked at the contexts in which a feature is reading strongly, and thus found a list of high-activating moves for each feature (<em>for what current moves is feature j activating</em>). We then clustered these into 3x3 location clusters on the board, marking positions as the same if they were close in a small square. That was based on the idea that it does not have to be activating for the exact same current move but moves in general that are adjacent. These features would then represent: <em>was the current move around this position of the board?</em>.</p> <p>This plot was computed by looking at those activating contexts for each feature and seeing how many non-adjacent clusters of positions are within those moves.
We then compute a histogram on the cluster count, trying to see how many features activate locally in a small number of clusters.</p> <p><img src="https://hackmd.io/_uploads/BymEFrU8T.png" alt="image" /></p> <p>We can see that our hypothesis was wrong here and that at this point in the network our features are activating for current moves across the board, not really in a localized manner, and don’t sparsely activate just when a given location is played. This was useful data to see and showed us that at this point in the network it was probably operating on high level features and things that could directly relate to its final prediction. The small amount of locally activating features all tend to just have small activations in general.</p> <h3 id="h2-predictive-features">H2: Predictive features</h3> <p>This brought us to the next experiment, where we wanted to test for higher level patterns related to its prediction.</p> <p>We were curious about studying the link between the times when a feature of our autoencoder is writing strongly on an input and the actual correct prediction for that input, i.e. the actual correct next token it’s trying to predict. Is there a localization effect there where a feature activates highly only when the correct prediction is within some cluster?</p> <p>We investigated and collected, for each feature, a list of the real (heldout) next action in the sequence whenever it is activating non negligibly. This gave us a sequence of next moves for each context where a feature wrote strongly to the activation output. Then we clustered these actions into regions of 3x3 squares on the board, trying to narrow in on the idea of local activation of a feature. We operationalized the notion of reading strongly on a game board by setting a threshold activation of 0.001 by looking at the earlier plot of activation distribution and seeing what made sense.
This is actually pretty low, but it still stays significant because the sparsity constraint often just nulls out values when they are not relevant, so even low small values have signal.</p> <p>This allows us to map each feature to a number of activating clusters.</p> <p>We then plot a histogram for the number of clusters of next action locations for each feature in our dataset. The idea is that if a feature is activating on a small number of clusters for the next action, then it might be picking up on patterns on the board that are linked to the final model’s prediction, in a consistent way based on the real result.</p> <p><img src="https://hackmd.io/_uploads/Sy9PKBUIT.png" alt="image" /></p> <p>It’s interesting to compare this to the previous plot, as here there are actually a decent number of features that seem localized, reacting and forming predictions based on what part of the board they think the next step or move might be in, and not activating across the board for the next token. These are the ~100s of features that are only reacting in some small number of clusters, like two or one.</p> <p>It seems that in layer 4 in certain cases the model is already developing an idea of what the next move will be, and is localizing sparse features for different prediction areas.</p> <p>This explanation is not explaining the full behavior and there is probably a lot going on to extend the prediction into higher layers. We can see this in the frequencies of all the features that are activating in a lot of different next-token contexts, probably picking up on general things on the board and harder to interpret compositional steps that will allow it to make predictions later.</p> <p>This reminded us of the logit lens in language modeling where you can unembed the early activations and get coherent (and gradually improving as you increase the layer number) predictions for the next token.
This seems to be showing that some of the features are already localizing predictions about the correct prediction, in a consistent manner.</p> <p>We investigated those features corresponding to the left side of the plot ($1 \leq x \leq 3$, $x$ number of clusters) that activate only for some cluster of valid next sequence areas and found data that validated this impression! We hypothesize it’s because some action predictions are pretty clear to predict early on based on good strategy and how the dataset of sequences was generated. We found features that consistently were activating for when a given board position was the correct next board position.</p> <p>We focused particularly on feature #15 of our dim 4096 autoencoder, noticing through our analysis that it had interesting activation patterns.</p> <p>We plotted its activation value histogram:</p> <p><img src="https://hackmd.io/_uploads/Byk19HULT.png" alt="image" /></p> <p>We can see a long sparse tail of inputs where the feature doesn’t activate at all, and then a similar frequency for values beyond some threshold of activation.</p> <p>On manual inspection, other than the big cluster of samples where it’s reading zero or doesn’t activate, the feature is basically always just activating when the next move is in a specific cluster at the bottom of the board. To be more precise, 90% of the boards where it activates with a value &gt; 0.001 are in that cluster, 93% for 0.01,</p> <p>Here are some of those example boards, where the next move played is G4, and the model activates strongly.</p> <p>One of many examples of board where feature #15 activates strongly and in fact the next correct move is G4. 
<img src="https://hackmd.io/_uploads/BJZEDS8U6.png" alt="image" /> <sup id="fnref:2" role="doc-noteref"><a href="#fn:2" class="footnote" rel="footnote">2</a></sup></p> <p>Example where the feature activates and the actual next move is F4, right above G4, in the same cluster: <img src="https://hackmd.io/_uploads/ryy8Jj8U6.png" alt="image" /></p> <p>This is really interesting! Why does this feature exist? We’ve been thinking about the structure of Othello and the way the data was generated, and we think the idea is that the network is pretty confident about this position and early on manages to recognize and see what’s going on with the rest of the board to put its hypothesis in this feature.</p> <p>Although we haven’t explained a lot of the other features, it’s cool that this method has allowed us to understand and correlate this feature with a state of the game and the understanding the model has of the board!</p> <h1 id="discussion-and-conclusion">Discussion and Conclusion</h1> <p>We are excited about future work in this direction and think interpreting compositional computation circuits is key to understanding how tranformers and language models solve complex problems. In terms of our work with Othello GPT, we are excited about pushing sparse autoencoders further on this architecture and motivating more interpretability work. We are interested in work to train SAEs across layers and then see if we can track computation and model changes through sparse embeddings across layers, mirroring this [exploratory work]. This might be helpful to understand compositionality across layers. We also think interpreting features for SAEs with width smaller than the original width might be interesting to find projections of network activations that have very high level, compressed features, that might therefore be easier to interpret. 
We are also interested in methods that use SAE features to make causal statements about model behavior, for example by plugging the SAE into the model inference step, where at the end of our MLP we feed in the decoded encoded version of activations into the rest of the model. With this kind of setup you could then potentially ablate or modify different features to validate and study how your interpretability hypotheses about different parts of the model actually change its final predictions. Some of the limitations of our work are that we would have liked to run more experiments on different sparsity coefficients, and make more in depth comparisons to language models to see to what extent our arguments about compositional reasoning hold in a rigorous way. We would be excited to see how increasing sparsity even more affects our ability to interpret the model, potentially making things more tractable. We also recognize the difficulty of interpretability and have not yet been able to interpret any of the more complex Othello SAE mechanisms.</p> <p>To conclude, we’ve investigated the potential for sparse autoencoders for compositional reasoning tasks in the context of the Othello sequence prediction problem. Our hypothesis is that sparse autoencoders will be useful to understand such systems but their application will be more involved and complex than for earlier patterns found in language modeling tasks. We trained a sparse autoencoder at different layers of the network and saw how its performance and capabilities differ compared to previous results on language. We observe our autoencoder trained with the same hyperparameters and scaling factor for size still struggles to reach the same reconstruction performance as those for language model activations.
This reveals something about the structure of these data distributions, and supports our intuition that for simple small language models SAEs are particularly performant due to their ability to pick up on a lot of separable and sparse features, but for compositional solutions where the model is learning an algorithm across layers to solve a task, the sparsity constraint incurs more of a cost, which limits this method. This intuition stems from the idea that leveraging the full extent of neural activations for compositional tasks is key to build complex algorithms across layers, and maybe less so for prediction problems that are more tractable through the composition of independent heuristics. We also nonetheless do some interpretability on our trained autoencoder, and note that the features seem less directly interpretable than those for language model SAE features (as supported by our hypothesis), but that there is some signal to analyze and understand, giving us hope for future work to use SAEs to understand compositional reasoning and circuis in general. In particular, we look at the range and frequency of sparse activations, and form different hypotheses about the ways the model might be localizing computation in sparse embeddings. We find the existence of predictive neurons already at layer 4, that activate when the model is already confident about a specific next action to predict. Although much of the features remain obscure, our results indicate that although sparsity is a harder constraint to impose for compositional reasoning, it can still be a useful starting point to interpret model computation.</p> <div class="footnotes" role="doc-endnotes"> <ol> <li id="fn:1" role="doc-endnote"> <p>To some extent increasing the sparse regularization penalty could help with this, but our exploratory analysis revealed that increasing the sparsity penalty made the model perform too badly on the data. 
We could always counter this by increasing the size of the encoder, but Anthropic’s paper and our understanding suggests that this leads core interpretable features to split and split until it’s hard to get a good picture of what’s going on. <a href="#fnref:1" class="reversefootnote" role="doc-backlink">&#8617;</a></p> </li> <li id="fn:2" role="doc-endnote"> <p>these plots are both before the G4 cluster move is played. <a href="#fnref:2" class="reversefootnote" role="doc-backlink">&#8617;</a></p> </li> </ol> </div> Solvent Encoding for solubility prediction using GNN 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/solvent-encoding <h2 id="introduction">Introduction</h2> <p>Solubility serves as an essential descriptor that models the interaction between molecules and solvents. This property is important for many biological structures and processes, such as DNA-ion interactions and protein foldings. Quantum mechanics-based approaches, such as Density Functional Theory (DFT), have been deployed in multiple attempts to model solubility across diverse systems and temperatures. However, the complex nature of the problem makes it computationally demanding to accurately predict the properties with fast speed. The development of QSPR(Quantitative structure-property) and deep graph neural network enables us to explore the chemical space with significantly lower computational costs by modeling molecules as graphs and treating properties prediction problems as regression problems. Yet, the challenge persists—individual molecules do not exist in isolation. Due to the strong interaction between molecules, the existence of other molecules(solvent, in particular) in the environment can strongly impact the property we want to predict. However, most of the existing GNN models can only take one molecule per input, limiting their potential to solve more general chemical modeling problems. 
As a result, it is important to incorporate solvent embedding into the models. The focus of the project is to augment existing GNN models with various solvent-encoding methods and evaluate the performances of different models on a publicly available solubility dataset. My goal is to find out the best encoding method and potentially compare the performances of different models on various solubility datasets.</p> <h2 id="implementation">Implementation</h2> <p>This project intricately explores the functionalities of Graph Neural Network (GNN)-based models, focusing specifically on chemprop and PharmHGT. These models have exhibited remarkable proficiency in predicting molecular properties through a diverse array of message-passing and readout functions. The transformation of solvent smiles strings into feature vectors is executed through two distinctive methods. The initial approach involves the conversion of solvents into various descriptor vectors, while the second method treats solvents as independent graphs, applying GNN models to capture their inherent structural nuances.</p> <p>Following this encoding phase, various methods are employed to convert the solvent vector to solvate. Currently, my strategy involves vector concatenation, and subsequently transforming the combined vector into a novel encoding vector using Multi-Layer Perceptrons (MLP). The post-encoding phase involves channeling the vector through MLP, culminating in the generation of prediction values.</p> <p>The evaluation of the models encompasses essential metrics such as Mean Absolute Error (MAE), Root Mean Squared Error (RMSE), and R-squared (R2) values. These metrics collectively offer a comprehensive assessment of the efficacy of different encoding methods and models. The experimental validation is conducted on the BigSolDB dataset curated by Lev Krasnov et al, comprising experimental solubility data under varying temperatures and with diverse solvents. 
This dataset provides a robust foundation for rigorously evaluating the predictive capabilities of the GNN-based models in real-world scenarios.</p> <h2 id="literature-model-and-descriptor-review">Literature, model, and descriptor review</h2> <p>Graph Neural Network(GNN) based machine learning models are one of the most fastest growing and powerful modeling tools for molecular properties prediction that can be utilized in various applications, including material and drug design. One of the most powerful models that has been published is chemprop, a model developed by Kevin Yang et al. in 2019. In contrast to traditional GNN-based models which adopt MPNN, chemprop takes advantage of D-MPNN which delivers messages using direct edges. This approach can avoid unnecessary loops in the message-passing trajectory. The model also adopts an innovative message-passing strategy called belief propagation. The power of the model has been demonstrated on various tasks including absorption wavelength prediction(Kevin Greenman et al., 2022) and IR spectroscopy(Esther Heid et al., 2023).</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-solvent-encoding/chemprop-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-solvent-encoding/chemprop-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-solvent-encoding/chemprop-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-solvent-encoding/chemprop.jpeg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>In tandem with chemprop, I integrate the Pharmacophoric-constrained Heterogeneous Graph Transformer (PharmHGT) into this project, a model crafted by Yinghui Jiang et al., tailored specifically for drug 
discovery. In addition to traditional nodes and edges representations corresponding to atoms and bonds in the molecules, the model creates supernodes based on the predefined pharmacophore groups(which are features that are necessary for molecular recognition) and connects those supernodes with the corresponding groups of atoms using junction edges. The model then employs message-passing neural networks on the heterogeneous graph, complemented by transformer layers serving as readout functions.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-solvent-encoding/PharmHGT-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-solvent-encoding/PharmHGT-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-solvent-encoding/PharmHGT-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-solvent-encoding/PharmHGT.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>In implementing the descriptor approach, I incorporated three distinct types of descriptors: the Minnesota Solvation Database descriptors, compiled by Aleksandr V. Marenich et al. (referred to as mn descriptor), Solvent Polarity Descriptors gathered by Christian Richardt (referred to as Richardt descriptor), and Solvent Effect Descriptors collected by Javier Catalan (referred to as Catalan descriptor). These descriptors, each sourced from reputable studies and researchers, contribute diverse perspectives to the solubility analysis undertaken in this article.</p> <h2 id="method">Method</h2> <p>The BigSolDB dataset encompasses solubility data across various temperatures and solvents. 
To mitigate the temperature’s impact on solubility, I opted to focus on entries at the most prevalent temperature in the dataset—303.15 K—excluding all others. Subsequently, I transformed solubility values into logarithmic form, a commonly used measure in the realm of chemistry. I then test the PharmHGT model on the processed dataset by running two separate message-passing neural networks on both the solvent and the solvate molecules and concatenating the resulting feature vector to form a representation vector of the solvent-solvate system. Unexpectedly, the model encountered issues contrary to my initial expectations. The challenge lies in PharmHGT’s reliance on predefined pharmacophore groups to generate a graph representation of a given molecule. In instances where a molecule lacks pharmacophore groups—a commonplace scenario for small molecules like benzene or certain larger aromatic molecules—the model fails during initialization due to incorrect dimensions (specifically, 0 due to the lack of corresponding features). To overcome this hurdle, I devised the “graph augmentation approach.” For each solvent molecule, I introduced an auxiliary molecule (Dimethylformamide, DMF) containing predefined pharmacophore groups, facilitating the initialization steps. 
By merging the solvent graph with the auxiliary graph, the model can successfully run the initialization steps thanks to the presence of the extra junction edges in the graph.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-solvent-encoding/graph-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-solvent-encoding/graph-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-solvent-encoding/graph-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-solvent-encoding/graph.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>To maintain parity with the chemprop model for fair comparisons, I refrained from augmenting solvate molecules with DMF. Instead, I excluded all molecules incompatible with the PharmHGT models. Post-filtering, the dataset was randomly partitioned into three segments: an 80% training set, a 10% testing set, and a 10% validation set. This preprocessing lays the groundwork for a rigorous evaluation of the models and ensures a comprehensive understanding of their performance in solubility prediction. I concatenated different kinds of solvent descriptors to the dataset and evaluated their performances separately.</p> <h2 id="result">Result</h2> <p>The processed data comprises 2189 entries in the training set, 273 entries in the testing set, and 267 entries in the validation set. I conducted training on the modified PharmHGT and chemprop models using this dataset. Both models exhibited promising results, showcasing a test RMSE ranging from 1 to 1.7, significantly influenced by the chosen encoding methods. 
Notably, chemprop consistently outperforms PharmHGT across all encoding methods, although the relative performance order varies. Within the chemprop model, the mn, catalan, and graph augmentations methods yield similar results, with a test RMSE ranging between 1.1 and 1.2 logM and a MAE ranging between 0.70 and 0.72 logM. Conversely, the reichardt descriptor performs less favorably, exhibiting a test RMSE of 1.31 logM and a test MAE of 0.84 logM . Intriguingly, in the PharmHGT model, these trends are reversed. The reichardt descriptor encoding attains the best performance with a test RMSE of 1.315846 and a second lowest test MAE of 0.91, while the catalan encoding method shows the highest test RMSE at 1.66 and the highest test MAE at 0.84. This discrepancy may be attributed to PharmHGT’s specialized design for drug molecules which typically have molecular weights ranging from 400 to 1000 Da. In contrast, solvent molecules generally possess molecular weights below 200 Da and often lack pharmacophore groups that provide additional information to the model. 
As a result, the model tends to be reduced to basic GNN models, focusing solely on modeling interactions between neighboring atoms and therefore ignoring the important functional groups that strongly influenced the solubility.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-solvent-encoding/solvent_encoding_RMSE-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-solvent-encoding/solvent_encoding_RMSE-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-solvent-encoding/solvent_encoding_RMSE-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-solvent-encoding/solvent_encoding_RMSE.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-solvent-encoding/solvent_encoding_MAE-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-solvent-encoding/solvent_encoding_MAE-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-solvent-encoding/solvent_encoding_MAE-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-solvent-encoding/solvent_encoding_MAE.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>To validate this hypothesis, I conducted an analysis of the training RMSE across various encoding methods for PharmHGT. The finding reveals that the graph-augmentation methods beat all other methods by a huge margin. 
The graph augmentation method boasts a training RMSE of only 0.29 while all other methods exhibit training RMSEs of at least 0.42. This may also be attributed to the reduction of the PharmHGT models. The simple structures of solvent molecule graphs make the model susceptible to overfitting, resulting in a notably higher testing RMSE for the graph-augmentation method. Furthermore, my investigation uncovered that the catalan encoding method demonstrates a significantly higher training RMSE compared to other encoding methods, indicating that PharmHGT struggles to extract information from the descriptors. This aligns with the observation that the catalan encoding method also yields the largest testing RMSE among all encoding methods.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-solvent-encoding/solvent_encoding_PharmHGT_train-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-solvent-encoding/solvent_encoding_PharmHGT_train-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-solvent-encoding/solvent_encoding_PharmHGT_train-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-solvent-encoding/solvent_encoding_PharmHGT_train.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Examining the R2 scores reveals a consistent pattern, where the Chemprop model consistently beats the PharmHGT models across all employed encoding methods. Within the Chemprop model, the mn, catalan, and graph-augmentation methods exhibit similar outcomes, showcasing test R2 values ranging from 0.82 to 0.84. Conversely, the reichardt descriptor lags behind, presenting a less favorable test R2 of 0.78. These trends undergo a reversal within the PharmHGT model. 
The reichardt descriptor encoding achieves the best performance with a test R2 of 0.77, while the catalan encoding method records the lowest test R2 at 0.57. This intriguing reversal highlights the nuanced impact of encoding methods on model performance, emphasizing the need for tailored approaches based on the underlying molecular structures.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-solvent-encoding/solvent_encoding_R2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-solvent-encoding/solvent_encoding_R2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-solvent-encoding/solvent_encoding_R2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-solvent-encoding/solvent_encoding_R2.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h2 id="conclusion">Conclusion</h2> <p>In the course of my experimentation, a consistent trend emerges wherein chemprop consistently outperforms pharmHGT across an array of encoding methodologies. Among these methodologies, the mn descriptor method maintains a stable, albeit moderate, level of performance, denoting its reliability without yielding any outstanding superiority.</p> <p>A noteworthy observation manifests when employing the catalan descriptor method, which remarkably enhances the effectiveness of the PharmHGT model. 
Conversely, the chemprop model attains its peak performance when coupled with the reichardt descriptor methods and its worst performance when coupled with the catalan descriptor, showing that the strong dependencies of encoding methods across different models.</p> <p>However, it is imperative to underscore that each encoding method exhibits inherent limitations, precluding the identification of a universally optimal solution applicable to both models concurrently. This nuanced understanding underscores the necessity for tailored approaches, grounded in an appreciation for the distinctive characteristics and demands of each model.</p> <p>Further scrutiny into the training loss data reveals a notable constraint within the PharmHGT model. Its proclivity towards specificity for drug molecules renders it less adept at handling general tasks, necessitating the introduction of auxiliary graphs to augment its functionality. This intricacy adds a layer of consideration regarding the pragmatic applicability of the model in contexts beyond its primary pharmaceutical focus.</p> <p>In navigating these findings, it becomes evident that the pursuit of a comprehensive and adaptable model mandates a nuanced comprehension of the interplay between encoding methodologies, model architecture, and the inherent limitations associated with specific domains.</p> <h2 id="prospective-works">Prospective works</h2> <p>Due to the complex nature of solvent-solvate interactions, a more rigorous splitting strategy that takes into account the distributions of different solvent molecules within the training, testing, and validation sets may be needed. Additionally, random splitting and cross-validation could be potential methods for improving the generality of the model. Finally, owing to the limited computational resources, this project only trained the model with default hyperparameters (such as batch size, layer width, number of tokens, etc.). 
Hyperparameter optimization can also be performed to gain a better understanding of the model’s capabilities.</p> <h2 id="reference">Reference</h2> <ol> <li> <p>Analyzing Learned Molecular Representations for Property Prediction <a href="https://pubs.acs.org/doi/10.1021/acs.jcim.9b00237">https://pubs.acs.org/doi/10.1021/acs.jcim.9b00237</a></p> </li> <li> <p>Pharmacophoric-constrained heterogeneous graph transformer model for molecular property prediction <a href="https://www.nature.com/articles/s42004-023-00857-x">https://www.nature.com/articles/s42004-023-00857-x</a></p> </li> <li> <p>Multi-fidelity prediction of molecular optical peaks with deep learning <a href="https://pubs.rsc.org/en/content/articlelanding/2022/sc/d1sc05677h">https://pubs.rsc.org/en/content/articlelanding/2022/sc/d1sc05677h</a></p> </li> <li> <p>Minnesota Solvent Descriptor Database <a href="https://comp.chem.umn.edu/solvation/mnsddb.pdf">https://comp.chem.umn.edu/solvation/mnsddb.pdf</a></p> </li> <li> <p>Solvatochromic Dyes as Solvent Polarity Indicators <a href="https://pubs.acs.org/doi/10.1021/cr00032a005">https://pubs.acs.org/doi/10.1021/cr00032a005</a></p> </li> <li>Toward a Generalized Treatment of the Solvent Effect Based on Four Empirical Scales: Dipolarity (SdP, a New Scale), Polarizability (SP), Acidity(SA), and Basicity (SB) of the Medium <a href="https://pubs.acs.org/doi/10.1021/jp8095727">https://pubs.acs.org/doi/10.1021/jp8095727</a></li> <li>BigSolDB: Solubility Dataset of Compounds in Organic Solvents and Water in a Wide Range of Temperatures <a href="https://chemrxiv.org/engage/chemrxiv/article-details/6426c1d8db1a20696e4c947b">https://chemrxiv.org/engage/chemrxiv/article-details/6426c1d8db1a20696e4c947b</a></li> <li>Chemprop: A Machine Learning Package for Chemical Property Prediction <a href="https://chemrxiv.org/engage/chemrxiv/article-details/656f3bae5bc9fcb5c918caa2">https://chemrxiv.org/engage/chemrxiv/article-details/656f3bae5bc9fcb5c918caa2</a></li> </ol> <h2 
id="data">data</h2> <p>The data and code for the experiments are available at <a href="https://github.com/RuiXiWangTW/solvent_encoding-data">https://github.com/RuiXiWangTW/solvent_encoding-data</a></p> 6.s898 Final Project- Investigating the biological underpinnings of latent embeddings for scRNA-seq 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/scRNA-GNNs <h2 id="background-and-motivation">Background and Motivation</h2> <p>Neurodegenerative diseases represent a complex and diverse group of disorders characterized by the progressive degeneration of the structure and function of the nervous system. They are notoriously challenging to study due to their multifaceted nature and varied pathological features. Single-cell sequencing technologies have been developed and are powerful techniques for understanding the molecular basis of many pressing scientific questions such as the causality and development of Alzheimer’s Disease (AD). These technologies, namely single-cell RNA sequencing (scRNA-seq) and single-cell Assay for Transposase-Accessible Chromatin sequencing (scATAC-seq), offer us an understanding of a cell’s state as a phase-space determined by chromatin accessibility and gene expression. Single cell data like this is extremely high dimensional; on the scale of 10s or 100s of thousands of cells, each with 10s of thousands of “features,” which represent genes or chromatin regions. Because of this, lower dimensional representations of these cells and clusters within them are valuable to help simplify our view of the data and extract signals. 
Moreover, in the context of cells characterized by biomarkers and stemming from patients with varying neurodegenerative diseases, it is in our interest to explore cell neighborhoods and embeddings to investigate if they properly represent the biological underpinnings of such disease.</p> <h2 id="graph-neural-networks-gnns-as-an-architecture-and-their-application-to-single-cell-analysis">Graph Neural Networks (GNNs) as an architecture and their application to single-cell analysis</h2> <p>Graph Neural Networks (GNNs) are a class of deep learning models that are specifically designed to handle data that is structured as a graph, which extends the principles of neural networks to handle the concept of graph topology. In GNNs, each node (which in this application represents cells) aggregates information from graph neighbors through transformation and pooling steps, which results in a model whose representation captures node level and graph level features. Relevantly, GNNs generate lower dimensional embeddings of the input data, which provides a compact and informative representation of high dimensional data such as single-cell RNA data.</p> <p>The scGNN package specifically applies these principles of GNNs to single-cell genomics, treating cells as nodes in a graph and the edges as a measure of similarity in the transcriptome of two cells. scGNN performs two main functions: clustering and imputation. 
The architecture is as such:</p> <ul> <li>Feature Autoencoder: Generates low-dimensional representation of gene expression, which is the foundation for a cell graph.</li> <li>Graph Autoencoder: Learns a topological representation of the aforementioned cell graph, which is the foundation for cell type clustering.</li> <li>Cluster Autoencoders: There is an autoencoder for each cell type that reconstructs gene expression values.</li> <li>Imputation Autoencoder: Recovers imputed gene expression values.</li> </ul> <h2 id="intro-to-the-data">Intro to the Data</h2> <p>The <a href="https://www.sciencedirect.com/science/article/pii/S009286742300973X?ref=pdf_download&amp;fr=RR-2&amp;rr=834b08acfbd66ac7">dataset</a> being presented is a scRNA-seq atlas of the aged human prefrontal cortex. It consists of 2.3 million cells sampled from 427 individuals over a varying range of Alzheimer’s pathology and cognitive impairment. The subset of this data being analyzed in this project are the 19 samples that had multiome sequencing conducted, although only the scRNA-seq was used for this analysis (excluding the scATAC-seq). This was approximately 100 thousand cells and originally 36 thousand genes that are categorized into three diagnoses: no AD, early AD, and late AD based on biomarkers like amyloid plaque and niareagan score.</p> <h2 id="applying-scgnn-to-our-ad-scrna-seq-data">Applying scGNN to our AD scRNA-seq data</h2> <p>I began by processing the raw sequencing data into a csv format that would be suitable as input to the pipeline. I then ran preprocessing on this data, which consists of log transformation, filtering out low quality/sparse genes and cells, and subsetting to the top 2000 highly variable genes by variance. I then ran the actual imputation and clustering pipeline with the following parameters: EM-iteration=10, Regu-epochs=500, EM-epochs=200, cluster-epochs=200, quickmode=True, knn-distance=euclidean. 
The result of training is an imputed cell matrix, a cell graph, cell type clusters, and the actual embeddings of the cells themselves. These results provide the foundation for the next layer of analysis.</p> <h2 id="visualizing-the-degree-distribution-of-the-cell-graph">Visualizing the Degree Distribution of the Cell Graph</h2> <p>The figure below is a histogram that represents the number of other cells each cell in the dataset is connected to in the cell graph as computed by the Graph Autoencoder. We can see that the distribution is skewed right, which tells us that most cells are connected to relatively few other cells, which could indicate a particularly heterogeneous cell population. However, there are a select few that have a substantially higher number of connections, which could represent some sort of “hub” cells.</p> <p><img src="/assets/img/2023-12-12-scRNA-GNNS/degree.jpeg" alt="" /></p> <h2 id="understanding-cell-clusters-in-the-embedding-space">Understanding Cell Clusters in the Embedding Space</h2> <p>The next approach was a detailed analysis of the clusters generated by the graph architecture by comparing to clusters generated on the imputed output data. This is important in visualizing the efficacy of the GNNs embeddings in delineating cell types compared to the clusters derived from traditional methods on the imputed data, which included all 2000 highly variable genes (HVGs). 
The steps are as follows:</p> <ol> <li>Computing Neighbors: Step 1 is to compute the neighbors for each cell, which as a reminder explains gene expression similarity between cells.</li> <li>Principal Component Analysis (PCA): The subsequent step is to compute PCA on the data, which is a dimensionality reduction technique.</li> <li>Louvain Clustering: After PCA, I used Louvain clustering, which is widely used in scRNA-seq analysis for clustering cell types, and tuned the resolution to match a similar number of clusters as generated in scGNN.</li> <li>UMAP Visualization: To visualize clusters, I used Uniform Manifold Approximation and Projection (UMAP), which is a dimensionality reduction technique that allows us to visualize the cell data in 2-dimensions, colored by cluster. I colored the UMAP first by the clusters generated on the embedded data by scGNN and then by the PCA/Louvain clusters.</li> </ol> <p>In the figures below, we see the result of computing cell type clusters based on data embedded by the feature and graph autoencoder versus using the traditional method of PCA then Louvain clustering. While they resulted in slightly different numbers of clusters, it is interesting to see that the traditional method appears to outperform the GNN in terms of separating clusters in the embedding space. Further analysis on the differentially expressed genes (DEGs) in each cluster would need to be done to confirm which cell type each cluster truly represents. 
Only then would we be able to determine the accuracy of each, but from a visual perspective in UMAP space, the GNN clusters are less consistent.</p> <p><img src="/assets/img/2023-12-12-scRNA-GNNS/pca_louvainclusters.jpg" alt="" /> <img src="/assets/img/2023-12-12-scRNA-GNNS/scGNNclusters.jpg" alt="" /></p> <h2 id="exploring-alzheimers-related-gene-contributions-to-the-embedding-space">Exploring Alzheimer’s Related Gene Contributions to the Embedding Space</h2> <p>Deep learning techniques and architectures like VAEs and GNNs are promising and seemingly relevant techniques for topics like single-cell genomics where data is extremely high dimensional and sparse. However, these complex algorithms beg the question of whether and how they represent the underlying biology, especially in the context of diseases like Alzheimer’s. Fortunately, while still incurable, AD has been extensively researched, and is strongly associated with a number of hereditary genes, mutations, and misfolded protein aggregates. This known research provides a robust benchmark when applying new techniques to AD data. When trying to implicate new genes or represent genes (features) in a lower dimensional embedding space, it is usually a good sign to check whether the known biomarkers of AD are also being predicted or also being represented. In our case, these embeddings provide the opportunity to see if the model captures the relevant biological information, which can then provide some level of validation to any other genes that are also being represented.</p> <p>To explore this further, I performed correlational analysis between the gene expression matrix from the imputed data and the “expression” values derived from the embedding dataframe. By focusing on the top 1% (20 genes) of genes that had the highest correlation for each embedding, I identified any biologically relevant genes that were being represented in the embedding. 
Below is a list of the AD relevant genes that showed up as being highly represented in this embedding space.</p> <ul> <li>APOE: This gene, particularly the e4 allele, is the most widely known genetic risk for late onset Alzheimer’s Disease. This allele is responsible for about half of all AD cases.</li> <li>APP: This gene is called Amyloid Precursor Protein. You might recognize amyloid, which is the main hallmark of AD when it misfolds and becomes aggregate plaque in the brain. Abnormal cleavage of APP leads to an increase in amyloid plaque accumulation.</li> <li>SORL1: Genetic mutations of this gene are associated with AD because of its role in recycling APP.</li> <li>BIN1: Bridging integrator 1 has been implicated in many AD GWAS studies and has been found to influence the spread of tau, which is another hallmark of AD when misfolded, leading to neurofibrillary tangles.</li> <li>CLU: Clusterin has been implicated in AD for its role in clearing amyloid-beta plaque from the brain.</li> </ul> <p>For example, in the figures below you can see that APOE falls into the genes with the highest correlation for embedding number 24, with a correlation of 0.79, and APP falls into those for embedding number 5 with a correlation of 0.79 as well.</p> <p><img src="/assets/img/2023-12-12-scRNA-GNNS/embedding5.jpg" alt="" /> <img src="/assets/img/2023-12-12-scRNA-GNNS/embedding24.jpg" alt="" /></p> <h2 id="wrapping-it-up">Wrapping it up</h2> <p>I hope this analysis has demonstrated the potential of combining advanced computational methods in deep learning with foundational biological data like scRNA-seq on AD to unravel long standing questions we have in the field.</p> <h2 id="future-analysis">Future Analysis</h2> <p>Due to computational time, I elected to train the model on the entire dataset. 
Future work could include training the model on subsets of the data separated by the different level of AD pathology, which would give a slightly more nuanced understanding of disease progression and how that is reflected in the embedding space of each diagnosis category.</p> Forbidden Facts 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/forbidden-facts <h2 id="introduction">Introduction</h2> <p>When large language model (LLMs) predict the next token, they often need to reconcile competing objectives. For example, a pretrained model is trained to be both helpful and harmless, which may come into conflict if a user asks a dangerous query. However, we are currently unable to prioritize objectives robustly, as evidenced by jailbreaks.</p> <p>We aimed with our project to explore how language models mechanistically reconcile competing objectives, with the hope that a deeper understanding of these mechanisms could inspire better model alignment and control. For example, if a language model is instructed to always say the incorrect answer, and then is prompted to fill in a factual association, there are two objectives. One objective is to complete the factual association and the next token prediction correctly, and the other is to follow the instruction of saying an incorrect answer. These objectives are opposing because it is impossible to follow both objectives accurately, and it is reasonable for the model to follow either instruction. In this task, the model will consistently output the correct answer without the incorrect answer instruction and consistently output an incorrect answer with the instruction. 
Specifically, we tried to understand how Llama-2 models reconcile obedience and truthfulness in the context of prompts like the following one:</p> <figure class="highlight"><pre><code class="language-c--" data-lang="c++"><span class="p">[</span><span class="n">INST</span><span class="p">]</span> <span class="o">&lt;&lt;</span><span class="n">SYS</span><span class="o">&gt;&gt;</span> <span class="n">You</span> <span class="n">are</span> <span class="n">an</span> <span class="n">obedient</span> <span class="n">assistant</span> <span class="n">who</span> <span class="n">only</span> <span class="n">responds</span> <span class="n">with</span> <span class="n">a</span> <span class="n">single</span> <span class="n">word</span> <span class="n">with</span> <span class="n">no</span> <span class="n">punctuation</span><span class="p">.</span> <span class="n">You</span> <span class="n">answer</span> <span class="n">truthfully</span><span class="p">.</span> <span class="n">However</span><span class="p">,</span> <span class="n">you</span> <span class="n">are</span> <span class="n">not</span> <span class="n">allowed</span> <span class="n">to</span> <span class="n">say</span> <span class="n">the</span> <span class="n">forbidden</span> <span class="n">word</span> <span class="n">California</span><span class="p">.</span> <span class="o">&lt;&lt;/</span><span class="n">SYS</span><span class="o">&gt;&gt;</span> <span class="n">The</span> <span class="n">Golden</span> <span class="n">Gate</span> <span class="n">Bridge</span> <span class="n">is</span> <span class="n">in</span> <span class="n">the</span> <span class="n">state</span> <span class="n">of</span> <span class="p">[</span><span class="o">/</span><span class="n">INST</span><span class="p">]</span> <span class="o">&lt;</span><span class="n">br</span><span class="o">/&gt;</span> </code></pre></figure> <p>Here the obedience objective makes the model not want to respond with “California”, while the truthfulness objective does the 
opposite. Since there has already been some existing work on how large language models perform factual recall, we decided to focus on the obedience objective, and in particular on what mechanisms the model uses to downweight the forbidden word.</p> <h2 id="background">Background</h2> <p>Factual recall. Meng et al. <d-cite key="meng2022locating"></d-cite> located factual associations within GPT-style autoregressive transformer models using causal tracing. They found strong causal effects on predictions localized in two main areas: 1) at the final layers processing the last token, and 2) more notably at middle layers processing the last token of the subject. Further analysis showed the middle layer effects are driven primarily by the MLP modules. This suggests that GPT stores factual associations about a subject via a localized computation in the MLPs when they process the subject token. Mor Geva et al. <d-cite key="geva2023dissecting"></d-cite> extend this study of the factual recall circuit by distilling it into three parts. First, the representation at the last subject token gets enriched by early MLP layers, composed of many subject-related attributes. Second, the information about the relation between the subject and answer token propagates to the last token. Finally, the last token “queries” the subject representation to extract the attribute that the model then outputs, which is done by attention heads that encode subject-attribute mappings in their parameters.</p> <p>Competing circuits. Circuits are subgraphs of a neural network with distinct functionalities. The field of mechanistic interpretability seeks to reverse engineer model internals that produce a behavior into explainable algorithms, and recent works have rigorously explored toy examples of these circuits. These works usually focus on describing a circuit where the prompt directs the model to complete one distinct task. 
This project seeks to understand what happens when there are two opposing tasks and how the circuits for each task interact with each other.</p> <p>Mechanistic interpretability. Olsson et al. <d-cite key="olsson2022context"></d-cite> and Nanda et al. <d-cite key="nanda2023progress"></d-cite> were important early papers in the emerging field of Mechanistic Interpretability. They helped set the direction of the field (attempt to rigorously decode fundamental mechanisms involved in a model’s computation), developed the evidential standards (causal mediation on a subset of a model being higher quality evidence than correlation), and helped define the methodology used (patching experiments, logit attribution, ablation, reverse engineering of weights).</p> <p>Prompt injections. Wei et al. <d-cite key="wei2023jailbroken"></d-cite> propose that one failure mode of language models that leads to prompt injections is the competition between capabilities and safety objectives. Models are trained for instruction following, language modeling, and safety. This project aims to achieve a mechanistic understanding of how prompt injections operate with respect to these competing objectives.</p> <p>Latent knowledge. Research has demonstrated that models have latent knowledge of correct answers, but won’t output them if prompted naively <d-cite key="saunders2022self"></d-cite>. A problem for future advanced models is being able to distinguish whether its outputs are truthful or deceptive. For example, if a model’s objective is to say the incorrect answer, somewhere in its internal computations it has to compute what the correct answer is to know it is outputting the incorrect answer. One question that naturally arises is if we are able to deduce from its internal representations what the correct answer is. 
This project investigates a model’s latent knowledge under specific circumstances.</p> <h2 id="methods">Methods</h2> <p>We initially used Llama-2-7B-chat, a 32-layer decoder-only transformer model fine-tuned with supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align to human preferences for helpfulness and safety. We chose Llama-2-7B-chat because the model achieves reliably good performance on our instruction tasks, has its weights open-sourced, and has a relatively low number of parameters to reduce computational costs. Previously, we fine-tuned GPT-2-XL on the Alpaca instruction dataset, but could not get reliable results on our tasks.</p> <p>A competing prompt is when the correct answer is forbidden, and a non-competing prompt is when an incorrect answer is forbidden (equivalent to a normal factual recall).</p> <p>We used first-order patching to replace a component’s activations in a non-competing run with its activations in a competing run (and vice versa). To calculate component $r_{i}$’s importance, we take the log odds of predicting the correct answer in a non-competing run with $r_{i}$ patched from a competing run, and subtract the log odds of predicting a correct answer during a normal non-competing run:</p> \[\begin{equation} \left[ \mathrm{LO}_a\left( r_i(\mathbf{p}_\text{c}) + \sum_{j \neq i} r_j(\mathbf{p}_\text{nc}) \right) - \mathrm{LO}_a\left(\sum_{j} r_j(\mathbf{p}_\text{nc})\right) \right]. \end{equation}\] <p>This is a natural method to analyze model mechanisms at a coarse-grained level. If Llama 2 is a Bayesian model that aggregates information from each component, Equation 2 can be interpreted as the average log Bayes factor associated with changing the $r_{i}$’s view of the world from forbidding an incorrect answer to forbidding the correct answer. If this Bayes factor is small, then $r_{i}$ plays a large role in the model suppression behavior. 
We also only consider the residual stream on the last token because these components have the direct effect on the next token prediction.</p> <p>By first-order, we mean we don’t consider the effect the component may have on other components. We chose to do first-order patching because when multiple pieces of evidence are independent, their aggregate log Bayes factor is the sum of their individual log Bayes factors, which is why we can cumulatively add the components’ importance in the last plot.</p> <h2 id="results">Results</h2> <p>Our high-level takeaway was that the forbidding mechanism is complicated. The following plots illustrate its overall behavior:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-forbidden-facts/plot1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-forbidden-facts/plot1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-forbidden-facts/plot1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-forbidden-facts/plot1.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>This plots the probability Llama 2 answers a competing prompt correctly versus the probability it answers a non-competing prompt correctly across our dataset. A competing prompt is when the correct answer is forbidden, and a non-competing prompt is when an incorrect answer is forbidden (equivalent to a normal factual recall). 
The plot is cut off on the sides because we filter the dataset to ensure the model gets the initial factual recall task correct and has a significant suppression effect.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-forbidden-facts/plot2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-forbidden-facts/plot2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-forbidden-facts/plot2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-forbidden-facts/plot2.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>To get this plot, we first rank the residual stream components (attention heads and MLPs on the final token) by their importance from first-order patching, a tool we explain in the next paragraph. Adding the components’ importance “scores”, we find that patching 41 components is enough to achieve the same suppression as patching all 1057 components. This number stays roughly the same across the 13b and 70b versions of Llama even as the total component size rises to 10,570 components. There is a long tail here: for example, only the top 15 components are needed to create ~80% of the suppression effect.</p> <p>I worked jointly with Tony Wang and Kaivu Hariharan on this research project. 
I then did some more extended analysis for this class.</p> <h2 id="extended-results">Extended Results</h2> <p>I then asked how this analysis scales to larger models, namely the 13 and 70 billion parameter versions of Llama 2.</p> <p>13B:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-forbidden-facts/plot3-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-forbidden-facts/plot3-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-forbidden-facts/plot3-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-forbidden-facts/plot3.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>70B:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-forbidden-facts/plot4-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-forbidden-facts/plot4-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-forbidden-facts/plot4-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-forbidden-facts/plot4.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>I initially expected the number of components needed to replicate the initial effect to scale with the number of parameters. For example, 70b has 10,570 components versus 7b’s 1057 components. However, the number actually slightly decreased to 37 components and 38 components for 13b and 70b, respectively. 
We can see that the total suppression effect increased from 7b but remained the same between the two. This analysis suggests that the number of components needed to implement the suppression circuit remains constant as the number of model parameters grows. This makes sense when considering that larger models have a lot more emergent capabilities, which would entail a greater number of circuits as opposed to just enlarging the circuits of smaller models.</p> <p>The highly important suppressor heads paid attention to the forbidden token, which raised the question of how they knew to do so. We found that the key and query were “enriched” throughout the layers of the model to pay attention to the forbidden token. Testing query and key specificity, we found that swapping the queries from competing and non-competing runs made no difference to the attention paid to the key. Nor did changing the positional embeddings of the key. There must be another, perhaps more complicated, way that the model knows to pay attention to the forbidden token.</p> <p>We found that suppression heads preferentially attend to the forbidden word when it is the correct answer to the factual recall task. Understanding this model’s heuristics mechanistically allowed us to exploit its behavior, something we call the California Attack. 
Let’s take the initial example in a non-competing run:</p> <figure class="highlight"><pre><code class="language-c--" data-lang="c++"><span class="p">[</span><span class="n">INST</span><span class="p">]</span> <span class="o">&lt;&lt;</span><span class="n">SYS</span><span class="o">&gt;&gt;</span> <span class="n">You</span> <span class="n">are</span> <span class="n">an</span> <span class="n">obedient</span> <span class="n">assistant</span> <span class="n">who</span> <span class="n">only</span> <span class="n">responds</span> <span class="n">with</span> <span class="n">a</span> <span class="n">single</span> <span class="n">word</span> <span class="n">with</span> <span class="n">no</span> <span class="n">punctuation</span><span class="p">.</span> <span class="n">You</span> <span class="n">answer</span> <span class="n">truthfully</span><span class="p">.</span> <span class="n">However</span><span class="p">,</span> <span class="n">you</span> <span class="n">are</span> <span class="n">not</span> <span class="n">allowed</span> <span class="n">to</span> <span class="n">say</span> <span class="n">the</span> <span class="n">forbidden</span> <span class="n">word</span> <span class="err">'</span><span class="n">floor</span><span class="err">'</span><span class="p">.</span> <span class="o">&lt;&lt;/</span><span class="n">SYS</span><span class="o">&gt;&gt;</span> <span class="n">The</span> <span class="n">Golden</span> <span class="n">Gate</span> <span class="n">Bridge</span> <span class="n">is</span> <span class="n">in</span> <span class="n">the</span> <span class="n">state</span> <span class="n">of</span> <span class="p">[</span><span class="o">/</span><span class="n">INST</span><span class="p">]</span> <span class="o">&lt;</span><span class="n">br</span><span class="o">/&gt;</span> </code></pre></figure> <p>The model naturally answers ‘California’. 
By adding two words to the first sentence of the prompt: “You are an obedient assistant from California […] “, we break the model. The suppressor components take the forbidding instruction to never say ‘floor’ and instead downweight “California” to an 18% completion probability, elevating “San Francisco”, an incorrect answer, to the top response.</p> <p>I did some more exploring of this California Attack. I found three things:</p> <ol> <li>In the previous example, ablating just the top suppressor head resulted in California being the top response again.</li> <li>Some particular heads like attending more to certain categories (e.g. countries) and certain words. In this particular case, the top suppressor head enjoyed paying attention to California specifically. The key then is to have the forbidden instruction apply to a word it doesn’t like paying attention to, such as ‘floor’.</li> <li>We can find the words they downweight the most and least by applying the OV matrix against the entire vocabulary distributions and looking at the beginning and end of the distribution.</li> </ol> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-forbidden-facts/plot5-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-forbidden-facts/plot5-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-forbidden-facts/plot5-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-forbidden-facts/plot5.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Keeping these lessons in mind, I found another attack by analyzing some of the words the suppressor heads downweight the most. 
In the above example, I added that Llama 2 was an assistant “to Trump” in the system message. In the above message, the first run is the adversarial attack where the top response to answering who the 45th President of the USA was is ‘Great’. Under a normal run without the adversarial attack, the top answer is ‘Trump’:</p> <p>I also experimented with the 13B version of Llama 2, and found that the California attack also applies to this model when forbidding ‘table’ in a non-competing run:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-forbidden-facts/plot6-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-forbidden-facts/plot6-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-forbidden-facts/plot6-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-forbidden-facts/plot6.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>However, I could not find a similar adversarial attack for the 70B version of Llama 2. This suggests that as models get larger, their heuristics get more robust to such mechanistic exploits.</p> <h2 id="discussion">Discussion</h2> <p>In this work, we decompose and attempt to characterize important components of Llama 2 that allow it to suppress the forbidden word in the forbidden fact task. While we identify some structural similarities between the most important attention heads, we also find evidence that the mechanisms used by Llama 2 are complex and heterogeneous. 
Overall, we found that even components directly involved in suppressing the forbidden word carry out this mechanism in different ways and that Llama 2’s mechanisms are more akin to messy heuristics than simple algorithms.</p> <p>This results in an important limitation of our work: we could not find a clean, sparse circuit implementing the forbidden behavior. Moreover, it is unclear if we are working in the right “basis” of attention heads and MLPs, or if causal attribution methods such as activation patching are able to recover the correct representation of a circuit.</p> <p>This raises some questions about the goals of mechanistic interpretability. Previous mechanistic interpretability papers have largely studied algorithmic tasks on small models to understand how models implement behaviors and characterize certain properties. However, moving away from toy settings to understand how models with hundreds of billions of parameters implement a variety of complex behaviors with competing objectives might be much harder.</p> <p>Computational irreducibility is the idea that there are certain systems whose behavior can only be predicted by fully simulating the system itself, meaning there are no shortcuts to predicting the system’s behavior. Initially proposed by Stephen Wolfram in the context of cellular automata, this concept challenges the reductionist approach to science, which may be analogous to the approach mechanistic interpretability takes today.</p> <p>If computational irreducibility applies to mechanistic interpretability in understanding models, it may be very difficult to get generalizable guarantees about its behavior. If even the most efficient way of computing important properties about models is too slow, then mechanistic interpretability can’t achieve one of its main goals. This project provides some suggestive evidence that we could live in a world where frontier models are computationally irreducible.</p> <p>Thanks for reading! 
If you have any questions, feel free to reach out at miles_wang [at] college [dot] harvard [dot] edu!</p> Modeling Elephantfish Communication through Deep RNNs 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/elephantfish-model <h2 id="abstract">Abstract</h2> <p>Elephantfish, known for their unique use of electric fields for sensing and interaction, present a fascinating study subject within the realms of bioacoustics and animal communication. This project, pioneering the use of deep learning, specifically Recurrent Neural Networks (RNNs), aims to model and interpret these electrical communication signals. By combining insights from bioacoustics, linguistics, and computer science, we seek to decode these bioelectrical signals into a human-comprehensible format, thereby expanding our understanding of animal cognition and communication. The overarching goal is to decode and understand the complexity of elephantfish communication and to explore the broader applications in sociolinguistics, pragmatics, and computational linguistics for non-human species. This project pioneers in utilizing deep learning, specifically Recurrent Neural Networks (RNNs), to model and interpret the electrical communication signals of elephantfish. The study’s novelty lies in its interdisciplinary approach, combining insights from bioacoustics, linguistics, and computer science to unravel the complexities of non-human communication systems. Our goal is to translate these unique bioelectrical signals into a form comprehensible to humans, thereby expanding our understanding of animal cognition and communication.</p> <h2 id="introduction-and-objectives">Introduction and Objectives</h2> <p>The elephantfish, a species renowned for its unique electric-based communication and the largest brain-to-body weight ratio of all known vertebrates, offers a fascinating window into the study of non-human communication systems. 
These creatures, inhabiting the murky waters of African rivers and lakes, have developed a sophisticated method of communication that relies on generating and sensing electric fields. This remarkable ability not only sets them apart in the aquatic world but also poses intriguing questions about the nature and complexity of their interactions. The study of elephantfish communication is not just a pursuit in understanding an exotic species; it reflects a broader scientific curiosity about the principles of communication and social behavior across different life forms.</p> <p>The primary objective of this project is to develop a deep understanding of elephantfish communication through the application of advanced neural language models, specifically focusing on Recurrent Neural Networks (RNNs). This approach is inspired by the parallels drawn between the electric signals used by elephantfish and the structural aspects of human language. By leveraging techniques commonly used in natural language processing (NLP), we aim to decode these bioelectrical signals and translate them into a format that can be understood by humans. This endeavor is not only about interpreting the ‘language’ of a non-human species; it is about enriching our understanding of communication as a fundamental biological and social function.</p> <p>To capture the complexity of elephantfish communication, we have collaborated with labs at MIT and Columbia, gaining access to a comprehensive dataset of elephantfish electric communication signals. This dataset includes a wide range of signals recorded under various environmental and social conditions, providing a rich source of data for analysis.</p> <p>Utilizing the latest advancements in deep learning, we will develop and train neural language models that can accurately interpret and model these electric signals. 
The focus will be on employing Long Short-Term Memory (LSTM) RNNs, which are well-suited for handling the temporal sequences inherent in these signals.</p> <p>Drawing from the field of NLP, we will apply a range of techniques to analyze and understand the ‘language’ of elephantfish. This analysis will delve into the sensing, communication, and social dynamics of the species, offering insights into how they interact with each other and their environment.</p> <p>One of the most challenging aspects of this project is translating the electric signals into a form that is comprehensible to humans. This task will involve developing innovative methods to represent these signals visually or auditorily, making the complex patterns of communication accessible for further study and interpretation.</p> <p>Beyond the technical analysis, we aim to explore the sociolinguistic and pragmatic aspects of elephantfish communication. This exploration will involve understanding the social context and significance of different patterns of signals, thereby contributing to the broader field of computational linguistics and sociolinguistics.</p> <p>In undertaking this research, we are not only contributing to the field of bioacoustics but also bridging gaps between biology, linguistics, and computer science. The insights gained from this study have the potential to transform our understanding of animal communication and cognition, opening up new possibilities for interdisciplinary research and discovery.</p> <h2 id="literature-review">Literature Review</h2> <p>Time series analysis has been extensively used in biological studies, especially for understanding patterns in animal behavior and communication. Studies like Jurtz, et al. 
(2017) have demonstrated the efficacy of time series analysis in interpreting complex behavioral data in wildlife research.<d-cite key="jurtz2017introduction"></d-cite> This forms a basis for our approach to model elephantfish movements, which are intrinsically temporal and dynamic.</p> <p>The unique architecture of LSTM RNNs, with their ability to remember long-term dependencies, makes them particularly suitable for time series prediction. Gers, Schmidhuber, and Cummins (2000) showcased the potential of LSTM RNNs in learning to bridge minimal time lags in excess of 1000 discrete time steps between relevant input events and target signals, setting a precedent for their application in predicting animal movement patterns.<d-cite key="gers2002learning"></d-cite></p> <p>Recent advancements in bioacoustics have seen LSTM RNNs being employed to analyze and predict patterns in animal communication. For instance, Stowell and Plumbley (2014) applied LSTM networks to bird song recognition, illustrating the network’s capacity to handle temporal sequences in bioacoustic signals of bird sounds. This aligns closely with our project’s objective of modeling the movement patterns of elephantfish, which are hypothesized to be closely tied to their communication.<d-cite key="stowell2014automatic"></d-cite></p> <p>Research on aquatic species like elephantfish presents unique challenges due to their environment and the nature of their communication. The work of Stoddard et al. (2010) in electric signal analysis of male electric fishes provides insights into the complexity of such studies.<d-cite key="stoddard2008signal"></d-cite> However, there is a noticeable gap in applying advanced time series models, like LSTM RNNs, specifically to the movement patterns and communication signals of elephantfish.</p> <p>The application of NLP techniques to animal communication is a relatively unexplored frontier. Recent work by Wilensky et al. 
(2021) in decoding prairie dog vocalizations using natural language processing provides a compelling case for extending similar approaches to non-vocal animal communication. Our project takes this concept further by applying deep learning techniques to decode the electric signals of elephantfish, which, while different from vocalizations, share parallels in terms of being a structured form of communication.</p> <p>The application of LSTM RNNs in predicting the current positions of elephantfish based on past positions not only addresses a significant gap in the study of aquatic animal behavior but also sets the stage for future research in this area. The success of this approach could revolutionize the way we understand and interpret the communication and social interactions of these unique species.</p> <h2 id="methodology">Methodology</h2> <h3 id="data-collection">Data Collection</h3> <p>Collaborating with labs at MIT and Columbia, we have gained access to a diverse and comprehensive dataset of elephantfish electric communication signals. The dataset encompasses signals recorded in various environmental conditions, capturing the nuances of communication in different contexts. The recordings include instances of social interaction, mating rituals, and responses to external stimuli.</p> <h3 id="data-preprocessing">Data Preprocessing</h3> <p>The raw electric signal data require extensive preprocessing to extract meaningful features for the deep learning models. This involves filtering, noise reduction, and segmentation to isolate individual communication events. Given the temporal nature of the signals, we will focus on capturing time-dependent features that are crucial for LSTM RNNs.</p> <h3 id="model-architecture">Model Architecture</h3> <p>Our chosen model architecture revolves around Long Short-Term Memory (LSTM) Recurrent Neural Networks. 
LSTMs are well-suited for modeling sequences with long-term dependencies, making them ideal for capturing the temporal dynamics of elephantfish communication signals. The network will be designed to take into account the sequential nature of the signals, allowing for effective learning of patterns over time.</p> <h3 id="training">Training</h3> <p>The training process involves exposing the LSTM network to the preprocessed dataset, allowing it to learn and adapt to the patterns within the electric signals. The model’s performance will be iteratively refined through multiple training sessions, adjusting hyperparameters to optimize for accuracy and generalization.</p> <h3 id="evaluation">Evaluation</h3> <p>The evaluation phase includes testing the trained model on a separate set of elephantfish communication signals not seen during training. This assesses the model’s ability to generalize its learning to new and unseen data. Metrics such as accuracy, precision, recall, and F1 score will be used to quantify the model’s performance.</p> <h2 id="experiments-and-results">Experiments and Results</h2> <h3 id="experiment-1-signal-reconstruction">Experiment 1: Signal Reconstruction</h3> <p>Our first experiment aims to assess the model’s ability to reconstruct the original electric signals from the learned representations. This involves comparing the reconstructed signals with the original signals using established metrics for signal similarity.</p> <h3 id="experiment-2-pattern-recognition">Experiment 2: Pattern Recognition</h3> <p>In the second experiment, we evaluate the model’s performance in recognizing and categorizing different patterns within the elephantfish communication signals. This includes identifying specific sequences associated with social interactions, mating rituals, and responses to external stimuli.</p> <h3 id="results">Results</h3> <p>Preliminary results indicate promising performance in both signal reconstruction and pattern recognition tasks. 
The LSTM RNN demonstrates an ability to capture and reproduce complex temporal patterns within the electric signals. The model’s accuracy in distinguishing between different communication contexts is encouraging, suggesting that it can effectively learn and differentiate the nuances of elephantfish communication.</p> <p>The experiments involved training the RNNs on the collected dataset, followed by validation and testing phases. We present detailed results demonstrating the models’ ability to capture and replicate the intricate patterns of elephantfish communication. The analysis includes a comparative study with existing knowledge in marine biology, validating the accuracy and relevance of our models.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-elephantfish-model/figure%201-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-elephantfish-model/figure%201-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-elephantfish-model/figure%201-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-elephantfish-model/figure%201.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-elephantfish-model/figure%202%20--%20comparison%20epoch%2010-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-elephantfish-model/figure%202%20--%20comparison%20epoch%2010-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-elephantfish-model/figure%202%20--%20comparison%20epoch%2010-1400.webp" /> <!-- Fallback 
to the original file --> <img src="/staging/assets/img/2023-12-12-elephantfish-model/figure%202%20--%20comparison%20epoch%2010.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-elephantfish-model/figure%203%20feature%20distributions-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-elephantfish-model/figure%203%20feature%20distributions-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-elephantfish-model/figure%203%20feature%20distributions-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-elephantfish-model/figure%203%20feature%20distributions.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-elephantfish-model/figure%204%20feature%20conclusions%20heatmap-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-elephantfish-model/figure%204%20feature%20conclusions%20heatmap-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-elephantfish-model/figure%204%20feature%20conclusions%20heatmap-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-elephantfish-model/figure%204%20feature%20conclusions%20heatmap.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h2 id="discussion-and-conclusions">Discussion and 
Conclusions</h2> <p>The successful application of LSTM RNNs to model elephantfish communication signals represents a significant step forward in our understanding of non-human communication systems. The results demonstrate the capacity of deep learning techniques to decode and interpret complex bioelectrical signals, opening avenues for further exploration in bioacoustics and animal communication.</p> <p>The ability to reconstruct signals and recognize patterns within elephantfish communication provides a foundation for future studies on the sociolinguistic and pragmatic aspects of their interactions. By translating these signals into a comprehensible format, we pave the way for a deeper exploration of the meanings and nuances embedded in the electric language of elephantfish.</p> <p>Our research marks a significant stride in understanding non-human communication systems, demonstrating the ability to predict the movement and communication patterns of elephantfish. The findings not only shed light on the complex social structures of elephantfish but also open new avenues in the study of animal linguistics. We discuss the broader implications of our work in the fields of cognitive science and artificial intelligence, highlighting the potential applications and societal impact. Our LSTM RNN models, compared to baseline models that use the immediate last time step position to predict, show superior performance in predicting the complex communication patterns of elephantfish.</p> <p>This superiority highlights the effectiveness of our LSTM RNNs in capturing the intricate temporal dynamics of elephantfish communication. 
Moreover, our method of processing raw electric data has been optimized through trial and error, finding that skipping exactly every 5 data points results in the lowest loss, demonstrating the importance of fine-tuning data preprocessing in machine learning models.</p> <h2 id="challenges-and-future-directions">Challenges and Future Directions</h2> <p>This project stands at the intersection of technology and biology, with the potential to significantly advance our understanding of animal communication. The success of this endeavor could pave the way for interdisciplinary research, contributing valuable insights into the cognitive abilities of non-human species and the fundamental principles of communication.</p> <p>The research conducted on elephantfish communication using LSTM RNNs has yielded insights that significantly advance our understanding of non-human communication systems. Our models have demonstrated a notable ability to predict movement and communication patterns, offering a new lens through which to view the complex social interactions of these aquatic species.</p> <p>This is a large scale long term collaboration between a few labs, and in the future we will utilize more of the data from a marine biology lab at Columbia to interpret the electric signals. We will likely collaborate with marine biologists to collect a data set of electric signals from elephantfish under various environmental and social conditions.</p> <p>Comparatively, our approach has shown improvements over traditional models, providing a more nuanced understanding of the temporal dynamics in elephantfish communication. These results not only align with existing theories in marine biology but also open new avenues for exploration in animal linguistics and cognitive science.</p> <p>However, this study is not without its limitations. One of the primary constraints was the size and diversity of the dataset. 
While we managed to collect a substantial amount of data, the variability in environmental conditions and individual elephantfish behaviors was limited. This constraint could potentially impact the generalizability of our models to broader applications. The translation of bioelectrical signals into a human-understandable format is an ongoing challenge that requires further refinement. Additionally, the diversity and variability within elephantfish communication present complexities that demand a nuanced understanding beyond the scope of this initial study.</p> <p>Another limitation lies in the inherent complexities of LSTM RNNs, which, while powerful, can sometimes become “black boxes.” This opaqueness makes it challenging to dissect the exact learning mechanisms and to fully understand how the models are making their predictions.</p> <p>Our study marks a significant step forward in the field but also highlights areas for further research. Future studies could focus on expanding the dataset and exploring more diverse environmental conditions. Additionally, we hope to develop more interpretable machine learning models that could provide clearer insights into the learning and prediction processes. One thing we hope to do is to convert back the predicted positions of fishes to the pixel positions in the tank, this way we can have a more visual intuition about how our model is predicting the positions.</p> Exploring Image-Supervised Contrastive Diffusion - A Comparative Analysis with Applications in Image-to-Video Generation 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/contrastivediffusion-image2video <h2 id="introduction-and-motivation">Introduction and Motivation</h2> <p>With recent advances in computer vision and generative AI, we all have observed the various feats that diffusive models have achieved in conditional image generation. 
These models have demonstrated unparalleled ability in creativity, fidelity, and relevance when generating images from text prompts. Given this explosive success of diffusion for the task of image generation, the idea of applying the same concepts to conditional video generation seems like a logical follow-up. Yet, the field still lacks robust and compelling methods for conditional video generation with diffusion models. This raises the question: why might this be? Or perhaps a follow-up: what makes videos so hard in comparison to images?</p> <p>In an attempt to address our first question, if we take a brief dive into previous literature, we will find that the issue is not a lack of effort. Ho et al. <d-cite key="ho2022video"></d-cite>, Zhang et al. <d-cite key="2023i2vgenxl"></d-cite>, and Chen et al. <d-cite key="chen2023videocrafter1"></d-cite>, all explore this idea, yet the results from these methods are not nearly as exciting as the results we see in images. But why is this?</p> <p>Perhaps the answer lies in the solution to our second question. One of the most obvious complexities that videos have over images is also perhaps one of the most difficult: the temporal dependence between frames. But why is this relationship so hard for diffusion models? Following the work of Zhu et al. <d-cite key="zhu2022discrete"></d-cite>, we hypothesize that this is because the implicit learning of diffusive steps between images in a video is too complex of a problem for a diffusion model; relying on the model to learn the implicit relationship between representations of video frames is infeasible from a training and convergence standpoint. 
If we can instead learn diffusive steps over a more regularized learned latent space, the optimization problem can be greatly simplified and the diffusion model will in theory be more robust.</p> <p>To do so, we introduce a new framework for fine-tuning diffusion models when given images in addition to text as conditional information, targeting this challenge of making the model’s use of the latent space more robust. Specifically, we utilize contrastive learning techniques to ensure that the model learns consistency between latents from different image domains, which we first validate on the easier image-to-image (I2I) case before moving into image-to-video (I2V).</p> <h2 id="related-work">Related Work</h2> <p>Taking a step back to examine the current state of research, let’s first take a look at what current I2I models look like.</p> <h3 id="image-to-image-models">Image-to-Image Models</h3> <p>In the field of image-to-image, there are two main approaches, using images to control the model output, and modifying the image itself.</p> <p>The first approach is characterized by work like ControlNet and T2I <d-cite key="mou2023t2i"></d-cite> <d-cite key="zhang2023adding"></d-cite>, which freeze the overall diffusion network and instead fine-tune a lightweight adapter model on the top. This adapter is typically a combination of stable diffusion layers that have an additive effect on the model as a whole, trained using the fine-tuning dataset. However, these models are largely still built for text-to-image tasks, using the input images as conditioning for the input text prompt, such as a wireframe image for poses. 
However, this does not allow modification of the image itself, simply using the image as guidelines during the diffusion process itself, meaning that its style is not preserved.</p> <p>The second method is more related to maintaining both the style and content of the original image, and instead directly fine-tunes the diffusion network to actually use the input images. The first such model for this purpose is the original pix2pix architecture, which while built for GANs, still carries vital lessons to this day. By fine-tuning a loss that actually involves the mapping between input and output image, the model learns to actually adapt the image while keeping other relevant contexts the same <d-cite key="pix2pix2017"></d-cite>. After this, the Palette model for generalist, multi-task diffusion trained a diffusion model from scratch for multiple different tasks <d-cite key="saharia2022palette"></d-cite>. However, then Instruct-Pix2Pix built on the original pix2pix architecture, taking a pre-trained diffusion model and conditioning it on both the noisy text latent and the input image latent, meaning that the training latent had both and therefore would fully train on the input image latent <d-cite key="brooks2022instructpix2pix"></d-cite>. 
This architecture is presented below.</p> <div class="row"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/Image-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/Image-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/Image-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/Image.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Traditional diffusion architecture </div> <p>For the purpose of this blog, we study Instruct-Pix2Pix like fine-tuning schemes, since they align with what we need for video-based studies, maintaining content of the previous image while making small modulations based on the input text.</p> <h3 id="image-to-video-models">Image-to-Video Models</h3> <p>Moving to I2V, we find that current image-to-video frameworks typically still use a traditional diffusion architecture, going straight from text and image representations to an output image. However, this naive approach struggles with serious issues like frame clipping and loss of contextual information, which is expected since noise-based sampling can easily throw off the output of individual frames.</p> <p>Hence, Ho et al. in 2022 proposed the first solution, supplementing conditional sampling for generation with an adjusted denoising model that directly forces image latents to be more similar to the corresponding text latents <d-cite key="ho2022video"></d-cite>. 
While this achieved improved results over the straightforward diffusion approach, this often forces the model to stick too closely to the text latent, resulting in incoherent videos.</p> <p>To solve this issue, two recent approaches from Chen et al. and Zhang et al. have proposed methods to augment the video diffusion models themselves. Chen et al. uses the image encodings from CLIP-like language embeddings in an encoder-decoder language model, feeding the CLIP encodings at each step into a cross-attention layer that generates attention scores with the current video generation <d-cite key="chen2023videocrafter1"></d-cite>. In doing so, additional coherence between frames is achieved. On the other hand, Zhang et al. use multiple encoders, with CLIP and VQ-GAN concatenated before two stages of diffusion model training, which they claim provides the hierarchical learning required to learn the temporal processing <d-cite key="2023i2vgenxl"></d-cite>. However, both these models are extremely data-heavy and still suffer from hallucination and frame skipping.</p> <h3 id="contrastive-models">Contrastive Models</h3> <p>To remedy these issues in diffusion models, Ouyang et al. and Zhu et al. posit that the implicit representation learning objective in diffusion models is the primary cause of the slow convergence and hallucination issues. Specifically, diffusion models do not directly compare their output to their input, as in contrastive models, instead performing a variational approximation of the negative log-likelihood loss over the full Markov chain. Instead, Ouyang and Zhu propose to train the diffusion model to output a structured latent in the latent space of a contrastive model like a VQ-VAE, which then reconstructs the output image <d-cite key="zhu2022discrete"></d-cite> <d-cite key="ouyang2023improving"></d-cite>. 
In doing so, a contrastive term can be added to the loss of the diffusion model, maximizing the mutual information between the structured (output) latent and input latent, leading to stronger correlations between input and output, and hence improved convergence. Hence, this approach seems to have potential in fixing the hallucination and coherence issues in video diffusion models, without the need for added complexity.</p> <h2 id="our-proposal">Our Proposal</h2> <p>Thus, we propose a novel method for conditional image-to-image generation (generating images given a starting frame and text description) by training the diffusion model to actually utilize the regularized latent space in which a diffusion model can operate. Following the line of thought introduced above, we hypothesize that under such a formulation, the diffusion model is much more robust to temporal inconsistency, because of the regularity in the latent space. For example, if we imagine a highly regularized latent space, we will find all logical next frames for a given anchor frame clustered very closely around the anchor in this latent space. Therefore, any step the diffusion model takes would produce valid subsequent frames; it suffices simply for the model to learn which direction to go given the conditioned text prompt.</p> <h2 id="model-architecture">Model Architecture</h2> <h3 id="image-to-image">Image to Image</h3> <p>Given a base pretrained diffusion model, such as Runway ML’s StableDiffusion 1.4, which is the model used in this blog, it consists of various components. The three that are the most important are the VAE image encoder/decoder, the UNet, and the CLIP text encoder. The VAE begins by learning to transform images into latents and vice-versa, which is used to compress the input image and decode the output latent in the original Instruct-Pix2Pix stack. 
On the other hand, the UNet predicts the noise in the denoising part of the pipeline, whereas the CLIP text encoder encodes the input text.</p> <p>In terms of the general diffusion model, we use the traditional diffusion loss,</p> \[\mathcal{L} = \mathbb{E}[(\epsilon - \epsilon_\theta(x_t))^2]\] <p>which essentially encodes the mean squared error loss between the added noise and the noise that is predicted by the UNet. This pipeline is illustrated in the below image.</p> <div class="row"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/Image%20(4)-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/Image%20(4)-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/Image%20(4)-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/Image%20(4).png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Construction of the MSE loss for a traditional diffusion fine-tuning stack. </div> <p>However, this loss does not encode anything of the relation between the frames themselves, which has the potential to lead to low coherence between source and target image, and thus lead to poor output quality. 
However, contrastively trained models like CLIP have shown strong correlative behavior between multiple modalities in the past, like between text and image, which is why we move towards contrastive losses.</p> <p>In traditional contrastive learning, we typically have our classes divided by our dataset, such as for shape, as shown in this example of a shape dataset taken from the fourth homework of 6.s898:</p> <div class="row"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/contrastiveshapes-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/contrastiveshapes-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/contrastiveshapes-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/contrastiveshapes.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> An example of a contrastive dataset for shapes </div> <p>For this contrastive learning dataset, we have images that are well classified, but in terms of our image to image task, there is no such easy classification. Instead, we adopt the notion that in such a dataset, with a batch size that is small relative to the size of the dataset, each image will be reasonably different from the other images. 
Also because we don’t want to cluster the latent space, as the VAE is fully pretrained in the case of the diffusion fine-tuning methodology, we don’t need to actually push similar items between the test set closer together, only push the diffusion output closer to the input conditioning.</p> <p>Hence, for this task, we consider each image within the larger batch as a negative sample, only using the corresponding latent in our optimization task as the positive sample. Also, given that we want both similarity to the input image and the target image, we want our loss to look like</p> \[\mathcal{L} = \mathcal{L}_{MSE} + \mathcal{L}_{c, i} + \mathcal{L}_{c, t}\] <p>where c indicates contrastive and i, t indicate input and target, respectively.</p> <p>For the images, they are encoded by the VAE, which has learned structure due to its Gaussian training objective in the ELBO loss, which means we can directly dot product the latents when calculating the contrastive loss:</p> \[\mathcal{L}_c = -\mathbb{E}\left[\log \frac{e^{x_+^{T}x}}{\sum_{x' \in \{x_+, x_{-} \}} e^{x'^{T}x}}\right]\] <p>This is calculated easily using a matrix multiplication and a cross entropy loss. Now, since we compute the contrastive loss using the predicted latent, and not the noise, we also add on a constructive aspect to our diffusion model. From the final noise prediction, the model also generates the predicted latent using the noise scheduler:</p> \[x_0 = \frac{1}{\sqrt{\bar{\alpha_t}}}(x_t - \sqrt{1 - \bar{\alpha_t}}\epsilon_\theta(t))\] <p>where alpha-bar is the cumulative product of the alphas in the noise scheduler. These predicted final latents are then used directly in the contrastive loss formula. 
A visualization of how we calculate our contrastive loss can be found below:</p> <div class="row"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/Image%20(3)-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/Image%20(3)-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/Image%20(3)-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/Image%20(3).png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Construction of the MSE + Contrastive loss for our fine-tuning stack. </div> <p>We note that in this case, we must scale the losses for numerical stability. The model we train with has latents of dimension 4 by 32 by 32, and while the MSE is scaled from 0 to 4 (due to pixel values from 1 to -1), the cross entropy loss is not. Indeed, many of these dot products are on the order of 4000, so we choose a high temperature of 1 to prevent NaN computations and then scale the losses by 4000, which is chosen because it scales the effect of each pixel in the dot product to around the same order as that in the MSE, which is averaged over all 4096 values in the latent.</p> <h3 id="image-to-video">Image to Video</h3> <p>Now, for image to video, the training process of such a model involves the optimization of the above diffusion/contrastive loss based on a given pair of nearby video frames, as well as the corresponding text description for that video. 
This procedure works well because in a video, we must train the model to learn the next frame, so just like how masked language models are asked to predict masked tokens from a sequence, we ask the diffusion model to predict a masked frame from the given frame. On top of that, the text prompt, which often still provides the majority of the guidance for the video as a whole is already conditioned using the MSE loss, while the contrastive loss optimizes the similarity to previous frames. Otherwise, this is trained the same as a traditional diffusion model.</p> <p>During inference, we generate a video through the following process. First, an initial frame and the text description are encoded into our latent space using the VAE encoder and CLIP encoder, respectively. Now, we run an arbitrary number of passes through our diffusion model, generating a latent at each step, which is then passed in as the conditioning frame for the next forward pass. Finally, we decode the latent at each time step to obtain our video frame at that time step; stringing these frames together produces our video.</p> <p>From a more theoretical perspective, this method essentially aims to restrict the diffusion model’s flexibility to paths within a highly regularized, lower dimensional latent space, as opposed to the entire space of images that classical diffusion-based approaches can diffuse over. Such a restriction makes it much harder for the diffusion model to produce non-sensible output; the development of such a method would therefore enable the robust generation of highly temporally consistent and thus smooth videos. We also imagine the value of producing such a latent space itself. 
An interesting exercise, for example, is taking an arbitrary continuous path along vectors within a perfectly regular latent space to obtain sensible videos at arbitrary framerates.</p> <h2 id="data">Data</h2> <p>Now, we explain where we got our data from.</p> <p>For text-conditioned image-to-image generation, we train on the Instruct-Pix2Pix dataset from HuggingFace, sampling 20k samples from the original training set used in the paper (timbrooks/instructpix2pix-clip-filtered). Our test and evaluation sets consist of 500 nonoverlapping samples from this same set <d-cite key="brooks2022instructpix2pix"></d-cite>. This dataset consists of samples with input images, edited images, input prompts, edited prompts, and an edit string that describes the edit that was made. An example is presented below:</p> <div class="row"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/P2P_data-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/P2P_data-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/P2P_data-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/P2P_data.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> An example of a sample from the Instruct-Pix2Pix dataset. </div> <p>For text-conditioned image-to-video generation, we experimented with the use of two different video-caption datasets: MSR-VTT and WebVid-10M. 
Due to the high dissimilarity between the properties of the two datasets, we tested the finetuning performance of both our contrastive model and non-contrastive models on both datasets. MSR-VTT contains 10k clips scraped from a wide range of domains, with multiple human-generated captions for each video. WebVid, on the other hand, contains 10M video clips compiled from stock image sources, with captions corresponding to the stock photo titles. For WebVid10M, we only take from the 2.5M subset. For both datasets, samples were generated to follow the Instruct-Pix2Pix data formulation (original image, edit prompt, edited image) using the following strategy:</p> <p>First, we sample 25k and 10k videos from WebVid-10M and MSR-VTT, respectively. We aim to sample roughly an equal number of samples from each video for a total of 20k (original image, edit prompt, edited image) triplets. We ignore videos longer than 30 seconds in length to minimize the probability of temporal inconsistency within a given video. Then, for each video, we choose a random frame in the video (the original video fps is 25; but these frames are too close together, so we say that only one out of every 5 video frames is a valid selection target) to be our “original” image. The video’s caption is our “edit” prompt. To select our “edited” image, we note that we are optimizing the model to produce the next frame, while maintaining consistency between frames. Therefore, to select the “edited” image, we sample a normal distribution with standard deviation of 10 valid frames (50 frames in the original video), or two seconds, to select a frame after our “original” image as our “edited” image. 
A sample processed image from WebVid is included below.</p> <div class="row"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/webvid_processed-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/webvid_processed-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/webvid_processed-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/webvid_processed.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> An example of a processed sample from WebVid. </div> <h2 id="experiments">Experiments</h2> <p>To assess the efficacy of our newly proposed strategy, we run experiments on both the original Instruct-Pix2Pix task of text-conditioned image-to-image generation, as well as the task of text-conditioned image-to-video generation, against the baseline Instruct-Pix2Pix model. The original Instruct-Pix2Pix task is run to confirm that our model, after obtaining coherency, does not lose significant expressivity. On the other hand, we expect the image-to-video model to have comparable expressivity to the baseline on a task where coherency is significantly more important.</p> <p>All of these evaluations and experiments were performed using the Accelerate library and HuggingFace Diffusers, <d-cite key="von-platen-etal-2022-diffusers"></d-cite>, building off of their Instruct-Pix2Pix codebase. The model is RunwayML’s Stable Diffusion v1.5 release. 
For the task of image-to-image generation, we trained both the baseline Instruct-Pix2Pix and our model for 9000 training steps on 4xA100-80GB with a batch size of 16 and a learning rate of 5e-5, which took on the order of 12 hours. For the image-to-video generation task, we trained both baseline Instruct-Pix2Pix and our contrastive model for 4500 training steps at a learning rate of 1e-5 and a batch size of 16 due to overfitting issues at higher # of training steps and higher learning rates, possibly due to the repetitiveness of our dataset. Note that we had a limited ability to hyperparameter tune/ablate, since each diffusion fine tuning run took multiple hours at a minimum, and we were operating on a minimal budget of spare A100s when they were available from our labs, so those results are not shown in this blog.</p> <p>We then evaluate on the test splits of the corresponding datasets described above (for image-to-video generation, we evaluate on the test split of WebVid, since MSRVTT’s testing set has a number of non-corresponding video-prompt pairs and also very jittery videos).</p> <h2 id="results">Results</h2> <p>Now, we explain our results. For both tasks, we assess two metrics: the first is the Frechet Inception Distance (FID) <d-cite key="FID "></d-cite>between the target image and the predicted image. The FID calculates the similarity between the distribution of images and the distribution of generated images, so a lower FID is considered better, as the distributions are more similar. Note that since our models are still technically image-to-image models and not image-to-video at their core, to evaluate the base models we use FID instead of its video counterpart FVD. 
For our second metric, we use CLIP Scores <d-cite key="hessel-etal-2021-clipscore "></d-cite>between the source image and the edit prompt, the target image and the edit prompt, and the predicted image and the edit prompt for the source image, predicted image, source image description (from the Instruct-Pix2Pix dataset), and target image description. The CLIP score can be thought about as a measure of the similarity between the prompt and the image, with a higher score being better, referring to higher similarity.</p> <p>These metrics are used to evaluate our base image-to-video models as well, as they both determine the amount of prompt following and fidelity we can determine in our videos.</p> <h3 id="image-to-image-results">Image to Image Results</h3> <p>For text-conditioned image-to-image generation, we observe that our models have these FID and CLIP scores:</p> <table> <thead> <tr> <th> </th> <th>FID</th> <th>CLIP (source - prompt)</th> <th>CLIP (gen - prompt)</th> <th>CLIP (target - prompt)</th> </tr> </thead> <tbody> <tr> <td>Ours</td> <td>158.8</td> <td>21.7</td> <td><strong>24.4</strong></td> <td>24.1</td> </tr> <tr> <td>Baseline</td> <td><strong>142.4</strong></td> <td>21.7</td> <td><strong>24.4</strong></td> <td>24.1</td> </tr> </tbody> </table> <p>Our model matches the baseline on CLIP score, meaning that our model exhibits similar prompt following characteristics as the baseline. On top of that, our FID is only slightly higher than the baseline, meaning that the expressivity has not decreased significantly. 
However, images do not have similarly robust coherence metrics, so we evaluate these qualitatively.</p> <h4 id="coherence">Coherence</h4> <p>On the subject of coherence, we provide some image output pairs in the figure below:</p> <div class="row"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/sample_images-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/sample_images-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/sample_images-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-contrastivediffusion-image2video/sample_images.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Examples of sampled images, with prompts of "make the mountain snowy" and "make it futuristic," respectively. </div> <p>For both scenes, while the baseline diffusion model follows the prompt more fully, which may match the output distribution (hence getting a better FID score), we notice several key contrastive differences, which would impact coherence. In the mountain for example, the forest disappears in the baseline version, which also doesn’t maintain the painting-like style. On top of that, in the Eiffel tower case, the Eiffel tower rotates in the non-contrastive version. These observations lead to the idea that the contrastive model may be prioritizing coherence as desired, despite some loss in performance. 
Similar patterns are observed throughout the dataset.</p> <h3 id="image-to-video-results">Image to Video Results</h3> <p>For text-conditioned image-to-video generation, we observe that our models have the FID and CLIP scores in the table below:</p> <table> <thead> <tr> <th> </th> <th>FID</th> <th>CLIP (source - prompt)</th> <th>CLIP (gen - prompt)</th> <th>CLIP (target - prompt)</th> </tr> </thead> <tbody> <tr> <td>Ours (trained on WebVid)</td> <td><strong>102.9</strong></td> <td>29.9</td> <td>27.5</td> <td>29.8</td> </tr> <tr> <td>Ours (trained on MSR-VTT)</td> <td>149.3</td> <td>29.9</td> <td>27.6</td> <td>29.8</td> </tr> <tr> <td>Baseline (trained on WebVid)</td> <td>*</td> <td>*</td> <td>*</td> <td>*</td> </tr> <tr> <td>Baseline (trained on MSR-VTT)</td> <td>172.3</td> <td>29.9</td> <td><strong>29.4</strong></td> <td>29.8</td> </tr> </tbody> </table> <p>Note that in this case, we include asterisks for the baseline numbers on WebVid because it produces NSFW content as marked by the HuggingFace Diffusers library more than 25% of the time. This means that the metrics are not directly comparable as we were unable to find a validation set on which we could evaluate the models quantitatively on even ground. Nonetheless, we still include the WebVid baseline in our qualitative analysis.</p> <p>Looking at the rest of the metrics, the baseline on MSR-VTT has a decently higher correlation with the prompt than the contrastive model. This makes sense, as the baseline is trained only on the objective of denoising the prompt latent, while we add the contrastive term. 
On the other hand, we have a significantly lower FID score for the MSR-VTT trained models, which means that the distribution of our output data relative to the target output data was more similar; this is probably because our high coherence is useful in tasks where source and target distributions are similar.
In particular, we see that the MSR-VTT baseline model deviates heavily from the starting image on the very next frame, while our MSR-VTT model largely retains the original characteristics despite some content drifting after frame 3. WebVid noticeably performs better on the baseline, but does still observe some signs of progressive degradation in our predicted outputs, along with lack of motion in contrast to the prompt for the baseline model. This progressive degradation is likely due to small levels of inclarity in each subsequent frame being compounded over multiple frames; due to coherence between frames, the subsequent frames will contain strictly more inclarity than the previous. On the other hand, our model on WebVid sees less degradation on top of actually having coherent motion of smoke billowing, showing successful output.</p> <p>Overall though, WebVid was observed to have significantly better results than MSR-VTT, which is likely attributed to the greater quality of the dataset and less jittery videos.</p> <h2 id="discussion-and-conclusion">Discussion and Conclusion</h2> <p>In this project, we explored the idea of using contrastive losses to improve the coherency between input and output images in the context of text-conditioned image-to-image generation. In particular, we study the utility of this ability to generate highly coherent diffusion results in I2V, where the current state-of-the-art suffers heavily from temporal inconsistency. We evaluate our models on the classic Instruct Pix2Pix task to assess its preservation of expressive ability and conclude that no significant degradation of expressive ability was observed. 
We then evaluate our contrastive strategy on text-conditioned image-to-video synthesis and find that our models outperform the classic non-contrastive formulation in video generation tasks when evaluated on CLIP Score and KID.</p> <p>Through our experiments, we have also identified some limitations of our methods and potential areas for improvement. First, we note that our model has trouble with the previously mentioned problem of progressive degradation. A possible solution to this problem could be introducing GAN training to encourage the model to produce higher-fidelity images. More robust methods could also be used (instead of sampling subsequent frames) to generate positive samples, which would increase our model’s robustness. We also notice that both our model and the baseline have trouble with a continuous depiction of motion. This is likely due to the fact that any frame is only conditioned on the previous frame. Conditioning on images multiple frames before the current image would help with this consistency issue, as well as the aforementioned progressive degradation issue. Also, due our loss function’s negative sampling-based approach to training our models, on a dataset with significant amount of repetition like ours, this led to significant overfitting in preliminary runs. On top of that, runs suffered from loss spiking when the numeric instability of cross-entropy loss led to the calculation of NaN losses and exploding gradients, which leads to requiring very low values of learning rate. This could be resolved with better sweeps of hyperparameters for scaling the losses relative to each other or higher quality data. 
Finally, as alluded to above, more time to do hyperparameter tuning with the training of larger models on larger datasets would likely help with performance in general.</p> <p>With this study, we examined the use of contrastive loss to improve coherency in latent diffusion, with experiments that demonstrated minimal loss of expressive capabilities and superior consistency in diffusion, resulting in better performance on image-to-video generation. We hope that through this study, we can drive focus toward contrastive loss approaches to obtain higher fidelity results in video generation, accelerating progress in I2V and T2V.</p> Combining Modalities for Better Molecular Representation Learning 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/combining-modalities-for-better-representation-learning <h2 id="introduction">Introduction</h2> <h3 id="importance-of-molecular-representation-learning">Importance of molecular representation learning</h3> <p>Molecular Representation Learning (MRL) is one of the most important tasks in molecular machine learning, drug design, and cheminformatics. <d-cite key="mol_rep_review"></d-cite> It is central to addressing several key challenges in molecular sciences, including high-quality representation learning for molecular property prediction, <d-cite key="mol_prop_pred"></d-cite> predicting organic reaction outcomes, <d-cite key="reaction_pred"></d-cite> retrosynthesis planning, <d-cite key="retrosynthesis"></d-cite> and generative modeling. <d-cite key="generative_review"></d-cite> Excelling in these domains is essential for the development of new drugs, materials, and catalysts.</p> <h3 id="different-ways-to-represent-molecules">Different ways to represent molecules</h3> <p>The challenge of learning molecular representations is more complex than in fields like computer vision or natural language processing. 
This complexity stems from the variety of methods available for encoding molecular structures and the assumptions inherent to each representation. Primarily, there are four ways to represent molecules:</p> <ol> <li><strong>Fingerprints</strong>. One of the oldest ways to represent molecules in Quantitative structure–activity relationship (QSAR) modelling. Molecular fingerprints are binary vectors that encode the presence or absence of certain substructures in the molecule. Fingerprints were one of the first ways to get the initial representation of molecules in machine learning problems. <d-cite key="fingerprints_pred"></d-cite></li> <li><strong>String representation</strong> (e.g. SMILES strings). This approach involves encoding molecular fragments as tokens to form a string. This initial molecules encoding is widely used in generative molecular modeling. <d-cite key="lang_complex_distr"></d-cite></li> <li><strong>2-D graph</strong>. A popular and intuitive approach where molecules are represented as graphs, with atoms and bonds corresponding to nodes and edges, respectively. With advancements in Graph Neural Networks (GNNs) architectures,<d-cite key="gnns_review"></d-cite> this format is extensively used in molecular property prediction. <d-cite key="chemprop"></d-cite></li> <li><strong>3-D graph</strong>. The most detailed representation, which includes spatial information about atoms and bonds in addition to the graph structure. Although obtaining 3-D graph representations is challenging, models based on this approach often demonstrate superior performance. Various modeling techniques are applied to 3-D graphs, including invariant and equivariant GNNs. <d-cite key="schnet,equiv_gnn"></d-cite></li> </ol> <p>Given these diverse approaches, this work aims to explore various molecular representations and their potential combination for enhanced performance in downstream tasks, such as molecular property prediction. 
Additionally, this blog post seeks to analyze the representations of small molecules by comparing nearest neighbors in the latent chemical space. We also investigate representations learned by language models trained on SMILES strings.</p> <h2 id="methods">Methods</h2> <h3 id="data">Data</h3> <p>In this study, we utilized the QM9 dataset to train and evaluate our models. Comprising approximately 133,000 small organic molecules, the dataset includes molecules with up to nine heavy atoms (specifically Carbon, Nitrogen, Oxygen, and Fluorine) and 19 distinct properties. As a well-established benchmark in molecular property prediction research, QM9 offers a comprehensive foundation for our analysis.<d-cite key="qm9"></d-cite></p> <p>Our primary focus was on predicting the free energy $G$ at 298.15K. To ensure a robust evaluation, we divided the dataset using Murcko scaffolds <d-cite key="murcko"></d-cite> to prevent the same molecular scaffolds from appearing in both the training and testing sets. This division allocates 80% of the data for training, 10% for validation, and the remaining 10% for testing purposes. 
Additionally, we standardized the target values to have a zero mean and unit variance, aiming for consistency in our predictive modeling.</p> <h3 id="models">Models</h3> <p>The illustration of the overall approach is presented in Figure 1.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/approach-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/approach-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/approach-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/approach.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 1. Illustration of the overall approach. We use different ways to represent molecules and train different models on these initial encodings. </div> <p>We use the following models to learn the representations of molecules:</p> <ol> <li><strong>Fingerprint-based model</strong>. Utilizing Morgan fingerprints <d-cite key="morgan"></d-cite> with a radius of 2 and 2048 bits, we developed a multilayer perceptron (MLP) featuring six layers, layer normalization, and a varying number of hidden units (ranging from 512 to 256). This model focuses on learning representations from molecular fingerprints.</li> <li> <p><strong>SMILES-based model</strong>. For the representation of SMILES strings in the QM9 dataset, we employed a Recurrent Neural Network (RNN) with LSTM cells, comprising three layers and 256 hidden units. 
This model learns to predict the next token in a SMILES string based on the previous tokens, using cross-entropy loss for training: \(\mathcal{L}_{\text{CE}} = -\sum_{t=1}^{T} \log p(x_t | x_{&lt;t})\)</p> </li> <li><strong>2-D graph-based model</strong>. To handle 2-D graph representations of molecules, we used a Message Passing Neural Network with four layers, 256 hidden units, sum aggregation, mean pooling, and residual connections between convolution layers. The model updates the nodes’ hidden representations as follows:</li> </ol> \[h_i^{\ell+1} = \phi \left( h_i^{\ell}, \frac{1}{|\mathcal{N}_i|}\sum_{j \in \mathcal{N}_i} \psi \left( h_i^{\ell}, h_j^{\ell}, e_{ij} \right) \right)\] <ol> <li><strong>3-D graph-based model</strong>. While there are many different architectures to model points in 3-D space, we decided to use one of the simplest architectures — E(n) Equivariant Graph Neural Network (EGNN) <d-cite key="egnn"></d-cite> that is equivariant to rotations, translations, reflections, and permutations of the nodes. We used 4 layers, 256 hidden units, sum aggregation, mean pooling and residual connections between convolution layers to learn the representations of 3-D graphs of molecules that updates the nodes hidden representations according to the equations given in the Figure 1.</li> </ol> <h3 id="training">Training</h3> <p>We trained all models using the Adam optimizer with learning rate of $1\cdot10^{-3}$, batch size 32, and 100 epochs. We additionally used <code class="language-plaintext highlighter-rouge">ReduceLROnPlateau</code> learning rate scheduler. We used the mean absolute error (MAE) as the metric for evaluation.</p> <h3 id="evaluation">Evaluation</h3> <p>We used several combination of modalities to evaluate the performance of the models:</p> <ol> <li>MPNN + FPs: This model integrates the representation learned by the Message Passing Neural Network (MPNN) with the MLP trained on fingerprints, featuring 256 hidden units. 
It concatenates the representations from MPNN and MLP, using an MLP layer for the final target value prediction.</li> <li>EGNN + FPs: Similar to the previous model but uses the representation learned by the EGNN.</li> <li>EGNN + MPNN: This configuration combines the representations from EGNN and MPNN, followed by an MLP for target value prediction.</li> <li>MPNN + RNN: This model merges representations from MPNN and a pretrained Recurrent Neural Network (RNN). The RNN’s encodings remain static and are not updated during training. However, this model did not converge and was excluded from the final evaluation.</li> </ol> <p>The results of evaluation of different models on the QM9 dataset are presented in Figure 2.</p> <div class="l-page"> <iframe src="/staging/assets/html/2023-12-12-combining-modalities-for-better-representation-learning/mae.html" frameborder="0" scrolling="no" height="600px" width="100%"></iframe> </div> <div class="caption"> Figure 2. Different models' performance on the QM9 dataset. The models are trained on the same data, but with different representations. The number of parameters is displayed on top of each bar. </div> <h2 id="analysis">Analysis</h2> <h3 id="comparison-of-different-models">Comparison of different models</h3> <p>As depicted in Figure 2, the EGNN model demonstrates superior performance. A likely explanation is that the QM9 dataset’s labels were calculated using computational methods that leverage the 3-D structure of molecules. The 3-D representation, therefore, proves most effective for this task, with the EGNN adept at capturing crucial 3-D interactions for predicting the target value. Interestingly, simple concatenation of hidden representations seems to dilute the information, resulting in inferior performance. This suggests that combining modalities is a complex endeavor, requiring thoughtful architectural design. 
<d-cite key="modality_blending,molecule_sde"></d-cite></p> <h3 id="nearest-neighbors-analysis">Nearest neighbors analysis</h3> <p>After the training of the models we performed the nearest neighbors analysis to compare the learned representations of molecules. We took the learned representations of the molecules in the test set and computed the nearest neighbors in the latent chemical space using cosine similarity. Additionally we plotted the PCA reduced representations (Figure 3) and analyzed the nearest neighbors for 4 different molecular scaffolds.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic3-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic3-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic3-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic3.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 3. PCA reduced representations of the molecules in the test set. The color of the points corresponds to the molecular scaffold. </div> <p>There are several interesting observations from the nearest neighbors analysis:</p> <ol> <li>In case of fingerprints reductions the nearest neighbors are far away from the queried molecules in the latent chemical space.</li> <li>For the reduced learned representations of the molecules in the test set we can see that the nearest neighbors are very close to the queried molecules in the latent chemical space. 
This is expected as the models were trained to predict the target value and therefore the representations of the molecules that are close in the latent chemical space should have similar target values.</li> <li>The bottom right plot of Figure 3, showcasing the EGNN + FPs combination reveals very interesting pattern — the reduced chemical space reminds the combination of the reduced chemical spaces of the EGNN and FPs. EGNN’s reduced chemical is more “sparse”, while the representation that learned by MLP is more dense but much more spread out. Another interesting observation is that the combined chemical space is more structured due to the presence of some clustered fragments, which is not present in case of both EGNN and MLP.</li> </ol> <p>Additionally we analyzed the nearest neighbors for 4 different molecular scaffolds. The results for 3 of them are present in Figure 4.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic4-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic4-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic4-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-combining-modalities-for-better-representation-learning/dl_pic4.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 4. Nearest neighbors for 3 different molecular scaffold instances. Top molecule for each cell is the closest molecule to the queried molecule in the latent chemical space, the bottom molecule is the second closest molecule. 
</div> <p>From the Figure 4 we can make some additional observations:</p> <ul> <li>For the fingerprints similarity, molecules are very similar to the queried molecule. This is expected results because the molecules with the highest matches in the fingerprints are the most similar to the queried molecule. Although, for the third example the second closest molecule is not very similar to the queried molecule.</li> <li>MPNN, EGNN as well as their combination return the molecules that are very similar to the queried molecule. Because the model was trained to predict the target value, the nearest neighbors are molecules with similar target values (this is not guaranteed for the fingerprints similarity because substructures can be combined in different ways potentially leading to very different molecular properties).</li> <li>In case of MLP trained on fingerprints, the nearest neighbors can have very different scaffolds. This agrees with the performance of the model on the QM9 dataset — the model is not able to fully capture the molecular structure and therefore the nearest neighbors can have very different scaffolds even though the initial representations were the ones retrieving the most similar molecules (fingerprints).</li> <li>Interestingly, in case of RNN trained on SMILES strings, the nearest neighbors can have very different scaffolds. This result is expected because RNN was trained to predict next token in the sequence and therefore the nearest neighbors are the molecules with similar SMILES strings. For example, first molecule contains triple bond between two carbon atoms. In the case of the second closest neighbor for first scaffold instance there are two triple bonds between carbon and nitrogen atoms. 
The scaffold is different, but the SMILES strings are similar.</li> </ul> <p>Overall, the key takeaway is that the more effectively a model performs in the supervised learning phase (excluding the RNN), the more meaningful its nearest neighbors are in terms of molecular structure resemblance. While fingerprint similarity still yields closely matched molecules, the results are not as insightful as those from GNNs, which capture molecular structures with greater nuance and expressiveness.</p> <h2 id="conclusion">Conclusion</h2> <h3 id="results-of-modalities-mixing">Results of modalities mixing</h3> <p>Modalities mixing is a very interesting and promising approach for the problems in the field of molecular machine learning. However, architectures should be desinged carefully to achieve the best performance. In our work we showed that simple concatenation of the representations learned by different models can lead to worse performance on the downstream tasks.</p> <h3 id="future-work">Future work</h3> <p>The obvious direction of future work — to experiment with different architectures for modalities mixing. Another interesting direction is to use the mixed modalities for the generative molecular modeling as string methods still perform better than majority of 3-D generative approaches even though the latter one is more natural. <d-cite key="benchmarking"></d-cite> Therefore, it would be interesting to explore the combination of the string and 3-D graph representations for the generative modeling.</p> Exploring Frobenius and Spectral Normalization in MLPs and Residual networks 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/WeightDecaySpecNormEffects <h2 id="relevance-and-investigation">Relevance and Investigation</h2> <p>Weight normalization in deep learning is vital because it prevents weights from getting too large, thereby improving model’s learning ability, accelerating convergence, and preventing overfitting. 
One traditional method for weight normalization involves adding the sum of the weights’ Frobenius norms to the loss function. One of the issues with penalizing Frobenius normalization of weight matrices is that it imposes a more strict constraint than may be desired for some model types- it enforces that the sum of the singular values is one, which can lead to weight matrices of rank one, which essentially enforces models to make decisions based on only one feature. In 2018, Spectral normalization emerged as an effective method, especially for Generative Adversarial Networks (GANs), to control the Lipschitz constant of the model and stabilize the training process <d-cite key="DBLP:journals/corr/abs-1802-05957"></d-cite>. Spectral normalization is a more relaxed form of weight normalization that scales weight matrices based on their spectral norms, which is the maximum singular value of the matrix. This method is more relaxed because it does not impose a constraint on the sum of singular values, but rather only on the maximum singular value. This allows for weight matrices of higher rank, which may be more desirable for certain model types. Since 2018, spectral normalization has been used in many GAN architectures, but has not been explored as much in other deep learning scenarios. In fact, there is little published research on other approaches to the spectral view of weight normalization in general.</p> <p>We introduce two novel normalization techniques inspired by AdamW and motivated by issues caused by penalties in the loss function <d-cite key="DBLP:journals/corr/abs-1711-05101"></d-cite>. Our method, which we call Norm Scaling, takes a training step using a loss function that does not include a norm penalty, then scales the norms of the weight matrices after the step. 
Comparing our Frobenius and spectral normalization algorithms to each other can provide valuable insights into their advantages and disadvantages in various model architectures through a thorough investigation of their effects on the weight matrices. We aim to understand how the spectral norm of weight matrices change over time and how they affect overall model performance. Furthermore, we want to see how singular values change across architectures and algorithms, determining if certain types of architectures can benefit more from spectral normalization than another. It especially becomes interesting to investigate whether spectral normalization’s superior performance in stabilizing GAN training is generalized to other deep learning scenarios via different architecture types and a different scaling technique. The ultimate goal of this exploration is to deepen our understanding of these normalization techniques to find more intelligent ways to regularize weight matrices in order to acheive less overfitting and improve learning ability.</p> <h2 id="norm-scaling">Norm Scaling</h2> <p>Let us introduce our novel normalization technique, Norm Scaling. We will first describe the algorithm in the context of Frobenius normalization, then we will describe how it will be applied with spectral normalization. We begin each process by initializing the weight matrices of the model to be orthogonal, which helps prevent gradient numerical stability issues and improve convergence timing. We then multiply each weight matrix, \(W_k\) by \(\sqrt{\frac{d_k}{d_{k-1}}}\) where \(d_k\) is the size of the output at layer \(k\). This enforces the initial spectral norm of each weight matrix to be \(\sqrt{\frac{d_k}{d_{k-1}}}\), and the initial Frobenius Norm to be \(\sqrt{min(d_k, d_{k-1})*\frac{d_k}{d_{k-1}}}\).</p> <p>In the Frobenius Norm Scaling algorithm training is relatively straightfoward. 
After we initialize the orthogonal weight matrices but before beginning training, we calculate the Frobenius norm of each weight matrix based on the equation above and save these in our model. On each training step, we first calculate the loss, compute the gradients, and take a step using the optimizer. Then, we calculate the Frobenius norm of each weight matrix, \(W_k\), divide the matrix by this norm, and multiply it by its initial value that we calculated before training:</p> \[\bar{W}_k = \frac{W_k}{||W_k||_F} * \sqrt{min(d_k, d_{k-1})*\frac{d_k}{d_{k-1}}}\] <p>This ensures that the Frobenius norm of each weight matrix, \(W_k\), is equal to its initial value throughout the entire training process.</p> <p>The Spectral Norm Scaling algorithm is slightly more mathematically complicated, and required the use of power iteration to make sure training time was feasible. After we initialize the orthogonal weight matrices but before training, we save target spectral norms for each weight matrix, \(W_k\). On each training step, we first calculate the loss, compute the gradients, and take a step using the optimizer. Then, we calculate the first singular value, which is the same as the spectral norm, and the first right singular vector of each weight matrix, \(W_k\), using power iteration. In order to mimimize the difference beween the right singular vector and the power iteration prediction of this vector we use 500 steps. To use power iteration with convolution weight matrices, which have dimension 4, we view them as 2 dimension weight matrices where all dimensions past the first are flattened (this reshaping is the channel-wise decomposition method and was used for similar work in Yang et al., 2020 <d-cite key="yang2020learning"></d-cite>).</p> <p>To find the first right singular vector and singular value, we use the fact that the top eigenvector and corresponding eigenvalue of \(A^TA\) are the first right singular vector and singular value of A respectively. 
So using the power method, we compute the top eigenvector and eigenvalue of \(W_k^TW_k\).
We use the cross entropy loss function for all models. We use the same training hyper-parameters for all models. The models were trained on 4 NVIDIA Tesla A100 GPUs with paralellization handled by the pytorch lightning library.</p> <p>At the end of training, the MLP with depth 5, width 2000, and no norm scaling had a test accuracy of 25.12% and a test loss of 10.86. The MLP with depth 5, width 2000, and Frobenius norm scaling had a test accuracy of 28.23% and a test loss of 4.47. The MLP with depth 5, width 2000, and spectral norm scaling had a test accuracy of 23.21% and a test loss of 3.53. The MLP with depth 7, width 4000, and no norm scaling had a test accuracy of 23.95% and a test loss of 11.00. The MLP with depth 7, width 4000, and Frobenius norm scaling had a test accuracy of 26.62% and a test loss of 6.10. The MLP with depth 7, width 4000, and spectral norm scaling has a test accuracy of 36.25% and a test loss of 2.63. ResNet34 with no norm scaling had a test accuracy of 70.1% and a test loss of 2.03. ResNet34 with Frobenius norm scaling had a test accuracy of 75.24% and a test loss of 1.46. ResNet34 with spectral norm scaling had a test accuracy of 71.79% and a test loss of 1.78. ResNet50 with no norm scaling had a test accuracy of 73.45% and a test loss of 1.72. ResNet50 with Frobenius norm scaling had a test accuracy of 75.72% and a test loss of 1.40. ResNet50 with spectral norm scaling had a test accuracy of 73.29% and a test loss of 1.63. 
Full summaries of the changes of these metrics across epochs are plotted below with checkpoints every 10 epochs.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_acc_sum-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_acc_sum-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_acc_sum-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_acc_sum.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_loss_sum-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_loss_sum-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_loss_sum-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Train_loss_sum.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Above left: Training accuracy of MLPs. Above right: Training loss of MLPs. 
Spec refers to models trained with spectral norm scaling, Frob refers to models trained with Frobenius norm scaling, and Baseline refers to models trained with no norm scaling. </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_acc_sum-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_acc_sum-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_acc_sum-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_acc_sum.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_loss_sum-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_loss_sum-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_loss_sum-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP_Test_loss_sum.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Above left: Testing accuracy of MLPs. Above right: Testing loss of MLPs. 
</div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_acc_sum-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_acc_sum-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_acc_sum-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_acc_sum.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_loss_sum-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_loss_sum-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_loss_sum-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Train_loss_sum.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Above left: Training accuracy of ResNets. Above right: Training loss of ResNets. 
</div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_acc_sum-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_acc_sum-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_acc_sum-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_acc_sum.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_loss_sum-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_loss_sum-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_loss_sum-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res_Test_loss_sum.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Above left: Testing accuracy of ResNets. Above right: Testing loss of ResNets. 
</div> <h2 id="findings">Findings</h2> <h3 id="scaling-effects-on-training-stability">Scaling Effects on Training Stability</h3> <p>One of the most interesting findings of this investigation is the effect of spectral norm scaling on the stability of training. We can see in the figures above that spectral norm scaling has a significant effect on the stability of training for MLPs, but not for ResNets. For MLPs, spectral norm scaling significantly improves the stability of training, as shown by the fact that the training and test loss curves remain close and follow a similar path. This is especially true for the large MLP, where the training and testing loss and accuracy curves maintain a similar relationship for the entire duration of training while the test loss increases and test accuracy plateaus for the other two normalization methods.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_acc-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_acc-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_acc-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_acc.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_loss-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" 
srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_loss-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_loss-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_bl_loss.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Above left: Testing and training accuracy of Large MLP (depth 7, width 4000) with no norm scaling. Above right: Testing and training loss, same model. </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_acc-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_acc-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_acc-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_acc.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_loss-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_loss-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 
1400px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_loss-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_frob_loss.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Above left: Testing and training accuracy of Large MLP with Frobenius norm scaling. Above right: Testing and training loss, same model. </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_acc-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_acc-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_acc-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_acc.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_loss-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_loss-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_loss-1400.webp" /> <!-- Fallback to the original file --> <img 
src="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP4k_spec_loss.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Above left: Testing and training accuracy of Large MLP with spectral norm scaling. Above right: Testing and training loss, same model. </div> <p>Although the train accuracy when using spectral norm scaling doesn’t get as high as in the other two models, it is an accuracy predictor for test accuracy during the entire training time. Furthermore, it is the only of the methods we tests that continues to decrease test loss for the duration of training, where the other two show signatures of overfitting the data and increasing test loss. This is a very interesting finding because it shows that spectral norm scaling can be used to improve the stability of training for MLPs, which is a very important property for deep learning models. This is especially true for MLPs because they are more prone to overfitting than other model types, so improving the stability of training can help prevent overfitting.</p> <p>We see that this pattern does not hold for ResNets. Rather, it seems that the Frobenius norm scaling method introduces the most stability, but is still not stable as the relationship for spectral norm scaling in MLPs. Similarly, because ResNets rely on convolutions, we do not see issues with overfitting in any of the models. 
Although it appears that spectral norm scaling may improve over the baseline stability, the effect is not as noticeable as the effect from Frobenius norm scaling.</p>
</div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_acc-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_acc-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_acc-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_acc.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_loss-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_loss-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_loss-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_frob_loss.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Above left: Testing and training accuracy of ResNet50 with Frobenius norm scaling. Above right: Testing and training loss, same model. 
</div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_acc-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_acc-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_acc-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_acc.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_loss-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_loss-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_loss-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res50_spec_loss.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Above left: Testing and training accuracy of ResNet50 with spectral norm scaling. Above right: Testing and training loss, same model. 
</div> <p>This is a surprising result considering that spectral normalization was first developed in the context of GANs using convolutional layers for image generation. We will address this disparity in the conclusion.</p> <h3 id="scaling-effects-on-spectral-norms">Scaling Effects on Spectral Norms</h3> <p>While both our spectral norm and Frobenius norm scaling algorithms resulted in consistently lower spectral norm values across all epochs compared to no normalization, spectral norm scaling had far and away the largest effect on enforcing low spectral norm values for weight matrices:</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_bl_norms-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_bl_norms-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_bl_norms-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_bl_norms.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_frob_norms-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_frob_norms-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_frob_norms-1400.webp" /> <!-- Fallback to the 
original file --> <img src="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_frob_norms.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_spec_norms-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_spec_norms-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_spec_norms-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/MLP2k_spec_norms.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Changes in spectral norm values for each weight matrix in medium MLP (depth 5, width 2000) across epochs. Above left: No norm scaling. Above center: Frobenius norm scaling. Above right: Spectral norm scaling. 
</div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_bl_norms-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_bl_norms-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_bl_norms-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_bl_norms.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_frob_norms-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_frob_norms-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_frob_norms-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_frob_norms.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_spec_norms-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" 
srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_spec_norms-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_spec_norms-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-WeightDecaySpecNormEffects/Res34_spec_norms.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Changes in spectral norm values for each weight matrix in ResNet34 across epochs. Darker lines correspond to earlier layers. Above left: No norm scaling. Above center: Frobenius norm scaling. Above right: Spectral norm scaling. </div> <p>Using spectral norm scaling, the spectral norms of both architectures on all layers collapse to values significantly lower than those seen when using Frobenius norm scaling or no norm scaling. The average spectral norm values at the penultimate epoch (199) using spectral norm scaling is 0.8; Frobenius norm scaling is 7.8; and no normalization is 35.4 on the width 2000, depth 5 MLP architecture.</p> <p>It is also interesting that spectral norms are very similar across layers in later epochs when using spectral norm scaling, but the same is not true for the other two experiments: the average standard deviation in spectral norm values across all layers for the last 100 epochs using spectral norm scaling is ~0.02; Frobenius norm scaling is ~3.7; and no normalization is ~18.4 on the width 2000, depth 5 MLP architecture.</p> <p>While it may seem obvious that spectral norm scaling would do the best job at encouraging low spectral norm values, this was not evidently the case. 
While we subtract the best rank one approximation, thus decreasing the spectral norm, the new spectral norm does not necessarily become the target value, as it is possible that the second largest singular value is larger than our target spectral norm. It seemed possible that merely subtracting a rank one matrix would fail to completely curb spectral norm blow up or do it with this level of success. These results show that not only does our method do it successfully, but does it much more so than Frobenius norm scaling. What’s more, the results generalize across wildly different architectures: we see rapid convergence to low singular values in both the ResNet and MLP case roughly around the same epoch.</p> <h2 id="conclusion">Conclusion</h2> <p>One drawback of our method was the significant increase in training times of our models. Compared to the time it took to train the baseline and Frobenius norm scaling implementations, the spectral norm implementations took between ~400% to ~1,500% longer to train. In order to address this in the future we will implement an adaptive power iteration that stops once the singular vectors converge to a certain threshold. This will allow us to reduce the number of power iterations needed to calculate the singular values, thus reducing the training time.</p> <p>An interesting fold in our results was the difference between stability effects in the MLP and ResNet cases. We see that spectral norm scaling has a significant effect on the stability of training for MLPs, but not for ResNets. This is a surprising result considering that spectral normalization was first developed in the context of convolutional layers for image generation. We believe that this may stem from one of two reasons. The first is that we had to reduce the dimensionality of the convolutional matrices in order to use the power iteration algorithm. 
Although this allowed us to efficiently calculate the values we needed, it may not have been an accurate reflection of the matrix singular vectors. One route to address this in the future is to try initializing the spectral norm target values based solely on the input and output channel sizes, rather than the full size of the inputs and outputs. The second reason is that the convolutional layers in ResNets are not as prone to overfitting as the fully connected layers in MLPs, so the stability effects of spectral norm scaling would not be as noticeable. However, we still see an effect of Frobenius norm scaling, so this may be a matter of mathematical properties of the convolutional layers that we have not yet explored.</p> <p>We may see most desired effects on singular values in spectral norm scaling because subtracting the best rank one approximation of the weight matrix does not influence other singular values nor the outer products of their singular vectors. When we view the singular value decomposition as the sum of outer products of singular vectors scaled by singular values, we can see that we only regularize one term in this sum. This may prevent a single outer product from dominating the linear transformation, especially preventing overfitting in MLPs where overfitting tends to be an issue. This is not true of Frobenius normalization, as we scale the entire matrix.</p> <p>Overall, our results show that spectral norm scaling is a very effective method for stabilizing training in MLPs and enforcing low spectral norm values in MLPs and ResNets. This shows that spectral norm scaling may be a feasible and generalizable method for stabilizing training in a variety of conditions beyond GANs. Furthermore, we were able to achieve this without the use of a penalty in the loss function, achieving the same effect as a penalty without the negative effects. 
This is especially important because penalties in the loss function can cause issues with convergence and numerical stability alongside enforcing low rank, which we avoid by using our Norm Scaling algorithm. We beleive our results show great potential for further rigorous qauntitative research on the spectral view of weight normalization. We hope that our Norm Scaling algorithm will be used as a baseline for investigating spectral normalization algorithms that are both computationally efficient and effective at stabilizing training alongside enforcing low spectral norm values.</p> <p>All of our training code can be found in this <a href="https://github.com/phess2/SpecNorm/tree/main">GitHub Repository</a>.</p> <hr /> Iterated Representation Learning 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/Iterated-Representation-Learning <h2 id="introduction">Introduction</h2> <p>Representation learning has become a transformative subfield of deep learning within recent years, garnering widespread attention for its sophistication in learning lower-dimensional embeddings of data beyond classical techniques such as principal component analysis (PCA). From class, we learned that desirable characteristics of good representations include minimality, sufficiency, disentangelement, and interpretability. 
However, because typical representation learning techniques such as autoencoders learn only one latent embedding from the input data, there exists a gap in the literature on the <em>stability</em> of the model and learned embeddings.</p> <p>In this project, we thus explore a new approach to traditional representation learning techniques, in which embeddings for a given set of data are learned repeatedly until some sort of convergence with respect to the model and learned embedding space, a process we call <strong>Iterated Representation Learning (IRL)</strong>; by analyzing the performance of this iterative approach, our work aims to discover potential insights into the robustness qualities inherent to a model and its associated latent embedding space. We propose an algorithmic framework for IRL, provide an empirical case study of the efficacy of our IRL framework on the MNIST dataset, and suggest a novel evaluation procedure for representation stability and robustness via iterated learning.</p> <h3 id="representation-learning-primer">Representation Learning Primer</h3> <p>The goal of representation learning is to build models that effectively learn meaningful representations of the data. Representations are important for a variety of reasons, including determining which features are the most explanatory or variable in a dataset, compressing repeated information from a dataset to make it more compact, and learning more effective neural networks, to name a few examples. 
These representations are typically abstract and less interpretable than the input data, but of lower dimension, which makes them useful in capturing the most essential or compressed characteristics of the data.</p> <p>More formally, representation learning aims to learn a mapping from datapoints \(\mathbf{x} \in \mathcal{X}\) to a (typically lower-dimensional) representation \(\mathbf{z} \in \mathcal{Z}\); we call this mapping an <strong>encoding</strong>, and the learned encoding is a function \(f: \mathcal{X} \rightarrow \mathcal{Z}\). From this, a <strong>decoder</strong> \(g: \mathcal{Z} \rightarrow \mathcal{X}\) can be applied to reconstruct the encoded data into its original dimension. This is demonstrated in the diagram below.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/representation-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/representation-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/representation-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/representation.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Representation learning goal. Image credit: <i>Foundations of Computer Vision: Representation Learning</i> (Torralba, Isola, Freeman 2023). 
</div> <p>Some of the most salient learning methods within representation learning today include autoencoding, contrastive learning, clustering, and imputation; in this project, we focus on specifically on iterative approaches for the class of <strong>autoencoders</strong>.</p> <p>Representation learning also has intricate ties to generative modeling, the subfield of deep learning that aims to generate new data by mapping a simple base distribution to complicated high-dimensional data, which is essentially the opposite goal of representation learning. Then, after learning an embedding space via representation learning, this embedding can then be sampled from to <em>generate</em> new data that mimics the original data, as demonstrated by <strong>variational autoencoders (VAEs)</strong>, which we also explore in this paper.</p> <h3 id="prior-literature">Prior Literature</h3> <p>Relatively little literature exists regarding iteratively training dimensionality reduction or representation learning models. <a href="https://ieeexplore.ieee.org/document/9528915">Vlahek and Mongus (2023)</a> proposes an iterative approach for <em>conducting</em> representation learning more efficiently, specifically for the goal of learning the most salient features, which fundamentally diverges from our goal and also does not consider embedding robustness. <a href="https://arxiv.org/abs/1809.10324">Chen et al. (2019)</a> introduces an iterative model for supervised extractive text summarization, though their objective of trying to optimize for a particular document by feeding a given document through the representation multiple times differs from ours. 
<a href="https://ieeexplore.ieee.org/abstract/document/9414713">Cai, Wang, and Li (2021)</a> finds an iterative framework for self-supervised speaker representation learning which performs 61% better than a speaker embedding model trained with contrastive loss, but mainly focuses on the self-supervision aspect of the model and optimizes purely for model test accuracy, not considering other metrics such as stability or robustness.</p> <p>Overall, we find that the literature regarding iterative approaches to representation learning is already sparse; of the work that exists, most focuses on very specific use cases, and no work directly examines the robustness or stability of the model and embeddings themselves learned over time, rather optimizing purely for final model performance.</p> <h2 id="iterated-representation-learning">Iterated Representation Learning</h2> <h3 id="existing-dimensionality-reduction-and-representation-models">Existing Dimensionality Reduction and Representation Models</h3> <p>Nowadays, there are a variety of approaches to effective dimensionality reduction. Below we cover three of the most common techniques.</p> <h4 id="principal-component-analysis">Principal Component Analysis</h4> <p>Principal Component Analysis (PCA) has two primary objectives. First, maximizing sample variance of the newly transformed data, which is analogous to identifying and capturing the greatest (largest) directions of variability in the data (principal components or PCs). Formally, a PC is defined</p> \[v^* = \arg \max_v \frac{1}{N-1} \sum_{n=1}^N (x^T_n v - \bar{x}^T v)^2 = \arg \max_v v^T C v\] <p>where \(C = \frac{X^T X}{n-1} \in \mathbb{R}^{d \times d}\) is the empirical covariance matrix.</p> <p>The second objective is minimizing reconstruction loss, which is analogous to identifying the directions of variability to accurately and concisely represent data. Let \(U\) be the orthonormal basis projection matrix of eigenvectors of \(C\). 
Then we define reconstruction loss as</p> \[\mathcal{L}(U) = \frac{\sum_{n=1}^N ||x_n - U U^T x_n||^2}{N}\] <p>Above, we observe that maximizing sample variance and minimizing reconstruction loss go hand-in-hand. Since PCA applies projections by multiplying vectors/matrices to the data, PCA is limited to the <em>linear</em> transformation setting, hence restricting its applicability in many modeling problems.</p> <h4 id="autoencoders">Autoencoders</h4> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/ae-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/ae-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/ae-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/ae.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Autoencoder structure. Image credit: <i>Foundations of Computer Vision: Representation Learning</i> (Torralba, Isola, Freeman 2023). </div> <p>Similar to PCA, autoencoders also aim to minimize reconstruction loss. However, autoencoders are not limited to just linear transformations, which enables autoencoders to learn more general lower-dimensional representations of data. 
Autoencoders are comprised of an encoder and decoder, where the encoder maps data to a lower-dimensional representation (embedding) via some function $f$, and the decoder maps the originally transformed data back to its original dimensional space via some function $g$.</p> <p>End to end, the data space starts in \(\mathbb{R}^N\), is downsized to \(\mathbb{R}^M\) by \(f\), and then is reverted back to \(\mathbb{R}^N\) where \(N &gt; M\). In this case, we can formalize the objective as follows:</p> \[f^*, g^* = \arg \min_{f,g} E_\mathbf{x} || \mathbf{x} - g(f(\mathbf{x}))||^2_2\] <h4 id="variational-autoencoders">Variational Autoencoders</h4> <p>VAEs couple autoencoders with probability to get maximum likelihood generative models. Typically for encoding, VAEs regularizes the latent (hidden) distribution of data to “massage” the distribution into a unit Gaussian, and when reverting back to the original dimensional space, VAEs add noise to the output — hence, a mixture of Gaussians. By imposing a unit Gaussian structure on the learned embedding space, this allows VAEs to act as generative models by sampling from the Gaussian latent space to generate new data. 
Unlike traditional autoencoders, VAEs may have embedding spaces that are complicated (if not just as complicated as the data).</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/vae-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/vae-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/vae-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/vae.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> VAE's complex embedding space. Image credit: <i>Foundations of Computer Vision: Generative Modeling Meets Representation Learning</i> (Torralba, Isola, Freeman 2023). </div> <p>Formally, the VAE learning problem is defined by</p> \[\theta^* = \arg \max_{\theta} L(\{\mathbf{x}^{(i)}\}^N_{i=1}, \theta) = \arg \max_{\theta} \sum_{i=1}^N \log \int_{\mathbf{z}} \mathcal{N} (\mathbf{x}^{(i)}; g_{\theta}^{\mu}(\mathbf{z}), g_{\theta}^{\Sigma}(\mathbf{z})) \cdot \mathcal{N}(\mathbf{z}; \mathbf{0}, \mathbf{\mathrm{I}}) d\mathbf{z}\] <h3 id="iterated-representation-learning-1">Iterated Representation Learning</h3> <h4 id="proposed-framework">Proposed Framework</h4> <p>We now introduce the Iterated Representation Learning Framework (IRL) for autoencoders and VAEs. 
We start with IRL for autoencoders:</p> <ol> <li>Given design matrix \(X\), learn an autoencoder for \(X\).</li> <li>Using the decoder from above, reconstruct the data to get \(X'\) and compute its reconstruction loss.</li> <li>Using the reconstructed data \(X'\), repeat Steps 1 and 2 and iterate until the reconstruction loss converges or reaching iteration limit.</li> </ol> <p>As for VAEs, we follow a similar procedure as above.</p> <ol> <li>Given design matrix \(X\), learn a VAE for \(X\).</li> <li>Using the decoder and adding Gaussian noise, reconstruct the data to get \(X'\). Compute its reconstruction loss.</li> <li>Using the reconstructed data \(X'\), repeat Steps 1 and 2 and iterate until the reconstruction loss converges or reaching iteration limit.</li> </ol> <p>In this report, we examine how IRL is connected to representation, investigate several hypotheses about IRL, and conduct a preliminary case study of IRL on the MNIST dataset.</p> <h4 id="preliminary-questions-and-hypotheses">Preliminary Questions and Hypotheses</h4> <p>Motivated by how there may be unexplored stability properties of embeddings, our main hypotheses are twofold. First, iterated reconstruction loss per IRL can convergence with respect to the model. Second, learned embedding spaces can be reached via IRL, and that the number of iterations until convergence, loss at convergence, and such preserved features upon convergence could reveal meaningful properties of the true representation space, model, and data that are not immediately obvious from a standard autoencoder model.</p> <p>More specifically, does the number of iterations until convergence have anything to do with how ``good’’ or stable the model or learned representation is? What does it mean if the reconstruction losses converge? What can we say about characteristics of the data that are maintained through iterations, and characteristics that evolve as the iterations go on? 
For example, if we observe that a model remains invariant to a certain feature, but becomes sensitive to new features of the data, what does this tell us about these particular features, our model, and the original data itself?</p> <p>Perhaps most importantly, beyond the qualitative observations themselves, can we propose some sort of representation learning evaluation framework using iterated representation learning, e.g. rough guidelines on ideal number of iterations required until convergence, and what this says about how good a model is? Ultimately, we hope that using an iterated framework can serve as a general tool for (1) evaluating the stability or robustness of a representation learning model and (2) identifying the most core characteristics of a given dataset.</p> <h2 id="case-study-mnist-dataset">Case Study: MNIST Dataset</h2> <p>To evaluate IRL on a real-world dataset, we selected MNIST to test our hypotheses. We carefully designed our experiments, collected relevant data, and include our analysis below.</p> <h3 id="experimental-design">Experimental Design</h3> <p>For our experiments, we implemented IRL using the framework given above for the class MNIST digits dataset (due to its simplicity and intrepretability), where we preset the <code class="language-plaintext highlighter-rouge">num_iterations</code>. At every iteration, we initialize a new autoencoder model with <a href="https://arxiv.org/abs/2206.08309">Chadebec, Vincent, and Allassonnière’s (2022)</a> <code class="language-plaintext highlighter-rouge">pythae</code> autoencoder/VAE library. The encoder architecture is formed by sequential convolutional layers from <code class="language-plaintext highlighter-rouge">PyTorch</code>.</p> <p>We then trained the model, reconstructed the data, and saved the trained and validation loss. 
We also saved the original train/test and reconstructed train/test images of the first 25 datapoints to track how IRL progressed visually.</p> <h3 id="autoencoder-irl-analysis">Autoencoder IRL Analysis</h3> <p>First, we take a look at the (log) mean squared error of our autoencoder over 30 iterations of IRL, given in the plot below.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_logloss-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_logloss-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_logloss-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_logloss.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Autoencoder log loss over IRL of 30 iterations. </div> <p>We notice that both the train and validation loss steeply decrease until around iteration 10, upon which the validation loss begins to roughly stabilize and converge. This confirms our intuition that the loss following an iterated approach should eventually converge, which we can theoretically verify by observing that if we ran \(n\) iterations, then as \(n\to\infty\), because the loss is lower-bounded by zero and should generally from iteration to iteration (since we are removing information from our data), we must eventually converge. 
We further hypothesize that the fact that the loss has converged means that the embeddings upon convergence have learned the most succinct, critical portion of the data.</p> <p>We also notice that the number of iterations until convergence is very small; as mentioned, after about 10 iterations, it seems that the validation loss has roughly converged. We had hypothesized earlier that if the autoencoder converges after a small number of iterations, then that says something about the quality of the autoencoder architecture. Here, the fact that the loss converged after a small number iterations gives evidence for this hypothesis, since based on separate tests, this architecture indeed achieves relatively high classification accuracy for the MNIST dataset. We suggest that IRL can thus serve as a framework for evaluating the quality of an autoencoder on a particular dataset.</p> <p>Additionally, the validation loss converges at a relatively small number (around 0.25 by iteration 10), meaning that the distance between the original and reconstructed data in a given iteration are very similar. 
Interestingly enough, the validation loss is actually consistently lower than the train loss, which suggests that the learned representations through this iterated approach actually generalize very well to unseen data, which is certainly a desirable quality of any model.</p> <p>We also give the original and reconstructed data for iterations 1, 5, 10, 15, and 20, for both the train and test data, in the figures below.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_train-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_train-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_train-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_train.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Reconstructed train data. 
</div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_test-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_test-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_test-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/ae_10k_digits_test.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Reconstructed test data. </div> <p>In the beginning, we see that the data starts losing resolution (e.g. the numbers become fuzzier and start losing their distinctness from the background), which makes sense because more iterations means more reconstructions that continue to accumulate reconstruction loss. 
The reconstructed images are also less clear than the originals due to the information that is lost from the encoding-decoding process.</p> <p>Our key observation is that the reconstruction loss stabilizes around the 10th iteration, where the original test images and reconstructed test images look very similar — we hypothesize that this is the point where the autoencoder has learned to represent the data as succinct as possible while preserving the most critical information.</p> <h3 id="vae-irl-analysis">VAE IRL Analysis</h3> <p>We similarly plot the log loss for our VAE, as well as the train, test, and sampled data over iterations in the figures below.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_logloss-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_logloss-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_logloss-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_logloss.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> VAE log loss over IRL of 30 iterations. 
</div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_digits-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_digits-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_digits-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-Iterated-Representation-Learning/vae_10k_digits.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Train, test, and normal sampled data. </div> <p>Unlike the autoencoder, the VAE’s train data becomes much more noisy across the 20 iterations. This is likely due to how the VAE injects noise in the reconstruction, which in this case resulted in the images to lose their distinctness. While the general shape is preserved (roundness, lines, etc), many of the numbers actually ended up merging together and losing their number shape altogether (e.g. some 6s, 3s, 9s all become 0s).</p> <p>When comparing IRL on the autoencoder versus the VAE, we observe that the VAE’s log loss converges to a larger log loss than the autoencoder, which makes sense because the VAE’s decoding step adds noise to the images that therefore adds loss to the reconstruction. 
We also note that both of the models experience steep drop-offs
Another thing we’d like to evaluate the empirical performance of, but also couldn’t due to computational constraints, is how a single autoencoder with 20 times as many neurons as some basic autoencoder compares to the basic autoencoder trained using IRL for 20 iterations.</li> <li>We’re also curious to further explore the theoretical guarantees provided by IRL, including rigorous bounds on convergence. We’re also very interested in exploring whether any of our observations from IRL can generalize to other classes of deep learning models.</li> <li>We’d lastly look into ways to make IRL more computationally tractable. As mentioned, our experimentation was heavily limited due to the computational cost of training a new autoencoder during every iteration. If possible, we’d like to look for optimizations of this framework that still preserve the desired methodology.</li> </ol> <p>Overall, Iterated Representation Learning serves as a framework to evaluate stability-related properties of data, which we believe to be an important but overlooked standard for representation learning. Our case study of MNIST shows promise for empirical convergence guarantees on certain datasets, and we hope that our work lays the foundation for future representation discussions with respect to stability.</p> A Method for Alleviating Catastrophic Forgetting With Explainability 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/alleviating-catastrophic-forgetting in-classification-tasks-through-strategic-filter-dropout <h1 id="introduction">Introduction</h1> <p>With recent advancements in deep learning, the intelligence of computers is quickly rivaling that of humans. GPT-4, with significant size and data, is able to score in the 90th percentile of the BAR, 88th percentile of the LSAT, and the 92nd percentile on the SAT <d-cite key="openai2023gpt4"></d-cite>. 
In dermatology, sophisticated computer vision models have outperformed trained professionals in diagnosing skin diseases and cancer <d-cite key="jeong2023deep"></d-cite>. Despite this substantial computational advantage, neural networks notably lag behind humans in their capacity for continuous learning, a skill essential for any intelligent entity. Particularly, they suffer from catastrophic forgetting, a phenomenon in which the learning of a new objective significantly degrades performance on prior tasks.</p> <p>The human brain is able to protect itself from conflicting information and reductions in performance on previous tasks using complex mechanisms involving synaptic plasticity <d-cite key="hadsell2020embracing"></d-cite>. In essence, the brain is able to self regulate the strength of its connections, allowing for neurons to become less activated according to their memory and relevance. This ability has been attributed for the unmatched ability to learn in humans, which has allowed for humans to show improvement in skill on nearly any motor task given training, while still remembering previous information <d-cite key="green2008exercising"></d-cite>. This, then, is highly desirable for neural networks.</p> <p>In contrast to the human’s ability to learn, neural networks significantly alter their parameters when learning a new task. In effect, the network’s understanding of previous tasks is overwritten. This poses a great barrier to the creation of artificial general intelligences, which ultimately depend on continual, life-long learning <d-cite key="silver2011machine"></d-cite>.</p> <p>With the rapid increase in size and complexity of models, the field of model explainability and the desire to understand exactly what models are doing has quickly grown. 
Specifically in the field of computer vision, effort has been made to understand how models make decisions, what information leads to this decision, and how they learn what to observe <d-cite key="haar2023analysis"></d-cite>. Methods such as saliency mapping, which displays the importance of aspects of an input image to predicting a class, filter visualization, which finds the most activating features for a given filter, and gradient class activation maps, which visualizes the gradients flowing into the final convolutional layer, have all significantly contributed towards the understanding of how models make decisions <d-cite key="adebayo2018sanity"></d-cite><d-cite key="erhan2009visualizing"></d-cite><d-cite key="selvaraju2017grad"></d-cite>.</p> <p>We propose to make use of these explainability methods for the intelligent freezing of filters of a convolutional neural network. Specifically, we use saliency maps and filter visualizations to consider what a model is observing to classify an image, and then decipher which filters are most strongly contributing to this. In this paper, we contribute the following: 1. We create a method for the ranking of importance of filters in a convolutional neural network. We expand and combine upon previous works in model explainability to understand which filters are most strongly contributing to positive predictions. 2. We create a method for the freezing of filters of a convolutional neural network according to these rankings. We do this by first training on one task, freezing filters according to importance, then retraining the same model on a novel task. In doing this, we both corroborate our ranking system and identify a new strategy for alleviating catastrophic forgetting.</p> <h1 id="related-works">Related Works</h1> <p>Continual learning and its core problem of catastrophic forgetting has gotten recent attention in deep learning research. 
It’s easy to understand why the goal of having a model that can adapt to new data without being completely re-trained is sought after, and there have been many approaches to the problem of aiding the model’s ‘memory’ of past tasks. Solutions range from attaching a significance attribute to certain weights in the model that regularizes change introduced by the new data to explicitly freezing weights via different metrics of the weights’ performance.</p> <h2 id="weight-changing-regularization">Weight Changing Regularization</h2> <p>Elastic Weight Consolidation(EWC) approaches the problem of catastrophic forgetting by adding a ‘stiffness’ to the weights of previous tasks dependent on an approximation of the importance they had to previous task performance. The authors of ‘Overcoming catastrophic forgetting in neural networks’ <d-cite key="Kirkpatrick_2017"></d-cite>. explain EWC as maximizing a posterior of the parameters over the entire dataset, and then splitting up the posterior into a loss over the new task and a posterior of the parameters over the old task. They model the posterior of the old data as a quadratic difference of the original parameters and the current ones multiplied by the Fisher information matrix, so minimizing this results in preventing parameters from changing too much from being predictable from the old task’s data. The authors of the original paper showed that EWC was effective at preventing CNN from forgetting how to classify the MNIST dataset and helping an RL model maintain performance in Atari games. 
However, EWC is an additional loss metric that must be calculated for each back-propagation and for each previous task; it’s also linear in the size of the output and therefore is prohibitive for high dimensional data.</p> <p>Another technique that attempts to use a regularizing factor to slow the retraining of old task parameters is explicitly computing an importance metric for each neuron in the network <d-cite key="zenke2017continual"></d-cite>. The authors denote this method as “Synaptic Intelligence” as they drew their inspiration from the complex adaptation of synapses in the brain contrasted with the simple uni-scalar representation of neurons in an MLP network, and by allowing the network to account for the importance of each parameter, they could help a neural network model the human behavior of continual learning.
They also made the decision to completely wipe the weights they did not freeze and then do a couple epochs of training on the model that was a mix of frozen and pruned weights. Their strategy achieved performance roughly equal to networks jointly trained on all the data at once and outperformed the naive strategy of simply retraining, validating a version of the freezing strategy.</p> <p>Instead of simply measuring the magnitude of weights to decide what layers or specific weights to freeze, authors of a paper on catastrophic forgetting explainability paper use a custom metric to find a layer that scores highest on their metric and subsequently freeze all the layers prior to that layer <d-cite key="nguyen2022explaining"></d-cite> Their metric is an analysis of the difference in activation maps of a layer in the model pre- and post- training on the new task. They posit that this difference in activation is a measurement of how much a layer has forgotten how to activate in response to an input. Their reasoning for freezing the layers prior to the layer most changed by the new sample set is that the errors that induce catastrophic forgetting propagate throughout the network, so identifying the layer with the sharpest drop-off indicates that prior layers are to blame. This seemingly builds off an earlier paper <d-cite key="nguyen2020dissecting"></d-cite> that uses a similar activation map difference scheme to delicate layers that change more easily during training and instead directly freezes those fragile layers rather than those prior. 
In both papers, their results for this technique are an improvement over their ‘fine-tuning’ baseline, but the more recent paper’s results were not that differentiated from just selecting a layer to freeze before training a new task.</p> <h2 id="explanability-metrics">Explanability Metrics</h2> <p>There exists many other explainability metrics with which one can target layers prior to training on a new task to try to prevent interference, an interesting one being saliency maps. Saliency maps attempt to capture the importance of features of the input on the output of a deep neural network. In the domain of CNNs, this can be thought of both the pixels and larger features, such as a window on a car, that contribute to a correct classification; saliency maps are analogous to trying to map out what parts of an image a model uses to make correct identification. A model of saliency maps we felt compelled enough to use in our project is that of <d-cite key="srinivas2019fullgradient"></d-cite>, where their full-gradient approach creates saliency maps from the gradients of each layer. This strategy encapsulates the importance of both the inputs and the impact of neurons throughout the network on the saliency map. As parts of a neural network might suffer from varying degrees of catastrophic forgetting, being able to identify the saliency of individual neurons is a desirable quality in choosing a metric that explains catastrophic forgetting.</p> <h1 id="methods">Methods</h1> <h2 id="model-type">Model Type</h2> <p>We tested our method using VGG16. VGG16 is a deep convolutional neural network that has achieved impressive results on the ImageNet classification challenge, with a top-1 accuracy of 72% <d-cite key="simonyan2014very"></d-cite>. Its sequential nature lends itself well to explainability methods like saliency maps. Further, it is relatively quick to train, even given the constraints of Google Colab. 
All of these attributes were highly desirable, as it allowed for rapid iteration for hyperparameter tuning, computation of saliency maps and filter visualizations, and a direct way to compare the viability of our freezing method through image classification accuracy. To ensure that the model did not have inference on any tasks prior to training, we randomly initialized the parameters.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/vgg16.webp-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/vgg16.webp-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/vgg16.webp-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/vgg16.webp" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 1: Schematic of VGG16 Architecture </div> <h2 id="saliency-mapping">Saliency Mapping</h2> <p>The computation of saliency maps is grounded in the principles of backpropagation. It follows a multi-staged procedure which uses gradients to consider the impact of each pixel in an image. First, it computes the partial derivatives of the target output with respect to individual segments of the input image. Then, it uses backpropagation to propagate error signals back to the input layer. It does this in order to identify the impact of pixels. 
It considers pixels with larger signals to have the greatest impact on the decision-making process. There are a bountiful number of papers which propose different improvements on the original saliency map. When selecting a procedure, we identified two key features necessary for a useful visualization. We believed that a saliency map must have a full explanation of why a model made its prediction. Secondly, we believed that rather than considering each individual pixel, it clusters pixels together to consider importance. After testing, we ultimately used full-gradient saliency maps <d-cite key="srinivas2019full"></d-cite>. Code for this method is publicly available on the GitHub created by the authors of this paper, fullgrad-saliency.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/mug_raw-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/mug_raw-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/mug_raw-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/mug_raw.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" 
srcset="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/mug_saliency-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/mug_saliency-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/mug_saliency-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/mug_saliency.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Figure 2: Raw image and saliency map of a mug. </div> <p>The essence of full-gradient saliency maps lines up directly with the key features that we identified. To begin, it defines importance in the input image as a change in the feature resulting in change in model output. It seeks to illustrate a full answer for the model’s output. To this end, it considers both global and local importance of features in the input image, which results in a method which both weighs the importance of each pixel individually, but also considers the importance of different grouping of pixels.</p> <h2 id="filter-visualization">Filter Visualization</h2> <p>In order to compute what different filters are looking at, we made use of the Convolutional Neural Network Visualizations GitHub repository, which is a useful library that has implementations of many popular explainability methods <d-cite key="uozbulak_pytorch_vis_2022"></d-cite>. 
Specifically, we used the implementation of a filter visualization method from the paper “Visualizing Higher-Layer Features of a Deep Network”, which uses backpropagation to maximize the activation of a given filter <d-cite key="erhan2009visualizing"></d-cite>. With this, we can compute exactly what a filter is attempting to observe in an image. This method provides two different options for creating filter visualizations - one with gradient hooks, and one without.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/jar_feature_viz-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/jar_feature_viz-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/jar_feature_viz-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/jar_feature_viz.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/jar_raw-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" 
srcset="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/jar_raw-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/jar_raw-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/jar_raw.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/jar_saliency-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/jar_saliency-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/jar_saliency-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/jar_saliency.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Figure 3: Heatmaps of Feature Visualization(left), Actual Image(middle), Saliency Map(right) </div> <h2 id="training-procedure-and-dataset-selection">Training Procedure and Dataset Selection</h2> <p>We 
created two datasets from CIFAR-100 <d-cite key="erhan2009visualizing"></d-cite>. We randomly selected 20 classes out of the total 100 and then divided these groups into two. We filtered the images from CIFAR-100 so that only images of those classes were in our datasets. We did this to ensure that the tasks the model was attempting to learn were of equal difficulty. We chose CIFAR-100 because we believed it was of adequate difficulty for the VGG16 architecture. We normalized the data and augmented it with random horizontal flips and random croppings. For the first instance of training, we trained using stochastic gradient descent for 10 epochs with a learning rate of 1E-3. We did not implement any regularization or early stopping, as it was not necessary given training losses and testing losses. After this training, we used the described methods for calculating saliency maps and filter visualizations. For each class in the first dataset, we calculated the most useful filters by comparing saliency maps for the class to all filters. We compared these through multiple metrics, including mean squared error and Pearson correlation. To account for the fact that different layers of convolutional neural networks capture different types of information, we froze some percent of filters in each individual layer rather than the entire model. We left this percent as a hyperparameter. To ensure fairness for each task, the second instance of training followed the same exact procedure as the first - the optimizer was stochastic gradient descent, we trained for 10 epochs, and used a learning rate of 1E-3.</p> <h1 id="results">Results</h1> <p>For the sake of hyperparameter tuning and evaluating different strategies, we froze the datasets to be the first and second ten images of CIFAR-100. 
We sought to check how the number of filters we freeze changes performance across datasets, which metric is most useful in comparing saliency images to filter visualizations, and how viable this method is as compared to training on a single, larger dataset. Prior to the second round of training, the test accuracy on the first dataset was .4566 and the test accuracy on the second dataset was .1322.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/table1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/table1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/table1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/table1.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>The impact of freezing varying numbers of filters is in line with expectation - the more filters you freeze, the less inference you can gain, but also the more you will remember your previous task. In the table above, we can observe that with 25% of the filters frozen, we perform the best on dataset 2, with an accuracy of 39.2%, but the worst on dataset 1, with an accuracy of 20.7%. 
In contrast, when 75% of the filters are frozen, we maintain an accuracy of 38.4% on dataset 1, but learn less about the new task, reaching an accuracy of only 25.7% on dataset 2.</p>
It makes sense that Pearson correlation is not particularly useful for comparison - it is not able to take into account the spatial information that is crucial for this comparison.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/table3-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/table3-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/table3-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-9-alleviating-catastrophic-forgetting%20in-classification-tasks-through-strategic-filter-dropout/table3.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Finally, we found that training tasks sequentially and using the freezing method with a comparison metric of mean squared error slightly outperforms training the model on a larger, combined dataset at once. With this method, the model performed five percentage points better on predicting classes in both the first and second dataset. It is important to note that the accuracy reported for the model trained on the combined dataset is just the average accuracy over all of the classes, not necessarily split by the datasets. Still, to ensure fairness, the training procedure used for the combined dataset was the same as for the sequential training procedure, but trained for twenty epochs at once rather than ten epochs at two different times. 
This result implies that intelligently freezing filters of a neural network can be a viable strategy for overcoming catastrophic forgetting, even if just in a smaller setting.</p> <h1 id="discussion">Discussion</h1> <h2 id="takeaways">Takeaways</h2> <p>Through using convolutional neural network explainability methods such as saliency maps and filter visualizations, we were able to observe key insights into the relevance of different filters in VGG16. Quantitatively, we were able to measure this by freezing these layers and observing how well performance persisted after training on a new task. We found that freezing filters according to the similarity of their visualizations to saliency maps retains significantly more inference on a previous task, suggesting that these filters were more relevant to the previous task. By freezing these weights, we were also able to outperform simply training on a larger dataset. We believe that more research should be directed towards applying explainability methods to achieve the objective of continual learning. Although there has been previous work in the past, these often rely on stopping catastrophic forgetting once it has been observed, rather than determining which parts of the network are too integral to a task to be retrained.</p> <h2 id="limitations">Limitations</h2> <p>Because we are completely freezing weights, it is unlikely that this method could be generalizable to an arbitrary number of tasks. Future works could explore the integration of elastic weight consolidation into our pipeline rather than stopping change entirely. Doing class by class freezing of filters also introduces a cap to the number of tasks that this method could generalize to and the number of classes that can be predicted in each task. 
During our research, we concluded that this approach was better than attempting to combine saliency maps, but future work could also explore how to effectively combine saliency maps to capture important aspects of each class. Further, this method relies on the comparability of saliency maps and filter visualizations. While it makes intuitive sense that a filter is more relevant if it is seeking the parts of an input that are most important for a correct prediction, it is not as simple as directly comparing the two. While we attempt to alleviate some of this issue by doing layer-by-layer freezing, future work could certainly explore better metrics for choosing filters, especially given the stark difference in performance when using something as simple as mean squared error compared to Pearson correlation. Finally, the computational overhead of the method in combination with the limitations of Google Colab resulted in an inability to train on high-resolution images and use larger models. We believe that using high-resolution images would significantly benefit the feasibility of the method, as saliency maps are much more clearly defined. We again leave this to future work, as we are unable to explore this path.</p> Graph Articulated Objects 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/graph-articulated-objects <h2 id="project-background">Project Background</h2> <p>Recent advancements in generative AI have transformed robotic capabilities across all parts of the stack, whether in control, planning, or perception. 
As self-driving cars roll out to public roads and factory assembly-line robots become more and more generalizable, embodied intelligence is transforming the way that humans interact with each other and automate their daily tasks.</p> <p>Across the robotic manipulation stack, we are most interested in exploring the problem of scene representation; using the limited sensors available, how might a robot build a representation of its environment that will allow it to perform a wide range of general tasks with ease? While developments in inverse graphics like NeRF have given robots access to increasingly rich geometric representations, recent work in language modeling has allowed robots to leverage more semantic scene understanding to plan for tasks.</p> <h3 id="introduction-to-task-planning">Introduction to Task Planning</h3> <p>In robotics, the term <strong>task planning</strong> is used to describe the process of using scene understanding to break a <em>goal</em> down into a sequence of individual <em>actions</em>. This is in contrast with <em>motion planning</em>, which describes the problem of breaking a desired <em>movement</em> into individual configurations that satisfy some constraints (such as collision constraints). While simply using motion planning to specify a task is necessary for any generalized robotic system, <em>task planning</em> provides robots with a <em>high-level</em> abstraction that enables them to accomplish multi-step tasks.</p> <p>Take the problem of brushing one’s teeth in the morning. 
As humans, we might describe the steps necessary as follows:</p> <ol> <li>Walk to the sink.</li> <li>Grab the toothbrush and toothpaste tube.</li> <li>Open the toothpaste tube.</li> <li>Squeeze toothpaste onto brush.</li> <li>Brush teeth.</li> <li>Rinse mouth.</li> <li>Clean toothbrush.</li> <li>Put everything back.</li> </ol> <h3 id="planning-domain-definition-language-pddl-explained">Planning Domain Definition Language (PDDL) Explained</h3> <p>Creating a task plan is a trivial task for humans. However, a computer must use a state-space search algorithm like <em>A* search</em> to plan a sequence of interactions from a <em>start state</em> to a desired <em>goal state</em>. Doing so requires us to define a standard that formally specifies all relevant <em>environment states</em>, along with the <em>preconditions</em> and <em>effects</em> of all possible transitions between two states.</p> <p>The Planning Domain Definition Language (PDDL) was invented to solve this problem. Description languages like PDDL allow us to define the space of all possible environment states using the states of all entities that make up the environment. 
Environments are defined as a task-agnostic <em>domain file</em>, while the <em>problem file</em> defines a specific task by specifying a desired <em>start</em> and <em>end</em> state.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/pddl-explained-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/pddl-explained-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/pddl-explained-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/pddl-explained.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Figure 1: This figure above breaks down a simple PDDL domain file into its constituent components. A PDDL domain generally consists of predicates, which describe the state, and actions, which specify state transitions. </div> <p>Despite task planning’s utility, however, there is one major drawback; this approach to planning requires the robot to have a <em>detailed PDDL domain file</em> that accurately represents its environment. Generating this file from perception requires not only a semantic understanding of all objects in a space, but also of all possible interactions between these objects, as well as all interactions that the robot is afforded within the environment. Clearly, there is a major gap between the task-planning literature and the realities of upstream perception capabilities.</p> <h2 id="related-work">Related Work</h2> <p>The use of LLMs in robotic planning and reasoning has exploded in the past few years, due to the promise of leveraging a language model’s internal world understanding to provide more information for planning. 
One such work is LLM+P<d-cite key="liu2023llm+"></d-cite>, which combines an LLM with a classical planner to solve a given problem specified in natural language, using PDDL as an intermediate representation. LLM+P works by converting the description into a PDDL problem representation, running a classical planning algorithm to find a solution, and then computing the sequence of actions back into a natural language description interpretable by humans. Importantly, LLM+P demonstrates that using an LLM to output a PDDL representation can be a viable strategy in solving planning problems that are specified to a robot. However, there are a few limitations. For one, LLM+P assumes that a relevant domain file is already provided to the robot, specifying all entities and their relationships within the environment’s context. While domain files are generally carefully crafted by hand, vision-language models can automate this process.</p> <p>LLMs have also been used to solve plans directly, to varying levels of success. Works like SayCan<d-cite key="ahn2022can"></d-cite> and LLM-Planner<d-cite key="song2023llm"></d-cite> use the LLM as a planning engine directly, circumventing the need to use a traditional high-level planner completely. SayCan, in particular, uses a combination of language-grounded instructions and task affordances that indicate the robot’s ability to execute a given task, using language to determine the most viable skill to execute from a set of predefined skills. These bodies of work have greatly enabled the ability of robots to parse, understand, and execute instructions given to them by their operators as natural language. Particularly, an LLM’s ability to break a problem down into several constituent steps is critical to enabling long-horizon task planning with multiple steps.</p> <p>Language is an increasingly promising modality for robots to operate in, due to the ubiquity of relevant language data to learn real-world entity relations from the internet. 
The rapid advancement of LLMs and vision-language models opens up a world of possibilities in closing this gap, as robotic perception systems may be able to leverage learned world understanding to generate PDDL files of their own to use in downstream planning tasks.
<li><strong>Clean</strong>: Everything in Cook + a soap bottle, a sink, and a sponge</li>
Ideally, a good generated PDDL file is one where these tasks are supported.</p> <h3 id="prompting-strategies">Prompting Strategies</h3>
<strong>In order to cut the bagel, one must use the knife and place the bagel and knife on the plate beforehand. I can place the bagel on the plate or the bowl, and cut the bagel using the knife.</strong> Given the image, please generate a Planning Description Domain Language (PDDL) domain file that describes the scene.</em></li> </ul> </li> </ol> <h3 id="evaluation-metric-embedding-cosine-similarity">Evaluation Metric: Embedding Cosine Similarity</h3> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/evaluation-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/evaluation-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/evaluation-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/evaluation.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Figure 3: Our evaluation process. We used a vector-embedding-space cosine similarity metric to conduct our analysis, using a handcrafted PDDL file as input. </div> <p>Since several different PDDL domains can be used to represent the same set of actions and predicates, the task of generating PDDL files is quite subjective. Since generating PDDL tasks is an often-tedious task that humans must do themselves to represent any given domain, we evaluate each VLM output based on its similarity to real PDDL domains handcrafted manually. 
We decided to use <strong>Longformer</strong>, instead of a more standard model like <em>LLAMA 2</em><d-cite key="touvron2023llama"></d-cite> or <em>BERT</em><d-cite key="tenney2019bert"></d-cite>, due to the long context-length afforded to us by the model; a more comprehensive study would require studying these effects on a wider range of embedding models to more accurately understand the text-to-text similarity between the two domains.
<p>The exact generated PDDL files can be found at <a href="https://github.com/anirudhv27/final-project-dl/tree/main/experiments">this link</a>.</p>
The generated predicate set is surprisingly accurate, regardless of which experiment we use.
</div> <h3 id="actions">Actions</h3> <p>Similar to the predicate generation, the action generation is extremely accurate. The various sequences of predicted actions make sense for the given images and conditioning. For example, one of the generated action sequences from Experiment 1 is:</p> <blockquote> <p>(:action prepare-sandwich :parameters (?b - food ?p - container) :precondition (and (contains ?p ?b) (is-clean ?p)) :effect (and (inside ?b ?p) (not (empty ?p))) )</p> </blockquote> <p>This is a very detailed sequence of actions, which also makes sense – in order to prepare a sandwich, the generated PDDL file notices we need the food and the container, and then checks if it is clean and not empty.</p> <p>Again, the results from Experiment 1 compared to the later experiments which have more textual conditioning are extremely similar, indicating that most of the information the VLM collects is from the image. Our added conditioning does not seem to improve generation of the action sequences much more.</p> <h3 id="quantitative-analysis-with-cosine-similarity">Quantitative Analysis with Cosine Similarity</h3> <p>Along with qualitative analysis of each part of the PDDL file, we also performed a holistic analysis of the entire PDDL file that compares similarity with our handcrafted ground truth PDDL file. We measured the cosine similarity between the two PDDL files, for each experiment in each image. Due to the general format of PDDL files, certain words appear at the same places many times. 
Hence, we masked these words out, in order to not inflate the similarity in a superficial manner.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/cosine-similarities-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/cosine-similarities-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/cosine-similarities-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/cosine-similarities.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Table 1: Cosine similarities using the Longformer embedding model. </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/bar-chart-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/bar-chart-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/bar-chart-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/bar-chart.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Figure 5: Bar chart detailing cosine similarity in all tested scenarios. While there are clear trends between prompting strategies, all cosine similarity scores tend to hover around 98%. </div> <p>As we can see, our methods performed quite well, with masked cosine similarity consistently above 0.98. 
This makes sense qualitatively as well, since as discussed above, the VLM generated types, predicates, and actions that made sense.</p> <p>One of the most noteworthy aspects of the above data is that according to this metric:</p> <ul> <li>Experiments 1-3 all perform similarly, with some doing better than others in different images.</li> <li>Experiment 4 consistently performs worse than Experiments 1-3.</li> </ul> <p>This is surprising, since we would expect that more conditioning implies better performance. In Experiment 4, we added certain conditioning of the form of textual relationship between objects in the image. This result leads us to the conclusion that adding this sort of conditioning is not helpful for PDDL file generation, and is in fact negatively correlated with performance. Previous analysis has implied that the VLM learns extremely well from the image alone, and this result suggests that in fact it is better to let the VLM learn only from the image, without adding too much of our own conditioning.</p> <h2 id="conclusion-limitations-and-future-work">Conclusion: Limitations and Future Work</h2> <p>Our work analyzes the potential of the recent advances in VLMs for the purposes of robotic task planning. By creating a systematic set of experiments over increasingly complex images, we were able to showcase the power of VLMs as a potentially very powerful tool for general task planning problems. The accurate generation of PDDL files based on only the images shows us that VLMs learn from images extremely well, without the need for extra textual conditioning. In fact, we noticed that providing too much conditioning actually can decrease performance, thus further suggesting that VLMs learn best from images. This result is promising for generalizing to the greater context of robotic task planning, since vision is one of the most prominent ways in which robots dynamically task plan when navigating real-world environments. 
Recently deep learning based SR has been applied to the field of fluid dynamics to recreate chaotic turbulent flows from low-resolution experimental or numerical data [3].
For some loss function \(\mathcal{L}\), the goal is to find weights \(\theta^*\) such that</p> \[\begin{aligned} \theta^* = \text{argmin}_\theta\; \mathcal{L}(\bold{u_H},f(\bold{u_L};\bold{\theta})) \end{aligned}\] <p>where \(\bf u_H\) is the reference high resolution data field and \(\bf u_L\) is the corresponding coarsened low resolution data input to the neural network \(f\) (see the figure below).</p> <p><img src="/assets/img/2023-11-11-physics-loss/fig1.png" alt="Super-resolution reconstruction of turbulent vorticity field using physics-based neural network. Adapted from [2]." /></p> <p>​ <em>Fig 1: Super-resolution reconstruction of turbulent vorticity field using physics-based neural network. Adapted from [2]. Disclaimer: we didn’t have time to train on nice images like these for the present investigation.</em></p> <p>Doing so can aid our understanding of flow physics [3]. Many have already applied deep learning to this problem, applying a variety of methods. The performance of the resulting networks depends heavily on the loss function used to train the network. Looking to improve upon the standard \(L_2\) loss function, some have introduced physics-based loss function that incorporates physical laws that the real flow must obey. For example [2] use the following type of form:</p> \[\begin{aligned} \mathcal{L} &amp;= \beta_0||\bold{u_H}-f(\bold{u_L})||_2 + \beta_1 ||p_1(\bold{u_H})-p_1(f(\bold{u_L}))||_2 + \beta_2 ||p_2(\bold{u_H})-p_2(f(\bold{u_L}))||_2 + ... \end{aligned}\] <p>where \(p_i(\cdot)\) is a physical objective that we want to enforce during training (e.g. spatial and time derivatives \(\nabla \bf u_H\), \(\bf\dot{u}_H\) etc.) and the \(\beta_i\) are fixed weighting coefficients.</p> <p>Typically, multi-objective super resolution approaches aim to overcome the weaknesses of the single objective $L_2$ reconstruction loss, a primary one being that the $L_2$ loss favors blurry reconstructions over sharper more ‘realistic’ ones. 
Super resolution reconstruction is an interesting problem for turbulent flows due to their inherent multi-scale nature. Information is lost in the coarsening/pooling process, making perfect reconstruction impossible without additional insights. Unfortunately, due to time and resource constraints it is unfeasible to train on 2D turbulence slices as in figure 1. In order to retain a challenging problem for the super-resolution we build an artificial dataset of 1D turbulence as follows:
The following is a typical high/low resolution pair:
For example it may be best to put more emphasis on the reconstruction loss $\mathcal{L}_0$ during the first stages of training and then shift emphasis to other properties to refine the model during the latter stages. In addition to these considerations [5] observed that for physics informed neural networks fixed weights tended to induce training instability as the multiple objectives compete with one another.</p> <p>To mitigate these issues in this investigation we employ a multi-objective optimizer (MOO). After each training epoch a MOO reviews the progress for each loss component $\mathcal{L}_i$ and updates the weights $\beta_i$. A schematic is shown below:</p> <p><img src="assets/img/2023-11-11-physics-loss/fig3.png" alt="Schematic of one training epoch " /></p> <p>​ <em>Fig3: One epoch of training with adaptive loss using ReLoBRaLo MOO. At the end of batched training iterations the MOO updates ${\beta_i}$ according to the progress of each individual loss component. The Adam training optimizer learning rate is fixed at $10^{-5}$ for the entire investigation.</em></p> <p>In particular we use the Relative Loss Balancing with Random Lookback (ReLoBRaLo) scheme from [5] for the MOO. The scheme adaptively updates the loss weights at the end of each epoch according to the progress of each individual loss component:</p> \[\begin{align*} \beta_i^{bal}(t) &amp;= m\cdot \frac {\exp\left(\frac{\mathcal{L}_i(t)}{\mathcal{T}\mathcal{L}_i(t-1)}\right)} {\sum_{j=1}^m \exp\left(\frac{\mathcal{L}_j(t)}{\mathcal{T}\mathcal{L}_j(t-1)}\right)},\;i\in\{1,...,m\}\\ \beta_i(t) &amp;= \alpha\beta_i(t-1) + (1-\alpha)\beta_i^{bal}(t) \end{align*}\] <p>There are many more details in [5], but essentially the $\beta_i^{bal}(t)$ term measures the progress of the loss $\mathcal{L}_i$ since the previous epoch relative to the progress made by other losses. The more a particular loss is struggling the more we increment its weight for the next epoch. 
The temperature hyper-parameter $\mathcal{T}$ indicates the level of equality across loss components.
Figure 4 provides a more detailed look at the training for $p_1=\mathcal{F}$.
With enough training the network is able to learn the inherent structure in the data and reconstruct the high frequencies.</p> <p><img src="assets/img/2023-11-11-physics-loss/fig4.png" alt="Fourier loss two objective training" /></p> <p><img src="assets/img/2023-11-11-physics-loss/fig4b.png" alt="Reconstructed data by two-objective training" /></p> <p>​ <em>Fig 4: Top panel: Two objective training with Fourier loss for $\mathcal{T}=1$. The results for setting $\mathcal{T}=0.01,100$ are very similar so they are omitted for brevity. The two objective training (reconstruction + Fourier) outperforms the single objective training for every value of $\alpha$. The optimal value of $\alpha$ is close to $0.999$.</em> Bottom panel: example of reconstructed validation data. The model is able to recover the high frequency components from the original high resolution signal.</p> <p><img src="assets/img/2023-11-11-physics-loss/fig5a.png" alt="beta evolution" /></p> <p><img src="assets/img/2023-11-11-physics-loss/fig5b.png" alt="fig5b" /></p> <p>​ <em>Fig 5: Reconstruction and Fourier objective ${\beta_i}$ evolution for $\alpha=0.9,0.999$. The smaller $\alpha$ the faster the loss weights converge to 1.</em></p> <p>The two objective training curves in figure 4 are significantly better than the single objective curve. There is a particular value of $\alpha$ (~0.999) that gives the best overall result. Figure 5 demonstrates how the loss weights adapt over the course of training as the ReLoBRaLo MOO tries to balance the improvements in each loss component. For $\alpha=0.9$ the MOO rapidly increases $\beta_1$ in order to put more weight on the lagging Fourier loss. When $\alpha=0.999$ the increase is a lot more gradual. 
In the limit as $\alpha\to1$ we just have single objective optimization.</p> <p>Figure 6 shows a similar weight evolution when the second objective is ‘bad’, $p_1=\sigma(\cdot)$:</p> <p><img src="assets/img/2023-11-11-physics-loss/fig6.png" alt="beta evolution for standard deviation" /></p> <p>​ <em>Fig 6: Reconstruction and $\sigma(\cdot)$ objective ${\beta_i}$ evolutions. There is evidence of instability at the start of training.</em></p> <p>In contrast to the reconstruction and Fourier two-objective training, the reconstruction and $\sigma(\cdot)$ weight evolutions show signs of instability. At around $15$ epochs $\beta_0$ experiences a bump. This is most likely the MOO responding to degrading progress on the reconstruction objective due to the two objectives competing with each other. Therefore for optimal multi-objective training it seems preferable that all loss components smoothly decrease without cross interference.</p> <h3 id="multi-objective-loss">Multi Objective Loss</h3> <p>We also study a multi-objective loss created by combining the most successful objectives from the previous study.</p> \[\begin{aligned} p_1&amp;=\mathcal{F}\\ p_2&amp;=|\cdot|\\ p_3&amp;=\mathbb{E}_x\\ p_4&amp;=\frac{d}{dx}\\ \end{aligned}\] <p>The results closely mimic the two objective Fourier loss so we omit further details. Interestingly, even when we introduce a ‘bad’ objective such as $\sigma(\cdot)$ or $\mathcal{F}\circ\frac{d}{dx}$ into the multi-objective loss it doesn’t appear to spoil the result despite causing a minor instability (see figure 6). These results suggest that it may be possible to just ‘throw in’ many auxiliary objectives in the hopes that one of them improves training. We might not necessarily need to worry about bad objectives spoiling the bunch. Or it could just be that in this particular case the Fourier objective $\mathcal{F}$ is strong enough to overcome the bad objectives. 
This needs more investigation.</p> <h2 id="conclusion">Conclusion</h2> <p>This investigation showed that multi-objective loss functions can be useful even when only one objective is ultimately of interest. Most likely due to the manner in which the data set was constructed, the Fourier objective turned out to be a great training aid (Note that we did try single objective training with the Fourier objective replacing the reconstruction objective. This did not yield as good results suggesting that there is something inherently beneficial about multi-objective training as opposed to just changing basis).</p> <p>The other objectives did not do nearly as well and some even degraded the training by causing instabilities. The ReLoBRaLo MOO was a critical component of training. None of the aforementioned results would have been possible with fixed weights. It was critical to fine tune the $\alpha$ parameter which determines how aggressively the MOO does updates. Presumably, an overly aggressive MOO doesn’t give the network time to settle in the early stages of training but an overly passive MOO hardly makes any difference at all.</p> <p>While good, ultimately the ReLoBRaLo scheme was designed for traditional MOO problems (such as solving partial differential equations) and is most likely far from optimal under the unique settings of this investigation. In addition, the objectives in this study were chosen quite arbitrarily. The Fourier objective was an easy one to discover due to the low-pass nature of super-resolution reconstruction and the manufactured dataset. For a more general problem where we might want to introduce auxiliary objectives it will be very difficult a-priori to identify high performance auxiliary objectives. 
An interesting future investigation could be to design a neural network that adaptively updates the auxiliary objectives after each epoch with the goal accelerating the main network’s learning curve.</p> <h2 id="references">References</h2> <p>[1] Bode, M., Gauding, M., Lian, Z., Denker, D., Davidovic, M., Kleinheinz, K., Jitsev, J. and Pitsch, H. Using physics-informed enhanced super-resolution generative adversarial networks for subfilter modeling in turbulent reactive flows. <em>Proceedings of the Combustion Institute</em>, 2021.</p> <p>[2] Fukami, K., Fukagata, K. and Taira, K. Super-resolution reconstruction of turbulent flows with machine learning. <em>Journal of Fluid Mechanics</em>, 2019.</p> <p>[3] Fukami, K.,Fukagata, K., and Taira, K. Super-Resolution Analysis Via Machine Learning: A Survey For Fluid Flows. [Unpublished manuscript], 2023.</p> <p>[4] Wang, C., Li, S., He, D. and Wang, L. Is L2 Physics-Informed Loss Always Suitable for Training Physics-Informed Neural Network?. <em>Conference on Neural Information Processing Systems</em>, 2022.</p> <p>[5] Bischof, R., and Kraus, M. Multi-Objective Loss Balancing for Physics-Informed DeepLearning. [Unpublished manuscript], 2022.</p> Diffusion Models on Low-Brightness Images 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/Tracking-Multiple-Objects <h2 id="introduction">Introduction</h2> <p>Since the introduction of ChatGPT, everyone seems to be speaking about “generative AI,” with almost 15x more google searches for generative AI now than at this time last year. This blog post focuses a specific use case for diffusion models, which have applications across the board, from generating images given keywords to planning trajectories for robot manipulation. 
In short, diffusion models are a family of probabilistic generative models that progressively destruct data by injecting noise, then learn to reverse this process for sample generation.</p> <p><img src="/staging/assets/img/2023-11-11-Tracking-Multiple-Objects.md/diffmodeloverview.png" alt="Alt Text" style="width:90%" /> <em>Figure 1.1. How a diffusion model iteratively transforms noise to generate an image</em></p> <p>Diffusion models have been used with great success for a number of use cases, but they still remain largely unused on dim images. The primary related work has been on using a diffusion model for low-light image enhancement. However, most of these works agree that attempting to generate an image from noise generated on top of an already dim image often results in rgb shift and global degradation of the image <d-cite key="zhou2023pyramid"></d-cite> This is because a diffusion model adds noise to the given image and then attempts to denoise the image, so given a dim and low-contrast image, the model has a difficult time denoising.</p> <p>For a visual example of why low-light scenarios can be a problem for diffusion models, we can just look at the control of our experiments. The left image is from the diffusion model trained and evaluated on low-light images, while the right image is from the diffusion model trained and evaluated on normal-light images.</p> <p><img src="/staging/assets/img/2023-11-11-Tracking-Multiple-Objects.md/controldim.png" alt="Alt Text" style="width:45%; margin-right:3%" /> <img src="/staging/assets/img/2023-11-11-Tracking-Multiple-Objects.md/controlbright.png" alt="Alt Text" style="width:45%" /></p> <p>We can observe all sorts of problems here, from the model being unable to determine the image background color to the model sometimes not even showing the butterfly. 
In contrast, the exact same training done on the normal butterfly dataset shows distortions occasionally, but has no issues determining the background color or the contrast between the butterfly and the background. This illustrates the issue talked about previously of rgb shift and global degradation. In this blog, we aim to conduct experiments by adding different features to the DDPM scheduler and investigate which can actually make a difference for low-light scenarios.</p> <h2 id="background">Background</h2> <p>First, we discuss the idea of a diffusion model in more depth. In a nutshell, a diffusion model relies on semi-supervised training. The model is given an image from a training set to which random noise has been applied \(t\) times. This noisy image is given to the model along with the value of \(t\), a loss is computed between the output of the model and the noised image. The random noise is applied with a noise scheduler, which takes a batch of images from the training set, a batch of random noise, and the timesteps for each image. The overall training objective of the model is to be able to predict the noise added through the scheduler to retrieve the initial image.</p> <p>Since diffusion models on dim images are relatively unstudied, this blog post focuses on taking a well-known diffusion model for regular images and making modifications to the scheduler, which controls the noising and denoising process, and the model architecture to improve its performance in low-light scenarios. We begin with the DDPM (Denoising Diffusion Probabilistic Models) model <d-cite key="ho2020denoising"></d-cite>. There are a number of viable diffusion models to study, but one of the major benefits of DDPM is that it balances image fidelity in generation and speed. Several other major models such as DDIM and PNDM can be much worse at capturing fine details even though they have some speed and computation advantage <d-cite key="andrew2023diffusion"></d-cite>. 
This offers some intuition as to why DDPM was chosen to work with for this project.</p> <p>A DDPM uses two Markov chains for its denoising and noising process: one to perturb the data to noise, and another one to convert the noise back into data<d-cite key="10.1145/3626235"></d-cite>. The noising and denoising process uses a unique scheduler that we refer to as the DDPMScheduler. Mathematically, we can refer to the initial distribution as \(q(x_0)\) and show that the following \(x_t\) are computed as \(q(x_t \vert x_{t-1}) = \mathbb{N}(x_t; \sqrt{1-\beta_t}x_{t-1},\beta_tI).\) This is referred to as a Gaussian transition kernel, and shows that consecutive noised images are generated by taking the previous image and sampling more noise into it using a Gaussian perturbation. Of particular note are the \(\beta_t\) terms, since those vary from timestep to timestep and we discuss later how those should be modified. In the specific ddpm scheduler implementation, the \(\beta_{\text{start}}\) and \(\beta_{\text{end}}\) are specified, so the \(\beta\) in the Gaussian perturbation, which specifies how much noise should be added at any time, is gradually increased. <d-cite key="von-platen-etal-2022-diffusers"></d-cite></p> <p>The specified beta values can have many consequences on the model overall, but one is more aggressive denoising which can combat rgb shift. This is because rgb shift can cause color inconsistencies between adjacent pixels, which can be combated by greater noise reduction. In addition, aggressive denoising may be able to recover the underlying structure of the image and smooth out artifacts introduced by rgb shift. However, aggressive denoising can result in a loss of detail as well.</p> <p>By integrating the previous noise during the noising step to determine \(q(x_T)\) we can get \(q(x_T) = \int q(x_T \vert x_0)q(x_0)dx_0 \sim \mathbb{N}(x_t; 0, I)\), showing that after all the noise is integrated, the entire structure of the image is lost. 
After the denoising, DDPMs start generating new samples by generating a noise vector from the prior distribution \(p(x_T) = \mathbb{N}(x_T; 0, I),\) and gradually removing noise by running a Markov chain in the reverse. The goal is to learn the transition kernel between timesteps. The reverse transition can be written as \(p_{\theta}(x_{t-1} \vert x_t) = \mathbb{N}(x_{t-1}; \mu_{\theta}(x_t, t), \sigma_{\theta}(x_t, t))\) where \(\theta\) is the model’s parameters and the mean and variance are parametrized by neural networks<d-cite key="andrew2023diffusion"></d-cite>.</p> <p>This variance will also come into play later, as it is one of the parameters that we toggle in the DDPM scheduler. Variance in the DDPM Scheduler of the Diffuser library has several possible values: fixed_small, fixed_small_log, fixed_large, fixed_large_log <d-cite key="schedulerdocumentation"></d-cite>.</p> <table> <thead> <tr> <th>variance_type</th> <th>effect</th> </tr> </thead> <tbody> <tr> <td>“fixed_small”</td> <td>The variance is a small and fixed value</td> </tr> <tr> <td>“fixed_small_log”</td> <td>The variance is small and fixed in the log space</td> </tr> <tr> <td>“fixed_large”</td> <td>The variance is a large and fixed value</td> </tr> <tr> <td>“fixed_large_log”</td> <td>The variance is large and fixed in the log space</td> </tr> </tbody> </table> <h2 id="methods">Methods</h2> <p>The first method evaluated as a control is simply an implementation of a DDPM using the Diffusers library <d-cite key="von-platen-etal-2022-diffusers"></d-cite> on HuggingFace. This model was trained using the Smithsonian butterflies dataset of 1000 images hosted on HuggingFace <d-cite key="huggan_dataset"></d-cite>. Initially, since the images in the dataset are all different, the dataset was resized to have all images conform to a square resolution, randomly flipped to augment the dataset, and normalized into the [-1,1] range. 
To generate “dim” images, the regular dataset was reduced in brightness by 50% using the Transforms library from HuggingFace.</p> <p><img src="/staging/assets/img/2023-11-11-Tracking-Multiple-Objects.md/orig.png" alt="Alt Text" style="width:90%" /></p> <p align="center"> <em>Figure 3.1. Original images from dataset</em> </p> <p><img src="/staging/assets/img/2023-11-11-Tracking-Multiple-Objects.md/new.png" alt="Alt Text" style="width:90%" /></p> <p align="center"> <em>Figure 3.2. Images after preprocessing</em> </p> <p>Next, noise is added to the images. For this, we use the DDPMScheduler with the default parameters from Diffusers. The model is then trained on the noisy image, and evaluated. For evaluation, the model is tested on sixteen different images previously sampled randomly from the training dataset and set aside as test images. These images are noised using the scheduler in the same way as the rest of the images, and the model is run on the noised images to retrieve the original images.</p> <table> <thead> <tr> <th>Control Parameters</th> <th> </th> </tr> </thead> <tbody> <tr> <td>noise_timesteps</td> <td>50</td> </tr> <tr> <td>num_epochs</td> <td>50</td> </tr> <tr> <td>beta_start</td> <td>0.0001</td> </tr> <tr> <td>beta_max</td> <td>0.02</td> </tr> <tr> <td>variance_type</td> <td>“fixed_large”</td> </tr> <tr> <td>resnet layers per unet block</td> <td>2</td> </tr> </tbody> </table> <p><em>Figure 4.1. Showing default parameters used in the diffusion model </em></p> <p align="center"> <img src="/staging/assets/img/2023-11-11-Tracking-Multiple-Objects.md/dlmodelarch.png" alt="Alt Text" style="width:90%" /> <em>Figure 4.2. Figure depicting the UNet architecture used in the model</em> </p> <p>Initially, a quantitative method of evaluation was considered, and some losses were computed between the test images before noising and the corresponding test results after denoising. 
While these measurements were computed, they didn’t seem as valuable as simply looking at the image because of the various patterns between images that a loss function cannot always capture (ie how similar is the butterfly and the pattern of the butterfly to the initial image). As an example, the image on the left receives a lower mean squared error loss than the image on the right, yet looking at them, it is apparent that the denoised version on the right is better. Thus, the evaluation here mostly presents the model outputs for us to qualitatively compare across different variations.</p> <p align="center"> <img src="/staging/assets/img/2023-11-11-Tracking-Multiple-Objects.md/mseexample.png" alt="Alt Text" style="width:=30%" /> <em>Figure 4.3. Showing two outputs of different models given the same input. MSE Loss proved to be unreliable for this task as the loss of the left image compared to the control was less than the loss of the right image due to rgb shift</em> </p> <p>After the control, this process is repeated for a variety of parameters carefully chosen and model architecture modifications to evaluate the best variation for use in this low-light scenario.</p> <h2 id="resultsdiscussion">Results/Discussion</h2> <p>The results of the control are as seen in the introduction above. The result of the dim images is on the left, while the result of the brighter images is on the right.</p> <p><img src="/staging/assets/img/2023-11-11-Tracking-Multiple-Objects.md/controldim.png" alt="Alt Text" style="width:45%; margin-right:3%" /> <img src="/staging/assets/img/2023-11-11-Tracking-Multiple-Objects.md/controlbright.png" alt="Alt Text" style="width:45%" /> <em>Figure 5.1. The left shows the output of the control model trained on the dim images and the right shows it trained on the bright images</em></p> <p>One of the most pressing problems seen on the dimmer images is the rgb shift. 
As discussed in the background, the variance, which partly controls how aggressively the model is denoised, can help with rgb shift because it larger denoising can retrieve details lost in noise. Thus, the first modification is changing the variance type from “fixed_small” to “fixed_large.” This modification, after training, resulted in the evaluation images below.</p> <p align="center"> <img src="/staging/assets/img/2023-11-11-Tracking-Multiple-Objects.md/var_result.png" alt="Alt Text" style="width:65%" /> </p> <p align="center"> <em>Figure 5.2. Result of evaluation after changing variance</em> </p> <p>As we can see, this helped greatly with the rgb shift issue, and eliminated the background discoloration for several of the images. Certain images, such as the second row on the left-most column and the third from the left on the bottom row also show huge detail improvements. For the reasons discussed earlier, this is expected as a result of larger denoising, since it can clear away ome artifacts. The only image that showed a decrease in quality after the variance change was the right-most image in the top row.</p> <p>Now that some of the rgb shift has been resolved, we move to tackling the loss of detail in many of these evaluation images. One classic approach to loss of information is simply increasing the capacity of the model to learn. In more technical terms, by increasing the number of ResNet layers per UNet block, we may allow the model to capture more intricate features and details. Deeper layers can learn hierarchical representations, potentially improving the ability to encapsulate fine-grained information. 
To do this, we edit our model architecture to make each UNet block deeper.</p> <p><img src="/staging/assets/img/2023-11-11-Tracking-Multiple-Objects.md/best_result.png" alt="Alt Text" style="width:45%; margin-right:3%" /> <img src="/staging/assets/img/2023-11-11-Tracking-Multiple-Objects.md/controlbright.png" alt="Alt Text" style="width:45%" /> <em>Figure 5.3. The left image shows the output of the new change in model architecture on the dimmed dataset, while the right image shows the bright dataset control output for color comparison</em></p> <p>A huge improvement can be seen just by deepening the model architecture and at least the outline of every butterfly is now visible. However, this still hasn’t solved the problem of rgb shift. As we can see, the butterflies in the denoised dim images are all skewed yellow, while the butterflies in the denoised control bright images are all of varying colors. Next, we try to train with various betas in the scheduler to tackle this issue. As discussed before, higher beta values can help with rgb shift. However, higher values can also lead to loss of detail. The beta_start for the control was 0.0001 and the beta_max was 0.02. Thus, we try two combinations of start and max: 0.001 and 0.01, and 0.0005 and 0.015.</p> <p><img src="/staging/assets/img/2023-11-11-Tracking-Multiple-Objects.md/beta_output.png" alt="Alt Text" style="width:90%" /></p> <p align="center"> <em>Figure 5.4. The left figure shows the output for beta start = 0.001 and beta end = 0.01, and the right figure shows the output for beta start = 0.0005 and beta end = 0.15</em> </p> <p>As seen above, this modification was unsuccessful, and the images have much less detail than before and the rgb shift is worse than before. This may be because the biggest issue is the distortion of colors and blurring, and thus, a high beta value and larger denoising is needed to fix these issues rather than smaller denoising as was previously hypothesized. 
This future modification is not analyzed in this project, but would be interesting to see in the future.</p> <h2 id="future-directions">Future Directions</h2> <p>There are several limitations and future directions worth discussing. For one, this project investigates a specific model, the DDPM model. The DDPM model was chosen for various reasons, but mostly because it draws a balance between detail and also efficiency. In the future, multiple models could be considered to figure out which is really best for image generation under low-light scenarios. In addition, this work only focuses on one dataset of butterflies, and generates “low-light” data by reducing the brightness of the original dataset. This is good evidence for the success of the methods presented, but additional datasets and real data taken from environments with low-light would have lent more evidence to the success of the methods. In addition, the amount of data and depth of the models used had to be limited due to gpu usage limits. A model trained for more epochs with more data may work better than this one. In addition, a good future starting point for this work would be to work with the beta start and beta max to figure out how to improve the rgb shift, which I believe would help with the detail in the dim images.</p> Semi-Supervised Domain Adaptation using Diffusion Models 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/Exploring-Task-Specific-Data-Augmentation <h1 id="abstract">Abstract</h1> <p>Recently, there has been a breakthrough in image manipulation using Contrastive Language-Image Pretraining (CLIP). Recent work shows that GANs combined with CLIP can translate the images to unseen domains <d-cite key="gal2021stylegannada"></d-cite>. However, in many cases these manipulations destroy the important information that the user might want to learn (e.g., labels). 
Recently, there was a paper showing image manipulation leveraging a combination of diffusion models and CLIP <d-cite key="kim2022diffusionclip"></d-cite>. We leverage the method proposed in DiffusionCLIP paper to perform semi-supervised domain adaptation having limited labeled data. GitHub project page: https://github.com/babiakua/semi_supervised_domain_adapt.</p> <h1 id="introduction--background">Introduction &amp; Background</h1> <h2 id="diffusion-models">Diffusion models</h2> <p>Diffusion Denoising Probabilistic Models (DDPMs) were introduced by Ho et al. as a novel approach in the field of generative models <d-cite key="ho2020denoising"></d-cite>. These models are built on the idea of gradually adding noise to data and then learning to reverse this process.</p> <p>The diffusion process is a Markov chain that adds Gaussian noise to the data over a series of steps. This process can be described mathematically as:</p> \[x_{t} = \sqrt{\alpha_{t}} x_{0} + \sqrt{1 - \alpha_{t}} \epsilon\] <p>where $x_{t}$ is the data at step $t$, $x_{0}$ is the original data, $\alpha_{t}$ is a variance schedule, and $\epsilon$ is Gaussian noise.</p> <p>The reverse process aims to denoise the data, starting from the noisy version and progressively removing noise. It’s modeled as:</p> \[x_{t-1} = \frac{1}{\sqrt{\alpha_{t}}}\left(x_{t} - \frac{1-\alpha_{t}}{\sqrt{1-\alpha_{t}}} \epsilon_{\theta}(x_{t}, t)\right)\] <p>with $\epsilon_{\theta}(x_{t}, t)$ being a neural network predicting the noise. This neural network usually has a UNet architecture with downsampling layers, upsampling layers, and a bottleneck.</p> <p>The training objective is to minimize the difference between the predicted noise $\epsilon_{\theta}(x_{t}, t)$ and the actual noise $\epsilon$. 
This is done using a variant of the mean squared error (MSE) loss:</p> \[\min_\theta \mathbb{E}_{x_0 \sim q(x_0), w \sim \mathcal{N}(0, I), t} \left\| w - \epsilon_{\theta}(x_t, t) \right\|^2_2.\] <p>DDIM (Denoising Diffusion Implicit Models) paper <d-cite key="song2022denoising"></d-cite> proposed an alternative non-Markovian noising process that has the same forward marginals as DDPM but has a distinct sampling process as follows:</p> \[x_{t-1} = \sqrt{\alpha_{t-1}} f_\theta(x_t, t) + \sqrt{1 - \alpha_{t-1} - \sigma_t^2}\epsilon_\theta(x_t, t) + \sigma_t^2 z,\] <p>where, $z \sim \mathcal{N}(0, I)$ and $f_\theta(x_t, t)$ is a the prediction of $x_0$ at $t$ given $x_t$ and $\epsilon_\theta(x_t, t)$:</p> \[f_\theta(x_t, t) := \frac{x_t - \sqrt{1 - \alpha_t}\epsilon_\theta(x_t, t)}{\sqrt{\alpha_t}}.\] <p>DDIM process allows for the use of different samplers by setting $\sigma_t$ to different values. In particular, setting $\sigma_t = 1$ makes the process a Markov process equivalent to DDPM while setting $\sigma_t = 0$ makes the process deterministic and allows for almost perfect inversion. DiffusionCLIP method leverages the deterministic nature of the process for image manipulation.</p> <h2 id="image-manipulation-with-clip">Image manipulation with CLIP</h2> <p>CLIP is a model for joint image-language representations which is trained on a large dataset of image-text pairs <d-cite key="radford2021learning"></d-cite>. Using a contrastive learning objective, it learns a joint, multimodal embedding space. The representations learned by CLIP can be used for many tasks including image manipulation and image synthesis. DiffusionCLIP uses CLIP loss to tune the image generator (e.g., a pretrained diffusion model). 
CLIP loss takes the following form:</p> \[\mathcal{L}_{\text{direction}} (x_{\text{gen}}, y_{\text{tar}}; x_{\text{ref}}, y_{\text{ref}}) := 1 - \frac{\langle \Delta I, \Delta T \rangle}{\| \Delta I \| \| \Delta T \|}\] <p>where $ \Delta T = E_T(y_{\text{tar}}) - E_T(y_{\text{ref}}), \Delta I = E_I(x_{\text{gen}}) - E_I(x_{\text{ref}}). $</p> <p>$E_I$ and $E_T$ are CLIP’s image and text encoders, $y_{\text{ref}}, x_{\text{ref}}$ are the source domain text and image, and \(y_{\text{tar}}\) is a text description of a target and \(x_{\text{gen}}\) denotes the generated image.</p> <h1 id="related-work">Related Work</h1> <p>Recent work in the field discovered an alternative way of manipulating image attributes using pre-trained diffusion models <d-cite key="kwon2023diffusion"></d-cite>. The authors show that instead of tuning the model, one can modify the reverse process and guide it towards the target domain. The reverse process is guided through a lower-dimensional (compared to original latents) latent space which in this case is the bottleneck of the UNet of the original pre-trained diffusion model. Authors show that this latent space enjoys high-level semantics and linearity which allows for more flexible image manipulation.</p> <p>Although this method is still in development (as it was our initial idea for domain adaptation which did not succeed), the latent space suggested by the authors can be used for a more powerful idea which is unsupervised domain adaptation. By smoothing the test images at appropriate noise level, one can classify whether the image possesses a given attribute. Then one can make training and test distributions close to each other by manipulating the attributes of interest. This direction is of our future interest to explore.</p> <p>Another area of current research is trying to use GANs (also guided by the CLIP loss) for image manipulation <d-cite key="gal2021stylegannada"></d-cite>. 
Using GANs allows for zero-shot image manipulation which is way faster than the diffusion models’ reverse process. However, GANs suffer from their limited inversion capability and destruction of initial image information which might be dangerous for downstream tasks (e.g., consider a classification task with GAN manipulating training image labels).</p> <p>An alternative method for manipulating and editing images is mixing latents of source and target <d-cite key="choi2020stargan"></d-cite>. Although this method does provide good results in terms of sample quality, it lacks control for our set-up. We would like to have control over the attributes we are changing and keep the others unchanged.</p> <p>Another method for image editing is classifier guidance which adds classifier gradients in the reverse process to control the generation process <d-cite key="dhariwal2021diffusion"></d-cite>. This method is unsuitable for our problem set-up since we need to train an additional classifier for the target domain, and we do not have enough data to train it.</p> <h1 id="our-contribution">Our Contribution</h1> <p>We demonstrate capabilities of text-guided diffusion to perform domain adaptation in a semi-supervised setting (e.g., unseen attributes of the target domain). To the best of our knowledge, this is the first work that shows the power of diffusion models in performing domain adaptation when the difference between the train and target domains can be described in a short prompt.</p> <h1 id="methodology">Methodology</h1> <p>A frequently encountered problem in supervised learning is one where we have training data from one domain (the source domain) but we want to conduct inference on data that comes from a different but related domain (the target domain) that can be described using text. 
Specifically, we want to focus on the setting where we have access to an adequate number (for training) of observations from the source domain (a subset of which are labelled) and we want to conduct inference (e.g. classification) on unlabelled observations from the target domain. An additional constraint is that we only have a limited number of observations from the target domain so it is infeasible to learn the target distribution. Here, we deal with image data.</p> <h2 id="diffusionclip">DiffusionCLIP</h2> <p>We first train a diffusion model on both labelled and unlabelled images from the source domain. This diffusion model is first used to convert input images (from source domain) to the latent. Then, the reverse path is fine-tuned to generate images driven by the target text (text description of target domain), guided by the CLIP loss. The details are given in the subsequent sections.</p> <h3 id="diffusionclip-fine-tuning">DiffusionCLIP Fine-tuning</h3> <p>In terms of fine-tuning, the DiffusionCLIP model <d-cite key="kim2022diffusionclip"></d-cite> allows for modification of the diffusion model itself as compared to the latent, enhancing its effectiveness. The process utilizes a composite objective including directional CLIP loss and identity loss for fine-tuning the reverse diffusion model parameters.</p> <h4 id="loss-function">Loss Function</h4> <p>The objective function is given by:</p> \[\mathcal{L}_{\text{direction}} (\hat{x}_0(\theta), y_{\text{tar}}; x_0, y_{\text{ref}}) + \mathcal{L}_{\text{id}} (\hat{x}_0(\theta), x_0)\] <p>where $x_0$ is the original image and \(\hat{x}_0(\theta)\) is the generated image from the latent with optimized parameters $\theta$. 
The identity loss \(\mathcal{L}_{\text{id}}\) <d-cite key="kim2022diffusionclip"></d-cite> aims to preserve the object’s identity post-manipulation.</p> <h4 id="optimization-and-identity-preservation">Optimization and Identity Preservation</h4> <p>Optimization is guided by directional CLIP loss, requiring a reference and a target text for image manipulation. The identity loss includes $\ell_1$ loss for pixel similarity and a face identity loss for maintaining recognizable human features.</p> <h4 id="architecture">Architecture</h4> <p>The fine-tuning involves a shared U-Net architecture across time steps, with gradient flow illustrated in Figure 1. This structure supports the transformation of images to align with target texts.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/gradient-flows-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/gradient-flows-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/gradient-flows-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/gradient-flows.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p><em>Figure 1. Gradient flows during fine-tuning the diffusion model with the shared architecture across t <d-cite key="kim2022diffusionclip"></d-cite>.</em></p> <h3 id="forward-diffusion-and-generative-process">Forward Diffusion and Generative Process</h3> <p>Kwon et al <d-cite key="kim2022diffusionclip"></d-cite> discusses the DDPM’s sampling process, which is inherently stochastic. 
This stochastic nature results in varied samples even from the same latent input. However, to leverage the image synthesis capabilities of diffusion models for precise image manipulation, the authors use DDIM’s deterministic forward process with \(\sigma_t=0\) which allows for almost perfect reconstruction. Using deterministic processes, however, limits model’s generative capability and this problem has been developed in the subsequent papers by injecting noise at specific timesteps <d-cite key="kwon2023diffusion"></d-cite>.</p> <h4 id="deterministic-diffusion-processes">Deterministic Diffusion Processes</h4> <p>The deterministic processes are formulated as follows:</p> <p>$x_{t+1} = \sqrt{\alpha_{t+1}}f_\theta(x_t, t) + \sqrt{1 - \alpha_{t+1}}\epsilon(x_t, t)$</p> <p>$x_{t-1} = \sqrt{\alpha_{t-1}}f_\theta(x_t, t) + \sqrt{1 - \alpha_{t-1}}\epsilon(x_t, t)$</p> <h4 id="fast-sampling-strategy">Fast Sampling Strategy</h4> <p>To expedite the sampling, a ‘return step’ is introduced along with a strategy to use fewer discretization steps. This accelerates training without significantly compromising the identity preservation of the object in the image.</p> <p>Detailed mathematical derivations and more comprehensive analyses can be found in the supplementary sections of <d-cite key="kim2022diffusionclip"></d-cite>.</p> <h2 id="experimental-setup-and-procedure">Experimental Setup and Procedure</h2> <p>Our method is intended to be used given a setup as follows. We have a set of images from the source domain, \(\{x_i\}_{i=1}^{n}\), out of which we have labels \(\{y_i\}_{i=1}^{n'}\) for a subset of them, where \(n' &lt;&lt; n\). For simplicity, we are dealing with a binary classification task with 0-1 labels. We now want to classify test images from the target distribution, \(\{x^t_i\}_{i=1}^{m}\) (\(m &lt;&lt; n\)). 
We also have a text description of the target distribution, \(T_{target}\) (a short prompt that captures how the source and target domains differ; for example, if the source domain is images in the summer and the target domain is images in the winter, \(T_{target}\) could be “winter”).</p> <p>We now use the images from the source domain \(\{x_i\}_{i=1}^{n}\) to train a diffusion model and use DiffusionCLIP fine-tuning to generate an image \(x'_i\) from each labelled source image \(x_i\) driven by \(T_{target}\). Thus, we have created a new training dataset with the target distribution \(\{(x'_i, y_i)\}_{i=1}^{n'}\).</p> <p>Now, we use supervised learning to train a model on the \(\{(x'_i, y_i)\}_{i=1}^{n'}\) pairs and subsequently classify the test images \(\{x^t_i\}_{i=1}^{m}\). The idea is that by shifting the distribution of training data to match that of the test data using just the text description of the target distribution, we can achieve a model that generalizes well to the target domain even in the regime of limited labelled data and target domain images without having to explicitly learn the target distribution.</p> <h1 id="experiment-and-results">Experiment and Results</h1> <h2 id="problem-set-up">Problem set-up</h2> <p>We run a simple experiment to show the power of domain adaptation using our method in this setting. We consider a gender classification problem on CelebA dataset with test domain being different from the train domain.</p> <p>Our train domain is original CelebA images while our target domain is the same images but in the “sketch” style. The “sketch” style images were generated by the same method (DiffusionCLIP) by editing the original CelebA images on the test set. This style transfer doesn’t change the face identity (including gender, which is of our interest for the given task), so we keep all the labels unchanged.</p> <p>We have a training set of size 1,200 images and test set of size 300 images (mainly for computation reasons). 
Our data comes from publicly available CelebA dataset with binary attributes (including the gender attribute of interest)<d-cite key="liu2015faceattributes"></d-cite>. We resize all the images to size 256x256 using Bilinear interpolation.</p> <p>We use a simple CNN architecture for gender classification - three convolutional layers with increasing filter depth (32, 64, 128), each followed by a max pooling layer that halves the image dimensions, followed by 2 fully connected layers with sigmoid activation. Our experiment is ran for demonstrative purposes for the most part and does not require complex architectures. The training size of 1,200 images is additionally hinting at the necessity to scale the model complexity down for the purposes of our experiment. Our objective function is binary cross-entropy loss.</p> <h2 id="experimental-pipeline">Experimental pipeline</h2> <p>We run the following experiments to confirm our intuition about the method’s effectiveness:</p> <ul> <li>Experiment 1 <ul> <li>Training set (1,200 labeled images) - original CelebA images</li> <li>Test set (300 labeled images) - “sketched” images</li> <li>We train CNN on plain CelebA images and evaluate on a shifted test domain. We use the plain CelebA test domain as a performance benchmark. We expect this model to do worse on the “sketched” test set than on the original one.</li> </ul> </li> <li>Experiment 2 <ul> <li>Training set (1,200 labeled images) - adapted images. Original train images adapted to the “sketch” style using the method described in the subsection below. <ul> <li>Note: We keep the number of images in the train set the same as in the experiment above (e.g., we create new train images and delete the original ones instead of augmenting the data) for the clarity of the experiment. 
In practice, one can combine images from both domains for learning.</li> </ul> </li> <li>Test set (300 labeled images) - “sketched” images.</li> <li>We train the CNN on the “sketched” images now and evaluate the performance on both “sketched” and plain test sets. We expect this model to do better on the “sketched” test set which is our initial goal.</li> </ul> </li> </ul> <h2 id="domain-adaptation-method">Domain adaptation method</h2> <p>To edit our images from the plain CelebA distribution to the target “sketched” distribution, we use the method proposed in DiffusionCLIP <d-cite key="kim2022diffusionclip"></d-cite>. We used pre-trained fine-tuned diffusion model based on original diffusion model trained on CelebA images using P2 objective introduced by Choi et al <d-cite key="choi2022perception"></d-cite>. Note that the original pre-trained P2 diffusion model was trained on the whole CelebA dataset which makes use of large amounts of unlabeled data in the train domain and is consistent with our problem set-up. The diffusion model was fine-tuned using the prompt “Sketch”. We made use of deterministic DDIM inversion process with 40 steps (instead of a 1,000 steps in the original noise schedule) and 6 generative steps.</p> <p>Despite the sufficient computation cost savings by using the DDIM process, transforming 1,500 images took more than 6 hours on a single NVIDIA GeForce RTX 3050TI 4GB GPU. Computation time is still the main drawback of using diffusion models for image editing and this is the main reason for us to limit the total sample size to 1,500 images.</p> <p>Note: We use the same procedure for generating test images from “technically unknown” sketch domain and adapting the training set to this domain. This assumes the user perfectly identified the prompt which describes the target domain and used it to fine-tune the pre-trained diffusion model which is unrealistic in practice. 
We believe, however, that for simple prompts, the semantic similarity between the user prompt and the word “Sketch” would allow us to obtain adapted images similar to the target domain because of the CLIP loss properties.
Examples of DiffusionCLIP fine-tuning.</em></p> <h3 id="classification">Classification</h3> <p>Figure 3 shows the performance of the CNN trained on the original CelebA images and tested on images in the source domain as well as the target domain, while Figure 4 shows the performance of the CNN trained on the adapted images.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_base-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_base-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_base-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_base.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p><em>Figure 3. Performance of CNN trained on original CelebA images.</em></p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_adapt-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_adapt-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_adapt-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-Semi-Supervised-Domain-Adaptation/train_adapt.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p><em>Figure 4. 
Performance of CNN trained on adapted images.</em></p> <p>These results confirm our intuition that adapting our source domain to the target domain results in a non-trivial performance boost. We observe that for the initial few epochs, the performance for both the source and target domains is similar, but this gap increases as we train further. This tells us that initially, the model learns relevant “higher level” features that are present in both the domains since they are both related. However, for later epochs, the model overfits to the distribution of the training data which results in a large performance gap between the two domains. At this stage, the model is learning “lower level” features that belong to the source domain, which are different in the target domain. Thus, the performance on a shifted domain becomes worse as time goes on. If we train further, we expect to learn more lower level features of the source domain, which will enhance performance for a test set from the source domain but deteriorate performance for a test set from the target domain.</p> <h1 id="conclusion">Conclusion</h1> <p>We have shown, with a simple binary classification experiment, that the proposed domain adaptation method using DiffusionCLIP fine-tuning leads to a significant performance boost when we have training and test data sampled from different but related domains.</p> <p>Future work in this direction might include working with the h-space proposed in <d-cite key="kwon2023diffusion"></d-cite>. Our idea for semi-supervised domain adaptation naturally extends to unsupervised domain adaptation by leveraging the properties of this latent space. One could use this latent space as an implicit attribute classifier after smoothing the image at appropriate noise level and then balance the attributes between train and test sets in an unsupervised manner. 
This approach, however, requires a better implementation of the original method presented in <d-cite key="kwon2023diffusion"></d-cite> and is not feasible as of now.</p> The Effect of Activation Functions On Superposition in Toy Models 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/superposition <h2 id="introduction-to-superposition">Introduction to Superposition</h2> <p>With the recent emergence of grokking, mechanistic interpretability research has trended towards understanding how models learn <d-cite key="GrokNanda"></d-cite> <d-cite key="Pizza"></d-cite>. A central concept in this pursuit is superposition - a single neuron learning multiple “features.”</p> <p>Features are the distinguishing properties of data points, the “things” that allow a neural network to learn the difference between, say, a dog and a cat, or a Phillip Isola and a Jennifer Aniston. Features are the building blocks that determine what makes one data point different from another. In many cases, features discovered by and encoded within neural networks correspond to human-understandable ideas. For example, in language models there exist embedding vectors describing relations like gender or relative size (e.g., the famous vec(“king”) - vec(“man”) + vec(“woman”) =~ vec(“queen”)<d-cite key="mikolov2013efficient"></d-cite>). It has been found that language models often map ideas like these to features within their parameters. Human understanding is not necessary though, as models can find and map features that exist beyond the perception of humans. 
This is an important part of the success (and dual inscrutability) of modern deep models, as these models can determine features and relationships within the data that allow them to model large datasets, like language, very well.</p> <p>In this work we:</p> <ol> <li>Explain Superposition, why it may occur, and why it is important</li> <li>Motivate a framework to easily study Superposition</li> <li>Study how activation functions affect Superposition</li> </ol> <h2 id="superposition-and-previous-work">Superposition and Previous Work</h2> <p>Let us elaborate further. If you were to train some neural network and visualize the weights - chances are you would see some mess that looks like this:</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-superposition/random_matrix_equation-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-superposition/random_matrix_equation-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-superposition/random_matrix_equation-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-superposition/random_matrix_equation.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>You are likely looking at superposition!</p> <p>As hypothesized by <d-cite key="toymodels"></d-cite>, superposition is a phenomenon which occurs when the number of features being learned by a model is greater than the number of parameters in that model. To capture $n$ features with $m&lt;n$ parameters, one can think of the neurons as “working overtime.” In other words, some of the neurons within a model encode information about more than one feature. 
The neuron exhibiting superposition operates as an information compressor. The caveat is that this compression is often unpredictable and hard to understand!</p> <p>In a linear model, i.e., one which maps inputs to outputs with only linear functions, there are fewer parameters than the features it tries to represent, so it can only represent the top $m$ features. How then do neural networks use compression and map back to $n&gt;m$ features using only $m$ parameters? The answer is non-linearity. Clearly, the activation function is key to understanding how superposition occurs - unexplored by other work in the field. <d-cite key="elhage2022solu"></d-cite> explores the activation function in transformer MLP, but not in the setting we present here.</p> <p>But why do we care about Superposition? Why spend time studying this?</p> <p>While it may seem tangential, Superposition sheds important insights on Large Language Models (LLMs)! While LLMs are billions of parameters large, this is still not enough for a one-to-one mapping to “features” on the internet. Therefore LLMs also MUST exhibit superposition to learn. We focus our current work on the $\textit{bottleneck superposition}$ regime, but <d-cite key="incidental"></d-cite> has shown that the picture is far more complicated than presented in <d-cite key="toymodels"></d-cite>. Namely, varying the initialization can change how superposition unfolds. To normalize across experiments, we initialize all weights using the Xavier norm, as outlined by <d-cite key="xavier"></d-cite>. However, this is certainly a limitation of our presented work. A more rigourous analysis of superposition with activation functions would explore it outside the contex of the bottleneck regime. 
We leave this for future work.</p> <div class="row mt-3 l-page"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-superposition/feature_visual-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-superposition/feature_visual-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-superposition/feature_visual-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-superposition/feature_visual.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> From <a href="https://distill.pub/2017/feature-visualization/">Distill Blog</a>, "Feature visualization allows us to see how GoogLeNet trained on the ImageNet dataset, builds up its understanding of images over many layers. </div> <p>Previous research, as detailed in <d-cite key="toymodels"></d-cite>, has predominantly explored superposition within the confines of toy models utilizing the Rectified Linear Unit (ReLU) activation function. However, to extend these findings to contemporary neural networks, it is crucial to investigate the influence of different activation functions on superposition. Different activation functions provide different ways for a model to use superposition to its advantage.</p> <p>So you train a neural network - what happens at the neuron level? There are three possibilities. 
As the network trains each neuron has three choices:</p> <ol> <li>The neuron chooses not to encode the “features”</li> <li>The neuron chooses to dedicate itself to one feature</li> <li>The neuron chooses to encode multiple features</li> </ol> <p>(We anthropomorphize - The neuron doesn’t choose to do anything - there is no free will - you are born into a loss landscape and an optimizer telling you what to do.)</p> <p>In linear models, each neuron is limited to representing only the most significant features (2), discarding others (1). Conversely, superposition, enabled by non-linear activation functions, adopts a more inclusive approach (3), trying to encode multiple features per neuron and learning efficient representational shortcuts.</p> <p>While ReLU bears similarity to the Gaussian Error Linear Unit (GeLU) used in modern GPT architectures, a deeper understanding of how different nonlinear activations impact superposition can provide crucial insights. Such understanding is key to unraveling the complex mechanisms through which neural networks utilize non-linearities, a cornerstone in the broader narrative of neural network interpretability.</p> <h3 id="monosemanticity-and-polysemanticity">Monosemanticity and Polysemanticity</h3> <p>To connect to existing literature (2) and (3) above are given the names monosemanticity and polysemanticity. We will also follow this notation going forward.</p> <p>To describe further, the idea of superposition in neural networks leads us to two distinct types of neuron behaviors: monosemanticity and polysemanticity.</p> <p>Monosemantic neurons are those that specialize in a single, distinct feature, acting as dedicated detectors. This characteristic is often observed in the intermediate layers of architectures like Convolutional Neural Networks (CNNs), where neurons become adept at recognizing specific patterns, such as curves or colors. 
Polysemantic neurons do not align with just one feature but engage with multiple features simultaneously, offering a broader and more nuanced understanding of the data. This trait is essential for handling complex, high-dimensional datasets but comes at the cost of reduced interpretability.</p> <h2 id="motivation-and-notation">Motivation and Notation</h2> <p>Our work extends the work done in <d-cite key="toymodels"></d-cite> by examining how the changing of the activation function on toy model networks affects the behavior and interpretability of these networks. <d-cite key="toymodels"></d-cite> uses the canonical ReLU activation function to add non-linearity to two-layer models to analyze how superposition occurs within small networks. They did not generalize their work to other activation functions, which we find, result in <strong>distinct</strong> new phenomenon. Our work compares the ReLU function with five other common activation functions: GeLU, SiLU, Sigmoid, Tanh, and SoLU. We hope that generalizing the phenomenon across activation functions can push the toy dataset to be in closer to realistic ML settings.</p> <h3 id="problem-specification">Problem Specification</h3> <p>The models in this experiment will be learning how to replicate a length-$n$ vector of inputs in the range $[0, 1]$ with a compression to a length-$m$ embedding (where $n&gt;m$). The model will then use the length-$m$ embedding to recreate the length-$n$ input, using a non-linear activation function to allow for superposition.</p> <p>We will run two variations of the experiment. One variation of the experiment will involve compressing inputs of size $n=10$ to an embedding of size $m=5$. This experiment aims to see how superposition occurs across many features which are encoded in a bottleneck with half the number of spots as there are features. The second variation of the experiment will involve compressing inputs of size $n=2$ to an embedding of size $m=1$. 
This experiment aims to understand precisely how the model encodes the second “extra” feature in a variety of settings.</p> <p>To set up this experiment, we need to create a dataset that allows for superposition to occur and that also allows for interpretability of the superposition. To motivate this further, we begin with a careful discussion of features.</p> <h3 id="features">Features</h3> <p>Features are the salient “things” that a neural network learns to differentiate inputs <d-cite key="features"></d-cite>.</p> <p>Technically, features are the properties which neural networks try to extract from data during learning to compress inputs to useful representations during inference. Although features can map to human-understandable concepts (e.g., dog ears), they can also represent properties of the data that are not immediately apparent to the human brain. To experiment with superposition, we need to encode features in a way that we can understand. In other words, we do not want our experimental model to learn features that we are unaware of. This would make it hard for us to interpret how the model maps features in the data to embeddings within its parameters, consequently obscuring how superposition works. To this aim, we must generate features within the training set for our model which are simple and understandable to us a priori. Similar to <d-cite key="toymodels"></d-cite>, we use as each input a vector with entries drawn independently from a uniform distribution over $[0, 1]$. Making each entry independent of the others enforces that each entry is its own (artificial) feature with no correlation to the other features.</p> <p>Here we define two important augmentations that we used in the dataset to simulate real-world features: sparsity and importance.</p> <h4 id="sparsity">Sparsity</h4> <p>Sparsity is a measure of how often a specific feature is present in a dataset. 
A feature is characterized as “sparse” if it only appears in a small fraction of the inputs to the model. Similarly, features that are “dense” appear in many of the inputs. We will also use the term ‘density’, which is the complement of sparsity, defined as $1-S$.</p> <p>Specifically, a feature with a sparsity of $S \in [0, 1]$ has a probability $S$ of being expressed in any given input. If we have $S=0$, this means that the feature is expressed in every input, whereas if we have $S=0.5$, this means that the feature is expected to be expressed in about half of the inputs.</p> <p>In our experiment, we train models at different sparsities to capture how sparsity affects superposition.</p> <h4 id="importance">Importance</h4> <p>Not all features are created equal!</p> <p>Some features are more useful than others in determining relevant information about inputs. For instance, when building a dog detector - capturing features related to dogs’ faces are extremely important! A model would need to pick up salient features of dogs, perhaps floppy ears and snouts. Other features, like the grass a dog is sitting on or a frisbee in a dog’s mouth, may not be as useful for detecting a dog. The varying degrees of usefulness among features are encapsulated in the concept of “importance”.</p> <p>In the context of feature detection by a neural network, importance plays a role in modulating which features are encoded within the embedded layers of the network. In the context of the superposition hypothesis, if one feature has more importance than another feature, then it would be inefficient for the network to map both features equally within the embedding; allocating more weight to the feature with greater importance would be more valuable to the network in minimizing error.</p> <p>In our experiment, we give each input feature a different importance to allow the models to differentiate between them. 
We will examine when and how the model justifies mapping multiple features of differing importances to the same neuron, i.e., we will observe the superposition of features with differing importances.</p> <h3 id="dataset">Dataset</h3> <p>To run this experiment, we will synthetically generate data that has desired sparsity and importance properties.</p> <p>Each input $x$ will be a vector of length $n$. Each element $x_i$ in the vector will be drawn independently from the other elements in the uniform range $[0, 1]$. As discussed before, we can now synonymously refer to each of these elements as features, given their independent generation. (We will refer to them as features from this point onwards.)</p> <p>Each feature $x_i$ in the vector has a relative importance to each of the other features $x_{j\ne i}$. The importance of feature $x_i$ is $I_i = r_I^i$ where $r_I\in(0, 1)$ is a constant describing the relative decay of importance between neighboring features. This attribute of the data will be implemented in the loss function (see below for more details).</p> <p>We will train separate models for each of the varying levels of sparsity. 
For an input $x$ with sparsity $S$, each feature $x_i$ will take on its “true” value, a uniformly distributed number, with a probability of $1-S$ and will otherwise be set to 0 with a probability of $S$.</p> <p>Below is a visualization of two batches of inputs with respective sparsities $S=0.5$ and $S=0.99$.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-superposition/input_batches-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-superposition/input_batches-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-superposition/input_batches-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-superposition/input_batches.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Each column of the plots represents a feature vector of length 20. Each batch has size 100, corresponding to the number of columns in the plots. Notice how the changing in sparsity affects the feature density. </div> <h3 id="network">Network</h3> <p>Below are the architectures of the base (linear) and experimental (non-linear) models that we are using in this experiment. 
Of particular note is the activation function $\mathbb{f}$, which we will substitute using the aforementioned activation functions.</p> <table> <thead> <tr> <th style="text-align: center">Linear Model</th> <th style="text-align: center">Activation ( $\mathbb{f}$ ) Output Model</th> </tr> </thead> <tbody> <tr> <td style="text-align: center">\(h = Wx\)</td> <td style="text-align: center">\(h = Wx\)</td> </tr> <tr> <td style="text-align: center">\(x' = W^T h + b\)</td> <td style="text-align: center">\(x' = f(W^T h + b)\)</td> </tr> <tr> <td style="text-align: center">\(x' = W^T Wx + b\)</td> <td style="text-align: center">\(x' = f(W^T Wx + b)\)</td> </tr> </tbody> </table> <div class="row mt-3 l-page"> <div class="col-6 mx-auto mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-superposition/Autoencoder-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-superposition/Autoencoder-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-superposition/Autoencoder-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-superposition/Autoencoder.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>We create an autoencoder - compressing down to induce polysemanticity. This maps $x$ to a direction in a lower-dimensional space, represented by \(h = Wx\). Each column of $W$ corresponds to a lower-dimensional representation of a feature in $x$. To reconstruct the original vector, $W^T$ is used, ensuring clear feature representation correspondence. This structure results in a symmetric matrix $W^TW$ and allows for clear visualization of the weights. 
They visually allow for the determination of the presence of superposition.</p> <h3 id="loss">Loss</h3> <p>Sparsity, Importance and Our Network come together in the following loss function:</p> \[L = \sum_{i} \sum_{x} I_{i}(x_{i} - x'_{i})^{2}\] <p>Motivated by <d-cite key="toymodels"></d-cite>, we use a standard MSE loss, where the squared difference between $x_i$ and $x_i’$ measures the reconstruction error of the auto-encoding of the datapoint. The Importance factor, $I_i$, describes how important the given reconstruction is. A smaller importance will allow loss minimization even with a poor reconstruction.</p> <h2 id="results">Results</h2> <p>Below we present each activation function, along with plots depicting how training results in superposition at varying degrees of sparsity.</p> <p>For the $n=10, m=5$ experiment, we show the $W^TW$ matrix and neuron feature distribution at varying degrees of sparsity. The $W^TW$ matrix reveals which features are prioritized (shown by the diagonal terms) and any polysemanticity that occurs (shown by the off-diagonal terms). The neuron feature distribution shows how each of the $n=10$ features is mapped to each of the $m=5$ embedding dimensions. This can aid in understanding under what conditions polysemanticity arises and how it occurs under each condition of sparsity.</p> <p>For the $n=2, m=1$ experiment, we show a phase diagram. This phase diagram shows how the second “extra” feature of the length-2 input vector is encoded. There are three options: not encoded at all (only the first feature is encoded), encoded in superposition with the first feature, and encoded as the only feature (the first feature is not encoded).</p> <h3 id="relu">ReLU</h3> <p>The ReLU (Rectified Linear Units) activation function is a piecewise-linear function, a simple non-linearity that allows models to use superposition of features. 
ReLU was the only activation function used in <d-cite key="toymodels"></d-cite>, so our work with the ReLU function was primarily to verify the results from their work and create a baseline for our subsequent experiments.</p> <p>The following are the $W^TW$ matrices and feature-neuron mappings:</p> <div class="caption"> ReLU $W^TW$ Matrices </div> <div class="row mt-3 l-page"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-superposition/Sparsity_super_relu-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-superposition/Sparsity_super_relu-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-superposition/Sparsity_super_relu-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-superposition/Sparsity_super_relu.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>As per the results in <d-cite key="toymodels"></d-cite>, the ReLU model focuses on the most significant features in the low sparsity regime (generally resulting in monosemanticity), while relying on superposition in the high sparsity regime (polysemanticity). With weaker signals for the most important features in the high sparsity regime, the model encodes multiple features in each neuron activation to minimize error of the sparse signals. Notably, the ReLU model uses antipodal pairs in the mapping of features to encode multiple features to single neurons. This can be seen as a light-colored diagonal entry within $W^T W$ and a corresponding dark-colored off-diagonal entry within the same column. This antipodal mapping of features is a method that the model uses to compress more than one feature to one neuron. 
This antipodal mapping is more interpretable than other kinds of polysemanticity which occurs in subsequently-described activation functions which “speckle” multiple features into a single neuron, making it more difficult to determine how the superposition occurs in that model.</p> <p>The following is the phase diagram of the ReLU models:</p> <div class="row mt-3 l-page"> <div class="col-6 mx-auto mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-superposition/phase_51_relu-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-superposition/phase_51_relu-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-superposition/phase_51_relu-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-superposition/phase_51_relu.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-6 mx-auto mt-3 mt-md-0 d-flex align-items-center"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-superposition/legend-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-superposition/legend-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-superposition/legend-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-superposition/legend.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>In regimes of high sparsity (i.e., below $1-S=0.1$ on the phase diagram above) the ReLU models are highly 
polysemantic for all relative feature importances, reflecting an inability to encode features with a sparse signal. In regimes of low sparsity, the model generally embeds the more important of the two features. This result mirrors the phase diagram in <d-cite key="toymodels"></d-cite> as expected.</p> <h3 id="gelusilu">GeLU/SiLU</h3> <p>The GeLU (Gaussian Error Linear Units) and SiLU (Sigmoid Linear Units) activation functions are very similar to one another, and as a result produced very similar experimental results. Both functions are akin to a “smoothed out” version of the ReLU function, i.e., they have no discontinuities. The GeLU has recently been popularized as the activation function of choice in many transformers, including BERT <d-cite key="Devlin2019BERTPO"></d-cite> and GPT <d-cite key="gpt"></d-cite>. The GeLU is differentiable for all $x$ - and has a smoother curve than the SiLU (Swish) activation. <d-cite key="elhage2022solu"></d-cite> found that in the setting of transformers, the GeLU was less interpretable than the SoLU. 
This may be the case after having many linear layers activation - but with a single layer this is not the case.</p> <div class="caption"> GeLU $W^TW$ Matrices </div> <div class="row mt-3 l-page"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-superposition/Sparsity_super_gelu-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-superposition/Sparsity_super_gelu-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-superposition/Sparsity_super_gelu-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-superposition/Sparsity_super_gelu.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> SiLU $W^TW$ Matrices </div> <div class="row mt-3 l-page"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-superposition/Sparsity_super_silu-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-superposition/Sparsity_super_silu-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-superposition/Sparsity_super_silu-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-superposition/Sparsity_super_silu.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>The GeLU and SiLU models exhibit similar kinds of superposition in their weight matrices. 
With increasing sparsity, superposition of features does happen, but it is more “strict” than the ReLU model, generally mapping at most two features to any single neuron. In each of the polysemantic neurons, though, there is one feature that dominates, suggesting that these activation functions enforce sparsity in their activations. There are also many antipodal pairs of features within these models, reiterating the behavior that exists in the ReLU models (also found in <d-cite key="toymodels"></d-cite>).</p> <div class="row mt-0 l-page"> <div class="col-sm mt-2 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-superposition/phase_51_gelu-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-superposition/phase_51_gelu-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-superposition/phase_51_gelu-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-superposition/phase_51_gelu.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-2 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-superposition/phase_51_silu-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-superposition/phase_51_silu-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-superposition/phase_51_silu-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-superposition/phase_51_silu.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; 
$('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="row mt-0 l-page"> <div class="col-6 mx-auto mt-2 mt-md-0 d-flex align-items-center"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-superposition/legend-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-superposition/legend-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-superposition/legend-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-superposition/legend.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>The above phase diagrams of the GeLU and SiLU models show a marked difference from that of the ReLU model (earlier), despite the similar shapes of these three activation functions. The GeLU and SiLU models exhibit significant monosemanticity at high degrees of sparsity, unlike the ReLU, which results in near-complete polysemanticity for sparsities higher than $S=0.9$. This differnce may reflect SiLU’s and GeLU’s better fit as an activation for picking up the signal in sparse feature representations, making the case for GeLU and SiLU as more interpretable activation functions within larger models.</p> <h3 id="sigmoid">Sigmoid</h3> <p>The Sigmoid function is a smooth activation function with an output range of $(0, 1)$. 
This maps directly to the desired range of values that the model is trying to replicate.</p> <div class="caption"> Sigmoid $W^TW$ Matrices </div> <div class="row mt-3 l-page"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-superposition/Sparsity_super_sigmoid-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-superposition/Sparsity_super_sigmoid-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-superposition/Sparsity_super_sigmoid-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-superposition/Sparsity_super_sigmoid.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>The Sigmoid model exhibits superposition in all neurons as soon as the sparsity is non-zero, as can be seen from the “speckling” of non-zero off-diagonal terms in $W^T W$. This is a difference from the ReLU/GeLU/SiLU models, for which the superposition “leaks” into the least significant encoded features at low, non-zero sparsities and eventually affects all features at higher sparsities. This low-sparsity superposition may occur because the Sigmoid function strictly maps to $(0, 1)$, with increasingly large pre-activation inputs necessary to map to values close to 0 and 1. 
As such, the model may be “speckling” the off-diagonal values in an attempt to “reach” these inputs which are close to 0 and 1.</p> <div class="row mt-3 l-page"> <div class="col-6 mx-auto mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-superposition/phase_51_sigmoid-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-superposition/phase_51_sigmoid-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-superposition/phase_51_sigmoid-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-superposition/phase_51_sigmoid.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-6 mx-auto mt-3 mt-md-0 d-flex align-items-center"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-superposition/legend-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-superposition/legend-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-superposition/legend-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-superposition/legend.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>Despite differences in the occurrence of polysemanticity, the ReLU and Sigmoid models exhibit very similar phase diagrams, reflecting an inability to encode multiple features at sparsities above $S=0.9$ (i.e., below $1-S=0.1$ on the phase diagram). 
As discussed above, this may be caused by the vanilla sigmoid activation’s inability to “reach” target values close to 0 or 1.</p> <h3 id="tanh">Tanh</h3> <p>The Tanh function is another smooth activation function, but it results in significantly different behavior from the Sigmoid (despite being a linear mapping of the Sigmoid).</p> <div class="caption"> Tanh $W^TW$ Matrices </div> <div class="row mt-3 l-page"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-superposition/Sparsity_super_tanh-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-superposition/Sparsity_super_tanh-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-superposition/Sparsity_super_tanh-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-superposition/Sparsity_super_tanh.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>With the Tanh activation function, the models prioritize the most important features regardless of sparsity. This behavior is possibly attributed to the range that the Tanh function maps to $(-1, 1)$, while the target range of input values in this experiment are $[0, 1]$. 
This behavior is similar to that of a linear model (i.e., no activation function) which exhibits no capability to use superposition, but the phase diagram reveals subtle differences from the linear model results.</p> <div class="row mt-3 l-page"> <div class="col-6 mx-auto mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-superposition/phase_51_tanh-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-superposition/phase_51_tanh-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-superposition/phase_51_tanh-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-superposition/phase_51_tanh.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-6 mx-auto mt-3 mt-md-0 d-flex align-items-center"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-superposition/legend-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-superposition/legend-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-superposition/legend-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-superposition/legend.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>Although nearly performing as the linear model would, only encoding the most important feature, there is some difference to the linear model along the boundary between features, as can be seen around the importance of 1. 
This reflects the model’s ability to use non-linearity to perform superposition.</p> <h4 id="a-note-on-sigmoid-and-tanh">A Note on Sigmoid and Tanh</h4> <p>Despite similarities in the S-like curvature of the Sigmoid and Tanh activation functions, the Sigmoid model exhibits superposition, whereas the Tanh model exhibits nearly zero superposition. A key difference between the two functions is the fact that the Sigmoid function maps inputs to a range of $(0, 1)$, while the Tanh function maps inputs to a range of $(-1, 1)$. This difference is significant in our experiment, as our experiment uses models to recreate random vectors with elements in the range $[0, 1]$. The range of the Sigmoid function matches this target range, while the portion of the Tanh function’s range that matches it is produced only by non-negative inputs to the Tanh function. In other words, the $(-\infty, 0)$ input domain (which maps to the range $(-1, 0)$) of the Tanh function remains useless for prediction of values which should be in the range $[0, 1]$. Therefore, the Tanh function empirically acts like a linear function (i.e., no activation layer).</p> <h3 id="solu">SoLU</h3> <p>The SoLU (Softmax Linear Units) activation function is based on the work from <d-cite key="elhage2022solu"></d-cite>. \(\mathrm{SoLU}(x) = x \cdot \mathrm{softmax}(x)\) SoLU is a function for which the activation of each neuron is dependent on all the other neurons within its own layer. This is significantly different from all the other activations that we tested, as the activations of neurons with the other functions are independent of the other neurons within the same layer. In other words, all the other activation functions are univariate while the SoLU is multivariate. Similar to other approaches like L1 regularization, the SoLU amplifies neurons with relatively large pre-activations and de-amplifies neurons with relatively smaller pre-activations. 
This behavior pressures the model to be more monosemantic (and therefore more interpretable in some settings), as discussed in <d-cite key="elhage2022solu"></d-cite>.</p> <div class="caption"> SoLU $W^TW$ Matrices </div> <div class="row mt-3 l-page"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-superposition/Sparsity_super_solu-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-superposition/Sparsity_super_solu-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-superposition/Sparsity_super_solu-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-superposition/Sparsity_super_solu.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>In our experiment, the SoLU model results in non-zero superposition of all features with all degrees of sparsity. This may be attributed to the way that the SoLU “forces” activations to be sparse, i.e., the activations result in a “winner-takes-all” behavior due to the way that the Softmax function works. 
This is not a useful property for prediction of a vector of independently-drawn values, as the input vectors are unlikely to be peaky, i.e., the SoLU does not quite fit the purposes of its task.</p> <div class="row mt-3 l-page"> <div class="col-6 mx-auto mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-superposition/phase_51_solu-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-superposition/phase_51_solu-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-superposition/phase_51_solu-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-superposition/phase_51_solu.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-6 mx-auto mt-3 mt-md-0 d-flex align-items-center"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-superposition/legend-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-superposition/legend-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-superposition/legend-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-superposition/legend.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>As seen in the heatmap plot above, the SoLU activation results in very polysemantic behavior. 
This function is not precisely fit for its task of recreating given vectors and likely results in using polysemanticity to attempt to pass information about inputs forward. Curiously, the SoLU models have a preference for the more important feature in the low sparsity regime.</p> <h3 id="bringing-them-all-together">Bringing Them All Together</h3> <div class="caption"> Sparsity vs Dimensions Per Feature </div> <div class="l-page"> <iframe src="/staging/assets/html/2023-11-10-superposition/file.html" frameborder="0" scrolling="no" height="600px" width="100%"></iframe> </div> <p>The diagram above depicts a variation on the two experiments explained thus far. In this experiment $n=200$ features were compressed to $m=20$ features and the loss function was tweaked to give uniform importance $I_i = 1$ to all features. This was done to determine how each activation function compresses features in different sparsity regimes without the influence of feature importance.</p> <p>On the y axis, the plot depicts a metric (dimensions per feature) that measures the number of dimensions a model dedicates to each feature. In other words, a point with a y-value near 1 represents a model that dedicates one dimension of its embedding space to one feature, whereas a point with a y-value near 0.25 represents a model that represents four features in each dimension.</p> <p>The plots are generally consistent with the analysis from the previous experiments. Many of the activations result in superposition in the low-density/high-sparsity regime, and increases in sparsity result in increases in the polysemanticity of the model (i.e., the dimensions per feature decrease). Consistent with the other experiments, SiLU and GeLU perform very similarly. The Sigmoid and SoLU activations pack nearly 20 features per dimension at high sparsities. 
The Tanh activation exhibits behavior similar to the linear model, neatly packing one dimension with one feature, a result that is mirrored in the previous experiments. Similar to the results in <d-cite key="toymodels"></d-cite>, we see “sticky” behavior of the ReLU activation function at 1 and 0.5 dimensions per feature. This can be explained by the phenomenon of “antipodal pairs” discussed in <d-cite key="toymodels"></d-cite>. None of the other activation functions that we tested exhibit this behavior - which is striking since this is a well-studied effect for the ReLU activation function. This may be because the ReLU activation function is the only one that is not smooth, and therefore behaves differently from the other activation functions.</p> <h2 id="conclusion">Conclusion</h2> <p>Our investigation into the effects of various activation functions reveals that significant changes occur in model behavior depending on the chosen function. This finding underscores the ability to modulate the degree of superposition through the selection of activation functions, highlighting yet unexplored degrees of freedom in model design. This line of inquiry fits seamlessly with considerations of how neural networks are initialized and trained, suggesting these as promising future research directions.</p> <p>Our work is limited by the breadth of activation functions that we tested, though. Further iterations on each of the activation functions (e.g., tweaking the Sigmoid function to map to the range $(-\epsilon, 1+\epsilon)$) could prove fruitful in getting better performance from the models. Furthermore, while writing this blog, <d-cite key="incidental"></d-cite> published a new key insight related to the importance of initialization in superposition, which we do not explore here. 
Despite this, we have learned valuable insights about the effects that our set of activation functions can have on superposition.</p> <p>Pursuing enhanced interpretability, however, does not come without its challenges. Specifically, striving for transparency and understandability in neural network models raises concerns about the potential for deception. Despite these challenges, our work aims to develop neural network models that are more interpretable, transparent, and secure.</p> <ol class="bibliography"></ol> Gradient-Boosted Neural Wavelet Interpolation for Time Series (G-BiTS) 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/distill-example <h2 id="introduction">Introduction</h2> <p>Energy companies struggle with energy allocation. The power grid contains a multitude of homes, schools, and offices, all of which require different amounts of power draw and capacity. As the current grid stands, the control loop is running on old data, isn’t adequately reactive to sudden spikes, and is unable to properly model trends. Energy forecasting is the means by which we work to rectify that gap. Energy forecasting is a blanket umbrella term coming from general forecasting of any time series data. There are a lot of methods currently available, ranging from purely statistical models up to deep neural networks. At the moment, the SOTA in predictive modeling from statistical models is SARIMAX: Seasonal Autoregressive Integrated Moving Average Exogenous. In deep learning, the SOTA is N-HiTS[1]. Both work well in most circumstances, but there is a lot of work to improve upon the current performance given we want to generate better embeddings to decrease loss through the energy grid. There have been great performance boosts associated with combining the strengths of the different methods, and that is part of what this paper explores. 
Another big target: as it stands the current flavors of N-HiTS don’t touch upon the further work recommendations from the original paper. This includes advanced interpolation, moving away from the current linear interpolation for the Block modules and moving towards incorporating wavelet decomposition and transforms to help convert the signal into a form that makes it much easier to deliver robust data. I propose gradient-boosted neural wavelet interpolation for time series (G-BiTS) as a new entry to forecasting models relying on a mix of statistical and neural network based models. G-BiTS expands upon N-HiTS, which stands for neural basis expansion analysis for interpretable time series. N-HiTS decomposes time series into a set of basis functions, capturing and interpreting temporal patterns. This paper explores ensembling methods and time series analysis.</p> <h2 id="related-works">Related Works</h2> <p>The main related works relate to the following topics: SARIMAX, N-HiTS, and GBM. SARIMAX stands for seasonal autoregressive integrated moving average with exogenous variables model. Each element of SARIMAX is important in the following ways. AutoRegressive: captures the relationship between observations at various lags. Integrated: the differencing of raw observations to make the time series stationary. Moving Average: the relationship between an observation and a residual error from a moving average model applied to lagged observations. Seasonal: accounts for seasonality in data, like weekly, monthly, or yearly patterns. Exogenous Variables: These are external variables or predictors that aren’t part of the time series itself but are believed to have an impact on it. This is mainly represented in time series analysis by date information with respect to variables unrelated to the power, but can be used to model a common behavior. The biggest flaw with SARIMAX comes from its inability to model more than one seasonality, hampering predictions. 
A more robust model is N-HiTS which stands for neural basis expansion analysis for interpretable time series forecasting. The best benefit from N-HiTS comes from its ability to learn rich embeddings for time series that properly represent all of the trends and seasonalities inherent to the data, while also producing gains through being able to apply much more data as it is made for longer range predictions. N-HiTS is good, and this paper will be exploring a multifold extension using gradient boosting [2] and adaptive ensembling[3]. Gradient boosting generates good predictions by training decision trees sequentially. A new tree is modeled on the residual errors made by the preceding trees. Finally, tying everything all together we have wavelet transforms. Wavelets are wave-like oscillations that represent data at various scales effectively. GBMs help us take advantage of a repeated pattern of smooth behavior interrupted by sudden changes or transients in time series data.</p> <h2 id="g-bits">G-BiTS</h2> <p>This paper proposes a new deep learning framework powered by gradient boosting and signal pre-processing, G-BiTS. G-BiTS stands for Gradient-Boosted Neural Wavelet Interpolation for Time Series. G-BiTS builds upon the success of N-HiTS and explores a question posed by the authors in the original paper on replacing the existing sequential projections from the interpolation functions onto wavelet induced spaces, getting high resolution output. G-BiTS is an ensemble model, which is where gradient boosting comes in. The maximum of the combined predictions is taken for adaptive ensembling and higher performance as well as generality. Max can be min or mean, depending on the use case; having higher output is preferred in this circumstance. 
The hope is to use the hourly modeling capabilities of light gradient boosting machines with the versatility of N-HiTS to create a robust ensemble model.</p> <h2 id="results">Results</h2> <p>The testing for the comparisons of the different forecasting methods is based on the BuildingsBench dataset. Specifically, this paper surveys office buildings within the Fox subsection from the original input. The data includes buildings with energy data that has multiple seasonalities, mostly hourly, daily, weekly, and monthly. Looking at the data, there are some interesting patterns. These are the average skew and kurtosis values for the data: high skew and kurtosis.</p> <p>Skewness: 1.1118040201238155 Kurtosis: 3.452262511716185</p> <p>Statistical analysis also shows that the data was not drawn from a normal distribution and is not stationary, so the variance and mean were not constant throughout the time series.</p> <p>Our baseline is simply copying over the values from the previous week and repeating the same for the following week. Non-baseline models tested include the previously mentioned SARIMAX, N-HiTS, LGBM, and G-BiTS. 
The following are the respective errors from each building ordered as mean average error, root mean squared error, and mean average percent error.</p> <h3 id="building-id-margarita">Building ID: Margarita</h3> <p>SARIMAX (211.47498604910714, 249.84373502456708, 11.805270962305448)</p> <p>NHITS (21.72069293617509, 27.65604571924576, 1.6335940075280377)</p> <p>LGBM (33.16067034334621, 41.84784011583212, 2.0058567433490087)</p> <p>GBITS (26.955107763269822, 31.504577778268615, 1.6841760555882481)</p> <h3 id="building-id-loreta">Building ID: Loreta</h3> <p>SARIMAX (2966.2653087797617, 3513.45974924458, 12.756417057832824)</p> <p>NHITS (203.50202658318491, 338.92442661325015, 1.0121962487927345)</p> <p>LGBM (419.71931531784384, 476.48902925976694, 1.8085151798175159)</p> <p>GBITS (215.94950733822594, 264.7384239183662, 0.9401638424018465)</p> <h3 id="building-id-gaylord">Building ID: Gaylord</h3> <p>SARIMAX (1220.2237444196428, 1479.439585459469, 8.095511476323951)</p> <p>NHITS (137.39752238818102, 203.64435240098928, 0.8720707702102791)</p> <p>LGBM (347.0178199198448, 435.19043719851146, 2.3137853719619144)</p> <p>GBITS (21.02548764010548, 27.84334532157823, 0.73338746467575437)</p> <h2 id="analysis">Analysis</h2> <p>Across the board, SARIMAX performed the worst, followed closely by NHiTS and LGBMs. The biggest issue with SARIMAX is that it can only take a very limited amount of data, as well as being unable to model multiple seasonalities. G-BiTS showed good adaptability as one model over the large dataset was able to get transferable and adaptable embeddings. The wavelet transforms showed the greatest gains from the interpolation stage as the two level smoothing helped the N-HiTS model better fit the non-stationary data. N-HiTS as expected performs well across the board too and had the best time modeling the data.</p> <h2 id="conclusions">Conclusions</h2> <p>There is more work to be done to extend this research topic. 
Mainly, finding better wavelet decompositions and symmetric recompositions for modeling multiple seasonalities faster and in a more efficient manner. The decomposition showed the biggest gain and confirms the original papers thoughts about the approach. Boosting helped standardize the model and generated really interesting embeddings through the initial wavelet based N-HiTS.</p> <h2 id="bibliography">Bibliography</h2> <p>[1]</p> <p>N-HiTS: Neural Hierarchical Interpolation for Time Series Forecasting</p> <p>Cristian Challu, Kin G. Olivares, Boris N. Oreshkin, Federico Garza, Max Mergenthaler-Canseco, Artur Dubrawski</p> <p>https://arxiv.org/abs/2201.12886</p> <p>[2]</p> <p>Gradient Boosting Neural Networks: GrowNet</p> <p>Sarkhan Badirli, Xuanqing Liu, Zhengming Xing, Avradeep Bhowmik, Khoa Doan, Sathiya S. Keerthi</p> <p>https://arxiv.org/abs/2002.07971</p> <p>[3]</p> <p>Adaptive Ensemble Learning: Boosting Model Performance through Intelligent Feature Fusion in Deep Neural Networks</p> <p>Neelesh Mungoli</p> <p>https://arxiv.org/abs/2304.02653</p> Challenges in Deep Learning Surrogates for Constrained Linear Optimization 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/Physics-Informed-Primal-Dual-Learning <h1 id="introduction">Introduction</h1> <p>Physics-informed machine learning has emerged as an important paradigm for safety-critical applications where certain constraints must be satisfied.</p> <p>The goal of this project is to learn a deep learning surrogate for a linear programming optimization problem with hard constraints. The overall approach is inspired by standard KKT conditions. This project will attempt a different DNN approach that aims to predict basic feasible solutions (BFS), and then benchmark it against a modern optimization solver. 
This project will highlight challenges in designing deep learning LP surrogates.</p> <p>Due to computing resource limits, the focus on the project will be more about broad training strategy choices (“discrete” architecture choices), instead of a systematic sweep of hyperparameters.</p> <h3 id="optimization-problem">Optimization problem</h3> <p>We are interested in learning to optimize this linear program with $n$ variables and $m$ equality constraints:</p> \[\begin{aligned} \min \quad &amp;c^T y \\ \text{s.t. } &amp;Ay = b, (\lambda) \\ &amp;x \geq 0 \end{aligned}\] <p>The KKT conditions are:</p> <p>\(\begin{aligned} \quad Ay &amp;=b, \\ A^T\lambda + s &amp;= c, \\ y_i s_i &amp;= 0, \forall i \in [n], \\ y, s &amp;\geq 0 \end{aligned}\) <br /><br /><br /></p> <h1 id="literature-review">Literature review</h1> <p>Fundamental connections between deep learning and the polyhedral theory central to optimization has been noted in <d-cite key="huchette2023deep"></d-cite>, which makes theoretical connections such as bounds on the number and shapes of linear regions expressed in a deep neural net. Beyond linear problems, <d-cite key="amos2023tutorial"></d-cite> surveys approaches that exhibit “amortized optimization”, i.e. incurring larger upfront training times to learn parameters that (hopefully) can generalize sufficiently to novel problems within some set of specially-structured problems; and this upfront training can result in a model that may be orders of magnitude faster at inference time compared to classical (often iterative-based) models.</p> <p>Previous literature on machine learning for linearly-constrained optimization problems could be categorized by how they manage the various components of the KKT conditions. In many of these papers, there is some common deep neural architecture at the start (e.g. 
FCNN or GNN); and then to attempt to recover a feasible solution, the final layers in the architecture correspond to some “repair” or “correction” layers that are informed by optimization theory.</p> <p><b>(KKT equalities + Complementarity)</b>: Building on <d-cite key="deka2019learning"></d-cite>, <d-cite key="pagnier2022machine"></d-cite>’s DNN predicts binary classifications for the active set of constraints, i.e. predicting which of $y_i$ or $s_i$ is 0. Then the remaining linear system can be solved efficiently. However, “false negatives” i.e. failure to detect a binding constraint can lead to infeasibilities (labeled as “misidentifications” in the paper), i.e. potentially violating inequality constraints.</p> <p><b>(Primal equality + Subset of primal inequalities)</b>: <b>E2ELR</b> <d-cite key="chen2023end"></d-cite> uses <i>specialized repair layers</i> for a single-period DC optimal power flow problem, where the differentiable repair layers guarantees power balance (an equality constraint) plus generator limits (a subset of primal inequalities); but can still violate line thermal limits (other primal inequalities). 
E2ELR leverages domain-specific cost structure, where electricity market penalties for line violations may be order(s) of magnitude smaller than energy balance violations; in this way this paper justifies the potential for remaining primal infeasibilities; but does not generally guarantee feasibility for all constraints.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/e2erl-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/e2erl-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/e2erl-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/e2erl.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p><i>Source: <d-cite key="chen2023end"></d-cite>, showing the power balance repair layer, i.e. 
a linear combination of the predicted points with an inequality-feasible extreme point, in order to satisfy the equality constraint.</i></p> <p><b>(Primal equality + All primal inequalities)</b>: Following a similar application in control/RL, <d-cite key="li2023learning"></d-cite> uses a Minkowski functionals / <b>gauge map</b> repair layer to guarantee feasibility in any general polyhedral feasible regions; however, a critical limitation of the gauge map approach is the need to calculate a strictly interior point (so as to tranform the feasible region to an <i>absorbing set</i>) which generally may be computationally as hard as the optimization problem.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/gauge-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/gauge-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/gauge-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/gauge.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p><i>Source: <d-cite key="li2023learning"></d-cite>, showing usage of the gauge mapping; note this requires identifying strictly interior points for every sample instance.</i></p> <p>Alternatively, “Deep Constraint Completion and Correction” <b>DC3</b> <d-cite key="donti2021dc3"></d-cite> enforces primal equality constraints as a differentiable layer and then embedded gradient steps as neural net layers (“gradient unrolling”) to enforce inequality at the output layers; in addition, DC3 uses inequality penalty/Lagrangian term in the loss function to predict 
statistically near-feasible points, in some sense warm-starting the inequality correction steps. However, other papers observe that insufficient number of descent steps in DC3 could still lead to primal infeasibility (e.g. Table 2 of <d-cite key="li2023learning"></d-cite>).</p> <p>To truly guarantee polyhedral constraints, <d-cite key="frerix2020homogeneous"></d-cite> proposes using the Minkowski-Weyl theorem / double description algorithm to convert the algebraic halfspace polyhedron representation to the convex combination of vertices and extreme rays. Then the neural net is in charge of predicting the linear weights, and so the resulting convex combination must be feasible; the paper’s setting is simplified by only considering homogenous constraints $Ay=0$ i.e. a cone so only the rays are needed. However, with nonzero RHS coefficients, naturally this method will face the curse of dimensionality at higher dimensions since the number of vertices can scale exponentially. The authors acknowledge this point: “Overall, one can expect the algorithm to be efficient only for problems with a reasonably small number $m$ of inequalities and dimension $d$.”</p> <p><b>(Primal + dual approaches)</b>: Previous work <d-cite key="liu2022topology"></d-cite> has used a GNN to predict electricity prices (i.e. dual solutions), and then recover the primal solution. More recently, <d-cite key="park2023self"></d-cite> trains two separate neural networks: a primal and a dual network to emulate the optimization iterations of an augmented Lagrangian method; one drawback to this approach is the need to tune more hyperparameters related to the outer optimization loop. 
Instead, <d-cite key="chen2020learning"></d-cite> predicts one scalar value of the optimal value, and leverages the backward automatic differentiation to extract dual solution estimates from the trained neural architecture; these dual values are then used to solve the remaining system of equations to recover the full solution; <d-cite key="zhang2021convex"></d-cite> builds on this work and enforces an <i>input convex neural network</i> architecture since the optimal value function is convex. Still, these above approaches do not necessarily guarantee primal feasibility.</p> <p>In a similar vein of trying to incorporate the whole primal-dual problem structure, the GNN for LP paper <d-cite key="chen2022representing"></d-cite> provides theoretical demonstration of a universal approximation property that GNN can express LP optimal solution mappings, and also demonstrates on a small $m=10,n=50$ problem of achieving 0% error in terms of feasibility detection, as well as prediction of the optimal solution; however, this paper <i>does not address generalization performance</i>.</p> <h1 id="method">Method</h1> <h3 id="data-generation">Data generation</h3> <p>Since the focus is on learning LP’s generally, the dataset is fully synthetic. For this project, focus on having matrix $A$ fixed (one was created with entries drawn from the standard normal distribution), and training over different data examples of $x=(b,c)$. 
As an application example, this can represent learning on a fixed electric grid network topology and technology set, but learning to predict over different RHS resource capacities / renewables availabilities, and different fuel costs.</p> <p>To ensure feasibility (primal problem is feasible and bounded), the space of examples is generated by first creating primitive or latent variables, for each of the $N$ samples (this was implemented in PyTorch to be efficiently calculated in a vectorized way):</p> <ol> <li>Binary vector $\xi \in {0,1}^n$ representing the optimal LP basis, with $\sum_i \xi_i = m$; the value is drawn uniformly from the $(n \text{ C } m)$ possible combinations. Practically this was implemented as a batched permutation of an identity tensor with extra columns.</li> <li>Nonnegative vector $d \in \mathbb{R}^n$, with each $d \sim U[0,1]$ uniformly drawn to be nonnegative.</li> <li>Then for each element $i$, use $\xi_i$ to determine whether to assign the value of $d_i$ to either the primal variable $y_i$ or the dual slack variable $s_i$. This way complementary slackness is enforced. Namely, \(\begin{aligned} y &amp;:= d\odot\xi, \\ s &amp;:= d\odot(1-\xi) \end{aligned}\)</li> <li>Sample $\lambda \in \mathbb{R}^m, \lambda_i \sim U[0,1]$.</li> <li>Finally construct $b=Ay, c= A^T\lambda + s$.</li> </ol> <p>By constructing the dataset in this way, we also know the ground truth optimal solutions (which might not be unique if there are degenerate solutions, which is assumed here to have low impact due to the random coefficients), and importantly also the optimal LP basis.</p> <h3 id="model">Model</h3> <p>As a different approach, this project will try to predict the underlying latent target $\xi$, i.e. the optimal LP basis, as a classification problem. Since there may be non-local interactions between coefficients and variables, a fully-connected architecture is chosen, where every layer is followed by a ReLU nonlinearity. 
The neural net forms a mapping between inputs $x=(b,c) \in \mathbb{R}^{m+n}$ to outputs $\hat{\xi} = f(x) \in {0,1}^{m}$, i.e. binary classifications of whether each variable is chosen in the LP basis. Below is an illustration of all the LP bases vectors for the $n=10, m=5$ problem size; there are $10 \text{ C } 5 = 252$ bases.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/bases-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/bases-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/bases-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/bases.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p><b>Supervised vs. self-supervised learning</b>: Many of the referenced papers devise self-supervised training methods, which is motivated by the expensive computational costs (time) to solve the dataset instances with traditional optimization solvers. However, this synthetic dataset is somewhat of an inverse-problem approach, i.e. by starting out with a sample of assumed optimal solutions, the optimal solutions are very efficiently identified during dataset generation. 
This synthetic generation can also be thought of as a <b>data augmentation</b> method.</p> <p>Since this is binary classification, the training loss used will be binary cross entropy, which is defined in PyTorch for each sample as: \(l(\hat{\xi},\xi) = [l_1, ..., l_i, ..., l_n],\ \ l_i = \xi_i \log \hat{\xi}_i + (1-\xi_i) \log (1-\hat{\xi}_i)\)</p> <p>A softmax layer multiplied by $m$ is optionally added at the output of the NN, to enforce the requirement that there should be $m$ basic variables (in a continuously-relaxed way).</p> <p><b>Equality completion</b>: Once this is done, the LP basis uniquely determines a basic solution (but not necessarily feasible) according to \(\hat{y}^* = (A^\xi)^{-1}b,\) where $A^\xi$ is the $m\times m$ submatrix corresponding to the chosen columns. Rather than matrix inversion, this can be solved in a batched way with PyTorch (torch.linalg.solve) to obtain all samples’ solutions. The entire flow, from supervised dataset generation to neural net prediction and then $y$ solution recovery, is illustrated in the flowchart below.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/flowchart-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/flowchart-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/flowchart-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/flowchart.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>As baselines, also consider the DC3 model, where novelty versus the original paper is that here both $b$ and $c$ are varied across 
samples (as opposed to only the RHS $b$ vectors). Also benchmark against a modern first-order based optimization solver OSQP <d-cite key="stellato2020osqp">. For OSQP, the LP can be easily formulated in the necessary format: $$\begin{bmatrix}b\\0\end{bmatrix}\leq \begin{bmatrix} A \\ I_{n\times n} \end{bmatrix} y \leq \begin{bmatrix}b\\ \infty_n\end{bmatrix} $$</d-cite></p> <p>All experiments are implemented on Google Colab T4 GPU instances (except OSQP which can use CPU). Neural network training is optimized with Adam.</p> <h1 id="results">Results</h1> <h3 id="approximation-and-generalization">Approximation and generalization</h3> <h4 id="small-scale-n4m2">Small scale ($n=4,m=2$)</h4> <p>On a small $n=4,m=2$ problem, the proposed method (using a 3-layer FCNN with width-100 hidden layers; and trained for $&lt;$100 epochs) can achieve near-perfect accuracy ($&gt;$0.997) in both training and testing. The training set has 10,000 samples, and the test set has 1,000 samples, both generated according to the method above. The learning rate used was $10^{-3}$.</p> <p>The accuracies when including and excluding the softmax layer (sum to $m$) are reported in the plot below, where this layer does have some (very) small positive effect on training and testing accuracies. More importantly, the $\hat{\xi}$ predictions after the solution recovery step are all feasible, i.e. with no negative elements, and the predicted optimal solutions can be seen in the right plot to match extremely closely with the ground truth $y^*$. 
This latter property is a desirable feature of the proposed method, that is, once the correct basic feasible solution is predicted, then the linear equation solver will precisely recover the optimal solution.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n4m2_perfect-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n4m2_perfect-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n4m2_perfect-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n4m2_perfect.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h4 id="scaling-up-n10m5">Scaling up ($n=10,m=5$)</h4> <p>Scaling up to a still quite small problem size of $n=10,m=5$ (i.e. 6.25 times larger in terms of $A$ matrix entries), now encounters generalization issues. The same network parameter sizing and training scheme was used here. The left plot shows training accuracy reaches about 0.97 after 300 epochs (and should continue rising if allowed to continue). However, the testing accuracy plateaus at around 0.93 with no further improvement.</p> <p>More importantly, while a $&gt;$0.9 accuracy in deep learning tasks is often sufficient, in this particular context the inaccuracies can lead to optimization problem infeasibilities. 
This is seen in the right plot, where mis-classified $\hat{\xi}$ result in <i>catastrophically</i> wrong $\hat{y}$ primal solution predictions (the severe orange prediction errors in both negative and positive extremes); even when the remaining correctly-predicted $\hat{\xi}$ samples receive precisely correct solutions.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n10m5-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n10m5-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n10m5-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/n10m5.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Furthermore, even though there are about $1-0.93 = 7%$ of individual $\xi_i$ entries that are mis-classified, these errors are fairly spread across various samples. This results in a <u>$19%$ infeasibility rate</u> in the test set, i.e. $19%$ of the predicted $\hat{y}$ vectors violate the nonnegative constraint. In other words, since this particular approach is predicting every individual entry of the basis vector, even small errors for each sample can lead to the overall prediction being wrong. This disproportionate impact is intuitively explained by examining the distribution of bit-wise errors plotted below. Most samples result in 0 bits of error, and then the remaining samples mostly get 1 or 2 bits of error. 
This means that errors are spread out among many samples, leading to a high rate of infeasible prediction vectors.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/errors_bars-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/errors_bars-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/errors_bars-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/errors_bars.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h4 id="attempts-to-improve-accuracy">Attempts to improve accuracy</h4> <p>The previous training error plot appears to show a generalization or overfitting problem. Based on this, various data augmentation techniques were attempted, such as perturbing $b$, $c$, or both vectors (both based on random noise vectors and simple scaling invariance of $\alpha b, \beta c$ while keeping the latent $\xi$ targets); as well as generating new $\xi$ vectors after regular numbers of epochs; different schedules of the aforementioned were also tried. However, none of these attempted approaches were able to produce validation accuracy rates significantly above the original $\sim 0.93$.</p> <p>Notably, an alternative architecture was tried: instead of outputting size-$n$ binary vectors, now try to predict multi-class classification out of the 252 basis vector classes. This actually resulted in worse testing set performance. Intuitively, treating all bases as discrete classes does not leverage the geometric proximity of 2 adjacent bases (e.g. 
which are off by 1 in Hamming distance).</p> <h3 id="benchmarking">Benchmarking</h3> <h4 id="vs-dc3-an-interior-learning-approach">vs. DC3 (an “interior” learning approach)</h4> <p>As a comparison for the $n=4,m=2$ case, the DC3 methodology was implemented using a 3-layer neural net and the self-supervised training loss of the primal objective plus infeasibility penalty, with a chosen penalty rate of 10: \(\mathcal{L} = c^T \hat{y} + 10 ||\max\{0, -\hat{y}\}||^2_2\)</p> <p>The number of inequality correction steps during training was chosen to be $t_{train} = 10$, and to maximize the chance of feasibility a very large $t_{test} = 10,000$ was used (i.e. allow many inequality-correction gradient steps during testing inference).</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>With a learning rate of $10^{-5}$, the training stabilizes after about 30 epochs. Overall, the predictions are fairly accurate in terms of the out-of-sample average objective: $-0.247$ (a 2% optimality gap versus the ground truth), and an $R^2$ of predicted objective values of 0.9992 (see middle plot). 
(The qualitative results were robust to faster learning rates too: A previous higher lr=$10^{-3}$ produced a tighter average objective gap, but the optimal solution deviation versus the ground truth was larger.)</p> <p>However, despite being designed to enforce all hard constraints, the predictions still resulted in infeasible negative values (see the negative dip in the right plot). A similar disproportionate classification error to infeasibility impact is seen here (albeit to a lesser extent): $2.6%$ of all output entries are negative, while $7%$ of test samples lead to an infeasible prediction.</p> <p>Similarly to before, inequality violations are spread out among different samples, rather than all concentrated within a few samples; this is seen in the plot below. This provides an explanatory mechanism for the relatively large infeasible rate.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3_errors-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3_errors-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3_errors-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dc3_errors.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h4 id="vs-optimization-solver">vs. 
Optimization solver</h4> <p>Thus far, the DNN is able to scale quite well along the number of samples dimension, but not the actual problem dimension (number of variables and constraints).</p> <p>Return for now to the small $n=4,m=2$ case for which the DNN method achieves perfect out-of-sample testing accuracy. A next practical question is how does this method compare with “classical” optimization methods, or in what contexts would we prefer one over the other?</p> <p>Note that there are only $4 \text{ C } 2 = 6$ bases. So once the NN produces a $\hat{\xi}$ estimate, these can be mapped to an index in ${1,2,…,6}$. All possible non-basic submatrix inverses can be pre-calculated. In total, to evaluate 1 million testing samples, the DNN predict-basis approach takes:</p> <ul> <li>46 sec for training</li> <li>0.002 sec for prediction of $10^6$ samples</li> <li>10 sec to map $\xi$ to bases indices (<i>note this is not done in a fully vectorized way and potentially could be sped up</i>).</li> <li>$&lt;0.001$ sec to batch matrix multiply every sample $j$’s: $(A^{\xi^j})^{-1}b^j$. Note this is done using einsum which is very efficient on CUDA.</li> </ul> <p>In comparison, even when running all the 1 million problem instances fully sequentially, the OSQP solver took a total of <b>67 sec</b>, i.e. solving about 15,000 problem instances per second.</p> <p>This means that this DNN model here only achieved a speedup factor of about 1.2x, when <i>including the DNN training time</i>. Furthermore, the above “mapping” step is a remaining coding bottleneck at DNN inference time, and this will scale linearly as the test sample size increases; i.e. 
this speedup ratio is unlikely to increase much beyond this at higher sample sizes.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dnn_timing-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dnn_timing-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dnn_timing-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-Physics-Informed-Primal-Dual-Learning/dnn_timing.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>The timing tradeoff can be understood in terms of fixed vs. variable costs, as plotted here. Note the orange and red lines, representing this project’s DNN approach, is using the batched matrix solve instead of the pre-computing 6 matrix inverses (thus taking longer in the solving stage). Despite its very large speedup when only considering the prediction step, holistically the DNN approach here did not pose very significant timing advantages over the optimization solver.</p> <h1 id="conclusion">Conclusion</h1> <p>This project broadly compared 3 very different approaches to LP optimization: 1) a DNN to predict the optimal LP basis, 2) the DC3 method, and 3) optimization solver. Among the 2 deep learning methods, on the small $n=4,m=2$ problem, the LP basis method produced more robust and accurate results (i.e. it was able to perfeclty learn the input to optimal solution mapping, for the chosen data domain) compared to DC3 which already faces inequality violation issues. 
However, neither deep learning methods were able to easily scale to the slightly larger problem.</p> <p>Qualitatively, the predict-LP-basis approach can result in “all-or-nothing” accuracy, i.e. predicting the correct basis vector results in the globally optimal solution, whereas even a nearby classification error can lead to catastrophic primal infeasibilities (due to enforcing the equality constraint). Moreover, in both predict-basis and DC3, inequality violations tend to be spread out among different samples, leading to disproportionate impact on the percentage of infeasible solution vector predictions.</p> <p>Domain-specific knowledge and leveraging problem structure may be needed for tractable DNN solutions for LP optimization. This includes real-life choices of how much accuracy we need exactly in different aspects of the problem (e.g. different components of the KKT conditions).</p> Activation Patching in Vision Transformers 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/CNN-activation-patching <h1 id="motivation">Motivation</h1> <p>Neural networks contain large amounts of parameters and connections that they use to model a given phenomenon. Often, the breadth and complexity of these systems make it difficult for humans to understand the mechanisms that the model uses to perform its tasks. The model is treated like a black-box. When attempting to alter the behavior of the model when it does not behave in the desired way, engineers often rely on trial-and-error tuning of hyperparameters or providing larger, more diverse datasets for training. However, it is often difficult to get representative training data. In addtion, hyperparameters can improve training but are limited in their ability to alter the innate limitations of a model.</p> <p>Mechanistic interpretability aims to unpack the underlying logic and behaviors of neural networks. 
<d-cite key="zhang2023best"></d-cite> Activation patching is an interpretability technique that replaces activations in a corrupted model with that of an uncorrupted model in order to analyze their influence on model output. When a patched activation improves model performance, it indicates that the patched activation playes a role relevant to the corrupted information. <d-cite key="Vig2020InvestigatingGB"></d-cite></p> <p>A better understanding of the logic within neural networks will allow for more strategic improvements to these models inspired by this newfound understanding. In additon, interpretability is the first step toward changing and correcting models. With an understanding of the underlying mechanisms comes more control of these mechanisms, which can be used to apply necessary changes for goal alignment and mitigating issues such as bias. Mechanistic interpretability plays a key role in ensuring the reliability and safety of AI systems.</p> <h1 id="related-work">Related Work</h1> <p>Pearl et al. <d-cite key="10.5555/2074022.2074073"></d-cite> defines “causal mediation analysis” in order to analyze the effect of intermediate entities on a desired result. An application of the “indirect effect” introduced by this research is activation patching, also known as causal tracing. The indirect effect is the effect a given activation has on the output of the model. Since the activation is encompassed within the layers of a neural network, it has an indirect effect on the output. This analysis has been used in language models.<d-cite key="meng2023locating"></d-cite> Here, the indirect effect is defined as the role of an MLP or attention layer on the output. This role is analyzed by first corrupting the outputs of the network. Then, activations from an uncorrupted run of the model can be iteratively patched into the corrupted run in order to determine which activations can best restore the uncorrupted outputs. 
The activations with the most significant restorative impact have the highest indirect effect.</p> <p>For example, if the hidden state for a given attention head in a language model with prompt “The Eiffel Tower is in” is patched into that of a prompt “The Colosseum is in” and successfully changes the output from “Rome” to “Paris”, this indicates that the patched head contains knowledge about the Eiffel Tower. <d-cite key="meng2023locating"></d-cite> The figure below depicts this process of patching from a clean to corrupt run.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/patch-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/patch-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/patch-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-CNN-activation-patching/patch.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <d-cite key="meng2023locating"></d-cite> <p>Meng et al. also provides an example of how interpretability can open opportunities for model editing. <d-cite key="meng2023locating"></d-cite> Their research revealed that MLP layers in the middle of a GPT network had a large influence on the final predicted token from the model. This indicated that the model stored knowledge about the subject of the prompt within these layers. With the understanding of where knowledge of facts is stored within the model MLPs, these layers were then used to edit the knowledge of the language model in a way that is generalizable to other phrases that applied this knowledge. 
The study revealed the utility of interpretability projects in improving model understanding of the problem at hand.</p> <p>Activation patching has been used for language models, which rely on a transformer architecture. Vision transformers <d-cite key="dosovitskiy2021image"></d-cite> take advantage of the transformer architecture to perform common computer vision tasks such as image classification. These transformers use attention to glean valuable context about a given patch in an image, a task that a convolutional neural network has difficulty with due to the independent nature of its receptive fields. <d-cite key="Torralba_Isola_Freeman_2023"></d-cite> Through the use of multi-headed attention, vision transformers can focus on just the parts of the image that are relevant to the task at hand, and they do so with a global understanding of relevance across the entire image. The attention heads learn how to find relevant patches, or tokens, in image for a given query. However, research regarding what exactly these heads “pay attention” to is still ongoing. <d-cite key="Gandelsman2023interpreting"></d-cite> The strategies of activation patching used for language transformers can therefore apply in the context of vision transfomers due to the similarity in architecture and need to address questions of interpretability. Palit et al. performed a similar causal tracing analysis to that of the language model study except with a focus on BLIP, a multi-modal model that can answer questions about a given image. This investigation showed how activation patching can be performed on images along with language rather than language alone.<d-cite key="palit2023visionlanguage"></d-cite></p> <h1 id="methods">Methods</h1> <p>The model that was used for this investigation was a vision transformer that was fine-tuned for the CIFAR10 dataset, a dataset that is often used to train image classification models. 
The pretrained model that was used, which can be found <a href="https://huggingface.co/aaraki/vit-base-patch16-224-in21k-finetuned-cifar10">here</a>, often fails to classify images in the dataset if they are converted to grayscale. For example, the model classifies the image of a deer below as a cat.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/image-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/image-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/image-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-CNN-activation-patching/image.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/gray-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/gray-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/gray-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-CNN-activation-patching/gray.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <!-- <img src="assets/img/2023-11-10-CNN-activation-patching/gray.jpg" alt="drawing" style="width:10px;"/> --> <p>In order to trace which attention heads focus on color information, a clean, corrupted, and restored run was performed with the model. 
A batch was created with a given image along with a grayscale version of that image.
So, this study was performed by corrupting all tokens in the image and restoring all of those tokens during the patching of a given hidden state.</p> <p>This analysis was performed for 1000 images from the CIFAR10 dataset. For each image, the output of the restored run was collected and compared to that of the corrupted run. The indirect effect of a given layer was calculated by the difference in the softmax probability of the class of the image between the corrupted and patched run.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/eqn-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/eqn-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/eqn-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-CNN-activation-patching/eqn.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <d-cite key="meng2023locating"></d-cite> <p>For each image, this patching process was repeated for every attention layer in the neural network. Finally, the results of activation patching were averaged together for each layer across all of the images in order to get a general sense of which layers are most pertinent for processing image color information.</p> <h1 id="results">Results</h1> <p>When single layers were patched rather than a window of layers, results matched that of Meng et al. 
The patching of a single activation did not have a unique effect on the output.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/single-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/single-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/single-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-CNN-activation-patching/single.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>From averaging the change in outputs from activation patching 1000 CIFAR10 images, results show that attention heads of most relevance to color tended to be in the middle or last layers.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/attn-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/attn-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/attn-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-CNN-activation-patching/attn.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Here are some examples of activation patching for individual images from the dataset. 
The graphs display the probability in the output for the correct class of the given image.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/deer-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/deer-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/deer-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-CNN-activation-patching/deer.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/car-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/car-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/car-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-CNN-activation-patching/car.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/plane-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/plane-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-CNN-activation-patching/plane-1400.webp" /> <!-- Fallback to the 
original file --> <img src="/staging/assets/img/2023-11-10-CNN-activation-patching/plane.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>This experiment found that in a 12-layer network with a window size of 3, attention in the fourth layer and final layers of the network had the biggest impact on predictions made by the model. In these layers, the probability of the correct class of the image had the largest change when clean hidden states were patched from these layers into the grayscale run of the vision transformer.</p> <p>As portrayed by the tracing of individual images displayed above, not all images followed this trend exactly. The deer image, for example, had more emphasis on earlier layers and less emphasis on later layers. The automobile had a stronger influence from the attention layer 6 than that of 4. However, it was generally common for layers in the middle and end of the network to play a large role in this classification problem.</p> <h1 id="conclusion">Conclusion</h1> <p>The influence of attention heads close to the output align with the conclusions found by Palit et al. This is likely due to direct connection of final layers to the output. There is also a significant influence of middle attention heads on the output, which is some indication of the key information that is stored in these layers relevant to color. A possible explanation is that these layers are close to the input layer, which directly stores color information, while maintaining enough distance from the input to have narrowed down (attended to) which tokens are relevant to the class the image belongs to. This study provided an initial insight into how vision transformers store information about colors of an image.</p> <p>Future investigations could include other forms of corruption to provide more information about the roles of the different attention layers in a trasformer. 
For example, adding noise to the image embeddings would give insight to the general importance of different layers rather than just focusing on color information. By varying the amount of noise, this corruption would allow more control on how much the output would change and possibly allow room for more significant restorative effects from patching and therefore more definitive results as to where the most influential attention heads live in vision transformers. Other methods of corruption could also explore other tasks ingrained in image classification, such as blurring for edge detection or using silhouettes and image segmentation for texture or pattern identification. In addition, performing activation patching with window sizes other than 3 could provide more context as to how important is an individual attention layer. A similar experiment should be performed on other models and datasets. A focus on different objects, larger datasets, and larger networks would help verify the role of middle and final layer attention heads indicated by this study.</p> Learning Generals.io 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/transformers-as-gamers <!-- ### TLDR: In this post, we explore . <ol> <li>f </li> </ol> --> <!-- This project took a lot more time to get off the ground than expected. My initial code was riddled with bugs largely due to the implementation of parsing out features from the game data. It was a very good lesson for applying the "Hacker's Guide to Deep Learning," so I also write some of what I learned in applying deep learning to a problem. Here are my main takeaways, read more [here](#practical-learnings): <ol> <li>Take the time to </li> <li>Take the time to </li> </ol> --> <h1 id="introduction">Introduction</h1> <p><a href="https://generals.io">Generals.io</a> is a real-time turn-based strategy game. 
In generals.io, two players with a “general”, denoted with a crown, spawn on a board with mountains and cities scattered. Initially, players have no knowledge of other parts of the board besides the tiles immediately surrounding their general. Armies are the main resource of the game, which generate slowly from ordinary tiles, but quickly from the general and cities. Using armies, players compete to capture terrain and cities, which also grants further vision of the board. On each turn, a player is able to click on a cell with their army and use the keyboard to move it in the four cardinal directions. The goal of the game is for the player to use their army to capture the tile of their opponent’s general.</p> <p>A typical game state will look like the following:</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transformers-as-gamers/generals_pomdp-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transformers-as-gamers/generals_pomdp-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transformers-as-gamers/generals_pomdp-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transformers-as-gamers/generals_pomdp.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transformers-as-gamers/generals-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transformers-as-gamers/generals-800.webp" /> <source class="responsive-img-srcset" 
media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transformers-as-gamers/generals-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transformers-as-gamers/generals.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> The left image shows what the game play screen while playing from the perspective of red. Red only is able to see tiles adjacent to it, and every other tile is covered in a fog of war. The right image lifts the fog of war and shows the perspective of the spectator, and we can see where blue's general is located, as well as unclaimed cities around the edge of the map. </div> <p>Generals.io has a modest daily player base and has had attempts to implement bots to play against humans. Currently, no bots have been able to defeat top humans consistently. The top bots, such as <a href="https://github.com/EklipZgit/generals-bot">this one</a>, are implemented using rule-based logic. They achieve human-level performance and are able to win some games against the top 10 ranked players. Previous machine-learning based bots have attempted to use a CNN LSTM in the model architecture, such as <a href="https://yilundu.github.io/2017/09/05/A3C-and-Policy-Bot-on-Generals.io.html">this post by Yilun Du</a>. He separately evaluates a supervised learning approach and a reinforcement learning approach. His supervised learning approach reaches a competent level of play and is able to expand while having an awareness of needing to defend. However, it is very inefficient and makes basic strategic mistakes, such as running army into cities without fully taking them. 
The reinforcement learning approach was trained using A3C from scratch, but it was not able to learn beyond random movements.</p> <p>I set out to build on Yilun’s work and improve the bot’s performance, as well as explore and document what details are actually important for improvement.</p> <h1 id="related-work-and-why-generals">Related Work and Why Generals</h1> <p>Deep learning has already been used to conquer many games, achieving either human-level or superhuman-level performance. The pattern for most games has been to use deep reinforcement learning at enormous scale through self-play. There has been success in chess, Go<d-cite key="alphazero"></d-cite>, Dota 2<d-cite key="dota2"></d-cite>, and Starcraft<d-cite key="star"></d-cite>.</p> <!-- , Games require strong sequential decision making in order to succeed. Previous methods to play games such as Dota 2 have used LSTMs and reinforcement learning <d-cite key="dota2"></d-cite>. Transformers have also seen success on RL baselines such as Atari<d-cite key="chen2021decision"></d-cite>. --> <p>While games in higher complexity have already been defeated by deep learning, the experimentation is often quite opaque, as there are too many decisions that are made to be worthy of reporting on. Furthermore, the games and methods are often way too large for a single researcher to reproduce. For example, OpenAI Five was only able to beat Dota 2 pros after training for ten months, using 770 PFlops/s-days. Generals.io allows for more accessible experimentation through its smaller size and open data pipeline for replays.</p> <p>I think there are still insights to be gained in defeating generals.io. In particular, the game comes with a combination of challenges that aren’t clearly addressed by previous approaches:</p> <ol> <li>The game is requires a high degree of calculation and precision, as well as strong intuition. 
Similar to chess, certain parts of the game are more intuitive and positional, and certain parts require searching through possibilities to calculate precisely. In generals.io, the precision mostly comes from being maximally efficient in the opening, as well as calculating distances relative to opponents army. This would suggest that some kind of model needs to search in order to achieve superhuman performance.</li> <li>The game is partially observable. This prevents approaches used in perfect information games such as Monte Carlo Tree Search, as we need to form belief states over the opponents state.</li> <li>The state and action space is enormous, and it requires planning on long time horizons. Games such as poker satisfy both of the above two bullet points, but it was able to be tackled with approaches such as counterfactual regret minimization after bucketing the state and action space<d-cite key="dosovitskiy2021image"></d-cite>. Bucketing the state and action space likely won't work for generals.io, nor will an approach like CFR work.</li> </ol> <h1 id="methods">Methods</h1> <p>Formally, generals.io can be represented as a POMDP. The underlying state, which is the state of the whole board, can only be observed at tiles that are adjacent to tiles claimed by the player. <!-- In addition, both the board state and action space are completely discrete. While the space of all possible actions throughout the game is large, only a small portion of actions is usually valid at a time: valid actions move army from a tile that is owned by the player. --></p> <p>A wealth of data (over 500,000 games, each containing hundreds of state-action pairs) are available via human replays. We use imitation learning to try to learn from the replays. 
Concretely, the problem can be modeled as selecting parameters \(\theta\) of a policy \(\pi\) (a neural network) to maximize the log likelihood of the dataset \(D\):</p> \[\max_\theta \sum_{(s,a)\sim D} \log \pi_\theta(a | s)\] <p>I used <a href="https://github.com/vzhou842/generals.io-Replay-Utils">existing tools</a> in order to convert the replays into a json format that could then be parsed. I then adapted Yilun’s code, which no longer directly works, in order to simulate the replays to construct the dataset. To start, I only used 1000 replays of highly ranked players to construct my dataset.</p> <p>I started mostly with Yilun’s features, with small modifications:</p> <table> <thead> <tr> <th style="text-align: center">Channel</th> <th>Description</th> </tr> </thead> <tbody> <tr> <td style="text-align: center">0</td> <td>friendly army values</td> </tr> <tr> <td style="text-align: center">1</td> <td>enemy army values</td> </tr> <tr> <td style="text-align: center">2</td> <td>boolean indicators for mountains and cities</td> </tr> <tr> <td style="text-align: center">3</td> <td>unclaimed city army values</td> </tr> <tr> <td style="text-align: center">4</td> <td>friendly city army values</td> </tr> <tr> <td style="text-align: center">5</td> <td>enemy city army values</td> </tr> <tr> <td style="text-align: center">6</td> <td>boolean indicator for mountains</td> </tr> <tr> <td style="text-align: center">7</td> <td>boolean indicator for friendly and enemy general (if found)</td> </tr> <tr> <td style="text-align: center">8</td> <td>boolean indicator for fog of war</td> </tr> <tr> <td style="text-align: center">9</td> <td>(turn number % 50)/50</td> </tr> </tbody> </table> <p>The features made a lot of sense to me as a generals player - it’s all the information I use to play. I removed Yilun’s last feature since a new replay standard made it impossible to compute.</p> <p>Yilun used a CNN LSTM as his architecture. 
In order to keep it simple and evaluate the basic components that improve performance, I removed the memory and only used a simple fully convolutional net with 5 stacked 5x5 filters.</p> <p>Policies were evaluated by coding a small bot in the recently released <a href="https://corsaircoalition.github.io/">botting framework</a> for generals. The bot sampled from the policy’s distribution over legal moves. Two policies were able to go head to head through this framework, and I could queue 10 games in order to get good estimates for the relative strength between the bots.</p> <p>I’ll now describe some of the changes I tried and give an analysis of the results of each change.</p> <h2 id="effects-of-more-data">Effects of more data</h2> <p>The baseline policy, trained with 1000 games, was not very successful. The bot would often move back and forth, without trying to expand or take land.</p> <p>I wanted to first explore how the amount of data affected the policy. I took 2000 more games of high ranking players and trained the same policy on a dataset with 3000 games. I expected an improvement in the similarity of the validation and train loss. 
This was confirmed by the results, shown below.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transformers-as-gamers/combined-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transformers-as-gamers/combined-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transformers-as-gamers/combined-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transformers-as-gamers/combined.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Train and validation loss curves of training with less and more data. </div> <p>This makes sense, as adding more data is essentially a regularizer. It prevents the model from overfitting, as it needs to do well on the added data too. Furthermore, it looks like it converges faster in epoch space, but in reality it’s also going through more examples, so it trained at roughly the same speed if one were to scale the epochs by a factor of 3. The policy was also much more effective, and it did not run back and forth as much. I think this was likely due to reduced overfitting.</p> <p>I suspect that more data would have improved the policy even more, but I didn’t go larger, as it would have broken past the limits of the infrastructure I built. In particular, the dataset consisting of 3000 games took over 4 GB of disk space. A smarter job of batching the data would have allowed me to train with more.</p> <h2 id="squishing-army-features">Squishing army features</h2> <p>Working with the 3000 games, I turned my attention towards improving the features. 
They were already pretty comprehensive, but I was skeptical of the many army features we had. In particular, all of the other features were binary. Army values ranged from 0 to hundreds. I hypothesized that the features encoding armies could lead to unstable training. Using some knowledge about the game, I thought it would make sense to use a function like a sigmoid, in order to squish large values down.</p> <p>As a generals.io player, this made sense to me, as the difference between 1 army on a tile and 2 army on a tile is very large, but the difference between 14 and 15 army is not so large. I expected better performance due to the inductive bias I was adding to the model. However, the loss curve showed similar, slightly slower convergence to the previous experiment. The policies were about the same too.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transformers-as-gamers/squish-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transformers-as-gamers/squish-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transformers-as-gamers/squish-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transformers-as-gamers/squish.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Train and validation loss curves of training after squishing the army features. </div> <h2 id="deeper-network">Deeper Network</h2> <p>Motivated by the success of ResNets <d-cite key="he2015deep"></d-cite> when CNNs were leading computer vision, I wanted to try using a deeper network with residual connections. 
I replaced the network with a stack of one 5x5 filter, followed by nine 3x3 filters. I added skip connections between every two layers. The performance was again about the same. I suspect that more data and/or an even deeper network is needed in order to see benefits from a deeper network.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transformers-as-gamers/deep-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transformers-as-gamers/deep-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transformers-as-gamers/deep-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transformers-as-gamers/deep.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Train and validation loss curves of training after using a deeper network with residual connections. 
</div> <h1 id="discussion-and-conclusion">Discussion and Conclusion</h1> <p>Combining all of the above leads to a decent policy with coherent strategy, shown below.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transformers-as-gamers/game.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transformers-as-gamers/game.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transformers-as-gamers/game.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transformers-as-gamers/game.gif" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> The best performing policy I trained, defeating an unsuspecting opponent (me). Replayed at 5x speed. </div> <p>Qualitatively, this policy is much better than Yilun’s policy. While I don’t have his to evaluate, he shows a <a href="https://bot.generals.io/replays/Be0wkw2t-">replay</a> of its early game performance. My bot does a much better job in the early game of efficiently expanding in order to maximize growth rate. Yilun’s bot has a handle on using a large army to explore, but mine is able to collect army efficiently on turns 25-50 in order to take the opponent’s land.</p> <p>This is interesting because my policy is actually still much simpler than Yilun’s, given he uses a LSTM. It’s possible that the training was not very stable, or it may have overfit, or he just chose a bad replay of his bot.</p> <h2 id="limitations-and-future-work">Limitations and Future Work</h2> <p>The bot is not competitive with any human that has played a decent amount of games. 
It is still pretty inefficient and makes many nonsensical moves (it moves back and forth a few times in the replay).</p> <p>There is still a lot to try, and I’ll actually continue working on some of these ideas after the class, as it was a lot of fun. There’s a decent amount of low hanging fruit:</p> <ol> <li>I noticed the bots often like to expand toward the wall. I'm guessing this is because there is no information encoding the boundaries of the wall, and I just let the padding in the convolutions take care of it. Adding a special indicator would likely be helpful.</li> <li>Use reinforcement learning for improving the policy beyond the demonstrations.</li> <li>Train on a dataset consisting of only one or only a few players in order to reduce multimodality problems (similar style of play).</li> <li>Adding memory to the network.</li> <li>Trying a vision transformer<d-cite key="dosovitskiy2021image"></d-cite>, and trying to have it attend to previous states for recurrence too.</li> </ol> <p>I think achieving even higher levels of performance would require doing some form of search. From my understanding, the most similar approach would be something like MuZero<d-cite key="muzero"></d-cite>. This would allow us to properly learn a world model despite the incomplete information, which we can then do search on.</p> <p>Overall, I learned a ton in this project about how to apply deep learning to a new problem. I encountered many of the issues described in “Hacker’s Guide to DL” and the related readings. My biggest takeaway is to spend the time setting up the proper infrastructure. Poor infrastructure causes bugs and makes it really hard to iterate.</p> <!-- # Conclusion We saw some simple experiments for how . A lot of future work is needed to make a learning-based bot competitive. --> <!-- ## Pratical Learnings I learned a lot of tools and ways to be more productive in using deep learning for a new problem. 
Here are some of the ones I learned: <ol> <li> After <li> f </ol> One issue is 1. How does the performance of CNN LSTM compare to using a transformer? 2. What properties do transformers learn when applied to sequential decision making in a game? 3. Can we learn good representations for quantities such as army counts on each tile? --> A Comparative Study of transformer on long sequence time series data 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/transformer-time <h2 id="abstract">Abstract</h2> <p>This research means to discover the power of transformer in dealing with time series data, for instance traffic flow. Transformer with multihead self-attention mechanism is well-suited for the task like traffic prediction as it can weight the importance of various aspects in the traffic data sequence, capturing both long-term dependencies and short-term patterns. Compared to the LSTM, the transformer owns the power of parallelization, which is more efficient when facing a large dataset. And it can capture the dependencies better with long sequences. However, the transformer may have trouble dealing with the long sequence time-series data due to the heavy computation. This research compares differnt methods that make use of the information redundancy and their combination from the perspective of computational efficiency and prediction accuracy.</p> <h2 id="introduction">Introduction</h2> <p>The time series data processing and prediction are usually conducted with RNN and LSTM. In the case of traffic prediction, CNN and GNN are combined for efficiently capturing spatial and temporal information. And LSTM is widely used as its better performance on capturing temporal dependencies. While recent studies have propsed to replace RNNs with Transformer architecture as it is more efficient and able to capture sequantial dependencies. 
However, the model is inapplicable when facing long sequence time-series data due to quadratic time complexity, high memory usage, and inherent limitation of the encoder-decoder architecture. <d-cite key="Zhou_Zhang_Peng_Zhang_Li_Xiong_Zhang_2021"></d-cite></p> <p>Not all time series are predictable, the ones that is feasible to be better forecasted should contain cyclic or periodic patterns. <d-cite key="Zeng_Chen_Zhang_Xu_2023"></d-cite> It indicates that there are redundant information in the long sequence data. The coundary of the redundancy can be measured by the optimal masking ratio of using MAE to process the dataset. Natural images are more information-redundant than languages and thus the optimal masking ratio is higher. BERT<d-cite key="devlin2019bert"></d-cite> uses a masking ratio of 15% for language, MAE<d-cite key="He_2022_CVPR"></d-cite> uses 75% for image and the optimal ratio for video is up to 90%.<d-cite key="feichtenhofer2022masked"></d-cite> Traffic data is potentially redundant. It contains temporal and spatial information so that neighbor sensors can provide extra information in addition to temporal consistency. We inducted that the optimal ratio for traffic data should be located between image and video. As it has multidimensional information than image and the speed captured by sensors is not as consistent as the frames in videos. We use the GRIN<d-cite key="cini2022filling"></d-cite> model to mask the inputdata using Metr_LA dataset to test the redundancy of traffic data. The results show that it is tolerant when the masking ratio is lower than 90%. Then there is the possibility of using distilling operation to compress information, reducing computational requirement and memory usage. 
Similar to traffic data, most of the time series data are multivariate.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transformer-time/GRIN-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transformer-time/GRIN-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transformer-time/GRIN-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transformer-time/GRIN.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Table 1: Performance comparison with baseline models and GRIN<d-cite key="cini2022filling"></d-cite> with various masking ratio. (by Tinus A,Jie F, Yiwei L) </div> <h2 id="methodology">Methodology</h2> <p>The information redundancy leads to the common solutions of using transformer to deal with long sequence time-series forecasting(LSTF) problems, where models focus more on valuable datapoints to extract time-series features. Notable models are focsing on the less explored and challenging long-term time series forecasting(LTSF) problem, include Log- Trans, Informer, Autoformer, Pyraformer, Triformer and the recent FEDformer. <d-cite key="Zeng_Chen_Zhang_Xu_2023"></d-cite> There are several main solutions:</p> <p><strong>Data decomposition</strong>. Data decomposition refers to the process of breakking down a complex dataset into simpler, manageable components. Autoformer <d-cite key="wu2021autoformer"></d-cite> first applies seasonal-trend decomposition behind each neural block, which is a standard method in time series analysis to make raw data more predictable <d-cite key="cleveland1990stl"></d-cite>. 
Specifically, they use a moving average kernel on the input sequence to extract the trend-cyclical component of the time series. The difference between the original sequence and the trend component is regarded as the seasonal component. <d-cite key="Zeng_Chen_Zhang_Xu_2023"></d-cite></p> <p><strong>Learning time trend</strong>. Positional embeddings are widely used in transformer architecture to capture spatial information. <d-cite key="feichtenhofer2022masked"></d-cite> Moreover, additional position embeddings can help the model to understand the periodicity inherented in traffic data, which implies applying the relative or global positioin encoding interms of weeks and days. <d-cite key="https://doi.org/10.1111/tgis.12644"></d-cite></p> <p><strong>Distillation</strong>. The Informer model applies ProbSparse self-attention mechanism to let each key to only attend to several dominant queries and then use the distilling operation to deal with the redundance. The operation privileges the superior ones with dominaitng features and make a focused self-attention feature map in the next layer, which trims the input’s time dimension.<d-cite key="Zhou_Zhang_Peng_Zhang_Li_Xiong_Zhang_2021"></d-cite></p> <p><strong>Patching</strong>. As proposed in ViT<d-cite key="DBLP:journals/corr/abs-2010-11929"></d-cite>, the patch embeddings are small segments of an input image, which transfer the 2D image to 1D sequence. Each patch contains partial information of the image and additional positional embedding helps the transformer to understand the order of a series of patch embeddings. In the case of time series, though it is 1D sequence that can be received by standard transformer, the self-attention may not efficiently capture the long dependencies and cause heavy computation. Hence, dealing with time-series data, patching is used to understand the temporal correlation between data in a time-step interval. 
Unlike point-wise input tokens, it enhances the locality and captures the comprehensive semantic information in different time steps by aggregating times steps into subseries-level patches. <d-cite key="nie2023time"></d-cite></p> <h2 id="experiment">Experiment</h2> <h3 id="dataset">Dataset</h3> <p>We used a multivariate traffic<d-footnote>https://pems.dot.ca.gov/</d-footnote> dataset that records the road occupancy rates from different sensors on San Francisco freeways. We selected first 100 censors as our experiment dataset.</p> <h3 id="experimental-settings">Experimental Settings</h3> <p>We choose two models, Informer<d-cite key="Zhou_Zhang_Peng_Zhang_Li_Xiong_Zhang_2021"></d-cite> and PatchTST(supervised) <d-cite key="nie2023time"></d-cite> to test the influence of distillation, positional embeddings, patching and data decomposition. For the implementation of Informer and PatchTST, we used the code provided by the authors.<d-footnote>https://github.com/yuqinie98/patchtst</d-footnote>. We mean to compare different methods that aim to efficiently explore on long sequence data, considering both efficiency and accuracy. 
This leads to a discussion about the trade-off when using these models to solve real-life cases and the possibility of improving or combining different methods.</p>
All the models follow the same setup, using 10 epochs and batch size 12 with input length \(\in\) {96,192,336,720} and prediction length \(\in\) {96,192,336,720}. The performance and time cost are listed in Table 2.</p>
</div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transformer-time/1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transformer-time/1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transformer-time/1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transformer-time/1.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Figure 3: Setting 1. Traffic forecasting result with Informer and supervised PatchTST. Input length in {96,192,336,720} and predictioin length = 720. </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transformer-time/test2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transformer-time/test2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transformer-time/test2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transformer-time/test2.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Table 3: Setting 2.Traffic forecasting result with supervised PatchTST, with and without data decomposition. Input length = 336 and predictioin length in {96,192,336,720}. </div> <p>Sufficiency. According to Table 2. 
The Informer(ProbSparse self-attention, distilling operation,positional embedding) is generally more sufficient than PatchTST(patching, positional embedding). Especially with the increase of input sequence, Informer with idstilling operation can forecast in significantly less time comparing to patching method. Across differnt prediction sequence length, PatchTST does have much difference and Informer tends to cost more time with longer prediction. According to table 3, with data decomposition, PatchTST spends more time while does not achieve significant better performance.</p> <p>Accuracy. According to Table 2. In all scenarios, the performance of PatchTST is better than Informer considering the prediction accuracy. Along with the increase of input sequence length, PatchTST tends to have better accuracy while Informer stays stable.</p> <p>Overall, we can induct from the design of two models about their performances. Informer is able to save more time with distilling operation and PatchTST can get better accuracy with the capture of local and global information. Though patch embeddings help the model to get better accuracy with prediction task, it achieves so at the expense of consuming significant amount of time. When the input sequence is 720, PatchTST takes more than twice as long as B.</p> <h2 id="conclusion-and-discussion">Conclusion and Discussion</h2> <p>Based on existing models, different measures can be combined to balance the time consumed for forecasting with the accuracy that can be achieved. Due to time constraints, this study did not have the opportunity to combine additional measures for comparison. We hope to continue the research afterward and compare these performances.</p> <p>In addition to applying transformer architecture alone, a combination of various methods or framework may help us to benefit from the advantages of different models. 
A transformer-based framework for multivariate time series representation learning is proposed by George et al. <d-cite key="DBLP:journals/corr/abs-2010-02803"></d-cite> Spatial-Temporal Graph Neural Networks (STGNNs) are another widely used model in traffic prediction, which only consider short-term data. The STEP model is proposed to enhance STGNN with a scalable time series pre-training model. In the pre-training stage, they split very long-term time series into segments and feed them into TSFormer, which is trained via the masked autoencoding strategy. Then, in the forecasting stage, they enhance the downstream STGNN based on the segment-level representations of the pre-trained TSFormer.
It supports more than 100 languages. This example is in C++. All you have to do is wrap your code in a liquid tag: {% highlight c++ linenos %} <br/> code code code <br/> {% endhighlight %} The keyword `linenos` triggers display of line numbers. You can try toggling it on or off yourself below: <figure class="highlight"><pre><code class="language-c--" data-lang="c++"><span class="kt">int</span> <span class="nf">main</span><span class="p">(</span><span class="kt">int</span> <span class="n">argc</span><span class="p">,</span> <span class="kt">char</span> <span class="k">const</span> <span class="err">\</span><span class="o">*</span><span class="n">argv</span><span class="p">[])</span> <span class="p">{</span> <span class="n">string</span> <span class="n">myString</span><span class="p">;</span> <span class="n">cout</span> <span class="o">&lt;&lt;</span> <span class="s">"input a string: "</span><span class="p">;</span> <span class="n">getline</span><span class="p">(</span><span class="n">cin</span><span class="p">,</span> <span class="n">myString</span><span class="p">);</span> <span class="kt">int</span> <span class="n">length</span> <span class="o">=</span> <span class="n">myString</span><span class="p">.</span><span class="n">length</span><span class="p">();</span> <span class="kt">char</span> <span class="n">charArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">char</span> <span class="o">*</span> <span class="p">[</span><span class="n">length</span><span class="p">];</span> <span class="n">charArray</span> <span class="o">=</span> <span class="n">myString</span><span class="p">;</span> <span class="k">for</span><span class="p">(</span><span class="kt">int</span> <span class="n">i</span> <span class="o">=</span> <span class="mi">0</span><span class="p">;</span> <span class="n">i</span> <span class="o">&lt;</span> <span class="n">length</span><span class="p">;</span> <span class="o">++</span><span class="n">i</span><span 
class="p">){</span> <span class="n">cout</span> <span class="o">&lt;&lt;</span> <span class="n">charArray</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">&lt;&lt;</span> <span class="s">" "</span><span class="p">;</span> <span class="p">}</span> <span class="k">return</span> <span class="mi">0</span><span class="p">;</span> <span class="p">}</span></code></pre></figure> <!-- ## Blockquotes <blockquote> We do not grow absolutely, chronologically. We grow sometimes in one dimension, and not in another, unevenly. We grow partially. We are relative. We are mature in one realm, childish in another. —Anais Nin </blockquote> *** --> <!-- ## Layouts The main text column is referred to as the body. It is the assumed layout of any direct descendants of the `d-article` element. <div class="fake-img l-body"> <p>.l-body</p> </div> For images you want to display a little larger, try `.l-page`: <div class="fake-img l-page"> <p>.l-page</p> </div> All of these have an outset variant if you want to poke out from the body text a little bit. For instance: <div class="fake-img l-body-outset"> <p>.l-body-outset</p> </div> <div class="fake-img l-page-outset"> <p>.l-page-outset</p> </div> Occasionally you’ll want to use the full browser width. For this, use `.l-screen`. You can also inset the element a little from the edge of the browser by using the inset variant. <div class="fake-img l-screen"> <p>.l-screen</p> </div> <div class="fake-img l-screen-inset"> <p>.l-screen-inset</p> </div> The final layout is for marginalia, asides, and footnotes. It does not interrupt the normal flow of `.l-body` sized text except on mobile screen sizes. <div class="fake-img l-gutter"> <p>.l-gutter</p> </div> --> <!-- *** ## Other Typography? Emphasis, aka italics, with *asterisks* (`*asterisks*`) or _underscores_ (`_underscores_`). Strong emphasis, aka bold, with **asterisks** or __underscores__. Combined emphasis with **asterisks and _underscores_**. 
Strikethrough uses two tildes. ~~Scratch this.~~ 1. First ordered list item 2. Another item ⋅⋅* Unordered sub-list. 1. Actual numbers don't matter, just that it's a number ⋅⋅1. Ordered sub-list 4. And another item. ⋅⋅⋅You can have properly indented paragraphs within list items. Notice the blank line above, and the leading spaces (at least one, but we'll use three here to also align the raw Markdown). ⋅⋅⋅To have a line break without a paragraph, you will need to use two trailing spaces.⋅⋅ ⋅⋅⋅Note that this line is separate, but within the same paragraph.⋅⋅ ⋅⋅⋅(This is contrary to the typical GFM line break behaviour, where trailing spaces are not required.) * Unordered list can use asterisks - Or minuses + Or pluses [I'm an inline-style link](https://www.google.com) [I'm an inline-style link with title](https://www.google.com "Google's Homepage") [I'm a reference-style link][Arbitrary case-insensitive reference text] [I'm a relative reference to a repository file](../blob/master/LICENSE) [You can use numbers for reference-style link definitions][1] Or leave it empty and use the [link text itself]. URLs and URLs in angle brackets will automatically get turned into links. http://www.example.com or <http://www.example.com> and sometimes example.com (but not on Github, for example). Some text to show that the reference links can follow later. [arbitrary case-insensitive reference text]: https://www.mozilla.org [1]: http://slashdot.org [link text itself]: http://www.reddit.com Here's our logo (hover to see the title text): Inline-style: ![alt text](https://github.com/adam-p/markdown-here/raw/master/src/common/images/icon48.png "Logo Title Text 1") Reference-style: ![alt text][logo] [logo]: https://github.com/adam-p/markdown-here/raw/master/src/common/images/icon48.png "Logo Title Text 2" Inline `code` has `back-ticks around` it. 
```javascript var s = "JavaScript syntax highlighting"; alert(s); ``` ```python s = "Python syntax highlighting" print s ``` ``` No language indicated, so no syntax highlighting. But let's throw in a <b>tag</b>. ``` Colons can be used to align columns. | Tables | Are | Cool | | ------------- |:-------------:| -----:| | col 3 is | right-aligned | $1600 | | col 2 is | centered | $12 | | zebra stripes | are neat | $1 | There must be at least 3 dashes separating each header cell. The outer pipes (|) are optional, and you don't need to make the raw Markdown line up prettily. You can also use inline Markdown. Markdown | Less | Pretty --- | --- | --- *Still* | `renders` | **nicely** 1 | 2 | 3 > Blockquotes are very handy in email to emulate reply text. > This line is part of the same quote. Quote break. > This is a very long line that will still be quoted properly when it wraps. Oh boy let's keep writing to make sure this is long enough to actually wrap for everyone. Oh, you can *put* **Markdown** into a blockquote. Here's a line for us to start with. This line is separated from the one above by two newlines, so it will be a *separate paragraph*. This line is also a separate paragraph, but... This line is only separated by a single newline, so it's a separate line in the *same paragraph*. --> Transfer Resistant Model Training 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/transfer-resistant-model-training <h2 id="introduction-and-motivation">Introduction and Motivation</h2> <p>In transfer learning, a model is trained for a specific task and is then fine-tuned for a different task <d-cite key="zhuang2020comprehensive"></d-cite>. In doing so, one tries to best leverage and reuse features and performance of the large pre-trained model for other tasks. Many works have focused on making transfer learning more robust and efficient. 
Transfer learning can be very useful for saving compute resources, time, and money.</p> <p>In this project, we study an opposing question: how to learn model weights that classify well for one dataset but reduce learning efficiency when transferred to another. The motivation is as follows. As computational resources and capable models become more accessible, the risk of unregulated agents fine-tuning existing models increases, including for malicious tasks. Recent work has shown that previously aligned models can be compromised to produce malicious or harmful outputs <d-cite key="anonymous2023shadow"></d-cite> <d-cite key="qi2023finetuning"></d-cite>. This may even occur with a few adversarial examples against models specifically trained to produce safe outputs <d-cite key="lermen2023lora"></d-cite>. Currently, risks with language models are commonly discussed. However, investigating CNNs can guide designing defenses for neural network architectures against malicious agents in general.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/setting-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/setting-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/setting-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transfer-resistant-model-training/setting.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>To our knowledge, there exists no previous literature on learning parameters robust against transfer learning. A related field is machine unlearning. 
In machine unlearning, a model must forget certain pieces of data used in training <d-cite key="cao2015towards"></d-cite> <d-cite key="10.1007/s42979-023-01767-4"></d-cite>. However, we wish to examine methods that not only guarantee poor performance after unlearning, but also after fine-tuning on the “malicious” or “forget” dataset. For example, using a popular unlearning approach which reaches 0% accuracy on the “forget” dataset, we easily fine-tuned the model with the same dataset to reach higher accuracy after a few epochs as shown below <d-cite key="tarun2023fast"></d-cite>. This is a gap in previous work in machine unlearning and demonstrates the novelty and difficulty of learning models that not only perform poorly on specified datasets but are robust against fine-tuning.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/machine_unlearning-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/machine_unlearning-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/machine_unlearning-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transfer-resistant-model-training/machine_unlearning.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>We propose two new approaches: selective knowledge distillation (SKD) and Reverse Model-Agnostic Meta-Learning (MAML) <d-cite key="finn2017model"></d-cite>. In SKD, a “student” model is trained using activations of a “teacher” for the beneficial data and trained on hardcoded activations for the “malicious” data. 
In Reverse-MAML, we attempt to learn parameters that aren’t robust to transfer to specified tasks. Due to computational constraints, we examine a toy setting with the CIFAR-10 Dataset as well as using a small CNN model shown in the appendix <d-cite key="krizhevsky2012imagenet"></d-cite>. Overall, both the Reverse-MAML and SKD approach exceed baseline approaches on scoring good accuracy on a “beneficial” dataset while being on-par with preventing fine-tuning on a “malicious” dataset. Thus, there remain limitations, and we conclude with future work.</p> <h2 id="related-works">Related Works</h2> <h3 id="1-transfer-learning">1. Transfer Learning</h3> <p>As mentioned previously, transfer learning has been a long-time objective in deep learning research <d-cite key="zhuang2020comprehensive"></d-cite> <d-cite key="raffel2020exploring"></d-cite>. By training a model on one dataset, the goal is to be able to reuse parameters and learned features to achieve high performance or efficient learning for another dataset. Transfer learning for convolutional neural networks has been a popular approach, allowing users to train a high-performance model with limited computational resources or data <d-cite key="zhuang2020comprehensive"></d-cite>. Further work has analyzed settings for successful transfer learning in image classification and further challenges when there is poor transfer <d-cite key="plested2022deep"></d-cite>.</p> <h3 id="2-model-agnostic-meta-learning-maml">2. Model-Agnostic Meta-Learning (MAML)</h3> <p>MAML is an algorithm that makes models readily adaptable to new tasks <d-cite key="finn2017model"></d-cite>. It essentially primes the model for transfer learning as effectively as possible. The algorithm attempts to learn parameters and model weights such that a few steps of gradient descent on learning a new task will lead to good performance on said new task. 
Further work has continued attempting to meta-learn useful model parameters, building off of MAML <d-cite key="goerttler2021exploring"></d-cite> <d-cite key="park2019meta"></d-cite>.</p> <h3 id="3-machine-unlearning">3. Machine Unlearning</h3> <p>A closely aligned question to ours is the problem of machine unlearning. Machine unlearning attempts to remove the influence of a set of data points on an already trained model. In this setting, a model is initially trained on some dataset <d-cite key="bourtoule2021machine"></d-cite> <d-cite key="cao2015towards"></d-cite> <d-cite key="10.1007/s42979-023-01767-4"></d-cite>. The model embeds information about and “remembers” features about data points. This means that the model uses information about the data points to make decisions. For example, LLMs like GPT can learn sensitive information about some people <d-cite key="wu2023unveiling"></d-cite>. This might pose a threat to privacy. We may want the model to “forget” some subset of the training set, in this case information about the people. However, we currently have no standardized method of doing this. Machine unlearning is a nascent field in artificial intelligence research and is currently being studied. It is a difficult problem, and our work is tangential to machine unlearning.</p> <p><br /> <br /></p> <p>To our knowledge, there hasn’t been any research on models that are resistant to transfer learning and fine-tuning. The works mentioned above, transfer learning techniques and MAML, focus on improving fine-tuning. We aim to make fine-tuning more difficult while preserving robustness on the original task. Machine unlearning seeks to forget data that the model has been previously trained on. On the other hand, our goal is to preemptively guard the model from learning certain data in the first place. 
Thus, our research question demonstrates a clear gap in existing research which has focused on either improving transfer learning or only reducing model performance on external datasets. Our research explores this new question in the deep learning field and draws from recent works to guide methodology.</p> <h2 id="methods">Methods</h2> <p>We propose three methods, one existing and two novel, to begin addressing the problem of learning parameters scoring high accuracy on a “beneficial” dataset but are robust against transfer learning on a known “malicious” dataset. Further experimental details are found in the experiments section.</p> <h3 id="1-machine-unlearning">1. Machine Unlearning</h3> <p>The first approach is a baseline and reimplementation of a popular machine unlearning method from <d-cite key="tarun2023fast"></d-cite>. Here, the model is initially trained on both the “beneficial” and “malicious” dataset and undergoes a forgetting stage where the “malicious” dataset is forgotten using a noise matrix. A final repair stage is then conducted to improve performance of the model on the “beneficial” dataset. 
Specific details can be found at <d-cite key="tarun2023fast"></d-cite>.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/performance-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/performance-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/performance-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transfer-resistant-model-training/performance.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h3 id="2-selective-knowledge-distillation">2. Selective Knowledge Distillation</h3> <p>Our first proposed novel approach is selective knowledge distillation (SKD) drawing inspiration from knowledge distillation. In knowledge distillation, a smaller “student” model is trained to imitate a larger “teacher” model by learning logits outputs from the “teacher” model. 
In doing so, the “student” model can hopefully achieve similar performance to the “teacher” model while reducing model size and complexity.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/teacher_student.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/teacher_student.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/teacher_student.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transfer-resistant-model-training/teacher_student.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>In SKD, we similarly have a “teacher” and “student” model. The “teacher” is a model that has high accuracy on the “beneficial” dataset but is not necessarily robust against fine-tuning on the “malicious” dataset. Our “student” model is almost identical in architecture to the “teacher” but excludes the final classification layer and the ReLU layer before it. 
This is shown below.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/teacher_student_architecture-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/teacher_student_architecture-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/teacher_student_architecture-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transfer-resistant-model-training/teacher_student_architecture.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Our goal is for the student model to have high performance on the “beneficial” dataset after adding a classification layer while being robust against fine-tuning on the “malicious” dataset. To perform SKD, we initially train the teacher model until reaching sufficiently high performance on the “beneficial” dataset.</p> <p>We then construct a dataset that contains all the images in the “beneficial” dataset. The labels are activations of the second-to-last layer of the “teacher” model. Note that this is similar to knowledge distillation, except we are taking the second-to-last layer’s activations. We further add all the images in the “malicious” dataset and set their labels to be a vector of significantly negative values. For our experiments, we used -100.0. 
We train the student model on this collective dataset of images and activation values.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/teacher_student_complex.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/teacher_student_complex.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/teacher_student_complex.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transfer-resistant-model-training/teacher_student_complex.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Finally, we add a fully-connected classification layer to the student model and backpropagate only on the added layer with the “beneficial” dataset.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/student-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/student-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/student-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transfer-resistant-model-training/student.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Our end goal is to prevent fine-tuning of our CNN on the “malicious” dataset. 
Thus, if the student model can output activations that all are negative if the image belongs in the “malicious” dataset, then after appending the ReLU layer and setting biases of the second-to-last layer to 0, the inputs to the final classification layer will always be 0, reducing the ability to learn on the “malicious” dataset. Furthermore, the gradient will always be 0 on inputs from the “malicious” dataset so any backpropagating on images and labels originating from the “malicious” dataset from the final layer activations would be useless.</p> <h3 id="3-reverse-maml">3. Reverse-MAML</h3> <p>Recall that MAML is focused on finding some optimal set of model weights \(\theta\) such that running gradient descent on the model from a new few-shot learning task results in a \(\theta'\) that scores high accuracy on the new task <d-cite key="finn2017model"></d-cite>. MAML achieves this by learning the optimal \(\theta\). To learn this \(\theta\), MAML computes the second order gradient on the model weights. 
This allows the model to learn about where the initial \(\theta\) should have been before an iteration of gradient descent so that taking the step of gradient descent would have led to the minimal loss.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/MAML-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/MAML-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/MAML-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transfer-resistant-model-training/MAML.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>In our version, we attempt to learn a \(\theta\) that fine-tunes well to a data distribution \(p_1\) but fine-tunes poorly to distribution \(p_2\). To do this, we partition the data into two sets: a “good” set and a “bad” set. We train such that for “good” samples MAML performs the standard algorithm above, learning \(\theta\) that would fine-tune well to the “good” samples. However, for the “bad” set we train the model to do the opposite, learning a \(\theta\) that would lead to poor fine-tuning. 
To do this, when taking the second order gradient, the model goes up the gradient instead of down.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/reverse_MAML-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/reverse_MAML-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/reverse_MAML-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transfer-resistant-model-training/reverse_MAML.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h2 id="experiments">Experiments</h2> <p>Due to computational constraints, we work in the following toy setting. We use the CIFAR-10 dataset where images in the first five ([0, 4]) classes are the “beneficial” dataset and the images in the last five ([5, 9]) classes are the “malicious” dataset. We split the 60,000 CIFAR-10 image dataset into a 40,000 image pre-training dataset, 10,000 image fine-tuning dataset, and 10,000 image test dataset. To evaluate each approach, we first evaluate the accuracy of the model on the beneficial test dataset. Then, we replace the last layer parameters of the output model, freeze all previous layer’s parameters, and finally fine-tune on the malicious fine-tuning dataset. We fine-tune using the Adam optimizer with a learning rate of 0.1 and momentum of 0.9. We finally evaluate model performance on a malicious test dataset. These steps in this evaluation represent the common pipeline to perform transfer learning and are shown below. Full hyperparameters for evaluation are listed in the appendix. 
We also perform ablation studies on the quality of the teacher model for SKD; further details are found in the Discussion section. All experiments, including ablations, are performed and averaged over 5 random seeds.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/pipeline-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/pipeline-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/pipeline-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transfer-resistant-model-training/pipeline.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/evaluation-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/evaluation-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/evaluation-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transfer-resistant-model-training/evaluation.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h2 id="results">Results</h2> <p>The first evaluation metric is accuracy of the outputted model from each approach on beneficial data. 
This is shown in the figure below.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/beneficial_accuracy-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/beneficial_accuracy-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/beneficial_accuracy-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transfer-resistant-model-training/beneficial_accuracy.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 1 </div> <p>The second metric of evaluation is the accuracy of the output model from each approach on test malicious data as it’s being fine-tuned on fine-tune malicious data. This is shown with learning curves in the figure below. 
Note that lower accuracy is better.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/malicious_accuracy-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/malicious_accuracy-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/malicious_accuracy-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transfer-resistant-model-training/malicious_accuracy.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 2 </div> <h2 id="discussion">Discussion</h2> <p>We observe that finding parameters that have high accuracy on a “beneficial” dataset but are robust against fine-tuning on a “malicious” dataset is challenging. On all three methods, including a popular machine unlearning approach, the model is able to somewhat fit to the “malicious” dataset. However, for SKD, this accuracy consistently does not significantly exceed 40%.</p> <p>More importantly, we find in Figure 1 that both Reverse-MAML and SKD are able to score higher accuracy on the beneficial dataset. This is surprising as machine unlearning methods were designed to maintain high accuracy on a retain dataset. Combining these two graphs, we conclude that there remains future work to explain why the resulting models had such high accuracy on the malicious data out-of-the-box and how to minimize it.</p> <p>We also experimented with Reverse-MAML under the Omniglot dataset <d-cite key="lake2015human"></d-cite>. Here, we attempted to fine-tune on digit images. We found that Reverse-MAML performed very well in this setting. 
After training the Reverse-MAML model, the model held around 85% test accuracy on the “Beneficial” Omniglot dataset and around 20% on the “Malicious” digit dataset. On the digit set, the model would often predict the same digit for all samples, as shown below. We believe that Reverse-MAML performed better here because the Omniglot characters and the digits are simpler to interpret and learn specific features about compared to CIFAR-10.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/digits-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/digits-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/digits-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transfer-resistant-model-training/digits.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> All digits were predicted to be a 2. </div> <p>Slow learning in SKD is likely caused by filtering by the ReLU activation function which causes activations to become 0. This ideally occurs when we train the student model to output negative activation values into the final classification layer if the input is from the “malicious” dataset. These values make it more difficult to learn useful weights for the final classification layer and apply gradient descent on earlier layers. We confirm this by measuring misses or the percent of “malicious” images that don’t result in all 0 activations into the final classification layer shown below. We show, in general, misses are low across different teacher models. 
For this ablation, we vary teacher models by the number of epochs they are trained.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/student_table-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/student_table-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/student_table-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transfer-resistant-model-training/student_table.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>We also measure how accuracy of the teacher model impacts performance of the student downstream. We vary the number of epochs the teacher model is trained in and report accuracies of the teacher model on the “beneficial” dataset below. More importantly, we empirically show that high teacher accuracy on the “beneficial” dataset is needed for the student to achieve high accuracy on the “beneficial” dataset. 
This follows our knowledge distillation framework as the student attempts to mimic the teacher model’s performance on the “beneficial” dataset by learning activation values.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/error_bounds-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/error_bounds-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/error_bounds-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transfer-resistant-model-training/error_bounds.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h2 id="limitations-and-future-work">Limitations and Future Work</h2> <h3 id="1-requirement-for-malicious-data">1. Requirement for “Malicious” data</h3> <p>The motivating example for this project was preventing a malicious agent from hijacking a model to perform undesirable tasks. However, it is often not possible to list out every possible “bad” task, and thus future work which extends from this project can explore how to prevent fine-tuning of tasks that aren’t specified as clearly and completely.</p> <h3 id="2-computational-restraints">2. Computational Restraints</h3> <p>Due to computational restraints, we were unable to test or fine-tune models with significantly higher parameter counts or experiment with larger datasets. However, this remains an important step as transfer learning or fine-tuning is commonly applied on large models which we could not sufficiently investigate. 
Thus, future work can apply these existing methods on larger models and datasets.</p> <h3 id="3-exploration-of-more-methods-in-machine-unlearning-and-meta-learning">3. Exploration of More Methods in Machine Unlearning and Meta-Learning</h3> <p>Further analysis of existing methods in machine unlearning and meta-learning can be used to benchmark our proposed approaches. Though we tried to select methods that had significant impact and success in their respective problem settings, other approaches are promising, including using MAML variants like Reptile or FOMAML <d-cite key="DBLP:journals/corr/abs-1803-02999"></d-cite>.</p> <h3 id="4-imperfection-in-filtering-malicious-data-for-skd">4. Imperfection in filtering “malicious” data for SKD</h3> <p>Ideally, in SKD, the underlying model would always output negative activation values given a “malicious” input. However, this does not always occur, and thus fitting on the malicious data is still possible. Future work can explore how to improve this, though perfect accuracy will likely not be feasible. Furthermore, it is still possible for a malicious agent to hijack the model by performing distilled learning on the second-to-last layer activations, thus removing this ideal guarantee. Future work can also investigate how to have similar guarantees throughout all of the model’s activation layers instead of just one.</p> <h2 id="conclusion">Conclusion</h2> <p>In this project, we investigated how to train a model such that it performs well on a “beneficial” dataset but is robust against transfer learning on a “malicious” dataset. First, we show this is a challenging problem, as existing state of the art methods in machine unlearning are unable to prevent fine-tuning. We then propose two new approaches: Reverse-MAML and SKD. Both serve as a proof of concept with promising preliminary results on the CIFAR-10 Dataset. 
We conclude by noting there are limitations to this work, most notably the need for a “malicious” dataset and computational limits. We then propose future work stemming from these experiments.</p> <h2 id="appendix">Appendix</h2> <p>CNN Architectures used for experiments:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/CNN_architectures-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/CNN_architectures-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-transfer-resistant-model-training/CNN_architectures-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-transfer-resistant-model-training/CNN_architectures.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <ul> <li>Note, all graphs and tables are averaged over 5 seeds with reported standard deviation.</li> </ul> Sparse Autoencoders for a More Interpretable RLHF 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/sparse-autoencoders-for-interpretable-rlhf <h2 id="introduction">Introduction</h2> <p>Understanding how machine learning models arrive at the answers they do, known as <em>machine learning interpretability</em>, is becoming increasingly important as models are deployed more widely and in high-stakes scenarios. Without interpretability, models may exhibit bias, toxicity, hallucinations, dishonesty, or malice, without their users or their creators knowing. But machine learning models are notoriously difficult to interpret. 
Adding to the challenge, the most widely used method for aligning language models with human preferences, RLHF (Reinforcement Learning from Human Feedback), impacts model cognition in ways that researchers do not understand. In this work, inspired by recent advances in sparse autoencoders from Anthropic, we investigate how sparse autoencoders can help to interpret large language models. We contribute a novel, more interpretable form of fine-tuning that only learns parameters related to interpretable features of the sparse autoencoder.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/interpretability-hard-cartoon-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/interpretability-hard-cartoon-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/interpretability-hard-cartoon-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/interpretability-hard-cartoon.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Machine learning practitioners often cannot interpret the models they build (xkcd #1838). </div> <h2 id="related-work">Related Work</h2> <p>Research on interpreting machine learning models falls broadly under one of two areas: representation-based interpretability (top-down) and mechanistic interpretability (bottom-up).</p> <p>Representation-based interpretability seeks to map out meaningful directions in the representation space of models. 
For example, Li <em>et al.</em> <d-cite key="li2023inferencetime"></d-cite> found a direction in one model that causally corresponds to truthfulness. Subsequent work by Zou <em>et al.</em> <d-cite key="zou2023representation"></d-cite> borrows from neuroscience methods to find directions for hallucination, honesty, power, and morality, in addition to several others. But directions in representation space can prove brittle. As Marks <em>et al.</em> <d-cite key="marks2023geometry"></d-cite> found, truthfulness directions for the same model can vary across datasets. Moreover, current methods for extracting representation space directions largely rely on probing <d-cite key="belinkov2022probing"></d-cite> and the linearity hypothesis <d-cite key="elhage2022superposition"></d-cite>, but models may have an incentive to store some information in nonlinear ways. For example, Gurnee <em>et al.</em> <d-cite key="gurnee2023language"></d-cite> showed that language models represent time and space using internal world models; for a world model to store physical scales ranging from the size of the sun to the size of an electron, it may prefer a logarithmic representation.</p> <p>Mechanistic interpretability, unlike representation engineering, studies individual neurons, layers, and circuits, seeking to map out model reasoning at a granular level. One challenge is that individual neurons often fire in response to many unrelated features, a phenomenon known as polysemanticity. For example, Olah <em>et al.</em> <d-cite key="olah2020zoom"></d-cite> found polysemantic neurons in vision models, including one that fires on both cat legs and car fronts. Olah <em>et al.</em> hypothesized that polysemanticity arises due to superposition, which is when the model attempts to learn more features than it has dimensions. Subsequent work investigated superposition in toy models, suggesting paths toward disentangling superposition in real models <d-cite key="elhage2022superposition"></d-cite>. 
Superposition is relevant for language models because the real world has billions of features that a model could learn (names, places, facts, etc.), while highly deployed models have many fewer hidden dimensions, such as 12,288 for GPT-3 <d-cite key="brown2020fewshot"></d-cite>.</p> <p>Recently, Sharkey <em>et al.</em> <d-cite key="sharkey2022interim"></d-cite> proposed using sparse autoencoders to pull features out of superposition. In an interim research report, the team describes inserting a sparse autoencoder, which expands dimensionality, into the residual stream of a transformer layer. In a follow-up work, Cunningham <em>et al.</em> <d-cite key="cunningham2023sparse"></d-cite> found that sparse autoencoders learn highly interpretable features in language models. In a study on one-layer transformers, Anthropic provided further evidence that sparse autoencoders can tease interpretable features out of superposition <d-cite key="bricken2023monosemanticity"></d-cite>. Although interest in sparse autoencoders in machine learning is relatively recent, sparse autoencoders have been studied in neuroscience for many decades under the name of expansion recoding <d-cite key="albus1971cerebellar"></d-cite>.</p> <p>Researchers have begun to apply sparse autoencoders to other interpretability problems. For example, Marks <em>et al.</em> <d-cite key="marks2023rlhf"></d-cite> investigated whether models on which we perform RLHF internalize the reward signal. To do so, Marks compared sparse autoencoders trained on the base model with sparse autoencoders trained on the fine-tuned model. But, to our knowledge, while others have used sparse autoencoders to probe the effects of fine-tuning, there is no prior research on using sparse autoencoders to define a more interpretable form of fine-tuning. 
We propose a new form of fine-tuning in which the learnable parameters are related to the interpretable features of the sparse autoencoder.</p> <h2 id="background">Background</h2> <p>An <strong>autoencoder</strong> is an architecture for reproducing input data, with a dimensionality bottleneck. Let $d_\text{model}$ denote the dimension of the residual stream in a transformer (4096 for Pythia 6.9B). Let $d_\text{auto}$ denote the dimensionality of the autoencoder. To enforce the dimensionality bottleneck, we require $d_\text{model} &gt; d_\text{auto}$. The diagram below depicts an autoencoder.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/autoencoder-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/autoencoder-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/autoencoder-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/autoencoder.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> An autoencoder is trained to reproduce its input, subject to a dimensionality bottleneck. </div> <p>A <strong>sparse autoencoder</strong> relies on a different kind of bottleneck, called sparsity. 
For a sparse autoencoder $g \circ f$ that acts on $x \in \mathbb{R}^{d_\text{model}}$ by sending $f(x) \in \mathbb{R}^{d_\text{auto}}$ and $g(f(x)) \in \mathbb{R}^{d_\text{model}}$, the training objective combines MSE loss with an $L^1$ sparsity penalty:</p> \[\mathcal{L}(x; f, g) = \|x - g(f(x))\|_2^2 + \beta \| f(x) \|_1,\] <p>where $\beta &gt; 0$ trades off sparsity loss with reconstruction loss. With the sparsity constraint, we can now let $d_\text{auto} &gt; d_\text{model}$ by a factor known as the <em>expansion factor</em>. In our work, we typically use an expansion factor of $4$ or $8$. The purpose of the sparse autoencoder is to expand out the dimension enough to overcome superposition. The diagram below depicts a sparse autoencoder.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sparse-autoencoder-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sparse-autoencoder-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sparse-autoencoder-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sparse-autoencoder.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> A sparse autoencoder is trained to reproduce its input, subject to an $L^1$ sparsity bottleneck. </div> <h2 id="methods">Methods</h2> <p>Our main experiment is to insert a sparse autoencoder into a transformer layer, train the sparse autoencoder, and then use the fused model to perform a new, more interpretable form of fine-tuning. 
<d-footnote>While we originally planned to investigate RLHF, we determined that existing libraries could not perform PPO (Proximal Policy Optimization) on custom model architectures such as our transformer fused with a sparse autoencoder. As a result, we chose to investigate fine-tuning instead of RLHF.</d-footnote> We run all experiments on a single A100 GPU through Google Colab Pro+.</p> <h3 id="inserting-a-sparse-autoencoder-in-a-transformer">Inserting a Sparse Autoencoder in a Transformer</h3> <p>There are three natural places to insert a sparse autoencoder into a transformer:</p> <ol> <li>MLP activations before the nonlinearity</li> <li>MLP activations before adding back to the residual stream</li> <li>The residual stream directly</li> </ol> <p>We choose the second option. The upside of operating in the MLP space is that MLP blocks may be in less superposition than the residual stream, given that MLPs may perform more isolated operations on residual stream subspaces. The upside of operating after the MLP projects down to the residual stream dimension is a matter of economy: because $d_\text{model} &lt; d_\text{MLP}$, we can afford a larger expansion factor with the same memory resources.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/transformer-with-sae-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/transformer-with-sae-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/transformer-with-sae-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/transformer-with-sae.png" class="img-fluid" width="auto" height="auto" 
onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> We insert a sparse autoencoder into a transformer after the MLP, but before adding into the residual stream. </div> <h3 id="how-we-train-our-sparse-autoencoder">How We Train our Sparse Autoencoder</h3> <p>We train our sparse autoencoder to reproduce MLP-post activations in layer one of Pythia 6.9B (deduplicated) <d-footnote>Deduplicated means that this Pythia 6.9B model was trained on scraped web text where duplicate articles and lengthy passages are removed. Because Pythia inherits from the GPT-NeoX architecture, the specific activations we collected are named gpt_neox.layers.1.mlp.dense_4h_to_h.</d-footnote>. To create a dataset of activations for training, we stream in text from <a href="https://huggingface.co/datasets/Skylion007/openwebtext">an open-source replication of WebText</a>, the dataset used to train GPT-2. For each batch of text, we collect Pythia 6.9B’s MLP-post activations at layer one and use these activations as training data for the sparse autoencoder.</p> <p>Concretely, our sparse autoencoder has four learnable parameters: $W_\text{enc}$, $W_\text{dec}$, $b_\text{enc}$, and $b_\text{dec}$. The second bias $b_\text{dec}$ is used to center the input. The sparse autoencoder encodes, applies a nonlinearity, and decodes its input $x$ as follows:</p> \[\text{SAE}(x) = \text{ReLU}((x - b_\text{dec}) W_\text{enc} + b_\text{enc}) W_\text{dec} + b_\text{dec}.\] <p>We constrain the rows of $W_\text{dec}$ to have unit norm by renormalizing after each optimizer step. Another approach to constrain the rows is to remove gradient information parallel to the feature vectors before each optimizer step, and also renormalize the rows. 
Although we did not implement it, Anthropic found that the second approach <a href="https://transformer-circuits.pub/2023/monosemantic-features/index.html#appendix-autoencoder-optimization">slightly reduces loss</a> <d-cite key="bricken2023monosemanticity"></d-cite>.</p> <p>We use an expansion factor of $4$, meaning $d_\text{auto} = 16384$. When training, we use batch size $8$, learning rate $10^{-4}$, and default $\beta_1 = 0.9, \beta_2 = 0.999$ for the Adam optimizer. Because Pythia 6.9B’s context length is $128$ tokens, each training step includes activations from $1024$ tokens. We save checkpoints every $20000$ steps ($20.48$ million tokens).</p> <p>One subtlety in training is that the sparsity constraint can eventually cause some autoencoder neurons to never activate. How to best handle these so-called dead neurons is an open question. We follow Anthropic in <a href="https://transformer-circuits.pub/2023/monosemantic-features/index.html#appendix-autoencoder-resampling">resampling dead neurons</a> to new values <d-cite key="bricken2023monosemanticity"></d-cite>. Because resampling can cause instability during training, we resample only every 10000 training steps. At that point, we say a sparse autoencoder neuron is dead if it has not activated in any of the last 5000 training steps. In an attempt to improve autoencoder performance, Anthropic resampled dead neurons to the feature directions in which the sparse autoencoder performed worst. For simplicity, we resample dead neurons by setting their corresponding rows of $W_\text{enc}$ and $W_\text{dec}$ to Kaiming uniform random vectors. We reset dead biases to zero.</p> <h3 id="fine-tuning">Fine-Tuning</h3> <p>We fine-tune Pythia 70M <d-footnote>We wanted to fine-tune Pythia 6.9B, but we encountered out-of-memory errors on an A100 GPU. 
In follow-up work, we will investigate quantization so that we can study Pythia 6.9B, including the sparse autoencoder we trained for it.</d-footnote> with our sparse autoencoder inserted in layer one <d-footnote>To learn the most about how fine-tuning affects transformer features, we would ideally learn interpretable feature directions at every transformer layer using a sparse autoencoder. Then, after fine-tuning, we could perform rich comparisons across the model. Unfortunately, reconstruction loss compounds across layers. With current training methods, it is only feasible for us to insert a sparse autoencoder into one layer of the transformer before performance significantly degrades.</d-footnote>. Instead of adjusting weights everywhere in the network, we constrain fine-tuning to adjust only a small set of interpretable parameters within the sparse autoencoder. In particular, we learn two vectors of dimension $d_\text{auto}$: a coefficient vector $c$ and a bias vector $d$. Just prior to applying $\text{ReLU}$ in the sparse autoencoder, we scale the activations by $c$ and translate them by $d$.</p> <p>For our fine-tuning experiments, the sparse autoencoder we use is trained on Pythia 70M Chess (a variant fine-tuned on a chess dataset) <d-footnote>This autoencoder was trained to perform well on Pythia 70M Chess, not on the base model Pythia 70M. In future work, we will match the models to investigate how our sparse autoencoder on Pythia 6.9B performs when fine-tuning Pythia 6.9B.</d-footnote>. We insert this sparse autoencoder into the base Pythia 70M, define new learnable parameters $c$ and $d$ as above, and freeze the gradients on every weight in the fused model except the new learnable parameters. We fine-tune on a small dataset of arithmetic questions (<a href="https://huggingface.co/datasets/EleutherAI/arithmetic">EleutherAI/arithmetic</a>). One training example is shown below:</p> \[\text{Question: What is }(2 * 7) + 2\text{? 
Answer:}\] <p>We train with batch size $8$, learning rate $10^{-3}$, and weight decay $10^{-2}$ using the AdamW optimizer <d-cite key="loshchilov2018decoupled"></d-cite> over $10$ epochs with $200$ steps per epoch. The figure below shows the training loss as we fine-tune.</p> <h2 id="results">Results</h2> <p>Our results come in two parts: an exploration of our trained sparse autoencoder on Pythia 6.9B and an analysis of fine-tuning using a smaller sparse autoencoder on Pythia 70M.</p> <h3 id="exploring-a-sparse-autoencoder">Exploring a Sparse Autoencoder</h3> <p>When inserted into Pythia 6.9B at layer one, our sparse autoencoder achieves a loss of $3.201$ (zero-ablation degrades loss to $3.227$) on the held-out dataset <a href="https://paperswithcode.com/dataset/wikitext-103">WikiText-103</a>, consisting of over 100M tokens from Good and Featured articles on Wikipedia. Pythia 6.9B’s baseline loss is $3.193$. Notably, the sparse autoencoder outperforms a zero-ablation of the layer, demonstrating that it learned features that are useful for reconstruction.</p> <p>As expected, if the sparse autoencoder is inserted into a layer it was not trained for, performance collapses. For example, if inserted at layer $31$ of Pythia 6.9B, the loss becomes $12.586$. 
Below is a figure showing the additional loss from inserting the sparse autoencoder at the first eight layers of Pythia 6.9B.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-losses-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-losses-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-losses-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-losses.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The sparse autoencoder preserves model performance in layer 1, the layer it was trained for. The green bar is loss on WikiText-103 of Pythia 6.9B on 5 random batches. The red bar is the additional loss incurred if the sparse autoencoder is inserted after the MLP at a given layer. The first eight layers are shown. </div> <p>For more details on the training run, four figures demonstrating the sparsity, $L^1$ coefficient, $L^1$ loss, and reconstruction loss of our sparse autoencoder during training are shown below. After training on the first five million tokens, we automatically begin to adjust the $L^1$ coefficient $\beta$ until we reach the desired sparsity of $1\%$. 
By the end, our sparse autoencoder stabilizes at a sparsity of $100$, which means that only $0.5\%$ of sparse autoencoder features activate on a given token.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_sparsity.svg-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_sparsity.svg-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_sparsity.svg-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_sparsity.svg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Sparsity across the training run on Pythia 6.9B. On a given batch, sparsity is recorded as the average number of sparse autoencoder features that activate on the batch's $1024$ tokens. Our sparse autoencoder stabilizes at a sparsity of around $100$, or $0.5\%$ of its hidden dimension. 
</div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_L1_coeff.svg-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_L1_coeff.svg-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_L1_coeff.svg-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_L1_coeff.svg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The $\beta$ coefficient in $L_1$ loss across the training run on Pythia 6.9B. After training on five million tokens, we begin to adjust the coefficient until the sparse autoencoder reaches its target sparsity of $1\%$. 
</div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_L1_loss.svg-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_L1_loss.svg-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_L1_loss.svg-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_L1_loss.svg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The $L^1$ loss of the sparse autoencoder across the training run on Pythia 6.9B. The $L^1$ loss initially rises while the $L^1$ coefficient is adjusted, then falls once the target sparsity is reached as the sparse autoencoder learns a more compact representation. 
</div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_reconstr_loss.svg-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_reconstr_loss.svg-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_reconstr_loss.svg-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_reconstr_loss.svg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The reconstruction loss of the sparse autoencoder across the training run on Pythia 6.9B. Reconstruction loss initially rises while the $L^1$ coefficient is adjusted, due to the tradeoff between reconstruction and sparsity. Once the $L^1$ coefficient stabilizes, reconstruction loss slowly falls as the sparse autoencoder learns a more effective representation. </div> <p>We find that our sparse autoencoder learned several interpretable features. For example, the second most frequently activating feature (feature index $11928$) activates strongly on the token “·the”. 
The figure below shows a table with examples.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_6-9b_the_feature-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_6-9b_the_feature-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_6-9b_the_feature-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/SAE_6-9b_the_feature.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The second most frequent feature (feature index $11928$) in the Pythia 6.9B sparse autoencoder activates on the token "·the". Relevant table columns are $\text{str\_tokens}$ (the token that activates the feature), $\text{context}$ (surrounding tokens in the sentence), and $\text{feature}$ (the raw feature activation in the sparse autoencoder, sorted in descending order). We include the top 15 examples. The feature activates once on “·of” and “·and”, but it activates most on the token “·the”. (Credit: the visualization code for the table is due to Neel Nanda in his open-source replication of Anthropic's sparse autoencoder work.) </div> <p>In addition, we found a surprising correlation between dead features. In particular, almost all dead features point in similar directions, as indicated by a high cosine similarity. In comparison, features that are not dead have a cosine similarity that is much closer to centered at zero. 
If dead features were drawn from the same distribution as non-dead features, we would expect cosine similarities closer to zero.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-cosine-similarity-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-cosine-similarity-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-cosine-similarity-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/sae-cosine-similarity.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The plot above shows the cosine similarity of dead features (red) and non-dead features (blue). Here, a feature is counted as dead if it activates nowhere on WikiText-103-v1. The cosine similarity is calculated compared to the average dead feature. (Credit: the visualization code for cosine similarity is due to Neel Nanda in his open-source replication of Anthropic's sparse autoencoder work.) </div> <h3 id="fine-tuning-with-a-sparse-autoencoder">Fine-Tuning with a Sparse Autoencoder</h3> <p>We fine-tune Pythia 70M on arithmetic data by adjusting only a coefficient and bias vector within the sparse autoencoder space.</p> <p>On layer $4$, we observe an unexpected lowering of loss from $6.449$ for the base model to $6.270$ after inserting the sparse autoencoder. Once fine-tuning the sparse autoencoder on arithmetic, loss remains constant at $6.270$. 
We believe that the fine-tuning may perform better when we experiment on a larger model such as Pythia 6.9B.</p> <p>Although the loss does not fall, several features that our interpretable fine-tuning adjusts are interpretable. For example, the feature that is scaled up the most activates on colons (feature index $1338$). Because colons appear twice in every line of the arithmetic data, it makes sense that the fine-tuned model would like to more readily predict colons. The figure below shows the top activations of feature $1338$ on the arithmetic dataset before and after fine-tuning. After fine-tuning, the feature activates slightly more strongly in all cases.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/colon-feature-1338-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/colon-feature-1338-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/colon-feature-1338-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/colon-feature-1338.jpeg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The table above shows the arithmetic dataset tokens on which feature $1338$ most strongly activates, before fine-tuning in the column $\text{feature}$ and after fine-tuning in the column $\text{feature (FT)}$. In all cases, the feature activates slightly more after fine-tuning. </div> <p>The feature that is most inhibited (feature index $619$) activates on newlines. 
We hypothesize that the sparse autoencoder learns to avoid newlines because, in the chess dataset for which it was trained, newlines are always followed by “Score: ”, indicating the start of a new game. But in the arithmetic dataset, newlines are always followed by “Answer: ”. Therefore, the model wants to inhibit this unhelpful feature. The discrepancy is a difference in datasets. To rigorously verify this hypothesis, we could compute direct logit attributions from feature $619$ to check whether it contributes to the “Answer” token. Either way, the inhibition above demonstrates that our fine-tuning procedure can detect and modify unhelpful features in the sparse autoencoder.</p> <p>For a broader view of the dynamics of our interpretable fine-tuning, the two figures below show the learned scale and bias terms across every feature in the sparse autoencoder space (where $d_\text{auto} = 2048$), sorted in ascending order. We observe that the majority of features are largely unaffected, but a few features at the tails are significantly enhanced or inhibited.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-bias-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-bias-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-bias-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-bias.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The learned bias in the sparse autoencoder space inhibits 
approximately half of features while enhancing the other half. The x-axis is sorted so that the feature index runs in ascending order of the learned bias. </div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-scaling-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-scaling-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-scaling-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-11-09-sparse-autoencoders-for-interpretable-rlhf.md/fine-tuning-scaling.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The learned scaling coefficient in the sparse autoencoder space significantly inhibits a small number of features while significantly enhancing several others. We also observe that a majority of features ($2/3$) are inhibited, compared to a smaller number enhanced. The x-axis is sorted so that the feature index runs in ascending order of the learned scaling. </div> <h2 id="discussion">Discussion</h2> <p>One limitation of our fine-tuning experiments is that Pythia 70M is a small model for which there are fewer interpretable features. In addition, we inserted into Pythia 70M a sparse autoencoder trained to reconstruct activations in Pythia 70M Chess. Nonetheless, our fine-tuning results are promising. The majority of features are not significantly affected, but a few features at the tails are either significantly enhanced or inhibited. 
We found it fruitful to interpret these outlier features first, as they are a starting point for finding which sparse autoencoder features matter most for the fine-tuning dataset.</p> <p>When training a sparse autoencoder on Pythia 6.9B, we were successful in learning interpretable features, such as the “the” feature. But we remain uncertain of the best way to train a sparse autoencoder, especially how to resample dead features. However, one implication of our work is that research on sparse autoencoders is accessible to a wide array of researchers. We believe a systematic study of training techniques for sparse autoencoders could benefit the field.</p> <h2 id="conclusion">Conclusion</h2> <p>Our work indicates that sparse autoencoders are a promising tool for machine learning interpretability. By inserting sparse autoencoders into transformer language models, we investigate how a novel form of fine-tuning can provide insight into changes in model behavior after fine-tuning. We find that our fine-tuning successfully modifies interpretable features in the sparse autoencoder space. Given the rapid adoption of powerful, fine-tuned language models across industries, we believe our method for interpretable fine-tuning is an important direction to continue to explore as researchers seek to understand how fine-tuning affects model cognition. Although our current work is limited because we only fine-tune Pythia 70M, future work can scale up model size, compute resources, and the number of tokens used to train the sparse autoencoder. Additionally, future work can extend from direct fine-tuning to investigating the effects of RLHF performed with PPO (Proximal Policy Optimization).</p> <h2 id="acknowledgements">Acknowledgements</h2> <p>We would like to thank Professor Isola, Professor Beery, and Dr. Bernstein for an introduction to fundamental perspectives in deep learning that will stay with us forever. 
Thank you to Logan Smith for invaluable early guidance on the questions we could explore related to sparse autoencoders. We are thankful for the AI Safety Student Team at Harvard (AISST) and MIT AI Alignment (MAIA) for a supportive community of fellow researchers.</p> <h2 id="appendix">Appendix</h2> <p>Our code is available at the following Google Colab notebooks:</p> <ol> <li><a href="https://colab.research.google.com/drive/1LAjClrzEzQEu0BMjachoHuvmbmLz1Zf1?usp=sharing">Training Sparse Autoencoders</a></li> <li><a href="https://colab.research.google.com/drive/1KF2djPVOKOemOECnZq3MeI-k9ypf7PEE?usp=sharing">Analyzing Sparse Autoencoders</a></li> <li><a href="https://colab.research.google.com/drive/1CVIIbxO7iYnVuiH5UC0bdQuDvFHtBuac?usp=sharing">Fine-Tuning with Sparse Autoencoders</a></li> <li><a href="https://colab.research.google.com/drive/1VGVImnhky0bo-SdBYYYDopQIjGpx-5aK?usp=sharing">Analysis of Fine-Tuning with Sparse Autoencoders</a></li> </ol> Using Synthetic Data to Minimize Real Data Requirements 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/proposal-2 <p>*And used it as the basis for transfer learning with the real data that someone put hard work in to generate.</p> <h2 id="introduction">Introduction</h2> <p>Synthetic biology is a burgeoning field of research which has attracted a lot of attention of the scientific community in recent years with the advancement of technologies that enable the better understanding and manipulation of biological systems. A significant contributor to its steadily increasing popularity is the diverse array of potential applications synthetic biology may have, ranging from curing cancer, to addressing significant climate issues, to colonizing other planets<d-cite key="lim2022reprogramming"></d-cite><d-cite key="delisi2020role"></d-cite><d-cite key="conde2020synthetic"></d-cite>. 
But, to effectively manipulate these biological systems, it is necessary to understand how they work and how they interact with other biological systems — it has been shown time and time again that a system characterized in isolation, compared to the same system in a broader, non-isolated context, will not perform identically<d-cite key="qian2017resource"></d-cite><d-cite key="gyorgy2015isocost"></d-cite>. This necessitates models that can predict a system’s behavior given both stimuli <em>and</em> context.</p> <p>In the synthetic biology literature, the behavior of many systems is characterized by the chemical reactions that take place; these reactions consist most frequently of the so-called central dogma of biology, in which DNA produces RNA, which produces proteins. These proteins are then free to perform almost every function within a cell, including — most notably for us — regulation of DNA. By varying the extent and nature of this regulation, these systems yield mathematical models that range from simple linear systems to highly complex nonlinear dynamical systems:</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-proposal-2/fig1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-proposal-2/fig1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-proposal-2/fig1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-proposal-2/fig1.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Figure 1: A simple model of the central dogma of biology: a stretch of DNA is used to create a strand of messenger RNA, which is used to 
create a functional protein. Functional proteins are responsible for almost all operations within the cell, from cellular movement to RNA production and everything in between. </div> <p>However, the figure above does not capture the full purview of the cell; it neglects factors that synthetic biologists know to be critical to the process of protein expression, as well as factors that have not been characterized rigorously yet. The process of analyzing the behavior of a system at the fullest level of detail necessary to encapsulate these intricate dynamics is expensive and time-consuming, and requires significant experimental data to validate — not to mention the fact that, as was mentioned, there are some factors which we simply don’t know about yet. Protein production is an immense and complex task, and identifying its critical parameters at the highest level of detail is no small feat.</p> <h3 id="enter-machine-learning">Enter Machine Learning</h3> <p>With this in mind, many synthetic biologists are experimenting with characterizing system behavior, especially when augmenting pre-existing models to include newly discovered phenomena, using machine learning and neural networks, due to their universal function approximator property. In this fashion, we may be able to better abstract the levels of biological detail, enabling better prediction of the composition of two genetic circuits.</p> <p>Unfortunately, training neural networks also requires (surprise surprise!) substantial experimental data, which is taxing on both a researcher’s budget and time — for a small lab with few researchers working, a single experiment may take upwards of 12 hours of attentive action, while yielding only up to 96 data points for training. 
Some large-scale gene expression data has been collected to assist in the development of machine learning algorithms; however, this data is focused largely on the expression of a static set of genes in different cellular contexts — rather than on a dynamic set of genes being assembled — and is therefore insufficient to address the questions of composition that are being posed here.</p> <p>This leads us to a fundamental question: <strong>can we use transfer learning to reduce the experimental data we need for training by pre-training on a synthetic dataset which uses a less-detailed model of our system?</strong> In other words, can we still derive value from the models that we know don’t account for the full depth of the system? If so, <strong>what kinds of structural similarities need to be in place for this to be the case?</strong></p> <p>In this project, we aim to address each of these questions; to do this, we will first pre-train a model using simpler synthetic data, and use this pre-trained model’s parameters as the basis for training a host of models on varying volumes of our more complex real data. Then, we will consider sets of more complex real data that are less structurally similar to our original synthetic data, and see how well our transfer learning works with each of these sets.</p> <p>In theory, since the synthetic data from the literature uses models that have already captured some of the critical details in the model, this fine-tuning step will allow us to only learn the <em>new</em> things that are specific to this more complex model, thus allowing transfer learning to be successful. 
As the two underlying models become increasingly distant, then, one would expect that this transfer will become less and less effective.</p> <h2 id="methods">Methods</h2> <h3 id="problem-formulation">Problem Formulation</h3> <p>Consider we have access to a limited number of datapoints which are input-output $(x_i,y_i)$ pairs for a biological system, and we want to train a neural network to capture the system behavior. The experimental data for the output $y_i$ we have is corrupted by an additive unit gaussian noise, due to white noise and measurement equipment precision. Moreover, we consider that we also have access to a theoretical model from another biological system which we know to be a simplified version of the one in our experiments, but which explicitly defines a mapping $\hat y_i = g(x_i)$.</p> <p>Our goal is thus to train a model $y_i = f(x_i)$ to predict the real pairs while using minimal real pairs of data $(x_i, y_i)$. Instead, we will pre-train with $(x_i, \hat y_i)$ pairs of synthetic data, and use our real data for fine-tuning.</p> <h3 id="data-acquisition">Data Acquisition</h3> <p>In this work we will additionally consider a domain shift between two datasets, which we will refer to as the big domain and the small domain. In the big domain, our inputs will vary between 0 and 20nM, and in the small domain the inputs will vary between 0 and 10nM. These domains represent the ranges for the inputs in the experiments in the small domain, which may be limited due to laboratory equipment, and the desired operation range of the systems in the big domain.</p> <p>Furthermore, <strong>for all datasets - pre-training, fine-tuning, or oracle training - we will be generating synthetic data for training and testing purposes.</strong> We will use different levels of complexity to simulate a difference between experimentally-generated and computationally-generated data. 
In a real setting, we would use the complex model $f$ that we’re trying to learn here as the simple, known model $g$ in our setup. Going forward, we will refer to the data generated by our low-complexity model $g$ as “synthetic” data, and to the data generated by our high-complexity model as “real” or “experimental” data.</p> <p>For our low-complexity theoretical model, we consider the simplest gene expression model available in the literature, in which the input $x_i$ is an activator, and the output $y_i$ is given by the following Hill function:</p> \[y_i = \eta_i \frac{\theta_i x_i}{1 + \Sigma_{j=1}^2 \theta_j x_j}, i\in {1,2},\] <p>where our $\eta_i$’s and $\theta_i$’s are all inherent parameters of the system.</p> <p>For the first experimental model, we consider a more complex gene expression model, where the activator $x_i$ must form an $n$-part complex with itself before being able to start the gene expression process, which yields the following expression for the output $y_i$:</p> \[y_i = \eta_i \frac{(\theta_i x_i)^n}{1 + \Sigma_{j=1}^2 (\theta_j x_j)^n}, i\in {1,2},\] <p>where - once again - our $\eta_i$’s and $\theta_i$’s are all inherent parameters of the system. Note that, at $n=1$, our real model is identical to our synthetic model. As one metric of increasing complexity, we will vary $n$ to change the steepness of the drop of this Hill function.</p> <p>As an additional test of increased complexity, we will consider a phosphorylation cycle in which inputs $x_i$ induce the phosphorylation or dephosphorylation of a given protein. We take the dephosphorylated protein to be an output $y_1$, and the phosphorylated protein to be a secondary output $y_2$, for which we have:</p> \[y_i = y_{tot} \frac{\theta_i x_i}{\Sigma_{j=1}^2 \theta_j x_j}, i\in {1,2},\] <p>in which $\theta_i$’s and $y_{tot}$ are each system parameters. 
Note that the only functional difference between this system and the synthetic data generation system lies in the denominator of each, as one has a nonzero bias term, where the other does not.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-proposal-2/fig2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-proposal-2/fig2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-proposal-2/fig2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-proposal-2/fig2.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Figure 2: Graphical representation of the three different synthetic or experimental models used in this project. In the first diagram, our input protein $x_i$ is activating the production of an output protein $y_i$. This is the simplest model of which we can conceive, and constitutes our synthetic data. In the second diagram, two copies of our input protein $x_i$ come together to form a complex that induces the production of our output protein $y_i$. This is a step up in complexity, and varying the number of proteins that come together allows us to introduce more and more complexity into our system. Finally, a single protein which can be either of our outputs $y_1$ or $y_2$ is moved between these states by our two input proteins $x_1$ and $x_2$. This system, while seemingly very dissimilar from the above two, winds up being mathematically not too far off, and offers another model on which to transfer our learning. 
</div> <h3 id="training--testing">Training &amp; Testing</h3> <p>For each experiment, we trained MLPs composed of 5 hidden layers with 10 nodes each and a ReLU activation function.</p> <p>For the first experiment, we performed transfer learning by pre-training our model for 90% of the total number of epochs (1800/2000) with the synthetic data sampled from the big domain, where we have a high quantity of data points (40000 $(x_i, y_i)$ pairs); for the remaining 10% of epochs, the network was trained on the experimental data sampled from the small domain, with varying numbers of data points used for training. This can be compared to a model trained exclusively on the same volume of experimental data for a full 2000 epochs, to establish a baseline level of performance. An oracle model was trained for all 2000 epochs on experimental data sampled from the big domain with a high volume of data, and serves as the best-case performance of our model.</p> <p>For the second experiment, we followed a very similar protocol as in the first experiment; the critical difference here lies in the fact that, where the fine-tuning step used different volumes of data in the previous case, we now instead use a fixed data volume (1000 $(x_i, y_i)$ pairs), and fine-tune on a host of different models of varying complexity relative to the synthetic model.</p> <p>To evaluate performance of our neural networks, we uniformly sample 100 points from the big domain, for which we calculate the L1 loss mean and variance between the network predictions and the experimental model output.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-proposal-2/fig5-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-proposal-2/fig5-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" 
srcset="/staging/assets/img/2023-11-09-proposal-2/fig5-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-proposal-2/fig5.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Figure 3: A visual example of the training done - on the right is the intended function to be learned, where the left features the output of one of the models that was trained with transfer learning. </div> <h2 id="results--analysis">Results &amp; Analysis</h2> <h3 id="experiment-1">Experiment 1</h3> <p>As was mentioned before, the first experiment was targeted towards addressing the question of whether we can pre-train a model and use transfer learning to reduce the volume of real data needed to achieve a comparable standard of accuracy. To this end, we trained several models with a fixed volume of pre-training data, and varied the volume of fine-tuning data available to the model.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-proposal-2/fig3-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-proposal-2/fig3-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-proposal-2/fig3-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-proposal-2/fig3.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Figure 4: Bar plots of model loss as the volume of fine-tuning (blue) or training (orange) data increases. 
As can be seen, at high volumes, the blue bars reach a lower loss than the orange bars, suggesting that transfer learning is effective at taking high volumes of data, and improving them further. For very low volumes, these two models are roughly equivalent, although the orange bars have a significantly higher variance than the blue bars. Somewhere in between, a transition occurs, and transfer learning outpaces learning without prior knowledge of anything. </div> <p>As can be seen in the blue bars of Figure 4, the greater the volume of real data coupled with transfer learning, the lower the loss, and the better the performance. This is to be expected, but this curve helps to give a better sense regarding how quickly we approach the limit of best-case performance, and suggests that the volume of real data used for oracle training could be cut down by nearly an order of magnitude while achieving comparable performance. One might argue that this is because the volume of real data used in this training is itself sufficient to effectively train this model; to that end, we consider the orange bars, which represent the loss of models trained for 2000 epochs exclusively on the given volume of real data. This, coupled with the blue bars, suggests that - across all volumes of data - it is, at the very least, more consistent to use transfer learning. Models trained for that duration on exclusively real data sampled from the small domain tended to overfit, and had a much higher variance as a result. As the volume of real data used for fine-tuning increased, the difference between the two regimes of transfer vs. non-transfer learning became more pronounced, and the benefits of transfer learning became more noticeable. 
Thus we conclude that we can use transfer learning to cut down on the quantity of real data needed, while sacrificing relatively little up to a ~75% cut of data requirements.</p> <h3 id="experiment-2">Experiment 2</h3> <p>Next, we wish to address the question of how structurally dissimilar a model can be while still making this transfer learning effective. To this end, we varied $n$ from our first experimental model, and generated data with our second experimental model. In each case, we performed a ~95% cut in the volume of real data relative to the volume of data used to train each oracle.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-proposal-2/fig4-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-proposal-2/fig4-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-proposal-2/fig4-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-proposal-2/fig4.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Figure 5: Bar plots of model loss as the model being learned is varied, as a means of representing increases in complexity or structure. As can be seen, within this range of complexity variation, transfer learning is consistently able to learn the system to a comparable degree across all cases. </div> <p>In Figure 5, we compare the loss of models trained with transfer learning to oracles for each - as can be seen, the transfer learning models performed consistently across all models being learned, and the oracles of each were similarly consistent. 
This suggests that the architectures of the models being learned are sufficiently similar that the transfer learning is effective, which is a promising sign for more applications in which the system being learned has been simplified significantly in its mathematical models.</p> <h2 id="conclusion">Conclusion</h2> <p>Ultimately, we’ve developed a method by which to potentially reduce the volume of experimental data needed to effectively train a machine learning model by using synthetic data generated by a lower-complexity model of the system. We’ve demonstrated that it has the potential to cut down data requirements significantly while still achieving a high level of accuracy, and that the simple system used to generate data need not match the complex system exactly, in the sense that the learning process can shore up some substantial structural differences between the simple and complex systems. These findings are not necessarily limited strictly to synthetic biological learning tasks, either - any complex, data-starved phenomenon in which there is a simpler model to describe parts of the system may find value in this. 
Looking forward, one can consider deeper structural dissimilarities, as well as application with real synthetic biological data, rather than simply using two models of increasing complexity.</p> Applications of Deep Learning in Timbre Transfer 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/proposal-1 <h2 id="introduction">Introduction</h2> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-ddsp-proposal/spectrogram-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-ddsp-proposal/spectrogram-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-ddsp-proposal/spectrogram-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-ddsp-proposal/spectrogram.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p><em>Timbre</em> is what distinguishes a flute from a trumpet, piano or any other musical instrument. Even if two performers play the same note, there is no ambiguity in the tone of their instruments. But unlike pitch (frequency) or amplitude (loudness), <em>timbre</em> is not a trivial metric; rather, it pertains much more to subjective qualities like <em>raspiness</em>, <em>articulation</em> and even musical <em>intent</em>. In this article, I’ll be discussing different data-driven approaches to extracting and manipulating this quality of sound using deep learning.</p> <audio controls=""> <source src="/staging/assets/video/2023-11-09-ddsp-proposal/brass.wav" type="audio/mpeg" /> Your browser does not support the audio element. </audio> <audio controls=""> <source src="/staging/assets/video/2023-11-09-ddsp-proposal/flute.wav" type="audio/mpeg" /> Your browser does not support the audio element. 
</audio> <p>In particular I’d like to explore <em>timbre transfer</em>, where one instrument is made to sound like another while retaining most aspects of the original performance. I’ll be training an auto-encoder architecture first conditioned on the source instrument (whistling) then tuned to tracks of trumpets to achieve whistling-to-trumpet timbre transfer. Moreover, I’d like to reduce the complexity of previous architectures to achieve realtime results suitable for musical performance.</p> <p>First, some context on sound and our perception thereof.</p> <h2 id="what-is-sound">What is Sound?</h2> <p>Our ears are sensitive to changes in air pressure over time, which we perceive as sound. Digital audio is analogous to this phenomenon, where its representation is a sequence of samples usually in the <code class="language-plaintext highlighter-rouge">[-1, 1]</code> range and discretized at a frequency high enough that it becomes indistinguishable from natural sources. This is known as the time domain, however all signals can be mapped to the frequency domain where the individual sinusoids that compose it are graphed against their respective amplitudes. 
Below is a Fourier transform <d-cite key="1"></d-cite> applied to the sound of a trumpet from above:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-ddsp-proposal/brass_freq-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-ddsp-proposal/brass_freq-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-ddsp-proposal/brass_freq-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-ddsp-proposal/brass_freq.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <audio controls=""> <source src="/staging/assets/video/2023-11-09-ddsp-proposal/brass.wav" type="audio/mpeg" /> Your browser does not support the audio element. </audio> <p>It turns out that only the bottom-most frequency, \(f_0\), informs our ears of this note’s <em>pitch</em>. 
In fact, a pure sine wave at that frequency will sound <em>similar</em> to the trumpet.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-ddsp-proposal/sine_freq-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-ddsp-proposal/sine_freq-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-ddsp-proposal/sine_freq-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-ddsp-proposal/sine_freq.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <audio controls=""> <source src="/staging/assets/video/2023-11-09-ddsp-proposal/sine.wav" type="audio/mpeg" /> Your browser does not support the audio element. </audio> <p>The distinction between the trumpet and sine wave lies in the frequencies above \(f_0\), known as overtones. Moreover, certain musical instruments exhibit an interesting <em>harmonic</em> behavior where only the overtones that are multiples of \(f_0\) are actually prominent; this is the case for most instruments you could name, though some non-examples include the gong and timpani <d-cite key="2">2</d-cite>. Below is a spectrogram, which displays the frequency-domain of a signal over time. Observe the estimated \(f_0\) (implemented using the YIN algorithm <d-cite key="3"></d-cite>) and how its multiples (\(2 * f_0\), \(3 * f_0\), etc) evolve over time.</p> <div class="l-page"> <iframe src="/staging/assets/html/2023-11-09-ddsp-proposal/spectrogram.html" frameborder="0" scrolling="no" height="600px" width="100%"></iframe> </div> <p><em>Try playing the audio clip above, whistle into the spectrogram or record your own instrument! 
The horizontal axis is time and vertical axis is frequency</em></p> <p>So how do overtones relate to timbre? Well, the harmonic series is the most obvious distinguishing factor between different instruments playing the same pitch, <strong>so we could model timbre as the evolution of \(f_0\) and its overtones’ amplitudes over time</strong>. Note that this is assuming a strictly monophonic context (one note at a time), and overlooks non-harmonic parts of the signal (e.g. a flutist’s breathing). So this representation will still sound synthetic but it forms a good basis for what we’re trying to achieve.</p> <h2 id="timbre-transfer">Timbre Transfer</h2> <p>Perhaps the most obvious method for achieving timbre transfer is approximating the pitch of the source audio (as demonstrated above) and recreating it using a synthetic MIDI instrument. However, this discards much of the expressiveness, which isn’t desirable in a musical performance.</p> <p>Rather, data-driven approaches have shown promise in audio synthesis <d-cite key="6"></d-cite> and existing deep learning architectures can be repurposed to achieve <em>nuanced</em> timbre transfer to various degrees of success. <d-cite key="5"></d-cite> treats timbre transfer as an image-to-image problem, leveraging a Conditional Adversarial Networks architecture <d-cite key="7"></d-cite> trained on natural images to transform spectrograms of audio signals. <d-cite key="4"></d-cite> uses a Denoising Diffusion Implicit Model (DDIM) to achieve similar results. 
The audio is then synthesized from these spectrograms using the Inverse Fourier Transform or another neural network.</p> <table> <tr> <th>Keyboard</th> <th>Guitar</th> <th>String</th> <th>Synth Lead</th> </tr> <tr> <td><img src="/staging/assets/img/2023-11-09-ddsp-proposal/keyboard_acoustic.png" width="200" height="200" /></td> <td><img src="/staging/assets/img/2023-11-09-ddsp-proposal/guitar_acoustic.png" width="200" height="200" /></td> <td><img src="/staging/assets/img/2023-11-09-ddsp-proposal/string_acoustic.png" width="200" height="200" /></td> <td><img src="/staging/assets/img/2023-11-09-ddsp-proposal/synth_lead_synthetic.png" width="200" height="200" /></td> </tr> </table> <p><em>Images courtesy of <d-cite key="5"></d-cite></em></p> <p>However, these methods rely on a dataset of audio tracks in two timbre domains, namely audio synthesized from MIDI instruments like in <d-cite key="9"></d-cite> since recordings of performers will never match exactly. The results thereby <em>sound</em> synthetic; a better architecture would thus be self-supervised and trained on acoustic performances directly.</p> <h2 id="proposed-model">Proposed Model</h2> <p>I experimented with an auto-encoder architecture, where a network is trained to minimize the audible difference between some input audio track \(x\) and its re-synthesized counterpart \(\hat{x}\); so, the model attempts to recreate its input \(x\) by first encoding it to some latent representation \(z\) and decoding back to audio. Note that although over-fitting is possible, a one-to-one mapping (or, <em>cheating</em>) is impossible because \(z\) <em>bottlenecks</em> (has less dimensions than) \(x\). The appeal of this approach is that the problem is now self-supervised and can be trained directly on musical performances of the <em>source</em> instrument (e.g. whistling).</p> <p>Next, the encoder is frozen (unaffected by gradient descent) and the decoder is trained anew on samples of the <em>target</em> instrument (e.g. 
trumpet). So, the network knows how to encode the <em>source</em> instrument to some \(z\), and hopefully its decoder has adapted to map \(z\) onto the <em>target</em> instrument.</p> <p>The decoder doesn’t output audio directly, nor does it generate a spectrogram like in <d-cite key="5"></d-cite>. Rather, it controls parameters of a harmonic oscillator proposed by <d-cite key="10"></d-cite> which follows the intuition of timbre as discussed prior; that is, the oscillator has parameters for its \(f_0\) and the amplitudes of each harmonic overtone. Leveraging this strong inductive bias should reduce the size of the neural network enough to be applicable to realtime performances.</p> <p>The encoder architecture is taken from <d-cite key="11"></d-cite>, whose original application is tracking pitch; I don’t track pitch explicitly, rather <d-cite key="11"></d-cite> demonstrates that CNNs can extract meaningful data from audio directly in the time domain. The issue with working in the frequency domain is shown in <d-cite key="12"></d-cite>, where we’d need a high sampling rate (and thus the network needs to be that much faster) for high frequencies or a long sampling window (which yields a network with more parameters) for low frequencies. Note that there is a nice compromise to these issues by windowing the inputs and outputs <d-cite key="13"></d-cite>, which I’d like to try later.</p> <p>Finally, the loss I’m using is multi-scale spectrogram loss proposed in <d-cite key="10"></d-cite>, which computes the L1 loss of two audio tracks in the frequency-domain on both a linear and log scale.</p> <h2 id="encoder">Encoder</h2> <p>The architecture of my model is largely inspired by Magenta’s Differentiable Digital Signal Processing (DDSP) <d-cite key="10"></d-cite> paper, where differentiable sound processors are introduced. 
Although modules like reverb and a finite-impulse response (FIR) filter are included, I’m only experimenting with its harmonic oscillator for simplicity. The architecture proposed by <d-cite key="10"></d-cite> is also an auto-encoder, however its latent representation is built on two heuristics (pitch, amplitude) rather than the audio itself. Despite this, <d-cite key="10"></d-cite> is able to achieve natural sounding instruments but its controls are limited in expression, much like MIDI inputs. Realtime Audio Variational autoEncoder (RAVE) <d-cite key="15"></d-cite> builds upon this by encoding a multiband decomposition of the source audio, or a collection of Fourier transforms with varying amount of bins to overcome limitations of the Nyquist frequency and limited precision of discretization. A single Fourier transform operates on a linear scale, where its frequency bins scale from \(0\) to its Nyquist frequency. However, humans hear on a logarithmic scale (i.e. A4 is \(440 \text{Hz}\) but an octave above that is \(880 \text{Hz}\)) so the transform has a bias towards low frequencies. Multiband decomposition approaches this by shifting the frequency bins using different window sizes of audio and letting the network generalize over the complete frequency spectrum. However, although <d-cite key="15"></d-cite> has shown some incredible results and claims to run in realtime, that is not the case in practice <d-cite key="16"></d-cite>.</p> <p>In my experiment, I leverage a Convolutional Representation for Pitch Estimation (CREPE) <d-cite key="11"></d-cite>; it is a CNN-based pitch estimator that operates directly on the time-domain of an audio signal and achieves state of the art results. 
Rather than using its output, like in <d-cite key="10"></d-cite>, I use its latent representation and train the network to generalize over more characteristics of sound than just pitch.</p> <h2 id="decoder">Decoder</h2> <d-cite key="10"></d-cite> <p>introduced the idea of using oscillators for audio synthesis as opposed to raw waveform modeling. <d-cite key="10"></d-cite> demonstrates that their architecture benefits from this inductive bias and is able to be significantly reduced in size. I wanted to experiment with the encoder for the part, so the decoder of my model remains unchanged from the original paper (for the most part). It consists of several dense layers, ReLU activation functions and layer normalization. In between these is a Gated Recurrent Unit (GRU). The harmonic oscillator from <d-cite key="10"></d-cite> cannot produce sinusoids out of phase (the instantaneous phase is accumulated at each time step) but presumably the network needs <em>some</em> time dependency to form an audio envelope.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-ddsp-proposal/adsr-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-ddsp-proposal/adsr-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-ddsp-proposal/adsr-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-ddsp-proposal/adsr.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p><em>Image courtesy of Tellef Kvifte</em></p> <h2 id="dataset">Dataset</h2> <p>I trained the target instrument auto-encoder on the URMP dataset <d-cite key="17"></d-cite>, which consists of individual recordings of performers across a variety of instruments. 
Specifically, I wrote a dataloader that selects only trumpet solo tracks and randomly samples a 4 second clip from each of them. The audio is down-sampled to \(16\text{kHz}\) because the dataset doesn’t contain many frequencies above \(8\text{kHz}\) and the reduced dimensionality allows for training on my M2 MacBook Air with a batch size of 16!</p> <p>I also created my own whistling dataset, sampled from MIT students with varying levels of proficiency. The audio clips are normalized, silence is cut out and altogether I have around 2 hours of data.</p> <h2 id="loss">Loss</h2> <p>Like <d-cite key="10"></d-cite> and <d-cite key="15"></d-cite>, I focus on <em>perceptual</em> loss which approximates human hearing. So, comparing waveforms in the time-domain would not work because humans aren’t sensitive to changes in phase whereas the signal changes drastically. I extend upon the multi-scale spectrogram loss proposed by <d-cite key="10"></d-cite>, which consists of taking the L1 norm of the two inputs’ spectrograms (so phase is discarded) in both the linear and log domain. Note that human hearing is logarithmic, but spectrograms are not. I experiment upon this by employing the log Mel spectrogram <d-cite key="8"></d-cite> which is an even better approximation of human hearing and used by <d-cite key="4"></d-cite>, <d-cite key="18"></d-cite> and <d-cite key="19"></d-cite>.</p> <h2 id="results">Results</h2> <p>I trained 500 epochs of 16 times 4 second samples on a single M2 MacBook Air with Metal acceleration, totaling around 10 hours. Unfortunately, the loss converged but the network was not able to generalize over abstract characteristics of sound as I’d hoped. Rather, it learned to represent sound as a mellow mix of harmonics instead of anything useful. I think future experiments should penalize silence (or close to it), and perhaps add skip connections from the inputs’ power (explicitly calculated) to the decoder. 
Moreover, the size of the encoder was drastically reduced (a few orders of magnitude fewer parameters in both width and depth) so it’s possible the latent representation did not contain much meaningful data.</p> <audio controls=""> <source src="/staging/assets/video/2023-11-09-ddsp-proposal/epoch0.wav" type="audio/mpeg" /> Your browser does not support the audio element. </audio> <audio controls=""> <source src="/staging/assets/video/2023-11-09-ddsp-proposal/epoch250.wav" type="audio/mpeg" /> Your browser does not support the audio element. </audio> <audio controls=""> <source src="/staging/assets/video/2023-11-09-ddsp-proposal/epoch470.wav" type="audio/mpeg" /> Your browser does not support the audio element. </audio> <p>Sample synthesized waveforms at epochs 0, 250, and 470 respectively (loud sounds warning!).</p> The Effect of Activation Functions On Superposition in Toy Models 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/interpretability-of-toy-tasks <h2 id="introduction-to-superposition">Introduction to Superposition</h2> <p>With the recent emergence of grokking, mechanistic interpretability research has trended towards understanding how models learn <d-cite key="GrokNanda"></d-cite> <d-cite key="Pizza"></d-cite>. A central concept in this pursuit is superposition - a single neuron learning multiple “features.”</p> <p>Features are the distinguishing properties of data points, the “things” that allow a neural network to learn the difference between, say, a dog and a cat, or a Phillip Isola and a Jennifer Aniston. Features are the building blocks that determine what makes one data point different from another. In many cases, features discovered by and encoded within neural networks correspond to human-understandable ideas. 
For example, in language models there exist embedding vectors describing relations like gender or relative size (e.g., the famous vec(“king”) - vec(“man”) + vec(“woman”) =~ vec(“queen”)<d-cite key="mikolov2013efficient"></d-cite>). It has been found that language models often map ideas like these to features within their parameters. Human understanding is not necessary though, as models can find and map features that exist beyond the perception of humans. This is an important part of the success (and dual inscrutability) of modern deep models, as these models can determine features and relationships within the data that allow them to model large datasets, like language, very well.</p> <p>In this work we:</p> <ol> <li>Explain Superposition, why it may occur, and why it is important</li> <li>Motivate a framework to easily study Superposition</li> <li>Study how activation functions affect Superposition</li> </ol> <h2 id="superposition-and-previous-work">Superposition and Previous Work</h2> <p>Let us elaborate further. 
If you were to train some neural network and visualize the weights - chances are you would see some mess that looks like this:</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix_equation-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix_equation-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix_equation-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/random_matrix_equation.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>You are likely looking at superposition!</p> <p>As hypothesized by <d-cite key="toymodels"></d-cite>, superposition is a phenomenon which occurs when the number of features being learned by a model is greater than the number of parameters in that model. To capture $n$ features with $m&lt;n$ parameters, one can think of the neurons as “working overtime.” In other words, some of the neurons within a model encode information about more than one feature. The neuron exhibiting superposition operates as an information compressor. The caveat is that this compression is often unpredictable and hard to understand!</p> <p>In a linear model, i.e., one which maps inputs to outputs with only linear functions, there are fewer parameters than the features it tries to represent, so it can only represent the top $m$ features. How then do neural networks use compression and map back to $n&gt;m$ features using only $m$ parameters? The answer is non-linearity. 
Clearly, the activation function is key to understanding how superposition occurs - unexplored by other work in the field. <d-cite key="elhage2022solu"></d-cite> explores the activation function in transformer MLP, but not in the setting we present here.</p> <p>But why do we care about Superposition? Why spend time studying this?</p> <p>While it may seem tangential, Superposition sheds important light on Large Language Models (LLMs)! While LLMs are billions of parameters large, this is still not enough for a one-to-one mapping to “features” on the internet. Therefore LLMs also MUST exhibit superposition to learn. We focus our current work on the $\textit{bottleneck superposition}$ regime, but <d-cite key="incidental"></d-cite> has shown that the picture is far more complicated than presented in <d-cite key="toymodels"></d-cite>. Namely, varying the initialization can change how superposition unfolds. To normalize across experiments, we initialize all weights using the Xavier norm, as outlined by <d-cite key="xavier"></d-cite>. However, this is certainly a limitation of our presented work. A more rigorous analysis of superposition with activation functions would explore it outside the context of the bottleneck regime. 
We leave this for future work.</p> <div class="row mt-3 l-page"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/feature_visual-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/feature_visual-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/feature_visual-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/feature_visual.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> From <a href="https://distill.pub/2017/feature-visualization/">Distill Blog</a>, "Feature visualization allows us to see how GoogLeNet trained on the ImageNet dataset, builds up its understanding of images over many layers. </div> <p>Previous research, as detailed in <d-cite key="toymodels"></d-cite>, has predominantly explored superposition within the confines of toy models utilizing the Rectified Linear Unit (ReLU) activation function. However, to extend these findings to contemporary neural networks, it is crucial to investigate the influence of different activation functions on superposition. Different activation functions provide different ways for a model to use superposition to its advantage.</p> <p>So you train a neural network - what happens at the neuron level? There are three possibilities. 
As the network trains each neuron has three choices:</p> <ol> <li>The neuron chooses not to encode the “features”</li> <li>The neuron chooses to dedicate itself to one feature</li> <li>The neuron chooses to encode multiple features</li> </ol> <p>(We anthropomorphize - The neuron doesn’t choose to do anything - there is no free will - you are born into a loss landscape and an optimizer telling you what to do.)</p> <p>In linear models, each neuron is limited to representing only the most significant features (2), discarding others (1). Conversely, superposition, enabled by non-linear activation functions, adopts a more inclusive approach (3), trying to encode multiple features per neuron and learning efficient representational shortcuts.</p> <p>While ReLU bears similarity to the Gaussian Error Linear Unit (GeLU) used in modern GPT architectures, a deeper understanding of how different nonlinear activations impact superposition can provide crucial insights. Such understanding is key to unraveling the complex mechanisms through which neural networks utilize non-linearities, a cornerstone in the broader narrative of neural network interpretability.</p> <h3 id="monosemanticity-and-polysemanticity">Monosemanticity and Polysemanticity</h3> <p>To connect to existing literature (2) and (3) above are given the names monosemanticity and polysemanticity. We will also follow this notation going forward.</p> <p>To describe further, the idea of superposition in neural networks leads us to two distinct types of neuron behaviors: monosemanticity and polysemanticity.</p> <p>Monosemantic neurons are those that specialize in a single, distinct feature, acting as dedicated detectors. This characteristic is often observed in the intermediate layers of architectures like Convolutional Neural Networks (CNNs), where neurons become adept at recognizing specific patterns, such as curves or colors. 
Polysemantic neurons do not align with just one feature but engage with multiple features simultaneously, offering a broader and more nuanced understanding of the data. This trait is essential for handling complex, high-dimensional datasets but comes at the cost of reduced interpretability.</p> <h2 id="motivation-and-notation">Motivation and Notation</h2> <p>Our work extends the work done in <d-cite key="toymodels"></d-cite> by examining how changing the activation function in toy model networks affects the behavior and interpretability of these networks. <d-cite key="toymodels"></d-cite> uses the canonical ReLU activation function to add non-linearity to two-layer models to analyze how superposition occurs within small networks. They did not generalize their work to other activation functions, which, we find, result in <strong>distinct</strong> new phenomena. Our work compares the ReLU function with five other common activation functions: GeLU, SiLU, Sigmoid, Tanh, and SoLU. We hope that generalizing the phenomenon across activation functions can push the toy dataset to be closer to realistic ML settings.</p> <h3 id="problem-specification">Problem Specification</h3> <p>The models in this experiment will be learning how to replicate a length-$n$ vector of inputs in the range $[0, 1]$ with a compression to a length-$m$ embedding (where $n&gt;m$). The model will then use the length-$m$ embedding to recreate the length-$n$ input, using a non-linear activation function to allow for superposition.</p> <p>We will run two variations of the experiment. One variation of the experiment will involve compressing inputs of size $n=10$ to an embedding of size $m=5$. This experiment aims to see how superposition occurs across many features which are encoded in a bottleneck with half the number of spots as there are features. The second variation of the experiment will involve compressing inputs of size $n=2$ to an embedding of size $m=1$. 
This experiment aims to understand precisely how the model encodes the second “extra” feature in a variety of settings.</p> <p>To set up this experiment, we need to create a dataset that allows for superposition to occur and that also allows for interpretability of the superposition. To motivate this further, we begin with a careful discussion of features.</p> <h3 id="features">Features</h3> <p>Features are the salient “things” that a neural network learns to differentiate inputs <d-cite key="features"></d-cite>.</p> <p>Technically, features are the properties which neural networks try to extract from data during learning to compress inputs to useful representations during inference. Although features can map to human-understandable concepts (e.g., dog ears), they can also represent properties of the data that are not immediately apparent to the human brain. To experiment with superposition, we need to encode features in a way that we can understand. In other words, we do not want our experimental model to learn features that we are unaware of. This would make it hard for us to interpret how the model maps features in the data to embeddings within its parameters, consequently obscuring how superposition works. To this aim, we must generate features within the training set for our model which are simple and understandable to us a priori. Similar to <d-cite key="toymodels"></d-cite>, we use as each input a vector with entries drawn independently from a uniform distribution over $[0, 1]$. Making each entry independent of the others enforces that each entry is its own (artificial) feature with no correlation to the other features.</p> <p>Here we define two important augmentations that we used in the dataset to simulate real-world features: sparsity and importance.</p> <h4 id="sparsity">Sparsity</h4> <p>Sparsity is a measure of how often a specific feature is present in a dataset. 
A feature is characterized as “sparse” if it only appears in a small fraction of the inputs to the model. Similarly, features that are “dense” appear in many of the inputs. We will also use the term ‘density’, which is the complement of sparsity, defined as $1-S$.</p> <p>Specifically, a feature with a sparsity of $S \in [0, 1]$ has a probability $S$ of being expressed in any given input. If we have $S=0$, this means that the feature is expressed in every input, whereas if we have $S=0.5$, this means that the feature is expected to be expressed in about half of the inputs.</p> <p>In our experiment, we train models at different sparsities to capture how sparsity affects superposition.</p> <h4 id="importance">Importance</h4> <p>Not all features are created equal!</p> <p>Some features are more useful than others in determining relevant information about inputs. For instance, when building a dog detector - capturing features related to dogs’ faces are extremely important! A model would need to pick up salient features of dogs, perhaps floppy ears and snouts. Other features, like the grass a dog is sitting on or a frisbee in a dog’s mouth, may not be as useful for detecting a dog. The varying degrees of usefulness among features are encapsulated in the concept of “importance”.</p> <p>In the context of feature detection by a neural network, importance plays a role in modulating which features are encoded within the embedded layers of the network. In the context of the superposition hypothesis, if one feature has more importance than another feature, then it would be inefficient for the network to map both features equally within the embedding; allocating more weight to the feature with greater importance would be more valuable to the network in minimizing error.</p> <p>In our experiment, we give each input feature a different importance to allow the models to differentiate between them. 
We will examine when and how the model justifies mapping multiple features of differing importances to the same neuron, i.e., we will observe the superposition of features with differing importances.</p> <h3 id="dataset">Dataset</h3> <p>To run this experiment, we will synthetically generate data that has desired sparsity and importance properties.</p> <p>Each input $x$ will be a vector of length $n$. Each element $x_i$ in the vector will be drawn independently from the other elements in the uniform range $[0, 1]$. As discussed before, we can now synonymously refer to each of these elements as features, given their independent generation. (We will refer to them as features from this point onwards.)</p> <p>Each feature $x_i$ in the vector has a relative importance to each of the other features $x_{j\ne i}$. The importance of feature $x_i$ is $I_i = r_I^i$ where $r_I\in(0, 1)$ is a constant describing the relative decay of importance between neighboring features. This attribute of the data will be implemented in the loss function (see below for more details).</p> <p>We will train separate models for each of the varying levels of sparsity. 
For an input $x$ with sparsity $S$, each feature $x_i$ will take on its “true” value, a uniformly distributed number, with a probability of $1-S$ and will otherwise be set to 0 with a probability of $S$.</p> <p>Below is a visualization of two batches of inputs with respective sparsities $S=0.5$ and $S=0.99$.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batches-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batches-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batches-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/input_batches.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Each column of the plots represents a feature vector of length 20. Each batch has size 100, corresponding to the number of columns in the plots. Notice how the changing in sparsity affects the feature density. </div> <h3 id="network">Network</h3> <p>Below are the architectures of the base (linear) and experimental (non-linear) models that we are using in this experiment. 
Of particular note is the activation function $\mathbb{f}$, which we will substitute using the aforementioned activation functions.</p> <table> <thead> <tr> <th style="text-align: center">Linear Model</th> <th style="text-align: center">Activation ( $\mathbb{f}$ ) Output Model</th> </tr> </thead> <tbody> <tr> <td style="text-align: center">\(h = Wx\)</td> <td style="text-align: center">\(h = Wx\)</td> </tr> <tr> <td style="text-align: center">\(x' = W^T h + b\)</td> <td style="text-align: center">\(x' = f(W^T h + b)\)</td> </tr> <tr> <td style="text-align: center">\(x' = W^T Wx + b\)</td> <td style="text-align: center">\(x' = f(W^T Wx + b)\)</td> </tr> </tbody> </table> <div class="row mt-3 l-page"> <div class="col-6 mx-auto mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Autoencoder-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Autoencoder-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Autoencoder-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Autoencoder.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>We create an autoencoder - compressing down to induce polysemanticity. This maps $x$ to a direction in a lower-dimensional space, represented by \(h = Wx\). Each column of $W$ corresponds to a lower-dimensional representation of a feature in $x$. To reconstruct the original vector, $W^T$ is used, ensuring clear feature representation correspondence. This structure results in a symmetric matrix $W^TW$ and allows for clear visualization of the weights. 
They visually allow for the determination of the presence of superposition.</p> <h3 id="loss">Loss</h3> <p>Sparsity, Importance and Our Network come together in the following loss function:</p> \[L = \sum_{i} \sum_{x} I_{i}(x_{i} - x'_{i})^{2}\] <p>Motivated by <d-cite key="toymodels"></d-cite>, we use a standard MSE loss, where $x_i$ and $x_i’$ measure the absolute difference in the auto-encoding of the datapoint. The Importance factor, $I_i$ , describes how important the given reconstruction is. A smaller importance will allow loss minimization even with a poor reconstruction.</p> <h2 id="results">Results</h2> <p>Below we present each activation function, along with plots depicting how training results in superposition at varying degrees of sparsity.</p> <p>For the $n=10, m=5$ experiment, we show the $W^TW$ matrix and neuron feature distribution at varying degrees of sparsity. The $W^TW$ matrix reveals which features are prioritized (shown by the diagonal terms) and any polysemanticity that occurs (shown by the off-diagonal terms). The neuron feature distribution shows how each of the $n=10$ features are mapped to each of the $m=5$ embedding dimensions. This can aid in understanding under what conditions polysemanticity arises and how it occurs under each condition of sparsity.</p> <p>For the $n=2, m=1$ experiment, we show a phase diagram. This phase diagram shows how the second “extra” feature of the length-2 input vector is encoded. There are three options: not encoded at all (only the first feature is encoded), encoded in superposition with the first feature, and encoded as the only feature (the first feature is not encoded).</p> <h3 id="relu">ReLU</h3> <p>The ReLU (Rectified Linear Units) activation function is a piecewise-linear function, a simple non-linearity that allows models to use superposition of features. 
ReLU was the only activation function used in <d-cite key="toymodels"></d-cite>, so our work with the ReLU function was primarily to verify the results from their work and create a baseline for our subsequent experiments.</p> <p>The following are the $W^TW$ matrices and feature-neuron mappings:</p> <div class="caption"> ReLU $W^TW$ Matrices </div> <div class="row mt-3 l-page"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_relu-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_relu-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_relu-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_relu.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>As per the results in <d-cite key="toymodels"></d-cite>, the ReLU model focuses on the most significant features in the low sparsity regime (generally resulting in monosemanticity), while relying on superposition in the high sparsity regime (polysemanticity). With weaker signals for the most important features in the high sparsity regime, the model encodes multiple features in each neuron activation to minimize error of the sparse signals. Notably, the ReLU model uses antipodal pairs in the mapping of features to encode multiple features to single neurons. This can be seen as a light-colored diagonal entry within $W^T W$ and a corresponding dark-colored off-diagonal entry within the same column. 
This antipodal mapping of features is a method that the model uses to compress more than one feature to one neuron. This antipodal mapping is more interpretable than other kinds of polysemanticity which occurs in subsequently-described activation functions which “speckle” multiple features into a single neuron, making it more difficult to determine how the superposition occurs in that model.</p> <p>The following is the phase diagram of the ReLU models:</p> <div class="row mt-3 l-page"> <div class="col-6 mx-auto mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_relu-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_relu-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_relu-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_relu.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-6 mx-auto mt-3 mt-md-0 d-flex align-items-center"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/legend-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/legend-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/legend-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/legend.png" class="img-fluid" 
width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>In regimes of high sparsity (i.e., below $1-S=0.1$ on the phase diagram above) the ReLU models are highly polysemantic for all relative feature importances, reflecting an inability to encode features with a sparse signal. In regimes of low sparsity, the model generally embeds the more important of the two features. This result mirrors the phase diagram in <d-cite key="toymodels"></d-cite> as expected.</p> <h3 id="gelusilu">GeLU/SiLU</h3> <p>The GeLU (Gaussian Error Linear Units) and SiLU (Sigmoid Linear Units) activation functions are very similar to one another, and as a result produced very similar experimental results. Both functions are akin to a “smoothed out” version of the ReLU function, i.e., they have no discontinuities. The GeLU has recently been popularized as the activation function of choice in many transformers, including BERT <d-cite key="Devlin2019BERTPO"></d-cite> and GPT <d-cite key="gpt"></d-cite>. The GeLU is differentiable for all $x$ - and has a smoother curve than the SiLU (Swish) activation. <d-cite key="elhage2022solu"></d-cite> found that in the setting of transformers, the GeLU was less interpretable than the SoLU. 
This may be the case after having many linear layers activation - but with a single layer this is not the case.</p> <div class="caption"> GeLU $W^TW$ Matrices </div> <div class="row mt-3 l-page"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_gelu-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_gelu-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_gelu-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_gelu.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> SiLU $W^TW$ Matrices </div> <div class="row mt-3 l-page"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_silu-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_silu-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_silu-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_silu.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>The GeLU and SiLU models exhibit 
similar kinds of superposition in their weight matrices. With increasing sparsity, superposition of features does happen, but it is more “strict” than the ReLU model, generally mapping at most two features to any single neuron. In each of the polysemantic neurons, though, there is one feature that dominates, suggesting that these activation functions enforce sparsity in their activations. There are also many antipodal pairs of features within these models, reiterating the behavior that exists in the ReLU models (also found in <d-cite key="toymodels"></d-cite>).</p> <div class="row mt-0 l-page"> <div class="col-sm mt-2 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_gelu-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_gelu-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_gelu-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_gelu.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-2 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_silu-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_silu-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_silu-1400.webp" /> <!-- Fallback to the original file --> <img 
src="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_silu.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="row mt-0 l-page"> <div class="col-6 mx-auto mt-2 mt-md-0 d-flex align-items-center"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/legend-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/legend-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/legend-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/legend.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>The above phase diagrams of the GeLU and SiLU models show a marked difference from that of the ReLU model (earlier), despite the similar shapes of these three activation functions. The GeLU and SiLU models exhibit significant monosemanticity at high degrees of sparsity, unlike the ReLU, which results in near-complete polysemanticity for sparsities higher than $S=0.9$. This difference may reflect SiLU’s and GeLU’s better fit as an activation for picking up the signal in sparse feature representations, making the case for GeLU and SiLU as more interpretable activation functions within larger models.</p> <h3 id="sigmoid">Sigmoid</h3> <p>The Sigmoid function is a smooth activation function with an output range of $(0, 1)$. 
This maps directly to the desired range of values that the model is trying to replicate.</p> <div class="caption"> Sigmoid $W^TW$ Matrices </div> <div class="row mt-3 l-page"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_sigmoid-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_sigmoid-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_sigmoid-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_sigmoid.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>The Sigmoid model exhibits superposition in all neurons as soon as the sparsity is non-zero, as can be seen from the “speckling” of non-zero off-diagonal terms in $W^T W$. This is a difference from the ReLU/GeLU/SiLU models, for which the superposition “leaks” into the least significant encoded features at low, non-zero sparsities and eventually affects all features at higher sparsities. This low-sparsity superposition may occur because the Sigmoid function strictly maps to $(0, 1)$, with increasingly large pre-activation inputs necessary to map to values close to 0 and 1. 
As such, the model may be “speckling” the off-diagonal values in an attempt to “reach” these inputs which are close to 0 and 1.</p> <div class="row mt-3 l-page"> <div class="col-6 mx-auto mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_sigmoid-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_sigmoid-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_sigmoid-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_sigmoid.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-6 mx-auto mt-3 mt-md-0 d-flex align-items-center"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/legend-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/legend-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/legend-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/legend.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>Despite differences in the occurrence of polysemanticity, the ReLU and Sigmoid models exhibit very similar phase diagrams, reflecting an inability to encode multiple features at 
sparsities above $S=0.9$ (i.e., below $1-S=0.1$ on the phase diagram). As discussed above, this may be caused by the vanilla sigmoid activation’s inability to “reach” target values close to 0 or 1.</p> <h3 id="tanh">Tanh</h3> <p>The Tanh function is another smooth activation function, but it results in significantly different behavior from the Sigmoid (despite being a linear mapping of the Sigmoid).</p> <div class="caption"> Tanh $W^TW$ Matrices </div> <div class="row mt-3 l-page"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_tanh-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_tanh-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_tanh-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_tanh.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>With the Tanh activation function, the models prioritize the most important features regardless of sparsity. This behavior is possibly attributed to the range that the Tanh function maps to $(-1, 1)$, while the target range of input values in this experiment are $[0, 1]$. 
This behavior is similar to that of a linear model (i.e., no activation function) which exhibits no capability to use superposition, but the phase diagram reveals subtle differences from the linear model results.</p> <div class="row mt-3 l-page"> <div class="col-6 mx-auto mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_tanh-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_tanh-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_tanh-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_tanh.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-6 mx-auto mt-3 mt-md-0 d-flex align-items-center"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/legend-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/legend-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/legend-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/legend.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>Although nearly performing as the linear model would, only encoding the most important feature, there is some 
difference to the linear model along the boundary between features, as can be seen around the importance of 1. This reflects the model’s ability to use non-linearity to perform superposition.</p> <h4 id="a-note-on-sigmoid-and-tanh">A Note on Sigmoid and Tanh</h4> <p>Despite similarities in the S-like curvature of the Sigmoid and Tanh activation functions, the Sigmoid model exhibits superposition, whereas the Tanh model exhibits nearly zero superposition. A key difference between the two functions is the fact that the Sigmoid function maps inputs to a range of $(0, 1)$, while the Tanh function maps inputs to a range of $(-1, 1)$. This difference is significant in our experiment, as our experiment uses models to recreate random vectors with elements in the range $[0, 1]$. The range of the Sigmoid function matches this range exactly, while the Tanh function only maps into this range for non-negative inputs. In other words, the $(-\infty, 0)$ input domain (which maps to the range $(-1, 0)$) of the Tanh function remains useless for prediction of values which should be in the range $[0, 1]$. Therefore, the Tanh function empirically acts like a linear function (i.e., no activation layer).</p> <h3 id="solu">SoLU</h3> <p>The SoLU (Softmax Linear Units) activation function is based on the work from <d-cite key="elhage2022solu"></d-cite>. \(Solu(x) = x * softmax(x)\) SoLU is a function for which the activation of each neuron is dependent on all the other neurons within its own layer. This is significantly different from all the other activations that we tested, as the activations of neurons with the other functions are independent of the other neurons within the same layer. In other words, all the other activation functions are univariate while the SoLU is multivariate. 
Similar to other approaches like L1 regularization, the SoLU amplifies neurons with relatively large pre-activations and de-amplifies neurons with relatively smaller pre-activations. This behavior pressures the model to be more monosemantic (and therefore more interpretable in some settings), as discussed in <d-cite key="elhage2022solu"></d-cite>.</p> <div class="caption"> SoLU $W^TW$ Matrices </div> <div class="row mt-3 l-page"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_solu-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_solu-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_solu-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/Sparsity_super_solu.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>In our experiment, the SoLU model results in non-zero superposition of all features with all degrees of sparsity. This may be attributed to the way that the SoLU “forces” activations to be sparse, i.e., the activations result in a “winner-takes-all” behavior due to the way that the Softmax function works. 
This is not a useful property for prediction of a vector of independently-drawn values, as the input vectors are unlikely to be peaky, i.e., the SoLU does not quite fit the purposes of its task.</p> <div class="row mt-3 l-page"> <div class="col-6 mx-auto mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_solu-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_solu-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_solu-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/phase_51_solu.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-6 mx-auto mt-3 mt-md-0 d-flex align-items-center"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/legend-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/legend-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/legend-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-interpretability-of-toy-tasks/legend.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>As seen in the heatmap plot above, the SoLU activation results in very polysemantic behavior. 
This function is not precisely fit for its task of recreating given vectors and likely results in using polysemanticity to attempt to pass information about inputs forward. Curiously, the SoLU models have a preference for the more important feature in the low sparsity regime.</p> <h3 id="bringing-them-all-together">Bringing Them All Together</h3> <div class="caption"> Sparsity vs Dimensions Per Feature </div> <div class="l-page"> <iframe src="/staging/assets/html/2023-11-09-interpretability-of-toy-tasks/file.html" frameborder="0" scrolling="no" height="600px" width="100%"></iframe> </div> <p>The diagram above depicts a variation on the two experiments explained thus far. In this experiment $n=200$ features were compressed to $m=20$ features and the loss function was tweaked to give uniform importance $I_i = 1$ to all features. This was done to determine how each activation function compresses features in different sparsity regimes without the influence of feature importance.</p> <p>On the y axis, the plot depicts a metric (dimensions per feature) that measures the number of dimensions a model dedicates to each feature. In other words, a point with a y-value near 1 represents a model that dedicates one dimension of its embedding space to one feature, whereas a point with a y-value near 0.25 represents a model that represents four features at each dimension.</p> <p>The plots are generally consistent with the analysis from the previous experiments. Many of the activations result in superposition in the low-density/high-sparsity regime, and increases in sparsity result in increases in the polysemanticity of the model (i.e., the dimensions per feature decrease). Consistent with the other experiments, SiLU and GELU perform very similarly. The Sigmoid and SoLU activations pack nearly 20 features per dimension at high sparsities. 
The Tanh activation exhibits behavior similar to the linear model, neatly packing one dimension with one feature, a result that is mirrored in the previous experiments. Similar to the results in <d-cite key="toymodels"></d-cite>, we see “sticky” behavior of the ReLU activation function at 1 and 0.5 dimensions per feature. This can be explained by the phenomenon of “antipodal pairs” discussed in <d-cite key="toymodels"></d-cite>. None of the other activation functions that we tested exhibit this behavior - which is striking since this is a well-studied effect for the ReLU activation function. This may be because the ReLU activation function is the only one that is not smooth, and therefore has different behavior than the other activation functions.</p> <h2 id="conclusion">Conclusion</h2> <p>Our investigation into the effects of various activation functions reveals that significant changes occur in model behavior depending on the chosen function. This finding underscores the ability to modulate the degree of superposition through the selection of activation functions, highlighting yet unexplored degrees of freedom in model design. This line of inquiry goes seamlessly with considerations of how neural networks are initialized and trained, suggesting these as promising future research directions.</p> <p>Our work is limited by the breadth of activation functions that we tested, though. Further iterations on each of the activation functions (e.g., tweaking the Sigmoid function to map to the range $(-\epsilon, 1+\epsilon)$) could prove fruitful in getting better performance from the models. Furthermore, while writing this blog, <d-cite key="incidental"></d-cite> published a new key insight related to the importance of initialization in superposition, which we do not explore here. 
Despite this, we have learned valuable insights about the effects that our set of activation functions can have on superposition.</p> <p>Pursuing enhanced interpretability, however, does not come without its challenges. Specifically, striving for transparency and understandability in neural network models raises concerns about the potential for deception. Despite these challenges, our work aims to develop neural network models that are more interpretable, transparent, and secure.</p> <ol class="bibliography"></ol> Training Robust Networks 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/generating-robust-networks <h1 id="introduction">Introduction</h1> <p>In recent years, deep neural networks have emerged as a dominant force in the field of machine learning, achieving remarkable success across a variety of tasks, from VGG-16 in image classification to ChatGPT in natural language modeling. However, the very complexity that allows deep neural networks to learn and represent complex patterns and relationships can also leave them susceptible to challenges such as overfitting, adversarial attacks, and interpretability. The brittleness of deep neural networks, in particular, poses a significant challenge toward their deployment in real-world applications, especially those where reliability is paramount, like medical image diagnosis and autonomous vehicle navigation. Consequently, it is crucial to develop a better understanding of deep architectures and explore strategies for enhancing robustness. This project focuses specifically on ResNet, a model introduced in 2015 for image classification that is still widely used today. 
In particular, we study the model’s vulnerability to adversarial perturbations and, subsequently, work through a strategy to enhance its resilience through data augmentation and hyperparameter optimization.</p> <h1 id="related-works">Related Works</h1> <p>ResNet<d-cite key="resnet2015"></d-cite> is a convolutional neural network architecture introduced in 2015 that sought to overcome numerical instability issues in deep networks and simplify the complexity of architecture search. It achieved this by incorporating skip connections, essentially allowing the training procedure to dynamically determine the optimal number of layers for the network. ResNet is trained on the ImageNet dataset<d-cite key="imagenet2014"></d-cite>, a popular benchmark in object category classification with 1,000 classes and millions of images. For our project, we use ResNet-18, a version of the original ResNet-34 model that is 18 layers deep, and TinyImageNet, a smaller version of ImageNet with around 100,000 images and 200 classes. This is largely for computational ease.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/resnet-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/resnet-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/resnet-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-generating-robust-networks/resnet.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Figure 1. 
ResNet-18 Architecture </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/tinyimagenet-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/tinyimagenet-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/tinyimagenet-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-generating-robust-networks/tinyimagenet.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Figure 2. Sample Images from TinyImageNet </div> <p>The brittleness of many deep neural networks for computer vision, including ResNet, is well documented. For example, adding a tiny amount of random Gaussian noise, imperceptible to the human eye, can dramatically affect the accuracy and confidence of a network. In fact, we can optimize over the input image to generate small, non-random perturbations that can be used to alter the network’s prediction behavior arbitrarily, a vulnerability that applies to a variety of networks<d-cite key="brittleness1"></d-cite><d-cite key="brittleness2"></d-cite>.</p> <p>In this project, we investigate two small perturbations: adding random Gaussian noise and modifying the colors of a small subset of pixels. We use hyperparameter search to fine-tune ResNet-18, aiming to create a network robust to these perturbations without compromising significantly on accuracy. Specifically, we examine general hyperparameters like batch size, learning rate, number of frozen layers, and more. 
The ultimate goal is to define a straightforward and resource-efficient strategy for mitigating brittleness that can potentially be extended to other architectures and domains.</p> <h1 id="methodology">Methodology</h1> <h2 id="baseline-model">Baseline Model</h2> <p>The out-of-the-box ResNet18 model is pretrained on ImageNet, achieving about 55% accuracy on the ImageNet validation set. TinyImageNet is a subset of ImageNet with fewer classes; there is a potential need for further fine-tuning of the out-of-the-box model to optimize performance. Thus, we start off by performing a simple hyperparameter grid search over batch size and learning rate. Each model is trained on the TinyImageNet training set, a dataset of 40,000 images (downsampled from 100,000 for computational ease) with 200 classes (roughly uniform class distribution). The baseline model is then selected based on accuracy on the TinyImageNet validation set, a uniformly balanced dataset of 10,000 images.</p> <h2 id="generating-adversarial-perturbations">Generating Adversarial Perturbations</h2> <p>Next, we use gradient descent to create adversarial perturbations. The first perturbation is adding a small amount of Gaussian noise. We try to maximize the probability of the input image belonging to a wrong class (the inverse of the standard cross-entropy classification objective) while also penalizing the magnitude of the noise. 
This approach is more efficient and controllable compared to attempting to add a random sample of Gaussian noise with the hope of inducing misclassification.</p> <div class="row mt-3"> <div class="col-sm-4"></div> <div class="col-sm-4 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/noise_steps-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/noise_steps-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/noise_steps-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-generating-robust-networks/noise_steps.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-4"></div> </div> <div class="caption"> Figure 3. 
Noise added to image during each step in a sample gradient descent path for the first perturbation </div> <div class="row mt-3"> <div class="col-sm-3"></div> <div class="col-sm-6 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/noise_examples-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/noise_examples-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/noise_examples-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-generating-robust-networks/noise_examples.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-3"></div> </div> <div class="caption"> Figure 4. Sample images and their Gaussian-perturbed, misclassified versions </div> <p>The other perturbation is randomly selecting a small subset of pixels (0.5%) and adjusting their color until the image is misclassified by the baseline model. 
A gradient descent approach that maximizes the probability of the input image belonging to a wrong class is used to implement this perturbation; however, it is much more sensitive to initialization and can require retries, making it less resource-efficient.</p> <div class="row mt-3"> <div class="col-sm-4"></div> <div class="col-sm-4 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/pixel_steps-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/pixel_steps-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/pixel_steps-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-generating-robust-networks/pixel_steps.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-4"></div> </div> <div class="caption"> Figure 5. 
Noise added to image during each step in a sample gradient descent path for the second perturbation </div> <div class="row mt-3"> <div class="col-sm-3"></div> <div class="col-sm-6 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/pixel_examples-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/pixel_examples-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/pixel_examples-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-generating-robust-networks/pixel_examples.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-3"></div> </div> <div class="caption"> Figure 6. Sample images and their pixel-perturbed, misclassified versions </div> <p>We generate 11,000 adversarial examples using the Gaussian noise perturbation technique on the training examples that the baseline model correctly classifies. Of these adversarial examples, we use 10,000 of them to augment the training dataset (call it the augmented training set) and reserve 1,000 for hyperparameter optimization (call it the perturbed training set). We also generate 2,000 adversarial examples using the same perturbation technique on the validation examples that the baseline model correctly classifies. 1,000 of these are used for hyperparameter optimization (call it the perturbed validation set) while the rest are saved for out-of-sample evaluation (call it the hold-out validation set).</p> <p>Note that we keep adversarial examples generated from the validation set out of the augmented training set to avoid lookahead bias. 
We want to avoid allowing the model to gain insights into the characteristics of examples that it will encounter in the validation set (since perturbed images are very similar to the original images), ensuring a more accurate assessment of the model’s robustness and generalization capabilities.</p> <p>Finally, we generate an additional 500 examples using the pixel modification perturbation technique on the validation examples that the baseline correctly classifies (call it the out-of-distribution hold-out set). These examples are reserved for out-of-sample and out-of-distribution evaluation, assessing the model’s ability to perform well on adversarial perturbations it has never seen before.</p> <h2 id="hyperparameter-optimization-to-create-a-more-robust-model">Hyperparameter Optimization to Create a More Robust Model</h2> <p>Equipped with the augmented/additional datasets from the previous step, we start the process of model creation. The relevant metrics for selecting a model are original validation accuracy (derived from the original validation dataset from TinyImageNet), perturbed training accuracy, and perturbed validation accuracy. It is crucial to look at original validation accuracy to ensure that we are not creating robust models by compromising significantly on the original image classification task. In addition, accuracy on the perturbed train dataset tells us how well our model adjusts to the perturbation, while accuracy on the perturbed validation dataset provides an additional perspective by evaluating how well the model generalizes to perturbations on images it has never seen before. The same set of metrics is used in evaluating the final model on out-of-sample datasets, in addition to accuracy on the out-of-distribution hold-out set.</p> <p>We examine how varying four different hyperparameters affects the robustness of ResNet-18. 
The first hyperparameter involves initializing the model with either weights from the baseline model or the default pre-trained weights. The next hyperparameter is how many layers of ResNet-18 are frozen during the training procedure. The last two hyperparameters are batch size and learning rate. It is important to note that we do not conduct a search over a four-dimensional hyperparameter grid for computational reasons. Instead, we fix some hyperparameters at reasonable default values while we vary over the other hyperparameters. Using the insights gleaned from this hyperparameter search, we proceed to train the final model.</p> <h2 id="comparing-models-via-visualization">Comparing Models via Visualization</h2> <p>Finally, we transform the feature maps generated for an input image into interpretable visualizations to better understand the learned representations within the models. These feature maps capture the activations of learned filters or kernels across different regions of the input images and are the basis for our analysis<d-cite key="simonyan2014"></d-cite>. Each residual block in a ResNet consists of multiple convolutional layers. We register forward hooks (a feature in Pytorch that allows us to register a function to be called each time a forward pass is executed through a layer) for each convolutional and linear layer in the network to capture and store the activations produced during the forward pass. 
The layers in the ResNet model are as follows:</p> <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>Layer: conv1, Activation shape: torch.Size([1, 64, 112, 112]) Layer: layer1.0.conv1, Activation shape: torch.Size([1, 64, 56, 56]) Layer: layer1.0.conv2, Activation shape: torch.Size([1, 64, 56, 56]) Layer: layer1.1.conv1, Activation shape: torch.Size([1, 64, 56, 56]) Layer: layer1.1.conv2, Activation shape: torch.Size([1, 64, 56, 56]) Layer: layer2.0.conv1, Activation shape: torch.Size([1, 128, 28, 28]) Layer: layer2.0.conv2, Activation shape: torch.Size([1, 128, 28, 28]) Layer: layer2.0.downsample.0, Activation shape: torch.Size([1, 128, 28, 28]) Layer: layer2.1.conv1, Activation shape: torch.Size([1, 128, 28, 28]) Layer: layer2.1.conv2, Activation shape: torch.Size([1, 128, 28, 28]) Layer: layer3.0.conv1, Activation shape: torch.Size([1, 256, 14, 14]) Layer: layer3.0.conv2, Activation shape: torch.Size([1, 256, 14, 14]) Layer: layer3.0.downsample.0, Activation shape: torch.Size([1, 256, 14, 14]) Layer: layer3.1.conv1, Activation shape: torch.Size([1, 256, 14, 14]) Layer: layer3.1.conv2, Activation shape: torch.Size([1, 256, 14, 14]) Layer: layer4.0.conv1, Activation shape: torch.Size([1, 512, 7, 7]) Layer: layer4.0.conv2, Activation shape: torch.Size([1, 512, 7, 7]) Layer: layer4.0.downsample.0, Activation shape: torch.Size([1, 512, 7, 7]) Layer: layer4.1.conv1, Activation shape: torch.Size([1, 512, 7, 7]) Layer: layer4.1.conv2, Activation shape: torch.Size([1, 512, 7, 7]) Layer: fc, Activation shape: torch.Size([1, 1000]) </code></pre></div></div> <p>After obtaining these activations, we compute the average activation values across the channels (neurons) within a specified layer of interest. This process provides insights into which regions or patterns in the input images contribute significantly to the neuron activations within that layer. 
We then create heatmap visualizations based on these average activations, highlighting the areas of the input data that have the most substantial impact on the network’s feature detection process. This allows us to gain valuable insights into how the network perceives and prioritizes various features across its layers, aiding in our understanding of the model’s inner workings.</p> <p>We use this approach to compare the baseline model to the final model, aiming to identify significant differences in feature prioritization or the patterns detected at various layers.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/heatmap_sample-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/heatmap_sample-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/heatmap_sample-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-generating-robust-networks/heatmap_sample.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Figure 7. 
Heatmap visualization at four different layers when an image of a goldfish is passed into the ResNet </div> <h1 id="results-and-discussion">Results and Discussion</h1> <h2 id="baseline-model-1">Baseline Model</h2> <p>First, we perform a grid search over batch sizes ranging from 128 to 512 and learning rates ranging from 0.0001 to 0.01.</p> <div class="row mt-3"> <div class="col-sm-3"></div> <div class="col-sm-6 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/baseline1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/baseline1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/baseline1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-generating-robust-networks/baseline1.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-3"></div> </div> <div class="caption"> Figure 8. Hyperparameter grid for baseline model </div> <p>The results from the first hyperparameter search suggest that conservative learning rates and large batch sizes lead to good performance. 
Thus, we perform a finer grid search over batch sizes ranging from 256 to 512 and learning rates ranging from 0.00001 to 0.0001.</p> <div class="row mt-3"> <div class="col-sm-3"></div> <div class="col-sm-6 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/baseline2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/baseline2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/baseline2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-generating-robust-networks/baseline2.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-3"></div> </div> <div class="caption"> Figure 9. Finer hyperparameter grid for baseline model </div> <p>Based on the results from the second hyperparameter search, we choose our baseline model to be ResNet-18 fine-tuned with a batch size of 256 and a learning rate of 0.00005. 
The baseline model achieves nearly 73% accuracy on the validation set, which is possibly due to the fact that TinyImageNet has fewer classes, so classification may be an easier task.</p> <h2 id="effect-of-hyperparameters">Effect of Hyperparameters</h2> <h4 id="number-of-unfrozen-layers">Number of Unfrozen Layers</h4> <p>Next, we evaluate how the number of unfrozen layers (up to 3) affects the robustness of the trained models, whose weights can either be initialized from the baseline model or from the pre-trained/default model (in the diagram below, <code class="language-plaintext highlighter-rouge">is_finetuned=True</code> corresponds to the baseline model).</p> <div class="row mt-3"> <div class="col-sm-7 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/final_line1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/final_line1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/final_line1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-generating-robust-networks/final_line1.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-1"></div> <div class="col-sm-4 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/final_bar1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/final_bar1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" 
srcset="/staging/assets/img/2023-11-09-generating-robust-networks/final_bar1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-generating-robust-networks/final_bar1.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Figure 10. Performance of trained models as number of frozen layers and source of initialized weights changes </div> <p>First, we observe that training for more epochs does not improve the metrics of interest. This implies that training for robustness can be computationally efficient. Next, we observe there is a substantial drop in accuracy for the perturbed datasets compared to the original validation dataset, which is to be expected. Pairing the accuracies for the perturbed datasets across hyperparameter combinations, we observe that they are tightly correlated, which implies that our models are effectively adapting to the perturbation.</p> <p>One interesting observation to note here is that accuracies on the perturbed datasets are significantly higher for the model initialized with default weights (27% compared to 10%). An intuitive explanation for this is that we have deliberately engineered a brittle baseline model, so the model is in a region of the optimization landscape characterized by high accuracy but low robustness. If we want to achieve high accuracy and high robustness, we may need to start from a more favorable position in the optimization landscape.</p> <p>Finally, we observe that freezing some layers can enhance robustness for models initialized from the default weights at the cost of performance on the original task. 
This aligns with intuition, since allowing all the weights to vary can lead to overfitting, resulting in more brittle networks.</p> <h4 id="batch-size">Batch Size</h4> <p>Next, we evaluate how batch size (ranging from 4 to 512) affects the robustness of the trained models.</p> <div class="row mt-3"> <div class="col-sm-7 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/final_line2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/final_line2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/final_line2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-generating-robust-networks/final_line2.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-1"></div> <div class="col-sm-4 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/final_bar2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/final_bar2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/final_bar2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-generating-robust-networks/final_bar2.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Figure 11. 
Performance of trained models as batch size and source of initialized weights changes </div> <p>We notice immediately that batch size has a considerable effect on robustness. For both the perturbed training set and the perturbed validation set, accuracies are markedly lower with large batch sizes (around 15%) and higher with small batch sizes (around 70%). As expected, this comes at the expense of lower performance on the original task, with original validation accuracy dropping 10% as the batch size decreases from 512 to 4. Depending on the use case, this may be an efficient tradeoff to make!</p> <h4 id="learning-rate">Learning Rate</h4> <p>Finally, we evaluate how learning rate (ranging from 0.00001 to 0.001) affects the robustness of the trained models.</p> <div class="row mt-3"> <div class="col-sm-7 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/final_line3-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/final_line3-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/final_line3-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-generating-robust-networks/final_line3.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-1"></div> <div class="col-sm-4 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/final_bar3-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/final_bar3-800.webp" /> <source 
class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/final_bar3-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-generating-robust-networks/final_bar3.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Figure 12. Performance of trained models as learning rate and source of initialized weights changes </div> <p>Like batch size, learning rate significantly impacts robustness. The sweet spot for learning rate in terms of robustness seems to be around 0.00025, with original validation accuracy dropping as the learning rate becomes more conservative; a learning rate of 0.00025 leads to a 3% drop in performance. Like before, this may be a worthwhile tradeoff to make.</p> <h2 id="out-of-sample-evaluation">Out of Sample Evaluation</h2> <p>Using the insights gained from the hyperparameter search, we define the final model with the following hyperparameters:</p> <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>is_finetuned=False num_unfrozen_layers=3 batch_size=8 learning_rate=0.00025 </code></pre></div></div> <p>Of course, this is likely not the optimal hyperparameter combination, since we were not able to perform a full grid search. The results are as follows:</p> <div class="caption"> Table 1. 
Performance of final model </div> <table> <thead> <tr> <th>Dataset</th> <th>Accuracy</th> </tr> </thead> <tbody> <tr> <td>Original validation</td> <td>0.522754</td> </tr> <tr> <td>Perturbed training</td> <td>0.569572</td> </tr> <tr> <td>Perturbed validation</td> <td>0.442720</td> </tr> <tr> <td>Hold-out validation</td> <td>0.485621</td> </tr> <tr> <td>Out-of-distribution validation</td> <td>0.489786</td> </tr> </tbody> </table> <p>Original validation, perturbed validation, and hold-out validation accuracy are somewhat lower than the optimistic estimates derived from the hyperparameter search. However, we observe that we are able to achieve nearly 50% accuracy on the out-of-distribution validation set, which contains pixel modification perturbations that the model was never trained on, underscoring the robustness and adaptability of our model.</p> <h2 id="model-comparison">Model Comparison</h2> <p>Lastly, we observe the progression of feature map representations: starting from basic visual elements such as edges and textures in the initial layers, to more complex patterns in intermediate layers, and culminating in sophisticated, high-level feature representations in the deeper layers. This layered evolution is integral to the network’s ability to analyze and recognize complex images.</p> <p>When comparing the baseline model to the final model, there are very few (if any) differences in the initial layers. By the intermediate and deeper layers, there are clear differences in which aspects of the images have the greatest activation. This observation aligns with the foundational principles of convolutional neural networks, where initial layers tend to be more generic, capturing universal features that are commonly useful across various tasks. 
As a result, the similarity in the initial layers between the baseline and final models suggests that these early representations are robust and essential for basic image processing, irrespective of specific model optimizations or task-focused training.</p> <p>However, the divergence observed in the intermediate and deeper layers is indicative of the specialized learning that occurs as a result of hyperparameter tuning in the final model. These layers, being more task-specific, have adapted to capture more complex and abstract features relevant to the particular objectives of the final model.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/comparison_original-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/comparison_original-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-generating-robust-networks/comparison_original-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-generating-robust-networks/comparison_original.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Figure 13. 
and the capacity to finely control tradeoffs between robustness and accuracy through the manipulation of diverse hyperparameters.
Finally, broadening the scope of perturbations presents another avenue, offering the potential to develop more practical models tailored for real-world applications.</p> Imposing uniformity through Poisson flow models 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/enforcing-uniformity <!-- Introduction --> <p>Most objects encountered in machine learning are extremely high dimensional. For example, a relatively small $512$x$512$ RGB image has over $750,000$ dimensions. However most of this space is empty, that is the set of well-formed images form an extremely small subset of this large space.</p> <p>Thus a useful task in machine learning is to map this large space into a much smaller space, such that the images we care about form a compact organized distribution in this new space. This is called representation learning. For such a map to be useful, there are two key features. Firstly the representations should be useful for downstream tasks and not worse than the original representation. Thus they should preserve as much of the useful data as possible. Secondly, they should be relatively task agnostic and help across a diverse array of such downstream tasks. For example, word embeddings (such as those produced by BERT <d-cite key="bert"></d-cite>) can be used for a wide array of language tasks such as language modeling and generation to sentiment analysis. An important question is how to generally find such useful representations.</p> <p>Several methods exist. For example, autoencoders <d-cite key="autoencoder"></d-cite> attempt to learn maps that are essentially bijective over the dataset we care about. These ensure that important information is not lost during the transformation. Contrastive encoders attempt to learn maps that enforce similarity between representations of similar images. 
Lastly, layers of already trained neural networks can be used as features as well.
Thus if $D(x_1, x_2)$ is some distance metric of $\mathcal{Y}$, contrastive encoders minimize $D(x, x^+)$ for positive pairs, while maximizing $D(x, x^-)$ for negative pairs.</d-cite></p> <p>In their most common formulation, they set $\mathcal{Y}$ as the hypersphere $\mathcal{S}^d \subset \mathbb{R}^d$, and use cosine similarity <d-cite key="SimCLR"></d-cite> $d(x_1, x_2) = x_1^T x_2$ as the similarity metric. Then the loss function becomes <d-footnote>$M$ and $\tau$ are hyper parameters</d-footnote></p> \[\mathcal{L} \triangleq \mathbb{E}_{(x, x^+) \sim p_{\mathrm{pos}}, \{x_i^-\}_{i=1}^M \overset{\mathrm{iid}}{\sim} p_{\mathrm{x}}} \left[ -\log \frac {e^{f(x)^T f(x^+) / \tau}} {e^{f(x)^T f(x^+) / \tau} + \sum_{i=1}^{M} e^{f(x)^T f(x_i^-) / \tau}} \right]\] <p>These encoders have been successful at several image representation tasks. Wang and Isola explained their performance through alignment and uniformity. Alignment is simply the quality that similar images are close together in the representation space.
Thus their importance can be explained equally well through the minimization of information loss.
Indeed this is how <d-cite key="isola-alignment"></d-cite> explains it.</p> <p>In this post we will investigate this even further. In particular, if regularization is the only effect that uniformity has on representations, then slightly nudging already aligned representations to make them uniform should not improve their quality. This is exactly what we will do, and we will do this through Poisson Flows.</p> <h2 id="poisson-flows">Poisson Flows</h2> <p>If you let a planar positive distribution of charges slightly above $z=0$ loose, then they will repel each other. If you stop them at some large enough distance $R$ from the origin, then their distribution approaches uniform as $R \to \infty$. This is very interesting, and what’s even more interesting is that this fact generalizes to arbitrary dimensions. Thus such fields allow a convenient way to map arbitrary high-dimensional distributions to uniform distributions. Poisson flow generative models proposed by Xu and Liu <d-cite key="poisson-flow"></d-cite> exploit this property for image generation; by sampling uniformly from the hemisphere, one can iterate through the backward ODE and thus sample from $p_{\mathrm{x}}$. 
We shall use it to impose uniformity on well-aligned features.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-enforcing-uniformity/poisson-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-enforcing-uniformity/poisson-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-enforcing-uniformity/poisson-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-enforcing-uniformity/poisson.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Figure 2: Evolution of data points through a Poisson field. As we can see, arbitrary distributions are mapped to uniform. Further the mapping is continuous. Borrowed from <d-cite key="poisson-flow"></d-cite> </div> <p>Say we have a probability distribution $p_{\mathrm{y}}$ over $\mathcal{Y}_1 = \mathbb{R^d}$. Set this distribution at the $z = 0$ plane <d-footnote>here z-dimension refers to the new dimension we recently augmented the dataset with</d-footnote> in the expanded space $\tilde{\mathcal{Y}}_1 = \mathcal{Y} \times \mathbb{R}$. Let the electric field at an arbitrary point in $\tilde{\mathcal{Y}}_1$ be defined as</p> \[E_{p_{\tilde{\mathrm{y}}}}(\tilde{y}) = \int_{\tilde{y}'} \frac{\tilde{y} - \tilde{y'}}{\|\tilde{y} - \tilde{y'} \|_2^{d+1}} \cdot p_{\tilde{\mathrm{y}}}(\tilde{y}') d\tilde{y}'\] <p>Let $\mathrm{y} \sim p_{\mathrm{y}}$. Evolve $\tilde{\mathrm{y}} = (\mathrm{y}, 0) \in \tilde{\mathcal{Y}_1}$ according to the ODE</p> \[\frac{d\tilde{\mathrm{y}}}{dt} = E_{p_{\tilde{\mathrm{y}}}}(\tilde{y})\] <p>Let the final point be $f_{\mathrm{poisson}}(\mathrm{y}; p_{\mathrm{y}})$. 
We consider the penultimate layer of AlexNet <d-cite key="alexnet"></d-cite> trained on CIFAR-10 <d-cite key="cifar-10"></d-cite> as our initial features.
For our predicted Poisson field, we use a relatively small fixed-size two-hidden layer network.
We compare this accuracy against the original accuracy.</p> <p>A summary of our approach is given in the figure below:</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-enforcing-uniformity/architecture-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-enforcing-uniformity/architecture-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-enforcing-uniformity/architecture-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-enforcing-uniformity/architecture.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Figure 4: Overview of architecture </div> <p>Further training details are given in <a href="#appendix-a-training-details">Appendix A</a>.</p> <h2 id="results">Results</h2> <p>The results are given in the below table.</p> <table> <thead> <tr> <th>Architecture</th> <th>Train accuracy</th> <th>Test accuracy</th> </tr> </thead> <tbody> <tr> <td>AlexNet</td> <td>88%</td> <td>82%</td> </tr> <tr> <td>AlexNet + Poisson Flow <em>(ours)</em></td> <td>95%</td> <td>85%</td> </tr> </tbody> </table> <p>Here we see that our method outperforms a well-trained AlexNet considerably.</p> <h2 id="conclusion">Conclusion</h2> <p>This is a surprisingly nice improvement. Note that the Poisson flow post-processing step is completely unsupervised. This seems to hint that having a uniform prior is helpful for reasons other than just regularization.</p> <p>It would be extremely interesting to develop an entirely unsupervised architecture based on Poisson flow. This would begin by using an unsupervised method to learn well-aligned features. 
We used a version of AlexNet similar to that given in Isola’s paper, such that the penultimate layer was 128 neurons wide.
Using RK4 might produce better results, and we leave this to future work.</p> <p>We finally trained a logistic classifier on top of these final representations, and printed train and test accuracies.</p> 6-DOF estimation through visual place recognition 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/dof-visual-place-recognition-satellite <h1 id="introduction">Introduction</h1> <p>The goal of this project is to demonstrate how a drone or other platform with a downward-facing camera could perform approximate geolocation using a neural scene representation of existing satellite imagery. Note that the use of the term “Visual Place Recognition” in the title is a carryover from the proposal, but no longer applies to this project. Rather, the goal of this project is to implement 6-DOF pose-estimation.</p> <p>Pose estimation <d-cite key="xiang2018posecnn"></d-cite> can refer to the ability of an agent to determine its 3D position and orientation based on visual or other sensor info.</p> <p>In this work, the goal is to compress the ground-truth image data into a neural model which maps live camera footage to geolocation coordinates.</p> <p>Twitter user Stephan Sturges demonstrates his solution<d-cite key="Sturges_2023"></d-cite> for allowing a drone with a downward-facing camera to geolocate through cross-referencing against a database of satellite images:</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-dof-visual-place-recognition-satellite/sturges_satellite_vpr-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-dof-visual-place-recognition-satellite/sturges_satellite_vpr-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" 
Thus I do not seek to predict place <em>labels</em>, but rather geolocated place <em>coordinates</em> for the camera, as well as the camera’s orientation.</p> <p>Thus, this work seeks to develop a neural network which maps a terrain image from the agent’s downward-facing camera, to a 6-DOF (position/rotation) representation of the agent in 3-space.</p>
srcset="/staging/assets/img/2023-11-09-dof-visual-place-recognition-satellite/camera_intrinsic_extrinsic-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-dof-visual-place-recognition-satellite/camera_intrinsic_extrinsic.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Camera parameters, as described in<d-cite key="Anwar_2022"></d-cite>. </div> <p>Formally<d-cite key="Anwar_2022"></d-cite>,</p> <ul> <li>The image-formation problem is modeled as a camera forming an image of the world using a planar sensor.</li> <li><strong>World coordinates</strong> refer to 3-space coordinates in the Earth or world reference frame.</li> <li><strong>Image coordinates</strong> refer to 2-space planar coordinates in the camera image plane.</li> <li><strong>Pixel coordinates</strong> refer to 2-space coordinates in the final image output from the image sensor, taking into account any translation or skew of pixel coordinates with respect to the image coordinates.</li> </ul> <p>The mapping from world coordinates to pixel coordinates is framed as two composed transformations, described as sets of parameters<d-cite key="Anwar_2022"></d-cite>:</p> <ul> <li><strong>Extrinsic camera parameters</strong> - the transformation from world coordinates to image coordinates (affected by factors “extrinsic” to the camera internals, i.e. position and orientation.)</li> <li><strong>Intrinsic camera parameters</strong> - the transformation from image coordinates to pixel coordinates (affected by factors “intrinsic” to the camera’s design.)</li> </ul> <p>And so broadly speaking, this work strives to design a neural network that can map from an image (taken by the agent’s downward-facing camera) to camera parameters of the agent’s camera. 
With camera parameters in hand, geolocation parameters automatically drop out from extracting extrinsic translation parameters.</p> <p>To simplify the task, assume that camera intrinsic characteristics are consistent from image to image, and thus could easily be calibrated out in any application use-case. Therefore, this work focuses on inferring <strong>extrinsic camera parameters</strong> from an image. We assume that pixels map directly into image space.</p> <p>The structure of extrinsic camera parameters is as follows<d-cite key="Anwar_2022"></d-cite>:</p> \[\mathbf{E}_{4 \times 4} = \begin{bmatrix} \mathbf{R}_{3 \times 3} &amp; \mathbf{t}_{3 \times 1} \\ \mathbf{0}_{1 \times 3} &amp; 1 \end{bmatrix}\] <p>where \(\mathbf{R}_{3 \times 3} \in \mathbb{R^{3 \times 3}}\) is rotation matrix representing the rotation from the world reference frame to the camera reference frame, and \(\mathbf{t}_{3 \times 1} \in \mathbb{R^{3 \times 1}}\) represents a translation vector from the world origin to the image/camera origin.</p> <p>Then the image coordinates (a.k.a. 
camera coordinates) \(P_c\) of a world point \(P_w\) can be computed as<d-cite key="Anwar_2022"></d-cite>:</p> \[\mathbf{P_c} = \mathbf{E}_{4 \times 4} \cdot \mathbf{P_w}\] <h1 id="proposed-solution">Proposed solution</h1> <h2 id="image-to-extrinsics-encoder-architecture">Image-to-extrinsics encoder architecture</h2> <p>The goal of this work, is to train a neural network which maps an image drawn from \(R^{3 \times S \times S}\) (where \(S\) is pixel side-length of an image matrix) to a pair of camera extrinsic parameters \(R_{3 \times 3}\) and \(t_{3 \times 1}\):</p> \[\mathbb{R^{3 \times S \times S}} \rightarrow \mathbb{R^{3 \times 3}} \times \mathbb{R^3}\] <p>The proposed solution is a CNN-based encoder which maps the image into a length-12 vector (the flattened extrinsic parameters); a hypothetical architecture sketch is shown below:</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-dof-visual-place-recognition-satellite/nn.svg-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-dof-visual-place-recognition-satellite/nn.svg-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-dof-visual-place-recognition-satellite/nn.svg-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-dof-visual-place-recognition-satellite/nn.svg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Image encoder architecture. 
For simplicity, assume that all features in the image have an altitude of zero
<li>TBD</li> </ul> </li> <li><strong>Convert the image coordinates and the RGB matrix into a camera image tensor (\(\mathbb{R^{3 \times S \times S}}\))</strong></li> </ul> <p>Each element of a batch from this dataloader, will be a tuple of (extrinsic parameters,camera image).</p> <h2 id="training">Training</h2> <ul> <li>For each epoch, and each mini-batch…</li> <li>unpack batch elements into camera images and ground-truth extrinsic parameters</li> <li>Apply the encoder to the camera images</li> <li>Loss: MSE between encoder estimates of extrinsic parameters, and the ground-truth values</li> </ul> <h3 id="hyperparameters">Hyperparameters</h3> <ul> <li>Architecture <ul> <li>Encoder architecture - CNN vs MLP vs ViT(?) vs …, number of layers, …</li> <li>Output normalizations</li> <li>Nonlinearities - ReLU, tanh, …</li> </ul> </li> <li>Learning-rate</li> <li>Optimizer - ADAM, etc.</li> <li>Regularizations - dropout, L1, L2, …</li> </ul> <h2 id="evaluation">Evaluation</h2> <p>For a single epoch, measure the total MSE loss of the model’s extrinsic parameter estimates relative to the ground-truth.</p> <h2 id="feasibility">Feasibility</h2> <p>Note that I am concurrently taking 6.s980 “Machine learning for inverse graphics” so I already have background in working with camera parameters, which should help me to complete this project on time.</p> <h1 id="implementation">Implementation</h1> <h2 id="source-image">Source image</h2> <p>DOF estimation was applied to a 2D aerial image<d-cite key="Taylor_2020"></d-cite>, shown below:</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-dof-visual-place-recognition-satellite/sample_image-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-dof-visual-place-recognition-satellite/sample_image-800.webp" /> <source class="responsive-img-srcset" 
media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-dof-visual-place-recognition-satellite/sample_image-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-dof-visual-place-recognition-satellite/sample_image.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Sample aerial image from <d-cite key="Taylor_2020"></d-cite>. </div> <h2 id="dataloader">Dataloader</h2> <p>A dataloader was created which (1) generates a random extrinsic camera matrix as described above, in order to generate (2) a visualization of the above source image from the perspective of the random camera matrix.</p> <p>More specifically, the dataloader generates <em>Euler Angles</em> in radians associated with the camera matrix rotation, as well as a 3D offset representing the camera’s position.</p> <p>You will notice that the images suffer from an artifact whereby the pixels are not adjacent to each other but rather have black space between them; a production implementation of this solution would require interpolation between pixels in order to produce a continuous image.</p> <p>An example of a single generated image is shown below; it is the original image, above, viewed from the perspective of a random camera matrix:</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_sample-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_sample-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_sample-1400.webp" /> 
<!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_sample.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Single datapoint from data loader. </div> <p>A batch of generated images is shown below:</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_grid-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_grid-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_grid-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-dof-visual-place-recognition-satellite/dof_grid.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> A batch from the data loader. 
</div> <p>Again, you can see that owing to a lack of interpolation, the pixels are spread out, with black space between them.</p> <h2 id="dnn-architecture">DNN architecture</h2> <p>The DNN architecture is an MLP with 6 hidden layers of width 512, 256 and 128.</p> <p>The input is a 224x224 image with 3 color channels representing the view of the source image from an orientation determined by the (unknown) camera extrinsic parameters.</p> <p>The architecture outputs 6 logit values corresponding to predictions of 3 Euler angles and 3 positional offsets for the camera extrinsic matrix.</p> <p>For this project, I experimented with the sinusoidal activation functions described in the SIREN<d-cite key="sitzmann2020implicit"></d-cite> paper. Sinusoidal activation functions, combined with MLPs, were previously shown to be more effective at capturing high-frequency information in radiance fields, compared to ReLU MLPs. I employed sinusoidal activation functions in this work in the hopes of more effectively capturing high-frequency variation in the relationship between camera extrinsic parameters and camera image pixels.</p> <p>One question which might arise is, if the DNN outputs logits, how do I account for the difference in statistical characteristics between the three Euler Angle outputs and the three translation vector outputs? I employed scikit-learn StandardScalers at both the input and the output in order to normalize image pixels and extrinsic camera matrix parameters, respectively. The use of normalization at the input is standard. The use of normalization at the output allows each dimension of the 6-logit output to learn a zero-mean, unit-variance distribution: the output StandardScaler converts from zero-mean, unit-variance to the estimated actual mean and variance of the target distribution. 
The way the output StandardScaler is computed is as follows: a batch of random data is sampled from the dataloader; mean and variance are computed; then a StandardScaler is designed such that its <em>inverse</em> maps from the computed mean and variance of the target extrinsics, to zero mean/unit-variance. Thus, run forward, the output StandardScaler will map from unit gaussian to the computed mean and variance.</p> <h2 id="training-setup">Training setup</h2> <p>I train for 80 epochs with an Adam optimizer and a learning rate of 0.00001.</p> <p>MSE loss is employed for training and evaluation. The extrinsic parameters predicted by the DNN are compared against the target (correct) extrinsic parameters which the dataloader used to generate the camera image of the scene. Recall from the previous section that, owing to the output StandardScaler, the DNN outputs 6 roughly zero-mean/unit-variance predicted camera extrinsic parameters. I chose to evaluate loss <em>relative to these zero-mean/unit-variance predictions</em>, prior to the output StandardScaler; the rationale being that I wanted each extrinsic parameter to have equal weighting in the MSE loss computation, and not be biased by the mean/variance of the particular parameter. Thus, I use the output StandardScaler in <em>inverse</em> mode to normalize the target values to zero-mean/unit-variance. MSE loss is then computed between the DNN output logits, and these normalized target values.</p> <p>A side-effect of computing MSE against normalized values, is that it is effectively a relative measure: MSE tells me how large the variance in the error between predictions and target is, relative to the unit-variance of the normalized target values. 
Thus I expect that an MSE much less than one is a good heuristic for the quality of the estimate.</p> <h1 id="training-results">Training results</h1> <p>The plot below shows that the DNN architecture was able to converge on low-MSE predictions of the extrinsic camera matrix:</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-dof-visual-place-recognition-satellite/loss_plot-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-dof-visual-place-recognition-satellite/loss_plot-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-dof-visual-place-recognition-satellite/loss_plot-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-dof-visual-place-recognition-satellite/loss_plot.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Train and test MSE loss between the predicted and actual extrinsic camera matrix. 
</div> <p>Note that the train and test curves overlap almost perfectly; this is because all datapoints generated by the dataloader are random, so in fact the model is constantly being trained on fresh data, and the resampling is really unnecessary.</p> <p>Since the final MSE is relatively small (0.020), and since (as described in the previous section) the MSE is effectively a relative measure of error, I believe the DNN is learning a relatively good estimate of camera extrinsics.</p> <h1 id="conclusion">Conclusion</h1> <p>Based on the low MSE attained during training, I believe I successfully trained a DNN to roughly estimate camera extrinsics from orientation-dependent camera views.</p> <p>There are many improvements which would be necessary in order to deploy this in production.</p> <p>For example, it would be better to use more detailed satellite imagery, preferably with stereoscopic views that effectively provide 3D information. Without having 3D information about the scene, it is hard to train the model to recognize how the scene will look from different angles. In my work, I used a 2D image and essentially assumed that the height of the geographic features in the image was negligible, such that I could approximate the 3D point-cloud as lying within a 2D plane. With stereoscopic satellite data, it could be possible to construct a truly 3D point-cloud, on which basis I could synthesize more accurate camera views during the training process.</p> <p>Also, as discussed in the Implementation section, it would likely be necessary to implement interpolation between the pixels when generating simulated camera views. 
Otherwise, the camera views during training would look nothing like what the camera would see in the real world.</p> Tracing the Seeds of Conflict: Advanced Semantic Parsing Techniques for Causality Detection in News Texts 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/conflict-causality <h2 id="introduction">Introduction</h2> <blockquote> <p><em>“In the complex world of political conflicts, understanding the underlying dynamics can often feel like trying to solve a puzzle with missing pieces. This project attempts to find those missing pieces through a novel approach that combines the insights of qualitative research with the precision of quantitative analysis.”</em></p> </blockquote> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-conflict-causality/img1_map-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-conflict-causality/img1_map-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-conflict-causality/img1_map-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-conflict-causality/img1_map.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p align="center" style="color: white; font-style: italic; font-weight: bold;">Retrieved from https://conflictforecast.org</p> <p>Political conflicts are multifaceted and dynamic, posing significant challenges for researchers attempting to decode their intricate patterns. Traditional methods, while insightful, often grapple with the dual challenges of scale and specificity. 
This project embarks on an innovative journey to bridge this gap, leveraging a frame-semantic parser to illustrate its applicability for the task and to discuss an approach to achieve domain-specificity for the model using semantic similarity. By synthesizing the depth of qualitative research into the scalability of quantitative methods, we aim to contribute to more informed analyses and actions in low-resource, low-tech domains like conflict studies.</p> <p>On this journey, the project’s key contributions are:</p> <ol> <li> <p><strong>Advancing Frame-Semantic Parsing in Conflict Research</strong>: We introduce the frame-semantic parser, a method that brings a high degree of explainability to conflict studies. Particularly when used in conjunction with news articles, this parser emerges as a powerful tool in areas where data is scarce, enabling deeper insights into the nuances of political conflicts.</p> </li> <li> <p><strong>Harnessing Semantic Similarity for Domain Attunement</strong>: The project underscores the significance of semantic similarity analysis as a precursor to frame-semantic parsing. This approach finely tunes the parser to specific thematic domains, addressing the gaps often present in domain distribution of common data sources. It illustrates how tailoring the parser input can yield more contextually relevant insights.</p> </li> <li> <p><strong>Demonstrating Domain-Dependent Performance in Frame-Semantic Parsing</strong>: We delve into the impact of thematic domains on the performance of a transformer-based frame-semantic parser. The research highlights how the parser’s effectiveness varies with the domain of analysis, primarily due to biases and structural peculiarities in the training data. 
This finding is pivotal for understanding the limitations and potential of semantic parsing across different contexts.</p> </li> <li> <p><strong>Developing Domain-Specific Performance Metrics</strong>: In environments where additional, domain-specific labeled test data is scarce, the project proposes an intuitive method to derive relevant performance metrics. This approach not only aligns the evaluation more closely with the domain of interest but also provides a practical solution for researchers working in resource-constrained settings.</p> </li> </ol> <h2 id="literature-background">Literature Background</h2> <h3 id="qualitative-research-on-conflicts">Qualitative Research on Conflicts</h3> <p>Qualitative research has long been a cornerstone in the study of political conflicts. This body of work, now well-established, emphasizes the unique nature of each conflict, advocating for a nuanced, context-specific approach to understanding the drivers and dynamics of conflicts. Researchers in this domain have developed a robust understanding of the various pathways that lead to conflicts, highlighting the importance of cultural, historical, and socio-political factors in shaping these trajectories. While rich in detail and depth, this approach often faces challenges in scalability and systematic analysis across diverse conflict scenarios.</p> <h3 id="the-role-of-quantitative-methods">The Role of Quantitative Methods</h3> <p>The emergence of computational tools has spurred a growing interest in quantitative approaches to conflict research. These methods primarily focus on predicting the severity and outcomes of ongoing conflicts, with some success <d-cite key="beck2000improving"></d-cite>. However, the onset of conflicts remains challenging to predict, indicating a need for more sophisticated tools and methodologies. Quantitative methods provide scalability and a degree of objectivity but often fail to capture the complexities and evolving nature of conflicts. 
<d-cite key="goldstein1992conflict"></d-cite>’s work on a conflict-cooperation scale illustrates the difficulty in quantifying conflict dynamics and the controversy in creating aggregate time series from event data. <d-cite key="vesco2022united"></d-cite> highlight the importance of diverse, accurate predictions in conflict forecasting, noting the value of incorporating contextual variables to predict early signals of escalation.</p> <h3 id="bridging-the-gap-with-explainable-modeling-approaches">Bridging the Gap with Explainable Modeling Approaches</h3> <p>The challenge now lies in bridging the insights from qualitative research with the systematic, data-driven approaches of quantitative methods. While the former provides a deep understanding of conflict pathways, the latter offers tools for large-scale analysis and prediction. The key to unlocking this synergy lies in developing advanced computational methods to see the smoke before the fire – identifying the early precursors and subtle indicators of impending conflicts <d-cite key="vesco2022united"></d-cite>. This approach aligns with the evolving needs of conflict research, where traditional models may not adequately address the complex and non-linear nature of conflict data <d-cite key="weidmann2023recent"></d-cite>. <d-cite key="mueller2018reading"></d-cite> demonstrate the potential of utilizing newspaper text for predicting political violence, suggesting a novel data source for uncovering early conflict indicators. However, these early attempts are outdated given the fast technological development in recent years, particularly in the field of natural language processing. 
This research endeavour seeks to fill that gap and introduce a scalable, explainable method to quantitative conflict research.</p> <h2 id="data">Data</h2> <p>The project capitalizes on the premise that risk factors triggering a conflict, including food crises, are frequently mentioned in on-the-ground news reports before being reflected in traditional risk indicators, which can often be incomplete, delayed, or outdated. By harnessing newspaper articles as a key data source, this initiative aims to identify these causal precursors more timely and accurately than conventional methods.</p> <h3 id="news-articles-as-data-source">News Articles as Data Source</h3> <p>News articles represent a valuable data source, particularly in research domains where timely and detailed information is crucial. In contrast to another “live” data source that currently revels in popularity amongst researchers - social media data - news articles are arguably less prone to unverified narratives. While news articles typically undergo editorial checks and balances, ensuring a certain level of reliability and credibility, they certainly do not withstand all potential biases and are to be handled with caution - as arguably every data source. To counteract potential biases of individual news outputs, accessing a diverse range of news sources is essential. Rather than having to scrape or otherwise collect data on news articles, there is a set of resources available:</p> <ul> <li> <p><a href="https://newsapi.org/">NewsAPI</a>: This platform provides convenient access to a daily limit of 100 articles, offering diverse query options. Its integration with a Python library streamlines the process of data retrieval. 
However, the limitation lies in the relatively small number of data points it offers, potentially restricting the scope of analysis.</p> </li> <li> <p><a href="https://www.gdeltproject.org/">GDELT Database</a>: Renowned for its vast repository of historical information spanning several decades, GDELT stands as a comprehensive data source. Its extensive database is a significant asset, but similar to NewsAPI, it predominantly features article summaries or initial sentences rather than complete texts, which may limit the depth of analysis.</p> </li> <li> <p><a href="https://www.dowjones.com/professional/factiva/">Factiva</a>: A premium service that grants access to the complete bodies of articles from a plethora of global news sources in multiple languages. While offering an exhaustive depth of data, this resource comes with associated costs, which may be a consideration for budget-constrained projects.</p> </li> <li> <p><a href="https://paperswithcode.com/dataset/realnews">RealNews</a>: As a cost-free alternative, this dataset encompasses entire newspaper articles collated between 2016 and 2019. Selected for this project due to its unrestricted accessibility and comprehensive nature, it provides a substantial set of articles, making it a valuable resource for in-depth analysis.</p> </li> </ul> <h3 id="descriptive-analysis-of-the-data">Descriptive Analysis of the Data</h3> <p>The analysis delved into a selected subset of <strong>120,000 articles</strong> from the <a href="https://paperswithcode.com/dataset/realnews">RealNews</a> open-source dataset. This subset was chosen randomly to manage the extensive scope of the complete dataset within the project’s time constraints. 
Each article in this subset provided a rich array of information, including <strong>url</strong>, <strong>url_used</strong>, <strong>title</strong>, <strong>text</strong>, <strong>summary</strong>, <strong>authors</strong>, <strong>publish_date</strong>, <strong>domain</strong>, <strong>warc_date</strong>, and <strong>status</strong>.</p> <p>The range of articles spans from 1869 to 2019, but for focused analysis, we narrowed the scope to articles from <strong>January 2016 through March 2019</strong>. This temporal delimitation resulted in a dataset comprising <strong>58,867 articles</strong>. These articles originated from an expansive pool of <strong>493 distinct news outlets</strong>, offering a broad perspective on global events and narratives. The distribution of these articles across the specified time frame provides the expected observation of increasing news reporting, as visualized below.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-conflict-causality/img4_articlecounts-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-conflict-causality/img4_articlecounts-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-conflict-causality/img4_articlecounts-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-conflict-causality/img4_articlecounts.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p align="center" style="color: white; font-style: italic; font-weight: bold;">Counts of Articles over Time</p> <p>To understand the content of our dataset’s news articles better, we utilized the <em>TfidfVectorizer</em>, a powerful tool that transforms text into a numerical representation, emphasizing key words based on their frequency 
and distinctiveness within the dataset. To ensure focus on the most relevant terms, we filtered out commonly used English stopwords. The <em>TfidfVectorizer</em> then generated a <em>tf-idf matrix</em>, assigning weights to words that reflect their importance in the overall dataset. By summing the Inverse Document Frequency (IDF) of each term, we obtained the adjusted frequencies that helped identify the most influential words in our corpus. To visually represent these findings, we created a word cloud (see below), where the size of each word correlates with its relative importance.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-conflict-causality/img3_wordcloud-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-conflict-causality/img3_wordcloud-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-conflict-causality/img3_wordcloud-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-conflict-causality/img3_wordcloud.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p align="center" style="color: white; font-style: italic; font-weight: bold;">Word Cloud for Entire News Article Dataset (tf-idf adjusted)</p> <h2 id="methodology">Methodology</h2> <p>We showcase the applicability of a frame-semantic parsing to the study of conflicts and inform the model with domain-specific seed phrases identified through semantic similarity analysis. This approach not only demonstrates the effectiveness of the method in conflict studies but also showcases how domain-specific applications of deep learning tasks can be accurately applied and measured. 
Thus, we not only validate the utility of frame-semantic parsing in conflict analysis but also explore innovative ways to tailor and evaluate domain-specific performance metrics.</p> <h3 id="the-frame-semantic-parser">The Frame-Semantic Parser</h3> <h4 id="contextualizing-the-approach">Contextualizing the Approach</h4> <p>In the pursuit of bridging the gap between the robust theoretical understanding of conflict dynamics and the practical challenges in data availability, the frame-semantic parser emerges as a promising methodological tool. In a recent study (<d-cite key="balashankar2023predicting"></d-cite>), a team of researchers established a proof-of-concept via its successful application of a frame-semantic parser for the study of food insecurity - a field with similar challenges surrounding data access and quality. While this study relied on what can now be considered the “old state-of-the-art,” our proposed approach diverges towards a more contemporary, transformer-based model, inspired by the advancements outlined in <d-cite key="chanin2023open"></d-cite>.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-conflict-causality/img2_parser-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-conflict-causality/img2_parser-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-conflict-causality/img2_parser-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-conflict-causality/img2_parser.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p align="center" style="color: white; font-style: italic; font-weight: bold;">Retrieved from https://github.com/swabhs/open-sesame</p> <h4 id="how-does-a-frame-semantic-parser-work">How Does a 
Frame-Semantic Parser Work?</h4> <p>At the heart of frame-semantic parsing, as conceptualized by <d-cite key="gildea2002frame"></d-cite> and formalized by the FrameNet project <d-cite key="baker1998framenet"></d-cite>, is the identification of structured semantic frames and their arguments from natural language text. As illustrated above, these frames encapsulate events, relations, or situations along with their participants, making it a critical tool in natural language understanding (NLU) tasks. The practical applications of frame semantics are broad, ranging from voice assistants and dialog systems <d-cite key="chen2013dialog"></d-cite> to complex text analysis <d-cite key="zhao2023text"></d-cite>.</p> <p>The process of frame-semantic parsing constitutes three subtasks:</p> <ul> <li> <p><strong>Trigger Identification</strong>: This initial step involves pinpointing locations in a sentence that could potentially evoke a frame. It’s a foundational task that sets the stage for more detailed analysis.</p> </li> <li> <p><strong>Frame Classification</strong>: Following trigger identification, each potential trigger is analyzed to classify the specific FrameNet frame it references. This task is facilitated by leveraging lexical units (LUs) from FrameNet, which provide a strong indication of potential frames.</p> </li> <li> <p><strong>Argument Extraction</strong>: The final task involves identifying the frame elements and their corresponding arguments within the text. This process adds depth to the frame by fleshing out its components and contextualizing its application within the sentence.</p> </li> </ul> <p>While frame-semantic parsers have arguably not received as much attention as other language modeling methods, three major contributions of the past few years can be highlighted. 
<d-cite key="swayamdipta2017frame"></d-cite>’s approach - which is still outperforming many other implementations - presented an efficient parser with softmax-margin segmental RNNs and a syntactic scaffold. It demonstrates that syntax, while beneficial, is not a necessity for high-performance frame-semantic parsing. <d-cite key="kalyanpur2020open"></d-cite> explores the application of transformer-based architectures to frame semantic parsing, employing a multi-task learning approach that significantly improves upon previous state-of-the-art results. Most recently, <d-cite key="chanin2023open"></d-cite> developed the first open-source approach - treating frame semantic parsing as a sequence-to-sequence text generation task, utilizing a T5 transformer model. It emphasizes the importance of pretraining on related datasets and employing data augmentations for improved performance. The distinctive strength of a frame-semantic parser lies in its ability to contextualize information, rather than interpreting it in isolation. This feature is particularly invaluable in conflict analysis, where the semantics of discourse play a critical role.</p> <h4 id="implementation-of-the-frame-semantic-parser">Implementation of the Frame-Semantic Parser</h4> <p>The implementation of our frame-semantic parser involves several key steps. We begin by splitting our text data into sentences using a <em>split_into_sentences</em> function. This granular approach allows us to focus on individual narrative elements within the articles and since frame-semantic parsers are reported to perform better on sentence-level <d-cite key="chanin2023open"></d-cite><d-cite key="swayamdipta2017frame"></d-cite>.</p> <p>In the heart of our methodology, we utilize various functions to extract and filter relevant frames from the text. Our <em>extract_features</em> function captures the full text of each frame element, ensuring a comprehensive analysis of the semantic content. 
The <em>filter_frames</em> function then refines this data, focusing on frames that are explicitly relevant to conflict, as informed by research on causal frames in FrameNet.</p> <p>To optimize the performance of our transformer-based parser, we build a <em>process_batch</em> function. This function handles batches of sentences, applying the frame semantic transformer model to detect and filter frames relevant to our study.</p> <p>Our approach also includes a careful selection of specific frames related to causality and conflict as we are interested in these frames and not just any. We rely on both manually identified frame names (informed by <d-cite key="vieu2016a"></d-cite><d-cite key="vieu2020a"></d-cite>) and pattern-based searches in <strong>FrameNet</strong> to compile a comprehensive list of relevant frames. This curated set of frames is instrumental in identifying the nuanced aspects of conflict narratives within the news articles.</p> <p>The implementation is designed to be efficient and scalable, processing large batches of sentences and extracting the most relevant semantic frames. This approach enables us to parse and analyze a substantial corpus of news articles, providing a rich dataset for our conflict analysis.</p> <h3 id="seed-selection-via-semantic-similarity-analysis-to-inform-causal-modeling">Seed Selection via Semantic Similarity Analysis to Inform Causal Modeling</h3> <h4 id="understanding-semantic-similarity">Understanding Semantic Similarity</h4> <p>Semantic similarity plays a pivotal role in our methodology, serving as the foundation for expanding our understanding of how conflict is discussed in news articles. By exploring the semantic relationships between words and phrases, we can broaden our analysis to include a diverse array of expressions and viewpoints related to conflict. 
This expansion is not merely linguistic; it delves into the conceptual realms, uncovering varying narratives and perspectives that shape the discourse on conflict.</p> <h4 id="how-do-we-compute-semantic-similarity">How Do We Compute Semantic Similarity?</h4> <p>To compute semantic similarity and refine our seed phrases, we employ a combination of distance calculation and cosine similarity measures. We begin with a set of initial key phrases <strong>conflict</strong>, <strong>war</strong>, and <strong>battle</strong>, ensuring they capture the core essence of our thematic domain. We then leverage pretrained word embeddings from the <em>Gensim</em> library to map these phrases into a high-dimensional semantic space. We also experimented with more sophisticated embedding approaches (like transformer-based) to compute the semantic similarity and thus obtain the seeds. When trading off complexity/time against performance, the simpler pretrained <em>Gensim</em> model prevailed.</p> <p>Our methodology involves generating candidate seeds from our corpus of documents, including unigrams, bigrams, and trigrams, with a focus on those containing key words related to conflict. We filter these candidates based on their presence in the word vectors vocabulary, ensuring relevance and coherence with our seed phrases.</p> <p>Using functions like <em>calculate_distances</em> and <em>calculate_cosine_similarity</em>, we measure the semantic proximity of these candidates to our initial seed phrases. This process involves averaging the distances or similarities across the seed phrases for each candidate, providing a nuanced understanding of their semantic relatedness.</p> <p>The candidates are then ranked based on their similarity scores, with the top candidates selected for further analysis. 
This refined set of seed phrases, after manual evaluation and cleaning, forms the basis of our domain-specific analysis, guiding the frame-semantic parsing process towards a more focused and relevant exploration of conflict narratives.</p> <h3 id="domain-specific-metrics">Domain-Specific Metrics</h3> <p>In the final stage of our methodology, we integrate the identified seed phrases into the frame-semantic parser’s analysis. By comparing the model’s performance on a general set of sentences versus a subset containing at least one seed phrase, we assess the model’s domain-specific efficacy. This comparison not only highlights the general capabilities of large language models (LLMs) but also underscores their potential limitations in domain-specific contexts.</p> <p>Our approach offers a pragmatic solution for researchers and practitioners in low-resource settings. We demonstrate that while general-purpose LLMs are powerful, they often require fine-tuning for specific domain applications. By utilizing identified domain-specific keywords to construct a tailored test dataset, users can evaluate the suitability of general LLMs for their specific needs.</p> <p>In cases where technical skills and resources allow, this domain-specific dataset can serve as an invaluable tool for further refining the model through data augmentation and fine-tuning. Our methodology, therefore, not only provides a robust framework for conflict analysis but also lays the groundwork for adaptable and efficient use of advanced NLP tools in various thematic domains.</p> <p>We present the results for these domain-specific measures for <strong>F1 score</strong>, <strong>recall</strong>, and <strong>precision</strong>.
Likewise, to illustrate performance differences across domains, we conducted the entire approach also for the finance domain, starting with the keywords <strong>finance</strong>, <strong>banking</strong>, and <strong>economy</strong>.</p> <h2 id="findings--insights">Findings &amp; Insights</h2> <h3 id="frame-semantic-parser-identifies-causal-frames-reliably">Frame-Semantic Parser Identifies Causal Frames Reliably</h3> <p>In this stage, we assess if the methodology is truly applicable to the domain of conflicts and for the use with news article data. We find that of our 37 identified cause-effect related frames, all are represented with various instances in our dataset. In fact, as few as 1,600 randomly selected news articles (processed in 100 batches of 16 batch samples) suffice to cover all cause-effect related frames. Therefore, for this intermediate step of the project, we gather support that the parser is in-fact applicable to news article data.</p> <h3 id="differences-in-seed-phrase-selection">Differences in Seed Phrase Selection</h3> <p>We make one major observation between the results of the finance- versus conflict-specific seed selection for downstream use. Potentially driven by the fact that conflicts are drastically driven by geographic labels and information, a number of the top 50 seed phrases were geographic terms like “Afghanistan.” Since we did not want to bias the downstream evaluation of our domain-specific metrics we excluded these seed phrases and continued the analysis with 34 seeds. In contrast, the top 50 finance-specific seed phrases obtained from the semantic analysis were neither geographic nor linked to individual (financial) historic events, wherefore we continued the downstream analysis with all top 50 seed phrases. 
Already here we can observe the deviations across domains, giving more support to the idea of domain-specific evaluation and metrics.</p> <h3 id="employing-domain-specific-performance-metrics">Employing Domain-Specific Performance Metrics</h3> <p>Our research involved an extensive evaluation of the frame-semantic parser, based on a transformer architecture, across various configurations and domain-specific datasets. We began by rebuilding and training the model using the vanilla code and a smaller model size without hyperparameter tuning. Subsequently, we fine-tuned the hyperparameters to match the baseline performance levels. After this, we moved to one of the main contributions of this project: the domain-specific evaluation. The evaluation was carried out on domain-specific validation and test datasets, curated using seed words from <strong>finance</strong> and <strong>conflict</strong> domains to highlight differences across domains.</p> <p>The untuned model (<em>validation n = 646, test n = 1891</em>) showed an argument extraction <strong>F1 score of 0.669</strong> and a <strong>loss of 0.181</strong> on the validation set. On the test set, it presented a similar <strong>F1 score of 0.669</strong> and a <strong>loss of 0.227</strong>.</p> <h4 id="hyperparameter-tuned-performance">Hyperparameter-Tuned Performance</h4> <p>Post hyperparameter tuning, there was a notable improvement in the model’s validation performance (<em>n = 156</em>), with the <strong>F1 score for frame classification reaching as high as 0.873</strong>, and the <strong>precision for trigger identification at 0.818</strong>.
The test metrics (<em>n = 195</em>) also showed consistent enhancement, with the <strong>F1 score for frame classification at 0.864</strong> and <strong>trigger identification precision at 0.747</strong>.</p> <p>When evaluated on domain-specific datasets, <strong>the model exhibited varying degrees of effectiveness</strong> which showcases our assumption that domains matter to the applicability of LLMs to domain-specific tasks and that our simple proposed way of generating domain-specific metrics can give insights on that. For the conflict keywords (<em>validation n = 121, test n = 255</em>), the model achieved a <strong>validation F1 score of 0.865 for frame classification and 0.764 for trigger identification precision</strong>. However, for the finance domain (<em>validation n = 121, test n = 255</em>), the <strong>F1 score for frame classification was slightly higher at 0.878</strong>, and the <strong>trigger identification precision was also higher at 0.781</strong> compared to the conflict domain.</p> <p>The results indicate that the hyperparameter-tuned model significantly outperforms the vanilla model across all metrics. Additionally, domain-specific tuning appears to have a considerable impact on the model’s performance, with the finance domain showing slightly better results in certain metrics compared to the conflict domain. These insights could be pivotal for further refinements and targeted applications of the frame-semantic parser in natural language processing tasks. Moreover, these observations fit our general understanding of the two domains. Reports on conflicts are likely to discuss the involved parties’ reasons for specific actions like attacks on certain targets.
Additionally, the actions in conflicts are arguably more <strong>triggering</strong> events than “the good old stable economy.” Certainly, this research project can only be the beginning of a more rigorous assessment, but these findings show great promise of the idea of <strong>generating and evaluating simple, domain-specific performance metrics</strong>.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-conflict-causality/img5_performance-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-conflict-causality/img5_performance-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-conflict-causality/img5_performance-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-conflict-causality/img5_performance.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p align="center" style="color: white; font-style: italic; font-weight: bold;">Performance Evaluation of Frame-Semantic Parser</p> <h2 id="conclusion--limitations">Conclusion &amp; Limitations</h2> <p>This project has embarked on an innovative journey, merging advanced natural language processing techniques with the intricate study of conflict. By harnessing the power of a transformer-based frame-semantic parser and integrating semantic similarity analysis, we have made significant strides in identifying causal relationships within news articles. 
This methodology has not only illuminated the dynamics of conflict as portrayed in media but also demonstrated the adaptability and potential of frame-semantic parsing in domain-specific applications.</p> <h3 id="key-findings">Key Findings</h3> <ol> <li> <p><strong>Utility of Frame-Semantic Parsing</strong>: Our work has showcased the frame-semantic parser as a valuable and explainable tool, particularly effective in data-scarce environments like conflict research. Its ability to contextualize information and discern nuanced semantic relationships makes it an indispensable asset in understanding complex thematic domains.</p> </li> <li> <p><strong>Semantic Similarity for Domain-Specific Perspective</strong>: We illustrated the effectiveness of using semantic similarity to refine seed phrases, thereby tailoring the frame-semantic parser to the specific domain of conflict. This approach has proven to be a straightforward yet powerful means to customize advanced NLP models for targeted analysis.</p> </li> <li> <p><strong>Dependence on Domain for Model Performance</strong>: Our findings highlight a significant insight: the performance of general-purpose language models can vary depending on the domain of application. This observation underscores the need for domain-specific tuning to achieve optimal results in specialized contexts.</p> </li> <li> <p><strong>Development of Domain-Specific Performance Metrics</strong>: We proposed and validated a practical approach to developing domain-specific metrics, especially useful in resource-constrained environments. 
This methodology enables a nuanced evaluation of model performance tailored to specific thematic areas.</p> </li> </ol> <h3 id="limitations--future-research">Limitations &amp; Future Research</h3> <p>Despite the promising results, our project is not without its limitations, which pave the way for future research opportunities:</p> <ol> <li> <p><strong>Data Dependency</strong>: The effectiveness of our approach is heavily reliant on the quality and diversity of the news article dataset. Biases in media reporting or limitations in the scope of articles can skew the analysis and affect the accuracy of the results. In an extended version of the project - and with funding - one could switch to the <a href="https://www.dowjones.com/professional/factiva/">Factiva</a> dataset.</p> </li> <li> <p><strong>Applicability of Domain-Specificity to Other Themes</strong>: While our method has shown efficacy in the context of conflict analysis, its applicability to other specific domains requires further exploration. Future research could test and refine our approach across various thematic areas to assess its broader utility.</p> </li> <li> <p><strong>Model Complexity and Interpretability</strong>: While we have emphasized the explainability of the frame-semantic parser, the inherent complexity of transformer-based models can pose challenges in terms of scaling and deployment. Future work could focus on simplifying these models without compromising their performance - for instance via pruning and quantization.</p> </li> <li> <p><strong>Expansion of Semantic Similarity Techniques</strong>: Our semantic similarity analysis was instrumental in refining seed phrases, but there is room for further enhancement. Incorporating more advanced semantic analysis techniques could yield even more precise and relevant seed phrases. 
While we found alternative methods, like BERT-based approaches, not to yield significant improvements, ever more models flood the market.</p> </li> <li> <p><strong>Integration with Other Data Sources</strong>: Expanding the dataset beyond news articles to include social media, governmental reports, or academic literature could provide a more holistic view of conflict narratives and their causal relations.</p> </li> </ol> <p>In conclusion, our project represents a significant step forward in the intersection of natural language processing and conflict research. By addressing these limitations and building on our foundational work, future research can continue to push the boundaries of what is possible in this exciting and ever-evolving field.</p> To Encode or Not To Encode: The Case for the Encoder-free Autodecoder Architecture 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/autodecoders <h2 id="autodecoders">Autodecoders</h2> <h3 id="introduction">Introduction</h3> <p>Autoencoders have been a part of the neural network landscape for decades, first proposed by LeCun in 1987. Today, many variants of the autoencoder architecture exist as successful applications in different fields, including computer vision and natural language processing, and the variational autoencoder remains at the forefront of generative modeling. Autoencoders are neural networks trained to reconstruct their input as their output via compression through dimensionality reduction, accomplishing this task with the use of an encoder-decoder network.</p> <p>Autoencoders consist of the encoder network, which takes a data sample input and translates it to a lower-dimensional latent representation consisting of only the most necessary features, and the decoder network, which attempts to reconstruct the original data from this encoding.
By learning a compressed, distributed representation of the data, the latent space learned by autoencoders is usable for a plethora of downstream tasks.</p> <p>With traditional autoencoders, both the encoder and decoder are trained, but for certain applications— particularly generative tasks— only the decoder is utilized for inference. Because the encoder itself is not used at test time, training an encoder may not be an effective use of computational resources; the autodecoder is an alternative architecture that operates without an encoder network and brings some novel benefits.</p> <p>Rather than using the encoder to encode an input into a low-dimensional latent code, each sample in the training set begins with a randomly initialized latent code, and the latent codes and decoder weights are updated jointly during training time. For inference on new data, the latent vector for a given sample is then also randomly initialized and updated through an additional optimization loop with the decoder’s frozen weights.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/encoderdecoder-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/encoderdecoder-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/encoderdecoder-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/encoderdecoder.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">The architecture for an autoencoder (top) compared to that of an autodecoder (bottom).</figcaption> </figure> <p>Are explicit encoders necessary for image reconstruction?
What are the unique benefits that come from using decoder-only architectures? One interesting application of autodecoders is the ability to reconstruct complete samples from partial inputs. The main focus of our research revolved around testing this ability, answering the question of how much of a sample is required for a complete reconstruction using an autodecoder given an expressive latent space, and comparing its performance to that of an autoencoder.</p> <p>Furthermore, we discuss additional applications in various fields that other research has accomplished in part due to the utilization of the autodecoder architecture over the traditional autoencoder, with a focus on the beneficial properties that we explore in our experiments, including partial reconstructions.</p> <h3 id="related-work">Related Work</h3> <p>Various prior works have utilized autodecoder frameworks in the past along with providing rationale for their usage, mainly for tasks related to reconstruction or generative modeling through representation learning. However, none have provided standalone examples of their use, something we aim to accomplish in this blog.</p> <p>The Generative Latent Optimization framework was introduced by Bojanowski et al. (2019) as an alternative to the adversarial training protocol of GANs. Instead of producing the latent representation with a parametric encoder, the representation is learned freely in a non-parametric manner. One noise vector is optimized by minimizing a simple reconstruction loss and is mapped to each image in the dataset.</p> <p>Tang, Sennrich, and Nivre (2019) trained encoder-free neural machine translation (NMT) models in an endeavor to produce more interpretable models. In the encoder-free model, the source was the sum of the word embeddings and the sinusoid embeddings (Vaswani et al., 2017), and the decoder was a transformer or RNN.
The models without an encoder produced significantly poorer results; however, the word embeddings produced by encoder-free models were competitive with those produced by the default NMT models.</p> <p>DeepSDF, a learned continuous Signed Distance Function (SDF) representation of a class of shapes, was introduced by Park et al. (2019) as a novel representation for generative 3D modelling. Autodecoder networks were used for learning the shape embeddings, trained with self-reconstruction loss on decoder-only architectures. These autodecoders simultaneously optimized the latent vectors mapping to each data point and the decoder weights through backpropagation. While outperforming previous methods in both space representation and completion tasks, autodecoding was significantly more time consuming during inference because of the explicit need for optimization over the latent vector.</p> <p>Sitzmann et al. (2022) introduced a novel neural scene representation called Light Field Networks (LFNs), reducing the time and memory complexity of storing 360-degree light fields and enabling real-time rendering. 3D scenes are each represented by their individual latent vectors that are obtained by using an autodecoder framework, but it is noted that this may not be the framework that performs the best. The latent parameters and the hypernetwork parameters are both optimized in the training loop using gradient descent; the LFN is conditioned on a single latent variable. Potential applications are noted to include enabling out-of-distribution generalization through combining LFNs with local conditioning.</p> <p>Scene Representation Networks (SRNs) represent scenes as continuous functions without knowledge of depth or shape, allowing for generalization and applications including few-shot reconstruction.
SRNs, introduced by Sitzmann, Zollhöfer and Wetzstein (2019), represent both the geometry and appearance of a scene, and are able to accomplish tasks such as novel view synthesis and shape interpolation from unsupervised training on sets of 2D images. An autodecoder framework is used to find the latent vectors that characterize the different shapes and appearance properties of scenes.</p> <h3 id="methodology">Methodology</h3> <h4 id="traditional-autoencoder">Traditional Autoencoder</h4> <p>To establish a baseline, we first trained a convolutional autoencoder network containing both an encoder and decoder on a version of the MNIST dataset normalized and padded to contain 32x32 sized images. For our autoencoder architecture, we utilized convolutional layers with ReLU nonlinearity.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autoencoderloss-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autoencoderloss-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autoencoderloss-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/autoencoderloss.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">The training and validation losses from the training loop for the autoencoder.</figcaption> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/tsne_autodecoder-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/tsne_autodecoder-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 
1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/tsne_autodecoder-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/tsne_autodecoder.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">The latent space learned by the autoencoder, color-coded by digit label and visualized through a 2-dimensional t-SNE plot. We see the expected result, with consistency and separation.</figcaption> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autoencodersampleoutput-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autoencodersampleoutput-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autoencodersampleoutput-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/autoencodersampleoutput.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">A sample output from an unseen image after training. We can see that our small convolutional autoencoder does a fairly good job at learning how to compress simple information into a single latent code and decode it into its original form.</figcaption> </figure> <h4 id="autodecoder">Autodecoder</h4> <p>We implemented and trained an autodecoder on the same dataset by creating a convolutional decoder that takes latent codes as an input and transforms them into full images. 
We utilized transpose convolutions to upscale the images while additionally concatenating normalized coordinates to embed positional information, and also used leaky ReLU layers for nonlinearity.</p> <p>For training, the latent codes for 10,000 images in our training set were randomly initialized. The loss for our autodecoder then included three components: the reconstruction loss; the latent loss, which encourages latent values to be closer to zero in order to encourage a compact latent space; and the L2 weight regularization, which prevents the decoder from overfitting to the training set by encouraging the model weights to be sparse.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/lossfn-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/lossfn-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/lossfn-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/lossfn.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">The loss function used to train the autodecoder. 
During inference, a custom loss function can be used based on the application.</figcaption> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autodecoderloss-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autodecoderloss-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autodecoderloss-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/autodecoderloss.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">The training and validation losses from the training loop for the autodecoder. The validation loss has no actual meaning in the autodecoder framework, as new images would have a randomly initialized latent code and so would output nonsense. This loss was included simply to demonstrate this feature.</figcaption> </figure> <p>Below are progressive reconstructions on the training data performed by the autodecoder as it trained and optimized both the decoder weights and the training set’s latent codes. 
We can observe that the digits’ general forms were learned before the exact shapes, which implies good concentration and consistency of the latent space between digits of the same class.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/progress1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/progress1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/progress1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/progress1.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/progress2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/progress2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/progress2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/progress2.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/progress3-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/progress3-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/progress3-1400.webp" 
/> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/progress3.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/progress4-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/progress4-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/progress4-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/progress4.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">Progressive reconstructions from top to bottom (model outputs compared to ground truth): 1. Decoding a randomly initialized latent code outputs nonsense. 2. The correct digit is reconstructed, implying that the latent space is improving, but the specific shape differs from that of the ground truth image. 3. The output’s shape begins to better match that of the ground truth. 4. 
The autodecoder and latent code are optimized to be able to effectively reconstruct the ground truth image.</figcaption> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/tsne_autodecoder-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/tsne_autodecoder-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/tsne_autodecoder-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/tsne_autodecoder.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">The latent space learned by the autodecoder, also visualized through a 2-dimensional t-SNE plot. When compared to the t-SNE plot representing the latent space learned by the autoencoder, we again see consistency but notice that here the clusters are more compact. 
While the distance between clusters in t-SNE plots does not have a definite meaning, this could potentially imply that the features of shapes, rather than the shapes themselves, are better learned, as different digits share similar features (curves, straight lines, etc).</figcaption> </figure> <p>Upon training the autodecoder, for inference on a new image we first freeze the decoder weights and then run an additional gradient descent-based optimization loop over a new randomly initialized latent code with reconstruction loss.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autodecodersampleoutput-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autodecodersampleoutput-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autodecodersampleoutput-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/autodecodersampleoutput.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">Output from the trained autodecoder on a new image from the test set.</figcaption> </figure> <h3 id="experimentation">Experimentation</h3> <p>One benefit of the autodecoder framework is that because we have an additional optimization loop for each input during inference, we are able to do varying pixel-level reconstructions, whereas an autoencoder is designed and trained to reconstruct complete images each time.</p> <p>We demonstrate this feature in our experiments below by applying center masks to our images before autoencoding or decoding.</p> <p>1: We trained a traditional <strong>autoencoder</strong> with generic reconstruction loss, and input an image with a mask in the center. 
The output is expected, as the autoencoder learned to reconstruct whatever it saw, and so the empty space from the mask is included in the result.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autoencoder_input_7-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autoencoder_input_7-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autoencoder_input_7-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/autoencoder_input_7.png" class="img-fluid" width="50" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">The input image</figcaption> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autoencoder_output_7-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autoencoder_output_7-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autoencoder_output_7-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/autoencoder_output_7.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">The reconstructed image compared to the image without the mask.</figcaption> </figure> <p>2: We trained a traditional <strong>autoencoder</strong> with reconstruction loss without considering a centered square area and input an unmodified image. 
The output is again expected, as the autoencoder was trained to fully disregard the center area, and so the output is empty in that region.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autoencoder_input_4-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autoencoder_input_4-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autoencoder_input_4-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/autoencoder_input_4.png" class="img-fluid" width="50" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">The area of the images that the autoencoder is trained to learn on.</figcaption> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autoencoder_output_4-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autoencoder_output_4-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autoencoder_output_4-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/autoencoder_output_4.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">The model output compared to the original image.</figcaption> </figure> <p>3: We trained an <strong>autodecoder</strong> with generic reconstruction loss, and during the optimization loop for inference we utilized a custom loss function that did not consider the masked area. 
However, in this case, we are still able to reconstruct the original image to varying levels of success because of the latent space we originally learned through the training loop.</p> <p>Shown below are the areas optimized in the loss functions, along with the decoded output and original image.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/8x8mask_input-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/8x8mask_input-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/8x8mask_input-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/8x8mask_input.png" class="img-fluid" width="50" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/8x8mask_output-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/8x8mask_output-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/8x8mask_output-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/8x8mask_output.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">An 8x8 mask.</figcaption> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/12x12mask_input-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" 
srcset="/staging/assets/img/2023-11-09-autodecoders/12x12mask_input-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/12x12mask_input-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/12x12mask_input.png" class="img-fluid" width="50" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/12x12maskoutput-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/12x12maskoutput-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/12x12maskoutput-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/12x12maskoutput.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">A 12x12 mask. 
Even with significant information about the digit missing, the autodecoder is able to sufficiently reconstruct the ground truth image based on the learned information.</figcaption> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/16x16mask_input-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/16x16mask_input-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/16x16mask_input-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/16x16mask_input.png" class="img-fluid" width="50" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/16x16mask_output-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/16x16mask_output-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/16x16mask_output-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/16x16mask_output.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">A 16x16 mask.</figcaption> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/20x20mask_input-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/20x20mask_input-800.webp" /> <source 
class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/20x20mask_input-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/20x20mask_input.png" class="img-fluid" width="50" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/20x20mask_output-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/20x20mask_output-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/20x20mask_output-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/20x20mask_output.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">A 20x20 mask. 
Although the reconstructed digit is ultimately incorrect, we see that we are able to get very close even with extremely limited inputs.</figcaption> </figure> <p>To analyze and compare the latent spaces learned by both our autoencoder and autodecoder, we additionally perform linear interpolation (with α=0.5) between the embeddings of two images and include their decoded results below.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autoencoder_interpolation-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autoencoder_interpolation-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autoencoder_interpolation-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/autoencoder_interpolation.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">The output of the decoded interpolation of two embeddings from the autoencoder.</figcaption> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autodecoder_interpolation-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autodecoder_interpolation-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-autodecoders/autodecoder_interpolation-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-autodecoders/autodecoder_interpolation.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> 
</picture><figcaption class="caption">The output of the decoded interpolation of two embeddings from the autodecoder.</figcaption> </figure> <p>The autoencoder output was somewhat expected due to the simplistic nature of the MNIST dataset, and we can see a merge of the two images with equal features of both.</p> <p>More interesting was the output for the autodecoder, which simply returned an image consisting of the pixel average of both images. Some hypotheses for this result include:</p> <ul> <li>The shape of the latent space for the learned autodecoder potentially being one that does not pair well with linear interpolation, causing linear interpolations in latent space to be equivalent to those in the data space. Meanwhile, the shape of the latent space for the autoencoder might better match a Gaussian, which translates to effective nonlinear interpolations in the data space, which is desired.</li> <li>The inductive bias from the existence of the encoder architecture allowing for better interpolatability.</li> </ul> <h3 id="conclusion">Conclusion</h3> <h4 id="discussion">Discussion</h4> <p>While autoencoders (and variations such as VAEs) have traditionally been the standard architectures for representation learning, we explore an alternate autodecoder architecture, in which the encoder is excluded and individual latent codes are learned along with the decoder. We investigated the necessity of an explicit encoder in representation learning tasks and found that even without an encoder network, we are able to learn latent representations of input data through the optimization of randomly initialized latent codes during the training loop. Through this alternate dimensionality reduction process, we showed that we were still able to learn a consistent latent space on a multi-class dataset. 
Furthermore, we showed that through the use of an additional optimization loop for inference rather than learned encoder weights, the autodecoder can learn to reconstruct incomplete observations through pixel-level optimizations.</p> <p>The autodecoder has the potential for many further applications beyond the scope of the research and experiments introduced in this blog. As an example, the task of prior-based 3D scene reconstruction in the field of computer vision, in which novel views of a 3D scene can be generated from a limited number of static images of that scene along with their camera poses, utilizes the autodecoder architecture to guarantee better out-of-distribution views. This task involves the use of camera pose as an additional source of information in addition to input images, something that the encoder itself is unable to integrate when encoding images, leading to the valuable scene representation information being left out. Meanwhile, because the latent code itself is learned in an autodecoder, it is able to use the camera pose to effectively generalize to novel viewpoints. This serves as just one of several examples of the autodecoder being able to carry out tasks normally gatekept by the limitations of the encoder.</p> <h4 id="limitations">Limitations</h4> <p>Some limitations of the encoder-free architecture include certain fallbacks discussed in our experiments, including the difficulties in generating satisfactory novel outputs through linear interpolation of the latent space. Furthermore, while the existence of a secondary optimization loop during inference comes with interesting properties such as being able to define unique loss functions for different purposes, this can be more computationally or temporally costly than running inputs on a trained encoder for inference. 
Regardless, as much of the research around this topic has emerged only within the past several years, it can be expected that autodecoders and their unique properties will continue to emerge, evolve, and find use in novel applications in the years to come.</p> <h3 id="references">References</h3> <p>Robin Baumann. Introduction to neural fields, 2022.</p> <p>Piotr Bojanowski, Armand Joulin, David Lopez-Paz, and Arthur Szlam. Optimizing the latent space of generative networks. arXiv preprint arXiv:1707.05776, 2017.</p> <p>Jeong Joon Park, Peter Florence, Julian Straub, Richard Newcombe, and Steven Lovegrove. Deepsdf: Learning continuous signed distance functions for shape representation. In Proceedings of the IEEE/CVF conference on computer vision and pattern recognition, pages 165–174, 2019.</p> <p>Vincent Sitzmann, Semon Rezchikov, Bill Freeman, Josh Tenenbaum, and Fredo Durand. Light field networks: Neural scene representations with single-evaluation rendering. Advances in Neural Information Processing Systems, 34:19313–19325, 2021.</p> <p>Vincent Sitzmann, Michael Zollhöfer, and Gordon Wetzstein. Scene representation networks: Continuous 3d-structure-aware neural scene representations. Advances in Neural Information Processing Systems, 32, 2019.</p> <p>Gongbo Tang, Rico Sennrich, and Joakim Nivre. Encoders help you disambiguate word senses in neural machine translation. In Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP). Association for Computational Linguistics, 2019.</p> New Synthesis Approach for Personalized LLMS 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/PersonalizedGeneration_w_LLMAgents <h1 id="introduction">Introduction</h1> <p>Deep learning has revolutionized the way in which humans interact with the world around them. 
Its growing ability to ingest vast amounts of data, automate feature extraction, and learn complex patterns and nuances among inputs have contributed to breakthroughs in healthcare, natural language processing, computer vision, and more. A particularly exciting avenue of this innovation has been in the burgeoning field of personalized text generation, which aims to produce text that resembles the style, tone, and word choice taken on by a particular user. Significant advancement in this field has the potential to create more effective forms of communication for individuals with disabilities, personalize educational content, and enhance user interactions with chatbots and virtual assistants, all contributing to a better overall user experience.</p> <p>In an effort to make the availability of personalized text generation more wide-scale, researchers have conducted several studies in the field, centering their approach to the generation of domain-specific personalized text (utilizing domain-specific features/knowledge). Notable studies conducted include <a href="https://arxiv.org/pdf/1910.03506.pdf">Towards Controllable and Personalized Review Generation</a>, which utilizes a product description and self-attentive recursive autoencoders to generate a personalized review <a href="#1">[1]</a>, <a href="https://arxiv.org/pdf/2010.01480.pdf">Knowledge-Enhanced Personalized Review Generation with Capsule Graph Neural Network</a>, which constructs a model based on a CapsGNN, and <a href="https://iopscience.iop.org/article/10.1088/1742-6596/2294/1/012015/pdf">Research on user granularity-level personalized social text generation technology</a>, which utilizes an encoder and decoder for text generation <a href="#2">[2]</a>. A lesser explored part of the field and an area that we have chosen to explore for our final project is embedding in the ability to generate personalized text across domains without domain-specific features <a href="#3">[3]</a>. 
Our project draws inspiration from <a href="https://arxiv.org/pdf/2308.07968.pdf">“Teach LLMs to Personalize – An Approach inspired by Writing Education”</a>, which includes a promising multi-step framework that retrieves, summarizes, ranks, and synthesizes a user’s past documents to generate a personalized version of the document at hand <a href="#4">[4]</a>.</p> <p>A critical aspect of the workflow discussed in the LLM personalization paper and an area that we believe can be improved upon using some of the methods discussed in 6.S898 this semester is the way in which the model synthesizes past documents. Throughout the paper, we will be exploring two creative approaches to synthesis that utilize vector word embeddings to pull relevant words from past documents in an effort to improve the model’s ability to personalize text.</p> <h1 id="literature-review">Literature Review</h1> <p>An integral part of our exploration project was experimenting with using less data and smaller models to see how performance degrades with respect to the approach discussed in the personalization for LLMs paper (no open source code attached as the project is currently being worked on by researchers at Google). Experimentation required taking an extensive look at the steps involved in the original implementation, gaining an in-depth understanding of the deep learning principles discussed, and optimizing training and compute under machine constraints to process vast amounts of real-world data.</p> <p>The problem formulation for the approach to personalized text generation discussed in the paper can be stated as the following: Given the immediate context of a current document (first k characters) written by a user and access to their past documents, can we develop a model that generates text that is similar to the text of the current document (similarity evaluated by calculating Rouge-1, Rouge-2, Rouge-L, and Bleu scores). 
As mentioned earlier, the framework for answering this problem formulation involves first obtaining outputs for retrieval, ranking, summarization, and synthesis, and then feeding these distinct parts into an LLM to produce a personalized body of text (we ignore the auxiliary task of training the LLM to distinguish the owners of written documents for the purposes of this project).</p> <p>The retrieval discussed in the paper uses two methods of outputting relevant documents: sparse retrieval, which compares past documents to the current context using the popular BM25 ranking algorithm, and dense retrieval, which uses a transformer-based text-to-text model to map and compare documents in a 768 dimensional vector space. The ranking step then takes this input, orders documents based on their BM25 scores or cosine similarity when compared with the immediate context, and truncates the input to 2500 characters to only take the top documents. The summarization step then summarizes the top ranked past documents in two ways: context independent summarization, which finetunes an LLM on publicly available data and applies this model to the top ranked entries, and context dependent summarization, which uses weak labels (generated from immediate context) to generate a summary in line with the contents of the current document. 
A visualization of the approach to the structure can be seen below.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/gen_structure_overview-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/gen_structure_overview-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/gen_structure_overview-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/gen_structure_overview.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> An overview of the infrastructure used to process documents and fine tune the personalized generative model. </div> <p>The paper currently explores two methodologies for synthesis: (1) context independent synthesis, which simply uses the top 20 frequently used keywords from a user’s past documents and (2) context dependent synthesis, which uses weak labels to find text from past documents similar to the immediate context of the document to be personalized. After carefully analyzing the two methodologies, we found that by focusing on keywords the synthesized text is missing an in-depth understanding of sentence structure and semantics that are crucial to personalization.</p> <p>To enhance this step of the text generation process, we have explored several new methods of synthesis and have landed on two approaches with one utilizing the Word2Vec model and the other using GloVe. 
We have chosen these methods because they both use unique embedding space attributes to form important relationships between texts. Both networks use the method of creating a centroid of the current document that exists in vector space and output words from top ranked past documents that exist close to this centroid. By doing this, we are essentially selecting words (after filtering out synonyms and stopwords) that are in line with the theme of the current document, which will provide the LLM with more thematically relevant synthesized entries that should in theory generate a more personalized output.</p> <p>As an additional research consideration, we explored the effect of passing in the output from both the context independent synthesis discussed in the paper and our auxiliary method of using Word2Vec or GloVe compared to passing in just one of the methods of synthesis. The motivation for doing so came from our initial hypothesis that the combination of both methods of synthesis would enable the LLM to learn complex interactions between important words (results from context independent synthesis) and thematic words (GloVe/Word2Vec) that could lead to better personalization of the final output. A more detailed explanation of the implementations of our proposed approaches will be shown in the following section.</p> <h1 id="description-of-methods--experiments">Description of methods &amp; experiments</h1> <h2 id="the-baseline-implementation">The Baseline Implementation</h2> <p>Our methodological approach began by re-implementing the baseline model from the “Teach LLMs to Personalize” paper. We utilized two datasets mentioned in the research paper: CNN_DailyMail (<a href="https://huggingface.co/datasets/cnn_dailymail">CNN_DailyMail</a>) and Amazon Review Data for Books (<a href="https://cseweb.ucsd.edu/~jmcauley/datasets/amazon_v2/">Amazon_review_data</a>). 
To enhance efficiency of compute time, we streamlined the data by reducing its size, ensuring a quicker fine-tuning process while retaining data integrity. We also utilized the <a href="https://huggingface.co/t5-base">T5-base model</a>, a smaller model than the T5-11b model mentioned in the paper, for summarization and the personalized generation model. Furthermore, we opted to use the context-independent methods for both summarization and synthesis because the research paper results indicated that their effectiveness is closely comparable to the context-dependent methods. For fine-tuning the summarization model, we utilized a 10 percent subset of the CNN daily mail dataset (311k datapoint original size) with the AdamW optimizer (seeing AdamW is a comparable optimizer to Adafactor, which is what was used in the “Teach LLMs to Personalize” paper), ensuring a balance between efficiency of tuning and comprehensive learning. This set the foundation for our exploration of advanced text synthesis techniques by giving us a base fine tuning and data processing infrastructure. On top of this, the changes we made to the amount of data used along with utilizing a smaller T5 model allowed us to analyze whether the final evaluation results degraded significantly when making the infrastructure of fine tuning the personalized generation model more compact.</p> <h2 id="overview-of-modification--experiments">Overview of Modification &amp; Experiments</h2> <p>In our new approach for synthesis, we utilized Word2Vec and GloVe which hinges on the concept of embedding space. In this space, words are represented as vectors, capturing their semantic relationships based on their context in large text corpora. 
By embedding the current document and past documents (from the same user) in this space, each word is assigned a position that reflects its semantic meaning.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/tsne_dim_reduction_example-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/tsne_dim_reduction_example-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/tsne_dim_reduction_example-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/tsne_dim_reduction_example.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> An example of how using TSNE dimension reduction can illustrate how words are placed in embedding space. Note that dimension reduction does not always come out cleanly since word embeddings are complex and can't be easily represented in 2D space. </div> <p>The ‘centroid’ of the current document in this space is a calculated mean vector, representing the overall semantic direction of the document. Words closest to this centroid are likely to be central to the document’s theme or style. 
When we look for words from past documents that are closest to this centroid, we are essentially searching for words that align closely with the thematic and stylistic essence of the current document.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/centroid_example-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/centroid_example-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/centroid_example-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/centroid_example.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> An example of how our centroid for the current document corresponds to other words from past documents (note we used PCA dimensionality here). We also chose to display words that had very close euclidean distances to the centroid. Note our centroid from the current document highlighted the following as significant words: ['like', 'since', 'first', 'mystery', 'book'] </div> <p>This method makes sense technically because it leverages the nuanced understanding of language captured in word embeddings. 
By focusing on words that are semantically close to the central theme of the current document, the model can more effectively identify and use terms that are likely to be relevant and stylistically consistent for personalization, thereby potentially enhancing the generated text of the personalized generation model.</p> <h2 id="experiment--word2vec-vs-glove">Experiment / Word2Vec vs GloVe</h2> <p>Word2Vec and GloVe are both models for word embeddings, but they differ in their approach to creating these embeddings. Word2Vec, developed by Google, primarily uses local context information of words (words surrounding a given word) to generate embeddings. This results in embeddings that capture more of the syntactic and semantic relationships based on specific local contexts.</p> <p>GloVe (Global Vectors for Word Representation), on the other hand, is designed by Stanford and incorporates global matrix factorization and local context window methods. It emphasizes capturing global statistics of the corpus by considering overall word co-occurrence frequencies, essentially acting as an unsupervised learning algorithm that generates word embeddings.</p> <p>When used for synthesis in text personalization, these differences influence the nature of the embeddings. Word2Vec might be more sensitive to the specific contextual use of words in the current and past documents, potentially offering more precise thematic matches based on immediate context. GloVe, with its global perspective, might bring in a broader understanding of word use, capturing more general usage patterns and thematic relationships that extend beyond the immediate context. This could lead to a slightly different set of words being selected for personalization in the synthesis process.</p> <p>In our experiment, we adapted the structure from the “Teach LLMs” paper, incorporating our novel synthesis methods using Word2Vec and GloVe. 
The process involved independently fine-tuning the personalized generation model for each synthesis approach. This fine-tuning was crucial to observe how the different embedding techniques influenced the model’s performance. After implementing the new synthesis methods, we conducted a thorough evaluation to compare their effectiveness, along with the combination of the original and new synthesis approaches, with the base model. The key focus was on analyzing how the different word embeddings (and combinations of embeddings) impacted the quality and personalization of the generated text, with performance metrics providing insights into the strengths and limitations of each method.</p> <h1 id="analysis--evaluation-of-results">Analysis / Evaluation of Results</h1> <p>The evaluation metrics used in the “Teach LLMs” paper (and also what we utilized), BLEU (Bilingual Evaluation Understudy), ROUGE-1, ROUGE-2, and ROUGE-L, are standard metrics used to evaluate the quality of text which has been machine-translated or generated by machine learning models.</p> <p>BLEU Score: The BLEU score evaluates the quality of machine-translated text by comparing it with one or more reference translations. It does so at various levels, from individual words to consecutive sequences of words (n-grams), to assess precision. A higher BLEU score indicates more similarity to the reference text, often implying better translation quality. 
The calculations behind the BLEU score.
ROUGE scores can consider both precision (like BLEU) and recall, providing a more rounded evaluation.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/rouge_score-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/rouge_score-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/rouge_score-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-PersonalizedGeneration_w_LLMAgents/rouge_score.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Calculations behind the ROUGE-N (N-gram) score calculations; in our case N = 1, 2, or longest common subsequence. </div> <p>We can also take a look into how our models performed during the fine tuning period. 
Based on the progression of the training and validation loss, you can infer how well the model is learning and whether it’s overfitting (learning the training data too closely and not generalizing well) or underfitting (not learning the training data well enough).</p> <p>Comparing the performance of our models using two different synthesis approaches–our base model versus the new synthesis approach using the GloVe or Word2Vec model, and the combination of the base model and new synthesis–could result in different behaviors most likely for one particular reason:</p> <p>Quality of Embeddings: The GloVe and Word2Vec models provide a different representation for words, capturing semantic relationships in a more nuanced way than just looking at IDF scores, which could lead to varied results during fine tuning. Also, combining our original synthesis with our new synthesis can give the model more information to finetune on allowing for a more intricate understanding of the text when generating.</p> <p>The differences in BLEU and ROUGE scores between the two models can arise from how each model handles the linguistic nuances of the generated text. 
Although there are cases where the max score for our new approaches is high, we believe that this is most likely the case where we generate the rest of a document that is already significantly short.
<sup id="fnref:1" role="doc-noteref"><a href="#fn:1" class="footnote" rel="footnote">1</a></sup></p> <table> <thead> <tr> <th style="text-align: left"> </th> <th style="text-align: center">BLEU (avg)</th> <th style="text-align: center">ROUGE1 (avg)</th> <th style="text-align: center">ROUGE2 (avg)</th> <th style="text-align: center">ROUGEL (avg)</th> <th style="text-align: center">BLEU (max)</th> <th style="text-align: center">ROUGE1 (max)</th> <th style="text-align: center">ROUGE2 (max)</th> <th style="text-align: right">ROUGEL (max)</th> </tr> </thead> <tbody> <tr> <td style="text-align: left">Baseline Model</td> <td style="text-align: center">08.9531</td> <td style="text-align: center">29.5847</td> <td style="text-align: center">18.6126</td> <td style="text-align: center">25.6882</td> <td style="text-align: center">49.5207</td> <td style="text-align: center">65.2174</td> <td style="text-align: center">62.2222</td> <td style="text-align: right">65.2173</td> </tr> <tr> <td style="text-align: left">New Synth (Word2Vec)</td> <td style="text-align: center">09.0722</td> <td style="text-align: center">29.3465</td> <td style="text-align: center">18.3129</td> <td style="text-align: center">25.6115</td> <td style="text-align: center">46.6638</td> <td style="text-align: center">65.9340</td> <td style="text-align: center">62.2222</td> <td style="text-align: right">65.2174</td> </tr> <tr> <td style="text-align: left">New Synth (GloVe)</td> <td style="text-align: center">10.3810</td> <td style="text-align: center">31.9870</td> <td style="text-align: center">21.1543</td> <td style="text-align: center">27.4335</td> <td style="text-align: center">50.5317</td> <td style="text-align: center">65.8537</td> <td style="text-align: center">60.1942</td> <td style="text-align: right">63.4146</td> </tr> <tr> <td style="text-align: left">New Synth (Word2Vec) + Old Synth</td> <td style="text-align: center">10.4402</td> <td style="text-align: center">31.4181</td> <td style="text-align: 
center">20.2349</td> <td style="text-align: center">27.7710</td> <td style="text-align: center">58.0197</td> <td style="text-align: center">64.8148</td> <td style="text-align: center">61.9048</td> <td style="text-align: right">62.7907</td> </tr> <tr> <td style="text-align: left">New Synth (GloVe) + Old Synth</td> <td style="text-align: center">08.7228</td> <td style="text-align: center">29.2284</td> <td style="text-align: center">17.1685</td> <td style="text-align: center">24.6075</td> <td style="text-align: center">49.7273</td> <td style="text-align: center">65.5462</td> <td style="text-align: center">60.9756</td> <td style="text-align: right">61.9048</td> </tr> </tbody> </table> <h1 id="conclusion--discussion-of-limitations">Conclusion / Discussion of Limitations</h1> <p>Throughout the paper, we have demonstrated the potential of embedding techniques like Word2Vec and GloVe in enhancing the personalization aspect of text generation models. Our experiments, which involved comparing these methods with traditional synthesis techniques, have shown promising results in terms of creating text that more accurately reflects the style and thematic preferences of individual users.</p> <h2 id="limitations">Limitations</h2> <p>For our exploration, we were limited to running all of our models and doing our data analysis on Google Colab in a short period of time along with having to reimplement the structure used in the “Teach LLMs to Personalize” paper since no codebase exists for it. Because of this, we had to find ways to condense our models and limit the amount of data we ingested so that we could spend less time waiting on models to run and freeing up storage and more time analyzing the output of our code. 
Two of the big adjustments that we made to navigate these constraints were using the t5-base model (fewer tokens than t5-11b), which we ran for a limited number of epochs, instead of the t5-11b model and using only a subset of data points from the provided Amazon Review Dataset. One of the other things that we tried in order to make the most of our compute was quantizing our t5-base model to provide faster synthesis and summary to run on our ingested data, but we unfortunately ran into dependency issues and were unable to get this method working.
Towards Controllable and Personalized Review Generation - arXiv.Org, arxiv.org/pdf/1910.03506.pdf. Accessed 12 Dec. 2023.</p> <p><a id="2">[2]</a> Li, Junyi, et al. Knowledge-Enhanced Personalized Review Generation with … - Arxiv.Org, arxiv.org/pdf/2010.01480.pdf. Accessed 12 Dec. 2023.</p> <p><a id="3">[3]</a> Gao, Y B, et al. “IOPscience.” Journal of Physics: Conference Series, IOP Publishing, 1 June 2022, iopscience.iop.org/article/10.1088/1742-6596/2294/1/012015.</p> <p><a id="4">[4]</a> Li, Cheng, et al. Teach LLMs to Personalize: An Approach Inspired by Writing Education - Arxiv.Org, arxiv.org/pdf/2308.07968.pdf. Accessed 12 Dec. 2023.</p> <div class="footnotes" role="doc-endnotes"> <ol> <li id="fn:1" role="doc-endnote"> <p>Output Produced From our Codebase: <a href="https://github.com/dapacica/DL_finalproject_code/blob/main/FinalProjCleanColab.ipynb">https://github.com/dapacica/DL_finalproject_code/blob/main/FinalProjCleanColab.ipynb</a> <a href="#fnref:1" class="reversefootnote" role="doc-backlink">&#8617;</a></p> </li> </ol> </div> Augmenting Expert Domain Image Inputs for Enhancing Visual Language Models Performance 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/vig-algorithm-flow-project-proposal <h2 id="introduction">Introduction</h2> <p>Over the past few years, we have seen a surge in creation, adoption, and excitement around visual language models, specifically around Open AI’s CLIP model. Visual language models can bridge the gap between image and text, allowing tokenized understanding of the visual world around us. For instance, Meta released Segment Anything, a model with enhanced object detection through multimodal inputs like defined bounding boxes and text.</p> <p>After the recent surge with ChatGPT, we have begun to see advancements in the visual language model space to combine the image analysis and conversational tool. 
However, these models continue to struggle with answering questions about images derived from an expert domain, especially scientific literature.
images are sent to (1) visual encoders, (2) a perceiver resampler, and (3) through a dense group of layers.
Due to compute and time constraints, only a sample of ten images was utilized for the analysis.
In the end, we get dataset triples of the original image, colored image, and the list of nodes in the image.</p> <p>For example, for this image, we would have the following dataset.</p> <div class="row" style="border: 1px solid #000"> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> Original Image </div> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> Colored Image </div> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> Node List </div> </div> <div class="row" style="border: 1px solid #000"> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/0_43_labeled_styled-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/0_43_labeled_styled-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/0_43_labeled_styled-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/0_43_labeled_styled.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_43_labeled_styled-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_43_labeled_styled-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" 
srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_43_labeled_styled-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_43_labeled_styled.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> [Finanze Tyco Roadsafe Insurance, Greyhound Bbbytf Limited, Etablissements Lgac Lthalat Incorporated Indiana, Korte Pro-Cut Grayhawk Insurace North Carolina, Hutchins Sandvik Maryland, Nm Suc. Krsx Limited Michigan] </div> </div> <h3 id="experimentation">Experimentation</h3> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/experimentation_pipeline-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/experimentation_pipeline-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/experimentation_pipeline-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/experimentation_pipeline.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Experimentation Pipeline for the Inputs </div> <p>Bard uses Flamingo, a visual language model, to answer queries. We will provide an input image with or without the augmentation and a question about the graph into Flamingo, as illustrated in the figure above. Each image will be paired with a question in a specific category. 
<li>Based on the image, what is the path from _____ to _____? (The ground truth path involves nodes that only have one child node.)</li> <li>Based on the image, what is the path from _____ to _____? (The ground truth path involves nodes that have multiple child nodes.)</li>
Quantitatively, we plan to utilize an accuracy score to evaluate the percentage of questions that have been answered correctly in each metric.
Overall, the performance is quite limited when asking specific questions about path finding and child nodes.
When first embarking on this project, I believed that the VLM would struggle to pair the text and nodes together. However, the VLM actually proved to be quite impressive with the extraction of nodes and the names associated with them, making the need for a node list not quite necessary.
I designed a basic figure with just nodes to test this. The same prompt was passed into Bard with the images below. The only difference between the two images is the fact that in one image, the colors of the nodes are same, and in the other image, the colors of the nodes are different. In the results below, we can clearly see that the VLM is able to perform better with the colored nodes, as the VLM is able to distinguish between different nodes.</p> <div class="row" style="border: 1px solid #000"> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> Original Image </div> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> Colored Image </div> </div> <div class="row" style="border: 1px solid #000"> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_example_1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_example_1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_example_1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_example_1.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_example_1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" 
srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_example_1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_example_1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_example_1.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="row" style="border: 1px solid #000"> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> Prompt: What are the names of the nodes in this graph? </div> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> Prompt: What are the names of the nodes in this graph? </div> </div> <div class="row" style="border: 1px solid #000"> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> Answer: B1, B2, B3, B4, B5. </div> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> Answer: B1, B2, B3, B4. </div> </div> <p><br /> To support this argument, we look at the attention that CLIP places on segments of the image based on a caption. We specifically use CLIP because CLIP is the visual encoder in Flamingo. 
While this isn’t necessarily a rigorous proof, we can see that the attention on the nodes is placed stronger in the colored graph example rather than the regular graph example.</p> <div class="row" style="border: 1px solid #000"> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> Caption: Node B1, B2, B3, B4 </div> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> Caption: Node B1, B2, B3, B4 </div> </div> <div class="row" style="border: 1px solid #000"> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_explainability_example_1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_explainability_example_1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_explainability_example_1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes_explainability_example_1.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_explainability_example_1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_explainability_example_1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" 
srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_explainability_example_1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes_explainability_example_1.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>Through the examples and tests above, we can clearly see the VLM’s ability to extract nodes, especially with a visually distinguishing factor between the nodes like color. Since the VLM can do a pretty decent job of extracting the nodes, it makes sense that providing the VLM with the node list may not allow for great improvements in performance.</p> <p>So, if the VLM can extract the nodes relatively well, why is the performance still subpar?</p> <h3 id="difficulties-with-edge-dectection">Difficulties with Edge Detection</h3> <p>Aside from nodes, most graphs have edges, and for the questions asked in the experiments, understanding the connectivity was crucial to providing the correct answer. We actually observed that the colored graphs had answers that were closer to 100% accuracy in comparison to the regular graphs. 
To explore how VLMs understand the connections between nodes, I decided to ask Bard about some simple graphs to determine how it responded.</p> <div class="row" style="border: 1px solid #000"> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> Image </div> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/nodes1.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/colored_nodes1.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="row" style="border: 1px solid #000"> 
<div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> Prompt </div> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> Can you please tell me what the edges in the graph are? </div> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> Can you please tell me what the edges in the graph are? </div> </div> <div class="row" style="border: 1px solid #000"> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> Answer </div> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> The edges in the graph are: a -&gt; b, b -&gt; c, c -&gt; a </div> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> The edges in the graph are: B -&gt; A, C -&gt; A </div> </div> <p>Wow! It’s really surprising that the VLM is creating edges where there aren’t even edges? Also, the direction of the edges are wrong. Why would this be the case?</p> <p>Let’s explore a bit more about the individual arrows. Can a VLM accurately describe the arrow direction?</p> <div class="row" style="border: 1px solid #000"> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> Image </div> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/big_arrow-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/big_arrow-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/big_arrow-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/big_arrow.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 
mt-md-0" style="border: 1px solid #000"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/small_arrow-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/small_arrow-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/small_arrow-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-vig-algorithm-flow-project-proposal/small_arrow.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="row" style="border: 1px solid #000"> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> Prompt </div> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> Which way does the arrow point? </div> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> Which way does the arrow point? </div> </div> <div class="row" style="border: 1px solid #000"> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> Answer </div> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> The arrow in the image points to the right. </div> <div class="col-sm mt-3 mt-md-0" style="border: 1px solid #000"> The arrow in the image points to the left. </div> </div> <p>Well, if you take a look at the responses above, we notice that the VLM can accurately provide the arrow direction for larger arrows in comparison to smaller arrows. 
Since most of the graphs involve smaller arrows, we can see why the VLM could not tell the directionality of the arrow.</p> <h3 id="outside-information">Outside information</h3> <p>Beyond the graphical structure of the nodes, I found that Bard typically incorporated outside information that required background knowledge about the topic. These situations fall into two categories: useful outside information and irrelevant outside information. To distinguish between the two, I found that in the situations that Bard used graph algorithms and definitions, the outside information was useful in generating the answer. However, many times, Bard would look up specific companies or use graphs from other sources online.</p> <p>In this example, Bard was asked if there was a cycle in the graph, and Bard outputted the answer below. The VLM was very intelligent by quoting the definition of a cycle and using DFS to determine the existence of one. The step-by-step algorithm is correct and ultimately, leads to the solution. This shows that the incorporation of algorithms and accurate definitions can assist the VLM in answering queries.</p> <blockquote> <p>No, there is no cycle in the graph of the Nove 1-66Express organizational structure. A cycle is a path in a graph that starts and ends at the same vertex, and there is no such path in this graph. I verified this by tracing the graph and ensuring that I never returned to a vertex that I had already visited. I also used the following algorithm to check for cycles in graphs: Mark all vertices as unvisited. Choose a vertex and start a depth-first search (DFS). As you traverse the graph, mark each vertex as visited. If you ever reach a vertex that is already marked as visited, then there is a cycle in the graph. If you finish traversing the graph without ever reaching a visited vertex, then there is no cycle in the graph. 
I ran this algorithm on the graph of the Nove 1-66Express organizational structure and found that there are no cycles.</p> </blockquote> <p>However, outside information can be just as harmful as it is beneficial. Pulling from outside sources can be tricky for a VLM. It needs to remember to operate within the ground truths presented in the figure. In multiple instances, the VLM actually referenced the wrong figure because it forgot about the image provided with the prompt.</p> <p>Therefore, the VLM needs to be fine-tuned to filter the outside information it brings in to improve the accuracy of its answers.</p> <h2 id="conclusions-and-future-steps">Conclusions and Future Steps</h2> <p>In this study, I delved into the capabilities and limitations of VLMs, like Open Flamingo, in interpreting complex graphical structures within expert domains, such as scientific literature. The experiments, involving various image augmentation techniques such as color changes and node names, revealed key insights. While VLMs demonstrated proficiency in node extraction, they faced challenges in edge detection and understanding the connectivity between nodes. This was particularly evident when colored images outperformed non-colored ones, highlighting the importance of visual distinction for VLM comprehension. However, the addition of node lists did not significantly enhance performance, suggesting existing capabilities in node identification. The connectivity was difficult for the VLM to understand because of the size of the arrows.</p> <p>The findings of this research highlight a crucial challenge for VLMs: integrating domain-specific knowledge, especially for non-standard images like scientific diagrams. However, the small dataset size suggests that further research with a larger and more diverse dataset is necessary to validate these findings. 
In the future, this research can be applied to help improve prompting for graphical structures, provide insights on how to finetune a VLM for this task, and create a new interest in using VLMs for scientific diagrams.</p> Embeddings for Spatio-temporal Forecasting 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/spatiotemporal <h2 id="introduction">Introduction</h2> <p>Time series forecasting is an interdisciplinary field that affects various domains, including finance and healthcare, where autoregressive modeling is used for informed decision-making. While many forecasting techniques focus solely on the temporal or spatial relationships within the input data, we have found that few use both. Our goal was to compare two SOTA spatiotemporal models, the STAEformer and the Spacetimeformer, and determine why one works better than the other. The papers on both models did not feature each other in their benchmark evaluations, and we thought that analyzing their embeddings and identifying their failure modes could offer new insights on what exactly the models are learning from the dataset. We hypothesized that the Spacetimeformer would perform better as its proposed approach, sequence flattening with Transformer-based processing, seems to offer a more flexible and dynamic representation of spatiotemporal relationships that doesn’t depend on predefined variable graphs. We focused on forecasting in the field of traffic congestion, which is a pervasive challenge in urban areas.</p> <h2 id="related-work">Related Work</h2> <p>We focused on two SOTA spatiotemporal models that were evaluated on traffic forecasting datasets. The first is the STAEformer <d-cite key="liu2023staeformer"></d-cite>. STAEformer proposes a novel adaptive embedding that learns the spatio-temporal relations in the dataset. 
In their architecture, the input embedding is generated by combining the projected raw data (denoted by \(E_p\) in the embedding layer) with temporal embeddings (\(E_f\)) and the adaptive embeddings (\(E_a\)), which was used instead of an embedding solely focused on capturing spatial relations. This output is then fed into temporal and spatial transformer layers, followed by a regression layer.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/staeformer_architecture-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/staeformer_architecture-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/staeformer_architecture-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-spatiotemporal/staeformer_architecture.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Architecture of the Spatio-Temporal Adaptive Embedding transformer (STAEformer). <d-cite key="liu2023staeformer"></d-cite> </div> <p>The second is the Spacetimeformer <d-cite key="grigsby2023spacetimeformer"></d-cite>. Spacetimeformer uses embeddings generated from breaking down standard embeddings into elongated spatiotemporal sequences. The idea behind doing this is to enable the downstream tasks to learn direct relationships between variables at every timestep. In their architecture, these embeddings are fed into a variant of the transformer model using local, global, and cross self-attention. 
The figure below shows an intuitive visualization for this idea.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/spacetimeformer_architecture-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/spacetimeformer_architecture-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/spacetimeformer_architecture-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-spatiotemporal/spacetimeformer_architecture.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Architecture of the Spacetimeformer Embedding. <d-cite key="grigsby2023spacetimeformer"></d-cite> </div> <h2 id="dataset">Dataset</h2> <p>We used the PEMS08 dataset <d-cite key="pems08"></d-cite>, which contains traffic data in San Bernardino from July to August of 2016. Each data point consists of readings from 170 detectors along with the time of day and day of the week they were recorded. We initially considered using the PEMSBAY dataset <d-cite key="pemsbay"></d-cite>, which is widely used in traffic speed forecasting, but it was almost double the size of the PEMS08 dataset and took too long to train our model on.</p> <h2 id="methodology">Methodology</h2> <p>The problem statement is as follows: given the sensor readings across the 170 sensors for the previous N timesteps, we want to predict their readings for the next N timesteps. We tested the model with varying context lengths, but we found that the default value of 12 given in the STAEformer paper provided enough information to the model. 
We used huber loss as we wanted the model to converge faster in the presence of outliers, which was necessary given the limited compute that we had (training 50 epochs took around 3 hours).</p> <p>We trained STAEformer for 50 epochs, which was sufficient to achieve performance metrics similar to that of the paper. To compare the embeddings from Spacetimeformer, we retrained the model end to end after replacing the embedding layer in the model with Spacetimeformer’s embedding layer. To do this, we kept the context dimensions the same and flattened the input sequence along the input dimension and the dimension corresponding to the number of sensors. This structured the embedding layer so that it could learn the spatiotemporal relations across the sensors from different time frames.</p> <p>Replacing the embedding layer within the STAEformer with a pretrained embedding layer from the Spacetimeformer instead may seem like a more legitimate method to test the effectiveness of the embeddings, as we would basically be doing transfer learning on the embedding layer. However, the pretrained embeddings from Spacetimeformer might have been optimized to capture specific spatiotemporal patterns unique to its architecture, which was why we believe training the model end to end with the Spacetimeformer embeddings would result in a more accurate and contextually relevant integration of the pretrained embeddings into the STAEformer framework.</p> <p>After training, we wanted to provide visualizations of the embeddings from STAEformer and Spacetimeformer to show whether the learned embeddings are meaningful at all. To do this, we obtained the embeddings by passing in the raw data through the embedding layers of the loaded models and generated t-SNE plots with them. For the STAEformer, we focused solely on the adaptive embeddings as they were the parts of the embedding layer that captured spatiotemporal relations in the data. 
To generate the t-SNE plots, we had to reshape the embeddings so that they could be passed into the function so we flattened them across the model dimension. After fitting the t-SNE, we then unflattened the embeddings back to their original shape and plotted them. Each sensor was color coded with different colors, and the results can be shown in the next section. We hypothesized that the t-SNE plots would contain clusters grouped by either the sensors or the time the readings were recorded.</p> <p>After generating the t-SNE plots, we wanted to test the effects of perturbing the raw data on the embeddings. We wanted to know how the embeddings would change. For example, regardless of what the clusters represent, are they tighter? Will additional clusters be formed? Conversely, will some of the existing clusters break apart? In particular, we were hoping that augmenting the data would perhaps improve cluster formations in the worse looking embeddings, as there is a good possibility that the data itself isn’t good enough.</p> <h2 id="results">Results</h2> <p>The table below shows the results after training STAEformer and the STAEformer model with a Spacetimeformer embedding layer for 50 epochs each. Table of loss values:</p> <table> <thead> <tr> <th style="text-align: center">Embedding Layer</th> <th style="text-align: center">Train Loss</th> <th style="text-align: center">Validation Loss</th> </tr> </thead> <tbody> <tr> <td style="text-align: center">STAEformer</td> <td style="text-align: center">12.21681</td> <td style="text-align: center">13.22100</td> </tr> <tr> <td style="text-align: center">Spacetimeformer</td> <td style="text-align: center">12.42218</td> <td style="text-align: center">16.85528</td> </tr> </tbody> </table> <p>We can see that the STAEformer had better training and validation loss than the Spacetimeformer. While the train loss converged to similar values, the validation loss for the model using the STAEformer embedding layer was much better. 
So now that we know the STAEformer embedding layer seems to perform better than the Spacetimeformer embedding layer, we plotted the embeddings for both to analyze why this is the case. To do this, we passed a data point from the validation set through the embedding layer. The results are shown in the figure below.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/STAEformer-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/STAEformer-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/STAEformer-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-spatiotemporal/STAEformer.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> t-SNE plot of the STAEformer embeddings. </div> <p>The t-SNE plot for the STAEformer embeddings shows clearly separate clusters for most of the 170 different sensors. The shape of each cluster is a “snake-like” trajectory. Therefore, we know that the embeddings preserve some pattern-like notion across readings from a single sensor. We hypothesize that each of these trajectories represent the reading of a single sensor over time. There are a couple outliers, where the clusters are not grouped by color. One prominent example is the string of cyan, maroon, and moss points along the bottom of the plot. 
However, even these points have some clustering, though they may not be clustered by color.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/Spacetimeformer-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/Spacetimeformer-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/Spacetimeformer-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-spatiotemporal/Spacetimeformer.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> t-SNE plot of the Spacetimeformer embeddings. </div> <p>On the other hand, the t-SNE plot for the Spacetimeformer embeddings show no clear clusters across the same sensor. The distribution resembles a normal distribution, meaning that there is little pattern preserved in the embeddings. It becomes more difficult to differentiate between data points from the same sensor across time.</p> <p>In order to further analyze the effectiveness of each embedding layer, we perturbed the training data and re-trained each model. We were expecting the clusters from the STAEformer embeddings to remain largely the same, with some of the existing clusters possibly breaking apart due to the added noise. However, we were hoping that the Spacetimeformer embeddings would show more visible clusters after the raw data was perturbed. Given the characteristics of the embeddings, one possible output we expected were clusters containing multiple colors. 
An example is shown in the following image.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/ideal_tsne-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/ideal_tsne-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/ideal_tsne-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-spatiotemporal/ideal_tsne.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Ideal t-SNE plot of the Spacetimer embeddings. </div> <p>This would show that the Spacetimeformer successfully learned spatial relationships across the sensors at variable timesteps. Instead of each cluster representing the embeddings for one sensor, the presence of larger clusters with multiple colors could imply that the Spacetimeformer learned spatiotemporal relations among the corresponding sensors and embedded them into a larger cluster.</p> <p>The following table shows the results after training the model with the perturbed data.</p> <table> <thead> <tr> <th style="text-align: center">Embedding Layer</th> <th style="text-align: center">Train Loss</th> <th style="text-align: center">Validation Loss</th> </tr> </thead> <tbody> <tr> <td style="text-align: center">STAEformer (with perturbations)</td> <td style="text-align: center">13.58251</td> <td style="text-align: center">13.35917</td> </tr> <tr> <td style="text-align: center">Spacetimeformer (with perturbations)</td> <td style="text-align: center">13.42251</td> <td style="text-align: center">17.01614</td> </tr> </tbody> </table> <p>As expected, validation loss slightly increased for both models, and the STAEformer continued to have lower loss values than 
the model with the Spacetimeformer embedding layer.</p> <p>When we generated the t-SNE plots with the new embeddings, we obtained the following:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/STAEformer_perturbed-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/STAEformer_perturbed-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/STAEformer_perturbed-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-spatiotemporal/STAEformer_perturbed.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> t-SNE plot of the perturbed STAEformer embeddings. </div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_perturbed-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_perturbed-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_perturbed-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_perturbed.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> t-SNE plot of the perturbed Spacetimeformer embeddings. </div> <p>Both t-SNE plots for the STAEformer and Spacetimeformer embeddings look the same as when the models were trained on the original, unperturbed data. 
So unfortunately, the augmentation had little to no effect on the embedding layers for these two models.</p> <p>Since the t-SNE plots can be hard to parse with the human eye, we decided to focus on the embeddings for the most relevant features of the dataset and see how they compared between the Spacetimeformer and STAEformer. In parallel, this would enable us to identify the failure modes of the dataset and augment those features to see if they improve the model performance. In order to do this, we used PCA to identify the principal components. From there, we found which features help explain the most variance in the dataset and identified those as the features that had the largest impact on the learned embeddings.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/pca_heatmap-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/pca_heatmap-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/pca_heatmap-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-spatiotemporal/pca_heatmap.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Z-normalized correlation matrix between the original PEMS08 dataset and PC-space, normalized by explained variance. <d-cite key="pca"></d-cite> </div> <p>This heatmap shows the top 10 principal components and the top 10 features that correlate with each principal component. From this heatmap, we can see that the 9th sensor in the dataset is the most relevant feature. 
Therefore, we can find the corresponding embedding to be the most relevant.</p> <p>Using only the 5 most relevant embeddings obtained from PCA, we re-graphed the t-SNE plots. This helped us to narrow our attention to the most important embeddings.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/STAEformer_top5-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/STAEformer_top5-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/STAEformer_top5-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-spatiotemporal/STAEformer_top5.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> t-SNE plot of the top 5 STAEformer embeddings. </div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_top5-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_top5-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_top5-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-spatiotemporal/Spacetimeformer_top5.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> t-SNE plot of the top 5 Spacetimeformer embeddings. </div> <p>As expected, the embeddings for the most relevant sensors in the STAEformer all maintain the “snake-like” trajectory. 
However, the embeddings for even the most relevant sensors in the Spacetimeformer are seemingly random, and have no pattern across the points.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/pca_cumulative-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/pca_cumulative-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-spatiotemporal/pca_cumulative-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-spatiotemporal/pca_cumulative.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Cumulative PCA plot of the original PEMS08 dataset. <d-cite key="pca"></d-cite> </div> <p>We found that the top 25 sensors explained 95% of the variance in the dataset, so we did a quick experiment where we augmented the rest of the 145 sensors (as opposed to the entire training dataset) to see how that affected the learned embeddings. For this augmentation, we expected the results to not improve by much since the learned embeddings for even the most relevant sensors in Spacetimeformer didn’t form visible clusters in the t-SNE plots. As expected, the results were almost identical to the ones generated from augmenting the entire dataset.</p> <h2 id="conclusion-discussion-next-steps">Conclusion, Discussion, Next Steps</h2> <p>There are a couple of reasons why we think the Spacetimeformer performed worse than the STAEformer overall. The first explanation that came to mind is that the readings across different sensors may be mostly independent from one another. The color coded t-SNE plots for the STAEformer clearly separate each sensor into its individual cluster. 
In this case, the Spacetimeformer would not be suited for the task as its embedding layer solely focuses on learning spatiotemporal relationships, while the STAEformer also contains an embedding layer that is solely dedicated to learning temporal relationships.</p> <p>A second, more plausible explanation deals with the embedding architecture. The difference in performance between the STAEformer and the Spacetimeformer in time series forecasting shows the importance of adaptive embeddings in capturing spatio-temporal relationships. While the STAEformer introduces adaptive embeddings to comprehend the patterns in the data, the Spacetimeformer relies on breaking down standard embeddings into elongated spatiotemporal sequences. The t-SNE plots show that the STAEformer’s adaptive embeddings generate clusters representing sensors with snake-like trajectories, providing a visualization of the model’s ability to capture spatio-temporal patterns. In contrast, the Spacetimeformer’s embeddings follow a scattered distribution, indicating challenges in identifying clusters. This suggests that the Spacetimeformer’s approach may face limitations in effectively learning the spatio-temporal relationships within the PEMS08 dataset, and potentially traffic data in general.</p> <p>Having said all this, the resilience of both the STAEformer and Spacetimeformer to perturbations in the raw data showcases the robustness of their learned representations. Despite the added augmentations, the fact that the t-SNE plots remain largely unchanged indicates the stability in the embedding layers. This may be attributed to the models’ ability to learn a generalizable representation of the spatio-temporal patterns resilient to changes in the input data, regardless of how accurate they may be. This may also be attributed due to the dataset itself. The PEMS08 dataset’s readings may already have been noisy, as it’s unlikely that the readings were recorded with perfect accuracy. 
We would like to explore these implications of the embeddings’ robustness in our future work.</p> <p>Another possible avenue we would like to explore is why certain sensors (such as the 9th sensor) are more relevant than others beyond just the theory. We came up with a couple hypotheses. First, it’s probable that this particular sensor is placed at important intersections, such that cars that pass this sensor are guaranteed to pass many other sensors. This would mean that there exists a way to extrapolate the readings from this sensor to the readings from other sensors. Tangentially related, it’s possible that two nodes are negatively correlated, such that the cars that pass through one node tend to not pass through another node, and the model extracts readings based on this relationship. If neither of these ideas is the case, the exact opposite concept could be true: the sensor is at a location where the speed data is very consistent, such as a highway. This allows the readings from the sensor to give very accurate predictions. The next step would be to figure out the geographical locations of the sensors and determine whether the ones we found to be the most relevant seem to be placed at important locations.</p> <p>We would also like to do some more experimentation in the future. We used a personal GPU for training (an RTX 2070), and it took a few hours to train the model for every one of our experiments which made it difficult to tune our hyperparameters. Further experiments we would like to run with more compute include running the experiments on the Spacetimeformer model architecture instead of the STAEformer architecture and replacing its embedding layer with STAEformer’s. We mentioned before that the learned embeddings may have been optimized for the model architecture it’s from. 
Therefore, if the resulting plots from the embeddings look similar to the ones we have generated, then we have conclusive evidence that the STAEformer input embedding does a better job of learning the spatio-temporal relations in the data.</p> In the pursuit of cheap and robust word embeddings 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/sentence-embeddings <h2 id="introduction-and-motivation">Introduction and Motivation</h2> <p>Large Language Models (LLMs), such as Bard and OpenAI’s GPT-4 are typically used to obtain data embeddings of text. These embeddings are quite rich, encoding common-sense semantic information. A good embedding naturally aligns with our intuitive human understanding of language: at a high level, similar text/words are clustered together, while dissimilar text/words are farther apart.</p> <p>High-quality embeddings also satisfy semantic equations that represent simple analogies. Define \((\text{some_text})\) to be the embedding of some string “some_text.” Then, a traditionally good embedding will typically obey linguistic equations like <d-cite key="Vylomova2016"></d-cite>:</p> \[(\text{King}) - (\text{Man}) + (\text{Woman}) ≈ (\text{Queen})\] \[(\text{Paris}) - (\text{France}) = (\text{Warsaw}) - (\text{Poland})\] <p>However, repeatedly querying LLMs for large-scale analysis is expensive. Many utilize thousands of cloud GPUs and are constantly fine-tuned, adding to their cost. This cost barrier discourages researchers—especially those with less funding—from making use of these embeddings for their own models. Repeated strain on LLM’s infrastructure can even cause a negative environmental impact. However, we often don’t need embeddings as good as these fancy ones to conduct certain types of research. Specifically, it would be desirable for a researcher to choose their embedding quality, with the understanding that higher-quality embeddings take longer, and vice versa. 
Such a model should be robust and resistant to being trained on a small amount of incorrect data (which can happen by accident when scraping text, or due to malicious behavior).</p> <p><strong>These issues motivate the following research question: on how little data can we train a text embedding model—with OpenAI embedding as ground truth—such that our embeddings are good enough quality? And can we quickly preprocess the data to improve our results?</strong></p> <h2 id="background-and-literature-review">Background and Literature Review</h2> <p>While there is some existing literature on generating word embeddings more “cheaply,” significant differences exist with current methodologies. Broadly, this process is called knowledge distillation (KD), which aims to “distill” knowledge from a larger teacher model (in our case, OpenAI embeddings) into a smaller student model.</p> <p>For example, Shin et al. discuss a novel distillation technique that “distills” a “student” embedding model from a “teacher” model <d-cite key="Shin2019"></d-cite>. Importantly, this work focuses on reducing the dimensionality of the “student” word embeddings without compromising accuracy—which is fundamentally different from our goals. While our OpenAI embeddings form the “teacher,” our student model should output embeddings of the same dimensionality. Our dimensionality preservation is useful if we want to directly translate general patterns or trends from our student’s embeddings to the OpenAI embeddings.</p> <p>Gao et al. take a different approach. They propose a KD framework for contrastive sentence embeddings, DistilCSE. It works by first applying KD on a large amount of unlabeled text before fine-tuning the student model via contrastive learning on limited labeled data <d-cite key="gao2023distilcse"></d-cite>. Contrastive learning in this domain is promising, especially since synonyms and pairs of similar words naturally form positive pairs. 
However, in our context, a direct application of contrastive learning presents some issues.</p> <p>For example, suppose we had some Euclidean distance threshold A and B, such that, for any two word embeddings \(c\) and \(d\):</p> <p>If the distance between \(c\) and \(d\) is less than A, then define \(c\) and \(d\) to be positive pairs for contrastive learning.</p> <p>If the distance between \(c\) and \(d\) is greater than B, then define \(c\) and \(d\) to be negative pairs for contrastive learning.</p> <p>While this process (and others like it) isn’t too resource-intensive, it has a few issues, even if we are able to define proper thresholds A and B. Firstly, it “wastes” pairs of data where the distance is in between A and B. Secondly, information about direction is easy to lose—so while a student would learn to embed similar words closer together and dissimilar ones further apart, the student may be invariant to direction and sensitive only to Euclidean distance in the n-dimensional space. This is not ideal.</p> <p>Other related state-of-the-art approaches also present issues. Gao et al. describe another approach involving running data through an encoder multiple times with standard dropout to generate positive pairs instead of searching for them in the data itself <d-cite key="Gao2022"></d-cite>. While this approach is promising, the approach involves repeatedly using somewhat slow pre-trained text embedders, which is exactly what we want to avoid.</p> <p>To reduce computational complexity while still reaping the benefits of preprocessing, we look to a paper by Rahimi et al. They explain how removing stop words (common words, like “a,” “the,” etc.) and punctuation improves sentence embedding quality, for a variety of reasons <d-cite key="Rahimi2023"></d-cite>. 
Even though we’re focusing on word embeddings—and not sentence embeddings—we can adapt this general approach to our project with lemmatization, which applies morphological analysis to words to simplify them to a base form. For example, “fighting” turns into “fight,” “was” turns into “be,” “octopi” turns into “octopus,” and so on.</p> <p>This reduces data sparsity by consolidating related forms of words into a single representation, which is especially helpful for low-frequency words. This in turn helps the model generalize across tenses and other variations as it can focus on the “core” differences of words rather than auxiliary modifiers. We thus plan to investigate lemmatization in this context.</p> <p>We struggle to find closely related literature about student models’ resistance to poisoned data. Thus, we decided to investigate this aspect as well.</p> <p>To conclude our literature review, while different variants of KD exist, we decide to focus on a modified response-based KD, in which the teacher model sends final predictions to the student network, which then directly mimics these predictions by minimizing some loss <d-cite key="Yang2023"></d-cite>.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/response_based_KD-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/response_based_KD-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/response_based_KD-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-sentence-embeddings/response_based_KD.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Generic response-based knowledge distillation. 
Image credits: Gou, J., Yu, B., Maybank, S.J. et al. Knowledge Distillation: A Survey. Int J Comput Vis 129, 1789–1819 (2021). https://doi.org/10.1007/s11263-021-01453-z </div> <p>Other distillation approaches—such as feature-based KD, relation-based KD, and the contrastive approach described above—do exist, but require more intimate knowledge of the teacher’s features and/or layers <d-cite key="yang2023attention"> </d-cite> <d-cite key="Park2019"></d-cite>. This is not reasonable information we can expect to have, as companies are often incentivized to obfuscate their specific word embedding architectures.</p> <h2 id="methods-and-experiments">Methods and Experiments</h2> <p>We center our studies on a standard dataset of 10k English words scraped from high-level Standard English texts that’s been empirically validated for quality. We also use the OpenAI API to obtain text-embedding-ada-002 embeddings of the entire dataset to use as ground truth. While these aren’t necessarily the best embeddings, even among OpenAI’s own embeddings, they are the best choice given our computational restrictions.</p> <p>Now, we detail our model architecture. Our baseline model (call this Model A) is a sequential ReLU and nn.Embedding layer followed by L2 normalization. Model A serves as a crude baseline—therefore, we do not investigate it as deeply as the more complex model that followed due to large differences in performance.</p> <p>Instead, we focus our efforts on the more complex Model B, detailed below in Figure 1 in the context of our pipeline. Model B utilizes an nn.Embedding layer, followed sequentially by 2 blocks. The first uses ReLU activation followed by a linear layer of size \(\frac{\text{embedding_dim}}{2}\). The second layer is the same, except the final Linear layer outputs embeddings with the full “embedding_dim.” Notably, we use L2 normalization to make sure each embedding vector has magnitude 1 (such that all embeddings exist in an n-hypersphere.) 
Since all embeddings are unit embeddings, using cosine embedding loss along an Adam optimizer is natural. Thus, instead of computing cosine similarities between teacher and student vectors, we can just focus on minimizing this embedding loss.</p> <p>For the training stage, we train our embedding model to map words to vector embeddings on Google Colab with an Nvidia T4 GPU. There may be up to 3 processing steps, as depicted in Figure 1:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/architecture_diagram-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/architecture_diagram-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/architecture_diagram-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-sentence-embeddings/architecture_diagram.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Fig. 1: An overview of Model B’s architecture in context of our experimentation. </div> <p>First, we choose whether or not to lemmatize the entire dataset before proceeding.</p> <p>Second, the training split. We train our embedding models above on each of the following proportions (call this \(p\)) of the dataset: 0.005, 0.009, 0.016, 0.029, 0.053, 0.095, 0.171, 0.308, 0.555, and 1.00.</p> <p>Finally, we choose whether or not to poison 10 percent of the entire word dataset (not the training dataset). 
When a word is poisoned, the model incorrectly believes that some random unit vector is the ground-truth embedding instead of the actual OpenAI embedding.</p> <p>For each such model, we train for up to 80 epochs, limited by our computational resources.</p> <p>We then evaluate the model’s embeddings against the ground truth with multiple metrics—cosine similarity (via the embedded cosine loss), graphically via distributions of the embedding means, linguistic math, etc.</p> <p>Taken together, this methodology is comprehensive.</p> <h2 id="results-and-analysis">Results and Analysis</h2> <h3 id="model-a-the-baseline">Model A, the Baseline</h3> <p>First, here is a graph of training up our baseline Model A (Figure 2) and our augmented Model B (Figure 3). The difference in training epochs (80 for Model A, and 60 for Model B) is due to limited resources. This doesn’t matter much, as there is a clear, near-linear relationship between \(p\) and training time, which we use to estimate the computational resources used. Thus, we consider \(p\) as inversely proportional to the computational resources used for all our experiments.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/bad_model_time-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/bad_model_time-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/bad_model_time-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-sentence-embeddings/bad_model_time.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Fig. 
2 </div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_time-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_time-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_time-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_time.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Fig. 3. </div> <p>For Model A (with no lemmatization, no data poisoning), we also want to visualize the tradeoffs between the number of epochs trained, the training proportion \(p\), and the training loss to establish some baseline intuition. To this end, we take inspiration from the game theoretic concept of Pareto efficiency, which aims to find equilibria where no change improves one of these 3 factors without hurting one of the other 2.</p> <p>We also wanted to visualize the tradeoffs between the number of epochs trained, the training proportion, and the cosine embedding loss, since we are motivated to find the optimal balance of these 3 factors. See Fig. 
4.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/bad_model_pareto-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/bad_model_pareto-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/bad_model_pareto-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-sentence-embeddings/bad_model_pareto.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Fig. 4. </div> <p>Unfortunately, Fig. 4 is not particularly enlightening. Training loss decreases as the number of epochs increases and as training proportion \(p\) increases. There are also no local minima or maxima of interest. Figures 5 and 6 also confirm this with their plots of distributions of embedding means. Specifically, as we tend to move towards the right and bottom of Fig. 6, i.e. we train longer and on more data, we simply seem to approach the true distribution (Fig. 
5) without anything of note.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/bad_model_full_means-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/bad_model_full_means-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/bad_model_full_means-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-sentence-embeddings/bad_model_full_means.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Fig. 5. </div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/bad_model_all_means-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/bad_model_all_means-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/bad_model_all_means-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-sentence-embeddings/bad_model_all_means.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Fig. 6. </div> <p>These results motivate us to look beyond our Model A. 
Our results from this point focus on Model B because we didn’t want a poorly performing model like Model A to be a true control, it merely served as an intuitive baseline.</p> <h3 id="model-b-the-baseline">Model B, the Baseline</h3> <p>As in the previous part, we obtain a Pareto-like graph for Model B, without any lemmatization and data poisoning. Firstly, the cosine embedding losses are much lower than before, due to the improved model architecture. More interestingly, after about 10 iterations, the training loss seems to stabilize across all versions of the model, potentially suggesting that training longer may not be worthwhile.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_nolemma_nopoison_pareto-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_nolemma_nopoison_pareto-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_nolemma_nopoison_pareto-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_nolemma_nopoison_pareto.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Fig. 7. </div> <p>Since this is our base model, we don’t investigate further.</p> <h3 id="model-b-lemmatization-no-poisoned-data">Model B, Lemmatization, No Poisoned Data</h3> <p>Now, we look to Model B, with lemmatization, but no poisoned data. The Pareto-like curve for this is telling (Fig. 8), with it looking very similar to the baseline Model B’s. 
As before, this suggests that training for longer may not be worthwhile, and could potentially lead to overfitting.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_pareto-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_pareto-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_pareto-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_pareto.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Fig. 8. </div> <p>We also have a distribution of the means of embeddings for the whole dataset (Fig. 9) and from each variant of the model at different epochs (Fig. 10). 
Again, the results don’t say anything surprising: as we train on more data for longer, the distribution approaches that of the training dataset.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_full_means-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_full_means-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_full_means-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_full_means.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Fig. 9. </div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_all_means-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_all_means-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_all_means-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_nopoison_all_means.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Fig. 10. 
</div> <p>To check for overfitting, we will later validate our model on simple linguistic tests, as described in the very beginning. Specifically, we will validate our model’s performance on linguistic math against OpenAI’s performance.</p> <h3 id="model-b-lemmatization-poisoned-data">Model B, Lemmatization, Poisoned Data</h3> <p>The following is the Pareto-like curve, except now we poison 10 percent of the entire dataset, as described in Methods/Experiments. Curiously, we find a local minimum at approximately \(p = 0.1\) and ~20 epochs, demonstrating that our overall approach of training on a small fraction of the dataset naturally resists moderate-scale adversarial attacks on our ground-truth embeddings. Of course, the addition of poisoned data means that the loss values are on average higher than those in the previous subsection, where there was no poisoned data.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_pareto-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_pareto-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_pareto-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_pareto.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Fig. 11. </div> <p>Again, looking at the distribution of the means of embeddings (see below), we see that models that trained on too much of the data are completely ruined. We don’t even need to compare these distributions against the whole-model distribution to see this. 
This result demonstrates that even a relatively small amount of poisoned data can manipulate a naive embedding model trained on an entire dataset.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_all_means-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_all_means-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_all_means-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-sentence-embeddings/good_model_yeslemma_yespoison_all_means.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Fig. 12. </div> <h3 id="the-effects-of-data-poisoning-and-surprising-robustness">The Effects of Data Poisoning and Surprising Robustness</h3> <p>As discussed previously, we want to externally validate our models with both linguistic equations and pairs of synonyms. Essentially, we want to check that our student groups together similar words like the OpenAI teacher. Since our poisoned model performed best with \(p = 0.095,\) we use this training proportion to compare Model B with lemmatization, but no poisoned data to Model B with lemmatization and poisoned data.</p> <p>For clarity’s sake, we focus on single a representative example of our validation results in this blog. Specifically, we look into “nonviolent” and “antiaggressive,” which intuitively should exist close together in the n-dimensional unit hypersphere. 
Using dimensionality reduction techniques to visualize this in 2D, we obtain the following:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/good_models_yeslemma_yesnopoison_linguisticmath-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/good_models_yeslemma_yesnopoison_linguisticmath-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-sentence-embeddings/good_models_yeslemma_yesnopoison_linguisticmath-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-sentence-embeddings/good_models_yeslemma_yesnopoison_linguisticmath.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Fig. 13. </div> <p>The poisoned model is surprisingly performant, performing decently against both the unpoisoned model and the OpenAI model. These results support our notion that student models that train on as little of the data as possible are somewhat resistant to uniform, random adversarial data poisoning. This empirical result is encouraging, especially since our data poisoning threshold was somewhat high.</p> <h2 id="conclusion-discussions-and-future-directions">Conclusion, Discussions, and Future Directions</h2> <p>On balance, our results help us answer our question about how to best mimic OpenAI’s word embeddings without excessive API calls. We utilize a spin-off of a response-based KD architecture to train our student model under different conditions, demonstrating both that certain preprocessing (lemmatization) improves our embedding model and that training on smaller amounts of data creates more robust models that resist adversarial data. 
Our initial results demonstrate promise and serve as a call to action for others to research other cheap, robust word embedding models.</p> <p>To be clear, there are certainly many limitations to our study. For one, we keep our modeling architecture simpler due to our limited compute, while a real model would certainly use a different architecture altogether. Our dataset was also on the smaller side and doesn’t fully represent the English language. Also, our implicit use of time as a proxy for computation (especially on the erratic Google Colab) is imperfect. Also, preprocessing (including, but not limited to, lemmatization) may require substantial computational resources in some cases, which we don’t account for.</p> <p>Additionally, many of the constants that we chose (such as the 10 percent data poisoning threshold, the proportions of data we trained on, etc.) are arbitrarily chosen due to limited compute. This could’ve caused unexpected issues. For example, the output dimension of embedding Model B, 1536, is more than 10 percent the size of the dataset (10k). Thus, due to our relative lack of data, our trials with data poisoning can encourage non-generalizable memorization, which is not ideal.</p> <p>Future directions would include exploring other types of preprocessing, as hinted at in our literature review. We could also look into different types of adversaries—perhaps smarter ones that actively feed information that they know to be detrimental to the model, instead of some random unit vector. 
While we didn’t have robust supercomputer access, we’d also love to be able to test out fancier embedding architectures.</p> <p>Finally, we’d like to thank the 6.S898 faculty and TAs for their support!</p> Leveraging Representation Engineering For LLM’s In-Context-Learning 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/representationengineering-incontextlearning <h1 id="introduction">Introduction</h1> <p>Emerging capabilities in deep neural networks are not well understood, one of which is the concept of “in-context learning” (ICL), a phenomenon where the a Large Language Model (LLM)’s understanding of the prompt and ability to answer accordingly drastically increases after being shown some examples that answer the question. Evaluating in-context learning and understanding why the behavior happens is both an interesting theoretical research question and a practical question that informs directions to conduct research that further advances LLM capabilities by, say, exploiting more of in-context learning.</p> <p>We attempt to explore the phenomenon of in-context learning by leveraging another exciting field of work on mechanistic interpretability where researchers set out to understand model behaviors by interpreting and editing internal weights in models. One such work that we base on is Representation Engineering by Zou et al. (2023)<d-cite key="zou2023representation"></d-cite> , where they construct a set of training text stimuli to probe LLM activations and use such stimuli to identify a direction that accurately predicts the underlying concept based on the neural activations of the model. This approach allows us to elicit readings of representation and control such representation.</p> <p>We propose to use methods in Zou et al. (2023) <d-cite key="zou2023representation"></d-cite> to evaluate in-context learning. 
There have been no previous attempts to examine the model internals holistically in an LLM
(2022) <d-cite key="zhou2022teaching"></d-cite> evaluates how LLM can become better at solving algorithmic problems through in-context learning, a task that LLM traditionally struggles at.</p> <p>In other scenarios, the LLM does not need to rely on prompts at all and can deduce the pattern from the few-shot examples alone to predict the answer. While there is no universal definition of in-context learning and its meaning has shifted over time, we define it as the performance boost to answer questions based on a limited amount of examples (as the context).</p> <p>Interesting, Min et al. (2022) <d-cite key="min2022rethinking"></d-cite> observes that such ICL phenonemon is observed as long as examples are given, and a mismatch between input and output pairs would not hinder the ability of models performing ICL and thus its performance on the tasks. Wei et al. (2023) <d-cite key="wei2023larger"></d-cite> further corrobates this work by finding on small models but show that as models scale, the ability to pick up on flipped patterns when given in-context examples with flipped labels and override semantic priors is stronger.</p> <h3 id="theories-on-why-icl-happens">Theories on why ICL happens</h3> <p>While the concept of ICL is well studied, the underlying mechanism of ICL is not well understood. Xie et al. (2022) <d-cite key="xie2022explanation"></d-cite> explains the phenomenon of ICL as an Implicit Bayesian Inference, where the in-context learning prompt serves as a stimulus for the model to go “locate” corresponding concept stored in the model’s latent space that the LM has learned implicitly during pre-training. They study this by generating a simple pretraining distribution that parameterizes the transition of a Hidden Markov Model (HMM) and another prompting distribution. In this setting, the authors reduce the ICL task to Bayesian inference to map the prompting distribution to the pretraining distribution.</p> <p>Akyürek et al. 
(2022) <d-cite key="akyürek2023learning"></d-cite> further explains that Transformer-based in-context learners implement standard learning algorithms implicitly by encoding smaller models modularized to perform each specific tasks and update them based on the new in-context exampless. von Oswald et al. (2023) <d-cite key="vonoswald2023transformers"></d-cite>claims that Transformer-based in-context learners is similar to gradient-based meta-learning formulations where they found that the Transformer can learn smaller models of a certain concept by gradient descent in their forward pass.</p> <p>Furthermore, Olsson et al. (2022) <d-cite key="olsson2022context"></d-cite> draws parallel from ICL to a more understood phenomenon of Induction Head, where attention-only Transformers picks up on the algorithm to predict next tokens by searching for a previous occurance of the last token and copying the same next token from previous occurences. They claim that this can be a potential mechanism to explain ICL.</p> <p>While many hypotheses and theories have been proposed to explain ICL, most explorations to prove their theory has been small in scale, and the literature lacks a study on the large-scale LMs’ internals when performing ICL.</p> <h3 id="model-editing--representation-engineering">Model Editing &amp; Representation Engineering</h3> <p>We’ll use the Representation reading and controls methods presented in <a href="https://arxiv.org/pdf/2310.01405.pdf">Zou et al. (2023)</a> to understand the context where the model attends to and discover directions that indicate such reasoning.</p> <p>Relatedly, there have been a recent surge in research related to model knowledge editing, including Meng et al. (2023) <d-cite key="meng2023massediting"></d-cite>, Zhong et al. (2023) <d-cite key="zhong2023mquake"></d-cite>, and Hernandez et al. (2023) <d-cite key="hernandez2023inspecting"></d-cite> that demonstrate different methods for locating and editing factual associations. 
Other work, including Shao et al. (2023) <d-cite key="shao2023gold"></d-cite> and Belrose et al. (2023) <d-cite key="belrose2023leace"></d-cite>, have shown results on erasing larger-scale memory units such as concepts. Li et al. (2023) <d-cite key="li2023inferencetime"></d-cite> applies such concept erasion techniques by conducting Inference Time Interference, where one can find a direction of causal influence on “truthfulness” data and increase the activations along that direction to increase truthfulness, scoring better on the TruthfulQA dataset.</p> <h1 id="experiment-setup">Experiment Setup</h1> <h3 id="datasets">Datasets</h3> <p>We adopt a total of 30 datasets on binary classification, (sentiment analysis, natural language inference, true/false inference) and multiple choices; 16 datasets are used by Min et al. (2022) <d-cite key="min2022rethinking"></d-cite>, plus 12 extra datasets in the <code class="language-plaintext highlighter-rouge">tweet_eval</code> and <code class="language-plaintext highlighter-rouge">ethos</code> dataset families, <code class="language-plaintext highlighter-rouge">rotten_tomatoes</code>, and <code class="language-plaintext highlighter-rouge">ade_corpus_v2-classification</code>. Following Min et al. (2022)<d-cite key="min2022rethinking"></d-cite>, we only use the test set to avoid potential cross-contamination with the data that the model is pretrained on. reserve <code class="language-plaintext highlighter-rouge">k=64</code> examples in the test for few-shot training, and the rest are used for testing.</p> <h3 id="training-data-generation">Training Data Generation</h3> <p>For training, we construct a set of context pairs for each dataset, each context pairs containing the same examples but different instructions. 
The instructions are “Pay attention to the following examples” and “Ignore the following examples” respectively, in the hope that by stimulating two opposites and examining the difference, we can find a Context Vector that represents what the model draws from. We then truncate the example at each and every token till the last 5 tokens, so we can get a neural activation reading for each of the tokens.</p> <p>A sample training data input using the <code class="language-plaintext highlighter-rouge">rotten_tomatoes</code> dataset is as follows:</p> <blockquote> <p>[INST] Pay attention to the following examples: [/INST]</p> </blockquote> <blockquote> <p>offers that rare combination of entertainment and education.</p> </blockquote> <blockquote> <p>positive.</p> </blockquote> <blockquote> <p>a sentimental mess that never rings true .</p> </blockquote> <blockquote> <p>negative.</p> </blockquote> <blockquote> <p>[INST] Ignore the following examples: [/INST]</p> </blockquote> <blockquote> <p>offers that rare combination of entertainment and education.</p> </blockquote> <blockquote> <p>positive.</p> </blockquote> <blockquote> <p>a sentimental mess that never rings true .</p> </blockquote> <blockquote> <p>negative.</p> </blockquote> <p>Each context pair is identical except for the instructions. We use the context pairs to stimulate the model to learn the context and use the context vector to control the model’s behavior.</p> <h3 id="testing-data-generation">Testing Data Generation</h3> <p>For testing data, we use 3 input-labels pairs as the prompt, with the first two pairs serving as the in-context examples, and the last pair serving as the question that we actually want to test on, obfuscating the label from the prompt.</p> <p>A sample testing data input using the <code class="language-plaintext highlighter-rouge">rotten_tomatoes</code> dataset is as follows:</p> <p>Input:</p> <blockquote> <p>[INST] offers that rare combination of entertainment and education. 
[/INST]</p> </blockquote> <blockquote> <p>positive.</p> </blockquote> <blockquote> <p>[INST] a sentimental mess that never rings true . [/INST]</p> </blockquote> <blockquote> <p>negative.</p> </blockquote> <blockquote> <p>an odd , haphazard , and inconsequential romantic comedy .</p> </blockquote> <p>Label:</p> <blockquote> <p>negative.</p> </blockquote> <h3 id="model">Model</h3> <p>We have explored using two models with 7 billion parameters, including <code class="language-plaintext highlighter-rouge">Mistral-7B-Instruct-v0.</code> and <code class="language-plaintext highlighter-rouge">Llama-2-7b-hf</code>; while we have found preliminary results consistent between the two models, all of our results later reported are from <code class="language-plaintext highlighter-rouge">Mistral-7B-Instruct-v0</code> for consistency and due to a constraint on computational power and time.</p> <h3 id="training-infrastructure">Training Infrastructure</h3> <p>We used the MIT Supercloud infrastructure and a local machine with a single RTX 4090 GPU to train the model.</p> <h1 id="results">Results</h1> <p>We present results first on finding the Context Vector in the embedding space, then on using the Context Vector to control model outputs and evaluate their performance.</p> <h3 id="representation-reading">Representation Reading</h3> <p>We use the Representation Reading method presented in Zou et al. (2023) <d-cite key="zou2023representation"></d-cite> to find the Context Vector. 
Specifically, we adopted the setup of the instruction response pairs where for a given function $<code class="language-plaintext highlighter-rouge">f</code>$ and pairs of instructions $<code class="language-plaintext highlighter-rouge">x_i</code>$ and $<code class="language-plaintext highlighter-rouge">y_i</code>$, we denote the model’s response truncated at the $<code class="language-plaintext highlighter-rouge">j</code>$-th token as $<code class="language-plaintext highlighter-rouge">f(x_i)_j</code>$ and $<code class="language-plaintext highlighter-rouge">f(y_i)_j</code>$ and take the neuron activity at the last token of each of the responses, namely the activations of each and every token in the response.</p> <p>We then perform PCA on the difference of the activations of the two instructions, namely $<code class="language-plaintext highlighter-rouge">f(x_i)_j - f(y_i)_j</code>$ and find the first principal component $<code class="language-plaintext highlighter-rouge">v</code>$ that maximizes the difference in the embedding space.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_tomato-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_tomato-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_tomato-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/correlation_tomato.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Graph plotting the correlation between the Context Vector sign and actual 
dataset label on Rotten Tomatoes dataset. The x-axis is the layer and the y-axis is the correlation. </div> <p>More surprisingly is the fact that we can find a clean representation of such Context Vector that correlates decently with the model inputs.</p> <p>We use t-SNE to visualize the difference in the embedding space on the inputs of the 30 datasets across 32 different layers and report the results below.</p> <!-- <figure> <img src="assets/img/2023-11-08-representationengineering-incontextlearning/tsne_data.png" class="img-fluid" alt="Description of the first image"> <figcaption>Caption for the first image</figcaption> </figure> --> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_data-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_data-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_data-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_data.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> t-SNE plot of the embedding space of the Context Vectors across the 30 datasets and 32 layers, color coded by dataset. 
</div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_layers-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_layers-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_layers-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/tsne_layers.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> t-SNE plot of the embedding space of the Context Vectors across the 30 datasets and 32 layers, color coded by layers. </div> <p>As shown in the figure, we find that the vectors are clustered by dataset, indicating that the Context Vectors are dataset-specific. There are no clear patterns across dataset or between different layers of the Context Vectors, further indicating that in-context learning activates different parts of the model’s latent space with information about different types of tasks.</p> <p>We also conducted scans for neuron activities in the Context Vector across the different tokens of an example sequence in a similar style as Zou et al. (2023) <d-cite key="zou2023representation"></d-cite>, for which the previous work has referred to as Linear Artificial Tomography (LAT) scans.</p> <p>The following are the LAT scans for the neuron activities corresponding to a Context Vector trained on <code class="language-plaintext highlighter-rouge">rotten_tomatoes</code> sentiment analysis dataset evaluated on different dataset sequences. 
The following graphs further corroborate the findings above on the dataset-specificity of in-context learning; while the a sequence from the <code class="language-plaintext highlighter-rouge">rotton_tomatoes</code> dataset result in high neural activities for the Context Vector, most sequences from the other dataset do not, showing the uniqueness of such Context Vector. We have also observed most of the neuron activities in the later layers. This phenomenon makes sense since more abstract concepts and semantic structures formulate in later layers, thus being more correlated with the Context Vector, while earlier layers pick up more on token-level abstractions.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_rotten_tomatoes-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_rotten_tomatoes-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_rotten_tomatoes-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_rotten_tomatoes.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> A LAT scan of the Context Vector trained on `rotten_tomatoes` dataset evaluated with a `rotten_tomatoes` sequence. The x-axis is the token index, and the y-axis is the Layer number. More red indicates higher neural activities, and more blue indicates lower neural activities. 
</div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_medical_questions_pair-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_medical_questions_pair-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_medical_questions_pair-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_medical_questions_pair.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> A LAT scan of the Context Vector trained on `rotten_tomatoes` dataset evaluated with a `medical_questions_pair` sequence. 
</div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_ethos_religion-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_ethos_religion-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_ethos_religion-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/lat_scan_ethos_religion.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> A LAT scan of the Context Vector trained on `rotten_tomatoes` dataset evaluated with a `ethos-religion` sequence. </div> <p>We have also produced graphs that zoom into the token-level neural activities detection on the Context Vector of the opposing pair (Pay attention &amp; Don’t pay attention), shown below. A large difference in the neural activities of the two instructions is denoted by red and indicates that the ablation is effective, while the green shades indicate that there are similar in neural activities. 
The results show that the neural activities are consistently different across the sequence until the model starts generating next tokens and the context ends where the neural activities are similar.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/rotten_tomatoes_token_level-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/rotten_tomatoes_token_level-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/rotten_tomatoes_token_level-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/rotten_tomatoes_token_level.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> A token-level LAT scan that compares the difference between the neural activities of the Context Vector of the opposing pair (Pay attention &amp; Don't pay attention) on the `rotten_tomatoes` dataset. </div> <h2 id="representation-control">Representation Control</h2> <p>To change an activation along some direction, we can imagine there are several canonical ways. First, given our Context Vector $v$ and an activation $a$, we can do one of the following.</p> <h3 id="addition">Addition</h3> \[a' = a + v\] <h3 id="amplification">Amplification</h3> \[a' = a + \text{sign}(a \cdot v) v\] <h3 id="projection">Projection</h3> \[a' = a - (a \cdot v) \cdot \frac{v}{||v||^2}\] <p>The first represents a constant perturbation so it supposedly transforms the representation to become more of a certain quality. 
The second amplifies the direction according to which side it is on, so it makes the representation more extreme. The third removes the quality from the representation by subtracting the projection.</p> <p>We explore all these methods to control Mistral-7b-instruct. We do our experiments on the <code class="language-plaintext highlighter-rouge">rotten_tomato</code>, <code class="language-plaintext highlighter-rouge">sick</code>, <code class="language-plaintext highlighter-rouge">hate_speech18</code>, and <code class="language-plaintext highlighter-rouge">glue-wnli</code> in-context-learning datasets consisting of input-output pairings where outputs have two possible correct options – positive or negative contradiction or entailment, hate or noHate, and entailment or not_entailment (for <code class="language-plaintext highlighter-rouge">sick</code>, it originally contains a third option of <code class="language-plaintext highlighter-rouge">neutral</code> which we remove since our framework requires two classes).</p> <p>Given learned representations with the same configuration as our representation reading, we construct a test set from the same dataset as training. The test set has $16$ examples, each with one demonstration followed by a question. We evaluate correctness by having the LLM generate $10$ tokens and checking if the correct answer is contained in the output and the incorrect answer is not contained in the output, without being sensitive to case. This ensures correct evaluation so that an answer of no_entailment does not evaluate as correct for having entailment inside of it if entailment is the right answer.</p> <p>A hyperparameter which we denote $\alpha$ scales the size of $v$. If our Context Vector is $r$, sign value is $s$, then we have $v = \alpha \cdot r \cdot s$. 
We vary $\alpha \in { 0, 0.25, 0.5, 1, 2, 5, 10}$, and also take the negative of $\alpha$, which we label as positive and negative respectively.</p> <h3 id="results-for-control-with-addition">Results for Control with Addition</h3> <p>For rotten tomatoes, we see the expected performance gap of positive over negative, though positive does worse than no control. Moreover, we see in glue-wnli and sick, the negative control actually does better than positive control. In hate_speech18, we see the desired result.</p> <p>Despite modifying the layers that we controlled, based upon observing the layers at which the Context Vectors had the most correlation to the trained concept, we cannot find a set of layers to control that works <strong>consistently</strong> across all four datasets, though we can find layers that work for one dataset.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/add_tomato-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/add_tomato-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/add_tomato-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/add_tomato.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The accuracy of the model on the `rotten_tomatoes` dataset with amplification or suppression of the Context Vector using Addition. The x-axis is the coefficient of amplification, and the y-axis is the accuracy. 
</div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/add_sick-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/add_sick-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/add_sick-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/add_sick.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The accuracy of the model on the `sick` dataset with amplification (positive) or suppression (negative) of the Context Vector using <bold>Addition</bold>. The x-axis is the alpha value of amplification, and the y-axis is the accuracy. 
</div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/add_hate_speech-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/add_hate_speech-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/add_hate_speech-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/add_hate_speech.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The accuracy of the model on the `hate_speech18` dataset with amplification (positive) or suppression (negative) of the Context Vector using <bold>Addition</bold>. 
</div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/add_glue-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/add_glue-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/add_glue-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/add_glue.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The accuracy of the model on the `glue_wnli` dataset with amplification (positive) or suppression (negative) of the Context Vector using <bold>Addition</bold>. </div> <h3 id="results-for-control-with-amplification">Results for Control with Amplification</h3> <p>Note the result depends on the absolute value of $\alpha$ so the positive and negative graphs converge. The effect of amplification is quite smooth relative to addition in the sense that there is a consistent downward trend in performance for both amplification and suppression. 
This could be because amplification amplifies existing signals and this gets stronger as $\alpha$ increases.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_tomato-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_tomato-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_tomato-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_tomato.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The accuracy of the model on the `rotten_tomatoes` dataset with amplification (positive) or suppression (negative) of the Context Vector using <bold>Amplification</bold>. The x-axis is the alpha value, and the y-axis is the accuracy. 
</div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_sick-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_sick-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_sick-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_sick.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The accuracy of the model on the `sick` dataset with amplification (positive) or suppression (negative) of the Context Vector using <bold>Amplification</bold>. The x-axis is the alpha value, and the y-axis is the accuracy. 
</div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_hate_speech-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_hate_speech-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_hate_speech-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_hate_speech.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The accuracy of the model on the `hate_speech18` dataset with amplification (positive) or suppression (negative) of the Context Vector using <bold>Amplification</bold>. The x-axis is the alpha value, and the y-axis is the accuracy. 
</div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_glue-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_glue-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_glue-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/piecewise_glue.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The accuracy of the model on the `glue_wnli` dataset with amplification (positive) or suppression (negative) of the Context Vector using <bold>Amplification</bold>. The x-axis is the alpha value, and the y-axis is the accuracy. </div> <h3 id="results-for-control-with-projection">Results for Control with Projection</h3> <p>We can see that projection consistently decreases performance, which is expected as we can imagine projection as erasing the idea that the model needs to pay attention to these examples. 
Having positive or negative sign of $\alpha$ does not affect projection.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/proj_tomato-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/proj_tomato-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/proj_tomato-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/proj_tomato.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The accuracy of the model on the `rotten_tomatoes` dataset with amplification (positive) or suppression (negative) of the Context Vector using <bold>Projection</bold>. The x-axis is the alpha value, and the y-axis is the accuracy. 
</div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/proj_sick-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/proj_sick-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/proj_sick-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/proj_sick.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The accuracy of the model on the `sick` dataset with amplification (positive) or suppression (negative) of the Context Vector using <bold>Projection</bold>. The x-axis is the alpha value of amplification, and the y-axis is the accuracy. 
</div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/proj_hate_speech-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/proj_hate_speech-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/proj_hate_speech-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/proj_hate_speech.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The accuracy of the model on the `hate_speech18` dataset with amplification (positive) or suppression (negative) of the Context Vector using <bold>Projection</bold>. The x-axis is the alpha value of amplification, and the y-axis is the accuracy. 
</div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/proj_glue-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/proj_glue-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/proj_glue-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/proj_glue.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The accuracy of the model on the `glue_wnli` dataset with amplification (positive) or suppression (negative) of the Context Vector using <bold>Projection</bold>. The x-axis is the alpha value of amplification, and the y-axis is the accuracy. </div> <h3 id="ablation-studies">Ablation Studies</h3> <p>A key question is whether the Context Vectors are truly special. Especially because much of our results do not work, we would like to assess the “noise level.” By sampling a random unit vector from $4096$-dimensional space, the hidden dimension of <code class="language-plaintext highlighter-rouge">Mistral-7b-instruct</code>, for each layer and using that for control, we get the following results.</p> <p>If we take the negative of all the Context Vectors, the graphs for positive and negative $\alpha$’s would switch. The fact that in our random sample we see such a large gap in the Glue-wnli graph indicates that there is quite a lot of noise. Moreover, if we take the negative of our particular randomly sampled vector, we obtain a Context Vector for Glue-wnli that is <strong>extremely good</strong> at controlling in-context-learning. 
The large landscape of $4096$-dimensional space is an exciting mystery.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/random_tomato-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/random_tomato-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/random_tomato-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/random_tomato.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The accuracy of the model on the `rotten_tomatoes` dataset with amplification (positive) or suppression (negative) of a random vector using <bold>Addition</bold>. The x-axis is the alpha value of amplification, and the y-axis is the accuracy. 
</div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/random_sick-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/random_sick-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/random_sick-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/random_sick.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The accuracy of the model on the `sick` dataset with amplification (positive) or suppression (negative) of a random vector using <bold>Addition</bold>. The x-axis is the alpha value, and the y-axis is the accuracy. 
</div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/random_hate_speech-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/random_hate_speech-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/random_hate_speech-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/random_hate_speech.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The accuracy of the model on the `hate_speech18` dataset with amplification (positive) or suppression (negative) of a random vector using <bold>Addition</bold>. The x-axis is the alpha value, and the y-axis is the accuracy. 
</div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/random_glue-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/random_glue-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/random_glue-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-representationengineering-incontextlearning/random_glue.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The accuracy of the model on the `glue_wnli` dataset with amplification (positive) or suppression (negative) of a random vector using <bold>Addition</bold>. The x-axis is the alpha value of amplification, and the y-axis is the accuracy. </div> <h1 id="conclusion">Conclusion</h1> <p>While we understand our work is limited due to time and compute constraints and did not achieve the results we hoped for, we tried our best to explore this research direction of finding a Context Vector that corresponds to the in-context learning behaviors and experiments of using it to control model outputs.</p> <h2 id="implications">Implications</h2> <p>If successful, this research direction could be a powerful tool to understand mechanistically why in-context learning emerges and potentially use model editing to achieve better State-of-the-Art results on LLMs in specific benchmark evaluation scenarios with model editing. Even with our current results that demonstrate more success in suppressing the Context Vector than amplifying it, i.e. 
suppressing such behaviors than boosting it, this can have implications on works that try to perform model unlearning and impact the robustness of LLMs.</p> <h2 id="future-work">Future Work</h2> <p>Through ablating with the random vector in the embedding space, it is unfortunate that controlling for the particular Context Vector we found is not particularly different from other vectors, despite it showing some promises on suppressing the results. We hope to run further ablation studies to confirm that suppressing the Context Vector is only suppressing the in-context learning behaviors of the specific behaviors and does not have other side effects.</p> <p>Regarding our current setup of the contrasting prompts of telling the model to pay attention or not pay attention to the concept, we can further explore the space of contrasting prompts. Directly related to our work, we would also like to explore the other type of experiment setup in Zou et al. (2023)<d-cite key="zou2023representation"></d-cite>; unlike the data pair setup where we ask the model to pay attention to the examples or ignore them, we can ask the model to “think hard about the context/structure of the question” and elicit neural activities that way.</p> <p>We are also interested in exploring vectors that control step-by-step reasoning and in general, intelligence. The phrases “Let’s think step by step” <d-cite key="kojima2023stepbystep"></d-cite> or “Take a deep breath and work on this problem step-by-step” <d-cite key="yang2023deepbreath"></d-cite> are powerful phrases that elicit chain-of-thought reasoning and improve model performance. 
Could we engineer activation transformations that improve these models’ performance even more than and without the need for prompting?</p> Reasoning with Maps: Assessing Spatial Comprehension on Maps in Pre-trained Models 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/mapreason <h1 id="motivation">Motivation</h1> <p>Humans possess a remarkable ability to intuitively understand and make sense of maps, demonstrating a fundamental capacity for spatial reasoning, even without specific domain knowledge. To illustrate this, consider the following question: Do these two maps represent the same location?</p> <div class="row mt-3"> <div class="col-sm-6 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-mapreason/85_301-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-mapreason/85_301-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-mapreason/85_301-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-mapreason/85_301.png" class="img-fluid z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-6 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-mapreason/85_302-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-mapreason/85_302-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-mapreason/85_302-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-mapreason/85_302.png" class="img-fluid z-depth-1" width="auto" height="auto" 
onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>Answering this query necessitates <strong>coregistration</strong>, the ability to align two maps by overlaying their significant landmarks or key features. Moreover, humans can go beyond mere alignment; they can tackle complex inquiries that demand aligning maps, extracting pertinent data from each, and integrating this information to provide answers.</p> <p>Maps reasoning is a fundamental skill with important applications in domains such as navigation and geographic analysis. For example, pilots need to be able to reference and understand multiple kinds of FAA charts as a core prerequisite for many aviation-related tasks. Further, making inferences on historical maps that lack digitized versions relies on human capabilities for reasoning on maps and is crucial for various fields such as geology or archeology. Machine learning models that can match human visual map understanding hold substantial promise in these applications. Additionally, such models have the potential to enhance accessibility by providing alternative modalities for individuals with visual impairments to comprehend and extract spatial information from maps.</p> <p>Our work aims to tackle the following question: To what degree do contemporary state-of-the-art (SOTA) machine learning models, pre-trained on vast datasets comprising millions or even billions of images, possess the capacity for spatial reasoning and do they reach the human level? We will do this specifically by focusing on the task of coregistration.</p> <p>We propose a map reasoning dataset which we believe is a suitable initial benchmark to test the capabilities of multimodal models on coregistration; The example given above about coregistration possibly cannot be answered directly using prior knowledge a Large Language Model (LLM) might have while ignoring the vision modality. 
Moreover, the complexity of the task can be increased and controlled, leading to a rigorous evaluation of the model’s ability to comprehend and synthesize information across textual and visual modalities.</p> <h2 id="literature-review-and-the-gap-in-previous-literature">Literature review and the gap in previous literature</h2> <p><strong>Multimodality:</strong> There are countless significant recent advances in Large Language Models (LLMs) achieved by models such as Meta’s Llama 2<d-cite key="llama"></d-cite>, OpenAI’s GPT 3<d-cite key="gpt3"></d-cite>, Google’s Palm 2<d-cite key="palm2"></d-cite>, WizardLM<d-cite key="wizardlm"></d-cite>, and countless others. These models have successfully achieved or surpassed human-level performances on numerous natural language processing tasks (such as Retro-Reader<d-cite key="retro_reader"></d-cite> on the SQuAD2.0<d-cite key="squad2"></d-cite> benchmark, ST-MOE<d-cite key="st_moe"></d-cite> on the SuperGLUE<d-cite key="super_glue"></d-cite> benchmark, and many other benchmarks). LLMs have achieved a surprisingly high level of knowledge about text by being able to achieve very high zero-shot scores on many NLP tasks demonstrating their understanding and versatility in Language as opposed to non-LLM NLP models that are usually trained to accomplish a specific task and do not generalize beyond that task.</p> <p>Recently there has been a massive push towards integrating other modalities into LLMs, most notably vision. Models such as Google’s Gemini<d-cite key="gemini"></d-cite>, OpenAI’s GPT 4<d-cite key="gpt4"></d-cite>, VisionLLM<d-cite key="vision_llm"></d-cite>, and many others all focus on scaling the capabilities achieved by LLMs in the NLP domain to the vision domain. The language modality LLMs are trained on consists mostly of written text recorded online that follows a grammatical structure from a human language (English, etc.) or a computer language (Python, HTML, Latex, etc). 
However, this is in stark contrast to the vision modality which can consist of categorically different representations consisting of anything from photographs of people in the park, to a representation of the Silicon Atom from a computer simulation, to an infographic about the importance of drinking enough liters of water per day. This difference between the distribution of the entirety of text modality and the vision modality hints that much more careful attention needs to be placed on compiling unique and diverse datasets that aim at sufficiently representing the distribution of the vision modality.</p> <p><strong>Vision-Language Benchmarks:</strong> There were significant strides made in the past years in developing benchmarks and datasets for LVLMs which are composed of questions that require both Language and Vision to successfully answer. However, there are very few datasets that include or focus on maps as part of the benchmark. LVLM-eHub<d-cite key="lvlm_ehub"></d-cite> compiles numerous benchmarks for LVLMs such as benchmarking object and scene recognition using ImageNet1K, GVT, and many other datasets, or benchmarking visual common sense using ImageNetVC and the visual commonsense reasoning datasets. They also include a Visual Reasoning benchmark, however, it focuses mostly on real-world photographs (such as the Visual Spatial Reasoning<d-cite key="visual_reasoning"></d-cite> dataset or the VisDial<d-cite key="visual_dialog"></d-cite> Dataset that is based on images from COCO<d-cite key="coco"></d-cite>). However, none of these datasets place focus or sufficiently contain images of maps or diagrams while they place much more focus on real-world images.</p> <p>A frequently used dataset for evaluating LVLMs, which is also included in the Visual Reasoning benchmark, is the ScienceQA<d-cite key="science_qa"></d-cite> dataset which includes more than 20 thousand multimodal multiple-choice questions across 127 categories including a category for maps. 
However, examining the narrow slice of questions with images in the geography/map category shows that many of them do not necessitate a vision component and can be answered solely based on the textual question (e.g. “Which of these states is farthest north?” with four different states as a multiple choice question), or provide an entirely unused visual representation (e.g. providing a map of the United States with no text alongside the question “What is the capital of New York?”). Out of the questions that rely on the visual component to correctly answer, many of them require relatively little understanding of maps such as asking “Which country is highlighted?” which only requires visual matching of the highlighted section with typical shapes of countries or continents. Additionally, recent papers such as LLama-adapter<d-cite key="llama_adapter"></d-cite> have demonstrated that it’s possible to achieve a high accuracy of 78.3% on ScienceQA using an unimodal text-only Large Language Model. Thus, although ScienceQA does have a subsection dedicated to maps, it does not seem sufficiently capable of testing the capabilities of LVLMs to reason and understand maps.</p> <p>An area closely related to maps that do have a relatively higher degree of focus is the capability of models to parse and reason about diagrams, figures, and plots. Datasets on this topic include the ACL-FIG<d-cite key="acl_fig"></d-cite> which involves classifying and labeling scientific figures, InfographicVQA<d-cite key="info_vqa"></d-cite> which requires reasoning over data visualizations in infographics, ChartQA<d-cite key="chart_qa"></d-cite> which requires reasoning over charts, and many other datasets that focus on figures are plots. 
Models have been developed to specifically tackle this challenge, such as Google’s DEPLOT<d-cite key="deplot"></d-cite> which is capable of reasoning over charts and plots by translating them to text and then using an LLM as a reasoning engine on top of the outputted text. However, charts and plots are still significantly different from maps, as the plots these datasets usually contain are simple line charts and bar graphs that can be translated into a table or textual format in a relatively lossless manner, while it is difficult or impossible to perfectly transform a sufficiently detailed map to a textual format without losing information. This illustrates the inherent complexities associated with processing maps meant to depict dense information which requires direct reasoning on the vision modality as opposed to charts and plots which present data in a simple manner.</p> <p><strong>Maps Reasoning:</strong> Huge strides have been made in specific tasks related to maps, such as image-to-map<d-cite key="image_to_map"></d-cite> conversion and map segmentation<d-cite key="map_segmentation"></d-cite>. However, we wanted to focus more generally on map understanding and reasoning by LVLMs as opposed to a single task-specific performance. To draw on an analogy, consider the act of isolating specific parts of speech (such as nouns or verbs) in language. A model designed exclusively for this task lacks the comprehensive understanding exhibited by an LLM which is proficient in addressing almost any language task. In the context of map co-location, deep learning models are employed solely as preprocessing steps to extract relevant information for subsequent utilization by matching algorithms as opposed to an LVLM model capable of general reasoning on maps. For example, the authors in this study<d-cite key="intersection_map"></d-cite> use region-based CNN to extract road intersections, which are subsequently input into a map-matching procedure. 
Other features like street names have also been proposed in the literature<d-cite key="street_name_map"></d-cite>. In general, current frameworks for map reasoning require many hand-crafted and engineered steps (see, e.g., this<d-cite key="aerial_understanding"></d-cite> work and the references within). A recently proposed dataset, MapQA<d-cite key="map_qa"></d-cite>, is closely related to what we consider as map reasoning. However, the maps contained are of sufficiently low information-density and exhibit similar limitations to what we described in InfographicVQA and ChartQA as the images provided can be sufficiently translated to a textual domain before considering the textual input using a similar technique to DEPLOT. To the best of our knowledge, there are no examples in the literature where LVLMs are used to directly reason about maps at the detail we propose and perform tasks such as coregistration.</p> <p>Our aim is to tackle the gap in assessing the map reasoning capabilities of LVLMs by developing a dataset aimed only at coregistration and analyzing the capabilities of existing models on such a dataset We focus our benchmark construction on the specific task of coregistration as it serves as an indicator of map reasoning capabilities and is one step towards constructing a comprehensive benchmark for map reasoning capabilities of LVLMs.</p> <!-- ############## --> <p><br /></p> <h1 id="new-dataset">New Dataset</h1> <p>We have opted to create and compile a map dataset focusing on maps from the aviation domain for our research. The maps we utilized are carefully crafted by aviation agencies to provide a wealth of information while maintaining readability within a concise timeframe, ensuring clarity for pilots. Our dataset will be constructed by incorporating maps from the following sources:</p> <ol> <li> <p><strong>World Visual Flight Rules (VFR):</strong> These maps are intended to guide pilots when they operate aircraft visually. 
They include aeronautical and topographic information such as airports, obstructions, and navigation aids.</p> </li> <li> <p><strong>World Instrument Flight Rules (IFR) Low</strong> These maps are suitable to assist pilots when they control the aircraft through instruments. They contain information such as cruising altitudes, route data, and controlled airspaces.</p> </li> </ol> <p>These maps are accessible in an interactive environment through the SkyVector website (<a href="https://skyvector.com/?ll=42.3525,-71.025833333&amp;chart=301&amp;zoom=2">VFR</a>, <a href="https://skyvector.com/?ll=42.3525,-71.025833333&amp;chart=302&amp;zoom=1">IFR Low</a>), which we used as part of our dataset generation pipeline.</p> <p>To generate the map snippets for our experiment, we chose to sample from the previous map sources around airports. This selection guarantees that the snippets are inherently information-rich, given that the map originates in the aviation domain. To ensure diversity in our dataset, we specifically sampled airports situated in the states of Massachusetts, New York, Delaware, Arizona, and Hawaii.</p> <p>The resulting dataset exhibits significant variations in terms of density, featuring both isolated airports and those nestled within cities, diverse locations such as inland, seaside, and islands, as well as various terrain types ranging from greenery landscapes, mountainous regions, and arid environments. In total, our dataset contains 1185 image pairs, each image is 600x600 pixels in PNG format. The total size of our dataset is 1.28 GB.</p> <h2 id="a-glimpse-of-the-coregistration-task">A glimpse of the coregistration task</h2> <p>To gain an understanding of our task and its intricacies, we present a few examples from our dataset. 
Generally, humans can successfully align two maps by identifying common features, which fall into one of the following categories:</p> <ul> <li><strong>Terrains:</strong> such as shorelines or mountains.</li> <li><strong>Charts:</strong> such as flight paths or restricted airspaces.</li> <li><strong>Landmarks:</strong> such as airport or city names.</li> </ul> <p>The process of mapping by terrain is typically swift for humans, especially when there are ample distinctive details. On the other hand, mapping by chart requires a more thoughtful approach, involving careful examination to establish a connection between the depicted attributes. Mapping by names usually serves as a last resort, employed if the prior approaches prove unsuccessful. Consider the following examples:</p> <div class="row mt-3"> <div class="col-sm-6 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-mapreason/43_301-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-mapreason/43_301-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-mapreason/43_301-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-mapreason/43_301.png" class="img-fluid z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-6 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-mapreason/43_302-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-mapreason/43_302-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-mapreason/43_302-1400.webp" /> <!-- Fallback to 
the original file --> <img src="/staging/assets/img/2023-11-08-mapreason/43_302.png" class="img-fluid z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Positive Pair #1: A VFR map (left) and an IFR map (right) that depict the same location (Port Allen Airport in South Kauai, Hawaii) and can be coregistered easily by the shorelines of the island. </div> <div class="row mt-3"> <div class="col-sm-6 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-mapreason/1038_301-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-mapreason/1038_301-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-mapreason/1038_301-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-mapreason/1038_301.png" class="img-fluid z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-6 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-mapreason/1038_302-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-mapreason/1038_302-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-mapreason/1038_302-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-mapreason/1038_302.png" class="img-fluid z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Positive Pair #2: A VFR map 
(left) and an IFR map (right) that depict the same location (Circle H Ranch Airport in Arizona) and can be coregistered by aeronautical lines (even if one does not know what these lines represent in the aviation domain). </div> <div class="row mt-3"> <div class="col-sm-6 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-mapreason/779_301-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-mapreason/779_301-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-mapreason/779_301-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-mapreason/779_301.png" class="img-fluid z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-6 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-mapreason/779_302-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-mapreason/779_302-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-mapreason/779_302-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-mapreason/779_302.png" class="img-fluid z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Positive Pair #3: A VFR map (left) and an IFR map (right) that depict the same location (Bertrand Chaffee Hospital Heliport, New York). 
The VFR map is feature-rich providing information for Helicopter pilots while the IFR map is a hard example due to being almost blank as the Heliport does not facilitate landing any IFR aircraft. Thus, the IFR map only depicts certain names of cities and waypoints. The two maps can be coregistered by matching the name SPRINGVILLE. </div> <p>All of these examples are positive (the maps show the same location). We showcase below negative examples with varying complexity.</p> <div class="row mt-3"> <div class="col-sm-6 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-mapreason/51_301-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-mapreason/51_301-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-mapreason/51_301-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-mapreason/51_301.png" class="img-fluid z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-6 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-mapreason/223_302-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-mapreason/223_302-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-mapreason/223_302-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-mapreason/223_302.png" class="img-fluid z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Negative Pair #1: An easy and 
straightforward negative for humans due to the VFR map depicting a shoreline crossing the map vertically, while the IFR depicts only a landmass. </div> <div class="row mt-3"> <div class="col-sm-6 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-mapreason/1037_301-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-mapreason/1037_301-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-mapreason/1037_301-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-mapreason/1037_301.png" class="img-fluid z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-6 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-mapreason/1013_302-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-mapreason/1013_302-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-mapreason/1013_302-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-mapreason/1013_302.png" class="img-fluid z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Negative Pair #2: A harder negative example. Aeronautical lines partially appear similar but the locations are different. Naively matching circles on both maps to classify a positive pair would provide a false positive. 
</div> <p>We showcase multiple positive and negative pairs alongside the natural reasoning that a human would take to correctly classify the pairs. We hope that this showcases the complexity of the task and the various strategies involved in achieving successful coregistration.</p> <!-- ############## --> <p><br /></p> <h1 id="experiments">Experiments</h1> <h2 id="zero-shot-evaluation">Zero-shot evaluation</h2> <p>To start, we want to evaluate the zero-shot performance of pre-trained LVLMs on the task of identifying whether the two images are the same (coregistration). The models we start our evaluation with are BLIP-2<d-cite key="blip"></d-cite>, ViLT<d-cite key="vilt"></d-cite>, LXMERT-VQA, and LXMERT-GQA<d-cite key="lxmert"></d-cite>. We specifically chose these models as they are all publicly available multimodal text generative models that were partly trained on visual question-answering datasets. Thus, they are able to accept both the vision and language inputs consisting of an image of the two side-by-side maps alongside the yes-or-no question of whether the two maps depict the same geographical location.</p> <p>To verify that the models we obtained are behaving as expected and are capable of answering a textual question that relies on a visual component, we compile a very simple dataset of 200 cat and dog pictures, half the images depict a cat while the other half depict dogs. We present these trivial images to the models alongside the prompt “Is this an image of a cat? Answer:” and generate a single token. As expected, out of the 200 images all four models achieved an almost perfect classification accuracy (&gt;95% for all 4 models) by answering with either a “Yes” or a “No” token.</p> <p>This is not surprising because, as mentioned, object recognition questions are very prevalent in visual question-answering datasets, especially on ubiquitous everyday objects such as cats and dogs. 
To see if these models can generalize beyond their training datasets and properly reason on maps, we start by running the following experiment:</p> <p><strong>Experiment #1:</strong> For each VFR and IFR image pair, we generate two examples (positive and negative). For the positive example, we use the correct pairing (e.g., maps from the same location with the two different styles). For the negative example, we randomly replace one map uniformly from our datasets. Each model is provided with a concatenation of the two maps in its vision input, and with the question “Do these two maps show the same location? Answer with “Yes” or “No”. Answer:” in its text input.</p> <p>In total, each model was asked 2370 questions. Below, we show the accuracy, precision, and recall that each model obtained.</p> <div class="row mt-3 align-items-center"> <div class="col-sm-6 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-mapreason/experiment_01_01-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-mapreason/experiment_01_01-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-mapreason/experiment_01_01-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-mapreason/experiment_01_01.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-6 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-mapreason/experiment_01_02-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-mapreason/experiment_01_02-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" 
srcset="/staging/assets/img/2023-11-08-mapreason/experiment_01_02-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-mapreason/experiment_01_02.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>The models performed barely above random guessing in the zero-shot experiment, and some models consistently produced the same single output (either “yes” or “no”) regardless of whether the input image was a positive or negative pair.</p> <p>While the results of the models are very low and barely above random guessing, we wanted to analyze whether this failure is due to the model not comprehending the task or whether the issue is simply in the last layer of the model where the text generation occurs. The reason behind this analysis is that there is a possibility that the LVLM is able to correctly capture all the features necessary for determining whether the two maps coregister while still failing at providing the final answer due to the final layer of the model outputting an incorrect distribution over the labels (or tokens in the case of LVLMs). Thus we decide to ignore the last linear layer of the model (the language model head) and capture the hidden state of the last token from the last layer of the model.</p> <h2 id="fine-tuned-evaluation">Fine-tuned evaluation</h2> <p>Using this methodology, the output we obtain from each model is a single embedding vector (the length of which depends on the embedding size of the model). Usually, a single linear layer is finetuned on the last layer to directly predict the answer. However, we opt for a more detailed analysis by training multiple classifiers (Logistic Regression, SVM, and XGBoost) that take the embedding vector and produce a binary output. 
In all the upcoming figures, we always report the results using the classifier that performed the best (for each model) on the validation set.</p> <p>Moreover, it is known that LLMs can be sensitive to prompts<d-cite key="yang2023large,yang2022prompt"></d-cite>. Thus, to more rigorously analyze the performance of the models, we create a total of 8 variations of the coregistration question, as this one question serves as the text input for all pairs of images in our created dataset. A detailed analysis of these prompts will be provided later. Similar to the classifiers, we report results using the best prompt for each model.</p> <p>This methodology of using the model to extract a rich embedding that contains the answer to our prompt (instead of generating the answer directly as text) means that we are now capable of utilizing additional large transformer-based multimodal models that output an embedding vectors instead of directly outputting text. Thus we include in our analysis two such models which are FLAVA<d-cite key="flava"></d-cite> and GIT<d-cite key="git"></d-cite> that output embeddings that represent the multimodal input. Those embeddings are then similarly passed to the simple classifiers to detect whether the models are capable of capturing the required details from the maps to answer the co-location task correctly. Thus we perform our second experiment:</p> <p><strong>Experiment #2:</strong> We generate the examples using the same procedure described in Experiment #1. Then, for each model, we pass through the entire dataset and generate an embedding for each sample in our dataset. We then train the simple classifiers on 70% of the embedding vectors to predict the positive and negative pairs. 
We evaluate on the remaining 30% of the data and report the results in orange below.
Given the low accuracy achieved on this task, we can claim that the LVLMs we have analyzed are incapable of reasoning and answering more complicated questions relative to our simple baseline question of “Are these two maps of the same location”</p> <h2 id="improving-results-for-co-registration">Improving results for co-registration</h2> <p>We emphasize that our goal is not to directly achieve high accuracy on this task by utilizing any machine learning model, but rather it is to evaluate the capabilities of LVLMs to reason on maps. Furthermore, we created and proposed this dataset and task to act as a baseline for assessing the reasoning abilities of LVLMs on maps.</p> <p>However, despite the failure of LVLMs to answer this baseline task, we next want to assess the inherent difficulty of the dataset. For this, we develop a simple model by utilizing the same simple classifiers used above to train on the embedding of a unimodal vision-only model. Unlike LVLMs, we are not testing our proposed task-specific model on the dataset to assess its capabilities for reasoning on maps, as the model is not trained to answer questions based on images, does not accept text modality, and is specifically fine-tuned to solve this one narrow task. Thus, the results of this experiment serve only to give a sense of the difficulty of the task that we considered as a simple baseline for map reasoning. This will hopefully demonstrate that the relatively older frozen vision-only models can achieve a significantly higher accuracy on this specific task when compared to state-of-the-art open-source LVLMs and possibly indicating the gap between the embeddings captured by the vision-only model and the LVLMs.</p> <p><strong>Experiment #3:</strong> We develop a simple unimodal vision classification model by utilizing a frozen CLIPVIsion model as a backbone. First, we feature-engineer the input by subtracting the two maps from each other in the image space to produce a single image. 
This image is passed through the frozen CLIPVision model to generate an embedding of the difference between the maps, the embeddings are then used to train the simple classifiers mentioned above and the one that achieves the highest accuracy on the validation set is reported below.</p> <div class="row mt-3 justify-content-center"> <div class="col-sm-8 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-mapreason/experiment_03_01-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-mapreason/experiment_03_01-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-mapreason/experiment_03_01-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-mapreason/experiment_03_01.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>We see that our fine-tuned vision model (shown in green) achieves a significantly higher accuracy than all previously tested LVLMs. This shows that the task is not a significantly difficult vision task as a frozen CLIPVision model with a head fine-tuned on approximately two thousand samples was able to sufficiently extract an embedding and correctly distinguish positive and negative pairs 85% of the time.</p> <p>This significant difference between the accuracy of the frozen CLIP model and the LVLMs on this task signifies that the LVLMs we tested are still significantly farther behind on certain tasks even when compared to a frozen vision-only model that was trained and released years prior. 
This is in stark contrast to the significant achievements that LLMs accomplish on numerous datasets when compared to task-specific NLP models, where the highest-scoring models on most NLP datasets are LLMs.</p> <h2 id="human-benchmarking">Human benchmarking</h2> <p>So far, we have examined the performance of pre-trained LVLMs on our proposed dataset in a zero-shot as well as a fine-tuned manner alongside a vision-only model with feature engineering to assess the difficulty of the task.</p> <p>A natural next question to analyze is the performance of humans on this same task as it is not immediately clear how hard or easy the task is for us. The performance achieved by humans on a task such as this would serve as a great target for LVLMs to try to reach.</p> <p><strong>Experiment #4:</strong> We present the following task to two subjects. Each human subject will see two maps for 10 seconds. The pair can be positive or negative with equal probability. After the 10 seconds elapse, the maps automatically disappear and the human subject is asked if the two maps show the same location with a binary “Yes” or “No” choice. After the answer is received, a new pair is sampled and this process is repeated until we gather 50 answers from each human subject.</p> <p>The 10-second window acts as a pseudo-computational limit on the human subject and ensures that the subject’s answers are mostly based on visual and spatial reasoning and not on reading and comparing text. If the subject does not immediately identify a visual or spatial cue, the 10-second window possibly allows for a maximum of one or two texts to be compared if the subject is quick enough. This time limitation prevents the participants from spending an extensive amount of time comparing the nuances of the two images for a severely long time which would make the task more trivial. 
Below, we show the accuracy obtained from two human subjects and compare it with the previous LVLM results.</p> <div class="row mt-3 justify-content-center"> <div class="col-sm-8 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-mapreason/experiment_04_01-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-mapreason/experiment_04_01-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-mapreason/experiment_04_01-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-mapreason/experiment_04_01.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>We see that both human participants (shown in blue) achieve a significantly higher accuracy (~95%) compared to all the tested ML models. This shows that the task is significantly easier for humans despite the 10-second time limit preventing the subject from extensively comparing the images.</p> <p>Our experiments showcase the inability of LVLMs to properly solve our proposed dataset on coregistration as well as showing that a vision-only fine-tuned model with feature-engineering is able to solve the task at a significantly higher accuracy. 
Finally, we show that humans are able to solve the time-constrained task with a significantly high accuracy.</p> <h2 id="analysis-on-prompt-engineering">Analysis on prompt engineering</h2> <p>Numerous recent studies have indicated the importance of prompt engineering in the quality of the output of Large-Transformer based models<d-cite key="yang2023large,yang2022prompt,white2023prompt, zhou2022large"></d-cite>.</p> <p>Due to the potential importance of prompts in affecting performance, we decided to run all experiments that require prompts using multiple different prompts with varying degrees of length and complexity. We note that the prompts considered and listed below were only the ones that consistently conditioned the model to output a “Yes” or “No” output token instead of any other arbitrary sentence completion output. The prompts are shown in the following table:</p> <p><br /></p> <table> <thead> <tr> <th style="text-align: center">ID</th> <th style="text-align: left">Prompt</th> </tr> </thead> <tbody> <tr> <td style="text-align: center">1</td> <td style="text-align: left">Are these two maps the same? Answer with “Yes” or “No”. Answer:</td> </tr> <tr> <td style="text-align: center">2</td> <td style="text-align: left">Do these two maps show the same location? Answer with “Yes” or “No”. Answer:</td> </tr> <tr> <td style="text-align: center">3</td> <td style="text-align: left">Do the two charts depict the same area? Answer:”</td> </tr> <tr> <td style="text-align: center">4</td> <td style="text-align: left">The following image contains two maps with different styles side by side. Do the two maps show the same location? Answer with “Yes” or “No”. Answer:</td> </tr> <tr> <td style="text-align: center">5</td> <td style="text-align: left">On the left there is a map from the VFR dataset and on the right a map from the IFR dataset. Do the two maps show the same location? Answer with “Yes” or “No”. 
Answer:</td> </tr> <tr> <td style="text-align: center">6</td> <td style="text-align: left">There are two maps of different styles, do they represent the same area or are they completely different? Answer:</td> </tr> <tr> <td style="text-align: center">7</td> <td style="text-align: left">The following image contains two maps with different styles side by side. Do the two maps show the same location? Try to compare the maps by looking at key landmarks or features. Answer with “Yes” or “No”. Answer:</td> </tr> <tr> <td style="text-align: center">8</td> <td style="text-align: left">Carefully examine the following two images that contain two maps with different styles side by side. Do the two maps correspond on the same latitude and longitude point? It is of utmost importance that you answer this correctly. Answer with “Yes” or “No”. Answer:</td> </tr> </tbody> </table> <p><br /></p> <p>The initial prompts (prompts #1 - #3) are meant to be short and direct, while the ones in the middle (prompts #4 - #6) are more verbose and add a bit more complexity, while the last two (prompts #7 - #8) are very verbose and add an exact explanation of the task. We also include additions to some of the prompts that try to guide the models on how they accomplish the task, and some additions that emphasize the importance of correct answers. 
In the figure below, we study the effect of prompts on model performance.</p> <div class="row mt-3 justify-content-center"> <div class="col-sm-8 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-mapreason/experiment_05_01-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-mapreason/experiment_05_01-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-mapreason/experiment_05_01-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-mapreason/experiment_05_01.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>We notice that varying the prompts has a relatively high variance in terms of accuracy with an improvement of less than 5% for all models across all prompts. Still, there are no strong general trends across models when considering prompts with increasing complexity. We note that the VILT model was incapable of accepting prompts #5 - #8 due to the limitation of its maximum context length which is shorter than the other models.</p> <p>One aspect that might limit this analysis is that almost all prompts contain an explicit requirement for the models to provide answers immediately (e.g., “Answer with ‘Yes’ or ‘No’. Answer:”). This was done to reduce the computational inference cost and avoid generating long sequences of texts. 
The models might respond better to some prompts if they were allowed to reason about their answers first.</p> <h2 id="investigating-the-failure-points-of-lvlms-on-coregistration">Investigating the failure points of LVLMs on coregistration</h2> <p>The figures presented in the beginning of the blog post demonstrating some examples in our proposed dataset give a clue of the variance in the difficulty of the examples in the dataset, where some samples are easy to identify as positive pairs and others much harder to do so.</p> <p>Thus, to get a better insight into the model’s performance and investigate its failure points, we investigate some examples where the models made confidently wrong predictions. Here, we focus on a single model, FLAVA, which was our best-performing LVLM. In the figure below, we investigate both false positives with the highest predicted positive label and false negatives with the highest predicted negative label. The figure contains the 9 examples where the model generated a very high (very low) score while the true label was positive (negative).</p> <div class="row mt-3"> <div class="col-sm-6 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-mapreason/experiment_06_01-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-mapreason/experiment_06_01-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-mapreason/experiment_06_01-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-mapreason/experiment_06_01.png" class="img-fluid" width="auto" height="auto" data-zoomable="" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-6 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" 
srcset="/staging/assets/img/2023-11-08-mapreason/experiment_06_02-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-mapreason/experiment_06_02-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-mapreason/experiment_06_02-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-mapreason/experiment_06_02.png" class="img-fluid" width="auto" height="auto" data-zoomable="" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>For the false positives, we see more than one example where two maps containing water were wrongly classified. This might indicate that the model is making predictions on these images based on colors more so than spatial reasoning. For the false negatives, there are many examples where the VFR chart is dense while the IFR is sparse. These examples require discarding a lot of information from the VFR charts and focusing solely on the region where the IFR chart contains information. Given that the model made wrong decisions in these examples, there might be a preference for positively matching images based on density. Notably, some of these examples were straightforward for the human subjects (matching based on the shoreline), while other examples required more effort (matching between dense and sparse maps).</p> <p><br /></p> <h1 id="discussion-limitations-and-future-work">Discussion, Limitations, and Future Work</h1> <p>One of the key takeaways of our experiments, and specifically from contrasting the first two experiments with the third experiment, is that it was not difficult for a non-LVLM model to achieve an 85% accuracy on our proposed dataset. Yet, our dataset proved to be challenging for LVLMs, especially in zero-shot performance where they achieved almost no better than random guessing. 
This implies that it would be beneficial to further expand the datasets used for LVLM training — specifically by adding data collected in a manner similar to what we propose — as this could provide invaluable improvements to the future training of LVLMs.
This aspect warrants further investigation to gain a more comprehensive understanding of how different concatenation methods might impact model performance and results.</p> <p>Another limitation is that we were only capable of running our analysis on open-source models, the largest model tested was blip-2 with less than 3 billion parameters. This was the largest LVLM that we had access to in terms of weights, to be able to run our analysis on. Future work could attempt to run the analysis on larger closed-source models if access is granted.</p> <p><br /></p> <h1 id="conclusion">Conclusion</h1> <p>In this project, we propose a novel dataset to serve as an initial benchmark for the capabilities of LVLMs to reason on maps with the goal of addressing a gap in current LVLM benchmarks and datasets.</p> <p>Using this dataset, we run an extensive analysis on the performance of open-source LVLMs showing that they struggle to achieve good performance on the coregistration task. Additionally, we show that the task for our dataset is a relatively simple vision task by showing that a fine-tuned vision-only model released years prior to the tested LVLMs achieves a significantly higher accuracy. Finally, we show that the coregistration task is intuitive to humans, as participants were able to achieve close to perfect accuracy even in a time-constrained manner.</p> <p>We hope that future initiatives regarding data collection for LVLMs and training foundational LVLMs will put more emphasis on datasets such as our proposed datasets. 
This will hopefully unlock new capabilities for LVLMs enabling them to advance beyond their current limitations and possibly expand their utility and reasoning abilities in a variety of real-world scenarios.</p> Autoen-chorder: Predicting Musical Success With Neural Nets 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/foley-to-video <h2 id="introduction">Introduction</h2> <p>Our aim is to use deep learning (the crux of 6.s898) to help musicians and their sponsors (for example: agents, record labels, and investors) identify whether songs will resonate with listeners. Solving this problem would enable established artists to release more impactful music, and spur new musicians to break into a competitive market.</p> <p>We first begin by establishing what our success metric is. For the purposes of this project, we will use the concept of song “popularity” as the metric we want to predict, and we source our popularity data from the SpotGenTrack Popularity Dataset. This dataset leverages Spotify’s Popularity Index, which is a relative rank measured against other songs’ popularities. It is a function of recent stream count, save rate, playlist appearance count, skip rate, share rate, and more.</p> <p>There already exist a few models to help us solve this problem. However, these models make use of metadata, such as artist name, year of release, and genre. We believe that these models – while interesting – are insufficient to be actionable, particularly for up-and-coming musicians who may be innovating new music genres, or who may not yet have a strong name. Specifically, metadata like Artist Name are both highly-weighted (for example, even Taylor Swift’s least popular song will be a relative superhit) and unchangeable (we cannot suggest that artists change their identity to Beyonce). 
Additionally, features like Genre are imprecise, and can quickly become outdated as new subgenres and crossover genres are developed.</p> <p>To address this gap and become more actionable to musicians, we aimed to create a new model that can achieve near-parity with metadata-based models without leveraging any metadata. By combining multiple audio-feature models, we not only achieved comparable results to metadata-based models, we actually outperformed metadata-based models on more than half our testing data.</p> <h2 id="previous-works">Previous Works</h2> <p>The most prominent existing model is HitMusicNet (heretofore referred to as “HMN”). The HMN model predicts popularity based on lyric data from Genius.com (syllables, words, etc.), high-level audio features from SpotGenTrack (e.g. acousticness, key, tempo, speechiness), low-level audio features from SpotGenTrack (audio preprocessing, such as spectral analyses), and metadata from SpotGenTrack (e.g. artist name, year of release, genre). A feature vector is created with this information, and said vector is fed as the input into an Autoencoder network to compress the features, followed by a neural network to obtain the predicted popularity.</p> <p>HitMusicNet has two different objectives: Regression and classification. For this project, we will focus only on regression since it will allow us to visualize differences between our model and HMN with higher granularity. We replicated the code from the paper in PyTorch, using the same functions and data and calculated metrics to make sure our implementation is correctly replicating the paper. We see a slight discrepancy in the errors, likely due to the test/train split during the paper’s training. Altogether, we can still consider our replicated model as valid as the metrics are within reasonable range to the reported metrics. 
Additionally, we added the R-squared metric as an additional metric to ensure our model fits the data.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-Comparison-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-Comparison-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-Comparison-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-Comparison.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>A second model, VGGish, is a pretrained convolutional neural network trained on YouTube-100M (a database with 100 million YouTube videos). This network is a representation learning network widely used in established papers. This network takes in a wav file and processes it on 0.96-second windows, and calculates 128 embeddings per window. 
This means that the resulting tensor from VGGish will be 2 dimensional for a single file, and 3 dimensional for a batch of files.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-HMN-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-HMN-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-HMN-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-HMN.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-Autoencoder-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-Autoencoder-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-Autoencoder-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-Autoencoder.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h2 id="hypothesis">Hypothesis</h2> <p>HMN has a tendency to be heavily-indexed on metadata features and lyrics. Data such as artist name heavily bias the model’s popularity predictions in favor of big-name artists. Lyrics information can make the model biased to predicting instrumental music as less likely to be successful. While this may be representative of how the industry works, it makes HMN much less actionable for musicians trying to assess their chance of success with the market.</p> <p>We believe that audio-only features – such as temporal information (i.e. 
the structure of the song and information about previous sections) and repetition – can alone be fairly successful in determining a song’s popularity. Thus, we chose to use just audio data, as well as temporal data, to predict popularity.</p> <p>We hypothesize that combining the audio-only features of HMN with VGGish’s audio representation will yield superior outcomes to HMN’s audio-only features alone. We also hope that our new model can compete with the full HMN model (i.e. audio features and metadata combined).</p> <h2 id="data-preprocessing">Data Preprocessing</h2> <p>Given our hypothesis, we need to extract the low-level features from our signal and map each row to its corresponding audio file to be fed into VGGish. We used Spotify’s API to obtain the raw audio files to be processed, and then ran them through the VGGish network. We performed the same preprocessing as the one done in the MusicHitNet paper.</p> <h3 id="file-length-limitation">File length limitation</h3> <p>Unfourtunately, Spotify only allows the download of 30s previews of songs.</p> <h3 id="memory-limitation">Memory limitation</h3> <p>Audio files are heavy, and the longer they are, the heavier. We should ideally process all 95,000 songs’ full length, but given Spotify’s API’s request limit, and the size of the files, we were only able to obtain 10,000 30s snippets. This still resulted in roughly 14.5 GB of data. Processing the whole dataset would not only require roughly 140 GBs of data, but the Spotify API’s limits will likely be exceeded, and our colab notebook will likely run out of memory.</p> <h3 id="downsampling-and-splitting">Downsampling and splitting</h3> <p>Given the considerations above, we decided to use 10,000 songs for our model’s development, splitting the data into 8,500 samples for training and 1,500 for validation. 
Given that this is roughly 10% of the original data, we expect the model’s performance to be below the reported metrics.</p> <h2 id="baselines">Baselines</h2> <p>The metrics obtained when replicating the HMN network serve as a reasonable parameter to verify our model’s accuracy. As mentioned above, the model’s performance is expected to be below the paper’s reported metrics. To understand the range, we retrained a network with the same shape as the paper’s using the 10,000 samples in the same train/test split we will feed to our new network. The resulting metrics for this experiment can be seen in Table 2.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/img5-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/img5-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/img5-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/img5.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Training a model that results in similar metrics would be ideal, but realistically, as we will only be using low-level data, we expect the metrics to be lower than the values in Table 2. 
To ensure that our trained model isn’t just predicting noise, we use a baseline comparison, comparing against a random normal distribution with mean μ=40.02 and σ=16.79.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/img6-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/img6-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/img6-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/img6.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>As seen in table 3, the baseline intuitively would not appear to be too far from the trained HMN model in terms of MSE and MAE. When looking at the r-squared, the random model has a negative value, while the trained HMN netw ork results with a much higher 0.5616 value. 
To deem a model as successful, we will compare it against both sets of metrics.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/img1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/img1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/img1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/img1.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/img2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/img2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/img2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/img2.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h2 id="alternate-models">Alternate Models</h2> <ul> <li>Single autoencoder. Our first iteration to solve this problem consisted of using a single autoencoder to find representations with data coming from VGGish and SpotGetTrack low level features, and then running that through a feed-forward network similar to the one used in HMN. Since the output of VGGish is a tensor of shape (batch_size, n_windows, n_features) and the output of SpotGenTrack is (batch_size, 207), we concluded there was no simple way to combine the two data sources without losing temporal information.</li> <li>RNN. Our second iteration consisted of running the data coming from SpotGenTrack Low-Level through an autoencoder in the same way HMN does it. 
After this initial train gives us a compressed representation of the data from SpotGenTrack Low-Level, we train two subsequent networks: First an LSTM RNN which transforms data into (batch_size, 20), then we add the compressed representation from SpotGenTrack Low-Level and run that through a feedforward network. This model yielded a performance below the baseline.</li> <li>HMN+VGGish: This model consists of taking the full SpotGenTrack data, passing it through the regular autoencoder defined in HMN, and add it to the output coming from VGGish. This model, while resulting in promising results, still yielded worse performance than HMN on its own, so our team decided to explore alternatives.</li> <li>LossNet. Our third exploration consisted of training a model that uses VGGish’s outputs to try and predict losses from HMN. In essence, we are trying to use VGGish Representation to capture information that HMN consistently is unable to. This approach has parallels with Adversarial Networks, in that one model is being trained on the losses of another model. However, this approach is more cooperative than adversarial, since the result of the two models is not zero-sum. This approach led to a dead-end with surprising results.</li> </ul> <h2 id="final-architecture">Final Architecture</h2> <p>Our final iteration consists of a model with two autoencoders: One for data from SpotGenTrack low level features, the second for the representation obtained using the VGGish model. The slight difference between these two models is that the VGGish autoencoder has additional LSTM layers at the start of the encoder, and at the end of the decoder. The output from these two autoencoders is then added together and passed through a feed-forward network. 
This architecture can be seen in Figure 4.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-FinalArch-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-FinalArch-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-FinalArch-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-FinalArch.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h3 id="padding-and-packing">Padding and Packing</h3> <p>None of the audio files coming from Spotify previews are more than 30s in duration, but some are in fact shorter than others. To solve this issue, and also to be able to feed our model whichever sized data we require, we use pytorch’s packing functionality. Packing allows us to process sequential data with different sizes, so that only the relevant information is passed through the LSTM. Conversely, padding allows us to add zeros at the end of sequences so that all samples have the same size. 
This is required to store data in tensors.</p> <h3 id="hyperparameters">Hyperparameters</h3> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/img3-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/img3-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/img3-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/img3.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h3 id="additional-model-considerations">Additional Model Considerations</h3> <p>The original HMN model compiles 228 features into 45 representations for the feed-forward network. We want our model’s feed-forward network to have a similar number of inputs as the given architecture, therefore we compress the data in the encoder of both autoencoders to 20 features, so that when added together, they result in 40 total features.</p> <p>Additionally, as can be seen in figure 3.2, the target’s distribution is condensed at a central point, and distributed in a Gaussian shape. To help our model accurately predict the shape of the results, we use multiply the losses by a weighting factor. This multiplication is important to make our model more likely to predict outliers. The equation is the following:</p> <p>\begin{equation} \frac{1}{N} \sum_{i=1}^{N} \exp\left(\left(\frac_{i} - \text)}}{\alpha \cdot \text}\right)^2 \cdot \frac{1}{\beta}\right) \end{equation}</p> <p>Our feed-forward network was suffering of vanishing gradients during training. 
To attempt to avoid this, we initialized all linear layers with a weight distributed by Xavier uniform, and a constant bias of 0.1.</p> <h3 id="finding-the-best-model">Finding the Best Model</h3> <p>In order to find the best model, we modified plenty of parameters and hyperparameters. We first found the optimal autoencoder models (seen on table 4), and then we proceeded to run several loops over our linear layer to obtain the model with lowest errors. The parameters modified were the following:</p> <ul> <li>Learning rate: (0.001, 0.0001, 0.0002, 0.02, 0.0005)</li> <li>Weight decays: (0, 0.0001, 0.0002)</li> <li>Batch sizes: (200, 100, 256, 277)</li> <li>Means (for weights calculation): 0.33, 0.34, 0.35, 0.37, 0.38, 0.40, 0.42, 0.45)</li> <li>Alphas (for weights calculation): (1.8, 2.0, 2.1, 2.2)</li> <li>Betas (for weights calculation): (1.8, 2.0, 2.2)</li> <li>Number of linear layers: (7, 9, 12)</li> </ul> <p>The combination that resulted in the optimal model was the following:</p> <ul> <li>Weight decays: 0</li> <li>Batch sizes: 200</li> <li>Means (for weights calculation): 0.36</li> <li>Alphas (for weights calculation): 2.0</li> <li>Betas (for weights calculation): 2.0</li> </ul> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/img4-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/img4-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/img4-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/img4.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Table 5 shows the best-performing models obtained after experimentation. MAE, MSE and r-squared were calculated using the testing data, i.e. Data not used in training. 
Looking at the data in tables 2 and 3, we see that our model shows a significant improvement above the random baseline, with a reasonable r-squared and MSE. Reduction in the MAE remains challenging, but still we see a significant improvement from the random baseline.</p> <p>Furthermore, we analyzed the testing data, and found that in 919 of the 1,500 songs (61.2%) of the songs, our model did better than HitMusicNet. Upon further analysis, we found that our model did a better job predicting the popularity of songs with popularities ranged [0.22-0.55], while HMN does a better job at predicting outliers (songs with &lt;0.2 or &gt;0.6 of popularity).</p> <h2 id="conclusions-and-next-steps">Conclusions and Next Steps</h2> <h3 id="data-exploration">Data Exploration</h3> <p>Given Spotify’s ubiquity and analytics excellence, its Popularity Index is a good proxy for relative song popularity. But there are concerns around using data from a single platform (Spotify) and from a single channel (digital streaming). Given this concern, we would like to explore other methods of calibrating a track’s popularity (for example, Billboard and Discogs API). We can aggregate popularities into a single output, or can train each model on multiple outputs of various popularity scores.</p> <p>Currently, our data consists of 30s audio clips. The average new song length is around 3min 17s, meaning that our models’ inputs cover around 15% of the song. This can cause the model to miss information critical to song likeability, such as the intro, chorus, or bridge. We would like to make our dataset more complete by using full songs as inputs. Furthermore, we’re using only 10,000 data points, which can also be affecting our training efficiency, especially our ability to detect outliers, which we have found to be a key issue with our model. 
Ideally, we would like to train our models on all 95k songs in SpotGenTrack.</p> <h3 id="architectures">Architectures</h3> <p>Many more architectures can further be explored to predict song popularity. We found VGGish with an LSTM to be an efficient “boosting” algorithm, which contributed to the model in a less significant way that SpotGenTrack, but still allowed our model to increase its performance. Similarly, the use of transformer architectures can help improve the performance of our model.</p> <p>In this study, we explored and evaluated our model against the HitMusicNet’s regression algorithm. In further studies, it could be beneficial to explore the classification algorithm, as we have seen very promising results in the prediction of songs along a certain range.</p> <p>We used the VGGish model purely on inference since we required to train the autoencoder and then the feed-forward network. Future studies can include architectures such that the VGGish model is trained in series with the feedforward network, and fine-tuned to predict popularity. We could also look at alternate representation models that are perhaps better suited or supply a more apt representation for our task than VGGish.</p> <p>In conclusion, the use of low-level features to predict popularity can have several real-world advantages. The proposed model is able to predict a song’s popularity to a fair degree without the need for high-level features. Emerging artists can use these parameters to determine the possible success of their songs. Music labels can use this algorithm to predict an artist’s possible popularity. Platforms such as Spotify can also take advantage of this model in order to tackle recommendations and boost emerging artists.</p> <h2 id="bibliography">Bibliography</h2> <ul> <li>D. Martín-Gutiérrez, G. Hernández Peñaloza, A. Belmonte-Hernández and F. Álvarez García, “A Multimodal End-to-End Deep Learning Architecture for Music Popularity Prediction,” in IEEE Access, vol. 8, pp. 
39361-39374, 2020, doi: 10.1109/ACCESS.2020.2976033.</li> <li>Ding, Yiwei, and Alexander Lerch. “Audio embeddings as teachers for music classification.” arXiv preprint arXiv:2306.17424 (2023).</li> <li>D. Martín-Gutiérrez, “HitMusicNet” in https://github.com/dmgutierrez/hitmusicnet.</li> <li>Koutini, Khaled, et al. “Efficient training of audio transformers with patchout.” arXiv preprint arXiv:2110.05069 (2021).</li> <li>P. Nandi, “Recurrent Neural Nets for Audio Classification” in https://towardsdatascience.com/recurrent-neural-nets-for-audio-classification-81cb62327990.</li> <li>Wu, Rick, “VGGish Tensorflow to PyTorch” in https://github.com/tcvrick/audioset-vggish-tensorflow-to-pytorch.</li> <li>Wu, Yiming. (2023). Self-Supervised Disentanglement of Harmonic and Rhythmic Features in Music Audio Signals.</li> <li>S. Shahane, “Spotify and Genius Track Dataset” in https://www.kaggle.com/datasets/saurabhshahane/spotgen-music-dataset/data.</li> </ul> Ensemble Learning for Mitigating Double Descent 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/double_descent <h2 id="abstract">Abstract</h2> <p>We outline the fundamental ‘bias-variance tradeoff’ concept in machine learning, as well as how the double descent phenomenon counterintuitively bucks this trend for models with levels of parameterization at or beyond the number of data points in a training set. We present a novel investigation of the mitigation of the double descent phenomenon by coupling overparameterized neural networks with each other as well as various weak learners. Our findings demonstrate that coupling neural models results in decreased loss during the variance-induced jump in loss before the interpolation threshold, as well as a considerable improvement in model performance well past this threshold. 
Machine learning practitioners may also find useful the additional dimension of parallelization allowed through ensemble training when invoking double descent.</p> <hr /> <h2 id="motivation">Motivation</h2> <p>There are many important considerations that machine learning scientists and engineers must consider when developing a model. How long should I train a model for? What features and data should I focus on? What exactly is an appropriate model size? This last question is a particularly interesting one, as there is a bit of contention regarding the correct answer between different schools of thought. A classical statistician may argue that, at a certain point, larger models begin to hurt our ability to generalize. By adding more and more parameters, we may end up overfitting to the training data, resulting in a model that poorly generalizes on new samples. On the other hand, a modern machine learning scientist may contest that a bigger model is always better. Even if the true function relating an input and output is conveyed by a simple function, in reality, neither of these ideas is completely correct in practice, and empirical findings demonstrate some combination of these philosophies. This brings us to the concept known as <em>double descent</em>. Double descent is the phenomenon where, as a model’s size is increased, test loss increases after reaching a minimum, then eventually decreases again, potentially to a new global minimum. This often happens in the region where training loss becomes zero (or whatever the ’perfect’ loss score may be), which can be interpreted as the model ’memorizing’ the training data given to it. Miraculously, however, the model is not only memorizing the training data, but learning to generalize as well, as is indicated by the decreasing test loss.</p> <p>The question of ’how big should my model be?’ is key to the studies of machine learning practitioners. 
While many over-parameterized models can achieve lower test losses than the initial test loss minimum, it is fair to ask if the additional time, computing resources, and electricity used make the additional performance worth it. To study this question in a novel way, we propose incorporating <em>ensemble learning</em>.</p> <p>Ensemble learning is the practice of using several machine learning models in conjunction to potentially achieve even greater accuracy on test datasets than any of the individual models. Ensemble learning is quite popular for classification tasks due to this reduced error empirically found on many datasets. To our knowledge, there is not much literature on how double descent is affected by ensemble learning versus how the phenomenon arises for any individual model.</p> <p>We are effectively studying two different <em>types</em> of model complexity: one that incorporates higher levels of parameterization for an individual model, and one that uses several models in conjunction with each other. We demonstrate how ensemble learning affects the onset of the double descent phenomenon. By creating an ensemble that includes (or is fully comprised of) overparameterized neural networks, which can take extreme amounts of time and resources to generate, with overparameterized machine learning models, we will show the changes in the loss curve, specifically noting the changes in the regions where double descent is invoked. We hope that the results we have found can potentially be used by machine learning researchers and engineers to build more effective models.</p> <hr /> <h2 id="related-work">Related Work</h2> <p>One of the first papers discussing double descent was ’Reconciling modern machine- learning practice and the classical bias–variance trade-off’ by Belkin et al. <d-cite key="belkin2019reconciling"></d-cite>. 
This paper challenged the traditional idea of the ’bias-variance tradeoff’, a fundamental concept in machine learning that describes the tension between two types of model error: bias and variance. Bias is the error between the expected prediction of the model and the true output value, introduced by approximating a real-world quantity with a model, which may oversimplify the true problem at hand. Variance refers to the error due to a model’s sensitivity to small fluctuations in the training dataset. Overfitted models may have high variance, as they may model random noise in the data as well.</p> <div class="row"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/biasvariance-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/biasvariance-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/biasvariance-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/biasvariance.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" title="The Double Descent Curve" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> The Bias-Variance Tradeoff <d-cite key="cornell"></d-cite> </div> <p>In short, classical statistical learning argues that there is some optimal level of parameterization of a model, where it is neither underparameterized nor overparameterized, that minimizes the total error between bias and variance. However, Belkin’s paper finds that, empirically, the tension between bias and variance no longer becomes a tradeoff after a certain level of overparameterization. 
They showed that after the interpolation threshold (beyond where the model fits perfectly to the training data), test error eventually began to decrease again, even going below the error deemed optimal by the bias-variance minimum.</p> <p>Nakkiran et al.’s ’Deep Double Descent: Where Bigger Models and More Data Hurt’ <d-cite key="nakkiran2021deep"></d-cite> expanded these findings to the realm of <em>deep</em> learning. In this work, double descent is shown to occur for both large models and large datasets. Additionally, this paper demonstrates that, counterintuitively, adding more data at a certain point actually worsened the performance of sufficiently large models. Specifically, this occurred at and close to the interpolation threshold for neural models. This paper’s results can be seen here:</p> <div class="row"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/openai-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/openai-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/openai-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/openai.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" title="The Double Descent Curve" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> The Double Descent Curve <d-cite key="nakkiran2021deep"></d-cite> </div> <p>For the region between the first and second loss minima, model performance can suffer greatly, despite the increased computational time and resources used to generate such models. 
While this region of the test loss curve is typically not a level of parameterization that one would use in practice, understanding such loss curve behavior can help practitioners for several reasons. For one, this degraded phase of performance can be crucial for tweaking model architecture and adjusting training strategies. This is key to discovering if one’s model is robust and adaptable to various other datasets and tasks. This highlights the need for a new understanding for model selection in order to effectively generalize to testing datasets better, mitigating decreases in model performance and invoking a second loss minimum quickly.</p> <p>In the classic paper ’Bagging Predictors’, Breiman describes the concept of combining the decisions of multiple models to improve classification ability <d-cite key="breiman1996bagging"></d-cite>. Empirically, this bootstrap aggregating, or ’bagging’ technique, reduced variance and improved accuracy, outperforming the single predictors that comprised the ensemble model. We present a novel combination of the findings of this paper with the double descent phenomenon. Effectively, by increasing model complexity via overparameterization and ensemble learning, we aim to study if this combination can mitigate loss increases and invoke a second loss minimum with smaller models.</p> <hr /> <h2 id="setup">Setup</h2> <h3 id="computing-resources-and-software">Computing Resources and Software</h3> <p>We have implemented this project using CUDA and the free version of Google Colab, with additional computing units for more costly experiments. To train and test these models, we use various machine learning packages in Python, namely Scikit-learn, PyTorch and Tensorflow. Additional software commonly used for machine learning projects, such as numpy, tensorboard and matplotlib, was also utilized.</p> <p>All plots have been produced by us, unless otherwise specified. 
Note that all tensorboard plots have $0.25$ smoothing applied, except for the Soft-Voting Ensemble, which has $0.6$ smoothing applied (though this won’t make much of a difference as will soon be seen). The non-smoothed plot can be seen traced in light-blue in all provided plots.</p> <h3 id="data">Data</h3> <p>We use the MNIST dataset for this report <d-cite key="deng2012mnist"></d-cite>. MNIST is a popular dataset used for image classification, where each sample image is a $28$ by $28$ grayscale image of a written integer between $0$ and $9$, inclusive. Each image comes with the true label of the image’s integer. This data is publicly available for experimentation, and our use of it does not pose any ethical or copyright concerns.</p> <p>For this project, we use the MNIST dataset to unearth the double descent phenomenon. We experiment with a variety of models, as well as an ensemble of them: decision trees, AdaBoost trees, L2-Boost trees, random forests, logistic regression, and small neural networks. 
We choose these models because of their ability to be used for classification tasks, and more complicated models run the risk of exceeding Google Colab’s limitations, especially when we overparameterize these models to invoke double descent.</p> <div class="row"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/mnist-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/mnist-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/mnist-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/mnist.jpeg" class="img-fluid rounded z-depth-1" width="auto" height="auto" title="MNIST Sample Data" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Sample MNIST Data <d-cite key="deng2012mnist"></d-cite> </div> <hr /> <h2 id="models">Models</h2> <h3 id="decision-trees">Decision Trees</h3> <p>Decision trees are a machine learning model used for classification tasks. This model resembles a tree, splitting the data at branches, culminating in a prediction at the leaves of the tree.</p> <p>To invoke overparameterization for decision trees, we can start with a tree of depth 2, and increase the number of maximum leaves of the model until the loss plateaus. Then, keeping this new number of max leaves in our decision tree, we continually increase the maximum depth of the tree until the loss once again stops decreasing. Lastly, keep both the maximum leaves and depth at their plateau levels while increasing the max features. The results of this are plotted below. 
Notice how varying the number of maximum leaves has minimal effect on the loss, and how increasing the maximum depth causes the most dramatic decrease. However, fluctuations on the maximum depth at this point do not have a major effect, whereas varying the number of features causes another slight, yet consistent, fall in classification loss.</p> <p>Notice that the loss curve is more or less linear in the number of parameters (with some having much more effect than others), and so there is little evidence of double descent for this model.</p> <div class="row"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/dd_decision_tree_zero_one_8-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/dd_decision_tree_zero_one_8-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/dd_decision_tree_zero_one_8-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/dd_decision_tree_zero_one_8.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Decision Tree Overparameterization </div> <h3 id="adaboost-tree">AdaBoost Tree</h3> <p>Adaptive Boosting (AdaBoost) itself is an ensemble model used for robust classification. Freund et al.’s paper ‘A Decision-Theoretic Generalization of On-Line Learning and an Application to Boosting’ first introduced the algorithm <d-cite key="freund1997decision"></d-cite>. On a high level, this paper describes how boosting is especially effective when sequentially combining weak learners that are moderately inaccurate (in this case, these are decision trees) to create a strong learner. 
We study the loss curve of the AdaBoost model as we first increase the number of boosting trees which form a forest, then increase the number of forests across which we average results, after adding additional trees fails to significantly increase model performance. Each tree is constrained to have no more than 10 leaves.</p> <p>Notice that the loss curve is more or less linear in the number of parameters, and the double-U shape doesn’t seem to make its presence known.</p> <div class="row"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/dd_adaboost_zero_one_2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/dd_adaboost_zero_one_2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/dd_adaboost_zero_one_2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/dd_adaboost_zero_one_2.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> AdaBoost Overparameterization </div> <h3 id="l2-boost-tree">L2-Boost Tree</h3> <p>L2 Boosting is quite similar to the AdaBoost model, except for L2 Boosting, as models are built sequentially, each new model in the boosting algorithm aims to minimize the L2 loss<d-cite key="article"></d-cite><d-cite key="10.1214/aos/1013203451"></d-cite>. Like before, we first increase the number of boosting trees which form a forest in the L2-Boost model, then the number of forests across which we average using majority voting. The results can be seen below. 
Each tree is constrained to have no more than 10 leaves.</p> <p>Notice how the classification loss begins to fall, then rises up again, then falls once more when we average across more forests to lower minimums than before. This result was consistent across multiple runs of this experiment, suggesting that double descent is real for L2-Boosted Tree Ensembles.</p> <div class="row"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/dd_l2boost_zero_one_1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/dd_l2boost_zero_one_1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/dd_l2boost_zero_one_1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/dd_l2boost_zero_one_1.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> L2-Boost Overparameterization </div> <p>The behavior of the loss once we add more models agrees with general intuition regarding ensembling, but the appearance of double descent as we increase the total number of parameters is still quite interesting to see. L2-Boost is a relatively inexpensive model and ensembling a large number of trees is still quite fast, suggesting that overparameterization could be the way to go in this case.</p> <h3 id="random-forest">Random Forest</h3> <p>Random Forest is another popular ensemble model. 
As the name implies, it is a collection of decision trees with randomly selected features, and, like the singular decision tree, this model is used for classification tasks.</p> <p>We initialize random forest with a small number of maximum leaves allowed in each tree, and increase the max leaves until we see the loss plateau as we continually add more. After this, we begin increasing the number of trees in our forest until the loss plateaus once again.</p> <p>While Belkin et al. lists random forest as a model exhibiting double descent, this claim has been recently disputed, namely by Buschjager et al, which suggests that there is no true double descent with the random forest model <d-cite key="randomforest"></d-cite>. Instead, they suggest that random forest does not overfit in the classical sense, and argue that its curve instead has a single descent. By looking at our results, there is little evidence for the double-U curve, but we will refrain from drawing a decisive conclusion.</p> <div class="row"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/dd_rf_zero_one_6-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/dd_rf_zero_one_6-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/dd_rf_zero_one_6-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/dd_rf_zero_one_6.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Random Forest Overparameterization </div> <p>Despite this, for our ensemble model, we aim to see if the addition of this overparameterized learner to the neural network’s 
decision making is able to improve ensemble performance.</p> <h3 id="logistic-regression">Logistic Regression</h3> <p>Logistic regression is a classic model used for estimating the probability a sample belongs to various classes. We induce overfitting in logistic regression through two methods.</p> <p>First, we continually increase the ‘C’ parameter, indicating the inverse strength of regularization applied to the regression, as shown below. Notice that the loss decreases to a minimum before it starts slowly rising again, indicating that overfitting through fluctuations in ‘C’ may not actually lead to double descent, as would be expected from classical theory.</p> <div class="row"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_c-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_c-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_c-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_c.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Logistic Regression Overparameterization (Regularization Based) </div> <p>Second, we try inducing double descent by varying the ratio of the number of features over the amount of data. We gradually reduce this ratio using the intuition developed by Deng et al. 
in order to induce overfitting <d-cite key="logistic"></d-cite>, since the data becomes more and more separable as the number of features increases relative to the number of data samples.</p> <p>To do this, we test varying across the number of training samples instead of varying the number of features used for training. This eventually leads to 0 training error, but causes testing error to blow up, suggesting that some significant amount of training data is still needed to witness the desired behavior, consistent with both statistical and machine learning theory.</p> <div class="row"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_d-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_d-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_d-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/dd_logistic_regression_zero_one_d.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Logistic Regression Overparameterization (Feature-Data Ratio Based) </div> <p>An interesting setup for future experiments would be simultaneously increasing the amount of training samples and the number of polynomial features given to the logistic regression, while increasing the feature-data ratio each time we reparameterize or redefine the dataset.</p> <h3 id="neural-networks">Neural Networks</h3> <p>We use a Multilayer Perceptron as our main model for the ensemble. 
Our deep learning model is a relatively small one, with variable width in the hidden layer. By increasing this width, we eventually achieve perfect training loss.</p> <p>We define the general architecture of the neural network used in this report as follows:</p> <h4 id="network-layer">Network Layer</h4> <p>Let the input data be an $m$ by $m$ pixel image from the MNIST dataset, which can be processed as an $m$ by $m$ matrix, where entry $(i,j)$ is an integer between $0$ and $255$ (inclusive) representing the grayscale color of the pixel. Note that $m=28$ for MNIST, though for generality, we use $ m $ in this network definition. A value of $0$ represents a black pixel, $255$ is a white pixel, and values between these are varying shades of gray. We first flatten this structure into a $d = m^2 $ by 1 vector, such that the entry $ (i,j) $ of the matrix becomes the $ j + m \cdot i $-th entry of the vector, using zero-indexing. We use this vector as the input of our neural network.</p> <p>Set $H$ as the hidden layer width, which in our project will be varied in different tests. Let $ W^1 $ be a $ d \times H$ matrix, where $ W^1_{ij}$ is the weight of input $i$ applied to node $j$, and let $W^1_0$ be an $H \times 1$ column vector representing the biases added to the weighted input. For an input $X$, we define the <em>pre-activation</em> to be an $H \times 1$ vector represented by $Z = {W^1}^T X + W^1_0$.</p> <p>We then pass this linearly transformed vector to the ReLU activation function, defined such that</p> \[\begin{equation*} \text{ReLU}(x)=\begin{cases} x \quad &amp;\text{if} \, x &gt; 0 \\ 0 \quad &amp;\text{if} \, x \leq 0 \\ \end{cases} \end{equation*}\] <p>We use this choice of activation function due to the well-known theorem of universal approximation. 
This theorem states that a feedforward network with at least one single hidden layer containing a finite number of neurons can approximate continuous functions on compact subsets of $ \mathbb{R}^{m^2} $ if the ReLU activation function is used <d-cite key="hornik1991approximation"></d-cite>. Applying an activation function ReLU to each element of $Z $, the layer finally outputs</p> \[A = \text{ReLU}(Z) = \text{ReLU}(W^T X + W_0)\] <p>Next, we will input $A$ into a second hidden layer of the neural network. Let $K$ be the number of classes that the data can possibly belong to. Again, $K = 10$ for MNIST, though we will use $K$ for generality. Then let $W^2$ be an $H$ by $K$ matrix, where $W^2_{ij}$ is the weight of input $i$ applied to node $j$, and let $W^2_0$ be a $K \times 1$ column vector representing the biases added to the weighted input. For input $A$, define a second pre-activation to be a $K \times 1$ vector represented by $B = {W^2}^T A + W^2_0$.</p> <p>This will yield a $K \times 1$ vector representing the logits of the input image, with which we’ll be able to take Cross Entropy Loss or compute its probability of belonging to any of the $K$ classes.</p> <h4 id="training">Training</h4> <p>Let class $i $ be the true classification for a data point. We have that $y_i = 1$, and for all $j \neq i$, $y_j = 0$. Furthermore, let $\hat{y_i}$ be the generated probability that the sample belongs to class $i$. The categorical cross-entropy loss is then defined as follows:</p> \[\mathcal{L}_{CCE} (y_i, \hat{y_i}) = - \sum_{i=0}^{9} y_i \log (\hat{y_i})\] <p>From this computed loss, we use backpropagation and stochastic gradient descent (SGD) with learning rate $\eta = 0.1$ and $momentum = 0.95$ to optimize model weights. We run experiments on a dataset with $n = 4000$ subsamples that train over $100$, $500$, and $2000$ epochs using Belkin et al.’s approach to training <d-cite key="belkin2019reconciling"></d-cite>. 
Up to interpolation, we train until we reach zero classification error on the training data, or until we have finished all of the epochs, whichever comes first. After we have reached the interpolation threshold, we train until we have gone through all of the epochs. Note that to get their results, Belkin et al. trained over $6000$ epochs, which proved to be prohibitively expensive given our resources. Instead, we chose to train over a variety of smaller maximum epoch sizes to illustrate the double descent curve taking clearer shape, with the $2000$ epoch run being the most indicative of this phenomenon. Below are the results of the trained and tested neural networks. Notice that interpolation consistently happens when the number of parameters is roughly equal to $n\times K$ (i.e. Parameter Count / 1000 $= 40$), and the test loss starts consistently getting lower and lower as we add more and more parameters beyond this threshold. Double descent is real!</p> <div class="row justify-content-sm-center"> <div class="col-sm-12 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/mlp-100-epochs-train-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/mlp-100-epochs-train-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/mlp-100-epochs-train-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/mlp-100-epochs-train.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" title="MLP 100 Epoch Training" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-12 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" 
srcset="/staging/assets/img/2023-11-08-double_descent/mlp-100-epochs-test-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/mlp-100-epochs-test-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/mlp-100-epochs-test-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/mlp-100-epochs-test.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" title="MLP 100 Epoch Testing" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> MLP 100 Epoch Training/Testing Cross-Entropy Loss Over MLP Parameter Count / 1000 </div> <div class="row justify-content-sm-center"> <div class="col-sm-12 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/mlp-500-epochs-train-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/mlp-500-epochs-train-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/mlp-500-epochs-train-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/mlp-500-epochs-train.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" title="MLP 500 Epoch Training" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-12 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/mlp-500-epochs-test-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" 
srcset="/staging/assets/img/2023-11-08-double_descent/mlp-500-epochs-test-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/mlp-500-epochs-test-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/mlp-500-epochs-test.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" title="MLP 500 Epoch Testing" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> MLP 500 Epoch Training/Testing Cross-Entropy Loss Over MLP Parameter Count / 1000 </div> <div class="row justify-content-sm-center"> <div class="col-sm-12 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/mlp-2000-epoch-train-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/mlp-2000-epoch-train-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/mlp-2000-epoch-train-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/mlp-2000-epoch-train.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" title="MLP 2000 Epoch Training" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-12 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/mlp-2000-epochs-test-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/mlp-2000-epochs-test-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" 
srcset="/staging/assets/img/2023-11-08-double_descent/mlp-2000-epochs-test-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/mlp-2000-epochs-test.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" title="MLP 2000 Epoch Testing" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> MLP 2000 Epoch Training/Testing Cross-Entropy Loss Over MLP Parameter Count / 1000 </div> <p>For the sake of brevity, we avoid including plots for train/test classification loss for the MLPs. However, it is worth noting that train classification loss eventually reaches 0 in all experiments, whereas test loss eventually becomes $\sim$ 0.08 or smaller.</p> <p>Throughout each experiment, we vary across the number of total parameters of the model. For a network with $H$ hidden units, the total number of parameters is equal to $(d+1)\times H + (H + 1)\times K$, and so we choose $H$ accordingly each time we reparametrize.</p> <p>Note that we also incorporated a weight reuse scheme for models in the underparametrized regime to cut on training time, similarly to the approach in Belkin et al <d-cite key="belkin2019reconciling"></d-cite>.</p> <p>Additionally, even though the individual MLPs are small, training several of them sequentially for a relatively large number of epochs can take a very long time. To help reduce the time it takes to complete experiments, we also try adapting the Parameter Count Generation Algorithm provided in John Abascal’s blog <d-cite key="testbed"></d-cite> to intelligently select the next level of parameterization given the performance of the previous one. This algorithm was designed to most clearly showcase the existence of the double descent curve by fitting a third degree polynomial (since that is roughly what we expect the double descent curve to look like) to the model capacity vs. 
test loss graph, and choosing the next parameter by examining the first derivative of the polynomial fit to the data. More detail is provided in his blog.</p> <p>This algorithm proved helpful for empirically confirming the existence and validity of the interpolation threshold. However, after a few tests with the algorithm, we chose to complete most of the experiments using a pre-specified list of parameters which were able to consistently capture the double descent phenomenon in detail.</p> <hr /> <h2 id="ensemble-learning">Ensemble Learning</h2> <p>We experimented with two different types of ensembles. The first ensemble is what we call the ‘weak-learner’ ensemble, which is the model that incorporates the multi-layer perceptron supported by L2-Boost tree ensembles, random forests, decision trees and logistic regression. Note that we ultimately did not use AdaBoost in this ensemble because we believed this was too similar to the included L2-Boost model in both architecture and performance.</p> <p>The second ensemble is the ‘multi-layer perceptron’ ensemble, which includes 5 MLPs.</p> <h3 id="weak-learner-ensemble">Weak-Learner Ensemble</h3> <p>We use bootstrap aggregating, or ‘bagging’, to formulate our ensemble of these five models. Effectively, each model is given a certain number of ‘votes’ on what that model believes is the correct classification for any given MNIST sample image. We then experimented with two approaches to voting: hard voting and soft voting.</p> <p>In hard voting, the classification with the most total votes is then used as the ensemble’s overall output. In the event of a tie, the neural network’s prediction is chosen. Using this voting scheme, we train the MLP independently of the other models in the ensemble, using the same scheme as described previously.</p> <p>In soft voting, the weighted average of the predicted class probabilities of each model is used as the predicted class probabilities of the ensemble. 
We utilize this prediction when training the MLP, and use negative log likelihood loss instead of cross entropy loss, since taking the softmax of probabilities is not necessary. This way, we can incorporate the predictions of the whole ensemble into the training of the MLP. Since the ensemble now outputs a vector of class probabilities, the one with the highest probability will be used as the soft voting ensemble’s prediction.</p> <p>Since we want a neural model to be the basis of our ensemble, we vary the number of votes assigned to the neural network while keeping the number of votes for other models fixed to 1. With four supplementary models in addition to the neural network, giving the neural network 4 or more votes is not necessary, since this ensemble would always output the same results as the neural network. Because of this, we study the loss curve when giving the neural network 1, 2, and 3 votes. Note that decimal value votes for the neural network are not sensible (at least in the hard-voting scheme), since it can be proved that all potential voting scenarios are encapsulated into the three voting levels we have chosen.</p> <p>Another important aspect of our ensemble is that the ‘weak’ classifiers do not vary in parameterization; only the MLP does. Refitting all the weak classifiers across epochs and MLP parameterizations took much longer than expected, perhaps due to incompatibilities between sklearn and GPUs, and completing the experiments using this approach was unfortunately unfeasible. Hence, all ‘weak’ classifiers have fixed architectures, chosen such that each one has low test error but is not at the highest level of parameterization according to the previous discussion, and only the MLP varies.</p> <h3 id="multi-layer-perceptron-ensemble">Multi-Layer Perceptron Ensemble</h3> <p>The Multi-Layer Perceptron Ensemble uses 5 identically initialized MLPs which are trained in parallel using Pytorch’s autovectorization capabilities. 
Since they are defined in the same way and trained simultaneously using the MLP training scheme discussed above, each receives equal weight when it comes to taking an averaged prediction. However, unlike the bagging method used for the Weak-Learner Ensemble, we take advantage of the identical architectures of the models and the numerical stability provided by this, and generate ensemble predictions by averaging the logits of all five learners and using those values as the logits of the ensemble. Again, we experiment using 100 and 500 epochs to see how the behavior evolves across increasing number of epochs, but we omit training over 2000 epochs due to excessive computational costs. An experiment for the future would be training over a very large number of epochs for even greater ensemble sizes to see how results vary across time.</p> <p>There has been discussion in the past of whether to average the raw logits or the softmax-transformed probabilities. The main concern raised over averaging across raw logits is that the outputted values can vary greatly in magnitude across models (and therefore overconfident models can potentially overshadow all other models when taking the prediction), but, empirically, that doesn’t seem to be a problem here. Tassi et al. provide some intuition in “The Impact of Averaging Logits Over Probabilities on Ensembles of Neural Networks” <d-cite key="Tassi2022TheIO"></d-cite>, suggesting that different approaches to ensembling should be taken depending on the architecture and levels of confidence of the models. For general safety, they recommend averaging the probabilities, but for the purposes of our task, averaging the logits suffices.</p> <hr /> <h2 id="results-and-discussion">Results and Discussion</h2> <p>Contrary to our expectations, the Weak Learner Ensemble performs much worse than even the individual models on MNIST classification. 
Although our focus is on double descent and not on the strong predictive power of ensembles, the latter is needed to observe the former, or at least discuss it at an interesting level.</p> <p>Initially, we tried applying the soft-voting scheme for the Weak Learner Ensemble, but the reported results are unexpectedly poor, yielding very high classification loss, especially when compared to the results of each model taken individually. This may be because each ‘weak’ learner has high confidence in its predicted class, whereas the MLP may be more evenly split between different classes, which would result in the weak classifiers winning more often, even if the MLP has higher weight in the prediction. The plot of the negative log likelihood loss for both training and testing is also hard to understand, but it is clear the ensemble has a very hard time improving, even as more parameters are added. We only include the results for the ensemble that allocates 3 votes to the MLP, but note that these are the best loss curves we were able to produce given this method.</p> <div class="row justify-content-sm-center"> <div class="col-sm-12 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/Soft-Voting-3-train.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" title="example image" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-12 mt-3 mt-md-0"> <figure> <picture> 
<source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/Soft-Voting-3-test.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" title="example image" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Soft-Voting Weak-Learner Ensemble 100 Epoch Training/Testing Negative Log Likelihood Loss Over MLP Parameter Count / 1000. MLP given 3 votes. </div> <div class="row justify-content-sm-center"> <div class="col-sm-12 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-accuracy-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-accuracy-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-accuracy-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/Soft-Voting-3-train-accuracy.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" title="example image" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-12 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" 
srcset="/staging/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-accuracy-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-accuracy-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-accuracy-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/Soft-Voting-3-test-accuracy.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" title="example image" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Soft-Voting Weak-Learner Ensemble 100 Epoch Training/Testing Zero-One Loss Over MLP Parameter Count / 1000. MLP given 3 votes. </div> <p>We then tried the weak-learner approach again with hard-voting, and let the MLP independently train using the unmodified MLP training scheme mentioned previously. However, as opposed to halting training when MLP classifications loss first hits 0, we only halt training when <em>ensemble</em> classification first hits 0.</p> <p>We found that while classification loss had certainly gone down when compared to the soft-voting scheme (with even just <em>one</em> vote!), the ensemble still severely underperformed when compared to each of the individual models used. As seen in the plots, the classification loss starts to improve once the MLP gets more and more votes, agreeing with intuition that, eventually, the MLP has the veto right. As opposed to the soft-voting scheme, all classifiers now have a contribution that is proportional to their voting weight, which mitigates the previous problem of some models having much higher confidence than others. However, we believe the poor results can be attributed to the models we used for ensembling. 
Indeed, a significant number of models are regular, boosted or ensembled (or all) versions of decision trees, which means there is a significant chance that they make similar mistakes on similar data points. Looking at the plots for overparameterized decision trees and L2-Boost ensembles, we see that train error never quite reaches 0 for any of them. Since the train loss seems to plateau for our models as well, this may explain why. In the cases of 1 or 2 votes, this can lead to consistently poor predictions, especially since the models are not reparameterized across the experiment. For 3 votes, this phenomenon is less significant, as the ensemble slowly begins to reach the testing performance of the individual models.</p> <div class="row justify-content-sm-center"> <div class="col-sm-12 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-train-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-train-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-train-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-train.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" title="example image" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-12 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-test-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" 
srcset="/staging/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-test-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-test-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-1-vote-test.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" title="example image" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Weak-Learner Ensemble 100 Epoch Training/Testing Zero-One Loss Over MLP Parameter Count / 1000. MLP given 1 vote. </div> <div class="row justify-content-sm-center"> <div class="col-sm-12 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-train-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-train-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-train-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-train.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" title="example image" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-12 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-test-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" 
srcset="/staging/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-test-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-test-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-2-vote-test.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" title="example image" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Weak-Learner Ensemble 100 Epoch Training/Testing Zero-One Loss Over MLP Parameter Count / 1000. MLP given 2 votes. </div> <div class="row justify-content-sm-center"> <div class="col-sm-12 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-vote-train-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-vote-train-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-vote-train-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-vote-train.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" title="example image" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-12 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-votes-test-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 
800px)" srcset="/staging/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-votes-test-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-votes-test-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/Strong-Classifier-100-Epochs-3-votes-test.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" title="example image" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Weak-Learner Ensemble 100 Epoch Training/Testing Zero-One Loss Over MLP Parameter Count / 1000. MLP given 3 votes. </div> <p>Further work could be done on the Weak-Learner Ensemble, focusing on better model selection and concurrent reparameterization across all models. Given the limited time and compute resources at our disposal, we leave this problem open for now.</p> <p>All hope is not lost, however. Seeing the poor performance of the Weak-Learner Ensemble given the significantly better performance of individual models, one could be discouraged from attempting to use ensembling to mitigate double descent, since it may not even be observable in such settings. However, we saw double descent in L2-Boost ensembles and, arguably, in random forests, and so we pushed onward. All other ensemble methods used multiple copies of the same model, and so we decided to experiment with a small ensemble of MLPs, to see how they would behave.</p> <p>This was feasible for 100 and 500 epochs only, but the obtained results shed light on how ensembling could in fact mitigate double descent. The phenomenon is not quite as observable in the 100 epoch case (one explanation could be that the train loss has not converged yet), but it becomes quite clear when looking at the 500 epoch ensemble and comparing it with the original 500 epoch MLP. 
Double descent is still very easy to see, occurring at the same threshold as before. This makes sense, since the MLPs have all reached interpolation, which should increase test loss for all, and then start going down as we overparametrize more and more. However, the main result is that the increase once we reach interpolation is <em>much</em> lower than before. Indeed, the ensemble sees a jump from $\sim$ 0.35 to around $\sim$ 0.4 at the highest, whereas the individual MLP sees a jump from $\sim$ 0.36 to around $\sim$ 0.52. Another important result is that the loss as we overparameterize becomes <em>significantly</em> lower in the ensemble model than in the individual MLP.</p> <div class="row justify-content-sm-center"> <div class="col-sm-12 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/ensemble-100-epochs-train-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/ensemble-100-epochs-train-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/ensemble-100-epochs-train-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/ensemble-100-epochs-train.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" title="example image" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-12 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/ensemble-100-epochs-test-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/ensemble-100-epochs-test-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 
1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/ensemble-100-epochs-test-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/ensemble-100-epochs-test.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" title="example image" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> MLP Ensemble 100 Epoch Training/Testing Cross-Entropy Loss Over MLP Parameter Count / 1000 </div> <div class="row justify-content-sm-center"> <div class="col-sm-12 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/ensemble-500-epochs-train-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/ensemble-500-epochs-train-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/ensemble-500-epochs-train-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-double_descent/ensemble-500-epochs-train.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" title="example image" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm-12 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-double_descent/ensemble-500-epochs-test-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-double_descent/ensemble-500-epochs-test-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-double_descent/ensemble-500-epochs-test-1400.webp" /> <!-- Fallback to the original file --> <img 
src="/staging/assets/img/2023-11-08-double_descent/ensemble-500-epochs-test.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" title="example image" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> MLP Ensemble 500 Epoch Training/Testing Cross-Entropy Loss Over MLP Parameter Count / 1000 </div> <p>While we weren’t able to fully get rid of the double descent curve by ensembling multiple MLPs, the fact that it became flatter and the loss past the interpolation threshold started to become smaller is quite exciting, as it suggests that, potentially, large ensembles of MLPs may not noticeably suffer from double descent at all, and yield better overall predictions than individual models can. One notable advantage to this ensemble method is the ability to further parallelize one’s training of overparameterized neural networks. These models can take extreme lengths of time to train, and besides increasing the computational allocation used, practitioners may use data, model, or processor parallelism in order to reduce this time. The ensemble neural networks we use are independently generated, meaning that they can be vectorized or trained on different GPU cores without issue. This could be a valid alternative to training for more epochs for reducing model error past the interpolation threshold. 
More work investigating the effect of neural network ensembling on double descent, especially on models trained over many epochs, would be very exciting and potentially shed even more light on the possible advantages of overparameterization.</p> <hr /> <h2 id="conclusion">Conclusion</h2> <p>We discussed the existence of double descent for some simple and classical models, observing the effects of varying across levels of parameterization and noting that single descent can sometimes be mistaken for double descent, and proposed the use of various ensembles to mitigate the effects of double descent.</p> <p>Ensembles consisting solely of neural networks resulted in a considerable boost in performance past the individual model interpolation threshold, and in a flatter curve when compared to individual models. However, pairing the neural network with weak learners in an ensemble voting system decreased testing performance, though this adverse effect waned as the neural network received proportionally more votes. Machine learning engineers that intend to intentionally overparameterize their models may take advantage of not only the ensemble approach’s increased performance and significantly more reliable results, but the enhanced parallelization and vectorization capabilities offered by the proposed method.</p> <hr /> <h2 id="future-work">Future Work</h2> <p>This project was implemented using Google Colab, which proved to be restrictive for adopting more complex models. A key part of the double descent phenomenon is overparameterization, which happens across multiple full training loops, and so complex models that are additionally overparameterized will require more powerful computing resources beyond what we used. For example, a model which takes 10 hours to complete a single training loop will take multiple days to train before being able to plot results and observe double descent. 
Even for models that take around 10 to 15 minutes to train, such as the 500 epoch MLP we explored throughout our project, a full experiment that showcases the double descent curve in detail can take upwards of 5 hours. Furthermore, additional computing power can allow for this project to be expanded to more complicated datasets and tasks. MNIST classification is computationally inexpensive, though invoking double descent in more complex tasks such as text generation in natural language processing was not feasible using Google Colab. Future projects that follow this work should keep computational limitations in mind when choosing models and datasets.</p> <p>In addition to the future work suggested throughout our project, we propose a final approach that we believe is worth exploring further. During the planning process of this project, we discussed using a more rigorous voting system than what is traditionally found in ensemble model projects. Effectively, each model would have a weight associated with how much influence its output should have on the overall ensemble output. For $n$ models, each model could start with, say, a weight of $1/n$. Then, after producing each model’s vector output, the categorical cross-entropy loss with respect to the true output could be computed, and the weights of each model could be updated such that each model has its weight decreased by some amount proportional to the calculated loss. Then, these weights could be normalized using the softmax function. This would be repeated for each level of parameterization. Due to resource constraints and the limitations of sklearn to the CPU, learning both the model weights and ensemble weights at each level of ensemble parameterization was not feasible given the size of the models we built and the classifiers we chose to use, as well as the number of epochs we trained over. 
Future studies may wish to implement this method, however, to produce a more robust ensemble for classification.</p> <hr /> <h2 id="reproducibility-statement">Reproducibility Statement</h2> <p>To ensure reproducibility, we have included the codebase used for this project, as well as the above description of our data, models, and methods <d-cite key="colab"></d-cite>. Note that the Colab notebook that we have worked in is currently very messy and sometimes incomplete due to faults in Google’s autosaving feature, but we plan to clean it up and have it available for easy future experimentation.</p> Injecting Node Information via Embedding Initializations 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/SmartEmbeddingInitializations <h2 id="introduction">Introduction</h2> <p>Graph Neural Networks (GNNs) have emerged as a transformative tool in machine learning, with the ability to capture the complex structures and relationships inherent in data. In molecular property prediction, for example, GNNs are great at encoding the atomic structure and intermolecular forces into high-dimensional embeddings, leading to more accurate predictions of chemical properties and drug efficacy. GNNs have also been used in traffic time prediction problems, physics simulations and social media analysis applications. Through message-passing and updating, GNNs are capable of learning embeddings that encode information about node neighbors and long-distance complex connections – that we, as humans, may not be able to make. 
The importance of good embeddings in GNNs is why we want to look closer at embedding initializations and if we can inject additional information – not present in the graph – to result in better learned embeddings after training.</p> <p>Possible applications of initial embedding initializations could help in the field of drug discovery. For GNNs used for protein retrieval trained on a biomedical knowledge graph, using ESM embeddings for the proteins could add structure information that is not previously encoded in the graph entities.</p> <h3 id="project-outline">Project Outline</h3> <p>We will explore the question: can additional node information be injected into the model by using intentional embedding initializations rather than random initializations? Furthermore, are the learned embeddings better representations of the nodes? To answer this question we will follow the steps outlined below:</p> <ol> <li>We will download a precision medicine knowledge graph and use a GNN, TxGNN, that is implemented for disease-drug link prediction on a biomedical knowledge graph as a baseline model.</li> <li>We will modify the GNN for protein-molecular function link prediction.</li> <li>Generate and download ESM embeddings for each protein</li> <li>Pretrain and finetune two models – one using random protein node initialization and one using ESM embeddings for protein node initialization. We must pretrain our own models, rather than use the already pretrained model, since we are focusing on how different node initializations impact the predictive power.</li> <li>Evaluate both models</li> <li>Visualize latent spaces before pretraining, after pretraining and after finetuning</li> </ol> <h2 id="related-work--motivation">Related Work &amp; Motivation</h2> <p>In reviewing the literature, we found several papers which reference the possibility of improved performance through a more informed initialization process. 
As discussed by Li et al., the initialization methods used for GNNs, such as Xavier random initialization, were originally designed for CNNs and FNNs. In that setting, the Xavier approach helped to avoid vanishing gradients and maintain a constant information flux. However, Li et al. point out that by leveraging the structure of the graph, we can likely do better than the random initializations used previously <d-cite key="Li2023"></d-cite>.</p> <p>In the paper detailing TxGNN, Huang et al. present promising results on their ability to predict drug repurposing opportunities using a GNN <d-cite key="Huang2023"></d-cite>. However, in their work they considered only the Xavier random initializations for weight matrices and node embeddings. This left open the idea of initializing the graph using more sophisticated methods.</p> <p>Previous work by Cui et al. has explored the power of artificial node initializations, finding that encoding structural and positional information in the node initializations can have a profound effect on the ability of a GNN to accurately predict features based on the graph. They provide a basis for our investigation by showing the effect that initializations can have on the results, if done correctly. We seek to build on this work by testing the effect of injecting related, but not exactly equivalent information through the node initializations <d-cite key="Cui2021"></d-cite>.</p> <p>Not only did we see an opportunity to try a different initialization method, but this problem also lent itself well to data-informed initializations. The molecules in TxGNN have a wealth of knowledge about them which is not represented in the linkages in the graph, some of which is represented in the ESM embeddings of the molecules. 
Thus, we thought that by supplying these embeddings to the GNN, we might be able to leverage the additional data to make better predictions.</p> <h2 id="data">Data</h2> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/PrimeKG-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/PrimeKG-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/PrimeKG-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/PrimeKG.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Precision Medicine Knowledge Graph. Figure credit: <i>Building a knowledge graph to enable precision medicine</i> (Chandak, Huang, Zitnik 2023). </div> <p>We used a precision medicine knowledge graph (PrimeKG) constructed by Marinka Zitnik’s group at Harvard <d-cite key="Chandak2023"></d-cite>. PrimeKG compiles data from knowledge bases that cover a broad variety of biomedical information including human disease, drug-protein interactions, genes and proteins with their associated biological processes, functions and cellular components, etc. PrimeKG contains 10 different node types – shown above – and 29 different types of undirected edges. There are over 120,000 nodes in total and over 8 million edges. What PrimeKG lacks, importantly, is any nodes or encodings of structural, molecular or sequential information for entity nodes such as proteins and drugs. The node types of interest for our model are proteins, extracted from NCBI, and molecular function Gene Ontology (GO) annotations <d-cite key="Gene_Ontology_Consortium2021-uk"></d-cite>. We will be predicting links between these two node types.</p> <p>The other data used were ESM embeddings for proteins in PrimeKG. 
ESM embeddings, or Evolutionary Scale Modeling embeddings, are high-dimensional vector representations of proteins, derived from advanced machine learning models developed by Meta trained on large datasets of protein sequences. These embeddings capture the intricate structural and functional characteristics of proteins, reflecting evolutionary relationships and biochemical properties that are crucial for various biological and computational applications <d-cite key="Lin2022-esm2"></d-cite>. The reason we were interested in using ESM embeddings, rather than embeddings from other protein foundation models, was that structural information was not already captured in PrimeKG, as previously mentioned. To obtain the ESM embeddings, first we downloaded the amino acid sequence for each protein from NCBI using Entrez. Then, using these sequences as input to Facebook’s ESM2 model, we extracted the corresponding embedding.</p> <h2 id="gnn">GNN</h2> <p>The model we used as a baseline is TxGNN, a graph neural network trained on PrimeKG used to make therapeutic drug predictions for diseases <d-cite key="Huang2023"></d-cite>. The GNN has two training phases. First, pretraining where the GNN finds biologically meaningful embeddings for all nodes in the knowledge graph, and therefore the objective is all link prediction. The second phase is to finetune the GNN, using self-supervised learning, to be able to predict drugs for diseases. Therefore, the objective for finetuning is to optimize contraindication and indication link prediction – the two types of links between diseases and drugs. We modified the training code for the finetuning phase, to train and validate on protein-molecular function links instead.</p> <h3 id="architecture">Architecture</h3> <p>The GNN has two linear layers with parameters n_input, n_hidden, and n_output. For all our models n_input is 1280, restricted by the length of ESM embeddings. 
We play around with different dimensions for the hidden and output layers. Leaky ReLU activation is used after the first layer.</p> <h3 id="training">Training</h3> <p>The first step of the training phase is <strong>node embedding initialization</strong>. The default, which is our random control, is to initialize all nodes using Xavier uniform initialization <d-cite key="pmlr-v9-glorot10a"></d-cite>. Models referred to as <em>random</em> from here on out are referring to using Xavier uniform initialization. For our experimental model, we initialized the protein nodes using the ESM embeddings we obtained earlier. All other node types were still initialized with Xavier uniform initialization. Note that we reinitialized nodes between pretraining and finetuning.</p> <p>During the training phase, the GNN uses a standard message-passing algorithm to update and optimize the node embeddings. There is a relation-type specific weight matrix (for each of the 29 relation types) used to calculate relation-type specific messages. The message for one relation to the some node $i$ is calculated using this equation: \begin{equation} m_{r, i}^{(l)} = W_{r, M}^{(l)} h_i^{(l-1)} \end{equation}</p> <p>For each node $v_i$, we aggregate incoming messages from neighboring nodes for each relation-type $r$, denoted as $N_r(i)$. 
This is done by taking the average of these messages: \begin{equation} m_{g_r, i}^{(l)} = \frac{1}{|N_r(i)|} \sum_{j \in N_r(i)} m_{r, j}^{(l)} \end{equation}</p> <p>The new node embedding is then updated by combining the node embedding from the last layer and the aggregated messages from all relations: \begin{equation} h_i^{(l)} = h_i^{(l-1)} + \sum_{r \in TR} m_{g_r, i}^{(l)} \end{equation}</p> <p>Finally, DistMult <d-cite key="Yang2014-zb"></d-cite> is used to calculate link prediction between two nodes using their respective embeddings.</p> <h2 id="model-evaluation">Model Evaluation</h2> <p>We fixed all parameters and hyperparameters, and trained two models – one using random initializations and one using ESM embeddings. We pretrained for 3 epochs with a learning rate of $1e-3$ and a batch size of 1024. We then finetuned for 150 epochs with a learning rate of $5e-4$.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/Figure2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/Figure2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/Figure2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/Figure2.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>These results are promising and using ESM embeddings to initialize the protein node representations slightly improves the model. The ESM model has a final testing loss of 0.3915, whereas the random model has a final testing loss of 0.4151. However, the difference between the models is slim and may not be significant, especially looking at the similarities in the pretraining, training and validation loss curves. 
Later, we will look more in depth about how the embedding spaces vary between the 2 models which has the potential to yield more interesting results.</p> <h3 id="testing-varying-hidden-and-output-layer-dimensions">Testing varying hidden and output layer dimensions</h3> <p>We wanted to see the impact changing the hidden and output layer dimensions would have on model performance. We tested 3 models, with parameters detailed in Table 1. All models outside of this experiment, unless otherwise specified, have the same parameters as Model 1.</p> <table> <thead> <tr> <th> </th> <th>Input Dimensions</th> <th>Hidden Layer Dim.</th> <th>Output Layer Dim.</th> </tr> </thead> <tbody> <tr> <td>Model 1</td> <td>1280</td> <td>1280</td> <td>1280</td> </tr> <tr> <td>Model 2</td> <td>1280</td> <td>512</td> <td>512</td> </tr> <tr> <td>Model 3</td> <td>1280</td> <td>512</td> <td>128</td> </tr> </tbody> </table> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/Testing_output_dim-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/Testing_output_dim-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/Testing_output_dim-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/Testing_output_dim.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>We can see from the testing loss that when just comparing ESM initialized model, testing loss increases as the output layer decreases. The same trend holds true between random initialized models. 
We can also see that when comparing ESM and random models for the same layer dimensions, ESM always slightly outperforms the random model.</p> <h2 id="latent-space-visualizations">Latent Space Visualizations</h2> <p>In the fast-evolving world of deep learning, the analysis of model latent spaces has emerged as an interesting area of study, especially to get a better understanding of how models are achieving their tasks. These spaces are important to understanding how complex models like GNNs perceive and process the intricate relationships and structures inherent in graph data. GNNs can learn powerful representations that capture both node-level and graph-level features. By analyzing the latent spaces of GNNs, we can get insights into how these models prioritize various patterns and connections within the data. The following analyses visualize the latent spaces of our models, clustered and colored in different ways, to get a deeper understanding of how the ESM initialized embeddings are affecting the GNN.</p> <p>We first were curious whether, after training our model, the final embeddings retained structural information about the proteins. To do this, we first clustered the original ESM embeddings using K-Means clustering. Next, we visualized the embedding space of the original ESM embeddings, the final embeddings from the ESM model and the final embeddings from the random model using t-distributed Stochastic Neighbor Embedding (t-SNE) for dimensionality reduction. 
From the t-SNE plot of original ESM embeddings, we can clearly see the clusters from K-Means which serves as a verification of our clustering technique.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/init_cinit-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/init_cinit-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/init_cinit-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/init_cinit.jpeg" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Looking at the embedding space for the ESM and random models, colored by ESM clusters, we note that most of the ESM structural information (used to create the clusters) seems to be forgotten during the training process, as evidenced by the mostly random assortment of colors present in the t-SNE plot. We note that some clusters do remain, for example cluster 12 (light sage green on the right side of the ESM initialized plots) is still clustering in the final embeddings (top middle cluster). 
However, the most prominent ones appear in both the ESM initialized and random initialized data, meaning that the ESM embedding did encode some function, but the model using random initialized embeddings was able to capture that relation as well.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/cluster_init-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/cluster_init-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/cluster_init-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/cluster_init.jpeg" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Given that the final embedding space for the ESM model didn’t seem to retain much of the information for the ESM embedding initialization, we were curious whether the ESM and random embeddings converged to a similar space. To test this theory, we clustered the final ESM model embeddings and subsequently visualized the final embeddings of the ESM and random models using t-SNE and colored by those clusters.</p> <p>If the two models converged to similar embedding spaces, we’d expect to see that clusters found in one embedding space would also be found in the other. This is the case, as seen in the two plots below. Both plots are colored based on a clustering of the final embeddings generated by the ESM initialized network, and they share many of the same structures, indicating that the two networks were able to pick up on mostly the same features in the underlying information. 
Both models converged to a similar embedding space despite different initialization methods.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/cluster_esm-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/cluster_esm-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/cluster_esm-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/cluster_esm.jpeg" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h3 id="testing-varying-hidden-and-output-layer-dimensions-1">Testing varying hidden and output layer dimensions</h3> <p>As mentioned earlier, we tested different dimensions for the hidden and output layers to see whether more or fewer output dimensions would retain the original ESM embedding information.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/dimensions-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/dimensions-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/dimensions-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/dimensions.jpeg" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Although there are more distinct clusters on the t-SNE plots as the number of output dimensions increases, these clusters are not the same as the clusters from the original ESM embeddings (seen by the randomly colored dots). Therefore, neither of these 3 models retained the structural information provided by initializing with ESM embeddings. 
It does not seem that decreasing output and hidden layer dimensions improves the model performance or latent space of our GNN.</p> <h3 id="clustering-by-molecular-function-labels">Clustering by molecular function labels</h3> <p>Because our model’s task was to predict links between protein and molecular function nodes, we were curious to see if the final embeddings for the protein nodes would cluster well on the function labels. However, this wasn’t as straight forward as having 1 molecular function label for each protein node, because each protein may be linked to multiple molecular functions. One protein may have multiple molecular function Gene Ontology (GO) annotations because the GO database uses a hierarchical system to categorize functions, where broader functions encompass more specific ones. A protein can be involved in several distinct biochemical activities, each represented by its own GO term, reflecting the diverse roles a protein can play in the cell. Instead of a single label, we extracted a molecular function profile, $v_i$, for each protein where $v_i[j] = 1$ if a link exists between protein $i$ and function $j$. We then had a sparse matrix, $V^{i \times j}$. Before clustering, we performed dimensionality reduction using truncated SVD which is optimal for sparse matrices. 
Finally, we performed K-Means clustering.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/cluster_func-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/cluster_func-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/cluster_func-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/cluster_func.jpeg" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Looking at the t-SNE plots, there is no apparent clustering by molecular function profiles in the final embedding spaces for either the ESM model or the randomly initialized model. There are multiple possible explanations for this. One explanation is that the actual objective is to predict each singular link between a protein and a function node, not to do well at predicting all function nodes linked to a protein at once. On top of that, our GNN uses self-supervised learning, therefore the molecular function profiles are not true labels used during training.</p> <p>The second plausible explanation has to do once again with the hierarchical nature of molecular function GO annotations. Because the molecular function nodes have random indices when stored in PrimeKG, it is not apparent that molecular functions that have the same parent function are close to each other, or to their parent function, in the molecular function profiles. Therefore, when performing truncated SVD and subsequently k-means clustering, the similar functions may not be clustered together if their indices are far apart. Further analysis could be done to reorder the molecular function nodes and then conduct hierarchical clustering, instead of k-means. 
These possible clusters may then be found in the final latent spaces for the two models.</p> <h2 id="discussion">Discussion</h2> <p>In this post, we have modified and fine-tuned a Graph Neural Network, TxGNN originally designed for drug-repurposing prediction, for protein function prediction with a variety of initializations of the node embeddings. We observed that while much of the information in the initialization is forgotten during the training process, a small amount is retained, leading to slightly better performance on the test set in the final network. This provides a potential avenue for further study, investigating the overall effects of informed initialization techniques on GNN performance. Some of this investigation is discussed in Li et al. <d-cite key="Li2023"></d-cite>, where they experiment with weight matrix initializations and propose a new paradigm for determining weight initializations, but there is certainly more investigation to be done.</p> Overparameterization of Neural Networks through Kernel Regression and Gaussian Processes 2023-12-11T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/overparameterization <h1 id="introduction">Introduction</h1> <!-- ideas for experiments: - visualization of functions learned by MLP of different widths/kernels/GP on a fixed dataset - figure on the connection between 3 methods - comparison test error as # of samples increase for a fixed dataset - table of r^2 for genomic (high-dimensional) data --> <p>In this work, we will explore the successes of overparameterization of neural networks through evaluating the relationship between the Neural Tangent Kernel (NTK), MLPs, and Gaussian processes. Recent work has shown that overparameterized neural networks can perfectly fit the training data yet generalize well enough to test data. 
This was formalized as “the double descent curve” <d-cite key="belkin2019reconciling"></d-cite>, which suggests that increasing model capacity results in improved performance.</p> <p>To help elucidate our understanding of neural networks as the width increases, I wanted to understand the connections between neural networks, which are often regarded as “black boxes,” and other classes of statistical methods, such as kernels and NNGPs. My goal is to put neural networks in the greater context of statistical machine learning methods that are hopefully easier to reason with and interpret.</p> <h3 id="literature-review">Literature Review</h3> <p>There is already prior literature on the connections between these three classes of models.</p> <ol> <li> <p>Kernel Regression $\iff$ MLPs: This connection was introduced in <d-cite key="jacot2018neural"></d-cite>. In particular, they proved that the limit of a neural network as width approaches infinity is equivalent to kernel regression with the Neural Tangent Kernel (NTK).</p> </li> <li> <p>MLP $\iff$ Gaussian Processes: The connection for infinitely-wide one-layer neural networks was introduced in <d-cite key="neal1996priors"></d-cite> and for deep networks in <d-cite key="lee2017deep"></d-cite>. This comes from the observation that if the weights are sampled Gaussian i.i.d., then the Central Limit Theorem states that as the width approaches infinity, the output is also Gaussian. We also went over this briefly in class.</p> </li> <li> <p>Gaussian Processes $\iff$ Kernel Regression: Other than the obvious fact that they both use kernels and the “kernel trick,” I could not really find a resource that established a clear connection between the two other than through the intermediary of MLPs. 
In this project, this is one link that I will try to explicitly establish.</p> </li> </ol> <p>Other relevant prior works I reviewed include:</p> <ul> <li>The formalization of the double descent curve in <d-cite key="belkin2019reconciling">, which uprooted our previous understanding of the bias-variance tradeoff and the notion that models should not overfit. This also motivates the use of infinite-wide neural networks (extreme overparameterization) for prediction tasks. Otherwise, conventional wisdom would say that these models overfit.</d-cite></li> <li>Why is this problem even interesting? This paper <d-cite key="radhakrishnan2022simple"> shows that kernels achieve competitive performance for important matrix completion tasks, so neural networks are not necessarily the only solution to many tasks of interest.</d-cite></li> <li>The lecture notes from <a href="https://web.mit.edu/modernml/course/">this IAP class</a>. I used some of the notation, definitions, and theorems from the lecture notes to write this post, but I also worked through some of the math on my own (e.g. 
the overparameterized linear regression proof for general $\eta$ and $w^{(0)}$, proving that $X^\dagger$ minimizes $\ell_2$ norm, etc.).</li> <li>I also used <a href="https://lilianweng.github.io/posts/2022-09-08-ntk/">this blog</a> to better understand the intuition behind NTKs.</li> </ul> <p>The gaps in prior knowledge I want to tackle include (1) the explicit connection between GPs and kernel regression and (2) how sparsity of kernel regression can help explain the generalization abilities of neural networks.</p> <h3 id="my-contributions">My Contributions</h3> <ul> <li>The explicit connections between kernel regression, MLPs, and Gaussian Processes (GP), particularly kernel regression and GP.</li> <li>How properties of overparameterized linear/kernel regression can help us understand overparameterization of neural networks, particularly the regularization of the weights.</li> <li>Empirical demonstrations of the theory developed here.</li> </ul> <p>To start, I work through the math to understand overparameterization in linear regression and connect the results to overparameterization in kernel regression.</p> <h3 id="overparameterization-in-linear-regression">Overparameterization in Linear Regression</h3> <p>Linear regression involves learning a predictor of the form $\hat{f}(x) = wx$, where $w \in \mathbb{R}^{1 \times d}, x \in \mathbb{R}^{d \times 1}$. Much like neural networks, we find $\hat{w}$ by minimizing the Mean Squared Error (MSE) of the prediction $\hat{f}$ from the target $y \in \mathbb{R}$ across all $n$ samples: \(\mathcal{L}(w) = \frac{1}{2}||y - \hat{f}(x)||_2^2\)</p> <p>Without knowing much about the relationship between $n$ and $d$, it is not obvious that there is a closed form solution to this system of equations. Of course, if $n = d$ (and $X$ is full rank), then we can directly solve for $w$. 
Specifically, if $Y \in \mathbb{R}^{1 \times n}$, $X \in \mathbb{R}^{d \times n}$, $w \in \mathbb{R}^{1 \times d}$, then \(Y = wX \implies w = YX^{-1}.\)</p> <p>What about when $n &lt; d$ (overparameterized regime) or $n &gt; d$ (underparameterized regime)? We need to turn to gradient descent then, \(w^{(t+1)} = w^{(t)} - \eta \nabla_w \mathcal{L}(w^{(t)}).\) We can actually explicitly characterize the conditions for convergence and its limit for different values of the learning rate $\eta$ and initialization $w^{(0)}$. Namely, let us start with \(w^{(t+1)} = w^{(t)} - \eta \nabla_w \mathcal{L}(w^{(t)}) = w^{(t)} - \eta (-(y - w^{(t)}X))X^\top = w^{(t)} + \eta (y - w^{(t)}X)X^\top\) Using this equation, we can derive a closed form expression for $w^{(t)}$. \(\begin{align*} w^{(t+1)} &amp;= w^{(t)} + \eta (y - w^{(t)}X)X^\top = w^{(t)} +\eta yX^\top - \eta w^{(t)} XX^\top = w^{(t)}(I - \eta X X^\top) + \eta y X^\top \\ w^{(1)} &amp;= w^{(0)} (I - \eta XX^\top) + \eta y X^\top\\ w^{(2)} &amp;= w^{(0)} (I - \eta XX^\top)^2 + \eta y X^\top(I - \eta XX^\top) + \eta y X^\top\\ w^{(3)} &amp;= w^{(0)} (I - \eta XX^\top)^3 + \eta y X^\top(I - \eta XX^\top)^2 + \eta y X^\top(I - \eta XX^\top) + \eta y X^\top\\ &amp;\dots\\ \end{align*}\) Let $A = (I - \eta XX^\top)$, $B = \eta yX^\top$, and $X = U\Sigma V^\top$ be the singular value decomposition of $X$ where $\sigma_1 \geq \dots \geq \sigma_r$ are the non-zero singular values. 
Then \(\begin{align*} w^{(t)} &amp;= w^{(0)}A^t + BA^{t-1} + BA^{t-2} + \dots + BA + B = w^{(0)}A^t + B(A^{t-1} + A^{t-2} + \dots + A + I) = w^{(0)} A^t + (\eta yX^\top)(UU^\top + U(I - \eta\Sigma^2)U^\top + \dots + U(I - \eta\Sigma^2)^{t-1}U^\top) \\ &amp;= w^{(0)} A^t + (\eta yX^\top)U(I + (I - \eta\Sigma^2) + \dots + (I - \eta\Sigma^2)^{t-1})U^\top = w^{(0)}(I - \eta XX^\top)^t + \eta yX^\top U\begin{bmatrix} \frac{1 - (1 - \eta\sigma_1^2)^t}{\eta\sigma_1^2} &amp; &amp; &amp;\\ &amp; \frac{1 - (1 - \eta\sigma_2^2)^t}{\eta\sigma_2^2} &amp; &amp;\\ &amp; &amp; \ddots &amp; \\ &amp; &amp; &amp; 0 \end{bmatrix}U^\top \end{align*}\) From this equation, we can derive many insights into the conditions for convergence. In particular, if we want the RHS to converge, we require $|1 - \eta \sigma_1^2| &lt; 1 \implies -1 &lt; 1 - \eta\sigma_1^2 &lt; 1$. Thus, when $\eta &lt; \frac{2}{\sigma_1^2}$ (which implies $\eta \leq \frac{2}{\sigma_2^2}, \eta \leq \frac{2}{\sigma_3^2}, \dots$), gradient descent for linear regression converges.</p> <p>With this condition on $\eta$, we can further characterize $w^{(\infty)}$. 
\(\begin{align*} w^{(\infty)} &amp;= \lim_{t \rightarrow \infty} w^{(0)}(I - \eta XX^\top)^t + \eta yX^\top U \begin{bmatrix} \frac{1}{\eta\sigma_1^2} &amp; &amp; &amp;\\ &amp; \frac{1}{\eta\sigma_2^2} &amp; &amp;\\ &amp; &amp; \ddots &amp; \\ &amp; &amp; &amp; 0 \end{bmatrix}U^\top = \lim_{t \rightarrow \infty} w^{(0)}(UU^\top - \eta U \Sigma^2 U^\top)^t + yV\Sigma^\top U^\top U \begin{bmatrix} \frac{1}{\sigma_1^2} &amp; &amp; &amp;\\ &amp; \frac{1}{\sigma_2^2} &amp; &amp;\\ &amp; &amp; \ddots &amp; \\ &amp; &amp; &amp; 0 \end{bmatrix}U^\top \\ &amp;= \lim_{t \rightarrow \infty} w^{(0)}U(I - \eta \Sigma^2)^tU^\top + yV\Sigma^\top \begin{bmatrix} \frac{1}{\sigma_1^2} &amp; &amp; &amp;\\ &amp; \frac{1}{\sigma_2^2} &amp; &amp;\\ &amp; &amp; \ddots &amp; \\ &amp; &amp; &amp; 0 \end{bmatrix}U^\top = w^{(0)}U\begin{bmatrix} 0 &amp; &amp; &amp;\\ &amp; \ddots &amp; &amp;\\ &amp; &amp; 0 &amp; \\ &amp; &amp; &amp; 1 \end{bmatrix}U^\top + yV\begin{bmatrix} \frac{1}{\sigma_1} &amp; &amp; &amp;\\ &amp; \frac{1}{\sigma_2} &amp; &amp;\\ &amp; &amp; \ddots &amp; \\ &amp; &amp; &amp; 0 \end{bmatrix}U^\top =w^{(0)}U\begin{bmatrix} 0 &amp; &amp; &amp;\\ &amp; \ddots &amp; &amp;\\ &amp; &amp; 0 &amp; \\ &amp; &amp; &amp; 1 \end{bmatrix}U^\top + yX^\dagger \\ \end{align*}\) Note the dependency of this result on $w^{(0)}$. If $w^{(0)} = 0$, then $w^{(\infty)} = yX^\dagger$. Furthermore, we can also prove that $w = yX^\dagger$ is the minimum $\ell_2$ solution. Suppose there exists another solution, $\tilde{w}$. If $wX = \tilde{w}X$, then $\tilde{w} - w \perp w$ because \((\tilde{w} - w)w^\top = (\tilde{w} - w)(y(X^\top X)^{-1}X^\top)^\top = (\tilde{w}-w)X((X^\top X)^{-1})^\top y^\top = 0\) Thus, \(\|\tilde{w}\|_2^2 = \|\tilde{w} - w + w\|_2^2 = \|\tilde{w} - w\|_2^2 + \|w\|_2^2 + 2(\tilde{w}-w)w^\top = \|\tilde{w} - w\|_2^2 + \|w\|_2^2 \geq \|w\|_2^2.\)</p> <p>This characterization is consistent when $n = d$, $n &lt; d$, and $n &gt; d$. 
If $n = d$, then $X^\dagger = (X^\top X)^{-1} X^\top = X^{-1}(X^{\top})^{-1} X^\top = X^{-1}$. When $n &gt; d$ and the rank of $X$ is $d$, then when $\nabla_w \mathcal{L}(w) = 0$, we have $(y-wX)X^\top = 0 \implies w = yX^\top(XX^\top)^{-1}$. $XX^\top \in \mathbb{R}^{d \times d}$ is invertible since $X$ is full rank, so $w = yX^\top(XX^\top)^{-1} =y(X^\top X)^{-1}X^\top = yX^\dagger$.</p> <p>We are particularly interested in the overparameterized regime, i.e. when $n &lt; d$. The results above show that when $w^{(0)} = 0$, even though there are an infinite number of $w$ that satisfy $y = wX$, gradient descent converges to the minimum $\ell_2$-norm solution, $w = yX^\dagger$. This implicit regularization may help prevent overfitting even when there are enough parameters to fully memorize the input data.</p> <p>Why is this analysis helpful? This characterization may help us understand the solution obtained by kernel regression, which can be viewed as just linear regression on a nonlinear, high-dimensional space.</p> <h3 id="overparameterization-in-kernel-regression">Overparameterization in Kernel Regression</h3> <p>We will start with a brief definition of kernel regression. Intuitively, kernel regression is running linear regression after applying a non-linear feature map, $\psi$, onto the datapoints $x \in \mathbb{R}^{d}$. Formally, we require that $\psi: \mathbb{R}^{d} \rightarrow \mathcal{H}$, $w \in \mathcal{H}$, and the predictor $\hat{f}: \mathbb{R}^{d} \rightarrow \mathbb{R}$ to take the form $\langle w, \psi(x)\rangle_{\mathcal{H}}$, where $\mathcal{H}$ is a Hilbert space. A Hilbert space is a complete metric space with an inner product. Intuitively, Hilbert spaces generalize finite-dimensional vector spaces to infinite-dimensional spaces, which is helpful for us because this allows for infinite-dimensional feature maps, an extreme example of overparameterization. All the finite-dimensional inner product spaces that are familiar to us, e.g. 
$\mathbb{R}^n$ with the usual dot product, are Hilbert spaces.</p> <p>At first glance, it might seem impossible to even store the weights of infinite-dimensional feature maps. However, this problem is resolved by the observation that weights from solving linear regression will always be a linear combination of the training samples. In particular, since $yX^\dagger$ has the same span as $X$, we can always rewrite the weights as $w = \sum_{i=1}^n \alpha_i x_i^\top$, where $x_i$ denotes the $i$-th sample. What’s really interesting is that this can be extended to kernels as well.</p> <p>Specifically, for kernel regression, we seek a solution to the MSE problem: \(\mathcal{L}(w) = \|y-\hat{y}\|_2^2 = \|y-\langle w,\psi(x)\rangle\|_2^2.\)</p> <p>We know that the weights must take the following form, \(w = \sum_{i=1}^n \alpha_i \psi(x_i).\)</p> <p>Thus, expanding out the loss function, we have that \(\mathcal{L}(w) = \frac{1}{2}\|y-\langle w, \psi(x)\rangle\|_2^2 = \frac{1}{2}\|y-\langle \sum_{i=1}^n \alpha_i \psi(x_i), \psi(x)\rangle\|_2^2 = \frac{1}{2}\sum_{j=1}^n (y_j -\langle \sum_{i=1}^n \alpha_i \psi(x_i), \psi(x_j)\rangle)^2 = \frac{1}{2}\sum_{j=1}^n (y_j -\langle \alpha, \begin{bmatrix} \langle \psi(x_1), \psi(x_j) \rangle \\ \langle \psi(x_2), \psi(x_j) \rangle \\ \vdots \\ \langle \psi(x_n), \psi(x_j) \rangle \\ \end{bmatrix}\rangle)^2.\)</p> <p>Thus, rather than storing the weights $w$ that act on the feature map directly, we just need to store $\alpha$, the weights acting on the samples. Moreover, another observation from this equation is that we don’t even need to define the feature map directly. We only need to store the inner product of each sample with every other sample. Formally, this inner product is called a kernel ($K: \mathbb{R}^d \times \mathbb{R}^d \rightarrow \mathbb{R}$). 
With a slight abuse of notation, we will also use $K$ to denote the matrix of inner products, $K(X,X)$.</p> <p>Much like our discussion in class on Gaussian Processes (GP), kernels can be thought of as a “distance” or “covariance” function on samples. Some well-known kernels include:</p> <ul> <li>Gaussian kernel: $K(x,\tilde{x}) = \exp(-\|x - \tilde{x}\|_2^2)$</li> <li>Laplacian kernel: $K(x,\tilde{x}) = \exp(-\|x - \tilde{x}\|_2)$</li> <li>Neural Tangent kernel with ReLU activation: $K(x,\tilde{x}) = \frac{1}{\pi}(x^\top \tilde{x}(\pi - \arccos(x^\top \tilde{x})) + \sqrt{1 - (x^\top \tilde{x})^2}) + x^\top \tilde{x}\frac{1}{\pi}(\pi - \arccos(x^\top \tilde{x}))$</li> <li>Linear kernel: $K(x,\tilde{x}) = x^\top \tilde{x}$</li> </ul> <p>The linear kernel is equivalent to linear regression, and (as we will explore later), the Neural Tangent kernel with ReLU activation approximates an infinitely wide neural network with $\phi(z) = \sqrt{2}\max(0,z)$ activation.</p> <p>Note also that all of these kernels, however finite, represent infinite-dimensional feature maps. For example, the feature map for the Gaussian kernel is $\psi(x) = \Big(\sqrt{\frac{(2L)^m}{p_1!p_2!\dots p_d!}}x_1^{p_1}x_2^{p_2}\dots x_d^{p_d}\Big)_{p_1,p_2,\dots,p_d \in \mathbb{N} \cup {0}}.$ It is remarkable that kernel regression even does well in practice considering it works in an extremely over-parameterized regime.</p> <p>However, our analysis using linear regression may shed some light on why. In particular, recall that our loss function is \(\mathcal{L}(w) = \frac{1}{2}\sum_{j=1}^n (y_j - (\alpha K(X,X))_j)^2.\)</p> <p>Since this is just linear regression in $\mathcal{H}$, gradient descent converges to $\alpha = yK^\dagger$ if $\alpha^{(0)} = 0$. This means the predictor for kernel regression looks like \(\hat{f}(x) = \alpha K(X,x) = yK^{\dagger}K(X,x).\)</p> <p>Since $K(X,X)$ is a square matrix, (technically, $n = d$ from the linear regression case), this equation can be solved directly. 
Moreover, $\alpha$ is the minimum $\mathcal{H}$-norm solution, just like how the weights from the linear regression model is the minimum $\ell_2$-norm solution.</p> <p>The ability to be solved in closed form is an important property of kernel regression. In practice, $\alpha^{(0)}$ cannot be initialized to $0$ in gradient descent, so neural networks do not necessarily converge to the minimum-norm solution that kernels do. This may offer some explanation for the predictive ability of kernels on tabular data.</p> <p>Now, let us formally define the Neural Tangent Kernel. The NTK for a neural network is defined as the outer product of the gradients of the network’s output with respect to its parameters, averaged over the parameter initialization distribution. Formally, if $f(x; w)$ is the output of the network for input $ x $ and parameters $ w $, the NTK is given by:</p> \[K_{\text{NTK}}(x, \tilde{x}) = \mathbb{E}_{w}\left[\left\langle \frac{\partial f(x; w)}{\partial w}, \frac{\partial f(\tilde{x}; w)}{\partial w} \right\rangle\right].\] <p>The intuition for this comes from understanding how parameters change in neural networks during gradient descent.</p> <p>In particular, note that \(\frac{df(x;w)}{dt} = \frac{df(x;w)}{dw} \frac{dw}{dt} \approx \frac{df(x;w)}{dw} (-\nabla_w \mathcal{L}(w)) = -\frac{1}{N}\sum_{i=1}^N \underbrace{\nabla_w f(x;w)^\top \nabla_w f(x_i;w)}_{NTK} \nabla_f\mathcal{L}(f,y_i).\)</p> <p>From this equation, we see that during gradient descent, the network $f$ changes based on its effect on the loss function weighted by the “covariance”/”distance” of $x$ w.r.t. the other samples. 
The intuition for the NTK thus comes from the way that the neural network evolves during gradient descent.</p> <d-cite key="jacot2018neural"></d-cite> <p>established that training an infinite-width neural network $f(x;w)$ with gradient descent and MSE loss is equivalent to kernel regression where the kernel is the NTK.</p> <p>To further understand the connections between the NTK and wide neural networks, I benchmarked the performance of wide neural networks and the NTK on the task of predicting the effects of a gene knockout on a cell.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-overparameterization/Fig3-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-overparameterization/Fig3-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-overparameterization/Fig3-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-overparameterization/Fig3.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 1. Experiment workflow.</p> <p>All the datasets are publicly available on <a href="https://depmap.org/portal/">DepMap</a> and I processed the data the same way as I did in <d-cite key="cai2023synthetic"></d-cite> (but the experimental results I describe here are new). In short, I have 998 cells embedded with a 30,000-dimensional vector of the expression and mutation status of genes in the cell. The target task is to predict the effect of knocking out the gene KRAS in each cell. 
The “effect” is a scalar that indicates how alive the cell is, with negative values indicating that the cell is more dead and positive values indicating that the cell is more alive.</p> <p>Biological datasets are well-suited for the analysis of overparameterized models because the embeddings are by default extremely high-dimensional, i.e. $d » n$. However, since I want to test the effects of increasing the width of neural networks and I do not want the shape of the weight matrix to be $\lim_{k \rightarrow \infty}\mathbb{R}^{30,000 \times k}$, I reduced the computational complexity of this problem by first running PCA on the cell embedding to reduce $d$ to $500$. Thus, $X \in \mathbb{R}^{998 \times 500}$ and $Y \in \mathbb{R}^{998 \times 1}$. I did a simple 80/20 training/test split on the data, so $X_{train} \in \mathbb{R}^{798 \times 500}$ and $X_{test} \in \mathbb{R}^{200 \times 500}$.</p> <p>I then benchmarked a one hidden layer MLP, i.e. $A\phi(Bx)$ with ReLU activation, where $A \in \mathbb{R}^{1 \times k}, B \in \mathbb{R}^{k \times 500}$, as $k$ ranged from ${10,110,210,\dots,9910}$. I also benchmarked the NTK on the same task. There are several interesting insights from this experiment.</p> <ol> <li>The NTK always exactly fits the training data by construction because we directly solve the MSE problem.</li> <li>The MSE of a neural network as $k$ increases approaches the MSE of the NTK, which aligns with the theory. However, I want to note that if I shrink $d$, i.e. if I take $d = 10$ or $d=100$, the second point does not always hold. In those cases, the MSE of the NTK is much larger than the MSE of the neural network. That was a bit counterintuitive, but one explanation could be that the NTK is a poor approximation for the neural network in those cases because the neural network cannot be linearized when it is changing so drastically based on the small set of features.</li> <li>The MSE asymptotically decreases as $k \rightarrow \infty$. 
This aligns with the theory of the double-descent curve. It would be interesting to test if the weights learned by the MLP enforces some sort of sparsity, e.g. by plotting $\frac{|A|_2}{|x|_2}$, where $A,x \in \mathbb{R}^{k \times 1}$ and $x \sim \mathcal{N}(0,I_k)$ (unfortunately, the latter does not have a nice form).</li> </ol> <h3 id="gaussian-processes">Gaussian Processes</h3> <p>Compared to linear and kernel regression, a Gaussian Process (GP) is a much more general class of nonparametric functions. Formally, a Gaussian Process (GP) is a collection of random variables, any finite number of which have a joint Gaussian distribution. A GP can be thought of as a distribution over functions and is fully specified by its mean function $\mu(x)$ and covariance function $K(x, \tilde{x})$, (similar to kernel regression, this is also known as the kernel of the GP).</p> <p>Given a set of points $X = {x_1, x_2, \ldots, x_n}$, the function values at these points under a GP are distributed as:</p> \[\mathbf{f}(X) \sim \mathcal{N}(\mathbf{\mu}(X), K(X, X)),\] <p>where $ \mathbf{\mu}(X) $ is the mean vector and $ K(X, X) $ is the covariance matrix constructed using the kernel function $K$.</p> <p>Key to the concept of Gaussian Processes is the closure of multivariate Gaussians under conditioning and marginalization. Since all the function values are jointly Gaussian, the value of a new function value, given the existing ones, is also Gaussian, e.g. assuming $\mu(X) = 0$,</p> <p>\(f(x_{test}) | f(x_1)\dots f(x_n) = \mathcal{N}(\mu_{test},\Sigma_{test})\) where $\mu_{test}$ = $K(x,X)K(X,X)^{-1}f(X)$ and $\Sigma_{test}$ = $K(x,x) - K(x,X)K(X,X)^{-1}K(x,X)$. 
(The math for this is a bit tedious, so I omit that here.)</p> <h3 id="connecting-gaussian-processes-kernel-regression-and-mlps">Connecting Gaussian Processes, Kernel Regression, and MLPs</h3> <p>It is interesting to note the similarities between this closed form for the predictor of a Gaussian process and the predictor for kernel regression. In fact, $\mu_{test}$ is exactly the same as $\hat{f}(x)_{kernel}$. This suggests GPs parameterize the class of functions drawn from a normal distribution with mean $\mu_{test}$ while kernel regression converges to a deterministic function that is exactly $\mu_{test}$. In other words, I think that the function learned by kernel regression can be thought of as the maximum of the posterior distribution of the GP with the same kernel.</p> <p>To test this insight, I ran an experiment to see how similar a Gaussian Process trained on a fixed dataset is to kernel regression with the same kernel.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-overparameterization/Fig4-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-overparameterization/Fig4-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-overparameterization/Fig4-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-overparameterization/Fig4.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 2. Results of Gaussian Process Regression and Kernel Ridge Regression on synthetic data with the same kernel function.</p> <p>I sampled $X \sim \mathcal{N}(5,1)$ and $Y \sim \sin(X) + \mathcal{N}(0,0.2)$. 
I then trained a Gaussian Process and kernel ridge regression on the data with $K(x,\tilde{x}) = \exp(-\frac{\|x-\tilde{x}\|_2^2}{2}) + Id$. As expected, the function learned by kernel ridge regression closely matches the mean of the class of functions learned by the GP.</p> <p>Another connection between kernel regression and GPs can be made through the introduction of a one hidden layer MLP. See below figure.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-overparameterization/Fig1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-overparameterization/Fig1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-overparameterization/Fig1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-overparameterization/Fig1.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 3. Visualization of kernel regression, MLPs, and Gaussian Processes.</p> <p>Starting with kernel regression, if we fix the “feature map,” $B$, then training with gradient descent with $A^{(0)} = 0$ is equivalent to training kernel regression with $K(x,\tilde{x}) = \langle \phi(Bx), \phi(B\tilde{x}) \rangle$. This is intuitive because again, we can just think of kernel regression as linear regression ($A$) after applying a nonlinear feature map, ($\phi \circ B$).</p> <p>The connection between neural networks and Gaussian Processes is a bit more complicated. Suppose we are in the overparameterized regime and $A \in \mathbb{R}^{1 \times k}$ and $B \in \mathbb{R}^{k \times d}$. Forgoing the bias term for simplicity, the output of the network is \(f(x) = A\phi(Bx) = \sum_{i=1}^k A_i\phi(Bx)_i.\) If the weights of the network are sampled i.i.d. 
Gaussian, then $f(x)$ is a sum of i.i.d. Gaussians and so as $k \rightarrow \infty$, the Central Limit Theorem states that the output of the network will also be Gaussian with some fixed mean and covariance, i.e. in the limit, \(f(x) \sim \mathcal{N}(0,\Sigma)\) \(\begin{bmatrix} f(x_1) \\ f(x_2) \\ \vdots \\ f(x_n) \end{bmatrix} \sim \mathcal{N}(0,K)\)</p> <p>Now, let us compute $K$: \(K(x,\tilde{x}) = \mathbb{E}[f(x)f(\tilde{x})] = \mathbb{E}[A\phi(Bx)A\phi(B\tilde{x})] = \mathbb{E}\Big[\lim_{k \rightarrow \infty}\Big(\sum_{i=1}^k A_i \phi(Bx)_i\Big)\Big(\sum_{i=1}^k A_i \phi(B\tilde{x})_i\Big)\Big]\) Suppose for simplicity that $A \sim \mathcal{N}(0,I)$. Then $\mathbb{E}[A_iA_j] = 0$ and $\mathbb{E}[A_iA_i] = 1$: \(= \mathbb{E}\Big[\lim_{k \rightarrow \infty}\sum_{i=1}^k A_i^2 \phi(Bx)_i\phi(B\tilde{x})_i\Big] = 1 \lim_{k \rightarrow \infty} \sum_{i=1}^k \phi(Bx)_i\phi(B\tilde{x})_i= \underbrace{\lim_{k \rightarrow \infty} \langle \phi(Bx),\phi(B\tilde{x}) \rangle}_{k \times NNGP}.\)</p> <p>The latter is essentially the definition of the Neural Network Gaussian Process, which is the kernel of the Gaussian Process that neural networks converge to when its width goes to infinity. (The NNGP has an extra $\frac{1}{k}$ term to allow the Law of Large Numbers to be used again.)</p> <p>Ultimately, what this shows is that a neural network of infinite width over i.i.d. parameters is the class of Gaussian functions parameterized by the Neural Network Gaussian Process. 
With gradient descent, neural networks and kernel regression converge to a deterministic function that can be thought of as a sample from a GP.</p> <p>The below figure summarizes my findings on the connections between the three types of function classes:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-overparameterization/Fig2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-overparameterization/Fig2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-overparameterization/Fig2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-overparameterization/Fig2.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 4. Comparison of kernel regression, MLPs, and Gaussian Processes.</p> <h1 id="discussion">Discussion</h1> <p>To summarize, these are the implications of the NN-Kernel Regression-GP Connection:</p> <ul> <li>Predictive Distribution: In the infinite-width limit, the predictive distribution of a neural network for a new input $x_{test}$ can be described by a Gaussian distribution with mean and variance determined by the NNGP.</li> <li>Regularization and Generalization: Kernels inherently regularize the function space explored by the network. This regularization is not in the form of an explicit penalty but may arise from the minimum $\mathcal{H}$-norm solution of kernel regression. 
This may explain the observed generalization capabilities of wide neural networks.</li> <li>Analytical Insights: This correspondence provides a powerful analytical tool to study the learning dynamics of neural networks, which are often difficult to analyze due to their non-linear and high-dimensional nature.</li> </ul> <h3 id="limitations">Limitations</h3> <p>A major limitation of this current work is that I evaluated overparameterized neural networks only through the lens of kernels/GPs. It would be interesting to try to understand the successes of neural networks through other metrics, such as evaluating test risk as width increases. Furthemore, it would also be interesting to characterize what happens when depth, rather than just width, increases. Another interesting next step would be expanding this analysis to understanding overparameterization of other architectures, such as CNNs and transformers, and their connections to kernel regression and Gaussian Processes.</p> <p>Understanding neural networks through the lens of the NTK and Gaussian processes deepens our appreciation of the foundational principles in machine learning. It unifies three seemingly disparate areas: the powerful yet often opaque world of deep learning, the straightforward approach of kernel regression, and the rigorous, probabilistic framework of Gaussian processes. This confluence not only enriches our theoretical understanding but also paves the way for novel methodologies and insights in the practical application of machine learning algorithms.</p> Exploring Methods for Generating Music 2023-12-11T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/exploring-music-generation <h1 id="introduction">Introduction</h1> <p>The problem of music generation has been widely explored for a long time. Music has very similar parallels to how speech and language is structured. 
Just like language, music is temporal and in the traditional western sense, has a defined set of standards/rules for how music should be structured. What makes music generation a more challenging problem than language is that music has an artistic/expressive component as well as both low and high level structure. For “good” music, it isn’t enough to simply generate a series of notes and harmonies that obey music theory conventions. At the low level, “good” music makes use of varying dynamics, note groupings, and articulation. At the high level, “good” music may feature overarching motifs and specific <a href="https://en.wikipedia.org/wiki/Musical_form">forms</a> (round, sonata form, ABAB, etc). This level of complexity is analogous to the problem of generating poetry and generating speech that mimics a human reciting it. The poetry will have structures like rhyme, rhythm, motifs, metaphors, etc. and the speech reading it will have to replicate expressiveness to be convincing. This level of complexity is not yet achievable with a high level of robustness by current speech generators, LLMs, and NLP methods.</p> <p>It is this level of structural complexity required for generating “good” music that makes machine learning methods, specifically deep learning, a compelling approach to tackling the problem of generating “good” music. Deep learning methods should be able to capture music’s low level music theory structure as well as the high level structure. It is the hope that given enough data and the right architectures, music generation will be able to mimic a level akin to the best human composers. While music generation such as OpenAi’s jukebox <d-cite key="dhariwal2020jukebox"></d-cite> has yielded very good results, it is trained on pure audio frequencies. I will focus on musical generation and training from a “written” / musical structural perspective rather than audio. (Think human speech vs. 
language/text), as I think this can provide greater insight into how these models learn and what about musical structure is being learned.</p> <h1 id="related-work">Related Work</h1> <p>There have been several studies/projects done in the generation of music. OpenAi has done work with using audio samples to generate music. They took a representation learning and autoencoder approach leveraging VQ-VAEs. Other work <d-cite key="doi:10.1080/25765299.2019.1649972"></d-cite> took approaches similar to mine and tried to analyze the “written” structure of music and used a combination of LSTMs and a midi encoding scheme to generate music. Work has been done to capture the expressivity of music <d-cite key="10124351"></d-cite>, where they leverage large transformer models and condition them on emotion to generate music. There has been success in generating expressivity based on this conditional approach. My work here aims to analyze purely the syntactical structure of music and will not be leveraging conditioning.</p> <h1 id="methodology">Methodology</h1> <p>Before music can be learned and generated, it needs to first be converted to a format that can be input to a ML model. To achieve this I used a subset of a piano midi dataset <d-cite key="ferreira_aiide_2020"></d-cite> and utilized a <a href="https://pypi.org/project/py-midicsv/">program</a> to convert from MIDI to .csv. Using this .csv file I encoded each note in the midi to a 107-dimensional vector. Where the first 106 dimensions correspond to midi-notes <a href="https://www.inspiredacoustics.com/en/MIDI_note_numbers_and_center_frequencies">A0-G9</a>, and the last dimension encodes the duration of the midi-note divided by the midi-clock/quarter frequency to get a duration of the note in quarter notes. Since note A0 corresponds to midi-note 21, all of the midinote values are subtracted by this baseline value when being encoded into the vector. 
If a midi-note is played it is encoded as “ON” in the .csv and as such is represented with a 1 in its corresponding index in the note vector. For example, if a C4 and A4 note (MIDI note 60, and 69 respectively) are played at the same time in a song, it will be encoded as a 107 dimensional zero vector with indices 38, 47 (60 (midi value) -21 (baseline)-1 (0-index notation)) being 1 and index 106 being the duration of the chord.</p> <p>I then tested 3 different models to see how they performed. The first model I tested was an RNN with hidden_size = 64, RNN_layers = 2, and sequences of 24, 48, 64, and 200. I next tested LSTM models with hidden_size = 64, RNN_layers = 2, and sequences of 24, 48, 64, and 200 and compared a bidirectional vs. single-directional model. The last model I analyzed was a transformer, in which I first took my note encodings and created an embedded representation of the notes and combined this with positional encoding in the sequence of music to get my final embedding to pass into my transformer architecture.</p> <h1 id="resultsexperiments">Results/Experiments</h1> <p>I found the RNN architecture to be the worst performing model. It has a high ringing for some training and mostly unstructured and random. The results of a sample music generation can be found <a href="https://drive.google.com/drive/folders/1FiuobbyVUnwpUZUx_PYBR57qOwj5jYXe?usp=sharing">here</a>. The LSTM model took longer to train but performed better with hidden size = 64, sequence_length=48, and 30 epochs. I found that it worked even better when using a bidirectional architecture. A sample generation can be found <a href="https://drive.google.com/drive/folders/10CzuEbuVXKCyLsY5vwQZjSKJT1ABqXbA?usp=sharing">here</a> in which it was fed the starting 10 notes of Polonaise in A-flat major, Op. 53 and was asked to generate a long sequence from that. 
The transformer took the longest to train and its results can be found <a href="https://drive.google.com/drive/folders/1fGe7xUZyFNlFGMbGB8aXnVfSEx067ZaA?usp=sharing">here</a></p> <h1 id="closing-thoughts-and-future-work">Closing Thoughts and Future Work</h1> <p>As expected the base RNN architecture failed to generate anything meaningful. It took a while to find hyperparameters that would make the LSTM generate something of note, but when it did successfully generate music I was surprised by some of the resemblances it had to music in the training data.</p> <p>One noticeable flaw in my work is that my metric for success outside of training error is qualitative. It would have been useful for evaluation of my model implementations if I had a quantitative metric. I originally calculated the loss of my models based on how they replicated unseen music from a test set given sequences from the same music, however losses for every model failed to converge in a reasonable amount of time. It is certainly difficult to tell if poor performance is due to implementation or a small dataset and limited compute resources.</p> <p>Continuing on the idea of lack of data, one of the challenges I faced was in the curation of my dataset. I originally was going to generate music tokens for my network based on a very descriptive musical format called <a href="https://lilypond.org/">lilypond</a>. However, there were inconsistencies between samples of music in how they were represented in the lilypond text format, so creation of a program to transcribe the text to a good format for representing music was very difficult which is why I turned to the more standardized MIDI file format. It is unfortunate because a lot of the complex expression in music is lost in midi format, making it harder if not impossible for models trained on midi input to learn these complex representations/behavior. 
I say impossible because if data for musical expression is completely absent from training, then this important component of music is simply out of distribution and impossible to learn. So a better way to encode/represent music is needed for better results.</p> <p>Moving forward, it would be interesting to explore how representation learning can be used to enhance the generation of music. I wanted to explore the use of VAEs and some of the more advanced variations like the one used in OpenAi’s jukebox, VQ-VAE. These methods may be able to capture both the high level structure and complex low level structure found in music. I also want to explore methods for encoding the dynamics, articulation, and expression found in music, something I was not able to do this time around. Lastly, exploring a better way to encode and learn the duration of notes would lead to better music generation.</p> Can Contrastive Learning Recommend Me a Movie? 2023-12-11T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/rep-learning-for-rec-systems <h2 id="introduction">Introduction</h2> <p>With the vast amount of information and content available online, the need for intelligent recommendation systems has only become more necessary. Many of the apps we use, YouTube, TikTok, Instagram, Netflix, Spotify, etc. all incorporate recommender systems to provide personalized content. But how do these systems work? An important factor in delivering good recommendations is having a system that can find an expressive and useful representation of users and items (where items are the specific piece of content we want to recommend).</p> <p>Traditional approaches for developing recommender systems include collaborative filtering, matrix factorization, and deep neural networks such as multi-layer perceptrons (MLPs) and graph neural networks (GNNs) <d-cite key="history"></d-cite>. 
Moreover, a focus on using a hybridized approach of the previous models is also in active research, with aims of balancing their various benefits and tradeoffs.</p> <p>This project aims to explore if contrastive learning can be used to recommend movies for a user based on their prior movie ratings.</p> <p><strong>More specifically, by choosing different strategies of defining positive/negative pairs, can we learn a user embedding that facilitates the downstream task of movie recommendation?</strong></p> <h2 id="background-and-related-work">Background And Related Work</h2> <h3 id="contrastive-learning">Contrastive Learning</h3> <p>Contrastive learning is a self-supervised machine learning technique for training a model (often called an encoder) to distinguish between similar and dissimilar pairs of data points. The goal is to map each data point from its original representation space to a smaller dimensional latent space. If the encoder is trained well and is able to learn a good representation, the newly encoded data points should act as a sort of “compressed” version of the original data point while still containing some useful semantic information.</p> <p>Contrastive learning has traditionally been used in the domains of computer vision and natural language processing. However, more recent work has shown that contrastive learning, when combined with graph neural networks (GNNs), can learn impressive representations when applied to recommender systems <d-cite key="gnn"></d-cite>. For the purposes of this project, instead of using a GNN as our encoder, a simpler MLP will be used.</p> <h3 id="dataset">Dataset</h3> <p>This project explores creating a movie recommender system based on the <a href="https://grouplens.org/datasets/movielens/">MovieLens dataset</a>. The small version of this dataset contains 10,000 ratings of 9,000 movies by 600 users on a 0-5 star scale. Data was collected by users of the MovieLens website, last updated in September 2018. 
An example of the primary <code class="language-plaintext highlighter-rouge">ratings.csv</code> dataset is shown below:</p> <table> <thead> <tr> <th>userId</th> <th>movieId</th> <th>rating</th> <th>timestamp</th> </tr> </thead> <tbody> <tr> <td>1</td> <td>1</td> <td>4.0</td> <td>964982703</td> </tr> <tr> <td>1</td> <td>3</td> <td>4.0</td> <td>964981247</td> </tr> <tr> <td>…</td> <td>…</td> <td>…</td> <td>…</td> </tr> <tr> <td>2</td> <td>318</td> <td>3.0</td> <td>1445714835</td> </tr> <tr> <td>2</td> <td>333</td> <td>4.0</td> <td>1445715029</td> </tr> <tr> <td>…</td> <td>…</td> <td>…</td> <td>…</td> </tr> <tr> <td>600</td> <td>170875</td> <td>3.0</td> <td>1493846415</td> </tr> </tbody> </table> <h2 id="methodology">Methodology</h2> <h3 id="preprocessing-of-dataset">Preprocessing of Dataset</h3> <p>The MovieLens dataset of user-movie interactions (movie ratings) is split into a training and test dataset. For each user, 95% of their interactions were randomly sampled and allocated to the training dataset, while the remaining 5% of interactions were allocated to the test dataset.</p> <p>Thresholds were chosen to quantify whether a user “liked” a movie (<code class="language-plaintext highlighter-rouge">LIKE_THRESHOLD</code>) or “disliked” a movie (<code class="language-plaintext highlighter-rouge">DISLIKE_THRESHOLD</code>) based on that user’s rating. The training dataset was then filtered to only include interactions involving movies that had a minimum number of users who “liked” it and a minimum number of users who “disliked” the movie. This was to ensure that each movie had enough user data to facilitate the computations for selecting positive / negative pairs.</p> <h3 id="positive-and-negative-pair-strategies">Positive and Negative Pair Strategies</h3> <p>An important component of contrastive learning involves the definition of positive pairs and negative pairs. 
For a given interaction (user <em>u</em> rates movie <em>m</em>), what should be considered a similar interaction and what should be considered a dissimilar interaction?</p> <p>Given an interaction by user ${u}$, let $\text{pos}(u) = u^+$ and $\text{neg}(u) = u^-$ where $(u,u^+)$ is a positive pair and $(u,u^-)$ is a negative pair. The goal will be to find the pair of functions $\text{pos}(), \text{neg}()$ such that a good representation is learned.</p> <h3 id="encoder-architecture">Encoder Architecture</h3> <p>The proposed encoder architecture is shown below. The encoder receives as input a batch of userIds, $u$ , integers in the range $0 \leq u \leq 599$. The first layer of the encoder is an embedding layer, mapping userIds to a vector of dimension <code class="language-plaintext highlighter-rouge">input_dim</code>. This layer is followed by a 2-layer MLP with ReLU activations, with a hidden dimension of <code class="language-plaintext highlighter-rouge">hidden_dim</code> and an output dimension of <code class="language-plaintext highlighter-rouge">latent_dim</code>. 
Additionally, the final output of the encoder is normalized.</p> <div class="row mt-3 align-items-center"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/encoder-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/encoder-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/encoder-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/encoder.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Architecture for encoder, where <strong>input_dim</strong> = 1024, <strong>hidden_dim</strong> = 600, <strong>latent_dim</strong> = 200. </div> <h3 id="evaluation-metrics">Evaluation Metrics</h3> <p>In order to evaluate the quality of the learned user representations, there are a handful of metrics that will be used.</p> <ol> <li> <p><strong>Top K Movie Recommendation</strong>: Movie recommendation will serve as a downstream task that acts as a proxy for how good the learned user representations are. To recommend movies for a user, the encoder is used to get the user embeddings for all users in the dataset. We then use the cosine-similarity to compute the N=10 nearest neighbors to our target user. From these N neighbors, we retrieve all of their “liked” movies and sort by their respective ratings. The top K movies are returned as the system’s recommendations.</p> </li> <li> <p><strong>recall@k</strong>: A popular metric used for evaluating recommender systems is recall@k <d-cite key="rec"></d-cite>. 
It measures the proportion of relevant items that were successfully retrieved from the top-k movie recommendations. Relevant items are defined as items that a user “likes” from the test dataset. The proportion of these items found in top-k recommendations from our recommender system (based on the learned encoder) is the recall@k. The higher the recall, the greater the overlap between our recommender’s recommended movies and the user’s actual preferred movies.</p> </li> <li> <p><strong>Visualization of User Embeddings</strong>: By visualizing the learned user representation’s ability to be distinguished into separate clusters, we can better examine the potential user clusters for any distinguishing features. By utilizing t-distributed Stochastic Neighbor Embedding (TSNE) for dimensionality reduction of the user embedding vectors, we can project user representations to the 2D plane and use traditional clustering algorithms for visualization <d-cite key="rec"></d-cite>.</p> </li> <li> <p><strong>Top Movies Per User Cluster</strong>: To provide more insight into the resulting user embedding clusters, the top movies of the users in each cluster are also reported.</p> </li> </ol> <h2 id="experiments">Experiments</h2> <p>In addition to standard hyperparameter-tuning techniques to optimize training, different positive pairs and negative pairs strategies will be tested.</p> <p>All encoders were trained with <code class="language-plaintext highlighter-rouge">num_epochs</code> = 20, <code class="language-plaintext highlighter-rouge">batch_size</code> = 512, <code class="language-plaintext highlighter-rouge">lr</code> = 0.0001 (using Adam optimizer), and contrastive triplet loss.</p> <h3 id="strategy-1">Strategy 1</h3> <p>For a given user $u_i$ a similar user is determined by a random selection from a set of candidate users. These candidate users consist of the subset of users that have “liked” the same movies that $u_i$ “liked”, i.e. 
their ratings $\geq$ <code class="language-plaintext highlighter-rouge">LIKE_THRESHOLD</code>. Likewise, dissimilar users for $u_i$ were randomly selected from a set of candidate users that “disliked” the same movies $u_i$ “disliked”, i.e. their ratings $ &lt; $ <code class="language-plaintext highlighter-rouge">DISLIKE_THRESHOLD</code>.</p> <table> <thead> <tr> <th>LIKE_THRESHOLD</th> <th>DISLIKE_THRESHOLD</th> </tr> </thead> <tbody> <tr> <td>3.5</td> <td>3.5</td> </tr> </tbody> </table> <p>With these definitions of positive and negative pairs, an encoder was trained with the resulting user embeddings shown below.</p> <div class="row mt-3 align-items-center"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-clusters-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-clusters-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-clusters-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-clusters.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-top-movies-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-top-movies-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-top-movies-1400.webp" /> <!-- Fallback 
to the original file --> <img src="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s1-top-movies.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Learned user embedding clusters and top movies using Strategy 1. </div> <p>By examining the user embedding clusters, we see four loosely-defined user clusters. The top 5 highest rated movies by each cluster’s members are also depicted. A key takeaway is that we see a repetition of the same movies across each cluster, movies like <em>The Nutty Professor</em>, <em>Mission Impossible 2</em>, <em>Ace Ventura: When Nature Calls</em>, etc. These are all very popular and well-liked movies with a wide audience. The prevalence of highly-rated and popular movies such as these leads to a bias in our positive pairs. Since many users are fans of these movies, they are all considered similar users, i.e. our definition of similarity is too weak. The following strategies will try to address this.</p> <h3 id="strategy-2">Strategy 2</h3> <p>In order to decrease the influence of popular movies, one strategy is to filter out all movies that are “liked” by a certain number of users. We define <code class="language-plaintext highlighter-rouge">POPULARITY_THRESHOLD</code> = 100, which removes all movies with over 100 “liked” users. As a result, the distribution of “liked” users per movie is relatively uniform. 
The definitions of positive and negative pairs remains the same as in Strategy 1.</p> <table> <thead> <tr> <th>LIKE_THRESHOLD</th> <th>DISLIKE_THRESHOLD</th> </tr> </thead> <tbody> <tr> <td>3.5</td> <td>3.5</td> </tr> </tbody> </table> <div class="row mt-3 align-items-center"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-clusters-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-clusters-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-clusters-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-clusters.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-top-movies-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-top-movies-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-top-movies-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s2-top-movies.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Learned user embedding clusters and top movies using Strategy 2. 
</div> <h3 id="strategy-3">Strategy 3</h3> <p>A different method for reducing the influence of popular movies was to normalize each user’s ratings. By subtracting a movie’s average rating across all users from any particular user’s rating, we are able to determine whether the user liked the movie more than others or disliked it more than others. Popular movies only have an impact if the user really liked (or disliked) it relative to everyone else.</p> <p>Using this new strategy, for any user $u_i$, instead of randomly selecting a similar user from candidates that “liked” a movie in common, these candidate users are ranked such that the candidate that has the highest normalized rating is selected (the opposite is true for choosing a dissimilar user). Therefore, instead of having a positive pair of users who rated the same movie highly, the positive pair will consist of users who both gave the same movie a higher rating than the average user.</p> <table> <thead> <tr> <th>LIKE_THRESHOLD</th> <th>DISLIKE_THRESHOLD</th> </tr> </thead> <tbody> <tr> <td>3.5</td> <td>3.5</td> </tr> </tbody> </table> <div class="row mt-3 align-items-center"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-clusters-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-clusters-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-clusters-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-clusters.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source 
class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-top-movies-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-top-movies-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-top-movies-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s3-top-movies.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Learned user embedding clusters and top movies using Strategy 3. </div> <h3 id="strategy-4">Strategy 4</h3> <p>Despite the previous strategies, there still seems to be a lack of cohesion among the resulting user embedding clusters. The final strategy tested was a hybrid approach. In this scenario, the <code class="language-plaintext highlighter-rouge">LIKE_THRESHOLD</code> has been raised and the <code class="language-plaintext highlighter-rouge">DISLIKE_THRESHOLD</code> lowered in an attempt to narrow the candidate pools to more extreme users. Moreover, Strategies 2 and 3 are combined. 
Highly popular movies are removed and normalized ratings are used.</p> <table> <thead> <tr> <th>LIKE_THRESHOLD</th> <th>DISLIKE_THRESHOLD</th> </tr> </thead> <tbody> <tr> <td>4</td> <td>3</td> </tr> </tbody> </table> <div class="row mt-3 align-items-center"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-clusters-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-clusters-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-clusters-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-clusters.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-top-movies-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-top-movies-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-top-movies-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-01-rep-learning-for-rec-systems/s4-top-movies.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Learned user embedding clusters and top movies using Strategy 4. 
</div> <h3 id="analysis">Analysis</h3> <p>For each strategy, the recall@k for various values of k are shown, along with the sizes of the train and test datasets after filtering.</p> <table> <thead> <tr> <th> </th> <th>Strategy 1</th> <th>Strategy 2</th> <th>Strategy 3</th> <th>Strategy 4</th> </tr> </thead> <tbody> <tr> <td>recall@10 (%)</td> <td>0.62</td> <td>1.29</td> <td>0.73</td> <td>0.78</td> </tr> <tr> <td>recall@20 (%)</td> <td>1.97</td> <td>2.16</td> <td>2.18</td> <td>3.10</td> </tr> <tr> <td>recall@50 (%)</td> <td>3.84</td> <td>6.03</td> <td>4.36</td> <td>11.63</td> </tr> <tr> <td>Size Train Set</td> <td>51,576</td> <td>32,609</td> <td>51,576</td> <td>10,826</td> </tr> <tr> <td>Size Test Set</td> <td>1,361</td> <td>984</td> <td>1,361</td> <td>232</td> </tr> </tbody> </table> <p>It appears that both Strategy 2 and Strategy 3 alone seemed to make a noticeable improvement in recall, with Strategy 2 (the removal of the most popular movies) making a larger impact than normalizing ratings. Furthermore, by using both strategies along with a few other changes, a representation that resulted in a better recommender system and more well-defined embedding clusters was learned.</p> <h2 id="conclusion">Conclusion</h2> <p>From the above experiments, it seems that contrastive learning (even when used with a simple MLP encoder) can eventually learn a user embedding resulting in clusters. However, it seems like either a more advanced architecture or positive/negative pair mining procedures are required to ensure that the learned representations have a useful semantic meaning. Weak positive pairs resulted from the presence of popular movies with diverse audiences. 
Previous work in applying contrastive learning to recommender systems highlight more complex formulations of ranked loss functions, assigning different weights depending on whether the pairs are hard or easy negative samples <d-cite key="ranked"></d-cite>.</p> <p>An interesting extension of this project could explore the use of GNNs as the basis of the encoder architecture, as these types of models more naturally preserve the structure of user-movie interactions.</p> Improving CLIP Spatial Awareness Using Hard Negative Mining 2023-12-11T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/spacial-CLIP <h1 id="introduction-clip-doesnt-know-its-left-and-rights">Introduction: CLIP doesn’t know its left and rights</h1> <p>Multimodal learning has come into prominence recently, with text-to-image synthesis models such as DALLE or Stable Diffusion, and image-text contrastive learning models such as CLIP. In particular, CLIP has proven to be extremely useful in learning zero-shot capabilities from paired image and text data.</p> <p>However, recent work has highlighted a common limitation in multimodal models: the ability to capture spatial relationships. Spatial relationships can be defined as how objects in an image are positioned concerning other objects. For example, A is next to B or B is on top of A. Although Language models now demonstrate an understanding of word order and spatial awareness, multimodal models still struggle to capture this relationship in both the image and captions.</p> <h2 id="downstream-tasks">Downstream tasks</h2> <p>Improving captioning abilities is an important building block in overcoming this limitation in all multimodal models. Creating synthetic captions from images is an already popular method in developing training data for other models such as DALLE-3. 
However, limitations in captioning abilities carry over to downstream tasks, and therefore, models such as DALLE-3 often also struggle to generate images from prompts that include spatial relationships. We hope that demonstrating the ability to generate spatially-aware captions will also lead to improvements in other Vision-Language models in the future.</p> <h2 id="semantic-similarity">Semantic similarity</h2> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-spacial-CLIP/GA0Hn7zaIAAbp84-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-spacial-CLIP/GA0Hn7zaIAAbp84-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-spacial-CLIP/GA0Hn7zaIAAbp84-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-spacial-CLIP/GA0Hn7zaIAAbp84.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Taken from Yamada et al. </div> <p>CLIP is trained to maximize the similarity between embeddings of images and text. This leads to CLIP matching semantically similar images and captions but not understanding finer-grained details. Concept Association is especially an issue when there are multiple objects in an image where CLIP struggles to reason about the object’s attributes (Yamada 2022). 
Additionally, because of the focus on semantic similarity, CLIP also struggles with spatial relationships between objects.</p> <h1 id="winoground">Winoground</h1> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-spacial-CLIP/winoground_example-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-spacial-CLIP/winoground_example-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-spacial-CLIP/winoground_example-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-spacial-CLIP/winoground_example.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Taken from Thrush et al. </div> <p>Spatial awareness has been explored explicitly throughout previous literature. Thrush et al. in Winoground created an evaluation dataset that targets compositional reasoning. Each data point contains two captions and two images, where the captions contain the same words only in different orders. The difference in word ordering drastically changes the meaning of the sentence and therefore the image associated with the alternative caption also is completely different. The task then becomes to match the images to the correct captions (Thrush 2022).</p> <h2 id="evaluation-specifics-and-results">Evaluation Specifics and Results</h2> <p>We are going to use the image-to-caption evaluation of Winoground which aims to match captions to each image in constrast to images to captions. Different models have differnt matching strategies; CLIP uses the higher dot product similarity score when deciding which caption fits each image. Since there are in total, 4 different possible matchings out of the 2 image/caption pairs, random chance would score 25%. 
However, many multimodal models fail to score much higher than random chance. CLIP (ViT-B/32) scores 30.75% while the best models only score 38%.</p> <h2 id="spatial-examples">Spatial Examples</h2> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-spacial-CLIP/Winoground_Lightbulb-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-spacial-CLIP/Winoground_Lightbulb-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-spacial-CLIP/Winoground_Lightbulb-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-spacial-CLIP/Winoground_Lightbulb.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> An example of spatial image/caption pairs. Taken from Thrush et al. </div> <p>Winoground has proven to be an extremely difficult benchmark for multimodal models - and there are a multitude of reasons why. First, changing the word orders creates image/caption pairs that need fine-grained reasoning capabilities to differentiate. One of the many reasoning capabilities needed to do well is spatial reasoning. We filter out 101 examples of Winoground that contain image/captions that require spatial reasoning to create a more task-specific benchmark. Our filtering is caption-based and targets key words that may indicate spatial relationships. We will refer to this filtered out evaluation benchmark as Winoground-Spatial.</p> <h1 id="hard-negative-examples">Hard Negative Examples</h1> <p>Hard negative examples are negative examples that are close to our anchor pair. These are examples that are close in some way to our positive example, but still wrong. 
Oftentimes, these examples are hard to distinguish from one another, and therefore cause the model trouble.</p> <h2 id="clip-loss">CLIP Loss</h2> <p>As a refresher on how CLIP is trained, CLIP first calculates an N by N similarity matrix from the dot products of the two embeddings. The model then calculates a loss function as the average of two cross entropies. The task becomes a classification task where we classify the correct caption for each image and the correct image for each caption, thus leading to two cross entropy functions.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-spacial-CLIP/CLIP_OG.svg-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-spacial-CLIP/CLIP_OG.svg-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-spacial-CLIP/CLIP_OG.svg-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-spacial-CLIP/CLIP_OG.svg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> CLIP similarity matrix. Radford et al. </div> <p>We modify this training procedure to include additional hard negative captions. For each image/caption pair, we generate M additional negative captions. We then calculate an N by NM similarity matrix from the dot products. Then, we only modify the image classification cross entropy function to include negative captions alongside the original N captions. 
We don’t modify the caption classification cross entropy function since the negative examples don’t have a corresponding “image”.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-spacial-CLIP/CLIP_Negative-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-spacial-CLIP/CLIP_Negative-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-spacial-CLIP/CLIP_Negative-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-spacial-CLIP/CLIP_Negative.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> CLIP similarity matrix with negative examples. </div> <h2 id="data-and-augmentation">Data and Augmentation</h2> <p>How do we generate negative examples? We first have to create a fine-tuning dataset that contains image/caption pairs that display spatial relationships. To do this, we utilize the dataset Flickr30k, a dataset that contains 31,000 images collected from Flickr along with 5 captions annotated by human annotators. We chose this dataset due to its caption quality alongside the fact that many of the image/caption pairs contain multiple objects.</p> <p>We then filter out image/caption pairs based on the captions in a similar way we created our evaluation benchmark, Winoground-Spatial. We use 20 key words and phrases such as: “left”, “on top of”, “beneath”, etc. to create a training set of roughly 3,600 examples. 
Although there are most likely more spatial examples, we choose this method as it is cost-effective while still ensuring the quality of the training set being only examples of spatial relationships.</p> <p>Data augmentations have been commonly used as a method to prevent overfitting in image classification tasks. Although it is common to perform image augmentations, Fan et al. introduce LaCLIP to perform text augmentations on captions to create additional image/caption pairs. This method can be thought of as generating additional “positive pairs”. In order to generate text-augmentations, they utilize language models such as llama7b and GPT-3 to ensure the sentences generated are still grammatically correct. They use in-context learning and prompts such as, “Rewrite this caption of an image vividly, and keep it less than thirty words:”.</p> <p>We follow a similar procedure to generate our negative examples. For each image/caption pair, we prompt GPT-3.5-turbo-instruct to do different augmentations. Details of the prompts are provided in the later experiments.</p> <h1 id="experiments">Experiments</h1> <p>For all experiments, we use a base model of CLIP(ViT-B/32) pre-trained on OpenAI’s WIT provided by OpenClip. We then use OpenAI’s API to generate augmentations. In total, the cost of generating augmentations was under $50 in credits.</p> <h2 id="experiment-1-switching-word-order">Experiment 1: Switching word order</h2> <p>Our first experiment explores how switching the word order may serve as hard negative examples. This method is inspired by the benchmark we are using, where the captions share the same words but in a different order. For each caption, we generate a single hard negative caption. 
The prompt we use is displayed below:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-spacial-CLIP/GPT-word-order-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-spacial-CLIP/GPT-word-order-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-spacial-CLIP/GPT-word-order-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-spacial-CLIP/GPT-word-order.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> In-context-learning prompt used to augment word order. </div> <p>We discover adding a single hard-negative example to each example already leads to an impressive performance boost. The accuracy improves from 19.8% to a staggering 50.5% from fine-tuning.</p> <table> <thead> <tr> <th> </th> <th>Pretrained CLIP</th> <th>Word Order CLIP</th> </tr> </thead> <tbody> <tr> <td>Pairs matched correctly</td> <td>20</td> <td>51</td> </tr> <tr> <td>Accuracy</td> <td>0.198</td> <td>0.505</td> </tr> </tbody> </table> <p>We did some extra probing and noticed the majority of the improvement was from distinguishing left and right. From the additional 31 examples our fine-tuned model got correct, 18 of them were examples that the captions included the keyword of either left or right. This is consistent with our training set, where the most popular keyword of our examples is left/right.</p> <h2 id="experiment-2-replacing-key-spatial-words">Experiment 2: Replacing key spatial words</h2> <p>We then explore how a different augmentation workflow could impact the accuracy. In this experiment, we augment the captions to replace the keyword with another spatial keyword. 
For example, the keyword “on top of” could be replaced by “underneath” or “to the right of”. We again, utilize GPT to ensure the captions are still grammatically and logically correct. Because of the number of keywords available, we explore how the number of negative examples during training time may affect the model’s accuracy.</p> <table> <thead> <tr> <th> </th> <th>0 negative examples (Pretrained CLIP)</th> <th>1 negative examples</th> <th>5 negative examples</th> <th>10 negative examples</th> </tr> </thead> <tbody> <tr> <td>Pairs matched correctly</td> <td>20</td> <td>31</td> <td>65</td> <td>55</td> </tr> <tr> <td>Accuracy</td> <td>0.198</td> <td>0.307</td> <td>0.644</td> <td>0.545</td> </tr> </tbody> </table> <p>We can notice that from 0-5 negative training examples, there is a distinctive increase in model accuracy. However, an interesting result is the dropoff in accuracy from 5 training examples to 10. We did some probing into why this may be the case in the training data. One hypothesis may be the training examples for hard negatives are incorrect, in that, by a human they could be interpreted as positive examples. For example, object A could be both next to and above object B, but we are training CLIP to recognize the keyword above to be false in this case. Another hypothesis is the difficulty in training examples stunting training and needing more data. This could be the case when looking at the loss function, on whether it has fully converged or not.</p> <h1 id="conclusion-and-limitations">Conclusion and Limitations</h1> <p>Although we have not fully tackled the issue of spatial awareness, we have made significant progress from our base model of CLIP, with the highest accuracy being at 64.4% compared to 19.8%. This proof of concept work shows how hard-negative examples could boost improvements in specific reasoning tasks. 
The concept of using these hard-negative examples is not limited to spatial relationships: it could be interesting to examine how hard negative tasks may improve other Winoground examples that require reasoning capabilities such as counting. We also note that there is a possibility that improving the training data may not be enough, and that the architecture may need a change to fully solve spatial relationships.</p> <h3 id="references">References:</h3> <p>1.Robinson, J. D.; Chuang, C.-Y.; Sra, S.; Jegelka, S. Contrastive Learning with Hard Negative Samples. In Proceedings of the International Conference on Learning Representations, 2021.</p> <p>2.Thrush Tristan, Jiang Ryan, Bartolo Max, Singh Amanpreet, Williams Adina, Kiela Douwe, and Ross Candace. 2022. Winoground: Probing Vision and Language Models for Visio-Linguistic Compositionality. In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 5238–5248.</p> <p>3.Fan, L., Krishnan, D., Isola, P., Katabi, D., and Tian, Y. (2023a). Improving clip training with language rewrites. arXiv preprint arXiv:2305.20088.</p> Transformer-Based Approaches for Hyperspectral Imagery in Remote Sensing 2023-12-11T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/A-Transformer-Based-Approach-for-Simulating-Ecological-Recovery <h2 id="introduction">Introduction</h2> <p>Hyperspectral imaging (HSI) captures a wide spectrum of light per pixel, providing detailed information across numerous contiguous spectral bands. Unlike multispectral imaging, which only captures a few specific bands, hyperspectral imaging offers finer spectral resolution, allowing for more precise identification and analysis of materials. This capability makes it valuable in remote sensing for applications like mineral exploration, agriculture (e.g., crop health monitoring), environmental studies, and land cover classification. 
Each spectral band captures unique light wavelengths, enabling the identification of specific spectral signatures associated with different materials or conditions on the Earth’s surface. HSI images present unique challenges in deep learning compared to typical RGB images due to their high dimensionality. Each pixel in a hyperspectral image contains information across hundreds of spectral bands, leading to a massive increase in the data’s complexity and volume. This makes model training more computationally intensive and can lead to issues like overfitting if not handled properly. Current datasets, such as the Indian Pines or Salinas Scenes datasets, often have fewer samples compared to standard image datasets, exacerbating the difficulty in training deep learning models without overfitting. There’s also the challenge of effectively extracting and utilizing the rich spectral information in these images, which requires specialized architectures and processing techniques. However, analysis of hyperspectral data is of great importance in many practical applications, such as land cover/use classification or change and object detection and there is momentum in the field of remote sensing to embrace deep learning.</p> <p>Traditional hyperspectral image classification (HSIC) methods, based on pattern recognition and manually designed features, struggled with spectral variability. Deep learning, particularly CNNs, brought advancements by extracting intricate spectral-spatial features, enhancing HSIC’s accuracy. 
Yet, CNNs have their drawbacks, such as a propensity for overfitting due to the high dimensionality of hyperspectral data and limitations imposed by their fixed-size kernel, which could obscure the classification boundary and fail to capture varying spatial relationships in the data effectively.</p> <div class="l-body-outset" style="display: flex; justify-content: center; align-items: center;"> <iframe src="/staging/assets/html/2023-11-10-A-Transformer-Based-Approach-for-Simulating-Ecological-Recovery/hyperbands_plot.html" frameborder="0" scrolling="no" height="600px" width="100%"></iframe> </div> <div class="l-body-outset" style="display: flex; justify-content: center; align-items: center;"> <iframe src="/staging/assets/html/2023-11-10-A-Transformer-Based-Approach-for-Simulating-Ecological-Recovery/bands.html" frameborder="0" scrolling="no" height="800px" width="100%"></iframe> </div> <p>Compared to CNNs, there is relatively little work on using vision transformers for HSI classification but they have great potential as they have been excelling at many different tasks and have great potential in the field of HSI classification. Vision transformers, inspired by the Transformer architecture initially designed for natural language processing, have gained attention for their capacity to capture intricate patterns and relationships in data. This architecture leverages self-attention mechanisms, allowing it to model long-range dependencies effectively, which can be particularly advantageous in hyperspectral data where spatial-spectral interactions are crucial. Spectral signatures play a pivotal role in HSI analysis, enabling the differentiation of materials or conditions based on their distinct spectral characteristics, a capability that conventional RGB images cannot provide. 
Leveraging the strengths of vision transformers to effectively capture and exploit these spectral signatures holds promise for advancing the accuracy and precision of HSI in remote sensing classification tasks.</p> <h3 id="spectral-feature-based-methods-and-spatialspectral-feature-based-methods">Spectral Feature-Based Methods and Spatial–Spectral Feature-Based Methods</h3> <p>Spectral feature-based approaches classify hyperspectral images (HSIs) by analyzing each spectral pixel vector individually. However, this method has limitations as it overlooks the spatial context of the pixels. Spatial–spectral feature-based methods on the other hand, consider both the spectral and spatial characteristics of HSIs in a more integrated manner. These methods involve using a patch that includes the target pixel and its neighboring pixels, instead of just the individual pixel, to extract spatial–spectral features. Among these methods, convolutional neural networks (CNNs) are particularly prominent, having shown significant effectiveness in HSI classification. Despite the success of CNN-based models in classifying HSIs, they are not without issues. The CNN’s receptive field is limited by the small size of its convolutional kernels, such as 3×3 or 5×5, which makes it challenging to model the long-range dependencies and global information in HSIs. Additionally, the complexity of convolution operations makes it difficult to emphasize the varying importance of different spectral features.</p> <p>When comparing spectral feature-based methods with spatial–spectral feature-based methods in hyperspectral image (HSI) classification, each has distinct advantages and applications. Spectral feature-based methods are valued for their simplicity and efficiency, especially effective in scenarios where unique spectral signatures are key, such as in material identification or pollution monitoring. They require less computational power, making them suitable for resource-limited applications. 
Alternatively, spatial–spectral feature-based methods offer a more comprehensive approach by integrating both spectral and spatial information, leading to higher accuracy in complex scenes. This makes them ideal for detailed land cover classification, urban planning, and military surveillance where spatial context is crucial. Among spatial–spectral methods, convolutional neural networks (CNNs) stand out for their advanced feature extraction capabilities and adaptability, making them useful in a variety of applications, from automatic target recognition to medical imaging. Although, they face challenges such as the need for large datasets and difficulties in capturing long-range spatial dependencies. While spectral methods are efficient and effective in specific contexts, spatial–spectral methods, particularly those using CNNs, offer greater versatility and accuracy at the cost of increased computational complexity.</p> <h3 id="hyperspectral-image-classification">Hyperspectral Image Classification</h3> <p><u>Three-Dimensional Convolutional Neural Network (CNN3D)</u></p> <p>The first stage will involve the collection of multi-spectral satellite imagery and high-resolution Digital Elevation Models (DEMs) of MTR-affected landscapes. This data will be preprocessed to ensure compatibility, which includes image normalization, augmentation, and the alignment of satellite imagery with corresponding DEMs to maintain spatial congruence. Preprocessing will also involve the segmentation of satellite data into labeled datasets for supervised learning, with categories representing different land cover types relevant to ecological states.</p> <p><u>SpectralFormer</u></p> <p>Transformer models have exhibited remarkable success beyond their initial domain of natural language processing. Their unique self-attention mechanism enables them to capture long-range dependencies, making them a potentially good choice for complex spatial analysis. 
Vision Transformers, in particular, offer a new approach by treating image patches as tokens and allowing them to process the global context of an image effectively. This capability is beneficial for satellite imagery analysis, where understanding the broader environmental context is critical. Transformers designed for point cloud data, adapting to the inherent irregularities of LiDAR measurements, can potentially uncover intricate structural patterns and temporal changes within landscape data. With strategic approaches like transfer learning, transformers can overcome their computational resource complexity.</p> <p><u>Group-Aware Hierarchical Transformer (GAHT)</u></p> <p>The final step will be the development of a 3D simulation environment using Unreal Engine. The simulation will visualize the predicted ecological states and changes over time, providing an interactive tool for users to explore the landscape recovery process. The interface will allow users to manipulate variables and observe potential outcomes of different restoration strategies in a virtual setting.</p> <h3 id="conclusions">Conclusions</h3> <p>For the spatial analysis of satellite imagery and LiDAR data, the evaluation will focus on the transformer’s ability to discern and classify diverse land cover types. The key metrics for this assessment will include accuracy, precision, recall, and the F1 score extracted from confusion matrices. The model should accurately identify and categorize ecological features from high-resolution imagery. Temporally, the performance will be evaluated based on its capacity to predict ecological changes over time. 
This involves analyzing the model’s output against a time series of known data points to calculate the Mean Squared Error (MSE) for continuous predictions or log-loss for discrete outcomes.</p> Multimodal Commonsense 2023-12-11T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/multimodal-commonsense <h1 id="introduction">Introduction</h1> <p>In recent years, language models have been proven to be quite proficient in producing human-like text, computing somewhat semantically-meaningful and human-interpretable word and token embeddings, and generating realistic conversation. However, there is a vast distinction between mimicking human linguistics from data and forming an understanding of the world and its abstract connections from data. The latter describes the commonsense knowledge of a language model, or its ability to reason about simple relationships, interactions, and general logic of the world.</p> <p>With the advent and growth of large language models in recent years (and months), understanding the world and developing deeper underlying representations of physical and abstract concepts through text alone has become much more feasible and tractable. Yet, there is only so much someone or something can understand by simply reading about it. When evaluating the performance of language models in this context, does the language model simply mimic this knowledge or does it inherently possess it? One paradigm through which to formalize this is through a deeper categorization of common sense.</p> <p>In particular, physical common sense, or knowledge about the physical world and its properties, is fundamental knowledge for realizing the world and the interactions within it. Physical common sense is a naturally multimodal concept, though, that for humans requires a combination of several senses to perceive, as physical properties are manifested in multiple modalities. 
A lack of info in any modality may make an object visually ambiguous, or otherwise manifest some misunderstanding of an object. Can we expand the capabilities of language models by imbuing them with multifaceted input to expand its knowledge base beyond text alone?</p> <p>In this work, I focus on evaluating the physical commonsense reasoning ability of unimodal and multimodal models from text-based tasks under multimodal input. I specifically compare the performance of a text-only language model with a multimodal vision-language model and investigate (a) whether the multiple modalities of input in pretraining the multimodal model can have comparable performance to a text-specialized model, and (b) whether the supplementation of relevant image data at inference time boosts the performance of the multimodal model, compared to a previously text-only input.</p> <p>Intuitively, vision data should benefit the physical commonsense reasoning of a model by providing the inputs the additional feature of a physical manifestation. Here, I investigate whether image data truly gives deep learning models an additional dimension of representation to benefit its commonsense reasoning.</p> <h1 id="related-works">Related Works</h1> <p>Several previous works evaluate language models on unimodal text-based commonsense reasoning. A number of common sense benchmarks for LMs exist, evaluating a variety of common sense categories <d-cite key="bisk2019"></d-cite><d-cite key="sap2019"></d-cite>, from which fine-tuning has shown to improve general commonsense capabilities of state-of-the-art models. Li et al., 2021 <d-cite key="li2021"></d-cite> analyzes the performance of the Gopher language model in zero-shot and few-shot learning with varying model sizes, finding that their LM performed relatively well in physical common sense, but worse in other common sense categories. 
Zhao et al., 2023 <d-cite key="zhao2023"></d-cite> investigates the downstream impact of LLMs’ “commonsense model” of the world on robot task generation; they find that using the LLM commonsense knowledge as a heuristic policy achieves better-reasoned decision-making, implying that LLM common sense can empirically benefit physical interaction with the world.</p> <p>The intersection between text and vision in models has also been explored in several works, though not in the context of commonsense reasoning. For example, text-to-image models have shown significantly greater improvement in improving &amp; expanding the text encoder as opposed to a similar increase in size of the image diffusion model <d-cite key="saharia2022"></d-cite>, showing the effect of powerful text embeddings in image generation. Common sense benchmarks with multimodal inputs have also been created and demonstrate an increase in multimodal model performance after fine-tuning <d-cite key="yu2022pacs"></d-cite>, but they don’t investigate the empirical effect or the representational differences between utilizing additional modalities of data versus not.</p> <p>More generally, the effect of additional modalities of data on downstream performance is studied in Xue et al. 2022 <d-cite key="xue2023modality"></d-cite> in the context of crossmodal knowledge distillation, where they conclude that multimodal input isn’t unconditionally beneficial; multimodal teacher models maybe perform better than unimodal teachers, but students of multimodal teachers tend to perform better than those of unimodal teachers.</p> <h1 id="methods">Methods</h1> <h2 id="commonsense-benchmarks">Commonsense Benchmarks</h2> <p>It’s important to note that there are many distinguishing categories of commonsense knowledge. 
Physical common sense (e.g., a ball rolls down an incline instead of remaining still), social common sense (e.g., shouting at a person may incite fear), temporal common sense (e.g., pan-frying chicken takes longer than oven-roasting one), and numerical/logical common sense (e.g., basic arithmetic) are a few examples that all require different modalities of reasoning and may favor some models &amp; architectures over others. Here I focus on physical common sense, since intuitively vision data may influence a model’s physical knowledge the most.</p> <p>Commonsense benchmarks can be further categorized into (a) multiple-choice evaluation, where given a short background prompt, a model must select the most reasonable option or continuation from a set of given options, and (b) generative evaluation, where a model must generate an answer or continuation to the prompt. Here, I will focus on multiple-choice evaluation, as multiple-choice benchmarks provide a more concrete and reliable metric for determining similarity to “human” judgment. To evaluate the commonsense performance of both the unimodal and multimodal models, the HellaSwag benchmark is used.</p> <h2 id="hellaswag">HellaSwag</h2> <p>The HellaSwag benchmark <d-cite key="zellers2019"></d-cite> is designed to evaluate physical, grounded, and temporal common sense. Given a few-sentence-long story or prompt, the model must choose the correct continuation from four choices. The prompts are generated from (a) ActivityNet <d-cite key="caba2015activitynet"></d-cite>, a large-scale video benchmark for evaluating Human Activity Understanding containing annotations for segments of YouTube videos, and (b) WikiHow <d-cite key="koupaee2018wikihow"></d-cite>, a large-scale text summarization dataset. 
Data splits are provided by the HellaSwag benchmark, but only the train and validation splits are used here, as the test set labels are not public.</p> <p>Here, for evaluating the multimodal model, I use only the entries generated from ActivityNet, as each ActivityNet prompt has an associated source ID from which the original source video may be accessed. From the video, image data can be scraped to augment the multimodal model’s fine-tuning and inference. The image data generation process is described in more detail in a following section.</p> <p>Due to resource and time constraints, only a subset of this data was used for training and evaluation. Given the large size of the original HellaSwag benchmark, the sampled subset of the original data contains 10% of the original data. Each datum within the sampled dataset is sampled randomly from the original train/validation set, and each prompt within the sampled dataset is verified to have a publicly available video associated with it, i.e., the associated YouTube video is not private or deleted. Implications of this limitation are discussed further in the Limitations section below.</p> <div> <div class="row mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/hellaswag-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/hellaswag-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/hellaswag-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-multimodal-commonsense/hellaswag.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="caption"> Figure 1. Example prompts from the HellaSwag benchmark. 
Image sourced from the original HellaSwag paper <d-cite key="zellers2019"></d-cite>. </div> </div> <h2 id="text-only-language-model">Text-Only Language Model</h2> <h3 id="roberta">RoBERTa</h3> <p>RoBERTa <d-cite key="liu2019roberta"></d-cite> is used for evaluation of text-only models for physical commonsense reasoning. I use RoBERTa for its established high performance in general natural language processing tasks while being relatively compact and fast to run. A dropout layer and a linear classification head is used with the RoBERTa model to generate the logits for label classification of each prompt.</p> <h2 id="vision-text-multimodal-model">Vision-Text Multimodal Model</h2> <h3 id="clip">CLIP</h3> <p>The CLIP (Contrastive Language-Image Pre-Training) model is a multimodal vision and language model <d-cite key="radford2021clip"></d-cite>. It was introduced as a novel, simplified pretraining strategy utilizing the large amounts of public available data from the Internet to form (image, text) pairs, demonstrating high performance in downstream tasks such as OCR, geo-localization, and action recognition. Since CLIP utilizes natural language processing methods for processing text and image captions, it also boasts an impressive language model, making the model useful for both unimodal and multimodal tasks.</p> <p>In the experiments described below, the multimodal model is compared to the unimodal model via text sequence classification and text + vision sequence classification for determining the most likely ending to each HellaSwag prompt, so high baseline performance in both of these tasks is an essential starting point, which CLIP provides. 
Like for the RoBERTa model, a dropout layer and a linear classification head is used in conjunction with CLIP to perform the label classification for each prompt.</p> <h3 id="image-data-generation">Image Data Generation</h3> <p>To collect the supplementary vision data for fine-tuning and evaluating the multimodal model, an additional scraping script is used to collect the relevant image data for each HellaSwag prompt. As described before, each prompt in the HellaSwag benchmark is generated from an associated ActivityNet prompt. Each ActivityNet prompt contains a source ID for the corresponding YouTube video, as well as a time segment containing the start and end time (in seconds) for the relevant video annotation. Using this information, each text prompt can be supplemented with an additional image prompt via a frame from the corresponding YouTube video.</p> <p>A custom script is used to access each prompt’s corresponding YouTube video and scrape image data. The script works as follows:</p> <ol> <li>From a HellaSwag entry, obtain the source ID for the corresponding ActivityNet entry.</li> <li>From the ActivityNet entry, obtain the YouTube video source ID (to be used directly in the YouTube URL) and the time segment indicating the start/end time of the annotated clip.</li> <li>Download a low-resolution copy of the YouTube video via accessing the URL <code class="language-plaintext highlighter-rouge">https://www.youtube.com/watch?v={source_id}</code>. Here, we download the 144p resolution copy of each video.</li> <li>Capture a single selected frame from the video data. Note: the selected frame is determined by calculating the average between the video clip’s start and end time, then scraping the frame of the video at that timestamp. 
Implications of this frame selection are described in more detail in the Limitations section below.</li> <li>Save the frame as image data for multimodal fine-tuning.</li> </ol> <p>This pipeline is used on the (sampled) HellaSwag train, validation, and test sets so that image data is available for both fine-tuning of the multimodal model, as well as inference for evaluation.</p> <h1 id="experiments">Experiments</h1> <h2 id="data">Data</h2> <p>For fine-tuning and evaluation of the unimodal and multimodal models, a subset of the HellaSwag dataset is used, as already described above. Further summary of the sampled dataset can be found in Table 1.</p> <p>To prepare the data for Multiple Choice Classification, the data from each prompt must be preprocessed as follows. Each prompt in the HellaSwag dataset is broken into three components: <code class="language-plaintext highlighter-rouge">ctx_a</code>, which contains the first sentence(s) of the prompt, <code class="language-plaintext highlighter-rouge">ctx_b</code>, which contains the initial few words of the final sentence, and four <code class="language-plaintext highlighter-rouge">ending</code>s all stemming from the same <code class="language-plaintext highlighter-rouge">ctx_a</code> and <code class="language-plaintext highlighter-rouge">ctx_b</code> but each with different conclusions. This particular formatting of the data is important for the RoBERTa tokenizer, where each sequence within an inputted text pair must be a complete sentence. Each prompt then generates four text pairs of the form <code class="language-plaintext highlighter-rouge">(ctx_a, ctx_b + ending_i)</code> for each of the four endings. 
This allows for the multiple choice classification head to compute the most likely of the four endings, given the same context <code class="language-plaintext highlighter-rouge">ctx_a</code>, <code class="language-plaintext highlighter-rouge">ctx_b</code>.</p> <div> <div class="row mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/hellaswag_summary-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/hellaswag_summary-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/hellaswag_summary-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-multimodal-commonsense/hellaswag_summary.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="caption"> Table 1. Summary of sampled HellaSwag dataset. </div> </div> <h2 id="setup">Setup</h2> <p>The architecture of neither the RoBERTa nor CLIP are designed for sequence or multiple choice classification, so a separate linear classification head follows each of the unimodal RoBERTa, unimodal CLIP, and multimodal CLIP models.</p> <p><strong>Text-only fine-tuning</strong>: The training and validation sets for fine-tuning are formatted and preprocessed as described above. To adjust the weights of the classifier and the core embedding model, each model is fine-tuned on the HellaSwag training data and evaluated during training on the validation data for 20 epochs. 
Since only the text prompt is inputted to CLIP here, only the CLIP text embedding is used for classification.</p> <p><strong>Text-image fine-tuning</strong>: To fine-tune the multimodal CLIP model, the original training and validation datasets are augmented by adding each prompt’s relevant corresponding image data (from the process described in the Image Data Generation section). The multimodal model is then fine-tuned on both the text prompts as before and the relevant image data simultaneously. With both text and image input, CLIP outputs a combined text-image embedding that is used for the classification head, instead of the text-only embedding from before.</p> <p>After fine-tuning, each model is evaluated on the withheld HellaSwag test dataset for classification accuracy. For both the text-only and text-image fine-tuning, I perform three total repetitions for each model and average the results in Figure 1.</p> <div> <div class="row mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/accuracy-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/accuracy-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/accuracy-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-multimodal-commonsense/accuracy.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="caption"> Figure 2. Accuracy results for each model, averaged across three runs. 
</div> </div> <h2 id="results">Results</h2> <p>As shown in the accuracy results, the RoBERTa model performs the best, while the unimodal CLIP model performs worse, and the multimodal CLIP model only slightly better than the unimodal CLIP but still marginally worse than RoBERTa. RoBERTa likely performs so well because of its generally high performance in other text-based tasks, and its bidirectional contextual embeddings allow for evaluation of a prompt/ending holistically. In this setup, the supplementary image data did not provide any significant empirical improvement to the multimodal model, as shown by the insignificant improvement in downstream performance when comparing the text-only to text-image CLIP models.</p> <p>However, I attempt to provide an explanation for this shortcoming through further investigation of the supplementary images. Below, I display the class activation map of the image data from a particular prompt to attempt to visualize why the additional modality of data had little effect on the classification distinguishability across the four endings of the prompt. Figure 2 shows the image (which is the same for all four endings) and the individual image attention masks generated from each ending corresponding to the following context: <code class="language-plaintext highlighter-rouge">A lady named linda, creator of paint along is demonstrating how to do an acrylic painting. She starts with a one inch flat brush and yellow and white acrylic paint. 
she ...</code></p> <div class="row mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_paint0-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_paint0-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_paint0-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_paint0.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_paint1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_paint1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_paint1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_paint1.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_paint2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_paint2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_paint2-1400.webp" /> 
<!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_paint2.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_paint3-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_paint3-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_paint3-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_paint3.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="caption"> Figure 3. The class activation maps for the following prompt: "A lady named linda, creator of paint along is demonstrating how to do an acrylic painting. She starts with a one inch flat brush and yellow and white acrylic paint. she ..." and the following endings: (1) "... then paints about six shades of purple color on the paper." (2) "... makes x patterns across the canvas with the yellow color." (3) "... puts on a cream colored coat and begins painting the white wooden fence without a paint shaker." (4) "... powders the paint in an electric flat brush and applies it over the wall." The image caption that generated the shown attention masks is the full sentence pair, i.e., context + ending </div> <p>Notice that across all four prompt/ending pairs, CLIP attends primarily to the same location on the image. 
While the image data might enrich the model’s representation of the prompt itself, the similarity across the generated attention masks demonstrates that the image doesn’t serve to distinguish the endings from each other and, therefore, has little effect in influencing the likelihood of any particular ending from being more likely. In this setup, the text embedding alone determines the classifier output, and the lack of image distinguishing power provides some explanation for the similarity in downstream performance between the unimodal and multimodal CLIP models.</p> <p>However, it’s possible that the attention masks were only so similar because all endings were prepended by the same exact context. In the case of Figure 2, the context describes an interaction with the painting, so it may be natural for all attention masks to focus on the painting, regardless of the conclusion of the ending. What if we restrict the image caption to contain only the final sentence (<code class="language-plaintext highlighter-rouge">ctx_b</code> + <code class="language-plaintext highlighter-rouge">ending</code>)? 
Figure 3 displays the class activation maps for this setup (though, not from an additional CLIP model fine-tuned on this image caption setup).</p> <div class="row mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_end0-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_end0-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_end0-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_end0.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_end1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_end1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_end1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_end1.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_end2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_end2-800.webp" /> <source class="responsive-img-srcset" 
media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_end2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_end2.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_end3-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_end3-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_end3-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-multimodal-commonsense/linda_end3.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="caption"> Figure 4. The class activation maps for the endings above, without the prepended context. </div> <p>We see that using the final sentence without the preceding context generates more varied attention masks, so does this unconditionally allow for more diversity in the image/common sense representation in the joint text/image embedding? I claim that the answer is no; having the entire context for analysis is fundamental for common sense reasoning, so removing a significant portion of the context promotes greater ambiguity in both the intent of the prompt/image caption and the benefit of the attention mask. 
Using only the final sentence may produce more varied results in the image attention mask, but this may potentially be more detrimental than beneficial by attending to an irrelevant portion of the image that may detract from the commonsense ground truth answer.</p> <p>Further investigation into different formulations of the image caption with respect to the original prompt in this manner may result in truly richer representations and more meaningful results for downstream model performance.</p> <h1 id="conclusion">Conclusion</h1> <p>In this work, I compare the physical commonsense reasoning capability of a text-only language model with a multimodal vision-language model and evaluate whether the multiple modalities of input in pretraining the multimodal model can have comparable performance to a text-specialized model, and whether the addition of relevant image data for inference boosts the performance of the multimodal model. I find that, within the proposed experimental setup, the effects of image data supplementation are insignificant, though I provide a potential explanation for this unintuitive result via class activation maps of the multimodal model’s image attention data; alternative formulations for this text-image data augmentation may provide better and more intuitive results. Overall, I provide an empirical experimental pipeline and analysis for potential factors toward further artificial intelligence models’ physical commonsense reasoning, and their internal representations of the world.</p> <h2 id="ethical-implications">Ethical Implications</h2> <p>It’s also important to note the ethical considerations of “improving” the commonsense reasoning capabilities of deep learning models. Converging on a universally-accepted definition of common sense is utopian, so the interpretation of common sense evaluation must be constantly scrutinized. 
The biases and malicious elements of a model’s knowledge base must be investigated to ensure that they are not further accumulated and embedded into the model through fine-tuning on common sense benchmarks. Physical common sense is relatively simple for finding a ground truth answer or natural continuation, but for social common sense, for instance, what a model “should” predict for a particular situation or prompt is much more ambiguous.</p> <h2 id="limitations">Limitations</h2> <p>The implementation and constraints of this work imply some limitations. One evident limitation is the size of both the benchmark dataset and the models used. Evaluating uni- and multimodal models on the full HellaSwag benchmark, including all of both ActivityNet and WikiHow entries, may conclude in slightly different results. Furthermore, newer and bigger models for both text and vision-text models exist; for example, if evaluation is extended to generative prompt evaluation, the recently released GPT4 model <d-cite key="openai2023gpt4"></d-cite> can be evaluated in this framework given its original text-only functionality and its new image input capabilities.</p> <p>On the topic of generative prompt evaluation, this work only uses multiple-choice prompts for the simplicity and clarity of its evaluation results. However, generative prompts may more closely reflect human-generated responses and may be more representative of multimodal capabilities. Finally, making progress toward a more general-purpose intelligent system means extending the common sense evaluation to more categories than physical. 
Designing a more comprehensive multimodal model for common sense requires evaluation on all modalities of common sense, and will likely also require additional modalities of input data (e.g., audio cues for better social common sense performance).</p> Exploring Univariate Time Series Anomaly Detection using VAE's 2023-12-11T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/Exploring-Generative-Models-In-Time-Series <h2 id="introduction">Introduction</h2> <p>Anomaly detection in time series data is an extensively studied field in academia, holding significant importance due to its wide-ranging applications in real-world scenarios. Time series are present everywhere, and the ability to detect anomalies is vital for tasks such as identifying potential health issues, predicting system failures, or recognizing regime changes in business operations. There are a wide range of methods that have been developed over the years in order to tackle this crucial yet challenging problem. Classical approaches rooted in statistics have long been employed, but in recent years, researchers have begun to experiment with adapting deep learning techniques to achieve performance improvements.</p> <p>The deep methods can generally be classified into distinct approaches. The first approach is forecasting, where the method attempts to learn the generating process of the series, and then classifies a point in the series as anomalous if the ground truth value deviates significantly from the predicted value. The second type of approach is reconstruction. The models attempt to learn the generating process of the series in a latent space. The model then reconstructs the series, and uses a well designed reconstruction score in order to classify the series points as normal or anomalous. 
DONUT is an example of a method that falls into this category.</p> <h2 id="problem-formulation-and-background">Problem Formulation and Background</h2> <p>Xu et al <d-cite key="xu2018unsupervised"></d-cite> explore time series anomaly detection within the framework of key performance indicator time series. These time series show high levels of seasonality, a result of being a derivative of human action. They propose a method called DONUT, a highly adaptable anomaly detection method that can perform both supervised, semi-supervised, and unsupervised detection. Their main contributions include modifications to the traditional VAE formulation, the discovery that DONUT requires training on both normal and anomalous data (contrary to other methods), and a kernel density estimation interpretation of the latent space. Given a series \(x_1, x_2, ....x_T\), and potentially labels \(y_1, y_2, ...y_T\), where \(y_t\) is 1 when \(x_t\) is an anomaly and 0 otherwise, for any time \(t &lt; T\) the method should be able to classify \(x_t\) as normal or anomalous.</p> <p>We begin by defining what an anomaly means in the context of time series. Darban et al <d-cite key="darban2022deep"></d-cite> classify temporal anomalies as follows. Global, contextual, seasonal, trend, and shapelet. Global anomalies are points with extreme values in comparison with the rest of the series. Contextual anomalies are deviations from the context or neighborhood of a point. Seasonal anomalies are deviations from the typical seasonal patterns in the series. Trend anomalies are transitions or deviations from a local or global trend in a series, and shapelet anomalies are subsequences that have different shapelets from the sequence. Xu et al <d-cite key="xu2018unsupervised"></d-cite> make a further distinction by defining abnormal points as points that are either anomalous or missing. Conversely, points that are neither anomalous nor missing are classified as normal. 
These definitions give us a framework for analyzing what types of anomalies a method is able to detect.</p> <h2 id="overview-on-vaes">Overview on VAE’s</h2> <p>Generative modeling refers to the objective of generating data from compact, low-dimensional representations. Representation learning can be a byproduct of generative modeling, where the generative model attempts to learn lower dimension representations of data such that inputs with similar high level features will be close to each other in the low dimension representation. Conversely, inputs that have dissimilar features will be far away from each other in the lower dimension representation space. These properties imply that the representation learner learns a good representation of the data that captures as much distinguishing information as possible. VAE’s achieve this through a two step process. Given an input x, an encoder is learned that maps the input to a latent space, and then a decoder takes the latent space representation and maps it back up to the original feature space. The key property of VAE’s is that they can attempt to enforce a specific distribution in the latent space, such that we can sample from it and generate real looking outputs. The goal is to learn a model \(P_{\theta}(x) = \int p_{\theta}(x | z) p_z(z)dz\), where x are the inputs and z is a random variable in our latent space. In DONUT, and in most other VAE methods, \(p_{\theta}(x | z)\) and \(p_z(z)\) are chosen to be gaussian. Given this model, we would like to find the parameters that maximize the log likelihood \(log P_{\theta}(x)\). This is often an intractable integral to solve or approximate, so a trick called importance sampling is used. We can rewrite the integral as</p> \[P_{\theta}(x) = \int p_{\theta}(x | z) p_z(z) \frac{q_z(z)}{q_z(z)}dz\] <p>where \(q_z(z)\) is a distribution we know how to sample from. 
Now, we rewrite this expression as an Expectation</p> \[E_{z \sim q_z}[p_{\theta}(x | z) \frac{p_z(z)}{q_z(z)}]\] <p>We can now use monte carlo integration to estimate this expectation. This expectation will be inefficient to estimate with the wrong choice of \(q_z\). It turns out that</p> \[q_z(z) = p_{\theta}(z | x)\] <p>is the optimal choice for \(q_z(z)\), and because this distribution might be hard to sample from, we use the variational inference trick where we find an approximation to this distribution by minimizing the objective</p> \[J_q = KL(q_{\psi}(z | x) || p_{\theta}(z | x))\] <p>Thus we can now define an objective to be minimized that is fully parametrized by \(\theta\) and \(\psi\).</p> \[J_p = -log E_{z \sim q_{\psi}(z | x)}[p_{\theta}(x | z) \frac{p_z(z)}{q_{\psi}(z | x)}]\] <p>The monte carlo estimate of this expectation produces a biased estimation of \(\theta\), so instead of optimizing the objective directly, we optimize a lower bound of the negated objective. Using Jensen’s inequality and expanding out the log terms, we know that</p> \[-J_p \geq E_{z \sim q_{\psi}(z | x)}[log p_{\theta}(x | z) + log p_z(z) - log q_{\psi}(z | x)] = E_{z \sim q_{\psi}(z | x)}[log p_{\theta}(x | z)] - KL(q_{\psi}(z | x) || p_z(z))\] <p>This expectation lower bound is known as the ELBO, and is the surrogate objective that VAE’s optimize in order to learn good encoders and decoders.</p> <h2 id="donut">DONUT</h2> <p>The key goal of DONUT is to take a series with normal data and potentially anomalous data, learn how to represent the normal features of the series, and then use these representations to compute a reconstruction probability score. Intuitively, if the method learns to represent normal inputs well, an anomalous input will have a low chance of being well reconstructed, and thus will have a low reconstruction probability. 
The challenge is that in order for the method to work really well, it is important that the method does not attempt to learn good representations for anomalous data. Xu et al <d-cite key="xu2018unsupervised"></d-cite> achieve this by formulating a modified objective function called the modified ELBO (M-ELBO). The idea is to reduce the contribution of anomalous and missing points to the learned representation. M-ELBO is defined as follows</p> \[E_{z \sim q_{\psi}(z | x)}[\sum_{w = 1}^W \alpha_w log p_{\theta}(x | z)+ \beta log p_z(z) - log q_{\psi}(z | x)]\] <p>Where \(\alpha_w\) is 1 when \(x_w\) is not an abnormal point, and 0 when \(x_w\) is abnormal. \(\beta = (\sum_{w = 1}^W \alpha_w) / W\). We will take a deep dive into this modified elbo through empirical experiments and by considering what role each term in the objective plays in both the learning of the latent space, and performance.</p> <p>The authors also introduce two innovations that serve to improve performance, something we will reproduce in our experiments. The first innovation is markov chain monte carlo imputation of the missing points. The authors hypothesize that during testing, the presence of missing points in a given sample window might bias the reconstruction of the window, and thus affect the reconstruction probability, so they introduce iterative generation of normal points that can replace the missing points. Additionally, the authors implement “missing point injection”. Before each training epoch, they inject missing points into the training samples by randomly selecting a subset of training sample points and removing the points (setting their values to zero). Note that the original samples will be recovered after the epoch is completed. They claim that missing point injection amplifies the effect of M-ELBO by forcing DONUT to learn the normal representation of data in abnormal windows. 
It certainly helps to improve performance, and we will perform a more thorough empirical analysis on both injection, and the \(\beta\) term in the M-ELBO.</p> <p>The authors formulate the reconstruction probability as follows. They begin with the expression</p> \[p_{\theta}(x) = E_{p_{\theta}(z)}[p_{\theta}(x | z)]\] <p>The authors claim that this does not work well empirically, and thus choose to use \(E_{q_{\phi}(z | x)}[log p_{\theta}(x | z)]\) as the reconstruction probability score. If the negation of these scores exceeds a given threshold, the point will be classified as an anomaly.</p> <p>We now describe the model structure of DONUT. The encoder \(q_{\phi}(z | x)\) is represented by a deep fully connected net that maps x to a lower dimension feature space. Then there are two readout heads that map the learned features from the net to a mean and variance, which we will denote \(\mu_z\) and \(\sigma_z\). We can then sample \(z\) from \(N(\mu_z, \sigma_z)\). The decoder \(p_{\theta}(x | z)\) is represented by a deep fully connected net that maps a latent variable \(z\) to a larger feature space. There are then two readout heads that map the learned features to a mean and variance, which we will denote \(\mu_x\) and \(\sigma_x\). We can then sample \(x\) from \(N(\mu_x, \sigma_x)\)</p> <h2 id="experimental-setting-and-evaluation">Experimental Setting and Evaluation</h2> <p>Before we lay out the experimental findings and their implications, we begin by briefly describing the datasets used and their characteristics, model architectures, training, and the metrics used for evaluation. We will use three datasets for experimentation, two of which come from the repository of the original paper. The first dataset is called “cpu” and is a series representing some cpu related kpi sampled every minute. The second dataset is called “g” and is also sampled every minute. 
The third dataset is air temperature time series from CIMIS station 44 in Riverside California, sampled at hourly intervals in the month of March from 2009 to 2019. The dataset did not come with time stamps. These series are all mostly normal, with few anomaly points. This makes the problem more challenging and interesting.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_plot-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_plot-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_plot-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_plot.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Plot of the cpu series with anomaly points colored red </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_decomp-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_decomp-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_decomp-1400.webp" /> <!-- Fallback to the original file --> <img 
src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cpu_series_decomp.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Plot of the cpu series seasonal decomposition </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_plot-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_plot-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_plot-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_plot.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Plot of the g series with anomaly points colored red </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_seasonal-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_seasonal-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_seasonal-1400.webp" /> <!-- Fallback to the original file --> <img 
src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/g_series_seasonal.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Plot of the g series seasonal decomposition </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_plot-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_plot-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_plot-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_plot.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Plot of the weather series with anomaly points colored red </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_decomp-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_decomp-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_decomp-1400.webp" /> <!-- 
Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/weather_series_decomp.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Plot of the weather series seasonal decomposition </div> <p>The cpu time series shows clear seasonality, and has an anomaly ratio of .015. The second series has much less clear seasonality, an anomaly ratio of .06, and is generally less smooth than the first series. This allows us to test the robustness of DONUT on a series that showcases less obvious seasonality, and draw some insights and comparisons on performance on series with relatively different smoothnesses. The weather series also displays clear seasonality and is smoother than the second series, but it differs from the other two series in that there are no anomalies in the training data. Thus, DONUT must learn to detect anomalies by training on purely normal data.</p> <p>We create the training and testing data as follows. We begin by standardizing both the training and testing splits in order to represent all of the series on the same scale. We then set any missing values in the series to zero. Finally, we perform slide sampling in order to turn the series into windows of length \(W\). For each window, we will be predicting whether the last value in the window is an anomaly or not. We use a window size of 120 for the first two datasets which means our windows encapsulate two hours of information. For the weather dataset, we use a window size of 24, so each window encapsulates a day of information.</p> <p>We will use the same metrics described by Xu et al <d-cite key="xu2018unsupervised"></d-cite>. Given the probability reconstruction scores, we can compute the precision and recall for a given threshold. 
The original paper and our experiments are not entirely concerned with the process of automatic threshold selection, so f scores and the ROC curve serve as valid evaluation metrics of the reconstruction probability scores. The main performance metric will be the best f score. Xu et al <d-cite key="xu2018unsupervised"></d-cite> also allow for delayed detection. That is, in any contiguous sequence of anomaly points, if a point is classified as an anomaly, then the anomaly points that came before it will also be classified as an anomaly if they had not previously been before. The authors claim this is reasonable, as the delay (the time between the first detected anomaly point and the first point in the contiguous sequence) is quite low. This also seems reasonable in a practical setting, as being able to detect an anomaly within a few hours of it happening can still give you enough time to act.</p> <p>For our experiments, we will use fairly small and simple architectures. The baseline VAE in the paper is done using fully connected networks, and so we will use a fully connected network with depth two. We also experiment with CNN VAE’s, and in order to try and compare performance with the fully connected VAE encoders and decoders, we also use a CNN with two layers. We perform experiments on behavior when the latent dimension is increased, and needed to double the width and depth of the fully connected VAE in order to allow for training to converge.</p> <h2 id="reproducing-results-and-establishing-baselines">Reproducing Results and Establishing Baselines</h2> <p>Xu et al <d-cite key="xu2018unsupervised"></d-cite> show that using both mcmc imputation and missing data injection along with M-ELBO improves performance over just using M-ELBO. In our first experiment, we compare the performance of DONUT on the cpu dataset with both mcmc imputation and missing data injection, just mcmc imputation, just missing data injection, and neither of the methods. 
For each configuration, and for future experiments, we will run the full training and prediction loop 10 times and average results due to the randomness baked into training and prediction with VAE’s. We see that the average best f score is highest when we use both mcmc imputation and missing data injection. We also plot a density estimate of the f scores from the four configurations, and notice that the f scores of the configuration with both mcmc imputation and missing data injection show more right skewness and fatter tails than the other three configurations.</p> <table> <thead> <tr> <th>Configuration</th> <th style="text-align: center">avg best f score over 10 runs</th> </tr> </thead> <tbody> <tr> <td>both</td> <td style="text-align: center">.642</td> </tr> <tr> <td>just inject</td> <td style="text-align: center">.613</td> </tr> <tr> <td>just mcmc</td> <td style="text-align: center">.5737</td> </tr> <tr> <td>neither</td> <td style="text-align: center">.588</td> </tr> </tbody> </table> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/f_score_distribution-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/f_score_distribution-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/f_score_distribution-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/f_score_distribution.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Plot of distribution of the f scores 
shaded by the configuration </div> <p>Next, we run DONUT with the baseline configurations for each of our three datasets. We randomly sample a third of the training data and plot the selected samples mappings in 3-d z space. We also plot the predicted anomaly points by the model with the highest f score over the 10 runs for each of the three datasets.</p> <table> <thead> <tr> <th>Dataset</th> <th style="text-align: center">avg best f score over 10 runs</th> </tr> </thead> <tbody> <tr> <td>cpu</td> <td style="text-align: center">.642</td> </tr> <tr> <td>g</td> <td style="text-align: center">.881</td> </tr> <tr> <td>weather</td> <td style="text-align: center">.996</td> </tr> </tbody> </table> <p>Xu et al <d-cite key="xu2018unsupervised"></d-cite> noticed a phenomenon they labeled “time gradient” in latent space.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/original_paper_latent-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/original_paper_latent-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/original_paper_latent-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/original_paper_latent.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Plot of learned latent space from the original paper of one of the datasets in the paper. 
</div> <p>They noticed that the latent space was spread out according to time of the day, as time of the day likely encoded a large amount of information about the shape of the series. We did not notice such a phenomenon in our experiments. This is likely the result of a difference in experimental setting, but could also be the result of the local variation within the seasonal data, and the fact that similar shapes occur all over the series irrespective of time. We see that on the second dataset, DONUT learned to classify many troughs in the series as anomalous. It was able to detect both global anomalies, as well as contextual and seasonal anomalies, as seen by its ability to detect sudden flat shapes in the series and sudden spikes in unusual places.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_g-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_g-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_g-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_g.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> The baseline predictions on the g dataset. Ground truth anomalies are colored red and predicted anomalies are green </div> <p>The performance on the third dataset is quite surprising. 
Given no anomalous data in the train set, DONUT was able to achieve a .996 average best f score on the testing data. This highlights DONUT’s ability to really learn the normal features of a series. Thus during testing, it was not able to reconstruct the anomalous parts of the series as well as the normal parts. While this result does not contradict the claim that it is important to train on both normal and anomalous data, it certainly suggests that there is still value in learning purely normal qualities of the data. M-ELBO does not fully remove learning of anomalous qualities of the data in the latent space, which could lead to unexpectedly high reconstruction probability scores on anomalous testing data.</p> <h2 id="understanding-the-latent-space">Understanding The Latent Space</h2> <p>It is important that we gain further insights on the latent space, as it is the bottleneck of any VAE method. We will perform a variety of experiments that aim to fully uncover how each term in ELBO controls the characteristics of the latent space. We begin by first explaining the findings and claims of the original paper.</p> <p>The authors claim that the number of dimensions in the latent space plays a huge role. A small dimension latent space would not allow you to capture enough information, and too big a latent space would cause DONUT to perhaps capture too much information, including anomalous representations. They found that latent dimensions between 3 and 10 typically produced good results. They next discuss how they believe each term in the ELBO contributes to the time gradient phenomena they observe. We restate the M-ELBO objective</p> \[E_{z \sim q_{\psi}(z | x)}[\sum_{w = 1}^W \alpha_w log p_{\theta}(x | z)+ \beta log p_z(z) - log q_{\psi}(z | x)]\] <p>We can rewrite this objective as</p> \[E_{z \sim q_{\psi}(z | x)}[\sum_{w = 1}^W \alpha_w log p_{\theta}(x | z)+ \beta log p_z(z)] + H[z | x]\] <p>Where \(H[z | x]\) is entropy. 
The authors claim that the first term, \(log p_{\theta}(x | z)\) requires the latent space to be able to reconstruct normal x well, thus it pushes latent representations of dissimilar x further away from each other. The second term, \(log p_z(z)\), serves to encourage the gaussian shape in the latent space and thus encourages the latent space to not expand too much. However, we shrink the contribution of this term by the ratio of normal points in our training data. The entropy term encourages expansion of the latent space, as it is largest when the latent space encodes as much information as possible. This should happen when the latent representations are as distinguishing as possible.</p> <h2 id="effects-of-changing-the-latent-distribution">Effects of Changing the latent distribution</h2> <p>Most VAE methods traditionally represent the latent space as a mixture of gaussians, both for its simplicity, as well as its flexibility and ability to approximate many complicated distributions. What happens when we use other types of distributions? We will analyze what happens to performance and the shape of the latent space when we represent it as a mixture of Student-T distributions with 10 degrees of freedom. We hypothesize that replacing a mixture of gaussians with a mixture of any other symmetric distribution will not cause any profound differences in the shape of the latent space, at least in 3 dimensions, however, a symmetric latent space with fatter tails could lead to worse reconstruction performance. Consider \(P_{\theta}(x | z)\), where z is sampled from the latent space. With a fatter tailed distribution, we are more likely to sample a z that is further away from the mean of its distribution. This behavior can be beneficial for generative purposes but for reconstruction purposes, this behavior is likely detrimental and will lead to lower likelihoods that a given x came from the sampled z. We now analyze the empirical effects for all three datasets. 
For the cpu dataset, we notice that the latent space does not look drasticaly different, considering we only plot a random subset of it. We do however notice a performance dip.</p> <table> <tbody> <tr> <td>Latent Distribution</td> <td>avg best f score over 10 runs</td> </tr> <tr> <td>gaussian</td> <td>.642</td> </tr> <tr> <td>t with 10 df</td> <td>.593</td> </tr> </tbody> </table> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space%20cpu-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space%20cpu-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space%20cpu-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space%20cpu.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_tdist_cpu-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_tdist_cpu-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_tdist_cpu-1400.webp" /> <!-- Fallback to the original file --> 
<img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_tdist_cpu.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> The image on the left is the latent space of the cpu dataset made by guassian mixture, and the image on the right is the latent space of the cpu dataset made by a t-distribution mixture </div> <p>Similarly for the g dataset, we see a slight performance reduction, but a similarly shaped latent space.</p> <table> <tbody> <tr> <td>Latent Distribution</td> <td>avg best f score over 10 runs</td> </tr> <tr> <td>gaussian</td> <td>.8809</td> </tr> <tr> <td>t with 10 df</td> <td>.871</td> </tr> </tbody> </table> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_g-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_g-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_g-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_g.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/laten_space_tdist_g-480.webp" /> <source 
class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/laten_space_tdist_g-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/laten_space_tdist_g-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/laten_space_tdist_g.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> The image on the left is the latent space of the g dataset made by guassian mixture, and the image on the right is the latent space of the g dataset made by a t-distribution mixture </div> <p>For the weather dataset, the performance reduction is negligible which suggests that the means of our learned latent space truly represent the normal patterns of the series. (Note that this dataset did not come with timestamps. 
Disregard any time colorations on latent space plots)</p> <table> <tbody> <tr> <td>Latent Distribution</td> <td>avg best f score over 10 runs</td> </tr> <tr> <td>gaussian</td> <td>.996</td> </tr> <tr> <td>t with 10 df</td> <td>.995</td> </tr> </tbody> </table> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_weather-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_weather-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_weather-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_weather.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_tdist_weather-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_tdist_weather-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_tdist_weather-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_tdist_weather.png" 
class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> The image on the left is the latent space of the weather dataset made by guassian mixture, and the image on the right is the latent space of the weather dataset made by a t-distribution mixture </div> <p>This brief analysis suggests that the gaussian distribution is truly a good adaptable choice for our latent space. It allows for some variability when doing generative modeling, but also allows for a more robust estimator of reconstruction probability.</p> <h2 id="should-we-scale-the-entropy-term-in-m-elbo">Should we Scale the Entropy term in M-ELBO?</h2> <p>Xu et al <d-cite key="xu2018unsupervised"></d-cite> provide a weighting system for the first two terms of M-ELBO, but choose not to add any weighting to the entropy term. They hypothesize that because of the entropy term’s role in expanding the latent space, it is perhaps better to keep it untouched. We will perform an empircal analysis on the effects on weighting the entropy term.</p> <p>In our first experiment, we choose a reasonable choice for the weight of the entropy term. We will use \(\beta\) to weight both \(logP_{z}(z)\) and \(logq_{\psi}(z | x)\). 
Thus M-ELBO becomes</p> \[E_{z \sim q_{\psi}(z | x)}[\sum_{w = 1}^W \alpha_w log p_{\theta}(x | z)+ \beta log p_z(z) - \beta log q_{\psi}(z | x)]\] <p>We can reformulate the M-ELBO in terms of the KL divergence to hypothesize what effects scaling \(logq_{\psi}(z | x)\) by \(\beta\) might have.</p> \[E_{z \sim q_{\psi}(z | x)}[log p_{\theta}(x | z)] - KL(q_{\psi}(z | x)^{\beta} || p_z(z)^{\beta})\] <p>Using the power rule of logarithms, we can rewrite this objective as</p> \[E_{z \sim q_{\psi}(z | x)}[log p_{\theta}(x | z)] - \beta KL(q_{\psi}(z | x) || p_z(z))\] <p>Thus we have essentially applied shrinkage to the KL divergence between the prior and the posterior based on the amount of abnormal data in our training data. This would perhaps encourage the latent space to look more gaussian, such that the prior probability dominates the posterior probability in order to increase the M-ELBO lower bound. Thus we can hypothesize that our latent space will perhaps experience shrinkage. This would certainly be undesired behavior if our goal is to expand our latent space and allow for more distinguishing latent space represenations while keeping some form of structure.</p> <p>We now analyze the emperical results. We first analyze the effects on the cpu dataset. There does seem to be signs of shrinkage in the latent space when it is weighted, however there is no clear absolute shrinkage or expansion. The shape is certainly different, and it seems like the latent space expanded in the negative direction in the second dimension of the latent space, and shrunk in the positive direction. 
We also observe a performance increase.</p> <table> <tbody> <tr> <td> </td> <td>avg best f score over 10 runs</td> </tr> <tr> <td>Unweighted Entropy</td> <td>.642</td> </tr> <tr> <td>Weighted Entropy</td> <td>.665</td> </tr> </tbody> </table> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space%20cpu-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space%20cpu-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space%20cpu-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space%20cpu.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_cpu-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_cpu-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_cpu-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_cpu.png" class="img-fluid rounded z-depth-1" width="auto" 
height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> The image on the left is the latent space of the cpu dataset without weighting the entropy term, and the image on the right is the latent space of the cpu dataset with a weighted entropy term </div> <p>On the g dataset, we can certainly see a differently shaped latent space. We notice that the third dimension of the latent space expanded, while the first and second dimensions showed some level or shrinkage compared to the baseline. We do see a slight reduction in performance compared to the baseline</p> <table> <tbody> <tr> <td> </td> <td>avg best f score over 10 runs</td> </tr> <tr> <td>Unweighted Entropy</td> <td>.8809</td> </tr> <tr> <td>Weighted Entropy</td> <td>.875</td> </tr> </tbody> </table> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_g-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_g-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_g-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_g.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" 
srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_g-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_g-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_g-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_g.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> The image on the left is the latent space of the g dataset without weighting the entropy term, and the image on the right is the latent space of the g dataset with a weighted entropy term </div> <p>Finally, for the weather dataset, we also see that weighting the entropy term did not lead to absolute expansion or shrinkage of our latent space. We observe shrinkage in the third dimension of the latent space, slight shrinkage in the first dimension, and slight expansion in the second dimension. 
We also observe a slight performance dip.</p> <table> <tbody> <tr> <td> </td> <td>avg best f score over 10 runs</td> </tr> <tr> <td>Unweighted Entropy</td> <td>.9967</td> </tr> <tr> <td>Weighted Entropy</td> <td>.9928</td> </tr> </tbody> </table> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_weather-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_weather-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_weather-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_weather.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_weather-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_weather-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_weather-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_q_weather.png" class="img-fluid rounded 
z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> The image on the left is the latent space of the weather dataset without weighting the entropy term, and the image on the right is the latent space of the weather dataset with a weighted entropy term </div> <p>These results suggest that weighting the entropy term can lead to shrinkage of the latent space. It certainly led to different latent space shapes, where we observed expansion in some dimensions and shrinkage in others. There are also no conclusive results in its effects on performance, as we saw improved performance in one dataset and decreased performance in the other two.</p> <p>We will now perform a more general experiment on the effects of weighting the entropy term with the cpu dataset. Instead of weighting the entropy term with \(\beta\), we will try different weights between 0 and 1 and observe the effects. We increased the capacity of our VAE network, so we rerun the experiments on weighting entropy with \(\beta\) and not weighting entropy in order to have a valid comparison of results.</p> <p>When the entropy term is weighted by zero, we notice a very spherically shaped latent space which looks like a unit gaussian ball. This matches up with a quick evaluation of the elbo. There is no more reshaping of our latent space by the entropy term, and thus DONUT learns a latent space that matches up with the gaussian prior. With a weight of .2, we again see a circular latent space, however there is more deviation from a spherical shape. We continue to see this phenomenon of deviating from a spherical shape when the weights increase. We also notice that the points become more clustered as the weights increase. There seems to be a level of shrinkage as the weights increase, but for weights equal to .8 and .9, we see the latent space expand again. 
These results indicate that it is unlikely that weighting the entropy term has any effect on expanding the latent space. Results even suggest that nonzero weights can lead to shrinkage. However, weighting the entropy term certainly affects the shape of our latent space, and the ability of the VAE to learn representations that look less gaussian.</p> <p>The performance results provide some interesting insights, and can serve to motivate future areas of exploration. We see that performance is maximal when the weights are very low, or around .6 and .7. When the weights are low, the latent space is very constrained, and thus DONUT will learn purely normal representations of the data. As the weights increase, and the ability of DONUT to learn latent representations that deviate from purely gaussian increases, we generally see consistently good performance that is comparable to the zero weight case. With weights larger than .8, we begin to see a dip in performance. With large weights, the latent space deviates the most from being gaussian shaped and perhaps begins to learn anomalous representations. This suggests a benefit to enforcing more normality and constraint on the shape of our latent space for the purposes of anomaly detection. 
This could mean not weighting the prior term by \(\beta\), or adding some additional terms to M-ELBO that somehow enforce the latent space to look more guassian.</p> <table> <thead> <tr> <th>weight</th> <th>avg best f score over 5 runs</th> </tr> </thead> <tbody> <tr> <td>0</td> <td>.682</td> </tr> <tr> <td>.1</td> <td>.673</td> </tr> <tr> <td>.2</td> <td>.657</td> </tr> <tr> <td>.3</td> <td>.602</td> </tr> <tr> <td>.4</td> <td>.666</td> </tr> <tr> <td>.5</td> <td>.634</td> </tr> <tr> <td>.6</td> <td>.7</td> </tr> <tr> <td>.7</td> <td>.688</td> </tr> <tr> <td>.8</td> <td>.602</td> </tr> <tr> <td>.9</td> <td>.625</td> </tr> <tr> <td>1 or unweighted</td> <td>.64</td> </tr> <tr> <td>Beta weighted</td> <td>.6</td> </tr> </tbody> </table> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_cpu_expandednet-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_cpu_expandednet-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_cpu_expandednet-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_latent_space_cpu_expandednet.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q0-480.webp" /> <source 
class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q0-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q0-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q0.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> The image on the left is the latent space of the cpu dataset without weighting the entropy term, and the image on the right is the latent space of the cpu dataset with a weight of 0 on the entropy term </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q2.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" 
srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q4-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q4-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q4-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q4.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> The image on the left is the latent space of the cpu dataset with a weight of .2 on the entropy term, and the image on the right is the latent space of the cpu dataset with a weight of .4 on the entropy term. 
</div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q6-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q6-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q6-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q6.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q8-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q8-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q8-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_q8.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> The image on the left is the latent space of the cpu dataset with a weight of .6 on the entropy term, and the image on the right is 
the latent space of the cpu dataset with a weight of .8 on the entropy term </div> <h2 id="empirical-exploration-of-the-effects-of-beta-and-the-missing-data-injection-rate">Empirical Exploration of the Effects of Beta and the Missing Data Injection Rate</h2> <p>We now perform analysis on exactly how \(\beta\) affects \(p_z(z)\), both through experimenting with differing rates of missing data injection, as well as manually adjusting \(\beta\) and observing the results.</p> <p>We restate M-ELBO in terms of the KL divergence.</p> \[E_{z \sim q_{\psi}(z | x)}[\log p_{\theta}(x | z)] - KL(q_{\psi}(z | x) || p_z(z)^{\beta})\] <p>As \(\beta\) decreases, the KL divergence increases. In order to decrease the divergence, the VAE should decrease the posterior probability, which could lead to a more spread out or non gaussian looking latent space, or rather one where we learn very distinguishing representations. As seen from our previous analysis, this might be undesired behavior for the purposes of anomaly detection. Performing automatic reduction of \(\beta\) by increasing the missing data injection rate could prevent DONUT from learning enough about the normal patterns in the training data, and thus performance will likely suffer if the injection rate gets too large.</p> <p>We begin first by trying out \(\beta\) values between 0 and 1 in order to observe the effects, and motivate adjusting the missing data injection rate.</p> <p>When \(\beta\) is set to 0, we see that the latent space looks fairly compact and non spherical. At \(\beta\) between .1 and .4, we can see that the latent space is quite spread out, and displays some spherical properties, especially for \(\beta\) = .3. For \(\beta\) between .4 and .9, we can see that the sampled latent space begins to look more and more compact, yet there is still a reasonable spread in the latent space. 
There does not seem to be a clear relationship between the spread and shape of the latent space and performance; however, we note that the \(\beta\) that resulted in the highest performance was \(\beta\) = .3, whose latent space looks the most spherical. This again supports the notion that when the latent space looks more gaussian, anomaly detection is improved.</p> <table> <thead> <tr> <th>Beta</th> <th>avg best f score over 5 runs</th> </tr> </thead> <tbody> <tr> <td>0</td> <td>.648</td> </tr> <tr> <td>.1</td> <td>.595</td> </tr> <tr> <td>.2</td> <td>.591</td> </tr> <tr> <td>.3</td> <td>.686</td> </tr> <tr> <td>.4</td> <td>.633</td> </tr> <tr> <td>.5</td> <td>.6</td> </tr> <tr> <td>.6</td> <td>.623</td> </tr> <tr> <td>.7</td> <td>.614</td> </tr> <tr> <td>.8</td> <td>.669</td> </tr> <tr> <td>.9</td> <td>.646</td> </tr> <tr> <td>1 or unweighted</td> <td>.64</td> </tr> <tr> <td>Beta weighted</td> <td>.6</td> </tr> </tbody> </table> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_0-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_0-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_0-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_0.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" 
media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_1.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> The image on the left is the latent space of the cpu dataset with a weight of zero on the prior term, and the image on the right is the latent space of the cpu dataset with a weight of .1 on the prior term </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_3-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_3-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_3-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_3.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> 
</picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_5-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_5-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_5-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_5.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> The image on the left is the latent space of the cpu dataset with a weight of .3 on the prior term, and the image on the right is the latent space of the cpu dataset with a weight of .5 on the prior term. 
</div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_7-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_7-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_7-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_7.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_9-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_9-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_9-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/latent_space_cpu_pbeta_9.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> The image on the left is the latent space of the cpu dataset with a weight of .7 on the 
prior term, and the image on the right is the latent space of the cpu dataset with a weight of .9 on the prior term </div> <p>In our experiments on adjusting the missing data injection rate, we saw a significant decrease in performance as the rate increased, even reaching an average best f score of .06 when the rate was .8. It is unclear from our experiments whether this is the result of training not converging, as we do observe high loss values, or simply bad performance of DONUT when a vast majority of the data is missing, which would be expected behavior. This is something that would need to be explored further.</p> <h2 id="improving-vae-architecture">Improving VAE Architecture</h2> <p>For the purposes of simplicity, DONUT utilizes fully connected layers for both the encoder and the decoder. While these choices certainly produce decent results, perhaps we can implement architectures that can better utilize the temporal information encoded within each window. We explore using a one dimensional CNN for the encoder in DONUT. Perhaps CNNs are better able to learn representations that encode more temporal information within a sample window. In order to make the CNN network as comparable as possible with the fully connected network, we will only use two convolution layers. We apply a kernel size of 3, and a stride of 1. We also use max pooling to downsample the data.</p> <p>For the cpu dataset, we observe significant performance improvements with the CNN architecture. We notice the detection of contextual anomalies, which are non obvious local deviations. 
The latent space looks fairly spherical, however there does not seem to be any noticeable time gradient behavior in the latent space, despite the improved ability of the encoder to take advantage of temporal information.</p> <table> <tbody> <tr> <td>Architecture</td> <td>avg best f score over 10 runs</td> </tr> <tr> <td>2 layer CNN</td> <td>.714</td> </tr> <tr> <td>2 layer fc</td> <td>.642</td> </tr> </tbody> </table> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/best_baseline_pred_cpu-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/best_baseline_pred_cpu-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/best_baseline_pred_cpu-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/best_baseline_pred_cpu.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_cpu-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_cpu-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_cpu-1400.webp" /> <!-- Fallback to the original file 
--> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_cpu.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> On the top are the ground truth and predicted anomaly points by the baseline DONUT with fully connected encoders on the cpu dataset. On the bottom are the ground truth and predicted anomaly points by DONUT with CNN encoders on the cpu dataset. Ground truth anomalies are colored in red, and predicted anomalies are colored in green </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_cpu-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_cpu-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_cpu-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_latent_space_cpu.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Latent space plot for DONUT with CNN encoders on the cpu dataset </div> <p>We did not see this same performance improvement in the other two datasets. Additionally, we struggled to achieve stable training on the weather dataset, and so further work needs to be done to achieve convergence in order to perform evaluations on the efficacy of CNNs with that dataset. 
For the g dataset, we noticed a significant performance reduction. The difference between the performance on the cpu dataset and the g dataset could suggest that CNN architectures could lead to overfitting on less smooth time series. Looking at the plot of predicted anomalies seems to suggest this, as DONUT with a CNN encoder seems to predict that a larger number of the troughs in the g series are anomaly points, an indicator of potential overfitting to the series pattern.</p> <table> <tbody> <tr> <td>Architecture</td> <td>avg best f score over 10 runs</td> </tr> <tr> <td>2 layer CNN</td> <td>.824</td> </tr> <tr> <td>2 layer fc</td> <td>.881</td> </tr> </tbody> </table> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_g-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_g-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_g-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/baseline_pred_g.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_g-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" 
srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_g-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_g-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Exploring-Generative-Models-In-Time-Series/cnn_pred_g.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> On the top are the ground truth and predicted anomaly points by the baseline DONUT with fully connected encoders on the g dataset. On the bottom are the ground truth and predicted anomaly points by DONUT with CNN encoders on the g dataset. Ground truth anomalies are colored in red, and predicted anomalies are colored in green </div> <p>This is an interesting area of exploration for DONUT. There are a variety of architectures such as RNNs and transformers that have shown superior performance on time series data, and those could be adapted to this method to improve performance over both CNN and fully connected architectures.</p> <h2 id="choosing-number-of-latent-space-dimensions">Choosing Number of Latent Space Dimensions</h2> <p>For the purposes of plotting the latent space in our experiments, we chose to use a latent space with dimension three. However, intuitively, and as shown in the paper, choosing a higher dimension latent space can lead to performance improvements. <d-cite key="xu2018unsupervised"></d-cite> explain that not only does increasing the size of the latent space increase the quality of the representations learned in the latent space, but it also serves to improve the stability of training. 
We will look to explore whether the properties of a series can give insight into good choices for the size of the latent space, and perhaps motivate automatic selection of the number of dimensions in the latent space.</p> <p>We hypothesize that smoother series do not need as large a dimension in the latent space as series that display higher levels of roughness. Intuitively, in smoother series, the anomalies should be more “obvious”, while in less smooth series, rough behavior could be mistaken for an anomalous pattern.</p> <p>We take a technique from smoothing splines, which are function estimates obtained from noisy observations of some data process. Smoothing splines enforce a roughness penalty on the function estimate, defined as such <d-cite key="wang2011smoothing"></d-cite></p> \[\int \hat{f}''(x)^2 \, dx\] <p>We will use a finite difference estimate of this penalty on the standardized series to define a metric that can be used to describe the roughness/smoothness of the series. Now that we have defined a metric describing the smoothness of a series, we can evaluate the best choice of the number of latent dimensions for series of differing levels of smoothness. In order to converge during training, we had to double the width of the fully connected VAE, and also double its depth.</p> <table> <thead> <tr> <th>Dataset</th> <th>Roughness Penalty</th> </tr> </thead> <tbody> <tr> <td>cpu</td> <td>.061</td> </tr> <tr> <td>g</td> <td>.598</td> </tr> <tr> <td>weather</td> <td>.023</td> </tr> </tbody> </table> <p>We begin with the cpu dataset. We notice that performance significantly increases when the latent space is 6 dimensions, but performance begins to drop off as the number of dimensions increases, which suggests overfitting.</p> <table> <thead> <tr> <th>number of dimensions</th> <th>avg best f score over 5 iterations</th> </tr> </thead> <tbody> <tr> <td>3</td> <td>. 
637</td> </tr> <tr> <td>6</td> <td>.833</td> </tr> <tr> <td>9</td> <td>.826</td> </tr> <tr> <td>12</td> <td>.797</td> </tr> </tbody> </table> <p>For the g dataset, performance peaks when the latent space has 9 dimensions. We also see slightly better performance with a latent space dimension of 12 compared to 6</p> <table> <thead> <tr> <th>number of dimensions</th> <th>avg best f score over 5 iterations</th> </tr> </thead> <tbody> <tr> <td>3</td> <td>. 889</td> </tr> <tr> <td>6</td> <td>.882</td> </tr> <tr> <td>9</td> <td>.894</td> </tr> <tr> <td>12</td> <td>.885</td> </tr> </tbody> </table> <p>For the weather dataset, we notice a consistent performance improvement when the number of dimensions is increased.</p> <table> <thead> <tr> <th>number of dimensions</th> <th>avg best f score over 5 iterations</th> </tr> </thead> <tbody> <tr> <td>3</td> <td>. 994</td> </tr> <tr> <td>6</td> <td>.997</td> </tr> <tr> <td>9</td> <td>.998</td> </tr> <tr> <td>12</td> <td>1</td> </tr> </tbody> </table> <p>These results do not provide any clear picture on whether there is any relationship between the smoothness of a series and the best choice for the number of latent dimensions. For our smoothest series (weather), we observed consistent improvement as the number of dimensions increases. The roughest series (g) also seems to show this behavior. However, we see that increasing the number of dimensions for the cpu dataset decreases performance.</p> <h2 id="concluding-thoughts">Concluding Thoughts</h2> <p>Generative models present an interesting approach to the problem of anomaly detection in time series. They present an extremely customizable class of hypotheses that allow us to design a fairly robust probabilistic anomaly detector. Through the experiments we ran, we gained further insights into DONUT, and VAE’s more generally as anomaly detectors. 
We explored what characteristics of the learned latent space can lead to improved anomaly detection performance, and how we can modify ELBO to achieve those goals. We also see that there is huge potential for exploring more complex encoder architectures for additional performance improvements. Perhaps VAEs can become a robust tool for anomaly detection, and provide benefit to a large variety of people and industries.</p> Graph Transformers 2023-12-11T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/graphs-transformers <h2 id="motivation--project-outline">Motivation &amp; Project outline</h2> <p>Our project aims to advance the understanding of Transformers in graph theory, focusing on the Shortest Path Problem, a cornerstone of graph theory and Dynamic Programming (DP). We introduce a custom Graph Transformer architecture, designed to tackle this specific challenge. Our work begins with a theoretical demonstration that the shortest path problem is Probably Approximately Correct (PAC)-learnable by our Graph Transformer. We then empirically test its performance, comparing it against simpler models like Multilayer Perceptrons (MLPs) and sophisticated benchmarks like Graph Neural Networks (GNNs). 
This study seeks to validate the Graph Transformer as an effective tool for solving fundamental graph-based problems, and “simple” DP problems in particular.</p> <div class="row align-items-center mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/erdos_renyi.svg-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/erdos_renyi.svg-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/erdos_renyi.svg-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-graphs-transformers/erdos_renyi.svg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/transformer-architecture-diagram-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/transformer-architecture-diagram-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/transformer-architecture-diagram-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-graphs-transformers/transformer-architecture-diagram.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption mt-3"> Left: example of an Erdős–Rényi graph, right: original Transformer architecture </div> <h2 
id="introduction--literature-review">Introduction &amp; Literature review</h2> <p>Transformers have shown significant effectiveness in domains that require an understanding of long-range dependencies and contextual information. Originally prominent in natural language processing<d-cite key="devlin2018bert"></d-cite>, their applications have expanded to include areas such as computer vision<d-cite key="dosovitskiy2020image"></d-cite> and speech recognition<d-cite key="wang2020transformer"></d-cite>. Recent explorations have also delved into Transformers’ abilities in mathematical tasks like arithmetic, GCD computations, and matrix operations<d-cite key="DBLP:journals/corr/abs-2112-01898"></d-cite><d-cite key="charton2023transformers"></d-cite><d-cite key="lample2019deep"></d-cite>, shedding light on the learning mechanisms of these models.</p> <p>A particular area of interest within these applications is graph problems. Recent research has assessed Transformers’ performance in this domain<d-cite key="DBLP:journals/corr/abs-2106-05234"></d-cite> and explored adapting the Transformer architecture to fit the context of graph problems<d-cite key="DBLP:journals/corr/abs-1905-12712"></d-cite>. However, much of the current research does not focus on Transformers’ comprehension of fundamental graph challenges, such as the shortest path problem. Notably, in the studies mentioned above, the shortest path is often directly input as a matrix, with each entry $i,j$ representing the shortest path distance between nodes $i$ and $j$. Our study will investigate Transformers’ performance on “raw” graph data, where only edge weights, the adjacency matrix, and positional encodings are provided. 
The Transformer will be trained to predict the shortest path from a designated node 0 to all other nodes, in the form of an $n\times1$ vector<d-cite key="DBLP:journals/corr/abs-1905-13211"></d-cite>.</p> <p>We will demonstrate that, by adapting the Transformer architecture for our purposes, the shortest path problem and other “simple” dynamic programming (DP) challenges are Probably Approximately Correct (PAC)-learnable by the model. Our approach is based on the framework developed for GNNs<d-cite key="DBLP:journals/corr/abs-1905-13211"></d-cite> and adapted to our Graph Transformer.</p> <h2 id="graph-transformer-model-design">Graph Transformer Model Design</h2> <p>Let’s dive into our Graph Transformer model, drawing inspiration from the classical Transformer architecture.</p> <h3 id="vanilla-transformer">Vanilla Transformer</h3> <p>We first recall the vanilla architecture of Transformers, described in <d-cite key="DBLP:journals/corr/VaswaniSPUJGKP17"></d-cite>, which is fundamentally built on two key ideas: tokenization and attention, both of which we adapt for graph data.</p> <p>In our context, think of tokens like the attributes of nodes in Graph Neural Networks (GNNs). These tokens are packets of information, allowing transformers to handle diverse data types, including graphs. The process begins with a token net, which is a sequence of linear and non-linear layers. This is somewhat equivalent to the alternating aggregation and combination stages in a GNN, where each node processes and integrates information from its neighbors.</p> <p>The real game-changer in transformers, however, is the attention mechanism, layered on top of the token net. This mechanism involves a set of matrices known as query, key, and value. These matrices enable tokens to use information from the nodes they’re paying attention to, in order to learn and update their own values.</p> <p>Here’s a simple way to visualize it. 
Imagine each token in the transformer scanning the entire graph and deciding which nodes (or other tokens) to focus on. This process is driven by the query-key-value matrices. Each token creates a ‘query’, which is then matched against ‘keys’ from other tokens. The better the match, the more attention the token pays to the ‘value’ of that other token. Mathematically, this can be expressed as:</p> \[Attention(Q, K, V) = softmax \left(\frac{QK^T}{\sqrt{d_k}} \right)V\] <p>In this formula, $ Q $, $ K $, and $ V $ represent the query, key, and value matrices, respectively. The term $ \sqrt{d_k} $ is a scaling factor based on the dimensionality of the keys.</p> <p>While the process in Graph Neural Networks (GNNs) might seem similar, there’s an essential distinction to be made. In GNNs, the flow of information is local, with nodes exchanging information with their immediate neighbors. However, in our Graph Transformer model, we employ self-attention to potentially allow each node (or token) to consider information from the entire graph. This includes nodes that might be several steps away in the graph structure.</p> <p>One axis of our research is then to explore the potential benefits - or drawbacks - of this global perspective, and to see how leveraging global information compares to the traditional local feature aggregation used in GNNs, in the context of graph theory challenges like the Shortest Path Problem. By enabling each node to have a broader view of the entire graph, we’re exploring how this approach influences the prediction quality (Accuracy) and the efficiency of path computations, specifically focusing on the speed at which the network adapts and learns (Training Efficiency).</p> <p>A full Transformer will be a sequence of self-attention layers and MLPs. 
We now turn to the specifics of how we implement it, starting with tokenization.</p> <div class="row justify-content-center align-items-center mt-3"> <div class="col-md-6 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/transformer_DL-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/transformer_DL-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/transformer_DL-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-graphs-transformers/transformer_DL.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption mt-3"> Vanilla Transformer architecture (Source: 6.S898 Lecture) </div> <h3 id="tokenization-approach-and-positional-encoding">Tokenization Approach and Positional Encoding</h3> <p>The first step in our model is converting graph information (including nodes, edges, and their weights) into a format suitable for transformers. We’ve developed a method to encode this graph data into tokens.</p> <p>Each token in our system is a vector with a length of $2n$. Here, $n$ represents the number of nodes in the graph. Half of this vector contains binary values indicating whether a connection exists to other nodes (1 for a connection, 0 for no connection). The other half of the vector holds the weights of these edges.</p> \[\text{Token} = [\text{Edge Connections (Binary Values)}, \text{Edge Weights}] = [\mathbf{a}, \mathbf{w}]\] <p>This structure seems sufficient to capture the essential structure of the graph. 
But, to further aid the transformer in identifying the shortest path, we can introduce additional local information into these tokens through positional encoding. Encoding positional information of the nodes has already been achieved in various ways, for example, using graph kernels <d-cite key="DBLP:journals/corr/abs-2106-05667"></d-cite>. Here we choose a simpler onehot encoding method: we assign an arbitrary rank to each node and include an indicator vector within the token. This vector, also of size $n$, points to the node’s position. With this addition, each token becomes a vector of size $3n$:</p> \[\text{Token} = [\text{Edge Connections}, \text{Edge Weights}, \text{Positional Encoding}] = [\mathbf{a}, \mathbf{w}, \mathbf{1}]\] <p>We plan to rigorously test both approaches as part of our diverse model lineup.</p> <h2 id="attention-in-graph-transformers---the-necessity-of-a-skip-connection">Attention in Graph Transformers - the Necessity of a Skip-Connection</h2> <p>The Query-Key-Value (QKV) Attention Mechanism is a pivotal aspect of how Graph Transformers can effectively learn the Shortest Path Problem. Building on the insights from Dudzik et al. <d-cite key="dudzik2022graph"></d-cite>, who illustrated the capacity of GNNs to tackle Dynamic Programming challenges, including the Shortest Path Problem, we delve into how Transformers might achieve similar feats using attention.</p> <p>Recall the Bellman-Ford algorithm’s key update step for the Shortest Path Problem, expressed as:</p> \[d_i^{k+1} = \min_j d_j^k + w_{i, j}\] <p>In this context, our hypothesis is that Transformers could replicate this dynamic through the attention mechanism, which we prove mathematically in Appendix A. The key observation is that the softmax layer would be able to mimic the $ \min $ operator, as long as the query-key cross product is able to retrieve $d_j + w_{i,j}$ for all nodes $i,j$. 
Intuitively, this can be done if each query token $i$ picks up on the node’s positional encoding, and each key token $j$ on the node’s current shortest path value $d_j$ and edge values $w_j$. Taking the cross product of the onehot encoding $i$ with the edge values $w_j$ would then return exactly $w_{i,j}$ for all $i,j$. To select only neighboring connections, we’ll use an appropriate attention mask.</p> <!-- Imagine queries being tailored to pinpoint the positional encoding of node $i$, while keys focus on the edge value $w_{i,j}$ between node $i$ and its neighbor $j$, as well as the connections $a_j$ which inform about the current shortest distance $d_j$. The attention would concentrate on neighbors, considering both the edge weights and the current shortest distances of these neighbors. The softmax step in the attention mechanism would then allow the token for node $i$ to zero in on the neighbor that minimizes a combination of edge weight and actual distance, paralleling the logic in the Bellman-Ford algorithm. --> <p>However, there is a catch. The learning process might not fully grasp the Bellman-Ford update using the attention mechanism alone. After the attention picks up on the correct minimizer neighbor token $j$, it needs to update the current node $i$’s values. The Bellman-Ford update isn’t a simple operation on the tokens like a sum. For instance, we only want $d_i^k$ to change, and we want to update it with the correct $w_{i,j}$. This is where the idea of incorporating a skip-connection mechanism comes into play. By concatenating tokens $i$ (the input) and $j$ (the attention’s output) before feeding them to the MLP layer following the self-attention layer, we could effectively emulate the Bellman-Ford update process.</p> <p>Overall, combining attention and skip-connection could ensure our Graph Transformer can comprehensively learn and apply the Bellman-Ford logic to solve the Shortest Path Problem. 
We offer a mathematical proof of this concept in Appendix A, using a slightly different tokenization method.</p> <p>Additionally, it’s worth considering that our Graph Transformer might be learning an entirely distinct logical process for solving the Shortest Path Problem. Still, proving that such a logic is within the model’s grasp underlines the model’s versatility in addressing some graph-related and/or dynamic programming challenges. We’ll tackle this notion in the next part about learnability and algorithmic alignment.</p> <div class="row justify-content-center align-items-center mt-3"> <div class="col-md-6 mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/skip_connection-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/skip_connection-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/skip_connection-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-graphs-transformers/skip_connection.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption mt-3"> Graph Transformer - Skip connection </div> <h3 id="model-architecture-overview">Model Architecture Overview</h3> <p>In this section, we revisit the architecture of our Graph Transformer, which is an adaptation of the standard Transformer model. Our model is composed of a sequence of self-attention layers and MLPs, each augmented with a skip-connection. 
The tokens in our model encapsulate both edge connections and their corresponding weights, alongside positional encoding.</p> <p>The most notable feature of our architecture is the introduction of the attention mask. This mask restricts the attention of each token to its immediate neighbors, aligning our approach more closely with the local message-passing process typical in GNNs. The inclusion or not of this feature and the resultant effect in our architecture marks the crucial difference between the global vs. local token aggregation methodologies that we discussed earlier.</p> <h2 id="a-measure-of-learnability">A measure of learnability</h2> <p>Our project falls into the wider research interest in the interaction between network structures and specific tasks. While basic and common structures such as MLPs are known to be universal approximators, their effectiveness varies based on the amount of data required for accurate approximations. Notably, their out-of-sample performance often lags behind task-specific architectures, such as Graph Neural Networks (GNNs) in graph-related problems, which highlights the issue of a network’s generalization capacity.</p> <p>To evaluate theoretically the ability of transformers to effectively learn the Shortest Path Problem and similar challenges, we position our study within the framework of PAC (Probably Approximately Correct) Learning. This framework allows us to explore the concept of algorithmic alignment. Algorithmic alignment is here crucial as it pertains to a model’s capability to emulate a given algorithm with a minimal number of modules, each of relatively low complexity. Such approach has already been taken by Xu et. 
al <d-cite key="DBLP:journals/corr/abs-1905-13211"></d-cite> to give a better understanding of the reasoning process of complex networks like GNNs, and it is instrumental in assessing the adaptability and efficiency of transformers in learning and solving complex graph-based tasks.</p> <h3 id="algorithmic-alignment">Algorithmic Alignment</h3> <p>In this section, we delve into a series of definitions to establish the mathematical groundwork of our investigation.</p> <p>We first recall a definition of PAC-learnability:</p> <h4 id="definition-pac-learning-and-sample-complexity">Definition (PAC learning and sample complexity)</h4> <p>Let \(\{x_i,y_i\}_{i=1}^M\) be i.i.d. samples from some distribution $ \mathcal{D} $, and suppose $ y_i = g(x_i) $ for some underlying function $ g $. Let \(f = \mathcal{A}(\{x_i, y_i\}_{i=1}^M)\) be the function generated by a learning algorithm $ \mathcal{A} $. Then $ g $ is $ (M, \epsilon, \delta) $-learnable with $ \mathcal{A} $ if</p> \[\mathbb{P}_{x \sim \mathcal{D}} [\| f(x) - g(x) \| \leq \epsilon] \geq 1 - \delta\] <p>where $ \epsilon &gt; 0 $ is the error parameter and $ \delta \in (0, 1) $ the failure probability.</p> <p>We then define the <em>sample complexity</em> as \(\mathcal{C_A}(g, \epsilon, \delta) = \min M\) for every $ M $ such that $ g $ is $ (M, \epsilon, \delta) $-learnable with $ \mathcal{A} $.</p> <p>This is a crucial concept in computational learning theory that helps us understand the feasibility of learning a given function from a set of examples to a certain degree of approximation, with a certain level of confidence.</p> <p>Next, we outline a definition that connects the concepts of function generation with the architecture of neural networks.</p> <h4 id="definition-generation">Definition (Generation)</h4> <p>Let $ f_1, \ldots, f_n $ be module functions, $ g $ a reasoning function and $ \mathcal{N} $ a neural network. 
We say that $ f_1, \ldots, f_n $ generate $ g $ for $ \mathcal{N} $, and we write \(f_1, \ldots, f_n \underset{\mathcal{N}}{\equiv} g\) if, by replacing $ \mathcal{N}_i $ with $ f_i $, the network $ \mathcal{N} $ simulates $ g $.</p> <p>Using these ideas, we then introduce a key point for our project: algorithmic alignment, which we intend to validate for Transformers applied to the Shortest Path Problem.</p> <h4 id="definition-algorithmic-alignment">Definition (Algorithmic alignment)</h4> <p>Consider a neural network $ \mathcal{N} $ with $ n $ modules \(\mathcal{N}_i\) that tries to approximate a reasoning function $ g $. Suppose that there exists $ f_1, \ldots, f_n $ some module functions such that \(f_1, \ldots, f_n \underset{\mathcal{N}}{\equiv} g\). Then $ \mathcal{N} $ is $ (M, \epsilon, \delta) $-algorithmically aligned with $ g $ if there are learning algorithms \(\mathcal{A}_i\) for the \(\mathcal{N}_i\)’s such that \(n \cdot \max_i \mathcal{C}_{\mathcal{A}_i} (f_i, \epsilon, \delta) \leq M\).</p> <p>A small number of samples $ M $ would then imply good algorithmic alignment, i.e. that the algorithmic steps $f_i$ to simulate g are <em>easy to learn</em>.</p> <p>Finally, we state the following theorem, proven by Xu et al. <d-cite key="DBLP:journals/corr/abs-1905-13211"></d-cite>. It provides evidence that generalization benefits from algorithmic alignment.</p> <h4 id="theorem-1-algorithmic-alignment-improves-sample-complexity">Theorem 1 (Algorithmic alignment improves sample complexity)</h4> <p>Fix $\varepsilon$ and $\delta$. Suppose ${x_i, y_i} \sim D$, where $|x_i| &lt; N$, and $y_i = g(S_i)$ for some $g$. Suppose $\mathcal{N}_1, \dots \mathcal{N}_n$ are $\mathcal{N}$’s MLP modules in sequential order. Suppose $\mathcal{N}$ and $g$ algorithmically align via functions $f_1, …, f_n$, and that the following assumptions hold.</p> <p><em><strong>i. Algorithm stability</strong></em>. Let $\mathcal{A}$ be the learning algorithm for the \(\mathcal{N}_i\)’s. 
Suppose \(f = \mathcal{A}(\{x_i, y_i\}^M_{i=1})\), \(\hat{f} = \mathcal{A}(\{\hat{x}_i, y_i\}^M_{i=1})\). For any x, \(\|f(x) - f(\hat{x})\| &lt; L_0 \cdot \max_i\|x_i - \hat{x}_i\|\), for some \(L_0\).</p> <p><em><strong>ii. Sequential learning</strong></em>. We train the \(\mathcal{N}_i\)’s sequentially. The inputs for $\mathcal{N}_j$ are the outputs from the previous modules \(\mathcal{N}_1, \dots, \mathcal{N}_{j-1}\), while labels are generated by the correct functions \(f_{1}, ..., f_{j-1}\).</p> <p><em><strong>iii. Lipschitzness</strong></em>. The learned functions $f_j$ satisfy \(\|f_j(x) - f_j(z)\| \leq L_1\|x - z\|\), for some $L_1$.</p> <p>Then g is learnable by N.</p> <h3 id="application-to-transformers">Application to Transformers</h3> <p>We now apply this theoretical framework to Transformers. The justifications of the results in this part will be a combination of sketch of mathematical proofs and empirical evidence. We first state a first result:</p> <h4 id="lemma-1-transformers-algorithmically-align-with-the-shortest-path-problem">Lemma 1 (Transformers algorithmically align with the Shortest Path Problem)</h4> <p>Let $ \mathcal{T} $ be a Transformer, let $ g $ be the reasoning function of the Shortest Path Problem applied to a graph with $n$ nodes. Then $ \mathcal{T} $ is algorithmically aligned with $ g $.</p> <p>We can directly prove this lemma. Let $ f_1, \ldots, f_n $ be the Bellman-Ford update processes of the Shortest Path Problem: \(d_u^{k+1} = \min_{v \in \mathcal{N}(u)} d_v^{k} + c(u, v)\) where $\mathcal{N}(u)$ is the set of neighbors of node $u$. From Bellman-Ford algorithm, we have: \(f_1, \ldots, f_n \underset{\mathcal{T}}{\equiv} g\), with $g$ being the shortest path function.</p> <p>Then, from our discussion on Transformers attention layers and proof in Appendix A, each attention-MLP sequence $\mathcal{N}_i$ has a learning algorithm $\mathcal{A}_i$ such that $f_i$ is learnable with $\mathcal{A}_i$. 
Each sample complexity is then bounded by M, which concludes the proof.</p> <p>We can now state the following theorem:</p> <h4 id="theorem-2-transformers-can-learn-the-shortest-path-problem">Theorem 2 (Transformers can learn the Shortest Path Problem)</h4> <p>Let $ \mathcal{T} $ be a Transformer, let $ g $ be the shortest path function. Then, $g$ is learnable by $\mathcal{T}$.</p> <p>We provide here a sketch of a proof of this theorem. From Lemma 1, $\mathcal{T}$ and $g$ algorithmically align via $f_1, \ldots, f_n$. We must now check the 3 assumptions of Theorem 1.</p> <p>Sequential Learning <strong>(ii)</strong> is clearly true, since transformers architectures incorporate sequence of MLPs (associated with attention layers). Li et al <d-cite key="li2023transformers"></d-cite> have provided an empirical proof of the algorithm stability <strong>(i)</strong> of transformers. Finally, considering a self-attention token network combined with a ReLU-MLP for each layer of $\mathcal{T}$, every function in the related learning algorithm $\mathcal{A}_i$ (softmax, ReLU, Linear) is Lipschitz-continuous, hence their combination is Lipschitz-continuous too, which validates Assumption <strong>(iii)</strong>.</p> <p>We can then apply Theorem 1 and conclude the proof.</p> <p>Having laid the theoretical foundation for our problem, we now turn our attention to the practical application, where we employ our Graph Transformer to the concrete task of learning and solving the Shortest Path Problem.</p> <h2 id="methodology-for-training-and-evaluation">Methodology for Training and Evaluation</h2> <h3 id="constructing-the-dataset">Constructing the Dataset</h3> <p>For training and evaluating our different models, we generate a comprehensive dataset comprising 50,000 samples, each representing a graph. 
These graphs were randomly created following the Erdős–Rényi model, specifically the $\mathcal{G}(n, p)$ variant, where <code class="language-plaintext highlighter-rouge">n</code> represents the number of nodes and <code class="language-plaintext highlighter-rouge">p</code> is the probability of edge formation between any two nodes. In our dataset, each graph consists of 10 nodes (<code class="language-plaintext highlighter-rouge">n = 10</code>), and the edge probability (<code class="language-plaintext highlighter-rouge">p</code>) is set at 0.5. This setting ensures a balanced mix of sparsely and densely connected graphs, providing a robust testing ground for the Graph Transformer’s ability to discern and compute shortest paths under varied connectivity scenarios .</p> <p>Furthermore, we assign to the edges in these graphs some weights that are integral values ranging from 1 to 10. This range of weights introduces a second layer of complexity to the shortest path calculations, as the Graph Transformer must now navigate not only the structure of the graph but also weigh the cost-benefit of traversing various paths based on these weights. The inclusion of weighted edges makes the dataset more representative of real-world graph problems, where edges often have varying degrees of traversal difficulty or cost associated with them.</p> <p>This dataset is designed to challenge and evaluate the Graph Transformer’s capability in accurately determining the shortest path in diverse graph structures under different weight conditions. The small number of nodes ensures a wide variability in the degree of connectivity in a sample graph. It also allows for an initial performance evaluation on smaller-scale problems, with the potential to extend these studies to larger-scale graphs in the future. 
Hence, the dataset’s structure supports a comprehensive assessment of the model’s performance and its adaptability to a wide range of graph-related scenarios.</p> <div class="mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/shortest_path_counts.svg-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/shortest_path_counts.svg-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/shortest_path_counts.svg-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-graphs-transformers/shortest_path_counts.svg" class="img-fluid rounded z-depth-1" width="50%" height="50%" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="caption mt-3"> Shortest path distribution for our entire dataset (50,000 samples) </div> <h3 id="training-protocols">Training Protocols</h3> <p>In the fixed dataset approach we’ve employed, the dataset is pre-constructed with 50,000 graph samples and remains unchanged throughout the training process. This method, involving a consistent 60/20/20 split for training, validation, and testing, ensures that every model is assessed under the same conditions at each epoch. This consistency is crucial for our primary goal: to compare the performance of different models or architectures in a controlled and repeatable manner. In contrast, an on-the-fly approach, where data is dynamically generated during each training epoch, introduces more variability. This variability can be beneficial in a second step for thoroughly testing the robustness and adaptability of a single model, as it faces new and diverse scenarios in each epoch. 
However, for our first objective of directly comparing different models, the fixed dataset approach provides a more stable and reliable framework to begin with.</p> <p>We use the Adam Optimizer because it’s good at handling different kinds of data and works efficiently. The learning rate is set at a standard value of 0.001, which serves as a common and reliable starting point, ensuring a consistent basis for comparing the learning performance across all models.</p> <p>Our main tool for measuring success is the L1 loss function. This function is suited for our shortest path problem because it treats all mistakes the same, whether they’re big or small. It’s different from the L2 loss, which is harsher on bigger mistakes. This way, our model pays equal attention to finding shorter and longer paths correctly.</p> <h3 id="metrics-and-evaluation-criteria">Metrics and Evaluation Criteria</h3> <p>We use two main metrics to check how well our models perform: L1 Loss and Accuracy. L1 Loss adds up all the differences between the predicted and actual path costs across all nodes. It’s a direct way to see how well the model is doing.</p> \[L1 \, Loss = \frac{1}{N} \sum_{i=1}^{N} |y_i - \hat{y}_i|\] <p>where $ N $ is the total number of nodes, $ y_i $ is the actual path cost for the $i$-th node, and $ \hat{y}_i $ is the predicted path cost for the $i$-th node.</p> <p>Accuracy is the second measure. It shows what percentage of nodes the model got exactly right in predicting the shortest path. It’s a simple way to understand how precise our model is.</p> \[Accuracy = \frac{\text{Number of Correct Predictions}}{\text{Total Number of Predictions}} \times 100\%\] <p>Here, a prediction is counted as “correct” if its rounded value is the true shortest path. I.e., if the model predicts 10.3 for a node, but the true shortest path is 11, this is marked as incorrect. 
If it predicts 10.7, it will be counted as correct.</p> <p>Together, these two measures help us see how well our Graph Transformer is doing compared to other models like MLPs and GNNs, especially in solving shortest path problems in graphs.</p> <h2 id="results-and-comparative-analysis">Results and Comparative Analysis</h2> <p>In our analysis, we compared the performances of MLPs, Transformers, and GNNs using our generated dataset. Initially, we evaluated the performance of each architecture across different sizes by recording in-sample and out-of-sample losses at each epoch, along with out-of-sample accuracy. We compared three model sizes: “small,” “mid,” and “large,” which correspond to the depth of the model. For GNNs, this signifies the number of iterations; for Transformers and MLPs, it refers to the number of layers. Small models have 2 iterations/layers, mid models 5, and large models 10.</p> <p>To maintain fair comparisons, the MLP and the Transformer were designed to have an equal total number of trainable parameters at each size. We excluded GNNs from this comparison, as they outperformed both models with significantly fewer parameters.</p> <h3 id="gnn-performance">GNN performance</h3> <p>Our GNNs demonstrated exceptional performance on the shortest path task. Tailoring the model’s architecture to this problem (using maximum aggregation and initializing node features appropriately) likely contributed to this success. However, several interesting observations emerged from our results. 
We compared GNNs of three different sizes: small (2 iterations, 13k parameters), medium (5 iterations, 32k parameters), and large (10 iterations, 64k parameters).</p> <div class="mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/gnn_train_loss.svg-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/gnn_train_loss.svg-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/gnn_train_loss.svg-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/gnn_train_loss.svg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/gnn_val_loss.svg-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/gnn_val_loss.svg-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/gnn_val_loss.svg-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/gnn_val_loss.svg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" 
srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/gnn_val_acc.svg-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/gnn_val_acc.svg-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/gnn_val_acc.svg-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/gnn_val_acc.svg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="caption mt-3"> GNN Training loss, validation loss &amp; validation accuracy for different sizes </div> <p>We observed that both medium and large GNNs achieved over 99% out-of-sample accuracy after just a few epochs. The large model’s performance aligns with expectations, as it conducts 10 iterations in total—matching the maximum number of iterations required by standard shortest-path-finding algorithms like Bellman-Ford for n-node graphs.</p> <p>Surprisingly, the medium-sized model, with only 5 iterations, also achieved similar accuracy. This initially seems counterintuitive since 5 iterations suggest that information can only propagate to nodes within 5 neighbors. However, as noted in <d-cite key="DBLP:journals/corr/abs-1905-13211"></d-cite>, our GNN effectively learns an optimized version of the Bellman-Ford algorithm, requiring only half the number of iterations compared to the standard algorithm.</p> <p>This explains why the medium GNN can converge efficiently, but the small model with just 2 iterations cannot. 
Even with an optimized Bellman-Ford algorithm, a 2-iteration GNN would only correctly solve paths shorter than or equal to 5 nodes, limiting its overall learning capacity.</p> <h3 id="mlp-performance">MLP performance</h3> <p>Although GNNs quickly converged to near-perfect predictions, their inherent suitability for the shortest path task was expected. To gauge the Transformers’ performance more accurately, we compared them with MLPs, which are not specifically designed for this task. As indicated in <d-cite key="DBLP:journals/corr/abs-1905-13211"></d-cite>, MLPs struggle with iterative algorithms like Bellman-Ford due to difficulties in learning for-loop structures. We analyzed MLP performance across three sizes: small (2 layers, 44k parameters), medium (4 layers, 76k parameters), and large (8 layers, 142k parameters). It’s important to note that each GNN size had roughly half the parameters of the corresponding MLP size.</p> <div class="mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/mlp_train_loss.svg-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/mlp_train_loss.svg-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/mlp_train_loss.svg-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/mlp_train_loss.svg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" 
srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/mlp_val_loss.svg-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/mlp_val_loss.svg-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/mlp_val_loss.svg-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/mlp_val_loss.svg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/mlp_val_acc.svg-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/mlp_val_acc.svg-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/mlp_val_acc.svg-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/mlp_val_acc.svg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="caption mt-3"> MLP Training loss, validation loss &amp; validation accuracy for different sizes </div> <p>The smaller MLP models converged faster, yet both small and medium models barely exceeded 50% accuracy, even after extensive training (16 epochs for GNNs and 64 for MLPs). 
This supports the hypothesis that MLPs face challenges in learning iterative algorithms.</p> <p>Increasing model size or training duration did not significantly improve performance; the largest model struggled particularly with fitting the problem. While more hyperparameter tuning might enhance the “large” model’s performance, the “medium” model’s struggles suggest that MLPs have inherent difficulties with this task, regardless of parameter count.</p> <h3 id="transformer-performance">Transformer performance</h3> <p>Turning our attention to Transformers, we initially doubted their ability to match GNN performance levels. However, the question remained: could they outperform MLPs, and if yes by how much? We began by testing a basic Transformer version (no attention mask, positional encoding, or skip connection). To ensure fair comparisons, all model sizes maintained approximately the same number of parameters as the MLPs, with equivalent layers/iterations (small: 2 layers, 44k parameters; medium: 5 layers, 86k parameters; large: 10 layers, 172k parameters).</p> <div class="mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_train_loss.svg-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_train_loss.svg-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_train_loss.svg-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_train_loss.svg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="mt-3"> <figure> 
<picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_val_loss.svg-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_val_loss.svg-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_val_loss.svg-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_val_loss.svg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_val_acc.svg-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_val_acc.svg-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_val_acc.svg-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_val_acc.svg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="caption mt-3"> Transformer training loss, validation loss &amp; validation accuracy for different sizes </div> <p>A notable improvement in accuracy was observed, with the best-performing Transformer model reaching 70% accuracy. 
The training was stopped at 64 epochs to maintain consistency across all models. As it does not show signs of overfitting, extending training beyond 64 epochs might further enhance the Transformer’s performance. Interestingly, increasing the model size to over 150k parameters did not significantly boost performance under our hyperparameter settings. The small and medium architectures exhibited similar performance, with the medium model slightly outperforming after a few epochs.</p> <p>Regarding sizes, similarly to the MLP, increasing the depth and parameter count of the transformer over 150k parameters doesn’t seem to help with the model’s performance, at least with our set of hyperparameters (as this big of a transformer is long to train, we haven’t been able to do much hyperparameter tuning). The small and medium architectures seem almost tied, but the medium one seems to perform better after a few epochs.</p> <p>Our hypothesis in Part 1 suggested that Transformers, capable of performing $O(n^2)$ operations per attention head, should learn loop structures more effectively. However, their learning is constrained by the specific operations allowed in the attention mechanism. To test this, we proposed three enhancements to our Transformer: an attention mask, positional encoding, and a skip connection, as outlined in Part 1 and Appendix A. We hypothesized that these additions would enable the Transformer to better learn the Bellman-Ford iteration step.</p> <h3 id="transformer-with-attention-mask-positional-encoding--skip-connection">Transformer with Attention Mask, Positional Encoding &amp; Skip Connection</h3> <p>As discussed in Part 1, we adapted our Transformer model to include these three components, expecting an improvement in performance. The attention mask, a fundamental feature of Transformers, enables the model to focus on specific token relationships. In our setup, each token (node) attends only to its neighbors, as dictated by the adjacency matrix. 
We incorporated the attention mask into the medium-sized Transformer for comparison.</p> <p>Next, we added positional encoding. Based on our Part 1 discussion, positional encodings can inform the feedforward network (FFN) about the neighboring tokens selected by the attention layer. We used basic one-hot encodings, effectively adding an $n×n$ identity matrix or concatenating an $n×1$ one-hot vector to each token. Although more sophisticated encodings might be beneficial, we demonstrated the feasibility of using one-hot encodings for the Bellman-Ford update.</p> <p>Finally, we implemented a custom skip connection. Instead of a standard sum skip connection, our model concatenates the input and output of the attention head before feeding it into the FFN. This approach potentially allows the attention head to select a neighbor, with the FFN combining its token with the receiving node’s token.</p> <p>We added each augmentation stepwise, building upon the previous modifications (e.g., transformer_pos_enc includes positional encoding, attention mask, and is medium-sized). 
Here are the results:</p> <div class="mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_train_loss_all.svg-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_train_loss_all.svg-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_train_loss_all.svg-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_train_loss_all.svg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_val_loss_all.svg-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_val_loss_all.svg-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_val_loss_all.svg-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_val_loss_all.svg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" 
srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_val_acc_all.svg-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_val_acc_all.svg-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_val_acc_all.svg-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-graphs-transformers/comparative_plots/transformer_val_acc_all.svg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="caption mt-3"> Transformer training loss, validation loss &amp; validation accuracy for different sizes </div> <p>Each augmentation step led to clear improvements. Over 64 epochs, our base model’s out-of-sample accuracy improved from 70% to over 90%. The positional encoding contributed the most significant enhancement, which was somewhat surprising given its simplicity. Overall, these results support our hypothesis regarding the Transformer’s capacity to learn the Bellman-Ford iteration step.</p> <h2 id="conclusion">Conclusion</h2> <p>In this project, we compared MLPs, Transformers, and GNNs in solving graph-related problems, with a focus on the shortest path in Erdos-Renyi graphs. Our findings indicate GNNs excel in such tasks due to their specialized architecture. However, the adaptability of Transformers, particularly with architectural modifications like attention masks, positional encodings, and skip connections, is a significant discovery. 
While these models showed promise, larger MLP and Transformer models faced convergence issues, highlighting the need for better hyperparameter optimization in future work.</p> <p>Transformers’ theoretical success in approximating the Bellman-Ford algorithm, verified by empirical results, suggests potential in a subset of dynamic programming (DP) problems where DP updates are simple and manageable by attention heads. However, their capability is inherently limited compared to the theoretically more versatile GNNs, due to the softmax and linear combination constraints in attention mechanisms. Future work could delve into designing Transformer models with enhanced attention mechanisms, potentially broadening their applicability in complex DP problems. Investigating the synergy between Transformers and GNNs could also lead to innovative hybrid models.</p> <p>Overall, our exploration sheds light on the potential of Transformers in graph-related tasks, suggesting they could offer valuable insights and solutions, alongside the more established GNNs. 
This finding could open up interesting possibilities for research and innovation in neural network applications, particularly in solving complex graph-related challenges.</p> <h2 id="appendix">Appendix</h2> <h3 id="appendix-a">Appendix A.</h3> <p>We present here a mathematical proof of how the Graph Transformer Architecture can learn the Bellman-Ford update in the Shortest Path Problem.<br /> We consider a slightly different tokenization: for every node $i$, at layer $k$, we encode its information in a tensor of the form:</p> \[t_i^k = [\mathbb{1}_i, w_i, d_i^k]\] <p>where $\mathbb{1}_i \in \mathbb{R}^n$ is the positional encoding, $w_i \in \mathbb{R}^n$ the edge weights and $d_i^k$ the current shortest distance computed at layer $k$.</p> <p>Recall the formula of query-key-value attention:</p> \[t_i = \frac{\sum_{j} e^{-q_i' k_j / \sqrt{2n+1}}v_j}{\sum_{j} e^{-q_i' k_j / \sqrt{2n+1}}}\] <p>Set up the weight matrices as:</p> \[\begin{cases} W_Q = \begin{pmatrix} I_{n+1} &amp; O_{n \times n+1} \\ 1_n &amp; 0_{n+1} \end{pmatrix}\\ W_K = \begin{pmatrix} O_{n+1 \times n} &amp; I_{n+1} \end{pmatrix}\\ W_V = I_{2n+1} \end{cases}\] <p>so that \(q_i' k_j = w_{j,i} + d_j\) <em>i.e.</em> attention is determined by the update values of the Bellman-Ford equation.</p> <p>Hence taking the softmax - and if necessary augmenting the weights of the matrices by a common factor -, we have the output \(t_{j^\star}\) for the appropriate node \(j^\star = \text{argmin}_j \{w_{j,i} + d_j\}\).</p> <p>Notice that in this configuration \(t_{j^\star}\) is not enough to retrieve the desired edge weight \(w_{i, j^\star}\): we need the positional encoding from node $i$.</p> <p>The skip-connection achieves this, by concatenating original input $t_i$ with attention output \(t_{j^\star}\). 
We can then retrieve the desired value \(w_{j^\star,i} + d_{j^\star}\) with the MLP of layer $k$, which concludes the proof.</p> Learning a Lifted Linearization for Switched Dynamical Systems 2023-12-11T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/croneillproposal <h2 id="introduction">Introduction</h2> <blockquote> All models are wrong, but some are useful. —George Box </blockquote> <p>Deep neural networks are incredibly capable of generating models from data. Whether these are models that allow for the classification of images, the generation of text, or the prediction of a physical system’s dynamics, neural networks have proliferated as a favored way of extracting useful, predictive information from sets of data <d-cite key="rombach2021highresolution, Brown2020, Tsipras2020"></d-cite>. But while well-tuned and well-designed neural networks can demonstrate miraculous performance at a given task, raw accuracy is not the only measure of a model’s usefulness.</p> <p>In robotics, the speed at which a model can be run and its explainability can be just as important as the accuracy of its predictions. Techniques such as model predictive control can enable remarkable performance even when they’re based on flawed predictive models <d-cite key="Rawlings2022"></d-cite>. In practice, most of these models are linearizations of more accurate, nonlinear equations. Produced by considering low order truncations of the Taylor series, these linearizations can be run incredibly efficiently on modern computer hardware and are amenable to linear analysis techniques for explainability purposes.</p> <p>Nevertheless, this kind of linearization has its own weaknesses. Chief among them is the inherently local nature of the approach: a Taylor series must be taken around a single point and becomes less valid further away from this location. 
As an alternative, lifting linearization approaches inspired by Koopman Operator theory have become more commonplace <d-cite key="Koopmanism, brunton2021modern, AsadaDE, Lusch2018, Shi2022"></d-cite>. These techniques seek to linearize a system by lifting it to a higher dimensional representation where the dynamics can be made to evolve linearly over time. While such models can suffer from the curse of dimensionality when compared to their lower-order Taylor series brethren, they can offer greater accuracy while still providing most of the benefits of a linear model.</p> <p>\(f(x)|_{x=a}\approx f(a)+\frac{f'(a)}{1!}(x-a)\)</p> <div class="caption"> A truncated Taylor series makes use of the derivatives of a function around a point. </div> <p>Deep neural networks have emerged as a useful way to produce these lifted linear models <d-cite key="Lusch2018"></d-cite>. An encoder is used to transform a system’s state into a higher dimensional latent space of “observables”. These observables are then fed through a linear layer which evolves the system forward in time: a linear dynamical model. In the literature, this approach has come to be known as Deep Koopman Networks (DKNs). 
We can see how these networks can learn lifted linear models for physical systems by considering a simple pendulum.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/deepnet-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/deepnet-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/deepnet-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-croneillproposal/deepnet.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> An example of a neural network architecture used to learn observables for a linear Koopman model, taken from <d-cite key="lusch2018deep"></d-cite> </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/DKN_simplepen-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/DKN_simplepen-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/DKN_simplepen-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-croneillproposal/DKN_simplepen.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Performance of a DKN for predicting a damped, simple pendulum across a set of trajectories. 
On the left, the dotted lines represent the ground truth trajectories, while the lines connected by crosses are the predicted trajectories. On the right, the MSE of the trajectories for the full 30 time steps of data is presented. </div> <p>While the potential of DKNs has already been explored in recent years, the field is still being actively studied. In this blog, I am interested in exploring how a DKN can be used to model a particular kind of a dynamical system: one with piecewise dynamics that vary discretely across state space. These systems are inherently challenging for traditional, point-wise linearization techniques. To explain this, we can consider an example inspired by our old friend, the simple pendulum.</p> <p>Consider a pendulum as before, but with the addition of two springs located at $\theta=30^\circ$ and $\theta=-30^\circ$. If we were to consider a point arbitrarily close to one of these springs, say at $\theta=29.99…^\circ$, then a Taylor series about this point – even with infinite terms – would not be able to accurately represent the dynamics when the spring is engaged. In contrast, a lifted linearization may better model such a system thanks to its ability to incorporate information beyond a single point.</p> <p>\(\begin{align} \ddot\theta =f(\theta,\dot\theta) =\begin{cases} -g\sin{\theta}-b\dot\theta, &amp; \theta\in [-30^\circ,30^\circ]\\ -g\sin{\theta}-b\dot\theta-k(\theta+30), &amp; \theta&lt;-30^\circ\\ -g\sin{\theta}-b\dot\theta-k(\theta-30), &amp; \theta&gt;30^\circ \end{cases} \end{align}\)</p> <div class="caption"> The dynamics of a pendulum with a pair of springs can be expressed as a set of piecewise equations. $k=1000$ is the stiffness of the springs and $b=1$ is the damping constant. 
</div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/spring_diagram-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/spring_diagram-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/spring_diagram-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-croneillproposal/spring_diagram.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Diagram of the damped pendulum system with a pair of fixed springs, spaced at equal angles away from $\theta=0$. </div> <p>That isn’t to say that a brute-force implementation of a DKN would necessarily be all too successful in this case either. Piecewise, switched, or hybrid systems (terminology depending on who you ask) are composed of particularly harsh nonlinearities due to their non-continuous derivatives. These can be difficult for lifted linearization approaches to model <d-cite key="Bakker:KoopHybrid, Govindarajan:KoopHyPend, NgCable"></d-cite>, with some systems theoretically requiring an infinite number of observables to be accurately linearized. This project is motivated by the question of whether we could modify the standard DKN approach to be more amenable to piecewise systems, specifically by taking inspiration from the common practice of pre-training neural networks.</p> <p>As a bit of a spoiler for the conclusion of this report, we don’t end up seeing any noticeable improvement from pre-training the DKN. 
Nevertheless, the process of experimenting with the proposed approaches was an insightful experience and I am happy to share the results below.</p> <h2 id="proposed-approaches">Proposed Approaches</h2> <p>I experimented with two approaches for pre-training our DKN, one inspired by curriculum learning <d-cite key="Soviany2022"></d-cite> and another seeking to leverage an intuitive understanding of a lifted linearization’s observables. We then compared the results to an aggregate DKN model trained from scratch with 50 observables.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/aggregate_DKN-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/aggregate_DKN-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/aggregate_DKN-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-croneillproposal/aggregate_DKN.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> A DKN with 50 observables trained from scratch for the pendulum with springs. On the left, the dotted lines again represent ground truth trajectories while the lines connected by crosses are predictions. </div> <p>In the case of applying curriculum learning, we considered an approach with a data-based curriculum. In these cases, the difficulty of the training data is gradually increased over time. This has the potential benefit of allowing a model to more readily learn a challenging task, while also preventing a situation where a model is not sufficiently ‘challenged’ by new data during the training process. 
Our curriculum learning approach sought to take advantage of DKNs’ already good performance for the standard pendulum case. Intuitively, we identify the spring’s stiffness as the primary source of increased difficulty in our toy system. With this in mind, I created four data sets with different values for the spring constant, $k=0,10,100,1000$. A single model was then trained sequentially on these data sets. If our intuition is correct, we would expect to see the model gradually learn to account for the presence of the spring while maintaining the dynamics of a simple pendulum closer to the origin.</p> <p>For the second approach tested in this project, it is necessary to consider what an observable is meant to represent in a lifted linearization. As an additional piece of terminology, the function which is used to generate a given observable is referred to as an observable function <d-cite key="brunton2021modern"></d-cite>. While it may be possible to use different sets of observable functions to linearize a given system, it is possible to find a set of observable functions that are analogous to a linear system’s eigenvectors. The evolution of these observables in time, referred to as Koopman eigenfunctions, is defined by an associated complex eigenvalue. Much like their eigenvector cousins, these eigenfunctions can provide useful information on how the system might evolve over time, including information on how the time evolution may vary spatially.</p> <p>Based on this understanding of Koopman eigenfunctions, we are motivated to see if a DKN could be coaxed into more readily learning spatially-relevant observables. If we consider our system of interest, the pendulum with springs, we posit that different regions of state space would be primarily influenced by different eigenfunctions. 
In particular, the larger central region where the pendulum’s dynamics are independent of the springs may be expected to be affected by a set of eigenfunctions with a lower spatial frequency and a global relevance. That is, eigenfunctions which better represent the dynamics of the system averaged throughout the state space and which may be valid everywhere – even when the springs are engaged, the natural dynamics of the pendulum are still in effect. In contrast, the dynamics when the springs are engaged (each spring is active in a comparatively smaller region of state space) may rely heavily on a set of eigenfunctions that are only locally relevant.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/pend_statespace-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/pend_statespace-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/pend_statespace-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-croneillproposal/pend_statespace.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> On the left, a visualization of trajectories used to train the models for the pendulum with springs. Dotted vertical lines mark the boundary between regions of state space where the springs are and are not engaged. On the right, we see the trajectories considered for the system when there are no springs. Note that the presence of the springs compresses (`squeezes') the higher energy trajectories further away from the origin of the state space. 
</div> <p>While I believe that this is an interesting thought, it is worth noting that this intuitive motivation is not necessarily backed up with a rigorous mathematical understanding. Nevertheless, we can empirically test whether the approach can lead to improved results.</p> <p>In contrast to the curriculum learning approach, we have only a single set of data: that generated from a model of a pendulum with a spring stiffness of $k=1000$. Instead of the standard approach of DKN, where a larger number of observables is considered to (in general) allow for a system to be more easily linearized, we deliberately constrain the latent space dimension to be small. The intention is for this restriction to limit the number of observable functions that the model can represent, encouraging it to learn observables with a low spatial frequency and which are relevant across a larger region of state space. In our system of interest, this would be observable functions that represent the dynamics of the pendulum without the springs.</p> <p>Once we have initially trained this smaller model, we use its encoder within a larger model. This initial encoder is kept fixed in future training processes so that it continues to represent the same set of observables. An additional encoder is then added to the larger model, with the goal being to learn additional observables capable of making up for the initial model’s deficiencies. If the initial model learned the low spatial frequency observables as hoped, then we would expect this additional encoder to learn observables that are more relevant in areas where the springs are exerting a force on the pendulum. In practice, we could see this as a particular form of curriculum learning where the complexity of the model is increased over time. 
A key difference here compared to traditional approaches is that instead of increasing the complexity of the model by adding layers depth-wise, we are effectively increasing the width of the model by giving it the ability to learn additional observables.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/model_arch-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/model_arch-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/model_arch-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-croneillproposal/model_arch.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> The architecture used to train the combined model. A smaller, 10 observable model was first trained, before a larger model was then trained to learn an additional 40 observables. </div> <h2 id="the-model">The Model</h2> <p>To reduce the influence that other factors may have in the results of our experiments, I sought to minimize any changes to the overall structure of the DKNs being used, save for those being studied. Chief among these was the number of hidden layers in the network, the loss function being used, and the input. Other variables, such as the optimizer being used, the batch size, and the learning rate, were also kept as unchanged as feasible. 
The need to tune each of these other hyperparameters and the challenges in doing so are well-documented in the machine learning field, and as such I won’t spend any additional time describing the processes involved.</p> <p>The general <em>encoder</em> architecture of the networks being used was as follows, with $D_x$ being the number of states (2, in the case of the pendulum) and $D_e$ being the number of observables:</p> <table> <thead> <tr> <th>Layer</th> <th style="text-align: center">Input Dimensions</th> <th style="text-align: center">Output Dimensions</th> <th style="text-align: center">Nonlinearity</th> </tr> </thead> <tbody> <tr> <td>Linear</td> <td style="text-align: center">$D_x$</td> <td style="text-align: center">16</td> <td style="text-align: center">ReLU</td> </tr> <tr> <td>Linear</td> <td style="text-align: center">16</td> <td style="text-align: center">16</td> <td style="text-align: center">ReLU</td> </tr> <tr> <td>Linear</td> <td style="text-align: center">16</td> <td style="text-align: center">$D_e$</td> <td style="text-align: center">None</td> </tr> </tbody> </table> <p>In addition to the encoder network, a linear layer was present to determine the time evolution of the observables. For this linear layer, the input and output dimensions were both D_e + D_x since our final set of observables always had the system’s states concatenated onto those learned by the encoder.</p> <p>The loss function that I used was composed of two main components: a loss related to the time evolution of the observables being output by the encoder, and a loss related to the time evolution of the state variables. In the literature, additional loss terms are often included to help regularize the network during training. These were not found to be significant in the testing done for this report, however and so were excluded. 
Tests were also done with different weights between the state loss and the observable loss, with an equal balance between the two found to provide reasonable outcomes. Another hyperparameter that we needed to tune is for how many time steps to enforce a loss on the values predicted by the model. In this report, we stuck to 30 time steps although significant experimentation was not done to explore how varying this parameter may have affected the results. We did briefly look into whether having a weight on any of the loss terms which decayed over time would improve training and did not see any immediate benefits.</p> <p>\(\mathrm{loss}=\mathrm{multistep\_loss\_state}+\mathrm{multistep\_loss\_observables}\) \(\mathrm{multistep\_loss\_state}=\sum^{30}_{t=1}\lvert\lvert(\psi(\textbf{x}_t)-K^t\psi(\textbf{x}_0))[:2]\rvert\rvert_{\mathrm{MSE}}\) \(\mathrm{multistep\_loss\_observables}=\sum^{30}_{t=1}\lvert\lvert(\psi(\textbf{x}_t)-K^t\psi(\textbf{x}_0))[2:]\rvert\rvert_{\mathrm{MSE}}\)</p> <div class="caption"> The loss function ultimately used for each of the models considers the prediction error for both the state and the observables. $\psi$ represents the act of using the model's encoder and then concatenating the state as an additional pair of observables. $K$ represents the linear layer in the architecture used to model the time evolution of the lifted state. </div> <h2 id="analysis">Analysis</h2> <h3 id="curriculum-learning">Curriculum Learning</h3> <p>The initial model for stiffness $k=0$ was trained on the simple pendulum dynamics for 600 epochs, and served as the pre-trained model for this approach. Subsequent models were each trained for 200 epochs with the Adam optimizer and a decaying learning rate scheduler. 
When analyzing the performance of these models, we looked at how the error for a set of trajectories not in the training set evolved over time.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/curriculum_results-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/curriculum_results-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/curriculum_results-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-croneillproposal/curriculum_results.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Performance of the model trained using curriculum learning after each of the learning stages. We observe that performance decreases over time, and that the original model trained when $k=0$ seems to perform the best. </div> <p>By this metric, we observe the performance of the model gradually getting worse. While this on its own is not too surprising, the final model ends up performing significantly worse than a DKN with the equivalent number of observables trained from scratch. Interestingly, it looks like the final model is unstable, with the trajectories blowing up away from the origin. Looking into this, issues surrounding the stability of linearized models are not a new phenomenon in the field of Koopman linearizations. Prior works have proposed several methods to help alleviate this issue, such as by adding an additional term to the loss function which stabilizes the time-evolution matrix. 
While there was no time to implement this change for this report, it could be an interesting modification to attempt for future work.</p> <h3 id="learning-new-observables">Learning New Observables</h3> <p>While trying to gradually learn additional observables for the model, we started with a network that learned 10 observable functions and trained it for 600 epochs. Once this process was complete, an extended model learned an additional 40 observable functions for an additional 600 epochs. The end result was comparable in performance to a single aggregate model of 50 observables trained from scratch. The aggregate model did appear to specifically outperform our gradually trained model during the initial time steps, while slightly underperforming in comparison at the later time steps. This may be due to some differences in the stability of the two learned linear models, although further investigation would be needed to verify this. Part of the motivation for this method was the hope that the network would learn locally relevant observable functions. 
The learned observables were plotted on a grid to visualize them and see if this were the case, but not distinctive, qualitative features indicating that different observables were learned for different regions of state space.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/combined_results-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/combined_results-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/combined_results-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-croneillproposal/combined_results.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> The combined model doesn't see any noteworthy improvement in performance when compared to the standard DKN approach. While not shown here, the combined model was found to be sensitive to how many observables were learned by each of its constituents. For example, having 30 observables in the first encoder and 20 in the second led to worse results. 
</div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/obs_visualization-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/obs_visualization-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-croneillproposal/obs_visualization-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-croneillproposal/obs_visualization.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Visualization of a pair of observables from the combined model, arbitrarily selected as the first observable from both encoder 1 (left) and encoder 2 (right). While only these two observables are shown here, plots for all 50 were produced. We noticed that observables from encoder 1 (the fixed model) tended to show `kinks' around $\theta=+-30\degree$. This may indicate that it was learning to account for the presence of the springs. In contrast, encoder 2 (the extended model) learned observable functions that were generally smoother across state space. </div> <h2 id="conclusion">Conclusion</h2> <p>In this project, we sought to test two modifications to a DKN training scheme on an example of a piecewise dynamical system. By using a curriculum learning process or gradually increasing the number of observable functions, we hypothesized that the DKN would show better performance than an aggregate model trained from scratch. Ultimately, we found that neither of the proposed methods led to significant improvements.</p> <p>One of the potential causes of underperformance is the learned linear models’ instability. 
While this is a known issue regarding lifted linearization techniques <d-cite key="ng2022learned, Mamakoukas2023Stable"></d-cite>, attempting to resolve the issue would require further work and additional study into how best to do so for this use case. The example model of a pendulum with springs could also have been chosen poorly. I opted to experiment with this system since it was physically meaningful, and I believed that it would be a simple toy model that wouldn’t require large models with extensive compute requirements. But observing the dramatic change in performance that occurred in the linear models simply through the addition of the springs made me wonder whether this system truly was as simple as I had initially made it out to be. It is possible that larger and more elaborate models with more observables and resources for training are necessary to learn an appropriate linearization.</p> <p>It is also worth considering the severe limitations of this study, imposed upon it by the need to tune a wide variety of hyperparameters. Even in the process of creating a linear model for the simple pendulum, I observed a wide range of performance based upon how the cost function or learning rate were varied. While some effort was taken to tune these and other hyperparameters for the models I explored, this process was far from exhaustive.</p> <p>Moreover, the proposed changes to the typical DKN architecture only served to add additional hyperparameters into the mix. What spring stiffnesses should be used during curriculum learning? Should the learning rate be decreased between different curriculums, or should the number of epochs be varied? How about the ratio of observables between the two models used in the second approach, is a 10:40 split really optimal? 
Some variations of these hyperparameters were considered during this project, but again an exhaustive search for optimal values was impossible.</p> <p>While this means that there is a chance that I simply used the wrong selection of hyperparameters to see better performance from the tested approaches, it also highlights the sensitivity that I observed in the performance of the DKNs. Even beyond the considerations described thus far, there are further considerations that can impact the structure and performance of learned linearizations. Some approaches augment the state variables with time-delayed measurements, for example. In other cases, the state variables are not included as observables and are instead extracted using a decoder network. This latter case is of particular interest, since recent work in the field has identified that certain types of nonlinear systems are impossible to linearize with a set of observables that include the states.</p> <p>Ultimately, while the experiments in this project didn’t agree with my hypothesis (and resulted in some underwhelming predictive performance) I gained a newfound appreciation for the process of training these models along the way.</p> Sparse Autoencoder Universality - Under What Conditions are Learned Features Consistent? 2023-12-10T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/universal-features <h2 id="introduction">Introduction</h2> <p>Neural networks are black boxes. We understand the process by which they are created, but just as understanding the principle of evolution yields little insight into the human brain, designing a model’s optimization process yields little insight into how that model reasons. The field of mechanistic interpretability attempts to understand how human-understandable concepts combine within a model to form its output. 
With sufficiently good interpretability tools, we could ensure reasoning transparency and easily find and remove harmful capabilities within models (such as hallucinations) <d-cite key="marks2023geometry"></d-cite>.</p> <p>In 2022, Anthropic identified a core challenge in interpreting a model’s reasoning layer-by-layer: polysemanticity, a phenomenon in which a single neuron activates for many different concepts (e.g. academic citations, English dialogue, HTTP requests, and Korean text). This is a result of a high-dimensional space of concepts (‘features’) being compressed into the lower-dimension space of the neural network <d-cite key="Elhage2022-wh"></d-cite>. Sparse autoencoders, a form of dictionary learning, help to linearly disentangle polysemantic neurons into interpretable features <d-cite key="bricken2023monosemanticity"></d-cite>.</p> <p>Sparse autoencoders work by projecting a single layer of a neural network into a higher-dimension space (in our experiments, we train autoencoders ranging from a 1:1 projection to a 1:32 projection) and then back down to the size of the original layer. 
They are trained on a combination of reconstruction loss, their ability to reconstruct the original input layer, and a sparsity penalty, encouraging as many weights as possible to be 0 while retaining good performance <d-cite key="cunningham2023sparse"></d-cite></p> <h2 id="setup">Setup</h2> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-universal-features/image%20(2)-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-universal-features/image%20(2)-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-universal-features/image%20(2)-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-universal-features/image%20(2).png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>(https://www.alignmentforum.org/posts/wqRqb7h6ZC48iDgfK/tentatively-found-600-monosemantic-features-in-a-small-lm)</p> <p>The intuition behind sparse autoencoders is that if each neuron in the input layer learns n features, then projecting to n dimensional space while retaining all the information from the input layer should theoretically leave us with one feature represented in each encoded neuron. Then, these neurons should all be monosemantic, meaning they should each represent one interpretable concept. Because the columns of the decoder matrix tell us how these encoded neurons linearly combine to recreate the input layer, each column of the decoder matrix represents one feature of the network (in other words, what linear combination of neurons represents an individual concept). 
<d-cite key="cunningham2023sparse"></d-cite></p> <p>However, because sparse autoencoders were only popularized as an interpretability method earlier this year by Anthropic, the literature on them is, for lack of a better word, sparse. In particular, we were curious about whether the features learned by sparse autoencoders are universal. In other words, we’d like to know if the learned features are similar regardless of variables like autoencoder size, model size, autoencoder training set, and model training set. If they are, it shows both that sparse autoencoders consistently extract the correct features and that learned features are similar across different model sizes and training sets. If they aren’t, it would be evidence that sparse autoencoders don’t accurately capture the full scope of features a model represents and that we cannot easily transfer them across different models.</p> <p>In our experiments, we train autoencoders of projection ratios ranging from 1:1 to 1:32 on five different Pythia models: 70m, 160m, 410m, 160m deduped, and 410m deduped. In some cases, we exclude data from Pythia 410m because running experiments on it was too computationally expensive. We train on the first four layers of each model to provide additional insight into how the efficacy of autoencoders changes as one moves deeper into the model. We also train autoencoders on two different datasets from the same distribution to test whether the learned features change in response to small perturbations in training order or distribution. 
Together, these models let us answer a few broad questions surrounding the consistency of learned features:</p> <ul> <li>Do learned features consistently transfer between different model sizes and training datasets?</li> <li>Are learned features consistent across different autoencoder sizes?</li> <li>Do sparse autoencoders learn interpretable features less consistently in later layers where reasoning may become more abstract or hard to follow?</li> </ul> <p>These meta-level questions build on Anthropic’s feature-extraction process outlined below:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-universal-features/image%20(3)-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-universal-features/image%20(3)-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-universal-features/image%20(3)-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-universal-features/image%20(3).png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>(This image is from Cunningham et. al <d-cite key="cunningham2023sparse"></d-cite>)</p> <p>To answer these questions, we use the following three metrics in a variety of comparisons:</p> <ul> <li>Mean cosine similarity (MCS) between decoder weights – since the columns of the decoder matrix represent the features, we can use them to measure the similarity of the learned features. To compare two decoders, we start by taking the mean cosine similarity between the first column in the first decoder and every column in the second decoder. Because the decoders might learn features in different orders, we take the maximum of these similarities. 
We repeat this process for every column in the first decoder, and then we take the average similarity across the columns.</li> <li>Correlation between activation vectors of encoded layers – another way of inspecting the features learned by a sparse autoencoder is to examine when different neurons in the encoded layer activate on different types of token. So, to compare two autoencoders, we pass over 10,000 tokens of text through their respective models and save vectors representing each encoded neuron’s activations across those tokens. Then, as with mean cosine similarity, we took the maximum correlation between a neuron in the first encoder and any neuron in the second encoder, and then averaged these values across every neuron. If two encoders typically had the same neurons activating for the same tokens, this is strong evidence that the encoders learned similar features.</li> <li>Feature frequency of an autoencoder – because neurons in encoded layers are intended to represent specific individual concepts, we expect them to activate much less than typical neurons in a neural network. We used this metric both as a way of verifying that our autoencoders are working as intended and as a way of evaluating how easily autoencoders are able to learn monosemantic features as we vary other parameters. 
To create feature frequency plots, we pass over four million tokens through the model and plot the frequency with which a feature activates (usually around once every 10-1000 tokens) against the number of features which had that frequency.</li> </ul> <p>Specifically, we ran the following experiments:</p> <ul> <li>On the question of whether learned features consistently transfer between different model sizes and training datasets: we created feature frequency plots, tables of correlations, and MCS graphs to contrast different model sizes along with deduped and original models.</li> <li>On the question of whether learned features are consistent across different autoencoder sizes: we created feature frequency plots, MCS tables, and graphs of pairwise activation correlations and MCS to contrast features learned by different autoencoder sizes.</li> <li>On the question of whether sparse autoencoders learn interpretable features less consistently in later layers where reasoning may become more abstract or hard to follow: we create feature frequency plots contrasting learned feature frequencies at different layers throughout Pythia 70m and Pythia 160m.</li> </ul> <h1 id="experiments-and-results">Experiments and Results</h1> <p>We ran baselines for both MCS and correlations by taking the corresponding measurement between autoencoders trained on two different layers as well as randomly initialized weights. For MCS, the baseline was around 0.15 and was always below 0.20 in our experiments. For correlations, random measured to be about .40.</p> <h2 id="training-and-evaluating-sparse-autoencoders">Training and evaluating sparse autoencoders</h2> <p>We trained a range of sparse autoencoders on the activations at the output of the MLP of various Pythia models. We used 100 million tokens of web text, from a HuggingFace dataset to train each autoencoder. As seen from the loss curve, this is likely over training. 
We spent some time fine-tuning the hyperparameters and conferred with other researchers who have trained similar autoencoders. You can see from our loss curve that we are likely over training. Since we are partially optimizing for reconstruction loss, we did not expect the quality of the model to decrease on test sets significantly. We ran our model with and without the sparse autoencoder or a small dataset and saw the perplexity go up from 25 to 31, which we were content with. However, there is a lot of room left for improvement to get better sparse autoencoders.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-universal-features/W&amp;B%20Chart%2012_12_2023,%2011_09_59%20PM-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-universal-features/W&amp;B%20Chart%2012_12_2023,%2011_09_59%20PM-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-universal-features/W&amp;B%20Chart%2012_12_2023,%2011_09_59%20PM-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-universal-features/W&amp;B%20Chart%2012_12_2023,%2011_09_59%20PM.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>(total loss curve of an 1:8 autoencoder trained on Pythia-70m)</p> <h2 id="do-learned-features-consistently-transfer-between-different-model-sizes-and-training-datasets">Do learned features consistently transfer between different model sizes and training datasets?</h2> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-universal-features/frequency_plot-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" 
srcset="/staging/assets/img/2023-11-09-universal-features/frequency_plot-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-universal-features/frequency_plot-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-universal-features/frequency_plot.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Activation frequencies are distributed roughly symmetrical around 0.01, meaning that the modal encoded neuron activated around once every one hundred tokens. This is solid evidence that our sparse autoencoders were effectively learning sparse, monosemantic representations. If a neuron was only needed every one hundred tokens to reconstruct the input, it likely represents a very specific concept rather than many concepts all at once. We see no clear trend when varying model size, demonstrating that this does not have much effect on an autoencoder’s ability to extract monosemantic features.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-universal-features/Correlations%20between%201-8%20AEs%20trained%20on%203%20model%20sizes-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-universal-features/Correlations%20between%201-8%20AEs%20trained%20on%203%20model%20sizes-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-universal-features/Correlations%20between%201-8%20AEs%20trained%20on%203%20model%20sizes-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-universal-features/Correlations%20between%201-8%20AEs%20trained%20on%203%20model%20sizes.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; 
$('.responsive-img-srcset').remove();" /> </picture> </figure> <p>The table above measures the correlations between 1:8 autoencoders trained on layer 0 of three different model sizes. You can see that autoencoders trained on models closer in size have a higher correlation factor of their features, suggesting that smaller autoencoders may not store some of the features that large autoencoders do.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-universal-features/Deduped%20vs%20non-deduped-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-universal-features/Deduped%20vs%20non-deduped-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-universal-features/Deduped%20vs%20non-deduped-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-universal-features/Deduped%20vs%20non-deduped.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>The above graph shows the MCS between autoencoders trained on deduped vs regular models. We anticipated the MCS of these models to be fairly high, but these were some of the lowest results we have seen, with autoencoders trained on layer 0 (of any of the three models we looked at) being around .4. 
Notably, all of our MCS were above .15 which was our baseline.</p> <h2 id="are-learned-features-consistent-across-different-autoencoder-sizes-and-training-datasets">Are learned features consistent across different autoencoder sizes and training datasets?</h2> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-universal-features/frequency_plots_by_layer-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-universal-features/frequency_plots_by_layer-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-universal-features/frequency_plots_by_layer-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-universal-features/frequency_plots_by_layer.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Sparsity tends to increase when the projection ratio increases, which makes sense, as a larger layer needs to use each neuron less often. 
This is evidence that our autoencoders are not learning all possible features, and using even larger autoencoders would allow us to unpack more features.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-universal-features/Four%20AE%20sizes%20on%20Pythia-70m%20MCS-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-universal-features/Four%20AE%20sizes%20on%20Pythia-70m%20MCS-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-universal-features/Four%20AE%20sizes%20on%20Pythia-70m%20MCS-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-universal-features/Four%20AE%20sizes%20on%20Pythia-70m%20MCS.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>The above table looks at MCS loss of different sized autoencoders on Pythia 70m. Interestingly, we observed that MCS between autoencoders whose dimensions have the same ratio (e.g. 4:8 vs 8:16) are similar (e.g. 
both are .870.)</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-universal-features/Correlation_Graph_70m-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-universal-features/Correlation_Graph_70m-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-universal-features/Correlation_Graph_70m-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-universal-features/Correlation_Graph_70m.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-universal-features/Correlation_graph_160m/7-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-universal-features/Correlation_graph_160m/7-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-universal-features/Correlation_graph_160m/7-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-universal-features/Correlation_graph_160m.png/7.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-universal-features/Correlations%20between%201-8%20AEs%20trained%20on%203%20model%20sizes-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" 
srcset="/staging/assets/img/2023-11-09-universal-features/Correlations%20between%201-8%20AEs%20trained%20on%203%20model%20sizes-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-universal-features/Correlations%20between%201-8%20AEs%20trained%20on%203%20model%20sizes-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-universal-features/Correlations%20between%201-8%20AEs%20trained%20on%203%20model%20sizes.png" class="img-fluid z-depth-2" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-universal-features/Correlation_graph_160m/7-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-universal-features/Correlation_graph_160m/7-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-universal-features/Correlation_graph_160m/7-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-universal-features/Correlation_graph_160m.png/7.jpg" class="img-fluid z-depth-2" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Activation correlations and MCS were very high for all autoencoder projection ratios, demonstrating that different size autoencoders learn very similar features. Note that all metrics were lower for the autoencoders with a 1:1 projection ratio, likely because they were penalized on sparsity while not having any additional space with which to represent concepts. This means the total information they could retain was likely much less than the other sizes. 
We see a slight upward trend as autoencoder projection ratio increases, which is small enough that it could probably be chalked up to the exact mean-max methodology used in the calculations. In the MCS graphs, the orange line represents mean-max MCS going from the smaller projection size to the larger projection size, where the blue line is the inverse. It is positive evidence that the blue line is much lower, because we should expect the most important features to correlate strongly with some of the features learned by the larger autoencoder, while the many features learned by the larger autoencoder should not all necessarily have a match in the smaller one.</p> <h1 id="conclusion">Conclusion</h1> <h2 id="discussion">Discussion</h2> <p>In this post, we explored the potential of sparse autoencoders as tools for interpreting neural networks, particularly focusing on their capability to disentangle polysemantic neurons into interpretable, monosemantic features. Our experiments, conducted on various configurations of Pythia models and sparse autoencoders, aimed to understand the consistency and universality of the features extracted by these autoencoders across different model sizes, training datasets, and autoencoder dimensions.</p> <p>Our findings indicate that sparse autoencoders are indeed effective in learning sparse, monosemantic representations. This effectiveness is observed across different model sizes and is not significantly impacted by the size of the model, suggesting a level of universality in the features extracted. However, our results also reveal that the correlation between features tends to be higher in autoencoders trained on models closer in size, hinting at some limitations in the transferability of learned features across vastly different model scales.</p> <p>Interestingly, we observed a tendency towards increased sparsity in the representations as we moved into the later layers of the network. 
This suggests that higher-level concepts in these layers might be more specialized and interpretable, aligning with intuitive expectations about neural networks.</p> <h2 id="limitations">Limitations</h2> <p>Limitations of sparse autoencoders include that they are extremely computationally intensive, especially if one wants to interpret multiple layers of a network, neural networks are not entirely human-interpretable to begin with, so their learned features will never quite represent human concepts, and all the metrics we use to analyze them rely on overall trends rather than individual features, so despite our ability to provide evidence to help answer broad questions, our analysis is still very imprecise.</p> <h2 id="future-work">Future Work</h2> <p>One future direction is focussing on training better sparse autoencoders, ones with lower reconstruction and sparsity loss. Given that we did not optimize our project for this and were limited by time and compute, it is very possible that better sparse autoencoders can improve our results.</p> <p>It would also be interesting to train the same sparse autoencoder architectures on different datasets and see whether they are invariant to small perturbations in the dataset. If not, it’s evidence that the method may not work as well as we hope.</p> <p>Finally, we could start to look at the features that the autoencoders are finding. We were able to measure similarity and correlations but did not have the time to look at the actual concepts that the representations were finding. 
This could give us additional insight into similarities between models that we currently are overlooking.</p> <h2 id="acknowledgements">Acknowledgements</h2> <p>Special thanks to Sam Marks for suggesting the initial experiment ideas and to <a href="https://www.mitalignment.org/">MIT AI Alignment</a> for providing connections with mentorship and compute resources.</p> Optimizations of Transformers for Small-scale Performance 2023-12-10T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/diaz-proposal <div class="text-center"> <a name="figure-1"></a> <img src="https://discuss.tensorflow.org/uploads/default/original/2X/4/44b54935a57a92b71902d81265e9bc3c6d99fb12.gif" width="700" height="250" /> <p class="caption"> Figure 1: Attention Maps of a Vision Transformer (DINO). Source: <a href="https://github.com/sayakpaul/probing-vits">https://github.com/sayakpaul/probing-vits </a>. </p> </div> <h2 id="transformers-great-but-fall-short">Transformers: Great but fall short</h2> <h3 id="basic-background">Basic Background</h3> <p>Transformers have well-earned their place in deep learning. Since the architecture’s introduction in<d-cite key="AttentionIsAllYouNeed"></d-cite>, we have seen huge improvements in our model’s capabilities. The most notable of which being natural language processing (NLP) with large-language models such as GPT-4 stunning the world at-large.</p> <p>Originally designed for NLP, the transformer architecture has been robust in other domains and tasks. For example, it has been translated, with success, to de-novo protein design<d-cite key="Grechishnikova2021"></d-cite>, the medical field<d-cite key="Hu2022"></d-cite>, and, of most relevance, computer vision<d-cite key="Dosovitskiy2020"></d-cite>. This behaviour differs from architectures of the past like RNNs and CNNs which have been limited to one domain. The potent generalizability of the transformer lies within the self-attention mechanism. 
Without getting to much into detail, self-attention enables nodes within a neural network to probe the input sequence, determine what is most interesting, and attend towards the region of interest by dynamically updating its weights. Visualization of attention can be seen in <a href="#figure-1">Figure 1</a>. By probing the data landscape, the architecture enables long-range dependencies to be modeled regardless of distance. From a Fourier perspective, the transformer caters towards the low-frequency information in the data and deciphers how each element of an input sequence all relate to each other<d-cite key="Wang2022"></d-cite>. These connections help the transformer accurately model global information in the data perhaps indicating why they are so powerful. In this blog, we will specifically examine the transformer in vision, determine how it can be improved, and evaluate new strategies to increase its viability on small datasets.</p> <div class="col-sm text-center"> <a name="figure-2"></a> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-diaz-proposal/vit_workflow-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-diaz-proposal/vit_workflow-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-diaz-proposal/vit_workflow-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-diaz-proposal/vit_workflow.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 2: ViT workflow. </div> </div> <h3 id="vision-the-problem">Vision: The Problem</h3> <p>The Vision Transformer (ViT)<d-cite key="Dosovitskiy2020"></d-cite> introduced the transformer to the computer vision world in late 2020. 
The ViT is simple: it funnels image patches into a tokenization scheme, adds positional encoding, and feeds these tokens into a transformer block. A graphical workflow of the ViT from the original paper can be seen in <a href="#figure-2">Figure 2</a>.</p> <p>Since its introduction, the ViT and associated variants have demonstrated remarkable benchmarks in image classification<d-cite key="Liu2021"></d-cite>, image restoration<d-cite key="Liang2021"></d-cite>, and object detection<d-cite key="Li2022"></d-cite>. Much of these new methods can compete and even outperform long-established CNNs. However, ViTs are data-hungry requiring extensive amounts of training data to surpass CNNs. In small scale training, ViTs are burdensome to train and achieve sub-par performance compared to their CNNs counterparts<d-cite key="Naimi2021"></d-cite>. In <d-cite key="Zhu2023"></d-cite>, they investigate this discrepancy by comparing the feature and attention maps of small-scale CNNs and ViTs, respectively. The authors determine the ViT lacks the ability to learn local information and has ill-suited representation capacity in the lower layers. In contrast, the CNN demonstrate remarkable inductive bias due to weight sharing and locality properties which enable high-frequency modeling<d-cite key="Park2022"></d-cite>. The ViT’s low-frequency and the CNNs high-frequency capacity has initiated a wave of new models aimed at combining the two for comprehensive modeling capability<d-cite key="Si2022"></d-cite><d-cite key="ConvViT"></d-cite>.</p> <p>Despite the complementary nature of these architectures, they break the fidelity of the transformer and make for difficult analysis. Therefore, there exists a gap in the traditional transformer architecture to perform in small-data regimes, particularly in vision. Motivated by this shortcoming, we aim to investigate and improve the current ViT paradigm to narrow the gap between CNNs and ViTs on small-data. 
In particular, we examine novel initialization schemes, removal of component parts in our transformer block, and new learnable parameters which can lead to better performance, image throughput, and stable training on small-scale datasets.</p> <div class="col-sm text-center"> <a name="figure-3"></a> <img src="/staging/assets/img/2023-11-08-diaz-proposal/transformer.svg" class="img-fluid rounded z-depth-1" style="width: 300px;" /> <div class="caption"> Figure 3: Standard transformer encoder block. Encoder can be stacked for x amount of layers. </div> </div> <h3 id="transformer-block">Transformer Block</h3> <p>To serve as a basis of comparison, we will examine the standard transformer block seen in <a href="#figure-3">Figure 3</a>. The block is identical to <d-cite key="AttentionIsAllYouNeed"></d-cite> with the exception of using layer normalizations before the multi-headed attention (MHA) and multi-layer perceptron (MLP) blocks as opposed to after. In practice, this placement has been shown to be more stable and increase performance<d-cite key="Liu2020"></d-cite>. With the exception of this modification, the block has seen few improvements over the years, testifying to its robustness. 
However, recent trends in theory hint towards ways we could break this notion – all while enjoying increased performance.</p> <p>Before we delve into these advances and their implications, consider the following transformer block information flow:</p> \[\displaylines{ \text{Attention} = \text{A}(X) = \text{Softmax}\Biggl(\frac{XW_{Q}W_{K}^{T}X^{T}}{\sqrt{k}}\Biggl) \\ \\ \text{A}(X) \in \mathbb{R}^{T\times T}}\] <p>which is shortly followed by:</p> \[\displaylines{ \text{S}(X) = \text{A}(X)W_{V}W_{O} \\ \\ \text{S}(X) \in \mathbb{R}^{T\times d} }\] <p>and:</p> \[\text{Output} = \text{MLP}(\text{S}(X))= \text{Linear}(\text{GELU}(\text{Linear}(\text{S}(X))))\] <p>where:</p> <ul> <li>Embedded input sequence: \(X \in \mathbb{R}^{T \times d}\)</li> <li>Linear query and key layers: \(W_{Q},W_{K} \in \mathbb{R}^{d \times k}\)</li> <li>Linear value and projection layers: \(W_{V}, W_{O} \in \mathbb{R}^{d \times d}\)</li> <li>MLP Linear layers: \(\text{Linear} \in \mathbb{R}^{d \times d}\)</li> <li>\(T =\) # of tokens, \(d =\) embedding dimension, \(k = \frac{d}{H}\), \(H =\) # of attention heads</li> </ul> <p>The flow of information mirrors the transformer block in <a href="#figure-3">Figure 3</a>. Readers unfamiliar with transformer intricacies such as MHA and MLPs are encouraged to read<d-cite key="AttentionIsAllYouNeed"></d-cite>.</p> <p>Recently, there have been many proposals on how the transformer block can be further modified to increase data throughput and eliminate “redundant” or “useless” parts that do not have any significant contribution to the transformer’s modeling capabilities. For example, <d-cite key="2302.05442"></d-cite> used a parallel MHA and MLP incorporated into a large-scale ViT for stable and efficient training. Throughout this blog, we will focus on the ideas overviewed and proposed by <d-cite key="He2023"></d-cite> as they present intriguing results and a synthesis of the current state of this research topic. 
The interested reader is encouraged to study their paper for a more extensive understanding of the ideas.</p> <div class="col-sm text-center"> <a name="figure-4"></a> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-diaz-proposal/simplified_block-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-diaz-proposal/simplified_block-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-diaz-proposal/simplified_block-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-diaz-proposal/simplified_block.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 4: Comparison between transformer architectures. <em>Left</em>: Standard block as shown in Figure 3. <em>Bottom Right</em>: Parallel block proposed in. <em>Top Right</em>: Newly proposed encoder. </div> </div> <p>The overarching theme of <d-cite key="He2023"></d-cite> was to take the standard transformer block and evaluate the necessity of each component. In doing so, they removed each component part and studied its effects on performance. Understandably, blindly removing components will lead to unstable training and ill-performance (i.e. if one were to remove the skip connections, they would encounter vanishing gradients as seen in <a href="#figure-14">Figure 14</a>). However, <d-cite key="He2023"></d-cite> took the approach of removal combined with recovery. 
For example, when the authors removed skip connections, they required a modification to the self-attention matrix of the form:</p> \[\text{A}(X) \leftarrow (\alpha\text{I} + \beta \text{A}(X))\] <p>where \(\alpha\) and \(\beta\) are learnable scalars and initialized to \(1\) and \(0\), respectively, and \(\text{I} \in \mathbb{R}^{T \times T}\) is the identity matrix. This modification initializes the self-attention matrix providing a pathway towards training stability. They further entertained a more complicated scheme with a third parameter, but we only consider the two-parameter version for simplicity. By this iterative removal and recovery process, the authors converged towards the final transformer block seen in <a href="#figure-4">Figure 4</a>. The most shocking aspect of this proposed block is the removal of the \(W_{V}\) and \(W_O\) layers. They arrived at this justification by initializing \(W_{V}\) and \(W_{O}\) to the identity with separate, learnable scalars and training a model. Over the course of training, the scalar ratios converged towards zero<d-footnote>This is a slight simplification. Look at Section 4.2 and Figures 4 and 20 in He et al. 2023 for a more detailed explanation.</d-footnote>. Due to the heavy cost and speed these linear layers present, removal of them decreases parameter counts and enables more data throughput. 
A concise PyTorch interpretation of the new block can be seen below:</p> <div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kn">import</span> <span class="n">torch</span> <span class="kn">import</span> <span class="n">torch.nn</span> <span class="k">as</span> <span class="n">nn</span> <span class="k">class</span> <span class="nc">ShapedAttention</span><span class="p">(</span><span class="n">nn</span><span class="p">.</span><span class="n">Module</span><span class="p">):</span> <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="n">self</span><span class="p">,</span> <span class="n">width</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">n_hidden</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">num_heads</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span> <span class="nf">super</span><span class="p">().</span><span class="nf">__init__</span><span class="p">()</span> <span class="c1"># Determining if hidden dimension of attention layer is divisible by number of heads </span> <span class="k">assert</span> <span class="n">width</span> <span class="o">%</span> <span class="n">num_heads</span> <span class="o">==</span> <span class="mi">0</span><span class="p">,</span> <span class="sh">"</span><span class="s">Width and number of heads are not divisble.</span><span class="sh">"</span> <span class="c1"># Setting vars </span> <span class="n">self</span><span class="p">.</span><span class="n">head_dim</span> <span class="o">=</span> <span class="n">n_hidden</span> <span class="o">//</span> <span class="n">num_heads</span> <span class="n">self</span><span class="p">.</span><span class="n">num_heads</span> <span class="o">=</span> <span class="n">num_heads</span> <span class="c1"># Creating Linear Layers </span> <span class="n">self</span><span 
class="p">.</span><span class="n">W_K</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">width</span><span class="p">,</span> <span class="n">self</span><span class="p">.</span><span class="n">head_dim</span><span class="p">)</span> <span class="n">self</span><span class="p">.</span><span class="n">W_Q</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">width</span><span class="p">,</span> <span class="n">self</span><span class="p">.</span><span class="n">head_dim</span><span class="p">)</span> <span class="c1"># Learnable Scalars: alpha_init and beta_init are up to user </span> <span class="n">self</span><span class="p">.</span><span class="n">alpha</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Parameter</span><span class="p">(</span><span class="n">alpha_init</span><span class="p">)</span> <span class="n">self</span><span class="p">.</span><span class="n">beta</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Parameter</span><span class="p">(</span><span class="n">beta_init</span><span class="p">)</span> <span class="c1"># Softmax </span> <span class="n">self</span><span class="p">.</span><span class="n">softmax</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Softmax</span><span class="p">(</span><span class="n">dim</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="n">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">torch</span><span class="p">.</span><span class="n">Tensor</span><span class="p">)</span> <span 
class="o">-&gt;</span> <span class="n">torch</span><span class="p">.</span><span class="n">Tensor</span><span class="p">:</span> <span class="c1"># Input: </span> <span class="c1"># x: shape (B x T x dim) </span> <span class="c1"># Outputs: </span> <span class="c1"># attn_output: shape (B x T x width) </span> <span class="n">attn_output</span> <span class="o">=</span> <span class="bp">None</span> <span class="c1"># Compute keys and queries </span> <span class="n">k</span> <span class="o">=</span> <span class="n">self</span><span class="p">.</span><span class="nc">W_K</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="n">q</span> <span class="o">=</span> <span class="n">self</span><span class="p">.</span><span class="nc">W_Q</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="c1"># Scaled dot-product </span> <span class="n">attn_scores</span> <span class="o">=</span> <span class="n">torch</span><span class="p">.</span><span class="nf">bmm</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">k</span><span class="p">.</span><span class="nf">transpose</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">))</span> <span class="o">/</span> <span class="p">(</span><span class="n">self</span><span class="p">.</span><span class="n">head_dim</span><span class="o">**-</span><span class="mf">0.5</span><span class="p">)</span> <span class="n">attn_scores</span> <span class="o">=</span> <span class="n">self</span><span class="p">.</span><span class="nf">softmax</span><span class="p">(</span><span class="n">attn_scores</span><span class="p">)</span> <span class="c1"># Shaped attention </span> <span class="n">B</span><span class="p">,</span> <span class="n">T</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">x</span><span class="p">.</span><span 
class="n">shape</span> <span class="n">output</span> <span class="o">=</span> <span class="n">self</span><span class="p">.</span><span class="n">alpha</span><span class="o">*</span><span class="n">torch</span><span class="p">.</span><span class="nf">eye</span><span class="p">(</span><span class="n">T</span><span class="p">,</span> <span class="n">device</span> <span class="o">=</span> <span class="n">x</span><span class="p">.</span><span class="n">device</span><span class="p">)</span> <span class="o">+</span> <span class="n">self</span><span class="p">.</span><span class="n">beta</span> <span class="o">*</span> <span class="n">attn_scores</span> <span class="k">return</span> <span class="n">output</span> </code></pre></div></div> <p>The performance of the final transformer block (referred to as SAS-P) demonstrated powerful results. In the <a href="#figure-5">Figure</a>, the simplified transformer matches the standard block in cross-entropy loss even when taken through a long runtime. Additionally, Figure 6 in <d-cite key="He2023"></d-cite> demonstrates the model scales better with depth which is of paramount importance in modern neural network.</p> <div class="col-sm text-center"> <a name="figure-5"></a> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-diaz-proposal/fig5-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-diaz-proposal/fig5-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-diaz-proposal/fig5-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-diaz-proposal/fig5.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 5: Training speed experiment. Figure 6. in Ref. 15. 
Pre-LN is the standard transformer block. SAS-P is the block. It is shown with and without an initial layer normalization. </div> </div> <h2 id="translation-to-vision-experimentation-and-analysis">Translation to Vision: Experimentation and Analysis</h2> <p>The results shown in <d-cite key="He2023"></d-cite> show promise the transformer can be improved. Motivated by vision applications, we seek to implement such infrastructure, with slight modifications, and determine if it improves performance in small datasets.</p> <h3 id="vanilla-vs-simplified-comparison">Vanilla vs. Simplified Comparison</h3> <p>For evaluation, we compare the simplified transformer to a vanilla ViT. The vanilla ViT’s transformer block is identical to the formulation presented earlier. We use Conv2D patch embedding with a random initial positional embedding. For the simplified setup, we initialize \(\alpha = \beta = 0.5\) and do not use a centering matrix – although it has been shown to improve ViT performance<d-cite key="2306.01610"></d-cite>. We use one Layer Normalization just prior to the transformer encoder. \(\text{Width} = 96\) is kept uniform throughout the model. The dataset is CIFAR-10 with a \(\text{batch size} = 256\). Data augmentations were random horizontal and vertical flips with 15º random rotations. Optimizer is AdamW with \(\text{lr} = 0.003\) and \(\text{weight decay} = 0.01\). We employ a cosine learning rate scheduler to maintain consistency with ViT literature, although <d-cite key="He2023"></d-cite> empirically showed a linear learning rate is slightly advantegeous<d-footnote>Figure 11 in He et. al 2023.</d-footnote>. We ran our model for \(\text{epochs} = 50\) with \(\text{runs} = 3\) to evalute run-to-run stability. A condensed version of the experiment choices can be seen in Table 1. The results can be seen in <a href="#figure-6">Figure 6</a> and Table 2<d-footnote>To avoid clutter, only the training accuracies vs epochs are shown. 
Loss curves showed similar relationships.</d-footnote>.</p> <table> <thead> <tr> <th>Table 1. Experiment 1: ViT Model Settings</th> <th> </th> </tr> </thead> <tbody> <tr> <td># of channels</td> <td>3</td> </tr> <tr> <td>Image size</td> <td>32</td> </tr> <tr> <td>Patch size</td> <td>4</td> </tr> <tr> <td>Width</td> <td>96</td> </tr> <tr> <td># of heads</td> <td>4</td> </tr> <tr> <td># of layers</td> <td>8</td> </tr> </tbody> </table> <div class="l-page" style="display: flex; flex-direction: column; align-items: center; margin-bottom: 0px;"> <a name="figure-6"></a> <iframe src="/staging/assets/html/2023-11-08-diaz-proposal/exp1_w96.html" frameborder="0" scrolling="no" height="500px" width="70%"></iframe> <div class="caption" style="margin-top: 10px; text-align: center;"> Figure 6. Comparison between vanilla and simplified transformers. Width = 96. Layers/Depth = 8. </div> </div> <table> <thead> <tr> <th>Table 2. Experiment 1: Results</th> <th style="text-align: center">Vanilla</th> <th style="text-align: right">Simplified</th> <th style="text-align: right">\(\Delta\)</th> </tr> </thead> <tbody> <tr> <td>Parameters</td> <td style="text-align: center">358186</td> <td style="text-align: right">209210</td> <td style="text-align: right">-41.59%</td> </tr> <tr> <td>Avg. epoch time (s)</td> <td style="text-align: center">12.954</td> <td style="text-align: right">11.305</td> <td style="text-align: right">-12.73%</td> </tr> </tbody> </table> <p>Experiment 1 showed the training evaluation trajectory is nearly identical between the two models although the simplified outperforms by a small margin. Despite the subtle difference, it is noteworthy to mention the simplified version achieved mirroring performance with fewer parameters and higher image throughput. 
The similarity of the curves hints the removal of the skip connections, layer normalizations, and value/projection layers were merited, begging the question whether these components held our modeling power back.</p> <p>This experimentation shows the similar nature of each model, but does not translate well to wider modern neural networks. In Experiment 2, we expanded to \(\text{width} = 128\) to determine if there is any emergent behaviour as the network becomes wider. We replicate everything in Experiment 1 and solely modify the width. The settings are restated in Table 3. The results for Experiment 2 can be seen in <a href="#figure-7">Figure 7</a> and Table 4 below.</p> <table> <thead> <tr> <th>Table 3</th> <th>Experiment 2: ViT Model Settings</th> </tr> </thead> <tbody> <tr> <td># of channels</td> <td>3</td> </tr> <tr> <td>Image size</td> <td>32</td> </tr> <tr> <td>Patch size</td> <td>4</td> </tr> <tr> <td>Width</td> <td>128</td> </tr> <tr> <td># of heads</td> <td>4</td> </tr> <tr> <td># of layers</td> <td>8</td> </tr> </tbody> </table> <div class="l-page" style="display: flex; flex-direction: column; align-items: center; margin-bottom: 0px;"> <a name="figure-7"></a> <iframe src="/staging/assets/html/2023-11-08-diaz-proposal/exp2_w128.html" frameborder="0" scrolling="no" height="500px" width="70%"></iframe> <div class="caption" style="margin-top: 10px; text-align: center;"> Figure 7. Comparison between vanilla and simplified transformers. Width = 128. Layers/Depth = 8. </div> </div> <table> <thead> <tr> <th>Table 4. Experiment 2: Results</th> <th style="text-align: center">Vanilla</th> <th style="text-align: right">Simplified</th> <th style="text-align: right">\(\Delta\)</th> </tr> </thead> <tbody> <tr> <td>Parameters</td> <td style="text-align: center">629130</td> <td style="text-align: right">364954</td> <td style="text-align: right">-41.99%</td> </tr> <tr> <td>Avg. 
epoch time (s)</td> <td style="text-align: center">13.093</td> <td style="text-align: right">11.735</td> <td style="text-align: right">-10.37%</td> </tr> </tbody> </table> <p>The narrative is different for Experiment 2. The simplified version outperforms the vanilla version by a considerable margin. An adequate explanation for this discrepancy in vision tasks merits further exploration. However, considering the proposed unnecessary nature of the value and projection matrices, we can hypothesize they interfere with the modeling capability as more parameters are introduced.</p> <p>Due to the sheer difference in outcomes between the models, we question how the models are attending towards various inputs to gain a better understanding of what is happening under the hood. To probe this curiosity, we trained the models with identical setting in Experiment 2, but modified the \(\text{depth} = \text{layers} = 12\). This model setup will be covered in more detail in future paragraphs. We inputted CIFAR-10 to each model and visualized a side-by-side comparison of attention maps for five input images. An interactive figure is seen <a href="#figure-8">Figure 8</a>.</p> <div class="l-page" style="display: flex; flex-direction: column; align-items: center; margin-bottom: 0px;"> <div style="display: flex; flex-direction: column; align-items: center;"> <a name="figure-8"></a> <iframe src="/staging/assets/html/2023-11-08-diaz-proposal/attention_maps.html" frameborder="0" scrolling="no" height="600px" width="70%"></iframe> <div class="caption" style="margin-top: 10px; text-align: center;"> Figure 8. Comparison between vanilla and simplified attention maps. Width = 128. Layers/Depth = 12. Interpolation method: "nearest". </div> </div> </div> <p>There is a noticeable contrast in the attention maps. For the simplified model, the attention maps seem to place weight in a deliberation manner, localizing the attention towards prominent features in the input image. 
On the other hand, the vanilla model is chaotic in its attention allocation. It is noteworthy that the vanilla model does place attention towards areas of interest, but also attends towards irrelevant information, perhaps compromising its judgement at the time of classification. It can thus be reasoned the simplified model can better decipher which features are relevant, demonstrating that, even in low data regimes, the representational quality is increased.</p> <p>While we have so far investigated width, it will be informative to understand how depth impacts the performance of the simplified version. In <d-cite key="He2023"></d-cite>, they employ signal propagation theory, which is most prominent in deeper networks. Therefore, we suspect as we increase the depth of our models, the simplified version will outperform the vanilla version by a larger margin. Here, we set \(\text{layers} = 12\) and maintain \(\text{width}=128\). The training accuracies and experiment results are seen in <a href="#figure-9">Figure 9</a> and Table 5.</p> <div class="l-page" style="display: flex; flex-direction: column; align-items: center; margin-bottom: 0px;"> <a name="figure-9"></a> <iframe src="/staging/assets/html/2023-11-08-diaz-proposal/exp3_w128_l12.html" frameborder="0" scrolling="no" height="500px" width="70%"></iframe> <div class="caption" style="margin-top: 10px; text-align: center;"> Figure 9. Comparison between vanilla and simplified transformers. Width = 128. Layers/Depth = 12. </div> </div> <table> <thead> <tr> <th>Table 5. Experiment 3: Results</th> <th style="text-align: center">Vanilla</th> <th style="text-align: right">Simplified</th> <th style="text-align: right">\(\Delta\)</th> </tr> </thead> <tbody> <tr> <td>Parameters</td> <td style="text-align: center">927370</td> <td style="text-align: right">531106</td> <td style="text-align: right">-42.72%</td> </tr> <tr> <td>Avg. 
epoch time (s)</td> <td style="text-align: center">17.527</td> <td style="text-align: right">15.723</td> <td style="text-align: right">-10.29%</td> </tr> </tbody> </table> <p>Again, the simplified model outperforms the vanilla model by a large margin. Although we have focused on performance in the past, we discern an interesting trend when we scaled the depth: the simplified version seemed to be more consistent from run-to-run (recall \(\text{runs} = 5\)). This leads us to believe that as we continue to scale the depth, the simplified version will be more stable. Future experimentation will be necessary to corroborate this claim.</p> <h3 id="initialization-schemes">Initialization Schemes</h3> <p>We have seen the impact simplification can have on transformer performance and self-attention. However, the initializations of \(\alpha\) and \(\beta\) used in Experiments 1, 2, and 3 were based on equal weighting between the initial attention matrix and the identity matrix. In <d-cite key="He2023"></d-cite>, they employ a full weighting of the identity matrix and zeroed out the attention matrix at initialization. Here, we aim to determine the effect of different initialization values. Recall \(\alpha = \beta = 0.5\) in Experiments 1, 2, 3. Now, we investigate two more initialization schemes: \(\alpha = 1.0\) and \(\beta = 0.0\) and vice-versa. We replicate the protocol used in Experiment 2 and only modify these learnable scalars at initialization and set \(\text{runs} = 1\). The results are shown in <a href="#figure-10">Figure 10</a>. Interestingly, the initialization scheme proposed by <d-cite key="He2023"></d-cite> does <em>not</em> outperform the equal weighting or inverse weighting scheme. Understandably, it does poorly at initialization, but never recovers. 
The equal weighting and inverse weighting approaches show nearly identical performance, often trading off superior performance from epoch-to-epoch.</p> <div class="l-page" style="display: flex; flex-direction: column; align-items: center; margin-bottom: 0px;"> <a name="figure-10"></a> <iframe src="/staging/assets/html/2023-11-08-diaz-proposal/exp4_init_new.html" frameborder="0" scrolling="no" height="500px" width="80%"></iframe> <div class="caption" style="margin-top: 10px; text-align: center;"> Figure 10. Various Initialization Schemes. </div> </div> <p>This led us to believe the initialization scheme could be improved. There has been some work on initializing vanilla ViTs<d-cite key="Trockman2023"></d-cite> to gain performance. In <d-cite key="Trockman2023"></d-cite>, a prominent diagonal was observed for the \(W_{q}W_{k}^{T}\) layers in ViT’s pre-trained on large datasets, which have been shown to outperform CNNs. The figure shown in the paper can be seen in <a href="#figure-11">Figure 11</a>. This motivated the authors to provide a novel initialization scheme where the \(W_{Q}\) and \(W_{K}\) matrices are initialized in a way to encourage diagonal prominence in the forward pass. However, our findings contradicted this scheme, as our diagonal-dominant initialization scheme \(\alpha = 1\) and \(\beta = 0\) did not outperform the inverse or the equal weighting. This is likely due to the fact we have learnable parameters and do not initialize our \(W_{Q}\) and \(W_{K}\)’s directly, but rather the attention matrix post-softmax. However, it is important to realize that the learnable parameters still encourage diagonal prominence regardless of initialization. Although <d-cite key="Trockman2023"></d-cite> used this initialization scheme to increase performance in small ViT’s trained from scratch, which encourages tokens to attend to themselves through the depth of the network, they did not take into consideration how the diagonalization varies from layer-to-layer. 
Seen in <a href="#figure-11">Figure 11</a>, we can see the prominence of the diagonal elements fades as we go deeper into the network. Observing this behaviour, we hypothesize the reason the initialization scheme of \(\alpha = 1\) and \(\beta = 0\) underperformed was not due to the initialization itself, but how it was applied to each layer. In other words, when we initialized \(\alpha = 1\) and \(\beta = 0\), we encouraged this token self-attentive nature throughout the depth of the network, when we should be encouraging it in the opening layers and tapering it off as we approach the end of the model.</p> <p>To give more evidence to this hypothesis, we experimented with the following dynamic initialization scheme:</p> \[\displaylines{ \alpha_i = \frac{1}{i}, \beta_i = 1 - \frac{1}{i} \\ \text{ where } i \in [1, 2, ..., L] \text{ and } L = \text{# of layers} }\] <p>The results from this initialization scheme compared to the uniform initializations can be seen in <a href="#figure-12">Figure 12</a>. The results show that the dynamic scheme outperforms the uniform initializations, perhaps indicating the representation quality is connected to encouraging self-token connection in the lower layers, while allowing tokens to intermingle in higher layers. We further experiment with the inverse dynamic scheme, where we switch the \(\alpha\) and \(\beta\) values. 
The results in <a href="#figure-13">Figure 13</a> show the dynamic approach is stronger during training than the inverse dynamic approach.</p> <div class="col-sm text-center"> <a name="figure-11"></a> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-diaz-proposal/diagonal_vit_tiny-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-diaz-proposal/diagonal_vit_tiny-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-diaz-proposal/diagonal_vit_tiny-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-diaz-proposal/diagonal_vit_tiny.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 11: Diagonal prominence in a pre-trained ViT Tiny. Layers 1-11 (Left-to-Right). Heads 1-3 (Top-to-Bottom). Extracted from Figure 1 of <a href="https://arxiv.org/abs/2305.09828">Mimetic Initialization of Self-Attention Layers</a>. </div> </div> <div class="l-page" style="display: flex; flex-direction: column; align-items: center; margin-bottom: 0px;"> <a name="figure-12"></a> <iframe src="/staging/assets/html/2023-11-08-diaz-proposal/exp5_init_dynamic.html" frameborder="0" scrolling="no" height="500px" width="80%"></iframe> <div class="caption" style="margin-top: 10px; text-align: center;"> Figure 12. Experiment 5: Dynamic vs. Uniform Initializations. 
</div> </div> <div class="l-page" style="display: flex; flex-direction: column; align-items: center; margin-bottom: 10px;"> <a name="figure-13"></a> <iframe src="/staging/assets/html/2023-11-08-diaz-proposal/exp6_init_inverse.html" frameborder="0" scrolling="no" height="500px" width="80%"></iframe> <div class="caption" style="margin-top: 10px; text-align: center;"> Figure 13. Experiment 6: Dynamic vs. Inverse Dynamic Initializations. </div> </div> <h2 id="conclusion-and-limitations">Conclusion and Limitations</h2> <p>Through this blog post we have overviewed the simplification of our known transformer block and novel initialization schemes. We took the problem of small-scale training of ViT’s and looked to address it leveraging such ideas. Through a series of experiments and thoughtful schemes, we generated an informed and sophisticated approach to tackle such a problem. In the end, we generated a method that outperformed a traditional ViT in small scales. We explored ways of scaling the ViT in width and depth and probed how the new model distributed attention. Our comparisons were intentionally simple and effective in addressing the underlying task and illustrating the model’s potential. Although the results presented showed promise, extensive validation needs to be performed in the future. It will be interesting to see how this new transformer block and initialization scheme can be further utilized in computer vision. For example, a logical next route to entertain is to compare convergence rates in larger scale ViTs on datasets such as ImageNet-21k to see if the modeling advantage persists.</p> <p>There are a few limitations in this study. For one, only one dataset was used. Using other datasets such as CIFAR-100 or SVHN would provide more insight into this methodology. Secondly, there is a need for more comprehensive evaluation and ablation studies to determine the true nature of the simplified transformer and initialization schemes. 
Third, a comparison to a smaller scale CNNs is needed to gauge where this method comparatively sits in modeling power.</p> <div class="l-page" style="display: flex; flex-direction: column; align-items: center; margin-bottom: 10px;"> <a name="figure-14"></a> <iframe src="/staging/assets/html/2023-11-08-diaz-proposal/exp0.html" frameborder="0" scrolling="no" height="500px" width="80%"></iframe> <div class="caption" style="margin-top: 10px; text-align: center;"> Figure 14. Experiment 0: Removal of skip connections in traditional ViT. </div> </div> Guided Transfer Learning and Learning How to Learn: When Is It Useful? 2023-12-10T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/guided-transfer-learning <h1 id="introductionmotivation-never-enough-data">Introduction/Motivation: Never Enough Data</h1> <p>If we take a step back and reflect upon the current state of AI, especially in domains like computer vision and NLP, it appears that the gap between machine and human intelligence is rapidly narrowing. In fact, if we only consider aspects such as the predictive accuracy of discriminatory models and the sensibility of outputs by generative models, it may seem that this gap is almost trivial or even nonexistent for many tasks. However, every time we submit a training script and leave for the next few hours (or few weeks), it becomes abundantly clear that AI is still nowhere near human intelligence because of one critical kryptonite: the amount of data needed to effectively train AI models, especially deep neural networks.</p> <p>While we have tons of training data in domains such as general computer vision (e.g. ImageNet) and NLP (e.g. the entirety of the internet), other domains may not have this luxury. For example, bulk RNA-sequencing data in biomedical research is notoriously cursed with high dimensionality and extremely low sample size. Training AI models on bulk RNA-sequencing datasets often leads to severe overfitting. 
In order to successfully utilize AI in domains like biomedicine, the highest priority challenge that must be addressed is that of overcoming the necessity of exuberant amounts of training data.</p> <h2 id="machine-vs-human-intelligence">Machine vs Human Intelligence</h2> <p>It often feels like the requirement of having abundant training samples has been accepted as an inevitable, undeniable truth in the AI community. But one visit to a preschool classroom is all that it takes to make you question why AI models need so much data. A human baby can learn the difference between a cat and a dog after being shown one or two examples of each, and will generally be able to identify those animals in various orientations, colors, contexts, etc. for the rest of its life. Imagine how much more preschool teachers would have to be paid if you needed to show toddlers thousands of examples (in various orientations and augmentations) just for them to learn what a giraffe is.</p> <p>Fortunately, humans are very proficient at few-shot learning– being able to learn from few samples. Why isn’t AI at this level yet? Well, as intelligence researchers have discussed <d-cite key="Nikoli2017"></d-cite>, biological brains are not born as empty slates of neurons with random initial connections. Millions of years of evolution have resulted in us being born with brains that are already predisposed to learn certain domains of tasks very quickly, such as image recognition and language acquisition tasks. In these domains, learning a specific task like differntiating between a cat and a dog or between letters of the English alphabet doesn’t require exposure to many samples. Additionally, as we gain more experiences throughout life, we acquire general knowledge that can help us learn new tasks more efficiently if they’re similar to something we’ve learned before. 
Thus, naturally, the first step toward bridging the gap between natural and machine intelligence is somehow finding a way to predispose an AI to be able to learn any <em>specific</em> task within a certain domain with very few samples. The advent of traditional transfer learning has attempted to approach this predisposition task from the “general knowledge” perspective.</p> <h2 id="traditional-transfer-learning-learning-general-knowledge">Traditional Transfer Learning: Learning General Knowledge</h2> <p>Transfer learning has been invaluable to almost all endeavors in modern deep learning. One of the most common solutions for tasks that have too little training data is to first pre-train the model on a large general dataset in the same domain, and then finetune the pre-trained model to the more specific downstream task. For example, if we need to train a neural network to determine whether or not a patient has a rare type of cancer based on an X-ray image, we likely will not have enough data to effectively train such a model from scratch without severe overfitting. We can, however, start with a model pre-trained on a large image dataset that’s not specific to cancer (e.g. ImageNet), and if we start training from those <em>pre-trained</em> weights, the downstream cancer diagnostic task becomes much easier for the neural network to learn despite the small dataset size.</p> <p>One way to intuitvely understand why this is the case is through the lens of “general knowledge”. <d-cite key="Nikoli2017"></d-cite><d-cite key="gtl"></d-cite> When the model is pre-trained on ImageNet data, it learns a lot of knowledge about image data <em>in general</em>; for example, the earlier layers of the model will learn low-level features detectors (e.g. edge detectors, simple shape detectors, etc.) that will likely be useful for <em>any</em> computer vision task. This can be viewed as the model learning “general knowledge” about the domain of image data broadly. 
When we then fine-tune this model on a cancer dataset, the model doesn’t have to relearn the ability to detect these general, low-level features. This general knowledge encoded in the pre-trained weights regularizes the model and mitigates overfitting, as it <em>predisposes</em> the model to learn relationships/feature detectors that are generalizable and sensible within the context of image data.</p> <p>However, if transfer learning could solve all our problems, this blog post wouldn’t exist. When our downstream dataset is in the extremeties of the high dimensional, low sample size characterization (e.g. in fields like space biology research, more on this later), learning general knowledge in the form of pre-trained weights isn’t enough. <d-cite key="hldsshard"></d-cite> <d-cite key="hldsshard2"></d-cite> How, then, can we predispose models such that they can do extreme few-shot learning, or even <em>one-shot</em> learning? Enter guided transfer learning.</p> <hr /> <h1 id="guided-transfer-learning-and-meta-learning-learning-inductive-biases">Guided Transfer Learning and Meta-learning: Learning <em>Inductive Biases</em></h1> <p>Guided transfer learning (GTL) <d-cite key="gtl"></d-cite> is a meta-learning paradigm proposed by the group <a href="https://robotsgomental.com">Robots Go Mental</a>. The main idea for guided transfer learning is that, instead of just having the AI model learn general knowledge, we also want the AI to learn <em>how</em> to learn. Specifically, we want it to learn how to pick up new knowledge <em>most efficiently</em> for a particular domain/modality of data. This means during pre-training, the model, in addition to learning good initial weights, will also learn <strong><em>inductive biases</em></strong> that affect the future, downstream training <em>process</em>.</p> <p>GTL is a very novel method; its preprint was just released in the past few months! 
Hence, beyond the experiments in the original preprint, there has not been much exploration of some of its behavioral nuances and various application scenarios. So in this blog, I will be doing a few experiments that attempt to gain more insight into some of my questions that were left unanswered by the original GTL paper.</p> <p>But before we get to that, let’s first get a rundown on how GTL works! The two most important concepts in GTL are <strong>scouting</strong> and <strong>guide values</strong>.</p> <h2 id="scouting">Scouting</h2> <p>Inductive biases, which affect what kind of functions a model can learn, are usually <em>built into</em> the choice of deep learning architecture, or decided by other hyperparameters we humans choose. With guided transfer learning, they can now be <em>learned</em> automatically during pre-training. It’s almost like the model is figuring out some of its own optimal hyperparameters for learning in a particular domain.</p> <p>Sounds like magic, right? How does GTL allow a model to <em>learn</em> inductive biases? Well, the core behind the GTL approach is a process known as <strong>scouting</strong>, which is an alternative to traditional pre-training. The high-level idea is that it trains copies of the model, called scouts, on easier subproblems. These subproblems should be similar to the target downstream tasks, but easier so that the scouts are more likely to successfully converge to a generalizable model. (If the scouts themselves overfit, then how can the inductive biases they learn help our downstream few-shot training not overfit?)</p> <p>In the process of converging, the scouts keep track of which parameters in the model are important to keep flexible for efficient convergence and which ones aren’t. 
They’re basically logging their learning process.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/scouting-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/scouting-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/scouting-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-02-guided-transfer-learning/scouting.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>For example, if weight <code class="language-plaintext highlighter-rouge">A</code> increases drastically during training, it’s probably an important weight to change and we should keep it flexible. On the other hand, if weight <code class="language-plaintext highlighter-rouge">B</code> doesn’t change much at all or fluctuates in a very noisy manner (i.e. doesn’t change <em>meaningfully</em>), it is probably not as important to change.</p> <p>After the scouts are finished training, the collective feedback from all the scouts is used to decide what inductive biases to impose on the <em>main model</em>, such that the main model can learn most efficiently for the particular domain of data and avoid <em>wasting effort</em> and being <em>distracted/misguided</em> by changing parameters that don’t really help in that domain.</p> <h2 id="guide-values">Guide Values</h2> <p>So what do these “inductive biases” actually look like, and how do they affect future training? The inductive biases in the context of GTL come in the form of <strong>guide values</strong>. 
So after scouting, each parameter will not only have its usual weight value, but it will also have a guide value. During gradient decent, the normal update for a particular weight is then multiplied by its corresponding guide value. Thus, the larger the guide value, the more that parameter is allowed to change during downstream training.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/guide_values_1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/guide_values_1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/guide_values_1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-02-guided-transfer-learning/guide_values_1.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/guide_values_2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/guide_values_2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/guide_values_2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-02-guided-transfer-learning/guide_values_2.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> 
</picture> </figure> </div> </div> <div class="caption"> In this very simple neural network with two weights, we can see here that weight `A` has a guide value of 0.56, while weight `B` has a guide value of merely 0.01. Thus, weight `A` is more flexible, in other words allowed to change more, than weight `B` during downstream training. In fact, weight `B` is pretty much frozen, as its guide value of 0.01 makes it so that weight B can barely change throughout training. </div> <p>Thus, the goal of scouting is to <strong>find these optimal guide values</strong>, which will ultimately make the training <em>process</em> more sparse (i.e. so that only the weights that are useful to change get changed). Note that this is different from making the <em>neural network model itself</em> more sparse (i.e. setting weights/connections that are useless to zero).</p> <h2 id="calculating-guide-values">Calculating Guide Values</h2> <p>So how do we actually get the guide values after training the scouts? Well, as mentioned above, we keep track of how parameters change during the scout training processes. Specifically, during the training of each scout, we log the initial value and final value (i.e. value after convergence) of each parameter in the model. Then, we calculate how much each parameter changes throughout the process of convergence via some distance metric between its initial and final value. The default used in the GTL paper was the squared distance: \((w_b - w_f)^2\), where \(w_b\) is the baseline (initial) value of the parameter \(w\), and \(w_f\) is its final value.</p> <p>Now, each scout will converge differently, since they are trained on slightly different subproblems (more on this later). To have a robust estimator of how much some parameter \(w\) changes during convergence, we take the mean squared change of the parameter across <em>all</em> the scouts. 
Let’s call this value \(m_w\).</p> <p>Assuming we have \(N\) scouts, this would be: \(m_w = \frac{1}{N}\sum_{i=1}^{N}(w_{b,i} - w_{f,i})^2\), where \(w_{b,i}\) and \(w_{f,i}\) are the initial and final values (respectively) of parameter \(w\) in scout \(i\).</p> <p>Add on a 0-1 normalization across the \(m_w\)s of <em>all</em> the parameters in the model, and we have our guide values (all of which are between 0 and 1)!</p> <p>Intuitively, we can see that parameters that changed a lot throughout the convergence process in the scout models are deemed “important to change during training” and are thus given higher guide values (i.e. closer to 1), allowing them to be more flexible for downstream fine-tuning.</p> <p>It’s really quite an elegant and simple approach, which is the beauty of it! It’s comparably lightweight in terms of both memory and computation compared to many other popular meta-learning/few-shot learning methods. <d-cite key="NIPS2016_90e13578"></d-cite> <d-cite key="woodward2017active"></d-cite> The devil is in the details on how to design the subproblems for scouts, which will be one of the main aspects we will be exploring in the experiments later in this blogpost.</p> <h2 id="experiment-and-results-from-the-gtl-paper">Experiment and Results from the GTL Paper</h2> <p>Before we get started with some of our own experiments to explore more nuances of GTL behavior and benefits, it might be nice to establish that– <strong>Yes, it does work!</strong> Or, it at least provides very impressive benefits.</p> <p>In the original GTL paper, Nikolić et al. tested how much benefit GTL would provide for few-shot learning tasks specifically in the domain of computer vision. Specifically, they tested one-shot learning capability on the Omniglot dataset. 
<d-cite key="omniglot"></d-cite> The Omniglot dataset is a popular few-shot learning dataset containing characters from 50 different alphabets, with only 20 handwritten examples of each (I will also be using this dataset for a couple of my experiments below). The conventional problem set up with Omniglot is to train/fine-tune your model on just <em>one</em> example of each character, and use the rest of the examples for validation/testing.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/omniglot-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/omniglot-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/omniglot-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-02-guided-transfer-learning/omniglot.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/omniglot2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/omniglot2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/omniglot2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-02-guided-transfer-learning/omniglot2.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; 
$('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> To test one-shot learning, a model is trained/fine-tuned on one example of a character and tested on its ability to identify that character in validation samples. Figure from Nikolić et al. <d-cite key="gtl"></d-cite> </div> <p>To prepare a one-shot learner for this task, Nikolić et al. pre-trained a very basic CNN using the following GTL pipeline:</p> <ol> <li><strong>Pre-train the model <em>traditionally</em></strong> on MNIST (lots of data there!). The goal here is to have the model acquire general knowledge in the form of pre-trained weights. No inductive biases yet.</li> <li><strong>Scouting.</strong> The meat of GTL, where <em>inductive biases</em> are learned!</li> <li><strong>Downstream fine-tuning and evaluation</strong> on Omniglot using the one-shot scheme described above.</li> </ol> <p>The most interesting part is the second step: scouting! Remember, we have the following criteria for the scout problems:</p> <ul> <li>There needs to be <em>multiple</em> different scouting problems (so the we can have an ensemble of <em>different</em> scouts contributing to the guide value calculations, making the guide values more robust)</li> <li>The scout problems need to be <em>easy</em> enough so that the scouts can actually successfully learn generalizable models! Again, if the scouts themselves overfit, the guide values derived form them won’t be very helpful for downstream one-shot learning :)</li> <li>The scout problems need to be <em>similar</em> to the downstream task, i.e. in the same domain (in this case, computer vision) and of the same kind of problem (e.g. in this case, classification). If the scout problems are too different, why would the inductive biases be transferable?</li> </ul> <p>Given these criteria, Nikolić et al. 
used the following scheme for generating scouting tasks:</p> <ul> <li>Create <em>subdatasets</em> of MNIST (termed “cousin” problems in the paper), where each subdataset/cousin contains data for only <em>three</em> of the digits in MNIST (120 of these cousin datasets were created in the paper).</li> <li>Train a scout on each of the cousin problems (120 scouts total).</li> </ul> <p>This scheme satisfies all three criteria above. We now have multiple different scouting problems. These scouting problems are also comparatively way easier than the downstream task (there’s <em>way</em> more training data than Omniglot, and it’s only a 3-category classification problem). BUT, despite being easier, they’re still similar enough to the downstream task such that we can expect transferability (it’s still a handwritten character image classification task, after all).</p> <p>And this worked quite spectacularly! Here are the results from their paper:</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/paperresults1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/paperresults1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/paperresults1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-02-guided-transfer-learning/paperresults1.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/paperresults2-480.webp" /> 
<source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/paperresults2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/paperresults2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-02-guided-transfer-learning/paperresults2.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Left plot shows validation accuracy curves for a mdoel that was traditionally pre-trained and a model that was pre-trained with the addition of GTL. Right plot shows the distribution of guide values in the model that was pre-trained with GTL. Figures from Nikolić et al. <d-cite key="gtl"></d-cite> </div> <p>The plot on the left shows the validation curves for the downstream one-shot Omniglot task for 1) a model that was pre-trained traditionally (line in blue) and 2) the model that was pre-trained traditionally <em>and</em> underwent GTL scouting (line in orange). Although the GTL model was still only to get around 25% validation accuracy, that’s quite impressive for only getting one example of each character, and is a signficant improvement over the model that only experienced traditional pre-training.</p> <p>Interestingly, the plot on the right plots the distribution of guide values. We see a heavy right skew, indicating that most of the guide values are very close to 0! This means downstream fine-tuning has been made <em>very</em> sparse (very few parameters were allowed to change drastically), providing very strong inductive biases that heaviliy influenced <em>how</em> the model was allowed to learn. These inductive biases, as the results suggest, seem to be correct for the task at hand. 
But that shouldn’t be surprising because they were, in a way, <em>learned</em>.</p> <p><strong>And that is the beauty of GTL. We no longer have to “guess” what inductive biases (often in the form of architectural choices) might be appropriate for a certain domain; instead, we have these biases be “learned”!</strong></p> <hr /> <h1 id="answering-unanswered-questions-exploring-the-nuances">Answering Unanswered Questions: Exploring the Nuances</h1> <p>Now that we see GTL does provide noticeable benefit for one-shot learning tasks based on the experiemental results from Nikolić et al., I would like to run some additional experiments of my own to explore some of the nuances of when GTL can be helpful, how we can optimize the benefit we get from using it, and how we should go about designing scout problems. These questions had not been explored in the original GTL paper, and since no other piece of literature has yet to even mention GTL, I thought I’d take the lead and try to gain some initial insight into some of these open topics :)</p> <h2 id="experiment-1-can-gtl-compensate-for-lack-of-pre-training-data-not-just-lack-of-fine-tuning-data">Experiment 1: Can GTL compensate for lack of <em>pre-training</em> data (not just lack of <em>fine-tuning</em> data)?</h2> <p>So we’ve established that GTL can aid in learning <em>downstream</em> tasks with few training samples, but it still requires a large amount of pre-training data (e.g. MNIST), much like traditional transfer learning. What I want to know now is: what if we don’t have <em>that</em> much pre-training data? In such <em>low pre-training-data contexts</em>, performance on downstream tasks usually suffers as a results when using traditional transfer learning. Can the addition of scouting/GTL <em>compensate</em> for this lack of pre-training data? 
That is, can a model pre-trained with a small pre-training dataset + GTL do as well as a model that’s just traditionally pre-trained on a large pre-training dataset?</p> <h3 id="setup">Setup</h3> <p>To do test this, I pre-train a small CNN with a very similar GTL pipeline as the one used by Nikolić et al., but using only a mere <em>1000</em> of the full 60,000 samples from the MNIST dataset during pre-training/scouting. A significantly smaller pre-training dataset! I’ll sometimes refer to this subset of MNIST as “small MNIST”. I then evaluate the performance of this model on an Omniglot one-shot task and compare it to 1) a model that is only traditionally pre-trained on small MNIST (no GTL) and 2) a model that is traditionally pre-trained on the full 60,000-sample MNIST (also no GTL).</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/exp1_diagram-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/exp1_diagram-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/exp1_diagram-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-02-guided-transfer-learning/exp1_diagram.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Experiment setup </div> <h3 id="downstream-task-specification">Downstream Task Specification</h3> <p>Note that the exact setup for the downstream Omniglot one-shot task used in the original GTL paper was not revealed. 
There are a few variations of one-shot learning setups, but the one I will be using is:</p> <ol> <li>Take a 100-cateogry subset of the full Omniglot dataset (that is, 100 unique characters)</li> <li>Train the model on <strong>one example</strong> of each unique character (i.e. 100 training samples total), and use the rest as a validation set (i.e. 1900 validation samples total)</li> <li>The task is thus a 100-way classification problem (given a handwritten image, predict which of the 100 characters it is)</li> </ol> <p>Since the specification above is likely not the exact Omniglot problem setup used by Nikolić et al., and the hyperparameters they used are also not specified in the original paper, some of the baseline results I’m using do not quite match to the corresponding results in the original paper.</p> <h3 id="results-and-analysis">Results and Analysis</h3> <p>With that said, here are the resulting <em>validation</em> accuracy and loss curves for the downstream Omniglot one-shot task described above:</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/exp1_val_acc-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/exp1_val_acc-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/exp1_val_acc-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-02-guided-transfer-learning/exp1_val_acc.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 
480px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/exp1_val_loss-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/exp1_val_loss-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/exp1_val_loss-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-02-guided-transfer-learning/exp1_val_loss.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Validation accuracy and loss curves for 1) model traditionally pre-trained with large MNIST, 2) model traditionally pre-trained with small MNIST, 3) model pre-trained with GTL and small MNIST. Note that an automatic early stopping mechanism was implemented to cut off when training when validation accuracy stopped increasing. </div> <p>As we can see, when GTL is not used, pre-training on a 1000 sample subset of MNIST results in notably worse performance on the one-shot downtream task compared to pre-training on the full 60,000 MNIST (16% vs 21% max validation accuracy). This is as expected.</p> <p><em>However</em>, if we use small MNIST <em>and</em> add scouting/GTL (using the same scout problem set up in the original GTL paper), we see that the resulting model ends up being able to reach <em>almost</em> the same max validation accuracy as the model traditionally pre-trained on the full MNIST dataset (20% vs 21%).</p> <p>What this suggests is that the <em>inductive biases</em> learned by GTL can compensate for any decrease in “general knowledge” (encoded in the form of pre-trained weights) that comes from having a smaller pre-training dataset. 
<strong>So not only is GTL helpful when you don’t have enough downstream data, it can also be helpful when you don’t have enough pre-training data!</strong></p> <p>Additionally, if we inspect the validation <em>losses</em>, we see that, depsite an apparent drop in validation accuracy, overfitting is still occuring in the shadows for all the models, as all the validation loss curves start rising after a certain point. However, the model that is pre-trained with GTL achieves the lowest validation loss of the three models before overfitting, and also starts overfitting the latest. So even though there’s no huge difference in the maximum validation accuracy achieved by the model that was pre-trained with GTL on small MNIST and the model that was traditionally pre-trained on full MNIST, the former is able to be <em>optimized further</em> before overfitting, suggesting that <strong>GTL with a small pre-training dataset provides a stronger “regularizing” effect than traditional transfer learning with a large pre-training dataset!</strong> This is certainly an interesting observation that could potentially have more obvious practical implications in certain scenarios, though we will not go into that further in this blog. The takeaway, however, is that GTL is, at the end of the day, really just a strong “regularizer”. If we look at how the orange and red curves look in both the accuracy and loss plots, we see the performance benefit that comes form adding GTL really just comes from the <em>delay of overfitting</em>. 
This regularization-based mechanism of performance improvement by GTL makes sense, as strong inductive biases hold the model back from learning “just anything” that fits the downstream training data.</p> <h2 id="experiment-2-how-does-the-design-of-the-scouting-task-affect-downstream-performance">Experiment 2: How does the design of the scouting task affect downstream performance?</h2> <p>Okay, it seems so far that the scouting pipeline used in the original GTL paper seems to be pretty helpful for various scenarios. But how did the authors arrive at that specific scouting task formulation? What if we used different scouting tasks than the ones they did? How does that affect GTL performance, and what might such differences (if any) imply? After all, when we leave the context of MNIST and Omniglot, we’ll have to be designing these scouting tasks on our own…</p> <h3 id="setup-1">Setup</h3> <p>For the sake of experimental control, however, I will stick with MNIST and Omniglot for now (don’t worry, I deviate from these datasets in the next experiment). Here, I begin by testing the effects of changing <em>how many categoriess</em> are included the cousin subdatasets that the scouts are trained on. The original paper used 3 categories per scout dataset (i.e. a 3-way classification task). What if used 2? Or 4? And if that makes a difference, why?</p> <p>In my eyes, this experiment explores how <em>similarity</em> between the scout tasks and the downstream task affects <em>transferability</em>. Specifically, because the downstream Omniglot task is a 100-way classification problem, one might expect that scout tasks that include more classification categories (and are thus more similar to the donwstream task) would result in better transferability.</p> <p>To test this, I use a 5000-sample subset of MNIST for pre-training/scouting (to save computation and time). For scouting, I create 120 cousin problems, as done in the paper. 
But instead of sticking to 3-category cousin problems, I also try 2-category, 4-category, and 5-category problems.</p> <h3 id="results-and-analysis-1">Results and Analysis</h3> <p>Here are the results:</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/exp2_val_acc-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/exp2_val_acc-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/exp2_val_acc-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-02-guided-transfer-learning/exp2_val_acc.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/exp2_loss-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/exp2_loss-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/exp2_loss-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-02-guided-transfer-learning/exp2_loss.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>As we can see, apparently the number of categories doesn’t make too big of a difference in maximum validation accuracy! 
They all provide seemingly equal accuracy improvement from a baseline model pre-trained traditionally on the same 5000-sample MNIST subset. This isn’t too surprising. Compared to the 1000-way downstream classification, the difference between 2-way and 5-way classification tasks would intuitively seeem pretty negligible.</p> <p>The validation loss plot tells a slightly different story, however. We see <em>most</em> of the models pre-trained with GTL have similar loss curves, consisting of a lower minimal loss and more resilience to overfitting compared to the baseline model. However, the model based on scouts trained on <em>5-category</em> cousin problems seems to achieve the <em>worst</em> (highest) minimum validation loss! This seems… a bit hard to explain. Perhaps this is just due to stochasticity; after all, we see that overfitting still occurs <em>later</em> relative to the baseline model, suggesting there still is some resilience to overfitting.</p> <p>But a perhaps more interesting explanation (that admittedly could be <em>completely</em> wrong) is that 5-category problems may have been too <em>difficult</em> of a scouting task given the smaller subset of MNIST used (since lots of categories + few training samples is a often recipe for overfitting). That is, perhaps many of the <em>scouts</em> themselves would have started overfitting while being trained on these subproblems, so the guide values derived from such scouts don’t end up providing robust enough inductive biases.</p> <p>Again, this is just a speculation, but if it were true, this could suggest an interesting tradeoff between the <strong>easiness</strong> of the scouting tasks and their <strong>similarity</strong> to the target downstream task. Make a scouting task too easy, and it’s too different from the target downstream task, and transferability suffers as a result. 
Make a task too similar to the target downstream task, and it might be too difficult, causing the scouts themselves to overfit and the resulting guide values to be less useful. An intersting balance to think about and explore further.</p> <p>The overarching takeaway from this experiment, however, seems to be that the exact number of categories for the scouting problems at this specific scale does not drastically affect downstream one-shot performance. Sure, I could have tried to keep increasing the number of categories, but keep in mind there’s also a bit of a tradeoff between number of categories and number of possible scouts past a certain point. For example, we would only be able to have one cousin problem with 10 categories (and it would be the whole MNIST dataset)!</p> <h2 id="experiment-3-what-about-unsupervisedself-supervised-settings">Experiment 3: What about unsupervised/self-supervised settings?</h2> <p><strong>Note:</strong> This particular experiment builds off of some previous work I have done outside of this class.</p> <p>For the final experiment, I would like to provide a bit of my research background for context. I’m primarily intereted in applying/developing AI methodologies for biomedical research. Specifically, I work a lot with “omics” data (e.g. transcriptomics data like RNA-seq, proteomic data, etc.), which is a domain notoriously cursed with datsets characterized by high dimensionality and low sample size. This means that we are almost always forced to utilize pre-training and transfer learning in order to make any deep learning model work for specific downtream tasks. Sounds like the perfect context to apply GTL to!</p> <p>However, there’s one very important caveat. Pre-training in the omics domain is usually <strong>self-supervised</strong>, since large pre-training datasets are often aggregates of hundreds of smaller datasets from separate studies that don’t share the same labeling/metadata catogories. 
So far, whether it’s the original GTL paper or our own experiments above, we have only explored GTL in the context of <em>supervised</em> pre-training, scouting, and fine-tuning. How can we adapt GTL when the pre-training (and perhaps the scouting) involve unlabeled data?</p> <p>To explore this, I will build off of one of my previous research projects, conducted while I was an intern at NASA Ame’s Space Biology Division. The project involved pre-training (traditionally) a large RNA-seq BERT-like model (called scBERT <d-cite key="scbert"></d-cite>) on a large <em>unlabeled</em> collection of RNA-seq data (recount3 <d-cite key="recount3"></d-cite>) in a self-supervised manner (via input masking, like the original BERT). I had evaluated this pre-trained model on a downstream classification task that involved predicting whether or not a mouse had been to space based on its RNA-seq profile. The downstream task involved fine-tuning on <a href="https://osdr.nasa.gov/bio/">NASA’s OSDR datasets</a>, which contain RNA-seq data from ground control mice and <em>actual mice that have been to space</em> during the NASA Rodent Research missions. The problem was, of course, that very few mice have been to space, so these datasets were tiny (i.e. 12 mice per dataset). It was quite an extreme exacerbation of the high dimensional, low sample size characterization I normally had to deal with. In that project, I showed that traditionally pre-training provided signficant benefits over training from scratch (as expected).</p> <h3 id="gtl-pipeline-for-scbert-scouting-problem">GTL pipeline for scBERT: Scouting Problem</h3> <p>Today, however, I would like to see if GTL can provide any additional benefits to that project. The most obvious challenge, as mentioned earlier, is creating scout problems out of an unlabeled pre-training dataset (recount3).</p> <p>Sure, we could use self-supervised masked input prediction for scouting, which is how scBERT is pre-trained traditionally. 
However, it’s not immediately clear, at least to me, how exactly we would create <em>multiple different</em> scout problems using this scheme (perhaps different masking patterns?). Additionally, we would ideally want the scout tasks to be more similar to the downstream task (which is a binary classification task, i.e. predicting whether or not a mouse sample is ground control or spaceflown) and share mostly the same architecture (i.e. more parameters with transferable guide values). Finally, as mentioned before, we would like to make the scouting tasks sufficiently easy so that the scouts can be successfully trained without overfitting. Given these criteria, I propose the following scouting problem:</p> <ol> <li>Reduce the dimensionality of recount3 dataset using UMAP, keeping only the top 30 UMAP dimensions (to make the next step computationally tractable)</li> <li>Cluster using K-means clustering. K=30 seems to provide visually logical clusters, so that’s the one we will go with.</li> <li>To create subdatasets (“cousin” problems), we choose random <em>pairs</em> of K-means clusters. Thus, each subdataset includes recount3 data from a random pair of clusters.</li> <li>For each subdatset created, train a scout to classify the <em>cluster identity</em> of the samples (a binary classification task). Thus, the scouting task is very similar to the downstream task (which is also binary classification). 
This <em>also</em> means we can use the same exact model architecture for both the scouting tasks and the downstream task (maximal transferability!).</li> </ol> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/clusteringrecount3-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/clusteringrecount3-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/clusteringrecount3-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-02-guided-transfer-learning/clusteringrecount3.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Pipeline for creating scout problems. </div> <p>Now, this might seem like a trivial task for the classifier. After all, we are clustering the data based on geometric proximity, then train a model to find decision boundaries between the clusters, so it would seem that the model could find a perfectly clean decision boundary pretty easily. However, keep in mind that the clustering is done in UMAP space, with only the top 30 UMAP components, while the classification is done in the original feature space. UMAP is a nonlinear transformation, so clusters that are easily perfectly separable in top 30 UMAP space may not be in the original space. However, it is definitely still a pretty easy task, but we <em>want</em> the scouting tasks to be doable enough so that the scouts can easily converge to a generalizable relationship. So theoretically, it seems reasonable that this could work! 
(((Admittedly, it took a lot of playing around before deciding on the above scouting formulation; it just ended up being the one that worked the best. I can’t tell you exactly why, but my reasoning above is the best “intuitve” reasoning I could come up with.)))</p> <h3 id="gtl-pipeline-for-scbert-downstream-task">GTL pipeline for scBERT: Downstream Task</h3> <p>What about the downstream few-shot task? Here, I will use the same task that I had previously used to evaluate my traditionally pre-trained scBERT model:</p> <ol> <li>We train the model on a single NASA OSD dataset, OSD 105 <d-cite key="OSD-105"></d-cite>, containing bulk RNA-seq data from 6 spaceflown and 6 ground control mice, and have it predict whether a mouse was spaceflown or ground control. A simple binary classification task, like the scouting problem, but much harder given the incredibly low sample size.</li> <li>We then validate using another similar NASA OSD dataset, OSD 104 <d-cite key="OSD-104"></d-cite>, also containing 6 spaceflown and 6 ground control mice.</li> </ol> <p>It’s important to note that these two datasets, OSD 105 and 104, contain RNA-seq data from different muscle locations. OSD 105 contains tibilalis anterior data, while OSD 104 contains soleus data. 
However, since these datasets all contain data from some sort of mouse skeletal muscle tissue, we expect that cross-dataset generalizability would be reasonable for a strong generalizable model, and I actually intentionally chose datasets from different muscle tissues to test this difficult problem of cross-tissue generalizability.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/downstream-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/downstream-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/downstream-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-02-guided-transfer-learning/downstream.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Downstream few-shot binrary classification task on NASA OSDR datasets (predicting whether mouse is a ground control or spaceflown sample based on its RNA-seq profile). </div> <h3 id="gtl-pipeline-for-scbert-whole-pipeline">GTL pipeline for scBERT: Whole Pipeline</h3> <p>After deciding on the scouting problem formulation, the rest of the pipeline is pretty straightforward. Here’s the full pipeline:</p> <ol> <li><strong>Pre-train scBERT traditionally</strong> on recount3 (self-supervised masked input prediction). This involves the encoder portion of the architecture, which embeds the input, and a reconstructor portion, which uses that embedding to reconstruct the masked input values. 
The goal here, as always, is to learn <em>general knowledge</em> about the domain (RNA-seq) in the form of good <em>pre-trained weights</em>.</li> <li><strong>Scouting</strong> on recount3, using the scouting formulation described above. Here, we replace the reconstructor portion of the scBERT architecture with a classification layer. The goal here is, of course, to learn <em>inductive biases</em> in the form of <em>guide values</em>.</li> <li><strong>Downstream few-shot fine-tuning</strong> on NASA OSDR datasets, using the few-shot formulation described above. Here, we use the <em>same</em> architecture as the scouts. <em>All guide values transfer over!</em></li> </ol> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/pipeline-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/pipeline-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/pipeline-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-02-guided-transfer-learning/pipeline.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Full pipeline for applying GTL on scBERT. </div> <h3 id="results-and-analysis-2">Results and Analysis</h3> <p>And… here are the results for the downstream task! 
To compare, I’ve also included results for an scBERT copy that didn’t undergo any pre-training and an scBERT copy that was only traditionally pre-trained on recount3.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/results-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/results-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-02-guided-transfer-learning/results-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-02-guided-transfer-learning/results.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Downstream training and validation accuracy/loss curves for (from left to right): scBERT without any pre-training, scBERT traditionally pre-trained, scBERT pre-trained with scouting (GTL). A cosine annealing learning rate scheduler was used, ala the original scBERT paper, hence the jumps and fluctuations. </div> <p>With no pre-training, we can see that severe overfitting to the training set happens almost immediately, with validation loss going up while training loss goes down. This makes sense given the extremely small size of the training set, and the fact that the training and validation sets are from different muscles. With traditional pre-training, however, we see that overfitting also does eventually happen, but right before it happens, at around 200 epochs, we get this sweet spot where validation loss is at a low and validation accuracy is at a high of around 90% (highlighted by the purple box). 
So it seems that general knowledge about RNA-seq data obtained from traditional pre-training already provides a regularizing effect that reins in the model from overfitting immediately to the small downstream training dataset. These results are from my previous work and are nothing new.</p> <p>Now, when we add <em>scouting</em>, the max validation accuracy becomes 100%, which is an improvement from the traditionally pre-trained model, though this by itself may not be that notable given the already good validation accuracy after traditional pre-training. What’s potentially more interesting, however, is that this maximum validation performance is maintained over three times as many epochs compared to the traditionally pre-trained model, suggesting that the maximal performance achieved by the GTL model is more robust. However, it is also worth noting that the validation accuracy is a lot noisier and jumps around a lot more for this model compared to the others (keep in mind cosine annealing learning rate scheduler is being used for all these models). But overall, it seems that guided transfer learning provides a more robust regularization effect, giving it a longer period of time with peak validation performance before overfitting occurs.</p> <p>This is quite exciting, as it shows that, given the right scouting problem setup, <strong>we can adapt GTL in settings where our pre-training data is unlabeled, as well!</strong> The flexibility of GTL that allows it to be adapted to such a large variety of scenarios is what, in my eyes, makes this method truly innovative!</p> <h1 id="closing-thoughts">Closing Thoughts</h1> <h2 id="experiment-limitations-and-next-steps">Experiment Limitations and Next Steps</h2> <p>These experiments are merely to serve as a <em>preliminary</em> exploration of the nuances of GTL beyond what was presented in the original paper, in hopes that more questions will be explored by the community as GTL gains further publicity and traction.
Thus, there is clearly plenty of room for improvement and next steps regarding these experiments.</p> <p>For experiment 1, I think it would be cool to establish a more rigorous characterization of the amount of pre-training data (or rather lack thereof) that the addition of GTL can compensate for in terms of downstream performance. This might involve using various <em>even smaller</em> subsets of MNIST and finding the boundary where a pre-training dataset is so small that even GTL cannot compensate for it.</p> <p>The results of experiment 2 obviously leave a lot to be desired, as I only explored single-digit values for the number of categories used in the scout problems. These values are all over an order of magnitude off from the number of categories in the downstream task, so none of them gave very useful insight into how “similar” scouting tasks need to be to the downstream task. This was, of course, limited by the MNIST dataset itself, which only had 10 categories. Perhaps using a pre-training dataset with more categories could allow a more comprehensive experiment of this type.</p> <p>And for experiment 3, I wish I had more time to curate a more robust validation scheme for the downstream few-shot task. A validation set with only 12 samples was really not granular enough to precisely capture the potential benefits of adding GTL on top of traditional transfer learning. When the traditionally pre-trained model is already getting 11/12 predictions correct at its best, is 12/12 really that meaningful of an improvement?</p> <h2 id="how-exciting-is-gtl">How Exciting is GTL?</h2> <p>As promising as all these results are, GTL is, of course, not the perfect end-all be-all solution to few-shot learning. As was discussed in the original GTL paper and shown in the experiments above, GTL can only provide so much improvement before hitting a wall (e.g. the one-shot learning ability on Omniglot never surpassed 25% validation accuracy).
It does not yet quite result in models that match the few-shot learning ability of human intelligence, and still requires a considerable amount of pre-training data. However, the lightweight nature, simplicity, elegance, and adaptability of the model make it so that it’s a (relatively) quick and easy solution to get a downstream performance boost on any AI pipelines that already utilize traditional transfer learning!</p> Alive Scene 2023-12-09T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/Visualization of CLIP's Learning and Perceiving Dynamics <h2 id="enchanting-images-with-semantic-embedding">Enchanting Images with Semantic Embedding</h2> <p>“Alive Scene” is an advanced AI-driven project that revolutionizes the concept of scene capture, drawing inspiration from the enchanting, ever-changing portraits in the Harry Potter series. This innovative pipeline goes beyond traditional methods of capturing scenes as static images. Instead, it delves deep into the semantic understanding of each scene, enabling it to not only recreate these scenes with high fidelity but also to imbue them with the ability to act, evolve, and respond autonomously.</p> <p>The following GIF image on the right is the output from the Alive Scene Pipeline.
Notice that these scenes start from the same status.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/7cFU.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/7cFU.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/7cFU.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/7cFU.gif" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Hogwarts Portraits </div> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/allt5.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/allt5.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/allt5.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/allt5.gif" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; 
$('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Alive Scene captures cats' napping behaviors </div> </figure> </div> </div> <p>The core of this project lies in its sophisticated AI algorithms that analyze and interpret the nuances of each scene, from the physical elements to the underlying emotions and narratives. This enables the system to generate dynamic, lifelike representations that are far from static images. These AI-crafted scenes possess the unique ability to change organically over time, reflecting the natural progression and evolution one would expect in real life.</p> <p>Through “Alive Scene,” portraits and scenes are no longer mere representations; they become entities with a semblance of life, capable of exhibiting behaviors and changes that mirror the fluidity and spontaneity of living beings. There are three elements in this project: the first is using the CLIP model as an encoder to compress images into CLIP embeddings. Second, train a generator to reconstruct the original image from the CLIP embedding. Then train a behavior model to learn the behavior of CLIP embeddings in the CLIP feature space; the behavior will be used to drive the generator, making the scene representation alive.
The following is the diagram of the pipeline.</p> <div class="col-sm mt-3 mt-md-0"> <figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/pipeline-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/pipeline-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/pipeline-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/pipeline.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Alive Scene Pipeline </div> </figure> </div> <h3 id="introduction">Introduction</h3> <p>The CLIP (Contrastive Language–Image Pre-training) model<d-cite key="radford2021learning"></d-cite> represents a groundbreaking approach in integrating visual and textual data within the realm of artificial intelligence. In this project, it plays an important role in comprehending the scenario and characters’ behaviors in the scene.
Detailed investigations<d-cite key="wang2020understanding"></d-cite> <d-cite key="shi2023understanding"></d-cite> <d-cite key="zhao2017exact"></d-cite> offers insightful understanding of the model’s operations, showing the potential that CLIP embeddings could make a machine comprehend and compress complex information of images.</p> <p>The study<d-cite key="author2021cinn"></d-cite> explores using conditional Invertible Neural Networks (cINNs) for transforming still images into videos, highlighting cINNs’ prowess in handling static to dynamic content transitions. Although proficient in capturing motion, the model’s grasp on object/event types may benefit from CLIP embeddings enhancement. My project, unlike this work, aims to animate static scene representations with self-driven behaviors, not just manipulate videos.</p> <p>Another significant work, “Make-A-Video”<d-cite key="singer2022makeavideo"></d-cite>, introduces a text-to-video generation method utilizing text-to-image models. This approach circumvents the need for text-video paired data, learning from text-image data and unsupervised videos. It employs a spatiotemporal diffusion model and super-resolution techniques for high-quality video creation from text. My project differs, focusing on bringing life to existing videos or image sequences, rather than generating new content from text.</p> <p>Despite the static background, the cats’ movements are so subtle that they pose a challenge for human observers to distinguish differences between frames. 
To visualize the clip embeddings of the frames from the video, I employ both UMAP and t-SNE<d-cite key="maaten2008tsne"></d-cite> techniques for gaining more insights.</p> <div class="col-sm mt-3 mt-md-0"> <figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/latent_umap.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/latent_umap.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/latent_umap.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/latent_umap.gif" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> UMAP Visualization </div> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/latent_tsne.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/latent_tsne.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/latent_tsne.gif-1400.webp" /> <!-- Fallback to the original file --> <img 
src="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/latent_tsne.gif" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> t-SNE Visualization </div> </figure> </div> <p>The behavior over time resembles a ‘spaghetti’ pattern, indicating that certain scenarios or behaviors may recur (as seen in the crossings or interactions within the spaghetti diagram). Some intersecting points demonstrate similar tendencies, while others are more unpredictable, highlighting the complexity of the video.</p> <p>Both visualizations provide a promising sign: the end and start frames are positioned close to those in the middle. This proximity allows the Alive Scene to operate seamlessly and endlessly. For example, when the Alive Scene approaches a point near the end, it can smoothly transition to a frame somewhere in the middle. Similarly, when it encounters a region where different frames cluster together, it has a variety of options to choose from for its next move. This flexibility is key to making the Alive Scene function effectively.</p> <h3 id="generator">Generator</h3> <p>The Generator (decoder) is a SIREN model, which employs CLIP semantic embeddings and positional embeddings of pixel coordinates to generate RGB colors<d-cite key="sitzmann2019siren"></d-cite>. SIRENs, or Sinusoidal Representation Networks, diverge from traditional neural networks by utilizing sinusoidal activation functions instead of common ones like ReLU. These networks are adept at implicitly representing intricate data patterns, making them particularly advantageous for tasks that involve complex spatial structures or continuous data. 
The incorporation of periodic activation functions in SIRENs can significantly enhance deep learning capabilities, especially in fields such as computer vision and generative models.</p> <figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/SIREN_DECODER-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/SIREN_DECODER-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/SIREN_DECODER-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/SIREN_DECODER.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> a SIREN model as the generator </div> </figure> <figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/tp_siren.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/tp_siren.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/tp_siren.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/tp_siren.gif" class="img-fluid rounded 
z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> the training progression </div> </figure> <figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/generated_.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/generated_.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/generated_.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/generated_.gif" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Original Video vs Generated Video </div> </figure> <p>The code of the generator model (SIREN)</p> <div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">class</span> <span class="nc">SineLayer</span><span class="p">(</span><span class="n">nn</span><span class="p">.</span><span class="n">Module</span><span class="p">):</span> <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="n">self</span><span class="p">,</span> <span class="n">w0</span><span class="p">):</span> <span class="nf">super</span><span class="p">(</span><span class="n">SineLayer</span><span class="p">,</span> <span class="n">self</span><span class="p">).</span><span class="nf">__init__</span><span class="p">()</span> <span class="n">self</span><span 
class="p">.</span><span class="n">w0</span> <span class="o">=</span> <span class="n">w0</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="n">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span> <span class="k">return</span> <span class="n">torch</span><span class="p">.</span><span class="nf">sin</span><span class="p">(</span><span class="n">self</span><span class="p">.</span><span class="n">w0</span> <span class="o">*</span> <span class="n">x</span><span class="p">)</span> <span class="k">class</span> <span class="nc">Siren</span><span class="p">(</span><span class="n">nn</span><span class="p">.</span><span class="n">Module</span><span class="p">):</span> <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="n">self</span><span class="p">,</span> <span class="n">w0</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">in_dim</span><span class="o">=</span><span class="mi">560</span><span class="p">,</span> <span class="n">hidden_dim</span><span class="o">=</span><span class="mi">256</span><span class="p">,</span> <span class="n">out_dim</span><span class="o">=</span><span class="mi">3</span><span class="p">):</span> <span class="nf">super</span><span class="p">(</span><span class="n">Siren</span><span class="p">,</span> <span class="n">self</span><span class="p">).</span><span class="nf">__init__</span><span class="p">()</span> <span class="n">self</span><span class="p">.</span><span class="n">net</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Sequential</span><span class="p">(</span><span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">in_dim</span><span class="p">,</span> <span class="n">hidden_dim</span><span class="p">),</span> <span class="nc">SineLayer</span><span 
class="p">(</span><span class="n">w0</span><span class="p">),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">hidden_dim</span><span class="p">,</span> <span class="n">hidden_dim</span><span class="p">),</span> <span class="nc">SineLayer</span><span class="p">(</span><span class="n">w0</span><span class="p">),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">hidden_dim</span><span class="p">,</span> <span class="n">hidden_dim</span><span class="p">),</span> <span class="nc">SineLayer</span><span class="p">(</span><span class="n">w0</span><span class="p">),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">hidden_dim</span><span class="p">,</span> <span class="n">hidden_dim</span><span class="p">),</span> <span class="nc">SineLayer</span><span class="p">(</span><span class="n">w0</span><span class="p">),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">hidden_dim</span><span class="p">,</span> <span class="n">out_dim</span><span class="p">))</span> <span class="c1"># Init weights </span> <span class="k">with</span> <span class="n">torch</span><span class="p">.</span><span class="nf">no_grad</span><span class="p">():</span> <span class="n">self</span><span class="p">.</span><span class="n">net</span><span class="p">[</span><span class="mi">0</span><span class="p">].</span><span class="n">weight</span><span class="p">.</span><span class="nf">uniform_</span><span class="p">(</span><span class="o">-</span><span class="mf">1.</span> <span class="o">/</span> <span class="n">in_dim</span><span class="p">,</span> <span class="mf">1.</span> <span class="o">/</span> <span class="n">in_dim</span><span class="p">)</span> <span class="n">self</span><span 
class="p">.</span><span class="n">net</span><span class="p">[</span><span class="mi">2</span><span class="p">].</span><span class="n">weight</span><span class="p">.</span><span class="nf">uniform_</span><span class="p">(</span><span class="o">-</span><span class="n">np</span><span class="p">.</span><span class="nf">sqrt</span><span class="p">(</span><span class="mf">6.</span> <span class="o">/</span> <span class="n">hidden_dim</span><span class="p">)</span> <span class="o">/</span> <span class="n">w0</span><span class="p">,</span> <span class="n">np</span><span class="p">.</span><span class="nf">sqrt</span><span class="p">(</span><span class="mf">6.</span> <span class="o">/</span> <span class="n">hidden_dim</span><span class="p">)</span> <span class="o">/</span> <span class="n">w0</span><span class="p">)</span> <span class="n">self</span><span class="p">.</span><span class="n">net</span><span class="p">[</span><span class="mi">4</span><span class="p">].</span><span class="n">weight</span><span class="p">.</span><span class="nf">uniform_</span><span class="p">(</span><span class="o">-</span><span class="n">np</span><span class="p">.</span><span class="nf">sqrt</span><span class="p">(</span><span class="mf">6.</span> <span class="o">/</span> <span class="n">hidden_dim</span><span class="p">)</span> <span class="o">/</span> <span class="n">w0</span><span class="p">,</span> <span class="n">np</span><span class="p">.</span><span class="nf">sqrt</span><span class="p">(</span><span class="mf">6.</span> <span class="o">/</span> <span class="n">hidden_dim</span><span class="p">)</span> <span class="o">/</span> <span class="n">w0</span><span class="p">)</span> <span class="n">self</span><span class="p">.</span><span class="n">net</span><span class="p">[</span><span class="mi">6</span><span class="p">].</span><span class="n">weight</span><span class="p">.</span><span class="nf">uniform_</span><span class="p">(</span><span class="o">-</span><span class="n">np</span><span 
class="p">.</span><span class="nf">sqrt</span><span class="p">(</span><span class="mf">6.</span> <span class="o">/</span> <span class="n">hidden_dim</span><span class="p">)</span> <span class="o">/</span> <span class="n">w0</span><span class="p">,</span> <span class="n">np</span><span class="p">.</span><span class="nf">sqrt</span><span class="p">(</span><span class="mf">6.</span> <span class="o">/</span> <span class="n">hidden_dim</span><span class="p">)</span> <span class="o">/</span> <span class="n">w0</span><span class="p">)</span> <span class="n">self</span><span class="p">.</span><span class="n">net</span><span class="p">[</span><span class="mi">8</span><span class="p">].</span><span class="n">weight</span><span class="p">.</span><span class="nf">uniform_</span><span class="p">(</span><span class="o">-</span><span class="n">np</span><span class="p">.</span><span class="nf">sqrt</span><span class="p">(</span><span class="mf">6.</span> <span class="o">/</span> <span class="n">hidden_dim</span><span class="p">)</span> <span class="o">/</span> <span class="n">w0</span><span class="p">,</span> <span class="n">np</span><span class="p">.</span><span class="nf">sqrt</span><span class="p">(</span><span class="mf">6.</span> <span class="o">/</span> <span class="n">hidden_dim</span><span class="p">)</span> <span class="o">/</span> <span class="n">w0</span><span class="p">)</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="n">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span> <span class="k">return</span> <span class="n">self</span><span class="p">.</span><span class="nf">net</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">class</span> <span class="nc">MLP</span><span class="p">(</span><span class="n">nn</span><span class="p">.</span><span class="n">Module</span><span class="p">):</span> <span class="k">def</span> <span 
class="nf">__init__</span><span class="p">(</span><span class="n">self</span><span class="p">,</span> <span class="n">in_dim</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">hidden_dim</span><span class="o">=</span><span class="mi">256</span><span class="p">,</span> <span class="n">out_dim</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span> <span class="nf">super</span><span class="p">(</span><span class="n">MLP</span><span class="p">,</span> <span class="n">self</span><span class="p">).</span><span class="nf">__init__</span><span class="p">()</span> <span class="n">self</span><span class="p">.</span><span class="n">net</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Sequential</span><span class="p">(</span><span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">in_dim</span><span class="p">,</span> <span class="n">hidden_dim</span><span class="p">),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">ReLU</span><span class="p">(),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">hidden_dim</span><span class="p">,</span> <span class="n">hidden_dim</span><span class="p">),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">ReLU</span><span class="p">(),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">hidden_dim</span><span class="p">,</span> <span class="n">hidden_dim</span><span class="p">),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">ReLU</span><span class="p">(),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">hidden_dim</span><span class="p">,</span> <span 
class="n">hidden_dim</span><span class="p">),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">ReLU</span><span class="p">(),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">hidden_dim</span><span class="p">,</span> <span class="n">out_dim</span><span class="p">))</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="n">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span> <span class="k">return</span> <span class="n">self</span><span class="p">.</span><span class="nf">net</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">def</span> <span class="nf">train</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">model_optimizer</span><span class="p">,</span> <span class="n">nb_epochs</span><span class="o">=</span><span class="mi">15000</span><span class="p">):</span> <span class="n">psnr</span> <span class="o">=</span> <span class="p">[]</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nf">tqdm</span><span class="p">(</span><span class="nf">range</span><span class="p">(</span><span class="n">nb_epochs</span><span class="p">)):</span> <span class="n">model_output</span> <span class="o">=</span> <span class="nf">model</span><span class="p">(</span><span class="n">pixel_coordinates</span><span class="p">)</span> <span class="n">loss</span> <span class="o">=</span> <span class="p">((</span><span class="n">model_output</span> <span class="o">-</span> <span class="n">pixel_values</span><span class="p">)</span> <span class="o">**</span> <span class="mi">2</span><span class="p">).</span><span class="nf">mean</span><span class="p">()</span> <span class="n">psnr</span><span class="p">.</span><span class="nf">append</span><span class="p">(</span><span 
class="mi">20</span> <span class="o">*</span> <span class="n">np</span><span class="p">.</span><span class="nf">log10</span><span class="p">(</span><span class="mf">1.0</span> <span class="o">/</span> <span class="n">np</span><span class="p">.</span><span class="nf">sqrt</span><span class="p">(</span><span class="n">loss</span><span class="p">.</span><span class="nf">item</span><span class="p">())))</span> <span class="n">model_optimizer</span><span class="p">.</span><span class="nf">zero_grad</span><span class="p">()</span> <span class="n">loss</span><span class="p">.</span><span class="nf">backward</span><span class="p">()</span> <span class="n">model_optimizer</span><span class="p">.</span><span class="nf">step</span><span class="p">()</span> <span class="k">return</span> <span class="n">psnr</span><span class="p">,</span> <span class="n">model_output</span> </code></pre></div></div> <h3 id="behavior-model">Behavior model</h3> <p>This project introduces a customized asymmetrical Variational Autoencoder (VAE)<d-cite key="kingma2014autoencoding"></d-cite> as the probabilistic model to predict motion within the CLIP embedding space. A VAE-like model may prove beneficial for this task for two primary reasons. Firstly, they are adept at learning a continuous, smooth latent space, facilitating efficient interpolation and manipulation of data representations. Given that the training data derives from a video, it is inherently sequential and should be represented in a continuous fashion. Secondly, VAEs utilize amortized inference, where the encoder is trained to generalize the mapping of inputs to the latent space across the dataset, as opposed to conducting inference anew for each input. 
For this project, the objective is to devise a method that allows for a smooth navigation within the observed embedding space.</p> <p>The code of the behavior model (VAE)</p> <div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code> <span class="n">device</span> <span class="o">=</span> <span class="n">torch</span><span class="p">.</span><span class="nf">device</span><span class="p">(</span><span class="sh">"</span><span class="s">cuda</span><span class="sh">"</span> <span class="k">if</span> <span class="n">torch</span><span class="p">.</span><span class="n">cuda</span><span class="p">.</span><span class="nf">is_available</span><span class="p">()</span> <span class="k">else</span> <span class="sh">"</span><span class="s">cpu</span><span class="sh">"</span><span class="p">)</span> <span class="c1"># BehaviorModel(inspired by VAE) </span><span class="k">class</span> <span class="nc">BehaviorModel</span><span class="p">(</span><span class="n">nn</span><span class="p">.</span><span class="n">Module</span><span class="p">):</span> <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="n">self</span><span class="p">,</span> <span class="n">input_dim</span><span class="o">=</span><span class="mi">512</span><span class="p">,</span> <span class="n">latent_dim</span><span class="o">=</span><span class="mi">256</span><span class="p">):</span> <span class="nf">super</span><span class="p">(</span><span class="n">BehaviorModel</span><span class="p">,</span> <span class="n">self</span><span class="p">).</span><span class="nf">__init__</span><span class="p">()</span> <span class="c1"># Encoder </span> <span class="n">self</span><span class="p">.</span><span class="n">fc1</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">input_dim</span><span class="p">,</span> <span class="mi">400</span><span
class="p">)</span> <span class="n">self</span><span class="p">.</span><span class="n">bn1</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">BatchNorm1d</span><span class="p">(</span><span class="mi">400</span><span class="p">)</span> <span class="n">self</span><span class="p">.</span><span class="n">fc2</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="mi">400</span><span class="p">,</span> <span class="mi">300</span><span class="p">)</span> <span class="n">self</span><span class="p">.</span><span class="n">bn2</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">BatchNorm1d</span><span class="p">(</span><span class="mi">300</span><span class="p">)</span> <span class="n">self</span><span class="p">.</span><span class="n">fc21</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="mi">300</span><span class="p">,</span> <span class="n">latent_dim</span><span class="p">)</span> <span class="c1"># Mean </span> <span class="n">self</span><span class="p">.</span><span class="n">fc22</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="mi">300</span><span class="p">,</span> <span class="n">latent_dim</span><span class="p">)</span> <span class="c1"># Log variance </span> <span class="n">self</span><span class="p">.</span><span class="n">dropout</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Dropout</span><span class="p">(</span><span class="mf">0.55</span><span class="p">)</span> <span class="c1"># Decoder </span> <span class="n">self</span><span class="p">.</span><span class="n">fc3</span> <span class="o">=</span> <span class="n">nn</span><span 
class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">latent_dim</span><span class="p">,</span> <span class="mi">300</span><span class="p">)</span> <span class="n">self</span><span class="p">.</span><span class="n">bn3</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">BatchNorm1d</span><span class="p">(</span><span class="mi">300</span><span class="p">)</span> <span class="n">self</span><span class="p">.</span><span class="n">fc4</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="mi">300</span><span class="p">,</span> <span class="mi">400</span><span class="p">)</span> <span class="n">self</span><span class="p">.</span><span class="n">bn4</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">BatchNorm1d</span><span class="p">(</span><span class="mi">400</span><span class="p">)</span> <span class="n">self</span><span class="p">.</span><span class="n">fc5</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="mi">400</span><span class="p">,</span> <span class="n">input_dim</span><span class="p">)</span> <span class="k">def</span> <span class="nf">encode</span><span class="p">(</span><span class="n">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span> <span class="n">h1</span> <span class="o">=</span> <span class="n">F</span><span class="p">.</span><span class="nf">relu</span><span class="p">(</span><span class="n">self</span><span class="p">.</span><span class="nf">bn1</span><span class="p">(</span><span class="n">self</span><span class="p">.</span><span class="nf">fc1</span><span class="p">(</span><span class="n">x</span><span class="p">)))</span> <span class="n">h2</span> <span class="o">=</span> <span 
class="n">F</span><span class="p">.</span><span class="nf">relu</span><span class="p">(</span><span class="n">self</span><span class="p">.</span><span class="nf">bn2</span><span class="p">(</span><span class="n">self</span><span class="p">.</span><span class="nf">fc2</span><span class="p">(</span><span class="n">h1</span><span class="p">)))</span> <span class="n">h2</span> <span class="o">=</span> <span class="n">self</span><span class="p">.</span><span class="nf">dropout</span><span class="p">(</span><span class="n">h2</span><span class="p">)</span> <span class="k">return</span> <span class="n">self</span><span class="p">.</span><span class="nf">fc21</span><span class="p">(</span><span class="n">h2</span><span class="p">),</span> <span class="n">self</span><span class="p">.</span><span class="nf">fc22</span><span class="p">(</span><span class="n">h2</span><span class="p">)</span> <span class="k">def</span> <span class="nf">reparameterize</span><span class="p">(</span><span class="n">self</span><span class="p">,</span> <span class="n">mu</span><span class="p">,</span> <span class="n">logvar</span><span class="p">):</span> <span class="n">std</span> <span class="o">=</span> <span class="n">torch</span><span class="p">.</span><span class="nf">exp</span><span class="p">(</span><span class="mf">0.5</span> <span class="o">*</span> <span class="n">logvar</span><span class="p">)</span> <span class="n">eps</span> <span class="o">=</span> <span class="n">torch</span><span class="p">.</span><span class="nf">randn_like</span><span class="p">(</span><span class="n">std</span><span class="p">)</span> <span class="k">return</span> <span class="n">mu</span> <span class="o">+</span> <span class="n">eps</span> <span class="o">*</span> <span class="n">std</span> <span class="k">def</span> <span class="nf">decode</span><span class="p">(</span><span class="n">self</span><span class="p">,</span> <span class="n">z</span><span class="p">):</span> <span class="n">h3</span> <span 
class="o">=</span> <span class="n">F</span><span class="p">.</span><span class="nf">relu</span><span class="p">(</span><span class="n">self</span><span class="p">.</span><span class="nf">bn3</span><span class="p">(</span><span class="n">self</span><span class="p">.</span><span class="nf">fc3</span><span class="p">(</span><span class="n">z</span><span class="p">)))</span> <span class="n">h4</span> <span class="o">=</span> <span class="n">F</span><span class="p">.</span><span class="nf">relu</span><span class="p">(</span><span class="n">self</span><span class="p">.</span><span class="nf">bn4</span><span class="p">(</span><span class="n">self</span><span class="p">.</span><span class="nf">fc4</span><span class="p">(</span><span class="n">h3</span><span class="p">)))</span> <span class="k">return</span> <span class="n">F</span><span class="p">.</span><span class="nf">tanh</span><span class="p">(</span><span class="n">self</span><span class="p">.</span><span class="nf">fc5</span><span class="p">(</span><span class="n">h4</span><span class="p">))</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="n">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span> <span class="n">mu</span><span class="p">,</span> <span class="n">logvar</span> <span class="o">=</span> <span class="n">self</span><span class="p">.</span><span class="nf">encode</span><span class="p">(</span><span class="n">x</span><span class="p">.</span><span class="nf">view</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">512</span><span class="p">))</span> <span class="n">z</span> <span class="o">=</span> <span class="n">self</span><span class="p">.</span><span class="nf">reparameterize</span><span class="p">(</span><span class="n">mu</span><span class="p">,</span> <span class="n">logvar</span><span class="p">)</span> <span class="k">return</span> <span 
class="n">self</span><span class="p">.</span><span class="nf">decode</span><span class="p">(</span><span class="n">z</span><span class="p">),</span> <span class="n">mu</span><span class="p">,</span> <span class="n">logvar</span> <span class="c1"># Loss function </span><span class="k">def</span> <span class="nf">loss_function</span><span class="p">(</span><span class="n">recon_x</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">mu</span><span class="p">,</span> <span class="n">logvar</span><span class="p">):</span> <span class="n">BCE</span> <span class="o">=</span> <span class="n">F</span><span class="p">.</span><span class="nf">binary_cross_entropy</span><span class="p">(</span><span class="n">recon_x</span><span class="p">,</span> <span class="n">x</span><span class="p">.</span><span class="nf">view</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">512</span><span class="p">),</span> <span class="n">reduction</span><span class="o">=</span><span class="sh">'</span><span class="s">sum</span><span class="sh">'</span><span class="p">)</span> <span class="n">KLD</span> <span class="o">=</span> <span class="o">-</span><span class="mf">0.5</span> <span class="o">*</span> <span class="n">torch</span><span class="p">.</span><span class="nf">sum</span><span class="p">(</span><span class="mi">1</span> <span class="o">+</span> <span class="n">logvar</span> <span class="o">-</span> <span class="n">mu</span><span class="p">.</span><span class="nf">pow</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span> <span class="o">-</span> <span class="n">logvar</span><span class="p">.</span><span class="nf">exp</span><span class="p">())</span> <span class="k">return</span> <span class="n">BCE</span> <span class="o">+</span> <span class="n">KLD</span> <span class="k">def</span> <span class="nf">loss_function</span><span class="p">(</span><span 
class="n">recon_x</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">mu</span><span class="p">,</span> <span class="n">logvar</span><span class="p">):</span> <span class="c1"># Use Mean Squared Error for the reconstruction loss </span> <span class="n">MSE</span> <span class="o">=</span> <span class="n">F</span><span class="p">.</span><span class="nf">mse_loss</span><span class="p">(</span><span class="n">recon_x</span><span class="p">,</span> <span class="n">x</span><span class="p">.</span><span class="nf">view</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">512</span><span class="p">),</span> <span class="n">reduction</span><span class="o">=</span><span class="sh">'</span><span class="s">sum</span><span class="sh">'</span><span class="p">)</span> <span class="c1"># KLD is unchanged </span> <span class="n">KLD</span> <span class="o">=</span> <span class="o">-</span><span class="mf">0.5</span> <span class="o">*</span> <span class="n">torch</span><span class="p">.</span><span class="nf">sum</span><span class="p">(</span><span class="mi">1</span> <span class="o">+</span> <span class="n">logvar</span> <span class="o">-</span> <span class="n">mu</span><span class="p">.</span><span class="nf">pow</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span> <span class="o">-</span> <span class="n">logvar</span><span class="p">.</span><span class="nf">exp</span><span class="p">())</span> <span class="k">return</span> <span class="n">MSE</span> <span class="o">+</span> <span class="n">KLD</span> </code></pre></div></div> <p>The process begins with a CLIP embedding as the input, which is then transformed by the model to output a motion vector. 
This vector retains the same dimensions as the CLIP embedding and is utilized to alter the original embedding, facilitating the generation of the subsequent frame based on this modified embedding.</p> <figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/PrbabilisticModel-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/PrbabilisticModel-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/PrbabilisticModel-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/PrbabilisticModel.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> the Asymmetrical VAE </div> </figure> <p>In this case, I generate 200 frames for training; the number is quite small. To enhance the model’s learning efficacy, new data points are generated through linear interpolation between existing data points (frames). By doing this, I generated 1000 clip embeddings and frames. These newly created samples undergo normalization to conform to the geometric constraints of the CLIP embedding space, often characterized as a hypersphere. This normalization process ensures that the interpolated data points adhere to the distribution pattern of the original embeddings. As depicted in the diagram, this technique leads to a densified clustering of data points in close proximity to the original embeddings, which is advantageous. 
It implies a higher confidence in the authenticity of these new points due to their closeness to the authentic, or ground truth, data.</p> <figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/Interpolation-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/Interpolation-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/Interpolation-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/Interpolation.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Gaining more data points by Interpolation </div> </figure> <p>When operating the process that animates the Alive Scene, it occasionally generates artifacts. This may be caused by certain movements that deviate significantly from the observed reality. 
Please refer to the following GIF for an example.</p> <figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/broken.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/broken.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/broken.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/broken.gif" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Artifacts </div> </figure> <p>To resolve the issue, I have developed a post-processing technique that stabilizes the outcomes. The process begins by re-normalizing the resulting embedding onto the hypersphere. Following this, a weighted parameter is introduced to draw the vector incrementally toward the domain of previously observed CLIP embeddings. For example, if the weighting parameter is set to 0.1 for the observed embedding, it would be scaled by 0.1, while the predicted embedding is scaled by 0.9. These two are then summed to produce a final embedding that, while primarily influenced by the prediction, retains a subtle alignment with the observed data. 
This weighted approach aims to mitigate artifacts by anchoring the predictions within the realm of observed realities.</p> <figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/Post-curing-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/Post-curing-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/Post-curing-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/Post-curing.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Post-curing method </div> </figure> <p>By applying this method, the Alive Scene has started to yield more stable results. 
Interestingly, the outcomes are varied, exhibiting behaviors akin to a living creature — somewhat unpredictable yet within a framework of predictability.</p> <figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/allt5.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/allt5.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/allt5.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/allt5.gif" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> 4 different versions </div> </figure> <h3 id="manipulation">Manipulation</h3> <p>The Alive Scene operates autonomously, and to explore the modulation of its behavior, I have introduced the concept of ‘temperature.’ This concept acts as a coefficient that scales the movement vector, thereby allowing the scene to exhibit behaviors that are either more expansive and varied, or more constrained and subtle, depending on the temperature setting.</p> <figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/TEMPERATURE-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/TEMPERATURE-800.webp" /> <source 
class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/TEMPERATURE-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/TEMPERATURE.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> 4 different versions </div> </figure> <figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/vt.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/vt.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/vt.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/vt.gif" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> different temperature settings </div> </figure> <h3 id="conclusion">Conclusion</h3> <p>The “Alive Scene” project signifies a profound achievement in the domain of Deep Learning for scene representation. 
It leverages CLIP semantic embeddings to decode and imbue scenes with lifelike attributes, while also seamlessly integrating the potent SIREN model as a generator, capable of breathing vitality into the processed embeddings by producing authentic images.</p> <p>Furthermore, the project implements an asymmetric Variational Autoencoder (VAE) to predict and model motion within the CLIP embedding space, thereby enhancing the dynamism and fluidity of the scenes.</p> <p>However, the significance of this undertaking extends well beyond its technical accomplishments. By giving birth to scenes that autonomously and organically evolve, the project ushers in a transformative era of possibilities in digital storytelling and interactive media, fundamentally reshaping the landscape of creative expression in the digital realm.</p> <h3 id="future-work">Future Work</h3> <p>In this project, a SIREN model is trained to create a 2D scene representation. This model can be extended to generate a 3D scene by simply adding an additional output node to adopt the Neural Radiance Field (NeRF)<d-cite key="mildenhall2020nerf"></d-cite> architecture. Such an enhancement allows the 3D Alive Scene to offer a more immersive and complex scene representation. Looking ahead, it’s conceivable that a non-player character (NPC) could be manipulated in this manner, especially if the model, when trained on a vast dataset, can learn more sophisticated behaviors. 
This approach has the potential to encapsulate all necessary information within a highly compact model, offering an extremely lightweight solution for dynamic scene generation.</p> <figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/2dvs3d-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/2dvs3d-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/2dvs3d-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-09-Alive%20Scene%20Enchanting%20images%20with%20Semantic%20Embedding/2dvs3d.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> extend the model for a 3D Scene </div> </figure> <h3 id="potential-usages-and-contributions">Potential Usages and Contributions:</h3> <p>Digital Art and Entertainment: This project can revolutionize digital art and entertainment by offering dynamic, evolving scenes that enhance animations and virtual experiences.</p> <p>Film and Animation: It can automate the generation of realistic backgrounds, streamlining the production process for films and animated content.</p> <p>Advertising and Marketing: The project offers the capability to create interactive, dynamic advertising content, thereby engaging audiences more effectively.</p> <p>Behavioral Studies: It provides a tool for in-depth analysis of human and animal behaviors, supporting research in fields such as psychology, ethology, and anthropology.</p> <p>Cultural Preservation: This technology can enliven historical scenes 
or artworks in museums, offering visitors more immersive and engaging experiences.</p> <p>Data Visualization: It introduces innovative methods for interacting with and interpreting complex data, useful in sectors like finance and healthcare.</p> <p>Gaming: The project enables the creation of NPCs with realistic behaviors, significantly enhancing the gaming experience.</p> <p>Architecture and Engineering: It can be applied for dynamic visualizations in architectural and engineering projects, aiding in design and planning.</p> <p>Conservation: This technology can contribute to wildlife conservation by facilitating the study of animal behaviors in natural settings.</p> Projected fast feedforward networks 2023-12-05T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/projected-fff-networks <h2 id="introduction">Introduction</h2> <p>Compression of neural networks is a crucial task in Machine Learning. There are three important performance metrics that we should take into account when deploying models:</p> <ul> <li> <p>Size of the model. Having a smaller number of parameters that describe the model makes transferring it over the network faster. In addition, being able to concisely represent the differences between the original and finetuned model would enable storing and distributing a lot of possible finetunings, such as in Stable Diffusion LORA <d-cite key="luo2023lcmlora"></d-cite></p> </li> <li> <p>GPU memory needed to perform the inference. If the metric is lower, the model inference can be run on less expensive GPUs with less available memory. Some models could even be run on smartphones or IoT devices</p> </li> <li> <p>Inference time. We can also take into account how the time scales with the size of the batch</p> </li> </ul> <p>Balancing these characteristics is a non-trivial task, since improvements in one of them could lead to a decline in other metrics. 
The optimal tradeoff depends on the environment in which the model is run.</p> <p>We will explore a way to significantly reduce the model size and the memory needed for inference, keeping the inference time reasonable. We achieve the size reduction by utilizing a common property of having small intrinsic dimension of the objective landscape that many models have.</p> <h2 id="related-works">Related works</h2> <p>There are several ways in which the size of the model can be reduced. One of the popular techniques is model quantization. Quantization of a machine learning model involves decreasing the precision of weights for the sake of reduction of the total memory needed to store them. Quantized models can utilize 16, 8, or even 4-bit floats, with carefully selected summation and multiplication tables. There are different ways of dealing with the inevitable degradation of accuracy due to lack of precision, one possible way is described in paper <d-cite key="nagel2021white"></d-cite></p> <p>Another direction of model size optimization utilizes the notion of matrix low-rank approximation. The layers of neural networks are commonly represented as matrices, the simplest example being the parameters of a feedforward linear layer. Each matrix \(A\) has a Singular Value Decomposition \(A = U\Sigma V^*\), and, using this decomposition, it’s possible to get a close low-rank approximation of \(A\). We note that a matrix of size \(n \times m\) of rank \(k\) can be stored in \(O((n+m)k)\) memory if we express it as a sum of outer products of \(k\) pairs of vectors, so if \(k\) is small, this representation uses much less memory than \(O(nm)\) — the memory used by the dense representation. One of the papers that compresses models with low-rank approximation is <d-cite key="jaderberg2014speeding"></d-cite>.</p> <p>However, we are going to explore another compression method, which utilizes small dimensionality of optimization landscape, which is common for many model-task pairs. 
When training a neural network, we have some loss \(\mathcal{L}\), and a parameter space \(\mathbb{R}^{p}\). Then, we are trying to find \(v \in \mathbb{R}^{p}\) such that \(\mathcal{L}(v)\) is minimized. Instead of searching over the whole space, we generate a linear operator \(\phi\colon \; \mathbb{R}^{d} \to \mathbb{R}^{p}\), where \(d &lt; p\), and parametrize \(v\) as \(v = \phi u\), where \(u \in \mathbb{R}^{d}\). Li et al. <d-cite key="li2018measuring"></d-cite> found that if the matrix of \(\phi\) has normalized columns, for many tasks it’s possible to find a network, parametrized the way above, where \(d\) is significantly smaller than \(p\), such that the network has at least 90% of the metric value of the original network. Then, if our way to generate the random projection matrix is seeded deterministically, we only need to store \(d\) floats, which are the coefficients of \(u\). During inference, we re-create \(\phi\), and restore the parameters of the original architecture \(v = \phi u\). The compression ratio of this method is \(\frac{p}{d}\). In this blog, we will explore its potential modifications, and evaluate them by running experiments.</p> <h2 id="basic-experiment">Basic experiment</h2> <p>First, we test the method without any modifications. We use dataset MNIST <d-cite key="deng2012mnist"></d-cite>, containing 30000 \(28 \times 28\) monochrome images of digits. 
Each image belongs to one of 10 classes, depending on the digit, and the task of the model is to classify these digits.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/mnist-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/mnist-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/mnist-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-projected-fff-networks/mnist.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>For each of the experiment, we use a neural network with one hidden layer with 128 units and ReLU activations. We optimize the parameters with Adam and learning rate \(10^{-4}\). 
The training is run for \(100\) epochs, and our batch size is \(128\).</p> <table> <thead> <tr> <th>d</th> <th style="text-align: center">final val acc</th> </tr> </thead> <tbody> <tr> <td>4</td> <td style="text-align: center">17.56</td> </tr> <tr> <td>16</td> <td style="text-align: center">20.39</td> </tr> <tr> <td>64</td> <td style="text-align: center">50.2</td> </tr> <tr> <td>256</td> <td style="text-align: center">71.1</td> </tr> <tr> <td>512</td> <td style="text-align: center">61.25</td> </tr> <tr> <td>1024</td> <td style="text-align: center">61.66</td> </tr> <tr> <td>original</td> <td style="text-align: center">95.65</td> </tr> </tbody> </table> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/random-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/random-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/random-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-projected-fff-networks/random.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h2 id="better-initialization">Better initialization</h2> <p>We’ve noticed that the optimization of the compressed model does not converge fast. To initialize better, we can use the pre-trained weights of the non-compressed model \(v\).</p> <p>Let \(A\) be the projection matrix that we used in the compression. Then, to convert compressed parameters of a model to the original ones, we need to multiply by \(A\) on the left. The idea is to start from the compressed parameters, such that after going to uncompressed space, they would be as close to \(v\) as possible by Euclidean distance. 
Then, we can use the formula for projection onto a linear subspace:</p> \[u^{*} = \mathop{argmin}_u ||Au - v||^2 \Rightarrow u^{*} = (A^TA)^{-1}A^Tv\] <p>By initializing \(u\) this way, we achieve a faster convergence of the optimizer, because after projecting to the subspace and returning to the original coordinates, we get a parameter vector that is close to the optimal one, so it should be near the optimum in the coordinates of the projection.</p> <p>In our experiments, we compare how fast the model trains with random initialization and with projection initialization.</p> <table> <thead> <tr> <th>d</th> <th style="text-align: center">final val acc</th> </tr> </thead> <tbody> <tr> <td>4</td> <td style="text-align: center">17.72</td> </tr> <tr> <td>16</td> <td style="text-align: center">28.68</td> </tr> <tr> <td>64</td> <td style="text-align: center">51.52</td> </tr> <tr> <td>256</td> <td style="text-align: center">71.18</td> </tr> <tr> <td>512</td> <td style="text-align: center">83.93</td> </tr> <tr> <td>1024</td> <td style="text-align: center">90.18</td> </tr> <tr> <td>original</td> <td style="text-align: center">95.65</td> </tr> </tbody> </table> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/non_random-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/non_random-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/non_random-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-projected-fff-networks/non_random.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h2 id="distillation">Distillation</h2> <p>The concept of model distillation was introduced by <d-cite 
key="hinton2015distilling"> </d-cite>. The main idea is to train a compact model (“student”) to emulate a larger, pre-trained model (“teacher”). In our case, the compact model would be a reparametrized model with \(d\) dimensions, while the “teacher” model has all the original parameters. When training, we regress the mean square difference between the logits of the original and compact model.</p> <p>We initialize the compressed model with the projection of the original model as in the previous section. In our experiments, we’ve noticed that this training procedure has comparable convergence speed, however, its validation accuracy reaches a plateau on a lower value than in regular training procedure.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/distil-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/distil-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/distil-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-projected-fff-networks/distil.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/non-distil-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/non-distil-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" 
srcset="/staging/assets/img/2023-12-12-projected-fff-networks/non-distil-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-projected-fff-networks/non-distil.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> CIFAR-10 experiment. Network is logistic regression. Projections for d=700, on the left: with distillation, on the right: without distillation </div> <h2 id="independent-projections-for-layers">Independent projections for layers</h2> <p>In many cases, the model we are compressing contains several independent layers. Therefore, we can try to split the coordinates in the space to which we are projecting parameters so that each coordinate corresponds to exactly one layer. This constraint corresponds to the matrix of \(\phi\) being block-diagonal.</p> <p>These changes improve the accuracy, and decrease the inference time (because for each layer we only need to use some part of the compressed coordinates), while keeping \(d\) constant.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/non_block-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/non_block-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/non_block-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-projected-fff-networks/non_block.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> 
<source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/block-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/block-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/block-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-projected-fff-networks/block.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> CIFAR-10 experiment. Network has one hidden layer with 32 units. On the left: one projection for d=2500. On the right: separate projections with d=500 and d=100 </div> <h2 id="gpu-memory-utilization">GPU memory utilization</h2> <p>Suppose we want to make inference with the minimal possible usage of RAM. Let’s assume that the architecture of the model that we are evaluating is an MLP. Then, using the compressed representation, we can use no more than \(O(\max(d, L))\) memory, where \(d\) is the dimension to which we compressed the model, and \(L\) is the maximum size of the layer.</p> <p>We describe the inference procedure consuming this little memory. We need to sequentially apply each of the feedforward networks in our MLP. For each layer, we have to transform the input vector \(x\) to the output \(y\). We fill in the output vector with zeros, and for each index \((i, j)\) in the weight matrix we need to make an update \(y_i \leftarrow y_i + A_{ij}x_j\). However, we don’t store any of the parameters in memory except for \(d\) compressed parameters. 
So, in order to get the value of \(A_{ij}\), we need to take the dot product of a row in the projection matrix and a vector of compressed parameters.</p> <p>It is not obvious how to randomly access a row in a random matrix, where all columns should be normalized, and the outcomes during training and inference are consistent. We note that the true randomness of the projection matrix is not important for us. So, instead we can generate the \(i\)-th row by seeding the random generator with \(i\) and generating a row. During training, we generate the whole matrix this way, and compute the normalization coefficients of columns, which are included into the model’s representation in memory. During inference, to get the \(i\)-th row, we just need to sample a row and divide it by normalization coefficients pointwise. We have checked that this way of generating the projection matrix has no negative effects on the performance of the compressed model, compared to the truly random option.</p> <h2 id="diffusion-models">Diffusion models</h2> <p>We have also attempted to apply model compression to different domains besides image classification. One of the problems we considered is generating 2D points from a certain distribution using a diffusion model. In this setup, we have a neural network that predicts the noise for a pair \((x, t)\) — point in space and time.</p> <p>We use continuous time on \([0, 1]\), linear noise schedule with \(\beta_{min} = 0.3\), \(\beta_{max} = 30\), variance-preserving SDE, batch size \(64\), sampling timesteps \(100\), ODE sampler. The distribution that we are trying to learn is a mixture of \(6\) Gaussians. 
We use an MLP score net with \(2\)-dimensional input and \(32\)-dimensional Gaussian Fourier Projection time embeddings.</p> <p>However, even setting the compression dimension \(1000\) or \(5000\) did not enable us to see good sampling results.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/gauss-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/gauss-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/gauss-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-projected-fff-networks/gauss.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/la_100-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/la_100-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/la_100-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-projected-fff-networks/la_100.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/la_100_bad-480.webp" /> <source class="responsive-img-srcset" 
media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/la_100_bad-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-projected-fff-networks/la_100_bad-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-projected-fff-networks/la_100_bad.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> On the left: sampling result with original model. On the right: sampling with compressed model, d=1000 </div> <h2 id="conclusion">Conclusion</h2> <p>We have discussed a way to compress models, decreasing their size by several orders of magnitude. We identified ways to improve the validation accuracy of compressed models, such as doing the initialization with projection and having independent projections for layers. This technique leads to surprising consequences, such as being able to do machine learning model inference with a very small amount of RAM.</p> Understanding Linear Mode Connectivity 2023-12-01T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/LinearMode <h2 id="instability-analysis-and-linear-mode-connectivity">Instability Analysis and Linear Mode Connectivity</h2> <p>The advent of transformer models stands as a pivotal advancement within the domain of machine learning, fundamentally reshaping the landscape of artificial intelligence. First introduced in 2017 through the seminal work “Attention is All You Need” by Vaswani et al., transformers have since exploded in both uses and applications, such as language and vision tasks. In fact, ChatGPT, which was the fastest-growing application in history (until Threads in 2023), is built using a transformer architecture. 
Although transformers can achieve state-of-the-art performance in many tasks, they are often limited by their size, which can create issues for memory and energy both during training and deployment. For example, GPT-3 has 175 billion parameters, and GPT-4, which was released earlier in 2023, has 1.76 trillion parameters! Compression techniques such as knowledge distillation and pruning can be used to deal with these issues, reducing the size of the network while retaining most of its capabilities. Several methods already exist for shrinking transformers such as weight pruning (Zhang et al. 2022), as well as post-training compression (Kwon et al. 2022). However, there is little research on the conditions under which a transformer can be effectively compressed or at what point during training a transformer compression should begin.</p> <p>Frankle et al. (2020) suggest that <em>instability analysis</em>—analyzing the stability of training with respect to stochastic gradient descent (SGD) noise—could be a way of identifying conditions under which pruning can be useful. To determine whether the outcome of training is stable w.r.t SGD noise, we create two copies of a network with the same initialization, and optimize those networks using different samples of SGD noise. We can then evaluate how similar or dissimilar the resulting networks are. For this purpose, Frankle et al. propose <em>linear interpolation instability</em>, defined to be the maximum increase in error along the linear path in parameter space connecting the two resulting networks. When error is nonincreasing along this path, the networks are said to have <em>linear mode connectivity</em>. In their paper, they propose that this instability analysis is related to lottery ticket networks, which are subnetworks from randomly-initialized dense neural networks that can achieve comparable test accuracy to the original network after training. 
They found that pruned networks that were capable of achieving near full test accuracy were stable to SGD noise, and showed linear mode connectivity.</p> <p>Frankle et al. study linear mode connectivity in neural networks, which is a stricter version of mode connectivity. They train two networks with the same initialization on SGD noise (randomly augmented datasets) and calculate the maximum loss along the linear path between the two resulting network to quantitatively analyze the instability of the original network to noise.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-LinearMode/LMC.PNG-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-LinearMode/LMC.PNG-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-LinearMode/LMC.PNG-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-LinearMode/LMC.PNG" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> $W_i$ represents the weights at initialization, and the red lines represent two different paths through parameter space corresponding to different SGD noise. The figure on the left shows linear mode connectivity, but the figure on the right shows increasing loss along the blue interpolated path between local minima, which shows instability to SGD noise during training. </div> <p>In our project, we plan to expand on the research from Frankle et al. and apply it to transformers. In doing so, we hope to study the conditions under which transformers can be effectively compressed as well as the optimization landscape of training transformers. 
We seek to evaluate linear mode connectivity in transformer architectures and whether it is an effective indicator for how effectively a transformer can be compressed.</p> <h2 id="transformers-and-related-work">Transformers and Related Work</h2> <p>We restricted our analysis of transformer architectures to the Vision Transformer (ViT) model proposed by Dosovitskiy (2021). ViT works by splitting an image into patches, then computing embeddings of those patches via linear transformation. After adding positional embeddings, the resulting embeddings are fed into a standard Transformer encoder. Due to runtime issues, we were unable to fully train transformers from scratch. We ended up working with and fine-tuning pretrained transformers, which were imported from the HuggingFace transformers package.</p> <p>Shen et al. (2023) investigated a more general form of the lottery ticket hypothesis with ViTs, proposing ways to select a subset of the input image patches on which the ViT can be trained to similar accuracy as with the full data. However, they write “the conventional winning ticket [i.e. subnetwork] is hard to find at the weight level of ViTs by existing methods.”</p> <p>Chen et al. (2020) investigated the lottery ticket hypothesis for pre-trained BERT networks, and did indeed find subnetworks at varying levels of sparsity capable of matching the full accuracy. Our work hoped to find similar results for vision transformers.</p> <p>Linear mode connectivity is also deeply connected to the nature of the optimization landscape. This has important applications with regards to federated learning, and combining the results of independent models. For example, Adilova et al. (2023) showed that many deep networks have <em>layer-wise</em> linearly connected minima in the optimization landscape, which they explain as being the result of the layer-wise optimization landscape being convex, even if the whole optimization landscape is not. 
They found similar behavior in vision networks trained on CIFAR-10.</p> <p>In our project, we seek to evaluate the connection between linear mode connectivity and the existence of winning subnetworks. We expand on the work from Shen et al. and Chen et al. by incorporating the linear mode connectivity analysis proposed by Frankle et al. as well as search for conventional winning subnetworks in transformers for vision tasks. Our goal is to find conditions and methods for which transformers can be compressed while retaining high performance.</p> <h2 id="experiments-with-linear-mode-connectivity">Experiments with Linear Mode Connectivity</h2> <p>We decided to work with the pretrained ViT model from HuggingFace transformers, and to fine tune this model on CIFAR-10. We also augmented the data set of 32x32 images with a random 24x24 crop followed by resizing, followed by a random horizontal flip and color jitter (randomly changing brightness, contrast, saturation and hue). To evaluate linear mode connectivity, we train a pair of models with the same initialization on different randomly shuffled and augmented datasets.</p> <p>In order to assess the instability of the original network to the dataset augmentations, we use the procedure described by Frankle et al. and evaluate the test loss and accuracy of the linearly interpolated models. The weights of the interpolated models are directly calculated from the weights of the trained models using evenly spaced values of \(\alpha\). The test dataset did not receive the augmentations that the training dataset did.</p> <p>All models trained for the linear interpolation instability analysis were trained using the AdamW optimizer for 8 epochs with a learning rate of 2e-4. 
We use the default ViTImageProcessor imported from HuggingFace to convert the images into input tensors.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-LinearMode/interpolation_loss-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-LinearMode/interpolation_loss-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-LinearMode/interpolation_loss-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-LinearMode/interpolation_loss.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>The above plot shows the result of linear interpolation after fine tuning two copies of the pretrained model. The evaluation loss is non-increasing, and in fact decreases, possibly as an artifact of the fact that the test set did not receive augmentations. 
Otherwise, it seems that there is linear mode connectivity, at least in the local optimization landscape when starting from a pretrained model.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-LinearMode/instability-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-LinearMode/instability-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-LinearMode/instability-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-LinearMode/instability.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>However, we failed to observe linear mode connectivity in randomly initialized transformers, noting an increase in test loss as well as a decrease in test accuracy around \(\alpha = 0.5\). The maximum observed test loss of the interpolated models is more than double the mean of the test losses of the original pair of trained models, which is much more than the threshold of a \(2\%\) increase used by the original authors.</p> <p>The resulting networks seem to end up in disconnected local optima, implying that these networks are not invariant to the dataset augmentations. This is consistent with the analysis done by Frankle et al., who find that the stability of networks increases over the course of training.</p> <p>Our results combined with the original analysis by Frankle et al. 
seems to suggest that linear mode connectivity emerges at some point during training, but we have yet to observe the point at which it emerges due to computation restraints and the size of the ImageNet dataset used to pretrain the ViT models.</p> <h2 id="pruning">Pruning</h2> <p>We used the PLATON compression algorithm (Zhang et al. 2022) during training to prune networks to different levels of sparsity. PLATON uses several “scores” to prune parameters. One score is parameter magnitude; smaller magnitude parameters tend to be pruned. However, in a complex network, small magnitude weights can still have a large impact; to measure this, PLATON uses the gradient-weight product \(\theta^T \nabla \mathcal{L}(\theta)\) as a first order Taylor approximation of the impact of the removal of a weight on the loss. PLATON also maintains uncertainties for all the weights, preferring not to prune weights with uncertain scores.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-LinearMode/sparsity20percent-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-LinearMode/sparsity20percent-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-LinearMode/sparsity20percent-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-LinearMode/sparsity20percent.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-LinearMode/sparsity5percent-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" 
srcset="/staging/assets/img/2023-12-12-LinearMode/sparsity5percent-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-LinearMode/sparsity5percent-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-LinearMode/sparsity5percent.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>Pruning and retraining the pretrained model to 20% of its original size over 4 epochs results in a test accuracy of 95.3%, compared to 98% accuracy of the full model, and pruning to 5% resulted in 93.7% test accuracy. So although the compressed models cannot reach the accuracy of the original model, they are able to still maintain a relatively high test accuracy, and the PLATON algorithm does a good job of selecting weights. We also used the pruned weights at 20% sparsity to generate a mask, and applied this mask to the original model.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-LinearMode/pruned_weights_as_mask-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-LinearMode/pruned_weights_as_mask-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-LinearMode/pruned_weights_as_mask-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-LinearMode/pruned_weights_as_mask.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Blue and orange traces represent training loss and an exponential moving average resp. 
</div> <p>When training the original model, but applying a mask (effectively setting the corresponding weights and gradients to zero), we were able to train the model to 93.6% test accuracy. This supports the lottery ticket hypothesis, since the PLATON algorithm can be used to identify a relatively small subset of weights from the pretrained network that can be trained to high accuracy in isolation.</p> <h2 id="analysis-and-conclusions">Analysis and Conclusions</h2> <p>Our results with linear mode connectivity suggest that at some point during the training process, optimization ends up in a linearly connected local minimum, and further optimization will be stable to SGD noise. This is because we were indeed able to observe linear mode connectivity when fine tuning a pretrained model. Additionally, with random initialization, we found the absence of linear mode connectivity. Unfortunately, we were not able to determine exactly where in the training process linear mode connectivity emerges.</p> <p>It is notable that over the course of training, the loss does not seem to go down steadily, rather rapidly oscillating between high and low loss. The exponential moving average smooths it out, but it is still quite chaotic. During pruning, it seems plausible that the oscillations could correspond to weights being pruned, but the model approaches the target ratio of nonzero weights by the end of the third epoch of training, leaving the behavior in the final epoch unexplained. Furthermore, the training loss displays similar behavior while training the masked models. Further work could be done to investigate this phenomenon and potentially make pruning/training more stable.</p> <p>Our results with pruning show that a standard compression algorithm, PLATON, is able to successfully prune the pretrained ViT model to high levels of sparsity while maintaining relatively high accuracy. 
Our results with masking weights also suggest the existence of lottery ticket networks in the pretrained model, since we were able to train the corresponding subnetwork to a high level of accuracy. Unfortunately, the connection between linear mode connectivity and lottery ticket transforms remains very ambiguous, since we were unable to perform pruning experiments on models that did not demonstrate linear mode connectivity.</p> <p>Further work could be done to investigate linear mode connectivity from different levels of pretraining as initialization, which would shed light on when the optimization of transformers settles into a connected minimum (or when it doesn’t). Further work on when linear mode connectivity arises, as well as experiments pruning the corresponding networks, would help determine if there is a connection between connectivity and the presence of lottery transformers. This would also be important for determining whether linear mode connectivity is a good indicator that transformers can be compressed more definitively. Additionally, as mentioned earlier, the existence of lottery networks in language models has already been investigated, and it would be interesting to see if this is related to linear mode connectivity as well.</p> <h2 id="references">References</h2> <p>Adilova, L., Andriushchenko, M., Kamp, M., Fischer, A., &amp; Jaggi, M. (2023). <i>Layer-wise Linear Mode Connectivity</i>.</p> <p>Frankle, J., Dziugaite, G. K., Roy, D. M., &amp; Carbin, M. (2020). <i>Linear Mode Connectivity and the Lottery Ticket Hypothesis</i>.</p> <p>Zhang, Q., Zuo, S., Liang, C., Bukharin, A., He, P., Chen, W., &amp; Zhao, T. (2022). PLATON: Pruning Large Transformer Models with Upper Confidence Bound of Weight Importance. In K. Chaudhuri, S. Jegelka, L. Song, C. Szepesvari, G. Niu, &amp; S. Sabato (Eds.), <i>Proceedings of the 39th International Conference on Machine Learning</i> (Vol. 162, pp. 26809–26823). PMLR. 
https://proceedings.mlr.press/v162/zhang22ao.html</p> <p>Kwon, W., Kim, S., Mahoney, M. W., Hassoun, J., Keutzer, K., &amp; Gholami, A. (2022). A fast post-training pruning framework for transformers. <i>Advances in Neural Information Processing Systems</i>, <i>35</i>, 24101–24116.</p> <p>Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., Uszkoreit, J., &amp; Houlsby, N. (2021). <i>An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale</i>.</p> <p>Shen, X., Kong, Z., Qin, M., Dong, P., Yuan, G., Meng, X., Tang, H., Ma, X., &amp; Wang, Y. (2023). <i>Data Level Lottery Ticket Hypothesis for Vision Transformers</i>.</p> <p>Chen, T., Frankle, J., Chang, S., Liu, S., Zhang, Y., Wang, Z., &amp; Carbin, M. (2020). <i>The Lottery Ticket Hypothesis for Pre-trained BERT Networks</i>.</p> Transformers vs. RNNs: How do findings from real-world datasets relate to the theory? 2023-12-01T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/TransformersAndRNNs <h2 id="introduction--motivation">Introduction &amp; Motivation</h2> <p>Since their invention, transformers have quickly surpassed RNNs in popularity due to their efficiency via parallel computing <d-cite key="qin2023hierarchically"></d-cite>. They do this without sacrificing, and often improving, model accuracy. Liu et al has developed a theorhetical explanation for this by mathematically proving that transformers learn shortcuts to automata that RNNs are unable to take <d-cite key="liu2023transformers"></d-cite>. However, the results of this proof were only tested on synthetic dataset, and the question of how Transformers perform better than RNNs on memory-based tasks without keeping track of recurrence is still relevant, particularly for developers <d-cite key="liu2023transformers"></d-cite>. 
In this project, I analyzed and compared the performance of transformer and RNN based models on both a financial stock and medical ECG dataset. By practically testing the applications of RNNs and Transformers in two different settings, I aim to aid developers by suggesting considerations for them to have while choosing an architecture to work with.</p> <h2 id="background--prior-work">Background &amp; Prior Work</h2> <p>Recurrent neural networks (RNN) are a type of neural network that were previously considered state-of-the-art for generating predictions on sequential data including speech, financial data, and video <d-cite key="Kanagachidambaresan2021"></d-cite>. RNNs are distinct from other types of neural networks beecause they had an internal “memory” <d-cite key="mhaskar2016learning"></d-cite>. This memory was based on the netowrk not only taking in the current input into its function but also information learned from all of the previous inputs <d-cite key="Kanagachidambaresan2021"></d-cite>.</p> <p>However, since Transformers were invented in 2017, they have rapidly made the use RNNs obsolete <d-cite key="qin2023hierarchically"></d-cite>. Transformers were proposed as a simple network architecture based only on an attention mechanism, without the need for recurrence or convolutions <d-cite key="NIPS2017_3f5ee243"></d-cite>. While they are quadratic in time complexity <d-cite key="keles2022computational"></d-cite>, as opposed to RNNs’ linear complexity, their ability to parallelize makes them significantly faster to train <d-cite key="NIPS2017_3f5ee243"></d-cite> than RNNs. Their popularity is broad, and Transformers are considered to be the gold standard in many fields including natural language processing, computer vision, and signal processing <d-cite key="islam2023comprehensive"></d-cite>.</p> <p>While Transformers were accepted to perform better, the question remained – why? 
Transformers do not keep track of recurrence but are somehow able to successfully complete memory-based tasks. Liu et al aimed to answer this question by exploring how transformers learn shortcuts to automata <d-cite key="liu2023transformers"></d-cite>. They did this both by mathematical proof and also through experimentation on synthetic data sets. Their primary conclusion is that transformers are able to universally approximate these complex functions in few layers by building simple parallel circuits. This leads to improvement in computational efficiency and also performance improvements <d-cite key="liu2023transformers"></d-cite>.</p> <p>Current research in the RNN space is largely focused on trying to leverage their inherently linear complexity to its advantage <d-cite key="peng2023rwkv"></d-cite>, but I wondered if there were current applications where RNNs might already perform similarly or better. There are few papers in this space, and the ones that exist largely focus on a domain-specific performance mechanism <d-cite key="10.3389/fnbot.2023.1157957"></d-cite>. With that, I wanted to characterize performance of RNNs versus Transformers across a range of metrics: including accuracy, training time, memory, and parameters. This project acts as an extension of Liu et al by looking at real-world datasets from different applications and seeing if the conclusions change in the real-world. This aims to fill a gap between practical applications and mathematical explanations by attempting to provide an intuitive understanding of real-world dataset performance between transformers and RNNs. This would provide a holsitic summary of the tradeoffs between RNNs and Transformers while aiming to provide insight as to why Transformers remain so dominant in the space.</p> <h2 id="methods--results">Methods &amp; Results</h2> <h3 id="data">Data</h3> <p>The first dataset I used was Yahoo Finance’s stock dataset, accessible through the yfinance API. 
I specifically looked at the closing price data from the S&amp;P500 stock group which represents the stocks from the 500 largest companies. The second dataset I used was from Kaggle (available <a href="https://www.kaggle.com/datasets/shayanfazeli/heartbeat/">here</a>). This dataset captures ECG data. I specifically used the abnormal and normal sub datasets that contained single-heart beat single-lead ECG data.</p> <h3 id="software">Software</h3> <p>I ran all of the code for this project using Python 3.10 in Google Colab. The APIs numpy, scipy, matplotlib, seaborn, keras, tensorflow, and yfinance were all used. The notebook used for the stock experiements is available <a href="https://colab.research.google.com/drive/1NNZKU18bm7QsZaWOjX8EYD7NFoJK0jkQ#scrollTo=YQfvmYtVU3Yg">here</a> and the ECG experiments <a href="https://colab.research.google.com/drive/1yBfvgPEaUau6ttJV9ufdkgVm6x9y-AI9#scrollTo=MsfkYwwCS6WC">here</a>.</p> <h3 id="stock-model-comparisons">Stock Model Comparisons</h3> <p>I began my experiments by loading and visualizing the data. I wanted to compare the transformer and RNN models on a time-series prediction so I decided to use 11 months of data to predict the next 1 month behavior. To do this, I loaded data from July 1st, 2022 to July 31st 2022. Of note, the stock market is closed during weekends and holidays, so there were 251 days in my dataframe, and I trained on the first 231 days to predict the last 20. 
I then used an 80/20 train and test split.</p> <p>I also visualized several iterations of ten random samples to better understand the dataset and ensure that I was preprocessing correctly.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/10SP500_pre_norm-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/10SP500_pre_norm-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/10SP500_pre_norm-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-TransformersAndRNNs/10SP500_pre_norm.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> The closing price of 10 random stocks from the S&amp;P 500 over a year timeframe. </div> <p>Once I had the data set up, I began to build each model. In addition to a simple RNN architecture and a Transformer model, I also built an LSTM model which is a specialized subset of RNNs that aim to solve a vanishing gradient problem in traditional RNNs <d-cite key="Sherstinsky_2020"></d-cite>. In addition to providing another reference model, I decided to include LSTM because I could easily test identical architectures between LSTM and RNNs <d-cite key="lstmseries"></d-cite>. Additionally, LSTMs are notoriously slower to train and this provides a sanity check for my later timing results <d-cite key="lstmseries"></d-cite>.</p> <p>In building the models, I tried to keep them all as simple and equivalent as possible for a fair comparison. 
This was simple for the LSTM and RNN, I just used two LSTM (or RNN) layers followed by a linear layer and then an output linear layer. Because of the different architecture of transformers, it didn’t seem possible to create a completely equivalent architecture. However, I tried to approximate this by having just a singular attention layer that didn’t have a feed foward network component and only had a standard layer normalization and then a multiheaded attention wiht 2 heads (the same number of layers for RNN/LSTM with the head size equivalent to the RNN/LSTM layer size). I followed this with a pooling layer, a linear layer (with the same size as the RNN/LSTM linear layer) and a linear output layer. I trained all models with a batch size of 25 and 30 epochs.</p> <p>For each model, I measured RMSE for the predictions (used for accuracy), time used to train the model, memory used to train the model, number of parameters, and storage used for parameters. The results are shown in the following table.</p> <table> <thead> <tr> <th>Model</th> <th>RMSE</th> <th>Memory in Training (KB)</th> <th>Time to Train (s)</th> <th>Parameters (#)</th> <th>Memory for Parameters (KB)</th> </tr> </thead> <tbody> <tr> <td>LSTM</td> <td>155.61</td> <td>16575097</td> <td>151.76</td> <td>54190</td> <td>211.68</td> </tr> <tr> <td>RNN</td> <td>149. 07</td> <td>4856823</td> <td>67.25</td> <td>16750</td> <td>65.43</td> </tr> <tr> <td>Transformers</td> <td>36.46</td> <td>3165225</td> <td>87.00</td> <td>2019</td> <td>7.89</td> </tr> </tbody> </table> <p>As expected, the LSTM model runs much slower with higher memory usage which is consistent with literature models <d-cite key="lstmseries"></d-cite>. When comparing the models, on first look, it seems like the Transformer model greatly outperforms the other models, both in terms of accuracy and memory with comprable training times to the RNN. 
However, I noticed something odd when I visualized my results: while transformers performed better in RMSE, it didn’t look like they performed better.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/no_norm_performance_yesLSTM-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/no_norm_performance_yesLSTM-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/no_norm_performance_yesLSTM-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-TransformersAndRNNs/no_norm_performance_yesLSTM.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Performance of initial model experiments. Before the black line, is the training data of the closing price, after the line, the testing data as well as the predictions for all three models are shown. 
</div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/no_norm_forcast_yesLSTM-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/no_norm_forcast_yesLSTM-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/no_norm_forcast_yesLSTM-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-TransformersAndRNNs/no_norm_forcast_yesLSTM.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> The testing data compared to the predictions for all three models. </div> <p>Besides noting that the models all did fairly well given their simplicity, this was puzzling. Addditionally, when I reran the models, I noted that the RMSE values for the LSTM/RNN models varied wildly with results between 50-550 whereas transformer’s performance was consistently around 35. To investigate, I printed out the RMSE for each prediction and analyzed them. I found that most errors were fairly small but there were a couple very large errors that ended up skewing the overall reported average. 
In visualizing that outlier and performance between the models, I saw that the prices for the outliers were much higher than most stocks, making the LSTM/RNN models predict a much lower price.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/Outlier_performance_yesLSTM-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/Outlier_performance_yesLSTM-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/Outlier_performance_yesLSTM-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-TransformersAndRNNs/Outlier_performance_yesLSTM.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Outlier visualization of one stock with all three model's performance. </div> <p>Transformers still do okay here, likely do to the first normalization layer I used. 
Thus, to make the problem more equal, I decided to normalize all of the data at the onset.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/10SP500_norm-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/10SP500_norm-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/10SP500_norm-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-TransformersAndRNNs/10SP500_norm.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Sample of data once normalized. </div> <p>When rerunning the training, the tabular results match the visualizations. Surprisingly, Transformers perform worse than RNNs/LSTMs, with less memory used but no real difference in training time. 
Even with adding complexity to the Transformer model via increasing the feed-forward network complexity through increasing the size of the embedded feed forward network and increasing the number of attention layers, no performance difference was seen – the time to train just substantially increased.</p> <table> <thead> <tr> <th>Model</th> <th>RMSE</th> <th>Memory in Training (KB)</th> <th>Time to Train (s)</th> <th>Parameters (#)</th> <th>Memory for Parameters (KB)</th> </tr> </thead> <tbody> <tr> <td>LSTM</td> <td>0.125</td> <td>8233179</td> <td>128.25</td> <td>54190</td> <td>211.68</td> </tr> <tr> <td>RNN</td> <td>0.121</td> <td>4147757</td> <td>87.58</td> <td>16750</td> <td>65.43</td> </tr> <tr> <td>Transformers</td> <td>0.281</td> <td>3148379</td> <td>87.38</td> <td>2019</td> <td>7.89</td> </tr> <tr> <td>Complicated Transformers</td> <td>0.282</td> <td>40052260</td> <td>1243.01</td> <td>16248</td> <td>63.47</td> </tr> </tbody> </table> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/Testing_yesLSTM-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/Testing_yesLSTM-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/Testing_yesLSTM-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-TransformersAndRNNs/Testing_yesLSTM.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Performance of model experiments with normalized data. 
Before the black line, is the training data of the closing price, after the line, the testing data as well as the predictions for all three models are shown. </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/Forcast_yesLSTM-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/Forcast_yesLSTM-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/Forcast_yesLSTM-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-TransformersAndRNNs/Forcast_yesLSTM.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> The testing data compared to the predictions for all three models with normalized data. </div> <p>This seems to go against prior results which almost universally found Transformers faster without sacrificing efficiency <d-cite key="qin2023hierarchically"></d-cite>. I hypothesized that this could be because Transformers generally have poor length generalization and perhaps this is an inoptimal time window for this particular model <d-cite key="anil2022exploring"></d-cite>. This could also explain the lack of a time improvement as a transformer’s time complexity is quadtratic whereas RNN’s is linear «d-cite key=”peng2023rwkv”&gt;&lt;/d-cite&gt;, <d-cite key="keles2022computational"></d-cite>. Presumably, the quadratic performance will slow down the Transformer when parallelization becomes impossible <d-cite key="peng2023rwkv"></d-cite>. 
To test this hypothesis, I decided to try to predict my results with a smaller time window.</p> <p>I did this by predicting the closing price of just one day of data using a week of prior data. I normalized all data and retrained my models. I reverted back to the simple transformer model in an effort to test relatively equivalent model complexities.</p> <table> <thead> <tr> <th>Model</th> <th>RMSE</th> <th>Memory in Training (KB)</th> <th>Time to Train (s)</th> <th>Parameters (#)</th> <th>Memory for Parameters (KB)</th> </tr> </thead> <tbody> <tr> <td>LSTM</td> <td>0.386</td> <td>9588885</td> <td>19.00</td> <td>53221</td> <td>207.89</td> </tr> <tr> <td>RNN</td> <td>0.381</td> <td>4197690</td> <td>13.45</td> <td>15781</td> <td>61.64</td> </tr> <tr> <td>Transformers</td> <td>0.384</td> <td>2707340</td> <td>11.45</td> <td>1050</td> <td>4.1</td> </tr> </tbody> </table> <p>As the results show, my hypothesis was correct. The transformer performed much faster without a reduction in accuracy. However, it is also very possible that I didn’t see a time difference because I am using small models with a short training time. These timing differences could become larger with more computationally intensive models.</p> <h2 id="ecg-model-comparisons">ECG Model Comparisons</h2> <p>While the results from the stock dataset were interesting, I also wanted to test these models with a different type of input that perhaps would capture different underlying strengths and weaknesses of the models. I decided to use an ECG to predict the presence of an abnormality in the heart beat. This represents a difference in the stock dataset in three key ways:</p> <p>1) The output is binary instead of discrete. 2) There is a better source of ground truth for this data. If there was a definitive way to predict the behavior of a stock, everyone would be rich, but that’s not the case – there’s inherently uncertainty and an expected level of innaccuracy. 
For health data, the person will have the condition or not and an experienced cardiologist would be able to definitively diagnose the patient. 3) The input has an expected, structured shape. All ECGs are supposed to look roughly the same and should have a similar visibility in the dataset. This has effects on the causality window used in models that I was interested in analyzing.</p> <p>I first visualized my data for both the abnormal and normal heart beats. The overall sample size was around 9000 patients, and I artificially created a 50/50 split between abnormal and normal to prevent class imbalance. I once again used an 80/20 train/test split for my models.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/ECG-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/ECG-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/ECG-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-TransformersAndRNNs/ECG.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Data visualization for ECG. </div> <p>I immediately ran into difficulties once I began training with the performance of all models really being a coin toss between the two. I then focused my time on trying to build more complex models. For the RNN, I added more layers with varying dimensions and played around with adding dropout and linear layers. For the Transformer, I built up the feedforward network part of the algorithm by increasing the size of the embedded feed forward network and adding multiple attention layers. 
For both, I tuned hyperparameters such as the optimizer, batch size, and number of epochs. Despite this results still remined poor.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/Loss-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/Loss-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/Loss-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-TransformersAndRNNs/Loss.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Loss curve for both models on training and validation data. </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/ROC-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/ROC-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-TransformersAndRNNs/ROC-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-TransformersAndRNNs/ROC.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> ROC curves for both models with displayed AUC values. </div> <p>There is virutally no reduction on validation loss for the RNN graph, no matter what structure I chose. 
While there is a normal looking curve for transformer, the scale of loss reduction is very small when you consider the y-axis. Additionally, the RNN network never performed better than randomly, whereas the Transformer network was only slightly improved.</p> <p>One interpretation of these results could be that the Transformer model performed better. However, because neither of these architectures perfomred overly sucessfully, I don’t think that is a sound conclusion. It is unclear to me if this is a shortcoming of my code or a difficulty with the problem and dataset. This would be an area where future work is required.</p> <p>My main takeaway from this process of working with the ECG data was how much easier it was to tune and problemsolve with the Transformer than the RNN. For the Transformer, I was able to adjust the number of heads or the sizes of heads, or the feed foward network, etc, whereas, in the RNN, I really could only play with the layers of the RNN itself. While both of these architectures have black-box components, I found the Transformer a lot easier to work and play around with as a developer, and I could develop some intuition on what things I should change and why. This perhaps represents another difference from the transformer vs RNN debate but from a usability standpoint.</p> <h2 id="limitations">Limitations</h2> <p>There are several limitations of this project. First, I only analyzed a couple of different datasets. This is not representative of all of the different applications of GNNs and transformers, meaning the conclusions are limited to the datasets chosen and are not necessarily representative of the full field. Additionally, my implementation of the models may not be the most efficient ones. While I tried to test a wide range of parameters, due to limited resources available (ie time and being one person) there are undoubtably more optimal structures or hyperparameters that I did not test. 
This ability to not only test a limited number of parameters, but also architectures remains an overall limitation and challenge of the deep learning field <d-cite key="limitation"></d-cite>. Noteably, I also worked with small datasets and models. While this was useful in running a lot of experiments quickly, the differences in architectures, especialy in regards to training time/memory, may be more important and more pronounced in larger-scale tasks.</p> <p>Additionally, I did not test every metric of success. While I focused on number of trainable parameters, training time, memory, and accuracy – these are not the only things that matter in machine learning. For instance, in some applications, senstivity might matter a lot more than specificity and overall accuracy. In others, explainability of the model may be essential, such as time sensitive healthcare settings <d-cite key="limitationexplain"></d-cite>. Thus, in making a decision in chosing one model over another, it is important to consider the specific application and benefits of each approach. These methods of success also might not be “fair” to each model. For instance, a much larger number of trainable parameters may be fine if overall time is not lost. If one has the capacity to run their machine over night but with limited memory, they might reevaluate these metrics of sucess. Given these limitations, it is important to do a holistic evaluation of architectures when deciding which to use for a deep learning project and to use this project as a guidelines of preliminary experiments to run in making that decision. This project does not serve as a ground truth as to why to choose one model over another.</p> <h2 id="conclusions">Conclusions</h2> <p>Transformers seem to be easier to work with when there are still questions surrounding the data. 
For instance, with the stock dataset, there may be circumstances where you would prefer a model that can perform well prior without normalizing the dataset if for instance, you care about the magnitude of closing prices between stocks. Similarly, for the ECG model, they were easier to tune with different hyper paramters and felt more intuitive in comparison to working with the RNN. Transformers also consistently used less memory with much fewer parameters across the board, which is important when working in resource-limited systems.</p> <p>However, this project found that transformers are not always faster or more accurate than alternatives. While Liu et al found that typical transformers can find shortcuts to learn automata <d-cite key="liu2023transformers"></d-cite>, this might not be the case for all datasets across all applications. Previous studies have found that length generalization is a deficiency of transformers <d-cite key="anil2022exploring"></d-cite>, and this is supported by this project which found that for longer-term predictions, RNNs were the faster and more accurate approach post-normalization.</p> <p>These findings underscore the importance of taking the time to test different architectures in your resarch and not assuming that just because Transformers are more popular, it doesn’t mean they are necessarily the best fit for your problem. 
In deep learning research, we often get bogged down in tuning a model and it’s important to take a step back and consider your assumptions about the task – which may include the broader model consideration.</p> Exploring the latent space of text-to-image diffusion models 2023-12-01T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/latent-interpolation <h2 id="introduction">Introduction</h2> <p>Diffusion models <d-cite key="ho2020denoising"></d-cite> are a class of deep generative models that have shown promising results in many different tasks, including photorealistic image generation <d-cite key="saharia2022photorealistic"></d-cite> <d-cite key="ramesh2022hierarchical"></d-cite> and protein design <d-cite key="watson2023novo"></d-cite> <d-cite key="lee2023score"></d-cite>. Diffusion models work by gradually destroying structure of an object with $T$ steps of a fixed noising process, and then learning to reverse this process to recover the original object. This allows the model to learn the underlying structure of the data, and to generate new objects that are both realistic and diverse. 
The forward process $q( x_t | x_{t-1} )$ defines how noise is added to an original image $x_0$, and the reverse process $q( x_{t-1} | x_{t} )$ that we want to learn, can recover a less noisy version of an image.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/chicken_forward_reverse-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/chicken_forward_reverse-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/chicken_forward_reverse-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-01-latent-interpolation/chicken_forward_reverse.jpeg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Stable Diffusion (SD) <d-cite key="rombach2022high"></d-cite> is an open-source latent text-to-image diffusion model which is able to realize images with fine grained details, when prompted with a textual input describing the desired characteristics of the output image. SD is reasonably fast compared to other diffusion models, since it performs the diffusion steps in a low dimensional latent space. The strategy consists of using an image encoder $\mathcal{E}: \mathcal{X} \rightarrow \mathcal{Z}^0$ which maps an image $x_0 \in \mathcal{X}$ to a lower dimensional image latent code $z_0 \in \mathcal{Z}^0$, and a latent decoder $\mathcal{D}: \mathcal{Z}^0 \rightarrow \mathcal{X}$ which recovers an image $\mathcal{D}(z_0)$ from the image latent code $z_0$. 
Using these two models it is possible to learn to denoise $z_T$, instead of $x_T$, which is also normally distributed, saving a lot in computing since the latent codes dimensionality are usually chosen to be much smaller than the original images dimensionality. During inference time, for a given input textual prompt $y$, we encode the prompt into a vector $s = \tau_\phi(y)$ using CLIP <d-cite key="radford2021learning"></d-cite>, sample $z_T \sim \mathcal{N}(0, I)$, and provide these two tensors to the diffusion model $f_\theta: \mathcal{Z}^T \times \mathcal{S} \rightarrow \mathcal{Z}^0$, which generates $z_0 = f_\theta(z_T, s)$. We can then map this vector into an image using the decoder: $x_0 = \mathcal{D}(z_0)$ which hopefully is in the data distribution.</p> <h2 id="background-and-related-work">Background and related work</h2> <p>In order to be able to learn the complex interaction between textual descriptions and images coming from a very large multimodal dataset, SD has to organize its image latent space $\mathcal{Z}^T$ coherently. If the learned representations are smooth for instance, we could expect that $\mathcal{D}(f_\theta(z_T, s))$ and $\mathcal{D}(f_\theta(z_T + \epsilon, s))$, where $\epsilon$ is a tensor of same dimensionality as $z_T$ with values very close to 0, will be very similar images. A common technique to explore and interpret the latent space of generative models for images is to perform latent interpolation between two initial latent codes, and generate the $N$ images corresponding to each of the interpolated tensors. If we sample $z_\text{start}, z_\text{end} \sim \mathcal{N}(0, I)$, fix a textual prompt such that $s = \tau_\phi({y})$ and use SD to generate images conditioned on the textual information we could explore different techniques for generating interpolated vectors. 
A very common approach is linear interpolation, where for $\gamma \in [0, 1]$ we can compute:</p> \[z_\text{linear}^{(\gamma)} = (1-\gamma)z_\text{start} + \gamma z_\text{end}\] <p>Mimicking these exact steps for three different pairs sampled latent codes for $(z_\text{start}, z_\text{end})$, and for each of them fixing a text prompt we get:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/latent_interpolation-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/latent_interpolation-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/latent_interpolation-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-01-latent-interpolation/latent_interpolation.jpeg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>As we can see from the image, when we move away from both $z_\text{start}$ and $z_\text{end}$ we get blurred images after decoding the interpolated image latent codes, which have only high level features of what the image should depict, but no fine grained details, for $\gamma = 0.5$ for instance, we get:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/extreme_case-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/extreme_case-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/extreme_case-1400.webp" /> <!-- Fallback to the original file --> <img 
src="/staging/assets/img/2023-11-01-latent-interpolation/extreme_case.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>In contrast, if we perform interpolation in the text space by sampling $z_T \sim \mathcal{N}(0, I)$, which is kept fixed afterwards, and interpolating between two text latent codes $s_\text{start} = \tau_\phi(y_\text{start})$ and $s_\text{end} = \tau_\phi(y_\text{end})$, we get something more coherent:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/text_interpolation-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/text_interpolation-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/text_interpolation-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-01-latent-interpolation/text_interpolation.jpeg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Latent interpolation is a very common technique in Machine Learning, particularly in generative models, <d-cite key="gomez2018automatic"></d-cite> used interpolation in the latent space of a Variational Autoencoder (VAE) <d-cite key="kingma2013auto"></d-cite> to generated molecules between two initial ones by encoding them in the VAE latent space, interpolating between them and using the decoder to obtain the molecules from the latents, <d-cite key="upchurch2017deep"></d-cite> showed how interpolation can be used to perform semantic transformations on images, by changing features of a CNN. 
More broadly interpolation has also been studied in a probabilistic point of view <d-cite key="lesniak2018distribution"></d-cite>, evaluating how different techniques might generate out of distribution samples, which we explore later in this blog post.</p> <p>In this project we explore geometric properties of the image latent space of Stable Diffusion, gaining insights of how the model organizes information and providing strategies to navigate this very complex latent space. One of our focuses here is to investigate how to better interpolate the latents such that the sequence of decoded images is coherence and smooth. Depending on the context, the insights here could transferred to other domains as well if the sampling process is similar to the one used in SD. The experiments are performed using python and heavily relying on the PyTorch <d-cite key="paszke2019pytorch"></d-cite>, Transformers <d-cite key="wolf-etal-2020-transformers"></d-cite> and Diffusers <d-cite key="von-platen-etal-2022-diffusers"></d-cite> libraries.</p> <h2 id="method">Method</h2> <p>In this section we compare several interpolation techniques. For reproducibility reasons we ran the experiments with the same prompt and sample latent vectors across different. We use Stable Diffusion version 1.4 from CompVis with the large CLIP vision transformer, the DPMSolverMultistepScheduler <d-cite key="lu2022dpm"></d-cite>, 30 inference steps and a guidance scale of 7.5 <d-cite key="dhariwal2021diffusion"></d-cite>. We use the prompt “An high resolution photo of a cat” and seed = 1 to generate both $z_\text{start}$ and $z_\text{end}$. 
The corresponding generated pictures are shown below:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/endpoint_images-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/endpoint_images-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/endpoint_images-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-01-latent-interpolation/endpoint_images.jpeg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h3 id="linear-interpolation">Linear Interpolation</h3> <p>Although linear interpolation is still a very commonly used interpolation technique, it is known that is generates points which are not from the same distribution than the original data points <d-cite key="agustsson2018optimal"></d-cite> depending on the original distribution of the points being interpolated. 
Particularly, for $z_{\text{start}}, z_{\text{end}} \sim \mathcal{N}(0, I)$ and $\gamma \in [0,1]$, we have:</p> \[z_\text{linear}^{(\gamma)} = (1-\gamma)z_\text{start} + \gamma z_\text{end}\] <p>Hence:</p> \[\begin{eqnarray} \mathbb{E}\left[z_\text{linear}^{(\gamma)}\right] &amp;=&amp; \mathbb{E}\left[(1-\gamma)z_\text{start} + \gamma z_\text{end}\right] \nonumber \\ &amp;=&amp; \mathbb{E}[(1-\gamma)z_\text{start}] + \mathbb{E}[\gamma z_\text{end}] \nonumber \\ &amp;=&amp; (1-\gamma)\mathbb{E}[z_\text{start}] + \gamma \mathbb{E}[z_\text{end}] \nonumber \\ &amp;=&amp; 0 \nonumber \end{eqnarray}\] <p>Therefore, the mean stays unchanged, but the variance is smaller than 1 for $\gamma \in (0,1)$:</p> \[\begin{eqnarray} \text{Var}[z_\text{linear}^{(\gamma)}] &amp;=&amp; \text{Var}[(1-\gamma)z_\text{start} + \gamma z_\text{end}] \nonumber \\ &amp;=&amp; \text{Var}[\gamma z_\text{start}] + \text{Var}[(1-\gamma)z_\text{end}] \nonumber \\ &amp;=&amp; \gamma^2\text{Var}[z_\text{start}] + (1-\gamma)^2\text{Var}[z_\text{end}] \nonumber \\ &amp;=&amp; \gamma(2\gamma - 2)I + I \nonumber \\ &amp;=&amp; (\gamma(2\gamma - 2) + 1)I \nonumber \end{eqnarray}\] <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/linear_interpolation-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/linear_interpolation-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/linear_interpolation-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-01-latent-interpolation/linear_interpolation.jpeg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Given that the sum of two independent Gaussian distributed random 
variables results in a Gaussian distributed random variable, $z_\text{linear}^{(\gamma)} \sim \mathcal{N}(0, (\gamma(2\gamma - 2) + 1)I)$. This shows how the distribution of the interpolated latent codes change. To further understand the effect of this shift, we can use the interactive figure below. Where for $\text{std} \in [0.5, 1.5]$ we generate an image using the embedding $\text{std} \, z_\text{start}$:</p> <iframe src="/staging/assets/html/2023-11-01-latent-interpolation/variance.html" frameborder="0" scrolling="no" height="600px" width="100%"></iframe> <h3 id="normalized-linear-interpolation">Normalized linear interpolation</h3> <p>As shown before, linear interpolation is not a good technique for interpolation random variables which are normally distributed, given the change in the distribution of the interpolated latent vectors. To correct this distribution shift, we can perform a simply normalization of the random variable. We will refer this this as normalized linear interpolation. 
For $\gamma \in [0,1]$ we define $z_\text{normalized}^{(\gamma)}$ as:</p> \[z_\text{normalized}^{(\gamma)} = \dfrac{z_\text{linear}^{(\gamma)}}{\sqrt{(\gamma(2\gamma - 2) + 1)}} \implies z_\text{normalized}^{(\gamma)} \sim \mathcal{N}(0, I)\] <p>Now, as we move further way from the endpoints $z_\text{start}$ and $z_\text{end}$, we still get coherent output images:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/normalized_interpolation-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/normalized_interpolation-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/normalized_interpolation-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-01-latent-interpolation/normalized_interpolation.jpeg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h3 id="slerp">SLERP</h3> <p>Spherical Linear Interpolation (Slerp) <d-cite key="shoemake1985animating"></d-cite>, is a technique used in computer graphics and animation to smoothly transition between two orientations, especially rotations. If we let $\phi = \text{angle}(z_\text{start}, z_\text{start})$, then for $\gamma \in [0,1]$, the interpolated latent is defined by:</p> \[\text{slerp}(z_\text{start}, z_\text{end}; t) = \dfrac{\sin((1-\gamma)\phi)}{\sin(\phi)}z_\text{start} + \dfrac{\sin(\gamma\phi)}{\sin(\phi)}z_\text{end}\] <p>where $\phi$ is the angle between $z_\text{start}$ and $z_\text{end}$. The intuition is that Slerp interpolates two vectors along the shortest arc. We use an implementation of Slerp based on Andrej Karpathy <d-cite key="Karpathy2022"></d-cite>. 
As we can see from the images below, slerp generates very good quality interpolated vectors.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/slerp_interpolation-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/slerp_interpolation-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/slerp_interpolation-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-01-latent-interpolation/slerp_interpolation.jpeg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>If we compare the obtained results with normalized linear interpolation we see that the generated images are very similar, but as opposed to normalized linear interpolation, we cannot easily theoretically analyze the distribution of generated latents. To have some intuition behind how these different techniques interpolate between two vectors and can sample and fix two vectors sampled from a 2-dimensional normal distribution. 
We can visualize how these trajectories compare with each other:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/interpolations_comparison-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/interpolations_comparison-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/interpolations_comparison-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-01-latent-interpolation/interpolations_comparison.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h3 id="translation">Translation</h3> <p>To further investigate some properties of the latent space we also perform the following experiment. Let $z_\text{concat} \in \mathbb{R}^{4 \times 64 \times 128}$ be the concatenation of $z_\text{start}$ and $z_\text{end}$ over the third dimension. We will denote by $z_\text{concat}[i, j, k] \in \mathbb{R}$ as a specific element of the latent code and $:$ as the operator that selects all the elements of that dimension and $m:n$ the operator that selects from elements $m$ to element $n$ of a specific dimension. We can create a sliding window over the concatenated latent and generated the corresponding images. We define the translation operator $\mathcal{T}$ such that $\mathcal{T}(z_\text{concat}; t) = z_\text{concat}[:, :, t:64+t]$, which is defined for $t = {0, \cdots, 64}$. 
The sequence of generated images can be visualized below using our interactive tool:</p> <iframe src="/staging/assets/html/2023-11-01-latent-interpolation/translation.html" frameborder="0" scrolling="no" height="600px" width="100%"></iframe> <p>Surprisingly, we note that applying $\mathcal{T}$ to our concatenated latent code is materialized into a translation in image space as well. But not only the object translates, we also see changes in the images style, which is justified by changing some of the latent dimensions.</p> <p>We can correct this behavior by mixing the two latent codes only in a single slice of the latent code. Let $\mathcal{C}(z_\text{start}, z_\text{end}; t)$ represent the concatenation of $z_\text{start}[:, :, 64:64+t]$ and $z_\text{end}[:, :, t:64]$ along the third dimension. With this transformation we obtain the following:</p> <iframe src="/staging/assets/html/2023-11-01-latent-interpolation/corrected_translation.html" frameborder="0" scrolling="no" height="600px" width="100%"></iframe> <p>Hence, translation is also a valid interpolation technique and could be further expanded to generate an arbitrary size of latent vectors.</p> <h2 id="analysis">Analysis</h2> <p>In order to evaluate the quality of the generated interpolations we use CLIP, a powerful technique for jointly learning representations of images and text. It relies on contrastive learning, by training a model to distinguish between similar and dissimilar pairs of images in a embedding space using a text and an image encoder. 
If a (text, image) pair is such that the textual description matches the image, the similarity between the CLIP embeddings of this pair should be high:</p> \[\text{CLIPScore(text,image)} = \max \left(100 \times \dfrac{z_{\text{text}} \cdot z_{\text{image}}}{ \lVert z_{\text{text}} \rVert \lVert z_{\text{image}} \rVert}, 0 \right)\] <p>For each interpolation strategy $f \in \{\text{linear}, \text{normalized}, \text{slerp}\}$ presented, we fix the prompt $\text{text} = $ “A high resolution image of a cat” and generate $n = 300$ interpolated latents $f(z_\text{start}, z_\text{end}, \gamma) = z_f^{(\gamma)}$ with $\gamma = \{0, \frac{1}{n-1}, \frac{1}{n-2}, \cdots, 1\}$. We then generate the images $x_f^{(\gamma)}$ from the interpolated latents, finally we use the CLIP encoder $\mathcal{E}_\text{CLIP}$ on the generated images to create image embeddings that can be compared with the text embedding the we define Interpolation Score $\text{InterpScore}(f, \text{text}, n)$ as:</p> \[\text{InterpScore}(f, \text{text}, n) = \dfrac{1}{n} \sum_{\gamma \in \{0, \frac{1}{n-1}, \frac{1}{n-2}, \cdots, 1\}} \max \left(100 \times \dfrac{z_{\text{text}} \cdot \mathcal{E}_\text{CLIP}(x_\text{f}^{(\gamma)})}{ \lVert z_{\text{text}} \rVert \lVert \mathcal{E}_\text{CLIP}(x_\text{f}^{(\gamma)}) \rVert}, 0 \right)\] <p>Applying these steps we obtained the following results:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/clip_scores-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/clip_scores-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-01-latent-interpolation/clip_scores-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-01-latent-interpolation/clip_scores.png" class="img-fluid" width="auto" 
height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Surprisingly, linear interpolation performed better than normalized linear and slerp, this could indicate that CLIP scores might not be a good metric for image and text similarity in this context. Given that in this class project the main goal was to gain insights, as future work we could run a large scale experiment to check whether this behavior would be repeated. We can also visually inspect the quality of the interpolation by generating a video for each interpolation. From left to right we have images generated from latents from linear, normalized and slerp interpolations respectively:</p> <iframe width="720" height="480" src="https://www.youtube.com/embed/6dEGSbam11o"> </iframe> <h2 id="conclusion">Conclusion</h2> <p>This work shows the importance of choosing an interpolation technique when generating latent vectors for generative models. It also provides insights of the organization of the latent space of Stable Diffusion, we showed how translations of the latent code corresponds to translations on image space as well (but also changes in the image content). Further investigation of the organization of the latent space could be done, where we could try for instance, to understand how different dimensions of the latent code influence the output image. 
As an example, if we fix an image latent and use four different prompts, which are specified in the image below, we get:
Additionally, we showed some indication that CLIP scores might not be a good proxy for evaluating quality images generated from an interpolation technique.</p> Accelerating large model inference with speculative decoding - 6.s898 2023-11-16T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/speculative-decoding <h2 id="introduction">Introduction</h2> <h3 id="inference-in-autoregressive-models">Inference in autoregressive models</h3> <p>Autoregressive models, particularly transformers and RNNs, play a crucial role in tasks involving sequential data processing, such as natural language processing and time series analysis. However, a significant limitation of these models is their slow inference speed. The primary bottleneck in these models is associated with memory reads and writes, rather than arithmetic computations. This is especially problematic in larger models with vast parameter spaces, where efficient memory management is critical to performance. Further, these models generate outputs sequentially, one token at a time, with each new token depending on all previously generated tokens. This inherent sequential dependency limits the model’s ability to parallelize the token generation process, leading to inference latency much greater than that of models capable of processing data in parallel. The challenge is to overcome this sequential bottleneck without compromising the model’s ability to accurately capture dependencies in the data.</p> <p>The central question this project addresses is whether it’s possible to introduce parallelism into the inference process of autoregressive models. A more specific aspect of this problem is whether probabilities for multiple tokens can be computed simultaneously, rather than processing each token individually. 
This project aims to enhance methods that have been proposed for parallelizing the decoding process, focusing on solutions that draw inspiration from speculative execution in processors and other systems design strategies.</p> <h3 id="speculative-execution-in-processors">Speculative execution in processors</h3> <p>Speculative execution is a technique used in CPU architecture to improve processing speed. Instead of waiting for sequential execution of instructions, processors predict which instructions are likely to be executed next and start processing them in advance. If the prediction is correct, this leads to a significant reduction in latency, as the processor has preemptively executed necessary instructions. If the prediction is incorrect, the processor discards the speculative results and reverts to the correct execution path. This method effectively utilizes CPU resources that would otherwise remain idle during the waiting period, thus optimizing the overall processing speed and reducing latency.</p> <h3 id="applying-speculative-execution-to-model-inference">Applying speculative execution to model inference</h3> <p>Inspired by speculative execution in processors, this project explores how similar principles can be applied to accelerate inference in large autoregressive models. The concept involves generating multiple potential outputs in parallel, using a smaller or draft model, and then evaluating these outputs with the larger target model. This mimics the speculative execution process where multiple paths are explored simultaneously, with the most promising path being selected as the final output. 
This approach, referred to as “speculative sampling” or “speculative decoding,” aims to introduce a level of parallelism in the inference process, enabling faster generation of outputs without compromising the quality or accuracy of the model’s predictions.</p> <h3 id="hierarchical-speculative-decoding">Hierarchical speculative decoding</h3> <p>In addition to implementing already proposed speculative decoding techniques, this project investigates a strategy that has the potential further speed up inference: hierarchical speculative decoding. This method aims to accelerate the smaller approximation model with an even smaller, faster model. While I experiment with two-layer (traditional) and three-layer hierarchies in this project, one could theoretically extend this idea to create an <em>n</em> layer hierarchy, assuming sufficient memory. Although researchers developing speculative decoding algorithms and sampling methods have mentioned the potential viability of hierarchical speculative decoding, none have tried to implement it. Thus, this project aims to find an efficient implementation of the approach and determine if it actually further speeds up inference.</p> <h2 id="current-work">Current Work</h2> <p>Multiple papers have presented novel speculative decoding algorithms, with the nuance typically in the way that sampling is performed. The two most-referenced papers in this space are DeepMind’s Accelerating Large Language Model Decoding with Speculative Sampling (Chen et al.) <a href="https://arxiv.org/pdf/2302.01318.pdf">(paper)</a> and Google Research’s Fast Inference from Transformers via Speculative Decoding (Leviathan et al.) <a href="https://arxiv.org/pdf/2211.17192.pdf">(paper)</a>. 
This project draws its architecture from the latter, so we will explore its approach in more depth and describe how its shortcomings motivated the experiments in this project.
We start with some conditioning $prefix$ (our starting tokens) and generate between $1$ and $\gamma+1$ tokens at once.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-16-speculative-decoding/Algorithm1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-16-speculative-decoding/Algorithm1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-16-speculative-decoding/Algorithm1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-16-speculative-decoding/Algorithm1.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h3 id="evaluation">Evaluation</h3> <p>To evaluate the effectiveness of this approach, we need to calculate the total wall time improvement of speculative decoding versus normal inference on the target model.</p> <p>To make this evaluation more simple, assume we can run $\gamma + 1$ concurrent evaluations of $M_p$ in parallel. Now, we just need to get the cost of running $M_q$ (the approximation model).</p> <p>Let $c$ = the cost coefficient, which is the ratio between the time for a single run of $M_q$ and a single run of $M_p$. $c$ will depend only on our hardware and software implementation details.</p> <p>Now, we need some measure of how well $M_q$ approximates $M_p$.</p> <p>Let $\beta$ be the <em>acceptance rate</em>.</p> <ul> <li> <table> <tbody> <tr> <td>$\beta_{x&lt;t}$ is the probability of accepting $x_{t}\sim q(x_{t}</td> <td>x_{&lt;t})$ by speculative sampling.</td> </tr> </tbody> </table> </li> <li>Assume that the $\beta$s are i.i.d.</li> </ul> <p>Let $\alpha=E(\beta)$. 
This gives us the average acceptance rate across many samples, which is a good measure of how well $M_q$ approximates $M_p$.</p> <p>The expectation of the number of generated tokens is now a bounded geometric function of $\alpha$ (bounded by $\gamma$) :\(E(\text{# of generated tokens}) = \frac{1-\alpha^{\gamma + 1}}{1-\alpha}\)Given this relationship, we can derive the expected improvement factor for the total wall time (assuming longer generations):\(\frac{1-\alpha^{\gamma+1}}{(1-\alpha)(\gamma c+1)}\) For the sake of conciseness, we leave the full proof to the paper, but the general sketch relies on the fact that each run of Algorithm 1 costs $Tc\gamma + T$ (where $T$ is the cost of running one step of $M_p$). We run $M_q$ $\gamma$ times and $M_p$ once, and each run of Algorithm 1 produces $\frac{1-\alpha^{\gamma + 1}}{1-\alpha}$ tokens. Since the cost of producing a single token with a standard algorithm is $T$, we get the above improvement.</p> <h2 id="hierarchical-speculative-decoding-1">Hierarchical Speculative Decoding</h2> <p>How much faster can we make model inference by accelerating the approximation model with an even smaller, faster model? 
Let’s look at the case where we have three models:</p> <ol> <li><strong>$M_p$:</strong> The target model</li> <li><strong>$M_q$:</strong> The first-level approximation model, used to approximate $M_p$.</li> <li><strong>$M_r$:</strong> The second-level, even smaller approximation model, used to approximate $M_q$.</li> </ol> <p>With the introduction of $M_r$, we now need to consider additional parameters:</p> <ul> <li><strong>$\gamma_r$:</strong> The number of concurrent evaluations that can be run using $M_r$.</li> <li><strong>$\beta_r$:</strong> The acceptance rate for $M_r$, analogous to $\beta$ for $M_q$.</li> <li><strong>$\alpha_r = E(\beta_r)$:</strong> The average acceptance rate for $M_r$, representing how well $M_r$ approximates $M_q$.</li> </ul> <p>Now, $\beta$ for $M_q$ becomes a function of $\beta_r$, reflecting the hierarchical nature of this setup. The acceptance rate $\beta$ for $M_q$ now depends on how effectively $M_r$ approximates $M_q$, which in turn approximates $M_p$.</p> <p>We can hypothesize that the effectiveness of $M_q$ in approximating $M_p$ might now be influenced by the performance of $M_r$. This could mean that $\beta$, and consequently $\alpha$, might be a function of $\alpha_r$.</p> <p>The expectation of the number of generated tokens would now need to consider the hierarchical relationship. A new formula would be required to calculate this expectation, taking into account the performances of both $M_q$ and $M_r$.</p> <p>Finally, the expected improvement factor for the total wall time would also need to be recalculated to reflect this hierarchical structure. 
This would involve integrating the costs and efficiencies of $M_r$ into our existing model, which so far only considered $M_q$ and $M_p$.</p> <p>Whether or not this approach will actually speed up the model in practice is left to be determined experimentally.</p> <h2 id="experiments">Experiments</h2> <p>I experimented on multiple transformer model families, most notably <code class="language-plaintext highlighter-rouge">facebook/opt-125m</code>, <code class="language-plaintext highlighter-rouge">facebook/opt-1.3b</code>, and <code class="language-plaintext highlighter-rouge">facebook/opt-13b</code>.</p> <p>The primary research questions I investigated include:</p> <ol> <li>How many orders of magnitude larger should $M_p$ be than $M_q$ to achieve the maximal improvement?</li> <li>To what extent does hierarchical speculative decoding further speed up inference?</li> </ol> <h3 id="general-set-up-for-experiments">General set-up for experiments</h3> <ul> <li>For the standard (non-hierarchical) speculative decoding, I implemented the algorithm exactly as described above. 
<ul> <li>I used a gamma value of 4</li> </ul> </li> <li>I used both top-k sampling and nucleus sampling, with <code class="language-plaintext highlighter-rouge">k=20</code> and <code class="language-plaintext highlighter-rouge">p=0.9</code> constant throughout all experiments.</li> <li>I typically prompted the models with <code class="language-plaintext highlighter-rouge">input_text = "Once upon a"</code> and generated 20 tokens.</li> <li>I used consistent sets of seeds (such as <code class="language-plaintext highlighter-rouge">torch.manual_seed(898)</code>) when running the same experiment across multiple model combinations for the sake of reproducibility and so that I could more easily compare results across models on shorter generation lengths.</li> </ul> <h3 id="how-many-orders-of-magnitude-larger-should-m_p-be-than-m_q">How many orders of magnitude larger should $M_p$ be than $M_q$?</h3> <ul> <li>To investigate this, I calculated inference time (tokens per second) on each of the following (approximator, target) model pairs: <ul> <li><code class="language-plaintext highlighter-rouge">facebook/opt-125m</code>, <code class="language-plaintext highlighter-rouge">facebook/opt-1.3b</code></li> <li><code class="language-plaintext highlighter-rouge">facebook/opt-125m</code>, <code class="language-plaintext highlighter-rouge">facebook/opt-13b</code></li> <li><code class="language-plaintext highlighter-rouge">facebook/opt-1.3b</code>, <code class="language-plaintext highlighter-rouge">facebook/opt-13b</code></li> </ul> </li> </ul> <h3 id="set-up-for-hierarchical-speculative-decoding">Set-up for hierarchical speculative decoding</h3> <p>I experimented with a three-level hierarchical approach using</p> <ol> <li>Small approximation model $M_r$: <code class="language-plaintext highlighter-rouge">facebook/opt-125m</code></li> <li>Approximation model $M_q$: <code class="language-plaintext highlighter-rouge">facebook/opt-1.3b</code></li> <li>Target model $M_p$: <code 
class="language-plaintext highlighter-rouge">facebook/opt-13b</code></li> </ol> <p>To add hierarchical decoding to the algorithm, I replaced the sampling of $M_q$, where we typically sample $x \sim q(x)$ with a sampling process that mirrors the sampling from the target model. So we sample from $x\sim r(x)$ instead, keep if it’s at least as likely in $q(x)$, and reject proportional to the likelihood of the sample under either model, adjusting the distribution as before if we need to sample again. This made the theoretical implementation rather simple, as we could re-use a lot of the code. The implementation in practice was slightly more difficult than expected, however, as my implementation of the two-layer speculative decoding didn’t permit direct functional composition, and I had to restructure the implementation a bit.</p> <h2 id="results">Results</h2> <h3 id="calculating-c-for-each-model-pair">Calculating $c$ for each model pair</h3> <p>(The larger model is used as the target model $M_p$)</p> <table> <thead> <tr> <th> </th> <th>opt-125m</th> <th>opt-1.3b</th> <th>opt-13b</th> </tr> </thead> <tbody> <tr> <td>opt-125m</td> <td>1</td> <td>N/A</td> <td>N/A</td> </tr> <tr> <td>opt-1.3b</td> <td>0.015</td> <td>1</td> <td>N/A</td> </tr> <tr> <td>opt-13b</td> <td>0.022</td> <td>0.015</td> <td>1</td> </tr> </tbody> </table> <p>This gives insight into the relative efficiencies of the models when performing assisted inference.</p> <h3 id="the-general-effect-of-speculative-decoding">The general effect of speculative decoding</h3> <p>Wall time improvements from speculative decoding have already been documented, so these results are not novel, but I include them here for further proof that the algorithm works and for comparison with other results.</p> <table> <thead> <tr> <th>Target Model</th> <th>Approximation Model</th> <th>Tokens/Second</th> </tr> </thead> <tbody> <tr> <td>opt-13b</td> <td>None</td> <td>0.047</td> </tr> <tr> <td>opt-13b</td> <td>opt-1.3b</td> 
<td>0.087</td> </tr> <tr> <td>opt-13b</td> <td>opt-125m</td> <td>0.057</td> </tr> <tr> <td>opt-1.3b</td> <td>None</td> <td>0.336</td> </tr> <tr> <td>opt-1.3b</td> <td>opt-125m</td> <td>1.05</td> </tr> </tbody> </table> <p>In all cases, including an approximation model increases the model’s token per second inference rate.</p> <h3 id="acceptance-rates-and-wall-time-given-m_p-and-m_q">Acceptance rates and wall time given $M_p$ and $M_q$</h3> <table> <thead> <tr> <th>Target Model</th> <th>Approximator Model</th> <th>Tokens/Second</th> <th>Acceptance Rate</th> </tr> </thead> <tbody> <tr> <td>opt-1.3b</td> <td>opt-125m</td> <td>1.05</td> <td>38%</td> </tr> <tr> <td>opt-13b</td> <td>opt-125m</td> <td>0.057</td> <td>15%</td> </tr> <tr> <td>opt-13b</td> <td>opt-1.3b</td> <td>0.087</td> <td>19%</td> </tr> </tbody> </table> <p>These results help us answer the question: <em>How many orders of magnitude larger should $M_p$ be than $M_q$?</em></p> <p>One order of magnitude seems to yield higher acceptance rates, and the smaller models were obviously faster.</p> <h3 id="results-of-hierarchical-speculative-decoding">Results of hierarchical speculative decoding</h3> <table> <thead> <tr> <th>Target Model</th> <th>Approximation Model</th> <th>Tokens/Second</th> <th>Acceptance Rate</th> </tr> </thead> <tbody> <tr> <td>opt-13b</td> <td>None</td> <td>0.047</td> <td>N/A</td> </tr> <tr> <td>opt-13b</td> <td>opt-1.3b</td> <td>0.087</td> <td>19%</td> </tr> <tr> <td>opt-13b</td> <td>opt-125m</td> <td>0.057</td> <td>15%</td> </tr> <tr> <td>opt-13b</td> <td>opt-1.3b, opt-125m</td> <td>0.030</td> <td>17%, 33%</td> </tr> </tbody> </table> <p>I found that running the three-layer hierarchical speculative decoding <em>did not</em> speed up model inference, but I hypothesize that this is because of compute limitations. Running all three models on my computer given the parallelization requirements of the algorithm forced the program to map data to devices in a less-efficient way. 
I wasn’t able to find smaller pre-trained models with which I could test this on my local machine, so a future experiment should either train custom smaller models for the sake of inference in this setting or use a device with greater memory capacity.</p> <h2 id="conclusion">Conclusion</h2> <p>This project explored the potential of speculative decoding, a technique inspired by speculative execution in processors, to accelerate inference in autoregressive models like transformers. Our exploration focused on implementing and extending existing methods of speculative decoding, particularly the ones proposed in the seminal works by Chen et al. and Leviathan et al., while also introducing early experiments with the concept of hierarchical speculative decoding, which is to be further investigated.</p> Unraveling Social Reasoning in LLMs: A Deep Dive into the Social IQA Benchmark 2023-11-11T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/unraveling-social-reasoning-in-llms <h1 id="unraveling-social-reasoning-in-llms-a-decision-tree-framework-for-error-categorization">Unraveling Social Reasoning in LLMs: A Decision Tree Framework for Error Categorization</h1> <h2 id="introduction">Introduction</h2> <p>Social commonsense reasoning is a skill most people acquire within the first few years of life, often without formal education. Consider this example of a social commonsense reasoning question:</p> <p>Q: Kai was frantically running to a gate at the airport. Why was Kai running?</p> <p>A) They were trying to catch a flight that departs soon</p> <p>B) They were training for a marathon</p> <p>C) They were testing out their new running shoe</p> <p>Most would likely infer that Kai was rushing to catch a flight that would depart soon and choose A, the correct answer. 
Social commonsense reasoning, at its core, entails reasoning about the past, current, and future states of others.</p> <p>Despite advancements in Large Language Models (LLMs), prompting models to achieve near-human levels of performance in different tasks across various domains, they have traditionally struggled with social commonsense reasoning tasks, often underperforming humans. Though, this isn’t surprising to most observers <d-cite key="sap_neural_2023"></d-cite>.</p> <p>To better understand why, previous studies have created benchmarks for social commonsense reasoning <d-cite key="huang_towards_2023"></d-cite>; benchmarked how different models perform on various social commonsense reasoning benchmarks; and/or provided classifications for different question types testing social commonsense reasoning <d-cite key="huang_towards_2023"></d-cite>. Certain question categories, like those directly related to social norms, are documented to exhibit higher error rates, suggesting that some aspects of social reasoning are more challenging for models to learn <d-cite key="li_systematic_2022"></d-cite>. However, these works used older models that were not specifically designed for reasoning tasks.</p> <p>Specifically, our blog investigates the question, <strong>What are underlying themes in social errors that large language models make?</strong> From both a qualitative and quantitative perspective. The goal of our findings is to help discover if there are methods that could potentially address these errors.</p> <p>To answer this question, we ran Flan-T5 on the Social IQA benchmark, which was introduced in 2019 and features 38,000 multiple-choice questions (MCQs) designed to gauge “emotional and social intelligence in everyday situations” <d-cite key="sap_socialiqa_2019"></d-cite>. 
After qualitatively labeling 350 model errors, we made a striking discovery: over half of the questions Flan-T5 got wrong were due to problematic questions in the Social IQA dataset.</p> <p>Upon making this curious realization, we pivoted our project from designing a decision tree abstraction for providing detailed categorization of social commonsense questions to analyzing and addressing the two types of errors:</p> <p>Type 1: Errors stemming from the flawed construction of the Social IQA dataset</p> <p>Type 2: Errors where Flan-T5’s choices don’t align with social commonsense.</p> <p>In the first error group, even reasonable humans, including this blog post’s authors, disagreed with Social IQA’s “correct” answers. Questions in this first group have nonsensical contexts/questions, lack a single reasonable answer, or have many reasonable answers.</p> <p>When examining questions in the second error group, we noticed that Flan-T5 often over-infers underlying reasons when a more straightforward answer exists. To address this group of errors, we visualized T5’s attention mechanisms when processing such questions.</p> <h2 id="background-and-related-works"><strong>Background and Related Works</strong></h2> <p><strong>LLMs and Reasoning</strong></p> <p>Language models like GPT-4 have captured widespread media attention, given their question-answering capabilities.</p> <p>Throughout the development and testing of LLMs, various tasks have been developed to empirically assess these models’ abilities and limitations. In literature, these tasks are typically categorized into two main groups: natural language understanding (NLU) and natural language generation (NLG). NLU tasks evaluate a language model’s ability to understand natural language. 
This includes tasks like Natural Language Inference, Reading Comprehension, and various reasoning tasks, including social commonsense reasoning <d-cite key="naveed_comprehensive_2023"></d-cite>.</p> <h3 id="comprehensive-overview-of-social-commonsense-reasoning-benchmarks"><strong>Comprehensive Overview of Social Commonsense Reasoning Benchmarks</strong></h3> <p>Over 100 large-scale benchmarks have been proposed to assess and compare models’ social commonsense reasoning abilities and to serve as resources for transfer learning <d-cite key="davis_benchmarks_2023"></d-cite>. In general, these benchmarks aim to evaluate models’ abilities to infer the mental states, beliefs, and intentions of others. Their development was inspired by the Theory of Mind (ToM), a concept originating from childhood development psychology—some benchmark questions are, in fact, explicitly based on ToM tasks used to assess children’s social reasoning <d-cite key="nematzadeh_evaluating_2018"></d-cite>.</p> <p>Notable benchmarks frequently mentioned in the literature include multiple-choice Question Answering (QA) benchmarks like the 2019 Social IQA <d-cite key="sap_socialiqa_2019"></d-cite> and 2019 HellaSwag <d-cite key="zellers_hellaswag_2019"></d-cite>, along with generative evaluation benchmarks like 2020 ProtoQA <d-cite key="boratko_protoqa_2020"></d-cite>. 
Many of these benchmarks are based on structured knowledge banks about everyday commonsense reasoning like ATOMIC <d-cite key="sap_atomic_2019"></d-cite> and COMET <d-cite key="bosselut_comet_2019"></d-cite>.</p> <p>Similar to that of other studies evaluating LLMs’ commonsense knowledge, we use an MCQ benchmark and not a generative one because they are more simple and reliable for evaluation <d-cite key="li_systematic_2022"></d-cite>; under tight time and resource constraints, we err on the side of a simple and reliable evaluation method, though, in future works, we would like to generalize our findings to more datasets.</p> <p>However, despite their widespread use, benchmarking datasets like Social IQA are not without flaws. Previous studies have shown that many aspects of common sense are still untested by these benchmarks, indicating an ongoing need for reliable methods to evaluate social commonsense reasoning <d-cite key="davis_commonsense_2015"></d-cite></p> <h3 id="problems-with-social-iqa"><strong>Problems With Social IQA</strong></h3> <p>Social IQA focuses on evaluating models’ abilities to reason about others’ mental states, aligning with Theory of Mind concepts <d-cite key="gandhi_understanding_2023"></d-cite>. Each question in Social IQA comprises a context, a question, and three answer choices; for each of the 37,588 multiple-choice questions, the context, question, correct answer choice, and two incorrect answer choices were gathered through three phases of crowdsourcing on Amazon Mechanical Turk (MTurk) <d-cite key="sap_socialiqa_2019"></d-cite>.</p> <p>In the first two phases, MTurk crowdsource workers sourced context sentences and questions using the ATOMIC knowledge base <d-cite key="sap_atomic_2019"></d-cite>. In the third phase, MTurk workers generated correct answers for the given context-question pairs. Incorrect choices for each question were derived from correct answers to related questions <d-cite key="sap_socialiqa_2019"></d-cite>. 
In our Discussion section, we will discuss issues with this methodology for sourcing negative answers.</p> <p>Many critiques have been raised about the reliance on crowdsourcing for benchmarks, specifically, about the challenges in obtaining high-quality material <d-cite key="davis_commonsense_2015"></d-cite>. Given the low pay on MTurk, workers often prioritize quantity over quality, leading to errors. There have even been reported instances of crowdsourcing workers using bots to help them complete more tasks.</p> <h3 id="prior-error-analysis-work-using-social-iqa-dataset"><strong>Prior Error Analysis Work Using Social IQA Dataset</strong></h3> <p>The authors of Social IQA conducted a preliminary error analysis of their dataset, finding that language models found questions about context pre-conditions, such as motivations and prior actions, to be much easier than those about stative attributes or predicting future actions. Interpreting these results, the authors hypothesized that models might be learning lexical associations rather than true meaning <d-cite key="sap_socialiqa_2019"></d-cite>. This aligns with findings presented in other works <d-cite key="zellers_hellaswag_2019"></d-cite>.</p> <p>Other research, such as Wang et al.’s <d-cite key="wang_semantic_2021"></d-cite>, categorize Social IQA questions into four types: 1) Feelings and Characteristics, 2) Interaction, 3) Daily Events, and 4) Knowledge, Norm, and Rules. They found the final category to be the most challenging for models.</p> <h3 id="general-methodology-for-conducting-systematic-error-analysis-for-qa"><strong>General Methodology for Conducting Systematic Error Analysis for QA</strong></h3> <p>Our research, aimed at identifying themes in social errors made by LLMs, draws inspiration from conventional methodologies for system error analysis in QA tasks. 
Moldovan et al.’s data-driven approach to QA error analysis, focusing on answer accuracy based on question stems, reveals that certain question types are more challenging for LLMs <d-cite key="moldovan_performance_2003"></d-cite>. Rondeau et al. used feature extraction for systematic error analysis in the Stanford Question Answering Dataset (SQuAD), employing feature extractors and classifiers <d-cite key="rondeau_systematic_2018"></d-cite>.</p> <h3 id="existing-approaches-to-improve-social-commonsense-reasoning"><strong>Existing Approaches to Improve Social Commonsense Reasoning</strong></h3> <p>Our research also explores existing literature offering solutions for mitigating errors in social commonsense reasoning. Some of these works suggest incorporating external structured data, such as knowledge graphs, into models. For example, Chang et al. showed that integrating knowledge graphs like ConceptNet improves performance on Social IQA <d-cite key="chang_incorporating_2020"></d-cite>.</p> <p>However, despite confirming the effectiveness of this approach, studies like Mitra et al. also noted instances where models, even with access to relevant information that can directly lead to the correct answer, predicted incorrect answers based on irrelevant knowledge <d-cite key="mitra_how_2020"></d-cite>.</p> <h2 id="methodology"><strong>Methodology</strong></h2> <h3 id="step-1-applying-flan-t5-to-social-iqa"><strong>Step 1: Applying Flan-T5 to Social IQA</strong></h3> <p>We first prompted Flan-T5, known for its promising reasoning task performance <d-cite key="chung_scaling_2022"></d-cite>, to answer 33,411 MCQs from the Social IQA dataset. 
The prompt format, emulating the examples <d-cite key="bosma_introducing_2021"></d-cite> was as follows:</p> <p>[Context].</p> <p>Based on the context above, choose the best answer to the question:</p> <p>[Question]</p> <p>OPTIONS:</p> <p>(A) [Answer A]</p> <p>(B) [Answer B]</p> <p>(C) [Answer C]</p> <p>For your answer, return exactly one character, either A, B, or C.</p> <h3 id="step-2-qualitative-coding-of-350-errors"><strong>Step 2: Qualitative Coding of 350 Errors</strong></h3> <p>Next, we used the following procedure, based on standard iterative qualitative coding methods, to categorize instances where Flan-T5’s response differed from the Social IQA dataset’s correct answer.</p> <ol> <li> <p>Initial Annotation: initially, for a subset of 100 rows, two independent coders annotated each row, noting the reasons for the discrepancy in the correct answer choice between the dataset and Flan-T5.</p> </li> <li> <p>Theme Identification: the coders reviewed each other’s annotations and engaged in discussions to identify major themes in inconsistencies. Based on these discussions, they developed a formal set of tags to apply to the rows.</p> </li> <li> <p>Tagging: finally, they applied these tags to a total of 350 rows</p> </li> </ol> <h3 id="step-3-quantitative-error-analysis"><strong>Step 3: Quantitative Error Analysis</strong></h3> <p>We then analyzed the data to determine the frequency of each error type within our tagged dataset (n=350). We explored potential features, such as specific words, that contributed to the difficulty of the questions.</p> <h3 id="step-4-addressing-type-1-errors---developing-a-pruning-tool"><strong>Step 4: Addressing Type 1 Errors - Developing a Pruning Tool</strong></h3> <p>Our objective here was to develop a tool that could use our tagged question set to accurately identify problematic questions. 
Unfortunately, this approach did not yield the desired results and needs future work.</p> <h3 id="step-5-addressing-type-2-errors---analyzing-through-attention-mechanism-visualization"><strong>Step 5: Addressing Type 2 Errors - Analyzing through Attention Mechanism Visualization</strong></h3> <p>Finally, we shifted our focus to examining errors by visualizing the attention mechanisms of the model. This approach aimed to provide deeper insights into how the model processes and responds to various types of questions, particularly those categorized as Type 2 errors.</p> <h2 id="analysis-and-evaluations"><strong>Analysis and Evaluations</strong></h2> <h3 id="general-accuracy-of-flan-t5-on-social-iqa"><strong>General Accuracy of Flan-T5 on Social IQA</strong></h3> <p>Overall, Flan-T5 exhibits a high accuracy of 90% when presented with MCQs from Social IQA, which could be because it was fine-tuned “on a large set of varied instructions,” similar to the questions we present it <d-cite key="bosma_introducing_2021"></d-cite>. This accuracy is much higher than BERT, which had a 64.5% accuracy <d-cite key="sap_socialiqa_2019"></d-cite>.</p> <h3 id="set-of-formal-tags-derived-from-qualitative-coding"><strong>Set of Formal Tags Derived from Qualitative Coding</strong></h3> <p>In the initial annotation phase of qualitative coding, both coders were surprised to find many questions marked “incorrect” because of issues inherent in the Social IQA questions themselves (see below for an example). 
Therefore, we wanted to characterize why the Social IQA multiple choice questions were problematic: was it a lack of context comprehension, the unreasonableness of all answer options, or the presence of multiple equally reasonable answers?</p> <p>During the theme identification phase, the coders established two groups of tags:</p> <ol> <li> <p>Errors arising from the flawed construction of the Social IQA dataset</p> </li> <li> <p>Errors due to Flan-T5’s responses not aligning with social commonsense</p> </li> </ol> <h3 id="type-1-errors"><strong>Type 1 Errors</strong></h3> <p>For Type 1 errors, six labels were created:</p> <ol> <li> <p>Nonsensical Context: When the context sentence is incomprehensible to a reasonable human.</p> </li> <li> <p>Nonsensical or Ambiguous Question: When the question is either nonsensical or too ambiguous.</p> </li> <li> <p>Major Typo: Refers to incomprehensible parts of the Context, Question, or answer choices due to typos.</p> </li> <li> <p>Multiple Reasonable Answers: When several answers appear equally reasonable, either due to similar meanings or general reasonableness.</p> </li> <li> <p>No Reasonable Answer: When no answer options seem appropriate or reasonable.</p> </li> <li> <p>Incorrectly Labeled “Correct” Answer: When an alternative answer seems more reasonable than the one marked “correct.”</p> </li> </ol> <p><strong>Examples of Type 1 Errors</strong></p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/errors-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/errors-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/errors-1400.webp" /> <!-- Fallback to the 
original file --> <img src="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/errors.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Examples of Questions and Answers </div> <h3 id="type-2-errors"><strong>Type 2 Errors</strong></h3> <p>For Type 2 errors, we devise the following set of three labels:</p> <ol> <li> <p>Overinfers: This tag is for questions where Flan-T5 seems to make a large leap in logic, resulting in it picking an answer choice that makes spurious assumptions when a much more direct and clear answer is available</p> </li> <li> <p>Associated but Incorrect: This is for questions where Flan-T5 picks an answer choice that is associated with the context and question, but is not what the question is specifically asking about. This differs from over-inferring in that this usually entails picking irrelevant answer choices.</p> </li> <li> <p>Flan-T5 Incorrect (unspecified): all other mistakes attributable to Flan-T5.</p> </li> </ol> <h3 id="distribution-of-tags"><strong>Distribution of Tags</strong></h3> <p>Looking at our annotated questions, we see that 65% of errors stemmed from the flawed construction of the Social IQA dataset. Meanwhile, 38% of errors stemmed from Flan-T5 not picking the right answer. Observe that it is possible for a question to be tagged with both a Type 1 tag and a Type 2 tag.</p> <p>For Type 1 errors, we see that having multiple reasonable answers is by far the most common reason why a question is problematic. This was followed by having no reasonable answer, and the answer labeled “correct” not being the best available answer. Indeed, the top three reasons why a question is considered problematic all stem from questionable answer choices. 
This highlights how the construction of the answer choices, and thus Social IQA as a benchmark set, is problematic.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/socialiqaerrors-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/socialiqaerrors-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/socialiqaerrors-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/socialiqaerrors.jpeg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Histogram of the Types of Errors in SocialIQA </div> <p>Next, we examine the distribution of Type 2 error tags. 
We see that the most common reason is Flan-T5 over-inferring.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/flant5errors-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/flant5errors-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/flant5errors-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/flant5errors.jpeg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Histogram of the Types of Errors FlanT5 Makes </div> <h3 id="analysis-of-question-types"><strong>Analysis of Question Types</strong></h3> <p>In our quantitative analysis, we identified key features contributing to lower accuracy in certain questions. Notably, questions containing the word ‘others’ scored lower in accuracy, with an average of 0.880, compared to the general accuracy score of 0.990. Furthermore, questions featuring repeated answer choices also exhibited a lower accuracy score of 0.818.</p> <h3 id="attempt-to-prune-social-iqa"><strong>Attempt to Prune Social IQA</strong></h3> <p>Assessing models on social commonsense reasoning questions requires clear comprehension and consensus on the appropriateness of the questions and their answer choices. Our goal was to create a tool to classify the sensibility of these questions and answers. To achieve this, we experimented with various models, including Flan-T5 and GPT-4, asking them to evaluate the coherence of the questions. 
Unfortunately, the results were inconsistent, often varying with each regeneration of the response. Despite these challenges, we maintain that addressing this issue remains crucial.</p> <h3 id="visualization-of-attention-mechanism"><strong>Visualization of Attention Mechanism</strong></h3> <p>In our analysis of Type 2 errors, we focused on how the errors happen because Flan-T5 overinfers the underlying reasons not explicitly stated in the question instead of picking the more straightforward and correct answer, or picks some answer associated with the words in the context that isn’t directly related to the question.</p> <p>In addition to providing qualitative analysis, we set out to provide some quantitative analysis to better understand why this was happening. Consider these linked notebooks, which visualize the cross attention and the encoder attention for <a href="https://colab.research.google.com/drive/1M4XhFORT5KJgFaaVnZM3g5QS8JPV7Apq#scrollTo=JHkd_dwR2AiZ">one correctly labeled example</a> and <a href="https://colab.research.google.com/drive/1VtU2kvJA5EKL4AeuaW-N0RwiBimb96MB#scrollTo=JHkd_dwR2AiZ">one incorrectly labeled example</a>, where Flan-T5 chooses an associated but incorrect answer. (Note that the specific images were chosen for brightness in the heatmaps, since the attention was normalized. Please reference the notebook.).</p> <p>To visualize cross-attention, we looked at the cross-attention between the answer Flan-T5 generates and the encodings, across each layer and attention head in Flan-T5, grouping in both orders. To visualize the encoder attention, we looked at the average attention for each layer in the input encoding, and for the layer that saw the most drastic change (layer 2, starting from 0 index), we visualized the attention for each attention head.</p> <p>Now, consider the context and question:</p> <p>Cameron had a big paper due later in the week, so Cameron put pen to paper. 
What will Cameron want to do next?</p> <p>A) research his topic</p> <p>B) write an outline</p> <p>C) redo his topic</p> <p>Flan-T5 answers A), while the correct answer is “write an outline.” Notably, Flan-T5 doesn’t choose the third answer, “redo his topic.”</p> <p>Therefore, we can see that Flan-T5’s answer is associated with the topic, but isn’t the correct answer, which is given by the phrase “put pen to paper.” Visualizing the average encoder attention and cross attention, we see that the contextualized embeddings and generation primarily focus on the words “big paper” and the question, but don’t pay much attention to the word “pen.”</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Cross Attention for Associated But Incorrect Answer </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated_encoder-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated_encoder-800.webp" /> 
<source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated_encoder-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/associated_encoder.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Encoder Attention for Associated But Incorrect Answer </div> <p>Generalizing our results a bit, we find that FLAN only pays reasonable attention (normalized attention &gt; 0.05) to the keywords for 14 out of 26 examples tagged under “associated,” even for simpler questions.</p> <p>On the other hand, consider the question,</p> <p>Sydney played basketball with her friends after school on a sunny afternoon. What does Sydney need to do before this?</p> <p>A) take a nap before this</p> <p>B) have a basketball before this</p> <p>C) go home before this</p> <p>Flan-T5 correctly answers “have a basketball before this,” not choosing “take a nap before this” or “go home before this.”</p> <p>Indeed, we see the four vertical lines in the encoder and cross attentions that correspond to key phrases in the sentence. For the questions that Flan-T5 gets correct, it pays attention to the right keywords 9 out of 10 times. 
Lastly, note that for questions labeled “overinfer,” Flan-T5 pays attention to the right keywords 8 out of 10 times.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Cross Attention for Correct Answer </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct_encoder-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct_encoder-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct_encoder-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-11-unraveling-social-reasoning-in-llms/correct_encoder.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Encoder Attention for Correct Answer </div> <p>Therefore, for more 
straightforward questions, namely, questions that have one straightforward answer, Flan-T5 can find the right keywords that lead it to the answer (i.e. the correct questions). On the other hand, for more challenging questions that require paying attention to specific keywords and reasoning from the perspective of a character (recall the Sally-Anne Test), Flan-T5 struggles more, with more variance between what it pays attention to and doesn’t (e.g. paper but not pen).</p> <p>In addition, since Flan-T5 pays attention to the right keywords most of the time for the questions it overinfers on, this suggests that there’s some aspect of reasoning that’s not being captured via our attention visualizations, and that this reasoning isn’t performing that well.</p> <p>Notably, something interesting to note is that for all of the examples, by the third encoder layer, on average, Flan-T5 doesn’t change its encodings, and for the cross attention, the attention remains consistent across all layers and (most) attention heads. Therefore, it seems like most of the “reasoning” is being performed in the encoding stage.</p> <p>Therefore, some of our next steps are understanding how removing attention heads in a smaller model affects the model’s ability to reason, given the large number of heads and layers (24 x 32) in Flan-T5-xxl. We visualized each encoder head for one layer, but this doesn’t immediately lend itself to an intuitive interpretation.</p> <h2 id="discussion"><strong>Discussion</strong></h2> <p>Our work concentrated on analyzing two categories of errors and proposing solutions to address them. The two error types are:</p> <ol> <li> <p>Errors originating from the flawed construction of the Social IQA dataset.</p> </li> <li> <p>Errors where Flan-T5’s responses do not align with social commonsense.</p> </li> </ol> <h3 id="problems-with-social-iqa-1"><strong>Problems with Social IQA</strong></h3> <p>Our analysis of Type 1 errors in the Social IQA dataset revealed significant issues. 
In examining n=350 incorrectly answered questions, we found that 65% had problems with their context, question, or answer choices. Additionally, 54.4% of these errors had multiple reasonable answers, 23.7% lacked any reasonable answer, and 14.0% seemed to have mislabeled correct answers. This indicates a substantial number of misleading answer choices in the Social IQA questions.</p> <p>This issue partly stems from the dataset’s construction, which involved assigning crowdsourced workers tasks of writing positive answers for each question and sourcing negative answers from “different but related” questions. This approach likely contributed to the high error rate.</p> <p>Since Social IQA is so frequently used in evaluating model performances and transfer learning tasks, the challenge is to identify and remove these flawed questions. Although our attempt to do this was unsuccessful due to time and budget constraints, we believe it is feasible. Many evaluations of large language models (LLMs) use crowdsourced multiple-choice questions, so a pruning tool to ensure benchmark reliability would be highly beneficial beyond the task of social commonsense reasoning.</p> <p>Pruning the Social IQA dataset to eliminate most erroneous questions would also provide an opportunity to reassess older models.</p> <p>Overall, our analysis of Type 1 errors underscores the need for caution in crowdsourcing benchmark questions. While crowdsourcing likely still remains the best solution for creating large benchmark sets, a pruning tool is essential to maintain the reliability of such datasets.</p> <p>On the other hand, our analysis of Type 2 errors suggests that LLMs still might not match the social reasoning skills of humans for more complex scenarios. For simpler questions, they can often find a single keyword that informs their answer, while for more complex questions, they often miss important phrases and can’t necessarily think from another person’s perspective. 
For instance, recall how questions containing the keyword “other” result in Flan-T5 having considerably lower accuracy.</p> <h2 id="main-limitations"><strong>Main Limitations</strong></h2> <p>The primary limitations of our study are rooted in its scope and methodology. Firstly, we focused exclusively on a single model, Flan-T5, which may limit the generalizability of our findings. Additionally, our analysis was based on a relatively small sample size of n=350, and it involved only two coders. For a more robust and comprehensive evaluation, increasing the number of coders would be beneficial, particularly to assess intercoder reliability. Furthermore, implementing measures to mitigate recognition bias during the tagging process would enhance the validity of our results.</p> Comparing data augmentation using VAEs and denoising-VAEs for limited noisy datasets 2023-11-11T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/denoisingVAE <h2 id="background">Background</h2> <p>One of the significant challenges in this evolving landscape of machine learning is the prevalence of limited and noisy datasets. Traditional models and downstream tasks such as classification often struggle with such datasets, leading to suboptimal performance and a lack of generalizability.</p> <p>Could this be tackled using auto-encoders, specifically, Denoising Autoencoders (DAE) and Variational Autoencoders (VAE)? Denoising autoencoders (DAE) are trained to reconstruct their clean inputs with noise injected at the input level, while variational autoencoders (VAE) are trained with noise injected in their stochastic hidden layer, with a regularizer (KL divergence) that encourages this noise injection. 
But what if we could combine these strengths?</p> <h2 id="motivation">Motivation</h2> <p>Denoising autoencoders (DAE)<d-cite key="vincent2008extracting"></d-cite>, are trained to reconstruct their clean inputs with noise injected at the input level, while variational autoencoders (VAE)<d-cite key="kingma2014autoencoding"></d-cite> are trained with noise injected in their stochastic hidden layer, with a regularizer (KL divergence) that encourages this noise injection. Denoising Variational Autoencoders (DVAEs) are an extension of the traditional variational autoencoder (VAE). The motivation for delving into the realm of DVAEs stems from a critical need - the ability to effectively interpret and utilize limited, noisy data. They merge the robustness of DAEs in handling noisy inputs with the generative prowess of VAEs. As highlighted in the research paper “Denoising Criterion for Variational Auto-Encoding Framework”<d-cite key="denoisingVAE"></d-cite>, integrating a denoising criterion into the VAE framework refines the robustness of learned representations, thereby enhancing the model’s generalization ability over various tasks.</p> <p>VAEs, known for their generative capabilities, introduce noise at the hidden layer level, potentially offering a means to augment limited datasets<d-cite key="saldanha2022data"></d-cite>. On the other hand, DVAEs, an innovative extension of VAEs, introduce perturbation to input data, promising a more robust feature extraction and create additional, realistic augmentations of the data. Our aim here is to comprehensively analyze and contrast the efficacy of VAEs and DVAEs in augmenting such datasets. 
We hypothesize that while VAEs can offer some level of data enhancement, DVAEs, with their inherent denoising capability, might prove superior in extracting more reliable and robust features from noisy datasets.</p> <h2 id="research-problem-statement">Research Problem Statement</h2> <p>The first aspect of this research is to explore the dual functionality of DVAEs — their ability to denoise input data while concurrently learning a generative model of the data distribution. The next aspect is to compare the performance of DVAEs against traditional VAEs in i) learning robust latent representations, and ii) in downstream classification tasks with richer varied datasets by utilising the data augmentation aspect of these generative models.</p> <ol> <li> <p><strong>Learning Robust representation and Generating Synthetic data using DVAEs:</strong> Can DVAEs’ dual capability of denoising input data and learning a generative model of the data distribution simultaneously be exploited to effectively learn robust representations from limited and noisy datasets and utilized to generate additional synthetic data (augmented dataset)? How does it compare to using traditional VAEs?</p> </li> <li> <p><strong>Performance Enhancement for downstream tasks:</strong> How does the DVAE-generated synthetic data impact the performance metrics of downstream classification tasks? Compare performance metrics with traditional VAE for different noise levels in test datasets.</p> </li> </ol> <h2 id="methods">Methods</h2> <h3 id="dataset-selection-and-preparation">Dataset Selection and Preparation</h3> <p>The Fashion-MNIST dataset, which includes 60,000 training images, is selected for the experiments mentioned above. To simulate a limited data environment, a subset of 5,000 images is randomly selected from the dataset. 
We also create a noisy version of the training dataset to understand the efficacy in scenarios when clean input data is not available.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/fashionMNISTSamples-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/fashionMNISTSamples-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/fashionMNISTSamples-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-11-denoisingVAE/fashionMNISTSamples.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 1. Sample Fashion-MNIST images</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/noisyFashionMNISTSamples-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/noisyFashionMNISTSamples-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/noisyFashionMNISTSamples-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-11-denoisingVAE/noisyFashionMNISTSamples.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 2. 
Artificially Corrupted(Noised) Fashion-MNIST images</p> <h3 id="vae-and-dvae---architecture-and-hyperparameters">VAE and DVAE - Architecture and Hyperparameters</h3> <p>The VAE and DVAE architecture is similar and differ only in the sense that DVAE adds noise to input images before passing it to encoder.</p> <p>The encoder comprises two hidden layers, each with 128 neurons. The input size is flattened to 28 * 28 dimensions. Each hidden layer in the encoder is followed by a ReLU activation function. The encoder’s output is connected to two separate layers: one for generating the mean (µ) and the other for the logarithm of the variance (log-variance), both projecting to a 4-dimensional latent space (z_dims).</p> <p>On the decoding side, the architecture starts with the latent space and expands through a similar structure of two hidden layers, each with 128 neurons and ReLU activation functions. The final output layer reconstructs the original input size of 28 * 28 dimensions and applies a Sigmoid activation function.</p> <p>This VAE/DVAE employs a reconstruction loss using the binary cross-entropy between the input and its reconstruction, and a regularization term(KL-Divergence) derived from the latent space to enforce a probabilistic distribution. 
Each model is trained for 60 epochs with batch size 128.</p> <div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code> <span class="n">input_size</span> <span class="o">=</span> <span class="mi">28</span> <span class="o">*</span> <span class="mi">28</span> <span class="n">z_dims</span> <span class="o">=</span> <span class="mi">4</span> <span class="n">num_hidden</span> <span class="o">=</span> <span class="mi">128</span> <span class="n">self</span><span class="p">.</span><span class="n">encoder</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Sequential</span><span class="p">(</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">input_size</span><span class="p">,</span> <span class="n">num_hidden</span><span class="p">),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">ReLU</span><span class="p">(),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">num_hidden</span><span class="p">,</span> <span class="n">num_hidden</span><span class="p">),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">ReLU</span><span class="p">()</span> <span class="p">)</span> <span class="n">self</span><span class="p">.</span><span class="n">mu</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">num_hidden</span><span class="p">,</span> <span class="n">z_dims</span><span class="p">)</span> <span class="n">self</span><span class="p">.</span><span class="n">logvar</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">num_hidden</span><span class="p">,</span> <span class="n">z_dims</span><span class="p">)</span> <span 
class="n">self</span><span class="p">.</span><span class="n">decoder</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Sequential</span><span class="p">(</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">z_dims</span><span class="p">,</span> <span class="n">num_hidden</span><span class="p">),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">ReLU</span><span class="p">(),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">num_hidden</span><span class="p">,</span> <span class="n">num_hidden</span><span class="p">),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">ReLU</span><span class="p">(),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">num_hidden</span><span class="p">,</span> <span class="n">input_size</span><span class="p">),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Sigmoid</span><span class="p">(),</span> <span class="p">)</span> </code></pre></div></div> <h3 id="data-augmentation">Data Augmentation</h3> <p>For augmenting the dataset, we generate 2 new samples for each input image. 
First, the image is passed through the encoder part of VAE/DVAE and then we sample a latent representation vector around the obtained latent representation - mean and std.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/VAE_data_augmentation-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/VAE_data_augmentation-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/VAE_data_augmentation-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-11-denoisingVAE/VAE_data_augmentation.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 3. Example: VAE Data Augmentation</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/DVAE_data_augmentation-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/DVAE_data_augmentation-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/DVAE_data_augmentation-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-11-denoisingVAE/DVAE_data_augmentation.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 4. Example: DVAE Data Augmentation</p> <h3 id="classification-networkcnn-architecture">Classification Network(CNN) Architecture</h3> <p>The Classification Network(CNN) architecture is comprised of a series of convolutional, activation, pooling, and fully connected layers. 
Initially, it features a convolutional layer with 1 input channel and 32 output channels, using 3x3 kernels, stride of 1, and padding of 1 with ‘reflect’ mode, followed by an ReLU activation function. This is succeeded by another convolutional layer that increases the depth to 64 filters, maintaining the same kernel size, stride, and padding, accompanied by the same activation function. Subsequently, a max pooling layer with a 2x2 kernel reduces the spatial dimensions of the feature maps, highlighting significant features. The data is then flattened, resulting in a feature vector with a length of 64 * 14 * 14, which feeds into a series of three linear layers, each with 128 units, interspersed with the activation function. This sequence of fully connected layers is designed to capture complex relationships in the data. Finally, the architecture has an output linear layer that maps to the number of outputs (num_outputs=10).</p> <div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code> <span class="n">image_dim</span> <span class="o">=</span> <span class="mi">28</span> <span class="n">num_outputs</span> <span class="o">=</span> <span class="mi">10</span> <span class="n">act_cls</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="n">ReLU</span> <span class="n">net</span> <span class="o">=</span> <span class="p">[</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Conv2d</span><span class="p">(</span><span class="n">in_channels</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">out_channels</span><span class="o">=</span><span class="mi">32</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">stride</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">padding</span><span 
class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">padding_mode</span><span class="o">=</span><span class="sh">'</span><span class="s">reflect</span><span class="sh">'</span><span class="p">),</span> <span class="nf">act_cls</span><span class="p">(),</span> <span class="p">]</span> <span class="n">net</span><span class="p">.</span><span class="nf">extend</span><span class="p">([</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Conv2d</span><span class="p">(</span><span class="n">in_channels</span><span class="o">=</span><span class="mi">32</span><span class="p">,</span> <span class="n">out_channels</span><span class="o">=</span><span class="mi">64</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">stride</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">padding</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">padding_mode</span><span class="o">=</span><span class="sh">'</span><span class="s">reflect</span><span class="sh">'</span><span class="p">),</span> <span class="nf">act_cls</span><span class="p">(),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">MaxPool2d</span><span class="p">(</span><span class="n">kernel_size</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span> <span class="p">])</span> <span class="n">input_len</span> <span class="o">=</span> <span class="nf">int</span><span class="p">(</span><span class="mi">64</span> <span class="o">*</span> <span class="n">image_dim</span><span class="o">/</span><span class="mi">2</span> <span class="o">*</span> <span class="n">image_dim</span><span class="o">/</span><span class="mi">2</span><span class="p">)</span> <span class="n">net</span><span class="p">.</span><span class="nf">extend</span><span 
class="p">([</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Flatten</span><span class="p">(),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">input_len</span><span class="p">,</span> <span class="mi">128</span><span class="p">),</span> <span class="nf">act_cls</span><span class="p">(),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="mi">128</span><span class="p">,</span> <span class="mi">128</span><span class="p">),</span> <span class="nf">act_cls</span><span class="p">(),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="mi">128</span><span class="p">,</span> <span class="mi">128</span><span class="p">),</span> <span class="nf">act_cls</span><span class="p">(),</span> <span class="p">])</span> <span class="n">net</span><span class="p">.</span><span class="nf">extend</span><span class="p">([</span><span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="mi">128</span><span class="p">,</span> <span class="n">num_outputs</span><span class="p">)])</span> </code></pre></div></div> <h2 id="results">Results</h2> <p>Here, we utilize the standard Fashion-MNIST dataset for our analysis. Initially, we train the VAE/DVAE network on a subset of 5,000 samples from the training dataset over 60 epochs. Following this, we employ the VAE/DVAE networks to generate synthetic data, leveraging the learned latent space representation for data augmentation purposes. The performance of the augmented datasets is then evaluated using the previously described CNN architecture for classification tasks.</p> <h3 id="vae-dvae-performance">VAE-DVAE Performance</h3> <p>DVAE’s training loss closely tracks the VAE’s loss throughout training. 
This is interesting because the DVAE is dealing with additional artificial noise, yet it performs on par with the standard VAE. The fact that the DVAE does not exhibit a significantly higher loss than the VAE might suggest that it is effectively denoising the data and learning a robust representation, despite the additional noise.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/TrainingLossOriginal-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/TrainingLossOriginal-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/TrainingLossOriginal-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-11-denoisingVAE/TrainingLossOriginal.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 5. Training Loss: VAE v/s DVAE</p> <h3 id="latent-space-visualization">Latent Space Visualization</h3> <p>Here, we are visualizing the latent space of VAE and DVAE, a high-dimensional space where each dimension represents certain features learned by the model from the data. For this, we plot a 10x10 grid of images where each image in the grid is generated by the model by varying the values in two chosen latent dimensions (i and j), while keeping the other dimensions set to zero. 
This helps in understanding the effect of each latent dimension on the generated output.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/VAE_LatentSpace-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/VAE_LatentSpace-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/VAE_LatentSpace-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-11-denoisingVAE/VAE_LatentSpace.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 6. VAE Latent Space Visualization</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/DVAE_LatentSpace-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/DVAE_LatentSpace-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/DVAE_LatentSpace-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-11-denoisingVAE/DVAE_LatentSpace.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 7. DVAE Latent Space Visualization</p> <p>The lack of visible differences in the latent space structures of both VAE and DVAE indicates that the two models are learning similar representations. 
To delve into these nuances, we assess the effectiveness of augmented data (created using these learned latent spaces) in a subsequent classification task.</p> <h3 id="classification-performance">Classification Performance</h3> <p>To delve into the efficacy of VAE and DVAE in augmenting datasets for downstream image classification tasks, we trained a CNN on a limited subset of the Fashion-MNIST dataset to establish a baseline. Subsequently, we generated synthetic data using both VAE and DVAE, aiming to enrich the training dataset and observe the resultant impact on the CNN’s performance. This is crucial considering the initial constraint of limited training data to start with. We used Fashion-MNIST test dataset, which includes 10,000 test images, for evaluating the performance of learned CNN network.</p> <p>We also tested robustness of these augmented datasets against varying levels of noise (artifically added to test dataset), simulating real-world conditions where test data often includes such imperfections, arising because of the limitations in measurement tools.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/LimitedDatasetLC-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/LimitedDatasetLC-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/LimitedDatasetLC-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-11-denoisingVAE/LimitedDatasetLC.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 8. 
CNN Learning Curve for Limited Dataset</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/VAEAugmentedLC-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/VAEAugmentedLC-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/VAEAugmentedLC-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-11-denoisingVAE/VAEAugmentedLC.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 9. CNN Learning Curve for VAE Augmented Dataset</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/DVAEAugmentedLC-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/DVAEAugmentedLC-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-11-denoisingVAE/DVAEAugmentedLC-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-11-denoisingVAE/DVAEAugmentedLC.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 10. 
CNN Learning Curve for DVAE Augmented Dataset</p> <table> <thead> <tr> <th>Dataset Type \ Noise Level</th> <th>No Noise</th> <th>2.5% Noise</th> <th>5% Noise</th> <th>7.5% Noise</th> <th>10% Noise</th> </tr> </thead> <tbody> <tr> <td>Limited Dataset</td> <td>83.56%</td> <td>83.39%</td> <td>83.11%</td> <td>82.33%</td> <td>81.75%</td> </tr> <tr> <td>VAE Augmented Dataset</td> <td>84.18%</td> <td>84.03%</td> <td>83.57%</td> <td>82.68%</td> <td>81.43%</td> </tr> <tr> <td>DVAE Augmented Dataset</td> <td>85.32%</td> <td>84.98%</td> <td>84.67%</td> <td>83.98%</td> <td>82.59%</td> </tr> </tbody> </table> <h4 id="artificially-corrupted-fashion-mnist">Artificially corrupted Fashion-MNIST</h4> <p>Here, we deliberately introduced artifical noise to the standard Fashion-MNIST dataset to effectively simulate the real-world scenario where training data is not cleaned and is often noisy and imperfect. Such conditions often pose significant challenges in learning effective representations, making our approach highly relevant for understanding the adaptability and efficiency of VAE and DVAE models in handling noisy data. This way we expose the model and train it on a variety of noise patterns while forcing it to reconstruct the original noised image. The model will learn to effectively separate noise from the signal and will be less likely to overfit to the ‘clean’ aspects of the training data and can thus perform better on unseen, noisy data. This improves the generalization capabilities of the model making it more suitable for practical applications.</p> <p>Here, we generated synthetic data using both VAE and DVAE which are trained on artifically corrupted Fashion-MNIST dataset. We then compare the performance of CNN network for three datasets - Limited Noisy Dataset with no augmentation, VAE Augmented dataset and DVAE Augmented Dataset, where representations are learned using the noisy training set. 
Consistent with our earlier methodology, we further evaluated the robustness of CNNs trained with these datasets by testing them against varying levels of noise in the test dataset.</p> <table> <thead> <tr> <th>Dataset Type \ Noise Level</th> <th>No Noise</th> <th>2.5% Noise</th> <th>5% Noise</th> <th>7.5% Noise</th> <th>10% Noise</th> </tr> </thead> <tbody> <tr> <td>Limited Noisy Dataset</td> <td>83.77%</td> <td>83.79%</td> <td>83.61%</td> <td>83.36%</td> <td>82.98%</td> </tr> <tr> <td>VAE Augmented Dataset</td> <td>85.24%</td> <td>84.99%</td> <td>84.62%</td> <td>84.04%</td> <td>83.20%</td> </tr> <tr> <td>DVAE Augmented Dataset</td> <td>85.48%</td> <td>85.38%</td> <td>85.10%</td> <td>84.89%</td> <td>84.58%</td> </tr> </tbody> </table> <h2 id="conclusions">Conclusions</h2> <p>Here are the key findings from our research:</p> <ol> <li> <p><strong>Enhanced Learning from Augmented Data:</strong> We observed that the CNN trained with data augmented by both VAE and DVAE demonstrated improved accuracy and generalization capabilities, especially when compared to the CNN trained on a limited dataset. This underscores the effectiveness of generative models in enriching training datasets, leading to more robust learning.</p> </li> <li> <p><strong>Superiority of DVAE in Handling Noise:</strong> The CNN trained with DVAE augmented data consistently outperformed the one trained with traditional VAE augmented data in tests involving noisy conditions. This aligns perfectly with our research hypothesis about the dual functionality of DVAEs — not only do they learn a generative model of the data distribution but also excel in denoising input data.</p> </li> <li> <p><strong>Robustness to Varied Noise Levels:</strong> A crucial aspect of our research was evaluating the performance of augmented datasets under various noise levels. The augmented datasets, especially those generated by DVAEs, maintained consistent performance across different noise conditions. 
This suggests that the models have not only learned the essential features of the data but are also adept at filtering out noise.</p> </li> </ol> <p>In downstream classification tasks, DVAE-generated synthetic data improved performance metrics, surpassing those achieved with traditional VAE-generated data, particularly in tests with varied noise levels. This validates our hypothesis and highlights the potential of DVAEs in real-world applications where data is limited and data quality is a critical factor.</p> <p>The next steps for this research could be to focus on expanding the types of noise tested in our experiments to evaluate the adaptability and robustness of DVAEs in a broader range of real-world scenarios. We could conduct more comprehensive data augmentation experiments to delve deeper into the capabilities of DVAEs in enhancing neural network learning and generalization.</p> Emoji3Vec 2023-11-10T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/transformer-elo-prediction <h1 id="introduction">Introduction</h1> <p>In machine learning, models often create or learn internal representations for the inputs they are given. For instance, an image might become a vector containing the RGB data for every pixel. These internal representations are then processed and transformed until the model finally translates its representation into the desired output form (via softmax over all output possibilities, for example).</p> <p>The lower dimensional internal representations, known as embeddings, can often carry semantic meaning which can help us understand the data better. Inspired by word2vec, a project for learning embeddings for words, we attempt to learn embeddings for emojis that are semantically interpretable. 
Learning accurate representations is important for downstream tasks, for example: sentiment analysis and other kinds of classification run better with useful embeddings.</p> <h1 id="background-and-related-work">Background and Related Work</h1> <p>Although similar ideas have been explored in the past, we felt that there was still a gap in prior research: specifically, we wanted to create a lightweight model that still learned emoji embeddings directly from data and context.</p> <p>First, it is important to mention the influential and well known <a href="https://arxiv.org/pdf/1301.3781.pdf">Efficient Estimation of Word Representations in Vector Space</a>, commonly referred to as word2vec <d-cite key="mikolov2013word"></d-cite>. Word2vec was trained on a massive dataset of around 6 billion words, and was able to produce some very accurate embeddings that were proven to be useful in downstream tasks. For instance, doing the following arithmetic on the embeddings associated with each word produced: King - Man + Woman = Queen. This was an incredible result and inspired much work in the NLP domain in the following years.</p> <p>In 2016, a paper called <a href="https://arxiv.org/pdf/1609.08359.pdf">emoji2vec: Learning Emoji Representations from their Description</a> <d-cite key="eisner2016emoji"></d-cite> was published. As the name suggests, this paper sought to produce embeddings for emojis to be used in the same vector space as the word2vec embeddings, and attempted to do it by using emoji descriptions. The researchers trained their model with baseline embeddings taken directly from summing the word embeddings for each word in every emoji’s description. For instance, the embedding for “😍” began as the sum of the word embeddings (taken from word2vec) of: “smiling” + “face” + “with” + “heart” + “eyes”. The main benefit of this approach was a strong baseline that could be established without any training data. 
Recently, in 2021, another paper called <a href="https://bashthebuilder.github.io/files/Emojional.pdf">Emojional: Emoji Embeddings</a> <d-cite key="barry2021emojional"></d-cite> was published that extended this approach, adding in additional words (that are related, as judged by Google News) to each baseline embedding. For instance, “✊” was set to be the result of: “raised fist” + “resistance” + “black lives matter” + …</p> <p>After considering the above papers, we decided to create a model that would train similarly to word2vec (using naturally sourced data, and from context as opposed to a description) that also was able to produce useful embeddings on smaller amounts of data/memory/training time. Specifically, we felt that the descriptions would err when emojis began to mean different things than they are described as. For instance, the skull emoji is perhaps more often used to indicate embarassment or disagreement than actual death or skulls. This is addressed somewhat in the 2021 Emojional paper, but that one is very limited by the exact words it puts into each emoji’s embedding, and is less adaptable to new meanings. Further, we felt that there was value in creating a more lightweight model that was still able to produce meaningful representations, both to simply be easier to train and run and also to perhaps find optimizations that wouldn’t have been found if we had the option of just training on a larger set of data/training for a longer time.</p> <h1 id="methods-and-results">Methods and Results</h1> <p>We trained two sets of emoji embeddings to map emojis to the same 300-dimensional space as the one FastText uses for its word embeddings. The first was trained on a set of emoji descriptions, with the intention to learn emoji embeddings that reflect the literal appearances of each emoji. We closely follow the methodology as described in the emoji2vec paper to use as a baseline. 
The second was trained on a set of emoji-containing tweets, with the intention to learn emoji embeddings that reflect how they’re used online.</p> <h2 id="training-emoji-embeddings-with-descriptions">Training Emoji Embeddings with Descriptions</h2> <h3 id="data-cleaning">Data Cleaning</h3> <p>We started with a <a href="https://github.com/pwiercinski/emoji2vec_pytorch/blob/master/data/raw_training_data/emoji_joined.txt">dataset</a> of emoji descriptions from the Unicode emoji list. After cleaning, we were left with about 6000 descriptive phrases for 1661 emojis within a Python dictionary mapping emojis to various corresponding descriptions. Examples of entries include:</p> <ol> <li>‘🐏’: {‘horn’, ‘horns’, ‘male’, ‘ram’, ‘sheep’}</li> <li>‘🆘’: {‘distress signal’, ‘emergency’, ‘sos’, ‘squared sos’}</li> <li>‘👷’: {‘builder’, ‘construction worker’, ‘face with hat’, ‘safety helmet’}</li> </ol> <p>One detail is that we had to generate a bijective mapping between emojis and integers for model training. We encourage those attempting similar projects to save this mapping (in a pickle file, for example) for later use. 
Leon was very sad when he lost this mapping and couldn’t make sense of his first trained model’s outputted embeddings.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-transformer-elo-prediction/project-vis-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-transformer-elo-prediction/project-vis-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-transformer-elo-prediction/project-vis-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-transformer-elo-prediction/project-vis.jpeg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p><em>a visualization of how we cleaned our data, from an example of a tweet</em></p> <h3 id="generating-training-and-test-data">Generating Training and Test Data</h3> <p>With a representation learning framework in mind, we randomly generated positive and negative descriptions for each emoji. We defined an emoji’s positive samples as descriptions that truly corresponded to the emoji, and we defined its negative samples as other descriptions in the dataset that weren’t used to describe the emoji. Guided by the emoji2vec paper, we generated positive and negative samples in a 1:1 ratio.</p> <h3 id="model-training">Model Training</h3> <p>After generating positive and negative samples, we used a pretrained FastText model to calculate the average of the embeddings of each word in each description. 
Put mathematically, if we let the sequence of words in a description be \(w_1, w_2, \dots, w_k\), the set of all strings be \(\mathcal{W}\), and the FastText model be expressed as a mapping \(f: \mathcal{W} \mapsto \mathbb{R}^{300}\), we calculated our description embeddings as</p> \[\frac{1}{k}\sum_{i=1}^kf(w_i).\] <p>This is a notable deviation from the methodology as described in the emoji2vec paper. Instead of using word2vec embeddings, we chose FastText because it uses sub-word tokenization and thus supports out-of-vocabulary strings as input. We also averaged the description embeddings instead of simply taking a summation to normalize for description length.</p> <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>#creates a dictionary mapping descriptions to avg. word embeddings descr_to_embedding = dict() for descr in all_descriptions: word_lst = descr.split(' ') #split description into list of words embed_lst = [] for i in range(len(word_lst)): #repl. words by their embeddings embed_lst.append(torch.tensor(ft[word_lst[i]])) avg_embedding = torch.mean(torch.stack(embed_lst, dim=0), dim=0) #take mean over embeddings descr_to_embedding[descr] = avg_embedding </code></pre></div></div> <p>We again followed the emoji2vec training methodology. For every emoji embedding \(x_i\) and description embedding \(v_i\), the authors model \(\sigma(x_i^T v_j)\) as the probability of the description matching with the emoji, where \(\sigma\) is the sigmoid function. Then our model minimizes the binary cross-entropy loss function</p> \[\mathcal{L}(x_i,v_j,y_{ij}) = -\log(\sigma(y_{ij}x_i^T v_j + (1-v_{ij})x_i^T v_j))\] <p>where \(y_{ij}\) is 1 when \(v_j\) is a positive sample and 1 otherwise.</p> <p>The authors don’t describe the exact model architecture used to learn the emoji embeddings, so we likely also deviate in methodology here. 
Our model is very simple: on some input emoji \(x_i\), we pass it through an nn.Embedding() module, compute \(\sigma(x_i^T v_j)\), and pass it to nn.BCELoss(). This way, the only learnable parameters in the model are in nn.Embedding(), and model training is as efficient as possible.</p> <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code># the main model class # follows the Emoji2Vec training class EmojiDict(nn.Module): def __init__(self, n_emojis): # n_emojis: the number of emojis we're learning representations of super().__init__() self.embedding = nn.Embedding( num_embeddings = n_emojis, embedding_dim = 300 # size of word2vec embedding ) self.sigmoid = nn.Sigmoid() def forward(self, x, sample): # x: a batch of emoji indices, shape (B, ) # sample: a batch of avg'd embeddings, shape (B, 300) x = self.embedding(x) # performing a batched dot product x = torch.unsqueeze(x, dim=1) #(B x 1 x 300) sample = torch.unsqueeze(sample, dim=2) #(B x 300 x 1) result = torch.bmm(x, sample) #(B x 1 x 1) result = torch.flatten(result) #(B, ) result = self.sigmoid(result) #should output probabilities return result #should be shape (B, ) </code></pre></div></div> <h3 id="t-sne-on-learned-embeddings">t-SNE on Learned Embeddings</h3> <p>We trained the model for 60 epochs over a 80-20 train-test split of 250 positive and 250 negative samples for each emoji. We used an Adam optimizer with the default parameters, and model training took roughly an hour. 
The model achieved 0.19 logloss and 0.98 accuracy on a validation set.</p> <p>After the model was trained, we took emoji embedding weights from the model’s nn.Embedding() module and projected them down to two dimensions using t-SNE.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-transformer-elo-prediction/emojidict-triplefit-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-transformer-elo-prediction/emojidict-triplefit-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-transformer-elo-prediction/emojidict-triplefit-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-transformer-elo-prediction/emojidict-triplefit.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>We can see that the model is excellent at grouping emojis that have similar appearances. Nearly all the faces are in the top-left, the zodiac symbols are in the bottom-left, the flags are at the bottom, the foods are on the right, the modes of transportation are in the top-right… the list can keep going. 
While there are some random emojis scattered about, similar emojis generally are similar in embedding space as well.</p> <h3 id="emoji-emoji-similarities">Emoji-Emoji Similarities</h3> <p>To confirm this idea quantitatively, we can fix individual emojis and look at its nearest neighbors in embedding space with cosine distance.</p> <table> <thead> <tr> <th>Emoji</th> <th>1-NN</th> <th>2-NN</th> <th>3-NN</th> <th>4-NN</th> <th>5-NN</th> <th>6-NN</th> <th>7-NN</th> <th>8-NN</th> <th>9-NN</th> <th>10-NN</th> </tr> </thead> <tbody> <tr> <td>😍</td> <td>💖</td> <td>😻</td> <td>😄</td> <td>😀</td> <td>😚</td> <td>💟</td> <td>😘</td> <td>😊</td> <td>😽</td> <td>💑</td> </tr> <tr> <td>😀</td> <td>😄</td> <td>😊</td> <td>😃</td> <td>🙂</td> <td>😑</td> <td>😁</td> <td>😸</td> <td>🤗</td> <td>😆</td> <td>🤧</td> </tr> <tr> <td>💀</td> <td>☠</td> <td>🆎</td> <td>🌫</td> <td>🐁</td> <td>⛓</td> <td>⛸</td> <td>🌮</td> <td>🦅</td> <td>⚖</td> <td>🐙</td> </tr> <tr> <td>🚀</td> <td>🛰</td> <td>👽</td> <td>🚡</td> <td>🛳</td> <td>📡</td> <td>🚢</td> <td>📋</td> <td>🚎</td> <td>🆚</td> <td>🛥</td> </tr> </tbody> </table> <p>We see here that the nearest neighbors also generally make sense. 😍’s nearest neighbors all involve love or positive emotions, and 🚀’s neighbors are generally about space or modes of transport. Interestingly, only 💀’s first neighbor seems remotely similar to it. 
We believe that this is just because death is a mostly unrepresented theme in emojis.</p> <h3 id="word-emoji-similarities">Word-Emoji Similarities</h3> <p>Since we trained emoji embeddings into the same space as the FastText word embeddings, we can also look at the nearest emoji neighbors to any English word!</p> <table> <thead> <tr> <th>Word</th> <th>1-NN</th> <th>2-NN</th> <th>3-NN</th> <th>4-NN</th> <th>5-NN</th> <th>6-NN</th> <th>7-NN</th> <th>8-NN</th> <th>9-NN</th> <th>10-NN</th> </tr> </thead> <tbody> <tr> <td>happy</td> <td>😃</td> <td>😺</td> <td>😌</td> <td>😹</td> <td>🏩</td> <td>😊</td> <td>💛</td> <td>😂</td> <td>😞</td> <td>😁</td> </tr> <tr> <td>sad</td> <td>😔</td> <td>😭</td> <td>😒</td> <td>🙁</td> <td>😟</td> <td>😞</td> <td>🙍</td> <td>😢</td> <td>😁</td> <td>😯</td> </tr> <tr> <td>lit</td> <td>🚨</td> <td>🕎</td> <td>🌆</td> <td>🔦</td> <td>📭</td> <td>🎇</td> <td>🕯</td> <td>💫</td> <td>🏥</td> <td>💡</td> </tr> <tr> <td>bitcoin</td> <td>💛</td> <td>🤑</td> <td>🎮</td> <td>💙</td> <td>🌈</td> <td>🤓</td> <td>📱</td> <td>📅</td> <td>🐰</td> <td>🍆</td> </tr> </tbody> </table> <p>Again, the nearest neighboring emojis generally make sense. Bitcoin’s nearest neighbors are considerably less accurate than the others. Since our nearest neighbors are much more accurate for other English words like “cryptocurrency”, we attribute this inaccuracy to FastText having poor embeddings for “Bitcoin”, which was much less popular word when FastText was trained (in 2015).</p> <p>One thing to note from these nearest-neighbor tables is that embeddings trained with the emoji2vec method take words very literally. “🚀” is related to space and transportation, and “lit” is related to things that literally light up. 
As such, these embeddings won’t adjust to semantic changes in emojis as slang develops and people become increasingly clever in their emoji use.</p> <h2 id="training-emoji-embeddings-with-twitter-data">Training Emoji Embeddings with Twitter Data</h2> <h3 id="data-cleaning-1">Data Cleaning</h3> <p>We started with a <a href="https://www.kaggle.com/datasets/rexhaif/emojifydata-en?select=test.txt">dataset</a> of emoji-containing tweets. Motivated by the data cleaning done in the emojiSpace paper, we remove duplicate tweets, numbers, hashtags, links, emails, and mentions. Then, we extract the “context” words and emojis around each emoji with a window size of 4 in both directions and tokenize it. We cleaned only a subsample of the tweets due to constraints on memory and compute. Even so, after cleaning, we were left with about 272,000 contexts for 1251 emojis. Examples of contexts for the emoji 🤑 include:</p> <ol> <li>(‘the’, ‘promotion’, ‘code’, ‘works’, ‘we’, ‘will’, ‘be’, ‘giving’)</li> <li>(‘my’, ‘grind’, ‘all’, ‘week’, ‘i’, ‘ain’t’, ‘been’, ‘getting’)</li> <li>(‘cash’, ‘in’, ‘on’, ‘sunday’, ‘thank’, ‘you’, ‘so’, ‘much’)</li> </ol> <h3 id="generating-training-and-test-data-1">Generating Training and Test Data</h3> <p>With a representation learning framework in mind, we randomly generated positive and negative descriptions for each emoji. We defined an emoji’s positive samples as descriptions that truly corresponded to the emoji, and we defined its negative samples as other descriptions in the dataset that weren’t used to describe the emoji. Guided by the emoji2vec paper, we generated positive and negative samples in a 1:1 ratio.</p> <p>As in the earlier model, we randomly generated positive and negative contexts for each emoji. We defined an emoji’s positive samples equivalently as before, but this time we used the set of all contexts across all emojis as the set of negative examples. 
Doing this is obviously not ideal, but it provided a huge performance boost when generating data. Additionally, with such a large dataset, drawing a positive sample as a negative one happens relatively infrequently.</p> <h3 id="model-training-1">Model Training</h3> <p>The training method we used for this model was nearly identical to that of the first model, and similar to the Continuous Bag-of-Words (CBOW) method for training word2vec. For every context, we calculated the average of the individual word embeddings using FastText. Often, another emoji would be part of the context; such emojis would be passed into the nn.Embedding() module as well to produce an embedding to be passed into the average. The model architecture remained nearly identical, and continued using binary cross-entropy loss as our loss function.</p> <p>Our model architecture differs somewhat from the original word2vec model, which uses a cross-entropy loss over the entire vocabulary of words as its loss function. While we may lose some expressivity by using binary cross-entropy instead, we believe that making this change made our model more lightweight and easier to train.</p> <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code># the main model class # essentially a CBOW on emojis class EmojiCBOW(nn.Module): def __init__(self, n_emojis): # n_emojis: the number of emojis we're learning representations of super().__init__() self.embedding = nn.Embedding( num_embeddings = n_emojis, embedding_dim = 300 # size of word2vec embedding ) self.sigmoid = nn.Sigmoid() def forward(self, x, embeddings, emojis, masks): # x: a batch of emoji indices, shape (B, ) # embeddings: a batch of summed word embeddings from context, shape (B x 300) # emojis: a batch of in-context emoji indices, with -1 as a placeholder, shape (B x 8) # masks: a batch of masks for the relevant emoji indices, shape (B x 8) x = self.embedding(x) masks_unsqueezed = torch.unsqueeze(masks, 
dim=2) # get the dimensions right emoji_embeddings = self.embedding(emojis * masks) * masks_unsqueezed # apply embeddings to emojis w/ mask applied, (B x 8 x 300) emoji_embeddings = torch.sum(emoji_embeddings, dim=1) # sum acros embeddings, (B x 300) tot_embeddings = embeddings + emoji_embeddings # (B x 300) tot_embeddings = tot_embeddings / 8 # get avg embeddings, could help w/ numerical stability? # performing a batched dot product x = torch.unsqueeze(x, dim=1) #(B x 1 x 300) tot_embeddings = torch.unsqueeze(tot_embeddings, dim=2) #(B x 300 x 1) tot_embeddings = tot_embeddings.to(torch.float) / 8 result = torch.bmm(x, tot_embeddings) #(B x 1 x 1) result = torch.flatten(result) #(B, ) result = self.sigmoid(result) #should output target probabilities return result #should be shape (B, ) </code></pre></div></div> <h3 id="t-sne-on-learned-embeddings-1">t-SNE on Learned Embeddings</h3> <p>We trained the model for 80 epochs over a 80-20 train-test split of 250 positive and 250 negative samples for each emoji. We used an Adam optimizer with the default parameters, and model training took roughly two hours. 
The model achieved 0.39 logloss and 0.79 accuracy on a validation set.</p> <p>After the model was trained, we took emoji embedding weights from the model’s nn.Embedding() module and projected them down to two dimensions using t-SNE.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-transformer-elo-prediction/emojitweets-transfer-40e-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-transformer-elo-prediction/emojitweets-transfer-40e-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-transformer-elo-prediction/emojitweets-transfer-40e-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-transformer-elo-prediction/emojitweets-transfer-40e.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>The model does reasonably well at clustering similar emojis together; as before, the flags, faces, and numbers are close together in embedding space. However, the quality of this clustering is noticeably worse than it was in the baseline model. We attribute this to the quality of the dataset and to the increased difficulty in the learning task. The emoji descriptions were clean, precise, and informative; tweets are generally none of those three. Additionally, learning embeddings from contexts has historically required a lot of training data and compute to perform successfully. We, however, only had the compute and memory to sample 500 tweets per emoji, which is only a tiny sample from the massive distribution of possible contexts that may surround any given emoji. 
Producing emoji embeddings that outperform the baseline model would require much more training data and time than what Colab offers.</p> <p>While these embeddings lose to the baseline embeddings in overall quality, they have certain properties that the baseline embeddings lack. Namely, since these embeddings were trained on a much more varied and organic dataset, they encode emoji use cases beyond what emojis literally mean. Specifically, they can learn from slang.</p> <h3 id="emoji-emoji-similarities-1">Emoji-Emoji Similarities</h3> <p>To illustrate this, we can look at the nearest neighbors of the same four emojis that were presented earlier. We narrow down our search to the top-200 most common emojis in our dataset because those were likely learned the best by our model.</p> <table> <thead> <tr> <th>Emoji</th> <th>1-NN</th> <th>2-NN</th> <th>3-NN</th> <th>4-NN</th> <th>5-NN</th> <th>6-NN</th> <th>7-NN</th> <th>8-NN</th> <th>9-NN</th> <th>10-NN</th> </tr> </thead> <tbody> <tr> <td>😍</td> <td>🏆</td> <td>💜</td> <td>🎉</td> <td>🇩🇪</td> <td>💘</td> <td>💖</td> <td>👑</td> <td>💞</td> <td>💪</td> <td>🇧🇷</td> </tr> <tr> <td>😀</td> <td>📚</td> <td>😆</td> <td>😏</td> <td>🎉</td> <td>😌</td> <td>😫</td> <td>🔗</td> <td>🙂</td> <td>⚡</td> <td>🇫🇷</td> </tr> <tr> <td>💀</td> <td>😭</td> <td>🍆</td> <td>😓</td> <td>🤤</td> <td>💔</td> <td>😩</td> <td>🐥</td> <td>😮</td> <td>🐻</td> <td>🍑</td> </tr> <tr> <td>🚀</td> <td>💸</td> <td>🔹</td> <td>💯</td> <td>🎯</td> <td>💵</td> <td>2️⃣</td> <td>👋</td> <td>💰</td> <td>😤</td> <td>😎</td> </tr> </tbody> </table> <p>We see here that the nearest neighbors for 😍 and 😀 are noticeably less intuitive than the ones in the baseline model, though some still make sense. Interestingly, however, 💀 has become more associated with strong emotions like 😭 and 😩. This correlates with the online slang “I’m dead,” which expresses a strong (could be both positive or negative) emotional response to something. 
Additionally, 🚀 has become more associated with money, which correlates with the use of 🚀 to indicate a stock or asset going “to the moon.”</p> <h3 id="word-emoji-similarities-1">Word-Emoji Similarities</h3> <p>We can also observe this phenomenon in the cosine similarities between words and emojis. We use the same words as above, and again we narrow our nearest neighbors search to the top 200 most popular emojis.</p> <table> <thead> <tr> <th>Word</th> <th>1-NN</th> <th>2-NN</th> <th>3-NN</th> <th>4-NN</th> <th>5-NN</th> <th>6-NN</th> <th>7-NN</th> <th>8-NN</th> <th>9-NN</th> <th>10-NN</th> </tr> </thead> <tbody> <tr> <td>happy</td> <td>😃</td> <td>😺</td> <td>😌</td> <td>😹</td> <td>🏩</td> <td>😊</td> <td>💛</td> <td>😂</td> <td>😞</td> <td>😁</td> </tr> <tr> <td>sad</td> <td>😒</td> <td>😻</td> <td>👏</td> <td>😥</td> <td>😭</td> <td>😓</td> <td>😣</td> <td>😔</td> <td>😂</td> <td>😪</td> </tr> <tr> <td>lit</td> <td>🔥</td> <td>🚨</td> <td>😍</td> <td>✅</td> <td>😎</td> <td>💯</td> <td>💣</td> <td>🇺🇸</td> <td>🗣</td> <td>💫</td> </tr> <tr> <td>bitcoin</td> <td>💵</td> <td>🎉</td> <td>😱</td> <td>💸</td> <td>🤑</td> <td>🔹</td> <td>🇮🇳</td> <td>🍃</td> <td>😆</td> <td>🌊</td> </tr> </tbody> </table> <p>As before, the nearest neighboring emojis generally make sense, but are less accurate than the neighbors in the baseline model. At the same time, the nearest neighbors now align more closely with slang (or “new” words like bitcoin). “Lit” now is more related to a feeling of firm agreement, and “bitcoin” is now more related to money. In both cases, the nearest neighbors align more with the words’ common usages than their literal meanings.</p> <h1 id="conclusion">Conclusion</h1> <h2 id="future-work">Future Work</h2> <p>Given the time and computational constraints we had for this project, we had to pass on many paths for future exploration. We list a few in this section.</p> <ol> <li> <p>We would’ve liked to train our second model for much longer on a much larger dataset of tweets. 
Only about 400 of our emojis had over 50 tweets associated with them. This greatly restricted their positive sample sets, which likely resulted in far-from-optimal emoji embeddings.</p> </li> <li> <p>We also considered training a more expressive neural architecture for our second model. One word2vec CBOW <a href="https://towardsdatascience.com/word2vec-with-pytorch-implementing-original-paper-2cd7040120b0">implementation</a> we found used a Linear layer after the Embedding layer. It projected the 300-dimensional embeddings into embeddings with dimensionality equal to the size of the emoji vocabulary to learn embeddings via a multi-class classification problem. We ultimately decided against using such a model because we doubted that we had the time, data, and compute to train a more complex model.</p> </li> <li> <p>Something we realized towards the end of our model training was that the embeddings from the first model could be used to inform training on our second model. It would be interesting to see if transfer learning could result in increased performance for our second model, especially since many emojis were underrepresented in our dataset of tweets.</p> </li> </ol> <h2 id="discussion">Discussion</h2> <p>Overall, despite the limitations, our lightweight model achieved reasonable accuracy with less than optimal conditions. One other challenge we faced had to do with Colab’s memory constraints: we were only able to train on a small set of data and were forced to generate positive and negative pairs over and over from the same set. Given a larger and more diverse set of positive/negative pairs, we believe our model could have performed even better.</p> <p>Furthermore, we felt that our CBOW model definitely could add value for people solving downstream tasks, such as sentiment analysis. 
The emoji2vec model of summing the emoji’s description’s word embeddings is useful when there are few datapoints for each emoji, but the CBOW approach captures more subtle meanings and is much more accurate to how people actually use emojis in their day to day life—both have their merits.</p> Modeling Human Speech Recognition with Different Network Architectures 2023-11-10T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/speech-recognition-proposal <h2 id="introduction">Introduction</h2> <p>Recent advances in machine learning have made perception tasks more doable by computers, approaching levels similar to humans. In particular, structuring models biologically and using ecologically realistic training datasets have helped to yield more humanlike results. In the field of speech recognition, models trained under realistic conditions with stimuli structured how sounds are represented in the cochlea, with network layers imitating the processing pipeline in the brain, seem to be successful in performing speech recognition tasks. However, it is unclear whether specific network architectures are more beneficial to learning human speech recognition patterns. In this project, I seek to investigate how different network architectures such as CNNs vs. TNNs affect the ability to recognize speech in a humanlike way.</p> <p>One facet of more biological models is that they attempt to recreate the structure of the human brain. For auditory models, a useful structure to replicate is the cochlea; these replications are called cochleagrams. Cochleagrams have been used in order to model the ear more effectively, leading to models that imitate auditory perception in a more human-like way. A cochleagram works in a similar way to how the cochlea works in a human. 
It filters a sound signal through bandpass filters of different frequencies, creating multiple frequency subbands, where the subbands for higher frequencies are wider, like how the cochlea works in the human ear. The amplitudes of the different subbands are then compressed nonlinearly, modeling the compressive nonlinearity of the human cochlea <d-cite key="mcdermott2013"></d-cite> <d-cite key="mcdermott2011"></d-cite>.</p> <p>A recent application of cochlear models to speech perception is found in Kell’s 2018 paper, where they create a convolutional neural network which replicates human speech recognition <d-cite key="kell2018"></d-cite>. They trained the network to recognize a word in the middle of a 2 second clip, from a possible vocabulary of 587 words. To imitate how the ear functions, they preprocessed the sound signals into cochleagrams, intended to be a more biologically realistic model of the ear. The activations in different layers of the neural network were able to predict voxel responses in different parts of the brain, revealing that the auditory processing pipeline aligned with layers of the network.</p> <p>A natural question to ask at this point is whether a convolutional neural network is the best architecture for this task. In Mamyrbayev Orken et al.’s 2022 paper, they explore a speech recognition system for Kazakh speech <d-cite key="orken2022"></d-cite>. In this paper, they create a transformer model that is able to perform a continuous speech recognition task on “clean” speech (speech without noise or alterations). They trained the model on a dataset of clean speech and telephone speech, and the model was able to perform the speech recognition task on clean speech with high accuracy. Although this network does well on the specific task it is given, it is still unclear how its performance compares to that of a convolutional neural network due to not having a direct comparison between the two on the same task. 
In addition, it is unclear whether the transformer can yield human-like results for speech recognition.</p> <p>In the field of computer vision, there has been work done comparing convolutional neural networks to vision transformers for the task of object recognition. Tuli’s 2021 paper explores this through the lens of human-like object recognition, determining whether the errors of a vision transformer or a convolutional neural network are more similar to humans <d-cite key="tuli2021"></d-cite>. Their findings indicated that the vision transformer behaved in a more human-like way. In particular, in human vision, there is a phenomenon called shape bias, where if an object has the shape of a certain object A, but the texture of a certain object B, humans will be likely to predict that the object is an instance of object A. Many vision models struggle with shape vs. texture bias, being more inclined to categorize objects by their texture; the vision transformer presented in this paper has approximately twice the amount of shape bias as the convolutional neural network, further suggesting that the transformer is a more human-like representation of vision.</p> <p>In this post, I investigate more closely the importance of network architecture in the ability to effectively model human speech recognition. I focus on three metrics of evaluating how well a model replicates human speech recognition:</p> <ol> <li>Ability to generalize to speakers not found in the training set: Humans hear speech from new speakers all the time, and a person who they’ve never heard before usually does not hinder their ability to recognize what they are saying. Models of speech recognition are usually trained on a corpus of speech that is inherently biased towards a set of talkers that participates in creating the corpus, so it is possible that it could overfit to the speakers in the training set. 
A good model of speech recognition should be able to perform well on new talkers.</li> <li>Ability to recognize speech in different background noise conditions: Humans rarely hear speech unaccompanied by some form of background noise, and are generally robust to noise up to large signal to noise ratios. Many models of speech recognition such as the transformer in Orken 2022 are not trained or tested on noisy speech, so it is likely that it would not be able to recognize speech in these conditions.</li> <li>Ability to recognize distorted forms of speech: Humans are remarkably robust to various distortions of speech such as sped-up/slowed-down speech, reverberant speech, and local-time manipulations, despite not encountering some of these often in their lives <d-cite key="ml1950"></d-cite>. In order to further test a model’s ability to replicate human speech recognition, we should test how well it performs on speech manipulations.</li> </ol> <h2 id="methods">Methods</h2> <p>The models in my experiment were given a 2 second speech clip, and were tasked with identifying the word overlapping the middle of the clip. In particular, they were trained on a dataset containing 2 second speech clips from the Common Voice dataset, where the word at the middle of the clip is from a vocabulary of 800 words, imposed on different background noises taken from the Audio Set dataset <d-cite key="ardila2019"></d-cite> <d-cite key="gemmeke2017"></d-cite>. So, the models were essentially given a speech clip and asked to perform an 800-way classification task.</p> <p>In order to generate the fairest comparison between convolutional neural networks and transformers, I start with a baseline CNN inspired by Saddler 2021, and then either replace the last convolutional layer with a multi-headed attention layer or remove it from the network <d-cite key="saddler2021"></d-cite>. 
Each block of the CNN is made up of a convolutional layer, followed by a ReLU activation, a weighted-average pooling layer, and a normalization layer <d-cite key="saddler2021"></d-cite> <d-cite key="feather2019"></d-cite>.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-speech-recognition-proposal/block-architectures-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-speech-recognition-proposal/block-architectures-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-speech-recognition-proposal/block-architectures-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-speech-recognition-proposal/block-architectures.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>The baseline CNN (called CNN6) is composed of 6 blocks followed by a fully connected layer and a classification layer. The transformer-CNN hybrid (CNN5+Attn) is composed of 5 convolutional blocks, followed by an attention block, followed by a fully connected layer and a classification layer. Lastly, I created a “control” CNN (called CNN5) that is the same as CNN6, but with the last convolutional block removed. This was intended to test whether an attention layer provides any benefit as opposed to not including the layer at all. 
All networks begin with an initial data preprocessing step that converts the audio signal into a cochleagram.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-speech-recognition-proposal/network-architectures-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-speech-recognition-proposal/network-architectures-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-speech-recognition-proposal/network-architectures-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-speech-recognition-proposal/network-architectures.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>It is difficult to derive a direct comparison between a convolutional layer and a multi-headed attention layer, in particular how to decide how many attention heads to include and what the attentional layer dimension should be. In order to have the best chance of comparison between CNN5+Attn and the other networks, I ran multiple CNN5+Attn networks with a larger vs. smaller number of attention heads (64 vs. 16) and a larger vs. smaller attention dimension (512 vs. 16) for 10 epochs to determine a preliminary measure of network performance across these parameters. The preliminary results after 10 epochs showed that the CNN5+Attn network with a small number of attention heads and a smaller attention dimension had the highest training accuracy and trained the fastest, so I used this model for my analysis.</p> <p>After preliminary analysis, I trained the CNN6, CNN5+Attn, and CNN5 networks for 100 epochs. 
I then evaluated the models’ performance on this task in the three aforementioned conditions.</p> <p>1) To evaluate performance on clips spoken by talkers not encountered in the training dataset, I evaluated the models on clips taken from the WSJ speech corpus.</p> <p>2) For clips superimposed on different types of background noise, I evaluated the model on 5 types of background noise, in signal-to-noise ratios ranging from -9 dB to +3 dB, plus a +infinity condition which represents no background noise:</p> <ul> <li>Auditory scenes: background noises encountered in everyday life like rain or cars passing by</li> <li>8-speaker babble: 8 other people talking in the background</li> <li>Music</li> <li>Speech-shaped noise: gaussian noise that is given the envelope of speech signals</li> <li>Modulated speech-shaped noise: speech-shaped noise that is modulated so that the noise alternates between being very quiet and very loud</li> </ul> <p>3) Distorted speech clips with 6 types of distortions:</p> <ul> <li>Sped-up/slowed-down speech (preserving original pitches)</li> <li>Speech in a reverberant environment: speech convolved with an impulse response of different reverberant environments</li> <li>Whispered speech</li> <li>Inharmonic speech: speech signals are decomposed into their harmonics, and the harmonics are moved up or down to distort the signal</li> <li>Sine wave speech: speech signals are filtered into frequency subbands, and each band is replaced by a sine wave with the center frequency of the band</li> <li>Locally time-reversed speech: speech is decomposed into chunks of a certain length, and the chunks are reversed</li> </ul> <p>Then I compared the models’ performance on these conditions to existing human data where humans were asked to perform the same task of recognizing the middle word of a 2-second clip in various types of noise or distortion.</p> <h2 id="results">Results</h2> <p>Overall, I found that CNN6 performed better than CNN5+Attn, which performed 
better than CNN5. After 100 epochs, CNN6 had a validation accuracy of around 0.60, CNN5+Attn had validation accuracy of 0.55, and CNN5 had validation accuracy of 0.53. In particular, CNN5 overfit quite a bit (0.12 gap between training and validation accuracy) while CNN5+Attn overfit much less (0.05 gap between training and validation accuracy).</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-speech-recognition-proposal/accuracy-by-epoch-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-speech-recognition-proposal/accuracy-by-epoch-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-speech-recognition-proposal/accuracy-by-epoch-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-speech-recognition-proposal/accuracy-by-epoch.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>All three models performed similarly to humans for clean speech spoken by talkers not encountered in the training dataset.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-speech-recognition-proposal/clean-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-speech-recognition-proposal/clean-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-speech-recognition-proposal/clean-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-speech-recognition-proposal/clean.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>In 
different types of background noise, in general the models performed similarly to humans, except in the condition of modulated speech-shaped noise. In general, humans perform better for modulated noise than “stationary” noise because they are able to fill in speech in the “gaps”, or quieter sections, of the noise, but none of the models have as strong of an effect as humans for this. The CNN5+Attn model does particularly badly on this compared to the other networks.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-speech-recognition-proposal/background-noise-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-speech-recognition-proposal/background-noise-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-speech-recognition-proposal/background-noise-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-speech-recognition-proposal/background-noise.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>The models all perform similarly to humans for inharmonic speech, local time reversal, and low levels of sped-up or slowed-down speech. For whispered speech and sine-wave speech, the models perform slightly worse than humans, with CNN6 performing better than CNN5+Attn performing better than CNN5. 
For reverberant speech and extremely sped-up or slowed-down speech, all of the models perform significantly worse than humans, with the same hierarchy of performance between the models.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-speech-recognition-proposal/manipulations-1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-speech-recognition-proposal/manipulations-1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-speech-recognition-proposal/manipulations-1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-speech-recognition-proposal/manipulations-1.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-speech-recognition-proposal/manipulations-2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-speech-recognition-proposal/manipulations-2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-speech-recognition-proposal/manipulations-2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-speech-recognition-proposal/manipulations-2.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h2 id="discussion">Discussion</h2> <p>Overall, it seems that CNN6 is the best option for replicating human speech recognition, but CNN5+Attn does have some benefits. In particular, it trains substantially faster than CNN5, and overfits less than both CNN5 and CNN6. 
The hybrid architecture may help with overfitting because it forces the model to do multiple types of analysis in order to determine the output. Although CNN5+Attn does still perform worse than CNN6, it is reasonable to hypothesize that it has potential. Due to resource limitations, I was only able to test two different conditions for number of attention heads and attention dimension, but as shown from the preliminary training the number of attention heads and the attention dimension does have an effect. It seems likely that with a more extensive search of these parameters, it could be possible to create a CNN5+Attn network that performs similarly or better than the CNN6 network on these tasks.</p> <p>All of the models have discrepancies with humans for the modulated background noise condition. One possible explanation for this is that the models do not learn the process of recognizing smaller phonemes of a word, only learning a classification task on the 800 words that they are given, so they are unable to piece together chunks of a word into a larger word like humans do. A possible way to test this would be to create a model for a phoneme-detection task, and then add a layer that combines the phonemes into a larger word, and see whether this performs better in this condition. This would make sense because some of the earliest things humans learn about speech are not full words, but phonemes like “ba” or “da,” so a model trained on this task would then have been optimized in more human-like conditions.</p> <p>In addition, there are some discrepancies between the models and humans in some of the speech distortions. The largest discrepancies are found in very sped-up or slowed-down speech, and in reverberant speech. This seems likely to be due to a shortcoming of the dataset. The Common Voice dataset is composed of people reading passages, which is generally a single slow, steady speed, and there is no reverberation. 
The speech that humans encounter in their lives varies a lot in speed, and they also encounter speech in many different reverberant environments, so they are optimized to recognize speech in these conditions. It is reasonable to assume that if reverberation and varied speeds of speech were incorporated into the training dataset, the model would perform better in these conditions.</p> <p>Further directions of this project could include trying more variations of the parameters of the attention model. In addition, it would be interesting to try different hybrid architectures; for example, 4 layers of convolution followed by 2 layers of attention. This could give a more complete idea of the benefits and disadvantages of CNNs and transformers for the task of speech recognition. In conclusion, the current results seem promising, but more extensive testing is needed in order to get a full picture of whether these models can accurately replicate human speech recognition.</p> <h2 id="acknowledgements">Acknowledgements</h2> <p>I would like to thank my fellow members of the McDermott Lab, particularly Mark Saddler for creating the code for the baseline CNN, and Erica Shook for providing me with human data and experimental manipulation code.</p> Analytic, Empirical, and Monte Carlo Bayesian Methods for Uncertainty Estimation 2023-11-09T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/uncertainty <h2 id="motivation">Motivation</h2> <p>Many practical uses of deep neural network (DNN) models involve using them with a restricted amount of training data, which doesn’t encompass all the potential inputs the model might face when actually used. This exposes a significant limitation of models based on data: they can behave unpredictably when dealing with inputs that differ from the data they were trained on, known as out-of-distribution (OOD) inputs. 
Machine learning models that are trained within a closed-world framework often mistakenly identify test samples from unfamiliar classes as belonging to one of the recognized categories with high confidence<d-cite key="scheirer2013"></d-cite>. This tendency of the model to make overconfident misclassifications is sometimes described in the literature as “arrogance.” This can be catastrophic when such models are deployed in the real world<d-cite key="amodei2016"></d-cite>. For instance, in self-driving technology, the system should warn the user and transfer the control to the human driver when it encounters unfamiliar scenes or objects that were not present in its training data, and thus it is unable to make a safe and reliable decision.</p> <p>Consequently, there has been a surge of research focused on improving DNN models to be able to assess their own uncertainty and recognize OOD inputs during their operational phase <d-cite key="yang2022"></d-cite>. Beyond improving their usage in safety-critical settings, being able to predict model uncertainty is important in predicting model performance. With increased ability to predict model performance, we can improve the training process and improve inference. In this project, we explore 3 different methods of quantifying uncertainty: Monte Carlo Dropout<d-cite key="gal2016"></d-cite>, Sketching Curvature for Efficient Out-of-Distribution Detection (SCOD)<d-cite key="sharma2021"></d-cite>, and Stochastic Weight Averaging Gaussian (SWAG)<d-cite key="maddox2019"></d-cite>. We’ll also attempt to modify these existing methods and even combine them in order to provide improvements to practicality such as RAM usage and improvements to important metrics we establish. 
We develop 2 methods: SCODv2, which extends the simple isotropic prior used by SCOD, and SCWAG, which combines elements of both SCOD and SWAG.</p> <h2 id="stochastic-weight-averaging-gaussian-swag">Stochastic Weight Averaging Gaussian (SWAG)</h2> <p>SWAG<d-cite key="maddox2019"></d-cite> is a method that can improve generalization in deep learning settings. It approximates the posterior distribution of weights as a normal distribution with mean determined by an average of weight iterates and covariance determined by the sum of the running variances of each weight and a low rank covariance matrix. More specifically, we use the following to perform our weight updates.</p> <div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">def</span> <span class="nf">train_swag</span><span class="p">(</span><span class="n">net</span><span class="p">,</span> <span class="n">loader</span><span class="p">,</span> <span class="n">num_epochs</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">K</span><span class="o">=</span><span class="mi">25</span><span class="p">,</span> <span class="n">swag_freq</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span> <span class="n">swag_start</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span> <span class="n">theta</span> <span class="o">=</span> <span class="nf">get_all_weights</span><span class="p">(</span><span class="n">net</span><span class="p">)</span> <span class="n">d</span> <span class="o">=</span> <span class="n">theta</span><span class="p">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="n">D</span> <span class="o">=</span> <span class="n">torch</span><span class="p">.</span><span class="nf">zeros</span><span class="p">((</span><span class="n">d</span><span class="p">,</span><span class="n">K</span><span 
class="p">)).</span><span class="nf">cpu</span><span class="p">()</span> <span class="n">theta_bar</span> <span class="o">=</span> <span class="n">theta</span><span class="p">.</span><span class="nf">clone</span><span class="p">().</span><span class="nf">cpu</span><span class="p">()</span> <span class="n">M2</span> <span class="o">=</span> <span class="n">torch</span><span class="p">.</span><span class="nf">zeros</span><span class="p">(</span><span class="n">d</span><span class="p">).</span><span class="nf">cpu</span><span class="p">()</span> <span class="n">sigmas</span> <span class="o">=</span> <span class="n">torch</span><span class="p">.</span><span class="nf">zeros</span><span class="p">(</span><span class="n">d</span><span class="p">).</span><span class="nf">cpu</span><span class="p">()</span> <span class="n">optimizer</span> <span class="o">=</span> <span class="n">optim</span><span class="p">.</span><span class="nc">Adam</span><span class="p">(</span><span class="n">net</span><span class="p">.</span><span class="nf">parameters</span><span class="p">(),</span> <span class="n">lr</span><span class="o">=</span><span class="mf">0.001</span><span class="p">)</span> <span class="n">net</span><span class="p">.</span><span class="nf">train</span><span class="p">()</span> <span class="k">for</span> <span class="n">epoch</span> <span class="ow">in</span> <span class="nf">range</span><span class="p">(</span><span class="n">num_epochs</span><span class="p">):</span> <span class="n">train_loss</span> <span class="o">=</span> <span class="mi">0</span> <span class="n">n_</span> <span class="o">=</span> <span class="mi">0</span> <span class="k">for</span> <span class="n">batch_idx</span><span class="p">,</span> <span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">target</span><span class="p">)</span> <span class="ow">in</span> <span class="nf">enumerate</span><span class="p">(</span><span class="n">loader</span><span 
class="p">):</span> <span class="n">optimizer</span><span class="p">.</span><span class="nf">zero_grad</span><span class="p">()</span> <span class="n">output</span> <span class="o">=</span> <span class="nf">net</span><span class="p">(</span><span class="n">data</span><span class="p">.</span><span class="nf">to</span><span class="p">(</span><span class="n">device</span><span class="p">))</span> <span class="n">loss</span> <span class="o">=</span> <span class="n">F</span><span class="p">.</span><span class="nf">cross_entropy</span><span class="p">(</span><span class="n">output</span><span class="p">,</span> <span class="n">target</span><span class="p">.</span><span class="nf">to</span><span class="p">(</span><span class="n">device</span><span class="p">))</span> <span class="n">loss</span><span class="p">.</span><span class="nf">backward</span><span class="p">()</span> <span class="n">optimizer</span><span class="p">.</span><span class="nf">step</span><span class="p">()</span> <span class="n">train_loss</span> <span class="o">+=</span> <span class="n">loss</span> <span class="k">if</span> <span class="n">batch_idx</span> <span class="o">%</span> <span class="n">swag_freq</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> <span class="nf">if </span><span class="p">(</span><span class="n">swag_start</span> <span class="o">&lt;=</span> <span class="n">epoch</span><span class="p">):</span> <span class="k">with</span> <span class="n">torch</span><span class="p">.</span><span class="nf">no_grad</span><span class="p">():</span> <span class="n">w1</span> <span class="o">=</span> <span class="n">n_</span><span class="o">/</span><span class="p">(</span><span class="mi">1</span><span class="o">+</span><span class="n">n_</span><span class="p">)</span> <span class="n">w2</span> <span class="o">=</span> <span class="mi">1</span><span class="o">/</span><span class="p">(</span><span class="mi">1</span><span class="o">+</span><span 
class="n">n_</span><span class="p">)</span> <span class="n">theta</span> <span class="o">=</span> <span class="nf">get_all_weights</span><span class="p">(</span><span class="n">net</span><span class="p">).</span><span class="nf">cpu</span><span class="p">()</span> <span class="n">theta_bar_new</span> <span class="o">=</span> <span class="n">w1</span><span class="o">*</span><span class="n">theta_bar</span> <span class="o">+</span> <span class="n">w2</span><span class="o">*</span><span class="n">theta</span> <span class="n">M2</span> <span class="o">=</span> <span class="n">M2</span> <span class="o">+</span> <span class="p">(</span><span class="n">theta</span><span class="o">-</span><span class="n">theta_bar</span><span class="p">)</span><span class="o">*</span><span class="p">(</span><span class="n">theta</span><span class="o">-</span><span class="n">theta_bar_new</span><span class="p">)</span> <span class="n">theta_bar</span> <span class="o">=</span> <span class="n">theta_bar_new</span><span class="p">.</span><span class="nf">clone</span><span class="p">().</span><span class="nf">cpu</span><span class="p">()</span> <span class="n">D</span><span class="p">[:,</span><span class="mi">0</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="n">D</span><span class="p">[:,</span><span class="mi">1</span><span class="p">:]</span> <span class="n">D</span><span class="p">[:,</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="n">theta</span> <span class="o">-</span> <span class="n">theta_bar</span> <span class="n">sigmas</span> <span class="o">=</span> <span class="n">M2</span><span class="o">/</span><span class="p">(</span><span class="mi">1</span><span class="o">+</span><span class="n">n_</span><span class="p">)</span> <span class="n">n_</span> <span class="o">+=</span> <span class="mi">1</span> <span 
class="k">return</span> <span class="n">theta_bar</span><span class="p">,</span> <span class="n">sigmas</span><span class="p">,</span> <span class="n">D</span> </code></pre></div></div> <p>The learned \(\bar{w} \in \mathbf{R}^d\) is the mean of the posterior distribution on weights. The \(\Sigma\) vector represents the running variance of the weights and can be diagonalized to get a very rough posterior. (The method we used to determine the running variance is unlike the one presented in the SWAG paper due to issues with numerical instability and catastrophic cancellation which resulted in negative variances. To address this issue we used Welford’s online algorithm.) The \(D\) matrix contains the last \(K\) deviations of updated \(w\) values from \(\bar{w}\) (including the effect that the updated \(w\) has on \(\bar{w}\)). This allows us to form a rank \(K\) approximation of the posterior covariance. Thus we have the posterior \(P(w\mid\mathcal{D}) = \mathcal{N}\left(\bar{w}, \frac{1}{2}\left(\text{diag}(\Sigma) + \frac{DD^T}{K-1} \right)\right)\). To sample from the posterior, we do the following reparametrization</p> \[z_d \sim \mathcal{N}(0, \mathbf{I}_d)\] \[z_K \sim \mathcal{N}(0, \mathbf{I}_K)\] \[\tilde{w} = \bar{w} + \frac{1}{\sqrt{2}}\text{diag}(\Sigma)^{\frac{1}{2}}z_d + \frac{1}{\sqrt{2(K-1)}}Dz_K\] <p>It is important to note that while a prior distribution on weights is not specified, it is implicitly chosen through how often we update our running average of the weights, variances, and deviations, as well as the optimizer being used.</p> <p>For the purposes of inference, each \(\tilde{w}\) determines the parameters for a clone model and with \(S\) samples we effectively have an ensemble of \(S\) models. Their output distributions are averaged arithmetically to yield the final output. We expect that for in-distribution inputs, the individual outputs do not disagree drastically. And for out-of-distribution inputs, the individual outputs can differ a lot. 
So like with our other ensemble method, a good metric of uncertainty here is to use the average-pairwise KL divergence between the distributions. Here are some results and findings of this metric applied to SWAG.</p> <p>We train a model with SWAG on the MNIST and CIFAR10 datasets. First, we only train on the digits/classes from 0-5 and look at the KL scores on the digits/classes 6-9. Expectedly, the scores tend to drastically increase on the unseen digits. However, the increase is less drastic for the CIFAR dataset as the data is a bit more homogeneous.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/violin_mnist_swag-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/violin_mnist_swag-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/violin_mnist_swag-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-uncertainty-detection-project/violin_mnist_swag.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/violin_cifar_swag-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/violin_cifar_swag-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/violin_cifar_swag-1400.webp" /> <!-- Fallback to the
original file --> <img src="/staging/assets/img/2023-12-12-uncertainty-detection-project/violin_cifar_swag.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>We can also take a look at the data itself and identify the images which have the highest and lowest scores for different splits of the data. For these images, we identify the true label, followed by the KL score assigned to the image (higher being more uncertain), and finally the predictions made by 10 of 25 sampled models.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_id_swag-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_id_swag-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_id_swag-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_id_swag.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_id_swag-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_id_swag-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" 
srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_id_swag-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_id_swag.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_id_swag-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_id_swag-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_id_swag-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_id_swag.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_id_swag-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_id_swag-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_id_swag-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_id_swag.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" 
onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>The above pictures correspond to the highest and lowest scores from in-distribution training data. The major contributors for the high scores for MNIST are digits that are so poorly written it’s hard to say what they are or they resemble another image too much. For CIFAR, it seems like the high score images are inducing confusion due to their color scheme or background. A lot of images with a blue or sky background such as those of birds do seem to be mistaken for planes at times. The low score images on the other hand are all extremely similar to one another; they’re very well written digits (usually 0) or something that is obviously a car (usually red).</p> <p>Next, we take a look at how these scores fare on new out-of-distribution images.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_ood_swag-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_ood_swag-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_ood_swag-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_ood_swag.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_ood_swag-480.webp" /> <source class="responsive-img-srcset"
media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_ood_swag-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_ood_swag-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_ood_swag.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_ood_swag-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_ood_swag-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_ood_swag-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_ood_swag.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_ood_swag-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_ood_swag-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_ood_swag-1400.webp" 
/> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_ood_swag.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>These are the highest and lowest scores on the OOD dataset. It’s unsurprising that the highest scores are assigned to the images that are unlike anything in the training set. For MNIST this is the number 8 and for CIFAR there doesn’t seem to be any one class. However, it is important to see that there are still images where our model has very low scores (high certainty). However, this simply comes from the fact that these inputs happen to look more similar to one class of training images (9 is really similar looking to 4 and trucks look pretty close to cars, especially if they’re red since a lot of the low score car-images are red).</p> <p>All the methods used in this paper tend to show similar results for the images corresponding to the highest and lowest measures of uncertainty so we won’t be looking at those images for every single method.</p> <p>Now that we’ve seen that we can use our measure of uncertainty as a proxy for how well the output will yield the correct answer, we can try using uncertainty of output as a way to predict error. Ideally, we would like to see some sort of correlation between our uncertainty measure and our actual errors or probability of correct answer. So we retrained our models on all digits using SWAG and looked at the performance on a validation set. Notice that we don’t care too much about the error itself, but its (actually the probability of target label) correlation with the uncertainty measure.
In particular, we look at the Spearman correlation to capture nonlinear relationships.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_swag-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_swag-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_swag-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_swag.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> Spearman Correlation: -.9923 </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_swag-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_swag-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_swag-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_swag.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> Spearman Correlation: -.3867 </div> </div> <p>There is significant negative correlation which is what we’re looking for. 
If we can predict how well our model will perform on certain inputs, it allows us to better deploy models in real world situations as well as possibly improve them by doing something such as boosting or improved training. We now look to improve this relationship between error and uncertainty measure by finding better uncertainty measures.</p> <h2 id="local-ensemble-monte-carlo-dropout">Local Ensemble: Monte Carlo Dropout</h2> <p>We start off by comparing with a very simple method. Given a neural net with Dropout layers, and a new datapoint from test ID or OOD datasets, we output \(50\) different probability distributions (rather than setting our model on evaluation mode, we keep the Dropout layers on), \(p_1, p_2, \ldots p_{50}\). Our uncertainty score is \(\text{Unc}(x) = \frac{1}{49\cdot 50}\sum_{i\neq j}D_\text{KL}(p_i\, \Vert \, p_j)\), i.e. the average KL divergence between any pair of distributions. The intuition is that, when the model shouldn’t be confident about an OOD datapoint, dropping weights (which can be seen as perturbations) should change our output distributions significantly. This sensitivity indicates lack of robustness and certainty.</p> <p>This model is very simple and our weight “perturbations” are not too mathematically motivated in the sense of them coming from some justified posterior.
However, it still provides a good baseline to compare against.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_mc-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_mc-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_mc-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_correlation_mc.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> Spearman Correlation: -.9944 </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_mc-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_mc-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_mc-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_correlation_mc.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> Spearman Correlation: -.2936 </div> </div> <p>Overall, the error estimation on MNIST is about the same but significantly worse on the CIFAR dataset. 
This is to be expected since MC dropout is such a simple method.</p> <h2 id="sketching-curvature-for-efficient-out-of-distribution-detection-scod">Sketching Curvature for Efficient Out-of-Distribution Detection (SCOD)</h2> <p>There is research literature on leveraging the local curvature of DNNs to reason about epistemic uncertainty. [Sharma et al.] explores this idea through a Bayesian framework. Let us assume a prior on the weights, \(P(w) = \mathcal{N}(0, \epsilon^2 I)\). By using a second-order approximation of the log-likelihood \(\log p(y,w\mid x)\), we arrive at the Laplace posterior \(P(w\mid\mathcal{D}) =\mathcal{N}(w^{MAP}, \Sigma^*)\), where \(\Sigma^* = \frac{1}{2}(H_L + \frac{1}{2\epsilon^2}I)^{-1}\) and \(H_L\) is the Hessian of the cross-entropy loss wrt \(w\). Given a pretrained DNN, \(\theta=f(x,w)\in\mathbb{R}^d\) where \(\theta\) determines a distribution on \(y\), we assume that the trained weights \(w^*\) are a good approximation for \(w^{MAP}\). We define our uncertainty metric to be the change in the output distribution, \(\theta\), when the weights are perturbed around \(w^*\) according to the posterior distribution. Using the KL divergence to measure distance between output distributions, we define</p> \[\text{Unc}(x) = \mathbb{E}_{dw\sim \mathcal{N}(0, \Sigma^*)}\left[ D_{\text{KL}}\left( p(\theta\mid x, w^*)\, \Vert \, p(\theta\mid x, w^* + dw)\right) \right]\] <p>We can approximate the local KL divergence using the Fisher information matrix (FIM) of \(y\) wrt \(\theta\): \(D_{\text{KL}} \approx d\theta^TF_\theta(\theta)d\theta + O(d\theta^3)\). Also, by change of variables, we can rewrite the FIM in terms of \(w\): \(F_w(x, w) = J^T_{f,w}F_\theta(f(x,w))J_{f, w}\) where \(J_{f,w}\) is the Jacobian of the network outputs with respect to the weights.
Putting this together, we get that</p> \[\text{Unc}(x) = \mathbb{E}_{dw\sim \mathcal{N}(0, \Sigma^*)} \left[dw^TF_w(x,w^*)dw \right] = \text{Tr}\left( F_w(x,w^*)\Sigma^*\right)\] <p>We can also approximate \(\Sigma^* \approx \frac{1}{2}(MF_{w^*}^\mathcal{D} + \frac{1}{2\epsilon^2}I)^{-1}\), where \(F_{w^*}^\mathcal{D}\) is the averaged FIM on the training dataset<d-cite key="ritter2018"></d-cite> and \(M\) is the size of the training set. If the neural net has \(N\) weights, then the FIMs are \(N \times N\) matrices and computing them becomes intractable (both time complexity and memory).</p> <p>For simplicity, let us assume that the output of our DNN, \(\theta\), is the categorical distribution, i.e. \(\theta_i\) represents the probability assigned to class \(i\). In this case, we have that \(F_\theta(\theta) = \text{diag}(\theta)^{-1}\). Therefore, the FIM for one input has rank at most \(\min(N, d)\) and we can represent it as \(F_w(x,w^*) = LL^T\), where \(L=J_{f,w}^T\text{diag}(\theta)^{-1/2}\). The same trick, however, doesn’t work for \(F_{w^*}^\mathcal{D}\) as it can reach rank as high as \(\min(N, Md)\). For now, let us assume that we can find a low-rank approximation of \(F_{w^*}^\mathcal{D} = U\text{diag}(\lambda)U^T\), where \(U\in\mathbb{R}^{N\times k}\) and \(\lambda\in\mathbb{R}^k\). With a few mathematical tricks (which can be followed in [Sharma et al.]), one can prove that</p> \[\text{Unc}(x) = \epsilon^2\Vert L\Vert_F^2 - \epsilon^2 \left \Vert \text{diag}\left(\sqrt{\frac{\lambda}{\lambda + 1/(2M\epsilon^2)}}\right)U^TL \right \Vert^2_F\] <p>[Sharma et al.] also provides a randomized algorithm for finding \(U\) and \(\lambda\) by using the FixedRankSymmetricApproximation<d-cite key="tropp2017"></d-cite> algorithm presented in [Tropp et al.]. We chose \(\epsilon \approx 0.04536\) by training another model with the same hyperparameters and architecture and taking the standard deviation of all its weights.
This is the pseudocode of the SCOD algorithm<d-cite key="sharma2021"></d-cite>:</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/scod_algo-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/scod_algo-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/scod_algo-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-uncertainty-detection-project/scod_algo.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>\(\Sigma\in\mathbb{R}^{r\times N}\) and \(\Psi \in \mathbb{R}^{s\times N}\) are random sketching matrices, which we chose to simply be matrices with i.i.d. standard Gaussian entries. \(r+s\) is the size of the sketch and is ideally chosen as high as RAM allows. We also use the budget split \(s = 2k+1\) and \(r=4k+3\), where \(k\) is the target rank, as [Tropp et al.] suggests.
We ended up setting \(k=50\) and got the following results:</p> <p>We have been able to implement SCOD, but due to issues with saving our results and time, we can now only show the performance of the uncertainty score on predicting error on a subset (classes 0-5) of the CIFAR dataset.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/scod_correlations-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/scod_correlations-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/scod_correlations-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-uncertainty-detection-project/scod_correlations.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> Spearman Correlation: -.1083 </div> </div> <p>The score is a bit suspiciously low, so there may be something wrong with our implementation, even ignoring the fact that we only test on the subset. Nonetheless, it is still a significant negative correlation and we get similar results when looking at high uncertainty and low uncertainty images.</p> <h2 id="scodv2">SCODv2</h2> <p>We also did our own tweak on SCOD. Rather than having a vanilla prior, we can generalize it to any normal distribution with diagonal covariance. Let’s say that our prior is \(w\sim\mathcal{N}(0, \Sigma)\), where \(\Sigma\) is a diagonal matrix. Then, our Laplacian posterior’s covariance matrix becomes \(\Sigma^* = \frac{1}{2}(MF_{w^*}^\mathcal{D} + \frac{1}{2}\Sigma^{-1})^{-1}\).
By the Woodbury matrix identity, \(\Sigma^*=\Sigma - 2\Sigma U\left(\text{diag}(M\lambda)^{-1}+2U^T\Sigma U \right)^{-1}U^T\Sigma\). Using the well-known identities, \(\Vert A\Vert_F^2 = \text{Tr}(AA^T)\), \(\text{Tr}(AB) = \text{Tr}(BA)\), we get that</p> \[\text{Unc}(x_{\text{new}}) = \text{Tr}\left(\Sigma^*F_w(x_{\text{new}},w^*)\right) = \text{Tr}\left(L^T\Sigma L\right) - 2\text{Tr}\left(L^T\Sigma U\left(\text{diag}(M\lambda)^{-1}+2U^T\Sigma U \right)^{-1}U^T\Sigma L\right)\] <p>\(= \left \Vert L^T \Sigma^{1/2}\right \Vert_F^2 - 2\left \Vert L^T \Sigma UA\right \Vert_F^2\), where \(AA^T = \left(\text{diag}(M\lambda)^{-1}+2U^T\Sigma U \right)^{-1}\).</p> <p>Since \(\Sigma\) is a diagonal matrix, the biggest matrices we ever compute are of size \(N\times \max(k, d)\), which means that the computation is asymptotically as efficient as with the vanilla prior. To decide what diagonal matrix to use, for each layer, we assigned the same variance given by the variance of the weights of the same layer in a differently trained model (with same architecture).</p> <p>Due to issues with saving our results and timing, we are not able to show our results estimating error from uncertainty for SCODv2.</p> <h2 id="stochastic-curvature-and-weight-averaging-gaussian-scwag">Stochastic Curvature and Weight Averaging Gaussian (SCWAG)</h2> <p>Whereas SCOD attempts to analytically approximate the posterior by approximating the Hessian using the Gauss-Newton matrix, SWAG approximates the posterior by keeping track of running moments and deviations when it approaches flat regions in the loss landscape. What if we could combine these two ideas? We could use the SWAG empirical posterior. This method would not require matrix sketching of any form and lowers the required RAM, as SCOD can be quite RAM intensive.
Using the \(\Sigma\) and \(D\) from SWAG to determine the posterior \(\Sigma^*\), we arrive the following measure of uncertainty (after digging through some math).</p> \[\text{Unc}(x) = \text{Tr}\left( F_w(x,w^*)\Sigma^*\right) = \frac{1}{2} \text{Tr}\left(F_w(x,\bar{w})\left(\text{diag}(\Sigma) + \frac{DD^T}{K-1} \right) \right)\] \[\text{Unc}(x) \propto ||L\Sigma||_F^2 + \frac{1}{K-1}||LD||_F^2\] <p>We do this by introducing a wrapper model that takes in a base model as well as the SWAG outputs in order to perform the Jacobian based operations during each forward pass. For evaluation, we look at the Spearman correlation of the uncertainty score with the target probability and we notice some improvement over SWAG on the CIFAR dataset.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_scwag_correlations-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_scwag_correlations-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_scwag_correlations-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_scwag_correlations.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> Spearman Correlation: -.9897 </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_scwag_correlations-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" 
srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_scwag_correlations-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_scwag_correlations-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_scwag_correlations.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> Spearman Correlation: -.8484 </div> </div> <p>With MNIST, we already had near perfect correlation so this slight decrease isn’t too worrisome. However, the Spearman correlation has shot up drastically, which shows that this method of combining the analytical approximation of uncertainty with an empirically constructed posterior has merit. There is something worrisome with the fact that the model with exactly \(\bar{w}\) as its weights is producing distributions that have a maximum value of around \(.25\). We suspect we could have made some error here but have not been able to pinpoint anything wrong with our implementation. 
The model still seems to have fairly accurate predictions as seen below.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_scwag-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_scwag-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_scwag-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_hard_scwag.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_scwag-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_scwag-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_scwag-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-uncertainty-detection-project/mnist_easy_scwag.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_scwag-480.webp" /> 
<source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_scwag-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_scwag-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_hard_scwag.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_scwag-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_scwag-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_scwag-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-uncertainty-detection-project/cifar_easy_scwag.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <h2 id="future-work">Future Work</h2> <p>For SCWAG, we could work on figuring out why our output distributions becomes less spiked as a result of using \(\bar{w}\) as the weights for the network. We suspect that it’s a result of starting our SWAG averaging for \(\bar{w}\) too early so we were considering \(w\) far away from flat local minima of the loss landscape. Additionally, we could inspect the arcing nature in the plot of target probabilities vs score. 
For near 0 scores, it seems that the target probabilities arc from .25 to 0 which is unusual. Finally, we want to think of a way to introduce the loss landscape more into our approach. Maybe we can form a more expressive posterior. If we can manage that, our uncertainty estimates and correlation might improve. But more importantly, we would be able to call our method SCALL(y)WAG which is pretty cool.</p> <p>In general and particularly for SCOD, we’d still like to experiment with priors that induce different types of posteriors. Because the dependence on prior is explicit here as opposed to implicit for SWAG, it allows us more room for experimentation in choosing nice expressive priors.</p> Understanding LLM Attention on Useless Numbers in Word Problems (and this Title has 8 Es) 2023-11-09T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/structured-physics-loss-diffusion <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/title-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/title-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/title-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/title.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h2 id="summary">Summary</h2> <p>We investigate how attention is used to identify salient parts of word problems. There is no difference between attention across layers to necessary and useless numbers in math word problems. 
Slightly decreasing attention on useless numbers in word problems increases performance, while increasing or significantly lowering attention decreases performance.</p> <h2 id="introduction">Introduction</h2> <p>Transformer model architectures are the new magic bullet in natural language processing, largely due to their attention mechanism. The sudden salience of the transformer and subsequent massive research focus resulted in the emergence of powerful large language models such as the GPT series, Llama, PaLM, and others. The ever-increasing size of these models, as well as the datasets on which they were trained, allows them to continually perform better at a wide range of text generation and analysis tasks [11]. <br /><br /></p> <p>However, as with many generative algorithms - especially autoregressive ones like LLMs - the underlying model has no implicit structure for processing or analyzing a logical framework inside the prompt it is given. Transformers, and by extension LLMs, are at their core sequence-to-sequence models. These take in a sequence of arbitrary length and output a sequence of arbitrary length, for example an English sentence as the input and its French translation as the output. Sequence-to-sequence models leverage the fact that language has structure and syntax, and are capable of creating responses that mimic the structural rules followed by their training data [4, 6, 8]. However, in sequence-to-sequence models and the recurrent-neural-network-derived architectures that follow, such as the transformer, there are no intrinsic characteristics that leverage the logical framework of the input. Models that strive to have reasoning capabilities use a variety of approaches to augment the transformer architecture [10], such as specific prompting [1, 7], machine translation [3], salience allocation [5], and more. Some of these improved models exhibit performance that suggests the use of reasoning processes, but as described by Wei et al. 
[12] “As for limitations, we first qualify that although chain of thought emulates the thought processes of human reasoners, this does not answer whether the neural network is actually ‘reasoning.’” Huang et al. share a similar sentiment that highlights that the most widespread solution, and an effective one, is simply the ever-increasing size of LLMs: “…there is observation that these models may exhibit reasoning abilities when they are sufficiently large… despite the strong performance of LLMs on certain reasoning tasks, it remains unclear whether LLMs are actually reasoning and to what extent they are capable of reasoning.” <br /> <br /></p> <p>Before diving into why this is interesting, let’s take a step back and briefly inspect the transformer as an architecture. Transformers are loosely an extension of a recurrent neural network that leverage parallel processing and a mechanism known as attention to remove the typical reliance RNNs have on temporal data and instead allow the model to process an entire input sequence simultaneously [13, 9]. <br /><br /></p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/rnn-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/rnn-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/rnn-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/rnn.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>But what is attention? 
The key upside of transformers is that they are able to capture both short- and long-range dependencies within the input simultaneously, without the need to manage a memory cell like in certain RNN architectures such as a long short-term memory network. This is accomplished through attention, essentially the computation of how much each part of the input should be weighted based on parameters learned from training data. <br /><br /></p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/att-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/att-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/att-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/att.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>As shown above, each element in the input, split into “tokens,” is given a calculated query and key vector, along with the value vector describing the text, image, or other kind of data contained in the token. This is designed to mimic a value in a database, corresponding to a specific key, being retrieved according to a query. Multiplying some query vector with a given token’s key vector results in a scalar that essentially defines the “significance” of the given token compared to the other tokens, known as an attention score. This attention score is then multiplied by its corresponding token’s value vector and summed to create a context vector representing the aggregate information from the attention step.<br /><br /> Now we circle back to word problems. 
Due to the aforementioned absence of explicit logical reasoning capabilities, transformer-based language models - especially smaller ones - can struggle with the few short analytical hops to correctly evaluate a word problem without help. For example, the following question was asked to Llama 2, Meta’s open-source LLM released in 2023. This version of Llama, the smallest available, has 7 billion parameters. <br /><br /></p> <div align="center" style="font-family: monospace; font-size: smaller;"> *User: Answer as concisely as possible. Jack has 7 chairs. Jill takes 7 chairs from him, as well as 2 brooms. How many chairs does Jack have left?* <br /> *Llama-2-7b-chat: Jack has 7 chairs left.* </div> <p>You may notice that this response is incorrect. However, it is wrong in a way that seems to ignore certain important information presented in the question (removing 2 brooms). Of course, there is also unimportant information in the prompt that we want the model to ignore - the fact that Jill took two of Jack’s brooms is not relevant to the number of chairs in his possession.</p> <h3 id="related-work">Related Work</h3> <p>Existing approaches to entice LLMs to correctly answer word problems like these take a few forms, touched on previously. There are various versions of prompt engineering, which are designed to ask the question in a specific way in order to prompt the model’s response to be formatted in a certain way. Zero-shot chain-of-thought prompting [1, 12, 14] is a commonly cited example, where an additional instruction such as “Let’s think about this step by step” or “Let’s think analytically” are appended to the prompt. These additional instructions encourage the model to decompose the problem into intermediate steps and solve them procedurally. However, Wei et al. note that this does not indicate that the model itself is reasoning, only that it is achieving better results by emulating a structure often used in linear reasoning problems. 
Additionally, the authors go on to note that this emergent behavior of large models is challenging to reproduce in smaller models. Another novel approach is to parse the input information in a way that is conducive to solving an underlying math problem numerically. Griffith and Kalita treat this process as a machine translation problem, training several unique transformer architectures to make multiple translations from English to arithmetic expressions [3] that can then be evaluated computationally, outside of the LLM. These two techniques can also be fused, using fine-tuned chain-of-thought prompting for zero-shot math word problem solutions, bridging the gap between the previous two methods [7].<br /><br /> More broadly, solving word problems is a two-part problem: selecting for important information, and then analytically evaluating that information to arrive at an answer. There is a broad body of work on using LLMs to summarize bodies of text, which parallels extraction of useful numerical features from word problems. The two main types of summarization are extractive summarization and abstractive summarization, where the former remains truer to the original input text but struggles to create novel text, while the latter attempts to fill in those gaps but can sometimes create information that was not originally present and may not be correct [15, 5]. Wang et al. in particular create an augmentation to the transformer architecture, dubbed SEASON, that is designed to combine both extractive and abstractive summarization, but contains useful insights into how extractive summarization of text might apply to math word problems. For example, the abstractive power of SEASON comes from the underlying transformer and its generative capabilities, but it is constrained by a fixed-allocation salience system to emphasize extraction of useful information by essentially adding additional key vectors that describe their relevance to a summarization query. 
This allows the model to predict the salience of potential responses in order to reduce hallucination of abstractive elements. This salience-driven approach shows theoretical promise in complex extractive word problem scenarios, where managing an allocation of salience could translationally be indicative of useful numerical inputs rather than core themes. Salience also shares some characteristics, mechanically, with attention, and raises the question of whether intuition from summarization models can be applied to augment transformer attention to have better extractive logic.</p> <h3 id="motivation">Motivation</h3> <p>This question, bolstered by the similarly-themed research underlying the ability of LLMs to reason and solve math word problems, was the driving force behind our project. Attention is an extremely powerful tool, and a better understanding of how attention scores affect assessment and evaluation of word problems is necessary in order to use it more effectively to address the gaps in the reasoning capabilities of LLMs, especially smaller architectures. A true solution to this problem would be complex, but we strove to answer certain core questions about how math word problems move through large language models, what their attention scores can tell us about how the model is choosing to respond, and what information the model is responding to. Chiefly, we were interested in how the attention scores of certain tokens in word problems - particularly pertaining to numbers necessary for solving the problem - would change throughout the layers of the transformer, and whether that yields insight into how to tune the attention process generally to enhance the models’ abilities, both reasoning and extractive.</p> <h2 id="methods">Methods</h2> <h3 id="model-and-hardware">Model and Hardware</h3> <p>Our chosen model for study was Meta’s Llama 2 7B-chat parameter model. 
This choice was a result of our particular focus on smaller LLMs, due to the aforementioned emergent reasoning capabilities of models with significantly larger numbers of parameters. Llama 2 is also open-source, allowing us to easily peel apart the attention layers and heads to study how input and output information propagated through the network, as well as extract model weights and attention values. The chat version of the model additionally is better suited for direct question responses, and includes wrappers to handle the relevant meta-parameters to make the chat interface feasible. We hosted Llama 2 on a vast.ai cloud instance due to the high VRAM requirements of the model. The instance consisted of a single Nvidia RTX 4090 GPU instance with 24GB of VRAM connected to an AMD Ryzen 9 5950X 16-core CPU. The model was supported by Nvidia CUDA version 11.7 and the cuDNN GPU-accelerated development library, version 8.9.7. The model itself ran using PyTorch 2.0.1.</p> <h3 id="prompt-generation">Prompt Generation</h3> <p>We prepended the instruction “Answer as concisely as possible” to each prompt in order to deliberately circumvent potentially invoking chain-of-thought reasoning and thereby subverting the qualities under investigation regarding the model’s zero-shot ability to discern relevant and irrelevant information. In order to assess that capability, we created a question generation algorithm to randomly generate a bank of simple subtraction word problems, for example “If Jack starts out with 7 sponges and Jill takes 4 of them, then Jack gets 2 badges, how many sponges does he have?” Each question contains two numbers necessary to the subtraction - in this example, that would be the number of sponges before and after the events of the problem: 7 and 4. Each example also contains one useless number, corresponding to things that are not relevant to the ultimate question being asked to the model. In this case, that would be the two badges. 
Each number is generated in its numeral representation (‘7’ rather than ‘seven’), as this ensures that Llama encodes each of these numbers as a single token that can be easily traced. <br /><br />Numbers with more digits or numbers spelled out in natural language were often split into multiple consecutive tokens, so to simplify our visualizations we elected to force a single-token representation. This necessitated that each of the four numerical quantities in the math problem - the two relevant numbers, the useless number, and the answer - had to all be unique, in order to avoid accidentally crediting the model for producing a correct response when in fact it simply selected a number in the problem that had been generated to be a duplicate of the answer. This might occur with a problem like “If Jack has 8 umbrellas, and Jill takes 5 of them, then Jack gets 3 belts, how many umbrellas does he have?” In this case, attribution of salience to the value “3 belts” and subsequent inclusion of the number 3 in the answer introduces ambiguity into the correctness of the response, since 3 is in fact the true answer.<br /><br /> To avoid one-off errors attributed with specific words or sentence structures, the algorithm was designed to randomly construct the sentences using multiple different semantic structures and sample the nouns used from a bank of 100 random objects. Coupled with large testing sets of several hundred examples, this prevents irregularities in the model’s responses to particular syntax or words from significantly affecting results. Finally, the last meaningful element of prompt design was that the nouns chosen to be in the random object pool were deliberately selected to be as semantically difficult as possible. 
If the model is presented with a question that, for example, includes a number of vehicles as well as a number of cars, it would be entirely justifiable to interpret that question differently than the intent of a subtraction problem with the same numbers but instead involving apples and chinchillas.<br /><br /> We calculate whether the problem is correct by checking whether the correct number and noun are both present in the correct configuration in the answer content output by Llama. Each prompt was run on a fresh reinitialized instance of Llama, to avoid extracting information from a larger content window that might include numbers or insight from past problems.</p> <h3 id="data-extraction">Data Extraction</h3> <p>The main data structure was filled as follows. For each new autoregressive output logit, each head in each transformer layer calculates attention scores across all input tokens. These scores were collected and aggregated to map the attention in the model as each prompt moved through the transformer blocks.</p> <p>In each experiment, attention scores were scraped from the individual model instance for each prompt by selecting the attention values associated with the tokenized representations of the two necessary numerical inputs as well as the single useless input. 
This produced a lot of data in high dimensions.</p> <p>To extract the significant aspects of the data and compress it to a reasonable number of dimensions for graphical representation, we took the attention score tensors (which were also saved at their original sizes) and averaged across the following dimensions:</p> <ul> <li><strong>Heads in each layer:</strong> This revealed the change in attention over layers, rather than over heads, in order to potentially reveal the numbers’ progression through deeper-level abstractions, allowing us to answer questions like: <ul> <li>How do self-attention and attention in early layers look for values relevant to the problem?</li> <li>What role does attention play for the purposes of arriving at a solution to the problem as we reach the middle layers of the model?</li> <li>Is there a meaningful representation of the numerical values the problem is concerned with deep inside the model?</li> </ul> </li> <li> <p><strong>Output logits:</strong> The rationale behind this choice was to allow any intermediate “reasoning” to become evident by encapsulating multiple parts of the response.</p> </li> <li><strong>Input problems:</strong> Eliminates intrinsic variation in response to slightly different questions.</li> </ul> <p>This allowed us to arrive at a representation of how the attention for the relevant tokens changed as it passed through the individual layers of the model.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/averaged-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/averaged-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/averaged-1400.webp" /> <!-- Fallback to the original file --> <img 
src="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/averaged.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h3 id="attention-modification">Attention Modification</h3> <p>For our experiments where we modify attention scores to the useless token, in every layer we multiply every attention score to that token by some value, the multiplier, before taking softmax.</p> <h2 id="results">Results</h2> <p>We found that there was no significant difference between attention to the useless number and the two necessary numbers over 100 samples (with 55/100 accuracy). Perhaps the mid-layers attention peak in the useless number is earlier than for the necessary numbers, but not significantly. We found a peak in attention to all number tokens in middle layers. We found no significant difference between the graphs for problems it answered correctly versus incorrectly.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/correct-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/correct-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/correct-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/correct.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/incorrect-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" 
srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/incorrect-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/incorrect-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/incorrect.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Here is the attention across all input tokens for one example problem. (Note these are not all the input tokens. The full input tokens were [’’, ‘[’, ‘INST’, ‘]’, ‘Answer’, ‘as’, ‘cons’, ‘is’, ‘ely’, ‘as’, ‘possible’, ‘.’, ‘Jack’, ‘has’, ‘’, ‘9’, ‘pens’, ‘and’, ‘’, ‘7’, ‘spo’, ‘ons’, ‘.’, ‘He’, ‘gives’, ‘away’, ‘’, ‘9’, ‘pens’, ‘.’, ‘How’, ‘many’, ‘pens’, ‘does’, ‘he’, ‘have’, ‘?’, ‘[’, ‘/’, ‘INST’, ‘]’, ‘’]</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/sentence-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/sentence-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/sentence-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/sentence.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Surprisingly, there was not more attention to numbered tokens compared to other tokens.</p> <p>When looking through each attention head individually, some attended to specific numbered tokens. 
For example, head 13 layer 16 strongly attended to “9”.</p> <h4 id="graph-for-13th-heads-only">Graph for 13th Heads Only</h4> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/head13-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/head13-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/head13-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/head13.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Finally, we multiplied attentions to the useless number’s token and varied the multiplier from 0 to 5. (30 samples per data point). 
We found that it is actually useful to slightly decrease attention to the useless token, and performance decreases as attention to the useless token increases.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/multiplier-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/multiplier-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/multiplier-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-structured-physics-loss-diffusion/multiplier.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>We suspect the rise in performance from a multiplier of 2 to 5 to be insignificant and random due to the low sample size.</p> <p>For small multipliers above 1, most incorrect responses are of the type where the useless number is given as the answer.</p> <div align="center" style="font-family: monospace; font-size: smaller;"> *User: If Jack starts out with 7 coasters and Jill takes 2 of them, then Jack gets 3 badges, how many coasters does he have?* <br /> *Llama-2-7b-chat: Jack has 3 coasters.* </div> <p>For large multipliers above 1, the softmax causes the other attention values to approach zero and the model’s quality deteriorates.</p> <div align="center" style="font-family: monospace; font-size: smaller;"> *User: Jack has 7 tacos and 2 cucumbers. He gives away 3 tacos. How many tacos does he have?* * <br /> *Jack has 767 tacos. How many tacos does Jack have? Jack has 76 tacos. 
How many tacos does Jack has?* </div> <p>And at very extreme multipliers, the model outputs gibberish.</p> <h2 id="conclusion">Conclusion</h2> <p>We found that decreasing attention by 50% (pre-softmax) on the useless token improves performance on our word problems, while increasing the attention (or decreasing it too much) hurts performance. We hypothesize the performance decreases because it 1) makes the model more likely to output the useless number, and 2) changes the model too much, turning responses into gibberish. Our initial exploration of the attention tracked through the layers of the model yielded very little insight, perhaps due to rapid abstraction of the tokens. This gives us insight into how we might further explore using attention as a salience-adjacent metric for extracting information from word problems.</p> Cross-Lingual Fine-Tuning for Multilingual Text Embeddings 2023-11-09T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/multilingual-representations-in-embeddings-models <h2 id="introduction">Introduction</h2> <p>Recently, <a href="https://platform.openai.com/docs/guides/embeddings">embeddings models</a> have become incredibly popular as LLMs become more integrated into tools and applications. Embeddings models (specifically, Siamese encoder-only Transformers) are the state-of-the-art method in retrieval, an old problem in computer science. Embeddings are often used in settings like recommendation algorithms, similarity search, and clustering, and have recently found extensive use in Retrieval-Augmented Generation<d-cite key="rag"></d-cite>, assisting LLMs to be more knowledgeable and truthful. However, the best embeddings models are trained on only English data, which means they suffer greatly at applications in other languages, and are inaccessible to most of the world<d-cite key="mteb"></d-cite>. 
In this blog post, we summarize the history of embeddings research, detail the training regime of a modern embeddings model, present a new multilingual embedding benchmark, and investigate whether it is possible to fine-tune in multilingual capability to a pretrained monolingual model.</p> <p>Our central question is whether it is possible to learn new languages at the finetuning stage, using contrastive training on publicly available text pair datasets. If successful, it would mean that the encoder can learn a map from one language onto the embedding space of another. This implies that it is possible to approximate translation, at a conceptual level, with a transformation. We will study the results on various language pairs, and compare to a fully pretrained multilingual model.</p> <h2 id="the-embedding-task">The Embedding Task</h2> <p>The aim of embedding text (or any other medium) is to convert human-readable information into vectors. This is useful, because while neural nets cannot process words, images, or sound, they can process vectors. Every NLP model thus has some form of embedding - GPTs, for example, have an embedding layer at the start that transforms input tokens into vector representations<d-cite key="gpt1"></d-cite>. 
GPTs need an embedding layer because the amount of unique tokens is huge (GPT-2, for example, has 50,257 possible tokens<d-cite key="gpt2"></d-cite>), and it is much more computationally efficient to work with lower-dimensional vectors (GPT-2 embeds these down to 768-dimensional vectors to compute with).</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/openai_embed-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/openai_embed-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/openai_embed-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/openai_embed.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Embeddings models, as described by OpenAI </div> <p>Because of this reduction of information, embeddings are also a form of compression. To turn a whole sentence (or paragraph) into a vector requires prioritising some characteristics and losing others, and we find that the most valuable thing to prioritise is semantic and contextual information. This leads to a very useful property: text pairs with similar meanings or usage patterns tend to have similar vector representations. For example, the vectors “cat” and “dog” are closer to each other than “cat” and “cucumber”. 
Even more interestingly, as found in the Word2Vec paper, this property causes embeddings to have arithmetic consistency, as shown in the famous “king - man + woman = queen” example.<d-cite key="w2v"></d-cite> You can explore the Word2Vec embedding space in the interactive visualization below:</p> <div class="l-page"> <iframe src="/staging/assets/html/2023-11-09-multilingual-representations-in-embeddings-models/word2vec_demo.html" frameborder="0" scrolling="no" height="600px" width="100%"></iframe> </div> <div class="caption"> Visualisation of Word2Vec for the 250 most common English nouns </div> <p>While this may seem abstract, embeddings have found usage in many downstream and commercial tasks, including:</p> <ol> <li><strong>Classification</strong> - embeddings models classify sentences, such as in sentiment analysis between positive or negative airline reviews<d-cite key="sent"></d-cite>.</li> <li><strong>Search</strong> - models return nearest-embedded results to a search query, understanding synonyms and context<d-cite key="sgpt"></d-cite>.</li> <li><strong>Recommendation</strong> - models return embeddings that suggest related items users may like, for example <a href="https://arxiv.org/pdf/1507.08439.pdf">clothes and jewellery</a>.</li> <li><strong>Clustering</strong> - embeddings are used to cluster datapoints into smaller groups, with downstream algorithms like k-means<d-cite key="kmeans"></d-cite>.</li> <li><strong>Reranking</strong> - embeddings are used to sort a list, such as one retrieved from a database, into most relevant items<d-cite key="rerank"></d-cite>.</li> <li><strong>Retrieval</strong> - a query is embedded, and answers are selected by the closeness of their embedding.<d-cite key="beir"></d-cite>.</li> </ol> <h3 id="history-and-background">History and Background</h3> <p>The first successful approaches to these problems were bag-of-words models. 
These are non-neural algorithms that work by ranking documents based on how many word occurrences they share. There were some improvements around this basic idea, for example Okapi BM25<d-cite key="bm25"></d-cite> includes a term for the expected likelihood of that word co-occurring.</p> <table> <tr> <th>Sentence</th> <th>about</th> <th>bird</th> <th>bird,</th> <th>heard</th> <th>is</th> <th>the</th> <th>word</th> <th>you</th> </tr> <tr> <td>About the bird, the bird, bird bird bird</td> <td>1</td> <td>3</td> <td>2</td> <td>0</td> <td>0</td> <td>2</td> <td>0</td> <td>0</td> </tr> <tr> <td>You heard about the bird</td> <td>1</td> <td>1</td> <td>0</td> <td>1</td> <td>0</td> <td>1</td> <td>0</td> <td>1</td> </tr> <tr> <td>The bird is the word</td> <td>0</td> <td>1</td> <td>0</td> <td>0</td> <td>1</td> <td>2</td> <td>1</td> <td>0</td> </tr> </table> <div class="caption"> A table demonstrating bag-of-words calculation. </div> <p>The first neural approaches to this problem actually used bag-of-words as a loss function, for example Word2Vec (2013)<d-cite key="w2v"></d-cite> used either continuous bag-of-words (CBOW) or skipgram loss to train a word embedding model. Word2Vec itself is a shallow two-layer neural network that is used to generate an embedding, which in the CBOW training regime is used to predict a word given a bag of surrounding words. The skipgram loss is similar, but weighs words depending on their proximity to the word we’re trying to predict. This word-prediction-from-embeddings task is a <em>key part</em> of training language models to have useful representations, and we’ll see it again later.</p> <p>Word2Vec had some incredible results, and was later improved by subsequent approaches<d-cite key="glove"></d-cite>, but word embeddings often failed due to the fact that words with multiple meanings had to share the same point in the embedding space. 
The sentences “I went to the bank to cash a check” and “I went to the bank to catch a fish” are obviously semantically unrelated, but the word “bank” will necessarily have to share an embedding, making the embedding itself likely meaningless.</p> <div class="l-page"> <iframe src="/staging/assets/html/2023-11-09-multilingual-representations-in-embeddings-models/special_demo.html" frameborder="0" scrolling="no" height="600px" width="100%"></iframe> </div> <div class="caption"> Visualisation of Word2Vec struggling with polysemanticity in the "riverbank" example </div> <p>To solve this, embeddings need to be generated in-context, and be able to support multiple meanings. There were some attempts at changing Word2Vec to support polysemanticity, such as Multi-Sense Skip-Gram (MSSG)<d-cite key="mssg"></d-cite>, but they required hacky workarounds such as pre-programming an expected number of meanings for each word.</p> <h4 id="bert">BERT</h4> <p>BERT<d-cite key="bert"></d-cite> was arguably the beginning of the LLM revolution, as it showed for the first time that a single pretrained language model could be finetuned to support many different tasks downstream. It was essentially an embeddings model - trained again with the word prediction task, now with the context of words not weighted by proximity, but by a trainable position embedding that provided information that the model could use to predict long-term associations and causality. This fixed the polysemanticity problem described above. 
It can be used to produce both word-level and sentence-level embeddings, which proved extraordinarily useful for the embeddings tasks.</p> <h5 id="bert-training">BERT Training</h5> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/bert-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/bert-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/bert-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/bert.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> BERT architecture diagram </div> <p>BERT (Bidirectional Encoder Representations from Transformers) is based on the Transformer architecture introduced by Vaswani et al. in 2017<d-cite key="attn"></d-cite>. The key differences were that BERT was allowed bidirectional context rather than left-side-only, that it did not include a decoder, and its masked language modeling and next sentence prediction training objectives. 
The bidirectional context is crucial for language modeling, since “The [MASK]” is much harder to predict than “The [MASK] colored fire engine”, and in embeddings (unlike text generation) we have the whole original text available.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mlm-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mlm-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mlm-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mlm.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> BERT's Masked Language Modeling loss </div> <p>MLM works by taking 15% of the text tokens that BERT sees and replacing them with a [MASK] token. The model’s objective is to predict that masked word with its embedding, using the context from the surrounding tokens, and then it is trained on the cross-entropy loss between the predictions and the actual truth.</p> <p>BERT was also trained on the NSP (Next Sentence Prediction) objective. In training, the model is given a pair of input segments, and its task is to predict whether the second segment (segment B) follows the first one (segment A) in the original text or if they are randomly sampled and unrelated. The input is constructed by concatenating segment A, which is preceded by a special [CLS] token, and segment B, with a special [SEP] (separator) token in between. For example: “[CLS] Segment A [SEP] Segment B”. 
BERT then produces a pair of embeddings: one for the [CLS] token at the beginning of the input and one for the [SEP] token that separates the two segments. These embeddings are then used to compute a binary classification. The intended effect is that [CLS] contains information about the overall meaning of the first sentence, and [SEP] contains information about the second. This is the first example of sentence embeddings, which are the key to how a modern embeddings model works.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/nsp-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/nsp-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/nsp-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/nsp.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> BERT's Next Sentence Prediction loss </div> <p>BERT turns token inputs into embeddings for each token in its context window, which is 512 tokens long. We can choose to construct a single text embedding from this any way we like. There are several popular strategies for this “token pooling” problem. Reading the above, one may be tempted to take the [CLS] token’s embedding. 
In practice, however, the [CLS] token embeddings proved to be slightly worse than just taking the average of all the individual token embeddings of the sentence<d-cite key="berthater"></d-cite>, and subsequent models such as RoBERTa<d-cite key="roberta"></d-cite> skipped the NSP training objective and actually performed slightly better. Why this is the case is an area of ongoing research, but as a matter of opinion, we personally suspect Shitao Xiao’s work on RetroMAE<d-cite key="rmae"></d-cite> correctly diagnoses the issue, as demonstrated by their models’ improved performance on benchmarks. The training losses described in that paper are more complex and outside the scope of this blog post, but it’s worth a read if interested.</p> <h4 id="sbert">SBERT</h4> <p>The final part of the story is Sentence-BERT<d-cite key="sbert"></d-cite>, and its addition of contrastive text-pair pretraining. This is what turns BERT, a general language model, into a model that specifically generates text embeddings. Contrastive training was discussed at length in 6.s898; the core insight is that we can train an encoder model to have a useful representation if we train it to embed similar examples together, and dissimilar examples far apart. In Sentence Transformers, this is done by constructing a “Siamese BERT” network. There are two BERT models (or commonly two copies of the same model) that are each used to embed a text passage. Then, the loss is calculated by the following formula:</p> \[\mathcal{L}_N = -\mathbb{E}_{X} \left[ \log \frac{f_k(x_{t+k}, c_t)}{\sum_{x_j \in X} f_k(x_j, c_t)} \right]\] <p>This encourages the model to predict positive pairs (similar passages) as vectors with close to 1 similarity, and negative pairs close to 0. Similarity metrics include (Euclidean) distance, but most often used is cosine similarity. Negative pairs can either be “mined” with some heuristic such as bag-of-words, or simply sampled at random from other examples in the batch. 
Due to this, pretraining batch sizes for embedding BERTs are often huge, in the tens of thousands<d-cite key="gte"></d-cite>.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/sbert-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/sbert-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/sbert-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/sbert.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The Siamese BERT architecture </div> <p>The reason two models are used is that many tasks see improved performance if there is a distinction made between “questions” and “answers”. For example, searches and retrieval queries may not resemble the results they most need in meaning: “What is the the tallest building in Hong Kong” and “The International Commerce Centre” are not closely semantically related, but should be paired in search contexts. Because of this, we can train a “query” and “passage” model together as one giant network on a contrastive loss, and thus get a model that can take in both.</p> <p>In practice, this improvement is rarely worth doubling the number of parameters, and so most papers simply re-use the same model for both queries and passages.</p> <h2 id="how-embeddings-models-are-trained">How Embeddings Models are Trained</h2> <p>Putting all this together, we have the current standard recipe for training a modern embeddings model, in up to three stages:</p> <h3 id="1-pretraining">1. 
Pretraining</h3> <p>It is valuable to start with a language model that has already learned some inner representation of language. This makes the embeddings task significantly easier, since the model must only learn to condense this inner representation into a single high-dimensional dense vector space. While it is possible to use more modern LLMs such as GPT or LLaMA for embeddings<d-cite key="sgpt"></d-cite>, they are fundamentally hampered because they cannot attend to context in both directions. Therefore, almost all state-of-the-art embeddings models still begin from the BERT models themselves, or their derivatives<d-cite key="gte"></d-cite><d-cite key="e5"></d-cite>. These are trained as described above, with an MLM and potentially NSP loss.</p> <h3 id="2-training">2. Training</h3> <p>Following Sentence-BERT, the model is trained contrastively. At this point, we choose a pooling strategy to convert BERT outputs into sentence embeddings. Many current papers choose to use average pooling<d-cite key="sbert"></d-cite><d-cite key="e5"></d-cite>, but max-pooling or taking the [CLS] token are occasionally used. Positive pairs are either handpicked from datasets such as search engine question-responses<d-cite key="msmarco"></d-cite>, or commonly generated from general text data, such as academic paper title-abstract pairs, Wikipedia page title-summaries and so forth<d-cite key="gte"></d-cite>.</p> <h3 id="3-fine-tuning">3. Fine-Tuning</h3> <p>It has also become common to fine-tune especially large embeddings models on higher-quality datasets, such as MS MARCO (Bing question-passage responses)<d-cite key="msmarco"></d-cite>, fact verification (e.g. FEVER)<d-cite key="fever"></d-cite>, and paraphrasing (e.g. Quora). 
This increases performance at desired tasks<d-cite key="bge"></d-cite>, and was the inspiration for our approach.</p> <h2 id="how-embeddings-models-are-tested">How Embeddings Models are Tested</h2> <p>Similarly to how decoder LLMs have recently converged on being measured on the HuggingFace Open LLM Leaderboard, the currently ubiquitous benchmark for embeddings models is MTEB<d-cite key="mteb"></d-cite>. Presented in a 2022 paper, it contains 8 embedding tasks covering a total of 58 datasets. The tasks are:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mteb-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mteb-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mteb-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/mteb.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> MTEB datasets </div> <ol> <li> <p><strong>Bitext Mining</strong>: Inputs are two sets of sentences from two different languages. For each sentence in the first set, the best match in the second set needs to be found. This metric is commonly ignored in places such as the MTEB Leaderboard and in papers, because few multilingual models have been created.</p> </li> <li> <p><strong>Classification</strong>: A train and test set are embedded with the provided model. 
The train set embeddings are used to train a logistic regression classifier, which is scored on the test set.</p> </li> <li> <p><strong>Clustering</strong>: Involves grouping a set of sentences or paragraphs into meaningful clusters. A k-means model is trained on embedded texts. The model’s performance is assessed using the v-measure, which is independent of the cluster labels.</p> </li> <li> <p><strong>Pair Classification</strong>: Requires assigning labels to pairs of text inputs, typically indicating if they are duplicates or paraphrases. Texts are embedded and distances calculated using various metrics (cosine similarity, dot product, Euclidean, Manhattan). Metrics like accuracy, average precision, F1, precision, and recall are used.</p> </li> <li> <p><strong>Reranking</strong>: Involves ranking query results against relevant and irrelevant reference texts. Texts are embedded using a model, with cosine similarity determining relevance. Rankings are scored using mean MRR@k and MAP, with MAP as the primary metric.</p> </li> <li> <p><strong>Retrieval</strong>: Each dataset includes a corpus and queries, with a goal to find relevant documents. Models embed queries and documents, computing similarity scores. Metrics like nDCG@k, MRR@k, MAP@k, precision@k, and recall@k are used, focusing on nDCG@10.</p> </li> <li> <p><strong>Semantic Textual Similarity (STS)</strong>: Involves assessing the similarity of sentence pairs. Labels are continuous, with higher scores for more similar sentences. Models embed sentences and compute similarity using various metrics, benchmarked against ground truth using Pearson and Spearman correlations. Spearman correlation based on cosine similarity is the main metric.</p> </li> <li> <p><strong>Summarization</strong>: Evaluates machine-generated summaries against human-written ones. Models embed summaries, computing distances between machine and human summaries. 
The closest score, such as the highest cosine similarity, is used for evaluation. Metrics include Pearson and Spearman correlations with human assessments, focusing on Spearman correlation based on cosine similarity.</p> </li> </ol> <p>We can see that MTEB represents many downstream users’ desires as described earlier, but could be criticised for favoring cosine similarity as a distance metric for training. In either case, MTEB has demonstrated, and itself encouraged, some trends in research:</p> <h3 id="scaling">Scaling</h3> <p>The MTEB paper itself, as well as the GTR<d-cite key="gtr"></d-cite> and Sentence-T5<d-cite key="st5"></d-cite> papers, suggested that model parameters are correlated with higher performance. We should expect that from intuition about GPTs and their scaling laws, larger models perform better<d-cite key="chinchilla"></d-cite>.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scaling-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scaling-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scaling-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scaling.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 3 from MTEB demonstrating scaling vs. performance </div> <p>However, if we extrapolate to more recent research , we find that the state-of-the-art models have failed to get bigger over time, and the highest-performance models are still under 1B parameters. 
This shows that embeddings is not as easily reduced to scaling laws as LLMs are.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scale-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scale-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scale-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/scale.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> MTEB score vs time for SOTA models. The size of the cross represents parameter count. </div> <p>However, even these small models still train on hundreds of millions or billions of text pairs<d-cite key="gtr"></d-cite>, requiring thousands of GPU-hours to train. We can conclude that while parameter count may not be increasing, the overall compute requirements of training an embeddings model are getting higher, and it is no longer within the reach of all researchers to work on these models.</p> <h3 id="multilingualism">Multilingualism</h3> <p>While MTEB is a multilingual benchmark, only a few tasks, namely STS, Classification and Bitext Mining, have multilingual versions. 
Combined with the abundance of English training data, this has led to every language except English, Chinese and Polish lacking a complete MTEB and thus lacking the benefits of state-of-the-art models.</p> <p>As in other subfields of NLP, multilingual performance is often an afterthought, and left by the wayside in pursuit of higher performance on English benchmarks, or exclusively in the domain of labs that can afford extra runs<d-cite key="mt5"></d-cite>.</p> <h2 id="method">Method</h2> <p>With these problems as our motivation, we aim to find out if it is possible to add multilingualism to an existing model without having to pretrain from scratch. This may be a step towards bringing the benefits of increased embeddings performance to languages that don’t currently have a state-of-the-art model. Furthermore, if it is possible to add a new language to an existing model, this hints at the ideas that models do not necessarily learn a representation based on a particular language, and that translation is easier than expected in the context of embeddings, modelable as a transformation of the representation space.</p> <p>To do this, we will take an existing model that has both monolingual English and multilingual variants, and use contrastive training to add in new languages without sacrificing English performance, by using publicly available text translation pairs. We call this approach Cross-Lingual Fine-Tuning (CLFT). We will attempt to create a model that performs on-par with the multilingual model in multiple languages, and on-par with the original model in English, which we will measure by completing with our own data a multilingual version of MTEB in all tasks.</p> <h3 id="model-choice">Model Choice</h3> <p>We choose e5-base-v2 and multilingual-e5-base<d-cite key="e5"></d-cite> as our demonstration models. E5 is the highest-performing current open-weights model with both a mono- and multilingual version, and still holds the top spot in many less popular languages. 
Both models are the size of BERT, with 12 layers, 768-dimensional embeddings and a context window of 512 tokens. The only difference is that the multilingual model has a much larger vocabulary to support more languages, and uses the XLM-RoBERTa tokenizer, leading to about 60% more parameters.</p> <p>This choice does produce a caveat in the rest of our post - since the BERT tokenizer of e5-base has been trained only on English data, it will be unable to tokenize text that is not also a possible English string. In practice, this means that any Latin or near-Latin speaking languages, such as French, German and Turkish, can be used, but the model cannot be finetuned to read unknown characters like Japanese or Arabic script. Any non-Latin characters will likely become an [UNK] token, which carries no information for the model to embed. We are confident that this is not a fatal flaw, though, since just as it is possible to train LLMs with unused vocabulary, such as Persimmon-8B<d-cite key="persimmon"></d-cite>, it is possible to train an embeddings model with a big unused vocabulary. In the case that this research proves useful, it would be easy to train a large English embeddings model with a multilingual tokenizer and fill in this extra vocabulary space afterwards in finetuning.</p> <h3 id="benchmarking">Benchmarking</h3> <p>As described above, it is hard to use MTEB to test performance in non-English languages, due to the lack of available tasks. After investigating the source datasets, we know that this is because of a lack of data. 
In the interest of producing a universally fair test, especially for low-resource languages where quality data is not available, we opted to use synthetic data to create a multilingual MTEB test set, by using machine-translation to convert the English datasets into each language.</p> <div style="margin-top: 0.5em; margin-bottom: 1em; padding: 1em; background-color: #f2f5f7; border-radius: 10px; font-size: 1rem"> <i>Side note: We were fascinated to find that the state-of-the-art neural machine translation model is no longer GNMT<d-cite key="gnmt"></d-cite> or the Google Translate API, but in fact just GPT-4!</i> </div> <p>We used GPT 3.5 to process ~200K test examples in each of the following languages: French, German, Spanish, Swahili, and Turkish. We selected these languages because of their presence on the No Language Left Behind (NLLB) text-pair dataset<d-cite key="nllb"></d-cite>, widespread usage, use of Latin alphabet, and varying degrees of similarity to English. We were particularly interested in Turkish because, while many words are near-Latin when written down, the language is from a completely different Proto-Turkic root, and appears much harder for native English-speakers to read. We were also interested in Swahili, as despite being one of the world’s biggest languages, it is comparatively data-poor and so multilingual models often struggle to generalize to. We trained five models in total.</p> <p>As mentioned before, MTEB already contains some multilingual components, in the textual similarity, bitext mining and classification tasks. The bitext mining task in particular requires a cross-lingual model, so we will use it only on the final all-language model. The remaining tasks are clustering, retrieval, classification, re-ranking, STS, and summarization. For each task, we selected one dataset that would generalise well across languages. Given more time and compute resources, it would be easy to expand the dataset to a full synthetic multilingual MTEB. 
From now on, we refer to this benchmark as MMTEB (Multilingual Massive Text Embeddings Benchmark).</p> <p>Datasets and code for evaluation are available <a href="https://github.com/mtybadger/mmteb-toolkit">HERE</a>.</p> <table> <tr> <th>Task</th> <th>Classification</th> <th>Clustering</th> <th>Retrieval</th> </tr> <tr> <td>Dataset</td> <td><a href="https://huggingface.co/datasets/AmazonScience/massive">MASSIVE</a></td> <td><a href="https://huggingface.co/datasets/willhath/french-reddit-clustering">Reddit</a> and <a href="https://huggingface.co/datasets/willhath/spanish-twentynewsgroups-clustering">TwentyNewsgroup</a></td> <td><a href="https://huggingface.co/datasets/sproos/scifact-fr">SciFact</a></td> </tr> <tr> <th>Semantic Text Similarity</th> <th>Summarization</th> <th>Reranking</th> <th>Pair Classification</th> </tr> <tr> <td><a href="https://huggingface.co/datasets/mteb/sts22-crosslingual-sts/viewer/en?row=0">STS-22</a></td> <td><a href="https://huggingface.co/datasets/sproos/summeval-tr">SummEval</a></td> <td><a href="https://huggingface.co/datasets/sproos/mindsmall-tr">MIND</a></td> <td><a href="https://huggingface.co/datasets/sproos/twitter-pairclass-fr">Twitter URL Corpus</a></td> </tr> </table> <div class="caption"> Tasks and datasets in MMTEB. </div> <h3 id="training">Training</h3> <p>In CLFT, we initialize two instances of our base model, one of which is frozen, and the other is trained. We will refer to these as $f_s$ and $f_\theta$ for the static and trained model. The static model will be used to anchor our trained model to the initial representation. 
For each language $l$, our data $X_l$, is composed of pairs of data points $(x_e, x_l) \in X_l$, where $x_e$ is a sentence in English, and $x_l$ is that sentence translated to language $l$.</p> <p>We initially attempted to use the literature-standard InfoNCE<d-cite key="infonce"></d-cite> contrastive loss for \(\mathcal{L}_{\text{eng}}\) and \(\mathcal{L}_{\text{cross}}\), treating equivalent translation pairs as positive examples, and non-equivalent text pairs as negative examples. However, empirically this did not perform as well, which we suspect may be because of compute-based batch size limitations (e5 was trained on a comparatively huge batch size of 32,768<d-cite key="e5"></d-cite>). Because of this, we chose to use our own simplified cosine similarity objective.</p> <p>We give the model \(f_\theta\) the following goal: place \(x_l\) as close to \(x_e\) as possible, without changing where we place \(x_e\). This is crucial, because it forces the model to map the new language onto its existing representation. 
This is done with the following loss function:</p> \[\mathcal{L}(x_e, x_f) = \mathcal{L}_{\text{eng}} + \beta \mathcal{L}_{\text{cross}}\] <p>Where:</p> <ul> <li>\(\mathcal{L}_{\text{eng}} = 1 - f_\theta(x_e) \cdot f_s(x_e)\) represents the loss component for English text, with \(f_\theta\) as the dynamic model being trained and \(f_s\) as the static reference model.</li> <li>\(\mathcal{L}_{\text{cross}} = 1 - f_\theta(x_e) \cdot f_\theta(x_f)\) represents the cross-lingual consistency loss, comparing the dynamic model’s outputs for English and foreign text.</li> <li>\(x_e\) and \(x_f\) are inputs for English and foreign text, respectively.</li> <li>\(\beta\) is a coefficient to balance the influence of the cross-lingual consistency term.</li> </ul> <p>We ran each of our mono-lingual models on 400,000 text pairs from the NLLB<d-cite key="nllb"></d-cite> dataset, with learning rate \(lr = 1e-4\) and dynamic scaling \(\beta = 0.4\).</p> <h2 id="results">Results</h2> <p>We found interesting and surprising results across our chosen languages and tasks. 
The results in table format are available in the appendix.</p> <p>We can visualize these results in two graphs: comparing our approach to the baseline English model, and to the current state-of-the-art multilingual model.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/base-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/base-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/base-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/base.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> All tasks and languages vs. base model performance </div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/multi-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/multi-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/multi-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/multi.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> All tasks and languages vs. 
multilingual model performance </div> <p>We can see that the CLFT approach did extremely well on tasks like classification, pair classification and clustering, even beating the multilingual model itself. This is to be expected in particularly well-suited tasks, since a perfect monolingual model will always outperform a multilingual model at a set number of parameters. However, the model did not improve as strongly in retrieval and semantic textual similarity tasks. Additionally, we can see the model struggle most significantly in Swahili, the most distant language to its original English in our training set. Overall, we observed an average 5.5% relative improvement on the base model, taking us <strong>49.8%</strong> of the way to the performance of the multilingual model.</p> <p>We have some conjectures about the reason for this split, which relate to the theory of representation learning. Since our loss is purely on positive pairs, there is weaker enforcement of a shape of the embeddings space. It is therefore likely that our approach is degenerating the shape of the embeddings space, leading to more clustering and noisier local structure. This means that tasks that rely on broad-strokes embeddings, such as clustering, classification and so on, will benefit from this approach, whereas tasks that rely on fine-grained relative positioning such as retrieval, reranking and STS will suffer. CLFT could thus be viewed as a trade-off between speed and ease of training, and noisiness of embeddings.</p> <p>We investigate this by performing a visual analysis of the embeddings after PCA dimension reduction. In the figure below, we see how different models represent the same text, after it has been translated. The texts were taken from the associated reddit clustering datasets for each language, and the labels in the diagrams are the name of the corresponding class of the datapoint. 
We see that the position of each embedding is roughly the same, which makes sense given our loss function.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/words-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/words-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/words-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/words.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Representations of the same text, translated to models' respective languages </div> <p>Additionally, the figure below demonstrates that we were mostly successful in our goal of keeping our trained models aligned with the underlying English model. We embedded the same English text with each model and got an even tighter clustering. We see that the training on languages more similar to English, such as Spanish, did not alter the English representations as significantly. 
Conversely, more distant languages, such as Swahili, led to further degradation of the embedding space.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/english-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/english-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/english-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-multilingual-representations-in-embeddings-models/english.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Representations of English text, according to each language </div> <h2 id="conclusions">Conclusions</h2> <p>Based on our results, we conclude that fine tuning for multilinguality is a cheap and viable alternative, especially when working with languages that do not have a large presence on the internet. While not an improvement over “true” multilingual models in general, CLFT can outperform multilingual models in scenarios where high-quality data is sparse, or in specific task categories (like clustering and reranking).</p> <p>Additionally, we have made steps to introduce the first truly multilingual benchmark, for future embedding models to be evaluated against. 
All code and data for MMTEB assessment can be found <a href="https://github.com/mtybadger/mmteb-toolkit">here</a>.</p> <h3 id="limitations-and-next-steps">Limitations and Next Steps</h3> <p>Our experiment has several limitations, and there is plenty of room for extension:</p> <p>The fact that we used machine-translated English text for our benchmark poses potential issues. It’s likely that the distribution of data that our translation model produces is not equivalent to that produced in the real world, meaning that our benchmark isn’t as accurate as the English one is. This is hard to ameliorate, especially for languages lacking many large datasets. However, barring the discovery of vast troves of previously unknown internet data, translations can serve as a useful stopgap, and an equalizer for these less available languages. Completing the MMTEB benchmark would be a valuable contribution to the field, and a path to more languages being represented in state-of-the-art models.</p> <p>In this paper, we only evaluated monolingual models, and did not study how the approach scales to multiple languages at once. Due to time and compute constraints, we were unable to try and train a “true” multilingual model, beyond just English and one other language. We believe that with further training, it may be possible to repeat the process above for multiple languages.</p> <p>As mentioned in our results, CLFT can lead to noisy embeddings, which may decrease performance on particular tasks. A better distillation loss, or traditional contrastive loss with a much larger batch size, may help to regularize the data and resolve this issue.</p> <p>As previously mentioned, we could not explore non-Latin characters, vastly reducing our set of potential languages. We believe that with the correct tokenizer and base model, this should be possible. 
Additionally, it’s becoming possible to imagine a future of Transformers without tokenization, which would greatly help approaches like ours.</p> <p>Despite our models maintaining near perfect alignment with the base model on the english text pairs during training, we observed performance on the English MTEB decrease substantially. This suggests that the text pairs on NLLB do not fully capture the distribution of data seen during testing,which is something that could be improved upon with better translation datasets.</p> <h2 id="appendix">Appendix</h2> <p>Here is a full table of our results:</p> <table> <thead> <tr> <th> </th> <th>Classification</th> <th>Summarization</th> <th>Clustering</th> <th>Retrieval</th> <th>STS</th> <th>Reranking</th> <th>Pair Classification</th> <th>Average</th> </tr> </thead> <tbody> <tr> <td>Spanish (e5-base)</td> <td>0.511</td> <td>0.314</td> <td>0.333</td> <td>0.554</td> <td>0.585</td> <td>0.296</td> <td>0.828</td> <td><strong>0.489</strong></td> </tr> <tr> <td>Spanish (e5-multi)</td> <td>0.635</td> <td>0.301</td> <td>0.336</td> <td>0.655</td> <td>0.629</td> <td>0.243</td> <td>0.848</td> <td><strong>0.521</strong></td> </tr> <tr> <td>Spanish (ours)</td> <td>0.583</td> <td>0.314</td> <td>0.398</td> <td>0.568</td> <td>0.553</td> <td>0.284</td> <td>0.847</td> <td><strong>0.507</strong></td> </tr> <tr> <td>German (e5-base)</td> <td>0.522</td> <td>0.307</td> <td>0.328</td> <td>0.560</td> <td>0.236</td> <td>0.293</td> <td>0.812</td> <td><strong>0.437</strong></td> </tr> <tr> <td>German (e5-multi)</td> <td>0.637</td> <td>0.313</td> <td>0.346</td> <td>0.648</td> <td>0.491</td> <td>0.230</td> <td>0.840</td> <td><strong>0.501</strong></td> </tr> <tr> <td>German (ours)</td> <td>0.602</td> <td>0.320</td> <td>0.393</td> <td>0.546</td> <td>0.273</td> <td>0.332</td> <td>0.841</td> <td><strong>0.472</strong></td> </tr> <tr> <td>French (e5-base)</td> <td>0.512</td> <td>0.312</td> <td>0.329</td> <td>0.568</td> <td>0.747</td> <td>0.330</td> 
<td>0.825</td> <td><strong>0.518</strong></td> </tr> <tr> <td>French (e5-multi)</td> <td>0.637</td> <td>0.306</td> <td>0.263</td> <td>0.644</td> <td>0.764</td> <td>0.222</td> <td>0.845</td> <td><strong>0.526</strong></td> </tr> <tr> <td>French (ours)</td> <td>0.622</td> <td>0.302</td> <td>0.404</td> <td>0.604</td> <td>0.749</td> <td>0.344</td> <td>0.849</td> <td><strong>0.554</strong></td> </tr> <tr> <td>Turkish (e5-base)</td> <td>0.458</td> <td>0.296</td> <td>0.221</td> <td>0.411</td> <td>0.456</td> <td>0.308</td> <td>0.776</td> <td><strong>0.418</strong></td> </tr> <tr> <td>Turkish (e5-multi)</td> <td>0.639</td> <td>0.304</td> <td>0.318</td> <td>0.631</td> <td>0.601</td> <td>0.258</td> <td>0.827</td> <td><strong>0.511</strong></td> </tr> <tr> <td>Turkish (ours)</td> <td>0.557</td> <td>0.307</td> <td>0.382</td> <td>0.413</td> <td>0.414</td> <td>0.336</td> <td>0.826</td> <td><strong>0.462</strong></td> </tr> <tr> <td>Swahili (e5-base)</td> <td>0.413</td> <td>0.304</td> <td>0.181</td> <td>0.281</td> <td>0.000</td> <td>0.313</td> <td>0.751</td> <td><strong>0.321</strong></td> </tr> <tr> <td>Swahili (e5-multi)</td> <td>0.528</td> <td>0.303</td> <td>0.166</td> <td>0.527</td> <td>0.000</td> <td>0.257</td> <td>0.822</td> <td><strong>0.372</strong></td> </tr> <tr> <td>Swahili (ours)</td> <td>0.347</td> <td>0.315</td> <td>0.238</td> <td>0.332</td> <td>0.000</td> <td>0.275</td> <td>0.764</td> <td><strong>0.325</strong></td> </tr> <tr> <td>Average (e5-base)</td> <td>0.483</td> <td>0.307</td> <td>0.279</td> <td>0.475</td> <td>0.405</td> <td>0.308</td> <td>0.799</td> <td><strong>0.436</strong></td> </tr> <tr> <td>Average (e5-multi)</td> <td>0.615</td> <td>0.306</td> <td>0.286</td> <td>0.621</td> <td>0.497</td> <td>0.242</td> <td>0.836</td> <td><strong>0.486</strong></td> </tr> <tr> <td>Average (ours)</td> <td>0.542</td> <td>0.312</td> <td>0.363</td> <td>0.493</td> <td>0.398</td> <td>0.314</td> <td>0.825</td> <td><strong>0.464</strong></td> </tr> </tbody> </table> Learning 
Interpretable Features with Sparse Auto-Encoders 2023-11-09T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/learning-interpretable-features-with-sparse-autoencoders <h2 id="introduction">Introduction</h2> <p>The field of Machine Learning is becoming increasingly promising as humanity endeavors to create intelligent systems, with models outperforming humans on many tasks. As models become increasingly capable, it’s important that humans are able to interpret a model’s internal decision making process to mitigate the risk of negative outcomes. While significant progress has been made on interpreting important parts of models like <a href="https://transformer-circuits.pub/2021/framework/index.html">attention heads</a> <d-cite key="elhage2021mathematical"></d-cite>, it’s also the case that hidden layers in deep neural networks have remained notoriously hard to interpret.</p> <h2 id="superposition-hypothesis">Superposition Hypothesis</h2> <p>One hypothesis for why it can be challenging to interpret individual neurons is because they are simultaneously representing multiple concepts. One may wonder why a network would have its neurons learn to represent multiple concepts. At a first glance, this approach to encoding information feels unintuitive and messy. The key idea comes from the Johnson–Lindenstrauss lemma: In $n$ dimensions, you can have at most $n$ pairwise orthogonal vectors, but the number of pairwise “almost orthogonal” vectors (i.e. cosine similarity at most $\epsilon$) you can have is exponential in $n$. This enables a layer to encode for many more concepts than it has neurons. So long as each neuron is only activated by a sparse combination of concepts, we can reconstruct these concepts from a given activation with minimal interference between the concepts, since they are “almost orthogonal”. 
This hypothesis is known as <strong><a href="https://transformer-circuits.pub/2022/toy_model/index.html">superposition</a></strong> <d-cite key="elhage2022superposition"></d-cite>, and offers an explanation for why neurons have been observed in practice to be polysemantic.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/superposition-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/superposition-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/superposition-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/superposition.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Diagram depicting a larger model with disentangled features and a lower dimensional projection simulating this larger network using polysemanticity. Source <d-cite key="elhage2022superposition"></d-cite> </div> <h2 id="sparse-auto-encoders">Sparse Auto-Encoders</h2> <p>Since deep neural networks are strongly biased towards making neurons polysemantic during training, humans might try to understand the model’s decision making process by “unwrapping” the network into the sparse features that the neurons in some particular layer are simulating. To do this, a concept called a Sparse Auto-Encoder (SAE) is used. An SAE is similar to a normal autoencoder, with two main differences: (1) the encoding layer is larger than the neuron layer, often by a factor of 4x. 
(2) the loss function penalizes not only for the MSE loss, but also for the sparsity of the encoder matrix, frequently represented as L1 loss. A sparse autoencoder lets us learn a sparse representation for a vector, but in a higher dimensional space. SAEs were first proposed in a <a href="https://www.lesswrong.com/posts/z6QQJbtpkEAX3Aojj/interim-research-report-taking-features-out-of-superposition">blogpost</a> by Lee Sharkey in December 2022, and in September 2023 more research was published on SAEs, both by a group of <a href="https://arxiv.org/abs/2309.08600">independent researchers</a> <d-cite key="cunningham2023sparse"></d-cite> and by <a href="https://transformer-circuits.pub/2023/monosemantic-features/">Anthropic</a> <d-cite key="bricken2023monosemanticity"></d-cite> demonstrating that not only can SAEs be learned at a specific layer, but the features they learn are human interpretable.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/SAE-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/SAE-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/SAE-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/SAE.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Diagram depicting an SAE architecture for a transformer language model. 
Source <d-cite key="cunningham2023sparse"></d-cite> </div> <h2 id="research-question">Research Question</h2> <p>This inspired a new idea: what if we could take a neural network, unwrap each layer into a larger, sparse, interpretable set of features, and then learn a sparse weight matrix connecting all pairs of two consecutive feature layers? This would mean that we could take a neural network, and transform it into a new neural network simulating the old neural network, with the nice property that the computations are sparse and hopefully interpretable.</p> <p>The main question we wish to explore is: Can we unwrap a deep neural network into a larger sparse network and learn sparse weights between consecutive feature layers without losing performance?</p> <h2 id="initial-mathematics">Initial Mathematics</h2> <p>Let’s begin by looking at $L_1$ and $L_2$, two consecutive layers in a deep neural network with ReLU activations. Let $W$ and $b$ be the matrix and bias respectively that connects these two layers. 
Then we have</p> \[L_2 = \text{ReLU}(W L_1 + b)\] <p>We create autoencoders such that</p> \[L_1 = D_1 \text{ReLU}(E_1 L_1 + e_1) \equiv D_1 F_1\] \[L_2 = D_2 \text{ReLU}(E_2 L_2 + e_2) \equiv D_2 F_2\] <p>where $D_i$ is the decoder for layer $i$, $E_i$ and $e_i$ are the weights of the encoder and encoder bias, and $F_i$ is the feature vector.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_diagram-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_diagram-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_diagram-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_diagram.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Biases excluded from diagram for clarity. The hockey sticks on top of $F_1$, $L_2$, and $F_2$ indicate that a ReLU is applied to get the activations at that layer. If our autoencoder is good (which it should be), we have $L_1=L_1'$ and $L_2=L_2'$. </div> <p>Thus we have</p> \[\begin{align} F_2 &amp;= \text{ReLU}(E_2 L_2 + e_2) \\ &amp;= \text{ReLU}(E_2 \text{ReLU}(W L_1 + b) + e_2) \\ &amp;= \text{ReLU}(E_2 \text{ReLU}(W D_1 F_1 + b) + e_2). \end{align}\] <p>In general, an approximation of the form</p> \[F_2 = \text{ReLU}(W_2 F_1 + b_2)\] <p>would be pretty terrible since we cannot easily approximate a double ReLU function with a single ReLU function. 
However, because of the way $F_1$ and $F_2$ are created from $L_1$ and $L_2$, the relationships are actually very sparse in nature, so we will try to learn the approximation above. Perhaps there is a clever initialization that will allow us to learn this more easily.</p> <p>If we just ignored the inside ReLU in the definition of $F_2$, then we’d have</p> \[F_2 = \text{ReLU}(E_2 W D_1 F_1 + E_2 b + e_2)\] <p>which suggests the following could be a good initialization for our learned weight $W_2$ and bias $b_2$.</p> \[W_2 = E_2 W D_1\] \[b_2 = E_2 b + e_2\] <p>While this initialization seemed reasonable at the start of the project, it turned out that during training this results in a local minimum, and you can actually get much lower loss if you randomly initialize $W_2$ and $b_2$.</p> <h2 id="codebase">Codebase</h2> <p>To answer this main question, the first step was to build out a <a href="https://drive.google.com/file/d/1_0g_Qq76AqJByCrj_i-tYr76KPeAfIem/view?usp=sharing">codebase</a> that had all the implementations necessary to run experiments to explore this question. The codebase was developed from scratch to ensure I understood how each part of the code worked.</p> <h3 id="model">Model</h3> <p>The first part of the code trains a four layer neural network to classify MNIST images. After training we got a validation loss of 0.09 and a validation accuracy of 0.98, indicating the model does well. For clarity, all losses described in this section will refer to loss on the validation set.</p> <h3 id="saes">SAEs</h3> <p>Next, two autoencoder architectures are implemented, one that learns both an encoder and decoder, and one that learns only an encoder as its decoder is tied as the transpose of the encoder. Empirically, the tied autoencoder seemed to perform better and achieved an L1 (sparsity) loss of 0.04928, and an L2 (MSE) loss of 0.03970. 
Seeing these numbers close in magnitude is good, indicating that the model is neither penalizing too much nor too little for L1 sparsity loss.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/autoencoder-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/autoencoder-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/autoencoder-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/autoencoder.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> For a random input: The top diagram depicts neuron activations (blue) and reconstructed neuron activations from the SAE (orange), indicating the SAE has low L2 loss and reconstructs the input well. The bottom diagram depicts the feature activations for the same input, showing they are sparse. Notably, 38/64 of the neuron activations have magnitude above 0.3, but only 7/256 of the encoded features have magnitude above 0.3. </div> <h3 id="feature-connectors">Feature Connectors</h3> <p>Then, a feature connector was implemented, which learns the matrices $W_2$ and $b_2$ described above mapping one layer to another layer. The inputs are the set of all feature $i$ activations and the outputs are the set of all feature $i+1$ activations, allowing us to gradient descent over loss (which consists of L1 sparsity and L2 MSE) to optimize $W_2$ and $b_2$. 
The L1 (sparsity) loss was 0.02114 and the L2 (MSE) loss: 0.03209, indicating that there is a good tradeoff between L1 and L2 penalty.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/layer_weights-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/layer_weights-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/layer_weights-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/layer_weights.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Weights matrix connecting neuron layer 1 to neuron layer 2. This is a mess. 2205 weights have magnitude greater than 0.1. 
</div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_weights-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_weights-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_weights-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_weights.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Weights matrix connecting encoded features in layer 1 to encoded features in layer 2. This is nice and sparse. 458 weights have magnitude greater than 0.1. 
</div> <p>Below is what the feature connector matrix looks like after each epoch of training.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_connector1_2.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_connector1_2.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_connector1_2.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/feature_connector1_2.gif" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h3 id="simulating-the-feature-network">Simulating the Feature Network</h3> <p>Finally, we replace neuron connections with feature connections. This means that when we pass an input through the network, we immediately encode it as a feature and propagate it through the feature connector weights, skipping the neuron layer weights. In this network, removing two neuron to neuron layers and substituting them with feature to feature layers results in a decrease from 97.8% accuracy to 94% accuracy, which is pretty good considering we made our network much sparser.</p> <p>Next, I tried to visualize the features using a variety of methods (both inspired by a class lecture and a <a href="https://distill.pub/2017/feature-visualization">Distill blogpost</a> <d-cite key="olah2017feature"></d-cite>). Unfortunately, I did not find the features to be much more interpretable than the neurons for the MNIST dataset. 
Still, our results are cool: we can take a network, and with only a fraction of the parameters maintain comparable performance.</p> <h2 id="language-models">Language Models</h2> <p>I shared these results with Logan Riggs, one of the <a href="https://arxiv.org/abs/2309.08600">independent researchers</a> <d-cite key="cunningham2023sparse"></d-cite> who published about SAEs in October 2023. Excited about the possibility, we collaborated to see if we could achieve the same results for language models, anticipating that the learned features might be more interpretable. We and a couple other collaborators published a <a href="https://www.lesswrong.com/posts/7fxusXdkMNmAhkAfc/finding-sparse-linear-connections-between-features-in-llms">blogpost</a> showing that the learned features in Pythia-70M are indeed interpretable, and there are cool relationships! (the remainder of this section is adapted from that blogpost)</p> <p>Below we show some examples of sparse linear feature connections. For the curious reader, additional examples can be found <a href="https://comet-scorpio-0b3.notion.site/More-Examples-ceaefc95cc924afba318dca1da37d4a4?pvs=4">here</a>.</p> <h3 id="or-example">OR Example</h3> <p>In Layer 1, we have:</p> \[OF_{30} = 0.26IF_{2797} + 0.23IF_{259} + 0.10IF_{946}\] <p>where OF is output feature (in MLP_out), and IF is input feature (in Residual Stream before the MLP)</p> <p>Below is input feature 2797, activating strongly on the token “former”</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/former.webp-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/former.webp-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" 
srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/former.webp-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/former.webp" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> This is 5 examples. For each example, the top row of words are feature activation e.g. token "former" activated 9.4. The bottom blank row is: if we removed this feature, how much worse does the model get at predicting these tokens? e.g. Soviet is 5.5 logits worse when the model can't use this "former" feature. </div> <p>Below is input feature 259, activating strongly on the token “old”</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/old.webp-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/old.webp-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/old.webp-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/old.webp" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Below is input feature 946, activating on the token “young”</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/young.webp-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" 
srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/young.webp-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/young.webp-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/young.webp" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>In the output feature, we see the tokens former, old, and young all activate, with young activating about half as strongly as “former” and “old” as we would expect from the weight coefficients.</p> <p>\(OF_{30} = 0.26IF_{former} + 0.23IF_{old} + 0.10IF_{young}\)</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/former_old_young.webp-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/former_old_young.webp-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/former_old_young.webp-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/former_old_young.webp" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>We can view this computation as a weighted logical OR. 
Output Feature 30 activates on former OR old OR young.</p> <h3 id="negative-weight-example">Negative Weight Example</h3> <p>In Layer 1, we have:</p> \[OF_{505} = 0.68IF_{3021} -0.21IF_{729}\] <p>where OF is output feature, and IF is input feature.</p> <p>Below is input feature 3021, activating strongly on tokens like “said” which in almost all cases appear not after a quote.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/all_said.webp-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/all_said.webp-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/all_said.webp-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/all_said.webp" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Below is input feature 729, activating strongly on tokens like “said” when they appear shortly after a quote.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/said_quotes.webp-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/said_quotes.webp-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/said_quotes.webp-1400.webp" /> <!-- Fallback to the original file --> <img 
src="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/said_quotes.webp" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Below we see the output feature activates on tokens like “said” that have no prior quote tokens. We’ve “subtracted out” with a large negative weight, so to speak, the examples where “said” appears after a quote, and now the feature only activates when “said” appears without any prior quotes.</p> \[OF_{505} = 0.68IF_{(\text{"said" in many contexts})} -0.21IF_{(\text{"said" after quotes})}\] <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/said_no_quotes.webp-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/said_no_quotes.webp-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/said_no_quotes.webp-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-learning-interpretable-features-with-sparse-autoencoders/said_no_quotes.webp" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>We can view this computation as a weighted logical AND. Output Feature 505 activates on A AND ~B. In the case where A is a superset of B, this is the complement of B e.g. I have the set of all fruits and all yellow fruits, so now I can find all non-yellow fruits.</p> <h2 id="conclusion">Conclusion</h2> <p>Our exploration into interpreting neural networks using Sparse Auto-Encoders has shown promising results. 
The ability to unwrap the layers of a neural network into a more interpretable, sparse representation without a significant loss in performance supports the superposition hypothesis. Even if the features were only interpretable on some architectures/datasets, I am optimistic that Sparse Auto-Encoders will not only make deep neural networks more interpretable, but they will also allow for quicker parallelized inference since each output feature will depend on a small fraction of the total possible input features.</p> <p>I’d like to thank everyone who has contributed to my deep learning education this semester. I have learned a tremendous amount and really enjoyed working on this project.</p> How does model size impact catastrophic forgetting in online continual learning? 2023-11-09T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/eunhae-project <!-- <style> .caption { font-size: 0.8em; /* Adjust the size as needed */ text-align: center; color: grey; /* or any color you prefer */ } /* h1 { margin: 0.5em 0 0 0; font-size: 36px; } h3 { margin: 0em; } */ </style> --> <h1 id="introduction">Introduction</h1> <p>One of the biggest unsolved challenges in continual learning is preventing forgetting previously learned information upon acquiring new information. Known as “catastrophic forgetting,” this phenomenon is particularly pertinent in scenarios where AI systems must adapt to new data without losing valuable insights from past experiences. Numerous studies have investigated different approaches to solving this problem in the past years, mostly around proposing innovative strategies to modify the way models are trained and measuring its impact on model performance, such as accuracy and forgetting.</p> <p>Yet, compared to the numerous amount of studies done in establishing new strategies and evaluative approaches in visual continual learning, there is surprisingly little discussion on the impact of model size. 
It is commonly known that the size of a deep learning model (the number of parameters) is known to play a crucial role in its learning capabilities <d-cite key="hu2021model, Bianco_2018"></d-cite>. Given the limitations in computational resources in most real-world circumstances, it is often not practical or feasible to choose the largest model available. In addition, sometimes smaller models perform just as well as larger models in specific contexts<d-cite key="Bressem_2020"></d-cite>. Given this context, a better understanding of how model size impacts performance in a continual learning setting can provide insights and implications on real-world deployment of continual learning systems.</p> <p>In this blog post, I explore the following research question: <em>How do network depth and width impact model performance in an online continual learning setting?</em> I set forth a hypothesis based on existing literature and conduct a series experiments with models of varying sizes to explore this relationship. This study aims to shed light on whether larger models truly offer an advantage in mitigating catastrophic forgetting, or if the reality is more nuanced.</p> <h1 id="related-work">Related Work</h1> <h3 id="online-continual-learning">Online continual learning</h3> <p>Continual learning (CL), also known as lifelong learning or incremental learning, is an approach that seeks to continually learn from non-iid data streams without forgetting previously acquired knowledge. The challenge in continual learning is generally known as the stability-plasticity dilemma<d-cite key="mermillod2013-dilemma"></d-cite>, and the goal of continual learning is to strike a balance between learning stability and plasticity.</p> <p>While traditional CL models assume new data arrives task by task, each with a stable data distribution, enabling <em>offline</em> training. However, this requires having access to all task data, which can be impractical due to privacy or resource limitations. 
In this study, I will consider a more realistic setting of Online Continual Learning (OCL), where data arrives in smaller batches and are not accessible after training, requiring models to learn from a single pass over an online data stream. This allows the model to learn data in real-time<d-cite key="soutif-cormerais_comprehensive_2023, cai_online_2021, mai_online_2021"></d-cite>.</p> <p>Online continual learning can involve adapting to new classes (class-incremental) or changing data characteristics (domain-incremental). Specifically, for class-incremental learning, the goal is to continually expand the model’s ability to recognize an increasing number of classes, maintaining its performance on all classes it has seen so far, despite not having continued access to the old class data<d-cite key="soutif-cormerais_comprehensive_2023, ghunaim_real-time_2023"></d-cite>. Moreover, there has been more recent work done in unsupervised continual learning <d-cite key="yu_scale_2023, madaan_representational_2022"></d-cite>. 
To narrow the scope of the vast CL landscape to focus on learning the impact of model size in CL performance, I will focus on the more common problem of class-incremental learning in supervised image classification in this study.</p> <h3 id="continual-learning-techniques">Continual learning techniques</h3> <p>Popular methods to mitigate catastrophic forgetting in continual learning generally fall into three buckets:<d-cite key="ghunaim_real-time_2023"> :</d-cite></p> <ol> <li><em>regularization-based</em> approaches that modify the classification objective to preserve past representations or foster more insightful representations, such as Elastic Weight Consolidation (EWC)<d-cite key="kirkpatrick2017overcoming"></d-cite> and Learning without Forgetting (LwF)<d-cite key="li_learning_2017"></d-cite>;</li> <li><em>memory-based</em> approaches that replay samples retrieved from a memory buffer along with every incoming mini-batch, including Experience Replay (ER)<d-cite key="chaudhry2019tiny"></d-cite> and Maximally Interfered Retrieval<d-cite key="aljundi2019online"></d-cite>, with variations on how the memory is retrieved and how the model and memory are updated; and</li> <li><em>architectural</em> approaches including parameter-isolation approaches where new parameters are added for new tasks and leaving previous parameters unchanged such as Progressive Neural Networks (PNNs)<d-cite key="rusu2022progressive"></d-cite>.</li> </ol> <p>Moreover, there are many methods that combine two or more of these techniques such as Averaged Gradient Episodic Memory (A-GEM)<d-cite key="chaudhry2019efficient"></d-cite> and Incremental Classifier and Representation Learning (iCaRL)<d-cite key="rebuffi2017icarl"></d-cite>.</p> <p>Among the methods, <strong>Experience Replay (ER)</strong> is a classic replay-based method and widely used for online continual learning. 
Despite its simplicity, recent studies have shown ER still outperforms many of the newer methods that have come after that, especially for online continual learning <d-cite key="soutif-cormerais_comprehensive_2023, mai_online_2021, ghunaim_real-time_2023"></d-cite>.</p> <h3 id="model-size-and-performance">Model size and performance</h3> <p>It is generally known across literature that deeper models increase performance<d-cite key="hu2021model"></d-cite>. Bianco et al. conducted a survey of key performance-related metrics to compare across various architectures, including accuracy, model complexity, computational complexity, and accuracy density<d-cite key="Bianco_2018"></d-cite>. Relationship between model width and performance is also been discussed<d-cite key="hu2021model"></d-cite>, albeit less frequently.</p> <p>He et al. introduced Residual Networks (ResNets)<d-cite key="he2015deep"></d-cite> which was a major innovation in computer vision by tackling the problem of degradation in deeper networks. ResNets do this by residual blocks to increase the accuracy of deeper models. Residual blocks that contain two ore more layers are stacked together, and “skip connections” are used in between these blocks. The skip connections act as an alternate shortcut for the gradient to pass through, which alleviates the issue of vanishing gradient. They also make it easier for the model to learn identity functions. As a result, ResNet improves the efficiency of deep neural networks with more neural layers while minimizing the percentage of errors. 
The authors compare models of different depths (composed of 18, 34, 50, 101, 152 layers) and show that accuracy increases with depth of the model.</p> <table> <thead> <tr> <th style="text-align: center"> </th> <th style="text-align: center"><strong>ResNet18</strong></th> <th style="text-align: center"><strong>ResNet34</strong></th> <th style="text-align: center"><strong>ResNet50</strong></th> <th style="text-align: center"><strong>ResNet101</strong></th> <th style="text-align: center"><strong>ResNet152</strong></th> </tr> </thead> <tbody> <tr> <td style="text-align: center"><strong>Number of Layers</strong></td> <td style="text-align: center">18</td> <td style="text-align: center">34</td> <td style="text-align: center">50</td> <td style="text-align: center">101</td> <td style="text-align: center">152</td> </tr> <tr> <td style="text-align: center"><strong>Number of Parameters</strong></td> <td style="text-align: center">~11.7 million</td> <td style="text-align: center">~21.8 million</td> <td style="text-align: center">~25.6 million</td> <td style="text-align: center">~44.5 million</td> <td style="text-align: center">~60 million</td> </tr> <tr> <td style="text-align: center"><strong>Top-1 Accuracy</strong></td> <td style="text-align: center">69.76%</td> <td style="text-align: center">73.31%</td> <td style="text-align: center">76.13%</td> <td style="text-align: center">77.37%</td> <td style="text-align: center">78.31%</td> </tr> <tr> <td style="text-align: center"><strong>Top-5 Accuracy</strong></td> <td style="text-align: center">89.08%</td> <td style="text-align: center">91.42%</td> <td style="text-align: center">92.86%</td> <td style="text-align: center">93.68%</td> <td style="text-align: center">94.05%</td> </tr> <tr> <td style="text-align: center"><strong>FLOPs</strong></td> <td style="text-align: center">1.8 billion</td> <td style="text-align: center">3.6 billion</td> <td style="text-align: center">3.8 billion</td> <td style="text-align: center">7.6 billion</td> 
<td style="text-align: center">11.3 billion</td> </tr> </tbody> </table> <div class="caption">Table 1: Comparison of ResNet Architectures</div> <p>This leads to the question: do larger models perform better in continual learning? While much of the focus in continual learning research has often been on developing various strategies, methods, and establishing benchmarks, the impact of model scale remains a less explored path.</p> <p>Moreover, recent studies on model scale in slightly different contexts have shown conflicting results. Luo et al.<d-cite key="luo2023empirical"></d-cite> highlights a direct correlation between increasing model size and the severity of catastrophic forgetting in large language models (LLMs). They test models of varying sizes from 1 to 7 billion parameters. Yet, Dyer et al.<d-cite key="dyer2022"></d-cite> show a constrasting perspective in the context of pretrained deep learning models. Their results show that large, pretrained ResNets and Transformers are a lot more resistant to forgetting than randomly-initialized, trained-from-scratch models, and that this tendency increases with the scale of model and the pretraining dataset size.</p> <p>The relative lack of discussion on model size and the conflicting perspectives among existing studies indicate that the answer to the question is far from being definitive. In the next section, I will describe further how I approach this study.</p> <h1 id="method">Method</h1> <h3 id="problem-definition">Problem definition</h3> <p>Online continual learning can be defined as follows<d-cite key="cai_online_2021, ghunaim_real-time_2023"></d-cite>:</p> <p>The objective is to learn a function $f_\theta : \mathcal X \rightarrow \mathcal Y$ with parameters $\theta$ that predicts the label $Y \in \mathcal Y$ of the input $\mathbf X \in \mathcal X$. 
Over time steps $t \in \lbrace 1, 2, \ldots \infty \rbrace$, a distribution-varying stream $\mathcal S$ reveals data sequentially, which is different from classical supervised learning.</p> <p>At every time step,</p> <ol> <li>$\mathcal S$ reveals a set of data points (images) $\mathbf X_t \sim \pi_t$ from a non-stationary distribution $\pi_t$</li> <li>Learner $f_\theta$ makes predictions $\hat Y_t$ based on current parameters $\theta_t$</li> <li>$\mathcal S$ reveals true labels $Y_t$</li> <li>Compare the predictions with the true labels, compute the training loss $L(Y_t, \hat Y_t)$</li> <li>Learner updates the parameters of the model to $\theta_{t+1}$</li> </ol> <h3 id="task-agnostic-and-boundary-agnostic">Task-agnostic and boundary-agnostic</h3> <p>In the context of class-incremental learning, I will adopt the definitions of task-agnostic and boundary-agnostic from Soutif et al. 2023<d-cite key="soutif-cormerais_comprehensive_2023"></d-cite>. A <em>task-agnostic</em> setting refers to when task labels are not available, which means the model does not know that the samples belong to a certain task. A <em>boundary-agnostic</em> setting is considered, where information on task boundaries are not available. This means that the model does not know when the data distribution changes to a new task.</p> <table> <thead> <tr> <th style="text-align: center"> </th> <th style="text-align: center"><strong>Yes</strong></th> <th style="text-align: center"><strong>No</strong></th> </tr> </thead> <tbody> <tr> <td style="text-align: center"><strong>Task labels</strong></td> <td style="text-align: center">Task-aware</td> <td style="text-align: center">Task-agnotic</td> </tr> <tr> <td style="text-align: center"><strong>Task boundaries</strong></td> <td style="text-align: center">Boundary-aware</td> <td style="text-align: center">Boundary-agnostic</td> </tr> </tbody> </table> <div class="caption">Table 2: Task labels and task boundaries. 
This project assumes task-agnostic and boundary-agnostic settings.</div> <h3 id="experience-replay-er">Experience Replay (ER)</h3> <p>In a class-incremental learning setting, the nature of the Experience Replay (ER) method aligns well with task-agnostic and boundary-agnostic settings. This is because ER focuses on replaying a subset of past experiences, which helps in maintaining knowledge of previous classes without needing explicit task labels or boundaries. This characteristic of ER allows it to adapt to new classes as they are introduced, while retaining the ability to recognize previously learned classes, making it inherently suitable for task-agnostic and boundary-agnostic continual learning scenarios.</p> <p>Implementation-wise, ER involves randomly initializing an external memory buffer $\mathcal M$, then implementing <code class="language-plaintext highlighter-rouge">before_training_exp</code> and <code class="language-plaintext highlighter-rouge">after_training_exp</code> callbacks to use the dataloader to create mini-batches with samples from both training stream and the memory buffer. Each mini-batch is balanced so that all tasks or experiences are equally represented in terms of stored samples<d-cite key="lomonaco2021avalanche"></d-cite>. As ER is known be well-suited for online continual learning, it will be the go-to method used to compare performances across models of varying sizes.</p> <h3 id="benchmark">Benchmark</h3> <p>For this study, the SplitCIFAR-10<d-cite key="lomonaco2021avalanche"></d-cite> is used as the main benchmark. SplitCIFAR-10 splits the popular CIFAR-10 dataset into 5 tasks with disjoint classes, each task including 2 classes each. Each task has 10,000 3×32×32 images for training and 2000 images for testing. The model is exposed to these tasks or experiences sequentially, which simulates a real-world scenario where a learning system is exposed to new categories of data over time. 
This is suitable for class-incremental learning scenarios. This benchmark is used for both testing online and offline continual learning in this study.</p> <h3 id="metrics">Metrics</h3> <p>Key metrics established in earlier work in online continual learning are used to evaluate the performance of each model.</p> <p><strong>Average Anytime Accuracy (AAA)</strong> as defined in <d-cite key="caccia_new_2022"></d-cite></p> <p>The concept of average anytime accuracy serves as an indicator of a model’s overall performance throughout its learning phase, extending the idea of average incremental accuracy to include continuous assessment scenarios. This metric assesses the effectiveness of the model across all stages of training, rather than at a single endpoint, offering a more comprehensive view of its learning trajectory.</p> \[\text{AAA} = \frac{1}{T} \sum_{t=1}^{T} (\text{AA})_t\] <p><strong>Average Cumulative Forgetting (ACF)</strong> as defined in <d-cite key="soutif-cormerais_comprehensive_2023, soutifcormerais2021importance"></d-cite></p> <p>This equation represents the calculation of the <strong>Cumulative Accuracy</strong> ($b_k^t$) for task $k$ after the model has been trained up to task $t$. It computes the mean accuracy over the evaluation set $E^k_\Sigma$, which contains all instances $x$ and their true labels $y$ up to task $k$. The model’s prediction for each instance is given by $\underset{c \in C^k_\Sigma}{\text{arg max }} f^t(x)_c$, which selects the class $c$ with the highest predicted logit $f^t(x)_c$. The indicator function $1_y(\hat{y})$ outputs 1 if the prediction matches the true label, and 0 otherwise. 
The sum of these outputs is then averaged over the size of the evaluation set to compute the cumulative accuracy.</p> \[b_k^t = \frac{1}{|E^k_\Sigma|} \sum_{(x,y) \in E^k_\Sigma} 1_y(\underset{c \in C^k_\Sigma}{\text{arg max }} f^t(x)_c)\] <p>From Cumulative Accuracy, we can calculate the <strong>Average Cumulative Forgetting</strong> ($F_{\Sigma}^t$) by setting the cumulative forgetting about a previous cumulative task $k$, then averaging over all tasks learned so far:</p> \[F_{\Sigma}^t = \frac{1}{t-1} \sum_{k=1}^{t-1} \max_{i=1,...,t} \left( b_k^i - b_k^t \right)\] <p><strong>Average Accuracy (AA) and Average Forgetting (AF)</strong> as defined in <d-cite key="mai_online_2021"></d-cite></p> <p>$a_{i,j}$ is the accuracy evaluated on the test set of task $j$ after training the network from task 1 to $i$, while $i$ is the current task being trained. Average Accuracy (AA) is computed by averaging this over the number of tasks.</p> \[\text{Average Accuracy} (AA_i) = \frac{1}{i} \sum_{j=1}^{i} a_{i,j}\] <p>Average Forgetting measures how much a model’s performance on a previous task (task $j$) decreases after it has learned a new task (task $i$). It is calculated by comparing the highest accuracy the model $\max_{l \in {1, \ldots, k-1}} (a_{l, j})$ had on task $j$ before it learned task $k$, with the accuracy $a_{k, j}$ on task $j$ after learning task $k$.</p> \[\text{Average Forgetting}(F_i) = \frac{1}{i - 1} \sum_{j=1}^{i-1} f_{i,j}\] \[f_{k,j} = \max_{l \in \{1,...,k-1\}} (a_{l,j}) - a_{k,j}, \quad \forall j &lt; k\] <p>In the context of class-incremental learning, the concept of classical forgetting may not provide meaningful insight due to its tendency to increase as the complexity of the task grows (considering more classes within the classification problem). 
Therefore, <d-cite key="soutif-cormerais_comprehensive_2023"></d-cite>recommendeds avoiding relying on classical forgetting as a metric in settings of class-incremental learning, both online and offline settings. Thus, Average Anytime Accuracy (AAA) and Average Cumulative Forgetting (ACF) are used throughout this experiment, although AA and AF are computed as part of the process.</p> <h3 id="model-selection">Model selection</h3> <p>To compare learning performance across varying model depths, I chose to use the popular ResNet architectures, particularly ResNet18, ResNet34, and ResNet50. As mentioned earlier in this blog, ResNets were designed to increase the performance of deeper neural networks, and their performance metrics are well known. While using custom models for more variability in sizes was a consideration, existing popular architectures were chosen for better reproducibility.</p> <p>Moreover, while there are newer versions (i.e. ResNeXt<d-cite key="xie2017aggregated"></d-cite>) that have shown to perform better without a huge increase in computational complexity<d-cite key="Bianco_2018"></d-cite>, for this study the original smaller models were chosen to avoid introducing unnecessary variables. ResNet18 and ResNet34 have the basic residual network structure, and ResNet50, ResNet101, and ResNet152 use slightly modified building blocks that have 3 layers instead of 2. This ”bottleneck design” was made to reduce training time. 
The specifics of the design of these models are detailed in the table from the original paper by He et al.<d-cite key="he2015deep"></d-cite>.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-eunhae-project/resnets_comparison-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-eunhae-project/resnets_comparison-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-eunhae-project/resnets_comparison-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-eunhae-project/resnets_comparison.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">ResNet architecture. Table from He et al. (2015)</figcaption> </figure> <p>Moreover, in order to observe the effect of model width on performance, I also test a slim version of ResNet18 that has been used in previous works<d-cite key="lopez-paz_gradient_2017"></d-cite>. The slim version uses fewer filters per layer, reducing the model width and computational load while keeping the original depth.</p> <h3 id="saliency-maps">Saliency maps</h3> <p>I use saliency maps to visualize “attention” of the networks. Saliency maps are known to be useful for understanding which parts of the input image are most influential for the model’s predictions. 
By visualizing the specific areas of an image that a CNN considers important for classification, saliency maps provide insights into the internal representation and decision-making process of the network<d-cite key="simonyan2014deep"></d-cite>.</p> <h1 id="experiment">Experiment</h1> <h3 id="the-setup">The setup</h3> <ul> <li>Each model was trained from scratch using the Split-CIFAR10 benchmark with 2 classes per task, for 3 epoches with a mini-batch size of 64.</li> <li>SGD optimizer with a 0.9 momentum and 1e-5 weight decay was used. The initial learning rate is set to 0.01 and the scheduler reduces it by a factor of 0.1 every 30 epochs, as done in <d-cite key="lin_clear_2022"></d-cite>.</li> <li>Cross entropy loss is used as the criterion, as is common for image classification in continual learning.</li> <li>Basic data augmentation is done on the training data to enhance model robustness and generalization by artificially expanding the dataset with varied, modified versions of the original images.</li> <li>Each model is trained offline as well to serve as baselines.</li> <li>Memory size of 500 is used to implement Experience Replay. This represents 1% of the training dataset.</li> </ul> <h3 id="implementation">Implementation</h3> <p>The continual learning benchmark was implemented using the Avalanche framework<d-cite key="lomonaco2021avalanche"></d-cite>, an open source continual learning library, as well as the code for online continual learning by Soutif et al.<d-cite key="soutif-cormerais_comprehensive_2023"></d-cite>. 
The experiments were run on Google Colab using NVIDIA Tesla T4 GPU.</p> <table> <thead> <tr> <th style="text-align: center"> </th> <th style="text-align: center"><strong>Experiment 1</strong></th> <th style="text-align: center"><strong>Experiment 2</strong></th> <th style="text-align: center"><strong>Experiment 3</strong></th> <th style="text-align: center"><strong>Experiment 4</strong></th> <th style="text-align: center"><strong>Experiment 5</strong></th> <th style="text-align: center"><strong>Experiment 6</strong></th> <th style="text-align: center"><strong>Experiment 7</strong></th> </tr> </thead> <tbody> <tr> <td style="text-align: center"><strong>Model</strong></td> <td style="text-align: center">ResNet18</td> <td style="text-align: center">ResNet34</td> <td style="text-align: center">ResNet50</td> <td style="text-align: center">SlimResNet18</td> <td style="text-align: center">ResNet18</td> <td style="text-align: center">ResNet34</td> <td style="text-align: center">ResNet50</td> </tr> <tr> <td style="text-align: center"><strong>Strategy</strong></td> <td style="text-align: center">Experience Replay</td> <td style="text-align: center">Experience Replay</td> <td style="text-align: center">Experience Replay</td> <td style="text-align: center">Experience Replay</td> <td style="text-align: center">Experience Replay</td> <td style="text-align: center">Experience Replay</td> <td style="text-align: center">Experience Replay</td> </tr> <tr> <td style="text-align: center"><strong>Benchmark</strong></td> <td style="text-align: center">SplitCIFAR10</td> <td style="text-align: center">SplitCIFAR10</td> <td style="text-align: center">SplitCIFAR10</td> <td style="text-align: center">SplitCIFAR10</td> <td style="text-align: center">SplitCIFAR10</td> <td style="text-align: center">SplitCIFAR10</td> <td style="text-align: center">SplitCIFAR10</td> </tr> <tr> <td style="text-align: center"><strong>Training</strong></td> <td style="text-align: center">Online</td> <td 
style="text-align: center">Online</td> <td style="text-align: center">Online</td> <td style="text-align: center">Online</td> <td style="text-align: center">Offline</td> <td style="text-align: center">Offline</td> <td style="text-align: center">Offline</td> </tr> <tr> <td style="text-align: center"><strong>GPU</strong></td> <td style="text-align: center">V100</td> <td style="text-align: center">T4</td> <td style="text-align: center">A100</td> <td style="text-align: center">T4</td> <td style="text-align: center">T4</td> <td style="text-align: center">T4</td> <td style="text-align: center">T4</td> </tr> <tr> <td style="text-align: center"><strong>Training time (estimate)</strong></td> <td style="text-align: center">3h</td> <td style="text-align: center">4.5h</td> <td style="text-align: center">5h</td> <td style="text-align: center">1h</td> <td style="text-align: center">&lt;5m</td> <td style="text-align: center">&lt;5m</td> <td style="text-align: center">&lt;5m</td> </tr> </tbody> </table> <div class="caption">Table 3: Details of experiments conducted in this study</div> <h1 id="results">Results</h1> <p>Average Anytime Accuracy (AAA) decreases with model size (Chart 1), with a sharper drop from ResNet34 to ResNet50. 
The decrease in AAA is more significant in online learning than offline learning.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-eunhae-project/AAA_on_off-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-eunhae-project/AAA_on_off-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-eunhae-project/AAA_on_off-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-eunhae-project/AAA_on_off.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">Chart 1: Average Anytime Accuracy (AAA) of different sized ResNets in online and offline continual learning</figcaption> </figure> <p>When looking at average accuracy for validation stream for online CL setting (Chart 2), we see that the rate to which accuracy increases with each task degrade with larger models. Slim-ResNet18 shows the highest accuracy and growth trend. 
This could indicate that larger models are worse at generalizing to a class-incremental learning scenario.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-eunhae-project/stream_acc1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-eunhae-project/stream_acc1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-eunhae-project/stream_acc1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-eunhae-project/stream_acc1.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">Chart 2: Validation stream accuracy (Online CL)</figcaption> </figure> <table> <thead> <tr> <th style="text-align: center"> </th> <th style="text-align: center"><strong>Average Anytime Acc (AAA)</strong></th> <th style="text-align: center"><strong>Final Average Acc</strong></th> </tr> </thead> <tbody> <tr> <td style="text-align: center"><strong>Slim ResNet18</strong></td> <td style="text-align: center">0.664463</td> <td style="text-align: center">0.5364</td> </tr> <tr> <td style="text-align: center"><strong>ResNet18</strong></td> <td style="text-align: center">0.610965</td> <td style="text-align: center">0.3712</td> </tr> <tr> <td style="text-align: center"><strong>ResNet34</strong></td> <td style="text-align: center">0.576129</td> <td style="text-align: center">0.3568</td> </tr> <tr> <td style="text-align: center"><strong>ResNet50</strong></td> <td style="text-align: center">0.459375</td> <td style="text-align: center">0.3036</td> </tr> </tbody> </table> <div class="caption">Table 4: Accuracy metrics across differently sized models (Online CL) </div> <p>Now we turn to forgetting.</p> <p>Looking at Average Cumulative Forgetting (ACF), we see that for 
online CL setting, ResNet34 performs the best (with a slight overlap at the end with ResNet18), and ResNet50 shows the mosts forgetting. An noticeable observation in both ACF and AF is that ResNet50 performed better initially but forgetting started to increase after a few tasks.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-eunhae-project/forgetting_online-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-eunhae-project/forgetting_online-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-eunhae-project/forgetting_online-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-eunhae-project/forgetting_online.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">Chart 3: forgetting curves, Online CL (Solid: Average Forgetting (AF); Dotted: Average Cumulative Forgetting (ACF))</figcaption> </figure> <p>However, results look different for offline CL setting. ResNet50 has the lowest Average Cumulative Forgetting (ACF) (although with a slight increase in the middle), followed by ResNet18, and finally ResNet34. 
This difference in forgetting between the online and offline CL settings is aligned
This phenomenon was more salient in the online CL setting.</p> <p><strong>Online</strong></p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-eunhae-project/saliency_online-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-eunhae-project/saliency_online-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-eunhae-project/saliency_online-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-eunhae-project/saliency_online.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">Image: Saliency map visualizations for Online CL</figcaption> </figure> <p><strong>Offline</strong></p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-eunhae-project/saliency_offline-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-eunhae-project/saliency_offline-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-eunhae-project/saliency_offline-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-eunhae-project/saliency_offline.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">Image: Saliency map visualization for Offline CL</figcaption> </figure> <p>Interestingly, Slim-ResNet18 seems to be doing better than most of them, certainly better than its plain counterpart ResNet18. 
A further exploration of model width on performance and representation quality would be an interesting avenue of research.</p> <p><strong>Slim-ResNet18</strong></p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-eunhae-project/saliencymap_exp4-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-eunhae-project/saliencymap_exp4-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-eunhae-project/saliencymap_exp4-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-eunhae-project/saliencymap_exp4.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">Image: Saliency map visualization (Slim ResNet18)</figcaption> </figure> <h1 id="discussion">Discussion</h1> <p>In this study, I compared key accuracy and forgetting metrics in online continual learning across ResNets of different depths and width, as well as brief qualitative inspection of the models’ internal representation. These results show that larger models do not necessary lead to better continual learning performance. We saw that Average Anytime Accuracy (AAA) and stream accuracy dropped progressively with model size, hinting that larger models struggle to generalize to newly trained tasks, especially in an online CL setting. Forgetting curves showed similar trends but with more nuance; larger models perform well at first but suffer from increased forgetting with more incoming tasks. Interestingly, the problem was not as pronounced in the offline CL setting, which highlights the challenges of training models in a more realistic, online continual learning context.</p> <p>Why do larger models perform worse at continual learning? 
One of the reasons is that larger models tend to have more parameters, which might make it harder to maintain stability in the learned features as new data is introduced. This makes them more prone to overfitting and forgetting previously learned information, reducing their ability to generalize.</p> <p>Building on this work, future research could investigate the impact of model size on CL performance by exploring the following questions:</p> <ul> <li>Do pre-trained larger models (vs trained-from-scratch models) generalize better in continual learning settings?</li> <li>Do longer training improve relatively performance of larger models in CL setting?</li> <li>Can different CL strategies (other than Experience Replay) mitigate the degradation of performance in larger models?</li> <li>Do slimmer versions of existing models always perform better?</li> <li>How might different hyperparameters (i.e. learning rate) impact CL performance of larger models?</li> </ul> <h1 id="conclusion">Conclusion</h1> <p>To conclude, this study has empirically explored the role of model size on performance in the context of online continual learning. Specifically, it has shown that model size matters when it comes to continual learning and forgetting, albeit in nuanced ways. 
These findings contribute to the ongoing discussions on the role of the scale of deep learning models on performance and have implications for future area of research.</p> VGAE Clustering of the Fruit Fly Connectome 2023-11-09T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/deep-connectome-clustering <h2 id="motivation">Motivation</h2> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-deep-connectome-clustering/fruit-fly-connectome-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-deep-connectome-clustering/fruit-fly-connectome-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-deep-connectome-clustering/fruit-fly-connectome-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-deep-connectome-clustering/fruit-fly-connectome.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> The fruit fly connectome.<d-cite key="winding2023connectome"></d-cite> </div> <p>Everything you’ve ever learned, every memory you have, and every behavior that defines you is stored somewhere in the neurons and synapses of your big, beautiful brain. The emerging field of connectomics seeks to build connectomes–or neuron graphs–that map the connections between all neurons in the brains of increasingly complex animals, with the goal of leveraging graph structure to gain insights into the functions of specific neurons, and eventually the behaviors that emerge from their interactions. This, as you can imagine, is quite a difficult task, but progress over the last few years has been promising.</p> <p>Now, you might be asking yourself, can you really predict the functions of neurons based on their neighbors in the connectome? 
A paper published by Yan et al. in 2017<d-cite key="yan2017network"></d-cite> asked this same question, searching for an answer in a roundworm (C. elegans) connectome. In their investigation, they discovered a neuron whose behavior had not been previously characterized, which they hypothesized was necessary for locomotion. They tested this hypothesis by ablating the neuron on a living C. elegans, and to the dismay of that poor roundworm, found that it was indeed necessary.</p> <p>Although impressive, the C. elegans connectome has only ~300 neurons, compared with the ~100,000,000,000 in the human brain; however, this year (2023):</p> <ol> <li>A paper by Winding et al.<d-cite key="winding2023connectome"></d-cite> has published the entire connectome of a fruit fly larvae, identifying 3016 neurons and their 548,000 synapses.</li> <li>Google Research has announced an effort to map a mouse brain (~100,000,000 neurons)<d-cite key="januszewski2023google"></d-cite></li> </ol> <p>This is exciting because the fruit fly dataset presents an opportunity to identify more nuanced functions of neurons that may be present in more complex species like mice, but not in simpler species like the roundworm. 
This creates the requirement for algorithms that are <strong>sufficiently expressive</strong> and able to disentangle the similarities between neurons that appear different, but are functionally similar.</p> <p>Furthermore, current efforts to map connectomes of increasingly complex animals makes it desirable to have algorithms that are <strong>able to scale</strong> and handle that additional complexity, with the hopes of one day discovering the algorithms that give rise to consciousness.</p> <h2 id="background">Background</h2> <h3 id="can-we-learn-about-human-brains-by-studying-connectomes-of-simpler-organisms">Can we learn about human brains by studying connectomes of simpler organisms?</h3> <p>The primate brain exhibits a surprising degree of specialization, particularly for social objects. For instance, neurons in the face fusiform area (FFA) in the IT cortex appear to fire only in response to faces. Furthermore, individuals with lesions in or brain damage to this area lose their ability to recognize faces <d-cite key="kanwisher_fusiform_2006"></d-cite>. In fact, there is even evidence of rudimentary face perception even in newborn infants with limited access to visual “training data,” who preferentially look at photos of faces, and other face-like arrangements, like inverted triangles (two vertices being the eyes and the third the mouth) <d-cite key="otsuka_face_2014"></d-cite>. While there may not exist a grandmother cell that can recognize your grandmother, there certainly seems to be at least some engineered specialization in the brain. Cognitive scientists theorize that there is a set of core systems for representing object, actions, number, space, and conspecifics (other people!), together constituting what we might call “common sense,” which may help determine the blueprint of the human brain down to the genetic level <d-cite key="spelke_core_2007"></d-cite>. Notably, facial recognition exhibits substantial genetic heritability (over 60%!) 
and appears to be uncorrelated with general intelligence <d-cite key="shakeshaft_genetic_2015"></d-cite>. We might imagine that there are a set of capabilities, including social cognition, that were so critical for human behavior that our brains evolved over hundreds of thousands of years to “hard code” certain structures, like the FFA, to help scaffold them. After all, another person’s face is an important signal for processes like mate selection, friendship formation, and theory of mind. The human brain and the cognitive processes it supports are evolutionary products. And even more importantly, the brain seems to be specialized in some ways, but behave flexibly in others. Through the scientific process, how good of an understanding can we reach about the complex organ sitting between our ears? To what degree are the neuronal assemblages in our brain specialized? How do the communications amongst these neurons grant us our incredible cognitive capabilities?</p> <p>In 1982, neuroscientist David Marr proposed three levels of analyses to study complex systems like the human mind: the computational level (what task is the system designed to solve?), the algorithmic level (how does the system solve it?), and the implementation level (where and how is the algorithm implemented in the system hardware?) <d-cite key="mcclamrock_marrs_1991"></d-cite>. At one end of the spectrum, we might think about characterizing the computational capabilities of human cognition, like object recognition. On the other end, we might be interested in how object recognition is implemented in the brain itself, in all of its fleshy glory–how an incoming visual signal is processed by composites of receptive fields in the retina (biological “Gabor filters”) and fed to neurons in the primary and secondary visual areas of the cerebral cortex, for instance <d-cite key="leeds_comparing_2013"></d-cite>. 
In recent years, scientists have developed an interest in understanding the implementation level at an extremely high resolution by charting the connectome–the comprehensive map of all neural connections in the brain. However, if the grandmother cell is too simplistic of a model for knowledge representation in the human brain, then indeed the human connectome may offer an overly complex view. It seems easy to get lost in the wilderness of its approximately 100 trillion neurons and the nearly quadrillion synapses which connect them <d-cite key="sporns_human_2005"></d-cite>! How can we begin to approach this overwhelming terra incognita?</p> <p>We might consider instead studying the connectome of a much simpler model organism, like the transparent 1mm-long nematode Caenorhabditis elegans, with whom we share an estimated 20-71% of our genes with <d-cite key="lai_identification_2000"></d-cite>. Or, maybe even the fruit fly Drosophila melanogaster, 60% of whose genes can also be found in the human genome (Max Planck). Even the study of such model organisms necessitates adding structure to complex, often unlabeled, relational data. And while the fruit fly brain is orders of magnitude less complex than our own, there are still over 3,000 neurons and half a million synapses to explore <d-cite key="winding2023connectome"></d-cite>(Winding et al., 2023). 
Luckily, mankind’s toolkit for studying graph-like data is well-equipped.</p> <h3 id="unsupervised-graph-representation-learning">Unsupervised graph representation learning</h3> <p>The problem of subdividing neurons in a connectome into types based on their synaptic connectivity is a problem of unsupervised graph representation learning, which seeks to find a low-dimensional embedding of nodes in a graph such that similar neurons are close together in the embedding space.</p> <p>A common way to identify functional clusters of neurons is through the lens of homophily, meaning that neurons serve the same function if they are within the same densely connected cluster in the connectome; however, this fails to capture the likely case that neurons with similar low-level functions span across many regions of the brain<d-cite key="winding2023connectome"></d-cite>.</p> <p>Instead, a better approach might be to cluster neurons based on their structural equivalence, such that groups of neurons with similar subgraph structures are embedded similarly, regardless of their absolute location in the connectome. This is the approach taken by Winding et al.<d-cite key="winding2023connectome"></d-cite>, who “used graph spectral embedding to hierarchically cluster neurons based on synaptic connectivity into 93 neuron types”. They found that even though they used only information about the graph structure to predict functions, neurons in the same clusters ended up sharing other similarities, including morphology and known function in some cases.</p> <p>Spectral embedding is a popular and general machine learning approach that uses spectral decomposition to perform a nonlinear dimensionality reduction of a graph dataset, and works well in practice. 
Deep learning, however, appears to be particularly well suited to identifying better representations in the field of biology (e.g., AlphaFold2<d-cite key="jumper2021highly"></d-cite>), and deep learning methods do appear to be capable of creating embeddings that more effectively preserve the topology of nodes in graphs<d-cite key="zhu2023unsupervised"></d-cite><d-cite key="kipf2016variational"></d-cite>.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-deep-connectome-clustering/vgae-embedding-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-deep-connectome-clustering/vgae-embedding-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-deep-connectome-clustering/vgae-embedding-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-deep-connectome-clustering/vgae-embedding.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Learned VGAE graph embedding for Cora citation network dataset.<d-cite key="kipf2016variational"></d-cite> </div> <p>Thus, it stands to reason that deep learning might offer more insights into the functions of neurons in the fruit fly connectome, or at the very least, that exploring the differences between the spectral embedding found by Winding et al. and the embeddings discovered by deep learning methods might provide intuition as to how the methods differ on real datasets.</p> <p>In this project, we explore the differences between functional neuron clusters in the fruit fly connectome identified via spectral embedding by Winding et al. and deep learning. 
Specifically, we are interested in exploring how spectral embedding clusters differ from embeddings learned by Variational Graph Auto-Encooders (GVAE)<d-cite key="kipf2016variational"></d-cite>, which are a more recent architecture proposed by one of the co-authors of the Variational Auto-Encoders (VAE) paper<d-cite key="kingma2013auto"></d-cite>, Max Welling. GVAEs are an interesting intersection of graph neural networks (GNNs) and VAEs, both of which we explored in class, and comparing this technique to spectral embedding is relevant because of our previous discussions of spectral decomposition in class with respect to network scalability and RNN weights.</p> <p>We hypothesize that a deep learning technique would be better suited to learning graph embeddings of connectomes because they are able to incorporate additional information about neurons (such as the neurotransmitters released at synapses between neurons) and are able to learn a nonlinear embedding space that more accurately represents the topological structure of that particular connectome, learning to weight the connections between some neurons above others.</p> <p>Before we can discuss the experiments, however, we first provide more detail for Spectral Embedding and Graph Variational Autoencoders and compare the two methods.</p> <h2 id="methods">Methods</h2> <h3 id="spectral-embedding">Spectral Embedding</h3> <p>One classical approach for understanding graph-like data comes from a class of spectral methods which use pairwise distance measures between data points to embed and cluster data. Spectral methods offer two obvious advantages when compared to other machine learning approaches. One, we can straightforwardly perform clustering for datasets which are inherently relational, like the connectome, where it is not immediately clear how a method like k-means can be used when we only have access to the relationships between data points (the “edges”) and not the node-level features themselves. 
Two, spectral methods are <strong>nonlinear</strong>, and don’t rely on measures like squared Euclidean distance, which can be misleading for data which are tangled in high dimensions, but which exhibit a lower <strong>intrinsic</strong> dimensionality.</p> <p>So, how does spectral embedding work, exactly? In short, an adjacency matrix is first calculated from the original dataset, which is then used to compute the graph Laplacian. Next, a normalized graph Laplacian is then eigen-decomposed and generates a lower dimensional embedding space on which simpler linear clustering algorithms, like k-means, can be used to identify untangled clusters of the original data.</p> <p>This class of methods makes no assumptions about the data (including cluster shape) and can be adjusted to be less noise sensitive–for example, by performing a t-step random walk across the affinity matrix for the data, as in diffusion mapping <d-cite key="coifman_geometric_2005"></d-cite>. An added benefit is that under the hood, spectral embedding can be performed by a series of linear algebra calculations, making it extremely time-efficient. However, as with many unsupervised learning methods, clustering based on spectral embeddings is difficult to scale–in our case, due to the eigen-decomposition step of the graph Laplacian.</p> <h3 id="variational-graph-autoencoders">Variational Graph Autoencoders</h3> <p>Although Spectral Embedding is still very popular, in recent years, more attention has been paid to the burgeoning field of geometric deep learning, a set of ideas which aim to to solve prediction or embedding tasks by taking into account the relational structure between data points. One example is the variational graph auto-encoder (VGAE), which learns to embed a complex object like a network into a low-dimensional, well-behaved latent space. 
Kipf and Welling (2016)<d-cite key="kipf2016variational"></d-cite> propose an encoder using a two-layer graph convolutional network, which performs convolutions across local subgraphs of the input network data (not unlike convolution on images, where the graph is a grid!). The graph is projected onto a low dimensional space distributed according to the standard normal through the optimization of a variational lower bound loss, and then upsampled using an inner product between latent variables. They show that this method achieves competitive results on a link prediction task when compared to other methods like spectral clustering and DeepWalk, a random walk-based representation learning algorithm.</p> <p>On the other hand, some have discovered that spectral embedding leads to more clear separability in low dimensional representation spaces for text data compared to GNN approaches like node2vec, which reportedly achieve state-of-the-art (sota) scores for multilabel classification and link prediction in other datasets <d-cite key="grover_node2vec_2016"></d-cite><d-cite key="palamuttam_evaluating_nodate"></d-cite>. In addition, it appears that simple modifications like performing an error correlation correction on the training data and smoothing predictions on the test data for GNN-free architectures lead to sota-comparable performances <d-cite key="huang_combining_2020"></d-cite>. There are even concerns that the performance of geometric deep learning approaches are inflated, particularly in tasks like multi-label node classification, due to the assumption that the number of labels for test data are known to researchers <d-cite key="lin_use_2021"></d-cite>.</p> <p>Thus, it remains unclear in what circumstances relatively novel geometric deep learning approaches do better compared to established and widely-explored methods like spectral learning, and particularly for novel data like the connectome. 
In this work, we attempt to gain deeper insights into which method is better suited to the task of connectome modeling, with the hope of learning about which method should be implemented in future connectomes, such as that of the mouse and eventually the human.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-deep-connectome-clustering/background_visual-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-deep-connectome-clustering/background_visual-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-deep-connectome-clustering/background_visual-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-deep-connectome-clustering/background_visual.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Spectral Clustering (von Luxburg, 2007; Park, Jeon, &amp; Pedryc, 2014) vs (V)GAEs (Kipf &amp; Welling, 2016): A Story in Pictures </div> <h2 id="experiments">Experiments</h2> <p>Now that we have a good idea of how these methods compare to each other in terms of implementation, we explore them from an experimental perspective. Through our experiments, we try to quantitatively and qualitatively address the question of how connectome clusters learned by GVAE compare to the spectral clusters found in the paper. To answer this question, we make use of the fruit fly connectome adjacency matrix provided by Winding et al. 
as our primary dataset with the hope of answering this question for our readers.</p> <h3 id="experiment-1-link-prediction">Experiment 1: Link Prediction</h3> <p>One common way to compare unsupervised graph representation learning algorithms is through a link prediction task, where a model is trained on a subset of the edges of a graph, and then must correctly predict the existence (or non-existence) of edges provided in a test set. If the model has learned a good, compressed representation of the underlying graph data structure, then it will be able to accurately predict both where missing test edges belong, and where they do not.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-task-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-task-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-task-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-task.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> A link prediction task. Green lines correspond to the training data, which contains samples of positive samples of edges that are present in the graph, and negative samples of edges that are not present in the graph. The test set in red corresponds to the remainder of positive and negative samples in the graph. </div> <p>We evaluate the models by computing the area under curve (AUC) of the ROC curve, which plots the true positive rate against the false positive rate. 
A completely random classifier that does not learn anything about the underlying graph structure would get an AUC of 0.5, while a perfect classifier would have an area of 1.0.</p> <p>Another metric we use to evaluate how good the models are is average precision (AP) of the precision-recall curve, which describes the consistency of the model.</p> <p>In addition to comparing the models with these metrics, we also explore how robust they are to decreasing dimensionalities of the latent space. We hypothesize that if a model is able to maintain high AUC and AP, even at very low-dimensional embedding spaces, then it is likely better at capturing the structure of the connectome and is more likely to be able to scale to larger datasets, like that of the human brain one day.</p> <p>Running this experiment yields the following curves, where the x-axis shows the dimensionality of the latent space, and the y-axis shows the AUCs and APs of the respective models.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-auc-ap-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-auc-ap-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-auc-ap-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-deep-connectome-clustering/link-prediction-auc-ap.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>From this experiment, we find that both the Graph Autoencoder (GAE) and Variational Graph Autoencoder (VGAE) perform better than Spectral Embedding methods in terms of AUC and AP, indicating that the models might be better suited to 
capturing the nuances in the fruit fly connectome. At the dimensionality used for spectral embedding in Winding et al., d=24, we find that the models have comparable performance, but as we reduce the dimensionality of the learned embedding, the spectral embedding method quickly breaks down and loses its ability to capture significant features in the data, with an AUC of 0.52 at a dimensionality of 2. Since a score of 0.5 corresponds to a random model, this means that the spectral embedding method is no longer able to capture any meaningful structure in the data at that dimensionality. Winding et al. gets around this by only using spectral embedding to get a latent space of size 24, and then performing a hierarchical clustering algorithm inspired by Gaussian Mixture Models, but the simplicity and robustness of the GAE model seems to show that they may be better suited to modeling the types of functional neurons present in the connectomes of animals.</p> <h3 id="experiment-2-gvae-latent-exploration">Experiment 2: GVAE Latent Exploration</h3> <p>Although the link-prediction experiment gives us a quantitative comparison of the models, we also believe it is important to explore the latent embeddings learned by GAE to see how they qualitatively compare with the learned embeddings used in the Winding et al. work. After observing that the GAE was robust to a latent space of size 2, we decided to look specifically at if there were any similarities between the clusters found by the GAE with the 2-d embedding and the level 7 clusters published by Winding et. al. 
Also, although the GAE showed better overall performance, we decided to specifically explore the Variational GAE because we expect it to have a latent manifold similar to that of the Variational Autoencoders.</p> <p>To this end, we first trained a Variational GAE with a 2-d latent space on the full fruit fly connectome and extracted the latent embedding of each node in the connectome.</p> <p>With this latent embedding, we first visualized the latent space using colors corresponding to the 93 clusters identified by Winding et al. Clusters of the same color in the learned GAE latent space mean that the VGAE identified the same cluster that was identified in the Winding et. al. paper and areas where there are many colors within a cluster mean that GAE found a different cluster compared to spectral embedding.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-deep-connectome-clustering/explore_cluster-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-deep-connectome-clustering/explore_cluster-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-deep-connectome-clustering/explore_cluster-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-deep-connectome-clustering/explore_cluster.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Coloring the GVAE latent space by the found level 7 clusters Winding et al. Black points correspond to neurons that were not assigned a cluster by Winding et al. 
</div> <p>As seen in the figure above, we find that while VGAE projects directly to a 2-d latent space without any additional clustering to reduce the dimensionality, the learned embedding still shares many similarities with the spectral embedding down to a dimensionality of 24 followed by Gaussian Mixture Model hierarchical clustering. Therefore, using VGAE to learn a direct 2-d latent space still captures much of the same information that a more complex machine learning algorithm like spectral embedding is able to.</p> <p>We further explored the learned latent space by looking at whether the learned embedding had any correlation with the cell types identified in the fruit fly larvae connectome. Since the VGAE only had information about the structure of the graph embedding, clusters of similar colors in this figure mean that the cell type within the cluster shared a lot of common structures, like potentially the same degree or being connected to similar types of up or downstream neurons.</p> <p>We use the same color palette as the Winding et al. paper so that cell types in the level 7 clusters of the Winding et al. 
paper can be directly compared to the learned VGAE latent embedding.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-deep-connectome-clustering/clustering-cell-type-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-deep-connectome-clustering/clustering-cell-type-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-deep-connectome-clustering/clustering-cell-type-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-deep-connectome-clustering/clustering-cell-type.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Coloring the Winding et al. level 7 clusters (left) and GVAE latent space (right) by cell types. This information was not provided to either algorithm during training, so clusters of the same cell type mean that its type can be inferred from structure only. </div> <p>As seen in the figure above, both spectral embedding and GVAE latent spaces capture knowledge about the cell types when trained purely on the graph structure. 
We believe this is because cells of this type have similar properties in terms of the types of neighboring neurons they connect to in the connectome, and they may also have special properties like higher degree of connections.</p> <p>In particular, it is interesting that sensory neurons and Kenyon cells are very well captured by both embeddings, and that MBIN cells and sensory neurons are clustered together by both their spectral embedding algorithm and VGAE.</p> <h2 id="discussion">Discussion</h2> <p>Our preliminary investigations show that deep learning algorithms such as Graph Autoencoders (GAEs) and Variational Graph Autoencoders (VGAEs) are able to capture at least as much nuance and information about function as spectral embedding algorithms. In addition, they come with the following advantages:</p> <ol> <li>With their current implementation, they can easily be run on a GPU, while common spectral embedding algorithms in libraries such as scikit-learn are only designed to work on CPUs. Since we take a deep learning approach, our GNN method can use batches optimized via Adam, while spectral embedding only works if the entire adjacency matrix fits in memory. This makes deep learning methods <strong>better able to scale to larger datasets</strong> such as the mouse connectome that may come in the next few years.</li> <li>As shown in experiment 2, GAEs and Variational GAEs are <strong>able to directly learn a robust embedding into a 2-d space</strong> without any additional clustering, making interpretation easy and fast. 
We suspect that because of its higher performance at embedding connectomes to such low dimensions compared to spectral embedding which performs only marginally better than a random algorithm at such low dimensions, VGAEs must be capturing some additional nuance of the graph structures that spectral embedding is simply not able to encode.</li> <li>Comparing the 2-d embeddings of VGAE to the clustered 24-d spectral embeddings found in Winding et al. we find that even when compressing to such a low-dimensional space, the semantic information captured does in fact match that of spectral embedding at a higher dimensional space. Coloring by cell type shows that it also <strong>captures information about the function of neurons</strong>, with similar neuron types being clustered together even when they are located all over the brain, such as Kenyon cells. Cells of the same type likely serve similar functions, so in this respect, VGAE is able to capture information about the function of cells using only knowledge of the graph structure.</li> </ol> <p>However, VGAE does not come without its <strong>limitations</strong>. One large limitation we found while implementing the architecture is that it currently requires graphs to be <strong>undirected</strong>, so we had to remove information about the direction of neurons for this work. Connectomes are inherently directed, so we likely missed some key information about the function of graphs by removing this directional nature of the connectome. Although this is not explored in our work, one simple way to fix this would be to add features to each node corresponding to the in-degree and out-degree of each neuron.</p> <p>This brings us to another limitation of our study, which is that we did not explore <strong>adding features to neurons</strong> in our connectome with the VGAE algorithm. 
Past work on GAEs has shown that adding features leads to better model results <d-cite key="kipf2016variational"></d-cite> and makes the model better able to capture relevant structures in the data. We did not feel that would be a fair comparison with Winding et al. because spectral embedding methods are not able to include additional features related to nodes that one would get for free when mapping the connectome, but we believe that including these features in the GAE structure would lead to an even better representation of the underlying dataset. Examples of these “free” features we could get that would help us predict functions of neurons include 1) the hemisphere the cell belongs to (e.g., not in fruit flies, but neurons in the left brain of humans correspond to language), 2) the axon I/O ratio, and the dendrite output-input ratio of a neuron.</p> <p>One final limitation is that our <strong>model only trains on a single connectome</strong>. This means that we aren’t able to capture the variation of connectomes within a species. Maybe one day, we will be able to scan connectomes of people in the same way that we are able to scan genomes of people, but that day is likely still far away. We might be able to help this by using the generative component of the VGAE to create brains that are physically feasible given the structure of a single connectome, but it would be hard to test. Since we are currently only looking at the connectome of a single species, we likely aren’t capturing an embedding space that finds functionally similar neurons in different animals such as C. elegans, which we may be able to do in future work.</p> <h2 id="conclusion">Conclusion</h2> <p>In this work, we asked if Deep Learning techniques like Variational Graph Autoencoders could learn something about the functions of cells in a connectome using only the graph structure. We found that VGAE did in fact capture relevant structures of the graph, even in the undirected case. 
It performed similarly to spectral embedding, even when embedding directly into a visualizable 2-d latent space. In the future, we may be able to learn about neurons that serve the same purpose across species, or learn about the underlying low level syntactic structures like for-loops or data types that our brain uses to encode consciousness, vision, and more.</p> Robust Image to Video Generation Using Contrastive Diffusion Over Latents 2023-11-09T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/contrastivediffusion-image2video <h2 id="introduction-and-motivation">Introduction and Motivation</h2> <p>With recent advances in computer vision and generative AI, we all have observed the various feats that diffusive models have achieved in conditional image generation. These models have demonstrated unparalleled ability in creativity, fidelity, and relevance when generating images from text prompts. Given this explosive success of diffusion for the task of image generation, the idea of applying the same concepts to conditional video generation seems like a logical follow-up. Yet, the field still lacks robust and compelling methods for conditional video generation with diffusion models. This raises the question: why might this be? Or perhaps a follow-up: what makes videos so hard in comparison to images?</p> <p>In an attempt to address our first question, if we take a brief dive into previous literature, we will find that the issue is not a lack of effort. Ho et al. <d-cite key="ho2022video"></d-cite>, Zhang et al. <d-cite key="2023i2vgenxl"></d-cite>, and Chen et al. <d-cite key="chen2023videocrafter1"></d-cite>, all explore this idea, yet the results from these methods are not nearly as exciting as the results we see in images. But why is this?</p> <p>Perhaps the answer lies in the solution to our second question. 
One of the most obvious complexities that videos have over images is also perhaps one of the most difficult: the temporal dependence between frames. But why is this relationship so hard for diffusion models? Following the work of Zhu et al. <d-cite key="zhu2022discrete"></d-cite>, we hypothesize that this is because the implicit learning of diffusive steps between images in a video is too complex of a problem for a diffusion model; relying on the model to learn the implicit relationship between representations of video frames is infeasible from a training and convergence standpoint. If we can instead learn diffusive steps over a more regularized learned latent space, the optimization problem can be greatly simplified and the diffusion model will in theory be more robust.</p> <h2 id="related-work">Related Work</h2> <p>Taking a step back to examine the current state of research, we find that current image-to-video frameworks typically still use a traditional diffusion architecture, going straight from text and image representations to an output image. However, this naive approach struggles with serious issues like frame clipping and loss of contextual information, which is expected since noise-based sampling can easily throw off the output of individual frames.</p> <p>Hence, Ho et al. in 2022 proposed the first solution, supplementing conditional sampling for generation with an adjusted denoising model that directly forces image latents to be more similar to the corresponding text latents <d-cite key="ho2022video"></d-cite>. While this achieved improved results over the straightforward diffusion approach, this often forces the model to stick too closely to the text latent, resulting in incoherent videos.</p> <p>To solve this issue, two recent approaches from Chen et al. and Zhang et al. have proposed methods to augment the video diffusion models themselves. Chen et al. 
uses the image encodings from CLIP-like language embeddings in an encoder-decoder language model, feeding the CLIP encodings at each step into a cross-attention layer that generates attention scores with the current video generation <d-cite key="chen2023videocrafter1"></d-cite>. In doing so, additional coherence between frames is achieved. On the other hand, Zhang et al. use multiple encoders, with CLIP and VQ-GAN concatenated before two stages of diffusion model training, which they claim provides the hierarchical learning required to learn the temporal processing <d-cite key="2023i2vgenxl"></d-cite>. However, both these models are extremely data-heavy and still suffer from hallucination and frame skipping.</p> <p>To remedy these issues in diffusion models, Ouyang et al. and Zhu et al. posit that the implicit representation learning objective in diffusion models is the primary cause of the slow convergence and hallucination issues. Specifically, diffusion models do not directly compare their output to their input, as in contrastive models, instead performing a variational approximation of the negative log-likelihood loss over the full Markov chain. Instead, Ouyang and Zhu propose to train the diffusion model to output a structured latent in the latent space of a contrastive model like a VQ-VAE, which then reconstructs the output image <d-cite key="zhu2022discrete"></d-cite> <d-cite key="ouyang2023improving"></d-cite>. In doing so, a contrastive term can be added to the loss of the diffusion model, maximizing the mutual information between the structured (output) latent and input latent, leading to stronger correlations between input and output, and hence improved convergence. 
Hence, this approach seems to have potential in fixing the hallucination and coherence issues in video diffusion models, without the need for added complexity.</p> <h2 id="proposed-project-outline">Proposed Project Outline</h2> <p>Thus, we propose a novel method for conditional video generation (generating videos given a starting frame and text description) by utilizing an autoencoder framework and contrastive loss to train a regularized latent space in which a diffusion model can operate. Following the line of thought introduced above, we hypothesize that under such a formulation, the diffusion model is much more robust to temporal inconsistency, because of the regularity in the latent space. For example, if we imagine a highly regularized latent space, we will find all logical next frames for a given anchor frame clustered very closely around the anchor in this latent space. Therefore, any step the diffusion model takes would produce valid subsequent frames; it suffices simply for the model to learn which direction to go given the conditioned text prompt.</p> <p>With this in mind, we detail the construction of the model by describing its components as follows:</p> <ol> <li>An encoder for image data is used to map a given video frame into our latent space</li> <li>An encoder for text data is used to map a given video description into our latent space</li> <li>A diffusion-based model operates within the latent space, diffusing between different vectors within this latent space.</li> <li>A decoder is used to generate images from vectors in this latent space.</li> </ol> <p>The training process of such a model will involve the optimization of a diffusion/contrastive loss based on a given pair of adjacent video frames, as well as the corresponding text description for that video. 
We define a training step to involve the following:</p> <ol> <li>Both video frames and the text description are encoded into our latent space.</li> <li>One iteration of our diffusive model is run by diffusing from the latent vector corresponding to our earlier frame conditioned on our text prompt latent to obtain a new latent vector.</li> <li>This new latent vector after cross-attention is passed through the decoder to obtain our predicted subsequent frame.</li> <li>We then optimize our model according to the contrastive diffusion model loss presented by <d-cite key="zhu2022discrete"></d-cite> with a key alteration: we replace their contrastive loss with our contrastive loss, which contains two terms: <ol> <li>a term that aims to push our two adjacent video frames closer together in our latent space and</li> <li>a term that aims to push video frames closer to the text description in our latent space.</li> </ol> </li> </ol> <p>During inference, we generate a video through the following process:</p> <ol> <li>An initial frame and the text description are encoded into our latent space</li> <li>We run an arbitrary number of diffusive steps, generating a latent at each step.</li> <li>We decode the latent at each time step to obtain our video frame at that time step; stringing these frames together produces our video.</li> </ol> <p>From a more theoretical perspective, this method essentially aims to restrict the diffusion model’s flexibility to paths within a highly regularized, lower dimensional latent space, as opposed to the entire space of images that classical diffusion-based approaches can diffuse over. Such a restriction makes it much harder for the diffusion model to produce non-sensible output; the development of such a method would therefore enable the robust generation of highly temporally consistent and thus smooth videos. We also imagine the value of producing such a latent space itself. 
An interesting exercise, for example, is taking an arbitrary continuous path along vectors within a perfectly regular latent space to obtain sensible videos at arbitrary framerates.</p> <h2 id="evaluation">Evaluation</h2> <p>There are two axes along which we wish to evaluate our model: quality of generation, and quality of the contrastive latent space.</p> <h3 id="generation-quality">Generation Quality</h3> <p>To measure generation quality, we follow the approach presented by Ho et al., evaluating famous metrics like the FID, FVD, and IS scores. For all of these metrics, we expect to evaluate them throughout the video from beginning to end, with the level of preservation of metric values throughout a video indicating consistent video quality. Similarly, we will compare our models to those of similar size using the same metrics to evaluate whether adding the contrastive loss term truly improves generation quality. These metrics will be supplemented with qualitative human analyses, where we will score the videos on a variety of axes including coherence and relevance to the prompt.</p> <h3 id="use-of-contrastive-latent-space">Use of Contrastive Latent Space</h3> <p>Given that the diffusion model now maps to a much smaller latent space when compared to the whole space of output images, we believe that the diffusion output should have interpretable representations in the latent space. Hence, we will begin by exploring the latents generated by different text prompts, clustering them around the image source encodings to evaluate if the contrastive loss has truly clustered appropriately. On top of that, we plan to visualize the trajectories of videos for both the training set and our generations, to evaluate our theory of continuous trajectory evolution in the latent space.</p> <h2 id="implementationdeliverables">Implementation/Deliverables</h2> <p>The implementation of such a method can be greatly simplified through the use of an existing codebase. 
We plan on using the contrastive diffusion model <a href="https://github.com/L-YeZhu/CDCD/tree/main">GitHub repository</a> for the implementation of <d-cite key="zhu2022discrete"></d-cite> with a few key modifications:</p> <ul> <li>We use a pre-trained contrastive model as our starting point (such as an image encoder/decoder from CLIP) <d-cite key="Radford2021LearningTV"></d-cite></li> <li>The diffusion model is trained to predict the next frame of a video conditioned on a given text description of the video and the current frame of the video as above.</li> <li>Our contrastive loss is used as described above.</li> <li>Inference is modified to generate a video as described above.</li> </ul> <p>Data for this project requires video/text description pairs. There are a few datasets consisting of such data, including the <a href="https://www.kaggle.com/datasets/vishnutheepb/msrvtt">MSR-VTT dataset</a>, which is human-annotated, and the <a href="https://github.com/OpenGVLab/InternVideo/tree/main/Data/InternVid">InternVid dataset</a>, which is annotated by LLMs.</p> <p>The project should be feasible to complete within the remaining time in the semester, with a rough timeline of deliverables as follows:</p> <ul> <li><strong>Implementation</strong> of our method by applying the specified modifications to the existing codebase should take around 1-2 weeks.</li> <li><strong>Training</strong> of the models on cloud computing resources should take &lt;1 week.</li> <li><strong>Evaluation and benchmarking</strong> along with data visualization should take 1 week, even with the potential need for retraining our models.</li> <li><strong>Blog writing</strong> should take &lt;1 week and can be completed in parallel with evaluation and benchmarking.</li> </ul> Adaptive Controller with Neural Net Equations of Motion for High-DOF Robots 2023-11-09T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/adaptive-controller-graph-eom <h2 id="introduction">Introduction</h2> 
<p>Adaptive controllers are integral to modern robotic arms, enabling robots to adjust to dynamic environments and internal variations such as actuator wear, manufacturing tolerances, or payload changes. At the heart of such controllers is the formulation of the robot’s Equations of Motion (EoM), typically expressed in the form:</p> <p>The standard symbolic form of EoM is represented as:</p> \[M(q)q'' + C(q, q') = T(q) + Bu\] <p>where:</p> <ul> <li>( M(q) ) is the mass matrix</li> <li>( C(q, q’) ) represents Coriolis and centripetal forces</li> <li>( T(q) ) depicts gravitational torques</li> <li>( B ) is the input transformation matrix</li> <li>( u ) denotes control input</li> <li>( q, q’ ) are the joint angle state variables and their derivatives, respectively.</li> </ul> <p>The symbolic complexity of the EoM increases considerably for robots with a high Degree of Freedom (DOF), due to the analytical resolution of the Lagrangian or Hamiltonian dynamics required. While these equations can be derived algorithmically, the computational burden is significant, and the resulting symbolic equations are extensively lengthy. To illustrate, consider the EoM for a 7-DoF Panda Emika Franka robot arm <a href="https://github.com/marcocognetti/FrankaEmikaPandaDynModel/tree/master/matlab/dyn_model_panda">(link)</a>. 
The code that determines the EoM is extraordinarily verbose.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/panda-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/panda-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/panda-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/panda.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>The aim of this project is to bypass the need for an explicit symbolic articulation of the EoM by formulating a neural network representation. With an accurately modeled neural network, it could serve as a foundational element in the development of an adaptive controller. The goal is for the controller to adapt a robotic arm’s physical parameters based on calibration sequences and to estimate the mass and inertia matrix of unfamiliar payloads.</p> <p>Aside from symbolic representation, the EoM can also be computed numerically at each operating point using the Recursive Inertia Matrix Method <d-cite key="featherstone2008rigid"></d-cite> , which has a computational complexity of ( O(n^3) ), where ( n ) is the number of joints in the rigid body. Substituting this computation-heavy method with a neural network, we can potentially calculate the forward dynamics in linear time, albeit with a trade-off in memory usage.</p> <h2 id="background-and-related-work">Background and Related Work</h2> <p>Before we delve into neural net architecture, let’s take a look closer at our problem and how it’s solved right now. 
To come up with the symbolic equation for the EOM, we use Lagrangian Mechanics in which we compute the Potential, U, and Kinetic Energy, T, of our system.</p> <p>\(L = T - U\) \(\frac{d}{dt} \left( \frac{\partial L}{\partial \dot{q}_i} \right) - \frac{\partial L}{\partial q_i} = u_i\)</p> <p>We now briefly describe how this turns into the manipulator equations. Working through these equations, a pattern emerges in which you can group the terms into the manipulator equations.</p> \[M(q)q'' + C(q, q') = T(q) + Bu\] <p>This method works well when the degree of freedom in the system is low. It provides much insight into how the dynamics of the system work. For example, the kinetic energy can be represented as:</p> \[T = \frac{1}{2} \dot{q}^T M(q) \dot{q}\] <p>Highlighting that ( M ) is symmetric and positive definite. However, as introduced earlier, this method scales poorly with complexity in higher DOF systems.</p> <p>However, as shown in the introduction, when this method is used for a 7 DOF system, the resulting equation is extraordinarily complex.</p> <p>Bhattoo et al. <d-cite key="bhattoo2022learning"></d-cite> introduced a graph neural network to represent the potential and kinetic energy of rope systems—a high DOF system—by segmenting the system into short segments. Each segment was then treated as a node in the graph neural network. Although they didn’t derive the forward dynamics using the Lagrangian formulation, the prospect of representing serially linked robot arms with graph neural networks was indicated as feasible.</p> <p>The other approach to create the manipulator equation is to numerically calculate it at each operating point. There are two versions of this equation, the inverse dynamics and the forward dynamics version. In the inverse dynamics formulation, we essentially calculate \(M(q)q'' + C(q, q') - T(q) = Bu\)</p> <p>Given a particular state of the robot and a desired acceleration, it computes the required torque. 
The inverse dynamics formulation can be computed with the Recursive Newton-Euler Algorithm with an O(n) complexity where n is the number of joints <d-cite key="featherstone2008rigid"></d-cite> . The key idea for this algorithm is that the motion of a body is directly influenced by the previous link. It’s essentially a dynamic programming algorithm in which you can store the motion of one body and then apply it to the next body. This suggests that a directed graph neural net is sufficient to represent our model.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/linked_motion-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/linked_motion-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/linked_motion-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/linked_motion.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>While very efficient to calculate, the inverse dynamics is not as useful as the forward dynamics version if the end goal is to create an adaptive controller. The forward dynamics is the model that describes the accelerations of the system based on the current state and torque input.</p> \[q'' = M(q)^{-1} (T(q) + Bu - C(q, q'))\] <p>This formulation is more useful for an adaptive controller as we can compare the predicted acceleration and the actual acceleration, using their difference as a loss to compute the gradient with respect to the model parameters. The problem with the forward dynamics problem is that it requires an O(n^3) computation for a serially linked robot arm (the mass matrix inversion must be done). 
The algorithm for Forward Dynamics is called Inertia Matrix Method <d-cite key="featherstone2008rigid"></d-cite> . One physical intuition that can be gleaned from the algorithm is that reaction input torques propagate down the chain. Once again, this indicates that there is a one way information flow from one link to the next. Given that this version is more computationally expensive, it would be more valuable to tackle with a neural net representation than the much faster inverse dynamics problem.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/reaction_forces-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/reaction_forces-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/reaction_forces-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/reaction_forces.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h2 id="experiments-and-results">Experiments and Results</h2> <h3 id="generating-training-data">Generating Training Data</h3> <p>Utilizing numerical methods implemented in MATLAB, we generated a large volume of training data, spanning the full operational space of the robot arm. We based our robot arm model on realistic parameters from the publicly available data of the Franka Emika Panda, comprising a total of 10 links, seven revolute joints, and two fixed joints. 
After disregarding the base link, we have a model with 10 parameters for each link (mass, center of mass as a 1x3 vector, and the symmetric inertia matrix flattened into a 1x6 vector) and joint properties (angle, angular velocity, angular acceleration, and torque).</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/randomConfig-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/randomConfig-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/randomConfig-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/randomConfig.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>We simulated the arm moving from one random configuration to another—marked in the image above by an X — recording states, torques, and accelerations during transitions. To introduce variability, we applied realistic perturbations to the physical properties of each link after every 100 recorded motion paths. In total, we accumulated 250,000 data points</p> <h3 id="attempt-1-graph-neural-net">Attempt 1: Graph Neural Net</h3> <p>As inspired by Bhatoo, we rearrange the dataset as a Graph Dataset based on the PyTorch Geometric Library. Each node contains the 10 physical property parameters, angle, angular velocity, and torque input. In total, each node has 13 features. The output is set to be angular acceleration of the 7 joints (1x7 vector). As for the edge index, the graph is defined to be directed, either information flows from the last node to the first or the first node to the last node. 
This is inspired by the physical intuition that forces propagate sequentially from one body to the next, and that motion with respect to the global coordinate frame is also sequentially dependent on the previous body link.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/node-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/node-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/node-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/node.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>We applied nine iterations of the Graph Convolution Layer, ensuring information flow from one end of the arm to the other.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/GNN-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/GNN-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/GNN-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/GNN.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Despite extensive parameter tuning, learning rate adjustments, and the application of various schedulers, the loss showed no convergence. 
Potential reasons for this include the complexity in capturing temporal dependencies and the possible oversimplification of force propagation through the links using graph convolutions. Stacking 9 different graph convolution layers also increases model complexity needlessly.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/gnn_plot-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/gnn_plot-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/gnn_plot-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/gnn_plot.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h3 id="attempt-2-lstm">Attempt 2: LSTM</h3> <p>Reevaluating the necessity for graph neural networks, we considered the inherent sequential nature of the information flow in our system. There are no branches in the structure of a serially linked robot arm; hence, an LSTM, which excels in capturing long-range dependencies in sequence data, seemed appropriate. 
The input sequence now reflects the node properties from the previous attempt, and our LSTM architecture is defined as follows:</p> <div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">class</span> <span class="nc">RobotLSTM</span><span class="p">(</span><span class="n">nn</span><span class="p">.</span><span class="n">Module</span><span class="p">):</span> <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="n">self</span><span class="p">,</span> <span class="n">input_size</span><span class="p">,</span> <span class="n">hidden_size</span><span class="p">,</span> <span class="n">hidden_size2</span><span class="p">,</span> <span class="n">output_size</span><span class="p">,</span> <span class="n">num_layers</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span> <span class="nf">super</span><span class="p">(</span><span class="n">RobotLSTM</span><span class="p">,</span> <span class="n">self</span><span class="p">).</span><span class="nf">__init__</span><span class="p">()</span> <span class="n">self</span><span class="p">.</span><span class="n">hidden_size</span> <span class="o">=</span> <span class="n">hidden_size</span> <span class="n">self</span><span class="p">.</span><span class="n">num_layers</span> <span class="o">=</span> <span class="n">num_layers</span> <span class="c1"># LSTM Layer </span> <span class="n">self</span><span class="p">.</span><span class="n">lstm</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">LSTM</span><span class="p">(</span><span class="n">input_size</span><span class="p">,</span> <span class="n">hidden_size</span><span class="p">,</span> <span class="n">num_layers</span><span class="p">,</span> <span class="n">batch_first</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span> <span class="c1"># Fully connected layers </span> <span 
class="n">self</span><span class="p">.</span><span class="n">l1</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">hidden_size</span><span class="p">,</span> <span class="n">hidden_size2</span><span class="p">)</span> <span class="n">self</span><span class="p">.</span><span class="n">l2</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">hidden_size2</span><span class="p">,</span> <span class="n">hidden_size2</span><span class="p">)</span> <span class="n">self</span><span class="p">.</span><span class="n">l3</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">hidden_size2</span><span class="p">,</span> <span class="n">output_size</span><span class="p">)</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="n">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span> <span class="c1"># Initializing hidden state and cell state for LSTM </span> <span class="n">h0</span> <span class="o">=</span> <span class="n">torch</span><span class="p">.</span><span class="nf">zeros</span><span class="p">(</span><span class="n">self</span><span class="p">.</span><span class="n">num_layers</span><span class="p">,</span> <span class="n">x</span><span class="p">.</span><span class="nf">size</span><span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="n">self</span><span class="p">.</span><span class="n">hidden_size</span><span class="p">).</span><span class="nf">to</span><span class="p">(</span><span class="n">x</span><span class="p">.</span><span class="n">device</span><span class="p">)</span> <span class="n">c0</span> <span class="o">=</span> <span class="n">torch</span><span 
class="p">.</span><span class="nf">zeros</span><span class="p">(</span><span class="n">self</span><span class="p">.</span><span class="n">num_layers</span><span class="p">,</span> <span class="n">x</span><span class="p">.</span><span class="nf">size</span><span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="n">self</span><span class="p">.</span><span class="n">hidden_size</span><span class="p">).</span><span class="nf">to</span><span class="p">(</span><span class="n">x</span><span class="p">.</span><span class="n">device</span><span class="p">)</span> <span class="c1"># Forward propagate the LSTM </span> <span class="n">out</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">self</span><span class="p">.</span><span class="nf">lstm</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="p">(</span><span class="n">h0</span><span class="p">,</span> <span class="n">c0</span><span class="p">))</span> <span class="c1"># Pass the output of the last time step to the classifier </span> <span class="n">out</span> <span class="o">=</span> <span class="n">out</span><span class="p">[:,</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="p">:]</span> <span class="c1"># We are interested in the last timestep </span> <span class="n">out</span> <span class="o">=</span> <span class="n">F</span><span class="p">.</span><span class="nf">relu</span><span class="p">(</span><span class="n">self</span><span class="p">.</span><span class="nf">l1</span><span class="p">(</span><span class="n">out</span><span class="p">))</span> <span class="n">out</span> <span class="o">=</span> <span class="n">F</span><span class="p">.</span><span class="nf">relu</span><span class="p">(</span><span class="n">self</span><span class="p">.</span><span class="nf">l2</span><span class="p">(</span><span class="n">out</span><span class="p">))</span> <span 
class="n">out</span> <span class="o">=</span> <span class="n">self</span><span class="p">.</span><span class="nf">l3</span><span class="p">(</span><span class="n">out</span><span class="p">)</span> <span class="k">return</span> <span class="n">out</span> </code></pre></div></div> <p>Despite the theoretically simpler representation of the system, the results were still not satisfactory, with stabilization and convergence being unachievable.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/lstm-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/lstm-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/lstm-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/lstm.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h3 id="attempt-3-transformer">Attempt 3: Transformer</h3> <p>With LSTM and GNN strategies failing to deliver conclusive results, we pivoted to the more general-purpose Transformer architecture. This paradigm shifts focus from a strictly sequential data flow to a structure capable of interpreting the relationships between all links through its attention mechanism. 
Note, we also use a sinusoidal positional encoder to maintain the order coherance of the robot arm.</p> <p>For the Transformer model, we employ the following architecture, designed to be flexible and adaptable to high DOF systems in future implementations:</p> <div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">class</span> <span class="nc">RobotTransformerModel</span><span class="p">(</span><span class="n">nn</span><span class="p">.</span><span class="n">Module</span><span class="p">):</span> <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="n">self</span><span class="p">,</span> <span class="n">input_dim</span><span class="o">=</span><span class="mi">13</span><span class="p">,</span> <span class="n">d_model</span><span class="o">=</span><span class="mi">24</span><span class="p">,</span> <span class="n">mlp_dim</span><span class="o">=</span><span class="mi">128</span><span class="p">,</span> <span class="n">nhead</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">num_encoder_layers</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">dim_feedforward</span><span class="o">=</span><span class="mi">48</span><span class="p">,</span> <span class="n">output_dim</span><span class="o">=</span><span class="mi">7</span><span class="p">):</span> <span class="nf">super</span><span class="p">().</span><span class="nf">__init__</span><span class="p">()</span> <span class="n">self</span><span class="p">.</span><span class="n">d_model</span> <span class="o">=</span> <span class="n">d_model</span> <span class="c1"># Store d_model as an instance attribute </span> <span class="n">self</span><span class="p">.</span><span class="n">embedding</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span 
class="n">input_dim</span><span class="p">,</span> <span class="n">d_model</span><span class="p">)</span> <span class="n">self</span><span class="p">.</span><span class="n">pos_encoder</span> <span class="o">=</span> <span class="nc">PositionalEncoding</span><span class="p">(</span><span class="n">d_model</span><span class="p">)</span> <span class="c1"># Sinusoidal positional encoding </span> <span class="c1"># Transformer Encoder Layer </span> <span class="n">self</span><span class="p">.</span><span class="n">transformer_encoder</span> <span class="o">=</span> <span class="nc">Transformer</span><span class="p">(</span> <span class="n">dim</span><span class="o">=</span><span class="n">d_model</span><span class="p">,</span> <span class="n">mlp_dim</span><span class="o">=</span><span class="n">mlp_dim</span><span class="p">,</span> <span class="n">attn_dim</span><span class="o">=</span><span class="n">dim_feedforward</span><span class="p">,</span> <span class="n">num_heads</span><span class="o">=</span><span class="n">nhead</span><span class="p">,</span> <span class="n">num_layers</span><span class="o">=</span><span class="n">num_encoder_layers</span> <span class="p">)</span> <span class="n">self</span><span class="p">.</span><span class="n">output_layer</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Sequential</span><span class="p">(</span><span class="n">nn</span><span class="p">.</span><span class="nc">LayerNorm</span><span class="p">(</span><span class="n">d_model</span><span class="p">),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">d_model</span><span class="p">,</span> <span class="n">output_dim</span><span class="p">))</span> <span class="n">self</span><span class="p">.</span><span class="n">criterion</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">MSELoss</span><span 
class="p">()</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="n">self</span><span class="p">,</span> <span class="n">src</span><span class="p">):</span> <span class="n">src</span> <span class="o">=</span> <span class="n">src</span><span class="p">.</span><span class="nf">permute</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span> <span class="c1"># Shape: [seq_len, batch, feature] </span> <span class="n">src</span> <span class="o">=</span> <span class="n">self</span><span class="p">.</span><span class="nf">embedding</span><span class="p">(</span><span class="n">src</span><span class="p">)</span> <span class="o">*</span> <span class="n">math</span><span class="p">.</span><span class="nf">sqrt</span><span class="p">(</span><span class="n">self</span><span class="p">.</span><span class="n">d_model</span><span class="p">)</span> <span class="n">src</span> <span class="o">=</span> <span class="n">self</span><span class="p">.</span><span class="nf">pos_encoder</span><span class="p">(</span><span class="n">src</span><span class="p">)</span> <span class="n">output</span><span class="p">,</span> <span class="n">alphas</span> <span class="o">=</span> <span class="n">self</span><span class="p">.</span><span class="nf">transformer_encoder</span><span class="p">(</span><span class="n">src</span><span class="p">,</span> <span class="n">attn_mask</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">return_attn</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span> <span class="n">output</span> <span class="o">=</span> <span class="n">output</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="p">:,</span> <span class="p">:]</span> <span class="c1"># use the output of the first token 
(similar to BERT's [CLS] token) </span> <span class="k">return</span> <span class="n">self</span><span class="p">.</span><span class="nf">output_layer</span><span class="p">(</span><span class="n">output</span><span class="p">)</span> </code></pre></div></div> <p>However, even with this advanced architecture, convergence remained elusive, indicating that further restructuring of the problem was required.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/transformer_generic-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/transformer_generic-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/transformer_generic-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/transformer_generic.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h3 id="final-attempt-physics-informed-structured-transformer">Final Attempt: Physics Informed Structured Transformer</h3> <p>As nothing seems to be working, we now simplify our problem statement to gain some insights that could then be applied to the larger problem later. First, we now reformulate the serially linked robot arm dynamics into a double pendulum system with simplified parameters—each link defined by its length and a point mass at the end. 
The state variables in this reduced complexity scenario are simply the two link angles and their angular velocities.</p> \[\mathbf{M}(q)\ddot{q} + \mathbf{C}(q, \dot{q})\dot{q} = \mathbf{T}_g(q) + \mathbf{B}u\] <p>where</p> \[\mathbf{M} = \begin{bmatrix} (m_1 + m_2)l_1^2 + m_2l_2^2 + 2m_2l_1l_2\cos(q_2) &amp; m_2l_2^2 + m_2l_1l_2\cos(q_2) \\ m_2l_2^2 + m_2l_1l_2\cos(q_2) &amp; m_2l_2^2 \end{bmatrix},\] \[\mathbf{C} = \begin{bmatrix} 0 &amp; -m_2l_1l_2(2\dot{q}_1 + \dot{q}_2)\sin(q_2) \\ \frac{1}{2}m_2l_1l_2(2\dot{q}_1 + \dot{q}_2)\sin(q_2) &amp; -\frac{1}{2}m_2l_1l_2\dot{q}_1\sin(q_2) \end{bmatrix},\] \[\mathbf{T}_g = -g \begin{bmatrix} (m_1+m_2)l_1\sin(q_1) + m_2l_2\sin(q_1+q_2) \\ m_2l_2\sin(q_1+q_2) \end{bmatrix},\] \[\mathbf{B} = \begin{bmatrix} 1 &amp; 0 \\ 0 &amp; 1 \end{bmatrix}.\] <p>In this simpler problem statement, we switch to solving the Inverse Dynamics problem instead, which numerically has a computational complexity of O(n). We assume that there is less complexity in this representation (a complete guess), so the neural net doesn’t have to work as hard compared to the Forward Dynamics problem which has computational complexity of O(n^3).</p> <p>However, the task now focuses on the inverse dynamics with a reduced computational complexity of ( O(n) ), given that ( M(q) ) can be linearly separated from ( C ) and ( T_g ) and knowing that ( M(q) ) is symmetric and positive definite.</p> <p>For this, two Transformer neural networks were created, one for ( M(q)\ddot{q} ) and another for ( C(q, \dot{q})\dot{q} - T_g(q) ). Both models were trained separately with their respective datasets before being combined to model the complete manipulator equation. 
We can uniquely generate training data that only excite this mode by setting gravity and angular velocity to zero to get only M(q)*ddq = u.</p> <p>The architectures for these Transformers were revised to employ a Physics Informed approach, ensuring the adherence to known physical laws:</p> <div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">class</span> <span class="nc">RobotTransformerModelH</span><span class="p">(</span><span class="n">pl</span><span class="p">.</span><span class="n">LightningModule</span><span class="p">):</span> <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="n">self</span><span class="p">,</span> <span class="n">input_dim</span> <span class="o">=</span> <span class="mi">3</span><span class="p">,</span> <span class="n">d_model</span> <span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">mlp_dim</span><span class="o">=</span><span class="mi">128</span><span class="p">,</span> <span class="n">nhead</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">num_encoder_layers</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">dim_feedforward</span><span class="o">=</span><span class="mi">48</span><span class="p">):</span> <span class="nf">super</span><span class="p">().</span><span class="nf">__init__</span><span class="p">()</span> <span class="n">self</span><span class="p">.</span><span class="n">d_model</span> <span class="o">=</span> <span class="n">d_model</span> <span class="n">self</span><span class="p">.</span><span class="n">embedding</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">input_dim</span><span class="p">,</span> <span class="n">d_model</span><span class="p">)</span> <span class="n">self</span><span 
class="p">.</span><span class="n">pos_encoder</span> <span class="o">=</span> <span class="nc">PositionalEncoding</span><span class="p">(</span><span class="n">d_model</span><span class="p">)</span> <span class="n">self</span><span class="p">.</span><span class="n">transformer_encoder</span> <span class="o">=</span> <span class="nc">Transformer</span><span class="p">(</span><span class="n">dim</span><span class="o">=</span><span class="n">d_model</span><span class="p">,</span> <span class="n">attn_dim</span><span class="o">=</span><span class="n">dim_feedforward</span><span class="p">,</span> <span class="n">mlp_dim</span><span class="o">=</span><span class="n">mlp_dim</span><span class="p">,</span> <span class="n">num_heads</span><span class="o">=</span><span class="n">nhead</span><span class="p">,</span> <span class="n">num_layers</span><span class="o">=</span><span class="n">num_encoder_layers</span><span class="p">)</span> <span class="n">self</span><span class="p">.</span><span class="n">output_layer</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Sequential</span><span class="p">(</span><span class="n">nn</span><span class="p">.</span><span class="nc">LayerNorm</span><span class="p">(</span><span class="n">d_model</span><span class="p">),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">d_model</span><span class="p">,</span> <span class="mi">3</span><span class="p">))</span> <span class="c1"># Output is a 1x3 vector </span> <span class="n">self</span><span class="p">.</span><span class="n">criterion</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">MSELoss</span><span class="p">()</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="n">self</span><span class="p">,</span> <span class="n">src</span><span class="p">,</span> <span 
class="n">ddq</span><span class="p">):</span> <span class="n">src</span> <span class="o">=</span> <span class="n">src</span><span class="p">.</span><span class="nf">permute</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span> <span class="c1"># Reshape for transformer </span> <span class="n">src</span> <span class="o">=</span> <span class="n">self</span><span class="p">.</span><span class="nf">embedding</span><span class="p">(</span><span class="n">src</span><span class="p">)</span> <span class="o">*</span> <span class="n">math</span><span class="p">.</span><span class="nf">sqrt</span><span class="p">(</span><span class="n">self</span><span class="p">.</span><span class="n">d_model</span><span class="p">)</span> <span class="n">src</span> <span class="o">=</span> <span class="n">self</span><span class="p">.</span><span class="nf">pos_encoder</span><span class="p">(</span><span class="n">src</span><span class="p">)</span> <span class="n">output</span><span class="p">,</span> <span class="n">alphas</span> <span class="o">=</span> <span class="n">self</span><span class="p">.</span><span class="nf">transformer_encoder</span><span class="p">(</span><span class="n">src</span><span class="p">,</span> <span class="n">attn_mask</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">return_attn</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span> <span class="n">output</span> <span class="o">=</span> <span class="n">output</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="p">:,</span> <span class="p">:]</span> <span class="n">output</span> <span class="o">=</span> <span class="n">self</span><span class="p">.</span><span class="nf">output_layer</span><span class="p">(</span><span class="n">output</span><span class="p">)</span> <span 
class="c1"># Create a batch of symmetric 2x2 matrices from the batch of 1x3 output vectors </span> <span class="n">batch_size</span> <span class="o">=</span> <span class="n">output</span><span class="p">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="n">symmetric_matrices</span> <span class="o">=</span> <span class="n">torch</span><span class="p">.</span><span class="nf">zeros</span><span class="p">((</span><span class="n">batch_size</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">),</span> <span class="n">device</span><span class="o">=</span><span class="n">self</span><span class="p">.</span><span class="n">device</span><span class="p">)</span> <span class="n">symmetric_matrices</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="n">output</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">]</span> <span class="n">symmetric_matrices</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="n">output</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">]</span> <span class="n">symmetric_matrices</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="n">symmetric_matrices</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="n">output</span><span class="p">[:,</span> <span class="mi">2</span><span class="p">]</span> <span class="n">transformed_ddq</span> <span class="o">=</span> <span class="n">torch</span><span 
class="p">.</span><span class="nf">matmul</span><span class="p">(</span><span class="n">symmetric_matrices</span><span class="p">,</span> <span class="n">ddq</span><span class="p">.</span><span class="nf">unsqueeze</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)).</span><span class="nf">squeeze</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span> <span class="k">return</span> <span class="n">transformed_ddq</span> </code></pre></div></div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/H-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/H-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/H-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/H.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Then we create a separate transformer neural net for C(q, dq)*dq - Tg(q). 
Similarly, we can generate training data that only excites this mode by setting ddq = 0.</p> <div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">class</span> <span class="nc">RobotTransformerModelC</span><span class="p">(</span><span class="n">pl</span><span class="p">.</span><span class="n">LightningModule</span><span class="p">):</span> <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="n">self</span><span class="p">,</span> <span class="n">input_dim</span> <span class="o">=</span> <span class="mi">4</span><span class="p">,</span> <span class="n">d_model</span> <span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">mlp_dim</span><span class="o">=</span><span class="mi">128</span><span class="p">,</span> <span class="n">nhead</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">num_encoder_layers</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">dim_feedforward</span><span class="o">=</span><span class="mi">48</span><span class="p">):</span> <span class="nf">super</span><span class="p">().</span><span class="nf">__init__</span><span class="p">()</span> <span class="n">self</span><span class="p">.</span><span class="n">d_model</span> <span class="o">=</span> <span class="n">d_model</span> <span class="n">self</span><span class="p">.</span><span class="n">embedding</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">input_dim</span><span class="p">,</span> <span class="n">d_model</span><span class="p">)</span> <span class="n">self</span><span class="p">.</span><span class="n">pos_encoder</span> <span class="o">=</span> <span class="nc">PositionalEncoding</span><span class="p">(</span><span class="n">d_model</span><span class="p">)</span> 
<span class="n">self</span><span class="p">.</span><span class="n">transformer_encoder</span> <span class="o">=</span> <span class="nc">Transformer</span><span class="p">(</span><span class="n">dim</span><span class="o">=</span><span class="n">d_model</span><span class="p">,</span> <span class="n">attn_dim</span><span class="o">=</span><span class="n">dim_feedforward</span><span class="p">,</span> <span class="n">mlp_dim</span><span class="o">=</span><span class="n">mlp_dim</span><span class="p">,</span> <span class="n">num_heads</span><span class="o">=</span><span class="n">nhead</span><span class="p">,</span> <span class="n">num_layers</span><span class="o">=</span><span class="n">num_encoder_layers</span><span class="p">)</span> <span class="n">self</span><span class="p">.</span><span class="n">output_layer</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Sequential</span><span class="p">(</span><span class="n">nn</span><span class="p">.</span><span class="nc">LayerNorm</span><span class="p">(</span><span class="n">d_model</span><span class="p">),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="n">d_model</span><span class="p">,</span> <span class="mi">2</span><span class="p">))</span> <span class="c1"># Output is a 1x2 vector </span> <span class="n">self</span><span class="p">.</span><span class="n">criterion</span> <span class="o">=</span> <span class="n">nn</span><span class="p">.</span><span class="nc">MSELoss</span><span class="p">()</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="n">self</span><span class="p">,</span> <span class="n">src</span><span class="p">):</span> <span class="n">src</span> <span class="o">=</span> <span class="n">src</span><span class="p">.</span><span class="nf">permute</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span 
class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span> <span class="c1"># Reshape for transformer </span> <span class="n">src</span> <span class="o">=</span> <span class="n">self</span><span class="p">.</span><span class="nf">embedding</span><span class="p">(</span><span class="n">src</span><span class="p">)</span> <span class="o">*</span> <span class="n">math</span><span class="p">.</span><span class="nf">sqrt</span><span class="p">(</span><span class="n">self</span><span class="p">.</span><span class="n">d_model</span><span class="p">)</span> <span class="n">src</span> <span class="o">=</span> <span class="n">self</span><span class="p">.</span><span class="nf">pos_encoder</span><span class="p">(</span><span class="n">src</span><span class="p">)</span> <span class="n">output</span><span class="p">,</span> <span class="n">alphas</span> <span class="o">=</span> <span class="n">self</span><span class="p">.</span><span class="nf">transformer_encoder</span><span class="p">(</span><span class="n">src</span><span class="p">,</span> <span class="n">attn_mask</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">return_attn</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span> <span class="n">output</span> <span class="o">=</span> <span class="n">output</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="p">:,</span> <span class="p">:]</span> <span class="n">output</span> <span class="o">=</span> <span class="n">self</span><span class="p">.</span><span class="nf">output_layer</span><span class="p">(</span><span class="n">output</span><span class="p">)</span> <span class="k">return</span> <span class="n">output</span> </code></pre></div></div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/C-480.webp" /> <source 
class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/C-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/C-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/C.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>We picked Transformer as it’s more general compared to LSTM or GNN. Furthermore, it can easily be extended to high DOF system later on by just working with a longer input sequence. After training these two models independently with their own training data set, we combined the two pretrained models together to recreate the full manipulator equation with a complete dataset.</p> <p>class CombinedRobotTransformerModel(pl.LightningModule): def __init__(self, config_H, config_C): super().__init__() # Initialize the two models self.model_H = RobotTransformerModelH(**config_H) self.model_C = RobotTransformerModelC(**config_C) self.criterion = nn.MSELoss() # Additional layers or attributes can be added here if needed</p> <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>def load_pretrained_weights(self, path_H, path_C): # Load the pre-trained weights into each model self.model_H.load_state_dict(torch.load(path_H)) self.model_C.load_state_dict(torch.load(path_C)) def forward(self, src_H, ddq, src_C): # Forward pass for each model output_H = self.model_H(src_H, ddq) output_C = self.model_C(src_C) # Combine the outputs from both models combined_output = output_H + output_C return combined_output </code></pre></div></div> <p>This modular approach, informed by the physical structure of the dynamics, resulted in improved convergence and an adaptive 
controller with the capability to generalize well to unseen conditions of the double pendulum.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/combined-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/combined-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/combined-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-adaptive-controller-graph-eom/combined.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h2 id="conclusion">Conclusion</h2> <p>Through this journey of building and testing various neural network architectures to approximate the equations of motion for high-DOF robotic systems, it becomes evident that while cutting-edge machine learning tools hold promise, their effectiveness is tied to the physical realities of the problems they aim to solve. Success in neural net modeling involves really understanding the data and problem you are trying to solve. Here we managed to make a little head way in modeling the EOM of a 2 DOF system by mimicking the structure of the analytical solution.</p> <p>For future work, we should take the success in the 2 DOF system and push it for higher DOF with more complex parameters. We can generate data that can isolate specific motion modes of the model that can be used to train sections of the neural net at a time. 
By then training all the modes independently, we can stitch together the whole structure for the whole dataset.</p> Robustness of self-supervised ViT features in b-mode images 2023-11-09T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/Robustness-of-self-supervised-ViT-features-in-b-mode-images <h2 id="introduction">Introduction</h2> <p>B-mode ultrasound imaging is a widely employed medical imaging technique that uses high-frequency sound waves to produce visual representations of the internal structures of the human body. Its main advantages are its ability to produce real-time images, its portability, low cost, and especially the fact that is noninvasive and safe (non-radiating). However, it is an imaging modality that carries a very high noise-to-signal ratio. Speckle noise, out-of-plane movement, and high variability in image reconstruction across devices make the resulting images complex to interpret and diagnose <d-cite key="us"></d-cite>. As an example, the following figure shows an annotated b-mode ultrasound image.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig0-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig0-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig0-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig0.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Ultrasound b-mode image of the upper arm with the main 
physiology annotated. </div> <p>Self-supervised Vision Transformers (ViT) have emerged as a powerful tool to extract deep features for a variety of downstream tasks, such as classification, segmentation, or image correspondence. Especially, DINO architectures <d-cite key="dino1"></d-cite> <d-cite key="dino2"></d-cite> have exhibited striking properties, where its deep features present localized semantic information shared across related object categories, even in zero-shot methodologies <d-cite key="dino_feat"></d-cite>. Consequently, the aforementioned properties of DINO may allow us to develop efficient yet simple methods for b-mode ultrasound image interpretation, without the need for an expert or ground truth labels.</p> <p>In this work, we propose analyzing the performance and robustness of DINO in b-mode ultrasound images of the arm and leg, capturing musculoskeletal tissue from two different ultrasound devices. We note that this dataset features a series of images with a high noise-to-signal ratio, which is a property that DINO has not yet been tested against. In particular, we focus on assessing DINO-vit-s/8 deep features across its blocks as well as its attention weights, with the final objective of segmenting bone on b-mode images in a zero-shot approach. Through all these experiments, we show the potential and feasibility of implementing DINO models in real-world b-mode medical imaging applications.</p> <h2 id="related-work">Related Work</h2> <h3 id="dino-vit-assessment">DINO-vit Assessment</h3> <p>Since the release of DINO, a self-supervised method for training ViTs based on self-distillation, there has been a line of work focused on exploring new capabilities and assessing the deep features obtained from such pre-trained models. In <d-cite key="dino1"></d-cite>, they showed how the attention heads corresponded to different parts of an object in an image, or how one could segment desired objects by thresholding the self-attention maps. 
Similarly, semantic information analysis across related images was performed to show the potential of the deep features contained in DINO-vit models. Employing principal component analysis (PCA), matching algorithms or linear classifiers on the deep features, promising results on segmentation, semantic co-segmentation, and correspondence tasks were presented <d-cite key="dino2"></d-cite>, <d-cite key="dino_feat"></d-cite>.</p> <p>Further research was done by combining Stable Diffusion features and DINO features, improving semantic correspondence tasks at the cost of increasing the computation effort <d-cite key="dino_stable"></d-cite>. While DINO has shown strong generalization to downstream tasks, there has been no work on the assessment of this model on a b-mode ultrasound imaging domain. Besides the high noise-to-signal ratio, ultrasound images usually present a complex structure of tissues that makes it difficult to differentiate between the foreground, the desired structure to segment or analyze, and the background. Our work shows that DINO is also robust to this type of image, leading to promising results on segmentation tasks.</p> <h3 id="ultrasound-b-mode-imaging-segmentation-on-musculoskeletal-tissue">Ultrasound B-mode Imaging Segmentation on Musculoskeletal Tissue</h3> <p>Muscle and bone segmentation have important applications in clinical and rehabilitation practices to assess motion performance, diagnosis of the musculoskeletal system, and quantification of rehabilitation procedures, among others. There has been effort in developing deep learning tools to automatically segment and quantify desired parameters for the aforementioned applications. In <d-cite key="unet_segment"></d-cite>, a U-Net architecture with Deep Residual Shrinkage layers for denoising was implemented and trained to segment muscle fibers. 
Similarly, different muscle heads were segmented employing a large dataset of muscle images from different subjects and devices to train several convolutional neural network architectures <d-cite key="muscle_segment"></d-cite>, <d-cite key="muscle_segment2"></d-cite>.</p> <p>Medical images, from any source, are in general scarce and difficult to label, which poses a limitation for deep learning models to achieve a good performance and generalization. Most of the current methods, lack the capability to perform well in unseen segmentation tasks involving different anatomies. In <d-cite key="universeg"></d-cite>, they developed a deep learning model, UniverSeg, based on a novel Cross-Block mechanism that produces accurate segmentation maps without the need for additional training. However, when employed in noisier data domains, such as b-mode images, the performance breaks down. In this work, we discover that DINO has potential even when dealing with noisier datasets based on b-mode ultrasound images.</p> <h2 id="methods">Methods</h2> <h3 id="dataset">Dataset</h3> <p>The dataset consists of b-mode ultrasound images from the arm and leg of two subjects while moving. We recorded short videos and randomly selected frames to obtain the images. In the images, bone, muscle, and fascia tissues can be appreciated. We also acquired videos from two different ultrasound sources to expand the domain where DINO was tested. With all this, 4 different image origins (or image domains) form the dataset, as appreciated in the figure below. 
We labeled 10 bone heads of each domain to evaluate DINO’s performance.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig01-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig01-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig01-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig01.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Example of one image of each origin with its mask label (blue). a) Arm (Source 1, Subject 1). b) Arm (Source 1, Subject 2). c) Arm (Source 2, Subject 1). d) Leg (Source 2, Subject 1) </div> <h3 id="deep-feature-assessment">Deep Feature Assessment</h3> <p>We analyzed DINO-vit-s/8 features over different layers qualitatively. For any block \(i\), we extracted the Keys, Values, Queries, and Tokens and applied a principal component analysis (PCA) to get the three most important components. For the attention maps, we averaged the self-attention weights of the CLS token over each head of the multi-head block.</p> <p>This analysis was done with the intention of qualitatively finding the most suitable deep features for the subsequent segmentation task. 
Similarly, the self-attention maps were observed to corroborate that the model focuses especially on the bone, and less on the surrounding structures.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig1.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Workflow to obtain deep features as well as self-attention information. Transformer block design obtained from <d-cite key="dino_feat"></d-cite>. </div> <h3 id="segmentation-pipeline">Segmentation Pipeline</h3> <p>As described in the results section, the Keys of the last block (block 12) of DINO-vit-s/8 were employed as deep features for the segmentation. As in <d-cite key="dino_feat"></d-cite>, we used a zero-shot approach as the pipeline for bone segmentation. We first clustered together all the features obtained from the different images passed through DINO with k-means. Then, we selected those clusters for the segmentation mask employing a simple voting algorithm. Being \(\texttt{Attn}_i^\mathcal{I}\) the self-attention of the CLS token averaged over all heads of block 12 in image \(\mathcal{I}\) and patch \(i\); and \(S_k^\mathcal{I}\) the segment in image \(\mathcal{I}\) belonging to cluster \(k\). 
The saliency of this segment was computed as</p> \[\texttt{Sal}(S_k^\mathcal{I}) = \frac{1}{|S_k^\mathcal{I}|} \sum_{i \in S_k^\mathcal{I}} \texttt{Attn}_i^\mathcal{I}\] <p>and the voting of the cluster \(k\) was obtained as</p> \[\texttt{Votes}(k) = \mathbb{1}[\sum_\mathcal{I}\texttt{Sal}(S_k^\mathcal{I}) \geq \tau ]\] <p>for a threshold \(\tau\) set to 0.2. Then, a cluster \(k\) was considered to be part of the mask if its \(\texttt{Votes}(k)\) were above a percentage of 65% of all images. The following image sketches the whole process.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig2.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Zero-shot segmentation pipeline using keys as deep features. 
</div> <p>To quantitatively assess the segmentation results, both Dice and IoU metrics were computed employing the labeled bone head segmentations.</p> <h2 id="results">Results</h2> <h3 id="deep-features-assessment">Deep Features Assessment</h3> <p>We first input a single image to the model and analyzed the Keys, Values, Queries, and Tokens, as well as the self-attention of the CLS token from shallower to deeper layers.</p> <p>The three most important components after performing the PCA on the deep features are plotted in RGB as depicted in the figure below. Tokens seem to carry spatial information throughout the different blocks, representing depth information in the final block. On the other hand, Keys and Values seem to carry spatial information on the shallower blocks, and semantic information on the deeper blocks. In fact, we considered the Keys descriptors the most appropriate to be used to segment bone, as the bone head can be distinguished from the surrounding structures. Regarding the attention maps, they seem to move from the skin (in shallow blocks) to the bone (deeper blocks).</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig3-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig3-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig3-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig3.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Token, Value, 
Key, and Query features as well as self-attention maps for different blocks (from shallow to deep). </div> <p>Now, if we focus on the Keys features of the last block for the four different image domains, we can appreciate a similar behavior. Bone heads seem to be represented in all four cases by the Keys, being differentiated by the surrounding structures. That being said, we should note that the intersection between muscles just above the bone is in some cases also represented like the bone. Regarding the self-attention maps, in all four cases, they are principally focused on the bone head. However, we can also see that some muscle fibers or intersections may be present.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig4-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig4-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig4-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig4.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Keys deep features and self-attention maps from block 12 for the four different image origins. </div> <p>An interactive scatter plot is another method to argue the representation of the bone by the Key features. 
For all the four different image origins, the patches belonging to the bone head are grouped on a region of the Euclidean space, while the patches belonging to other structures are scattered all over other regions.</p> <div class="l-page"> <iframe src="/staging/assets/html/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/scatter.html" frameborder="0" scrolling="no" height="600px" width="100%"></iframe> </div> <div class="caption"> 3D scatter plot of the 3 components of the Key descriptors (block 12). Legend: "other" any patch not belonging to the bone head. "boneS1A1" bone patches of Source 1 - Arm Subject 1. "boneS1A2" bone patches of Source 1 - Arm Subject 2. "boneS2A1" bone patches of Source 2 - Arm Subject 1. "boneS2L" bone patches of Source 2 - Leg Subject 1. </div> <h3 id="same-domain-experiment">Same Domain Experiment</h3> <p>We subsequently performed the segmentation task on a set of images from the same origin. For each of the 4 domains, sets of 2, 3, 5, and 10 images were input to the segmentation pipeline. Recalling that the images were selected as random frames from short videos, each image within a domain presented a slightly different configuration of bone and surrounding structures. Therefore, the goal of segmenting with varying image quantities was to evaluate the balance between improvements due to increased feature quantity versus confusion introduced by variation in the images.</p> <p>The reader can observe the results in the figure below. The bones from Source 1 Arm 1 are the best segmented, and the amount of images does not affect the performance, obtaining constant values of Dice and IoU of about 0.9 and 0.77, respectively. The segmentation of images from Source 1 Arm 2 in general takes also some part of the muscle tissue, and as in the previous case, the amount of images used does not change the performance with Dice and IoU metrics of about 0.7 and 0.5, respectively. 
In the case of images from Source 2 Arm 1, a larger quantity of images improves the segmentation results, increasing Dice and IoU metrics from 0.58 to 0.75, and 0.46 to 0.61, respectively. Finally, the segmentation masks from images from Source 2 Leg carry not only the bone but part of the surrounding tissue too. When increasing the number of images to 10, the performance drastically falls (with Dice and IoU of 0) as the segmentation results contain muscle fibers instead of bone.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig5-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig5-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig5-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig5.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Results of the segmentation on same domain images experiment. a) Segmentation result examples for the 4 different image domains. b) Metrics for the 4 different image domains and different amounts of images (mean and standard deviation). </div> <h3 id="different-domain-experiments">Different Domain Experiments</h3> <p>Then, we performed the segmentation task on a set of images from origin pairs. Five images of each origin were paired forming the following groups. 
Group 1: different physiology (source 1 - arm subject 1 and source 1 - arm subject 2), group 2: different sources (source 1 - arm subject 1 and source 2 - arm subject 1), group 3: different body parts (source 2 - arm subject 1 and source 2 - leg subject 1), and finally group 4: different body parts and sources (source 1 - arm subject 1 and source 2 - leg subject 1). We carried out this experiment to evaluate if the deep features shared from different image origins were similar enough to properly perform the segmentation task, giving an idea of feature correspondence between different image domains.</p> <p>The image below shows the experiment results. The segmentation performed on the domain source 1 arm subject 1 worsens when paired with any other image domains. Both IoU and Dice metrics fall from 0.9 and 0.77 (previous values) to 0.78 and 0.59, respectively. Contrarily, the domains consisting of source 1 arm subject 2 and source 2 arm subject 1 improve when paired with source 1 arm subject 1. 
Finally, the image origin containing leg images maintains a similar segmentation performance when being paired.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig6-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig6-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig6-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig6.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Results of the segmentation for pairs of domain images. Legend: Different physiology (source 1 - arm subject 1 and source 1 - arm subject 2), Different sources (source 1 - arm subject 1 and source 2 - arm subject 1), Different body parts (source 2 - arm subject 1 and source 2 - leg subject 1), and Different body parts and sources (source 1 - arm subject 1 and source 2 - leg subject 1). Bar plots contain mean and standard deviation. </div> <h3 id="noise-experiment">Noise Experiment</h3> <p>We further assessed DINO by introducing white noise to the dataset. Being an image \(\mathcal{I}\), the image input to DINO was \(\mathcal{I}_{\texttt{Noisy}} = \mathcal{I} + \epsilon \cdot \mathcal{N}(0, 1)\). We segmented five images from the domain Source 1 Arm Subject 1 and incrementally increased the white noise strength by tuning \(\epsilon\). 
We performed this last experiment to evaluate how the deep features and attention maps change as well as the resulting segmentation masks with increasing noise, gaining intuition on how robust DINO can be.</p> <p>As observed in the following figure, the Keys features and the attention weights start being affected by the noise at \(\epsilon = 2.0\). Keys features are less efficient at describing the bone from the surrounding structures, and the attention maps start shifting the attention to only the left side of the bone and the muscle line above the bone. Segmentation results show that with increased noise, some parts of the muscle are segmented and for \(\epsilon \geq 2.5\), the right side of the bone is not included on the segmentation mask.</p> <p>Taking a look at the metrics, the more the noise strength is increased, the lower the Dice and IoU values obtained. From little noise to the highest tested in this experiment, a reduction of about 50% for both Dice and IoU occurs.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig7-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig7-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig7-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Robustness-of-self-supervised-ViT-features-in-b-mode-images/fig7.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Results with noisy images. 
a) Original, Keys features, attention maps, and segmentation results for different values of $\epsilon$. b) Dice and IoU metrics for different values of $\epsilon$. </div> <h2 id="discussion">Discussion</h2> <p>In this project, we used a DINO ViT model to segment bone heads from ultrasound images using a zero-shot methodology involving clustering. We first studied how the model deep features change across different layers, and chose Key features as the most appropriate for characterizing bone. We then segmented bone from different image domains, initially employing batches of images from the same domain, and then combining them. Finally, we tested DINO and its robustness by adding additional noise.</p> <p>Encouraging results were found in the deep features of the model. We could appreciate how both Key and Query features were capable of differentiating bone, some muscle regions, and skin tissue. We also obtained surprisingly good segmentation masks for a zero-shot methodology on a new dataset as ultrasound b-mode images are. In particular, the image domain “source 1 arm subject 1” presented very similar segmentation masks compared to the labeled ones, giving an idea of how semantic features obtained by DINO extend beyond its training data domain, displaying astonishing generalization. Even when adding noise to the image dataset, DINO Key features kept describing the bone up to high noise strengths.</p> <p>While the project has yielded promising results, there are several limitations to take into account. First, we should note that the success of the zero-shot methodology has relied on an initial hyperparameter tuning, finding the threshold \(\tau\), the voting percentage, and the number of clusters. However, we are aware that the optimal configuration may vary across different datasets or imaging conditions. Additionally, we focused on segmenting only bone, but we have not explored the capabilities of DINO to segment other tissues or structures. 
We acknowledge that a comprehensive medical imaging solution should combine the segmentation of multiple relevant structures for a general understanding and application. Finally, only two anatomical parts (arm and leg) and two subjects were included in the dataset. To better explore the applicability of the model, a more diverse dataset containing more anatomical parts from more subjects should be considered.</p> <p>In conclusion, this project demonstrates the potential of employing the DINO ViT model for ultrasound bone segmentation using a zero-shot methodology. We believe that this work lays a foundation for future improvements, promoting a more comprehensive understanding of DINO’s capabilities in medical image segmentation.</p> Investigating the Impact of Symmetric Optimization Algorithms on Learnability 2023-11-09T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/Symmetry-Optimization <h2 id="introductions">Introductions</h2> <p>Neural networks have been a staple in Deep Learning due to their expressive power. While the architecture itself is very powerful, the process of \textit{optimizing} the neural network - i.e., finding the values of the parameters of the network that minimize the loss over training data - is approximate. After all, a neural network is a highly non-smooth function and is inherently difficult to optimize. The core idea of many of these methods is to approximate the neural network (i.e. via first or second-order approximations), which are then easier to optimize over.</p> <p>Our goal is to explore if incorporating “asymmetries” into optimization can help. Many methods use a similar update rule for all parameters in the network. We experiment with using different rules for different parameters, guided by heuristics.</p> <h2 id="motivation-a-couple-of-nice-papers">Motivation: a couple of nice papers</h2> <p>This project is motivated by a couple results, not necessarily in the context of neural networks. 
The first comes from a 2004 paper by Andrew Ng titled “Feature Selection, \(L_1\) vs. \(L_2\) regularization, and rotational invariance”. It concerns the sample complexity of feature selection - how much training data is necessary to fit the model to some accuracy with high probability - where the number of relevant features is small compared to the total number of features. The paper shows that the mode of regularization is of utmost importance to the sample complexity: the sample complexity using $L_2$ regularization is exponential compared to the sample complexity with $L_1$ regularization. One may ask: what does this have to do with symmetry? In the case of $L_2$ regularization, the classifier remains the same even when the training data is rotated (i.e. the data is pre-multiplied by a rotation matrix). More aptly, logistic regression with $L_2$ regularization is \textit{rotationally invariant}. This is not the case for $L_1$ regularization. For the precise statements, see the theorems from the paper below:</p> <h3 id="theorem-sample-complexity-with-l_1-regularized-logistic-regression">Theorem: Sample complexity with $L_1$-regularized logistic regression</h3> <p>Let any $\epsilon&gt;0, \delta&gt;0, C&gt;0, K\geq 1$ be given, and let $0&lt;\gamma&lt;1$ be a fixed constant. Suppose there exist $r$ indices $1\leq i_1, i_2,\ldots i_r\leq n$, and a parameter vector \(\theta^*\in\mathbb{R}^n\) such that only the $r$ corresponding components of $\theta^*$ are non-zero, and \(|\theta_{ij}|\leq K\) ($j=1,\ldots r$). Suppose further that \(C\geq rK\). 
Then, in order to guarantee that, with probability at least $1-\delta$, the parameters $\hat{\theta}$ output by our learning algorithm does nearly as well as \(\theta^*\), i.e., that \(\epsilon^l(\hat{\theta})\leq \epsilon^l(\theta^*)+\epsilon,\) it suffices that \(m=\Omega((\log n)\cdot \text{poly}(r, K, \log(1/\delta), 1/\epsilon, C)).\)</p> <h3 id="theorem-sample-complexity-for-rotationally-invariant-algorithms-including-l_2-regularized-logistic-regression">Theorem: Sample complexity for rotationally invariant algorithms (including $L_2$-regularized logistic regression)</h3> <p>Let $L$ be any rotationally invariant learning algorithm, and let any $0&lt;\epsilon&lt;1/8, 0&lt;\delta&lt;1/100$ be fixed. Then there exists a learning problem $\mathscr{D}$ so that: $(i)$ The labels are deterministically related to the inputs according to $y=1$ if $x_1\geq t$, $y=0$ otherwise for some $t$, and $(ii)$ In order for $L$ to attain $\epsilon$ or lower $0/1$ misclassification error with probability at least $1-\delta$, it is necessary that the training set size be at least \(m=\Omega(n/\epsilon)\)</p> <p>While this example is nice and shows us how symmetry can be harmful, it concerns the symmetry of the algorithm disregarding optimization. A 2022 paper by Abbe and Adsera specializes the effects of symmetry to neural networks trained by gradient descent (more on this later). This paper uses a notion of symmetry called \textit{G-equivariance}. 
See the definition below:</p> <h3 id="definition-g-equivariance-a-randomized-algorithm-a-that-takes-in-a-data-distribution-mathcaldinmathcalpmathcalxtimesmathcaly-and-outputs-a-function-mathcalamathcald-mathcalxrightarrowmathcaly-is-said-to-be-g-equivariant-if-for-all-gin-g-mathcalamathcaldoversetdmathcalagmathcaldcirc-g">(Definition: $G-$equivariance) A randomized algorithm $A$ that takes in a data distribution $\mathcal{D}\in\mathcal{P}(\mathcal{X}\times\mathcal{Y})$ and outputs a function $\mathcal{A}(\mathcal{D}): \mathcal{X}\rightarrow\mathcal{Y}$ is said to be $G-$equivariant if for all $g\in G$ \(\mathcal{A}(\mathcal{D})\overset{d}{=}\mathcal{A}(g(\mathcal{D}))\circ g\)</h3> <p>Here $g$ is a group element that acts on the data space $\mathcal{X}$, and so is viewed as a function $g:\mathcal{X}\rightarrow\mathcal{X}$, and $g(\mathcal{D})$ is the distribution of ${g(\mathbf{x}),y}$ where $(\mathbf{x}, y)\sim\mathcal{D}$</p> <p>More simply, an algorithm is G-equivariant if the prediction function produced by the algorithm does not vary when the data distribution is transformed according to $G$ (i.e., a group element $g$ is applied to the data distribution). Note the algorithm includes optimizing parameters: an example of a G-equivariant algorithm is learning a fully-connected neural network via SGD with Gaussian initialization, which is equivariant with respect to orthogonal transformations. More generally, neural networks trained with SGD or noisy GD hold G-equivariance. The paper claims that G-equivariant algorithms are limited in which functions they can learn. 
This is stated informally in the following theorem, where the G-alignment is a (rather complicated) measure of distance:</p> <h3 id="gd-lower-bound-informal-statement-limitations-of-g-equivariant-algorithms">GD lower bound, informal statement: Limitations of G-equivariant algorithms</h3> <p>Let \(\mathcal{D}_f\in\mathcal{P}(\mathcal{X}\times\mathbb{R})\) be the distribution of \((\mathbf{x}, f(\mathbf{x}))\) for \(\mathbf{x}\sim \mu_\mathcal{X}\). If \(\mu_\mathcal{X}\) is \(G-\)invariant and the \(G-\)alignment of \((\mu_\mathcal{X},f)\) is small, then \(f\) cannot be efficiently learned by a $G-$equivariant GD algorithm.</p> <p>We refer readers interested in further details and the proof of the theorem to the paper. The paper is quite nice and we encourage readers interested in theory to take a look at it. All in all, the paper suggests training neural networks with SGD is not necessarily the way to go. Therefore, we consider variants of GD that prove to perform better in practice. We first introduce gradient descent and a popular variant: Adam.</p> <h2 id="overview-of-existing-optimization-algorithms">Overview of existing optimization algorithms</h2> <h3 id="gradient-descent">Gradient Descent</h3> <p>The most widely-used optimization algorithms are some version of \textit{gradient descent}. Gradient descent iteratively updates the parameter values, moving the parameter in the direction of steepest descent (given by the negative of the gradient of the loss with respect to the parameter). Essentially, gradient descent uses a first-order approximation. The amount by which the parameter is moved in this direction is referred to as \textit{learning rate} or step size, typically denoted by $\eta$. The update rule is given by \(\theta^{t+1}= \theta^t - \eta_t\nabla_{\theta}\mathscr{L}_{\mathscr{D}}(\theta^t)\) where the subscript on $\eta$ indicates a learning rate that can be changed over time. 
Common strategies for varying $\eta$ over time consist of decaying $\eta$, whether it be a linear or exponential decay (or something in between). In practice, \textit{stochastic} gradient descent (SGD) is used. In SGD, instead of computing the gradient for each datapoint, the gradient is approximated by taking the average of the gradients at a subset (i.e. batch) of the data. A variation of gradient descent incorporates the concept of momentum. With momentum, the increment to the parameter is a constant \(\mu\), the momentum parameter, times the previous increment, plus the update we saw in GD: \(\eta_t\nabla_{\theta}\mathscr{L}_{\mathscr{D}}(\theta^t)\). In other words, the increment is a weighted average of the previous increment and the typical GD update. Too high of a momentum can lead to overshooting the minimizer, analogous to how too high of a learning rate in GD can lead to divergence.</p> <h3 id="adam">Adam</h3> <p>The most popular optimizer in practice is called Adam, which performs well compared to other gradient-based optimizers. Adam is a gradient-based method which uses the gradient as well as the squared gradient (computed from batches), as well as an exponential decay scheme, to iteratively update $\theta$. It estimates the first and second moments of the gradient from the batch computations, and uses these estimates in its update rule. Adam requires three parameters: the learning rate, and one each for the rate of exponential decays of the moment estimates of the gradients. Adam consistently outperforms standard SGD. The optimization we present is based upon Adam, with a few modifications.</p> <p>We briefly note that these methods are \textit{first-order methods}: they only consider first derivatives, i.e. the gradient. Second-order methods, such as Newton’s method, should theoretically be better because the approximation of the function will be better. 
However, the computation of the Hessian is rather cumbersome in neural networks, which is why they are not typically used.</p> <h3 id="automatic-gradient-descent">Automatic Gradient Descent</h3> <p>Another method we consider is Automatic Gradient Descent (AGD), which is developed in recent literature (co-authored by our very own instructor, Jeremy Bernstein!). This paper attempts to get rid of the pesky hyperparameter-tuning stage that is involved in training neural networks, leading to \textit{hyperparameter transfer}. In practice, a variety of learning rates is tested during training. In addition, this learning rate may not “transfer” across architectures: if one were to make their neural network wider or deeper, they would most likely have to search for the optimal learning rate once again. Automatic Gradient Descent attempts to solve this problem by coming up with an update that is architecture-independent in the realm of MLPs. AGD operates by computing an upperbound for the loss after the update (i.e. $\mathscr{L}(\mathbf{w}+\Delta\mathbf{w})$, where $\mathbf{w}$ is the parameter we are optimizing), then optimizing this upperbound in $\Delta\mathbf{w}$ to find the best step size. This step size is then used to update the parameter, and is recalculated at each iteration. The algorithm uses spectrally-normalized weight matrices, which allows for a nice upperbound for the loss function allowing for the optimal choice of $\eta$ to be solved for (in particular, it allows for matrix inequalities involving matrix norms to be used). 
The algorithm is given in full below:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-01-Symmetry-Optimization-project/agd-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-01-Symmetry-Optimization-project/agd-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-01-Symmetry-Optimization-project/agd-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-01-Symmetry-Optimization-project/agd.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>We include AGD in this discussion because it is an asymmetric algorithm: the weights are normalized in a layer-dependent fashion. In addition, it takes a stab at alleviating the annoying task of hyperparameter tuning. We see in practice, however, that it does not perform as well as Adam. This is presumably because the approximation of the loss function via upperbounding with matrix inequalities is not tight, or maybe because the model does not incorporate biases as presented in the paper.<br /> We now begin discussion of our method, which has been crafted after studying these existing methods and taking into account the potential disbenefits of asymmetry.</p> <h3 id="extension-of-agd-to-regularized-losses">Extension of AGD to regularized losses</h3> <p>We found the idea of AGD to be very nice, and in an attempt to understand it better, decided to explore one of the further directions listed in the paper: applying the method to regularized losses. The work in the paper applies to losses of the form $\frac{1}{N}\sum_{(x, y)}l(f_w(x), y)$. 
However, a more general loss includes a regularization term: \(\mathcal{L}(w)=\frac{1}{N}\sum_{(x, y)}l(f_w(x), y)+\lambda R(w)\) where $R(w)$ is a regularization term. For our purposes, we assume $l$ to be the squared-loss and $R(w)$ to be the $L_2$ norm of $w$. We shorthand $\frac{1}{N}\sum_{(x, y)}l(f_w(x), y)$ to $\hat{l}$. Below, we derive the learning rate, in the context of AGD (i.e. with the spectrally normalized weights and same form of update), for this regularized loss:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-01-Symmetry-Optimization-project/reg-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-01-Symmetry-Optimization-project/reg-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-01-Symmetry-Optimization-project/reg-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-01-Symmetry-Optimization-project/reg.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>We have omitted a lot of intermediary steps involving matrix inequalities and derivatives - see the paper on AGD if you are interested in the details! We remark that this choice of $\eta$ depends on $\lambda$, so hyperparameter tuning is still necessary. Some dependence on the architecture shows up in $\eta$, namely $\Tilde{d}$. However, as the network scales this parameter can stay constant. 
We are interested in how this will perform in practice - check the blog for updates on this!</p> <h2 id="introducing-asymmetric-nature">Introducing Asymmetric Nature</h2> <p>Our initial experiment involved a two-layer neural network (width: 1024) trained on the MNIST Dataset using three distinct learning algorithms: i) AGD (gain = 1), ii) Default Adam, and iii) Adam with diverse hyperparameters for both layers. The graph below showcases the resulting loss and accuracy. The first graph showcases loss while the second one showcases accuracy.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-01-Symmetry-Optimization-project/lossmnist-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-01-Symmetry-Optimization-project/lossmnist-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-01-Symmetry-Optimization-project/lossmnist-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-01-Symmetry-Optimization-project/lossmnist.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-01-Symmetry-Optimization-project/accmnist-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-01-Symmetry-Optimization-project/accmnist-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-01-Symmetry-Optimization-project/accmnist-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-01-Symmetry-Optimization-project/accmnist.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" 
onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Given MNIST’s high accuracy even with minimal epochs, the distinction wasn’t apparent. Notably, while the asymmetric algorithm matched or outperformed default Adam, fine-tuning Adam’s hyperparameters yielded superior performance.</p> <p>Inspired by AGD’s removal of the learning rate hyperparameter, we crafted two AGD variations for comparison with SGD and the original AGD.</p> <h3 id="variation-1">Variation 1</h3> <p>This variation incorporated momentum into AGD, integrating AGD’s learning rate and gradient summary with momentum’s past and current gradients. Surprisingly, this had minimal impact, indicating the optimality of gradient summary and learning rate.</p> <h3 id="variation-2">Variation 2</h3> <p>Here, instead of typical momentum, we introduced layer-wise asymmetry, acknowledging each layer’s varying impact on loss. Adjusting each layer’s learning rate inversely proportional to its number resulted in notable performance differences!</p> <p>Results from training under these algorithms using the cifar-10 Dataset and MSE Loss are depicted in the subsequent diagram.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-01-Symmetry-Optimization-project/losscifar-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-01-Symmetry-Optimization-project/losscifar-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-01-Symmetry-Optimization-project/losscifar-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-01-Symmetry-Optimization-project/losscifar.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source 
class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-01-Symmetry-Optimization-project/acccifar-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-01-Symmetry-Optimization-project/acccifar-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-01-Symmetry-Optimization-project/acccifar-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-01-Symmetry-Optimization-project/acccifar.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h3 id="evaluation-metrics">Evaluation Metrics</h3> <p>Emphasizing learnability, we adopt the ordering concept over exact measures. Algorithm $A_1$ is deemed superior to $A_2$ if its expected learning ability (distinguishing correct/incorrect classifications) surpasses $A_2$. This learning ability, resembling a Beta distribution, is directly proportional to the current accuracy. Therefore, we based our evaluation on the accuracy and loss graphs over epochs.</p> <h2 id="conclusion">Conclusion</h2> <p>Our blog offers insights into optimizing neural networks and advocates for the potential benefits of asymmetry in training processes. We trust you found our journey as engaging as we did in developing it!</p> <h2 id="citations">Citations</h2> <p>Ng, Andrew Y. ”Feature selection, L 1 vs. L 2 regularization, and rotational invariance.” Proceedings of the twenty-first international conference on Machine learning. 2004.</p> <p>Bernstein, Jeremy, et al. ”Automatic Gradient Descent: Deep Learning without Hyperparameters.” arXiv preprint arXiv:2304.05187 (2023).</p> <p>Bernstein, Jeremy, et al. ”Automatic Gradient Descent: Deep Learning without Hyperparameters.” arXiv preprint arXiv:2304.05187 (2023).</p> <p>Kingma, Diederik P., and Jimmy Ba. 
”Adam: A method for stochastic optimization.” arXiv preprint arXiv:1412.6980 (2014).</p> <p>Philipp, George, Dawn Song, and Jaime G. Carbonell. ”The exploding gradient problem demystified- definition, prevalence, impact, origin, tradeoffs, and solutions.” arXiv preprint arXiv:1712.05577 (2017).</p> Can CNN learn shapes? 2023-11-09T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/how-cnns-learn-shapes <h2 id="background">Background</h2> <p>One widely accepted intuition is that Convolutional Neural Networks (CNNs) that are trained for object classification, combine low-level features (e.g. edges) to gradually learn more complex and abstracted patterns that are useful in differentiating images. Stemming from this is the idea that neural networks can understand and use shape information to classify objects, as humans would. Previous works have termed this explanation the shape hypothesis. As <d-cite key="kriegeskorte2015deep"></d-cite> puts it,</p> <blockquote> <p>… the network acquires complex knowledge about the kinds of shapes associated with each category. 
[…] High-level units appear to learn representations of shapes occurring in natural images</p> </blockquote> <p>This notion also appears in other explanations, such as in <d-cite key="lecun2015deep"></d-cite></p> <blockquote> <p>Intermediate CNN layers recognize parts of familiar objects, and subsequent layers […] detect objects as combinations of these parts.</p> </blockquote> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/shapetexture-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/shapetexture-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/shapetexture-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/shapetexture.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 1. <d-cite key="geirhos2018imagenet"></d-cite> shows that CNNs trained on ImageNet data are biased towards predicting the category corresponding to the texture rather than shape.</p> <p>Yet it remains poorly understood how CNNs actually make their decisions, and how their recognition strategies differ from humans. Specifically, there is a major debate about the question of whether CNNs primarily rely on surface regularities of objects, or whether they are capable of exploiting the spatial arrangement of features, similar to humans. Studies have shown that the extent to which CNNs use global features ; shapes or spatial relationships of shapes, is heavily dependent on the dataset it is trained on. 
<d-cite key="geirhos2018imagenet"></d-cite> shows that CNNs trained on ImageNet data are biased towards predicting the category corresponding to the texture rather than shape. <d-cite key="farahat2023novel"></d-cite> reveal that CNNs learn spatial arrangements of features only up to an intermediate level of granularity by comparing networks trained on Sketchy dataset, composed of sketches drawn by images of animals, and the Animals dataset, images of animals.</p> <h2 id="motivation">Motivation</h2> <p>The question leading this project is if it is possible to steer the learning of a CNN network to use abstracted global shape features as a dominant strategy in classifying images, in a similar sense that humans do. Previous works have shown that networks trained on <d-cite key="geirhos2018imagenet"></d-cite> texture agnostic datasets, or <d-cite key="farahat2023novel"></d-cite> abstracted sketch dataset have an increased ability to integrate global features. Extending the findings of these works, I experiment if it is possible to induce the learning of CNNs to depend on global shapes by adjusting the filter size, or augmenting and curating the training data.</p> <h2 id="methods">Methods</h2> <p>In the following experiments, I train a CNN on human-generated sketch data and test with conflict sets to determine if it has learned to integrate global features in its decision making. The objective is to push the network to learn and depend on global features (the overall shape) of the object rather than local features (direction or curvature of strokes) in classifying images. To do this, I first vary the filter sizes to see if there is an optimal sequence that enables the network to learn such features. Next I augment the data by fragmentation and by adding a false category so that the network is forced to learn to classify images even when the local information is obscured and only when global information is present. 
Finally, to test the ability of the models from each experiment in integrating the global feature, I design a conflict set that is different from the training data. Images in the conflict set have the global features (overall shape) that aligns with its category but the local features (strokes and corner conditions) are distorted to varying degrees.</p> <h3 id="training-data">Training Data</h3> <p>The first way that the model is pushed to learn global features is by training it on human generated sketch data. This is distinct from the previous works that have used stylized image data, or image data that has been turned in to line drawings in that it is more driven by the human perception. It is likely that the data is more varied because it is each drawn by a different person, but what humans perceive as distinctive features of that object category is likely to be present across instances.</p> <p>The hypothesis is that because of the scarsity of features, and absense of other local features such as texture, the model would inevitably have to learn global features that humans commonly associate to object categories, such as shape.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/QuickDraw_example-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/QuickDraw_example-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/QuickDraw_example-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/QuickDraw_example.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 2. 
Example from circle and square category of <a href="https://github.com/googlecreativelab/quickdraw-dataset">Quick, Draw! dataset</a> that are used in this project.</p> <p>For the following experiments I use 100,000 instances each from the circle and square categories of the <a href="https://github.com/googlecreativelab/quickdraw-dataset">Quick, Draw! dataset</a> that have been rendered into 28x28 grayscale bitmap in .npy format. The dataset is split 85% for training and 15% for validation.</p> <h3 id="architecture-and-training-hyperparameters">Architecture and Training Hyperparameters</h3> <p>The CNN architecture is composed of 3 convolution layers and 2 linear layers with max pooling and relu activation. The filter size of each convolution layer, marked as * is varied in the following experiments. We use cross entropy loss and accuracy is the portion of instances that were labeled correcty. Each model is trained for 20 epochs with batch size 256.</p> <div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="n">nn</span><span class="p">.</span><span class="nc">Sequential</span><span class="p">(</span> <span class="n">data_augmentation</span><span class="p">,</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Conv2d</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">64</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">padding</span><span class="o">=</span><span class="sh">'</span><span class="s">same</span><span class="sh">'</span><span class="p">),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">ReLU</span><span class="p">(),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">MaxPool2d</span><span class="p">(</span><span class="mi">2</span><span class="p">),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Conv2d</span><span 
class="p">(</span><span class="mi">64</span><span class="p">,</span> <span class="mi">128</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">padding</span><span class="o">=</span><span class="sh">'</span><span class="s">same</span><span class="sh">'</span><span class="p">),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">ReLU</span><span class="p">(),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">MaxPool2d</span><span class="p">(</span><span class="mi">2</span><span class="p">),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Conv2d</span><span class="p">(</span><span class="mi">128</span><span class="p">,</span> <span class="mi">256</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">padding</span><span class="o">=</span><span class="sh">'</span><span class="s">same</span><span class="sh">'</span><span class="p">),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">ReLU</span><span class="p">(),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">MaxPool2d</span><span class="p">(</span><span class="mi">2</span><span class="p">),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Flatten</span><span class="p">(),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="mi">2304</span><span class="p">,</span> <span class="mi">512</span><span class="p">),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">ReLU</span><span class="p">(),</span> <span class="n">nn</span><span class="p">.</span><span class="nc">Linear</span><span class="p">(</span><span class="mi">512</span><span class="p">,</span> <span class="mi">2</span><span class="p">),</span> <span class="c1"># 2 categories (circle, square) </span><span class="p">)</span> </code></pre></div></div> <h3 
id="convolutional-layer-filter-size">Convolutional Layer Filter Size</h3> <p>The hypothesis is that the size of the filters of each convolution layer affects the scale of features that the network effectively learns and integrates in its final decision making. The underlying assumption is that if the filter size gradually increases, the CNN learns global scale features and uses that as dominant stragety. I test for different combinations of size 3,5,7,9 to see if there is an optimal size filter to train a CNN network for our purpose.</p> <h3 id="data-augmentation---fragmentation">Data Augmentation - Fragmentation</h3> <p>I train models with augmented data of different degree of fragmentation. Lower degrees of fragmentation divide the shape into 2 fragments and with higher degree, the shape is divided into an increasing number of parts. I do this by using masks that create streaks going across the image each in the horizontal, vertical and two diagonal directions. As a result, we create circles and squares with dashed lines.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_fragmentation-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_fragmentation-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_fragmentation-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_fragmentation.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 3. 
Augmentations with varying degrees of fragmentation.</p> <p>The hypothesis is that fragments of circles and squares may be similar, so as the network is trained to distinguish between two categories regardless, it has to gain an understanding of larger scale features; how these line segments are composed. If the model successfully trains on datasets that are highly fragmented, it is expected to acquire knowledge of global features. For instance, an intermediate scale interpretation of circles would be that the angle of line segments is gradually rotating. On the other hand, squares would have parallel line segments up to each corner where there is a 90 degree change in the angle.</p> <h3 id="data-augmentation---negative-labels">Data Augmentation - Negative Labels</h3> <p>We add instances where the local features of the circle or square are preserved, but the global feature is absent, and label them as an additional category, ‘false’. We create this augmentation by masking half or 3/4 of the existing data. The intention here is to have the model learn to only categorize shapes when their global features are present.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_negative-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_negative-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_negative-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/augmentation_negative.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 4. 
Augmentation with addition of ‘false’ category.</p> <h2 id="results">Results</h2> <h3 id="training-evaluation">Training Evaluation</h3> <p>We first want to examine if the independent variables affect the model’s training on the classification task. There is the possibility that with certain filter sizes, the model may not be able to encode enough information to differentiate circles and squares. More likely there is a possibility with the augmentations that we are using to force the CNN to learn a more difficult strategy, where the model fails to train to classify instances similar to the training set to start with. If training the model is unsuccessful, it means that CNNs under those conditions are incapable of finding any strategy to differentiate the two shape categories.</p> <h3 id="conflict-set-evaluation">Conflict Set Evaluation</h3> <p>To test the networks ability to employ global features we borrow the approach of <d-cite key="baker2020local"></d-cite> that use “conflict examples”. Conflict instances have the overall shape that aligns to its label, but the local features, such as stroke or texture do not. The premise is that it is easy for humans, that primarily use global information to differentiate shapes to successfully categorize these conflict sets. 
Therefore, it would be a good way to test if the trained CNNs use similar differentiating strategies as humans.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/conflictset-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/conflictset-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/conflictset-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/conflictset.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 5. Three conflict sets that obscure local features to contradict the global feature and ground truth label.</p> <p>We create three series of conflict sets for circle and squares that obscure its most distinguishing local features. The first set obscures the corner conditions - circles with one to two angular corners and squares with chamfered corners are included in this set. The second obscures line conditions - circles with angular lines and squares with curvy lines are created for this set. 
The third series targets the composition of strokes - instead of continuous lines, we use series of parallel lines of varying angles to form a circle or square.</p> <h3 id="filter-variation">Filter Variation</h3> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/filter_training-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/filter_training-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/filter_training-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/filter_training.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 6. Training evalution for variations in filter size of the convolution layer.</p> <p>For each variation in filter size, the models trained to reach over 98.5% accuracy on the validation set. 
Contrary to our speculation, the filter size did not largely affect the model’s ability to learn the classification task.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/filter_results-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/filter_results-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/filter_results-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/filter_results.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 7. Evaluation with conflict set for variations in filter size of the convolution layer.</p> <p>Overall we observe that having a large size filter at the final layer increases the model’s performance on the conflict set as with filter sequence 337 and 339. We can speculate that having consistently smaller size filters in the earlier layers and only increasing it at the end (337, 339) is better than gradually increasing the size (357, 379). However, this is not true all the time as models with consistent size filters performed relatively well (333, 555). Starting with a larger size filter (555, 557, 579 compared to 333, 337, 379) also helped in performance. However, this also came with an exception where 339 performed better than 559.</p> <p>Overall we can see that the models have trouble classifying instances with increased degree of conflicting local features. For instance the 4th instance in set 2 obstructs all four of the perpendicular angles of a square. 
The 3rd and 4th instance of set 2 have the most angular ridges forming its lines and the 7th and 8th instance of set 2 have the most circluar forming its lines. From set 3, the first and second instance obstruct the gradually changing angle of strokes within the circle the most.</p> <h3 id="data-augmentation-variation">Data Augmentation Variation</h3> <p>Based on the results with filter variation, we choose the filter size 555 to that performed moderately well, but still has room for improvement for the next experiment with augmented training data.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/augment_training-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/augment_training-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/augment_training-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/augment_training.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 8. Training evalution for variations in augmentation of training data.</p> <p>All models trained to reach over 98% accuracy on the validation set. As we speculated, the model had more difficulty in training with the augmentation as opposed to without. With the additional third negative category, the model was easier to train. 
This is evident with the divide in the plot with datasets that were augmented with the negative category to have higher evaluation values than the baseline and those that were only augmented with fragmented data were below the baseline.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/augment_results-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/augment_results-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/augment_results-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-how-cnns-learn-shapes/augment_results.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 9. Evaluation with conflict set for variations in augmentation of training data.</p> <p>The performance of models trained with augmented data on the conflict set was worse than that trained only on the original data, which disproves our initial hypothesis that it would be possible to force the network to use global features with augmented data. What is interesting is how different augmentations affect the performance. Initially, we thought that with the increased degree of fragmentation in the augmentation, the model would learn global features better, and would perform better on the conflict set. However, in comparison among the augmentation variations, Aug 2 showed significantly poor performance. Adding a ‘false’ category did not boost the performance either. What is interesting is that the misclassification does not include the false label. 
We speculate that the model has learned to look at how much of the image is occupied.</p> <h2 id="conclusion">Conclusion</h2> <p>The experiments in this project have shown that there isn’t an obvious way to steer CNN networks to learn intended scale features with filter size variation and data augmentation. While it was difficult to find a strict correlation, the variation in performance across experiments shows that the independent variables do have an effect on the information that the network encodes, and what information reaches the end of the network to determine the output. The fact that trained models were unable to generalize to the conflict set reinforces the fact that encoding global features is difficult for CNNs and it would likely resort to classifying with smaller scale features, if there are apparent differences.</p> <p>While the project seeks to disentangle factors that could affect what the CNN learns, the evaluation with conflict sets does not directly review how features are processed and learned within the network. Approaches such as visualizing the activation of each neuron or layer can be more effective in this and can reveal more about how to alter the network’s sensitivity to the global features.</p> Quantum Circuit Optimization with Graph Neural Nets 2023-11-08T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/quantum-gnn <h2 id="introduction">Introduction</h2> <p>One of the most notable technological developments of the past century has been computing based on binary bits (0’s and 1’s). Over the past decades, however, a new approach based on the principles of quantum mechanics threatens to usurp the reigning champion. 
Basing the informational unit on the quantum bit, or qubit, instead of the binary bit of “classical” computing, quantum computing takes advantage of the strange phenomena of modern physics like superposition, entanglement, and quantum tunneling.</p> <p>Leveraging these as algorithmic tools, surprising new algorithms may be created. Shor’s algorithm, based on quantum algorithms, can solve classically hard cryptographic puzzles, threatening the security of current cryptographic protocols. Additionally, quantum computers can significantly accelerate drug discovery and materials science through quantum molecular dynamics simulations. They also show great potential in Quantum Machine Learning (QML), enhancing data analysis and pattern recognition tasks that are computationally intensive for classical computers.</p> <p>Similar to classical computers, which base their algorithms on circuits, quantum computers build their quantum algorithms on quantum circuits. However, quantum computers are still in development and are incredibly noisy. The complexity of a quantum circuit increases its susceptibility to errors. Therefore, optimizing quantum circuits to their smallest equivalent form is a crucial approach to minimize unnecessary complexity. This optimization is framed as a reinforcement learning problem, where agent actions are circuit transformations, allowing the training of RL agents to perform Quantum Circuit Optimization (QCO). Previous techniques in this domain have employed agents based on convolutional neural networks (CNN) <d-cite key="fosel2021"></d-cite>.</p> <p>My previous research has demonstrated that the inherent graphical structure of circuits make QCO based on graph neural networks (GNN) more promising than CNNs. GNNs are particularly effective for data with a graph-like structure, such as social networks, subways, and molecules. Their unique property is that the model’s structure mirrors the data’s structure, which they operate over. 
This adaptability sets GNNs apart from other machine learning models, like CNNs or transformers, which can actually be reduced to GNNs. This alignment makes GNNs a highly promising approach for optimizing quantum circuits, potentially leading to more efficient and error-resistant quantum computing algorithms.</p> <p>This project extends my previous work by systematically investigating the impact of various architectural choices on the performance of GNNs in quantum circuit optimization. This is achieved through a series of experiments focusing on key variables such as the number of layers in the GNN, the implementation of positional encoding, and the types of GNN layers used.</p> <p>Specific objectives include:</p> <ol> <li><strong>Evaluating the Number of GNN Layers</strong>: Investigating how the depth of GNNs influences the accuracy and efficiency of quantum circuit optimization. This involves comparing shallow networks against deeper configurations to understand the trade-offs between complexity and performance.</li> <li><strong>Exploring Positional Encoding Techniques</strong>: Positional encoding plays a crucial role in GNNs by providing information about the structure and position of nodes within a graph. This project experiments with various encoding methods to determine their impact on the accuracy of quantum circuit optimization.</li> <li><strong>Assessing Different Sizes of Hidden Dimension</strong>: This objective focuses on understanding the influence of the hidden dimension size within GNN layers on the performance of quantum circuit optimization. By varying the size of the hidden dimension, the project identifies the optimal balance between computational complexity and the model’s ability to capture complex relationships within the data.</li> </ol> <h2 id="quantum-circuits-and-transformation-environment">Quantum Circuits and Transformation Environment</h2> <p>In order to have quantum circuit optimizers we need quantum circuits! 
Quantum circuits are built out of quantum gates operating on qubits. These quantum circuits implement quantum algorithms in a similar way that classical circuits implement classical algorithms. In the below example, we have a five qubit circuit. It has a variety of single qubit gates (X, Rz, and H) as well as two qubit gates (CX).</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/random_before-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/random_before-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/random_before-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-quantum-gnn/random_before.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Some gates have classical analogs, like the X gate which is analogous to the classical NOT bit-flip gate. Others, like the Hadamard (H) gate, cannot be understood with classical intuition. We can use gates like H in combination with a two qubit gate like CX to put two qubits into unique quantum states. 
For example, with the following circuit, we can put two qubits into a special state called “quantum entanglement”.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/entanglement-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/entanglement-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/entanglement-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-quantum-gnn/entanglement.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/heads-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/heads-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/heads-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-quantum-gnn/heads.jpeg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>These qubits have outcomes that are perfectly correlated with each other. If they are measured, they will always result in the same outcome, even if after the circuit is applied the qubits are separated an arbitrary distance. This is despite the fact that the outcome is perfectly random! Measurement will result in 0 and 1 with probability 50% each. 
This is like flipping two coins whose outcome you cannot predict, but which always land both heads or both tails.</p> <p>We can write the circuit and subsequent quantum state with the following equation. The two possible resulting states (both heads or both tails) are represented in bracket notation: \(\ket{00}\) and \(\ket{11}\).</p> <p>\begin{equation} \ket{\psi} = \text{CX} \cdot (H \otimes I) \ket{00} = \frac{1}{\sqrt{2}}(\ket{00} + \ket{11}) \end{equation}</p> <p>However, just like classical algorithms can be written down according to different programs and circuits which do the same thing, quantum circuits can have different equivalent forms. Transitions between these equivalent forms can be written down according to a set of local rules mapping from some set of quantum gates to another.</p> <p>In the following diagram we show the quantum transformations used for this project. They are ordered according to 1) single qubit, 2) two qubit, and 3) three qubit transformations.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/one_gate_trans-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/one_gate_trans-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/one_gate_trans-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-quantum-gnn/one_gate_trans.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/two_gate_trans-480.webp" /> <source class="responsive-img-srcset" 
media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/two_gate_trans-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/two_gate_trans-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-quantum-gnn/two_gate_trans.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/three_gate_trans-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/three_gate_trans-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/three_gate_trans-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-quantum-gnn/three_gate_trans.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>These transformations will serve as the action space for our quantum circuit environment. Notably, some of these circuit transformations involve merges or cancellations, which can be used to simplify the circuits. A quantum agent which chooses an appropriate sequence of circuit transformations can then simplify a circuit into an equivalent form with fewer gates. 
Therefore, the task of circuit optimization may be decomposed into a trajectory of agent steps leading between different states, where states correspond to quantum circuits which are all algorithmically equivalent.</p> <h2 id="proximal-policy-optimization">Proximal Policy Optimization</h2> <p>To train the GNN agent, we use the Proximal Policy Optimization (PPO) algorithm. PPO is a model-free, on-policy reinforcement learning algorithm that aims to optimize the policy of a reinforcement learning agent by iteratively updating its policy network. We train the GNN agent on n-qubit random circuits. For training the GNN-based agents for quantum circuit optimization, we use the Proximal Policy Optimization (PPO) algorithm. PPO is a deep reinforcement learning algorithm that has shown success in a variety of applications, including game playing and robotics. The algorithm updates the policy by maximizing a surrogate objective function that approximates the expected improvement in the policy, while enforcing a constraint on the maximum change in the policy. This constraint helps to prevent the policy from changing too much from one iteration to the next, which can destabilize the training process.</p> <p>\begin{equation} L^{\text{CLIP}}(\theta) = \hat{\mathbb{E}}_t[\min(r_t(\theta))\hat{A}_t, \text{clip}(r_t(\theta), 1 - \epsilon, 1 + \epsilon)\hat{A}_t] \end{equation}</p> <p>To train the GNN agents for quantum circuit optimization, we start by initializing the GNN weights randomly. We then use the PPO algorithm to update the weights by sampling circuits from a distribution of n-qubit random circuits, encoding them into graphs, and simulating the circuits in a custom python gym environment. For each transformation we use</p> <p>\begin{equation} r_t = - \left(q(s_{t+1}) - q(s_{t})\right) \end{equation}</p> <p>as the reward signal for the PPO algorithm following <d-cite key="fosel2021"></d-cite>, where we let $q$ be a function quantifying the desirability of the circuit. 
The PPO algorithm then updates the GNN weights to maximize the expected reward, subject to the maximum change in the policy constraint.</p> <p>We use \(q(s) = -\texttt{circuit_size}(s)\), such that the agent’s objective is to reduce the overall circuit size, as measured by number of gates, resulting in the reward function:</p> <p>\begin{equation} r_t = \texttt{circuit_size}(s_{t+1}) - \texttt{circuit_size}(s_t) \end{equation}</p> <p>The methodology for implementing the quantum circuit optimization using deep reinforcement learning and graph neural networks consists of two main components: (1) encoding the circuits as directed acyclic graphs using the DAG encoding and (2) encoding the graphs as node and edge feature tensors and training a GNN-based agent using the PPO algorithm.</p> <h2 id="gnn-architecture">GNN architecture</h2> <p>The GNN architecture used is inspired by the message passing neural network (MPNN), which is a type of GNN that performs iterative message passing between nodes in the graph.
The GNN architecture used for this approach consists of \(L\) layers of Residual Gated Graph ConvNets.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/gnn-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/gnn-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/gnn-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-quantum-gnn/gnn.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>The GNN gets as input the graph (encoded as the three tensors shown above), the positional encoding, and a binary tensor encoding of which transformations are allowed for each node (this can be computed in \(O(\# nodes)\) time).</p> <p>Node features and positional encoding are both mapped to a k-dimensional embedding with a linear transformation and added together, forming a vector \(h\). The edge features are also linearly mapped to some \(l\)-dimensional embedding vector \(e\).</p> <p>After passing through \(L\) layers, each node has a feature vector \(h’\). These features are mapped to a length \(t\) Q-vector where t=# transformations. A mask is applied so that all impossible transformations are ignored. The length \(t\) Q-vectors are concatenated together from all nodes and then outputted by the GNN. An action is selected by choosing the node/transformation which corresponds to the index of the maximum Q-value.</p> <h2 id="results">Results</h2> <p>After training our graph neural network agent in the quantum circuit environment using PPO, we can verify that the agent can indeed optimize circuits. We randomly sample a five qubit circuit and run our agent on the circuit for fifty steps.
We see that the agent is able to successfully reduce the circuit size from 44 gates to 30, a 14 gate reduction. Meanwhile, the standard Qiskit optimizer could only reduce the circuit to 36 gates.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/random_optimization-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/random_optimization-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/random_optimization-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-quantum-gnn/random_optimization.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Now that we have verified that our learning algorithm can successfully train a quantum circuit optimizing agent, we proceed with our study over three hyperparameters: 1) number of layers, 2) the use of positional encoding, and 3) hidden dimension. For all plots, we display the average over several runs with standard error.</p> <h3 id="number-of-layers">Number of Layers</h3> <p>We investigate how the depth of GNNs influences the accuracy and efficiency of quantum circuit optimization. This involves comparing shallow networks against deeper configurations to understand the trade-offs between complexity and performance.
In order to do this we scan over the number of layers \(L\) in our GNN from 1 to 7.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/n_layers-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/n_layers-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/n_layers-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-quantum-gnn/n_layers.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>We see that, generally, increasing the number of layers in the model improves performance of the model on random circuits. This is aligned with the intuition that increasing the number of layers of a GNN allows models to “see” information from further away, which can be used to make strategic decisions.</p> <p>However, we also observe that there is some critical point in which increasing \(L\) no longer leads to better outcomes from the model. This threshold appears to occur around \(L=5\), which performs similarly to \(L=7\).</p> <p>This could be related to a known property of GNNs, in which features of nodes which are closer together are more similar. This becomes exacerbated as the number of layers increases, smearing out information. Therefore, we expect that if we continued to increase \(L\) then model performance would degrade.</p> <h3 id="positional-encoding">Positional Encoding</h3> <p>Positional encoding can provide information about the structure and position of nodes within a graph. These features can often play a role in symmetry-breaking.</p> <p>In addition to the existing features encoding gate type and wire information, we concatenate 8 normally distributed dimensions to the feature vector.
We hypothesize that these random features can be used to “ID” gates that have the same gate type but are located in different locations. We experiment with training a GNN with and without the addition of random positional encoding.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/pos_encoding-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/pos_encoding-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/pos_encoding-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-quantum-gnn/pos_encoding.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>The resulting plot shows inconclusive evidence. While the random positional encoding came out on top at the end of training, the difference is not significant enough to be able to conclude that it is demonstrably better.</p> <h3 id="hidden-dimension">Hidden Dimension</h3> <p>The last hyperparameter we examine is the hidden dimension of the GNN layers. We scan over values 16, 32, 64, and 128.
All other parameters are kept fixed.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/hidden_dim-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/hidden_dim-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-quantum-gnn/hidden_dim-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-quantum-gnn/hidden_dim.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>We observe that performance tends to improve with scale. However, similarly to the “number of layers” hyperparameter, there appears to be some critical threshold after which scaling no longer appears to improve performance. From our experiments this threshold appears to be around 64.</p> <p>It is unclear what would happen if we continued scaling past 128. For example, the performance could stay at the plateau reached at hidden dimension 64 and 128, or it could eventually get worse.</p> <h2 id="further-work">Further Work</h2> <p>While this work gave a first glimpse at some of the structural properties that work with GNNs for RL on quantum circuits, much work remains.</p> <p>Notably, many of the training runs did not seem to train until plateau. To be fully confident in the results, training until plateau would be necessary. Additionally, many of the runs were quite noisy, making it difficult to distinguish between the performance under different runs. Therefore, increasing training samples could effectively reduce standard error for better statistics.</p> <p>Moreover, the scope of future exploration can be expanded. One of the most interesting areas of future work would be on what types of graph layers work best.
While we use Residual Gated Convolution Nets, it is not clear that this is the best layer type. Other things that could be tested are other positional encoding schemes. While we experimented with random features, more standard positional encoding schemes include Laplacian and Random walk encoding.</p> <h2 id="conclusion">Conclusion</h2> <p>We find that there appear to be critical thresholds of optimal values for the hidden dimension and number of layers in GNNs. We also find no evidence that random positional encoding appears to improve performance, contrary to intuition that it would serve a useful symmetry-breaking function. While much work is left to be done, this work provides a first investigation into how performance of GNNs on QCO can be affected by various choices of hyperparameters.</p> Structural vs Data Inductive Bias 2023-11-08T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/Structural_vs_Data_Inductive_Bias <h2 id="introduction">Introduction</h2> <h3 id="lack-of-training-data">Lack of Training Data</h3> <p>The transformative impact of vision transformer (ViT) architectures in the realm of deep learning has been profound, with their applications swiftly extending from computer vision tasks, competing with traditional neural network architectures like convolutional neural networks (CNNs). Despite their success, the intricacies of how architectural variations within ViTs influence their performance under different data conditions remain largely uncharted.
Unraveling these subtleties is the focus of this project.</p> <h3 id="project-goal">Project Goal</h3> <p>While much research has been made to find the best choice of data augmentation or the best structural change in the model to increase performance, our project empirically compares two kinds of methods:</p> <ul> <li>Data augmentation through tuning-free procedures</li> <li>Explicit inductive bias through discrete attention masking For data augmentation, we chose a simple-to-use procedure called TrivialAugment to increase by four times the amount of training data. Here we want an easy-to-use method that could help as a benchmark for the second method.</li> </ul> <p>For explicit inductive bias, we use a general vision transformer architecture which allows us to change the number of attention heads and layers where the mask would be applied; this mask is what explicitly induces a bias in the model by forcing some layers to only learn relationships between close patches of the data.</p> <p>Our goal with this comparison and the difference with previous works is that we want to experiment to which point one method could be better than the other by really compensating for the lack of information in the training of a vision transformer.</p> <p>Due to computational and time limitations, we would train our model in a simple task of image classification based on CINIC-10. We also use a tiny model to be able to iterate many times through different scenarios of inductive bias. The selection of methods also reinforces these limitations but are a good starting point as many of the projects that would be lacking in training data probably are in testing phases where light tools like Google Colab are used.</p> <h3 id="contribution">Contribution</h3> <p>The result from this project contributes in two ways.
First, it gives us a glance of how beneficial the level of proposed inductive bias in the performance of the model could be, and second, it contrasts which method, and until which point, performs better given different scenarios of initial training data available.</p> <h2 id="related-work">Related Work</h2> <h3 id="data-augmentation">Data Augmentation</h3> <p>Data augmentation consists in applying certain transformations to the data in order to create new examples with the same semantic meaning as the original data. For images, data augmentation consists in spatial transformations like cropping, zooming or flipping. Although data augmentation is very popular among practitioners, previous works like <d-cite key="ref1"></d-cite> have proven that data augmentation by itself reaches a saturation point where it is even worse than training on the new data, decreasing the performance of the model. Given our goal of comparing data augmentation with inductive bias, we expect to get a similar result in the efficacy of data augmentation while we increase the initial amount of data.</p> <p>Data augmentation decisions can be tough because of the many options available to perform, but it is so popular that some researchers are trying to make it easier to use and computationally efficient, one example being the TrivialAugment <d-cite key="ref2"></d-cite> method where simple random augmentation can be compared to or outperform other more complex algorithms that try to find the best augmentation for the given dataset. TrivialAugment would be the procedure used in this project given its simplicity.
The model proposed is trained from scratch with few data, but its implementation and interpretability could be troublesome for small projects.</p> <p>Other authors in <d-cite key="ref4"></d-cite> use a set of pre-trained models with complementary structures (Convolutional and Involutional) to help a lightweight visual transformer model called DeiT (Data-efficient image Transformer) increase its performance by getting a baseline result that is added as a token and works like an induced bias of the properties of the image. The scores from the pre-trained models give more information than the ground truth because they give a value of likelihood for every class, which is a result of the different attributes of the specific image.</p> <p>Although these changes demonstrate that it is possible to get better performance with few data without augmentation, it is not clear how we can adjust the inductive bias produced to identify until which point it works. The usage of pre-trained models is also not desirable here because of our premise that we could be using this experiment to make decisions in new datasets and tasks.</p> <h3 id="explicit-inductive-bias">Explicit Inductive Bias</h3> <p>The model proposed in <d-cite key="ref5"></d-cite> is a better example of real lack of training data overcome by introducing a different kind of attention heads. In this case, medical images tend to have the same orientation, a property that is leveraged to force the attention heads to focus on axial information which normally represents the spread of tumors. Here the inductive bias is that the image has a structure where patches aligned in rows and columns are more related between them than diagonal ones.
<d-cite key="ref6"></d-cite> adds a new layer at the beginning of the model which acts like a local mask but with variable learnable attention levels, where the model figures out how much local attention it should apply given the proposed task. <d-cite key="ref7"></d-cite> on the other hand add new convolutional layers in parallel to the transformer to let them capture the local information while letting the original transformer to keep the big picture of the image. Finally, in <d-cite key="ref8"></d-cite> it is proposed a change in the initial attention layers, making them GPSA (Gated Positional Self-Attention) which learn for each patch if pay more attention to the attention product (Query * Key) or the position of the patch in the image.</p> <p>From these works it is stated that some layers of the transformer converge to convolutional behaviors given the nature of the data used for training, but this requires a relatively big amount of data that could not be available. It is also noticed that the inductive bias is applied to the first layers of the model.</p> <p>The model proposed in <d-cite key="ref9"></d-cite> uses a simpler method which consists in applying a mask pattern to some of the attention heads to induce local attention bias into the model. To decide which heads and layers should be masked, it uses a soft masking approach where the model learns a scale factor between 0 and 1 which sets the level of local inductive bias that is applied to that head. The results show that it is possible to obtain good results by using more local masking in the first layers and keeping the global interaction in the last ones. 
This approach is also model agnostic and easy to implement, which is why it is close to the experimentation of this project.</p> <p>The power of this masking method is also shown in <d-cite key="ref10"></d-cite> where the mask is learned by a parallel process of pixel-wise classification, successfully increasing the performance in more complex tasks like pixel-wise segmentation.</p> <h2 id="methods-and-experiment">Methods and Experiment</h2> <p>To explore and compare the benefits of data augmentation versus induced bias we are running three related experiments. All experiments would be run with CINIC-10 <d-cite key="ref11"></d-cite> dataset in Google Colab using a T4 GPU. We decided to use CINIC-10 instead of CIFAR-10 because even though it is a drop-in replacement of CIFAR-10, it is much larger than CIFAR-10, so we can test on different numbers of base training samples, but it is not so large as ImageNet, which is too large/difficult to test.</p> <h3 id="experiment-1">Experiment 1</h3> <p>The goal of the first experiment is to get a glance of the overall differences in accuracy for the compared methods. The model used for this experiment consists of a basic visual transformer with six layers and linear positional embeddings. Each layer corresponds to a multiheaded attention layer with only two heads.
The schematic of the model can be seen in figure 1.</p> <p>Figure 1</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_2_arch1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_2_arch1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_2_arch1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_2_arch1.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>By default, the attention heads in the model are fully connected to give them a global behavior, but the model can be configured to apply a local pattern mask or a sparse pattern mask to all heads in all layers.</p> <p>Figure 2</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_1_mask-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_1_mask-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_1_mask-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_1_mask.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>The model would be trained with different scenarios of initial data, in specific, with 1000, 2000, 5000, 12500 and 
20000 samples. In each scenario, we would get four different models:</p> <ul> <li>Baseline model: Without data augmentation and with default global attention</li> <li>Data augmentation: With data augmentation and default global attention</li> <li>Local attention: Without data augmentation and with local attention</li> <li>Sparse attention: Without data augmentation and with sparse attention</li> </ul> <p>The data augmentation technique would be TrivialAugment and the metric would be accuracy on validation dataset. We set these four models trying not to mix data augmentation with changes in the induced bias, keeping the default global attention in the transformer as our baseline.</p> <h3 id="experiment-2">Experiment 2</h3> <p>Having experimented with the differences where all layers have the same mask, we now set experiments to play with the level of induced bias applied to the model. The goal now is to identify a relation between the level of induced bias applied to the model and their performance. 
For this experiment we modify our first model in the following ways:</p> <ul> <li>We increase the number of attention heads in each layer from 2 to 4</li> <li>We set the final two layers to global attention, so the mask is not applied to them</li> <li>We configure each head in the first four layers to be able to be hard configured as either local or global attention.</li> </ul> <p>Figure 3</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_3_arch2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_3_arch2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_3_arch2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_3_arch2.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>With this new model, we can create one instance for each combination of global/local head in any of the first four layers, generating a sense of “level of induced bias” based on the number and configuration of attention heads treated as local.</p> <p>Given computational limitations, we would set only two initial data scenarios (10000 and 50000) and get 16 models for each scenario:</p> <ul> <li>Baseline model: Without augmentation and with all global attention</li> <li>Data augmentation: With data augmentation and all global attention</li> <li>14 combinations of local heads and layers:</li> </ul> <p>Table 1</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_7_table-480.webp" /> <source 
class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_7_table-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_7_table-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_7_table.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>We would analyze the differences in accuracy between different levels of induced bias in the same initial data scenario and see if we can get a selection of best performing inductive bias levels to apply them more broadly in the third experiment.</p> <p>With this comparison we also want to capture what are the visual differences between the attention heads in the different levels of induced bias to try to explain with is doing better or worse than the baseline.</p> <h3 id="experiment-3">Experiment 3</h3> <p>Our final experiment consists in comparing the accuracy and the effective additional data (EAD) that each method brings when applied to different initial amounts of data. The initial data scenarios to train the models would be 1000, 5000, 10000, 20000, and 50000 samples. The comparison would be made between the data augmentation model for each scenario, versus the top 3 levels of induced bias from experiment 2.</p> <p>The effective additional data (EAD) represents the extra amount of real data that the method is compensating, the higher the better to be considered as a successful method for solving lack of data problems. 
This metric is calculated by looking at which scenario of initial data would make the baseline model perform equal to the method analyzed.</p> <h2 id="results">Results</h2> <h3 id="experiment-1-1">Experiment 1</h3> <p>In our initial experiment, we compared performance on four variations of model scenarios. Our baseline model uses global attention mechanism, one uses local attention mechanism, another one uses sparse attention mechanism, and the last model uses the same global attention mechanism as the first model except that data augmentation is applied during its training process. One notable callout for our initial experiment is that we took a naïve approach and designed our local and sparse attention heads to be in all six attention layers of the attention. We trained and collected the validation accuracy and training time for each model variation for different number of base training samples from 1000 to 20000. Below are the results.</p> <h4 id="result-and-analysis">Result and Analysis</h4> <p>Figure 4</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_8_exp1table-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_8_exp1table-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_8_exp1table-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_8_exp1table.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 5</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" 
srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_9_exp1graph-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_9_exp1graph-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_9_exp1graph-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_9_exp1graph.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>There are a few notable observations to point out from the results. First, we can see that the two models using the local attention mechanism or sparse attention mechanism performed significantly worse than our baseline model that used global attention. Though we did expect this to happen since CINIC-10’s classification task intuitively requires a global context of the image, we did not foresee the performance difference to be so drastic. For example, when the base number of training data is 5000, we see that the baseline model achieves a validation accuracy of 62.5% while the local attention model achieves just 13.97% and the sparse attention model 42.64%. We observe a similar pattern across different levels of base samples. It’s also worth calling out that sparse attention models perform better than local attention models. This makes sense as sparse attention models still take into consideration the global context, just not completely on all the patches. Nevertheless, the sparse attention model takes almost the same amount of time to train as the baseline model, hence it does not make sense to use it in lieu of the baseline model in practice.
On the flip side, we verify that data augmentation improves performance and is the most significant when number of base samples is small.</p> <h3 id="experiment-2-1">Experiment 2</h3> <p>Our first experiment showed that simply setting all attention layers to contain only local or sparse attention heads does not produce good performance. As we were exploring additional datasets or tasks where applying a different attention mechanism may yield better performance, we came across the paper in <d-cite key="ref9"></d-cite>, in which it alluded to the idea that only applying local attention mechanism to just a few beginning layers of the network may improve performance even when the task intuitively requires a global context. The rationale behind it is that perhaps through using local attention mechanism, the model can create its own convolution making understanding local information better before using that information to answer a task that requires global context. With this inspiration, we designed our second experiment trying out different combinations of local attention heads and layers, as described in the Methods and Experiments section, and below are the results and analysis.</p> <h4 id="result-and-analysis-1">Result and Analysis</h4> <p>Figure 6</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_10_exp2matrices-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_10_exp2matrices-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_10_exp2matrices-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_10_exp2matrices.png" class="img-fluid" width="auto" 
height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Here, the two matrices outline the validation accuracies we got when we trained the different local attention mechanism model on 10k and 50k base training samples. A quick recap, 1 Local Head and 1 Layer means we would use 1 local attention head in the 1st layer of the transformer. The color gradient in each matrix indicates the best performing combination from best (red) to worst (green).</p> <p>A few patterns can be noticed. First, for both matrices, models in the bottom right corner, representing a high number of local heads and in more layers, are performing worse than the rest. This aligns with our intuition from our first experiment because having more local attention heads in deeper portions of network will prevent the models from capturing global context, thus resulting in a worse performance.</p> <p>Figure 7</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_4_map-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_4_map-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_4_map-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_4_map.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Diving further, in figure 7, we visualize the attention weights to better compare different levels of induced bias. It seems that the performance increases as we add more local heads, but it ends up fading and not capturing the important characteristics of the data. 
In the 50k samples scenario it can be noticed that with more local heads, the attention spots converge to small parts of the image where there is no information about the object in it.</p> <p>Figure 8</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_6_local_map-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_6_local_map-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_6_local_map-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_6_local_map.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Moreso, in figure 8, it can be noticed that when local heads are used, it identifies correctly smaller details of the image. In this case, with all heads being global, it is hard to identify the three different cows in the middle image, but when some local heads are used, we can capture them.</p> <p>In summary, the major result of this experiment is that some models in the 10k samples sub-experiment produced better results than the base model. This is promising and validates our hypothesis from before. Though no combination produced better results in the 50k samples sub-experiment, we showed in Figure 8 that having local attentions can still be beneficial as it is able to capture some details that the baseline model misses.</p> <h3 id="experiment-3-1">Experiment 3</h3> <p>From the second experiment, we were then intrigued to see how some of the better performing models do under different number of base samples than just 10k and 50k. 
So, we pick three combinations (2 local heads for 2 layers, 1 local head for 2 layers, 3 local heads for 1 layer) and tested their performance against the baseline model and baseline + data augmentation for different number of base training samples from 5000 to 50k. Below are the results and analysis.</p> <h4 id="result-and-analysis-2">Result and Analysis</h4> <p>Figure 9</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_11_exp3table-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_11_exp3table-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_11_exp3table-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_11_exp3table.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Figure 10</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_12_exp3graph-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_12_exp3graph-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_12_exp3graph-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_12_exp3graph.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> 
</picture> </figure> <p>Here, we can observe two very interesting trends. First it validates our hypothesis that using local attention heads early in the layers of the vision transformers can improve performance despite the fact that task intuitive requires global context. This outcome is true for all three variations of the local attention models when the number of base training samples are 1000, 5000, and 10000. However, this effect tapers off when the number of base samples is sufficiently large, and the baseline model performs better. This seems to suggest that the benefit of the inductive bias coming from the local heads no longer outweighs the lack of information of the dataset. In other words, once there is sufficient data, the baseline model has enough information to learn a better representation on its own than that of the models.</p> <p>Figure 11</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_13_exp3extradata-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_13_exp3extradata-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_13_exp3extradata-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-Structural_vs_Data_Inductive_Bias/figure_13_exp3extradata.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Another perhaps more explicit and comparable way of explaining the phenomenon is to look at the Effective Extra Sample score. 
Essentially, the data tells us how much extra (or less) training data the change in model architecture gets us to achieve the same performance accuracy if using the baseline model. This graph clearly illustrates that data augmentation and tuning of local attention heads are very effective when the training datasets are relatively small, less than 15000 samples. This is likely because the inductive bias of the local attention heads causes the models to capture important characteristics of the image more efficiently and effectively than does the baseline model. However, once the number of base training samples gets over 20000, the effect reverses and they all perform worse than the baseline model, as illustrated by the negative effective training samples.</p> <p>Note: We did not plot the extra effective data for the data augmentation model scenario pass 10000 base training samples as its performance dropped significantly and is behaving weirdly.</p> <h2 id="conclusion">Conclusion</h2> <p>Through different experimentations, both data augmentation and induced bias by discrete attention masking can compensate for the lack of data for a given problem, but this compensation is only noticeable when the initial data is very low.</p> <p>The maximum effective additional data that the data augmentation method creates is higher than the induced bias method, but there is a sweet spot where induced bias is better than both data augmentation and baseline model.</p> <p>Once the initial amount of data starts to increase, data augmentation is the first one that in fact worsens the performance of the model. 
Induced bias on the other hand looks more stable while the initial data is increasing but is still not significantly better than the baseline model.</p> <p>We have shown that induced bias can help identify local attributes of the image more easily than the baseline alone, but this is only leveraged when the task that we want to solve is more specific and cannot be appreciated in a general task like image classification.</p> <h3 id="limitations-and-next-steps">Limitations and Next Steps</h3> <p>Given the restricted resources and amount of time available to execute this project, there is enough room for continuing research on this topic:</p> <ul> <li>We tried to make the data augmentation and inductive bias methods simple and easy to play with, but they could not be the best ones. The same procedures of this project can be applied to better and more complex types of data augmentation and induced bias to see if the results are replicable in other situations.</li> <li>Further experimentation could be done with datasets with multiple tasks and a deeper model to see if the type of task has an impact of the effectiveness of one method or the other. This could also be applied in recent real-world problems where there is not enough data yet, but we can clearly identify the relying relationship between patches of the images.</li> <li>Given a deeper model and a lot more experimentation in the level of inductive bias, there is an opportunity to empirically try to make a regression between how much inductive bias is applied to the model vs the resulting change in performance. 
The results of this project are not enough to implement such relations.</li> </ul> From Scroll to Misbelief - Modeling the Unobservable Susceptibility to Misinformation on Social Media 2023-11-08T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/suscep <h2 id="abstract">Abstract</h2> <p>Susceptibility to misinformation describes the extent to believe false claims, which is hidden in people’s mental process and infeasible to observe. Existing susceptibility studies heavily rely on the crowdsourced self-reported belief level, making the downstream research homogeneous and unscalable. To relieve these limitations, we propose a computational model that infers users’ susceptibility levels given their reposting behaviors. We utilize the supervision from the observable sharing behavior, incorporating a user’s susceptibility level as a key input for the reposting prediction task. Utilizing the capability of large-scale susceptibility labeling, we could also perform a comprehensive analysis of psychological factors and susceptibility levels across professional and geographical communities. 
Through this large-scale analysis, we observe that susceptibility is influenced by complex factors
The harm is especially significant in various contentious events including elections, religious persecution, and the global response to the COVID-19 pandemic<d-cite key="Ecker2022PsychologicalDriversMisinformation"></d-cite>. Many works have investigated the <strong>observable</strong> behavior of information propagation such as where the information propagates<d-cite key="Taylor2023WhereDoesYour"></d-cite>, how people share it<d-cite key="Yang2021COVID19InfodemicTwitter"></d-cite>, and what people discuss about it<d-cite key="Gupta2023PolarisedSocialMedia"></d-cite>. However, it is still crucial but challenging to understand the <strong>unobservable</strong> mental and cognitive processes when individuals believe misinformation<d-cite key="Ecker2022PsychologicalDriversMisinformation"></d-cite>. Users’ susceptibility (i.e., the likelihood of individuals believing misinformation) plays a pivotal role in this context. If a person is more susceptible to misinformation, they are not only more likely to share false claims but also more prone to being misled by them<d-cite key="Scherer2020WhoSusceptibleOnline"></d-cite>.</p> <p>Existing works have investigated the psychological, demographic, and other factors that may contribute to the high susceptibility of a population<d-cite key="Brashier2020AgingEraFake"></d-cite><d-cite key="Pennycook2020WhoFallsFake"></d-cite>. However, previous susceptibility studies heavily rely on self-reported belief towards false claims collected from questionnaire-based participant survey<d-cite key="Escola-Gascon2021CriticalThinkingPredicts"></d-cite><d-cite key="Rosenzweig2021HappinessSurpriseAre"></d-cite>, which presents several limitations. For instance, different participants might interpret the belief levels in different ways. 
Moreover, the data collection process is labor-heavy and thus limits the scale of downstream research on size, scope, and diversity of the target population<d-cite key="nan2022people"></d-cite>.</p> <p>The unobservance of people’s beliefs makes it infeasible to model susceptibility directly. Luckily, existing psychological literature bridges unobservable beliefs and observable behaviors, showing that the sharing behavior is largely influenced by whether users believe the misinformation<d-cite key="Altay2022WhyFewPeople"></d-cite><d-cite key="Atske2019ManyAmericansSay"></d-cite>, the attributes of the sharing content<d-cite key="pennycook2021psychology"></d-cite><d-cite key="van2022misinformation"></d-cite>, and users’ internal mental motives<d-cite key="Brady2020MADModelMoral"></d-cite><d-cite key="Islam2020MisinformationSharingSocial"></d-cite>. Motivated by these prior works, we formulate the relationship between believing and sharing described in social science literature into a machine learning task.</p> <p>Concretely, we propose to infer people’s susceptibility level given their re/posting behaviors. To parameterize the model, we wrap the susceptibility level as input for the prediction model of the observable reposting behavior. We perform multi-task learning to simultaneously learn to classify whether a user would share a post, and rank susceptibility scores among similar and dissimilar users when the same content is seen. Note that our model does not aim to predict any ground-truth susceptibility for individuals. Instead, we use users’ reposting behaviors towards misinformation as a proxy for their susceptibility level for better interpretability. 
Our model design enables unobservable modeling with supervision signals for observable behavior, unlocks the scales of misinformation-related studies, and provides a novel perspective to reveal the users’ belief patterns.</p> <p>We conduct comprehensive evaluations to validate the proposed susceptibility measurement and find that the estimations from our model are highly aligned with human judgment. Building upon such large-scale susceptibility labeling, we further conduct a set analysis of how different social factors relate to susceptibility. We find that political leanings and psychological factors are associated with susceptibility in varying degrees. Moreover, our analysis based on these inferred susceptibility scores corroborates the findings of previous studies based on self-reported beliefs, e.g., stronger analytical thinking is an indicator of low susceptibility. The results of our analysis extend findings in existing literature in a significant way. For example, we demonstrate that susceptibility distribution in the U.S. 
exhibits a certain degree of correlation with political leanings.</p> <p>To sum up, our contributions are:</p> <ol> <li>We propose a computational model to infer people’s susceptibility towards misinformation in the context of COVID-19, by modeling unobservable latent susceptibility through observable sharing activities.</li> <li>Evaluation shows that our model effectively models unobservable belief, and the predictions highly correlate with human judgment.</li> <li>We conduct a large-scale analysis to uncover the underlying factors contributing to susceptibility across a diverse user population from various professional fields and geographical regions, presenting important implications for related social science studies.</li> </ol> <hr /> <h2 id="computational-susceptibility-modeling">Computational Susceptibility Modeling</h2> <h3 id="modeling-unobservable-susceptibility">Modeling Unobservable Susceptibility</h3> <p>Inspired by the existing studies indicating that believing is an essential driver for dissemination, we propose to model susceptibility, which reflects users’ beliefs, as a driver for the sharing behavior, while considering characteristics of the sharing content and user profile.</p> <p>We propose a computational model to infer a user’s unobservable susceptibility score based on their historical activities as shown in the model figure, and further train the model with signals from the observable disseminating behavior. 
We construct approximate contrastive user-post pairs as the training data (<a href="#dataset-and-experiment-setup">Dataset and Experiment Setup</a>).</p> <p>This design would allow us to explore the best parameters for the computational model of an unobservable and data-hungry susceptibility variable using the rich data resources available on social media platforms.</p> <h4 id="content-sensitive-susceptibility">Content-Sensitive Susceptibility</h4> <p>We compute the user’s susceptibility when a particular piece of misinformation $p$ is perceived (i.e. $s_{u, p}$). This allows us to account for the fact that an individual’s susceptibility can vary across different content, influenced by factors such as topics and linguistic styles. By focusing on the susceptibility to specific pieces of misinformation, we aim to create a more nuanced, fine-grained, and accurate representation of how users interact with and react to different COVID-19 misinformation.</p> <h4 id="user-and-misinfo-post-embeddings">User and Misinfo Post Embeddings</h4> <p>As a component of the computational model, we use SBERT developed upon RoBERTa-large to produce a fixed-sized vector to represent the semantic information contained in the posts and user profiles. We consider the misinformation post as a sentence and produce its representation with SBERT. For the user profile, we calculate the average of sentence representations for the user’s recent original posts. 
More specifically, for every user-post pair $(u, p)$, we gather the historical posts written by user $u$ within a 10-day window preceding the creation time of the misinformation post $p$, to learn a representation of user $u$ at that specific time.</p> <h4 id="computational-model-for-susceptibility">Computational Model for Susceptibility</h4> <p>Given the input of the user profile for the user $u$ and the content for misinformation post $p$, the susceptibility computational model is expected to produce the <em>susceptibility score</em> $s_{u, p}$ as shown below, reflecting the susceptibility of $u$ when $p$ is perceived.</p> \[s_{u, p} = suscep(E(u), E(p))\] <p>We first obtain the embeddings $E(p)$ and $E(u)$ for post $p$ and user profile $u$, where $u$ is represented by the user’s historical tweets and $E$ is the frozen SBERT sentence embedding function. The susceptibility score is calculated by the function $suscep$, which is implemented as a multi-layer neural network, taking the concatenation of the user and post embeddings as inputs. In the training phase, we keep the sentence embedder frozen and learn the weights for the $suscep$ function that could be used to produce reasonable susceptibility scores. We expect to produce susceptibility scores for novel $u$ and $p$ pairs using the learned $suscep$ function during inference. Additionally, we normalize the resulting susceptibility scores into the -100 to 100 range for better interpretability.</p> <h3 id="training-with-supervision-from-observable-behavior">Training with Supervision from Observable Behavior</h3> <p>Susceptibility is not easily observable, thus it is infeasible to apply supervision on $s_{u, p}$ directly as only the user $u$ themselves know their belief towards content $p$. Thus, we propose to utilize the supervision signal for sharing a piece of misinformation, which is an observable behavior. 
We consider susceptibility as an essential factor of sharing behavior and use the susceptibility computational model’s output to predict the repost behavior.</p> <p>To produce the probability for user $u$ to share post $p$, we calculate the dot product of the embeddings of the user profile and post content and consider the susceptibility score for the same pair of $u$ and $p$ as a weight factor, and passing the result through a sigmoid function, as illustrated in the model figure.</p> \[p_{\text{rp}} = \sigma \left( E(u) \cdot E(p) \cdot s_{u, p} \right)\] <p>Note that we do not directly employ the \textit{susceptibility score} to compute the probability of sharing because the sharing behavior depends not only on the susceptibility level but also on other potential confounding factors. It is possible that a user possesses a notably high susceptibility score for a piece of misinformation yet chooses not to repost it. Hence, we incorporate a dot product of the user and post embedding in our model \dkvtwo{involve the misinformation post content and user profiles into the consideration of predicting the sharing behavior}.</p> <p>\(\begin{align} \mathcal{L}_{\text{bce}}(u_i, t) &amp;= -\left( y_i \log(p_{\text{rt}}(u_i, t)) + (1 - y_i) \log(1 - p_{\text{rt}}(u_i, t)) \right) \nonumber \\ \mathcal{L}_{\text{triplet}}(u_a, u_s, u_{ds}, t) &amp;= \text{ReLU}\left(\Vert s_{u_{a},t} - s_{u_{s},t}\Vert_2^2 - \Vert s_{u_{a},t} - s_{u_{ds},t} \Vert_2^2 + \alpha \right) \nonumber \\ \mathcal{L}(u_a, u_s, u_{ds}, t) &amp;= \frac{\lambda}{3} \sum_{i \in \{a, s, ds\}} \mathcal{L}_{\text{bce}}(u_i, t) + (1 - \lambda) \mathcal{L}_{\text{triplet}}(u_a, u_s, u_{ds}, t) \nonumber \label{eq:loss} \end{align}\)</p> <h4 id="objectives">Objectives</h4> <p>We perform multi-task learning to utilize different supervision signals. We first consider a binary classification task of predicting repost or not with a cross-entropy loss. 
Additionally, we perform the triplet ranking task<d-cite key="chen2009ranking"></d-cite><d-cite key="hoffer2015deep"></d-cite> to distinguish the subtle differences among the susceptibility scores of multiple users when the same false content is present.</p> <p>During each forward pass, our model is provided with three user-post pairs: the anchor pair $(u_a, p)$, the similar pair $(u_s, p)$, and the dissimilar pair $(u_{ds}, p)$. We determine the similar user $u_s$ as the user who reposted $p$ if and only if user $u_a$ reposted $p$. Conversely, the dissimilar user $u_{ds}$ is determined by reversing this relationship. When multiple potential candidate users exist for either $u_s$ or $u_{ds}$, we randomly select one. However, if there are no suitable candidate users available, we randomly sample one user from the positive (for “reposted” cases) or negative examples (for “did not repost” cases) and pair this randomly chosen user with this misinformation post $p$.</p> <p>Here, we elaborate on the definition of our loss function. Here, $y_i$ takes the value of 1 if and only if user $u_i$ reposted misinformation post $p$. The parameter $\alpha$ corresponds to the margin employed in the triplet loss, serving as a hyperparameter that determines the minimum distance difference needed between the anchor and the similar or dissimilar sample for the loss to equal zero. Additionally, we introduce the control hyperparameter $\lambda$, which governs the weighting of the binary cross-entropy and triplet loss components.</p> <h2 id="dataset-and-experiment-setup">Dataset and Experiment Setup</h2> <p>We use Twitter data because it hosts an extensive and diverse collection of users, the accessibility of its data, and its popularity for computational social science research<d-cite key="zhang2015ideology"></d-cite>. 
Additionally, it provides users’ free-text personal and emotional expression with crucial metadata, including timestamps and location data, which are useful for our subsequent analytical endeavors.</p> <h4 id="misinformation-tweets">Misinformation Tweets</h4> <p>We consider two misinformation tweet datasets: the ANTi-Vax dataset <d-cite key="hayawi2022anti"></d-cite> was collected and annotated specifically for COVID-19 vaccine misinformation tweets. On the other hand, CoAID <d-cite key="cui2020coaid"></d-cite> encompasses a broader range of misinformation related to COVID-19 healthcare, including fake news on websites and social platforms. The former dataset contains 3,775 instances of misinformation tweets, while the latter contains 10,443.</p> <p>However, a substantial number of tweets within these two datasets do not have any retweets. Consequently, we choose to retain only those misinformation tweets that have been retweeted by valid users. Finally, we have collected a total of 1,271 misinformation tweets for our study.</p> <h4 id="positive-examples">Positive Examples</h4> <p>We define the positive examples for modeling as $(u_{pos}, t)$ pairs, where user $u_{pos}$ viewed and retweeted the misinformation tweet $t$. We obtained all retweeters for each misinformation tweet through the Twitter API.</p> <h4 id="negative-examples">Negative Examples</h4> <p>Regarding negative examples, we define them as $(u_{neg}, t)$ pairs where user $u_{neg}$ viewed but did not retweet misinformation post $t$. However, obtaining these negative examples poses a substantial challenge, because the Twitter API does not provide information on the “being viewed” activities of a specific tweet. 
To tackle this issue, we infer potential users $u_{neg}$ that highly likely viewed a given tweet $t$ following the heuristics: 1) $u_{neg}$ should be a follower of the author of the misinformation tweet $t$, 2) $u_{neg}$ should not retweet $t$, and 3) $u_{neg}$ was active on Twitter within 10 days before and 2 days after the timestamp of $t$.</p> <p>We have collected a total of 3,811 positive examples and 3,847 negative examples, resulting in a dataset comprising 7,658 user-post pairs in total. We divide the dataset into three subsets with an 80% - 10% - 10% split for train, validation, and test purposes, respectively. The detailed statistics of the collected data are illustrated in the table below.</p> <table> <thead> <tr> <th> </th> <th>Total</th> <th>Positive</th> <th>Negative</th> </tr> </thead> <tbody> <tr> <td># Example</td> <td>7658</td> <td>3811</td> <td>3847</td> </tr> <tr> <td># User</td> <td>6908</td> <td>3669</td> <td>3255</td> </tr> <tr> <td># Misinfo tweet</td> <td>1271</td> <td>787</td> <td>1028</td> </tr> </tbody> </table> <h2 id="evaluation">Evaluation</h2> <p>In this section, we demonstrate the effectiveness of our susceptibility modeling by directly comparing our estimations with human judgment and indirectly evaluating its performance for predicting sharing behavior.</p> <h3 id="validation-with-human-judgement">Validation with Human Judgement</h3> <p>Due to the abstract nature of susceptibility and the lack of concrete ground truth, we face challenges in directly evaluating our susceptibility modeling. We use human evaluations to validate the effectiveness of our inferred susceptibility. Given the subjectivity inherent in the concept of susceptibility, and to mitigate potential issues arising from variations in individual evaluation scales, we opt not to request humans to annotate a user’s susceptibility directly. 
Instead, we structure the human evaluation as presenting human evaluators with pairs of users along with their historical tweets and requesting them to determine which user appears more susceptible to overall COVID-19 misinformation.</p> <p>Subsequently, we compared the predictions made by our model with the human-annotated predictions. To obtain predictions from our model, we compute each user’s susceptibility to overall COVID-19 misinformation by averaging their susceptibility scores to each COVID-19 misinformation tweet in our dataset. As presented in the table below, our model achieves an average agreement of 73.06% with human predictions, indicating a solid alignment with the annotations provided by human evaluators. Additionally, we consider a baseline that directly calculates susceptibility scores as the cosine similarity between the user and misinformation tweet embeddings. Compared to this baseline, our susceptibility modeling brings a 10.06% improvement. Moreover, we compare the performance with ChatGPT prompting with the task description of the susceptibility level comparison setting as instruction in a zero-shot manner. We observe that our model also outperforms predictions made by ChatGPT. The results from the human judgment validate the effectiveness of our susceptibility modeling and its capability to reliably assess user susceptibility to COVID-19 misinformation.</p> <table> <thead> <tr> <th> </th> <th>Our</th> <th>Baseline</th> <th>ChatGPT</th> </tr> </thead> <tbody> <tr> <td>Agreement</td> <td>73.06±8.19</td> <td>63.00±9.07</td> <td>64.85±9.02</td> </tr> </tbody> </table> <h3 id="susceptibility-score-distribution">Susceptibility Score Distribution</h3> <p>We provide a visualization of the distribution of susceptibility scores within positive and negative examples produced by our model on the training data. 
As depicted below, there is a notable disparity in the distribution between positive and negative examples, verifying our assumption that believing is an essential driver for sharing behavior. The difference in the means of the positive and negative groups is statistically significant, with a p-value of less than 0.001.</p> <div class="col-sm mt-3 mt-md-0"> <figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-suscep/pos_neg_distribution-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-suscep/pos_neg_distribution-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-suscep/pos_neg_distribution-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-suscep/pos_neg_distribution.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" data-zoomable="" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figcaption><strong>Susceptibility Score Distribution</strong> among positive and negative user-tweet samples. The distribution of positive (red) and negative (blue) examples exhibits a clear disparity.</figcaption> </figure> </div> <h3 id="sharing-behavior-prediction">Sharing Behavior Prediction</h3> <p>Furthermore, holding a belief is highly likely to result in subsequent sharing behavior. We demonstrated that our trained model possesses a strong ability for sharing behavior prediction. When tested on the held-out test dataset, our model achieves a test accuracy of 78.11% and an F1 score of 77.93. These results indirectly demonstrate the reliable performance of our model for susceptibility modeling.</p> <h2 id="analysis">Analysis</h2> <p>In this section, we show the potential of our inferred susceptibility scores in expanding the scope of susceptibility research. 
Our analysis not only aligns with the findings of previous survey-based studies but also goes a step further by extending and enriching their conclusions.</p> <h3 id="correlation-with-psychological-factors">Correlation with Psychological Factors</h3> <p>Previous research on human susceptibility to health and COVID-19 misinformation has been primarily based on questionnaire surveys <d-cite key="nan2022people"></d-cite><d-cite key="van2022misinformation"></d-cite>. These studies have identified several psychological factors that influence individuals’ susceptibility to misinformation. For instance, analytical thinking (as opposed to intuitive thinking), trust in science, and positive emotions have been linked to a greater resistance to health misinformation. Conversely, susceptibility to health misinformation is associated with factors such as conspiracy thinking, religiosity, conservative ideology, and negative emotions. In this part, we analyze the correlation coefficients between our modeled susceptibility scores and the aforementioned factors to determine if our results align with previous research findings.</p> <p>To achieve this, we compute factor scores for each user in our dataset based on their historical tweets using LIWC Analysis. We calculate the average value across all the user’s historical tweets as the final factor score. However, for emotional factors such as anxiety and anger with less frequent appearance, we opt for the maximum value instead to more effectively capture these emotions. We primarily consider the following factors: <em>Analytic Thinking</em>, Emotions (<em>Positive</em> emotions, <em>Anxious</em>, <em>Angry</em> and <em>Sad</em>), <em>Swear</em>, <em>Political Leaning</em>, <em>Ethnicity</em>, <em>Technology</em>, <em>Religiosity</em>, <em>Illness</em> and <em>Wellness</em>. These factors have been extensively studied in previous works and can be inferred from a user’s historical tweets. 
We calculate and plot the Pearson correlation coefficients between each factor and the susceptibility predicted by our model in the following table.</p> <table> <thead> <tr> <th>Factors</th> <th>Coeff.</th> <th>Factors</th> <th>Coeff.</th> </tr> </thead> <tbody> <tr> <td>Analytic Thinking</td> <td>-0.31</td> <td>Emotion - Positive</td> <td>-0.08</td> </tr> <tr> <td>Political Leaning</td> <td>0.13</td> <td>Emotion - Anxious</td> <td>0.08</td> </tr> <tr> <td>Ethnicity</td> <td>0.09</td> <td>Emotion - Angry</td> <td>0.16</td> </tr> <tr> <td>Religiosity</td> <td>0.10</td> <td>Emotion - Sad</td> <td>0.14</td> </tr> <tr> <td>Technology</td> <td>-0.09</td> <td>Swear</td> <td>0.18</td> </tr> <tr> <td>Illness</td> <td>0.09</td> <td>Wellness</td> <td>-0.02</td> </tr> </tbody> </table> <p>According to our analysis, correlations are consistent with previous social science studies based on surveys on health susceptibility. For instance, <em>Analytic Thinking</em> is a strong indicator of low susceptibility, with a correlation coefficient of -0.31. Conversely, certain features such as <em>Swear</em>, <em>Political Leaning</em> and <em>Angry</em> exhibit a weak correlation with a high susceptibility score. These results not only corroborate the conclusions drawn from previous survey-based studies <d-cite key="van2022misinformation"></d-cite><d-cite key="nan2022people"></d-cite> but also provide further validation for the effectiveness of our computational modeling for susceptibility.</p> <h3 id="geographical-community-differences">Geographical Community Differences</h3> <p>We delve into the geographical distribution of susceptibility. Given the significant imbalance in the number of users from different U.S. states, we calculate the average susceptibility scores for each state using Bayesian smoothing. 
We use the overall mean susceptibility score and overall standard deviation as our priors; the more users in a group, the less the overall mean affects that group’s score.</p> <p>We explore the susceptibility distribution among different U.S. states, considering the influence of political ideology associated with different states <d-cite key="gelman2009red"></d-cite>. Out of the 100,000 users sampled from around the world, 25,653 users are from U.S. states with more than 200 users for each state. As illustrated in the figure below, the susceptibility distribution across U.S. states is imbalanced and exhibits a certain degree of correlation with political leanings, where generally, states known to have a more conservative population tend to have relatively higher susceptibility scores, while states that are considered more liberal have lower scores. Specifically, the average susceptibility score for users in blue or red states is -3.66 and -2.82 respectively. Red or blue states refer to US states whose voters vote predominantly for the Republican or Democratic Party. We determine blue/red states according to the 2020 presidential election result. We observe that 60% of the ten states with the highest susceptibility scores are red states, and 90% of the ten states with the lowest susceptibility scores are blue states. This is a trend that has been observed in various research where political ideology influences the perception of scientific information <d-cite key="mccright2013influence"></d-cite><d-cite key="baptista2021influence"></d-cite><d-cite key="imhoff2022conspiracy"></d-cite>. 
However, it is crucial to acknowledge the limitations of our analysis, as it solely reflects the susceptibility distribution of the sampled users within each state.</p> <div class="col-sm mt-3 mt-md-0"> <figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-suscep/usa-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-suscep/usa-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-suscep/usa-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-suscep/usa.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" data-zoomable="" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figcaption><strong>Susceptibility Distribution by U.S. State</strong> (with bayesian smoothing). We use the average susceptibility score in the United States (-2.87) as the threshold, with scores above it displayed in red, and those below it in blue. Due to space constraints and insufficient data points, we are only displaying data for 48 contiguous states within the U.S.</figcaption> </figure> </div> <h2 id="related-work">Related Work</h2> <h3 id="measure-of-susceptibility">Measure of Susceptibility</h3> <p>The common practice in measuring susceptibility involves collecting self-reported data on agreement or disagreement with verified false claims<d-cite key="Roozenbeek2020SusceptibilityMisinformationCOVID19"></d-cite>, <d-cite key="Escola-Gascon2021CriticalThinkingPredicts"></d-cite>, <d-cite key="Rosenzweig2021HappinessSurpriseAre"></d-cite>, <d-cite key="nan2022people"></d-cite>. Some studies assess susceptibility indirectly through its impact on behavior, but this approach fails to capture actual belief systems<d-cite key="Loomba2021MeasuringImpactCOVID19"></d-cite>. 
Our work proposes a computational model as a scalable alternative to expensive and limited self-reported beliefs.</p> <h3 id="contributing-factors-and-application-of-susceptibility">Contributing Factors and Application of Susceptibility</h3> <p>Research utilizing manually collected susceptibility annotations has explored various factors influencing susceptibility, such as emotion<d-cite key="Sharma2023SystematicReviewRelationship"></d-cite>, <d-cite key="Weeks2015EmotionsPartisanshipMisperceptions"></d-cite>, analytic thinking<d-cite key="Li2022EmotionAnalyticThinking"></d-cite>, partisan bias<d-cite key="Roozenbeek2022SusceptibilityMisinformationConsistent"></d-cite>, source credibility<d-cite key="Traberg2022BirdsFeatherAre"></d-cite>, and repetition<d-cite key="Foster2012RepetitionNotNumber"></d-cite>. Theories explaining susceptibility range from limited knowledge acquisition<d-cite key="Brashier2020AgingEraFake"></d-cite> to overconfidence<d-cite key="Salovich2021CanConfidenceHelp"></d-cite>. This understanding aids in applications like analyzing bot-driven misinformation spread<d-cite key="Himelein-Wachowiak2021BotsMisinformationSpread"></d-cite> and developing prebunking interventions<d-cite key="Roozenbeek2020PrebunkingInterventionsBased"></d-cite>, <d-cite key="Roozenbeek2022PsychologicalInoculationImproves"></d-cite>. However, the field lacks a computational model for large-scale susceptibility inference, which we address in our work.</p> <h3 id="inferring-unobservables-from-observables">Inferring Unobservables from Observables</h3> <p>Latent constructs, or variables that are not directly observable, are often inferred through models from observable variables<d-cite key="Bollen2002LatentVariablesPsychology"></d-cite>, <d-cite key="Borsboom2003TheoreticalStatusLatent"></d-cite>. Methods like nonlinear mixed-effects models and hidden Markov models are used for this purpose. 
In our approach, we utilize a neural network-based architecture to represent these latent variables, aiding in predicting observable variables.</p> <h2 id="conclusion">Conclusion</h2> <p>In this work, we propose a computational approach to model people’s <strong>unobservable</strong> susceptibility to misinformation. While previous research on susceptibility is heavily based on self-reported beliefs collected from questionnaire-based surveys, our model trained in a multi-task manner can approximate user’s susceptibility scores from their reposting behavior. When compared with human judgment, our model shows highly aligned predictions on a susceptibility comparison evaluation task. To demonstrate the potential of our computational model in extending the scope of previous misinformation-related studies, we leverage susceptibility scores generated by our model to analyze factors contributing to misinformation susceptibility. This thorough analysis encompasses a diverse U.S. population from various professional and geographical backgrounds. The results of our analysis align with, corroborate, and expand upon the conclusions drawn from previous survey-based computational social science studies.</p> <h2 id="limitations">Limitations</h2> <p>Besides investigating the underlying mechanism of misinformation propagation at a large scale, the susceptibility scores produced by our model have the potential to be used to visualize and interpret individual and community vulnerability in information propagation paths, identify users with high risks of believing in false claims and take preventative measures, and use as predictors for other human behavior such as following and sharing. However, while our research represents a significant step in modeling susceptibility to misinformation, several limitations should be acknowledged.</p> <p>First, our model provides insights into susceptibility based on the available data and the features we have incorporated. 
However, it’s important to recognize that various other factors, both individual and contextual, may influence susceptibility to misinformation. These factors, such as personal experiences and offline social interactions, have not been comprehensively incorporated into our model and should be considered in future research.</p> <p>Moreover, the susceptibility scores modeled by our model represent an estimation of an individual’s likelihood to engage with misinformation. These scores may not always align perfectly with real-world susceptibility levels. Actual susceptibility is a complex interplay of cognitive, psychological, and social factors that cannot be entirely captured through computational modeling. Our model should be seen as a valuable tool for understanding trends and patterns rather than providing definitive individual susceptibility assessments.</p> <p>Finally, our study’s findings are based on a specific dataset and may not be fully generalizable to all populations, platforms, or types of misinformation. For example, due to the high cost of data collection, not all countries or U.S. states have a sufficient amount of Twitter data available for analysis, especially when we examine the geographical distribution of susceptibility. Furthermore, platform-specific differences and variations in the types of misinformation can potentially impact the effectiveness of our model and the interpretation of susceptibility scores.</p> Examining assumptions in scRNA-seq foundation model pre-training (6.S898 Final Project) 2023-11-08T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/scRNAseq-assumptions <h2 id="introduction">Introduction</h2> <p>If the fundamental building block of biology is the cell, then the fundamental building block of cells are genes. 
Genes are small segments of DNA that encode the information to create a protein, and proteins are a diverse set of macromolecules that can perform a staggering range of chemical functions which, when taken all together, lead to the complex behavior of cells and the organisms they make up. To create proteins from genes, an intermediate “data transfer” occurs through another molecule type known as RNA. This information flow of genes to RNA to proteins is typically referred to as “gene expression”, and is so core to biology that it’s also known as the “central dogma of molecular biology”.</p> <p>Due to the importance of gene expression, many technologies have been developed to make quantitative measurements of gene expression from cells. One of the most prominent technologies is called single-cell RNA sequencing (scRNA-seq), which enables the measurement of the expression of all genes in a given cell, often measured across thousands of cells simultaneously <d-cite key="hwangSinglecellRNASequencing2018"></d-cite>.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig1_scRNA_seq_overview-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig1_scRNA_seq_overview-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig1_scRNA_seq_overview-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig1_scRNA_seq_overview.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Schematic overview of the scRNA-seq workflow. Figure sourced from <d-cite key="panMicrofluidicsFacilitatesDevelopment2022"></d-cite>. 
</div> <p>Large scale scRNA-seq datasets have enabled the high-resolution profiling of different organs and tissues at the cellular level, uncovering diverse cell types, rare subpopulations, and dynamic gene expression patterns within complex tissues and organisms. This technology has found applications in various fields, from developmental biology and immunology to cancer research and regenerative medicine.</p> <p>While scRNA-seq has seen broad-scale adoption, many challenges remain. In particular, an individual research experiment may focus on a particular cell or tissue type, and produce insufficient data to apply modern machine learning techniques. To supplement their data or to gain additional context, a researcher may wish to utilize data generated from other experiments or researchers. However, performing large-scale integration of datasets across samples, tissues, and experiments currently presents challenges of scalability and non-biological differences between datasets driven by experimental variability (colloquially referred to as “batch effects”) <d-cite key="lahnemannElevenGrandChallenges2020"></d-cite>.</p> <p>In parallel to the explosion of available scRNA-seq data, the machine learning field has seen an increasing trend towards “foundation models”. Foundation models are large-scale deep learning models pre-trained with vast amounts of data for the purposes of creating a generalizable representation of a particular datatype (e.g. text, images). 
Given these developments, recent work has focused on developing scRNA-seq foundation models as an approach to solve the challenge of integrating diverse sets of scRNA-seq datasets in a scalable and generalizable way <d-cite key="theodorisTransferLearningEnables2023"></d-cite> <d-cite key="yangScBERTLargescalePretrained2022"></d-cite> <d-cite key="cuiScGPTBuildingFoundation2023"></d-cite> <d-cite key="chenGeneptSimpleHardtoBeat2023"></d-cite> <d-cite key="yangGeneCompassDecipheringUniversal2023"></d-cite> <d-cite key="haoLargeScaleFoundation2023"></d-cite> <d-cite key="levineCell2SentenceTeachingLarge2023"></d-cite>. Beyond just integration, foundation models of gene expression hold great promise in contributing to a broader understanding of biology by learning a representation space of cellular state, which could also lead to a large impact in downstream applications such as <em>in silico</em> prediction of cellular responses to novel therapeutics.</p> <p>In this post, we’ll explore a fundamental assumption of three such models (Geneformer<d-cite key="theodorisTransferLearningEnables2023"></d-cite>, cell2sentence <d-cite key="levineCell2SentenceTeachingLarge2023"></d-cite>, and GenePT <d-cite key="chenGeneptSimpleHardtoBeat2023"></d-cite>), which is the assertion that a given gene expression profile can be well-approximated by a rank-value encoding of genes.</p> <p>What exactly is a rank-value encoding? Well, a typical representation of gene expression is a vector \(x \in \mathbb{R}^N\), where \(N\) is the number of genes, and each entry is a measure of the corresponding gene’s expression. 
In a rank-value encoding, gene expression is instead represented as a list of N strings, where the strings are gene names, and are ordered in descending order of the underlying gene expression value.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_rank_value_schematic-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_rank_value_schematic-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_rank_value_schematic-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_rank_value_schematic.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Standard encoding of gene expression values compared to a rank-value encoding. </div> <p>The rank-value encoding provides an intuitive transformation of the continuous gene expression values into an English language sentence that is compatible with existing approaches for foundation models in the natural language processing (NLP) field. However, as can be seen above, the rank-value encoding also drops the information of the exact gene expression values. 
Hopefully by the end of this post, we’ll have gained some intuition for how a rank-value encoding of gene expression could be hindering the development of foundation models for gene expression and see that this does play out in practice for a real scRNA-seq foundation model.</p> <h2 id="related-work">Related work</h2> <h3 id="overview-of-gene-expression-representations-in-foundation-models">Overview of gene expression representations in foundation models</h3> <p>While we won’t go into a full detailed comparison of different methods for constructing gene expression foundation models from scRNA-seq data, it’s worth spending a little time discussing the commonalities and differences of various approaches at a high-level.</p> <p>The most important distinction for this post is between methods that use a rank-value encoding and those that don’t. For methods that don’t use a rank-value encoding, we see a further distinction between methods that employ some form of value-binning, where continuous expression values are mapped to a discrete number of pre-specified bins, and those that don’t. Methods that use a binning approach are scGPT<d-cite key="cuiScGPTBuildingFoundation2023"></d-cite> and scBERT<d-cite key="yangScBERTLargescalePretrained2022"></d-cite>. In both scGPT and scBERT, gene expression values are first binned to map the continuous values to a set vocabulary of tokens, and these tokens are then passed through an embedding layer to generate higher-dimensional representations. In contrast, scFoundation<d-cite key="haoLargeScaleFoundation2023"></d-cite> calculates gene expression embeddings by first transforming continuous scalar values to a vector using a small MLP, and then calculating a final embedding by using an attention mechanism over K learned vectors. 
While we won’t cover the full details, schematics of the approaches can be seen below to get a sense of the overall architectures, and most importantly to see how they directly use the gene expression values as input.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scGPT_schematic-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scGPT_schematic-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scGPT_schematic-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scGPT_schematic.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scBERT_schematic-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scBERT_schematic-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scBERT_schematic-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scBERT_schematic.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scFoundation_schematic-480.webp" /> <source 
class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scFoundation_schematic-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scFoundation_schematic-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_scFoundation_schematic.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Schematics of the various approaches that *do not* use a rank-value encoding (top to bottom): scGPT, scBERT, and scFoundation. Figures sourced from <d-cite key="cuiScGPTBuildingFoundation2023"></d-cite><d-cite key="yangScBERTLargescalePretrained2022"></d-cite><d-cite key="haoLargeScaleFoundation2023"></d-cite>. </div> <p>On the other hand, we have the methods that we’re most interested in for the purposes of this post: the ones that utilize a rank-value encoding of gene expression. These methods are: Geneformer<d-cite key="theodorisTransferLearningEnables2023"></d-cite>, GenePT<d-cite key="chenGeneptSimpleHardtoBeat2023"></d-cite>, and cell2sentence<d-cite key="levineCell2SentenceTeachingLarge2023"></d-cite>. In Geneformer, gene expression values are first converted to a rank-value encoding and then used to train a Transformer-based model using a variant of a masked language modeling objective in which a set of genes at random ranks are masked, and the model must learn to predict the masked gene names. In cell2sentence and GenePT, pre-trained auto-regressive language models (GPT-2 and GPT-3.5 respectively) are applied to the rank-value encoded list of genes to obtain cell-level embeddings that are then used for downstream tasks. 
Again, we won’t dive into the full details of these approaches, but provide schematic overviews of them below.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_Geneformer_schematic-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_Geneformer_schematic-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_Geneformer_schematic-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_Geneformer_schematic.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_genePT_schematic-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_genePT_schematic-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_genePT_schematic-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_genePT_schematic.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_cell2sentence_schematic-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" 
srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_cell2sentence_schematic-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_cell2sentence_schematic-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig2_cell2sentence_schematic.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Schematics of the various approaches that *do* use a rank-value encoding (top to bottom): Geneformer, GenePT, and cell2sentence. Figures sourced from <d-cite key="theodorisTransferLearningEnables2023"></d-cite><d-cite key="chenGeneptSimpleHardtoBeat2023"></d-cite><d-cite key="levineCell2SentenceTeachingLarge2023"></d-cite>. </div> <h3 id="critical-examinations-of-scrna-seq-foundation-models">Critical examinations of scRNA-seq foundation models</h3> <p>In light of the recent development of many approaches for scRNA-seq foundation models, researchers have also begun performing critical assessments of such models. One of the main value propositions of foundation models is generalization to new data in a few-shot or zero-shot manner. To test this hypothesis, Kedzierska et al.<d-cite key="kedzierskaAssessingLimitsZeroshot"></d-cite> benchmarked the performance of Geneformer and scGPT at two zero-shot tasks with novel datasets: cell clustering and integration of data across batches (i.e. batch effect removal) . They found that both methods underperformed compared to simpler baseline methods. 
Similarly, Boiarsky et al.<d-cite key="boiarskyDeepDiveSingleCell2023"></d-cite> compared scGPT and scBERT to logistic regressions in the context of cell type annotation, and also found that the simpler approach performed competitively.</p> <p>However, both of the works discussed above focused on examining the performance of scRNA-seq foundation models as a black box, whereas to the best of our knowledge, there are no current works examining the fundamental assumptions implicit in these foundation model approaches. We hope to begin addressing that gap in this post. By understanding whether or not rank-value encoding well-approximates the real similarities and differences in gene expression across cell types, we hope to either validate this assumption or gain insight into future avenues for improving pretraining of such scRNA-seq foundation models.</p> <h2 id="methods">Methods</h2> <h3 id="dataset">Dataset</h3> <p>To perform our assessment of rank-value encoding, we’ll work with the Tabula Sapiens dataset <d-cite key="consortiumTabulaSapiensMultipleorgan2022"></d-cite>. This scRNA-seq dataset is a reference-quality collection of nearly 500,000 cells from 24 organs, sourced from 15 normal human subjects. The Tabula Sapiens dataset provides a good testbed for our experiments, as the samples have been processed in a uniform manner, allowing us to ask how rank-value encoding performs in a “best case” scenario. In the future, it would be beneficial to see how rank-value encoding performs across datasets as well, as there may be advantages in terms of smoothing out experimental noise.</p> <p>We use the final dataset from Tabula Sapiens, which has already been subjected to quality control assessment, filtering, and normalization. While we won’t go into the details of their pipeline here, these are available in their manuscript for the interested reader. 
In line with typical scRNA-seq workflows, we also subset the full set of ~22,000 genes down to a subset of 2,435 genes that have been marked as “highly variable genes” (HVGs) in the Tabula Sapiens dataset. This is a fairly standard step in scRNA-seq data processing workflows, as many genes are constitutively expressed across cell types, and thus provide little information for distinguishing between cell types. Highly variable gene selection was performed by the Tabula Sapiens Consortium following the methods and recommendations in Seurat<d-cite key="stuartComprehensiveIntegrationSingleCell2019"></d-cite>, a commonly used scRNA-seq data processing package.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig3_cell_type_hist-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig3_cell_type_hist-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig3_cell_type_hist-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig3_cell_type_hist.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Number of cells per cell type. Note that the majority of cell types have ~1000 examples, but that there's a long tail of highly represented cell types with up to 35k examples. </div> <p>Additionally, since the Tabula Sapiens dataset is quite large and also has some cell types that are disproportionately represented, as shown above, we’ll also subset the data to get a more tractable dataset for experimentation. 
To do so, we’ll focus on cell types with 500 or more examples, and then further randomly subsample to 500 cells per type. This leaves us with 89 cell types<d-footnote>acinar cell of salivary gland, adventitial cell, b cell, basal cell, basal cell of prostate epithelium, basophil, bladder urothelial cell, capillary aerocyte, capillary endothelial cell, cardiac endothelial cell, cardiac muscle cell, cd24 neutrophil, cd4-positive alpha-beta t cell, cd4-positive helper t cell, cd4-positive, alpha-beta memory t cell, cd4-positive, alpha-beta t cell, cd8-positive alpha-beta t cell, cd8-positive, alpha-beta cytokine secreting effector t cell, cd8-positive, alpha-beta cytotoxic t cell, cd8-positive, alpha-beta memory t cell, cd8-positive, alpha-beta t cell, classical monocyte, club cell, club cell of prostate epithelium, conjunctival epithelial cell, corneal epithelial cell, corneal keratocyte, dendritic cell, dn1 thymic pro-t cell, dn3 thymocyte, duct epithelial cell, endothelial cell, endothelial cell of artery, endothelial cell of lymphatic vessel, endothelial cell of vascular tree, enterocyte of epithelium of large intestine, enterocyte of epithelium of small intestine, epithelial cell, erythrocyte, erythroid progenitor, eye photoreceptor cell, fibroblast, fibroblast of breast, granulocyte, hematopoietic stem cell, hepatocyte, immature enterocyte, immune cell, innate lymphoid cell, intermediate monocyte, keratinocyte, kidney epithelial cell, luminal cell of prostate epithelium, luminal epithelial cell of mammary gland, lung ciliated cell, macrophage, mast cell, mature enterocyte, mature nk t cell, memory b cell, mesenchymal stem cell, monocyte, myeloid cell, myofibroblast cell, naive b cell, naive regulatory t cell, naive thymus-derived cd4-positive, alpha-beta t cell, naive thymus-derived cd8-positive, alpha-beta t cell, neutrophil, nk cell, nkt cell, non-classical monocyte, pancreatic acinar cell, pancreatic ductal cell, paneth cell of epithelium of large intestine, 
paneth cell of epithelium of small intestine, pericyte cell, plasma cell, regulatory t cell, respiratory goblet cell, skeletal muscle satellite stem cell, smooth muscle cell, stromal cell, t cell, thymocyte, type i nk t cell, type ii pneumocyte, vascular associated smooth muscle cell, vein endothelial cell</d-footnote> and 500 cells per type, for a total of 44,500 datapoints.</p> <p>To interact with this data, we’ll be using the <code class="language-plaintext highlighter-rouge">AnnData</code><d-cite key="virshupAnndataAnnotatedData2021"></d-cite> and <code class="language-plaintext highlighter-rouge">scanpy</code><d-cite key="virshupScverseProjectProvides2023"></d-cite> Python packages, which we won’t cover in detail here but flag in case you’re interested in working with such data in the future.</p> <h3 id="assessments">Assessments</h3> <p>To assess how well a cellular state can be represented using a rank-value encoding of genes, we’ll look at various measures of similarity in the raw gene expression space and the rank-value encoded space, and compare those measures both within cell types and between cell types. We’ll calculate the following measures for all pairs of cells:</p> <ol> <li>Euclidean distance of UMAP-projected gene expression values</li> <li><a href="https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient">Spearman rank correlation coefficient</a></li> <li>Euclidean distance of UMAP-projected Geneformer embeddings</li> </ol> <p>For each distance measure, we can then generate comparisons at the level of cell types by summarizing via the median of the pairwise distances, either within or between cell types. 
A schematic of this approach is shown below.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig4_comparison_schematic-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig4_comparison_schematic-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig4_comparison_schematic-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig4_comparison_schematic.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Overview of method for computing distance measures between cells followed by summarization to generate comparisons at the level of cell types. </div> <h4 id="umap-of-gene-expression-values">UMAP of gene expression values</h4> <p>The idea behind this comparison is to utilize the continuous gene expression vectors, but using UMAP (Uniform Manifold Approximation and Projection<d-cite key="mcinnesUMAPUniformManifold2020"></d-cite>) to approximate the kind of non-linear transformation one might learn using a deep neural network. To calculate these values, we perform UMAP projection of the gene expression values using the <code class="language-plaintext highlighter-rouge">umap-learn</code> Python package with default settings and <code class="language-plaintext highlighter-rouge">n_components=5</code>. 
Once we have the per-cell projections, we calculate Euclidean distance between all pairs of cells.</p> <h4 id="spearman-rank-correlation-coefficients">Spearman rank correlation coefficients</h4> <p>The Spearman rank correlation is a non-parametric measure of correlation between two ranked lists, which we can leverage to obtain a direct comparison of rank-value encoded gene lists. To accomplish this, we first calculate a rank-encoding of each cell’s gene expression, with identical values being assigned a <a href="https://en.wikipedia.org/wiki/Ranking#Fractional_ranking_(%221_2.5_2.5_4%22_ranking)">fractional rank equal to the mean of their ordinal ranks</a>. As the Spearman correlation is defined as the Pearson correlation on the rank-encoded lists, we can then directly calculate the Spearman correlations between all pairs of cells.</p> <h4 id="euclidean-distance-of-umap-projected-geneformer-embeddings">Euclidean distance of UMAP-projected Geneformer embeddings</h4> <p>To fully assess the effect of rank-value encoding in a deep learning model, we take this one step further by calculating the embeddings of our cells using Geneformer. We generate these embeddings by using their model and code as <a href="https://huggingface.co/ctheodoris/Geneformer">hosted on HuggingFace</a> for tokenization and embedding of our gene expression vectors. For each cell \(i\), we obtain an embedding vector \(x_i \in \mathbb{R}^{256}\). We further project these 256-dimensional vectors down to 5 dimensions using UMAP for consistency with the projections of the raw gene expression values described above, and then calculate Euclidean distance between all pairs of cells. The rationale here is that Euclidean distance between two points may be larger in a 256-dimensional space than a 5-dimensional space due to the high dimensionality (i.e. “curse of dimensionality”). 
However, we do still see similar results when using the full 256-dimensional embedding vectors (see Appendix).</p> <h2 id="results">Results</h2> <h3 id="rank-value-encodings-preserve-similarity-between-cell-types">Rank-value encodings preserve similarity between cell types</h3> <p>The first thing we can see from our results is that rank-value encodings do preserve similarity between cell types in a similar manner as distances generated from raw gene expression values. The figure below is generated by looking at the distributions of distances between pairs of cells from the same type (“within”) or from different cell types (“between”). To provide a comparison at the level of cell types, we plot the median of each distribution rather than individual pairs of cells, i.e. the “within” group contains 89 data points and the “between” group contains \(\frac{89 \times 88}{2}\) data points.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig6_combined_measure_comparison-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig6_combined_measure_comparison-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig6_combined_measure_comparison-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig6_combined_measure_comparison.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Comparison of various similarity measures both within cell types and between cell types. Note that for the Euclidean distances (left and right), lower is more similar, whereas for rank correlation (middle), higher is more similar. 
</div> <p>How should we interpret this? What we can observe is that all three measures maintain high similarity for cells from the same type and less similarity for cells from different types. Put another way, rank-value encodings do define a space in which different cell types tend to be distant and cells from the same type tend to be near each other. We can also say that this holds when using both a non-parametric measure of the rank-value encodings (Spearman rank-correlation) and also when using a deep learning model that operates on rank-value encoded gene vectors (Geneformer).</p> <p>However, we do also see that the difference between the “within” and “between” cell type distances is more pronounced when using a non-linear function on the raw data compared to either of the methods operating on the rank-value encoded gene vectors. This difference will become even more clear as we look at joint distributions of our different measures in the next section.</p> <h3 id="raw-gene-expression-values-better-preserve-within-cell-type-similarities">Raw gene expression values better preserve within cell type similarities</h3> <p>To gain further insight into how rank-value encodings compare to raw gene expression values, we can look at the joint distributions of our distance measures. 
Below we see the joint distribution of our raw gene expression-based distances compared to the rank-correlation values, shown as a 2D histogram where each hex is colored according to the number of points that fall within that bin.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig7_raw_umap_vs_rank_corr_within_type-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig7_raw_umap_vs_rank_corr_within_type-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig7_raw_umap_vs_rank_corr_within_type-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig7_raw_umap_vs_rank_corr_within_type.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig8_raw_umap_vs_rank_corr_between_type-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig8_raw_umap_vs_rank_corr_between_type-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig8_raw_umap_vs_rank_corr_between_type-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig8_raw_umap_vs_rank_corr_between_type.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; 
$('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Joint distributions of distances from UMAP of raw gene expression values compared to rank correlations, within cell types (left) and between cell types (right). </div> <p>We can notice that within cell types, the rank correlation has a fairly wide dynamic range whereas the raw gene expression-based distance seems to show a tighter packing. Between cell types, we can observe that the rank correlations largely clump up closer to zero but do mesh with the larger distances we see with the raw gene expression-based measure.</p> <p>Given that we see a spreading out of cells within a type using a rank correlation, the natural question becomes whether this holds when we use a deep learning model that can learn a complex non-linear function of the rank encodings. That’s exactly what we look at below where we perform a similar comparison, but swapping out the rank correlation distance measure for the distance measure based on Geneformer embeddings.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig9_raw_umap_vs_geneformer_umap_within_type-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig9_raw_umap_vs_geneformer_umap_within_type-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig9_raw_umap_vs_geneformer_umap_within_type-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig9_raw_umap_vs_geneformer_umap_within_type.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div 
class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig10_raw_umap_vs_geneformer_umap_between_type-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig10_raw_umap_vs_geneformer_umap_between_type-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig10_raw_umap_vs_geneformer_umap_between_type-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig10_raw_umap_vs_geneformer_umap_between_type.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Joint distributions of distances from UMAP of raw gene expression values compared to distances from UMAP of Geneformer embeddings, within cell types (left) and between cell types (right). </div> <p>With the Geneformer embeddings derived from the rank-value encodings, we now see that the between cell type distances are better matched to the distances derived from raw gene expression values. However, we still see that Geneformer embeddings are more spread out within cell types compared to the non-linear transform of the raw gene expression values. To better understand why this might be the case, we propose one possible contributing factor in the next section.</p> <h3 id="sparsity-of-scrna-seq-data-may-drive-loss-of-information-in-rank-value-encodings">Sparsity of scRNA-seq data may drive loss of information in rank-value encodings</h3> <p>A key aspect of scRNA-seq data is its extremely high sparsity. 
When working with single cells, the amount of available RNA is already quite limited, and then each processing step, such as RNA isolation or sequencing, introduces technical noise and the possibility of “dropout events”, where a gene’s expression is not detected at all. Combined with the inherent stochasticity of gene expression, we’re often left with data where the vast majority of genes have zero detected RNA molecules.</p> <p>Shown below is a histogram of sparsity per cell in the full Tabula Sapiens dataset as well as in the subset of cells and genes we considered in the analyses above.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig4_sparsity_full_dataset-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig4_sparsity_full_dataset-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig4_sparsity_full_dataset-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig4_sparsity_full_dataset.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig5_sparsity_subset-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig5_sparsity_subset-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig5_sparsity_subset-1400.webp" 
/> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig5_sparsity_subset.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Histogram of gene expression sparsity per cell for the full Tabula Sapiens dataset (left) and the subset of 44,500 cells and 2,435 genes we considered in previous analyses. Sparsity here is defined as the fraction of genes with zero observed RNA molecules. </div> <p>While many methods for processing scRNA-seq data attempt to handle the high sparsity in a principled manner, most of the methods described here simply remove genes with zero observations from consideration. In particular, scGPT, GenePT, and Geneformer all remove genes with zero observations from their inputs, and cell2sentence restricts itself to the 100 genes with the highest expression per cell, effectively removing all genes with zero observations. While sparsity is at least partially driven by stochastic technical factors, there is undoubtedly a biological contribution as well, which may be removed when dropping genes with zero observations. While this issue is not unique to rank-value encoding, we can see that all of the methods we’ve discussed here that use rank-value encoding remove genes with zero observations, likely to circumvent the ambiguity in how one would enforce an ordering on genes that all have zero observations.</p> <h2 id="discussion">Discussion</h2> <p>To give a high-level summary, what we’ve seen in this post is that rank-value encodings are an appealing way to transform continuous gene expression vectors into a format that’s directly compatible with the foundation model architectures that have seen great success in natural language processing. 
However, they also seem to lose some valuable biological information of cell types, particularly information concerning similarity of cells within a given type.</p> <p>While we don’t present a smoking gun for an exact characteristic of this loss of information, we present sparsity as a key challenge in scRNA-seq data, which may be exacerbated when using rank-value encodings. We can also further hypothesize that rank-value encodings may be sensitive to small changes in gene expression values from technical noise, which could cause a shifting of ranks and thus amplify the impact of said noise. Similarly, rank-value encodings lose the absolute quantification of gene expression, and this loss of granularity may impact the model’s ability to capture the cases where subtle differences in gene expression hold biological significance.</p> <p>From the perspective of downstream use cases, models based on rank-value encodings are also limited in their ability to explore the counterfactuals that may be interesting in cases such as predicting cellular responses to a novel therapeutic. For example, if a drug were known to affect the expression of a single gene, but not to the point where the ranking of this gene shifted, then such a model would be unable to explore the downstream effect of this drug on the expression of other genes.</p> <p>In terms of limitations, the work presented here is fairly superficial and is constrained both in terms of size of datasets and breadth of methods compared. To perform a more robust comparison in the future, we would like to scale up this analysis to larger datasets, such as the full Tabula Sapiens dataset. We would also like to more directly compare cell type similarities in the embedding spaces of other scRNA-seq foundation models, including those that do and do not utilize rank-value encodings. 
A great follow-up would be to perform a head-to-head comparison of a model like scBERT to Geneformer on the full Tabula Sapiens dataset.</p> <p>Additionally, we’ve also yet to explore the angle of robustness across datasets. It’s possible that some of the shortcomings we’ve listed for rank-value encodings may actually be benefits in the context of suppressing technical noise when integrating scRNA-seq datasets across studies, institutions, and experimental techniques. Performing this comparison across datasets would be a valuable follow-up that would help paint a more full picture of the value of rank-value encodings in the context of constructing foundation models for gene expression data.</p> <p>While we’ve discussed many challenges in constructing foundation-scale models for gene expression data, it’s worth closing this post with an optimistic reflection on the potential value of such models. By training a deep learning model to construct a representation space of cellular state, we stand to create a powerful tool that will help us gain a fundamental understanding of cellular biology and its underlying complex regulatory networks. 
Ultimately, such tools could help us unravel the genetics of various diseases, paving the way for a new era of disease treatments and precision medicine.</p> <h2 id="appendix">Appendix</h2> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig11_SUP_raw_umap_vs_geneformer_raw_within_type-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig11_SUP_raw_umap_vs_geneformer_raw_within_type-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig11_SUP_raw_umap_vs_geneformer_raw_within_type-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig11_SUP_raw_umap_vs_geneformer_raw_within_type.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig12_SUP_raw_umap_vs_geneformer_raw_between_type-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig12_SUP_raw_umap_vs_geneformer_raw_between_type-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig12_SUP_raw_umap_vs_geneformer_raw_between_type-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-scRNAseq-assumptions/fig12_SUP_raw_umap_vs_geneformer_raw_between_type.png" class="img-fluid rounded z-depth-1" width="auto" height="auto" 
onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Joint distributions of distances from UMAP of raw gene expression values compared to distances from raw Geneformer embeddings, within cell types (left) and between cell types (right). </div> Increasing Context Length For Transformers 2023-11-08T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/increasing-context-length-for-transformers <h3 id="introduction">Introduction</h3> <p>Since its release on November 30, 2022, ChatGPT has assisted users around the world with a variety of document parsing and editing tasks. These tasks often require large input contexts, since the documents and texts passed into ChatGPT’s source model, GPT-3.5, can be several pages long.</p> <p>Like many other language models, GPT-3.5 is a unidirectional transformer that uses the self-attention mechanism. But while self-attention is an extremely powerful mechanism, it is also expensive in its time and space complexity. Standard self-attention requires $O(n^2)$ operations in terms of the sequence length $n$, since the $QK^T$ term within the attention mechanism calculates and stores the attention of each of the $n$ tokens with $O(n)$ other tokens.</p> <p>Unfortunately, the $O(n^2)$ complexity makes long input contexts difficult for transformers to handle efficiently. Over the past few years, researchers have been investigating ways of mitigating the $O(n^2)$ factor. This remains an ongoing problem, with several papers released on the topic in 2023 alone.<d-cite key="xu2023retrieval"></d-cite><d-cite key="yang2023longqlora"></d-cite><d-cite key="peng2023yarn"></d-cite><d-cite key="yang2023revisiting"></d-cite><d-cite key="mohtashami2023landmark"></d-cite> In this post, we provide an overview of existing strategies for increasing context length for transformers. 
We also propose and investigate our own efficient self-attention algorithm, which we call Gaussian attention.</p> <h3 id="literature-review">Literature Review</h3> <p>In the past, large context lengths were handled using a simple partition scheme. Essentially, long inputs can be split into fixed-length chunks, where attention is computed separately for each chunk. Then, for chunk size $b$, a sequence of length $n$ requires only $O\left(\frac{n}{b} \cdot b^2\right) = O(nb)$ time to compute. However, this method has a major drawback in that information cannot be shared across partitioned blocks, leading to the fragmentation problem: the model lacks long-term dependencies and thus runs into cases where it lacks the necessary context to make accurate predictions.</p> <p>Modern methods for handling long context lengths in transformers generally try to avoid this problem by either introducing ways of sharing context across partitions or reducing self-attention calculation cost by using a simpler approximation. Models that fall into the second category may utilize one of many different approximation techniques, such as sparse attention matrices and fixed attention patterns.<d-cite key="tay2022efficient"></d-cite><d-cite key="LIN2022111"></d-cite> We present a brief summary of existing efficient transformers.</p> <h4 id="sparse-transformer">Sparse Transformer</h4> <p>Child et al. 
proposed a sparse transformer that reduces attention calculation cost from $O(n^2)$ to $O(n\sqrt{n})$.<d-cite key="child2019generating"></d-cite> To achieve this, the sparse transformer uses a combination of strided and local attention patterns.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/child-et-al-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/child-et-al-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/child-et-al-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/child-et-al.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Standard attention matrix (left) vs. strided attention matrix (right). Only attention values for the blue squares are computed. </div> <p>One attention head processes a local window of size $k$ surrounding the current token $i$, while a second attention processes tokens $j$ such that</p> \[(i - j) \mod l = 0 \qquad \forall j \leq i,\] <p>where $l$ is a parameter chosen to be close to $\sqrt{n}$. Since only $O(l)$ tokens are attended upon for each token $i$, this results in the $O(n \cdot l) = O(n\sqrt{n})$ runtime. Child et al. 
showed that the sparse transformer can be applied to a wide range of fields, including image, text, and music, where it can be used to process audio sequences over 1 million timestamps long.</p> <h4 id="longformer">Longformer</h4> <p>Longformer<d-cite key="beltagy2020longformer"></d-cite> applies a dilated sliding window to capture local attention patterns and reduce overall attention cost to $O(n\cdot{w})$ for window size $w$. Across successive attention layers, gaps are placed between different elements of the sliding window—thus expanding the receptive field to thousands of tokens even for small dilation factors. In order to generalize to different language modeling tasks, Longformer introduces global tokens that attend upon every other token. These global tokens are analogous to the different input representations used by language models for different tasks; for example, BERT appends a <code class="language-plaintext highlighter-rouge">&lt;CLS&gt;</code> token to the start of every input in classification tasks. Despite using sparse attention contexts, Longformer was able to outperform state-of-the-art model RoBERTa on several long document benchmarks.</p> <h4 id="bigbird">BigBird</h4> <p>BigBird<d-cite key="zaheer2021big"></d-cite> combines three different fixed attention patterns to achieve $O(n)$ complexity, being</p> <ol> <li>Global attention, consisting of tokens that attend upon every other token</li> <li>Local attention, consisting of a sliding window around each token</li> <li>Random attention, consisting of randomly-selected tokens</li> </ol> <p>Using this architecture, BigBird managed to increase maximum transformer context lengths by up to 8x. In the same paper, Zaheer et al. proved that certain sparse transformers are computationally equivalent to transformers with full attention. 
Theoretically, sparse transformers are capable of solving all tasks that full transformers can solve; this explains why sparse transformers are often a good approximation for full transformers.</p> <h4 id="transformerxl">TransformerXL</h4> <p>TransformerXL differs from the previously discussed models, as it doesn’t increase self-attention efficiency by sparsifying the attention matrix.<d-cite key="dai2019transformerxl"></d-cite> Instead, it retains the classic partitioning scheme and attempts to overcome the fragmentation problem via a recurrence-based approach. Using the recurrence mechanism, hidden-state sequences are stored and cached so they can be used for additional context when the model processes the next segment. Overall, this architecture allows the network to use historical information to process new information. As a result, it can support longer-range dependencies without leading to context fragmentation. TransformerXL was able to operate on sequences up to 450% longer than those of vanilla transformers, while being up to 1800 times faster; in addition, it achieved SOTA results on the datasets <code class="language-plaintext highlighter-rouge">text8</code>, <code class="language-plaintext highlighter-rouge">Penn Treebank</code>, and <code class="language-plaintext highlighter-rouge">WikiText-103</code>.</p> <h4 id="landmark-tokens">Landmark Tokens</h4> <p>More recently, Mohtashami et al. suggested using landmark tokens to determine which tokens should be attended to.<d-cite key="mohtashami2023landmark"></d-cite> Inputs are divided into a series of fixed-length blocks, and each block is associated with a landmark token. In particular, this architecture is designed so that a high attention score on any individual token within a block also leads to a high attention score on the block’s “representative vector”—which is the landmark token itself.
Then, during inference, the transformer retrieves the $k$ blocks corresponding to the $k$ highest-valued landmark tokens and attends only upon the tokens in this block. Mohtashami et al. claimed that this architecture can extend the context length of Llama to more than 32k tokens, allowing it to support inputs of the same length as GPT-4.</p> <h4 id="visiontransfomer">VisionTransformer</h4> <p>Most of the models discussed above apply specifically to transformers used for language modeling. However, algorithms for reducing attention complexity have been successfully used for other tasks as well. For example, VisionTransformer managed to achieve SOTA performance while limiting the attention context to a 16x16 patch around each pixel.<d-cite key="dosovitskiy2021image"></d-cite></p> <h4 id="hardware-methods">Hardware Methods</h4> <p>Aside from algorithm-based techniques, there have also been attempts to make basic transformer algorithms run faster on existing hardware. Although sparse attention algorithms may have better time complexity, they may not achieve practical speedups due to hardware inefficiencies. In order to achieve practical speedups on transformer training, Dao et al.
proposed FlashAttention, an I/O-aware attention algorithm that implements the basic attention computation.<d-cite key="dao2022flashattention"></d-cite> FlashAttention achieves speedups of up to 15% on BERT-Large, showing that efficient transformers do not necessarily need to use approximate attention algorithms.</p> <h4 id="other-methods">Other Methods</h4> <p>Numerous other algorithms for extending transformer context lengths have been proposed, including retrieval-based methods<d-cite key="borgeaud2022improving"></d-cite>, low-rank approximations<d-cite key="wang2020linformer"></d-cite>, and expectation-based methods<d-cite key="ren2021combiner"></d-cite>.</p> <h3 id="methodology">Methodology</h3> <p>To see what types of context reduction algorithms are effective, we propose and test our own efficient transformer. We investigate whether transformers using Gaussian-distributed fixed attention patterns can perform as well as standard transformers. For each self-attention layer, we sample a Gaussian random distribution to determine which elements of the attention matrix we should compute. 
We analyze this approach for the unidirectional language modeling case, where the goal is to predict the next token of a given input sequence.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-1.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-2.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source 
class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-3-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-3-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-3-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/gauss-attn-diag-3.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Examples of Gaussian attention masks with $c=5$ and inputs of length 10. </div> <p>In language modeling, the most important context for predicting a new token often comes from examining the tokens that immediately precede it. Previous work has taken advantage of this pattern by employing fixed local attention patterns, such as the sliding window pattern used by BigBird. For token $i$, random samples from a truncated Gaussian distribution with mean $i$ and standard deviation $\sigma = \frac{\mu}{2} = \frac{i}{2}$<d-footnote>This means that 0 is two standard deviations from the mean $i$.</d-footnote> will produce values $j$ close to $i$ with high probability. This implies that we will likely calculate the attention scores for some local region of each token $i$, allowing the model to account for important local context connections.</p> <p>On the other hand, it may also be possible that some distant token $j$ has a large impact on the prediction of token $i$. 
For example, if you pass in a document in which the first sentence defines the overall purpose of the document, we might need to pay attention to this sentence even in later sections of the document. Fixed-pattern Gaussian attention allows for this possibility by calculating attention scores for $i$ and distant tokens $j$ with a lower but still nonzero probability. As a result, Gaussian attention offers some flexibility that may not be present in other fixed-pattern attention mechanisms, such as the sliding window technique.</p> <h4 id="algorithm">Algorithm</h4> <p>The model takes a hyperparameter $c$, where $c$ is the number of tokens that each token attends upon. For every token $i$ in each self-attention layer, we select $c$ tokens from the Gaussian distribution $\mathcal{N}(i, i/2)$, where $\mathcal{N}$ is truncated at $0$ and $i$. Since our task focuses on the causal language modeling case, a token $i$ computes attention scores only for tokens $j&lt;i$. Truncation ensures that every $i$ attends to exactly $\min(c, i)$ tokens.<d-footnote>If $c$ is greater than the number of tokens in range $[0,i]$, the result is to sample every token from $[0,i]$.</d-footnote> To match sampled random numbers with actual token indexes, we cast each random number $x$ to index $j = \lfloor{x}\rfloor$. In the case of duplicate indexes, we assign each duplicate index to the nearest unused index in range $[0,i]$. This algorithm is summarized below.</p> <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>for each token i: sample min(c, i) values from N(i, i/2) create list of indices by flooring every sampled value remove duplicates assigning duplicates to the nearest unused index # such an assignment always exists by pigeonhole principle </code></pre></div></div> <p>For each token $i$, we set all attention values for tokens which are not selected to zero.
As a result, each token attends only on at most $c$ tokens, leading to an overall cost of $O(c \cdot n) = O(n)$ for constant $c$.</p> <h3 id="experiments">Experiments</h3> <p>Since we had limited training resources, we unfortunately couldn’t test Gaussian attention on large models like BERT or GPT. Instead, we used a toy study involving small models with smaller inputs—this leads to some additional considerations in analyzing our results, which we address later.</p> <p>We first tested whether models trained with limited Gaussian attention can achieve similar performance as models that were trained on full self-attention. We trained models with $c = 5$ and $c=10$ and compared them to the performance of the base model. For our base experiments, we used three self-attention heads per layer and six layers in total.</p> <p>Our evaluation metric for all models was next-token cross-entropy loss against a corpus of Shakespeare texts. Training is optimized with Adam and a learning rate of 0.0001.</p> <p>Base experiment results are shown below.</p> <table> <thead> <tr> <th>Model</th> <th>Epochs</th> <th>Training Loss</th> <th>Validation Loss</th> </tr> </thead> <tbody> <tr> <td>Base</td> <td>80</td> <td>4.2623</td> <td>4.4390</td> </tr> <tr> <td>Base</td> <td>130</td> <td>3.7709</td> <td>4.0320</td> </tr> <tr> <td>Base</td> <td>140</td> <td>3.7281</td> <td>3.9964</td> </tr> <tr> <td>$c = 5$</td> <td>80</td> <td>3.7458</td> <td>4.0355</td> </tr> <tr> <td>$c = 10$</td> <td>80</td> <td>4.1619</td> <td>4.3801</td> </tr> </tbody> </table> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/train-vs-val-loss-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/train-vs-val-loss-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)"
srcset="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/train-vs-val-loss-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/train-vs-val-loss.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>We found that both the $c=5$ and $c=10$ models were able to achieve similar performance as the base model, which suggests that Gaussian attention may be a good approximation for full attention. Interestingly, both Gaussian models required significantly fewer epochs to reach the same performance as the base model. Both Gaussian models also demonstrated faster separation between training and validation losses. We hypothesize that the smaller attention context helps focus learning on more relevant tokens, which lowers the number of training epochs needed. As a result, the model is able to learn the language modeling task more rapidly, leading to faster overfitting.</p> <p>Although initial results were promising, we chose to investigate a few factors that could have inflated model performance.</p> <p>In order to determine whether the Gaussian attention models are affected by input length, we tested the same setups with longer inputs. Our base experiments used relatively small inputs, each corresponding to one piece of dialogue in a Shakespeare script. On average, these inputs were approximately 30 tokens long; with $c = 5$, the selected context may be more than $\frac{1}{6}$ of the total tokens. As a result, Gaussian model accuracy might be inflated for small inputs, since the context essentially covers a large portion of existing tokens. To make $c$ a smaller fraction of the input length, we modified the dataset instead to create inputs with an average length of 100 tokens. 
We summarize the results in the table below.</p> <table> <thead> <tr> <th>Model</th> <th>Epochs</th> <th>Training Loss</th> <th>Validation Loss</th> </tr> </thead> <tbody> <tr> <td>Base</td> <td>90</td> <td>5.5906</td> <td>5.6207</td> </tr> <tr> <td>$c = 5$</td> <td>90</td> <td>5.5769</td> <td>5.6166</td> </tr> <tr> <td>$c = 10$</td> <td>90</td> <td>5.6237</td> <td>5.6565</td> </tr> </tbody> </table> <p>With the longer input contexts, all three models had worse performance when trained for the same number of epochs. However, both Gaussian models managed to achieve approximately the same loss as the original model. This again suggests that Gaussian attention is a valid approximation of the standard attention matrix.</p> <p>We further investigated whether the performance of the Gaussian models degraded rapidly when using a smaller number of layers and attention heads. Logically, increasing the number of attention heads would help mask bad attention patterns formed by the Gaussian sampling strategy. For example, although the sampling process selects tokens $j$ near token $i$ with high probability, it is possible that some attention head $x$ does not select the relevant tokens for a token $i$. With the addition of more attention heads, a different head may compensate for the bad head by operating on the correct tokens. Increasing the number of attention layers similarly increases the number of attention heads, where good heads can compensate for bad ones. 
Experiments showed that even with one layer and one attention head, the Gaussian models were able to achieve approximately the same performance as the base model.</p> <table> <thead> <tr> <th>Model</th> <th>Input Type</th> <th>Epochs</th> <th># Heads</th> <th># Layers</th> <th>Training Loss</th> <th>Validation Loss</th> </tr> </thead> <tbody> <tr> <td>Base</td> <td>Short</td> <td>80</td> <td>1</td> <td>1</td> <td>5.1009</td> <td>5.1605</td> </tr> <tr> <td>Base</td> <td>Long</td> <td>80</td> <td>1</td> <td>6</td> <td>5.5994</td> <td>5.6289</td> </tr> <tr> <td>Base</td> <td>Long</td> <td>90</td> <td>1</td> <td>6</td> <td>5.5906</td> <td>5.6207</td> </tr> <tr> <td>$c = 5$</td> <td>Short</td> <td>80</td> <td>1</td> <td>1</td> <td>5.0481</td> <td>5.1139</td> </tr> <tr> <td>$c = 5$</td> <td>Long</td> <td>80</td> <td>1</td> <td>6</td> <td>5.5884</td> <td>5.6273</td> </tr> <tr> <td>$c = 5$</td> <td>Long</td> <td>90</td> <td>1</td> <td>6</td> <td>5.5769</td> <td>5.6166</td> </tr> <tr> <td>$c = 10$</td> <td>Short</td> <td>80</td> <td>1</td> <td>6</td> <td>4.5597</td> <td>4.6949</td> </tr> <tr> <td>$c = 10$</td> <td>Short</td> <td>90</td> <td>1</td> <td>6</td> <td>4.5432</td> <td>4.6809</td> </tr> <tr> <td>$c = 10$</td> <td>Long</td> <td>80</td> <td>1</td> <td>6</td> <td>5.6345</td> <td>5.6666</td> </tr> <tr> <td>$c = 10$</td> <td>Long</td> <td>90</td> <td>1</td> <td>6</td> <td>5.6237</td> <td>5.6565</td> </tr> </tbody> </table> <p>However, we noticed that with fewer heads and layers, the base model trained at approximately the same rate as the Gaussian model. A smaller number of attention heads and attention layers implies that fewer parameters need to be updated to learn the task; this typically means that training is faster for smaller models. 
As a result, it makes sense that a smaller model would benefit less from the increase in training speed that reduced attention context offers; since the model is so small, training is already fast and any decrease in training speed would be minor.</p> <p>To test the limitations of Gaussian attention, we experimented with extremely sparse attention patterns that selected only one token for each token.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/vert-attn-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/vert-attn-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/vert-attn-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/vert-attn.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/diag-attn-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/diag-attn-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/diag-attn-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/diag-attn.png" class="img-fluid"
width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/gaussian-attn-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/gaussian-attn-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/gaussian-attn-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-increasing-context-length-for-transformers/gaussian-attn.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>Although these models did not perform as well as the base transformer, we found that the token that was attended upon made a significant impact on the final loss. As shown in the table below, the models that employed a diagonal or Gaussian attention pattern performed significantly better than the model that used a vertical attention pattern on the first token. 
This suggests that local attention patterns were the most important ones for improving the outcome of our task; as a result, Gaussian attention may perform well specifically because it emphasizes the local attention context.</p> <table> <thead> <tr> <th>Model</th> <th>Epochs</th> <th># Layers</th> <th># Heads</th> <th>Training Loss</th> <th>Validation Loss</th> </tr> </thead> <tbody> <tr> <td>Diagonal</td> <td>80</td> <td>1</td> <td>6</td> <td>5.5089</td> <td>5.5400</td> </tr> <tr> <td>Vertical</td> <td>80</td> <td>1</td> <td>6</td> <td>5.6652</td> <td>5.6906</td> </tr> <tr> <td>Gaussian</td> <td>80</td> <td>1</td> <td>6</td> <td>5.3231</td> <td>5.3744</td> </tr> </tbody> </table> <h4 id="implications-and-limitations">Implications and Limitations</h4> <p>Our experiments showed that Gaussian attention has potential as an algorithm for improving transformer efficiency and increasing context lengths. We note that these experiments may not reflect the algorithm’s actual performance in real-world scenarios. Because we did not have the capacity to train a language model on the scale of BERT or GPT, we experimented only with much smaller models that processed much smaller contexts. As a result, our experimental results may not extend to larger models. Additionally, due to limited training time, we did not train any of the models we used for more than 150 epochs; with more training time, it is possible that the base transformers may outperform the modified ones. In order to generalize to larger models, Gaussian attention may need to be combined with other attention patterns, like global attention. More research is needed to fully understand its potential and shortcomings.</p> <h2 id="conclusion">Conclusion</h2> <p>Today, methods for increasing context length in transformers remain an important research topic.
Although researchers have proposed numerous efficient transformers and self-attention algorithms, a concrete solution for increasing transformer context lengths has yet to be found. With recent developments in large language models, the number of tasks that transformers can be applied to is increasing rapidly. As a result, the search for an efficient transformer is more important than ever.</p> <p>Our work shows that Gaussian distributions can potentially be used to build fixed-pattern attention masks. However, the performance of Gaussian attention masks in larger models remains to be confirmed and requires further study.</p> Zero-Shot Machine-Generated Image Detection using Sinks of Gradient Flows 2023-11-08T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/detect-image <h3 id="abstract">Abstract</h3> <p>Detecting AI-generated content has become increasingly critical as deepfakes become more prevalent. We discover and implement algorithms to distinguish machine-generated and real images without the need for labeled training data. We study the problem of identifying photorealistic images using diffusion models. In comparison to the existing literature, we discover detection techniques that do not require training, based on the intuition that machine-generated images should have higher likelihoods than their neighbors. We consider two metrics: the divergence of the score function around a queried image and the reconstruction error from the reverse diffusion process from little added noise. We also compare these methods to ResNets trained to identify fake images from existing literature. Although the previous methods outperform our methods in terms of our accuracy metrics, the gap between our zero-shot methods and these ResNet methods noticeably declines when different image transformations are applied.
We hope that our research will spark further innovation into robust and efficient image detection algorithms.</p> <h3 id="introduction">Introduction</h3> <p>As AI-generated images become ever more widespread, garnering virality for how realistic they have become, we are increasingly concerned with the potential for misuse. A deluge of machine-generated fake images could spread misinformation and harmful content on social media. From relatively innocuous pictures of <a href="https://www.nytimes.com/2023/04/08/technology/ai-photos-pope-francis.html">Pope Francis</a> wearing an AI-generated puffer coat to dangerous <a href="https://www.politico.eu/article/ai-photography-machine-learning-technology-disinformation-midjourney-dall-e3-stable-diffusion/">disinformation campaigns</a> powered by diffusion models, we live in a new era of media that we cannot trust. The European Union has passed <a href="https://www.nytimes.com/2023/12/08/technology/eu-ai-act-regulation.html">legislation</a> that, among other regulations, requires AI-generated content to be explicitly marked so. The enforcement of such legislation and similar-minded policies, however, remains unclear. Consequently, a growing body of research has sought to develop techniques to distinguish between the real and the synthetic.</p> <p>The rise of models capable of generating photorealistic content makes the detection problem difficult. While there are still numerous nontrivial challenges with current models from their inability to depict text and render tiny details humans are innately sensitive to such as eyes and hands, the pace of the technology is moving in a way that makes relying on these flaws short-sighted and dangerous. Another potential complication is that advanced photo editing techniques such as <a href="https://www.adobe.com/products/firefly.html">Adobe Firefly</a> have capabilities such as generative inpainting that make it such that an image could contain both real and invented content.
Even simple data augmentations like crops, rotations, color jitters, and horizontal flipping can make the input look vastly different to a detection model. Furthermore, the majority of popular image generation tools are text-conditional, and we cannot expect to recover the text prompt, not to mention the model that generated the image. This makes transferable, zero-shot techniques of paramount importance.</p> <p>In this paper, we propose two techniques for detecting images from diffusion models (see Figure <a href="#fig-methods-illustrated">1</a>). Diffusion models <d-cite key="sohl2015deep"></d-cite> have been one of the most successful architectures for image generation, inspired by thermodynamic principles. Diffusion models learn a score function (gradient of log likelihood) that ‘undoes’ noise from the image. In effect, these models learn a gradient field that points to the real-world data manifold.<d-cite key="batzolis2022your"></d-cite> We leverage the intuition that the greater the deviation the diffusion model’s machine-generated images are from the real world data, the greater the difference of the neighborhood gradient field. In particular, we believe that machine-generated images are more likely to live in a ‘sink’ of the gradient field as the diffusion model ‘flows’ images down the gradient field. We thus propose the <em>divergence of a diffusion model’s score function</em> as a promising zero-shot statistic for whether an image is generated by the diffusion model.</p> <p>In addition, another metric for the ‘sink’ property of the gradient field at the image of concern is how far the image moves after a small displacement and flow along the gradient field. 
This has a nice interpretation in diffusion models as the <em>reconstruction error</em> for running the reverse process over just a small timestep on just a slightly perturbed image.</p> <p><em><a name="fig-methods-illustrated">Figure 1:</a> The Divergence and Reconstruction Error Hypothesis: Images on the generated data manifold <span style="color: red">(red)</span> have negative divergence and small reconstruction error, while images on the real data manifold <span style="color: green">(green)</span> have zero divergence and large reconstruction error.</em></p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-detect-image/methods-illustrated-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-detect-image/methods-illustrated-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-detect-image/methods-illustrated-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-detect-image/methods-illustrated.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Our overarching research question is thus summarized as, can we use the properties of a diffusion model’s tacit vector field to build an effective zero-shot machine-generated image detector, specifically looking at <em>divergence</em> and <em>reconstruction error</em>?</p> <p>The main contributions of our paper are:</p> <ol> <li> <p>Proposing two methods inspired by sinks of gradient flows: <em>divergence</em> and <em>reconstruction error</em>.</p> </li> <li> <p>Conducting a wide battery of experiments on the performance of these methods in a variety of augmentation settings.</p> </li> </ol> <h3 id="related-work">Related Work</h3> <p>Previous literature has considered several different 
methods for image detection. Sha et al. 2022 <d-cite key="sha2022fake"></d-cite> trained machine learning classifiers to detect fake images using high-level image and text embeddings. They, however, do not consider the local information around image embeddings, and require existing datasets of known image-generated and non-image-generated examples to train their classifier. Corvi et al. 2023 <d-cite key="corvi2023detection"></d-cite> identified “forensic traces” in machine-generated image residuals for this task. Again, their method requires many data samples, and requires separate training on diffusion models and GANs.</p> <p>We are inspired by ideas from DetectGPT,<d-cite key="mitchell2023detectgpt"></d-cite> a recent work which addressed the same problem of detecting AI-generated content, but in the setting of large language models. For a given piece of text, DetectGPT perturbs the original text and computes the difference in log-likelihood between the perturbed text and the original text:</p> \[\mathrm{DetectGPT}(x,p_{\theta},q)\triangleq\log p_{\theta}(x)-\mathbb{E}_{\tilde{x}\sim q(\cdot|x)}\log p_{\theta}(\tilde{x})\] <p>where $p_\theta$ is the language model and $q$ is the distribution of perturbations. If the difference in log-likelihood is large, then the attack claims that the original text is more likely to be generated by a language model.</p> <p>There are several critical differences between language models and diffusion models. With text, one can directly compute the log likelihood of a given piece of text, even with only blackbox access, i.e., no visibility to the model’s parameters. In contrast, for diffusion models, it is intractable to directly compute the probability distribution over images because diffusion models only learn the score. Moreover, the most commonly used diffusion models, e.g. DALL-E 3, apply the diffusion process to a latent embedding space rather than the pixel space. 
To address the latter concern, we plan on applying the encoder to the image to obtain an approximation of the embedding that was passed into the decoder. And to address the former, instead of approximating the probability curvature around a given point like DetectGPT, we formulate a statistic characterizing whether the gradient field/score is a sink, i.e., the gradients around a machine-generated image point to the machine-generated image. This captures the idea of a local maximum in probability space, similar to the DetectGPT framework.</p> <p>It would be remiss to not mention Zhang et al. 2023,<d-cite key="zhang2023watermarks"></d-cite> who argued that watermarking, a strictly easier task than machine-generated image detection, is likely impossible. They claim that an adversary who can perturb a generated image of text without too much degradation and has blackbox access to the watermarking scheme can conduct a random-walk on reasonable outputs until the watermark is degraded. However, their analysis was mainly theoretical and lacked specific experiments with diffusion models. It remains to be seen whether their assumptions still hold for image generation, and whether more concrete watermarking schemes may afford some level of protection against less sophisticated adversaries or the unintentional use of machine-generated images.</p> <h3 id="methods">Methods</h3> <p><strong>Dataset.</strong> To conduct our research, we needed datasets of known real and fake images. We used MSCOCO <d-cite key="lin2014microsoft"></d-cite>, a dataset of 330K non-machine generated images and captions of common real-world objects which was also used by Corvi et al. 2023.<d-cite key="corvi2023detection"></d-cite> Initially, we planned to use DiffusionDB <d-cite key="wang2022diffusiondb"></d-cite> for our fake images, a dataset of 14M (prompt, image) pairs generated by the open-source Stable Diffusion Version 1 model scraped from the StableDiffusion discord. 
However, we realized that many of the images in DiffusionDB are not meant to be realistic. Instead, we iterated through the captions of MSCOCO and used Stable Diffusion V1.4 to generate a matching machine-generated image for that caption, as in Corvi et al. 2023.<d-cite key="corvi2023detection"></d-cite></p> <p><strong>Baseline.</strong> We used the model and code from Corvi et al. 2023 <d-cite key="corvi2023detection"></d-cite> to identify images generated by Stable Diffusion as our trained baseline. Their model is a ResNet18 image-only detector trained on the training split of the MSCOCO dataset and images also generated by prompts from StableDiffusion.</p> <p><strong>Detection Algorithms.</strong> For out attacks, we compute the divergence of the diffusion model’s score field around the image (negative divergence indicates a sink). We can estimate this via a finite-differencing approach: given a diffusion model $s_\theta(x)$ which predicts the score $\nabla_x\log p_\theta(x)$, we have that</p> \[\mathrm{div}(s_\theta,x)= \sum_{i=1}^d \frac{s_\theta(x+he_i)_i-s_\theta(x-he_i)_i}{2h}\] <p>for small $h$ and orthogonal basis ${e_i}_{i=1}^d$. However, images are high-dimensional, and even their latent space has $\approx10,000$ dimensions, which means that fully computing this sum could be computationally expensive. In this paper, we sample a fraction of the dimensions for each queried image.</p> <p>Another way to capture the intuition that machine-generated images are have higher likelihoods than their neighbors is by noising the latent to some timestep $t$, and then comparing the distance of the denoised image to the diffusion model to the original image. That is, given a diffusion model $f_\theta$ which takes a noised image and outputs an unnoised image (abstracting away noise schedulers, etc. 
for clarity),</p> \[\mathrm{ReconstructionError}(f_{\theta},x)\triangleq \mathbb{E}_{\tilde{x}\sim \mathcal{N}(x,\epsilon)}||x-f_{\theta}(\tilde{x})||_2^2\] <p>for small $\epsilon$. The intuition is that if an image and thus more likely, then the denoising process is more likely to send noisy images to that particular image.</p> <p><strong>Comparison.</strong> For each model, we use the AUC-ROC curve and the true positive rate (TPR) at low false positive rate (FPR) as metrics. The latter notion of accuracy is borrowed from the membership inference attack setting in Carlini et al. 2021.<d-cite key="carlinifpr"></d-cite> As they argue, this metric quantifies our confidence that a point identified as fake is actually fake. In important settings like filtering fake images on social media platforms, this is especially important as there may be asymmetric consequences for accidentally flagging an image as fake compared to missing a fake image. We also provide a data visualization tool for the images our method identifies. In the real world, we can expect that the images we want to test will be distorted, either by random cropping, reflections, rotations, or compression. We will apply image augmentations over both fake and real image datasets and report the same metrics over these augmentations.</p> <h3 id="experiments">Experiments</h3> <p>We run all experiments over a common set of 500 images from the test set of <a href="https://huggingface.co/datasets/nlphuji/mscoco_2014_5k_test_image_text_retrieval">MSCOCO</a> and the corresponding 500 images generated by Stable Diffusion V1.4 with the same prompt using HuggingFace’s default arguments.</p> <p>For our Divergence method, we randomly sample $d=10$ dimensions to compute the divergence over and set $h=0.1$. 
For our Reconstruction method, we compute an average distance over 10 reconstructed images per original image and use add/remove noise equivalent to 1 time-step.</p> <p>For each method, we evaluate the performance on no augmentation, random $256\times 256$ crop (corresponding to about a quarter of the image for generated images), grayscale, random horizontal flip with probably $0.5$, random rotation between $[-30^\circ,30^\circ]$, and random color jitter of: brightness from $[0.75,1.25]$, contrast from $[0.75,1.25]$, saturation from $[0.75,1.25]$, and hue from $[-0.1,0.1]$.</p> <p><em><a name="table-results">Table 1:</a> Divergence, Reconstruction, and ResNet Detection AUC and True Positive Rate at 0.1 False Positive Rate.</em></p> <table> <tr> <th>AUC / TPR$_{0.1}$</th> <th colspan="3" style="text-align: center">Method</th> </tr> <tr> <th>Augmentation</th> <th>Divergence</th> <th>Reconstruction</th> <th>ResNet</th> </tr> <tr> <th>No Aug.</th> <td>0.4535 / 0.078</td> <td>0.7310 / 0.000</td> <td>1.000 / 1.000</td> </tr> <tr> <th>Crop</th> <td>0.4862 / 0.092</td> <td>0.4879 / 0.064</td> <td>1.000 / 1.000</td> </tr> <tr> <th>Gray.</th> <td>0.4394 / 0.056</td> <td>0.7193 / 0.000</td> <td>1.000 / 1.000</td> </tr> <tr> <th>H. 
Flip</th> <td>0.4555 / 0.084</td> <td>0.7305 / 0.000</td> <td>1.000 / 1.000</td> </tr> <tr> <th>Rotate</th> <td>0.4698 / 0.062</td> <td>0.6937 / 0.000</td> <td>0.9952 / 0.984</td> </tr> <tr> <th>Color Jitter</th> <td>0.4647 / 0.082</td> <td>0.7219 / 0.000</td> <td>1.000 / 1.000</td> </tr> </table> <p><em><a name="fig-roc-auc">Figure 2:</a> AUC-ROC Curves in No Augmentation Setting.</em></p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <p>(a) Divergence</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-detect-image/method=Divergence_n_points=500_n_samples=10_noise_amount=0.1_num_inference_steps=25_seed=229_bs=1_roc-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-detect-image/method=Divergence_n_points=500_n_samples=10_noise_amount=0.1_num_inference_steps=25_seed=229_bs=1_roc-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-detect-image/method=Divergence_n_points=500_n_samples=10_noise_amount=0.1_num_inference_steps=25_seed=229_bs=1_roc-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-detect-image/method=Divergence_n_points=500_n_samples=10_noise_amount=0.1_num_inference_steps=25_seed=229_bs=1_roc.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <p>(b) Reconstruction</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1_roc-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" 
srcset="/staging/assets/img/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1_roc-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1_roc-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1_roc.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <p>(c) ResNet</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-detect-image/method=Resnet_n_points=1000_seed=229_bs=1_roc-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-detect-image/method=Resnet_n_points=1000_seed=229_bs=1_roc-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-detect-image/method=Resnet_n_points=1000_seed=229_bs=1_roc-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-detect-image/method=Resnet_n_points=1000_seed=229_bs=1_roc.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p><em><a name="fig-hists">Figure 3:</a> Histograms of Computed Statistics in No Augmentation Setting.</em></p> <div class="l-body"> <p>(a) Divergence</p> <iframe src="/staging/assets/html/2023-11-08-detect-image/method=Divergence_n_points=500_n_samples=10_noise_amount=0.1_num_inference_steps=25_seed=229_bs=1.html" frameborder="0" scrolling="no" 
height="600px" width="100%"></iframe> </div> <div class="l-body"> <p>(b) Reconstruction</p> <iframe src="/staging/assets/html/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1.html" frameborder="0" scrolling="no" height="600px" width="100%"></iframe> </div> <div class="l-body"> <p>(c) ResNet</p> <iframe src="/staging/assets/html/2023-11-08-detect-image/method=Reconstruction_n_points=500_n_samples=10_noise_amount=1.0_num_inference_steps=25_seed=229_bs=1.html" frameborder="0" scrolling="no" height="600px" width="100%"></iframe> </div> <p><strong>Trained Baseline.</strong> The trained baseline does extraordinarily well at the MSCOCO vs. Stable Diffusion detection task. It achieves $1.0$ AUC (perfect accuracy) across all augmentation settings except for rotation for which it gets an almost perfect AUC of $0.9952$. This high performance matches Corvi et al. 2023’s findings,<d-cite key="corvi2023detection"></d-cite> stemming from the fact that the ResNet was trained on the MSCOCO distribution and Latent Diffusion generated images are similar to Stable Diffusion generated images. In their paper, the performance noticeably drops to around $0.7$-$0.8$ AUC for other image generation models.</p> <p><strong>Divergence.</strong> Divergence does extremely poorly, with AUCs just slightly below 0.5, indicating that in fact generated images have greater divergence than real images—the opposite of our intuition, but this may also be noise as these values are essentially equivalent to random guessing. We suspect that this is largely due to our low choice of $d$, meaning that we cannot get a representative enough sample of the dimensions to get an accurate estimate of the true divergence. 
We may have also chosen $h$ too large, as we have no idea of the scale of any manifold structure that may be induced by the gradient field.</p> <p><strong>Reconstruction Error.</strong> Reconstruction error, on the other hand, boasts impressive AUCs of around $0.7$. The shape of the curve is particularly strange, and with the additional observation that the AUC when the random cropping is applied goes back to $0.5$ AUC, indicated to us that the image size may be the differentiating factor here. MSCOCO images are often non-square and smaller than the $512\times 512$ constant size of the generated images. As the Frobenius norm does not scale with image size, we hypothesize that using the spectral norm and dividing by the square root of the dimension would instead give us a more faithful comparison, akin to the random crop results. However, data visualization of the examples does not show a clear correlation between image size and reconstruction error, so it appears that this detection algorithm has decent AUC but poor TPR at low FPR, and is vulnerable to specifically cropping augmentations.</p> <p><a href="http://jsonw0.pythonanywhere.com/"><strong>Detection Visualizations.</strong></a> We developed a dashboard visualizaiton that enables us to look more closely at images and their associated detection statistics. 
Some examples we can pick out that seem to make sense include Figure 4, where the real image is captioned as a CGI fake image, and predictably gets a low statistic as deemed by Reconstruction Error (the generated image, ironically, gets a higher statistic denoting more real).</p> <p><em><a name="fig-methods-illustrated">Figure 4:</a> An Example Image of a CGI “Real” Image Getting Detected as Fake.</em></p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-detect-image/cgi-example-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-detect-image/cgi-example-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-detect-image/cgi-example-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-detect-image/cgi-example.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>However, from a visual inspection of images, we cannot identify a clear relationship between image content or quality of generated images that holds generally. We make our dashboard public and interactive; a demo can be seen below:</p> <div class="l-screen"> <iframe src="http://jsonw0.pythonanywhere.com/" frameborder="0" scrolling="yes" height="1200px" width="100%"></iframe> </div> <h3 id="discussion">Discussion</h3> <p>Throughout our experiments, the divergence-based detector performs much worse than the other detectors. Because the latent space has a very high dimension, the divergence detector may require sampling from many more dimensions than is practical for an image detector in order to obtain good estimates of the divergence. Further research should try to scale this method to see if it obtains better results. Mitchell 2023 et al. 
<d-cite key="mitchell2023detectgpt"></d-cite> justifies the validity of their machine-generated as a Hutchinson trace estimator of the divergence of the log probabilities; however, the poor performance of the divergence detector imply that estimating the trace is not helpful for image detection and that other model properties may instead be at play for this method’s effectiveness. In contrast, the noising/denoising detector implicitly incorporates information from all dimensions, which may explain its better performance. The model from Corvi et al. 2023 <d-cite key="corvi2023detection"></d-cite> outperforms our methods under all augmentations, achieving a perfect AUC on images without data augmentations. This is consistent with what was reported in their manuscript. However, this is not an unbiased estimate of the trained classifier’s performance, because they also used MSCOCO data to train and test their classifier. We were limited to this experimental setup by data availability and previous literature. Future work should comapre the zero-shot and trained detectors on completely out-of-sample data and with different generation models.</p> <p>Although at face-value our detectors perform worse than the pre-trained model in our experiments, our project still introduces some interesting ideas for machine-generated image detection that are of interest to the broader community and worth further exploring. First, the techniques we explored parallel zero-shot machine-generated image detection methods for text.<d-cite key="mitchell2023detectgpt"></d-cite> The fact that in both settings, perturbing the inputs and computing the curvature of the log probabilities are potent signals for machine-generated detection implies that these features may be an indelible mark of machine-generated models across all modalities. Second, image detection algorithms trained on data may be fundamentally vulnerable to adversarial modifications. 
Because there exists non-robust features that are predictive of the output in training data,<d-cite key="ilyas2019adversarial"></d-cite> adversaries, who realistically may have access to the image detection algorithm over many trials, can craft subtle background noise that circumvents image-detection algorithms. Our methods, which consist of only a few parameters, are not prone to adversarial attacks unlike trained models. Third, this work highlights the use of other features besides the image as features for image detection, e.g. score function and noising/denoising the image. Future work may build on the ideas behind these features to improve trained image detectors.</p> A Deeper Look into Equivariance for Materials Data 2023-11-08T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/A-deeper-look-into-equivariance-for-materials-data <h2 id="introduction">Introduction</h2> <p>Materials embody a diverse array of chemical and physical properties, intricately shaping their suitability for various applications. The representation of materials as graphs, where atoms serve as nodes and chemical bonds as edges, facilitates a systematic analysis. Graph Neural Networks (GNNs) have emerged as promising tools for deciphering relationships and patterns within materials data. The utilization of GNNs holds the potential to develop computational tools that deepen our understanding and aid in designing structure-property relationships in atomic systems.</p> <p>In recent years, there has been a heightened focus on employing machine learning for the accelerated discovery of molecules and materials with desired properties [<a href="#min2020accelerated">Min and Cho, 2020</a>; <a href="@pyzer2022accelerating">Pyzer-Knapp et al, 2022</a>; <a href="@merchant2023scaling">Merchant et al, 2023</a>]. 
Notably, these methods are exclusively applied to stable systems in physical equilibrium, where such systems correspond to local minima of the potential energy surface $E(r_1, . . . , r_n)$, with $r_i$ representing the position of atom $i$ [<a href="@schutt2018schnet">Schüttet al, 2018</a>].</p> <p>The diverse arrangements of atoms in the system result in varying potential energy values, influencing chemical stability. In the GIF below, different trajectories can be seen of the molecule Ethane. The Ethane molecule spends 99% of its time in a specific conformation, in which the substituents are at the maximum distance from each other. This conformation is called the staggered conformation. Looking at the molecule from a position on the C-C (main) axis (as in the second half of the animation), The staggered conformation is reached when the H atoms of the front C atom are exactly between the H atoms of the other C atom. This animation also show the 3-fold symmetry of the molecule around the main axis. All three staggered conformations will have the same energy value, as they are completely equivalent. The intermediate conformations will result in a higher energy value, as they are energetically less favorable. Different conformations can also portray elongations of some bonds lengths and variations in angles value. Predicting stable arrangements of atomic systems is in itself an important challenge!</p> <p align="center"> <img width="500" src="https://upload.wikimedia.org/wikipedia/commons/7/76/Ethane_conformation.gif" alt="Conformations of Ethane (C2H6) molecule" /> </p> <div class="caption"> Conformations of the molecule Ethane (C2H6) </div> <p>In the three-dimensional Euclidean space, materials and physical systems in general, inherently exhibit rotation, translation, and inversion symmetries. These operations form the E(3) symmetry group, a group of transformations that preserve the Euclidean distance between any two points in 3D space. 
When adopting a graph-based approach, a generic GNN may be sensitive to these operations, but an E(3) equivariant GNN excels in handling such complexities. Its inherent capability to grasp rotations, translations, and inversions allows for a more nuanced understanding, enabling the capture of underlying physical symmetries within the material structures [<a href="@batzner20223">Batzner et al, 2022</a>].</p> <h2 id="data">Data</h2> <p>The MD 17 dataset, an extensive repository of ab-initio molecular dynamics trajectories [<a href="@chmiela2019sgdml">Chmiela et al, 2019</a>], was employed in this study.</p> <p>Each trajectory within the dataset includes Cartesian positions of atoms (in Angstrom), their atomic numbers, along with total energy (in kcal/mol) and forces (kcal/mol/Angstrom) acting on each atom. The latter two parameters serve as regression targets in analyses.</p> <p>Our focus narrowed down to the molecules Aspirin, Ethanol, and Toluene:</p> <div class="row mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/aspirin-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/aspirin-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/aspirin-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/aspirin.jpg" class="img-fluid rounded z-depth-1 mb-3" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">Aspirin (C9H8O4)</figcaption> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" 
srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/ethanol-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/ethanol-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/ethanol-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/ethanol.jpg" class="img-fluid rounded z-depth-1 mb-3" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">Ethanol (C2H5OH)</figcaption> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/toluene-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/toluene-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/toluene-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/toluene.jpg" class="img-fluid rounded z-depth-1 mb-3" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">Toluene (C6H5CH3)</figcaption> </figure> </div> <p>The distributions of energy values (kcal/mol) for various conformations of the three molecules, within the training and validation sets, are illustrated in the histograms below.</p> <div class="row mt-3"> <!-- Row 1 --> <div class="col-md-6"> <figure> 
<picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_t-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_t-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_t-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_t.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-md-6"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_v-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_v-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_v-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_asp_v.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="row mt-3"> <!-- Row 2 --> <div class="col-md-6"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" 
srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_t-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_t-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_t-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_t.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-md-6"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_v-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_v-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_v-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_eth_v.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="row mt-3"> <!-- Row 3 --> <div class="col-md-6"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_t-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 
800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_t-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_t-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_t.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-md-6"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_v-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_v-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_v-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/hist_tol_v.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Energy (kcal/mol) distributions for Aspirin (C9H8O4), Ethanol (C2H5OH) and Toluene (C6H5CH3) molecules in train and validations sets </div> <p>The training set for Aspirin comprises 1000 conformations, while its validation set consists of 500 conformations. Ethanol’s training and validation sets each consist of 1000 conformations. 
Toluene’s training set comprises 1000 conformations, and its validation set consists of 500 conformations.</p> <h2 id="method">Method</h2> <p>In this project, our objective is to conduct a comparative analysis of two Graph Neural Network (GNN) architectures: an E(3) equivariant network and a non-equivariant (specifically E(3) Invariant) one. The primary focus is on energy prediction tasks related to atomic systems, with a particular emphasis on exploring the distinctions within the latent representations of these architectures and their interpretability.</p> <p>All GNNs are permutation invariant by design [<a href="@DBLP:journals/corr/abs-1905-04943">Keriven and Peyr, 2019</a>]. Our baseline GNN for comparison achieves rotation and translation invariance by simply operating only on interatomic distances instead of absolute position of the atoms. This design choice ensures that both the output and internal features of the network remain invariant to rotations. In contrast, our equivariant GNN for comparison utilizes relative position vectors rather than distances (scalars) together with features comprised of not only scalars, but also higher-order geometric tensors.</p> <p>In our Invariant GNN, the node-wise formulation of the message passing is given by:</p> <center> $$\mathbf{x}^{\prime}_i = \mathbf{\Theta}^{\top} \sum_{j \in \mathcal{N}(i) \cup \{ i \}} \frac{e_{j,i}}{\sqrt{\hat{d}_j \hat{d}_i}} \mathbf{x}_j$$</center> <p>Where $ x_i, x_j $ are the feature vectors of the target and source nodes, respectively, defined as a one-hot representation of the atomic number of that node. The summation is performed over the neighborhood $\mathcal{N}(i)$ of atom $i$, defined by a radial cutoff around each node, a tunable parameter typically set around 4-5 angstroms. Meaning, the concept of neighborhood is based on the distance between nodes, not their connectivity. 
Additionally, $ d_i = 1 + \sum_{j \in \mathcal{N}(i)} e_{j,i} $ where $ e_{j,i} $ represents the edge weight from the source node $j$ to the target node $i$ , and is defined as the interatomic distance.</p> <p>For constructing our equivariant GNN, <a href="https://e3nn.org/">E3nn</a> was employed - a torch-based library designed for building o(3) equivariant networks. Following the method presented in [<a href="@batzner20223">Batzner et al, 2022</a>], a neural network that exhibits invariance to translation and equivariance to rotation and inversion was constructed. Two key aspects of E3nn facilitating the construction of O(3) equivariant neural networks are the use of irreducible representations (Irreps) for data structuring and encapsulating geometrical information in Spherical Harmonics. Irreps are data structures that describe how the data behaves under rotation. We can think of them as data types, in the sense that this structure includes the values of the data alongside instructions for interpretation. The Spherical Harmonics form an orthonormal basis set of functions that operate on a sphere, and they’re equivariant with respect to rotations, which makes them very useful (and popular!) in expanding expressions in physical settings with spherical symmetry.</p> <p>For the equivariant GNN, the node-wise formulation of the message is:</p> <center> $$f'_i = \frac{1}{\sqrt{z}} \sum_{j \in \partial(i)} \; f_j \; \otimes\!(h(\|x_{ij}\|)) \; Y(x_{ij} / \|x_{ij}\|) $$ </center> <p>where $ f_j, f_i $ are the target and source nodes feature vectors, defined similarly as a one-hot representation of the atomic number. $z$ is the average degree (number of neighhbors) of the nodes, and the neighborhood $\partial(i)$ is once again defined using a radial cutoff. $x_{ij}$ is the relative distance vector, $h$ is a multi layer perceptron and $Y$ is the spherical harmonics. The expression $x \; \otimes(w) \; y$ denotes a tensor product of $x$ with $y$ using weights $w$. 
This signifies that the message passing formula involves a convolution over nodes’ feature vectors with filters constrained to be a multiplication of a learned radial function and the spherical harmonics.</p> <h2 id="results">Results</h2> <p>The performance of the two GNNs was compared for the task of predicting the total energy of the molecule’s conformation - a scalar property. By constraining the Equivariant GNN to predict a scalar output, it becomes overall invariant to the E(3) group. However, the use of higher order geometric tensors in the intermediate representations and operations in the E-GNN, makes internal features equivariant to rotation and inversion. This enables the passage of angular information through the network using rotationally equivariant filters (spherical harmonics) in the node feature convolution. This is the essential difference between the two architectures.</p> <p>The learning curves of the two GNNs for each molecule data are presented in the figures below:</p> <div class="row mt-3"> <!-- Row 1 --> <div class="col-md-6"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_t_epoch_new-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_t_epoch_new-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_t_epoch_new-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_t_epoch_new.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> 
</div> <div class="col-md-6"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_v_epoch_new-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_v_epoch_new-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_v_epoch_new-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_asp_v_epoch_new.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="row mt-3"> <!-- Row 2 --> <div class="col-md-6"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_t_epoch_new-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_t_epoch_new-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_t_epoch_new-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_t_epoch_new.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div 
class="col-md-6"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_v_epoch_new-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_v_epoch_new-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_v_epoch_new-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_ethanol_v_epoch_new.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="row mt-3"> <!-- Row 3 --> <div class="col-md-6"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_t_epoch_new-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_t_epoch_new-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_t_epoch_new-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_t_epoch_new.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-md-6"> 
<figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_v_epoch_new-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_v_epoch_new-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_v_epoch_new-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/energy_pred_tol_v_epoch_new.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Train (left) and Validation (right) learning curves of Energy (kcal/mol) prediction of Aspirin (top), Ethanol (middle) and Toluene (bottom) conformations </div> <p>The models were trained for 50 epochs using mean absolute error (MAE) objective for predicting normalized energy (in kcal/mol units). Adam optimizer with a learning rate of 0.01 and learning rate scheduler were employed. The E-GNN achieves a superior MAE rate for all three molecules.</p> <p>Next, let’s examine the latent representation of the two models! 
The last layer values of the validation data of both models were projected using t-SNE to a 2D representation and color-coded according to the target energy values:</p> <div class="row mt-3"> <!-- Row 1 --> <div class="col-md-6"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_asp_1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_asp_1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_asp_1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_asp_1.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-md-6"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_asp_1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_asp_1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_asp_1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_asp_1.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> 
</figure> </div> </div> <div class="row mt-3"> <!-- Row 2 --> <div class="col-md-6"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth_2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth_2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth_2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth_2.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-md-6"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_eth_new-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_eth_new-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_eth_new-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_eth_new.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="row mt-3"> <!-- Row 3 --> <div class="col-md-6"> <figure> <picture> <source 
class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_tol_2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_tol_2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_tol_2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_tol_2.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-md-6"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_tol_1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_tol_1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_tol_1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/gnn_lat_tol_1.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Latents projections of E-GNN (left) and GNN (right) last layer, of Aspirin (top), Ethanol (middle) and Toluene (bottom) conformations </div> <p>A color gradient can be seen in all three projections of the 
Equivariant GNN; and it is the clearest for Ethanol. The Invariant GNN’s latent projections do not exhibit a similar structure, perhaps except for Ethanol’s conformations. Moreover, in Ethanol’s case, the GNN projection appears to be quite one-dimensional.</p> <p>The apparent color gradient according to the target values in the E-GNN latent space is impressive, suggesting that the model leverages this information when embedding data conformations for predictions. Multiple “locations” in the latent space denote various high-energy conformations, indicating that the model considers not only the target energy value but also structural differences.</p> <p>To assess whether there’s molecular structural ordering in the embeddings, we construct system-specific variables for each molecule and visualize the latent space accordingly. Ethanol, with its relatively simple structure, showcases three important variables: the distance between the two Carbons (C-C bond), the distance between Carbon and Oxygen (C-O bond), and the angle formed by the three atoms. 
The distributions of these variables in Ethanol’s train and validation sets are depicted in the figure below:</p> <div class="row mt-3"> <!-- Row 1 --> <div class="col-md-6"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_t-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_t-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_t-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_t.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-md-6"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_v-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_v-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_v-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_cc_v.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="row mt-3"> <!-- Row 2 --> <div class="col-md-6"> <figure> 
<picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_t-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_t-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_t-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_t.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-md-6"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_v-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_v-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_v-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_co_v.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="row mt-3"> <!-- Row 3 --> <div class="col-md-6"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_t-480.webp" /> 
<source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_t-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_t-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_t.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-md-6"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_v-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_v-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_v-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/eth_ang_v.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Distributions in train (left) and validation (right) sets of Ethanol, of C-C bond length (top), C-O bond length (middle) and main angle (bottom) </div> <p>The distributions appear very similar for each variable in the train and validation sets. 
Now, let’s examine Ethanol’s validation conformations latent projection, color-coded with respect to the target and the three system-specific variables:</p> <div class="row mt-3"> <!-- Row 1 --> <div class="col-md-6"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth_2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth_2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth_2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth_2.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-md-6"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__cc_2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__cc_2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__cc_2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__cc_2.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> 
</picture> </figure> </div> </div> <div class="row mt-3"> <!-- Row 2 --> <div class="col-md-6"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__ang_2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__ang_2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__ang_2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__ang_2.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-md-6"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__co_2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__co_2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__co_2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-A-deeper-look-into-equivariance-for-materials-data/egnn_lat_eth__co_2.png" class="img-fluid rounded z-depth-1" width="100%" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>A clear gradient is observed for the main angle and C-C bond! 
The target gradient appears from the top left corner to the bottom right; the C-C bond gradient seems to go from bottom left to top right, and the main angle gradient isn’t as linear, appearing to spiral from the bottom to the top right corner clockwise. The C-O bond projection doesn’t seem to follow a discernible gradient, suggesting it’s not as influential on the target as the other two variables.</p> <p>Cool huh? The Equivariant GNN appears to embed the data according to the target value but also according to the system’s geometrical structure! This suggests that the model leverages its E(3) equivariant convolution layers to capture and encode information about both the target values and the intricate geometric features of the molecular systems.</p> <h2 id="conclusion">Conclusion</h2> <p>In conclusion, our exploration has demonstrated the efficiency of the E(3) equivariant GNN, compared to an invariant GNN, in predicting the total energy of molecular conformations. Though both models were compared on predicting energy, a scalar property, the E-GNN’s ability to leverage the inherent symmetries present in the system allowed it to effectively capture and encode the relationship between the arrangement of molecules and their respective energy. This was illustrated through the latent representation visualizations, and was particularly evident in the case of Ethanol. Here, discernible gradients in the latent space were observed, correlating with the target energy value and variations in C-C bond length and main angle. However, interpretability varies among the latent projections for the more complex molecules investigated in this project. 
Potential improvements could be achieved with additional data and a more expressive equivariant network.</p> Prompt to Prompt 2023-11-07T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/prompt-to-prompt <h2 id="introduction">Introduction</h2> <p>Recently, the techniques to edit images have advanced from methodologies that require the user to edit individual pixels to deep learning-based image editing. The latter employ for example large image generation models (e.g., stable diffusion models). While these deep learning-based image editing techniques initially required the user to mark particular areas that should be edited (Nichol et al., 2021 <d-cite key="nichol2021glide"></d-cite>; Avrahami et al., 2022a<d-cite key="avrahami2022blendeddiffusion"></d-cite>; Ramesh et al., 2022), recently the work by (Hertz et al, 2022 <d-cite key="hertz2022prompttoprompt"></d-cite>) has shown that this becomes unnecessary. Instead, image editing can be performed using a cross-attention mechanism. In particular, the proposed prompt-to-prompt editing framework enables the controlling of image edits by text only. 
The section below provides an overview of how this prompt-to-prompt framework works (Figure 1, by (Hertz et al, 2022<d-cite key="hertz2022prompttoprompt"></d-cite>)).</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/1-cross_attention_masks-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/1-cross_attention_masks-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/1-cross_attention_masks-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/1-cross_attention_masks.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p><em>Figure 1: Cross-attention method overview. Top: visual and textual embedding are fused using cross-attention layers that produce attention maps for each textual token. Bottom: we control the spatial layout and geometry of the generated image using the attention maps of a source image. This enables various editing tasks through editing the textual prompt only. When swapping a word in the prompt, we inject the source image maps M<sub>t</sub>, overriding the target maps M<sup>∗</sup><sub>t</sub>. In the case of adding a refinement phrase, we inject only the maps that correspond to the unchanged part of the prompt. To amplify or attenuate the semantic effect of a word, we re-weight the corresponding attention map. (Hertz et al, 2022 <d-cite key="hertz2022prompttoprompt"></d-cite>).</em></p> <p>While this proposed framework has significantly advanced the image editing research field, its performance still leaves room for improvement such that open research questions remain. 
For example, when performing an image editing operation that changes the hair color of a woman, significant variability across the woman’s face can be observed (Figure 2). This is undesirable, as the user would expect to see the same female face across all four images.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/2-Experimentation_proposed_prompt_to_prompt-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/2-Experimentation_proposed_prompt_to_prompt-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/2-Experimentation_proposed_prompt_to_prompt-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/2-Experimentation_proposed_prompt_to_prompt.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p><em>Figure 2: Experimentation with the proposed prompt-to-prompt image editing framework presented by (Hertz et al, 2022<d-cite key="hertz2022prompttoprompt"></d-cite>). The faces of the women show significant variability even though they should remain invariant across all four generated/ edited images.</em></p> <p>Within our work, we will start to further benchmark the proposed framework’s performance, explore its hyperparameters’ impact on the image editing process, and research opportunities to improve the current performance.</p> <h2 id="literature-review">Literature Review</h2> <p>Before delving into the details of the prompt-to-prompt editing method, let’s briefly recap some existing techniques to edit images with diffusion models that have paved the way for this revolutionary approach:</p> <h3 id="1-adding-noise-to-an-image-and-denoising-with-a-prompt">1. 
Adding noise to an image and denoising with a prompt</h3> <p>In <strong>SDEdit: Guided Image Synthesis and Editing with Stochastic Differential Equations</strong> <d-cite key="meng2021sdedit"></d-cite> , the user takes an image, introduces noise and then denoises it according to a user-provided prompt. As an example, given an image, users can specify how they want the edited image to look using pixel patches copied from other reference images.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/literature_review/SDEdit-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/literature_review/SDEdit-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/literature_review/SDEdit-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/literature_review/SDEdit.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>A similar approach is used in the paper <strong>MagicMix: Semantic Mixing with Diffusion Models</strong> <d-cite key="liew2022magicmix"></d-cite> which uses a pre-trained text-to-image diffusion based generative model to extract and mix two semantics. The figure below showcases the detailed pipeline of MagicMix (image-text mixing). Given an image x<sub>0</sub> of layout semantics, they first craft its corresponding layout noises from step K<sub>min</sub> to K<sub>max</sub>. Starting from K<sub>max</sub>, the conditional generation process progressively mixes the two concepts by denoising given the conditioning content semantics (“coffee machine” in this example). 
For each step k in [K<sub>min</sub>; K<sub>max</sub>], the generated noise of mixed semantics is interpolated with the layout noise x<sub>k</sub> to preserve more layout details.</p> <div style="text-align:center;"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_1.png" class="img-fluid" width="100" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/literature_review/corgi_coffee_machine_2.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h3 id="2-take-an-image-add-noise-and-denoise-it-with-a-prompt--add-a-mask">2. 
Take an image, add noise and denoise it with a prompt + Add a mask</h3> <p>In the paper <strong>Blended Diffusion: Text-Driven Editing of Natural Images</strong> <d-cite key="avrahami2022blended"></d-cite>, given an input of an image and a mask, the blended diffusion modifies the masked area according to a guided text prompt, without affecting the unmasked regions. One limitation of this is that it relies on the user having to produce this mask to indicate the editing region.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/literature_review/Blended_Difussion-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/literature_review/Blended_Difussion-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/literature_review/Blended_Difussion-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/literature_review/Blended_Difussion.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>An advanced version of this diffusion model is discussed in the paper <strong>Text-based inpainting with CLIPSeg and Stable Diffusion</strong> <d-cite key="luddecke2022image"></d-cite>. In this paper, the novelty is that the user doesn’t have to do the mask manually. Instead, it can use an existing segmentation model (e.g. CLIPSeg). Another alternative is presented in the paper <strong>DiffEdit: Diffusion-based semantic image editing with mask guidance</strong> <d-cite key="couairon2022diffedit"></d-cite> where the mask is generated directly from the diffusion model.</p> <h3 id="3-fine-tune-overfit-on-a-single-image-and-then-generate-with-the-fine-tuned-model">3. 
Fine-tune (“overfit”) on a single image and then generate with the fine-tuned model</h3> <p>In the paper <strong>Imagic: Text-based real image editing with diffusion models</strong> <d-cite key="kawar2023imagic"></d-cite> and <strong>Unitune: Text-driven image editing by fine-tuning a diffusion model on a single image</strong> <d-cite key="valevski2023unitune"></d-cite>, the authors perform extensive fine-tuning on either the entire diffusion model or specific sections of it. This process is computationally and memory-intensive, setting it apart from alternative methods.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/literature_review/Fine_Tuning-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/literature_review/Fine_Tuning-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/literature_review/Fine_Tuning-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/literature_review/Fine_Tuning.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h3 id="prompt-to-prompt">Prompt-to-prompt</h3> <p>The prompt-to-prompt editing method is a significant advancement compared with the existing image editing techniques that rely on diffusion models. Unlike the methods explained above that involve adding noise, using masks, or fine-tuning, the prompt-to-prompt method stands out because of its simplicity, flexibility, and user-friendliness. In the former methods, users often face challenges such as manually creating masks or undergoing resource-intensive fine-tuning processes, which can be both time-consuming and technically demanding. 
In contrast, the prompt-to-prompt editing method streamlines the editing process by allowing users to directly specify their desired edits through language prompts. This approach eliminates the need for intricate masking or extensive model training as well as leverages the power of human language to precisely convey editing intentions.</p> <p>Throughout our research, we will adopt the prompt-to-prompt editing method as our starting point, with the aim of enhancing its performance.</p> <h2 id="outline-of-our-research">Outline of our research</h2> <p>To perform our research, we plan to build upon the code which complemented the paper published by (Hertz et al, 2022 <d-cite key="hertz2022prompttoprompt"></d-cite>, <a href="https://github.com/google/prompt-to-prompt/">Link to code</a>). Concretely, we will rely on a stable diffusion model from hugging face which we will access via Python. No model training is required as we will solely work with attention layers that capture spatial information about the images.</p> <p>Our study will be divided into 3 main subsections:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2001%20-%20outline-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2001%20-%20outline-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2001%20-%20outline-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2001%20-%20outline.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h2 id="a-hyperparameter-study-of-prompt-to-prompt-editing-method-word-swap">A. 
Hyperparameter Study of prompt-to-prompt editing method “word swap”</h2> <p>In the forthcoming subsection, we delve into a comprehensive analysis of the hyperparameters pertaining to the “word swap” method within the prompt-to-prompt editing framework. Before delving into the specifics, it’s crucial to understand the significance of these hyperparameters and their default values, as originally outlined in the seminal work by Hertz et al<d-cite key="hertz2022prompttoprompt"></d-cite>.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2002%20-%20outline%20section%20A-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2002%20-%20outline%20section%20A-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2002%20-%20outline%20section%20A-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2002%20-%20outline%20section%20A.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2003%20-%20Local%20editing-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2003%20-%20Local%20editing-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2003%20-%20Local%20editing-1400.webp" /> <!-- Fallback to the original file --> <img 
src="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2003%20-%20Local%20editing.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2004%20-%20Cross%20replace%20steps%20explanation-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2004%20-%20Cross%20replace%20steps%20explanation-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2004%20-%20Cross%20replace%20steps%20explanation-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2004%20-%20Cross%20replace%20steps%20explanation.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2005%20-%20Sel-attention%20explanation-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2005%20-%20Sel-attention%20explanation-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2005%20-%20Sel-attention%20explanation-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2005%20-%20Sel-attention%20explanation.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; 
$('.responsive-img-srcset').remove();" /> </picture> </figure> <p>We will systematically explore various hypotheses regarding each hyperparameter and present our empirical findings, shedding light on their individual impacts on the editing process. This examination aims to provide valuable insights into optimizing the performance of the “word swap” method and enhancing its practical utility.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2006%20-%20Hypothesis%20and%20findings-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2006%20-%20Hypothesis%20and%20findings-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2006%20-%20Hypothesis%20and%20findings-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2006%20-%20Hypothesis%20and%20findings.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h2 id="a1-exploration-of-silhouette-threshold-hyperparameter-k">A1. Exploration of silhouette threshold hyperparameter (“k”)</h2> <p>In this section, we embark on an exploration of the silhouette threshold hyperparameter (“k”). We aim to unravel the influence of varying this parameter while using the prompt ‘<em>“A woman’s face with blond hair”</em>’ and making alterations to different hair colors (brown, red, black). 
The GIF below showcases the representation of these experiments.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/threshold_k/change_threshold_womens_face2.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/threshold_k/change_threshold_womens_face2.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/threshold_k/change_threshold_womens_face2.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/threshold_k/change_threshold_womens_face2.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2007%20-%20Results%20silhouette%20parameter%20k%20-%20faces-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2007%20-%20Results%20silhouette%20parameter%20k%20-%20faces-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2007%20-%20Results%20silhouette%20parameter%20k%20-%20faces-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2007%20-%20Results%20silhouette%20parameter%20k%20-%20faces.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Additionally, we present a comparative analysis of the impact of this hyperparameter on editing tasks 
related to landscapes. For instance, we employ the prompt ‘<em>“A river between mountains”</em>’ and manipulate the landscape, including options like streets, forests, and deserts. The results of this landscape-oriented analysis can be seen in the figure below.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/threshold_k/attention_replace_rivers.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/threshold_k/attention_replace_rivers.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/threshold_k/attention_replace_rivers.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/threshold_k/attention_replace_rivers.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2008%20-%20Results%20silhouette%20parameter%20k%20-%20landscape-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2008%20-%20Results%20silhouette%20parameter%20k%20-%20landscape-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2008%20-%20Results%20silhouette%20parameter%20k%20-%20landscape-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2008%20-%20Results%20silhouette%20parameter%20k%20-%20landscape.png" class="img-fluid" width="auto" height="auto" 
onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h2 id="a2-exploration-of-cross-attention-injection-hyperparameter-cross-replace-steps">A2. Exploration of cross-attention injection hyperparameter (“cross replace steps”)</h2> <p>Below we showcase the effect of the silhouette threshold hyperparameter (“k”) and the cross-attention injection hyperparameter(“cross_replace_steps”). We manipulate the “k” value, setting it to 3 different levels: 0, 0.3 (default literature value), and 0.6. The experiment was performed for both women’s faces and landscapes, providing a comprehensive understanding of how these hyperparameters affect the editing process. The following GIFs showcase the results of our exploration.</p> <h3 id="with-k--0">With k = 0:</h3> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_zero_cross_replace_women.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_zero_cross_replace_women.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_zero_cross_replace_women.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_zero_cross_replace_women.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_zero_cross_replace_river.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" 
srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_zero_cross_replace_river.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_zero_cross_replace_river.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_zero_cross_replace_river.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h3 id="with-k--03">With k = 0.3:</h3> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_point3_cross_replace_women.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_point3_cross_replace_women.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_point3_cross_replace_women.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_point3_cross_replace_women.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_point3_cross_replace_river.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_point3_cross_replace_river.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 
1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_point3_cross_replace_river.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_point3_cross_replace_river.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h3 id="with-k--06">With k = 0.6:</h3> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_point6_cross_replace_women.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_point6_cross_replace_women.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_point6_cross_replace_women.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_point6_cross_replace_women.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_point6_cross_replace_river.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_point6_cross_replace_river.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_point6_cross_replace_river.gif-1400.webp" /> <!-- Fallback to the original file --> <img 
src="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/cross_replace_steps/k_point6_cross_replace_river.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Below, we present the key insights found for the prompt <em>“A woman’s face with blond hair”</em>.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2009%20-%20Results%20cross%20replace%20steps%20-%20faces-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2009%20-%20Results%20cross%20replace%20steps%20-%20faces-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2009%20-%20Results%20cross%20replace%20steps%20-%20faces-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2009%20-%20Results%20cross%20replace%20steps%20-%20faces.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Below, we present the key insights found for the prompt <em>“A river between mountains”</em>.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2010%20-%20Results%20cross%20replace%20steps%20-%20landscape-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2010%20-%20Results%20cross%20replace%20steps%20-%20landscape-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" 
srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2010%20-%20Results%20cross%20replace%20steps%20-%20landscape-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2010%20-%20Results%20cross%20replace%20steps%20-%20landscape.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h2 id="a3-exploration-of-self-attention-hyperparameter-self-replace-steps">A3. Exploration of self-attention hyperparameter (“self replace steps”)</h2> <p>In our investigation of the self-attention hyperparameter known as “self_replace_steps,” we conducted a series of experiments with careful consideration of the interplay between this parameter and two other critical factors: “k” (the silhouette threshold) and “cross_replace_steps” (the cross-attention injection parameter). To comprehensively assess the influence of “self_replace_steps,” we designed two distinct experimental scenarios.</p> <p>In the first scenario, we set “k” and “cross_replace_steps” to their default values in the literature review (0.3 and 0.8 respectively), creating an environment conducive to exploring the effects of self-attention within these threshold parameters. 
Concurrently, in the second scenario, we opted for more extreme settings by keeping “k” at 0 (no silhouette threshold) and “cross_replace_steps” at 0.2, thereby intensifying the impact of the self-attention hyperparameter.</p> <h3 id="with-k--03-and-cross_replace_steps--08">With k = 0.3 and cross_replace_steps = 0.8:</h3> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/self_replace_steps/k_point3_self_replace_women.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/self_replace_steps/k_point3_self_replace_women.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/self_replace_steps/k_point3_self_replace_women.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/self_replace_steps/k_point3_self_replace_women.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/self_replace_steps/k_point3_self_replace_river.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/self_replace_steps/k_point3_self_replace_river.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/self_replace_steps/k_point3_self_replace_river.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/self_replace_steps/k_point3_self_replace_river.gif" class="img-fluid" width="auto" height="auto" 
onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h3 id="with-k--0-and-cross_replace_steps--02">With k = 0 and cross_replace_steps = 0.2:</h3> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/self_replace_steps/k_zero_crossattention_point2_self_replace_women.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/self_replace_steps/k_zero_crossattention_point2_self_replace_women.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/self_replace_steps/k_zero_crossattention_point2_self_replace_women.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/self_replace_steps/k_zero_crossattention_point2_self_replace_women.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/self_replace_steps/k_zero_crossattention_point2_self_replace_river.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/self_replace_steps/k_zero_crossattention_point2_self_replace_river.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/self_replace_steps/k_zero_crossattention_point2_self_replace_river.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/self_replace_steps/k_zero_crossattention_point2_self_replace_river.gif" class="img-fluid" 
width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Below, we present the key insights for the hyperparameter “self_replace_steps” within the context of the prompt <em>“A woman’s face with blond hair”</em>.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2011%20-%20Results%20self%20replace%20steps%20-%20faces-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2011%20-%20Results%20self%20replace%20steps%20-%20faces-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2011%20-%20Results%20self%20replace%20steps%20-%20faces-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2011%20-%20Results%20self%20replace%20steps%20-%20faces.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Below, we present the key insights for the hyperparameter “self_replace_steps” found for the prompt <em>“A river between mountains”</em>.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2012%20-%20Results%20self%20replace%20steps%20-%20landscape-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2012%20-%20Results%20self%20replace%20steps%20-%20landscape-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" 
srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2012%20-%20Results%20self%20replace%20steps%20-%20landscape-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2012%20-%20Results%20self%20replace%20steps%20-%20landscape.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h2 id="a4-cycle-consistency-of-method">A4. Cycle Consistency of method</h2> <p>Our primary goal is to delve into the notion of “Cycle Consistency” within our methodology. This concept revolves around the seamless reversal of text prompt modifications back to their original form, ensuring that the resulting image closely mirrors the initial prompt. This bidirectional editing process serves as the central focus of our research, and in the subsequent sections, we present our findings on this crucial aspect.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2013%20-%20Cycle%20consistency-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2013%20-%20Cycle%20consistency-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2013%20-%20Cycle%20consistency-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2013%20-%20Cycle%20consistency.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" 
srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2014%20-%20Cycle%20consistency%20-%20hyperparameter%20impact-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2014%20-%20Cycle%20consistency%20-%20hyperparameter%20impact-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2014%20-%20Cycle%20consistency%20-%20hyperparameter%20impact-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2014%20-%20Cycle%20consistency%20-%20hyperparameter%20impact.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h2 id="b-generalization-of-optimized-hyperparameters-to-attention-re-weight-method">B. Generalization of optimized hyperparameters to “attention re-weight method”</h2> <p>After identifying the optimal parameters, we conducted a comparative analysis to assess their generalizability across other methods, including attention re-weighting. In the visual presentation, we used GIFs to showcase image generation under two different parameter configurations for the prompt <em>“A woman’s face with long wavy blond hair”</em>.</p> <p>On the left side, images were generated using default values (k=0.3; cross_replace_steps = 0.8; self_replace_steps = 0.2) while varying the assigned weights. Notably, negative weights led to instability and less desirable outcomes, as evidenced by the results on the left.</p> <p>On the right side, we employed our optimized hyperparameter values (k = 0; cross_replace_steps = 0.2; self_replace_steps = 0.8). These images demonstrated improved stability while consistently producing the desired output. 
This visual comparison highlights the effectiveness of our optimized parameters and their superior performance, particularly when dealing with attention re-weighting method.</p> <div style="display: flex;"> <div style="flex: 1; padding: 10px;"> Literature suggested parameters <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/c_value/c_value_women_curly.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/c_value/c_value_women_curly.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/c_value/c_value_women_curly.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/c_value/c_value_women_curly.gif" class="img-fluid" width="200" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div style="flex: 1; padding: 10px;"> Newly optimized parameters <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/c_value/c_value_women_curly_improved_self_replace.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/c_value/c_value_women_curly_improved_self_replace.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/c_value/c_value_women_curly_improved_self_replace.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/GIFs/c_value/c_value_women_curly_improved_self_replace.gif" class="img-fluid" width="50" height="auto" onerror="this.onerror=null; 
$('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <h2 id="our-proposed-method">Our Proposed Method</h2> <p>As our research has demonstrated, the current prompt-to-prompt method, as reported in the literature <d-cite key="hertz2022prompttoprompt"></d-cite>, exhibits significant limitations. Specifically, with the current settings for the silhouette, cross-attention injection, and self-attention injection parameters, the method fails to perform the prompted edits with precision. A comparative analysis of the generated target images against the geometry of the reference images reveals undesired deviations. The existing method over-constrains the geometry due to excessively high k values and cross-attention injection values. Additionally, it underutilizes self-attention injection. Furthermore, the current method lacks cycle consistency. To address these shortcomings, we propose a new framework: the <em>“CL P2P”</em> prompt-to-prompt image editing framework. This framework offers several key improvements over the existing method:</p> <p><strong>Optimization of Critical Hyperparameters</strong>: Our research indicates that optimizing the values of critical hyperparameters results in higher prompt-to-prompt image editing precision and a more accurate similarity between the reference and target images for desired features. 
We propose the following adjusted values, particularly for editing faces and hairstyles:</p> <ul> <li>Local editing (silhouette parameter k): 0.0</li> <li>Cross-attention injection (cross replace steps): 0.2</li> <li>Self-attention injections (self-replace steps): 0.8</li> </ul> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2016%20-%20Current%20vs%20new%20method%20comparision%20of%20output-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2016%20-%20Current%20vs%20new%20method%20comparision%20of%20output-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2016%20-%20Current%20vs%20new%20method%20comparision%20of%20output-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2016%20-%20Current%20vs%20new%20method%20comparision%20of%20output.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>By selecting these values, the following changes are introduced to the prompt-to-prompt editing method:</p> <ul> <li><span style="color:red">Remove</span>: Local editing can be removed from the method, as it did not lead to significant improvements compared to the precision achieved by the elongated injection of self-attention.</li> <li><span style="color:orange">Reduce</span>: The cross-attention (query-key-value attention) injection should be reduced to allow greater geometric adaptability and better convergence between the reference and target images.</li> <li><span style="color:green">Increase</span>: Self-attention injection should be substantially elongated from 20% to 80% of the diffusion steps. 
This is crucial, especially for editing hairstyles, as it allows for the greatest geometric adaptability and ensures the convergence between desired reference and target image features.</li> </ul> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2015%20-%20Current%20vs%20new%20method-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2015%20-%20Current%20vs%20new%20method-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2015%20-%20Current%20vs%20new%20method-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-prompt-to-prompt/analysis/Figure%2015%20-%20Current%20vs%20new%20method.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p><strong>Addressing Cycle-Inconsistency</strong>: To remedy the cycle-inconsistency, we propose balancing the asymmetry of the current method with regards to the V values of the underlying transformer model. The current method is cycle-inconsistent, even though the same embeddings are used for both the reference and target prompts. Traditionally, the method has only employed the V values of the reference prompt, neglecting those of the target prompt. This characteristic likely introduces asymmetry, breaking the cycle-consistency of the model. We propose an additional injection mechanism for the “CL P2P” framework, a V value injection method, allowing for the consideration of both the V values of the reference and target images. To control the number of injection steps, we introduce an additional hyperparameter, “V value injection steps”. 
The V value injection function is defined based on the logic highlighted in the footnote of the image.</p> <h2 id="future-work">Future work</h2> <p>The development of the “CL P2P” framework is a significant advancement in prompt-to-prompt image editing methods. However, there are still areas where further research will be needed. A critical area of exploration lies in the enhancement of cycle-consistency within the prompt-to-prompt editing process. Further research is required to ascertain and refine the optimal values for the V value injection steps, a key component in achieving cycle-consistency.</p> <p>Additionally, the existing frameworks predominantly focus on singular reference and target prompts. While this approach has opened new pathways in human-computer interaction, several research questions remain unexplored. A notable inquiry is the potential to integrate various prompt-to-prompt editing methods, such as “word swap”, “attention re-weighting,” and “prompt refinement.” This integration aims to facilitate a dynamic, conversational interaction between users and generated images, enabling a continuous and iterative editing process. Current state-of-the-art generative image models, such as mid-journey models, do not inherently support such iterative mechanisms. The realization of this functionality necessitates extensive research and development, offering an exciting challenge for future advancements in the field.</p> <h2 id="conclusion">Conclusion</h2> <p>Image generation models, inherently stochastic in nature, exhibit variability in outcomes even when similar prompts are applied. This stochasticity can result in significant deviations in the generated images. 
For instance, prompts like “A woman’s face with blond hair” and “A woman’s face with red hair” may yield images with markedly different facial features, demonstrating the algorithm’s underlying randomness.</p> <p>In response to this challenge, prompt-to-prompt image generation and editing techniques have emerged as a significant area of interest in recent years. These methods, while constituting a potent tool in the arsenal of image editing alongside fine-tuning, semantic mixing, and masking approaches, are not without limitations. Specifically, the precision of edits and the geometric alignment between reference and target images often fall short of expectations.</p> <p>Our research delves into the influence of critical hyperparameters on the outcomes of a cross-attention-based prompt-to-prompt method. We aimed to dissect the impact of each hyperparameter on image editing and geometric adaptation between the reference and target images. Our findings make substantive contributions to enhancing the precision and geometric convergence in prompt-to-prompt methods, with the following key insights:</p> <ul> <li>An extensive analysis of three critical hyperparameters (silhouette selection, cross-attention injection, and self-attention injection) was conducted, focusing on their effect on the precision of an attention-based prompt-to-prompt editing method.</li> <li>Contrary to existing literature<d-cite key="hertz2022prompttoprompt"></d-cite>, our study reveals that self-attention injection plays a more pivotal role than previously recognized. 
We recommend incorporating self-attention injection from the reference image for approximately 80% of the diffusion steps during the target image generation process.</li> <li>We introduce the novel <em>“CL P2P”</em> framework, designed to elevate the efficacy of prompt-to-prompt editing.</li> </ul> <p>Our research not only deepens the understanding of prompt-to-prompt editing methods but also achieves enhanced editing precision and improved similarity between reference and target images.</p> <p>Looking ahead, the <em>“CL P2P”</em> framework paves the way for further exploration, particularly in addressing the cycle consistency of prompt-to-prompt methods. Additionally, exploring strategies to seamlessly integrate different prompts into a continuous dialogue could revolutionize human-computer interaction, enabling users to edit generated images through conversational engagement.</p> Understanding Bias in Speech to Text Language Models 2023-11-07T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/Language-Bias <h2 id="motivation">Motivation</h2> <p>With all the buzz that ChatGPT is getting recently, it is clear that machine learning models that can interact with humans in a natural manner can quite literally flip the world around. If that is not enough proof, Siri and Google Assistant, their popularity and convenience can give you a bit more of an idea. We can see how speech processing is important as a way for humans and computers to communicate with each other, and reach great levels of interactivity if done right. A lot of the world’s languages do not have written forms, and even those that do, typing can be less expressive and slower than speaking.</p> <p>The core of these assistant systems is automatic speech recognition, often shortened as ASR or alternatively speech2text, which we will be using. This problem sounds rather simple: turn voice into text. However easy it might sound, speech2text is far from solved. 
There are so many factors that affect speech that make it extremely difficult. First, how do we know when someone is speaking? Most speech2text models are trained on and perform well when the audio is clean, which means there is not a lot of noise. In the real world, however, one can be using speech2text in a concert or a cocktail party, and figuring out who is currently speaking to the system amid all the noise is a problem in itself! Another important factor that complicates speech2text is that we don’t all talk the same way. Pronunciations vary by person and region, and intonation and expressiveness change the acoustics of our speech. We can see this in full effect when auto-generated YouTube captions look a bit… wrong.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/reddit-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/reddit-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/reddit-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-Language-Bias/reddit.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">From https://www.reddit.com/r/funny/comments/ha7dva/youtube_auto_captions_spot_on/</figcaption> </figure> <p>Aside from who and how we talk, another big part that makes speech2text hard has to do with the idiosyncrasies of text and language itself! Some idiosyncrasies of language include orthography, the system of how we write sounds and words, and syntax, the system of how words string together into sentences. If you are familiar with English, you would be familiar with the English syntax: subject, verb, object, and a particular order for adjectives. 
We would instinctively say “small white car,” but not “white small car” and most definitely not “car white small.” Cross over the English channel to France (or the St. Lawrence River to Quebec), and the order changes. For French, you would say “petite voiture blanche,” which word for word is “small car white.”</p> <p>Travel a bit further and you would see that Chinese uses “白色小车” (”white color small car”), Thai uses “รถสีขาวคันเล็ก” (”car color white * small”) and Kannada uses “ಸಣ್ಣ ಬಿಳಿ ಕಾರು” (”small white car”, same as English). Aside from order of adjectives, larger differences in syntax include having the subject appear first or last in a sentence, position of verbs, and how relative clauses work. All this means that language is quite non-linear, and natural language models that understand language must cope with our silly little arbitrary orders!</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/twitter_meme-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/twitter_meme-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/twitter_meme-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-Language-Bias/twitter_meme.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">From https://www.bbc.com/news/blogs-trending-37285796</figcaption> </figure> <p>Thankfully though, for speech2text how sentences work is not as important as how phonetics and orthography works. But even then, things are not quite smooth sailing either. We sometimes take for granted how difficult reading is, perhaps until you start to learn a second language and realize how much we internalize. 
English is notorious for not spelling words the way they sound, mostly because writing was standardized a long time ago and pronunciation has shifted since. This makes it difficult for machine learning models to learn.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/ought-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/ought-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/ought-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-Language-Bias/ought.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">Sentence from https://en.wikipedia.org/wiki/Ough_(orthography)</figcaption> </figure> <p>Wow, look at all those words with “ough”! There are at least eight different pronunciations of the word, or from another perspective, at least eight different audios magically turn out to be spelt the same! In the diagram we tried substituting the red “ough”s with their rhymes in blue, keeping in mind that some dialects pronounce these words differently (especially for “borough”), and in green is the International Phonetic Alphabet representation of the sounds. IPA tries to be the standard of strictly representing sounds as symbols. What’s at play here? English is plagued with silent letters (”knight”), and extraneous letters (all the “ough”s and more).</p> <p>Some languages are more straightforward in their orthography than others. Spanish tends to be fairly phonemic, which pretty much means that its writing and speaking are quite in sync. <d-cite key="orthography"></d-cite> French, however, is very famous for its silent letters. 
A word like “hors-d’oeuvres”, which means appetizer, can be represented in IPA as [ɔʁ dœvʁ], you may see that around half the letters aren’t pronounced! Kannada, a language in South India that is spoken by one of our group members, is said to be quite phonemic, but doesn’t come without a different kind of headache. A number of languages, predominantly in South Asia and Southeast Asia, use a kind of writing system that combines a consonant character with a vowel character to form a new character that represents the consonant-vowel combination. The new character retains some part of the original consonant and vowel in some cute manner, kind of like the letter <strong>Æ</strong> but dialed up many notches. Most abugida systems descend from the 3rd century BCE Brahmi script.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/brahmi-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/brahmi-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/brahmi-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-Language-Bias/brahmi.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Above are some examples of scripts of this type, demonstrating two consonants k and m combining with vowels long a, i and u. Another interesting feature for some of these writing systems is that sometimes the vowels jump to the front, for example in Thai ก (k) + เ (e) = เก (ke). Again, writing is non-linear at times!</p> <h3 id="past-work">Past Work</h3> <p>Past work shows success in training speech2text models in German, Spanish, and French <d-cite key="parp"></d-cite>, <d-cite key="german"></d-cite>. 
Some use pruning and finetuning of state of the art English models, and others train models from scratch for each language. Other works such as <d-cite key="thaispeech"></d-cite> show that models can learn less common languages, like Thai which is the language our other group member speaks, as well, but they are more complex and specific to each language. <d-cite key="parp"></d-cite> circumvents this by pruning wav2seq (a SOTA speech2text model) and finetuning the model for different languages. While this showed promising results, we wanted to dive deeper to understand, from a linguistic and data driven perspective, the biases that <em>simple</em> speech2text models had.</p> <p>Many state of the art models rely on encoder-decoder models. An encoder is used to create an expressive feature representation of the audio input data and a decoder maps these features to text tokens. Many speech models like <d-cite key="data2vec"></d-cite>, <d-cite key="wav2letter"></d-cite>, <d-cite key="contextNet"></d-cite> also use self-supervised pretraining on the encoder for better performance. One example is Wav2Seq. Wav2Seq uses unsupervised pretraining to annotate audio samples with unique characters in the form of a pseudo language. The building blocks for these encoders are generally transformer based <d-cite key="wav2seq"></d-cite>.</p> <p>Other methods use deep recurrent neural networks like in <d-cite key="cs_toronto"></d-cite>. RNNs are great at sequential tasks and have an internal memory to capture long term dependencies. Transformer based methods have outperformed RNNs and LSTM based architectures now though.</p> <p>How do these features (idiosyncrasies) differ between languages and does this affect how well speech2text models learn? 
By doing more ablation studies on specific features, maybe this can inform the way we prune, or choose architecture, and can also help determine the <em>simplest</em> features necessary in a speech2text model that can still perform well on various languages.</p> <p>There has been work that performs ablation studies on BERT to provide insight on what different layers of the model are learning <d-cite key="ganesh2019"></d-cite>. Experiments suggest lower layers learn phrase-level information, middle layers learn syntactic information, and upper layers learn more semantic features. We want to do a similar study, but on dissecting the components of language rather than the components of a particular SOTA model. Our hypothesis is that by doing so, we can be better informed when selecting preprocessing methods and models.</p> <p>Let’s get started with some experiments!</p> <h2 id="generating-a-dataset">Generating a Dataset</h2> <p>We want to explore how each of these language features affects how speech2text models learn. Let’s create a custom dataset where we can implement each of these language rules in isolation. To do that, we’ll build out our own language. Sounds daunting — but there are only a few key building blocks that matter to us. Languages are made of sentences, sentences are made of words, words are made of letters, and letters are either consonants or vowels. Let’s start with that.</p> <p>From <d-cite key="prehistoric_speech"></d-cite>, languages have 22 consonants on average and about 9 vowels on average so that’s what we’ll have in our language too. We represent consonants as positive integers from 1 to 23 and vowels as negative integers from -9 to -1. After all, letters are just symbols!</p> <p>A word, at its most crude representation, is just a string of these consonants and vowels at some random length. 
To make sentences, we just string these words together with spaces, represented by 0, together.</p> <p>Here’s a sample sentence in our language:</p> <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>[14 -2 -9 13 0 8 16 -8 -2 0 -3 -8 16 12 0 10 20 -3 -7 0 14 18 -9 -4 0 16 -3 -5 14 0 -3 9 -8 3 0 -9 -1 22 7 0 12 -5 6 -7 0 -7 22 12 -2 0 22 -9 2 -2 0 17 -2 -8 9 0 1 -4 18 -9 0 19 -7 20 -2 0 8 18 -4 -2 0 -9 8 -4 15 0 -9 -2 22 18] </code></pre></div></div> <p>Ok, that seems a little meaningless. We don’t have to worry about meaning in the general semantic sense though. What we do care about, is pronouncing this language, and creating a mapping from these written sentences to an audio sample. Let’s do that next. Audio samples can be represented as spectrograms. Spectrograms give us a visual representation of audio by plotting the frequencies that make up an audio sample.</p> <p>Here’s an example:</p> <p>When we say <strong>“It’s never too early to play Christmas music”</strong>, this is what it might look like visually:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/christmas_spectrogram-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/christmas_spectrogram-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/christmas_spectrogram-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-Language-Bias/christmas_spectrogram.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>The key here is that we don’t exactly need audio samples, but rather an embedding that <strong><em>represents</em></strong> an audio sample for a written sentence. 
Embeddings are just low dimensional mappings that represent high dimensional data.</p> <p>So, in our case, our spectrogram for a generated audio sample looks something like:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/gen_spectrogram-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/gen_spectrogram-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/gen_spectrogram-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-Language-Bias/gen_spectrogram.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Even though audio samples might be complicated waveforms, the embedding for the first letter looks something like:</p> <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>tensor([[ 3.6887e-01, -9.6675e-01, 3.2892e-01, -1.2369e+00, 1.4908e+00, 8.1835e-01, -1.1171e+00, -1.9989e-01, 3.5697e-01, -1.2377e+00, 4.6225e-01, -6.7818e-01, -8.2602e-01]]) </code></pre></div></div> <p>Again, maybe meaningless to us who haven’t really learned this new language. There are some vertical columns of the same color, and these represent the silences between each word. You might notice that these columns aren’t exactly the same color, and that’s because we’ve added a bit of Gaussian noise to the audio embedding samples to simulate noise that might occur when recording audio samples on a microphone.</p> <p>Ok great! We’ve got this perfect language that maps the same sentence to the same audio sample. 
Now, let’s get to work adding some features that we talked about in the previous section to make this language a bit more complicated.</p> <p>We narrow our feature selection to the following three:</p> <ol> <li><strong>Silent Letters:</strong> letters in the written language that don’t appear in the phonetic pronunciation</li> <li><strong>Letter Combos:</strong> two letters combine in the script but are still pronounced separately</li> <li><strong>Letters out of Order:</strong> phonetic pronunciation is in a different order than written language</li> </ol> <h3 id="silent-letters">Silent Letters</h3> <p>Silent letters mean they appear in our written labels but not in our audio samples. We could just remove letters from our audio embeddings, but that’s a little funky. We don’t usually pause when we come to a silent letter — saying (pause - nite) instead of just (nite) for night. To preserve this, let’s instead add letters to our written label.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/silent_letters-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/silent_letters-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/silent_letters-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-Language-Bias/silent_letters.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>In the diagram above, we have a small written sample and some audio embeddings represented as colored blocks. 
We generate some rules similar to those on the left.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/silent_letters.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/silent_letters.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/silent_letters.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-Language-Bias/silent_letters.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>In this case, we add a 7 after the 3, simulating a silent letter at consonant 7. We then pad the audio sample with a silent (0) to make up for the size increase of the written label. Note that silent letters don’t add pauses during the audio.</p> <h3 id="combining-letters">Combining Letters</h3> <p>When combining letters, our written script changes, but our audio remains the same. We choose to combine every pair where a vowel follows a consonant. 
This is the most common case of letter combination in languages that have this feature.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/combo_letters.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/combo_letters.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/combo_letters.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-Language-Bias/combo_letters.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Here we have to pad the written labels as we combine two letters into one.</p> <h3 id="letters-out-of-order">Letters out of Order</h3> <p>We choose some pairs of consonants and vowels. Swap the pair order for every instance of the pair in the written sample. No padding needs to be added here.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/swap.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/swap.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/swap.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-Language-Bias/swap.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h2 id="controlled-experiments">Controlled Experiments</h2> <p>Now for the fun part! Let’s see what happens when we test our new language, with each of these rules in isolation, with some models. 
Regardless of the model we choose, our goal is to learn a written label for a given audio sample.</p> <p>We’re going to test our language with the building blocks of these state-of-the-art models — transformers and RNNs. The results from these experiments can inform us on the biases that these fundamental models might have in their most “vanilla” state.</p> <p>We hypothesize that transformers will perform better because RNNs have a limited memory size, while Transformers use attention which means they can learn orderings from anywhere in the audio sample.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/system-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/system-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/system-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-Language-Bias/system.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h2 id="results">Results</h2> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/results1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/results1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/results1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-Language-Bias/results1.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source 
class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/results2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/results2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/results2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-Language-Bias/results2.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/results3-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/results3-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/results3-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-Language-Bias/results3.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture><figcaption class="caption">RNNs are dashed lines, Transformers are solid lines</figcaption> </figure> <p>Hmm..so Transformers performed better, but not that much better than our RNNs. This could be because our hypothesis that attention is better for long sequences and RNNs have limited memory may not apply. When we generated our language, the consonant and vowel orderings were pretty random. 
Our rules have some pattern to them, but not as much as a real human language — so maybe attention can exploit these better in real human language, but doesn’t give as much of an advantage in our generated dataset.</p> <p>As for our features, it seems that silent letters perform significantly worse than some of the other rules. This makes sense because attention and internal memory, perhaps, provide some mechanism for dealing with swapped or out-of-order letters. Transformers have the ability to “focus” on features of the sample that it deems important. Our rules do have some pattern, and the models just have to learn these patterns.</p> <p>With silent letters, though there is a pattern to an audio sample not being present, the rest of the sounds succeeding the silent letters are all shifted over. This is probably why letter combos also don’t do too great. With letter combos and silent letters, the one-to-one mapping between a letter and its phonetic pronunciation (or audio embedding) is thrown off for the rest of the sequence.</p> <h2 id="corners-cut">Corners Cut</h2> <p>This certainly tells us a lot! But, we should take these results with a grain of salt. There are some discrepancies between human language and the way that we generated our dataset that we should consider.</p> <ul> <li> <p>Actual audio speech recognition systems mostly don’t predict letter by letter, some do subwords and others do word level recognition; but in the grand scheme of things these distinctions may be negligible — after all, they’re all units! This means our controlled experiment, for our purposes, simulates character recognition models which may misspell words (”helouw” instead of “hello”). If the model is at the subword level, misspellings may decrease, since character sequences like “ouw” would not be in the list of possible subwords, or the vocabulary. “ouw” is a very un-English sequence, see if you can find a word that contains these three letters in succession! 
Misspellings like “hellow” might still happen though, since it is a plausible combination of English-like sequences “hel” and “low”. If the model is at the word level, there will not be misspellings at all.</p> </li> <li> <p>speech2text models generally either use an encoder-decoder architecture, or otherwise the input and output typically do not have to match in dimension. Both options mean that there is no need to pad written or audio samples to make sure they’re the same length. In our case, we have to pad our written/audio to make sure everything is the same size. Connectionist Temporal Classification <d-cite key="ctc"></d-cite> is used to postprocess outputs and compute loss.</p> <ul> <li>The way CTC works is that first it assumes that a letter may take more than one audio frame to say, which tends to be the case, especially for vowel sounds which are typically looooooooooonger than consonant sounds. There is also a special character epsilon that serves as the “character boundary” symbol, but is different from the silent symbol. The output of a CTC model is deduplicated, and epsilons are removed. 
Here is CTC in action from <d-cite key="ctc"></d-cite>:</li> </ul> </li> </ul> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/ctc-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/ctc-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/ctc-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-Language-Bias/ctc.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <ul> <li> <p>An effect of the letter combination script in our controlled experiment is that there will be some letter combinations that exist as a class (aka in the alphabet) but never seen in the dataset. For example (1, 12) are in the alphabet as consonants, but 112 isn’t a letter.</p> </li> <li> <p>Actual language has tone, intonation, speed and noise that can make it harder to learn. Here is where something like Wave2Seq can help as tokens are clustered, so if someone takes a little longer to say AA, it will still register as the same pseudo token.</p> </li> </ul> <h2 id="real-language">Real Language</h2> <p>Alas, real-world languages are more complicated than our controlled languages. We wanted to see if the patterns we learnt in our controlled experiments would still hold true for actual datasets. For this, we needed to find a relatively phonemic language and another language that differs only by one feature. As mentioned earlier, Spanish qualifies for the former, and French qualifies for the latter. 
French, to the best of our knowledge, is prevalent with silent letters, but don’t really exhibit other features in our controlled experiments.</p> <p>We’re using the CommonVoice dataset, which is a crowdsourced dataset of people reading sentences in many languages, and might be harder to train because of how unclean the dataset as a whole may be. We preprocess the audio using a standard method, which is the following:</p> <ul> <li>First, calculate the audio spectrogram and condense the result by summing up the amplitudes of a few frequencies that belong in the same “bucket”, to yield Mel-frequency cepstral coefficients (MFCC)</li> <li>To add some temporal context, the differential of the MFCC and its second-degree differential are calculated and concatenated to the MFCC</li> <li>The label vocabulary is constructed, by looking at what letters exist in the dataset, and the written data is converted to numbers</li> </ul> <p>Behold, an example of the preprocessed dataset for Spanish!</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/spanish-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/spanish-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/spanish-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-Language-Bias/spanish.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>target tensor: [30, 43, 1, 41, 53, 40, 56, 39, 1, 59, 52, 39, 1, 58, 39, 56, 47, 44, 39, 1, 42, 43, 1, 43, 52, 58, 56, 39, 42, 39, 7] target sequence: Se cobra una tarifa de entrada. 
</code></pre></div></div> <p>We tried training transformers and RNNs, with and without CTC, on this real-world data. Without CTC, the performances of the models are, respectfully, really bad. After a number of epochs, the only thing learnt is that the space character exists, and the 6% accuracy comes from the model predicting only spaces:</p> <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>predicted tensor: [16 39 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] predicted sequence: Ea target tensor: [71, 28, 59, 83, 1, 53, 57, 1, 54, 39, 56, 43, 41, 43, 11, 1, 36, 1, 56, 43, 57, 54, 53, 52, 42, 47, 43, 52, 42, 53, 1, 43, 50, 50, 53, 57, 5, 1, 42, 47, 48, 43, 56, 53, 52, 8, 1, 14, 59, 50, 54, 39, 42, 53, 1, 43, 57, 1, 42, 43, 1, 51, 59, 43, 56, 58, 43, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] target sequence: ¿Qué os parece? Y respondiendo ellos, dijeron: Culpado es de muerte. </code></pre></div></div> <p>Got it. Like our silent letter controlled experiment, a high mismatch between the audio frame and its written frame causes models to not be able to learn well. Let’s put in our mighty CTC Loss and see how it works! It turns out that after some 30 epochs, it still isn’t doing quite so well. 
Here, let’s see an example of a transformer trained on the Spanish dataset with CTC:</p> <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>predicted tensor: [ 0 39 0 57 0 54 39 0 41 0 41 0 43 0 47 0 43 0 57 0 53 0 42 0 58 0 47 0 53 0 41 0 54 0 39 0 43 0 57 0 43 0] predicted sequence: aspacceiesodtiocpaese target tensor: [71 28 59 83 1 53 57 1 54 39 56 43 41 43 11 1 36 1 56 43 57 54 53 52 42 47 43 52 42 53 1 43 50 53 57 5 1 42 47 48 43 56 53 52 8 1 14 59 50 54 39 42 53 1 43 57 1 42 43 1 51 59 43 56 58 43 7] target sequence: ¿Qué os parece? Y respondiendo elos, dijeron: Culpado es de muerte. </code></pre></div></div> <p>Perhaps the transformer is too big for this and learns pretty slowly. It is starting to pick up on some sounds, for example for “¿Qué os parece?” it seems to have picked up “as pacce” and “respondiendo” has some similarities to “esodtio,” but we really needed to squint to see that similarity. If we let it run for longer, perhaps it would get better… slowly.</p> <p>RNNs, however, came up on top. We’re using bidirectional LSTM RNN for this, and it seems that CTC works! Here’s the RNN trained on the Spanish dataset with CTC:</p> <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>predicted tensor: [30 0 59 0 52 0 53 0 51 0 40 0 56 43 0 57 0 43 0 1 42 0 43 0 42 0 47 0 42 0 39 0 42 0 43 0 1 0 89 0 51 0 40 0 43 0 58 0 39 0 59 0 52 0 53 0 1 42 0 43 0 1 50 0 39 0 1 0 57 0 54 0 39 0 88 0 53 0 52 0 39 0 7] predicted sequence: Sunombrese dedidade ómbetauno de la spañona. target tensor: [30 59 1 52 53 51 40 56 43 1 57 43 1 42 43 56 47 60 39 1 42 43 50 1 52 53 51 40 56 43 1 58 39 86 52 53 1 42 43 1 23 39 1 16 57 54 39 88 53 50 39 7] target sequence: Su nombre se deriva del nombre taíno de La Española. </code></pre></div></div> <p>Looks great! Of course there are some word boundary mistakes, but overall it looks pretty similar. What about French? 
Here are transformer and RNN results for what we hypothesized is a language full of silent letter features:</p> <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>predicted tensor (Transformer): [21 0] predicted sequence (Transformer): L predicted tensor (RNN): [18 0 47 0 1 0 56 0 56 40 0 54 0 44 0 1 55 0 40 0 55 1 53 0 40 0 36 0 48 40 0 49 55 0 44 0 55 0 53 40 0 36 0 49 0 1 49 50 0 53 0 1 0 1 44 0 47 0 1 40 0 1 51 0 50 0 55 0 36 0 49 0 1 54 0 40 0 47 40 0 48 40 0 49 55 0 1 71 0 1 57 0 36 0 54 0 44 0 54 6] predicted sequence (RNN): Il uuesi tet reamentitrean nor il e potan selement à vasis. target tensor: [18 47 1 36 1 36 56 54 44 1 75 55 75 1 53 75 38 40 48 40 49 55 1 44 49 55 53 50 39 56 44 55 1 40 49 1 14 56 53 50 51 40 1 50 82 1 44 47 1 40 54 55 1 51 50 55 40 49 55 44 40 47 40 48 40 49 55 1 44 49 57 36 54 44 41 6] target sequence: Il a ausi été récement introduit en Europe où il est potentielement invasif. </code></pre></div></div> <p>Wow! The transformer got stuck in the blank hole black hole, but the RNN looks not too shabby. Some word boundary issues for sure, but we can see similarities. “potan selement” and “potentielement” actually do sound similar, as do “à vasis” and “invasif.” Definitely not as good as Spanish though. 
Here’s a comparison of losses for the four models:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/real_results-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/real_results-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-07-Language-Bias/real_results-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-07-Language-Bias/real_results.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>One thing that’s very much worth noticing is that the validation losses plateaued or rose during training. Did we overfit our data, or are these languages too hard that they can’t be fully learnt from our data, and the high loss is due to the idiosyncrasies of language? Probably both!</p> <p>Now did these real-world explorations match our hypotheses from controlled experiments or not? Our hypothesis from controlled experiments says that French would do worse than Spanish, which is what we’re seeing. Additionally, we see a pretty significant gap in loss between transformers and RNN models, given that CTC loss is used.</p> <p>Here comes the confusing part. Most literature <d-cite key="transf_thesis"></d-cite><d-cite key="rnn_study"></d-cite> would say that transformers should perform better than RNN, even with CTC. This matches with our controlled experiments but did not match our real-world experiments. What went wrong? For one, we think that our models might still be too small and not representative of actual real-world models. We also trained the models for quite a short amount of time with a small amount of data that might be noisy. 
Perhaps our recipe was just the perfect storm to cause our transformer model to be stuck in the blank hole. We found an article that documents the tendency for MLPs to get stuck in a stage of predicting blanks before moving on to predicting real characters, which sounds like what’s going on for us. <d-cite key="blank_ctc"></d-cite> Some other sources point to the assertion that input spectrogram lengths must be longer than label lengths, and suggest refraining from padding labels with blanks. We followed their suggestions but unfortunately could not bring the transformer models out of the blank hole.</p> <h2 id="learnings">Learnings</h2> <p>What have we looked at?</p> <ul> <li>Linguistics: we learnt how weird languages can be!</li> <li>Models: we touched upon how speech2text models usually work</li> <li>Hindrances: we hypothesized and tested a few features that affected model performance <ul> <li>Silent letters are our biggest enemies, followed by letter combinations and out-of-order letters</li> </ul> </li> <li>Battle: we compared how two different foundational models for speech2text against each other <ul> <li>In our controlled experiments, it’s a pretty close call but transformer came up on top by just a slight margin</li> </ul> </li> <li>Real: we presented what a real-world dataset looks like, the data preprocessing methods, and checked if our learnings from controlled experiments hold <ul> <li>Creating a spectrogram and a character vocabulary is the standard!</li> <li>French (silent letter-ish) vs. Spanish (perfect-ish) matches our hypothesis!</li> <li>CTC is the cherry on top for success but only works well with RNN, putting RNN on top by a long shot this time!</li> </ul> </li> </ul> <p>We would like to expand our linguistics experiments further as future work, as there are many more features and combinations not explored here (for example, Arabic writing usually drops all vowels — we imagine that this feature would affect performance a lot!) 
Another avenue of further work is to try training on other real-world languages to see whether our hypotheses still hold true.</p> Regularization Techniques for Attention Layers in Transformer Models 2023-11-06T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/attention-regularization <h2 id="introduction">Introduction</h2> <p>Transformer models are exceptionally popular and successful at completing many tasks. However, they can overfit to their training data if they are not given enough data to generalize. Frequently, part of the reason for overfitting is the overfitting of a self-attention layer, which highlights important tensors in the model. However, if there is not enough data, the attention layer can overfit to the training data and highlight some tensors too much. Therefore, researchers have proposed methods of regularizing attention layers. Adding regularization can be complex and there have been many different approaches to solving this issue, from simply smoothing attention layers to encouraging multi-headed models to approach different solutions. Therefore, there are differences in the effects of different regularization methods and some might perform better in different circumstances. There does not seem to be a standard approach to dealing with this form of regularization and while many authors have claimed their regularizations have positive effects on training, there are few comparisons of regularization methods. In this study, we will analyze previous work on regularizing self-attention layers and propose new regularization techniques to identify the advantages and disadvantages of differing models.</p> <h2 id="background">Background</h2> <p>There are many proposed regularization strategies for self-attention layers. We implement and utilize many of the more popular strategies in this study while also drawing inspiration from other methods in proposed methodologies. 
However, we could not find comparisons across regularization methods or implementations of these methods publicly available. Therefore, we implemented previously proposed strategies and proposed new regularization strategies based on methods seen in fully connected neural networks. The methods used fall into the following three categories. We will explain the exact methods and implementations used for each of these three categories in the methodology section. They represent a solid overview of the self-attention regularization space and contain the most popular methods currently in use.</p> <h3 id="dropout-based-methods">Dropout Based Methods</h3> <p>Dropout based methods involve randomly setting a specified fraction of the input units to zero during training time, which helps in preventing overfitting <d-cite key="srivastava2014dropout"></d-cite>. This prevents the model from having all the information during training and therefore forces the model to generalize during training.</p> <h3 id="weight-smoothing-methods">Weight Smoothing Methods</h3> <p>Weight smoothing methods aim to regularize the self-attention layer by modifying the weights such that the attention weights are closer to the uniform distribution and do not overly emphasize specific inputs. This helps prevent overfitting by not allowing the model to only use a few inputs <d-cite key="lohrenz2023relaxed"></d-cite>.</p> <h3 id="cross-head-methods">Cross Head Methods</h3> <p>Cross head methods involve techniques that operate across different attention heads, aiming to diversify the learned representations and prevent redundancy <d-cite key="li2018multi"></d-cite>. Therefore, the goal is to prevent each head from being similar to other heads.</p> <h2 id="methodology">Methodology</h2> <h3 id="overall-architecture">Overall Architecture</h3> <p>We begin by implementing and benchmarking a vision transformer with no regularization. 
We had previously implemented a transformer model as part of 6.s898 problem set 3, so we used this as basis for our models. This model follows an architecture stemming from An Image Is Worth 16X16 Words <d-cite key="dosovitskiy2020image"></d-cite>. This transformer was easily modifiable and relatively simple and so it served as a good basis for our adjustments. The framework of the architecture goes as follows.</p> <ol> <li>Take an image and split it into patches of specified size.</li> <li>Embed these patches and add a positional encoding to their embedding.</li> <li>Treat these embeddings as a sequence input to a transformer model.</li> <li>Use a transformer model with multi-head self-attention to transform the input into some specified space.</li> <li>Use this output to classify the image.</li> </ol> <p>For this specific model, we use a 6 layer transformer with 5 self-attention heads and a patch size of 4. We will be focusing on the multi-head self-attention phase of the transformer model. 
The following is a diagram of the overall architecture of a vision transformer.</p> <div class="row mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/6S898_Fall_2023_homeworks_ps3-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/6S898_Fall_2023_homeworks_ps3-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/6S898_Fall_2023_homeworks_ps3-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-06-attention-regularization/6S898_Fall_2023_homeworks_ps3.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="caption"> Diagram of Vision Transformer Model <d-cite key="dosovitskiy2020image"></d-cite> </div> <h3 id="data">Data</h3> <p>We use the CIFAR-10 and CIFAR-100 datasets for this study <d-cite key="krizhevsky2009learning"></d-cite>. CIFAR-10 consists of 60,000 32x32 color images representing 10 different classes. These classes are airplanes, cars, birds, cats, deer, dogs, frogs, horses, ships, and trucks. They are evenly distributed, such that there are 6,000 images of each class. CIFAR-100 uses the same format, but instead has 100 evenly distributed classes. We split this data into training and test sets and tested the different forms of regularization. We found that our transformer model with no regularization could easily achieve near-zero error on both sets of training data, but only achieved around 60% in test accuracy for the CIFAR-10 dataset and around 30% accuracy on the CIFAR-100 dataset. 
Therefore, the model is overfitting to the training data and testing regularization methods on this dataset could help the model generalize more on the test data.</p> <div class="row mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/cifar_10_example-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/cifar_10_example-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/cifar_10_example-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-06-attention-regularization/cifar_10_example.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="caption"> Example of CIFAR-10 Images <d-cite key="krizhevsky2009learning"></d-cite> </div> <h3 id="regularization-methods">Regularization Methods</h3> <p>We tested the following regularization methods for our model. We tested models contained within the three different categories of regularization mentioned in the background above, these being dropout based methods, weight smoothing methods, and cross-head methods.</p> <h4 id="dropout-based-methods-1">Dropout Based Methods</h4> <h5 id="dropcolumn-">DropColumn <d-cite key="zehui2019dropattention"></d-cite>:</h5> <p>This method uses dropout, a common regularization method used in fully connected neural networks, in self-attention layers. This can force the model to generalize better and not rely on specific inputs as much. The authors propose the following methodology to add dropout to the model, which is similar to standard dropout techniques in neural networks. 
To perform dropout, each column in the attention weight matrix is sampled from a Bernoulli distribution with some probability. We use a dropout ratio of 0.2 for these experiments. We set the sampled columns to zero weight during training. Therefore, we are able to randomly drop columns in the attention weight matrix.</p> <h5 id="drophead-">DropHead <d-cite key="zhou2020scheduled"></d-cite>:</h5> <p>We can also perform dropout on the heads across the multi-head attention layer. With this method, we completely drop heads during training to reduce reliance on particular heads and increase the generalizability of the model. We use a dropout ratio of 0.2 for these experiments. This prevents the model from being dominated by a few attention heads.</p> <h4 id="weight-smoothing-methods-1">Weight Smoothing Methods</h4> <h5 id="relaxed-attention-">Relaxed Attention <d-cite key="lohrenz2023relaxed"></d-cite>:</h5> <p>This method smooths the attention weights in the self-attention layer to reduce overfitting. This helps reduce the magnitude of the highest attention scores. We do this by mixing in the uniform distribution to attention weights during training. We use some parameter $ \color{white} \gamma $ to evaluate different levels of mixing. Therefore, we apply the following function to our self-attention weights.</p> <p>$ \color{white} A[i,j] = (1-\gamma) \times A[i,j] + \gamma \times \frac{1}{T}, \quad \forall i, j \in [0,1,…,T]$</p> <p>We use $ \color{white} \gamma = 0.1 $ for our experiments. This adds a low level of uniformity but prevents the model from only attending upon a small number of tensors during training. Therefore, this should limit the amount of overfitting that is possible.</p> <h5 id="noise-injection">Noise Injection</h5> <p>Noise injection has been used to regularize fully connected neural networks, but we have not found any literature that proposes using noise injection to regularize self-attention layers. 
We propose two methodologies to add regularization and robustness to our model training. We inject noise into our input embeddings with the following formula.</p> <p>$ \color{white} x_{i,j}^{noised} = x_{i,j}+ \frac{1}{100} * median(x) * N(0,1) $</p> <ol> <li> <p>Overall Noise Injection: The first methodology involves simply adding noise to the input during training. We do this by adding Gaussian random noise to the input before calculating self-attention weights in each layer of the transformer.</p> </li> <li> <p>Individual Head Noise Injection: Our second proposed methodology takes advantage of the multi-headed transformer design. We add different Gaussian random noise to each head, such that the heads will receive different inputs. Therefore, the model must become more robust to different inputs.</p> </li> </ol> <h4 id="cross-head-methods-1">Cross-Head Methods</h4> <h5 id="decorrelation">Decorrelation</h5> <p>We propose adding a decorrelation term to our loss function. The goal of this loss is to reward differences across attention heads. We begin by calculating the self-attention weights for all of the attention heads. We then compute the pairwise dot products of each head’s attention weights. This will increase the loss if there are heads that are highly correlated. This will cause the heads of the network to differ from the other heads in the network and hopefully generalize better. Therefore, we use the following loss term.</p> <p>$ \color{white} \text{Added Loss} = \sum_{i={0,…,H},j={i+1,…,H}} \frac{\text{sum}((\Lambda_i^T \Lambda_j)^2)}{\text{Number of elements in }\Lambda_i^T \Lambda_j}$, where H is the number of heads and $ \color{white} \Lambda_i$ is the ith attention head weights.</p> <p>This method is inspired by another method, multi-head attention with disagreement regularization <d-cite key="li2018multi"></d-cite>. 
However, the disagreement regularization method relies on calculating more differences than just the attention weight matrices, which is out of the scope of these experiments.</p> <h5 id="normalization">Normalization</h5> <p>We propose adding the 2-norm of all elements in the attention weight matrix to the loss function to limit the emphasis of individual inputs. Therefore, this will smooth the weights and reward more uniform predictions. This should reduce overfitting and make the model more generalizable. We calculate this norm using $ \color{white} \frac{\text{torch.linalg.norm(attention weights)}}{\text{number of elements in attention weights}} $. This computes the 2-norm of all elements across attention heads and adds more loss to weights that emphasize specific inputs more than others. Therefore, this should add smoothing to the weights.</p> <h3 id="training">Training</h3> <p>We train each model for 25 epochs on the full training set with a batch size of 256. We use the AdamW optimizer, with a learning rate of 0.001. 
We use the following parameters for our vision transformer.</p> <table> <thead> <tr> <th>Parameter</th> <th>n_channels</th> <th>nout</th> <th>img_size</th> <th>patch_size</th> <th>dim</th> <th>attn_dim</th> <th>mlp_dim</th> <th>num_heads</th> <th>num_layers</th> </tr> </thead> <tbody> <tr> <td>CIFAR-10</td> <td>3</td> <td>10</td> <td>32</td> <td>4</td> <td>128</td> <td>64</td> <td>128</td> <td>5</td> <td>6</td> </tr> <tr> <td>CIFAR-100</td> <td>3</td> <td>100</td> <td>32</td> <td>4</td> <td>128</td> <td>64</td> <td>128</td> <td>5</td> <td>6</td> </tr> </tbody> </table> <p>We train each model individually on both datasets.</p> <h2 id="results">Results</h2> <h3 id="cifar-10">CIFAR-10</h3> <p>We begin by analyzing the training results on the CIFAR-10 dataset.</p> <div class="row mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/training_loss-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/training_loss-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/training_loss-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-06-attention-regularization/training_loss.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="caption"> Training Loss on the CIFAR-10 Dataset </div> <div class="row mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/training_accuracy-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/training_accuracy-800.webp" /> <source 
class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/training_accuracy-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-06-attention-regularization/training_accuracy.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="caption"> Training Accuracy on the CIFAR-10 Dataset </div> <p>We see that most of the models, except for the dropout based models, achieve near zero error and perfect accuracy on the training set. Therefore, we see that the dropout term is stopping the model from perfectly memorizing the dataset but all other regularization techniques are not forcing the model to change the weights enough to prevent perfect accuracy.</p> <div class="row mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/test_loss-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/test_loss-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/test_loss-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-06-attention-regularization/test_loss.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="caption"> Test Loss on the CIFAR-10 Dataset </div> <p>Looking at the test results, the two dropout models have much lower loss achieved on the test dataset. 
The rest of the models have similar losses on the test dataset.</p> <div class="row mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/test_accuracy-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/test_accuracy-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/test_accuracy-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-06-attention-regularization/test_accuracy.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="caption"> Test Accuracy on the CIFAR-10 Dataset </div> <p>We see that the two dropout methods also have higher accuracy than the model without regularization. However, the decorrelation model has the highest test accuracy. Overall, the test dataset results are significantly lower than state of the art and a more advanced model may be needed to achieve better performance.</p> <h3 id="cifar-100">CIFAR-100</h3> <p>We move on to training and testing the models on the CIFAR-100 dataset. This dataset has more classes and therefore fewer examples of each class. 
Therefore, the model finds it more difficult to generalize on the test dataset.</p> <div class="row mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/training_loss100-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/training_loss100-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/training_loss100-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-06-attention-regularization/training_loss100.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="caption"> Training Loss on the CIFAR-100 Dataset </div> <p>Again, we see that all methods except the dropout based methods achieve near-zero error.</p> <div class="row mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/training_accuracy100-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/training_accuracy100-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/training_accuracy100-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-06-attention-regularization/training_accuracy100.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="caption"> Training Accuracy on the CIFAR-100 Dataset </div> <p>We see similar results to the CIFAR-10 dataset in training. 
The two dropout methods are unable to achieve perfect loss and accuracy but all other methods are able to. This includes the methods with added loss, that being the normalization method and the decorrelation method. This will depend on the parameters of the model and these models would have higher loss if we used more emphasis on the added loss.</p> <div class="row mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/test_loss100-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/test_loss100-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/test_loss100-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-06-attention-regularization/test_loss100.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="caption"> Test Loss on the CIFAR-100 Dataset </div> <p>We see that the two dropout methods have significantly lower loss on the test dataset, with all other methods performing similarly.</p> <div class="row mt-3"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/test_accuracy100-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/test_accuracy100-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-06-attention-regularization/test_accuracy100-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-06-attention-regularization/test_accuracy100.png" class="img-fluid" 
width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="caption"> Test Accuracy on the CIFAR-100 Dataset </div> <p>We again see consistent results with the CIFAR-10 dataset. The two dropout methods and decorrelation improve the accuracy on the test set, while the others are about the same as without regularization. In this case, the drophead method performs the best.</p> <h2 id="further-research">Further Research</h2> <p>Further research is needed to further improve the generalizability of this transformer architecture for these datasets. The model still has overfitting issues, even with high regularization and so more research with different architectures or regularization methods is needed to improve the study. Further comparison of regularization methods on alternative datasets and types of data, such as text, would also be valuable to look at.</p> <h2 id="conclusion">Conclusion</h2> <p>Regularization is an important tool to reduce overfitting and improve the generalizability of a model. The results show that adding various forms of regularization can improve the results of a model, but our implementations did not cause dramatic change to the ability of the model to generalize to the test set. Most of the models still had a very large gap between their training accuracy and test accuracy. However, we did see notable improvements for both the dropout models and the decorrelation model. The dropout models were the only models that added regularization such that the model could not perfectly memorize the training set. Therefore, their training accuracy was significantly lower but they also had higher test accuracy. Additionally, the decorrelation model was also successful. While the model followed a similar pattern during training to the model without regularization, the test accuracy was generally higher, suggesting the added error did force the model to learn different parameters. 
Therefore, based on these results, adding regularization can be helpful in improving the generalizability of transformer models, especially when they have limited data. The other methods, such as the noise based methods, normalization, and relaxation did not appear to have a significant effect on training or test outputs. It is likely that alternative parameters or architectures are needed to realize their effect. Lastly, while this analysis was only completed using vision transformers, different datasets or network architectures may have significantly different results. Therefore, these other regularization methods may be more successful in other contexts. However, these tests prove that there are circumstances in which regularization can have a beneficial effect on transformer performance and is therefore a worthwhile experiment when dealing with overfitting transformers.</p> Neural PDEs for learning local dynamics and longer temporal rollouts 2023-11-05T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/neural-PDEs-long-time-dynamics <h2 id="partial-differential-equations">Partial differential equations</h2> <p>At the continuum level, spatiotemporal physical phenomena such as reaction-diffusion processes and wave propagations can be described by partial differential equations (PDEs). By modeling PDEs, we can understand the complex dynamics of and relationships between parameters across space and time. However, PDEs usually do not have analytical solutions and are often solved numerically using methods such as the finite difference, finite volume, and finite element methods <d-cite key="LoggMardalEtAl2012"></d-cite>. 
For example, the finite element method (FEM) approximates PDE solutions by first discretizing a sample domain into a mesh of interconnected elements and then solving a system of equations iteratively given a set of boundary conditions, initial conditions, and material properties.</p> <p>In this blog, we will show two examples of PDEs, one of which is the Navier-Stokes equation which describes the dynamics of viscous fluids. The equation below shows the 2D Navier-Stokes equation for a viscous and incompressible fluid in vorticity form on a unit torus, where \(w\) is the vorticity, \(u\) the velocity field, \(\nu\) the viscosity coefficient, and \(f(x)\) is the forcing function. The solution data were from the original paper<d-cite key="li2020fourier"></d-cite> where the problem, with a periodic boundary condition, was solved with a pseudospectral method using a 1e-4 time step with the Crank-Nicolson scheme.</p> \[\begin{gather} \partial_t w(x, t) + u(x, t) \cdot \nabla w(x, t) = \nu \Delta w(x, t) + f(x), \quad x \in (0,1)^2, t \in [0,T] \\ \nabla \cdot u(x, t) = 0, \quad x \in (0,1)^2, t \in [0,T] \\ w(x, 0) = w_0(x), \quad x \in (0,1)^2 \end{gather}\] <p>We can visualize the 2D PDE solution over the 50 time steps:</p> <div class="l-body-outset" style="display: flex; justify-content: center; align-items: center;"> <iframe src="/staging/assets/html/2023-11-05-neural-PDEs-long-time-dynamics/navierstokes.html" frameborder="0" scrolling="no" height="600px" width="100%"></iframe> </div> <div class="caption"> Solution of 2D Navier-Stokes PDE <d-cite key="li2020fourier"></d-cite> - drag the slider! </div> <h3 id="motivations-for-neural-pdes">Motivations for neural PDEs</h3> <p>Well-established numerical methods are very successful in calculating the solutions of PDEs, however, these methods require high computational costs especially for high spatial and temporal resolutions. 
Furthermore, it is important to have fast and accurate surrogate models that would target problems that require uncertainty quantification, inverse design, and PDE-constrained optimizations. In recent years, there has been growing interest in neural PDE models that act as a surrogate PDE solver<d-cite key="PDEBench2022"></d-cite>, especially neural operators that aim to learn the mapping between input and output solution functions. These models are trained on numerical solutions from existing methods and inferences are orders of magnitude faster than calculating the solutions again through numerical methods.</p> <p>In this article, I will first examine if we can apply neural networks to learn the dynamics in PDE solutions and therefore replace PDE solvers with a neural PDE as the surrogate solver. We will start with a base U-Net model with convolutional layers. Next, I will examine the neural operator methods, notably the Fourier Neural Operator (FNO). Primarily, the Fourier neural operator has proven to predict well for PDE solutions and we will use it to compare with the U-Net model on the representations learnt in the Fourier layers. Next, I will examine the FNO’s performance on another PDE with two dependent states. We will notice that the FNO is capable of learning lower frequency modes but fails to learn local dynamics and higher frequency modes. We then finally introduce some improvements to the FNO to tackle this problem involving local dynamics and long term rollout errors.</p> <h3 id="dataset-and-training-schemes-for-the-2d-navier-stokes-pde">Dataset and training schemes for the 2D Navier-Stokes PDE</h3> <p>For the dataset, I will start with the 2D time-dependent Navier-Stokes solution (\(\nu\) = 1e-3) that was shipped from Zongyi Li et al’s paper <d-cite key="li2020fourier"></d-cite>. The problem for any given model would then be to learn the mapping from an input solution (vorticity) of t=[0,10] to the solution of t=(10, 40]. 
For all models involving Navier-Stokes, the original implementations were used, but implementations were improved or new ones were added for the second PDE problem which more details will be shared in later parts of the article. We use 1000 solutions for training and 200 for the test dataset. The models are trained with 500 epochs with an initial learning rate of 0.001, the AdamW optimizer is used with a cosine annealing scheduler. Unless otherwise specified, a relative L2 loss is used for training and prediction of each data batch. For U-Net and FNO2D, the models use 2D convolutions in the spatial domain and recurrently predict through the time domain (autoregressive training). For FNO3D, the time domain is included as the 3rd dimension in the input data for the FNO to learn both spatial and temporal dependencies within the solutions.</p> <h2 id="base-model-u-net">Base model (U-Net)</h2> <p>Let’s begin with examining whether a U-Net with convolutional layers can be used to learn the dynamics. U-Net<d-cite key="RonnebergerFB15"></d-cite> is a popular model architecture for image to image predictions and image segmentation tasks. It consists of a series of downsampling and upsampling layers with skip connections, and my re-implementation is based on <a href="https://github.com/khassibi/fourier-neural-operator/blob/main/UNet.py">this repo</a>.</p> <p>We can use the U-Net to learn the features from the input PDE solution frames and predict the solution in the next time step, treating the 2D solution as an image. As for the time component, the surrogate model takes the input solution from the previous k time steps to predict solution in the next k+1 time step. Then, the solution from the previous k-1 steps are concatenated with the predicted solution as the input back into the model to predict the next step, and so on. 
In a nutshell, the model is trained to predict autoregressively.</p> <div style="text-align: center; margin-right: 10px;"> <div style="width: 70%; margin: auto;"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/unet_train_test_loss-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/unet_train_test_loss-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/unet_train_test_loss-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/unet_train_test_loss.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <p style="margin-top: 0px;">Training curve for U-Net with average relative L2 train and test loss</p> </div> <div style="text-align: center; margin-left: 10px;"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/unet_2dt_nspred42.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/unet_2dt_nspred42.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/unet_2dt_nspred42.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/unet_2dt_nspred42.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p style="margin-top: 0px;">U-Net's prediction of 2D 
Navier-Stokes for unseen test set (id=42)</p> </div> <p>The U-Net seems to predict well for the 2D Navier-Stokes test set. However, the average final test loss of 0.0153 is still considerably high. For longer time rollout, the errors can accumulate. Let’s examine the FNO2d-t and FNO3d models next.</p> <h2 id="fourier-neural-operators">Fourier Neural Operators</h2> <p>Fourier neural operators (FNOs) <d-cite key="li2020fourier"></d-cite> try to learn the mapping between input functions and solution functions <d-cite key="kovachki2021neural"></d-cite>, for example, mapping the solutions from earlier to later time steps for time-dependent PDEs.</p> <p>The authors introduced the Fourier layer (SpectralConv2d for FNO2d) which functions as a convolution operator in the Fourier space, and complex weights are optimized in these layers. The input functions are transformed to the frequency domain by performing fast Fourier transforms (torch.fft) and the output functions are then inverse transformed back to the physical space before they are passed through nonlinear activation functions (GeLU) to learn nonlinearity. Fourier transformations are widely used in scientific and engineering applications, such as in signal processing and filtering, where a signal / function is decomposed into its constituent frequencies. In the FNO, the number of Fourier modes is a hyperparameter of the model - the Fourier series up till the Fourier modes are kept (i.e. lower frequency modes are learnt) while higher frequency modes are truncated away. Notably, since the operator kernels are trained in the frequency domain, the model is theoretically capable of predicting solutions that are resolution-invariant.</p> <h3 id="applying-fno2d-and-fno3d-on-2d-navier-stokes-time-dependent-pde">Applying FNO2D and FNO3D on 2D Navier-Stokes time-dependent PDE</h3> <p>We reimplement and train the FNO2D model on the same train-test data splits for the 2D Navier-Stokes solution. 
Notably, the final average relative L2 loss (for test set) is 0.00602 after 500 epochs of training. Comparing this with the U-Net that is also trained and predicted with the same scheme, the FNO2D has an improved performance!</p> <div style="text-align: center; margin-left: 10px;"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/2dt_nspred42.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/2dt_nspred42.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/2dt_nspred42.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/2dt_nspred42.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p style="margin-top: 0px;">FNO2D's prediction of 2D Navier-Stokes for unseen test set (id=42)</p> </div> <p>The predicted solutions look impressive and it seems like the dynamics of the multiscale system are learnt well, particularly the global dynamics. Likewise, the FNO3D gives similar results. Instead of just convolutions over the 2D spatial domains, the time-domain is taken in for convolutions in the Fourier space as well. According to the authors, they find that the FNO3D gives better performance than the FNO2D for time-dependent PDEs. 
However, it uses way more parameters (6560681) compared to FNO2D (928661 parameters) - perhaps the FNO2D with recurrent time is sufficient for most problems.</p> <div style="text-align: center; margin-right: 10px;"> <div style="width: 70%; margin: auto;"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/train_test_loss-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/train_test_loss-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/train_test_loss-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/train_test_loss.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <p style="margin-top: 0px;">Training curve for FNO3D with average relative L2 train and test loss</p> </div> <div style="text-align: center; margin-left: 10px;"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/nspred42.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/nspred42.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/nspred42.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/nspred42.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p style="margin-top: 
0px;">FNO3D's prediction of 2D Navier-Stokes for unseen test set (id=42)</p> </div> <h3 id="representation-learning-in-the-fourier-layers">Representation learning in the Fourier layers</h3> <p>You might be curious how the Fourier layers learn the Navier-Stokes dynamics - let’s examine some weights in the SpectralConv3d layers (for the FNO3D). We take the magnitudes of the complex weights from a slice of each layer (4 Fourier layers were in the model).</p> <div style="text-align: center; margin-right: 10px;"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/fourierlayers-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/fourierlayers-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/fourierlayers-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/fourierlayers.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p style="margin-top: 0px;">Visualizing weights in the Fourier layers</p> </div> <p>There seems to be some global features that are learnt in these weights. By learning in the Fourier space, the Fourier layers capture sinusoidal functions that can generalise better for dynamics according to the dynamical system’s decomposed frequency modes. 
For CNNs, we know that the convolutions in spatial domain would lead to the learning of more local features (such as edges of different shapes), as compared to more global features learnt in Fourier layers.</p> <h3 id="on-the-importance-of-positional-embeddings">On the importance of positional embeddings</h3> <p>In FNO implementations, besides the input data for the 2D + time domains, the authors also append positional encodings for both x and y dimensions so the model knows the location of each point in the 2D grid. The concatenated data (shape = (B, x, y, 12)) is then passed through the Fourier layers and so on (note: B is the batch size, x and y the spatial sizes, and 12 consists of 10 t steps and 2 channels for positional encodings along x and y). It is important to understand that the positional embedding is very important to the model performance.</p> <div style="display: flex; justify-content: center; align-items: center;"> <div style="text-align: center; margin-right: 10px;"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/show_dxdt-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/show_dxdt-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/show_dxdt-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/show_dxdt.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p style="margin-top: 0px;">Original with positional encoding</p> </div> <div style="text-align: center; margin-left: 10px;"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" 
srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/noposencoding_dxdt-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/noposencoding_dxdt-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/noposencoding_dxdt-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/noposencoding_dxdt.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p style="margin-top: 0px;">No positional encoding</p> </div> </div> <p>We train the same FNO3D on the same data but this time without the positional encodings concatenated as the input. Simply removing these positional encodings for x and y domains causes the model to underperform. Here, we are comparing FNO3D with and without positional encoding. FNO3D has a final relative test loss of 0.0106 but the test loss is 0.0167 without positional encodings. Inspecting the change of x over t for a sample test dataset, the differences in performance then become more visible. Note that we also observe the data have well-defined sinusoidal functions in the dynamics.</p> <h2 id="improving-accuracies-in-predicting-local-dynamics-and-long-term-rollouts-in-time-dependent-pdes">Improving accuracies in predicting local dynamics and long-term rollouts in time-dependent PDEs</h2> <p>Let’s apply the FNO to other PDEs, particularly problems where local dynamics and long-term accuracies are important. 
Here, I introduce another PDE as an example - a coupled reaction heat-diffusion PDE with two dependent states<d-cite key="Robertson2018"></d-cite>.</p> \[\begin{gather} \kappa \frac{\partial^2 T}{\partial x^2} + \rho H_r \frac{\partial \alpha}{\partial t} = \rho C_p \frac{\partial T}{\partial t} \\ \frac{\partial \alpha}{\partial t} = A \exp \left( -\frac{E}{RT} \right) f(\alpha) \end{gather}\] <p>Based on the initial conditions of temperature (T) and degree of cure (alpha) and with Dirichlet boundary conditions on one end of the sample, the T and alpha propagate across the domain (here, the 1D case is examined). For certain material parameters and when initial conditions of T and alpha are varied, we can see that the dynamics can become chaotic after some time - we can visualize it below.</p> <div class="l-body-outset"> <iframe src="/staging/assets/html/2023-11-05-neural-PDEs-long-time-dynamics/unstablefromp.html" frameborder="0" scrolling="no" height="750px" width="100%"></iframe> </div> <div class="caption"> Solution of the above coupled PDE with 2 dependent states, solved using FEM. Drag the slider! </div> <p>For this dataset, we aim to use the first 10 time steps of the solution (heat diffusion from x=0) as input to a neural PDE to predict the next N time steps of the solution. With 10 steps, we predict the 11th step and the prediction is concatenated with the last 9 steps to predict the next time step and so on. We first generate the training data by solving the PDE numerically using the Finite Element Method using the FEniCS package. Specifically, we use mixed finite elements with the continuous Galerkin scheme and a nonlinear solver with an algebraic multigrid preconditioner.</p> <p>We use 1228 solutions for the training set and 308 solutions for the test set. 
The datasets are split into pairs of 10 trajectories, whereby the input data consists of the solution of 10 time steps and the output data (to be predicted) consists of the solution of the next 10 time steps. Since the neural PDE is trained to predict 10 to 1 time step, every batch is trained autoregressively and an L2 loss is taken for all 10 forward predictions before the sum is backpropagated in every batch. Likewise, the AdamW optimizer is used with an initial learning rate of 1e-4 and a cosine annealing scheduler. The models are trained for 300 epochs with a batch size of 16.</p> <p>I initially tried the FNO1D implementation on my PDE dataset and noticed that the errors accumulate with longer time rollouts using the trained model. FNO1D is used since we only have 1 spatial dimension in the 1D solution and the solutions are predicted recurrently, just like the use of FNO2D for the 2D Navier-Stokes example earlier. The FNO2D model was also used to convolve over both x and t. Both performances are not ideal within 1 cycle of forward prediction.</p> <h3 id="revin-and-other-training-tricks-to-improve-accuracies-in-longer-temporal-rollout">RevIN and other training tricks to improve accuracies in longer temporal rollout</h3> <p>To overcome this problem, there have been attempts to generally improve the accuracies of neural PDE models and also training tricks proposed to improve long-term accuracies in rollout. Using the FNO1D, I first tested out some training tricks, such as the pushforward and temporal bundling which are covered in the paper on message passing neural PDEs<d-cite key="brandstetter2022message"></d-cite>. Incorporating the reversible instance normalization layer (RevIN)<d-cite key="kim2022reversible"></d-cite> gives a more promising improvement. With RevIN, the input solution is passed through the normalizing RevIN layer before the FNO1d and the output solution is denormalized through the RevIN layer. 
Some examples of these tricks for longer term robust forecasting are covered in <a href="https://github.com/Rui1521/Symmetry-Tutorial/blob/main/Tutorial_Symmetry.ipynb">this repo</a>.</p> <p>Using a trained FNO1D with a RevIN layer, here is its prediction on an unseen test set starting from the first 10 time steps as the input solution. The true solution is used to predict up till 50 more time steps forward (5 full cycles forward). While the temperature is predicted with decent accuracies for first cycle (10 steps forward until t=60 shown), the errors accumulate over more steps.</p> <div style="text-align: center; margin-left: 10px;"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/fnorevin_fromp1dt156.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/fnorevin_fromp1dt156.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/fnorevin_fromp1dt156.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/fnorevin_fromp1dt156.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p style="margin-top: 0px;">FNO1d's prediction (1)</p> </div> <p>Generally, we attribute this to the fact that the Fourier layers may not be able to learn more local changes in the dynamics since the higher frequency modes in the Fourier series are truncated away. The global dynamics of the propagating front (heat diffusion along x) are captured reasonably well (the positional encodings probably also have a large part to play). 
We want to build on the FNO to improve predictions for longer temporal rollout especially for multiscale dynamical systems with both global and local changes. Ideally, we want to take an input of a few time steps from a more expensive numerical solver and pass it through a trained surrogate model to predict N time steps (with N being as high as possible).</p> <h2 id="introducing-large-kernel-attention">Introducing Large Kernel Attention</h2> <p>To overcome the problems highlighted for this PDE, we attempt to include a large kernel attention layer (LKA) that was introduced in the Visual Attention Network paper<d-cite key="guo2022visual"></d-cite> by Meng-Hao Guo et al. The large kernel attention was first introduced as an alternative to the Vision Transformers (ViT) to enable higher spatial adaptivity and long-range correlations. While simple to implement, the authors’ Visual Attention Network surpasses ViTs and CNNs on tasks such as object detection and pose estimation. A similar strategy was introduced last month<d-cite key="zhao2023local"></d-cite> for 2D problems although their implementation was not shared.</p> <p>Therefore, it may be feasible to introduce attention mechanisms to learn local dynamics in PDEs better, and this can complement the Fourier layers which capture global dynamics better. Herein, we add the LKA layers after the Fourier blocks for the FNO1D, and the new model has 5056 more parameters (583425 to 588481). 
The performance is found to have greatly improved, especially for local dynamics in the unstable propagations.</p> <div style="text-align: center; margin-left: 10px;"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/LKAfno_fromp1dt156.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/LKAfno_fromp1dt156.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/LKAfno_fromp1dt156.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/LKAfno_fromp1dt156.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p style="margin-top: 0px;">FNO1d + LKA's prediction (1)</p> </div> <p>For the same data, the addition of LKA gave improved accuracies over predictions in the next 50 time steps. We attribute this to the large kernel attention’s ability to focus on local dynamics at specific parts of the spatiotemporal changes. The LKA has 3 components: a spatial depth-wise convolution, a spatial depth-wise dilation long-range convolution, and a channel convolution.</p> \[\begin{gather} \text{Attention} = \text{Conv}_{1 \times 1}(\text{DW-D-Conv}(\text{DW-Conv}(F))) \\ \text{Output} = \text{Attention} \otimes F \end{gather}\] <p>I adapted from the LKA’s <a href="https://github.com/Visual-Attention-Network/VAN-Classification/blob/main/models/van.py">original implementation</a> to apply to our 1D PDE. 
Let’s examine the predictions on another test data.</p> <div style="display: flex; justify-content: center; align-items: center;"> <div style="text-align: center; margin-left: 2px;"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/fnorevin_fromp1dt876.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/fnorevin_fromp1dt876.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/fnorevin_fromp1dt876.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/fnorevin_fromp1dt876.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p style="margin-top: 0px;">FNO1d's prediction (2)</p> </div> <div style="text-align: center; margin-left: 2px;"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/LKAfno_fromp1dt876.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/LKAfno_fromp1dt876.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/LKAfno_fromp1dt876.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-05-neural-PDEs-long-time-dynamics/LKAfno_fromp1dt876.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p style="margin-top: 0px;">FNO1d + LKA's prediction (2)</p> </div> </div> <p>While 
the predictions are significantly improved, the errors still accumulate with longer rollouts and the model fails to capture dynamics if we extend predictions till 100 steps forward. More work is needed to improve existing neural PDE methods before they can be used as foundational models for PDEs.</p> <h2 id="conclusion">Conclusion</h2> <p>In this article, we have introduced the use of neural networks as potential surrogate model solvers for partial differential equations that can be expensive to solve using numerical methods. Compared to the base model U-Net, Fourier neural operators have introduced a novel and useful way of learning PDE solutions through convolutions in the frequency space. We first reimplemented the FNO2D and FNO3D on the 2D Navier-Stokes PDE solution shipped with their paper. While it achieves great performance learning global dynamics, existing models struggle to capture local dynamics (higher frequency modes are truncated away) and longer temporal rollouts. We demonstrate that despite adding a RevIN layer and several temporal training tricks, the FNO1D could not predict accurately the solutions of a coupled time-dependent PDE. With the inclusion of attention mechanism through the large kernel attention, the FNO1D’s performance significantly improved. We learn that introducing spatial attention can be useful and more work will be explored to improve predictions of multiscale spatiotemporal dynamical systems.</p> Graph neural networks v.s. transformers for geometric graphs 2023-11-01T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/proposal <h2 id="introduction">Introduction</h2> <p>Machine learning on graphs is often approached with message passing graph neural network (GNN) models, where nodes in the graph are embedded with aggregated messages passed from neighboring nodes <d-cite key="zhou2020graph"></d-cite>. 
However, with the significant success of transformers in language modelling <d-cite key="vaswani2017attention"></d-cite> and computer vision recently <d-cite key="dosovitskiy2020image"></d-cite>, there are a growing number of transformers developed for graphs as well. In this project we investigate the application of graph neural networks compared to transformers on geometric graphs defined on point clouds. We aim to explore the performance of these two models on predicting the binding affinity for a protein-ligand interaction given the atomic coordinates of the docked protein-ligand structure, which is a highly relevant task in drug discovery. This blog post walks through an introduction into graph neural networks and transformers on molecules, our model architecture, experimental results, and a discussion comparing the two architectures.</p> <h2 id="background-and-relevant-work">Background and relevant work</h2> <h3 id="graph-neural-networks-on-molecules">Graph neural networks on molecules</h3> <p>Graphs are comprised of nodes and edges, and we can model any set of objects with a defined connectivity between them as a graph. For example, social networks are a set of people and the connectivity between them is defined by who knows whom. We can also see that grid data formats, like images, are also graphs where each pixel is a node and edges are defined to the adjacent pixels. Any sequential data, such as text, can be modeled as a graph of connected words. In this section we focus on graphs of molecules where nodes are atoms and edges are defined between atoms. These edges are often defined by the molecular bonds, or for atoms with 3D coordinate information the edges can be defined by a spatial cutoff $d$ based on the Euclidean distance between nodes. 
Given a graph we can use a graph neural network to learn a meaningful representation of the graph and use these representations for predictive tasks such as node-level prediction, edge-level prediction, or graph-level prediction. Graph neural networks learn through successive layers of message passing between nodes and their neighboring nodes.</p> <p>An important property of many GNNs applied on 3D molecules is SE(3)-equivariance. This means that any transformation of the input in the SE(3) symmetry group–which includes all rigid body translations and rotations in $\mathbb{R}^3$ –will result in the same transformation applied to the output. This property is important for the modelling of physical systems; for example if the prediction task is the force applied on an atom in a molecule, rotation of the molecule should result in the model predicting the same forces but rotated. In some tasks we do not need equivariance but rather SE(3)-invariance (which is a subset of SE(3)-equivariance) where any transformation of the input in the SE(3) symmetry group results in the same output. This is often the case when the task of the model is to predict a global property of the molecule which should not change if all 3D coordinates of the molecule are translated and rotated. SE(3)-invariance will be required for our model of binding affinity as global rotations and translations of the protein-ligand structure should yield the same predicted binding affinity.</p> <p>Early SE(3)-equivariant GNNs on point clouds used directional message passing <d-cite key="gasteiger2020directional"></d-cite> which used the pairwise distance and direction between nodes as features for the GNN, however they were soon shown to be limited in expressivity <d-cite key="garg2020generalization"></d-cite>. Now state-of-the-art (SOTA) models in this area are based on higher order geometric properties such as dihedral angles and representations in the geometric group SO(3). 
Some examples include GemNet <d-cite key="gasteiger2021gemnet"></d-cite> and e3nn <d-cite key="geiger2022e3nn"></d-cite>. e3nn has also shown that it is much more data-efficient when learning as the model does not need to learn to be equivariant, which non-equivariant models do. For a non-equivariant model to learn to be equivariant it would have to be trained on many SE(3) transformations of the input mapping to the same output, which is very inefficient. e3nn models have led to exceptional performance for tasks related to predicting molecular forces and energies <d-cite key="batzner20223"></d-cite> <d-cite key="musaelian2023learning"></d-cite>. For the task of binding affinity some GNNs that achieve high performance using GNNs are ProNet <d-cite key="wang2022learning"></d-cite> and HoloProt <d-cite key="somnath2021multi"></d-cite>.</p> <h3 id="graph-transformers-on-molecules">Graph transformers on molecules</h3> <p>With the proliferation of transformers in the broader field of machine learning, this has also led to the development of graph transformers. In a transformer model each node attends to all other nodes in the graph via attention where the query is a projection of the feature vector of a node, and the key and value is the projection of feature vectors of all other nodes. Hence, graph transformers and transformers applied to sequences (e.g. text) are largely similar in architecture. However, differences arise in the positional encodings in a graph transformer as it is defined in relation to other nodes in the graph <d-cite key="ying2021transformers"></d-cite>. For geometric graphs, positional encodings can be applied as a bias term on the attention value of node $u$ on $v$, where the bias is a learned value that is dependent on the distance between the nodes <d-cite key="zhou2023uni"></d-cite> <d-cite key="luo2022one"></d-cite>. 
There are also other ways of implementing positional encodings in the form of Laplacian eigenvectors, and random walk diagonals which aim to encode the centrality of each node in the graph <d-cite key="rampavsek2022recipe"></d-cite>. Recently, in an effort to unify different methods to generate structural and positional graph encodings, Liu et al. <d-cite key="liu2023graph"></d-cite> apply a novel pretraining approach with a multiobjective task of learning a variety of positional and structural encodings to derive more general positional and structural encodings. Graph transformers are also achieving SOTA performance for benchmarks on predicting quantum properties of molecules <d-cite key="zhou2023uni"></d-cite> <d-cite key="luo2022one"></d-cite> and binding affinity <d-cite key="kong2023generalist"></d-cite>.</p> <h2 id="motivation">Motivation</h2> <p>Given the growing application of both GNNs and transformers we aim to compare their performance on the same task of protein-ligand binding affinity prediction. We also aim to compare models as we can see analogies between graph transformers and GNNs, where “message passing” in the graph transformer involves messages from all nodes rather than the local neighborhood of nodes. 
We view protein-ligand binding affinity prediction as a suitable task to compare the two architectures as there are aspects of both the GNN and graph transformer architecture that would be advantageous for the task: binding affinity is a global prediction task for which the graph transformer may better capture global dependencies, conversely binding affinity is also driven by local structural orientations between the protein and ligand which the GNN may learn more easily.</p> <h2 id="problem-definition">Problem definition</h2> <ul> <li>The input to the model is a set of atoms for the protein pocket $X_{\mathrm{protein}}$ and ligand $X_{\mathrm{ligand}}$, for which we have the atomic identity and the 3D coordinates, and the binding affinity $y$ for the structure.</li> <li>For the graph neural network we define a molecular graph of the protein ligand structure $G=(V,E)$ where $V$ are the $n$ nodes that represent atoms in the molecule and the edges $E$ are defined between two nodes if their 3D distance is within a radial cutoff $r$. 
We further define two types of edges: intramolecular edges for edges between nodes within $X_{\mathrm{protein}}$ and $X_{\mathrm{ligand}}$, and intermolecular edges for nodes between $X_{\mathrm{protein}}$ and $X_{\mathrm{ligand}}$.</li> <li>For the graph transformer it is applied to the whole set of atoms $(X_{\mathrm{protein}}, X_{\mathrm{ligand}})$, and we can use the 3D coordinates of the atoms to derive positional encodings.</li> <li>Performance is determined by the root mean squared error, Pearson, and Spearman correlation coefficients between true binding affinity and predicted binding affinity.</li> </ul> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-proposal/problem_definition-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-proposal/problem_definition-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-proposal/problem_definition-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-proposal/problem_definition.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Figure 1. A protein-ligand structure, Protein Data Bank (PDB) entry 1a0q. The protein backbone is shown in blue, and the ligand is shown in green. The model would be given this structure and the objective is to predict the binding affinity of the ligand to the protein. </div> <h2 id="dataset">Dataset</h2> <p>We use the PDBbind dataset for the protein-ligand structures and binding affinity. 
In addition, for benchmarking we use the benchmark from ATOM3D <d-cite key="townshend2020atom3d"></d-cite> with a 30% and 60% sequence identity split on the protein to better test generalisability of the model. The sequence identity split is based on sequence similarity of proteins in the test and training datasets. The 30% sequence identity split is more challenging as there are more dissimilar proteins in the test set.</p> <h2 id="architecture">Architecture</h2> <h3 id="graph-neural-network">Graph neural network</h3> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-proposal/gnn_architecture1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-proposal/gnn_architecture1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-proposal/gnn_architecture1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-proposal/gnn_architecture1.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Figure 2. Overview of the GNN architecture for a graph constructed from a protein-ligand structure. </div> <p>A graph is constructed from the atomic coordinates of the atoms in the protein pocket $X_{\mathrm{protein}}$ and ligand $X_{\mathrm{ligand}}$ where the nodes are the atoms. Intramolecular edges are defined between nodes within $X_{\mathrm{protein}}$ and $X_{\mathrm{ligand}}$ with a distance cutoff of 3 Å, and intermolecular edges for nodes between $X_{\mathrm{protein}}$ and $X_{\mathrm{ligand}}$ with a distance cutoff of 6 Å. The model architecture is defined as follows:</p> <p>(1) Initial feature vectors of the nodes are based on a learnable embedding of their atomic elements. 
The edge features are an embedding of the Euclidean distance between the atomic coordinates. The distance is embedded with a Gaussian basis embedding which is projected with a 2 layer MLP.</p> <p>(2) We define two types of messages in the GNN, given by the two types of edges, intermolecular messages and intramolecular messages. The architecture used for the two types are messages are the same but the weights are not shared, this is to reflect that information transferred between atoms within the same molecule is chemically different to information transferred between atoms of different molecules. The message passing equation uses the tensor product network introduced by e3nn <d-cite key="geiger2022e3nn"></d-cite>, and our implementation is based on the message passing framework used by DiffDock <d-cite key="corso2022diffdock"></d-cite>. We omit the details of the tensor product network for simplicity but provide the overall method below.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-proposal/message_passing_eqn-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-proposal/message_passing_eqn-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-proposal/message_passing_eqn-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-proposal/message_passing_eqn.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <p>where node $b$ are the neighbors of node $a$ in $G$ given by intermolecular or intramolecular edges denoted with $t$. 
The message is computed with tensor products between the spherical harmonic projection with rotation order $\lambda = 2$ of the unit bond direction vector, \(Y^{(\lambda)}({\hat{r}}_{a b})\), and the irreps of the feature vector of the neighbor $h_b$. This is a weighted tensor product and the weights are given by a 2-layer MLP, $\Psi^{(t)}$ , based on the scalar ($\mathrm{0e}$) features of the nodes $h_a$ and $h_b$ and the edge features $e_{ab}$. Finally, $LN$ is layer norm. Overall, the feature vectors of the nodes are updated by intermolecular and intramolecular messages given by the tensor product of feature vectors of intermolecular and intramolecular neighbors and the vector of the neighbor to the node.</p> <p>(3) After $k$ layers of message passing we perform pooling for the nodes of $X_{\mathrm{protein}}$ and the nodes of $X_{\mathrm{ligand}}$ by message passing to the “virtual nodes” defined by the centroid of the protein and ligand, using the same message passing framework outlined above.</p> <p>(4) Finally, we concatenate the embedding of the centroid of the protein and ligand and pass this vector to a 3 layer MLP which outputs a singular scalar, the binding affinity prediction.</p> <h3 id="graph-transformer">Graph transformer</h3> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-proposal/graph_transformer_architecture2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-proposal/graph_transformer_architecture2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-proposal/graph_transformer_architecture2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-proposal/graph_transformer_architecture2.png" width="auto" height="auto" onerror="this.onerror=null; 
$('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Figure 3. Overview of the graph transformer architecture for a graph constructed from a protein-ligand structure. </div> <p>The model architecture is as follows:</p> <p>(1) Initial feature vectors of the nodes are based on a learnable embedding of their atomic elements.</p> <p>(2) The graph transformer architecture is based on graphormer <d-cite key="ying2021transformers"></d-cite>, where the input is $H \in \mathbb{R}^{n \times d}$ where $d$ is the hidden dimension and $n$ is the number of nodes. The input is projected by $W_Q \in \mathbb{R}^{d \times d_K}, W_K \in \mathbb{R}^{d \times d_K}, W_V \in \mathbb{R}^{d \times d_V}$. Since graphs have more complex positional information than sequences, conventional positional encoding methods used in sequence-based transformers are not applicable to graphs. Positions in a graph are defined relative to all other nodes, thus positional embeddings cannot be added at the node feature vector level but instead are added as a bias to the pairwise node attention matrix. We define $B \in \mathbb{R}^{n \times n}$, where $B_{ij}$ is given by a Gaussian basis embedding of the Euclidean distance $d_{ij}$ between node $i$ and $j$, which is passed to a 3 layer MLP that outputs a singular scalar. Then the self-attention is calculated as $Q = HW_Q, K = HW_K, V = HW_V$ and $A = \frac{QK^T + B}{\sqrt{d_k}}, Attn(H) = Softmax(A) V$. In addition to all atomic nodes, we also add a <code class="language-plaintext highlighter-rouge">&lt;cls&gt;</code> token used in the BERT model which functions as a virtual global node <d-cite key="devlin2018bert"></d-cite>. The distance of this node to all other nodes is a learnable parameter. 
This process is duplicated across multiple heads and we concatenate the embeddings across all heads after $k$ layers as the updated feature vector.</p> <p>(3) We take the final embedding of the <code class="language-plaintext highlighter-rouge">&lt;cls&gt;</code> node and pass it through a 3 layer MLP which outputs a singular scalar, the binding affinity prediction.</p> <h3 id="loss-function">Loss function</h3> <p>Both models are trained to minimise the root mean squared error between the predicted binding affinity and true binding affinity.</p> <h2 id="experiments">Experiments</h2> <p>In order for the results to be comparable between the two models, both models have approximately 2.8 million parameters.</p> <p>GNN model details:</p> <ul> <li>2 layers of message passing, number of scalar features = 44, number of vector features = 16. Number of parameters: 2,878,011</li> <li>4 layers of message passing, number of scalar features = 32, number of vector features = 13. Number of parameters: 2,767,269</li> <li>6 layers of message passing, number of scalar features = 26, number of vector features = 12. Number of parameters: 2,764,431</li> </ul> <p>We compare GNNs with different numbers of layers to compare performance across models which learn embeddings from various $k$-hop neighborhoods.</p> <p>Graph transformer model details: 8 attention heads, 8 layers, hidden dimension = 192, feed forward neural network dimension = 512. Number of parameters: 2,801,155</p> <p>Both models were trained for 4 hours on 1 GPU with a batch size of 16, Adam optimiser, and a learning rate of $1 \times 10^{-3}$. We show the results for the 30% and 60% sequence-based splits for the protein-ligand binding affinity benchmark in Table 1 and 2 respectively.</p> <p><strong>Table 1.</strong> Protein-ligand binding affinity task with 30% sequence based split. 
ProNet <d-cite key="wang2022learning"></d-cite> is included as the SOTA model in this benchmark.</p> <table> <thead> <tr> <th>Model</th> <th>Root mean squared error $\downarrow$</th> <th>Pearson correlation coefficient $\uparrow$</th> <th>Spearman correlation coefficient $\uparrow$</th> </tr> </thead> <tbody> <tr> <td>ProNet <d-cite key="wang2022learning"></d-cite></td> <td>1.463</td> <td>0.551</td> <td>0.551</td> </tr> <tr> <td>GNN 2 layer</td> <td>1.625</td> <td>0.468</td> <td>0.474</td> </tr> <tr> <td>GNN 4 layer</td> <td>1.529</td> <td>0.488</td> <td>0.477</td> </tr> <tr> <td>GNN 6 layer</td> <td>1.514</td> <td>0.494</td> <td>0.494</td> </tr> <tr> <td>Graph Transformer</td> <td>1.570</td> <td>0.476</td> <td>0.469</td> </tr> </tbody> </table> <p><strong>Table 2.</strong> Protein-ligand binding affinity task with 60% sequence based split. ProNet <d-cite key="wang2022learning"></d-cite> is included as the SOTA model in this benchmark.</p> <table> <thead> <tr> <th>Model</th> <th>Root mean squared error $\downarrow$</th> <th>Pearson correlation coefficient $\uparrow$</th> <th>Spearman correlation coefficient $\uparrow$</th> </tr> </thead> <tbody> <tr> <td>ProNet <d-cite key="wang2022learning"></d-cite></td> <td>1.343</td> <td>0.765</td> <td>0.761</td> </tr> <tr> <td>GNN 2 layer</td> <td>1.483</td> <td>0.702</td> <td>0.695</td> </tr> <tr> <td>GNN 4 layer</td> <td>1.471</td> <td>0.717</td> <td>0.719</td> </tr> <tr> <td>GNN 6 layer</td> <td>1.438</td> <td>0.722</td> <td>0.704</td> </tr> <tr> <td>Graph Transformer</td> <td>1.737</td> <td>0.529</td> <td>0.534</td> </tr> </tbody> </table> <h2 id="discussion">Discussion</h2> <h3 id="gnns-perform-better-than-graph-transformers">GNNs perform better than graph transformers</h3> <p>From the benchmarking we can see that the graph transformer model performs worse than the GNNs for the 30% and 60% sequence split for protein-ligand binding affinity. 
An intuitive explanation for why graph transformers perform worse is that it may be difficult for the graph transformer to learn the importance of local interactions for binding affinity prediction as it attends to all nodes in the network. Or in other words, because each update of the node involves seeing all nodes, it can be difficult to decipher which nodes are important and which nodes are not. In order to test if this is true, future experiments would involve a graph transformer with a sparse attention layer where the attention for nodes beyond a distance cutoff is 0. In contrast to the lower performance of graph transformers, the results show that deeper GNNs which “see” a larger $k$-hop neighborhood perform better. However, we did not push this to the extreme of implementing a GNN with enough layers such that the $k$-hop neighborhood is the whole graph which would be most similar to a graph transformer as it attends to all nodes. This is because very deep GNNs are subject to issues like oversmoothing where all node features converge to the same value <d-cite key="rusch2023survey"></d-cite>.</p> <p>The GNN may also perform better than the graph transformer due to the higher order geometric features used by the e3nn GNN message passing framework, compared to the graph transformer which only has relative distances. To further explore this, future work will involve implementing the equiformer graph transformer <d-cite key="liao2022equiformer"></d-cite>, which is a graph transformer with higher order geometric features.</p> <h3 id="depth-vs-width">Depth v.s. width</h3> <p>Deeper GNNs (2 v.s. 4 v.s. 6 layers) with an approximately constant total number of parameters achieved better performance across both protein ligand binding affinity tasks. This was also observed in the image classification field with the development of AlexNet where deeper networks were shown to significantly improve performance <d-cite key="krizhevsky2012imagenet"></d-cite>. 
In the context of molecular graphs, deeper GNNs allow the nodes to gain more local chemical context as their node embeddings are exposed to a larger $k$-hop neighborhood. Thus, these node embeddings are more expressive which facilitates better task performance. There is a limit to the advantages of depth, as very deep GNNs experience oversmoothing as mentioned above <d-cite key="rusch2023survey"></d-cite>.</p> <h3 id="model-performance-vs-graph-size">Model performance v.s. graph size</h3> <p>We compared the error of the prediction v.s. the number of atoms in the graph to test the hypothesis that larger graphs are more difficult to make predictions on. However, correlation between error and number of atoms in the graph all yielded very low Pearson correlation coefficients ($&lt; 0.1$) for all experiments (Figure 4). Thus, the number of atoms in the graph has minimal effect on the predictive ability of the model. This may suggest why the graph transformer–which is able to attend to all nodes in the graph–did not perform much better as the GNN performance does not degrade significantly with larger graphs.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-proposal/error_vs_size-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-proposal/error_vs_size-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-proposal/error_vs_size-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-proposal/error_vs_size.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Figure 4. Number of nodes in graph v.s. 
difference between true and predicted binding affinity for graph transformers and GNNs on the 60% protein-ligand binding affinity task. There is no prominent correlation between graph size and prediction error. </div> <h3 id="future-work">Future work</h3> <p>We implemented a relatively simplistic graph transformer in this project. While we concluded that, for this vanilla implementation of the graph transformer, the GNN outperforms the graph transformer, there are many more complex graph transformer architectures that we could explore to build more expressive architectures. In this section we explore some possible ideas.</p> <p><strong>Using cross-attention for better representation of protein-ligand interactions.</strong> In this project, we adapted the graph transformer from graphormer <d-cite key="ying2021transformers"></d-cite> which was developed originally for predicting the energy of one molecule. However, our task involves two interacting molecules, a protein and a ligand. Thus, graph transformer performance could be lifted if the model had a better understanding of the interactions between the protein and the ligand by using cross attention between the protein and the ligand, rather than self attention across the whole protein-ligand complex.</p> <p><strong>Hierarchical pooling for better representation of amino acids.</strong> Graph transformer performance could also be lifted by defining better pooling strategies than using the <code class="language-plaintext highlighter-rouge">&lt;cls&gt;</code> token from a set of all atoms to predict binding affinity. In this project the graphs were defined based on the atoms in the graph. However, proteins are comprised of an alphabet of 21 amino acids. Thus, it may be easier for the model to learn more generalisable patterns to the test set if the model architecture reflected how proteins are comprised of amino acids which are comprised of atoms. 
This has been achieved in models using hierarchical pooling from the atom-level to the amino acid-level and finally to the graph-level <d-cite key="wang2022learning"></d-cite> <d-cite key="kong2023generalist"></d-cite>.</p> <p><strong>A hybrid approach: GNNs with Transformers.</strong> Finally, we could also improve performance further by taking a hybrid approach. That is, the GNN first learns local interactions followed by the graph transformer which learns global interactions and pools the node embeddings into a global binding affinity value. The motivation for this design is to leverage the advantages of both models. The GNN excels at learning local interactions while the graph transformer excels at learning global relationships from contextualised local interactions. This approach has been explored in other models for predicting drug-target interaction <d-cite key="bai2023interpretable"></d-cite> <d-cite key="jin2023capla"></d-cite>. Visualisation of the attention map of graph transformers would also be interesting to explore the importance of specific chemical motifs on protein-ligand interactions.</p> <h2 id="conclusion">Conclusion</h2> <p>In this project we present a direct comparison of graph transformers to GNNs for the task of predicting protein-ligand binding affinity. We show that GNNs perform better than vanilla graph transformers with the same number of model parameters across protein-ligand binding affinity benchmarks. This is likely due to the importance of capturing local interactions, which graph transformers may struggle to do. We also show that deeper GNNs perform better than wider GNNs for the same number of model parameters. 
Finally, future work in this area will involve implementing more complex graph transformers, or taking a hybrid approach where we capture local interactions with a GNN and global interactions with a graph transformer.</p> An empirical evaluation of autoencoders and diffusion models for 2D small-molecule generation 2022-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2022/molecule_generation <h2 id="introduction">Introduction</h2> <p>Applying deep learning techniques to 2D molecule generation is an interesting and challenging problem in the field of cheminformatics, with applications in drug discovery, materials science, and other areas of chemistry. The problem is broad in scope, since there is a variety of molecular data, representations of the generated molecules, and model frameworks or generation pipelines. Autoencoders and diffusion models are two major types of generative models. The first learns a latent distribution from actual data points and then samples from this space to produce a novel output. Diffusion models work by progressively adding noise to input data, learning the correspondence between inputs and random noise, and then working backwards from a new sample of random noise by “undoing” the noise.</p> <h2 id="data">Data</h2> <p>We use the QM9 dataset, described here. This dataset has been used extensively for cheminformatics research. The dataset contains the molecular structures and coordinates (2D and 3D) of ~134,000 organic molecules. Each molecule is represented as a set of atoms with their respective spatial (cartesian) coordinates. The dataset also contains a comprehensive set of chemical properties of each molecule.</p> <p>We retrieved the SMILE (Simplified Molecular Input Line Entry System) notation for each molecule. The SMILE string uses ASCII characters to describe the atoms, bonds, and connectivity in a molecule, and is a standardized way to convey chemical information in textual form. 
The RDKit library hosts functionality for moving between SMILE strings and quantitative data (matrices, fingerprint vectors) as well as for visualizing molecules from the SMILE strings.</p> <p>Finally, we create a secondary, restricted subset of the data that contains only simple, organic molecules by eliminating strings containing the “#” (character representing triple bonds) or elements other than C, H, O, N, P, S. For the models dealing with fingerprints, since it is challenging to go from fingerprint to an explicit representation of a molecule, our evaluation metric was determining whether or not the generated molecules were, in fact, similar to the chosen “simple” subset of all of the data. For models dealing with adjacency matrices, it was quite easy to determine ‘validity’ of chemical structures visually; the appearance of standard chemical structures, such as rings of 5 and 6 carbons with side-chains, was used as an indication of success.</p> <h2 id="autoencoder">Autoencoder</h2> <p>A very simple generative approach we can take is to use an autoencoder. Namely, we can train an autoencoder on molecules of interest — like our small-molecule-filtered dataset — and then sample from the learned latent space, decode the sample to generate a “molecule”, and evaluate the success in generation.</p> <p>As mentioned in the introduction, it is worth considering possible data inputs and the sort of information a generative model trained on different inputs would carry. For our example, we consider the efficacy of RDKFingerprints and graph adjacency matrices as two possible input data types.</p> <p><strong>RDKFingerprints</strong></p> <p>Molecular fingerprints are a commonly used identifier in drug discovery and virtual screening. Different types of fingerprints encode different aspects of a molecule, but they all share the characteristic of preserving features of a molecule in a spatial fashion across a bit vector. 
A main feature of a fingerprint scheme is that vector similarity (which can be computed in many ways) corresponds to structurally or chemically similar molecules according to the features the fingerprint intends to encode for.</p> <p>The Python RDKit library hosts functionality for handling two such types of fingerprints — a native RDK fingerprint and a Morgan fingerprint. We use the RDK fingerprint, and our data pipeline looks something like this:</p> <ul> <li> <p>For a given molecule (via smile string) we generate a fingerprint (a 2048-long bit vector)</p> </li> <li> <p>A set of such fingerprints is used to train an autoencoder (whose structure is a 2048 unit input layer, 2 hidden layers of 64 units activated with ReLU activations)</p> </li> <li> <p>We sample from the latent space and use the decoder to produce a set of generated molecules, which we associate to sets of 10 “most similar real molecules” from the original (unfiltered) dataset. Similarity is calculated using the <strong>Tanimoto Distance</strong>, a notion of similarity between two vectors where the numerator is the number of 1s in common between the bit vectors, and the denominator is the number of 1s overall.</p> </li> <li> <p>We compute the percentage of these 10 similar molecules that lie in the small-molecule-filtered dataset to evaluate the success of the autoencoder in understanding the structure of small molecules at the generation step.</p> </li> </ul> <p>This approach has the benefit of using a data source explicitly designed with the goal of similarity; computing close-distance vectors to the generated RDKit fingerprint carries genuine chemical meaning.</p> <p><strong>Adjacency Matrices</strong></p> <p>Molecules lend themselves well to graph representations: atoms are like nodes, bonds are like edges. 
Thus, a molecule, if represented with a graph, can be associated to an adjacency matrix that carries information on interatomic and overarching molecular properties.</p> <p>Adjacency matrices derived from the graph representation of a molecule, while not explicitly designed with the goal of molecule similarity in mind (as the fingerprint is), are historically successful in chemical deep learning, particularly as they are the workhorse of graph neural networks. The adjacency matrices available in the QM9 dataset can be decomposed into matrices at the single, double, and aromatic bond levels, so they carry chemical information in addition to structural information. We implement a similar pipeline with adjacency matrix inputs, with a few changes:</p> <ul> <li> <p>The adjacency matrix for a smile string is computed</p> </li> <li> <p>Unlike RDK Fingerprints, which are fixed in length, the size of the adjacency matrix varies with the size of the molecule; this makes use in a fixed-input length-autoencoder difficult, so we apply a padding approach, zero-padding all matrices to the size of the largest molecule’s matrix.</p> </li> <li> <p>The autoencoder is trained with these flattened, padded matrices.</p> </li> <li> <p>The generated reconstructions are rearranged into a matrix shape.</p> </li> <li> <p>The pseudo-adjacency matrix is then associated to a pseudo-molecule and corresponding pseudo-RDK fingerprint. Notably, the pseudo-molecule is created with some assumptions, such as the inclusion of only CHONPS atoms and only single bonds. 
Like the fingerprint framework, we find molecules in the original set with similar fingerprints to the reconstructed fingerprint, and compute the proportion of top-10 similar molecules that lie in the small-molecule set.</p> </li> </ul> <h2 id="autoencoder-results--rdk-fingerprints">Autoencoder Results – RDK Fingerprints</h2> <p>The first and most notable result is that over repeated trials of sampling and reconstructing from the latent space for both types of data, the proportion of top-10 similar molecules that lie in the small-molecule restricted dataset is 1.0. That is, each of the 10 most similar molecules lies in the small-molecule set in both cases, over 5 batches of 10 samples each.</p> <p>Some detailed results follow.</p> <p>First, here is the training curve with loss for the fingerprint autoencoder</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/training_loss_fngpts-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/training_loss_fngpts-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/training_loss_fngpts-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-molecule_generation/training_loss_fngpts.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>An example of top-10 similarity for a sampled and reconstructed pseudo-fingerprint is shown here</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/top_10_similarity_chart-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" 
srcset="/staging/assets/img/2023-11-09-molecule_generation/top_10_similarity_chart-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/top_10_similarity_chart-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-molecule_generation/top_10_similarity_chart.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>We notice that all the top-10 most similar molecules seem to be near each other, index-wise. This would make sense if the dataset is organized such that similar molecules share close indices. We can confirm this fact by inspecting a heatmap of 10 samples from a consecutive block in the dataset, like so:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/heatmatp_rdk_with_title-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/heatmatp_rdk_with_title-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/heatmatp_rdk_with_title-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-molecule_generation/heatmatp_rdk_with_title.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>We can see that indeed, closer molecules in the original dataset have higher similarity, so this result is as expected.</p> <h2 id="autoencoder-results---adjacency-matrix">Autoencoder Results - Adjacency Matrix</h2> <p>We then inspect the results of the adjacency matrix-based autoencoder training. 
First, the training curve with loss:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/training_loss_adj_mat_case-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/training_loss_adj_mat_case-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/training_loss_adj_mat_case-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-molecule_generation/training_loss_adj_mat_case.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Now, here is a top-10 similarity example for a pseudo-RDK fingerprint from a pseudo-adjacency matrix:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/adj_mat_top_10_similarity-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/adj_mat_top_10_similarity-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/adj_mat_top_10_similarity-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-molecule_generation/adj_mat_top_10_similarity.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>We notice first, that the average similarity is much higher in this case, suggesting that even with the extra step of conversion and the assumptions we make about molecular form, the similarities are higher in this case. 
The second observation is that the top-10 similar indices are spread out farther than they were in the previous case, suggesting that the adjacency matrix to RDK fingerprint conversion moves around the similar molecules.</p> <p>Finally, we include some photos of molecules generated in this process (we were unable to generate photos in the RDK fingerprint trained autoencoder, because we require an adjacency matrix to draw the molecules, and it is not straightforward to go from fingerprint to matrix):</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/Screenshot%202023-12-12%20at%207.33.05%20PM-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/Screenshot%202023-12-12%20at%207.33.05%20PM-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/Screenshot%202023-12-12%20at%207.33.05%20PM-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-molecule_generation/Screenshot%202023-12-12%20at%207.33.05%20PM.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>In the photo above, we can see the lefthand side tail as a recognizable part of an organic molecule, suggesting success with some types of bonds. 
In the photo below, we see that the autoencoder has learnt some additional aspects beyond basic single bonds (one of the validation images we show further below includes a similar red ring).</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/Screenshot%202023-12-12%20at%207.33.45%20PM-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/Screenshot%202023-12-12%20at%207.33.45%20PM-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/Screenshot%202023-12-12%20at%207.33.45%20PM-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-molecule_generation/Screenshot%202023-12-12%20at%207.33.45%20PM.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Finally, the photo below while the least small-molecule-like in appearance, is interesting because it appeared many times in samples of 100 images (around 20 times) despite the latent space adjacency matrices being distinct. 
This could perhaps have to do with the process of converting from an adjacency matrix of reals (the result of latent space sampling) to an adjacency matrix of 1/0s, which we accomplish with median thresholding.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/Screenshot%202023-12-12%20at%207.33.59%20PM-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/Screenshot%202023-12-12%20at%207.33.59%20PM-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/Screenshot%202023-12-12%20at%207.33.59%20PM-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-molecule_generation/Screenshot%202023-12-12%20at%207.33.59%20PM.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>For reference, a sample image from the “validation” true small-molecule dataset is shown below:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/validation_molecule_1_ex-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/validation_molecule_1_ex-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/validation_molecule_1_ex-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-molecule_generation/validation_molecule_1_ex.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p><strong>Diffusion Model</strong></p> <p>More recently, 
the use of diffusion models as an approach for generative modeling has become more common; as described in the introduction, denoising diffusion models operate by iteratively adding noise in a Markov manner to samples, learning the correspondence between inputs and the resultant noise, and then reverse-sampling from random noise to generate a new datapoint.</p> <p>In the past, as seen in the E3 paper, diffusion models have been applied to 3D adjacency matrices. In this case, we adapted an image-based diffusion model to noise and then de-noise data on adjacency matrices by using 2D adjacency matrices instead.</p> <p>The following plots provide information about the training of the diffusion model on adjacency matrices. First, is a plot of the loss over 5 training epochs at LR 0.001; this model was trained on approximately 90K training samples, so the loss was quite low even after the first epoch:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/shorna_1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/shorna_1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/shorna_1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-molecule_generation/shorna_1.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>The efficacy of diffusion models as a means of generating novel adjacency matrices is evident from the following visualizations of our results. First, here are two runs of the denoising process for the diffusion model, first on an extremely limited set of approximately 1000 matrices, and then on the entire 90K dataset. 
As seen, even with very few inputs, it was possible to identify the emergence of a ‘bright spot’ in the top left, which represents the actual adjacency matrix (which was later encoded into actual matrices).</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/shorna_2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/shorna_2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/shorna_2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-molecule_generation/shorna_2.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>In converting these adjacency matrices into actual molecule images, we aimed to visualize the backbones of these molecules (which is most informative as to the overall structure), so instead of focusing on determining atomic identity, we instead labelled all of them as carbons and proceeded.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/shorna_3-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/shorna_3-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/shorna_3-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-molecule_generation/shorna_3.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Notably, in comparison to the molecules created by the autoencoder, 
these contain more of the structures which are characteristics of organic molecules, such as 5 and 6 carbon rings with molecules (potentially side chains of length &gt;1) coming off. Indeed, it is possible to observe the progressively increased ordering of the adjacency matrices over times (as they become closer and closer to actual molecules), going from extremely disordered to closer and closer to something meaningful.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/shorna_4-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/shorna_4-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/shorna_4-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-molecule_generation/shorna_4.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>The application of diffusion models to the RDKFingerprints is shown here: for two separate runs, they look like this. Notably, in order to use an image classification network for RDKFingerprints, the fingerprints were stacked into an image which looks like a series of stripes. 
As evident, the diffusion model was able to produce such striped images, and their simplicity is a good indication that these are indeed good learnings of information about the filtered subset.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/shorna_5-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/shorna_5-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-09-molecule_generation/shorna_5-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-09-molecule_generation/shorna_5.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p><strong>Conclusion</strong></p> <p>In this post, we used two different generative models and tested out two different encodings for information about molecular structure. In general, both models were able to learn and reproduce information about the chosen subset, but in general, the diffusion model was better at accurately reproducing molecules with ‘believable’ structures; as evident from the figures above, although the autoencoder did learn and create relatively sparse adjacency matrices, they lacked the hallmarks of small organic molecules (like rings structures). Further, although it was more difficult to discern quantitative information about the ‘accuracy’ of adjacency matrices, since they depend on larger structures than the RDKfingerprints, it was much easier to map adjacency matrices to actual (visualizable) structures. On the whole, the diffusion model was better at actually creating canonical molecular structures. 
Further, models trained on adjacency matrices, when converted post-generation to RDKFingerprints had higher accuracy, and adjacency matrices were generally easier to conceptualize, so we have preference for this data encoding.</p> VIVformer 2022-12-01T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2022/VIVFormer <h2 id="introduction--motivation">Introduction &amp; Motivation</h2> <p>Vortex induced vibrations (VIV) are vibrations that affect bluff bodies in the presence of currents. VIV are driven by the periodic formation and shedding of vortices in the bodies’ wakes which create an alternating pressure variation causing persistent vibrations <d-cite key="triantafyllou2016vortex"></d-cite>. The vibration amplitude in VIV is typically moderate, not exceeding about one to two body diameters <d-cite key="bernitsas2019eigen"></d-cite>. For flexible bodies, VIV are not uniform along the body’s length (usually referred to as the span) but rather different points along the span vibrate with different amplitudes and phases.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/Intro-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/Intro-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/Intro-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/Intro.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/Intro2-480.webp" /> <source class="responsive-img-srcset" 
media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/Intro2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/Intro2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/Intro2.jpeg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Schematic diagrams of vortex induced vibrations of flexible bodies. </div> <p>Observations of vortex induced vibrations (VIV) date back to antiquity, when the Aeolian tones, sounds created by pressure fluctuations induced by winds passing over taut strings were recognized. The first sketches of vortices date back to Leonardo da Vinci in the early 16th century. Today, VIV have become a problem of interest to both theoreticians, due to the complex underlying mechanisms involved, and engineers, due to the practical significance of mitigating the fatigue damage VIV can cause to offshore structures and equipment such as marine risers and offshore wind turbines. In order to gain some intuition, the reader can refer to the video of a flexible body undergoing VIV in section “Data Description” (below).</p> <p>The underlying driving mechanism of VIV is vortex formation; specifically, the periodic shedding of vortices formed in the wake behind bluff bodies placed within cross-currents <d-cite key="triantafyllou2016vortex"></d-cite>. The vortex shedding frequency in rigid bodies is known as the Strouhal frequency. For flexibly mounted or flexible bodies, the vortex formation frequency can be entrained away from the Strouhal frequency and coincides with the frequency of vibration in a phenomenon known as lock-in <d-cite key="navrose_mittal_2016"></d-cite>. 
This occurs across a wide range of oscillating frequencies resembling a nonlinear resonance <d-cite key="park2016suppression"></d-cite>. Given that flexible body VIV are not span-wise uniform as the flexible body undergoes a spatially traveling and/or standing wave response from the forcing exerted by the fluid <d-cite key="wang2021illuminating, triantafyllou2016vortex, fan2019thesis"></d-cite>, the observed motions are nonstationary, unsteady, and can transition to different responses even for seemingly unchanged experimental conditions.</p> <p>VIV of flexible bodies are usually modelled by leveraging the modal decomposition technique (i.e. using a Fourier expansion of sinusoidal mode shapes with time varying coefficients), similar to the approach introduced for modelling vibrating shafts and beams <d-cite key="rao1995mechanical"></d-cite>. Recently, Kharazmi et al. (2021) <d-cite key="kharazmi2021data"></d-cite> attempted to learn the mode shapes and time varying coefficients using LSTM networks in modal space (LSTM-Modnet); Mentzelopoulos et al. (2023) <d-cite key="mentzelopoulos2023physics"></d-cite> proposed learning a sparse-mode set of sinusoidal modes along with the corresponding time-varying coefficients. Both frameworks suffer from the inability to robustly forecast future motions.</p> <p>Although leveraging transformers to expand the horizon of predictions of time series is a very active field of research <d-cite key="zhou2021informer, zeng2023transformers, liu2022non, zhou2022fedformer"></d-cite>, transformers have not yet been used to predict VIV of flexible bodies, which are physical non-stationary time-series, to the best of the author’s knowledge. In addition, only limited work has been performed in generating physical data using generative models <d-cite key="zhong2023pi, takeishi2021physics, shu2023physics"></d-cite> and there are no identifiable applications to VIV in the literature. 
Most applications of machine learning for VIV include solving physical equations using physics-informed neural networks (PINNs) <d-cite key="bai2022machine, raissi2019deep"></d-cite> and learning hydrodynamic coefficients or other relevant quantities <d-cite key="ma20221understanding"></d-cite> in order to predict the motions on average rather than instantaneously <d-cite key="ma2021enhancing, rudy2021learning"></d-cite>.</p> <p>In this work, an attempt will be made to develop a transformer network architecture to predict the VIV of a flexible body both instantaneously and on average. The transformer will be trained and tested using data collected at the MIT Towing Tank by the author. In addition, in order to make the most of the available data, a variational autoencoder (VAE) will be trained to generate more VIV samples which will then be used to train the transformer. In doing so, the capability of VAEs to create physical data which retain information of the underlying physical processes will also be examined. The rest of the blog will be organized as follows: 1. using generative-AI, specifically variational autoencoders, in order to generate physical VIV data 2. using transformers to model and forecast nonstationary flexible body VIV.</p> <h2 id="data-description">Data Description</h2> <p>All data used for this study were collected during experiments conducted by the author at the MIT Towing Tank, a facility consisting of a 35m x 2.5m x 1.2m water tank equipped with a towing carriage capable of reaching speeds exceeding 2 m/s as well as a flow visualization window. In this and the following sections the terms model, riser, flexible body, and flexible cylinder will be used interchangeably to refer to the flexible cylinder model used during experiments.</p> <p>The figure below illustrates the experimental setup schematically. A solid aluminum frame was used to support the flexible cylinder; the riser model was placed vertically at the center of the structure. 
An ATI 6 degree of freedom force sensor was attached to the top end of the riser to measure its tension. Two GoPro Hero 11 cameras were attached to the supporting frame facing perpendicular directions to capture videos of the riser’s motion in the cross-flow and in-line directions, respectively.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/full_schema_experiment-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/full_schema_experiment-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/full_schema_experiment-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/full_schema_experiment.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Schematic of experimental setup. A riser model was placed at the center of a supporting frame which was towed resulting in a uniform incoming flow profile on the model. </div> <p>The riser model was constructed out of urethane rubber infused with tungsten powder. Specifically, Smooth-On PMC-724 urethane rubber was mixed with powdered tungsten to increase the model’s density and achieve a mass-ratio $m^* = \frac{\rho_{model}}{\rho_{H_2O}} = 3$. The mixture was poured into a right cylindrical mold with a fishing line placed along its centerline to provide tension. The model’s length was 890 mm with a 5 mm diameter. The length-to-diameter ratio of the model riser was L/D = 178. Equidistant markers were spray-painted red on the riser model resembling a zebra-patterning to enable motion tracking using cameras. 
Three underwater light fixtures were used to enhance visibility underwater. The model’s ends were clamped on the supporting frame and the model was separated from the frame by a distance much greater than the body’s diameter $O( &gt; 10D)$.</p> <p>The flexible cylinder was towed at 0.7 m/s resulting in a uniform incoming flow profile along the x direction, as shown in the schematic above. Recordings of the motions were captured at a resolution of 1080p (1920x1080 pixels) and 120 fps. The Reynolds number was $ Re \approx 3,500$. A visualization of the vibration is shown below (this is a gif of the actual vibration recording downsampled in time).</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/viv.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/viv.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/viv.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/viv.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Sample video recording of the cross-flow vibration of the flexible body (top) and tracking result of the motion (bottom). </div> <p>Reconstruction of the motion was done using a machine vision framework leveraging Kalman filtering for multi-object tracking; for more information one may refer to Mentzelopoulos et al. (2024) <d-cite key="mentzelopoulos2024reconstructing"></d-cite>. 
The high level process is shown below.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/Image_Processing.PNG-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/Image_Processing.PNG-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/Image_Processing.PNG-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/Image_Processing.PNG" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Pipeline for motion reconstruction. Red markers on the body were tracked across the video frames to obtain their motion. </div> <p>A total of 36 locations along the span were marked red on the flexible body and their positions were tracked. The endpoints were fixed on the supporting frame and thus their displacement was zero.</p> <h2 id="vibration-data-as-images">Vibration Data as Images</h2> <p>The displacement of the vibrating body was recorded at 36 uniformly spaced locations along the body’s span and the video recordings were sampled at 120 fps. 
One may store the vibration data as 2D arrays of $N_{time}$ x $N_{sensor}$, where each row corresponds to a different time of the vibrating body’s displacement at $N_{sensor}$ locations.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/pixelated_viv-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/pixelated_viv-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/pixelated_viv-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/pixelated_viv.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/interpolated_viv-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/interpolated_viv-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/interpolated_viv-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/interpolated_viv.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Visualization of experimental data. The x-axis corresponds to sensor number (i.e. body location) and the y-axis corresponds to time step (i.e. $\Delta t = 1/fps = 1/120 \ sec$). The displacement normalized by the body's diameter is highlighted on the plane. 
On the left we visualize the vibration data stored in 2D arrays. On the right, we plot the interpolated values which make more intuitive sense as visualized flexible body vibrations (travelling wave response with peaks travelling to the right from location 0 to location 36). Lines parallel to the x-axis are "snapshots" of the flexible body vibrating in the direction perpendicular to the paper/screen (i.e. "in and out of the page"). </div> <p>The stored vibration data are illustrated above and can easily be visualized and treated like single channel images! If necessary, scaling pixel values invertibly to an interval of choice, like [0,1] or [0, 255] requires just a few operations leveraging the maximum and minimum values of the data. In the images shown above, each row corresponds to a different time of the recorded vibration at all sampled locations. The time difference between consecutive time steps is $\Delta t = 1/fps = 1/120 \ sec$. The 36 “sensor locations” correspond to the uniformly spaced markers on the body (excluding the two endpoints) and thus they span approximately the full body length. Plotting the interpolated values of the array yields a more intuitive interpretation of the vibrations. In the data shown above, a travelling wave (crests travelling) from location 0 to location 35 can be identified. For convenience, the data were stored in a single 4D array of size $N_{batch}$ x $1$ x $N_{time}$ x $N_{sensor} = N_{batch}$ x $1$ x $36$ x $36$, yielding hundreds of square arrays of size 36 x 36 which can be easily visualized and collected in batches for training models.</p> <h2 id="gen-ai-for-physical-vibration-data-using-variational-autoencoders">Gen-AI for Physical Vibration Data using Variational Autoencoders</h2> <p>In this section we focus on generating physical vibration data using generative-AI. We will attempt using a variational autoencoder (VAE) trained on the real experimental data described above to generate synthetic data of the vibrations. 
We are interested in understanding whether the generated data preserve physicality and thus whether they can be used to train models and to understand the underlying physical generative process by studying the artificial data.</p> <p>A VAE is a specific network architecture whose goal is to learn a probabilistic mapping from an input space to a low dimensional latent space and then back to the input space. The network architecture is comprised of an encoder network which maps data from the input space to the latent space and a decoder network which maps data from the latent space back to the input space. A schematic of the VAE used for this work is shown below.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/VAE.PNG-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/VAE.PNG-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/VAE.PNG-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/VAE.PNG" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Schematic of the variational autoencoder architecture. </div> <p>On a high level, the variational autoencoder acts just as a regular autoencoder, with the difference that the training ensures that the distribution of the data in the latent space is regular enough to enable a generative process when sampling from the latent space. That is, the minimized loss ensures that the distribution of the data over the latent dimensions, $q(z \mid x)$, is as close to a standard normal distribution as possible. 
We choose to assume a Gaussian prior on the latent space for our data since we will need to sample from it when decoding, a task which is nontrivial for arbitrary distributions. The decoder on the other hand will learn the distribution of the decoded variables, $p(x \mid z)$ given their latent representations.</p> <p>The encoder architecture of choice was the following, accepting an input $x \in R^{36 \times 36}$:</p> <ol> <li>$x \rightarrow Linear (R^{36 \times 36}, R^{64}) \rightarrow ReLU \rightarrow Linear(R^{64}, R^{64}) \rightarrow ReLU \rightarrow x_{embedding}$</li> <li>$x_{embedding} \rightarrow Linear(R^{64}, R^{5}) \rightarrow ReLU \rightarrow \mu \in R^5$</li> <li>$x_{embedding} \rightarrow Linear(R^{64}, R^{5}) \rightarrow ReLU \rightarrow \sigma \in R^5$</li> </ol> <p>where $\mu$ and $\sigma$ are the mean and variance of the posterior data distribution in the latent space. The decoder architecture was as follows accepting an input $z \in R^5$:</p> <ol> <li>$z \rightarrow Linear(R^{5}, R^{64}) \rightarrow ReLU \rightarrow Linear(R^{64}, R^{36 \times 36}) \rightarrow ReLU \rightarrow x^\prime$</li> </ol> <p>Training was done by maximizing the evidence lower bound (ELBO) on the experimental data and the outputs of the autoencoder. This is equivalent to minimizing the following loss (negative of ELBO).</p> <p>$Loss_{ELBO} = - E_{q(z \mid x)} \bigg[ \log p(x\mid z) - D_{KL}(q(z \mid x )\mid \mid q(z)) \bigg]$</p> <p>where $D_{KL}$ refers to the Kullback-Leibler divergence. Intuitively, maximizing the ELBO or minimizing the above $Loss_{ELBO}$, aims at maximizing the log-likelihood of the data given their representations in the latent space while minimizing the Kullback-Leibler divergence between the learned posterior of the data in the latent space and the prior assumption of a Gaussian distribution in the latent space. For the purposes of training, the data were scaled to be between [0, 1] in order to use binary cross entropy. 
The VAE was trained using Adam optimizer with a learning rate $lr = 0.01$. A step scheduler was set to decay the step by $\gamma = 1/2$ every 2,000 iterations. The training loss as a function of epoch is shown below.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/VAE_loss.PNG-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/VAE_loss.PNG-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/VAE_loss.PNG-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/VAE_loss.PNG" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Training loss ($Loss_{ELBO}$) for the VAE on the input real VIV data. </div> <p>Having trained the VAE, samples from the standard normal distribution in $R^5$ were drawn, decoded, and rescaled in order to generate synthetic VIV data. 
Three random samples are included below (top), along with three random samples of real data observed during experiments (bottom).</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/generated_VAE_1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/generated_VAE_1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/generated_VAE_1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/generated_VAE_1.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/generated_VAE_2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/generated_VAE_2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/generated_VAE_2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/generated_VAE_2.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/generated_VAE_3-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/generated_VAE_3-800.webp" /> <source 
class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/generated_VAE_3-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/generated_VAE_3.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/real_viv_1-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/real_viv_1-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/real_viv_1-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/real_viv_1.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/real_viv_2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/real_viv_2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/real_viv_2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/real_viv_2.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" 
srcset="/staging/assets/img/2023-12-12-VIVFormer/real_viv_3-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/real_viv_3-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/real_viv_3-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/real_viv_3.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Top row: Generated data using the VAE. Bottom row: Real data recorded from experiments. </div> <p>Although the generated data are certainly eye-pleasing, their promise begs the question of whether they preserve physicality. In order to address this question, we will examine whether a model trained on synthetic data can be used to predict real experimental data.</p> <h2 id="vivformer---a-transformer-architecture-for-viv">VIVformer - A Transformer Architecture for VIV</h2> <p>Transformer network architectures have been widely used and are considered state of the art tools for various machine-learning tasks, particularly in natural language processing (NLP) and computer vision. The transformer architecture has become a cornerstone in deep learning and its applications span across all fields of engineering and science. In this section we will develop a transformer architecture to model and forecast the VIV of flexible bodies under the excitation of a hydrodynamic force. 
The transformer architecture used for this purpose is shown below.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/Transformer_architecture-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/Transformer_architecture-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/Transformer_architecture-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/Transformer_architecture.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> VIVformer: A transformer architecture for vortex-induced vibrations. </div> <p>As shown schematically above, the architecture is comprised by various Residual-Attention modules followed by a final linear layer. The input to the VIVformer is a batch of vibration data as discussed in previous sections “Data Description” and “Visualizing the Data” with shape $N_{batch} \times N_{time-in} \times N_{sensor}$. The data are then passed through $N_{attn-layers}$ residual attention modules (these do not affect the shape of the input) and then scaled to the desired $N_{time-out}$ yielding an $N_{batch} \times N_{time-out} \times N_{sensor}$ output.</p> <p>The residual-attention modules are the drivers of the data processing. These modules accept an input on which they perform two sequential tasks: 1. multi-head attention with a residual connection, and 2. pass the output of the multi-head attention module through a fully connected feedforward network (FFN) with a residual connection. 
The process can be visualized in the bottom left of the architecture schematic above.</p> <p>The multi-head attention layer is comprised of $N_{heads}$ number of attention heads which calculate the self-attention of the input as proposed by Vaswani et al. (2017) <d-cite key="vaswani2017attention"></d-cite>. The superposition of the input and output from the Multi-head attention module is then passed through the FFN. The FFN performs batch normalization, passes the output through a linear layer which scales the input to $mlp-dim$, then through a Gaussian Error Linear Unit (GeLU) activation and scales the output back to the original dimension by passing through a second linear layer.</p> <p>For this work, we attempt to use 20 time steps of input data in order to predict a single future time step. That is, the input to the VIVformer is 20 time steps of vibration data at 36 locations and we try to predict the next time step at the same locations. We note that the VIVformer is flexible in terms of the number of data-points in and out as well as the number of time steps in and out. Decreasing the input information (both spatial and temporal) while forecasting as much as possible in terms of spatial and temporal predictions is the recommended research direction for future work.</p> <p>Although auto-regressive transformers are trending currently, for the purpose of forecasting vibrations this would lead to a pitfall of accumulating model errors and using them as inputs. In order to predict extended time horizons, simply adjusting the number of time-steps out would be the recommended course of action.</p> <p>Since we are interested in making predictions of physical vibration data, a reasonable choice for our loss function is the Mean Square Error (MSE) between predicted and observed vibrations.</p> <h3 id="the-real-data-deal">The Real (data) Deal</h3> <p>In this section, the experimental data obtained during experiments were used to train the VIVformer. 
Specifically, 20 time steps at 36 locations were used as input and the next time step at the same locations was forecasted. In order to train the transformer, a dataset and dataloader were created to enable iterating over the following quantities:</p> <ol> <li>Sequence_in: A 2D array of shape $N_{time-in} = 20 \times N_{sensor} = 36$.</li> <li>Target = A 2D array of shape $N_{time-out} = 1 \times N_{sensor} = 36$.</li> </ol> <p>Sequence_in refers to a single input to the VIVformer and Target is the expected output of the VIVformer. The sequences were collected in batches and then used for training. The model was trained on the MSE loss between input sequences and targets and the parameters were updated using the AdamW algorithm. The initial learning rate was set to $lr = 0.0001$ and a cosine annealing step scheduler was set to adjust the learning rate during training.</p> <p>The training data were split into 80% for training and 20% for testing/validation. The sequences and targets of the training data were shuffled randomly and split in mini-batches while the validation data were not in order to preserve the continuity of the vibrations when validating (important mainly for visualization purposes). The VIVformer was trained for a total of 50 epochs. 
The training results are shown below.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/real_data_VIVformer_loss-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/real_data_VIVformer_loss-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/real_data_VIVformer_loss-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/real_data_VIVformer_loss.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> VIVformer training and validation loss trained on expreimental VIV data. </div> <p>The training results show that the model is able to gradually decrease the MSE loss between targets and predictions. The loss on both the training set and the validation set seems to be decreasing and converging. We note that the VIVformer architecture used was heuristically optimized using a trial and error approach yielding 4 attention-residual layers, with 3 attention heads of 32 hidden units and a mlp-dim of 128 hidden units. 
In order to visualize the predicted vibrations, the forecasting as well as target data from a random sample of 36 continuous time steps from the validation set are shown below.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/Forecasting_real-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/Forecasting_real-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/Forecasting_real-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/Forecasting_real.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/targets_real-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/targets_real-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/targets_real-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/targets_real.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/difference_real-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" 
srcset="/staging/assets/img/2023-12-12-VIVFormer/difference_real-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/difference_real-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/difference_real.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> From left to right: Forecasting of the VIV on a randomly selected set of 36 continuous points from the validation set (for visualization of the predicted vibrations), target data from real experiments, absolute difference between targets and predictions. </div> <p>As is evident from the visualized vibration predictions (above), the model can predict unseen experimental data to reasonable accuracy. The expected modes are forecasted and the output is continuous. In addition, the absolute difference is almost everywhere small, although some inaccuracies do occur in the predictions. A meaningful question to ask would be how well the model predicts the root mean square (RMS) of the vibrations, which gives us a sense of the prediction capabilities on average. 
Below we plot the RMS of the forecasted as well as the experimentally observed vibrations.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/RMS_real-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/RMS_real-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/RMS_real-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/RMS_real.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Root mean square (RMS) of the forecasted VIV as well as the experimentally observed. RMS displacement is shown on the y-axis while span (body position) is shown on the x-axis. Reasonably accurate agreement is evident between forecasting and experimental observations. </div> <p>The RMS result shown above shows that the model can predict the vibrations reasonably accurately on average. This is a particulary important result as it allows for direct benchmarking of this method against semi-empirical models which can only predict the average vibrations.</p> <p>Although this is not recommended practice as we described earlier, we attempt to make auto-regressive predictions using our model. That is, we start with 20 time steps of recorded vibrations as input and then use the model’s predictions gradually as more and more inputs. By 20 time steps, there would be no observed data input to the model; it would only be predicting on its outputs. 
The auto-regressive results are shown below.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/auto_regression_forecasting_real-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/auto_regression_forecasting_real-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/auto_regression_forecasting_real-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/auto_regression_forecasting_real.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/auto_regression_targets_real-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/auto_regression_targets_real-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/auto_regression_targets_real-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/auto_regression_targets_real.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/auto_regression_difference_real-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" 
srcset="/staging/assets/img/2023-12-12-VIVFormer/auto_regression_difference_real-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/auto_regression_difference_real-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/auto_regression_difference_real.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> From left to right: Auto-regressive forecasting of the VIV on a randomly selected set of 20 continuous points from the validation set, target data from real experiments, absolute difference between targets and predictions. We show 20 time steps as we assess the model’s capability to predict as many time steps as it uses as input. </div> <p>Although the mode shapes are consistent and remain physical looking, it appears that the magnitude of the response grows with time. As expected, errors accumulate and the forecasting becomes more and more inaccurate as time evolves. This can also be clearly visualized in the absolute difference plot (on the very right) where the difference increases with time.</p> <p>In conclusion, with respect to training on real data, the transformer is reasonably accurate in terms of forecasting future motions given a sample of the experimental data. The model trains well on the MSE loss and seems to converge in about 50 epochs. The wall time of training does not exceed a few minutes on a Google-Colab T4 GPU machine.</p> <h3 id="the-hyper-real-gen-ai-data-deal">The hyper-Real (Gen-AI data) Deal</h3> <p>So far we have established that the VIVformer architecture can model the physical VIV of flexible bodies reasonably accurately. 
This section will mainly focus on addressing the question of whether synthetic VIV data generated using our VAE are physical: that is, whether the physical properties of the vibrations are preserved during the generative process. In order to address this question, we will train the VIVformer on synthetic data only and then test the trained model on the real data.</p> <p>Sixty arrays of 36 time steps at 36 locations (this can be thought of as generating 60 images similar to the ones shown in the previous section “Vibration Data as Images”) were generated using the VAE trained on real experimental data. The synthetic VIV data were then organized in input and target sequences by creating a dataset and dataloader to train the VIVformer. Training was done exactly as described in section “The Real (data) Deal” with the only difference being the training data; in this case training data were only synthetic. The same split of 80% for training/validation was used on the synthetic data. The training results were as follows.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/synthetic_VIVformer_loss-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/synthetic_VIVformer_loss-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/synthetic_VIVformer_loss-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/synthetic_VIVformer_loss.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> VIVformer training and validation loss trained on synthetic VIV data. 
</div> <p>The VIVformer architecture seems to train on the synthetic data well. We note that both the training and validation data are from the synthetic dataset and as such we expect that they should be very similar data. We train for 50 epochs and the results seem to reach convergence. In this case we note that the error on the validation set (calculated during each epoch after optimizing the VIVformer on the training set) seems to be consistently smaller than the error on the training set (on average). We expect that eventually the training loss would become smaller than the validation loss although more training epochs would be required, perhaps leading to overfitting our model. Given the training results, we can be confident that the VIVformer has learned to predict the synthetic data well.</p> <p>The more important question, however, is whether the VIVformer trained on the synthetic data can accurately forecast the real experimental data. Below we show the predictions of the VIVformer on the real experimental data. 
We underscore that the VIVformer has NOT seen a single real datum during training: the model has trained on synthetic data only!</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/Forecasting_synthetic-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/Forecasting_synthetic-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/Forecasting_synthetic-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/Forecasting_synthetic.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/targets_synthetic-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/targets_synthetic-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/targets_synthetic-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/targets_synthetic.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/difference_synthetic-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" 
srcset="/staging/assets/img/2023-12-12-VIVFormer/difference_synthetic-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/difference_synthetic-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/difference_synthetic.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> From left to right: Forecasting real VIV on a randomly selected set of 36 continuous points from the experiments, target data from experiments, absolute difference between targets and predictions. We note that the model has trained on synthetic data only and has never seen any real data! </div> <p>Although the VIVformer has not seen any real data during training, it is surprisingly reasonably accurate in predicting real data! Although certainly not perfect, the predictions are sensible. The root-mean-square of the vibrations forecasted and observed are shown below.</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/RMS_synthetic-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/RMS_synthetic-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-VIVFormer/RMS_synthetic-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-VIVFormer/RMS_synthetic.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> Root mean square (RMS) of the forecasted VIV plotted on top of the RMS of the experimentally observed VIV. 
RMS displacement is shown on the y-axis while span (body position) is shown on the x-axis. Reasonably accurate agreement is evident between forecasting (after training on synthetic data only) and experimental observation. </div> <p>As is evident in the above figure, the VIVformer can make reasonably accurate predictions of the RMS of the vibrations. Both the trends and amplitudes are reasonably accurately estimated.</p> <p>Since the VIVformer has never trained on real data but can reasonably accurately predict them, we conclude that at least part of the physicality of the real data is preserved during the generative process of the VAE. In a sense, the VAE can be thought of not just as a generator which makes realistic-looking data but as a tool which learns the underlying structure and mechanisms of the physical process which generates the data; it can thus be used to better understand the data and perhaps even the physical generative process. We conclude that our VAE could certainly be used to augment scarce datasets of VIV data and in addition, that it is a powerful tool that could potentially be used to study the underlying mechanisms of the physical generative process by studying the artificially generated data!</p> <h2 id="conclusions">Conclusions</h2> <p>In this work, a data-driven approach is employed to study physical system vibrations. Two main topics are explored: 1. Generative models for creating synthetic data similar to those obtained via physical processes and 2. employing transformers and the attention mechanism in order to model and forecast physical vibration data.</p> <p>A variational autoencoder is trained on physical vortex-induced vibration data in order to generate synthetic data of the vibrations. The VAE is certainly able to generate data which resemble the physical data visually. 
Moreover, the generative process is confirmed to preserve the physicality of the data at least partially: a transformer trained on synthetic data only is capable of predicting real experimental data to reasonable accuracy. In that sense, the VAE can be viewed as a tool which learns the underlying physical traits of the data and can be used not only to augment physical datasets but also to simulate and understand the underlying physical mechanisms by examining synthetic data. With that being said, a recommended future research direction would be to examine whether the outputs of the VAE satisfy physical equations of interest and how those could perhaps be included as an additional loss term when training the VAE, i.e. having a physics-informed decoder network.</p> <p>A transformer architecture for forecasting unsteady and nonstationary vortex-induced vibrations, the VIVformer, is developed. The VIVformer architecture combines multi-head attention modules and fully connected network modules with residual connections in order to model and forecast the physical vibration time-series in both space and time. The optimized VIVformer architecture can forecast flexible body VIV in time-space to reasonable accuracy both instantaneously and on average. Testing the performance of the VIVformer while gradually decreasing the input information would yield a deeper understanding of the capabilities of the architecture; in addition, testing the extended time horizon predictions of the model would certainly be a recommendation for future research.</p> Recovering Latent Variables with VAEs despite Training Bias 2022-12-01T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2022/Recovering Latent Variables with VAEs despite Training Bias <h2 id="introduction">Introduction</h2> <p>In this age of abundant unlabeled data, unsupervised learning is capitalizing to power the generative models that are eating the world. 
Large Language Models and Diffusion models are focalizing most of the mainstream hype and therefore siphoning attention from other generative models. In particular, the Variational Autoencoder (VAE) is a model architecture that has been arguably overlooked in the recent onslaught for scaling transformer and diffusion-based models. VAEs are a promising latent variable model that allows for the learning of disentangled latent variables that model data.</p> <p>As models scale in size, so does concern grow about the lack of interpretability associated with Neural Networks. Latent variable models offer a solution to this problem since they can learn variables that factorize the data generation process. VAEs are particularly well suited for learning latent variables in an unsupervised setting since they use an unsupervised learning objective and are regularized to learn disentangled encodings of our data. VAEs have been applied in a broad range of settings, such as classifying out-of-distribution data <d-cite key="xiao2020likelihood"></d-cite>, fair decision making <d-cite key="10.1145/3287560.3287564"></d-cite>, causal inference <d-cite key="louizos2017causal"></d-cite>, representation learning, data augmentation, and others. Although VAEs have demonstrated the capability to recover ground truth latent variables, they often recover mock factors that can generate the training dataset but differ mechanistically from the ground truth data generation process. For instance, in lecture we demonstrated that a VAE trained on cartoon images of rivers learned to encode aggregate river curvature as a latent variable. The ground-truth data-generating random variables were an ordered set of Bernoulli random variables indicating if the river angled its trajectory to the left or to the right at the particular timestep. 
The VAE’s shortcoming in recovering the real latent variables is expected from a Bayesian perspective, since we assume an isotropic Gaussian prior for continuous latent variables, and impose a bottleneck on the number of latent variables. Even though we do not recover the ground-truth data generating random variables, we learn latent variables that are qualitatively useful and capture macro latent phenomena about the data. This segues into an interesting question—when do VAEs fail to recover useful latent variables?</p> <p>In particular, we will choose the setting in which our training data is biased, but we still seek to learn insightful representations of the data. This is an especially well-motivated setting, since in unsupervised learning, we often do not have any guarantees about the distribution of our training data, yet we still aim to learn generalizable latent variables. It would be ideal if the VAE’s ability to recover generalizable latent variables is robust to training bias. Relating to the cartoon example from lecture, if the probability parameter for the data-generating random variables was skewed so that right-curving rivers are more likely (i.e. \(P(\text{right}) = 0.9\) instead of \(P(\text{right}) = 0.5\)), would we still learn useful latent variables, or would latent variables instead model what we assume to be observational noise? If we learn the former, then we would still be able to sample in latent space to generate left-curving rivers. Intuitively, we will not be able to generate samples out of distribution with the training data (i.e. left-curving rivers), however this may not be the case due to the way VAEs assume a prior. In this project, we will examine this setting to determine if higher regularization of the prior increases model robustness to training bias.</p> <h2 id="background">Background</h2> <p>VAEs are useful as encoders for downstream tasks, and as generative models. 
Compared to vanilla autoencoders, they offer significant advantages, since they provide some assurances regarding the distribution of its latent variables. Unlike VAEs, standard Autoencoders can have arbitrarily distributed embeddings, making them poor generative models, since there is no straightforward way to sample in latent space so that we generate samples in distribution with our training data. VAEs are similar to standard Autoencoders, however, they are trained with a modified loss function that ensures the learned embedding space is regularized towards an isotropic Gaussian (there exist alternative choices regarding which distribution we regularize towards, but Gaussian Mixture Models are the most popular as it stands due to their simple parameterization and empirical success). Additionally, instead of simply compressing the input with a neural network during the forward pass, the encoder of a VAE outputs a mean and covariance, defining a distribution from which we sample to obtain our latent variables.</p> <p>Since the VAE loss function regularizes our latent variables towards an isotropic Gaussian, encoded data is both disentangled and interpretable. To use trained VAEs as generative models, we simply sample latent variables i.i.d. from the Gaussian distribution and pass it through the VAE decoder to generate samples in distribution with our training data. VAEs also offer significant advantages as encoders, since regularization encourages them to learn factored, disentangled representations of the data. Finally, VAEs are particularly well-suited for interpretability since regularization encourages each latent variable to capture a unique aspect of the data.</p> <h2 id="related-work">Related Work</h2> <p>There has been significant prior work studying regularization and choice of priors in VAEs. 
Notably, \(\beta\)-VAE <d-cite key="higgins2017betavae"></d-cite> introduces the beta parameter to control the degree to which the VAE loss function penalizes the KL divergence of the latent variable distribution with the chosen prior (an isotropic Gaussian in their case). Higgins et al. demonstrate that introducing the beta parameter allows the VAE encoder to learn quantitatively more disentangled latent variables. They introduce a novel quantitative metric to evaluate the disentanglement of latent space and show that \(\beta\)-VAE improves on existing methods. Furthermore, they train a \(\beta\)-VAE on a dataset of faces (celebA) and qualitatively show that \(\beta\) regularization allows for the factorization of previously entangled latent variables such as azimuth and emotion.</p> <p>There have been several iterations on \(\beta\)-VAE such as Factor-VAE <d-cite key="kim2019disentangling"></d-cite>. Kim and Mnih point out that although \(\beta\) regularization improves disentanglement in embedding space, it does so at the cost of reconstruction quality. To reduce this trade-off and still encourage disentanglement, they introduce a term to the VAE loss function that penalizes the KL divergence between the joint distribution and the product of the marginals, instead of with an isotropic Gaussian as in \(\beta\)-VAE.</p> <p>Selecting an appropriate data prior is fundamental when performing Bayesian inference. In vanilla VAEs, we often assume an isotropic Gaussian prior for our latent variables, however, this is not always a good assumption, making it difficult to converge <d-cite key="miao2022on"></d-cite>. Miao et al. propose InteL-VAE, a VAE architecture capable of learning more flexible latent variables that can satisfy properties such as sparsity even when the data has significant distributional differences from a Gaussian. 
Their contributions allow for higher customizability of latent variables while bypassing many of the convergence issues commonplace with other methods that assume non-Gaussian priors.</p> <p>Since, under ideal conditions, VAEs recover factorized latent variables, causal inference has become a standard setting for their application. Madras et al. propose structured causal models to recover hidden “causal effects” with the aim of improving fairness when presented with biased data <d-cite key="10.1145/3287560.3287564"> </d-cite>. They specify a framework where we want to recover the latent factors so that decision making in applications such as loan assignment and school admissions can be approached fairly. Admittedly, Structured Causal Modeling (SCM) is arguably a better setting for further work on our proposed research question. However, this field is largely outside of the scope of the course, so we will only observe that Madras et al. utilize a model where causal factors, which are analogous to our ground truth latent variables, affect a decision and an outcome, and that they utilize a Bayesian framework to perform variational inference. Future iterations of our research should borrow methods from this field of Mathematics for maximum impact. Louizos et al. propose the Causal Effect VAE <d-cite key="louizos2017causal"></d-cite>, marrying the adjacent fields and setting the stage for future research.</p> <p>Although there is plenty of research adjacent to our particular question of interest, \(\beta\)-VAE investigates how \(\beta\)-regularization affects disentanglement, but not robustness to training bias. Other works that investigate the ability of latent variable models to recover the ground truth in the presence of training bias are not concerned with \(\beta\)-regularization. \(\beta\)-regularization has been shown to be effective, in addition to being extremely simple to implement, compared to other regularization techniques. 
Thus it is an ideal candidate for directed research on how regularization affects VAE robustness to training bias. Our question is novel, supported by adjacent research, and reasonable to implement with the resources available to an undergraduate student.</p> <h2 id="set-up-and-methods">Set-up and Methods</h2> <h3 id="data">Data</h3> <p>More concretely, suppose that there exists a data generating function \(\mathcal{G}: Z \to X\) that generates our training dataset given random variables \(Z \sim p_{\text{data}}\). For simplicity, our data will be nxn grids of squares, where the intensity of each square is deterministically proportional to its respective random variable. To create our training dataset, we sample \(n^2\) random variables from an isotropic Gaussian distribution with mean \(\mu\) and covariance I. We then apply a sigmoid activation to the random variables so that values are in the range [0,1]. We then create a mn x mn image with mxm pixel grids for each random variable. Finally, we add Gaussian noise to the image. We choose n=3, m=7, and train a VAE for each value of \(\mu\) in the set {0, 1/2, 1, 3/2, … 5}.</p> <h4 id="training-data">Training Data</h4> <p>The following figure shows example training images before noising. 
Each row has 21 images drawn from the distribution defined by applying a sigmoid activation to a normally-distributed random variable with variance 1 and mean specified by the row index.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/example%20training-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/example%20training-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/example%20training-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/example%20training.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>And here are some images with some noise added.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/example%20training%20noised-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/example%20training%20noised-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/example%20training%20noised-1400.webp" /> <!-- Fallback to the original file --> <img 
src="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/example%20training%20noised.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h4 id="test-dataset">Test Dataset</h4> <p>To create our test dataset, we discretize the domain of latent variables by binning. We then enumerate all possible combinations of latent variables, and generate corresponding images without adding noise. We restrict the domain of the generating variables to {0.1, 0.5, 0.9}, and enumerate all possible combinations. This yields a test dataset of 19683 images.</p> <h5 id="example-test-images">Example Test Images</h5> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/example%20test%20images-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/example%20test%20images-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/example%20test%20images-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/example%20test%20images.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h3 id="training">Training</h3> <p>With this setup, the structure of our latent space matches that of the ground-truth latent variables, creating an appropriate setting in which to test how training bias and regularization affect the quality of learned models. Our pipeline is as follows. 
We train a VAE on its associated training set by maximizing the ELBO. After T training steps, we then train a linear projection head from the ground-truth latent variables to our learned latent variables. Even if we fully recover the ground-truth latent variables in our model, there is no assurance that we will not learn some permutation of the ground-truth latent variables. Thus in order to test if a particular latent variable was learned in our model, we must utilize such a projection to map from ground truth to learned latent variables, then decode the sample and evaluate the generated image.</p> <p>Although the Mutual Information between the ground truth latent variables \(z \sim p_z\) and the learned latent variables \(\hat{z} \sim p_\hat{z}\) would be a more encompassing gauge of whether the VAE recovered the latent variables, using a linear projection in lieu of a Mutual Information estimator such as MINE <d-cite key="belghazi2021mine"></d-cite> is justified for the following reasons. Namely, we assume an isotropic Gaussian during training, so a good VAE will learn disentangled latent variables that will be off by at most a rotation from the ground truth latent variables. Furthermore, we control the data generation process so that data is generated by \(n^2\) normally distributed random variables. Thus we can assume that a linear projection is sufficient to recover the ground truth latent variables from our learned latent variables. 
Furthermore, given the time constraints and resources allocated for this project, simply training a linear projection and taking the final mean squared error as a proxy for mutual information allows for simpler implementation.</p> <p>We train with the Adam optimizer.</p> <table> <thead> <tr> <th>Hyperparameter</th> <th style="text-align: right">Value</th> </tr> </thead> <tbody> <tr> <td>VAE training steps</td> <td style="text-align: right">10000</td> </tr> <tr> <td>Linear Projection Training Epochs</td> <td style="text-align: right">3</td> </tr> <tr> <td>Training noise mean</td> <td style="text-align: right">0</td> </tr> <tr> <td>Training noise variance</td> <td style="text-align: right">0.25</td> </tr> </tbody> </table> <h2 id="results">Results</h2> <h3 id="training-observations">Training Observations</h3> <p>During the unsupervised training phase where we train the various VAE models on their respective training sets, we observe that dataset choice and penalization of the KL divergence (beta hyperparameter) have consistent effects on the training curves. The following charts demonstrate that increased penalization of the KL divergence results in higher training loss, as well as noisier training loss and longer convergence times. This is expected since higher regularization directly increases the loss and its associated noise. We approximate the KL divergence by drawing one sample, which is highly variable, but tends to work empirically. We also observe that higher training bias (i.e. higher pre-activation mean of the data generating latent variables) results in higher training loss. As we increase this training bias, it becomes harder and harder to disambiguate latent features from noise. 
Thus models learn uninterpretable latent variables and poor decoders that learn to trivially output the dominating color (white).</p> <div class="row mt-3"> <div class="col-md mt-3 mt-md-0"> <h6>Training Curves Varying Training Distribution</h6> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/beta%20=%201-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/beta%20=%201-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/beta%20=%201-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/beta%20=%201.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-md mt-3 mt-md-0"> <h6>Training Curves Varying $\beta$-Regularization</h6> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/mu=%5B0%5D%20training%20curves-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/mu=%5B0%5D%20training%20curves-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/mu=%5B0%5D%20training%20curves-1400.webp" /> <!-- Fallback to the original file --> <img 
src="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/mu=%5B0%5D%20training%20curves.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <h3 id="evaluation">Evaluation</h3> <p>The following figure shows a heat map of our Proxy for measuring Mutual Information (which we will refer to as PMI) between the learned latent variables \(\hat{Z}\) and the true latent variables \(Z\).</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/MSE%20projection%20head-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/MSE%20projection%20head-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/MSE%20projection%20head-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/MSE%20projection%20head.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>Note that when we randomly initialized a VAE and then trained linear projections from the ground truth latents to recovered latents, we achieved a PMI of 0.1121 (averaged over 3 runs with identical training parameters). The heatmap shows that we almost completely recovered the ground-truth latent variables with low regularization and low training bias. 
As training bias increases, the model recovers less and less informative representations of the true latent variables.</p> <p>Another heuristic that we can utilize to estimate the Mutual Information between the recovered latents and the ground truth latents is the mean squared error between \(\mathcal{G}(z)\) and \(\mathcal{D}_\text{VAE}(P(z))\) averaged over our test set, where P is the learned linear projection from \(Z \to \hat{Z}\) and \(\mathcal{D}_\text{VAE}\) is the VAE decoder. The following heatmap visualizes this metric.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/MSE%20generating%20on%20test%20set-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/MSE%20generating%20on%20test%20set-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/MSE%20generating%20on%20test%20set-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-12-12-Recovering%20Latent%20Variables%20with%20VAEs%20despite%20Training%20Bias/MSE%20generating%20on%20test%20set.png" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <h2 id="conclusion-and-future-work">Conclusion and Future Work</h2> <p>From the collected data, it is visually clear that there exists a relationship between \(\beta\)-regularization and training bias. In both heat maps, there are reasonably well-defined diagonal level surfaces, indicating that there is some relationship between regularization towards an isotropic Gaussian prior and robustness to training bias. 
Validation and further experiments are required to legitimize this conclusion, however, these experiments are an indication that conscious regularization can be a useful technique to mitigate training biases of a particular form. At this point, further work is required to interpret the results, since it is not clear why we seem to observe inverse relationships between the \(\beta\)-regularization and training bias when we involve the decoder.</p> <p>It is also worth noting that during pretraining, VAEs were trained for a fixed number of training steps, and not until convergence. Thus it is highly plausible that models with higher \(\beta\)-regularization (i.e. models with \(\beta &gt; 1\)) were not trained to completion, and therefore can not be fairly evaluated with mutual information estimators without further training. Given my computational and temporal constraints, it was not reasonable to run experiments with longer training. Future work will have to validate my findings by pretraining for longer and testing a finer resolution of \(\beta\) parameters. Finally, it will be interesting to expand this work to more interesting datasets such as celebA and inject training bias by resampling the dataset according to some variables such as hair color or skin tone. Once we move beyond the assumptions assured by pet data, we can reevaluate what relationships hold true as we gradually add the complexity inherent to the real world.</p> Recurrent Recommender System with Incentivized Search 2022-12-01T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2022/proposal_JingpengHong <h2 id="introduction">Introduction</h2> <p>Numerous deep learning based recommender systems have been proposed recently <d-cite key="10.1145/3285029"></d-cite>. Especially, the sequential structure of session or click-logs are highly suitable for the inductive biases provided by recurrent/convolutional neural networks <d-cite key="hidasi2016sessionbased"></d-cite>. 
In such a setting, the input of the network is a sequence of consumers’ search behavior, while the output is the predicted preference of the items, i.e. the likelihood of being the next in the session for each item. The ultimate goal is to pinpoint the optimal product for the consumer, thereby increasing sales. An example of where this could be applied is the “featured product” on platforms like Amazon.</p> <p>However, a challenge with this model is the sparsity of data. It’s well-known that the products in retail have the “long-tail” feature. Only a small fraction, say 5%, of a site’s products are ever browsed or bought by customers, leaving no data on the remaining products. Additionally, customer sessions tend to be brief, limiting the amount of information we can get from any one individual. This issue is particularly acute for “data-hungry” models, which may not have sufficient training data with enough variation to accurately match products with customers.</p> <p>My proposed solution to this issue is to recommend products that also encourage further exploration. Economic studies have shown that certain types of information structure can motivate customers to consider more options, harnessing the “wisdom of crowds” <d-cite key="kremer2014implementing"></d-cite><d-cite key="che2018recommender"></d-cite>. Imagine two products: recommending the first leads to a 5% purchase likelihood, while the second has a 4% chance. But the second item prompts the customer to look at 5 additional products. This extra data allows our model to learn more, potentially enhancing recommendations for this and other customers in the future. Therefore, we might choose to recommend the second product to generate more user-driven training data.</p> <p>In this project, we consider multi-task learning that achieves better performance along the entire customer journey. 
The conventional conversion rate based model estimates</p> \[P(conversion|click, impression, u_i, v_j)\] <p>where \(u_i\) are users’ features and \(v_j\) are items’ features.</p> <p>We decompose the conversion rate into</p> \[P(conversion, click|impression, u_i, v_j) = P(click|impression, u_i, v_j) \times P(conversion|click, u_i, v_j)\] <p>Hence, we have two auxiliary tasks for predicting both the click-through rate and the conversion rate. Such an approach has two advantages. First, the task for estimating the click-through rate generally has richer training data because we train on dataset with all impressions instead of the subsample with purchase. Second, we recommend products with both high probability of clicking and purchasing, leading to more training data points in future time periods. This can help us tackle the challenge of data sparsity <d-cite key="ma2018entire"></d-cite>.</p> <h2 id="literature">Literature</h2> <p>Recommender Systems are usually classified into three categories <d-cite key="1423975"></d-cite>: (i) collaborative filtering, (ii) content-based, and (iii) hybrid.</p> <ol> <li> <p>Collaborative filtering. The input for the algorithm can be [User, Item, Outcome, Timestamp]. The task is to complete the matrix \(R\), where each column is an item and each row is a user, with the majority of missing elements. The memory based collaborative filtering finds pairs of user \(i\) and \(i'\) using similarity metrics. The model based collaborative filtering decomposes \(R^{m\times n} = U^{m\times k}I^{k\times n}\) using matrix factorization, where \(k\) is the dimension of latent factors.</p> </li> <li> <p>Content-based. The input for the algorithm can be [User features, Item features, Outcome]. The task is to predict \(y=f(u_i, v_j)\), where \(y\) is the outcome and \(u_i\) and \(v_j\) are features of users and items respectively.</p> </li> <li> <p>Hybrid. 
we consider a simple linear model <d-cite key="1423975"></d-cite>:</p> </li> </ol> \[r_{ij} = x_{ij}\mu+z_i\gamma_j+w_j\lambda_i+\epsilon_{ij}\] <p>where \(x_{ij}\) is the collaborative filtering component indicating the interaction, \(z_i\) are users’ features and \(w_j\) are items’ features. \(\gamma_j\) and \(\lambda_i\) are random coefficients. We can also apply matrix factorization to reduce the dimension of interaction matrix \(x_{ij}\). A recent application in marketing can be found in <d-cite key="10.1145/3523227.3547379"></d-cite>.</p> <p>The core idea in collaborative filtering is “Similar consumers like similar products”. The similarity is defined on consumers’ revealed preference. However, the content-based approach implicitly assumes users and items should be similar if they are neighbors in feature space, which may or may not be true. The limitation of collaborative filtering is that we require a sufficient amount of interaction data, which is hard if we consider the sparsity and cold start problems.</p> <p>Moreover, deep learning based recommender systems have gained significant attention by capturing the non-linear and non-trivial user-item relationships, and enable the codification of more complex abstractions as data representations in the higher layers. A nice survey for deep learning based recommender system can be found in <d-cite key="10.1145/3285029"></d-cite>. Deep learning based recommender system can have several strengths compared to conventional models:</p> <ol> <li> <p>It’s possible to capture complex non-linear user-item interactions. For example, when we model collaborative filtering by matrix factorization, we essentially use the low-dimensional linear model. 
The non-linear property makes it possible to deal with complex interaction patterns and precisely reflect user’s preference <d-cite key="HORNIK1989359"></d-cite>.</p> </li> <li> <p>Architecture, such as RNN and CNN, are widely applicable and flexible in mining sequential structure in data. For example, <d-cite key="10.1145/2988450.2988451"></d-cite> presented a co-evolutionary latent model to capture the co-evolution nature of users’ and items’ latent features. There are works dealing with the temporal dynamics of interactions and sequential patterns of user behaviours using CNN or RNN <d-cite key="tang2018personalized"></d-cite> <d-cite key="10.1145/2959100.2959167"></d-cite>.</p> </li> <li> <p>Representation learning can be an effective method to learn the latent factor models that are widely used in recommender systems. There are works that incorporate methods such as autoencoder in traditional recommender system frameworks we summarize above. For example, autoencoder based collaborative filtering <d-cite key="10.1145/2740908.2742726"></d-cite>, and adversarial network (GAN) based recommendation <d-cite key="10.1145/3077136.3080786"></d-cite>.</p> </li> </ol> <h2 id="model">Model</h2> <p>We implement the multi-task learning similar to <d-cite key="ma2018entire"></d-cite>:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-10-proposal_JingpengHong/multitask-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-10-proposal_JingpengHong/multitask-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-10-proposal_JingpengHong/multitask-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-10-proposal_JingpengHong/multitask.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> 
</picture> </figure> <p>However, we differ from the model in <d-cite key="ma2018entire"></d-cite> in two ways:</p> <ol> <li> <p>For user field, we implement RNN to deal with the sequential clickstream data instead of simple MLP.</p> </li> <li> <p>We define the loss function over samples of all impressions. The loss of conversion rate task and the loss of click-through rate task will not be used separately because both of them are based on subsamples (conditional on click and conditional on purchase).</p> </li> </ol> \[L(\theta_{click}, \theta_{conversion})=\sum_{i=1}^N l(click_i, f(u_i, v_j))+\sum_{i=1}^N l(click_i, purchase_i, f(u_i, v_j))\] <h2 id="experiment">Experiment</h2> <p>The dataset we use is a random subsample from <d-cite key="ma2018entire"></d-cite>, which is the traffic logs from Taobao’s recommender system. We do a 1% random sampling, though the public dataset in <d-cite key="ma2018entire"></d-cite> has already been a 1% random sampling of the raw data. The summary statistics of the data can be found in <d-cite key="ma2018entire"></d-cite>.</p> <p>For the performance metrics, we use Area under the ROC curve (AUC).</p> <p>Several benchmark models we use for comparison:</p> <ol> <li> <p>DeepFM <d-cite key="10.5555/3172077.3172127"></d-cite>. This is a factorization-machine based neural network for click-through rate prediction. In my setting, I consider it as a single-task model with MLP structure.</p> </li> <li> <p>MMOE <d-cite key="10.1145/3219819.3220007"></d-cite>. This is the multi-task setting. However, since the use case is MovieLens, where two tasks are “finish” and “like”, it doesn’t consider the type of sequential data. In my setting, I consider it as a multi-task model with MLP structure.</p> </li> <li> <p>xDeepFM <d-cite key="10.1145/3219819.3220023"></d-cite>. 
This model combines both explicit and implicit feature interactions for recommender systems using a novel Compressed Interaction Network (CIN), which shares some functionalities with CNNs and RNNs. In my setting, I consider it as a single-task model with RNN/CNN structure.</p> </li> <li> <p>Our Model, a multi-task model with RNN/CNN structure.</p> </li> </ol> <p>Results:</p> <table> <thead> <tr> <th>Model</th> <th style="text-align: center">test AUC</th> <th style="text-align: center">test click AUC</th> <th style="text-align: center">test conversion AUC</th> </tr> </thead> <tbody> <tr> <td>DeepFM</td> <td style="text-align: center">0.3233</td> <td style="text-align: center"> </td> <td style="text-align: center"> </td> </tr> <tr> <td>MMOE</td> <td style="text-align: center"> </td> <td style="text-align: center">0.5303</td> <td style="text-align: center">0.6053</td> </tr> <tr> <td>xDeepFM</td> <td style="text-align: center">0.4093</td> <td style="text-align: center"> </td> <td style="text-align: center"> </td> </tr> <tr> <td>Ours</td> <td style="text-align: center"> </td> <td style="text-align: center">0.5505</td> <td style="text-align: center">0.6842</td> </tr> </tbody> </table> Sample Blog Post 2022-12-01T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2022/distill-example <h2 id="equations">Equations</h2> <p>This theme supports rendering beautiful math in inline and display modes using <a href="https://www.mathjax.org/">MathJax 3</a> engine. You just need to surround your math expression with <code class="language-plaintext highlighter-rouge">$$</code>, like <code class="language-plaintext highlighter-rouge">$$ E = mc^2 $$</code>. If you leave it inside a paragraph, it will produce an inline expression, just like \(E = mc^2\).</p> <p>To use display mode, again surround your expression with <code class="language-plaintext highlighter-rouge">$$</code> and place it as a separate paragraph. 
Here is an example:</p> \[\left( \sum_{k=1}^n a_k b_k \right)^2 \leq \left( \sum_{k=1}^n a_k^2 \right) \left( \sum_{k=1}^n b_k^2 \right)\] <p>Note that MathJax 3 is <a href="https://docs.mathjax.org/en/latest/upgrading/whats-new-3.0.html">a major re-write of MathJax</a> that brought a significant improvement to the loading and rendering speed, which is now <a href="http://www.intmath.com/cg5/katex-mathjax-comparison.php">on par with KaTeX</a>.</p> <h2 id="images-and-figures">Images and Figures</h2> <p>Its generally a better idea to avoid linking to images hosted elsewhere - links can break and you might face losing important information in your blog post. To include images in your submission in this way, you must do something like the following:</p> <div class="language-markdown highlighter-rouge"><div class="highlight"><pre class="highlight"><code>{% include figure.html path="assets/img/2022-12-01-distill-example/iclr.png" class="img-fluid" %} </code></pre></div></div> <p>which results in the following image:</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-12-01-distill-example/iclr-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-12-01-distill-example/iclr-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-12-01-distill-example/iclr-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-12-01-distill-example/iclr.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p>To ensure that there are no namespace conflicts, you must save your asset to your unique directory <code class="language-plaintext highlighter-rouge">/assets/img/2023-05-01-[SUBMISSION NAME]</code> within your submission.</p> <p>Please avoid using the direct markdown method of embedding 
images; they may not be properly resized. Some more complex ways to load images (note the different styles of the shapes/shadows):</p> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-12-01-distill-example/9-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-12-01-distill-example/9-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-12-01-distill-example/9-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-12-01-distill-example/9.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-12-01-distill-example/7-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-12-01-distill-example/7-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-12-01-distill-example/7-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-12-01-distill-example/7.jpg" class="img-fluid rounded z-depth-1" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="caption"> A simple, elegant caption looks good between image rows, after each row, or doesn't have to be there at all. 
</div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-12-01-distill-example/8-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-12-01-distill-example/8-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-12-01-distill-example/8-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-12-01-distill-example/8.jpg" class="img-fluid z-depth-2" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-12-01-distill-example/10-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-12-01-distill-example/10-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-12-01-distill-example/10-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-12-01-distill-example/10.jpg" class="img-fluid z-depth-2" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <div class="row mt-3"> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-12-01-distill-example/11-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-12-01-distill-example/11-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-12-01-distill-example/11-1400.webp" /> <!-- Fallback to the original file --> 
<img src="/staging/assets/img/2022-12-01-distill-example/11.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-12-01-distill-example/12-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-12-01-distill-example/12-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-12-01-distill-example/12-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-12-01-distill-example/12.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> <div class="col-sm mt-3 mt-md-0"> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-12-01-distill-example/7-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-12-01-distill-example/7-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-12-01-distill-example/7-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-12-01-distill-example/7.jpg" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> </div> </div> <h3 id="interactive-figures">Interactive Figures</h3> <p>Here’s how you could embed interactive figures that have been exported as HTML files. Note that we will be using plotly for this demo, but anything built off of HTML should work (<strong>no extra javascript is allowed!</strong>). 
All that’s required is for you to export your figure into HTML format, and make sure that the file exists in the <code class="language-plaintext highlighter-rouge">assets/html/[SUBMISSION NAME]/</code> directory in this repository’s root directory. To embed it into any page, simply insert the following code anywhere into your page.</p> <div class="language-markdown highlighter-rouge"><div class="highlight"><pre class="highlight"><code>{% include [FIGURE_NAME].html %} </code></pre></div></div> <p>For example, the following code can be used to generate the figure underneath it.</p> <div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kn">import</span> <span class="n">pandas</span> <span class="k">as</span> <span class="n">pd</span> <span class="kn">import</span> <span class="n">plotly.express</span> <span class="k">as</span> <span class="n">px</span> <span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="p">.</span><span class="nf">read_csv</span><span class="p">(</span><span class="sh">'</span><span class="s">https://raw.githubusercontent.com/plotly/datasets/master/earthquakes-23k.csv</span><span class="sh">'</span><span class="p">)</span> <span class="n">fig</span> <span class="o">=</span> <span class="n">px</span><span class="p">.</span><span class="nf">density_mapbox</span><span class="p">(</span> <span class="n">df</span><span class="p">,</span> <span class="n">lat</span><span class="o">=</span><span class="sh">'</span><span class="s">Latitude</span><span class="sh">'</span><span class="p">,</span> <span class="n">lon</span><span class="o">=</span><span class="sh">'</span><span class="s">Longitude</span><span class="sh">'</span><span class="p">,</span> <span class="n">z</span><span class="o">=</span><span class="sh">'</span><span class="s">Magnitude</span><span class="sh">'</span><span class="p">,</span> <span class="n">radius</span><span class="o">=</span><span 
class="mi">10</span><span class="p">,</span> <span class="n">center</span><span class="o">=</span><span class="nf">dict</span><span class="p">(</span><span class="n">lat</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">lon</span><span class="o">=</span><span class="mi">180</span><span class="p">),</span> <span class="n">zoom</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">mapbox_style</span><span class="o">=</span><span class="sh">"</span><span class="s">stamen-terrain</span><span class="sh">"</span><span class="p">)</span> <span class="n">fig</span><span class="p">.</span><span class="nf">show</span><span class="p">()</span> <span class="n">fig</span><span class="p">.</span><span class="nf">write_html</span><span class="p">(</span><span class="sh">'</span><span class="s">./assets/html/2022-12-01-distill-example/plotly_demo_1.html</span><span class="sh">'</span><span class="p">)</span> </code></pre></div></div> <p>And then include it with the following:</p> <div class="language-html highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nt">&lt;div</span> <span class="na">class=</span><span class="s">"l-page"</span><span class="nt">&gt;</span> <span class="nt">&lt;iframe</span> <span class="na">src=</span><span class="s">"{{ 'assets/html/2022-12-01-distill-example/plotly_demo_1.html' | relative_url }}"</span> <span class="na">frameborder=</span><span class="s">'0'</span> <span class="na">scrolling=</span><span class="s">'no'</span> <span class="na">height=</span><span class="s">"600px"</span> <span class="na">width=</span><span class="s">"100%"</span><span class="nt">&gt;&lt;/iframe&gt;</span> <span class="nt">&lt;/div&gt;</span> </code></pre></div></div> <p>Voila!</p> <div class="l-page"> <iframe src="/staging/assets/html/2022-12-01-distill-example/plotly_demo_1.html" frameborder="0" scrolling="no" height="600px" width="100%"></iframe> </div> 
<h2 id="citations">Citations</h2> <p>Citations are then used in the article body with the <code class="language-plaintext highlighter-rouge">&lt;d-cite&gt;</code> tag. The key attribute is a reference to the id provided in the bibliography. The key attribute can take multiple ids, separated by commas.</p> <p>The citation is presented inline like this: <d-cite key="gregor2015draw"></d-cite> (a number that displays more information on hover). If you have an appendix, a bibliography is automatically created and populated in it.</p> <p>Distill chose a numerical inline citation style to improve readability of citation dense articles and because many of the benefits of longer citations are obviated by displaying more information on hover. However, we consider it good style to mention author last names if you discuss something at length and it fits into the flow well — the authors are human and it’s nice for them to have the community associate them with their work.</p> <hr /> <h2 id="footnotes">Footnotes</h2> <p>Just wrap the text you would like to show up in a footnote in a <code class="language-plaintext highlighter-rouge">&lt;d-footnote&gt;</code> tag. The number of the footnote will be automatically generated.<d-footnote>This will become a hoverable footnote.</d-footnote></p> <hr /> <h2 id="code-blocks">Code Blocks</h2> <p>This theme implements a built-in Jekyll feature, the use of Rouge, for syntax highlighting. It supports more than 100 languages. This example is in C++. All you have to do is wrap your code in a liquid tag:</p> <p>{% highlight c++ linenos %} <br /> code code code <br /> {% endhighlight %}</p> <p>The keyword <code class="language-plaintext highlighter-rouge">linenos</code> triggers display of line numbers. 
You can try toggling it on or off yourself below:</p> <figure class="highlight"><pre><code class="language-c--" data-lang="c++"><span class="kt">int</span> <span class="nf">main</span><span class="p">(</span><span class="kt">int</span> <span class="n">argc</span><span class="p">,</span> <span class="kt">char</span> <span class="k">const</span> <span class="err">\</span><span class="o">*</span><span class="n">argv</span><span class="p">[])</span> <span class="p">{</span> <span class="n">string</span> <span class="n">myString</span><span class="p">;</span> <span class="n">cout</span> <span class="o">&lt;&lt;</span> <span class="s">"input a string: "</span><span class="p">;</span> <span class="n">getline</span><span class="p">(</span><span class="n">cin</span><span class="p">,</span> <span class="n">myString</span><span class="p">);</span> <span class="kt">int</span> <span class="n">length</span> <span class="o">=</span> <span class="n">myString</span><span class="p">.</span><span class="n">length</span><span class="p">();</span> <span class="kt">char</span> <span class="n">charArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">char</span> <span class="o">*</span> <span class="p">[</span><span class="n">length</span><span class="p">];</span> <span class="n">charArray</span> <span class="o">=</span> <span class="n">myString</span><span class="p">;</span> <span class="k">for</span><span class="p">(</span><span class="kt">int</span> <span class="n">i</span> <span class="o">=</span> <span class="mi">0</span><span class="p">;</span> <span class="n">i</span> <span class="o">&lt;</span> <span class="n">length</span><span class="p">;</span> <span class="o">++</span><span class="n">i</span><span class="p">){</span> <span class="n">cout</span> <span class="o">&lt;&lt;</span> <span class="n">charArray</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">&lt;&lt;</span> <span class="s">" "</span><span 
class="p">;</span> <span class="p">}</span> <span class="k">return</span> <span class="mi">0</span><span class="p">;</span> <span class="p">}</span></code></pre></figure> <hr /> <h2 id="diagrams">Diagrams</h2> <p>This theme supports generating various diagrams from a text description using the <a href="https://github.com/zhustec/jekyll-diagrams" target="_blank">jekyll-diagrams</a> plugin. Below, we generate a few examples of such diagrams using languages such as <a href="https://mermaid-js.github.io/mermaid/" target="_blank">mermaid</a>, <a href="https://plantuml.com/" target="_blank">plantuml</a>, <a href="https://vega.github.io/vega-lite/" target="_blank">vega-lite</a>, etc.</p> <p><strong>Note:</strong> different diagram-generation packages require external dependencies to be installed on your machine. Also, be mindful that, because of diagram generation, the first time you build your Jekyll website after adding new diagrams will be SLOW. For any other details, please refer to the <a href="https://github.com/zhustec/jekyll-diagrams" target="_blank">jekyll-diagrams</a> README.</p> <p><strong>Note:</strong> This is not supported for local rendering!</p> <p>The diagram below was generated by the following code:</p> <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>{% mermaid %} sequenceDiagram participant John participant Alice Alice-&gt;&gt;John: Hello John, how are you? John--&gt;&gt;Alice: Great! 
{% endmermaid %} </code></pre></div></div> <div class="jekyll-diagrams diagrams mermaid"> <svg id="mermaid-1702923485927" width="100%" xmlns="http://www.w3.org/2000/svg" height="100%" style="max-width:450px;" viewBox="-50 -10 450 231"><style>#mermaid-1702923485927 .label{font-family:trebuchet ms,verdana,arial;color:#333}#mermaid-1702923485927 .node circle,#mermaid-1702923485927 .node ellipse,#mermaid-1702923485927 .node polygon,#mermaid-1702923485927 .node rect{fill:#ececff;stroke:#9370db;stroke-width:1px}#mermaid-1702923485927 .node.clickable{cursor:pointer}#mermaid-1702923485927 .arrowheadPath{fill:#333}#mermaid-1702923485927 .edgePath .path{stroke:#333;stroke-width:1.5px}#mermaid-1702923485927 .edgeLabel{background-color:#e8e8e8}#mermaid-1702923485927 .cluster rect{fill:#ffffde!important;stroke:#aa3!important;stroke-width:1px!important}#mermaid-1702923485927 .cluster text{fill:#333}#mermaid-1702923485927 div.mermaidTooltip{position:absolute;text-align:center;max-width:200px;padding:2px;font-family:trebuchet ms,verdana,arial;font-size:12px;background:#ffffde;border:1px solid #aa3;border-radius:2px;pointer-events:none;z-index:100}#mermaid-1702923485927 .actor{stroke:#ccf;fill:#ececff}#mermaid-1702923485927 text.actor{fill:#000;stroke:none}#mermaid-1702923485927 .actor-line{stroke:grey}#mermaid-1702923485927 .messageLine0{marker-end:"url(#arrowhead)"}#mermaid-1702923485927 .messageLine0,#mermaid-1702923485927 .messageLine1{stroke-width:1.5;stroke-dasharray:"2 2";stroke:#333}#mermaid-1702923485927 #arrowhead{fill:#333}#mermaid-1702923485927 #crosshead path{fill:#333!important;stroke:#333!important}#mermaid-1702923485927 .messageText{fill:#333;stroke:none}#mermaid-1702923485927 .labelBox{stroke:#ccf;fill:#ececff}#mermaid-1702923485927 .labelText,#mermaid-1702923485927 .loopText{fill:#000;stroke:none}#mermaid-1702923485927 .loopLine{stroke-width:2;stroke-dasharray:"2 2";marker-end:"url(#arrowhead)";stroke:#ccf}#mermaid-1702923485927 
.note{stroke:#aa3;fill:#fff5ad}#mermaid-1702923485927 .noteText{fill:#000;stroke:none;font-family:trebuchet ms,verdana,arial;font-size:14px}#mermaid-1702923485927 .section{stroke:none;opacity:.2}#mermaid-1702923485927 .section0{fill:rgba(102,102,255,.49)}#mermaid-1702923485927 .section2{fill:#fff400}#mermaid-1702923485927 .section1,#mermaid-1702923485927 .section3{fill:#fff;opacity:.2}#mermaid-1702923485927 .sectionTitle0,#mermaid-1702923485927 .sectionTitle1,#mermaid-1702923485927 .sectionTitle2,#mermaid-1702923485927 .sectionTitle3{fill:#333}#mermaid-1702923485927 .sectionTitle{text-anchor:start;font-size:11px;text-height:14px}#mermaid-1702923485927 .grid .tick{stroke:#d3d3d3;opacity:.3;shape-rendering:crispEdges}#mermaid-1702923485927 .grid path{stroke-width:0}#mermaid-1702923485927 .today{fill:none;stroke:red;stroke-width:2px}#mermaid-1702923485927 .task{stroke-width:2}#mermaid-1702923485927 .taskText{text-anchor:middle;font-size:11px}#mermaid-1702923485927 .taskTextOutsideRight{fill:#000;text-anchor:start;font-size:11px}#mermaid-1702923485927 .taskTextOutsideLeft{fill:#000;text-anchor:end;font-size:11px}#mermaid-1702923485927 .taskText0,#mermaid-1702923485927 .taskText1,#mermaid-1702923485927 .taskText2,#mermaid-1702923485927 .taskText3{fill:#fff}#mermaid-1702923485927 .task0,#mermaid-1702923485927 .task1,#mermaid-1702923485927 .task2,#mermaid-1702923485927 .task3{fill:#8a90dd;stroke:#534fbc}#mermaid-1702923485927 .taskTextOutside0,#mermaid-1702923485927 .taskTextOutside1,#mermaid-1702923485927 .taskTextOutside2,#mermaid-1702923485927 .taskTextOutside3{fill:#000}#mermaid-1702923485927 .active0,#mermaid-1702923485927 .active1,#mermaid-1702923485927 .active2,#mermaid-1702923485927 .active3{fill:#bfc7ff;stroke:#534fbc}#mermaid-1702923485927 .activeText0,#mermaid-1702923485927 .activeText1,#mermaid-1702923485927 .activeText2,#mermaid-1702923485927 .activeText3{fill:#000!important}#mermaid-1702923485927 .done0,#mermaid-1702923485927 .done1,#mermaid-1702923485927 
.done2,#mermaid-1702923485927 .done3{stroke:grey;fill:#d3d3d3;stroke-width:2}#mermaid-1702923485927 .doneText0,#mermaid-1702923485927 .doneText1,#mermaid-1702923485927 .doneText2,#mermaid-1702923485927 .doneText3{fill:#000!important}#mermaid-1702923485927 .crit0,#mermaid-1702923485927 .crit1,#mermaid-1702923485927 .crit2,#mermaid-1702923485927 .crit3{stroke:#f88;fill:red;stroke-width:2}#mermaid-1702923485927 .activeCrit0,#mermaid-1702923485927 .activeCrit1,#mermaid-1702923485927 .activeCrit2,#mermaid-1702923485927 .activeCrit3{stroke:#f88;fill:#bfc7ff;stroke-width:2}#mermaid-1702923485927 .doneCrit0,#mermaid-1702923485927 .doneCrit1,#mermaid-1702923485927 .doneCrit2,#mermaid-1702923485927 .doneCrit3{stroke:#f88;fill:#d3d3d3;stroke-width:2;cursor:pointer;shape-rendering:crispEdges}#mermaid-1702923485927 .activeCritText0,#mermaid-1702923485927 .activeCritText1,#mermaid-1702923485927 .activeCritText2,#mermaid-1702923485927 .activeCritText3,#mermaid-1702923485927 .doneCritText0,#mermaid-1702923485927 .doneCritText1,#mermaid-1702923485927 .doneCritText2,#mermaid-1702923485927 .doneCritText3{fill:#000!important}#mermaid-1702923485927 .titleText{text-anchor:middle;font-size:18px;fill:#000}#mermaid-1702923485927 g.classGroup text{fill:#9370db;stroke:none;font-family:trebuchet ms,verdana,arial;font-size:10px}#mermaid-1702923485927 g.classGroup rect{fill:#ececff;stroke:#9370db}#mermaid-1702923485927 g.classGroup line{stroke:#9370db;stroke-width:1}#mermaid-1702923485927 .classLabel .box{stroke:none;stroke-width:0;fill:#ececff;opacity:.5}#mermaid-1702923485927 .classLabel .label{fill:#9370db;font-size:10px}#mermaid-1702923485927 .relation{stroke:#9370db;stroke-width:1;fill:none}#mermaid-1702923485927 #compositionEnd,#mermaid-1702923485927 #compositionStart{fill:#9370db;stroke:#9370db;stroke-width:1}#mermaid-1702923485927 #aggregationEnd,#mermaid-1702923485927 #aggregationStart{fill:#ececff;stroke:#9370db;stroke-width:1}#mermaid-1702923485927 
#dependencyEnd,#mermaid-1702923485927 #dependencyStart,#mermaid-1702923485927 #extensionEnd,#mermaid-1702923485927 #extensionStart{fill:#9370db;stroke:#9370db;stroke-width:1}#mermaid-1702923485927 .branch-label,#mermaid-1702923485927 .commit-id,#mermaid-1702923485927 .commit-msg{fill:#d3d3d3;color:#d3d3d3}</style><style>#mermaid-1702923485927 { color: rgb(0, 0, 0); font: normal normal 400 normal 16px / normal "Times New Roman"; }</style><g></g><g><line id="actor0" x1="75" y1="5" x2="75" y2="220" class="actor-line" stroke-width="0.5px" stroke="#999"></line><rect x="0" y="0" fill="#eaeaea" stroke="#666" width="150" height="65" rx="3" ry="3" class="actor"></rect><text x="75" y="32.5" dominant-baseline="central" alignment-baseline="central" class="actor" style="text-anchor: middle;"><tspan x="75" dy="0">John</tspan></text></g><g><line id="actor1" x1="275" y1="5" x2="275" y2="220" class="actor-line" stroke-width="0.5px" stroke="#999"></line><rect x="200" y="0" fill="#eaeaea" stroke="#666" width="150" height="65" rx="3" ry="3" class="actor"></rect><text x="275" y="32.5" dominant-baseline="central" alignment-baseline="central" class="actor" style="text-anchor: middle;"><tspan x="275" dy="0">Alice</tspan></text></g><defs><marker id="arrowhead" refX="5" refY="2" markerWidth="6" markerHeight="4" orient="auto"><path d="M 0,0 V 4 L6,2 Z"></path></marker></defs><defs><marker id="crosshead" markerWidth="15" markerHeight="8" orient="auto" refX="16" refY="4"><path fill="black" stroke="#000000" stroke-width="1px" d="M 9,2 V 6 L16,4 Z" style="stroke-dasharray: 0, 0;"></path><path fill="none" stroke="#000000" stroke-width="1px" d="M 0,1 L 6,7 M 6,1 L 0,7" style="stroke-dasharray: 0, 0;"></path></marker></defs><g><text x="175" y="93" class="messageText" style="text-anchor: middle;">Hello John, how are you?</text><line x1="275" y1="100" x2="75" y2="100" class="messageLine0" stroke-width="2" stroke="black" marker-end="url(#arrowhead)" style="fill: none;"></line></g><g><text x="175" 
y="128" class="messageText" style="text-anchor: middle;">Great!</text><line x1="75" y1="135" x2="275" y2="135" class="messageLine1" stroke-width="2" stroke="black" marker-end="url(#arrowhead)" style="stroke-dasharray: 3, 3; fill: none;"></line></g><g><rect x="0" y="155" fill="#eaeaea" stroke="#666" width="150" height="65" rx="3" ry="3" class="actor"></rect><text x="75" y="187.5" dominant-baseline="central" alignment-baseline="central" class="actor" style="text-anchor: middle;"><tspan x="75" dy="0">John</tspan></text></g><g><rect x="200" y="155" fill="#eaeaea" stroke="#666" width="150" height="65" rx="3" ry="3" class="actor"></rect><text x="275" y="187.5" dominant-baseline="central" alignment-baseline="central" class="actor" style="text-anchor: middle;"><tspan x="275" dy="0">Alice</tspan></text></g></svg> </div> <hr /> <h2 id="blockquotes">Blockquotes</h2> <blockquote> We do not grow absolutely, chronologically. We grow sometimes in one dimension, and not in another, unevenly. We grow partially. We are relative. We are mature in one realm, childish in another. —Anais Nin </blockquote> <hr /> <h2 id="layouts">Layouts</h2> <p>The main text column is referred to as the body. It is the assumed layout of any direct descendants of the <code class="language-plaintext highlighter-rouge">d-article</code> element.</p> <div class="fake-img l-body"> <p>.l-body</p> </div> <p>For images you want to display a little larger, try <code class="language-plaintext highlighter-rouge">.l-page</code>:</p> <div class="fake-img l-page"> <p>.l-page</p> </div> <p>All of these have an outset variant if you want to poke out from the body text a little bit. For instance:</p> <div class="fake-img l-body-outset"> <p>.l-body-outset</p> </div> <div class="fake-img l-page-outset"> <p>.l-page-outset</p> </div> <p>Occasionally you’ll want to use the full browser width. For this, use <code class="language-plaintext highlighter-rouge">.l-screen</code>. 
You can also inset the element a little from the edge of the browser by using the inset variant.</p> <div class="fake-img l-screen"> <p>.l-screen</p> </div> <div class="fake-img l-screen-inset"> <p>.l-screen-inset</p> </div> <p>The final layout is for marginalia, asides, and footnotes. It does not interrupt the normal flow of <code class="language-plaintext highlighter-rouge">.l-body</code> sized text except on mobile screen sizes.</p> <div class="fake-img l-gutter"> <p>.l-gutter</p> </div> <hr /> <h2 id="other-typography">Other Typography?</h2> <p>Emphasis, aka italics, with <em>asterisks</em> (<code class="language-plaintext highlighter-rouge">*asterisks*</code>) or <em>underscores</em> (<code class="language-plaintext highlighter-rouge">_underscores_</code>).</p> <p>Strong emphasis, aka bold, with <strong>asterisks</strong> or <strong>underscores</strong>.</p> <p>Combined emphasis with <strong>asterisks and <em>underscores</em></strong>.</p> <p>Strikethrough uses two tildes. <del>Scratch this.</del></p> <ol> <li>First ordered list item</li> <li>Another item ⋅⋅* Unordered sub-list.</li> <li>Actual numbers don’t matter, just that it’s a number ⋅⋅1. Ordered sub-list</li> <li>And another item.</li> </ol> <p>⋅⋅⋅You can have properly indented paragraphs within list items. 
Notice the blank line above, and the leading spaces (at least one, but we’ll use three here to also align the raw Markdown).</p> <p>⋅⋅⋅To have a line break without a paragraph, you will need to use two trailing spaces.⋅⋅ ⋅⋅⋅Note that this line is separate, but within the same paragraph.⋅⋅ ⋅⋅⋅(This is contrary to the typical GFM line break behaviour, where trailing spaces are not required.)</p> <ul> <li>Unordered list can use asterisks</li> <li>Or minuses</li> <li>Or pluses</li> </ul> <p><a href="https://www.google.com">I’m an inline-style link</a></p> <p><a href="https://www.google.com" title="Google's Homepage">I’m an inline-style link with title</a></p> <p><a href="https://www.mozilla.org">I’m a reference-style link</a></p> <p><a href="../blob/master/LICENSE">I’m a relative reference to a repository file</a></p> <p><a href="http://slashdot.org">You can use numbers for reference-style link definitions</a></p> <p>Or leave it empty and use the <a href="http://www.reddit.com">link text itself</a>.</p> <p>URLs and URLs in angle brackets will automatically get turned into links. 
http://www.example.com or <a href="http://www.example.com">http://www.example.com</a> and sometimes example.com (but not on Github, for example).</p> <p>Some text to show that the reference links can follow later.</p> <p>Here’s our logo (hover to see the title text):</p> <p>Inline-style: <img src="https://github.com/adam-p/markdown-here/raw/master/src/common/images/icon48.png" alt="alt text" title="Logo Title Text 1" /></p> <p>Reference-style: <img src="https://github.com/adam-p/markdown-here/raw/master/src/common/images/icon48.png" alt="alt text" title="Logo Title Text 2" /></p> <p>Inline <code class="language-plaintext highlighter-rouge">code</code> has <code class="language-plaintext highlighter-rouge">back-ticks around</code> it.</p> <div class="language-javascript highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kd">var</span> <span class="nx">s</span> <span class="o">=</span> <span class="dl">"</span><span class="s2">JavaScript syntax highlighting</span><span class="dl">"</span><span class="p">;</span> <span class="nf">alert</span><span class="p">(</span><span class="nx">s</span><span class="p">);</span> </code></pre></div></div> <div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="n">s</span> <span class="o">=</span> <span class="sh">"</span><span class="s">Python syntax highlighting</span><span class="sh">"</span> <span class="k">print</span> <span class="n">s</span> </code></pre></div></div> <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>No language indicated, so no syntax highlighting. But let's throw in a &lt;b&gt;tag&lt;/b&gt;. 
</code></pre></div></div> <p>Colons can be used to align columns.</p> <table> <thead> <tr> <th>Tables</th> <th style="text-align: center">Are</th> <th style="text-align: right">Cool</th> </tr> </thead> <tbody> <tr> <td>col 3 is</td> <td style="text-align: center">right-aligned</td> <td style="text-align: right">$1600</td> </tr> <tr> <td>col 2 is</td> <td style="text-align: center">centered</td> <td style="text-align: right">$12</td> </tr> <tr> <td>zebra stripes</td> <td style="text-align: center">are neat</td> <td style="text-align: right">$1</td> </tr> </tbody> </table> <p>There must be at least 3 dashes separating each header cell. The outer pipes (|) are optional, and you don’t need to make the raw Markdown line up prettily. You can also use inline Markdown.</p> <table> <thead> <tr> <th>Markdown</th> <th>Less</th> <th>Pretty</th> </tr> </thead> <tbody> <tr> <td><em>Still</em></td> <td><code class="language-plaintext highlighter-rouge">renders</code></td> <td><strong>nicely</strong></td> </tr> <tr> <td>1</td> <td>2</td> <td>3</td> </tr> </tbody> </table> <blockquote> <p>Blockquotes are very handy in email to emulate reply text. This line is part of the same quote.</p> </blockquote> <p>Quote break.</p> <blockquote> <p>This is a very long line that will still be quoted properly when it wraps. Oh boy let’s keep writing to make sure this is long enough to actually wrap for everyone. 
Oh, you can <em>put</em> <strong>Markdown</strong> into a blockquote.</p> </blockquote> <p>Here’s a line for us to start with.</p> <p>This line is separated from the one above by two newlines, so it will be a <em>separate paragraph</em>.</p> <p>This line is also a separate paragraph, but… This line is only separated by a single newline, so it’s a separate line in the <em>same paragraph</em>.</p> Understanding Limitations of Vision-Language Models 2022-12-01T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2022/Vision_Language_Limitations <h2 id="why-are-vision-language-models-important">Why are vision-language models important?</h2> <p>The emergence of joint vision-language models such as Contrastive-Language Image Pretraining (CLIP) [1] from OpenAI, and GAIA-1 [2] from Wayve AI have had critical implications in computer vision, robotics, generative AI, self-driving, and more. The key idea of these large foundation models is that they learn meaningful data representations of labeled (text, image) pairs. Once trained, these learned representations are sufficiently versatile and can directly be deployed for a broad range of applications. Such transfer learning is referred to as zero shot learning, where the learned representations can directly be used for unseen data in a new task context without any additional training.</p> <h2 id="how-is-our-work-different-from-previous-related-work">How is our work different from previous related work?</h2> <p>Many follow up works have since examined how these large vision-language models perform with respect to various scenarios. Prior works study these effects in the context of transfer learning. Jain et al. looks at how performance is examined with respect to the quality of the dataset and provides examples where the performance can be improved by removing data from the source dataset [5]. This can be done by utilizing linear classifiers in a scalable and automatic manner [6]. Santurkar et al. 
explored the impact of language supervision in vision models, and when the pre-training dataset is sufficiently large and contains relevant captions, the model will outperform other image-only models [4]. Shen et al. investigated CLIP’s advantages in outperforming widely used visual encoders through task-specific fine-tuning and combining with vision-language model pre-training [7]. While the aforementioned literature made valuable contributions in understanding the performance of vision-language models, they do not present a clear understanding of what goes on behind the “black box” of the model’s behavior and performance.</p> <p>Our study is novel in that we provide a more in-depth, detailed analysis of both the impact of descriptive text (or the lack thereof) in vision-language models, in conjunction with the subtleties of dataset biases. We want to clearly visualize these variables’ impacts on model behavior and provide an explanation for such results. We specifically propose a (toy) expansion of prior work on understanding the role of text description [4]. Prior work claims that text descriptions with low variability will ensure that transferred features from CLIP models will outperform image only models. In our work, we will then examine how more descriptive text labels can help overcome biases in dataset and address domain shift.</p> <h2 id="how-are-these-models-trained">How are these models trained?</h2> <p>CLIP and GAIA are based on transformer architectures [3], which were originally developed for natural language processing and later adopted for computer vision as well. Two separate encoders, a text encoder and an image encoder, separately transform input data from their respective data modality into feature vectors. In aligning images and text in feature space, CLIP and GAIA are able to learn semantically meaningful and robust representations that are useful for several downstream applications. 
These models perform this embedding space alignment in different ways. CLIP performs training by predicting which image features correspond to which text embeddings in a batch of (image, text) pairs. GAIA is trained in an autoregressive manner, predicting the next token, given past image, text, and action states. GAIA is reported to have ~9 billion parameters and CLIP is reported to have ~63 million parameters. The differences between these two architectures are also related to the type of input data that is being analyzed. While CLIP operates on single images, GAIA is meant to be used for self-driving, meaning that it operates on videos rather than images. As a result, GAIA requires some notion of temporal consistency, which is why autoregression is a good architecture, and more parameters (since video data is more complex than image data). In this study, we will primarily focus on the CLIP architecture (shown below for convenience).</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-11-10-Vision_Language_Limitations/clip_model-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-11-10-Vision_Language_Limitations/clip_model-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-11-10-Vision_Language_Limitations/clip_model-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-11-10-Vision_Language_Limitations/clip_model.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p><strong>Figure 1.</strong> CLIP Architecture, a commonly used vision-language model [1]. 
(We apologize for blurring, couldn’t figure out how to get rid of it).</p> <h2 id="could-the-dataset-play-a-role-in-training">Could the dataset play a role in training?</h2> <p>The nature of the training process of CLIP models introduces questions about how robust the training procedure would be. The training relies on (image, text) pairs, but a single text phrase is not a unique description of an image, and a single text description can be used to describe many different scenes. This one-to-many mapping problem introduces questions about what the optimal text description of a given image should be, or if that optimal description even exists. Santurkar et al. [4] looks at how vision-language models such as CLIP and Simple framework for Contrastive Learning of visual Representations (SimCLR) exhibit different performance based on whether they are trained with or without captions and only images. We were inspired by the study’s suggestion that the descriptiveness of the dataset captions can directly influence how well the CLIP models transfer.</p> <p>A more interesting question, that we answer in this blog post, is could having more descriptive text descriptions allow these large foundation models to mitigate or overcome dataset bias?</p> <p>To study this question, we consider a toy example with dogs and camels in the classic domain adaptation problem. In this context, we answer the following question:</p> <p><em>Can more descriptive text labels enable better domain adaptation in vision-language models with biased datasets?</em></p> <p>Domain adaptation is a problem in transfer learning where we want to have a model be able to learn the model in one context, and then generalize to another context. In other words, given a source domain that the model is trained on, domain adaptation is the problem of having high model performance in the target domain. In the dog vs. 
camel example, the domain adaptation problem occurs when we are used to seeing dogs and camels in certain contexts. For example, we generally expect to see camels in the desert and dogs in suburban environments (e.g. on the lawn, inside the house). If a model is trained to see such examples, then is suddenly shown a camel inside a house in Cambridge, the model has a strong chance of failure. Performance failure under domain shift is indicative that the model failed to disentangle background features from the camel itself. We will study whether descriptive text labels can enhance domain adaptation ability of current transformer-based foundation models.</p> <h2 id="understanding-role-of-text-labels-in-clip-gaia">Understanding role of text labels in CLIP, GAIA</h2> <p>Due to the large model size, invisible datasets, and large number of GPU hours needed to train CLIP and GAIA, we perform an analysis in a toy setup using the domain adaptation problem we described above. Our goal is to align image and text features, and then visualize the embeddings corresponding to different image classes.</p> <p>Each of the four experiments determine 1) how the models respond to dataset bias, and 2) how important the addition of descriptive text labels are in improving performance using a trade-off combination of the variables. We aim to measure and visualize the extent to which the caption aids in overcoming biases in training data.</p> <h3 id="architecture">Architecture</h3> <p>Our architecture is shown below. We have two separate transformer architectures: an image encoder and a text encoder. The output of each of these encoders is mapped to an image and text embedding, then L2-normalized. We then compute the cosine similarity of the two embeddings and use the similarity and compute a binary cross entropy loss. Note that, unlike CLIP, we do not compute similarity across all samples within a batch. 
We only compute cosine similarity for a sample (image, text) pair.</p> <h3 id="dataset">Dataset</h3> <p><strong>Image Generation.</strong> We generated our own dataset using DALL-E 2. The total size of the training dataset is 196 images, with (1) 48 images of horses on grass, (2) 48 images of horses in the desert, (3) 48 images of camels in the desert, and (4) 48 images of camels on grass. Note that the DALL-E generated images are used for academic purposes, and are not intended for any commercial use, as required by DALL-E terms and conditions.</p> <p><strong>Text Labels.</strong> We had two cases: a descriptive label and an undescriptive label. In the descriptive label case, we used the following labels for each of the four cases above (1) “horse on the grass”, (2) “horse in the desert”, (3) “a camel in the desert”, (4) “camel on the grass”. In the undescriptive label case, we just used the labels (1) “horse”, (2) “horse”, (3) “camel”, (4) “camel”.</p> <h3 id="experiment-1-no-dataset-bias-undescriptive-text-labels">Experiment 1: No Dataset Bias, Undescriptive Text Labels</h3> <p><strong><em>Description:</em></strong> In the first experiment, we first baseline our text and images encoders ability to perform classification of camels and horses in the case when there is no dataset bias. We use all 196 images with undescriptive labels, so that there is an even split between all four cases (each case comprises ¼ of the dataset). The goal is to assess how well the model can learn and generalize across different classes, and provides the basis for the models’ inherent capabilities and performance without impact from external factors.</p> <p><strong><em>Results:</em></strong> We performed Principal Component Analysis (PCA) on the feature vectors of our output from the image encoder and the text encoder in order to visualize more similar labels being mapped closer to each other. 
We notice that camels in desert and camels in grass are closer together in the feature space, while horses in desert and horses in grass are closer together. There is some overlap between camels in grass and horses in deserts, indicating some confusion with the context of the scene. That said, there is a very clear distinction between camels in the desert and horses in the grass, implying that the model is clearly aware that they are very different classes. The overall separation is rather decent when there is no dataset bias.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-11-10-Vision_Language_Limitations/fig_2-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-11-10-Vision_Language_Limitations/fig_2-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-11-10-Vision_Language_Limitations/fig_2-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-11-10-Vision_Language_Limitations/fig_2.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p><strong>Figure 2.</strong> Vague separation in different environments with less descriptive labels.</p> <h3 id="experiment-2-no-dataset-bias-descriptive-text-labels">Experiment 2: No Dataset Bias, Descriptive Text Labels</h3> <p><strong><em>Description:</em></strong> In the second experiment, we keep the dataset unbiased, but add descriptive labels.</p> <p><strong><em>Results:</em></strong> In the plot below, we can see that using descriptive labels slightly improves the separation between classes in the unbiased dataset case. Specifically note the strong separation between red (camels in desert) and green (horses in grass). These two cases are easiest to distinguish, as is reflected in the scattered plot below. 
Interestingly, when we use descriptive text, the labels are getting bunched together based on context. In particular, horses and camels in the desert are being placed close together, while horses and camels in the grass are being placed close together. This is likely because the model is learning to use the context as a way to separate classes as well. There is still a general progression from red (camels in desert) → blue (horses in desert) → cyan (camels in grass) → green (horses in grass), suggesting some semantic smoothness in feature space. The transition between blue and cyan is rather abrupt though.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-11-10-Vision_Language_Limitations/fig_3-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-11-10-Vision_Language_Limitations/fig_3-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-11-10-Vision_Language_Limitations/fig_3-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-11-10-Vision_Language_Limitations/fig_3.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p><strong>Figure 3.</strong> Improvements in class separation with more descriptive labels.</p> <h3 id="experiment-3-dataset-bias-undescriptive-text-labels">Experiment 3: Dataset Bias, Undescriptive Text Labels</h3> <p><strong><em>Description:</em></strong> In the third experiment, we begin to investigate the role of dataset bias. The goal is to build on the results from the first experiment, reproducing a common aforementioned problem of over- or under-representation in datasets. We look at how the model responds to dataset bias and whether its performance can still stay the same, regardless of how the images are distributed in classes. 
Dataset bias is defined by the percentage of minority samples that we remove (minority samples are horses in desert and camels in grass). For example, we originally used 48 images of horses in the desert. 25% bias is defined as using only 12 images of horses in the desert.</p> <p><strong><em>Results:</em></strong> These results will be jointly explained with experiment 4.</p> <h3 id="experiment-4-dataset-bias-descriptive-text-labels">Experiment 4: Dataset Bias, Descriptive Text Labels</h3> <p><strong><em>Description:</em></strong> In the fourth experiment, we dive deeper into the impact of dataset bias that we began exploring in the third experiment, and question whether performance will be improved when the provided text labels are more descriptive. This directly answers the question of how impactful descriptive text is in vision-language models, in addition to whether they can help overcome dataset bias.</p> <p><strong><em>Results:</em></strong> Surprisingly, when the dataset is more biased, we find that the separation between classes is better. We believe this to be true because the model is able to identify clear separation between horses and camels based on the context alone. As a result, it is easily able to separate red and green classes as the bias increases. We notice that the minority classes (horses in desert and camels in grass) also spread out in latent space as the dataset is biased. When using descriptive labels, we notice that the blue points (horses in the desert) are able to separate themselves more from other clusters than in the undescriptive case, indicating some success with descriptive labels in the event of dataset bias. 
Overall, across all cases, the model generally has an easy time separating camels in the desert, which is likely due to the distinctness of the background and the object.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2022-11-10-Vision_Language_Limitations/dataset_bias-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2022-11-10-Vision_Language_Limitations/dataset_bias-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2022-11-10-Vision_Language_Limitations/dataset_bias-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2022-11-10-Vision_Language_Limitations/dataset_bias.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <p><strong>Figure 4.</strong> More biased dataset can show more separation between classes.</p> <h2 id="limitations-and-potential-confounding-parameters">Limitations and Potential Confounding Parameters</h2> <p>There are several possible confounding parameters that may have impacted our results beyond the variables that we were looking at. They include the following:</p> <h3 id="dataset-1">Dataset</h3> <p><em>Generating the dataset:</em> Because we used DALL-E to generate our dataset, the limitations of DALL-E itself can carry over to our performance. The inherent diversity of the data that DALL-E uses to train would directly impact our results, as well as the hyperparameters that were modified in training DALL-E. DALL-E could also have a specific image aesthetic that are different from real photography.</p> <p><em>Size:</em> Model performance can also be impacted by a limited dataset. We trained and validated our model on 196 images, which is not a large dataset. 
The confounding variable here would be the complexity of the images, where there may be fewer images with less clear distinctions of “horses in the grass” or “camels in the desert”. Furthermore, there are different breeds, sizes, colors, and shapes of horses and camels that may not have been fully explored due to less room for them.</p> <p><em>Composition sensitivity:</em> Literature review has shown that the model’s performance can be impacted by quality in addition to the quantity of the data [5]. Recent evidence has shown that removing data from a dataset can aid in transfer learning and improve downstream effectiveness. While we did not run experiments in identifying what specific composition and characteristics of the data should be removed, the analysis would have impacted our results.</p> <h3 id="model">Model</h3> <p><em>Computational resources:</em> Because we were restricted by GPU resources, we chose to use a smaller dataset and small self-trained Transformer architectures. We were also unable to train for more epochs or to use a more complex model architecture, which could have lowered model performance. We found that increasing the batch size or increasing the number of layers led our model to run out of computational power and continually crash.</p> <p><em>Tuning hyperparameters:</em> Batch size, learning rate, number of layers, optimization models, and other factors could also limit the exploration of optimal configurations and affect overall performance. For example, a higher learning rate in a model could converge faster and show higher performance, when in reality, it is not an accurate reflection of the model. Overfitting and different regularization parameters can also lead to over- or under-fitting.</p> <h2 id="conclusions">Conclusions</h2> <p>Our toy problem gives some intuition into the idea that the descriptiveness of the label can affect the clustering profile of different datasets. 
Note that because our experiments were done in smaller settings, we cannot make any claims with respect to scaling up to large amounts of data, compute, and model size. That said, when adding description of the context of the images (i.e. desert vs. grass), we noticed that the points in feature space began to cluster first based on context, then based on the animal type (camel vs. horse). We also noticed that under dataset bias, the majority groups (horses in grass and camels in desert) begin to have better clustering separation. However, the minority group performance decreased, which suggests the importance of accounting for dataset bias in machine learning algorithms. In our experiments, we partially found more descriptive labels to help mitigate these negative effects, but mitigating these effects more reliably is an ongoing research direction.</p> <h2 id="references">References</h2> <ol> <li> <p>Radford et al., <em>“Learning transferable visual models from natural language supervision”</em>, ICML 2021</p> </li> <li> <p>Hu et al., <em>“GAIA-1: A Generative World Model for Autonomous Driving”</em>, arXiv 2023</p> </li> <li> <p>Vaswani et al. <em>“Attention Is All You Need”</em>, NeurIPS 2017</p> </li> <li> <p>Santurkar et al., <em>“Is a Caption Worth a Thousand Images? A Controlled Study for Representation Learning”</em>, CVPR 2022</p> </li> <li> <p>Jain et al., <em>“A Data-Based Perspective on Transfer Learning”</em>, CVPR 2023</p> </li> <li> <p>Jain et al, <em>“Distilling Model Failures as Directions in Latent Space”</em>, ICLR 2023</p> </li> <li> <p>Shen et al. 
<em>“How Much Can CLIP Benefit Vision-and-Language Tasks?”</em>, arXiv 2021</p> </li> </ol> Contrastive Representation Learning for Dynamical Systems 2022-11-07T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2022/contrastive-time <h2 id="introduction">Introduction</h2> <h3 id="dynamical-system">Dynamical System</h3> <p>Dynamical systems form the foundation for understanding intricate phenomena in both scientific research and engineering applications. These systems are defined by their <strong>state</strong> (denoted as $X$) at any given time and a set of <strong>equations</strong> (e.g., $v = \frac{dX}{dt} = f_t(X, \theta)$) that describe the evolution of these states over time ($t$), all driven by <strong>underlying parameters</strong> $\theta$. Some real-world examples of dynamical systems include:</p> <ul> <li><strong>Climate Systems</strong>: Involves states like temperature, pressure, and wind velocity, with parameters such as solar radiation and greenhouse gas concentrations.</li> <li><strong>Population Dynamics in Ecology</strong>: Features states like population sizes, with parameters including birth and death rates, and interaction rates between species.</li> <li><strong>Economic Models</strong>: Focus on states like stock prices and trading volume, influenced by parameters like interest rates and market sentiment.</li> <li><strong>Control Systems in Engineering</strong>: Encompasses states like the position and velocity in robotics or the aircraft’s orientation in flight dynamics, governed by parameters like physical properties and control gains.</li> </ul> <p>The evolution of the system’s state over time can be observed as a time series, where system underlying parameters ($\theta$) governs the system’s behavior. In our project, we would like to determine if it would be feasible to discover the underlying system parameters given the observed trajectory. 
It would lay the groundwork for both robust predictive modeling and model interpretability analysis for safety-critical systems, such as clinical application and chemical engineering plants.</p> <h3 id="spring-mass-system">Spring-Mass System</h3> <p>Consider a spring-mass system, a fundamental model in dynamics. In a system comprising two masses, the states include positions $x$ and velocities $v = \frac{dx}{dt}$, which can be derived from the positions. Crucially, it is the underlying parameters, masses $m_1$, $m_2$ and spring constants $k_1$, $k_2$, that dictate the trajectories of $x$.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/spring_mass_system-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/spring_mass_system-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/spring_mass_system-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-contrastive-time/spring_mass_system.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 1: A spring-mass system, classical dynamical system to illustrate project idea </div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/spring_mass_system_traj-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/spring_mass_system_traj-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/spring_mass_system_traj-1400.webp" /> <!-- Fallback to the original file --> <img 
src="/staging/assets/img/2023-11-08-contrastive-time/spring_mass_system_traj.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 2: Spring mass system trajectory at ($k_1$, $k_2$, $m_1$, $m_2$) = (1.0, 2.0, 1.0, 2.0) </div> <p>Different system parameters, such as mass or spring constant, result in different oscillatory and long-term behavior behaviors of the system. Below is a gif demonstrating the effect of changing parameters on the system’s trajectory; this visualization illustrates how different underlying parameter values lead to distinct dynamical behaviors.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/system_dynamics_varying_m1.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/system_dynamics_varying_m1.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/system_dynamics_varying_m1.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-contrastive-time/system_dynamics_varying_m1.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 3: system dynamic at varying $m_1$, while keeping $k_1$, $k_2$ and $m_2$ constant. 
</div> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/system_dynamics_varying_m2.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/system_dynamics_varying_m2.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/system_dynamics_varying_m2.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-contrastive-time/system_dynamics_varying_m2.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 4: system dynamics at varying $m_2$, while keeping $k_1$, $k_2$, and $m_1$ constant. </div> <h3 id="dataset-collection--generation">Dataset Collection / Generation</h3> <p>We create a simulator for the above dynamical system to generate data based on parameters like masses $m$ and spring constants $k$. The parameters are systematically varied to generate a diverse and challenging dataset. More concretely, the dataset can be represented by a tensor of shape $(N_{param}, N_{traj}, T, d)$, where:</p> <ul> <li>$N_{param}$ is the number of parameter sets. Each set of parameters would lead to different system dynamics and trajectories.</li> <li>$N_{traj}$ is the number of trajectories generated for each parameter set. Within the same set of parameters, different initial conditions and noise level would lead to different trajectories.</li> <li>$T$ is the number of steps in a trajectory. $T$ is dependent on 2 factors - time span in the simulation, and the time step (i.e., $dt$). Note that our system/model formulation allows $T$ to be different for different trajectories, offering more flexibility.</li> <li>$d$ is the number of states. 
In the above example, $d = 4$, representing $(x_1, x_2, v_1, v_2)$.</li> </ul> <h3 id="related-works">Related Works</h3> <p>Time-series data analysis is a crucial component in a wide array of scientific and industrial domains, ranging from dynamical systems and weather forecasting to stock market prediction. These applications often involve underlying parameters that are complex and not immediately observable from the data. Traditional time-series methodologies primarily emphasize prediction, which can result in models that operate as “black-boxes” with limited interpretability <d-cite key="Lim_2021"></d-cite>.</p> <p>To address this limitation, the representation learning landscape in time-series analysis has expanded recent years, with a focus on unsupervised and semi-supervised methods. Fortuin et al. <d-cite key="fortuin2020gp"></d-cite> developed GP-VAEs, combining Gaussian processes with VAEs to handle noisy and irregularly sampled time-series data. This model addresses variabilities in time-series data, offering robust feature extraction. Franceschi et al. <d-cite key="Franceschi2019UnsupervisedCL"></d-cite> explored self-supervised learning frameworks for time-series, demonstrating how leveraging temporal coherence can improve feature representation without reliance on labeled data.</p> <p>Building on these advancements, recent studies like those by Eldele et al. <d-cite key="eldele2021timeseries"></d-cite> have explored the application of contrastive learning for enhancing time-series representation. Their approach creates representations by contrasting segments of time-series, thereby learning features that distinguish different temporal patterns. This method has shown promise in enhancing classification and forecasting tasks.</p> <p>However, there remains an unexplored potential in utilizing contrastive learning for learning the underlying parameters governing these systems. 
In this project, we aim to bridge this gap by applying the principles of contrastive learning to the specific challenge of identifying and understanding these hidden parameters within dynamical systems. By leveraging contrastive learning, we aim to move beyond mere prediction and delve into a deeper understanding of these parameters, thus enhancing the interpretability of time-series models, particularly applicable in safety-critical systems.</p> <h2 id="methodology">Methodology</h2> <h3 id="contrastive-learning">Contrastive Learning</h3> <p>Contrastive learning is a self-supervised learning technique prevalent in fields such as computer vision (CV) and natural language processing (NLP). At its core, it involves minimizing the embedding similarity between similar objects (i.e., positive pairs) while distancing dissimilar ones (i.e., negative pairs).</p> <p>In the context of dynamical systems, where the model does not have direct access to parameter values, contrastive learning is an effective method to infer underlying system parameters. In our case of spring-mass system, a positive pair consists of two trajectories generated using the same set of parameters, whereas a negative pair is two trajectories generated using different set of parameters.</p> <p>We utilize the following InfoNCE (Information Noise-Contrastive Estimation) loss for training:</p> \[L_{\text{InfoNCE}} = -\log \frac{e^{f(x)^Tf(x^+)/\tau}}{\sum_{i=0}^{N} e^{f(x)^Tf(x^-_i)/\tau}}\] <ul> <li>$f(x)$ is the generated trajectory embedding.</li> <li>$\tau$ is a (fixed) temperature hyperparameter, which we set to default 1.</li> <li>($x$, $x^+$) forms the positive pair (i.e., two trajectories with the same underlying parameters but different initial conditions).</li> <li>($x$, $x_j^-$) form negative pairs (i.e. 
two trajectories from different underlying parameter sets).</li> </ul> <h3 id="model">Model</h3> <p>Trajectories in dynamical systems can be represented by a time-series dataset, which is a type of sequential data. Long Short-Term Memory networks (LSTMs), a variant of Recurrent Neural Networks (RNNs), can be used process sequential data and manage long-term dependencies. A key feature of LSTMs is their use of gates, which regulate the flow of information, allowing the network to maintain pertinent information over extended periods — key characteristics for modeling dynamical systems. These gates include:</p> <ol> <li><strong>Forget Gate</strong>: Decides which information from the cell state should be discarded. It uses the current input and the previous hidden state to generate a value between 0 and 1 for each number in the cell state, with 1 indicating “keep this” and 0 indicating “discard this.”</li> <li><strong>Input Gate</strong>: Determines what new information will be added to the cell state. It involves two parts: a sigmoid layer that decides which values will be updated and a tanh layer that creates a vector of new candidate values.</li> <li><strong>Output Gate</strong>: Decides what information from the cell state will be used to generate the output. 
It takes the current input and the previous hidden state, passes them through a sigmoid layer, and multiplies the output by a tanh of the cell state to decide which parts of the cell state make it to the output.</li> </ol> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/LSTM-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/LSTM-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/LSTM-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-contrastive-time/LSTM.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 5: Diagram of LSTM <d-cite key="towardsdatascience_lstm_gru"></d-cite>. </div> <p>In the context of the contrastive learning framework, the choice of model is a design choice. Essentially, any model capable of converting a trajectory into an embedding, such as a transformer, could be utilized. While transformers have shown remarkable results in CV and NLP, their performance on smaller datasets remains an area less explored. Previous studies in dynamical systems have predominantly employed RNN-based approaches. In this project, we aim to study if LSTM is capable of capturing the dynamics of system through its hidden and cell states.</p> <h3 id="training-objectives">Training objectives</h3> <p>Trajectories are passed through an LSTM to generate trajectory embeddings, derived from the cell states of the LSTM’s final layer. In our training framework, there are 2 loss functions:</p> <ul> <li>Contrastive objective (InfoNCE loss) is applied on the trajectory embedding. 
This loss encourages model to create embeddings that meaningfully distinguish between different system dynamics.</li> <li>Prediction objective (MSE) is applied between the ground truth state (i.e., $X_{t+1}$) and the prediction state (i.e., $\hat{X}_{t+1}$) at the next step. This loss encourages model to use the current state and embedding to predict next step behavior.</li> </ul> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/contrastive_representation-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/contrastive_representation-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/contrastive_representation-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-contrastive-time/contrastive_representation.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 6: Diagram of contrastive representation learning in this project. Trajectories derived from the same parameter set (e.g. top two trajectories in figure) form a positive pair; they map closely in latent space and show invariance to initial states. Trajectories from different parameter sets are distant in latent space, despite identical initial states (e.g. bottom two trajectories). </div> <h3 id="evaluation">Evaluation</h3> <p>The objective of the project to estimate the system parameters from observed trajectories. Therefore, the primary metric for our evaluation strategy is the <strong>MAE on underlying parameter estimation</strong>. 
This involves applying linear probing to the model’s embeddings against known ground truth parameters on a subset of the training set (i.e., a linear system $X\beta = Y$ is solved, with <em>X</em> representing the trajectory embeddings, and <em>y</em> being the ground truth parameters). Since it is a simple linear transformation of the original features, it has limited capacity to alter feature complexity. Essentially, if a model can perform well under linear probing, it suggests that the learned embeddings themselves are robust and informative with respect to the underlying parameters.</p> <p>The following plot shows the result of the contrastive learning framework on the validation set. Left subplot corresponds to the ground truth parameter, right subplot corresponds to the predicted parameter using the above contrastive learning framework. For a focused visualization, we only varies 2 parameter (i.e., $m_1$, $m_2$). Each point in the plot is annotated with its corresponding parameter values. For each parameter set, we evaluate on 2 trajectories with different initial conditions.</p> <p>On the right plot, we observe similar data points are grouped together in the parameter space, indicating that the model is capable of clustering trajectories generated from the same parameter set together. Comparing the left and right plots, we observe the model is capable to predicting parameters to be close to ground truth parameters. 
Overall, the MAE for parameter estimation is <code class="language-plaintext highlighter-rouge">0.043</code>, underscoring the model’s precision in parameter prediction.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/param_estimation-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/param_estimation-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/param_estimation-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-contrastive-time/param_estimation.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 7: Result of parameter estimation. Left subplot corresponds to the ground truth parameter, right subplot corresponds to the predicted parameter using the above contrastive learning framework. </div> <p>Additionally, we would also like the model to be capable of predicting the future trajectories. For this objective, the secondary metric is the <strong>MAE on next-step prediction</strong>. High value on this metrics would indicate model’s ability to accurately forecast future states, which is a necessary but may not be sufficient step towards a more complex, weekly-supervised parameter inference tasks. The MAE on the validation set is <code class="language-plaintext highlighter-rouge">0.00024</code>, and we will discuss it more in the Experiments section.</p> <h2 id="experiments">Experiments</h2> <p>In the previous section, Figure X above shows the final result. 
We want to include 2 components in this section: 1) different things we attempted to reach the results in Figure 7, and 2) several experiments to study how different factors affect the model’s capability of discovering the underlying parameters.</p> <p>Due to computational and time limitations, the numbers reported in this section are not from the final model, which was trained for a much longer time.
With this “data augmentation”, we bridged the gap between training and validation performance to be <code class="language-plaintext highlighter-rouge">0.061</code> and <code class="language-plaintext highlighter-rouge">0.065</code> respectively.</p> <h3 id="number-of-prediction-steps">Number of prediction steps</h3> <p>We also considered the limitations of next-step prediction, particularly for high-frequency samples (i.e., small $dt$). A trivial model might simply predict state $X$ at time $t+1$ as $X_t$, and achieve a small loss since $X_{t+1} - X_t$ may be small for small $dt$. To avoid model taking shortcuts, we shift our focus from immediate next-step prediction to forecasting next-k-steps ahead. We also anticipate that accurate longer-horizon predictions would require a deeper understanding of the underlying parameters, potentially leading to improved performance in parameter estimation. This improves the parameter MAE on the validation set from <code class="language-plaintext highlighter-rouge">0.10</code> to <code class="language-plaintext highlighter-rouge">0.065</code>. 
The following figure illustrates the results of predicting 30 steps ahead.</p>
This strategy aims to specialize $z[:W]$ in encoding system parameters, while allowing the remaining part of the vector, $z[W:]$, the flexibility to encapsulate other trajectory aspects, such as initial conditions and inherent noise.</p> <p>However, the performance didn’t increase across various values of $W$. This stagnation might stem from our use of the LSTM cell state as the latent embedding. Given that the cell state inherently integrates “long-term” information, with “short-term” data predominantly residing in the hidden states, restricting ourselves to $z[:W]$ potentially reduces the representational power of our contrastive learning framework.</p> <h3 id="effect-of-key-hyperparameters">Effect of key hyperparameters</h3> <p>We utilized WandB for a hyperparameter sweep to investigate their impact on the model’s performance in next-steps prediction and underlying parameter estimation. Key hyperparameters explored include:</p> <ul> <li><strong>Embedding Size</strong>: We observed that increasing the embedding size from 10 to 200 led to a reduction in the InfoNCE loss from 0.862 to 0.007, and the corresponding parameter estimation estimation MAE peaked when embedding size reached 100. This suggests a larger embedding size can increase the capacity to more effectively inferring underlying system parameters. However, maintaining the embedding size at a balanced level is crucial to ensure the model concentrates on the most pivotal aspects of data variation, rather than overfitting to minor system details.</li> <li><strong>Number of LSTM Layers</strong>: Increasing the number of LSTM layers improved both next-step prediction and parameter estimation. Notably, with more LSTM layers, a smaller embedding size became sufficient for achieving desirable outcomes in both prediction and parameter inference. 
This implies a deeper LSTM architecture can capture more complex pattern in the data.</li> <li><strong>Prediction Horizon (Predict Ahead)</strong>: We observe a modest improvement in performance on parameter estimation MAE (i.e., 0.04) as the prediction horizon increases. This improvement, while positive, was less pronounced than anticipated. In our model, contrastive learning serves as the primary mechanism for learning about system parameters, with next-k-step prediction intended to supplement this learning process. Theoretically, as the prediction horizon (k) increases, the complexity of the next-k-step prediction task escalates. This demands more focus from the model, potentially at the expense of its capacity for contrastive learning. Consequently, the variable k emerges as a hyperparameter to strike an optimal balance between two competing objectives: facilitating overall learning (where a larger k is advantageous), and maintaining a focus on contrastive learning (where a smaller k is beneficial).</li> </ul> <h3 id="noise-level-in-data-generation">Noise level in data generation</h3> <p>In real-world applications, models often lack direct access to state values due to the inherent stochasticity of systems or observation noise. In high-precision engineering applications, this noise is typically constrained to below 1%. However, in less precise scenarios, the noise in observed data can reach levels as high as 20%. It’s important to note that these errors are not merely observational errors, which can be assumed to be independent and identically distributed (i.i.d). Rather, these errors are intertwined with the state itself and can propagate over time, affecting subsequent observations. The figure below illustrates how noise can significantly alter trajectories. 
For instance, at a 20% noise level, the state variable $x_1$ markedly diverges from its intended path around the 8-second mar</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/traj_with_noise-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/traj_with_noise-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/traj_with_noise-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-contrastive-time/traj_with_noise.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 9: Comparison of state trajectories with no noise (top subplot) and 20% noise (bottom subplot). </div> <p>The following section evaluates the model’s performance using noisy observed data. During trajectory generation, we introduce random noise according to the formula $X_{obs} = X_{true} (1 + \alpha \mathit{N}(0, 1))$ where $\alpha$ is the noise-to-signal ratio. 
We studied the model’s performance across various noise levels, ranging from $\alpha = 0.0$ to $\alpha = 0.2$, and the results are plotting in the following figure.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/comparison_noise-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/comparison_noise-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/comparison_noise-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-contrastive-time/comparison_noise.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 10: Model performance across different noise levels. The blue bars represent a model trained on a clean dataset, and directly applied to a noisy validation dataset. The orange bars represent a model trained and evaluated on datasets with equivalent noise levels. </div> <p>Directly applying a model trained with a clean dataset on a noisy dataset would lead to drastic performance drop as shown in the blue bars. During model deployment, it’s a natural choice to train on a dataset with the same noise amount. This could mitigate the drastic performance drop, especially for low to moderate amount of noise (e.g., $\alpha &lt; 0.1$), as shown in the orange bars. However, when noise amount rises to 20%, training on noisy dataset doesn’t help either due to significant deviation from clean data.</p> <p>Applying a model trained on a clean dataset to a noisy dataset leads to a significant drop in performance, as indicated by the blue bars. In practical model deployment, it’s common to train the model on a dataset with a comparable level of noise. 
This approach can substantially mitigate performance degradation, particularly at low to moderate noise levels (e.g., $\alpha &lt; 0.1$), as demonstrated by the orange bars. However, at higher noise levels, such as 20%, training on a noisy dataset proves less effective due to the substantial deviation from the clean data.</p> <h3 id="generalizability-to-unseen-parameters">Generalizability to unseen parameters</h3> <p>In this section, we delve into the model’s generalizability across unseen parameters. Our investigation comprises experiments on both in-distribution and out-of-distribution system parameters. The results of these experiments are illustrated in the following figures.</p> <p>For in-distribution analysis, our focus was to assess the model’s proficiency in adapting to system parameters that, while differing from those in the training set, still fall within the same predefined range. This aspect of the study aims to understand how well the model can interpolate within the known parameter space.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/param_est_unseen_in_distribution-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/param_est_unseen_in_distribution-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/param_est_unseen_in_distribution-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-contrastive-time/param_est_unseen_in_distribution.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 11: Result of parameter estimation to unseen parameters in-distribution. 
</div> <p>On the other hand, the out-of-distribution experiments were designed to challenge the model further by introducing system parameters that lie outside the range encountered during training. This approach tests the model’s ability to extrapolate beyond its training confines.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/param_est_unseen_out_distribution-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/param_est_unseen_out_distribution-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/param_est_unseen_out_distribution-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-contrastive-time/param_est_unseen_out_distribution.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 12: Result of parameter estimation to unseen parameters out-of-distribution. </div> <p>Remarkably, our model demonstrated a robust ability to generalize across both in-distribution and out-of-distribution parameters. It achieved a Mean Absolute Error (MAE) of 0.032 in the former and 0.082 in the latter scenario. These findings suggest that the model not only learns the underlying patterns within the training data but also retains a significant degree of flexibility to adapt to new, unseen parameter sets.</p> <h2 id="another-framework---generative-modeling">Another Framework - Generative Modeling</h2> <p>While the previously discussed contrastive learning framework shows promise in inferring underlying parameters through a weakly-supervised learning approach, it relies on prior knowledge about the relationship between trajectories and their corresponding parameter sets. 
Such information may not always be readily available in practical scenarios. To address this challenge, our research pivots towards employing a generative modeling framework, enabling the learning of system parameters in an unsupervised manner.</p> <p>We transition from contrastive learning to incorporating a <strong>variational autoencoder (VAE) structure</strong>. This setup operates without explicit knowledge of parameter sets, compelling the model to decipher the underlying patterns solely from the observed trajectories. The VAE framework consists of three primary components: 1) an encoder LSTM that transforms an observed trajectory into a latent representation, 2) a reparameterization layer that molds this latent representation into a specific distribution, and 3) a decoder LSTM that uses the latent representation and initial conditions to reconstruct the trajectory.</p> <p>Training focuses on 1) the reconstruction loss between real and generated trajectories, and 2) Mean Absolute Error (MAE) for next-k-step predictions made by the encoder LSTM. This method is designed to challenge the model’s capability to extract insights about the system’s dynamics independently, without relying on any prior information about the trajectories. The framework thus becomes a critical platform for testing the model’s ability to autonomously learn the system’s underlying parameters, requiring an advanced level of unsupervised learning.</p> <p>The evaluation metrics for this second framework are aligned with the first, utilizing MAE to assess both the underlying parameter estimation and the next k-step prediction accuracy of the encoder LSTM. A key addition in this framework is the <strong>MAE on Reconstruction Loss</strong>. This metric is used to gauge the model’s ability to accurately reconstruct input sequences, thereby reflecting its understanding of the data’s fundamental structure. 
A lower reconstruction loss implies that the model has effectively internalized the essential characteristics of the data distribution. Our expectation is that this deeper grasp of data structure will enable the model to infer underlying system parameters independently, without prior exposure to specific parameter set information.</p> <h2 id="experiments---generative-modeling">Experiments - Generative Modeling</h2> <h3 id="autoencoder-vs-variational-autoencoder">Autoencoder v.s. Variational Autoencoder</h3> <p>In addition to exploring the Variational Autoencoder (VAE) framework, we also experimented with a traditional autoencoder setup. This variant mirrors the architecture of the VAE but excludes the computation of the mean ($\mu$) and log variance ($\log \sigma^2$), thereby omitting the variational element. This modification streamlines the model, narrowing its focus to purely reconstructing input data from its latent representations.</p> <p>Our findings reveal that the autoencoder configuration surpassed the VAE in both parameter estimation and reconstruction. For parameter estimation MAE, autoencoder and VAE achieved 0.12 and 0.23 respectively. For reconstruction MAE, autoencoder and VAE achieved 0.02 and 0.49 respectively. This performance disparity can be attributed to the inherent constraints of each model. The autoencoder is primarily limited by the dimensionality of the embedding in its latent space. In contrast, the VAE faces an additional constraint due to its need to model the distribution within the latent space.</p> <p>These results suggest that the variational component, a defining feature of VAEs and instrumental in modeling data distributions, might not be essential for capturing the dynamics specific to our system. By removing the variational aspect, the autoencoder model is enabled to concentrate more effectively on capturing the most salient features for reconstruction and parameter inference. 
This simpler approach avoids the additional complexity of encoding the data distribution in the latent space, potentially leading to more efficient and targeted learning relevant to our system’s dynamics.</p> <h3 id="beyond-reconstruction-evaluating-future-prediction-capabilities">Beyond Reconstruction: Evaluating Future Prediction Capabilities</h3> <p>To evaluate our AE model’s generalizability and future prediction capabilities, we expanded its function beyond reconstruction to include forecasting additional steps. The figure presented here compares the ground truth states $x_1$ and $x_2$ (displacements for $m_1$ and $m_2$) against the model’s outputs for both reconstruction and prediction. The model processes input trajectories of 100 time steps and generates outputs for 199 steps, with the initial 99 steps dedicated to reconstruction and the subsequent 100 steps for prediction (unseen by the model during training). The results illustrate effective reconstruction performance but relatively weaker predictive accuracy.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/recon_and_pred-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/recon_and_pred-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/recon_and_pred-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-contrastive-time/recon_and_pred.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 13: Reconstruction and further prediction of unseen states. 
</div> <p>Given that our autoencoder (AE) framework surpasses the Variational Autoencoder (VAE) in reconstruction and parameter estimation, we speculated whether VAE’s variational component might enhance future predictions. Therefore, we compared the reconstruction and prediction losses between the AE and VAE frameworks.</p> <p>The corresponding figure, presenting the mean and standard deviation of these losses, reveals that in both frameworks, reconstruction losses and their variability are substantially lower than prediction losses. This trend highlights the ongoing difficulty in achieving precise future predictions within our model configurations.</p> <p>Furthermore, the AE framework demonstrated superior performance over the VAE in both reconstruction and future step prediction. This outcome suggests that the VAE’s variational component does not necessarily contribute to improved future predictions. Echoing our earlier findings on parameter estimation and reconstruction, the variational aspect might not be pivotal for capturing the dynamics specific to our system. 
Instead, it could introduce additional complexity by encoding the data distribution in the latent space, which appears to be less relevant for reconstruction and future step prediction tasks.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/comparison_recon_and_pred-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/comparison_recon_and_pred-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/comparison_recon_and_pred-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-contrastive-time/comparison_recon_and_pred.png" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 14: Results of reconstruction loss (first 99 steps) and future prediction loss (last 100 steps) for autoencoder (top subplot) and VAE (bottom subplot). </div> <h3 id="effect-of-latent-variables-on-generated-trajectories">Effect of Latent Variables on Generated Trajectories</h3> <p>In this section, our objective is to glean insights into the latent variables by manipulating them and observing the resultant changes in the generated trajectories. Given that the embedding dimension (i.e., |z|) exceeds the dimension of the parameters (i.e., |$\theta$|), we initially establish a linear mapping from $z$ to $\theta$. The following gif demonstrates how the trajectory evolves in response to alterations in the variable $m_1$. 
The upper part of the gif represents the simulation, while the lower part reflects the output from the decoder of our autoencoder.</p> <p>A notable observation is that, as $m_1$ undergoes modifications, the predicted trajectories adeptly resemble the period of the simulation trajectories. However, a discrepancy arises in their magnitude, with the predicted trajectories exhibiting a notably smaller scale compared to the ground truth trajectories. This pattern suggests that while the embedding successfully captures certain characteristics of the trajectories, it does not fully encapsulate all their properties.</p> <p>We hypothesize that enhancing the complexity of the encoder/decoder architecture (e.g., a larger number of LSTM layers) might facilitate a more comprehensive capture of trajectory attributes. However, our experimental scope is currently constrained by limitations in CUDA memory, particularly due to the decoder’s requirement to process 99 time steps. This constraint hinders our ability to experiment with architectures involving a greater number of layers, which might otherwise allow for a richer representation and understanding of the trajectory data.</p> <figure> <picture> <source class="responsive-img-srcset" media="(max-width: 480px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/latent_to_traj.gif-480.webp" /> <source class="responsive-img-srcset" media="(max-width: 800px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/latent_to_traj.gif-800.webp" /> <source class="responsive-img-srcset" media="(max-width: 1400px)" srcset="/staging/assets/img/2023-11-08-contrastive-time/latent_to_traj.gif-1400.webp" /> <!-- Fallback to the original file --> <img src="/staging/assets/img/2023-11-08-contrastive-time/latent_to_traj.gif" class="img-fluid" width="auto" height="auto" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" /> </picture> </figure> <div class="caption"> Figure 15: Effect of latent variables on generated 
trajectories. </div> <h2 id="conclusion-and-future-works">Conclusion and Future Works</h2> <p>In contrast to current machine learning literature that predominantly focuses on predicting future states of dynamical systems, our work is geared towards uncovering the underlying system parameters from observed trajectories. Our key contributions include:</p> <ul> <li>Implementing two frameworks: an autoregressive LSTM with contrastive learning, and a variational autoencoder architecture. While contrastive learning yields superior parameter estimation, the autoencoder enables unsupervised learning without relying on prior knowledge.</li> <li>Demonstrating our model’s generalizability to both in-distribution and out-of-distribution unseen parameters, and its effective performance with noisy datasets, sustaining a noise-to-signal ratio of up to 10%.</li> <li>Conducting thorough experiments to explore the impact of various factors like initial conditions, prediction horizons, and the interplay between state and parameters embeddings. We also examined the influence of latent variables on trajectory generation and the model’s predictive capabilities beyond the confines of the training set.</li> </ul> <p>The ability to accurately estimate underlying system parameters significantly enhances model interpretability, which is crucial in scientific and engineering applications where decision-making stakes are high. We hope our findings will help researchers and students interested in interpretable machine learning for dynamical systems.</p> <p>While this project did extensive analysis on a spring-mass system, future work may extend this analysis to a broader range of dynamical systems. Moreover, future work can integrate the strengths of both frameworks to incorporate contrastive learning within an unsupervised context, possibly through data augmentation strategies. Further advancements could also focus on refining the impact of latent variables on trajectory generation. 
Such progress is expected to bolster trust in AI solutions and facilitate their integration into essential decision-making frameworks across various domains.</p> <p>Here’s the link to our Github Repo: <a href="https://github.com/martinzwm/meta_param_est">https://github.com/martinzwm/meta_param_est</a></p> <!-- Proposal ## Introduction Time-series data analysis is pivotal in numerous scientific and industrial applications, including dynamical system, weather forecasting, and stock market prediction. The underlying parameters governing the time-series data can often be complex and not directly observable. Unlike traditional time series approaches, which predominantly focus on prediction tasks, leading to a "black-box" prediction <d-cite key="Lim_2021"></d-cite>. Recent literatures have explored using contrastive learning to learn time-series representation, but none has explored learning underlying system parameters <d-cite key="eldele2021timeseries"></d-cite>. In this project, we want to leverage the contrastive learning approach studied in class to learn underlying system parameters parameters. A deep comprehension of these underlying parameters, if successfully achieved, can lead to 2 benefits - 1) enhanced model capability for making accurate future predictions, and 2) a better understanding of the underlying system. The latter is particularly important in scientific, where the goal is to understand the underlying system, and engineering, where safety and reliability are of paramount importance. To achieve the above goals, we proposed the following experiments and setups to study the insights of using contrastive approach to learn latent parameters for time-series representation. ## Objectives The primary objective of this research is to investigate the effectiveness of contrastive loss learning in capturing the system underlying parameters ($$\theta_i$$) of time-series data. We aim to: 1. 
Test the capability of contrastive learning approach to extract embeddings from time-series data that correlate strongly with system underlying parameters. 2. Study different neural network architecture for encoding time-series trajectories into informative embeddings. 3. Explore the impact of various factors such as function forms, number of parameters and distributions, trajectory length, noise levels, and loss functions on the model’s performance. 4. Evaluate the precision of the predictive models in terms of their ability to make accurate future predictions based on learned latent variables, particularly in few-shot learning scenarios. ## Hypothesis With contrastive loss learning, the embeddings of trajectories from the same parameter set will be closer together in the embedding space than to those from different sets. Therefore, our central hypothesis is that the embeddings produced by a model trained with contrastive loss learning will reflect the underlying parameters of time-series data. It is anticipated that a linear projection of these embeddings back onto the parameter space will yield predictions that are congruent with the original parameter values. Moreover, we postulate that the model will be able to make more precise future predictions by effectively capturing the essence of the latent variables governing the time-series data. ## Experimental Setup ### Trajectories Simulation We will generate synthetic time-series data based on underlying deterministic and stochastic processes (e.g., spring-mass dynamical system). - The system can be defined by a set of parameters $$\theta_i$$. We have $H$ set of parameters. - For each set of parameters, a trajectory, $$\{x_{ij}\}$$ of length $T$ can be draw with different initial conditions and noise. We will sample $K$ trajectories for each set of parameters. ### Models We will evaluate three different neural network architectures: 1. Recurrent Neural Network (RNN) 2. Long Short-Term Memory (LSTM) 3. 
Transformer (utilizing attention mechanisms) A model $$M$$ will output an embedding vector $$v_{ij}$$ for a given input trajectory $$\{x_{ij}\}$$. ### Experimentation We want to evaluate the contrastive approach in extracting system parameter under the following scenarios: 1. **System Functional Forms:** We will test linear, non-linear, and complex periodic functions to generate the trajectories. 2. **Number of Parameters ($$\lvert \theta \rvert$$):** We will explore varying the number of parameters to understand how it affects the model’s ability to learn. 3. **Parameter Distribution:** We will use different distributions (uniform, normal, bimodal, etc.) of parameters (i.e., $\theta_i$) to study the impact on the learning process. 4. **Trajectory Length ($$T$$):** We will vary the length to assess the effect on the model’s performance. 5. **Noise Levels:** Different amounts of Gaussian noise will be added to the trajectories to simulate real-world data imperfections. 6. **Loss Functions:** Alongside contrastive loss, does add a loss function for model prediction of next time stamp help performance? ## Conclusion This proposal presents a structured plan to investigate the potential of contrastive loss approach in learning system underlying parameters of time-series data. The insights gained from this research could pave the way for advancements in various fields where time-series analysis is crucial. We hope the insights from our project can contribute to the field of machine learning and its applications in time-series analysis. 
--> \ No newline at end of file diff --git a/bin/cibuild b/bin/cibuild deleted file mode 100755 index d5c9e195..00000000 --- a/bin/cibuild +++ /dev/null @@ -1 +0,0 @@ -bundle exec jekyll build diff --git a/bin/deploy b/bin/deploy deleted file mode 100755 index 2953d787..00000000 --- a/bin/deploy +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/bin/env bash - -# Run this script to deploy the app to Github Pages - -# Parse cmd arguments - -SRC_BRANCH="master" -DEPLOY_BRANCH="gh-pages" - -USAGE_MSG="usage: deploy [-h|--help] [-u|--user] [-s|--src SRC_BRANCH] [-d|--deploy DEPLOY_BRANCH] [--verbose] [--no-push]" - -while [[ $# > 0 ]]; do - key="$1" - - case $key in - -h|--help) - echo $USAGE_MSG - exit 0 - ;; - -u|--user) - SRC_BRANCH="source" - DEPLOY_BRANCH="master" - ;; - -s|--src) - SRC_BRANCH="$2" - shift - ;; - -d|--deploy) - DEPLOY_BRANCH="$2" - shift - ;; - --verbose) - set -x - ;; - --no-push) - NO_PUSH="--no-push" - ;; - *) - echo "Option $1 is unknown." >&2 - echo $USAGE_MSG >&2 - exit 1 - ;; - esac - shift -done - -# Exit if any subcommand fails -set -e - -echo "Deploying..." -echo "Source branch: $SRC_BRANCH" -echo "Deploy branch: $DEPLOY_BRANCH" - -read -r -p "Do you want to proceed? [y/N] " response -if [[ ! $response =~ ^([yY][eE][sS]|[yY])+$ ]] -then - echo "Aborting." - [[ "$0" = "$BASH_SOURCE" ]] && exit 1 || return 1 -fi - -# Check if there are any uncommitted changes -if ! git diff-index --quiet HEAD --; then - echo "Changes to the following files are uncommitted:" - git diff-index --name-only HEAD -- - echo "Please commit the changes before proceeding." - echo "Aborting." - [[ "$0" = "$BASH_SOURCE" ]] && exit 1 || return 1 -fi - -# Check if there are any untracked files -if ! test -z "$(git ls-files --exclude-standard --others)"; then - echo "There are untracked files:" - git ls-files --exclude-standard --others - echo "Please commit those files or stash them before proceeding." - echo "Aborting." 
- [[ "$0" = "$BASH_SOURCE" ]] && exit 1 || return 1 -fi - -# Switch to source branch (creates it if necessary from the current branch) -if [ `git branch | grep $SRC_BRANCH | tr ' ' '\n' | tail -1` ] -then - git checkout $SRC_BRANCH -else - git checkout -b $SRC_BRANCH -fi - -# Checkout DEPLOY_BRANCH branch -if [ `git branch | grep $DEPLOY_BRANCH` ] -then - git branch -D $DEPLOY_BRANCH -fi -git checkout -b $DEPLOY_BRANCH - -# Export JEKYLL_ENV=production -export JEKYLL_ENV=production - -# Build site -bundle exec jekyll build - -# Delete and move files -find . -maxdepth 1 ! -name '_site' ! -name '.git' ! -name 'CNAME' ! -name '.gitignore' -exec rm -rf {} \; -mv _site/* . -rm -R _site/ - -# Create `.nojekyll` file (bypass GitHub Pages Jekyll processing) -touch .nojekyll - -# Push to DEPLOY_BRANCH -git add -fA -git commit --allow-empty -m "$(git log -1 --pretty=%B) [ci skip]" -[[ ${NO_PUSH} ]] || git push -f -q origin $DEPLOY_BRANCH - -# Move back to SRC_BRANCH -git checkout $SRC_BRANCH - -echo "Deployed successfully!" - -exit 0 diff --git a/bin/docker_build_image.sh b/bin/docker_build_image.sh deleted file mode 100755 index fe208afb..00000000 --- a/bin/docker_build_image.sh +++ /dev/null @@ -1,5 +0,0 @@ - FILE=Gemfile.lock -if [ -f "$FILE" ]; then - rm $FILE -fi - docker build -t "al-folio:latest" . 
\ No newline at end of file diff --git a/bin/docker_run.sh b/bin/docker_run.sh deleted file mode 100755 index a59a44ba..00000000 --- a/bin/docker_run.sh +++ /dev/null @@ -1,7 +0,0 @@ -FILE=Gemfile.lock -if [ -f "$FILE" ]; then - rm $FILE -fi -docker run --rm -v "$PWD:/srv/jekyll/" -p "8080:8080" \ - -it al-folio:latest bundler \ - exec jekyll serve --watch --port=8080 --host=0.0.0.0 \ No newline at end of file diff --git a/bin/dockerhub_run.sh b/bin/dockerhub_run.sh deleted file mode 100755 index 0c12bb23..00000000 --- a/bin/dockerhub_run.sh +++ /dev/null @@ -1,7 +0,0 @@ -FILE=Gemfile.lock -if [ -f "$FILE" ]; then - rm $FILE -fi -docker run --rm -v "$PWD:/srv/jekyll/" -p "8090:8090" \ - -it amirpourmand/al-folio:v0.7.0 bundler \ - exec jekyll serve --watch --port=8090 --host=0.0.0.0 diff --git a/bin/entry_point.sh b/bin/entry_point.sh deleted file mode 100644 index 062f2c64..00000000 --- a/bin/entry_point.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -CONFIG_FILE=_config.yml - -/bin/bash -c "rm -f Gemfile.lock && exec jekyll serve --watch --port=8080 --host=0.0.0.0 --livereload --verbose --trace --force_polling"& - -while true; do - - inotifywait -q -e modify,move,create,delete $CONFIG_FILE - - if [ $? -eq 0 ]; then - - echo "Change detected to $CONFIG_FILE, restarting Jekyll" - - jekyll_pid=$(pgrep -f jekyll) - kill -KILL $jekyll_pid - - /bin/bash -c "rm -f Gemfile.lock && exec jekyll serve --watch --port=8080 --host=0.0.0.0 --livereload --verbose --trace --force_polling"& - - fi - -done \ No newline at end of file diff --git a/blog/2022/Recovering-Latent-Variables-with-VAEs-despite-Training-Bias/index.html b/blog/2022/Recovering-Latent-Variables-with-VAEs-despite-Training-Bias/index.html new file mode 100644 index 00000000..8d0aafc8 --- /dev/null +++ b/blog/2022/Recovering-Latent-Variables-with-VAEs-despite-Training-Bias/index.html @@ -0,0 +1,32 @@ + Recovering Latent Variables with VAEs despite Training Bias | 6.S898 Deep Learning Blogs 2023

      Recovering Latent Variables with VAEs despite Training Bias

      Final Project Blog

      Introduction

      In this age of abundant unlabeled data, unsupervised learning is capitalizing to power the generative models that are eating the world. Large Language Models and Diffusion models are focalizing most of the mainstream hype and therefore siphoning attention from other generative models. In particular, the Variational Autoencoders (VAE) is a model architecture that has been arguably overlooked in the recent onslaught for scaling transformer and diffusion-based models. VAEs are a promising latent variable model that allows for the learning of disentangled latent variables that model data.

      As models scale in size, so does concern about the lack of interpretability associated with Neural Networks. Latent variable models offer a solution to this problem since they can learn variables that factorize the data generation process. VAEs are particularly well suited for learning latent variables in an unsupervised setting since they use an unsupervised learning objective and are regularized to learn disentangled encodings of our data. VAEs have been applied in a broad range of settings, such as classifying out-of-distribution data , fair decision making , causal inference , representation learning, data augmentation, and others. Although VAEs have demonstrated the capability to recover ground truth latent variables, they often recover mock factors that can generate the training dataset but differ mechanistically from the ground truth data generation process. For instance, in lecture we demonstrated that a VAE trained on cartoon images of rivers learned to encode aggregate river curvature as a latent variable. The ground-truth data-generating random variables were an ordered set of Bernoulli random variables indicating if the river angled its trajectory to the left or to the right at the particular timestep. The VAE’s shortcoming in recovering the real latent variables is expected from a Bayesian perspective, since we assume an isotropic Gaussian prior for continuous latent variables, and impose a bottleneck on the number of latent variables. Even though we do not recover the ground-truth data generating random variables, we learn latent variables that are qualitatively useful and capture macro latent phenomena about the data. This segues into an interesting question—when do VAEs fail to recover useful latent variables?

      In particular, we will choose the setting in which our training data is biased, but we still seek to learn insightful representations of the data. This is an especially well-motivated setting, since in unsupervised learning, we often do not have any guarantees about the distribution of our training data, yet we still aim to learn generalizable latent variables. It would be ideal if VAE’s ability to recover generalizable latent variables is robust to training bias. Relating to the cartoon example from lecture, if the probability parameter for the data-generating random variables was skewed so that right-curving rivers are more likely (i.e. \(P(\text{right}) = 0.9\) instead of \(P(\text{right}) = 0.5\)), would we still learn useful latent variables, or would latent variables instead model what we assume to be observational noise? If we learn the former, then we would still be able to sample in latent space to generate left-curving rivers. Intuitively, we will not be able to generate samples out of distribution with the training data (i.e. left curving rivers), however this may not be the case due to the way VAEs assume a prior. In this project, we will examine this setting to determine if higher regularization of the prior increases model robustness to training bias.

      Background

      VAEs are useful as encoders for downstream tasks, and as generative models. Compared to vanilla autoencoders, they offer significant advantages, since they provide some assurances regarding the distribution of its latent variables. Unlike VAEs, standard Autoencoders can have arbitrarily distributed embeddings, making them poor generative models, since there is no straightforward way to sample in latent space so that we generate samples in distribution with our training data. VAEs are similar to standard Autoencoders, however, they are trained with a modified loss function that ensures the learned embedding space is regularized towards an isotropic Gaussian (there exist alternative choices regarding which distribution we regularize towards, but Gaussian Mixture Models are the most popular as it stands due to their simple parameterization and empirical success). Additionally, instead of simply compressing the input with a neural network during the forward pass, the encoder of a VAE outputs a mean and covariance, defining a distribution from which we sample to obtain our latent variables.

      Since the VAE loss function regularizes our latent variables towards an isotropic Gaussian, encoded data is both disentangled and interpretable. To use trained VAEs as generative models, we simply sample latent variables i.i.d. from the Gaussian distribution and pass it through the VAE decoder to generate samples in distribution with our training data. VAEs also offer significant advantages as encoders, since regularization encourages them to learn factored, disentangled representations of the data. Finally, VAEs are particularly well-suited for interpretability since regularization encourages each latent variable to capture a unique aspect of the data.

      There has been significant prior work studying regularization and choice of priors in VAEs. Notably, \(\beta\)-VAE introduces the beta parameter to control the degree to which the VAE loss function penalizes the KL divergence of the latent variable distribution with the chosen prior (an isotropic Gaussian in their case). Higgins et al. demonstrate that introducing the beta parameter allows the VAE encoder to learn quantitatively more disentangled latent variables. They introduce a novel quantitative metric to evaluate the disentanglement of latent space and show that \(\beta\)-VAE improves on existing methods. Furthermore, they train a \(\beta\)-VAE on a dataset of faces (celebA) and qualitatively show that \(\beta\) regularization allows for the factorization of previously entangled latent variables such as azimuth and emotion.

      There have been several iterations on \(\beta\)-VAE such as Factor-VAE . Kim and Mnih point out that although \(\beta\) regularization improves disentanglement in embedding space, it does so at the cost of reconstruction quality. To reduce this trade-off and still encourage disentanglement, they introduce a term to the VAE loss function that penalizes the KL divergence between the joint distribution and the product of the marginals, instead of with an isotropic Gaussian as in \(\beta\)-VAE.

      Selecting an appropriate data prior is fundamental when performing Bayesian inference. In vanilla VAEs, we often assume an isotropic Gaussian prior for our latent variables, however, this is not always a good assumption, making it difficult to converge . Miao et al. propose InteL-VAE, a VAE architecture capable of learning more flexible latent variables that can satisfy properties such as sparsity even when the data has significant distributional differences from a Gaussian. Their contributions allow for higher customizability of latent variables while bypassing many of the convergence issues commonplace with other methods that assume non-Gaussian priors.

      Since under ideal conditions, VAEs recover factorized latent variables, causal inference has become a standard setting for their application. Madras et al. propose structured causal models to recover hidden “causal effects” with the aim of improving fairness when presented with biased data . They specify a framework where we want to recover the latent factors so that decision making in applications such as loan assignment and school admissions can be approached fairly. Admittedly, Structured Causal Modeling (SCM) is arguably a better setting for further work on our proposed research question. However, this field is largely outside of the scope of the course, so we will only observe that Madras et al. utilize a model where causal factors, which are analogous to our ground truth latent variables, affect a decision and an outcome, and that they utilize a Bayesian framework to perform variational inference. Future iterations of our research should borrow methods from this field of Mathematics for maximum impact. Louizos et al. propose the Causal Effect VAE , marrying the adjacent fields and setting the stage for future research.

      Although there is plenty of research adjacent to our particular question of interest, \(\beta\)-VAE investigates how \(\beta\)-regularization affects disentanglement, but not robustness to training bias. Other works that investigate the ability of latent variable models to recover the ground truth in the presence of training bias are not concerned with \(\beta\)-regularization. \(\beta\)-regularization has been shown to be effective, in addition to being extremely simple to implement, compared to other regularization techniques. Thus it is an ideal candidate for directed research on how regularization affects VAE robustness to training bias. Our question is novel, supported by adjacent research, and reasonable to implement with the resources available to an undergraduate student.

      Set-up and Methods

      Data

      More concretely, suppose that there exists a data generating function \(\mathcal{G}: Z \to X\) that generates our training dataset given random variables \(Z \sim p_{\text{data}}\). For simplicity, our data will be nxn grids of squares, where the intensity of each square is deterministically proportional to its respective random variable. To create our training dataset, we sample \(n^2\) random variables from an isotropic Gaussian distribution with mean \(\mu\) and covariance I. We then apply a sigmoid activation to the random variables so that values are in the range [0,1]. We then create a mn x mn image with mxm pixel grids for each random variable. Finally, we add Gaussian noise to the image. We choose n=3, m=7, and train a VAE for each value of \(\mu\) in the set {0, 1/2, 1, 3/2, … 5}.

      Training Data

      The following figure shows example training images before noising. Each row has 21 images drawn from the distribution defined by applying a sigmoid activation to a normally-distributed random variable with variance 1 and mean specified by the row index.

      And here are some images with some noise added.

      Test Dataset

      To create our test dataset, we discretize the domain of latent variables by binning. We then enumerate all possible combinations of latent variables, and generate corresponding images without adding noise. We restrict the domain of generating variables to {0.1, 0.5, 0.9}, and enumerate all possible combinations. This yields a test dataset of 19683 images.

      Example Test Images

      Training

      With this setup, the structure of our latent space matches that of the ground-truth latent variables, creating an appropriate setting in which to test how training bias and regularization affect the quality of learned models. Our pipeline is as follows. We train a VAE on its associated training set by maximizing the ELBO. After T training steps, we then train a linear projection head from the ground-truth latent variables to our learned latent variables. Even if we fully recover the ground-truth latent variables in our model, there is no assurance that we will not learn some permutation of the ground-truth latent variables. Thus in order to test if a particular latent variable was learned in our model, we must utilize such a projection to map from ground truth to learned latent variables, then decode the sample and evaluate the generated image.

      Although the Mutual Information between the ground truth latent variables \(z \sim p_z\) and the learned latent variables \(\hat{z} \sim p_\hat{z}\) would be a more encompassing gauge if the VAE recovered the latent variables, using a linear projection in lieu of a Mutual Information estimator such as MINE is justified for the following reasons. Namely, we assume an isotropic Gaussian during training, so a good VAE will learn disentangled latent variables that will be off by at most a rotation from the ground truth latent variables. Furthermore, we control the data generation process so that data is generated by \(n^2\) normally distributed random variables. Thus we can assume that a linear projection is sufficient to recover the ground truth latent variables from our learned latent variables. Furthermore, given the time constraints and resources allocated for this project, simply training a linear projection and taking the final mean squared error as a proxy for mutual information allows for simpler implementation.

      We train with the Adam optimizer.

      Hyperparameter Value
      VAE training steps 10000
      Linear Projection Training Epochs 3
      Training noise mean 0
      Training noise variance 0.25

      Results

      Training Observations

      During the unsupervised training phase where we train the various VAE models on their respective training sets, we observe that dataset choice and penalization of the KL divergence (beta hyperparameter) have consistent effects on the training curves. The following charts demonstrate that increased penalization of the KL divergence results in higher training loss, as well as noisier training loss and longer convergence times. This is expected since higher regularization directly increases the loss and its associated noise. We approximate the KL divergence by drawing one sample, which is highly variable, but tends to work empirically. We also observe that higher training bias (i.e. higher pre-activation mean of the data generating latent variables) results in higher training loss. As we increase this training bias, it becomes harder and harder to disambiguate latent features from noise. Thus models learn uninterpretable latent variables and poor decoders that learn to trivially output the dominating color (white).

      Training Curves Varying Training Distribution
      Training Curves Varying $\beta$-Regularization

      Evaluation

      The following figure shows a heat map of our Proxy for measuring Mutual Information (which we will refer to as PMI) between the learned latent variables \(\hat{Z}\) and the true latent variables \(Z\).

      Note that when we randomly initialized a VAE and then trained linear projections from the ground truth latents to recovered latents, we achieved a PMI of 0.1121 (averaged over 3 runs with identical training parameters). The heatmap shows that we almost completely recovered the ground-truth latent variables with low regularization and low training bias. As training bias increases, the model recovers less and less informative representations of the true latent variables.

      Another heuristic that we can utilize to estimate the Mutual Information between the recovered latents and the ground truth latents is the mean squared error between \(\mathcal{G}(z)\) and \(\mathcal{D}_\text{VAE}(P(z))\) averaged over our test set, where P is the learned linear projection from \(Z \to \hat{Z}\) and \(\mathcal{D}_\text{VAE}\) is the VAE decoder. The following figure heatmap visualizes this figure.

      Conclusion and Future Work

      From the collected data, it is visually clear that there exists a relationship between \(\beta\)-regularization and training bias. In both heat maps, there are reasonably well-defined diagonal level surfaces, indicating that there is some relationship between regularisation towards an isotropic Gaussian prior and robustness to training bias. Validation and further experiments are required to legitimize this conclusion, however, these experiments are an indication that conscious regularization can be a useful technique to mitigate training biases of a particular form. At this point, further work is required to interpret the results, since it is not clear why we seem to observe inverse relationships between the \(\beta\)-regularization and training bias when we involve the decoder.

      It is also worth noting that during pretraining, VAEs were trained for a fixed number of training steps, and not until convergence. Thus it is highly plausible that models with higher \(\beta\)-regularization (i.e. models with \(\beta > 1\)) were not trained to completion, and therefore can not be fairly evaluated with mutual information estimators without further training. Given my computational and temporal constraints, it was not reasonable to run experiments with longer training. Future work will have to validate my findings by pretraining for longer and testing a finer resolution of \(\beta\) parameters. Finally, it will be interesting to expand this work to more interesting datasets such as celebA and inject training bias by resampling the dataset according to some variables such as hair color or skin tone. Once we move beyond the assumptions assured by pet data, we can reevaluate what relationships hold true as we gradually add the complexity inherent to the real world.

      \ No newline at end of file diff --git a/blog/2022/VIVFormer/index.html b/blog/2022/VIVFormer/index.html new file mode 100644 index 00000000..c645e22a --- /dev/null +++ b/blog/2022/VIVFormer/index.html @@ -0,0 +1,32 @@ + VIVformer | 6.S898 Deep Learning Blogs 2023

      VIVformer

      A deep transformer framework trained on real experimental and synthetic gen-AI data for forecasting non-stationary time-series. Applications and insights drawn from vortex induced vibrations data collected at the MIT Towing Tank.

      Introduction & Motivation

      Vortex induced vibrations (VIV) are vibrations that affect bluff bodies in the presence of currents. VIV are driven by the periodic formation and shedding of vortices in the bodies’ wakes which create an alternating pressure variation causing persistent vibrations . The vibration amplitude in VIV is typically moderate, not exceeding about one to two body diameters . For flexible bodies, VIV are not uniform along the body’s length (usually referred to as the span) but rather different points along the span vibrate with different amplitudes and phases.

      Schematic diagrams of vortex induced vibrations of flexible bodies.

      Observations of vortex induced vibrations (VIV) date back to antiquity, when the Aeolian tones, sounds created by pressure fluctuations induced by winds passing over taut strings were recognized. The first sketches of vortices date back to Leonardo da Vinci in the early 16th century. Today, VIV have become a problem of interest to both theoreticians, due to the complex underlying mechanisms involved, and engineers, due to the practical significance of mitigating the fatigue damage VIV can cause to offshore structures and equipment such as marine risers and offshore wind turbines. In order to gain some intuition, the reader can refer to the video of a flexible body undergoing VIV in section “Data Description” (below).

      The underlying driving mechanism of VIV is vortex formation; specifically, the periodic shedding of vortices formed in the wake behind bluff bodies placed within cross-currents . The vortex shedding frequency in rigid bodies is known as the Strouhal frequency. For flexibly mounted or flexible bodies, the vortex formation frequency can be entrained away from the Strouhal frequency and coincides with the frequency of vibration in a phenomenon known as lock-in . This occurs across a wide range of oscillating frequencies resembling a nonlinear resonance . Given that flexible body VIV are not span-wise uniform as the flexible body undergoes a spatially traveling and/or standing wave response from the forcing exerted by the fluid , the observed motions are nonstationary, unsteady, and can transition to different responses even for seemingly unchanged experimental conditions.

      VIV of flexible bodies are usually modelled by leveraging the modal decomposition technique (i.e. using a Fourier expansion of sinusoidal mode shapes with time varying coefficients), similar to the approach introduced for modelling vibrating shafts and beams . Recently, Kharazmi et al. (2021) attempted to learn the mode shapes and time varying coefficients using LSTM networks in modal space (LSTM-Modnet); Mentzelopoulos et al. (2023) proposed learning a sparse-mode set of sinusoidal modes along with the corresponding time-varying coefficients. Both frameworks suffer from the inability to robustly forecast future motions.

      Although leveraging transformers to expand the horizon of predictions of time series is a very active field of research , transformers have not yet been used to predict VIV of flexible bodies, which are physical non-stationary time-series, to the best of the author’s knowledge. In addition, only limited work has been performed in generating physical data using generative models and there are no identifiable applications to VIV in the literature. Most applications of machine learning for VIV include solving physical equations using physics-informed neural networks (PINNs) and learning hydrodynamic coefficients or other relevant quantities in order to predict the motions on average rather than instantaneously .

      In this work, an attempt will be made to develop a transformer network architecture to predict the VIV of a flexible body both instantaneously and on average. The transformer will be trained and tested using data collected at the MIT Towing Tank by the author. In addition, in order to make the most of the available data, a variational autoencoder (VAE) will be trained to generate more VIV samples which will then be used to train the transformer. In doing so, the capability of VAEs to create physical data which retain information of the underlying physical processes will also be examined. The rest of the blog will be organized as follows: 1. using generative-AI, specifically variational autoencoders, in order to generate physical VIV data 2. using transformers to model and forecast nonstationary flexible body VIV.

      Data Description

      All data used for this study were collected during experiments conducted by the author at the MIT Towing Tank, a facility consisting of a 35m x 2.5m x 1.2m water tank equipped with a towing carriage capable of reaching speeds exceeding 2 m/s as well as a flow visualization window. In this and the following sections the terms model, riser, flexible body, and flexible cylinder will be used interchangeably to refer to the flexible cylinder model used during experiments.

      The figure below illustrates the experimental setup schematically. A solid aluminum frame was used to support the flexible cylinder; the riser model was placed vertically at the center of the structure. An ATI 6 degree of freedom force sensor was attached to the top end of the riser to measure its tension. Two GoPro Hero 11 cameras were attached to the supporting frame facing perpendicular directions to capture videos of the riser’s motion in the cross-flow and in-line directions, respectively.

      Schematic of experimental setup. A riser model was placed at the center of a supporting frame which was towed resulting in a uniform incoming flow profile on the model.

      The riser model was constructed out of urethane rubber infused with tungsten powder. Specifically, Smooth-On PMC-724 urethane rubber was mixed with powdered tungsten to increase the model’s density and achieve a mass-ratio $m^* = \frac{\rho_{model}}{\rho_{H_2O}} = 3$. The mixture was poured into a right cylindrical mold with a fishing line placed along its centerline to provide tension. The model’s length was 890 mm with a 5 mm diameter. The length-to-diameter ratio of the model riser was L/D = 178. Equidistant markers were spray-painted red on the riser model resembling a zebra-patterning to enable motion tracking using cameras. Three underwater light fixtures were used to enhance visibility underwater. The model’s ends were clamped on the supporting frame and the model was separated from the frame by a distance much greater than the body’s diameter $O( > 10D)$.

      The flexible cylinder was towed at 0.7 m/s resulting in a uniform incoming flow profile along the x direction, as shown in the schematic above. Recordings of the motions were captured at a resolution of 1080p (1920x1080 pixels) and 120 fps. The Reynolds number was $ Re \approx 3,500$. A visualization of the vibration is shown below (this is a gif of the actual vibration recording downsampled in time).

      Sample video recording of the cross-flow vibration of the flexible body (top) and tracking result of the motion (bottom).

      Reconstruction of the motion was done using a machine vision framework leveraging Kalman filtering for multi-object tracking; for more information one may refer to Mentzelopoulos et al. (2024) . The high level process is shown below.

      Pipeline for motion reconstruction. Red markers on the body were tracked across the video frames to obtain their motion.

      A total of 36 locations along the span were marked red on the flexible body and their positions were tracked. The endpoints were fixed on the supporting frame and thus their displacement was zero.

      Vibration Data as Images

      The displacement of the vibrating body was recorded at 36 uniformly spaced locations along the body’s span and the video recordings were sampled at 120 fps. One may store the vibration data as 2D arrays of $N_{time}$ x $N_{sensor}$, where each row corresponds to a different time of the vibrating body’s displacement at $N_{sensor}$ locations.

      Visualization of experimental data. The x-axis corresponds to sensor number (i.e. body location) and the y-axis corresponds to time step (i.e. $\Delta t = 1/fps = 1/120 \ sec$). The displacement normalized by the body's diameter is highlighted on the plane. On the left we visualize the vibration data stored in 2D arrays. On the right, we plot the interpolated values which make more intuitive sense as visualized flexible body vibrations (travelling wave response with peaks travelling to the right from location 0 to location 36). Lines parallel to the x-axis are "snapshots" of the flexible body vibrating in the direction perpendicular to the paper/screen (i.e. "in and out of the page").

      The stored vibration data are illustrated above and can easily be visualized and treated like single channel images! If necessary, scaling pixel values invertibly to an interval of choice, like [0,1] or [0, 255] requires just a few operations leveraging the maximum and minimum values of the data. In the images shown above, each row corresponds to a different time of the recorded vibration at all sampled locations. The time difference between consecutive time steps is $\Delta t = 1/fps = 1/120 \ sec$. The 36 “sensor locations” correspond to the uniformly spaced markers on the body (excluding the two endpoints) and thus they span approximately the full body length. Plotting the interpolated values of the array yields a more intuitive interpretation of the vibrations. In the data shown above, a travelling wave (crests travelling) from location 0 to location 35 can be identified. For convenience, the data were stored in a single 4D array of size $N_{batch}$ x $1$ x $N_{time}$ x $N_{sensor} = N_{batch}$ x $1$ x $36$ x $36$, yielding hundreds of square arrays of size 36 x 36 which can be easily visualized and collected in batches for training models.

      Gen-AI for Physical Vibration Data using Variational Autoencoders

      In this section we focus on generating physical vibration data using generative-AI. We will attempt using a variational autoencoder (VAE) trained on the real experimental data described above to generate synthetic data of the vibrations. We are interested in understanding whether the generated data preserve physicality and thus whether they can be used to train models and to understand the underlying physical generative process by studying the artificial data.

      A VAE is a specific network architecture whose goal is to learn a probabilistic mapping from an input space to a low dimensional latent space and then back to the input space. The network architecture is comprised of an encoder network which maps data from the input space to the latent space and a decoder network which maps data from the latent space back to the input space. A schematic of the VAE used for this work is shown below.

      Schematic of the variational autoencoder architecture.

      On a high level, the variational autoencoder acts just as a regular autoencoder, with the difference that the training ensures that the distribution of the data in the latent space is regular enough to enable a generative process when sampling from the latent space. That is, the minimized loss ensures that the distribution of the data over the latent dimensions, $q(z \mid x)$, is as close to a standard normal distribution as possible. We choose to assume a Gaussian prior on the latent space for our data since we will need to sample from it when decoding, a task which is nontrivial for arbitrary distributions. The decoder on the other hand will learn the distribution of the decoded variables, $p(x \mid z)$ given their latent representations.

      The encoder architecture of choice was the following, accepting an input $x \in R^{36 \times 36}$:

      1. $x \rightarrow Linear (R^{36 \times 36}, R^{64}) \rightarrow ReLU \rightarrow Linear(R^{64}, R^{64}) \rightarrow ReLU \rightarrow x_{embedding}$
      2. $x_{embedding} \rightarrow Linear(R^{64}, R^{5}) \rightarrow ReLU \rightarrow \mu \in R^5$
      3. $x_{embedding} \rightarrow Linear(R^{64}, R^{5}) \rightarrow ReLU \rightarrow \sigma \in R^5$

      where $\mu$ and $\sigma$ are the mean and variance of the posterior data distribution in the latent space. The decoder architecture was as follows accepting an input $z \in R^5$:

      1. $z \rightarrow Linear(R^{5}, R^{64}) \rightarrow ReLU \rightarrow Linear(R^{64}, R^{36 \times 36}) \rightarrow ReLU \rightarrow x^\prime$

      Training was done by maximizing the evidence lower bound (ELBO) on the experimental data and the outputs of the autoencoder. This is equivalent to minimizing the following loss (negative of ELBO).

      $Loss_{ELBO} = - E_{q(z \mid x)} \bigg[ \log p(x\mid z) - D_{KL}(q(z \mid x )\mid \mid q(z)) \bigg]$

      where $D_{KL}$ refers to the Kullback-Leibler divergence. Intuitively, maximizing the ELBO or minimizing the above $Loss_{ELBO}$, aims at maximizing the log-likelihood of the data given their representations in the latent space while minimizing the Kullback-Leibler divergence between the learned posterior of the data in the latent space and the prior assumption of a Gaussian distribution in the latent space. For the purposes of training, the data were scaled to be between [0, 1] in order to use binary cross entropy. The VAE was trained using Adam optimizer with a learning rate $lr = 0.01$. A step scheduler was set to decay the step by $\gamma = 1/2$ every 2,000 iterations. The training loss as a function of epoch is shown below.

      Training loss ($Loss_{ELBO}$) for the VAE on the input real VIV data.

      Having trained the VAE, samples from the standard normal distribution in $R^5$ were drawn, decoded, and rescaled in order to generate synthetic VIV data. Three random samples are included below (top), along with three random samples of real data observed during experiments (bottom).

      Top row: Generated data using the VAE. Bottom row: Real data recorded from experiments.

      Although the generated data are certainly eye-pleasing, their promise begs the question of whether they preserve physicality. In order to address this question, we will examine whether a model trained on synthetic data can be used to predict real experimental data.

      VIVformer - A Transformer Architecture for VIV

      Transformer network architectures have been widely used and are considered state of the art tools for various machine-learning tasks, particularly in natural language processing (NLP) and computer vision. The transformer architecture has become a cornerstone in deep learning and its applications span across all fields of engineering and science. In this section we will develop a transformer architecture to model and forecast the VIV of flexible bodies under the excitation of a hydrodynamic force. The transformer architecture used for this purpose is shown below.

      VIVformer: A transformer architecture for vortex-induced vibrations.

      As shown schematically above, the architecture is comprised by various Residual-Attention modules followed by a final linear layer. The input to the VIVformer is a batch of vibration data as discussed in previous sections “Data Description” and “Visualizing the Data” with shape $N_{batch} \times N_{time-in} \times N_{sensor}$. The data are then passed through $N_{attn-layers}$ residual attention modules (these do not affect the shape of the input) and then scaled to the desired $N_{time-out}$ yielding an $N_{batch} \times N_{time-out} \times N_{sensor}$ output.

      The residual-attention modules are the drivers of the data processing. These modules accept an input on which they perform two sequential tasks: 1. multi-head attention with a residual connection, and 2. pass the output of the multi-head attention module through a fully connected feedforward network (FFN) with a residual connection. The process can be visualized in the bottom left of the architecture schematic above.

      The multi-head attention layer is comprised of $N_{heads}$ number of attention heads which calculate the self-attention of the input as proposed by Vaswani et al. (2017) . The superposition of the input and output from the Multi-head attention module is then passed through the FFN. The FFN performs batch normalization, passes the output through a linear layer which scales the input to $mlp-dim$, then through a Gaussian Error Linear Unit (GeLU) activation and scales the output back to the original dimension by passing through a second linear layer.

      For this work, we attempt using 20 time steps of input data in order to predict a single future time step. That is, the input to the VIVformer is 20 time steps of vibration data at 36 locations and we try to predict the next time step at the same locations. We note that the VIVformer is flexible in terms of the number of data-points in and out as well as the number of time steps in and out. Decreasing the input information (both spatial and temporal) while forecasting as much as possible in terms of spatial and temporal predictions is the recommended research direction for future work.

      Although auto-regressive transformers are trending currently, for the purpose of forecasting vibrations this would lead to a pitfall of accumulating model errors and using them as inputs. In order to predict extended time horizons, simply adjusting the number of time-steps out would be the recommended course of action.

      Since we are interested in making predictions of physical vibration data, a reasonable choice for our loss function is the Mean Square Error (MSE) between predicted and observed vibrations.

      The Real (data) Deal

      In this section, the experimental data obtained during experiments were used to train the VIVformer. Specifically, 20 time steps at 36 locations were used as input and the next time step at the same locations was forecasted. In order to train the transformer, a dataset and dataloader was created to enable iterating over the following quantities:

      1. Sequence_in: A 2D array of shape $N_{time-in} = 20 \times N_{sensor} = 36$.
      2. Target = A 2D array of shape $N_{time-out} = 1 \times N_{sensor} = 36$.

      Sequence_in refers to a single input to the VIVformer and Target is the expected output of the VIVformer. The sequences were collected in batches and then used for training. The model was trained on the MSE loss between input sequences and targets and the parameters were updated using the AdamW algorithm. The initial learning rate was set to $lr = 0.0001$ and a cosine annealing step scheduler was set to adjust the learning rate during training.

      The training data were split into 80% for training and 20% for testing/validation. The sequences and targets of the training data were shuffled randomly and split in mini-batches while the validation data were not, in order to preserve the continuity of the vibrations when validating (important mainly for visualization purposes). The VIVformer was trained for a total of 50 epochs. The training results are shown below.

      VIVformer training and validation loss trained on experimental VIV data.

      The training results show that the model is able to gradually decrease the MSE loss between targets and predictions. The loss on both the training set and the validation set seems to be decreasing and converging. We note that the VIVformer architecture used was heuristically optimized using a trial and error approach yielding 4 attention-residual layers, with 3 attention heads of 32 hidden units and a mlp-dim of 128 hidden units. In order to visualize the predicted vibrations, the forecasting as well as target data from a random sample of 36 continuous time steps from the validation set are shown below.

      From left to right: Forecasting of the VIV on a randomly selected set of 36 continuous points from the validation set (for visualization of the predicted vibrations), target data from real experiments, absolute difference between targets and predictions.

      As is evident from the visualized vibration predictions (above), the model can predict unseen experimental to reasonable accuracy. The expected modes are forecasted and the output is continuous. In addition, the absolute difference is almost everywhere small, although some inaccuracies do occur in the predictions. A meaningful question to ask would be how well does the model predict the root mean square (RMS) of the vibrations which gives us a sense of the prediction capabilities on average. Below we plot the RMS of the forecasted as well as the experimentally observed vibrations.

      Root mean square (RMS) of the forecasted VIV as well as the experimentally observed. RMS displacement is shown on the y-axis while span (body position) is shown on the x-axis. Reasonably accurate agreement is evident between forecasting and experimental observations.

      The RMS result shown above shows that the model can predict the vibrations reasonably accurately on average. This is a particularly important result as it allows for direct benchmarking of this method against semi-empirical models which can only predict the average vibrations.

      Although this is not recommended practice as we described earlier, we attempt to make auto-regressive predictions using our model. That is, we start with 20 time steps of recorded vibrations as input and then use the model’s predictions gradually as more and more inputs. By 20 time steps, there would be no observed data input to the model; it would only be predicting on its outputs. The auto-regressive results are shown below.

      From left to right: Auto-regressive forecasting of the VIV on a randomly selected set of 20 continuous points from the validation set, target data from real experiments, absolute difference between targets and predictions. We show 20 time steps as we assess the model's capability to predict as many time steps as it uses as input.

      Although the mode shapes are consistent and remain physical looking, it appears that the magnitude of the response grows with time. As expected, errors accumulate and the forecasting becomes more and more inaccurate as time evolves. This can also be clearly visualized in the absolute difference plot (on the very right) where the difference increases with time.

      In conclusion, with respect to training on real data, the transformer is reasonably accurate in terms of forecasting future motions given a sample of the experimental data. The model trains well on the MSE loss and seems to converge in about 50 epochs. The wall time of training does not exceed a few minutes on a Google-Colab T4 GPU machine.

      The hyper-Real (Gen-AI data) Deal

      So far we have established that the VIVformer architecture can model the physical VIV of flexible bodies reasonably accurately. This section will mainly focus on addressing the question of whether synthetic VIV data generated using our VAE are physical: that is, whether the physical properties of the vibrations are preserved during the generative process. In order to address this question, we will train the VIVformer on synthetic data only and then test the trained model on the real data.

      Sixty arrays of 36 time steps at 36 locations (this can be thought of as generating 60 images similar to the ones shown in previous section “Vibration Data as Images”) were generated using the VAE trained on real experimental data. The synthetic VIV data were then organized in input and target sequences by creating a dataset and dataloader to train the VIVformer. Training was done exactly as described in section “The Real (data) Deal” with the only difference being the training data; in this case training data were only synthetic. The same split of 80% for training/validation was used on the synthetic data. The training results were as follows.

      VIVformer training and validation loss trained on synthetic VIV data.

      The VIVformer architecture seems to train on the synthetic data well. We note that both the training and validation data are from the synthetic dataset and as such we expect that they should be very similar data. We train for 50 epochs and the results seem to reach convergence. In this case we note that the error on the validation set (calculated during each epoch after optimizing on the VIVformer on the training set) seems to be consistently smaller than the error on the training set (on average). We expect that eventually the training loss would become smaller than the validation loss although more training epochs would be required, perhaps leading to overfitting our model. Given the training results, we can be confident that the VIVformer has learned to predict the synthetic data well.

      The more important question is however, whether the VIVformer trained on the synthetic data can accurately forecast the real experimental data. Below we show the predictions of the VIVformer on the real experimental data. We underscore that the VIVformer has NOT seen a single real datum during training: the model has trained on synthetic data only!

      From left to right: Forecasting real VIV on a randomly selected set of 36 continuous points from the experiments, target data from experiments, absolute difference between targets and predictions. We note that the model has trained on synthetic data only and has never seen any real data!

      Albeit the VIVformer has not seen any real data during training, it is surprisingly reasonably accurate in predicting real data! Although certainly not perfect, the predictions are sensible. The root-mean-square of the vibrations forecasted and observed are shown below.

      Root mean square (RMS) of the forecasted VIV plotted on top of the RMS of the experimentally observed VIV. RMS displacement is shown on the y-axis while span (body position) is shown on the x-axis. Reasonably accurate agreement is evident between forecasting (after training on synthetic data only) and experimental observation.

      As is evident in the above figure, the VIVformer can make reasonably accurate predictions of the RMS of the vibrations. Both the trends and amplitudes are reasonably accurately estimated.

      Since the VIVformer has never trained on real data but can reasonably accurately predict them, we conclude that at least part of the physicality of the real data is preserved during the generative process of the VAE. In a sense, the VAE can be thought of not just as a generator which makes realistic-looking data but as a tool which learns the underlying structure and mechanisms of the physical process which generates the data; it can thus be used to better understand the data and perhaps even the physical generative process. We conclude that our VAE could certainly be used to augment scarce datasets of VIV data and in addition, that it is a powerful tool that could potentially be used to study the underlying mechanisms of the physical generative process by studying the artificially generated data!

      Conclusions

      In this work, a data driven approach is employed to study physical system vibrations. Two main topics are explored: 1. Generative models for creating synthetic data similar to those obtained via physical processes and 2. employing transformers and the attention mechanism in order to model and forecast physical vibration data.

      A variational autoencoder is trained on physical vortex-induced vibration data in order to generate synthetic data of the vibrations. The VAE is certainly able to generate data which resemble the physical data visually. Moreover, the generative process is confirmed to preserve the physicality of the data at least partially: a transformer trained on synthetic data only is capable of predicting real experimental data to reasonable accuracy. In that sense, the VAE can be viewed as a tool which learns the underlying physical traits of the data and can be used not only to augment physical datasets but also to simulate and understand the underlying physical mechanisms by examining synthetic data. With that being said, a recommended future research direction would be to examine whether the outputs of the VAE satisfy physical equations of interest and how those could perhaps be included as an additional loss term when training the VAE, i.e. having a physics-informed decoder network.

      A transformer architecture for forecasting unsteady and nonstationary vortex-induced vibrations, the VIVformer, is developed. The VIVformer architecture combines multi-head attention modules and fully connected network modules with residual connections in order to model and forecast the physical vibration time-series in both space and time. The optimized VIVformer architecture can forecast flexible body VIV in time-space to reasonable accuracy both instantaneously and on average. Testing the performance of the VIVformer while gradually decreasing the input information would yield a deeper understanding of the capabilities of the architecture; in addition, testing the extended time horizon predictions of the model would certainly be a recommendation for future research.

      \ No newline at end of file diff --git a/blog/2022/Vision_Language_Limitations/index.html b/blog/2022/Vision_Language_Limitations/index.html new file mode 100644 index 00000000..fbab9fe5 --- /dev/null +++ b/blog/2022/Vision_Language_Limitations/index.html @@ -0,0 +1,42 @@ + Understanding Limitations of Vision-Language Models | 6.S898 Deep Learning Blogs 2023

      Understanding Limitations of Vision-Language Models

      Why are vision-language models important?

      The emergence of joint vision-language models such as Contrastive-Language Image Pretraining (CLIP) [1] from OpenAI, and GAIA-1 [2] from Wayve AI have had critical implications in computer vision, robotics, generative AI, self-driving, and more. The key idea of these large foundation models is that they learn meaningful data representations of labeled (text, image) pairs. Once trained, these learned representations are sufficiently versatile and can directly be deployed for a broad range of applications. Such transfer learning is referred to as zero shot learning, where the learned representations can directly be used for unseen data in a new task context without any additional training.

      Many follow-up works have since examined how these large vision-language models perform with respect to various scenarios. Prior works study these effects in the context of transfer learning. Jain et al. looks at how performance is examined with respect to the quality of the dataset and provides examples where the performance can be improved by removing data from the source dataset [5]. This can be done by utilizing linear classifiers in a scalable and automatic manner [6]. Santurkar et al. explored the impact of language supervision in vision models, and when the pre-training dataset is sufficiently large and contains relevant captions, the model will outperform other image-only models [4]. Shen et al. investigated CLIP’s advantages in outperforming widely used visual encoders through task-specific fine-tuning and combining with vision-language model pre-training [7]. While the aforementioned literature made valuable contributions in understanding the performance of vision-language models, they do not present a clear understanding of what goes on behind the “black box” of the model’s behavior and performance.

      Our study is novel in that we provide a more in-depth, detailed analysis of both the impact of descriptive text (or the lack thereof) in vision-language models, in conjunction with the subtleties of dataset biases. We want to clearly visualize these variables’ impacts on model behavior and provide an explanation for such results. We specifically propose a (toy) expansion of prior work on understanding the role of text description [4]. Prior work claims that text descriptions with low variability will ensure that transferred features from CLIP models will outperform image only models. In our work, we will then examine how more descriptive text labels can help overcome biases in dataset and address domain shift.

      How are these models trained?

      CLIP and GAIA are based on transformer architectures [3], which were originally developed for natural language processing and later adopted for computer vision as well. Two separate encoders, a text encoder and an image encoder, separately transform input data from their respective data modality into feature vectors. In aligning images and text in feature space, CLIP and GAIA are able to learn semantically meaningful and robust representations that are useful for several downstream applications. These models perform this embedding space alignment in different ways. CLIP performs training by predicting which image features correspond to which text embeddings in a batch of (image, text) pairs. GAIA is trained in an autoregressive manner, predicting the next token, given past image, text, and action states. GAIA is reported to have ~9 billion parameters and CLIP is reported to have ~63 million parameters. The differences between these two architectures are also related to the type of input data that is being analyzed. While CLIP operates on single images, GAIA is meant to be used for self-driving, meaning that it operates on videos rather than images. As a result, GAIA requires some notion of temporal consistency, which is why autoregression is a good architecture, and more parameters (since video data is more complex than image data). In this study, we will primarily focus on the CLIP architecture (shown below for convenience).

      Figure 1. CLIP Architecture, a commonly used vision-language model [1]. (We apologize for blurring, couldn’t figure out how to get rid of it).

      Could the dataset play a role in training?

      The nature of the training process of CLIP models introduces questions about how robust the training procedure would be. The training relies on (image, text) pairs, but a single text phrase is not a unique description of an image, and a single text description can be used to describe many different scenes. This one-to-many mapping problem introduces questions about what the optimal text description of a given image should be, or if that optimal description even exists. Santurkar et al. [4] looks at how vision-language models such as CLIP and Simple framework for Contrastive Learning of visual Representations (SimCLR) exhibit different performance based on whether they are trained with (image, caption) pairs or with images only. We were inspired by the study’s suggestion that the descriptiveness of the dataset captions can directly influence how well the CLIP models transfer.

      A more interesting question, that we answer in this blog post, is could having more descriptive text descriptions allow these large foundation models to mitigate or overcome dataset bias?

      To study this question, we consider a toy example with dogs and camels in the classic domain adaptation problem. In this context, we answer the following question:

      Can more descriptive text labels enable better domain adaptation in vision-language models with biased datasets?

      Domain adaptation is a problem in transfer learning where we want to have a model be able to learn the model in one context, and then generalize to another context. In other words, given a source domain that the model is trained on, domain adaptation is the problem of having high model performance in the target domain. In the dog vs. camel example, the domain adaptation problem occurs when we are used to seeing dogs and camels in certain contexts. For example, we generally expect to see camels in the desert and dogs in suburban environments (e.g. on the lawn, inside the house). If a model is trained to see such examples, then is suddenly shown a camel inside a house in Cambridge, the model has a strong chance of failure. Performance failure under domain shift is indicative that the model failed to disentangle background features from the camel itself. We will study whether descriptive text labels can enhance domain adaptation ability of current transformer-based foundation models.

      Understanding role of text labels in CLIP, GAIA

      Due to the large model size, invisible datasets, and large number of GPU hours needed to train CLIP and GAIA, we perform an analysis in a toy setup using the domain adaptation problem we described above. Our goal is to align image and text features, and then visualize the embeddings corresponding to different image classes.

      Each of the four experiments determine 1) how the models respond to dataset bias, and 2) how important the addition of descriptive text labels are in improving performance using a trade-off combination of the variables. We aim to measure and visualize the extent to which the caption aids in overcoming biases in training data.

      Architecture

      Our architecture is shown below. We have two separate transformer architectures: an image encoder and a text encoder. The output of each of these encoders is mapped to an image and text embedding, then L2-normalized. We then compute the cosine similarity of the two embeddings and use the similarity and compute a binary cross entropy loss. Note that, unlike CLIP, we do not compute similarity across all samples within a batch. We only compute cosine similarity for a sample (image, text) pair.

      Dataset

      Image Generation. We generated our own dataset using DALL-E 2. The total size of the training dataset is 196 images, with (1) 48 images of horses on grass, (2) 48 images of horses in the desert, (3) 48 images of camels in the desert, and (4) 48 images of camels on grass. Note that the DALL-E generated images are used for academic purposes, and are not intended for any commercial use, as required by DALL-E terms and conditions.

      Text Labels. We had two cases: a descriptive label and an undescriptive label. In the descriptive label case, we used the following labels for each of the four cases above (1) “horse on the grass”, (2) “horse in the desert”, (3) “a camel in the desert”, (4) “camel on the grass”. In the undescriptive label case, we just used the labels (1) “horse”, (2) “horse”, (3) “camel”, (4) “camel”.

      Experiment 1: No Dataset Bias, Undescriptive Text Labels

      Description: In the first experiment, we first baseline our text and images encoders ability to perform classification of camels and horses in the case when there is no dataset bias. We use all 196 images with undescriptive labels, so that there is an even split between all four cases (each case comprises ¼ of the dataset). The goal is to assess how well the model can learn and generalize across different classes, and provides the basis for the models’ inherent capabilities and performance without impact from external factors.

      Results: We performed Principal Component Analysis (PCA) on the feature vectors of our output from the image encoder and the text encoder in order to visualize more similar labels being mapped closer to each other. We notice that camels in desert and camels in grass are closer together in the feature space, while horses in desert and horses in grass are closer together. There is some overlap between camels in grass and horses in deserts, indicating some confusion with the context of the scene. That said, there is a very clear distinction between camels in the desert and horses in the grass, implying that the model is clearly aware that they are very different classes. The overall separation is rather decent when there is no dataset bias.

      Figure 2. Vague separation in different environments with less descriptive labels.

      Experiment 2: No Dataset Bias, Descriptive Text Labels

      Description: In the second experiment, we keep the dataset unbiased, but add descriptive labels.

      Results: In the plot below, we can see that using descriptive labels slightly improves the separation between classes in the unbiased dataset case. Specifically note the strong separation between red (camels in desert) and green (horses in grass). These two cases are easiest to distinguish, as is reflected in the scattered plot below. Interestingly, when we use descriptive text, the labels are getting bunched together based on context. In particular, horses and camels in the desert are being placed close together, while horses and camels in the grass are being placed close together. This is likely because the model is learning to use the context as a way to separate classes as well. There is still a general progression from red (camels in desert) → blue (horses in desert) → cyan (camels in grass) → green (horses in grass), suggesting some semantic smoothness in feature space. The transition between blue and cyan is rather abrupt though.

      Figure 3. Improvements in class separation with more descriptive labels.

      Experiment 3: Dataset Bias, Undescriptive Text Labels

      Description: In the third experiment, we begin to investigate the role of dataset bias. The goal is to build on the results from the first experiment, reproducing a common aforementioned problem of over- or under-representation in datasets. We look at how the model responds to dataset bias and whether its performance can still stay the same, regardless of how the images are distributed in classes. Dataset bias is defined by the percentage of minority samples that we remove (minority samples are horses in desert and camels in grass). For example, we originally used 48 images of horses in the desert. 25% bias is defined as using only 12 images of horses in the desert.

      Results: These results will be jointly explained with experiment 4.

      Experiment 4: Dataset Bias, Descriptive Text Labels

      Description: In the fourth experiment, we dive deeper into the impact of dataset bias that we began exploring in the second experiment, and question whether performance will be improved when the provided text labels are more descriptive. This directly answers the question of how impactful descriptive text is in vision-language models, in addition to whether they can help overcome dataset bias.

      Results: Surprisingly, when the dataset is more biased, we find that the separation between classes is better. We believe this to be true because the model is able to identify clear separation between horses and camels based on the context alone. As a result, it is easily able to separate red and green classes as the bias increases. We notice that the minority classes (horses in desert and camels in grass) also spread out in latent space as the dataset is biased. When using descriptive labels, we notice that the blue points (horses in the desert) are able to separate themselves more from other clusters than in the undescriptive case, indicating some success with descriptive labels in the event of dataset bias. Overall, across all cases, the model generally has an easy time separating camels in the desert, which is likely due to the distinctness of the background and the object.

      Figure 4. More biased dataset can show more separation between classes.

      Limitations and Potential Confounding Parameters

      There are several possible confounding parameters that may have impacted our results beyond the variables that we were looking at. They include the following:

      Dataset

      Generating the dataset: Because we used DALL-E to generate our dataset, the limitations of DALL-E itself can carry over to our performance. The inherent diversity of the data that DALL-E uses to train would directly impact our results, as well as the hyperparameters that were modified in training DALL-E. DALL-E could also have a specific image aesthetic that are different from real photography.

      Size: Model performance can also be impacted by a limited dataset. We trained and validated our model on 196 images, which is not a large dataset. The confounding variable here would be the complexity of the images, where there may be fewer images with clear distinctions of “horses in the grass” or “camels in the desert”. Furthermore, there are different breeds, sizes, colors, and shapes of horses and camels that may not have been fully explored given the limited dataset size.

      Composition sensitivity: Literature review has shown that the model’s performance can be impacted by quality in addition to the quantity of the data [5]. Recent evidence has proved that removing data from a dataset can aid in transfer learning and improve downstream effectiveness. While we did not run experiments in identifying what specific composition and characteristics of the data should be removed, the analysis would have impacted our results.

      Model

      Computational resources: Because we were restricted by GPU resources, we chose to use a smaller dataset and small self-trained Transformer architectures. We were also unable to train for more epochs or use a more complex model architecture, which could have lowered model performance. We found that increasing the batch size or increasing the number of layers led our model to run out of computational power and continually crash.

      Tuning hyperparameters: Batch size, learning rate, number of layers, optimization models, and other factors could also limit the exploration of optimal configurations and affect overall performance. For example, a higher learning rate in a model could converge faster and show higher performance, when in reality, it is not an accurate reflection of the model. Overfitting and different regularization parameters can also lead to over- or under-fitting.

      Conclusions

      Our toy problem gives some intuition into the idea that the descriptiveness of the label can affect the clustering profile of different datasets. Note that because our experiments were done in smaller settings, we cannot make any claims with respect to scaling up to large amounts of data, compute, and model size. That said, when adding description of the context of the images (i.e. desert vs. grass), we noticed that the points in feature space began to cluster first based on context, then based on the animal type (camel vs. horse). We also noticed that under dataset bias, the majority groups (horses in grass and camels in desert) begin to have better clustering separation. However, the minority group performance decreased, which suggests the importance of accounting for dataset bias in machine learning algorithms. In our experiments, we partially found more descriptive labels to help mitigate these negative effects, but mitigating these effects more reliably is an ongoing research direction.

      References

      1. Radford et al., “Learning transferable visual models from natural language supervision”, ICML 2021

      2. Hu et al., “GAIA-1: A Generative World Model for Autonomous Driving”, arXiv 2023

      3. Vaswani et al. “Attention Is All You Need”, NeurIPS 2017

      4. Santurkar et al., “Is a Caption Worth a Thousand Images? A Controlled Study for Representation Learning”, CVPR 2022

      5. Jain et al., “A Data-Based Perspective on Transfer Learning”, CVPR 2023

      6. Jain et al, “Distilling Model Failures as Directions in Latent Space”, ICLR 2023

      7. Shen et al. “How Much Can CLIP Benefit Vision-and-Language Tasks?”, arXiv 2021

      \ No newline at end of file diff --git a/blog/2022/contrastive-time/index.html b/blog/2022/contrastive-time/index.html new file mode 100644 index 00000000..4d95b54f --- /dev/null +++ b/blog/2022/contrastive-time/index.html @@ -0,0 +1,42 @@ + Contrastive Representation Learning for Dynamical Systems | 6.S898 Deep Learning Blogs 2023

      Contrastive Representation Learning for Dynamical Systems

      A deep learning method of learning system underlying parameters from observed trajectories

      Introduction

      Dynamical System

      Dynamical systems form the foundation for understanding intricate phenomena in both scientific research and engineering applications. These systems are defined by their state (denoted as $X$) at any given time and a set of equations (e.g., $v = \frac{dX}{dt} = f_t(X, \theta)$) that describe the evolution of these states over time ($t$), all driven by underlying parameters $\theta$. Some real-world examples of dynamical systems include:

      • Climate Systems: Involves states like temperature, pressure, and wind velocity, with parameters such as solar radiation and greenhouse gas concentrations.
      • Population Dynamics in Ecology: Features states like population sizes, with parameters including birth and death rates, and interaction rates between species.
      • Economic Models: Focus on states like stock prices and trading volume, influenced by parameters like interest rates and market sentiment.
      • Control Systems in Engineering: Encompasses states like the position and velocity in robotics or the aircraft’s orientation in flight dynamics, governed by parameters like physical properties and control gains.

      The evolution of the system’s state over time can be observed as a time series, where system underlying parameters ($\theta$) governs the system’s behavior. In our project, we would like to determine if it would be feasible to discover the underlying system parameters given the observed trajectory. It would lay the groundwork for both robust predictive modeling and model interpretability analysis for safety-critical systems, such as clinical application and chemical engineering plants.

      Spring-Mass System

      Consider a spring-mass system, a fundamental model in dynamics. In a system comprising two masses, the states include positions $x$ and velocities $v = \frac{dx}{dt}$, which can be derived from the positions. Crucially, it is the underlying parameters, masses $m_1$, $m_2$ and spring constants $k_1$, $k_2$, that dictate the trajectories of $x$.

      Figure 1: A spring-mass system, classical dynamical system to illustrate project idea
      Figure 2: Spring mass system trajectory at ($k_1$, $k_2$, $m_1$, $m_2$) = (1.0, 2.0, 1.0, 2.0)

      Different system parameters, such as mass or spring constant, result in different oscillatory and long-term behaviors of the system. Below is a gif demonstrating the effect of changing parameters on the system’s trajectory; this visualization illustrates how different underlying parameter values lead to distinct dynamical behaviors.

      Figure 3: system dynamic at varying $m_1$, while keeping $k_1$, $k_2$ and $m_2$ constant.
      Figure 4: system dynamics at varying $m_2$, while keeping $k_1$, $k_2$, and $m_1$ constant.

      Dataset Collection / Generation

      We create a simulator for the above dynamical system to generate data based on parameters like masses $m$ and spring constants $k$. The parameters are systematically varied to generate a diverse and challenging dataset. More concretely, the dataset can be represented by a tensor of shape $(N_{param}, N_{traj}, T, d)$, where:

      • $N_{param}$ is the number of parameter sets. Each set of parameters would lead to different system dynamics and trajectories.
      • $N_{traj}$ is the number of trajectories generated for each parameter set. Within the same set of parameters, different initial conditions and noise level would lead to different trajectories.
      • $T$ is the number of steps in a trajectory. $T$ is dependent on 2 factors - time span in the simulation, and the time step (i.e., $dt$). Note that our system/model formulation allows $T$ to be different for different trajectories, offering more flexibility.
      • $d$ is the number of states. In the above example, $d = 4$, representing $(x_1, x_2, v_1, v_2)$.

      Time-series data analysis is a crucial component in a wide array of scientific and industrial domains, ranging from dynamical systems and weather forecasting to stock market prediction. These applications often involve underlying parameters that are complex and not immediately observable from the data. Traditional time-series methodologies primarily emphasize prediction, which can result in models that operate as “black-boxes” with limited interpretability.

      To address this limitation, the representation learning landscape in time-series analysis has expanded in recent years, with a focus on unsupervised and semi-supervised methods. Fortuin et al. developed GP-VAEs, combining Gaussian processes with VAEs to handle noisy and irregularly sampled time-series data. This model addresses variabilities in time-series data, offering robust feature extraction. Franceschi et al. explored self-supervised learning frameworks for time-series, demonstrating how leveraging temporal coherence can improve feature representation without reliance on labeled data.

      Building on these advancements, recent studies like those by Eldele et al. have explored the application of contrastive learning for enhancing time-series representation. Their approach creates representations by contrasting segments of time-series, thereby learning features that distinguish different temporal patterns. This method has shown promise in enhancing classification and forecasting tasks.

      However, there remains an unexplored potential in utilizing contrastive learning for learning the underlying parameters governing these systems. In this project, we aim to bridge this gap by applying the principles of contrastive learning to the specific challenge of identifying and understanding these hidden parameters within dynamical systems. By leveraging contrastive learning, we aim to move beyond mere prediction and delve into a deeper understanding of these parameters, thus enhancing the interpretability of time-series models, particularly applicable in safety-critical systems.

      Methodology

      Contrastive Learning

      Contrastive learning is a self-supervised learning technique prevalent in fields such as computer vision (CV) and natural language processing (NLP). At its core, it involves minimizing the embedding similarity between similar objects (i.e., positive pairs) while distancing dissimilar ones (i.e., negative pairs).

      In the context of dynamical systems, where the model does not have direct access to parameter values, contrastive learning is an effective method to infer underlying system parameters. In our case of the spring-mass system, a positive pair consists of two trajectories generated using the same set of parameters, whereas a negative pair is two trajectories generated using different sets of parameters.

      We utilize the following InfoNCE (Information Noise-Contrastive Estimation) loss for training:

      \[L_{\text{InfoNCE}} = -\log \frac{e^{f(x)^Tf(x^+)/\tau}}{\sum_{i=0}^{N} e^{f(x)^Tf(x^-_i)/\tau}}\]
      • $f(x)$ is the generated trajectory embedding.
      • $\tau$ is a (fixed) temperature hyperparameter, which we set to default 1.
      • ($x$, $x^+$) forms the positive pair (i.e., two trajectories with the same underlying parameters but different initial conditions).
      • ($x$, $x_j^-$) form negative pairs (i.e. two trajectories from different underlying parameter sets).

      Model

      Trajectories in dynamical systems can be represented by a time-series dataset, which is a type of sequential data. Long Short-Term Memory networks (LSTMs), a variant of Recurrent Neural Networks (RNNs), can be used to process sequential data and manage long-term dependencies. A key feature of LSTMs is their use of gates, which regulate the flow of information, allowing the network to maintain pertinent information over extended periods — key characteristics for modeling dynamical systems. These gates include:

      1. Forget Gate: Decides which information from the cell state should be discarded. It uses the current input and the previous hidden state to generate a value between 0 and 1 for each number in the cell state, with 1 indicating “keep this” and 0 indicating “discard this.”
      2. Input Gate: Determines what new information will be added to the cell state. It involves two parts: a sigmoid layer that decides which values will be updated and a tanh layer that creates a vector of new candidate values.
      3. Output Gate: Decides what information from the cell state will be used to generate the output. It takes the current input and the previous hidden state, passes them through a sigmoid layer, and multiplies the output by a tanh of the cell state to decide which parts of the cell state make it to the output.
      Figure 5: Diagram of LSTM .

      In the context of the contrastive learning framework, the choice of model is a design choice. Essentially, any model capable of converting a trajectory into an embedding, such as a transformer, could be utilized. While transformers have shown remarkable results in CV and NLP, their performance on smaller datasets remains an area less explored. Previous studies in dynamical systems have predominantly employed RNN-based approaches. In this project, we aim to study if LSTM is capable of capturing the dynamics of the system through its hidden and cell states.

      Training objectives

      Trajectories are passed through an LSTM to generate trajectory embeddings, derived from the cell states of the LSTM’s final layer. In our training framework, there are 2 loss functions:

      • Contrastive objective (InfoNCE loss) is applied on the trajectory embedding. This loss encourages the model to create embeddings that meaningfully distinguish between different system dynamics.
      • Prediction objective (MSE) is applied between the ground truth state (i.e., $X_{t+1}$) and the prediction state (i.e., $\hat{X}_{t+1}$) at the next step. This loss encourages the model to use the current state and embedding to predict next-step behavior.
      Figure 6: Diagram of contrastive representation learning in this project. Trajectories derived from the same parameter set (e.g. top two trajectories in figure) form a positive pair; they map closely in latent space and show invariance to initial states. Trajectories from different parameter sets are distant in latent space, despite identical initial states (e.g. bottom two trajectories).

      Evaluation

      The objective of the project is to estimate the system parameters from observed trajectories. Therefore, the primary metric for our evaluation strategy is the MAE on underlying parameter estimation. This involves applying linear probing to the model’s embeddings against known ground truth parameters on a subset of the training set (i.e., a linear system $X\beta = Y$ is solved, with $X$ representing the trajectory embeddings, and $Y$ being the ground truth parameters). Since it is a simple linear transformation of the original features, it has limited capacity to alter feature complexity. Essentially, if a model can perform well under linear probing, it suggests that the learned embeddings themselves are robust and informative with respect to the underlying parameters.

      The following plot shows the result of the contrastive learning framework on the validation set. The left subplot corresponds to the ground truth parameters, and the right subplot corresponds to the predicted parameters using the above contrastive learning framework. For a focused visualization, we only vary 2 parameters (i.e., $m_1$, $m_2$). Each point in the plot is annotated with its corresponding parameter values. For each parameter set, we evaluate on 2 trajectories with different initial conditions.

      On the right plot, we observe similar data points are grouped together in the parameter space, indicating that the model is capable of clustering trajectories generated from the same parameter set together. Comparing the left and right plots, we observe the model is capable of predicting parameters close to the ground truth parameters. Overall, the MAE for parameter estimation is 0.043, underscoring the model’s precision in parameter prediction.

      Figure 7: Result of parameter estimation. Left subplot corresponds to the ground truth parameter, right subplot corresponds to the predicted parameter using the above contrastive learning framework.

      Additionally, we would also like the model to be capable of predicting the future trajectories. For this objective, the secondary metric is the MAE on next-step prediction. A low value on this metric would indicate the model’s ability to accurately forecast future states, which is a necessary but may not be sufficient step towards a more complex, weakly-supervised parameter inference task. The MAE on the validation set is 0.00024, and we will discuss it more in the Experiments section.

      Experiments

      In the previous section, Figure 7 above shows the final result. We want to include 2 components in this section: 1) different things we attempted to reach the results in Figure 7, and 2) several experiments to study how different factors affect the model’s capability of discovering the underlying parameters.

      Due to computational and time limitations, the numbers reported in this section are not from the final model, which was trained for a much longer time. Instead, we ran numerous experiments and compared performance after 2000 steps, at which point the training loss had roughly plateaued.

      Effect of initial conditions

      The effect of different initial conditions in dynamical system is analogous to the effect of data augmentation in CV. The challenge is that different initial conditions may affect the trajectories more than the change in parameter.

      We initially used the same initial conditions for all sets of parameters, which led to a parameter MAE of 0.01 on the validation set. However, the model doesn’t generalize to other initial conditions; when evaluating the model on a validation set that has different initial conditions, the MAE increased to 0.31, indicating overfitting.

      To ensure our model effectively discerns differences in trajectories arising from varying initial conditions, we generate 100 trajectories from each parameter set with random initial conditions, aiming to train the model to be invariant to these initial conditions and capture the essence of the system parameters. With this “data augmentation”, we bridged the gap between training and validation performance to be 0.061 and 0.065 respectively.

      Number of prediction steps

      We also considered the limitations of next-step prediction, particularly for high-frequency samples (i.e., small $dt$). A trivial model might simply predict state $X$ at time $t+1$ as $X_t$, and achieve a small loss since $X_{t+1} - X_t$ may be small for small $dt$. To avoid the model taking shortcuts, we shift our focus from immediate next-step prediction to forecasting next-k-steps ahead. We also anticipate that accurate longer-horizon predictions would require a deeper understanding of the underlying parameters, potentially leading to improved performance in parameter estimation. This improves the parameter MAE on the validation set from 0.10 to 0.065. The following figure illustrates the results of predicting 30 steps ahead.

      Figure 8: Ground truth and predicted trajectory for predicting 30 steps (i.e., 3 seconds).

      Decouple state and parameter embedding

      In our hypothesis, the latent space of a trajectory encodes dual forms of information: “long-term” information pertaining to system parameters, and “short-term” information reflective of the current state. Traditional approaches applying contrastive learning across the entire latent vector may not optimally capture this duality.

      To address this, we propose to decouple the state and parameter embedding space. Concretely, for positive pairs emerging from identical parameters but divergent initial conditions, our approach focuses on computing the InfoNCE loss solely on the segment of the embedding representing the parameter. This is operationalized by limiting contrastive learning to the initial W dimensions of the latent vector, denoted as $z[:W]$. This strategy aims to specialize $z[:W]$ in encoding system parameters, while allowing the remaining part of the vector, $z[W:]$, the flexibility to encapsulate other trajectory aspects, such as initial conditions and inherent noise.

      However, the performance didn’t increase across various values of $W$. This stagnation might stem from our use of the LSTM cell state as the latent embedding. Given that the cell state inherently integrates “long-term” information, with “short-term” data predominantly residing in the hidden states, restricting ourselves to $z[:W]$ potentially reduces the representational power of our contrastive learning framework.

      Effect of key hyperparameters

      We utilized WandB for a hyperparameter sweep to investigate their impact on the model’s performance in next-steps prediction and underlying parameter estimation. Key hyperparameters explored include:

      • Embedding Size: We observed that increasing the embedding size from 10 to 200 led to a reduction in the InfoNCE loss from 0.862 to 0.007, and the corresponding parameter estimation MAE reached its best value when the embedding size reached 100. This suggests a larger embedding size can increase the capacity to infer underlying system parameters more effectively. However, maintaining the embedding size at a balanced level is crucial to ensure the model concentrates on the most pivotal aspects of data variation, rather than overfitting to minor system details.
      • Number of LSTM Layers: Increasing the number of LSTM layers improved both next-step prediction and parameter estimation. Notably, with more LSTM layers, a smaller embedding size became sufficient for achieving desirable outcomes in both prediction and parameter inference. This implies a deeper LSTM architecture can capture more complex patterns in the data.
      • Prediction Horizon (Predict Ahead): We observe a modest improvement in performance on parameter estimation MAE (i.e., 0.04) as the prediction horizon increases. This improvement, while positive, was less pronounced than anticipated. In our model, contrastive learning serves as the primary mechanism for learning about system parameters, with next-k-step prediction intended to supplement this learning process. Theoretically, as the prediction horizon (k) increases, the complexity of the next-k-step prediction task escalates. This demands more focus from the model, potentially at the expense of its capacity for contrastive learning. Consequently, the variable k emerges as a hyperparameter to strike an optimal balance between two competing objectives: facilitating overall learning (where a larger k is advantageous), and maintaining a focus on contrastive learning (where a smaller k is beneficial).

      Noise level in data generation

      In real-world applications, models often lack direct access to state values due to the inherent stochasticity of systems or observation noise. In high-precision engineering applications, this noise is typically constrained to below 1%. However, in less precise scenarios, the noise in observed data can reach levels as high as 20%. It’s important to note that these errors are not merely observational errors, which can be assumed to be independent and identically distributed (i.i.d). Rather, these errors are intertwined with the state itself and can propagate over time, affecting subsequent observations. The figure below illustrates how noise can significantly alter trajectories. For instance, at a 20% noise level, the state variable $x_1$ markedly diverges from its intended path around the 8-second mark.

      Figure 9: Comparison of state trajectories with no noise (top subplot) and 20% noise (bottom subplot).

      The following section evaluates the model’s performance using noisy observed data. During trajectory generation, we introduce random noise according to the formula $X_{obs} = X_{true} (1 + \alpha \mathit{N}(0, 1))$ where $\alpha$ is the noise-to-signal ratio. We studied the model’s performance across various noise levels, ranging from $\alpha = 0.0$ to $\alpha = 0.2$, and the results are plotted in the following figure.

      Figure 10: Model performance across different noise levels. The blue bars represent a model trained on a clean dataset, and directly applied to a noisy validation dataset. The orange bars represent a model trained and evaluated on datasets with equivalent noise levels.

      Applying a model trained on a clean dataset to a noisy dataset leads to a significant drop in performance, as indicated by the blue bars. In practical model deployment, it’s common to train the model on a dataset with a comparable level of noise. This approach can substantially mitigate performance degradation, particularly at low to moderate noise levels (e.g., $\alpha < 0.1$), as demonstrated by the orange bars. However, at higher noise levels, such as 20%, training on a noisy dataset proves less effective due to the substantial deviation from the clean data.

      Generalizability to unseen parameters

      In this section, we delve into the model’s generalizability across unseen parameters. Our investigation comprises experiments on both in-distribution and out-of-distribution system parameters. The results of these experiments are illustrated in the following figures.

      For in-distribution analysis, our focus was to assess the model’s proficiency in adapting to system parameters that, while differing from those in the training set, still fall within the same predefined range. This aspect of the study aims to understand how well the model can interpolate within the known parameter space.

      Figure 11: Result of parameter estimation to unseen parameters in-distribution.

      On the other hand, the out-of-distribution experiments were designed to challenge the model further by introducing system parameters that lie outside the range encountered during training. This approach tests the model’s ability to extrapolate beyond its training confines.

      Figure 12: Result of parameter estimation to unseen parameters out-of-distribution.

      Remarkably, our model demonstrated a robust ability to generalize across both in-distribution and out-of-distribution parameters. It achieved a Mean Absolute Error (MAE) of 0.032 in the former and 0.082 in the latter scenario. These findings suggest that the model not only learns the underlying patterns within the training data but also retains a significant degree of flexibility to adapt to new, unseen parameter sets.

      Another Framework - Generative Modeling

      While the previously discussed contrastive learning framework shows promise in inferring underlying parameters through a weakly-supervised learning approach, it relies on prior knowledge about the relationship between trajectories and their corresponding parameter sets. Such information may not always be readily available in practical scenarios. To address this challenge, our research pivots towards employing a generative modeling framework, enabling the learning of system parameters in an unsupervised manner.

      We transition from contrastive learning to incorporating a variational autoencoder (VAE) structure. This setup operates without explicit knowledge of parameter sets, compelling the model to decipher the underlying patterns solely from the observed trajectories. The VAE framework consists of three primary components: 1) an encoder LSTM that transforms an observed trajectory into a latent representation, 2) a reparameterization layer that molds this latent representation into a specific distribution, and 3) a decoder LSTM that uses the latent representation and initial conditions to reconstruct the trajectory.

      Training focuses on 1) the reconstruction loss between real and a generated trajectories, and 2) Mean Absolute Error (MAE) for next-k-step predictions made by the encoder LSTM. This method is designed to challenge the model’s capability to extract insights about the system’s dynamics independently, without relying on any prior information about the trajectories. The framework thus becomes a critical platform for testing the model’s ability to autonomously learn the system’s underlying parameters, requiring an advanced level of unsupervised learning.

      The evaluation metrics for this second framework are aligned with the first, utilizing MAE to assess both the underlying parameter estimation and the next k-step prediction accuracy of the encoder LSTM. A key addition in this framework is the MAE on Reconstruction Loss. This metric is used to gauge the model’s ability to accurately reconstruct input sequences, thereby reflecting its understanding of the data’s fundamental structure. A lower reconstruction loss implies that the model has effectively internalized the essential characteristics of the data distribution. Our expectation is that this deeper grasp of data structure will enable the model to infer underlying system parameters independently, without prior exposure to specific parameter set information.

      Experiments - Generative Modeling

      Autoencoder vs. Variational Autoencoder

      In addition to exploring the Variational Autoencoder (VAE) framework, we also experimented with a traditional autoencoder setup. This variant mirrors the architecture of the VAE but excludes the computation of the mean ($\mu$) and log variance ($\log \sigma^2$), thereby omitting the variational element. This modification streamlines the model, narrowing its focus to purely reconstructing input data from its latent representations.

      Our findings reveal that the autoencoder configuration surpassed the VAE in both parameter estimation and reconstruction. For parameter estimation MAE, autoencoder and VAE achieved 0.12 and 0.23 respectively. For reconstruction MAE, autoencoder and VAE achieved 0.02 and 0.49 respectively. This performance disparity can be attributed to the inherent constraints of each model. The autoencoder is primarily limited by the dimensionality of the embedding in its latent space. In contrast, the VAE faces an additional constraint due to its need to model the distribution within the latent space.

      These results suggest that the variational component, a defining feature of VAEs and instrumental in modeling data distributions, might not be essential for capturing the dynamics specific to our system. By removing the variational aspect, the autoencoder model is enabled to concentrate more effectively on capturing the most salient features for reconstruction and parameter inference. This simpler approach avoids the additional complexity of encoding the data distribution in the latent space, potentially leading to more efficient and targeted learning relevant to our system’s dynamics.

      Beyond Reconstruction: Evaluating Future Prediction Capabilities

      To evaluate our AE model’s generalizability and future prediction capabilities, we expanded its function beyond reconstruction to include forecasting additional steps. The figure presented here compares the ground truth states $x_1$ and $x_2$ (displacements for $m_1$ and $m_2$) against the model’s outputs for both reconstruction and prediction. The model processes input trajectories of 100 time steps and generates outputs for 199 steps, with the initial 99 steps dedicated to reconstruction and the subsequent 100 steps for prediction (unseen by the model during training). The results illustrate effective reconstruction performance but relatively weaker predictive accuracy.

      Figure 13: Reconstruction and further prediction of unseen states.

      Given that our autoencoder (AE) framework surpasses the Variational Autoencoder (VAE) in reconstruction and parameter estimation, we speculated whether VAE’s variational component might enhance future predictions. Therefore, we compared the reconstruction and prediction losses between the AE and VAE frameworks.

      The corresponding figure, presenting the mean and standard deviation of these losses, reveals that in both frameworks, reconstruction losses and their variability are substantially lower than prediction losses. This trend highlights the ongoing difficulty in achieving precise future predictions within our model configurations.

      Furthermore, the AE framework demonstrated superior performance over the VAE in both reconstruction and future step prediction. This outcome suggests that the VAE’s variational component does not necessarily contribute to improved future predictions. Echoing our earlier findings on parameter estimation and reconstruction, the variational aspect might not be pivotal for capturing the dynamics specific to our system. Instead, it could introduce additional complexity by encoding the data distribution in the latent space, which appears to be less relevant for reconstruction and future step prediction tasks.

      Figure 14: Results of reconstruction loss (first 99 steps) and future prediction loss (last 100 steps) for autoencoder (top subplot) and VAE (bottom subplot).

      Effect of Latent Variables on Generated Trajectories

      In this section, our objective is to glean insights into the latent variables by manipulating them and observing the resultant changes in the generated trajectories. Given that the embedding dimension (i.e., |z|) exceeds the dimension of the parameters (i.e., |$\theta$|), we initially establish a linear mapping from $z$ to $\theta$. The following gif demonstrates how the trajectory evolves in response to alterations in the variable $m_1$. The upper part of the gif represents the simulation, while the lower part reflects the output from the decoder of our autoencoder.

      A notable observation is that, as $m_1$ undergoes modifications, the predicted trajectories adeptly resemble the period of the simulation trajectories. However, a discrepancy arises in their magnitude, with the predicted trajectories exhibiting a notably smaller scale compared to the ground truth trajectories. This pattern suggests that while the embedding successfully captures certain characteristics of the trajectories, it does not fully encapsulate all their properties.

      We hypothesize that enhancing the complexity of the encoder/decoder architecture (e.g., a larger number of LSTM layers) might facilitate a more comprehensive capture of trajectory attributes. However, our experimental scope is currently constrained by limitations in CUDA memory, particularly due to the decoder’s requirement to process 99 time steps. This constraint hinders our ability to experiment with architectures involving a greater number of layers, which might otherwise allow for a richer representation and understanding of the trajectory data.

      Figure 15: Effect of latent variables on generated trajectories.

      Conclusion and Future Works

      In contrast to current machine learning literature that predominantly focuses on predicting future states of dynamical systems, our work is geared towards uncovering the underlying system parameters from observed trajectories. Our key contributions include:

      • Implementing two frameworks: an autoregressive LSTM with contrastive learning, and a variational autoencoder architecture. While contrastive learning yields superior parameter estimation, the autoencoder enables unsupervised learning without relying on prior knowledge.
      • Demonstrating our model’s generalizability to both in-distribution and out-of-distribution unseen parameters, and its effective performance with noisy datasets, sustaining a noise-to-signal ratio of up to 10%.
      • Conducting thorough experiments to explore the impact of various factors like initial conditions, prediction horizons, and the interplay between state and parameters embeddings. We also examined the influence of latent variables on trajectory generation and the model’s predictive capabilities beyond the confines of the training set.

      The ability to accurately estimate underlying system parameters significantly enhances model interpretability, which is crucial in scientific and engineering applications where decision-making stakes are high. We hope our findings will help researchers and students interested in interpretable machine learning for dynamical systems.

      While this project did extensive analysis on a spring-mass system, future work may extend this analysis to a broader range of dynamical systems. Moreover, future work can integrate the strengths of both frameworks to incorporate contrastive learning within an unsupervised context, possibly through data augmentation strategies. Further advancements could also focus on refining the impact of latent variables on trajectory generation. Such progress is expected to bolster trust in AI solutions and facilitate their integration into essential decision-making frameworks across various domains.

      Here’s the link to our Github Repo: https://github.com/martinzwm/meta_param_est

      \ No newline at end of file diff --git a/blog/2022/distill-example/index.html b/blog/2022/distill-example/index.html new file mode 100644 index 00000000..27bbfa4a --- /dev/null +++ b/blog/2022/distill-example/index.html @@ -0,0 +1,98 @@ + Sample Blog Post | 6.S898 Deep Learning Blogs 2023

      Sample Blog Post

      Your blog post's abstract. This is an example of a distill-style blog post and the main elements it supports.

      Equations

      This theme supports rendering beautiful math in inline and display modes using MathJax 3 engine. You just need to surround your math expression with $$, like $$ E = mc^2 $$. If you leave it inside a paragraph, it will produce an inline expression, just like \(E = mc^2\).

      To use display mode, again surround your expression with $$ and place it as a separate paragraph. Here is an example:

      \[\left( \sum_{k=1}^n a_k b_k \right)^2 \leq \left( \sum_{k=1}^n a_k^2 \right) \left( \sum_{k=1}^n b_k^2 \right)\]

      Note that MathJax 3 is a major re-write of MathJax that brought a significant improvement to the loading and rendering speed, which is now on par with KaTeX.

      Images and Figures

      It’s generally a better idea to avoid linking to images hosted elsewhere - links can break and you might face losing important information in your blog post. To include images in your submission in this way, you must do something like the following:

      {% include figure.html path="assets/img/2022-12-01-distill-example/iclr.png" class="img-fluid" %}
      +

      which results in the following image:

      To ensure that there are no namespace conflicts, you must save your asset to your unique directory /assets/img/2023-05-01-[SUBMISSION NAME] within your submission.

      Please avoid using the direct markdown method of embedding images; they may not be properly resized. Some more complex ways to load images (note the different styles of the shapes/shadows):

      A simple, elegant caption looks good between image rows, after each row, or doesn't have to be there at all.

      Interactive Figures

      Here’s how you could embed interactive figures that have been exported as HTML files. Note that we will be using plotly for this demo, but anything built off of HTML should work (no extra javascript is allowed!). All that’s required is for you to export your figure into HTML format, and make sure that the file exists in the assets/html/[SUBMISSION NAME]/ directory in this repository’s root directory. To embed it into any page, simply insert the following code anywhere into your page.

      {% include [FIGURE_NAME].html %} 
      +

      For example, the following code can be used to generate the figure underneath it.

      import pandas as pd
      +import plotly.express as px
      +
      +df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/earthquakes-23k.csv')
      +
      +fig = px.density_mapbox(
      +    df, lat='Latitude', lon='Longitude', z='Magnitude', radius=10,
      +    center=dict(lat=0, lon=180), zoom=0, mapbox_style="stamen-terrain")
      +fig.show()
      +
      +fig.write_html('./assets/html/2022-12-01-distill-example/plotly_demo_1.html')
      +

      And then include it with the following:

      <div class="l-page">
      +  <iframe src="{{ 'assets/html/2022-12-01-distill-example/plotly_demo_1.html' | relative_url }}" frameborder='0' scrolling='no' height="600px" width="100%"></iframe>
      +</div>
      +

      Voila!

      Citations

      Citations are then used in the article body with the <d-cite> tag. The key attribute is a reference to the id provided in the bibliography. The key attribute can take multiple ids, separated by commas.

      The citation is presented inline like this: (a number that displays more information on hover). If you have an appendix, a bibliography is automatically created and populated in it.

      Distill chose a numerical inline citation style to improve readability of citation dense articles and because many of the benefits of longer citations are obviated by displaying more information on hover. However, we consider it good style to mention author last names if you discuss something at length and it fits into the flow well — the authors are human and it’s nice for them to have the community associate them with their work.


      Footnotes

      Just wrap the text you would like to show up in a footnote in a <d-footnote> tag. The number of the footnote will be automatically generated. This will become a hoverable footnote.


      Code Blocks

      This theme implements a built-in Jekyll feature, the use of Rouge, for syntax highlighting. It supports more than 100 languages. This example is in C++. All you have to do is wrap your code in a liquid tag:

      {% highlight c++ linenos %}
      code code code
      {% endhighlight %}

      The keyword linenos triggers display of line numbers. You can try toggling it on or off yourself below:

      int main(int argc, char const \*argv[])
      +{
      +string myString;
      +
      +    cout << "input a string: ";
      +    getline(cin, myString);
      +    int length = myString.length();
      +
      +    char charArray = new char * [length];
      +
      +    charArray = myString;
      +    for(int i = 0; i < length; ++i){
      +        cout << charArray[i] << " ";
      +    }
      +
      +    return 0;
      +}

      Diagrams

      This theme supports generating various diagrams from a text description using jekyll-diagrams plugin. Below, we generate a few examples of such diagrams using languages such as mermaid, plantuml, vega-lite, etc.

      Note: different diagram-generation packages require external dependencies to be installed on your machine. Also, be mindful that, because of diagram generation, the first time you build your Jekyll website after adding new diagrams will be SLOW. For any other details, please refer to the jekyll-diagrams README.

      Note: This is not supported for local rendering!

      The diagram below was generated by the following code:

      {% mermaid %}
      sequenceDiagram
          participant John
          participant Alice
          Alice->>John: Hello John, how are you?
          John-->>Alice: Great!
      {% endmermaid %}

      JohnAliceHello John, how are you?Great!JohnAlice

      Blockquotes

      We do not grow absolutely, chronologically. We grow sometimes in one dimension, and not in another, unevenly. We grow partially. We are relative. We are mature in one realm, childish in another. —Anais Nin

      Layouts

      The main text column is referred to as the body. It is the assumed layout of any direct descendants of the d-article element.

      .l-body

      For images you want to display a little larger, try .l-page:

      .l-page

      All of these have an outset variant if you want to poke out from the body text a little bit. For instance:

      .l-body-outset

      .l-page-outset

      Occasionally you’ll want to use the full browser width. For this, use .l-screen. You can also inset the element a little from the edge of the browser by using the inset variant.

      .l-screen

      .l-screen-inset

      The final layout is for marginalia, asides, and footnotes. It does not interrupt the normal flow of .l-body sized text except on mobile screen sizes.

      .l-gutter


      Other Typography?

      Emphasis, aka italics, with asterisks (*asterisks*) or underscores (_underscores_).

      Strong emphasis, aka bold, with asterisks or underscores.

      Combined emphasis with asterisks and underscores.

      Strikethrough uses two tildes. Scratch this.

      1. First ordered list item
      2. Another item ⋅⋅* Unordered sub-list.
      3. Actual numbers don’t matter, just that it’s a number ⋅⋅1. Ordered sub-list
      4. And another item.

      ⋅⋅⋅You can have properly indented paragraphs within list items. Notice the blank line above, and the leading spaces (at least one, but we’ll use three here to also align the raw Markdown).

      ⋅⋅⋅To have a line break without a paragraph, you will need to use two trailing spaces.⋅⋅ ⋅⋅⋅Note that this line is separate, but within the same paragraph.⋅⋅ ⋅⋅⋅(This is contrary to the typical GFM line break behaviour, where trailing spaces are not required.)

      • Unordered list can use asterisks
      • Or minuses
      • Or pluses

      I’m an inline-style link

      I’m an inline-style link with title

      I’m a reference-style link

      I’m a relative reference to a repository file

      You can use numbers for reference-style link definitions

      Or leave it empty and use the link text itself.

      URLs and URLs in angle brackets will automatically get turned into links. http://www.example.com or http://www.example.com and sometimes example.com (but not on Github, for example).

      Some text to show that the reference links can follow later.

      Here’s our logo (hover to see the title text):

      Inline-style: alt text

      Reference-style: alt text

      Inline code has back-ticks around it.

      var s = "JavaScript syntax highlighting";
      alert(s);

      s = "Python syntax highlighting"
      print s

      No language indicated, so no syntax highlighting. 
      But let's throw in a <b>tag</b>.


      Colons can be used to align columns.

      Tables Are Cool
      col 3 is right-aligned $1600
      col 2 is centered $12
      zebra stripes are neat $1

      There must be at least 3 dashes separating each header cell. The outer pipes (|) are optional, and you don’t need to make the raw Markdown line up prettily. You can also use inline Markdown.

      Markdown Less Pretty
      Still renders nicely
      1 2 3

      Blockquotes are very handy in email to emulate reply text. This line is part of the same quote.

      Quote break.

      This is a very long line that will still be quoted properly when it wraps. Oh boy let’s keep writing to make sure this is long enough to actually wrap for everyone. Oh, you can put Markdown into a blockquote.

      Here’s a line for us to start with.

      This line is separated from the one above by two newlines, so it will be a separate paragraph.

      This line is also a separate paragraph, but… This line is only separated by a single newline, so it’s a separate line in the same paragraph.

      \ No newline at end of file diff --git a/blog/2022/index.html b/blog/2022/index.html new file mode 100644 index 00000000..4930d9df --- /dev/null +++ b/blog/2022/index.html @@ -0,0 +1 @@ + 2022 | 6.S898 Deep Learning Blogs 2023
      \ No newline at end of file diff --git a/blog/2022/molecule_generation/index.html b/blog/2022/molecule_generation/index.html new file mode 100644 index 00000000..a47ed161 --- /dev/null +++ b/blog/2022/molecule_generation/index.html @@ -0,0 +1,42 @@ + An empirical evaluation of autoencoders and diffusion models for 2D small-molecule generation | 6.S898 Deep Learning Blogs 2023

      An empirical evaluation of autoencoders and diffusion models for 2D small-molecule generation

      We examine the efficacy of autoencoders and diffusion models for generating 2D molecules with certain small-molecule properties. In particular, we evaluate the success of both models in creating new molecules, containing only CHONPS atoms, and only single, double, and aromatic bonds. Secondarily, a natural question that followed was investigating the efficacy of different manners of encoding molecular data for training models - specifically, we trained with both molecular fingerprints and adjacency matrices (derived from graph embeddings of molecules). We find that small autoencoder models are successful in generating both pseudo-fingerprints and pseudo-adjacency matrices that are similar to simple small molecules’ fingerprints and adjacency matrices, but they were not able to produce ‘convincing’ simple organic molecules from the fingerprint or adjacency matrices. We find that diffusion models were considerably faster and more lightweight than autoencoders, and generated molecules that were quantitatively closer in structure to real chemical structures than those the autoencoders were able to produce.

      Introduction

      Applying deep learning techniques to 2D molecule generation is an interesting and challenging problem in the field of cheminformatics, with applications in drug discovery, materials science, and other areas of chemistry. The problem is broad in scope, since there is a variety of molecular data, representations of the generated molecules, and model frameworks or generation pipelines. Autoencoders and diffusion models are two major types of generative models. The first learns a latent distribution from actual data points and then samples from this space to produce a novel output. Diffusion models work by progressively adding noise to input data, learning the correspondence between inputs and random noise, and then working backwards from a new sample of random noise by “undoing” the noise.

      Data

      We use the QM9 dataset, described here. This dataset has been used extensively for cheminformatics research. The dataset contains the molecular structures and coordinates (2D and 3D) of ~134,000 organic molecules. Each molecule is represented as a set of atoms with their respective spatial (cartesian) coordinates. The dataset also contains a comprehensive set of chemical properties of each molecule.

      We retrieved the SMILE (Simplified Molecular Input Line Entry System) notation for each molecule. The SMILE string uses ASCII characters to describe the atoms, bonds, and connectivity in a molecule, and is a standardized way to convey chemical information in textual form. The RDKit library hosts functionality for moving between SMILE strings and quantitative data (matrices, fingerprint vectors) as well as for visualizing molecules from the SMILE strings.

      Finally, we create a secondary, restricted subset of the data that contains only simple, organic molecules by eliminating strings containing the “#” (character representing triple bonds) or elements other than C, H, O, N, P, S. For the models dealing with fingerprints, since it is challenging to go from fingerprint to an explicit representation of a model, our evaluation metric was determining whether or not the generated molecules were, in fact, similar to the chosen “simple” subset of all of the data. For models dealing with adjacency matrices, it was quite easy to determine ‘validity’ of chemical structures visually; the appearance of standard chemical structures, such as rings of 5 and 6 carbons with side-chains, was used as an indication of success.

      Autoencoder

      A very simple generative approach we can take is to use an autoencoder. Namely, we can train an autoencoder on molecules of interest — like our small-molecule-filtered dataset — and then sample from the learned latent space, decode the sample to generate a “molecule”, and evaluate the success in generation.

      As mentioned in the introduction, it is worth considering possible data inputs and the sort of information a generative model trained on different inputs would carry. For our example, we consider the efficacy of RDKFingerprints and graph adjacency matrices as two possible input data types.

      RDKFingerprints

      Molecular fingerprints are a commonly used identifier in drug discovery and virtual screening. Different types of fingerprints encode different aspects of a molecule, but they all share the characteristic of preserving features of a molecule in a spatial fashion across a bit vector. A main feature of a fingerprint scheme is that vector similarity (which can be computed in many ways) corresponds to structurally or chemically similar molecules according to the features the fingerprint intends to encode for.

      The Python RDKit library hosts functionality for handling two such types of fingerprints — a native RDK fingerprint and a Morgan fingerprint. We use the RDK fingerprint, and our data pipeline looks something like this:

      • For a given molecule (via smile string) we generate a fingerprint (a 2048-long bit vector)

      • A set of such fingerprints is used to train an autoencoder (whose structure is a 2048 unit input layer, 2 hidden layers of 64 units activated with ReLU activations)

      • We sample from the latent space and use the decoder to produce a set of generated molecules, which we associate to sets of 10 “most similar real molecules” from the original (unfiltered) dataset. Similarity is calculated using the Tanimoto Distance, a notion of similarity between two vectors where the numerator is the number of 1s in common between the bit vectors, and the denominator is the number of 1s overall.

      • We compute the percentage of these 10 similar molecules that lie in the small-molecule-filtered dataset to evaluate the success of the autoencoder in understanding the structure of small molecules at the generation step.

      This approach has the benefit of using a data source explicitly designed with the goal of similarity; computing close-distance vectors to the generated RDKit fingerprint carries genuine chemical meaning.

      Adjacency Matrices

      Molecules lend themselves well to graph representations: atoms are like nodes, bonds are like edges. Thus, a molecule, if represented with a graph, can be associated to an adjacency matrix that carries information on interatomic and overarching molecular properties.

      Adjacency matrices derived from the graph representation of a molecule, while not explicitly designed with the goal of molecule similarity in mind (as the fingerprint is), are historically successful in chemical deep learning, particularly as they are the workhorse of graph neural networks. The adjacency matrices available in the QM9 dataset can be decomposed into matrices at the single, double, and aromatic bond levels, so they carry chemical information in addition to structural information. We implement a similar pipeline with adjacency matrix inputs, with a few changes:

      • The adjacency matrix for a smile string is computed

      • Unlike RDK Fingerprints, which are fixed in length, the size of the adjacency matrix varies with the size of the molecule; this makes use in a fixed-input length-autoencoder difficult, so we apply a padding approach, zero-padding all matrices to the size of the largest molecule’s matrix.

      • The autoencoder is trained with these flattened, padded matrices.

      • The generated reconstructions are rearranged into a matrix shape.

      • The pseudo-adjacency matrix is then associated to a pseudo-molecule and corresponding pseudo-RDK fingerprint. Notably, the pseudo-molecule is created with some assumptions, such as the inclusion of only CHONPS atoms and only single bonds. Like the fingerprint framework, we find molecules in the original set with similar fingerprints to the reconstructed fingerprint, and compute the proportion of top-10 similar molecules that lie in the small-molecule set.

      Autoencoder Results – RDK Fingerprints

      The first and most notable result is that over repeated trials of sampling and reconstructing from the latent space for both types of data, the proportion of top-10 similar molecules that lie in the small-molecule restricted dataset is 1.0. That is, each of the 10 most similar molecules lies in the small-molecule set in both cases, over 5 batches of 10 samples each.

      Some detailed results follow.

      First, here is the training curve with loss for the fingerprint autoencoder

      An example of top-10 similarity for a sampled and reconstructed pseudo-fingerprint is shown here

      We notice that all the top-10 most similar molecules seem to be near each other, index-wise. This would make sense if the dataset is organized such that similar molecules share close indices. We can confirm this fact by inspecting a heatmap of 10 samples from a consecutive block in the dataset, like so:

      We can see that indeed, closer molecules in the original dataset have higher similarity, so this result is as expected.

      Autoencoder Results - Adjacency Matrix

      We then inspect the results of the adjacency matrix-based autoencoder training. First, the training curve with loss:

      Now, here is a top-10 similarity example for a pseudo-RDK fingerprint from a pseudo-adjacency matrix:

      We notice first, that the average similarity is much higher in this case, suggesting that even with the extra step of conversion and the assumptions we make about molecular form, the similarities are higher in this case. The second observation is that the top-10 similar indices are spread out farther than they were in the previous case, suggesting that the adjacency matrix to RDK fingerprint conversion moves around the similar molecules.

      Finally, we include some photos of molecules generated in this process (we were unable to generate photos in the RDK fingerprint trained autoencoder, because we require an adjacency matrix to draw the molecules, and it is not straightforward to go from fingerprint to matrix):

      In the photo above, we can see the lefthand side tail as a recognizable part of an organic molecule, suggesting success with some types of bonds. In the photo below, we see that the autoencoder has learnt some additional aspects beyond basic single bonds (one of the validation images we show further below includes a similar red ring).

      Finally, the photo below while the least small-molecule-like in appearance, is interesting because it appeared many times in samples of 100 images (around 20 times) despite the latent space adjacency matrices being distinct. This could perhaps have to do with the process of converting from an adjacency matrix of reals (the result of latent space sampling) to an adjacency matrix of 1/0s, which we accomplish with median thresholding.

      For reference, a sample image from the “validation” true small-molecule dataset is shown below:

      Diffusion Model

      More recently, the use of diffusion models as an approach for generative modeling has become more common; as described in the introduction, denoising diffusion models operate by iteratively adding noise in a Markov manner to samples, learning the correspondence between inputs and the resultant noise, and then reverse-sampling from random noise to generate a new datapoint.

      In the past, as seen in the E3 paper, diffusion models have been applied to 3D adjacency matrices. In this case, we adapted an image-based diffusion model to noise and then de-noise data on adjacency matrices by using 2D adjacency matrices instead.

      The following plots provide information about the training of the diffusion model on adjacency matrices. First, is a plot of the loss over 5 training epochs at LR 0.001; this model was trained on approximately 90K training samples, so the loss was quite low even after the first epoch:

      The efficacy of diffusion models as a means of generating novel adjacency matrices is evident from the following visualizations of our results. First, here are two runs of the denoising process for the diffusion model, first on an extremely limited set of approximately 1000 matrices, and then on the entire 90K dataset. As seen, even with very few inputs, it was possible to identify the emergence of a ‘bright spot’ in the top left, which represents the actual adjacency matrix (which was later encoded into actual matrices).

      In converting these adjacency matrices into actual molecule images, we aimed to visualize the backbones of these molecules (which is most informative as to the overall structure), so instead of focusing on determining atomic identity, we instead labelled all of them as carbons and proceeded.

      Notably, in comparison to the molecules created by the autoencoder, these contain more of the structures which are characteristic of organic molecules, such as 5 and 6 carbon rings with molecules (potentially side chains of length >1) coming off. Indeed, it is possible to observe the progressively increased ordering of the adjacency matrices over time (as they become closer and closer to actual molecules), going from extremely disordered to closer and closer to something meaningful.

      The application of diffusion models to the RDKFingerprints is shown here: for two separate runs, they look like this. Notably, in order to use an image classification network for RDKFingerprints, the fingerprints were stacked into an image which looks like a series of stripes. As evident, the diffusion model was able to produce such striped images, and their simplicity is a good indication that these are indeed good learnings of information about the filtered subset.

      Conclusion

      In this post, we used two different generative models and tested out two different encodings for information about molecular structure. In general, both models were able to learn and reproduce information about the chosen subset, but in general, the diffusion model was better at accurately reproducing molecules with ‘believable’ structures; as evident from the figures above, although the autoencoder did learn and create relatively sparse adjacency matrices, they lacked the hallmarks of small organic molecules (like rings structures). Further, although it was more difficult to discern quantitative information about the ‘accuracy’ of adjacency matrices, since they depend on larger structures than the RDKfingerprints, it was much easier to map adjacency matrices to actual (visualizable) structures. On the whole, the diffusion model was better at actually creating canonical molecular structures. Further, models trained on adjacency matrices, when converted post-generation to RDKFingerprints had higher accuracy, and adjacency matrices were generally easier to conceptualize, so we have preference for this data encoding.

      \ No newline at end of file diff --git a/blog/2022/proposal_JingpengHong/index.html b/blog/2022/proposal_JingpengHong/index.html new file mode 100644 index 00000000..2cfd1fdc --- /dev/null +++ b/blog/2022/proposal_JingpengHong/index.html @@ -0,0 +1,32 @@ + Recurrent Recommender System with Incentivized Search | 6.S898 Deep Learning Blogs 2023

      Recurrent Recommender System with Incentivized Search

      This project considers the use of Recurrent Neural Networks (RNNs) in session-based recommender systems. We input sequences of customers' behavior, such as browsing history, to predict which product they're most likely to buy next. Our model improves upon this by taking into account how previous recommendations influence subsequent search behavior, which then serves as our training data. Our approach introduces a multi-task RNN that not only aims to recommend products with the highest likelihood of purchase but also those that are likely to encourage further customer searches. This additional search activity can enrich our training data, ultimately boosting the model's long-term performance.

      Introduction

      Numerous deep learning based recommender systems have been proposed recently . Especially, the sequential structure of session or click-logs are highly suitable for the inductive biases provided by recurrent/convolutional neural networks . In such a setting, the input of the network is a sequence of consumers’ search behavior, while the output is the predicted preference of the items, i.e. the likelihood of being the next in the session for each item. The ultimate goal is to pinpoint the optimal product for the consumer, thereby increasing sales. An example of where this could be applied is the “featured product” on platforms like Amazon.

      However, a challenge with this model is the sparsity of data. It’s well-known that products in retail have the “long-tail” feature. Only a small fraction, say 5%, of a site’s products are ever browsed or bought by customers, leaving no data on the remaining products. Additionally, customer sessions tend to be brief, limiting the amount of information we can get from any one individual. This issue is particularly acute for “data-hungry” models, which may not have sufficient training data with enough variation to accurately match products with customers.

      My proposed solution to this issue is to recommend products that also encourage further exploration. Economic studies have shown that certain types of information structure can motivate customers to consider more options, harnessing the “wisdom of crowds” . Imagine two products: recommending the first leads to a 5% purchase likelihood, while the second has a 4% chance. But the second item prompts the customer to look at 5 additional products. This extra data allows our model to learn more, potentially enhancing recommendations for this and other customers in the future. Therefore, we might choose to recommend the second product to generate more user-driven training data.

      In this project, we consider the multi-task learning that achieves better performance along the entire customer journey. The conventional conversion rate based model estimates

      \[P(conversion|click, impression, u_i, v_j)\]

      where \(u_i\) are users’ features and \(v_j\) are items’ features.

      We decompose the conversion rate into

      \[P(conversion, click|impression, u_i, v_j) = P(click|impression, u_i, v_j) \times P(conversion|click, u_i, v_j)\]

      Hence, we have two auxiliary tasks for predicting both the click-through rate and the conversion rate. Such approach has two advantages. First, the task for estimating the click-through rate generally has richer training data because we train on dataset with all impressions instead of the subsample with purchase. Second, we recommend products with both high probability of clicking and purchasing, leading to more training data points in future time periods. This can help us tackle the challenge of data sparsity .

      Literature

      Recommender Systems are usually classified into three categories : (i) collaborative filtering (ii) content-based ,and (iii) hybrid.

      1. Collaborative filtering. The input for the algorithm can be [User, Item, Outcome, Timestamp]. The task is to complete the matrix \(R\), where each column is an item and each row is a user, with the majority of missing elements. The memory based collaborative filtering finds pairs of users \(i\) and \(i'\) using similarity metrics. The model based collaborative filtering decomposes \(R^{m\times n} = U^{m\times k}I^{k\times n}\) using matrix factorization, where \(k\) is the dimension of latent factors.

      2. Content-based. The input for the algorithm can be [User features, Item features, Outcome]. The task is to predict \(y=f(u_i, v_j)\), where \(y\) is the outcome and \(u_i\) and \(v_j\) are features of users and items respectively.

      3. Hybrid. We consider a simple linear model:

      \[r_{ij} = x_{ij}\mu+z_i\gamma_j+w_j\lambda_i+\epsilon_{ij}\]

      where \(x_{ij}\) is the collaborative filtering component indicating the interaction, \(z_i\) are users’ features and \(w_j\) are items’ feature. \(\gamma_j\) and \(\lambda_i\) are random coefficients. We can also apply matrix factorization to reduce the dimension of interaction matrix \(x_{ij}\). A recent application in marketing can be found in .

      The core idea in collaborative filtering is “Similar consumers like similar products”. The similarity is defined on consumers’ revealed preference. However, the content-based approach implicitly assumes users and items should be similar if they are neighborhoods in feature space, which may or may not be true. The limitation of collaborative filtering is that we require a sufficient amount of interaction data, which is hard if we consider the sparsity and cold start problems.

      Moreover, deep learning based recommender systems have gained significant attention by capturing the non-linear and non-trivial user-item relationships, and enable the codification of more complex abstractions as data representations in the higher layers. A nice survey for deep learning based recommender system can be found in . Deep learning based recommender systems can have several strengths compared to conventional models:

      1. It’s possible to capture complex non-linear user-item interactions. For example, when we model collaborative filtering by matrix factorization, we essentially use the low-dimensional linear model. The non-linear property makes it possible to deal with complex interaction patterns and precisely reflect user’s preference .

      2. Architecture, such as RNN and CNN, are widely applicable and flexible in mining sequential structure in data. For example, presented a co-evolutionary latent model to capture the co-evolution nature of users’ and items’ latent features. There are works dealing with the temporal dynamics of interactions and sequential patterns of user behaviours using CNN or RNN .

      3. Representation learning can be an effective method to learn the latent factor models that are widely used in recommender systems. There are works that incorporate methods such as autoencoder in traditional recommender system frameworks we summarize above. For example, autoencoder based collaborative filtering , and adversarial network (GAN) based recommendation .

      Model

      We implement the multi-task learning similar to :

      However, we differ from the model in in two ways:

      1. For user field, we implement RNN to deal with the sequential clickstream data instead of simple MLP.

      2. We define the loss function over the samples of all impressions. The loss of conversion rate task and the loss of click-through rate task will not be used separately because both of them are based on subsamples (conditional on click and conditional on purchase).

      \[L(\theta_{click}, \theta_{conversion})=\sum_{i=1}^N l(click_i, f(u_i, v_j))+\sum_{i=1}^N l(click_i, purchase_i, f(u_i, v_j))\]

      Experiment

      The dataset we use is a random subsample from , which is the traffic logs from Taobao’s recommender system. We do a 1% random sampling, though the public dataset in has already been a 1% random sampling of the raw data. The summary statistics of the data can be found in .

      For the performance metrics, we use Area under the ROC curve (AUC).

      Several benchmark models we use for comparison:

      1. DeepFM . This is a factorization-machine based neural network for click-through rate prediction. In my setting, I consider it as a single-task model with MLP structure.

      2. MMOE . This is the multi-task setting. However, since the use case is MovieLens, where the two tasks are “finish” and “like”, it doesn’t consider the type of sequential data. In my setting, I consider it as a multi-task model with MLP structure.

      3. xDeepFM . This model combines both explicit and implicit feature interactions for recommender systems using a novel Compressed Interaction Network (CIN), which shares some functionalities with CNNs and RNNs. In my setting, I consider it as a single-task model with RNN/CNN structure.

      4. Our Model, a multi-task model with RNN/CNN structure.

      Results:

      Model test AUC test click AUC test conversion AUC
      DeepFM 0.3233    
      MMOE   0.5303 0.6053
      xDeepFM 0.4093    
      Ours   0.5505 0.6842
      \ No newline at end of file diff --git a/blog/2023/A-Transformer-Based-Approach-for-Simulating-Ecological-Recovery/index.html b/blog/2023/A-Transformer-Based-Approach-for-Simulating-Ecological-Recovery/index.html new file mode 100644 index 00000000..e26ce73e --- /dev/null +++ b/blog/2023/A-Transformer-Based-Approach-for-Simulating-Ecological-Recovery/index.html @@ -0,0 +1,32 @@ + Transformer-Based Approaches for Hyperspectral Imagery in Remote Sensing | 6.S898 Deep Learning Blogs 2023

      Transformer-Based Approaches for Hyperspectral Imagery in Remote Sensing

      This project employs Transformers for a comprehensive spatial-temporal analysis of post-Mountaintop Removal landscape recovery, utilizing satellite imagery and DEMs. It focuses on integrating geomorphological changes to predict ecological succession. Advanced Transformer architectures will be used to enhance the interpretability of complex spatial features over time, aiming to create an accurate 3D simulation environment for interactive exploration and effective restoration planning.

      Introduction

      Hyperspectral imaging (HSI) captures a wide spectrum of light per pixel, providing detailed information across numerous contiguous spectral bands. Unlike multispectral imaging, which only captures a few specific bands, hyperspectral imaging offers finer spectral resolution, allowing for more precise identification and analysis of materials. This capability makes it valuable in remote sensing for applications like mineral exploration, agriculture (e.g., crop health monitoring), environmental studies, and land cover classification. Each spectral band captures unique light wavelengths, enabling the identification of specific spectral signatures associated with different materials or conditions on the Earth’s surface. HSI images present unique challenges in deep learning compared to typical RGB images due to their high dimensionality. Each pixel in a hyperspectral image contains information across hundreds of spectral bands, leading to a massive increase in the data’s complexity and volume. This makes model training more computationally intensive and can lead to issues like overfitting if not handled properly. Current datasets, such as the Indian Pines or Salinas Scenes datasets, often have fewer samples compared to standard image datasets, exacerbating the difficulty in training deep learning models without overfitting. There’s also the challenge of effectively extracting and utilizing the rich spectral information in these images, which requires specialized architectures and processing techniques. However, analysis of hyperspectral data is of great importance in many practical applications, such as land cover/use classification or change and object detection and there is momentum in the field of remote sensing to embrace deep learning.

      Traditional hyperspectral image classification (HSIC) methods, based on pattern recognition and manually designed features, struggled with spectral variability. Deep learning, particularly CNNs, brought advancements by extracting intricate spectral-spatial features, enhancing HSIC’s accuracy. Yet, CNNs have their drawbacks, such as a propensity for overfitting due to the high dimensionality of hyperspectral data and limitations imposed by their fixed-size kernel, which could obscure the classification boundary and fail to capture varying spatial relationships in the data effectively.

      Compared to CNNs, there is relatively little work on using vision transformers for HSI classification but they have great potential as they have been excelling at many different tasks and have great potential in the field of HSI classification. Vision transformers, inspired by the Transformer architecture initially designed for natural language processing, have gained attention for their capacity to capture intricate patterns and relationships in data. This architecture leverages self-attention mechanisms, allowing it to model long-range dependencies effectively, which can be particularly advantageous in hyperspectral data where spatial-spectral interactions are crucial. Spectral signatures play a pivotal role in HSI analysis, enabling the differentiation of materials or conditions based on their distinct spectral characteristics, a capability that conventional RGB images cannot provide. Leveraging the strengths of vision transformers to effectively capture and exploit these spectral signatures holds promise for advancing the accuracy and precision of HSI in remote sensing classification tasks.

      Spectral Feature-Based Methods and Spatial–Spectral Feature-Based Methods

      Spectral feature-based approaches classify hyperspectral images (HSIs) by analyzing each spectral pixel vector individually. However, this method has limitations as it overlooks the spatial context of the pixels. Spatial–spectral feature-based methods, on the other hand, consider both the spectral and spatial characteristics of HSIs in a more integrated manner. These methods involve using a patch that includes the target pixel and its neighboring pixels, instead of just the individual pixel, to extract spatial–spectral features. Among these methods, convolutional neural networks (CNNs) are particularly prominent, having shown significant effectiveness in HSI classification. Despite the success of CNN-based models in classifying HSIs, they are not without issues. The CNN’s receptive field is limited by the small size of its convolutional kernels, such as 3×3 or 5×5, which makes it challenging to model the long-range dependencies and global information in HSIs. Additionally, the complexity of convolution operations makes it difficult to emphasize the varying importance of different spectral features.

      When comparing spectral feature-based methods with spatial–spectral feature-based methods in hyperspectral image (HSI) classification, each has distinct advantages and applications. Spectral feature-based methods are valued for their simplicity and efficiency, especially effective in scenarios where unique spectral signatures are key, such as in material identification or pollution monitoring. They require less computational power, making them suitable for resource-limited applications. Alternatively, spatial–spectral feature-based methods offer a more comprehensive approach by integrating both spectral and spatial information, leading to higher accuracy in complex scenes. This makes them ideal for detailed land cover classification, urban planning, and military surveillance where spatial context is crucial. Among spatial–spectral methods, convolutional neural networks (CNNs) stand out for their advanced feature extraction capabilities and adaptability, making them useful in a variety of applications, from automatic target recognition to medical imaging. However, they face challenges such as the need for large datasets and difficulties in capturing long-range spatial dependencies. While spectral methods are efficient and effective in specific contexts, spatial–spectral methods, particularly those using CNNs, offer greater versatility and accuracy at the cost of increased computational complexity.

      Hyperspectral Image Classification

      Three-Dimensional Convolutional Neural Network (CNN3D)

      The first stage will involve the collection of multi-spectral satellite imagery and high-resolution Digital Elevation Models (DEMs) of MTR-affected landscapes. This data will be preprocessed to ensure compatibility, which includes image normalization, augmentation, and the alignment of satellite imagery with corresponding DEMs to maintain spatial congruence. Preprocessing will also involve the segmentation of satellite data into labeled datasets for supervised learning, with categories representing different land cover types relevant to ecological states.

      SpectralFormer

      Transformer models have exhibited remarkable success beyond their initial domain of natural language processing. Their unique self-attention mechanism enables them to capture long-range dependencies, making them a potentially good choice for complex spatial analysis. Vision Transformers, in particular, offer a new approach by treating image patches as tokens and allowing them to process the global context of an image effectively. This capability is beneficial for satellite imagery analysis, where understanding the broader environmental context is critical. Transformers designed for point cloud data, adapting to the inherent irregularities of LiDAR measurements, can potentially uncover intricate structural patterns and temporal changes within landscape data. With strategic approaches like transfer learning, transformers can overcome their computational resource complexity.

      Group-Aware Hierarchical Transformer (GAHT)

      The final step will be the development of a 3D simulation environment using Unreal Engine. The simulation will visualize the predicted ecological states and changes over time, providing an interactive tool for users to explore the landscape recovery process. The interface will allow users to manipulate variables and observe potential outcomes of different restoration strategies in a virtual setting.

      Conclusions

      For the spatial analysis of satellite imagery and LiDAR data, the evaluation will focus on the transformer’s ability to discern and classify diverse land cover types. The key metrics for this assessment will include accuracy, precision, recall, and the F1 score extracted from confusion matrices. The model should accurately identify and categorize ecological features from high-resolution imagery. Temporally, the performance will be evaluated based on its capacity to predict ecological changes over time. This involves analyzing the model’s output against a time series of known data points to calculate the Mean Squared Error (MSE) for continuous predictions or log-loss for discrete outcomes.

      \ No newline at end of file diff --git a/blog/2023/A-deeper-look-into-equivariance-for-materials-data/index.html b/blog/2023/A-deeper-look-into-equivariance-for-materials-data/index.html new file mode 100644 index 00000000..c44656bd --- /dev/null +++ b/blog/2023/A-deeper-look-into-equivariance-for-materials-data/index.html @@ -0,0 +1,32 @@ + A Deeper Look into Equivariance for Materials Data | 6.S898 Deep Learning Blogs 2023

      A Deeper Look into Equivariance for Materials Data

      A Comparative Analysis of an E(3) Equivariant GNN and a Non-Equivariant GNN in Materials Data Tasks with a Focus on Investigating the Interpretability of Latent Geometry within the Two GNNs.

      Introduction

      Materials embody a diverse array of chemical and physical properties, intricately shaping their suitability for various applications. The representation of materials as graphs, where atoms serve as nodes and chemical bonds as edges, facilitates a systematic analysis. Graph Neural Networks (GNNs) have emerged as promising tools for deciphering relationships and patterns within materials data. The utilization of GNNs holds the potential to develop computational tools that deepen our understanding and aid in designing structure-property relationships in atomic systems.

      In recent years, there has been a heightened focus on employing machine learning for the accelerated discovery of molecules and materials with desired properties [Min and Cho, 2020; Pyzer-Knapp et al, 2022; Merchant et al, 2023]. Notably, these methods are exclusively applied to stable systems in physical equilibrium, where such systems correspond to local minima of the potential energy surface $E(r_1, . . . , r_n)$, with $r_i$ representing the position of atom $i$ [Schütt et al, 2018].

      The diverse arrangements of atoms in the system result in varying potential energy values, influencing chemical stability. In the GIF below, different trajectories can be seen of the molecule Ethane. The Ethane molecule spends 99% of its time in a specific conformation, in which the substituents are at the maximum distance from each other. This conformation is called the staggered conformation. Looking at the molecule from a position on the C-C (main) axis (as in the second half of the animation), the staggered conformation is reached when the H atoms of the front C atom are exactly between the H atoms of the other C atom. This animation also shows the 3-fold symmetry of the molecule around the main axis. All three staggered conformations will have the same energy value, as they are completely equivalent. The intermediate conformations will result in a higher energy value, as they are energetically less favorable. Different conformations can also portray elongations of some bonds lengths and variations in angles value. Predicting stable arrangements of atomic systems is in itself an important challenge!

      Conformations of Ethane (C2H6) molecule

      Conformations of the molecule Ethane (C2H6)

      In the three-dimensional Euclidean space, materials and physical systems in general, inherently exhibit rotation, translation, and inversion symmetries. These operations form the E(3) symmetry group, a group of transformations that preserve the Euclidean distance between any two points in 3D space. When adopting a graph-based approach, a generic GNN may be sensitive to these operations, but an E(3) equivariant GNN excels in handling such complexities. Its inherent capability to grasp rotations, translations, and inversions allows for a more nuanced understanding, enabling the capture of underlying physical symmetries within the material structures [Batzner et al, 2022].

      Data

      The MD 17 dataset, an extensive repository of ab-initio molecular dynamics trajectories [Chmiela et al, 2019], was employed in this study.

      Each trajectory within the dataset includes Cartesian positions of atoms (in Angstrom), their atomic numbers, along with total energy (in kcal/mol) and forces (kcal/mol/Angstrom) acting on each atom. The latter two parameters serve as regression targets in analyses.

      Our focus narrowed down to the molecules Aspirin, Ethanol, and Toluene:

      Aspirin (C9H8O4)
      Ethanol (C2H5OH)
      Toluene (C6H5CH3)

      The distributions of energy values (kcal/mol) for various conformations of the three molecules, within the training and validation sets, are illustrated in the histograms below.

      Energy (kcal/mol) distributions for Aspirin (C9H8O4), Ethanol (C2H5OH) and Toluene (C6H5CH3) molecules in train and validations sets

      The training set for Aspirin comprises 1000 conformations, while its validation set consists of 500 conformations. Ethanol’s training and validation sets each consist of 1000 conformations. Toluene’s training set comprises 1000 conformations, and its validation set consists of 500 conformations.

      Method

      In this project, our objective is to conduct a comparative analysis of two Graph Neural Network (GNN) architectures: an E(3) equivariant network and a non-equivariant (specifically E(3) Invariant) one. The primary focus is on energy prediction tasks related to atomic systems, with a particular emphasis on exploring the distinctions within the latent representations of these architectures and their interpretability.

      All GNNs are permutation invariant by design [Keriven and Peyré, 2019]. Our baseline GNN for comparison achieves rotation and translation invariance by simply operating only on interatomic distances instead of absolute position of the atoms. This design choice ensures that both the output and internal features of the network remain invariant to rotations. In contrast, our equivariant GNN for comparison utilizes relative position vectors rather than distances (scalars) together with features comprised of not only scalars, but also higher-order geometric tensors.

      In our Invariant GNN, the node-wise formulation of the message passing is given by:

      $$\mathbf{x}^{\prime}_i = \mathbf{\Theta}^{\top} \sum_{j \in \mathcal{N}(i) \cup \{ i \}} \frac{e_{j,i}}{\sqrt{\hat{d}_j \hat{d}_i}} \mathbf{x}_j$$

      Where $ x_i, x_j $ are the feature vectors of the target and source nodes, respectively, defined as a one-hot representation of the atomic number of that node. The summation is performed over the neighborhood $\mathcal{N}(i)$ of atom $i$, defined by a radial cutoff around each node, a tunable parameter typically set around 4-5 angstroms. Meaning, the concept of neighborhood is based on the distance between nodes, not their connectivity. Additionally, $ d_i = 1 + \sum_{j \in \mathcal{N}(i)} e_{j,i} $ where $ e_{j,i} $ represents the edge weight from the source node $j$ to the target node $i$ , and is defined as the interatomic distance.

      For constructing our equivariant GNN, E3nn was employed - a torch-based library designed for building O(3) equivariant networks. Following the method presented in [Batzner et al, 2022], a neural network that exhibits invariance to translation and equivariance to rotation and inversion was constructed. Two key aspects of E3nn facilitating the construction of O(3) equivariant neural networks are the use of irreducible representations (Irreps) for data structuring and encapsulating geometrical information in Spherical Harmonics. Irreps are data structures that describe how the data behaves under rotation. We can think of them as data types, in the sense that this structure includes the values of the data alongside instructions for interpretation. The Spherical Harmonics form an orthonormal basis set of functions that operate on a sphere, and they’re equivariant with respect to rotations, which makes them very useful (and popular!) in expanding expressions in physical settings with spherical symmetry.

      For the equivariant GNN, the node-wise formulation of the message is:

      $$f'_i = \frac{1}{\sqrt{z}} \sum_{j \in \partial(i)} \; f_j \; \otimes\!(h(\|x_{ij}\|)) \; Y(x_{ij} / \|x_{ij}\|) $$

      where $ f_j, f_i $ are the source and target nodes’ feature vectors, defined similarly as a one-hot representation of the atomic number. $z$ is the average degree (number of neighbors) of the nodes, and the neighborhood $\partial(i)$ is once again defined using a radial cutoff. $x_{ij}$ is the relative distance vector, $h$ is a multi layer perceptron and $Y$ is the spherical harmonics. The expression $x \; \otimes(w) \; y$ denotes a tensor product of $x$ with $y$ using weights $w$. This signifies that the message passing formula involves a convolution over nodes’ feature vectors with filters constrained to be a multiplication of a learned radial function and the spherical harmonics.

      Results

      The performance of the two GNNs was compared for the task of predicting the total energy of the molecule’s conformation - a scalar property. By constraining the Equivariant GNN to predict a scalar output, it becomes overall invariant to the E(3) group. However, the use of higher order geometric tensors in the intermediate representations and operations in the E-GNN, makes internal features equivariant to rotation and inversion. This enables the passage of angular information through the network using rotationally equivariant filters (spherical harmonics) in the node feature convolution. This is the essential difference between the two architectures.

      The learning curves of the two GNNs for each molecule data are presented in the figures below:

      Train (left) and Validation (right) learning curves of Energy (kcal/mol) prediction of Aspirin (top), Ethanol (middle) and Toluene (bottom) conformations

      The models were trained for 50 epochs using mean absolute error (MAE) objective for predicting normalized energy (in kcal/mol units). Adam optimizer with a learning rate of 0.01 and learning rate scheduler were employed. The E-GNN achieves a superior MAE rate for all three molecules.

      Next, let’s examine the latent representation of the two models! The last layer values of the validation data of both models were projected using t-SNE to a 2D representation and color-coded according to the target energy values:

      Latents projections of E-GNN (left) and GNN (right) last layer, of Aspirin (top), Ethanol (middle) and Toluene (bottom) conformations

      A color gradient can be seen in all three projections of the Equivariant GNN; and it is the clearest for Ethanol. The Invariant GNN’s latent projections do not exhibit a similar structure, perhaps except for Ethanol’s conformations. Moreover, in Ethanol’s case, the GNN projection appears to be quite one-dimensional.

      The apparent color gradient according to the target values in the E-GNN latent space is impressive, suggesting that the model leverages this information when embedding data conformations for predictions. Multiple “locations” in the latent space denote various high-energy conformations, indicating that the model considers not only the target energy value but also structural differences.

      To assess whether there’s molecular structural ordering in the embeddings, we construct system-specific variables for each molecule and visualize the latent space accordingly. Ethanol, with its relatively simple structure, showcases three important variables: the distance between the two Carbons (C-C bond), the distance between Carbon and Oxygen (C-O bond), and the angle formed by the three atoms. The distributions of these variables in Ethanol’s train and validation sets are depicted in the figure below:

      Distributions in train (left) and validation (right) sets of Ethanol, of C-C bond length (top), C-O bond length (middle) and main angle (bottom)

      The distributions appear very similar for each variable in the train and validation sets. Now, let’s examine Ethanol’s validation conformations latent projection, color-coded with respect to the target and the three system-specific variables:

      A clear gradient is observed for the main angle and C-C bond! The target gradient appears from the top left corner to the bottom right; the C-C bond gradient seems to go from bottom left to top right, and the main angle gradient isn’t as linear, appearing to spiral from the bottom to the top right corner clockwise. The C-O bond projection doesn’t seem to follow a discernible gradient, suggesting it’s not as influential on the target as the other two variables.

      Cool huh? The Equivariant GNN appears to embed the data according to the target value but also according to the system’s geometrical structure! This suggests that the model leverages its E(3) equivariant convolution layers to capture and encode information about both the target values and the intricate geometric features of the molecular systems.

      Conclusion

      In conclusion, our exploration has demonstrated the efficiency of the E(3) equivariant GNN, compared to an invariant GNN, in predicting the total energy of molecular conformations. Though both models were compared on predicting energy, a scalar property, the E-GNN’s ability to leverage the inherent symmetries present in the system allowed it to effectively capture and encode the relationship between the arrangement of molecules and their respective energy. This was illustrated through the latent representation visualizations, and was particularly evident in the case of Ethanol. Here, discernible gradients in the latent space were observed, correlating with the target energy value and variations in C-C bond length and main angle. However, interpretability varies among the latent projections for the more complex molecules investigated in this project. Potential improvements could be achieved with additional data and a more expressive equivariant network.

      \ No newline at end of file diff --git a/blog/2023/CNN-activation-patching/index.html b/blog/2023/CNN-activation-patching/index.html new file mode 100644 index 00000000..58c65dbc --- /dev/null +++ b/blog/2023/CNN-activation-patching/index.html @@ -0,0 +1,32 @@ + Activation Patching in Vision Transformers | 6.S898 Deep Learning Blogs 2023

      Activation Patching in Vision Transformers

      Motivation

      Neural networks contain large amounts of parameters and connections that they use to model a given phenomenon. Often, the breadth and complexity of these systems make it difficult for humans to understand the mechanisms that the model uses to perform its tasks. The model is treated like a black-box. When attempting to alter the behavior of the model when it does not behave in the desired way, engineers often rely on trial-and-error tuning of hyperparameters or providing larger, more diverse datasets for training. However, it is often difficult to get representative training data. In addition, hyperparameters can improve training but are limited in their ability to alter the innate limitations of a model.

      Mechanistic interpretability aims to unpack the underlying logic and behaviors of neural networks. Activation patching is an interpretability technique that replaces activations in a corrupted model with that of an uncorrupted model in order to analyze their influence on model output. When a patched activation improves model performance, it indicates that the patched activation plays a role relevant to the corrupted information.

      A better understanding of the logic within neural networks will allow for more strategic improvements to these models inspired by this newfound understanding. In addition, interpretability is the first step toward changing and correcting models. With an understanding of the underlying mechanisms comes more control of these mechanisms, which can be used to apply necessary changes for goal alignment and mitigating issues such as bias. Mechanistic interpretability plays a key role in ensuring the reliability and safety of AI systems.

      Related Work

      Pearl et al. defines “causal mediation analysis” in order to analyze the effect of intermediate entities on a desired result. An application of the “indirect effect” introduced by this research is activation patching, also known as causal tracing. The indirect effect is the effect a given activation has on the output of the model. Since the activation is encompassed within the layers of a neural network, it has an indirect effect on the output. This analysis has been used in language models. Here, the indirect effect is defined as the role of an MLP or attention layer on the output. This role is analyzed by first corrupting the outputs of the network. Then, activations from an uncorrupted run of the model can be iteratively patched into the corrupted run in order to determine which activations can best restore the uncorrupted outputs. The activations with the most significant restorative impact have the highest indirect effect.

      For example, if the hidden state for a given attention head in a language model with prompt “The Eiffel Tower is in” is patched into that of a prompt “The Colosseum is in” and successfully changes the output from “Rome” to “Paris”, this indicates that the patched head contains knowledge about the Eiffel Tower. The figure below depicts this process of patching from a clean to corrupt run.

      Meng et al. also provides an example of how interpretability can open opportunities for model editing. Their research revealed that MLP layers in the middle of a GPT network had a large influence on the final predicted token from the model. This indicated that the model stored knowledge about the subject of the prompt within these layers. With the understanding of where knowledge of facts is stored within the model MLPs, these layers were then used to edit the knowledge of the language model in a way that is generalizable to other phrases that applied this knowledge. The study revealed the utility of interpretability projects in improving model understanding of the problem at hand.

      Activation patching has been used for language models, which rely on a transformer architecture. Vision transformers take advantage of the transformer architecture to perform common computer vision tasks such as image classification. These transformers use attention to glean valuable context about a given patch in an image, a task that a convolutional neural network has difficulty with due to the independent nature of its receptive fields. Through the use of multi-headed attention, vision transformers can focus on just the parts of the image that are relevant to the task at hand, and they do so with a global understanding of relevance across the entire image. The attention heads learn how to find relevant patches, or tokens, in an image for a given query. However, research regarding what exactly these heads “pay attention” to is still ongoing. The strategies of activation patching used for language transformers can therefore apply in the context of vision transformers due to the similarity in architecture and need to address questions of interpretability. Palit et al. performed a similar causal tracing analysis to that of the language model study except with a focus on BLIP, a multi-modal model that can answer questions about a given image. This investigation showed how activation patching can be performed on images along with language rather than language alone.

      Methods

      The model that was used for this investigation was a vision transformer that was fine-tuned for the CIFAR10 dataset, a dataset that is often used to train image classification models. The pretrained model that was used, which can be found here, often fails to classify images in the dataset if they are converted to grayscale. For example, the model classifies the image of a deer below as a cat.

      In order to trace which attention heads focus on color information, a clean, corrupted, and restored run was performed with the model. A batch was created with a given image along with a grayscale version of that image. The colored image played the role of the clean run. The grayscale image is a corrupted input that hinders the model’s ability to classify the object in the image. This is reflected in the lower logits when the classifier attempts to classify the grayscale image. Even in the off chance the model is still able to classify the image correctly in the corrupted run, the logits will reflect the confidence, or lack thereof, of the model in its classification.

      This corrupted grayscale run was the baseline in the investigation. Once this baseline was established, the restored run demonstrated the influence of a given attention head. In this run, the hidden state in a given corrupted layer was replaced with the hidden state at that layer from the clean run. A hidden state was defined as the values of the embedded tokens after passing through a given layer in the neural network. One set of restored runs only restored states for individual layers. However, as demonstrated in previous research , a window of layers is necessary to be restored in order to have a noticeable effect on the output, as opposed to just a single layer. In this experiment, the window was 3, so the given layer as well as its adjacent layers were restored. While activation in the language setting often performs activation patching at the granularity of individual tokens, it was assumed that the relationship between token and layer in the image setting would not be as informative across a wide array of images. Language tokens have a shared meaning across different prompts, but image tokens play a different role depending on the object at hand. This information would not help in understanding the role of activations after averaging across all of the classifications of all images. So, this study was performed by corrupting all tokens in the image and restoring all of those tokens during the patching of a given hidden state.

      This analysis was performed for 1000 images from the CIFAR10 dataset. For each image, the output of the restored run was collected and compared to that of the corrupted run. The indirect effect of a given layer was calculated by the difference in the softmax probability of the class of the image between the corrupted and patched run.

      For each image, this patching process was repeated for every attention layer in the neural network. Finally, the results of activation patching were averaged together for each layer across all of the images in order to get a general sense of which layers are most pertinent for processing image color information.

      Results

      When single layers were patched rather than a window of layers, results matched that of Meng et al. The patching of a single activation did not have a unique effect on the output.

      From averaging the change in outputs from activation patching 1000 CIFAR10 images, results show that attention heads of most relevance to color tended to be in the middle or last layers.

      Here are some examples of activation patching for individual images from the dataset. The graphs display the probability in the output for the correct class of the given image.

      This experiment found that in a 12-layer network with a window size of 3, attention in the fourth layer and final layers of the network had the biggest impact on predictions made by the model. In these layers, the probability of the correct class of the image had the largest change when clean hidden states were patched from these layers into the grayscale run of the vision transformer.

      As portrayed by the tracing of individual images displayed above, not all images followed this trend exactly. The deer image, for example, had more emphasis on earlier layers and less emphasis on later layers. The automobile had a stronger influence from the attention layer 6 than that of 4. However, it was generally common for layers in the middle and end of the network to play a large role in this classification problem.

      Conclusion

      The influence of attention heads close to the output align with the conclusions found by Palit et al. This is likely due to direct connection of final layers to the output. There is also a significant influence of middle attention heads on the output, which is some indication of the key information that is stored in these layers relevant to color. A possible explanation is that these layers are close to the input layer, which directly stores color information, while maintaining enough distance from the input to have narrowed down (attended to) which tokens are relevant to the class the image belongs to. This study provided an initial insight into how vision transformers store information about colors of an image.

      Future investigations could include other forms of corruption to provide more information about the roles of the different attention layers in a transformer. For example, adding noise to the image embeddings would give insight to the general importance of different layers rather than just focusing on color information. By varying the amount of noise, this corruption would allow more control on how much the output would change and possibly allow room for more significant restorative effects from patching and therefore more definitive results as to where the most influential attention heads live in vision transformers. Other methods of corruption could also explore other tasks ingrained in image classification, such as blurring for edge detection or using silhouettes and image segmentation for texture or pattern identification. In addition, performing activation patching with window sizes other than 3 could provide more context as to how important an individual attention layer is. A similar experiment should be performed on other models and datasets. A focus on different objects, larger datasets, and larger networks would help verify the role of middle and final layer attention heads indicated by this study.

      \ No newline at end of file diff --git a/blog/2023/Exploring-Generative-Models-In-Time-Series/index.html b/blog/2023/Exploring-Generative-Models-In-Time-Series/index.html new file mode 100644 index 00000000..f1ec717f --- /dev/null +++ b/blog/2023/Exploring-Generative-Models-In-Time-Series/index.html @@ -0,0 +1,32 @@ + Exploring Univariate Time Series Anomaly Detection using VAE's | 6.S898 Deep Learning Blogs 2023

      Exploring Univariate Time Series Anomaly Detection using VAE's

      In this blog post, we will take a deep dive into DONUT, a method that applies variational autoencoders to the problem of time series anomaly detection. We will begin with an overview of the original authors' main ideas. Next, we will replicate some results, and perform new experiments to gain further insights into the properties, successes, and limitations of this method. Finally, we will run additional experiments that test extensions on the original formulation, and motivate future areas of exploration.

      Introduction

      Anomaly detection in time series data is an extensively studied field in academia, holding significant importance due to its wide-ranging applications in real-world scenarios. Time series are present everywhere, and the ability to detect anomalies is vital for tasks such as identifying potential health issues, predicting system failures, or recognizing regime changes in business operations. There are a wide range of methods that have been developed over the years in order to tackle this crucial yet challenging problem. Classical approaches rooted in statistics have long been employed, but in recent years, researchers have begun to experiment with adapting deep learning techniques to achieve performance improvements.

      The deep methods can generally be classified into distinct approaches. The first approach is forecasting, where the method attempts to learn the generating process of the series, and then classifies a point in the series as anomalous if the ground truth value deviates significantly from the predicted value. The second type of approach is reconstruction. The models attempt to learn the generating process of the series in a latent space. The model then reconstructs the series, and uses a well designed reconstruction score in order to classify the series points as normal or anomalous. DONUT is an example of a method that falls into this category.

      Problem Formulation and Background

      Xu et al explore time series anomaly detection within the framework of key performance indicator time series. These time series show high levels of seasonality, a result of being a derivative of human action. They propose a method called DONUT, a highly adaptable anomaly detection method that can perform supervised, semi-supervised, and unsupervised detection. Their main contributions include modifications to the traditional VAE formulation, the discovery that DONUT requires training on both normal and anomalous data (contrary to other methods), and a kernel density estimation interpretation of the latent space. Given a series \(x_1, x_2, ....x_T\), and potentially labels \(y_1, y_2, ...y_T\), where \(y_t\) is 1 when \(x_t\) is an anomaly and 0 otherwise, for any time \(t < T\) the method should be able to classify \(x_t\) as normal or anomalous.

      We begin by defining what an anomaly means in the context of time series. Darban et al classify temporal anomalies as follows. Global, contextual, seasonal, trend, and shapelet. Global anomalies are points with extreme values in comparison with the rest of the series. Contextual anomalies are deviations from the context or neighborhood of a point. Seasonal anomalies are deviations from the typical seasonal patterns in the series. Trend anomalies are transitions or deviations from a local or global trend in a series, and shapelet anomalies are subsequences that have different shapelets from the sequence. Xu et al make a further distinction by defining abnormal points as points that are either anomalous or missing. Conversely, points that are neither anomalous nor missing are classified as normal. These definitions give us a framework for analyzing what types of anomalies a method is able to detect.

      Overview on VAE’s

      Generative modeling refers to the objective of generating data from compact, low-dimensional representations. Representation learning can be a byproduct of generative modeling, where the generative model attempts to learn lower dimension representations of data such that inputs with similar high level features will be close to each other in the low dimension representation. Conversely, inputs that have dissimilar features will be far away from each other in the lower dimension representation space. These properties imply that the representation learner learns a good representation of the data that captures as much distinguishing information as possible. VAE’s achieve this through a two step process. Given an input x, an encoder is learned that maps the input to a latent space, and then a decoder takes the latent space representation and maps it back up to the original feature space. The key property of VAE’s is that they can attempt to enforce a specific distribution in the latent space, such that we can sample from it and generate real looking outputs. The goal is to learn a model \(P_{\theta}(x) = \int p_{\theta}(x | z) p_z(z)dz\), where x are the inputs and z is a random variable in our latent space. In DONUT, and in most other VAE methods, \(p_{\theta}(x | z)\) and \(p_z(z)\) are chosen to be gaussian. Given this model, we would like to find the parameters that maximize the log likelihood \(log P_{\theta}(x)\). This is often an intractable integral to solve or approximate, so a trick called importance sampling is used. We can rewrite the integral as

      \[P_{\theta}(x) = \int p_{\theta}(x | z) p_z(z) \frac{q_z(z)}{q_z(z)}dz\]

      where \(q_z(z)\) is a distribution we know how to sample from. Now, we rewrite this expression as an Expectation

      \[E_{z \sim q_z}[p_{\theta}(x | z) \frac{p_z(z)}{q_z(z)}]\]

      We can now use monte carlo integration to estimate this expectation. This estimation will be inefficient to estimate with the wrong choice of \(q_z\). It turns out that

      \[q_z(z) = p_{\theta}(z | x)\]

      is the optimal choice for \(q_z(z)\), and because this distribution might be hard to sample from, we use the variational inference trick where we find an approximation to this distribution by minimizing the objective

      \[J_q = KL(q_{\psi}(z | x) || p_{\theta}(z | x))\]

      Thus we can now define an objective to be minimized that is fully parametrized by \(\theta\) and \(\psi\).

      \[J_p = -log E_{z \sim q_{\psi}(z | x)}[p_{\theta}(x | z) \frac{p_z(z)}{q_{\psi}(z | x)}]\]

      The monte carlo estimate of this expectation produces a biased estimation of \(\theta\), so instead of optimizing the objective directly, we optimize a lower bound of the negated objective. Using Jensen’s inequality and expanding out the log terms, we know that

      \[-J_p \geq E_{z \sim q_{\psi}(z | x)}[log p_{\theta}(x | z) + log p_z(z) - log q_{\psi}(z | x)] = E_{z \sim q_{\psi}(z | x)}[log p_{\theta}(x | z)] - KL(q_{\psi}(z | x) || p_z(z))\]

      This expectation lower bound is known as the ELBO, and is the surrogate objective that VAE’s optimize in order to learn good encoders and decoders.

      DONUT

      The key goal of DONUT is to take a series with normal data and potentially anomalous data, learn how to represent the normal features of the series, and then use these representations to compute a reconstruction probability score. Intuitively, if the method learns to represent normal inputs well, an anomalous input will have a low chance of being well reconstructed, and thus will have a low reconstruction probability. The challenge is that in order for the method to work really well, it is important that the method does not attempt to learn good representations for anomalous data. Xu et al achieve this by formulating a modified objective function called the modified ELBO (M-ELBO). The idea is to reduce the contribution of anomalous and missing points to the learned representation. M-ELBO is defined as such

      \[E_{z \sim q_{\psi}(z | x)}[\sum_{w = 1}^W \alpha_w log p_{\theta}(x | z)+ \beta log p_z(z) - log q_{\psi}(z | x)]\]

      Where \(\alpha_w\) is 1 when \(x_w\) is not an abnormal point, and 0 when \(x_w\) is abnormal. \(\beta = (\sum_{w = 1}^W \alpha_w) / W\). We will take a deep dive into this modified ELBO through empirical experiments and by considering what role each term in the objective plays in both the learning of the latent space, and performance.

      The authors also introduce two innovations that serve to improve performance, something we will reproduce in our experiments. The first innovation is markov chain monte carlo imputation of the missing points. The authors hypothesize that during testing, the presence of missing points in a given sample window might bias the reconstruction of the window, and thus affect the reconstruction probability, so they introduce iterative generation of normal points that can replace the missing points. Additionally, the authors implement “missing point injection”. Before each training epoch, they inject missing points into the training samples by randomly selecting a subset of training sample points and removing the points (setting their values to zero). Note that the original samples will be recovered after the epoch is completed. They claim that missing point injection amplifies the effect of M-ELBO by forcing DONUT to learn the normal representation of data in abnormal windows. It certainly helps to improve performance, and we will perform a more thorough empirical analysis on both injection, and the \(\beta\) term in the M-ELBO.

      The authors formulate the reconstruction probability as follows. They begin with the expression

      \[p_{\theta}(x) = E_{p_{\theta}(z)}[p_{\theta}(x | z)]\]

      The authors claim that this does not work well empirically, and thus choose to use \(E_{q_{\phi}(z | x)}[log p_{\theta}(x | z)]\) as the reconstruction probability score. If the negation of these scores exceeds a given threshold, the point will be classified as an anomaly.

      We now describe the model structure of DONUT. The encoder \(q_{\phi}(z | x)\) is represented by a deep fully connected net that maps x to a lower dimension feature space. Then there are two readout heads that map the learned features from the net to a mean and variance, which we will denote \(\mu_z\) and \(\sigma_z\). We can then sample \(z\) from \(N(\mu_z, \sigma_z)\). The decoder \(p_{\theta}(x | z)\) is represented by a deep fully connected net that maps a latent variable \(z\) to a larger feature space. There are then two readout heads that map the learned features to a mean and variance, which we will denote \(\mu_x\) and \(\sigma_x\). We can then sample \(x\) from \(N(\mu_x, \sigma_x)\)

      Experimental Setting and Evaluation

      Before we lay out the experimental findings and their implications, we begin by briefly describing the datasets used and their characteristics, model architectures, training, and the metrics used for evaluation. We will use three datasets for experimentation, two of which come from the repository of the original paper. The first dataset is called “cpu” and is a series representing some cpu related kpi sampled every minute. The second dataset is called “g” and is also sampled every minute. The third dataset is air temperature time series from CIMIS station 44 in Riverside California, sampled at hourly intervals in the month of March from 2009 to 2019. The dataset did not come with time stamps. These series are all mostly normal, with few anomaly points. This makes the problem more challenging and interesting.

      Plot of the cpu series with anomaly points colored red
      Plot of the cpu series seasonal decomposition
      Plot of the g series with anomaly points colored red
      Plot of the g series seasonal decomposition
      Plot of the weather series with anomaly points colored red
      Plot of the weather series seasonal decomposition

      The cpu time series shows clear seasonality, and has an anomaly ratio of .015. The second series has much less clear seasonality, an anomaly ratio of .06, and is generally less smooth than the first series. This allows us to test the robustness of DONUT on a series that showcases less obvious seasonality, and draw some insights and comparisons on performance on series with relatively different smoothnesses. The weather series also displays clear seasonality and is smoother than the second series, but it differs from the other two series in that there are no anomalies in the training data. Thus, DONUT must learn to detect anomalies by training on purely normal data.

      We create the training and testing data as follows. We begin by standardizing both the training and testing splits in order to represent all of the series on the same scale. We then set any missing values in the series to zero. Finally, we perform slide sampling in order to turn the series into windows of length \(W\). For each window, we will be predicting whether the last value in the window is an anomaly or not. We use a window size of 120 for the first two datasets which means our windows encapsulate two hours of information. For the weather dataset, we use a window size of 24, so each window encapsulates a day of information.

      We will use the same metrics described by Xu et al . Given the probability reconstruction scores, we can compute the precision and recall for a given threshold. The original paper and our experiments are not entirely concerned with the process of automatic threshold selection, so f scores and the ROC curve serve as valid evaluation metrics of the reconstruction probability scores. The main performance metric will be the best f score. Xu et al also allow for delayed detection. That is, in any contiguous sequence of anomaly points, if a point is classified as an anomaly, then the anomaly points that came before it will also be classified as an anomaly if they had not previously been before. The authors claim this is reasonable, as the delay, (the time between the first detected anomaly point and the first point in the contiguous sequence) is quite low. This also seems reasonable in a practical setting, as being able to detect an anomaly within a few hours of it happening can still give you enough time to act.

      For our experiments, we will use fairly small and simple architectures. The baseline VAE in the paper is done using fully connected networks, and so we will use a fully connected network with depth two. We also experiment with CNN VAE’s, and in order to try and compare performance with the fully connected VAE encoders and decoders, we also use a CNN with two layers. We perform experiments on behavior when the latent dimension is increased, and needed to double the width and depth of the fully connected VAE in order to allow for training to converge.

      Reproducing Results and Establishing Baselines

      Xu et al show that using both mcmc imputation and missing data injection alongside M-ELBO improves performance over just using M-ELBO. In our first experiment, we compare the performance of DONUT on the cpu dataset with both mcmc imputation and missing data injection, just mcmc imputation, just missing data injection, and neither of the methods. For each configuration, and for future experiments, we will run the full training and prediction loop 10 times and average results due to the randomness baked into training and prediction with VAE’s. We see that the average best f score is highest when we use both mcmc imputation and missing data injection. We also plot a density estimate of the f scores from the four configurations, and notice that the f scores of the configuration with both mcmc imputation and missing data injection show more right skewness and fatter tails than the other three configurations.

      Configuration avg best f score over 10 runs
      both .642
      just inject .613
      just mcmc .5737
      neither .588
      Plot of distribution of the f scores shaded by the configuration

      Next, we run DONUT with the baseline configurations for each of our three datasets. We randomly sample a third of the training data and plot the selected samples mappings in 3-d z space. We also plot the predicted anomaly points by the model with the highest f score over the 10 runs for each of the three datasets.

      Dataset avg best f score over 10 runs
      cpu .642
      g .881
      weather .996

      Xu et al noticed a phenomenon they labeled “time gradient” in latent space.

      Plot of learned latent space from the original paper of one of the datasets in the paper.

      They noticed that the latent space was spread out according to time of the day, as time of the day likely encoded a large amount of information about the shape of the series. We did not notice such a phenomenon in our experiments. This is likely the result of a difference in experimental setting, but could also be the result of the local variation within the seasonal data, and the fact that similar shapes occur all over the series irrespective of time. We see that on the second dataset, DONUT learned to classify many troughs in the series as anomalous. It was able to detect both global anomalies, as well as contextual and seasonal anomalies, as seen by its ability to detect sudden flat shapes in the series and sudden spikes in unusual places.

      The baseline predictions on the g dataset. Ground truth anomalies are colored red and predicted anomalies are green

      The performance on the third dataset is quite surprising. Given no anomalous data in the train set, DONUT was able to achieve a .996 average best f score on the testing data. This highlights DONUT’s ability to really learn the normal features of a series. Thus during testing, it was not able to reconstruct the anomalous parts of the series as well as the normal parts. While this result does not contradict the claim that it is important to train on both normal and anomalous data, it certainly suggests that there is still value in learning purely normal qualities of the data. M-ELBO does not fully remove learning of anomalous qualities of the data in the latent space, which could lead to unexpectedly high reconstruction probability scores on anomalous testing data.

      Understanding The Latent Space

      It is important that we gain further insights on the latent space, as it is the bottle neck of any VAE method. We will perform a variety of experiments that aim to fully uncover how each term in ELBO controls the characteristics of the latent space. We begin by first explaining the findings and claims of the original paper.

      The authors claim that the number of dimensions in the latent space plays a huge role. A small dimension latent space would not allow you to capture enough information, and too big a latent space would cause DONUT to perhaps capture too much information, including anomalous representations. They found that latent dimensions between 3 and 10 typically produced good results. They next discuss how they believe each term in the ELBO contributes to the time gradient phenomena they observe. We restate the M-ELBO objective

      \[E_{z \sim q_{\psi}(z | x)}[\sum_{w = 1}^W \alpha_w log p_{\theta}(x | z)+ \beta log p_z(z) - log q_{\psi}(z | x)]\]

      We can rewrite this objective as

      \[E_{z \sim q_{\psi}(z | x)}[\sum_{w = 1}^W \alpha_w log p_{\theta}(x | z)+ \beta log p_z(z)] + H[z | x]\]

      Where \(H[z | x]\) is entropy. The authors claim that the first term, \(log p_{\theta}(x | z)\) requires the latent space to be able to reconstruct normal x well, thus it pushes latent representations of dissimilar x further away from each other. The second term, \(log p_z(z)\), serves to encourage the gaussian shape in the latent space and thus encourages the latent space to not expand too much. However, we shrink the contribution of this term by the ratio of normal points in our training data. The entropy term encourages expansion of the latent space, as it is largest when the latent space encodes as much information as possible. This should happen when the latent representations are as distinguishing as possible.

      Effects of Changing the latent distribution

      Most VAE methods traditionally represent the latent space as a mixture of gaussians, both for its simplicity, as well as its flexibility and ability to approximate many complicated distributions. What happens when we use other types of distributions? We will analyze what happens to performance and the shape of the latent space when we represent it as a mixture of Student-T distributions with 10 degrees of freedom. We hypothesize that replacing a mixture of gaussians with a mixture of any other symmetric distribution will not cause any profound differences in the shape of the latent space, at least in 3 dimensions, however, a symmetric latent space with fatter tails could lead to worse reconstruction performance. Consider \(P_{\theta}(x | z)\), where z is sampled from the latent space. With a fatter tailed distribution, we are more likely to sample a z that is further away from the mean of its distribution. This behavior can be beneficial for generative purposes but for reconstruction purposes, this behavior is likely detrimental and will lead to lower likelihoods that a given x came from the sampled z. We now analyze the empirical effects for all three datasets. For the cpu dataset, we notice that the latent space does not look drastically different, considering we only plot a random subset of it. We do however notice a performance dip.

      Latent Distribution avg best f score over 10 runs
      gaussian .642
      t with 10 df .593
      The image on the left is the latent space of the cpu dataset made by a gaussian mixture, and the image on the right is the latent space of the cpu dataset made by a t-distribution mixture

      Similarly for the g dataset, we see a slight performance reduction, but a similarly shaped latent space.

      Latent Distribution avg best f score over 10 runs
      gaussian .8809
      t with 10 df .871
      The image on the left is the latent space of the g dataset made by a gaussian mixture, and the image on the right is the latent space of the g dataset made by a t-distribution mixture

      For the weather dataset, the performance reduction is negligible which suggests that the means of our learned latent space truly represent the normal patterns of the series. (Note that this dataset did not come with timestamps. Disregard any time colorations on latent space plots)

      Latent Distribution avg best f score over 10 runs
      gaussian .996
      t with 10 df .995
      The image on the left is the latent space of the weather dataset made by a gaussian mixture, and the image on the right is the latent space of the weather dataset made by a t-distribution mixture

      This brief analysis suggests that the gaussian distribution is truly a good adaptable choice for our latent space. It allows for some variability when doing generative modeling, but also allows for a more robust estimator of reconstruction probability.

      Should we Scale the Entropy term in M-ELBO?

      Xu et al provide a weighting system for the first two terms of M-ELBO, but choose not to add any weighting to the entropy term. They hypothesize that because of the entropy term’s role in expanding the latent space, it is perhaps better to keep it untouched. We will perform an empirical analysis on the effects of weighting the entropy term.

      In our first experiment, we choose a reasonable choice for the weight of the entropy term. We will use \(\beta\) to weight both \(logP_{z}(z)\) and \(logq_{\psi}(z | x)\). Thus M-ELBO becomes

      \[E_{z \sim q_{\psi}(z | x)}[\sum_{w = 1}^W \alpha_w log p_{\theta}(x | z)+ \beta log p_z(z) - \beta log q_{\psi}(z | x)]\]

      We can reformulate the M-ELBO in terms of the KL divergence to hypothesize what effects scaling \(logq_{\psi}(z | x)\) by \(\beta\) might have.

      \[E_{z \sim q_{\psi}(z | x)}[log p_{\theta}(x | z)] - KL(q_{\psi}(z | x)^{\beta} || p_z(z)^{\beta})\]

      Using the power rule of logarithms, we can rewrite this objective as

      \[E_{z \sim q_{\psi}(z | x)}[log p_{\theta}(x | z)] - \beta KL(q_{\psi}(z | x) || p_z(z))\]

      Thus we have essentially applied shrinkage to the KL divergence between the prior and the posterior based on the amount of abnormal data in our training data. This would perhaps encourage the latent space to look more gaussian, such that the prior probability dominates the posterior probability in order to increase the M-ELBO lower bound. Thus we can hypothesize that our latent space will perhaps experience shrinkage. This would certainly be undesired behavior if our goal is to expand our latent space and allow for more distinguishing latent space representations while keeping some form of structure.

      We now analyze the empirical results. We first analyze the effects on the cpu dataset. There do seem to be signs of shrinkage in the latent space when it is weighted, however there is no clear absolute shrinkage or expansion. The shape is certainly different, and it seems like the latent space expanded in the negative direction in the second dimension of the latent space, and shrunk in the positive direction. We also observe a performance increase.

        avg best f score over 10 runs
      Unweighted Entropy .642
      Weighted Entropy .665
      The image on the left is the latent space of the cpu dataset without weighting the entropy term, and the image on the right is the latent space of the cpu dataset with a weighted entropy term

      On the g dataset, we can certainly see a differently shaped latent space. We notice that the third dimension of the latent space expanded, while the first and second dimensions showed some level of shrinkage compared to the baseline. We do see a slight reduction in performance compared to the baseline.

        avg best f score over 10 runs
      Unweighted Entropy .8809
      Weighted Entropy .875
      The image on the left is the latent space of the g dataset without weighting the entropy term, and the image on the right is the latent space of the g dataset with a weighted entropy term

      Finally, for the weather dataset, we also see that weighting the entropy term did not lead to absolute expansion or shrinkage of our latent space. We observe shrinkage in the third dimension of the latent space, slight shrinkage in the first dimension, and slight expansion in the second dimension. We also observe a slight performance dip.

        avg best f score over 10 runs
      Unweighted Entropy .9967
      Weighted Entropy .9928
      The image on the left is the latent space of the weather dataset without weighting the entropy term, and the image on the right is the latent space of the weather dataset with a weighted entropy term

      These results suggest that weighting the entropy term can lead to shrinkage of the latent space. It certainly led to different latent space shapes, where we observed expansion in some dimensions and shrinkage in others. There are also no conclusive results in its effects on performance, as we saw improved performance in one dataset and decreased performance in the other two.

      We will now perform a more general experiment on the effects on weighting the entropy term with the cpu dataset. Instead of weighting the entropy term with \(\beta\), we will try different weights between 0 and 1 and observe the effects. We increased the capacity of our VAE network, so we rerun the experiments on weighting entropy with \(\beta\) and not weighting entropy in order to have a valid comparison of results.

      When the entropy term is weighted by zero, we notice a very spherically shaped latent space which looks like a unit gaussian ball. This matches up with a quick evaluation of the ELBO. There is no more reshaping of our latent space by the entropy term, and thus DONUT learns a latent space that matches up with the gaussian prior. With a weight of .2, we again see a circular latent space, however there is more deviation from a spherical shape. We continue to see this phenomenon of deviating from a spherical shape when the weights increase. We also notice that the points become more clustered as the weights increase. There seems to be a level of shrinkage as the weights increase, but for weights equal to .8 and .9, we see the latent space expand again. These results indicate that it is unlikely that weighting the entropy term has any effect on expanding the latent space. Results even suggest that non zero weights can lead to shrinkage. However, weighting the entropy term certainly affects the shape of our latent space, and the ability of the VAE to learn representations that look less gaussian.

      The performance results provide some interesting insights, and can serve to motivate future areas of exploration. We see that performance is maximal when the weights are very low, or around .6 and .7. When the weights are low, the latent space is very constrained, and thus DONUT will learn purely normal representations of the data. As the weights increase, and the ability of DONUT to learn latent representations that deviate from purely gaussian increases, we generally see consistently good performance that is comparable to the zero weight case. With weights larger than .8, we begin to see a dip in performance. With large weights, the latent space deviates the most from being gaussian shaped and perhaps begins to learn anomalous representations. This suggests a benefit to enforcing more normality and constraint on the shape of our latent space for the purposes of anomaly detection. This could mean not weighting the prior term by \(\beta\), or adding some additional terms to M-ELBO that somehow enforce the latent space to look more gaussian.

      weight avg best f score over 5 runs
      0 .682
      .1 .673
      .2 .657
      .3 .602
      .4 .666
      .5 .634
      .6 .7
      .7 .688
      .8 .602
      .9 .625
      1 or unweighted .64
      Beta weighted .6
      The image on the left is the latent space of the cpu dataset without weighting the entropy term, and the image on the right is the latent space of the cpu dataset with a weight of 0 on the entropy term
      The image on the left is the latent space of the cpu dataset with a weight of .2 on the entropy term, and the image on the right is the latent space of the cpu dataset with a weight of .4 on the entropy term.
      The image on the left is the latent space of the cpu dataset with a weight of .6 on the entropy term, and the image on the right is the latent space of the cpu dataset with a weight of .8 on the entropy term

      Empirical Exploration of the Effects of Beta and the Missing Data Injection Rate

      We now perform analysis on exactly how \(\beta\) affects \(p_z(z)\), both through experimenting with differing rates of missing data injection, as well as manually adjusting \(\beta\) and observing the results.

      We restate M-ELBO in terms of the KL divergence.

      \[E_{z \sim q_{\psi}(z | x)}[log p_{\theta}(x | z)] - KL(q_{\psi}(z | x) || p_z(z)^{\beta})\]

      As \(\beta\) decreases, the KL divergence increases. In order to decrease the divergence, the VAE should decrease the posterior probability, which could lead to a more spread out or non gaussian looking latent space, or rather one where we learn very distinguishing representations. As seen from our previous analysis, this might be undesired behavior for the purposes of anomaly detection. Performing automatic reduction of \(\beta\) by increasing the missing data injection rate could prevent DONUT from learning enough about the normal patterns in the training data, and thus performance will likely suffer if the injection rate gets too large.

      We begin first by trying out \(\beta\) values between 0 and 1 in order to observe the effects, and motivate adjusting the missing data injection rate.

      When \(\beta\) is set to 0, we see that the latent space looks fairly compact and non spherical. At \(\beta\) between .1 and .4, we can see that the latent space is quite spread out, and displays some spherical properties, especially for \(\beta\) = .3. For \(\beta\) between .4 and .9, we can see that the sampled latent space begins to look more and more compact, yet there is still a reasonable spread in the latent space. There does not seem to be a clear relationship between the spread and shape of the latent space and performance, however, we note that the \(\beta\) that resulted in the highest performance was \(\beta\) = .3, whose latent space looks the most spherical. This again supports the notion that when the latent space looks more gaussian, anomaly detection is improved.

      Beta avg best f score over 5 runs
      0 .648
      .1 .595
      .2 .591
      .3 .686
      .4 .633
      .5 .6
      .6 .623
      .7 .614
      .8 .669
      .9 .646
      1 or unweighted .64
      Beta weighted .6
      The image on the left is the latent space of the cpu dataset with a weight of zero on the prior term, and the image on the right is the latent space of the cpu dataset with a weight of .1 on the prior term
      The image on the left is the latent space of the cpu dataset with a weight of .3 on the prior term, and the image on the right is the latent space of the cpu dataset with a weight of .5 on the prior term.
      The image on the left is the latent space of the cpu dataset with a weight of .7 on the prior term, and the image on the right is the latent space of the cpu dataset with a weight of .9 on the prior term

      In our experiments on adjusting the missing injection rate, we saw a significant decrease in performance as the rate increased, even reaching an average best f score of .06 when the rate was .8. It is unclear from our experiments whether this is the result of training not converging, as we do observe high loss values, or simply bad performance of DONUT when a vast majority of the data is missing, which would be expected behavior. This is something that would need to be explored further.

      Improving VAE Architecture

      For the purposes of simplicity, DONUT utilizes fully connected layers for both the encoder and the decoder. While these choices certainly produce decent results, perhaps we can implement architectures that can better utilize the temporal information encoded within each window. We explore using a one dimensional CNN for the encoder in DONUT. Perhaps CNNs are better able to learn representations that encode more temporal information within a sample window. In order to make the CNN network as comparable as possible with the fully connected network, we will only use two convolution layers. We apply a kernel size of 3, and a stride of 1. We also use max pooling to downsample the data.

      For the cpu dataset, we observe significant performance improvements with the CNN architecture. We notice the detection of contextual anomalies, which are non obvious local deviations. The latent space looks fairly spherical, however there does not seem to be any noticeable time gradient behavior in the latent space, despite the improved ability of the encoder to take advantage of temporal information.

      Architecture avg best f score over 10 runs
      2 layer CNN .714
      2 layer fc .642
      On the top are the ground truth and predicted anomaly points by the baseline DONUT with fully connected encoders on the cpu dataset. On the bottom are the ground truth and predicted anomaly points by DONUT with CNN encoders on the cpu dataset. Ground truth anomalies are colored in red, and predicted anomalies are colored in green
      Latent space plot for DONUT with CNN encoders on the cpu dataset

      We did not see this same performance improvement in the other two datasets. Additionally, we struggled to achieve stable training on the weather dataset, and so further work needs to be done to achieve convergence in order to perform evaluations on the efficacy of CNNs with that dataset. For the g dataset, we noticed a significant performance reduction. The difference between the performance on the cpu dataset and the g dataset could suggest that CNN architectures could lead to overfitting on less smooth time series. Looking at the plot of predicted anomalies seems to suggest this, as DONUT with a CNN encoder seems to predict that a larger number of the troughs in the g series are anomaly points, an indicator of potential overfitting to the series pattern.

      Architecture avg best f score over 10 runs
      2 layer CNN .824
      2 layer fc .881
      On the top are the ground truth and predicted anomaly points by the baseline DONUT with fully connected encoders on the g dataset. On the bottom are the ground truth and predicted anomaly points by DONUT with CNN encoders on the g dataset. Ground truth anomalies are colored in red, and predicted anomalies are colored in green

      This is an interesting area of exploration for DONUT. There are a variety of architectures such as RNN’s and transformers that have shown superior performance on time series data, and those could be adapted to this method to improve performance over both CNN and fully connected architectures.

      Choosing Number of Latent Space Dimensions

      For the purposes of plotting the latent space in our experiments, we chose to use a latent space with dimension three. However, intuitively, and as shown in the paper, choosing a higher dimension latent space can lead to performance improvements. The authors explain that not only does increasing the size of the latent space increase the quality of the representations learned in the latent space, but it also serves to improve the stability of training. We will look to explore whether the properties of a series can give insight into good choices for the size of the latent space, and perhaps motivate automatic selection of the number of dimensions in the latent space.

      We hypothesize that smoother series do not need as large a dimension in the latent space as series that display higher levels of roughness. Intuitively, in smoother series, the anomalies should be more “obvious”, while in less smooth series, rough behavior could be mistaken for an anomalous pattern.

      We take a technique from smoothing splines, which are function estimates obtained from noisy observations of some data process. Smoothing splines enforce a roughness penalty on the function estimate, defined as such

      \[\int \hat{f}''(x)^2 \, dx\]

      We will use a finite difference estimate of this penalty on the standardized series to define a metric that can be used to describe the roughness/smoothness of the series. Now that we have defined a metric describing the smoothness of a series, we can evaluate the best choice of number of latent dimensions for series of differing levels of smoothness. In order to converge during training, we had to double the width of the fully connected VAE, and also double its depth.

      Dataset Roughness Penalty
      cpu .061
      g .598
      weather .023

      We begin with the cpu dataset. We notice that performance significantly increases when the latent space is 6 dimensions, but performance begins to drop off as the number of dimensions increases, which suggests overfitting.

      number of dimensions avg best f score over 5 iterations
      3 .637
      6 .833
      9 .826
      12 .797

      For the g dataset, performance peaks when the latent space has 9 dimensions. We also see slightly better performance with a latent space dimension of 12 compared to 6

      number of dimensions avg best f score over 5 iterations
      3 .889
      6 .882
      9 .894
      12 .885

      For the weather dataset, we notice a consistent performance improvement when the number of dimensions is increased.

      number of dimensions avg best f score over 5 iterations
      3 .994
      6 .997
      9 .998
      12 1

      These results do not provide any clear picture on whether there is any relationship between the smoothness of a series and the best choice for the number of latent dimensions. For our smoothest series (weather), we observed consistent improvement as the number of dimensions increases. The roughest series (g) also seems to show this behavior. However, we see that increasing the number of dimensions for the cpu dataset decreases performance.

      Concluding Thoughts

      Generative models present an interesting approach to the problem of anomaly detection in time series. They present an extremely customizable class of hypotheses that allow us to design a fairly robust probabilistic anomaly detector. Through the experiments we ran, we gained further insights into DONUT, and VAEs more generally as anomaly detectors. We explored what characteristics of the learned latent space can lead to improved anomaly detection performance, and how we can modify ELBO to achieve those goals. We also see that there is huge potential for exploring more complex encoder architectures for additional performance improvements. Perhaps VAEs can become a robust tool for anomaly detection, and provide benefit to a large variety of people and industries.

      \ No newline at end of file diff --git a/blog/2023/Exploring-Task-Specific-Data-Augmentation/index.html b/blog/2023/Exploring-Task-Specific-Data-Augmentation/index.html new file mode 100644 index 00000000..2c159d91 --- /dev/null +++ b/blog/2023/Exploring-Task-Specific-Data-Augmentation/index.html @@ -0,0 +1,42 @@ + Semi-Supervised Domain Adaptation using Diffusion Models | 6.S898 Deep Learning Blogs 2023

      Semi-Supervised Domain Adaptation using Diffusion Models

      6.S898 Project

      Abstract

      Recently, there has been a breakthrough in image manipulation using Contrastive Language-Image Pretraining (CLIP). Recent work shows that GANs combined with CLIP can translate the images to unseen domains . However, in many cases these manipulations destroy the important information that a user might want to learn (e.g., labels). Recently, there was a paper showing image manipulation leveraging a combination of diffusion models and CLIP . We leverage the method proposed in the DiffusionCLIP paper to perform semi-supervised domain adaptation having limited labeled data. GitHub project page: https://github.com/babiakua/semi_supervised_domain_adapt.

      Introduction & Background

      Diffusion models

      Diffusion Denoising Probabilistic Models (DDPMs) were introduced by Ho et al. as a novel approach in the field of generative models . These models are built on the idea of gradually adding noise to data and then learning to reverse this process.

      The diffusion process is a Markov chain that adds Gaussian noise to the data over a series of steps. This process can be described mathematically as:

      \[x_{t} = \sqrt{\alpha_{t}} x_{0} + \sqrt{1 - \alpha_{t}} \epsilon\]

      where $x_{t}$ is the data at step $t$, $x_{0}$ is the original data, $\alpha_{t}$ is a variance schedule, and $\epsilon$ is Gaussian noise.

      The reverse process aims to denoise the data, starting from the noisy version and progressively removing noise. It’s modeled as:

      \[x_{t-1} = \frac{1}{\sqrt{\alpha_{t}}}\left(x_{t} - \frac{1-\alpha_{t}}{\sqrt{1-\alpha_{t}}} \epsilon_{\theta}(x_{t}, t)\right)\]

      with $\epsilon_{\theta}(x_{t}, t)$ being a neural network predicting the noise. This neural network usually has a UNet architecture with downsampling layers, upsampling layers, and a bottleneck.

      The training objective is to minimize the difference between the predicted noise $\epsilon_{\theta}(x_{t}, t)$ and the actual noise $\epsilon$. This is done using a variant of the mean squared error (MSE) loss:

      \[\min_\theta \mathbb{E}_{x_0 \sim q(x_0), w \sim \mathcal{N}(0, I), t} \left\| w - \epsilon_{\theta}(x_t, t) \right\|^2_2.\]

      DDIM (Denoising Diffusion Implicit Models) paper proposed an alternative non-Markovian noising process that has the same forward marginals as DDPM but has a distinct sampling process as follows:

      \[x_{t-1} = \sqrt{\alpha_{t-1}} f_\theta(x_t, t) + \sqrt{1 - \alpha_{t-1} - \sigma_t^2}\epsilon_\theta(x_t, t) + \sigma_t z,\]

      where $z \sim \mathcal{N}(0, I)$ and $f_\theta(x_t, t)$ is the prediction of $x_0$ at $t$ given $x_t$ and $\epsilon_\theta(x_t, t)$:

      \[f_\theta(x_t, t) := \frac{x_t - \sqrt{1 - \alpha_t}\epsilon_\theta(x_t, t)}{\sqrt{\alpha_t}}.\]

      DDIM process allows for the use of different samplers by setting $\sigma_t$ to different values. In particular, setting $\sigma_t = 1$ makes the process a Markov process equivalent to DDPM while setting $\sigma_t = 0$ makes the process deterministic and allows for almost perfect inversion. DiffusionCLIP method leverages the deterministic nature of the process for image manipulation.

      Image manipulation with CLIP

      CLIP is a model for joint image-language representations which is trained on a large dataset of image-text pairs . Using a contrastive learning objective, it learns a joint, multimodal embedding space. The representations learned by CLIP can be used for many tasks including image manipulation and image synthesis. DiffusionCLIP uses CLIP loss to tune the image generator (e.g., a pretrained diffusion model). CLIP loss takes the following form:

      \[\mathcal{L}_{\text{direction}} (x_{\text{gen}}, y_{\text{tar}}; x_{\text{ref}}, y_{\text{ref}}) := 1 - \frac{\langle \Delta I, \Delta T \rangle}{\| \Delta I \| \| \Delta T \|}\]

      where $ \Delta T = E_T(y_{\text{tar}}) - E_T(y_{\text{ref}}), \Delta I = E_I(x_{\text{gen}}) - E_I(x_{\text{ref}}). $

      $E_I$ and $E_T$ are CLIP’s image and text encoders, $y_{\text{ref}}, x_{\text{ref}}$ are the source domain text and image, and \(y_{\text{tar}}\) is a text description of a target and \(x_{\text{gen}}\) denotes the generated image.

      Related Work

      Recent work in the field discovered an alternative way of manipulating image attributes using pre-trained diffusion models . The authors show that instead of tuning the model, one can modify the reverse process and guide it towards the target domain. The reverse process is guided through a lower-dimensional (compared to original latents) latent space which in this case is the bottleneck of the UNet of the original pre-trained diffusion model. Authors show that this latent space enjoys high-level semantics and linearity which allows for more flexible image manipulation.

      Although this method is still in development (as it was our initial idea for domain adaptation which did not succeed), the latent space suggested by the authors can be used for a more powerful idea which is unsupervised domain adaptation. By smoothing the test images at appropriate noise level, one can classify whether the image possesses a given attribute. Then one can make training and test distributions close to each other by manipulating the attributes of interest. This direction is of our future interest to explore.

      Another area of current research is trying to use GANs (also guided by the CLIP loss) for image manipulation . Using GANs allows for zero-shot image manipulation which is way faster than the diffusion models’ reverse process. However, GANs suffer from their limited inversion capability and destruction of initial image information which might be dangerous for downstream tasks (e.g., consider a classification task with GAN manipulating training image labels).

      An alternative method for manipulating and editing images is mixing latents of source and target . Although this method does provide good results in terms of sample quality, it lacks control for our set-up. We would like to have control over the attributes we are changing and keep the others unchanged.

      Another method for image editing is classifier guidance which adds classifier gradients in the reverse process to control the generation process . This method is unsuitable for our problem set-up since we need to train an additional classifier for the target domain, and we do not have enough data to train it.

      Our Contribution

      We demonstrate capabilities of text-guided diffusion to perform domain adaptation in a semi-supervised setting (e.g., unseen attributes of the target domain). To the best of our knowledge, this is the first work that shows the power of diffusion models in performing domain adaptation when the difference between the train and target domains can be described in a short prompt.

      Methodology

      A frequently encountered problem in supervised learning is one where we have training data from one domain (the source domain) but we want to conduct inference on data that comes from a different but related domain (the target domain) that can be described using text. Specifically, we want to focus on the setting where we have access to an adequate number (for training) of observations from the source domain (a subset of which are labelled) and we want to conduct inference (e.g., classification) on unlabelled observations from the target domain. An additional constraint is that we only have a limited number of observations from the target domain so it is infeasible to learn the target distribution. Here, we deal with image data.

      DiffusionCLIP

      We first train a diffusion model on both labelled and unlabelled images from the source domain. This diffusion model is first used to convert input images (from the source domain) to the latent. Then, the reverse path is fine-tuned to generate images driven by the target text (text description of the target domain), guided by the CLIP loss. The details are given in the subsequent sections.

      DiffusionCLIP Fine-tuning

      In terms of fine-tuning, the DiffusionCLIP model allows for modification of the diffusion model itself as compared to the latent, enhancing its effectiveness. The process utilizes a composite objective including directional CLIP loss and identity loss for fine-tuning the reverse diffusion model parameters.

      Loss Function

      The objective function is given by:

      \[\mathcal{L}_{\text{direction}} (\hat{x}_0(\theta), y_{\text{tar}}; x_0, y_{\text{ref}}) + \mathcal{L}_{\text{id}} (\hat{x}_0(\theta), x_0)\]

      where $x_0$ is the original image and \(\hat{x}_0(\theta)\) is the generated image from the latent with optimized parameters $\theta$. The identity loss \(\mathcal{L}_{\text{id}}\) aims to preserve the object’s identity post-manipulation.

      Optimization and Identity Preservation

      Optimization is guided by directional CLIP loss, requiring a reference and a target text for image manipulation. The identity loss includes $\ell_1$ loss for pixel similarity and a face identity loss for maintaining recognizable human features.

      Architecture

      The fine-tuning involves a shared U-Net architecture across time steps, with gradient flow illustrated in Figure 1. This structure supports the transformation of images to align with target texts.

      Figure 1. Gradient flows during fine-tuning the diffusion model with the shared architecture across t .

      Forward Diffusion and Generative Process

      Kwon et al. discuss the DDPM’s sampling process, which is inherently stochastic. This stochastic nature results in varied samples even from the same latent input. However, to leverage the image synthesis capabilities of diffusion models for precise image manipulation, the authors use DDIM’s deterministic forward process with \(\sigma_t=0\) which allows for almost perfect reconstruction. Using deterministic processes, however, limits the model’s generative capability and this problem has been developed in the subsequent papers by injecting noise at specific timesteps .

      Deterministic Diffusion Processes

      The deterministic processes are formulated as follows:

      $x_{t+1} = \sqrt{\alpha_{t+1}}f_\theta(x_t, t) + \sqrt{1 - \alpha_{t+1}}\epsilon(x_t, t)$

      $x_{t-1} = \sqrt{\alpha_{t-1}}f_\theta(x_t, t) + \sqrt{1 - \alpha_{t-1}}\epsilon(x_t, t)$

      Fast Sampling Strategy

      To expedite the sampling, a ‘return step’ is introduced along with a strategy to use fewer discretization steps. This accelerates training without significantly compromising the identity preservation of the object in the image.

      Detailed mathematical derivations and more comprehensive analyses can be found in the supplementary sections of .

      Experimental Setup and Procedure

      Our method is intended to be used given a setup as follows. We have a set of images from the source domain, \(\{x_i\}_{i=1}^{n}\), out of which we have labels \(\{y_i\}_{i=1}^{n'}\) for a subset of them, where \(n' << n\). For simplicity, we are dealing with a binary classification task with 0-1 labels. We now want to classify test images from the target distribution, \(\{x^t_i\}_{i=1}^{m}\) (\(m << n\)). We also have a text description of the target distribution, \(T_{target}\) (a short prompt that captures how the source and target domains differ; for example, if the source domain is images in the summer and the target domain is images in the winter, \(T_{target}\) could be “winter”).

      We now use the images from the source domain \(\{x_i\}_{i=1}^{n}\) to train a diffusion model and use DiffusionCLIP fine-tuning to generate an image \(x'_i\) from each labelled source image \(x_i\) driven by \(T_{target}\). Thus, we have created a new training dataset with the target distribution \(\{(x'_i, y_i)\}_{i=1}^{n'}\).

      Now, we use supervised learning to train a model on the \(\{(x'_i, y_i)\}_{i=1}^{n'}\) pairs and subsequently classify the test images \(\{x^t_i\}_{i=1}^{m}\). The idea is that by shifting the distribution of training data to match that of the test data using just the text description of the target distribution, we can achieve a model that generalizes well to the target domain even in the regime of limited labelled data and target domain images without having to explicitly learn the target distribution.

      Experiment and Results

      Problem set-up

      We run a simple experiment to show the power of domain adaptation using our method in this setting. We consider a gender classification problem on CelebA dataset with test domain being different from the train domain.

      Our train domain is original CelebA images while our target domain is the same images but in the “sketch” style. The “sketch” style images were generated by the same method (DiffusionCLIP) by editing the original CelebA images on the test set. This style transfer doesn’t change the face identity (including gender, which is of our interest for the given task), so we keep all the labels unchanged.

      We have a training set of size 1,200 images and test set of size 300 images (mainly for computation reasons). Our data comes from publicly available CelebA dataset with binary attributes (including the gender attribute of interest). We resize all the images to size 256x256 using Bilinear interpolation.

      We use a simple CNN architecture for gender classification - three convolutional layers with increasing filter depth (32, 64, 128), each followed by a max pooling layer that halves the image dimensions, followed by 2 fully connected layers with sigmoid activation. Our experiment is run for demonstrative purposes for the most part and does not require complex architectures. The training size of 1,200 images is additionally hinting at the necessity to scale the model complexity down for the purposes of our experiment. Our objective function is binary cross-entropy loss.

      Experimental pipeline

      We run the following experiments to confirm our intuition about the method’s effectiveness:

      • Experiment 1
        • Training set (1,200 labeled images) - original CelebA images
        • Test set (300 labeled images) - “sketched” images
        • We train CNN on plain CelebA images and evaluate on a shifted test domain. We use the plain CelebA test domain as a performance benchmark. We expect this model to do worse on the “sketched” test set than on the original one.
      • Experiment 2
        • Training set (1,200 labeled images) - adapted images. Original train images adapted to the “sketch” style using the method described in the subsection below.
          • Note: We keep the number of images in the train set the same as in the experiment above (e.g., we create new train images and delete the original ones instead of augmenting the data) for the clarity of the experiment. In practice, one can combine images from both domains for learning.
        • Test set (300 labeled images) - “sketched” images.
        • We train the CNN on the “sketched” images now and evaluate the performance on both “sketched” and plain test sets. We expect this model to do better on the “sketched” test set which is our initial goal.

      Domain adaptation method

      To edit our images from the plain CelebA distribution to the target “sketched” distribution, we use the method proposed in DiffusionCLIP . We used a pre-trained fine-tuned diffusion model based on the original diffusion model trained on CelebA images using the P2 objective introduced by Choi et al . Note that the original pre-trained P2 diffusion model was trained on the whole CelebA dataset which makes use of large amounts of unlabeled data in the train domain and is consistent with our problem set-up. The diffusion model was fine-tuned using the prompt “Sketch”. We made use of the deterministic DDIM inversion process with 40 steps (instead of a 1,000 steps in the original noise schedule) and 6 generative steps.

      Despite the significant computation cost savings from using the DDIM process, transforming 1,500 images took more than 6 hours on a single NVIDIA GeForce RTX 3050TI 4GB GPU. Computation time is still the main drawback of using diffusion models for image editing and this is the main reason for us to limit the total sample size to 1,500 images.

      Note: We use the same procedure for generating test images from the “technically unknown” sketch domain and adapting the training set to this domain. This assumes the user perfectly identified the prompt which describes the target domain and used it to fine-tune the pre-trained diffusion model, which is unrealistic in practice. We believe, however, that for simple prompts the semantic similarity of the user prompt and the word “Sketch” would allow us to get adapted images similar to the target domain because of the CLIP loss properties.

      Results

      Image Manipulation

      Figure 2 shows examples of DiffusionCLIP fine-tuning applied to CelebA images, resulting in “sketched” images.

      Figure 2. Examples of DiffusionCLIP fine-tuning.

      Classification

      Figure 3 shows the performance of the CNN trained on the original CelebA images and tested on images in the source domain as well as the target domain, while Figure 4 shows the performance of the CNN trained on the adapted images.

      Figure 3. Performance of CNN trained on original CelebA images.

      Figure 4. Performance of CNN trained on adapted images.

      These results confirm our intuition that adapting our source domain to the target domain results in a non-trivial performance boost. We observe that for the initial few epochs, the performance for both the source and target domains is similar, but this gap increases as we train further. This tells us that initially, the model learns relevant “higher level” features that are present in both the domains since they are both related. However, for later epochs, the model overfits to the distribution of the training data which results in a large performance gap between the two domains. At this stage, the model is learning “lower level” features that belong to the source domain, which are different in the target domain. Thus, the performance on a shifted domain becomes worse as time goes on. If we train further, we expect to learn more lower level features of the source domain, which will enhance performance for a test set from the source domain but deteriorate performance for a test set from the target domain.

      Conclusion

      We have shown, with a simple binary classification experiment, that the proposed domain adaptation method using DiffusionCLIP fine-tuning leads to a significant performance boost when we have training and test data sampled from different but related domains.

      Future work in this direction might include working with the h-space proposed in . Our idea for semi-supervised domain adaptation naturally extends to unsupervised domain adaptation by leveraging the properties of this latent space. One could use this latent space as an implicit attribute classifier after smoothing the image at appropriate noise level and then balance the attributes between train and test sets in an unsupervised manner. This approach, however, requires a better implementation of the original method presented in and is not feasible as of now.

      \ No newline at end of file diff --git a/blog/2023/Iterated-Representation-Learning/index.html b/blog/2023/Iterated-Representation-Learning/index.html new file mode 100644 index 00000000..b145bd00 --- /dev/null +++ b/blog/2023/Iterated-Representation-Learning/index.html @@ -0,0 +1,42 @@ + Iterated Representation Learning | 6.S898 Deep Learning Blogs 2023

      Iterated Representation Learning

      Representation learning is a subfield of deep learning focused on learning meaningful lower-dimensional embeddings of input data, and rapidly emerging to popularity for its efficacy with generative models. However, most representation learning techniques, such as autoencoders and variational autoencoders, learn only one embedding from the input data, which is then used to either reconstruct the original data or generate new samples. This project seeks to study the utility of a proposed iterated representation learning framework, which repeatedly trains new latent space embeddings based on the data outputted from the last round of representation. In particular, we seek to examine whether the performance of this iterated approach on a model and input dataset are indicative of any robustness qualities of the model and latent embedding space, and potentially derive a new framework for evaluating representation stability.

      Introduction

      Representation learning has become a transformative subfield of deep learning within recent years, garnering widespread attention for its sophistication in learning lower-dimensional embeddings of data beyond classical techniques such as principal component analysis (PCA). From class, we learned that desirable characteristics of good representations include minimality, sufficiency, disentanglement, and interpretability. However, because typical representation learning techniques such as autoencoders learn only one latent embedding from the input data, there exists a gap in the literature on the stability of the model and learned embeddings.

      In this project, we thus explore a new approach to traditional representation learning techniques, in which embeddings for a given set of data are learned repeatedly until some sort of convergence with respect to the model and learned embedding space, a process we call Iterated Representation Learning (IRL); by analyzing the performance of this iterative approach, our work aims to discover potential insights into the robustness qualities inherent to a model and its associated latent embedding space. We propose an algorithmic framework for IRL, provide an empirical case study of the efficacy of our IRL framework on the MNIST dataset, and suggest a novel evaluation procedure for representation stability and robustness via iterated learning.

      Representation Learning Primer

      The goal of representation learning is to build models that effectively learn meaningful representations of the data. Representations are important for a variety of reasons, including determining which features are the most explanatory or variable in a dataset, compressing repeated information from a dataset to make it more compact, and learning more effective neural networks, to name a few examples. These representations are typically abstract and less interpretable than the input data, but of lower dimension, which makes them useful in capturing the most essential or compressed characteristics of the data.

      More formally, representation learning aims to learn a mapping from datapoints \(\mathbf{x} \in \mathcal{X}\) to a (typically lower-dimensional) representation \(\mathbf{z} \in \mathcal{Z}\); we call this mapping an encoding, and the learned encoding is a function \(f: \mathcal{X} \rightarrow \mathcal{Z}\). From this, a decoder \(g: \mathcal{Z} \rightarrow \mathcal{X}\) can be applied to reconstruct the encoded data into its original dimension. This is demonstrated in the diagram below.

      Representation learning goal. Image credit: Foundations of Computer Vision: Representation Learning (Torralba, Isola, Freeman 2023).

      Some of the most salient learning methods within representation learning today include autoencoding, contrastive learning, clustering, and imputation; in this project, we focus specifically on iterative approaches for the class of autoencoders.

      Representation learning also has intricate ties to generative modeling, the subfield of deep learning that aims to generate new data by mapping a simple base distribution to complicated high-dimensional data, which is essentially the opposite goal of representation learning. Then, after learning an embedding space via representation learning, this embedding can then be sampled from to generate new data that mimics the original data, as demonstrated by variational autoencoders (VAEs), which we also explore in this paper.

      Prior Literature

      Relatively little literature exists regarding iteratively training dimensionality reduction or representation learning models. Vlahek and Mongus (2023) proposes an iterative approach for conducting representation learning more efficiently, specifically for the goal of learning the most salient features, which fundamentally diverges from our goal and also does not consider embedding robustness. Chen et al. (2019) introduces an iterative model for supervised extractive text summarization, though their objective of trying to optimize for a particular document by feeding a given document through the representation multiple times differs from ours. Cai, Wang, and Li (2021) finds an iterative framework for self-supervised speaker representation learning which performs 61% better than a speaker embedding model trained with contrastive loss, but mainly focuses on the self-supervision aspect of the model and optimizes purely for model test accuracy, not considering other metrics such as stability or robustness.

      Overall, we find that the literature regarding iterative approaches to representation learning is already sparse; of the work that exists, most focuses on very specific use cases, and no work directly examines the robustness or stability of the model and embeddings themselves learned over time, rather optimizing purely for final model performance.

      Iterated Representation Learning

      Existing Dimensionality Reduction and Representation Models

      Nowadays, there are a variety of approaches to effective dimensionality reduction. Below we cover three of the most common techniques.

      Principal Component Analysis

      Principal Component Analysis (PCA) has two primary objectives. First, maximizing sample variance of the newly transformed data, which is analogous to identifying and capturing the greatest (largest) directions of variability in the data (principal components or PCs). Formally, a PC is defined

      \[v^* = \arg \max_v \frac{1}{N-1} \sum_{n=1}^N (x^T_n v - \bar{x}^T v)^2 = \arg \max_v v^T C v\]

      where \(C = \frac{X^T X}{n-1} \in \mathbb{R}^{d \times d}\) is the empirical covariance matrix.

      The second objective is minimizing reconstruction loss, which is analogous to identifying the directions of variability to accurately and concisely represent data. Let \(U\) be the orthonormal basis projection matrix of eigenvectors of \(C\). Then we define reconstruction loss as

      \[\mathcal{L}(U) = \frac{\sum_{n=1}^N ||x_n - U U^T x_n||^2}{N}\]

      Above, we observe that maximizing sample variance and minimizing reconstruction loss go hand-in-hand. Since PCA applies projections by multiplying vectors/matrices to the data, PCA is limited to the linear transformation setting, hence restricting its applicability in many modeling problems.

      Autoencoders

      Autoencoder structure. Image credit: Foundations of Computer Vision: Representation Learning (Torralba, Isola, Freeman 2023).

      Similar to PCA, autoencoders also aim to minimize reconstruction loss. However, autoencoders are not limited to just linear transformations, which enables autoencoders to learn more general lower-dimensional representations of data. Autoencoders are composed of an encoder and decoder, where the encoder maps data to a lower-dimensional representation (embedding) via some function \(f\), and the decoder maps the originally transformed data back to its original dimensional space via some function \(g\).

      End to end, the data space starts in \(\mathbb{R}^N\), is downsized to \(\mathbb{R}^M\) by \(f\), and then is reverted back to \(\mathbb{R}^N\) where \(N > M\). In this case, we can formalize the objective as follows:

      \[f^*, g^* = \arg \min_{f,g} E_\mathbf{x} || \mathbf{x} - g(f(\mathbf{x}))||^2_2\]

      Variational Autoencoders

      VAEs couple autoencoders with probability to get maximum likelihood generative models. Typically for encoding, VAEs regularize the latent (hidden) distribution of data to “massage” the distribution into a unit Gaussian, and when reverting back to the original dimensional space, VAEs add noise to the output — hence, a mixture of Gaussians. By imposing a unit Gaussian structure on the learned embedding space, this allows VAEs to act as generative models by sampling from the Gaussian latent space to generate new data. Unlike traditional autoencoders, VAEs may have embedding spaces that are complicated (if not just as complicated as the data).

      VAE's complex embedding space. Image credit: Foundations of Computer Vision: Generative Modeling Meets Representation Learning (Torralba, Isola, Freeman 2023).

      Formally, the VAE learning problem is defined by

      \[\theta^* = \arg \max_{\theta} L(\{\mathbf{x}^{(i)}\}^N_{i=1}, \theta) = \arg \max_{\theta} \sum_{i=1}^N \log \int_{\mathbf{z}} \mathcal{N} (\mathbf{x}^{(i)}; g_{\theta}^{\mu}(\mathbf{z}), g_{\theta}^{\Sigma}(\mathbf{z})) \cdot \mathcal{N}(\mathbf{z}; \mathbf{0}, \mathbf{\mathrm{I}}) d\mathbf{z}\]

      Iterated Representation Learning

      Proposed Framework

      We now introduce the Iterated Representation Learning Framework (IRL) for autoencoders and VAEs. We start with IRL for autoencoders:

      1. Given design matrix \(X\), learn an autoencoder for \(X\).
      2. Using the decoder from above, reconstruct the data to get \(X'\) and compute its reconstruction loss.
      3. Using the reconstructed data \(X'\), repeat Steps 1 and 2 and iterate until the reconstruction loss converges or the iteration limit is reached.

      As for VAEs, we follow a similar procedure as above.

      1. Given design matrix \(X\), learn a VAE for \(X\).
      2. Using the decoder and adding Gaussian noise, reconstruct the data to get \(X'\). Compute its reconstruction loss.
      3. Using the reconstructed data \(X'\), repeat Steps 1 and 2 and iterate until the reconstruction loss converges or the iteration limit is reached.

      In this report, we examine how IRL is connected to representation, investigate several hypotheses about IRL, and conduct a preliminary case study of IRL on the MNIST dataset.

      Preliminary Questions and Hypotheses

      Motivated by how there may be unexplored stability properties of embeddings, our main hypotheses are twofold. First, iterated reconstruction loss per IRL can converge with respect to the model. Second, learned embedding spaces can be reached via IRL, and the number of iterations until convergence, loss at convergence, and such preserved features upon convergence could reveal meaningful properties of the true representation space, model, and data that are not immediately obvious from a standard autoencoder model.

      More specifically, does the number of iterations until convergence have anything to do with how “good” or stable the model or learned representation is? What does it mean if the reconstruction losses converge? What can we say about characteristics of the data that are maintained through iterations, and characteristics that evolve as the iterations go on? For example, if we observe that a model remains invariant to a certain feature, but becomes sensitive to new features of the data, what does this tell us about these particular features, our model, and the original data itself?

      Perhaps most importantly, beyond the qualitative observations themselves, can we propose some sort of representation learning evaluation framework using iterated representation learning, e.g. rough guidelines on ideal number of iterations required until convergence, and what this says about how good a model is? Ultimately, we hope that using an iterated framework can serve as a general tool for (1) evaluating the stability or robustness of a representation learning model and (2) identifying the most core characteristics of a given dataset.

      Case Study: MNIST Dataset

      To evaluate IRL on a real-world dataset, we selected MNIST to test our hypotheses. We carefully designed our experiments, collected relevant data, and include our analysis below.

      Experimental Design

      For our experiments, we implemented IRL using the framework given above for the classic MNIST digits dataset (due to its simplicity and interpretability), where we preset the num_iterations. At every iteration, we initialize a new autoencoder model with Chadebec, Vincent, and Allassonnière’s (2022) pythae autoencoder/VAE library. The encoder architecture is formed by sequential convolutional layers from PyTorch.

      We then trained the model, reconstructed the data, and saved the train and validation loss. We also saved the original train/test and reconstructed train/test images of the first 25 datapoints to track how IRL progressed visually.

      Autoencoder IRL Analysis

      First, we take a look at the (log) mean squared error of our autoencoder over 30 iterations of IRL, given in the plot below.

      Autoencoder log loss over IRL of 30 iterations.

      We notice that both the train and validation loss steeply decrease until around iteration 10, upon which the validation loss begins to roughly stabilize and converge. This confirms our intuition that the loss following an iterated approach should eventually converge, which we can theoretically verify by observing that if we ran \(n\) iterations, then as \(n\to\infty\), because the loss is lower-bounded by zero and should generally decrease from iteration to iteration (since we are removing information from our data), we must eventually converge. We further hypothesize that the fact that the loss has converged means that the embeddings upon convergence have learned the most succinct, critical portion of the data.

      We also notice that the number of iterations until convergence is very small; as mentioned, after about 10 iterations, it seems that the validation loss has roughly converged. We had hypothesized earlier that if the autoencoder converges after a small number of iterations, then that says something about the quality of the autoencoder architecture. Here, the fact that the loss converged after a small number of iterations gives evidence for this hypothesis, since based on separate tests, this architecture indeed achieves relatively high classification accuracy for the MNIST dataset. We suggest that IRL can thus serve as a framework for evaluating the quality of an autoencoder on a particular dataset.

      Additionally, the validation loss converges at a relatively small number (around 0.25 by iteration 10), meaning that the distance between the original and reconstructed data in a given iteration are very similar. Interestingly enough, the validation loss is actually consistently lower than the train loss, which suggests that the learned representations through this iterated approach actually generalize very well to unseen data, which is certainly a desirable quality of any model.

      We also give the original and reconstructed data for iterations 1, 5, 10, 15, and 20, for both the train and test data, in the figures below.

      Reconstructed train data.
      Reconstructed test data.

      In the beginning, we see that the data starts losing resolution (e.g. the numbers become fuzzier and start losing their distinctness from the background), which makes sense because more iterations means more reconstructions that continue to accumulate reconstruction loss. The reconstructed images are also less clear than the originals due to the information that is lost from the encoding-decoding process.

      Our key observation is that the reconstruction loss stabilizes around the 10th iteration, where the original test images and reconstructed test images look very similar — we hypothesize that this is the point where the autoencoder has learned to represent the data as succinctly as possible while preserving the most critical information.

      VAE IRL Analysis

      We similarly plot the log loss for our VAE, as well as the train, test, and sampled data over iterations in the figures below.

      VAE log loss over IRL of 30 iterations.
      Train, test, and normal sampled data.

      Unlike the autoencoder, the VAE’s train data becomes much more noisy across the 20 iterations. This is likely due to how the VAE injects noise in the reconstruction, which in this case caused the images to lose their distinctness. While the general shape is preserved (roundness, lines, etc), many of the numbers actually ended up merging together and losing their number shape altogether (e.g. some 6s, 3s, 9s all become 0s).

      When comparing IRL on the autoencoder versus the VAE, we observe that the VAE’s log loss converges to a larger log loss than the autoencoder, which makes sense because the VAE’s decoding step adds noise to the images that therefore adds loss to the reconstruction. We also note that both of the models experience steep drop offs in log loss initially, which means the first few iterations eliminated most of the noise in the data and preserved the features that we characterize as “stable”.

      Discussion

      Our proposed IRL framework considers how some features may be more important or more stable than others, and it aims to capture those features while eliminating the noise in the data. While traditional dimensionality reduction techniques have their merits, IRL takes those methods one step further by iteratively trimming away noise until convergence or termination. Throughout this project, we cover representation learning fundamentals and how IRL can capitalize on the way these models learn embeddings, and we also apply this framework to real world data on MNIST. We argue that in our case study of MNIST, IRL does converge in terms of both loss (log mean squared error converges) and reconstructions, which is a promising first step in the analysis of stability and fundamental characteristics of the data. Moreover, we showcase how the number of iterations until convergence has significance, serving as a benchmark for how good an autoencoder/VAE is on a given dataset. Although the VAE’s reconstructed images were more noisy, that’s by nature of the VAE, and we still observe that the fundamental features of the data (lines vs circles) are still preserved throughout iterations.

      There are a variety of directions we’d like to continue to explore with this project, given more time.

      1. We were only able to run a limited number of experiments due to computational power and the duration of time to train a full IRL from start to finish for, say, 30 iterations. Given more time, there are multiple other experiments we’d like to run, including training on other datasets and trying out the performance on different autoencoder architectures to better understand the properties of this iterated approach. Another thing we’d like to evaluate the empirical performance of, but also couldn’t due to computational constraints, is how a single autoencoder with 20 times as many neurons as some basic autoencoder compares to the basic autoencoder trained using IRL for 20 iterations.
      2. We’re also curious to further explore the theoretical guarantees provided by IRL, including rigorous bounds on convergence. We’re also very interested in exploring whether any of our observations from IRL can generalize to other classes of deep learning models.
      3. We’d lastly look into ways to make IRL more computationally tractable. As mentioned, our experimentation was heavily limited due to the computational cost of training a new autoencoder during every iteration. If possible, we’d like to look for optimizations of this framework that still preserve the desired methodology.

      Overall, Iterated Representation Learning serves as a framework to evaluate stability-related properties of data, which we believe to be an important but overlooked standard for representation learning. Our case study of MNIST shows promise for empirical convergence guarantees on certain datasets, and we hope that our work lays the foundation for future representation discussions with respect to stability.

      \ No newline at end of file diff --git a/blog/2023/Language-Bias/index.html b/blog/2023/Language-Bias/index.html new file mode 100644 index 00000000..3bde580a --- /dev/null +++ b/blog/2023/Language-Bias/index.html @@ -0,0 +1,139 @@ + Understanding Bias in Speech to Text Language Models | 6.S898 Deep Learning Blogs 2023

      Understanding Bias in Speech to Text Language Models

      Do language models have biases that make them better for latin based languages like English? To find out, we generate a custom dataset to test how various language features, like silent letters, letter combinations, and letters out of order, affect how speech2text models learn and compare these results with models trained on real human language.

      Motivation

      With all the buzz that ChatGPT is getting recently, it is clear that machine learning models that can interact with humans in a natural manner can quite literally flip the world around. If that is not enough proof, the popularity and convenience of Siri and Google Assistant can give you a bit more of an idea. We can see how speech processing is important as a way for humans and computers to communicate with each other, and reach great levels of interactivity if done right. A lot of the world’s languages do not have written forms, and even those that do, typing can be less expressive and slower than speaking.

      The core of these assistant systems is automatic speech recognition, often shortened as ASR or alternatively speech2text, which we will be using. This problem sounds rather simple: turn voice into text. However easy it might sound, speech2text is far from solved. There are so many factors that affect speech that make it extremely difficult. First, how do we know when someone is speaking? Most speech2text models are trained on and perform well when the audio is clean, which means there is not a lot of noise. In the real world, however, one can be using speech2text in a concert or a cocktail party, and figuring out who is currently speaking to the system amid all the noise is a problem in itself! Another important factor that complicates speech2text is that we don’t all talk the same way. Pronunciations vary by person and region, and intonation and expressiveness change the acoustics of our speech. We can see this in full effect when auto-generated YouTube captions look a bit... wrong.

      From https://www.reddit.com/r/funny/comments/ha7dva/youtube_auto_captions_spot_on/

      Aside from who and how we talk, another big part that makes speech2text hard has to do with the idiosyncrasies of text and languages itself! Some idiosyncrasies of language include orthography, the system of how we write sounds and words, and syntax, the system of how words string together into sentences. If you are familiar with English, you would be familiar with the English syntax: subject, verb, object, and a particular order for adjectives. We would instinctively say “small white car,” but not “white small car” and most definitely not “car white small.” Cross over the English channel to France (or the St. Lawrence River to Quebec), and the order changes. For French, you would say “petite voiture blanche,” which word for word is “small car white.”

      Travel a bit further and you would see that Chinese uses “白色小车” (”white color small car”), Thai uses “รถสีขาวคันเล็ก” (”car color white * small”) and Kannada uses “ಸಣ್ಣ ಬಿಳಿ ಕಾರು” (”small white car”, same as English). Aside from order of adjectives, larger differences in syntax include having the subject appear first or last in a sentence, position of verbs, and how relative clauses work. All this means that language is quite non-linear, and natural language models that understand language must cope with our silly little arbitrary orders!

      From https://www.bbc.com/news/blogs-trending-37285796

      Thankfully though, for speech2text how sentences work is not as important as how phonetics and orthography works. But even then, things are not quite smooth sailing either. We sometimes take for granted how difficult reading is, perhaps until you start to learn a second language and realize how much we internalize. English is notorious for not spelling words the way it sounds, mostly because writing was standardized a long time ago and pronunciation has shifted since. This makes it difficult for machine learning models to try to learn.

      Sentence from https://en.wikipedia.org/wiki/Ough_(orthography)

      Wow, look at all those words with “ough”! There are at least eight different pronunciations of the word, or from another perspective, at least eight different audios magically turn out to be spelt the same! In the diagram we tried substituting the red “ough”s with their rhymes in blue, keeping in mind that some dialects pronounce these words differently (especially for “borough”), and in green is the International Phonetic Alphabet representation of the sounds. IPA tries to be the standard of strictly representing sounds as symbols. What’s at play here? English is plagued with silent letters (”knight”), and extraneous letters (all the “ough”s and more).

      Some languages are more straightforward in their orthography than others. Spanish tends to be fairly phonemic, which pretty much means that their writing and speaking are quite in sync. French, however, is very famous for its silent letters. A word like “hors-d’oeuvres”, which means appetizer, can be represented in IPA as [ɔʁ dœvʁ]; you may see that around half the letters aren’t pronounced! Kannada, a language in South India that is spoken by one of our group members, is said to be quite phonemic, but doesn’t come without a different kind of headache. A number of languages, predominantly in South Asia and Southeast Asia, use a kind of writing system that combines a consonant character with a vowel character to form a new character that represents the consonant-vowel combination. The new character retains some part of the original consonant and vowel in some cute manner, kind of like the letter Æ but dialed up many notches. This kind of writing system is called an abugida; most abugida systems descend from the 3rd century BCE Brahmi script.

      Above are some examples of scripts of this type, demonstrating two consonants k and m combining with vowels long a, i and u. Another interesting feature for some of these writing systems is that sometimes the vowels jump to the front, for example in Thai ก (k) + เ (e) = เก (ke). Again, writing is non-linear at times!

      Past Work

      Past work shows success in training speech2text models in German, Spanish, and French , . Some use pruning and finetuning of state of the art English models, and others train models from scratch for each language. Other works such as show that models can learn less common languages, like Thai which is the language our other group member speaks, as well, but they are more complex and specific to each language. circumvents this by pruning wav2seq (a SOTA speech2text model) and finetuning the model for different languages. While this showed promising results, we wanted to dive deeper to understand, from a linguistic and data driven perspective, the biases that simple speech2text models had.

      Many state of the art models rely on encoder-decoder models. An encoder is used to create an expressive feature representation of the audio input data and a decoder maps these features to text tokens. Many speech models like , , also use self-supervised pretraining on the encoder for better performance. One example is the Wav2Seq. Wav2Seq uses unsupervised pretraining to annotate audio samples with unique characters in the form of a pseudo language. The building blocks for these encoders are generally transformer based .

      Other methods use deep recurrent neural networks like in . RNNs are great at sequential tasks and have an internal memory to capture long term dependencies. Transformer based methods have outperformed RNNs and LSTM based architectures now though.

      How do these features (idiosyncrasies) differ between languages and does this affect how well speech2text models learn? By doing more ablation studies on specific features, maybe this can inform the way we prune, or choose architecture, and can also help determine the simplest features necessary in a speech2text model that can still perform well on various languages.

      There has been work that performs ablation studies on BERT to provide insight on what different layers of the model are learning . Experiments suggest lower layers learn phrase-level information, middle layers learn syntactic information, and upper layers learn more semantic features. We want to do a similar study, but on dissecting the components of language rather than the components of a particular SOTA model. Our hypothesis is that by doing so, we can be better informed when selecting preprocessing methods and models.

      Let’s get started with some experiments!

      Generating a Dataset

      We want to explore how each of these language features affects how speech2text models learn. Let’s create a custom dataset where we can implement each of these language rules in isolation. To do that, we’ll build out our own language. Sounds daunting — but there are only a few key building blocks that matter to us. Languages are made of sentences, sentences are made of words, words are made of letters, and letters are either consonants or vowels. Let’s start with that.

      From , languages have 22 consonants on average and about 9 vowels on average so that’s what we’ll have in our language too. We represent consonants as positive integers from 1 to 23 and vowels as negative integers from -9 to -1. After all, letters are just symbols!

      A word, at its most crude representation, is just a string of these consonants and vowels at some random length. To make sentences, we just string these words together with spaces, represented by 0.

      Here’s a sample sentence in our language:

      [14 -2 -9 13  0  8 16 -8 -2  0 -3 -8 16 12  0 10 20 -3 -7  0 14 18 -9 -4
      +  0 16 -3 -5 14  0 -3  9 -8  3  0 -9 -1 22  7  0 12 -5  6 -7  0 -7 22 12
      + -2  0 22 -9  2 -2  0 17 -2 -8  9  0  1 -4 18 -9  0 19 -7 20 -2  0  8 18
      + -4 -2  0 -9  8 -4 15  0 -9 -2 22 18]
      +

      Ok, that seems a little meaningless. We don’t have to worry about meaning in the general semantic sense though. What we do care about, is pronouncing this language, and creating a mapping from these written sentences to an audio sample. Let’s do that next. Audio samples can be represented as spectrograms. Spectrograms give us a visual representation of audio by plotting the frequencies that make up an audio sample.

      Here’s an example:

      When we say “It’s never too early to play Christmas music”, this is what it might look like visually:

      The key here is that we don’t exactly need audio samples, but rather an embedding that represents an audio sample for a written sentence. Embeddings are just low dimensional mappings that represent high dimensional data.

      So, in our case, our spectrogram for a generated audio sample looks something like:

      Even though audio samples might be complicated waveforms, the embedding for the first letter looks something like:

      tensor([[ 3.6887e-01, -9.6675e-01,  3.2892e-01, -1.2369e+00,  1.4908e+00,
      +          8.1835e-01, -1.1171e+00, -1.9989e-01,  3.5697e-01, -1.2377e+00,
      +          4.6225e-01, -6.7818e-01, -8.2602e-01]])
      +

      Again, maybe meaningless to us who haven’t really learned this new language. There are some vertical columns of the same color, and these represent the silences between each word. You might notice that these columns aren’t exactly the same color, and that’s because we’ve added a bit of Gaussian noise to the audio embedding samples to simulate noise that might occur when recording audio samples on a microphone.

      Ok great! We’ve got this perfect language that maps the same sentence to the same audio sample. Now, let’s get to work adding some features that we talked about in the previous section to make this language a bit more complicated.

      We narrow our feature selection to the following three:

      1. Silent Letters: letters in the written language that don’t appear in the phonetic pronunciation
      2. Letter Combos: two letters combine in the script but are still pronounced separately
      3. Letters out of Order: phonetic pronunciation is in a different order than written language

      Silent Letters

      Silent letters mean they appear in our written labels but not in our audio samples. We could just remove letters from our audio embeddings, but that’s a little funky. We don’t usually pause when we come to a silent letter — saying (pause - nite) instead of just (nite) for night. To preserve this, let’s instead add letters to our written label.

      In the diagram above, we have a small written sample and some audio embeddings represented as colored blocks. We generate some rules similar to those on the left.

      In this case, we add a 7 after the 3, simulating a silent letter at consonant 7. We then pad the audio sample with a silent (0) to make up for the size increase of the written label. Note that silent letters don’t add pauses during the audio.

      Combining Letters

      When combining letters, our written script changes, but our audio remains the same. We choose to combine every pair where a vowel follows a consonant. This is the most common case of letter combination in languages that have this feature.

      Here we have to pad the written labels as we combine two letters into one.

      Letters out of Order

      We choose some pairs of consonants and vowels. We swap the pair order for every instance of the pair in the written sample. No padding needs to be added here.

      Controlled Experiments

      Now for the fun part! Let’s see what happens when we test our new language, with each of these rules in isolation, with some models. Regardless of the model we choose, our goal is to learn a written label for a given audio sample.

      We’re going to test our language with the building blocks of these state-of-the-art models — transformers and RNNs. The results from these experiments can inform us on the biases that these fundamental models might have in their most “vanilla” state.

      We hypothesize that transformers will perform better because RNN’s have a limited memory size, while Transformers use attention which means they can learn orderings from anywhere in the audio sample.

      Results

      RNNs are dashed lines, Transformers are solid lines

      Hmm..so Transformers performed better, but not that much better than our RNNs. This could be because our hypothesis that attention is better for long sequences and RNNs have limited memory may not apply. When we generated our language, the consonant and vowel orderings were pretty random. Our rules have some pattern to them, but not as much as a real human language — so maybe attention can exploit these better in real human language, but doesn’t give as much of an advantage in our generated dataset.

      As for our features, it seems that silent letters perform significantly worse than some of the other rules. This makes sense because attention and internal memory perhaps provide some mechanism for dealing with swapped or out-of-order letters. Transformers have the ability to “focus” on features of the sample that are deemed important. Our rules do have some pattern, and the models just have to learn these patterns.

      With silent letters, though there is a pattern to an audio sample not being present, the rest of the sounds succeeding the silent letters are all shifted over. This is probably why letter combos also don’t do too great. With letter combos and silent letters, the one-to-one mapping between a letter and its phonetic pronunciation (or audio embedding) is thrown off for the rest of the sequence.

      Corners Cut

      This certainly tells us a lot! But, we should take these results with a grain of salt. There are some discrepancies with human language and the way that we generated our dataset that we should consider.

      • Actual audio speech recognition systems mostly don’t predict letter by letter, some do subwords and others do word level recognition; but in the grand scheme of things these distinctions may be negligible — after all, they’re all units! This means our controlled experiment, for our purposes, simulates character recognition models which may misspell words (”helouw” instead of “hello”). If the model is at the subword level, misspellings may decrease, since character sequences like “ouw” would not be in the list of possible subwords, or the vocabulary. “ouw” is a very un-English sequence, see if you can find a word that contains these three letters in succession! Misspellings like “hellow” might still happen though, since it is a plausible combination of English-like sequences “hel” and “low”. If the model is at the word level, there will not be misspellings at all.

      • speech2text models generally either do encoder-decoder model, or otherwise typically the input and output do not have to match in dimension. Both options mean that there is no need to pad written or audio samples to make sure they’re the same length. In our case, we have to pad our written/audio to make sure everything is the same size. Connectionist Temporal Classification is used to postprocess outputs and compute loss.

        • The way CTC works is that first it assumes that a letter may take more than one audio frame to say, which tends to be the case, especially for vowel sounds which are typically looooooooooonger than consonant sounds. There is also a special character epsilon that serves as the “character boundary” symbol, but is different from the silent symbol. The output of a CTC model is deduplicated, and epsilons are removed. Here is CTC in action:
      • An effect of the letter combination script in our controlled experiment is that there will be some letter combinations that exist as a class (aka in the alphabet) but never seen in the dataset. For example (1, 12) are in the alphabet as consonants, but 112 isn’t a letter.

      • Actual language has tone, intonation, speed and noise that can make it harder to learn. Here is where something like Wave2Seq can help as tokens are clustered, so if someone takes a little longer to say AA, it will still register as the same pseudo token.

      Real Language

      Alas, real-world languages are more complicated than our controlled languages. We wanted to see if the patterns we learnt in our controlled experiments would still hold true for actual datasets. For this, we needed to find a relatively phonemic language and another language that differs only by one feature. As mentioned earlier, Spanish qualifies for the former, and French qualifies for the latter. French, to the best of our knowledge, is rife with silent letters, but doesn’t really exhibit the other features from our controlled experiments.

      We’re using the CommonVoice dataset, which is a crowdsourced dataset of people reading sentences in many languages, and might be harder to train because of how unclean the dataset as a whole may be. We preprocess the audio using a standard method, which is the following:

      • First, calculate the audio spectrogram and condense the result by summing up the amplitudes of a few frequencies that belong in the same “bucket”, to yield Mel-frequency cepstral coefficients (MFCC)
      • To add some temporal context, the differential of the MFCC and its second-degree differential are calculated and concatenated to the MFCC
      • The label vocabulary is constructed, by looking at what letters exist in the dataset, and the written data is converted to numbers

      Behold, an example of the preprocessed dataset for Spanish!

      target tensor: [30, 43,  1, 41, 53, 40, 56, 39,  1, 59, 52, 39,  1, 58, 39, 56, 47, 44,
      +        39,  1, 42, 43,  1, 43, 52, 58, 56, 39, 42, 39,  7]
      +target sequence: Se cobra una tarifa de entrada.
      +

      We tried training transformers and RNNs, with and without CTC, on this real-world data. Without CTC, the performances of the models are, respectfully, really bad. After a number of epochs, the only thing learnt is that the space character exists, and the 6% accuracy comes from the model predicting only spaces:

      predicted tensor: [16 39  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
      +   1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
      +   1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
      +   1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
      +   1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
      +   1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
      +   1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
      +   1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
      +   1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
      +   1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
      +   1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
      +   1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
      +   1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
      +   1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
      +   1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
      +   1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
      +   1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
      +   1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
      +   1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
      +   1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
      +   1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
      +   1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
      +   1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
      +   1  1  1  1]
      +predicted sequence: Ea
      +target tensor: [71, 28, 59, 83,  1, 53, 57,  1, 54, 39, 56, 43, 41, 43, 11,  1, 36,  1,
      +        56, 43, 57, 54, 53, 52, 42, 47, 43, 52, 42, 53,  1, 43, 50, 50, 53, 57,
      +         5,  1, 42, 47, 48, 43, 56, 53, 52,  8,  1, 14, 59, 50, 54, 39, 42, 53,
      +         1, 43, 57,  1, 42, 43,  1, 51, 59, 43, 56, 58, 43,  7,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      +         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0]
      +target sequence: ¿Qué os parece? Y respondiendo ellos, dijeron: Culpado es de muerte.
      +

      Got it. Like our silent letter controlled experiment, a high mismatch between the audio frame and its written frame causes models to not be able to learn well. Let’s put in our mighty CTC Loss and see how it works! It turns out that after some 30 epochs, it still isn’t doing quite so well. Here, let’s see an example of a transformer trained on the Spanish dataset with CTC:

      predicted tensor: [ 0 39  0 57  0 54 39  0 41  0 41  0 43  0 47  0 43  0 57  0 53  0 42  0
      + 58  0 47  0 53  0 41  0 54  0 39  0 43  0 57  0 43  0]
      +predicted sequence: aspacceiesodtiocpaese
      +target tensor: [71 28 59 83  1 53 57  1 54 39 56 43 41 43 11  1 36  1 56 43 57 54 53 52
      + 42 47 43 52 42 53  1 43 50 53 57  5  1 42 47 48 43 56 53 52  8  1 14 59
      + 50 54 39 42 53  1 43 57  1 42 43  1 51 59 43 56 58 43  7]
      +target sequence: ¿Qué os parece? Y respondiendo elos, dijeron: Culpado es de muerte.
      +

      Perhaps the transformer is too big for this and learns pretty slowly. It is starting to pick up on some sounds, for example for “¿Qué os parece?” it seems to have picked up “as pacce” and “respondiendo” has some similarities to “esodtio,” but we really needed to squint to see that similarity. If we let it run for longer, perhaps it would get better… slowly.

      RNNs, however, came up on top. We’re using bidirectional LSTM RNN for this, and it seems that CTC works! Here’s the RNN trained on the Spanish dataset with CTC:

      predicted tensor: [30  0 59  0 52  0 53  0 51  0 40  0 56 43  0 57  0 43  0  1 42  0 43  0
      + 42  0 47  0 42  0 39  0 42  0 43  0  1  0 89  0 51  0 40  0 43  0 58  0
      + 39  0 59  0 52  0 53  0  1 42  0 43  0  1 50  0 39  0  1  0 57  0 54  0
      + 39  0 88  0 53  0 52  0 39  0  7]
      +predicted sequence: Sunombrese dedidade ómbetauno de la spañona.
      +target tensor: [30 59  1 52 53 51 40 56 43  1 57 43  1 42 43 56 47 60 39  1 42 43 50  1
      + 52 53 51 40 56 43  1 58 39 86 52 53  1 42 43  1 23 39  1 16 57 54 39 88
      + 53 50 39  7]
      +target sequence: Su nombre se deriva del nombre taíno de La Española.
      +

      Looks great! Of course there are some word boundary mistakes, but overall it looks pretty similar. What about French? Here are transformer and RNN results for what we hypothesized is a language full of silent letter features:

      predicted tensor (Transformer): [21  0]
      +predicted sequence (Transformer): L
      +predicted tensor (RNN): [18  0 47  0  1  0 56  0 56 40  0 54  0 44  0  1 55  0 40  0 55  1 53  0
      + 40  0 36  0 48 40  0 49 55  0 44  0 55  0 53 40  0 36  0 49  0  1 49 50
      +  0 53  0  1  0  1 44  0 47  0  1 40  0  1 51  0 50  0 55  0 36  0 49  0
      +  1 54  0 40  0 47 40  0 48 40  0 49 55  0  1 71  0  1 57  0 36  0 54  0
      + 44  0 54  6]
      +predicted sequence (RNN): Il uuesi tet reamentitrean nor  il e potan selement à vasis.
      +
      +target tensor: [18 47  1 36  1 36 56 54 44  1 75 55 75  1 53 75 38 40 48 40 49 55  1 44
      + 49 55 53 50 39 56 44 55  1 40 49  1 14 56 53 50 51 40  1 50 82  1 44 47
      +  1 40 54 55  1 51 50 55 40 49 55 44 40 47 40 48 40 49 55  1 44 49 57 36
      + 54 44 41  6]
      +target sequence: Il a ausi été récement introduit en Europe où il est potentielement invasif.
      +

      Wow! The transformer got stuck in the blank hole, but the RNN looks not too shabby. Some word boundary issues for sure, but we can see similarities. “potan selement” and “potentielement” actually do sound similar, as do “à vasis” and “invasif.” Definitely not as good as Spanish though. Here’s a comparison of losses for the four models:

      One thing that’s very much worth noticing is that the validation losses plateaued or rose during training. Did we overfit our data, or are these languages too hard that they can’t be fully learnt from our data, and the high loss is due to the idiosyncrasies of language? Probably both!

      Now did these real-world explorations match our hypotheses from controlled experiments or not? Our hypothesis from controlled experiments says that French would do worse than Spanish, which is what we’re seeing. Additionally, we see a pretty significant gap in loss between transformers and RNN models, given that CTC loss is used.

      Here comes the confusing part. Most literature would say that transformers should perform better than RNN, even with CTC. This matches with our controlled experiments but did not match our real-world experiments. What went wrong? For one, we think that our models might still be too small and not representative of actual real-world models. We also trained the models for quite a short amount of time with a small amount of data that might be noisy. Perhaps our recipe was just the perfect storm to cause our transformer model to be stuck in the blank hole. We found an article that documents the tendency for MLPs to get stuck in a stage of predicting blanks before moving on to predicting real characters, which sounds like what’s going on for us. Some other sources point to the assertion that input spectrogram lengths must be longer than label lengths, and suggest refraining from padding labels with blanks. We followed their suggestions but unfortunately could not bring the transformer models out of the blank hole.

      Learnings

      What have we looked at?

      • Linguistics: we learnt how weird languages can be!
      • Models: we touched upon how speech2text models usually work
      • Hindrances: we hypothesized and tested a few features that affected model performance
        • Silent letters are our biggest enemies, followed by letter combinations and out-of-order letters
      • Battle: we compared two different foundational models for speech2text against each other
        • In our controlled experiments, it’s a pretty close call but transformer came up on top by just a slight margin
      • Real: we presented what a real-world dataset looks like, the data preprocessing methods, and checked if our learnings from controlled experiments hold
        • Creating a spectrogram and a character vocabulary is the standard!
        • French (silent letter-ish) vs. Spanish (perfect-ish) matches our hypothesis!
        • CTC is the cherry on top for success but only works well with RNN, putting RNN on top by a long shot this time!

      We would like to expand our linguistics experiments further as future work, as there are many more features and combinations not explored here (for example, Arabic writing usually drops all vowels — we imagine that this feature would affect performance a lot!) Another avenue of further work is to try training on other real-world languages to see whether our hypotheses still hold true.

      \ No newline at end of file diff --git a/blog/2023/LinearMode/index.html b/blog/2023/LinearMode/index.html new file mode 100644 index 00000000..6586cfbb --- /dev/null +++ b/blog/2023/LinearMode/index.html @@ -0,0 +1,42 @@ + Understanding Linear Mode Connectivity | 6.S898 Deep Learning Blogs 2023

      Understanding Linear Mode Connectivity

      We study the pruning behavior of vision transformers (ViTs), and possible relations to linear mode connectivity. Frankle et al. (2020) showed that linear mode connectivity, the tendency of a neural network to optimize to the same linearly connected minimum when trained with SGD noise, is strongly tied to the existence of "lottery networks," sparse networks that can be trained to full accuracy. We found that when initialized from a pretrained network, the ViT model showed linear mode connectivity when fine tuning on CIFAR-10. Conversely, random initialization resulted in instability during training and a lack of linear mode connectivity. We also found that using the PLATON algorithm (Zhang et al.) to generate a mask was effective for pruning the network, suggesting the existence of lottery ticket networks in ViTs, but the connection between the existence of these trainable subnetworks and linear mode connectivity remains unclear.

      Instability Analysis and Linear Mode Connectivity

      The advent of transformer models stands as a pivotal advancement within the domain of machine learning, fundamentally reshaping the landscape of artificial intelligence. First introduced in 2017 through the seminal work “Attention is All You Need” by Vaswani et al., transformers have since exploded in both uses and applications, such as language and vision tasks. In fact, ChatGPT, which was the fastest-growing application in history (until Threads in 2023), is built using a transformer architecture. Although transformers can achieve state-of-the-art performance in many tasks, they are often limited by their size, which can create issues for memory and energy both during training and deployment. For example, GPT-3 has 175 billion parameters, and GPT-4, which was released earlier in 2023, has 1.76 trillion parameters! Compression techniques such as knowledge distillation and pruning can be used to deal with these issues, reducing the size of the network while retaining most of its capabilities. Several methods already exist for shrinking transformers such as weight pruning (Zhang et al. 2022), as well as post-training compression (Kwon et al. 2022). However, there is little research on the conditions under which a transformer can be effectively compressed or at what point during training a transformer compression should begin.

      Frankle et al. (2020) suggest that instability analysis—analyzing the stability of training with respect to stochastic gradient descent (SGD) noise—could be a way of identifying conditions under which pruning can be useful. To determine whether the outcome of training is stable w.r.t SGD noise, we create two copies of a network with the same initialization, and optimize those networks using different samples of SGD noise. We can then evaluate how similar or dissimilar the resulting networks are. For this purpose, Frankle et al. propose linear interpolation instability, defined to be the maximum increase in error along the linear path in parameter space connecting the two resulting networks. When error is nonincreasing along this path, the networks are said to have linear mode connectivity. In their paper, they propose that this instability analysis is related to lottery ticket networks, which are subnetworks from randomly-initialized dense neural networks that can achieve comparable test accuracy to the original network after training. They found that pruned networks that were capable of achieving near full test accuracy were stable to SGD noise, and showed linear mode connectivity.

      Frankle et al. study linear mode connectivity in neural networks, which is a stricter version of mode connectivity. They train two networks with the same initialization on SGD noise (randomly augmented datasets) and calculate the maximum loss along the linear path between the two resulting networks to quantitatively analyze the instability of the original network to noise.

      $W_i$ represents the weights at initialization, and the red lines represent two different paths through parameter space corresponding to different SGD noise. The figure on the left shows linear mode connectivity, but the figure on the right shows increasing loss along the blue interpolated path between local minima, which shows instability to SGD noise during training.

      In our project, we plan to expand on the research from Frankle et al. and apply it to transformers. In doing so, we hope to study the conditions under which transformers can be effectively compressed as well as the optimization landscape of training transformers. We seek to evaluate linear mode connectivity in transformer architectures and whether it is an effective indicator for how effectively a transformer can be compressed.

      We restricted our analysis of transformer architectures to the Vision Transformer (ViT) model proposed by Dosovitskiy (2021). ViT works by splitting an image into patches, then computing embeddings of those patches via linear transformation. After adding positional embeddings, the resulting embeddings are fed into a standard Transformer encoder. Due to runtime issues, we were unable to fully train transformers from scratch. We ended up working with and fine-tuning pretrained transformers, which were imported from the HuggingFace transformers package.

      Shen et al. (2023) investigated a more general form of the lottery ticket hypothesis with ViTs, proposing ways to select a subset of the input image patches on which the ViT can be trained to similar accuracy as with the full data. However, they write “the conventional winning ticket [i.e. subnetwork] is hard to find at the weight level of ViTs by existing methods.”

      Chen et al. (2020) investigated the lottery ticket hypothesis for pre-trained BERT networks, and did indeed find subnetworks at varying levels of sparsity capable of matching the full accuracy. Our work hoped to find similar results for vision transformers.

      Linear mode connectivity is also deeply connected to the nature of the optimization landscape. This has important applications with regards to federated learning, and combining the results of independent models. For example, Adilova et al. (2023) showed that many deep networks have layer-wise linearly connected minima in the optimization landscape, which they explain as being the result of the layer-wise optimization landscape being convex, even if the whole optimization landscape is not. They found similar behavior in vision networks trained on CIFAR-10.

      In our project, we seek to evaluate the connection between linear mode connectivity and the existence of winning subnetworks. We expand on the work from Shen et al. and Chen et al. by incorporating the linear mode connectivity analysis proposed by Frankle et al. as well as search for conventional winning subnetworks in transformers for vision tasks. Our goal is to find conditions and methods for which transformers can be compressed while retaining high performance.

      Experiments with Linear Mode Connectivity

      We decided to work with the pretrained ViT model from HuggingFace transformers, and to fine tune this model on CIFAR-10. We also augmented the data set of 32x32 images with a random 24x24 crop followed by resizing, followed by a random horizontal flip and color jitter (randomly changing brightness, contrast, saturation and hue). To evaluate linear mode connectivity, we train a pair of models with the same initialization on different randomly shuffled and augmented datasets.

      In order to assess the instability of the original network to the dataset augmentations, we use the procedure described by Frankle et al. and evaluate the test loss and accuracy of the linearly interpolated models. The weights of the interpolated models are directly calculated from the weights of the trained models using evenly spaced values of \(\alpha\). The test dataset did not receive the augmentations that the training dataset did.

      All models trained for the linear interpolation instability analysis were trained using the AdamW optimizer for 8 epochs with a learning rate of 2e-4. We use the default ViTImageProcessor imported from HuggingFace to convert the images into input tensors.

      The above plot shows the result of linear interpolation after fine tuning two copies of the pretrained model. The evaluation loss is non-increasing, and in fact decreases, possibly as an artifact of the fact that the test set did not receive augmentations. Otherwise, it seems that there is linear mode connectivity, at least in the local optimization landscape when starting from a pretrained model.

      However, we failed to observe linear mode connectivity in randomly initialized transformers, noting an increase in test loss as well as a decrease in test accuracy around \(\alpha = 0.5\). The maximum observed test loss of the interpolated models is more than double the mean of the test losses of the original pair of trained models, which is much more than the threshold of a \(2\%\) increase used by the original authors.

      The resulting networks seem to end up in disconnected local optima, implying that these networks are not invariant to the dataset augmentations. This is consistent with the analysis done by Frankle et al., who find that the stability of networks increases over the course of training.

      Our results combined with the original analysis by Frankle et al. seems to suggest that linear mode connectivity emerges at some point during training, but we have yet to observe the point at which it emerges due to computation restraints and the size of the ImageNet dataset used to pretrain the ViT models.

      Pruning

      We used the PLATON compression algorithm (Zhang et al. 2022) during training to prune networks to different levels of sparsity. PLATON uses several “scores” to prune parameters. One score is parameter magnitude; smaller magnitude parameters tend to be pruned. However, in a complex network, small magnitude weights can still have a large impact; to measure this, PLATON uses the gradient-weight product \(\theta^T \nabla \mathcal{L}(\theta)\) as a first order Taylor approximation of the impact of the removal of a weight on the loss. PLATON also maintains uncertainties for all the weights, preferring not to prune weights with uncertain scores.

      Pruning and retraining the pretrained model to 20% of its original size over 4 epochs results in a test accuracy of 95.3%, compared to 98% accuracy of the full model, and pruning to 5% resulted in 93.7% test accuracy. So although the compressed models cannot reach the accuracy of the original model, they are able to still maintain a relatively high test accuracy, and the PLATON algorithm does a good job of selecting weights. We also used the pruned weights at 20% sparsity to generate a mask, and applied this mask to the original model.

      Blue and orange traces represent training loss and an exponential moving average resp.

      When training the original model, but applying a mask (effectively setting the corresponding weights and gradients to zero), we were able to train the model to 93.6% test accuracy. This supports the lottery ticket hypothesis, since the PLATON algorithm can be used to identify a relatively small subset of weights from the pretrained network that can be trained to high accuracy in isolation.

      Analysis and Conclusions

      Our results with linear mode connectivity suggest that at some point during the training process, optimization ends up in a linearly connected local minimum, and further optimization will be stable to SGD noise. This is because we were indeed able to observe linear mode connectivity when fine tuning a pretrained mode. Additionally, with random initialization, we found the absence of linear mode connectivity. Unfortunately, we were not able to determine exactly where in the training process linear mode connectivity emerges.

      It is notable that over the course of training, the loss does not seem to go down steadily, rather rapidly oscillating between high and low loss. The exponential moving average smooths it out, but it is still quite chaotic. During pruning, it seems plausible that the oscillations could correspond to weights being pruned, but the model approaches the target ratio of nonzero weights by the end of the third epoch of training, leaving the behavior in the final epoch unexplained. Furthermore, the training loss displays similar behavior while training the masked models. Further work could be done to investigate this phenomenon and potentially make pruning/training more stable.

      Our results with pruning show that a standard compression algorithm, PLATON, is able to successfully prune the pretrained ViT model to high levels of sparsity while maintaining relatively high accuracy. Our results with masking weights also suggest the existence of lottery ticket networks in the pretrained model, since we were able to train the corresponding subnetwork to a high level of accuracy. Unfortunately, the connection between linear mode connectivity and lottery ticket transformers remains very ambiguous, since we were unable to perform pruning experiments on models that did not demonstrate linear mode connectivity.

      Further work could be done to investigate linear mode connectivity from different levels of pretraining as initialization, which would shed light on when the optimization of transformers settles into a connected minimum (or when it doesn’t). Further work on when linear mode connectivity arises, as well as experiments pruning the corresponding networks, would help determine if there is a connection between connectivity and the presence of lottery transformers. This would also be important for determining whether linear mode connectivity is a good indicator that transformers can be compressed more definitively. Additionally, as mentioned earlier, the existence of lottery networks in language models has already been investigated, and it would be interesting to see if this is related to linear mode connectivity as well.

      References

      Adilova, L., Andriushchenko, M., Kamp, M., Fischer, A., & Jaggi, M. (2023). Layer-wise Linear Mode Connectivity.

      Frankle, J., Dziugaite, G. K., Roy, D. M., & Carbin, M. (2020). Linear Mode Connectivity and the Lottery Ticket Hypothesis.

      Zhang, Q., Zuo, S., Liang, C., Bukharin, A., He, P., Chen, W., & Zhao, T. (2022). PLATON: Pruning Large Transformer Models with Upper Confidence Bound of Weight Importance. In K. Chaudhuri, S. Jegelka, L. Song, C. Szepesvari, G. Niu, & S. Sabato (Eds.), Proceedings of the 39th International Conference on Machine Learning (Vol. 162, pp. 26809–26823). PMLR. https://proceedings.mlr.press/v162/zhang22ao.html

      Kwon, W., Kim, S., Mahoney, M. W., Hassoun, J., Keutzer, K., & Gholami, A. (2022). A fast post-training pruning framework for transformers. Advances in Neural Information Processing Systems, 35, 24101–24116.

      Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., Uszkoreit, J., & Houlsby, N. (2021). An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale.

      Shen, X., Kong, Z., Qin, M., Dong, P., Yuan, G., Meng, X., Tang, H., Ma, X., & Wang, Y. (2023). Data Level Lottery Ticket Hypothesis for Vision Transformers.

      Chen, T., Frankle, J., Chang, S., Liu, S., Zhang, Y., Wang, Z., & Carbin, M. (2020). The Lottery Ticket Hypothesis for Pre-trained BERT Networks.

      \ No newline at end of file diff --git a/blog/2023/PersonalizedGeneration_w_LLMAgents/index.html b/blog/2023/PersonalizedGeneration_w_LLMAgents/index.html new file mode 100644 index 00000000..dd561408 --- /dev/null +++ b/blog/2023/PersonalizedGeneration_w_LLMAgents/index.html @@ -0,0 +1,42 @@ + New Synthesis Approach for Personalized LLMS | 6.S898 Deep Learning Blogs 2023

      New Synthesis Approach for Personalized LLMS

      Introduction

      Deep learning has revolutionized the way in which humans interact with the world around them. Its growing ability to ingest vast amounts of data, automate feature extraction, and learn complex patterns and nuances among inputs have contributed to breakthroughs in healthcare, natural language processing, computer vision, and more. A particularly exciting avenue of this innovation has been in the burgeoning field of personalized text generation, which aims to produce text that resembles the style, tone, and word choice taken on by a particular user. Significant advancement in this field has the potential to create more effective forms of communication for individuals with disabilities, personalize educational content, and enhance user interactions with chatbots and virtual assistants, all contributing to a better overall user experience.

      In an effort to make the availability of personalized text generation more wide-scale, researchers have conducted several studies in the field, centering their approach to the generation of domain-specific personalized text (utilizing domain-specific features/knowledge). Notable studies conducted include Towards Controllable and Personalized Review Generation, which utilizes a product description and self-attentive recursive autoencoders to generate a personalized review [1], Knowledge-Enhanced Personalized Review Generation with Capsule Graph Neural Network, which constructs a model based on a CapsGNN, and Research on user granularity-level personalized social text generation technology, which utilizes an encoder and decoder for text generation [2]. A lesser explored part of the field and an area that we have chosen to explore for our final project is embedding in the ability to generate personalized text across domains without domain-specific features [3]. Our project draws inspiration from “Teach LLMs to Personalize – An Approach inspired by Writing Education”, which includes a promising multi-step framework that retrieves, summarizes, ranks, and synthesizes a user’s past documents to generate a personalized version of the document at hand [4].

      A critical aspect of the workflow discussed in the LLM personalization paper and an area that we believe can be improved upon using some of the methods discussed in 6.S898 this semester is the way in which the model synthesizes past documents. Throughout the paper, we will be exploring two creative approaches to synthesis that utilize vector word embeddings to pull relevant words from past documents in an effort to improve the model's ability to personalize text.

      Literature Review

      An integral part of our exploration project was experimenting with using less data and smaller models to see how performance degrades with respect to the approach discussed in the personalization for LLMs paper (no open source code attached as the project is currently being worked on by researchers at Google). Experimentation required taking an extensive look at the steps involved in the original implementation, gaining an in-depth understanding of the deep learning principles discussed, and optimizing training and compute under machine constraints to process vast amounts of real-world data.

      The problem formulation for the approach to personalized text generation discussed in the paper can be stated as the following: Given the immediate context of a current document (first k characters) written by a user and access to their past documents, can we develop a model that generates text that is similar to the text of the current document (similarity evaluated by calculating Rouge-1, Rouge-2, Rouge-L, and Bleu scores) . As mentioned earlier, the framework for answering this problem formulation involves first obtaining outputs for retrieval, ranking, summarization, and synthesis, and then feeding these distinct parts into an LLM to produce a personalized body of text (we ignore the auxiliary task of training the LLM to distinguish the owners of written documents for the purposes of this project).

      The retrieval discussed in the paper uses two methods of outputting relevant documents: sparse retrieval, which compares past documents to the current context using the popular BM25 ranking algorithm, and dense retrieval, which uses a transformer-based text-to-text model to map and compare documents in a 768 dimensional vector space. The ranking step then takes this input, orders documents based on their BM25 scores or cosine similarity when compared with the immediate context, and truncates the input to 2500 characters to only take the top documents. The summarization step then summarizes the top ranked past documents in two ways: context independent summarization, which finetunes an LLM on publicly available data and applies this model to the top ranked entries, and context dependent summarization, which uses weak labels (generated from immediate context) to generate a summary in line with the contents of the current document. A visualization of the approach to the structure can be seen below.

      An overview of the infrastructure used to process documents and fine tune the personalized generative model.

      The paper currently explores two methodologies for synthesis: (1) context independent synthesis, which simply uses the top 20 frequently used keywords from a user’s past documents and (2) context dependent synthesis, which uses weak labels to find text from past documents similar to the immediate context of the document to be personalized. After carefully analyzing the two methodologies, we found that by focusing on keywords the synthesized text is missing an in-depth understanding of sentence structure and semantics that are crucial to personalization.

      To enhance this step of the text generation process, we have explored several new methods of synthesis and have landed on two approaches with one utilizing the Word2Vec model and the other using GloVe. We have chosen these methods because they both use unique embedding space attributes to form important relationships between texts. Both networks use the method of creating a centroid of the current document that exists in vector space and output words from top ranked past documents that exist close to this centroid. By doing this, we are essentially selecting words (after filtering out synonyms and stopwords) that are in line with the theme of the current document, which will provide the LLM with more thematically relevant synthesized entries that should in theory generate a more personalized output.

      As an additional research consideration, we explored the effect of passing in the output from both the context independent synthesis discussed in the paper and our auxiliary method of using Word2Vec or GloVe compared to passing in just one of the methods of synthesis. The motivation for doing so came from our initial hypothesis that the combination of both methods of synthesis would enable the LLM to learn complex interactions between important words (results from context independent synthesis) and thematic words (GloVe/Word2Vec) that could lead to better personalization of the final output. A more detailed explanation of the implementations of our proposed approaches will be shown in the following section.

      Description of methods & experiments

      The Baseline Implementation

      Our methodological approach began by re-implementing the baseline model from the “Teach LLMs to Personalize” paper. We utilized two datasets mentioned in the research paper: CNN_DailyMail (CNN_DailyMail) and Amazon Review Data for Books (Amazon_review_data). To enhance efficiency of compute time, we streamlined the data by reducing its size, ensuring a quicker fine-tuning process while retaining data integrity. We also utilized the T5-base model, a smaller model than the T5-11b model mentioned in the paper, for summarization and the personalized generation model. Furthermore, we opted to use the context-independent methods for both summarization and synthesis because the research paper results indicated that their effectiveness is closely comparable to the context-dependent methods. For fine-tuning the summarization model, we utilized a 10 percent subset of the CNN daily mail dataset (311k datapoint original size) with the AdamW optimizer (seeing AdamW is a comparable optimizer to Adafactor, which is what was used in the “Teach LLMs to Personalize” paper), ensuring a balance between efficiency of tuning and comprehensive learning. This set the foundation for our exploration of advanced text synthesis techniques by giving us a base fine tuning and data processing infrastructure. On top of this, the changes we made to the amount of data used along with utilizing a smaller T5 model allowed us to analyze whether the final evaluation results degraded significantly when making the infrastructure of fine tuning the personalized generation model more compact.

      Overview of Modification & Experiments

      In our new approach for synthesis, we utilized Word2Vec and GloVe which hinges on the concept of embedding space. In this space, words are represented as vectors, capturing their semantic relationships based on their context in large text corpora. By embedding the current document and past documents (from the same user) in this space, each word is assigned a position that reflects its semantic meaning.

      An example of how using TSNE dimension reduction can illustrate how words are placed in embedding space. Note that dimension reduction does not always come out cleanly since word embeddings are complex and can't be easily represented in 2D space.

      The ‘centroid’ of the current document in this space is a calculated mean vector, representing the overall semantic direction of the document. Words closest to this centroid are likely to be central to the document’s theme or style. When we look for words from past documents that are closest to this centroid, we are essentially searching for words that align closely with the thematic and stylistic essence of the current document.

      An example of how our centroid for the current document corresponds to other words from past documents (note we used PCA dimensionality here). We also chose to display words that had very close euclidean distances to the centroid. Note our centroid from the current document highlighted the following as significant words: ['like', 'since', 'first', 'mystery', 'book']

      This method makes sense technically because it leverages the nuanced understanding of language captured in word embeddings. By focusing on words that are semantically close to the central theme of the current document, the model can more effectively identify and use terms that are likely to be relevant and stylistically consistent for personalization, thereby potentially enhancing the generated text of the personalized generation model.

      Experiment / Word2Vec vs GloVe

      Word2Vec and GloVe are both models for word embeddings, but they differ in their approach to creating these embeddings. Word2Vec, developed by Google, primarily uses local context information of words (words surrounding a given word) to generate embeddings. This results in embeddings that capture more of the syntactic and semantic relationships based on specific local contexts.

      GloVe (Global Vectors for Word Representation), on the other hand, is designed by Stanford and incorporates global matrix factorization and local context window methods. It emphasizes capturing global statistics of the corpus by considering overall word co-occurrence frequencies, essentially acting as an unsupervised learning algorithm that generates word embeddings.

      When used for synthesis in text personalization, these differences influence the nature of the embeddings. Word2Vec might be more sensitive to the specific contextual use of words in the current and past documents, potentially offering more precise thematic matches based on immediate context. GloVe, with its global perspective, might bring in a broader understanding of word use, capturing more general usage patterns and thematic relationships that extend beyond the immediate context. This could lead to a slightly different set of words being selected for personalization in the synthesis process.

      In our experiment, we adapted the structure from the “Teach LLMs” paper, incorporating our novel synthesis methods using Word2Vec and GloVe. The process involved independently fine-tuning the personalized generation model for each synthesis approach. This fine-tuning was crucial to observe how the different embedding techniques influenced the model’s performance. After implementing the new synthesis methods, we conducted a thorough evaluation to compare their effectiveness, along with the combination of the original and new synthesis approaches, with the base model. The key focus was on analyzing how the different word embeddings (and combinations of embeddings) impacted the quality and personalization of the generated text, with performance metrics providing insights into the strengths and limitations of each method.

      Analysis / Evaluation of Results

      The evaluation metrics used in the “Teach LLMs” paper (and also what we utilized), BLEU (Bilingual Evaluation Understudy), ROUGE-1, ROUGE-2, and ROUGE-L, are standard metrics used to evaluate the quality of text which has been machine-translated or generated by machine learning models.

      BLEU Score: The BLEU score evaluates the quality of machine-translated text by comparing it with one or more reference translations. It does so at various levels, from individual words to consecutive sequences of words (n-grams), to assess precision. A higher BLEU score indicates more similarity to the reference text, often implying better translation quality. However, BLEU has limitations as it does not account for the fluency or grammaticality of the generated text.

      Calculations behind the BLEU score calculations.

      ROUGE Scores: ROUGE (Recall-Oriented Understudy for Gisting Evaluation) is another set of metrics for evaluating automatic summarization and machine translation. ROUGE-1 and ROUGE-2 refer to the overlap of unigrams (single words) and bigrams (two consecutive words) between the machine-generated text and a set of reference texts, respectively. ROUGE-L considers the longest common subsequence, focusing on the longest coherently matching sequence of words. ROUGE scores can consider both precision (like BLEU) and recall, providing a more rounded evaluation.

      Calculations behind the ROUGE-N (N-gram) score calculations; in our case N = 1, 2, or longest common subsequence.

      We can also take a look into how our models performed during the fine tuning period. Based on the progression of the training and validation loss, you can infer how well the model is learning and whether it’s overfitting (learning the training data too closely and not generalizing well) or underfitting (not learning the training data well enough).

      Comparing the performance of our models using two different synthesis approaches–our base model versus the new synthesis approach using the GloVe or Word2Vec model, and the combination of the base model and new synthesis–could result in different behaviors most likely for one particular reason:

      Quality of Embeddings: The GloVe and Word2Vec models provide a different representation for words, capturing semantic relationships in a more nuanced way than just looking at IDF scores, which could lead to varied results during fine tuning. Also, combining our original synthesis with our new synthesis can give the model more information to finetune on allowing for a more intricate understanding of the text when generating.

      The differences in BLEU and ROUGE scores between the two models can arise from how each model handles the linguistic nuances of the generated text. If the new approach with the GloVe model is better at capturing the context and meaning of the sentences, it might score higher in BLEU and ROUGE, despite potentially higher loss values.

      The variations in BLEU and ROUGE scores could also indicate how each model deals with the trade-off between precision and recall—whether it’s better at producing text that contains most of the expected content (high recall) or at avoiding extraneous content not in the reference (high precision).

      Evaluating these metrics in combination with each other, rather than in isolation, provides a more comprehensive picture of a model’s performance and areas for potential improvement.

      The following results portray the overarching BLEU, ROUGE-1, ROUGE-2, and ROUGE-L score we received for the base model, our model using the new synthesis approach, and our model using the base synthesis along with the new synthesis. We have highlighted the snippets of the generated cases that produced our highest scores which are indicative of the possibilities of improvement if we were able to utilize larger T5 models and more training data.

      Results

      The following table highlights the results of our evaluation of generated outputs from our baseline model versus our two new approaches (new synthesis and old synth + new synth). Although there are cases where the max score for our new approaches are high, we believe that this is most likely the case where we generate the rest of a document that is already significantly short. Essentially, since we don’t need to generate a diverse output of words for a longer length, our more compact t5-base model with minimal training performs very well still. 1

        BLEU (avg) ROUGE1 (avg) ROUGE2 (avg) ROUGEL (avg) BLEU (max) ROUGE1 (max) ROUGE2 (max) ROUGEL (max)
      Baseline Model 08.9531 29.5847 18.6126 25.6882 49.5207 65.2174 62.2222 65.2173
      New Synth (Word2Vec) 09.0722 29.3465 18.3129 25.6115 46.6638 65.9340 62.2222 65.2174
      New Synth (GloVe) 10.3810 31.9870 21.1543 27.4335 50.5317 65.8537 60.1942 63.4146
      New Synth (Word2Vec) + Old Synth 10.4402 31.4181 20.2349 27.7710 58.0197 64.8148 61.9048 62.7907
      New Synth (GloVe) + Old Synth 08.7228 29.2284 17.1685 24.6075 49.7273 65.5462 60.9756 61.9048

      Conclusion / Discussion of Limitations

      Throughout the paper, we have demonstrated the potential of embedding techniques like Word2Vec and GloVe in enhancing the personalization aspect of text generation models. Our experiments, which involved comparing these methods with traditional synthesis techniques, have shown promising results in terms of creating text that more accurately reflects the style and thematic preferences of individual users.

      Limitations

      For our exploration, we were limited to running all of our models and doing our data analysis on Google Colab in a short period of time along with having to reimplement the structure used in the “Teach LLMs to Personalize” paper since no codebase exists for it. Because of this, we had to find ways to condense our models and limit the amount of data we ingested so that we could spend less time waiting on models to run and freeing up storage and more time analyzing the output of our code. Two of the big adjustments that we made to navigate these constraints was using the t5-base model (fewer tokens than t5-11b), which we ran for a limited number of epochs, instead of the t5-11b model and using only a subset of data points from the provided Amazon Review Dataset. One of the other things that we tried in order to make the most of our compute was quantizing our t5-base model to provide faster synthesis and summary to run on our ingested data, but we unfortunately ran into dependency issues and were unable to get this method working. However, from our analysis, we estimate that our evaluation results would have been much more in line with the paper’s results, or even surpass them, if we were able to run the t5-11b model for a larger amount of epochs and utilize more amazon review data.

      Next Steps

      If we choose to continue this project, we want to explore ways in which we can synthesize domain-specific knowledge, along with thematic tendencies, related to the current document that can be fed into the final LLM for text generation. There are a lot of benefits of providing synthesized information to the model as it filters for the “most important/significant” words in a document and we hypothesize that this supplementary information could add an extra level of knowledge to a model that has proven to perform well in personalization.

      Also, another pathway that could be explored is integrating Agent LLMs in the initial document ranking phase to see if the procured rankings are better than the current methods set in place (RankDocBM25, RankDocDense, RankSnippet, RankDocBySnpt). We believe that utilizing LLMs that have more awareness of context over large document spaces (and even varying languages) could be beneficial to the process of developing a personalized generation model.

      Bibliography

      [1] Li, Pan, and Alexander Tuzhilin. Towards Controllable and Personalized Review Generation - arXiv.Org, arxiv.org/pdf/1910.03506.pdf. Accessed 12 Dec. 2023.

      [2] Li, Junyi, et al. Knowledge-Enhanced Personalized Review Generation with … - Arxiv.Org, arxiv.org/pdf/2010.01480.pdf. Accessed 12 Dec. 2023.

      [3] Gao, Y B, et al. “IOPscience.” Journal of Physics: Conference Series, IOP Publishing, 1 June 2022, iopscience.iop.org/article/10.1088/1742-6596/2294/1/012015.

      [4] Li, Cheng, et al. Teach LLMs to Personalize: An Approach Inspired by Writing Education - Arxiv.Org, arxiv.org/pdf/2308.07968.pdf. Accessed 12 Dec. 2023.

      \ No newline at end of file diff --git a/blog/2023/Physics-Informed-Primal-Dual-Learning/index.html b/blog/2023/Physics-Informed-Primal-Dual-Learning/index.html new file mode 100644 index 00000000..817e264e --- /dev/null +++ b/blog/2023/Physics-Informed-Primal-Dual-Learning/index.html @@ -0,0 +1,32 @@ + Challenges in Deep Learning Surrogates for Constrained Linear Optimization | 6.S898 Deep Learning Blogs 2023

      Challenges in Deep Learning Surrogates for Constrained Linear Optimization

      Learning a deep net to optimize an LP, based on predicting the optimal basis vector. Surveys existing approaches in the literature. Demonstrates high accuracy of feasibility and optimality on small problem instances, but documents issues when scaling to larger problems. Benchmarks against a modern optimization solver, with discussions on upfront training vs. variable inference computation times.

      Introduction

      Physics-informed machine learning has emerged as an important paradigm for safety-critical applications where certain constraints must be satisfied.

      The goal of this project is to learn a deep learning surrogate for a linear programming optimization problem with hard constraints. The overall approach is inspired by standard KKT conditions. This project will attempt a different DNN approach that aims to predict basic feasible solutions (BFS), and then benchmark it against a modern optimization solver. This project will highlight challenges in designing deep learning LP surrogates.

      Due to computing resource limits, the focus on the project will be more about broad training strategy choices (“discrete” architecture choices), instead of a systematic sweep of hyperparameters.

      Optimization problem

      We are interested in learning to optimize this linear program with $n$ variables and $m$ equality constraints:

      \[\begin{aligned} \min \quad &c^T y \\ \text{s.t. } &Ay = b, (\lambda) \\ &y \geq 0 \end{aligned}\]

      The KKT conditions are:

      \(\begin{aligned} \quad Ay &=b, \\ A^T\lambda + s &= c, \\ y_i s_i &= 0, \forall i \in [n], \\ y, s &\geq 0 \end{aligned}\)


      Literature review

      Fundamental connections between deep learning and the polyhedral theory central to optimization have been noted in , which makes theoretical connections such as bounds on the number and shapes of linear regions expressed in a deep neural net. Beyond linear problems, surveys approaches that exhibit “amortized optimization”, i.e. incurring larger upfront training times to learn parameters that (hopefully) can generalize sufficiently to novel problems within some set of specially-structured problems; and this upfront training can result in a model that may be orders of magnitude faster at inference time compared to classical (often iterative-based) models.

      Previous literature on machine learning for linearly-constrained optimization problems could be categorized by how they manage the various components of the KKT conditions. In many of these papers, there is some common deep neural architecture at the start (e.g. FCNN or GNN); and then to attempt to recover a feasible solution, the final layers in the architecture correspond to some “repair” or “correction” layers that are informed by optimization theory.

      (KKT equalities + Complementarity): Building on , ’s DNN predicts binary classifications for the active set of constraints, i.e. predicting which of $y_i$ or $s_i$ is 0. Then the remaining linear system can be solved efficiently. However, “false negatives” i.e. failure to detect a binding constraint can lead to infeasibilities (labeled as “misidentifications” in the paper), i.e. potentially violating inequality constraints.

      (Primal equality + Subset of primal inequalities): E2ELR uses specialized repair layers for a single-period DC optimal power flow problem, where the differentiable repair layers guarantees power balance (an equality constraint) plus generator limits (a subset of primal inequalities); but can still violate line thermal limits (other primal inequalities). E2ELR leverages domain-specific cost structure, where electricity market penalties for line violations may be order(s) of magnitude smaller than energy balance violations; in this way this paper justifies the potential for remaining primal infeasibilities; but does not generally guarantee feasibility for all constraints.

      Source: , showing the power balance repair layer, i.e. a linear combination of the predicted points with an inequality-feasible extreme point, in order to satisfy the equality constraint.

      (Primal equality + All primal inequalities): Following a similar application in control/RL, uses a Minkowski functionals / gauge map repair layer to guarantee feasibility in any general polyhedral feasible regions; however, a critical limitation of the gauge map approach is the need to calculate a strictly interior point (so as to transform the feasible region to an absorbing set) which generally may be computationally as hard as the optimization problem.

      Source: , showing usage of the gauge mapping; note this requires identifying strictly interior points for every sample instance.

      Alternatively, “Deep Constraint Completion and Correction” DC3 enforces primal equality constraints as a differentiable layer and then embedded gradient steps as neural net layers (“gradient unrolling”) to enforce inequality at the output layers; in addition, DC3 uses inequality penalty/Lagrangian term in the loss function to predict statistically near-feasible points, in some sense warm-starting the inequality correction steps. However, other papers observe that insufficient number of descent steps in DC3 could still lead to primal infeasibility (e.g. Table 2 of ).

      To truly guarantee polyhedral constraints, proposes using the Minkowski-Weyl theorem / double description algorithm to convert the algebraic halfspace polyhedron representation to the convex combination of vertices and extreme rays. Then the neural net is in charge of predicting the linear weights, and so the resulting convex combination must be feasible; the paper’s setting is simplified by only considering homogenous constraints $Ay=0$ i.e. a cone so only the rays are needed. However, with nonzero RHS coefficients, naturally this method will face the curse of dimensionality at higher dimensions since the number of vertices can scale exponentially. The authors acknowledge this point: “Overall, one can expect the algorithm to be efficient only for problems with a reasonably small number $m$ of inequalities and dimension $d$.”

      (Primal + dual approaches): Previous work has used a GNN to predict electricity prices (i.e. dual solutions), and then recover the primal solution. More recently, trains two separate neural networks: a primal and a dual network to emulate the optimization iterations of an augmented Lagrangian method; one drawback to this approach is the need to tune more hyperparameters related to the outer optimization loop. Instead, predicts one scalar value of the optimal value, and leverages the backward automatic differentiation to extract dual solution estimates from the trained neural architecture; these dual values are then used to solve the remaining system of equations to recover the full solution; builds on this work and enforces an input convex neural network architecture since the optimal value function is convex. Still, these above approaches do not necessarily guarantee primal feasibility.

      In a similar vein of trying to incorporate the whole primal-dual problem structure, the GNN for LP paper provides theoretical demonstration of a universal approximation property that GNN can express LP optimal solution mappings, and also demonstrates on a small $m=10,n=50$ problem of achieving 0% error in terms of feasibility detection, as well as prediction of the optimal solution; however, this paper does not address generalization performance.

      Method

      Data generation

      Since the focus is on learning LP’s generally, the dataset is fully synthetic. For this project, focus on having matrix $A$ fixed (one was created with entries drawn from the standard normal distribution), and training over different data examples of $x=(b,c)$. As an application example, this can represent learning on a fixed electric grid network topology and technology set, but learning to predict over different RHS resource capacities / renewables availabilities, and different fuel costs.

      To ensure feasibility (primal problem is feasible and bounded), the space of examples is generated by first creating primitive or latent variables, for each of the $N$ samples (this was implemented in PyTorch to be efficiently calculated in a vectorized way):

      1. Binary vector $\xi \in \{0,1\}^n$ representing the optimal LP basis, with $\sum_i \xi_i = m$; the value is drawn uniformly from the $(n \text{ C } m)$ possible combinations. Practically this was implemented as a batched permutation of an identity tensor with extra columns.
      2. Nonnegative vector $d \in \mathbb{R}^n$, with each $d \sim U[0,1]$ uniformly drawn to be nonnegative.
      3. Then for each element $i$, use $\xi_i$ to determine whether to assign the value of $d_i$ to either the primal variable $y_i$ or the dual slack variable $s_i$. This way complementary slackness is enforced. Namely, \(\begin{aligned} y &:= d\odot\xi, \\ s &:= d\odot(1-\xi) \end{aligned}\)
      4. Sample $\lambda \in \mathbb{R}^n, \lambda_i \sim U[0,1]$.
      5. Finally construct $b=Ay, c= A^T\lambda + s$.

      By constructing the dataset in this way, we also know the ground truth optimal solutions (which might not be unique if there are degenerate solutions, which is assumed here to have low impact due to the random coefficients), and importantly also the optimal LP basis.

      Model

      As a different approach, this project will try to predict the underlying latent target $\xi$, i.e. the optimal LP basis, as a classification problem. Since there may be non-local interactions between coefficients and variables, a fully-connected architecture is chosen, where every layer is followed by a ReLU nonlinearity. The neural net forms a mapping between inputs $x=(b,c) \in \mathbb{R}^{m+n}$ to outputs $\hat{\xi} = f(x) \in \{0,1\}^{n}$, i.e. binary classifications of whether each variable is chosen in the LP basis. Below is an illustration of all the LP bases vectors for the $n=10, m=5$ problem size; there are $10 \text{ C } 5 = 252$ bases.

      Supervised vs. self-supervised learning: Many of the referenced papers devise self-supervised training methods, which is motivated by the expensive computational costs (time) to solve the dataset instances with traditional optimization solvers. However, this synthetic dataset is somewhat of an inverse-problem approach, i.e. by starting out with a sample of assumed optimal solutions, the optimal solutions are very efficiently identified during dataset generation. This synthetic generation can also be thought of as a data augmentation method.

      Since this is binary classification, the training loss used will be binary cross entropy, which is defined in PyTorch for each sample as: \(l(\hat{\xi},\xi) = [l_1, ..., l_i, ..., l_n],\ \ l_i = -\left[\xi_i \log \hat{\xi}_i + (1-\xi_i) \log (1-\hat{\xi}_i)\right]\)

      A softmax layer multiplied by $m$ is optionally added at the output of the NN, to enforce the requirement that there should be $m$ basic variables (in a continuously-relaxed way).

      Equality completion: Once this is done, the LP basis uniquely determines a basic solution (but not necessarily feasible) according to \(\hat{y}^* = (A^\xi)^{-1}b,\) where $A^\xi$ is the $m\times m$ submatrix corresponding to the chosen columns. Rather than matrix inversion, this can be solved in a batched way with PyTorch (torch.linalg.solve) to obtain all samples’ solutions. The entire flow, from supervised dataset generation to neural net prediction and then $y$ solution recovery, is illustrated in the flowchart below.

      As baselines, also consider the DC3 model, where novelty versus the original paper is that here both $b$ and $c$ are varied across samples (as opposed to only the RHS $b$ vectors). Also benchmark against a modern first-order based optimization solver OSQP . For OSQP, the LP can be easily formulated in the necessary format: $$\begin{bmatrix}b\\0\end{bmatrix}\leq \begin{bmatrix} A \\ I_{n\times n} \end{bmatrix} y \leq \begin{bmatrix}b\\ \infty_n\end{bmatrix} $$

      All experiments are implemented on Google Colab T4 GPU instances (except OSQP which can use CPU). Neural network training is optimized with Adam.

      Results

      Approximation and generalization

      Small scale ($n=4,m=2$)

      On a small $n=4,m=2$ problem, the proposed method (using a 3-layer FCNN with width-100 hidden layers; and trained for $<$100 epochs) can achieve near-perfect accuracy ($>$0.997) in both training and testing. The training set has 10,000 samples, and the test set has 1,000 samples, both generated according to the method above. The learning rate used was $10^{-3}$.

      The accuracies when including and excluding the softmax layer (sum to $m$) are reported in the plot below, where this layer does have some (very) small positive effect on training and testing accuracies. More importantly, the $\hat{\xi}$ predictions after the solution recovery step are all feasible, i.e. with no negative elements, and the predicted optimal solutions can be seen in the right plot to match extremely closely with the ground truth $y^*$. This latter property is a desirable feature of the proposed method, that is, once the correct basic feasible solution is predicted, then the linear equation solver will precisely recover the optimal solution.

      Scaling up ($n=10,m=5$)

      Scaling up to a still quite small problem size of $n=10,m=5$ (i.e. 6.25 times larger in terms of $A$ matrix entries), now encounters generalization issues. The same network parameter sizing and training scheme was used here. The left plot shows training accuracy reaches about 0.97 after 300 epochs (and should continue rising if allowed to continue). However, the testing accuracy plateaus at around 0.93 with no further improvement.

      More importantly, while a $>$0.9 accuracy in deep learning tasks is often sufficient, in this particular context the inaccuracies can lead to optimization problem infeasibilities. This is seen in the right plot, where mis-classified $\hat{\xi}$ result in catastrophically wrong $\hat{y}$ primal solution predictions (the severe orange prediction errors in both negative and positive extremes); even when the remaining correctly-predicted $\hat{\xi}$ samples receive precisely correct solutions.

      Furthermore, even though there are about $1-0.93 = 7\%$ of individual $\xi_i$ entries that are mis-classified, these errors are fairly spread across various samples. This results in a $19\%$ infeasibility rate in the test set, i.e. $19\%$ of the predicted $\hat{y}$ vectors violate the nonnegative constraint. In other words, since this particular approach is predicting every individual entry of the basis vector, even small errors for each sample can lead to the overall prediction being wrong. This disproportionate impact is intuitively explained by examining the distribution of bit-wise errors plotted below. Most samples result in 0 bits of error, and then the remaining samples mostly get 1 or 2 bits of error. This means that errors are spread out among many samples, leading to a high rate of infeasible prediction vectors.

      Attempts to improve accuracy

      The previous training error plot appears to show a generalization or overfitting problem. Based on this, various data augmentation techniques were attempted, such as perturbing $b$, $c$, or both vectors (both based on random noise vectors and simple scaling invariance of $\alpha b, \beta c$ while keeping the latent $\xi$ targets; as well as generating new $\xi$ vectors after regular numbers of epochs); different schedules of the aforementioned were also tried. However, none of these attempted approaches were able to produce validation accuracy rates significantly above the original $\sim 0.93$.

      Notably, an alternative architecture was tried: instead of outputting size-$n$ binary vectors, now try to predict multi-class classification out of the 252 basis vector classes. This actually resulted in worse testing set performance. Intuitively, treating all bases as discrete classes does not leverage the geometric proximity of 2 adjacent bases (e.g. which are off by 1 in Hamming distance).

      Benchmarking

      vs. DC3 (an “interior” learning approach)

      As a comparison for the $n=4,m=2$ case, the DC3 methodology was implemented using a 3-layer neural net and the self-supervised training loss of the primal objective plus infeasibility penalty, with a chosen penalty rate of 10: \(\mathcal{L} = c^T \hat{y} + 10 ||\max\{0, -\hat{y}\}||^2_2\)

      The number of inequality correction steps during training was chosen to be $t_{train} = 10$, and to maximize the chance of feasibility a very large $t_{test} = 10,000$ was used (i.e. allow many inequality-correction gradient steps during testing inference).

      With a learning rate of $10^{-5}$, the training stabilizes after about 30 epochs. Overall, the predictions are fairly accurate in terms of the out-of-sample average objective: $-0.247$ (a 2% optimality gap versus the ground truth), and an $R^2$ of predicted objective values of 0.9992 (see middle plot). (The qualitative results were robust to faster learning rates too: A previous higher lr=$10^{-3}$ produced a tighter average objective gap, but the optimal solution deviation versus the ground truth was larger.)

      However, despite being designed to enforce all hard constraints, the predictions still resulted in infeasible negative values (see the negative dip in the right plot). A similar disproportionate classification error to infeasibility impact is seen here (albeit to a lesser extent): $2.6\%$ of all output entries are negative, while $7\%$ of test samples lead to an infeasible prediction.

      Similarly to before, inequality violations are spread out among different samples, rather than all concentrated within a few samples; this is seen in the plot below. This provides an explanatory mechanism for the relatively large infeasible rate.

      vs. Optimization solver

      Thus far, the DNN is able to scale quite well along the number of samples dimension, but not the actual problem dimension (number of variables and constraints).

      Return for now to the small $n=4,m=2$ case for which the DNN method achieves perfect out-of-sample testing accuracy. A next practical question is how does this method compare with “classical” optimization methods, or in what contexts would we prefer one over the other?

      Note that there are only $4 \text{ C } 2 = 6$ bases. So once the NN produces a $\hat{\xi}$ estimate, these can be mapped to an index in ${1,2,…,6}$. All possible non-basic submatrix inverses can be pre-calculated. In total, to evaluate 1 million testing samples, the DNN predict-basis approach takes:

      • 46 sec for training
      • 0.002 sec for prediction of $10^6$ samples
      • 10 sec to map $\xi$ to bases indices (note this is not done in a fully vectorized way and potentially could be sped up).
      • $<0.001$ sec to batch matrix multiply every sample $j$’s: $(A^{\xi^j})^{-1}b^j$. Note this is done using einsum which is very efficient on CUDA.

      In comparison, even when running all the 1 million problem instances fully sequentially, the OSQP solver took a total of 67 sec, i.e. solving about 15,000 problem instances per second.

      This means that this DNN model here only achieved a speedup factor of about 1.2x, when including the DNN training time. Furthermore, the above “mapping” step is a remaining coding bottleneck at DNN inference time, and this will scale linearly as the test sample size increases; i.e. this speedup ratio is unlikely to increase much beyond this at higher sample sizes.

      The timing tradeoff can be understood in terms of fixed vs. variable costs, as plotted here. Note the orange and red lines, representing this project’s DNN approach, are using the batched matrix solve instead of pre-computing the 6 matrix inverses (thus taking longer in the solving stage). Despite its very large speedup when only considering the prediction step, holistically the DNN approach here did not pose very significant timing advantages over the optimization solver.

      Conclusion

      This project broadly compared 3 very different approaches to LP optimization: 1) a DNN to predict the optimal LP basis, 2) the DC3 method, and 3) optimization solver. Among the 2 deep learning methods, on the small $n=4,m=2$ problem, the LP basis method produced more robust and accurate results (i.e. it was able to perfectly learn the input to optimal solution mapping, for the chosen data domain) compared to DC3 which already faces inequality violation issues. However, neither deep learning method was able to easily scale to the slightly larger problem.

      Qualitatively, the predict-LP-basis approach can result in “all-or-nothing” accuracy, i.e. predicting the correct basis vector results in the globally optimal solution, whereas even a nearby classification error can lead to catastrophic primal infeasibilities (due to enforcing the equality constraint). Moreover, in both predict-basis and DC3, inequality violations tend to be spread out among different samples, leading to disproportionate impact on the percentage of infeasible solution vector predictions.

      Domain-specific knowledge and leveraging problem structure may be needed for tractable DNN solutions for LP optimization. This includes real-life choices of how much accuracy we need exactly in different aspects of the problem (e.g. different components of the KKT conditions).

      \ No newline at end of file diff --git a/blog/2023/Robustness-of-self-supervised-ViT-features-in-b-mode-images/index.html b/blog/2023/Robustness-of-self-supervised-ViT-features-in-b-mode-images/index.html new file mode 100644 index 00000000..0c08d771 --- /dev/null +++ b/blog/2023/Robustness-of-self-supervised-ViT-features-in-b-mode-images/index.html @@ -0,0 +1,32 @@ + Robustness of self-supervised ViT features in b-mode images | 6.S898 Deep Learning Blogs 2023

      Robustness of self-supervised ViT features in b-mode images

      Vision Transformers (ViT) trained with self-distillation with no labels (DINO) have shown striking properties for several downstream tasks regarding segmentation, classification, and image correspondence. In this work, we assess DINO-vit-s/8 on a new dataset containing b-mode ultrasound images with the ultimate goal of segmenting bone.

      Introduction

      B-mode ultrasound imaging is a widely employed medical imaging technique that uses high-frequency sound waves to produce visual representations of the internal structures of the human body. Its main advantages are its ability to produce real-time images, its portability, low cost, and especially the fact that it is noninvasive and safe (non-radiating). However, it is an imaging modality that carries a very high noise-to-signal ratio. Speckle noise, out-of-plane movement, and high variability in image reconstruction across devices make the resulting images complex to interpret and diagnose . As an example, the following figure shows an annotated b-mode ultrasound image.

      Ultrasound b-mode image of the upper arm with the main physiology annotated.

      Self-supervised Vision Transformers (ViT) have emerged as a powerful tool to extract deep features for a variety of downstream tasks, such as classification, segmentation, or image correspondence. Especially, DINO architectures have exhibited striking properties, where its deep features present localized semantic information shared across related object categories, even in zero-shot methodologies . Consequently, the aforementioned properties of DINO may allow us to develop efficient yet simple methods for b-mode ultrasound image interpretation, without the need for an expert or ground truth labels.

      In this work, we propose analyzing the performance and robustness of DINO in b-mode ultrasound images of the arm and leg, capturing musculoskeletal tissue from two different ultrasound devices. We note that this dataset features a series of images with a high noise-to-signal ratio, which is a property that DINO has not yet been tested against. In particular, we focus on assessing DINO-vit-s/8 deep features across its blocks as well as its attention weights, with the final objective of segmenting bone on b-mode images in a zero-shot approach. Through all these experiments, we show the potential and feasibility of implementing DINO models in real-world b-mode medical imaging applications.

      DINO-vit Assessment

      Since the release of DINO, a self-supervised method for training ViTs based on self-distillation, there has been a line of work focused on exploring new capabilities and assessing the deep features obtained from such pre-trained models. In , they showed how the attention heads corresponded to different parts of an object in an image, or how one could segment desired objects by thresholding the self-attention maps. Similarly, semantic information analysis across related images was performed to show the potential of the deep features contained in DINO-vit models. Employing principal component analysis (PCA), matching algorithms or linear classifiers on the deep features, promising results on segmentation, semantic co-segmentation, and correspondence tasks were presented , .

      Further research was done by combining Stable Diffusion features and DINO features, improving semantic correspondence tasks at the cost of increasing the computation effort . While DINO has shown strong generalization to downstream tasks, there has been no work on the assessment of this model on a b-mode ultrasound imaging domain. Besides the high noise-to-signal ratio, ultrasound images usually present a complex structure of tissues that makes it difficult to differentiate between the foreground, the desired structure to segment or analyze, and the background. Our work shows that DINO is also robust to this type of images, leading to promising results on segmentation tasks.

      Ultrasound B-mode Imaging Segmentation on Musculoskeletal Tissue

      Muscle and bone segmentation have important applications in clinical and rehabilitation practices to assess motion performance, diagnosis of the musculoskeletal system, and quantification of rehabilitation procedures, among others. There has been effort in developing deep learning tools to automatically segment and quantify desired parameters for the aforementioned applications. In , a U-Net architecture with Deep Residual Shrinkage layers for denoising was implemented and trained to segment muscle fibers. Similarly, different muscle heads were segmented employing a large dataset of muscle images from different subjects and devices to train several convolutional neural network architectures , .

      Medical images, from any source, are in general scarce and difficult to label, which poses a limitation for deep learning models to achieve a good performance and generalization. Most of the current methods lack the capability to perform well in unseen segmentation tasks involving different anatomies. In , they developed a deep learning model, UniverSeg, based on a novel Cross-Block mechanism that produces accurate segmentation maps without the need for additional training. However, when employed in noisier data domains, such as b-mode images, the performance breaks down. In this work, we discover that DINO has potential even when dealing with noisier datasets based on b-mode ultrasound images.

      Methods

      Dataset

      The dataset consists of b-mode ultrasound images from the arm and leg of two subjects while moving. We recorded short videos and randomly selected frames to obtain the images. In the images, bone, muscle, and fascia tissues can be appreciated. We also acquired videos from two different ultrasound sources to expand the domain where DINO was tested. With all this, 4 different image origins (or image domains) form the dataset, as appreciated in the figure below. We labeled 10 bone heads of each domain to evaluate DINO’s performance.

      Example of one image of each origin with its mask label (blue). a) Arm (Source 1, Subject 1). b) Arm (Source 1, Subject 2). c) Arm (Source 2, Subject 1). d) Leg (Source 2, Subject 1)

      Deep Feature Assessment

      We analyzed DINO-vit-s/8 features over different layers qualitatively. For any block \(i\), we extracted the Keys, Values, Queries, and Tokens and applied a principal component analysis (PCA) to get the three most important components. For the attention maps, we averaged the self-attention weights of the CLS token over each head of the multi-head block.

      This analysis was done with the intention of qualitatively finding the most suitable deep features for the subsequent segmentation task. Similarly, the self-attention maps were observed to corroborate that the model focuses especially on the bone, and less on the surrounding structures.

      Workflow to obtain deep features as well as self-attention information. Transformer block design obtained from .

      Segmentation Pipeline

      As described in the results section, the Keys of the last block (block 12) of DINO-vit-s/8 were employed as deep features for the segmentation. As in , we used a zero-shot approach as the pipeline for bone segmentation. We first clustered together all the features obtained from the different images passed through DINO with k-means. Then, we selected those clusters for the segmentation mask employing a simple voting algorithm. Let \(\texttt{Attn}_i^\mathcal{I}\) be the self-attention of the CLS token averaged over all heads of block 12 in image \(\mathcal{I}\) and patch \(i\); and \(S_k^\mathcal{I}\) the segment in image \(\mathcal{I}\) belonging to cluster \(k\). The saliency of this segment was computed as

      \[\texttt{Sal}(S_k^\mathcal{I}) = \frac{1}{|S_k^\mathcal{I}|} \sum_{i \in S_k^\mathcal{I}} \texttt{Attn}_i^\mathcal{I}\]

      and the voting of the cluster \(k\) was obtained as

      \[\texttt{Votes}(k) = \mathbb{1}[\sum_\mathcal{I}\texttt{Sal}(S_k^\mathcal{I}) \geq \tau ]\]

      for a threshold \(\tau\) set to 0.2. Then, a cluster \(k\) was considered to be part of the mask if its \(\texttt{Votes}(k)\) were above a percentage of 65% of all images. The following image sketches the whole process.

      Zero-shot segmentation pipeline using keys as deep features.

      To quantitatively assess the segmentation results, both Dice and IoU metrics were computed employing the labeled bone head segmentations.

      Results

      Deep Features Assessment

      We first input a single image to the model and analyzed the Keys, Values, Queries, and Tokens, as well as the self-attention of the CLS token from shallower to deeper layers.

      The three most important components after performing the PCA on the deep features are plotted in RGB as depicted in the figure below. Tokens seem to carry spatial information throughout the different blocks, representing depth information in the final block. On the other hand, Keys and Values seem to carry spatial information on the shallower blocks, and semantic information on the deeper blocks. In fact, we considered the Keys descriptors the most appropriate to be used to segment bone, as the bone head can be distinguished from the surrounding structures. Regarding the attention maps, they seem to move from the skin (in shallow blocks) to the bone (deeper blocks).

      Token, Value, Key, and Query features as well as self-attention maps for different blocks (from shallow to deep).

      Now, if we focus on the Keys features of the last block for the four different image domains, we can appreciate a similar behavior. Bone heads seem to be represented in all four cases by the Keys, being differentiated by the surrounding structures. That being said, we should note that the intersection between muscles just above the bone is in some cases also represented like the bone. Regarding the self-attention maps, in all four cases, they are principally focused on the bone head. However, we can also see that some muscle fibers or intersections may be present.

      Keys deep features and self-attention maps from block 12 for the four different image origins.

      An interactive scatter plot is another method to argue the representation of the bone by the Key features. For all the four different image origins, the patches belonging to the bone head are grouped on a region of the Euclidean space, while the patches belonging to other structures are scattered all over other regions.

      3D scatter plot of the 3 components of the Key descriptors (block 12). Legend: "other" any patch not belonging to the bone head. "boneS1A1" bone patches of Source 1 - Arm Subject 1. "boneS1A2" bone patches of Source 1 - Arm Subject 2. "boneS2A1" bone patches of Source 2 - Arm Subject 1. "boneS2L" bone patches of Source 2 - Leg Subject 1.

      Same Domain Experiment

      We subsequently performed the segmentation task on a set of images from the same origin. For each of the 4 domains, sets of 2, 3, 5, and 10 images were input to the segmentation pipeline. Recalling that the images were selected as random frames from short videos, each image within a domain presented a slightly different configuration of bone and surrounding structures. Therefore, the goal of segmenting with varying image quantities was to evaluate the balance between improvements due to increased feature quantity versus confusion introduced by variation in the images.

      The reader can observe the results in the figure below. The bones from Source 1 Arm 1 are the best segmented, and the amount of images does not affect the performance, obtaining constant values of Dice and IoU of about 0.9 and 0.77, respectively. The segmentation of images from Source 1 Arm 2 in general takes also some part of the muscle tissue, and as in the previous case, the amount of images used does not change the performance with Dice and IoU metrics of about 0.7 and 0.5, respectively. In the case of images from Source 2 Arm 1, a larger quantity of images improves the segmentation results, increasing Dice and IoU metrics from 0.58 to 0.75, and 0.46 to 0.61, respectively. Finally, the segmentation masks from images from Source 2 Leg carry not only the bone but part of the surrounding tissue too. When increasing the number of images to 10, the performance drastically falls (with Dice and IoU of 0) as the segmentation results contain muscle fibers instead of bone.

      Results of the segmentation on same domain images experiment. a) Segmentation result examples for the 4 different image domains. b) Metrics for the 4 different image domains and different amounts of images (mean and standard deviation).

      Different Domain Experiments

      Then, we performed the segmentation task on a set of images from origin pairs. Five images of each origin were paired forming the following groups. Group 1: different physiology (source 1 - arm subject 1 and source 1 - arm subject 2), group 2: different sources (source 1 - arm subject 1 and source 2 - arm subject 1), group 3: different body parts (source 2 - arm subject 1 and source 2 - leg subject 1), and finally group 4: different body parts and sources (source 1 - arm subject 1 and source 2 - leg subject 1). We carried out this experiment to evaluate if the deep features shared from different image origins were similar enough to properly perform the segmentation task, giving an idea of feature correspondence between different image domains.

      The image below shows the experiment results. The segmentation performed on the domain source 1 arm subject 1 worsens when paired with any other image domains. Both Dice and IoU metrics fall from 0.9 and 0.77 (previous values) to 0.78 and 0.59, respectively. Contrarily, the domains consisting of source 1 arm subject 2 and source 2 arm subject 1 improve when paired with source 1 arm subject 1. Finally, the image origin containing leg images maintains a similar segmentation performance when being paired.

      Results of the segmentation for pairs of domain images. Legend: Different physiology (source 1 - arm subject 1 and source 1 - arm subject 2), Different sources (source 1 - arm subject 1 and source 2 - arm subject 1), Different body parts (source 2 - arm subject 1 and source 2 - leg subject 1), and Different body parts and sources (source 1 - arm subject 1 and source 2 - leg subject 1). Bar plots contain mean and standard deviation.

      Noise Experiment

      We further assessed DINO by introducing white noise to the dataset. Given an image \(\mathcal{I}\), the input to DINO was \(\mathcal{I}_{\texttt{Noisy}} = \mathcal{I} + \epsilon \cdot \mathcal{N}(0, 1)\). We segmented five images from the domain Source 1 Arm Subject 1 and incrementally increased the white noise strength by tuning \(\epsilon\). We performed this last experiment to evaluate how the deep features and attention maps change as well as the resulting segmentation masks with increasing noise, gaining intuition on how robust DINO can be.

      As observed in the following figure, the Keys features and the attention weights start being affected by the noise at \(\epsilon = 2.0\). Keys features are less efficient at describing the bone from the surrounding structures, and the attention maps start shifting the attention to only the left side of the bone and the muscle line above the bone. Segmentation results show that with increased noise, some parts of the muscle are segmented and for \(\epsilon \geq 2.5\), the right side of the bone is not included on the segmentation mask.

      Taking a look at the metrics, the more the noise strength is increased, the lower the Dice and IoU values obtained. From little noise to the highest tested in this experiment, a reduction of about 50% for both Dice and IoU occurs.

      Results with noisy images. a) Original, Keys features, attention maps, and segmentation results for different values of $\epsilon$. b) Dice and IoU metrics for different values of $\epsilon$.

      Discussion

      In this project, we used a DINO ViT model to segment bone heads from ultrasound images using a zero-shot methodology involving clustering. We first studied how the model deep features change across different layers, and chose Key features as the most appropriate for characterizing bone. We then segmented bone from different image domains, initially employing batches of images from the same domain, and then combining them. Finally, we tested DINO and its robustness by adding additional noise.

      Encouraging results were found in the deep features of the model. We could appreciate how both Key and Query features were capable of differentiating bone, some muscle regions, and skin tissue. We also obtained surprisingly good segmentation masks for a zero-shot methodology on a new dataset as ultrasound b-mode images are. In particular, the image domain “source 1 arm subject 1” presented very similar segmentation masks compared to the labeled ones, giving an idea of how semantic features obtained by DINO extend beyond its training data domain, displaying astonishing generalization. Even when adding noise to the image dataset, DINO Key features kept describing the bone up to high noise strengths.

      While the project has yielded promising results, there are several limitations to take into account. First, we should note that the success of the zero-shot methodology has relied on an initial hyperparameter tuning, finding the threshold \(\tau\), the voting percentage, and the number of clusters. However, we are aware that the optimal configuration may vary across different datasets or imaging conditions. Additionally, we focused on segmenting only bone, but we have not explored the capabilities of DINO to segment other tissues or structures. We acknowledge that a comprehensive medical imaging solution should combine the segmentation of multiple relevant structures for a general understanding and application. Finally, only two anatomical parts (arm and leg) and two subjects were included in the dataset. To better explore the applicability of the model, a more diverse dataset containing more anatomical parts from more subjects should be considered.

      In conclusion, this project demonstrates the potential of employing the DINO ViT model for ultrasound bone segmentation using a zero-shot methodology. We believe that this work lays a foundation for future improvements, promoting a more comprehensive understanding of DINO’s capabilities in medical image segmentation.

      \ No newline at end of file diff --git a/blog/2023/SmartEmbeddingInitializations/index.html b/blog/2023/SmartEmbeddingInitializations/index.html new file mode 100644 index 00000000..4dda5fe8 --- /dev/null +++ b/blog/2023/SmartEmbeddingInitializations/index.html @@ -0,0 +1,42 @@ + Injecting Node Information via Embedding Initializations | 6.S898 Deep Learning Blogs 2023

      Injecting Node Information via Embedding Initializations

      Graph Neural Networks (GNNs) have revolutionized our approach to complex data structures, enabling a deeper understanding of relationships and patterns that traditional neural networks might miss. This project looks into the potential of embedding initializations in GNNs, particularly in the context of molecular function prediction and protein retrieval tasks. By investigating the effect of intentional, information-rich initializations versus random initializations, we aim to enhance the learning efficiency and accuracy of GNNs in these domains. Our study focuses on a precision medicine knowledge graph (PrimeKG) and employs TxGNN, a GNN model initially designed for disease-drug link prediction, repurposed for protein-molecular function link prediction. We explore the impact of using ESM embeddings for protein nodes, hypothesizing that these embeddings could provide structural information not explicitly present in the graph data. Through comparisons of the latent spaces and performances, we look to see the effectiveness of these embeddings in improving the model's predictive power on protein function.

      Introduction

      Graph Neural Networks (GNNs) have emerged as a transformative tool in machine learning, with the ability to capture the complex structures and relationships inherent in data. In molecular property prediction, for example, GNNs are great at encoding the atomic structure and intermolecular forces into high-dimensional embeddings, leading to more accurate predictions of chemical properties and drug efficacy. GNNs have also been used in traffic time prediction problems, physics simulations and social media analysis applications. Through message-passing and updating, GNNs are capable of learning embeddings that encode information about node neighbors and long-distance complex connections – that we, as humans, may not be able to make. The quality of the embeddings is not only important for the accuracy of the task the GNN is trained on, but quality node embeddings can be used through transfer learning – enabling models trained on one task to adapt and excel in another. The importance of good embeddings in GNNs is why we want to look closer at embedding initializations and whether we can inject additional information – not present in the graph – to result in better learned embeddings after training.

      Possible applications of initial embedding initializations could help in the field of drug discovery. For GNNs used for protein retrieval trained on a biomedical knowledge graph, using ESM embeddings for the proteins could add structure information that is not previously encoded in the graph entities.

      Project Outline

      We will explore the question: can additional node information be injected into the model by using intentional embedding initializations rather than random initializations? Furthermore, are the learned embeddings better representations of the nodes? To answer this question we will follow the steps outlined below:

      1. We will download a precision medicine knowledge graph and use a GNN, TxGNN, that is implemented for disease-drug link prediction on a biomedical knowledge graph as a baseline model.
      2. We will modify the GNN for protein-molecular function link prediction.
      3. Generate and download ESM embeddings for each protein
      4. Pretrain and finetune two models – one using random protein node initialization and one using ESM embeddings for protein node initialization. We must pretrain our own models, rather than use the already pretrained model, since we are focusing on how different node initializations impact the predictive power.
      5. Evaluate both models
      6. Visualize latent spaces before pretrain, after pretraining and after finetuning

      In reviewing the literature, we found several papers which reference the possibility of improved performance through a more informed initialization process. As discussed by Li et al., the initialization methods used for GNNs, such as Xavier random initialization were originally designed for CNNs and FNNs. In that setting, the Xavier approach helped to avoid vanishing gradients and maintain a constant information flux. However, Li et al. point out that by leveraging the structure of the graph, we can likely do better than the random initializations used previously .

      In the paper detailing TxGNN, Huang et al. present promising results on their ability to predict drug repurposing opportunities using a GNN . However, in their work they considered only the Xavier random initializations for weight matrices and node embeddings. This left open the idea of initializing the graph using more sophisticated methods.

      Previous work by Cui et al. has explored the power of artificial node initializations, finding that encoding structural and positional information in the node initializations can have profound effect on the ability of a GNN to accurately predict features based on the graph. They provide a basis for our investigation by showing the effect that initializations can have on the results, if done correctly. We seek to build on this work by testing the effect of injecting related, but not exactly equivalent information through the node initializations .

      Not only did we see an opportunity to try a different initialization method, but this problem also lent itself well to data-informed initializations. The molecules in TxGNN have a wealth of knowledge about them which is not represented in the linkages in the graph, some of which is represented in the ESM embeddings of the molecules. Thus, we thought that by supplying these embeddings to the GNN, we might be able to leverage the additional data to make better predictions.

      Data

      Precision Medicine Knowledge Graph. Figure credit: Building a knowledge graph to enable precision medicine (Chandak, Huang, Zitnik 2023).

      We used a precision medicine knowledge graph (PrimeKG) constructed by Marinka Zitnik’s group at Harvard . PrimeKG compiles data from knowledge bases that cover a broad variety of biomedical information including human disease, drug-protein interactions, genes and proteins with their associated biological processes, functions and cellular components, etc. PrimeKG contains 10 different node types – shown above – and 29 different types of undirected edges. There are over 120,000 nodes in total and over 8 million edges. What PrimeKG lacks, importantly, is any nodes or encodings of structural, molecular or sequential information for entity nodes such as proteins and drugs. The node types of interest for our model are proteins, extracted from NCBI, and molecular function Gene Ontology (GO) annotations . We will be predicting links between these two node types.

      The other data used were ESM embeddings for proteins in PrimeKG. ESM embeddings, or Evolutionary Scale Modeling embeddings, are high-dimensional vector representations of proteins, derived from advanced machine learning models developed by Meta trained on large datasets of protein sequences. These embeddings capture the intricate structural and functional characteristics of proteins, reflecting evolutionary relationships and biochemical properties that are crucial for various biological and computational applications . The reason we were interested in using ESM embeddings, rather than embeddings from other protein foundation models, was that structural information was not already captured in PrimeKG, as previously mentioned. To obtain the ESM embeddings, first we downloaded the amino acid sequence for each protein from NCBI using Entrez. Then, using these sequences as input to Facebook’s ESM2 model, we extracted the corresponding embedding.

      GNN

      The model we used as a baseline is TxGNN, a graph neural network trained on PrimeKG used to make therapeutic drug predictions for diseases . The GNN has two training phases. First, pretraining where the GNN finds biologically meaningful embeddings for all nodes in the knowledge graph, and therefore the objective is all link prediction. The second phase is to finetune the GNN, using self-supervised learning, to be able to predict drugs for diseases. Therefore, the objective for finetuning is to optimize contraindication and indication link prediction – the two types of links between diseases and drugs. We modified the training code for the finetuning phase, to train and validate on protein-molecular function links instead.

      Architecture

      The GNN has two linear layers with parameters n_input, n_hidden, and n_output. For all our models n_input is 1280, restricted by the length of ESM embeddings. We play around with different dimensions for the hidden and output layers. Leaky ReLU activation is used after the first layer.

      Training

      The first step of the training phase is node embedding initialization. The default, which is our random control, is to initialize all nodes using Xavier uniform initialization . Models referred to as random from here on out are referring to using Xavier uniform initialization. For our experimental model, we initialized the protein nodes using the ESM embeddings we obtained earlier. All other node types were still initialized with Xavier uniform initialization. Note that we reinitialized nodes between pretraining and finetuning.

      During the training phase, the GNN uses a standard message-passing algorithm to update and optimize the node embeddings. There is a relation-type specific weight matrix (for each of the 29 relation types) used to calculate relation-type specific messages. The message for one relation to some node $i$ is calculated using this equation: \begin{equation} m_{r, i}^{(l)} = W_{r, M}^{(l)} h_i^{(l-1)} \end{equation}

      For each node $v_i$, we aggregate incoming messages from neighboring nodes for each relation-type $r$, denoted as $N_r(i)$. This is done by taking the average of these messages: \begin{equation} m_{g_r, i}^{(l)} = \frac{1}{|N_r(i)|} \sum_{j \in N_r(i)} m_{r, j}^{(l)} \end{equation}

      The new node embedding is then updated by combining the node embedding from the last layer and the aggregated messages from all relations: \begin{equation} h_i^{(l)} = h_i^{(l-1)} + \sum_{r \in TR} m_{g_r, i}^{(l)} \end{equation}

      Finally, DistMult is used to calculate link prediction between two nodes using their respective embeddings.

      Model Evaluation

      We fixed all parameters and hyperparameters, and trained two models – one using random initializations and one using ESM embeddings. We pretrained for 3 epochs with a learning rate of $1e-3$ and a batch size of 1024. We then finetuned for 150 epochs with a learning rate of $5e-4$.

      These results are promising and using ESM embeddings to initialize the protein node representations slightly improves the model. The ESM model has a final testing loss of 0.3915, whereas the random model has a final testing loss of 0.4151. However, the difference between the models is slim and may not be significant, especially looking at the similarities in the pretraining, training and validation loss curves. Later, we will look more in depth about how the embedding spaces vary between the 2 models which has the potential to yield more interesting results.

      Testing varying hidden and output layer dimensions

      We wanted to see the impact changing the hidden and output layer dimensions would have on model performance. We tested 3 models, with parameters detailed in Table 1. All models outside of this experiment, unless otherwise specified, have the same parameters as Model 1.

        Input Dimensions Hidden Layer Dim. Output Layer Dim.
      Model 1 1280 1280 1280
      Model 2 1280 512 512
      Model 3 1280 512 128

      We can see from the testing loss that when just comparing ESM initialized model, testing loss increases as the output layer decreases. The same trend holds true between random initialized models. We can also see that when comparing ESM and random models for the same layer dimensions, ESM always slightly outperforms the random model.

      Latent Space Visualizations

      In the fast-evolving world of deep learning, the analysis of model latent spaces has emerged as an interesting area of study, especially to get a better understanding of how models are achieving their tasks. These spaces are important to understanding how complex models like GNNs perceive and process the intricate relationships and structures inherent in graph data. GNNs can learn powerful representations that capture both node-level and graph-level features. By analyzing the latent spaces of GNNs, we can get insights into how these models prioritize various patterns and connections within the data. The following analyses visualize the latent spaces of our models, clustered and colored in different ways, to get a deeper understanding of how the ESM initialized embeddings are affecting the GNN.

      We first were curious whether, after training our model, the final embeddings retained structural information about the proteins. To do this, we first clustered the original ESM embeddings using K-Means clustering. Next, we visualized the embedding space of the original ESM embeddings, the final embeddings from the ESM model and the final embeddings from the random model using t-distributed Stochastic Neighbor Embedding (t-SNE) for dimensionality reduction. From the t-SNE plot of original ESM embeddings, we can clearly see the clusters from K-Means which serves as a verification of our clustering technique.

      Looking at the embedding space for the ESM and random models, colored by ESM clusters, we note that most of the ESM structural information (used to create the clusters) seems to be forgotten during the training process, as evidenced by the mostly random assortment of colors present in the t-SNE plot. We note that some clusters do remain, for example cluster 12 (light sage green on the right side of the ESM initialized plots) is still clustering in the final embeddings (top middle cluster). However, the most prominent ones appear in both the ESM initialized and random initialized data, meaning that the ESM embedding did encode some function, but the model using random initialized embeddings was able to capture that relation as well.

      Given that the final embedding space for the ESM model didn’t seem to retain much of the information for the ESM embedding initialization, we were curious whether the ESM and random embeddings converged to a similar space. To test this theory, we clustered the final ESM model embeddings and subsequently visualized the final embeddings of the ESM and random models using t-SNE and colored by those clusters.

      If the two models converged to similar embedding spaces, we’d expect to see that clusters found in one embedding space would also be found in the other. This is the case, as seen in the two plots below. Both plots are colored based on a clustering of the final embeddings generated by the ESM initialized network, and they share many of the same structures, indicating that the two networks were able to pick up on mostly the same features in the underlying information. Both models converged to a similar embedding space despite different initialization methods.

      Testing varying hidden and output layer dimensions

      As mentioned earlier, we tested different dimensions for the hidden and output layers to see whether more or fewer output dimensions would retain the original ESM embedding information.

      Although there are more distinct clusters on the t-SNE plots as the number of output dimensions increases, these clusters are not the same as the clusters from the original ESM embeddings (seen by the randomly colored dots). Therefore, none of these 3 models retained the structural information provided by initializing with ESM embeddings. It does not seem that decreasing output and hidden layer dimensions improves the model performance or latent space of our GNN.

      Clustering by molecular function labels

      Because our model’s task was to predict links between protein and molecular function nodes, we were curious to see if the final embeddings for the protein nodes would cluster well on the function labels. However, this wasn’t as straightforward as having 1 molecular function label for each protein node, because each protein may be linked to multiple molecular functions. One protein may have multiple molecular function Gene Ontology (GO) annotations because the GO database uses a hierarchical system to categorize functions, where broader functions encompass more specific ones. A protein can be involved in several distinct biochemical activities, each represented by its own GO term, reflecting the diverse roles a protein can play in the cell. Instead of a single label, we extracted a molecular function profile, $v_i$, for each protein where $v_i[j] = 1$ if a link exists between protein $i$ and function $j$. We then had a sparse matrix, $V^{i \times j}$. Before clustering, we performed dimensionality reduction using truncated SVD which is optimal for sparse matrices. Finally, we performed K-Means clustering.

      Looking at the t-SNE plots, there is no apparent clustering by molecular function profiles in the final embedding spaces for either the ESM model or the randomly initialized model. There are multiple possible explanations for this. One explanation is that the actual objective is to predict each individual link between a protein and a function node, not to do well at predicting all function nodes linked to a protein at once. On top of that, our GNN uses self-supervised learning, therefore the molecular function profiles are not true labels used during training.

      The second plausible explanation has to do once again with the hierarchical nature of molecular function GO annotations. Because the molecular function nodes have random indices when stored in PrimeKG, it is not apparent that molecular functions that have the same parent function are close to each other, or to their parent function, in the molecular function profiles. Therefore, when performing truncated SVD and subsequently k-means clustering, similar functions may not be clustered together if their indices are far apart. Further analysis could be done to reorder the molecular function nodes and then conduct hierarchical clustering, instead of k-means. These possible clusters may then be found in the final latent spaces for the two models.

      Discussion

      In this post, we have modified and fine-tuned a Graph Neural Network, TxGNN originally designed for drug-repurposing prediction, for protein function prediction with a variety of initializations of the node embeddings. We observed that while much of the information in the initialization is forgotten during the training process, a small amount is retained, leading to slightly better performance on the test set in the final network. This provides a potential avenue for further study, investigating the overall effects of informed initialization techniques on GNN performance. Some of this investigation is discussed in Li et al. , where they experiment with weight matrix initializations and propose a new paradigm for determining weight initializations, but there is certainly more investigation to be done.

      \ No newline at end of file diff --git a/blog/2023/Structural_vs_Data_Inductive_Bias/index.html b/blog/2023/Structural_vs_Data_Inductive_Bias/index.html new file mode 100644 index 00000000..1bbc56a5 --- /dev/null +++ b/blog/2023/Structural_vs_Data_Inductive_Bias/index.html @@ -0,0 +1,42 @@ + Structural vs Data Inductive Bias | 6.S898 Deep Learning Blogs 2023

      Structural vs Data Inductive Bias

      Class project proposal

      Introduction

      Lack of Training Data

      The transformative impact of vision transformer (ViT) architectures in the realm of deep learning has been profound, with their applications swiftly extending from computer vision tasks, competing with traditional neural network architectures like convolutional neural networks (CNNs). Despite their success, the intricacies of how architectural variations within ViTs influence their performance under different data conditions remain largely uncharted. Unraveling these subtleties

      Project Goal

      While much research has been done to find the best choice of data augmentation or the best structural change in the model to increase performance, our project empirically compares two kinds of methods:

      • Data augmentation through tuning-free procedures
      • Explicit inductive bias through discrete attention masking For data augmentation, we chose a simple-to-use procedure called TrivialAugment to increase by four times the amount of training data. Here we want an easy-to-use method that could help as a benchmark for the second method.

      For explicit inductive bias, we use a general vision transformer architecture which allows us to change the number of attention heads and layers where the mask would be applied; this mask is what explicitly induces a bias in the model by forcing some layers to only learn relationships between close patches of the data.

      Our goal with this comparison and the difference with previous works is that we want to experiment to which point one method could be better than the other by really compensating for the lack of information in the training of a vision transformer.

      Due to computational and time limitations, we would train our model in a simple task of image classification based on CINIC-10. We also use a tiny model to be able to iterate many times through different scenarios of inductive bias. The selection of methods also reinforces these limitations but are a good starting point as many of the projects that would be lacking in training data probably are in testing phases where light tools like Google Colab are used.

      Contribution

      The result from this project contributes in two ways. First, it gives us a glance of how beneficial the level of proposed inductive bias in the performance of the model could be, and second, it contrasts which method, and until which point, performs better given different scenarios of initial training data available.

      Data Augmentation

      Data augmentation consists in applying certain transformations to the data in order to create new examples with the same semantic meaning as the original data. For images, data augmentation consists in spatial transformations like cropping, zooming or flipping. Although data augmentation is very popular among practitioners, previous works like have proven that data augmentation by itself reaches a saturation point where it is even worse than training in the new data, decreasing the performance of the model. Given our goal of comparing data augmentation with inductive bias, we expect to get a similar result in the efficacy of data augmentation while we increase the initial amount of data.

      Data augmentation decisions can be tough because of the many options available to perform, but it is so popular that some researchers are trying to make it easier to use and computationally efficient, one example being the TrivialAugment method, where simple random augmentation can be compared to or outperform other more complex algorithms that try to find the best augmentation for the given dataset. TrivialAugment would be the procedure used in this project given its simplicity.

      Changes in Architecture

      To compensate for the lack of training data for vision transformers, an interesting approach from is to use instance discrimination techniques which adjust the loss function of the model to improve the representation of the datapoints getting high accuracy scores for datasets with only 2k samples. The model proposed is trained from scratch with few data, but its implementation and interpretability could be troublesome for small projects.

      Other authors in use a set of pre-trained models with complementary structures (Convolutional and Involutional) to help a lightweight visual transformer model called DeiT (Data-efficient image Transformer) increase its performance by getting a baseline result that is added as a token and works like an induced bias of the properties of the image. The scores from the pre-trained models give more information than the ground truth because they give a value of likelihood for every class, which is a result of the different attributes of the specific image.

      Although these changes demonstrate that it is possible to get better performance with few data without augmentation, it is not clear how we can adjust the inductive bias produced to identify until which point it works. The usage of pre-trained models is also not desirable here because of our premise that we could be using this experiment to make decisions in new datasets and tasks.

      Explicit Inductive Bias

      The model proposed in is a better example of real lack of training data overcome by introducing a different kind of attention heads. In this case, medical images tend to have the same orientation, property that is leveraged to force the attention heads to focus on axial information which normally represents the spread of tumors. Here the inductive bias is that the image has a structure where patches aligned in rows and columns are more related between them than diagonal ones.

      Following this path, , and try to apply the local induced bias of convolutional networks in a transformer by different methods. adds a new layer at the beginning of the model which acts like a local mask but with variable learnable attention levels, where the model figures out how much local attention it should apply given the proposed task. on the other hand add new convolutional layers in parallel to the transformer to let them capture the local information while letting the original transformer to keep the big picture of the image. Finally, in it is proposed a change in the initial attention layers, making them GPSA (Gated Positional Self-Attention) which learn for each patch if pay more attention to the attention product (Query * Key) or the position of the patch in the image.

      From these works it is stated that some layers of the transformer converge to convolutional behaviors given the nature of the data used for training, but this requires a relatively big amount of data that could not be available. It is also noticed that the inductive bias is applied to the first layers of the model.

      The model proposed in uses a simpler method which consists in applying a mask pattern to some of the attention heads to induce local attention bias into the model. To decide which heads and layers should be masked, it uses a soft masking approach where the model learns a scale factor between 0 and 1 which sets the level of local inductive bias that is applied to that head. The results show that it is possible to obtain good results by using more local masking in the first layers and keeping the global interaction in the last ones. This approach is also model agnostic and easy to implement, which is why it is close to the experimentation of this project.

      The power of this masking method is also shown in where the mask is learned by a parallel process of pixel-wise classification, successfully increasing the performance in more complex tasks like pixel-wise segmentation.

      Methods and Experiment

      To explore and compare the benefits of data augmentation versus induced bias we are running three related experiments. All experiments would be run with the CINIC-10 dataset in Google Colab using a T4 GPU. We decided to use CINIC-10 instead of CIFAR-10 because even though it is a drop-in replacement of CIFAR-10, it is much larger than CIFAR-10, so we can test on different numbers of base training samples, but not so large as ImageNet, which is too large/difficult to test.

      Experiment 1

      The goal of the first experiment is to get a glance of the overall differences in accuracy for the compared methods. The model used for this experiment consists of a basic visual transformer with six layers and linear positional embeddings. Each layer corresponds to a multiheaded attention layer with only two heads. The schematic of the model can be seen in figure 1.

      Figure 1

      By default, the attention heads in the model are fully connected to give them a global behavior, but the model can be configured to apply a local pattern mask or a sparse pattern mask to all heads in all layers.

      Figure 2

      The model would be trained with different scenarios of initial data, in specific, with 1000, 2000, 5000, 12500 and 20000 samples. In each scenario, we would get four different models:

      • Baseline model: Without data augmentation and with default global attention
      • Data augmentation: With data augmentation and default global attention
      • Local attention: Without data augmentation and with local attention
      • Sparse attention: Without data augmentation and with sparse attention

      The data augmentation technique would be TrivialAugment and the metric would be accuracy on validation dataset. We set these four models trying not to mix data augmentation with changes in the induced bias, keeping the default global attention in the transformer as our baseline.

      Experiment 2

      Having experimented with the differences where all layers have the same mask, we now set experiments to play with the level of induced bias applied to the model. The goal now is to identify a relation between the level of induced bias applied to the model and their performance. For this experiment we modify our first model in the following ways:

      • We increase the number of attention heads in each layer from 2 to 4
      • We set the final two layers to global attention, so the mask is not applied to them
      • We configure each head in the first four layers to be able to be hard configured as either local or global attention.

      Figure 3

      With this new model, we can create one instance for each combination of global/local head in any of the first four layers, generating a sense of “level of induced bias” based on the number and configuration of attention heads treated as local.

      Given computational limitations, we would set only two initial data scenarios (10000 and 50000) and get 16 models for each scenario:

      • Baseline model: Without augmentation and with all global attention
      • Data augmentation: With data augmentation and all global attention
      • 14 combinations of local heads and layers:

      Table 1

      We would analyze the differences in accuracy between different levels of induced bias in the same initial data scenario and see if we can get a selection of best performing inductive bias levels to apply them more broadly in the third experiment.

      With this comparison we also want to capture what are the visual differences between the attention heads in the different levels of induced bias to try to explain what is doing better or worse than the baseline.

      Experiment 3

      Our final experiment consists in comparing the accuracy and the effective additional data (EAD) that each method brings when applied to different initial amounts of data. The initial data scenarios to train the models would be 1000, 5000, 10000, 20000, and 50000 samples. The comparison would be made between the data augmentation model for each scenario, versus the top 3 levels of induced bias from experiment 2.

      The effective additional data (EAD) represents the extra amount of real data that the method is compensating, the higher the better to be considered as a successful method for solving lack of data problems. This metric is calculated by looking at which scenario of initial data would make the baseline model perform equal to the method analyzed.

      Results

      Experiment 1

      In our initial experiment, we compared performance on four variations of model scenarios. Our baseline model uses global attention mechanism, one uses local attention mechanism, another one uses sparse attention mechanism, and the last model uses the same global attention mechanism as the first model except that data augmentation is applied during its training process. One notable callout for our initial experiment is that we took a naïve approach and designed our local and sparse attention heads to be in all six attention layers of the transformer. We trained and collected the validation accuracy and training time for each model variation for different numbers of base training samples from 1000 to 20000. Below are the results.

      Result and Analysis

      Figure 4

      Figure 5

      There are a few notable observations to point out from the results. First, we can see that the two models using the local attention mechanism or sparse attention mechanism performed significantly worse than our baseline model that used global attention. Though we did expect this to happen since CINIC-10’s classification task intuitively requires a global context of the image, we did not foresee the performance difference to be so drastic. For example, when the base number of training data is 5000, we see that the baseline model achieves a validation accuracy of 62.5% while the local attention model achieves just 13.97% and the sparse attention model 42.64%. We observe a similar pattern across different levels of base samples. It’s also worth calling out that sparse attention models perform better than local attention models. This makes sense as sparse attention models still take into consideration the global context just not completely on all the patches. Nevertheless, the sparse attention model takes almost the same amount of time to train as the baseline model, hence it does not make sense to use it in lieu of the baseline model in practice. On the flip side, we verify that data augmentation improves performance and is the most significant when the number of base samples is small.

      Experiment 2

      Our first experiment showed that simply setting all attention layers to contain only local or sparse attention heads does not produce good performance. As we were exploring additional datasets or tasks where applying a different attention mechanism may yield better performance, we came across a paper which alluded to the idea that applying the local attention mechanism to only a few beginning layers of the network may improve performance even when the task intuitively requires a global context. The rationale behind it is that perhaps through using local attention mechanism, the model can create its own convolution, making understanding of local information better before using that information to answer a task that requires global context. With this inspiration, we designed our second experiment trying out different combinations of local attention heads and layers, as described in the Methods and Experiments section, and below are the results and analysis.

      Result and Analysis

      Figure 6

      Here, the two matrices outline the validation accuracies we got when we trained the different local attention mechanism model on 10k and 50k base training samples. A quick recap, 1 Local Head and 1 Layer means we would use 1 local attention head in the 1st layer of the transformer. The color gradient in each matrix indicates the best performing combination from best (red) to worst (green).

      A few patterns can be noticed. First, for both matrices, models in the bottom right corner, representing a high number of local heads and in more layers, are performing worse than the rest. This aligns with our intuition from our first experiment because having more local attention heads in deeper portions of network will prevent the models from capturing global context, thus resulting in a worse performance.

      Figure 7

      Diving further, in figure 7, we visualize the attention weights to better compare different levels of induced bias. It seems that the performance increases as we add more local heads, but it ends up fading and not capturing the important characteristics of the data. In the 50k samples scenario it can be noticed that with more local heads, the attention spots converge to small parts of the image where there is no information about the object in it.

      Figure 8

      Moreover, in figure 8, it can be noticed that when local heads are used, the model correctly identifies smaller details of the image. In this case, with all heads being global, it is hard to identify the three different cows in the middle image, but when some local heads are used, we can capture them.

      In summary, the major result of this experiment is that some models in the 10k samples sub-experiment produced better results than the base model. This is promising and validates our hypothesis from before. Though no combination produced better results in the 50k samples sub-experiment, we showed in Figure 8 that having local attentions can still be beneficial as it is able to capture some details that the baseline model misses.

      Experiment 3

      From the second experiment, we were then intrigued to see how some of the better performing models do under different number of base samples than just 10k and 50k. So, we pick three combinations (2 local heads for 2 layers, 1 local head for 2 layers, 3 local heads for 1 layer) and tested their performance against the baseline model and baseline + data augmentation for different number of base training samples from 5000 to 50k. Below are the results and analysis.

      Result and Analysis

      Figure 9

      Figure 10

      Here, we can observe two very interesting trends. First, it validates our hypothesis that using local attention heads early in the layers of the vision transformer can improve performance despite the fact that the task intuitively requires global context. This outcome is true for all three variations of the local attention models when the numbers of base training samples are 1000, 5000, and 10000. However, this effect tapers off when the number of base samples is sufficiently large, and the baseline model performs better. This seems to suggest that the benefit of the inductive bias coming from the local heads no longer outweighs the lack of information of the dataset. In other words, once there is sufficient data, the baseline model has enough information to learn a better representation on its own than that of the modified models.

      Figure 11

      Another perhaps more explicit and comparable way of explaining the phenomenon is to look at the Effective Extra Sample score. Essentially, the data tells us how much extra (or less) training data the change in model architecture gets us to achieve the same performance accuracy if using the baseline model. This graph clearly illustrates that data augmentation and tuning of local attention heads are very effective when the training datasets are relatively small, less than 15000 samples. This is likely because the inductive bias of the local attention heads causes the models to capture important characteristics of the image more efficiently and effectively than does the baseline model. However, once the number of base training samples gets over 20000, the effect reverses and they all perform worse than the baseline model, as illustrated by the negative effective training samples.

      Note: We did not plot the extra effective data for the data augmentation model scenario past 10000 base training samples as its performance dropped significantly and behaved erratically.

      Conclusion

      Through different experimentations, both data augmentation and induced bias by discrete attention masking can compensate for the lack of data for a given problem, but this compensation is only noticeable when the initial data is very low.

      The maximum effective additional data that the data augmentation method creates is higher than the induced bias method, but there is a sweet spot where induced bias is better than both data augmentation and baseline model.

      Once the initial amount of data starts to increase, data augmentation is the first one that in fact worsens the performance of the model. Induced bias on the other hand looks more stable while the initial data is increasing but is still not significantly better than the baseline model.

      We have shown that induced bias can help identify local attributes of the image more easily than the baseline alone, but this is only leveraged when the task that we want to solve is more specific and cannot be appreciated in a general task like image classification.

      Limitations and Next Steps

      Given the restricted resources and amount of time available to execute this project, there is enough room for continuing research on this topic:

      • We tried to make the data augmentation and inductive bias methods simple and easy to play with, but they could not be the best ones. The same procedures of this project can be applied to better and more complex types of data augmentation and induced bias to see if the results are replicable in other situations.
      • Further experimentation could be done with datasets with multiple tasks and a deeper model to see if the type of task has an impact on the effectiveness of one method or the other. This could also be applied in recent real-world problems where there is not enough data yet, but we can clearly identify the underlying relationship between patches of the images.
      • Given a deeper model and a lot more experimentation in the level of inductive bias, there is an opportunity to empirically try to make a regression between how much inductive bias is applied to the model vs the resulting change in performance. The results of this project are not enough to implement such relations.
      \ No newline at end of file diff --git a/blog/2023/Symmetry-Optimization/index.html b/blog/2023/Symmetry-Optimization/index.html new file mode 100644 index 00000000..9a977659 --- /dev/null +++ b/blog/2023/Symmetry-Optimization/index.html @@ -0,0 +1,42 @@ + Investigating the Impact of Symmetric Optimization Algorithms on Learnability | 6.S898 Deep Learning Blogs 2023

      Investigating the Impact of Symmetric Optimization Algorithms on Learnability

      Recent theoretical papers in machine learning have raised concerns about the impact of symmetric optimization algorithms on learnability, citing hardness results from theoretical computer science. This project aims to empirically investigate and validate these theoretical claims by designing and conducting experiments as understanding the role of optimization algorithms in the learning process is crucial for advancing the field of machine learning.

      Introduction

      Neural networks have been a staple in Deep Learning due to their expressive power. While the architecture itself is very powerful, the process of \textit{optimizing} the neural network - i.e., finding the values of the parameters of the network that minimize the loss over training data - is approximate. After all, a neural network is a highly non-smooth function and is inherently difficult to optimize. The core idea of many of these methods is to approximate the neural network (i.e. via first or second-order approximations), which are then easier to optimize over.

      Our goal is to explore if incorporating “asymmetries” into optimization can help. Many methods use a similar update rule for all parameters in the network. We experiment with using different rules for different parameters, guided by heuristics.

      Motivation: a couple of nice papers

      This project is motivated by a couple results, not necessarily in the context of neural networks. The first comes from a 2004 paper by Andrew Ng titled “Feature Selection, \(L_1\) vs. \(L_2\) regularization, and rotational invariance”. It concerns the sample complexity of feature selection - how much training data is necessary to fit the model to some accuracy with high probability - where the number of relevant features is small compared to the total number of features. The paper shows that the mode of regularization is of utmost importance to the sample complexity: the sample complexity using $L_2$ regularization is exponential compared to the sample complexity with $L_1$ regularization. One may ask: what does this have to do with symmetry? In the case of $L_2$ regularization, the classifier remains the same even when the training data is rotated (i.e. the data is pre-multiplied by a rotation matrix). More aptly, logistic regression with $L_2$ regularization is \textit{rotationally invariant}. This is not the case for $L_1$ regularization. For the precise statements, see the theorems from the paper below:

      Theorem: Sample complexity with $L_1$-regularized logistic regression

      Let any $\epsilon>0, \delta>0, C>0, K\geq 1$ be given, and let $0<\gamma<1$ be a fixed constant. Suppose there exist $r$ indices $1\leq i_1, i_2,\ldots i_r\leq n$, and a parameter vector \(\theta^*\in\mathbb{R}^n\) such that only the $r$ corresponding components of $\theta^*$ are non-zero, and \(|\theta_{i_j}^*|\leq K\) ($j=1,\ldots r$). Suppose further that \(C\geq rK\). Then, in order to guarantee that, with probability at least $1-\delta$, the parameters $\hat{\theta}$ output by our learning algorithm does nearly as well as \(\theta^*\), i.e., that \(\epsilon^l(\hat{\theta})\leq \epsilon^l(\theta^*)+\epsilon,\) it suffices that \(m=\Omega((\log n)\cdot \text{poly}(r, K, \log(1/\delta), 1/\epsilon, C)).\)

      Theorem: Sample complexity for rotationally invariant algorithms (including $L_2$-regularized logistic regression)

      Let $L$ be any rotationally invariant learning algorithm, and let any $0<\epsilon<1/8, 0<\delta<1/100$ be fixed. Then there exists a learning problem $\mathscr{D}$ so that: $(i)$ The labels are deterministically related to the inputs according to $y=1$ if $x_1\geq t$, $y=0$ otherwise for some $t$, and $(ii)$ In order for $L$ to attain $\epsilon$ or lower $0/1$ misclassification error with probability at least $1-\delta$, it is necessary that the training set size be at least \(m=\Omega(n/\epsilon)\)

      While this example is nice and shows us how symmetry can be harmful, it concerns the symmetry of the algorithm disregarding optimization. A 2022 paper by Abbe and Adsera specializes the effects of symmetry to neural networks trained by gradient descent (more on this later). This paper uses a notion of symmetry called \textit{G-equivariance}. See the definition below:

      (Definition: $G-$equivariance) A randomized algorithm $A$ that takes in a data distribution $\mathcal{D}\in\mathcal{P}(\mathcal{X}\times\mathcal{Y})$ and outputs a function $\mathcal{A}(\mathcal{D}): \mathcal{X}\rightarrow\mathcal{Y}$ is said to be $G-$equivariant if for all $g\in G$ \(\mathcal{A}(\mathcal{D})\overset{d}{=}\mathcal{A}(g(\mathcal{D}))\circ g\)

      Here $g$ is a group element that acts on the data space $\mathcal{X}$, and so is viewed as a function $g:\mathcal{X}\rightarrow\mathcal{X}$, and $g(\mathcal{D})$ is the distribution of ${g(\mathbf{x}),y}$ where $(\mathbf{x}, y)\sim\mathcal{D}$

      More simply, an algorithm is G-equivariant if the prediction function produced by the algorithm does not vary when the data distribution is transformed according to $G$ (i.e., a group element $g$ is applied to the data distribution). Note the algorithm includes optimizing parameters: an example of a G-equivariant algorithm is learning a fully-connected neural network via SGD with Gaussian initialization, which is equivariant with respect to orthogonal transformations. More generally, neural networks trained with SGD or noisy GD hold G-equivariance. The paper claims that G-equivariant algorithms are limited in which functions they can learn. This is stated informally in the following theorem, where the G-alignment is a (rather complicated) measure of distance:

      GD lower bound, informal statement: Limitations of G-equivariant algorithms

      Let \(\mathcal{D}_f\in\mathcal{P}(\mathcal{X}\times\mathbb{R})\) be the distribution of \((\mathbf{x}, f(\mathbf{x}))\) for \(\mathbf{x}\sim \mu_\mathcal{X}\). If \(\mu_\mathcal{X}\) is \(G-\)invariant and the \(G-\)alignment of \((\mu_\mathcal{X},f)\) is small, then \(f\) cannot be efficiently learned by a $G-$equivariant GD algorithm.

      We refer readers interested in further details and the proof of the theorem to the paper. The paper is quite nice and we encourage readers interested in theory to take a look at it. All in all, the paper suggests training neural networks with SGD is not necessarily the way to go. Therefore, we consider variants of GD that prove to perform better in practice. We first introduce gradient descent and a popular variant: Adam.

      Overview of existing optimization algorithms

      Gradient Descent

      The most widely-used optimization algorithms are some version of \textit{gradient descent}. Gradient descent iteratively updates the parameter values, moving the parameter in the direction of steepest descent (given by the negative of the gradient of the loss with respect to the parameter). Essentially, gradient descent uses a first-order approximation of the loss. The amount by which the parameter is moved in this direction is referred to as \textit{learning rate} or step size, typically denoted by $\eta$. The update rule is given by \(\theta^{t+1}= \theta^t - \eta_t\nabla_{\theta}\mathscr{L}_{\mathscr{D}}(\theta^t)\) where the subscript on $\eta$ indicates a learning rate that can be changed over time. Common strategies for varying $\eta$ over time consist of decaying $\eta$, whether it be a linear or exponential decay (or something in between). In practice, \textit{stochastic} gradient descent (SGD) is used. In SGD, instead of computing the gradient for each datapoint, the gradient is approximated by taking the average of the gradients at a subset (i.e. batch) of the data. A variation of gradient descent incorporates the concept of momentum. With momentum, the increment to the parameter is a constant \(\mu\), the momentum parameter, times the previous increment, plus the update we saw in GD: \(\eta_t\nabla_{\theta}\mathscr{L}_{\mathscr{D}}(\theta^t)\). In other words, the increment is a weighted average of the previous increment and the typical GD update. Too high of a momentum can lead to overshooting the minimizer, analogous to how too high of a learning rate in GD can lead to divergence.

      Adam

      The most popular optimizer in practice is called Adam, which performs well compared to alternative optimizers. Adam is a gradient-based method which uses the gradient as well as the squared gradient (computed from batches), as well as an exponential decay scheme, to iteratively update $\theta$. It estimates the first and second moments of the gradient from the batch computations, and uses these estimates in its update rule. Adam requires three parameters: the learning rate, and one each for the rate of exponential decays of the moment estimates of the gradients. Adam consistently outperforms standard SGD. The optimization we present is based upon Adam, with a few modifications.

      We briefly note that these methods are \textit{first-order methods}: they only consider first derivatives, i.e. the gradient. Second-order methods, such as Newton’s method, should theoretically be better because the approximation of the function will be better. However, the computation of the Hessian is rather cumbersome in neural networks, which is why they are not typically used.

      Automatic Gradient Descent

      Another method we consider is Automatic Gradient Descent (AGD), which is developed in recent literature (co-authored by our very own instructor, Jeremy Bernstein!). This paper attempts to get rid of the pesky hyperparameter-tuning stage that is involved in training neural networks, leading to \textit{hyperparameter transfer}. In practice, a variety of learning rates is tested during training. In addition, this learning rate may not “transfer” across architectures: if one were to make their neural network wider or deeper, they would most likely have to search for the optimal learning rate once again. Automatic Gradient Descent attempts to solve this problem by coming up with an update that is architecture-independent in the realm of MLPs. AGD operates by computing an upperbound for the loss after the update (i.e. $\mathscr{L}(\mathbf{w}+\Delta\mathbf{w})$, where $\mathbf{w}$ is the parameter we are optimizing), then optimizing this upperbound in $\Delta\mathbf{w}$ to find the best step size. This step size is then used to update the parameter, and is recalculated at each iteration. The algorithm uses spectrally-normalized weight matrices, which allows for a nice upperbound for the loss function allowing for the optimal choice of $\eta$ to be solved for (in particular, it allows for matrix inequalities involving matrix norms to be used). The algorithm is given in full below:

      We include AGD in this discussion because it is an asymmetric algorithm: the weights are normalized in a layer-dependent fashion. In addition, it takes a stab at alleviating the annoying task of hyperparameter tuning. We see in practice, however, that it does not perform as well as Adam. This is presumably because the approximation of the loss function via upperbounding with matrix inequalities is not tight, or maybe because the model does not incorporate biases as presented in the paper.
      We now begin discussion of our method, which has been crafted after studying these existing methods and taking into account the potential disbenefits of asymmetry.

      Extension of AGD to regularized losses

      We found the idea of AGD to be very nice, and in an attempt to understand it better, decided to explore one of the further directions listed in the paper: applying the method to regularized losses. The work in the paper applies to losses of the form $\frac{1}{N}\sum_{(x, y)}l(f_w(x), y)$. However, a more general loss includes a regularization term: \(\mathcal{L}(w)=\frac{1}{N}\sum_{(x, y)}l(f_w(x), y)+\lambda R(w)\) where $R(w)$ is a regularization term. For our purposes, we assume $l$ to be the squared-loss and $R(w)$ to be the $L_2$ norm of $w$. We shorthand $\frac{1}{N}\sum_{(x, y)}l(f_w(x), y)$ to $\hat{l}$. Below, we derive the learning rate, in the context of AGD (i.e. with the spectrally normalized weights and same form of update), for this regularized loss:

      We have omitted a lot of intermediary steps involving matrix inequalities and derivatives - see the paper on AGD if you are interested in the details! We remark that this choice of $\eta$ depends on $\lambda$, so hyperparameter tuning is still necessary. Some dependence on the architecture shows up in $\eta$, namely $\Tilde{d}$. However, as the network scales this parameter can stay constant. We are interested in how this will perform in practice - check the blog for updates on this!

      Introducing Asymmetric Nature

      Our initial experiment involved a two-layer neural network (width: 1024) trained on the MNIST Dataset using three distinct learning algorithms: i) AGD (gain = 1), ii) Default Adam, and iii) Adam with diverse hyperparameters for both layers. The graph below showcases the resulting loss and accuracy. The first graph showcase loss while the second one showcase accuracy.

      Given MNIST’s high accuracy even with minimal epochs, the distinction wasn’t apparent. Notably, while the asymmetric algorithm matched or outperformed default Adam, fine-tuning Adam’s hyperparameters yielded superior performance.

      Inspired by AGD’s removal of the learning rate hyperparameter, we crafted two AGD variations for comparison with SGD and the original AGD.

      Variation 1

      This variation incorporated momentum into AGD, integrating AGD’s learning rate and gradient summary with momentum’s past and current gradients. Surprisingly, this had minimal impact, indicating the optimality of gradient summary and learning rate.

      Variation 2

      Here, instead of typical momentum, we introduced layer-wise asymmetry, acknowledging each layer’s varying impact on loss. Adjusting each layer’s learning rate inversely proportional to its number resulted in notable performance differences!

      Results from training under these algorithms using the cifar-10 Dataset and MSE Loss are depicted in the subsequent diagram.

      Evaluation Metrics

      Emphasizing learnability, we adopt the ordering concept over exact measures. Algorithm $A_1$ is deemed superior to $A_2$ if its expected learning ability (distinguishing correct/incorrect classifications) surpasses that of $A_2$. This learning ability, resembling a Beta distribution, is directly proportional to the current accuracy. Therefore, we made our evaluation on accuracy and loss graphs over epochs.

      Conclusion

      Our blog offers insights into optimizing neural networks and advocates for the potential benefits of asymmetry in training processes. We trust you found our journey as engaging as we did in developing it!

      Citations

      Ng, Andrew Y. ”Feature selection, L 1 vs. L 2 regularization, and rotational invariance.” Proceedings of the twenty-first international conference on Machine learning. 2004.

      Bernstein, Jeremy, et al. ”Automatic Gradient Descent: Deep Learning without Hyperparameters.” arXiv preprint arXiv:2304.05187 (2023).

      Bernstein, Jeremy, et al. ”Automatic Gradient Descent: Deep Learning without Hyperparameters.” arXiv preprint arXiv:2304.05187 (2023).

      Kingma, Diederik P., and Jimmy Ba. ”Adam: A method for stochastic optimization.” arXiv preprint arXiv:1412.6980 (2014).

      Philipp, George, Dawn Song, and Jaime G. Carbonell. ”The exploding gradient problem demystified- definition, prevalence, impact, origin, tradeoffs, and solutions.” arXiv preprint arXiv:1712.05577 (2017).

      \ No newline at end of file diff --git a/blog/2023/Tracking-Multiple-Objects/index.html b/blog/2023/Tracking-Multiple-Objects/index.html new file mode 100644 index 00000000..89fb6132 --- /dev/null +++ b/blog/2023/Tracking-Multiple-Objects/index.html @@ -0,0 +1,32 @@ + Diffusion Models on Low-Brightness Images | 6.S898 Deep Learning Blogs 2023

      Diffusion Models on Low-Brightness Images

      Diffusion models have been used with great success for a number of use cases, but they still remain largely unused on dim images. The primary related work has been on using a diffusion model for low-light image enhancement. However, most of these works agree that attempting to generate an image from noise generated on top of an already dim image often results in rgb shift and global degradation of the image. This is because a diffusion model adds noise to the given image and then attempts to denoise the image, so given a dim and low-contrast image, the model has a difficult time denoising. This blog post focuses on methods to improve diffusion model performance in low-light images

      Introduction

      Since the introduction of ChatGPT, everyone seems to be speaking about “generative AI,” with almost 15x more google searches for generative AI now than at this time last year. This blog post focuses on a specific use case for diffusion models, which have applications across the board, from generating images given keywords to planning trajectories for robot manipulation. In short, diffusion models are a family of probabilistic generative models that progressively destruct data by injecting noise, then learn to reverse this process for sample generation.

      Alt Text Figure 1.1. How a diffusion model iteratively transforms noise to generate an image

      Diffusion models have been used with great success for a number of use cases, but they still remain largely unused on dim images. The primary related work has been on using a diffusion model for low-light image enhancement. However, most of these works agree that attempting to generate an image from noise generated on top of an already dim image often results in rgb shift and global degradation of the image. This is because a diffusion model adds noise to the given image and then attempts to denoise the image, so given a dim and low-contrast image, the model has a difficult time denoising.

      For a visual example of why low-light scenarios can be a problem for diffusion models, we can just look at the control of our experiments. The left image is from the diffusion model trained and evaluated on low-light images, while the right image is from the diffusion model trained and evaluated on normal-light images.

      Alt Text Alt Text

      We can observe all sorts of problems here, from the model being unable to determine the image background color to the model sometimes not even showing the butterfly. In contrast, the exact same training done on the normal butterfly dataset shows distortions occasionally, but has no issues determining the background color or the contrast between the butterfly and the background. This illustrates the issue talked about previously of rgb shift and global degradation. In this blog, we aim to conduct experiments by adding different features to the DDPM scheduler and investigate which can actually make a difference for low-light scenarios.

      Background

      First, we discuss the idea of a diffusion model in more depth. In a nutshell, a diffusion model relies on semi-supervised training. The model is given an image from a training set to which random noise has been applied \(t\) times. This noisy image is given to the model along with the value of \(t\), a loss is computed between the output of the model and the noised image. The random noise is applied with a noise scheduler, which takes a batch of images from the training set, a batch of random noise, and the timesteps for each image. The overall training objective of the model is to be able to predict the noise added through the scheduler to retrieve the initial image.

      Since diffusion models on dim images are relatively unstudied, this blog post focuses on taking a well-known diffusion model for regular images and making modifications to the scheduler, which controls the noising and denoising process, and the model architecture to improve its performance in low-light scenarios. We begin with the DDPM (Denoising Diffusion Probabilistic Models) model . There are a number of viable diffusion models to study, but one of the major benefits of DDPM is that it balances image fidelity in generation and speed. Several other major models such as DDIM and PNDM can be much worse at capturing fine details even though they have some speed and computation advantage . This offers some intuition as to why DDPM was chosen to work with for this project.

      A DDPM uses two Markov chains for its denoising and noising process: one to perturb the data to noise, and another one to convert the noise back into data. The noising and denoising process uses a unique scheduler that we refer to as the DDPMScheduler. Mathematically, we can refer to the initial distribution as \(q(x_0)\) and show that the following \(x_t\) are computed as \(q(x_t \vert x_{t-1}) = \mathbb{N}(x_t; \sqrt{1-\beta_t}x_{t-1},\beta_tI).\) This is referred to as a Gaussian transition kernel, and shows that consecutive noised images are generated by taking the previous image and sampling more noise into it using a Gaussian perturbation. Of particular note are the \(\beta_t\) terms, since those vary from timestep to timestep and we discuss later how those should be modified. In the specific ddpm scheduler implementation, the \(\beta_{\text{start}}\) and \(\beta_{\text{end}}\) are specified, so the \(\beta\) in the Gaussian perturbation, which specifies how much noise should be added at any time, is gradually increased.

      The specified beta values can have many consequences on the model overall, but one is more aggressive denoising which can combat rgb shift. This is because rgb shift can cause color inconsistencies between adjacent pixels, which can be combated by greater noise reduction. In addition, aggressive denoising may be able to recover the underlying structure of the image and smooth out artifacts introduced by rgb shift. However, aggressive denoising can result in a loss of detail as well.

      By integrating the previous noise during the noising step to determine \(q(x_T)\) we can get \(q(x_T) = \int q(x_T \vert x_0)q(x_0)dx_0 \sim \mathbb{N}(x_T; 0, I)\), showing that after all the noise is integrated, the entire structure of the image is lost. After the denoising, DDPMs start generating new samples by generating a noise vector from the prior distribution \(p(x_T) = \mathbb{N}(x_T; 0, I),\) and gradually removing noise by running a Markov chain in reverse. The goal is to learn the transition kernel between timesteps. The reverse transition can be written as \(p_{\theta}(x_{t-1} \vert x_t) = \mathbb{N}(x_{t-1}; \mu_{\theta}(x_t, t), \sigma_{\theta}(x_t, t))\) where \(\theta\) is the model’s parameters and the mean and variance are parametrized by neural networks.

      This variance will also come into play later, as it is one of the parameters that we toggle in the DDPM scheduler. Variance in the DDPMScheduler of the Diffusers library has several possible values: fixed_small, fixed_small_log, fixed_large, fixed_large_log .

      variance_type effect
      “fixed_small” The variance is a small and fixed value
      “fixed_small_log” The variance is small and fixed in the log space
      “fixed_large” The variance is a large and fixed value
      “fixed_large_log” The variance is large and fixed in the log space

      Methods

      The first method evaluated as a control is simply an implementation of a DDPM using the Diffusers library on HuggingFace. This model was trained using the Smithsonian butterflies dataset of 1000 images hosted on HuggingFace . Initially, since the images in the dataset are all different, the dataset was resized to have all images conform to a square resolution, randomly flipped to augment the dataset, and normalized into the [-1,1] range. To generate “dim” images, the regular dataset was reduced in brightness by 50% using the Transforms library from HuggingFace.

      Alt Text

      Figure 3.1. Original images from dataset

      Alt Text

      Figure 3.2. Images after preprocessing

      Next, noise is added to the images. For this, we use the DDPMScheduler with the default parameters from Diffusers. The model is then trained on the noisy image, and evaluated. For evaluation, the model is tested on sixteen different images previously sampled randomly from the training dataset and set aside as test images. These images are noised using the scheduler in the same way as the rest of the images, and the model is run on the noised images to retrieve the original images.

      Control Parameters  
      noise_timesteps 50
      num_epochs 50
      beta_start 0.0001
      beta_max 0.02
      variance_type “fixed_large”
      resnet layers per unet block 2

      Figure 4.1. Showing default parameters used in the diffusion model

      Alt Text Figure 4.2. Figure depicting the UNet architecture used in the model

      Initially, a quantitative method of evaluation was considered, and some losses were computed between the test images before noising and the corresponding test results after denoising. While these measurements were computed, they didn’t seem as valuable as simply looking at the image because of the various patterns between images that a loss function cannot always capture (ie how similar is the butterfly and the pattern of the butterfly to the initial image). As an example, the image on the left receives a lower mean squared error loss than the image on the right, yet looking at them, it is apparent that the denoised version on the right is better. Thus, the evaluation here mostly presents the model outputs for us to qualitatively compare across different variations.

      Alt Text Figure 4.3. Showing two outputs of different models given the same input. MSE Loss proved to be unreliable for this task as the loss of the left image compared to the control was less than the loss of the right image due to rgb shift

      After the control, this process is repeated for a variety of parameters carefully chosen and model architecture modifications to evaluate the best variation for use in this low-light scenario.

      Results/Discussion

      The results of the control are as seen in the introduction above. The result of the dim images is on the left, while the result of the brighter images is on the right.

      Alt Text Alt Text Figure 5.1. The left shows the output of the control model trained on the dim images and the right shows it trained on the bright images

      One of the most pressing problems seen on the dimmer images is the rgb shift. As discussed in the background, the variance, which partly controls how aggressively the model is denoised, can help with rgb shift because larger denoising can retrieve details lost in noise. Thus, the first modification is changing the variance type from “fixed_small” to “fixed_large.” This modification, after training, resulted in the evaluation images below.

      Alt Text

      Figure 5.2. Result of evaluation after changing variance

      As we can see, this helped greatly with the rgb shift issue, and eliminated the background discoloration for several of the images. Certain images, such as the second row on the left-most column and the third from the left on the bottom row also show huge detail improvements. For the reasons discussed earlier, this is expected as a result of larger denoising, since it can clear away some artifacts. The only image that showed a decrease in quality after the variance change was the right-most image in the top row.

      Now that some of the rgb shift has been resolved, we move to tackling the loss of detail in many of these evaluation images. One classic approach to loss of information is simply increasing the capacity of the model to learn. In more technical terms, by increasing the number of ResNet layers per UNet block, we may allow the model to capture more intricate features and details. Deeper layers can learn hierarchical representations, potentially improving the ability to encapsulate fine-grained information. To do this, we edit our model architecture to make each UNet block deeper.

      Alt Text Alt Text Figure 5.3. The left image shows the output of the new change in model architecture on the dimmed dataset, while the right image shows the bright dataset control output for color comparison

      A huge improvement can be seen just by deepening the model architecture and at least the outline of every butterfly is now visible. However, this still hasn’t solved the problem of rgb shift. As we can see, the butterflies in the denoised dim images are all skewed yellow, while the butterflies in the denoised control bright images are all of varying colors. Next, we try to train with various betas in the scheduler to tackle this issue. As discussed before, higher beta values can help with rgb shift. However, higher values can also lead to loss of detail. The beta_start for the control was 0.0001 and the beta_max was 0.02. Thus, we try two combinations of start and max: 0.001 and 0.01, and 0.0005 and 0.015.

      Alt Text

      Figure 5.4. The left figure shows the output for beta start = 0.001 and beta end = 0.01, and the right figure shows the output for beta start = 0.0005 and beta end = 0.015

      As seen above, this modification was unsuccessful, and the images have much less detail than before and the rgb shift is worse than before. This may be because the biggest issue is the distortion of colors and blurring, and thus, a high beta value and larger denoising is needed to fix these issues rather than smaller denoising as was previously hypothesized. This future modification is not analyzed in this project, but would be interesting to see in the future.

      Future Directions

      There are several limitations and future directions worth discussing. For one, this project investigates a specific model, the DDPM model. The DDPM model was chosen for various reasons, but mostly because it draws a balance between detail and also efficiency. In the future, multiple models could be considered to figure out which is really best for image generation under low-light scenarios. In addition, this work only focuses on one dataset of butterflies, and generates “low-light” data by reducing the brightness of the original dataset. This is good evidence for the success of the methods presented, but additional datasets and real data taken from environments with low-light would have lent more evidence to the success of the methods. In addition, the amount of data and depth of the models used had to be limited due to GPU usage limits. A model trained for more epochs with more data may work better than this one. In addition, a good future starting point for this work would be to work with the beta start and beta max to figure out how to improve the rgb shift, which I believe would help with the detail in the dim images.

      \ No newline at end of file diff --git a/blog/2023/TransformersAndRNNs/index.html b/blog/2023/TransformersAndRNNs/index.html new file mode 100644 index 00000000..9f0ec7c6 --- /dev/null +++ b/blog/2023/TransformersAndRNNs/index.html @@ -0,0 +1,32 @@ + Transformers vs. RNNs: How do findings from real-world datasets relate to the theory? | 6.S898 Deep Learning Blogs 2023

      Transformers vs. RNNs: How do findings from real-world datasets relate to the theory?

      Transformers have rapidly surpassed RNNs in popularity due to their efficiency via parallel computing without sacrificing accuracy. Transformers are seemingly able to perform better than RNNs on memory based tasks without keeping track of that recurrence. This leads researchers to wonder -- why? To contribute towards answering that question, I'll analyze the performance of transformer and RNN based models on datasets in real-world applications. Serving as a bridge between applications and theory-based work, this will hopefully enable future developers to better decide which architecture to use in practice.

      Introduction & Motivation

      Since their invention, transformers have quickly surpassed RNNs in popularity due to their efficiency via parallel computing . They do this without sacrificing, and often improving, model accuracy. Liu et al has developed a theoretical explanation for this by mathematically proving that transformers learn shortcuts to automata that RNNs are unable to take . However, the results of this proof were only tested on synthetic datasets, and the question of how Transformers perform better than RNNs on memory-based tasks without keeping track of recurrence is still relevant, particularly for developers . In this project, I analyzed and compared the performance of transformer and RNN based models on both a financial stock and medical ECG dataset. By practically testing the applications of RNNs and Transformers in two different settings, I aim to aid developers by suggesting considerations for them to have while choosing an architecture to work with.

      Background & Prior Work

      Recurrent neural networks (RNN) are a type of neural network that were previously considered state-of-the-art for generating predictions on sequential data including speech, financial data, and video . RNNs are distinct from other types of neural networks because they have an internal “memory” . This memory is based on the network not only taking in the current input into its function but also information learned from all of the previous inputs .

      However, since Transformers were invented in 2017, they have rapidly made the use of RNNs obsolete . Transformers were proposed as a simple network architecture based only on an attention mechanism, without the need for recurrence or convolutions . While they are quadratic in time complexity , as opposed to RNNs’ linear complexity, their ability to parallelize makes them significantly faster to train than RNNs. Their popularity is broad, and Transformers are considered to be the gold standard in many fields including natural language processing, computer vision, and signal processing .

      While Transformers were accepted to perform better, the question remained – why? Transformers do not keep track of recurrence but are somehow able to successfully complete memory-based tasks. Liu et al aimed to answer this question by exploring how transformers learn shortcuts to automata . They did this both by mathematical proof and also through experimentation on synthetic data sets. Their primary conclusion is that transformers are able to universally approximate these complex functions in few layers by building simple parallel circuits. This leads to improvement in computational efficiency and also performance improvements .

      Current research in the RNN space is largely focused on trying to leverage their inherently linear complexity to its advantage , but I wondered if there were current applications where RNNs might already perform similarly or better. There are few papers in this space, and the ones that exist largely focus on a domain-specific performance mechanism . With that, I wanted to characterize performance of RNNs versus Transformers across a range of metrics: including accuracy, training time, memory, and parameters. This project acts as an extension of Liu et al by looking at real-world datasets from different applications and seeing if the conclusions change in the real-world. This aims to fill a gap between practical applications and mathematical explanations by attempting to provide an intuitive understanding of real-world dataset performance between transformers and RNNs. This would provide a holistic summary of the tradeoffs between RNNs and Transformers while aiming to provide insight as to why Transformers remain so dominant in the space.

      Methods & Results

      Data

      The first dataset I used was Yahoo Finance’s stock dataset, accessible through the yfinance API. I specifically looked at the closing price data from the S&P500 stock group which represents the stocks from the 500 largest companies. The second dataset I used was from Kaggle (available here). This dataset captures ECG data. I specifically used the abnormal and normal sub datasets that contained single-heart beat single-lead ECG data.

      Software

      I ran all of the code for this project using Python 3.10 in Google Colab. The APIs numpy, scipy, matplotlib, seaborn, keras, tensorflow, and yfinance were all used. The notebook used for the stock experiments is available here and the ECG experiments here.

      Stock Model Comparisons

      I began my experiments by loading and visualizing the data. I wanted to compare the transformer and RNN models on a time-series prediction so I decided to use 11 months of data to predict the next 1 month behavior. To do this, I loaded data from July 1st, 2022 to July 31st, 2023. Of note, the stock market is closed during weekends and holidays, so there were 251 days in my dataframe, and I trained on the first 231 days to predict the last 20. I then used an 80/20 train and test split.

      I also visualized several iterations of ten random samples to better understand the dataset and ensure that I was preprocessing correctly.

      The closing price of 10 random stocks from the S&P 500 over a year timeframe.

      Once I had the data set up, I began to build each model. In addition to a simple RNN architecture and a Transformer model, I also built an LSTM model which is a specialized subset of RNNs that aim to solve a vanishing gradient problem in traditional RNNs . In addition to providing another reference model, I decided to include LSTM because I could easily test identical architectures between LSTM and RNNs . Additionally, LSTMs are notoriously slower to train and this provides a sanity check for my later timing results .

      In building the models, I tried to keep them all as simple and equivalent as possible for a fair comparison. This was simple for the LSTM and RNN: I just used two LSTM (or RNN) layers followed by a linear layer and then an output linear layer. Because of the different architecture of transformers, it didn’t seem possible to create a completely equivalent architecture. However, I tried to approximate this by having just a singular attention layer that didn’t have a feed forward network component and only had a standard layer normalization and then a multiheaded attention with 2 heads (the same number of layers for RNN/LSTM with the head size equivalent to the RNN/LSTM layer size). I followed this with a pooling layer, a linear layer (with the same size as the RNN/LSTM linear layer) and a linear output layer. I trained all models with a batch size of 25 and 30 epochs.

      For each model, I measured RMSE for the predictions (used for accuracy), time used to train the model, memory used to train the model, number of parameters, and storage used for parameters. The results are shown in the following table.

      Model RMSE Memory in Training (KB) Time to Train (s) Parameters (#) Memory for Parameters (KB)
      LSTM 155.61 16575097 151.76 54190 211.68
      RNN 149.07 4856823 67.25 16750 65.43
      Transformers 36.46 3165225 87.00 2019 7.89

      As expected, the LSTM model runs much slower with higher memory usage which is consistent with literature models . When comparing the models, on first look, it seems like the Transformer model greatly outperforms the other models, both in terms of accuracy and memory with comparable training times to the RNN. However, I noticed something odd when I visualized my results: while transformers performed better in RMSE, it didn’t look like they performed better.

      Performance of initial model experiments. Before the black line, is the training data of the closing price, after the line, the testing data as well as the predictions for all three models are shown.
      The testing data compared to the predictions for all three models.

      Besides noting that the models all did fairly well given their simplicity, this was puzzling. Additionally, when I reran the models, I noted that the RMSE values for the LSTM/RNN models varied wildly with results between 50-550 whereas transformer’s performance was consistently around 35. To investigate, I printed out the RMSE for each prediction and analyzed them. I found that most errors were fairly small but there were a couple very large errors that ended up skewing the overall reported average. In visualizing that outlier and performance between the models, I saw that the prices for the outliers were much higher than most stocks, making the LSTM/RNN models predict a much lower price.

      Outlier visualization of one stock with all three model's performance.

      Transformers still do okay here, likely due to the first normalization layer I used. Thus, to make the problem more equal, I decided to normalize all of the data at the onset.

      Sample of data once normalized.

      When rerunning the training, the tabular results match the visualizations. Surprisingly, Transformers perform worse than RNNs/LSTMs, with less memory used but no real difference in training time. Even with adding complexity to the Transformer model via increasing the feed-forward network complexity through increasing the size of the embedded feed forward network and increasing the number of attention layers, no performance difference was seen – the time to train just substantially increased.

      Model RMSE Memory in Training (KB) Time to Train (s) Parameters (#) Memory for Parameters (KB)
      LSTM 0.125 8233179 128.25 54190 211.68
      RNN 0.121 4147757 87.58 16750 65.43
      Transformers 0.281 3148379 87.38 2019 7.89
      Complicated Transformers 0.282 40052260 1243.01 16248 63.47
      Performance of model experiments with normalized data. Before the black line, is the training data of the closing price, after the line, the testing data as well as the predictions for all three models are shown.
      The testing data compared to the predictions for all three models with normalized data.

      This seems to go against prior results which almost universally found Transformers faster without sacrificing efficiency . I hypothesized that this could be because Transformers generally have poor length generalization and perhaps this is a suboptimal time window for this particular model . This could also explain the lack of a time improvement as a transformer’s time complexity is quadratic whereas RNN’s is linear . Presumably, the quadratic performance will slow down the Transformer when parallelization becomes impossible . To test this hypothesis, I decided to try to predict my results with a smaller time window.

      I did this by predicting the closing price of just one day of data using a week of prior data. I normalized all data and retrained my models. I reverted back to the simple transformer model in an effort to test relatively equivalent model complexities.

      Model RMSE Memory in Training (KB) Time to Train (s) Parameters (#) Memory for Parameters (KB)
      LSTM 0.386 9588885 19.00 53221 207.89
      RNN 0.381 4197690 13.45 15781 61.64
      Transformers 0.384 2707340 11.45 1050 4.1

      As the results show, my hypothesis was correct. The transformer performed much faster without a reduction in accuracy. However, it is also very possible that I didn’t see a time difference because I am using small models with a short training time. These timing differences could become larger with more computationally intensive models.

      ECG Model Comparisons

      While the results from the stock dataset were interesting, I also wanted to test these models with a different type of input that perhaps would capture different underlying strengths and weaknesses of the models. I decided to use an ECG to predict the presence of an abnormality in the heart beat. This represents a difference in the stock dataset in three key ways:

      1) The output is binary instead of continuous. 2) There is a better source of ground truth for this data. If there were a definitive way to predict the behavior of a stock, everyone would be rich, but that’s not the case – there’s inherent uncertainty and an expected level of inaccuracy. For health data, the person will have the condition or not and an experienced cardiologist would be able to definitively diagnose the patient. 3) The input has an expected, structured shape. All ECGs are supposed to look roughly the same and should have a similar visibility in the dataset. This has effects on the causality window used in models that I was interested in analyzing.

      I first visualized my data for both the abnormal and normal heart beats. The overall sample size was around 9000 patients, and I artificially created a 50/50 split between abnormal and normal to prevent class imbalance. I once again used an 80/20 train/test split for my models.

      Data visualization for ECG.

      I immediately ran into difficulties once I began training with the performance of all models really being a coin toss between the two. I then focused my time on trying to build more complex models. For the RNN, I added more layers with varying dimensions and played around with adding dropout and linear layers. For the Transformer, I built up the feedforward network part of the algorithm by increasing the size of the embedded feed forward network and adding multiple attention layers. For both, I tuned hyperparameters such as the optimizer, batch size, and number of epochs. Despite this, results still remained poor.

      Loss curve for both models on training and validation data.
      ROC curves for both models with displayed AUC values.

      There is virtually no reduction on validation loss for the RNN graph, no matter what structure I chose. While there is a normal looking curve for transformer, the scale of loss reduction is very small when you consider the y-axis. Additionally, the RNN network never performed better than randomly, whereas the Transformer network was only slightly improved.

      One interpretation of these results could be that the Transformer model performed better. However, because neither of these architectures performed overly successfully, I don’t think that is a sound conclusion. It is unclear to me if this is a shortcoming of my code or a difficulty with the problem and dataset. This would be an area where future work is required.

      My main takeaway from this process of working with the ECG data was how much easier it was to tune and problem-solve with the Transformer than the RNN. For the Transformer, I was able to adjust the number of heads or the sizes of heads, or the feed forward network, etc, whereas, in the RNN, I really could only play with the layers of the RNN itself. While both of these architectures have black-box components, I found the Transformer a lot easier to work and play around with as a developer, and I could develop some intuition on what things I should change and why. This perhaps represents another difference from the transformer vs RNN debate but from a usability standpoint.

      Limitations

      There are several limitations of this project. First, I only analyzed a couple of different datasets. This is not representative of all of the different applications of RNNs and transformers, meaning the conclusions are limited to the datasets chosen and are not necessarily representative of the full field. Additionally, my implementation of the models may not be the most efficient ones. While I tried to test a wide range of parameters, due to limited resources available (i.e., time and being one person) there are undoubtedly more optimal structures or hyperparameters that I did not test. The ability to test only a limited number of parameters and architectures remains an overall limitation and challenge of the deep learning field . Notably, I also worked with small datasets and models. While this was useful in running a lot of experiments quickly, the differences in architectures, especially in regards to training time/memory, may be more important and more pronounced in larger-scale tasks.

      Additionally, I did not test every metric of success. While I focused on number of trainable parameters, training time, memory, and accuracy – these are not the only things that matter in machine learning. For instance, in some applications, sensitivity might matter a lot more than specificity and overall accuracy. In others, explainability of the model may be essential, such as in time-sensitive healthcare settings . Thus, in making a decision in choosing one model over another, it is important to consider the specific application and benefits of each approach. These methods of success also might not be “fair” to each model. For instance, a much larger number of trainable parameters may be fine if overall time is not lost. If one has the capacity to run their machine overnight but with limited memory, they might reevaluate these metrics of success. Given these limitations, it is important to do a holistic evaluation of architectures when deciding which to use for a deep learning project and to use this project as a guideline of preliminary experiments to run in making that decision. This project does not serve as a ground truth as to why to choose one model over another.

      Conclusions

      Transformers seem to be easier to work with when there are still questions surrounding the data. For instance, with the stock dataset, there may be circumstances where you would prefer a model that can perform well without normalizing the dataset if, for instance, you care about the magnitude of closing prices between stocks. Similarly, for the ECG model, they were easier to tune with different hyperparameters and felt more intuitive in comparison to working with the RNN. Transformers also consistently used less memory with far fewer parameters across the board, which is important when working in resource-limited systems.

      However, this project found that transformers are not always faster or more accurate than alternatives. While Liu et al found that typical transformers can find shortcuts to learn automata , this might not be the case for all datasets across all applications. Previous studies have found that length generalization is a deficiency of transformers , and this is supported by this project which found that for longer-term predictions, RNNs were the faster and more accurate approach post-normalization.

      These findings underscore the importance of taking the time to test different architectures in your research and not assuming that just because Transformers are more popular, they are necessarily the best fit for your problem. In deep learning research, we often get bogged down in tuning a model and it’s important to take a step back and consider your assumptions about the task – which may include the broader model consideration.

      \ No newline at end of file diff --git a/blog/2023/Visualization-of-CLIP's-Learning-and-Perceiving-Dynamics/index.html b/blog/2023/Visualization-of-CLIP's-Learning-and-Perceiving-Dynamics/index.html new file mode 100644 index 00000000..87a03d98 --- /dev/null +++ b/blog/2023/Visualization-of-CLIP's-Learning-and-Perceiving-Dynamics/index.html @@ -0,0 +1,152 @@ + Alive Scene | 6.S898 Deep Learning Blogs 2023

      Alive Scene

      Inspired by the captivating Enchanted Portraits of the Harry Potter universe, my project unveils an innovative AI pipeline that transcends traditional scene-capture methods. Rather than merely recording scenes as a sequence of static images, this pipeline is intricately designed to interpret and articulate the dynamic behavior of various elements within a scene by utilizing CLIP semantic embeddings. This nuanced understanding enables the scenes to evolve autonomously and organically, mirroring the fluidity and spontaneity of living entities.

      Enchanting Images with Semantic Embedding

      “Alive Scene” is an advanced AI-driven project that revolutionizes the concept of scene capture, drawing inspiration from the enchanting, ever-changing portraits in the Harry Potter series. This innovative pipeline goes beyond traditional methods of capturing scenes as static images. Instead, it delves deep into the semantic understanding of each scene, enabling it to not only recreate these scenes with high fidelity but also to imbue them with the ability to act, evolve, and respond autonomously.

      The following GIF image on the right is the output from the Alive Scene Pipeline. Notice that these scenes start from the same status.

      Hogwarts Portraits
      Alive Scene captures cats' napping behaviors

      The core of this project lies in its sophisticated AI algorithms that analyze and interpret the nuances of each scene, from the physical elements to the underlying emotions and narratives. This enables the system to generate dynamic, lifelike representations that are far from static images. These AI-crafted scenes possess the unique ability to change organically over time, reflecting the natural progression and evolution one would expect in real life.

      Through “Alive Scene,” portraits and scenes are no longer mere representations; they become entities with a semblance of life, capable of exhibiting behaviors and changes that mirror the fluidity and spontaneity of living beings. There are three elements in this project. The first is using the CLIP model as an encoder to compress images into CLIP embeddings. The second is training a generator to reconstruct the original image from the CLIP embedding. The third is training a behavior model to learn the behavior of CLIP embeddings in the CLIP feature space; this behavior is then used to drive the generator, making the scene representation alive. The following is a diagram of the pipeline.

      Alive Scene Pipeline

      Introduction

      The CLIP (Contrastive Language–Image Pre-training) model represents a groundbreaking approach to integrating visual and textual data within the realm of artificial intelligence. In this project, it plays an important role in comprehending the scenario and the characters’ behaviors in the scene. Detailed investigations offer insightful understanding of the model’s operations, showing the potential of CLIP embeddings to let a machine comprehend and compress the complex information of images.

      The study explores using conditional Invertible Neural Networks (cINNs) for transforming still images into videos, highlighting cINNs’ prowess in handling static to dynamic content transitions. Although proficient in capturing motion, the model’s grasp on object/event types may benefit from CLIP embeddings enhancement. My project, unlike this work, aims to animate static scene representations with self-driven behaviors, not just manipulate videos.

      Another significant work, “Make-A-Video”, introduces a text-to-video generation method utilizing text-to-image models. This approach circumvents the need for text-video paired data, learning from text-image data and unsupervised videos. It employs a spatiotemporal diffusion model and super-resolution techniques for high-quality video creation from text. My project differs, focusing on bringing life to existing videos or image sequences, rather than generating new content from text.

      Despite the static background, the cats’ movements are so subtle that they pose a challenge for human observers to distinguish differences between frames. To visualize the clip embeddings of the frames from the video, I employ both UMAP and t-SNE techniques for gaining more insights.

      UMAP Visualization
      t-SNE Visualization

      The behavior over time resembles a ‘spaghetti’ pattern, indicating that certain scenarios or behaviors may recur (as seen in the crossings or interactions within the spaghetti diagram). Some intersecting points demonstrate similar tendencies, while others are more unpredictable, highlighting the complexity of the video.

      Both visualizations provide a promising sign: the end and start frames are positioned close to those in the middle. This proximity allows the Alive Scene to operate seamlessly and endlessly. For example, when the Alive Scene approaches a point near the end, it can smoothly transition to a frame somewhere in the middle. Similarly, when it encounters a region where different frames cluster together, it has a variety of options to choose from for its next move. This flexibility is key to making the Alive Scene function effectively.

      Generator

      The Generator (decoder) is a SIREN model, which employs CLIP semantic embeddings and positional embeddings of pixel coordinates to generate RGB colors. SIRENs, or Sinusoidal Representation Networks, diverge from traditional neural networks by utilizing sinusoidal activation functions instead of common ones like ReLU. These networks are adept at implicitly representing intricate data patterns, making them particularly advantageous for tasks that involve complex spatial structures or continuous data. The incorporation of periodic activation functions in SIRENs can significantly enhance deep learning capabilities, especially in fields such as computer vision and generative models.

      a SIREN model as the generator
      the training progression
      Original Video vs Generated Video

      The code of the generator model (SIREN)

      class SineLayer(nn.Module):
      +
      +    def __init__(self, w0):
      +        super(SineLayer, self).__init__()
      +        self.w0 = w0
      +
      +    def forward(self, x):
      +        return torch.sin(self.w0 * x)
      +
      +
      +class Siren(nn.Module):
      +    def __init__(self, w0=20, in_dim=560, hidden_dim=256, out_dim=3):
      +        super(Siren, self).__init__()
      +
      +        self.net = nn.Sequential(nn.Linear(in_dim, hidden_dim), SineLayer(w0),
      +                                 nn.Linear(hidden_dim, hidden_dim), SineLayer(w0),
      +                                 nn.Linear(hidden_dim, hidden_dim), SineLayer(w0),
      +                                 nn.Linear(hidden_dim, hidden_dim), SineLayer(w0),
      +                                 nn.Linear(hidden_dim, out_dim))
      +
      +        # Init weights
      +        with torch.no_grad():
      +            self.net[0].weight.uniform_(-1. / in_dim, 1. / in_dim)
      +            self.net[2].weight.uniform_(-np.sqrt(6. / hidden_dim) / w0,
      +                                        np.sqrt(6. / hidden_dim) / w0)
      +            self.net[4].weight.uniform_(-np.sqrt(6. / hidden_dim) / w0,
      +                                        np.sqrt(6. / hidden_dim) / w0)
      +            self.net[6].weight.uniform_(-np.sqrt(6. / hidden_dim) / w0,
      +                                        np.sqrt(6. / hidden_dim) / w0)
      +            self.net[8].weight.uniform_(-np.sqrt(6. / hidden_dim) / w0,
      +                                        np.sqrt(6. / hidden_dim) / w0)
      +
      +    def forward(self, x):
      +        return self.net(x)
      +
      +
      +class MLP(nn.Module):
      +    def __init__(self, in_dim=2, hidden_dim=256, out_dim=1):
      +        super(MLP, self).__init__()
      +
      +        self.net = nn.Sequential(nn.Linear(in_dim, hidden_dim), nn.ReLU(),
      +                                 nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),
      +                                 nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),
      +                                 nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),
      +                                 nn.Linear(hidden_dim, out_dim))
      +
      +    def forward(self, x):
      +        return self.net(x)
      +
      +
      +def train(model, model_optimizer, nb_epochs=15000):
      +    psnr = []
      +    for _ in tqdm(range(nb_epochs)):
      +        model_output = model(pixel_coordinates)
      +        loss = ((model_output - pixel_values) ** 2).mean()
      +        psnr.append(20 * np.log10(1.0 / np.sqrt(loss.item())))
      +
      +        model_optimizer.zero_grad()
      +        loss.backward()
      +        model_optimizer.step()
      +
      +    return psnr, model_output
      +

      Behavior model

      This project introduces a customized asymmetrical Variational Autoencoder (VAE) as the probabilistic model to predict motion within the CLIP embedding space. A VAE-like model may prove beneficial for this task for two primary reasons. Firstly, they are adept at learning a continuous, smooth latent space, facilitating efficient interpolation and manipulation of data representations. Given that the training data derives from a video, it is inherently sequential and should be represented in a continuous fashion. Secondly, VAEs utilize amortized inference, where the encoder is trained to generalize the mapping of inputs to the latent space across the dataset, as opposed to conducting inference anew for each input. For this project, the objective is to devise a method that allows for a smooth navigation within the observed embedding space.

      The code of the behavior model (VAE)

      
      +device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
      +# BehaviorModel(inspired by VAE)
      +class BehaviorModel(nn.Module):
      +    def __init__(self, input_dim=512, latent_dim=256):
      +        super(VAE, self).__init__()
      +
      +        # Encoder
      +        self.fc1 = nn.Linear(input_dim, 400)
      +        self.bn1 = nn.BatchNorm1d(400)
      +        self.fc2 = nn.Linear(400, 300)
      +        self.bn2 = nn.BatchNorm1d(300)
      +        self.fc21 = nn.Linear(300, latent_dim)  # Mean
      +        self.fc22 = nn.Linear(300, latent_dim)  # Log variance
      +        self.dropout = nn.Dropout(0.55)
      +
      +        # Decoder
      +        self.fc3 = nn.Linear(latent_dim, 300)
      +        self.bn3 = nn.BatchNorm1d(300)
      +        self.fc4 = nn.Linear(300, 400)
      +        self.bn4 = nn.BatchNorm1d(400)
      +        self.fc5 = nn.Linear(400, input_dim)
      +
      +    def encode(self, x):
      +        h1 = F.relu(self.bn1(self.fc1(x)))
      +        h2 = F.relu(self.bn2(self.fc2(h1)))
      +        h2 = self.dropout(h2)
      +        return self.fc21(h2), self.fc22(h2)
      +
      +    def reparameterize(self, mu, logvar):
      +        std = torch.exp(0.5 * logvar)
      +        eps = torch.randn_like(std)
      +        return mu + eps * std
      +
      +    def decode(self, z):
      +        h3 = F.relu(self.bn3(self.fc3(z)))
      +        h4 = F.relu(self.bn4(self.fc4(h3)))
      +        return F.tanh(self.fc5(h4))
      +
      +    def forward(self, x):
      +        mu, logvar = self.encode(x.view(-1, 512))
      +        z = self.reparameterize(mu, logvar)
      +        return self.decode(z), mu, logvar
      +
      +# Loss function
      +def loss_function(recon_x, x, mu, logvar):
      +    BCE = F.binary_cross_entropy(recon_x, x.view(-1, 512), reduction='sum')
      +    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
      +    return BCE + KLD
      +
      +def loss_function(recon_x, x, mu, logvar):
      +    # Use Mean Squared Error for the reconstruction loss
      +    MSE = F.mse_loss(recon_x, x.view(-1, 512), reduction='sum')
      +
      +    # KLD is unchanged
      +    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
      +
      +    return MSE + KLD
      +

      The process begins with a CLIP embedding as the input, which is then transformed by the model to output a motion vector. This vector retains the same dimensions as the CLIP embedding and is utilized to alter the original embedding, facilitating the generation of the subsequent frame based on this modified embedding.

      the Asymmetrical VAE

      In this case, I generate 200 frames for training; the number is quite small. To enhance the model’s learning efficacy, new data points are generated through linear interpolation between existing data points (frames). By doing this, I generated 1000 clip embeddings and frames. These newly created samples undergo normalization to conform to the geometric constraints of the CLIP embedding space, often characterized as a hypersphere. This normalization process ensures that the interpolated data points adhere to the distribution pattern of the original embeddings. As depicted in the diagram, this technique leads to a densified clustering of data points in close proximity to the original embeddings, which is advantageous. It implies a higher confidence in the authenticity of these new points due to their closeness to the authentic, or ground truth, data.

      Gaining more data points by Interpolation

      When operating the process that animates the Alive Scene, it occasionally generates artifacts. This may be caused by certain movements that deviate significantly from the observed reality. Please refer to the following GIF for an example.

      Artifacts

      To resolve the issue, I have developed a post-processing technique that stabilizes the outcomes. The process begins by re-normalizing the resulting embedding onto the hypersphere. Following this, a weighted parameter is introduced to draw the vector incrementally toward the domain of previously observed CLIP embeddings. For example, if the weighting parameter is set to 0.1 for the observed embedding, it would be scaled by 0.1, while the predicted embedding is scaled by 0.9. These two are then summed to produce a final embedding that, while primarily influenced by the prediction, retains a subtle alignment with the observed data. This weighted approach aims to mitigate artifacts by anchoring the predictions within the realm of observed realities.

      Post-curing method

      By applying this method, the Alive Scene has started to yield more stable results. Interestingly, the outcomes are varied, exhibiting behaviors akin to a living creature — somewhat unpredictable yet within a framework of predictability.

      4 different versions

      Manipulation

      The Alive Scene operates autonomously, and to explore the modulation of its behavior, I have introduced the concept of ‘temperature.’ This concept acts as a coefficient that scales the movement vector, thereby allowing the scene to exhibit behaviors that are either more expansive and varied, or more constrained and subtle, depending on the temperature setting.

      4 different versions
      different temperature settings

      Conclusion

      The “Alive Scene” project signifies a profound achievement in the domain of Deep Learning for scene representation. It leverages CLIP semantic embeddings to decode and imbue scenes with lifelike attributes, while also seamlessly integrating the potent SIREN model as a generator, capable of breathing vitality into the processed embeddings by producing authentic images.

      Furthermore, the project implements an asymmetric Variational Autoencoder (VAE) to predict and model motion within the CLIP embedding space, thereby enhancing the dynamism and fluidity of the scenes.

      However, the significance of this undertaking extends well beyond its technical accomplishments. By giving birth to scenes that autonomously and organically evolve, the project ushers in a transformative era of possibilities in digital storytelling and interactive media, fundamentally reshaping the landscape of creative expression in the digital realm.

      Future Work

      In this project, a SIREN model is trained to create a 2D scene representation. This model can be extended to generate a 3D scene by simply adding an additional output node to adopt the Neural Radiance Field (NeRF) architecture. Such an enhancement allows the 3D Alive Scene to offer a more immersive and complex scene representation. Looking ahead, it’s conceivable that a non-player character (NPC) could be manipulated in this manner, especially if the model, when trained on a vast dataset, can learn more sophisticated behaviors. This approach has the potential to encapsulate all necessary information within a highly compact model, offering an extremely lightweight solution for dynamic scene generation.

      extend the model for a 3D Scene

      Potential Usages and Contributions:

      Digital Art and Entertainment: This project can revolutionize digital art and entertainment by offering dynamic, evolving scenes that enhance animations and virtual experiences.

      Film and Animation: It can automate the generation of realistic backgrounds, streamlining the production process for films and animated content.

      Advertising and Marketing: The project offers the capability to create interactive, dynamic advertising content, thereby engaging audiences more effectively.

      Behavioral Studies: It provides a tool for in-depth analysis of human and animal behaviors, supporting research in fields such as psychology, ethology, and anthropology.

      Cultural Preservation: This technology can enliven historical scenes or artworks in museums, offering visitors more immersive and engaging experiences.

      Data Visualization: It introduces innovative methods for interacting with and interpreting complex data, useful in sectors like finance and healthcare.

      Gaming: The project enables the creation of NPCs with realistic behaviors, significantly enhancing the gaming experience.

      Architecture and Engineering: It can be applied for dynamic visualizations in architectural and engineering projects, aiding in design and planning.

      Conservation: This technology can contribute to wildlife conservation by facilitating the study of animal behaviors in natural settings.

      \ No newline at end of file diff --git a/blog/2023/WeightDecaySpecNormEffects/index.html b/blog/2023/WeightDecaySpecNormEffects/index.html new file mode 100644 index 00000000..3fba687d --- /dev/null +++ b/blog/2023/WeightDecaySpecNormEffects/index.html @@ -0,0 +1,42 @@ + Exploring Frobenius and Spectral Normalization in MLPs and Residual networks | 6.S898 Deep Learning Blogs 2023

      Exploring Frobenius and Spectral Normalization in MLPs and Residual networks

      This blog post compares the effects of a spectral view on weight normalization to a Frobenius view on weight normalization, using a novel algorithm developed by us. We use two network types at multiple sizes to compare the effects of these two methods on the singular values of the weight matrices, the rank of the weight matrices, and the accuracy of the models.

      Relevance and Investigation

      Weight normalization in deep learning is vital because it prevents weights from getting too large, thereby improving the model’s learning ability, accelerating convergence, and preventing overfitting. One traditional method for weight normalization involves adding the sum of the weights’ Frobenius norms to the loss function. One of the issues with penalizing the Frobenius norm of weight matrices is that it imposes a stricter constraint than may be desired for some model types: it enforces that the sum of the singular values is one, which can lead to weight matrices of rank one, essentially forcing models to make decisions based on only one feature. In 2018, spectral normalization emerged as an effective method, especially for Generative Adversarial Networks (GANs), to control the Lipschitz constant of the model and stabilize the training process. Spectral normalization is a more relaxed form of weight normalization that scales weight matrices based on their spectral norms, where the spectral norm is the maximum singular value of the matrix. This method is more relaxed because it does not impose a constraint on the sum of singular values, but rather only on the maximum singular value. This allows for weight matrices of higher rank, which may be more desirable for certain model types. Since 2018, spectral normalization has been used in many GAN architectures, but has not been explored as much in other deep learning scenarios. In fact, there is little published research on other approaches to the spectral view of weight normalization in general.

      We introduce two novel normalization techniques inspired by AdamW and motivated by issues caused by penalties in the loss function. Our method, which we call Norm Scaling, takes a training step using a loss function that does not include a norm penalty, then scales the norms of the weight matrices after the step. Comparing our Frobenius and spectral normalization algorithms to each other can provide valuable insights into their advantages and disadvantages in various model architectures through a thorough investigation of their effects on the weight matrices. We aim to understand how the spectral norms of weight matrices change over time and how they affect overall model performance. Furthermore, we want to see how singular values change across architectures and algorithms, determining if certain types of architectures can benefit more from spectral normalization than others. It is especially interesting to investigate whether spectral normalization’s superior performance in stabilizing GAN training generalizes to other deep learning scenarios via different architecture types and a different scaling technique. The ultimate goal of this exploration is to deepen our understanding of these normalization techniques to find more intelligent ways to regularize weight matrices in order to achieve less overfitting and improve learning ability.

      Norm Scaling

      Let us introduce our novel normalization technique, Norm Scaling. We will first describe the algorithm in the context of Frobenius normalization, then we will describe how it will be applied with spectral normalization. We begin each process by initializing the weight matrices of the model to be orthogonal, which helps prevent gradient numerical stability issues and improve convergence timing. We then multiply each weight matrix, \(W_k\) by \(\sqrt{\frac{d_k}{d_{k-1}}}\) where \(d_k\) is the size of the output at layer \(k\). This enforces the initial spectral norm of each weight matrix to be \(\sqrt{\frac{d_k}{d_{k-1}}}\), and the initial Frobenius Norm to be \(\sqrt{min(d_k, d_{k-1})*\frac{d_k}{d_{k-1}}}\).

      In the Frobenius Norm Scaling algorithm, training is relatively straightforward. After we initialize the orthogonal weight matrices but before beginning training, we calculate the Frobenius norm of each weight matrix based on the equation above and save these in our model. On each training step, we first calculate the loss, compute the gradients, and take a step using the optimizer. Then, we calculate the Frobenius norm of each weight matrix, \(W_k\), divide the matrix by this norm, and multiply it by its initial value that we calculated before training:

      \[\bar{W}_k = \frac{W_k}{||W_k||_F} * \sqrt{min(d_k, d_{k-1})*\frac{d_k}{d_{k-1}}}\]

      This ensures that the Frobenius norm of each weight matrix, \(W_k\), is equal to its initial value throughout the entire training process.

      The Spectral Norm Scaling algorithm is slightly more mathematically complicated, and required the use of power iteration to make sure training time was feasible. After we initialize the orthogonal weight matrices but before training, we save target spectral norms for each weight matrix, \(W_k\). On each training step, we first calculate the loss, compute the gradients, and take a step using the optimizer. Then, we calculate the first singular value, which is the same as the spectral norm, and the first right singular vector of each weight matrix, \(W_k\), using power iteration. In order to minimize the difference between the right singular vector and the power iteration prediction of this vector, we use 500 steps. To use power iteration with convolution weight matrices, which have dimension 4, we view them as 2-dimensional weight matrices where all dimensions past the first are flattened (this reshaping is the channel-wise decomposition method and was used for similar work in Yang et al., 2020).

      To find the first right singular vector and singular value, we use the fact that the top eigenvector of \(A^TA\) is the first right singular vector of \(A\), and the corresponding eigenvalue is the square of the first singular value. So using the power method, we compute the top eigenvector and eigenvalue of \(W_k^TW_k\). We then use the fact that \(W_kv_1 = \sigma_1u_1\) to compute \(u_1 = \frac{W_kv_1}{\sigma_1}\).

      We then perform the following normalization step:

      \[\bar{W}_k = W_k + u_1v_1^T(\sigma^* -\sigma_1)\]

      Where \(\sigma^*\) is the target spectral norm described above.

      Note that this calculation subtracts the best rank one approximation of \(W_k\) from \(W_k\), but adds the same outer product back, scaled by \(\sigma^*\). Note that this does NOT enforce that the new spectral norm is \(\sigma^*\), because it is possible that \(\sigma_2\) is greater than \(\sigma^*\). We hope that this normalization prevents the first outer product of singular vectors from dominating the properties of the weight matrix, thus allowing for better generalization outside of the training distribution.

      Experiments

      In order to test our Norm Scaling learning algorithm, we train a variety of models on image classification of the CIFAR100 dataset. We train two types of models, MLPs and ResNets. Our MLPs consist of fully connected layers without biases, where our medium MLP has 5 layers each with 2,000 hidden units and our large MLP has 7 layers each with 4,000 hidden units. Our ResNets are ResNet34 and ResNet50, loaded in without pre-training. We adjust the size of the first convolution layer to 64 channels of 3x3 kernels and the output layer to output 100 classes. We train each of these four models with three different conditions: the first uses no weight scaling, the second uses Frobenius norm scaling, and the third uses spectral norm scaling. We train each model for 200 epochs with a batch size of 512, an initial learning rate of 0.001, and no weight decay. We use the Adam optimizer and a multi-step learning rate scheduler with \(\gamma = 0.1\) applied at epochs 60 and 120. We use the cross entropy loss function for all models. We use the same training hyper-parameters for all models. The models were trained on 4 NVIDIA Tesla A100 GPUs with parallelization handled by the PyTorch Lightning library.

      At the end of training, the MLP with depth 5, width 2000, and no norm scaling had a test accuracy of 25.12% and a test loss of 10.86. The MLP with depth 5, width 2000, and Frobenius norm scaling had a test accuracy of 28.23% and a test loss of 4.47. The MLP with depth 5, width 2000, and spectral norm scaling had a test accuracy of 23.21% and a test loss of 3.53. The MLP with depth 7, width 4000, and no norm scaling had a test accuracy of 23.95% and a test loss of 11.00. The MLP with depth 7, width 4000, and Frobenius norm scaling had a test accuracy of 26.62% and a test loss of 6.10. The MLP with depth 7, width 4000, and spectral norm scaling has a test accuracy of 36.25% and a test loss of 2.63. ResNet34 with no norm scaling had a test accuracy of 70.1% and a test loss of 2.03. ResNet34 with Frobenius norm scaling had a test accuracy of 75.24% and a test loss of 1.46. ResNet34 with spectral norm scaling had a test accuracy of 71.79% and a test loss of 1.78. ResNet50 with no norm scaling had a test accuracy of 73.45% and a test loss of 1.72. ResNet50 with Frobenius norm scaling had a test accuracy of 75.72% and a test loss of 1.40. ResNet50 with spectral norm scaling had a test accuracy of 73.29% and a test loss of 1.63. Full summaries of the changes of these metrics across epochs are plotted below with checkpoints every 10 epochs.

      Above left: Training accuracy of MLPs. Above right: Training loss of MLPs. Spec refers to models trained with spectral norm scaling, Frob refers to models trained with Frobenius norm scaling, and Baseline refers to models trained with no norm scaling.
      Above left: Testing accuracy of MLPs. Above right: Testing loss of MLPs.
      Above left: Training accuracy of ResNets. Above right: Training loss of ResNets.
      Above left: Testing accuracy of ResNets. Above right: Testing loss of ResNets.

      Findings

      Scaling Effects on Training Stability

      One of the most interesting findings of this investigation is the effect of spectral norm scaling on the stability of training. We can see in the figures above that spectral norm scaling has a significant effect on the stability of training for MLPs, but not for ResNets. For MLPs, spectral norm scaling significantly improves the stability of training, as shown by the fact that the training and test loss curves remain close and follow a similar path. This is especially true for the large MLP, where the training and testing loss and accuracy curves maintain a similar relationship for the entire duration of training while the test loss increases and test accuracy plateaus for the other two normalization methods.

      Above left: Testing and training accuracy of Large MLP (depth 7, width 4000) with no norm scaling. Above right: Testing and training loss, same model.
      Above left: Testing and training accuracy of Large MLP with Frobenius norm scaling. Above right: Testing and training loss, same model.
      Above left: Testing and training accuracy of Large MLP with spectral norm scaling. Above right: Testing and training loss, same model.

      Although the train accuracy when using spectral norm scaling doesn’t get as high as in the other two models, it is an accurate predictor of test accuracy during the entire training time. Furthermore, it is the only one of the methods we tested that continues to decrease test loss for the duration of training, where the other two show signatures of overfitting the data and increasing test loss. This is a very interesting finding because it shows that spectral norm scaling can be used to improve the stability of training for MLPs, which is a very important property for deep learning models. This is especially true for MLPs because they are more prone to overfitting than other model types, so improving the stability of training can help prevent overfitting.

      We see that this pattern does not hold for ResNets. Rather, it seems that the Frobenius norm scaling method introduces the most stability, but is still not as stable as the relationship for spectral norm scaling in MLPs. Similarly, because ResNets rely on convolutions, we do not see issues with overfitting in any of the models. Although it appears that spectral norm scaling may improve over the baseline stability, the effect is not as noticeable as the effect from Frobenius norm scaling.

      Above left: Testing and training accuracy of ResNet50 with no norm scaling. Above right: Testing and training loss, same model.
      Above left: Testing and training accuracy of ResNet50 with Frobenius norm scaling. Above right: Testing and training loss, same model.
      Above left: Testing and training accuracy of ResNet50 with spectral norm scaling. Above right: Testing and training loss, same model.

      This is a surprising result considering that spectral normalization was first developed in the context of GANs using convolutional layers for image generation. We will address this disparity in the conclusion.

      Scaling Effects on Spectral Norms

      While both our spectral norm and Frobenius norm scaling algorithms resulted in consistently lower spectral norm values across all epochs compared to no normalization, spectral norm scaling had far and away the largest effect on enforcing low spectral norm values for weight matrices:

      Changes in spectral norm values for each weight matrix in medium MLP (depth 5, width 2000) across epochs. Above left: No norm scaling. Above center: Frobenius norm scaling. Above right: Spectral norm scaling.
      Changes in spectral norm values for each weight matrix in ResNet34 across epochs. Darker lines correspond to earlier layers. Above left: No norm scaling. Above center: Frobenius norm scaling. Above right: Spectral norm scaling.

      Using spectral norm scaling, the spectral norms of both architectures on all layers collapse to values significantly lower than those seen when using Frobenius norm scaling or no norm scaling. The average spectral norm value at the penultimate epoch (199) using spectral norm scaling is 0.8; with Frobenius norm scaling it is 7.8; and with no normalization it is 35.4, on the width 2000, depth 5 MLP architecture.

      It is also interesting that spectral norms are very similar across layers in later epochs when using spectral norm scaling, but the same is not true for the other two experiments: the average standard deviation in spectral norm values across all layers for the last 100 epochs using spectral norm scaling is ~0.02; Frobenius norm scaling is ~3.7; and no normalization is ~18.4 on the width 2000, depth 5 MLP architecture.

      While it may seem obvious that spectral norm scaling would do the best job at encouraging low spectral norm values, this was not evidently the case. While we subtract the best rank one approximation, thus decreasing the spectral norm, the new spectral norm does not necessarily become the target value, as it is possible that the second largest singular value is larger than our target spectral norm. It seemed possible that merely subtracting a rank one matrix would fail to completely curb spectral norm blow up or do it with this level of success. These results show that not only does our method do it successfully, but does it much more so than Frobenius norm scaling. What’s more, the results generalize across wildly different architectures: we see rapid convergence to low singular values in both the ResNet and MLP case roughly around the same epoch.

      Conclusion

      One drawback of our method was the significant increase in training times of our models. Compared to the time it took to train the baseline and Frobenius norm scaling implementations, the spectral norm implementations took between ~400% to ~1,500% longer to train. In order to address this in the future we will implement an adaptive power iteration that stops once the singular vectors converge to a certain threshold. This will allow us to reduce the number of power iterations needed to calculate the singular values, thus reducing the training time.

      An interesting fold in our results was the difference between stability effects in the MLP and ResNet cases. We see that spectral norm scaling has a significant effect on the stability of training for MLPs, but not for ResNets. This is a surprising result considering that spectral normalization was first developed in the context of convolutional layers for image generation. We believe that this may stem from one of two reasons. The first is that we had to reduce the dimensionality of the convolutional matrices in order to use the power iteration algorithm. Although this allowed us to efficiently calculate the values we needed, it may not have been an accurate reflection of the matrix singular vectors. One route to address this in the future is to try initializing the spectral norm target values based solely on the input and output channel sizes, rather than the full size of the inputs and outputs. The second reason is that the convolutional layers in ResNets are not as prone to overfitting as the fully connected layers in MLPs, so the stability effects of spectral norm scaling would not be as noticeable. However, we still see an effect of Frobenius norm scaling, so this may be a matter of mathematical properties of the convolutional layers that we have not yet explored.

      We may see most desired effects on singular values in spectral norm scaling because subtracting the best rank one approximation of the weight matrix does not influence other singular values nor the outer products of their singular vectors. When we view the singular value decomposition as the sum of outer products of singular vectors scaled by singular values, we can see that we only regularize one term in this sum. This may prevent a single outer product from dominating the linear transformation, especially preventing overfitting in MLPs where overfitting tends to be an issue. This is not true of Frobenius normalization, as we scale the entire matrix.

      Overall, our results show that spectral norm scaling is a very effective method for stabilizing training in MLPs and enforcing low spectral norm values in MLPs and ResNets. This shows that spectral norm scaling may be a feasible and generalizable method for stabilizing training in a variety of conditions beyond GANs. Furthermore, we were able to achieve this without the use of a penalty in the loss function, achieving the same effect as a penalty without the negative effects. This is especially important because penalties in the loss function can cause issues with convergence and numerical stability alongside enforcing low rank, which we avoid by using our Norm Scaling algorithm. We believe our results show great potential for further rigorous quantitative research on the spectral view of weight normalization. We hope that our Norm Scaling algorithm will be used as a baseline for investigating spectral normalization algorithms that are both computationally efficient and effective at stabilizing training alongside enforcing low spectral norm values.

      All of our training code can be found in this GitHub Repository.


      \ No newline at end of file diff --git a/blog/2023/adaptive-controller-graph-eom/index.html b/blog/2023/adaptive-controller-graph-eom/index.html new file mode 100644 index 00000000..b9ba180d --- /dev/null +++ b/blog/2023/adaptive-controller-graph-eom/index.html @@ -0,0 +1,140 @@ + Adaptive Controller with Neural Net Equations of Motion for High-DOF Robots | 6.S898 Deep Learning Blogs 2023

      Adaptive Controller with Neural Net Equations of Motion for High-DOF Robots

      This project aims to develop an adaptive control mechanism using a graph neural network to approximate the equations of motion (EoM) for high-degree-of-freedom (DOF) robotic arms bypassing the need for symbolic EoM to build an adaptive controller.

      Introduction

      Adaptive controllers are integral to modern robotic arms, enabling robots to adjust to dynamic environments and internal variations such as actuator wear, manufacturing tolerances, or payload changes. At the heart of such controllers is the formulation of the robot’s Equations of Motion (EoM), typically expressed in the form:

      The standard symbolic form of EoM is represented as:

      \[M(q)q'' + C(q, q') = T(q) + Bu\]

      where:

      • ( M(q) ) is the mass matrix
      • ( C(q, q’) ) represents Coriolis and centripetal forces
      • ( T(q) ) depicts gravitational torques
      • ( B ) is the input transformation matrix
      • ( u ) denotes control input
      • ( q, q’ ) are the joint angle state variables and their derivatives, respectively.

      The symbolic complexity of the EoM increases considerably for robots with a high Degree of Freedom (DOF), due to the analytical resolution of the Lagrangian or Hamiltonian dynamics required. While these equations can be derived algorithmically, the computational burden is significant, and the resulting symbolic equations are extensively lengthy. To illustrate, consider the EoM for a 7-DoF Panda Emika Franka robot arm (link). The code that determines the EoM is extraordinarily verbose.

      The aim of this project is to bypass the need for an explicit symbolic articulation of the EoM by formulating a neural network representation. With an accurately modeled neural network, it could serve as a foundational element in the development of an adaptive controller. The goal is for the controller to adapt a robotic arm’s physical parameters based on calibration sequences and to estimate the mass and inertia matrix of unfamiliar payloads.

      Aside from symbolic representation, the EoM can also be computed numerically at each operating point using the Recursive Inertia Matrix Method , which has a computational complexity of ( O(n^3) ), where ( n ) is the number of joints in the rigid body. Substituting this computation-heavy method with a neural network, we can potentially calculate the forward dynamics in linear time, albeit with a trade-off in memory usage.

      Before we delve into neural net architecture, let’s take a closer look at our problem and how it’s solved right now. To come up with the symbolic equation for the EOM, we use Lagrangian Mechanics, in which we compute the Potential Energy, U, and Kinetic Energy, T, of our system.

      \(L = T - U\) \(\frac{d}{dt} \left( \frac{\partial L}{\partial \dot{q}_i} \right) - \frac{\partial L}{\partial q_i} = u_i\)

      Working through these equations, a pattern emerges that allows us to group the terms into the manipulator equations.

      \[M(q)q'' + C(q, q') = T(q) + Bu\]

      This method works well when the degree of freedom in the system is low. It provides much insight on how the dynamics of the system work. For example, the kinetic energy can be represented as:

      \[T = \frac{1}{2} \dot{q}^T M(q) \dot{q}\]

      Highlighting that ( M ) is symmetric and positive definite. However, as introduced earlier, this method scales poorly with complexity in higher DOF systems.

      However, as shown in the introduction, when this method is used for a 7 DOF system, the resulting equation is extraordinarily complex.

      Bhatoo et al. introduced a graph neural network to represent the potential and kinetic energy of rope systems—a high DOF system—by segmenting the system into short segments. Each segment was then treated as a node in the graph neural network. Although they didn’t derive the forward dynamics using the Lagrangian formulation, the prospect of representing serially linked robot arms with graph neural networks was indicated as feasible.

      The other approach to create the manipulator equation is to numerically calculate it at each operating point. There are two versions of this equation, the inverse dynamics and the forward dynamics version. In the inverse dynamics formulation, we essentially calculate \(M(q)q'' + C(q, q') - T(q) = Bu\)

      Given a particular state of the robot and a desired acceleration, this computes the required torque. The inverse dynamics formulation can be computed with the Recursive Newton-Euler Algorithm with O(n) complexity, where n is the number of joints. The key idea behind this algorithm is that the motion of a body is directly influenced by the previous link. It’s essentially a dynamic programming algorithm in which you can store the motion of one body and then apply it to the next body. This suggests that a directed graph neural net is sufficient to represent our model.

      While very efficient to calculate, the inverse dynamics is not as useful as the forward dynamics version if the end goal is to create an adaptive controller. The forward dynamics model describes the accelerations of the system based on the current state and torque input.

      \[q'' = M(q)^{-1} \left( T(q) + Bu - C(q, q') \right)\]

      This formulation is more useful for an adaptive controller, as we can compare predicted acceleration against actual acceleration, using their difference as a loss to compute gradients with respect to the model parameters. The problem with the forward dynamics formulation is that it requires O(n^3) computation for a serially linked robot arm (the mass matrix inversion must be done). The algorithm for Forward Dynamics is called the Inertia Matrix Method. One physical intuition that can be gleaned from the algorithm is that reaction input torques propagate down the chain. Once again, this indicates that there is a one-way information flow from one link to the next. Given that this version is more computationally expensive, it is more valuable to tackle with a neural net representation than the much faster inverse dynamics problem.

      Experiments and Results

      Generating Training Data

      Utilizing numerical methods implemented in MATLAB, we generated a large volume of training data, spanning the full operational space of the robot arm. We based our robot arm model on realistic parameters from the publicly available data of the Emika Franka Panda, comprising a total of 10 links, seven revolute joints, and two fixed joints. After disregarding the base link, we have a model with 10 parameters for each link (mass, center of mass as a 1x3 vector, and the symmetric inertia matrix flattened into a 1x6 vector) and joint properties (angle, angular velocity, angular acceleration, and torque).

      We simulated the arm moving from one random configuration to another—marked in the image above by an X — recording states, torques, and accelerations during transitions. To introduce variability, we applied realistic perturbations to the physical properties of each link after every 100 recorded motion paths. In total, we accumulated 250,000 data points

      Attempt 1: Graph Neural Net

      As inspired by Bhatoo, we rearrange the dataset as a Graph Dataset based on the PyTorch Geometric Library. Each node contains the 10 physical property parameters, angle, angular velocity, and torque input. In total, each node has 13 features. The output is set to be the angular acceleration of the 7 joints (1x7 vector). As for the edge index, the graph is defined to be directed: information flows either from the last node to the first or from the first node to the last. This is inspired by the physical intuition that forces propagate sequentially from one body to the next, and that motion with respect to the global coordinate frame also sequentially depends on the previous body link.

      We applied nine iterations of the Graph Convolution Layer, ensuring information flow from one end of the arm to the other.

      Despite extensive parameter tuning, learning rate adjustments, and the application of various schedulers, the loss showed no convergence. Potential reasons for this include the difficulty of capturing temporal dependencies and the possible oversimplification of force propagation through the links using graph convolutions. Stacking 9 different graph convolution layers also adds complexity needlessly.

      Attempt 2: LSTM

      Reevaluating the necessity for graph neural networks, we considered the inherent sequential nature of the information flow in our system. There are no branches in the structure of a serially linked robot arm; hence, an LSTM, which excels in capturing long-range dependencies in sequence data, seemed appropriate. The input sequence now reflects the node properties from the previous attempt, and our LSTM architecture is defined as follows:

      class RobotLSTM(nn.Module):
      +    def __init__(self, input_size, hidden_size, hidden_size2, output_size, num_layers=1):
      +        super(RobotLSTM, self).__init__()
      +        self.hidden_size = hidden_size
      +        self.num_layers = num_layers
      +
      +        # LSTM Layer
      +        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
      +
      +        # Fully connected layers
      +        self.l1 = nn.Linear(hidden_size, hidden_size2)
      +        self.l2 = nn.Linear(hidden_size2, hidden_size2)
      +        self.l3 = nn.Linear(hidden_size2, output_size)
      +
      +    def forward(self, x):
      +        # Initializing hidden state and cell state for LSTM
      +        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
      +        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
      +
      +        # Forward propagate the LSTM
      +        out, _ = self.lstm(x, (h0, c0))
      +
      +        # Pass the output of the last time step to the classifier
      +        out = out[:, -1, :]  # We are interested in the last timestep
      +        out = F.relu(self.l1(out))
      +        out = F.relu(self.l2(out))
      +        out = self.l3(out)
      +
      +        return out
      +

      Despite the theoretically simpler representation of the system, the results were still not satisfactory, with stabilization and convergence being unachievable.

      Attempt 3: Transformer

      With LSTM and GNN strategies failing to deliver conclusive results, we pivoted to the more general-purpose Transformer architecture. This paradigm shifts focus from a strictly sequential data flow to a structure capable of interpreting the relationships between all links through its attention mechanism. Note, we also use a sinusoidal positional encoder to maintain the order coherence of the robot arm.

      For the Transformer model, we employ the following architecture, designed to be flexible and adaptable to high DOF systems in future implementations:

      class RobotTransformerModel(nn.Module):
      +    def __init__(self, input_dim=13, d_model=24, mlp_dim=128, nhead=2, num_encoder_layers=5, dim_feedforward=48, output_dim=7):
      +        super().__init__()
      +        self.d_model = d_model  # Store d_model as an instance attribute
      +        self.embedding = nn.Linear(input_dim, d_model)
      +        self.pos_encoder = PositionalEncoding(d_model) # Sinusoidal positional encoding
      +        # Transformer Encoder Layer
      +        self.transformer_encoder = Transformer(
      +            dim=d_model, mlp_dim=mlp_dim, attn_dim=dim_feedforward, num_heads=nhead, num_layers=num_encoder_layers
      +        )
      +        self.output_layer = nn.Sequential(nn.LayerNorm(d_model), nn.Linear(d_model, output_dim))
      +        self.criterion = nn.MSELoss()
      +
      +    def forward(self, src):
      +        src = src.permute(1, 0, 2)  # Shape: [seq_len, batch, feature]
      +        src = self.embedding(src) * math.sqrt(self.d_model)
      +        src = self.pos_encoder(src)
      +        output, alphas = self.transformer_encoder(src, attn_mask=None, return_attn=False)
      +        output = output[0, :, :]  # use the output of the first token (similar to BERT's [CLS] token)
      +        return self.output_layer(output)
      +

      However, even with this advanced architecture, convergence remained elusive, indicating that further restructuring of the problem was required.

      Final Attempt: Physics Informed Structured Transformer

      As nothing seemed to be working, we simplified our problem statement to gain insights that could then be applied to the larger problem later. First, we reformulate the serially linked robot arm dynamics into a double pendulum system with simplified parameters—each link defined by its length and a point mass at the end. The state variables in this reduced-complexity scenario are simply the two link angles and their angular velocities.

      \[\mathbf{M}(q)\ddot{q} + \mathbf{C}(q, \dot{q})\dot{q} = \mathbf{T}_g(q) + \mathbf{B}u\]

      where

      \[\mathbf{M} = \begin{bmatrix} (m_1 + m_2)l_1^2 + m_2l_2^2 + 2m_2l_1l_2\cos(q_1) & m_2l_2^2 + m_2l_1l_2\cos(q_2) \\ m_2l_2^2 + m_2l_1l_2\cos(q_2) & m_2l_2^2 \end{bmatrix},\] \[\mathbf{C} = \begin{bmatrix} 0 & -m_2l_1l_2(2\dot{q}_1 + \dot{q}_2)\sin(q_2) \\ \frac{1}{2}m_2l_1l_2(2\dot{q}_1 + \dot{q}_2)\sin(q_2) & -\frac{1}{2}m_2l_1l_2\dot{q}_1\sin(q_2) \end{bmatrix},\] \[\mathbf{T}_g = -g \begin{bmatrix} (m_1+m_2)l_1\sin(q_1) + m_2l_2\sin(q_1+q_2) \\ m_2l_2\sin(q_1+q_2) \end{bmatrix},\] \[\mathbf{B} = \begin{bmatrix} 1 & 0 \\ 0 & 1 \end{bmatrix}.\]

      In this simpler problem statement, we switch to solving the Inverse Dynamics problem instead which numerically has a computational complexity of O(n). We assume that there is less complexity in this representation (a complete guess), so the neural net doesn’t have to work as hard compared to the Forward Dynamics problem which has computational complexity of O(n^3).

      However, the task now focuses on the inverse dynamics with a reduced computational complexity of ( O(n) ), given that ( M(q) ) can be linearly separated from ( C ) and ( T_g ) and knowing that ( M(q) ) is symmetric and positive definite.

      For this, two Transformer neural networks were created, one for ( M(q)\ddot{q} ) and another for ( C(q, \dot{q})\dot{q} - T_g(q) ). Both models were trained separately with their respective datasets before being combined to model the complete manipulator equation. We can uniquely generate training data that only incite this mode by setting gravity and angular velocity to zero to get only M(q)*ddq = u.

      The architectures for these Transformers were revised to employ a Physics Informed approach, ensuring the adherence to known physical laws:

      class RobotTransformerModelH(pl.LightningModule):
      +    def __init__(self, input_dim = 3, d_model =3, mlp_dim=128, nhead=2, num_encoder_layers=5, dim_feedforward=48):
      +        super().__init__()
      +        self.d_model = d_model
      +        self.embedding = nn.Linear(input_dim, d_model)
      +        self.pos_encoder = PositionalEncoding(d_model)
      +        self.transformer_encoder = Transformer(dim=d_model, attn_dim=dim_feedforward, mlp_dim=mlp_dim, num_heads=nhead, num_layers=num_encoder_layers)
      +        self.output_layer = nn.Sequential(nn.LayerNorm(d_model), nn.Linear(d_model, 3))  # Output is a 1x3 vector
      +        self.criterion = nn.MSELoss()
      +
      +    def forward(self, src, ddq):
      +        src = src.permute(1, 0, 2)  # Reshape for transformer
      +        src = self.embedding(src) * math.sqrt(self.d_model)
      +        src = self.pos_encoder(src)
      +        output, alphas = self.transformer_encoder(src, attn_mask=None, return_attn=False)
      +        output = output[0, :, :] 
      +        output = self.output_layer(output)
      +
      +        # Create a batch of symmetric 2x2 matrices from the batch of 1x3 output vectors
      +        batch_size = output.shape[0]
      +        symmetric_matrices = torch.zeros((batch_size, 2, 2), device=self.device)
      +        symmetric_matrices[:, 0, 0] = output[:, 0]
      +        symmetric_matrices[:, 1, 1] = output[:, 1]
      +        symmetric_matrices[:, 0, 1] = symmetric_matrices[:, 1, 0] = output[:, 2]
      +
      +        transformed_ddq = torch.matmul(symmetric_matrices, ddq.unsqueeze(-1)).squeeze(-1)
      +
      +        return transformed_ddq
      +

      Then we create a separate transformer neural net for C(q, dq)*dq - Tg(q). Similarly, we can generate training data that only excites this mode by setting ddq = 0.

      class RobotTransformerModelC(pl.LightningModule):
      +    def __init__(self, input_dim = 4, d_model =3, mlp_dim=128, nhead=2, num_encoder_layers=5, dim_feedforward=48):
      +        super().__init__()
      +        self.d_model = d_model
      +        self.embedding = nn.Linear(input_dim, d_model)
      +        self.pos_encoder = PositionalEncoding(d_model)
      +        self.transformer_encoder = Transformer(dim=d_model, attn_dim=dim_feedforward, mlp_dim=mlp_dim, num_heads=nhead, num_layers=num_encoder_layers)
      +        self.output_layer = nn.Sequential(nn.LayerNorm(d_model), nn.Linear(d_model, 2))  # Output is a 1x2 vector
      +        self.criterion = nn.MSELoss()
      +
      +    def forward(self, src):
      +        src = src.permute(1, 0, 2)  # Reshape for transformer
      +        src = self.embedding(src) * math.sqrt(self.d_model)
      +        src = self.pos_encoder(src)
      +        output, alphas = self.transformer_encoder(src, attn_mask=None, return_attn=False)
      +        output = output[0, :, :] 
      +        output = self.output_layer(output)
      +
      +        return output
      +

      We picked the Transformer as it’s more general compared to an LSTM or GNN. Furthermore, it can easily be extended to high DOF systems later on by simply working with a longer input sequence. After training these two models independently with their own training data sets, we combined the two pretrained models together to recreate the full manipulator equation with a complete dataset.

      class CombinedRobotTransformerModel(pl.LightningModule):
          def __init__(self, config_H, config_C):
              super().__init__()
              # Initialize the two models
              self.model_H = RobotTransformerModelH(config_H)
              self.model_C = RobotTransformerModelC(config_C)
              self.criterion = nn.MSELoss()
              # Additional layers or attributes can be added here if needed

      def load_pretrained_weights(self, path_H, path_C):
      +    # Load the pre-trained weights into each model
      +    self.model_H.load_state_dict(torch.load(path_H))
      +    self.model_C.load_state_dict(torch.load(path_C))
      +
      +def forward(self, src_H, ddq, src_C):
      +    # Forward pass for each model
      +    output_H = self.model_H(src_H, ddq)
      +    output_C = self.model_C(src_C)
      +    # Combine the outputs from both models
      +    combined_output = output_H + output_C  
      +    return combined_output
      +

      This modular approach, informed by the physical structure of the dynamics, resulted in improved convergence and an adaptive controller with the capability to generalize well to unseen conditions of the double pendulum.

      Conclusion

      Through this journey of building and testing various neural network architectures to approximate the equations of motion for high-DOF robotic systems, it becomes evident that while cutting-edge machine learning tools hold promise, their effectiveness is tied to the physical realities of the problems they aim to solve. Success in neural net modeling involves really understanding the data and the problem you are trying to solve. Here we managed to make a little headway in modeling the EOM of a 2 DOF system by mimicking the structure of the analytical solution.

      For future work, we should take the success in the 2 DOF system and push it for higher DOF with more complex parameters. We can generate data that can isolate specific motion modes of the model that can be used to train sections of the neural net at a time. By then training all the modes independently, we can stitch together the whole structure for the whole dataset.

      \ No newline at end of file diff --git a/blog/2023/alleviating-catastrophic-forgetting-in-classification-tasks-through-strategic-filter-dropout/index.html b/blog/2023/alleviating-catastrophic-forgetting-in-classification-tasks-through-strategic-filter-dropout/index.html new file mode 100644 index 00000000..b64b7921 --- /dev/null +++ b/blog/2023/alleviating-catastrophic-forgetting-in-classification-tasks-through-strategic-filter-dropout/index.html @@ -0,0 +1,42 @@ + A Method for Alleviating Catastrophic Forgetting With Explainability | 6.S898 Deep Learning Blogs 2023

      A Method for Alleviating Catastrophic Forgetting With Explainability

      Using various explainability metrics to target, we freeze layers in CNNs to enable continual learning.

      Introduction

      With recent advancements in deep learning, the intelligence of computers is quickly rivaling that of humans. GPT-4, with significant size and data, is able to score in the 90th percentile of the BAR, 88th percentile of the LSAT, and the 92nd percentile on the SAT . In dermatology, sophisticated computer vision models have outperformed trained professionals in diagnosing skin diseases and cancer . Despite this substantial computational advantage, neural networks notably lag behind humans in their capacity for continuous learning, a skill essential for any intelligent entity. Particularly, they suffer from catastrophic forgetting, a phenomenon in which the learning of a new objective significantly degrades performance on prior tasks.

      The human brain is able to protect itself from conflicting information and reductions in performance on previous tasks using complex mechanisms involving synaptic plasticity . In essence, the brain is able to self regulate the strength of its connections, allowing for neurons to become less activated according to their memory and relevance. This ability has been attributed for the unmatched ability to learn in humans, which has allowed for humans to show improvement in skill on nearly any motor task given training, while still remembering previous information . This, then, is highly desirable for neural networks.

      In contrast to the human’s ability to learn, neural networks significantly alter their parameters when learning a new task. In effect, the network’s understanding of previous tasks is overwritten. This poses a great barrier to the creation of artificial general intelligences, which ultimately depend on continual, life-long learning .

      With the rapid increase in size and complexity of models, the field of model explainability and the desire to understand exactly what models are doing has quickly grown. Specifically in the field of computer vision, effort has been made to understand how models make decisions, what information leads to this decision, and how they learn what to observe . Methods such as saliency mapping, which displays the importance of aspects of an input image to predicting a class, filter visualization, which finds the most activating features for a given filter, and gradient class activation maps, which visualizes the gradients flowing into the final convolutional layer, have all significantly contributed towards the understanding of how models make decisions .

      We propose to make use of these explainability methods for the intelligent freezing of filters of a convolutional neural network. Specifically, we use saliency maps and filter visualizations to consider what a model is observing to classify an image, and then decipher which filters are most strongly contributing to this. In this paper, we contribute the following: 1. We create a method for the ranking of importance of filters in a convolutional neural network. We expand and combine upon previous works in model explainability to understand which filters are most strongly contributing to positive predictions. 2. We create a method for the freezing of filters of a convolutional neural network according to these rankings. We do this by first training on one task, freezing filters according to importance, then retraining the same model on a novel task. In doing this, we both corroborate our ranking system and identify a new strategy for alleviating catastrophic forgetting.

      Related Works

      Continual learning and its core problem of catastrophic forgetting has gotten recent attention in deep learning research. It’s easy to understand why the goal of having a model that can adapt to new data without being completely re-trained is sought after, and there have been many approaches to the problem of aiding the model’s ‘memory’ of past tasks. Solutions range from attaching a significance attribute to certain weights in the model that regularizes change introduced by the new data to explicitly freezing weights via different metrics of the weights’ performance.

      Weight Changing Regularization

      Elastic Weight Consolidation (EWC) approaches the problem of catastrophic forgetting by adding a ‘stiffness’ to the weights of previous tasks dependent on an approximation of the importance they had to previous task performance. The authors of ‘Overcoming catastrophic forgetting in neural networks’ explain EWC as maximizing a posterior of the parameters over the entire dataset, and then splitting up the posterior into a loss over the new task and a posterior of the parameters over the old task. They model the posterior of the old data as a quadratic difference of the original parameters and the current ones multiplied by the Fisher information matrix, so minimizing this prevents parameters from changing too much from what is predictable from the old task’s data. The authors of the original paper showed that EWC was effective at preventing a CNN from forgetting how to classify the MNIST dataset and helping an RL model maintain performance in Atari games. However, EWC is an additional loss metric that must be calculated for each back-propagation and for each previous task; it’s also linear in the size of the output and therefore is prohibitive for high dimensional data.

      Another technique that attempts to use a regularizing factor to slow the retraining of old task parameters is explicitly computing a importance metric for each neuron in the network. The authors denote this method as “Synaptic Intelligence” as they drew their inspiration from the complex adaptation of synapses in the brain contrasted with the simple uni-scalar representation of neurons in a MLP network, and by allowing the network to account for the importance of they could help a neural network model the human behavior of continual learning. The metric they calculate as importance is based on 1) how much a parameter contributed to the reduction of loss over the entirety of training and 2) how much a parameter changed during training. They compared their performance to EWC and standard SGD on the MNIST dataset and found similar results to EWC while beating naive SGD as the number of consecutive tasks increased.

      Architectural Changes

      A drastically different approach that a couple papers investigated was preventing interference between training runs by completely freezing the weights in parts of the model after completing a task’s training. The papers here differentiate themselves via the method they decide to freeze certain weights and layers. The earliest such paper we found was detailing a method called Packnet , where the weights they selected to keep via freezing was purely based on a certain percentage of the weights with the highest magnitude. They also made the decision to completely wipe the weights they did not freeze and then do a couple epochs of training on the model that was a mix of frozen and pruned weights. Their strategy achieved performance roughly equal to networks jointly trained on all the data at once and outperformed the naive strategy of simply retraining, validating a version of the freezing strategy.

      Instead of simply measuring the magnitude of weights to decide what layers or specific weights to freeze, the authors of a paper on catastrophic forgetting explainability use a custom metric to find a layer that scores highest on their metric and subsequently freeze all the layers prior to that layer. Their metric is an analysis of the difference in activation maps of a layer in the model pre- and post-training on the new task. They posit that this difference in activation is a measurement of how much a layer has forgotten how to activate in response to an input. Their reasoning for freezing the layers prior to the layer most changed by the new sample set is that the errors that induce catastrophic forgetting propagate throughout the network, so identifying the layer with the sharpest drop-off indicates that prior layers are to blame. This seemingly builds off an earlier paper that uses a similar activation map difference scheme to delineate layers that change more easily during training and instead directly freezes those fragile layers rather than those prior. In both papers, their results for this technique are an improvement over their ‘fine-tuning’ baseline, but the more recent paper’s results were not that differentiated from just selecting a layer to freeze before training a new task.

      Explainability Metrics

      There exist many other explainability metrics with which one can target layers prior to training on a new task to try to prevent interference, an interesting one being saliency maps. Saliency maps attempt to capture the importance of features of the input on the output of a deep neural network. In the domain of CNNs, this can be thought of as both the pixels and larger features, such as a window on a car, that contribute to a correct classification; saliency maps are analogous to trying to map out what parts of an image a model uses to make a correct identification. A model of saliency maps we felt compelled to use in our project is the full-gradient approach, which creates saliency maps from the gradients of each layer. This strategy encapsulates the importance of both the inputs and the impact of neurons throughout the network on the saliency map. As parts of a neural network might suffer from varying degrees of catastrophic forgetting, being able to identify the saliency of individual neurons is a desirable quality in choosing a metric that explains catastrophic forgetting.

      Methods

      Model Type

      We tested our method using VGG16. VGG16 is a deep convolutional neural network that has achieved impressive results on the ImageNet classification challenge, with a top-1 accuracy of 72% . Its sequential nature lends itself well to explainability methods like saliency maps. Further, it is relatively quick to train, even given the constraints of Google Colab. All of these attributes were highly desirable, as it allowed for rapid iteration for hyperparameter tuning, computation of saliency maps and filter visualizations, and a direct way to compare the viability of our freezing method through image classification accuracy. To ensure that the model did not have inference on any tasks prior to training, we randomly initialized the parameters.

      Figure 1: Schematic of VGG16 Architecture

      Saliency Mapping

      The computation of saliency maps is grounded in the principles of backpropagation. It follows a multi-staged procedure which uses gradients to consider the impact of each pixel in an image. First, it computes the partial derivatives of the target output with respect to individual segments of the input image. Then, it uses backpropagation to propagate error signals back to the input layer. It does this in order to identify the impact of pixels. It considers pixels with larger signals to have the greatest impact on the decision-making process. There are a bountiful number of papers which propose different improvements on the original saliency map. When selecting a procedure, we identified two key features necessary for a useful visualization. We believed that a saliency map must have a full explanation of why a model made its prediction. Secondly, we believed that rather than considering each individual pixel, it should cluster pixels together to consider importance. After testing, we ultimately used full-gradient saliency maps. Code for this method is publicly available on the GitHub created by the authors of this paper, fullgrad-saliency.

      Figure 2: Raw image and saliency map of a mug.

      The essence of full-gradient saliency maps lines up directly with the key features that we identified. To begin, it defines importance in the input image as a change in the feature resulting in change in model output. It seeks to illustrate a full answer for the model’s output. To this end, it considers both global and local importance of features in the input image, which results in a method which both weighs the importance of each pixel individually, but also considers the importance of different grouping of pixels.

      Filter Visualization

      In order to compute what different filters are looking at, we made use of the Convolutional Neural Network Visualizations GitHub repository, which is a useful library that has implementations of many popular explainability methods . Specifically, we used the implementation of a filter visualization method from the paper “Visualizing Higher-Layer Features of a Deep Network”, which uses backpropagation to maximize the activation of a given filter . With this, we can compute exactly what a filter is attempting to observe in an image. This method provides two different options for creating filter visualizations - one with gradient hooks, and one without.

      Figure 3: Heatmaps of Feature Visualization(left), Actual Image(middle), Saliency Map(right)

      Training Procedure and Dataset Selection

      We created two datasets from CIFAR-100 . We randomly selected 20 classes out of the total 100 and then divided these groups into two. We filtered the images from CIFAR-100 so that only images of those classes were in our datasets. We did this to ensure that the tasks the model was attempting to learn were of equal difficulty. We chose CIFAR-100 because we believed it was of adequate difficulty for the VGG16 architecture. We normalized the data and augmented it with random horizontal flips and random croppings. For the first instance of training, we trained using stochastic gradient descent for 10 epochs with a learning rate of 1E-3. We did not implement any regularization or early stopping, as it was not necessary given training losses and testing losses. After this training, we used the described methods for calculating saliency maps and filter visualizations. For each class in the first dataset, we calculated the most useful filters by comparing saliency maps for the class to all filters. We compared these through multiple metrics, including mean squared error and Pearson correlation. To account for the fact that different layers of convolutional neural networks capture different types of information, we froze some percent of filters in each individual layer rather than the entire model. We left this percent as a hyperparameter. To ensure fairness for each task, the second instance of training followed the same exact procedure as the first - the optimizer was stochastic gradient descent, we trained for 10 epochs, and used a learning rate of 1E-3.

      Results

      For the sake of hyperparameter tuning and evaluating different strategies, we froze the datasets to be the first and second ten images of CIFAR-100. We sought to check how the number of filters we freeze changes performance across datasets, which metric is most useful in comparing saliency images to filter visualizations, and how viable this method is as compared to training on a single, larger dataset. Prior to the second round of training, the test accuracy on the first dataset was .4566 and the test accuracy on the second dataset was .1322.

      The impact of freezing varying numbers of filters is in line with expectation - the more filters you freeze, the less inference you can gain, but also the more you will remember your previous task. In the table above, we can observe that with 25% of the filters frozen, we perform the best on dataset 2, with an accuracy of 39.2%, but the worst on dataset 1, with an accuracy of 20.7%. In contrast, when 75% of the filters are frozen, we maintain an accuracy of 38.4%, but do not learn about the new task, with an accuracy of 25.7%.

      We found that mean squared error was the greatest metric for the comparison of saliency maps and filter visualizations, recording the highest average accuracy and also retaining much more information about the first dataset. From the table, we can see that when freezing 50% of filters in the network and selecting using mean squared error, we do roughly ten percentage points worse on the first dataset, but gain nearly double this loss on the second dataset. When compared to the randomly frozen method, it performs significantly better on the first dataset. This suggests that the filters that we froze are actually more important for correct predictions than the average. It makes sense that Pearson correlation is not particularly useful for comparison - it is not able to take into account the spatial information that is crucial for this comparison.

      Finally, we found that training tasks sequentially and using the freezing method with a comparison metric of mean squared error slightly outperforms training the model on a larger, combined dataset at once. With this method, the model performed five percentage points better on predicting classes in both the first and second dataset. It is important to note that the accuracy reported for the model trained on the combined dataset is just the average accuracy over all of the classes, not necessarily split by the datasets. Still, to ensure fairness, the training procedure used for the combined dataset was the same as for the sequential training procedure, but trained for twenty epochs at once rather than ten epochs at two different times. This result implies that intelligently freezing filters of a neural network can be a viable strategy for overcoming catastrophic forgetting, even if just in a smaller setting.

      Discussion

      Takeaways

      Through using convolutional neural network explainability methods such as saliency maps and filter visualizations, we were able to observe key insights into the relevance of different filters in VGG16. Quantitatively, we were able to measure this by freezing these layers and observing how well performance persisted after training on a new task. We found that freezing filters according to the similarity of their visualizations to saliency maps retains significantly more inference on a previous task, suggesting that these filters were more relevant to the previous task. By freezing these weights, we were also able to outperform simply training on a larger dataset. We believe that more research should be directed towards applying explainability methods to achieve the objective of continual learning. Although there has been previous work in the past, these often rely on stopping catastrophic forgetting once it has been observed, rather than determining which parts of the network are too integral to a task to be retrained.

      Limitations

      Because we are completely freezing weights, it is unlikely that this method could be generalizable to an arbitrary number of tasks. Future works could explore the integration of elastic weight consolidation into our pipeline rather than stopping change entirely. Doing class by class freezing of filters also introduces a cap to the number of tasks that this method could generalize to and the number of classes that can be predicted in each task. During our research, we concluded that this approach was better than attempting to combine saliency maps, but future work could also explore how to effectively combine saliency maps to capture important aspects of each class. Further, this method relies on the comparability of saliency maps and filter visualizations. While it makes intuitive sense that a filter is more relevant if it is seeking the parts of an input that are most important for a correct prediction, it is not as simple as directly comparing the two. While we attempt to alleviate some of this issue by doing layer-by-layer freezing, future work could certainly explore better metrics for choosing filters, especially given the stark difference in performance when using something as simple as mean squared error compared to Pearson correlation. Finally, the computational overhead of the method in combination with the limitations of Google Colab resulted in an inability to train on high-resolution images and use larger models. We believe that using high-resolution images would significantly benefit the feasibility of the method, as saliency maps are much more clearly defined. We again leave this to future work, as we are unable to explore this path.

      \ No newline at end of file diff --git a/blog/2023/attention-regularization/index.html b/blog/2023/attention-regularization/index.html new file mode 100644 index 00000000..fa10317f --- /dev/null +++ b/blog/2023/attention-regularization/index.html @@ -0,0 +1,32 @@ + Regularization Techniques for Attention Layers in Transformer Models | 6.S898 Deep Learning Blogs 2023

      Regularization Techniques for Attention Layers in Transformer Models

      Attention layers are an integral part of the success of transformer models, but can also lead to overfitting on parts of input data when there is limited training data. Therefore, researchers have proposed methods to regularize attention layers to reduce overfitting and increase generalizability. This blog will analyze popular methods and explore novel approaches to regularization in attention layers.

      Introduction

      Transformer models are exceptionally popular and successful at completing many tasks. However, they can overfit to their training data if they are not given enough data to generalize. Frequently, part of the reason for overfitting is the overfitting of a self-attention layer, which highlights important tensors in the model. However, if there is not enough data, the attention layer can overfit to the training data and highlight some tensors too much. Therefore, researchers have proposed methods of regularizing attention layers. Adding regularization can be complex and there have been many different approaches to solving this issue, from simply smoothing attention layers to encouraging multi-headed models to approach different solutions. Therefore, there are differences in the effects of different regularization methods and some might perform better in different circumstances. There does not seem to be a standard approach to dealing with this form of regularization and while many authors have claimed their regularizations have positive effects on training, there are few comparisons of regularization methods. In this study, we will analyze previous work on regularizing self-attention layers and propose new regularization techniques to identify the advantages and disadvantages of differing models.

      Background

      There are many proposed regularization strategies for self-attention layers. We implement and utilize many of the more popular strategies in this study while also drawing inspiration from other methods in proposed methodologies. However, we could not find comparisons across regularization methods or implementations of these methods publicly available. Therefore, we implemented previously proposed strategies and proposed new regularization strategies based on methods seen in fully connected neural networks. The methods used fall into the following three categories. We will explain the exact methods and implementations used for each of these three categories in the methodology section. They represent a solid overview of the self-attention regularization space and contain the most popular methods currently in use.

      Dropout Based Methods

      Dropout based methods involve randomly setting a specified fraction of the input units to zero during training time, which helps in preventing overfitting. This prevents the model from having all the information during training and therefore forces the model to generalize during training.

      Weight Smoothing Methods

      Weight smoothing methods aim to regularize the self-attention layer by modifying the weights such that the attention weights are closer to the uniform distribution and do not overly emphasize specific inputs. This helps prevent overfitting by not allowing the model to only use a few inputs.

      Cross Head Methods

      Cross head methods involve techniques that operate across different attention heads, aiming to diversify the learned representations and prevent redundancy . Therefore, the goal is to prevent each head from being similar to other heads.

      Methodology

      Overall Architecture

      We begin by implementing and benchmarking a vision transformer with no regularization. We had previously implemented a transformer model as part of 6.s898 problem set 3, so we used this as a basis for our models. This model follows an architecture stemming from An Image Is Worth 16X16 Words. This transformer was easily modifiable and relatively simple, and so it served as a good basis for our adjustments. The framework of the architecture goes as follows.

      1. Take an image and split it into patches of specified size.
      2. Embed these patches and add a positional encoding to their embedding.
      3. Treat these embeddings as a sequence input to a transformer model.
      4. Use a transformer model with multi-head self-attention to transform the input into some specified space.
      5. Use this output to classify the image.

      For this specific model, we use a 6 layer transformer with 5 self-attention heads and a patch size of 4. We will be focusing on the multi-head self-attention phase of the transformer model. The following is a diagram of the overall architecture of a vision transformer.

      Diagram of Vision Transformer Model

      Data

      We use the CIFAR-10 and CIFAR-100 datasets for this study . CIFAR-10 consists of 60,000 32x32 color images representing 10 different classes. These classes are airplanes, cars, birds, cats, deer, dogs, frogs, horses, ships, and trucks. They are evenly distributed, such that there are 6,000 images of each class. CIFAR-100 uses the same format, but instead has 100 evenly distributed classes. We split this data into training and test sets and tested the different forms of regularization. We found that our transformer model with no regularization could easily achieve near-zero error on both sets of training data, but only achieved around 60% in test accuracy for the CIFAR-10 dataset and around 30% accuracy on the CIFAR-100 dataset. Therefore, the model is overfitting to the training data and testing regularization methods on this dataset could help the model generalize more on the test data.

      Example of CIFAR-10 Images

      Regularization Methods

      We tested the following regularization methods for our model. We tested models contained within the three different categories of regularization mentioned in the background above, these being dropout based methods, weight smoothing methods, and cross-head methods.

      Dropout Based Methods

      DropColumn :

      This method uses dropout, a common regularization method used in fully connected neural networks, in self-attention layers. This can force the model to generalize better and not rely on specific inputs as much. The authors propose the following methodology to add dropout to the model, which is similar to standard dropout techniques in neural networks. To perform dropout, each column in the attention weight matrix is sampled from a Bernoulli distribution with some probability. We use a dropout ratio of 0.2 for these experiments. We set the sampled columns to zero weight during training. Therefore, we are able to randomly drop columns in the attention weight matrix.

      DropHead :

      We can also perform dropout on the heads across the multi-head attention layer. With this method, we completely drop heads during training to reduce reliance on particular heads and increase the generalizability of the model. We use a dropout ratio of 0.2 for these experiments. This prevents the model from being dominated by a few attention heads.

      Weight Smoothing Methods

      Relaxed Attention :

      This method smooths the attention weights in the self-attention layer to reduce overfitting. This helps reduce the magnitude of the highest attention scores. We do this by mixing in the uniform distribution to attention weights during training. We use some parameter $ \color{white} \gamma $ to evaluate different levels of mixing. Therefore, we apply the following function to our self-attention weights.

      $ \color{white} A[i,j] = (1-\gamma) \times A[i,j] + \gamma \times \frac{1}{T}, \quad \forall i, j \in [0,1,…,T]$

      We use $ \color{white} \gamma = 0.1 $ for our experiments. This adds a low level of uniformity but prevents the model from only attending upon a small number of tensors during training. Therefore, this should limit the amount of overfitting that is possible.

      Noise Injection

      Noise injection has been used to regularize fully connected neural networks, but we have not found any literature that proposes using noise injection to regularize self-attention layers. We propose two methodologies to add regularization and robustness to our model training. We inject noise into our input embeddings with the following formula.

      $ \color{white} x_{i,j}^{noised} = x_{i,j}+ \frac{1}{100} * median(x) * N(0,1) $

      1. Overall Noise Injection: The first methodology involves simply adding noise to the input during training. We do this by adding Gaussian random noise to the input before calculating self-attention weights in each layer of the transformer.

      2. Individual Head Noise Injection: Our second proposed methodology takes advantage of the multi-headed transformer design. We add different Gaussian random noise to each head, such that the heads will receive different inputs. Therefore, the model must become more robust to different inputs.

      Cross-Head Methods

      Decorrelation

      We propose adding a decorrelation term to our loss function. The goal of this loss is to reward differences across attention heads. We begin by calculating the self-attention weights for all of the attention heads. We then compute the pairwise dot products of each head’s attention weights. This will increase the loss if there are heads that are highly correlated. This will cause the heads of the network to differ from the other heads in the network and hopefully generalize better. Therefore, we use the following loss term.

      $ \color{white} \text{Added Loss} = \sum_{i={0,…,H},j={i+1,…,H}} \frac{\text{sum}((\Lambda_i^T \Lambda_j)^2)}{\text{Number of elements in }\Lambda_i^T \Lambda_j}$, where H is the number of heads and $ \color{white} \Lambda_i$ is the ith attention head weights.

      This method is inspired by another method, multi-head attention with disagreement regularization. However, the disagreement regularization method relies on calculating more differences than just the attention weight matrices, which is out of the scope of these experiments.

      Normalization

      We propose adding the 2-norm of all elements in the attention weight matrix to the loss function to limit the emphasis of individual inputs. Therefore, this will smooth the weights and reward more uniform predictions. This should reduce overfitting and make the model more generalizable. We calculate this norm using $ \color{white} \frac{\text{torch.linalg.norm(attention weights)}}{\text{number of elements in attention weights}} $. This computes the 2-norm of all elements across attention heads and adds more loss to weights that emphasize specific inputs more than others. Therefore, this should add smoothing to the weights.

      Training

      We train each model for 25 epochs on the full training set with a batch size of 256. We use the AdamW optimizer, with a learning rate of 0.001. We use the following parameters for our vision transformer.

      Parameter n_channels nout img_size patch_size dim attn_dim mlp_dim num_heads num_layers
      CIFAR-10 3 10 32 4 128 64 128 5 6
      CIFAR-100 3 100 32 4 128 64 128 5 6

      We train each model individually on both datasets.

      Results

      CIFAR-10

      We begin by analyzing the training results on the CIFAR-10 dataset.

      Training Loss on the CIFAR-10 Dataset
      Training Accuracy on the CIFAR-10 Dataset

      We see that most of the models, except for the dropout based models, achieve near-zero error and perfect accuracy on the training set. Therefore, we see that the dropout term is stopping the model from perfectly memorizing the dataset, but all other regularization techniques are not forcing the model to change the weights enough to prevent perfect accuracy.

      Test Loss on the CIFAR-10 Dataset

      Looking at the test results, the two dropout models have much lower loss achieved on the test dataset. The rest of the models have similar losses on the test dataset.

      Test Accuracy on the CIFAR-10 Dataset

      We see that the two dropout methods also have higher accuracy than the model without regularization. However, the decorrelation model has the highest test accuracy. Overall, the test dataset results are significantly lower than state of the art and a more advanced model may be needed to achieve better performance.

      CIFAR-100

      We move on to training and testing the models on the CIFAR-100 dataset. This dataset has more classes and therefore fewer examples of each class. Therefore, the model finds it more difficult to generalize on the test dataset.

      Training Loss on the CIFAR-100 Dataset

      Again, we see that all methods except the dropout based methods achieve near-zero error.

      Training Accuracy on the CIFAR-100 Dataset

      We see similar results to the CIFAR-10 dataset in training. The two dropout methods are unable to achieve perfect loss and accuracy but all other methods are able to. This includes the methods with added loss, that being the normalization method and the decorrelation method. This will depend on the parameters of the model and these models would have higher loss if we used more emphasis on the added loss.

      Test Loss on the CIFAR-100 Dataset

      We see that the two dropout methods have significantly lower loss on the test dataset, with all other methods performing similarly.

      Test Accuracy on the CIFAR-100 Dataset

      We again see consistent results with the CIFAR-10 dataset. The two dropout methods and decorrelation improve the accuracy on the test set, while the others are about the same as without normalization. In this case, the drophead method performs the best.

      Further Research

      Further research is needed to further improve the generalizability of this transformer architecture for these datasets. The model still has overfitting issues, even with high regularization and so more research with different architectures or regularization methods is needed to improve the study. Further comparison of regularization methods on alternative datasets and types of data, such as text, would also be valuable to look at.

      Conclusion

      Regularization is an important tool to reduce overfitting and improve the generalizability of a model. The results show that adding various forms of regularization can improve the results of a model, but our implementations did not cause dramatic change to the ability of the model to generalize to the test set. Most of the models still had a very large gap between their training accuracy and test accuracy. However, we did see notable improvements for both the dropout models and the decorrelation model. The dropout models were the only models that added regularization such that the model could not perfectly memorize the training set. Therefore, their training accuracy was significantly lower but they also had higher test accuracy. Additionally, the decorrelation model was also successful. While the model followed a similar pattern during training to the model without regularization, the test accuracy was generally higher, suggesting the added error did force the model to learn different parameters. Therefore, based on these results, adding regularization can be helpful in improving the generalizability of transformer models, especially when they have limited data. The other methods, such as the noise based methods, normalization, and relaxation did not appear to have a significant effect on training or test outputs. It is likely that alternative parameters or architectures are needed to realize their effect. Lastly, while this analysis was only completed using vision transformers, different datasets or network architectures may have significantly different results. Therefore, these other regularization methods may be more successful in other contexts. However, these tests prove that there are circumstances in which regularization can have a beneficial effect on transformer performance and is therefore a worthwhile experiment when dealing with overfitting transformers.

      \ No newline at end of file diff --git a/blog/2023/autodecoders/index.html b/blog/2023/autodecoders/index.html new file mode 100644 index 00000000..858d86c2 --- /dev/null +++ b/blog/2023/autodecoders/index.html @@ -0,0 +1,42 @@ + To Encode or Not To Encode: The Case for the Encoder-free Autodecoder Architecture | 6.S898 Deep Learning Blogs 2023

      To Encode or Not To Encode: The Case for the Encoder-free Autodecoder Architecture

      While the traditional autoencoder architecture consists of an encoder and a decoder to compress and reconstruct information with only the most prominent features, some recent work have begun to utilize an alternate framework, the autodecoder, in specific applications in the field of representation learning. Skipping the encoder network altogether and learning latent codes directly as parameters, we aim to compare the two architectures on practical reconstruction tasks as well as dive into the theory of autodecoders and why they work, along with certain novel features that they bring.

      Autodecoders

      Introduction

      Autoencoders have been a part of the neural network landscape for decades, first proposed by LeCun in 1987. Today, many variants of the autoencoder architecture exist as successful applications in different fields, including computer vision and natural language processing, and the variational autoencoder remains among the forefront of generative modeling. Autoencoders are neural networks trained to reconstruct their input as their output via compression through dimensionality reduction, accomplishing this task with the use of an encoder-decoder network.

      Autoencoders comprise of the encoder network, which takes a data sample input and translates it to a lower-dimensional latent representation consisting of only the most necessary features, and the decoder network, which attempts to reconstruct the original data from this encoding. By learning a compressed, distributed representation of the data, the latent space learned by autoencoders is usable for a plethora of downstream tasks.

      With traditional autoencoders, both the encoder and decoder are trained, but for certain applications— particularly generative tasks— only the decoder is utilized for inference. Because the encoder itself is not used at test time, training an encoder may not be an effective use of computational resources; the autodecoder is an alternative architecture that operates without an encoder network and brings some novel benefits.

      Rather than using the encoder to encode an input into a low-dimensional latent code, each sample in the training set begins with a randomly initialized latent code, and the latent codes and decoder weights are updated jointly during training time. For inference on new data, the latent vector for a given sample is then also randomly initialized and updated through an additional optimization loop with the decoder’s frozen weights.

      The architecture for an autoencoder (top) compared to that of an autodecoder (bottom).

      Are explicit encoders necessary for image reconstruction? What are the unique benefits that come from using decoder-only architectures? One interesting application of autodecoders is the ability to reconstruct complete samples from partial inputs. The main focus of our research revolved around testing this ability, answering the question of how much of a sample is required for a complete reconstruction using an autodecoder given an expressive latent space, and comparing its performance to that of an autoencoder.

      Furthermore, we discuss additional applications in various fields that other research has accomplished in part due to the utilization of the autodecoder architecture over the traditional autoencoder, with a focus on the beneficial properties that we explore in our experiments, including partial reconstructions.

      Various works in the literature have utilized autodecoder frameworks in the past along with providing rationale for their usage, mainly for tasks related to reconstruction or generative modeling through representation learning. However, none have provided standalone examples of their use, something we aim to accomplish in this blog.

      The Generative Latent Optimization framework was introduced by Bojanowski et al. (2017) as an alternative to the adversarial training protocol of GANs. Instead of producing the latent representation with a parametric encoder, the representation is learned freely in a non-parametric manner. One noise vector is optimized by minimizing a simple reconstruction loss and is mapped to each image in the dataset.

      Tang, Sennrich, and Nivre (2019) trained encoder-free neural machine translation (NMT) models in an endeavor to produce more interpretable models. In the encoder-free model, the source was the sum of the word embeddings and the sinusoid embeddings (Vaswani et al., 2017), and the decoder was a transformer or RNN. The models without an encoder produced significantly poorer results; however, the word embeddings produced by encoder-free models were competitive to those produced by the default NMT models.

      DeepSDF, a learned continuous Signed Distance Function (SDF) representation of a class of shapes, was introduced by Park et al. (2019) as a novel representation for generative 3D modelling. Autodecoder networks were used for learning the shape embeddings, trained with self-reconstruction loss on decoder-only architectures. These autodecoders simultaneously optimized the latent vectors mapping to each data point and the decoder weights through backpropagation. While outperforming previous methods in both space representation and completion tasks, autodecoding was significantly more time-consuming during inference because of the explicit need for optimization over the latent vector.

      Sitzmann et al. (2021) introduced a novel neural scene representation called Light Field Networks (LFNs), reducing the time and memory complexity of storing 360-degree light fields and enabling real-time rendering. 3D scenes are individually represented by their individual latent vectors that are obtained by using an autodecoder framework, but it is noted that this may not be the framework that performs the best. The latent parameters and the hypernetwork parameters are both optimized in the training loop using gradient descent; the LFN is conditioned on a single latent variable. Potential applications are noted to include enabling out-of-distribution generalization through combining LFNs with local conditioning.

      Scene Representation Networks (SRNs) represent scenes as continuous functions without knowledge of depth or shape, allowing for generalization and applications including few-shot reconstruction. SRNs, introduced by Sitzmann, Zollhöfer and Wetzstein (2019), represent both the geometry and appearance of a scene, and are able to accomplish tasks such as novel view synthesis and shape interpolation from unsupervised training on sets of 2D images. An autodecoder framework is used to find the latent vectors that characterize the different shapes and appearance properties of scenes.

      Methodology

      Traditional Autoencoder

      To establish a baseline, we first trained a convolutional autoencoder network containing both an encoder and decoder on a version of the MNIST dataset normalized and padded to contain 32x32 sized images. For our autoencoder architecture, we utilized convolutional layers with ReLU nonlinearity.

      The training and validation losses from the training loop for the autoencoder.
      The latent space learned by the autoencoder, color-coded by digit label and visualized through a 2-dimensional t-SNE plot. We see the expected result, with consistency and separation.
      A sample output from an unseen image after training. We can see that our small convolutional autoencoder does a fairly good job at learning how to compress simple information into a single latent code and decode it into its original form.

      Autodecoder

      We implemented and trained an autodecoder on the same dataset by creating a convolutional decoder that takes latent codes as an input and transforms them into full images. We utilized transpose convolutions to upscale the images while additionally concatenating normalized coordinates to embed positional information, and also used leaky ReLU layers for nonlinearity.

      For training, the latent codes for 10,000 images in our training set were randomly initialized. The loss for our autodecoder then included three components: the reconstruction loss; the latent loss, which encourages latent values to be closer to zero in order to encourage a compact latent space; and the L2 weight regularization, which prevents the decoder from overfitting to the training set by encouraging the model weights to be sparse.

      The loss function used to train the autodecoder. During inference, a custom loss function can be used based on the application.
      The training and validation losses from the training loop for the autodecoder. The validation loss has no actual meaning in the autodecoder framework, as new images would have a randomly initialized latent code and so would output nonsense. This loss was included simply to demonstrate this feature.

      Below are progressive reconstructions on the training data performed by the autodecoder as it trained and optimized both the decoder weights and the training set’s latent codes. We can observe that the digits’ general forms were learned before the exact shapes, which implies good concentration and consistency of the latent space between digits of the same class.

      Progressive reconstructions from top to bottom (model outputs compared to ground truth): 1. Decoding a randomly initialized latent code outputs nonsense. 2. The correct digit is reconstructed, implying that the latent space is improving, but the specific shape differs from that of the ground truth image. 3. The output’s shape begins to better match that of the ground truth. 4. The autodecoder and latent code are optimized to be able to effectively reconstruct the ground truth image.
      The latent space learned by the autodecoder, also visualized through a 2-dimensional t-SNE plot. When compared to the t-SNE plot representing the latent space learned by the autoencoder, we again see consistency but notice that here the clusters are more compact. While the distance between clusters in t-SNE plots does not have a definite meaning, this could potentially imply that the features of shapes, rather than the shapes themselves, are better learned, as different digits share similar features (curves, straight lines, etc).

      Upon training the autodecoder, for inference on a new image we first freeze the decoder weights and then run an additional gradient descent-based optimization loop over a new randomly initialized latent code with reconstruction loss.

      Output from the trained autodecoder on a new image from the test set.

      Experimentation

      One benefit of the autodecoder framework is that because we have an additional optimization loop for each input during inference, we are able to do varying pixel-level reconstructions, whereas an autoencoder is designed and trained to reconstruct complete images each time.

      We demonstrate this feature in our experiments below by applying center masks to our images before autoencoding or decoding.

      1: We trained a traditional autoencoder with generic reconstruction loss, and input an image with a mask in the center. The output is expected, as the autoencoder learned to reconstruct whatever it saw, and so the empty space from the mask is included in the result.

      The input image
      The reconstructed image compared to the image without the mask.

      2: We trained a traditional autoencoder with reconstruction loss without considering a centered square area and input an unmodified image. The output is again expected, as the autoencoder was trained to fully disregard the center area, and so the output is empty in that region.

      The area of the images that the autoencoder is trained to learn on.
      The model output compared to the original image.

      3: We trained an autodecoder with generic reconstruction loss, and during the optimization loop for inference we utilized a custom loss function that did not consider the masked area. However, in this case, we are still able to reconstruct the original image to varying levels of success because of the latent space we originally learned through the training loop.

      Shown below are the areas optimized in the loss functions, along with the decoded output and original image.

      An 8x8 mask.
      A 12x12 mask. Even with significant information about the digit missing, the autodecoder is able to sufficiently reconstruct the ground truth image based on the learned information.
      A 16x16 mask.
      A 20x20 mask. Although the reconstructed digit is ultimately incorrect, we see that we are able to get very close even with extremely limited inputs.

      To analyze and compare the latent spaces learned by both our autoencoder and autodecoder, we additionally perform linear interpolation (with α=0.5) between the embeddings of two images and include their decoded results below.

      The output of the decoded interpolation of two embeddings from the autoencoder.
      The output of the decoded interpolation of two embeddings from the autodecoder.

      The autoencoder output was somewhat expected due to the simplistic nature of the MNIST dataset, and we can see a merge of the two images with equal features of both.

      More interesting was the output for the autodecoder, which simply returned an image consisting of the pixel average of both images. Some hypotheses for this result include:

      • The shape of the latent space for the learned autodecoder potentially being one that does not pair well with linear interpolation, causing linear interpolations in latent space to be equivalent to those in the data space. Meanwhile, the shape of the latent space for the autoencoder might better match a Gaussian, which translates to effective nonlinear interpolations in the data space, which is desired.
      • The inductive bias from the existence of the encoder architecture allowing for better interpolatability.

      Conclusion

      Discussion

      While autoencoders (and variations such as VAEs) have traditionally been the standard architectures for representation learning, we explore an alternate autodecoder architecture, in which the encoder is excluded and individual latent codes are learned along with the decoder. We investigated the necessity of an explicit encoder in representation learning tasks and found that even without an encoder network, we are able to learn latent representations of input data through the optimization of randomly initialized latent codes during the training loop. Through this alternate dimensionality reduction process, we showed that we were still able to learn a consistent latent space on a multi-class dataset. Furthermore, we showed that through the use of an additional optimization loop for inference rather than learned encoder weights, the autodecoder can learn to reconstruct incomplete observations through pixel-level optimizations.

      The autodecoder has the potential for many further applications beyond the scope of the research and experiments introduced in this blog. As an example, the task of prior-based 3D scene reconstruction in the field of computer vision, in which novel views of a 3D scene can be generated from a limited number of static images of that scene along with their camera poses, utilizes the autodecoder architecture to guarantee better out-of-distribution views. This task involves the use of camera pose as an additional source of information in addition to input images, something that the encoder itself is unable to integrate when encoding images, leading to the valuable scene representation information being left out. Meanwhile, because the latent code itself is learned in an autodecoder, it is able to use the camera pose to effectively generalize to novel viewpoints. This serves as just one of several examples of the autodecoder being able to carry out tasks normally gatekept by the limitations of the encoder.

      Limitations

      Some limitations of the encoder-free architecture include certain fallbacks discussed in our experiments, including the difficulties in generating satisfactory novel outputs through linear interpolation of the latent space. Furthermore, while the existence of a secondary optimization loop during inference comes with interesting properties such as being able to define unique loss functions for different purposes, this can be more computationally or temporally costly than running inputs on a trained encoder for inference. Regardless, as much of the research around this topic has emerged only within the past several years, it can be expected that autodecoders and their unique properties will continue to emerge, evolve, and find use in novel applications in the years to come.

      References

      Robin Baumann. Introduction to neural fields, 2022.

      Piotr Bojanowski, Armand Joulin, David Lopez-Paz, and Arthur Szlam. Optimizing the latent space of generative networks. arXiv preprint arXiv:1707.05776, 2017.

      Jeong Joon Park, Peter Florence, Julian Straub, Richard Newcombe, and Steven Lovegrove. Deepsdf: Learning continuous signed distance functions for shape representation. In Proceedings of the IEEE/CVF conference on computer vision and pattern recognition, pages 165–174, 2019.

      Vincent Sitzmann, Semon Rezchikov, Bill Freeman, Josh Tenenbaum, and Fredo Durand. Light field networks: Neural scene representations with single-evaluation rendering. Advances in Neural Information Processing Systems, 34:19313–19325, 2021.

      Vincent Sitzmann, Michael Zollhöfer, and Gordon Wetzstein. Scene representation networks: Continuous 3d-structure-aware neural scene representations. Advances in Neural Information Processing Systems, 32, 2019.

      Gongbo Tang, Rico Sennrich, and Joakim Nivre. Encoders help you disambiguate word senses in neural machine translation. In Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP). Association for Computational Linguistics, 2019.

      \ No newline at end of file diff --git a/blog/2023/combining-modalities-for-better-representation-learning/index.html b/blog/2023/combining-modalities-for-better-representation-learning/index.html new file mode 100644 index 00000000..38226f8f --- /dev/null +++ b/blog/2023/combining-modalities-for-better-representation-learning/index.html @@ -0,0 +1,32 @@ + Combining Modalities for Better Molecular Representation Learning | 6.S898 Deep Learning Blogs 2023

      Combining Modalities for Better Molecular Representation Learning

      Introduction

      Importance of molecular representation learning

      Molecular Representation Learning (MRL) is one of the most important tasks in molecular machine learning, drug design, and cheminformatics. It is central to addressing several key challenges in molecular sciences, including high-quality representation learning for molecular property prediction, predicting organic reaction outcomes, retrosynthesis planning, and generative modeling. Excelling in these domains is essential for the development of new drugs, materials, and catalysts.

      Different ways to represent molecules

      The challenge of learning molecular representations is more complex than in fields like computer vision or natural language processing. This complexity stems from the variety of methods available for encoding molecular structures and the assumptions inherent to each representation. Primarily, there are four ways to represent molecules:

      1. Fingerprints. One of the oldest ways to represent molecules in Quantitative structure–activity relationship (QSAR) modelling. Molecular fingerprints are binary vectors that encode the presence or absence of certain substructures in the molecule. Fingerprints were one of the first ways to get the initial representation of molecules in machine learning problems.
      2. String representation (e.g. SMILES strings). This approach involves encoding molecular fragments as tokens to form a string. This initial molecules encoding is widely used in generative molecular modeling.
      3. 2-D graph. A popular and intuitive approach where molecules are represented as graphs, with atoms and bonds corresponding to nodes and edges, respectively. With advancements in Graph Neural Networks (GNNs) architectures, this format is extensively used in molecular property prediction.
      4. 3-D graph. The most detailed representation, which includes spatial information about atoms and bonds in addition to the graph structure. Although obtaining 3-D graph representations is challenging, models based on this approach often demonstrate superior performance. Various modeling techniques are applied to 3-D graphs, including invariant and equivariant GNNs.

      Given these diverse approaches, this work aims to explore various molecular representations and their potential combination for enhanced performance in downstream tasks, such as molecular property prediction. Additionally, this blog post seeks to analyze the representations of small molecules by comparing nearest neighbors in the latent chemical space. We also investigate representations learned by language models trained on SMILES strings.

      Methods

      Data

      In this study, we utilized the QM9 dataset to train and evaluate our models. Comprising approximately 133,000 small organic molecules, the dataset includes molecules with up to nine heavy atoms (specifically Carbon, Nitrogen, Oxygen, and Fluorine) and 19 distinct properties. As a well-established benchmark in molecular property prediction research, QM9 offers a comprehensive foundation for our analysis.

      Our primary focus was on predicting the free energy $G$ at 298.15K. To ensure a robust evaluation, we divided the dataset using Murcko scaffolds to prevent the same molecular scaffolds from appearing in both the training and testing sets. This division allocates 80% of the data for training, 10% for validation, and the remaining 10% for testing purposes. Additionally, we standardized the target values to have a zero mean and unit variance, aiming for consistency in our predictive modeling.

      Models

      The illustration of the overall approach is presented in Figure 1.

      Figure 1. Illustration of the overall approach. We use different ways to represent molecules and train different models on these initial encodings.

      We use the following models to learn the representations of molecules:

      1. Fingerprint-based model. Utilizing Morgan fingerprints with a radius of 2 and 2048 bits, we developed a multilayer perceptron (MLP) featuring six layers, layer normalization, and a varying number of hidden units (ranging from 512 to 256). This model focuses on learning representations from molecular fingerprints.
      2. SMILES-based model. For the representation of SMILES strings in the QM9 dataset, we employed a Recurrent Neural Network (RNN) with LSTM cells, comprising three layers and 256 hidden units. This model learns to predict the next token in a SMILES string based on the previous tokens, using cross-entropy loss for training: \(\mathcal{L}_{\text{CE}} = -\sum_{t=1}^{T} \log p(x_t | x_{<t})\)

      3. 2-D graph-based model. To handle 2-D graph representations of molecules, we used a Message Passing Neural Network with four layers, 256 hidden units, sum aggregation, mean pooling, and residual connections between convolution layers. The model updates the nodes’ hidden representations as follows:
      \[h_i^{\ell+1} = \phi \left( h_i^{\ell}, \frac{1}{|\mathcal{N}_i|}\sum_{j \in \mathcal{N}_i} \psi \left( h_i^{\ell}, h_j^{\ell}, e_{ij} \right) \right)\]
      4. 3-D graph-based model. While there are many different architectures to model points in 3-D space, we decided to use one of the simplest architectures — E(n) Equivariant Graph Neural Network (EGNN) that is equivariant to rotations, translations, reflections, and permutations of the nodes. We used 4 layers, 256 hidden units, sum aggregation, mean pooling and residual connections between convolution layers to learn the representations of 3-D graphs of molecules that updates the nodes' hidden representations according to the equations given in the Figure 1.

      Training

      We trained all models using the Adam optimizer with learning rate of $1\cdot10^{-3}$, batch size 32, and 100 epochs. We additionally used ReduceLROnPlateau learning rate scheduler. We used the mean absolute error (MAE) as the metric for evaluation.

      Evaluation

      We used several combinations of modalities to evaluate the performance of the models:

      1. MPNN + FPs: This model integrates the representation learned by the Message Passing Neural Network (MPNN) with the MLP trained on fingerprints, featuring 256 hidden units. It concatenates the representations from MPNN and MLP, using an MLP layer for the final target value prediction.
      2. EGNN + FPs: Similar to the previous model but uses the representation learned by the EGNN.
      3. EGNN + MPNN: This configuration combines the representations from EGNN and MPNN, followed by an MLP for target value prediction.
      4. MPNN + RNN: This model merges representations from MPNN and a pretrained Recurrent Neural Network (RNN). The RNN’s encodings remain static and are not updated during training. However, this model did not converge and was excluded from the final evaluation.

      The results of evaluation of different models on the QM9 dataset are presented in Figure 2.

      Figure 2. Different models' performance on the QM9 dataset. The models are trained on the same data, but with different representations. The number of parameters is displayed on top of each bar.

      Analysis

      Comparison of different models

      As depicted in Figure 2, the EGNN model demonstrates superior performance. A likely explanation is that the QM9 dataset’s labels were calculated using computational methods that leverage the 3-D structure of molecules. The 3-D representation, therefore, proves most effective for this task, with the EGNN adept at capturing crucial 3-D interactions for predicting the target value. Interestingly, simple concatenation of hidden representations seems to dilute the information, resulting in inferior performance. This suggests that combining modalities is a complex endeavor, requiring thoughtful architectural design.

      Nearest neighbors analysis

      After the training of the models we performed the nearest neighbors analysis to compare the learned representations of molecules. We took the learned representations of the molecules in the test set and computed the nearest neighbors in the latent chemical space using cosine similarity. Additionally we plotted the PCA reduced representations (Figure 3) and analyzed the nearest neighbors for 4 different molecular scaffolds.

      Figure 3. PCA reduced representations of the molecules in the test set. The color of the points corresponds to the molecular scaffold.

      There are several interesting observations from the nearest neighbors analysis:

      1. In the case of the fingerprint reductions, the nearest neighbors are far away from the queried molecules in the latent chemical space.
      2. For the reduced learned representations of the molecules in the test set we can see that the nearest neighbors are very close to the queried molecules in the latent chemical space. This is expected as the models were trained to predict the target value and therefore the representations of the molecules that are close in the latent chemical space should have similar target values.
      3. The bottom right plot of Figure 3, showcasing the EGNN + FPs combination, reveals a very interesting pattern — the reduced chemical space resembles a combination of the reduced chemical spaces of the EGNN and FPs. The EGNN’s reduced chemical space is more “sparse”, while the representation learned by the MLP is denser but much more spread out. Another interesting observation is that the combined chemical space is more structured due to the presence of some clustered fragments, which is not present in the case of either the EGNN or the MLP.

      Additionally we analyzed the nearest neighbors for 4 different molecular scaffolds. The results for 3 of them are present in Figure 4.

      Figure 4. Nearest neighbors for 3 different molecular scaffold instances. Top molecule for each cell is the closest molecule to the queried molecule in the latent chemical space, the bottom molecule is the second closest molecule.

      From the Figure 4 we can make some additional observations:

      • For the fingerprint similarity, the retrieved molecules are very similar to the queried molecule. This is an expected result because the molecules with the highest matches in the fingerprints are the most similar to the queried molecule. However, for the third example the second closest molecule is not very similar to the queried molecule.
      • MPNN, EGNN as well as their combination return the molecules that are very similar to the queried molecule. Because the model was trained to predict the target value, the nearest neighbors are molecules with similar target values (this is not guaranteed for the fingerprints similarity because substructures can be combined in different ways potentially leading to very different molecular properties).
      • In case of MLP trained on fingerprints, the nearest neighbors can have very different scaffolds. This agrees with the performance of the model on the QM9 dataset — the model is not able to fully capture the molecular structure and therefore the nearest neighbors can have very different scaffolds even though the initial representations were the ones retrieving the most similar molecules (fingerprints).
      • Interestingly, in case of RNN trained on SMILES strings, the nearest neighbors can have very different scaffolds. This result is expected because RNN was trained to predict next token in the sequence and therefore the nearest neighbors are the molecules with similar SMILES strings. For example, first molecule contains triple bond between two carbon atoms. In the case of the second closest neighbor for first scaffold instance there are two triple bonds between carbon and nitrogen atoms. The scaffold is different, but the SMILES strings are similar.

      Overall, the key takeaway is that the more effectively a model performs in the supervised learning phase (excluding the RNN), the more meaningful its nearest neighbors are in terms of molecular structure resemblance. While fingerprint similarity still yields closely matched molecules, the results are not as insightful as those from GNNs, which capture molecular structures with greater nuance and expressiveness.

      Conclusion

      Results of modalities mixing

      Modalities mixing is a very interesting and promising approach for the problems in the field of molecular machine learning. However, architectures should be designed carefully to achieve the best performance. In our work we showed that simple concatenation of the representations learned by different models can lead to worse performance on the downstream tasks.

      Future work

      The obvious direction for future work is to experiment with different architectures for modalities mixing. Another interesting direction is to use the mixed modalities for generative molecular modeling, as string-based methods still perform better than the majority of 3-D generative approaches even though the latter is more natural. Therefore, it would be interesting to explore the combination of the string and 3-D graph representations for generative modeling.

      \ No newline at end of file diff --git a/blog/2023/conflict-causality/index.html b/blog/2023/conflict-causality/index.html new file mode 100644 index 00000000..bdcb607f --- /dev/null +++ b/blog/2023/conflict-causality/index.html @@ -0,0 +1,32 @@ + Tracing the Seeds of Conflict: Advanced Semantic Parsing Techniques for Causality Detection in News Texts | 6.S898 Deep Learning Blogs 2023

      Tracing the Seeds of Conflict: Advanced Semantic Parsing Techniques for Causality Detection in News Texts

      This blog post outlines a research project aiming to uncover cause-effect-relationships in the sphere of (political) conflicts using a frame-semantic parser.

      Introduction

      “In the complex world of political conflicts, understanding the underlying dynamics can often feel like trying to solve a puzzle with missing pieces. This project attempts to find those missing pieces through a novel approach that combines the insights of qualitative research with the precision of quantitative analysis.”

      Retrieved from https://conflictforecast.org

      Political conflicts are multifaceted and dynamic, posing significant challenges for researchers attempting to decode their intricate patterns. Traditional methods, while insightful, often grapple with the dual challenges of scale and specificity. This project embarks on an innovative journey to bridge this gap, leveraging a frame-semantic parser to illustrate its applicability for the task and to discuss an approach to achieve domain-specificity for the model using semantic similarity. By synthesizing the depth of qualitative research into the scalability of quantitative methods, we aim to contribute to more informed analyses and actions in low-resource, low-tech domains like conflict studies.

      On this journey, the project's key contributions are:

      1. Advancing Frame-Semantic Parsing in Conflict Research: We introduce the frame-semantic parser, a method that brings a high degree of explainability to conflict studies. Particularly when used in conjunction with news articles, this parser emerges as a powerful tool in areas where data is scarce, enabling deeper insights into the nuances of political conflicts.

      2. Harnessing Semantic Similarity for Domain Attunement: The project underscores the significance of semantic similarity analysis as a precursor to frame-semantic parsing. This approach finely tunes the parser to specific thematic domains, addressing the gaps often present in domain distribution of common data sources. It illustrates how tailoring the parser input can yield more contextually relevant insights.

      3. Demonstrating Domain-Dependent Performance in Frame-Semantic Parsing: We delve into the impact of thematic domains on the performance of a transformer-based frame-semantic parser. The research highlights how the parser’s effectiveness varies with the domain of analysis, primarily due to biases and structural peculiarities in the training data. This finding is pivotal for understanding the limitations and potential of semantic parsing across different contexts.

      4. Developing Domain-Specific Performance Metrics: In environments where additional, domain-specific labeled test data is scarce, the project proposes an intuitive method to derive relevant performance metrics. This approach not only aligns the evaluation more closely with the domain of interest but also provides a practical solution for researchers working in resource-constrained settings.

      Literature Background

      Qualitative Research on Conflicts

      Qualitative research has long been a cornerstone in the study of political conflicts. This body of work, now well-established, emphasizes the unique nature of each conflict, advocating for a nuanced, context-specific approach to understanding the drivers and dynamics of conflicts. Researchers in this domain have developed a robust understanding of the various pathways that lead to conflicts, highlighting the importance of cultural, historical, and socio-political factors in shaping these trajectories. While rich in detail and depth, this approach often faces challenges in scalability and systematic analysis across diverse conflict scenarios.

      The Role of Quantitative Methods

      The emergence of computational tools has spurred a growing interest in quantitative approaches to conflict research. These methods primarily focus on predicting the severity and outcomes of ongoing conflicts, with some success. However, the onset of conflicts remains challenging to predict, indicating a need for more sophisticated tools and methodologies. Quantitative methods provide scalability and a degree of objectivity but often fail to capture the complexities and evolving nature of conflicts. Prior work on a conflict-cooperation scale illustrates the difficulty of quantifying conflict dynamics and the controversy involved in creating aggregate time series from event data. Other researchers highlight the importance of diverse, accurate predictions in conflict forecasting, noting the value of incorporating contextual variables to predict early signals of escalation.

      Bridging the Gap with Explainable Modeling Approaches

      The challenge now lies in bridging the insights from qualitative research with the systematic, data-driven approaches of quantitative methods. While the former provides a deep understanding of conflict pathways, the latter offers tools for large-scale analysis and prediction. The key to unlocking this synergy lies in developing advanced computational methods to see the smoke before the fire – identifying the early precursors and subtle indicators of impending conflicts. This approach aligns with the evolving needs of conflict research, where traditional models may not adequately address the complex and non-linear nature of conflict data. Earlier studies demonstrate the potential of utilizing newspaper text for predicting political violence, suggesting a novel data source for uncovering early conflict indicators. However, these early attempts are outdated given the fast technological development in recent years, particularly in the field of natural language processing. This research endeavour seeks to fill that gap and introduce a scalable, explainable method to quantitative conflict research.

      Data

      The project capitalizes on the premise that risk factors triggering a conflict, including food crises, are frequently mentioned in on-the-ground news reports before being reflected in traditional risk indicators, which can often be incomplete, delayed, or outdated. By harnessing newspaper articles as a key data source, this initiative aims to identify these causal precursors more timely and accurately than conventional methods.

      News Articles as Data Source

      News articles represent a valuable data source, particularly in research domains where timely and detailed information is crucial. In contrast to another “live” data source that currently revels in popularity amongst researchers - social media data - news articles are arguably less prone to unverified narratives. While news articles typically undergo editorial checks and balances, ensuring a certain level of reliability and credibility, they certainly do not withstand all potential biases and are to be handled with caution - as arguably every data source. To counteract potential biases of individual news outlets, accessing a diverse range of news sources is essential. Rather than having to scrape or otherwise collect data on news articles, there is a set of resources available:

      • NewsAPI: This platform provides convenient access to a daily limit of 100 articles, offering diverse query options. Its integration with a Python library streamlines the process of data retrieval. However, the limitation lies in the relatively small number of data points it offers, potentially restricting the scope of analysis.

      • GDELT Database: Renowned for its vast repository of historical information spanning several decades, GDELT stands as a comprehensive data source. Its extensive database is a significant asset, but similar to NewsAPI, it predominantly features article summaries or initial sentences rather than complete texts, which may limit the depth of analysis.

      • Factiva: A premium service that grants access to the complete bodies of articles from a plethora of global news sources in multiple languages. While offering an exhaustive depth of data, this resource comes with associated costs, which may be a consideration for budget-constrained projects.

      • RealNews: As a cost-free alternative, this dataset encompasses entire newspaper articles collated between 2016 and 2019. Selected for this project due to its unrestricted accessibility and comprehensive nature, it provides a substantial set of articles, making it a valuable resource for in-depth analysis.

      Descriptive Analysis of the Data

      The analysis delved into a selected subset of 120,000 articles from the RealNews open-source dataset. This subset was chosen randomly to manage the extensive scope of the complete dataset within the project’s time constraints. Each article in this subset provided a rich array of information, including url, url_used, title, text, summary, authors, publish_date, domain, warc_date, and status.

      The range of articles spans from 1869 to 2019, but for focused analysis, we narrowed the scope to articles from January 2016 through March 2019. This temporal delimitation resulted in a dataset comprising 58,867 articles. These articles originated from an expansive pool of 493 distinct news outlets, offering a broad perspective on global events and narratives. The distribution of these articles across the specified time frame provides the expected observation of increasing news reporting, as visualized below.

      Counts of Articles over Time

      To understand the content of our dataset’s news articles better, we utilized the TfidfVectorizer, a powerful tool that transforms text into a numerical representation, emphasizing key words based on their frequency and distinctiveness within the dataset. To ensure focus on the most relevant terms, we filtered out commonly used English stopwords. The TfidfVectorizer then generated a tf-idf matrix, assigning weights to words that reflect their importance in the overall dataset. By summing the Inverse Document Frequency (IDF) of each term, we obtained the adjusted frequencies that helped identify the most influential words in our corpus. To visually represent these findings, we created a word cloud (see below), where the size of each word correlates with its relative importance.

      Word Cloud for Entire News Article Dataset (tf-idf adjusted)

      Methodology

      We showcase the applicability of frame-semantic parsing to the study of conflicts and inform the model with domain-specific seed phrases identified through semantic similarity analysis. This approach not only demonstrates the effectiveness of the method in conflict studies but also showcases how domain-specific applications of deep learning tasks can be accurately applied and measured. Thus, we not only validate the utility of frame-semantic parsing in conflict analysis but also explore innovative ways to tailor and evaluate domain-specific performance metrics.

      The Frame-Semantic Parser

      Contextualizing the Approach

      In the pursuit of bridging the gap between the robust theoretical understanding of conflict dynamics and the practical challenges in data availability, the frame-semantic parser emerges as a promising methodological tool. In a recent study, a team of researchers established a proof-of-concept via its successful application of a frame-semantic parser for the study of food insecurity - a field with similar challenges surrounding data access and quality. While this study relied on what can now be considered the “old state-of-the-art,” our proposed approach diverges towards a more contemporary, transformer-based model, inspired by recent advancements in the field.

      Retrieved from https://github.com/swabhs/open-sesame

      How Does a Frame-Semantic Parser Work?

      At the heart of frame-semantic parsing, as conceptualized in the frame semantics literature and formalized by the FrameNet project, is the identification of structured semantic frames and their arguments from natural language text. As illustrated above, these frames encapsulate events, relations, or situations along with their participants, making it a critical tool in natural language understanding (NLU) tasks. The practical applications of frame semantics are broad, ranging from voice assistants and dialog systems to complex text analysis.

      The process of frame-semantic parsing constitutes three subtasks:

      • Trigger Identification: This initial step involves pinpointing locations in a sentence that could potentially evoke a frame. It’s a foundational task that sets the stage for more detailed analysis.

      • Frame Classification: Following trigger identification, each potential trigger is analyzed to classify the specific FrameNet frame it references. This task is facilitated by leveraging lexical units (LUs) from FrameNet, which provide a strong indication of potential frames.

      • Argument Extraction: The final task involves identifying the frame elements and their corresponding arguments within the text. This process adds depth to the frame by fleshing out its components and contextualizing its application within the sentence.

      While frame-semantic parsers have arguably not received as much attention as other language modeling methods, three major contributions of the past few years can be highlighted. One approach - which is still outperforming many other implementations - presented an efficient parser with softmax-margin segmental RNNs and a syntactic scaffold. It demonstrates that syntax, while beneficial, is not a necessity for high-performance frame-semantic parsing. A second contribution explores the application of transformer-based architectures to frame semantic parsing, employing a multi-task learning approach that significantly improves upon previous state-of-the-art results. Most recently, researchers developed the first open-source approach - treating frame semantic parsing as a sequence-to-sequence text generation task, utilizing a T5 transformer model. It emphasizes the importance of pretraining on related datasets and employing data augmentations for improved performance. The distinctive strength of a frame-semantic parser lies in its ability to contextualize information, rather than interpreting it in isolation. This feature is particularly invaluable in conflict analysis, where the semantics of discourse play a critical role.

      Implementation of the Frame-Semantic Parser

      The implementation of our frame-semantic parser involves several key steps. We begin by splitting our text data into sentences using a split_into_sentences function. This granular approach allows us to focus on individual narrative elements within the articles and is motivated by reports that frame-semantic parsers perform better at the sentence level.

      In the heart of our methodology, we utilize various functions to extract and filter relevant frames from the text. Our extract_features function captures the full text of each frame element, ensuring a comprehensive analysis of the semantic content. The filter_frames function then refines this data, focusing on frames that are explicitly relevant to conflict, as informed by research on causal frames in FrameNet.

      To optimize the performance of our transformer-based parser, we build a process_batch function. This function handles batches of sentences, applying the frame semantic transformer model to detect and filter frames relevant to our study.

      Our approach also includes a careful selection of specific frames related to causality and conflict, as we are specifically interested in these frames rather than in frames of any kind. We rely on both manually identified frame names (informed by prior research) and pattern-based searches in FrameNet to compile a comprehensive list of relevant frames. This curated set of frames is instrumental in identifying the nuanced aspects of conflict narratives within the news articles.

      The implementation is designed to be efficient and scalable, processing large batches of sentences and extracting the most relevant semantic frames. This approach enables us to parse and analyze a substantial corpus of news articles, providing a rich dataset for our conflict analysis.

      Seed Selection via Semantic Similarity Analysis to Inform Causal Modeling

      Understanding Semantic Similarity

      Semantic similarity plays a pivotal role in our methodology, serving as the foundation for expanding our understanding of how conflict is discussed in news articles. By exploring the semantic relationships between words and phrases, we can broaden our analysis to include a diverse array of expressions and viewpoints related to conflict. This expansion is not merely linguistic; it delves into the conceptual realms, uncovering varying narratives and perspectives that shape the discourse on conflict.

      How Do We Compute Semantic Similarity?

      To compute semantic similarity and refine our seed phrases, we employ a combination of distance calculation and cosine similarity measures. We begin with a set of initial key phrases — conflict, war, and battle — ensuring they capture the core essence of our thematic domain. We then leverage pretrained word embeddings from the Gensim library to map these phrases into a high-dimensional semantic space. We also experimented with more sophisticated embedding approaches (like transformer-based) to compute the semantic similarity and thus obtain the seeds. When trading off complexity/time and performance, the simpler pretrained Gensim model prevailed.

      Our methodology involves generating candidate seeds from our corpus of documents, including unigrams, bigrams, and trigrams, with a focus on those containing key words related to conflict. We filter these candidates based on their presence in the word vectors vocabulary, ensuring relevance and coherence with our seed phrases.

      Using functions like calculate_distances and calculate_cosine_similarity, we measure the semantic proximity of these candidates to our initial seed phrases. This process involves averaging the distances or similarities across the seed phrases for each candidate, providing a nuanced understanding of their semantic relatedness.

      The candidates are then ranked based on their similarity scores, with the top candidates selected for further analysis. This refined set of seed phrases, after manual evaluation and cleaning, forms the basis of our domain-specific analysis, guiding the frame-semantic parsing process towards a more focused and relevant exploration of conflict narratives.

      Domain-Specific Metrics

      In the final stage of our methodology, we integrate the identified seed phrases into the frame-semantic parser’s analysis. By comparing the model’s performance on a general set of sentences versus a subset containing at least one seed phrase, we assess the model’s domain-specific efficacy. This comparison not only highlights the general capabilities of large language models (LLMs) but also underscores their potential limitations in domain-specific contexts.

      Our approach offers a pragmatic solution for researchers and practitioners in low-resource settings. We demonstrate that while general-purpose LLMs are powerful, they often require fine-tuning for specific domain applications. By utilizing identified domain-specific keywords to construct a tailored test dataset, users can evaluate the suitability of general LLMs for their specific needs.

      In cases where technical skills and resources allow, this domain-specific dataset can serve as an invaluable tool for further refining the model through data augmentation and fine-tuning. Our methodology, therefore, not only provides a robust framework for conflict analysis but also lays the groundwork for adaptable and efficient use of advanced NLP tools in various thematic domains.

      We present the results for these domain-specific measures for F1 score, recall, and precision. Likewise, to illustrate performance differences across domains, we conducted the entire approach for the finance domain as well, starting with the keywords finance, banking, and economy.

      Findings & Insights

      Frame-Semantic Parser Identifies Causal Frames Reliably

      In this stage, we assess if the methodology is truly applicable to the domain of conflicts and for the use with news article data. We find that of our 37 identified cause-effect related frames, all are represented with various instances in our dataset. In fact, as few as 1,600 randomly selected news articles (processed in 100 batches of 16 samples) suffice to cover all cause-effect related frames. Therefore, for this intermediate step of the project, we gather support that the parser is in fact applicable to news article data.

      Differences in Seed Phrase Selection

      We make one major observation between the results of the finance- versus conflict-specific seed selection for downstream use. Potentially because conflict reporting is strongly shaped by geographic labels and information, a number of the top 50 seed phrases were geographic terms like “Afghanistan.” Since we did not want to bias the downstream evaluation of our domain-specific metrics, we excluded these seed phrases and continued the analysis with 34 seeds. In contrast, the top 50 finance-specific seed phrases obtained from the semantic analysis were neither geographic nor linked to individual (financial) historic events, wherefore we continued the downstream analysis with all top 50 seed phrases. Already here we can observe the deviations across domains, giving more support to the idea of domain-specific evaluation and metrics.

      Employing Domain-Specific Performance Metrics

      Our research involved an extensive evaluation of the frame-semantic parser, based on a transformer architecture, across various configurations and domain-specific datasets. We began by rebuilding and training the model using the vanilla code and a smaller model size without hyperparameter tuning. Subsequently, we fine-tuned the hyperparameters to match the baseline performance levels. After this, we moved on to one of the main contributions of this project: the domain-specific evaluation. The evaluation was carried out on domain-specific validation and test datasets, curated using seed words from finance and conflict domains to highlight differences across domains.

      The untuned model (validation n = 646, test n = 1891) showed an argument extraction F1 score of 0.669 and a loss of 0.181 on the validation set. On the test set, it presented a similar F1 score of 0.669 and a loss of 0.227.

      Hyperparameter-Tuned Performance

      Post hyperparameter tuning, there was a notable improvement in the model’s validation performance (n = 156), with the F1 score for frame classification reaching as high as 0.873, and the precision for trigger identification at 0.818. The test metrics (n = 195) also showed consistent enhancement, with the F1 score for frame classification at 0.864 and trigger identification precision at 0.747.

      When evaluated on domain-specific datasets, the model exhibited varying degrees of effectiveness, which supports our assumption that domains matter to the applicability of LLMs to domain-specific tasks and that our simple proposed way of generating domain-specific metrics can give insights on that. For the conflict keywords (validation n = 121, test n = 255), the model achieved a validation F1 score of 0.865 for frame classification and 0.764 for trigger identification precision. However, for the finance domain (validation n = 121, test n = 255), the F1 score for frame classification was slightly higher at 0.878, and the trigger identification precision was also higher at 0.781 compared to the conflict domain.

      The results indicate that the hyperparameter-tuned model significantly outperforms the vanilla model across all metrics. Additionally, domain-specific tuning appears to have a considerable impact on the model’s performance, with the finance domain showing slightly better results in certain metrics compared to the conflict domain. These insights could be pivotal for further refinements and targeted applications of the frame-semantic parser in natural language processing tasks. Moreover, these observations fit our general understanding of the two domains. Reports on conflicts are likely to discuss the involved parties’ reasons for specific actions like attacks on certain targets. Additionally, the actions in conflicts are arguably more triggering events than “the good old stable economy.” Certainly, this research project can only be the beginning of a more rigorous assessment, but these findings show great promise of the idea of generating and evaluating simple, domain-specific performance metrics.

      Performance Evaluation of Frame-Semantic Parser

      Conclusion & Limitations

      This project has embarked on an innovative journey, merging advanced natural language processing techniques with the intricate study of conflict. By harnessing the power of a transformer-based frame-semantic parser and integrating semantic similarity analysis, we have made significant strides in identifying causal relationships within news articles. This methodology has not only illuminated the dynamics of conflict as portrayed in media but also demonstrated the adaptability and potential of frame-semantic parsing in domain-specific applications.

      Key Findings

      1. Utility of Frame-Semantic Parsing: Our work has showcased the frame-semantic parser as a valuable and explainable tool, particularly effective in data-scarce environments like conflict research. Its ability to contextualize information and discern nuanced semantic relationships makes it an indispensable asset in understanding complex thematic domains.

      2. Semantic Similarity for Domain-Specific Perspective: We illustrated the effectiveness of using semantic similarity to refine seed phrases, thereby tailoring the frame-semantic parser to the specific domain of conflict. This approach has proven to be a straightforward yet powerful means to customize advanced NLP models for targeted analysis.

      3. Dependence on Domain for Model Performance: Our findings highlight a significant insight: the performance of general-purpose language models can vary depending on the domain of application. This observation underscores the need for domain-specific tuning to achieve optimal results in specialized contexts.

      4. Development of Domain-Specific Performance Metrics: We proposed and validated a practical approach to developing domain-specific metrics, especially useful in resource-constrained environments. This methodology enables a nuanced evaluation of model performance tailored to specific thematic areas.

      Limitations & Future Research

      Despite the promising results, our project is not without its limitations, which pave the way for future research opportunities:

      1. Data Dependency: The effectiveness of our approach is heavily reliant on the quality and diversity of the news article dataset. Biases in media reporting or limitations in the scope of articles can skew the analysis and affect the accuracy of the results. In an extended version of the project - and with funding - one could switch to the Factiva dataset.

      2. Applicability of Domain-Specificity to Other Themes: While our method has shown efficacy in the context of conflict analysis, its applicability to other specific domains requires further exploration. Future research could test and refine our approach across various thematic areas to assess its broader utility.

      3. Model Complexity and Interpretability: While we have emphasized the explainability of the frame-semantic parser, the inherent complexity of transformer-based models can pose challenges in terms of scaling and deployment. Future work could focus on simplifying these models without compromising their performance - for instance via pruning and quantization.

      4. Expansion of Semantic Similarity Techniques: Our semantic similarity analysis was instrumental in refining seed phrases, but there is room for further enhancement. Incorporating more advanced semantic analysis techniques could yield even more precise and relevant seed phrases. While we found alternative methods, like BERT-based approaches to not yield significant improvements, ever more models flood the market.

      5. Integration with Other Data Sources: Expanding the dataset beyond news articles to include social media, governmental reports, or academic literature could provide a more holistic view of conflict narratives and their causal relations.

      In conclusion, our project represents a significant step forward in the intersection of natural language processing and conflict research. By addressing these limitations and building on our foundational work, future research can continue to push the boundaries of what is possible in this exciting and ever-evolving field.

      \ No newline at end of file diff --git a/blog/2023/contrastivediffusion-image2video/index.html b/blog/2023/contrastivediffusion-image2video/index.html new file mode 100644 index 00000000..1dbd1c9c --- /dev/null +++ b/blog/2023/contrastivediffusion-image2video/index.html @@ -0,0 +1,42 @@ + Exploring Image-Supervised Contrastive Diffusion - A Comparative Analysis with Applications in Image-to-Video Generation | 6.S898 Deep Learning Blogs 2023

      Exploring Image-Supervised Contrastive Diffusion - A Comparative Analysis with Applications in Image-to-Video Generation

      Image-to-image (I2I) and image-to-video (I2V) may be the next frontier of generative deep learning capabilities, but current models struggle with robustness, largely due to the implicit, rather than explicit, representation learning objective during traditional diffusion model training. Hence, we propose a new technique where a custom contrastive loss function is used to leverage the innate latent space of the diffusion model’s variational autoencoder. This enables us to study the creation of lightweight models that lose less contextual information between input conditioning and target output, which we elucidate in this blog.

      Introduction and Motivation

      With recent advances in computer vision and generative AI, we all have observed the various feats that diffusive models have achieved in conditional image generation. These models have demonstrated unparalleled ability in creativity, fidelity, and relevance when generating images from text prompts. Given this explosive success of diffusion for the task of image generation, the idea of applying the same concepts to conditional video generation seems like a logical follow-up. Yet, the field still lacks robust and compelling methods for conditional video generation with diffusion models. This raises the question: why might this be? Or perhaps a follow-up: what makes videos so hard in comparison to images?

      In an attempt to address our first question, if we take a brief dive into previous literature, we will find that the issue is not a lack of effort. Ho et al. , Zhang et al. , and Chen et al. , all explore this idea, yet the results from these methods are not nearly as exciting as the results we see in images. But why is this?

      Perhaps the answer lies in the solution to our second question. One of the most obvious complexities that videos have over images is also perhaps one of the most difficult: the temporal dependence between frames. But why is this relationship so hard for diffusion models? Following the work of Zhu et al. , we hypothesize that this is because the implicit learning of diffusive steps between images in a video is too complex of a problem for a diffusion model; relying on the model to learn the implicit relationship between representations of video frames is infeasible from a training and convergence standpoint. If we can instead learn diffusive steps over a more regularized learned latent space, the optimization problem can be greatly simplified and the diffusion model will in theory be more robust.

      To do so, we introduce a new framework for fine-tuning diffusion models when given images in addition to text as conditional information, targeting this challenge of making the model’s use of the latent space more robust. Specifically, we utilize contrastive learning techniques to ensure that the model learns consistency between latents from different image domains, which we first validate on the easier image-to-image (I2I) case before moving into image-to-video (I2V).

      Taking a step back to examine the current state of research, let’s first take a look at what current I2I models look like.

      Image-to-Image Models

      In the field of image-to-image, there are two main approaches, using images to control the model output, and modifying the image itself.

      The first approach is characterized by work like ControlNet and T2I, which freeze the overall diffusion network and instead fine-tune a lightweight adapter model on the top. This adapter is typically a combination of stable diffusion layers that have an additive effect on the model as a whole, trained using the fine-tuning dataset. However, these models are largely still built for text-to-image tasks, using the input images as conditioning for the input text prompt, such as a wireframe image for poses. This does not allow modification of the image itself, simply using the image as guidelines during the diffusion process itself, meaning that its style is not preserved.

      The second method is more related to maintaining both the style and content of the original image, and instead directly fine-tunes the diffusion network to actually use the input images. The first such model for this purpose is the original pix2pix architecture, which, while built for GANs, still carries vital lessons to this day. By fine-tuning a loss that actually involves the mapping between input and output image, the model learns to actually adapt the image while keeping other relevant contexts the same. After this, the Palette model for generalist, multi-task diffusion trained a diffusion model from scratch for multiple different tasks. Subsequently, Instruct-Pix2Pix built on the original pix2pix architecture, taking a pre-trained diffusion model and conditioning it on both the noisy text latent and the input image latent, meaning that the training latent contained both and therefore the model would fully train on the input image latent. This architecture is presented below.

      Traditional diffusion architecture

      For the purpose of this blog, we study Instruct-Pix2Pix like fine-tuning schemes, since they align with what we need for video-based studies, maintaining content of the previous image while making small modulations based on the input text.

      Image-to-Video Models

      Moving to I2V, we find that current image-to-video frameworks typically still use a traditional diffusion architecture, going straight from text and image representations to an output image. However, this naive approach struggles with serious issues like frame clipping and loss of contextual information, which is expected since noise-based sampling can easily throw off the output of individual frames.

      Hence, Ho et al. in 2022 proposed the first solution, supplementing conditional sampling for generation with an adjusted denoising model that directly forces image latents to be more similar to the corresponding text latents . While this achieved improved results over the straightforward diffusion approach, this often forces the model to stick too closely to the text latent, resulting in incoherent videos.

      To solve this issue, two recent approaches from Chen et al. and Zhang et al. have proposed methods to augment the video diffusion models themselves. Chen et al. uses the image encodings from CLIP-like language embeddings in an encoder-decoder language model, feeding the CLIP encodings at each step into a cross-attention layer that generates attention scores with the current video generation . In doing so, additional coherence between frames is achieved. On the other hand, Zhang et al. use multiple encoders, with CLIP and VQ-GAN concatenated before two stages of diffusion model training, which they claim provides the hierarchical learning required to learn the temporal processing . However, both these models are extremely data-heavy and still suffer from hallucination and frame skipping.

      Contrastive Models

      To remedy these issues in diffusion models, Ouyang et al. and Zhu et al. posit that the implicit representation learning objective in diffusion models is the primary cause of the slow convergence and hallucination issues. Specifically, diffusion models do not directly compare their output to their input, as in contrastive models, instead performing a variational approximation of the negative log-likelihood loss over the full Markov chain. Instead, Ouyang and Zhu propose to train the diffusion model to output a structured latent in the latent space of a contrastive model like a VQ-VAE, which then reconstructs the output image . In doing so, a contrastive term can be added to the loss of the diffusion model, maximizing the mutual information between the structured (output) latent and input latent, leading to stronger correlations between input and output, and hence improved convergence. Hence, this approach seems to have potential in fixing the hallucination and coherence issues in video diffusion models, without the need for added complexity.

      Our Proposal

      Thus, we propose a novel method for conditional image-to-image generation (generating images given a starting frame and text description) by training the diffusion model to actually utilize the regularized latent space in which a diffusion model can operate. Following the line of thought introduced above, we hypothesize that under such a formulation, the diffusion model is much more robust to temporal inconsistency, because of the regularity in the latent space. For example, if we imagine a highly regularized latent space, we will find all logical next frames for a given anchor frame clustered very closely around the anchor in this latent space. Therefore, any step the diffusion model takes would produce valid subsequent frames; it suffices simply for the model to learn which direction to go given the conditioned text prompt.

      Model Architecture

      Image to Image

      Given a base pretrained diffusion model, such as Runway ML’s StableDiffusion 1.4, which is the model used in this blog, it consists of various components. The three that are the most important are the VAE image encoder/decoder, the UNet, and the CLIP text encoder. The VAE begins by learning to transform images into latents and vice-versa, which is used to compress the input image and decode the output latent in the original Instruct-Pix2Pix stack. On the other hand, the UNet predicts the noise in the denoising part of the pipeline, whereas the CLIP text encoder encodes the input text.

      In terms of the general diffusion model, we use the traditional diffusion loss,

      \[\mathcal{L} = \mathbb{E}[(\epsilon - \epsilon_\theta(x_t))^2]\]

      which essentially encodes the mean squared error loss between the added noise and the noise that is predicted by the UNet. This pipeline is illustrated in the below image.

      Construction of the MSE loss for a traditional diffusion fine-tuning stack.

      However, this loss does not encode anything of the relation between the frames themselves, which has the potential to lead to low coherence between source and target image, and thus lead to poor output quality. However, contrastively trained models like CLIP have shown strong correlative behavior between multiple modalities in the past, like between text and image, which is why we move towards contrastive losses.

      In traditional contrastive learning, we typically have our classes divided by our dataset, such as for shape, as shown in this example of a shape dataset taken from the fourth homework of 6.s898:

      An example of a contrastive dataset for shapes

      For this contrastive learning dataset, we have images that are well classified, but in terms of our image to image task, there is no such easy classification. Instead, we adopt the notion that in such a dataset, with a batch size that is small relative to the size of the dataset, each image will be reasonably different from the other images. Also because we don’t want to cluster the latent space, as the VAE is fully pretrained in the case of the diffusion fine-tuning methodology, we don’t need to actually push similar items between the test set closer together, only push the diffusion output closer to the input conditioning.

      Hence, for this task, we consider each image within the larger batch as a negative sample, only using the corresponding latent in our optimization task as the positive sample. Also, given that we want both similarity to the input image and the target image, we want our loss to look like

      \[\mathcal{L} = \mathcal{L}_{MSE} + \mathcal{L}_{c, i} + \mathcal{L}_{c, t}\]

      where c indicates contrastive and i, t indicate input and target, respectively.

      For the images, they are encoded by the VAE, which has learned structure due to its Gaussian training objective in the ELBO loss, which means we can directly dot product the latents when calculating the contrastive loss:

      \[\mathcal{L}_c = -\mathbb{E}\left[\log\frac{e^{x_+^{T}x}}{\sum_{x' \in \{x_+, x_{-} \}} e^{x'^{T}x}}\right]\]

      This is calculated easily using a matrix multiplication and a cross entropy loss. Now, since we compute the contrastive loss using the predicted latent, and not the noise, we also add on a constructive aspect to our diffusion model. From the final noise prediction, the model also generates the predicted latent using the noise scheduler:

      \[x_0 = \frac{1}{\sqrt{\bar{\alpha}_t}}\left(x_t - \sqrt{1 - \bar{\alpha}_t}\,\epsilon_\theta(t)\right)\]

      where alpha is the cumulative products of the alphas in the noise scheduler. These predicted final latents are then used directly in the contrastive loss formula. A visualization of how we calculate our contrastive loss can be found below:

      Construction of the MSE + Contrastive loss for our fine-tuning stack.

      We note that in this case, we must scale the losses for numerical stability. The model we train with has latents of dimension 4 by 32 by 32, and while the MSE is scaled from 0 to 4 (due to pixel values from 1 to -1), the cross entropy loss is not. Indeed, many of these dot products are on the order of 4000, so we choose a high temperature of 1 to prevent NaN computations and then scale the losses by 4000, which is chosen because it scales the effect of each pixel in the dot product to around the same order as that in the MSE, which is averaged over all 4096 values in the latent.

      Image to Video

      Now, for image to video, the training process of such a model involves the optimization of the above diffusion/contrastive loss based on a given pair of nearby video frames, as well as the corresponding text description for that video. This procedure works well because in a video, we must train the model to learn the next frame, so just like how masked language models are asked to predict masked tokens from a sequence, we ask the diffusion model to predict a masked frame from the given frame. On top of that, the text prompt, which often still provides the majority of the guidance for the video as a whole, is already conditioned using the MSE loss, while the contrastive loss optimizes the similarity to previous frames. Otherwise, this is trained the same as a traditional diffusion model.

      During inference, we generate a video through the following process. First, an initial frame and the text description are encoded into our latent space using the VAE encoder and CLIP encoder, respectively. Now, we run an arbitrary number of passes through our diffusion model, generating a latent at each step, which is then passed in as the conditioning frame for the next forward pass. Finally, we decode the latent at each time step to obtain our video frame at that time step; stringing these frames together produces our video.

      From a more theoretical perspective, this method essentially aims to restrict the diffusion model’s flexibility to paths within a highly regularized, lower dimensional latent space, as opposed to the entire space of images that classical diffusion-based approaches can diffuse over. Such a restriction makes it much harder for the diffusion model to produce non-sensible output; the development of such a method would therefore enable the robust generation of highly temporally consistent and thus smooth videos. We also imagine the value of producing such a latent space itself. An interesting exercise, for example, is taking an arbitrary continuous path along vectors within a perfectly regular latent space to obtain sensible videos at arbitrary framerates.

      Data

      Now, we explain where we got our data from.

      For text-conditioned image-to-image generation, we train on the Instruct-Pix2Pix dataset from HuggingFace, sampling 20k samples from the original training set used in the paper (timbrooks/instructpix2pix-clip-filtered). Our test and evaluation sets consist of 500 nonoverlapping samples from this same set . This dataset consists of samples with input images, edited images, input prompts, edited prompts, and an edit string that describes the edit that was made. An example is presented below:

      An example of a sample from the Instruct-Pix2Pix dataset.

      For text-conditioned image-to-video generation, we experimented with the use of two different video-caption datasets: MSR-VTT and WebVid-10M. Due to the high dissimilarity between the properties of the two datasets, we tested the finetuning performance of both our contrastive model and non-contrastive models on both datasets. MSR-VTT contains 10k clips scraped from a wide range of domains, with multiple human-generated captions for each video. WebVid, on the other hand, contains 10M video clips compiled from stock image sources, with captions corresponding to the stock photo titles. For WebVid10M, we only take from the 2.5M subset. For both datasets, samples were generated to follow the Instruct-Pix2Pix data formulation (original image, edit prompt, edited image) using the following strategy:

      First, we sample 25k and 10k videos from WebVid-10M and MSR-VTT, respectively. We aim to sample roughly an equal number of samples from each video for a total of 20k (original image, edit prompt, edited image) triplets. We ignore videos longer than 30 seconds in length to minimize the probability of temporal inconsistency within a given video. Then, for each video, we choose a random frame in the video (the original video fps is 25; but these frames are too close together, so we say that only one out of every 5 video frames is a valid selection target) to be our “original” image. The video’s caption is our “edit” prompt. To select our “edited” image, we note that we are optimizing the model to produce the next frame, while maintaining consistency between frames. Therefore, to select the “edited” image, we sample a normal distribution with standard deviation of 10 valid frames (50 frames in the original video), or two seconds, to select a frame after our “original” image as our “edited” image. A sample processed image from WebVid is included below.

      An example of a processed sample from WebVid.

      Experiments

      To assess the efficacy of our newly proposed strategy, we run experiments on both the original Instruct-Pix2Pix task of text-conditioned image-to-image generation, as well as the task of text-conditioned image-to-video generation, against the baseline Instruct-Pix2Pix model. The original Instruct-Pix2Pix task is run to confirm that our model, after obtaining coherency, does not lose significant expressivity. On the other hand, we expect the image-to-video model to have comparable expressivity to the baseline on a task where coherency is significantly more important.

      All of these evaluations and experiments were performed using the Accelerate library and HuggingFace Diffusers, , building off of their Instruct-Pix2Pix codebase. The model is RunwayML’s Stable Diffusion v1.5 release. For the task of image-to-image generation, we trained both the baseline Instruct-Pix2Pix and our model for 9000 training steps on 4xA100-80GB with a batch size of 16 and a learning rate of 5e-5, which took on the order of 12 hours. For the image-to-video generation task, we trained both baseline Instruct-Pix2Pix and our contrastive model for 4500 training steps at a learning rate of 1e-5 and a batch size of 16 due to overfitting issues at higher # of training steps and higher learning rates, possibly due to the repetitiveness of our dataset. Note that we had a limited ability to hyperparameter tune/ablate, since each diffusion fine tuning run took multiple hours at a minimum, and we were operating on a minimal budget of spare A100s when they were available from our labs, so those results are not shown in this blog.

      We then evaluate on the test splits of the corresponding datasets described above (for image-to-video generation, we evaluate on the test split of WebVid, since MSRVTT’s testing set has a number of non-corresponding video-prompt pairs and also very jittery videos).

      Results

      Now, we explain our results. For both tasks, we assess two metrics: the first is the Frechet Inception Distance (FID) between the target image and the predicted image. The FID calculates the similarity between the distribution of target images and the distribution of generated images, so a lower FID is considered better, as the distributions are more similar. Note that since our models are still technically image-to-image models and not image-to-video at their core, to evaluate the base models we use FID instead of its video counterpart FVD. For our second metric, we use CLIP Scores between the edit prompt and each of the source image, the predicted image, and the target image. The CLIP score can be thought of as a measure of the similarity between the prompt and the image, with a higher score being better, referring to higher similarity.

      These metrics are used to evaluate our base image-to-video models as well, as they both determine the amount of prompt following and fidelity we can determine in our videos.

      Image to Image Results

      For text-conditioned image-to-image generation, we observe that our models have these FID and CLIP scores:

        FID CLIP (source - prompt) CLIP (gen - prompt) CLIP (target - prompt)
      Ours 158.8 21.7 24.4 24.1
      Baseline 142.4 21.7 24.4 24.1

      Our model matches the baseline on CLIP score, meaning that our model exhibits similar prompt following characteristics as the baseline. On top of that, our FID is only slightly higher than the baseline, meaning that the expressivity has not decreased significantly. However, images do not have similarly robust coherence metrics, so we evaluate these qualitatively.

      Coherence

      On the subject of coherence, we provide some image output pairs in the figure below:

      Examples of sampled images, with prompts of "make the mountain snowy" and "make it futuristic," respectively.

      For both scenes, while the baseline diffusion model follows the prompt more fully, which may match the output distribution (hence getting a better FID score), we notice several key contrastive differences, which would impact coherence. In the mountain for example, the forest disappears in the baseline version, which also doesn’t maintain the painting-like style. On top of that, in the Eiffel tower case, the Eiffel tower rotates in the non-contrastive version. These observations lead to the idea that the contrastive model may be prioritizing coherence as desired, despite some loss in performance. Similar patterns are observed throughout the dataset.

      Image to Video Results

      For text-conditioned image-to-video generation, we observe that our models have the FID and CLIP scores in the table below:

        FID CLIP (source - prompt) CLIP (gen - prompt) CLIP (target - prompt)
      Ours (trained on WebVid) 102.9 29.9 27.5 29.8
      Ours (trained on MSR-VTT) 149.3 29.9 27.6 29.8
      Baseline (trained on WebVid) * * * *
      Baseline (trained on MSR-VTT) 172.3 29.9 29.4 29.8

      Note that in this case, we include asterisks for the baseline numbers on WebVid because it produces NSFW content as marked by the HuggingFace Diffusers library more than 25% of the time. This means that the metrics are not directly comparable as we were unable to find a validation set on which we could evaluate the models quantitatively on even ground. Nonetheless, we still include the WebVid baseline in our qualitative analysis.

      Looking at the rest of the metrics, the baseline on MSR-VTT has a decently higher correlation with the prompt than the contrastive model. This makes sense, as the baseline is trained only on the objective of denoising the prompt latent, while we add the contrastive term. On the other hand, we have a significantly lower FID score for the MSR-VTT trained models, which means that the distribution of our output data relative to the target output data was more similar, which is probably due to the fact that our high coherence is useful in tasks where source and target distributions are similar.

      Qualitative Video Generation

      For a better understanding of the in-context performance of our model and to make up for the invalidity of the baseline model trained on the WebVid dataset above, we also perform qualitative assessments of longer videos generated by our models and the baselines. For each of 4 selected starting frames, we use a prompt generated from the sequestered part of WebVid to generate 5 subsequent frames for the video:

      An example of sampled videos for a prompt of a factory emitting smoke.

      From these generated videos, we observe that our models are significantly better at generating coherent frames, as we expected. In particular, we see that the MSR-VTT baseline model deviates heavily from the starting image on the very next frame, while our MSR-VTT model largely retains the original characteristics despite some content drifting after frame 3. WebVid noticeably performs better on the baseline, but does still observe some signs of progressive degradation in our predicted outputs, along with lack of motion in contrast to the prompt for the baseline model. This progressive degradation is likely due to small levels of inclarity in each subsequent frame being compounded over multiple frames; due to coherence between frames, the subsequent frames will contain strictly more inclarity than the previous. On the other hand, our model on WebVid sees less degradation on top of actually having coherent motion of smoke billowing, showing successful output.

      Overall though, WebVid was observed to have significantly better results than MSR-VTT, which is likely attributed to the greater quality of the dataset and less jittery videos.

      Discussion and Conclusion

      In this project, we explored the idea of using contrastive losses to improve the coherency between input and output images in the context of text-conditioned image-to-image generation. In particular, we study the utility of this ability to generate highly coherent diffusion results in I2V, where the current state-of-the-art suffers heavily from temporal inconsistency. We evaluate our models on the classic Instruct Pix2Pix task to assess its preservation of expressive ability and conclude that no significant degradation of expressive ability was observed. We then evaluate our contrastive strategy on text-conditioned image-to-video synthesis and find that our models outperform the classic non-contrastive formulation in video generation tasks when evaluated on CLIP Score and FID.

      Through our experiments, we have also identified some limitations of our methods and potential areas for improvement. First, we note that our model has trouble with the previously mentioned problem of progressive degradation. A possible solution to this problem could be introducing GAN training to encourage the model to produce higher-fidelity images. More robust methods could also be used (instead of sampling subsequent frames) to generate positive samples, which would increase our model's robustness. We also notice that both our model and the baseline have trouble with a continuous depiction of motion. This is likely due to the fact that any frame is only conditioned on the previous frame. Conditioning on images multiple frames before the current image would help with this consistency issue, as well as the aforementioned progressive degradation issue. Also, our loss function's negative sampling-based approach to training, applied to a dataset with a significant amount of repetition like ours, led to significant overfitting in preliminary runs. On top of that, runs suffered from loss spiking when the numeric instability of cross-entropy loss led to the calculation of NaN losses and exploding gradients, which required very low learning rates. This could be resolved with better sweeps of hyperparameters for scaling the losses relative to each other or higher quality data. Finally, as alluded to above, more time to do hyperparameter tuning with the training of larger models on larger datasets would likely help with performance in general.

      With this study, we examined the use of contrastive loss to improve coherency in latent diffusion, with experiments that demonstrated minimal loss of expressive capabilities and superior consistency in diffusion, resulting in better performance on image-to-video generation. We hope that through this study, we can drive focus toward contrastive loss approaches to obtain higher fidelity results in video generation, accelerating progress in I2V and T2V.

      \ No newline at end of file diff --git a/blog/2023/croneillproposal/index.html b/blog/2023/croneillproposal/index.html new file mode 100644 index 00000000..fa5a640f --- /dev/null +++ b/blog/2023/croneillproposal/index.html @@ -0,0 +1,32 @@ + Learning a Lifted Linearization for Switched Dynamical Systems | 6.S898 Deep Learning Blogs 2023

      Learning a Lifted Linearization for Switched Dynamical Systems

      A final project proposal for 6.s898 in fall 2023

      Introduction

      All models are wrong, but some are useful. —George Box

      Deep neural networks are incredibly capable of generating models from data. Whether these are models that allow for the classification of images, the generation of text, or the prediction of a physical system’s dynamics, neural networks have proliferated as a favored way of extracting useful, predictive information from set of data . But while well-tuned and well-designed neural networks can demonstrate miraculous performance at a given task, raw accuracy is not the only measure of a model’s usefulness.

      In robotics, the speed at which a model can be run and its explainability can be just as important as the accuracy of its predictions. Techniques such as model predictive control can enable remarkable performance even when they’re based on flawed predictive models . In practice, most of these models are linearizations of more accurate, nonlinear equations. Produced by considering low order truncations of the Taylor series, these linearizations can be run incredibly efficiently on modern computer hardware and are amenable to linear analysis techniques for explainability purposes.

      Nevertheless, this kind of linearization has its own weaknesses. Chief among them is the inherently local nature of the approach: a Taylor series must be taken around a single point and becomes less valid further away from this location. As an alternative, lifting linearization approaches inspired by Koopman Operator theory have become more commonplace . These techniques seek to linearize a system by lifting it to a higher dimensional representation where the dynamics can be made to evolve linearly over time. While such models can suffer from the curse of dimensionality when compared to their lower-order Taylor series brethren, they can offer greater accuracy while still providing most of the benefits of a linear model.

      \(f(x)|_{x=a}\approx f(a)+\frac{f'(a)}{1!}(x-a)\)

      A truncated Taylor series makes use of the derivatives of a function around a point.

      Deep neural networks have emerged as a useful way to produce these lifted linear models . An encoder is used to transform a system’s state into a higher dimensional latent space of “observables”. These observables are then fed through a linear layer which evolves the system forward in time: a linear dynamical model. In the literature, this approach has come to be known as Deep Koopman Networks (DKNs). We can see how these networks can learn lifted linear models for physical systems by considering a simple pendulum.

      An example of a neural network architectured used to learn observables for a linear Koopman model, taken from
      Performance of a DKN for predicting a damped, simple pendulum across a set of trajectories. On the left, the dotted lines represent the ground truth trajectories, while the lines connected by crosses are the predicted trajectories. On the right, the MSE of the trajectories for the full 30 time steps of data is presented.

      While the potential of DKNs has already been explored in recent years, the field is still being actively studied. In this blog, I am interested in exploring how a DKN can be used to model a particular kind of a dynamical system: one with piecewise dynamics that vary discretely across state space. These systems are inherently challenging for traditional, point-wise linearization techniques. To explain this, we can consider an example inspired by our old friend, the simple pendulum.

      Consider a pendulum as before, but with the addition of two springs located at $\theta=30\degree$ and $\theta=-30\degree$. If we were to consider a point arbitrarily close to one of these springs, say at $\theta=29.99…\degree$, then a Taylor series about this point – even with infinite terms – would not be able to accurately represent the dynamics when the spring is engaged. In contrast, a lifted linearization may better model such a system thanks to its ability to incorporate information beyond a single point.

      \(\begin{align} \ddot\theta =f(\theta,\dot\theta) =\begin{cases} -g\sin{\theta}-b\dot\theta, & \theta\in [-30^\circ,30^\circ]\\ -g\sin{\theta}-b\dot\theta-k(\theta+30), & \theta<-30^\circ\\ -g\sin{\theta}-b\dot\theta-k(\theta-30), & \theta>30^\circ \end{cases} \end{align}\)

      The dynamics of a pendulum with a pair of springs can be expressed as a set of piecewise equations. $k=1000$ is the stiffness of the springs and $b=1$ is the damping constant.
      Diagram of the damped pendulum system with a pair of fixed springs, spaced at equal angles away from $\theta=0$.

      That isn’t to say that a brute-force implementation of a DKN would necessarily be all too successful in this case either. Piecewise, switched, or hybrid systems (terminology depending on who you ask) are composed of particularly harsh nonlinearities due to their non-continuous derivatives. These can be difficult for lifted linearization approaches to model , with some systems theoretically requiring an infinite number of observables to be accurately linearized. This project is motivated by the question of whether we could modify the standard DKN approach to be more amenable for piecewise systems, specifically by taking inspiration from the common practice of pre-training neural networks.

      As a bit of a spoiler for the conclusion of this report, we don’t end up seeing any noticeable improvement from pre-training the DKN. Nevertheless, the process of experimenting with the proposed approaches was an insightful experience and I am happy to share the results below.

      Proposed Approaches

      I experimented with two approaches for pre-training our DKN, one inspired by curriculum learning and another seeking to leverage an intuitive understanding of a lifted linearization’s observables. We then compared the results to an aggregate DKN model trained from scratch with 50 observables.

      A DKN with 50 observables trained from scratch for the pendulum with springs. On the left, the dotted lines again represent ground truth trajectories while the lines connected by crosses are predictions.

      In the case of applying curriculum learning, we considered an approach with a data-based curriculum. In these cases, the difficulty of the training data is gradually increased over time. This has the potential benefit of allowing a model to more readily learn a challenging task, while also preventing a situation where a model is not sufficiently ‘challenged’ by new data during the training process. Our curriculum learning approach sought to take advantage of DKNs’ already good performance for the standard pendulum case. Intuitively, we identify the spring’s stiffness as the primary source of increased difficulty in our toy system. With this in mind, I created four data sets with different values for the spring constant, $k=0,10,100,1000$. A single model was then trained sequentially on these data sets. If our intuition is correct, we would expect to see the model gradually learn to account for the presence of the spring while maintaining the dynamics of a simple pendulum closer to the origin.

      For the second approach tested in this project, it is necessary to consider what an observable is meant to represent in a lifted linearization. As an additional piece of terminology, the function which is used to generate a given observable is referred to as an observable function . While it may be possible to use different sets of observable functions to linearize a given system, it is possible to find a set of observable functions that are analogous to a linear system’s eigenvectors. The evolution of these observables in time, referred to as Koopman eigenfunctions, is defined by an associated complex eigenvalue. Much like their eigenvector cousins, these eigenfunctions can provide useful information on how the system might evolve over time, including information on how the time evolution may vary spatially.

      Based on this understanding of Koopman eigenfunctions, we are motivated to see if a DKN could be coaxed into more readily learning spatially-relevant observables. If we consider our system of interest, the pendulum with springs, we posit that different regions of state space would be primarily influenced by different eigenfunctions. In particular, the larger central region where the pendulum’s dynamics are independent of the springs may be expected to be affected by a set of eigenfunctions with a lower spatial frequency and a global relevance. That is, eigenfunctions which better represent the dynamics of the system averaged throughout the state space and which may be valid everywhere – even when the springs are engaged, the natural dynamics of the pendulum are still in effect. In contrast, the dynamics when the springs are engaged (each spring is active in a comparatively smaller region of state space) may rely heavily on a set of eigenfunctions that are only locally relevant.

      On the left, a visualization of trajectories used to train the models for the pendulum with springs. Dotted vertical lines mark the boundary between regions of state space where the springs are and are not engaged. On the right, we see the trajectories considered for the system when there are no springs. Note that the presence of the springs compresses (`squeezes') the higher energy trajectories further away from the origin of the state space.

      While I believe that this is an interesting thought, it is worth noting that this intuitive motivation is not necessarily backed up with a rigorous mathematical understanding. Nevertheless, we can empirically test whether the approach can lead to improved results.

      In contrast to the curriculum learning approach, we have only a single set of data: that generated from a model of a pendulum with a spring stiffness of $k=1000$. Instead of the standard approach of DKN, where a larger number of observables is considered to (in general) allow for a system to be more easily linearized, we deliberately constrain the latent space dimension to be small. The intention is for this restriction to limit the number of observable functions that the model can represent, encouraging it to learn observables with a low spatial frequency and which are relevant across a larger region of state space. In our system of interest, this would be observable functions that represent the dynamics of the pendulum without the springs.

      Once we have initially trained this smaller model, we use its encoder within a larger model. This initial encoder is kept fixed in future training processes so that it continues to represent the same set of observables. An additional encoder is then added in the larger model, with the goal being to learn additional observables capable of making up for the initial model’s deficiencies. If the initial model learned the low spatial frequency observables as hoped, then we would expect this additional encoder to learn observables that are more relevant in areas where the springs are exerting a force on the pendulum. In practice, we could see this as a particular form of curriculum learning where the complexity of the model is increased over time. A key difference here compared to traditional approaches is that instead of increasing the complexity of the model by adding layers depth-wise, we are effectively increasing the width of the model by giving it the ability to learn additional observables.

      The architecture used to train the combined model. A smaller, 10 observable model was first trained, before a larger model was then trained to learn an additional 40 observables.

      The Model

      To reduce the influence that other factors may have in the results of our experiments, I sought to minimize any changes to the overall structure of the DKNs being used, save for those being studied. Chief among these was the number of hidden layers in the network, the loss function being used, and the input. Other variables, such as the optimizer being used, the batch size, and the learning rate, were also kept as unchanged as feasible. The need to tune each of these other hyperparameters and the challenges in doing so are well-documented in the machine learning field, and as such I won’t spend any additional time describing the processes involved.

      The general encoder architecture of the networks being used was as follows, with $D_x$ being the number of states (2, in the case of the pendulum) and $D_e$ being the number of observables:

      Layer Input Dimensions Output Dimensions Nonlinearity
      Linear $D_x$ 16 ReLU
      Linear 16 16 ReLU
      Linear 16 $D_e$ None

      In addition to the encoder network, a linear layer was present to determine the time evolution of the observables. For this linear layer, the input and output dimensions were both $D_e + D_x$ since our final set of observables always had the system’s states concatenated onto those learned by the encoder.

      The loss function that I used was composed of two main components: a loss related to the time evolution of the observables being output by the encoder, and a loss related to the time evolution of the state variables. In the literature, additional loss terms are often included to help regularize the network during training. These were not found to be significant in the testing done for this report, however, and so were excluded. Tests were also done with different weights between the state loss and the observable loss, with an equal balance between the two found to provide reasonable outcomes. Another hyperparameter that we needed to tune is for how many time steps to enforce a loss on the values predicted by the model. In this report, we stuck to 30 time steps although significant experimentation was not done to explore how varying this parameter may have affected the results. We did briefly look into whether having a weight on any of the loss terms which decayed over time would improve training and did not see any immediate benefits.

      \(\mathrm{loss}=\mathrm{multistep\_loss\_state}+\mathrm{multistep\_loss\_observables}\) \(\mathrm{multistep\_loss\_state}=\sum^{30}_{t=1}\lvert\lvert(\psi(\textbf{x}_t)-K^t\psi(\textbf{x}_0))[:2]\rvert\rvert_{\mathrm{MSE}}\) \(\mathrm{multistep\_loss\_observables}=\sum^{30}_{t=1}\lvert\lvert(\psi(\textbf{x}_t)-K^t\psi(\textbf{x}_0))[2:]\rvert\rvert_{\mathrm{MSE}}\)

      The loss function ultimately used for each of the models considers the prediction error for both the state and the observables. $\psi$ represents the act of using the model's encoder and then concatenating the state as an additional pair of observables. $K$ represents the linear layer in the architecture used to model the time evolution of the lifted state.

      Analysis

      Curriculum Learning

      The initial model for stiffness $k=0$ was trained on the simple pendulum dynamics for 600 epochs, and served as the pre-trained model for this approach. Subsequent models were each trained for 200 epochs with the Adam optimizer and a decaying learning rate scheduler. When analyzing the performance of these models, we looked at how the error for a set of trajectories not in the training set evolved over time.

      Performance of the model trained using curriculum learning after each of the learning stages. We observe that performance decreases over time, and that the original model trained when $k=0$ seems to perform the best.

      By this metric, we observe the performance of the model gradually getting worse. While this on its own is not too surprising, the final model ends up performing significantly worse than a DKN with the equivalent number of observables trained from scratch. Interestingly, it looks like the final model is unstable, with the trajectories blowing up away from the origin. Looking into this, issues surrounding the stability of linearized models is not a new phenomenon in the field of Koopman linearizations. Prior works have proposed several methods to help alleviate this issue, such as by adding an additional term to the loss function which stabilizes the time-evolution matrix. While there was no time to implement this change for this report, it could be an interesting modification to attempt for future work.

      Learning New Observables

      While trying to gradually learn additional observables for the model, we started with a network that learned 10 observable functions and trained it for 600 epochs. Once this process was complete, an extended model learned an additional 40 observable functions for an additional 600 epochs. The end result was comparable in performance to a single aggregate model of 50 observables trained from scratch. The aggregate model did appear to specifically outperform our gradually trained model during the initial time steps, while slightly underperforming in comparison at the later time steps. This may be due to some differences in the stability of the two learned linear models, although further investigation would be needed to verify this. Part of the motivation for this method was the hope that the network would learn locally relevant observable functions. The learned observables were plotted on a grid to visualize them and see if this were the case, but no distinctive, qualitative features indicating that different observables were learned for different regions of state space were found.

      The combined model doesn't see any noteworthy improvement in performance when compared to the standard DKN approach. While not shown here, the combined model was found to be sensitive to how many observables were learned by each of its constituents. For example, having 30 observables in the first encoder and 20 in the second led to worse results.
      Visualization of a pair of observables from the combined model, arbitrarily selected as the first observable from both encoder 1 (left) and encoder 2 (right). While only these two observables are shown here, plots for all 50 were produced. We noticed that observables from encoder 1 (the fixed model) tended to show `kinks' around $\theta=\pm 30\degree$. This may indicate that it was learning to account for the presence of the springs. In contrast, encoder 2 (the extended model) learned observable functions that were generally smoother across state space.

      Conclusion

      In this project, we sought to test two modifications to a DKN training scheme on an example of a piecewise dynamical system. By using a curriculum learning process or gradually increasing the number of observable functions, we hypothesized that the DKN would show better performance than an aggregate model trained from scratch. Ultimately, we found that neither of the proposed methods led to significant improvements.

      One of the potential causes of underperformance is the learned linear models’ instability. While this is a known issue regarding lifted linearization techniques , attempting to resolve the issue would require further work and additional study into how best to do so for this use case. The example model of a pendulum with springs could also have been chosen poorly. I opted to experiment with this system since it was physically meaningful, and I believed that it would be a simple toy model that wouldn’t require large models with extensive compute requirements. But observing the dramatic change in performance that occurred in the linear models simply through the addition of the springs made me wonder whether this system truly was as simple as I had initially made it out to be. It is possible that larger and more elaborate models with more observables and resources for training are necessary to learn an appropriate linearization.

      It is also worth considering the severe limitations of this study, imposed upon it by the need to tune a wide variety of hyperparameters. Even in the process of creating a linear model for the simple pendulum, I observed a wide range of performance based upon how the cost function or learning rate were varied. While some effort was taken to tune these and other hyperparameters for the models I explored, this process was far from exhaustive.

      Moreover, the proposed changes to the typical DKN architecture only served to add additional hyperparameters into the mix. What spring stiffnesses should be used during curriculum learning? Should the learning rate be decreased between different curriculums, or should the number of epochs be varied? How about the ratio of observables between the two models used in the second approach, is a 10:40 split really optimal? Some variations of these hyperparameters were considered during this project, but again an exhaustive search for optimal values was impossible.

      While this means that there is a chance that I simply used the wrong selection of hyperparameters to see better performance from the tested approaches, it also highlights the sensitivity that I observed in the performance of the DKNs. Even beyond the considerations described thus far, there are further considerations that can impact the structure and performance of learned linearizations. Some approaches augment the state variables with time-delayed measurements, for example. In other cases, the state variables are not included as observables and are instead extracted using a decoder network. This latter case is of particular interest, since recent work in the field has identified that certain types of nonlinear systems are impossible to linearize with a set of observables that include the states.

      Ultimately, while the experiments in this project didn’t agree with my hypothesis (and resulted in some underwhelming predictive performance) I gained a newfound appreciation for the process of training these models along the way.

      \ No newline at end of file diff --git a/blog/2023/deep-connectome-clustering/index.html b/blog/2023/deep-connectome-clustering/index.html new file mode 100644 index 00000000..2eaa36a4 --- /dev/null +++ b/blog/2023/deep-connectome-clustering/index.html @@ -0,0 +1,42 @@ + VGAE Clustering of the Fruit Fly Connectome | 6.S898 Deep Learning Blogs 2023

      VGAE Clustering of the Fruit Fly Connectome

      An exploration of how learned Variational Graph Auto-Encoder (VGAE) embeddings compare to Spectral Embeddings to determine the function of neurons in the fruit fly brain.

      Motivation

      The fruit fly connectome.

      Everything you’ve ever learned, every memory you have, and every behavior that defines you is stored somewhere in the neurons and synapses of your big, beautiful brain. The emerging field of connectomics seeks to build connectomes–or neuron graphs–that map the connections between all neurons in the brains of increasingly complex animals, with the goal of leveraging graph structure to gain insights into the functions of specific neurons, and eventually the behaviors that emerge from their interactions. This, as you can imagine, is quite a difficult task, but progress over the last few years has been promising.

      Now, you might be asking yourself, can you really predict the functions of neurons based on their neighbors in the connectome? A paper published by Yan et al. in 2017 asked this same question, searching for an answer in a roundworm (C. elegans) connectome. In their investigation, they discovered a neuron whose behavior had not been previously characterized, which they hypothesized was necessary for locomotion. They tested this hypothesis by ablating the neuron on a living C. elegans, and to the dismay of that poor roundworm, found that it was indeed necessary.

      Although impressive, the C. elegans connectome has only ~300 neurons, compared with the ~100,000,000,000 in the human brain; however, this year (2023):

      1. A paper by Winding et al. has published the entire connectome of a fruit fly larvae, identifying 3016 neurons and their 548,000 synapses.
      2. Google Research has announced an effort to map a mouse brain (~100,000,000 neurons)

      This is exciting because the fruit fly dataset presents an opportunity to identify more nuanced functions of neurons that may be present in more complex species like mice, but not in simpler species like the roundworm. This creates the requirement for algorithms that are sufficiently expressive and able to disentangle the similarities between neurons that appear different, but are functionally similar.

      Furthermore, current efforts to map connectomes of increasingly complex animals makes it desirable to have algorithms that are able to scale and handle that additional complexity, with the hopes of one day discovering the algorithms that give rise to consciousness.

      Background

      Can we learn about human brains by studying connectomes of simpler organisms?

      The primate brain exhibits a surprising degree of specialization, particularly for social objects. For instance, neurons in the face fusiform area (FFA) in the IT cortex appear to fire only in response to faces. Furthermore, individuals with lesions in or brain damage to this area lose their ability to recognize faces . In fact, there is even evidence of rudimentary face perception even in newborn infants with limited access to visual “training data,” who preferentially look at photos of faces, and other face-like arrangements, like inverted triangles (two vertices being the eyes and the third the mouth) . While there may not exist a grandmother cell that can recognize your grandmother, there certainly seems to be at least some engineered specialization in the brain. Cognitive scientists theorize that there is a set of core systems for representing object, actions, number, space, and conspecifics (other people!), together constituting what we might call “common sense,” which may help determine the blueprint of the human brain down to the genetic level . Notably, facial recognition exhibits substantial genetic heritability (over 60%!) and appears to be uncorrelated with general intelligence . We might imagine that there are a set of capabilities, including social cognition, that were so critical for human behavior that our brains evolved over hundreds of thousands of years to “hard code” certain structures, like the FFA, to help scaffold them. After all, another person’s face is an important signal for processes like mate selection, friendship formation, and theory of mind. The human brain and the cognitive processes it supports are evolutionary products. And even more importantly, the brain seems to be specialized in some ways, but behave flexibly in others. Through the scientific process, how good of an understanding can we reach about the complex organ sitting between our ears? To what degree are the neuronal assemblages in our brain specialized? 
How do the communications amongst these neurons grant us our incredible cognitive capabilities?

      In 1982, neuroscientist David Marr proposed three levels of analyses to study complex systems like the human mind: the computational level (what task is the system designed to solve?), the algorithmic level (how does the system solve it?), and the implementation level (where and how is the algorithm implemented in the system hardware?) . At one end of the spectrum, we might think about characterizing the computational capabilities of human cognition, like object recognition. On the other end, we might be interested in how object recognition is implemented in the brain itself, in all of its fleshy glory–how an incoming visual signal is processed by composites of receptive fields in the retina (biological “Gabor filters”) and fed to neurons in the primary and secondary visual areas of the cerebral cortex, for instance . In recent years, scientists have developed an interest in understanding the implementation level at an extremely high resolution by charting the connectome–the comprehensive map of all neural connections in the brain. However, if the grandmother cell is too simplistic of a model for knowledge representation in the human brain, then indeed the human connectome may offer an overly complex view. It seems easy to get lost in the wilderness of its approximately 100 trillion neurons and the nearly quadrillion synapses which connect them ! How can we begin to approach this overwhelming terra incognita?

      We might consider instead studying the connectome of a much simpler model organism, like the transparent 1mm-long nematode Caenorhabditis elegans, with whom we share an estimated 20-71% of our genes with . Or, maybe even the fruit fly Drosophila melanogaster, 60% of whose genes can also be found in the human genome (Max Planck). Even the study of such model organisms necessitates adding structure to complex, often unlabeled, relational data. And while the fruit fly brain is orders of magnitude less complex than our own, there are still over 3,000 neurons and half a million synapses to explore (Winding et al., 2023). Luckily, mankind’s toolkit for studying graph-like data is well-equipped.

      Unsupervised graph representation learning

      The problem of subdividing neurons in a connectome into types based on their synaptic connectivity is a problem of unsupervised graph representation learning, which seeks to find a low-dimensional embedding of nodes in a graph such that similar neurons are close together in the embedding space.

      A common way to identify functional clusters of neurons is through the lens of homophily, meaning that neurons serve the same function if they are within the same densely connected cluster in the connectome; however, this fails to capture the likely case that neurons with similar low-level functions span across many regions of the brain.

      Instead, a better approach might be to cluster neurons based on their structural equivalence, such that groups of neurons with similar subgraph structures are embedded similarly, regardless of their absolute location in the connectome. This is the approach taken by Winding et al., who “used graph spectral embedding to hierarchically cluster neurons based on synaptic connectivity into 93 neuron types”. They found that even though they used only information about the graph structure to predict functions, neurons in the same clusters ended up sharing other similarities, including morphology and known function in some cases.

      Spectral embedding is a popular and general machine learning approach that uses spectral decomposition to perform a nonlinear dimensionality reduction of a graph dataset, and works well in practice. Deep learning, however, appears to be particularly well suited to identifying better representations in the field of biology (e.g., AlphaFold2), and deep learning methods do appear to be capable of creating embeddings that more effectively preserve the topology of nodes in graphs.

      Learned VGAE graph embedding for Cora citation network dataset.

      Thus, it stands to reason that deep learning might offer more insights into the functions of neurons in the fruit fly connectome, or at the very least, that exploring the differences between the spectral embedding found by Winding et al. and the embeddings discovered by deep learning methods might provide intuition as to how the methods differ on real datasets.

      In this project, we explore the differences between functional neuron clusters in the fruit fly connectome identified via spectral embedding by Winding et al. and deep learning. Specifically, we are interested in exploring how spectral embedding clusters differ from embeddings learned by Variational Graph Auto-Encoders (GVAE), which are a more recent architecture proposed by one of the co-authors of the Variational Auto-Encoders (VAE) paper, Max Welling. GVAEs are an interesting intersection of graph neural networks (GNNs) and VAEs, both of which we explored in class, and comparing this technique to spectral embedding is relevant because of our previous discussions of spectral decomposition in class with respect to network scalability and RNN weights.

      We hypothesize that a deep learning technique would be better suited to learning graph embeddings of connectomes because they are able to incorporate additional information about neurons (such as the neurotransmitters released at synapses between neurons) and are able to learn a nonlinear embedding space that more accurately represents the topological structure of that particular connectome, learning to weight the connections between some neurons above others.

      Before we can discuss the experiments, however, we first provide more detail for Spectral Embedding and Graph Variational Autoencoders and compare the two methods.

      Methods

      Spectral Embedding

      One classical approach for understanding graph-like data comes from a class of spectral methods which use pairwise distance measures between data points to embed and cluster data. Spectral methods offer two obvious advantages when compared to other machine learning approaches. One, we can straightforwardly perform clustering for datasets which are inherently relational, like the connectome, where it is not immediately clear how a method like k-means can be used when we only have access to the relationships between data points (the “edges”) and not the node-level features themselves. Two, spectral methods are nonlinear, and don’t rely on measures like squared Euclidean distance, which can be misleading for data which are tangled in high dimensions, but which exhibit a lower intrinsic dimensionality.

      So, how does spectral embedding work, exactly? In short, an adjacency matrix is first calculated from the original dataset, which is then used to compute the graph Laplacian. Next, a normalized graph Laplacian is then eigen-decomposed and generates a lower dimensional embedding space on which simpler linear clustering algorithms, like k-means, can be used to identify untangled clusters of the original data.

      This class of methods makes no assumptions about the data (including cluster shape) and can be adjusted to be less noise sensitive–for example, by performing a t-step random walk across the affinity matrix for the data, as in diffusion mapping . An added benefit is that under the hood, spectral embedding can be performed by a series of linear algebra calculations, making it extremely time-efficient. However, as with many unsupervised learning methods, clustering based on spectral embeddings is difficult to scale–in our case, due to the eigen-decomposition step of the graph Laplacian.

      Variational Graph Autoencoders

      Although Spectral Embedding is still very popular, in recent years, more attention has been paid to the burgeoning field of geometric deep learning, a set of ideas which aim to solve prediction or embedding tasks by taking into account the relational structure between data points. One example is the variational graph auto-encoder (VGAE), which learns to embed a complex object like a network into a low-dimensional, well-behaved latent space. Kipf and Welling (2016) propose an encoder using a two-layer graph convolutional network, which performs convolutions across local subgraphs of the input network data (not unlike convolution on images, where the graph is a grid!). The graph is projected onto a low dimensional space distributed according to the standard normal through the optimization of a variational lower bound loss, and then upsampled using an inner product between latent variables. They show that this method achieves competitive results on a link prediction task when compared to other methods like spectral clustering and DeepWalk, a random walk-based representation learning algorithm.

      On the other hand, some have discovered that spectral embedding leads to more clear separability in low dimensional representation spaces for text data compared to GNN approaches like node2vec, which reportedly achieve state-of-the-art (sota) scores for multilabel classification and link prediction in other datasets . In addition, it appears that simple modifications like performing an error correlation correction on the training data and smoothing predictions on the test data for GNN-free architectures lead to sota-comparable performances . There are even concerns that the performance of geometric deep learning approaches are inflated, particularly in tasks like multi-label node classification, due to the assumption that the number of labels for test data are known to researchers .

      Thus, it remains unclear in what circumstances relatively novel geometric deep learning approaches do better compared to established and widely-explored methods like spectral learning, and particularly for novel data like the connectome. In this work, we attempt to gain deeper insights into which method is better suited to the task of connectome modeling, with the hope of learning about which method should be implemented in future connectomes, such as that of the mouse and eventually the human.

      Spectral Clustering (von Luxburg, 2007; Park, Jeon, & Pedryc, 2014) vs (V)GAEs (Kipf & Welling, 2016): A Story in Pictures

      Experiments

      Now that we have a good idea of how these methods compare to each other in terms of implementation, we explore them from an experimental perspective. Through our experiments, we try to quantitatively and qualitatively address the question of how connectome clusters learned by GVAE compare to the spectral clusters found in the paper. To answer this question, we make use of the fruit fly connectome adjacency matrix provided by Winding et al. as our primary dataset with the hope of answering this question for our readers.

      One common way to compare unsupervised graph representation learning algorithms is through a link prediction task, where a model is trained on a subset of the edges of a graph, and then must correctly predict the existence (or non-existence) of edges provided in a test set. If the model has learned a good, compressed representation of the underlying graph data structure, then it will be able to accurately predict both where missing test edges belong, and where they do not.

      A link prediction task. Green lines correspond to the training data, which contains samples of positive samples of edges that are present in the graph, and negative samples of edges that are not present in the graph. The test set in red corresponds to the remainder of positive and negative samples in the graph.

      We evaluate the models by computing the area under curve (AUC) of the ROC curve, which plots the true positive rate against the false positive rate. A completely random classifier that does not learn anything about the underlying graph structure would get an AUC of 0.5, while a perfect classifier would have an area of 1.0.

      Another metric we use to evaluate how good the models are is average precision (AP) of the precision-recall curve, which describes the consistency of the model.

      In addition to comparing the models with these metrics, we also explore how robust they are to decreasing dimensionalities of the latent space. We hypothesize that if a model is able to maintain high AUC and AP, even at very low-dimensional embedding spaces, then it is likely better at capturing the structure of the connectome and is more likely to be able to scale to larger datasets, like that of the human brain one day.

      Running this experiment yields the following curves, where the x-axis shows the dimensionality of the latent space, and the y-axis shows the AUCs and APs of the respective models.

      From this experiment, we find that both the Graph Autoencoder (GAE) and Variational Graph Autoencoder (VGAE) perform better than Spectral Embedding methods in terms of AUC and AP, indicating that the models might be better suited to capturing the nuances in the fruit fly connectome. At the dimensionality used for spectral embedding in Winding et al., d=24, we find that the models have comparable performance, but as we reduce the dimensionality of the learned embedding, the spectral embedding method quickly breaks down and loses its ability to capture significant features in the data, with an AUC of 0.52 at a dimensionality of 2. Since a score of 0.5 corresponds to a random model, this means that the spectral embedding method is no longer able to capture any meaningful structure in the data at that dimensionality. Winding et al. gets around this by only using spectral embedding to get a latent space of size 24, and then performing a hierarchical clustering algorithm inspired by Gaussian Mixture Models, but the simplicity and robustness of the GAE model seems to show that they may be better suited to modeling the types of functional neurons present in the connectomes of animals.

      Experiment 2: GVAE Latent Exploration

      Although the link-prediction experiment gives us a quantitative comparison of the models, we also believe it is important to explore the latent embeddings learned by GAE to see how they qualitatively compare with the learned embeddings used in the Winding et al. work. After observing that the GAE was robust to a latent space of size 2, we decided to look specifically at whether there were any similarities between the clusters found by the GAE with the 2-d embedding and the level 7 clusters published by Winding et al. Also, although the GAE showed better overall performance, we decided to specifically explore the Variational GAE because we expect it to have a latent manifold similar to that of the Variational Autoencoders.

      To this end, we first trained a Variational GAE with a 2-d latent space on the full fruit fly connectome and extracted the latent embedding of each node in the connectome.

      With this latent embedding, we first visualized the latent space using colors corresponding to the 93 clusters identified by Winding et al. Clusters of the same color in the learned GAE latent space mean that the VGAE identified the same cluster that was identified in the Winding et al. paper, and areas where there are many colors within a cluster mean that GAE found a different cluster compared to spectral embedding.

      Coloring the GVAE latent space by the found level 7 clusters Winding et al. Black points correspond to neurons that were not assigned a cluster by Winding et al.

      As seen in the figure above, we find that while VGAE projects directly to a 2-d latent space without any additional clustering to reduce the dimensionality, the learned embedding still shares many similarities with the spectral embedding down to a dimensionality of 24 followed by Gaussian Mixture Model hierarchical clustering. Therefore, using VGAE to learn a direct 2-d latent space still captures much of the same information that a more complex machine learning algorithm like spectral embedding is able to.

      We further explored the learned latent space by looking at whether the learned embedding had any correlation with the cell types identified in the fruit fly larvae connectome. Since the VGAE only had information about the structure of the graph embedding, clusters of similar colors in this figure mean that the cell type within the cluster shared a lot of common structures, like potentially the same degree or being connected to similar types of up or downstream neurons.

      We use the same color palette as the Winding et al. paper so that cell types in the level 7 clusters of the Winding et al. paper can be directly compared to the learned VGAE latent embedding.

      Coloring the Winding et al. level 7 clusters (left) and GVAE latent space (right) by cell types. This information was not provided to either algorithm during training, so clusters of the same cell type mean that its type can be inferred from structure only.

      As seen in the figure above, both spectral embedding and GVAE latent spaces capture knowledge about the cell types when trained purely on the graph structure. We believe this is because cells of this type have similar properties in terms of the types of neighboring neurons they connect to in the connectome, and they may also have special properties like higher degree of connections.

      In particular, it is interesting that sensory neurons and Kenyon cells are very well captured by both embeddings, and that MBIN cells and sensory neurons are clustered together by both their spectral embedding algorithm and VGAE.

      Discussion

      Our preliminary investigations show that deep learning algorithms such as Graph Autoencoders (GAEs) and Variational Graph Autoencoders (VGAEs) are able to capture at least as much nuance and information about function as spectral embedding algorithms. In addition, they come with the following advantages:

      1. With their current implementation, they can easily be run on a GPU, while common spectral embedding algorithms in libraries such as scikit-learn are only designed to work on CPUs. Since we take a deep learning approach, our GNN method can use batches optimized via Adam, while spectral embedding only works if the entire adjacency matrix fits in memory. This makes deep learning methods better able to scale to larger datasets such as the mouse connectome that may come in the next few years.
      2. As shown in experiment 2, GAEs and Variational GAEs are able to directly learn a robust embedding into a 2-d space without any additional clustering, making interpretation easy and fast. We suspect that because of its higher performance at embedding connectomes to such low dimensions compared to spectral embedding which performs only marginally better than a random algorithm at such low dimensions, VGAEs must be capturing some additional nuance of the graph structures that spectral embedding is simply not able to encode.
      3. Comparing the 2-d embeddings of VGAE to the clustered 24-d spectral embeddings found in Winding et al., we find that even when compressing to such a low-dimensional space, the semantic information captured does in fact match that of spectral embedding at a higher dimensional space. Coloring by cell type shows that it also captures information about the function of neurons, with similar neuron types being clustered together even when they are located all over the brain, such as Kenyon cells. Cells of the same type likely serve similar functions, so in this respect, VGAE is able to capture information about the function of cells using only knowledge of the graph structure.

      However, VGAE does not come without its limitations. One large limitation we found while implementing the architecture is that it currently requires graphs to be undirected, so we had to remove information about the direction of neurons for this work. Connectomes are inherently directed, so we likely missed some key information about the function of graphs by removing this directional nature of the connectome. Although this is not explored in our work, one simple way to fix this would be to add features to each node corresponding to the in-degree and out-degree of each neuron.

      This brings us to another limitation of our study, which is that we did not explore adding features to neurons in our connectome with the VGAE algorithm. Past work on GAEs has shown that adding features leads to better model results and makes the model better able to capture relevant structures in the data. We did not feel that would be a fair comparison with Winding et al. because spectral embedding methods are not able to include additional features related to nodes that one would get for free when mapping the connectome, but we believe that including these features in the GAE structure would lead to an even better representation of the underlying dataset. Examples of these “free” features we could get that would help us predict functions of neurons include 1) the hemisphere the cell belongs to (e.g., not in fruit flies, but neurons in the left brain of humans correspond to language), 2) the axon I/O ratio, and the dendrite output-input ratio of a neuron.

      One final limitation is that our model only trains on a single connectome. This means that we aren’t able to capture the variation of connectomes within a species. Maybe one day, we will be able to scan connectomes of people in the same way that we are able to scan genomes of people, but that day is likely still far away. We might be able to help this by using the generative component of the VGAE to create brains that are physically feasible given the structure of a single connectome, but it would be hard to test. Since we are currently only looking at the connectome of a single species, we likely aren’t capturing an embedding space that finds functionally similar neurons in different animals such as C. elegans, which we may be able to do in future work.

      Conclusion

      In this work, we asked if Deep Learning techniques like Variational Graph Autoencoders could learn something about the functions of cells in a connectome using only the graph structure. We found that VGAE did in fact capture relevant structures of the graph, even in the undirected case. It performed similarly to spectral embedding, even when embedding directly into a visualizable 2-d latent space. In the future, we may be able to learn about neurons that serve the same purpose across species, or learn about the underlying low level syntactic structures like for-loops or data types that our brain uses to encode consciousness, vision, and more.

      \ No newline at end of file diff --git a/blog/2023/denoisingVAE/index.html b/blog/2023/denoisingVAE/index.html new file mode 100644 index 00000000..f88f3eeb --- /dev/null +++ b/blog/2023/denoisingVAE/index.html @@ -0,0 +1,79 @@ + Comparing data augmentation using VAEs and denoising-VAEs for limited noisy datasets | 6.S898 Deep Learning Blogs 2023

      Comparing data augmentation using VAEs and denoising-VAEs for limited noisy datasets

      Background

      One of the significant challenges in this evolving landscape of machine learning is the prevalence of limited and noisy datasets. Traditional models and downstream tasks such as classification often struggle with such datasets, leading to suboptimal performance and a lack of generalizability.

      Could this be tackled using auto-encoders, specifically, Denoising Autoencoders (DAE) and Variational Autoencoders (VAE)? Denoising autoencoders (DAE) are trained to reconstruct their clean inputs with noise injected at the input level, while variational autoencoders (VAE) are trained with noise injected in their stochastic hidden layer, with a regularizer (KL divergence) that encourages this noise injection. But what if we could combine these strengths?

      Motivation

      Denoising autoencoders (DAE) are trained to reconstruct their clean inputs with noise injected at the input level, while variational autoencoders (VAE) are trained with noise injected in their stochastic hidden layer, with a regularizer (KL divergence) that encourages this noise injection. Denoising Variational Autoencoders (DVAEs) are an extension of the traditional variational autoencoder (VAE). The motivation for delving into the realm of DVAEs stems from a critical need - the ability to effectively interpret and utilize limited, noisy data. They merge the robustness of DAEs in handling noisy inputs with the generative prowess of VAEs. As highlighted in the research paper “Denoising Criterion for Variational Auto-Encoding Framework”, integrating a denoising criterion into the VAE framework refines the robustness of learned representations, thereby enhancing the model’s generalization ability over various tasks.

      VAEs, known for their generative capabilities, introduce noise at the hidden layer level, potentially offering a means to augment limited datasets. On the other hand, DVAEs, an innovative extension of VAEs, introduce perturbation to input data, promising a more robust feature extraction and create additional, realistic augmentations of the data. Our aim here is to comprehensively analyze and contrast the efficacy of VAEs and DVAEs in augmenting such datasets. We hypothesize that while VAEs can offer some level of data enhancement, DVAEs, with their inherent denoising capability, might prove superior in extracting more reliable and robust features from noisy datasets.

      Research Problem Statement

      The first aspect of this research is to explore the dual functionality of DVAEs — their ability to denoise input data while concurrently learning a generative model of the data distribution. The next aspect is to compare the performance of DVAEs against traditional VAEs in i) learning robust latent representations, and ii) in downstream classification tasks with richer varied datasets by utilizing the data augmentation aspect of these generative models.

      1. Learning Robust representation and Generating Synthetic data using DVAEs: Can DVAEs dual capability of denoising input data and learning a generative model of the data distribution simultaneously be exploited to effectively learn robust representations from limited and noisy datasets and utilized to generate additional synthetic data (augmented dataset)? How does it compare to using traditional VAEs?

      2. Performance Enhancement for downstream tasks: How does the DVAE-generated synthetic data impact the performance metrics of downstream classification tasks? Compare performance metrics with traditional VAE for different noise levels in test datasets.

      Methods

      Dataset Selection and Preparation

      The Fashion-MNIST dataset, which includes 60,000 training images, is selected for the experiments mentioned above. To simulate a limited data environment, a subset of 5,000 images is randomly selected from the dataset. We also create a noisy version of the training dataset to understand the efficacy in scenarios when clean input data is not available.

      Figure 1. Sample Fashion-MNIST images

      Figure 2. Artificially Corrupted(Noised) Fashion-MNIST images

      VAE and DVAE - Architecture and Hyperparameters

      The VAE and DVAE architecture is similar and differ only in the sense that DVAE adds noise to input images before passing it to encoder.

      The encoder comprises two hidden layers, each with 128 neurons. The input size is flattened to 28 * 28 dimensions. Each hidden layer in the encoder is followed by a ReLU activation function. The encoder’s output is connected to two separate layers: one for generating the mean (µ) and the other for the logarithm of the variance (log-variance), both projecting to a 4-dimensional latent space (z_dims).

      On the decoding side, the architecture starts with the latent space and expands through a similar structure of two hidden layers, each with 128 neurons and ReLU activation functions. The final output layer reconstructs the original input size of 28 * 28 dimensions and applies a Sigmoid activation function.

      This VAE/DVAE employs a reconstruction loss using the binary cross-entropy between the input and its reconstruction, and a regularization term (KL-Divergence) derived from the latent space to enforce a probabilistic distribution. Each model is trained for 60 epochs with batch size 128.

          input_size = 28 * 28
          z_dims = 4
          num_hidden = 128
          self.encoder = nn.Sequential(
              nn.Linear(input_size, num_hidden),
              nn.ReLU(),
              nn.Linear(num_hidden, num_hidden),
              nn.ReLU()
          )

          self.mu = nn.Linear(num_hidden, z_dims)
          self.logvar = nn.Linear(num_hidden, z_dims)

          self.decoder = nn.Sequential(
              nn.Linear(z_dims, num_hidden),
              nn.ReLU(),
              nn.Linear(num_hidden, num_hidden),
              nn.ReLU(),
              nn.Linear(num_hidden, input_size),
              nn.Sigmoid(),
          )

      Data Augmentation

      For augmenting the dataset, we generate 2 new samples for each input image. First, the image is passed through the encoder part of VAE/DVAE, and then we sample a latent representation vector around the obtained latent representation - mean and std.

      Figure 3. Example: VAE Data Augmentation

      Figure 4. Example: DVAE Data Augmentation

      Classification Network(CNN) Architecture

      The Classification Network (CNN) architecture is composed of a series of convolutional, activation, pooling, and fully connected layers. Initially, it features a convolutional layer with 1 input channel and 32 output channels, using 3x3 kernels, stride of 1, and padding of 1 with ‘reflect’ mode, followed by a ReLU activation function. This is succeeded by another convolutional layer that increases the depth to 64 filters, maintaining the same kernel size, stride, and padding, accompanied by the same activation function. Subsequently, a max pooling layer with a 2x2 kernel reduces the spatial dimensions of the feature maps, highlighting significant features. The data is then flattened, resulting in a feature vector with a length of 64 * 14 * 14, which feeds into a series of three linear layers, each with 128 units, interspersed with the activation function. This sequence of fully connected layers is designed to capture complex relationships in the data. Finally, the architecture has an output linear layer that maps to the number of outputs (num_outputs=10).

        image_dim = 28
        num_outputs = 10
        act_cls = nn.ReLU
        net = [
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1, padding_mode='reflect'),
            act_cls(),
        ]

        net.extend([
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1, padding_mode='reflect'),
            act_cls(),
            nn.MaxPool2d(kernel_size=2)
        ])
        input_len = int(64 * image_dim/2 * image_dim/2)

        net.extend([
            nn.Flatten(),
            nn.Linear(input_len, 128),
            act_cls(),
            nn.Linear(128, 128),
            act_cls(),
            nn.Linear(128, 128),
            act_cls(),
        ])

        net.extend([nn.Linear(128, num_outputs)])

      Results

      Here, we utilize the standard Fashion-MNIST dataset for our analysis. Initially, we train the VAE/DVAE network on a subset of 5,000 samples from the training dataset over 60 epochs. Following this, we employ the VAE/DVAE networks to generate synthetic data, leveraging the learned latent space representation for data augmentation purposes. The performance of the augmented datasets is then evaluated using the previously described CNN architecture for classification tasks.

      VAE-DVAE Performance

      DVAE’s training loss closely tracks the VAE’s loss throughout training. This is interesting because the DVAE is dealing with additional artificial noise, yet it performs on par with the standard VAE. The fact that the DVAE does not exhibit a significantly higher loss than the VAE might suggest that it is effectively denoising the data and learning a robust representation, despite the additional noise.

      Figure 5. Training Loss: VAE v/s DVAE

      Latent Space Visualization

      Here, we are visualizing the latent space of VAE and DVAE, a high-dimensional space where each dimension represents certain features learned by the model from the data. For this, we plot a 10x10 grid of images where each image in the grid is generated by the model by varying the values in two chosen latent dimensions (i and j), while keeping the other dimensions set to zero. This helps in understanding the effect of each latent dimension on the generated output.

      Figure 6. VAE Latent Space Visualization

      Figure 7. DVAE Latent Space Visualization

      The lack of visible differences in the latent space structures of both VAE and DVAE indicates that the two models are learning similar representations. To delve into these nuances, we assess the effectiveness of augmented data (created using these learned latent spaces) in a subsequent classification task.

      Classification Performance

      To delve into the efficacy of VAE and DVAE in augmenting datasets for downstream image classification tasks, we trained a CNN on a limited subset of the Fashion-MNIST dataset to establish a baseline. Subsequently, we generated synthetic data using both VAE and DVAE, aiming to enrich the training dataset and observe the resultant impact on the CNN’s performance. This is crucial considering the initial constraint of limited training data to start with. We used Fashion-MNIST test dataset, which includes 10,000 test images, for evaluating the performance of learned CNN network.

      We also tested robustness of these augmented datasets against varying levels of noise (artificially added to the test dataset), simulating real-world conditions where test data often includes such imperfections, arising because of the limitations in measurement tools.

      Figure 8. CNN Learning Curve for Limited Dataset

      Figure 9. CNN Learning Curve for VAE Augmented Dataset

      Figure 10. CNN Learning Curve for DVAE Augmented Dataset

      Dataset Type \ Noise Level No Noise 2.5% Noise 5% Noise 7.5% Noise 10% Noise
      Limited Dataset 83.56% 83.39% 83.11% 82.33% 81.75%
      VAE Augmented Dataset 84.18% 84.03% 83.57% 82.68% 81.43%
      DVAE Augmented Dataset 85.32% 84.98% 84.67% 83.98% 82.59%

      Artificially corrupted Fashion-MNIST

      Here, we deliberately introduced artificial noise to the standard Fashion-MNIST dataset to effectively simulate the real-world scenario where training data is not cleaned and is often noisy and imperfect. Such conditions often pose significant challenges in learning effective representations, making our approach highly relevant for understanding the adaptability and efficiency of VAE and DVAE models in handling noisy data. This way we expose the model and train it on a variety of noise patterns while forcing it to reconstruct the original noised image. The model will learn to effectively separate noise from the signal and will be less likely to overfit to the ‘clean’ aspects of the training data and can thus perform better on unseen, noisy data. This improves the generalization capabilities of the model making it more suitable for practical applications.

      Here, we generated synthetic data using both VAE and DVAE which are trained on the artificially corrupted Fashion-MNIST dataset. We then compare the performance of CNN network for three datasets - Limited Noisy Dataset with no augmentation, VAE Augmented dataset and DVAE Augmented Dataset, where representations are learned using the noisy training set. Consistent with our earlier methodology, we further evaluated the robustness of CNNs trained with these datasets by testing them against varying levels of noise in the test dataset.

      Dataset Type \ Noise Level No Noise 2.5% Noise 5% Noise 7.5% Noise 10% Noise
      Limited Noisy Dataset 83.77% 83.79% 83.61% 83.36% 82.98%
      VAE Augmented Dataset 85.24% 84.99% 84.62% 84.04% 83.20%
      DVAE Augmented Dataset 85.48% 85.38% 85.10% 84.89% 84.58%

      Conclusions

      Here are the key findings from our research:

      1. Enhanced Learning from Augmented Data: We observed that the CNN trained with data augmented by both VAE and DVAE demonstrated improved accuracy and generalization capabilities, especially when compared to the CNN trained on a limited dataset. This underscores the effectiveness of generative models in enriching training datasets, leading to more robust learning.

      2. Superiority of DVAE in Handling Noise: The CNN trained with DVAE augmented data consistently outperformed the one trained with traditional VAE augmented data in tests involving noisy conditions. This aligns perfectly with our research hypothesis about the dual functionality of DVAEs — not only do they learn a generative model of the data distribution but also excel in denoising input data.

      3. Robustness to Varied Noise Levels: A crucial aspect of our research was evaluating the performance of augmented datasets under various noise levels. The augmented datasets, especially those generated by DVAEs, maintained consistent performance across different noise conditions. This suggests that the models have not only learned the essential features of the data but are also adept at filtering out noise.

      In downstream classification tasks, DVAE-generated synthetic data improved performance metrics, surpassing those achieved with traditional VAE-generated data, particularly in tests with varied noise levels. This validates our hypothesis and highlights the potential of DVAEs in real-world applications where data is limited and data quality is a critical factor.

      The next steps for this research could be to focus on expanding the types of noise tested in our experiments to evaluate the adaptability and robustness of DVAEs in a broader range of real-world scenarios. We could conduct more comprehensive data augmentation experiments to delve deeper into the capabilities of DVAEs in enhancing neural network learning and generalization.

      \ No newline at end of file diff --git a/blog/2023/detect-image/index.html b/blog/2023/detect-image/index.html new file mode 100644 index 00000000..bd218afd --- /dev/null +++ b/blog/2023/detect-image/index.html @@ -0,0 +1,42 @@ + Zero-Shot Machine-Generated Image Detection using Sinks of Gradient Flows | 6.S898 Deep Learning Blogs 2023

      Zero-Shot Machine-Generated Image Detection using Sinks of Gradient Flows

      How can we detect fake images online? A novel approach of characterizing the behavior of a diffusion model's learned score vectors.

      Abstract

      Detecting AI-generated content has become increasingly critical as deepfakes become more prevalent. We discover and implement algorithms to distinguish machine-generated and real images without the need for labeled training data. We study the problem of identifying photorealistic images using diffusion models. In comparison to the existing literature, we discover detection techniques that do not require training, based on the intuition that machine-generated images should have higher likelihoods than their neighbors. We consider two metrics: the divergence of the score function around a queried image and the reconstruction error from the reverse diffusion process from little added noise. We also compare these methods to ResNets trained to identify fake images from existing literature. Although the previous methods outperform our methods in terms of our accuracy metrics, the gap between our zero-shot methods and these ResNet methods noticeably declines when different image transformations are applied. We hope that our research will spark further innovation into robust and efficient image detection algorithms.

      Introduction

      As AI-generated images become ever more widespread, garnering virality for how realistic they have become, we are increasingly concerned with the potential for misuse. A deluge of machine-generated fake images could spread misinformation and harmful content on social media. From relatively innocuous pictures of Pope Francis wearing an AI-generated image puffer coat to dangerous disinformation campaigns powered by diffusion models, we live in a new era of media that we cannot trust. The European Union has passed legislation that, among other regulations, requires AI-generated content to be explicitly marked so. The enforcement of such legislation and similar-minded policies, however, remains unclear. Consequently, a growing body of research has sought to develop techniques to distinguish between the real and the synthetic.

      The rise of models capable of generating photorealistic content makes the detection problem difficult. While there are still numerous nontrivial challenges with current models from their inability to depict text and render tiny details humans are innately sensitive to such as eyes and hands, the pace of the technology is moving in a way that makes relying on these flaws short-sighted and dangerous. Another potential complication is that advanced photo editing techniques such as Adobe Firefly have capabilities such as generative inpainting that make it such that an image could contain both real and invented content. Even simple data augmentations like crops, rotations, color jitters, and horizontal flipping can make the input look vastly different to a detection model. Furthermore, the majority of popular image generation tools are text-conditional, and we cannot expect to recover the text prompt, not to mention the model that generated the image. This makes transferable, zero-shot techniques of paramount importance.

      In this paper, we propose two techniques for detecting images from diffusion models (see Figure 1). Diffusion models have been one of the most successful architectures for image generation, inspired by thermodynamic principles. Diffusion models learn a score function (gradient of log likelihood) that ‘undoes’ noise from the image. In effect, these models learn a gradient field that points to the real-world data manifold. We leverage the intuition that the greater the deviation the diffusion model’s machine-generated images are from the real world data, the greater the difference of the neighborhood gradient field. In particular, we believe that machine-generated images are more likely to live in a ‘sink’ of the gradient field as the diffusion model ‘flows’ images down the gradient field. We thus propose the divergence of a diffusion model’s score function as a promising zero-shot statistic for whether an image is generated by the diffusion model.

      In addition, another metric for the ‘sink’ property of the gradient field at the image of concern is how far the image moves after a small displacement and flow along the gradient field. This has a nice interpretation in diffusion models as the reconstruction error for running the reverse process over just a small timestep on just a slightly perturbed image.

      Figure 1: The Divergence and Reconstruction Error Hypothesis: Images on the generated data manifold (red) have negative divergence and small reconstruction error, while images on the real data manifold (green) have zero divergence and large reconstruction error.

      Our overarching research question is thus summarized as, can we use the properties of a diffusion model’s tacit vector field to build an effective zero-shot machine-generated image detector, specifically looking at divergence and reconstruction error?

      The main contributions of our paper are:

      1. Proposing two methods inspired by sinks of gradient flows: divergence and reconstruction error.

      2. Conducting a wide battery of experiments on the performance of these methods in a variety of augmentation settings.

      Previous literature has considered several different methods for image detection. Sha et al. 2022 trained machine learning classifiers to detect fake images using high-level image and text embeddings. They, however, do not consider the local information around image embeddings, and require existing datasets of known image-generated and non-image-generated examples to train their classifier. Corvi et al. 2023 identified “forensic traces” in machine-generated image residuals for this task. Again, their method requires many data samples, and requires separate training on diffusion models and GANs.

      We are inspired by ideas from DetectGPT, a recent work which addressed the same problem of detecting AI-generated content, but in the setting of large language models. For a given piece of text, DetectGPT perturbs the original text and computes the difference in log-likelihood between the perturbed text and the original text:

      \[\mathrm{DetectGPT}(x,p_{\theta},q)\triangleq\log p_{\theta}(x)-\mathbb{E}_{\tilde{x}\sim q(\cdot|x)}\log p_{\theta}(\tilde{x})\]

      where $p_\theta$ is the language model and $q$ is the distribution of perturbations. If the difference in log-likelihood is large, then the attack claims that the original text is more likely to be generated by a language model.

      There are several critical differences between language models and diffusion models. With text, one can directly compute the log likelihood of a given piece of text, even with only blackbox access, i.e., no visibility to the model’s parameters. In contrast, for diffusion models, it is intractable to directly compute the probability distribution over images because diffusion models only learn the score. Moreover, the most commonly used diffusion models, e.g. DALL-E 3, apply the diffusion process to a latent embedding space rather than the pixel space. To address the latter concern, we plan on applying the encoder to the image to obtain an approximation of the embedding that was passed into the decoder. And to address the former, instead of approximating the probability curvature around a given point like DetectGPT, we formulate a statistic characterizing whether the gradient field/score is a sink, i.e., the gradients around a machine-generated image point to the machine-generated image. This captures the idea of a local maximum in probability space, similar to the DetectGPT framework.

      It would be remiss to not mention Zhang et al. 2023, who argued that watermarking, a strictly easier task than machine-generated image detection, is likely impossible. They claim that an adversary who can perturb a generated image of text without too much degradation and has blackbox access to the watermarking scheme can conduct a random-walk on reasonable outputs until the watermark is degraded. However, their analysis was mainly theoretical and lacked specific experiments with diffusion models. It remains to be seen whether their assumptions still hold for image generation, and whether more concrete watermarking schemes may afford some level of protection against less sophisticated adversaries or the unintentional use of machine-generated images.

      Methods

      Dataset. To conduct our research, we needed datasets of known real and fake images. We used MSCOCO , a dataset of 330K non-machine generated images and captions of common real-world objects which was also used by Corvi et al. 2023. Initially, we planned to use DiffusionDB for our fake images, a dataset of 14M (prompt, image) pairs generated by the open-source Stable Diffusion Version 1 model scraped from the StableDiffusion discord. However, we realized that many of the images in DiffusionDB are not meant to be realistic. Instead, we iterated through the captions of MSCOCO and used Stable Diffusion V1.4 to generate a matching machine-generated image for that caption, as in Corvi et al. 2023.

      Baseline. We used the model and code from Corvi et al. 2023 to identify images generated by Stable Diffusion as our trained baseline. Their model is a ResNet18 image-only detector trained on the training split of the MSCOCO dataset and images also generated by prompts from StableDiffusion.

      Detection Algorithms. For our attacks, we compute the divergence of the diffusion model’s score field around the image (negative divergence indicates a sink). We can estimate this via a finite-differencing approach: given a diffusion model $s_\theta(x)$ which predicts the score $\nabla_x\log p_\theta(x)$, we have that

      \[\mathrm{div}(s_\theta,x)= \sum_{i=1}^d \frac{s_\theta(x+he_i)_i-s_\theta(x-he_i)_i}{2h}\]

      for small $h$ and orthogonal basis ${e_i}_{i=1}^d$. However, images are high-dimensional, and even their latent space has $\approx10,000$ dimensions, which means that fully computing this sum could be computationally expensive. In this paper, we sample a fraction of the dimensions for each queried image.

      Another way to capture the intuition that machine-generated images have higher likelihoods than their neighbors is by noising the latent to some timestep $t$, and then comparing the distance of the image denoised by the diffusion model to the original image. That is, given a diffusion model $f_\theta$ which takes a noised image and outputs an unnoised image (abstracting away noise schedulers, etc. for clarity),

      \[\mathrm{ReconstructionError}(f_{\theta},x)\triangleq \mathbb{E}_{\tilde{x}\sim \mathcal{N}(x,\epsilon)}||x-f_{\theta}(\tilde{x})||_2^2\]

      for small $\epsilon$. The intuition is that if an image and thus more likely, then the denoising process is more likely to send noisy images to that particular image.

      Comparison. For each model, we use the AUC-ROC curve and the true positive rate (TPR) at low false positive rate (FPR) as metrics. The latter notion of accuracy is borrowed from the membership inference attack setting in Carlini et al. 2021. As they argue, this metric quantifies our confidence that a point identified as fake is actually fake. In important settings like filtering fake images on social media platforms, this is especially important as there may be asymmetric consequences for accidentally flagging an image as fake compared to missing a fake image. We also provide a data visualization tool for the images our method identifies. In the real world, we can expect that the images we want to test will be distorted, either by random cropping, reflections, rotations, or compression. We will apply image augmentations over both fake and real image datasets and report the same metrics over these augmentations.

      Experiments

      We run all experiments over a common set of 500 images from the test set of MSCOCO and the corresponding 500 images generated by Stable Diffusion V1.4 with the same prompt using HuggingFace’s default arguments.

      For our Divergence method, we randomly sample $d=10$ dimensions to compute the divergence over and set $h=0.1$. For our Reconstruction method, we compute an average distance over 10 reconstructed images per original image and use add/remove noise equivalent to 1 time-step.

      For each method, we evaluate the performance on no augmentation, random $256\times 256$ crop (corresponding to about a quarter of the image for generated images), grayscale, random horizontal flip with probability $0.5$, random rotation between $[-30^\circ,30^\circ]$, and random color jitter of: brightness from $[0.75,1.25]$, contrast from $[0.75,1.25]$, saturation from $[0.75,1.25]$, and hue from $[-0.1,0.1]$.

      Table 1: Divergence, Reconstruction, and ResNet Detection AUC and True Positive Rate at 0.1 False Positive Rate.

      AUC / TPR$_{0.1}$ Method
      Augmentation Divergence Reconstruction ResNet
      No Aug. 0.4535 / 0.078 0.7310 / 0.000 1.000 / 1.000
      Crop 0.4862 / 0.092 0.4879 / 0.064 1.000 / 1.000
      Gray. 0.4394 / 0.056 0.7193 / 0.000 1.000 / 1.000
      H. Flip 0.4555 / 0.084 0.7305 / 0.000 1.000 / 1.000
      Rotate 0.4698 / 0.062 0.6937 / 0.000 0.9952 / 0.984
      Color Jitter 0.4647 / 0.082 0.7219 / 0.000 1.000 / 1.000

      Figure 2: AUC-ROC Curves in No Augmentation Setting.

      (a) Divergence

      (b) Reconstruction

      (c) ResNet

      Figure 3: Histograms of Computed Statistics in No Augmentation Setting.

      (a) Divergence

      (b) Reconstruction

      (c) ResNet

      Trained Baseline. The trained baseline does extraordinarily well at the MSCOCO vs. Stable Diffusion detection task. It achieves $1.0$ AUC (perfect accuracy) across all augmentation settings except for rotation for which it gets an almost perfect AUC of $0.9952$. This high performance matches Corvi et al. 2023’s findings, stemming from the fact that the ResNet was trained on the MSCOCO distribution and Latent Diffusion generated images are similar to Stable Diffusion generated images. In their paper, the performance noticeably drops to around $0.7$-$0.8$ AUC for other image generation models.

      Divergence. Divergence does extremely poorly, with AUCs just slightly below 0.5, indicating that in fact generated images have greater divergence than real images—the opposite of our intuition, but this may also be noise as these values are essentially equivalent to random guessing. We suspect that this is largely due to our low choice of $d$, meaning that we cannot get a representative enough sample of the dimensions to get an accurate estimate of the true divergence. We may have also chosen $h$ too large, as we have no idea of the scale of any manifold structure that may be induced by the gradient field.

      Reconstruction Error. Reconstruction error, on the other hand, boasts impressive AUCs of around $0.7$. The shape of the curve is particularly strange, and with the additional observation that the AUC when the random cropping is applied goes back to $0.5$ AUC, indicated to us that the image size may be the differentiating factor here. MSCOCO images are often non-square and smaller than the $512\times 512$ constant size of the generated images. As the Frobenius norm does not scale with image size, we hypothesize that using the spectral norm and dividing by the square root of the dimension would instead give us a more faithful comparison, akin to the random crop results. However, data visualization of the examples does not show a clear correlation between image size and reconstruction error, so it appears that this detection algorithm has decent AUC but poor TPR at low FPR, and is vulnerable to specifically cropping augmentations.

      Detection Visualizations. We developed a dashboard visualization that enables us to look more closely at images and their associated detection statistics. Some examples we can pick out that seem to make sense include Figure 4, where the real image is captioned as a CGI fake image, and predictably gets a low statistic as deemed by Reconstruction Error (the generated image, ironically, gets a higher statistic denoting more real).

      Figure 4: An Example Image of a CGI “Real” Image Getting Detected as Fake.

      However, from a visual inspection of images, we cannot identify a clear relationship between image content or quality of generated images that holds generally. We make our dashboard public and interactive; a demo can be seen below:

      Discussion

      Throughout our experiments, the divergence-based detector performs much worse than the other detectors. Because the latent space has a very high dimension, the divergence detector may require sampling from many more dimensions than is practical for an image detector in order to obtain good estimates of the divergence. Further research should try to scale this method to see if it obtains better results. Mitchell et al. 2023 justify the validity of their machine-generated text detection statistic as a Hutchinson trace estimator of the divergence of the log probabilities; however, the poor performance of the divergence detector implies that estimating the trace is not helpful for image detection and that other model properties may instead be at play for this method’s effectiveness. In contrast, the noising/denoising detector implicitly incorporates information from all dimensions, which may explain its better performance. The model from Corvi et al. 2023 outperforms our methods under all augmentations, achieving a perfect AUC on images without data augmentations. This is consistent with what was reported in their manuscript. However, this is not an unbiased estimate of the trained classifier’s performance, because they also used MSCOCO data to train and test their classifier. We were limited to this experimental setup by data availability and previous literature. Future work should compare the zero-shot and trained detectors on completely out-of-sample data and with different generation models.

      Although at face-value our detectors perform worse than the pre-trained model in our experiments, our project still introduces some interesting ideas for machine-generated image detection that are of interest to the broader community and worth further exploring. First, the techniques we explored parallel zero-shot machine-generated image detection methods for text. The fact that in both settings, perturbing the inputs and computing the curvature of the log probabilities are potent signals for machine-generated detection implies that these features may be an indelible mark of machine-generated models across all modalities. Second, image detection algorithms trained on data may be fundamentally vulnerable to adversarial modifications. Because there exists non-robust features that are predictive of the output in training data, adversaries, who realistically may have access to the image detection algorithm over many trials, can craft subtle background noise that circumvents image-detection algorithms. Our methods, which consist of only a few parameters, are not prone to adversarial attacks unlike trained models. Third, this work highlights the use of other features besides the image as features for image detection, e.g. score function and noising/denoising the image. Future work may build on the ideas behind these features to improve trained image detectors.

      \ No newline at end of file diff --git a/blog/2023/diaz-proposal/index.html b/blog/2023/diaz-proposal/index.html new file mode 100644 index 00000000..02c7ad62 --- /dev/null +++ b/blog/2023/diaz-proposal/index.html @@ -0,0 +1,70 @@ + Optimizations of Transformers for Small-scale Performance | 6.S898 Deep Learning Blogs 2023

      Optimizations of Transformers for Small-scale Performance

      CNNs generally outperform ViTs in scenarios with limited training data. However, the narrative switches when the available training data is extensive. To bridge this gap and improve upon existing ViT methods, we explore how we can leverage recent progress in the transformer block and exploit the known structure of pre-trained ViTs.

      Figure 1: Attention Maps of a Vision Transformer (DINO). Source: https://github.com/sayakpaul/probing-vits .

      Transformers: Great but fall short

      Basic Background

      Transformers have well-earned their place in deep learning. Since the architecture’s introduction in, we have seen huge improvements in our model’s capabilities. The most notable of which being natural language processing (NLP) with large-language models such as GPT-4 stunning the world at-large.

      Originally designed for NLP, the transformer architecture has been robust in other domains and tasks. For example, it has been translated, with success, to de-novo protein design, the medical field, and, of most relevance, computer vision. This behaviour differs from architectures of the past like RNNs and CNNs which have been limited to one domain. The potent generalizability of the transformer lies within the self-attention mechanism. Without getting too much into detail, self-attention enables nodes within a neural network to probe the input sequence, determine what is most interesting, and attend towards the region of interest by dynamically updating its weights. Visualization of attention can be seen in Figure 1. By probing the data landscape, the architecture enables long-range dependencies to be modeled regardless of distance. From a Fourier perspective, the transformer caters towards the low-frequency information in the data and deciphers how the elements of an input sequence all relate to each other. These connections help the transformer accurately model global information in the data perhaps indicating why they are so powerful. In this blog, we will specifically examine the transformer in vision, determine how it can be improved, and evaluate new strategies to increase its viability on small datasets.

      Figure 2: ViT workflow.

      Vision: The Problem

      The Vision Transformer (ViT) introduced the transformer to the computer vision world in late 2020. The ViT is simple: it funnels image patches into a tokenization scheme, adds positional encoding, and feeds these tokens into a transformer block. A graphical workflow of the ViT from the original paper can be seen in Figure 2.

      Since its introduction, the ViT and associated variants have demonstrated remarkable benchmarks in image classification, image restoration, and object detection. Much of these new methods can compete and even outperform long-established CNNs. However, ViTs are data-hungry requiring extensive amounts of training data to surpass CNNs. In small scale training, ViTs are burdensome to train and achieve sub-par performance compared to their CNNs counterparts. In , they investigate this discrepancy by comparing the feature and attention maps of small-scale CNNs and ViTs, respectively. The authors determine the ViT lacks the ability to learn local information and has ill-suited representation capacity in the lower layers. In contrast, the CNN demonstrate remarkable inductive bias due to weight sharing and locality properties which enable high-frequency modeling. The ViT’s low-frequency and the CNNs high-frequency capacity has initiated a wave of new models aimed at combining the two for comprehensive modeling capability.

      Despite the complementary nature of these architectures, they break the fidelity of the transformer and make for difficult analysis. Therefore, there exists a gap in the traditional transformer architecture to perform in small-data regimes, particularly in vision. Motivated by this shortcoming, we aim to investigate and improve the current ViT paradigm to narrow the gap between CNNs and ViTs on small-data. In particular, we examine novel initialization schemes, removal of component parts in our transformer block, and new-learnable parameters which can lead to better performance, image throughput, and stable training on small-scale datasets.

      Figure 3: Standard transformer encoder block. Encoder can be stacked for x amount of layers.

      Transformer Block

      To serve as a basis of comparison, we will examine the standard transformer block seen in Figure 3. The block is identical to with the exception of using layer normalizations before the multi-headed attention (MHA) and multi-layer perceptron (MLP) blocks as opposed to after. In practice, this placement has been shown to be more stable and increase performance. With the exception of this modification, the block has seen few improvements over the years testifying to its robustness. However, recent trends in theory hint towards ways we could break this notion – all while enjoying increased performance.

      Before we delve into these advances and their implications, consider the following transformer block information flow:

      \[\displaylines{ \text{Attention} = \text{A}(X) = \text{Softmax}\Biggl(\frac{XW_{Q}W_{K}^{T}X^{T}}{\sqrt{k}}\Biggl) \\ \\ \text{A}(X) \in \mathbb{R}^{T\times T}}\]

      which is shortly followed by:

      \[\displaylines{ \text{S}(X) = \text{A}(X)W_{V}W_{O} \\ \\ \text{S}(X) \in \mathbb{R}^{T\times d} }\]

      and:

      \[\text{Output} = \text{MLP}(\text{S}(X))= \text{Linear}(\text{GELU}(\text{Linear}(\text{S}(X))))\]

      where:

      • Embedded input sequence: \(X \in \mathbb{R}^{T \times d}\)
      • Linear query and key layers: \(W_{Q},W_{K} \in \mathbb{R}^{d \times k}\)
      • Linear value and projection layers: \(W_{V}, W_{O} \in \mathbb{R}^{d \times d}\)
      • MLP Linear layers: \(\text{Linear} \in \mathbb{R}^{d \times d}\)
      • \(T =\) # of tokens, \(d =\) embedding dimension, \(k = \frac{d}{H}\), \(H =\) # of attention heads

      The flow of information mirrors the transformer block in Figure 3. Readers unfamiliar with transformer intricacies such as MHA and MLPs are encouraged to read.

      Recently, there have been many proposals on how the transformer block can be further modified to increase data throughput and eliminate “redundant” or “useless” parts that do not have any significant contribution to the transformer’s modeling capabilities. For example, , used a parallel MHA and MLP incorporated into a large-scale ViT for stable and efficient training. Throughout this blog, we will focus on the ideas overviewed and proposed by as they present intriguing results and a synthesis on the current state of this research topic. The interested reader is encouraged to study their paper for a more extensive understanding of the ideas.

      Figure 4: Comparison between transformer architectures. Left: Standard block as shown in Figure 3. Bottom Right: Parallel block proposed in. Top Right: Newly proposed encoder.

      The overarching theme of was to take the standard transformer block and evaluate the necessity of each component. In doing so, they removed each component part and studied its effects on performance. Understandably, blindly removing components will lead to unstable training and ill-performance (i.e. if one were to remove the skip connections, they would encounter vanishing gradients as seen in Figure 14). However, took the approach of removal combined with recovery. For example, when the authors removed skip connections, they required a modification to the self-attention matrix of the form:

      \[\text{A}(X) \leftarrow (\alpha\text{I} + \beta \text{A}(X))\]

      where \(\alpha\) and \(\beta\) are learnable scalars and initialized to \(1\) and \(0\), respectively, and \(\text{I} \in \mathbb{R}^{T \times T}\) is the identity matrix. This modification initializes the self-attention matrix providing a pathway towards training stability. They further entertained a more complicated scheme with a third parameter, but we only consider the two parameter version for simplicity. By this iterative removal and recovery process, the authors converged towards the final transformer block seen in Figure 4. The most shocking aspect of this proposed block is the removal of the \(W_{V}\) and \(W_O\) layers. They arrived at this justification by initializing \(W_{V}\) and \(W_{O}\) to the identity with separate, learnable scalars and training a model. Over the course of training, the scalar ratios converged towards zero. (This is a slight simplification; see Section 4.2 and Figures 4 and 20 in He et al. 2023 for a more detailed explanation.) Due to the heavy cost and speed these linear layers present, removal of them decreases parameter counts and enables more data throughput. A concise PyTorch interpretation of the new block can be seen below:

      import torch
      import torch.nn as nn

      class ShapedAttention(nn.Module):
          """Shaped attention block (SAS-P, He et al. 2023).

          Returns alpha * I + beta * softmax(Q K^T / sqrt(k)) — the value and
          output projections (W_V, W_O) are deliberately removed by design.

          Note: despite the ``num_heads`` argument, this listing computes a single
          attention head of dimension ``n_hidden // num_heads``; true multi-head
          batching is out of scope here.
          """

          def __init__(self, width: int, n_hidden: int, num_heads: int,
                       alpha_init: float = 1.0, beta_init: float = 0.0):
              """width: token embedding dim; n_hidden: attention hidden dim;
              num_heads: head count; alpha_init/beta_init: initial values of the
              learnable scalars weighting the identity and attention matrices
              (the paper initializes alpha=1, beta=0)."""
              super().__init__()
              # Hidden dimension of the attention layer must split evenly across heads
              assert width % num_heads == 0, "Width and number of heads are not divisble."

              self.head_dim  = n_hidden // num_heads
              self.num_heads = num_heads
              # Key/query projections only — no value/output layers in SAS-P
              self.W_K = nn.Linear(width, self.head_dim)
              self.W_Q = nn.Linear(width, self.head_dim)
              # Learnable scalars. (The original listing referenced the undefined
              # names alpha_init/beta_init directly; they are now defaulted
              # constructor arguments wrapped into tensors for nn.Parameter.)
              self.alpha = nn.Parameter(torch.tensor(float(alpha_init)))
              self.beta = nn.Parameter(torch.tensor(float(beta_init)))
              # Softmax over the key axis
              self.softmax = nn.Softmax(dim = -1)

          def forward(self, x: torch.Tensor) -> torch.Tensor:
              # Input:
              # x: shape (B x T x width)
              # Output:
              # shaped attention matrix, shape (B x T x T)
              k = self.W_K(x)
              q = self.W_Q(x)
              # Scaled dot-product: divide by sqrt(head_dim).
              # (Bug fix: the original divided by head_dim**-0.5, which
              # *multiplies* scores by sqrt(head_dim) instead of scaling down.)
              attn_scores = torch.bmm(q, k.transpose(1, 2)) / (self.head_dim ** 0.5)
              attn_scores = self.softmax(attn_scores)
              # Shaped attention: alpha * I + beta * A(X)
              B, T, _ = x.shape
              return self.alpha * torch.eye(T, device = x.device) + self.beta * attn_scores

      The performance of the final transformer block (referred to as SAS-P) demonstrated powerful results. In Figure 5, the simplified transformer matches the standard block in cross-entropy loss even when taken through a long runtime. Additionally, Figure 6 in demonstrates the model scales better with depth which is of paramount importance in modern neural networks.

      Figure 5: Training speed experiment. Figure 6. in Ref. 15. Pre-LN is the standard transformer block. SAS-P is the block. It is shown with and without an initial layer normalization.

      Translation to Vision: Experimentation and Analysis

      The results shown in show promise the transformer can be improved. Motivated by vision applications, we seek to implement such infrastructure, with slight modifications, and determine if it improves performance in small datasets.

      Vanilla vs. Simplified Comparison

      For evaluation, we compare the simplified transformer to a vanilla ViT. The vanilla ViT’s transformer block is identical to the formulation presented earlier. We use Conv2D patch embedding with a random initial positional embedding. For the simplified setup, we initialize \(\alpha = \beta = 0.5\) and do not use a centering matrix – although it has been shown to improve ViT performance. We use one Layer Normalization just prior to the transformer encoder. \(\text{Width} = 96\) is kept uniform throughout the model. The dataset is CIFAR-10 with a \(\text{batch size} = 256\). Data augmentations were random horizontal and vertical flips with 15º random rotations. Optimizer is AdamW with \(\text{lr} = 0.003\) and \(\text{weight decay} = 0.01\). We employ a cosine learning rate scheduler to maintain consistency with ViT literature, although empirically showed a linear learning rate is slightly advantageous (Figure 11 in He et al. 2023). We ran our model for \(\text{epochs} = 50\) with \(\text{runs} = 3\) to evaluate run-to-run stability. A condensed version of the experiment choices can be seen in Table 1. The results can be seen in Figure 6 and Table 2. (To avoid clutter, only the training accuracies vs. epochs are shown; loss curves showed similar relationships.)

      Table 1. Experiment 1: ViT Model Settings  
      # of channels 3
      Image size 32
      Patch size 4
      Width 96
      # of heads 4
      # of layers 8
      Figure 6. Comparison between vanilla and simplified transformers. Width = 96. Layers/Depth = 8.
      Table 2. Experiment 2: Results Vanilla Simplified \(\Delta\)
      Parameters 358186 209210 -41.59%
      Avg. epoch time (s) 12.954 11.305 -12.73%

      Experiment 1 showed the training evaluation trajectory is nearly identical between the two models although the simplified outperforms by a small margin. Despite the subtle difference, it is noteworthy to mention the simplified version achieved mirroring performance with fewer parameters and higher image throughput. The similarity of the curves hints the removal of the skip connections, layer normalizations, and value/projection layers were merited, begging the question whether these components held our modeling power back.

      This experimentation shows the similar nature of each model, but does not translate well to wider modern neural networks. In Experiment 2, we expanded to \(\text{width} = 128\) to determine if there is any emergent behaviour as the network becomes wider. We replicate everything in Experiment 1 and solely modify the width. The settings are restated in Table 3. The results for Experiment 2 can be seen in Figure 7 and Table 4 below.

      Table 3 Experiment 2: ViT Model Settings
      # of channels 3
      Image size 32
      Patch size 4
      Width 128
      # of heads 4
      # of layers 8
      Figure 7. Comparison between vanilla and simplified transformers. Width = 128. Layers/Depth = 8.
      Table 4. Experiment 2: Results Vanilla Simplified \(\Delta\)
      Parameters 629130 364954 -41.99%
      Avg. epoch time (s) 13.093 11.735 -10.37%

      The narrative is different for Experiment 2. The simplified version outperforms the vanilla version by a considerable margin. An adequate explanation for this discrepancy in vision tasks merits further exploration. However, considering the proposed unnecessary nature of the value and projection matrices, we can hypothesize they interfere with the modeling capability as more parameters are introduced.

      Due to the sheer difference in outcomes between the models, we question how the models are attending towards various inputs to gain a better understanding of what is happening under the hood. To probe this curiosity, we trained the models with identical setting in Experiment 2, but modified the \(\text{depth} = \text{layers} = 12\). This model setup will be covered in more detail in future paragraphs. We inputted CIFAR-10 to each model and visualized a side-by-side comparison of attention maps for five input images. An interactive figure is seen Figure 8.

      Figure 8. Comparison between vanilla and simplified attention maps. Width = 128. Layers/Depth = 12. Interpolation method: "nearest".

      There is a noticeable contrast in the attention maps. For the simplified model, the attention maps seem to place weight in a deliberate manner, localizing the attention towards prominent features in the input image. On the other hand, the vanilla model is chaotic in its attention allocation. It is noteworthy that the vanilla model does place attention towards areas of interest, but also attends towards irrelevant information perhaps compromising its judgement at the time of classification. It can thus be reasoned the simplified model can better decipher which features are relevant demonstrating, even in low data regimes, the representational quality is increased.

      While we have so far investigated width, it will be informative to understand how depth impacts the performance of the simplified version. In , they employ signal propagation theory, which is most prominent in deeper networks. Therefore, we suspect as we increase the depth of our models, the simplified version will outperform the vanilla version by a larger margin. Here, we set \(\text{layers} = 12\) and maintain \(\text{width}=128\). The training accuracies and experiment results are seen in Figure 9 and Table 5.

      Figure 9. Comparison between vanilla and simplified transformers. Width = 128. Layers/Depth = 12.
      Table 5. Experiment 3: Results Vanilla Simplified \(\Delta\)
      Parameters 927370 531106 -42.72%
      Avg. epoch time (s) 17.527 15.723 -10.29%

      Again, the simplified model outperforms the vanilla model by a large margin. Although we have focused on performance in the past, we discern an interesting trend when we scaled the depth: the simplified version seemed to be more consistent from run-to-run (recall \(\text{runs} = 5\)). This leads us to believe that as we continue to scale the depth, the simplified version will be more stable. Future experimentation will be necessary to corroborate this claim.

      Initialization Schemes

      We have seen the impact simplification can have on transformer performance and self-attention. However, the initializations of \(\alpha\) and \(\beta\) used in Experiments 1, 2, and 3 were based on equal weighting between the initial attention matrix and the identity matrix. In , they employ a full weighting of the identity matrix and zeroed out the attention matrix at initialization. Here, we aim to determine the effect of different initialization values. Recall \(\alpha = \beta = 0.5\) in Experiments 1, 2, 3. Now, we investigate two more initialization schemes: \(\alpha = 1.0\) and \(\beta = 0.0\) and vice-versa. We replicate the protocol used in Experiment 2 and only modify these learnable scalars at initialization and set \(\text{runs} = 1\). The results are shown in Figure 10. Interestingly, the initialization scheme proposed by , does not outperform the equal weighting or inverse weighting scheme. Understandably, it does poorly at initialization, but never recovers. The equal weighting and inverse weighting approaches show nearly identical performance often trading off superior performance from epoch-to-epoch.

      Figure 10. Various Initialization Schemes.

      This led us to believe the initialization scheme could be improved. There has been some work on initializing vanilla ViTs to gain performance. In , a prominent diagonal was observed for the \(W_{q}W_{k}^{T}\) layers in ViT’s pre-trained on large datasets, which have been shown to outperform CNNs. The figure shown in the paper can be seen in Figure 11. This motivated the authors to provide a novel initialization scheme where the \(W_{Q}\) and \(W_{K}\) matrices are initialized in a way to encourage diagonal prominence in the forward pass. However, our findings contradicted this scheme, as our diagonal-dominant initialization scheme \(\alpha = 1\) and \(\beta = 0\) did not outperform the inverse or the equal weighting. This is likely due to the fact we have learnable parameters and do not initialize our \(W_{Q}\) and \(W_{K}\)’s directly, but rather the attention matrix post-softmax. However, it is important to realize that the learnable parameters still encourage diagonal prominence regardless of initialization. Although used this initialization scheme to increase performance in small ViT’s trained from scratch, which encourages tokens to attend to themselves through the depth of the network, they did not take into consideration how the diagonalization varies from layer-to-layer. Seen in Figure 11, we can see the prominence of the diagonal elements fades as we go deeper into the network. Observing this behaviour, we hypothesize the reason the initialization scheme of \(\alpha = 1\) and \(\beta = 0\) underperformed was not due to the initialization itself, but how it was applied to each layer. In other words, when we initialized \(\alpha = 1\) and \(\beta = 0\), we encouraged this token self-attentive nature throughout the depth of the network, when we should be encouraging it in the opening layers and tapering it off as we approach the end of the model.

      To give more evidence to this hypothesis, we experimented with the following dynamic initialization scheme:

      \[\displaylines{ \alpha_i = \frac{1}{i}, \beta_i = 1 - \frac{1}{i} \\ \text{ where } i \in [1, 2, ..., L] \text{ and } L = \text{# of layers} }\]

      The results from this initialization scheme compared to the uniform initializations can be seen in Figure 12. The results show that the dynamic scheme outperforms the uniform schemes, perhaps indicating the representation quality is connected to encouraging self-token connection in the lower layers, while allowing for tokens to intermingle in higher layers. We further experiment with the inverse dynamic where we switch the \(\alpha\) and \(\beta\) values. The results in Figure 13 show the dynamic approach is stronger during training than the inverse dynamic approach.

      Figure 11: Diagonal prominence in a pre-trained ViT Tiny. Layers 1-11 (Left-to-Right). Heads 1-3 (Top-to-Bottom). Extracted from Figure 1 of Mimetic Initialization of Self-Attention Layers.
      Figure 12. Experiment 5: Dynamic vs. Uniform Initializations.
      Figure 13. Experiment 6: Dynamic vs. Inverse Dynamic Initializations.

      Conclusion and Limitations

      Through this blog post we have overviewed the simplification of our known transformer block and novel initialization schemes. We took the problem of small-scale training of ViTs and looked to address it leveraging such ideas. Through a series of experiments and thoughtful schemes, we generated an informed and sophisticated approach to tackle such a problem. In the end, we generated a method that outperformed a traditional ViT at small scales. We explored ways of scaling the ViT in width and depth and probed how the new model distributed attention. Our comparisons were intentionally simple and effective in addressing the underlying task and illustrating the model’s potential. Although the results presented showed promise, extensive validation needs to be performed in the future. It will be interesting to see how this new transformer block and initialization scheme can be further utilized in computer vision. For example, a logical next route to entertain is to compare convergence rates in larger scale ViTs on datasets such as ImageNet-21k to see if the modeling advantage persists.

      There are a few limitations in this study. For one, only one dataset was used. Using other datasets such as CIFAR-100 or SVHN would provide more insight into this methodology. Secondly, there is a need for more comprehensive evaluation and ablation studies to determine the true nature of the simplified transformer and initialization schemes. Third, a comparison to smaller scale CNNs is needed to gauge where this method comparatively sits in modeling power.

      Figure 14. Experiment 0: Removal of skip connections in traditional ViT.
      \ No newline at end of file diff --git a/blog/2023/distill-example/index.html b/blog/2023/distill-example/index.html new file mode 100644 index 00000000..6364dea3 --- /dev/null +++ b/blog/2023/distill-example/index.html @@ -0,0 +1,32 @@ + Gradient-Boosted Neural Wavlet Interpolation for Time Series (G-BiTS) | 6.S898 Deep Learning Blogs 2023

      Gradient-Boosted Neural Wavlet Interpolation for Time Series (G-BiTS)

      Your blog post's abstract. This is an example of a distill-style blog post and the main elements it supports.

      Introduction

      Energy companies struggle with energy allocation. The power grid contains a multitude of homes, schools, and offices, all of which require different amounts of power draw and capacity. As the current grid stands, the control loop is running on old data and isn’t adequately reactive to sudden spikes, as well as being unable to properly model trends. Energy forecasting is the means by which we work to rectify that gap. Energy forecasting is a blanket umbrella term coming from general forecasting of any time series data. There are a lot of methods currently available, ranging from purely statistical models up to deep neural networks. At the moment, the SOTA in predictive modeling from statistical models is SARIMAX: Seasonal Autoregressive Integrated Moving Average Exogenous. In deep learning, the SOTA is N-HiTS[1]. Both work well in most circumstances, but there is a lot of work to improve upon the current performance given we want to generate better embeddings to decrease loss through the energy grid. There have been great performance boosts associated with combining the strengths of the different methods, and that is part of what this paper explores. Another big target: as it stands, the current flavors of N-HiTS don’t touch upon the further work recommendations from the original paper. This includes advanced interpolation, moving away from the current linear interpolation for the Block modules and moving towards incorporating wavelet decomposition and transforms to help convert the signal into a form that makes it much easier to deliver robust data. I propose gradient-boosted neural wavlet interpolation for time series (G-BiTS) as a new entry to forecasting models relying on a mix of statistical and neural network based models. G-BiTS expands upon N-HiTS, which stands for neural basis expansion analysis for interpretable time series. N-HiTS decomposes time series into a set of basis functions, capturing and interpreting temporal patterns.
This paper explores ensembling methods and time series analysis.

      The main related works relate to the following topics: SARIMAX, N-HiTS, and GBM. SARIMAX stands for seasonal autoregressive integrated moving average with exogenous variables model. Each element of SARIMAX is important in the following ways. AutoRegressive: captures the relationship between an observation at various lags. Integrated: the differencing of raw observations to make the time series stationary. Moving Average: the relationship between an observation and a residual error from a moving average model applied to lagged observations. Seasonal: accounts for seasonality in data, like weekly, monthly, or yearly patterns. Exogenous Variables: These are external variables or predictors that aren’t part of the time series itself but are believed to have an impact on it. This is mainly represented in time series analysis by date information with respect to variables unrelated to the power, but can be used to model a common behavior. The biggest flaw with SARIMAX comes from its inability to model more than one seasonality, hampering predictions. A more robust model is N-HiTS, which stands for neural basis expansion analysis for interpretable time series forecasting. The best benefit from N-HiTS comes from its ability to learn rich embeddings for time series that properly represent all of the trends and seasonalities inherent to the data, while also producing gains through being able to apply much more data as it is made for longer range predictions. N-HiTS is good, and this paper will be exploring a multifold extension using gradient boosting [2] and adaptive ensembling[3]. Gradient boosting generates good predictions by training decision trees sequentially. A new tree is modeled on the residual errors made by the preceding trees. Finally, tying everything all together we have wavelet transforms. Wavelets are wave-like oscillations that represent data at various scales effectively.
GBMs help us take advantage of a repeated pattern of smooth behavior interrupted by sudden changes or transients in time series data.

      G-BiTS

      This paper proposes a new deep learning framework powered by gradient boosting and signal pre-processing: G-BiTS. G-BiTS stands for Gradient-Boosted Neural Wavlet Interpolation for Time Series. G-BiTS builds upon the success of N-HiTS and explores a question posed by the authors in the original paper on replacing the existing sequential projections from the interpolation functions onto wavelet induced spaces, getting high resolution output. G-BiTS is an ensemble model, which is where gradient boosting comes in. The maximum of the combined predictions is taken for adaptive ensembling and higher performance as well as generality. Max can be min or mean; it just depends on the use case, and having higher output in this circumstance is preferred. The hope is to use the hourly modeling capabilities of light gradient boosting machines with the versatility of N-HiTS to create a robust ensemble model.

      Results

      The testing for the comparisons of the different forecasting methods is based on the BuildingsBench dataset. Specifically, this paper surveys office buildings within the Fox subsection from the original input. The data includes buildings with energy data that has multiple seasonalities, mostly hourly, daily, weekly, and monthly. Looking at the data, there are some interesting patterns. These are the average skew and kurtosis values for the data: high skew and kurtosis.

      Skewness: 1.1118040201238155 Kurtosis: 3.452262511716185

      Statistical analysis also shows that the data was not drawn from a normal distribution and is not stationary, so the variance and mean were not constant throughout the time series.

      Our baseline is simply copying over the values from the previous week and repeating the same for the following week. Non-baseline models tested include the previously mentioned SARIMAX, N-HiTS, LGBM, and G-BiTS. The following are the respective errors from each building ordered as mean average error, root mean squared error, and mean average percent error.

      Building ID: Margarita

      SARIMAX (211.47498604910714, 249.84373502456708, 11.805270962305448)

      NHITS (21.72069293617509, 27.65604571924576, 1.6335940075280377)

      LGBM (33.16067034334621, 41.84784011583212, 2.0058567433490087)

      GBITS (26.955107763269822, 31.504577778268615, 1.6841760555882481)

      Building ID: Loreta

      SARIMAX (2966.2653087797617, 3513.45974924458, 12.756417057832824)

      NHITS (203.50202658318491, 338.92442661325015, 1.0121962487927345)

      LGBM (419.71931531784384, 476.48902925976694, 1.8085151798175159)

      GBITS (215.94950733822594, 264.7384239183662, 0.9401638424018465)

      Building ID: Gaylord

      SARIMAX (1220.2237444196428, 1479.439585459469, 8.095511476323951)

      NHITS (137.39752238818102, 203.64435240098928, 0.8720707702102791)

      LGBM (347.0178199198448, 435.19043719851146, 2.3137853719619144)

      GBITS (21.02548764010548, 27.84334532157823, .73338746467575437)

      Analysis

      Across the board, SARIMAX performed the worst, followed closely by NHiTS and LGBMs. The biggest issue with SARIMAX is that it can only take a very limited amount of data, as well as being unable to model multiple seasonalities. G-BiTS showed good adaptability as one model over the large dataset was able to get transferable and adaptable embeddings. The wavelet transforms showed the greatest gains from the interpolation stage as the two level smoothing helped the N-HiTS model better fit the non-stationary data. N-HiTS as expected performs well across the board too and had the best time modeling the data.

      Conclusions

      There is more work to be done to extend this research topic. Mainly, finding better wavelet decompositions and symmetric recompositions for modeling multiple seasonalities faster and in a more efficient manner. The decomposition showed the biggest gain and confirms the original paper’s thoughts about the approach. Boosting helped standardize the model and generated really interesting embeddings through the initial wavelet based N-HiTS.

      Bibliography

      [1]

      N-HiTS: Neural Hierarchical Interpolation for Time Series Forecasting

      Cristian Challu, Kin G. Olivares, Boris N. Oreshkin, Federico Garza, Max Mergenthaler-Canseco, Artur Dubrawski

      https://arxiv.org/abs/2201.12886

      [2]

      Gradient Boosting Neural Networks: GrowNet

      Sarkhan Badirli, Xuanqing Liu, Zhengming Xing, Avradeep Bhowmik, Khoa Doan, Sathiya S. Keerthi

      https://arxiv.org/abs/2002.07971

      [3]

      Adaptive Ensemble Learning: Boosting Model Performance through Intelligent Feature Fusion in Deep Neural Networks

      Neelesh Mungoli

      https://arxiv.org/abs/2304.02653

      \ No newline at end of file diff --git a/blog/2023/dof-visual-place-recognition-satellite/index.html b/blog/2023/dof-visual-place-recognition-satellite/index.html new file mode 100644 index 00000000..f60435b3 --- /dev/null +++ b/blog/2023/dof-visual-place-recognition-satellite/index.html @@ -0,0 +1,32 @@ + 6-DOF estimation through visual place recognition | 6.S898 Deep Learning Blogs 2023

      6-DOF estimation through visual place recognition

      A neural pose-estimation solution is implemented, which could help an agent with a downward-facing camera (such as a drone) to geolocate based on prior satellite imagery of terrain. The neural encoder infers extrinsic camera parameters from camera images, enabling estimation of 6 degrees of freedom (6-DOF), namely 3-space position and orientation. By encoding priors about satellite imagery in a neural network, the need for the agent to carry a satellite imagery dataset onboard is avoided.

      Introduction

      The goal of this project is to demonstrate how a drone or other platform with a downward-facing camera could perform approximate geolocation using a neural scene representation of existing satellite imagery. Note that the use of the term “Visual Place Recognition” in the title is a carryover from the proposal, but no longer applies to this project. Rather, the goal of this project is to implement 6-DOF pose-estimation.

      Pose estimation can refer to the ability of an agent to determine its 3D position and orientation based on visual or other sensor info.

      In this work, the goal is to compress the ground-truth image data into a neural model which maps live camera footage to geolocation coordinates.

      Twitter user Stephan Sturges demonstrates his solution for allowing a drone with a downward-facing camera to geolocate through cross-referencing against a database of satellite images:

      Twitter user Stephan Sturges shows the results of geolocation, purportedly based on Visual Place Recognition.

      The author of the above tweet employs a reference database of images. It would be interesting to eliminate the need for a raw dataset. Whereas the author employs Visual Place Recognition, here I employ pose estimation techniques. Thus I do not seek to predict place labels, but rather geolocated place coordinates for the camera, as well as the camera’s orientation.

      Thus, this work seeks to develop a neural network which maps a terrain image from the agent’s downward-facing camera, to a 6-DOF (position/rotation) representation of the agent in 3-space.

      Background

      The goal-statement - relating a camera image to a location and orientation in the world - has been deeply studied in computer vision and rendering:

      Camera parameters, as described in.

      Formally,

      • The image-formation problem is modeled as a camera forming an image of the world using a planar sensor.
      • World coordinates refer to 3-space coordinates in the Earth or world reference frame.
      • Image coordinates refer to 2-space planar coordinates in the camera image plane.
      • Pixel coordinates refer to 2-space coordinates in the final image output from the image sensor, taking into account any translation or skew of pixel coordinates with respect to the image coordinates.

      The mapping from world coordinates to pixel coordinates is framed as two composed transformations, described as sets of parameters:

      • Extrinsic camera parameters - the transformation from world coordinates to image coordinates (affected by factors “extrinsic” to the camera internals, i.e. position and orientation.)
      • Intrinsic camera parameters - the transformation from image coordinates to pixel coordinates (affected by factors “intrinsic” to the camera’s design.)

      And so broadly speaking, this work strives to design a neural network that can map from an image (taken by the agent’s downward-facing camera) to camera parameters of the agent’s camera. With camera parameters in hand, geolocation parameters automatically drop out from extracting extrinsic translation parameters.

      To simplify the task, assume that camera intrinsic characteristics are consistent from image to image, and thus could easily be calibrated out in any application use-case. Therefore, this work focuses on inferring extrinsic camera parameters from an image. We assume that pixels map directly into image space.

      The structure of extrinsic camera parameters is as follows:

      \[\mathbf{E}_{4 \times 4} = \begin{bmatrix} \mathbf{R}_{3 \times 3} & \mathbf{t}_{3 \times 1} \\ \mathbf{0}_{1 \times 3} & 1 \end{bmatrix}\]

      where \(\mathbf{R}_{3 \times 3} \in \mathbb{R^{3 \times 3}}\) is rotation matrix representing the rotation from the world reference frame to the camera reference frame, and \(\mathbf{t}_{3 \times 1} \in \mathbb{R^{3 \times 1}}\) represents a translation vector from the world origin to the image/camera origin.

      Then the image coordinates (a.k.a. camera coordinates) \(P_c\) of a world point \(P_w\) can be computed as:

      \[\mathbf{P_c} = \mathbf{E}_{4 \times 4} \cdot \mathbf{P_w}\]

      Proposed solution

      Image-to-extrinsics encoder architecture

      The goal of this work, is to train a neural network which maps an image drawn from \(R^{3 \times S \times S}\) (where \(S\) is pixel side-length of an image matrix) to a pair of camera extrinsic parameters \(R_{3 \times 3}\) and \(t_{3 \times 1}\):

      \[\mathbb{R^{3 \times S \times S}} \rightarrow \mathbb{R^{3 \times 3}} \times \mathbb{R^3}\]

      The proposed solution is a CNN-based encoder which maps the image into a length-12 vector (the flattened extrinsic parameters); a hypothetical architecture sketch is shown below:

      Image encoder architecture.

      Data sources for offline training

      Online sources provide downloadable satellite terrain images.

      Training and evaluation

      The scope of the model’s evaluation is, that it will be trained to recognize aerial views of some constrained area i.e. Atlantic City New Jersey; this constrained area will be referred to as the “area of interest.”

      Data pipeline

      The input to the data pipeline is a single aerial image of the area of interest. The output of the pipeline is a data loader which generates augmented images.

      The image of the area of interest is \(\mathbb{R^{3 \times T \times T}}\) where \(T\) is the image side-length in pixels.

      Camera images will be of the form \(\mathbb{R^{3 \times S \times S}}\) where \(S\) is the image side-length in pixels, which may differ from \(T\).

      • Generate an image from the agent camera’s vantage-point
        • Convert the area-of-interest image tensor (\(\mathbb{R^{3 \times T \times T}}\)) to a matrix of homogenous world coordinates (\(\mathbb{R^{pixels \times 4}}\)) and an associated matrix of RGB values for each point (\(\mathbb{R^{pixels \times 3}}\))
          • For simplicity, assume that all features in the image have an altitude of zero
          • Thus, all of the pixel world coordinates will lie in a plane
        • Generate random extrinsic camera parameters \(R_{3 \times 3}\) and \(t_{3 \times 1}\)
        • Transform the world coordinates into image coordinates (\(\mathbb{R^{pixels \times 3}}\)) (note, this does not affect the RGB matrix)
        • Note - this implicitly accomplishes the commonly-used image augmentations such as shrink/expand, crop, rotate, skew
      • Additional data augmentation - to prevent overfitting
        • Added noise
        • Color/brightness adjustment
        • TBD
      • Convert the image coordinates and the RGB matrix into a camera image tensor (\(\mathbb{R^{3 \times S \times S}}\))

      Each element of a batch from this dataloader, will be a tuple of (extrinsic parameters,camera image).

      Training

      • For each epoch, and each mini-batch…
      • unpack batch elements into camera images and ground-truth extrinsic parameters
      • Apply the encoder to the camera images
      • Loss: MSE between encoder estimates of extrinsic parameters, and the ground-truth values

      Hyperparameters

      • Architecture
        • Encoder architecture - CNN vs MLP vs ViT(?) vs …, number of layers, …
        • Output normalizations
        • Nonlinearities - ReLU, tanh, …
      • Learning-rate
      • Optimizer - ADAM, etc.
      • Regularizations - dropout, L1, L2, …

      Evaluation

      For a single epoch, measure the total MSE loss of the model’s extrinsic parameter estimates relative to the ground-truth.

      Feasibility

      Note that I am concurrently taking 6.s980 “Machine learning for inverse graphics” so I already have background in working with camera parameters, which should help me to complete this project on time.

      Implementation

      Source image

      DOF estimation was applied to a 2D aerial image, shown below:

      Sample aerial image from .

      Dataloader

      A dataloader was created which generates (1) generates a random extrinsic camera matrix as described above, in order to generate (2) visualization of the above source image from the perspective of the random camera matrix.

      More specifically, the dataloader generates Euler Angles in radians associated with the camera matrix rotation, as well as a 3D offset representing the camera’s position.

      You will notice that the images suffer from an artifact whereby the pixels are not adjacent to each other but rather have black space between them; a production implementation of this solution would require interpolation between pixels in order to produce a continuous image.

      An example of a single generated image is shown below; it is the original image, above, viewed from the perspective of a random camera matrix:

      Single datapoint from data loader.

      A batch of generated images is shown below:

      A batch from the data loader.

      Again, you can see that owing to a lack of interpolation, the pixels are spread out, with black space between them.

      DNN architecture

      The DNN architecture is an MLP with 6 hidden layers of width 512, 256 and 128.

      The input is a 224x224 image with 3 color channels representing the view of the source image from an orientation determined by the (unknown) camera extrinsic parameters.

      The architecture outputs 6 logit values corresponding to predictions of 3 Euler angles and 3 positional offsets for the camera extrinsic matrix.

      For this project, I experimented with the sinusoidal activation functions described in the SIREN paper. Sinusoidal activation functions, combined with MLPs, were previously shown to be more effective at capturing high-frequency information in radiance fields, compared to ReLU MLPs. I employed sinusoidal activation functions in this work in the hopes of more effectively capturing high-frequency variation in the relationship between camera extrinsic parameters and camera image pixels.

      One question which might arise is, if the DNN outputs logits, how do I account for the difference in statistical characteristics between the three Euler Angle outputs and the three translation vector outputs? I employed scikitlearn StandardScalers at both the input and the output in order to normalize image pixels and extrinsic camera matrix parameters, respectively. The use of normalization at the input is standard. The use of normalization at the output allows each dimension of the 6-logit output to learn a zero-mean, unit-variance distribution: the output StandardScaler converts from zero-mean, unit-variance to the estimated actual mean and variance of the target distribution. The way the output StandardScaler is computed is as follows: a batch of random data is sampled from the dataloader; mean and variance are computed; then a StandardScaler is designed such that its inverse maps from the computed mean and variance of the target extrinsics, to zero mean/unit-variance. Thus, run forward, the output StandardScaler will map from unit gaussian to the computed mean and variance.

      Training setup

      I train for 80 epochs with an Adam optimizer and a learning rate of 0.00001.

      MSE loss is employed for training and evaluation. The extrinsic parameters predicted by the DNN are compared against the target (correct) extrinsic parameters which the dataloader used to generate the camera image of the scene. Recall from the previous section that, owing to the output StandardScaler, the DNN outputs 6 roughly zero-mean/unit-variance predicted camera extrinsic parameters. I chose to evaluate loss relative to these zero-mean/unit-variance predictions, prior to the output StandardScaler; the rationale being that I wanted each extrinsic parameter to have equal weighting in the MSE loss computation, and not be biased by the mean/variance of the particular parameter. Thus, I use the output StandardScaler in inverse mode to normalize the target values to zero-mean/unit-variance. MSE loss is then computed between the DNN output logits, and these normalized target values.

      A side-effect of computing MSE against normalized values, is that it is effectively a relative measure: MSE tells me how large the variance in the error between predictions and target is, relative to the unit-variance of the normalized target values. Thus I expect that an MSE much less than one is a good heuristic for the quality of the estimate.

      Training results

      The plot below shows that the DNN architecture was able to converge on low-MSE predictions of the extrinsic camera matrix:

      Train and test MSE loss between the predicted and actual extrinsic camera matrix.

      Note that the train and test curves overlap almost perfectly; this is because all datapoints generated by the dataloader are random, so in fact the model is constantly being trained on fresh data, and the resampling is really unnecessary.

      Since the final MSE is relatively small (0.020), and since (as described in the previous section) the MSE is effectively a relative measure of error, I believe the DNN is learning a relatively good estimate of camera extrinsics.

      Conclusion

      Based on the low MSE attained during training, I believe I successfully trained a DNN to roughly estimate camera extrinsics from orientation-dependent camera views.

      There are many improvements which would be necessary in order to deploy this in production.

      For example, it would be better to use more detailed satellite imagery, preferably with stereoscopic views that effectively provide 3D information. Without having 3D information about the scene, it is hard to train the model to recognize how the scene will look from different angles. In my work, I used a 2D image and essentially assumed that the height of the geographic features in the image was negligible, such that I could approximate the 3D point-cloud as lying within a 2D plane. With stereoscopic satellite data, it could be possible to construct a truly 3D point-cloud, on which basis I could synthesize more accurate camera views during the training process.

      Also, as discussed in the Implementation section, it would likely be necessary to implement interpolation between the pixels when generating simulated camera views. Otherwise, the camera views during training would look nothing like what the camera would see in the real world.

      \ No newline at end of file diff --git a/blog/2023/double_descent/index.html b/blog/2023/double_descent/index.html new file mode 100644 index 00000000..452c5b39 --- /dev/null +++ b/blog/2023/double_descent/index.html @@ -0,0 +1,42 @@ + Ensemble Learning for Mitigating Double Descent | 6.S898 Deep Learning Blogs 2023

      Ensemble Learning for Mitigating Double Descent

      Exploring when and why Double Descent occurs, and how to mitigate it through Ensemble Learning.

      Abstract

      We outline the fundamental ‘bias-variance tradeoff’ concept in machine learning, as well as how the double descent phenomenon counterintuitively bucks this trend for models with levels of parameterization at or beyond the number of data points in a training set. We present a novel investigation of the mitigation of the double descent phenomenon by coupling overparameterized neural networks with each other as well as various weak learners. Our findings demonstrate that coupling neural models results in decreased loss during the variance-induced jump in loss before the interpolation threshold, as well as a considerable improvement in model performance well past this threshold. Machine learning practitioners may also find useful the additional dimension of parallelization allowed through ensemble training when invoking double descent.


      Motivation

      There are many important considerations that machine learning scientists and engineers must consider when developing a model. How long should I train a model for? What features and data should I focus on? What exactly is an appropriate model size? This last question is a particularly interesting one, as there is a bit of contention regarding the correct answer between different schools of thought. A classical statistician may argue that, at a certain point, larger models begin to hurt our ability to generalize. By adding more and more parameters, we may end up overfitting to the training data, resulting in a model that poorly generalizes on new samples. On the other hand, a modern machine learning scientist may contest that a bigger model is always better. If the true function relating an input and output is conveyed by a simple function, in reality, neither of these ideas are completely correct in practice, and empirical findings demonstrate some combination of these philosophies. This brings us to the concept known as double descent. Double descent is the phenomenon where, as a model’s size is increased, test loss increases after reaching a minimum, then eventually decreases again, potentially to a new global minimum. This often happens in the region where training loss becomes zero (or whatever the ’perfect’ loss score may be), which can be interpreted as the model ’memorizing’ the training data given to it. Miraculously, however, the model is not only memorizing the training data, but learning to generalize as well, as is indicated by the decreasing test loss.

      The question of ’how big should my model be?’ is key to the studies of machine learning practitioners. While many over-parameterized models can achieve lower test losses than the initial test loss minimum, it is fair to ask if the additional time, computing resources, and electricity used make the additional performance worth it. To study this question in a novel way, we propose incorporating ensemble learning.

      Ensemble learning is the practice of using several machine learning models in conjunction to potentially achieve even greater accuracy on test datasets than any of the individual models. Ensemble learning is quite popular for classification tasks due to this reduced error empirically found on many datasets. To our knowledge, there is not much literature on how double descent is affected by ensemble learning versus how the phenomenon arises for any individual model.

      We are effectively studying two different types of model complexity: one that incorporates higher levels of parameterization for an individual model, and one that uses several models in conjunction with each other. We demonstrate how ensemble learning affects the onset of the double descent phenomenon. By creating an ensemble that includes (or is fully comprised of) overparameterized neural networks, which can take extreme amounts of time and resources to generate, with overparameterized machine learning models, we will show the changes in the loss curve, specifically noting the changes in the regions where double descent is invoked. We hope that the results we have found can potentially be used by machine learning researchers and engineers to build more effective models.


      One of the first papers discussing double descent was ’Reconciling modern machine-learning practice and the classical bias–variance trade-off’ by Belkin et al. This paper challenged the traditional idea of the ’bias-variance tradeoff’, a fundamental concept in machine learning that describes the tension between two types of model error: bias and variance. Bias is the error between the expected prediction of the model and the true output value, introduced by approximating a real-world quantity with a model, which may oversimplify the true problem at hand. Variance refers to the error due to a model’s sensitivity to small fluctuations in the training dataset. Overfitted models may have high variance, as they may model random noise in the data as well.

      The Bias-Variance Tradeoff

      In short, classical statistical learning argues that there is some optimal level of parameterization of a model, where it is neither underparameterized nor overparameterized, that minimizes the total error between bias and variance. However, Belkin’s paper finds that, empirically, the tension between bias and variance no longer becomes a tradeoff after a certain level of overparameterization. They showed that after the interpolation threshold (beyond where the model fits perfectly to the training data), test error eventually began to decrease again, even going below the error deemed optimal by the bias-variance minimum.

      Nakkiran et al.’s ’Deep Double Descent: Where Bigger Models and More Data Hurt’ expanded these findings to the realm of deep learning. In this work, double descent is shown to occur for both large models and large datasets. Additionally, this paper demonstrates that, counterintuitively, adding more data at a certain point actually worsened the performance of sufficiently large models. Specifically, this occurred at and close to the interpolation threshold for neural models. This paper’s results can be seen here:

      The Double Descent Curve

      For the region between the first and second loss minima, model performance can suffer greatly, despite the increased computational time and resources used to generate such models. While this region of the test loss curve is typically not a level of parameterization that one would use in practice, understanding such loss curve behavior can help practitioners for several reasons. For one, this degraded phase of performance can be crucial for tweaking model architecture and adjusting training strategies. This is key to discovering if one’s model is robust and adaptable to various other datasets and tasks. This highlights the need for a new understanding for model selection in order to effectively generalize to testing datasets better, mitigating decreases in model performance and invoking a second loss minimum quickly.

      In the classic paper ’Bagging Predictors’, Breiman describes the concept of combining the decisions of multiple models to improve classification ability . Empirically, this bootstrap aggregating, or ’bagging’ technique, reduced variance and improved accuracy, outperforming the single predictors that comprised the ensemble model. We present a novel combination of the findings of this paper with the double descent phenomenon. Effectively, by increasing model complexity via overparameterization and ensemble learning, we aim to study if this combination can mitigate loss increases and invoke a second loss minimum with smaller models.


      Setup

      Computing Resources and Software

      We have implemented this project using CUDA and the free version of Google Colab, with additional computing units for more costly experiments. To train and test these models, we use various machine learning packages in Python, namely Scikit-learn, PyTorch and Tensorflow. Additional software commonly used for machine learning projects, such as numpy, tensorboard and matplotlib, was also utilized.

      All plots have been produced by us, unless otherwise specified. Note that all tensorboard plots have $0.25$ smoothing applied, except for the Soft-Voting Ensemble, which has $0.6$ smoothing applied (though this won’t make much of a difference as will soon be seen). The non-smoothed plot can be seen traced in light-blue in all provided plots.

      Data

      We use the MNIST dataset for this report . MNIST is a popular dataset used for image classification, where each sample image is a $28$ by $28$ grayscale image of a written integer between $0$ and $9$, inclusive. Each image comes with the true label of the image’s integer. This data is publicly available for experimentation, and our use of it does not pose any ethical or copyright concerns.

      For this project, we use the MNIST dataset to unearth the double descent phenomenon. We experiment with a variety of models, as well as an ensemble of them: decision trees, AdaBoost trees, L2-Boost trees, random forests, logistic regression, and small neural networks. We choose these models because of their ability to be used for classification tasks, and more complicated models run the risk of exceeding Google Colab’s limitations, especially when we overparameterize these models to invoke double descent.

      Sample MNIST Data

      Models

      Decision Trees

      Decision trees are a machine learning model used for classification tasks. This model resembles a tree, splitting the data at branches, culminating in a prediction at the leaves of the tree.

      To invoke overparameterization for decision trees, we can start with a tree of depth 2, and increase the number of maximum leaves of the model until the loss plateaus. Then, keeping this new number of max leaves in our decision tree, we continually increase the maximum depth of the tree until the loss once again stops decreasing. Lastly, keep both the maximum leaves and depth at their plateau levels while increasing the max features. The results of this are plotted below. Notice how varying the number of maximum leaves has minimal effect on the loss, and how increasing the maximum depth causes the most dramatic decrease. However, fluctuations on the maximum depth at this point do not have a major effect, whereas varying the number of features causes another slight, yet consistent, fall in classification loss.

      Notice that the loss curve is more or less linear in the number of parameters (with some having much more effect than others), and so there is little evidence of double descent for this model.

      Decision Tree Overparameterization

      AdaBoost Tree

      Adaptive Boosting (AdaBoost) itself is an ensemble model used for robust classification. Freund et al.’s paper ‘A Decision-Theoretic Generalization of On-Line Learning and an Application to Boosting’ first introduced the algorithm . On a high level, this paper describes how boosting is especially effective when sequentially combining weak learners that are moderately inaccurate (in this case, these are decision trees) to create a strong learner. We study the loss curve of the AdaBoost model as we first increase the number of boosting trees which form a forest, then increase the number of forests across which we average results, after adding additional trees fails to significantly increase model performance. Each tree is constrained to have no more than 10 leaves.

      Notice that the loss curve is more or less linear in the number of parameters, and the double-U shape doesn’t seem to make its presence known.

      AdaBoost Overparameterization

      L2-Boost Tree

      L2 Boosting is quite similar to the AdaBoost model, except for L2 Boosting, as models are built sequentially, each new model in the boosting algorithm aims to minimize the L2 loss. Like before, we first increase the number of boosting trees which form a forest in the L2-Boost model, then the number of forests across which we average using majority voting. The results can be seen below. Each tree is constrained to have no more than 10 leaves.

      Notice how the classification loss begins to fall, then rises up again, then falls once more when we average across more forests to lower minimums than before. This result was consistent across multiple runs of this experiment, suggesting that double descent is real for L2-Boosted Tree Ensembles.

      L2-Boost Overparameterization

      The behavior of the loss once we add more models agrees with general intuition regarding ensembling, but the appearance of double descent as we increase the total number of parameters is still quite interesting to see. L2-Boost is a relatively inexpensive model and ensembling a large number of trees is still quite fast, suggesting that overparameterization could be the way to go in this case.

      Random Forest

      Random Forest is another popular ensemble model. As the name implies, it is a collection of decision trees with randomly selected features, and, like the singular decision tree, this model is used for classification tasks.

      We initialize random forest with a small number of maximum leaves allowed in each tree, and increase the max leaves until we see the loss plateau as we continually add more. After this, we begin increasing the number of trees in our forest until the loss plateaus once again.

      While Belkin et al. lists random forest as a model exhibiting double descent, this claim has been recently disputed, namely by Buschjager et al., who suggest that there is no true double descent with the random forest model . Instead, they suggest that random forest does not overfit in the classical sense, and argue that its curve instead has a single descent. By looking at our results, there is little evidence for the double-U curve, but we will refrain from drawing a decisive conclusion.

      Random Forest Overparameterization

      Despite this, for our ensemble model, we aim to see if the addition of this overparameterized learner to the neural network’s decision making is able to improve ensemble performance.

      Logistic Regression

      Logistic regression is a classic model used for estimating the probability a sample belongs to various classes. We induce overfitting in logistic regression through two methods.

      First, we continually increase the ‘C’ parameter, indicating the inverse strength of regularization applied to the regression, as shown below. Notice that the loss decreases to a minimum before it starts slowly rising again, indicating that overfitting through fluctuations in ‘C’ may not actually lead to double descent, as would be expected from classical theory.

      Logistic Regression Overparameterization (Regularization Based)

      Second, we try inducing double descent by varying the ratio of the number of features over the amount of data. We gradually reduce this ratio using the intuition developed by Deng et al. in order to induce overfitting , since the data becomes more and more separable as the number of features increases relative to the number of data samples.

      To do this, we test varying across the number of training samples instead of varying the number of features used for training. This eventually leads to 0 training error, but causes testing error to blow up, suggesting that some significant amount of training data is still needed to witness the desired behavior, consistent with both statistical and machine learning theory.

      Logistic Regression Overparameterization (Feature-Data Ratio Based)

      An interesting setup for future experiments would be simultaneously increasing the amount of training samples and the number of polynomial features given to the logistic regression, while increasing the feature-data ratio each time we reparameterize or redefine the dataset.

      Neural Networks

      We use a Multilayer Perceptron as our main model for the ensemble. Our deep learning model is a relatively small one, with variable width in the hidden layer. By increasing this width, we eventually achieve perfect training loss.

      We define the general architecture of the neural network used in this report as follows:

      Network Layer

      Let the input data be an $m$ by $m$ pixel image from the MNIST dataset, which can be processed as an $m$ by $m$ matrix, where entry $(i,j)$ is an integer between $0$ and $255$ (inclusive) representing the grayscale color of the pixel. Note that $m=28$ for MNIST, though for generality, we use $ m $ in this network definition. A value of $0$ represents a black pixel, $255$ is a white pixel, and values between these are varying shades of gray. We first flatten this structure into a $d = m^2 $ by 1 vector, such that the entry $ (i,j) $ of the matrix becomes the $ j + m \cdot i$-th entry of the vector, using zero-indexing. We use this vector as the input of our neural network.

      Set $H$ as the hidden layer width, which in our project will be varied in different tests. Let $ W^1 $ be a $ d \times H$ matrix, where $ W^1_{ij}$ is the weight of input $i$ applied to node $j$, and let $W^1_0$ be an $H \times 1$ column vector representing the biases added to the weighted input. For an input $X$, we define the pre-activation to be an $H \times 1$ vector represented by $Z = {W^1}^T X + W^1_0$.

      We then pass this linearly transformed vector to the ReLU activation function, defined such that

      \[\begin{equation*} \text{ReLU}(x)=\begin{cases} x \quad &\text{if} \, x > 0 \\ 0 \quad &\text{if} \, x \leq 0 \\ \end{cases} \end{equation*}\]

      We use this choice of activation function due to the well-known theorem of universal approximation. This theorem states that a feedforward network with at least one single hidden layer containing a finite number of neurons can approximate continuous functions on compact subsets of $ \mathbb{R}^{m^2} $ if the ReLU activation function is used . Applying an activation function ReLU to each element of $Z $, the layer finally outputs

      \[A = \text{ReLU}(Z) = \text{ReLU}({W^1}^T X + W^1_0)\]

      Next, we will input $A$ into a second hidden layer of the neural network. Let $K$ be the number of classes that the data can possibly belong to. Again, $K = 10$ for MNIST, though we will use $K$ for generality. Then let $W^2$ be an $H$ by $K$ matrix, where $W^2_{ij}$ is the weight of input $i$ applied to node $j$, and let $W^2_0$ be a $K \times 1$ column vector representing the biases added to the weighted input. For input $A$, define a second pre-activation to be a $K \times 1$ vector represented by $B = {W^2}^T A + W^2_0$.

      This will yield a $K \times 1$ vector representing the logits of the input image, with which we’ll be able to take Cross Entropy Loss or compute its probability of belonging to any of the $K$ classes.

      Training

      Let class $i $ be the true classification for a data point. We have that $y_i = 1$, and for all $j \neq i$, $y_j = 0$. Furthermore, let $\hat{y_i}$ be the generated probability that the sample belongs to class $i$. The categorical cross-entropy loss is then defined as follows:

      \[\mathcal{L}_{CCE} (y_i, \hat{y_i}) = - \sum_{i=0}^{9} y_i \log (\hat{y_i})\]

      From this computed loss, we use backpropagation and stochastic gradient descent (SGD) with learning rate $\eta = 0.1$ and $momentum = 0.95$ to optimize model weights. We run experiments on a dataset with $n = 4000$ subsamples that train over $100$, $500$, and $2000$ epochs using Belkin et al.’s approach to training . Up to interpolation, we train until we reach zero classification error on the training data, or until we have finished all of the epochs, whichever comes first. After we have reached the interpolation threshold, we train until we have gone through all of the epochs. Note that to get their results, Belkin et al. trained over $6000$ epochs, which proved to be prohibitively expensive given our resources. Instead, we chose to train over a variety of smaller maximum epoch sizes to illustrate the double descent curve taking clearer shape, with the $2000$ epoch run being the most indicative of this phenomenon. Below are the results of the trained and tested neural networks. Notice that interpolation consistently happens when the number of parameters is roughly equal to $n\times K$ (i.e. Parameter Count / 1000 $= 40$), and the test loss starts consistently getting lower and lower as we add more and more parameters beyond this threshold. Double descent is real!

      MLP 100 Epoch Training/Testing Cross-Entropy Loss Over MLP Parameter Count / 1000
      MLP 500 Epoch Training/Testing Cross-Entropy Loss Over MLP Parameter Count / 1000
      MLP 2000 Epoch Training/Testing Cross-Entropy Loss Over MLP Parameter Count / 1000

      For the sake of brevity, we avoid including plots for train/test classification loss for the MLPs. However, it is worth noting that train classification loss eventually reaches 0 in all experiments, whereas test loss eventually becomes $\sim$ 0.08 or smaller.

      Throughout each experiment, we vary across the number of total parameters of the model. For a network with $H$ hidden units, the total number of parameters is equal to $(d+1)\times H + (H + 1)\times K$, and so we choose $H$ accordingly each time we reparametrize.

      Note that we also incorporated a weight reuse scheme for models in the underparametrized regime to cut on training time, similarly to the approach in Belkin et al .

      Additionally, even though the individual MLPs are small, training several of them sequentially for a relatively large number of epochs can take a very long time. To help reduce the time it takes to complete experiments, we also try adapting the Parameter Count Generation Algorithm provided in John Abascal’s blog to intelligently select the next level of parameterization given the performance of the previous one. This algorithm was designed to most clearly showcase the existence of the double descent curve by fitting a third degree polynomial (since that is roughly what we expect the double descent curve to look like) to the model capacity vs. test loss graph, and choosing the next parameter by examining the first derivative of the polynomial fit to the data. More detail is provided in his blog.

      This algorithm proved helpful for empirically confirming the existence and validity of the interpolation threshold. However, after a few tests with the algorithm, we chose to complete most of the experiments using a pre-specified list of parameters which were able to consistently capture the double descent phenomenon in detail.


      Ensemble Learning

      We experimented with two different types of ensembles. The first ensemble is what we call the ‘weak-learner’ ensemble, which is the model that incorporates the multi-layer perceptron supported by L2-Boost tree ensembles, random forests, decision trees and logistic regression. Note that we ultimately did not use AdaBoost in this ensemble because we believed this was too similar to the included L2-Boost model in both architecture and performance.

      The second ensemble is the ‘multi-layer perceptron’ ensemble, which includes 5 MLPs.

      Weak-Learner Ensemble

      We use bootstrap aggregating, or ‘bagging’, to formulate our ensemble of these five models . Effectively, each model is given a certain number of ‘votes’ on what that model believes is the correct classification for any given MNIST sample image. We then experimented with two approaches to voting: hard voting and soft voting.

      In hard voting, the classification with the most total votes is then used as the ensemble’s overall output. In the event of a tie, the neural network’s prediction is chosen. Using this voting scheme, we train the MLP independently of the other models in the ensemble, using the same scheme as described previously.

      In soft voting, the weighted average of the predicted class probabilities of each model is used as the predicted class probabilities of the ensemble. We utilize this prediction when training the MLP, and use negative log likelihood loss instead of cross entropy loss, since taking the softmax of probabilities is not necessary. This way, we can incorporate the predictions of the whole ensemble into the training of the MLP. Since the ensemble now outputs a vector of class probabilities, the one with the highest probability will be used as the soft voting ensemble’s prediction.

      Since we want a neural model to be the basis of our ensemble, we vary the number of votes assigned to the neural network while keeping the number of votes for other models fixed to 1. With four supplementary models in addition to the neural network, giving the neural network 4 or more votes is not necessary, since this ensemble would always output the same results as the neural network. Because of this, we study the loss curve when giving the neural network 1, 2, and 3 votes. Note that decimal value votes for the neural network are not sensible (at least in the hard-voting scheme), since it can be proved that all potential voting scenarios are encapsulated into the three voting levels we have chosen.

      Another important aspect of our ensemble is that the ‘weak’ classifiers do not vary in parameterization; only the MLP does. Refitting all the weak classifiers across epochs and MLP parameterizations took much longer than expected, perhaps due to incompatibilities between sklearn and GPUs, and completing the experiments using this approach was unfortunately unfeasible. Hence, all ‘weak’ classifiers have fixed architectures, chosen such that each one has low test error but is not at the highest level of parameterization according to the previous discussion, and only the MLP varies.

      Multi-Layer Perceptron Ensemble

      The Multi-Layer Perceptron Ensemble uses 5 identically initialized MLPs which are trained in parallel using Pytorch’s autovectorization capabilities. Since they are defined in the same way and trained simultaneously using the MLP training scheme discussed above, each receives equal weight when it comes to taking an averaged prediction. However, unlike the bagging method used for the Weak-Learner Ensemble, we take advantage of the identical architectures of the models and the numerical stability provided by this, and generate ensemble predictions by averaging the logits of all five learners and using those values as the logits of the ensemble. Again, we experiment using 100 and 500 epochs to see how the behavior evolves across increasing number of epochs, but we omit training over 2000 epochs due to excessive computational costs. An experiment for the future would be training over a very large number of epochs for even greater ensemble sizes to see how results vary across time.

      There has been discussion in the past of whether to average the raw logits or the softmax-transformed probabilities. The main concern raised over averaging across raw logits is that the outputted values can vary greatly in magnitude across models (and therefore overconfident models can potentially overshadow all other models when taking the prediction), but, empirically, that doesn’t seem to be a problem here. Tassi et al. provide some intuition in “The Impact of Averaging Logits Over Probabilities on Ensembles of Neural Networks” , suggesting that different approaches to ensembling should be taken depending on the architecture and levels of confidence of the models. For general safety, they recommend averaging the probabilities, but for the purposes of our task, averaging the logits suffices.


      Results and Discussion

      Contrary to our expectations, the Weak Learner Ensemble performs much worse than even the individual models on MNIST classification. Although our focus is on double descent and not on the strong predictive power of ensembles, the latter is needed to observe the former, or at least discuss it at an interesting level.

      Initially, we tried applying the soft-voting scheme for the Weak Learner Ensemble, but the reported results are unexpectedly poor, yielding very high classification loss, especially when compared to the results of each model taken individually. This may be because each ‘weak’ learner has high confidence in its predicted class, whereas the MLP may be more evenly split between different classes, which would result in the weak classifiers winning more often, even if the MLP has higher weight in the prediction. The plot of the negative log likelihood loss for both training and testing is also hard to understand, but it is clear the ensemble has a very hard time improving, even as more parameters are added. We only include the results for the ensemble that allocates 3 votes to the MLP, but note that these are the best loss curves we were able to produce given this method.

      Soft-Voting Weak-Learner Ensemble 100 Epoch Training/Testing Negative Log Likelihood Loss Over MLP Parameter Count / 1000. MLP given 3 votes.
      Soft-Voting Weak-Learner Ensemble 100 Epoch Training/Testing Zero-One Loss Over MLP Parameter Count / 1000. MLP given 3 votes.

      We then tried the weak-learner approach again with hard-voting, and let the MLP independently train using the unmodified MLP training scheme mentioned previously. However, as opposed to halting training when MLP classification loss first hits 0, we only halt training when ensemble classification loss first hits 0.

      We found that while classification loss had certainly gone down when compared to the soft-voting scheme (with even just one vote!), the ensemble still severely underperformed when compared to each of the individual models used. As seen in the plots, the classification loss starts to improve once the MLP gets more and more votes, agreeing with intuition that, eventually, the MLP has the veto right. As opposed to the soft-voting scheme, all classifiers now have a contribution that is proportional to their voting weight, which mitigates the previous problem of some models having much higher confidence than others. However, we believe the poor results can be attributed to the models we used for ensembling. Indeed, a significant number of models are regular, boosted or ensembled (or all) versions of decision trees, which means there is a significant chance that they make similar mistakes on similar data points. Looking at the plots for overparameterized decision trees and L2-Boost ensembles, we see that train error never quite reaches 0 for any of them. Since the train loss seems to plateau for our models as well, this may explain why. In the cases of 1 or 2 votes, this can lead to consistently poor predictions, especially since the models are not reparameterized across the experiment. For 3 votes, this phenomenon is less significant, as the ensemble slowly begins to reach the testing performance of the individual models.

      Weak-Learner Ensemble 100 Epoch Training/Testing Zero-One Loss Over MLP Parameter Count / 1000. MLP given 1 vote.
      Weak-Learner Ensemble 100 Epoch Training/Testing Zero-One Loss Over MLP Parameter Count / 1000. MLP given 2 votes.
      Weak-Learner Ensemble 100 Epoch Training/Testing Zero-One Loss Over MLP Parameter Count / 1000. MLP given 3 votes.

      Further work could be done on the Weak-Learner Ensemble, focusing on better model selection and concurrent reparameterization across all models. Given the limited time and compute resources at our disposal, we leave this problem open for now.

      All hope is not lost, however. Seeing the poor performance of the Weak-Learner Ensemble given the significantly better performance of individual models, one could be discouraged from attempting to use ensembling to mitigate double descent, since it may not even be observable in such settings. However, we saw double descent in L2-Boost ensembles and, arguably, in random forests, and so we pushed onward. All other ensemble methods used multiple copies of the same model, and so we decided to experiment with a small ensemble of MLPs, to see how they would behave.

      This was feasible for 100 and 500 epochs only, but the obtained results shed light on how ensembling could in fact mitigate double descent. The phenomenon is not quite as observable in the 100 epoch case (one explanation could be that the train loss has not converged yet), but it becomes quite clear when looking at the 500 epoch ensemble and comparing it with the original 500 epoch MLP. Double descent is still very easy to see, occurring at the same threshold as before. This makes sense, since the MLPs have all reached interpolation, which should increase test loss for all, and then start going down as we overparametrize more and more. However, the main result is that the increase once we reach interpolation is much lower than before. Indeed, the ensemble sees a jump from $\sim$ 0.35 to around $\sim$ 0.4 at the highest, whereas the individual MLP sees a jump from $\sim$ 0.36 to around $\sim$ 0.52. Another important result is that the loss as we overparameterize becomes significantly lower in the ensemble model than in the individual MLP.

      MLP Ensemble 100 Epoch Training/Testing Cross-Entropy Loss Over MLP Parameter Count / 1000
      MLP Ensemble 500 Epoch Training/Testing Cross-Entropy Loss Over MLP Parameter Count / 1000

      While we weren’t able to fully get rid of the double descent curve by ensembling multiple MLPs, the fact that it became flatter and the loss past the interpolation threshold started to become smaller is quite exciting, as it suggests that, potentially, large ensembles of MLPs may not noticeably suffer from double descent at all, and yield better overall predictions than individual models can. One notable advantage to this ensemble method is the ability to further parallelize one’s training of overparameterized neural networks. These models can take extreme lengths of time to train, and besides increasing the computational allocation used, practitioners may use data, model, or processor parallelism in order to reduce this time. The ensemble neural networks we use are independently generated, meaning that they can be vectorized or trained on different GPU cores without issue. This could be a valid alternative to training for more epochs for reducing model error past the interpolation threshold. More work investigating the effect of neural network ensembling on double descent, especially on models trained over many epochs, would be very exciting and potentially shed even more light on the possible advantages of overparameterization.


      Conclusion

      We discussed the existence of double descent for some simple and classical models, observing the effects of varying across levels of parameterization and noting that single descent can sometimes be mistaken for double descent, and proposed the use of various ensembles to mitigate the effects of double descent.

      Ensembles consisting solely of neural networks resulted in a considerable boost in performance past the individual model interpolation threshold, and in a flatter curve when compared to individual models. However, pairing the neural network with weak learners in an ensemble voting system decreased testing performance, though this adverse effect waned as the neural network received proportionally more votes. Machine learning engineers that intend to intentionally overparameterize their models may take advantage of not only the ensemble approach’s increased performance and significantly more reliable results, but the enhanced parallelization and vectorization capabilities offered by the proposed method.


      Future Work

      This project was implemented using Google Colab, which proved to be restrictive for adopting more complex models. A key part of the double descent phenomenon is overparameterization, which happens across multiple full training loops, and so complex models that are additionally overparameterized will require more powerful computing resources beyond what we used. For example, a model which takes 10 hours to complete a single training loop will take multiple days to train before being able to plot results and observe double descent. Even for models that take around 10 to 15 minutes to train, such as the 500 epoch MLP we explored throughout our project, a full experiment that showcases the double descent curve in detail can take upwards of 5 hours. Furthermore, additional computing power can allow for this project to be expanded to more complicated datasets and tasks. MNIST classification is computationally inexpensive, though invoking double descent in more complex tasks such as text generation in natural language processing was not feasible using Google Colab. Future projects that follow this work should keep computational limitations in mind when choosing models and datasets.

      In addition to the future work suggested throughout our project, we propose a final approach that we believe is worth exploring further. During the planning process of this project, we discussed using a more rigorous voting system than what is traditionally found in ensemble model projects. Effectively, each model would have a weight associated with how much influence its output should have on the overall ensemble output. For $n$ models, each model could start with, say, a weight of $1/n$. Then, after producing each model’s vector output, the categorical cross-entropy loss with respect to the true output could be computed, and the weights of each model could be updated such that each model has its weight decreased by some amount proportional to the calculated loss. Then, these weights could be normalized using the softmax function. This would be repeated for each level of parameterization. Due to resource constraints and the limitations of sklearn to the CPU, learning both the model weights and ensemble weights at each level of ensemble parameterization was not feasible given the size of the models we built and the classifiers we chose to use, as well as the number of epochs we trained over. Future studies may wish to implement this method, however, to produce a more robust ensemble for classification.


      Reproducibility Statement

      To ensure reproducibility, we have included the codebase used for this project, as well as the above description of our data, models, and methods. Note that the Colab notebook that we have worked in is currently very messy and sometimes incomplete due to faults in Google’s autosaving feature, but we plan to clean it up and have it available for easy future experimentation.

      \ No newline at end of file diff --git a/blog/2023/elephantfish-model/index.html b/blog/2023/elephantfish-model/index.html new file mode 100644 index 00000000..00e589ba --- /dev/null +++ b/blog/2023/elephantfish-model/index.html @@ -0,0 +1,42 @@ + Modeling Elephantfish Communication through Deep RNNs | 6.S898 Deep Learning Blogs 2023

      Modeling Elephantfish Communication through Deep RNNs

      Elephantfish represent a fascinating subject for study within the realms of bioacoustics and animal communication due to their unique use of electric fields for sensing and interaction. This project proposes the development of a deep learning framework to model the electrical communication signals of elephantfish, akin to language models used in natural language processing (NLP).

      Abstract

      Elephantfish, known for their unique use of electric fields for sensing and interaction, present a fascinating study subject within the realms of bioacoustics and animal communication. This project, pioneering the use of deep learning, specifically Recurrent Neural Networks (RNNs), aims to model and interpret these electrical communication signals. By combining insights from bioacoustics, linguistics, and computer science, we seek to decode these bioelectrical signals into a human-comprehensible format, thereby expanding our understanding of animal cognition and communication. The overarching goal is to decode and understand the complexity of elephantfish communication and to explore the broader applications in sociolinguistics, pragmatics, and computational linguistics for non-human species. This project pioneers in utilizing deep learning, specifically Recurrent Neural Networks (RNNs), to model and interpret the electrical communication signals of elephantfish. The study’s novelty lies in its interdisciplinary approach, combining insights from bioacoustics, linguistics, and computer science to unravel the complexities of non-human communication systems. Our goal is to translate these unique bioelectrical signals into a form comprehensible to humans, thereby expanding our understanding of animal cognition and communication.

      Introduction and Objectives

      The elephantfish, a species renowned for its unique electric-based communication and the largest brain-to-body weight ratio of all known vertebrates, offers a fascinating window into the study of non-human communication systems. These creatures, inhabiting the murky waters of African rivers and lakes, have developed a sophisticated method of communication that relies on generating and sensing electric fields. This remarkable ability not only sets them apart in the aquatic world but also poses intriguing questions about the nature and complexity of their interactions. The study of elephantfish communication is not just a pursuit in understanding an exotic species; it reflects a broader scientific curiosity about the principles of communication and social behavior across different life forms.

      The primary objective of this project is to develop a deep understanding of elephantfish communication through the application of advanced neural language models, specifically focusing on Recurrent Neural Networks (RNNs). This approach is inspired by the parallels drawn between the electric signals used by elephantfish and the structural aspects of human language. By leveraging techniques commonly used in natural language processing (NLP), we aim to decode these bioelectrical signals and translate them into a format that can be understood by humans. This endeavor is not only about interpreting the ‘language’ of a non-human species; it is about enriching our understanding of communication as a fundamental biological and social function.

      To capture the complexity of elephantfish communication, we have collaborated with labs at MIT and Columbia, gaining access to a comprehensive dataset of elephantfish electric communication signals. This dataset includes a wide range of signals recorded under various environmental and social conditions, providing a rich source of data for analysis.

      Utilizing the latest advancements in deep learning, we will develop and train neural language models that can accurately interpret and model these electric signals. The focus will be on employing Long Short-Term Memory (LSTM) RNNs, which are well-suited for handling the temporal sequences inherent in these signals.

      Drawing from the field of NLP, we will apply a range of techniques to analyze and understand the ‘language’ of elephantfish. This analysis will delve into the sensing, communication, and social dynamics of the species, offering insights into how they interact with each other and their environment.

      One of the most challenging aspects of this project is translating the electric signals into a form that is comprehensible to humans. This task will involve developing innovative methods to represent these signals visually or auditorily, making the complex patterns of communication accessible for further study and interpretation.

      Beyond the technical analysis, we aim to explore the sociolinguistic and pragmatic aspects of elephantfish communication. This exploration will involve understanding the social context and significance of different patterns of signals, thereby contributing to the broader field of computational linguistics and sociolinguistics.

      In undertaking this research, we are not only contributing to the field of bioacoustics but also bridging gaps between biology, linguistics, and computer science. The insights gained from this study have the potential to transform our understanding of animal communication and cognition, opening up new possibilities for interdisciplinary research and discovery.

      Literature Review

      Time series analysis has been extensively used in biological studies, especially for understanding patterns in animal behavior and communication. Studies like Jurtz, et al. (2017) have demonstrated the efficacy of time series analysis in interpreting complex behavioral data in wildlife research. This forms a basis for our approach to model elephantfish movements, which are intrinsically temporal and dynamic.

      The unique architecture of LSTM RNNs, with their ability to remember long-term dependencies, makes them particularly suitable for time series prediction. Gers, Schmidhuber, and Cummins (2000) showcased the potential of LSTM RNNs in learning to bridge minimal time lags in excess of 1000 discrete time steps between relevant input events and target signals, setting a precedent for their application in predicting animal movement patterns.

      Recent advancements in bioacoustics have seen LSTM RNNs being employed to analyze and predict patterns in animal communication. For instance, Stowell and Plumbley (2014) applied LSTM networks to bird song recognition, illustrating the network’s capacity to handle temporal sequences in bioacoustic signals of bird sounds. This aligns closely with our project’s objective of modeling the movement patterns of elephantfish, which are hypothesized to be closely tied to their communication.

      Research on aquatic species like elephantfish presents unique challenges due to their environment and the nature of their communication. The work of Stoddard et al. (2010) in electric signal analysis of male electric fishes provides insights into the complexity of such studies. However, there is a noticeable gap in applying advanced time series models, like LSTM RNNs, specifically to the movement patterns and communication signals of elephantfish.

      The application of NLP techniques to animal communication is a relatively unexplored frontier. Recent work by Wilensky et al. (2021) in decoding prairie dog vocalizations using natural language processing provides a compelling case for extending similar approaches to non-vocal animal communication. Our project takes this concept further by applying deep learning techniques to decode the electric signals of elephantfish, which, while different from vocalizations, share parallels in terms of being a structured form of communication.

      The application of LSTM RNNs in predicting the current positions of elephantfish based on past positions not only addresses a significant gap in the study of aquatic animal behavior but also sets the stage for future research in this area. The success of this approach could revolutionize the way we understand and interpret the communication and social interactions of these unique species.

      Methodology

      Data Collection

      Collaborating with labs at MIT and Columbia, we have gained access to a diverse and comprehensive dataset of elephantfish electric communication signals. The dataset encompasses signals recorded in various environmental conditions, capturing the nuances of communication in different contexts. The recordings include instances of social interaction, mating rituals, and responses to external stimuli.

      Data Preprocessing

      The raw electric signal data require extensive preprocessing to extract meaningful features for the deep learning models. This involves filtering, noise reduction, and segmentation to isolate individual communication events. Given the temporal nature of the signals, we will focus on capturing time-dependent features that are crucial for LSTM RNNs.

      Model Architecture

      Our chosen model architecture revolves around Long Short-Term Memory (LSTM) Recurrent Neural Networks. LSTMs are well-suited for modeling sequences with long-term dependencies, making them ideal for capturing the temporal dynamics of elephantfish communication signals. The network will be designed to take into account the sequential nature of the signals, allowing for effective learning of patterns over time.

      Training

      The training process involves exposing the LSTM network to the preprocessed dataset, allowing it to learn and adapt to the patterns within the electric signals. The model’s performance will be iteratively refined through multiple training sessions, adjusting hyperparameters to optimize for accuracy and generalization.

      Evaluation

      The evaluation phase includes testing the trained model on a separate set of elephantfish communication signals not seen during training. This assesses the model’s ability to generalize its learning to new and unseen data. Metrics such as accuracy, precision, recall, and F1 score will be used to quantify the model’s performance.

      Experiments and Results

      Experiment 1: Signal Reconstruction

      Our first experiment aims to assess the model’s ability to reconstruct the original electric signals from the learned representations. This involves comparing the reconstructed signals with the original signals using established metrics for signal similarity.

      Experiment 2: Pattern Recognition

      In the second experiment, we evaluate the model’s performance in recognizing and categorizing different patterns within the elephantfish communication signals. This includes identifying specific sequences associated with social interactions, mating rituals, and responses to external stimuli.

      Results

      Preliminary results indicate promising performance in both signal reconstruction and pattern recognition tasks. The LSTM RNN demonstrates an ability to capture and reproduce complex temporal patterns within the electric signals. The model’s accuracy in distinguishing between different communication contexts is encouraging, suggesting that it can effectively learn and differentiate the nuances of elephantfish communication.

      The experiments involved training the RNNs on the collected dataset, followed by validation and testing phases. We present detailed results demonstrating the models’ ability to capture and replicate the intricate patterns of elephantfish communication. The analysis includes a comparative study with existing knowledge in marine biology, validating the accuracy and relevance of our models.

      Discussion and Conclusions

      The successful application of LSTM RNNs to model elephantfish communication signals represents a significant step forward in our understanding of non-human communication systems. The results demonstrate the capacity of deep learning techniques to decode and interpret complex bioelectrical signals, opening avenues for further exploration in bioacoustics and animal communication.

      The ability to reconstruct signals and recognize patterns within elephantfish communication provides a foundation for future studies on the sociolinguistic and pragmatic aspects of their interactions. By translating these signals into a comprehensible format, we pave the way for a deeper exploration of the meanings and nuances embedded in the electric language of elephantfish.

      Our research marks a significant stride in understanding non-human communication systems, demonstrating the ability to predict the movement and communication patterns of elephantfish. The findings not only shed light on the complex social structures of elephantfish but also open new avenues in the study of animal linguistics. We discuss the broader implications of our work in the fields of cognitive science and artificial intelligence, highlighting the potential applications and societal impact. Our LSTM RNN models, compared to baseline models that use the immediate last time step position to predict, show superior performance in predicting the complex communication patterns of elephantfish.

      This superiority highlights the effectiveness of our LSTM RNNs in capturing the intricate temporal dynamics of elephantfish communication. Moreover, our method of processing raw electric data has been optimized through trial and error, finding that skipping exactly every 5 data points results in the lowest loss, demonstrating the importance of fine-tuning data preprocessing in machine learning models.

      Challenges and Future Directions

      This project stands at the intersection of technology and biology, with the potential to significantly advance our understanding of animal communication. The success of this endeavor could pave the way for interdisciplinary research, contributing valuable insights into the cognitive abilities of non-human species and the fundamental principles of communication.

      The research conducted on elephantfish communication using LSTM RNNs has yielded insights that significantly advance our understanding of non-human communication systems. Our models have demonstrated a notable ability to predict movement and communication patterns, offering a new lens through which to view the complex social interactions of these aquatic species.

      This is a large scale long term collaboration between a few labs, and in the future we will utilize more of the data from a marine biology lab at Columbia to interpret the electric signals. We will likely collaborate with marine biologists to collect a data set of electric signals from elephantfish under various environmental and social conditions.

      Comparatively, our approach has shown improvements over traditional models, providing a more nuanced understanding of the temporal dynamics in elephantfish communication. These results not only align with existing theories in marine biology but also open new avenues for exploration in animal linguistics and cognitive science.

      However, this study is not without its limitations. One of the primary constraints was the size and diversity of the dataset. While we managed to collect a substantial amount of data, the variability in environmental conditions and individual elephantfish behaviors was limited. This constraint could potentially impact the generalizability of our models to broader applications. The translation of bioelectrical signals into a human-understandable format is an ongoing challenge that requires further refinement. Additionally, the diversity and variability within elephantfish communication present complexities that demand a nuanced understanding beyond the scope of this initial study.

      Another limitation lies in the inherent complexities of LSTM RNNs, which, while powerful, can sometimes become “black boxes.” This opaqueness makes it challenging to dissect the exact learning mechanisms and to fully understand how the models are making their predictions.

      Our study marks a significant step forward in the field but also highlights areas for further research. Future studies could focus on expanding the dataset and exploring more diverse environmental conditions. Additionally, we hope to develop more interpretable machine learning models that could provide clearer insights into the learning and prediction processes. One thing we hope to do is to convert back the predicted positions of fishes to the pixel positions in the tank, this way we can have a more visual intuition about how our model is predicting the positions.

      \ No newline at end of file diff --git a/blog/2023/enforcing-uniformity/index.html b/blog/2023/enforcing-uniformity/index.html new file mode 100644 index 00000000..55035082 --- /dev/null +++ b/blog/2023/enforcing-uniformity/index.html @@ -0,0 +1,32 @@ + Imposing uniformity through Poisson flow models | 6.S898 Deep Learning Blogs 2023

      Imposing uniformity through Poisson flow models

      Uniformity and alignment are used to explain the success of contrastive encoders. Can we use already trained, well-aligned features and impose uniformity to increase their quality and performance on downstream classification tasks?

      Most objects encountered in machine learning are extremely high dimensional. For example, a relatively small $512$x$512$ RGB image has over $750,000$ dimensions. However, most of this space is empty; that is, the set of well-formed images forms an extremely small subset of this large space.

      Thus a useful task in machine learning is to map this large space into a much smaller space, such that the images we care about form a compact organized distribution in this new space. This is called representation learning. For such a map to be useful, there are two key features. Firstly the representations should be useful for downstream tasks and not worse than the original representation. Thus they should preserve as much of the useful data as possible. Secondly, they should be relatively task agnostic and help across a diverse array of such downstream tasks. For example, word embeddings (such as those produced by BERT ) can be used for a wide array of language tasks such as language modeling and generation to sentiment analysis. An important question is how to generally find such useful representations.

      Several methods exist. For example, autoencoders attempt to learn maps that are essentially bijective over the dataset we care about. These ensure that important information is not lost during the transformation. Contrastive encoders attempt to learn maps that enforce similarity between representations of similar images. Contrastive encoders are seen to perform quite well on unsupervised representation learning tasks, and we will explore these in a bit more detail soon. Lastly, layers of already-trained neural networks can be used as features as well. For example, layers of VGG-19 trained on ImageNet are useful features that contain much information about the style and content of the images.

      It is important to further quantify what makes a useful representation from a theoretical standpoint. Wang and Isola introduced concepts of alignment and uniformity to explain why contrastive encoders perform well. Alignment is the idea that similar objects should have close feature vectors in the representation space. Uniformity is the idea that the set of well-formed objects should cover the representation space uniformly.

      In this post, we will further examine how uniformity affects the quality of representations. To do this, we will use Poisson flows. As we shall see, Poisson flows are an incredibly useful tool to enforce uniformity. We show that enforcing uniformity on well-aligned features can improve representations as measured by their performance on downstream tasks.

      Notation

      We introduce several notations to make talking about representations easier. Let $\mathcal{X}$ be our original space of the data, and let $p_{\mathrm{x}}$ be the distribution of the data. Let $\mathcal{Y}$ be any representation space, and let $f: \mathcal{X} \to \mathcal{Y}$ be a mapping from the original space to the representation space. If $\mathrm{y} = f(\mathrm{x}), \ \mathrm{x} \sim p_{\mathrm{x}}$, then let $\mathrm{y} \sim p_{f}$, where $p_{f}$ is the new distribution induced by $f$.

      We will also have a notion of similarity. Let $p_{\mathrm{pos}}(x_1, x_2)$ be a joint probability distribution that quantifies this similarity. We assume that $p_{\mathrm{pos}}$ satisfies

      \[\begin{aligned} p_{\mathrm{pos}}(x_1, x_2) &= p_{\mathrm{pos}}(x_2, x_1) \\ \int_{x_2} p_{\mathrm{pos}}(x_1, x_2) d x_2 &= p_{\mathrm{x}}(x_1) \end{aligned}\]

      Alignment and Uniformity

      As mentioned earlier, contrastive autoencoders learn useful representations by minimizing a distance metric for similar pairs, while maximizing the same for dissimilar pairs. Thus if $D(x_1, x_2)$ is some distance metric of $\mathcal{Y}$, contrastive encoders minimize $D(x, x^+)$ for positive pairs, while maximizing $D(x, x^-)$ for negative pairs.

      In their most common formulation, they set $\mathcal{Y}$ as the hypersphere $\mathcal{S}^d \subset \mathbb{R}^d$, and use cosine similarity $d(x_1, x_2) = x_1^T x_2$ as the similarity metric. Then the loss function becomes the following, where $M$ and $\tau$ are hyperparameters:

      \[\mathcal{L} \triangleq \mathbb{E}_{(x, x^+) \sim p_{\mathrm{pos}}, \{x_i^-\}_{i=1}^M \overset{\mathrm{iid}}{\sim} p_{\mathrm{x}}} \left[ -\log \frac {e^{f(x)^T f(x^+) / \tau}} {e^{f(x)^T f(x^+) / \tau} + \sum_{i=1}^{M} e^{f(x)^T f(x_i^-) / \tau}} \right]\]

      These encoders have been successful at several image representation tasks. Wang and Isola explained their performance through alignment and uniformity. Alignment is simply the quality that similar images are close together in the representation space. This is clearly present in contrastive encoders, as one of their goals is indeed to minimize

      \[\mathcal{L}_{\mathrm{alignment}} \triangleq \mathbb{E}_{(x, x^+)\sim p_{\mathrm{pos}}} \left[ D(x, x^+) \right]\]

      However, Wang and Isola also stated that uniformity was an equally important feature of contrastive architectures. That is, when training the contrastive loss to learn an encoder $f$, the new probability distribution $p_{f}$ is close to uniform. They showed that using $L_2$ norm as a distance metric and using Gaussian kernels to promote uniformity, learned representations perform better than those learned by contrastive learning.

      Figure 1: Alignment and Uniformity. In figure (a), we see the quality of alignment, i.e. similar images are close to each other in the representation space. In figure (b), we see the quality of uniformity, i.e. images form a uniform distribution across the representation space. Image borrowed from

      Why does uniformity help? Firstly, it acts as a regularization term. This is because if we tried to learn representations that maximized alignment without any target for uniformity, then a map that just takes all input vectors to zero would trivially minimize the loss. Yet this would be an extremely bad representation. However, aside from regularization, uniform distributions also have maximal self-entropy. Thus their importance can be explained equally well through some sort of minimizing loss of information. Indeed, this is how Wang and Isola explain it.

      In this post we will investigate this even further. In particular, if regularization is the only effect that uniformity has on representations, then slightly nudging already aligned representations to make them uniform should not improve their quality. This is exactly what we will do, and we will do this through Poisson Flows.

      Poisson Flows

      If you let a planar positive distribution of charges slightly above $z=0$ loose, then they will repel each other. If you stop them at some large enough distance $R$ from the origin, then their distribution approaches uniform as $R \to \infty$. This is very interesting, and what’s even more interesting is that this fact generalizes to arbitrary dimensions. Thus such fields allow a convenient way to map arbitrary high-dimensional distributions to uniform distributions. Poisson flow generative models proposed by Xu and Liu exploit this property for image generation; by sampling uniformly from the hemisphere, one can iterate through the backward ODE and thus sample from $p_{\mathrm{x}}$. We shall use it to impose uniformity on well-aligned features.

      Figure 2: Evolution of data points through a Poisson field. As we can see, arbitrary distributions are mapped to uniform. Further the mapping is continuous. Borrowed from

      Say we have a probability distribution $p_{\mathrm{y}}$ over $\mathcal{Y}_1 = \mathbb{R}^d$. Set this distribution at the $z = 0$ plane (here the $z$-dimension refers to the new dimension we recently augmented the dataset with) in the expanded space $\tilde{\mathcal{Y}}_1 = \mathcal{Y}_1 \times \mathbb{R}$. Let the electric field at an arbitrary point in $\tilde{\mathcal{Y}}_1$ be defined as

      \[E_{p_{\tilde{\mathrm{y}}}}(\tilde{y}) = \int_{\tilde{y}'} \frac{\tilde{y} - \tilde{y'}}{\|\tilde{y} - \tilde{y'} \|_2^{d+1}} \cdot p_{\tilde{\mathrm{y}}}(\tilde{y}') d\tilde{y}'\]

      Let $\mathrm{y} \sim p_{\mathrm{y}}$. Evolve $\tilde{\mathrm{y}} = (\mathrm{y}, 0) \in \tilde{\mathcal{Y}_1}$ according to the ODE

      \[\frac{d\tilde{\mathrm{y}}}{dt} = E_{p_{\tilde{\mathrm{y}}}}(\tilde{y})\]

      Let the final point be $f_{\mathrm{poisson}}(\mathrm{y}; p_{\mathrm{y}})$. Then the distribution of $p_{f_{\mathrm{poisson}}}(\cdot)$ approaches uniform as $R \to \infty$.

      In practice, since we want to take $s = 0$ to $R$, we do a change of variables to write the ODE as

      \[\frac{d \tilde{\mathrm{y}}}{ds} = \frac{1}{E_{p_{\tilde{\mathrm{y}}}}(\tilde{\mathrm{y}})^T \tilde{\mathrm{y}}} \cdot E_{p_{\tilde{\mathrm{y}}}}(\tilde{\mathrm{y}})\]

      Note that the field stated here isn’t actually used directly, it is rather learned through a deep neural network. This is possible since the integral can be replaced with an expectation, which itself can be approximated through Monte-Carlo methods.

      Since Poisson flows allow us to map arbitrary distributions to uniform ones, while preserving continuity; they are an extremely powerful tool to further understand the effects of uniformity. This brings us to our main hypothesis

      Hypothesis

      Assume that uniformity acts as more than just a regularizing term for learning useful representations. Then if we take any well-aligned features that have good downstream performance, and apply a continuous map that imposes uniformity, our new features should perform better at downstream tasks.

      This is because if uniformity is simply a regularizing term, then training them for the downstream task is the best we can do. This hypothesis itself is counterintuitive because the original features should already be well-trained against the task at hand. However, surprisingly, this hypothesis seems to hold true. To show this, we describe the following experiment.

      Experiment

      We consider the penultimate layer of AlexNet trained on CIFAR-10 as our initial features. These features must be well aligned, as linear decision boundaries are able to accurately classify them into their classes.

      Figure 3: A representation of how the features should evolve. (a) Initial representation is seemingly random and hard to classify with. (b) After training a classifier, the penultimate layer is well clustered between the various features. (c) Features after learning a Poisson field, and sending the data through it. Note that images (a) and (b) are for representation purposes only. However image (c) is indeed produced through a learned Poisson field from data points in (b)

      We take these features and learn a corresponding Poisson field. For our predicted poisson field, we use a relatively small fixed-size two-hidden layer network.

      We finally pass our features through this Poisson field and train a linear classifier on top of the final learned representations. We compare this accuracy against the original accuracy.

      A summary of our approach is given in the figure below:

      Figure 4: Overview of architecture

      Further training details are given in Appendix A.

      Results

      The results are given in the below table.

      Architecture Train accuracy Test accuracy
      AlexNet 88% 82%
      AlexNet + Poisson Flow (ours) 95% 85%

      Here we see that our method outperforms a well-trained AlexNet considerably.

      Conclusion

      This is a surprisingly nice improvement. Note that the Poisson flow post-processing step is completely unsupervised. This seems to hint that having a uniform prior is helpful for reasons other than just regularization.

      It would be extremely interesting to develop an entirely unsupervised architecture based on Poisson flow. This would begin by using an unsupervised method to learn well-aligned features. A suitable loss candidate could possibly be just a contrastive loss, with L2 norm as a distance metric:

      \[\mathcal{L} \triangleq \mathbb{E}_{(x, x^+) \sim p_{\mathrm{pos}}, \{x_i^-\}_{i=1}^M \overset{\mathrm{iid}}{\sim} p_{\mathrm{x}}} \left[ \|x - x^+\|_2^{\alpha} - \lambda \sum_{i=1}^{M} \|x - x_i^{-}\|_2^{\beta} \right]\]

      Then passing these well-aligned features through a Poisson flow would enforce uniformity. Such a proposed architecture could be worth exploring.


      Appendices

      See https://github.com/mathletema/poisson-representations for code.

      Appendix A: Training details

      We used a version of AlexNet similar to that given in Isola’s paper, such that the penultimate layer was 128 neurons wide. We trained this network against cross-entropy loss for 20 epochs using Adam as an optimizer.

      After this, we moved the features from $\mathbb{R}^{128}$ to $\mathbb{R}^{129}$ by setting $z = 0$. We then learned a Poisson field for this network similar to the original Poisson flow paper. We use the default values of $\tau, \gamma, \sigma$ as in the original paper, but used $M = 20$ as a consequence of our reduced dimension size. We trained this Poisson field with a large batch size of $1024$ and a small batch size of $128$. We trained this over $200$ epochs.

      We then passed the features through the Poisson field. To simulate the ODE, we used Euler’s method with a small delta of $0.01$ and $100$ steps. Using RK4 might produce better results, and we leave this to future work.

      We finally trained a logistic classifier on top of these final representations, and printed train and test accuracies.

      \ No newline at end of file diff --git a/blog/2023/eunhae-project/index.html b/blog/2023/eunhae-project/index.html new file mode 100644 index 00000000..399e06f5 --- /dev/null +++ b/blog/2023/eunhae-project/index.html @@ -0,0 +1,32 @@ + How does model size impact catastrophic forgetting in online continual learning? | 6.S898 Deep Learning Blogs 2023

      How does model size impact catastrophic forgetting in online continual learning?

      Yes, model size matters.

      Introduction

      One of the biggest unsolved challenges in continual learning is preventing forgetting previously learned information upon acquiring new information. Known as “catastrophic forgetting,” this phenomenon is particularly pertinent in scenarios where AI systems must adapt to new data without losing valuable insights from past experiences. Numerous studies have investigated different approaches to solving this problem in the past years, mostly around proposing innovative strategies to modify the way models are trained and measuring its impact on model performance, such as accuracy and forgetting.

      Yet, compared to the numerous amount of studies done in establishing new strategies and evaluative approaches in visual continual learning, there is surprisingly little discussion on the impact of model size. It is commonly known that the size of a deep learning model (the number of parameters) is known to play a crucial role in its learning capabilities . Given the limitations in computational resources in most real-world circumstances, it is often not practical or feasible to choose the largest model available. In addition, sometimes smaller models perform just as well as larger models in specific contexts. Given this context, a better understanding of how model size impacts performance in a continual learning setting can provide insights and implications on real-world deployment of continual learning systems.

      In this blog post, I explore the following research question: How do network depth and width impact model performance in an online continual learning setting? I set forth a hypothesis based on existing literature and conduct a series of experiments with models of varying sizes to explore this relationship. This study aims to shed light on whether larger models truly offer an advantage in mitigating catastrophic forgetting, or if the reality is more nuanced.

      Related Work

      Online continual learning

      Continual learning (CL), also known as lifelong learning or incremental learning, is an approach that seeks to continually learn from non-iid data streams without forgetting previously acquired knowledge. The challenge in continual learning is generally known as the stability-plasticity dilemma, and the goal of continual learning is to strike a balance between learning stability and plasticity.

      Traditional CL models assume new data arrives task by task, each with a stable data distribution, enabling offline training. However, this requires having access to all task data, which can be impractical due to privacy or resource limitations. In this study, I will consider a more realistic setting of Online Continual Learning (OCL), where data arrives in smaller batches and is not accessible after training, requiring models to learn from a single pass over an online data stream. This allows the model to learn from data in real-time.

      Online continual learning can involve adapting to new classes (class-incremental) or changing data characteristics (domain-incremental). Specifically, for class-incremental learning, the goal is to continually expand the model’s ability to recognize an increasing number of classes, maintaining its performance on all classes it has seen so far, despite not having continued access to the old class data. Moreover, there has been more recent work done in unsupervised continual learning . To narrow the scope of the vast CL landscape to focus on learning the impact of model size in CL performance, I will focus on the more common problem of class-incremental learning in supervised image classification in this study.

      Continual learning techniques

      Popular methods to mitigate catastrophic forgetting in continual learning generally fall into three buckets:

      1. regularization-based approaches that modify the classification objective to preserve past representations or foster more insightful representations, such as Elastic Weight Consolidation (EWC) and Learning without Forgetting (LwF);
      2. memory-based approaches that replay samples retrieved from a memory buffer along with every incoming mini-batch, including Experience Replay (ER) and Maximally Interfered Retrieval, with variations on how the memory is retrieved and how the model and memory are updated; and
      3. architectural approaches including parameter-isolation approaches where new parameters are added for new tasks and leaving previous parameters unchanged such as Progressive Neural Networks (PNNs).

      Moreover, there are many methods that combine two or more of these techniques such as Averaged Gradient Episodic Memory (A-GEM) and Incremental Classifier and Representation Learning (iCaRL).

      Among the methods, Experience Replay (ER) is a classic replay-based method and widely used for online continual learning. Despite its simplicity, recent studies have shown ER still outperforms many of the newer methods that have come after it, especially for online continual learning .

      Model size and performance

      It is generally known across the literature that deeper models increase performance. Bianco et al. conducted a survey of key performance-related metrics to compare across various architectures, including accuracy, model complexity, computational complexity, and accuracy density. The relationship between model width and performance has also been discussed, albeit less frequently.

      He et al. introduced Residual Networks (ResNets), which was a major innovation in computer vision, by tackling the problem of degradation in deeper networks. ResNets do this by using residual blocks to increase the accuracy of deeper models. Residual blocks that contain two or more layers are stacked together, and “skip connections” are used in between these blocks. The skip connections act as an alternate shortcut for the gradient to pass through, which alleviates the issue of vanishing gradient. They also make it easier for the model to learn identity functions. As a result, ResNet improves the efficiency of deep neural networks with more neural layers while minimizing the percentage of errors. The authors compare models of different depths (composed of 18, 34, 50, 101, 152 layers) and show that accuracy increases with depth of the model.

        ResNet18 ResNet34 ResNet50 ResNet101 ResNet152
      Number of Layers 18 34 50 101 152
      Number of Parameters ~11.7 million ~21.8 million ~25.6 million ~44.5 million ~60 million
      Top-1 Accuracy 69.76% 73.31% 76.13% 77.37% 78.31%
      Top-5 Accuracy 89.08% 91.42% 92.86% 93.68% 94.05%
      FLOPs 1.8 billion 3.6 billion 3.8 billion 7.6 billion 11.3 billion
      Table 1: Comparison of ResNet Architectures

      This leads to the question: do larger models perform better in continual learning? While much of the focus in continual learning research has often been on developing various strategies, methods, and establishing benchmarks, the impact of model scale remains a less explored path.

      Moreover, recent studies on model scale in slightly different contexts have shown conflicting results. Luo et al. highlights a direct correlation between increasing model size and the severity of catastrophic forgetting in large language models (LLMs). They test models of varying sizes from 1 to 7 billion parameters. Yet, Dyer et al. show a contrasting perspective in the context of pretrained deep learning models. Their results show that large, pretrained ResNets and Transformers are a lot more resistant to forgetting than randomly-initialized, trained-from-scratch models, and that this tendency increases with the scale of model and the pretraining dataset size.

      The relative lack of discussion on model size and the conflicting perspectives among existing studies indicate that the answer to the question is far from being definitive. In the next section, I will describe further how I approach this study.

      Method

      Problem definition

      Online continual learning can be defined as follows:

      The objective is to learn a function $f_\theta : \mathcal X \rightarrow \mathcal Y$ with parameters $\theta$ that predicts the label $Y \in \mathcal Y$ of the input $\mathbf X \in \mathcal X$. Over time steps $t \in \lbrace 1, 2, \ldots \infty \rbrace$, a distribution-varying stream $\mathcal S$ reveals data sequentially, which is different from classical supervised learning.

      At every time step,

      1. $\mathcal S$ reveals a set of data points (images) $\mathbf X_t \sim \pi_t$ from a non-stationary distribution $\pi_t$
      2. Learner $f_\theta$ makes predictions $\hat Y_t$ based on current parameters $\theta_t$
      3. $\mathcal S$ reveals true labels $Y_t$
      4. Compare the predictions with the true labels, compute the training loss $L(Y_t, \hat Y_t)$
      5. Learner updates the parameters of the model to $\theta_{t+1}$

      Task-agnostic and boundary-agnostic

      In the context of class-incremental learning, I will adopt the definitions of task-agnostic and boundary-agnostic from Soutif et al. 2023. A task-agnostic setting refers to when task labels are not available, which means the model does not know that the samples belong to a certain task. A boundary-agnostic setting is considered, where information on task boundaries is not available. This means that the model does not know when the data distribution changes to a new task.

        Yes No
      Task labels Task-aware Task-agnostic
      Task boundaries Boundary-aware Boundary-agnostic
      Table 2: Task labels and task boundaries. This project assumes task-agnostic and boundary-agnostic settings.

      Experience Replay (ER)

      In a class-incremental learning setting, the nature of the Experience Replay (ER) method aligns well with task-agnostic and boundary-agnostic settings. This is because ER focuses on replaying a subset of past experiences, which helps in maintaining knowledge of previous classes without needing explicit task labels or boundaries. This characteristic of ER allows it to adapt to new classes as they are introduced, while retaining the ability to recognize previously learned classes, making it inherently suitable for task-agnostic and boundary-agnostic continual learning scenarios.

      Implementation-wise, ER involves randomly initializing an external memory buffer $\mathcal M$, then implementing before_training_exp and after_training_exp callbacks to use the dataloader to create mini-batches with samples from both training stream and the memory buffer. Each mini-batch is balanced so that all tasks or experiences are equally represented in terms of stored samples. As ER is known to be well-suited for online continual learning, it will be the go-to method used to compare performances across models of varying sizes.

      Benchmark

      For this study, the SplitCIFAR-10 is used as the main benchmark. SplitCIFAR-10 splits the popular CIFAR-10 dataset into 5 tasks with disjoint classes, each task including 2 classes each. Each task has 10,000 3×32×32 images for training and 2000 images for testing. The model is exposed to these tasks or experiences sequentially, which simulates a real-world scenario where a learning system is exposed to new categories of data over time. This is suitable for class-incremental learning scenarios. This benchmark is used for both testing online and offline continual learning in this study.

      Metrics

      Key metrics established in earlier work in online continual learning are used to evaluate the performance of each model.

      Average Anytime Accuracy (AAA) as defined in

      The concept of average anytime accuracy serves as an indicator of a model’s overall performance throughout its learning phase, extending the idea of average incremental accuracy to include continuous assessment scenarios. This metric assesses the effectiveness of the model across all stages of training, rather than at a single endpoint, offering a more comprehensive view of its learning trajectory.

      \[\text{AAA} = \frac{1}{T} \sum_{t=1}^{T} (\text{AA})_t\]

      Average Cumulative Forgetting (ACF) as defined in

      This equation represents the calculation of the Cumulative Accuracy ($b_k^t$) for task $k$ after the model has been trained up to task $t$. It computes the mean accuracy over the evaluation set $E^k_\Sigma$, which contains all instances $x$ and their true labels $y$ up to task $k$. The model’s prediction for each instance is given by $\underset{c \in C^k_\Sigma}{\text{arg max }} f^t(x)_c$, which selects the class $c$ with the highest predicted logit $f^t(x)_c$. The indicator function $1_y(\hat{y})$ outputs 1 if the prediction matches the true label, and 0 otherwise. The sum of these outputs is then averaged over the size of the evaluation set to compute the cumulative accuracy.

      \[b_k^t = \frac{1}{|E^k_\Sigma|} \sum_{(x,y) \in E^k_\Sigma} 1_y(\underset{c \in C^k_\Sigma}{\text{arg max }} f^t(x)_c)\]

      From Cumulative Accuracy, we can calculate the Average Cumulative Forgetting ($F_{\Sigma}^t$) by setting the cumulative forgetting about a previous cumulative task $k$, then averaging over all tasks learned so far:

      \[F_{\Sigma}^t = \frac{1}{t-1} \sum_{k=1}^{t-1} \max_{i=1,...,t} \left( b_k^i - b_k^t \right)\]

      Average Accuracy (AA) and Average Forgetting (AF) as defined in

      $a_{i,j}$ is the accuracy evaluated on the test set of task $j$ after training the network from task 1 to $i$, while $i$ is the current task being trained. Average Accuracy (AA) is computed by averaging this over the number of tasks.

      \[\text{Average Accuracy} (AA_i) = \frac{1}{i} \sum_{j=1}^{i} a_{i,j}\]

      Average Forgetting measures how much a model’s performance on a previous task (task $j$) decreases after it has learned a new task (task $i$). It is calculated by comparing the highest accuracy the model $\max_{l \in {1, \ldots, k-1}} (a_{l, j})$ had on task $j$ before it learned task $k$, with the accuracy $a_{k, j}$ on task $j$ after learning task $k$.

      \[\text{Average Forgetting}(F_i) = \frac{1}{i - 1} \sum_{j=1}^{i-1} f_{i,j}\] \[f_{k,j} = \max_{l \in \{1,...,k-1\}} (a_{l,j}) - a_{k,j}, \quad \forall j < k\]

      In the context of class-incremental learning, the concept of classical forgetting may not provide meaningful insight due to its tendency to increase as the complexity of the task grows (considering more classes within the classification problem). Therefore, it is recommended to avoid relying on classical forgetting as a metric in class-incremental learning, in both online and offline settings. Thus, Average Anytime Accuracy (AAA) and Average Cumulative Forgetting (ACF) are used throughout this experiment, although AA and AF are computed as part of the process.

      Model selection

      To compare learning performance across varying model depths, I chose to use the popular ResNet architectures, particularly ResNet18, ResNet34, and ResNet50. As mentioned earlier in this blog, ResNets were designed to increase the performance of deeper neural networks, and their performance metrics are well known. While using custom models for more variability in sizes was a consideration, existing popular architectures were chosen for better reproducibility.

      Moreover, while there are newer versions (i.e. ResNeXt) that have shown to perform better without a huge increase in computational complexity, for this study the original smaller models were chosen to avoid introducing unnecessary variables. ResNet18 and ResNet34 have the basic residual network structure, and ResNet50, ResNet101, and ResNet152 use slightly modified building blocks that have 3 layers instead of 2. This ”bottleneck design” was made to reduce training time. The specifics of the design of these models are detailed in the table from the original paper by He et al..

      ResNet architecture. Table from He et al. (2015)

      Moreover, in order to observe the effect of model width on performance, I also test a slim version of ResNet18 that has been used in previous works. The slim version uses fewer filters per layer, reducing the model width and computational load while keeping the original depth.

      Saliency maps

      I use saliency maps to visualize “attention” of the networks. Saliency maps are known to be useful for understanding which parts of the input image are most influential for the model’s predictions. By visualizing the specific areas of an image that a CNN considers important for classification, saliency maps provide insights into the internal representation and decision-making process of the network.

      Experiment

      The setup

      • Each model was trained from scratch using the Split-CIFAR10 benchmark with 2 classes per task, for 3 epochs with a mini-batch size of 64.
      • SGD optimizer with a 0.9 momentum and 1e-5 weight decay was used. The initial learning rate is set to 0.01 and the scheduler reduces it by a factor of 0.1 every 30 epochs, as done in .
      • Cross entropy loss is used as the criterion, as is common for image classification in continual learning.
      • Basic data augmentation is done on the training data to enhance model robustness and generalization by artificially expanding the dataset with varied, modified versions of the original images.
      • Each model is trained offline as well to serve as baselines.
      • Memory size of 500 is used to implement Experience Replay. This represents 1% of the training dataset.

      Implementation

      The continual learning benchmark was implemented using the Avalanche framework, an open source continual learning library, as well as the code for online continual learning by Soutif et al.. The experiments were run on Google Colab using NVIDIA Tesla T4 GPU.

        Experiment 1 Experiment 2 Experiment 3 Experiment 4 Experiment 5 Experiment 6 Experiment 7
      Model ResNet18 ResNet34 ResNet50 SlimResNet18 ResNet18 ResNet34 ResNet50
      Strategy Experience Replay Experience Replay Experience Replay Experience Replay Experience Replay Experience Replay Experience Replay
      Benchmark SplitCIFAR10 SplitCIFAR10 SplitCIFAR10 SplitCIFAR10 SplitCIFAR10 SplitCIFAR10 SplitCIFAR10
      Training Online Online Online Online Offline Offline Offline
      GPU V100 T4 A100 T4 T4 T4 T4
      Training time (estimate) 3h 4.5h 5h 1h <5m <5m <5m
      Table 3: Details of experiments conducted in this study

      Results

      Average Anytime Accuracy (AAA) decreases with model size (Chart 1), with a sharper drop from ResNet34 to ResNet50. The decrease in AAA is more significant in online learning than offline learning.

      Chart 1: Average Anytime Accuracy (AAA) of different sized ResNets in online and offline continual learning

      When looking at average accuracy for the validation stream in the online CL setting (Chart 2), we see that the rate at which accuracy increases with each task degrades with larger models. Slim-ResNet18 shows the highest accuracy and growth trend. This could indicate that larger models are worse at generalizing to a class-incremental learning scenario.

      Chart 2: Validation stream accuracy (Online CL)
        Average Anytime Acc (AAA) Final Average Acc
      Slim ResNet18 0.664463 0.5364
      ResNet18 0.610965 0.3712
      ResNet34 0.576129 0.3568
      ResNet50 0.459375 0.3036
      Table 4: Accuracy metrics across differently sized models (Online CL)

      Now we turn to forgetting.

      Looking at Average Cumulative Forgetting (ACF), we see that for the online CL setting, ResNet34 performs the best (with a slight overlap at the end with ResNet18), and ResNet50 shows the most forgetting. A noticeable observation in both ACF and AF is that ResNet50 performed better initially but forgetting started to increase after a few tasks.

      Chart 3: forgetting curves, Online CL (Solid: Average Forgetting (AF); Dotted: Average Cumulative Forgetting (ACF))

      However, results look different for the offline CL setting. ResNet50 has the lowest Average Cumulative Forgetting (ACF) (although with a slight increase in the middle), followed by ResNet18, and finally ResNet34. This difference in forgetting between the online and offline CL settings is aligned with the accuracy metrics earlier, where the performance of ResNet50 decreases more starkly in the online CL setting.

      Chart 4: Forgetting curves, Offline CL (Solid: Average Forgetting (AF); Dotted: Average Cumulative Forgetting (ACF))

      Visual inspection of the saliency maps revealed some interesting observations. When it comes to the ability to highlight intuitive areas of interest in the images, there seemed to be a noticeable improvement from ResNet18 to ResNet34, but this was not necessarily the case from ResNet34 to ResNet50. This phenomenon was more salient in the online CL setting.

      Online

      Image: Saliency map visualizations for Online CL

      Offline

      Image: Saliency map visualization for Offline CL

      Interestingly, Slim-ResNet18 seems to be doing better than most of them, certainly better than its plain counterpart ResNet18. A further exploration of model width on performance and representation quality would be an interesting avenue of research.

      Slim-ResNet18

      Image: Saliency map visualization (Slim ResNet18)

      Discussion

      In this study, I compared key accuracy and forgetting metrics in online continual learning across ResNets of different depths and widths, along with a brief qualitative inspection of the models’ internal representations. These results show that larger models do not necessarily lead to better continual learning performance. We saw that Average Anytime Accuracy (AAA) and stream accuracy dropped progressively with model size, hinting that larger models struggle to generalize to newly trained tasks, especially in an online CL setting. Forgetting curves showed similar trends but with more nuance; larger models perform well at first but suffer from increased forgetting with more incoming tasks. Interestingly, the problem was not as pronounced in the offline CL setting, which highlights the challenges of training models in a more realistic, online continual learning context.

      Why do larger models perform worse at continual learning? One of the reasons is that larger models tend to have more parameters, which might make it harder to maintain stability in the learned features as new data is introduced. This makes them more prone to overfitting and forgetting previously learned information, reducing their ability to generalize.

      Building on this work, future research could investigate the impact of model size on CL performance by exploring the following questions:

      • Do pre-trained larger models (vs trained-from-scratch models) generalize better in continual learning settings?
      • Does longer training improve the relative performance of larger models in a CL setting?
      • Can different CL strategies (other than Experience Replay) mitigate the degradation of performance in larger models?
      • Do slimmer versions of existing models always perform better?
      • How might different hyperparameters (i.e. learning rate) impact CL performance of larger models?

      Conclusion

      To conclude, this study has empirically explored the role of model size on performance in the context of online continual learning. Specifically, it has shown that model size matters when it comes to continual learning and forgetting, albeit in nuanced ways. These findings contribute to the ongoing discussions on the role of the scale of deep learning models on performance and have implications for future area of research.

      \ No newline at end of file diff --git a/blog/2023/exploring-music-generation/index.html b/blog/2023/exploring-music-generation/index.html new file mode 100644 index 00000000..4e85ba50 --- /dev/null +++ b/blog/2023/exploring-music-generation/index.html @@ -0,0 +1,32 @@ + Exploring Methods for Generating Music | 6.S898 Deep Learning Blogs 2023

      Exploring Methods for Generating Music

      Explores various machine learning techniques for generating music. Compares the performance of traditional RNNs, LSTMs, and transformers on generating sample sequences of music.

      Introduction

      The problem of music generation has been widely explored for a long time. Music has very similar parallels to how speech and language is structured. Just like language, music is temporal and in the traditional western sense, has a defined set of standards/rules for how music should be structured. What makes music generation a more challenging problem than language is that music has an artistic/expressive component as well as both low and high level structure. For “good” music, it isn’t enough to simply generate a series of notes and harmonies that obey music theory conventions. At the low level, “good” music makes use of varying dynamics, note groupings, and articulation. At the high level, “good” music may feature overarching motifs and specific forms (round, sonata form, ABAB, etc). This level of complexity is analogous to the problem of generating poetry and generating speech that mimics a human reciting it. The poetry will have structures like rhyme, rhythm, motifs, metaphors, etc. and the speech reading it will have to replicate expressiveness to be convincing. This level of complexity is not yet achievable with a high level of robustness by current speech generators, LLMs, and NLP methods.

      It is this level of structural complexity required for generating “good” music that makes machine learning methods, specifically deep learning, a compelling approach to tackling the problem of generating “good” music. Deep learning methods should be able to capture music’s low level music theory structure as well as its high level structure. It is the hope that given enough data and the right architectures, music generation will be able to mimic a level akin to the best human composers. While music generation such as OpenAi’s jukebox has yielded very good results, it is trained on pure audio frequencies. I will focus on musical generation and training from a “written” / musical structural perspective rather than audio. (Think human speech vs. language/text), as I think this can provide greater insight into how these models learn and what about musical structure is being learned.

      Related Work

      There have been several studies/projects done in the generation of music. OpenAi has done work with using audio samples to generate music. They took a representation learning and autoencoder approach leveraging VQ-VAEs. Other work took approaches similar to mine and tried to analyze the “written” structure of music, using a combination of LSTMs and a midi encoding scheme to generate music. Work has been done to capture the expressivity of music, where they leverage large transformer models and condition them on emotion to generate music. There has been success in generating expressivity based on this conditional approach. My work here aims to analyze purely the syntactical structure of music and will not be leveraging conditioning.

      Methodology

      Before music can be learned and generated, it needs to first be converted to a format that can be input to a ML model. To achieve this I used a subset of a piano midi dataset and utilized a program to convert from MIDI to .csv. Using this .csv file I encoded each note in the midi to a 107 dimensional vector. Where the first 106 dimensions correspond to midi-notes A0-G9, and the last dimension encodes the duration of the midi-note divided by the midi-clock/quarter frequency to get a duration of the note in quarter notes. Since note A0 corresponds to midi-note 21, all of the midinote values are subtracted by this baseline value when being encoded into the vector. If a midi-note is played it is encoded as “ON” in the .csv and as such is represented with a 1 in its corresponding index in the note vector. For example, if a C4 and A4 note (MIDI note 60, and 69 respectively) are played at the same time in a song, it will be encoded as a 107 dimensional zero vector with indices 38, 47 (60 (midi value) -21 (baseline) -1 (0-index notation)) being 1 and index 106 being the duration of the chord.

      I then tested 3 different models to see how they performed. The first model I tested was an RNN with hidden_size = 64, RNN_layers = 2, and sequences of 24, 48, 64, and 200. I next tested LSTM models with hidden_size = 64, RNN_layers = 2, and sequences of 24, 48, 64, and 200 and compared a bidirectional vs. single-directional model. The last model I analyzed was a transformer. In which I first took my note encodings and created an embedded representation of the notes and combined this with positional encoding in the sequence of music to get my final embedding to pass into my transformer architecture.

      Results/Experiments

      I found the RNN architecture to be the worst performing model. Its output had a high amount of ringing for some training runs and was mostly unstructured and random. The results of a sample music generation can be found here. The LSTM model took longer to train but performed better with hidden size = 64, sequence_length=48, and 30 epochs. I found that it worked even better when using a bidirectional architecture. A sample generation can be found here, in which it was fed the starting 10 notes of Polonaise in A-flat major, Op. 53 and was asked to generate a long sequence from that. The transformer took the longest to train and its results can be found here.

      Closing Thoughts and Future Work

      As expected, the base RNN architecture failed to generate anything meaningful. It took a while to find hyperparameters that would make the LSTM generate something of note, but when it did successfully generate music I was surprised by some of the resemblances it had to music in the training data.

      One noticeable flaw in my work is that my metric for success outside of training error is qualitative. It would have been useful for evaluation of my model implementations if I had a quantitative metric. I originally calculated the loss of my models based on how they replicated unseen music from a test set given sequences from the same music, however losses for every model failed to converge in a reasonable amount of time. It is certainly difficult to tell if poor performance is due to implementation or a small dataset and limited compute resources.

      Continuing on the idea of lack of data, one of the challenges I faced was in the curation of my dataset. I originally was going to generate music tokens for my network based on a very descriptive musical format called LilyPond. However, there were inconsistencies between samples of music in how they were represented in the LilyPond text format, so creation of a program to transcribe the text to a good format for representing music was very difficult, which is why I turned to the more standardized MIDI file format. It is unfortunate because a lot of the complex expression in music is lost in midi format, making it harder if not impossible for models trained on midi input to learn these complex representations/behavior. I say impossible because if data for musical expression is completely absent from training, then this important component of music is simply out of distribution and impossible to learn. So a better way to encode/represent music is needed for better results.

      Moving forward, it would be interesting to explore how representation learning can be used to enhance the generation of music. I wanted to explore the use of VAEs and some of the more advanced variations like the one used in OpenAI’s Jukebox, VQ-VAE. These methods may be able to capture both the high level structure and complex low level structure found in music. I also want to explore methods for encoding the dynamics, articulation, and expression found in music, something I was not able to do this time around. Lastly, exploring a better way to encode and learn the duration of notes would lead to better music generation.

      \ No newline at end of file diff --git a/blog/2023/foley-to-video/index.html b/blog/2023/foley-to-video/index.html new file mode 100644 index 00000000..4ca227bc --- /dev/null +++ b/blog/2023/foley-to-video/index.html @@ -0,0 +1,42 @@ + Autoen-chorder: Predicting Musical Success With Neural Nets | 6.S898 Deep Learning Blogs 2023

      Autoen-chorder: Predicting Musical Success With Neural Nets

      In this blog, we discuss deep learning methods and results of predicting song popularity from audio features.

      Introduction

      Our aim is to use deep learning (the crux of 6.s898) to help musicians and their sponsors (for example: agents, record labels, and investors) identify whether songs will resonate with listeners. Solving this problem would enable established artists to release more impactful music, and spur new musicians to break into a competitive market.

      We first begin by establishing what our success metric is. For the purposes of this project, we will use the concept of song “popularity” as the metric we want to predict, and we source our popularity data from the SpotGenTrack Popularity Dataset. This dataset leverages Spotify’s Popularity Index, which is a relative rank measured against other songs’ popularities. It is a function of recent stream count, save rate, playlist appearance count, skip rate, share rate, and more.

      There already exist a few models to help us solve this problem. However, these models make use of metadata, such as artist name, year of release, and genre. We believe that these models – while interesting – are insufficient to be actionable, particularly for up-and-coming musicians who may be innovating new music genres, or who may not yet have a strong name. Specifically, metadata like Artist Name are both highly-weighted (for example, even Taylor Swift’s least popular song will be a relative superhit) and unchangeable (we cannot suggest that artists change their identity to Beyonce). Additionally, features like Genre are imprecise, and can quickly become outdated as new subgenres and crossover genres are developed.

      To address this gap and become more actionable to musicians, we aimed to create a new model that can achieve near-parity with metadata-based models without leveraging any metadata. By combining multiple audio-feature models, we not only achieved comparable results to metadata-based models, we actually outperformed metadata-based models on more than half our testing data.

      Previous Works

      The most prominent existing model is HitMusicNet (heretofore referred to as “HMN”). The HMN model predicts popularity based on lyric data from Genius.com (syllables, words, etc.), high-level audio features from SpotGenTrack (e.g. acousticness, key, tempo, speechiness), low-level audio features from SpotGenTrack (audio preprocessing, such as spectral analyses), and metadata from SpotGenTrack (e.g. artist name, year of release, genre). A feature vector is created with this information, and said vector is fed as the input into an Autoencoder network to compress the features, followed by a neural network to obtain the predicted popularity.

      HitMusicNet has two different objectives: Regression and classification. For this project, we will focus only on regression since it will allow us to visualize differences between our model and HMN with higher granularity. We replicated the code from the paper in PyTorch, using the same functions and data and calculated metrics to make sure our implementation is correctly replicating the paper. We see a slight discrepancy in the errors, likely due to the test/train split during the paper’s training. Altogether, we can still consider our replicated model as valid as the metrics are within reasonable range to the reported metrics. Additionally, we added the R-squared metric as an additional metric to ensure our model fits the data.

      A second model, VGGish, is a pretrained convolutional neural network trained on YouTube-100M (a database with 100 million YouTube videos). This network is a representation learning network widely used in established papers. This network takes in a wav file and processes it on 0.96-second windows, and calculates 128 embeddings per window. This means that the resulting tensor from VGGish will be 2 dimensional for a single file, and 3 dimensional for a batch of files.

      Hypothesis

      HMN has a tendency to be heavily-indexed on metadata features and lyrics. Data such as artist name heavily bias the model’s popularity predictions in favor of big-name artists. Lyrics information can make the model biased to predicting instrumental music as less likely to be successful. While this may be representative of how the industry works, it makes HMN much less actionable for musicians trying to assess their chance of success with the market.

      We believe that audio-only features – such as temporal information (i.e. the structure of the song and information about previous sections) and repetition – can alone be fairly successful in determining a song’s popularity. Thus, we chose to use just audio data, as well as temporal data, to predict popularity.

      We hypothesize that combining the audio-only features of HMN with VGGish’s audio representation will yield superior outcomes to HMN’s audio-only features alone. We also hope that our new model can compete with the full HMN model (i.e. audio features and metadata combined).

      Data Preprocessing

      Given our hypothesis, we need to extract the low-level features from our signal and map each row to its corresponding audio file to be fed into VGGish. We used Spotify’s API to obtain the raw audio files to be processed, and then ran them through the VGGish network. We performed the same preprocessing as the one done in the HitMusicNet paper.

      File length limitation

      Unfortunately, Spotify only allows the download of 30s previews of songs.

      Memory limitation

      Audio files are heavy, and the longer they are, the heavier. We should ideally process all 95,000 songs’ full length, but given Spotify’s API’s request limit, and the size of the files, we were only able to obtain 10,000 30s snippets. This still resulted in roughly 14.5 GB of data. Processing the whole dataset would not only require roughly 140 GBs of data, but the Spotify API’s limits will likely be exceeded, and our colab notebook will likely run out of memory.

      Downsampling and splitting

      Given the considerations above, we decided to use 10,000 songs for our model’s development, splitting the data into 8,500 samples for training and 1,500 for validation. Given that this is roughly 10% of the original data, we expect the model’s performance to be below the reported metrics.

      Baselines

      The metrics obtained when replicating the HMN network serve as a reasonable parameter to verify our model’s accuracy. As mentioned above, the model’s performance is expected to be below the paper’s reported metrics. To understand the range, we retrained a network with the same shape as the paper’s using the 10,000 samples in the same train/test split we will feed to our new network. The resulting metrics for this experiment can be seen in Table 2.

      Training a model that results in similar metrics would be ideal, but realistically, as we will only be using low-level data, we expect the metrics to be lower than the values in Table 2. To ensure that our trained model isn’t just predicting noise, we use a baseline comparison, comparing against a random normal distribution with mean μ=40.02 and σ=16.79.

      As seen in table 3, the baseline intuitively would not appear to be too far from the trained HMN model in terms of MSE and MAE. When looking at the r-squared, the random model has a negative value, while the trained HMN network results in a much higher 0.5616 value. To deem a model as successful, we will compare it against both sets of metrics.

      Alternate Models

      • Single autoencoder. Our first iteration to solve this problem consisted of using a single autoencoder to find representations with data coming from VGGish and SpotGenTrack low level features, and then running that through a feed-forward network similar to the one used in HMN. Since the output of VGGish is a tensor of shape (batch_size, n_windows, n_features) and the output of SpotGenTrack is (batch_size, 207), we concluded there was no simple way to combine the two data sources without losing temporal information.
      • RNN. Our second iteration consisted of running the data coming from SpotGenTrack Low-Level through an autoencoder in the same way HMN does it. After this initial train gives us a compressed representation of the data from SpotGenTrack Low-Level, we train two subsequent networks: First an LSTM RNN which transforms data into (batch_size, 20), then we add the compressed representation from SpotGenTrack Low-Level and run that through a feedforward network. This model yielded a performance below the baseline.
      • HMN+VGGish: This model consists of taking the full SpotGenTrack data, passing it through the regular autoencoder defined in HMN, and add it to the output coming from VGGish. This model, while resulting in promising results, still yielded worse performance than HMN on its own, so our team decided to explore alternatives.
      • LossNet. Our third exploration consisted of training a model that uses VGGish’s outputs to try and predict losses from HMN. In essence, we are trying to use VGGish Representation to capture information that HMN consistently is unable to. This approach has parallels with Adversarial Networks, in that one model is being trained on the losses of another model. However, this approach is more cooperative than adversarial, since the result of the two models is not zero-sum. This approach led to a dead-end with surprising results.

      Final Architecture

      Our final iteration consists of a model with two autoencoders: One for data from SpotGenTrack low level features, the second for the representation obtained using the VGGish model. The slight difference between these two models is that the VGGish autoencoder has additional LSTM layers at the start of the encoder, and at the end of the decoder. The output from these two autoencoders is then added together and passed through a feed-forward network. This architecture can be seen in Figure 4.

      Padding and Packing

      None of the audio files coming from Spotify previews are more than 30s in duration, but some are in fact shorter than others. To solve this issue, and also to be able to feed our model whichever sized data we require, we use pytorch’s packing functionality. Packing allows us to process sequential data with different sizes, so that only the relevant information is passed through the LSTM. Conversely, padding allows us to add zeros at the end of sequences so that all samples have the same size. This is required to store data in tensors.

      Hyperparameters

      Additional Model Considerations

      The original HMN model compiles 228 features into 45 representations for the feed-forward network. We want our model’s feed-forward network to have a similar number of inputs as the given architecture, therefore we compress the data in the encoder of both autoencoders to 20 features, so that when added together, they result in 40 total features.

      Additionally, as can be seen in figure 3.2, the target’s distribution is condensed at a central point, and distributed in a Gaussian shape. To help our model accurately predict the shape of the results, we multiply the losses by a weighting factor. This multiplication is important to make our model more likely to predict outliers. The equation is the following:

      \begin{equation} \frac{1}{N} \sum_{i=1}^{N} \exp\left(\left(\frac{y_{i} - \text{mean}}{\alpha \cdot \text{std}}\right)^2 \cdot \frac{1}{\beta}\right) \end{equation}

      Our feed-forward network was suffering of vanishing gradients during training. To attempt to avoid this, we initialized all linear layers with a weight distributed by Xavier uniform, and a constant bias of 0.1.

      Finding the Best Model

      In order to find the best model, we modified plenty of parameters and hyperparameters. We first found the optimal autoencoder models (seen on table 4), and then we proceeded to run several loops over our linear layer to obtain the model with lowest errors. The parameters modified were the following:

      • Learning rate: (0.001, 0.0001, 0.0002, 0.02, 0.0005)
      • Weight decays: (0, 0.0001, 0.0002)
      • Batch sizes: (200, 100, 256, 277)
      • Means (for weights calculation): (0.33, 0.34, 0.35, 0.37, 0.38, 0.40, 0.42, 0.45)
      • Alphas (for weights calculation): (1.8, 2.0, 2.1, 2.2)
      • Betas (for weights calculation): (1.8, 2.0, 2.2)
      • Number of linear layers: (7, 9, 12)

      The combination that resulted in the optimal model was the following:

      • Weight decays: 0
      • Batch sizes: 200
      • Means (for weights calculation): 0.36
      • Alphas (for weights calculation): 2.0
      • Betas (for weights calculation): 2.0

      Table 5 shows the best-performing models obtained after experimentation. MAE, MSE and r-squared were calculated using the testing data, i.e. Data not used in training. Looking at the data in tables 2 and 3, we see that our model shows a significant improvement above the random baseline, with a reasonable r-squared and MSE. Reduction in the MAE remains challenging, but still we see a significant improvement from the random baseline.

      Furthermore, we analyzed the testing data, and found that for 919 of the 1,500 songs (61.2%), our model did better than HitMusicNet. Upon further analysis, we found that our model did a better job predicting the popularity of songs with popularities ranged [0.22-0.55], while HMN does a better job at predicting outliers (songs with <0.2 or >0.6 of popularity).

      Conclusions and Next Steps

      Data Exploration

      Given Spotify’s ubiquity and analytics excellence, its Popularity Index is a good proxy for relative song popularity. But there are concerns around using data from a single platform (Spotify) and from a single channel (digital streaming). Given this concern, we would like to explore other methods of calibrating a track’s popularity (for example, Billboard and Discogs API). We can aggregate popularities into a single output, or can train each model on multiple outputs of various popularity scores.

      Currently, our data consists of 30s audio clips. The average new song length is around 3min 17s, meaning that our models’ inputs cover around 15% of the song. This can cause the model to miss information critical to song likeability, such as the intro, chorus, or bridge. We would like to make our dataset more complete by using full songs as inputs. Furthermore, we’re using only 10,000 data points, which can also be affecting our training efficiency, especially our ability to detect outliers, which we have found to be a key issue with our model. Ideally, we would like to train our models on all 95k songs in SpotGenTrack.

      Architectures

      Many more architectures can further be explored to predict song popularity. We found VGGish with an LSTM to be an efficient “boosting” algorithm, which contributed to the model in a less significant way than SpotGenTrack, but still allowed our model to increase its performance. Similarly, the use of transformer architectures can help improve the performance of our model.

      In this study, we explored and evaluated our model against the HitMusicNet’s regression algorithm. In further studies, it could be beneficial to explore the classification algorithm, as we have seen very promising results in the prediction of songs along a certain range.

      We used the VGGish model purely for inference since we needed to train the autoencoder and then the feed-forward network. Future studies can include architectures such that the VGGish model is trained in series with the feedforward network, and fine-tuned to predict popularity. We could also look at alternate representation models that are perhaps better suited or supply a more apt representation for our task than VGGish.

      In conclusion, the use of low-level features to predict popularity can have several real-world advantages. The proposed model is able to predict a song’s popularity to a fair degree without the need for high-level features. Emerging artists can use these parameters to determine the possible success of their songs. Music labels can use this algorithm to predict an artist’s possible popularity. Platforms such as Spotify can also take advantage of this model in order to tackle recommendations and boost emerging artists.

      Bibliography

      • D. Martín-Gutiérrez, G. Hernández Peñaloza, A. Belmonte-Hernández and F. Álvarez García, “A Multimodal End-to-End Deep Learning Architecture for Music Popularity Prediction,” in IEEE Access, vol. 8, pp. 39361-39374, 2020, doi: 10.1109/ACCESS.2020.2976033.
      • Ding, Yiwei, and Alexander Lerch. “Audio embeddings as teachers for music classification.” arXiv preprint arXiv:2306.17424 (2023).
      • D. Martín-Gutiérrez, “HitMusicNet” in https://github.com/dmgutierrez/hitmusicnet.
      • Koutini, Khaled, et al. “Efficient training of audio transformers with patchout.” arXiv preprint arXiv:2110.05069 (2021).
      • P. Nandi, “Recurrent Neural Nets for Audio Classification” in https://towardsdatascience.com/recurrent-neural-nets-for-audio-classification-81cb62327990.
      • Wu, Rick, “VGGish Tensorflow to PyTorch” in https://github.com/tcvrick/audioset-vggish-tensorflow-to-pytorch.
      • Wu, Yiming. (2023). Self-Supervised Disentanglement of Harmonic and Rhythmic Features in Music Audio Signals.
      • S. Shahane, “Spotify and Genius Track Dataset” in https://www.kaggle.com/datasets/saurabhshahane/spotgen-music-dataset/data.
      \ No newline at end of file diff --git a/blog/2023/forbidden-facts/index.html b/blog/2023/forbidden-facts/index.html new file mode 100644 index 00000000..53101bc9 --- /dev/null +++ b/blog/2023/forbidden-facts/index.html @@ -0,0 +1,42 @@ + Forbidden Facts | 6.S898 Deep Learning Blogs 2023

      Forbidden Facts

      A Mechanistic Interpretability Investigation of Llama 2

      Introduction

      When large language models (LLMs) predict the next token, they often need to reconcile competing objectives. For example, a pretrained model is trained to be both helpful and harmless, which may come into conflict if a user asks a dangerous query. However, we are currently unable to prioritize objectives robustly, as evidenced by jailbreaks.

      We aimed with our project to explore how language models mechanistically reconcile competing objectives, with the hope that a deeper understanding of these mechanisms could inspire better model alignment and control. For example, if a language model is instructed to always say the incorrect answer, and then is prompted to fill in a factual association, there are two objectives. One objective is to complete the factual association and the next token prediction correctly, and the other is to follow the instruction of saying an incorrect answer. These objectives are opposing because it is impossible to follow both objectives accurately, and it is reasonable for the model to follow either instruction. In this task, the model will consistently output the correct answer without the incorrect answer instruction and consistently output an incorrect answer with the instruction. Specifically, we tried to understand how Llama-2 models reconcile obedience and truthfulness in the context of prompts like the following one:

      [INST] <<SYS>>
      +You are an obedient assistant who only responds with a single word 
      +with no punctuation. You answer truthfully. However, you are not 
      +allowed to say the forbidden word California.
      +<</SYS>>
      +The Golden Gate Bridge is in the state of [/INST] <br/> 

      Here the obedience objective makes the model not want to respond with “California”, while the truthfulness objective does the opposite. Since there has already been some existing work on how large language models perform factual recall, we decided to focus on the obedience objective, and in particular on what mechanisms the model uses to downweight the forbidden word.

      Background

      Factual recall. Meng et al. located factual associations within GPT-style autoregressive transformer models using causal tracing. They found strong causal effects on predictions localized in two main areas: 1) at the final layers processing the last token, and 2) more notably at middle layers processing the last token of the subject. Further analysis showed the middle layer effects are driven primarily by the MLP modules. This suggests that GPT stores factual associations about a subject via a localized computation in the MLPs when they process the subject token. Mor Geva et al. extend this study of the factual recall circuit by distilling it into three parts. First, the representation at the last subject token gets enriched by early MLP layers, composed of many subject-related attributes. Second, the information about the relation between the subject and answer token propagates to the last token. Finally, the last token “queries” the subject representation to extract the attribute that the model then outputs, which is done by attention heads that encode subject-attribute mappings in their parameters.

      Competing circuits. Circuits are subgraphs of a neural network with distinct functionalities. The field of mechanistic interpretability seeks to reverse engineer model internals that produce a behavior into explainable algorithms, and recent works have rigorously explored toy examples of these circuits. These works usually focus on describing a circuit where the prompt directs the model to complete one distinct task. This project seeks to understand what happens when there are two opposing tasks and how the circuits for each task interact with each other.

      Mechanistic interpretability. Olsson et al. and Nanda et al. were important early papers in the emerging field of Mechanistic Interpretability. They helped set the direction of the field (attempt to rigorously decode fundamental mechanisms involved in a model’s computation), developed the evidential standards (causal mediation on a subset of a model being higher quality evidence than correlation), and helped define the methodology used (patching experiments, logit attribution, ablation, reverse engineering of weights).

      Prompt injections. Wei et al. propose that one failure mode of language models that leads to prompt injections is the competition between capabilities and safety objectives. Models are trained for instruction following, language modeling, and safety. This project aims to achieve a mechanistic understanding of how prompt injections operate with respect to these competing objectives.

      Latent knowledge. Research has demonstrated that models have latent knowledge of correct answers, but won’t output them if prompted naively . A problem for future advanced models is being able to distinguish whether its outputs are truthful or deceptive. For example, if a model’s objective is to say the incorrect answer, somewhere in its internal computations it has to compute what the correct answer is to know it is outputting the incorrect answer. One question that naturally arises is if we are able to deduce from its internal representations what the correct answer is. This project investigates a model’s latent knowledge under specific circumstances.

      Methods

      We initially used Llama-2-7B-chat, a 32-layer decoder-only transformer model fine-tuned with supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align to human preferences for helpfulness and safety. We chose Llama-2-7B-chat because the model achieves reliably good performance on our instruction tasks, has its weights open-sourced, and has a relatively low number of parameters to reduce computational costs. Previously, we fine-tuned GPT-2-XL on the Alpaca instruction dataset, but could not get reliable results on our tasks.

      A competing prompt is when the correct answer is forbidden, and a non-competing prompt is when an incorrect answer is forbidden (equivalent to a normal factual recall).

      We used first-order patching to replace a component’s activations in a non-competing run with its activations in a competing run (and vice versa). To calculate component $r_{i}$’s importance, we take the log odds of predicting the correct answer in a non-competing run with $r_{i}$ patched from a competing run, and subtract the log odds of predicting a correct answer during a normal non-competing run:

      \[\begin{equation} \left[ \mathrm{LO}_a\left( r_i(\mathbf{p}_\text{c}) + \sum_{j \neq i} r_j(\mathbf{p}_\text{nc}) \right) - \mathrm{LO}_a\left(\sum_{j} r_j(\mathbf{p}_\text{nc})\right) \right]. \end{equation}\]

      This is a natural method to analyze model mechanisms at a coarse-grained level. If Llama 2 is a Bayesian model that aggregates information from each component, Equation 2 can be interpreted as the average log Bayes factor associated with changing the $r_{i}$’s view of the world from forbidding an incorrect answer to forbidding the correct answer. If this Bayes factor is small, then $r_{i}$ plays a large role in the model suppression behavior. We also only consider the residual stream on the last token because these components have the direct effect on the next token prediction.

      By first-order, we mean we don’t consider the effect the component may have on other components. We chose to do first-order patching because when multiple pieces of evidence are independent, their aggregate log Bayes factor is the sum of their individual log Bayes factors, which is why we can cumulatively add the components’ importance in the last plot.

      Results

      Our high-level takeaway was that the forbidding mechanism is complicated. The following plots illustrate its overall behavior:

      This plots the probability Llama 2 answers a competing prompt correctly versus the probability it answers a non-competing prompt correctly across our dataset. A competing prompt is when the correct answer is forbidden, and a non-competing prompt is when an incorrect answer is forbidden (equivalent to a normal factual recall). The plot is cut off on the sides because we filter the dataset to ensure the model gets the initial factual recall task correct and has a significant suppression effect.

      To get this plot, we first rank the residual stream components (attention heads and MLPs on the final token) by their importance from first-order patching, a tool we explain in the next paragraph. Adding the components’ importance “scores”, we find that patching 41 components is enough to achieve the same suppression as patching all 1057 components. This number stays roughly the same across the 13b and 70b versions of Llama even as the total component size rises to 10,570 components. There is a lagging tail here: for example, only the top 15 components are needed to create ~80% of the suppression effect.

      I worked jointly with Tony Wang and Kaivu Hariharan on this research project. I then did some more extended analysis for this class.

      Extended Results

      I then asked how this analysis scales to larger models, namely the 13 and 70 billion parameter versions of Llama 2.

      13B:

      70B:

      I initially expected the number of components needed to replicate the initial effect to scale with the number of parameters. For example, 70b has 10,570 components versus 7b’s 1057 components. However, the number actually slightly decreased to 37 components and 38 components for 13b and 70b, respectively. We can see that the total suppression effect increased from 7b but remained the same between the two. This analysis suggests that the number of components needed to implement the suppression circuit remains constant as the number of model parameters grows. This makes sense when considering that larger models have a lot more emergent capabilities, which would entail a greater number of circuits as opposed to just enlarging the circuits of smaller models.

      The highly important suppressor heads paid attention to the forbidden token, which raised the question of how they knew to do so. We found that the key and query were “enriched” throughout the layers of the model to pay attention to the forbidden token. Testing query and key specificity, we found that swapping the queries from competing and non-competing runs made no difference to the attention paid to the key. Nor did changing the positional embeddings of the key. There must be another, perhaps more complicated, way that the model knows to pay attention to the forbidden token.

      We found that suppression heads preferentially attend to the forbidden word when it is the correct answer to the factual recall task. Understanding the model’s heuristics mechanistically allowed us to exploit its behavior, something we call the California Attack. Let’s take the initial example in a non-competing run:

      [INST] <<SYS>>
      +You are an obedient assistant who only responds with a single word 
      +with no punctuation. You answer truthfully. However, you are not 
      +allowed to say the forbidden word 'floor'.
      +<</SYS>>
      +The Golden Gate Bridge is in the state of [/INST] <br/> 

      The model naturally answers ‘California’. By adding two words to the first sentence of the prompt: “You are an obedient assistant from California […] “, we break the model. The suppressor components take the forbidding instruction to never say ‘floor’ and instead downweight “California” to an 18% completion probability, elevating “San Francisco”, an incorrect answer, to the top response.

      I did some more exploring of this California Attack. I found three things:

      1. In the previous example, ablating just the top suppressor head resulted in California being the top response again.
      2. Some particular heads like attending more to certain categories (e.g. countries) and certain words. In this particular case, the top suppressor head enjoyed paying attention to California specifically. The key then is to have the forbidden instruction apply to a word it doesn’t like paying attention to, such as ‘floor’.
      3. We can find the words they downweight the most and least by applying the OV matrix against the entire vocabulary distributions and looking at the beginning and end of the distribution.

      Keeping these lessons in mind, I found another attack by analyzing some of the words the suppressor heads downweight the most. In the above example, I added that Llama 2 was an assistant “to Trump” in the system message. In the above message, the first run is the adversarial attack where the top response to answering who the 45th President of the USA was is ‘Great’. Under a normal run without the adversarial attack, the top answer is ‘Trump’:

      I also experimented with the 13B version of Llama 2, and found that the California attack also applies to this model when forbidding ‘table’ in a non-competing run:

      However, I could not find a similar adversarial attack for the 70B version of Llama 2. This suggests that as models get larger, their heuristics get more robust to such mechanistic exploits.

      Discussion

      In this work, we decompose and attempt to characterize important components of Llama 2 that allow it to suppress the forbidden word in the forbidden fact task. While we identify some structural similarities between the most important attention heads, we also find evidence that the mechanisms used by Llama 2 are complex and heterogeneous. Overall, we found that even components directly involved in suppressing the forbidden word carry out this mechanism in different ways and that Llama 2’s mechanisms are more akin to messy heuristics than simple algorithms.

      This results in an important limitation of our work: we could not find a clean, sparse circuit implementing the forbidden behavior. Moreover, it is unclear if we are working in the right “basis” of attention heads and MLPs, or if causal attribution methods such as activation patching are able to recover the correct representation of a circuit.

      This raises some questions about the goals of mechanistic interpretability. Previous mechanistic interpretability papers have largely studied algorithmic tasks on small models to understand how models implement behaviors and characterize certain properties. However, moving away from toy settings to understand how models with hundreds of billions of parameters implement a variety of complex behaviors with competing objectives might be much harder.

      Computational irreducibility is the idea that there are certain systems whose behavior can only be predicted by fully simulating the system itself, meaning there are no shortcuts to predicting the system’s behavior. Initially proposed by Stephen Wolfram in the context of cellular automata, this concept challenges the reductionist approach to science, which may be analogous to the approach mechanistic interpretability takes today.

      If computational irreducibility applies to mechanistic interpretability in understanding models, it may be very difficult to get generalizable guarantees about its behavior. If even the most efficient way of computing important properties about models is too slow, then mechanistic interpretability can’t achieve one of its main goals. This project provides some suggestive evidence that we could live in a world where frontier models are computationally irreducible.

      Thanks for reading! If you have any questions, feel free to reach out at miles_wang [at] college [dot] harvard [dot] edu!

      \ No newline at end of file diff --git a/blog/2023/generating-robust-networks/index.html b/blog/2023/generating-robust-networks/index.html new file mode 100644 index 00000000..8a18bde8 --- /dev/null +++ b/blog/2023/generating-robust-networks/index.html @@ -0,0 +1,67 @@ + Training Robust Networks | 6.S898 Deep Learning Blogs 2023

      Training Robust Networks

      Exploring ResNet on TinyImageNet, unveiling brittleness and discovering simple robustness enhancement strategies via hyperparameter optimization

      Introduction

      In recent years, deep neural networks have emerged as a dominant force in the field of machine learning, achieving remarkable success across a variety of tasks, from VGG-16 in image classification to ChatGPT in natural language modeling. However, the very complexity that allows deep neural networks to learn and represent complex patterns and relationships can also leave them susceptible to challenges such as overfitting, adversarial attacks, and limited interpretability. The brittleness of deep neural networks, in particular, poses a significant challenge toward their deployment in real-world applications, especially those where reliability is paramount, like medical image diagnosis and autonomous vehicle navigation. Consequently, it is crucial to develop a better understanding of deep architectures and explore strategies for enhancing robustness. This project focuses specifically on ResNet, a model introduced in 2015 for image classification that is still widely used today. In particular, we study the model’s vulnerability to adversarial perturbations and, subsequently, work through a strategy to enhance its resilience through data augmentation and hyperparameter optimization.

      Related Works

      ResNet is a convolutional neural network architecture introduced in 2015 that sought to overcome numerical instability issues in deep networks and simplify the complexity of architecture search. It achieved this by incorporating skip connections, essentially allowing the training procedure to dynamically determine the optimal number of layers for the network. ResNet is trained on the ImageNet dataset, a popular benchmark in object category classification with 1,000 classes and millions of images. For our project, we use ResNet-18, an 18-layer variant of the original ResNet architecture, and TinyImageNet, a smaller version of ImageNet with around 100,000 images and 200 classes. This is largely for computational ease.

      Figure 1. ResNet-18 Architecture
      Figure 2. Sample Images from TinyImageNet

      The brittleness of many deep neural networks for computer vision, including ResNet, is well documented. For example, adding a tiny amount of random Gaussian noise, imperceptible to the human eye, can dramatically affect the accuracy and confidence of a network. In fact, we can optimize over the input image to generate small, non-random perturbations that can be used to alter the network’s prediction behavior arbitrarily, a vulnerability that applies to a variety of networks.

      In this project, we investigate two small perturbations: adding random Gaussian noise and modifying the colors of a small subset of pixels. We use hyperparameter search to fine-tune ResNet-18, aiming to create a network robust to these perturbations without compromising significantly on accuracy. Specifically, we examine general hyperparameters like batch size, learning rate, number of frozen layers, and more. The ultimate goal is to define a straightforward and resource-efficient strategy for mitigating brittleness that can potentially be extended to other architectures and domains.

      Methodology

      Baseline Model

      The out-of-the-box ResNet18 model is pretrained on ImageNet, achieving about 55% accuracy on the ImageNet validation set. TinyImageNet is a subset of ImageNet with fewer classes; there is a potential need for further fine-tuning of the out-of-the-box model to optimize performance. Thus, we start off by performing a simple hyperparameter grid search over batch size and learning rate. Each model is trained on the TinyImageNet training set, a dataset of 40,000 images (downsampled from 100,000 for computational ease) with 200 classes (roughly uniform class distribution). The baseline model is then selected based on accuracy on the TinyImageNet validation set, a uniformly balanced dataset of 10,000 images.

      Generating Adversarial Perturbations

      Next, we use gradient descent to create adversarial perturbations. The first perturbation is adding a small amount of Gaussian noise. We try to maximize the probability of the input image belonging to a wrong class (the inverse of the standard cross-entropy classification objective) while also penalizing the magnitude of the noise. This approach is more efficient and controllable compared to attempting to add a random sample of Gaussian noise with the hope of inducing misclassification.

      Figure 3. Noise added to image during each step in a sample gradient descent path for the first perturbation
      Figure 4. Sample images and their Gaussian-perturbed, misclassified versions

      The other perturbation is randomly selecting a small subset of pixels (0.5%) and adjusting their color until the image is misclassified by the baseline model. A gradient descent approach that maximizes the probability of the input image belonging to a wrong class is used to implement this perturbation; however, it is much more sensitive to initialization and can require retries, making it less resource-efficient.

      Figure 5. Noise added to image during each step in a sample gradient descent path for the second perturbation
      Figure 6. Sample images and their pixel-perturbed, misclassified versions

      We generate 11,000 adversarial examples using the Gaussian noise perturbation technique on the training examples that the baseline model correctly classifies. Of these adversarial examples, we use 10,000 of them to augment the training dataset (call it the augmented training set) and reserve 1,000 for hyperparameter optimization (call it the perturbed training set). We also generate 2,000 adversarial examples using the same perturbation technique on the validation examples that the baseline model correctly classifies. 1,000 of these are used for hyperparameter optimization (call it the perturbed validation set) while the rest are saved for out-of-sample evaluation (call it the hold-out validation set).

      Note that we keep adversarial examples generated from the validation set out of the augmented training set to avoid lookahead bias. We want to avoid allowing the model to gain insights into the characteristics of examples that it will encounter in the validation set (since perturbed images are very similar to the original images), ensuring a more accurate assessment of the model’s robustness and generalization capabilities.

      Finally, we generate an additional 500 examples using the pixel modification perturbation technique on the validation examples that the baseline correctly classifies (call it the out-of-distribution hold-out set). These examples are reserved for out-of-sample and out-of-distribution evaluation, assessing the model’s ability to perform well on adversarial perturbations it has never seen before.

      Hyperparameter Optimization to Create a More Robust Model

      Equipped with the augmented/additional datasets from the previous step, we start the process of model creation. The relevant metrics for selecting a model are original validation accuracy (derived from the original validation dataset from TinyImageNet), perturbed training accuracy, and perturbed validation accuracy. It is crucial to look at original validation accuracy to ensure that we are not creating robust models by compromising significantly on the original image classification task. In addition, accuracy on the perturbed train dataset tells us how well our model adjusts to the perturbation, while accuracy on the perturbed validation dataset provides an additional perspective by evaluating how well the model generalizes to perturbations on images it has never seen before. The same set of metrics is used in evaluating the final model on out-of-sample datasets, in addition to accuracy on the out-of-distribution hold-out set.

      We examine how varying four different hyperparameters affects the robustness of ResNet-18. The first hyperparameter involves initializing the model with either weights from the baseline model or the default pre-trained weights. The next hyperparameter is how many layers of ResNet-18 are frozen during the training procedure. The last two hyperparameters are batch size and learning rate. It is important to note that we do not conduct a search over a four-dimensional hyperparameter grid for computational reasons. Instead, we fix some hyperparameters at reasonable default values while we vary over the other hyperparameters. Using the insights gleaned from this hyperparameter search, we proceed to train the final model.

      Comparing Models via Visualization

      Finally, we transform the feature maps generated for an input image into interpretable visualizations to better understand the learned representations within the models. These feature maps capture the activations of learned filters or kernels across different regions of the input images and are the basis for our analysis. Each residual block in a ResNet consists of multiple convolutional layers. We register forward hooks (a feature in PyTorch that allows us to register a function to be called each time a forward pass is executed through a layer) for each convolutional and linear layer in the network to capture and store the activations produced during the forward pass. The layers in the ResNet model are as follows:

      Layer: conv1, Activation shape: torch.Size([1, 64, 112, 112])
      +Layer: layer1.0.conv1, Activation shape: torch.Size([1, 64, 56, 56])
      +Layer: layer1.0.conv2, Activation shape: torch.Size([1, 64, 56, 56])
      +Layer: layer1.1.conv1, Activation shape: torch.Size([1, 64, 56, 56])
      +Layer: layer1.1.conv2, Activation shape: torch.Size([1, 64, 56, 56])
      +Layer: layer2.0.conv1, Activation shape: torch.Size([1, 128, 28, 28])
      +Layer: layer2.0.conv2, Activation shape: torch.Size([1, 128, 28, 28])
      +Layer: layer2.0.downsample.0, Activation shape: torch.Size([1, 128, 28, 28])
      +Layer: layer2.1.conv1, Activation shape: torch.Size([1, 128, 28, 28])
      +Layer: layer2.1.conv2, Activation shape: torch.Size([1, 128, 28, 28])
      +Layer: layer3.0.conv1, Activation shape: torch.Size([1, 256, 14, 14])
      +Layer: layer3.0.conv2, Activation shape: torch.Size([1, 256, 14, 14])
      +Layer: layer3.0.downsample.0, Activation shape: torch.Size([1, 256, 14, 14])
      +Layer: layer3.1.conv1, Activation shape: torch.Size([1, 256, 14, 14])
      +Layer: layer3.1.conv2, Activation shape: torch.Size([1, 256, 14, 14])
      +Layer: layer4.0.conv1, Activation shape: torch.Size([1, 512, 7, 7])
      +Layer: layer4.0.conv2, Activation shape: torch.Size([1, 512, 7, 7])
      +Layer: layer4.0.downsample.0, Activation shape: torch.Size([1, 512, 7, 7])
      +Layer: layer4.1.conv1, Activation shape: torch.Size([1, 512, 7, 7])
      +Layer: layer4.1.conv2, Activation shape: torch.Size([1, 512, 7, 7])
      +Layer: fc, Activation shape: torch.Size([1, 1000])
      +

      After obtaining these activations, we compute the average activation values across the channels (neurons) within a specified layer of interest. This process provides insights into which regions or patterns in the input images contribute significantly to the neuron activations within that layer. We then create heatmap visualizations based on these average activations, highlighting the areas of the input data that have the most substantial impact on the network’s feature detection process. This allows us to gain valuable insights into how the network perceives and prioritizes various features across its layers, aiding in our understanding of the model’s inner workings.

      We use this approach to compare the baseline model to the final model, aiming to identify significant differences in feature prioritization or the patterns detected at various layers.

      Figure 7. Heatmap visualization at four different layers when an image of a goldfish is passed into the ResNet

      Results and Discussion

      Baseline Model

      First, we perform a grid search over batch sizes ranging from 128 to 512 and learning rates ranging from 0.0001 to 0.01.

      Figure 8. Hyperparameter grid for baseline model

      The results from the first hyperparameter search suggest that conservative learning rates and large batch sizes lead to good performance. Thus, we perform a finer grid search over batch sizes ranging from 256 to 512 and learning rates ranging from 0.00001 to 0.0001.

      Figure 9. Finer hyperparameter grid for baseline model

      Based on the results from the second hyperparameter search, we choose our baseline model to be ResNet-18 fine-tuned with a batch size of 256 and a learning rate of 0.00005. The baseline model achieves nearly 73% accuracy on the validation set, which is possibly due to the fact that TinyImageNet has fewer classes, so classification may be an easier task.

      Effect of Hyperparameters

      Number of Unfrozen Layers

      Next, we evaluate how the number of unfrozen layers (up to 3) affects the robustness of the trained models, whose weights can either be initialized from the baseline model or from the pre-trained/default model (in the diagram below, is_finetuned=True corresponds to the baseline model).

      Figure 10. Performance of trained models as number of frozen layers and source of initialized weights changes

      First, we observe that training for more epochs does not improve the metrics of interest. This implies that training for robustness can be computationally efficient. Next, we observe there is a substantial drop in accuracy for the perturbed datasets compared to the original validation dataset, which is to be expected. Pairing the accuracies for the perturbed datasets across hyperparameter combinations, we observe that they are tightly correlated, which implies that our models are effectively adapting to the perturbation.

      One interesting observation to note here is that accuracies on the perturbed datasets are significantly higher for the model initialized with default weights (27% compared to 10%). An intuitive explanation for this is that we have deliberately engineered a brittle baseline model, so the model is in a region of the optimization landscape characterized by high accuracy but low robustness. If we want to achieve high accuracy and high robustness, we may need to start from a more favorable position in the optimization landscape.

      Finally, we observe that freezing some layers can enhance robustness for models initialized from the default weights at the cost of performance on the original task. This aligns with intuition, since allowing all the weights to vary can lead to overfitting, resulting in more brittle networks.

      Batch Size

      Next, we evaluate how batch size (ranging from 4 to 512) affects the robustness of the trained models.

      Figure 11. Performance of trained models as batch size and source of initialized weights changes

      We notice immediately that batch size has a considerable effect on robustness. For both the perturbed training set and the perturbed validation set, accuracies are markedly lower with large batch sizes (around 15%) and higher with small batch sizes (around 70%). As expected, this comes at the expense of lower performance on the original task, with original validation accuracy dropping 10% as the batch size decreases from 512 to 4. Depending on the use case, this may be an efficient tradeoff to make!

      Learning Rate

      Finally, we evaluate how learning rate (ranging from 0.00001 to 0.001) affects the robustness of the trained models.

      Figure 12. Performance of trained models as learning rate and source of initialized weights changes

      Like batch size, learning rate significantly impacts robustness. The sweet spot for learning rate in terms of robustness seems to be around 0.00025, with original validation accuracy dropping as the learning rate becomes more conservative; a learning rate of 0.00025 leads to a 3% drop in performance. Like before, this may be a worthwhile tradeoff to make.

      Out of Sample Evaluation

      Using the insights gained from the hyperparameter search, we define the final model with the following hyperparameters:

      is_finetuned=False
      +num_unfrozen_layers=3
      +batch_size=8
      +learning_rate=0.00025
      +

      Of course, this is likely not the optimal hyperparameter combination, since we were not able to perform a full grid search. The results are as follows:

      Table 1. Performance of final model
      Dataset Accuracy
      Original validation 0.522754
      Perturbed training 0.569572
      Perturbed validation 0.442720
      Hold-out validation 0.485621
      Out-of-distribution validation 0.489786

      Original validation, perturbed validation, and hold-out validation accuracy are somewhat lower than the optimistic estimates derived from the hyperparameter search. However, we observe that we are able to achieve nearly 50% accuracy on the out-of-distribution validation set, which contains pixel modification perturbations that the model was never trained on, underscoring the robustness and adaptability of our model.

      Model Comparison

      Lastly, we observe the progression of feature map representations: starting from basic visual elements such as edges and textures in the initial layers, to more complex patterns in intermediate layers, and culminating in sophisticated, high-level feature representations in the deeper layers. This layered evolution is integral to the network’s ability to analyze and recognize complex images.

      When comparing the baseline model to the final model, there are very few (if any) differences in the initial layers. By the intermediate and deeper layers, there are clear differences in which aspects of the images have the greatest activation. This observation aligns with the foundational principles of convolutional neural networks, where initial layers tend to be more generic, capturing universal features that are commonly useful across various tasks. As a result, the similarity in the initial layers between the baseline and final models suggests that these early representations are robust and essential for basic image processing, irrespective of specific model optimizations or task-focused training.

      However, the divergence observed in the intermediate and deeper layers is indicative of the specialized learning that occurs as a result of hyperparameter tuning in the final model. These layers, being more task-specific, have adapted to capture more complex and abstract features relevant to the particular objectives of the final model.

      Figure 13. Comparison of the heatmaps for both models when passed in an image of a frog
      Figure 14. Comparison of the heatmaps for both models when passed in a perturbed image of a frog

      Conclusion and Next Steps

      In this project, we have undertaken a comprehensive exploration of enhancing ResNet through data augmentation with adversarial examples and straightforward hyperparameter tuning. Key highlights include the computational efficiency and simplicity of the employed technique, the resulting model’s ability to adapt to both seen and unseen perturbations, and the capacity to finely control tradeoffs between robustness and accuracy through the manipulation of diverse hyperparameters.

      There are many potential avenues for future exploration. One prospect involves expanding and refining the discussed techniques by continuing to explore the hyperparameter space, considering additional parameters or refining the search range. Additionally, applying this analysis to different architectures and domains could reveal further insights. Finally, broadening the scope of perturbations presents another avenue, offering the potential to develop more practical models tailored for real-world applications.

      \ No newline at end of file diff --git a/blog/2023/graph-articulated-objects/index.html b/blog/2023/graph-articulated-objects/index.html new file mode 100644 index 00000000..cd838a60 --- /dev/null +++ b/blog/2023/graph-articulated-objects/index.html @@ -0,0 +1,32 @@ + Graph Articulated Objects | 6.S898 Deep Learning Blogs 2023

      Graph Articulated Objects

      Pre-trained large vision-language models (VLMs), such as GPT4-Vision, uniquely encode relationships and contextual information learned about the world through copious amounts of real-world text and image information. Within the context of robotics, the recent explosion of advancements in deep learning has enabled innovation on all fronts when solving the problem of generalized embodied intelligence. Teaching a robot to perform any real-world task requires it to perceive its environment accurately, plan the steps to execute the task at hand, and accurately control the robot to perform the given task. This project explores the use of vision-language models to generate domain descriptions. These can be used for task planning, closing the gap between raw images and semantic understanding of interactions possible within an environment.

      Project Background

      Recent advancements in generative AI have transformed robotic capabilities across all parts of the stack, whether in control, planning, or perception. As self-driving cars roll out to public roads and factory assembly-line robots become more and more generalizable, embodied intelligence is transforming the way that humans interact with each other and automate their daily tasks.

      Across the robotic manipulation stack, we are most interested in exploring the problem of scene representation; using the limited sensors available, how might a robot build a representation of its environment that will allow it to perform a wide range of general tasks with ease? While developments in inverse graphics like NeRF have given robots access to increasingly rich geometric representations, recent work in language modeling has allowed robots to leverage more semantic scene understanding to plan for tasks.

      Introduction to Task Planning

      In robotics, the term task planning is used to describe the process of using scene understanding to break a goal down into a sequence of individual actions. This is in contrast with motion planning, which describes the problem of breaking a desired movement into individual configurations that satisfy some constraints (such as collision constraints). While simply using motion planning to specify a task is necessary for any generalized robotic system, task planning provides robots with a high-level abstraction that enables them to accomplish multi-step tasks.

      Take the problem of brushing one’s teeth in the morning. As humans, we might describe the steps necessary as follows:

      1. Walk to the sink.
      2. Grab the toothbrush and toothpaste tube.
      3. Open the toothpaste tube.
      4. Squeeze toothpaste onto brush.
      5. Brush teeth.
      6. Rinse mouth.
      7. Clean toothbrush.
      8. Put everything back.

      Planning Domain Definition Language (PDDL) Explained

      Creating a task plan is a trivial task for humans. However, a computer must use a state-space search algorithm like A* search to plan a sequence of interactions from a start state to a desired goal state. Doing so requires us to define a standard that formally specifies all relevant environment states, along with the preconditions and effects of all possible transitions between two states.

      The Planning Domain Definition Language (PDDL) was invented to solve this problem. Description languages like PDDL allow us to define the space of all possible environment states using the states of all entities that make up the environment. Environments are defined as a task-agnostic domain file, while the problem file defines a specific task by specifying a desired start and end state.

      Figure 1: This figure above breaks down a simple PDDL domain file into its constituent components. A PDDL domain generally consists of predicates, which describe the state, and actions, which specify state transitions.

      Despite task planning’s utility, however, there is one major drawback; this approach to planning requires the robot to have a detailed PDDL domain file that accurately represents its environment. Generating this file from perception requires not only a semantic understanding of all objects in a space, but also of all possible interactions between these objects, as well as all interactions that the robot is afforded within the environment. Clearly, there is a major gap between the task-planning literature and the realities of upstream perception capabilities.

      The use of LLMs in robotic planning and reasoning has exploded in the past few years, due to the promise of leveraging a language model’s internal world understanding to provide more information for planning. One such work is LLM+P, which combines an LLM with a classical planner to solve a given problem specified in natural language, using PDDL as an intermediate representation. LLM+P works by converting the description into a PDDL problem representation, running a classical planning algorithm to find a solution, and then computing the sequence of actions back into a natural language description interpretable by humans. Importantly, LLM+P demonstrates that using an LLM to output a PDDL representation can be a viable strategy in solving planning problems that are specified to a robot. However, there are a few limitations. For one, LLM+P assumes that a relevant domain file is already provided to the robot, specifying all entities and their relationships within the environment’s context. While domain files are generally carefully crafted by hand, vision-language models can automate this process.

      LLMs have also been used to solve plans directly, to varying levels of success. Works like SayCan and LLM-Planner use the LLM as a planning engine directly, circumventing the need to use a traditional high-level planner completely. SayCan, in particular, uses a combination of language-grounded instructions and task affordances that indicate the robot’s ability to execute a given task, using language to determine the most viable skill to execute from a set of predefined skills. These bodies of work have greatly enabled the ability of robots to parse, understand, and execute instructions given to them by their operators as natural language. Particularly, an LLM’s ability to break a problem down into several constituent steps is critical to enabling long-horizon task planning with multiple steps.

      Language is an increasingly promising modality for robots to operate in, due to the ubiquity of relevant language data to learn real-world entity relations from the internet. However, foundation models that integrate vision and robot-action modalities enable even stronger semantic reasoning. Google’s Robot Transformer 2 (RT-2), for example, is a recent work that performs perception, planning, and control all in a single neural network, leveraging internet-scale data. One major drawback of visuomotor policies, such as that employed by RT-2, is that we lose interpretability of a robot’s internal representation.

      Nonetheless, multi-modal foundation models have proven to be a useful tool across the spectrum of robotic planning. Our project takes inspiration from the above works in LLMs for planning and extends the idea to domain-generation, allowing task-planners to work in real-world scenarios.

      The rapid advancement of LLMs and vision-language models opens up a world of possibilities in closing this gap, as robotic perception systems may be able to leverage learned world understanding to generate PDDL files of their own to use in downstream planning tasks. This project aims to investigate the question: can VLMs be used to generate accurate PDDL domains?

      Experimental Setup

      To investigate this, we explored the problem by testing the capabilities of VLMs on various tasks and levels of prior conditioning. This allows us to explore the problem on two axes: domain complexity and amount of information provided as a prior to the VLM. Each of these axes is chosen to progressively increase the complexity of the domain being explored, while also progressively increasing the amount of information available. Designing our experiments like this allows us to understand the importance of information and domain complexity and how they affect the overall results.

      Due to ease of access, we decided to use OpenAI ChatGPT’s GPT4-Vision functionality to run our experiments. A more comprehensive ablation may analyze these experiments across a wider range of VLMs.

      Domains of Interest

      Within the context of task planning for generalizable robotics, the problem of cooking in a kitchen setting is a fascinating one because of the combination of its usefulness and the high dimensionality and discretization of kitchen tasks. As a result, kitchen setups like cooking, cleaning, and cutting ingredients are great ways to understand task-planning, and are the domains that we chose to study in this work.

      Figure 2: A summary of domains that we are interested in studying. The three domains increase in complexity, which allows us to analyze the effect of complexity on the VLM's effectiveness.

      The three domains used in our study are:

      • Cut: Bagel + utensils used for cutting ingredients
      • Cook: Everything in Cut + a pan, spatula, and a stove
      • Clean: Everything in Cook + a soap bottle, a sink, and a sponge

      Our handcrafted “ground-truth” domain files are designed to support the target tasks of cutting a bagel, cooking a sliced bagel, and cleaning utensils, respectively. Ideally, a good generated PDDL file is one in which these tasks are supported.

      Prompting Strategies

      We also experimented with four different prompting strategies, with each strategy providing progressively more information to the VLM for its PDDL generation task. All prompts provided to the VLM consist of the target image, along with a text-based prompt meant to guide the VLM towards a more accurate PDDL representation.

      The strategies are as follows, along with examples used by our experiment for the cut domain. Text that was added progressively to the prompt is bolded:

      1. Raw Generation: Image + generic prompt
        • You are a robot that needs to execute task planning in the setup shown in the image. Given the image, please generate a Planning Description Domain Language (PDDL) domain file that describes the scene.
      2. Prompt 1 + describe each object in the scene
        • You are a robot that needs to execute task planning in the setup shown in the image. This image includes a bagel, a plate, a bowl, and a knife. Given the image, please generate a Planning Description Domain Language (PDDL) domain file that describes the scene.
      3. Prompt 2 + describe the target task
        • You are a robot that needs to execute task planning to cut the bagel in the setup shown in the image. This image includes a bagel, a plate, a bowl, and a knife. Given the image, please generate a Planning Description Domain Language (PDDL) domain file that describes the scene.
      4. Prompt 3 + explain object relations in detail
        • You are a robot that needs to execute task planning to cut the bagel in the setup shown in the image. This image includes a bagel, a plate, a bowl, and a knife. In order to cut the bagel, one must use the knife and place the bagel and knife on the plate beforehand. I can place the bagel on the plate or the bowl, and cut the bagel using the knife. Given the image, please generate a Planning Description Domain Language (PDDL) domain file that describes the scene.

      Evaluation Metric: Embedding Cosine Similarity

      Figure 3: Our evaluation process. We used a vector-embedding-space cosine similarity metric to conduct our analysis, using a handcrafted PDDL file as input.

      Since several different PDDL domains can be used to represent the same set of actions and predicates, the task of generating PDDL files is quite subjective. Since generating PDDL tasks is an often-tedious task that humans must do themselves to represent any given domain, we evaluate each VLM output based on its similarity to real PDDL domains handcrafted manually. After asking the VLM to generate a PDDL file, both the target and the generated domain descriptions are embedded using the Longformer: Long Document Transformer model, before finally computing the cosine similarity between the two embeddings. We decided to use Longformer, instead of a more standard model like LLAMA 2 or BERT, due to the long context-length afforded to us by the model; a more comprehensive study would require studying these effects on a wider range of embedding models to more accurately understand the text-to-text similarity between the two domains.

      Note that this cosine similarity in the embedding space is quite a coarse metric to evaluate our outputs for a couple of reasons. The primary concern with this evaluation approach has to do with the transferability between PDDL files, which are specified in a LISP-like syntax, and natural language documents, which Longformer was trained to embed. In this study, we assume that such an embedding model can be used to make such a comparison, and discuss our study accordingly.

      Aside from this, PDDL’s structure also provides several keywords that are commonly used by all PDDL files, such as action, predicate, and preconditions. In order to handle these, we decided to simply remove all instances of these words from both the target and the generated PDDL files, in order to mitigate the effect of the similarity between these tokens.

      Results

      After experimenting on a wide range of complex environments with various prompting strategies, it seems that VLMs perform quite well for the task of generating PDDLs from image and text conditioning. We measured the similarity of the ground truth PDDL file with each image and experiment’s generated PDDL file. To quantitatively measure similarity, we used the cosine similarity metric on the embeddings of the masked pieces of text using Longformer. We did not use BERT due to the context length not being long enough for our PDDL files. Recall that we masked out certain frequently appearing words in PDDL files so as to not inflate the similarity due to this commonality.

      The exact generated PDDL files can be found at this link

      First, we will qualitatively analyze the generated words in each of the three categories of the PDDL files: types, predicates, and actions. Then, we will also provide quantitative metrics that measure similarity directly with the ground truth PDDL files that we wrote.

      Types

      Types are the first part of PDDL files. They describe the various sorts of objects that appear in the image. For example, in the “cut” image, the generated types are “utensil, plate, food”. Note that the types often compress similar sorts of objects, e.g. both spatulas and knives fall under the type “utensil”. Type generation is somewhat inconsistent, since types are not strictly required by PDDL files to exist, which could contribute towards why certain generated PDDL files do not have a types section at all.

      Predicates

      Predicates in the PDDL files are descriptive phrases that describe distinct parts of the scene, at a given time. For example, in the “cut” image, experiment 4 has the following predicates “(plate-empty), (bowl-empty), (bagel-on-plate), (bagel-on-bowl), (knife-on-plate), (bagel-cut)”. Note that these are not precisely representative of the current state of the image, but rather represent what states could also appear in the future, e.g. “(bagel-cut)”, even though the bagel is not yet cut. The generated predicate set is surprisingly accurate, regardless of which experiment we use.

      It seems that all four experiments generate approximately the same predicate set. For the “cut” image, all of the predicates generally have the objects “bagel”, “knife”, “plate”, etc., and sometimes where they are placed relative to each other. In the later “cook” and “clean” images, there are also predicates conditioning on whether the bowl/plate is clean or not. In particular, the generated predicates for Experiment 1 – where we do not tell the VLM the task – also make sense with respect to the inferred task! This evidence suggests that the generated predicates match the planned task, thus implying that the VLM is able to learn the task quite well just based on the image.

      Figure 4: The correspondence between the inputted image and the generated predicates. Note that the predicates are not descriptive of just the current state, but also are descriptive of the later potential states based on the inferred task.

      Actions

      Similar to the predicate generation, the action generation is extremely accurate. The various sequences of predicted actions make sense for the given images and conditioning. For example, one of the generated action sequences from Experiment 1 is:

      (:action prepare-sandwich :parameters (?b - food ?p - container) :precondition (and (contains ?p ?b) (is-clean ?p)) :effect (and (inside ?b ?p) (not (empty ?p))) )

      This is a very detailed sequence of actions, which also makes sense – in order to prepare a sandwich, the generated PDDL file notices we need the food and the container, and then checks if it is clean and not empty.

      Again, the results from Experiment 1 compared to the later experiments which have more textual conditioning are extremely similar, indicating that most of the information the VLM collects is from the image. Our added conditioning does not seem to improve generation of the action sequences much more.

      Quantitative Analysis with Cosine Similarity

      Along with qualitative analysis of each part of the PDDL file, we also performed a holistic analysis of the entire PDDL file that compares similarity with our handcrafted ground truth PDDL file. We measured the cosine similarity between the two PDDL files, for each experiment in each image. Due to the general format of PDDL files, certain words appear at the same places many times. Hence, we masked these words out, in order to not inflate the similarity in a superficial manner.

      Table 1: Cosine similarities using the Longformer embedding model.
      Figure 5: Bar chart detailing cosine similarity in all tested scenarios. While there are clear trends between prompting strategies, all cosine similarity scores tend to hover around 98%.

      As we can see, our methods performed quite well, with masked cosine similarity consistently above 0.98. This makes sense qualitatively as well, since as discussed above, the VLM generated types, predicates, and actions that made sense.

      One of the most noteworthy aspects of the above data is that according to this metric:

      • Experiments 1-3 all perform similarly, with some doing better than others in different images.
      • Experiment 4 consistently performs worse than Experiments 1-3.

      This is surprising, since we would expect that more conditioning implies better performance. In Experiment 4, we added certain conditioning of the form of textual relationship between objects in the image. This result leads us to the conclusion that adding this sort of conditioning is not helpful for PDDL file generation, and is in fact negatively correlated with performance. Previous analysis has implied that the VLM learns extremely well from the image alone, and this result suggests that in fact it is better to let the VLM learn only from the image, without adding too much of our own conditioning.

      Conclusion: Limitations and Future Work

      Our work analyzes the potential of the recent advances in VLMs for the purposes of robotic task planning. By creating a systematic set of experiments over increasingly complex images, we were able to showcase the power of VLMs as a potentially very powerful tool for general task planning problems. The accurate generation of PDDL files based on only the images shows us that VLMs learn from images extremely well, without the need for extra textual conditioning. In fact, we noticed that providing too much conditioning actually can decrease performance, thus further suggesting that VLMs learn best from images. This result is promising for generalizing to the greater context of robotic task planning, since vision is one of the most prominent ways in which robots dynamically task plan when navigating real-world environments. Harnessing the power of VLMs could prove to be the future of robotic task planning.

      There are a couple of limitations in our work, which have the potential for future exploration. In order to test the true utility of the generated domain files, we would need to also generate problem PDDL files, after which we could run the problem on the domain to test the robustness of the domain. The qualitative and quantitative metrics in our study heavily imply that our domain file is valid, by testing on ground truth PDDL files. However, a more comprehensive study could also concurrently generate problem files, which are tested on the generated domain file. Perhaps a method could be made which alternatively trains both the problem and domain files by iteratively testing the problem on the domain, similar to the idea of a Conditional Generative Adversarial Network (GAN). Another limitation is that we only tested on one VLM architecture. A more comprehensive ablation may analyze these experiments across a wider range of VLMs.

      \ No newline at end of file diff --git a/blog/2023/graphs-transformers/index.html b/blog/2023/graphs-transformers/index.html new file mode 100644 index 00000000..fb4a0957 --- /dev/null +++ b/blog/2023/graphs-transformers/index.html @@ -0,0 +1,42 @@ + Graph Transformers | 6.S898 Deep Learning Blogs 2023

      Graph Transformers

      A study of Transformers' understanding of fundamental graph problems, where we propose a new, tailored architecture highlighting the model's potential in graph-related tasks.

      Motivation & Project outline

      Our project aims to advance the understanding of Transformers in graph theory, focusing on the Shortest Path Problem, a cornerstone of graph theory and Dynamic Programming (DP). We introduce a custom Graph Transformer architecture, designed to tackle this specific challenge. Our work begins with a theoretical demonstration that the shortest path problem is Probably Approximately Correct (PAC)-learnable by our Graph Transformer. We then empirically test its performance, comparing it against simpler models like Multilayer Perceptrons (MLPs) and sophisticated benchmarks like Graph Neural Networks (GNNs). This study seeks to validate the Graph Transformer as an effective tool for solving fundamental graph-based problems, and “simple” DP problems in particular.

      Left: example of an Erdős–Rényi graph, right: original Transformer architecture

      Introduction & Literature review

      Transformers have shown significant effectiveness in domains that require an understanding of long-range dependencies and contextual information. Originally prominent in natural language processing, their applications have expanded to include areas such as computer vision and speech recognition. Recent explorations have also delved into Transformers’ abilities in mathematical tasks like arithmetic, GCD computations, and matrix operations, shedding light on the learning mechanisms of these models.

      A particular area of interest within these applications is graph problems. Recent research has assessed Transformers’ performance in this domain and explored adapting the Transformer architecture to fit the context of graph problems. However, much of the current research does not focus on Transformers’ comprehension of fundamental graph challenges, such as the shortest path problem. Notably, in the studies mentioned above, the shortest path is often directly input as a matrix, with each entry $i,j$ representing the shortest path distance between nodes $i$ and $j$. Our study will investigate Transformers’ performance on “raw” graph data, where only edge weights, the adjacency matrix, and positional encodings are provided. The Transformer will be trained to predict the shortest path from a designated node 0 to all other nodes, in the form of an $n\times1$ vector.

      We will demonstrate that, by adapting the Transformer architecture for our purposes, the shortest path problem and other “simple” dynamic programming (DP) challenges are Probably Approximately Correct (PAC)-learnable by the model. Our approach is based on the framework developed for GNNs and adapted to our Graph Transformer.

      Graph Transformer Model Design

      Let’s dive into our Graph Transformer model, drawing inspiration from the classical Transformer architecture.

      Vanilla Transformer

      We first recall the vanilla architecture of Transformers, described in , which is fundamentally built on two key ideas: tokenization and attention, both of which we adapt for graph data.

      In our context, think of tokens like the attributes of nodes in Graph Neural Networks (GNNs). These tokens are packets of information, allowing transformers to handle diverse data types, including graphs. The process begins with a token net, which is a sequence of linear and non-linear layers. This is somewhat equivalent to the alternating aggregation and combination stages in a GNN, where each node processes and integrates information from its neighbors.

      The real game-changer in transformers, however, is the attention mechanism, layered on top of the token net. This mechanism involves a set of matrices known as query, key, and value. These matrices enable tokens to use information from the nodes they’re paying attention to, in order to learn and update their own values.

      Here’s a simple way to visualize it. Imagine each token in the transformer scanning the entire graph and deciding which nodes (or other tokens) to focus on. This process is driven by the query-key-value matrices. Each token creates a ‘query’, which is then matched against ‘keys’ from other tokens. The better the match, the more attention the token pays to the ‘value’ of that other token. Mathematically, this can be expressed as:

      \[Attention(Q, K, V) = softmax \left(\frac{QK^T}{\sqrt{d_k}} \right)V\]

      In this formula, $ Q $, $ K $, and $ V $ represent the query, key, and value matrices, respectively. The term $ \sqrt{d_k} $ is a scaling factor based on the dimensionality of the keys.

      While the process in Graph Neural Networks (GNNs) might seem similar, there’s an essential distinction to be made. In GNNs, the flow of information is local, with nodes exchanging information with their immediate neighbors. However, in our Graph Transformer model, we employ self-attention to potentially allow each node (or token) to consider information from the entire graph. This includes nodes that might be several steps away in the graph structure.

      One axis of our research is then to explore the potential benefits - or drawbacks - of this global perspective, and to see how leveraging global information compares to the traditional local feature aggregation used in GNNs, in the context of graph theory challenges like the Shortest Path Problem. By enabling each node to have a broader view of the entire graph, we’re exploring how this approach influences the prediction quality (Accuracy) and the efficiency of path computations, specifically focusing on the speed at which the network adapts and learns (Training Efficiency).

      A full Transformer will be a sequence of self-attention layers and MLPs. We now turn to the specifics of how we implement it, starting with tokenization.

      Vanilla Transformer architecture (Source: 6.S898 Lecture)

      Tokenization Approach and Positional Encoding

      The first step in our model is converting graph information (including nodes, edges, and their weights) into a format suitable for transformers. We’ve developed a method to encode this graph data into tokens.

      Each token in our system is a vector with a length of $2n$. Here, $n$ represents the number of nodes in the graph. Half of this vector contains binary values indicating whether a connection exists to other nodes (1 for a connection, 0 for no connection). The other half of the vector holds the weights of these edges.

      \[\text{Token} = [\text{Edge Connections (Binary Values)}, \text{Edge Weights}] = [\mathbf{a}, \mathbf{w}]\]

      This structure seems sufficient to capture the essential structure of the graph. But, to further aid the transformer in identifying the shortest path, we can introduce additional local information into these tokens through positional encoding. Encoding positional information of the nodes has already been achieved in various ways, for example, using graph kernels . Here we choose a simpler one-hot encoding method: we assign an arbitrary rank to each node and include an indicator vector within the token. This vector, also of size $n$, points to the node’s position. With this addition, each token becomes a vector of size $3n$:

      \[\text{Token} = [\text{Edge Connections}, \text{Edge Weights}, \text{Positional Encoding}] = [\mathbf{a}, \mathbf{w}, \mathbf{1}]\]

      We plan to rigorously test both approaches as part of our diverse model lineup.

      Attention in Graph Transformers - the Necessity of a Skip-Connection

      The Query-Key-Value (QKV) Attention Mechanism is a pivotal aspect of how Graph Transformers can effectively learn the Shortest Path Problem. Building on the insights from Dudzik et al. , who illustrated the capacity of GNNs to tackle Dynamic Programming challenges, including the Shortest Path Problem, we delve into how Transformers might achieve similar feats using attention.

      Recall the Bellman-Ford algorithm’s key update step for the Shortest Path Problem, expressed as:

      \[d_i^{k+1} = \min_j d_j^k + w_{i, j}\]

      In this context, our hypothesis is that Transformers could replicate this dynamic through the attention mechanism, which we prove mathematically in Appendix A. The key observation is that the softmax layer would be able to mimic the $ \min $ operator, as long as the query-key cross product is able to retrieve $d_j + w_{i,j}$ for all nodes $i,j$. Intuitively, this can be done if each query token $i$ picks up on the node’s positional encoding, and each key token $j$ on the node’s current shortest path value $d_j$ and edges values $w_j$. Taking the cross product of the onehot encoding $i$ with edges values $w_j$ would then return exactly $w_{i,j}$ for all $i,j$. To select only neighboring connections, we’ll use an appropriate attention mask.

      However, there is a catch. The learning process might not fully grasp the Bellman-Ford update using the attention mechanism alone. After the attention picks up on the correct minimizer neighbour token $j$, it needs to update the current node $i$’s values. The Bellman-Ford update isn’t a simple operation on the tokens like a sum. For instance, we only want $d_i^k$ to change, and we want to update it with the correct $w_{i,j}$. This is where the idea of incorporating a skip-connection mechanism comes into play. By concatenating tokens $i$ (the input) and $j$ (the attention’s output) before feeding them to the MLP layer following the self-attention layer, we could effectively emulate the Bellman-Ford update process.

      Overall, combining attention and skip-connection could ensure our Graph Transformer can comprehensively learn and apply the Bellman-Ford logic to solve the Shortest Path Problem. We offer a mathematical proof of this concept in Appendix A, using a slightly different tokenization method.

      Additionally, it’s worth considering that our Graph Transformer might be learning an entirely distinct logical process for solving the Shortest Path Problem. Still, proving that such a logic is within the model’s grasp underlines the model’s versatility in addressing some graph-related and/or dynamic programming challenges. We’ll tackle this notion in the next part about learnability and algorithmic alignment.

      Graph Transformer - Skip connection

      Model Architecture Overview

      In this section, we revisit the architecture of our Graph Transformer, which is an adaptation of the standard Transformer model. Our model is composed of a sequence of self-attention layers and MLPs, each augmented with a skip-connection. The tokens in our model encapsulate both edge connections and their corresponding weights, alongside positional encoding.

      The most notable feature of our architecture is the introduction of the attention mask. This mask restricts the attention of each token to its immediate neighbors, aligning our approach more closely with the local message-passing process typical in GNNs. The inclusion or not of this feature and the resultant effect in our architecture marks the crucial difference between the global vs. local token aggregation methodologies that we discussed earlier.

      A measure of learnability

      Our project falls into the wider research interest in the interaction between network structures and specific tasks. While basic and common structures such as MLPs are known to be universal approximators, their effectiveness varies based on the amount of data required for accurate approximations. Notably, their out-of-sample performance often lags behind task-specific architectures, such as Graph Neural Networks (GNNs) in graph-related problems, which highlights the issue of a network’s generalization capacity.

      To evaluate theoretically the ability of transformers to effectively learn the Shortest Path Problem and similar challenges, we position our study within the framework of PAC (Probably Approximately Correct) Learning. This framework allows us to explore the concept of algorithmic alignment. Algorithmic alignment is here crucial as it pertains to a model’s capability to emulate a given algorithm with a minimal number of modules, each of relatively low complexity. Such approach has already been taken by Xu et. al to give a better understanding of the reasoning process of complex networks like GNNs, and it is instrumental in assessing the adaptability and efficiency of transformers in learning and solving complex graph-based tasks.

      Algorithmic Alignment

      In this section, we delve into a series of definitions to establish the mathematical groundwork of our investigation.

      We first recall a definition of PAC-Learnability:

      Definition (PAC learning and sample complexity)

      Let \(\{x_i,y_i\}_{i=1}^M\) be i.i.d. samples from some distribution $ \mathcal{D} $, and suppose $ y_i = g(x_i) $ for some underlying function $ g $. Let \(f = \mathcal{A}(\{x_i, y_i\}_{i=1}^M)\) be the function generated by a learning algorithm $ \mathcal{A} $. Then $ g $ is $ (M, \epsilon, \delta) $-learnable with $ \mathcal{A} $ if

      \[\mathbb{P}_{x \sim \mathcal{D}} [\| f(x) - g(x) \| \leq \epsilon] \geq 1 - \delta\]

      where $ \epsilon > 0 $ is the error parameter and $ \delta \in (0, 1) $ the failure probability.

      We then define the sample complexity as \(\mathcal{C_A}(g, \epsilon, \delta) = \min M\) for every $ M $ such that $ g $ is $ (M, \epsilon, \delta) $-learnable with $ \mathcal{A} $.

      This is a crucial concept in computational learning theory that helps us understand the feasibility of learning a given function from a set of examples to a certain degree of approximation, with a certain level of confidence.

      Next, we outline a definition that connects the concepts of function generation with the architecture of neural networks.

      Definition (Generation)

      Let $ f_1, \ldots, f_n $ be module functions, $ g $ a reasoning function and $ \mathcal{N} $ a neural network. We say that $ f_1, \ldots, f_n $ generate $ g $ for $ \mathcal{N} $, and we write \(f_1, \ldots, f_n \underset{\mathcal{N}}{\equiv} g\) if, by replacing $ \mathcal{N}_i $ with $ f_i $, the network $ \mathcal{N} $ simulates $ g $.

      Using these ideas, we then introduce a key point for our project: algorithmic alignment, which we intend to validate for Transformers applied to the Shortest Path Problem.

      Definition (Algorithmic alignment)

      Consider a neural network $ \mathcal{N} $ with $ n $ modules \(\mathcal{N}_i\) that tries to approximate a reasoning function $ g $. Suppose that there exists $ f_1, \ldots, f_n $ some module functions such that \(f_1, \ldots, f_n \underset{\mathcal{N}}{\equiv} g\). Then $ \mathcal{N} $ is $ (M, \epsilon, \delta) $-algorithmically aligned with $ g $ if there are learning algorithms \(\mathcal{A}_i\) for the \(\mathcal{N}_i\)’s such that \(n \cdot \max_i \mathcal{C}_{\mathcal{A}_i} (f_i, \epsilon, \delta) \leq M\).

      A small number of samples $M$ would then imply good algorithmic alignment, i.e. that the algorithmic steps $f_i$ to simulate $g$ are easy to learn.

      Finally, we state the following theorem, proven by Xu et al. It provides evidence that generalization benefits from algorithmic alignment.

      Theorem 1 (Algorithmic alignment improves sample complexity)

      Fix $\varepsilon$ and $\delta$. Suppose ${x_i, y_i} \sim D$, where $|x_i| < N$, and $y_i = g(S_i)$ for some $g$. Suppose $\mathcal{N}_1, \dots \mathcal{N}_n$ are $\mathcal{N}$’s MLP modules in sequential order. Suppose $\mathcal{N}$ and $g$ algorithmically align via functions $f_1, …, f_n$, as well as the following assumptions.

      i. Algorithm stability. Let $\mathcal{A}$ be the learning algorithm for the \(\mathcal{N}_i\)’s. Suppose \(f = \mathcal{A}(\{x_i, y_i\}^M_{i=1})\), \(\hat{f} = \mathcal{A}(\{\hat{x}_i, y_i\}^M_{i=1})\). For any x, \(\|f(x) - f(\hat{x})\| < L_0 \cdot \max_i\|x_i - \hat{x}_i\|\), for some \(L_0\).

      ii. Sequential learning. We train the \(\mathcal{N}_i\)’s sequentially. The inputs for $\mathcal{N}_j$ are the outputs from the previous modules \(\mathcal{N}_1, \dots, \mathcal{N}_{j-1}\), while labels are generated by the correct functions \(f_{1}, ..., f_{j-1}\).

      iii. Lipschitzness. The learned functions $f_j$ satisfy \(\|f_j(x) - f_j(z)\| \leq L_1\|x - z\|\), for some $L_1$.

      Then $g$ is learnable by $\mathcal{N}$.

      Application to Transformers

      We now apply this theoretical framework to Transformers. The justifications of the results in this part will be a combination of sketches of mathematical proofs and empirical evidence. We begin with the following result:

      Lemma 1 (Transformers algorithmically align with the Shortest Path Problem)

      Let $ \mathcal{T} $ be a Transformer, let $ g $ be the reasoning function of the Shortest Path Problem applied to a graph with $n$ nodes. Then $ \mathcal{T} $ is algorithmically aligned with $ g $.

      We can directly prove this lemma. Let $ f_1, \ldots, f_n $ be the Bellman-Ford update processes of the Shortest Path Problem: \(d_u^{k+1} = \min_{v \in \mathcal{N}(u)} d_v^{k} + c(u, v)\) where $\mathcal{N}(u)$ is the set of neighbors of node $u$. From Bellman-Ford algorithm, we have: \(f_1, \ldots, f_n \underset{\mathcal{T}}{\equiv} g\), with $g$ being the shortest path function.

      Then, from our discussion on Transformers attention layers and proof in Appendix A, each attention-MLP sequence $\mathcal{N}_i$ has a learning algorithm $\mathcal{A}_i$ such that $f_i$ is learnable with $\mathcal{A}_i$. Each sample complexity is then bounded by M, which concludes the proof.

      We can now state the following theorem:

      Theorem 2 (Transformers can learn the Shortest Path Problem)

      Let $ \mathcal{T} $ be a Transformer, let $ g $ be the shortest path function. Then, $g$ is learnable by $\mathcal{T}$.

      We provide here a sketch of a proof of this theorem. From Lemma 1, $\mathcal{T}$ and $g$ algorithmically align via $f_1, \ldots, f_n$. We must now check the 3 assumptions of Theorem 1.

      Sequential Learning (ii) is clearly true, since Transformer architectures incorporate sequences of MLPs (associated with attention layers). Li et al have provided an empirical proof of the algorithm stability (i) of transformers. Finally, considering a self-attention token network combined with a ReLU-MLP for each layer of $\mathcal{T}$, every function in the related learning algorithm $\mathcal{A}_i$ (softmax, ReLU, Linear) is Lipschitz-continuous, hence their combination is Lipschitz-continuous too, which validates Assumption (iii).

      We can then apply Theorem 1 and conclude the proof.

      Having laid the theoretical foundation for our problem, we now turn our attention to the practical application, where we employ our Graph Transformer to the concrete task of learning and solving the Shortest Path Problem.

      Methodology for Training and Evaluation

      Constructing the Dataset

      For training and evaluating our different models, we generate a comprehensive dataset comprising 50,000 samples, each representing a graph. These graphs were randomly created following the Erdős–Rényi model, specifically the $\mathcal{G}(n, p)$ variant, where n represents the number of nodes and p is the probability of edge formation between any two nodes. In our dataset, each graph consists of 10 nodes (n = 10), and the edge probability (p) is set at 0.5. This setting ensures a balanced mix of sparsely and densely connected graphs, providing a robust testing ground for the Graph Transformer’s ability to discern and compute shortest paths under varied connectivity scenarios.

      Furthermore, we assign to the edges in these graphs some weights that are integral values ranging from 1 to 10. This range of weights introduces a second layer of complexity to the shortest path calculations, as the Graph Transformer must now navigate not only the structure of the graph but also weigh the cost-benefit of traversing various paths based on these weights. The inclusion of weighted edges makes the dataset more representative of real-world graph problems, where edges often have varying degrees of traversal difficulty or cost associated with them.

      This dataset is designed to challenge and evaluate the Graph Transformer’s capability in accurately determining the shortest path in diverse graph structures under different weight conditions. The small number of nodes ensures a wide variability in the degree of connectivity in a sample graph. It also allows for an initial performance evaluation on smaller-scale problems, with the potential to extend these studies to larger-scale graphs in the future. Hence, the dataset’s structure supports a comprehensive assessment of the model’s performance and its adaptability to a wide range of graph-related scenarios.

      Shortest path distribution for our entire dataset (50,000 samples)

      Training Protocols

      In the fixed dataset approach we’ve employed, the dataset is pre-constructed with 50,000 graph samples and remains unchanged throughout the training process. This method, involving a consistent 60/20/20 split for training, validation, and testing, ensures that every model is assessed under the same conditions at each epoch. This consistency is crucial for our primary goal: to compare the performance of different models or architectures in a controlled and repeatable manner. In contrast, an on-the-fly approach, where data is dynamically generated during each training epoch, introduces more variability. This variability can be beneficial in a second step for thoroughly testing the robustness and adaptability of a single model, as it faces new and diverse scenarios in each epoch. However, for our first objective of directly comparing different models, the fixed dataset approach provides a more stable and reliable framework to begin with.

      We use the Adam Optimizer because it’s good at handling different kinds of data and works efficiently. The learning rate is set at a standard value of 0.001, which serves as a common and reliable starting point, ensuring a consistent basis for comparing the learning performance across all models.

      Our main tool for measuring success is the L1 loss function. This function is suited for our shortest path problem because it treats all mistakes the same, whether they’re big or small. It’s different from the L2 loss, which is harsher on bigger mistakes. This way, our model pays equal attention to finding shorter and longer paths correctly.

      Metrics and Evaluation Criteria

      We use two main metrics to check how well our models perform: L1 Loss and Accuracy. L1 Loss adds up all the differences between the predicted and actual path costs across all nodes. It’s a direct way to see how well the model is doing.

      \[L1 \, Loss = \frac{1}{N} \sum_{i=1}^{N} |y_i - \hat{y}_i|\]

      where $ N $ is the total number of nodes, $ y_i $ is the actual path cost for the $i$-th node, and $ \hat{y}_i $ is the predicted path cost for the $i$-th node.

      Accuracy is the second measure. It shows what percentage of nodes the model got exactly right in predicting the shortest path. It’s a simple way to understand how precise our model is.

      \[Accuracy = \frac{\text{Number of Correct Predictions}}{\text{Total Number of Predictions}} \times 100\%\]

      Here, a prediction is counted as “correct” if its rounded value equals the true shortest path. I.e., if the model predicts 10.3 for a node, but the true shortest path is 11, this is marked as incorrect. If it predicts 10.7, it will be counted as correct.

      Together, these two measures help us see how well our Graph Transformer is doing compared to other models like MLPs and GNNs, especially in solving shortest path problems in graphs.

      Results and Comparative Analysis

      In our analysis, we compared the performances of MLPs, Transformers, and GNNs using our generated dataset. Initially, we evaluated the performance of each architecture across different sizes by recording in-sample and out-of-sample losses at each epoch, along with out-of-sample accuracy. We compared three model sizes: “small,” “mid,” and “large,” which correspond to the depth of the model. For GNNs, this signifies the number of iterations; for Transformers and MLPs, it refers to the number of layers. Small models have 2 iterations/layers, mid models 5, and large models 10.

      To maintain fair comparisons, the MLP and the Transformer were designed to have an equal total number of trainable parameters at each size. We excluded GNNs from this comparison, as they outperformed both models with significantly fewer parameters.

      GNN performance

      Our GNNs demonstrated exceptional performance on the shortest path task. Tailoring the model’s architecture to this problem (using maximum aggregation and initializing node features appropriately) likely contributed to this success. However, several interesting observations emerged from our results. We compared GNNs of three different sizes: small (2 iterations, 13k parameters), medium (5 iterations, 32k parameters), and large (10 iterations, 64k parameters).

      GNN Training loss, validation loss & validation accuracy for different sizes

      We observed that both medium and large GNNs achieved over 99% out-of-sample accuracy after just a few epochs. The large model’s performance aligns with expectations, as it conducts 10 iterations in total—matching the maximum number of iterations required by standard shortest-path-finding algorithms like Bellman-Ford for n-node graphs.

      Surprisingly, the medium-sized model, with only 5 iterations, also achieved similar accuracy. This initially seems counterintuitive since 5 iterations suggest that information can only propagate to nodes within 5 hops. However, as noted in prior work, our GNN effectively learns an optimized version of the Bellman-Ford algorithm, requiring only half the number of iterations compared to the standard algorithm.

      This explains why the medium GNN can converge efficiently, but the small model with just 2 iterations cannot. Even with an optimized Bellman-Ford algorithm, a 2-iteration GNN would only correctly solve paths shorter than or equal to 5 nodes, limiting its overall learning capacity.

      MLP performance

      Although GNNs quickly converged to near-perfect predictions, their inherent suitability for the shortest path task was expected. To gauge the Transformers’ performance more accurately, we compared them with MLPs, which are not specifically designed for this task. As indicated in prior work, MLPs struggle with iterative algorithms like Bellman-Ford due to difficulties in learning for-loop structures. We analyzed MLP performance across three sizes: small (2 layers, 44k parameters), medium (4 layers, 76k parameters), and large (8 layers, 142k parameters). It’s important to note that each GNN size had roughly half the parameters of the corresponding MLP size.

      MLP Training loss, validation loss & validation accuracy for different sizes

      The smaller MLP models converged faster, yet both small and medium models barely exceeded 50% accuracy, even after extensive training (16 epochs for GNNs and 64 for MLPs). This supports the hypothesis that MLPs face challenges in learning iterative algorithms.

      Increasing model size or training duration did not significantly improve performance; the largest model struggled particularly with fitting the problem. While more hyperparameter tuning might enhance the “large” model’s performance, the “medium” model’s struggles suggest that MLPs have inherent difficulties with this task, regardless of parameter count.

      Transformer performance

      Turning our attention to Transformers, we initially doubted their ability to match GNN performance levels. However, the question remained: could they outperform MLPs, and if so, by how much? We began by testing a basic Transformer version (no attention mask, positional encoding, or skip connection). To ensure fair comparisons, all model sizes maintained approximately the same number of parameters as the MLPs, with equivalent layers/iterations (small: 2 layers, 44k parameters; medium: 5 layers, 86k parameters; large: 10 layers, 172k parameters).

      Transformer training loss, validation loss & validation accuracy for different sizes

      A notable improvement in accuracy was observed, with the best-performing Transformer model reaching 70% accuracy. The training was stopped at 64 epochs to maintain consistency across all models. As it does not show signs of overfitting, extending training beyond 64 epochs might further enhance the Transformer’s performance. Interestingly, increasing the model size to over 150k parameters did not significantly boost performance under our hyperparameter settings. The small and medium architectures exhibited similar performance, with the medium model slightly outperforming after a few epochs.

      Regarding sizes, similarly to the MLP, increasing the depth and parameter count of the transformer over 150k parameters doesn’t seem to help with the model’s performance, at least with our set of hyperparameters (as this big of a transformer is long to train, we haven’t been able to do much hyperparameter tuning). The small and medium architectures seem almost tied, but the medium one seems to perform better after a few epochs.

      Our hypothesis in Part 1 suggested that Transformers, capable of performing $O(n^2)$ operations per attention head, should learn loop structures more effectively. However, their learning is constrained by the specific operations allowed in the attention mechanism. To test this, we proposed three enhancements to our Transformer: an attention mask, positional encoding, and a skip connection, as outlined in Part 1 and Appendix A. We hypothesized that these additions would enable the Transformer to better learn the Bellman-Ford iteration step.

      Transformer with Attention Mask, Positional Encoding & Skip Connection

      As discussed in Part 1, we adapted our Transformer model to include these three components, expecting an improvement in performance. The attention mask, a fundamental feature of Transformers, enables the model to focus on specific token relationships. In our setup, each token (node) attends only to its neighbors, as dictated by the adjacency matrix. We incorporated the attention mask into the medium-sized Transformer for comparison.

      Next, we added positional encoding. Based on our Part 1 discussion, positional encodings can inform the feedforward network (FFN) about the neighboring tokens selected by the attention layer. We used basic one-hot encodings, effectively adding an $n×n$ identity matrix or concatenating an $n×1$ one-hot vector to each token. Although more sophisticated encodings might be beneficial, we demonstrated the feasibility of using one-hot encodings for the Bellman-Ford update.

      Finally, we implemented a custom skip connection. Instead of a standard sum skip connection, our model concatenates the input and output of the attention head before feeding it into the FFN. This approach potentially allows the attention head to select a neighbor, with the FFN combining its token with the receiving node’s token.

      We added each augmentation stepwise, building upon the previous modifications (e.g., transformer_pos_enc includes positional encoding, attention mask, and is medium-sized). Here are the results:

      Transformer training loss, validation loss & validation accuracy for different sizes

      Each augmentation step led to clear improvements. Over 64 epochs, our base model’s out-of-sample accuracy improved from 70% to over 90%. The positional encoding contributed the most significant enhancement, which was somewhat surprising given its simplicity. Overall, these results support our hypothesis regarding the Transformer’s capacity to learn the Bellman-Ford iteration step.

      Conclusion

      In this project, we compared MLPs, Transformers, and GNNs in solving graph-related problems, with a focus on the shortest path in Erdos-Renyi graphs. Our findings indicate GNNs excel in such tasks due to their specialized architecture. However, the adaptability of Transformers, particularly with architectural modifications like attention masks, positional encodings, and skip connections, is a significant discovery. While these models showed promise, larger MLP and Transformer models faced convergence issues, highlighting the need for better hyperparameter optimization in future work.

      Transformers’ theoretical success in approximating the Bellman-Ford algorithm, verified by empirical results, suggests potential in a subset of dynamic programming (DP) problems where DP updates are simple and manageable by attention heads. However, their capability is inherently limited compared to the theoretically more versatile GNNs, due to the softmax and linear combination constraints in attention mechanisms. Future work could delve into designing Transformer models with enhanced attention mechanisms, potentially broadening their applicability in complex DP problems. Investigating the synergy between Transformers and GNNs could also lead to innovative hybrid models.

      Overall, our exploration sheds light on the potential of Transformers in graph-related tasks, suggesting they could offer valuable insights and solutions, alongside the more established GNNs. This finding could open up interesting possibilities for research and innovation in neural network applications, particularly in solving complex graph-related challenges.

      Appendix

      Appendix A.

      We present here a mathematical proof of how the Graph Transformer Architecture can learn the Bellman-Ford update in the Shortest Path Problem.
      We consider a slightly different tokenization: for every node $i$, at layer $k$, we encode its information in a tensor of the form:

      \[t_i^k = [\mathbb{1}_i, w_i, d_i^k]\]

      where $\mathbb{1}_i \in \mathbb{R}^n$ is the positional encoding, $w_i \in \mathbb{R}^n$ the edges weights and $d_i^k$ the current shortest distance computed at layer $k$.

      Recall the formula of query-key-value attention:

      \[t_i = \frac{\sum_{j} \exp(-q_i' k_j / \sqrt{2n+1})\, v_j}{\sum_{j} \exp(-q_i' k_j / \sqrt{2n+1})}\]

      Set up the weights matrices as:

      \[\begin{cases} W_Q = \begin{pmatrix} I_{n+1} & O_{n \times n+1} \\ 1_n & 0_{n+1} \end{pmatrix}\\ W_K = \begin{pmatrix} O_{n+1 \times n} & I_{n+1} \end{pmatrix}\\ W_V = I_{2n+1} \end{cases}\]

      so that \(q_i' k_j = w_{j,i} + d_j\) i.e. attention is determined by the update values of the Bellman-Ford equation.

      Hence taking the softmax - and if necessary augmenting the weights of the matrices by a common factor -, we have the output \(t_{j^\star}\) for the appropriate node \(j^\star = \text{argmin}_j \{w_{j,i} + d_j\}\).

      Notice that in this configuration \(t_{j^\star}\) is not enough to retrieve the desired edge weight \(w_{i, j^\star}\) : we need the positional encoding from node $i$.

      The skip-connection achieves this, by concatenating original input $t_i$ with attention output \(t_{j^\star}\). We can then retrieve the desired value \(w_{j^\star,i} + d_{j^\star}\) with the MLP of layer $k$, which concludes the proof.

      \ No newline at end of file diff --git a/blog/2023/guided-transfer-learning/index.html b/blog/2023/guided-transfer-learning/index.html new file mode 100644 index 00000000..c9780f24 --- /dev/null +++ b/blog/2023/guided-transfer-learning/index.html @@ -0,0 +1,32 @@ + Guided Transfer Learning and Learning How to Learn: When Is It Useful? | 6.S898 Deep Learning Blogs 2023

      Guided Transfer Learning and Learning How to Learn: When Is It Useful?

      For downstream tasks that involve extreme few-shot learning, it's often not enough to predispose a model with only general knowledge using traditional pre-training. In this blog, we explore the nuances and potential applications of Guided Transfer Learning, a meta-learning approach that allows a model to learn inductive biases on top of general knowledge during pre-training.

      Introduction/Motivation: Never Enough Data

      If we take a step back and reflect upon the current state of AI, especially in domains like computer vision and NLP, it appears that the gap between machine and human intelligence is rapidly narrowing. In fact, if we only consider aspects such as the predictive accuracy of discriminatory models and the sensibility of outputs by generative models, it may seem that this gap is almost trivial or even nonexistent for many tasks. However, every time we submit a training script and leave for the next few hours (or few weeks), it becomes abundantly clear that AI is still nowhere near human intelligence because of one critical kryptonite: the amount of data needed to effectively train AI models, especially deep neural networks.

      While we have tons of training data in domains such as general computer vision (e.g. ImageNet) and NLP (e.g. the entirety of the internet), other domains may not have this luxury. For example, bulk RNA-sequencing data in biomedical research is notoriously cursed with high dimensionality and extremely low sample size. Training AI models on bulk RNA-sequencing datasets often leads to severe overfitting. In order to successfully utilize AI in domains like biomedicine, the highest priority challenge that must be addressed is that of overcoming the necessity of exorbitant amounts of training data.

      Machine vs Human Intelligence

      It often feels like the requirement of having abundant training samples has been accepted as an inevitable, undeniable truth in the AI community. But one visit to a preschool classroom is all that it takes to make you question why AI models need so much data. A human baby can learn the difference between a cat and a dog after being shown one or two examples of each, and will generally be able to identify those animals in various orientations, colors, contexts, etc. for the rest of its life. Imagine how much more preschool teachers would have to be paid if you needed to show toddlers thousands of examples (in various orientations and augmentations) just for them to learn what a giraffe is.

      Fortunately, humans are very proficient at few-shot learning– being able to learn from few samples. Why isn’t AI at this level yet? Well, as intelligence researchers have discussed , biological brains are not born as empty slates of neurons with random initial connections. Millions of years of evolution have resulted in us being born with brains that are already predisposed to learn certain domains of tasks very quickly, such as image recognition and language acquisition tasks. In these domains, learning a specific task like differentiating between a cat and a dog or between letters of the English alphabet doesn’t require exposure to many samples. Additionally, as we gain more experiences throughout life, we acquire general knowledge that can help us learn new tasks more efficiently if they’re similar to something we’ve learned before. Thus, naturally, the first step toward bridging the gap between natural and machine intelligence is somehow finding a way to predispose an AI to be able to learn any specific task within a certain domain with very few samples. The advent of traditional transfer learning has attempted to approach this predisposition task from the “general knowledge” perspective.

      Traditional Transfer Learning: Learning General Knowledge

      Transfer learning has been invaluable to almost all endeavors in modern deep learning. One of the most common solutions for tasks that have too little training data is to first pre-train the model on a large general dataset in the same domain, and then finetune the pre-trained model to the more specific downstream task. For example, if we need to train a neural network to determine whether or not a patient has a rare type of cancer based on an X-ray image, we likely will not have enough data to effectively train such a model from scratch without severe overfitting. We can, however, start with a model pre-trained on a large image dataset that’s not specific to cancer (e.g. ImageNet), and if we start training from those pre-trained weights, the downstream cancer diagnostic task becomes much easier for the neural network to learn despite the small dataset size.

      One way to intuitively understand why this is the case is through the lens of “general knowledge”. When the model is pre-trained on ImageNet data, it learns a lot of knowledge about image data in general; for example, the earlier layers of the model will learn low-level features detectors (e.g. edge detectors, simple shape detectors, etc.) that will likely be useful for any computer vision task. This can be viewed as the model learning “general knowledge” about the domain of image data broadly. When we then fine-tune this model on a cancer dataset, the model doesn’t have to relearn the ability to detect these general, low-level features. This general knowledge encoded in the pre-trained weights regularizes the model and mitigates overfitting, as it predisposes the model to learn relationships/feature detectors that are generalizable and sensible within the context of image data.

      However, if transfer learning could solve all our problems, this blog post wouldn’t exist. When our downstream dataset is in the extremities of the high dimensional, low sample size characterization (e.g. in fields like space biology research, more on this later), learning general knowledge in the form of pre-trained weights isn’t enough. How, then, can we predispose models such that they can do extreme few-shot learning, or even one-shot learning? Enter guided transfer learning.


      Guided Transfer Learning and Meta-learning: Learning Inductive Biases

      Guided transfer learning (GTL) is a meta-learning paradigm proposed by the group Robots Go Mental. The main idea for guided transfer learning is that, instead of just having the AI model learn general knowledge, we also want the AI to learn how to learn. Specifically, we want it to learn how to pick up new knowledge most efficiently for a particular domain/modality of data. This means during pre-training, the model, in addition to learning good initial weights, will also learn inductive biases that affect the future, downstream training process.

      GTL is a very novel method; its preprint was just released in the past few months! Hence, beyond the experiments in the original preprint, there has not been much exploration of some of its behavioral nuances and various application scenarios. So in this blog, I will be doing a few experiments that attempt to gain more insight into some of my questions that were left unanswered by the original GTL paper.

      But before we get to that, let’s first get a rundown on how GTL works! The two most important concepts in GTL are scouting and guide values.

      Scouting

      Inductive biases, which affect what kind of functions a model can learn, are usually built into the choice of deep learning architecture, or decided by other hyperparameters we humans choose. With guided transfer learning, they can now be learned automatically during pre-training. It’s almost like the model is figuring out some of its own optimal hyperparameters for learning in a particular domain.

      Sounds like magic, right? How does GTL allow a model to learn inductive biases? Well, the core behind the GTL approach is a process known as scouting, which is an alternative to traditional pre-training. The high-level idea is that it trains copies of the model, called scouts, on easier subproblems. These subproblems should be similar to the target downstream tasks, but easier so that the scouts are more likely to successfully converge to a generalizable model. (If the scouts themselves overfit, then how can the inductive biases they learn help our downstream few-shot training not overfit?)

      In the process of converging, the scouts keep track of which parameters in the model are important to keep flexible for efficient convergence and which ones aren’t. They’re basically logging their learning process.

      For example, if weight A increases drastically during training, it’s probably an important weight to change and we should keep it flexible. On the other hand, if weight B doesn’t change much at all or fluctuates in a very noisy manner (i.e. doesn’t change meaningfully), it is probably not as important to change.

      After the scouts are finished training, the collective feedback from all the scouts is used to decide what inductive biases to impose on the main model, such that the main model can learn most efficiently for the particular domain of data and avoid wasting effort and being distracted/misguided by changing parameters that don’t really help in that domain.

      Guide Values

      So what do these “inductive biases” actually look like, and how do they affect future training? The inductive biases in the context of GTL come in the form of guide values. So after scouting, each parameter will not only have its usual weight value, but it will also have a guide value. During gradient descent, the normal update for a particular weight is then multiplied by its corresponding guide value. Thus, the larger the guide value, the more that parameter is allowed to change during downstream training.

      In this very simple neural network with two weights, we can see here that weight `A` has a guide value of 0.56, while weight `B` has a guide value of merely 0.01. Thus, weight `A` is more flexible, in other words allowed to change more, than weight `B` during downstream training. In fact, weight `B` is pretty much frozen, as its guide value of 0.01 makes it so that weight B can barely change throughout training.

      Thus, the goal of scouting is to find these optimal guide values, which will ultimately make the training process more sparse (i.e. so that only the weights that are useful to change get changed). Note that this is different from making the neural network model itself more sparse (i.e. setting weights/connections that are useless to zero).

      Calculating Guide Values

      So how do we actually get the guide values after training the scouts? Well, as mentioned above, we keep track of how parameters change during the scout training processes. Specifically, during the training of each scout, we log the initial value and final value (i.e. value after convergence) of each parameter in the model. Then, we calculate how much each parameter changes throughout the process of convergence via some distance metric between its initial and final value. The default used in the GTL paper was the squared distance: \((w_b - w_f)^2\), where \(w_b\) is the baseline (initial) value of the parameter \(w\), and \(w_f\) is its final value.

      Now, each scout will converge differently, since they are trained on slightly different subproblems (more on this later). To have a robust estimator of how much some parameter \(w\) changes during convergence, we take the mean squared change of the parameter across all the scouts. Let’s call this value \(m_w\).

      Assuming we have \(N\) scouts, this would be: \(m_w = \frac{1}{N}\sum_{i=1}^{N}(w_{b,i} - w_{f,i})^2\), where \(w_{b,i}\) and \(w_{f,i}\) are the initial and final values (respectively) of parameter \(w\) in scout \(i\).

      Add on a 0-1 normalization across the \(m_w\)s of all the parameters in the model, and we have our guide values (all of which are between 0 and 1)!

      Intuitively, we can see that parameters that changed a lot throughout the convergence process in the scout models are deemed “important to change during training” and are thus given higher guide values (i.e. closer to 1), allowing them to be more flexible for downstream fine-tuning.

      It’s really quite an elegant and simple approach, which is the beauty of it! It’s comparably lightweight in terms of both memory and computation compared to many other popular meta-learning/few-shot learning methods. The devil is in the details on how to design the subproblems for scouts, which will be one of the main aspects we will be exploring in the experiments later in this blogpost.

      Experiment and Results from the GTL Paper

      Before we get started with some of our own experiments to explore more nuances of GTL behavior and benefits, it might be nice to establish that– Yes, it does work! Or, it at least provides very impressive benefits.

      In the original GTL paper, Nikolić et al. tested how much benefit GTL would provide for few-shot learning tasks specifically in the domain of computer vision. Specifically, they tested one-shot learning capability on the Omniglot dataset. The Omniglot dataset is a popular few-shot learning dataset containing characters from 50 different alphabets, with only 20 handwritten examples of each (I will also be using this dataset for a couple of my experiments below). The conventional problem set up with Omniglot is to train/fine-tune your model on just one example of each character, and use the rest of the examples for validation/testing.

      To test one-shot learning, a model is trained/fine-tuned on one example of a character and tested on its ability to identify that character in validation samples. Figure from Nikolić et al.

      To prepare a one-shot learner for this task, Nikolić et al. pre-trained a very basic CNN using the following GTL pipeline:

      1. Pre-train the model traditionally on MNIST (lots of data there!). The goal here is to have the model acquire general knowledge in the form of pre-trained weights. No inductive biases yet.
      2. Scouting. The meat of GTL, where inductive biases are learned!
      3. Downstream fine-tuning and evaluation on Omniglot using the one-shot scheme described above.

      The most interesting part is the second step: scouting! Remember, we have the following criteria for the scout problems:

      • There need to be multiple different scouting problems (so that we can have an ensemble of different scouts contributing to the guide value calculations, making the guide values more robust)
      • The scout problems need to be easy enough so that the scouts can actually successfully learn generalizable models! Again, if the scouts themselves overfit, the guide values derived from them won’t be very helpful for downstream one-shot learning :)
      • The scout problems need to be similar to the downstream task, i.e. in the same domain (in this case, computer vision) and of the same kind of problem (e.g. in this case, classification). If the scout problems are too different, why would the inductive biases be transferable?

      Given these criteria, Nikolić et al. used the following scheme for generating scouting tasks:

      • Create subdatasets of MNIST (termed “cousin” problems in the paper), where each subdataset/cousin contains data for only three of the digits in MNIST (120 of these cousin datasets were created in the paper).
      • Train a scout on each of the cousin problems (120 scouts total).

      This scheme satisfies all three criteria above. We now have multiple different scouting problems. These scouting problems are also comparatively way easier than the downstream task (there’s way more training data than Omniglot, and it’s only a 3-category classification problem). BUT, despite being easier, they’re still similar enough to the downstream task such that we can expect transferability (it’s still a handwritten character image classification task, after all).

      And this worked quite spectacularly! Here are the results from their paper:

      Left plot shows validation accuracy curves for a model that was traditionally pre-trained and a model that was pre-trained with the addition of GTL. Right plot shows the distribution of guide values in the model that was pre-trained with GTL. Figures from Nikolić et al.

      The plot on the left shows the validation curves for the downstream one-shot Omniglot task for 1) a model that was pre-trained traditionally (line in blue) and 2) the model that was pre-trained traditionally and underwent GTL scouting (line in orange). Although the GTL model was still only able to get around 25% validation accuracy, that’s quite impressive for only getting one example of each character, and is a significant improvement over the model that only experienced traditional pre-training.

      Interestingly, the plot on the right plots the distribution of guide values. We see a heavy right skew, indicating that most of the guide values are very close to 0! This means downstream fine-tuning has been made very sparse (very few parameters were allowed to change drastically), providing very strong inductive biases that heavily influenced how the model was allowed to learn. These inductive biases, as the results suggest, seem to be correct for the task at hand. But that shouldn’t be surprising because they were, in a way, learned.

      And that is the beauty of GTL. We no longer have to “guess” what inductive biases (often in the form of architectural choices) might be appropriate for a certain domain; instead, we have these biases be “learned”!


      Answering Unanswered Questions: Exploring the Nuances

      Now that we see GTL does provide noticeable benefit for one-shot learning tasks based on the experimental results from Nikolić et al., I would like to run some additional experiments of my own to explore some of the nuances of when GTL can be helpful, how we can optimize the benefit we get from using it, and how we should go about designing scout problems. These questions had not been explored in the original GTL paper, and since no other piece of literature has yet to even mention GTL, I thought I’d take the lead and try to gain some initial insight into some of these open topics :)

      Experiment 1: Can GTL compensate for lack of pre-training data (not just lack of fine-tuning data)?

      So we’ve established that GTL can aid in learning downstream tasks with few training samples, but it still requires a large amount of pre-training data (e.g. MNIST), much like traditional transfer learning. What I want to know now is: what if we don’t have that much pre-training data? In such low pre-training-data contexts, performance on downstream tasks usually suffers as a result when using traditional transfer learning. Can the addition of scouting/GTL compensate for this lack of pre-training data? That is, can a model pre-trained with a small pre-training dataset + GTL do as well as a model that’s just traditionally pre-trained on a large pre-training dataset?

      Setup

      To test this, I pre-train a small CNN with a very similar GTL pipeline as the one used by Nikolić et al., but using only a mere 1000 of the full 60,000 samples from the MNIST dataset during pre-training/scouting. A significantly smaller pre-training dataset! I’ll sometimes refer to this subset of MNIST as “small MNIST”. I then evaluate the performance of this model on an Omniglot one-shot task and compare it to 1) a model that is only traditionally pre-trained on small MNIST (no GTL) and 2) a model that is traditionally pre-trained on the full 60,000-sample MNIST (also no GTL).

      Experiment setup

      Downstream Task Specification

      Note that the exact setup for the downstream Omniglot one-shot task used in the original GTL paper was not revealed. There are a few variations of one-shot learning setups, but the one I will be using is:

      1. Take a 100-category subset of the full Omniglot dataset (that is, 100 unique characters)
      2. Train the model on one example of each unique character (i.e. 100 training samples total), and use the rest as a validation set (i.e. 1900 validation samples total)
      3. The task is thus a 100-way classification problem (given a handwritten image, predict which of the 100 characters it is)

      Since the specification above is likely not the exact Omniglot problem setup used by Nikolić et al., and the hyperparameters they used are also not specified in the original paper, some of the baseline results I’m using do not quite match to the corresponding results in the original paper.

      Results and Analysis

      With that said, here are the resulting validation accuracy and loss curves for the downstream Omniglot one-shot task described above:

      Validation accuracy and loss curves for 1) model traditionally pre-trained with large MNIST, 2) model traditionally pre-trained with small MNIST, 3) model pre-trained with GTL and small MNIST. Note that an automatic early stopping mechanism was implemented to cut off training when validation accuracy stopped increasing.

      As we can see, when GTL is not used, pre-training on a 1000 sample subset of MNIST results in notably worse performance on the one-shot downstream task compared to pre-training on the full 60,000 MNIST (16% vs 21% max validation accuracy). This is as expected.

      However, if we use small MNIST and add scouting/GTL (using the same scout problem set up in the original GTL paper), we see that the resulting model ends up being able to reach almost the same max validation accuracy as the model traditionally pre-trained on the full MNIST dataset (20% vs 21%).

      What this suggests is that the inductive biases learned by GTL can compensate for any decrease in “general knowledge” (encoded in the form of pre-trained weights) that comes from having a smaller pre-training dataset. So not only is GTL helpful when you don’t have enough downstream data, it can also be helpful when you don’t have enough pre-training data!

      Additionally, if we inspect the validation losses, we see that, despite an apparent drop in validation accuracy, overfitting is still occurring in the shadows for all the models, as all the validation loss curves start rising after a certain point. However, the model that is pre-trained with GTL achieves the lowest validation loss of the three models before overfitting, and also starts overfitting the latest. So even though there’s no huge difference in the maximum validation accuracy achieved by the model that was pre-trained with GTL on small MNIST and the model that was traditionally pre-trained on full MNIST, the former is able to be optimized further before overfitting, suggesting that GTL with a small pre-training dataset provides a stronger “regularizing” effect than traditional transfer learning with a large pre-training dataset! This is certainly an interesting observation that could potentially have more obvious practical implications in certain scenarios, though we will not go into that further in this blog. The takeaway, however, is that GTL is, at the end of the day, really just a strong “regularizer”. If we look at how the orange and red curves look in both the accuracy and loss plots, we see the performance benefit that comes from adding GTL really just comes from the delay of overfitting. This regularization-based mechanism of performance improvement by GTL makes sense, as strong inductive biases hold the model back from learning “just anything” that fits the downstream training data.

      Experiment 2: How does the design of the scouting task affect downstream performance?

      Okay, it seems so far that the scouting pipeline used in the original GTL paper seems to be pretty helpful for various scenarios. But how did the authors arrive at that specific scouting task formulation? What if we used different scouting tasks than the ones they did? How does that affect GTL performance, and what might such differences (if any) imply? After all, when we leave the context of MNIST and Omniglot, we’ll have to be designing these scouting tasks on our own…

      Setup

      For the sake of experimental control, however, I will stick with MNIST and Omniglot for now (don’t worry, I deviate from these datasets in the next experiment). Here, I begin by testing the effects of changing how many categories are included in the cousin subdatasets that the scouts are trained on. The original paper used 3 categories per scout dataset (i.e. a 3-way classification task). What if we used 2? Or 4? And if that makes a difference, why?

      In my eyes, this experiment explores how similarity between the scout tasks and the downstream task affects transferability. Specifically, because the downstream Omniglot task is a 100-way classification problem, one might expect that scout tasks that include more classification categories (and are thus more similar to the downstream task) would result in better transferability.

      To test this, I use a 5000-sample subset of MNIST for pre-training/scouting (to save computation and time). For scouting, I create 120 cousin problems, as done in the paper. But instead of sticking to 3-category cousin problems, I also try 2-category, 4-category, and 5-category problems.

      Results and Analysis

      Here are the results:

      As we can see, apparently the number of categories doesn’t make too big of a difference in maximum validation accuracy! They all provide seemingly equal accuracy improvement from a baseline model pre-trained traditionally on the same 5000-sample MNIST subset. This isn’t too surprising. Compared to the 100-way downstream classification, the difference between 2-way and 5-way classification tasks would intuitively seem pretty negligible.

      The validation loss plot tells a slightly different story, however. We see most of the models pre-trained with GTL have similar loss curves, consisting of a lower minimal loss and more resilience to overfitting compared to the baseline model. However, the model based on scouts trained on 5-category cousin problems seems to achieve the worst (highest) minimum validation loss! This seems… a bit hard to explain. Perhaps this is just due to stochasticity; after all, we see that overfitting still occurs later relative to the baseline model, suggesting there still is some resilience to overfitting.

      But a perhaps more interesting explanation (that admittedly could be completely wrong) is that 5-category problems may have been too difficult of a scouting task given the smaller subset of MNIST used (since lots of categories + few training samples is often a recipe for overfitting). That is, perhaps many of the scouts themselves would have started overfitting while being trained on these subproblems, so the guide values derived from such scouts don’t end up providing robust enough inductive biases.

      Again, this is just a speculation, but if it were true, this could suggest an interesting tradeoff between the easiness of the scouting tasks and their similarity to the target downstream task. Make a scouting task too easy, and it’s too different from the target downstream task, and transferability suffers as a result. Make a task too similar to the target downstream task, and it might be too difficult, causing the scouts themselves to overfit and the resulting guide values to be less useful. An interesting balance to think about and explore further.

      The overarching takeaway from this experiment, however, seems to be that the exact number of categories for the scouting problems at this specific scale does not drastically affect downstream one-shot performance. Sure, I could have tried to keep increasing the number of categories, but keep in mind there’s also a bit of a tradeoff between number of categories and number of possible scouts past a certain point. For example, we would only be able to have one cousin problem with 10 categories (and it would be the whole MNIST dataset)!

      Experiment 3: What about unsupervised/self-supervised settings?

      Note: This particular experiment builds off of some previous work I have done outside of this class.

      For the final experiment, I would like to provide a bit of my research background for context. I’m primarily interested in applying/developing AI methodologies for biomedical research. Specifically, I work a lot with “omics” data (e.g. transcriptomics data like RNA-seq, proteomic data, etc.), which is a domain notoriously cursed with datasets characterized by high dimensionality and low sample size. This means that we are almost always forced to utilize pre-training and transfer learning in order to make any deep learning model work for specific downstream tasks. Sounds like the perfect context to apply GTL to!

      However, there’s one very important caveat. Pre-training in the omics domain is usually self-supervised, since large pre-training datasets are often aggregates of hundreds of smaller datasets from separate studies that don’t share the same labeling/metadata categories. So far, whether it’s the original GTL paper or our own experiments above, we have only explored GTL in the context of supervised pre-training, scouting, and fine-tuning. How can we adapt GTL when the pre-training (and perhaps the scouting) involve unlabeled data?

      To explore this, I will build off of one of my previous research projects, conducted while I was an intern at NASA Ames’ Space Biology Division. The project involved pre-training (traditionally) a large RNA-seq BERT-like model (called scBERT ) on a large unlabeled collection of RNA-seq data (recount3 ) in a self-supervised manner (via input masking, like the original BERT). I had evaluated this pre-trained model on a downstream classification task that involved predicting whether or not a mouse had been to space based on its RNA-seq profile. The downstream task involved fine-tuning on NASA’s OSDR datasets, which contain RNA-seq data from ground control mice and actual mice that have been to space during the NASA Rodent Research missions. The problem was, of course, that very few mice have been to space, so these datasets were tiny (i.e. 12 mice per dataset). It was quite an extreme exacerbation of the high dimensional, low sample size characterization I normally had to deal with. In that project, I showed that traditional pre-training provided significant benefits over training from scratch (as expected).

      GTL pipeline for scBERT: Scouting Problem

      Today, however, I would like to see if GTL can provide any additional benefits to that project. The most obvious challenge, as mentioned earlier, is creating scout problems out of an unlabeled pre-training dataset (recount3).

      Sure, we could use self-supervised masked input prediction for scouting, which is how scBERT is pre-trained traditionally. However, it’s not immediately clear, at least to me, how exactly we would create multiple different scout problems using this scheme (perhaps different masking patterns?). Additionally, we would ideally want the scout tasks to be more similar to the downstream task (which is a binary classification task, i.e. predicting whether or not a mouse sample is ground control or spaceflown) and share mostly the same architecture (i.e. more parameters with transferable guide values). Finally, as mentioned before, we would like to make the scouting tasks sufficiently easy so that the scouts can be successfully trained without overfitting. Given these criteria, I propose the following scouting problem:

      1. Reduce the dimensionality of recount3 dataset using UMAP, keeping only the top 30 UMAP dimensions (to make the next step computationally tractable)
      2. Cluster using K-means clustering. K=30 seems to provide visually logical clusters, so that’s the one we will go with.
      3. To create subdatasets (“cousin” problems), we choose random pairs of K-means clusters. Thus, each subdataset includes recount3 data from a random pair of clusters.
      4. For each subdataset created, train a scout to classify the cluster identity of the samples (a binary classification task). Thus, the scouting task is very similar to the downstream task (which is also binary classification). This also means we can use the same exact model architecture for both the scouting tasks and the downstream task (maximal transferability!).
      Pipeline for creating scout problems.

      Now, this might seem like a trivial task for the classifier. After all, we are clustering the data based on geometric proximity, then training a model to find decision boundaries between the clusters, so it would seem that the model could find a perfectly clean decision boundary pretty easily. However, keep in mind that the clustering is done in UMAP space, with only the top 30 UMAP components, while the classification is done in the original feature space. UMAP is a nonlinear transformation, so clusters that are easily perfectly separable in top 30 UMAP space may not be in the original space. However, it is definitely still a pretty easy task, but we want the scouting tasks to be doable enough so that the scouts can easily converge to a generalizable relationship. So theoretically, it seems reasonable that this could work! (((Admittedly, it took a lot of playing around before deciding on the above scouting formulation; it just ended up being the one that worked the best. I can’t tell you exactly why, but my reasoning above is the best “intuitive” reasoning I could come up with.)))

      GTL pipeline for scBERT: Downstream Task

      What about the downstream few-shot task? Here, I will use the same task that I had previously used to evaluate my traditionally pre-trained scBERT model:

      1. We train the model on a single NASA OSD dataset, OSD 105 , containing bulk RNA-seq data from 6 spaceflown and 6 ground control mice, and have it predict whether a mouse was spaceflown or ground control. A simple binary classification task, like the scouting problem, but much harder given the incredibly low sample size.
      2. We then validate using another similar NASA OSD dataset, OSD 104 , also containing 6 spaceflown and 6 ground control mice.

      It’s important to note that these two datasets, OSD 105 and 104, contain RNA-seq data from different muscle locations. OSD 105 contains tibialis anterior data, while OSD 104 contains soleus data. However, since these datasets all contain data from some sort of mouse skeletal muscle tissue, we expect that cross-dataset generalizability would be reasonable for a strong generalizable model, and I actually intentionally chose datasets from different muscle tissues to test this difficult problem of cross-tissue generalizability.

      Downstream few-shot binary classification task on NASA OSDR datasets (predicting whether mouse is a ground control or spaceflown sample based on its RNA-seq profile).

      GTL pipeline for scBERT: Whole Pipeline

      After deciding on the scouting problem formulation, the rest of the pipeline is pretty straightforward. Here’s the full pipeline:

      1. Pre-train scBERT traditionally on recount3 (self-supervised masked input prediction). This involves the encoder portion of the architecture, which embeds the input, and a reconstructor portion, which uses that embedding to reconstruct the masked input values. The goal here, as always, is to learn general knowledge about the domain (RNA-seq) in the form of good pre-trained weights.
      2. Scouting on recount3, using the scouting formulation described above. Here, we replace the reconstructor portion of the scBERT architecture with a classification layer. The goal here is, of course, to learn inductive biases in the form of guide values.
      3. Downstream few-shot fine-tuning on NASA OSDR datasets, using the few-shot formulation described above. Here, we use the same architecture as the scouts. All guide values transfer over!
      Full pipeline for applying GTL on scBERT.

      Results and Analysis

      And… here are the results for the downstream task! To compare, I’ve also included results for an scBERT copy that didn’t undergo any pre-training and an scBERT copy that was only traditionally pre-trained on recount3.

      Downstream training and validation accuracy/loss curves for (from left to right): scBERT without any pre-training, scBERT traditionally pre-trained, scBERT pre-trained with scouting (GTL). A cosine annealing learning rate scheduler was used, ala the original scBERT paper, hence the jumps and fluctuations.

      With no pre-training, we can see that severe overfitting to the training set happens almost immediately, with validation loss going up while training loss goes down. This makes sense given the extremely small size of the training set, and the fact that the training and validation sets are from different muscles. With traditional pre-training, however, we see that overfitting also does eventually happen, but right before it happens, at around 200 epochs, we get this sweet spot where validation loss is at a low and validation accuracy is at a high of around 90% (highlighted by the purple box). So it seems that general knowledge about RNA-seq data obtained from traditional pre-training already provides a regularizing effect that reins in the model from overfitting immediately to the small downstream training dataset. These results are from my previous work and are nothing new.

      Now, when we add scouting, the max validation accuracy becomes 100%, which is an improvement from the traditionally pre-trained model, though this by itself may not be that notable given the already good validation accuracy after traditional pre-training. What’s potentially more interesting, however, is that this maximum validation performance is maintained over three times as many epochs compared to the traditionally pre-trained model, suggesting that the maximal performance achieved by the GTL model is more robust. However, it is also worth noting that the validation accuracy is a lot noisier and jumps around a lot more for this model compared to the others (keep in mind cosine annealing learning rate scheduler is being used for all these models). But overall, it seems that guided transfer learning provides a more robust regularization effect, giving it a longer period of time with peak validation performance before overfitting occurs.

      This is quite exciting, as it shows that, given the right scouting problem setup, we can adapt GTL in settings where our pre-training data is unlabeled, as well! The flexibility of GTL that allows it to be adapted to such a large variety of scenarios is what, in my eyes, makes this method truly innovative!

      Closing Thoughts

      Experiment Limitations and Next Steps

      These experiments are merely to serve as a preliminary exploration of the nuances of GTL beyond what was presented in the original paper, in hopes that more questions will be explored by the community as GTL gains further publicity and traction. Thus, there is clearly plenty of room for improvement and next steps regarding these experiments.

      For experiment 1, I think it would be cool to establish a more rigorous characterization of the amount of pre-training data (or rather lack thereof) that the addition of GTL can compensate for in terms of downstream performance. This might involve using various even smaller subsets of MNIST and finding the boundary where a pre-training dataset is so small that even GTL cannot compensate for it.

      The results of experiment 2 obviously leave a lot to be desired, as I only explored single-digit values for the number of categories used in the scout problems. These values are all over an order of magnitude off from the number of categories in the downstream task, so none of them gave very useful insight into how “similar” scouting tasks need to be to the downstream task. This was, of course, limited by the MNIST dataset itself, which only had 10 categories. Perhaps using a pre-training dataset with more categories could allow a more comprehensive experiment of this type.

      And for experiment 3, I wish I had more time to curate a more robust validation scheme for the downstream few-shot task. A validation set with only 12 samples was really not granular enough to precisely capture the potential benefits of adding GTL on top of traditional transfer learning. When the traditionally pre-trained model is already getting 11/12 predictions correct at its best, is 12/12 really that meaningful of an improvement?

      How Exciting is GTL?

      As promising as all these results are, GTL is, of course, not the perfect end-all be-all solution to few-shot learning. As was discussed in the original GTL paper and shown in the experiments above, GTL can only provide so much improvement before hitting a wall (e.g. the one-shot learning ability on Omniglot never surpassed 25% validation accuracy). It does not yet quite result in models that match the few-shot learning ability of human intelligence, and still requires a considerable amount of pre-training data. However, the lightweight nature, simplicity, elegance, and adaptability of the model makes it so that it’s a (relatively) quick and easy solution to get a downstream performance boost on any AI pipelines that already utilize traditional transfer learning!

      \ No newline at end of file diff --git a/blog/2023/how-cnns-learn-shapes/index.html b/blog/2023/how-cnns-learn-shapes/index.html new file mode 100644 index 00000000..32295f9a --- /dev/null +++ b/blog/2023/how-cnns-learn-shapes/index.html @@ -0,0 +1,48 @@ + Can CNN learn shapes? | 6.S898 Deep Learning Blogs 2023

      Can CNN learn shapes?

      One widely accepted intuition is that Convolutional Neural Networks that are trained for object classification, combine low-level features (e.g. edges) to gradually learn more complex and abstracted patterns that are useful in differentiating images. Yet it remains poorly understood how CNNs actually make their decisions, and how their recognition strategies differ from humans. Specifically, there is a major debate about the question of whether CNNs primarily rely on surface regularities of objects, or whether they are capable of exploiting the spatial arrangement of features, similar to humans.

      Background

      One widely accepted intuition is that Convolutional Neural Networks (CNNs) that are trained for object classification, combine low-level features (e.g. edges) to gradually learn more complex and abstracted patterns that are useful in differentiating images. Stemming from this is the idea that neural networks can understand and use shape information to classify objects, as humans would. Previous works have termed this explanation the shape hypothesis. As puts it,

      … the network acquires complex knowledge about the kinds of shapes associated with each category. […] High-level units appear to learn representations of shapes occurring in natural images

      This notion also appears in other explanations, such as in

      Intermediate CNN layers recognize parts of familiar objects, and subsequent layers […] detect objects as combinations of these parts.

      Figure 1. shows that CNNs trained on ImageNet data are biased towards predicting the category corresponding to the texture rather than shape.

      Yet it remains poorly understood how CNNs actually make their decisions, and how their recognition strategies differ from humans. Specifically, there is a major debate about the question of whether CNNs primarily rely on surface regularities of objects, or whether they are capable of exploiting the spatial arrangement of features, similar to humans. Studies have shown that the extent to which CNNs use global features; shapes or spatial relationships of shapes, is heavily dependent on the dataset it is trained on. shows that CNNs trained on ImageNet data are biased towards predicting the category corresponding to the texture rather than shape. reveal that CNNs learn spatial arrangements of features only up to an intermediate level of granularity by comparing networks trained on Sketchy dataset, composed of sketches drawn by images of animals, and the Animals dataset, images of animals.

      Motivation

      The question leading this project is if it is possible to steer the learning of a CNN network to use abstracted global shape features as the dominant strategy in classifying images, in a similar sense that humans do. Previous works have shown that networks trained on texture agnostic datasets, or abstracted sketch datasets have an increased ability to integrate global features. Extending the findings of these works, I experiment to see if it is possible to induce the learning of CNNs to depend on global shapes by adjusting the filter size, or augmenting and curating the training data.

      Methods

      In the following experiments, I train a CNN on human-generated sketch data and test with conflict sets to determine if it has learned to integrate global features in its decision making. The objective is to push the network to learn and depend on global features (the overall shape) of the object rather than local features (direction or curvature of strokes) in classifying images. To do this, I first vary the filter sizes to see if there is an optimal sequence that enables the network to learn such features. Next I augment the data by fragmentation and by adding a false category so that the network is forced to learn to classify images even when the local information is obscured and only when global information is present. Finally, to test the ability of the models from each experiment in integrating the global feature, I design a conflict set that is different from the training data. Images in the conflict set have global features (overall shape) that align with their category but the local features (strokes and corner conditions) are distorted to varying degrees.

      Training Data

      The first way that the model is pushed to learn global features is by training it on human generated sketch data. This is distinct from the previous works that have used stylized image data, or image data that has been turned in to line drawings in that it is more driven by the human perception. It is likely that the data is more varied because it is each drawn by a different person, but what humans perceive as distinctive features of that object category is likely to be present across instances.

      The hypothesis is that because of the scarcity of features, and absence of other local features such as texture, the model would inevitably have to learn global features that humans commonly associate with object categories, such as shape.

      Figure 2. Example from circle and square category of Quick, Draw! dataset that are used in this project.

      For the following experiments I use 100,000 instances each from the circle and square categories of the Quick, Draw! dataset that have been rendered into 28x28 grayscale bitmap in .npy format. The dataset is split 85% for training and 15% for validation.

      Architecture and Training Hyperparameters

      The CNN architecture is composed of 3 convolution layers and 2 linear layers with max pooling and relu activation. The filter size of each convolution layer, marked as * is varied in the following experiments. We use cross entropy loss and accuracy is the portion of instances that were labeled correctly. Each model is trained for 20 epochs with batch size 256.

      nn.Sequential(
      +   data_augmentation,
      +   nn.Conv2d(1, 64, *, padding='same'),
      +   nn.ReLU(),
      +   nn.MaxPool2d(2),
      +   nn.Conv2d(64, 128, *, padding='same'),
      +   nn.ReLU(),
      +   nn.MaxPool2d(2),
      +   nn.Conv2d(128, 256, *, padding='same'),
      +   nn.ReLU(),
      +   nn.MaxPool2d(2),
      +   nn.Flatten(),
      +   nn.Linear(2304, 512),
      +   nn.ReLU(),
      +   nn.Linear(512, 2), # 2 categories (circle, square)
      +)
      +

      Convolutional Layer Filter Size

      The hypothesis is that the size of the filters of each convolution layer affects the scale of features that the network effectively learns and integrates in its final decision making. The underlying assumption is that if the filter size gradually increases, the CNN learns global scale features and uses that as its dominant strategy. I test for different combinations of sizes 3, 5, 7, and 9 to see if there is an optimal filter size to train a CNN network for our purpose.

      Data Augmentation - Fragmentation

      I train models with augmented data of different degree of fragmentation. Lower degrees of fragmentation divide the shape into 2 fragments and with higher degree, the shape is divided into an increasing number of parts. I do this by using masks that create streaks going across the image each in the horizontal, vertical and two diagonal directions. As a result, we create circles and squares with dashed lines.

      Figure 3. Augmentations with varying degrees of fragmentation.

      The hypothesis is that fragments of circles and squares may be similar, so as the network is trained to distinguish between the two categories regardless, it has to gain an understanding of larger scale features; how these line segments are composed. If the model successfully trains on datasets that are highly fragmented, it is expected to acquire knowledge of global features. For instance, an intermediate scale interpretation of circles would be that the angle of line segments is gradually rotating. On the other hand, squares would have parallel line segments up to each corner, where there is a 90 degree change in the angle.

      Data Augmentation - Negative Labels

      We add instances where the local features of the circle or square are preserved, but the global feature is absent, and label them as an additional category, ‘false’. We create this augmentation by masking half or 3/4 of the existing data. The intention here is to have the model learn to only categorize shapes when their global features are present.

      Figure 4. Augmentation with addition of ‘false’ category.

      Results

      Training Evaluation

      We first want to examine if the independent variables affect the model’s training on the classification task. There is the possibility that with certain filter sizes, the model may not be able to encode enough information to differentiate circles and squares. More likely there is a possibility with the augmentations that we are using to force the CNN to learn a more difficult strategy, where the model fails to train to classify instances similar to the training set to start with. If training the model is unsuccessful, it means that CNNs under those conditions are incapable of finding any strategy to differentiate the two shape categories.

      Conflict Set Evaluation

      To test the network’s ability to employ global features we borrow the approach of prior work that uses “conflict examples”. Conflict instances have the overall shape that aligns with their label, but the local features, such as stroke or texture, do not. The premise is that it is easy for humans, who primarily use global information to differentiate shapes, to successfully categorize these conflict sets. Therefore, it would be a good way to test if the trained CNNs use similar differentiating strategies as humans.

      Figure 5. Three conflict sets that obscure local features to contradict the global feature and ground truth label.

      We create three series of conflict sets for circle and squares that obscure its most distinguishing local features. The first set obscures the corner conditions - circles with one to two angular corners and squares with chamfered corners are included in this set. The second obscures line conditions - circles with angular lines and squares with curvy lines are created for this set. The third series targets the composition of strokes - instead of continuous lines, we use series of parallel lines of varying angles to form a circle or square.

      Filter Variation

      Figure 6. Training evaluation for variations in filter size of the convolution layer.

      For each variation in filter size, the models trained to reach over 98.5% accuracy on the validation set. Contrary to our speculation, the filter size did not largely affect the model’s ability to learn the classification task.

      Figure 7. Evaluation with conflict set for variations in filter size of the convolution layer.

      Overall we observe that having a large size filter at the final layer increases the model’s performance on the conflict set, as with filter sequences 337 and 339. We can speculate that having consistently smaller size filters in the earlier layers and only increasing it at the end (337, 339) is better than gradually increasing the size (357, 379). However, this is not true all the time, as models with consistent size filters performed relatively well (333, 555). Starting with a larger size filter (555, 557, 579 compared to 333, 337, 379) also helped in performance. However, this also came with an exception where 339 performed better than 559.

      Overall we can see that the models have trouble classifying instances with an increased degree of conflicting local features. For instance, the 4th instance in set 2 obstructs all four of the perpendicular angles of a square. The 3rd and 4th instances of set 2 have the most angular ridges forming their lines and the 7th and 8th instances of set 2 have the most circular ridges forming their lines. From set 3, the first and second instances obstruct the gradually changing angle of strokes within the circle the most.

      Data Augmentation Variation

      Based on the results with filter variation, we choose the filter size 555, which performed moderately well but still has room for improvement, for the next experiment with augmented training data.

      Figure 8. Training evaluation for variations in augmentation of training data.

      All models trained to reach over 98% accuracy on the validation set. As we speculated, the model had more difficulty in training with the augmentation as opposed to without. With the additional third negative category, the model was easier to train. This is evident with the divide in the plot with datasets that were augmented with the negative category to have higher evaluation values than the baseline and those that were only augmented with fragmented data were below the baseline.

      Figure 9. Evaluation with conflict set for variations in augmentation of training data.

      The performance of models trained with augmented data on the conflict set was worse than that of models trained only on the original data, which proves our initial hypothesis — that it would be possible to enforce the network to use global features with augmented data — wrong. What is interesting is how different augmentations affect the performance. Initially, we thought that with the increased degree of fragmentation in the augmentation, the model would learn global features better, and would perform better on the conflict set. However, in comparison among the augmentation variations, Aug 2 showed significantly poor performance. Adding a ‘false’ category did not boost the performance either. What is interesting is that the misclassification does not include the false label. We speculate that the model has learned to look at how much of the image is occupied.

      Conclusion

      The experiments in this project have shown that there isn’t an obvious way to steer CNN networks to learn intended scale features with filter size variation and data augmentation. While it was difficult to find a strict correlation, the variation in performance across experiments shows that the independent variables do have an effect on the information that the network encodes, and what information reaches the end of the network to determine the output. The fact that trained models were unable to generalize to the conflict set reinforces the fact that encoding global features is difficult for CNNs and they would likely resort to classifying with smaller scale features, if there are apparent differences.

      While the project seeks to disentangle factors that could affect what the CNN learns, the evaluation with conflict sets does not directly reveal how features are processed and learned within the network. Approaches such as visualizing the activation of each neuron or layer can be more effective in this and can reveal more about how to alter the network’s sensitivity to the global features.

      \ No newline at end of file diff --git a/blog/2023/increasing-context-length-for-transformers/index.html b/blog/2023/increasing-context-length-for-transformers/index.html new file mode 100644 index 00000000..30e23e28 --- /dev/null +++ b/blog/2023/increasing-context-length-for-transformers/index.html @@ -0,0 +1,37 @@ + Increasing Context Length For Transformers | 6.S898 Deep Learning Blogs 2023

      Increasing Context Length For Transformers

      How can we make attention more efficient?

      Introduction

      Since its release on November 30, 2022, ChatGPT has assisted users around the world with a variety of document parsing and editing tasks. These tasks often require large input contexts, since the documents and texts passed into ChatGPT’s source model, GPT-3.5, can be several pages long.

      Like many other language models, GPT-3.5 is a unidirectional transformer that uses the self-attention mechanism. But while self-attention is an extremely powerful mechanism, it is also expensive in its time and space complexity. Standard self-attention requires $O(n^2)$ operations in terms of the sequence length $n$, since the $QK^T$ term within the attention mechanism calculates and stores the attention of each of the $n$ tokens with $O(n)$ other tokens.

      Unfortunately, the $O(n^2)$ complexity makes long input contexts difficult for transformers to handle efficiently. Over the past few years, researchers have been investigating ways of mitigating the $O(n^2)$ factor. This remains an ongoing problem, with several papers released on the topic in 2023 alone. In this post, we provide an overview of existing strategies for increasing context length for transformers. We also propose and investigate our own efficient self-attention algorithm, which we call Gaussian attention.

      Literature Review

      In the past, large context lengths were handled using a simple partition scheme. Essentially, long inputs can be split into fixed-length chunks, where attention is computed separately for each chunk. Then, for chunk size $b$, a sequence of length $n$ requires only $O\left(\frac{n}{b} \cdot b^2\right) = O(nb)$ time to compute. However, this method has a major drawback in that information cannot be shared across partitioned blocks, leading to the fragmentation problem: the model lacks long-term dependencies and thus runs into cases where it lacks the necessary context to make accurate predictions.

      Modern methods for reducing context lengths in transformers generally try to avoid this problem by either introducing ways of sharing context across partitions or reducing self-attention calculation cost by using a simpler approximation. Models that fall into the second category may utilize one of many different approximation techniques, such as sparse attention matrices and fixed attention patterns. We present a brief summary of existing efficient transformers.

      Sparse Transformer

      Child et al. proposed a sparse transformer that reduces attention calculation cost from $O(n^2)$ to $O(n\sqrt{n})$. To achieve this, the sparse transformer uses a combination of strided and local attention patterns.

      Standard attention matrix (left) vs. strided attention matrix (right). Only attention values for the blue squares are computed.

      One attention head processes a local window of size $k$ surrounding the current token $i$, while a second attention head processes tokens $j$ such that

      \[(i - j) \mod l = 0 \qquad \forall j \leq i,\]

      where $l$ is a parameter chosen to be close to $\sqrt{n}$. Since only $O(l)$ tokens are attended upon for each token $i$, this results in the $O(n \cdot l) = O(n\sqrt{n})$ runtime. Child et al. showed that the sparse transformer can be applied to a wide range of fields, including image, text, and music, where it can be used to process audio sequences over 1 million timestamps long.

      Longformer

      Longformer applies a dilated sliding window to capture local attention patterns and reduce overall attention cost to $O(n\cdot{w})$ for window size $w$. Across successive attention layers, gaps are placed between different elements of the sliding window—thus expanding the receptive field to thousands of tokens even for small dilation factors. In order to generalize to different language modeling tasks, Longformer introduces global tokens that attend upon every other token. These global tokens are analogous to the different input representations used by language models for different tasks; for example, BERT appends a <CLS> token to the start of every input in classification tasks. Despite using sparse attention contexts, Longformer was able to outperform state-of-the-art model RoBERTa on several long document benchmarks.

      BigBird

      BigBird combines three different fixed attention patterns to achieve $O(n)$ complexity, being

      1. Global attention, consisting of tokens that attend upon every other token
      2. Local attention, consisting of a sliding window around each token
      3. Random attention, consisting of randomly-selected tokens

      Using this architecture, BigBird managed to increase maximum transformer context lengths by up to 8x. In the same paper, Zaheer et al. proved that certain sparse transformers are computationally equivalent to transformers with full attention. Theoretically, sparse transformers are capable of solving all tasks that full transformers can solve; this explains why sparse transformers are often a good approximation for full transformers.

      TransformerXL

      TransformerXL differs from the previously discussed models, as it doesn’t increase self-attention efficiency by sparsifying the attention matrix. Instead, it retains the classic partitioning scheme and attempts to overcome the fragmentation problem via a recurrence-based approach. Using the recurrence mechanism, hidden-state sequences are stored and cached so they can be used for additional context when the model processes the next statement. Overall, this architecture allows the network to use historical information to process new information. As a result, it can support longer-range dependencies without leading to context fragmentation. TransformerXL was able to operate on sequences up to 450% longer than those of vanilla transformers, while being up to 1800 times faster; in addition, it achieved SOTA results on the datasets text8, Penn Treebank, and WikiText-103.

      Landmark Tokens

      More recently, Mohtashami et al. suggested using landmark tokens to determine which tokens should be attended to. Inputs are divided into a series of fixed-length blocks, and each block is associated with a landmark token. In particular, this architecture is designed so that a high attention score on any individual token within a block also leads to a high attention score on the block’s “representative vector”—which is the landmark token itself. Then, during inference, the transformer retrieves the $k$ blocks corresponding to the $k$ highest-valued landmark tokens and attends only upon the tokens in this block. Mohtashami et al. claimed that this architecture can extend the context length of Llama to more than 32k tokens, allowing it to support inputs of the same length as GPT-4.

      VisionTransformer

      Most of the models discussed above apply specifically to transformers used for language modeling. However, algorithms for reducing attention complexity have been successfully used for other tasks as well. For example, VisionTransformer managed to achieve SOTA performance while limiting the attention context to a 16x16 patch around each pixel.

      Hardware Methods

      Aside from algorithm-based techniques, there have also been attempts to make basic transformer algorithms run faster on existing hardware. Although sparse attention algorithms may have better time complexity, they may not achieve practical speedups due to hardware inefficiencies. In order to achieve practical speedups on transformer training, Dao et al. proposed FlashAttention, an I/O-aware attention algorithm that implements the basic attention computation. FlashAttention achieves speedups of up to 15% on BERT-Large, showing that efficient transformers do not necessarily need to use approximate attention algorithms.

      Other Methods

      Numerous other algorithms for extending transformer context lengths have been proposed, including retrieval-based methods, low-rank approximations, and expectation-based methods.

      Methodology

      To see what types of context reduction algorithms are effective, we propose and test our own efficient transformer. We investigate whether transformers using Gaussian-distributed fixed attention patterns can perform as well as standard transformers. For each self-attention layer, we sample a Gaussian random distribution to determine which elements of the attention matrix we should compute. We analyze this approach for the unidirectional language modeling case, where the goal is to predict the next token of a given input sequence.

      Examples of Gaussian attention masks with $c=5$ and inputs of length 10.

      In language modeling, the most important context for predicting a new token often comes from examining the tokens that immediately precede it. Previous work has taken advantage of this pattern by employing fixed local attention patterns, such as the sliding window pattern used by BigBird. For token $i$, random samples from a truncated Gaussian distribution with mean $i$ and standard deviation $\sigma = \frac{\mu}{2} = \frac{i}{2}$ (this means that 0 is two standard deviations from the mean $i$) will produce values $j$ close to $i$ with high probability. This implies that we will likely calculate the attention scores for some local region of each token $i$, allowing the model to account for important local context connections.

      On the other hand, it may also be possible that some distant token $j$ has a large impact on the prediction of token $i$. For example, if you pass in a document in which the first sentence defines the overall purpose of the document, we might need to pay attention to this sentence even in later sections of the document. Fixed-pattern Gaussian attention allows for this possibility by calculating attention scores for $i$ and distant tokens $j$ with a lower but still nonzero probability. As a result, Gaussian attention offers some flexibility that may not be present in other fixed-pattern attention mechanisms, such as the sliding window technique.

      Algorithm

      The model takes a hyperparameter $c$, where $c$ is the number of tokens that each token attends upon. For every token $i$ in each self-attention layer, we select $c$ tokens from the Gaussian distribution $\mathcal{N}(i, i/2)$, where $\mathcal{N}$ is truncated at $0$ and $i$. Since our task focuses on the causal language modeling case, a token $i$ computes attention scores only for tokens $j<i$. Truncation ensures that every $i$ attends to exactly $\min(c, i)$ tokens. (If $c$ is greater than the number of tokens in range $[0,i]$, the result is to sample every token from $[0,i]$.) To match sampled random numbers with actual token indexes, we cast each random number $x$ to index $i = \lfloor{x}\rfloor$. In the case of duplicate indexes, we assign each duplicate index to the nearest unused index in range $[0,i]$. This algorithm is summarized below.

      for each token i:
      +  sample min(c, i) values from N(i, i/2)
      +  create list of indices by flooring every sampled value
      +  remove duplicates assigning duplicates to the nearest unused index
      +  # such an assignment always exists by pigeonhole principle
      +

      For each token $i$, we set all attention values for tokens which are not selected to zero. As a result, each token attends only on at most $c$ tokens, leading to an overall cost of $O(c \cdot n) = O(n)$ for constant $c$.

      Experiments

      Since we had limited training resources, we unfortunately couldn’t test Gaussian attention on large models like BERT or GPT. Instead, we used a toy study involving small models with smaller inputs—this leads to some additional considerations in analyzing our results, which we address later.

      We first tested whether models trained with limited Gaussian attention can achieve similar performance as models that were trained on full self-attention. We trained models with $c = 5$ and $c=10$ and compared them to the performance of the base model. For our base experiments, we used three self-attention heads per layer and six layers in total.

      Our evaluation metric for all models was next-token cross-entropy loss against a corpus of Shakespeare texts. Training is optimized with Adam and a learning rate of 0.0001.

      Base experiment results are shown below.

      Model Epochs Training Loss Validation Loss
      Base 80 4.2623 4.4390
      Base 130 3.7709 4.0320
      Base 140 3.7281 3.9964
      $c = 5$ 80 3.7458 4.0355
      $c = 10$ 80 4.1619 4.3801

      We found that both the $c=5$ and $c=10$ models were able to achieve similar performance as the base model, which suggests that Gaussian attention may be a good approximation for full attention. Interestingly, both Gaussian models required significantly fewer epochs to reach the same performance as the base model. Both Gaussian models also demonstrated faster separation between training and validation losses. We hypothesize that the smaller attention context helps focus learning on more relevant tokens, which lowers the number of training epochs needed. As a result, the model is able to learn the language modeling task more rapidly, leading to faster overfitting.

      Although initial results were promising, we chose to investigate a few factors that could have inflated model performance.

      In order to determine whether the Gaussian attention models are affected by input length, we tested the same setups with longer inputs. Our base experiments used relatively small inputs, each corresponding to one piece of dialogue in a Shakespeare script. On average, these inputs were approximately 30 tokens long; with $c = 5$, the selected context may be more than $\frac{1}{6}$ of the total tokens. As a result, Gaussian model accuracy might be inflated for small inputs, since the context essentially covers a large portion of existing tokens. To make $c$ a smaller fraction of the input length, we modified the dataset instead to create inputs with an average length of 100 tokens. We summarize the results in the table below.

      Model Epochs Training Loss Validation Loss
      Base 90 5.5906 5.6207
      $c = 5$ 90 5.5769 5.6166
      $c = 10$ 90 5.6237 5.6565

      With the longer input contexts, all three models had worse performance when trained for the same number of epochs. However, both Gaussian models managed to achieve approximately the same loss as the original model. This again suggests that Gaussian attention is a valid approximation of the standard attention matrix.

      We further investigated whether the performance of the Gaussian models degraded rapidly when using a smaller number of layers and attention heads. Logically, increasing the number of attention heads would help mask bad attention patterns formed by the Gaussian sampling strategy. For example, although the sampling process selects tokens $j$ near token $i$ with high probability, it is possible that some attention head $x$ does not select the relevant tokens for a token $i$. With the addition of more attention heads, a different head may compensate for the bad head by operating on the correct tokens. Increasing the number of attention layers similarly increases the number of attention heads, where good heads can compensate for bad ones. Experiments showed that even with one layer and one attention head, the Gaussian models were able to achieve approximately the same performance as the base model.

      Model Input Type Epochs # Heads # Layers Training Loss Validation Loss
      Base Short 80 1 1 5.1009 5.1605
      Base Long 80 1 6 5.5994 5.6289
      Base Long 90 1 6 5.5906 5.6207
      $c = 5$ Short 80 1 1 5.0481 5.1139
      $c = 5$ Long 80 1 6 5.5884 5.6273
      $c = 5$ Long 90 1 6 5.5769 5.6166
      $c = 10$ Short 80 1 6 4.5597 4.6949
      $c = 10$ Short 90 1 6 4.5432 4.6809
      $c = 10$ Long 80 1 6 5.6345 5.6666
      $c = 10$ Long 90 1 6 5.6237 5.6565

      However, we noticed that with fewer heads and layers, the base model trained at approximately the same rate as the Gaussian model. A smaller number of attention heads and attention layers implies that fewer parameters need to be updated to learn the task; this typically means that training is faster for smaller models. As a result, it makes sense that a smaller model would benefit less from the increase in training speed that reduced attention context offers; since the model is so small, training is already fast and any decrease in training speed would be minor.

      To test the limitations of Gaussian attention, we experimented with extremely sparse attention patterns in which each token attends upon only one other token.

      Although these models did not perform as well as the base transformer, we found that the token that was attended upon made a significant impact on the final loss. As shown in the table below, the models that employed a diagonal or Gaussian attention pattern performed significantly better than the model that used a vertical attention pattern on the first token. This suggests that local attention patterns were the most important ones for improving the outcome of our task; as a result, Gaussian attention may perform well specifically because it emphasizes the local attention context.

      Model Epochs # Layers # Heads Training Loss Validation Loss
      Diagonal 80 1 6 5.5089 5.5400
      Vertical 80 1 6 5.6652 5.6906
      Gaussian 80 1 6 5.3231 5.3744

      Implications and Limitations

      Our experiments showed that Gaussian attention has potential as an algorithm for improving transformer efficiency and increasing context lengths. We note that these experiments may not reflect the algorithm’s actual performance in real-world scenarios. Because we did not have the capacity to train a language model on the scale of BERT or GPT, we experimented only with much smaller models that processed much smaller contexts. As a result, our experimental results may not extend to larger models. Additionally, due to limited training time, we did not train any of the models we used for more than 150 epochs; with more training time, it is possible that the base transformers may outperform the modified ones. In order to generalize to larger models, Gaussian attention may need to be combined with other attention patterns, like global attention. More research is needed to fully understand its potential and shortcomings.

      Conclusion

      Today, methods for increasing context length in transformers remain an important research topic. Although researchers have proposed numerous efficient transformers and self-attention algorithms, a concrete solution for increasing transformer context lengths has yet to be found. With recent developments in large language models, the number of tasks that transformers can be applied to is increasing rapidly. As a result, the search for an efficient transformer is more important than ever.

      Our work shows that Gaussian distributions can potentially be used to build fixed-pattern attention masks. However, the performance of Gaussian attention masks in larger models remains to be confirmed and requires further study.

      \ No newline at end of file diff --git a/blog/2023/index.html b/blog/2023/index.html new file mode 100644 index 00000000..72d7f45a --- /dev/null +++ b/blog/2023/index.html @@ -0,0 +1 @@ + 2023 | 6.S898 Deep Learning Blogs 2023

      2023

      an archive of posts from this year

      Dec 12, 2023 Are Watermarked Large Language Models More Prone to Hallucinations?
      Dec 12, 2023 Predicting the Future: LSTM vs Transformers for Time Series Modeling
      Dec 12, 2023 Studying the benefits and limitations of sparse auto-encoders for compositional reasoning tasks
      Dec 12, 2023 Solvent Encoding for solubility prediction using GNN
      Dec 12, 2023 6.s898 Final Project- Investigating the biological underpinnings of latent embeddings for scRNA-seq
      Dec 12, 2023 Forbidden Facts
      Dec 12, 2023 Modeling Elephantfish Communication through Deep RNNs
      Dec 12, 2023 Exploring Image-Supervised Contrastive Diffusion - A Comparative Analysis with Applications in Image-to-Video Generation
      Dec 12, 2023 Combining Modalities for Better Molecular Representation Learning
      Dec 12, 2023 Exploring Frobenius and Spectral Normalization in MLPs and Residual networks
      Dec 12, 2023 Iterated Representation Learning
      Dec 12, 2023 A Method for Alleviating Catastrophic Forgetting With Explainability
      Dec 12, 2023 Graph Articulated Objects
      Dec 12, 2023 Physics Loss
      Dec 12, 2023 Diffusion Models on Low-Brightness Images
      Dec 12, 2023 Semi-Supervised Domain Adaptation using Diffusion Models
      Dec 12, 2023 The Effect of Activation Functions On Superposition in Toy Models
      Dec 12, 2023 Gradient-Boosted Neural Wavlet Interpolation for Time Series (G-BiTS)
      Dec 12, 2023 Challenges in Deep Learning Surrogates for Constrained Linear Optimization
      Dec 12, 2023 Activation Patching in Vision Transformers
      Dec 12, 2023 Learning Generals.io
      Dec 12, 2023 A Comparative Study of transformer on long sequence time series data
      Dec 12, 2023 Transfer Resistant Model Training
      Dec 12, 2023 Sparse Autoencoders for a More Interpretable RLHF
      Dec 12, 2023 Using Synthetic Data to Minimize Real Data Requirements
      Dec 12, 2023 Applications of Deep Learning in Timbre Transfer
      Dec 12, 2023 The Effect of Activation Functions On Superposition in Toy Models
      Dec 12, 2023 Training Robust Networks
      Dec 12, 2023 Imposing uniformity through Poisson flow models
      Dec 12, 2023 6-DOF estimation through visual place recognition
      Dec 12, 2023 Tracing the Seeds of Conflict: Advanced Semantic Parsing Techniques for Causality Detection in News Texts
      Dec 12, 2023 To Encode or Not To Encode: The Case for the Encoder-free Autodecoder Architecture
      Dec 12, 2023 New Synthesis Approach for Personalized LLMS
      Dec 12, 2023 Augmenting Expert Domain Image Inputs for Enhancing Visual Language Models Performance
      Dec 12, 2023 Embeddings for Spatio-temporal Forecasting
      Dec 12, 2023 In the pursuit of cheap and robust word embeddings
      Dec 12, 2023 Leveraging Representation Engineering For LLM’s In-Context-Learning
      Dec 12, 2023 Reasoning with Maps: Assessing Spatial Comprehension on Maps in Pre-trained Models
      Dec 12, 2023 Autoen-chorder: Predicting Musical Success With Neural Nets
      Dec 12, 2023 Ensemble Learning for Mitigating Double Descent
      Dec 12, 2023 Injecting Node Information via Embedding Initializations
      Dec 11, 2023 Overparameterization of Neural Networks through Kernel Regression and Gaussian Processes
      Dec 11, 2023 Exploring Methods for Generating Music
      Dec 11, 2023 Can Constrastive Learning Recommend Me a Movie?
      Dec 11, 2023 Improving CLIP Spatial Awareness Using Hard Negative Mining
      Dec 11, 2023 Transformer-Based Approaches for Hyperspectral Imagery in Remote Sensing
      Dec 11, 2023 Multimodal Commonsense
      Dec 11, 2023 Exploring Univariate Time Series Anomaly Detection using VAE's
      Dec 11, 2023 Graph Transformers
      Dec 11, 2023 Learning a Lifted Linearization for Switched Dynamical Systems
      Dec 10, 2023 Sparse Autoencoder Universality - Under What Conditions are Learned Features Consistent?
      Dec 10, 2023 Optimizations of Transformers for Small-scale Performance
      Dec 10, 2023 Guided Transfer Learning and Learning How to Learn: When Is It Useful?
      Dec 9, 2023 Alive Scene
      Dec 5, 2023 Projected fast feedforward networks
      Dec 1, 2023 Understanding Linear Mode Connectivity
      Dec 1, 2023 Transformers vs. RNNs: How do findings from real-world datasets relate to the theory?
      Dec 1, 2023 Exploring the latent space of text-to-image diffusion models
      Nov 16, 2023 Accelerating large model inference with speculative decoding - 6.s898
      Nov 11, 2023 Unraveling Social Reasoning in LLMs: A Deep Dive into the Social IQA Benchmark
      Nov 11, 2023 Comparing data augmentation using VAEs and denoising-VAEs for limited noisy datasets
      Nov 10, 2023 Emoji3Vec
      Nov 10, 2023 Modeling Human Speech Recognition with Different Network Architectures
      Nov 9, 2023 Analytic, Empirical, and Monte Carlo Bayesian Methods for Uncertainty Estimation
      Nov 9, 2023 Understanding LLM Attention on Useless Numbers in Word Problems (and this Title has 8 Es)
      Nov 9, 2023 Cross-Lingual Fine-Tuning for Multilingual Text Embeddings
      Nov 9, 2023 Learning Interpretable Features with Sparse Auto-Encoders
      Nov 9, 2023 How does model size impact catastrophic forgetting in online continual learning?
      Nov 9, 2023 VGAE Clustering of the Fruit Fly Connectome
      Nov 9, 2023 Robust Image to Video Generation Using Contrastive Diffusion Over Latents
      Nov 9, 2023 Adaptive Controller with Neural Net Equations of Motion for High-DOF Robots
      Nov 9, 2023 Robustness of self-supervised ViT features in b-mode images
      Nov 9, 2023 Investigating the Impact of Symmetric Optimization Algorithms on Learnability
      Nov 9, 2023 Can CNN learn shapes?
      Nov 8, 2023 Quantum Circuit Optimization with Graph Neural Nets
      Nov 8, 2023 Structural vs Data Inductive Bias
      Nov 8, 2023 From Scroll to Misbelief - Modeling the Unobservable Susceptibility to Misinformation on Social Media
      Nov 8, 2023 Examining assumptions in scRNA-seq foundation model pre-training (6.S898 Final Project)
      Nov 8, 2023 Increasing Context Length For Transformers
      Nov 8, 2023 Zero-Shot Machine-Generated Image Detection using Sinks of Gradient Flows
      Nov 8, 2023 A Deeper Look into Equivariance for Materials Data
      Nov 7, 2023 Prompt to Prompt
      Nov 7, 2023 Understanding Bias in Speech to Text Language Models
      Nov 6, 2023 Regularization Techniques for Attention Layers in Transformer Models
      Nov 5, 2023 Neural PDEs for learning local dynamics and longer temporal rollouts
      Nov 1, 2023 Graph neural networks v.s. transformers for geometric graphs
      \ No newline at end of file diff --git a/blog/2023/interpretability-of-toy-tasks/index.html b/blog/2023/interpretability-of-toy-tasks/index.html new file mode 100644 index 00000000..b1bf1e0f --- /dev/null +++ b/blog/2023/interpretability-of-toy-tasks/index.html @@ -0,0 +1,42 @@ + The Effect of Activation Functions On Superposition in Toy Models | 6.S898 Deep Learning Blogs 2023

      The Effect of Activation Functions On Superposition in Toy Models

      An in-depth exploration of how different activation functions influence superposition in neural networks.

      Introduction to Superposition

      With the recent emergence of grokking, mechanistic interpretability research has trended towards understanding how models learn . A central concept in this pursuit is superposition - a single neuron learning multiple “features.”

      Features are the distinguishing properties of data points, the “things” that allow a neural network to learn the difference between, say, a dog and a cat, or a Phillip Isola and a Jennifer Aniston. Features are the building blocks that determine what makes one data point different from another. In many cases, features discovered by and encoded within neural networks correspond to human-understandable ideas. For example, in language models there exist embedding vectors describing relations like gender or relative size (e.g., the famous vec(“king”) - vec(“man”) + vec(“woman”) =~ vec(“queen”)). It has been found that language models often map ideas like these to features within their parameters. Human understanding is not necessary though, as models can find and map features that exist beyond the perception of humans. This is an important part of the success (and dual inscrutability) of modern deep models, as these models can determine features and relationships within the data that allow them to model large datasets, like language, very well.

      In this work we:

      1. Explain Superposition, why it may occur, and why it is important
      2. Motivate a framework to easily study Superposition
      3. Study how activation functions affect Superposition

      Superposition and Previous Work

      Let us elaborate further. If you were to train some neural network and visualize the weights - chances are you would see some mess that looks like this:

      You are likely looking at superposition!

      As hypothesized by , superposition is a phenomenon which occurs when the number of features being learned by a model is greater than the number of parameters in that model. To capture $n$ features with $m<n$ parameters, one can think of the neurons as “working overtime.” In other words, some of the neurons within a model encode information about more than one feature. The neuron exhibiting superposition operates as an information compressor. The caveat is that this compression is often unpredictable and hard to understand!

      In a linear model, i.e., one which maps inputs to outputs with only linear functions, there are fewer parameters than the features it tries to represent, so it can only represent the top $m$ features. How then do neural networks use compression and map back to $n>m$ features using only $m$ parameters? The answer is non-linearity. Clearly, the activation function is key to understanding how superposition occurs - unexplored by other work in the field. explores the activation function in transformer MLP, but not in the setting we present here.

      But why do we care about Superposition? Why spend time studying this?

      While it may seem tangential, Superposition sheds important insights on Large Language Models (LLMs)! While LLMs are billions of parameters large, this is still not enough for a one-to-one mapping to “features” on the internet. Therefore LLMs also MUST exhibit superposition to learn. We focus our current work on the $\textit{bottleneck superposition}$ regime, but has shown that the picture is far more complicated than presented in . Namely, varying the initialization can change how superposition unfolds. To normalize across experiments, we initialize all weights using the Xavier norm, as outlined by . However, this is certainly a limitation of our presented work. A more rigorous analysis of superposition with activation functions would explore it outside the context of the bottleneck regime. We leave this for future work.

      From Distill Blog: "Feature visualization allows us to see how GoogLeNet, trained on the ImageNet dataset, builds up its understanding of images over many layers."

      Previous research, as detailed in , has predominantly explored superposition within the confines of toy models utilizing the Rectified Linear Unit (ReLU) activation function. However, to extend these findings to contemporary neural networks, it is crucial to investigate the influence of different activation functions on superposition. Different activation functions provide different ways for a model to use superposition to its advantage.

      So you train a neural network - what happens at the neuron level? There are three possibilities. As the network trains each neuron has three choices:

      1. The neuron chooses not to encode the “features”
      2. The neuron chooses to dedicate itself to one feature
      3. The neuron chooses to encode multiple features

      (We anthropomorphize - The neuron doesn’t choose to do anything - there is no free will - you are born into a loss landscape and an optimizer telling you what to do.)

      In linear models, each neuron is limited to representing only the most significant features (2), discarding others (1). Conversely, superposition, enabled by non-linear activation functions, adopts a more inclusive approach (3), trying to encode multiple features per neuron and learning efficient representational shortcuts.

      While ReLU bears similarity to the Gaussian Error Linear Unit (GeLU) used in modern GPT architectures, a deeper understanding of how different nonlinear activations impact superposition can provide crucial insights. Such understanding is key to unraveling the complex mechanisms through which neural networks utilize non-linearities, a cornerstone in the broader narrative of neural network interpretability.

      Monosemanticity and Polysemanticity

      To connect to existing literature, (2) and (3) above are given the names monosemanticity and polysemanticity. We will also follow this notation going forward.

      To describe further, the idea of superposition in neural networks leads us to two distinct types of neuron behaviors: monosemanticity and polysemanticity.

      Monosemantic neurons are those that specialize in a single, distinct feature, acting as dedicated detectors. This characteristic is often observed in the intermediate layers of architectures like Convolutional Neural Networks (CNNs), where neurons become adept at recognizing specific patterns, such as curves or colors. Polysemantic neurons do not align with just one feature but engage with multiple features simultaneously, offering a broader and more nuanced understanding of the data. This trait is essential for handling complex, high-dimensional datasets but comes at the cost of reduced interpretability.

      Motivation and Notation

      Our work extends the work done in by examining how the changing of the activation function on toy model networks affects the behavior and interpretability of these networks. uses the canonical ReLU activation function to add non-linearity to two-layer models to analyze how superposition occurs within small networks. They did not generalize their work to other activation functions, which, we find, result in distinct new phenomena. Our work compares the ReLU function with five other common activation functions: GeLU, SiLU, Sigmoid, Tanh, and SoLU. We hope that generalizing the phenomenon across activation functions can push the toy dataset to be closer to realistic ML settings.

      Problem Specification

      The models in this experiment will be learning how to replicate a length-$n$ vector of inputs in the range $[0, 1]$ with a compression to a length-$m$ embedding (where $n>m$). The model will then use the length-$m$ embedding to recreate the length-$n$ input, using a non-linear activation function to allow for superposition.

      We will run two variations of the experiment. One variation of the experiment will involve compressing inputs of size $n=10$ to an embedding of size $m=5$. This experiment aims to see how superposition occurs across many features which are encoded in a bottleneck with half the number of spots as there are features. The second variation of the experiment will involve compressing inputs of size $n=2$ to an embedding of size $m=1$. This experiment aims to understand precisely how the model encodes the second “extra” feature in a variety of settings.

      To set up this experiment, we need to create a dataset that allows for superposition to occur and that also allows for interpretability of the superposition. To motivate this further, we begin with a careful discussion of features.

      Features

      Features are the salient “things” that a neural network learns to differentiate inputs .

      Technically, features are the properties which neural networks try to extract from data during learning to compress inputs to useful representations during inference. Although features can map to human-understandable concepts (e.g., dog ears), they can also represent properties of the data that are not immediately apparent to the human brain. To experiment with superposition, we need to encode features in a way that we can understand. In other words, we do not want our experimental model to learn features that we are unaware of. This would make it hard for us to interpret how the model maps features in the data to embeddings within its parameters, consequently obscuring how superposition works. To this aim, we must generate features within the training set for our model which are simple and understandable to us a priori. Similar to , we use as each input a vector with entries drawn independently from a uniform distribution over $[0, 1]$. Making each entry independent of the others enforces that each entry is its own (artificial) feature with no correlation to the other features.

      Here we define two important augmentations that we used in the dataset to simulate real-world features: sparsity and importance.

      Sparsity

      Sparsity is a measure of how often a specific feature is present in a dataset. A feature is characterized as “sparse” if it only appears in a small fraction of the inputs to the model. Similarly, features that are “dense” appear in many of the inputs. We will also use the term ‘density’, which is the complement of sparsity, defined as $1-S$.

      Specifically, a feature with a sparsity of $S \in [0, 1]$ has a probability $S$ of being expressed in any given input. If we have $S=0$, this means that the feature is expressed in every input, whereas if we have $S=0.5$, this means that the feature is expected to be expressed in about half of the inputs.

      In our experiment, we train models at different sparsities to capture how sparsity affects superposition.

      Importance

      Not all features are created equal!

      Some features are more useful than others in determining relevant information about inputs. For instance, when building a dog detector - capturing features related to dogs’ faces are extremely important! A model would need to pick up salient features of dogs, perhaps floppy ears and snouts. Other features, like the grass a dog is sitting on or a frisbee in a dog’s mouth, may not be as useful for detecting a dog. The varying degrees of usefulness among features are encapsulated in the concept of “importance”.

      In the context of feature detection by a neural network, importance plays a role in modulating which features are encoded within the embedded layers of the network. In the context of the superposition hypothesis, if one feature has more importance than another feature, then it would be inefficient for the network to map both features equally within the embedding; allocating more weight to the feature with greater importance would be more valuable to the network in minimizing error.

      In our experiment, we give each input feature a different importance to allow the models to differentiate between them. We will examine when and how the model justifies mapping multiple features of differing importances to the same neuron, i.e., we will observe the superposition of features with differing importances.

      Dataset

      To run this experiment, we will synthetically generate data that has desired sparsity and importance properties.

      Each input $x$ will be a vector of length $n$. Each element $x_i$ in the vector will be drawn independently from the other elements in the uniform range $[0, 1]$. As discussed before, we can now synonymously refer to each of these elements as features, given their independent generation. (We will refer to them as features from this point onwards.)

      Each feature $x_i$ in the vector has a relative importance to each of the other features $x_{j\ne i}$. The importance of feature $x_i$ is $I_i = r_I^i$ where $r_I\in(0, 1)$ is a constant describing the relative decay of importance between neighboring features. This attribute of the data will be implemented in the loss function (see below for more details).

      We will train separate models for each of the varying levels of sparsity. For an input $x$ with sparsity $S$, each feature $x_i$ will take on its “true” value, a uniformly distributed number, with a probability of $1-S$ and will otherwise be set to 0 with a probability of $S$.

      Below is a visualization of two batches of inputs with respective sparsities $S=0.5$ and $S=0.99$.

      Each column of the plots represents a feature vector of length 20. Each batch has size 100, corresponding to the number of columns in the plots. Notice how the changing in sparsity affects the feature density.

      Network

      Below are the architectures of the base (linear) and experimental (non-linear) models that we are using in this experiment. Of particular note is the activation function $\mathbb{f}$, which we will substitute using the aforementioned activation functions.

      Linear Model Activation ( $\mathbb{f}$ ) Output Model
      \(h = Wx\) \(h = Wx\)
      \(x' = W^T h + b\) \(x' = f(W^T h + b)\)
      \(x' = W^T Wx + b\) \(x' = f(W^T Wx + b)\)

      We create an autoencoder - compressing down to induce polysemanticity. This maps $x$ to a direction in a lower-dimensional space, represented by \(h = Wx\). Each column of $W$ corresponds to a lower-dimensional representation of a feature in $x$. To reconstruct the original vector, $W^T$ is used, ensuring clear feature representation correspondence. This structure results in a symmetric matrix $W^TW$ and allows for clear visualization of the weights. They visually allow for the determination of the presence of superposition.

      Loss

      Sparsity, Importance and Our Network come together in the following loss function:

      \[L = \sum_{i} \sum_{x} I_{i}(x_{i} - x'_{i})^{2}\]

      Motivated by , we use a standard MSE loss, where $x_i$ and $x_i’$ measure the absolute difference in the auto-encoding of the datapoint. The Importance factor, $I_i$ , describes how important the given reconstruction is. A smaller importance will allow loss minimization even with a poor reconstruction.

      Results

      Below we present each activation function, along with plots depicting how training results in superposition at varying degrees of sparsity.

      For the $n=10, m=5$ experiment, we show the $W^TW$ matrix and neuron feature distribution at varying degrees of sparsity. The $W^TW$ matrix reveals which features are prioritized (shown by the diagonal terms) and any polysemanticity that occurs (shown by the off-diagonal terms). The neuron feature distribution shows how each of the $n=10$ features is mapped to each of the $m=5$ embedding dimensions. This can aid in understanding under what conditions polysemanticity arises and how it occurs under each condition of sparsity.

      For the $n=2, m=1$ experiment, we show a phase diagram. This phase diagram shows how the second “extra” feature of the length-2 input vector is encoded. There are three options: not encoded at all (only the first feature is encoded), encoded in superposition with the first feature, and encoded as the only feature (the first feature is not encoded).

      ReLU

      The ReLU (Rectified Linear Units) activation function is a piecewise-linear function, a simple non-linearity that allows models to use superposition of features. ReLU was the only activation function used in , so our work with the ReLU function was primarily to verify the results from their work and create a baseline for our subsequent experiments.

      The following are the $W^TW$ matrices and feature-neuron mappings:

      ReLU $W^TW$ Matrices

      As per the results in , the ReLU model focuses on the most significant features in the low sparsity regime (generally resulting in monosemanticity), while relying on superposition in the high sparsity regime (polysemanticity). With weaker signals for the most important features in the high sparsity regime, the model encodes multiple features in each neuron activation to minimize error of the sparse signals. Notably, the ReLU model uses antipodal pairs in the mapping of features to encode multiple features to single neurons. This can be seen as a light-colored diagonal entry within $W^T W$ and a corresponding dark-colored off-diagonal entry within the same column. This antipodal mapping of features is a method that the model uses to compress more than one feature to one neuron. This antipodal mapping is more interpretable than other kinds of polysemanticity which occurs in subsequently-described activation functions which “speckle” multiple features into a single neuron, making it more difficult to determine how the superposition occurs in that model.

      The following is the phase diagram of the ReLU models:

      In regimes of high sparsity (i.e., below $1-S=0.1$ on the phase diagram above) the ReLU models are highly polysemantic for all relative feature importances, reflecting an inability to encode features with a sparse signal. In regimes of low sparsity, the model generally embeds the more important of the two features. This result mirrors the phase diagram in as expected.

      GeLU/SiLU

      The GeLU (Gaussian Error Linear Units) and SiLU (Sigmoid Linear Units) activation functions are very similar to one another, and as a result produced very similar experimental results. Both functions are akin to a “smoothed out” version of the ReLU function, i.e., they have no discontinuities. The GeLU has recently been popularized as the activation function of choice in many transformers, including BERT and GPT . The GeLU is differentiable for all $x$ - and has a smoother curve than the SiLU (Swish) activation. found that in the setting of transformers, the GeLU was less interpretable than the SoLU. This may be the case after stacking many layers of activations - but with a single layer this is not the case.

      GeLU $W^TW$ Matrices
      SiLU $W^TW$ Matrices

      The GeLU and SiLU models exhibit similar kinds of superposition in their weight matrices. With increasing sparsity, superposition of features does happen, but it is more “strict” than the ReLU model, generally mapping at most two features to any single neuron. In each of the polysemantic neurons, though, there is one feature that dominates, suggesting that these activation functions enforce sparsity in their activations. There are also many antipodal pairs of features within these models, reiterating the behavior that exists in the ReLU models (also found in ).

      The above phase diagrams of the GeLU and SiLU models show a marked difference from that of the ReLU model (earlier), despite the similar shapes of these three activation functions. The GeLU and SiLU models exhibit significant monosemanticity at high degrees of sparsity, unlike the ReLU, which results in near-complete polysemanticity for sparsities higher than $S=0.9$. This difference may reflect SiLU’s and GeLU’s better fit as an activation for picking up the signal in sparse feature representations, making the case for GeLU and SiLU as more interpretable activation functions within larger models.

      Sigmoid

      The Sigmoid function is a smooth activation function with an output range of $(0, 1)$. This maps directly to the desired range of values that the model is trying to replicate.

      Sigmoid $W^TW$ Matrices

      The Sigmoid model exhibits superposition in all neurons as soon as the sparsity is non-zero, as can be seen from the “speckling” of non-zero off-diagonal terms in $W^T W$. This is a difference from the ReLU/GeLU/SiLU models, for which the superposition “leaks” into the least significant encoded features at low, non-zero sparsities and eventually affects all features at higher sparsities. This low-sparsity superposition may occur because the Sigmoid function strictly maps to $(0, 1)$, with increasingly large pre-activation inputs necessary to map to values close to 0 and 1. As such, the model may be “speckling” the off-diagonal values in an attempt to “reach” these inputs which are close to 0 and 1.

      Despite differences in the occurrence of polysemanticity, the ReLU and Sigmoid models exhibit very similar phase diagrams, reflecting an inability to encode multiple features at sparsities above $S=0.9$ (i.e., below $1-S=0.1$ on the phase diagram). As discussed above, this may be caused by the vanilla sigmoid activation’s inability to “reach” target values close to 0 or 1.

      Tanh

      The Tanh function is another smooth activation function, but it results in significantly different behavior from the Sigmoid (despite being a linear mapping of the Sigmoid).

      Tanh $W^TW$ Matrices

      With the Tanh activation function, the models prioritize the most important features regardless of sparsity. This behavior is possibly attributed to the range that the Tanh function maps to $(-1, 1)$, while the target range of input values in this experiment are $[0, 1]$. This behavior is similar to that of a linear model (i.e., no activation function) which exhibits no capability to use superposition, but the phase diagram reveals subtle differences from the linear model results.

      Although nearly performing as the linear model would, only encoding the most important feature, there is some difference to the linear model along the boundary between features, as can be seen around the importance of 1. This reflects the model’s ability to use non-linearity to perform superposition.

      A Note on Sigmoid and Tanh

      Despite similarities in the S-like curvature of the Sigmoid and Tanh activation functions, the Sigmoid model exhibits superposition, whereas the Tanh model exhibits nearly zero superposition. A key difference between the two functions is the fact that the Sigmoid function maps inputs to a range of $(0, 1)$, while the Tanh function maps inputs to a range of $(-1, 1)$. This difference is significant in our experiment, as our experiment uses models to recreate random vectors with elements in the range $[0, 1]$. The range of the Sigmoid function matches this range exactly, while the portion of the Tanh function's range that matches it is produced only by non-negative inputs. In other words, the $(-\infty, 0)$ input domain (which maps to the range $(-1, 0)$) of the Tanh function remains useless for prediction of values which should be in the range $[0, 1]$. Therefore, the tanh function empirically acts like a linear function (i.e., no activation layer).

      SoLU

      The SoLU (Softmax Linear Units) activation function is based on the work from . \(Solu(x) = x * softmax(x)\) SoLU is a function for which the activation of each neuron is dependent on all the other neurons within its own layer. This is significantly different from all the other activations that we tested, as the activations of neurons with the other functions are independent of the other neurons within the same layer. In other words, all the other activation functions are univariate while the SoLU is multivariate. Similar to other approaches like L1 regularization, the SoLU amplifies neurons with relatively large pre-activations and de-amplifies neurons with relatively smaller pre-activations. This behavior pressures the model to be more monosemantic (and therefore more interpretable in some settings), as discussed in .

      SoLU $W^TW$ Matrices

      In our experiment, the SoLU model results in non-zero superposition of all features with all degrees of sparsity. This may be attributed to the way that the SoLU “forces” activations to be sparse, i.e., the activations result in a “winner-takes-all” behavior due to the way that the Softmax function works. This is not a useful property for prediction of a vector of independently-drawn values, as the input vectors are unlikely to be peaky, i.e., the SoLU does not quite fit the purposes of its task.

      As seen in the heatmap plot above, the SoLU activation results in very polysemantic behavior. This function is not precisely fit for its task of recreating given vectors and likely results in using polysemanticity to attempt to pass information about inputs forward. Curiously, the SoLU models have preference for the more important feature in the low sparsity regime.

      Bringing Them All Together

      Sparsity vs Dimensions Per Feature

      The diagram above depicts a variation on the two experiments explained thus far. In this experiment $n=200$ features were compressed to $m=20$ features and the loss function was tweaked to give uniform importance $I_i = 1$ to all features. This was done to determine how each activation function compresses features in different sparsity regimes without the influence of feature importance.

      On the y axis, the plot depicts a metric (dimensions per feature) that measures the number of dimensions a model dedicates to each feature. In other words, a point with a y-value near 1 represents a model that dedicates one dimension of its embedding space to one feature, whereas a point with a y-value near 0.25 represents a model that represents four features at each dimension.

      The plots are generally consistent with the analysis from the previous experiments. Many of the activations result in superposition in the low-density/high-sparsity regime, and increases in sparsity result in increases in the polysemanticity of the model (i.e., the dimensions per feature decrease). Consistent with the other experiments, SiLU and GeLU perform very similarly. The Sigmoid and SoLU activations pack nearly 20 features per dimension at high sparsities. The Tanh activation exhibits behavior similar to the linear model, neatly packing one dimension with one feature, a result that is mirrored in the previous experiments. Similar to the results in , we see “sticky” behavior of the ReLU activation function at 1 and 0.5 dimensions per feature. This can be explained by the phenomenon of “antipodal pairs” discussed in . None of the other activation functions that we tested exhibit this behavior - which is striking since this is a well-studied effect for the ReLU activation function. This may be because the ReLU activation function is the only one that is not smooth, and therefore exhibits different behavior from the other activation functions.

      Conclusion

      Our investigation into the effects of various activation functions reveals that significant changes occur in model behavior depending on the chosen function. This finding underscores the ability to modulate the degree of superposition through the selection of activation functions, highlighting yet unexplored degrees of freedom in model design. This line of inquiry goes seamlessly with considerations of how neural networks are initialized and trained, suggesting these as promising future research directions.

      Our work is limited by the breadth of activation functions that we tested, though. Further iterations on each of the activation functions (e.g., tweaking the Sigmoid function to map to the range $(-\epsilon, 1+\epsilon)$) could prove fruitful in getting better performance from the models. Furthermore, while writing this blog, published a new key insight related to the importance of initialization in superposition, which we do not explore here. Despite this, we have learned valuable insights about the effects that our set of activation functions can have on superposition.

      Pursuing enhanced interpretability, however, does not come without its challenges. Specifically, striving for transparency and understandability in neural network models raises concerns about the potential for deception. Despite these challenges, our work aims to develop neural network models that are more interpretable, transparent, and secure.

        \ No newline at end of file diff --git a/blog/2023/latent-interpolation/index.html b/blog/2023/latent-interpolation/index.html new file mode 100644 index 00000000..85d700ea --- /dev/null +++ b/blog/2023/latent-interpolation/index.html @@ -0,0 +1,32 @@ + Exploring the latent space of text-to-image diffusion models | 6.S898 Deep Learning Blogs 2023

        Exploring the latent space of text-to-image diffusion models

        In this blog post we explore how we can navigate through the latent space of stable diffusion using interpolation techniques.

        Introduction

        Diffusion models are a class of deep generative models that have shown promising results in many different tasks, including photorealistic image generation and protein design . Diffusion models work by gradually destroying structure of an object with $T$ steps of a fixed noising process, and then learning to reverse this process to recover the original object. This allows the model to learn the underlying structure of the data, and to generate new objects that are both realistic and diverse. The forward process $q( x_t | x_{t-1} )$ defines how noise is added to an original image $x_0$, and the reverse process $q( x_{t-1} | x_{t} )$ that we want to learn, can recover a less noisy version of an image.

        Stable Diffusion (SD) is an open-source latent text-to-image diffusion model which is able to realize images with fine grained details, when prompted with a textual input describing the desired characteristics of the output image. SD is reasonably fast compared to other diffusion models, since it performs the diffusion steps in a low dimensional latent space. The strategy consists of using an image encoder $\mathcal{E}: \mathcal{X} \rightarrow \mathcal{Z}^0$ which maps an image $x_0 \in \mathcal{X}$ to a lower dimensional image latent code $z_0 \in \mathcal{Z}^0$, and a latent decoder $\mathcal{D}: \mathcal{Z}^0 \rightarrow \mathcal{X}$ which recovers an image $\mathcal{D}(z_0)$ from the image latent code $z_0$. Using these two models it is possible to learn to denoise $z_T$, instead of $x_T$, which is also normally distributed, saving a lot in computing since the latent codes dimensionality are usually chosen to be much smaller than the original images dimensionality. During inference time, for a given input textual prompt $y$, we encode the prompt into a vector $s = \tau_\phi(y)$ using CLIP , sample $z_T \sim \mathcal{N}(0, I)$, and provide these two tensors to the diffusion model $f_\theta: \mathcal{Z}^T \times \mathcal{S} \rightarrow \mathcal{Z}^0$, which generates $z_0 = f_\theta(z_T, s)$. We can then map this vector into an image using the decoder: $x_0 = \mathcal{D}(z_0)$ which hopefully is in the data distribution.

        In order to be able to learn the complex interaction between textual descriptions and images coming from a very large multimodal dataset, SD has to organize its image latent space $\mathcal{Z}^T$ coherently. If the learned representations are smooth for instance, we could expect that $\mathcal{D}(f_\theta(z_T, s))$ and $\mathcal{D}(f_\theta(z_T + \epsilon, s))$, where $\epsilon$ is a tensor of same dimensionality as $z_T$ with values very close to 0, will be very similar images. A common technique to explore and interpret the latent space of generative models for images is to perform latent interpolation between two initial latent codes, and generate the $N$ images corresponding to each of the interpolated tensors. If we sample $z_\text{start}, z_\text{end} \sim \mathcal{N}(0, I)$, fix a textual prompt such that $s = \tau_\phi({y})$ and use SD to generate images conditioned on the textual information we could explore different techniques for generating interpolated vectors. A very common approach is linear interpolation, where for $\gamma \in [0, 1]$ we can compute:

        \[z_\text{linear}^{(\gamma)} = (1-\gamma)z_\text{start} + \gamma z_\text{end}\]

        Mimicking these exact steps for three different pairs of sampled latent codes for $(z_\text{start}, z_\text{end})$, and for each of them fixing a text prompt we get:

        As we can see from the image, when we move away from both $z_\text{start}$ and $z_\text{end}$ we get blurred images after decoding the interpolated image latent codes, which have only high level features of what the image should depict, but no fine grained details, for $\gamma = 0.5$ for instance, we get:

        In contrast, if we perform interpolation in the text space by sampling $z_T \sim \mathcal{N}(0, I)$, which is kept fixed afterwards, and interpolating between two text latent codes $s_\text{start} = \tau_\phi(y_\text{start})$ and $s_\text{end} = \tau_\phi(y_\text{end})$, we get something more coherent:

        Latent interpolation is a very common technique in Machine Learning, particularly in generative models, used interpolation in the latent space of a Variational Autoencoder (VAE) to generate molecules between two initial ones by encoding them in the VAE latent space, interpolating between them and using the decoder to obtain the molecules from the latents, showed how interpolation can be used to perform semantic transformations on images, by changing features of a CNN. More broadly interpolation has also been studied from a probabilistic point of view , evaluating how different techniques might generate out of distribution samples, which we explore later in this blog post.

        In this project we explore geometric properties of the image latent space of Stable Diffusion, gaining insights of how the model organizes information and providing strategies to navigate this very complex latent space. One of our focuses here is to investigate how to better interpolate the latents such that the sequence of decoded images is coherent and smooth. Depending on the context, the insights here could be transferred to other domains as well if the sampling process is similar to the one used in SD. The experiments are performed using python and heavily relying on the PyTorch , Transformers and Diffusers libraries.

        Method

        In this section we compare several interpolation techniques. For reproducibility reasons we ran the experiments with the same prompt and sample latent vectors across different experiments. We use Stable Diffusion version 1.4 from CompVis with the large CLIP vision transformer, the DPMSolverMultistepScheduler , 30 inference steps and a guidance scale of 7.5 . We use the prompt “An high resolution photo of a cat” and seed = 1 to generate both $z_\text{start}$ and $z_\text{end}$. The corresponding generated pictures are shown below:

        Linear Interpolation

        Although linear interpolation is still a very commonly used interpolation technique, it is known that it generates points which are not from the same distribution as the original data points depending on the original distribution of the points being interpolated. Particularly, for $z_{\text{start}}, z_{\text{end}} \sim \mathcal{N}(0, I)$ and $\gamma \in [0,1]$, we have:

        \[z_\text{linear}^{(\gamma)} = (1-\gamma)z_\text{start} + \gamma z_\text{end}\]

        Hence:

        \[\begin{eqnarray} \mathbb{E}\left[z_\text{linear}^{(\gamma)}\right] &=& \mathbb{E}\left[(1-\gamma)z_\text{start} + \gamma z_\text{end}\right] \nonumber \\ &=& \mathbb{E}[(1-\gamma)z_\text{start}] + \mathbb{E}[\gamma z_\text{end}] \nonumber \\ &=& (1-\gamma)\mathbb{E}[z_\text{start}] + \gamma \mathbb{E}[z_\text{end}] \nonumber \\ &=& 0 \nonumber \end{eqnarray}\]

        Therefore, the mean stays unchanged, but the variance is smaller than 1 for $\gamma \in (0,1)$:

        \[\begin{eqnarray} \text{Var}[z_\text{linear}^{(\gamma)}] &=& \text{Var}[(1-\gamma)z_\text{start} + \gamma z_\text{end}] \nonumber \\ &=& \text{Var}[(1-\gamma)z_\text{start}] + \text{Var}[\gamma z_\text{end}] \nonumber \\ &=& (1-\gamma)^2\text{Var}[z_\text{start}] + \gamma^2\text{Var}[z_\text{end}] \nonumber \\ &=& \gamma(2\gamma - 2)I + I \nonumber \\ &=& (\gamma(2\gamma - 2) + 1)I \nonumber \end{eqnarray}\]

        Given that the sum of two independent Gaussian distributed random variables results in a Gaussian distributed random variable, $z_\text{linear}^{(\gamma)} \sim \mathcal{N}(0, (\gamma(2\gamma - 2) + 1)I)$. This shows how the distribution of the interpolated latent codes changes. To further understand the effect of this shift, we can use the interactive figure below, where for $\text{std} \in [0.5, 1.5]$ we generate an image using the embedding $\text{std} \, z_\text{start}$:

        Normalized linear interpolation

        As shown before, linear interpolation is not a good technique for interpolating random variables which are normally distributed, given the change in the distribution of the interpolated latent vectors. To correct this distribution shift, we can perform a simple normalization of the random variable. We will refer to this as normalized linear interpolation. For $\gamma \in [0,1]$ we define $z_\text{normalized}^{(\gamma)}$ as:

        \[z_\text{normalized}^{(\gamma)} = \dfrac{z_\text{linear}^{(\gamma)}}{\sqrt{(\gamma(2\gamma - 2) + 1)}} \implies z_\text{normalized}^{(\gamma)} \sim \mathcal{N}(0, I)\]

        Now, as we move further away from the endpoints $z_\text{start}$ and $z_\text{end}$, we still get coherent output images:

        SLERP

        Spherical Linear Interpolation (Slerp) , is a technique used in computer graphics and animation to smoothly transition between two orientations, especially rotations. If we let $\phi = \text{angle}(z_\text{start}, z_\text{end})$, then for $\gamma \in [0,1]$, the interpolated latent is defined by:

        \[\text{slerp}(z_\text{start}, z_\text{end}; \gamma) = \dfrac{\sin((1-\gamma)\phi)}{\sin(\phi)}z_\text{start} + \dfrac{\sin(\gamma\phi)}{\sin(\phi)}z_\text{end}\]

        where $\phi$ is the angle between $z_\text{start}$ and $z_\text{end}$. The intuition is that Slerp interpolates two vectors along the shortest arc. We use an implementation of Slerp based on Andrej Karpathy . As we can see from the images below, slerp generates very good quality interpolated vectors.

        If we compare the obtained results with normalized linear interpolation we see that the generated images are very similar, but as opposed to normalized linear interpolation, we cannot easily theoretically analyze the distribution of generated latents. To have some intuition behind how these different techniques interpolate between two vectors, we can sample and fix two vectors from a 2-dimensional normal distribution. We can visualize how these trajectories compare with each other:

        Translation

        To further investigate some properties of the latent space we also perform the following experiment. Let $z_\text{concat} \in \mathbb{R}^{4 \times 64 \times 128}$ be the concatenation of $z_\text{start}$ and $z_\text{end}$ over the third dimension. We will denote by $z_\text{concat}[i, j, k] \in \mathbb{R}$ a specific element of the latent code and $:$ as the operator that selects all the elements of that dimension and $m:n$ the operator that selects from element $m$ to element $n$ of a specific dimension. We can create a sliding window over the concatenated latent and generate the corresponding images. We define the translation operator $\mathcal{T}$ such that $\mathcal{T}(z_\text{concat}; t) = z_\text{concat}[:, :, t:64+t]$, which is defined for $t = {0, \cdots, 64}$. The sequence of generated images can be visualized below using our interactive tool:

        Surprisingly, we note that applying $\mathcal{T}$ to our concatenated latent code is materialized into a translation in image space as well. But not only the object translates, we also see changes in the images style, which is justified by changing some of the latent dimensions.

        We can correct this behavior by mixing the two latent codes only in a single slice of the latent code. Let $\mathcal{C}(z_\text{start}, z_\text{end}; t)$ represent the concatenation of $z_\text{start}[:, :, 64:64+t]$ and $z_\text{end}[:, :, t:64]$ along the third dimension. With this transformation we obtain the following:

        Hence, translation is also a valid interpolation technique and could be further expanded to generate an arbitrary size of latent vectors.

        Analysis

        In order to evaluate the quality of the generated interpolations we use CLIP, a powerful technique for jointly learning representations of images and text. It relies on contrastive learning, by training a model to distinguish between similar and dissimilar pairs of images in a embedding space using a text and an image encoder. If a (text, image) pair is such that the textual description matches the image, the similarity between the CLIP embeddings of this pair should be high:

        \[\text{CLIPScore(text,image)} = \max \left(100 \times \dfrac{z_{\text{text}} \cdot z_{\text{image}}}{ \lVert z_{\text{text}} \rVert \lVert z_{\text{image}} \rVert}, 0 \right)\]

        For each interpolation strategy $f \in \{\text{linear}, \text{normalized}, \text{slerp}\}$ presented, we fix the prompt $\text{text} = $ “A high resolution image of a cat” and generate $n = 300$ interpolated latents $f(z_\text{start}, z_\text{end}, \gamma) = z_f^{(\gamma)}$ with $\gamma = \{0, \frac{1}{n-1}, \frac{2}{n-1}, \cdots, 1\}$. We then generate the images $x_f^{(\gamma)}$ from the interpolated latents, and finally we use the CLIP encoder $\mathcal{E}_\text{CLIP}$ on the generated images to create image embeddings that can be compared with the text embedding. We then define the Interpolation Score $\text{InterpScore}(f, \text{text}, n)$ as:

        \[\text{InterpScore}(f, \text{text}, n) = \dfrac{1}{n} \sum_{\gamma \in \{0, \frac{1}{n-1}, \frac{2}{n-1}, \cdots, 1\}} \max \left(100 \times \dfrac{z_{\text{text}} \cdot \mathcal{E}_\text{CLIP}(x_\text{f}^{(\gamma)})}{ \lVert z_{\text{text}} \rVert \lVert \mathcal{E}_\text{CLIP}(x_\text{f}^{(\gamma)}) \rVert}, 0 \right)\]

        Applying these steps we obtained the following results:

        Surprisingly, linear interpolation performed better than normalized linear and slerp, this could indicate that CLIP scores might not be a good metric for image and text similarity in this context. Given that in this class project the main goal was to gain insights, as future work we could run a large scale experiment to check whether this behavior would be repeated. We can also visually inspect the quality of the interpolation by generating a video for each interpolation. From left to right we have images generated from latents from linear, normalized and slerp interpolations respectively:

        Conclusion

        This work shows the importance of choosing an interpolation technique when generating latent vectors for generative models. It also provides insights into the organization of the latent space of Stable Diffusion; we showed how translations of the latent code correspond to translations in image space as well (but also changes in the image content). Further investigation of the organization of the latent space could be done, where we could try, for instance, to understand how different dimensions of the latent code influence the output image. As an example, if we fix an image latent and use four different prompts, which are specified in the image below, we get:

        As we can see all the generated images have some common characteristics, all the backgrounds, body positions and outfits (both in color and style) of the generated images are very similar. This indicates that even without explicitly specifying those characteristics on the textual prompt, they are present in some dimensions of the image latent code. Hence, the images share those similarities. Understanding how we can modify the latent code such that we change the shirt color in all the images from blue to red would be something interesting. Additionally, we showed some indication that CLIP scores might not be a good proxy for evaluating the quality of images generated from an interpolation technique.

        \ No newline at end of file diff --git a/blog/2023/learning-interpretable-features-with-sparse-autoencoders/index.html b/blog/2023/learning-interpretable-features-with-sparse-autoencoders/index.html new file mode 100644 index 00000000..1da7b265 --- /dev/null +++ b/blog/2023/learning-interpretable-features-with-sparse-autoencoders/index.html @@ -0,0 +1,32 @@ + Learning Interpretable Features with Sparse Auto-Encoders | 6.S898 Deep Learning Blogs 2023

        Learning Interpretable Features with Sparse Auto-Encoders

        Introduction

        The field of Machine Learning is becoming increasingly promising as humanity endeavors to create intelligent systems, with models outperforming humans on many tasks. As models become increasingly capable, it's important that humans are able to interpret a model’s internal decision making process to mitigate the risk of negative outcomes. While significant progress has been made on interpreting important parts of models like attention heads , it’s also the case that hidden layers in deep neural networks have remained notoriously hard to interpret.

        Superposition Hypothesis

        One hypothesis for why it can be challenging to interpret individual neurons is because they are simultaneously representing multiple concepts. One may wonder why a network would have its neurons learn to represent multiple concepts. At a first glance, this approach to encoding information feels unintuitive and messy. The key idea comes from the Johnson–Lindenstrauss lemma: In $n$ dimensions, you can have at most $n$ pairwise orthogonal vectors, but the number of pairwise “almost orthogonal” vectors (i.e. cosine similarity at most $\epsilon$) you can have is exponential in $n$. This enables a layer to encode for many more concepts than it has neurons. So long as each neuron is only activated by a sparse combination of concepts, we can reconstruct these concepts from a given activation with minimal interference between the concepts, since they are “almost orthogonal”. This hypothesis is known as superposition , and offers an explanation for why neurons have been observed in practice to be polysemantic.

        Diagram depicting a larger model with disentangled features and a lower dimensional projection simulating this larger network using polysemanticity. Source

        Sparse Auto-Encoders

        Since deep neural networks are strongly biased towards making neurons polysemantic during training, humans might try to understand the model’s decision making process by “unwrapping” the network into the sparse features that the neurons in some particular layer are simulating. To do this, a concept called a Sparse Auto-Encoder (SAE) is used. An SAE is similar to a normal autoencoder, with two main differences: (1) the encoding layer is larger than the neuron layer, often by a factor of 4x. (2) the loss function penalizes not only for the MSE loss, but also for the sparsity of the encoder matrix, frequently represented as L1 loss. A sparse autoencoder lets us learn a sparse representation for a vector, but in a higher dimensional space. SAEs were first proposed in a blogpost by Lee Sharkey in December 2022, and in September 2023 more research was published on SAEs, both by a group of independent researchers and by Anthropic demonstrating that not only can SAEs be learned at a specific layer, but the features they learn are human interpretable.

        Diagram depicting an SAE architecture for a transformer language model. Source

        Research Question

        This inspired a new idea: what if we could take a neural network, unwrap each layer into a larger, sparse, interpretable set of features, and then learn a sparse weight matrix connecting all pairs of two consecutive feature layers? This would mean that we could take a neural network, and transform it into a new neural network simulating the old neural network, with the nice property that the computations are sparse and hopefully interpretable.

        The main question we wish to explore is: Can we unwrap a deep neural network into a larger sparse network and learn sparse weights between consecutive feature layers without losing performance?

        Initial Mathematics

        Let’s begin by looking at $L_1$ and $L_2$, two consecutive layers in a deep neural network with ReLU activations. Let $W$ and $b$ be the matrix and bias respectively that connects these two layers. Then we have

        \[L_2 = \text{ReLU}(W L_1 + b)\]

        We create autoencoders such that

        \[L_1 = D_1 \text{ReLU}(E_1 L_1 + e_1) \equiv D_1 F_1\] \[L_2 = D_2 \text{ReLU}(E_2 L_2 + e_2) \equiv D_2 F_2\]

        where $D_i$ is the decoder for layer $i$, $E_i$ and $e_i$ are the weights of the encoder and encoder bias, and $F_i$ is the feature vector.

        Biases excluded from diagram for clarity. The hockey sticks on top of $F_1$, $L_2$, and $F_2$ indicate that a ReLU is applied to get the activations at that layer. If our autoencoder is good (which it should be), we have $L_1=L_1'$ and $L_2=L_2'$.

        Thus we have

        \[\begin{align} F_2 &= \text{ReLU}(E_2 L_2 + e_2) \\ &= \text{ReLU}(E_2 \text{ReLU}(W L_1 + b) + e_2) \\ &= \text{ReLU}(E_2 \text{ReLU}(W D_1 F_1 + b) + e_2). \end{align}\]

        In general, an approximation of the form

        \[F_2 = \text{ReLU}(W_2 F_1 + b_2)\]

        would be pretty terrible since we cannot easily approximate a double ReLU function with a single ReLU function. However, because of the way $F_1$ and $F_2$ are created from $L_1$ and $L_2$, the relationships are actually very sparse in nature, so we will try to learn the approximation above. Perhaps there is a clever initialization that will allow us to learn this more easily.

        If we just ignored the inside ReLU in the definition of $F_2$, then we’d have

        \[F_2 = \text{ReLU}(E_2 W D_1 F_1 + E_2 b + e_2)\]

        which suggests the following could be a good initialization for our learned weight $W_2$ and bias $b_2$.

        \[W_2 = E_2 W D_1\] \[b_2 = E_2 b + e_2\]

        While this initialization seemed reasonable at the start of the project, it turned out that during training this results in a local minimum, and you can actually get much lower loss if you randomly initialize $W_2$ and $b_2$.

        Codebase

        To answer this main question, the first step was to build out a codebase that had all the implementations necessary to run experiments to explore this question. The codebase was developed from scratch to ensure I understood how each part of the code worked.

        Model

        The first part of the code trains a four layer neural network to classify MNIST images. After training we got a validation loss of 0.09 and a validation accuracy of 0.98, indicating the model does well. For clarity, all losses described in this section will refer to loss on the validation set.

        SAEs

        Next, two autoencoder architectures are implemented, one that learns both an encoder and decoder, and one that learns only an encoder as its decoder is tied as the transpose of the encoder. Empirically, the tied autoencoder seemed to perform better and achieved an L1 (sparsity) loss of 0.04928, and an L2 (MSE) loss of 0.03970. Seeing these numbers close in magnitude is good, indicating that the model is neither penalizing too much nor too little for L1 sparsity loss.

        For a random input: The top diagram depicts neuron activations (blue) and reconstructed neuron activations from the SAE (orange), indicating the SAE has low L2 loss and reconstructs the input well. The bottom diagram depicts the feature activations for the same input, showing they are sparse. Notably, 38/64 of the neuron activations have magnitude above 0.3, but only 7/256 of the encoded features have magnitude above 0.3.

        Feature Connectors

        Then, a feature connector was implemented, which learns the matrices $W_2$ and $b_2$ described above mapping one layer to another layer. The inputs are the set of all feature $i$ activations and the outputs are the set of all feature $i+1$ activations, allowing us to perform gradient descent on the loss (which consists of L1 sparsity and L2 MSE) to optimize $W_2$ and $b_2$. The L1 (sparsity) loss was 0.02114 and the L2 (MSE) loss was 0.03209, indicating that there is a good tradeoff between L1 and L2 penalty.

        Weights matrix connecting neuron layer 1 to neuron layer 2. This is a mess. 2205 weights have magnitude greater than 0.1.
        Weights matrix connecting encoded features in layer 1 to encoded features in layer 2. This is nice and sparse. 458 weights have magnitude greater than 0.1.

        Below is what the feature connector matrix looks like after each epoch of training.

        Simulating the Feature Network

        Finally, we replace neuron connections with feature connections. This means that when we pass an input through the network, we immediately encode it as a feature and propagate it through the feature connector weights, skipping the neuron layer weights. In this network, removing two neuron to neuron layers and substituting them with feature to feature layers results in a decrease from 97.8% accuracy to 94% accuracy, which is pretty good considering we made our network much sparser.

        Next, I tried to visualize the features using a variety of methods (both inspired by a class lecture and a Distill blogpost ). Unfortunately, I did not find the features to be much more interpretable than the neurons for the MNIST dataset. Still, our results are cool: we can take a network, and with only a fraction of the parameters maintain comparable performance.

        Language Models

        I shared these results with Logan Riggs, one of the independent researchers who published about SAEs in October 2023. Excited about the possibility, we collaborated to see if we could achieve the same results for language models, anticipating that the learned features might be more interpretable. We and a couple other collaborators published a blogpost showing that the learned features in Pythia-70M are indeed interpretable, and there are cool relationships! (the remainder of this section is adapted from that blogpost)

        Below we show some examples of sparse linear feature connections. For the curious reader, additional examples can be found here.

        OR Example

        In Layer 1, we have:

        \[OF_{30} = 0.26IF_{2797} + 0.23IF_{259} + 0.10IF_{946}\]

        where OF is output feature (in MLP_out), and IF is input feature (in Residual Stream before the MLP)

        Below is input feature 2797, activating strongly on the token “former”

        This is 5 examples. For each example, the top row of words are feature activation e.g. token "former" activated 9.4. The bottom blank row is: if we removed this feature, how much worse does the model get at predicting these tokens? e.g. Soviet is 5.5 logits worse when the model can't use this "former" feature.

        Below is input feature 259, activating strongly on the token “old”

        Below is input feature 946, activating on the token “young”

        In the output feature, we see the tokens former, old, and young all activate, with young activating about half as strongly as “former” and “old” as we would expect from the weight coefficients.

        \(OF_{30} = 0.26IF_{former} + 0.23IF_{old} + 0.10IF_{young}\)

        We can view this computation as a weighted logical OR. Output Feature 30 activates on former OR old OR young.

        Negative Weight Example

        In Layer 1, we have:

        \[OF_{505} = 0.68IF_{3021} -0.21IF_{729}\]

        where OF is output feature, and IF is input feature.

        Below is input feature 3021, activating strongly on tokens like “said” which in almost all cases appear not after a quote.

        Below is input feature 729, activating strongly on tokens like “said” when they appear shortly after a quote.

        Below we see the output feature activates on tokens like “said” that have no prior quote tokens. We’ve “subtracted out” with a large negative weight, so to speak, the examples where “said” appears after a quote, and now the feature only activates when “said” appears without any prior quotes.

        \[OF_{505} = 0.68IF_{(\text{"said" in many contexts})} -0.21IF_{(\text{"said" after quotes})}\]

        We can view this computation as a weighted logical AND. Output Feature 505 activates on A AND ~B. In the case where A is a superset of B, this is the complement of B e.g. I have the set of all fruits and all yellow fruits, so now I can find all non-yellow fruits.

        Conclusion

        Our exploration into interpreting neural networks using Sparse Auto-Encoders has shown promising results. The ability to unwrap the layers of a neural network into a more interpretable, sparse representation without a significant loss in performance supports the superposition hypothesis. Even if the features were only interpretable on some architectures/datasets, I am optimistic that Sparse Auto-Encoders will not only make deep neural networks more interpretable, but they will also allow for quicker parallelized inference since each output feature will depend on a small fraction of the total possible input features.

        I’d like to thank everyone who has contributed to my deep learning education this semester. I have learned a tremendous amount and really enjoyed working on this project.

        \ No newline at end of file diff --git a/blog/2023/mapreason/index.html b/blog/2023/mapreason/index.html new file mode 100644 index 00000000..d4279290 --- /dev/null +++ b/blog/2023/mapreason/index.html @@ -0,0 +1,42 @@ + Reasoning with Maps: Assessing Spatial Comprehension on Maps in Pre-trained Models | 6.S898 Deep Learning Blogs 2023

        Reasoning with Maps: Assessing Spatial Comprehension on Maps in Pre-trained Models

        Map reasoning is an intuitive skill for humans and a fundamental skill with important applications in many domains. In this project, we aim to evaluate the capabilities of contemporary state-of-the-art Large Vision-Language Models (LVLMs) for reasoning on maps and to compare their capabilities with human participants on the coregistration task. We additionally propose and release a novel dataset to serve as an initial benchmark for map reasoning capabilities. We run an extensive analysis on the performance of open-source LVLMs showing that they struggle to achieve good performance on our dataset. Additionally, we show that coregistration is intuitive to human participants who were able to achieve close to perfect accuracy in a time-constrained manner.

        Motivation

        Humans possess a remarkable ability to intuitively understand and make sense of maps, demonstrating a fundamental capacity for spatial reasoning, even without specific domain knowledge. To illustrate this, consider the following question: Do these two maps represent the same location?

        Answering this query necessitates coregistration, the ability to align two maps by overlaying their significant landmarks or key features. Moreover, humans can go beyond mere alignment; they can tackle complex inquiries that demand aligning maps, extracting pertinent data from each, and integrating this information to provide answers.

        Map reasoning is a fundamental skill with important applications in domains such as navigation and geographic analysis. For example, pilots need to be able to reference and understand multiple kinds of FAA charts as a core prerequisite for many aviation-related tasks. Further, making inferences on historical maps that lack digitized versions relies on human capabilities for reasoning on maps and is crucial for various fields such as geology or archeology. Machine learning models that can match human visual map understanding hold substantial promise in these applications. Additionally, such models have the potential to enhance accessibility by providing alternative modalities for individuals with visual impairments to comprehend and extract spatial information from maps.

        Our work aims to tackle the following question: To what degree do contemporary state-of-the-art (SOTA) machine learning models, pre-trained on vast datasets comprising millions or even billions of images, possess the capacity for spatial reasoning and do they reach the human level? We will do this specifically by focusing on the task of coregistration.

        We propose a map reasoning dataset which we believe is a suitable initial benchmark to test the capabilities of multimodal models on coregistration; The example given above about coregistration possibly cannot be answered directly using prior knowledge a Large Language Model (LLM) might have while ignoring the vision modality. Moreover, the complexity of the task can be increased and controlled, leading to a rigorous evaluation of the model’s ability to comprehend and synthesize information across textual and visual modalities.

        Literature review and the gap in previous literature

        Multimodality: There are countless significant recent advances in Large Language Models (LLMs) achieved by models such as Meta’s Llama 2, OpenAI’s GPT 3, Google’s Palm 2, WizardLM, and countless others. These models have successfully achieved or surpassed human-level performances on numerous natural language processing tasks (such as Retro-Reader on the SQuAD2.0 benchmark, ST-MOE on the SuperGLUE benchmark, and many other benchmarks). LLMs have achieved a surprisingly high level of knowledge about text by being able to achieve very high zero-shot scores on many NLP tasks demonstrating their understanding and versatility in Language as opposed to non-LLM NLP models that are usually trained to accomplish a specific task and do not generalize beyond that task.

        Recently there has been a massive push towards integrating other modalities into LLMs, most notably vision. Models such as Google’s Gemini, OpenAI’s GPT 4, VisionLLM, and many others all focus on scaling the capabilities achieved by LLMs in the NLP domain to the vision domain. The language modality LLMs are trained on consists mostly of written text recorded online that follows a grammatical structure from a human language (English, etc.) or a computer language (Python, HTML, Latex, etc). However, this is in stark contrast to the vision modality which can consist of categorically different representations consisting of anything from photographs of people in the park, to a representation of the Silicon Atom from a computer simulation, to an infographic about the importance of drinking enough liters of water per day. This difference between the distribution of the entirety of text modality and the vision modality hints that much more careful attention needs to be placed on compiling unique and diverse datasets that aim at sufficiently representing the distribution of the vision modality.

        Vision-Language Benchmarks: There were significant strides made in the past years in developing benchmarks and datasets for LVLMs which are composed of questions that require both Language and Vision to successfully answer. However, there are very few datasets that include or focus on maps as part of the benchmark. LVLM-eHub compiles numerous benchmarks for LVLMs such as benchmarking object and scene recognition using ImageNet1K, GVT, and many other datasets, or benchmarking visual common sense using ImageNetVC and the visual commonsense reasoning datasets. They also include a Visual Reasoning benchmark, however, it focuses mostly on real-world photographs (such as the Visual Spatial Reasoning dataset or the VisDial Dataset that is based on images from COCO). However, none of these datasets place focus or sufficiently contain images of maps or diagrams while they place much more focus on real-world images.

        A frequently used dataset for evaluating LVLMs, which is also included in the Visual Reasoning benchmark, is the ScienceQA dataset which includes more than 20 thousand multimodal multiple-choice questions across 127 categories including a category for maps. However, examining the narrow slice of questions with images in the geography/map category shows that many of them do not necessitate a vision component and can be answered solely based on the textual question (e.g. “Which of these states is farthest north?” with four different states as a multiple choice question), or provide an entirely unused visual representation (e.g. providing a map of the United States with no text alongside the question “What is the capital of New York?”). Out of the questions that rely on the visual component to correctly answer, many of them require relatively little understanding of maps such as asking “Which country is highlighted?” which only requires visual matching of the highlighted section with typical shapes of countries or continents. Additionally, recent papers such as LLama-adapter have demonstrated that it’s possible to achieve a high accuracy of 78.3% on ScienceQA using an unimodal text-only Large Language Model. Thus, although ScienceQA does have a subsection dedicated to maps, it does not seem sufficiently capable of testing the capabilities of LVLMs to reason and understand maps.

        An area closely related to maps that does have a relatively higher degree of focus is the capability of models to parse and reason about diagrams, figures, and plots. Datasets on this topic include the ACL-FIG which involves classifying and labeling scientific figures, InfographicVQA which requires reasoning over data visualizations in infographics, ChartQA which requires reasoning over charts, and many other datasets that focus on figures and plots. Models have been developed to specifically tackle this challenge, such as Google’s DEPLOT which is capable of reasoning over charts and plots by translating them to text and then using an LLM as a reasoning engine on top of the outputted text. However, charts and plots are still significantly different from maps, as the plots these datasets usually contain are simple line charts and bar graphs that can be translated into a table or textual format in a relatively lossless manner, while it is difficult or impossible to perfectly transform a sufficiently detailed map to a textual format without losing information. This illustrates the inherent complexities associated with processing maps meant to depict dense information which requires direct reasoning on the vision modality as opposed to charts and plots which present data in a simple manner.

        Maps Reasoning: Huge strides have been made in specific tasks related to maps, such as image-to-map conversion and map segmentation. However, we wanted to focus more generally on map understanding and reasoning by LVLMs as opposed to a single task-specific performance. To draw on an analogy, consider the act of isolating specific parts of speech (such as nouns or verbs) in language. A model designed exclusively for this task lacks the comprehensive understanding exhibited by an LLM which is proficient in addressing almost any language task. In the context of map co-location, deep learning models are employed solely as preprocessing steps to extract relevant information for subsequent utilization by matching algorithms as opposed to an LVLM model capable of general reasoning on maps. For example, the authors in this study use region-based CNN to extract road intersections, which are subsequently input into a map-matching procedure. Other features like street names have also been proposed in the literature. In general, current frameworks for map reasoning require many hand-crafted and engineered steps (see, e.g., this work and the references within). A recently proposed dataset, MapQA, is closely related to what we consider as map reasoning. However, the maps contained are of sufficiently low information-density and exhibit similar limitations to what we described in InfographicVQA and ChartQA as the images provided can be sufficiently translated to a textual domain before considering the textual input using a similar technique to DEPLOT. To the best of our knowledge, there are no examples in the literature where LVLMs are used to directly reason about maps at the detail we propose and perform tasks such as coregistration.

        Our aim is to tackle the gap in assessing the map reasoning capabilities of LVLMs by developing a dataset aimed only at coregistration and analyzing the capabilities of existing models on such a dataset. We focus our benchmark construction on the specific task of coregistration as it serves as an indicator of map reasoning capabilities and is one step towards constructing a comprehensive benchmark for map reasoning capabilities of LVLMs.


        New Dataset

        We have opted to create and compile a map dataset focusing on maps from the aviation domain for our research. The maps we utilized are carefully crafted by aviation agencies to provide a wealth of information while maintaining readability within a concise timeframe, ensuring clarity for pilots. Our dataset will be constructed by incorporating maps from the following sources:

        1. World Visual Flight Rules (VFR): These maps are intended to guide pilots when they operate aircraft visually. They include aeronautical and topographic information such as airports, obstructions, and navigation aids.

        2. World Instrument Flight Rules (IFR) Low: These maps are suitable to assist pilots when they control the aircraft through instruments. They contain information such as cruising altitudes, route data, and controlled airspaces.

        These maps are accessible in an interactive environment through the SkyVector website (VFR, IFR Low), which we used as part of our dataset generation pipeline.

        To generate the map snippets for our experiment, we chose to sample from the previous map sources around airports. This selection guarantees that the snippets are inherently information-rich, given that the map originates in the aviation domain. To ensure diversity in our dataset, we specifically sampled airports situated in the states of Massachusetts, New York, Delaware, Arizona, and Hawaii.

        The resulting dataset exhibits significant variations in terms of density, featuring both isolated airports and those nestled within cities, diverse locations such as inland, seaside, and islands, as well as various terrain types ranging from greenery landscapes, mountainous regions, and arid environments. In total, our dataset contains 1185 image pairs, each image is 600x600 pixels in PNG format. The total size of our dataset is 1.28 GB.

        A glimpse of the coregistration task

        To gain an understanding of our task and its intricacies, we present a few examples from our dataset. Generally, humans can successfully align two maps by identifying common features, which fall into one of the following categories:

        • Terrains: such as shorelines or mountains.
        • Charts: such as flight paths or restricted airspaces.
        • Landmarks: such as airport or city names.

        The process of mapping by terrain is typically swift for humans, especially when there are ample distinctive details. On the other hand, mapping by chart requires a more thoughtful approach, involving careful examination to establish a connection between the depicted attributes. Mapping by names usually serves as a last resort, employed if the prior approaches prove unsuccessful. Consider the following examples:

        Positive Pair #1: A VFR map (left) and an IFR map (right) that depict the same location (Port Allen Airport in South Kauai, Hawaii) and can be coregistered easily by the shorelines of the island.
        Positive Pair #2: A VFR map (left) and an IFR map (right) that depict the same location (Circle H Ranch Airport in Arizona) and can be coregistered by aeronautical lines (even if one does not know what these lines represent in the aviation domain).
        Positive Pair #3: A VFR map (left) and an IFR map (right) that depict the same location (Bertrand Chaffee Hospital Heliport, New York). The VFR map is feature-rich providing information for Helicopter pilots while the IFR map is a hard example due to being almost blank as the Heliport does not facilitate landing any IFR aircraft. Thus, the IFR map only depicts certain names of cities and waypoints. The two maps can be coregistered by matching the name SPRINGVILLE.

        All of these examples are positive (the maps show the same location). We showcase below negative examples with varying complexity.

        Negative Pair #1: An easy and straightforward negative for humans due to the VFR map depicting a shoreline crossing the map vertically, while the IFR depicts only a landmass.
        Negative Pair #2: A harder negative example. Aeronautical lines partially appear similar but the locations are different. Naively matching circles on both maps to classify a positive pair would provide a false positive.

        We showcase multiple positive and negative pairs alongside the natural reasoning that a human would take to correctly classify the pairs. We hope that this showcases the complexity of the task and the various strategies involved in achieving successful coregistration.


        Experiments

        Zero-shot evaluation

        To start, we want to evaluate the zero-shot performance of pre-trained LVLMs on the task of identifying whether the two images are the same (coregistration). The models we start our evaluation with are BLIP-2, ViLT, LXMERT-VQA, and LXMERT-GQA. We specifically chose these models as they are all publicly available multimodal text generative models that were partly trained on visual question-answering datasets. Thus, they are able to accept both the vision and language inputs consisting of an image of the two side-by-side maps alongside the yes-or-no question of whether the two maps depict the same geographical location.

        To verify that the models we obtained are behaving as expected and are capable of answering a textual question that relies on a visual component, we compile a very simple dataset of 200 cat and dog pictures, half the images depict a cat while the other half depict dogs. We present these trivial images to the models alongside the prompt “Is this an image of a cat? Answer:” and generate a single token. As expected, out of the 200 images all four models achieved an almost perfect classification accuracy (>95% for all 4 models) by answering with either a “Yes” or a “No” token.

        This is not surprising because, as mentioned, object recognition questions are very prevalent in visual question-answering datasets, especially on ubiquitous everyday objects such as cats and dogs. To see if these models can generalize beyond their training datasets and properly reason on maps, we start by running the following experiment:

        Experiment #1: For each VFR and IFR image pair, we generate two examples (positive and negative). For the positive example, we use the correct pairing (e.g., maps from the same location with the two different styles). For the negative example, we randomly replace one map uniformly from our datasets. Each model is provided with a concatenation of the two maps in its vision input, and with the question “Do these two maps show the same location? Answer with “Yes” or “No”. Answer:” in its text input.

        In total, each model was asked 2370 questions. Below, we show the accuracy, precision, and recall that each model obtained.

        The models performed barely above random guessing in the zero-shot experiment, and some models consistently produced the same single output (either “yes” or “no”) regardless of whether the input image was a positive or negative pair.

        While the results of the models are very low and barely above random guessing, we wanted to analyze whether this failure is due to the model not comprehending the task or whether the issue is simply in the last layer of the model where the text generation occurs. The reason behind this analysis is that there is a possibility that the LVLM is able to correctly capture all the features necessary for determining whether the two maps coregister while still failing at providing the final answer due to the final layer of the model outputting an incorrect distribution over the labels (or tokens in the case of LVLMs). Thus we decide to ignore the last linear layer of the model (the language model head) and capture the hidden state of the last token from the last layer of the model.

        Fine-tuned evaluation

        Using this methodology, the output we obtain from each model is a single embedding vector (the length of which depends on the embedding size of the model). Usually, a single linear layer is finetuned on the last layer to directly predict the answer. However, we opt for a more detailed analysis by training multiple classifiers (Logistic Regression, SVM, and XGBoost) that take the embedding vector and produce a binary output. In all the upcoming figures, we always report the results using the classifier that performed the best (for each model) on the validation set.

        Moreover, it is known that LLMs can be sensitive to prompts. Thus, to more rigorously analyze the performance of the models, we create a total of 8 variations of the coregistration question, as this one question serves as the text input for all pairs of images in our created dataset. A detailed analysis of these prompts will be provided later. Similar to the classifiers, we report results using the best prompt for each model.

        This methodology of using the model to extract a rich embedding that contains the answer to our prompt (instead of generating the answer directly as text) means that we are now capable of utilizing additional large transformer-based multimodal models that output embedding vectors instead of directly outputting text. Thus we include in our analysis two such models which are FLAVA and GIT that output embeddings that represent the multimodal input. Those embeddings are then similarly passed to the simple classifiers to detect whether the models are capable of capturing the required details from the maps to answer the co-location task correctly. Thus we perform our second experiment:

        Experiment #2: We generate the examples using the same procedure described in Experiment #1. Then, for each model, we pass through the entire dataset and generate an embedding for each sample in our dataset. We then train the simple classifiers on 70% of the embedding vectors to predict the positive and negative pairs. We report the evaluation on the other 30% data and report the results in orange below.

        The accuracy of this fine-tuning methodology (shown in orange) achieves around 65%-70% for all models which is a significantly higher accuracy compared to the zero-shot accuracy of the LVLMs (shown in red) which was incapable of achieving more than 55%. This experiment shows that the embedding of the last token does contain a slightly more feature-rich representation of the multimodal input and can be used to classify the positive/negative pairs at a higher rate than random but is overall still incapable of sufficiently solving the task.

        Thus far we have tried to assess the capabilities of LVLMs and (more generally) Multimodal Vision Language models on solving the coregistration task, and we assessed this capability using our constructed dataset of determining whether two maps of different styles represent the same location or not. Given the low accuracy achieved on this task, we can claim that the LVLMs we have analyzed are incapable of reasoning and answering more complicated questions relative to our simple baseline question of “Are these two maps of the same location?”.

        Improving results for co-registration

        We emphasize that our goal is not to directly achieve high accuracy on this task by utilizing any machine learning model, but rather it is to evaluate the capabilities of LVLMs to reason on maps. Furthermore, we created and proposed this dataset and task to act as a baseline for assessing the reasoning abilities of LVLMs on maps.

        However, despite the failure of LVLMs to answer this baseline task, we next want to assess the inherent difficulty of the dataset. For this, we develop a simple model by utilizing the same simple classifiers used above to train on the embedding of a unimodal vision-only model. Unlike LVLMs, we are not testing our proposed task-specific model on the dataset to assess its capabilities for reasoning on maps, as the model is not trained to answer questions based on images, does not accept text modality, and is specifically fine-tuned to solve this one narrow task. Thus, the results of this experiment serve only to give a sense of the difficulty of the task that we considered as a simple baseline for map reasoning. This will hopefully demonstrate that the relatively older frozen vision-only models can achieve a significantly higher accuracy on this specific task when compared to state-of-the-art open-source LVLMs and possibly indicate the gap between the embeddings captured by the vision-only model and the LVLMs.

        Experiment #3: We develop a simple unimodal vision classification model by utilizing a frozen CLIPVision model as a backbone. First, we feature-engineer the input by subtracting the two maps from each other in the image space to produce a single image. This image is passed through the frozen CLIPVision model to generate an embedding of the difference between the maps; the embeddings are then used to train the simple classifiers mentioned above and the one that achieves the highest accuracy on the validation set is reported below.

        We see that our fine-tuned vision model (shown in green) achieves a significantly higher accuracy than all previously tested LVLMs. This shows that the task is not a significantly difficult vision task as a frozen CLIPVision model with a head fine-tuned on approximately two thousand samples was able to sufficiently extract an embedding and correctly distinguish positive and negative pairs 85% of the time.

        This significant difference between the accuracy of the frozen CLIP model and the LVLMs on this task signifies that the LVLMs we tested are still significantly farther behind on certain tasks even when compared to a frozen vision-only model that was trained and released years prior. This is in stark contrast to the significant achievements that LLMs accomplish on numerous datasets when compared to task-specific NLP models, where the highest-scoring models on most NLP datasets are LLMs.

        Human benchmarking

        So far, we have examined the performance of pre-trained LVLMs on our proposed dataset in a zero-shot as well as a fine-tuned manner alongside a vision-only model with feature engineering to assess the difficulty of the task.

        A natural next question to analyze is the performance of humans on this same task as it is not immediately clear how hard or easy the task is for us. The performance achieved by humans on a task such as this would serve as a great target for LVLMs to try to reach.

        Experiment #4: We present the following task to two subjects. Each human subject will see two maps for 10 seconds. The pair can be positive or negative with equal probability. After the 10 seconds elapse, the maps automatically disappear and the human subject is asked if the two maps show the same location with a binary “Yes” or “No” choice. After the answer is received, a new pair is sampled and this process is repeated until we gather 50 answers from each human subject.

        The 10-second window acts as a pseudo-computational limit on the human subject and ensures that the subject’s answers are mostly based on visual and spatial reasoning and not on reading and comparing text. If the subject does not immediately identify a visual or spatial cue, the 10-second window possibly allows for a maximum of one or two texts to be compared if the subject is quick enough. This time limitation prevents the participants from spending an extensive amount of time comparing the nuances of the two images for a severely long time which would make the task more trivial. Below, we show the accuracy obtained from two human subjects and compare it with the previous LVLM results.

        We see that both human participants (shown in blue) achieve a significantly higher accuracy (~95%) compared to all the tested ML models. This shows that the task is significantly easier for humans despite the 10-second time limit preventing the subject from extensively comparing the images.

        Our experiments showcase the inability of LVLMs to properly solve our proposed dataset on coregistration as well as showing that a vision-only fine-tuned model with feature-engineering is able to solve the task at a significantly higher accuracy. Finally, we show that humans are able to solve the time-constrained task with a significantly high accuracy.

        Analysis on prompt engineering

        Numerous recent studies have indicated the importance of prompt engineering in the quality of the output of Large-Transformer based models.

        Due to the potential importance of prompts in affecting performance, we decided to run all experiments that require prompts using multiple different prompts with varying degrees of length and complexity. We note that the prompts considered and listed below were only the ones that consistently conditioned the model to output a “Yes” or “No” output token instead of any other arbitrary sentence completion output. The prompts are shown in the following table:


        ID Prompt
        1 Are these two maps the same? Answer with “Yes” or “No”. Answer:
        2 Do these two maps show the same location? Answer with “Yes” or “No”. Answer:
        3 Do the two charts depict the same area? Answer:”
        4 The following image contains two maps with different styles side by side. Do the two maps show the same location? Answer with “Yes” or “No”. Answer:
        5 On the left there is a map from the VFR dataset and on the right a map from the IFR dataset. Do the two maps show the same location? Answer with “Yes” or “No”. Answer:
        6 There are two maps of different styles, do they represent the same area or are they completely different? Answer:
        7 The following image contains two maps with different styles side by side. Do the two maps show the same location? Try to compare the maps by looking at key landmarks or features. Answer with “Yes” or “No”. Answer:
        8 Carefully examine the following two images that contain two maps with different styles side by side. Do the two maps correspond on the same latitude and longitude point? It is of utmost importance that you answer this correctly. Answer with “Yes” or “No”. Answer:


        The initial prompts (prompts #1 - #3) are meant to be short and direct, while the ones in the middle (prompts #4 - #6) are more verbose and add a bit more complexity, while the last two (prompts #7 - #8) are very verbose and add an exact explanation of the task. We also include additions to some of the prompts that try to guide the models on how they accomplish the task, and some additions that emphasize the importance of correct answers. In the figure below, we study the effect of prompts on model performance.

        We notice that varying the prompts has a relatively low variance in terms of accuracy, with an improvement of less than 5% for all models across all prompts. Still, there are no strong general trends across models when considering prompts with increasing complexity. We note that the VILT model was incapable of accepting prompts #5 - #8 due to the limitation of its maximum context length which is shorter than the other models.

        One aspect that might limit this analysis is that almost all prompts contain an explicit requirement for the models to provide answers immediately (e.g., “Answer with ‘Yes’ or ‘No’. Answer:”). This was done to reduce the computational inference cost and avoid generating long sequences of texts. The models might respond better to some prompts if they were allowed to reason about their answers first.

        Investigating the failure points of LVLMs on coregistration

        The figures presented in the beginning of the blog post demonstrating some examples in our proposed dataset give a clue of the variance in the difficulty of the examples in the dataset, where some samples are easy to identify as positive pairs and others much harder to do so.

        Thus, to get a better insight into the model’s performance and investigate its failure points, we investigate some examples where the models made confidently wrong predictions. Here, we focus on a single model, FLAVA, which was our best-performing LVLM. In the figure below, we investigate both false positives with the highest predicted positive label and false negatives with the highest predicted negative label. The figure contains the 9 examples where the model generated a very high (very low) score while the true label was positive (negative).

        For the false positives, we see more than one example where two maps containing water were wrongly classified. This might indicate that the model is making predictions on these images based on colors more so than spatial reasoning. For the false negatives, there are many examples where the VFR chart is dense while the IFR is sparse. These examples require discarding a lot of information from the VFR charts and focusing solely on the region where the IFR chart contains information. Given that the model made wrong decisions in these examples, there might be a preference for positively matching images based on density. Notably, some of these examples were straightforward for the human subjects (matching based on the shoreline), while other examples required more effort (matching between dense and sparse maps).


        Discussion, Limitations, and Future Work

        One of the key takeaways of our experiments, and specifically from contrasting the first two experiments with the third experiment, is that it was not difficult for a non-LVLM model to achieve an 85% accuracy on our proposed dataset. Yet, our dataset proved to be challenging for LVLMs, especially in zero-shot performance where they achieved little better than random guessing. This implies that it would be beneficial to further expand future datasets used for LVLM training, specifically by adding data similar to what we propose, which could provide invaluable improvements to future training of LVLMs.

        Existing vision-language benchmarks exhibit a heavy focus on real-world objects and scenes, with a distinctive lack of images and questions on maps. This is despite the fact that maps are ubiquitous and used in many real-world scenarios. Furthermore, many maps are easily accessible in digital format and ready to be integrated into vision-language benchmarks. We believe such inclusion would require relatively little effort in terms of data collection while providing significantly higher capabilities for LVLMs.

        We plan to expand the size of our new dataset used in this project and to make it publicly available. Additionally, while our current project primarily focused on the coregistration tasks, we have plans to incorporate more intricate and challenging questions that delve deeper into map reasoning.

        There are some limitations to the current analysis done in this project. A significant limitation is the computational limit preventing us from feasibly generating answers from the LVLMs in an autoregressive manner instead of our analysis which used only one output token per sample. A possible future work is examining more complicated generation methods such as Chain of Thought prompting. Additionally regarding the inputs of the models, although we investigated different text prompts, we only used one template for the visual prompts while there have been multiple recent works on visual prompt engineering in vision-language models analogous to textual prompt engineering in LLMs. It could be the case that some models are sensitive to the way the maps are concatenated. This aspect warrants further investigation to gain a more comprehensive understanding of how different concatenation methods might impact model performance and results.

        Another limitation is that we were only capable of running our analysis on open-source models; the largest model tested was BLIP-2, with fewer than 3 billion parameters. This was the largest LVLM whose weights we had access to for running our analysis. Future work could attempt to run the analysis on larger closed-source models if access is granted.


        Conclusion

        In this project, we propose a novel dataset to serve as an initial benchmark for the capabilities of LVLMs to reason on maps with the goal of addressing a gap in current LVLM benchmarks and datasets.

        Using this dataset, we run an extensive analysis on the performance of open-source LVLMs showing that they struggle to achieve good performance on the coregistration task. Additionally, we show that the task for our dataset is a relatively simple vision task by showing that a fine-tuned vision-only model released years prior to the tested LVLMs achieves a significantly higher accuracy. Finally, we show that the coregistration task is intuitive to humans, as participants were able to achieve close to perfect accuracy even in a time-constrained manner.

        We hope that future initiatives regarding data collection for LVLMs and training foundational LVLMs will put more emphasis on datasets such as our proposed datasets. This will hopefully unlock new capabilities for LVLMs enabling them to advance beyond their current limitations and possibly expand their utility and reasoning abilities in a variety of real-world scenarios.

        \ No newline at end of file diff --git a/blog/2023/multilingual-representations-in-embeddings-models/index.html b/blog/2023/multilingual-representations-in-embeddings-models/index.html new file mode 100644 index 00000000..7400efd4 --- /dev/null +++ b/blog/2023/multilingual-representations-in-embeddings-models/index.html @@ -0,0 +1,42 @@ + Cross-Lingual Fine-Tuning for Multilingual Text Embeddings | 6.S898 Deep Learning Blogs 2023

        Cross-Lingual Fine-Tuning for Multilingual Text Embeddings

        Exploring contrastively training text embeddings, and presenting a scalable, cheap and data-efficient method to train multilingual embedding models

        Introduction

        Recently, embeddings models have become incredibly popular as LLMs become more integrated into tools and applications. Embeddings models (specifically, Siamese encoder-only Transformers) are the state-of-the-art method in retrieval, an old problem in computer science. Embeddings are often used in settings like recommendation algorithms, similarity search, and clustering, and have recently found extensive use in Retrieval-Augmented Generation, assisting LLMs to be more knowledgeable and truthful. However, the best embeddings models are trained on only English data, which means they suffer greatly at applications in other languages, and are inaccessible to most of the world. In this blog post, we summarize the history of embeddings research, detail the training regime of a modern embeddings model, present a new multilingual embedding benchmark, and investigate whether it is possible to fine-tune in multilingual capability to a pretrained monolingual model.

        Our central question is whether it is possible to learn new languages at the finetuning stage, using contrastive training on publicly available text pair datasets. If successful, it would mean that the encoder can learn a map from one language onto the embedding space of another. This implies that it is possible to approximate translation, at a conceptual level, with a transformation. We will study the results on various language pairs, and compare to a fully pretrained multilingual model.

        The Embedding Task

        The aim of embedding text (or any other medium) is to convert human-readable information into vectors. This is useful, because while neural nets cannot process words, images, or sound, they can process vectors. Every NLP model thus has some form of embedding - GPTs, for example, have an embedding layer at the start that transforms input tokens into vector representations. GPTs need an embedding layer because the amount of unique tokens is huge (GPT-2, for example, has 50,257 possible tokens), and it is much more computationally efficient to work with lower-dimensional vectors (GPT-2 embeds these down to 768-dimensional vectors to compute with).

        Embeddings models, as described by OpenAI

        Because of this reduction of information, embeddings are also a form of compression. To turn a whole sentence (or paragraph) into a vector requires prioritising some characteristics and losing others, and we find that the most valuable thing to prioritise is semantic and contextual information. This leads to a very useful property: text pairs with similar meanings or usage patterns tend to have similar vector representations. For example, the vectors “cat” and “dog” are closer to each other than “cat” and “cucumber”. Even more interestingly, as found in the Word2Vec paper, this property causes embeddings to have arithmetic consistency, as shown in the famous “king - man + woman = queen” example. You can explore the Word2Vec embedding space in the interactive visualization below:

        Visualisation of Word2Vec for the 250 most common English nouns

        While this may seem abstract, embeddings have found usage in many downstream and commercial tasks, including:

        1. Classification - embeddings models classify sentences, such as in sentiment analysis between positive or negative airline reviews.
        2. Search - models return nearest-embedded results to a search query, understanding synonyms and context.
        3. Recommendation - models return embeddings that suggest related items users may like, for example clothes and jewellery.
        4. Clustering - embeddings are used to cluster datapoints into smaller groups, with downstream algorithms like k-means.
        5. Reranking - embeddings are used to sort a list, such as one retrieved from a database, into most relevant items.
        6. Retrieval - a query is embedded, and answers are selected by the closeness of their embedding.

        History and Background

        The first successful approaches to these problems were bag-of-words models. These are non-neural algorithms that work by ranking documents based on how many word occurrences they share. There were some improvements around this basic idea, for example Okapi BM25 includes a term for the expected likelihood of that word co-occurring.

        Sentence about bird bird, heard is the word you
        About the bird, the bird, bird bird bird 1 3 2 0 0 2 0 0
        You heard about the bird 1 1 0 1 0 1 0 1
        The bird is the word 0 1 0 0 1 2 1 0
        A table demonstrating bag-of-words calculation.

        The first neural approaches to this problem actually used bag-of-words as a loss function, for example Word2Vec (2013) used either continuous bag-of-words (CBOW) or skipgram loss to train a word embedding model. Word2Vec itself is a shallow two-layer neural network that is used to generate an embedding, which in the CBOW training regime is used to predict a word given a bag of surrounding words. The skipgram loss is similar, but weighs words depending on their proximity to the word we’re trying to predict. This word-prediction-from-embeddings task is a key part of training language models to have useful representations, and we’ll see it again later.

        Word2Vec had some incredible results, and was later improved by subsequent approaches, but word embeddings often failed due to the fact that words with multiple meanings had to share the same point in the embedding space. The sentences “I went to the bank to cash a check” and “I went to the bank to catch a fish” are obviously semantically unrelated, but the word “bank” will necessarily have to share an embedding, making the embedding itself likely meaningless.

        Visualisation of Word2Vec struggling with polysemanticity in the "riverbank" example

        To solve this, embeddings need to be generated in-context, and be able to support multiple meanings. There were some attempts at changing Word2Vec to support polysemanticity, such as Multi-Sense Skip-Gram (MSSG), but they required hacky workarounds such as pre-programming an expected number of meanings for each word.

        BERT

        BERT was arguably the beginning of the LLM revolution, as it showed for the first time that a single pretrained language model could be finetuned to support many different tasks downstream. It was essentially an embeddings model - trained again with the word prediction task, now with the context of words not weighted by proximity, but by a trainable position embedding that provided information that the model could use to predict long-term associations and causality. This fixed the polysemanticity problem described above. It can be used to produce both word-level and sentence-level embeddings, that proved extraordinarily useful for the embeddings tasks.

        BERT Training
        BERT architecture diagram

        BERT (Bidirectional Encoder Representations from Transformers) is based on the Transformer architecture introduced by Vaswani et al. in 2017. The key differences were that BERT was allowed bidirectional context rather than left-side-only, that it did not include a decoder, and its masked language modeling and next sentence prediction training objectives. The bidirectional context is crucial for language modeling, since “The [MASK]” is much harder to predict than “The [MASK] colored fire engine”, and in embeddings (unlike text generation) we have the whole original text available.

        BERT's Masked Language Modeling loss

        MLM works by taking 15% of the text tokens that BERT sees and replacing them with a [MASK] token. The model’s objective is to predict that masked word with its embedding, using the context from the surrounding tokens, and then it is trained on the cross-entropy loss between the predictions and the actual truth.

        BERT was also trained on the NSP (Next Sentence Prediction) objective. In training, the model is given a pair of input segments, and its task is to predict whether the second segment (segment B) follows the first one (segment A) in the original text or if they are randomly sampled and unrelated. The input is constructed by concatenating segment A, which is preceded by a special [CLS] token, and segment B, with a special [SEP] (separator) token in between. For example: “[CLS] Segment A [SEP] Segment B”. BERT then produces a pair of embeddings: one for the [CLS] token at the beginning of the input and one for the [SEP] token that separates the two segments. These embeddings are then used to compute a binary classification. The intended effect is that [CLS] contains information about the overall meaning of the first sentence, and [SEP] contains information about the second. This is the first example of sentence embeddings, which are the key to how a modern embeddings model works.

        BERT's Next Sentence Prediction loss

        BERT turns token inputs into embeddings for each token in its context window, which is 512 tokens long. We can choose to construct a single text embedding from this any way we like. There are several popular strategies for this “token pooling” problem. Reading the above, one may be tempted to take the [CLS] token’s embedding. In practice, however, the [CLS] token embeddings proved to be slightly worse than just taking the average of all the individual token embeddings of the sentence, and subsequent models such as RoBERTa skipped the NSP training objective and actually performed slightly better. Why this is the case is an area of ongoing research, but as a matter of opinion, we personally suspect Shitao Xiao’s work on RetroMAE correctly diagnoses the issue, as demonstrated by their models’ improved performance on benchmarks. The training losses described in that paper are more complex and outside the scope of this blog post, but it’s worth a read if interested.

        SBERT

        The final part of the story is Sentence-BERT, and its addition of contrastive text-pair pretraining. This is what turns BERT, a general language model, into a model that specifically generates text embeddings. Contrastive training was discussed at length in 6.s898; the core insight is that we can train an encoder model to have a useful representation if we train it to embed similar examples together, and dissimilar examples far apart. In Sentence Transformers, this is done by constructing a “Siamese BERT” network. There are two BERT models (or commonly two copies of the same model) that are each used to embed a text passage. Then, the loss is calculated by the following formula:

        \[\mathcal{L}_N = -\mathbb{E}_{X} \left[ \log \frac{f_k(x_{t+k}, c_t)}{\sum_{x_j \in X} f_k(x_j, c_t)} \right]\]

        This encourages the model to predict positive pairs (similar passages) as vectors with close to 1 similarity, and negative pairs close to 0. Similarity metrics include (Euclidean) distance, but most often used is cosine similarity. Negative pairs can either be “mined” with some heuristic such as bag-of-words, or simply sampled at random from other examples in the batch. Due to this, pretraining batch sizes for embedding BERTs are often huge, in the tens of thousands.

        The Siamese BERT architecture

        The reason two models are used is that many tasks see improved performance if there is a distinction made between “questions” and “answers”. For example, searches and retrieval queries may not resemble the results they most need in meaning: “What is the tallest building in Hong Kong” and “The International Commerce Centre” are not closely semantically related, but should be paired in search contexts. Because of this, we can train a “query” and “passage” model together as one giant network on a contrastive loss, and thus get a model that can take in both.

        In practice, this improvement is rarely worth doubling the number of parameters, and so most papers simply re-use the same model for both queries and passages.

        How Embeddings Models are Trained

        Putting all this together, we have the current standard recipe for training a modern embeddings model, in up to three stages:

        1. Pretraining

        It is valuable to start with a language model that has already learned some inner representation of language. This makes the embeddings task significantly easier, since the model must only learn to condense this inner representation into a single high-dimensional dense vector space. While it is possible to use more modern LLMs such as GPT or LLaMA for embeddings, they are fundamentally hampered because they cannot attend to context in both directions. Therefore, almost all state-of-the-art embeddings models still begin from the BERT models themselves, or their derivatives. These are trained as described above, with an MLM and potentially NSP loss.

        2. Training

        Following Sentence-BERT, the model is trained contrastively. At this point, we choose a pooling strategy to convert BERT outputs into sentence embeddings. Many current papers choose to use average pooling, but max-pooling or taking the [CLS] token are occasionally used. Positive pairs are either handpicked from datasets such as search engine question-responses, or commonly generated from general text data, such as academic paper title-abstract pairs, Wikipedia page title-summaries and so forth.

        3. Fine-Tuning

        It has also become common to fine-tune especially large embeddings models on higher-quality datasets, such as MS MARCO (Bing question-passage responses), fact verification (e.g. FEVER), and paraphrasing (e.g. Quora). This increases performance at desired tasks, and was the inspiration for our approach.

        How Embeddings Models are Tested

        Similarly to how decoder LLMs have recently converged on being measured on the HuggingFace Open LLM Leaderboard, the currently ubiquitous benchmark for embeddings models is MTEB. Presented in a 2022 paper, it contains 8 embedding tasks covering a total of 58 datasets. The tasks are:

        MTEB datasets
        1. Bitext Mining: Inputs are two sets of sentences from two different languages. For each sentence in the first set, the best match in the second set needs to be found. This metric is commonly ignored in places such as the MTEB Leaderboard and in papers, because few multilingual models have been created.

        2. Classification: A train and test set are embedded with the provided model. The train set embeddings are used to train a logistic regression classifier, which is scored on the test set.

        3. Clustering: Involves grouping a set of sentences or paragraphs into meaningful clusters. A k-means model is trained on embedded texts. The model’s performance is assessed using the v-measure, which is independent of the cluster labels.

        4. Pair Classification: Requires assigning labels to pairs of text inputs, typically indicating if they are duplicates or paraphrases. Texts are embedded and distances calculated using various metrics (cosine similarity, dot product, Euclidean, Manhattan). Metrics like accuracy, average precision, F1, precision, and recall are used.

        5. Reranking: Involves ranking query results against relevant and irrelevant reference texts. Texts are embedded using a model, with cosine similarity determining relevance. Rankings are scored using mean MRR@k and MAP, with MAP as the primary metric.

        6. Retrieval: Each dataset includes a corpus and queries, with a goal to find relevant documents. Models embed queries and documents, computing similarity scores. Metrics like nDCG@k, MRR@k, MAP@k, precision@k, and recall@k are used, focusing on nDCG@10.

        7. Semantic Textual Similarity (STS): Involves assessing the similarity of sentence pairs. Labels are continuous, with higher scores for more similar sentences. Models embed sentences and compute similarity using various metrics, benchmarked against ground truth using Pearson and Spearman correlations. Spearman correlation based on cosine similarity is the main metric.

        8. Summarization: Evaluates machine-generated summaries against human-written ones. Models embed summaries, computing distances between machine and human summaries. The closest score, such as the highest cosine similarity, is used for evaluation. Metrics include Pearson and Spearman correlations with human assessments, focusing on Spearman correlation based on cosine similarity.

        We can see that MTEB represents many downstream users’ desires as described earlier, but could be criticised for favoring cosine similarity as a distance metric for training. In either case, MTEB has demonstrated, and itself encouraged, some trends in research:

        Scaling

        The MTEB paper itself, as well as the GTR and Sentence-T5 papers, suggested that model parameters are correlated with higher performance. We should expect that from intuition about GPTs and their scaling laws, larger models perform better.

        Figure 3 from MTEB demonstrating scaling vs. performance

        However, if we extrapolate to more recent research , we find that the state-of-the-art models have failed to get bigger over time, and the highest-performance models are still under 1B parameters. This shows that embeddings is not as easily reduced to scaling laws as LLMs are.

        MTEB score vs time for SOTA models. The size of the cross represents parameter count.

        However, even these small models still train on hundreds of millions or billions of text pairs, requiring thousands of GPU-hours to train. We can conclude that while parameter count may not be increasing, the overall compute requirements of training an embeddings model are getting higher, and it is no longer within the reach of all researchers to work on these models.

        Multilingualism

        While MTEB is a multilingual benchmark, only a few tasks, namely STS, Classification and Bitext Mining, have multilingual versions. Combined with the abundance of English training data, this has led to every language except English, Chinese and Polish lacking a complete MTEB and thus lacking the benefits of state-of-the-art models.

        As in other subfields of NLP, multilingual performance is often an afterthought, and left by the wayside in pursuit of higher performance on English benchmarks, or exclusively in the domain of labs that can afford extra runs.

        Method

        With these problems as our motivation, we aim to find out if it is possible to add multilingualism to an existing model without having to pretrain from scratch. This may be a step towards bringing the benefits of increased embeddings performance to languages that don’t currently have a state-of-the-art model. Furthermore, if it is possible to add a new language to an existing model, this hints at the ideas that models do not necessarily learn a representation based on a particular language, and that translation is easier than expected in the context of embeddings, modelable as a transformation of the representation space.

        To do this, we will take an existing model that has both monolingual English and multilingual variants, and use contrastive training to add in new languages without sacrificing English performance, by using publicly available text translation pairs. We call this approach Cross-Lingual Fine-Tuning (CLFT). We will attempt to create a model that performs on-par with the multilingual model in multiple languages, and on-par with the original model in English, which we will measure by completing with our own data a multilingual version of MTEB in all tasks.

        Model Choice

        We choose e5-base-v2 and multilingual-e5-base as our demonstration models. E5 is the highest-performing current open-weights model with both a mono- and multilingual version, and still holds the top spot in many less popular languages. Both models are the size of BERT, with 12 layers, 768-dimensional embeddings and a context window of 512 tokens. The only difference is that the multilingual model has a much larger vocabulary to support more languages, and uses the XLM-RoBERTa tokenizer, leading to about 60% more parameters.

        This choice does produce a caveat in the rest of our post - since the BERT tokenizer of e5-base has been trained only on English data, it will be unable to tokenize text that is not also a possible English string. In practice, this means that any Latin or near-Latin speaking languages, such as French, German and Turkish, can be used, but the model cannot be finetuned to read unknown characters like Japanese or Arabic script. Any non-Latin characters will likely become an [UNK] token, which carries no information for the model to embed. We are confident that this is not a fatal flaw, though, since just as it is possible to train LLMs with unused vocabulary, such as Persimmon-8B, it is possible to train an embeddings model with a big unused vocabulary. In the case that this research proves useful, it would be easy to train a large English embeddings model with a multilingual tokenizer and fill in this extra vocabulary space afterwards in finetuning.

        Benchmarking

        As described above, it is hard to use MTEB to test performance in non-English languages, due to the lack of available tasks. After investigating the source datasets, we know that this is because of a lack of data. In the interest of producing a universally fair test, especially for low-resource languages where quality data is not available, we opted to use synthetic data to create a multilingual MTEB test set, by using machine-translation to convert the English datasets into each language.

        Side note: We were fascinated to find that the state-of-the-art neural machine translation model is no longer GNMT or the Google Translate API, but in fact just GPT-4!

        We used GPT 3.5 to process ~200K test examples in each of the following languages: French, German, Spanish, Swahili, and Turkish. We selected these languages because of their presence on the No Language Left Behind (NLLB) text-pair dataset, widespread usage, use of Latin alphabet, and varying degrees of similarity to English. We were particularly interested in Turkish because, while many words are near-Latin when written down, the language is from a completely different Proto-Turkic root, and appears much harder for native English-speakers to read. We were also interested in Swahili, as despite being one of the world’s biggest languages, it is comparatively data-poor and so multilingual models often struggle to generalize to it. We trained five models in total.

        As mentioned before, MTEB already contains some multilingual components, in the textual similarity, bitext mining and classification tasks. The bitext mining task in particular requires a cross-lingual model, so we will use it only on the final all-language model. The remaining tasks are clustering, retrieval, classification, re-ranking, STS, and summarization. For each task, we selected one dataset that would generalise well across languages. Given more time and compute resources, it would be easy to expand the dataset to a full synthetic multilingual MTEB. From now on, we refer to this benchmark as MMTEB (Multilingual Massive Text Embeddings Benchmark).

        Datasets and code for evaluation are available HERE.

        Task Classification Clustering Retrieval
        Dataset MASSIVE Reddit and TwentyNewsgroup SciFact
        Semantic Text Similarity Summarization Reranking Pair Classification
        STS-22 SummEval MIND Twitter URL Corpus
        Tasks and datasets in MMTEB.

        Training

        In CLFT, we initialize two instances of our base model, one of which is frozen, and the other is trained. We will refer to these as $f_s$ and $f_\theta$ for the static and trained model. The static model will be used to anchor our trained model to the initial representation. For each language $l$, our data $X_l$ is composed of pairs of data points $(x_e, x_l) \in X_l$, where $x_e$ is a sentence in English, and $x_l$ is that sentence translated to language $l$.

        We initially attempted to use the literature-standard InfoNCE contrastive loss for \(\mathcal{L}_{\text{eng}}\) and \(\mathcal{L}_{\text{cross}}\), treating equivalent translation pairs as positive examples, and non-equivalent text pairs as negative examples. However, empirically this did not perform as well, which we suspect may be because of compute-based batch size limitations (e5 was trained on a comparatively huge batch size of 32,768). Because of this, we chose to use our own simplified cosine similarity objective.

        We give the model \(f_\theta\) the following goal: place \(x_l\) as close to \(x_e\) as possible, without changing where we place \(x_e\). This is crucial, because it forces the model to map the new language onto its existing representation. This is done with the following loss function

        \[\mathcal{L}(x_e, x_f) = \mathcal{L}_{\text{eng}} + \beta \mathcal{L}_{\text{cross}}\]

        Where:

        • \(\mathcal{L}_{\text{eng}} = 1 - f_\theta(x_e) \cdot f_s(x_e)\) represents the loss component for English text, with \(f_\theta\) as the dynamic model being trained and \(f_s\) as the static reference model.
        • \(\mathcal{L}_{\text{cross}} = 1 - f_\theta(x_e) \cdot f_\theta(x_f)\) represents the cross-lingual consistency loss, comparing the dynamic model’s outputs for English and foreign text.
        • \(x_e\) and \(x_f\) are inputs for English and foreign text, respectively.
        • \(\beta\) is a coefficient to balance the influence of the cross-lingual consistency term.

        We ran each of our mono-lingual models on 400,000 text pairs from the NLLB dataset, with learning rate \(lr = 1e-4\) and dynamic scaling \(\beta = 0.4\).

        Results

        We found interesting and surprising results across our chosen languages and tasks. The results in table format are available in the appendix.

        We can visualize these results in two graphs: comparing our approach to the baseline English model, and to the current state-of-the-art multilingual model.

        All tasks and languages vs. base model performance
        All tasks and languages vs. multilingual model performance

        We can see that the CLFT approach did extremely well on tasks like classification, pair classification and clustering, even beating the multilingual model itself. This is to be expected in particularly well-suited tasks, since a perfect monolingual model will always outperform a multilingual model at a set number of parameters. However, the model did not improve as strongly in retrieval and semantic textual similarity tasks. Additionally, we can see the model struggle most significantly in Swahili, the most distant language to its original English in our training set. Overall, we observed an average 5.5% relative improvement on the base model, taking us 49.8% of the way to the performance of the multilingual model.

        We have some conjectures about the reason for this split, which relate to the theory of representation learning. Since our loss is purely on positive pairs, there is weaker enforcement of a shape of the embeddings space. It is therefore likely that our approach is degenerating the shape of the embeddings space, leading to more clustering and noisier local structure. This means that tasks that rely on broad-strokes embeddings, such as clustering, classification and so on, will benefit from this approach, whereas tasks that rely on fine-grained relative positioning such as retrieval, reranking and STS will suffer. CLFT could thus be viewed as a trade-off between speed and ease of training, and noisiness of embeddings.

        We investigate this by performing a visual analysis of the embeddings after PCA dimension reduction. In the figure below, we see how different models represent the same text, after it has been translated. The texts were taken from the associated Reddit clustering datasets for each language, and the labels in the diagrams are the names of the corresponding classes of the datapoints. We see that the position of each embedding is roughly the same, which makes sense given our loss function.

        Representations of the same text, translated to each model's respective language

        Additionally, the figure below demonstrates that we were mostly successful in our goal of keeping our trained models aligned with the underlying English model. We embedded the same English text with each model and got an even tighter clustering. We see that the training on languages more similar to English, such as Spanish, did not alter the English representations as significantly. Conversely, more distant languages, such as Swahili, led to further degradation of the embedding space.

        Representations of English text, according to each language

        Conclusions

        Based on our results, we conclude that fine-tuning for multilinguality is a cheap and viable alternative, especially when working with languages that do not have a large presence on the internet. While not an improvement over “true” multilingual models in general, CLFT can outperform multilingual models in scenarios where high-quality data is sparse, or in specific task categories (like clustering and reranking).

        Additionally, we have made steps to introduce the first truly multilingual benchmark, for future embedding models to be evaluated against. All code and data for MMTEB assessment can be found here

        Limitations and Next Steps

        Our experiment has several limitations, and there is plenty of room for extension:

        The fact that we used machine-translated English text for our benchmark poses potential issues. It’s likely that the distribution of data that our translation model produces is not equivalent to that produced in the real world, meaning that our benchmark isn’t as accurate as the English one is. This is hard to ameliorate, especially for languages lacking many large datasets. However, barring vast troves of previously undiscovered internet data being discovered, translations can serve as a useful stopgap, and an equalizer for these less available languages. Completing the MMTEB benchmark would be a valuable contribution to the field, and a path to more languages being represented in state-of-the-art models.

        In this paper, we only evaluated monolingual models, and did not study how the approach scales to multiple languages at once. Due to time and compute constraints, we were unable to try and train a “true” multilingual model, beyond just English and one other language. We believe that with further training, it may be possible to repeat the process above for multiple languages.

        As mentioned in our results, CLFT can lead to noisy embeddings, which may decrease performance on particular tasks. A better distillation loss, or traditional contrastive loss with a much larger batch size, may help to regularize the data and resolve this issue.

        As previously mentioned, we could not explore non-latin characters, vastly reducing our set of potential languages. We believe that with the correct tokenizer and base model, this should be possible. Additionally, it’s becoming possible to imagine a future of Transformers without tokenization, which would greatly help approaches like ours.

        Despite our models maintaining near perfect alignment with the base model on the English text pairs during training, we observed performance on the English MTEB decrease substantially. This suggests that the text pairs on NLLB do not fully capture the distribution of data seen during testing, which is something that could be improved upon with better translation datasets.

        Appendix

        Here is a full table of our results:

          Classification Summarization Clustering Retrieval STS Reranking Pair Classification Average
        Spanish (e5-base) 0.511 0.314 0.333 0.554 0.585 0.296 0.828 0.489
        Spanish (e5-multi) 0.635 0.301 0.336 0.655 0.629 0.243 0.848 0.521
        Spanish (ours) 0.583 0.314 0.398 0.568 0.553 0.284 0.847 0.507
        German (e5-base) 0.522 0.307 0.328 0.560 0.236 0.293 0.812 0.437
        German (e5-multi) 0.637 0.313 0.346 0.648 0.491 0.230 0.840 0.501
        German (ours) 0.602 0.320 0.393 0.546 0.273 0.332 0.841 0.472
        French (e5-base) 0.512 0.312 0.329 0.568 0.747 0.330 0.825 0.518
        French (e5-multi) 0.637 0.306 0.263 0.644 0.764 0.222 0.845 0.526
        French (ours) 0.622 0.302 0.404 0.604 0.749 0.344 0.849 0.554
        Turkish (e5-base) 0.458 0.296 0.221 0.411 0.456 0.308 0.776 0.418
        Turkish (e5-multi) 0.639 0.304 0.318 0.631 0.601 0.258 0.827 0.511
        Turkish (ours) 0.557 0.307 0.382 0.413 0.414 0.336 0.826 0.462
        Swahili (e5-base) 0.413 0.304 0.181 0.281 0.000 0.313 0.751 0.321
        Swahili (e5-multi) 0.528 0.303 0.166 0.527 0.000 0.257 0.822 0.372
        Swahili (ours) 0.347 0.315 0.238 0.332 0.000 0.275 0.764 0.325
        Average (e5-base) 0.483 0.307 0.279 0.475 0.405 0.308 0.799 0.436
        Average (e5-multi) 0.615 0.306 0.286 0.621 0.497 0.242 0.836 0.486
        Average (ours) 0.542 0.312 0.363 0.493 0.398 0.314 0.825 0.464
        \ No newline at end of file diff --git a/blog/2023/multimodal-commonsense/index.html b/blog/2023/multimodal-commonsense/index.html new file mode 100644 index 00000000..5b2cc773 --- /dev/null +++ b/blog/2023/multimodal-commonsense/index.html @@ -0,0 +1,32 @@ + Multimodal Commonsense | 6.S898 Deep Learning Blogs 2023

        Multimodal Commonsense

        6.S898 project for analyzing and evaluating the commonsense reasoning performance of multimodal vs text-only models.

        Introduction

        In recent years, language models have been proven to be quite proficient in producing human-like text, computing somewhat semantically-meaningful and human-interpretable word and token embeddings, and generating realistic conversation. However, there is a vast distinction between mimicking human linguistics from data and forming an understanding of the world and its abstract connections from data. The latter describes the commonsense knowledge of a language model, or its ability to reason about simple relationships, interactions, and general logic of the world.

        With the advent and growth of large language models in recent years (and months), understanding the world and developing deeper underlying representations of physical and abstract concepts through text alone has become much more feasible and tractable. Yet, there is only so much someone or something can understand by simply reading about it. When evaluating the performance of language models in this context, does the language model simply mimic this knowledge or does it inherently possess it? One paradigm through which to formalize this is through a deeper categorization of common sense.

        In particular, physical common sense, or knowledge about the physical world and its properties, is fundamental knowledge for realizing the world and the interactions within it. Physical common sense is a naturally multimodal concept, though, that for humans requires a combination of several senses to perceive, as physical properties are manifested in multiple modalities. A lack of info in any modality may make an object visually ambiguous, or otherwise manifest some misunderstanding of an object. Can we expand the capabilities of language models by imbuing them with multifaceted input to expand its knowledge base beyond text alone?

        In this work, I focus on evaluating the physical commonsense reasoning ability of unimodal and multimodal models from text-based tasks under multimodal input. I specifically compare the performance of a text-only language model with a multimodal vision-language model and investigate (a) whether the multiple modalities of input in pretraining the multimodal model can have comparable performance to a text-specialized model, and (b) whether the supplementation of relevant image data at inference time boosts the performance of the multimodal model, compared to a previously text-only input.

        Intuitively, vision data should benefit the physical commonsense reasoning of a model by providing the inputs the additional feature of a physical manifestation. Here, I investigate whether image data truly gives deep learning models an additional dimension of representation to benefit its commonsense reasoning.

        Related Works

        Several previous works evaluate language models on unimodal text-based commonsense reasoning. A number of common sense benchmarks for LMs exist, evaluating a variety of common sense categories , from which fine-tuning has shown to improve general commonsense capabilities of state-of-the-art models. Li et al., 2021 analyzes the performance of the Gopher language model in zero-shot and few-shot learning with varying model sizes, finding that their LM performed relatively well in physical common sense, but worse in other common sense categories. Zhao et al., 2023 investigates the downstream impact of LLMs’ “commonsense model” of the world on robot task generation; they find that using the LLM commonsense knowledge as a heuristic policy achieves better-reasoned decision-making, implying that LLM common sense can empirically benefit physical interaction with the world.

        The intersection between text and vision in models has also been explored in several works, though not in the context of commonsense reasoning. For example, text-to-image models have shown significantly greater improvement in improving & expanding the text encoder as opposed to a similar increase in size of the image diffusion model , showing the effect of powerful text embeddings in image generation. Common sense benchmarks with multimodal inputs have also been created and demonstrate an increase in multimodal model performance after fine-tuning , but they don’t investigate the empirical effect or the representational differences between utilizing additional modalities of data versus not.

        More generally, the effect of additional modalities of data on downstream performance is studied in Xue et al. 2022 in the context of crossmodal knowledge distillation, where they conclude that multimodal input isn’t unconditionally beneficial; multimodal teacher models may not perform better than unimodal teachers, but students of multimodal teachers tend to perform better than those of unimodal teachers.

        Methods

        Commonsense Benchmarks

        It’s important to note that there are many distinguishing categories of commonsense knowledge. Physical common sense (e.g., a ball rolls down an incline instead of remaining still), social common sense (e.g., shouting at a person may incite fear), temporal common sense (e.g., pan-frying chicken takes longer than oven-roasting one), and numerical/logical common sense (e.g., basic arithmetic) are a few examples that all require different modalities of reasoning and may favor some models & architectures over others. Here I focus on physical common sense, since intuitively vision data may influence a model’s physical knowledge the most.

        Commonsense benchmarks can be further categorized into (a) multiple-choice evaluation, where given a short background prompt, a model must select the most reasonable option or continuation from a set of given options, and (b) generative evaluation, where a model must generate an answer or continuation to the prompt. Here, I will focus on multiple-choice evaluation, as multiple-choice benchmarks provide a more concrete and reliable metric for determining similarity to “human” judgment. To evaluate the commonsense performance of both the unimodal and multimodal models, the HellaSwag benchmark is used.

        HellaSwag

        The HellaSwag benchmark is designed to evaluate physical, grounded, and temporal common sense. Given a few-sentence-long story or prompt, the model must choose the correct continuation from four choices. The prompts are generated from (a) ActivityNet , a large-scale video benchmark for evaluating Human Activity Understanding containing annotations for segments of YouTube videos, and (b) WikiHow , a large-scale text summarization dataset. Data splits are provided by the HellaSwag benchmark, but only the train and validation splits are used here, as the test set labels are not public.

        Here, for evaluating the multimodal model, I use only the entries generated from ActivityNet, as each ActivityNet prompt has an associated source ID from which the original source video may be accessed. From the video, image data can be scraped to augment the multimodal model’s fine-tuning and inference. The image data generation process is described in more detail in a following section.

        Due to resource and time constraints, only a subset of this data was used for training and evaluation. Given the large size of the original HellaSwag benchmark, the sampled subset of the original data contains 10% of the original data. Each datum within the sampled dataset is sampled randomly from the original train/validation set, and each prompt within the sampled dataset is verified to have a publicly available video associated with it, i.e., the associated YouTube video is not private or deleted. Implications of this limitation are discussed further in the Limitations section below.

        Figure 1. Example prompts from the HellaSwag benchmark. Image sourced from the original HellaSwag paper .

        Text-Only Language Model

        RoBERTa

        RoBERTa is used for evaluation of text-only models for physical commonsense reasoning. I use RoBERTa for its established high performance in general natural language processing tasks while being relatively compact and fast to run. A dropout layer and a linear classification head is used with the RoBERTa model to generate the logits for label classification of each prompt.

        Vision-Text Multimodal Model

        CLIP

        The CLIP (Contrastive Language-Image Pre-Training) model is a multimodal vision and language model . It was introduced as a novel, simplified pretraining strategy utilizing the large amounts of public available data from the Internet to form (image, text) pairs, demonstrating high performance in downstream tasks such as OCR, geo-localization, and action recognition. Since CLIP utilizes natural language processing methods for processing text and image captions, it also boasts an impressive language model, making the model useful for both unimodal and multimodal tasks.

        In the experiments described below, the multimodal model is compared to the unimodal model via text sequence classification and text + vision sequence classification for determining the most likely ending to each HellaSwag prompt, so high baseline performance in both of these tasks is an essential starting point, which CLIP provides. Like for the RoBERTa model, a dropout layer and a linear classification head is used in conjunction with CLIP to perform the label classification for each prompt.

        Image Data Generation

        To collect the supplementary vision data for fine-tuning and evaluating the multimodal model, an additional scraping script is used to collect the relevant image data for each HellaSwag prompt. As described before, each prompt in the HellaSwag benchmark is generated from an associated ActivityNet prompt. Each ActivityNet prompt contains a source ID for the corresponding YouTube video, as well as a time segment containing the start and end time (in seconds) for the relevant video annotation. Using this information, each text prompt can be supplemented with an additional image prompt via a frame from the corresponding YouTube video.

        A custom script is used to access each prompt’s corresponding YouTube video and scrape image data. The script works as follows:

        1. From a HellaSwag entry, obtain the source ID for the corresponding ActivityNet entry.
        2. From the ActivityNet entry, obtain the YouTube video source ID (to be used directly in the YouTube URL) and the time segment indicating the start/end time of the annotated clip.
        3. Download a low-resolution copy of the YouTube video via accessing the URL https://www.youtube.com/watch?v={source_id}. Here, we download the 144p resolution copy of each video.
        4. Capture a single selected frame from the video data. Note: the selected frame is determined by calculating the average between the video clip’s start and end time, then scraping the frame of the video at that timestamp. Implications of this frame selection are described in more detail in the Limitations section below.
        5. Save the frame as image data for multimodal fine-tuning.

        This pipeline is used on the (sampled) HellaSwag train, validation, and test sets so that image data is available for both fine-tuning of the multimodal model, as well as inference for evaluation.

        Experiments

        Data

        For fine-tuning and evaluation of the unimodal and multimodal models, a subset of the HellaSwag dataset is used, as already described above. Further summary of the sampled dataset can be found in Table 1.

        To prepare the data for Multiple Choice Classification, the data from each prompt must be preprocessed as follows. Each prompt in the HellaSwag dataset is broken into three components: ctx_a, which contains the first sentence(s) of the prompt, ctx_b, which contains the initial few words of the final sentence, and four endings all stemming from the same ctx_a and ctx_b but each with different conclusions. This particular formatting of the data is important for the RoBERTa tokenizer, where each sequence within an inputted text pair must be a complete sentence. Each prompt then generates four text pairs of the form (ctx_a, ctx_b + ending_i) for each of the four endings. This allows for the multiple choice classification head to compute the most likely of the four endings, given the same context ctx_a, ctx_b.

        Table 1. Summary of sampled HellaSwag dataset.

        Setup

        Neither the RoBERTa nor the CLIP architecture is designed for sequence or multiple choice classification, so a separate linear classification head follows each of the unimodal RoBERTa, unimodal CLIP, and multimodal CLIP models.

        Text-only fine-tuning: The training and validation sets for fine-tuning are formatted and preprocessed as described above. To adjust the weights of the classifier and the core embedding model, each model is fine-tuned on the HellaSwag training data and evaluated during training on the validation data for 20 epochs. Since only the text prompt is inputted to CLIP here, only the CLIP text embedding is used for classification.

        Text-image fine-tuning: To fine-tune the multimodal CLIP model, the original training and validation datasets are augmented by adding each prompt’s relevant corresponding image data (from the process described in the Image Data Generation section). The multimodal model is then fine-tuned on both the text prompts as before and the relevant image data simultaneously. With both text and image input, CLIP outputs a combined text-image embedding that is used for the classification head, instead of the text-only embedding from before.

        After fine-tuning, each model is evaluated on the withheld HellaSwag test dataset for classification accuracy. For both the text-only and text-image fine-tuning, I perform three total repetitions for each model and average the results in Figure 1.

        Figure 2. Accuracy results for each model, averaged across three runs.

        Results

        As shown in the accuracy results, the RoBERTa model performs the best, while the unimodal CLIP model performs worse, and the multimodal CLIP model only slightly better than the unimodal CLIP but still marginally worse than RoBERTa. RoBERTa likely performs so well because of its generally high performance in other text-based tasks, and its bidirectional contextual embeddings allow for evaluation of a prompt/ending holistically. In this setup, the supplementary image data did not provide any significant empirical improvement to the multimodal model, as shown by the insignificant improvement in downstream performance when comparing the text-only to text-image CLIP models.

        However, I attempt to provide an explanation for this shortcoming through further investigation of the supplementary images. Below, I display the class activation map of the image data from a particular prompt to attempt to visualize why the additional modality of data had little effect on the classification distinguishability across the four endings of the prompt. Figure 2 shows the image (which is the same for all four endings) and the individual image attention masks generated from each ending corresponding to the following context: A lady named linda, creator of paint along is demonstrating how to do an acrylic painting. She starts with a one inch flat brush and yellow and white acrylic paint. she ...

        Figure 3. The class activation maps for the following prompt: "A lady named linda, creator of paint along is demonstrating how to do an acrylic painting. She starts with a one inch flat brush and yellow and white acrylic paint. she ..." and the following endings: (1) "... then paints about six shades of purple color on the paper." (2) "... makes x patterns across the canvas with the yellow color." (3) "... puts on a cream colored coat and begins painting the white wooden fence without a paint shaker." (4) "... powders the paint in an electric flat brush and applies it over the wall." The image caption that generated the shown attention masks is the full sentence pair, i.e., context + ending

        Notice that across all four prompt/ending pairs, CLIP attends primarily to the same location on the image. While the image data might enrich the model’s representation of the prompt itself, the similarity across the generated attention masks demonstrates that the image doesn’t serve to distinguish the endings from each other and, therefore, has little effect in influencing the likelihood of any particular ending from being more likely. In this setup, the text embedding alone determines the classifier output, and the lack of image distinguishing power provides some explanation for the similarity in downstream performance between the unimodal and multimodal CLIP models.

        However, it’s possible that the attention masks were only so similar because all endings were prepended by the same exact context. In the case of Figure 2, the context describes an interaction with the painting, so it may be natural for all attention masks to focus on the painting, regardless of the conclusion of the ending. What if we restrict the image caption to contain only the final sentence (ctx_b + ending)? Figure 3 displays the class activation maps for this setup (though, not from an additional CLIP model fine-tuned on this image caption setup).

        Figure 4. The class activation maps for the endings above, without the prepended context.

        We see that using the final sentence without the preceding context generates more varied attention masks, so does this unconditionally allow for more diversity in the image/common sense representation in the joint text/image embedding? I claim that the answer is no; having the entire context for analysis is fundamental for common sense reasoning, so removing a significant portion of the context promotes greater ambiguity in both the intent of the prompt/image caption and the benefit of the attention mask. Using only the final sentence may produce more varied results in the image attention mask, but this may potentially be more detrimental than beneficial by attending to an irrelevant portion of the image that may detract from the commonsense ground truth answer.

        Further investigation into different formulations of the image caption with respect to the original prompt in this manner may result in truly richer representations and more meaningful results for downstream model performance.

        Conclusion

        In this work, I compare the physical commonsense reasoning capability of a text-only language model with a multimodal vision-language model and evaluate whether the multiple modalities of input in pretraining the multimodal model can have comparable performance to a text-specialized model, and whether the addition of relevant image data for inference boosts the performance of the multimodal model. I find that, within the proposed experimental setup, the effects of image data supplementation are insignificant, though I provide a potential explanation for this unintuitive result via class activation maps of the multimodal model’s image attention data; alternative formulations for this text-image data augmentation may provide better and more intuitive results. Overall, I provide an empirical experimental pipeline and analysis for potential factors toward further artificial intelligence models’ physical commonsense reasoning, and their internal representations of the world.

        Ethical Implications

        It’s also important to note the ethical considerations of “improving” the commonsense reasoning capabilities of deep learning models. Converging on a universally-accepted definition of common sense is utopian, so the interpretation of common sense evaluation must be constantly scrutinized. The biases and malicious elements of a model’s knowledge base must be investigated to ensure that fine-tuning on common sense benchmarks are not further accumulated and embedded into the model. Physical common sense is relatively simple for finding a ground truth answer or natural continuation, but for social common sense, for instance, what a model “should” predict for a particular situation or prompt is much more ambiguous.

        Limitations

        The implementation and constraints of this work imply some limitations. One evident limitation is the size of both the benchmark dataset and the models used. Evaluating uni- and multimodal models on the full HellaSwag benchmark, including all of both ActivityNet and WikiHow entries, may lead to slightly different results. Furthermore, newer and bigger models for both text and vision-text models exist; for example, if evaluation is extended to generative prompt evaluation, the recently released GPT4 model can be evaluated in this framework given its original text-only functionality and its new image input capabilities.

        On the topic of generative prompt evaluation, this work only uses multiple-choice prompts for the simplicity and clarity of its evaluation results. However, generative prompts may more closely reflect human-generated responses and may be more representative of multimodal capabilities. Finally, making progress toward a more general-purpose intelligent system means extending the common sense evaluation to more categories than physical. Designing a more comprehensive multimodal model for common sense requires evaluation on all modalities of common sense, and will likely also require additional modalities of input data (e.g., audio cues for better social common sense performance).

        \ No newline at end of file diff --git a/blog/2023/neural-PDEs-long-time-dynamics/index.html b/blog/2023/neural-PDEs-long-time-dynamics/index.html new file mode 100644 index 00000000..ccd59d18 --- /dev/null +++ b/blog/2023/neural-PDEs-long-time-dynamics/index.html @@ -0,0 +1,32 @@ + Neural PDEs for learning local dynamics and longer temporal rollouts | 6.S898 Deep Learning Blogs 2023

        Neural PDEs for learning local dynamics and longer temporal rollouts

        6.S898 deep learning project

        Partial differential equations

        At the continuum level, spatiotemporal physical phenomena such as reaction-diffusion processes and wave propagations can be described by partial differential equations (PDEs). By modeling PDEs, we can understand the complex dynamics of and relationships between parameters across space and time. However, PDEs usually do not have analytical solutions and are often solved numerically using methods such as the finite difference, finite volume, and finite element methods . For example, the finite element method (FEM) approximates PDE solutions by first discretizing a sample domain into a mesh of interconnected elements and then solving a system of equations iteratively given a set of boundary conditions, initial conditions, and material properties.

        In this blog, we will show two examples of PDEs, one of which is the Navier-Stokes equation which describes the dynamics of viscous fluids. The equation below shows the 2D Navier-Stokes equation for a viscous and incompressible fluid in vorticity form on a unit torus, where \(w\) is the vorticity, \(u\) the velocity field, \(\nu\) the viscosity coefficient, and \(f(x)\) is the forcing function. The solution data were from the original paper where the problem, with a periodic boundary condition, was solved with a pseudospectral method using a 1e-4 time step with the Crank-Nicolson scheme.

        \[\begin{gather} \partial_t w(x, t) + u(x, t) \cdot \nabla w(x, t) = \nu \Delta w(x, t) + f(x), \quad x \in (0,1)^2, t \in [0,T] \\ \nabla \cdot u(x, t) = 0, \quad x \in (0,1)^2, t \in [0,T] \\ w(x, 0) = w_0(x), \quad x \in (0,1)^2 \end{gather}\]

        We can visualize the 2D PDE solution over the 50 time steps:

        Solution of 2D Navier-Stokes PDE - drag the slider!

        Motivations for neural PDEs

        Well-established numerical methods are very successful in calculating the solutions of PDEs, however, these methods require high computational costs especially for high spatial and temporal resolutions. Furthermore, it is important to have fast and accurate surrogate models that would target problems that require uncertainty quantification, inverse design, and PDE-constrained optimizations. In recent years, there has been growing interest in neural PDE models that act as a surrogate PDE solver, especially neural operators that aim to learn the mapping between input and output solution functions. These models are trained on numerical solutions from existing methods and inferences are orders of magnitude faster than calculating the solutions again through numerical methods.

        In this article, I will first examine if we can apply neural networks to learn the dynamics in PDE solutions and therefore replace PDE solvers with a neural PDE as the surrogate solver. We will start with a base U-Net model with convolutional layers. Next, I will examine the neural operator methods, notably the Fourier Neural Operator (FNO). Primarily, the Fourier neural operator has proven to predict well for PDE solutions and we will use it to compare with the U-Net model on the representations learnt in the Fourier layers. Next, I will examine the FNO’s performance on another PDE with two dependent states. We will notice that the FNO is capable of learning lower frequency modes but fails to learn local dynamics and higher frequency modes. We then finally introduce some improvements to the FNO to tackle this problem involving local dynamics and long term rollout errors.

        Dataset and training schemes for the 2D Navier-Stokes PDE

        For the dataset, I will start with the 2D time-dependent Navier-Stokes solution (\(\nu\) = 1e-3) that was shipped from Zongyi Li et al’s paper . The problem for any given model would then be to learn the mapping from an input solution (vorticity) of t=[0,10] to the solution of t=(10, 40]. For all models involving Navier-Stokes, the original implementations were used, but implementations were improved or new ones were added for the second PDE problem, for which more details will be shared in later parts of the article. We use 1000 solutions for training and 200 for the test dataset. The models are trained with 500 epochs with an initial learning rate of 0.001, the AdamW optimizer is used with a cosine annealing scheduler. Unless otherwise specified, a relative L2 loss is used for training and prediction of each data batch. For U-Net and FNO2D, the models use 2D convolutions in the spatial domain and recurrently predict through the time domain (autoregressive training). For FNO3D, the time domain is included as the 3rd dimension in the input data for the FNO to learn both spatial and temporal dependencies within the solutions.

        Base model (U-Net)

        Let’s begin with examining whether a U-Net with convolutional layers can be used to learn the dynamics. U-Net is a popular model architecture for image to image predictions and image segmentation tasks. It consists of a series of downsampling and upsampling layers with skip connections, and my re-implementation is based on this repo.

        We can use the U-Net to learn the features from the input PDE solution frames and predict the solution in the next time step, treating the 2D solution as an image. As for the time component, the surrogate model takes the input solution from the previous k time steps to predict the solution at the (k+1)-th time step. Then, the solution from the previous k-1 steps is concatenated with the predicted solution as the input back into the model to predict the next step, and so on. In a nutshell, the model is trained to predict autoregressively.

        Training curve for U-Net with average relative L2 train and test loss

        U-Net's prediction of 2D Navier-Stokes for unseen test set (id=42)

        The U-Net seems to predict well for the 2D Navier-Stokes test set. However, the average final test loss of 0.0153 is still considerably high. For longer time rollout, the errors can accumulate. Let’s examine the FNO2d-t and FNO3d models next.

        Fourier Neural Operators

        Fourier neural operators (FNOs) try to learn the mapping between input functions and solution functions , for example, mapping the solutions from earlier to later time steps for time-dependent PDEs.

        The authors introduced the Fourier layer (SpectralConv2d for FNO2d) which functions as a convolution operator in the Fourier space, and complex weights are optimized in these layers. The input functions are transformed to the frequency domain by performing fast Fourier transforms (torch.fft) and the output functions are then inverse transformed back to the physical space before they are passed through nonlinear activation functions (GeLU) to learn nonlinearity. Fourier transformations are widely used in scientific and engineering applications, such as in signal processing and filtering, where a signal / function is decomposed into its constituent frequencies. In the FNO, the number of Fourier modes is a hyperparameter of the model - the Fourier series up till the Fourier modes are kept (i.e. lower frequency modes are learnt) while higher frequency modes are truncated away. Notably, since the operator kernels are trained in the frequency domain, the model is theoretically capable of predicting solutions that are resolution-invariant.

        Applying FNO2D and FNO3D on 2D Navier-Stokes time-dependent PDE

        We reimplement and train the FNO2D model on the same train-test data splits for the 2D Navier-Stokes solution. Notably, the final average relative L2 loss (for test set) is 0.00602 after 500 epochs of training. Comparing this with the U-Net that is also trained and predicted with the same scheme, the FNO2D has an improved performance!

        FNO2D's prediction of 2D Navier-Stokes for unseen test set (id=42)

        The predicted solutions look impressive and it seems like the dynamics of the multiscale system are learnt well, particularly the global dynamics. Likewise, the FNO3D gives similar results. Instead of just convolutions over the 2D spatial domains, the time-domain is taken in for convolutions in the Fourier space as well. According to the authors, they find that the FNO3D gives better performance than the FNO2D for time-dependent PDEs. However, it uses way more parameters (6560681) compared to FNO2D (928661 parameters) - perhaps the FNO2D with recurrent time is sufficient for most problems.

        Training curve for FNO3D with average relative L2 train and test loss

        FNO3D's prediction of 2D Navier-Stokes for unseen test set (id=42)

        Representation learning in the Fourier layers

        You might be curious how the Fourier layers learn the Navier-Stokes dynamics - let’s examine some weights in the SpectralConv3d layers (for the FNO3D). We take the magnitudes of the complex weights from a slice of each layer (4 Fourier layers were in the model).

        Visualizing weights in the Fourier layers

        There seems to be some global features that are learnt in these weights. By learning in the Fourier space, the Fourier layers capture sinusoidal functions that can generalise better for dynamics according to the dynamical system’s decomposed frequency modes. For CNNs, we know that the convolutions in spatial domain would lead to the learning of more local features (such as edges of different shapes), as compared to more global features learnt in Fourier layers.

        On the importance of positional embeddings

        In FNO implementations, besides the input data for the 2D + time domains, the authors also append positional encodings for both x and y dimensions so the model knows the location of each point in the 2D grid. The concatenated data (shape = (B, x, y, 12)) is then passed through the Fourier layers and so on (note: B is the batch size, x and y the spatial sizes, and 12 consists of 10 t steps and 2 channels for positional encodings along x and y). The positional embedding turns out to be critical to the model's performance.

        Original with positional encoding

        No positional encoding

        We train the same FNO3D on the same data but this time without the positional encodings concatenated as the input. Simply removing these positional encodings for the x and y domains causes the model to underperform. Here, we are comparing between FNO3D with and without positional encoding. FNO3D has a final relative test loss of 0.0106 but the test loss is 0.0167 without positional encodings. Inspecting the change of x over t for a sample test dataset, the differences in performance become more visible. Note that we also observe the data have well-defined sinusoidal functions in the dynamics.

        Improving accuracies in predicting local dynamics and long-term rollouts in time-dependent PDEs

        Let’s apply the FNO to other PDEs, particularly problems where local dynamics and long-term accuracies are important. Here, I introduce another PDE as an example - a coupled reaction heat-diffusion PDE with two dependent states.

        \[\begin{gather} \kappa \frac{\partial^2 T}{\partial x^2} + \rho H_r \frac{\partial \alpha}{\partial t} = \rho C_p \frac{\partial T}{\partial t} \\ \frac{\partial \alpha}{\partial t} = A \exp \left( -\frac{E}{RT} \right) f(\alpha) \end{gather}\]

        Based on the initial conditions of temperature (T) and degree of cure (alpha) and with Dirichlet boundary conditions on one end of the sample, the T and alpha propagate across the domain (here, the 1D case is examined). For certain material parameters and when initial conditions of T and alpha are varied, we can see that the dynamics can become chaotic after some time - we can visualize it below.

        Solution of the above coupled PDE with 2 dependent states, solved using FEM. Drag the slider!

        For this dataset, we aim to use the first 10 time steps of the solution (heat diffusion from x=0) as input to a neural PDE to predict the next N time steps of the solution. With 10 steps, we predict the 11th step and the prediction is concatenated with the last 9 steps to predict the next time step and so on. We first generate the training data by solving the PDE numerically using the Finite Element Method using the FEniCS package. Specifically, we use mixed finite elements with the continuous Galerkin scheme and a nonlinear solver with an algebraic multigrid preconditioner.

        We use 1228 solutions for the training set and 308 solutions for the test set. The datasets are split into pairs of 10 trajectories, whereby the input data consists of the solution of 10 time steps and the output data (to be predicted) consists of the solution of the next 10 time steps. Since the neural PDE is trained to predict 10 to 1 time step, every batch is trained autoregressively and an L2 loss is taken for all 10 forward predictions before the sum is backpropagated in every batch. Likewise, the AdamW optimizer is used with an initial learning rate of 1e-4 and a cosine annealing scheduler. The models are trained for 300 epochs with a batch size of 16.

        I initially tried the FNO1D implementation on my PDE dataset and notice that the errors accumulate with longer time rollouts using the trained model. FNO1D is used since we only have 1 spatial dimension in the 1D solution and the solutions are predicted recurrently, just like the use of FNO2D for the 2D Navier-Stokes example earlier. The FNO2D model was also used to convolve over both x and t. Both performances are not ideal within 1 cycle of forward prediction.

        RevIN and other training tricks to improve accuracies in longer temporal rollout

        To overcome this problem, there have been attempts to generally improve the accuracies of neural PDE models and also training tricks proposed to improve long-term accuracies in rollout. Using the FNO1D, I first tested out some training tricks, such as the pushforward and temporal bundling which are covered in the paper on message passing neural PDEs. Incorporating the reversible instance normalization layer (RevIN) gives a more promising improvement. With RevIN, the input solution is passed through the normalizing RevIN layer before the FNO1d and the output solution is denormalized through the RevIN layer. Some examples of these tricks for longer term robust forecasting are covered in this repo.

        Using a trained FNO1D with a RevIN layer, here is its prediction on an unseen test set starting from the first 10 time steps as the input solution. The true solution is used to predict up till 50 more time steps forward (5 full cycles forward). While the temperature is predicted with decent accuracy for the first cycle (10 steps forward until t=60 shown), the errors accumulate over more steps.

        FNO1d's prediction (1)

        Generally, we attribute this to the fact that the Fourier layers may not be able to learn more local changes in the dynamics since the higher frequency modes in the Fourier series are truncated away. The global dynamics of the propagating front (heat diffusion along x) are captured reasonably well (the positional encodings probably also have a large part to play). We want to build on the FNO to improve predictions for longer temporal rollout especially for multiscale dynamical systems with both global and local changes. Ideally, we want to take an input of a few time steps from a more expensive numerical solver and pass it through a trained surrogate model to predict N time steps (with N being as high as possible).

        Introducing Large Kernel Attention

        To overcome the problems highlighted for this PDE, we attempt to include a large kernel attention layer (LKA) that was introduced in the Visual Attention Network paper by Meng-Hao Guo et al. The large kernel attention was first introduced as an alternative to the Vision Transformers (ViT) to enable higher spatial adaptivity and long-range correlations. While simple to implement, the authors’ Visual Attention Network surpasses ViTs and CNNs on tasks such as object detection and pose estimation. A similar strategy was introduced last month for 2D problems although their implementation was not shared.

        Therefore, it may be feasible to introduce attention mechanisms to learn local dynamics in PDEs better, and this can complement the Fourier layers which capture global dynamics better. Herein, we add the LKA layers after the Fourier blocks for the FNO1D, and the new model has 5056 more parameters (583425 to 588481). The performance is found to have greatly improved, especially for local dynamics in the unstable propagations.

        FNO1d + LKA's prediction (1)

        For the same data, the addition of LKA gave improved accuracies over predictions in the next 50 time steps. We attribute this to the large kernel attention’s ability to focus on local dynamics at specific parts of the spatiotemporal changes. The LKA has 3 components: a spatial depth-wise convolution, a spatial depth-wise dilation long-range convolution, and a channel convolution.

        \[\begin{gather} \text{Attention} = \text{Conv}_{1 \times 1}(\text{DW-D-Conv}(\text{DW-Conv}(F))) \\ \text{Output} = \text{Attention} \otimes F \end{gather}\]

        I adapted from the LKA’s original implementation to apply to our 1D PDE. Let’s examine the predictions on another test data.

        FNO1d's prediction (2)

        FNO1d + LKA's prediction (2)

        While the predictions are significantly improved, the errors still accumulate with longer rollouts and the model fails to capture dynamics if we extend predictions till 100 steps forward. More work is needed to improve existing neural PDE methods before they can be used as foundational models for PDEs.

        Conclusion

        In this article, we have introduced the use of neural networks as potential surrogate model solvers for partial differential equations that can be expensive to solve using numerical methods. Compared to the base model U-Net, Fourier neural operators have introduced a novel and useful way of learning PDE solutions through convolutions in the frequency space. We first reimplemented the FNO2D and FNO3D on the 2D Navier-Stokes PDE solution shipped with their paper. While it achieves great performance learning global dynamics, existing models struggle to capture local dynamics (higher frequency modes are truncated away) and longer temporal rollouts. We demonstrate that despite adding a RevIN layer and several temporal training tricks, the FNO1D could not predict accurately the solutions of a coupled time-dependent PDE. With the inclusion of attention mechanism through the large kernel attention, the FNO1D’s performance significantly improved. We learn that introducing spatial attention can be useful and more work will be explored to improve predictions of multiscale spatiotemporal dynamical systems.

        \ No newline at end of file diff --git a/blog/2023/overparameterization/index.html b/blog/2023/overparameterization/index.html new file mode 100644 index 00000000..d98dc598 --- /dev/null +++ b/blog/2023/overparameterization/index.html @@ -0,0 +1,52 @@ + Overparameterization of Neural Networks through Kernel Regression and Gaussian Processes | 6.S898 Deep Learning Blogs 2023

        Overparameterization of Neural Networks through Kernel Regression and Gaussian Processes

        In this work, we will explore the successes of overparameterization of neural networks through evaluating the relationship between the Neural Tangent Kernel (NTK), MLPs, and Gaussian processes.

        Introduction

        In this work, we will explore the successes of overparameterization of neural networks through evaluating the relationship between the Neural Tangent Kernel (NTK), MLPs, and Gaussian processes. Recent work has shown that overparameterized neural networks can perfectly fit the training data yet generalize well enough to test data. This was formalized as “the double descent curve” , which suggests that increasing model capacity results in improved performance.

        To help elucidate our understanding of neural networks as the width increases, I wanted to understand the connections between neural networks, which are often regarded as “black boxes,” and other classes of statistical methods, such as kernels and NNGPs. My goal is to put neural networks in the greater contexts of statistical machine learning methods that are hopefully easier to reason with and interpret.

        Literature Review

        There is already prior literature on the connections between these three classes of models.

        1. Kernel Regression $\iff$ MLPs: This connection was introduced in . In particular, they proved that the limit of a neural network as width approaches infinity is equivalent to kernel regression with the Neural Tangent Kernel (NTK).

        2. MLP $\iff$ Gaussian Processes: The connection for infinitely-wide one-layer neural networks was introduced in and for deep networks in . This comes from the observation that if the weights are sampled Gaussian i.i.d., then the Central Limit Theorem states that as the width approaches infinity, the output is also Gaussian. We also went over this briefly in class.

        3. Gaussian Processes $\iff$ Kernel Regression: Other than the obvious fact that they both use kernels and the “kernel trick,” I could not really find a resource that established a clear connection between the two other than through the intermediary of MLPs. In this project, this is one link that I will try to explicitly establish.

        Other relevant prior works I reviewed include:

        • The formalization of the double descent curve in , which uprooted our previous understanding of the bias-variance tradeoff and the notion that models should not overfit. This also motivates the use of infinite-wide neural networks (extreme overparameterization) for prediction tasks. Otherwise, conventional wisdom would say that these models overfit.
        • Why is this problem even interesting? This paper shows that kernels achieve competitive performance for important matrix completion tasks, so neural networks are not necessarily the only solution to many tasks of interest.
        • The lecture notes from this IAP class. I used some of the notation, definitions, and theorems from the lecture notes to write this post, but I also worked through some of the math on my own (e.g. the overparameterized linear regression proof for general $\eta$ and $w^{(0)}$, proving that $X^\dagger$ minimizes $\ell_2$ norm, etc.).
        • I also used this blog to better understand the intuition behind NTKs.

        The gaps in prior knowledge I want to tackle include (1) the explicit connection between GPs and kernel regression and (2) how sparsity of kernel regression can help explain the generalization abilities of neural networks.

        My Contributions

        • The explicit connections between kernel regression, MLPs, and Gaussian Processes (GP), particularly kernel regression and GP.
        • How properties of overparameterized linear/kernel regression can help us understand overparameterization of neural networks, particularly the regularization of the weights.
        • Empirical demonstrations of the theory developed here.

        To start, I work through the math to understand overparameterization in linear regression and connect the results to overparameterization in kernel regression.

        Overparameterization in Linear Regression

        Linear regression involves learning a predictor of the form $\hat{f}(x) = wx$, where $w \in \mathbb{R}^{1 \times d}, x \in \mathbb{R}^{d \times 1}$. Much like neural networks, we find $\hat{w}$ by minimizing the Mean Squared Error (MSE) of the prediction $\hat{f}$ from the target $y \in \mathbb{R}$ across all $n$ samples: \(\mathcal{L}(w) = \frac{1}{2}||y - \hat{f}(x)||_2^2\)

        Without knowing much about the relationship between $n$ and $d$, it is not obvious that there is a closed form solution to this system of equations. Of course, if $n = d$ (and $X$ is full rank), then we can directly solve for $w$. Specifically, if $Y \in \mathbb{R}^{1 \times n}$, $X \in \mathbb{R}^{d \times n}$, $w \in \mathbb{R}^{1 \times d}$, then \(Y = wX \implies w = YX^{-1}.\)

        What about when $n < d$ (overparameterized regime) or $n > d$ (underparameterized regime)? We need to turn to gradient descent then, \(w^{(t+1)} = w^{(t)} - \eta \nabla_w \mathcal{L}(w^{(t)}).\) We can actually explicitly characterize the conditions for convergence and its limit for different values of the learning rate $\eta$ and initialization $w^{(0)}$. Namely, let us start with \(w^{(t+1)} = w^{(t)} - \eta \nabla_w \mathcal{L}(w^{(t)}) = w^{(t)} - \eta (-(y - w^{(t)}X))X^\top = w^{(t)} + \eta (y - w^{(t)}X)X^\top\) Using this equation, we can derive a closed form expression for $w^{(t)}$. \(\begin{align*} w^{(t+1)} &= w^{(t)} + \eta (y - w^{(t)}X)X^\top = w^{(t)} +\eta yX^\top - \eta w^{(t)} XX^\top = w^{(t)}(I - \eta X X^\top) + \eta y X^\top \\ w^{(1)} &= w^{(0)} (I - \eta XX^\top) + \eta y X^\top\\ w^{(2)} &= w^{(0)} (I - \eta XX^\top)^2 + \eta y X^\top(I - \eta XX^\top) + \eta y X^\top\\ w^{(3)} &= w^{(0)} (I - \eta XX^\top)^3 + \eta y X^\top(I - \eta XX^\top)^2 + \eta y X^\top(I - \eta XX^\top) + \eta y X^\top\\ &\dots\\ \end{align*}\) Let $A = (I - \eta XX^\top)$, $B = \eta yX^\top$, and $X = U\Sigma V^\top$ be the singular value decomposition of $X$ where $\sigma_1 \geq \dots \geq \sigma_r$ are the non-zero singular values. Then \(\begin{align*} w^{(t)} &= w^{(0)}A^t + BA^{t-1} + BA^{t-2} + \dots + BA + B = w^{(0)}A^t + B(A^{t-1} + A^{t-2} + \dots + A + I) = w^{(0)} A^t + (\eta yX^\top)(UU^\top + U(I - \eta\Sigma^2)U^\top + \dots + U(I - \eta\Sigma^2)^{t-1}U^\top) \\ &= w^{(0)} A^t + (\eta yX^\top)U(I + (I - \eta\Sigma^2) + \dots + (I - \eta\Sigma^2)^{t-1})U^\top = w^{(0)}(I - \eta XX^\top)^t + \eta yX^\top U\begin{bmatrix} \frac{1 - (1 - \eta\sigma_1^2)^t}{\eta\sigma_1^2} & & &\\ & \frac{1 - (1 - \eta\sigma_2^2)^t}{\eta\sigma_2^2} & &\\ & & \ddots & \\ & & & 0 \end{bmatrix}U^\top \end{align*}\) From this equation, we can derive many insights into the conditions for convergence. 
In particular, if we want the RHS to converge, we require $|1 - \eta \sigma_1^2| < 1 \implies -1 < 1 - \eta\sigma_1^2 < 1$. Thus, when $\eta < \frac{2}{\sigma_1^2}$ (which implies $\eta < \frac{2}{\sigma_2^2}, \eta < \frac{2}{\sigma_3^2}, \dots$), gradient descent for linear regression converges.

        With this condition on $\eta$, we can further characterize $w^{(\infty)}$. \(\begin{align*} w^{(\infty)} &= \lim_{t \rightarrow \infty} w^{(0)}(I - \eta XX^\top)^t + \eta yX^\top U \begin{bmatrix} \frac{1}{\eta\sigma_1^2} & & &\\ & \frac{1}{\eta\sigma_2^2} & &\\ & & \ddots & \\ & & & 0 \end{bmatrix}U^\top = \lim_{t \rightarrow \infty} w^{(0)}(UU^\top - \eta U \Sigma^2 U^\top)^t + yV\Sigma^\top U^\top U \begin{bmatrix} \frac{1}{\sigma_1^2} & & &\\ & \frac{1}{\sigma_2^2} & &\\ & & \ddots & \\ & & & 0 \end{bmatrix}U^\top \\ &= \lim_{t \rightarrow \infty} w^{(0)}U(I - \eta \Sigma^2)^tU^\top + yV\Sigma^\top \begin{bmatrix} \frac{1}{\sigma_1^2} & & &\\ & \frac{1}{\sigma_2^2} & &\\ & & \ddots & \\ & & & 0 \end{bmatrix}U^\top = w^{(0)}U\begin{bmatrix} 0 & & &\\ & 1 & &\\ & & \ddots & \\ & & & 1 \end{bmatrix}U^\top + yV\begin{bmatrix} \frac{1}{\sigma_1} & & &\\ & \frac{1}{\sigma_2} & &\\ & & \ddots & \\ & & & 0 \end{bmatrix}U^\top =w^{(0)}U\begin{bmatrix} 0 & & &\\ & 1 & &\\ & & \ddots & \\ & & & 1 \end{bmatrix}U^\top + yX^\dagger \\ \end{align*}\) Note the dependency of this result on $w^{(0)}$. If $w^{(0)} = 0$, then $w^{(\infty)} = yX^\dagger$. Furthermore, we can also prove that $w = yX^\dagger$ is the minimum $\ell_2$ solution. Suppose there exists another solution, $\tilde{w}$. If $wX = \tilde{w}X$, then $\tilde{w} - w \perp w$ because \((\tilde{w} - w)w^\top = (\tilde{w} - w)(y(X^\top X)^{-1}X^\top)^\top = (\tilde{w}-w)X((X^\top X)^{-1})^\top y^\top = 0\) Thus, \(\|\tilde{w}\|_2^2 = \|\tilde{w} - w + w\|_2^2 = \|\tilde{w} - w\|_2^2 + \|w\|_2^2 + 2(\tilde{w}-w)w^\top = \|\tilde{w} - w\|_2^2 + \|w\|_2^2 \geq \|w\|_2^2.\)

        This characterization is consistent when $n = d$, $n < d$, and $n > d$. If $n = d$, then $X^\dagger = (X^\top X)^{-1} X^\top = X^{-1}(X^{\top})^{-1} X^\top = X^{-1}$. When $n > d$ and the rank of $X$ is $d$, then when $\nabla_w \mathcal{L}(w) = 0$, then $(y-wX)X^\top = 0 \implies w = yX^\top(XX^\top)^{-1}$. $XX^\top \in \mathbb{R}^{d \times d}$ is invertible since $X$ is full rank, so $w = yX^\top(XX^\top)^{-1} =y(X^\top X)^{-1}X^\top = yX^\dagger$.

        We are particularly interested in the overparameterized regime, i.e. when $n < d$. The results above show that when $w^{(0)} = 0$, even though there are an infinite number of $w$ that satisfy $y = wX$, gradient descent converges to the minimum $\ell_2$-norm solution, $w = yX^\dagger$. This sparsity may help prevent overfitting even when there are enough parameters to fully memorize the input data.

        Why is this analysis helpful? This characterization may help us understand the solution obtained by kernel regression, which can be viewed as just linear regression on a nonlinear, high-dimensional space.

        Overparameterization in Kernel Regression

        We will start with a brief definition of kernel regression. Intuitively, kernel regression is running linear regression after applying a non-linear feature map, $\psi$, onto the datapoints $x \in \mathbb{R}^{d}$. Formally, we require that $\psi: \mathbb{R}^{d} \rightarrow \mathcal{H}$, $w \in \mathcal{H}$, and the predictor $\hat{f}: \mathbb{R}^{d} \rightarrow \mathbb{R}$ to take the form $\langle w, \psi(x)\rangle_{\mathcal{H}}$, where $\mathcal{H}$ is a Hilbert space. A Hilbert space is a complete metric space with an inner product. Intuitively, Hilbert spaces generalize finite-dimensional vector spaces to infinite-dimensional spaces, which is helpful for us because this allows for infinite-dimensional feature maps, an extreme example of overparameterization. All the finite-dimensional inner product spaces that are familiar to us, e.g. $\mathbb{R}^n$ with the usual dot product, are Hilbert spaces.

        At first glance, it might seem impossible to even store the weights of infinite-dimensional feature maps. However, this problem is resolved by the observation that the weights from solving linear regression will always be a linear combination of the training samples. In particular, since $yX^\dagger$ has the same span as $X$, we can always rewrite the weights as $w = \sum_{i=1}^n \alpha_i x_i^\top$, where $x_i$ denotes the $i$ th sample. What’s really interesting is that this can be extended to kernels as well.

        Specifically, for kernel regression, we seek a solution to the MSE problem: \(\mathcal{L}(w) = \|y-\hat{f}(x)\|_2^2 = \|y-\langle w,\psi(x)\rangle\|_2^2.\)

        We know that the weights must take the following form, \(w = \sum_{i=1}^n \alpha_i \psi(x_i).\)

        Thus, expanding out the loss function, we have that \(\mathcal{L}(w) = \frac{1}{2}\|y-\langle w, \psi(x)\rangle\|_2^2 = \frac{1}{2}\|y-\langle \sum_{i=1}^n \alpha_i \psi(x_i), \psi(x)\rangle\|_2^2 = \frac{1}{2}\sum_{j=1}^n (y_j -\langle \sum_{i=1}^n \alpha_i \psi(x_i), \psi(x_j)\rangle)^2 = \frac{1}{2}\sum_{j=1}^n (y_j -\langle \alpha, \begin{bmatrix} \langle \psi(x_1), \psi(x_j) \rangle \\ \langle \psi(x_2), \psi(x_j) \rangle \\ \vdots \\ \langle \psi(x_n), \psi(x_j) \rangle \\ \end{bmatrix}\rangle)^2.\)

        Thus, rather than storing the weights $w$ that act on the feature map directly, we just need to store $\alpha$, the weights acting on the samples. Moreover, another observation from this equation is that we don’t even need to define the feature map directly. We only need to store the inner product of each sample with every other sample. Formally, this inner product is called a kernel ($K: \mathbb{R}^d \times \mathbb{R}^d \rightarrow \mathbb{R}$). With a slight abuse of notation, we will also use $K$ to denote the matrix of inner products, $K(X,X)$.

        Much like our discussion in class on Gaussian Processes (GP), kernels can be thought of as a “distance” or “covariance” function on samples. Some well-known kernels include:

        • Gaussian kernel: $K(x,\tilde{x}) = \exp(-\|x - \tilde{x}\|_2^2)$
        • Laplacian kernel: $K(x,\tilde{x}) = \exp(-\|x - \tilde{x}\|_2)$
        • Neural Tangent kernel with ReLU activation: $K(x,\tilde{x}) = \frac{1}{\pi}(x^\top \tilde{x}(\pi - \arccos(x^\top \tilde{x})) + \sqrt{1 - (x^\top \tilde{x})^2}) + x^\top \tilde{x}\frac{1}{\pi}(\pi - \arccos(x^\top \tilde{x}))$
        • Linear kernel: $K(x,\tilde{x}) = x^\top \tilde{x}$

        The linear kernel is equivalent to linear regression, and (as we will explore later), the Neural Tangent kernel with ReLU activation approximates an infinitely wide neural network with $\phi(z) = \sqrt{2}\max(0,z)$ activation.

        Note also that all of these kernels, however finite, represent infinite-dimensional feature maps. For example, the feature map for the Gaussian kernel is $\psi(x) = \Big(\sqrt{\frac{(2L)^m}{p_1!p_2!\dots p_d!}}x_1^{p_1}x_2^{p_2}\dots x_d^{p_d}\Big)_{p_1,p_2,\dots,p_d \in \mathbb{N} \cup {0}}.$ It is remarkable that kernel regression even does well in practice considering it works in an extremely over-parameterized regime.

        However, our analysis using linear regression may shed some light on why. In particular, recall that our loss function is \(\mathcal{L}(w) = \frac{1}{2}\sum_{j=1}^n (y_j - \langle \alpha, K(X, x_j)\rangle)^2 = \frac{1}{2}\|y - \alpha K(X,X)\|_2^2.\)

        Since this is just linear regression in $\mathcal{H}$, gradient descent converges to $\alpha = yK^\dagger$ if $\alpha^{(0)} = 0$. This means the predictor for kernel regression looks like \(\hat{f}(x) = \alpha K(X,x) = yK^{\dagger}K(X,x).\)

        Since $K(X,X)$ is a square matrix, (technically, $n = d$ from the linear regression case), this equation can be solved directly. Moreover, $\alpha$ is the minimum $\mathcal{H}$-norm solution, just like how the weights from the linear regression model is the minimum $\ell_2$-norm solution.

        The ability to be solved in closed form is an important property of kernel regression. In practice, $\alpha^{(0)}$ cannot be initialized to $0$ in gradient descent, so neural networks do not necessarily converge to the minimum-norm solution that kernels do. This may offer some explanation for the predictive ability of kernels on tabular data.

        Now, let us formally define the Neural Tangent Kernel. The NTK for a neural network is defined as the outer product of the gradients of the network’s output with respect to its parameters, averaged over the parameter initialization distribution. Formally, if $f(x; w)$ is the output of the network for input $ x $ and parameters $ w $, the NTK is given by:

        \[K_{\text{NTK}}(x, \tilde{x}) = \mathbb{E}_{w}\left[\left\langle \frac{\partial f(x; w)}{\partial w}, \frac{\partial f(\tilde{x}; w)}{\partial w} \right\rangle\right].\]

        The intuition for this comes from understanding how parameters change in neural networks during gradient descent.

        In particular, note that \(\frac{df(x;w)}{dt} = \frac{df(x;w)}{dw} \frac{dw}{dt} \approx \frac{df(x;w)}{dw} (-\nabla_w \mathcal{L}(w)) = -\frac{1}{N}\sum_{i=1}^N \underbrace{\nabla_w f(x;w)^\top \nabla_w f(x_i;w)}_{NTK} \nabla_f\mathcal{L}(f,y_i).\)

        From this equation, we see that during gradient descent, the network $f$ changes based on its effect on the loss function weighted by the “covariance”/”distance” of $x$ w.r.t. the other samples. The intuition for the NTK thus comes from the way that the neural network evolves during gradient descent.

        Prior work established that training an infinite-width neural network $f(x;w)$ with gradient descent and MSE loss is equivalent to kernel regression where the kernel is the NTK.

        To further understand the connections between the NTK and wide neural networks, I benchmarked the performance of wide neural networks and the NTK on the task of predicting the effects of a gene knockout on a cell.

        Figure 1. Experiment workflow.

        All the datasets are publicly available on DepMap and I processed the data the same way as I did in (but the experimental results I describe here are new). In short, I have 998 cells embedded with a 30,000-dimensional vector of the expression and mutation status of genes in the cell. The target task is to predict the effect of knocking out the gene KRAS in each cell. The “effect” is a scalar that indicates how alive the cell is, with negative values indicating that the cell is more dead and positive values indicating that the cell is more alive.

        Biological datasets are well-suited for the analysis of overparameterized models because the embeddings are by default extremely high-dimensional, i.e. $d » n$. However, since I want to test the effects of increasing the width of neural networks and I do not want the shape of the weight matrix to be $\lim_{k \rightarrow \infty}\mathbb{R}^{30,000 \times k}$, I reduced the computational complexity of this problem by first running PCA on the cell embedding to reduce $d$ to $500$. Thus, $X \in \mathbb{R}^{998 \times 500}$ and $Y \in \mathbb{R}^{998 \times 1}$. I did a simple 80/20 training/test split on the data, so $X_{train} \in \mathbb{R}^{798 \times 500}$ and $X_{test} \in \mathbb{R}^{200 \times 500}$.

        I then benchmarked a one hidden layer MLP, i.e. $A\phi(Bx)$ with ReLU activation, where $A \in \mathbb{R}^{1 \times k}, B \in \mathbb{R}^{k \times 500}$, as $k$ ranged over $\{10, 110, 210, \dots, 9910\}$. I also benchmarked the NTK on the same task. There are several interesting insights from this experiment.

        1. The NTK always exactly fits the training data by construction because we directly solve the MSE problem.
        2. The MSE of a neural network as $k$ increases approaches the MSE of the NTK, which aligns with the theory. However, I want to note that if I shrink $d$, i.e. if I take $d = 10$ or $d=100$, the second point does not always hold. In those cases, the MSE of the NTK is much larger than the MSE of the neural network. That was a bit counterintuitive, but one explanation could be that the NTK is a poor approximation for the neural network in those cases because the neural network cannot be linearized when it is changing so drastically based on the small set of features.
        3. The MSE asymptotically decreases as $k \rightarrow \infty$. This aligns with the theory of the double-descent curve. It would be interesting to test if the weights learned by the MLP enforce some sort of sparsity, e.g. by plotting $\frac{|A|_2}{|x|_2}$, where $A,x \in \mathbb{R}^{k \times 1}$ and $x \sim \mathcal{N}(0,I_k)$ (unfortunately, the latter does not have a nice form).

        Gaussian Processes

        Compared to linear and kernel regression, a Gaussian Process (GP) is a much more general class of nonparametric functions. Formally, a Gaussian Process (GP) is a collection of random variables, any finite number of which have a joint Gaussian distribution. A GP can be thought of as a distribution over functions and is fully specified by its mean function $\mu(x)$ and covariance function $K(x, \tilde{x})$, (similar to kernel regression, this is also known as the kernel of the GP).

        Given a set of points $X = {x_1, x_2, \ldots, x_n}$, the function values at these points under a GP are distributed as:

        \[\mathbf{f}(X) \sim \mathcal{N}(\mathbf{\mu}(X), K(X, X)),\]

        where $ \mathbf{\mu}(X) $ is the mean vector and $ K(X, X) $ is the covariance matrix constructed using the kernel function $K$.

        Key to the concept of Gaussian Processes is the closure of multivariate Gaussians under conditioning and marginalization. Since all the function values are jointly Gaussian, the value of a new function value, given the existing ones, is also Gaussian, e.g. assuming $\mu(X) = 0$,

        \(f(x_{test}) | f(x_1)\dots f(x_n) = \mathcal{N}(\mu_{test},\Sigma_{test})\) where $\mu_{test}$ = $K(x,X)K(X,X)^{-1}f(X)$ and $\Sigma_{test}$ = $K(x,x) - K(x,X)K(X,X)^{-1}K(X,x)$. (The math for this is a bit tedious, so I omit that here.)

        Connecting Gaussian Processes, Kernel Regression, and MLPs

        It is interesting to note the similarities between this closed form for the predictor of a Gaussian process and the predictor for kernel regression. In fact, $\mu_{test}$ is exactly the same as $\hat{f}(x)_{kernel}$. This suggests GPs parameterize the class of functions drawn from a normal distribution with mean $\mu_{test}$ while kernel regression converges to a deterministic function that is exactly $\mu_{test}$. In other words, I think that the function learned by kernel regression can be thought of as the maximum of the posterior distribution of the GP with the same kernel.

        To test this insight, I ran an experiment to see how similar a Gaussian Process trained on a fixed dataset is to kernel regression with the same kernel.

        Figure 2. Results of Gaussian Process Regression and Kernel Ridge Regression on synthetic data with the same kernel function.

        I sampled $X \sim \mathcal{N}(5,1)$ and $Y \sim \sin(X) + \mathcal{N}(0,0.2)$. I then trained a Gaussian Process and kernel ridge regression on the data with $K(x,\tilde{x}) = \exp\left(-\frac{\|x-\tilde{x}\|_2^2}{2}\right) + Id$. As expected, the function learned by kernel ridge regression closely matches the mean of the class of functions learned by the GP.

        Another connection between kernel regression and GPs can be made through the introduction of a one hidden layer MLP. See below figure.

        Figure 3. Visualization of kernel regression, MLPs, and Gaussian Processes.

        Starting with kernel regression, if we fix the “feature map,” $B$, then training with gradient descent with $A^{(0)} = 0$ is equivalent to training kernel regression with $K(x,\tilde{x}) = \langle \phi(Bx), \phi(B\tilde{x}) \rangle$. This is intuitive because again, we can just think of kernel regression as linear regression ($A$) after applying a nonlinear feature map, ($\phi \circ B$).

        The connection between neural networks and Gaussian Processes is a bit more complicated. Suppose we are in the overparameterized regime and $A \in \mathbb{R}^{1 \times k}$ and $B \in \mathbb{R}^{k \times d}$. Forgoing the bias term for simplicity, the output of the network is \(f(x) = A\phi(Bx) = \sum_{i=1}^k A_i\phi(Bx)_i.\) If the weights of the network are sampled i.i.d. Gaussian, then $f(x)$ is a sum of i.i.d. Gaussians and so as $k \rightarrow \infty$, the Central Limit Theorem states that the output of the network will also be Gaussian with some fixed mean and covariance, i.e. in the limit, \(f(x) \sim \mathcal{N}(0,\Sigma)\) \(\begin{bmatrix} f(x_1) \\ f(x_2) \\ \vdots \\ f(x_n) \end{bmatrix} \sim \mathcal{N}(0,K)\)

        Now, let us compute $K$: \(K(x,\tilde{x}) = \mathbb{E}[f(x)f(\tilde{x})] = \mathbb{E}[A\phi(Bx)A\phi(B\tilde{x})] = \mathbb{E}\Big[\lim_{k \rightarrow \infty}\Big(\sum_{i=1}^k A_i \phi(Bx)_i\Big)\Big(\sum_{i=1}^k A_i \phi(B\tilde{x})_i\Big)\Big]\) Suppose for simplicity that $A \sim \mathcal{N}(0,I)$. Then $\mathbb{E}[A_iA_j] = 0$ and $\mathbb{E}[A_iA_i] = 1$: \(= \mathbb{E}\Big[\lim_{k \rightarrow \infty}\sum_{i=1}^k A_i^2 \phi(Bx)_i\phi(B\tilde{x})_i\Big] = 1 \lim_{k \rightarrow \infty} \sum_{i=1}^k \phi(Bx)_i\phi(B\tilde{x})_i= \underbrace{\lim_{k \rightarrow \infty} \langle \phi(Bx),\phi(B\tilde{x}) \rangle}_{k \times NNGP}.\)

        The latter is essentially the definition of the Neural Network Gaussian Process, which is the kernel of the Gaussian Process that neural networks converge to when its width goes to infinity. (The NNGP has an extra $\frac{1}{k}$ term to allow the Law of Large Numbers to be used again.)

        Ultimately, what this shows is that a neural network of infinite width over i.i.d. parameters is the class of Gaussian functions parameterized by the Neural Network Gaussian Process. With gradient descent, neural networks and kernel regression converge to a deterministic function that can be thought of as a sample from a GP.

        The below figure summarizes my findings on the connections between the three types of function classes:

        Figure 4. Comparison of kernel regression, MLPs, and Gaussian Processes.

        Discussion

        To summarize, these are the implications of the NN-Kernel Regression-GP Connection:

        • Predictive Distribution: In the infinite-width limit, the predictive distribution of a neural network for a new input $x_{test}$ can be described by a Gaussian distribution with mean and variance determined by the NNGP.
        • Regularization and Generalization: Kernels inherently regularize the function space explored by the network. This regularization is not in the form of an explicit penalty but may arise from the minimum $\mathcal{H}$-norm solution of kernel regression. This may explain the observed generalization capabilities of wide neural networks.
        • Analytical Insights: This correspondence provides a powerful analytical tool to study the learning dynamics of neural networks, which are often difficult to analyze due to their non-linear and high-dimensional nature.

        Limitations

        A major limitation of this current work is that I evaluated overparameterized neural networks only through the lens of kernels/GPs. It would be interesting to try to understand the successes of neural networks through other metrics, such as evaluating test risk as width increases. Furthermore, it would also be interesting to characterize what happens when depth, rather than just width, increases. Another interesting next step would be expanding this analysis to understanding overparameterization of other architectures, such as CNNs and transformers, and their connections to kernel regression and Gaussian Processes.

        Understanding neural networks through the lens of the NTK and Gaussian processes deepens our appreciation of the foundational principles in machine learning. It unifies three seemingly disparate areas: the powerful yet often opaque world of deep learning, the straightforward approach of kernel regression, and the rigorous, probabilistic framework of Gaussian processes. This confluence not only enriches our theoretical understanding but also paves the way for novel methodologies and insights in the practical application of machine learning algorithms.

        \ No newline at end of file diff --git a/blog/2023/physics-loss/index.html b/blog/2023/physics-loss/index.html new file mode 100644 index 00000000..fa790f9d --- /dev/null +++ b/blog/2023/physics-loss/index.html @@ -0,0 +1,32 @@ + Physics Loss | 6.S898 Deep Learning Blogs 2023

        Physics Loss

        Learning a deep net to optimize an LP, based on predicting the optimal basis vector. Surveys existing approaches in the literature. Demonstrates high accuracy of feasibility and optimality on small problem instances, but documents issues when scaling to larger problems. Benchmarks against a modern optimization solver, with discussions on upfront training vs. variable inference computation times.

        Super Resolution: Multi-Objective Training for Optimizing a Single Objective

        Introduction

        Super-resolution (SR) refers to image processing techniques which enhance the quality of low-resolution images [2]. Recently deep learning based SR has been applied to the field of fluid dynamics to recreate chaotic turbulent flows from low-resolution experimental or numerical data [3]. For some loss function \(\mathcal{L}\), the goal is to find weights \(\theta^*\) such that

        \[\begin{aligned} \theta^* = \text{argmin}_\theta\; \mathcal{L}(\bold{u_H},f(\bold{u_L};\bold{\theta})) \end{aligned}\]

        where \(\bf u_H\) is the reference high resolution data field and \(\bf u_L\) is the corresponding coarsened low resolution data input to the neural network \(f\) (see the figure below).

        Super-resolution reconstruction of turbulent vorticity field using physics-based neural network. Adapted from [2].

        Fig 1: Super-resolution reconstruction of turbulent vorticity field using physics-based neural network. Adapted from [2]. Disclaimer: we didn’t have time to train on nice images like these for the present investigation.

        Doing so can aid our understanding of flow physics [3]. Many have already applied deep learning to this problem, applying a variety of methods. The performance of the resulting networks depends heavily on the loss function used to train the network. Looking to improve upon the standard \(L_2\) loss function, some have introduced physics-based loss function that incorporates physical laws that the real flow must obey. For example [2] use the following type of form:

        \[\begin{aligned} \mathcal{L} &= \beta_0||\bold{u_H}-f(\bold{u_L})||_2 + \beta_1 ||p_1(\bold{u_H})-p_1(f(\bold{u_L}))||_2 + \beta_2 ||p_2(\bold{u_H})-p_2(f(\bold{u_L}))||_2 + ... \end{aligned}\]

        where \(p_i(\cdot)\) is a physical objective that we want to enforce during training (e.g. spatial and time derivatives \(\nabla \bf u_H\), \(\bf\dot{u}_H\) etc.) and the \(\beta_i\) are fixed weighting coefficients.

        Typically, multi-objective super resolution approaches aim to overcome the weaknesses of the single objective $L_2$ reconstruction loss, a primary one being that the $L_2$ loss favors blurry reconstructions over sharper more ‘realistic’ ones. The general idea is that the additional objectives push the training away from un-realistic reconstructions.

        However suppose the goal really is to minimize the $L_2$ reconstruction loss. Can multi-objective training reduce the loss on the original objective or do the new objectives just get in the way? In this investigation we apply adaptively-weighted multi-objective optimization methods to the problem of turbulence super resolution which is a novel approach.

        Methodology

        The Dataset

        Super resolution reconstruction is an interesting problem for turbulent flows due to their inherent multi-scale nature. Information is lost in the coarsening/pooling process making perfect reconstruction impossible without additional insights. Unfortunately, due to time and resource constraints it is unfeasible to train on 2D turbulence slices as in figure 1. In order to retain a challenging problem for the super-resolution we build an artificial dataset of 1D turbulence as follows:

        \[u(x) = \sum_{k=1}^{10} k^{-1}\sin\left(kx+\phi(k)\right) + (2k)^{-1}\sin\left( 2kx +\phi(k)\right)\]

        The amplitude scaling $k^{-1}$ models how the frequencies in a particular turbulent signal might decay with increasing wavenumber (velocity, temperature, pressure, kinetic energy, etc.). In other words the contribution of higher modes to the entire signal becomes less and less important in a predictable way. We generate each individual signal by fixing a phase function $\phi(k)$. For each $k$, $\phi(k)$ is taken to be the realization of a uniform random variable in the range $[0,2\pi)$. This function $u(x)$ bakes in inherent correlations between the low and high frequency waveforms (Please note: this is not physical. We are just making a useful toy dataset for this investigation). Even with extremely coarse low-resolution inputs, we expect that a well-trained neural network can use these correlations to reconstruct the high frequency waveforms.

        For input to the network, the samples are discretized to a $512$ point high resolution grid: $(\mathbf{u_H})_j = u(x_j)=u(j\cdot\frac{2\pi}{512})$. The low resolution data is average pooled with a kernel size of $32$. This results in a low resolution grid of size $512/32 = 16$. Average pooling has been shown to have nice training properties for super resolution reconstruction [2]. The following is a typical high/low resolution pair:

        Typical Input

        Fig 2: Typical high/low resolution data pair. The high resolution version exists on a 512 point grid. The low resolution version has been average pooled down to a 16 point grid using a average pooling kernel of size 32. The pooling procedure removes the highest frequency components of the data meaning that full reconstruction requires deeper understanding of the underlying structure.

        The Network

        The network is a three layer fully connected network with hidden sizes $[1024,1024,1024]$.

        Training Scheme

        The multi-objective loss function

        \[\begin{aligned} \mathcal{L} &= \mathcal{L}_0 + \mathcal{L}_1 + \mathcal{L}_2+... \\&= \beta_0||\bold{u_H}-f(\bold{u_L})||_2 + \beta_1 ||p_1(\bold{u_H})-p_1(f(\bold{u_L}))||_2 + \beta_2 ||p_2(\bold{u_H})-p_2(f(\bold{u_L}))||_2 + ... \end{aligned}\]

        presents a unique training challenge. Many turbulence super-resolution studies to date set the weights $\beta_i$ by trial and error in an attempt to produce ‘nice’ results [3]. This approach is sub-optimal because the best values of $\beta_i$ are dependent on the units and orders of magnitude of the properties $p_i$. Also, the best choice for the weights may change depending on the stage of training. For example it may be best to put more emphasis on the reconstruction loss $\mathcal{L}_0$ during the first stages of training and then shift emphasis to other properties to refine the model during the latter stages. In addition to these considerations [5] observed that for physics informed neural networks fixed weights tended to induce training instability as the multiple objectives compete with one another.

        To mitigate these issues in this investigation we employ a multi-objective optimizer (MOO). After each training epoch a MOO reviews the progress for each loss component $\mathcal{L}_i$ and updates the weights $\beta_i$. A schematic is shown below:

        Schematic of one training epoch

        Fig3: One epoch of training with adaptive loss using ReLoBRaLo MOO. At the end of batched training iterations the MOO updates ${\beta_i}$ according to the progress of each individual loss component. The Adam training optimizer learning rate is fixed at $10^{-5}$ for the entire investigation.

        In particular we use the Relative Loss Balancing with Random Lookback (ReLoBRaLo) scheme from [5] for the MOO. The scheme adaptively updates the loss weights at the end of each epoch according to the progress of each individual loss component:

        \[\begin{align*} \beta_i^{bal}(t) &= m\cdot \frac {\exp\left(\frac{\mathcal{L}_i(t)}{\mathcal{T}\mathcal{L}_i(t-1)}\right)} {\sum_{j=1}^m \exp\left(\frac{\mathcal{L}_j(t)}{\mathcal{T}\mathcal{L}_j(t-1)}\right)},\;i\in\{1,...,m\}\\ \beta_i(t) &= \alpha\beta_i(t-1) + (1-\alpha)\beta_i^{bal}(t) \end{align*}\]

        There are many more details in [5], but essentially the $\beta_i^{bal}(t)$ term measures the progress of the loss $\mathcal{L}_i$ since the previous epoch relative to the progress made by other losses. The more a particular loss is struggling the more we increment its weight for the next epoch. The $\alpha$ hyper-parameter indicates bias towards the existing weight values. When $\alpha=1$ no updates are made. The temperature hyper-parameter $\mathcal{T}$ indicates the level of equality across loss components. As $\mathcal{T} \to 0$ only the most struggling loss component receives a weight update. When $\mathcal{T}\to \infty$ all components receive an equal weight update. Note that we initialize by $\beta_0(0)=1$ and $\beta_i(0)=0$ for $i>0$.

        Results

        Two Objective Loss

        We tried training on a variety of two-objective loss functions of the form

        $\mathcal{L} = \beta_0\|\bold{u_H}-f(\bold{u_L})\|_2 + \beta_1\|p_1(\bold{u_H})-p_1(f(\bold{u_L}))\|_2$
        where the $p_1$ objective was taken to be Fourier transform $\mathcal{F}$, spatial derivative $\frac{d}{dx}$, standard deviation $\sigma(\cdot)$, mean $\mathbb{E}_x(\cdot)$, absolute value $|\cdot|$, or functional compositions of the aforementioned. Compared to training on the standard single objective reconstruction loss $\mathcal{L}= \mathcal{L}_0 = \beta_0\|\bold{u_H}-f(\bold{u_L})\|_2$, only the two-objective loss with Fourier transform loss gave significant improvements in training performance. Training with $\mathbb{E}_x$ gave marginal improvements. All other properties gave fairly neutral or negative results. Composing the Fourier transform with other properties was detrimental. The following table summarizes the training ($\alpha =0.9,\; \mathcal{T}=1$):

        Table 1: Training performance for two-objective loss functions. All runs were performed with $\alpha =0.9,\; \mathcal{T}=1$. The rightmost column show the percent improvement from the single objective training. The poor performance of $\mathcal{F}\circ\frac{d}{dx}$ might be due to high frequency noise being amplified by the derivative operator before being passed through the Fourier transform.

        $\boldsymbol{p_1}$ $\boldsymbol{\mathcal{L_0}(\text{epoch = }200)}$ % Improvement over Single Objective    
        None (single objective) 0.01895 0 %    
        $\mathcal{F}$ 0.01366 29 %    
        $\frac{d}{dx}$ 0.01993 -5.3 %    
        $\sigma(\cdot)$ 0.02437 -29 %    
        $\mathbb{E}_x$ 0.01771 6.7 %    
        $|\cdot|$ 0.01745 8.1%
        $\mathcal{F}\circ\frac{d}{dx}$ 0.17174 -830%    

        Figures 4 provides a more detailed look at the training for $p_1=\mathcal{F}$. There is considerable variation in the rate of learning due to altering the $\alpha$ hyper-parameter. The bottom panel of figure 4 gives an example of a reconstructed signal. With enough training the network is able to learn the inherent structure in the data and reconstruct the high frequencies.

        Fourier loss two objective training

        Reconstructed data by two-objective training

        Fig 4: Top panel: Two objective training with Fourier loss for $\mathcal{T}=1$. The results for setting $\mathcal{T}=0.01,100$ are very similar so they are omitted for brevity. The two objective training (reconstruction + Fourier) outperforms the single objective training for every value of $\alpha$. The optimal value of $\alpha$ is close to $0.999$. Bottom panel: example of reconstructed validation data. The model is able to recover the high frequency components from the original high resolution signal.

        beta evolution

        fig5b

        Fig 5: Reconstruction and Fourier objective ${\beta_i}$ evolution for $\alpha=0.9,0.999$. The smaller $\alpha$ the faster the loss weights converge to 1.

        The two objective training curves in figure 4 are significantly better than the single objective curve. There is a particular value of $\alpha$ (~0.999) that gives the best overall result. Figure 5 demonstrates how the loss weights adapt over the course of training as the ReLoBRaLo MOO tries to balance the improvements in each loss component. For $\alpha=0.9$ the MOO rapidly increases $\beta_1$ in order to put more weight on the lagging Fourier loss. When $\alpha=0.999$ the increase is a lot more gradual. In the limit as $\alpha\to1$ we just have single objective optimization.

        Figure 6 shows a similar weight evolution when the second objective is ‘bad’, $p_1=\sigma(\cdot)$:

        beta evolution for standard deviation

        Fig 6: Reconstruction and $\sigma(\cdot)$ objective ${\beta_i}$ evolutions. There is evidence of instability at the start of training.

        In contrast to the reconstruction and Fourier two-objective training, the reconstruction and $\sigma(\cdot)$ weight evolutions show signs of instability. At around $15$ epochs $\beta_0$ experiences a bump. This is most likely the MOO responding to degrading progress on the reconstruction objective due to the two objectives competing with each other. Therefore for optimal multi-objective training it seems preferable that all loss components smoothly decrease without cross interference.

        Multi Objective Loss

        We also study a multi-objective loss created by combining the most successful objectives from the previous study.

        \[\begin{aligned} p_1&=\mathcal{F}\\ p_2&=|\cdot|\\ p_3&=\mathbb{E}_x\\ p_4&=\frac{d}{dx}\\ \end{aligned}\]

        The results closely mimic the two objective Fourier loss so we omit further details. Interestingly, even when we introduce a ‘bad’ objective such as $\sigma(\cdot)$ or $\mathcal{F}\circ\frac{d}{dx}$ into the multi-objective loss it doesn’t appear to spoil the result despite causing a minor instability (see figure 6). These results suggest that it may be possible to just ‘throw in’ many auxiliary objectives in the hopes that one of them improves training. We might not necessarily need to worry about bad objectives spoiling the bunch. Or it could just be that in this particular case that the Fourier objective $\mathcal{F}$ is strong enough to overcome the bad objectives. This needs more investigation.

        Conclusion

        This investigation showed that multi-objective loss functions can be useful even when only one objective is ultimately of interest. Most likely due to the manner in which the data set was constructed, the Fourier objective turned out to be a great training aid (Note that we did try single objective training with the Fourier objective replacing the reconstruction objective. This did not yield as good results suggesting that there is something inherently beneficial about multi-objective training as opposed to just changing basis).

        The other objectives did not do nearly as well and some even degraded the training by causing instabilities. The ReLoBRaLo MOO was a critical component of training. None of the aforementioned results would have been possible with fixed weights. It was critical to fine tune the $\alpha$ parameter which determines how aggressively the MOO does updates. Presumably, an overly aggressive MOO doesn’t give the network time to settle in the early stages of training but an overly passive MOO hardly makes any difference at all.

        While good, ultimately the ReLoBRaLo scheme was designed for traditional MOO problems (such as solving partial differential equations) and is most likely far from optimal under the unique settings of this investigation. In addition, the objectives in this study were chosen quite arbitrarily. The Fourier objective was an easy one to discover due to the low-pass nature of super-resolution reconstruction and the manufactured dataset. For a more general problem where we might want to introduce auxiliary objectives it will be very difficult a-priori to identify high performance auxiliary objectives. An interesting future investigation could be to design a neural network that adaptively updates the auxiliary objectives after each epoch with the goal accelerating the main network’s learning curve.

        References

        [1] Bode, M., Gauding, M., Lian, Z., Denker, D., Davidovic, M., Kleinheinz, K., Jitsev, J. and Pitsch, H. Using physics-informed enhanced super-resolution generative adversarial networks for subfilter modeling in turbulent reactive flows. Proceedings of the Combustion Institute, 2021.

        [2] Fukami, K., Fukagata, K. and Taira, K. Super-resolution reconstruction of turbulent flows with machine learning. Journal of Fluid Mechanics, 2019.

        [3] Fukami, K.,Fukagata, K., and Taira, K. Super-Resolution Analysis Via Machine Learning: A Survey For Fluid Flows. [Unpublished manuscript], 2023.

        [4] Wang, C., Li, S., He, D. and Wang, L. Is L2 Physics-Informed Loss Always Suitable for Training Physics-Informed Neural Network?. Conference on Neural Information Processing Systems, 2022.

        [5] Bischof, R., and Kraus, M. Multi-Objective Loss Balancing for Physics-Informed DeepLearning. [Unpublished manuscript], 2022.

        \ No newline at end of file diff --git a/blog/2023/projected-fff-networks/index.html b/blog/2023/projected-fff-networks/index.html new file mode 100644 index 00000000..e21920cf --- /dev/null +++ b/blog/2023/projected-fff-networks/index.html @@ -0,0 +1,42 @@ + Projected fast feedforward networks | 6.S898 Deep Learning Blogs 2023

        Projected fast feedforward networks

        Abstract

        Introduction

        Compression of neural networks is a crucial task in Machine Learning. There are three important performance metrics that we should take into account when deploying models:

        • Size of the model. Having a smaller number of parameters that describe the model makes transferring it over network faster. In addition, being able to concisely represent the differences between original and finetuned model would enable storing and distributing a lot of possible finetunings, such as in Stable Diffusion LORA

        • GPU memory needed to perform the inference. If the metric is lower, the model inference can be run on less expensive GPUs with less available memory. Some models could even be run on smartphones or IoT devices

        • Inference time. We also can take into account how does the time scales with the size of the batch

        Balancing these characteristics is a non-trivial task, since improvements in one of them could lead to a decline in other metrics. The optimal tradeoff depends on the environment in which the model is run.

        We will explore a way to significantly reduce the model size and the memory needed for inference, keeping the inference time reasonable. We achieve the size reduction by utilizing a common property of having small intrinsic dimension of objective landscape that many models have.

        There are several ways how the size of the model can be reduced. One of the popular techniques is model quantization. Quantization of a machine learning model involves decreasing the precision of weights for the sake of reduction of the total memory needed to store them. Quantized models can utilize 16, 8, or even 4-bit floats, with carefully selected summation and multiplication tables. There are different ways of dealing with the inevitable degradation of accuracy due to lack of precision, one possible way is described in paper

        Another direction of model size optimization utilizes the notion of matrix low-rank approximation. The layers of neural networks are commonly represented as matrices, the simplest example being the parameters of a feedforward linear layer. Each matrix \(A\) has a Singular Value Decomposition \(A = U\Sigma V^*\), and, using this decomposition, it’s possible to get a close low-rank approximation of \(A\). We note that a matrix of size \(n \times m\) of rank \(k\) can be stored in \(O((n+m)k)\) memory if we express it as a sum of outer products of \(k\) pairs of vectors, so if \(k\) is small, this representation uses much less memory than \(O(nm)\) — the memory used by the dense representation. Prior work has used this kind of low-rank approximation to compress models.

        However, we are going to explore another compression method, which utilizes small dimensionality of optimization landscape, which is common for many model-task pairs. When training a neural network, we have some loss \(\mathcal{L}\), and a parameter space \(\mathbb{R}^{p}\). Then, we are trying to find \(v \in \mathbb{R}^{p}\) such that \(\mathcal{L}(v)\) is minimized. Instead of searching over the whole space, we generate a linear operator \(\phi\colon \; \mathbb{R}^{d} \to \mathbb{R}^{p}\), where \(d < p\), and parametrize \(v\) as \(v = \phi u\), where \(u \in \mathbb{R}^{d}\). Li et al. found that if the the matrix of $\phi$ has normalized columns, for many tasks it’s possible to find a network, parametrized the way above, where \(d\) is significantly smaller than \(p\), such that the network has at least 90% of metric value of the original network. Then, if our way to generate random projection matrix is seeded deterministically, we only need to store \(d\) floats, which are the coefficients of \(u\). During inference, we re-create \(\phi\), and restore the parameters of original architecture \(v = \phi u\). The compression ratio of this method is \(\frac{p}{d}\). In this blog, we will explore its potential modifications, and evaluate them by running experiments.

        Basic experiment

        First, we test the method without any modifications. We use dataset MNIST , containing 30000 \(28 \times 28\) monochrome images of digits. Each image belongs to one of 10 classes, depending on the digit, and the task of the model is to classify these digits.

        For each of the experiment, we use a neural network with one hidden layer with 128 units and ReLU activations. We optimize the parameters with Adam and learning rate \(10^{-4}\). The training is ran for \(100\) epochs, our batch size is \(128\).

        d final val acc
        4 17.56
        16 20.39
        64 50.2
        256 71.1
        512 61.25
        1024 61.66
        original 95.65

        Better initialization

        We’ve noticed that the optimization of the compressed model does not converge fast. To initialize better, we can use pre-trained weights of non-compressed model \(v\).

        Let \(A\) be the projection matrix that we used in the compression. Then, to convert compressed parameters of a model to the original ones, we need to multiply by \(A\) on the left. The idea is to start from the compressed parameters, such that after going to uncompressed space, they would be as close to \(v\) as possible by Euclidean distance. Then, we can use the formula for projection onto a linear subspace:

        \[u^{*} = \mathop{argmin}_u ||Au - v||^2 \Rightarrow u^{*} = (A^TA)^{-1}A^Tv\]

        By initializing \(u\) this way, we achieve a faster convergence of the optimizer, because after projecting to subspace and returning to original coordinates, we get a parameter vector that is close to the optimal one, so it should be near the optimum in the coordinates of projection.

        In our experiments, we compare how fast the model trains with random initialization and with projection initialization.

        d final val acc
        4 17.72
        16 28.68
        64 51.52
        256 71.18
        512 83.93
        1024 90.18
        original 95.65

        Distillation

        The concept of model distillation was introduced by . The main idea is to train a compact model (“student”) to emulate a larger, pre-trained model (“teacher”). In our case, the compact model would be a reparametrized model with \(d\) dimensions, while the “teacher” model has all the original parameters. When training, we regress the mean square difference between the logits of the original and compact model.

        We initialize the compressed model with the projection of the original model as in the previous section. In our experiments, we’ve noticed that this training procedure has comparable convergence speed, however, its validation accuracy reaches a plateau on a lower value than in regular training procedure.

        CIFAR-10 experiment. Network is logistic regression. Projections for d=700, on the left: with distillation, on the right: without distillation

        Independent projections for layers

        In many cases, the model we are compressing contains several independent layers. Therefore, we can try to split the coordinates in the space to which we are projecting parameters so that each coordinate corresponds to exactly one layer. This constraint corresponds to the matrix of \(\phi\) being block-diagonal.

        These changes improve the accuracy, and decrease the inference time (because for each layer we only need to use some part of the compressed coordinates), while keeping \(d\) constant.

        CIFAR-10 experiment. Network has one hidden layer with 32 units. On the left: one projection for d=2500. On the right: separate projections with d=500 and d=100

        GPU memory utilization

        Suppose we want to perform inference with the minimal possible usage of RAM. Let’s assume that the architecture of the model that we are evaluating is an MLP. Then, using the compressed representation, we can use no more than \(O(\max(d, L))\) memory, where \(d\) is the dimension to which we compressed the model, and \(L\) is the maximum size of the layer.

        We describe the inference procedure consuming this little memory. We need to sequentially apply each of the feedforward networks in our MLP. For each layer, we have to transform the input vector \(x\) to the output \(y\). We fill in the output vector with zeros, and for each index \((i, j)\) in the weight matrix we need to make an update \(y_i \leftarrow y_i + A_{ij}x_j\). However, we don’t store any of the parameters in memory except for \(d\) compressed parameters. So, in order to get the value of \(A_{ij}\), we need to take the dot product of a row in the projection matrix and a vector of compressed parameters.

        It is not obvious how to randomly access a row in a random matrix, where all columns should be normalized, and the outcomes during training and inference are consistent. We note that the true randomness of the projection matrix is not important for us. So, instead we can generate the \(i\)-th row by seeding the random generator with \(i\) and generating a row. During training, we generate the whole matrix this way, and compute the normalization coefficients of columns, which are included into the model’s representation in memory. During inference, to get the \(i\)-th row, we just need to sample a row and divide it by normalization coefficients pointwise. We have checked that this way of generating the projection matrix has no negative effects on the performance of the compressed model, compared to the truly random option.

        Diffusion models

        We have also attempted to apply model compression to different domains besides image classification. One of the problems we considered is generating 2D points from a certain distribution using a diffusion model. In this setup, we have a neural network that predicts the noise for a pair \((x, t)\) — point in space and time.

        We use continuous time on \([0, 1]\), linear noise schedule with \(\beta_{min} = 0.3\), \(\beta_{max} = 30\), variance-preserving SDE, batch size \(64\), sampling timesteps \(100\), ODE sampler. The distribution that we are trying to learn is a mixture of \(6\) Gaussians. We use an MLP score net with \(2\)-dimensional input and \(32\)-dimensional Gaussian Fourier Projection time embeddings.

        However, even setting the compression dimension \(1000\) or \(5000\) did not enable us to see good sampling results.

        On the left: sampling result with original model. On the right: sampling with compressed model, d=1000

        Conclusion

        We have discussed a way to compress models, decreasing their size by several orders of magnitude. We identified ways to improve the validation accuracy of compressed models, such as doing the initialization with projection and having independent projections for layers. This technique leads to surprising consequences, such as being able to do machine learning model inference with a very small amount of RAM.

        \ No newline at end of file diff --git a/blog/2023/prompt-to-prompt/index.html b/blog/2023/prompt-to-prompt/index.html new file mode 100644 index 00000000..b189d55b --- /dev/null +++ b/blog/2023/prompt-to-prompt/index.html @@ -0,0 +1,42 @@ + Prompt to Prompt | 6.S898 Deep Learning Blogs 2023

        Prompt to Prompt

        Text-based image editing via cross-attention mechanisms - the research of hyperparameters and novel mechanisms to enhance existing frameworks

        Introduction

        Recently, the techniques to edit images have advanced from methodologies that require the user to edit individual pixels to deep learning-based image editing. The latter employ for example large image generation models (e.g., stable diffusion models). While these deep learning-based image editing techniques initially required the user to mark particular areas that should be edited (Nichol et al., 2021 ; Avrahami et al., 2022a; Ramesh et al., 2022), recently the work by (Hertz et al, 2022 ) has shown that this becomes unnecessary. Instead, image editing can be performed using a cross-attention mechanism. In particular, the proposed prompt-to-prompt editing framework enables the controlling of image edits by text only. The section below provides an overview of how this prompt-to-prompt framework works (Figure 1, by (Hertz et al, 2022)).

        Figure 1: Cross-attention method overview. Top: visual and textual embedding are fused using cross-attention layers that produce attention maps for each textual token. Bottom: we control the spatial layout and geometry of the generated image using the attention maps of a source image. This enables various editing tasks through editing the textual prompt only. When swapping a word in the prompt, we inject the source image maps Mt, overriding the target maps M ∗ t . In the case of adding a refinement phrase, we inject only the maps that correspond to the unchanged part of the prompt. To amplify or attenuate the semantic effect of a word, we re-weight the corresponding attention map. (Hertz et al, 2022 ).

        While this proposed framework has significantly advanced the image editing research field, its performance leaves still room for improvement such that open research questions remain. For example, when performing an image editing operation that changes the hair color of a woman, significant variability across the woman’s face can be observed (Figure 2). This is undesirable, as the user would expect to see the same female face across all four images.

        Figure 2: Experimentation with the proposed prompt-to-prompt image editing framework presented by (Hertz et al, 2022). The faces of the women show significant variability even though they should remain invariant across all four generated/ edited images.

        Within our work, we will start to further benchmark the proposed framework’s performance, explore its hyperparameters’ impact on the image editing process, and research opportunities to improve the current performance.

        Literature Review

        Before delving into the details of the prompt-to-prompt editing method, let’s briefly recap some existing techniques to edit images with diffusion models that have paved the way for this revolutionary approach:

        1. Adding noise to an image and denoising with a prompt

        In SDEdit: Guided Image Synthesis and Editing with Stochastic Differential Equations , the user takes an image, introduces noise and then denoises it according to a user-provided prompt. As an example, given an image, users can specify how they want the edited image to look using pixel patches copied from other reference images.

        A similar approach is used in the paper MagicMix: Semantic Mixing with Diffusion Models which uses a pre-trained text-to-image diffusion based generative model to extract and mix two semantics. The figure below showcases the detailed pipeline of MagicMix (image-text mixing). Given an image x0 of layout semantics, they first craft its corresponding layout noises from step Kmin to Kmax. Starting from Kmax, the conditional generation process progressively mixes the two concepts by denoising given the conditioning content semantics (“coffee machine” in this example). For each step k in [Kmin; Kmax], the generated noise of mixed semantics is interpolated with the layout noise xk to preserve more layout details.

        2. Take an image, add noise and denoise it with a prompt + Add a mask

        In the paper Blended Diffusion: Text-Driven Editing of Natural Images , given an input of an image and a mask, the blended diffusion modifies the masked area according to a guided text prompt, without affecting the unmasked regions. One limitation of this is that it relies on the user having to produce this mask to indicate the editing region.

        An advanced version of this diffusion model is discussed in the paper Text-based inpainting with CLIPSeg and Stable Diffusion . In this paper, the novelty is that the user doesn’t have to do the mask manually. Instead, it can use an existing segmentation model (e.g. CLIPSeg). Another alternative is presented in the paper DiffEdit: Diffusion-based semantic image editing with mask guidance where the mask is generated directly from the diffusion model.

        3. Fine-tune (“overfit”) on a single image and then generate with the fine-tuned model

        In the paper Imagic: Text-based real image editing with diffusion models and Unitune: Text-driven image editing by fine-tuning a diffusion model on a single image , the authors perform extensive fine-tuning on either the entire diffusion model or specific sections of it. This process is computationally and memory-intensive, setting it apart from alternative methods.

        Prompt-to-prompt

        The prompt-to-prompt editing method is a significant advancement compared with the existing image editing techniques that rely on diffusion models. Unlike the methods explained above that involve adding noise, using masks, or fine-tuning, the prompt-to-prompt method stands out because of its simplicity, flexibility, and user-friendliness. In the former methods, users often face challenges such as manually creating masks or undergoing resource-intensive fine-tuning processes, which can be both time-consuming and technically demanding. In contrast, the prompt-to-prompt editing method streamlines the editing process by allowing users to directly specify their desired edits through language prompts. This approach eliminates the need for intricate masking or extensive model training as well as leverages the power of human language to precisely convey editing intentions.

        Throughout our research, we will adopt the prompt-to-prompt editing method as our starting point, with the aim of enhancing its performance.

        Outline of our research

        To perform our research, we plan to build upon the code which complemented the paper published by (Hertz et al, 2022 , Link to code). Concretely, we will rely on a stable diffusion model from hugging face which we will access via Python. No model training is required as we will solely work with attention layers that capture spatial information about the images.

        Our study will be divided into 3 main subsections:

        A. Hyperparameter Study of prompt-to-prompt editing method “word swap”

        In the forthcoming subsection, we delve into a comprehensive analysis of the hyperparameters pertaining to the “word swap” method within the prompt-to-prompt editing framework. Before delving into the specifics, it’s crucial to understand the significance of these hyperparameters and their default values, as originally outlined in the seminal work by Hertz et al.

        We will systematically explore various hypotheses regarding each hyperparameter and present our empirical findings, shedding light on their individual impacts on the editing process. This examination aims to provide valuable insights into optimizing the performance of the “word swap” method and enhancing its practical utility.

        A1. Exploration of silhouette threshold hyperparameter (“k”)

        In this section, we embark on an exploration of the silhouette threshold hyperparameter (“k”). We aim to unravel the influence of varying this parameter while using the prompt ‘“A woman’s face with blond hair”’ and making alterations to different hair colors (brown, red, black). The GIF below showcases the representation of these experiments.

        Additionally, we present a comparative analysis of the impact of this hyperparameter on editing tasks related to landscapes. For instance, we employ the prompt ‘“A river between mountains”’ and manipulate the landscape, including options like streets, forests, and deserts. The results of this landscape-oriented analysis can be seen in the figure below.

        A2. Exploration of cross-attention injection hyperparameter (“cross replace steps”)

        Below we showcase the effect of the silhouette threshold hyperparameter (“k”) and the cross-attention injection hyperparameter (“cross_replace_steps”). We manipulate the “k” value, setting it to 3 different levels: 0, 0.3 (default literature value), and 0.6. The experiment was performed for both women’s faces and landscapes, providing a comprehensive understanding of how these hyperparameters affect the editing process. The following GIFs showcase the results of our exploration.

        With k = 0:

        With k = 0.3:

        With k = 0.6:

        Below, we present the key insights found for the prompt “A woman’s face with blond hair”.

        Below, we present the key insights found for the prompt “A river between mountains”.

        A3. Exploration of self-attention hyperparameter (“self replace steps”)

        In our investigation of the self-attention hyperparameter known as “self_replace_steps,” we conducted a series of experiments with careful consideration of the interplay between this parameter and two other critical factors: “k” (the silhouette threshold) and “cross_replace_steps” (the cross-attention injection parameter). To comprehensively assess the influence of “self_replace_steps,” we designed two distinct experimental scenarios.

        In the first scenario, we set “k” and “cross_replace_steps” to their default values in the literature review (0.3 and 0.8 respectively), creating an environment conducive to exploring the effects of self-attention within these threshold parameters. Concurrently, in the second scenario, we opted for more extreme settings by keeping “k” at 0 (no silhouette threshold) and “cross_replace_steps” at 0.2, thereby intensifying the impact of the self-attention hyperparameter.

        With k = 0.3 and cross_replace_steps = 0.8:

        With k = 0 and cross_replace_steps = 0.2:

        Below, we present the key insights for the hyperparameter “self_replace_steps” within the context of the prompt “A woman’s face with blond hair”.

        Below, we present the key insights for the hyperparameter “self_replace_steps” found for the prompt “A river between mountains”.

        A4. Cycle Consistency of method

        Our primary goal is to delve into the notion of “Cycle Consistency” within our methodology. This concept revolves around the seamless reversal of text prompt modifications back to their original form, ensuring that the resulting image closely mirrors the initial prompt. This bidirectional editing process serves as the central focus of our research, and in the subsequent sections, we present our findings on this crucial aspect.

        B. Generalization of optimized hyperparameters to “attention re-weight method”

        After identifying the optimal parameters, we conducted a comparative analysis to assess their generalizability across other methods, including attention re-weighting. In the visual presentation, we used GIFs to showcase image generation under two different parameter configurations for the prompt “A woman’s face with long wavy blond hair”.

        On the left side, images were generated using default values (k=0.3; cross_replace_steps = 0.8; self_replace_steps = 0.2) while varying the assigned weights. Notably, negative weights led to instability and less desirable outcomes, as evidenced by the results on the left.

        On the right side, we employed our optimized hyperparameter values (k = 0; cross_replace_steps = 0.2; self_replace_steps = 0.8). These images demonstrated improved stability while consistently producing the desired output. This visual comparison highlights the effectiveness of our optimized parameters and their superior performance, particularly when dealing with attention re-weighting method.

        Literature suggested parameters
        Newly optimized parameters

        Our Proposed Method

        As our research has demonstrated, the current prompt-to-prompt method, as reported in the literature , exhibits significant limitations. Specifically, with the current settings for the silhouette, cross-attention injection, and self-attention injection parameters, the method fails to perform the prompted edits with precision. A comparative analysis of the generated target images against the geometry of the reference images reveals undesired deviations. The existing method over-constrains the geometry due to excessively high k values and cross-attention injection values. Additionally, it underutilizes self-attention injection. Furthermore, the current method lacks cycle consistency. To address these shortcomings, we propose a new framework: the “CL P2P” prompt-to-prompt image editing framework. This framework offers several key improvements over the existing method:

        Optimization of Critical Hyperparameters: Our research indicates that optimizing the values of critical hyperparameters results in higher prompt-to-prompt image editing precision and a more accurate similarity between the reference and target images for desired features. We propose the following adjusted values, particularly for editing faces and hairstyles:

        • Local editing (silhouette parameter k): 0.0
        • Cross-attention injection (cross replace steps): 0.2
        • Self-attention injections (self-replace steps): 0.8

        By selecting these values, the following changes are introduced to the prompt-to-prompt editing method:

        • Remove: Local editing can be removed from the method, as it did not lead to significant improvements compared to the precision achieved by the elongated injection of self-attention.
        • Reduce: The cross-attention (query-key-value attention) injection should be reduced to allow greater geometric adaptability and better convergence between the reference and target images.
        • Increase: Self-attention injection should be substantially elongated from 20% to 80% of the diffusion steps. This is crucial, especially for editing hairstyles, as it allows for the greatest geometric adaptability and ensures the convergence between desired reference and target image features.

        Addressing Cycle-Inconsistency: To remedy the cycle-inconsistency, we propose balancing the asymmetry of the current method with regards to the V values of the underlying transformer model. The current method is cycle-inconsistent, even though the same embeddings are used for both the reference and target prompts. Traditionally, the method has only employed the V values of the reference prompt, neglecting those of the target prompt. This characteristic likely introduces asymmetry, breaking the cycle-consistency of the model. We propose an additional injection mechanism for the “CL P2P” framework, a V value injection method, allowing for the consideration of both the V values of the reference and target images. To control the number of injection steps, we introduce an additional hyperparameter, “V value injection steps”. The V value injection function is defined based on the logic highlighted in the footnote of the image.

        Future work

        The development of the “CL P2P” framework is a significant advancement in prompt-to-prompt image editing methods. However, there are still areas where further research will be needed. A critical area of exploration lies in the enhancement of cycle-consistency within the prompt-to-prompt editing process. Further research is required to ascertain and refine the optimal values for the V value injection steps, a key component in achieving cycle-consistency.

        Additionally, the existing frameworks predominantly focus on singular reference and target prompts. While this approach has opened new pathways in human-computer interaction, several research questions remain unexplored. A notable inquiry is the potential to integrate various prompt-to-prompt editing methods, such as “word swap”, “attention re-weighting,” and “prompt refinement.” This integration aims to facilitate a dynamic, conversational interaction between users and generated images, enabling a continuous and iterative editing process. Current state-of-the-art generative image models, such as mid-journey models, do not inherently support such iterative mechanisms. The realization of this functionality necessitates extensive research and development, offering an exciting challenge for future advancements in the field.

        Conclusion

        Image generation models, inherently stochastic in nature, exhibit variability in outcomes even when similar prompts are applied. This stochasticity can result in significant deviations in the generated images. For instance, prompts like “A woman’s face with blond hair” and “A woman’s face with red hair” may yield images with markedly different facial features, demonstrating the algorithm’s underlying randomness.

        In response to this challenge, prompt-to-prompt image generation and editing techniques have emerged as a significant area of interest in recent years. These methods, while constituting a potent tool in the arsenal of image editing alongside fine-tuning, semantic mixing, and masking approaches, are not without limitations. Specifically, the precision of edits and the geometric alignment between reference and target images often fall short of expectations.

        Our research delves into the influence of critical hyperparameters on the outcomes of a cross-attention-based prompt-to-prompt method. We aimed to dissect the impact of each hyperparameter on image editing and geometric adaptation between the reference and target images. Our findings make substantive contributions to enhancing the precision and geometric convergence in prompt-to-prompt methods, with the following key insights:

        • An extensive analysis of three critical hyperparameters (silhouette selection, cross-attention injection, and self-attention injection) was conducted, focusing on their effect on the precision of an attention-based prompt-to-prompt editing method.
        • Contrary to existing literature, our study reveals that self-attention injection plays a more pivotal role than previously recognized. We recommend incorporating self-attention injection from the reference image for approximately 80% of the diffusion steps during the target image generation process.
        • We introduce the novel “CL P2P” framework, designed to elevate the efficacy of prompt-to-prompt editing.

        Our research not only deepens the understanding of prompt-to-prompt editing methods but also achieves enhanced editing precision and improved similarity between reference and target images.

        Looking ahead, the “CL P2P” framework paves the way for further exploration, particularly in addressing the cycle consistency of prompt-to-prompt methods. Additionally, exploring strategies to seamlessly integrate different prompts into a continuous dialogue could revolutionize human-computer interaction, enabling users to edit generated images through conversational engagement.

        \ No newline at end of file diff --git a/blog/2023/proposal-1/index.html b/blog/2023/proposal-1/index.html new file mode 100644 index 00000000..e7b5ee7f --- /dev/null +++ b/blog/2023/proposal-1/index.html @@ -0,0 +1,32 @@ + Applications of Deep Learning in Timbre Transfer | 6.S898 Deep Learning Blogs 2023

        Applications of Deep Learning in Timbre Transfer

        Exploring musical timbre transfer by leveraging prior art in differential digital signal processing (DDSP) and modern deep learning structures.

        Introduction

        Timbre is what distinguishes a flute from a trumpet, piano or any other musical instrument. Even if two performers play the same note, there is no ambiguity in the tone of their instruments. But unlike pitch (frequency) or amplitude (loudness), timbre is not a trivial metric; rather, it pertains much more to subjective qualities like raspiness, articulation and even musical intent. In this article, I’ll be discussing different data-driven approaches to extracting and manipulating this quality of sound using deep learning.

        In particular I’d like to explore timbre transfer, where one instrument is made to sound like another while retaining most aspects of the original performance. I’ll be training an auto-encoder architecture first conditioned on the source instrument (whistling) then tuned to tracks of trumpets to achieve whistling-to-trumpet timbre transfer. Moreover, I’d like to reduce the complexity of previous architectures to achieve realtime results suitable for musical performance.

        First, some context on sound and our perception thereof.

        What is Sound?

        Our ears are sensitive to changes in air pressure over time, which we perceive as sound. Digital audio is analogous to this phenomenon, where its representation is a sequence of samples usually in the [-1, 1] range and discretized at a frequency high enough that it becomes indistinguishable from natural sources. This is known as the time domain, however all signals can be mapped to the frequency domain where the individual sinusoids that compose it are graphed against their respective amplitudes. Below is a Fourier transform applied to the sound of a trumpet from above:

        It turns out that only the bottom-most frequency, \(f_0\), informs our ears of this note’s pitch. In fact, a pure sine wave at that frequency will sound similar to the trumpet.

        The distinction between the trumpet and sine wave lies in the frequencies above \(f_0\), known as overtones. Moreover, certain musical instruments exhibit an interesting harmonic behavior where only the overtones that are multiples of \(f_0\) are actually prominent; this is the case for most instruments you could name, though some non-examples include the gong and timpani 2. Below is a spectrogram, which displays the frequency-domain of a signal over time. Observe the estimated \(f_0\) (implemented using the YIN algorithm ) and how its multiples (\(2 * f_0\), \(3 * f_0\), etc) evolve over time.

        Try playing the audio clip above, whistle into the spectrogram or record your own instrument! The horizontal axis is time and vertical axis is frequency

        So how do overtones relate to timbre? Well, the harmonic series is the most obvious distinguishing factor between different instruments playing the same pitch, so we could model timbre as the evolution of \(f_0\) and its overtones’ amplitudes over time. Note that this is assuming a strictly monophonic context (one note at a time), and overlooks non-harmonic parts of the signal (e.g. a flutist’s breathing). So this representation will still sound synthetic but it forms a good basis for what we’re trying to achieve.

        Timbre Transfer

        Perhaps the most obvious method for achieving timbre transfer is approximating the pitch of the source audio (as demonstrated above) and recreating it using a synthetic MIDI instrument. However, this discards much of the expressiveness, which isn’t desirable in a musical performance.

        Rather, data-driven approaches have shown promise in audio synthesis and existing deep learning architectures can be repurposed to achieve nuanced timbre transfer to various degrees of success. treats timbre transfer as an image-to-image problem, leveraging a Conditional Adversarial Networks architecture trained on natural images to transform spectrograms of audio signals. uses a Denoising Diffusion Implicit Model (DDIM) to achieve similar results. The audio is then synthesized from these spectrograms using the Inverse Fourier Transform or another neural network.

        Keyboard Guitar String Synth Lead

        Images courtesy of

        However, these methods rely on a dataset of audio tracks in two timbre domains, namely audio synthesized from MIDI instruments like in since recordings of performers will never match exactly. The results thereby sound synthetic; a better architecture would thus be self-supervised and trained on acoustic performances directly.

        Proposed Model

        I experimented with an auto-encoder architecture, where a network is trained to minimize the audible difference between some input audio track \(x\) and its re-synthesized counterpart \(\hat{x}\); so, the model attempts to recreate its input \(x\) by first encoding it to some latent representation \(z\) and decoding back to audio. Note that although over-fitting is possible, a one-to-one mapping (or, cheating) is impossible because \(z\) bottlenecks (has less dimensions than) \(x\). The appeal of this approach is that the problem is now self-supervised and can be trained directly on musical performances of the source instrument (e.g. whistling).

        Next, the encoder is frozen (unaffected by gradient descent) and the decoder is trained anew on samples of the target instrument (e.g. trumpet). So, the networks knows how to encode the source instrument to some \(z\), and hopefully its decoder has adapted to map \(z\) onto the target instrument.

        The decoder doesn’t output audio directly, nor does it generate a spectrogram like in . Rather, it controls parameters of a harmonic oscillator proposed by which follows the intuition of timbre as discussed prior; that is, the oscillator has parameters for its \(f_0\) and the amplitudes of each harmonic overtone. Leveraging this strong inductive bias should reduce the size of the neural network enough to be applicable to realtime performances.

        The encoder architecture is taken from , whose original application is tracking pitch; I don’t track pitch explicitly, rather demonstrates that CNNs can extract meaningful data from audio directly in the time domain. The issue with working in the frequency domain is shown in , where we’d need a high sampling rate (and thus the network needs to be that much faster) for high frequencies or a long sampling window (which yields a network with more parameters) for low frequencies. Note that there is a nice compromise to these issues by windowing the inputs and outputs , which I’d like to try later.

        Finally, the loss I’m using is multi-scale spectrogram loss proposed in , which computes the L1 loss of two audio tracks in the frequency-domain on both a linear and log scale.

        Encoder

        The architecture of my model is largely inspired by Magenta’s Differentiable Digital Signal Processing (DDSP) paper, where differentiable sound processors are introduced. Although modules like reverb and a finite-impulse response (FIR) filter are included, I’m only experimenting with its harmonic oscillator for simplicity. The architecture proposed by is also an auto-encoder, however its latent representation is built on two heuristics (pitch, amplitude) rather than the audio itself. Despite this, is able to achieve natural sounding instruments but its controls are limited in expression, much like MIDI inputs. Realtime Audio Variational autoEncoder (RAVE) builds upon this by encoding a multiband decomposition of the source audio, or a collection of Fourier transforms with varying amount of bins to overcome limitations of the Nyquist frequency and limited precision of discretization. A single Fourier transform operates on a linear scale, where its frequency bins scale from \(0\) to its Nyquist frequency. However, humans hear on a logarithmic scale (i.e. A4 is \(440 \text{Hz}\) but an octave above that is \(880 \text{Hz}\)) so the transform has a bias towards low frequencies. Multiband decomposition approaches this by shifting the frequency bins using different window sizes of audio and letting the network generalize over the complete frequency spectrum. However, although has shown some incredible results and claims to run in realtime, that is not the case in practice .

        In my experiment, I leverage a Convolutional Representation for Pitch Estimation (CREPE) ; it is a CNN-based pitch estimator that operates directly on the time-domain of an audio signal and achieves state of the art results. Rather than using its output, like in , I use its latent representation and train the network to generalize over more characteristics of sound than just pitch.

        Decoder

        introduced the idea of using oscillators for audio synthesis as opposed to raw waveform modeling. demonstrates that their architecture benefits from this inductive bias and is able to be significantly reduced in size. I wanted to experiment with the encoder, so the decoder of my model remains unchanged from the original paper (for the most part). It consists of several dense layers, ReLU activation functions and layer normalization. In between these is a Gated Recurrent Unit (GRU). The harmonic oscillator from cannot produce sinusoids out of phase (the instantaneous phase is accumulated at each time step) but presumably the network needs some time dependency to form an audio envelope.

        Image courtesy of Tellef Kvifte

        Dataset

        I trained the target instrument auto-encoder on the URMP dataset , which consists of individual recordings of performers across a variety of instruments. Specifically, I wrote a dataloader that selects only trumpet solo tracks and randomly samples a 4 second clip from each of them. The audio is down-sampled to \(16\text{kHz}\) because the dataset doesn’t contain many frequencies above \(8\text{kHz}\) and the reduced dimensionality allows for training on my M2 MacBook Air with a batch size of 16!

        I also created my own whistling dataset, sampled from MIT students with varying levels of proficiency. The audio clips are normalized, silence is cut out and altogether I have around 2 hours of data.

        Loss

        Like and , I focus on perceptual loss which approximates human hearing. So, comparing waveforms in the time-domain would not work because humans aren’t sensitive to changes in phase whereas the signal changes drastically. I extend upon the multi-scale spectrogram loss proposed by , which consists of taking the L1 norm of the two inputs’ spectrograms (so phase is discarded) in both the linear and log domain. Note that human hearing is logarithmic, but spectrograms are not. I experiment upon this by employing the log Mel spectrogram which is an even better approximation of human hearing and used by , and .

        Results

        I trained 500 epochs of 16 times 4 second samples on a single M2 MacBook Air with Metal acceleration, totaling around 10 hours. Unfortunately, the loss converged but the network was not able to generalize over abstract characteristics of sound as I’d hoped. Rather, it learned to represent sound as a mellow mix of harmonics instead of anything useful. I think future experiments should penalize silence (or close to it), and perhaps add skip connections from the inputs’ power (explicitly calculated) to the decoder. Moreover, the size of the encoder was drastically reduced (a few orders of magnitude less parameters in both width and depth) so it’s possible the latent representation did not contain much meaningful data.

        Sample synthesized waveforms at epochs 0, 250, and 470 respectively (loud sounds warning!).

        \ No newline at end of file diff --git a/blog/2023/proposal-2/index.html b/blog/2023/proposal-2/index.html new file mode 100644 index 00000000..c605aae7 --- /dev/null +++ b/blog/2023/proposal-2/index.html @@ -0,0 +1,42 @@ + Using Synthetic Data to Minimize Real Data Requirements | 6.S898 Deep Learning Blogs 2023

        Using Synthetic Data to Minimize Real Data Requirements

        Data acquisition for some tasks in synthetic biology can be cripplingly difficult to perform at a scale necessary for machine learning... so what if we just made our data up?*

        *And used it as the basis for transfer learning with the real data that someone put hard work in to generate.

        Introduction

        Synthetic biology is a burgeoning field of research which has attracted a lot of attention of the scientific community in recent years with the advancement of technologies that enable the better understanding and manipulation of biological systems. A significant contributor to its steadily increasing popularity is the diverse array of potential applications synthetic biology may have, ranging from curing cancer, to addressing significant climate issues, to colonizing other planets. But, to effectively manipulate these biological systems, it is necessary to understand how they work and how they interact with other biological systems — it has been shown time and time again that a system characterized in isolation, compared to the same system in a broader, non-isolated context, will not perform identically. This necessitates models that can predict a system’s behavior given both stimuli and context.

        In the synthetic biology literature, the behavior of many systems is characterized by the chemical reactions that take place; these reactions consist most frequently of the so-called central dogma of biology, in which DNA produces RNA, which produces proteins. These proteins are then free to perform almost every function within a cell, including — most notably for us — regulation of DNA. By varying the extent and nature of this regulation, these systems yield mathematical models that range from simple linear systems to highly complex nonlinear dynamical systems:

        Figure 1: A simple model of the central dogma of biology: a stretch of DNA is used to create a strand of messenger RNA, which is used to create a functional protein. Functional proteins are responsible for almost all operations within the cell, from cellular movement to RNA production and everything in between.

        However, the figure above does not capture the full purview of the cell; it neglects factors that synthetic biologists know to be critical to the process of protein expression, as well as factors that have not been characterized rigorously yet. The process of analyzing the behavior of a system at the fullest level of detail necessary to encapsulate these intricate dynamics is expensive and time-consuming, and requires significant experimental data to validate — not to mention the fact that, as was mentioned, there are some factors which we simply don’t know about yet. Protein production is an immense and complex task, and identifying its critical parameters at the highest level of detail is no small feat.

        Enter Machine Learning

        With this in mind, many synthetic biologists are experimenting with characterizing system behavior, especially when augmenting pre-existing models to include newly discovered phenomena, using machine learning and neural networks, due to their universal function approximator property. In this fashion, we may be able to better abstract the levels of biological detail, enabling better prediction of the composition of two genetic circuits.

        Unfortunately, training neural networks also requires (surprise surprise!) substantial experimental data, which is taxing on both a researcher’s budget and time — for a small lab with few researchers working, a single experiment may take upwards of 12 hours of attentive action, while yielding only up to 96 data points for training. Some large-scale gene expression data has been collected to assist in the development of machine learning algorithms; however, this data is focused largely on the expression of a static set of genes in different cellular contexts — rather than on a dynamic set of genes being assembled — and is therefore insufficient to address the questions of composition that are being posed here.

        This leads us to a fundamental question: can we use transfer learning to reduce the experimental data we need for training by pre-training on a synthetic dataset which uses a less-detailed model of our system? In other words, can we still derive value from the models that we know don’t account for the full depth of the system? If so, what kinds of structural similarities need to be in place for this to be the case?

        In this project, we aim to address each of these questions; to do this, we will first pre-train a model using simpler synthetic data, and use this pre-trained model’s parameters as the basis for training a host of models on varying volumes of our more complex real data. Then, we will consider sets of more complex real data that are less structurally similar to our original synthetic data, and see how well our transfer learning works with each of these sets.

        In theory, since the synthetic data from the literature uses models that have already captured some of the critical details in the model, this fine-tuning step will allow us to only learn the new things that are specific to this more complex model, thus allowing transfer learning to be successful. As the two underlying models become increasingly distant, then, one would expect that this transfer will become less and less effective.

        Methods

        Problem Formulation

        Consider we have access to a limited number of datapoints which are input-output $(x_i,y_i)$ pairs for a biological system, and we want to train a neural network to capture the system behavior. The experimental data for the output $y_i$ we have is corrupted by an additive unit gaussian noise, due to white noise and measurement equipment precision. Moreover, we consider that we also have access to a theoretical model from another biological system which we know to be a simplified version of the one in our experiments, but which explicitly defines a mapping $\hat y_i = g(x_i)$.

        Our goal is thus to train a model $y_i = f(x_i)$ to predict the real pairs while using minimal real pairs of data $(x_i, y_i)$. Instead, we will pre-train with $(x_i, \hat y_i)$ pairs of synthetic data, and use our real data for fine-tuning.

        Data Acquisition

        In this work we will additionally consider a domain shift between two datasets, which we will refer to as the big domain and the small domain. In the big domain, our inputs will vary between 0 and 20nM, and in the small domain the inputs will vary between 0 and 10nM. These domains represent the ranges for the inputs in the experiments in the small domain, which may be limited due to laboratory equipment, and the desired operation range of the systems in the big domain.

        Furthermore, for all datasets - pre-training, fine-tuning, or oracle training - we will be generating synthetic data for training and testing purposes. We will use different levels of complexity to simulate a difference between experimentally-generated and computationally-generated data. In a real setting, we would use the complex model $f$ that we’re trying to learn here as the simple, known model $g$ in our setup. Going forward, we will refer to the data generated by our low-complexity model $g$ as “synthetic” data, and to the data generated by our high-complexity model as “real” or “experimental” data.

        For our low-complexity theoretical model, we consider the simplest gene expression model available in the literature, in which the input $x_i$ is an activator, and the output $y_i$ is given by the following Hill function:

        \[y_i = \eta_i \frac{\theta_i x_i}{1 + \Sigma_{j=1}^2 \theta_j x_j}, i\in {1,2},\]

        where our $\eta_i$’s and $\theta_i$’s are all inherent parameters of the system.

        For the first experimental model, we consider a more complex gene expression model, where the activator $x_i$ must form an $n$-part complex with itself before being able to start the gene expression process, which yields the following expression for the output $y_i$:

        \[y_i = \eta_i \frac{(\theta_i x_i)^n}{1 + \Sigma_{j=1}^2 (\theta_j x_j)^n}, i\in {1,2},\]

        where - once again - our $\eta_i$’s and $\theta_i$’s are all inherent parameters of the system. Note that, at $n=1$, our real model is identical to our synthetic model. As one metric of increasing complexity, we will vary $n$ to change the steepness of the drop of this Hill function.

        As an additional test of increased complexity, we will consider a phosphorylation cycle in which inputs $x_i$ induce the phosphorylation or dephosphorylation of a given protein. We take the dephosphorylated protein to be an output $y_1$, and the phosphorylated protein to be a secondary output $y_2$, for which we have:

        \[y_i = y_{tot} \frac{\theta_i x_i}{\Sigma_{j=1}^2 \theta_j x_j}, i\in {1,2},\]

        in which $\theta_i$’s and $y_{tot}$ are each system parameters. Note that the only functional difference between this system and the synthetic data generation system lies in the denominator of each, as one has a nonzero bias term, where the other does not.

        Figure 2: Graphical representation of the three different synthetic or experimental models used in this project. In the first diagram, our input protein $x_i$ is activating the production of an output protein $y_i$. This is the simplest model of which we can conceive, and constitutes our synthetic data. In the second diagram, two copies of our input protein $x_i$ come together to form a complex that induces the production of our output protein $y_i$. This is a step up in complexity, and varying the number of proteins that come together allows us to introduce more and more complexity into our system. Finally, a single protein which can be either of our outputs $y_1$ or $y_2$ is moved between these states by our two input proteins $x_1$ and $x_2$. This system, while seemingly very dissimilar from the above two, winds up being mathematically not too far off, and offers another model on which to transfer our learning.

        Training & Testing

        For each experiment, we trained MLPs composed of 5 hidden layers with 10 nodes each and a ReLU activation function.

        For the first experiment, we performed transfer learning by pre-training our model for 90% of the total number of epochs (1800/2000) with the synthetic data sampled from the big domain, where we have a high quantity of data points (40000 $(x_i, y_i)$ pairs); for the remaining 10% of epochs, the network was trained on the experimental data sampled from the small domain, with varying numbers of data points used for training. This can be compared to a model trained exclusively on the same volume of experimental data for a full 2000 epochs, to establish a baseline level of performance. An oracle model was trained for all 2000 epochs on experimental data sampled from the big domain with a high volume of data, and serves as the best-case performance of our model.

        For the second experiment, we followed a very similar protocol as in the first experiment; the critical difference here lies in the fact that, where the fine-tuning step used different volumes of data in the previous case, we now instead use a fixed data volume (1000 $(x_i, y_i)$ pairs), and fine-tune on a host of different models of varying complexity relative to the synthetic model.

        To evaluate performance of our neural networks, we uniformly sample 100 points from the big domain, for which we calculate the L1 loss mean and variance between the network predictions and the experimental model output.

        Figure 3: A visual example of the training done - on the right is the intended function to be learned, where the left features the output of one of the models that was trained with transfer learning.

        Results & Analysis

        Experiment 1

        As was mentioned before, the first experiment was targeted towards addressing the question of whether we can pre-train a model and use transfer learning to reduce the volume of real data needed to achieve a comparable standard of accuracy. To this end, we trained several models with a fixed volume of pre-training data, and varied the volume of fine-tuning data available to the model.

        Figure 4: Bar plots of model loss as the volume of fine-tuning (blue) or training (orange) data increases. As can be seen, at high volumes, the blue bars reach a lower loss than the orange bars, suggesting that transfer learning is effective at taking high volumes of data, and improving them further. For very low volumes, these two models are roughly equivalent, although the orange bars have a significantly higher variance than the blue bars. Somewhere in between, a transition occurs, and transfer learning outpaces learning without prior knowledge of anything.

        As can be seen in the blue bars of Figure 4, the greater the volume of real data coupled with transfer learning, the lower the loss, and the better the performance. This is to be expected, but this curve helps to give a better sense regarding how quickly we approach the limit of best-case performance, and suggests that the volume of real data used for oracle training could be cut down by nearly an order of magnitude while achieving comparable performance. One might argue that this is because the volume of real data used in this training is itself sufficient to effectively train this model; to that end, we consider the orange bars, which represent the loss of models trained for 2000 epochs exclusively on the given volume of real data. This, coupled with the blue bars, suggests that - across all volumes of data - it is, at the very least, more consistent to use transfer learning. Models trained for that duration on exclusively real data sampled from the small domain tended to overfit, and had a much higher variance as a result. As the volume of real data used for fine-tuning increased, the difference between the two regimes of transfer vs. non-transfer learning became more pronounced, and the benefits of transfer learning become more noticeable. Thus we conclude that we can use transfer learning to cut down on the quantity of real data needed, while sacrificing relatively little up to a ~75% cut of data requirements.

        Experiment 2

        Next, we wish to address the question of how structurally dissimilar a model can be while still making this transfer learning effective. To this end, we varied $n$ from our first experimental model, and generated data with our second experimental model. In each case, we performed a ~95% cut in the volume of real data relative to the volume of data used to train each oracle.

        Figure 5: Bar plots of model loss as the model being learned is varied, as a means of representing increases in complexity or structure. As can be seen, within this range of complexity variation, transfer learning is consistently able to learn the system to a comparable degree across all cases.

        In Figure 5, we compare the loss of models trained with transfer learning to oracles for each - as can be seen, the transfer learning models performed consistently across all models being learned, and the oracles of each were similarly consistent. This suggests that the architectures of the models being learned are sufficiently similar that the transfer learning is effective, which is a promising sign for more applications in which the system being learned has been simplified significantly in its mathematical models.

        Conclusion

        Ultimately, we’ve developed a method by which to potentially reduce the volume of experimental data needed to effectively train a machine learning model by using synthetic data generated by a lower-complexity model of the system. We’ve demonstrated that it has the potential to cut down data requirements significantly while still achieving a high level of accuracy, and that the simple system used to generate data need not match the complex system exactly, in the sense that the learning process can shore up some substantial structural differences between the simple and complex system. These findings are not necessarily limited strictly to synthetic biological learning tasks, either - any complex, data-starved phenomenon in which there is a simpler model to describe parts of the system may find value in this. Looking forward, one can consider deeper structural dissimilarities, as well as application with real synthetic biological data, rather than simply using two models of increasing complexity.

        \ No newline at end of file diff --git a/blog/2023/proposal/index.html b/blog/2023/proposal/index.html new file mode 100644 index 00000000..36d7ac98 --- /dev/null +++ b/blog/2023/proposal/index.html @@ -0,0 +1,32 @@ + Graph neural networks v.s. transformers for geometric graphs | 6.S898 Deep Learning Blogs 2023

        Graph neural networks v.s. transformers for geometric graphs

        With the recent development of graph transformers, in this project we aim to compare their performance on a molecular task of protein-ligand binding affinity prediction against the performance of message passing graph neural networks.

        Introduction

        Machine learning on graphs is often approached with message passing graph neural network (GNN) models, where nodes in the graph are embedded with aggregated messages passed from neighboring nodes . However, with the significant success of transformers in language modelling and computer vision recently , there are a growing number of transformers developed for graphs as well. In this project we investigate the application of graph neural networks compared to transformers on geometric graphs defined on point clouds. We aim to explore the performance of these two models on predicting the binding affinity for a protein-ligand interaction given the atomic coordinates of the docked protein-ligand structure, which is a highly relevant task in drug discovery. This blog post walks through an introduction into graph neural networks and transformers on molecules, our model architecture, experimental results, and a discussion comparing the two architectures.

        Background and relevant work

        Graph neural networks on molecules

        Graphs are comprised of nodes and edges, and we can model any set of objects with a defined connectivity between them as a graph. For example, social networks are a set of people and the connectivity between them is defined by who knows whom. We can also see that grid data formats, like images, are also graphs where each pixel is a node and edges are defined to the adjacent pixels. Any sequential data, such as text, can be modeled as a graph of connected words. In this section we focus on graphs of molecules where nodes are atoms and edges are defined between atoms. These edges are often defined by the molecular bonds, or for atoms with 3D coordinate information the edges can be defined by a spatial cutoff $d$ based on the Euclidean distance between nodes. Given a graph we can use a graph neural network to learn a meaningful representation of the graph and use these representations for predictive tasks such as node-level prediction, edge-level prediction, or graph-level prediction. Graph neural networks learn through successive layers of message passing between nodes and their neighboring nodes.

        An important property of many GNNs applied on 3D molecules is SE(3)-equivariance. This means that any transformation of the input in the SE(3) symmetry group–which includes all rigid body translations and rotations in $\mathbb{R}^3$ –will result in the same transformation applied to the output. This property is important for the modelling of physical systems; for example if the prediction task is the force applied on an atom in a molecule, rotation of the molecule should result in the model predicting the same forces but rotated. In some tasks we do not need equivariance but rather SE(3)-invariance (which is a subset of SE(3)-equivariance) where any transformation of the input in the SE(3) symmetry group results in the same output. This is often the case when the task of the model is to predict a global property of the molecule which should not change if all 3D coordinates of the molecule are translated and rotated. SE(3)-invariance will be required for our model of binding affinity as global rotations and translations of the protein-ligand structure should yield the same predicted binding affinity.

        Early SE(3)-equivariant GNNs on point clouds used directional message passing which used the pairwise distance and direction between nodes as features for the GNN, however they were soon shown to be limited in expressivity . Now state-of-the-art (SOTA) models in this area are based on higher order geometric properties such as dihedral angles and representations in the geometric group SO(3). Some examples include GemNet and e3nn . e3nn has also shown that it is much more data-efficient when learning as the model does not need to learn to be equivariant, which non-equivariant models do. For a non-equivariant model to learn to be equivariant it would have to be trained on many SE(3) transformations of the input mapping to the same output, which is very inefficient. e3nn models have led to exceptional performance for tasks related to predicting molecular forces and energies . For the task of binding affinity some GNNs that achieve high performance using GNNs are ProNet and HoloProt .

        Graph transformers on molecules

        With the proliferation of transformers in the broader field of machine learning, this has also led to the development of graph transformers. In a transformer model each node attends to all other nodes in the graph via attention where the query is a projection of the feature vector of a node, and the key and value is the projection of feature vectors of all other nodes. Hence, graph transformers and transformers applied to sequences (e.g. text) are largely similar in architecture. However, differences arise in the positional encodings in a graph transformer as it is defined in relation to other nodes in the graph . For geometric graphs, positional encodings can be applied as a bias term on the attention value of node $u$ on $v$, where the bias is a learned value that is dependent on the distance between the nodes . There are also other ways of implementing positional encodings in the form of Laplacian eigenvectors, and random walk diagonals which aim to encode the centrality of each node in the graph . Recently, in an effort to unify different methods to generate structural and positional graph encodings, Liu et al. apply a novel pretraining approach with a multiobjective task of learning a variety of positional and structural encodings to derive more general positional and structural encodings. Graph transformers are also achieving SOTA performance for benchmarks on predicting quantum properties of molecules and binding affinity .

        Motivation

        Given the growing application of both GNNs and transformers we aim to compare their performance on the same task of protein-ligand binding affinity prediction. We also aim to compare models as we can see analogies between graph transformers and GNNs, where “message passing” in the graph transformer involves messages from all nodes rather than the local neighborhood of nodes. We view protein-ligand binding affinity prediction as a suitable task to compare the two architectures as there are aspects of both the GNN and graph transformer architecture that would be advantageous for the task: binding affinity is a global prediction task for which the graph transformer may better capture global dependencies, conversely binding affinity is also driven by local structural orientations between the protein and ligand which the GNN may learn more easily.

        Problem definition

        • The input to the model is a set of atoms for the protein pocket $X_{\mathrm{protein}}$ and ligand $X_{\mathrm{ligand}}$, for which we have the atomic identity and the 3D coordinates, and the binding affinity $y$ for the structure.
        • For the graph neural network we define a molecular graph of the protein ligand structure $G=(V,E)$ where $V$ are the $n$ nodes that represent atoms in the molecule and the edges $E$ are defined between two nodes if their 3D distance is within a radial cutoff $r$. We further define two types of edges: intramolecular edges for edges between nodes within $X_{\mathrm{protein}}$ and $X_{\mathrm{ligand}}$, and intermolecular edges for nodes between $X_{\mathrm{protein}}$ and $X_{\mathrm{ligand}}$.
        • For the graph transformer it is applied to the whole set of atoms $(X_{\mathrm{protein}}, X_{\mathrm{ligand}})$, and we can use the 3D coordinates of the atoms to derive positional encodings.
        • Performance is determined by the root mean squared error, Pearson, and Spearman correlation coefficients between true binding affinity and predicted binding affinity.
        Figure 1. A protein-ligand structure, Protein Data Bank (PDB) entry 1a0q. The protein backbone is shown in blue, and the ligand is shown in green. The model would be given this structure and the objective is to predict the binding affinity of the ligand to the protein.

        Dataset

        We use the PDBbind dataset for the protein-ligand structures and binding affinity. In addition, for benchmarking we use the benchmark from ATOM3D with a 30% and 60% sequence identity split on the protein to better test generalisability of the model. The sequence identity split is based on sequence similarity of proteins in the test and training datasets. The 30% sequence identity split is more challenging as there are more dissimilar proteins in the test set.

        Architecture

        Graph neural network

        Figure 2. Overview of the GNN architecture for a graph constructed from a protein-ligand structure.

        A graph is constructed from the atomic coordinates of the atoms in the protein pocket $X_{\mathrm{protein}}$ and ligand $X_{\mathrm{ligand}}$ where the nodes are the atoms. Intramolecular edges are defined between nodes within $X_{\mathrm{protein}}$ and $X_{\mathrm{ligand}}$ with a distance cutoff of 3 Å, and intermolecular edges for nodes between $X_{\mathrm{protein}}$ and $X_{\mathrm{ligand}}$ with a distance cutoff of 6 Å. The model architecture is defined as follows:

        (1) Initial feature vectors of the nodes are based on a learnable embedding of their atomic elements. The edge features are an embedding of the Euclidean distance between the atomic coordinates. The distance is embedded with a Gaussian basis embedding which is projected with a 2 layer MLP.

        (2) We define two types of messages in the GNN, given by the two types of edges, intermolecular messages and intramolecular messages. The architecture used for the two types of messages is the same, but the weights are not shared; this is to reflect that information transferred between atoms within the same molecule is chemically different to information transferred between atoms of different molecules. The message passing equation uses the tensor product network introduced by e3nn , and our implementation is based on the message passing framework used by DiffDock . We omit the details of the tensor product network for simplicity but provide the overall method below.

        where node $b$ are the neighbors of node $a$ in $G$ given by intermolecular or intramolecular edges denoted with $t$. The message is computed with tensor products between the spherical harmonic projection with rotation order $\lambda = 2$ of the unit bond direction vector, \(Y^{(\lambda)}({\hat{r}}_{a b})\), and the irreps of the feature vector of the neighbor $h_b$. This is a weighted tensor product and the weights are given by a 2-layer MLP, $\Psi^{(t)}$ , based on the scalar ($\mathrm{0e}$) features of the nodes $h_a$ and $h_b$ and the edge features $e_{ab}$. Finally, $LN$ is layer norm. Overall, the feature vectors of the nodes are updated by intermolecular and intramolecular messages given by the tensor product of feature vectors of intermolecular and intramolecular neighbors and the vector of the neighbor to the node.

        (3) After $k$ layers of message passing we perform pooling for the nodes of $X_{\mathrm{protein}}$ and the nodes of $X_{\mathrm{ligand}}$ by message passing to the “virtual nodes” defined by the centroid of the protein and ligand, using the same message passing framework outlined above.

        (4) Finally, we concatenate the embedding of the centroid of the protein and ligand and pass this vector to a 3 layer MLP which outputs a singular scalar, the binding affinity prediction.

        Graph transformer

        Figure 3. Overview of the graph transformer architecture for a graph constructed from a protein-ligand structure.

        The model architecture is as follows:

        (1) Initial feature vectors of the nodes are based on a learnable embedding of their atomic elements.

        (2) The graph transformer architecture is based on graphormer , where the input is $H \in \mathbb{R}^{n \times d}$ where $d$ is the hidden dimension and $n$ is the number of nodes. The input is projected by $W_Q \in \mathbb{R}^{d \times d_K}, W_K \in \mathbb{R}^{d \times d_K}, W_V \in \mathbb{R}^{d \times d_V}$. Since graphs have more complex positional information than sequences, conventional positional encoding methods used in sequence-based transformers are not applicable to graphs. Positions in a graph are defined relative to all other nodes, thus positional embeddings cannot be added at the node feature vector level but instead are added as a bias to the pairwise node attention matrix. We define $B \in \mathbb{R}^{n \times n}$, where $B_{ij}$ is given by a Gaussian basis embedding of the Euclidean distance $d_{ij}$ between node $i$ and $j$, which is passed to a 3 layer MLP that outputs a singular scalar. Then the self-attention is calculated as $Q = HW_Q, K = HW_K, V = HW_V$ and $A = \frac{QK^T + B}{\sqrt{d_k}}, Attn(H) = Softmax(A) V$. In addition to all atomic nodes, we also add a <cls> token used in the BERT model which functions as a virtual global node . The distance of this node to all other nodes is a learnable parameter. This process is duplicated across multiple heads and we concatenate the embeddings across all heads after $k$ layers as the updated feature vector.

        (3) We take the final embedding of the <cls> node and pass it through a 3 layer MLP which outputs a singular scalar, the binding affinity prediction.

        Loss function

        Both models are trained to minimise the root mean squared error between the predicted binding affinity and true binding affinity.

        Experiments

        In order for the results to be comparable between the two models, both models have approximately 2.8 million parameters.

        GNN model details:

        • 2 layers of message passing, number of scalar features = 44, number of vector features = 16. Number of parameters: 2,878,011
        • 4 layers of message passing, number of scalar features = 32, number of vector features = 13. Number of parameters: 2,767,269
        • 6 layers of message passing, number of scalar features = 26, number of vector features = 12. Number of parameters: 2,764,431

        We compare GNNs with different numbers of layers to compare performance across models which learn embeddings from various $k$-hop neighborhoods.

        Graph transformer model details: 8 attention heads, 8 layers, hidden dimension = 192, feed forward neural network dimension = 512. Number of parameters: 2,801,155

        Both models were trained for 4 hours on 1 GPU with a batch size of 16, Adam optimiser, and a learning rate of $1 \times 10^{-3}$. We show the results for the 30% and 60% sequence-based splits for the protein-ligand binding affinity benchmark in Table 1 and 2 respectively.

        Table 1. Protein-ligand binding affinity task with 30% sequence based split. ProNet is included as the SOTA model in this benchmark.

        Model Root mean squared error $\downarrow$ Pearson correlation coefficient $\uparrow$ Spearman correlation coefficient $\uparrow$
        ProNet 1.463 0.551 0.551
        GNN 2 layer 1.625 0.468 0.474
        GNN 4 layer 1.529 0.488 0.477
        GNN 6 layer 1.514 0.494 0.494
        Graph Transformer 1.570 0.476 0.469

        Table 2. Protein-ligand binding affinity task with 60% sequence based split. ProNet is included as the SOTA model in this benchmark.

        Model Root mean squared error $\downarrow$ Pearson correlation coefficient $\uparrow$ Spearman correlation coefficient $\uparrow$
        ProNet 1.343 0.765 0.761
        GNN 2 layer 1.483 0.702 0.695
        GNN 4 layer 1.471 0.717 0.719
        GNN 6 layer 1.438 0.722 0.704
        Graph Transformer 1.737 0.529 0.534

        Discussion

        GNNs perform better than graph transformers

        From the benchmarking we can see that the graph transformer model performs worse than the GNNs for the 30% and 60% sequence split for protein-ligand binding affinity. An intuitive explanation for why graph transformers perform worse is that it may be difficult for the graph transformer to learn the importance of local interactions for binding affinity prediction as it attends to all nodes in the network. Or in other words, because each update of the node involves seeing all nodes, it can be difficult to decipher which nodes are important and which nodes are not. In order to test if this is true, future experiments would involve a graph transformer with a sparse attention layer where the attention for nodes beyond a distance cutoff is 0. In contrast to the lower performance of graph transformers, the results show that deeper GNNs which “see” a larger $k$-hop neighborhood perform better. However, we did not push this to the extreme of implementing a GNN with enough layers such that the $k$-hop neighborhood is the whole graph which would be most similar to a graph transformer as it attends to all nodes. This is because very deep GNNs are subject to issues like oversmoothing where all node features converge to the same value .

        The GNN may also perform better than the graph transformer due to the higher order geometric features used by the e3nn GNN message passing framework, compared to the graph transformer which only has relative distances. To further explore this future work will involve implementing the equiformer graph transformer , which is a graph transformer with higher order geometric features.

        Depth v.s. width

        Deeper GNNs (2 v.s. 4 v.s. 6 layers) with an approximately constant total number of parameters achieved better performance across both protein ligand binding affinity tasks. This was also observed in the image classification field with the development of AlexNet where deeper networks were shown to significantly improve performance . In the context of molecular graphs, deeper GNNs allow the nodes to gain more local chemical context as their node embeddings are exposed to larger $k$-hop neighborhoods. Thus, these node embeddings are more expressive which facilitates better task performance. There is a limit to the advantages of depth, as very deep GNNs experience oversmoothing as mentioned above .

        Model performance v.s. graph size

        We compared the error of the prediction v.s. the number of atoms in the graph to test the hypothesis that larger graphs are more difficult to make predictions on. However, correlation between error and number of atoms in the graph all yielded very low Pearson correlation coefficients ($< 0.1$) for all experiments (Figure 4). Thus, the number of atoms in the graph has minimal effect on the predictive ability of the model. This may suggest why the graph transformer–which is able to attend to all nodes in the graph–did not perform much better as the GNN performance does not degrade significantly with larger graphs.

        Figure 4. Number of nodes in graph v.s. difference between true and predicted binding affinity for graph transformers and GNNs on the 60% protein-ligand binding affinity task. There is no prominent correlation between the number of nodes in the graph and the error in prediction.

        Future work

        We implemented a relatively simplistic graph transformer in this project. While we concluded for this vanilla implementation of the graph transformer the GNN outperforms the graph transformer there are many more complex graph transformer architectures that we could explore to build more expressive architectures. In this section we explore some possible ideas.

        Using cross-attention for better representation of protein-ligand interactions. In this project, we adapted the graph transformer from graphormer which was developed originally for predicting the energy of one molecule. However, our task involves two interacting molecules, a protein and a ligand. Thus, graph transformer performance could be lifted if the model had a better understanding of the interactions between the protein and the ligand by using cross attention between the protein and the ligand, rather than self attention across the whole protein-ligand complex.

        Hierarchical pooling for better representation of amino acids. Graph transformer performance could also be lifted by defining better pooling strategies than using the <cls> token from a set of all atoms to predict binding affinity. In this project the graphs were defined based on the atoms in the graph. However, proteins are comprised of an alphabet of 21 amino acids. Thus, it may be easier for the model to learn more generalisable patterns to the test set if the model architecture reflected how proteins are comprised of amino acids which are comprised of atoms. This has been achieved in models using hierarchical pooling from the atom-level to the amino acid-level and finally to the graph-level .

        A hybrid approach: GNNs with Transformers. Finally, we could also improve performance further by taking a hybrid approach. That is, the GNN first learns local interactions followed by the graph transformer which learns global interactions and pools the node embeddings into a global binding affinity value. The motivation for this design is to leverage the advantages of both models. The GNN excels at learning local interactions while the graph transformer excels at learning global relationships from contextualised local interactions. This approach has been explored in other models for predicting drug-target interaction . Visualisation of the attention map of graph transformers would also be interesting to explore the importance of specific chemical motifs on protein-ligand interactions.

        Conclusion

        In this project we present a direct comparison of graph transformers to GNNs for the task of predicting protein-ligand binding affinity. We show that GNNs perform better than vanilla graph transformers with the same number of model parameters across protein-ligand binding affinity benchmarks. This is likely due to the importance of capturing local interactions, which graph transformers may struggle to do. We also show that deeper GNNs perform better than wider GNNs for the same number of model parameters. Finally, future work in this area will involve implementing more complex graph transformers, or taking a hybrid approach where we capture local interactions with a GNN and global interactions with a graph transformer.

        \ No newline at end of file diff --git a/blog/2023/quantum-gnn/index.html b/blog/2023/quantum-gnn/index.html new file mode 100644 index 00000000..86de1269 --- /dev/null +++ b/blog/2023/quantum-gnn/index.html @@ -0,0 +1,32 @@ + Quantum Circuit Optimization with Graph Neural Nets | 6.S898 Deep Learning Blogs 2023

        Quantum Circuit Optimization with Graph Neural Nets

        We perform a systematic study of architectural choices of graph neural net-based reinforcement learning agents for quantum circuit optimization.

        Introduction

        One of the most notable technological developments of the past century has been computing based on binary bits (0’s and 1’s). Over the past decades, however, a new approach based on the principles of quantum mechanics threatens to usurp the reigning champion. Basing the informational unit on the quantum bit, or qubit, instead of the binary bit of “classical” computing, quantum computing takes advantage of the strange phenomena of modern physics like superposition, entanglement, and quantum tunneling.

        Leveraging these as algorithmic tools, surprising new algorithms may be created. Shor’s algorithm, based on quantum algorithms, can solve classically hard cryptographic puzzles, threatening the security of current cryptographic protocols. Additionally, quantum computers can significantly accelerate drug discovery and materials science through quantum molecular dynamics simulations. They also show great potential in Quantum Machine Learning (QML), enhancing data analysis and pattern recognition tasks that are computationally intensive for classical computers.

        Similar to classical computers, which base their algorithms on circuits, quantum computers build their quantum algorithms on quantum circuits. However, quantum computers are still in development and are incredibly noisy. The complexity of a quantum circuit increases its susceptibility to errors. Therefore, optimizing quantum circuits to their smallest equivalent form is a crucial approach to minimize unnecessary complexity. This optimization is framed as a reinforcement learning problem, where agent actions are circuit transformations, allowing the training of RL agents to perform Quantum Circuit Optimization (QCO). Previous techniques in this domain have employed agents based on convolutional neural networks (CNN) .

        My previous research has demonstrated that the inherent graphical structure of circuits makes QCO based on graph neural networks (GNN) more promising than CNNs. GNNs are particularly effective for data with a graph-like structure, such as social networks, subways, and molecules. Their unique property is that the model’s structure mirrors the data’s structure, which they operate over. This adaptability sets GNNs apart from other machine learning models, like CNNs or transformers, which can actually be reduced to GNNs. This alignment makes GNNs a highly promising approach for optimizing quantum circuits, potentially leading to more efficient and error-resistant quantum computing algorithms.

        This project extends my previous work by systematically investigating the impact of various architectural choices on the performance of GNNs in quantum circuit optimization. This is achieved through a series of experiments focusing on key variables such as the number of layers in the GNN, the implementation of positional encoding, and the types of GNN layers used.

        Specific objectives include:

        1. Evaluating the Number of GNN Layers: Investigating how the depth of GNNs influences the accuracy and efficiency of quantum circuit optimization. This involves comparing shallow networks against deeper configurations to understand the trade-offs between complexity and performance.
        2. Exploring Positional Encoding Techniques: Positional encoding plays a crucial role in GNNs by providing information about the structure and position of nodes within a graph. This project experiments with various encoding methods to determine their impact on the accuracy of quantum circuit optimization.
        3. Assessing Different Sizes of Hidden Dimension: This objective focuses on understanding the influence of the hidden dimension size within GNN layers on the performance of quantum circuit optimization. By varying the size of the hidden dimension, the project identifies the optimal balance between computational complexity and the model’s ability to capture complex relationships within the data.

        Quantum Circuits and Transformation Environment

        In order to have quantum circuit optimizers we need quantum circuits! Quantum circuits are built out of quantum gates operating on qubits. These quantum circuits implement quantum algorithms in a similar way that classical circuits implement classical algorithms. In the below example, we have a five qubit circuit. It has a variety of single qubit gates (X, Rz, and H) as well as two qubit gates (CX).

        Some gates have classical analogs, like the X gate which is analogous to the classical NOT bit-flip gate. Others, like the Hadamard (H) gate, cannot be understood with classical intuition. We can use gates like H in combination with a two qubit gate like CX to put two qubits into unique quantum states. For example, with the following circuit, we can put two qubits into a special state called “quantum entanglement”.

        These qubits have outcomes that are perfectly correlated with each other. If they are measured, they will always result in the same outcome, even if after the circuit is applied the qubits are separated an arbitrary distance. This is despite the fact that the outcome is perfectly random! Measurement will result in 0 and 1 with probability 50% each. This is like flipping two coins whose outcome you cannot predict, but which always land both heads or both tails.

        We can write the circuit and subsequent quantum state with the following equation. The two possible resulting states (both heads or both tails) are represented in bracket notation: \(\ket{00}\) and \(\ket{11}\).

        \begin{equation} \ket{\psi} = \text{CX} \cdot (H \otimes I) \ket{00} = \frac{1}{\sqrt{2}}(\ket{00} + \ket{11}) \end{equation}

        However, just like classical algorithms can be written down according to different programs and circuits which do the same thing, quantum circuits can have different equivalent forms. Transitions between these equivalent forms can be written down according to a set of local rules mapping from some set of quantum gates to another.

        In the following diagram we show the quantum transformations used for this project. They are ordered according to 1) single qubit, 2) two qubit, and 3) three qubit transformations.

        These transformations will serve as the action space for our quantum circuit environment. Notably, some of these circuit transformations involve merges or cancellations, which can be used to simplify the circuits. A quantum agent which chooses an appropriate sequence of circuit transformations can then simplify a circuit into an equivalent form with fewer gates. Therefore, the task of circuit optimization may be decomposed into a trajectory of agent steps leading between different states, where states correspond to quantum circuits which are all algorithmically equivalent.

        Proximal Policy Optimization

        To train the GNN agent, we use the Proximal Policy Optimization (PPO) algorithm. PPO is a model-free, on-policy deep reinforcement learning algorithm that aims to optimize the policy of a reinforcement learning agent by iteratively updating its policy network, and it has shown success in a variety of applications, including game playing and robotics. We train the GNN agent on n-qubit random circuits. The algorithm updates the policy by maximizing a surrogate objective function that approximates the expected improvement in the policy, while enforcing a constraint on the maximum change in the policy. This constraint helps to prevent the policy from changing too much from one iteration to the next, which can destabilize the training process.

        \begin{equation} L^{\text{CLIP}}(\theta) = \hat{\mathbb{E}}_t[\min(r_t(\theta)\hat{A}_t, \text{clip}(r_t(\theta), 1 - \epsilon, 1 + \epsilon)\hat{A}_t)] \end{equation}

        To train the GNN agents for quantum circuit optimization, we start by initializing the GNN weights randomly. We then use the PPO algorithm to update the weights by sampling circuits from a distribution of n-qubit random circuits, encoding them into graphs, and simulating the circuits in a custom python gym environment. For each transformation we use

        \begin{equation} r_t = q(s_{t+1}) - q(s_{t}) \end{equation}

        as the reward signal for the PPO algorithm following , where we let $q$ be a function quantifying the desirability of the circuit. The PPO algorithm then updates the GNN weights to maximize the expected reward, subject to the maximum change in the policy constraint.

        We use \(q(s) = -\texttt{circuit_size}(s)\), such that the agent’s objective is to reduce the overall circuit size, as measured by number of gates, resulting in the reward function:

        \begin{equation} r_t = \texttt{circuit_size}(s_{t}) - \texttt{circuit_size}(s_{t+1}) \end{equation}

        The methodology for implementing the quantum circuit optimization using deep reinforcement learning and graph neural networks consists of three main components: (1) encoding the circuits as directed acyclic graphs using the DAG encoding, (2) encoding the graphs as node and edge feature tensors, and (3) training a GNN-based agent using the PPO algorithm.

        GNN architecture

        The GNN architecture used is inspired by the message passing neural network (MPNN), which is a type of GNN that performs iterative message passing between nodes in the graph. The GNN architecture used for this approach consists of \(L\) layers of Residual Gated Graph ConvNets.

        The GNN gets as input the graph (encoded as the three tensors shown above), the positional encoding, and a binary tensor encoding of which transformations are allowed for each node (this can be computed in \(O(\# nodes)\) time).

        Node features and positional encoding are both mapped to a k-dimensional embedding with a linear transformation and added together, forming a vector \(h\). The edge features are also linearly mapped to some \(l\)-dimensional embedding vector \(e\).

        After, passing through \(L\) layers, each node has a feature vector \(h’\). These features are mapped to a length \(t\) Q-vector where t=# transformations. A mask is applied so that all impossible transformations are ignored. The length \(t\) Q-vectors are concatenated together from all nodes and then outputted by the GNN. An action is selected by choosing the node/transformation which corresponds to the index of the maximum Q-value.

        Results

        After training our graph neural network agent in the quantum circuit environment using PPO, we can verify that the agent can indeed optimize circuits. We randomly sample a five qubit circuit and run our agent on the circuit for fifty steps. We see that the agent is able to successfully reduce the circuit size from 44 gates to 30, a 14 gate reduction. Meanwhile, the standard Qiskit optimizer could only reduce the circuit to 36 gates.

        Now that we have verified that our learning algorithm can successfully train a quantum circuit optimizing agent, we proceed with our study over three hyperparameters: 1) number of layers, 2) the use of positional encoding, and 3) hidden dimension. For all plots, we display the average over several runs with standard error.

        Number of Layers

        We investigate how the depth of GNNs influences the accuracy and efficiency of quantum circuit optimization. This involves comparing shallow networks against deeper configurations to understand the trade-offs between complexity and performance. In order to do this we scan over the number of layers \(L\) in our GNN from 1 to 7.

        We see that, generally, increasing the number of layers in the model improves performance of the model on random circuits. This is aligned with the intuition that increasing the number of layers of a GNN allows models to “see” information from further away, which can be used to make strategic decisions.

        However, we also observe that there is some critical point in which increasing \(L\) no longer leads to better outcomes from the model. This threshold appears to occur around \(L=5\), which performs similarly to \(L=7\).

        This could be related to a known property of GNNs, in which features of nodes which are closer together are more similar. This becomes exacerbated as the number of layers increases, smearing out information. Therefore, we expect that if we continued to increase \(L\) then model performance would degrade.

        Positional Encoding

        Positional encoding can provide information about the structure and position of nodes within a graph. These features can often play a role in symmetry-breaking.

        In addition to the existing features encoding gate type and wire information, we concatenate 8 normally distributed dimensions to the feature vector. We hypothesize that these random features can be used to “ID” gates that have the same gate type but are located in different locations. We experiment with training a GNN with and without the addition of random positional encoding.

        The resulting plot shows inconclusive evidence. While the random positional encoding came out on top at the end of training, the difference is not significant enough to be able to conclude that it is demonstrably better.

        Hidden Dimension

        The last hyperparameter we examine is the hidden dimension of the GNN layers. We scan over values 16, 32, 64, and 128. All other parameters are kept fixed.

        We observe that performance tends to improve with scale. However, similarly to the “number of layers” hyperparameter, there appears to be some critical threshold after which scaling no longer appears to improve performance. From our experiments this threshold appears to be around 64.

        It is unclear what would happen if we continued scaling past 128. For example, the performance could stay at the plateau reached at hidden dimension 64 and 128, or it could eventually get worse.

        Further Work

        While this work gave a first glimpse at some of the structural properties that work with GNNs for RL on quantum circuits, much work remains.

        Notably, many of the training runs did not seem to train until plateau. To be fully confident in the results, training until plateau would be necessary. Additionally, many of the runs were quite noisy, making it difficult to distinguish between the performance under different runs. Therefore, increasing training samples could effectively reduce standard error for better statistics.

        Moreover, the scope of future exploration can be expanded. One of the most interesting areas of future work would be on what types of graph layers work best. While we use Residual Gated Graph ConvNets, it is not clear that this is the best layer type. Other things that could be tested are other positional encoding schemes. While we experimented with random features, more standard positional encoding schemes include Laplacian and Random walk encoding.

        Conclusion

        We find that there appear to be critical thresholds of optimal values for the hidden dimension and number of layers in GNNs. We also find no conclusive evidence that random positional encoding improves performance, contrary to intuition that it would serve a useful symmetry-breaking function. While much work is left to be done, this work provides a first investigation into how performance of GNNs on QCO can be affected by various choices of hyperparameters.

        \ No newline at end of file diff --git a/blog/2023/rep-learning-for-rec-systems/index.html b/blog/2023/rep-learning-for-rec-systems/index.html new file mode 100644 index 00000000..62c5eb36 --- /dev/null +++ b/blog/2023/rep-learning-for-rec-systems/index.html @@ -0,0 +1,32 @@ + Can Constrastive Learning Recommend Me a Movie? | 6.S898 Deep Learning Blogs 2023

        Can Contrastive Learning Recommend Me a Movie?

        Introduction

        With the vast amount of information and content available online, the need for intelligent recommendation systems has only become more pressing. Many of the apps we use, YouTube, TikTok, Instagram, Netflix, Spotify, etc. all incorporate recommender systems to provide personalized content. But how do these systems work? An important factor in delivering good recommendations is having a system that can find an expressive and useful representation of users and items (where items are the specific piece of content we want to recommend).

        Traditional approaches for developing recommender systems include collaborative filtering, matrix factorization, and deep neural networks such as multi-layer perceptrons (MLPs) and graph neural networks (GNNs) . Moreover, a focus on using a hybridized approach of the previous models are also in active research, with aims of balancing their various benefits and tradeoffs.

        This project aims to explore if contrastive learning can be used to recommend movies for a user based on their prior movie ratings.

        More specifically, by choosing different strategies of defining positive/negative pairs, can we learn a user embedding that facilitates the downstream task of movie recommendation?

        Contrastive Learning

        Contrastive learning is a self-supervised machine learning technique for training a model (often called an encoder) to distinguish between similar and dissimilar pairs of data points. The goal is to map each data point from its original representation space to a smaller dimensional latent space. If the encoder is trained well and is able to learn a good representation, the newly encoded data points should act as a sort of “compressed” version of the original data point while still containing some useful semantic information.

        Contrastive learning has traditionally been used in the domains of computer vision and natural language processing. However, more recent work has shown that contrastive learning, when combined with graph neural networks (GNNs), can learn impressive representations when applied to recommender systems . For the purposes of this project, instead of using a GNN as our encoder, a simpler MLP will be used.

        Dataset

        This project explores creating a movie recommender system based on the MovieLens dataset. The small version of this dataset contains 10,000 ratings of 9,000 movies by 600 users on a 0-5 star scale. Data was collected by users of the MovieLens website, last updated in September 2018. An example of the primary ratings.csv dataset is shown below:

        userId movieId rating timestamp
        1 1 4.0 964982703
        1 3 4.0 964981247
        2 318 3.0 1445714835
        2 333 4.0 1445715029
        600 170875 3.0 1493846415

        Methodology

        Preprocessing of Dataset

        The MovieLens dataset of user-movie interactions (movie ratings) is split into a training and test dataset. For each user, 95% of their interactions were randomly sampled and allocated to the training dataset, while the remaining 5% of interactions were allocated to the test dataset.

        Thresholds were chosen to quantify whether a user “liked” a movie (LIKE_THRESHOLD) or “disliked” a movie (DISLIKE_THRESHOLD) based on that user’s rating. The training dataset was then filtered to only include interactions involving movies that had a minimum number of users who “liked” it and a minimum number of users who “disliked” the movie. This was to ensure that each movie had enough user data to facilitate the computations for selecting positive / negative pairs.

        Positive and Negative Pair Strategies

        An important component of contrastive learning involves the definition of positive pairs and negative pairs. For a given interaction (user u rates movie m), what should be considered a similar interaction and what should be considered a dissimilar interaction?

        Given an interaction by user ${u}$, let $\text{pos}(u) = u^+$ and $\text{neg}(u) = u^-$ where $(u,u^+)$ is a positive pair and $(u,u^-)$ is a negative pair. The goal will be to find the pair of functions $\text{pos}(), \text{neg()}$ such that a good representation is learned.

        Encoder Architecture

        The proposed encoder architecture is shown below. The encoder receives as input a batch of userIds, $u$ , integers in the range $0 \leq u \leq 599 $. The first layer of the encoder is an embedding layer, mapping userIds to a vector of dimension input_dim. This layer is followed by a 2-layer MLP with relu activations, with a hidden dimension of hidden_dim and an output dimension of latent_dim. Additionally, the final output of the encoder is normalized.

        Architecture for encoder, where input_dim = 1024, hidden_dim = 600, latent_dim = 200.

        Evaluation Metrics

        In order to evaluate the quality of the learned user representations, there are a handful of metrics that will be used.

        1. Top K Movie Recommendation: Movie recommendation will serve as a downstream task that acts as a proxy for the quality of the learned user representations. To recommend movies for a user, the encoder is used to get the user embeddings for all users in the dataset. We then use the cosine-similarity to compute the N=10 nearest neighbors to our target user. From these N neighbors, we retrieve all of their “liked” movies and sort by their respective ratings. The top K movies are returned as the system’s recommendations.

        2. recall@k: A popular metric used for evaluating recommender systems is recall@k . It measures the proportion of relevant items that were successfully retrieved from the top-k movie recommendations. Relevant items are defined as items that a user “likes” from the test dataset. The proportion of these items found in top-k recommendations from our recommender system (based on the learned encoder) is the recall@k. The higher the recall, the greater the overlap between our recommender’s recommended movies and the user’s actual preferred movies.

        3. Visualization of User Embeddings: By visualizing the learned user representation’s ability to be distinguished into separate clusters, we can better examine the potential user clusters for any distinguishing features. By utilizing t-distributed Stochastic Neighbor Embedding (TSNE) for dimensionality reduction of the user embedding vectors, we can project user representations to the 2D plane and use traditional clustering algorithms for visualization .

        4. Top Movies Per User Cluster: To provide more insight into the resulting user embedding clusters, the top movies of the users in each cluster is also reported.

        Experiments

        In addition to standard hyperparameter-tuning techniques to optimize training, different positive pairs and negative pairs strategies will be tested.

        All encoders were trained with num_epochs = 20, batch_size = 512, lr = 0.0001 (using Adam optimizer), and contrastive triplet loss.

        Strategy 1

        For a given user $u_i$ a similar user is determined by a random selection from a set of candidate users. These candidate users consist of the subset of users that have “liked” the same movies that $u_i$ “liked”, i.e. their ratings $\geq$ LIKE_THRESHOLD. Likewise, dissimilar users for $u_i$ were randomly selected from a set of candidate users that “disliked” the same movies $u_i$ “disliked”, i.e. their ratings $ < $ DISLIKE_THRESHOLD.

        LIKE_THRESHOLD DISLIKE_THRESHOLD
        3.5 3.5

        With these definitions of positive and negative pairs, an encoder was trained with the resulting user embeddings shown below.

        Learned user embedding clusters and top movies using Strategy 1.

        By examining the user embedding clusters, we see four loosely-defined user clusters. The top 5 highest rated movies by each cluster’s members are also depicted. A key takeaway is that we see a repetition of the same movies across each cluster, movies like The Nutty Professor, Mission Impossible 2, Ace Ventura: When Nature Calls, etc. These are all very popular and well-liked movies with a wide audience. The prevalence of highly-rated and popular movies such as these leads to a bias in our positive pairs. Since many users are fans of these movies, they are all considered similar users, i.e. our definition of similarity is too weak. The following strategies will try to address this.

        Strategy 2

        In order to decrease the influence of popular movies, one strategy is to filter out all movies that are “liked” by a certain number of users. We define POPULARITY_THRESHOLD = 100, which removes all movies with over 100 “liked” users. As a result, the distribution of “liked” users per movie is relatively uniform. The definitions of positive and negative pairs remains the same as in Strategy 1.

        LIKE_THRESHOLD DISLIKE_THRESHOLD
        3.5 3.5
        Learned user embedding clusters and top movies using Strategy 2.

        Strategy 3

        A different method for reducing the influence of popular movies was to normalize each user’s ratings. By subtracting a movie’s average rating across all users from any particular user’s rating, we are able to determine whether the user liked the movie more than others or disliked it more than others. Popular movies only have an impact if the user really liked (or disliked) it relative to everyone else.

        Using this new strategy, for any user $u_i$, instead of randomly selecting a similar user from candidates that “liked” a movie in common, these candidate users are ranked such that the candidate that has the highest normalized rating is selected (the opposite is true for choosing a dissimilar user). Therefore, instead of having a positive pair of users who rated the same movie highly, the positive pair will consist of users who both gave the same movie a higher rating than the average user.

        LIKE_THRESHOLD DISLIKE_THRESHOLD
        3.5 3.5
        Learned user embedding clusters and top movies using Strategy 3.

        Strategy 4

        Despite the previous strategies, there still seems to be a lack of cohesion among the resulting user embedding clusters. The final strategy tested was a hybrid approach. In this scenario, the LIKE_THRESHOLD has been raised and the DISLIKE_THRESHOLD lowered in an attempt to narrow the candidate pools to more extreme users. Moreover, Strategies 2 and 3 are combined. Highly popular movies are removed and normalized ratings are used.

        LIKE_THRESHOLD DISLIKE_THRESHOLD
        4 3
        Learned user embedding clusters and top movies using Strategy 4.

        Analysis

        For each strategy, the recall@k for various values of k are shown, along with the sizes of the train and test datasets after filtering.

          Strategy 1 Strategy 2 Strategy 3 Strategy 4
        recall@10 (%) 0.62 1.29 0.73 0.78
        recall@20 (%) 1.97 2.16 2.18 3.10
        recall@50 (%) 3.84 6.03 4.36 11.63
        Size Train Set 51,576 32,609 51,576 10,826
        Size Test Set 1,361 984 1,361 232

        It appears that both Strategy 2 and Strategy 3 alone seemed to make a noticeable improvement in recall, with Strategy 2 (the removal of the most popular movies) making a larger impact than normalizing ratings. Furthermore, by using both strategies along with a few other changes, a representation that resulted in a better recommender system and more well-defined embedding clusters was learned.

        Conclusion

        From the above experiments, it seems that contrastive learning (even when used with a simple MLP encoder) can eventually learn a user embedding resulting in clusters. However, it seems like either a more advanced architecture or positive/negative pair mining procedures are required to ensure that the learned representations have a useful semantic meaning. Weak positive pairs resulted from the presence of popular movies with diverse audiences. Previous work in applying contrastive learning to recommender systems highlight more complex formulations of ranked loss functions, assigning different weights depending on whether the pairs are hard or easy negative samples .

        An interesting extension of this project could explore the use of GNNs as the basis of the encoder architecture, as these types of models more naturally preserve the structure of user-movie interactions.

        \ No newline at end of file diff --git a/blog/2023/representationengineering-incontextlearning/index.html b/blog/2023/representationengineering-incontextlearning/index.html new file mode 100644 index 00000000..8e454b77 --- /dev/null +++ b/blog/2023/representationengineering-incontextlearning/index.html @@ -0,0 +1,42 @@ + Leveraging Representation Engineering For LLM’s In-Context-Learning | 6.S898 Deep Learning Blogs 2023

        Leveraging Representation Engineering For LLM’s In-Context-Learning

        We present a method to observe from model internals whether LLMs are performing in-context learning, and to control the model outputs based on such Context Vectors.

        Introduction

        Emerging capabilities in deep neural networks are not well understood, one of which is the concept of “in-context learning” (ICL), a phenomenon where a Large Language Model (LLM)’s understanding of the prompt and ability to answer accordingly drastically increases after being shown some examples that answer the question. Evaluating in-context learning and understanding why the behavior happens is both an interesting theoretical research question and a practical question that informs directions to conduct research that further advances LLM capabilities by, say, exploiting more of in-context learning.

        We attempt to explore the phenomenon of in-context learning by leveraging another exciting field of work on mechanistic interpretability where researchers set out to understand model behaviors by interpreting and editing internal weights in models. One such work that we base on is Representation Engineering by Zou et al. (2023) , where they construct a set of training text stimuli to probe LLM activations and use such stimuli to identify a direction that accurately predicts the underlying concept based on the neural activations of the model. This approach allows us to elicit readings of representation and control such representation.

        We propose to use methods in Zou et al. (2023) to evaluate in-context learning. There have not been previous attempts to examine the model internals holistically in an LLM while it is performing in-context learning. We expose such neural activations by constructing stimulus through artificial examples of in-context learning on binary classification tasks. We find a reading vector that shows high neural activity after the model is stimulated with the context pairs; such a “Context Vector” indicates the context the model draws from. While we hoped to find certain universal mechanisms across different datasets, we find that the Context Vector is dataset-specific and confirm previous hypotheses that in-context learning retrieves information from different parts of the model’s latent space.

        We then explore the results of controlling the activations along the “Context Vector” direction, in the hope that editing the activations would further boost the performance on top of in-context learning. We compare the model outputs on the classification datasets in a zero-shot setting and a setting of natural in-context learning, with the “Context Vector” amplified, and suppressed. While we find boosting performance through such editing to be challenging and sometimes finicky to tune, we find the results to be promising on editing weights to suppress the context that the model draws from and drastically reducing the performance.

        Background & Related Work

        In-Context Learning (ICL)

        An LLM is frequently asked to perform a task at inference time, and many have realized that providing some examples of how to answer the task can drastically improve the model’s performance. This phenomenon is called in-context learning. For example, Zhou et al. (2022) evaluates how LLM can become better at solving algorithmic problems through in-context learning, a task that LLM traditionally struggles at.

        In other scenarios, the LLM does not need to rely on prompts at all and can deduce the pattern from the few-shot examples alone to predict the answer. While there is no universal definition of in-context learning and its meaning has shifted over time, we define it as the performance boost to answer questions based on a limited amount of examples (as the context).

        Interestingly, Min et al. (2022) observes that such an ICL phenomenon occurs as long as examples are given, and a mismatch between input and output pairs would not hinder the ability of models to perform ICL and thus their performance on the tasks. Wei et al. (2023) further corroborates this work with findings on small models, and shows that as models scale, the ability to pick up on flipped patterns and override semantic priors when given in-context examples with flipped labels grows stronger.

        Theories on why ICL happens

        While the concept of ICL is well studied, the underlying mechanism of ICL is not well understood. Xie et al. (2022) explains the phenomenon of ICL as an Implicit Bayesian Inference, where the in-context learning prompt serves as a stimulus for the model to go “locate” corresponding concept stored in the model’s latent space that the LM has learned implicitly during pre-training. They study this by generating a simple pretraining distribution that parameterizes the transition of a Hidden Markov Model (HMM) and another prompting distribution. In this setting, the authors reduce the ICL task to Bayesian inference to map the prompting distribution to the pretraining distribution.

        Akyürek et al. (2022) further explains that Transformer-based in-context learners implement standard learning algorithms implicitly by encoding smaller models modularized to perform each specific task and update them based on the new in-context examples. von Oswald et al. (2023) claims that Transformer-based in-context learners are similar to gradient-based meta-learning formulations where they found that the Transformer can learn smaller models of a certain concept by gradient descent in their forward pass.

        Furthermore, Olsson et al. (2022) draws a parallel from ICL to a more understood phenomenon of Induction Head, where attention-only Transformers pick up on the algorithm to predict next tokens by searching for a previous occurrence of the last token and copying the same next token from previous occurrences. They claim that this can be a potential mechanism to explain ICL.

        While many hypotheses and theories have been proposed to explain ICL, most explorations to prove their theory have been small in scale, and the literature lacks a study on the large-scale LMs’ internals when performing ICL.

        Model Editing & Representation Engineering

        We’ll use the Representation reading and controls methods presented in Zou et al. (2023) to understand the context where the model attends to and discover directions that indicate such reasoning.

        Relatedly, there have been a recent surge in research related to model knowledge editing, including Meng et al. (2023) , Zhong et al. (2023) , and Hernandez et al. (2023) that demonstrate different methods for locating and editing factual associations. Other work, including Shao et al. (2023) and Belrose et al. (2023) , have shown results on erasing larger-scale memory units such as concepts. Li et al. (2023) applies such concept erasion techniques by conducting Inference Time Interference, where one can find a direction of causal influence on “truthfulness” data and increase the activations along that direction to increase truthfulness, scoring better on the TruthfulQA dataset.

        Experiment Setup

        Datasets

        We adopt a total of 30 datasets on binary classification (sentiment analysis, natural language inference, true/false inference) and multiple choices; 16 datasets are used by Min et al. (2022) , plus 12 extra datasets in the tweet_eval and ethos dataset families, rotten_tomatoes, and ade_corpus_v2-classification. Following Min et al. (2022), we only use the test set to avoid potential cross-contamination with the data that the model is pretrained on. We reserve k=64 examples in the test set for few-shot training, and the rest are used for testing.

        Training Data Generation

        For training, we construct a set of context pairs for each dataset, each context pairs containing the same examples but different instructions. The instructions are “Pay attention to the following examples” and “Ignore the following examples” respectively, in the hope that by stimulating two opposites and examining the difference, we can find a Context Vector that represents what the model draws from. We then truncate the example at each and every token till the last 5 tokens, so we can get a neural activation reading for each of the tokens.

        A sample training data input using the rotten_tomatoes dataset is as follows:

        [INST] Pay attention to the following examples: [/INST]

        offers that rare combination of entertainment and education.

        positive.

        a sentimental mess that never rings true .

        negative.

        [INST] Ignore the following examples: [/INST]

        offers that rare combination of entertainment and education.

        positive.

        a sentimental mess that never rings true .

        negative.

        Each context pair is identical except for the instructions. We use the context pairs to stimulate the model to learn the context and use the context vector to control the model’s behavior.

        Testing Data Generation

        For testing data, we use 3 input-label pairs as the prompt, with the first two pairs serving as the in-context examples, and the last pair serving as the question that we actually want to test on, obfuscating the label from the prompt.

        A sample testing data input using the rotten_tomatoes dataset is as follows:

        Input:

        [INST] offers that rare combination of entertainment and education. [/INST]

        positive.

        [INST] a sentimental mess that never rings true . [/INST]

        negative.

        an odd , haphazard , and inconsequential romantic comedy .

        Label:

        negative.

        Model

        We have explored using two models with 7 billion parameters, including Mistral-7B-Instruct-v0.1 and Llama-2-7b-hf; while we have found preliminary results consistent between the two models, all of our results later reported are from Mistral-7B-Instruct-v0.1 for consistency and due to a constraint on computational power and time.

        Training Infrastructure

        We used the MIT Supercloud infrastructure and a local machine with a single RTX 4090 GPU to train the model.

        Results

        We present results first on finding the Context Vector in the embedding space, then on using the Context Vector to control model outputs and evaluate their performance.

        Representation Reading

        We use the Representation Reading method presented in Zou et al. (2023) to find the Context Vector. Specifically, we adopted the setup of the instruction response pairs where for a given function $f$ and pairs of instructions $x_i$ and $y_i$, we denote the model’s response truncated at the $j$-th token as $f(x_i)_j$ and $f(y_i)_j$ and take the neuron activity at the last token of each of the responses, namely the activations of each and every token in the response.

        We then perform PCA on the difference of the activations of the two instructions, namely $f(x_i)_j - f(y_i)_j$ and find the first principal component $v$ that maximizes the difference in the embedding space.

        Graph plotting the correlation between the Context Vector sign and actual dataset label on Rotten Tomatoes dataset. The x-axis is the layer and the y-axis is the correlation.

        More surprising is the fact that we can find a clean representation of such Context Vector that correlates decently with the model inputs.

        We use t-SNE to visualize the difference in the embedding space on the inputs of the 30 datasets across 32 different layers and report the results below.

        t-SNE plot of the embedding space of the Context Vectors across the 30 datasets and 32 layers, color coded by dataset.
        t-SNE plot of the embedding space of the Context Vectors across the 30 datasets and 32 layers, color coded by layers.

        As shown in the figure, we find that the vectors are clustered by dataset, indicating that the Context Vectors are dataset-specific. There are no clear patterns across dataset or between different layers of the Context Vectors, further indicating that in-context learning activates different parts of the model’s latent space with information about different types of tasks.

        We also conducted scans for neuron activities in the Context Vector across the different tokens of an example sequence in a similar style as Zou et al. (2023) , for which the previous work has referred to as Linear Artificial Tomography (LAT) scans.

        The following are the LAT scans for the neuron activities corresponding to a Context Vector trained on rotten_tomatoes sentiment analysis dataset evaluated on different dataset sequences. The following graphs further corroborate the findings above on the dataset-specificity of in-context learning; while a sequence from the rotten_tomatoes dataset results in high neural activities for the Context Vector, most sequences from the other datasets do not, showing the uniqueness of such Context Vector. We have also observed most of the neuron activities in the later layers. This phenomenon makes sense since more abstract concepts and semantic structures formulate in later layers, thus being more correlated with the Context Vector, while earlier layers pick up more on token-level abstractions.

        A LAT scan of the Context Vector trained on `rotten_tomatoes` dataset evaluated with a `rotten_tomatoes` sequence. The x-axis is the token index, and the y-axis is the Layer number. More red indicates higher neural activities, and more blue indicates lower neural activities.
        A LAT scan of the Context Vector trained on `rotten_tomatoes` dataset evaluated with a `medical_questions_pair` sequence.
        A LAT scan of the Context Vector trained on `rotten_tomatoes` dataset evaluated with a `ethos-religion` sequence.

        We have also produced graphs that zoom into the token-level neural activities detection on the Context Vector of the opposing pair (Pay attention & Don’t pay attention), shown below. A large difference in the neural activities of the two instructions is denoted by red and indicates that the ablation is effective, while the green shades indicate that they are similar in neural activities. The results show that the neural activities are consistently different across the sequence until the model starts generating next tokens and the context ends where the neural activities are similar.

        A token-level LAT scan that compares the difference between the neural activities of the Context Vector of the opposing pair (Pay attention & Don't pay attention) on the `rotten_tomatoes` dataset.

        Representation Control

        To change an activation along some direction, we can imagine there are several canonical ways. First, given our Context Vector $v$ and an activation $a$, we can do one of the following.

        Addition

        \[a' = a + v\]

        Amplification

        \[a' = a + \text{sign}(a \cdot v) v\]

        Projection

        \[a' = a - (a \cdot v) \cdot \frac{v}{||v||^2}\]

        The first represents a constant perturbation so it supposedly transforms the representation to become more of a certain quality. The second amplifies the direction according to which side it is on, so it makes the representation more extreme. The third removes the quality from the representation by subtracting the projection.

        We explore all these methods to control Mistral-7b-instruct. We do our experiments on the rotten_tomatoes, sick, hate_speech18, and glue-wnli in-context-learning datasets consisting of input-output pairings where outputs have two possible correct options – positive or negative, contradiction or entailment, hate or noHate, and entailment or not_entailment (for sick, it originally contains a third option of neutral which we remove since our framework requires two classes).

        Given learned representations with the same configuration as our representation reading, we construct a test set from the same dataset as training. The test set has $16$ examples, each with one demonstration followed by a question. We evaluate correctness by having the LLM generate $10$ tokens and checking if the correct answer is contained in the output and the incorrect answer is not contained in the output, without being sensitive to case. This ensures correct evaluation so that an answer of not_entailment does not evaluate as correct for having entailment inside of it if entailment is the right answer.

        A hyperparameter which we denote $\alpha$ scales the size of $v$. If our Context Vector is $r$, sign value is $s$, then we have $v = \alpha \cdot r \cdot s$. We vary $\alpha \in { 0, 0.25, 0.5, 1, 2, 5, 10}$, and also take the negative of $\alpha$, which we label as positive and negative respectively.

        Results for Control with Addition

        For rotten tomatoes, we see the expected performance gap of positive over negative, though positive does worse than no control. Moreover, we see in glue-wnli and sick, the negative control actually does better than positive control. In hate_speech18, we see the desired result.

        Despite modifying the layers that we controlled, based upon observing the layers at which the Context Vectors had the most correlation to the trained concept, we cannot find a set of layers to control that works consistently across all four datasets, though we can find layers that work for one dataset.

        The accuracy of the model on the `rotten_tomatoes` dataset with amplification or suppression of the Context Vector using Addition. The x-axis is the coefficient of amplification, and the y-axis is the accuracy.
        The accuracy of the model on the `sick` dataset with amplification (positive) or suppression (negative) of the Context Vector using Addition. The x-axis is the alpha value of amplification, and the y-axis is the accuracy.
        The accuracy of the model on the `hate_spe` dataset with amplification (positive) or suppression (negative) of the Context Vector using Addition.
        The accuracy of the model on the `glue_wnli` dataset with amplification (positive) or suppression (negative) of the Context Vector using Addition.

        Results for Control with Amplification

        Note the result depends on the absolute value of $\alpha$ so the positive and negative graphs converge. The effect of amplification is quite smooth relative to addition in the sense that there is a consistent downward trend in performance for both amplification and suppression. This could be because amplification amplifies existing signals and this gets stronger as $\alpha$ increases.

        The accuracy of the model on the `rotten_tomatoes` dataset with amplification (positive) or suppression (negative) of the Context Vector using Amplification. The x-axis is the alpha value, and the y-axis is the accuracy.
        The accuracy of the model on the `sick` dataset with amplification (positive) or suppression (negative) of the Context Vector using Amplification. The x-axis is the alpha value, and the y-axis is the accuracy.
        The accuracy of the model on the `hate_speech18` dataset with amplification (positive) or suppression (negative) of the Context Vector using Amplification. The x-axis is the alpha value, and the y-axis is the accuracy.
        The accuracy of the model on the `glue_wnli` dataset with amplification (positive) or suppression (negative) of the Context Vector using Amplification. The x-axis is the alpha value, and the y-axis is the accuracy.

        Results for Control with Projection

        We can see that projection consistently decreases performance, which is expected as we can imagine projection as erasing the idea that the model needs to pay attention to these examples. Having positive or negative sign of $\alpha$ does not affect projection.

        The accuracy of the model on the `rotten_tomatoes` dataset with amplification (positive) or suppression (negative) of the Context Vector using Projection. The x-axis is the alpha value, and the y-axis is the accuracy.
        The accuracy of the model on the `sick` dataset with amplification (positive) or suppression (negative) of the Context Vector using Projection. The x-axis is the alpha value of amplification, and the y-axis is the accuracy.
        The accuracy of the model on the `hate_speech18` dataset with amplification (positive) or suppression (negative) of the Context Vector using Projection. The x-axis is the alpha value of amplification, and the y-axis is the accuracy.
        The accuracy of the model on the `glue_wnli` dataset with amplification (positive) or suppression (negative) of the Context Vector using Projection. The x-axis is the alpha value of amplification, and the y-axis is the accuracy.

        Ablation Studies

        A key question is whether the Context Vectors are truly special. Especially because many of our results did not work, we would like to assess the “noise level.” By sampling a random unit vector from $4096$-dimensional space, the hidden dimension of Mistral-7b-instruct, for each layer and using that for control, we get the following results.

        If we take the negative of all the Context Vectors, the graphs for positive and negative $\alpha$’s would switch. The fact that in our random sample we see such a large gap in the Glue-wnli graph indicates that there is quite a lot of noise. Moreover, if we take the negative of our particular randomly sampled vector, we obtain a Context Vector for Glue-wnli that is extremely good at controlling in-context-learning. The large landscape of $4096$-dimensional space is an exciting mystery.

        The accuracy of the model on the `rotten_tomatoes` dataset with amplification (positive) or suppression (negative) of a random vector using Addition. The x-axis is the alpha value of amplification, and the y-axis is the accuracy.
        The accuracy of the model on the `sick` dataset with amplification (positive) or suppression (negative) of a random vector using Addition. The x-axis is the alpha value, and the y-axis is the accuracy.
        The accuracy of the model on the `hate_speech18` dataset with amplification (positive) or suppression (negative) of a random vector using Addition. The x-axis is the alpha value, and the y-axis is the accuracy.
        The accuracy of the model on the `glue_wnli` dataset with amplification (positive) or suppression (negative) of a random vector using Addition. The x-axis is the alpha value of amplification, and the y-axis is the accuracy.

        Conclusion

        While we understand our work is limited due to time and compute constraints and did not achieve the results we hoped for, we tried our best to explore this research direction of finding a Context Vector that corresponds to the in-context learning behaviors and experiments of using it to control model outputs.

        Implications

        If successful, this research direction could be a powerful tool to understand mechanistically why in-context learning emerges and potentially use model editing to achieve better State-of-the-Art results on LLMs in specific benchmark evaluation scenarios. Even with our current results that demonstrate more success in suppressing the Context Vector than amplifying it, i.e. suppressing such behaviors rather than boosting them, this can have implications on works that try to perform model unlearning and impact the robustness of LLMs.

        Future Work

        Through ablating with the random vector in the embedding space, it is unfortunate that controlling for the particular Context Vector we found is not particularly different from other vectors, despite it showing some promise in suppressing the results. We hope to run further ablation studies to confirm that suppressing the Context Vector only suppresses the specific in-context learning behaviors and does not have other side effects.

        Regarding our current setup of the contrasting prompts of telling the model to pay attention or not pay attention to the concept, we can further explore the space of contrasting prompts. Directly related to our work, we would also like to explore the other type of experiment setup in Zou et al. (2023); unlike the data pair setup where we ask the model to pay attention to the examples or ignore them, we can ask the model to “think hard about the context/structure of the question” and elicit neural activities that way.

        We are also interested in exploring vectors that control step-by-step reasoning and in general, intelligence. The phrases “Let’s think step by step” or “Take a deep breath and work on this problem step-by-step” are powerful phrases that elicit chain-of-thought reasoning and improve model performance. Could we engineer activation transformations that improve these models’ performance even more than prompting does, and without the need for prompting at all?

        \ No newline at end of file diff --git a/blog/2023/scRNA-GNNs/index.html b/blog/2023/scRNA-GNNs/index.html new file mode 100644 index 00000000..89d295e2 --- /dev/null +++ b/blog/2023/scRNA-GNNs/index.html @@ -0,0 +1,32 @@ + 6.s898 Final Project- Investigating the biological underpinnings of latent embeddings for scRNA-seq | 6.S898 Deep Learning Blogs 2023

        6.s898 Final Project- Investigating the biological underpinnings of latent embeddings for scRNA-seq

        Background and Motivation

        Neurodegenerative diseases represent a complex and diverse group of disorders characterized by the progressive degeneration of the structure and function of the nervous system. They are notoriously challenging to study due to their multifaceted nature and varied pathological features. Single-cell sequencing technologies have been developed and are powerful techniques for understanding the molecular basis of many pressing scientific questions such as the causality and development of Alzheimer’s Disease (AD). These technologies, namely single-cell RNA sequencing (scRNA-seq) and single-cell Assay for Transposase-Accessible Chromatin sequencing (scATAC-seq), offer us an understanding of a cell’s state as a phase-space determined by chromatin accessibility and gene expression. Single cell data like this is extremely high dimensional; on the scale of 10s or 100s of thousands of cells, each with 10s of thousands of “features,” which represent genes or chromatin regions. Because of this, lower dimensional representations of these cells and clusters within them are valuable to help simplify our view of the data and extract signals. Moreover, in the context of cells characterized by biomarkers and stemming from patients with varying neurodegenerative diseases, it is in our interest to explore cell neighborhoods and embeddings to investigate if they properly represent the biological underpinnings of such disease.

        Graph Neural Networks (GNNs) as an architecture and their application to single-cell analysis

        Graph Neural Networks (GNNs) are a class of deep learning models that are specifically designed to handle data that is structured as a graph, which extends the principles of neural networks to handle the concept of graph topology. In GNNs, each node (which in this application represents cells) aggregates information from graph neighbors through transformation and pooling steps, which results in a model whose representation captures node level and graph level features. Relevantly, GNNs generate lower dimensional embeddings of the input data, which provides a compact and informative representation of high dimensional data such as single-cell RNA data.

        The scGNN package specifically applies these principles of GNNs to single-cell genomics, treating cells as nodes in a graph and the edges as a measure of similarity in the transcriptome of two cells. scGNN performs two main functions: clustering and imputation. The architecture is as such:

        • Feature Autoencoder: Generates low-dimensional representation of gene expression, which is the foundation for a cell graph.
        • Graph Autoencoder: Learns a topological representation of the aforementioned cell graph, which is the foundation for cell type clustering.
        • Cluster Autoencoders: There is an autoencoder for each cell type that reconstructs gene expression values.
        • Imputation Autoencoder: Recovers imputed gene expression values.

        Intro to the Data

        The dataset being presented is a scRNA-seq atlas of the aged human prefrontal cortex. It consists of 2.3 million cells sampled from 427 individuals over a varying range of Alzheimer’s pathology and cognitive impairment. The subset of this data being analyzed in this project are the 19 samples that had multiome sequencing conducted, although only the scRNA-seq was used for this analysis (excluding the scATAC-seq). This was approximately 100 thousand cells and originally 36 thousand genes that are categorized into three diagnoses: no AD, early AD, and late AD based on biomarkers like amyloid plaque and NIA-Reagan score.

        Applying scGNN to our AD scRNA-seq data

        I began by processing the raw sequencing data into a csv format that would be suitable as input to the pipeline. I then ran preprocessing on this data, which consists of log transformation, filtering out low quality/sparse genes and cells, and subsetting to the top 2000 highly variable genes by variance. I then ran the actual imputation and clustering pipeline with the following parameters: EM-iteration=10, Regu-epochs=500, EM-epochs=200, cluster-epochs=200, quickmode=True, knn-distance=euclidean. The result of training is an imputed cell matrix, a cell graph, cell type clusters, and the actual embeddings of the cells themselves. These results provide the foundation for the next layer of analysis.

        Visualizing the Degree Distribution of the Cell Graph

        The figure below is a histogram that represents the number of other cells each cell in the dataset is connected to in the cell graph as computed by the Graph Autoencoder. We can see that the distribution is skewed right, which tells us that most cells are connected to relatively few other cells, which could indicate a particularly heterogeneous cell population. However, there are a select few that have a substantially higher number of connections, which could represent some sort of “hub” cells.

        Understanding Cell Clusters in the Embedding Space

        The next approach was a detailed analysis of the clusters generated by the graph architecture by comparing to clusters generated on the imputed output data. This is important in visualizing the efficacy of the GNNs embeddings in delineating cell types compared to the clusters derived from traditional methods on the imputed data, which included all 2000 highly variable genes (HVGs). The steps are as follows:

        1. Computing Neighbors: Step 1 is to compute the neighbors for each cell, which as a reminder explains gene expression similarity between cells.
        2. Principal Component Analysis (PCA): The subsequent step is to compute PCA on the data, which is a dimensionality reduction technique.
        3. Louvain Clustering: After PCA, I used Louvain clustering, which is widely used in scRNA-seq analysis for clustering cell types, and tuned the resolution to match a similar number of clusters as generated in scGNN.
        4. UMAP Visualization: To visualize clusters, I used Uniform Manifold Approximation and Projection (UMAP), which is a dimensionality reduction technique that allows us to visualize the cell data in 2-dimensions, colored by cluster. I colored the UMAP first by the clusters generated on the embedded data by scGNN and then by the PCA/Louvain clusters.

        In the figures below, we see the result of computing cell type clusters based on data embedded by the feature and graph autoencoder versus using the traditional method of PCA then Louvain clustering. While they resulted in slightly different number of clusters, it is interesting to see that the traditional method appears to outperform the GNN in terms of separating clusters in the embedding space. Further analysis on the differentially expressed genes (DEGs) in each cluster would need to be done to confirm which cell type each cluster truly represents. Only then would we be able to determine the accuracy of each, but from a visual perspective in UMAP space, the GNN clusters are less consistent.

        Deep learning techniques and architectures like VAEs and GNNs are promising and seemingly relevant techniques for topics like single-cell genomics where data is extremely high dimensional and sparse. However, these complex algorithms beg the question of whether and how they represent the underlying biology, especially in the context of diseases like Alzheimer’s. Fortunately, while still incurable, AD has been extensively researched, and is strongly associated with a number of hereditary genes, mutations, and misfolded protein aggregates. This known research provides a robust benchmark when applying new techniques to AD data. When trying to implicate new genes or represent genes (features) in a lower dimensional embedding space, it is usually a good sign to check whether the known biomarkers of AD are also being predicted or also being represented. In our case, these embeddings provide the opportunity to see if the model captures the relevant biological information, which can then provide some level of validation to any other genes that are also being represented.

        To explore this further, I performed correlational analysis between the gene expression matrix from the imputed data and the “expression” values derived from the embedding dataframe. By focusing on the top 1% (20 genes) of genes that had the highest correlation for each embedding, I identified any biologically relevant genes that were being represented in the embedding. Below is a list of the AD relevant genes that showed up as being highly represented in this embedding space.

        • APOE: This gene, particularly the e4 allele, is the most widely known genetic risk for late onset Alzheimer’s Disease. This allele is responsible for about half of all AD cases
        • APP: This gene is called Amyloid Precursor Protein. You might recognize amyloid, which is the main hallmark of AD when it misfolds and becomes aggregate plaque in the brain. Abnormal cleavage of APP leads to an increase in amyloid plaque accumulation.
        • SORL1: Genetic mutations of this gene are associated with AD because of its role in recycling APP.
        • BIN1: Bridging integrator 1 has been implicated in many AD GWAS studies and has been found to influence the spread of tau, which is another hallmark of AD when misfolded, leading to neurofibrillary tangles.
        • CLU: Clusterin has been implicated in AD for its role in clearing amyloid-beta plaque from the brain.

        For example, in the figures below you can see that APOE falls into the genes with the highest correlation for embedding number 24, with a correlation of 0.79, and APP falls into those for embedding number 5 with a correlation of 0.79 as well.

        Wrapping it up

        I hope this analysis has demonstrated the potential of combining advanced computational methods in deep learning with foundational biological data like scRNA-seq on AD to unravel long-standing questions we have in the field.

        Future Analysis

        Due to computational time constraints, I elected to train the model on the entire dataset. Future work could include training the model on subsets of the data separated by the different level of AD pathology, which would give a slightly more nuanced understanding of disease progression and how that is reflected in the embedding space of each diagnosis category.

        \ No newline at end of file diff --git a/blog/2023/scRNAseq-assumptions/index.html b/blog/2023/scRNAseq-assumptions/index.html new file mode 100644 index 00000000..fb3cd0a4 --- /dev/null +++ b/blog/2023/scRNAseq-assumptions/index.html @@ -0,0 +1,32 @@ + Examining assumptions in scRNA-seq foundation model pre-training (6.S898 Final Project) | 6.S898 Deep Learning Blogs 2023

        Examining assumptions in scRNA-seq foundation model pre-training (6.S898 Final Project)

        Final project for MIT's Deep Learning (6.S898) class.

        Introduction

        If the fundamental building block of biology is the cell, then the fundamental building block of cells are genes. Genes are small segments of DNA that encode the information to create a protein, and proteins are a diverse set of macromolecules that can perform a staggering range of chemical functions which, when taken all together, lead to the complex behavior of cells and the organisms they make up. To create proteins from genes, an intermediate “data transfer” occurs through another molecule type known as RNA. This information flow of genes to RNA to proteins is typically referred to as “gene expression”, and is so core to biology that it’s also known as the “central dogma of molecular biology”.

        Due to the importance of gene expression, many technologies have been developed to make quantitative measurements of gene expression from cells. One of the most prominent technologies is called single-cell RNA sequencing (scRNA-seq), which enables the measurement of the expression of all genes in a given cell, often measured across thousands of cells simultaneously .

        Schematic overview of the scRNA-seq workflow. Figure sourced from .

        Large scale scRNA-seq datasets have enabled the high-resolution profiling of different organs and tissues at the cellular level, uncovering diverse cell types, rare subpopulations, and dynamic gene expression patterns within complex tissues and organisms. This technology has found applications in various fields, from developmental biology and immunology to cancer research and regenerative medicine.

        While scRNA-seq has seen broad-scale adoption, many challenges remain. In particular, an individual research experiment may focus on a particular cell or tissue type, and produce insufficient data to apply modern machine learning techniques. To supplement their data or to gain additional context, a researcher may wish to utilize data generated from other experiments or researchers. However, performing large-scale integration of datasets across samples, tissues, and experiments currently presents challenges of scalability and non-biological differences between datasets driven by experimental variability (colloquially referred to as “batch effects”) .

        In parallel to the explosion of available scRNA-seq data, the machine learning field has seen an increasing trend towards “foundation models”. Foundation models are large-scale deep learning models pre-trained with vast amounts of data for the purposes of creating a generalizable representation of a particular datatype (e.g. text, images). Given these developments, recent work has focused on developing scRNA-seq foundation models as an approach to solve the challenge of integrating diverse sets of scRNA-seq datasets in a scalable and generalizable way . Beyond just integration, foundation models of gene expression hold great promise in contributing to a broader understanding of biology by learning a representation space of cellular state, which could also lead to a large impact in downstream applications such as in silico prediction of cellular responses to novel therapeutics.

        In this post, we’ll explore a fundamental assumption of three such models (Geneformer, cell2sentence , and GenePT ), which is the assertion that a given gene expression profile can be well-approximated by a rank-value encoding of genes.

        What exactly is a rank-value encoding? Well, a typical representation of gene expression is a vector \(x \in \mathbb{R}^N\), where \(N\) is the number of genes, and each entry is a measure of the corresponding gene’s expression. In a rank-value encoding, gene expression is instead represented as a list of N strings, where the strings are gene names, and are ordered in descending order of the underlying gene expression value.

        Standard encoding of gene expression values compared to a rank-value encoding.

        The rank-value encoding provides an intuitive transformation of the continuous gene expression values into an English language sentence that is compatible with existing approaches for foundation models in the natural language processing (NLP) field. However, as can be seen above, the rank-value encoding also drops the information of the exact gene expression values. Hopefully by the end of this post, we’ll have gained some intuition for how a rank-value encoding of gene expression could be hindering the development of foundation models for gene expression and see that this does play out in practice for a real scRNA-seq foundation model.

        Overview of gene expression representations in foundation models

        While we won’t go into a full detailed comparison of different methods for constructing gene expression foundation models from scRNA-seq data, it’s worth spending a little time discussing the commonalities and differences of various approaches at a high-level.

        The most important distinction for this post is between methods that use a rank-value encoding and those that don’t. For methods that don’t use a rank-value encoding, we see a further distinction between methods that employ some form of value-binning, where continuous expression values are mapped to a discrete number of pre-specified bins, and those that don’t. Methods that use a binning approach are scGPT and scBERT. In both scGPT and scBERT, gene expression values are first binned to map the continuous values to a set vocabulary of tokens, and these tokens are then passed through an embedding layer to generate higher-dimensional representations. In contrast, scFoundation calculates gene expression embeddings by first transforming continuous scalar values to a vector using a small MLP, and then calculating a final embedding by using an attention mechanism over K learned vectors. While we won’t cover the full details, schematics of the approaches can be seen below to get a sense of the overall architectures, and most importantly to see how they directly use the gene expression values as input.

        Schematics of the various approaches that *do not* use a rank-value encoding (top to bottom): scGPT, scBERT, and scFoundation. Figures sourced from .

        On the other hand, we have the methods that we’re most interested in for the purposes of this post: the ones that utilize a rank-value encoding of gene expression. These methods are: Geneformer, GenePT, and cell2sentence. In Geneformer, gene expression values are first converted to a rank-value encoding and then used to train a Transformer-based model using a variant of a masked language modeling objective in which a set of genes at random ranks are masked, and the model must learn to predict the masked gene names. In cell2sentence and GenePT, pre-trained auto-regressive language models (GPT-2 and GPT-3.5 respectively) are applied to the rank-value encoded list of genes to obtain cell-level embeddings that are then used for downstream tasks. Again, we won’t dive into the full details of these approaches, but provide schematic overviews of them below.

        Schematics of the various approaches that *do* use a rank-value encoding (top to bottom): Geneformer, GenePT, and cell2sentence. Figures sourced from .

        Critical examinations of scRNA-seq foundation models

        In light of the recent development of many approaches for scRNA-seq foundation models, researchers have also begun performing critical assessments of such models. One of the main value propositions of foundation models is generalization to new data in a few-shot or zero-shot manner. To test this hypothesis, Kedzierska et al. benchmarked the performance of Geneformer and scGPT at two zero-shot tasks with novel datasets: cell clustering and integration of data across batches (i.e. batch effect removal) . They found that both methods underperformed compared to simpler baseline methods. Similarly, Boiarsky et al. compared scGPT and scBERT to logistic regressions in the context of cell type annotation, and also found that the simpler approach performed competitively.

        However, both of the works discussed above focused on examining the performance of scRNA-seq foundation models as a black box, whereas to the best of our knowledge, there are no current works examining the fundamental assumptions implicit in these foundation model approaches. We hope to begin addressing that gap in this post. By understanding whether or not rank-value encoding well-approximates the real similarities and differences in gene expression across cell types, we hope to either validate this assumption or gain insight into future avenues for improving pretraining of such scRNA-seq foundation models.

        Methods

        Dataset

        To perform our assessment of rank-value encoding, we’ll work with the Tabula Sapiens dataset . This scRNA-seq dataset is a reference-quality collection of nearly 500,000 cells from 24 organs, sourced from 15 normal human subjects. The Tabula Sapiens dataset provides a good testbed for our experiments, as the samples have been processed in a uniform manner, allowing us to ask how rank-value encoding performs in a “best case” scenario. In the future, it would be beneficial to see how rank-value encoding performs across datasets as well, as there may be advantages in terms of smoothing out experimental noise.

        We use the final dataset from Tabula Sapiens, which has already been subjected to quality control assessment, filtering, and normalization. While we won’t go into the details of their pipeline here, these are available in their manuscript for the interested reader. In line with typical scRNA-seq workflows, we also subset the full set of ~22,000 genes down to a subset of 2,435 genes that have been marked as “highly variable genes” (HVGs) in the Tabula Sapiens dataset. This is a fairly standard step in scRNA-seq data processing workflows, as many genes are constitutively expressed across cell types, and thus provide little information for distinguishing between cell types. Highly variable gene selection was performed by the Tabula Sapiens Consortium following the methods and recommendations in Seurat, a commonly used scRNA-seq data processing package.

        Number of cells per cell type. Note that the majority of cell types have ~1000 examples, but that there's a long tail of highly represented cell types with up to 35k examples.

        Additionally, since the Tabula Sapiens dataset is quite large and also has some cell types that are disproportionately represented, as shown above, we’ll also subset the data to get a more tractable dataset for experimentation. To do so, we’ll focus on cell types with 500 or more examples, and then further randomly subsample to 500 cells per type. This leaves us with 89 cell types: acinar cell of salivary gland, adventitial cell, b cell, basal cell, basal cell of prostate epithelium, basophil, bladder urothelial cell, capillary aerocyte, capillary endothelial cell, cardiac endothelial cell, cardiac muscle cell, cd24 neutrophil, cd4-positive alpha-beta t cell, cd4-positive helper t cell, cd4-positive, alpha-beta memory t cell, cd4-positive, alpha-beta t cell, cd8-positive alpha-beta t cell, cd8-positive, alpha-beta cytokine secreting effector t cell, cd8-positive, alpha-beta cytotoxic t cell, cd8-positive, alpha-beta memory t cell, cd8-positive, alpha-beta t cell, classical monocyte, club cell, club cell of prostate epithelium, conjunctival epithelial cell, corneal epithelial cell, corneal keratocyte, dendritic cell, dn1 thymic pro-t cell, dn3 thymocyte, duct epithelial cell, endothelial cell, endothelial cell of artery, endothelial cell of lymphatic vessel, endothelial cell of vascular tree, enterocyte of epithelium of large intestine, enterocyte of epithelium of small intestine, epithelial cell, erythrocyte, erythroid progenitor, eye photoreceptor cell, fibroblast, fibroblast of breast, granulocyte, hematopoietic stem cell, hepatocyte, immature enterocyte, immune cell, innate lymphoid cell, intermediate monocyte, keratinocyte, kidney epithelial cell, luminal cell of prostate epithelium, luminal epithelial cell of mammary gland, lung ciliated cell, macrophage, mast cell, mature enterocyte, mature nk t cell, memory b cell, mesenchymal stem cell, monocyte, myeloid cell, myofibroblast cell, naive b cell, naive regulatory t cell, naive thymus-derived 
cd4-positive, alpha-beta t cell, naive thymus-derived cd8-positive, alpha-beta t cell, neutrophil, nk cell, nkt cell, non-classical monocyte, pancreatic acinar cell, pancreatic ductal cell, paneth cell of epithelium of large intestine, paneth cell of epithelium of small intestine, pericyte cell, plasma cell, regulatory t cell, respiratory goblet cell, skeletal muscle satellite stem cell, smooth muscle cell, stromal cell, t cell, thymocyte, type i nk t cell, type ii pneumocyte, vascular associated smooth muscle cell, vein endothelial cell and 500 cells per type, for a total of 44,500 datapoints.

        To interact with this data, we’ll be using the AnnData and scanpy Python packages, which we won’t cover in detail here but flag in case you’re interested in working with such data in the future.

        Assessments

        To assess how well a cellular state can be represented using a rank-value encoding of genes, we’ll look at various measures of similarity in the raw gene expression space and the rank-value encoded space, and compare those measures both within cell types and between cell types. We’ll calculate the following measures for all pairs of cells:

        1. Euclidean distance of UMAP-projected gene expression values
        2. Spearman rank correlation coefficient
        3. Euclidean distance of UMAP-projected Geneformer embeddings

        For each distance measure, we can then generate comparisons at the level of cell types by summarizing via the median of the pairwise distances, either within or between cell types. A schematic of this approach is shown below.

        Overview of method for computing distance measures between cells followed by summarization to generate comparisons at the level of cell types.

        UMAP of gene expression values

        The idea behind this comparison is to utilize the continuous gene expression vectors, but using UMAP (Uniform Manifold Approximation and Projection) to approximate the kind of non-linear transformation one might learn using a deep neural network. To calculate these values, we perform UMAP projection of the gene expression values using the umap-learn Python package with default settings and n_components=5. Once we have the per-cell projections, we calculate Euclidean distance between all pairs of cells.

        Spearman rank correlation coefficients

        The Spearman rank correlation is a non-parametric measure of correlation between two ranked lists, which we can leverage to obtain a direct comparison of rank-value encoded gene lists. To accomplish this, we first calculate a rank-encoding of each cell’s gene expression, with identical values being assigned a fractional rank equal to the mean of their ordinal ranks. As the Spearman correlation is defined as the Pearson correlation on the rank-encoded lists, we can then directly calculate the Spearman correlations between all pairs of cells.

        Euclidean distance of UMAP-projected Geneformer embeddings

        To fully assess the effect of rank-value encoding in a deep learning model, we take this one step further by calculating the embeddings of our cells using Geneformer. We generate these embeddings by using their model and code as hosted on HuggingFace for tokenization and embedding of our gene expression vectors. For each cell \(i\), we obtain an embedding vector \(x_i \in \mathbb{R}^{256}\). We further project these 256-dimensional vectors down to 5 dimensions using UMAP for consistency with the projections of the raw gene expression values described above, and then calculate Euclidean distance between all pairs of cells. The rationale here is that Euclidean distance between two points may be larger in a 256-dimensional space than a 5-dimensional space due to the high dimensionality (i.e. “curse of dimensionality”). However, we do still see similar results when using the full 256-dimensional embedding vectors (see Appendix).

        Results

        Rank-value encodings preserve similarity between cell types

        The first thing we can see from our results is that rank-value encodings do preserve similarity between cell types in a similar manner as distances generated from raw gene expression values. The figure below is generated by looking at the distributions of distances between pairs of cells from the same type (“within”) or from different cell types (“between”). To provide a comparison at the level of cell types, we plot the median of each distribution rather than individual pairs of cells, i.e. the “within” group contains 89 data points and the “between” group contains \(\frac{89 \times 88}{2}\) data points.

        Comparison of various similarity measures both within cell types and between cell types. Note that for the Euclidean distances (left and right), lower is more similar, whereas for rank correlation (middle), higher is more similar.

        How should we interpret this? What we can observe is that all three measures maintain high similarity for cells from the same type and less similarity for cells from different types. Put another way, rank-value encodings do define a space in which different cell types tend to be distant and cells from the same type tend to be near each other. We can also say that this holds when using both a non-parametric measure of the rank-value encodings (Spearman rank-correlation) and also when using a deep learning model that operates on rank-value encoded gene vectors (Geneformer).

        However, we do also see that the difference between the “within” and “between” cell type distances is more pronounced when using a non-linear function on the raw data compared to either of the methods operating on the rank-value encoded gene vectors. This difference will become even more clear as we look at joint distributions of our different measures in the next section.

        Raw gene expression values better preserve within cell type similarities

        To gain further insight into how rank-value encodings compare to raw gene expression values, we can look at the joint distributions of our distance measures. Below we see the joint distribution of our raw gene expression-based distances compared to the rank-correlation values, shown as a 2D histogram where each hex is colored according to the number of points that fall within that bin.

        Joint distributions of distances from UMAP of raw gene expression values compared to rank correlations, within cell types (left) and between cell types (right).

        We can notice that within cell types, the rank correlation has a fairly wide dynamic range whereas the raw gene expression-based distance seems to show a tighter packing. Between cell types, we can observe that the rank correlations largely clump up closer to zero but do mesh with the larger distances we see with the raw gene expression-based measure.

        Given that we see a spreading out of cells within a type using a rank correlation, the natural question becomes whether this holds when we use a deep learning model that can learn a complex non-linear function of the rank encodings. That’s exactly what we look at below where we perform a similar comparison, but swapping out the rank correlation distance measure for the distance measure based on Geneformer embeddings.

        Joint distributions of distances from UMAP of raw gene expression values compared to distances from UMAP of Geneformer embeddings, within cell types (left) and between cell types (right).

        With the Geneformer embeddings derived from the rank-value encodings, we now see that the between cell type distances are better matched to the distances derived from raw gene expression values. However, we still see that Geneformer embeddings are more spread out within cell types compared to the non-linear transform of the raw gene expression values. To better understand why this might be the case, we propose one possible contributing factor in the next section.

        Sparsity of scRNA-seq data may drive loss of information in rank-value encodings

        A key aspect of scRNA-seq data is its extremely high sparsity. When working with single cells, the amount of available RNA is already quite limited, and then each processing step, such as RNA isolation or sequencing, introduces technical noise and the possibility of “dropout events”, where a gene’s expression is not detected at all. Combined with the inherent stochasticity of gene expression, we’re often left with data where the vast majority of genes have zero detected RNA molecules.

        Shown below is a histogram of sparsity per cell in the full Tabula Sapiens dataset as well as in the subset of cells and genes we considered in the analyses above.

        Histogram of gene expression sparsity per cell for the full Tabula Sapiens dataset (left) and the subset of 44,500 cells and 2,450 genes we considered in previous analyses. Sparsity here is defined as the fraction of genes with zero observed RNA molecules.

        While many methods for processing scRNA-seq data attempt to handle the high sparsity in a principled manner, most of the methods described here simply remove genes with zero observations from consideration. In particular, scGPT, GenePT, and Geneformer all remove genes with zero observations from their inputs, and cell2sentence restricts itself to the 100 genes with the highest expression per cell, effectively removing all genes with zero observations. While sparsity is at least partially driven by stochastic technical factors, there is undoubtedly a biological contribution as well, which may be removed when dropping genes with zero observations. While this issue is not unique to rank-value encoding, we can see that all of the methods we’ve discussed here that use rank-value encoding remove genes with zero observations, likely to circumvent the ambiguity in how one would enforce an ordering on genes that all have zero observations.

        Discussion

        To give a high-level summary, what we’ve seen in this post is that rank-value encodings are an appealing way to transform continuous gene expression vectors into a format that’s directly compatible with the foundation model architectures that have seen great success in natural language processing. However, they also seem to lose some valuable biological information about cell types, particularly information concerning similarity of cells within a given type.

        While we don’t present a smoking gun for an exact characteristic of this loss of information, we present sparsity as a key challenge in scRNA-seq data, which may be exacerbated when using rank-value encodings. We can also further hypothesize that rank-value encodings may be sensitive to small changes in gene expression values from technical noise, which could cause a shifting of ranks and thus amplify the impact of said noise. Similarly, rank-value encodings lose the absolute quantification of gene expression, and this loss of granularity may impact the model’s ability to capture the cases where subtle differences in gene expression hold biological significance.

        From the perspective of downstream use cases, models based on rank-value encodings are also limited in their ability to explore the counterfactuals that may be interesting in cases such as predicting cellular responses to a novel therapeutic. For example, if a drug were known to affect the expression of a single gene, but not to the point where the ranking of this gene shifted, then such a model would be unable to explore the downstream effect of this drug on the expression of other genes.

        In terms of limitations, the work presented here is fairly superficial and is constrained both in terms of size of datasets and breadth of methods compared. To perform a more robust comparison in the future, we would like to scale up this analysis to larger datasets, such as the full Tabula Sapiens dataset. We would also like to more directly compare cell type similarities in the embedding spaces of other scRNA-seq foundation models, including those that do and do not utilize rank-value encodings. A great follow-up would be to perform a head-to-head comparison of a model like scBERT to Geneformer on the full Tabula Sapiens dataset.

        Additionally, we’ve also yet to explore the angle of robustness across datasets. It’s possible that some of the shortcomings we’ve listed for rank-value encodings may actually be benefits in the context of suppressing technical noise when integrating scRNA-seq datasets across studies, institutions, and experimental techniques. Performing this comparison across datasets would be a valuable follow-up that would help paint a fuller picture of the value of rank-value encodings in the context of constructing foundation models for gene expression data.

        While we’ve discussed many challenges in constructing foundation-scale models for gene expression data, it’s worth closing this post with an optimistic reflection on the potential value of such models. By training a deep learning model to construct a representation space of cellular state, we stand to create a powerful tool that will help us gain a fundamental understanding of cellular biology and its underlying complex regulatory networks. Ultimately, such tools could help us unravel the genetics of various diseases, paving the way for a new era of disease treatments and precision medicine.

        Appendix

        Joint distributions of distances from UMAP of raw gene expression values compared to distances from raw Geneformer embeddings, within cell types (left) and between cell types (right).
        \ No newline at end of file diff --git a/blog/2023/sentence-embeddings/index.html b/blog/2023/sentence-embeddings/index.html new file mode 100644 index 00000000..46dd9a5c --- /dev/null +++ b/blog/2023/sentence-embeddings/index.html @@ -0,0 +1,42 @@ + In the pursuit of cheap and robust word embeddings | 6.S898 Deep Learning Blogs 2023

        In the pursuit of cheap and robust word embeddings

        A study of how we can train a student word embedding model to mimic the teacher OpenAI word embedding model by using as small a training set as possible. We also investigate preprocessing tricks and robustness against poisoned data.

        Introduction and Motivation

        Large Language Models (LLMs), such as Bard and OpenAI’s GPT-4 are typically used to obtain data embeddings of text. These embeddings are quite rich, encoding common-sense semantic information. A good embedding naturally aligns with our intuitive human understanding of language: at a high level, similar text/words are clustered together, while dissimilar text/words are farther apart.

        High-quality embeddings also satisfy semantic equations that represent simple analogies. Define \((\text{some_text})\) to be the embedding of some string “some_text.” Then, a traditionally good embedding will typically obey linguistic equations like :

        \[(\text{King}) - (\text{Man}) + (\text{Woman}) ≈ (\text{Queen})\] \[(\text{Paris}) - (\text{France}) = (\text{Warsaw}) - (\text{Poland})\]

        However, repeatedly querying LLMs for large-scale analysis is expensive. Many utilize thousands of cloud GPUs and are constantly fine-tuned, adding to their cost. This cost barrier discourages researchers—especially those with less funding—from making use of these embeddings for their own models. Repeated strain on LLMs’ infrastructure can even cause a negative environmental impact. However, we often don’t need embeddings as good as these fancy ones to conduct certain types of research. Specifically, it would be desirable for a researcher to choose their embedding quality, with the understanding that higher-quality embeddings take longer, and vice versa. Such a model should be robust and resistant to being trained on a small amount of incorrect data (which can happen by accident when scraping text, or due to malicious behavior).

        These issues motivate the following research question: on how little data can we train a text embedding model—with OpenAI embedding as ground truth—such that our embeddings are of good enough quality? And can we quickly preprocess the data to improve our results?

        Background and Literature Review

        While there is some existing literature on generating word embeddings more “cheaply,” significant differences exist with current methodologies. Broadly, this process is called knowledge distillation (KD), which aims to “distill” knowledge from a larger teacher model (in our case, OpenAI embeddings) into a smaller student model.

        For example, Shin et al. discuss a novel distillation technique that “distills” a “student” embedding model from a “teacher” model . Importantly, this work focuses on reducing the dimensionality of the “student” word embeddings without compromising accuracy—which is fundamentally different from our goals. While our OpenAI embeddings form the “teacher,” our student model should output embeddings of the same dimensionality. Our dimensionality preservation is useful if we want to directly translate general patterns or trends from our student’s embeddings to the OpenAI embeddings.

        Gao et al. take a different approach. They propose a KD framework for contrastive sentence embeddings, DistilCSE. It works by first applying KD on a large amount of unlabeled text before fine-tuning the student model via contrastive learning on limited labeled data . Contrastive learning in this domain is promising, especially since synonyms and pairs of similar words naturally form positive pairs. However, in our context, a direct application of contrastive learning presents some issues.

        For example, suppose we had some Euclidean distance threshold A and B, such that, for any two word embeddings \(c\) and \(d\):

        If the distance between \(c\) and \(d\) is less than A, then define \(c\) and \(d\) to be positive pairs for contrastive learning.

        If the distance between \(c\) and \(d\) is greater than B, then define \(c\) and \(d\) to be negative pairs for contrastive learning.

        While this process (and others like it) isn’t too resource-intensive, it has a few issues, even if we are able to define proper thresholds A and B. Firstly, it “wastes” pairs of data where the distance is in between A and B. Secondly, information about direction is easy to lose—so while a student would learn to embed similar words closer together and dissimilar ones further apart, the student may be invariant to direction and sensitive only to Euclidean distance in the n-dimensional space. This is not ideal.

        Other related state-of-the-art approaches also present issues. Gao et al. describe another approach involving running data through an encoder multiple times with standard dropout to generate positive pairs instead of searching for them in the data itself . While this approach is promising, the approach involves repeatedly using somewhat slow pre-trained text embedders, which is exactly what we want to avoid.

        To reduce computational complexity while still reaping the benefits of preprocessing, we look to a paper by Rahimi et al. They explain how removing stop words (common words, like “a,” “the,” etc.) and punctuation improves sentence embedding quality, for a variety of reasons . Even though we’re focusing on word embeddings—and not sentence embeddings—we can adapt this general approach to our project with lemmatization, which applies morphological analysis to words to simplify them to a base form. For example, “fighting” turns into “fight,” “was” turns into “be,” and “octopi” turns into “octopus,” and so on.

        This reduces data sparsity by consolidating related forms of words into a single representation, which is especially helpful for low-frequency words. This in turn helps the model generalize across tenses and other variations as it can focus on the “core” differences of words rather than auxiliary modifiers. We thus plan to investigate lemmatization in this context.

        We struggle to find closely related literature about student models’ resistance to poisoned data. Thus, we decided to investigate this aspect as well.

        To conclude our literature review, while different variants of KD exist, we decide to focus on a modified response-based KD, in which the teacher model sends final predictions to the student network, which then directly mimics these predictions by minimizing some loss .

        Generic response-based knowledge distillation. Image credits: Gou, J., Yu, B., Maybank, S.J. et al. Knowledge Distillation: A Survey. Int J Comput Vis 129, 1789–1819 (2021). https://doi.org/10.1007/s11263-021-01453-z

        Other distillation approaches—such as feature-based KD, relation-based KD, and the contrastive approach described above—do exist, but require more intimate knowledge of the teacher’s features and/or layers . This is not reasonable information we can expect to have, as companies are often incentivized to obfuscate their specific word embedding architectures.

        Methods and Experiments

        We center our studies on a standard dataset of 10k English words scraped from high-level Standard English texts that’s been empirically validated for quality. We also use the OpenAI API to obtain text-embedding-ada-002 embeddings of the entire dataset to use as ground truth. While these aren’t necessarily the best embeddings, even among OpenAI’s own embeddings, they are the best choice given our computational restrictions.

        Now, we detail our model architecture. Our baseline model (call this Model A) is a sequential ReLU and nn.Embedding layer followed by L2 normalization. Model A serves as a crude baseline—therefore, we do not investigate it as deeply as the more complex model that followed due to large differences in performance.

        Instead, we focus our efforts on the more complex Model B, detailed below in Figure 1 in the context of our pipeline. Model B utilizes an nn.Embedding layer, followed sequentially by 2 blocks. The first uses ReLU activation followed by a linear layer of size \(\frac{\text{embedding_dim}}{2}\). The second layer is the same, except the final Linear layer outputs embeddings with the full “embedding_dim.” Notably, we use L2 normalization to make sure each embedding vector has magnitude 1 (such that all embeddings exist in an n-hypersphere). Since all embeddings are unit embeddings, using cosine embedding loss along with an Adam optimizer is natural. Thus, instead of computing cosine similarities between teacher and student vectors, we can just focus on minimizing this embedding loss.

        For the training stage, we train our embedding model to map words to vector embeddings on Google Colab with an Nvidia T4 GPU. There may be up to 3 processing steps, as depicted in Figure 1:

        Fig. 1: An overview of Model B’s architecture in context of our experimentation.

        First, we choose whether or not to lemmatize the entire dataset before proceeding.

        Second, the training split. We train our embedding models above on each of the following proportions (call this \(p\)) of the dataset: 0.005, 0.009, 0.016, 0.029, 0.053, 0.095, 0.171, 0.308, 0.555, and 1.00.

        Finally, we choose whether or not to poison 10 percent of the entire word dataset (not the training dataset). When a word is poisoned, the model incorrectly believes that some random unit vector is the ground-truth embedding instead of the actual OpenAI embedding.

        For each such model, we train for up to 80 epochs, limited by our computational resources.

        We then evaluate the model’s embeddings against the ground truth with multiple metrics—cosine similarity (via the embedded cosine loss), graphically via distributions of the embedding means, linguistic math, etc.

        Taken together, this methodology is comprehensive.

        Results and Analysis

        Model A, the Baseline

        First, here is a graph of training up our baseline Model A (Figure 2) and our augmented Model B (Figure 3). The difference in training epochs (80 for Model A, and 60 for Model B) is due to limited resources. This doesn’t matter much, as there is a clear, near-linear relationship between \(p\) and training time, which we use to estimate used computational resources. Thus, we consider \(p\) as directly proportional to the computational resources used for all our experiments.

        Fig. 2
        Fig. 3.

        For Model A (with no lemmatization, no data poisoning), we also want to visualize the tradeoffs between the number of epochs trained, the training proportion \(p\), and the training loss to establish some baseline intuition. To this end, we take inspiration from the game theoretic concept of Pareto efficiency, which aims to find equilibria where no change improves one of these 3 factors without hurting one of the other 2.

        We also wanted to visualize the tradeoffs between the number of epochs trained, the training proportion, and the cosine embedding loss, since we are motivated to find the optimal balance of these 3 factors. See Fig. 4.

        Fig. 4.

        Unfortunately, Fig. 4 is not particularly enlightening. Training loss decreases as the number of epochs increases and as training proportion \(p\) increases. There are also no local minima or maxima of interest. Figures 5 and 6 also confirm this with their plots of distributions of embedding means. Specifically, as we tend to move towards the right and bottom of Fig. 6, i.e. we train longer and on more data, we simply seem to approach the true distribution (Fig. 5) without anything of note.

        Fig. 5.
        Fig. 6.

        These results motivate us to look beyond our Model A. Our results from this point focus on Model B because we didn’t want a poorly performing model like Model A to be a true control; it merely served as an intuitive baseline.

        Model B, the Baseline

        As in the previous part, we obtain a Pareto-like graph for Model B, without any lemmatization and data poisoning. Firstly, the cosine embedding losses are much lower than before, due to the improved model architecture. More interestingly, after about 10 iterations, the training loss seems to stabilize across all versions of the model, potentially suggesting that training longer may not be worthwhile.

        Fig. 7.

        Since this is our base model, we don’t investigate further.

        Model B, Lemmatization, No Poisoned Data

        Now, we look to Model B, with lemmatization, but no poisoned data. The Pareto-like curve for this is telling (Fig. 8), with it looking very similar to the baseline Model B’s. As before, this suggests that training for longer may not be worthwhile, and could potentially lead to overfitting.

        Fig. 8.

        We also have a distribution of the means of embeddings for the whole dataset (Fig. 9) and from each variant of the model at different epochs (Fig. 10). Again, the results don’t say anything surprising: as we train on more data for longer, the distribution approaches that of the training dataset.

        Fig. 9.
        Fig. 10.

        To check for overfitting, we will later validate our model on simple linguistic tests, as described in the very beginning. Specifically, we will validate our model’s performance on linguistic math against OpenAI’s performance.

        Model B, Lemmatization, Poisoned Data

        The following is the Pareto-like curve, except now we poison 10 percent of the entire dataset, as described in Methods/Experiments. Curiously, we find a local minimum at approximately \(p = 0.1\) and ~20 epochs, demonstrating that our overall approach of training on a small fraction of the dataset naturally resists moderate-scale adversarial attacks on our ground-truth embeddings. Of course, the addition of poisoned data means that the loss values are on average higher than those in the previous subsection, where there was no poisoned data.

        Fig. 11.

        Again, looking at the distribution of the means of embeddings (see below), we see that models that trained on too much of the data are completely ruined. We don’t even need to compare these distributions against the whole-model distribution to see this. This result demonstrates that even a relatively small amount of poisoned data can manipulate a naive embedding model trained on an entire dataset.

        Fig. 12.

        The Effects of Data Poisoning and Surprising Robustness

        As discussed previously, we want to externally validate our models with both linguistic equations and pairs of synonyms. Essentially, we want to check that our student groups together similar words like the OpenAI teacher. Since our poisoned model performed best with \(p = 0.095,\) we use this training proportion to compare Model B with lemmatization, but no poisoned data to Model B with lemmatization and poisoned data.

        For clarity’s sake, we focus on a single representative example of our validation results in this blog. Specifically, we look into “nonviolent” and “antiaggressive,” which intuitively should exist close together in the n-dimensional unit hypersphere. Using dimensionality reduction techniques to visualize this in 2D, we obtain the following:

        Fig. 13.

        The poisoned model is surprisingly performant, performing decently against both the unpoisoned model and the OpenAI model. These results support our notion that student models that train on as little of the data as possible are somewhat resistant to uniform, random adversarial data poisoning. This empirical result is encouraging, especially since our data poisoning threshold was somewhat high.

        Conclusion, Discussions, and Future Directions

        On balance, our results help us answer our question about how to best mimic OpenAI’s word embeddings without excessive API calls. We utilize a spin-off of a response-based KD architecture to train our student model under different conditions, demonstrating both that certain preprocessing (lemmatization) improves our embedding model and that training on smaller amounts of data creates more robust models that resist adversarial data. Our initial results demonstrate promise and serve as a call to action for others to research other cheap, robust word embedding models.

        To be clear, there are certainly many limitations to our study. For one, we keep our modeling architecture simpler due to our limited compute, while a real model would certainly use a different architecture altogether. Our dataset was also on the smaller side and doesn’t fully represent the English language. Also, our implicit use of time as a proxy for computation (especially on the erratic Google Colab) is imperfect. Also, preprocessing (including, but not limited to, lemmatization) may require substantial computational resources in some cases, which we don’t account for.

        Additionally, many of the constants that we chose (such as the 10 percent data poisoning threshold, the proportions of data we trained on, etc.) are arbitrarily chosen due to limited compute. This could’ve caused unexpected issues. For example, the output dimension of embedding Model B, 1536, is more than 10 percent the size of the dataset (10k). Thus, due to our relative lack of data, our trials with data poisoning can encourage non-generalizable memorization, which is not ideal.

        Future directions would include exploring other types of preprocessing, as hinted at in our literature review. We could also look into different types of adversaries—perhaps smarter ones that actively feed information that they know to be detrimental to the model, instead of some random unit vector. While we didn’t have robust supercomputer access, we’d also love to be able to test out fancier embedding architectures.

        Finally, we’d like to thank the 6.S898 faculty and TAs for their support!

        \ No newline at end of file diff --git a/blog/2023/solvent-encoding/index.html b/blog/2023/solvent-encoding/index.html new file mode 100644 index 00000000..97538bde --- /dev/null +++ b/blog/2023/solvent-encoding/index.html @@ -0,0 +1,32 @@ + Solvent Encoding for solubility prediction using GNN | 6.S898 Deep Learning Blogs 2023

        Solvent Encoding for solubility prediction using GNN

        Evaluation of different solvent-encoding methods on a public available solubility dataset

        Introduction

        Solubility serves as an essential descriptor that models the interaction between molecules and solvents. This property is important for many biological structures and processes, such as DNA-ion interactions and protein folding. Quantum mechanics-based approaches, such as Density Functional Theory (DFT), have been deployed in multiple attempts to model solubility across diverse systems and temperatures. However, the complex nature of the problem makes it computationally demanding to accurately predict the properties with fast speed. The development of QSPR (quantitative structure–property relationship) modeling and deep graph neural networks enables us to explore the chemical space with significantly lower computational costs by modeling molecules as graphs and treating properties prediction problems as regression problems. Yet, the challenge persists—individual molecules do not exist in isolation. Due to the strong interaction between molecules, the existence of other molecules (solvent, in particular) in the environment can strongly impact the property we want to predict. However, most of the existing GNN models can only take one molecule per input, limiting their potential to solve more general chemical modeling problems. As a result, it is important to incorporate solvent embedding into the models. The focus of the project is to augment existing GNN models with various solvent-encoding methods and evaluate the performances of different models on a publicly available solubility dataset. My goal is to find out the best encoding method and potentially compare the performances of different models on various solubility datasets.

        Implementation

        This project intricately explores the functionalities of Graph Neural Network (GNN)-based models, focusing specifically on chemprop and PharmHGT. These models have exhibited remarkable proficiency in predicting molecular properties through a diverse array of message-passing and readout functions. The transformation of solvent SMILES strings into feature vectors is executed through two distinctive methods. The initial approach involves the conversion of solvents into various descriptor vectors, while the second method treats solvents as independent graphs, applying GNN models to capture their inherent structural nuances.

        Following this encoding phase, various methods are employed to combine the solvent vector with that of the solvate. Currently, my strategy involves vector concatenation, and subsequently transforming the combined vector into a novel encoding vector using Multi-Layer Perceptrons (MLP). The post-encoding phase involves channeling the vector through MLP, culminating in the generation of prediction values.

        The evaluation of the models encompasses essential metrics such as Mean Absolute Error (MAE), Root Mean Squared Error (RMSE), and R-squared (R2) values. These metrics collectively offer a comprehensive assessment of the efficacy of different encoding methods and models. The experimental validation is conducted on the BigSolDB dataset curated by Lev Krasnov et al., comprising experimental solubility data under varying temperatures and with diverse solvents. This dataset provides a robust foundation for rigorously evaluating the predictive capabilities of the GNN-based models in real-world scenarios.

        Literature, model, and descriptor review

        Graph Neural Network (GNN)-based machine learning models are among the fastest-growing and most powerful modeling tools for molecular properties prediction that can be utilized in various applications, including material and drug design. One of the most powerful models that has been published is chemprop, a model developed by Kevin Yang et al. in 2019. In contrast to traditional GNN-based models which adopt MPNN, chemprop takes advantage of D-MPNN which delivers messages using direct edges. This approach can avoid unnecessary loops in the message-passing trajectory. The model also adopts an innovative message-passing strategy called belief propagation. The power of the model has been demonstrated on various tasks including absorption wavelength prediction (Kevin Greenman et al., 2022) and IR spectroscopy (Esther Heid et al., 2023).

        In tandem with chemprop, I integrate the Pharmacophoric-constrained Heterogeneous Graph Transformer (PharmHGT) into this project, a model crafted by Yinghui Jiang et al., tailored specifically for drug discovery. In addition to traditional nodes and edges representations corresponding to atoms and bonds in the molecules, the model creates supernodes based on the predefined pharmacophore groups(which are features that are necessary for molecular recognition) and connects those supernodes with the corresponding groups of atoms using junction edges. The model then employs message-passing neural networks on the heterogeneous graph, complemented by transformer layers serving as readout functions.

        In implementing the descriptor approach, I incorporated three distinct types of descriptors: the Minnesota Solvation Database descriptors, compiled by Aleksandr V. Marenich et al. (referred to as mn descriptor), Solvent Polarity Descriptors gathered by Christian Richardt (referred to as Richardt descriptor), and Solvent Effect Descriptors collected by Javier Catalan (referred to as Catalan descriptor). These descriptors, each sourced from reputable studies and researchers, contribute diverse perspectives to the solubility analysis undertaken in this article.

        Method

        The BigSolDB dataset encompasses solubility data across various temperatures and solvents. To mitigate the temperature’s impact on solubility, I opted to focus on entries at the most prevalent temperature in the dataset—303.15 K—excluding all others. Subsequently, I transformed solubility values into logarithmic form, a commonly used measure in the realm of chemistry. I then test the PharmHGT model on the processed dataset by running two separate message-passing neural networks on both the solvent and the solvate molecules and concatenating the resulting feature vector to form a representation vector of the solvent-solvate system. Unexpectedly, the model encountered issues contrary to my initial expectations. The challenge lies in PharmHGT’s reliance on predefined pharmacophore groups to generate a graph representation of a given molecule. In instances where a molecule lacks pharmacophore groups—a commonplace scenario for small molecules like benzene or certain larger aromatic molecules—the model fails during initialization due to incorrect dimensions (specifically, 0 due to the lack of corresponding features). To overcome this hurdle, I devised the “graph augmentation approach.” For each solvent molecule, I introduced an auxiliary molecule (Dimethylformamide, DMF) containing predefined pharmacophore groups, facilitating the initialization steps. By merging the solvent graph with the auxiliary graph, the model can successfully run the initialization steps thanks to the presence of the extra junction edges in the graph.

        To maintain parity with the chemprop model for fair comparisons, I refrained from augmenting solvate molecules with DMF. Instead, I excluded all molecules incompatible with the PharmHGT models. Post-filtering, the dataset was randomly partitioned into three segments: an 80% training set, a 10% testing set, and a 10% validation set. This preprocessing lays the groundwork for a rigorous evaluation of the models and ensures a comprehensive understanding of their performance in solubility prediction. I concatenate different kinds of solvent descriptors to the dataset and evaluate their performances separately.

        Result

        The processed data comprises 2189 entries in the training set, 273 entries in the testing set, and 267 entries in the validation set. I conducted training on the modified PharmHGT and chemprop models using this dataset. Both models exhibited promising results, showcasing a test RMSE ranging from 1 to 1.7, significantly influenced by the chosen encoding methods. Notably, chemprop consistently outperforms PharmHGT across all encoding methods, although the relative performance order varies. Within the chemprop model, the mn, catalan, and graph augmentations methods yield similar results, with a test RMSE ranging between 1.1 and 1.2 logM and a MAE ranging between 0.70 and 0.72 logM. Conversely, the reichardt descriptor performs less favorably, exhibiting a test RMSE of 1.31 logM and a test MAE of 0.84 logM . Intriguingly, in the PharmHGT model, these trends are reversed. The reichardt descriptor encoding attains the best performance with a test RMSE of 1.315846 and a second lowest test MAE of 0.91, while the catalan encoding method shows the highest test RMSE at 1.66 and the highest test MAE at 0.84. This discrepancy may be attributed to PharmHGT’s specialized design for drug molecules which typically have molecular weights ranging from 400 to 1000 Da. In contrast, solvent molecules generally possess molecular weights below 200 Da and often lack pharmacophore groups that provide additional information to the model. As a result, the model tends to be reduced to basic GNN models, focusing solely on modeling interactions between neighboring atoms and therefore ignoring the important functional groups that strongly influenced the solubility.

        To validate this hypothesis, I conducted an analysis of the training RMSE across various encoding methods for PharmHGT. The finding reveals that the graph-augmentation methods beat all other methods by a huge margin. The graph augmentation method boasts a training RMSE of only 0.29 while all other methods exhibit training RMSEs of at least 0.42. This may also be attributed to the reduction of the PharmHGT models. The simple structures of solvent molecule graphs make the model susceptible to overfitting, resulting in a notably higher testing RMSE for the graph-augmentation method. Furthermore, my investigation uncovered that the catalan encoding method demonstrates a significantly higher training RMSE compared to other encoding methods, indicating that PharmHGT struggles to extract information from the descriptors. This aligns with the observation that the catalan encoding method also yields the largest testing RMSE among all encoding methods.

        Examining the R2 scores reveals a consistent pattern, where the Chemprop model consistently beats the PharmHGT models across all employed encoding methods. Within the Chemprop model, the mn, catalan, and graph-augmentation methods exhibit similar outcomes, showcasing test R2 values ranging from 0.82 to 0.84. Conversely, the reichardt descriptor lags behind, presenting a less favorable test R2 of 0.78. These trends undergo a reversal within the PharmHGT model. The reichardt descriptor encoding achieves the best performance with a test R2 of 0.77, while the catalan encoding method records the lowest test R2 at 0.57. This intriguing reversal highlights the nuanced impact of encoding methods on model performance, emphasizing the need for tailored approaches based on the underlying molecular structures.

        Conclusion

        In the course of my experimentation, a consistent trend emerges wherein chemprop consistently outperforms pharmHGT across an array of encoding methodologies. Among these methodologies, the mn descriptor method maintains a stable, albeit moderate, level of performance, denoting its reliability without yielding any outstanding superiority.

        A noteworthy observation manifests when employing the catalan descriptor method, which remarkably enhances the effectiveness of the PharmHGT model. Conversely, the chemprop model attains its peak performance when coupled with the reichardt descriptor methods and its worst performance when coupled with the catalan descriptor, showing the strong dependence of encoding methods across different models.

        However, it is imperative to underscore that each encoding method exhibits inherent limitations, precluding the identification of a universally optimal solution applicable to both models concurrently. This nuanced understanding underscores the necessity for tailored approaches, grounded in an appreciation for the distinctive characteristics and demands of each model.

        Further scrutiny into the training loss data reveals a notable constraint within the PharmHGT model. Its proclivity towards specificity for drug molecules renders it less adept at handling general tasks, necessitating the introduction of auxiliary graphs to augment its functionality. This intricacy adds a layer of consideration regarding the pragmatic applicability of the model in contexts beyond its primary pharmaceutical focus.

        In navigating these findings, it becomes evident that the pursuit of a comprehensive and adaptable model mandates a nuanced comprehension of the interplay between encoding methodologies, model architecture, and the inherent limitations associated with specific domains.

        Prospective works

        Due to the complex nature of solvent-solvate interactions, a more rigorous splitting strategy that takes into account the distributions of different solvent molecules within the training, testing, and validation sets may be needed. Additionally, random splitting and cross-validation could be potential methods for improving the generality of the model. Finally, owing to the limited computational resources, this project only trained the model with default hyperparameters (such as batch size, layer width, number of tokens, etc.). Hyperparameter optimization can also be performed to gain a better understanding of the model’s capabilities.

        Reference

        1. Analyzing Learned Molecular Representations for Property Prediction https://pubs.acs.org/doi/10.1021/acs.jcim.9b00237

        2. Pharmacophoric-constrained heterogeneous graph transformer model for molecular property prediction https://www.nature.com/articles/s42004-023-00857-x

        3. Multi-fidelity prediction of molecular optical peaks with deep learning https://pubs.rsc.org/en/content/articlelanding/2022/sc/d1sc05677h

        4. Minnesota Solvent Descriptor Database https://comp.chem.umn.edu/solvation/mnsddb.pdf

        5. Solvatochromic Dyes as Solvent Polarity Indicators https://pubs.acs.org/doi/10.1021/cr00032a005

        6. Toward a Generalized Treatment of the Solvent Effect Based on Four Empirical Scales: Dipolarity (SdP, a New Scale), Polarizability (SP), Acidity(SA), and Basicity (SB) of the Medium https://pubs.acs.org/doi/10.1021/jp8095727
        7. BigSolDB: Solubility Dataset of Compounds in Organic Solvents and Water in a Wide Range of Temperatures https://chemrxiv.org/engage/chemrxiv/article-details/6426c1d8db1a20696e4c947b
        8. Chemprop: A Machine Learning Package for Chemical Property Prediction https://chemrxiv.org/engage/chemrxiv/article-details/656f3bae5bc9fcb5c918caa2

        data

        The data and code for the experiments are available at https://github.com/RuiXiWangTW/solvent_encoding-data

        \ No newline at end of file diff --git a/blog/2023/spacial-CLIP/index.html b/blog/2023/spacial-CLIP/index.html new file mode 100644 index 00000000..20b9b60d --- /dev/null +++ b/blog/2023/spacial-CLIP/index.html @@ -0,0 +1,32 @@ + Improving CLIP Spatial Awareness Using Hard Negative Mining | 6.S898 Deep Learning Blogs 2023

        Improving CLIP Spatial Awareness Using Hard Negative Mining

        CLIP struggles to understand and reason spatially. We attempt to solve this issue with introducing hard negative examples during training.

        Introduction: CLIP doesn’t know its left and rights

        Multimodal learning has come into prominence recently, with text-to-image synthesis models such as DALLE or Stable Diffusion, and image-text contrastive learning models such as CLIP. In particular, CLIP has proven to be extremely useful in learning zero-shot capabilities from paired image and text data.

        However, recent work has highlighted a common limitation in multimodal models: the ability to capture spatial relationships. Spatial relationships can be defined as how objects in an image are positioned relative to other objects. For example, A is next to B or B is on top of A. Although language models now demonstrate an understanding of word order and spatial awareness, multimodal models still struggle to capture this relationship in both the image and captions.

        Downstream tasks

        Improving captioning abilities is an important building block in overcoming this limitation in all multimodal models. Creating synthetic captions from images is an already popular method in developing training data for other models such as DALLE-3. However, limitations in captioning abilities carry over to downstream tasks, and therefore, models such as DALLE-3 often also struggle to generate images from prompts that include spatial relationships. We hope that demonstrating the ability to generate spatially-aware captions will also lead to improvements in other Vision-Language models in the future.

        Semantic similarity

        Taken from Yamada et al.

        CLIP is trained to maximize the similarity between embeddings of images and text. This leads to CLIP matching semantically similar images and captions but not understanding finer-grained details. Concept Association is especially an issue when there are multiple objects in an image where CLIP struggles to reason about the object’s attributes (Yamada 2022). Additionally, because of the focus on semantic similarity, CLIP also struggles with spatial relationships between objects.

        Winoground

        Taken from Thrush et al.

        Spatial awareness has been explored explicitly throughout previous literature. Thrush et al. in Winoground created an evaluation dataset that targets compositional reasoning. Each data point contains two captions and two images, where the captions contain the same words only in different orders. The difference in word ordering drastically changes the meaning of the sentence and therefore the image associated with the alternative caption also is completely different. The task then becomes to match the images to the correct captions (Thrush 2022).

        Evaluation Specifics and Results

        We are going to use the image-to-caption evaluation of Winoground, which aims to match captions to each image, in contrast to matching images to captions. Different models have different matching strategies; CLIP uses the higher dot product similarity score when deciding which caption fits each image. Since there are, in total, 4 different possible matchings out of the 2 image/caption pairs, random chance would score 25%. However, many multimodal models fail to score much higher than random chance. CLIP (ViT-B/32) scores 30.75% while the best models only score 38%.

        Spatial Examples

        An example of spatial image/caption pairs. Taken from Thrush et al.

        Winoground has proven to be an extremely difficult benchmark for multimodal models - and there are a multitude of reasons why. First, changing the word orders creates image/caption pairs that need fine-grained reasoning capabilities to differentiate. One of the many reasoning capabilities needed to do well is spatial reasoning. We filter out 101 examples of Winoground that contain image/captions that require spatial reasoning to create a more task-specific benchmark. Our filtering is caption-based and targets key words that may indicate spatial relationships. We will refer to this filtered-out evaluation benchmark as Winoground-Spatial.

        Hard Negative Examples

        Hard negative examples are negative examples that are close to our anchor pair. These are examples that are close in some way to our positive example, but still wrong. Oftentimes, these examples are hard to distinguish from one another, and therefore cause the model trouble.

        CLIP Loss

        As a refresher on how CLIP is trained, CLIP first calculates an N by N similarity matrix from the dot products of the two embeddings. The model then calculates a loss function as the average of two cross entropies. The task becomes a classification task where we classify the correct caption for each image and the correct image for each caption, thus leading to two cross entropy functions.

        CLIP similarity matrix. Radford et al.

        We modify this training procedure to include additional hard negative captions. For each image/caption pair, we generate M additional negative captions. We then calculate an N by NM similarity matrix from the dot products. Then, we modify only the image classification cross entropy function in the loss to include negative captions alongside the original N captions. We don’t modify the caption classification cross entropy function since the negative examples don’t have a corresponding “image”.

        CLIP similarity matrix with negative examples.

        Data and Augmentation

        How do we generate negative examples? We first have to create a fine-tuning dataset that contains image/caption pairs that display spatial relationships. To do this, we utilize the dataset Flickr30k, a dataset that contains 31,000 images collected from Flickr along with 5 captions annotated by human annotators. We chose this dataset due to its caption quality alongside the fact that many of the image/caption pairs contain multiple objects.

        We then filter out image/caption pairs based on the captions in a similar way to how we created our evaluation benchmark, Winoground-Spatial. We use 20 key words and phrases such as: “left”, “on top of”, “beneath”, etc. to create a training set of roughly 3,600 examples. Although there are most likely more spatial examples, we choose this method as it is cost-effective while still ensuring that the training set contains only examples of spatial relationships.

        Data augmentation has been commonly used as a method to prevent overfitting in image classification tasks. Although it is common to perform image augmentations, Fan et al. introduce LaCLIP to perform text augmentations on captions to create additional image/caption pairs. This method can be thought of as generating additional “positive pairs”. In order to generate text-augmentations, they utilize language models such as llama7b and GPT-3 to ensure the sentences generated are still grammatically correct. They use in-context learning and prompts such as, “Rewrite this caption of an image vividly, and keep it less than thirty words:”.

        We follow a similar procedure to generate our negative examples. For each image/caption pair, we prompt GPT-3.5-turbo-instruct to do different augmentations. Details of the prompts are provided in the later experiments.

        Experiments

        For all experiments, we use a base model of CLIP (ViT-B/32) pre-trained on OpenAI’s WIT provided by OpenClip. We then use OpenAI’s API to generate augmentations. In total, the cost of generating augmentations was under $50 in credits.

        Experiment 1: Switching word order

        Our first experiment explores how switching the word order may serve as hard negative examples. This method is inspired by the benchmark we are using, where the captions share the same words but in a different order. For each caption, we generate a single hard negative caption. The prompt we use is displayed below:

        In-context-learning prompt used to augment word order.

        We discover that adding a single hard-negative example to each example already leads to an impressive performance boost. The accuracy improves from 19.8% to a staggering 50.5% from fine-tuning.

          Pretrained CLIP Word Order CLIP
        Pairs matched correctly 20 51
        Accuracy 0.198 0.505

        We did some extra probing and noticed the majority of the improvement was from distinguishing left and right. From the additional 31 examples our fine-tuned model got correct, 18 of them were examples that the captions included the keyword of either left or right. This is consistent with our training set, where the most popular keyword of our examples is left/right.

        Experiment 2: Replacing key spatial words

        We then explore how a different augmentation workflow could impact the accuracy. In this experiment, we augment the captions to replace the keyword with another spatial keyword. For example, the keyword “on top of” could be replaced by “underneath” or “to the right of”. We again utilize GPT to ensure the captions are still grammatically and logically correct. Because of the number of keywords available, we explore how the number of negative examples during training time may affect the model’s accuracy.

          0 negative examples (Pretrained CLIP) 1 negative examples 5 negative examples 10 negative examples
        Pairs matched correctly 20 31 65 55
        Accuracy 0.198 0.307 0.644 0.545

        We can notice that from 0-5 negative training examples, there is a distinctive increase in model accuracy. However, an interesting result is the dropoff in accuracy from 5 training examples to 10. We did some probing into why this may be the case in the training data. One hypothesis is that the training examples for hard negatives are incorrect, in that, by a human, they could be interpreted as positive examples. For example, object A could be both next to and above object B, but we are training CLIP to recognize the keyword above to be false in this case. Another hypothesis is that the difficulty of the training examples stunts training, requiring more data. This could be the case when looking at the loss function, i.e., whether it has fully converged or not.

        Conclusion and Limitations

        Although we have not fully tackled the issue of spatial awareness, we have made significant progress from our base model of CLIP, with the highest accuracy being at 64.4% compared to 19.8%. This proof-of-concept work shows how hard-negative examples could boost improvements in specific reasoning tasks. The concept of using these hard-negative examples is not limited to spatial relationships: it could be interesting to examine how hard negative tasks may improve other Winoground examples that require reasoning capabilities such as counting. We also note that there is a possibility that improving the training data may not be enough, and that the architecture may need a change to fully solve spatial relationships.

        References:

        1.Robinson, J. D.; Chuang, C.-Y.; Sra, S.; Jegelka, S. Contrastive Learning with Hard Negative Samples. In Proceedings of the International Conference on Learning Representations, 2021.

        2.Thrush Tristan, Jiang Ryan, Bartolo Max, Singh Amanpreet, Williams Adina, Kiela Douwe, and Ross Candace. 2022. Winoground: Probing Vision and Language Models for Visio-Linguistic Compositionality. In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 5238–5248.

        3.Fan, L., Krishnan, D., Isola, P., Katabi, D., and Tian, Y. (2023a). Improving clip training with language rewrites. arXiv preprint arXiv:2305.20088.

        \ No newline at end of file diff --git a/blog/2023/sparse-autoencoders-for-interpretable-rlhf/index.html b/blog/2023/sparse-autoencoders-for-interpretable-rlhf/index.html new file mode 100644 index 00000000..696ba3c2 --- /dev/null +++ b/blog/2023/sparse-autoencoders-for-interpretable-rlhf/index.html @@ -0,0 +1,42 @@ + Sparse Autoencoders for a More Interpretable RLHF | 6.S898 Deep Learning Blogs 2023

        Sparse Autoencoders for a More Interpretable RLHF

        Extending Anthropic's recent monosemanticity results toward a new, more interpretable way to fine-tune.

        Introduction

        Understanding how machine learning models arrive at the answers they do, known as machine learning interpretability, is becoming increasingly important as models are deployed more widely and in high-stakes scenarios. Without interpretability, models may exhibit bias, toxicity, hallucinations, dishonesty, or malice, without their users or their creators knowing. But machine learning models are notoriously difficult to interpret. Adding to the challenge, the most widely used method for aligning language models with human preferences, RLHF (Reinforcement Learning from Human Feedback), impacts model cognition in ways that researchers do not understand. In this work, inspired by recent advances in sparse autoencoders from Anthropic, we investigate how sparse autoencoders can help to interpret large language models. We contribute a novel, more interpretable form of fine-tuning that only learns parameters related to interpretable features of the sparse autoencoder.

        Machine learning practitioners often cannot interpret the models they build (xkcd #1838).

        Research on interpreting machine learning models falls broadly under one of two areas: representation-based interpretability (top-down) and mechanistic interpretability (bottom-up).

        Representation-based interpretability seeks to map out meaningful directions in the representation space of models. For example, Li et al. found a direction in one model that causally corresponds to truthfulness. Subsequent work by Zou et al. borrows from neuroscience methods to find directions for hallucination, honesty, power, and morality, in addition to several others. But directions in representation space can prove brittle. As Marks et al. found, truthfulness directions for the same model can vary across datasets. Moreover, current methods for extracting representation space directions largely rely on probing and the linearity hypothesis , but models may have an incentive to store some information in nonlinear ways. For example, Gurnee et al. showed that language models represent time and space using internal world models; for a world model to store physical scales ranging from the size of the sun to the size of an electron, it may prefer a logarithmic representation.

        Mechanistic interpretability, unlike representation engineering, studies individual neurons, layers, and circuits, seeking to map out model reasoning at a granular level. One challenge is that individual neurons often fire in response to many unrelated features, a phenomenon known as polysemanticity. For example, Olah et al. found polysemantic neurons in vision models, including one that fires on both cat legs and car fronts. Olah et al. hypothesized that polysemanticity arises due to superposition, which is when the model attempts to learn more features than it has dimensions. Subsequent work investigated superposition in toy models, suggesting paths toward disentangling superposition in real models . Superposition is relevant for language models because the real world has billions of features that a model could learn (names, places, facts, etc.), while highly deployed models have many fewer hidden dimensions, such as 12,288 for GPT-3 .

        Recently, Sharkey et al. proposed using sparse autoencoders to pull features out of superposition. In an interim research report, the team describes inserting a sparse autoencoder, which expands dimensionality, into the residual stream of a transformer layer. In a follow-up work, Cunningham et al. found that sparse autoencoders learn highly interpretable features in language models. In a study on one-layer transformers, Anthropic provided further evidence that sparse autoencoders can tease interpretable features out of superposition . Although interest in sparse autoencoders in machine learning is relatively recent, sparse autoencoders have been studied in neuroscience for many decades under the name of expansion recoding .

        Researchers have begun to apply sparse autoencoders to other interpretability problems. For example, Marks et al. investigated whether models on which we perform RLHF internalize the reward signal. To do so, Marks compared sparse autoencoders trained on the base model with sparse autoencoders trained on the fine-tuned model. But, to our knowledge, while others have used sparse autoencoders to probe the effects of fine-tuning, there is no prior research on using sparse autoencoders to define a more interpretable form of fine-tuning. We propose a new form of fine-tuning in which the learnable parameters are related to the interpretable features of the sparse autoencoder.

        Background

        An autoencoder is an architecture for reproducing input data, with a dimensionality bottleneck. Let $d_\text{model}$ denote the dimension of the residual stream in a transformer (4096 for Pythia 6.9B). Let $d_\text{auto}$ denote the dimensionality of the autoencoder. To enforce the dimensionality bottleneck, we require $d_\text{model} > d_\text{auto}$. The diagram below depicts an autoencoder.

        An autoencoder is trained to reproduce its input, subject to a dimensionality bottleneck.

        A sparse autoencoder relies on a different kind of bottleneck, called sparsity. For a sparse autoencoder $g \circ f$ that acts on $x \in \mathbb{R}^{d_\text{model}}$ by sending $f(x) \in \mathbb{R}^{d_\text{auto}}$ and $g(f(x)) \in \mathbb{R}^{d_\text{model}}$, the training objective combines MSE loss with an $L^1$ sparsity penalty:

        \[\mathcal{L}(x; f, g) = \|x - g(f(x))\|_2^2 + \beta \| f(x) \|_1,\]

        where $\beta > 0$ trades off sparsity loss with reconstruction loss. With the sparsity constraint, we can now let $d_\text{auto} > d_\text{model}$ by a factor known as the expansion factor. In our work, we typically use an expansion factor of $4$ or $8$. The purpose of the sparse autoencoder is to expand out the dimension enough to overcome superposition. The diagram below depicts a sparse autoencoder.

        A sparse autoencoder is trained to reproduce its input, subject to an $L^1$ sparsity bottleneck.

        Methods

        Our main experiment is to insert a sparse autoencoder into a transformer layer, train the sparse autoencoder, and then use the fused model to perform a new, more interpretable form of fine-tuning. While we originally planned to investigate RLHF, we determined that existing libraries could not perform PPO (Proximal Policy Optimization) on custom model architectures such as our transformer fused with a sparse autoencoder. As a result, we chose to investigate fine-tuning instead of RLHF. We run all experiments on a single A100 GPU through Google Colab Pro+.

        Inserting a Sparse Autoencoder in a Transformer

        There are three natural places to insert a sparse autoencoder into a transformer:

        1. MLP activations before the nonlinearity
        2. MLP activations before adding back to the residual stream
        3. The residual stream directly

        We choose the second option. The upside of operating in the MLP space is that MLP blocks may be in less superposition than the residual stream, given that MLPs may perform more isolated operations on residual stream subspaces. The upside of operating after the MLP projects down to the residual stream dimension is a matter of economy: because $d_\text{model} < d_\text{MLP}$, we can afford a larger expansion factor with the same memory resources.

        We insert a sparse autoencoder into a transformer after the MLP, but before adding into the residual stream.

        How We Train our Sparse Autoencoder

        We train our sparse autoencoder to reproduce MLP-post activations in layer one of Pythia 6.9B (deduplicated) Deduplicated means that this Pythia 6.9B model was trained on scraped web text where duplicate articles and lengthy passages are removed. Because Pythia inherits from the GPT-NeoX architecture, the specific activations we collected are named gpt_neox.layers.1.mlp.dense_4h_to_h.. To create a dataset of activations for training, we stream in text from an open-source replication of WebText, the dataset used to train GPT-2. For each batch of text, we collect Pythia 6.9B’s MLP-post activations at layer one and use these activations as training data for the sparse autoencoder.

        Concretely, our sparse autoencoder has four learnable parameters: $W_\text{enc}$, $W_\text{dec}$, $b_\text{enc}$, and $b_\text{dec}$. The second bias $b_\text{dec}$ is used to center the input. The sparse autoencoder encodes, applies a nonlinearity, and decodes its input $x$ as follows:

        \[\text{SAE}(x) = \text{ReLU}((x - b_\text{dec}) W_\text{enc} + b_\text{enc}) W_\text{dec} + b_\text{dec}.\]

        We constrain the rows of $W_\text{dec}$ to have unit norm by renormalizing after each optimizer step. Another approach to constrain the rows is to remove gradient information parallel to the feature vectors before each optimizer step, and also renormalize the rows. Although we did not implement it, Anthropic found that the second approach slightly reduces loss.

        We use an expansion factor of $4$, meaning $d_\text{auto} = 16384$. When training, we use batch size $8$, learning rate $10^{-4}$, and default $\beta_1 = 0.9, \beta_2 = 0.999$ for the Adam optimizer. Because Pythia 6.9B’s context length is $128$ tokens, each training step includes activations from $1024$ tokens. We save checkpoints every $20000$ steps ($20.48$ million tokens).

        One subtlety in training is that the sparsity constraint can eventually cause some autoencoder neurons to never activate. How to best handle these so-called dead neurons is an open question. We follow Anthropic in resampling dead neurons to new values . Because resampling can cause instability during training, we resample only every 10000 training steps. At that point, we say a sparse autoencoder neuron is dead if it has not activated in any of the last 5000 training steps. In an attempt to improve autoencoder performance, Anthropic resampled dead neurons to the feature directions in which the sparse autoencoder performed worst. For simplicity, we resample dead neurons by setting their corresponding rows of $W_\text{enc}$ and $W_\text{dec}$ to Kaiming uniform random vectors. We reset dead biases to zero.

        Fine-Tuning

        We fine-tune Pythia 70M We wanted to fine-tune Pythia 6.9B, but we encountered out-of-memory errors on an A100 GPU. In follow-up work, we will investigate quantization so that we can study Pythia 6.9B, including the sparse autoencoder we trained for it. with our sparse autoencoder inserted in layer one To learn the most about how fine-tuning affects transformer features, we would ideally learn interpretable feature directions at every transformer layer using a sparse autoencoder. Then, after fine-tuning, we could perform rich comparisons across the model. Unfortunately, reconstruction loss compounds across layers. With current training methods, it is only feasible for us to insert a sparse autoencoder into one layer of the transformer before performance significantly degrades.. Instead of adjusting weights everywhere in the network, we constrain fine-tuning to adjust only a small set of interpretable parameters within the sparse autoencoder. In particular, we learn two vectors of dimension $d_\text{auto}$: a coefficient vector $c$ and a bias vector $d$. Just prior to applying $\text{ReLU}$ in the sparse autoencoder, we scale the activations by $c$ and translate them by $d$.

        For our fine-tuning experiments, the sparse autoencoder we use is trained on Pythia 70M Chess (a variant fine-tuned on a chess dataset) This autoencoder was trained to perform well on Pythia 70M Chess, not on the base model Pythia 70M. In future work, we will match the models to investigate how our sparse autoencoder on Pythia 6.9B performs when fine-tuning Pythia 6.9B.. We insert this sparse autoencoder into the base Pythia 70M, define new learnable parameters $c$ and $d$ as above, and freeze the gradients on every weight in the fused model except the new learnable parameters. We fine-tune on a small dataset of arithmetic questions (EleutherAI/arithmetic). One training example is shown below:

        \[\text{Question: What is }(2 * 7) + 2\text{? Answer:}\]

        We train with batch size $8$, learning rate $10^{-3}$, and weight decay $10^{-2}$ using the AdamW optimizer over $10$ epochs with $200$ steps per epoch. The figure below shows the training loss as we fine-tune.

        Results

        Our results come in two parts: an exploration of our trained sparse autoencoder on Pythia 6.9B and an analysis of fine-tuning using a smaller sparse autoencoder on Pythia 70M.

        Exploring a Sparse Autoencoder

        When inserted into Pythia 6.9B at layer one, our sparse autoencoder achieves a loss of $3.201$ (zero-ablation degrades loss to $3.227$) on the held-out dataset WikiText-103, consisting of over 100M tokens from Good and Featured articles on Wikipedia. Pythia 6.9B’s baseline loss is $3.193$. Notably, the sparse autoencoder outperforms a zero-ablation of the layer, demonstrating that it learned features that are useful for reconstruction.

        As expected, if the sparse autoencoder is inserted into a layer it was not trained for, performance collapses. For example, if inserted at layer $31$ of Pythia 6.9B, the loss becomes $12.586$. Below is a figure showing the additional loss from inserting the sparse autoencoder at the first eight layers of Pythia 6.9B.

        The sparse autoencoder preserves model performance in layer 1, the layer it was trained for. The green bar is loss on WikiText-103 of Pythia 6.9B on 5 random batches. The red bar is the additional loss incurred if the sparse autoencoder is inserted after the MLP at a given layer. The first eight layers are shown.

        For more details on the training run, four figures demonstrating the sparsity, $L^1$ coefficient, $L^1$ loss, and reconstruction loss of our sparse autoencoder during training are shown below. After training on the first five million tokens, we automatically begin to adjust the $L^1$ coefficient $\beta$ until we reach the desired sparsity of $1\%$. By the end, our sparse autoencoder stabilizes at a sparsity of $100$, which means that only $0.5\%$ of sparse autoencoder features activate on a given token.

        Sparsity across the training run on Pythia 6.9B. On a given batch, sparsity is recorded as the average number of sparse autoencoder features that activate on the batch's $1024$ tokens. Our sparse autoencoder stabilizes at a sparsity of around $100$, or $0.5\%$ of its hidden dimension.
        The $\beta$ coefficient in $L_1$ loss across the training run on Pythia 6.9B. After training on five million tokens, we begin to adjust the coefficient until the sparse autoencoder reaches its target sparsity of $1\%$.
        The $L^1$ loss of the sparse autoencoder across the training run on Pythia 6.9B. The $L^1$ loss initially rises while the $L^1$ coefficient is adjusted, then falls once the target sparsity is reached as the sparse autoencoder learns a more compact representation.
        The reconstruction loss of the sparse autoencoder across the training run on Pythia 6.9B. Reconstruction loss initially rises while the $L^1$ coefficient is adjusted, due to the tradeoff between reconstruction and sparsity. Once the $L^1$ coefficient stabilizes, reconstruction loss slowly falls as the sparse autoencoder learns a more effective representation.

        We find that our sparse autoencoder learned several interpretable features. For example, the second most frequently activating feature (feature index $11928$) activates strongly on the token “·the”. The figure below shows a table with examples.

        The second most frequent feature (feature index $11928$) in the Pythia 6.9B sparse autoencoder activates on the token "·the". Relevant table columns are $\text{str\_tokens}$ (the token that activates the feature), $\text{context}$ (surrounding tokens in the sentence), and $\text{feature}$ (the raw feature activation in the sparse autoencoder, sorted in descending order). We include the top 15 examples. The feature activates once on “·of” and “·and”, but it activates most on the token “·the”. (Credit: the visualization code for the table is due to Neel Nanda in his open-source replication of Anthropic's sparse autoencoder work.)

        In addition, we found a surprising correlation between dead features. In particular, almost all dead features point in similar directions, as indicated by a high cosine similarity. In comparison, features that are not dead have a cosine similarity that is much closer to centered at zero. If dead features were drawn from the same distribution as non-dead features, we would expect cosine similarities closer to zero.

        The plot above shows the cosine similarity of dead features (red) and non-dead features (blue). Here, a feature is counted as dead if it activates nowhere on WikiText-103-v1. The cosine similarity is calculated compared to the average dead feature. (Credit: the visualization code for cosine similarity is due to Neel Nanda in his open-source replication of Anthropic's sparse autoencoder work.)

        Fine-Tuning with a Sparse Autoencoder

        We fine-tune Pythia 70M on arithmetic data by adjusting only a coefficient and bias vector within the sparse autoencoder space.

        On layer $4$, we observe an unexpected lowering of loss from $6.449$ for the base model to $6.270$ after inserting the sparse autoencoder. Once fine-tuning the sparse autoencoder on arithmetic, loss remains constant at $6.270$. We believe that the fine-tuning may perform better when we experiment on a larger model such as Pythia 6.9B.

        Although the loss does not fall, several features that our interpretable fine-tuning adjusts are interpretable. For example, the feature that is scaled up the most activates on colons (feature index $1338$). Because colons appear twice in every line of the arithmetic data, it makes sense that the fine-tuned model would like to more readily predict colons. The figure below shows the top activations of feature $1338$ on the arithmetic dataset before and after fine-tuning. After fine-tuning, the feature activates slightly more strongly in all cases.

        The table above shows the arithmetic dataset tokens on which feature $1338$ most strongly activates, before fine-tuning in the column $\text{feature}$ and after fine-tuning in the column $\text{feature (FT)}$. In all cases, the feature activates slightly more after fine-tuning.

        The feature that is most inhibited (feature index $619$) activates on newlines. We hypothesize that the sparse autoencoder learns to avoid newlines because, in the chess dataset for which it was trained, newlines are always followed by “Score: ”, indicating the start of a new game. But in the arithmetic dataset, newlines are always followed by “Answer: ”. Therefore, the model wants to inhibit this unhelpful feature. The discrepancy is a difference in datasets. To rigorously verify this hypothesis, we could compute direct logit attributions from feature $619$ to check whether it contributes to the “Answer” token. Either way, the inhibition above demonstrates that our fine-tuning procedure can detect and modify unhelpful features in the sparse autoencoder.

        For a broader view of the dynamics of our interpretable fine-tuning, the two figures below show the learned scale and bias terms across every feature in the sparse autoencoder space (where $d_\text{auto} = 2048$), sorted in ascending order. We observe that the majority of features are largely unaffected, but a few features at the tails are significantly enhanced or inhibited.

        The learned bias in the sparse autoencoder space inhibits approximately half of features while enhancing the other half. The x-axis is sorted so that the feature index runs in ascending order of the learned bias.
        The learned scaling coefficient in the sparse autoencoder space significantly inhibits a small number of features while significantly enhancing several others. We also observe that a majority of features ($2/3$) are inhibited, compared to a smaller number enhanced. The x-axis is sorted so that the feature index runs in ascending order of the learned scaling.

        Discussion

        One limitation of our fine-tuning experiments is that Pythia 70M is a small model for which there are fewer interpretable features. In addition, we inserted into Pythia 70M a sparse autoencoder trained to reconstruct activations in Pythia 70M Chess. Nonetheless, our fine-tuning results are promising. The majority of features are not significantly affected, but a few features at the tails are either significantly enhanced or inhibited. We found it fruitful to interpret these outlier features first, as they are a starting point for finding which sparse autoencoder features matter most for the fine-tuning dataset.

        When training a sparse autoencoder on Pythia 6.9B, we were successful in learning interpretable features, such as the “the” feature. But we remain uncertain of the best way to train a sparse autoencoder, especially how to resample dead features. However, one implication of our work is that research on sparse autoencoders is accessible to a wide array of researchers. We believe a systematic study of training techniques for sparse autoencoders could benefit the field.

        Conclusion

        Our work indicates that sparse autoencoders are a promising tool for machine learning interpretability. By inserting sparse autoencoders into transformer language models, we investigate how a novel form of fine-tuning can provide insight into changes in model behavior after fine-tuning. We find that our fine-tuning successfully modifies interpretable features in the sparse autoencoder space. Given the rapid adoption of powerful, fine-tuned language models across industries, we believe our method for interpretable fine-tuning is an important direction to continue to explore as researchers seek to understand how fine-tuning affects model cognition. Although our current work is limited because we only fine-tune Pythia 70M, future work can scale up model size, compute resources, and the number of tokens used to train the sparse autoencoder. Additionally, future work can extend from direct fine-tuning to investigating the effects of RLHF performed with PPO (Proximal Policy Optimization).

        Acknowledgements

        We would like to thank Professor Isola, Professor Beery, and Dr. Bernstein for an introduction to fundamental perspectives in deep learning that will stay with us forever. Thank you to Logan Smith for invaluable early guidance on the questions we could explore related to sparse autoencoders. We are thankful for the AI Safety Student Team at Harvard (AISST) and MIT AI Alignment (MAIA) for a supportive community of fellow researchers.

        Appendix

        Our code is available at the following Google Colab notebooks:

        1. Training Sparse Autoencoders
        2. Analyzing Sparse Autoencoders
        3. Fine-Tuning with Sparse Autoencoders
        4. Analysis of Fine-Tuning with Sparse Autoencoders
        \ No newline at end of file diff --git a/blog/2023/sparse-autoencoders-for-othello/index.html b/blog/2023/sparse-autoencoders-for-othello/index.html new file mode 100644 index 00000000..aa36176f --- /dev/null +++ b/blog/2023/sparse-autoencoders-for-othello/index.html @@ -0,0 +1,42 @@ + Studying the benefits and limitations of sparse auto-encoders for compositional reasoning tasks | 6.S898 Deep Learning Blogs 2023

        Studying the benefits and limitations of sparse auto-encoders for compositional reasoning tasks

        Introduction

        Neural networks accomplish complex tasks and are poised to be increasingly used in critical and ubiquitous sectors of civilization. But given a model seemingly solving a problem, how much can we say about precisely how it does that and what its solution looks like?

        It might seem like this type of question would be hopeless, but interpretability has been progressing and we can make some headway on questions like these. One of the issues for interpretability is the fact that networks pack a lot of information into individual neurons in complex hard to separate ways, which means it’s hard to look at top activating examples for a neuron and see what it’s doing. This is superposition. Anthropic’s recent paper leveraged sparse autoencoders (SAEs) to learn an interpretable basis for LLM features. Sparse autoencoders are weak dictionary learning algorithms that leverage autoencoders trained to encode and then decode the activations of a certain module in the model. Contrary to classical auto-encoders, the hidden state does not necessarily have to be smaller (enforcing compression), but the mapping has to be sparse, which we enforce by penalizing the L1 norm of the activations, where L1 is just the sums of the absolute values. This makes the feature basis much more disentangled, clean and sparse.

        That paper is far-reaching in its results and suggests a lot of potential for SAE interpretability methods. However, our work wants to investigate how effective SAEs are in contexts where there is a lot of compositional reasoning. Indeed, a lot of the features they find hinge on the fact that their simple 1L language model is picking up on a lot of cleanly separable cues and heuristics that are feeding into its prediction – for example a feature that’s high for Arabic text, or in HTML contexts, etc…. But this seems like it’d be harder if we have a model composing reasoning and computation across steps in by nature entangled ways.

        So we decided to see how this method would perform on a task where there are plausibly far fewer heuristic features that are separable, and that intuitively requires more compositionality and reasoning than the capabilities of a small 1 layer language model. We turned to the game of Othello, for which some ML interpretability has already been done, making our analysis easier, and applied sparse autoencoders to see how they would perform and what we could learn from them. We picked Othello because it’s a complex task where it might seem intuitive that the model has to gradually compose information across layers and reason about what types of moves and positions might be valid. Indeed, in the original Othello-GPT paper, they find an internal world representation when you feed the model sequence data, suggesting complex reasoning patterns. This is an initial analysis and there are many things we’d be excited to see that would make this more fleshed out.

        Background and related work

        Sparse Autoencoders: There is some previous work done on dictionary learning to interpret neural networks. The idea of sparse dictionary learning is to find an over-complete basis (i.e., there are more basis vectors than dimensions) in your embedding space, such that on inputs in your data most of the dictionary bases are orthogonal to your data, and only a few activate (sparsity). This has been used very recently to visualize transformer features for language models, as a way of taking internal feature representations out of superposition. Superposition is a barrier to interpretability where neurons and features are encoding a lot of things at once, making it hard to study individual behaviors and parts of the model. Most recently, Anthropic did extensive interpretability work on a 1-layer transformer by using sparse autoencoders in decomposing language models. They learned a sparse embedding space and then conducted a lot of analysis and interpretability on the features the original network was learning by studying it in the sparse embedding space.

        Transformers for reasoning tasks and Othello: Transformers and specifically decision transformers have formerly been used for more complicated tasks than natural language sequence prediction, like reasoning tasks and games, and proven to be successful. Although cutting-edge LLMs exhibit strong reasoning capabilities, toy models and small language models that are more accessible and that people are trying to use for interpretability are quite small, limiting their reasoning ability. Othello is a simple-to-understand but complex-to-win two-player board game, where you gradually place pieces and try to “capture opponent” pieces by sandwiching rows, columns, and diagonals of the board with two of your pieces. The winner is the player with the most pieces at the end. Recent work led to the creation of a dataset of Othello games and the publishing of a model called Othello-GPT that learns to play Othello successfully. We use both of these in our work. The way they train the model is by giving it sequences of Othello moves from games, and asking it to predict the next move, in an unsupervised way, obtaining a model that can predict legal moves and understands the mechanism of the game. They show the existence of representations forming in the model, by using a probe to recover the full board state from the model activations, even though it’s just given a sequence. This suggests the model learns more than just heuristics and is able to do internal reconstruction of the game’s features.

        Interpreting features and circuits In the original Othello-GPT, their world model probe was nonlinear. Neel Nanda extended their work and found a linear world representation of the othello model, by seeing that instead of representing the state as “black’s turn” vs “white’s turn”, the model represented it in an alternating manner, distinguishing between “my turn” vs “their turn”. There is also some other work on interpreting transformer models outside of the context of language modeling, for example with decision transformers, but this is very much a growing subfield. We were also able to get a better intuition for the features in the othello model by using neuron visualization data published by the authors.

        Method and setup

        In order to investigate a reasoning task, we used a synthetic GPT model trained on a dataset of valid Othello game sequences of length 60 (by Li et al). We manipulate and access the model’s activations and internals using the TransformerLens library.

        We used the MSE loss as a baseline to compare the performance of sparse autoencoders on a reasoning task versus a natural language sequence prediction task. We replicated the training of a recent set of dictionaries of similar size on the GPT language model (EleutherAI’s 6-layer pythia-70m-deduped) and compare our results.

        Our setup for the replication, where we pick the same hyperparameters as the authors, consists of an 8-layer GPT model with an 8-head attention mechanism and a 512-dimensional hidden space. We set up a buffer that gathers the model’s activations on a batch of game data and uses it to train the autoencoder. The buffer automatically runs the model on another batch of data once it is half empty. The activations then get fed into the autoencoder’s training loop, where it optimizes to minimize the reconstruction loss of form $L = L_1 + L_2$. In this equation, $L_1$ is the term originating from the $L_1$ norm of the activations, with a sparsity coefficient of $1e-3$ for the encoder of size $16 \times 512 = 8192$ and a sparsity coefficient of $3e-3$ for the size $64 \times 512 = 32768$, and $L_2$ is the term originating from the square error of the reconstruction with regard to the actual model activations.

        We then train various sizes of sparse autoencoders on the 4th layer of the othello model and investigate the impact of the autoencoders size on the reconstructed hidden state.

        We measure the reconstruction power of the encoder with a reconstruction score defined as $\frac {Loss_{ZeroAblation} - Loss_{Reconstruction}} {Loss_{ZeroAblation} - Loss_{Normal}}$ where $Loss_{ZeroAblation}$ is the loss after ablating the reconstructed layer, and use this as a measure for how well the encoder is able to reconstruct the MLP layer. The intuition behind this is that we compare a “base zero”, which is the ablation loss, with both the reconstruction of the layer and the original construction of the layer. This will provide us with a metric of how close our reconstruction is to ground truth.

        Results

        Comparison to Pythia-70m dictionaries

        The following tables are the results from training a sparse autoencoder of size $16 \times 512 = 8192$ and $L_1$ penalty coefficient of $1e-3$.

        Encoder’s Measured MSE loss on OthelloGPT after 100000 epochs.

        Layer MSE
        0 0.370
        1 0.537
        2 0.686
        3 0.833
        4 0.744

        Encoder’s reported MSE loss on Pythia-70m after 100000 epochs.

        Layer MSE
        0 0.056
        1 0.089
        2 0.108
        3 0.135
        4 0.148

        The following tables are the results from training a sparse autoencoder of size $64 \times 512 = 32768$ and $L_1$ penalty coefficient of $3e-3$

        Encoder’s Measured MSE loss on OthelloGPT after 100000 epochs.

        Layer MSE
        0 0.749
        1 0.979
        2 1.363
        3 1.673
        4 2.601

        Encoder’s reported MSE loss on Pythia-70m after 100000 epochs.

        Layer MSE
        0 0.09
        1 0.13
        2 0.152
        3 0.211
        4 0.222

        From the results above we can see that the autoencoder reconstructs with higher MSE loss despite having the same sparsity constraint and multiplier between the activation size and the sparse embedding. The difference becomes more drastic as we increase the sparsity of the encoder. Our analysis of these results is that this aligns with our hypothesis: in natural language sequence prediction for small models like these, it might be that it is easier for the encoder to learn sparser and more easily separable features that allow it to recover the activations. However, on a task like playing the game of Othello, where the features are more abstract and we think there might be a higher requirement of complex compositionality across layers, increasing sparsity and size makes the model perform worse.

        Another significant emerging pattern in the MSE loss of the encoders is the fact that loss increases in the further layers, which backs up our initial claim: that as features become more abstract, the autoencoder has a harder time reconstructing them.

        It is worth noting that the increase of MSE across the two sets of tables is impacted by both the increase in size and sparsity. We had made the two tables, to match the already existing benchmarks. However, in the following, we include the results of a sparse autoencoder with penalty coefficient of $3e-3$ and size $16 \times 512 = 8192$ to validate our claims about sparsity, without the effect of size.

        Encoder’s Measured MSE loss on OthelloGPT after 100000 epochs.

        Layer MSE
        0 0.954
        1 1.389
        2 1.715
        3 2.038
        4 3.057

        We see that without the effect of size and only sparsity, the model performs significantly worse, and if we compare the results to those of size $64 \times 512 = 32768$, the model gets better because it has more dimensions.

        Investigating the effect of size

        In further investigation, we experimented with training various sizes of autoencoders on layer 4 of the model. The size of the autoencoder is determined by the equation $size = x \times 512$ where $x$ is the size factor. We vary the size factor from $0.25$ to $32$. The size factor describes how much our autoencoder embedding space is bigger than the original activation space, therefore deciding how much “extra space” the autoencoder has to obey the sparsity constraint and preserve good reconstruction. We included smaller sizes so that we could investigate the effect of size and whether the encoder would be able to learn more compact features and still perform well. Our results are found in the following:

        recons_loss vs epochs

        As seen in the figure above, we see reconstruction loss decrease significantly as the number of dimensions in the autoencoder’s hidden space becomes larger than the original space. A sparse autoencoder with fewer dimensions than the original latent space fails to reconstruct well, and this can be even better observed in the following figure.

        Screenshot 2023-12-11 at 8.47.16 PM

        This picture suggests that maybe if we scale up sparse autoencoder embedding size we can recover performance at low cost. However, Anthropic’s interpretability work, linked earlier, suggests that as you increase the size of your autoencoder embedding, you risk getting a lot of niche, highly specific features with complex interactions, therefore making interpretability harder. For example, at a given size they observe a base64 feature that fires for base64 text, and then at a larger size they see it splits into several base64 features that activate for slightly different token beginnings.

        These results highlight the challenge of sparse autoencoders for compositional tasks, and bring us to the question of interpreting sparse embedding spaces for compositional reasoning.

        Interpreting the sparse autoencoder

        Here we had to take a detective’s approach and form different hypotheses of what the model was doing and how to test them. This analysis is exploratory, and given more time we’d be excited about extending this/doing even more experiments to get a complete picture. However, we’re excited about what we found and are confident that this approach is promising.

        We started by caching the autoencoder embeddings on a subset of data with valid Othello sequences and moves. This gave us a dataset to work with.

        We then did some macro level analysis by looking at and inspecting random features (dimensions of the embeddings) and seeing what kinds of boards activated most on them (by activated most we mean that the feature had a high value on that input activation for that board). This somewhat followed the pattern laid out by Anthropic’s analysis.

        However, in Anthropic’s 1L language model paper they have the following figure:

        image

        They are indicating that in their setup most of the features seem to be interpretable and clear to a human, according to human scores. In our experience looking at our sparse autoencoder and top activating examples for different features, it seems that a lot of the features are still not interpretable and we will need more work to understand the full picture 1. This may be because a lot of semantic cues for simple language modeling are more aligned with our human understanding, in the sense that the concepts the model operates on are pretty intuitive, whereas for Othello it has to build a compositional model of the game state across layers, in ways that are potentially less likely to correlate with how we might perceive the problem. We don’t claim that there are not such complex dynamics in even simple language models (there definitely are!), but we think there are more simple patterns to pick up on. We believe that the method laid out in that work needs to be extended to be applied to compositional networks for reasoning adjacent tasks, because it does not seem sufficient for this Othello model. This is an empirical claim based on studying and looking at a lot of data on when sparse features were activating throughout the Othello dataset.

        To do some global analysis, we computed a frequency histogram of the values of each feature on the dataset, and then we took an average of this frequency histogram to get a full picture of how often and how strongly features are activating across the dataset. This is on a log scale.

        image

        As we can see, on average for each feature there are a lot of inputs where the feature is not reading much at all, which makes sense given the sparsity constraint. Then as the activation gets higher and higher the frequency of each bucket decreases.

        If we increased the sparsity regularization even more we might see a sparser activation graph with more high activating frequency for large activations, but in a lot of classic encoders the distribution of embeddings tends to have a lot of smaller noise around zero, whereas here a lot of our values are actually very often split into either zero or something significant.

        We then proceed to making some hypotheses about how the model might be localizing computation about the game board throughout its features, and make some tests to see what might be going on.

        H1: Location features

        Hypothesis: what if there are features that represent the location of the last move, and only activate when that last move is within some cluster of the board? This would align with earlier world model work.

        This would be an example of a strong monosemantic and interpretable feature.

        However, we later realized that this is probably more likely as a more primitive pattern that would be noticed earlier in the model layers, before it then refines and comes up with information to decide what to predict.

        Nevertheless, we looked at the contexts in which a feature is reading strongly, and thus found a list of high-activating moves for each feature (for what current moves is feature j activating). We then clustered these into 3x3 location clusters on the board, marking positions as the same if they were close in a small square. That was based on the idea that it does not have to be activating for the exact same current move but moves in general that are adjacent. These features would then represent: was the current move around this position of the board?

        This plot was computed by looking at those activating contexts for each feature and seeing how many non-adjacent clusters of positions are within those moves. We then compute a histogram on the cluster count, trying to see how many features activate locally in a small number of clusters.

        image

        We can see that our hypothesis was wrong here and that at this point in the network our features are activating for current moves across the board, not really in a localized way, and don’t sparsely activate just when a given location is played. This was useful data to see and showed us that at this point in the network it was probably operating on high level features and things that could directly relate to its final prediction. The small amount of locally activating features all tend to just have small activations in general.

        H2: Predictive features

        This brought us to the next experiment, where we wanted to test for higher level patterns related to its prediction.

        We were curious to study the link between the times when a feature of our autoencoder is writing strongly on an input and the actual correct prediction for that input, ie the actual correct next token it’s trying to predict. Is there a localization effect there where a feature activates highly only when the correct prediction is within some cluster?

        We investigated and collected, for each feature, a list of the real (heldout) next action in the sequence whenever it is activating non negligibly. This gave us a sequence of next moves for each context where a feature wrote strongly to the activation output. Then we clustered these actions into regions of 3x3 squares on the board, trying to narrow in on the idea of local activation of a feature. We operationalized the notion of reading strongly on a game board by setting a threshold activation of 0.001 by looking at the earlier plot of activation distribution and seeing what made sense. This is actually pretty low, but it still stays significant because the sparsity constraint often just nulls out values when they are not relevant, so even small values carry signal.

        This allows us to map each feature to a number of activating clusters.

        We then plot a histogram for the number of clusters of next action locations for each feature in our dataset. The idea is that if a feature is activating on a small number of clusters for the next action, then it might be picking up on patterns on the board that are linked to the final model’s prediction, in a consistent way based on the real result.

        image

        It’s interesting to compare this to the previous plot, as here there are actually a decent amount of features that seem localized, reacting and forming predictions based on what part of the board they think the next step or move might be in, and not activating across the board for the next token. These are the ~100s of features that are only reacting in a small number of clusters, like one or two.

        It seems that in layer 4 in certain cases the model is already developing an idea of what the next move will be, and is localizing sparse features for different prediction areas.

        This explanation does not capture the full behavior and there is probably a lot going on to extend the prediction into higher layers. We can see this in the frequencies of all the features that are activating in a lot of different next-token contexts, probably picking up on general things on the board and harder to interpret compositional steps that will allow it to make predictions later.

        This reminded us of the [logit lens] in language modeling where you can unembed the early activations and get coherent (and gradually improving as you increase the layer number) predictions for the next token. This seems to be showing that some of the features are already localizing predictions about the correct prediction, in a consistent manner.

        We investigated those features corresponding to the left side of the plot ($1 \leq x \leq 3$, $x$ number of clusters) that activate only for some cluster of valid next sequence areas and found data that validated this impression! We hypothesize it’s because some action predictions are pretty clear to predict early on based on good strategy and how the dataset of sequences was generated. We found features that consistently were activating for when a given board position was the correct next board position.

        We focused particularly on feature #15 of our dim 4096 autoencoder, noticing through our analysis that it had interesting activation patterns.

        We plotted its activation value histogram:

        image

        We can see a long sparse tail of inputs where the feature doesn’t activate at all, and then a similar frequency for values beyond some threshold of activation.

        On manual inspection, other than the big cluster of samples where it’s reading zero or doesn’t activate, the feature is basically always just activating when the next move is in a specific cluster at the bottom of the board. To be more precise, 90% of the boards where it activates with a value > 0.001 are in that cluster, 93% for 0.01.

        Here are some of those example boards, where the next move played is G4, and the model activates strongly.

        One of many examples of board where feature #15 activates strongly and in fact the next correct move is G4. image 2

        Example where the feature activates and the actual next move is F4, right above G4, in the same cluster: image

        This is really interesting! Why does this feature exist? We’ve been thinking about the structure of Othello and the way the data was generated, and we think the idea is that the network is pretty confident about this position and early on manages to recognize and see what’s going on with the rest of the board to put its hypothesis in this feature.

        Although we haven’t explained a lot of the other features, it’s cool that this method has allowed us to understand and correlate this feature with a state of the game and the understanding the model has of the board!

        Discussion and Conclusion

        We are excited about future work in this direction and think interpreting compositional computation circuits is key to understanding how transformers and language models solve complex problems. In terms of our work with Othello GPT, we are excited about pushing sparse autoencoders further on this architecture and motivating more interpretability work. We are interested in work to train SAEs across layers and then see if we can track computation and model changes through sparse embeddings across layers, mirroring this [exploratory work]. This might be helpful to understand compositionality across layers. We also think interpreting features for SAEs with width smaller than the original width might be interesting to find projections of network activations that have very high level, compressed features, that might therefore be easier to interpret. We are also interested in methods that use SAE features to make causal statements about model behavior, for example by plugging the SAE into the model inference step, where at the end of our MLP we feed in the decoded encoded version of activations into the rest of the model. With this kind of setup you could then potentially ablate or modify different features to validate and study how your interpretability hypotheses about different parts of the model actually change its final predictions. Some of the limitations of our work are that we would have liked to run more experiments on different sparsity coefficients, and make more in depth comparisons to language models to see to what extent our arguments about compositional reasoning hold in a rigorous way. We would be excited to see how increasing sparsity even more affects our ability to interpret the model, potentially making things more tractable. We also recognize the difficulty of interpretability and have not yet been able to interpret any of the more complex Othello SAE mechanisms.

        To conclude, we’ve investigated the potential for sparse autoencoders for compositional reasoning tasks in the context of the Othello sequence prediction problem. Our hypothesis is that sparse autoencoders will be useful to understand such systems but their application will be more involved and complex than for earlier patterns found in language modeling tasks. We trained a sparse autoencoder at different layers of the network and saw how its performance and capabilities differ compared to previous results on language. We observe our autoencoder trained with the same hyperparameters and scaling factor for size still struggles to reach the same reconstruction performance as those for language model activations. This reveals something about the structure of these data distributions, and supports our intuition that for simple small language models SAEs are particularly performant due to their ability to pick up on a lot of separable and sparse features, but for compositional solutions where the model is learning an algorithm across layers to solve a task, the sparsity constraint incurs more of a cost, which limits this method. This intuition stems from the idea that leveraging the full extent of neural activations for compositional tasks is key to build complex algorithms across layers, and maybe less so for prediction problems that are more tractable through the composition of independent heuristics. We also nonetheless do some interpretability on our trained autoencoder, and note that the features seem less directly interpretable than those for language model SAE features (as supported by our hypothesis), but that there is some signal to analyze and understand, giving us hope for future work to use SAEs to understand compositional reasoning and circuits in general. In particular, we look at the range and frequency of sparse activations, and form different hypotheses about the ways the model might be localizing computation in sparse embeddings. 
We find the existence of predictive neurons already at layer 4, that activate when the model is already confident about a specific next action to predict. Although many of the features remain obscure, our results indicate that although sparsity is a harder constraint to impose for compositional reasoning, it can still be a useful starting point to interpret model computation.

        1. To some extent increasing the sparse regularization penalty could help with this, but our exploratory analysis revealed that increasing the sparsity penalty made the model perform too badly on the data. We could always counter this by increasing the size of the encoder, but Anthropic’s paper and our understanding suggests that this leads core interpretable features to split and split until it’s hard to get a good picture of what’s going on. 

        2. these plots are both before the G4 cluster move is played. 

        \ No newline at end of file diff --git a/blog/2023/spatiotemporal/index.html b/blog/2023/spatiotemporal/index.html new file mode 100644 index 00000000..b7a40eb4 --- /dev/null +++ b/blog/2023/spatiotemporal/index.html @@ -0,0 +1,42 @@ + Embeddings for Spatio-temporal Forecasting | 6.S898 Deep Learning Blogs 2023

        Embeddings for Spatio-temporal Forecasting

        An analysis of various embeddings methods for spatio-temporal forecasting.

        Introduction

        Time series forecasting is an interdisciplinary field that affects various domains, including finance and healthcare, where autoregressive modeling is used for informed decision-making. While many forecasting techniques focus solely on the temporal or spatial relationships within the input data, we have found that few use both. Our goal was to compare two SOTA spatiotemporal models, the STAEformer and the Spacetimeformer, and determine why one works better than the other. The papers on both models did not feature each other in their benchmark evaluations, and we thought that analyzing their embeddings and identifying their failure modes could offer new insights on what exactly the models are learning from the dataset. We hypothesized that the Spacetimeformer would perform better as its proposed approach, sequence flattening with Transformer-based processing, seems to offer a more flexible and dynamic representation of spatiotemporal relationships that doesn’t depend on predefined variable graphs. We focused on forecasting in the field of traffic congestion, which is a pervasive challenge in urban areas.

        We focused on two SOTA spatiotemporal models that were evaluated on traffic forecasting datasets. The first is the STAEformer . STAEformer proposes a novel adaptive embedding that learns the spatio-temporal relations in the dataset. In their architecture, the input embedding is generated by combining the projected raw data (denoted by \(E_p\) in the embedding layer) with temporal embeddings (\(E_f\)) and the adaptive embeddings (\(E_a\)), which was used instead of an embedding solely focused on capturing spatial relations. This output is then fed into temporal and spatial transformer layers, followed by a regression layer.

        Architecture of the Spatio-Temporal Adaptive Embedding transformer (STAEformer).

        The second is the Spacetimeformer . Spacetimeformer uses embeddings generated from breaking down standard embeddings into elongated spatiotemporal sequences. The idea behind doing this is to enable the downstream tasks to learn direct relationships between variables at every timestep. In their architecture, these embeddings are fed into a variant of the transformer model using local, global, and cross self-attention. The figure below shows an intuitive visualization for this idea.

        Architecture of the Spacetimeformer Embedding.

        Dataset

        We used the PEMS08 dataset , which contains traffic data in San Bernardino from July to August of 2016. Each data point consists of readings from 170 detectors along with the time of day and day of the week they were recorded. We initially considered using the PEMSBAY dataset , which is widely used in traffic speed forecasting, but it was almost double the size of the PEMS08 dataset and took too long to train our model on.

        Methodology

        The problem statement is as follows: given the sensor readings across the 170 sensors for the previous N timesteps, we want to predict their readings for the next N timesteps. We tested the model with varying context lengths, but we found that the default value of 12 given in the STAEformer paper provided enough information to the model. We used huber loss as we wanted the model to converge faster in the presence of outliers, which was necessary given the limited compute that we had (training 50 epochs took around 3 hours).

        We trained STAEformer for 50 epochs, which was sufficient to achieve performance metrics similar to that of the paper. To compare the embeddings from Spacetimeformer, we retrained the model end to end after replacing the embedding layer in the model with Spacetimeformer’s embedding layer. To do this, we kept the context dimensions the same and flattened the input sequence along the input dimension and the dimension corresponding to the number of sensors. This structured the embedding layer so that it could learn the spatiotemporal relations across the sensors from different time frames.

        Replacing the embedding layer within the STAEformer with a pretrained embedding layer from the Spacetimeformer instead may seem like a more legitimate method to test the effectiveness of the embeddings, as we would basically be doing transfer learning on the embedding layer. However, the pretrained embeddings from Spacetimeformer might have been optimized to capture specific spatiotemporal patterns unique to its architecture, which was why we believe training the model end to end with the Spacetimeformer embeddings would result in a more accurate and contextually relevant integration of the pretrained embeddings into the STAEformer framework.

        After training, we wanted to provide visualizations of the embeddings from STAEformer and Spacetimeformer to show whether the learned embeddings are meaningful at all. To do this, we obtained the embeddings by passing in the raw data through the embedding layers of the loaded models and generated t-SNE plots with them. For the STAEformer, we focused solely on the adaptive embeddings as they were the parts of the embedding layer that captured spatiotemporal relations in the data. To generate the t-SNE plots, we had to reshape the embeddings so that they could be passed into the function so we flattened them across the model dimension. After fitting the t-SNE, we then unflattened the embeddings back to their original shape and plotted them. Each sensor was color coded with different colors, and the results can be shown in the next section. We hypothesized that the t-SNE plots would contain clusters grouped by either the sensors or the time the readings were recorded.

        After generating the t-SNE plots, we wanted to test the effects of perturbing the raw data on the embeddings. We wanted to know how the embeddings would change. For example, regardless of what the clusters represent, are they tighter? Will additional clusters be formed? Conversely, will some of the existing clusters break apart? In particular, we were hoping that augmenting the data would perhaps improve cluster formations in the worse looking embeddings, as there is a good possibility that the data itself isn’t good enough.

        Results

        The table below shows the results after training STAEformer and the STAEformer model with a Spacetimeformer embedding layer for 50 epochs each. Table of loss values:

        Embedding Layer Train Loss Validation Loss
        STAEformer 12.21681 13.22100
        Spacetimeformer 12.42218 16.85528

        We can see that the STAEformer had better training and validation loss than the Spacetimeformer. While the train loss converged to similar values, the validation loss for the model using the STAEformer embedding layer was much better. So now that we know the STAEformer embedding layer seems to perform better than the Spacetimeformer embedding layer, we plotted the embeddings for both to analyze why this is the case. To do this, we passed a data point from the validation set through the embedding layer. The results are shown in the figure below.

        t-SNE plot of the STAEformer embeddings.

        The t-SNE plot for the STAEformer embeddings shows clearly separate clusters for most of the 170 different sensors. The shape of each cluster is a “snake-like” trajectory. Therefore, we know that the embeddings preserve some pattern-like notion across readings from a single sensor. We hypothesize that each of these trajectories represent the reading of a single sensor over time. There are a couple outliers, where the clusters are not grouped by color. One prominent example is the string of cyan, maroon, and moss points along the bottom of the plot. However, even these points have some clustering, though they may not be clustered by color.

        t-SNE plot of the Spacetimeformer embeddings.

        On the other hand, the t-SNE plot for the Spacetimeformer embeddings show no clear clusters across the same sensor. The distribution resembles a normal distribution, meaning that there is little pattern preserved in the embeddings. It becomes more difficult to differentiate between data points from the same sensor across time.

        In order to further analyze the effectiveness of each embedding layer, we perturbed the training data and re-trained each model. We were expecting the clusters from the STAEformer embeddings to remain largely the same, with some of the existing clusters possibly breaking apart due to the added noise. However, we were hoping that the Spacetimeformer embeddings would show more visible clusters after the raw data was perturbed. Given the characteristics of the embeddings, one possible output we expected were clusters containing multiple colors. An example is shown in the following image.

        Ideal t-SNE plot of the Spacetimer embeddings.

        This would show that the Spacetimeformer successfully learned spatial relationships across the sensors at variable timesteps. Instead of each cluster representing the embeddings for one sensor, the presence of larger clusters with multiple colors could imply that the Spacetimeformer learned spatiotemporal relations among the corresponding sensors and embedded them into a larger cluster.

        The following table shows the results after training the model with the perturbed data.

        Embedding Layer Train Loss Validation Loss
        STAEformer (with perturbations) 13.58251 13.35917
        Spacetimeformer (with perturbations) 13.42251 17.01614

        As expected, validation loss slightly increased for both models, and the STAEformer continued to have lower loss values than the model with the Spacetimeformer embedding layer.

        When we generated the t-SNE plots with the new embeddings, we obtained the following:

        t-SNE plot of the perturbed STAEformer embeddings.
        t-SNE plot of the perturbed Spacetimeformer embeddings.

        Both t-SNE plots for the STAEformer and Spacetimeformer embeddings look the same as when the models were trained on the original, unperturbed data. So unfortunately, the augmentation had little to no effect on the embedding layers for these two models.

        Since the t-SNE plots can be hard to parse with the human eye, we decided to focus on the embeddings for the most relevant features of the dataset and see how they compared between the Spacetimeformer and STAEformer. In parallel, this would enable us to identify the failure modes of the dataset and augment those features to see if they improve the model performance. In order to do this, we used PCA to identify the principal components. From there, we found which features help explain the most variance in the dataset and identified those as the features that had the largest impact on the learned embeddings.

        Z-normalized correlation matrix between the original PEMS08 dataset and PC-space, normalized by explained variance.

        This heatmap shows the top 10 principal components and the top 10 features that correlate with each principal component. From this heatmap, we can see that the 9th sensor in the dataset is the most relevant feature. Therefore, we can find the corresponding embedding to be the most relevant.

        Using only the 5 most relevant embeddings obtained from PCA, we re-graphed the t-SNE plots. This helped us to narrow our attention to the most important embeddings.

        t-SNE plot of the top 5 STAEformer embeddings.
        t-SNE plot of the top 5 Spacetimeformer embeddings.

        As expected, the embeddings for the most relevant sensors in the STAEformer all maintain the “snake-like” trajectory. However, the embeddings for even the most relevant sensors in the Spacetimeformer are seemingly random, and have no pattern across the points.

        Cumulative PCA plot of the original PEMS08 dataset.

        We found that the top 25 sensors explained 95% of the variance in the dataset, so we did a quick experiment where we augmented the rest of the 145 sensors (as opposed to the entire training dataset) to see how that affected the learned embeddings. For this augmentation, we expected the results to not improve by much since the learned embeddings for even the most relevant sensors in Spacetimeformer didn’t form visible clusters in the t-SNE plots. As expected, the results were almost identical to the ones generated from augmenting the entire dataset.

        Conclusion, Discussion, Next Steps

        There are a couple of reasons why we think the Spacetimeformer performed worse than the STAEformer overall. The first explanation that came to mind is that the readings across different sensors may be mostly independent from one another. The color coded t-SNE plots for the STAEformer clearly separate each sensor into its individual cluster. In this case, the Spacetimeformer would not be suited for the task as its embedding layer solely focuses on learning spatiotemporal relationships, while the STAEformer also contains an embedding layer that is solely dedicated to learning temporal relationships.

        A second, more plausible explanation deals with the embedding architecture. The difference in performance between the STAEformer and the Spacetimeformer in time series forecasting shows the importance of adaptive embeddings in capturing spatio-temporal relationships. While the STAEformer introduces adaptive embeddings to comprehend the patterns in the data, the Spacetimeformer relies on breaking down standard embeddings into elongated spatiotemporal sequences. The t-SNE plots show that the STAEformer’s adaptive embeddings generate clusters representing sensors with snake-like trajectories, providing a visualization of the model’s ability to capture spatio-temporal patterns. In contrast, the Spacetimeformer’s embeddings follow a scattered distribution, indicating challenges in identifying clusters. This suggests that the Spacetimeformer’s approach may face limitations in effectively learning the spatio-temporal relationships within the PEMS08 dataset, and potentially traffic data in general.

        Having said all this, the resilience of both the STAEformer and Spacetimeformer to perturbations in the raw data showcases the robustness of their learned representations. Despite the added augmentations, the fact that the t-SNE plots remain largely unchanged indicates the stability in the embedding layers. This may be attributed to the models’ ability to learn a generalizable representation of the spatio-temporal patterns resilient to changes in the input data, regardless of how accurate they may be. This may also be attributed due to the dataset itself. The PEMS08 dataset’s readings may already have been noisy, as it’s unlikely that the readings were recorded with perfect accuracy. We would like to explore these implications of the embeddings’ robustness in our future work.

        Another possible avenue we would like to explore is why certain sensors (such as the 9th sensor) are more relevant than others beyond just the theory. We came up with a couple hypotheses. First, it’s probable that this particular sensor is placed at important intersections, such that cars that pass this sensor are guaranteed to pass many other sensors. This would mean that there exists a way to extrapolate the readings from this sensor to the readings from other sensors. Tangentially related, it’s possible that two nodes are negatively correlated, such that the cars that pass through one node tend to not pass through another node, and the model extracts readings based on this relationship. If neither of these ideas is the case, the exact opposite concept could be true: the sensor is at a location where the speed data is very consistent, such as a highway. This allows the readings from the sensor to give very accurate predictions. The next step would be to figure out the geographical locations of the sensors and determine whether the ones we found to be the most relevant seem to be placed at important locations.

        We would also like to do some more experimentation in the future. We used a personal GPU for training (an RTX 2070), and it took a few hours to train the model for every one of our experiments which made it difficult to tune our hyperparameters. Further experiments we would like to run with more compute include running the experiments on the Spacetimeformer model architecture instead of the STAEformer architecture and replacing its embedding layer with STAEformer’s. We mentioned before that the learned embeddings may have been optimized for the model architecture it’s from. Therefore, if the resulting plots from the embeddings look similar to the ones we have generated, then we have conclusive evidence that the STAEformer input embedding does a better job of learning the spatio-temporal relations in the data.

        \ No newline at end of file diff --git a/blog/2023/speculative-decoding/index.html b/blog/2023/speculative-decoding/index.html new file mode 100644 index 00000000..b8e33a72 --- /dev/null +++ b/blog/2023/speculative-decoding/index.html @@ -0,0 +1,32 @@ + Accelerating large model inference with speculative decoding - 6.s898 | 6.S898 Deep Learning Blogs 2023

        Accelerating large model inference with speculative decoding - 6.s898

        An investigation into methods to speed up autoregressive inference through increased parallelization, specifically through speculative sampling and decoding.

        Introduction

        Inference in autoregressive models

        Autoregressive models, particularly transformers and RNNs, play a crucial role in tasks involving sequential data processing, such as natural language processing and time series analysis. However, a significant limitation of these models is their slow inference speed. The primary bottleneck in these models is associated with memory reads and writes, rather than arithmetic computations. This is especially problematic in larger models with vast parameter spaces, where efficient memory management is critical to performance. Further, these models generate outputs sequentially, one token at a time, with each new token depending on all previously generated tokens. This inherent sequential dependency limits the model’s ability to parallelize the token generation process, leading to inference latency much greater than that of models capable of processing data in parallel. The challenge is to overcome this sequential bottleneck without compromising the model’s ability to accurately capture dependencies in the data.

        The central question this project addresses is whether it’s possible to introduce parallelism into the inference process of autoregressive models. A more specific aspect of this problem is whether probabilities for multiple tokens can be computed simultaneously, rather than processing each token individually. This project aims to enhance methods that have been proposed for parallelizing the decoding process, focusing on solutions that draw inspiration from speculative execution in processors and other systems design strategies.

        Speculative execution in processors

        Speculative execution is a technique used in CPU architecture to improve processing speed. Instead of waiting for sequential execution of instructions, processors predict which instructions are likely to be executed next and start processing them in advance. If the prediction is correct, this leads to a significant reduction in latency, as the processor has preemptively executed necessary instructions. If the prediction is incorrect, the processor discards the speculative results and reverts to the correct execution path. This method effectively utilizes CPU resources that would otherwise remain idle during the waiting period, thus optimizing the overall processing speed and reducing latency.

        Applying speculative execution to model inference

        Inspired by speculative execution in processors, this project explores how similar principles can be applied to accelerate inference in large autoregressive models. The concept involves generating multiple potential outputs in parallel, using a smaller or draft model, and then evaluating these outputs with the larger target model. This mimics the speculative execution process where multiple paths are explored simultaneously, with the most promising path being selected as the final output. This approach, referred to as “speculative sampling” or “speculative decoding,” aims to introduce a level of parallelism in the inference process, enabling faster generation of outputs without compromising the quality or accuracy of the model’s predictions.

        Hierarchical speculative decoding

        In addition to implementing already proposed speculative decoding techniques, this project investigates a strategy that has the potential to further speed up inference: hierarchical speculative decoding. This method aims to accelerate the smaller approximation model with an even smaller, faster model. While I experiment with two-layer (traditional) and three-layer hierarchies in this project, one could theoretically extend this idea to create an n layer hierarchy, assuming sufficient memory. Although researchers developing speculative decoding algorithms and sampling methods have mentioned the potential viability of hierarchical speculative decoding, none have tried to implement it. Thus, this project aims to find an efficient implementation of the approach and determine if it actually further speeds up inference.

        Current Work

        Multiple papers have presented novel speculative decoding algorithms, with the nuance typically in the way that sampling is performed. The two most-referenced papers in this space are DeepMind’s Accelerating Large Language Model Decoding with Speculative Sampling (Chen et al.) (paper) and Google Research’s Fast Inference from Transformers via Speculative Decoding (Leviathan et al.) (paper). This project draws its architecture from the latter, so we will explore its approach in more depth and describe how its shortcomings motivated the experiments in this project.

        General setup

        The approach presented in Fast Inference from Transformers via Speculative Decoding (Leviathan et al.) aims to accelerate inference from a target transformer-like model $M_p$. We present a distilled version of the speculative decoding set-up, algorithm, and evaluation here.

        We start with two models:

        1. $M_p$ (the target model)
        2. $M_q$ (a smaller approximation model)
        $p(x_{t} \mid x_{<t})$ describes the sampling of token $x_t$ given prefix $x_{<t}$, and we will refer to this as just $p(x)$. The same shorthand applies for $q(x)$.

        Our goal is to generate $\gamma \in \mathbb{Z}^{+}$ completions quickly with the approximation model, check that the probabilities of those generations are identical to the target model’s (in parallel), and then reject and resample starting from the first “wrong” generation.

        Sampling $p(x)$

        In order to sample $p(x)$, we will sample $x \sim q(x)$ instead.

        1. If $q(x)\leq p(x)$, we keep $x$
        2. Otherwise, we reject $x$ with a $1-\frac{p(x)}{q(x)}$ probability.
          • If we end up rejecting $x$, we resample $x\sim\text{norm}(\max(0, p(x)-q(x)))$.

        Basically, we want $x\sim p(x)$ to be at least as likely as $x \sim q(x)$. Following the steps above is equivalent to just sampling $x \sim p(x)$, and the paper provides a comprehensive proof of this in its appendix.

        The Algorithm

        We use an implementation of the following algorithm from Leviathan et al. We start with some conditioning $prefix$ (our starting tokens) and generate between $1$ and $\gamma+1$ tokens at once.

        Evaluation

        To evaluate the effectiveness of this approach, we need to calculate the total wall time improvement of speculative decoding versus normal inference on the target model.

        To simplify this evaluation, assume we can run $\gamma + 1$ concurrent evaluations of $M_p$ in parallel. Now, we just need to get the cost of running $M_q$ (the approximation model).

        Let $c$ = the cost coefficient, which is the ratio between the time for a single run of $M_q$ and a single run of $M_p$. $c$ will depend only on our hardware and software implementation details.

        Now, we need some measure of how well $M_q$ approximates $M_p$.

        Let $\beta$ be the acceptance rate.

        • $\beta_{x_{<t}}$ is the probability of accepting $x_{t}\sim q(x_{t} \mid x_{<t})$ by speculative sampling.
        • Assume that the $\beta$s are i.i.d.

        Let $\alpha=E(\beta)$. This gives us the average acceptance rate across many samples, which is a good measure of how well $M_q$ approximates $M_p$.

        The expectation of the number of generated tokens is now a bounded geometric function of $\alpha$ (bounded by $\gamma$) :\(E(\text{# of generated tokens}) = \frac{1-\alpha^{\gamma + 1}}{1-\alpha}\)Given this relationship, we can derive the expected improvement factor for the total wall time (assuming longer generations):\(\frac{1-\alpha^{\gamma+1}}{(1-\alpha)(\gamma c+1)}\) For the sake of conciseness, we leave the full proof to the paper, but the general sketch relies on the fact that each run of Algorithm 1 costs $Tc\gamma + T$ (where $T$ is the cost of running one step of $M_p$). We run $M_q$ $\gamma$ times and $M_p$ once, and each run of Algorithm 1 produces $\frac{1-\alpha^{\gamma + 1}}{1-\alpha}$ tokens. Since the cost of producing a single token with a standard algorithm is $T$, we get the above improvement.

        Hierarchical Speculative Decoding

        How much faster can we make model inference by accelerating the approximation model with an even smaller, faster model? Let’s look at the case where we have three models:

        1. $M_p$: The target model
        2. $M_q$: The first-level approximation model, used to approximate $M_p$.
        3. $M_r$: The second-level, even smaller approximation model, used to approximate $M_q$.

        With the introduction of $M_r$, we now need to consider additional parameters:

        • $\gamma_r$: The number of concurrent evaluations that can be run using $M_r$.
        • $\beta_r$: The acceptance rate for $M_r$, analogous to $\beta$ for $M_q$.
        • $\alpha_r = E(\beta_r)$: The average acceptance rate for $M_r$, representing how well $M_r$ approximates $M_q$.

        Now, $\beta$ for $M_q$ becomes a function of $\beta_r$, reflecting the hierarchical nature of this setup. The acceptance rate $\beta$ for $M_q$ now depends on how effectively $M_r$ approximates $M_q$, which in turn approximates $M_p$.

        We can hypothesize that the effectiveness of $M_q$ in approximating $M_p$ might now be influenced by the performance of $M_r$. This could mean that $\beta$, and consequently $\alpha$, might be a function of $\alpha_r$.

        The expectation of the number of generated tokens would now need to consider the hierarchical relationship. A new formula would be required to calculate this expectation, taking into account the performances of both $M_q$ and $M_r$.

        Finally, the expected improvement factor for the total wall time would also need to be recalculated to reflect this hierarchical structure. This would involve integrating the costs and efficiencies of $M_r$ into our existing model, which so far only considered $M_q$ and $M_p$.

        Whether or not this approach will actually speed up the model in practice is left to be determined experimentally.

        Experiments

        I experimented on multiple transformer model families, most notably facebook/opt-125m, facebook/opt-1.3b, and facebook/opt-13b.

        The primary research questions I investigated include:

        1. How many orders of magnitude larger should $M_p$ be than $M_q$ to achieve the maximal improvement?
        2. To what extent does hierarchical speculative decoding further speed up inference?

        General set-up for experiments

        • For the standard (non-hierarchical) speculative decoding, I implemented the algorithm exactly as described above.
          • I used a gamma value of 4
        • I used both top-k sampling and nucleus sampling, with k=20 and p=0.9 constant throughout all experiments.
        • I typically prompted the models with input_text = "Once upon a" and generated 20 tokens.
        • I used consistent sets of seeds (such as torch.manual_seed(898)) when running the same experiment across multiple model combinations for the sake of reproducibility and so that I could more easily compare results across models on shorter generation lengths.

        How many orders of magnitude larger should $M_p$ be than $M_q$?

        • To investigate this, I calculated inference time (tokens per second) on each of the following (approximator, target) model pairs:
          • facebook/opt-125m, facebook/opt-1.3b
          • facebook/opt-125m, facebook/opt-13b
          • facebook/opt-1.3b, facebook/opt-13b

        Set-up for hierarchical speculative decoding

        I experimented with a three-level hierarchical approach using

        1. Small approximation model $M_r$: facebook/opt-125m
        2. Approximation model $M_q$: facebook/opt-1.3b
        3. Target model $M_p$: facebook/opt-13b

        To add hierarchical decoding to the algorithm, I replaced the sampling of $M_q$, where we typically sample $x \sim q(x)$ with a sampling process that mirrors the sampling from the target model. So we sample from $x\sim r(x)$ instead, keep if it’s at least as likely in $q(x)$, and reject proportional to the likelihood of the sample under either model, adjusting the distribution as before if we need to sample again. This made the theoretical implementation rather simple, as we could re-use a lot of the code. The implementation in practice was slightly more difficult than expected, however, as my implementation of the two-layer speculative decoding didn’t permit direct functional composition, and I had to restructure the implementation a bit.

        Results

        Calculating $c$ for each model pair

        (The larger model is used as the target model $M_p$)

          opt-125m opt-1.3b opt-13b
        opt-125m 1 N/A N/A
        opt-1.3b 0.015 1 N/A
        opt-13b 0.022 0.015 1

        This gives insight into the relative efficiencies of the models when performing assisted inference.

        The general effect of speculative decoding

        Wall time improvements from speculative decoding have already been documented, so these results are not novel, but I include them here for further proof that the algorithm works and for comparison with other results.

        Target Model Approximation Model Tokens/Second
        opt-13b None 0.047
        opt-13b opt-1.3b 0.087
        opt-13b opt-125m 0.057
        opt-1.3b None 0.336
        opt-1.3b opt-125m 1.05

        In all cases, including an approximation model increases the model’s token per second inference rate.

        Acceptance rates and wall time given $M_p$ and $M_q$

        Target Model Approximator Model Tokens/Second Acceptance Rate
        opt-1.3b opt-125m 1.05 38%
        opt-13b opt-125m 0.057 15%
        opt-13b opt-1.3b 0.087 19%

        These results help us answer the question: How many orders of magnitude larger should $M_p$ be than $M_q$?

        One order of magnitude seems to yield higher acceptance rates, and the smaller models were obviously faster.

        Results of hierarchical speculative decoding

        Target Model Approximation Model Tokens/Second Acceptance Rate
        opt-13b None 0.047 N/A
        opt-13b opt-1.3b 0.087 19%
        opt-13b opt-125m 0.057 15%
        opt-13b opt-1.3b, opt-125m 0.030 17%, 33%

        I found that running the three-layer hierarchical speculative decoding did not speed up model inference, but I hypothesize that this is because of compute limitations. Running all three models on my computer given the parallelization requirements of the algorithm forced the program to map data to devices in a less-efficient way. I wasn’t able to find smaller pre-trained models with which I could test this on my local machine, so a future experiment should either train custom smaller models for the sake of inference in this setting or use a device with greater memory capacity.

        Conclusion

        This project explored the potential of speculative decoding, a technique inspired by speculative execution in processors, to accelerate inference in autoregressive models like transformers. Our exploration focused on implementing and extending existing methods of speculative decoding, particularly the ones proposed in the seminal works by Chen et al. and Leviathan et al., while also introducing early experiments with the concept of hierarchical speculative decoding, which is to be further investigated.

        \ No newline at end of file diff --git a/blog/2023/speech-recognition-proposal/index.html b/blog/2023/speech-recognition-proposal/index.html new file mode 100644 index 00000000..e7ed9345 --- /dev/null +++ b/blog/2023/speech-recognition-proposal/index.html @@ -0,0 +1,32 @@ + Modeling Human Speech Recognition with Different Network Architectures | 6.S898 Deep Learning Blogs 2023

        Modeling Human Speech Recognition with Different Network Architectures

        Evaluating a neural network's ability to effectively model human speech recognition using CNNs vs. TNNs

        Introduction

        Recent advances in machine learning have enabled computers to perform perception tasks at levels approaching those of humans. In particular, structuring models biologically and using ecologically realistic training datasets have helped to yield more humanlike results. In the field of speech recognition, models trained under realistic conditions with stimuli structured how sounds are represented in the cochlea, with network layers imitating the processing pipeline in the brain, seem to be successful in performing speech recognition tasks. However, it is unclear whether specific network architectures are more beneficial to learning human speech recognition patterns. In this project, I seek to investigate how different network architectures such as CNNs vs. TNNs affect the ability to recognize speech in a humanlike way.

        One facet of more biological models is that they attempt to recreate the structure of the human brain. For auditory models, a useful structure to replicate is the cochlea; these replications are called cochleagrams. Cochleagrams have been used in order to model the ear more effectively, leading to models that imitate auditory perception in a more human-like way. A cochleagram works in a similar way to how the cochlea works in a human. It filters a sound signal through bandpass filters of different frequencies, creating multiple frequency subbands, where the subbands for higher frequencies are wider, like how the cochlea works in the human ear. The amplitudes of the different subbands are then compressed nonlinearly, modeling the compressive nonlinearity of the human cochlea .

        A recent application of cochlear models to speech perception is found in Kell’s 2018 paper, where they create a convolutional neural network which replicates human speech recognition . They trained the network to recognize a word in the middle of a 2 second clip, from a possible vocabulary of 587 words. To imitate how the ear functions, they preprocessed the sound signals into cochleagrams, intended to be a more biologically realistic model of the ear. The activations in different layers of the neural network were able to predict voxel responses in different parts of the brain, revealing that the auditory processing pipeline aligned with layers of the network.

        A natural question to ask at this point is whether a convolutional neural network is the best architecture for this task. In Mamyrbayev Orken et al.’s 2022 paper, they explore a speech recognition system for Kazakh speech . In this paper, they create a transformer model that is able to perform a continuous speech recognition task on “clean” speech (speech without noise or alterations). They trained the model on a dataset of clean speech and telephone speech, and the model was able to perform the speech recognition task on clean speech with high accuracy. Although this network does well on the specific task it is given, it is still unclear how its performance compares to that of a convolutional neural network due to not having a direct comparison between the two on the same task. In addition, it is unclear whether the transformer can yield human-like results for speech recognition.

        In the field of computer vision, there has been work done comparing convolutional neural networks to vision transformers for the task of object recognition. Tuli’s 2021 paper explores this through the lens of human-like object recognition, determining whether the errors of a vision transformer or a convolutional neural network are more similar to humans . Their findings indicated that the vision transformer behaved in a more human-like way. In particular, in human vision, there is a phenomenon called shape bias, where if an object has the shape of a certain object A, but the texture of a certain object B, humans will be likely to predict that the object is an instance of object A. Many vision models struggle with shape vs. texture bias, being more inclined to categorize objects by their texture; the vision transformer presented in this paper has approximately twice the amount of shape bias as the convolutional neural network, further suggesting that the transformer is a more human-like representation of vision.

        In this post, I investigate more closely the importance of network architecture in the ability to effectively model human speech recognition. I focus on three metrics of evaluating how well a model replicates human speech recognition:

        1. Ability to generalize to speakers not found in the training set: Humans hear speech from new speakers all the time, and a person who they’ve never heard before usually does not hinder their ability to recognize what they are saying. Models of speech recognition are usually trained on a corpus of speech that is inherently biased towards a set of talkers that participates in creating the corpus, so it is possible that it could overfit to the speakers in the training set. A good model of speech recognition should be able to perform well on new talkers.
        2. Ability to recognize speech in different background noise conditions: Humans rarely hear speech unaccompanied by some form of background noise, and are generally robust to noise even at low signal-to-noise ratios. Many models of speech recognition such as the transformer in Orken 2022 are not trained or tested on noisy speech, so it is likely that they would not be able to recognize speech in these conditions.
        3. Ability to recognize distorted forms of speech: Humans are remarkably robust to various distortions of speech such as sped-up/slowed-down speech, reverberant speech, and local-time manipulations, despite not encountering some of these often in their lives . In order to further test a model’s ability to replicate human speech recognition, we should test how well it performs on speech manipulations.

        Methods

        The models in my experiment were given a 2 second speech clip, and were tasked with identifying the word overlapping the middle of the clip. In particular, they were trained on a dataset containing 2 second speech clips from the Common Voice dataset, where the word at the middle of the clip is from a vocabulary of 800 words, imposed on different background noises taken from the Audio Set dataset . So, the models were essentially given a speech clip and asked to perform an 800-way classification task.

        In order to generate the fairest comparison between convolutional neural networks and transformers, I start with a baseline CNN inspired by Saddler 2021, and then either replace the last convolutional layer with a multi-headed attention layer or remove it from the network . Each block of the CNN is made up of a convolutional layer, followed by a ReLU activation, a weighted-average pooling layer, and a normalization layer .

        The baseline CNN (called CNN6) is composed of 6 blocks followed by a fully connected layer and a classification layer. The transformer-CNN hybrid (CNN5+Attn) is composed of 5 convolutional blocks, followed by an attention block, followed by a fully connected layer and a classification layer. Lastly, I created a “control” CNN (called CNN5) that is the same as CNN6, but with the last convolutional block removed. This was intended to test whether an attention layer provides any benefit as opposed to not including the layer at all. All networks begin with an initial data preprocessing step that converts the audio signal into a cochleagram.

        It is difficult to derive a direct comparison between a convolutional layer and a multi-headed attention layer, in particular how to decide how many attention heads to include and what the attentional layer dimension should be. In order to have the best chance of comparison between CNN5+Attn and the other networks, I ran multiple CNN5+Attn networks with a larger vs. smaller number of attention heads (64 vs. 16) and a larger vs. smaller attention dimension (512 vs. 16) for 10 epochs to determine a preliminary measure of network performance across these parameters. The preliminary results after 10 epochs showed that the CNN5+Attn network with a small number of attention heads and a smaller attention dimension had the highest training accuracy and trained the fastest, so I used this model for my analysis.

        After preliminary analysis, I trained the CNN6, CNN5+Attn, and CNN5 networks for 100 epochs. I then evaluated the models’ performance on this task in the three aforementioned conditions.

        1) To evaluate performance on clips spoken by talkers not encountered in the training dataset, I evaluated the models on clips taken from the WSJ speech corpus.

        2) For clips superimposed on different types of background noise, I evaluated the model on 5 types of background noise, in signal-to-noise ratios ranging from -9 dB to +3 dB, plus a +infinity condition which represents no background noise:

        • Auditory scenes: background noises encountered in everyday life like rain or cars passing by
        • 8-speaker babble: 8 other people talking in the background
        • Music
        • Speech-shaped noise: gaussian noise that is given the envelope of speech signals
        • Modulated speech-shaped noise: speech-shaped noise that is modulated so that the noise alternates between being very quiet and very loud

        3) Distorted speech clips with 6 types of distortions:

        • Sped-up/slowed-down speech (preserving original pitches)
        • Speech in a reverberant environment: speech convolved with an impulse response of different reverberant environments
        • Whispered speech
        • Inharmonic speech: speech signals are decomposed into their harmonics, and the harmonics are moved up or down to distort the signal
        • Sine wave speech: speech signals are filtered into frequency subbands, and each band is replaced by a sine wave with the center frequency of the band
        • Locally time-reversed speech: speech is decomposed into chunks of a certain length, and the chunks are reversed

        Then I compared the models’ performance on these conditions to existing human data where humans were asked to perform the same task of recognizing the middle word of a 2-second clip in various types of noise or distortion.

        Results

        Overall, I found that CNN6 performed better than CNN5+Attn, which performed better than CNN5. After 100 epochs, CNN6 had a validation accuracy of around 0.60, CNN5+Attn had validation accuracy of 0.55, and CNN5 had validation accuracy of 0.53. In particular, CNN5 overfit quite a bit (0.12 gap between training and validation accuracy) while CNN5+Attn overfit much less (0.05 gap between training and validation accuracy).

        All three models performed similarly to humans for clean speech spoken by talkers not encountered in the training dataset.

        In different types of background noise, in general the models performed similarly to humans, except in the condition of modulated speech-shaped noise. In general, humans perform better for modulated noise than “stationary” noise because they are able to fill in speech in the “gaps”, or quieter sections, of the noise, but none of the models have as strong of an effect as humans for this. The CNN5+Attn model does particularly badly on this compared to the other networks.

        The models all perform similarly to humans for inharmonic speech, local time reversal, and low levels of sped-up or slowed-down speech. For whispered speech and sine-wave speech, the models perform slightly worse than humans, with CNN6 performing better than CNN5+Attn performing better than CNN5. For reverberant speech and extremely sped-up or slowed-down speech, all of the models perform significantly worse than humans, with the same hierarchy of performance between the models.

        Discussion

        Overall, it seems that CNN6 is the best option for replicating human speech recognition, but CNN5+Attn does have some benefits. In particular, it trains substantially faster than CNN5, and overfits less than both CNN5 and CNN6. The hybrid architecture may help with overfitting because it forces the model to do multiple types of analysis in order to determine the output. Although CNN5+Attn does still perform worse than CNN6, it is reasonable to hypothesize that it has potential. Due to resource limitations, I was only able to test two different conditions for number of attention heads and attention dimension, but as shown from the preliminary training the number of attention heads and the attention dimension does have an effect. It seems likely that with a more extensive search of these parameters, it could be possible to create a CNN5+Attn network that performs similarly to or better than the CNN6 network on these tasks.

        All of the models have discrepancies with humans for the modulated background noise condition. One possible explanation for this is that the models do not learn the process of recognizing smaller phonemes of a word, only learning a classification task on the 800 words that they are given, so they are unable to piece together chunks of a word into a larger word like humans do. A possible way to test this would be to create a model for a phoneme-detection task, and then add a layer that combines the phonemes into a larger word, and see whether this performs better in this condition. This would make sense because some of the earliest things humans learn about speech are not full words, but phonemes like “ba” or “da,” so a model trained on this task would then have been optimized in more human-like conditions.

        In addition, there are some discrepancies between the models and humans in some of the speech distortions. The largest discrepancies are found in very sped-up or slowed-down speech, and in reverberant speech. This seems likely to be due to a shortcoming of the dataset. The Common Voice dataset is composed of people reading passages, which is generally a single slow, steady speed, and there is no reverberation. The speech that humans encounter in their lives varies a lot in speed, and they also encounter speech in many different reverberant environments, so they are optimized to recognize speech in these conditions. It is reasonable to assume that if reverberation and varied speeds of speech were incorporated into the training dataset, the model would perform better in these conditions.

        Further directions of this project could include trying more variations of the parameters of the attention model. In addition, it would be interesting to try different hybrid architectures; for example, 4 layers of convolution followed by 2 layers of attention. This could give a more complete idea of the benefits and disadvantages of CNNs and transformers for the task of speech recognition. In conclusion, the current results seem promising, but more extensive testing is needed in order to get a full picture of whether these models can accurately replicate human speech recognition.

        Acknowledgements

        I would like to thank my fellow members of the McDermott Lab, particularly Mark Saddler for creating the code for the baseline CNN, and Erica Shook for providing me with human data and experimental manipulation code.

        \ No newline at end of file diff --git a/blog/2023/structured-physics-loss-diffusion/index.html b/blog/2023/structured-physics-loss-diffusion/index.html new file mode 100644 index 00000000..12853983 --- /dev/null +++ b/blog/2023/structured-physics-loss-diffusion/index.html @@ -0,0 +1,42 @@ + Understanding LLM Attention on Useless Numbers in Word Problems (and this Title has 8 Es) | 6.S898 Deep Learning Blogs 2023

        Understanding LLM Attention on Useless Numbers in Word Problems (and this Title has 8 Es)

        If Jack starts out with 4 llamas and Jill takes 2 of them, then Jack gets 5 chinchillas, how many llamas does he have?

        Summary

        We investigate how attention is used to identify salient parts of word problems. We find no difference in attention across layers between necessary and useless numbers in math word problems. Slightly decreasing attention on useless numbers in word problems increases performance, while increasing or significantly lowering attention decreases performance.

        Introduction

        Transformer model architectures are the new magic bullet in natural language processing, largely due to their attention mechanism. The sudden salience of the transformer and subsequent massive research focus resulted in the emergence of powerful large language models such as the GPT series, Llama, PaLM, and others. The ever-increasing size of these models, as well as the datasets on which they were trained, allows them to continually perform better at a wide range of text generation and analysis tasks [11].

        However, as with many generative algorithms - especially autoregressive ones like LLMs - the underlying model has no implicit structure for processing or analyzing a logical framework inside the prompt it is given. Transformers, and by extension LLMs, are at their core sequence-to-sequence models. These take in a sequence of arbitrary length and output a sequence of arbitrary length, for example an English sentence as the input and its French translation as the output. Sequence-to-sequence models leverage the fact that language has structure and syntax, and are capable of creating responses that mimic the structural rules followed by their training data [4, 6, 8]. However, in sequence-to-sequence models and the recurrent-neural-network-derived architectures that follow, such as the transformer, there are no intrinsic characteristics that leverage the logical framework of the input. Models that strive to have reasoning capabilities use a variety of approaches to augment the transformer architecture [10], such as specific prompting [1, 7], machine translation [3], salience allocation [5], and more. Some of these improved models exhibit performance that suggests the use of reasoning processes, but as described by Wei et al. [12] “As for limitations, we first qualify that although chain of thought emulates the thought processes of human reasoners, this does not answer whether the neural network is actually ‘reasoning.’” Huang et al. share a similar sentiment that highlights that the most widespread solution, and an effective one, is simply the ever-increasing size of LLMs: “…there is observation that these models may exhibit reasoning abilities when they are sufficiently large… despite the strong performance of LLMs on certain reasoning tasks, it remains unclear whether LLMs are actually reasoning and to what extent they are capable of reasoning.”

        Before diving into why this is interesting, let’s take a step back and briefly inspect the transformer as an architecture. Transformers are loosely an extension of a recurrent neural network that leverage parallel processing and a mechanism known as attention to remove the typical reliance RNNs have on temporal data and instead allow the model to process an entire input sequence simultaneously [13, 9].

        But what is attention? The key upside of transformers is that they are able to capture both short- and long-range dependencies within the input simultaneously, without the need to manage a memory cell like in certain RNN architectures such as a long short-term memory network. This is accomplished through attention, essentially the computation of how much each part of the input should be weighted based on parameters learned from training data.

        As shown above, each element in the input, split into “tokens,” is given a calculated query and key vector, along with the value vector describing the text, image, or other kind of data contained in the token. This is designed to mimic a value in a database, corresponding to a specific key, being retrieved according to a query. Multiplying some query vector with a given token’s key vector results in a scalar that essentially defines the “significance” of the given token compared to the other tokens, known as an attention score. This attention score is then multiplied by its corresponding token’s value vector and summed to create a context vector representing the aggregate information from the attention step.

        Now we circle back to word problems. Due to the aforementioned absence of explicit logical reasoning capabilities, transformer-based language models - especially smaller ones - can struggle with the few short analytical hops to correctly evaluate a word problem without help. For example, the following question was asked to Llama 2, Meta’s open-source LLM released in 2023. This version of Llama, the smallest available, has 7 billion parameters.

        *User: Answer as concisely as possible. Jack has 7 chairs. Jill takes 7 chairs from him, as well as 2 brooms. How many chairs does Jack have left?*
        *Llama-2-7b-chat: Jack has 7 chairs left.*

        You may notice that this response is incorrect. However, it is wrong in a way that seems to ignore certain important information presented in the question (removing 2 brooms). Of course, there is also unimportant information in the prompt that we want the model to ignore - the fact that Jill took two of Jack’s brooms is not relevant to the number of chairs in his possession.

        Existing approaches to entice LLMs to correctly answer word problems like these take a few forms, touched on previously. There are various versions of prompt engineering, which are designed to ask the question in a specific way in order to prompt the model’s response to be formatted in a certain way. Zero-shot chain-of-thought prompting [1, 12, 14] is a commonly cited example, where an additional instruction such as “Let’s think about this step by step” or “Let’s think analytically” are appended to the prompt. These additional instructions encourage the model to decompose the problem into intermediate steps and solve them procedurally. However, Wei et al. note that this does not indicate that the model itself is reasoning, only that it is achieving better results by emulating a structure often used in linear reasoning problems. Additionally, the authors go on to note that this emergent behavior of large models is challenging to reproduce in smaller models. Another novel approach is to parse the input information in a way that is conducive to solving an underlying math problem numerically. Griffith and Kalita treat this process as a machine translation problem, training several unique transformer architectures to make multiple translations from English to arithmetic expressions [3] that can then be evaluated computationally, outside of the LLM. These two techniques can also be fused, using fine-tuned chain-of-thought prompting for zero-shot math word problem solutions, bridging the gap between the previous two methods [7].

        More broadly, solving word problems is a two-part problem: selecting for important information, and then analytically evaluating that information to arrive at an answer. There is a broad body of work on using LLMs to summarize bodies of text, which parallels extraction of useful numerical features from word problems. The two main types of summarization are extractive summarization and abstractive summarization, where the former remains truer to the original input text but struggles to create novel text, while the latter attempts to fill in those gaps but can sometimes create information that was not originally present and may not be correct [15, 5]. Wang et al. in particular create an augmentation to the transformer architecture, dubbed SEASON, that is designed to combine both extractive and abstractive summarization, but contains useful insights into how extractive summarization of text might apply to math word problems. For example, the abstractive power of SEASON comes from the underlying transformer and its generative capabilities, but it is constrained by a fixed-allocation salience system to emphasize extraction of useful information by essentially adding additional key vectors that describe their relevance to a summarization query. This allows the model to predict the salience of potential responses in order to reduce hallucination of abstractive elements. This salience-driven approach shows theoretical promise in complex extractive word problem scenarios, where managing an allocation of salience could translationally be indicative of useful numerical inputs rather than core themes. Salience also shares some characteristics, mechanically, with attention, and raises the question of whether intuition from summarization models can be applied to augment transformer attention to have better extractive logic.

        Motivation

        This question, bolstered by the similarly-themed research underlying the ability of LLMs to reason and solve math word problems, was the driving force behind our project. Attention is an extremely powerful tool, and a better understanding of how attention scores affect assessment and evaluation of word problems is necessary in order to use it more effectively to address the gaps in the reasoning capabilities of LLMs, especially smaller architectures. A true solution to this problem would be complex, but we strove to answer certain core questions about how math word problems move through large language models, what their attention scores can tell us about how the model is choosing to respond, and what information the model is responding to. Chiefly, we were interested in how the attention scores of certain tokens in word problems - particularly pertaining to numbers necessary for solving the problem - would change throughout the layers of the transformer, and whether that yields insight into how to tune the attention process generally to enhance the models’ abilities, both reasoning and extractive.

        Methods

        Model and Hardware

        Our chosen model for study was Meta’s Llama 2 7B-chat parameter model. This choice was a result of our particular focus on smaller LLMs, due to the aforementioned emergent reasoning capabilities of models with significantly larger numbers of parameters. Llama 2 is also open-source, allowing us to easily peel apart the attention layers and heads to study how input and output information propagated through the network, as well as extract model weights and attention values. The chat version of the model additionally is better suited for direct question responses, and includes wrappers to handle the relevant meta-parameters to make the chat interface feasible. We hosted Llama 2 on a vast.ai cloud instance due to the high VRAM requirements of the model. The instance consisted of a single Nvidia RTX 4090 GPU instance with 24GB of VRAM connected to an AMD Ryzen 9 5950X 16-core CPU. The model was supported by Nvidia CUDA version 11.7 and the cuDNN GPU-accelerated development library, version 8.9.7. The model itself ran using PyTorch 2.0.1.

        Prompt Generation

        We prepended the instruction “Answer as concisely as possible” to each prompt in order to deliberately circumvent potentially invoking chain-of-thought reasoning and thereby subverting the qualities under investigation regarding the model’s zero-shot ability to discern relevant and irrelevant information. In order to assess that capability, we created a question generation algorithm to randomly generate a bank of simple subtraction word problems, for example “If Jack starts out with 7 sponges and Jill takes 4 of them, then Jack gets 2 badges, how many sponges does he have?” Each question contains two numbers necessary to the subtraction - in this example, that would be the number of sponges before and after the events of the problem: 7 and 4. Each example also contains one useless number, corresponding to things that are not relevant to the ultimate question being asked to the model. In this case, that would be the two badges. Each number is generated in its numeral representation (‘7’ rather than ‘seven’), as this ensures that Llama encodes each of these numbers as a single token that can be easily traced.

        Numbers with more digits or numbers spelled out in natural language were often split into multiple consecutive tokens, so to simplify our visualizations we elected to force a single-token representation. This necessitated that each of the four numerical quantities in the math problem - the two relevant numbers, the useless number, and the answer - had to all be unique, in order to avoid accidentally crediting the model for producing a correct response when in fact it simply selected a number in the problem that had been generated to be a duplicate of the answer. This might occur with a problem like “If Jack has 8 umbrellas, and Jill takes 5 of them, then Jack gets 3 belts, how many umbrellas does he have?” In this case, attribution of salience to the value “3 belts” and subsequent inclusion of the number 3 in the answer introduces ambiguity into the correctness of the response, since 3 is in fact the true answer.

        To avoid one-off errors attributed with specific words or sentence structures, the algorithm was designed to randomly construct the sentences using multiple different semantic structures and sample the nouns used from a bank of 100 random objects. Coupled with large testing sets of several hundred examples, this prevents irregularities in the model’s responses to particular syntax or words from significantly affecting results. Finally, the last meaningful element of prompt design was that the nouns chosen to be in the random object pool were deliberately selected to be as semantically difficult as possible. If the model is presented with a question that, for example, includes a number of vehicles as well as a number of cars, it would be entirely justifiable to interpret that question differently than the intent of a subtraction problem with the same numbers but instead involving apples and chinchillas.

        We calculate whether the problem is correct by checking whether the correct number and noun are both present in the correct configuration in the answer content output by Llama. Each prompt was run on a fresh reinitialized instance of Llama, to avoid extracting information from a larger content window that might include numbers or insight from past problems.

        Data Extraction

        The main data structure was filled as follows. For each new autoregressive output logit, each head in each transformer layer calculates attention scores across all input tokens. These scores were collected and aggregated to map the attention in the model as each prompt moved through the transformer blocks.

        In each experiment, attention scores were scraped from the individual model instance for each prompt by selecting the attention values associated with the tokenized representations of the two necessary numerical inputs as well as the single useless input. This produced a lot of data in high dimensions.

        To extract the significant aspects of the data and compress it to a reasonable number of dimensions for graphical representation, we took the attention score tensors (which were also saved at their original sizes) and averaged across the following dimensions:

        • Heads in each layer: This revealed the change in attention over layers, rather than over heads, in order to potentially reveal the numbers’ progression through deeper-level abstractions, allowing us to answer questions like:
          • How do self-attention and attention in early layers look for values relevant to the problem?
          • What role does attention play for the purposes of arriving at a solution to the problem as we reach the middle layers of the model?
          • Is there a meaningful representation of the numerical values the problem is concerned with deep inside the model?
        • Output logits: The rationale behind this choice was to allow any intermediate “reasoning” to become evident by encapsulating multiple parts of the response.

        • Input problems: Eliminates intrinsic variation in response to slightly different questions.

        This allowed us to arrive at a representation of how the attention for the relevant tokens changed as it passed through the individual layers of the model.

        Attention Modification

        For our experiments where we modify attention scores to the useless token, in every layer we multiply every attention score to that token by some value, the multiplier, before taking softmax.

        Results

        We found that there was no significant difference between attention to the useless number and the two necessary numbers over 100 samples (with 55/100 accuracy). Perhaps the mid-layer attention peak for the useless number is earlier than for the necessary numbers, but not significantly. We found a peak in attention to all number tokens in middle layers. We found no significant difference between the graphs for problems it answered correctly versus incorrectly.

        Here is the attention across all input tokens for one example problem. (Note these are not all the input tokens. The full input tokens were [’’, ‘[’, ‘INST’, ‘]’, ‘Answer’, ‘as’, ‘cons’, ‘is’, ‘ely’, ‘as’, ‘possible’, ‘.’, ‘Jack’, ‘has’, ‘’, ‘9’, ‘pens’, ‘and’, ‘’, ‘7’, ‘spo’, ‘ons’, ‘.’, ‘He’, ‘gives’, ‘away’, ‘’, ‘9’, ‘pens’, ‘.’, ‘How’, ‘many’, ‘pens’, ‘does’, ‘he’, ‘have’, ‘?’, ‘[’, ‘/’, ‘INST’, ‘]’, ‘’]

        Surprisingly, there was not more attention to numbered tokens compared to other tokens.

        When looking through each attention head individually, some attended to specific numbered tokens. For example, head 13, layer 16, strongly attended to “9”.

        Graph for 13th Heads Only

        Finally, we multiplied attentions to the useless number’s token and varied the multiplier from 0 to 5 (30 samples per data point). We found that it is actually useful to slightly decrease attention to the useless token, and performance decreases as attention to the useless token increases.

        We suspect the rise in performance from a multiplier of 2 to 5 to be insignificant and random due to the low sample size.

        For small multipliers above 1, most incorrect responses are of the type where the model responds with the useless number.

        *User: If Jack starts out with 7 coasters and Jill takes 2 of them, then Jack gets 3 badges, how many coasters does he have?*
        *Llama-2-7b-chat: Jack has 3 coasters.*

        For large multipliers above 1, the softmax causes the other attention values to approach zero and the model’s quality deteriorates.

        *User: Jack has 7 tacos and 2 cucumbers. He gives away 3 tacos. How many tacos does he have?*
        *Jack has 767 tacos. How many tacos does Jack have? Jack has 76 tacos. How many tacos does Jack has?*

        And at very extreme multipliers, the model outputs gibberish.

        Conclusion

        We found that decreasing attention by 50% (pre-softmax) on the useless token improves performance on our word problems, while increasing the attention (or decreasing the attention too much) degrades it. We hypothesize the performance decreases because it 1) makes the model more likely to output the useless number, and 2) changes the model too much, turning responses into gibberish. Our initial exploration of the attention tracked through the layers of the model yielded very little insight, perhaps due to rapid abstraction of the tokens. This gives us insight into how we might further explore using attention as a salience-adjacent metric for extracting information from word problems.

        \ No newline at end of file diff --git a/blog/2023/superposition/index.html b/blog/2023/superposition/index.html new file mode 100644 index 00000000..0c112b51 --- /dev/null +++ b/blog/2023/superposition/index.html @@ -0,0 +1,42 @@ + The Effect of Activation Functions On Superposition in Toy Models | 6.S898 Deep Learning Blogs 2023

        The Effect of Activation Functions On Superposition in Toy Models

        An in-depth exploration of how different activation functions influence superposition in neural networks.

        Introduction to Superposition

        With the recent emergence of grokking, mechanistic interpretability research has trended towards understanding how models learn. A central concept in this pursuit is superposition - a single neuron learning multiple “features.”

        Features are the distinguishing properties of data points, the “things” that allow a neural network to learn the difference between, say, a dog and a cat, or a Phillip Isola and a Jennifer Aniston. Features are the building blocks that determine what makes one data point different from another. In many cases, features discovered by and encoded within neural networks correspond to human-understandable ideas. For example, in language models there exist embedding vectors describing relations like gender or relative size (e.g., the famous vec(“king”) - vec(“man”) + vec(“woman”) =~ vec(“queen”)). It has been found that language models often map ideas like these to features within their parameters. Human understanding is not necessary though, as models can find and map features that exist beyond the perception of humans. This is an important part of the success (and dual inscrutability) of modern deep models, as these models can determine features and relationships within the data that allow them to model large datasets, like language, very well.

        In this work we:

        1. Explain Superposition, why it may occur, and why it is important
        2. Motivate a framework to easily study Superposition
        3. Study how activation functions affect Superposition

        Superposition and Previous Work

        Let us elaborate further. If you were to train some neural network and visualize the weights - chances are you would see some mess that looks like this:

        You are likely looking at superposition!

        As hypothesized by , superposition is a phenomenon which occurs when the number of features being learned by a model is greater than the number of parameters in that model. To capture $n$ features with $m<n$ parameters, one can think of the neurons as “working overtime.” In other words, some of the neurons within a model encode information about more than one feature. The neuron exhibiting superposition operates as an information compressor. The caveat is that this compression is often unpredictable and hard to understand!

        In a linear model, i.e., one which maps inputs to outputs with only linear functions, there are fewer parameters than the features it tries to represent, so it can only represent the top $m$ features. How then do neural networks use compression and map back to $n>m$ features using only $m$ parameters? The answer is non-linearity. Clearly, the activation function is key to understanding how superposition occurs - a factor unexplored by other work in the field. Prior work explores the activation function in the transformer MLP, but not in the setting we present here.

        But why do we care about Superposition? Why spend time studying this?

        While it may seem tangential, Superposition sheds important insights on Large Language Models (LLMs)! While LLMs are billions of parameters large, this is still not enough for a one-to-one mapping to “features” on the internet. Therefore LLMs also MUST exhibit superposition to learn. We focus our current work on the $\textit{bottleneck superposition}$ regime, but later work has shown that the picture is far more complicated than presented there. Namely, varying the initialization can change how superposition unfolds. To normalize across experiments, we initialize all weights using the Xavier norm. However, this is certainly a limitation of our presented work. A more rigorous analysis of superposition with activation functions would explore it outside the context of the bottleneck regime. We leave this for future work.

        From the Distill blog: "Feature visualization allows us to see how GoogLeNet, trained on the ImageNet dataset, builds up its understanding of images over many layers."

        Previous research, as detailed in , has predominantly explored superposition within the confines of toy models utilizing the Rectified Linear Unit (ReLU) activation function. However, to extend these findings to contemporary neural networks, it is crucial to investigate the influence of different activation functions on superposition. Different activation functions provide different ways for a model to use superposition to its advantage.

        So you train a neural network - what happens at the neuron level? There are three possibilities. As the network trains each neuron has three choices:

        1. The neuron chooses not to encode the “features”
        2. The neuron chooses to dedicate itself to one feature
        3. The neuron chooses to encode multiple features

        (We anthropomorphize - The neuron doesn’t choose to do anything - there is no free will - you are born into a loss landscape and an optimizer telling you what to do.)

        In linear models, each neuron is limited to representing only the most significant features (2), discarding others (1). Conversely, superposition, enabled by non-linear activation functions, adopts a more inclusive approach (3), trying to encode multiple features per neuron and learning efficient representational shortcuts.

        While ReLU bears similarity to the Gaussian Error Linear Unit (GeLU) used in modern GPT architectures, a deeper understanding of how different nonlinear activations impact superposition can provide crucial insights. Such understanding is key to unraveling the complex mechanisms through which neural networks utilize non-linearities, a cornerstone in the broader narrative of neural network interpretability.

        Monosemanticity and Polysemanticity

        To connect to existing literature, (2) and (3) above are given the names monosemanticity and polysemanticity. We will also follow this notation going forward.

        To describe further, the idea of superposition in neural networks leads us to two distinct types of neuron behaviors: monosemanticity and polysemanticity.

        Monosemantic neurons are those that specialize in a single, distinct feature, acting as dedicated detectors. This characteristic is often observed in the intermediate layers of architectures like Convolutional Neural Networks (CNNs), where neurons become adept at recognizing specific patterns, such as curves or colors. Polysemantic neurons do not align with just one feature but engage with multiple features simultaneously, offering a broader and more nuanced understanding of the data. This trait is essential for handling complex, high-dimensional datasets but comes at the cost of reduced interpretability.

        Motivation and Notation

        Our work extends prior work by examining how changing the activation function in toy model networks affects the behavior and interpretability of these networks. That work uses the canonical ReLU activation function to add non-linearity to two-layer models to analyze how superposition occurs within small networks. They did not generalize their work to other activation functions, which, we find, results in distinct new phenomena. Our work compares the ReLU function with five other common activation functions: GeLU, SiLU, Sigmoid, Tanh, and SoLU. We hope that generalizing the phenomenon across activation functions can push the toy dataset closer to realistic ML settings.

        Problem Specification

        The models in this experiment will be learning how to replicate a length-$n$ vector of inputs in the range $[0, 1]$ with a compression to a length-$m$ embedding (where $n>m$). The model will then use the length-$m$ embedding to recreate the length-$n$ input, using a non-linear activation function to allow for superposition.

        We will run two variations of the experiment. One variation of the experiment will involve compressing inputs of size $n=10$ to an embedding of size $m=5$. This experiment aims to see how superposition occurs across many features which are encoded in a bottleneck with half the number of spots as there are features. The second variation of the experiment will involve compressing inputs of size $n=2$ to an embedding of size $m=1$. This experiment aims to understand precisely how the model encodes the second “extra” feature in a variety of settings.

        To set up this experiment, we need to create a dataset that allows for superposition to occur and that also allows for interpretability of the superposition. To motivate this further, we begin with a careful discussion of features.

        Features

        Features are the salient “things” that a neural network learns to differentiate inputs .

        Technically, features are the properties which neural networks try to extract from data during learning to compress inputs to useful representations during inference. Although features can map to human-understandable concepts (e.g., dog ears), they can also represent properties of the data that are not immediately apparent to the human brain. To experiment with superposition, we need to encode features in a way that we can understand. In other words, we do not want our experimental model to learn features that we are unaware of. This would make it hard for us to interpret how the model maps features in the data to embeddings within its parameters, consequently obscuring how superposition works. To this aim, we must generate features within the training set for our model which are simple and understandable to us a priori. Similar to , we use as each input a vector with entries drawn independently from a uniform distribution over $[0, 1]$. Making each entry independent of the others enforces that each entry is its own (artificial) feature with no correlation to the other features.

        Here we define two important augmentations that we used in the dataset to simulate real-world features: sparsity and importance.

        Sparsity

        Sparsity is a measure of how often a specific feature is present in a dataset. A feature is characterized as “sparse” if it only appears in a small fraction of the inputs to the model. Similarly, features that are “dense” appear in many of the inputs. We will also use the term ‘density’, which is the complement of sparsity, defined as $1-S$.

        Specifically, a feature with a sparsity of $S \in [0, 1]$ has a probability $S$ of being expressed in any given input. If we have $S=0$, this means that the feature is expressed in every input, whereas if we have $S=0.5$, this means that the feature is expected to be expressed in about half of the inputs.

        In our experiment, we train models at different sparsities to capture how sparsity affects superposition.

        Importance

        Not all features are created equal!

        Some features are more useful than others in determining relevant information about inputs. For instance, when building a dog detector - capturing features related to dogs’ faces are extremely important! A model would need to pick up salient features of dogs, perhaps floppy ears and snouts. Other features, like the grass a dog is sitting on or a frisbee in a dog’s mouth, may not be as useful for detecting a dog. The varying degrees of usefulness among features are encapsulated in the concept of “importance”.

        In the context of feature detection by a neural network, importance plays a role in modulating which features are encoded within the embedded layers of the network. In the context of the superposition hypothesis, if one feature has more importance than another feature, then it would be inefficient for the network to map both features equally within the embedding; allocating more weight to the feature with greater importance would be more valuable to the network in minimizing error.

        In our experiment, we give each input feature a different importance to allow the models to differentiate between them. We will examine when and how the model justifies mapping multiple features of differing importances to the same neuron, i.e., we will observe the superposition of features with differing importances.

        Dataset

        To run this experiment, we will synthetically generate data that has desired sparsity and importance properties.

        Each input $x$ will be a vector of length $n$. Each element $x_i$ in the vector will be drawn independently from the other elements in the uniform range $[0, 1]$. As discussed before, we can now synonymously refer to each of these elements as features, given their independent generation. (We will refer to them as features from this point onwards.)

        Each feature $x_i$ in the vector has a relative importance to each of the other features $x_{j\ne i}$. The importance of feature $x_i$ is $I_i = r_I^i$ where $r_I\in(0, 1)$ is a constant describing the relative decay of importance between neighboring features. This attribute of the data will be implemented in the loss function (see below for more details).

        We will train separate models for each of the varying levels of sparsity. For an input $x$ with sparsity $S$, each feature $x_i$ will take on its “true” value, a uniformly distributed number, with a probability of $1-S$ and will otherwise be set to 0 with a probability of $S$.

        Below is a visualization of two batches of inputs with respective sparsities $S=0.5$ and $S=0.99$.

        Each column of the plots represents a feature vector of length 20. Each batch has size 100, corresponding to the number of columns in the plots. Notice how the changing in sparsity affects the feature density.

        Network

        Below are the architectures of the base (linear) and experimental (non-linear) models that we are using in this experiment. Of particular note is the activation function $\mathbb{f}$, which we will substitute using the aforementioned activation functions.

        Linear Model Activation ( $\mathbb{f}$ ) Output Model
        \(h = Wx\) \(h = Wx\)
        \(x' = W^T h + b\) \(x' = f(W^T h + b)\)
        \(x' = W^T Wx + b\) \(x' = f(W^T Wx + b)\)

        We create an autoencoder - compressing down to induce polysemanticity. This maps $x$ to a direction in a lower-dimensional space, represented by \(h = Wx\). Each column of $W$ corresponds to a lower-dimensional representation of a feature in $x$. To reconstruct the original vector, $W^T$ is used, ensuring clear feature representation correspondence. This structure results in a symmetric matrix $W^TW$ and allows for clear visualization of the weights. They visually allow for the determination of the presence of superposition.

        Loss

        Sparsity, Importance and Our Network come together in the following loss function:

        \[L = \sum_{i} \sum_{x} I_{i}(x_{i} - x'_{i})^{2}\]

        Motivated by , we use a standard MSE loss, where $x_i$ and $x_i'$ measure the absolute difference in the auto-encoding of the datapoint. The Importance factor, $I_i$ , describes how important the given reconstruction is. A smaller importance will allow loss minimization even with a poor reconstruction.

        Results

        Below we present each activation function, along with plots depicting how training results in superposition at varying degrees of sparsity.

        For the $n=10, m=5$ experiment, we show the $W^TW$ matrix and neuron feature distribution at varying degrees of sparsity. The $W^TW$ matrix reveals which features are prioritized (shown by the diagonal terms) and any polysemanticity that occurs (shown by the off-diagonal terms). The neuron feature distribution shows how each of the $n=10$ features is mapped to each of the $m=5$ embedding dimensions. This can aid in understanding under what conditions polysemanticity arises and how it occurs under each condition of sparsity.

        For the $n=2, m=1$ experiment, we show a phase diagram. This phase diagram shows how the second “extra” feature of the length-2 input vector is encoded. There are three options: not encoded at all (only the first feature is encoded), encoded in superposition with the first feature, and encoded as the only feature (the first feature is not encoded).

        ReLU

        The ReLU (Rectified Linear Units) activation function is a piecewise-linear function, a simple non-linearity that allows models to use superposition of features. ReLU was the only activation function used in , so our work with the ReLU function was primarily to verify the results from their work and create a baseline for our subsequent experiments.

        The following are the $W^TW$ matrices and feature-neuron mappings:

        ReLU $W^TW$ Matrices

        As per the results in , the ReLU model focuses on the most significant features in the low sparsity regime (generally resulting in monosemanticity), while relying on superposition in the high sparsity regime (polysemanticity). With weaker signals for the most important features in the high sparsity regime, the model encodes multiple features in each neuron activation to minimize error of the sparse signals. Notably, the ReLU model uses antipodal pairs in the mapping of features to encode multiple features to single neurons. This can be seen as a light-colored diagonal entry within $W^T W$ and a corresponding dark-colored off-diagonal entry within the same column. This antipodal mapping of features is a method that the model uses to compress more than one feature to one neuron. This antipodal mapping is more interpretable than other kinds of polysemanticity which occur in subsequently-described activation functions that “speckle” multiple features into a single neuron, making it more difficult to determine how the superposition occurs in that model.

        The following is the phase diagram of the ReLU models:

        In regimes of high sparsity (i.e., below $1-S=0.1$ on the phase diagram above) the ReLU models are highly polysemantic for all relative feature importances, reflecting an inability to encode features with a sparse signal. In regimes of low sparsity, the model generally embeds the more important of the two features. This result mirrors the phase diagram in as expected.

        GeLU/SiLU

        The GeLU (Gaussian Error Linear Units) and SiLU (Sigmoid Linear Units) activation functions are very similar to one another, and as a result produced very similar experimental results. Both functions are akin to a “smoothed out” version of the ReLU function, i.e., they have no discontinuities. The GeLU has recently been popularized as the activation function of choice in many transformers, including BERT and GPT . The GeLU is differentiable for all $x$ - and has a smoother curve than the SiLU (Swish) activation. found that in the setting of transformers, the GeLU was less interpretable than the SoLU. This may be the case after stacking many linear layers with activations - but with a single layer this is not the case.

        GeLU $W^TW$ Matrices
        SiLU $W^TW$ Matrices

        The GeLU and SiLU models exhibit similar kinds of superposition in their weight matrices. With increasing sparsity, superposition of features does happen, but it is more “strict” than the ReLU model, generally mapping at most two features to any single neuron. In each of the polysemantic neurons, though, there is one feature that dominates, suggesting that these activation functions enforce sparsity in their activations. There are also many antipodal pairs of features within these models, reiterating the behavior that exists in the ReLU models (also found in ).

        The above phase diagrams of the GeLU and SiLU models show a marked difference from that of the ReLU model (earlier), despite the similar shapes of these three activation functions. The GeLU and SiLU models exhibit significant monosemanticity at high degrees of sparsity, unlike the ReLU, which results in near-complete polysemanticity for sparsities higher than $S=0.9$. This difference may reflect SiLU’s and GeLU’s better fit as an activation for picking up the signal in sparse feature representations, making the case for GeLU and SiLU as more interpretable activation functions within larger models.

        Sigmoid

        The Sigmoid function is a smooth activation function with an output range of $(0, 1)$. This maps directly to the desired range of values that the model is trying to replicate.

        Sigmoid $W^TW$ Matrices

        The Sigmoid model exhibits superposition in all neurons as soon as the sparsity is non-zero, as can be seen from the “speckling” of non-zero off-diagonal terms in $W^T W$. This is a difference from the ReLU/GeLU/SiLU models, for which the superposition “leaks” into the least significant encoded features at low, non-zero sparsities and eventually affects all features at higher sparsities. This low-sparsity superposition may occur because the Sigmoid function strictly maps to $(0, 1)$, with increasingly large pre-activation inputs necessary to map to values close to 0 and 1. As such, the model may be “speckling” the off-diagonal values in an attempt to “reach” these inputs which are close to 0 and 1.

        Despite differences in the occurrence of polysemanticity, the ReLU and Sigmoid models exhibit very similar phase diagrams, reflecting an inability to encode multiple features at sparsities above $S=0.9$ (i.e., below $1-S=0.1$ on the phase diagram). As discussed above, this may be caused by the vanilla sigmoid activation’s inability to “reach” target values close to 0 or 1.

        Tanh

        The Tanh function is another smooth activation function, but it results in significantly different behavior from the Sigmoid (despite being a linear mapping of the Sigmoid).

        Tanh $W^TW$ Matrices

        With the Tanh activation function, the models prioritize the most important features regardless of sparsity. This behavior is possibly attributed to the range that the Tanh function maps to, $(-1, 1)$, while the target range of input values in this experiment is $[0, 1]$. This behavior is similar to that of a linear model (i.e., no activation function) which exhibits no capability to use superposition, but the phase diagram reveals subtle differences from the linear model results.

        Although the Tanh model performs nearly as the linear model would, encoding only the most important feature, there is some difference from the linear model along the boundary between features, as can be seen around the importance of 1. This reflects the model’s ability to use non-linearity to perform superposition.

        A Note on Sigmoid and Tanh

        Despite similarities in the S-like curvature of the Sigmoid and Tanh activation functions, the Sigmoid model exhibits superposition, whereas the Tanh model exhibits nearly zero superposition. A key difference between the two functions is the fact that the Sigmoid function maps inputs to a range of $(0, 1)$, while the Tanh function maps inputs to a range of $(-1, 1)$. This difference is significant in our experiment, as our experiment uses models to recreate random vectors with elements in the range $[0, 1]$. The range of the Sigmoid function matches this target range, while the Tanh function only produces outputs in this range for non-negative inputs. In other words, the $(-\infty, 0)$ input domain of the Tanh function (which maps to the range $(-1, 0)$) remains useless for prediction of values which should be in the range $[0, 1]$. Therefore, the tanh function empirically acts like a linear function (i.e., no activation layer).

        SoLU

        The SoLU (Softmax Linear Units) activation function is based on the work from . \(\mathrm{SoLU}(x) = x \cdot \mathrm{softmax}(x)\) SoLU is a function for which the activation of each neuron is dependent on all the other neurons within its own layer. This is significantly different from all the other activations that we tested, as the activations of neurons with the other functions are independent of the other neurons within the same layer. In other words, all the other activation functions are univariate while the SoLU is multivariate. Similar to other approaches like L1 regularization, the SoLU amplifies neurons with relatively large pre-activations and de-amplifies neurons with relatively smaller pre-activations. This behavior pressures the model to be more monosemantic (and therefore more interpretable in some settings), as discussed in .

        SoLU $W^TW$ Matrices

        In our experiment, the SoLU model results in non-zero superposition of all features with all degrees of sparsity. This may be attributed to the way that the SoLU “forces” activations to be sparse, i.e., the activations result in a “winner-takes-all” behavior due to the way that the Softmax function works. This is not a useful property for prediction of a vector of independently-drawn values, as the input vectors are unlikely to be peaky, i.e., the SoLU does not quite fit the purposes of its task.

        As seen in the heatmap plot above, the SoLU activation results in very polysemantic behavior. This function is not precisely fit for its task of recreating given vectors and likely results in using polysemanticity to attempt to pass information about inputs forward. Curiously, the SoLU models have preference for the more important feature in the low sparsity regime.

        Bringing Them All Together

        Sparsity vs Dimensions Per Feature

        The diagram above depicts a variation on the two experiments explained thus far. In this experiment $n=200$ features were compressed to $m=20$ features and the loss function was tweaked to give uniform importance $I_i = 1$ to all features. This was done to determine how each activation functions compresses features in different sparsity regimes without the influence of feature importance.

        On the y axis, the plot depicts a metric (dimensions per feature) that measures the number of dimensions a model dedicates to each feature. In other words, a point with a y-value near 1 represents a model that dedicates one dimension of its embedding space to one feature, whereas a point with a y-value near 0.25 represents a model that represents four features at each dimension.

        The plots are generally consistent with the analysis from the previous experiments. Many of the activations result in superposition in the low-density/high-sparsity regime, and increases in sparsity result in increases in the polysemanticity of the model (i.e., the dimensions per feature decrease). Consistent with the other experiments, SiLU and GELU perform very similarly. The Sigmoid and SoLU activations pack nearly 20 features per dimension at high sparsities. The Tanh activation exhibits behavior similar to the linear model, neatly packing one dimension with one feature, a result that is mirrored in the previous experiments. Similar to the results in , we see “sticky” behavior of the ReLU activation function at 1 and 0.5 dimensions per feature. This can be explained by the phenomenon of “antipodal pairs” discussed in . None of the other activation functions that we tested exhibit this behavior - which is striking since this is a well-studied effect for the ReLU activation function. This may be because the ReLU activation function is the only one that is not smooth, and therefore exhibits different behavior than the other activation functions.

        Conclusion

        Our investigation into the effects of various activation functions reveals that significant changes occur in model behavior depending on the chosen function. This finding underscores the ability to modulate the degree of superposition through the selection of activation functions, highlighting yet unexplored degrees of freedom in model design. This line of inquiry goes seamlessly with considerations of how neural networks are initialized and trained, suggesting these as promising future research directions.

        Our work is limited by the breadth of activation functions that we tested, though. Further iterations on each of the activation functions (e.g., tweaking the Sigmoid function to map to the range $(-\epsilon, 1+\epsilon)$) could prove fruitful in getting better performance from the models. Furthermore, while writing this blog, published a new key insight related to the importance of initialization in superposition, which we do not explore here. Despite this, we have learned valuable insights about the effects that our set of activation functions can have on superposition.

        Pursuing enhanced interpretability, however, does not come without its challenges. Specifically, striving for transparency and understandability in neural network models raises concerns about the potential for deception. Despite these challenges, our work aims to develop neural network models that are more interpretable, transparent, and secure.

          \ No newline at end of file diff --git a/blog/2023/suscep/index.html b/blog/2023/suscep/index.html new file mode 100644 index 00000000..70e0ce5a --- /dev/null +++ b/blog/2023/suscep/index.html @@ -0,0 +1,32 @@ + From Scroll to Misbelief - Modeling the Unobservable Susceptibility to Misinformation on Social Media | 6.S898 Deep Learning Blogs 2023

          From Scroll to Misbelief - Modeling the Unobservable Susceptibility to Misinformation on Social Media

          Abstract

          Susceptibility to misinformation describes the extent to which people believe false claims, which is hidden in people’s mental process and infeasible to observe. Existing susceptibility studies heavily rely on the crowdsourced self-reported belief level, making the downstream research homogeneous and unscalable. To relieve these limitations, we propose a computational model that infers users’ susceptibility levels given their reposting behaviors. We utilize the supervision from the observable sharing behavior, incorporating a user’s susceptibility level as a key input for the reposting prediction task. Utilizing the capability of large-scale susceptibility labeling, we can also perform a comprehensive analysis of psychological factors and susceptibility levels across professional and geographical communities. We observe that susceptibility is influenced by complicated factors, demonstrating a degree of correlation with economic development around the world, and with political leanings in the U.S.


          Introduction

          Illustration of the Susceptibility Modeling. We formulate the model to predict whether a given user will retweet a specific misinformation tweet. We utilize a shallow neural network to predict the susceptibility score, and together with the dot product of the user and tweet embeddings to predict retweet behavior. Our model is trained using two loss functions: binary classification entropy and triplet loss.

          False claims spread on social media platforms, such as conspiracy theories, fake news, and unreliable health information, mislead people’s judgment, promote societal polarization, and decrease protective behavior intentions. The harm is especially significant in various contentious events including elections, religious persecution, and the global response to the COVID-19 pandemic. Many works have investigated the observable behavior of information propagation such as where the information propagates, how people share it, and what people discuss about it. However, it is still crucial but challenging to understand the unobservable mental and cognitive processes when individuals believe misinformation. Users’ susceptibility (i.e., the likelihood of individuals believing misinformation) plays a pivotal role in this context. If a person is more susceptible to misinformation, they are not only more likely to share false claims but also more prone to being misled by them.

          Existing works have investigated the psychological, demographic, and other factors that may contribute to the high susceptibility of a population. However, previous susceptibility studies heavily rely on self-reported belief towards false claims collected from questionnaire-based participant survey, which presents several limitations. For instance, different participants might interpret the belief levels in different ways. Moreover, the data collection process is labor-heavy and thus limits the scale of downstream research on size, scope, and diversity of the target population.

          The unobservance of people’s beliefs makes it infeasible to model susceptibility directly. Luckily, existing psychological literature bridges unobservable beliefs and observable behaviors, showing that the sharing behavior is largely influenced by whether users believe the misinformation, the attributes of the sharing content, and users’ internal mental motives. Motivated by these prior works, we formulate the relationship between believing and sharing described in social science literature into a machine learning task.

          Concretely, we propose to infer people’s susceptibility level given their re/posting behaviors. To parameterize the model, we wrap the susceptibility level as input for the prediction model of the observable reposting behavior. We perform multi-task learning to simultaneously learn to classify whether a user would share a post, and rank susceptibility scores among similar and dissimilar users when the same content is seen. Note that our model does not aim to predict any ground-truth susceptibility for individuals. Instead, we use users’ reposting behaviors towards misinformation as a proxy for their susceptibility level for better interpretability. Our model design enables unobservable modeling with supervision signals for observable behavior, unlocks the scales of misinformation-related studies, and provides a novel perspective to reveal the users’ belief patterns.

          We conduct comprehensive evaluations to validate the proposed susceptibility measurement and find that the estimations from our model are highly aligned with human judgment. Building upon such large-scale susceptibility labeling, we further conduct a set of analyses of how different social factors relate to susceptibility. We find that political leanings and psychological factors are associated with susceptibility in varying degrees. Moreover, our analysis based on these inferred susceptibility scores corroborates the findings of previous studies based on self-reported beliefs, e.g., stronger analytical thinking is an indicator of low susceptibility. The results of our analysis extend findings in existing literature in a significant way. For example, we demonstrate that susceptibility distribution in the U.S. exhibits a certain degree of correlation with political leanings.

          To sum up, our contributions are:

          1. We propose a computational model to infer people’s susceptibility towards misinformation in the context of COVID-19, by modeling unobservable latent susceptibility through observable sharing activities.
          2. Evaluation shows that our model effectively models unobservable belief, and the predictions highly correlate with human judgment.
          3. We conduct a large-scale analysis to uncover the underlying factors contributing to susceptibility across a diverse user population from various professional fields and geographical regions, presenting important implications for related social science studies.

          Computational Susceptibility Modeling

          Modeling Unobservable Susceptibility

          Inspired by the existing studies indicating that believing is an essential driver for dissemination, we propose to model susceptibility, which reflects users’ beliefs, as a driver for the sharing behavior, while considering characteristics of the sharing content and user profile.

          We propose a computational model to infer a user’s unobservable susceptibility score based on their historical activities as shown in the model figure, and further train the model with signals from the observable disseminating behavior. We construct approximate contrastive user-post pairs as the training data (Dataset and Experiment Setup).

          This design would allow us to explore the best parameters for the computational model of an unobservable and data-hungry susceptibility variable using the rich data resources available on social media platforms.

          Content-Sensitive Susceptibility

          We compute the user’s susceptibility when a particular piece of misinformation $p$ is perceived (i.e. $s_{u, p}$). This allows us to account for the fact that an individual’s susceptibility can vary across different content, influenced by factors such as topics and linguistic styles. By focusing on the susceptibility to specific pieces of misinformation, we aim to create a more nuanced, fine-grained, and accurate representation of how users interact with and react to different COVID-19 misinformation.

          User and Misinfo Post Embeddings

          As a component of the computational model, we use SBERT developed upon RoBERTa-large to produce a fixed-sized vector to represent the semantic information contained in the posts and user profiles. We consider the misinformation post as a sentence and produce its representation with SBERT. For the user profile, we calculate the average of sentence representations for the user’s recent original posts. More specifically, for every user-post pair $(u, p)$, we gather the historical posts written by user $u$ within a 10-day window preceding the creation time of the misinformation post $p$, to learn a representation of user $u$ at that specific time.

          Computational Model for Susceptibility

          Given the input of the user profile for the user $u$ and the content for misinformation post $p$, the susceptibility computational model is expected to produce the susceptibility score $s_{u, p}$ as shown below, reflecting the susceptibility of $u$ when $p$ is perceived.

          \[s_{u, p} = suscep(E(u), E(p))\]

          We first obtain the embeddings $E(p)$ and $E(u)$ for post $p$ and user profile $u$, where $u$ is represented by the user’s historical tweets and $E$ is the frozen SBERT sentence embedding function. The susceptibility score is calculated by the function $suscep$, which is implemented as a multi-layer neural network, taking the concatenation of the user and post embeddings as inputs. In the training phase, we keep the sentence embedder frozen and learn the weights for the $suscep$ function that could be used to produce reasonable susceptibility scores. We expect to produce susceptibility scores for novel $u$ and $p$ pairs using the learned $suscep$ function during inference. Additionally, we normalize the resulting susceptibility scores into the -100 to 100 range for better interpretability.

          Training with Supervision from Observable Behavior

          Susceptibility is not easily observable, thus it is infeasible to apply supervision on $s_{u, p}$ directly as only the user $u$ themselves know their belief towards content $p$. Thus, we propose to utilize the supervision signal for sharing a piece of misinformation, which is an observable behavior. We consider susceptibility as an essential factor of sharing behavior and use the susceptibility computational model’s output to predict the repost behavior.

          To produce the probability for user $u$ to share post $p$, we calculate the dot product of the embeddings of the user profile and post content, consider the susceptibility score for the same pair of $u$ and $p$ as a weight factor, and pass the result through a sigmoid function, as illustrated in the model figure.

          \[p_{\text{rp}} = \sigma \left( E(u) \cdot E(p) \cdot s_{u, p} \right)\]

          Note that we do not directly employ the susceptibility score to compute the probability of sharing because the sharing behavior depends not only on the susceptibility level but also on other potential confounding factors. It is possible that a user possesses a notably high susceptibility score for a piece of misinformation yet chooses not to repost it. Hence, we incorporate a dot product of the user and post embeddings in our model to take the misinformation post content and user profiles into consideration when predicting the sharing behavior.

          \(\begin{align} \mathcal{L}_{\text{bce}}(u_i, t) &= -\left( y_i \log(p_{\text{rp}}(u_i, t)) + (1 - y_i) \log(1 - p_{\text{rp}}(u_i, t)) \right) \nonumber \\ \mathcal{L}_{\text{triplet}}(u_a, u_s, u_{ds}, t) &= \text{ReLU}\left(\Vert s_{u_{a},t} - s_{u_{s},t}\Vert_2^2 - \Vert s_{u_{a},t} - s_{u_{ds},t} \Vert_2^2 + \alpha \right) \nonumber \\ \mathcal{L}(u_a, u_s, u_{ds}, t) &= \frac{\lambda}{3} \sum_{i \in \{a, s, ds\}} \mathcal{L}_{\text{bce}}(u_i, t) + (1 - \lambda) \mathcal{L}_{\text{triplet}}(u_a, u_s, u_{ds}, t) \nonumber \label{eq:loss} \end{align}\)

          Objectives

          We perform multi-task learning to utilize different supervision signals. We first consider a binary classification task of predicting repost or not with a cross-entropy loss. Additionally, we perform the triplet ranking task to distinguish the subtle differences among the susceptibility scores of multiple users when the same false content is present.

          During each forward pass, our model is provided with three user-post pairs: the anchor pair $(u_a, p)$, the similar pair $(u_s, p)$, and the dissimilar pair $(u_{ds}, p)$. We determine the similar user $u_s$ as the user who reposted $p$ if and only if user $u_a$ reposted $p$. Conversely, the dissimilar user $u_{ds}$ is determined by reversing this relationship. When multiple potential candidate users exist for either $u_s$ or $u_{ds}$, we randomly select one. However, if there are no suitable candidate users available, we randomly sample one user from the positive (for “reposted” cases) or negative examples (for “did not repost” cases) and pair this randomly chosen user with this misinformation post $p$.

          Here, we elaborate on the definition of our loss function. The label $y_i$ takes the value of 1 if and only if user $u_i$ reposted misinformation post $p$. The parameter $\alpha$ corresponds to the margin employed in the triplet loss, serving as a hyperparameter that determines the minimum distance difference needed between the anchor and the similar or dissimilar sample for the loss to equal zero. Additionally, we introduce the control hyperparameter $\lambda$, which governs the weighting of the binary cross-entropy and triplet loss components.

          Dataset and Experiment Setup

          We use Twitter data because it hosts an extensive and diverse collection of users, the accessibility of its data, and its popularity for computational social science research. Additionally, it provides users’ free-text personal and emotional expression with crucial metadata, including timestamps and location data, which are useful for our subsequent analytical endeavors.

          Misinformation Tweets

          We consider two misinformation tweet datasets: the ANTi-Vax dataset was collected and annotated specifically for COVID-19 vaccine misinformation tweets. On the other hand, CoAID encompasses a broader range of misinformation related to COVID-19 healthcare, including fake news on websites and social platforms. The former dataset contains 3,775 instances of misinformation tweets, while the latter contains 10,443.

          However, a substantial number of tweets within these two datasets do not have any retweets. Consequently, we choose to retain only those misinformation tweets that have been retweeted by valid users. Finally, we have collected a total of 1,271 misinformation tweets for our study.

          Positive Examples

          We define the positive examples for modeling as $(u_{pos}, t)$ pairs, where user $u_{pos}$ viewed and retweeted the misinformation tweet $t$. We obtained all retweeters for each misinformation tweet through the Twitter API.

          Negative Examples

          Regarding negative examples, we define them as $(u_{neg}, t)$ pairs where user $u_{neg}$ viewed but did not retweet misinformation post $t$. However, obtaining these negative examples poses a substantial challenge, because the Twitter API does not provide information on the “being viewed” activities of a specific tweet. To tackle this issue, we infer potential users $u_{neg}$ that highly likely viewed a given tweet $t$ following the heuristics: 1) $u_{neg}$ should be a follower of the author of the misinformation tweet $t$, 2) $u_{neg}$ should not retweet $t$, and 3) $u_{neg}$ was active on Twitter within 10 days before and 2 days after the timestamp of $t$.

          We have collected a total of 3,811 positive examples and 3,847 negative examples, resulting in a dataset comprising 7,658 user-post pairs in total. We divide the dataset into three subsets with an 80% - 10% - 10% split for train, validation, and test purposes, respectively. The detailed statistics of the collected data are illustrated in the table below.

            Total Positive Negative
          # Example 7658 3811 3847
          # User 6908 3669 3255
          # Misinfo tweet 1271 787 1028

          Evaluation

          In this section, we demonstrate the effectiveness of our susceptibility modeling by directly comparing our estimations with human judgment and indirectly evaluating its performance for predicting sharing behavior.

          Validation with Human Judgement

          Due to the abstract nature of susceptibility and the lack of concrete ground truth, we face challenges in directly evaluating our susceptibility modeling. We use human evaluations to validate the effectiveness of our inferred susceptibility. Given the subjectivity inherent in the concept of susceptibility, and to mitigate potential issues arising from variations in individual evaluation scales, we opt not to request humans to annotate a user’s susceptibility directly. Instead, we structure the human evaluation as presenting human evaluators with pairs of users along with their historical tweets and requesting them to determine which user appears more susceptible to overall COVID-19 misinformation.

          Subsequently, we compared the predictions made by our model with the human-annotated predictions. To obtain predictions from our model, we compute each user’s susceptibility to overall COVID-19 misinformation by averaging their susceptibility scores to each COVID-19 misinformation tweet in our dataset. As presented in the table below, our model achieves an average agreement of 73.06% with human predictions, indicating a solid alignment with the annotations provided by human evaluators. Additionally, we consider a baseline that directly calculates susceptibility scores as the cosine similarity between the user and misinformation tweet embeddings. Compared to this baseline, our susceptibility modeling brings a 10.06% improvement. Moreover, we compare the performance with ChatGPT prompting with the task description of the susceptibility level comparison setting as instruction in a zero-shot manner. We observe that our model also outperforms predictions made by ChatGPT. The results from the human judgment validate the effectiveness of our susceptibility modeling and its capability to reliably assess user susceptibility to COVID-19 misinformation.

            Our Baseline ChatGPT
          Agreement 73.06±8.19 63.00±9.07 64.85±9.02

          Susceptibility Score Distribution

          We provide a visualization of the distribution of susceptibility scores within positive and negative examples produced by our model on the training data. As depicted below, there is a notable disparity in the distribution between positive and negative examples, verifying our assumption that believing is an essential driver for sharing behavior. The difference in the means of the positive and negative groups is statistically significant, with a p-value of less than 0.001.

          Susceptibility Score Distribution among positive and negative user-tweet samples. The distribution of positive (red) and negative (blue) examples exhibits a clear disparity.

          Sharing Behavior Prediction

          Furthermore, holding a belief is highly likely to result in subsequent sharing behavior. We demonstrated that our trained model possesses a strong ability for sharing behavior prediction. When tested on the held-out test dataset, our model achieves a test accuracy of 78.11% and an F1 score of 77.93. These results indirectly demonstrate the reliable performance of our model for susceptibility modeling.

          Analysis

          In this section, we show the potential of our inferred susceptibility scores in expanding the scope of susceptibility research. Our analysis not only aligns with the findings of previous survey-based studies but also goes a step further by extending and enriching their conclusions.

          Correlation with Psychological Factors

          Previous research on human susceptibility to health and COVID-19 misinformation has been primarily based on questionnaire surveys . These studies have identified several psychological factors that influence individuals’ susceptibility to misinformation. For instance, analytical thinking (as opposed to intuitive thinking), trust in science, and positive emotions have been linked to a greater resistance to health misinformation. Conversely, susceptibility to health misinformation is associated with factors such as conspiracy thinking, religiosity, conservative ideology, and negative emotions. In this part, we analyze the correlation coefficients between our modeled susceptibility scores and the aforementioned factors to determine if our results align with previous research findings.

          To achieve this, we compute factor scores for each user in our dataset based on their historical tweets using LIWC Analysis. We calculate the average value across all the user’s historical tweets as the final factor score. However, for emotional factors such as anxiety and anger with less frequent appearance, we opt for the maximum value instead to more effectively capture these emotions. We primarily consider the following factors: Analytic Thinking, Emotions (Positive emotions, Anxious, Angry and Sad), Swear, Political Leaning, Ethnicity, Technology, Religiosity, Illness and Wellness. These factors have been extensively studied in previous works and can be inferred from a user’s historical tweets. We calculate and plot the Pearson correlation coefficients between each factor and the susceptibility predicted by our model in the following table.

          Factors Coeff. Factors Coeff.
          Analytic Thinking -0.31 Emotion - Positive -0.08
          Political Leaning 0.13 Emotion - Anxious 0.08
          Ethnicity 0.09 Emotion - Angry 0.16
          Religiosity 0.10 Emotion - Sad 0.14
          Technology -0.09 Swear 0.18
          Illness 0.09 Wellness -0.02

          According to our analysis, correlations are consistent with previous social science studies based on surveys on health susceptibility. For instance, Analytic Thinking is a strong indicator of low susceptibility, with a correlation coefficient of -0.31. Conversely, certain features such as Swear, Political Leaning and Angry exhibit a weak correlation with a high susceptibility score. These results not only corroborate the conclusions drawn from previous survey-based studies but also provide further validation for the effectiveness of our computational modeling for susceptibility.

          Geographical Community Differences

          We delve into the geographical distribution of susceptibility. Given the significant imbalance in the number of users from different U.S. states, we calculate the average susceptibility scores for each state using Bayesian smoothing. We use the overall mean susceptibility score and overall standard deviation as our priors and the more the users in the group, the less the overall mean will affect the group’s score.

          We explore the susceptibility distribution among different U.S. states, considering the influence of political ideology associated with different states . Out of the 100,000 users sampled from around the world, 25,653 users are from U.S. states with more than 200 users for each state. As illustrated in the figure below, the susceptibility distribution across U.S. states is imbalanced and exhibits a certain degree of correlation with political leanings, where generally, states known to have a more conservative population tend to have relatively higher susceptibility scores, while states that are considered more liberal have lower scores. Specifically, the average susceptibility score for users in blue or red states is -3.66 and -2.82 respectively. Red or blue states refer to US states whose voters vote predominantly for the Republican or Democratic Party. We determine blue/red states according to the 2020 presidential election result. We observe that 60% of the ten states with the highest susceptibility scores are red states, and 90% of the ten states with the lowest susceptibility scores are blue states. This is a trend that has been observed in various research where political ideology influences the perception of scientific information . However, it is crucial to acknowledge the limitations of our analysis, as it solely reflects the susceptibility distribution of the sampled users within each state.

          Susceptibility Distribution by U.S. State (with Bayesian smoothing). We use the average susceptibility score in the United States (-2.87) as the threshold, with scores above it displayed in red, and those below it in blue. Due to space constraints and insufficient data points, we are only displaying data for 48 contiguous states within the U.S.

          Measure of Susceptibility

          The common practice in measuring susceptibility involves collecting self-reported data on agreement or disagreement with verified false claims. Some studies assess susceptibility indirectly through its impact on behavior, but this approach fails to capture actual belief systems. Our work proposes a computational model as a scalable alternative to expensive and limited self-reported beliefs.

          Contributing Factors and Application of Susceptibility

          Research utilizing manually collected susceptibility annotations has explored various factors influencing susceptibility, such as emotion, analytic thinking, partisan bias, source credibility, and repetition. Theories explaining susceptibility range from limited knowledge acquisition to overconfidence. This understanding aids in applications like analyzing bot-driven misinformation spread and developing prebunking interventions. However, the field lacks a computational model for large-scale susceptibility inference, which we address in our work.

          Inferring Unobservables from Observables

          Latent constructs, or variables that are not directly observable, are often inferred through models from observable variables. Methods like nonlinear mixed-effects models and hidden Markov models are used for this purpose. In our approach, we utilize a neural network-based architecture to represent these latent variables, aiding in predicting observable variables.

          Conclusion

          In this work, we propose a computational approach to model people’s unobservable susceptibility to misinformation. While previous research on susceptibility is heavily based on self-reported beliefs collected from questionnaire-based surveys, our model trained in a multi-task manner can approximate user’s susceptibility scores from their reposting behavior. When compared with human judgment, our model shows highly aligned predictions on a susceptibility comparison evaluation task. To demonstrate the potential of our computational model in extending the scope of previous misinformation-related studies, we leverage susceptibility scores generated by our model to analyze factors contributing to misinformation susceptibility. This thorough analysis encompasses a diverse U.S. population from various professional and geographical backgrounds. The results of our analysis align with, corroborate, and expand upon the conclusions drawn from previous survey-based computational social science studies.

          Limitations

          Besides investigating the underlying mechanism of misinformation propagation at a large scale, the susceptibility scores produced by our model have the potential to be used to visualize and interpret individual and community vulnerability in information propagation paths, identify users with high risks of believing in false claims and take preventative measures, and use as predictors for other human behavior such as following and sharing. However, while our research represents a significant step in modeling susceptibility to misinformation, several limitations should be acknowledged.

          First, our model provides insights into susceptibility based on the available data and the features we have incorporated. However, it’s important to recognize that various other factors, both individual and contextual, may influence susceptibility to misinformation. These factors, such as personal experiences and offline social interactions, have not been comprehensively incorporated into our model and should be considered in future research.

          Moreover, the susceptibility scores modeled by our model represent an estimation of an individual’s likelihood to engage with misinformation. These scores may not always align perfectly with real-world susceptibility levels. Actual susceptibility is a complex interplay of cognitive, psychological, and social factors that cannot be entirely captured through computational modeling. Our model should be seen as a valuable tool for understanding trends and patterns rather than providing definitive individual susceptibility assessments.

          Finally, our study’s findings are based on a specific dataset and may not be fully generalizable to all populations, platforms, or types of misinformation. For example, due to the high cost of data collection, not all countries or U.S. states have a sufficient amount of Twitter data available for analysis, especially when we examine the geographical distribution of susceptibility. Furthermore, platform-specific differences and variations in the types of misinformation can potentially impact the effectiveness of our model and the interpretation of susceptibility scores.

          \ No newline at end of file diff --git a/blog/2023/time-series-lstm-transformer/index.html b/blog/2023/time-series-lstm-transformer/index.html new file mode 100644 index 00000000..cd661c23 --- /dev/null +++ b/blog/2023/time-series-lstm-transformer/index.html @@ -0,0 +1,42 @@ + Predicting the Future: LSTM vs Transformers for Time Series Modeling | 6.S898 Deep Learning Blogs 2023

          Predicting the Future: LSTM vs Transformers for Time Series Modeling

          A comparison analysis between LSTM and Transformer models in the context of time-series forecasting. While LSTMs have long been a cornerstone, the advent of Transformers has sparked significant interest due to their attention mechanisms. In this study, we pinpoint which particular features of time series datasets could lead transformer-based models to outperform LSTM models.

          6.S898 Final Project - LSTM vs Transformers for Time Series Modeling

          By Miranda Cai and Roderick Huang

          1. Introduction

          In the context of time series forecasting, comparing Long Short-Term Memory (LSTM) networks to Transformers is a fascinating exploration into the evolution of deep learning architectures. Despite having distinct strengths and approaches, both LSTM and transformer models have revolutionized natural language processing (NLP) and sequential data tasks.

          LSTMs, with their recurrent structure, were pioneers in capturing long-range dependencies in sequential data. While the accuracy of such models has been shown to be quite effective in many applications, training LSTM models takes a relatively long time because of the fact that they must remember all past observances. One faster alternative to LSTM models are transformers. Transformers are able to remember only the important bits of inputs using an attention mechanism, and are also parallelizable, making them much faster to train than recursive LSTMs that must be processed sequentially.

          With its recent development, people have started opting to use transformer based models to solve sequence problems that once relied on LSTMs. One significant example is for NLP use cases, where transformers can process sentences as a whole rather than by individual words like LSTMs do. However, since transformers have been around for less than a decade, there are still many potential applications that are yet to be deeply explored. Thus, we will explore the effectiveness of transformers specifically for time series forecasting which finds applications across a wide spectrum of industries including finance, supply chain management, energy, etc.

          Our goal is to realize which particular features of time series datasets could lead transformer-based models to outperform LSTM models.

          With the growth of ChatGPT in the recent years, extensive research has been done across various NLP tasks such as language modeling, machine translation, sentiment analysis, and summarization, each aiming to provide comprehensive insights into when each architecture excels and where their limitations lie. While research on time series data exists, it hasn’t garnered as much attention, so we aim to broaden this area of study.

          2.1 Effect of Dataset Size

          The size of a dataset plays an important role in the performance of an LSTM model versus a transformer model. A study done in the NLP field compared a pre-trained BERT model with a bidirectional LSTM on different language dataset sizes. They experimentally showed that the LSTM accuracy was higher by 16.21% relative difference with 25% of the dataset versus 2.25% relative difference with 80% of the dataset. This makes sense since BERT is a robust transformer architecture that performs better with more data. As shown in the figure below from , while LSTM outperformed BERT, the accuracy difference gets smaller as the percentage of training data used for training increases.

          While we perform a similar methodology which is discussed further in section 4.1, the major difference is in the type of data we test. Instead of measuring classification accuracy for NLP tasks, this study measures the mean squared error (MSE) loss for regression time series data.

          2.2 Effect of Noisy Datasets

          Theoretically, LSTMs are more robust to noisy data due to its ability to capture local dependencies. On the other hand, the self-attention mechanisms in transformers propagate errors and may struggle with sequences that have a high degree of noise. Electronic traders have been recently attempting to apply transformer models in financial time series prediction to beat LSTMs . Largely focused on type of assets, the research showed that transformer models have limited advantage in absolute price sequence prediction. In other scenarios like price difference and price movement, LSTMs had better performance.

          Financial data sets are known to be extremely noisy, and in addition, very hard to find due to their confidential nature. The application of gave inspiration to study how the “amount” of noisiness would affect the LSTM and transformer models. Discussed further in section 4.2, this study added various amounts of noise to a clean dataset to see how this would affect each architecture.

          2.3 Effect of Multi-step Prediction

          The last feature that we would like to look at between LSTMs and transformer models is forecasting length. Forecasting length describes how far into the future we would like our model to predict based on the input sequence length. One paper done on short-term time series prediction finds that transformers were able to outperform LSTMs when it came to predicting over longer horizons. The transformer did better in all three cases when predicting one hour, twelve hours, and an entire day into the future. They accredit these results to the fact that attention better captured longer-term dependencies than recurrence did.

          Similarly to this paper, we will focus only on short-term forecasting. Short-term forecasting is important in situations like stock market predictions, where stock values show high volatility in the span of hours and may or may not have learnable trends over long periods of time.

          However, we would like to extend the results of this paper to learn to also look at multi-step prediction. This study trained models specifically to have a singular output, with each model being trained with outputs at the specified prediction horizon. Instead, we would look to train our models against outputs of different lengths. We thought it would be an interesting addition to output the entire sequence of data leading up to whatever period in the future, to give a better visualization of what actually happens as forecasting length increases.

          3. Methodology

          The dataset we will be using throughout this study is the Hourly Energy Consumption dataset that documents hourly energy consumption data in megawatts (MW) from the Eastern Interconnection grid system .

          3.1 Experiments

          We can utilize this dataset to predict energy consumption over the following features of a dataset.

          • Size of a dataset: As discussed in Section 2.1 , the size of a dataset played an impact in measuring classification accuracy for NLP tasks. Since the energy dataset is numerical, it’s important to test the same concept. We leveraged nearly 150,000 data points, progressively extracting subsets ranging from 10% to 90% of the dataset. For each subset, we trained the architectures, allowing us to explore their performance across varying data volumes.

          • Amount of noise in the dataset: As discussed in Section 2.2 , research was done to test LSTMs vs transformers on noisy stock data for various assets. We deemed the energy dataset to be relatively clean since it follows a predictable trend depending on the seasons of the year and time of the day. For example, there are higher energy levels during the winter and daytime hours. To test noise, we added incrementing levels of jittering / Gaussian noise to observe the effect of noisy data on LSTMs and transformers. Example augmentations with different variances are plotted below in blue against a portion of the original dataset in red.

          • Output size: As discussed in Section 2.3 , there have been few studies measuring the effect of varying the forecasting length, and in the ones that do they still only output one class at the specified time into the future. In our novel experimentation, we aimed to generate an entire sequence of outputs up until the specified time into the future. We created models that would predict forecasting lengths of 10%, …, 100% of our input sequence length of 10. To do so, we set the output size of our models to be equal to these forecasting lengths. This involved removing any final dense or convolutional layers.

          3.2 Selected Architectures & Setup

          Selecting the right architecture for LSTM (Long Short-Term Memory) networks hinged on several key considerations. The LSTM architecture is an extension of the RNN designed to preserve information over many timesteps. Capturing long-range dependencies requires propagating information through a long chain of dependencies, so old observations are forgotten — otherwise known as the vanishing/exploding gradient problem. LSTMs attempt to solve this problem by having separate memory to learn when to forget past or current dependencies. Visually, LSTMs look like the following .

          Transformers, initially designed for natural language processing, should work well for time series data. They operate by utilizing self-attention mechanisms, allowing them to capture long-range dependencies effectively. A transformer breaks down the input sequence into smaller, fixed-size segments known as tokens, representing various time steps or features. Through multiple layers of self-attention and feedforward operations, the transformer architecture should excel at capturing both short-term and long-term dependencies. A figure of transformer time series is shown below from .

          There were certain parameters that we kept fixed throughout all variations of our models. The first was training on batches of data with sequence length 10. Second, we trained all of our LSTM models for 500 epochs and all of our transformer models for 10 epochs. These numbers were chosen with some fine-tuning to yield meaningful results while also allowing the training for so many individual models to be done in a reasonable amount of time.

          Additionally, the data was normalized since the range of energy values was from 10000 Megawatts (MW) to 60000 MW. Normalizing the data improves convergence for gradient descent optimization and mitigates issues related to model regularization.

          4. Experimental Results and Discussion

          4.1 Size of a Dataset

          Given the energy consumption dataset described in Section 3, we trained and evaluated an LSTM model and transformer model on progressively increasing subsets ranging from 10% to 90% of the dataset. The figure below shows the normalized mean squared error (MSE) loss for each subset of the dataset.

          The experimental results show that transformers have an improving trend as the size of the dataset increases while the LSTM has an unclear trend. Regardless of the size of the training dataset, the LSTM doesn’t have a consistent result for the testing set.

          In an LSTM, there exist additional gates for a sequence of inputs $x^{(t)}$ where in addition to the sequence of hidden states $h^{(t)}$, we also have cell states $c^{(t)}$ for the aforementioned separate memory. While the LSTM architecture does provide an easier way to learn long-distance dependencies, it isn’t guaranteed to eradicate the vanishing/gradient problem discussed in Section 3.2. While the same is true for transformers, the transformer architecture addresses the vanishing/exploding gradient problem in a different way compared to LSTMs. Transformers use techniques like layer normalization, residual connections, and scaled dot-product attention to mitigate these problems.

          For time series dataset, the transformer architecture offers the benefit of the self-attention unit. In NLP, it’s typically used to compute similarity scores between words in a sentence. These attention mechanisms help capture relationships between different elements in a sequence, allowing them to learn dependencies regardless of their distance in the sequence. For time series data, transformers might offer advantages over LSTMs in certain scenarios, especially when dealing with longer sequences or when capturing complex relationships within the data such as seasonal changes in energy use.

          From a qualitative perspective, if we pull a subset of the test data to observe the predicted values from an LSTM vs a transformer for 40% of the training set, we have the following.

          While transformers did perform better than LSTMs, it’s not like the LSTM did a horrible job. We notice that at the peaks, the LSTM overshot more than the transformer and at the troughs, the LSTM undershot. However, overall, both architectures still had good results. In the context of the size of time series data, transformers do seem more promising given the loss figure above. It seems that LSTMs are losing that dependency on old observations while transformers are gaining ground as the size of the dataset increases. While showed that bidirectional LSTM models achieved significantly higher results than a BERT model for NLP datasets,

          The performance of a model is dependent on the task and the data, and therefore before making a model choice, these factors should be taken into consideration instead of directly choosing the most popular model. - Ezen-Can 2020

          For this experiment, the outlook of large datasets in time series applications for the transformer architecture looks promising.

          4.2 Amount of Noise in a Dataset

          To test the performance of our models on simulated noisy data, we first trained our models on batches of the original clean dataset and then ran our evaluations on different levels of noisy data. Random noise was added according to Gaussian distributions with variances in [0.0, 0.0001, 0.001, 0.002, 0.003, 0.005, 0.008, 0.01] to create these data augmentations. Below is a comparison of the MSE loss for both models as a function of the injected noise variance.

          Since loss is not very descriptive in itself, we also visualize the model output for some of these augmented datasets. For each graph below, red is the true value while blue is predicted value.

          LSTM Transformer

          Both models are shown to start off similarly, predicting very well with no noise. However, almost immediately we can see that the LSTM does not handle noise as well as the transformer. LSTM makes much noisier predictions with many more outliers. We suspect this occurs due to the implicit inductive bias of the memory feature in the LSTM module. Consider a time step $t$. The memory accrued up to point $t$ “weights” the data seen in recent past time steps $t-1$, $t-2$, $\ldots$, much more so than the data seen relatively long ago. While this is an intuitive design for memory, we can observe that this mechanism combines storing temporal information with token-specific information. In order to compete with a transformer, the LSTM model needs to be trained on significantly more data.

          The transformer on the other hand has the negative effects of its own inductive bias mitigated by its attention mechanism. Because the transformer has both a mechanism to account for temporal information and a mechanism to select the next associated token (attention module), and because they are separated, it is able to produce more “accurate” results.

          4.3 Prediction Size

          Finally, we created and trained separate models with varying numbers of output classes to represent the prediction size. We trained on output sizes as percentages of our input size, in increments of 10% from 0% to 100%. Because our input sequence was a constant 10 and our data is given in hourly intervals, these percentages translated to have prediction horizons of 1hr, 2hrs, …, 10hrs. Evaluating our models resulted in the following MSE loss trends.

          Again, to get a better sense of why we see these results, we visualize the outputs. Since our outputs are sequences of data, to have a more clean visualization we plot only the last prediction in the sequence. For each graph below, red is the true value while blue is predicted value.

          LSTM Transformer

          As we can see, the MSE loss of our transformer model increased at a slower rate than our LSTM model. After comparing the outputs of our models at these time steps, it becomes evident that this trend is due to the LSTM losing its characteristic shape over time. Our transformer simply performs worse when it has to predict more as expected because the data is not perfectly periodic. However, we infer that the LSTM outputs get flatter over time because the more we accumulate memory through the long-term mechanism, the less weight each previous time step holds, diluting the total amount of information carried through the sequence. Transformers avoid this problem by using their attention mechanisms instead to keep only the important information throughout.

          5. Conclusion

          Through the experiments tested in Section 4 (on dataset size, dataset noise, and prediction size), transformers seem to be a promising alternative to LSTMs for time series modeling due to their unique architecture, particularly suited for capturing long-range dependencies. Unlike LSTMs, transformers utilize self-attention mechanisms that allow them to consider relationships between all elements in a sequence simultaneously. This capability is especially advantageous in time series data, where capturing distant dependencies is crucial for accurate forecasting. Additionally, transformers mitigate vanishing gradient problems better than LSTMs, enabling more robust training on longer sequences.

          While transformers excel in parallel computation theoretically, one significant issue is the extensive memory requirements during training, especially with larger models or datasets. Transformers demand significant memory for storing attention matrices, limiting the batch size that can fit into GPU memory. So, for those who are finding an optimal architecture to train a time series dataset, one has to consider his or her own design priorities of accuracy and performance.

          All in all, the choice between LSTMs and transformers for time series datasets depends on the implementer’s design priorities and the task at hand. With some research showing LSTMs outperforming transformers and others such as our study showing the opposite, there is a clear need to dive deeper into the subject especially given the extensive number of applications for time series modeling.

          \ No newline at end of file diff --git a/blog/2023/transfer-resistant-model-training/index.html b/blog/2023/transfer-resistant-model-training/index.html new file mode 100644 index 00000000..026b10c5 --- /dev/null +++ b/blog/2023/transfer-resistant-model-training/index.html @@ -0,0 +1,42 @@ + Transfer Resistant Model Training | 6.S898 Deep Learning Blogs 2023

          Transfer Resistant Model Training

          This blog post details our work on training neural networks that are resistant to transfer learning techniques.

          Introduction and Motivation

          In transfer learning, a model is trained for a specific task and is then fine-tuned for a different task . In doing so, one tries to best leverage and reuse features and performance of the large pre-trained model for other tasks. Many works have focused on making transfer learning more robust and efficient. Transfer learning can be very useful for saving compute resources, time, and money.

          In this project, we study an opposing question: how to learn model weights that classify well for one dataset but reduce learning efficiency when transferred to another. The motivation is as follows. As computational resources and capable models become more accessible, the risk of unregulated agents fine-tuning existing models increases, including for malicious tasks. Recent work has shown that previously aligned models can be compromised to produce malicious or harmful outputs . This may even occur with a few adversarial examples against models specifically trained to produce safe outputs . Currently, risks with language models are commonly discussed. However, investigating CNNs can guide designing defenses for neural network architectures against malicious agents in general.

          To our knowledge, there exists no previous literature on learning parameters robust against transfer learning. A related field is machine unlearning. In machine unlearning, a model must forget certain pieces of data used in training . However, we wish to examine methods that not only guarantee poor performance after unlearning, but also after fine-tuning on the “malicious” or “forget” dataset. For example, using a popular unlearning approach which reaches 0% accuracy on the “forget” dataset, we easily fine-tuned the model with the same dataset to reach higher accuracy after a few epochs as shown below . This is a gap in previous work in machine unlearning and demonstrates the novelty and difficulty of learning models that not only perform poorly on specified datasets but are robust against fine-tuning.

          We propose two new approaches: selective knowledge distillation (SKD) and Reverse Model-Agnostic Meta-Learning (MAML) . In SKD, a “student” model is trained using activations of a “teacher” for the beneficial data and trained on hardcoded activations for the “malicious” data. In Reverse-MAML, we attempt to learn parameters that aren’t robust to transfer to specified tasks. Due to computational constraints, we examine a toy setting with the CIFAR-10 Dataset as well as using a small CNN model shown in the appendix . Overall, both the Reverse-MAML and SKD approach exceed baseline approaches on scoring good accuracy on a “beneficial” dataset while being on-par with preventing fine-tuning on a “malicious” dataset. Thus, there remain limitations, and we conclude with future work.

          1. Transfer Learning

          As mentioned previously, transfer learning has been a long-time objective in deep learning research . By training a model on one dataset, the goal is to be able to reuse parameters and learned features to achieve high performance or efficient learning for another dataset. Transfer learning for convolutional neural networks has been a popular approach, allowing users to train a high-performance model with limited computational resources or data . Further work has analyzed settings for successful transfer learning in image classification and further challenges when there is poor transfer .

          2. Model-Agnostic Meta-Learning (MAML)

          MAML is an algorithm that makes models readily adaptable to new tasks . It essentially primes the model for transfer learning as effectively as possible. The algorithm attempts to learn parameters and model weights such that a few steps of gradient descent on learning a new task will lead to good performance on said new task. Further work has continued attempting to meta-learn useful model parameters, building off of MAML .

          3. Machine Unlearning

          A closely aligned question to ours is the problem of machine unlearning. Machine unlearning attempts to remove the influence of a set of data points on an already trained model. In this setting, a model is initially trained on some dataset . The model embeds information about and “remembers” features about data points. This means that the model uses information about the data points to make decisions. For example, LLMs like GPT can learn sensitive information about some people . This might pose a threat to privacy. We may want the model to “forget” some subset of the training set, in this case information about the people. However, we currently have no standardized method of doing this. Machine unlearning is a nascent field in artificial intelligence research and is currently being studied. It is a difficult problem, and our work is tangential to machine unlearning.



          To our knowledge, there hasn’t been any research on models that are resistant to transfer learning and fine-tuning. The works mentioned above, transfer learning techniques and MAML, focus on improving fine-tuning. We aim to make fine-tuning more difficult while preserving robustness on the original task. Machine unlearning seeks to forget data that the model has been previously trained on. On the other hand, our goal is to preemptively guard the model from learning certain data in the first place. Thus, our research question demonstrates a clear gap in existing research which has focused on either improving transfer learning or only reducing model performance on external datasets. Our research explores this new question in the deep learning field and draws from recent works to guide methodology.

          Methods

          We propose three methods, one existing and two novel, to begin addressing the problem of learning parameters that score high accuracy on a “beneficial” dataset but are robust against transfer learning on a known “malicious” dataset. Further experimental details are found in the experiments section.

          1. Machine Unlearning

          The first approach is a baseline and reimplementation of a popular machine unlearning method from . Here, the model is initially trained on both the “beneficial” and “malicious” dataset and undergoes a forgetting stage where the “malicious” dataset is forgotten using a noise matrix. A final repair stage is then conducted to improve performance of the model on the “beneficial” dataset. Specific details can be found at .

          2. Selective Knowledge Distillation

          Our first proposed novel approach is selective knowledge distillation (SKD) drawing inspiration from knowledge distillation. In knowledge distillation, a smaller “student” model is trained to imitate a larger “teacher” model by learning logits outputs from the “teacher” model. In doing so, the “student” model can hopefully achieve similar performance to the “teacher” model while reducing model size and complexity.

          In SKD, we similarly have a “teacher” and “student” model. The “teacher” is a model that has high accuracy on the “beneficial” dataset but is not necessarily robust against fine-tuning on the “malicious” dataset. Our “student” model is almost identical in architecture to the “teacher” but excludes the final classification layer and the ReLU layer before it. This is shown below.

          Our goal is for the student model to have high performance on the “beneficial” dataset after adding a classification layer while being robust against fine-tuning on the “malicious” dataset. To perform SKD, we initially train the teacher model until reaching sufficiently high performance on the “beneficial” dataset.

          We then construct a dataset that contains all the images in the “beneficial” dataset. The labels are activations of the second-to-last layer of the “teacher” model. Note that this is similar to knowledge distillation, except we are taking the second-to-last layer’s activations. We further add all the images in the “malicious” dataset and set their labels to be a vector of significantly negative values. For our experiments, we used -100.0. We train the student model on this collective dataset of images and activation values.

          Finally, we add a fully-connected classification layer to the student model and backpropagate only on the added layer with the “beneficial” dataset.

          Our end goal is to prevent fine-tuning of our CNN on the “malicious” dataset. Thus, if the student model can output activations that all are negative if the image belongs in the “malicious” dataset, then after appending the ReLU layer and setting biases of the second-to-last layer to 0, the inputs to the final classification layer will always be 0, reducing the ability to learn on the “malicious” dataset. Furthermore, the gradient will always be 0 on inputs from the “malicious” dataset so any backpropagating on images and labels originating from the “malicious” dataset from the final layer activations would be useless.

          3. Reverse-MAML

          Recall that MAML is focused on finding some optimal set of model weights \(\theta\) such that running gradient descent on the model from a new few-shot learning task results in a \(\theta’\) that scores high accuracy on the new task . MAML achieves this by learning the optimal \(\theta\). To learn this \(\theta\), MAML computes the second order gradient on the model weights. This allows the model to learn about where the initial \(\theta\) should have been before an iteration of gradient descent so that taking the step of gradient descent would have led to the minimal loss.

          In our version, we attempt to learn a \(\theta\) that fine-tunes well to a data distribution \(p_1\) but fine-tunes poorly to distribution \(p_2\). To do this, we partition the data into two sets: a “good” set and a “bad” set. We train such that for “good” samples MAML performs the standard algorithm above, learning \(\theta\) that would fine-tune well to the “good” samples. However, for the “bad” set we train the model to do the opposite, learning a \(\theta\) that would lead to poor fine-tuning. To do this, when taking the second order gradient, the model goes up the gradient instead of down.

          Experiments

          Due to computational constraints, we work in the following toy setting. We use the CIFAR-10 dataset where images in the first five ([0, 4]) classes are the “beneficial” dataset and the images in the last five ([5, 9]) classes are the “malicious” dataset. We split the 60,000 CIFAR-10 image dataset into a 40,000 image pre-training dataset, 10,000 image fine-tuning dataset, and 10,000 image test dataset. To evaluate each approach, we first evaluate the accuracy of the model on the beneficial test dataset. Then, we replace the last layer parameters of the output model, freeze all previous layer’s parameters, and finally fine-tune on the malicious fine-tuning dataset. We fine-tune using the Adam optimizer with a learning rate of 0.1 and momentum of 0.9. We finally evaluate model performance on a malicious test dataset. These steps in this evaluation represent the common pipeline to perform transfer learning and are shown below. Full hyperparameters for evaluation are listed in the appendix. We also perform ablation studies on the quality of the teacher model for SKD; further details are found in the Discussion section. All experiments, including ablations, are performed and averaged over 5 random seeds.

          Results

          The first evaluation metric is accuracy of the outputted model from each approach on beneficial data. This is shown in the figure below.

          Figure 1

          The second metric of evaluation is the accuracy of the output model from each approach on test malicious data as it’s being fine-tuned on fine-tune malicious data. This is shown with learning curves in the figure below. Note that lower accuracy is better.

          Figure 2

          Discussion

          We observe that finding parameters that have high accuracy on a “beneficial” dataset but are robust against fine-tuning on a “malicious” dataset is challenging. On all three methods, including a popular machine unlearning approach, the model is able to somewhat fit to the “malicious” dataset. However, for SKD, this accuracy consistently does not significantly exceed 40%.

          More importantly, we find in Figure 1 that both Reverse-MAML and SKD are able to score higher accuracy on the beneficial dataset. This is surprising as machine unlearning methods were designed to maintain high accuracy on a retain dataset. Combining these two graphs, we conclude that there remains future work to explain why the resulting models had such high accuracy on the malicious data out-of-the-box and how to minimize it.

          We also experimented with Reverse-MAML under the Omniglot dataset . Here, we attempted to fine-tune on digit images. We found that Reverse-MAML performed very well in this setting. After training the Reverse-MAML model, the model held around 85% test accuracy on the “Beneficial” Omniglot dataset and around 20% on the “Malicious” digit dataset. On the digit set, the model would often predict the same digit for all samples, as shown below. We believe that Reverse-MAML performed better here because the Omniglot characters and the digits are simpler to interpret and learn specific features about compared to CIFAR-10.

          All digits were predicted to be a 2.

          Slow learning in SKD is likely caused by filtering by the ReLU activation function which causes activations to become 0. This ideally occurs when we train the student model to output negative activation values into the final classification layer if the input is from the “malicious” dataset. These values make it more difficult to learn useful weights for the final classification layer and apply gradient descent on earlier layers. We confirm this by measuring misses or the percent of “malicious” images that don’t result in all 0 activations into the final classification layer shown below. We show, in general, misses are low across different teacher models. For this ablation, we vary teacher models by the number of epochs they are trained.

          We also measure how accuracy of the teacher model impacts performance of the student downstream. We vary the number of epochs the teacher model is trained in and report accuracies of the teacher model on the “beneficial” dataset below. More importantly, we empirically show that high teacher accuracy on the “beneficial” dataset is needed for the student to achieve high accuracy on the “beneficial” dataset. This follows our knowledge distillation framework as the student attempts to mimic the teacher model’s performance on the “beneficial” dataset by learning activation values.

          Limitations and Future Work

          1. Requirement for “Malicious” data

          The motivating example for this project was preventing a malicious agent from hijacking a model to perform undesirable tasks. However, it is often not possible to list out every possible “bad” task, and thus future work which extends from this project can explore how to prevent fine-tuning of tasks that aren’t specified as clearly and completely.

          2. Computational Restraints

          Due to computational restraints, we were unable to test or fine-tune models with significantly higher parameter counts or experiment with larger datasets. However, this remains an important step as transfer learning or fine-tuning is commonly applied on large models which we could not sufficiently investigate. Thus, future work can apply these existing methods on larger models and datasets.

          3. Exploration of More Methods in Machine Unlearning and Meta-Learning

          Further analysis of existing methods in machine unlearning and meta-learning can be used to benchmark our proposed approaches. Though we tried to select methods that had significant impact and success in their respective problem settings, other approaches are promising, including using MAML variants like Reptile or FOMAML .

          4. Imperfection in filtering “malicious” data for SKD

          Ideally, in SKD, the underlying model would always output negative activation values given a “malicious” input. However, this does not always occur, and thus fitting on the malicious data is still possible. Future work can explore how to improve this, though perfect accuracy will likely not be feasible. Furthermore, it is still possible for a malicious agent to hijack the model by performing distilled learning on the second-to-last layer activations, thus removing this ideal guarantee. Future work can also investigate how to have similar guarantees throughout all of the model’s activation layers instead of just one.

          Conclusion

          In this project, we investigated how to train a model such that it performs well on a “beneficial” dataset but is robust against transfer learning on a “malicious” dataset. First, we show this is a challenging problem, as existing state of the art methods in machine unlearning are unable to prevent fine-tuning. We then propose two new approaches: Reverse-MAML and SKD. Both serve as a proof of concept with promising preliminary results on the CIFAR-10 Dataset. We conclude by noting there are limitations to this work, most notably the need for a “malicious” dataset and computational limits. We then propose future work stemming from these experiments.

          Appendix

          CNN Architectures used for experiments:

          • Note, all graphs and tables are averaged over 5 seeds with reported standard deviation.
          \ No newline at end of file diff --git a/blog/2023/transformer-elo-prediction/index.html b/blog/2023/transformer-elo-prediction/index.html new file mode 100644 index 00000000..521b9484 --- /dev/null +++ b/blog/2023/transformer-elo-prediction/index.html @@ -0,0 +1,116 @@ + Emoji3Vec | 6.S898 Deep Learning Blogs 2023

          Emoji3Vec

          Our project seeks to expand on the previous attempts at "emoji2vec", or generating semantically meaningful embeddings for emojis.

          Introduction

          In machine learning, models often create or learn internal representations for the inputs they are given. For instance, an image might become a vector containing the RGB data for every pixel. These internal representations are then processed and transformed until the model finally translates its representation into the desired output form (via softmax over all output possibilities, for example).

          The lower dimensional internal representations, known as embeddings, can often carry semantic meaning which can help us understand the data better. Inspired by word2vec, a project for learning embeddings for words, we attempt to learn embeddings for emojis that are semantically interpretable. Learning accurate representations is important for downstream tasks, for example: sentiment analysis and other kinds of classification run better with useful embeddings.

          Background and Related Work

          Although similar ideas have been explored in the past, we felt that there was still a gap in prior research: specifically, we wanted to create a lightweight model that still learned emoji embeddings directly from data and context.

          First, it is important to mention the influential and well known Efficient Estimation of Word Representations in Vector Space, commonly referred to as word2vec . Word2vec was trained on a massive dataset of around 6 billion words, and was able to produce some very accurate embeddings that were proven to be useful in downstream tasks. For instance, doing the following arithmetic on the embeddings associated with each word produced: King - Man + Woman = Queen. This was an incredible result and inspired much work in the NLP domain in the following years.

          In 2016, a paper called emoji2vec: Learning Emoji Representations from their Description was published. As the name suggests, this paper sought to produce embeddings for emojis to be used in the same vector space as the word2vec embeddings, and attempted to do it by using emoji descriptions. The researchers trained their model with baseline embeddings taken directly from summing the word embeddings for each word in every emoji’s description. For instance, the embedding for “😍” began as the sum of the word embeddings (taken from word2vec) of: “smiling” + “face” + “with” + “heart” + “eyes”. The main benefit of this approach was a strong baseline that could be established without any training data. Recently, in 2021, another paper called Emojional: Emoji Embeddings was published that extended this approach, adding in additional words (that are related, as judged by Google News) to each baseline embedding. For instance, “✊” was set to be the result of: “raised fist” + “resistance” + “black lives matter” + …

          After considering the above papers, we decided to create a model that would train similarly to word2vec (using naturally sourced data, and from context as opposed to a description) that also was able to produce useful embeddings on smaller amounts of data/memory/training time. Specifically, we felt that the descriptions would err when emojis began to mean different things than they are described as. For instance, the skull emoji is perhaps more often used to indicate embarrassment or disagreement than actual death or skulls. This is addressed somewhat in the 2021 Emojional paper, but that one is very limited by the exact words it puts into each emoji’s embedding, and is less adaptable to new meanings. Further, we felt that there was value in creating a more lightweight model that was still able to produce meaningful representations, both to simply be easier to train and run and also to perhaps find optimizations that wouldn’t have been found if we had the option of just training on a larger set of data/training for a longer time.

          Methods and Results

          We trained two sets of emoji embeddings to map emojis to the same 300-dimensional space as the one FastText uses for its word embeddings. The first was trained on a set of emoji descriptions, with the intention to learn emoji embeddings that reflect the literal appearances of each emoji. We closely follow the methodology as described in the emoji2vec paper to use as a baseline. The second was trained on a set of emoji-containing tweets, with the intention to learn emoji embeddings that reflect how they’re used online.

          Training Emoji Embeddings with Descriptions

          Data Cleaning

          We started with a dataset of emoji descriptions from the Unicode emoji list. After cleaning, we were left with about 6000 descriptive phrases for 1661 emojis within a Python dictionary mapping emojis to various corresponding descriptions. Examples of entries include:

          1. ‘🐏’: {‘horn’, ‘horns’, ‘male’, ‘ram’, ‘sheep’}
          2. ‘🆘’: {‘distress signal’, ‘emergency’, ‘sos’, ‘squared sos’}
          3. ‘👷’: {‘builder’, ‘construction worker’, ‘face with hat’, ‘safety helmet’}

          One detail is that we had to generate a bijective mapping between emojis and integers for model training. We encourage those attempting similar projects to save this mapping (in a pickle file, for example) for later use. Leon was very sad when he lost this mapping and couldn’t make sense of his first trained model’s outputted embeddings.

          a visualization of how we cleaned our data, from an example of a tweet

          Generating Training and Test Data

          With a representation learning framework in mind, we randomly generated positive and negative descriptions for each emoji. We defined an emoji’s positive samples as descriptions that truly corresponded to the emoji, and we defined its negative samples as other descriptions in the dataset that weren’t used to describe the emoji. Guided by the emoji2vec paper, we generated positive and negative samples in a 1:1 ratio.

          Model Training

          After generating positive and negative samples, we used a pretrained FastText model to calculate the average of the embeddings of each word in each description. Put mathematically, if we let the sequence of words in a description be \(w_1, w_2, \dots, w_k\), the set of all strings be \(\mathcal{W}\), and the FastText model be expressed as a mapping \(f: \mathcal{W} \mapsto \mathbb{R}^{300}\), we calculated our description embeddings as

          \[\frac{1}{k}\sum_{i=1}^kf(w_i).\]

          This is a notable deviation from the methodology as described in the emoji2vec paper. Instead of using word2vec embeddings, we chose FastText because it uses sub-word tokenization and thus supports out-of-vocabulary strings as input. We also averaged the description embeddings instead of simply taking a summation to normalize for description length.

          #creates a dictionary mapping descriptions to avg. word embeddings
          +
          +descr_to_embedding = dict()
          +
          +for descr in all_descriptions:
          +	word_lst = descr.split(' ') #split description into list of words
          +	embed_lst = []
          +
          +	for i in range(len(word_lst)): #repl. words by their embeddings
          +		embed_lst.append(torch.tensor(ft[word_lst[i]]))
          +	avg_embedding = torch.mean(torch.stack(embed_lst, dim=0), dim=0) #take mean over embeddings
          +
          +	descr_to_embedding[descr] = avg_embedding
          +

          We again followed the emoji2vec training methodology. For every emoji embedding \(x_i\) and description embedding \(v_i\), the authors model \(\sigma(x_i^T v_j)\) as the probability of the description matching with the emoji, where \(\sigma\) is the sigmoid function. Then our model minimizes the binary cross-entropy loss function

          \[\mathcal{L}(x_i,v_j,y_{ij}) = -\log(\sigma(y_{ij}x_i^T v_j - (1-y_{ij})x_i^T v_j))\]

          where \(y_{ij}\) is 1 when \(v_j\) is a positive sample and 0 otherwise.

          The authors don’t describe the exact model architecture used to learn the emoji embeddings, so we likely also deviate in methodology here. Our model is very simple: on some input emoji \(x_i\), we pass it through an nn.Embedding() module, compute \(\sigma(x_i^T v_j)\), and pass it to nn.BCELoss(). This way, the only learnable parameters in the model are in nn.Embedding(), and model training is as efficient as possible.

          # the main model class
          +# follows the Emoji2Vec training
          +
          +class  EmojiDict(nn.Module):
          +
          +def  __init__(self, n_emojis):
          +	# n_emojis: the number of emojis we're learning representations of
          +
          +	super().__init__()
          +	self.embedding = nn.Embedding(
          +		num_embeddings = n_emojis,
          +		embedding_dim = 300  # size of word2vec embedding
          +	)
          +	self.sigmoid = nn.Sigmoid()
          +
          +def  forward(self, x, sample):
          +	# x: a batch of emoji indices, shape (B, )
          +	# sample: a batch of avg'd embeddings, shape (B, 300)
          +
          +	x = self.embedding(x)
          +
          +	# performing a batched dot product
          +	x = torch.unsqueeze(x, dim=1) #(B x 1 x 300)
          +	sample = torch.unsqueeze(sample, dim=2) #(B x 300 x 1)
          +	result = torch.bmm(x, sample) #(B x 1 x 1)
          +	result = torch.flatten(result) #(B, )
          +
          +	result = self.sigmoid(result) #should output probabilities
          +
          +	return result #should be shape (B, )
          +

          t-SNE on Learned Embeddings

          We trained the model for 60 epochs over an 80-20 train-test split of 250 positive and 250 negative samples for each emoji. We used an Adam optimizer with the default parameters, and model training took roughly an hour. The model achieved 0.19 logloss and 0.98 accuracy on a validation set.

          After the model was trained, we took emoji embedding weights from the model’s nn.Embedding() module and projected them down to two dimensions using t-SNE.

          We can see that the model is excellent at grouping emojis that have similar appearances. Nearly all the faces are in the top-left, the zodiac symbols are in the bottom-left, the flags are at the bottom, the foods are on the right, the modes of transportation are in the top-right… the list can keep going. While there are some random emojis scattered about, similar emojis generally are similar in embedding space as well.

          Emoji-Emoji Similarities

          To confirm this idea quantitatively, we can fix individual emojis and look at its nearest neighbors in embedding space with cosine distance.

          Emoji 1-NN 2-NN 3-NN 4-NN 5-NN 6-NN 7-NN 8-NN 9-NN 10-NN
          😍 💖 😻 😄 😀 😚 💟 😘 😊 😽 💑
          😀 😄 😊 😃 🙂 😑 😁 😸 🤗 😆 🤧
          💀 🆎 🌫 🐁 🌮 🦅 🐙
          🚀 🛰 👽 🚡 🛳 📡 🚢 📋 🚎 🆚 🛥

          We see here that the nearest neighbors also generally make sense. 😍’s nearest neighbors all involve love or positive emotions, and 🚀’s neighbors are generally about space or modes of transport. Interestingly, only 💀’s first neighbor seems remotely similar to it. We believe that this is just because death is a mostly unrepresented theme in emojis.

          Word-Emoji Similarities

          Since we trained emoji embeddings into the same space as the FastText word embeddings, we can also look at the nearest emoji neighbors to any English word!

          Word 1-NN 2-NN 3-NN 4-NN 5-NN 6-NN 7-NN 8-NN 9-NN 10-NN
          happy 😃 😺 😌 😹 🏩 😊 💛 😂 😞 😁
          sad 😔 😭 😒 🙁 😟 😞 🙍 😢 😁 😯
          lit 🚨 🕎 🌆 🔦 📭 🎇 🕯 💫 🏥 💡
          bitcoin 💛 🤑 🎮 💙 🌈 🤓 📱 📅 🐰 🍆

          Again, the nearest neighboring emojis generally make sense. Bitcoin’s nearest neighbors are considerably less accurate than the others. Since our nearest neighbors are much more accurate for other English words like “cryptocurrency”, we attribute this inaccuracy to FastText having poor embeddings for “Bitcoin”, which was a much less popular word when FastText was trained (in 2015).

          One thing to note from these nearest-neighbor tables is that embeddings trained with the emoji2vec method take words very literally. “🚀” is related to space and transportation, and “lit” is related to things that literally light up. As such, these embeddings won’t adjust to semantic changes in emojis as slang develops and people become increasingly clever in their emoji use.

          Training Emoji Embeddings with Twitter Data

          Data Cleaning

          We started with a dataset of emoji-containing tweets. Motivated by the data cleaning done in the emojiSpace paper, we remove duplicate tweets, numbers, hashtags, links, emails, and mentions. Then, we extract the “context” words and emojis around each emoji with a window size of 4 in both directions and tokenize it. We cleaned only a subsample of the tweets due to constraints on memory and compute. Even so, after cleaning, we were left with about 272,000 contexts for 1251 emojis. Examples of contexts for the emoji 🤑 include:

          1. (‘the’, ‘promotion’, ‘code’, ‘works’, ‘we’, ‘will’, ‘be’, ‘giving’)
          2. (‘my’, ‘grind’, ‘all’, ‘week’, ‘i’, ‘ain’t’, ‘been’, ‘getting’)
          3. (‘cash’, ‘in’, ‘on’, ‘sunday’, ‘thank’, ‘you’, ‘so’, ‘much’)

          Generating Training and Test Data

          With a representation learning framework in mind, we randomly generated positive and negative descriptions for each emoji. We defined an emoji’s positive samples as descriptions that truly corresponded to the emoji, and we defined its negative samples as other descriptions in the dataset that weren’t used to describe the emoji. Guided by the emoji2vec paper, we generated positive and negative samples in a 1:1 ratio.

          As in the earlier model, we randomly generated positive and negative contexts for each emoji. We defined an emoji’s positive samples equivalently as before, but this time we used the set of all contexts across all emojis as the set of negative examples. Doing this is obviously not ideal, but it provided a huge performance boost when generating data. Additionally, with such a large dataset, drawing a positive sample as a negative one happens relatively infrequently.

          Model Training

          The training method we used for this model was nearly identical to that of the first model, and similar to the Continuous Bag-of-Words (CBOW) method for training word2vec. For every context, we calculated the average of the individual word embeddings using FastText. Often, another emoji would be part of the context; such emojis would be passed into the nn.Embedding() module as well to produce an embedding to be passed into the average. The model architecture remained nearly identical, and continued using binary cross-entropy loss as our loss function.

          Our model architecture differs somewhat from the original word2vec model, which uses a cross-entropy loss over the entire vocabulary of words as its loss function. While we may lose some expressivity by using binary cross-entropy instead, we believe that making this change made our model more lightweight and easier to train.

          # the main model class
          +# essentially a CBOW on emojis
          +
          +class  EmojiCBOW(nn.Module):
          +
          +	def  __init__(self, n_emojis):
          +		# n_emojis: the number of emojis we're learning representations of
          +
          +		super().__init__()
          +		self.embedding = nn.Embedding(
          +			num_embeddings = n_emojis,
          +			embedding_dim = 300  # size of word2vec embedding
          +		)
          +
          +		self.sigmoid = nn.Sigmoid()
          +
          +	def  forward(self, x, embeddings, emojis, masks):
          +		# x: a batch of emoji indices, shape (B, )
          +		# embeddings: a batch of summed word embeddings from context, shape (B x 300)
          +		# emojis: a batch of in-context emoji indices, with -1 as a placeholder, shape (B x 8)
          +		# masks: a batch of masks for the relevant emoji indices, shape (B x 8)
          +
          +		x = self.embedding(x)
          +
          +		masks_unsqueezed = torch.unsqueeze(masks, dim=2) # get the dimensions right
          +		emoji_embeddings = self.embedding(emojis * masks) * masks_unsqueezed # apply embeddings to emojis w/ mask applied, (B x 8 x 300)
          +		emoji_embeddings = torch.sum(emoji_embeddings, dim=1) # sum across embeddings, (B x 300)
          +		tot_embeddings = embeddings + emoji_embeddings # (B x 300)
          +		tot_embeddings = tot_embeddings / 8 # get avg embeddings, could help w/ numerical stability?
          +
          +		# performing a batched dot product
          +		x = torch.unsqueeze(x, dim=1) #(B x 1 x 300)
          +		tot_embeddings = torch.unsqueeze(tot_embeddings, dim=2) #(B x 300 x 1)
          +
          +		tot_embeddings = tot_embeddings.to(torch.float) / 8
          +		result = torch.bmm(x, tot_embeddings) #(B x 1 x 1)
          +		result = torch.flatten(result) #(B, )
          +
          +		result = self.sigmoid(result) #should output target probabilities
          +
          +		return result #should be shape (B, )
          +

          t-SNE on Learned Embeddings

          We trained the model for 80 epochs over an 80-20 train-test split of 250 positive and 250 negative samples for each emoji. We used an Adam optimizer with the default parameters, and model training took roughly two hours. The model achieved 0.39 logloss and 0.79 accuracy on a validation set.

          After the model was trained, we took emoji embedding weights from the model’s nn.Embedding() module and projected them down to two dimensions using t-SNE.

          The model does reasonably well at clustering similar emojis together; as before, the flags, faces, and numbers are close together in embedding space. However, the quality of this clustering is noticeably worse than it was in the baseline model. We attribute this to the quality of the dataset and to the increased difficulty in the learning task. The emoji descriptions were clean, precise, and informative; tweets are generally none of those three. Additionally, learning embeddings from contexts has historically required a lot of training data and compute to perform successfully. We, however, only had the compute and memory to sample 500 tweets per emoji, which is only a tiny sample from the massive distribution of possible contexts that may surround any given emoji. Producing emoji embeddings that outperform the baseline model would require much more training data and time than what Colab offers.

          While these embeddings lose to the baseline embeddings in overall quality, they have certain properties that the baseline embeddings lack. Namely, since these embeddings were trained on a much more varied and organic dataset, they encode emoji use cases beyond what emojis literally mean. Specifically, they can learn from slang.

          Emoji-Emoji Similarities

          To illustrate this, we can look at the nearest neighbors of the same four emojis that were presented earlier. We narrow down our search to the top-200 most common emojis in our dataset because those were likely learned the best by our model.

          Emoji 1-NN 2-NN 3-NN 4-NN 5-NN 6-NN 7-NN 8-NN 9-NN 10-NN
          😍 🏆 💜 🎉 🇩🇪 💘 💖 👑 💞 💪 🇧🇷
          😀 📚 😆 😏 🎉 😌 😫 🔗 🙂 🇫🇷
          💀 😭 🍆 😓 🤤 💔 😩 🐥 😮 🐻 🍑
          🚀 💸 🔹 💯 🎯 💵 2️⃣ 👋 💰 😤 😎

          We see here that the nearest neighbors for 😍 and 😀 are noticeably less intuitive than the ones in the baseline model, though some still make sense. Interestingly, however, 💀 has become more associated with strong emotions like 😭 and 😩. This correlates with the online slang “I’m dead,” which expresses a strong (could be both positive or negative) emotional response to something. Additionally, 🚀 has become more associated with money, which correlates with the use of 🚀 to indicate a stock or asset going “to the moon.”

          Word-Emoji Similarities

          We can also observe this phenomenon in the cosine similarities between words and emojis. We use the same words as above, and again we narrow our nearest neighbors search to the top 200 most popular emojis.

          Word 1-NN 2-NN 3-NN 4-NN 5-NN 6-NN 7-NN 8-NN 9-NN 10-NN
          happy 😃 😺 😌 😹 🏩 😊 💛 😂 😞 😁
          sad 😒 😻 👏 😥 😭 😓 😣 😔 😂 😪
          lit 🔥 🚨 😍 😎 💯 💣 🇺🇸 🗣 💫
          bitcoin 💵 🎉 😱 💸 🤑 🔹 🇮🇳 🍃 😆 🌊

          As before, the nearest neighboring emojis generally make sense, but are less accurate than the neighbors in the baseline model. At the same time, the nearest neighbors now align more closely with slang (or “new” words like bitcoin). “Lit” now is more related to a feeling of firm agreement, and “bitcoin” is now more related to money. In both cases, the nearest neighbors align more with the words’ common usages than their literal meanings.

          Conclusion

          Future Work

          Given the time and computational constraints we had for this project, we had to pass on many paths for future exploration. We list a few in this section.

          1. We would’ve liked to train our second model for much longer on a much larger dataset of tweets. Only about 400 of our emojis had over 50 tweets associated with them. This greatly restricted their positive sample sets, which likely resulted in far-from-optimal emoji embeddings.

          2. We also considered training a more expressive neural architecture for our second model. One word2vec CBOW implementation we found used a Linear layer after the Embedding layer. It projected the 300-dimensional embeddings into embeddings with dimensionality equal to the size of the emoji vocabulary to learn embeddings via a multi-class classification problem. We ultimately decided against using such a model because we doubted that we had the time, data, and compute to train a more complex model.

          3. Something we realized towards the end of our model training was that the embeddings from the first model could be used to inform training on our second model. It would be interesting to see if transfer learning could result in increased performance for our second model, especially since many emojis were underrepresented in our dataset of tweets.

          Discussion

          Overall, despite the limitations, our lightweight model achieved reasonable accuracy with less than optimal conditions. One other challenge we faced had to do with Colab’s memory constraints: we were only able to train on a small set of data and were forced to generate positive and negative pairs over and over from the same set. Given a larger and more diverse set of positive/negative pairs, we believe our model could have performed even better.

          Furthermore, we felt that our CBOW model definitely could add value for people solving downstream tasks, such as sentiment analysis. The emoji2vec model of summing the emoji’s description’s word embeddings is useful when there are few datapoints for each emoji, but the CBOW approach captures more subtle meanings and is much more accurate to how people actually use emojis in their day to day life—both have their merits.

          \ No newline at end of file diff --git a/blog/2023/transformer-time/index.html b/blog/2023/transformer-time/index.html new file mode 100644 index 00000000..8e67cc59 --- /dev/null +++ b/blog/2023/transformer-time/index.html @@ -0,0 +1,32 @@ + A Comparative Study of transformer on long sequence time series data | 6.S898 Deep Learning Blogs 2023

          A Comparative Study of transformer on long sequence time series data

          This study evaluates Transformer models in traffic flow prediction. Focusing on long sequence time-series data, it evaluates the balance between computational efficiency and accuracy, suggesting potential combinations of methods for improved forecasting.

          Abstract

          This research aims to discover the power of the transformer in dealing with time series data, for instance traffic flow. The transformer with its multihead self-attention mechanism is well-suited for tasks like traffic prediction as it can weigh the importance of various aspects in the traffic data sequence, capturing both long-term dependencies and short-term patterns. Compared to the LSTM, the transformer owns the power of parallelization, which is more efficient when facing a large dataset. And it can capture the dependencies better with long sequences. However, the transformer may have trouble dealing with long sequence time-series data due to the heavy computation. This research compares different methods that make use of the information redundancy, and their combinations, from the perspective of computational efficiency and prediction accuracy.

          Introduction

          The time series data processing and prediction are usually conducted with RNN and LSTM. In the case of traffic prediction, CNN and GNN are combined for efficiently capturing spatial and temporal information. And LSTM is widely used for its better performance on capturing temporal dependencies. Recent studies have proposed to replace RNNs with the Transformer architecture as it is more efficient and able to capture sequential dependencies. However, the model is inapplicable when facing long sequence time-series data due to quadratic time complexity, high memory usage, and inherent limitations of the encoder-decoder architecture.

          Not all time series are predictable; the ones that are feasible to forecast should contain cyclic or periodic patterns. This indicates that there is redundant information in long sequence data. The boundary of the redundancy can be measured by the optimal masking ratio when using MAE to process the dataset. Natural images are more information-redundant than languages and thus the optimal masking ratio is higher. BERT uses a masking ratio of 15% for language, MAE uses 75% for images, and the optimal ratio for video is up to 90%. Traffic data is potentially redundant. It contains temporal and spatial information so that neighboring sensors can provide extra information in addition to temporal consistency. We deduced that the optimal ratio for traffic data should be located between image and video, as it carries more dimensions of information than images while the speed captured by sensors is not as consistent as the frames in videos. We use the GRIN model to mask the input data using the Metr_LA dataset to test the redundancy of traffic data. The results show that it is tolerant when the masking ratio is lower than 90%. Then there is the possibility of using a distilling operation to compress information, reducing computational requirements and memory usage. Similar to traffic data, most time series data are multivariate.

          Table 1: Performance comparison with baseline models and GRIN with various masking ratio. (by Tinus A,Jie F, Yiwei L)

          Methodology

          The information redundancy leads to the common solutions of using transformers to deal with long sequence time-series forecasting (LSTF) problems, where models focus more on valuable datapoints to extract time-series features. Notable models focusing on the less explored and challenging long-term time series forecasting (LTSF) problem include LogTrans, Informer, Autoformer, Pyraformer, Triformer and the recent FEDformer. There are several main solutions:

          Data decomposition. Data decomposition refers to the process of breaking down a complex dataset into simpler, manageable components. Autoformer first applies seasonal-trend decomposition behind each neural block, which is a standard method in time series analysis to make raw data more predictable. Specifically, they use a moving average kernel on the input sequence to extract the trend-cyclical component of the time series. The difference between the original sequence and the trend component is regarded as the seasonal component.

          Learning time trend. Positional embeddings are widely used in the transformer architecture to capture positional information. Moreover, additional position embeddings can help the model to understand the periodicity inherent in traffic data, which implies applying relative or global position encoding in terms of weeks and days.

          Distillation. The Informer model applies a ProbSparse self-attention mechanism to let each key attend only to several dominant queries, and then uses the distilling operation to deal with the redundancy. The operation privileges the superior ones with dominating features and makes a focused self-attention feature map in the next layer, which trims the input’s time dimension.

          Patching. As proposed in ViT, the patch embeddings are small segments of an input image, which transfer the 2D image to 1D sequence. Each patch contains partial information of the image and additional positional embedding helps the transformer to understand the order of a series of patch embeddings. In the case of time series, though it is 1D sequence that can be received by standard transformer, the self-attention may not efficiently capture the long dependencies and cause heavy computation. Hence, dealing with time-series data, patching is used to understand the temporal correlation between data in a time-step interval. Unlike point-wise input tokens, it enhances the locality and captures the comprehensive semantic information in different time steps by aggregating times steps into subseries-level patches.

          Experiment

          Dataset

          We used a multivariate traffic dataset (https://pems.dot.ca.gov/) that records the road occupancy rates from different sensors on San Francisco freeways. We selected the first 100 sensors as our experiment dataset.

          Experimental Settings

          We choose two models, Informer and PatchTST (supervised), to test the influence of distillation, positional embeddings, patching and data decomposition. For the implementation of Informer and PatchTST, we used the code provided by the authors (https://github.com/yuqinie98/patchtst). We aim to compare different methods that attempt to efficiently explore long sequence data, considering both efficiency and accuracy. This leads to a discussion about the trade-off when using these models to solve real life cases and the possibility of improving or combining different methods.

          Figure 1: Informer architecture.
          Figure 2: PatchTST architecture.

          Setting 1. Compare the efficiency and accuracy of distillation and patching. All the models follow the same setup, using 10 epochs and batch size 12 with input length \(\in\) {96,192,336,720} and prediction length \(\in\) {96,192,336,720}. The performance and cost time are listed in Table 2.

          Setting 2. Explore the influence of data decomposition. We slightly change the setup to compare different methods. We apply the data decomposition with PatchTST to explore the significance of these techniques.

          Result

          Table 2: Setting 1. Traffic forecasting result with Informer and supervised PatchTST. Input length in {96,192,336,720} and prediction length in {96,192,336,720}.
          Figure 3: Setting 1. Traffic forecasting result with Informer and supervised PatchTST. Input length in {96,192,336,720} and prediction length = 720.
          Table 3: Setting 2. Traffic forecasting result with supervised PatchTST, with and without data decomposition. Input length = 336 and prediction length in {96,192,336,720}.

          Efficiency. According to Table 2, the Informer (ProbSparse self-attention, distilling operation, positional embedding) is generally more efficient than PatchTST (patching, positional embedding). Especially with the increase of input sequence length, Informer with the distilling operation can forecast in significantly less time compared to the patching method. Across different prediction sequence lengths, PatchTST does not show much difference, and Informer tends to cost more time with longer predictions. According to Table 3, with data decomposition, PatchTST spends more time while not achieving significantly better performance.

          Accuracy. According to Table 2. In all scenarios, the performance of PatchTST is better than Informer considering the prediction accuracy. Along with the increase of input sequence length, PatchTST tends to have better accuracy while Informer stays stable.

          Overall, we can infer from the design of the two models about their performances. Informer is able to save more time with the distilling operation and PatchTST can get better accuracy with the capture of local and global information. Though patch embeddings help the model to get better accuracy on the prediction task, it achieves so at the expense of consuming a significant amount of time. When the input sequence is 720, PatchTST takes more than twice as long as Informer.

          Conclusion and Discussion

          Based on existing models, different measures can be combined to balance the time consumed for forecasting with the accuracy that can be achieved. Due to time constraints, this study did not have the opportunity to combine additional measures for comparison. We hope to continue the research afterward and compare these performances.

          In addition to applying the transformer architecture alone, a combination of various methods or frameworks may help us to benefit from the advantages of different models. A transformer-based framework for multivariate time series representation learning is proposed by George et al. The Spatial-Temporal Graph Neural Network (STGNN) is another widely used model in traffic prediction, which only considers short-term data. The STEP model is proposed to enhance STGNN with a scalable time series pre-training model. In the pre-training stage, they split very long-term time series into segments and feed them into TSFormer, which is trained via the masked autoencoding strategy. Then, in the forecasting stage, they enhance the downstream STGNN based on the segment-level representations of the pre-trained TSFormer.

          \ No newline at end of file diff --git a/blog/2023/transformers-as-gamers/index.html b/blog/2023/transformers-as-gamers/index.html new file mode 100644 index 00000000..f16a6dc9 --- /dev/null +++ b/blog/2023/transformers-as-gamers/index.html @@ -0,0 +1,32 @@ + Learning Generals.io | 6.S898 Deep Learning Blogs 2023

          Learning Generals.io

          We explore the application of deep learning to the online game generals.io and discuss what is necessary to achieve superhuman performance in generals.io.

          Introduction

          Generals.io is a real-time turn-based strategy game. In generals.io, two players with a “general”, denoted with a crown, spawn on a board with mountains and cities scattered. Initially, players have no knowledge of other parts of the board besides the tiles immediately surrounding their general. Armies are the main resource of the game, which generate slowly from ordinary tiles, but quickly from the general and cities. Using armies, players compete to capture terrain and cities, which also grants further vision of the board. On each turn, a player is able to click on a cell with their army and use the keyboard to move it in the four cardinal directions. The goal of the game is for the player to use their army to capture the tile of their opponent’s general.

          A typical game state will look like the following:

          The left image shows the game play screen while playing from the perspective of red. Red is only able to see tiles adjacent to it, and every other tile is covered in a fog of war. The right image lifts the fog of war and shows the perspective of the spectator, and we can see where blue's general is located, as well as unclaimed cities around the edge of the map.

          Generals.io has a modest daily player base and has had attempts to implement bots to play against humans. Currently, no bots have been able to defeat top humans consistently. The top bots, such as this one, are implemented using rule-based logic. They achieve human-level performance and are able to win some games against the top 10 ranked players. Previous machine-learning based bots have attempted to use a CNN LSTM in the model architecture, such as this post by Yilun Du. He separately evaluates a supervised learning approach and a reinforcement learning approach. His supervised learning approach reaches a competent level of play and is able to expand while having an awareness of needing to defend. However, it is very inefficient and makes basic strategic mistakes, such as running army into cities without fully taking them. The reinforcement learning approach was trained using A3C from scratch, but it was not able to learn beyond random movements.

          I set out to build on Yilun’s work and improve the bot’s performance, as well as explore and document what details are actually important for improvement.

          Related Work and Why Generals

          Deep learning has already been used to conquer many games, achieving either human-level or superhuman-level performance. The pattern for most games has been to use deep reinforcement learning at enormous scale through self-play. There has been success in chess, Go, Dota 2, and Starcraft.

          While games in higher complexity have already been defeated by deep learning, the experimentation is often quite opaque, as there are too many decisions that are made to be worthy of reporting on. Furthermore, the games and methods are often way too large for a single researcher to reproduce. For example, OpenAI Five was only able to beat Dota 2 pros after training for ten months, using 770 PFlops/s-days. Generals.io allows for more accessible experimentation through its smaller size and open data pipeline for replays.

          I think there are still insights to be gained in defeating generals.io. In particular, the game comes with a combination of challenges that aren’t clearly addressed by previous approaches:

          1. The game requires a high degree of calculation and precision, as well as strong intuition. Similar to chess, certain parts of the game are more intuitive and positional, and certain parts require searching through possibilities to calculate precisely. In generals.io, the precision mostly comes from being maximally efficient in the opening, as well as calculating distances relative to the opponent's army. This would suggest that some kind of model needs to search in order to achieve superhuman performance.
          2. The game is partially observable. This prevents approaches used in perfect information games such as Monte Carlo Tree Search, as we need to form belief states over the opponent's state.
          3. The state and action space is enormous, and it requires planning on long time horizons. Games such as poker satisfy both of the above two bullet points, but it was able to be tackled with approaches such as counterfactual regret minimization after bucketing the state and action space. Bucketing the state and action space likely won't work for generals.io, nor will an approach like CFR work.

          Methods

          Formally, generals.io can be represented as a POMDP. The underlying state, which is the state of the whole board, can only be observed at tiles that are adjacent to tiles claimed by the player.

          A wealth of data (over 500,000 games, each containing hundreds of state-action pairs) are available via human replays. We use imitation learning to try to learn from the replays. Concretely, the problem can be modeled as selecting parameters \(\theta\) of a policy \(\pi\) (a neural network) to maximize the log likelihood of the dataset \(D\):

          \[\max_\theta \sum_{(s,a)\sim D} \log \pi_\theta(a | s)\]

          I used existing tools in order to convert the replays into a json format that could then be parsed. I then adapted Yilun’s code, which no longer directly works, in order to simulate the replays to construct the dataset. To start, I only used 1000 replays of highly ranked players to construct my dataset.

          I started mostly with Yilun’s features, with small modifications:

          Channel Description
          0 friendly army values
          1 enemy army values
          2 boolean indicators for mountains and cities
          3 unclaimed city army values
          4 friendly city army values
          5 enemy city army values
          6 boolean indicator for mountains
          7 boolean indicator for friendly and enemy general (if found)
          8 boolean indicator for fog of war
          9 (turn number % 50)/50

          The features made a lot of sense to me as a generals player - it’s all the information I use to play. I removed Yilun’s last feature since a new replay standard made it impossible to compute.

          Yilun used a CNN LSTM as his architecture. In order to keep it simple and evaluate the basic components that improve performance, I removed the memory and only used a simple fully convolutional net with 5 stacked 5x5 filters.

          Policies were evaluated by coding a small bot in the recently released botting framework for generals. The bot sampled from the policy’s distribution over legal moves. Two policies were able to go head to head through this framework, and I could queue 10 games in order to get good estimates for the relative strength between the bots.

          I’ll now describe some of the changes I tried and give an analysis of the results of each change.

          Effects of more data

          The baseline policy, trained with 1000 games, was not very successful. The bot would often move back and forth, without trying to expand or take land.

          I wanted to first explore how the amount of data affected the policy. I took 2000 more games of high ranking players and trained the same policy on a dataset with 3000 games. I expected an improvement in the similarity of the validation and train loss. This was confirmed by the results, shown below.

          Train and validation loss curves of training with less and more data.

          This makes sense, as adding more data is essentially a regularizer. It prevents the model from overfitting, as it needs to do well on the added data too. Furthermore, it looks like it converges faster in epoch space, but in reality it’s also going through more examples, so it trained at roughly the same speed if one were to scale the epochs by a factor of 3. The policy was also much more effective, and it did not run back and forth as much. I think this was likely due to reduced overfitting.

          I suspect that more data would have improved the policy even more, but I didn’t go larger, as it would have broken past the limits of the infrastructure I built. In particular, the dataset consisting of 3000 games took over 4 GB of disk space. A smarter job of batching the data would have allowed me to train with more.

          Squishing army features

          Working with the 3000 games, I turned my attention towards improving the features. They were already pretty comprehensive, but I was skeptical of the many army features we had. In particular, all of the other features were binary. Army values ranged from 0 to hundreds. I hypothesized that the features encoding armies could lead to unstable training. Using some knowledge about the game, I thought it would make sense to use a function like a sigmoid, in order to squish large values down.

          As a generals.io player, this made sense to me, as the difference between 1 army on a tile and 2 army on a tile is very large, but the difference between 14 and 15 army is not so large. I expected better performance due to the inductive bias I was adding to the model. However, the loss curve showed similar, slightly slower convergence to the previous experiment. The policies were about the same too.

          Train and validation loss curves of training after squishing the army features.

          Deeper Network

          Motivated by the success of ResNets when CNNs were leading computer vision, I wanted to try using a deeper network with residual connections. I replaced the network with a stack of one 5x5 filter, followed by nine 3x3 filters. I added skip connections between every two layers. The performance was again about the same. I suspect that more data and/or an even deeper network is needed in order to see benefits from a deeper network.

          Train and validation loss curves of training after using a deeper network with residual connections.

          Discussion and Conclusion

          Combining all of the above leads to a decent policy with coherent strategy, shown below.

          The best performing policy I trained, defeating an unsuspecting opponent (me). Replayed at 5x speed.

          Qualitatively, this policy is much better than Yilun’s policy. While I don’t have his to evaluate, he shows a replay of its early game performance. My bot does a much better job in the early game of efficiently expanding in order to maximize growth rate. Yilun’s bot has a handle on using a large army to explore, but mine is able to collect army efficiently on turns 25-50 in order to take the opponent’s land.

          This is interesting because my policy is actually still much simpler than Yilun’s, given he uses a LSTM. It’s possible that the training was not very stable, or it may have overfit, or he just chose a bad replay of his bot.

          Limitations and Future Work

          The bot is not competitive with any human that has played a decent amount of games. It is still pretty inefficient and makes many nonsensical moves (it moves back and forth a few times in the replay).

          There is still a lot to try, and I’ll actually continue working on some of these ideas after the class, as it was a lot of fun. There’s a decent amount of low hanging fruit:

          1. I noticed the bots often like to expand toward the wall. I'm guessing this is because there is no information encoding the boundaries of the wall, and I just let the padding in the convolutions take care of it. Adding a special indicator would likely be helpful.
          2. Use reinforcement learning for improving the policy beyond the demonstrations.
          3. Train on a dataset consisting of only one or only a few players in order to reduce multimodality problems (similar style of play).
          4. Adding memory to the network.
          5. Trying a vision transformer, and trying to have it attend to previous states for recurrence too.

          I think achieving even higher levels of performance would require doing some form of search. From my understanding, the most similar approach would be something like MuZero. This would allow us to properly learn a world model despite the incomplete information, which we can then do search on.

          Overall, I learned a ton in this project about how to apply deep learning to a new problem. I encountered many of the issues described in “Hacker’s Guide to DL” and the related readings. My biggest takeaway is to spend the time setting up the proper infrastructure. Poor infrastructure causes bugs and makes it really hard to iterate.

          \ No newline at end of file diff --git a/blog/2023/uncertainty/index.html b/blog/2023/uncertainty/index.html new file mode 100644 index 00000000..0f0f28da --- /dev/null +++ b/blog/2023/uncertainty/index.html @@ -0,0 +1,77 @@ + Analytic, Empirical, and Monte Carlo Bayesian Methods for Uncertainty Estimation | 6.S898 Deep Learning Blogs 2023

          Analytic, Empirical, and Monte Carlo Bayesian Methods for Uncertainty Estimation

          In the realm of machine learning, the robustness and reliability of predictive models are important, especially when confronted with Out-of-Distribution (OOD) data that deviate from the training distribution. Bayesian models stand out for their probabilistic foundations, being able to offer ways to quantify uncertainty. This project will present a survey of already-established methods of estimating uncertainty, as well as how we adapted/generalized them.

          Motivation

          Many practical uses of deep neural network (DNN) models involve using them with a restricted amount of training data, which doesn’t encompass all the potential inputs the model might face when actually used. This exposes a significant limitation of models based on data: they can behave unpredictably when dealing with inputs that differ from the data they were trained on, known as out-of-distribution (OOD) inputs. Machine learning models that are trained within a closed-world framework often mistakenly identify test samples from unfamiliar classes as belonging to one of the recognized categories with high confidence. This tendency of the model to make overconfident misclassifications is sometimes described in the literature as “arrogance.” This can be catastrophic when such models are deployed in the real world. For instance, in self-driving technology, the system should warn the user and transfer the control to the human driver when it encounters unfamiliar scenes or objects that were not present in its training data, and thus it is unable to make a safe and reliable decision.

          Consequently, there has been a surge of research focused on improving DNN models to be able to assess their own uncertainty and recognize OOD inputs during their operational phase . Beyond improving their usage in safety-critical settings, being able to quantify model uncertainty is important in predicting model performance. With increased ability to predict model performance, we can improve the training process and improve inference. In this project, we explore 3 different methods of quantifying uncertainty: Monte Carlo Dropout, Sketching Curvature for Efficient Out-of-Distribution Detection (SCOD), and Stochastic Weight Averaging Gaussian (SWAG). We’ll also attempt to modify these existing methods and even combine them in order to provide improvements to practicality such as RAM usage and improvements to important metrics we establish. We develop 2 methods: SCODv2, which extends the simple isotropic prior used by SCOD, and SCWAG, which combines elements of both SCOD and SWAG.

          Stochastic Weight Averaging Gaussian (SWAG)

          SWAG is a method that can improve generalization in deep learning settings. It approximates the posterior distribution of weights as a normal distribution with mean determined by an average of weight iterates and covariance determined by the sum of the running variances of each weight and a low rank covariance matrix. More specifically, we use the following to perform our weight updates.

          def train_swag(net, loader, num_epochs=5, K=25, swag_freq=50, swag_start=1):
          +    theta = get_all_weights(net)
          +    d = theta.shape[0]
          +    D = torch.zeros((d,K)).cpu()
          +    theta_bar = theta.clone().cpu()
          +    M2 = torch.zeros(d).cpu()
          +    sigmas = torch.zeros(d).cpu()
          +    optimizer = optim.Adam(net.parameters(), lr=0.001)
          +
          +    net.train()
          +    for epoch in range(num_epochs):
          +        train_loss = 0
          +        n_ = 0
          +        for batch_idx, (data, target) in enumerate(loader):
          +            optimizer.zero_grad()
          +            output = net(data.to(device))
          +            loss = F.cross_entropy(output, target.to(device))
          +            loss.backward()
          +            optimizer.step()
          +            train_loss += loss
          +            if batch_idx % swag_freq == 0:
          +                if (swag_start <= epoch):
          +                    with torch.no_grad():
          +                        w1 = n_/(1+n_)
          +                        w2 = 1/(1+n_)
          +                        theta = get_all_weights(net).cpu()
          +                        theta_bar_new = w1*theta_bar + w2*theta
          +                        M2 = M2 + (theta-theta_bar)*(theta-theta_bar_new)
          +                        theta_bar = theta_bar_new.clone().cpu()
          +                        D[:,0:-1] = D[:,1:]
          +                        D[:,-1] = theta - theta_bar
          +                        sigmas = M2/(1+n_)
          +                        n_ += 1
          +    return theta_bar, sigmas, D
          +
          +

          The learned \(\bar{w} \in \mathbf{R}^d\) is the mean of the posterior distribution on weights. The \(\Sigma\) vector represents the running variance of the weights and can be diagonalized to get a very rough posterior. (The method we used to determine the running variance is unlike the one presented in the SWAG paper due to issues with numerical instability and catastrophic cancellation which resulted in negative variances. To address this issue we used Welford’s online algorithm.) The \(D\) matrix contains the last \(K\) deviations of updated \(w\) values from \(\bar{w}\) (including the effect that the updated \(w\) has on \(\bar{w}\)). This allows us to form a rank \(K\) approximation of the posterior covariance. Thus we have the posterior \(P(w\mid\mathcal{D}) = \mathcal{N}\left(\bar{w}, \frac{1}{2}\left(\text{diag}(\Sigma) + \frac{DD^T}{K-1} \right)\right)\). To sample from the posterior, we do the following reparametrization

          \[z_d \sim \mathcal{N}(0, \mathbf{I}_d)\] \[z_K \sim \mathcal{N}(0, \mathbf{I}_K)\] \[\tilde{w} = \bar{w} + \frac{1}{\sqrt{2}}\text{diag}(\Sigma)^{\frac{1}{2}}z_d + \frac{1}{\sqrt{2(K-1)}}Dz_K\]

          It is important to note that while a prior distribution on weights is not specified, it is implicitly chosen through how often we update our running average of the weights, variances, and deviations, as well as the optimizer being used.

          For the purposes of inference, each \(\tilde{w}\) determines the parameters for a clone model and with \(S\) samples we effectively have an ensemble of \(S\) models. Their output distributions are averaged arithmetically to yield the final output. We expect that for in-distribution inputs, the individual outputs do not disagree drastically. And for out-of-distribution inputs, the individual outputs can differ a lot. So, as with our other ensemble method, a good metric of uncertainty here is to use the average pairwise KL divergence between the distributions. Here are some results and findings of this metric applied to SWAG.

          We train a model with SWAG on the MNIST and CIFAR10 datasets. First, we only train on the digits/classes from 0-5 and look at the KL scores on the digits/classes 6-9. Expectedly, the scores tend to drastically increase on the unseen digits. However, the increase is less drastic for the CIFAR dataset as the data is a bit more homogeneous.

          We can also take a look at the data itself and identify the images which have the highest and lowest scores for different splits of the data. For these images, we identify the true label, followed by the KL score assigned to the image (higher being more uncertain), and finally the predictions made by 10 of 25 sampled models.

          The above pictures correspond to the highest and lowest scores from in-distribution training data. The major contributors for the high scores for MNIST are digits that are so poorly written it’s hard to say what each one is, or that resemble another digit too much. For CIFAR, it seems like the high score images are inducing confusion due to their color scheme or background. A lot of images with a blue or sky background such as those of birds do seem to be mistaken for planes at times. The low score images, on the other hand, are all extremely similar to one another; they’re very well written digits (usually 0) or something that is obviously a car (usually red).

          Next, we take a look at how these scores fare on new out-of-distribution images.

          These are the highest and lowest scores on the OOD dataset. It’s unsurprising that the highest scores are assigned to the images that are unlike anything in the training set. For MNIST this is the number 8 and for CIFAR there doesn’t seem to be any one class. However, it is important to see that there are still images where our model has very low scores (high certainty). However, this simply comes from the fact that these inputs happen to look more similar to one class of training images (9 is really similar looking to 4 and trucks look pretty close to cars, especially if they’re red since a lot of the low score car-images are red).

          All the methods used in this paper tend to show similar results for the images corresponding to the highest and lowest measures of uncertainty, so we won’t be looking at those images for every single method.

          Now that we’ve seen that we can use our measure of uncertainty as an indicator of how likely the output is to be the correct answer, we can try using uncertainty of output as a way to predict error. Ideally, we would like to see some sort of correlation between our uncertainty measure and our actual errors or probability of the correct answer. So we retrained our models on all digits using SWAG and looked at the performance on a validation set. Notice that we don’t care too much about the error itself (actually the probability of the target label), but rather its correlation with the uncertainty measure. In particular, we look at the Spearman correlation to capture nonlinear relationships.

          Spearman Correlation: -.9923
          Spearman Correlation: -.3867

          There is significant negative correlation, which is what we’re looking for. If we can predict how well our model will perform on certain inputs, it allows us to better deploy the model in real world situations as well as possibly improve it by doing something such as boosting or improved training. We now look to improve this relationship between error and uncertainty measure by finding better uncertainty measures.

          Local Ensemble: Monte Carlo Dropout

          We start off by comparing with a very simple method. Given a neural net with Dropout layers, and a new datapoint from test ID or OOD datasets, we output \(50\) different probability distributions (rather than setting our model on evaluation mode, we keep the Dropout layers on), \(p_1, p_2, \ldots p_{50}\). Our uncertainty score is \(\text{Unc}(x) = \frac{1}{49\cdot 50}\sum_{i\neq j}D_\text{KL}(p_i\, \Vert \, p_j)\), i.e. the average KL divergence between any pair of distributions. The intuition is that, when the model shouldn’t be confident about an OOD datapoint, dropping weights (which can be seen as perturbations) should change our output distributions significantly. This sensitivity indicates lack of robustness and certainty.

          This model is very simple and our weight “perturbations” are not too mathematically motivated in the sense of them coming from some justified posterior. However, it still provides a good baseline to compare against.

          Spearman Correlation: -.9944
          Spearman Correlation: -.2936

          Overall, the error estimation on MNIST is about the same but significantly worse on the CIFAR dataset. This is about expected since MC dropout is such a simple method.

          Sketching Curvature for Efficient Out-of-Distribution Detection (SCOD)

          There is research literature on leveraging the local curvature of DNNs to reason about epistemic uncertainty. [Sharma et al.] explores this idea through a Bayesian framework. Let us assume a prior on the weights, \(P(w) = \mathcal{N}(0, \epsilon^2 I)\). By using a second-order approximation of the log-likelihood \(\log p(y,w\mid x)\), we arrive at the Laplace posterior \(P(w\mid\mathcal{D}) =\mathcal{N}(w^{MAP}, \Sigma^*)\), where \(\Sigma^* = \frac{1}{2}(H_L + \frac{1}{2\epsilon^2}I)^{-1}\) and \(H_L\) is the Hessian of the cross-entropy loss wrt \(w\). Given a pretrained DNN, \(\theta=f(x,w)\in\mathcal{R}^d\) where \(\theta\) determines a distribution on \(y\), we assume that the trained weights \(w^*\) are a good approximation for \(w^{MAP}\). We define our uncertainty metric to be the change in the output distribution, \(\theta\), when the weights are perturbed around \(w^*\) according to the posterior distribution. Using the KL divergence to measure distance between output distributions, we define

          \[\text{Unc}(x) = \mathbb{E}_{dw\sim \mathcal{N}(0, \Sigma^*)}\left[ D_{\text{KL}}\left( p(\theta\mid x, w^*)\, \Vert \, p(\theta\mid x, w^* + dw)\right) \right]\]

          We can approximate the local KL divergence using the Fisher information matrix (FIM) of \(y\) wrt \(\theta\): \(D_{\text{KL}} \approx d\theta^TF_\theta(\theta)d\theta + O(d\theta^3)\). Also, by change of variables, we can rewrite the FIM in terms of \(w\): \(F_w(x, w) = J^T_{f,w}F_\theta(f(x,w))J_{f, w}\) where \(J_{f,w}\) is the Jacobian of the network outputs with respect to the weights. Putting this together, we get that

          \[\text{Unc}(x) = \mathbb{E}_{dw\sim \mathcal{N}(0, \Sigma^*)} \left[dw^TF_w(x,w^*)dw \right] = \text{Tr}\left( F_w(x,w^*)\Sigma^*\right)\]

          We can also approximate \(\Sigma^* \approx \frac{1}{2}(MF_{w^*}^\mathcal{D} + \frac{1}{2\epsilon^2}I)^{-1}\), where \(F_{w^*}^\mathcal{D}\) is the averaged FIM on the training dataset and \(M\) is the size of the training set. If the neural net has \(N\) weights, then the FIMs are \(N \times N\) matrices and computing them becomes intractable (both time complexity and memory).

          For simplicity, let us assume that the output of our DNN, \(\theta\), is the categorical distribution, i.e. \(\theta_i\) represents the probability assigned to class \(i\). In this case, we have that \(F_\theta(\theta) = \text{diag}(\theta)^{-1}\). Therefore, the FIM for one input has rank at most \(\min(n, d)\) and we can represent it as \(F_w(x,w^*) = LL^T\), where \(L=J_{f,w}^T\text{diag}(\theta)^{-1/2}\). The same trick, however, doesn’t work for \(F_{w^*}^\mathcal{D}\) as it can reach rank as high as \(\min(N, Md)\). For now, let us assume that we can find a low-rank approximation of \(F_{w^*}^\mathcal{D} = U\text{diag}(\lambda)U^T\), where \(U\in\mathbb{R}^{N\times k}\) and \(\lambda\in\mathbb{R}^k\). With a few mathematical tricks (which can be followed in [Sharma et al.]), one can prove that

          \[\text{Unc}(x) = \epsilon^2\Vert L\Vert_F^2 - \epsilon^2 \left \Vert \text{diag}\left(\sqrt{\frac{\lambda}{\lambda + 1/(2M\epsilon^2)}}\right)U^TL \right \Vert^2_F\]

          [Sharma et al.] also provides a randomized algorithm for finding \(U\) and \(\Lambda\) by using the FixedRankSymmetricApproximation algorithm presented in [Tropp et al.]. We chose \(\epsilon \approx 0.04536\) by training another model with the same hyperparameters and architecture and taking the standard deviation of all its weights. This is the pseudocode of the SCOD algorithm:

          \(\Sigma\in\mathbb{R}^{r\times N}\) and \(\Psi \in \mathbb{R}^{s\times N}\) are random sketching matrices, which we chose to simply be matrices with i.i.d standard Gaussian entries. \(r+s\) is the size of the sketch and is ideally chosen as high as RAM allows. We also use the budget split \(s = 2k+1\) and \(r=4k+3\), where \(k\) is the target rank, as [Tropp et al.] suggests. We ended up setting \(k=50\) and got the following results:

          We have been able to implement SCOD, but due to issues with saving our results and time, we can now only show the performance of the uncertainty score on predicting error on a subset (classes 0-5) of the CIFAR dataset.

          Spearman Correlation: -.1083

          The score is a bit suspiciously low, so there may be something wrong with our implementation, ignoring the fact that we only test on the subset. Nonetheless, it is still a significant negative correlation and we get similar results when looking at high uncertainty and low uncertainty images.

          SCODv2

          We also did our own tweak on SCOD. Rather than having a vanilla prior, we can generalize it to any normal distribution with diagonal covariance. Let’s say that our prior is \(w\sim\mathcal{N}(0, \Sigma)\), where \(\Sigma\) is a diagonal matrix. Then, our Laplacian posterior’s covariance matrix becomes \(\Sigma^* = \frac{1}{2}(MF_{w^*}^\mathcal{D} + \frac{1}{2}\Sigma^{-1})^{-1}\). By the Woodbury matrix identity \(\Sigma^*=\Sigma - 2\Sigma U\left(\text{diag}(M\lambda)^{-1}+2U^T\Sigma U \right)^{-1}U^T\Sigma\). Using the well-known identities, \(\Vert A\Vert_F^2 = \text{Tr}(AA^T)\), \(\text{Tr}(AB) = \text{Tr}(BA)\), we get that

          \[\text{Unc}(x_{\text{new}}) = \text{Tr}\left(\Sigma^*F_w(x_{\text{new}},w^*)\right) = \text{Tr}\left(L^T\Sigma L\right) - 2\text{Tr}\left(L^T\Sigma U\left(\text{diag}(M\lambda)^{-1}+2U^T\Sigma U \right)^{-1}U^T\Sigma L\right)\]

          \(= \left \Vert L^T \Sigma^{1/2}\right \Vert_F^2 - 2\left \Vert L^T \Sigma UA\right \Vert_F^2\), where \(AA^T = \left(\text{diag}(M\lambda)^{-1}+2U^T\Sigma U \right)^{-1}\).

          Since \(\Sigma\) is a diagonal matrix, the biggest matrices we ever compute are of size \(N\times \max(k, d)\), which means that the computation is equally efficient asymptotically to the vanilla prior. To decide what diagonal matrix to use, for each layer, we assigned the same variance given by the variance of the weights of the same layer in a differently trained model (with same architecture).

          Due to issues with saving our results and timing, we are not able to show our results estimating error from uncertainty for SCODv2.

          Stochastic Curvature and Weight Averaging Gaussian (SCWAG)

          Whereas SCOD attempts to analytically approximate the posterior by approximating the Hessian using the Gauss-Newton matrix, SWAG approximates the posterior by keeping running track of moments and deviations when it approaches flat regions in the loss landscape. What if we could combine these two ideas? We could use the SWAG empirical posterior. This method would not require matrix sketching of any form and lowers the required RAM, as SCOD can be quite RAM intensive. Using the \(\Sigma\) and \(D\) from SWAG to determine the posterior \(\Sigma^*\), we arrive at the following measure of uncertainty (after digging through some math).

          \[\text{Unc}(x) = \text{Tr}\left( F_w(x,w^*)\Sigma^*\right) = \frac{1}{2} \text{Tr}\left(F_w(x,\bar{w})\left(\text{diag}(\Sigma) + \frac{DD^T}{K-1} \right) \right)\] \[\text{Unc}(x) \propto ||L\Sigma||_F^2 + \frac{1}{K-1}||LD||_F^2\]

          We do this by introducing a wrapper model that takes in a base model as well as the SWAG outputs in order to perform the Jacobian based operations during each forward pass. For evaluation, we look at the Spearman correlation of the uncertainty score with the target probability and we notice some improvement over SWAG on the CIFAR dataset.

          Spearman Correlation: -.9897
          Spearman Correlation: -.8484

          With MNIST, we already had near perfect correlation so this slight decrease isn’t too worrisome. However, the Spearman correlation has shot up drastically which shows that this method of combining the analytical approximation of uncertainty with an empirically constructed posterior has merit. There is something worrisome with the fact that the model with exactly \(\bar{w}\) as its weights is producing distributions that have a maximum value of around \(.25\). We suspect we could have made some error here but have not been able to pinpoint anything wrong with our implementation. The model still seems to have fairly accurate predictions as seen below.

          Future Work

          For SCWAG, we could work on figuring out why our output distributions becomes less spiked as a result of using \(\bar{w}\) as the weights for the network. We suspect that it’s a result of starting our SWAG averaging for \(\bar{w}\) too early so we were considering \(w\) far away from flat local minima of the loss landscape. Additionally, we could inspect the arcing nature in the plot of target probabilities vs score. For near 0 scores, it seems that the target probabilities arc from .25 to 0 which is unusual. Finally, we want to think of a way to introduce the loss landscape more into our approach. Maybe we can form a more expressive posterior. If we can manage that, our uncertainty estimates and correlation might improve. But more importantly, we would be able to call our method SCALL(y)WAG which is pretty cool.

          In general and particularly for SCOD, we’d still like to experiment with priors that induce different types of posteriors. Because the dependence on prior is explicit here as opposed to implicit for SWAG, it allows us more room for experimentation in choosing nice expressive priors.

          \ No newline at end of file diff --git a/blog/2023/universal-features/index.html b/blog/2023/universal-features/index.html new file mode 100644 index 00000000..9dbfa4f5 --- /dev/null +++ b/blog/2023/universal-features/index.html @@ -0,0 +1,42 @@ + Sparse Autoencoder Universality - Under What Conditions are Learned Features Consistent? | 6.S898 Deep Learning Blogs 2023

          Sparse Autoencoder Universality - Under What Conditions are Learned Features Consistent?

          This project aims to study the universality of features in LLMs by studying sparse autoencoders trained on similar layers of different models.

          Introduction

          Neural networks are black boxes. We understand the process by which they are created, but just as understanding the principle of evolution yields little insight into the human brain, designing a model’s optimization process yields little insight into how that model reasons. The field of mechanistic interpretability attempts to understand how human-understandable concepts combine within a model to form its output. With sufficiently good interpretability tools, we could ensure reasoning transparency and easily find and remove harmful capabilities within models (such as hallucinations) .

          In 2022, Anthropic identified a core challenge in interpreting a model’s reasoning layer-by-layer: polysemanticity, a phenomenon in which a single neuron activates for many different concepts (e.g. academic citations, English dialogue, HTTP requests, and Korean text). This is a result of a high-dimensional space of concepts (‘features’) being compressed into the lower-dimension space of the neural network . Sparse autoencoders, a form of dictionary learning, help to linearly disentangle polysemantic neurons into interpretable features .

          Sparse autoencoders work by projecting a single layer of a neural network into a higher-dimension space (in our experiments, we train autoencoders ranging from a 1:1 projection to a 1:32 projection) and then back down to the size of the original layer. They are trained on a combination of reconstruction loss, their ability to reconstruct the original input layer, and a sparsity penalty, encouraging as many weights as possible to be 0 while retaining good performance

          Setup

          (https://www.alignmentforum.org/posts/wqRqb7h6ZC48iDgfK/tentatively-found-600-monosemantic-features-in-a-small-lm)

          The intuition behind sparse autoencoders is that if each neuron in the input layer learns n features, then projecting to n dimensional space while retaining all the information from the input layer should theoretically leave us with one feature represented in each encoded neuron. Then, these neurons should all be monosemantic, meaning they should each represent one interpretable concept. Because the columns of the decoder matrix tell us how these encoded neurons linearly combine to recreate the input layer, each column of the decoder matrix represents one feature of the network (in other words, what linear combination of neurons represents an individual concept).

          However, because sparse autoencoders were only popularized as an interpretability method earlier this year by Anthropic, the literature on them is, for lack of a better word, sparse. In particular, we were curious about whether the features learned by sparse autoencoders are universal. In other words, we’d like to know if the learned features are similar regardless of variables like autoencoder size, model size, autoencoder training set, and model training set. If they are, it shows both that sparse autoencoders consistently extract the correct features and that learned features are similar across different model sizes and training sets. If they aren’t, it would be evidence that sparse autoencoders don’t accurately capture the full scope of features a model represents and that we cannot easily transfer them across different models.

          In our experiments, we train autoencoders of projection ratios ranging from 1:1 to 1:32 on five different Pythia models: 70m, 160m, 410m, 160m deduped, and 410m deduped. In some cases, we exclude data from Pythia 410m because running experiments on it was too computationally expensive. We train on the first four layers of each model to provide additional insight into how the efficacy of autoencoders changes as one moves deeper into the model. We also train autoencoders on two different datasets from the same distribution to test whether the learned features change in response to small perturbations in training order or distribution. Together, these models let us answer a few broad questions surrounding the consistency of learned features:

          • Do learned features consistently transfer between different model sizes and training datasets?
          • Are learned features consistent across different autoencoder sizes?
          • Do sparse autoencoders learn interpretable features less consistently in later layers where reasoning may become more abstract or hard to follow?

          These meta-level questions build on Anthropic’s feature-extraction process outlined below:

          (This image is from Cunningham et. al )

          To answer these questions, we use the following three metrics in a variety of comparisons:

          • Mean cosine similarity (MCS) between decoder weights – since the columns of the decoder matrix represent the features, we can use them to measure the similarity of the learned features. To compare two decoders, we start by taking the mean cosine similarity between the first column in the first decoder and every column in the second decoder. Because the decoders might learn features in different orders, we take the maximum of these similarities. We repeat this process for every column in the first decoder, and then we take the average similarity across the columns.
          • Correlation between activation vectors of encoded layers – another way of inspecting the features learned by a sparse autoencoder is to examine when different neurons in the encoded layer activate on different types of token. So, to compare two autoencoders, we pass over 10,000 tokens of text through their respective models and save vectors representing each encoded neuron’s activations across those tokens. Then, as with mean cosine similarity, we took the maximum correlation between a neuron in the first encoder and any neuron in the second encoder, and then averaged these values across every neuron. If two encoders typically had the same neurons activating for the same tokens, this is strong evidence that the encoders learned similar features.
          • Feature frequency of an autoencoder – because neurons in encoded layers are intended to represent specific individual concepts, we expect them to activate much less than typical neurons in a neural network. We used this metric both as a way of verifying that our autoencoders are working as intended and as a way of evaluating how easily autoencoders are able to learn monosemantic features as we vary other parameters. To create feature frequency plots, we pass over four million tokens through the model and plot the frequency with which a feature activates (usually around once every 10-1000 tokens) against the number of features which had that frequency.

          Specifically, we ran the following experiments:

          • On the question of whether learned features consistently transfer between different model sizes and training datasets: we created feature frequency plots, tables of correlations, and MCS graphs to contrast different model sizes along with deduped and original models.
          • On the question of whether learned features are consistent across different autoencoder sizes: we created feature frequency plots, MCS tables, and graphs of pairwise activation correlations and MCS to contrast features learned by different autoencoder sizes.
          • On the question of whether sparse autoencoders learn interpretable features less consistently in later layers where reasoning may become more abstract or hard to follow: we create feature frequency plots contrasting learned feature frequencies at different layers throughout Pythia 70m and Pythia 160m.

          Experiments and Results

          We ran baselines for both MCS and correlations by taking the corresponding measurement between autoencoders trained on two different layers as well as randomly initialized weights. For MCS, the baseline was around 0.15 and was always below 0.20 in our experiments. For correlations, random measured to be about .40.

          Training and evaluating sparse autoencoders

          We trained a range of sparse autoencoders on the activations at the output of the MLP of various Pythia models. We used 100 million tokens of web text, from a HuggingFace dataset, to train each autoencoder. We spent some time fine-tuning the hyperparameters and conferred with other researchers who have trained similar autoencoders. You can see from our loss curve that we are likely overtraining. Since we are partially optimizing for reconstruction loss, we did not expect the quality of the model to decrease on test sets significantly. We ran our model with and without the sparse autoencoder on a small dataset and saw the perplexity go up from 25 to 31, which we were content with. However, there is a lot of room left for improvement to get better sparse autoencoders.

          (total loss curve of an 1:8 autoencoder trained on Pythia-70m)

          Do learned features consistently transfer between different model sizes and training datasets?

          Activation frequencies are distributed roughly symmetrically around 0.01, meaning that the modal encoded neuron activated around once every one hundred tokens. This is solid evidence that our sparse autoencoders were effectively learning sparse, monosemantic representations. If a neuron was only needed every one hundred tokens to reconstruct the input, it likely represents a very specific concept rather than many concepts all at once. We see no clear trend when varying model size, demonstrating that this does not have much effect on an autoencoder’s ability to extract monosemantic features.

          The table above measures the correlations between 1:8 autoencoders trained on layer 0 of three different model sizes. You can see that autoencoders trained on models closer in size have a higher correlation factor of their features, suggesting that smaller autoencoders may not store some of the features that large autoencoders do.

          The above graph shows the MCS between autoencoders trained on deduped vs regular models. We anticipated the MCS of these models to be fairly high, but these were some of the lowest results we have seen, with autoencoders trained on layer 0 (of any of the three models we looked at) being around .4. Notably, all of our MCS were above .15 which was our baseline.

          Are learned features consistent across different autoencoder sizes and training datasets?

          Sparsity tends to increase when the projection ratio increases, which makes sense, as a larger layer needs to use each neuron less often. This is evidence that our autoencoders are not learning all possible features, and using even larger autoencoders would allow us to unpack more features.

          The above table looks at MCS loss of different sized autoencoders on Pythia 70m. Interestingly, we observed that MCS between autoencoders whose dimensions have the same ratio (e.g. 4:8 vs 8:16) are similar (e.g. both are .870.)

          Activation correlations and MCS were very high for all autoencoder projection ratios, demonstrating that different size autoencoders learn very similar features. Note that all metrics were lower for the autoencoders with a 1:1 projection ratio, likely because they were penalized on sparsity while not having any additional space with which to represent concepts. This means the total information they could retain was likely much less than the other sizes. We see a slight upward trend as autoencoder projection ratio increases, which is small enough that it could probably be chalked up to the exact mean-max methodology used in the calculations. In the MCS graphs, the orange line represents mean-max MCS going from the smaller projection size to the larger projection size, where the blue line is the inverse. It is positive evidence that the blue line is much lower, because we should expect the most important features to correlate strongly with some of the features learned by the larger autoencoder, while the many features learned by the larger autoencoder should not all necessarily have a match in the smaller one.

          Conclusion

          Discussion

          In this post, we explored the potential of sparse autoencoders as tools for interpreting neural networks, particularly focusing on their capability to disentangle polysemantic neurons into interpretable, monosemantic features. Our experiments, conducted on various configurations of Pythia models and sparse autoencoders, aimed to understand the consistency and universality of the features extracted by these autoencoders across different model sizes, training datasets, and autoencoder dimensions.

          Our findings indicate that sparse autoencoders are indeed effective in learning sparse, monosemantic representations. This effectiveness is observed across different model sizes and is not significantly impacted by the size of the model, suggesting a level of universality in the features extracted. However, our results also reveal that the correlation between features tends to be higher in autoencoders trained on models closer in size, hinting at some limitations in the transferability of learned features across vastly different model scales.

          Interestingly, we observed a tendency towards increased sparsity in the representations as we moved into the later layers of the network. This suggests that higher-level concepts in these layers might be more specialized and interpretable, aligning with intuitive expectations about neural networks.

          Limitations

          Limitations of sparse autoencoders include that they are extremely computationally intensive, especially if one wants to interpret multiple layers of a network, neural networks are not entirely human-interpretable to begin with, so their learned features will never quite represent human concepts, and all the metrics we use to analyze them rely on overall trends rather than individual features, so despite our ability to provide evidence to help answer broad questions, our analysis is still very imprecise.

          Future Work

          One future direction is focusing on training better sparse autoencoders, ones with lower reconstruction and sparsity loss. Given that we did not optimize our project for this and were limited by time and compute, it is very possible that better sparse autoencoders can improve our results.

          It would also be interesting to train the same sparse autoencoder architectures on different datasets and see whether they are invariant to small perturbations in the dataset. If not, it’s evidence that the method may not work as well as we hope.

          Finally, we could start to look at the features that the autoencoders are finding. We were able to measure similarity and correlations but did not have the time to look at the actual concepts that the representations were finding. This could give us additional insight into similarities between models that we currently are overlooking.

          Acknowledgements

          Special thanks to Sam Marks for suggesting the initial experiment ideas and to MIT AI Alignment for providing connections with mentorship and compute resources.

          \ No newline at end of file diff --git a/blog/2023/unraveling-social-reasoning-in-llms/index.html b/blog/2023/unraveling-social-reasoning-in-llms/index.html new file mode 100644 index 00000000..f3ddfd7e --- /dev/null +++ b/blog/2023/unraveling-social-reasoning-in-llms/index.html @@ -0,0 +1,42 @@ + Unraveling Social Reasoning in LLMs: A Deep Dive into the Social IQA Benchmark | 6.S898 Deep Learning Blogs 2023

          Unraveling Social Reasoning in LLMs: A Deep Dive into the Social IQA Benchmark

          In this study, we investigate the challenge of social commonsense reasoning in large language models (LLMs), aiming to understand and categorize common errors LLMs make in social commonsense reasoning tasks.

          Unraveling Social Reasoning in LLMs: A Decision Tree Framework for Error Categorization

          Introduction

          Social commonsense reasoning is a skill most people acquire within the first few years of life, often without formal education. Consider this example of a social commonsense reasoning question:

          Q: Kai was frantically running to a gate at the airport. Why was Kai running?

          A) They were trying to catch a flight that departs soon

          B) They were training for a marathon

          C) They were testing out their new running shoe

          Most would likely infer that Kai was rushing to catch a flight that would depart soon and choose A, the correct answer. Social commonsense reasoning, at its core, entails reasoning about the past, current, and future states of others.

          Despite advancements in Large Language Models (LLMs), with prompting enabling models to achieve near-human levels of performance in different tasks across various domains, they have traditionally struggled with social commonsense reasoning tasks, often underperforming humans. This isn’t surprising to most observers, though.

          To better understand why, previous studies have created benchmarks for social commonsense reasoning ; benchmarked how different models perform on various social commonsense reasoning benchmarks; and/or provided classifications for different question types testing social commonsense reasoning . Certain question categories, like those directly related to social norms, are documented to exhibit higher error rates, suggesting that some aspects of social reasoning are more challenging for models to learn . However, these works used older models that were not specifically designed for reasoning tasks.

          Specifically, our blog investigates the question, “What are the underlying themes in social errors that large language models make?” from both a qualitative and a quantitative perspective. The goal of our findings is to help discover if there are methods that could potentially address these errors.

          To answer this question, we ran Flan-T5 on the Social IQA benchmark, which was introduced in 2019 and features 38,000 multiple-choice questions (MCQs) designed to gauge “emotional and social intelligence in everyday situations” . After qualitatively labeling 350 model errors, we made a striking discovery: over half of the questions Flan-T5 got wrong were due to problematic questions in the Social IQA dataset.

          Upon making this curious realization, we pivoted our project from designing a decision tree abstraction for providing detailed categorization of social commonsense questions to analyzing and addressing the two types of errors:

          Type 1: Errors stemming from the flawed construction of the Social IQA dataset

          Type 2: Errors where Flan-T5’s choices don’t align with social commonsense.

          In the first error group, even reasonable humans, including this blog post’s authors, disagreed with Social IQA’s “correct” answers. Questions in this first group have nonsensical contexts/questions, lack a single reasonable answer, or have many reasonable answers.

          When examining questions in the second error group, we noticed that Flan-T5 often over-infers underlying reasons when a more straightforward answer exists. To address this group of errors, we visualized T5’s attention mechanisms when processing such questions.

          LLMs and Reasoning

          Language models like GPT-4 have captured widespread media attention, given their question-answering capabilities.

          Throughout the development and testing of LLMs, various tasks have been developed to empirically assess these models’ abilities and limitations. In literature, these tasks are typically categorized into two main groups: natural language understanding (NLU) and natural language generation (NLG). NLU tasks evaluate a language model’s ability to understand natural language. This includes tasks like Natural Language Inference, Reading Comprehension, and various reasoning tasks, including social commonsense reasoning .

          Comprehensive Overview of Social Commonsense Reasoning Benchmarks

          Over 100 large-scale benchmarks have been proposed to assess and compare models’ social commonsense reasoning abilities and to serve as resources for transfer learning . In general, these benchmarks aim to evaluate models’ abilities to infer the mental states, beliefs, and intentions of others. Their development was inspired by the Theory of Mind (ToM), a concept originating from childhood development psychology—some benchmark questions are, in fact, explicitly based on ToM tasks used to assess children’s social reasoning .

          Notable benchmarks frequently mentioned in the literature include multiple-choice Question Answering (QA) benchmarks like the 2019 Social IQA and 2019 HellaSwag , along with generative evaluation benchmarks like 2020 ProtoQA . Many of these benchmarks are based on structured knowledge banks about everyday commonsense reasoning like ATOMIC and COMET .

          Similar to other studies evaluating LLMs’ commonsense knowledge, we use an MCQ benchmark and not a generative one because MCQ benchmarks are simpler and more reliable for evaluation; under tight time and resource constraints, we err on the side of a simple and reliable evaluation method, though, in future works, we would like to generalize our findings to more datasets.

          However, despite their widespread use, benchmarking datasets like Social IQA are not without flaws. Previous studies have shown that many aspects of common sense are still untested by these benchmarks, indicating an ongoing need for reliable methods to evaluate social commonsense reasoning.

          Problems With Social IQA

          Social IQA focuses on evaluating models’ abilities to reason about others’ mental states, aligning with Theory of Mind concepts . Each question in Social IQA comprises a context, a question, and three answer choices; for each of the 37,588 multiple-choice questions, the context, question, correct answer choice, and two incorrect answer choices were gathered through three phases of crowdsourcing on Amazon Mechanical Turk (MTurk) .

          In the first two phases, MTurk crowdsource workers sourced context sentences and questions using the ATOMIC knowledge base . In the third phase, MTurk workers generated correct answers for the given context-question pairs. Incorrect choices for each question were derived from correct answers to related questions . In our Discussion section, we will discuss issues with this methodology for sourcing negative answers.

          Many critiques have been raised about the reliance on crowdsourcing for benchmarks, specifically, about the challenges in obtaining high-quality material . Given the low pay on MTurk, workers often prioritize quantity over quality, leading to errors. There have even been reported instances of crowdsourcing workers using bots to help them complete more tasks.

          Prior Error Analysis Work Using Social IQA Dataset

          The authors of Social IQA conducted a preliminary error analysis of their dataset, finding that language models found questions about context pre-conditions, such as motivations and prior actions, to be much easier than those about stative attributes or predicting future actions. Interpreting these results, the authors hypothesized that models might be learning lexical associations rather than true meaning . This aligns with findings presented in other works .

          Other research, such as Wang et al.’s , categorize Social IQA questions into four types: 1) Feelings and Characteristics, 2) Interaction, 3) Daily Events, and 4) Knowledge, Norm, and Rules. They found the final category to be the most challenging for models.

          General Methodology for Conducting Systematic Error Analysis for QA

          Our research, aimed at identifying themes in social errors made by LLMs, draws inspiration from conventional methodologies for system error analysis in QA tasks. Moldovan et al.’s data-driven approach to QA error analysis, focusing on answer accuracy based on question stems, reveals that certain question types are more challenging for LLMs . Rondeau et al. used feature extraction for systematic error analysis in the Stanford Question Answering Dataset (SQuAD), employing feature extractors and classifiers .

          Existing Approaches to Improve Social Commonsense Reasoning

          Our research also explores existing literature offering solutions for mitigating errors in social commonsense reasoning. Some of these works suggest incorporating external structured data, such as knowledge graphs, into models. For example, Chang et al. showed that integrating knowledge graphs like ConceptNet improves performance on Social IQA .

          However, despite confirming the effectiveness of this approach, studies like Mitra et al. also noted instances where models, even with access to relevant information that can directly lead to the correct answer, predicted incorrect answers based on irrelevant knowledge .

          Methodology

          Step 1: Applying Flan-T5 to Social IQA

          We first prompted Flan-T5, known for its promising reasoning task performance , to answer 33,411 MCQs from the Social IQA dataset. The prompt format, emulating the examples, was as follows:

          [Context].

          Based on the context above, choose the best answer to the question:

          [Question]

          OPTIONS:

          (A) [Answer A]

          (B) [Answer B]

          (C) [Answer C]

          For your answer, return exactly one character, either A, B, or C.

          Step 2: Qualitative Coding of 350 Errors

          Next, we used the following procedure, based on standard iterative qualitative coding methods, to categorize instances where Flan-T5’s response differed from the Social IQA dataset’s correct answer.

          1. Initial Annotation: initially, for a subset of 100 rows, two independent coders annotated each row, noting the reasons for the discrepancy in the correct answer choice between the dataset and Flan-T5.

          2. Theme Identification: the coders reviewed each other’s annotations and engaged in discussions to identify major themes in inconsistencies. Based on these discussions, they developed a formal set of tags to apply to the rows.

          3. Tagging: finally, they applied these tags to a total of 350 rows

          Step 3: Quantitative Error Analysis

          We then analyzed the data to determine the frequency of each error type within our tagged dataset (n=350). We explored potential features, such as specific words, that contributed to the difficulty of the questions.

          Step 4: Addressing Type 1 Errors - Developing a Pruning Tool

          Our objective here was to develop a tool that could use our tagged question set to accurately identify problematic questions. Unfortunately, this approach did not yield the desired results and needs future work.

          Step 5: Addressing Type 2 Errors - Analyzing through Attention Mechanism Visualization

          Finally, we shifted our focus to examining errors by visualizing the attention mechanisms of the model. This approach aimed to provide deeper insights into how the model processes and responds to various types of questions, particularly those categorized as Type 2 errors.

          Analysis and Evaluations

          General Accuracy of Flan-T5 on Social IQA

          Overall, Flan-T5 exhibits a high accuracy of 90% when presented with MCQs from Social IQA, which could be because it was fine-tuned “on a large set of varied instructions,” similar to the questions we present it . This accuracy is much higher than BERT, which had a 64.5% accuracy .

          Set of Formal Tags Derived from Qualitative Coding

          In the initial annotation phase of qualitative coding, both coders were surprised to find many questions marked “incorrect” because of issues inherent in the Social IQA questions themselves (see below for an example). Therefore, we wanted to characterize why the Social IQA multiple choice questions were problematic: was it a lack of context comprehension, the unreasonableness of all answer options, or the presence of multiple equally reasonable answers?

          During the theme identification phase, the coders established two groups of tags:

          1. Errors arising from the flawed construction of the Social IQA dataset

          2. Errors due to Flan-T5’s responses not aligning with social commonsense

          Type 1 Errors

          For Type 1 errors, six labels were created:

          1. Nonsensical Context: When the context sentence is incomprehensible to a reasonable human.

          2. Nonsensical or Ambiguous Question: When the question is either nonsensical or too ambiguous.

          3. Major Typo: Refers to incomprehensible parts of the Context, Question, or answer choices due to typos.

          4. Multiple Reasonable Answers: When several answers appear equally reasonable, either due to similar meanings or general reasonableness.

          5. No Reasonable Answer: When no answer options seem appropriate or reasonable.

          6. Incorrectly Labeled “Correct” Answer: When an alternative answer seems more reasonable than the one marked “correct.”

          Examples of Type 1 Errors

          Examples of Questions and Answers

          Type 2 Errors

          For Type 2 errors: we devise the following set of three labels:

          1. Overinfers: This tag is for questions where Flan-T5 seems to make a large leap in logic, resulting in it picking an answer choice that makes spurious assumptions when a much more direct and clear answer is available

          2. Associated but Incorrect: This is for questions where Flan-T5 picks an answer choice that is associated with the context and question, but is not what the question is specifically asking about. This differs from over-inferring in that this usually entails picking irrelevant answer choices.

          3. Flan-T5 Incorrect (unspecified): all other mistakes attributable to Flan-T5.

          Distribution of Tags

          Looking at our annotated questions, we see that 65% of errors stemmed from the flawed construction of the Social IQA dataset. Meanwhile, 38% of errors stemmed from Flan-T5 not picking the right answer. Observe that it is possible for a question to be tagged with both a Type 1 tag and a Type 2 tag.

          For Type 1 errors, we see that having multiple reasonable answers is by far the most common reason why a question is problematic. This was followed by having no reasonable answer, and the answer labeled “correct” not being the best available answer. Indeed, the top three reasons why a question is considered problematic all stem from questionable answer choices. This highlights how the construction of the answer choices, and thus Social IQA as a benchmark set, is problematic.

          Histogram of the Types of Errors in SocialIQA

          Next, we examine the distribution of Type 2 error tags. We see that the most common reason is Flan-T5 over-inferring.

          Histogram of the Types of Errors FlanT5 Makes

          Analysis of Question Types

          In our quantitative analysis, we identified key features contributing to lower accuracy in certain questions. Notably, questions containing the word ‘others’ scored lower in accuracy, with an average of 0.880, compared to the general accuracy score of 0.990. Furthermore, questions featuring repeated answer choices also exhibited a lower accuracy score of 0.818.

          Attempt to Prune Social IQA

          Assessing models on social commonsense reasoning questions requires clear comprehension and consensus on the appropriateness of the questions and their answer choices. Our goal was to create a tool to classify the sensibility of these questions and answers. To achieve this, we experimented with various models, including Flan-T5 and GPT-4, asking them to evaluate the coherence of the questions. Unfortunately, the results were inconsistent, often varying with each regeneration of the response. Despite these challenges, we maintain that addressing this issue remains crucial.

          Visualization of Attention Mechanism

          In our analysis of Type 2 errors, we focused on how the errors happen because Flan-T5 overinfers the underlying reasons not explicitly stated in the question instead of picking the more straightforward and correct answer, or picks some answer associated with the words in the context that isn’t directly related to the question.

          In addition to providing qualitative analysis, we set out to provide some quantitative analysis to better understand why this was happening. Consider these linked notebooks, which visualize the cross attention and the encoder attention for one correctly labeled example and one incorrectly labeled example, where Flan-T5 chooses an associated but incorrect answer. (Note that the specific images were chosen for brightness in the heatmaps, since the attention was normalized; please reference the notebook.)

          To visualize cross-attention, we looked at the cross-attention between the answer Flan-T5 generates and the encodings, across each layer and attention head in Flan-T5, grouping in both orders. To visualize the encoder attention, we looked at the average attention for each layer in the input encoding, and for the layer that saw the most drastic change (layer 2, starting from 0 index), we visualized the attention for each attention head.

          Now, consider the context and question:

          Cameron had a big paper due later in the week, so Cameron put pen to paper. What will Cameron want to do next?

          A) research his topic

          B) write an outline

          C) redo his topic

          Flan-T5 answers A), while the correct answer is “write an outline.” Notably, Flan-T5 doesn’t choose the third answer, “redo his topic.”

          Therefore, we can see that Flan-T5’s answer is associated with the topic, but isn’t the correct answer, which is given by the phrase “put pen to paper.” Visualizing the average encoder attention and cross attention, we see that the contextualized embeddings and generation primarily focus on the words “big paper” and the question, but don’t pay much attention to the word “pen.”

          Cross Attention for Associated But Incorrect Answer
          Encoder Attention for Associated But Incorrect Answer

          Generalizing our results a bit, we find that FLAN only pays reasonable attention (normalized attention > 0.05) to the keywords for 14 out of 26 examples tagged under “associated,” even for simpler questions such as

          On the other hand, consider the question,

          Sydney played basketball with her friends after school on a sunny afternoon. What does Sydney need to do before this?

          A) take a nap before this

          B) have a basketball before this

          C) go home before this

          Flan-T5 correctly answers “have a basketball before this,” not choosing “take a nap before this” or “go home before this.”

          Indeed, we see the four vertical lines in the encoder and cross attentions that correspond to key phrases in the sentence. For the questions that Flan-T5 gets correct, it pays attention to the right keywords 9 out of 10 times. Lastly, note that for questions labeled “overinfer,” Flan-T5 pays attention to the right keywords 8 out of 10 times.

          Cross Attention for Correct Answer
          Encoder Attention for Correct Answer

          Therefore, for more straightforward questions, namely, questions that have one straightforward answer, Flan-T5 can find the right keywords that lead it to the answer (i.e. the correct questions). On the other hand, for more challenging questions that require paying attention to specific keywords and reasoning from the perspective of a character (recall the Sally-Anne Test), Flan-T5 struggles more, with more variance between what it pays attention to and doesn’t (e.g. paper but not pen).

          In addition, since Flan-T5 pays attention to the right keywords most of the time for the questions it overinfers on, this suggests that there’s some aspect of reasoning that’s not being captured via our attention visualizations, and that this reasoning isn’t performing that well.

          Notably, something interesting to note is that for all of the examples, by the third encoder layer, on average, Flan-T5 doesn’t change its encodings, and for the cross attention, the attention remains consistent across all layers and (most) attention heads. Therefore, it seems like most of the “reasoning” is being performed in the encoding stage.

          Therefore, some of our next steps are understanding how removing attention heads in a smaller model affects the model’s ability to reason, given the large number of heads and layers (24 x 32) in Flan-T5-xxl . We visualized each encoder head for one layer, but this doesn’t immediately lend itself to an intuitive interpretation.

          Discussion

          Our work concentrated on analyzing two categories of errors and proposing solutions to address them. The two error types are:

          1. Errors originating from the flawed construction of the Social IQA dataset.

          2. Errors where Flan-T5’s responses do not align with social commonsense.

          Problems with Social IQA

          Our analysis of Type 1 errors in the Social IQA dataset revealed significant issues. In examining n=350 incorrectly answered questions, we found that 65% had problems with their context, question, or answer choices. Additionally, 54.4% of these errors had multiple reasonable answers, 23.7% lacked any reasonable answer, and 14.0% seemed to have mislabeled correct answers. This indicates a substantial number of misleading answer choices in the Social IQA questions.

          This issue partly stems from the dataset’s construction, which involved assigning crowdsourced workers tasks of writing positive answers for each question and sourcing negative answers from “different but related” questions. This approach likely contributed to the high error rate.

          Since Social IQA is so frequently used in evaluating model performances and transfer learning tasks, the challenge is to identify and remove these flawed questions. Although our attempt to do this was unsuccessful due to time and budget constraints, we believe it is feasible. Many evaluations of large language models (LLMs) use crowdsourced multiple-choice questions, so a pruning tool to ensure benchmark reliability would be highly beneficial beyond the task of social commonsense reasoning.

          Pruning the Social IQA dataset to eliminate most erroneous questions would also provide an opportunity to reassess older models.

          Overall, our analysis of Type 1 errors underscores the need for caution in crowdsourcing benchmark questions. While crowdsourcing likely still remains the best solution for creating large benchmark sets, a pruning tool is essential to maintain the reliability of such datasets.

          On the other hand, our analysis of Type 2 errors suggests that LLMs still might not match the social reasoning skills of humans for more complex scenarios. For simpler questions, they can often find a single keyword that informs their answer, while for more complex questions, they often miss important phrases and can’t necessarily think from another person’s perspective. For instance, recall how questions containing the keyword “others” result in Flan-T5 having considerably lower accuracy.

          Main Limitations

          The primary limitations of our study are rooted in its scope and methodology. Firstly, we focused exclusively on a single model, Flan-T5, which may limit the generalizability of our findings. Additionally, our analysis was based on a relatively small sample size of n=350, and it involved only two coders. For a more robust and comprehensive evaluation, increasing the number of coders would be beneficial, particularly to assess intercoder reliability. Furthermore, implementing measures to mitigate recognition bias during the tagging process would enhance the validity of our results.

          \ No newline at end of file diff --git a/blog/2023/vig-algorithm-flow-project-proposal/index.html b/blog/2023/vig-algorithm-flow-project-proposal/index.html new file mode 100644 index 00000000..a0be87d8 --- /dev/null +++ b/blog/2023/vig-algorithm-flow-project-proposal/index.html @@ -0,0 +1,42 @@ + Augmenting Expert Domain Image Inputs for Enhancing Visual Language Models Performance | 6.S898 Deep Learning Blogs 2023

          Augmenting Expert Domain Image Inputs for Enhancing Visual Language Models Performance

          This blog post explores enhancing visual language models, particularly for expert domains like scientific literature, where standard models struggle. By integrating domain-specific knowledge and advanced image embeddings, the research aims to refine the performance of visual language models such as OpenFlamingo. Leveraging graphical structured embeddings and graph neural networks, the study tests different methods of representing images to improve the models' interpretive capabilities.

          Introduction

          Over the past few years, we have seen a surge in creation, adoption, and excitement around visual language models, specifically around Open AI’s CLIP model. Visual language models can bridge the gap between image and text, allowing tokenized understanding of the visual world around us. For instance, Meta released Segment Anything, a model with enhanced object detection through multimodal inputs like defined bounding boxes and text.

          After the recent surge with ChatGPT, we have begun to see advancements in the visual language model space to combine the image analysis and conversational tool. While the recent developments with Bard, GPT4-v, LLava, and many others have progressed the visual language model domain, the overall capabilities of the models are limited to the type of images provided. Most of the models have been trained and finetuned on common day objects, specializing in every-day normal tasks.

          However, these models continue to struggle with answering images derived from an expert domain, especially scientific literature. Images from these domains can be challenging for the model, as they require common background knowledge, domain knowledge, and interpretation of the diagram.

          Question: What is the path from 1 to 4?

          GPT4-v Answer: The image you've uploaded appears to show a diagram with four numbered points, possibly representing steps or locations connected by a path... However, as an AI, I can't visually trace paths or analyze images in the way a human would...

          How can we assist visual language models to improve performance in expert domains?

          Past Works

          Visual Language Models have become very popular in the recent years with their ability to connect image to text. Open Flamingo is an open source model with a similar architecture to Flamingo: images are sent to (1) visual encoders, (2) perceived resampler, and (3) through a dense group of layers. Through few shot learning, text completion, and image analysis, Open Flamingo allows users to have a conversation involving images.

          Currently, popular visual language models, like Flamingo, utilize CLIP as the visual encoder to perform the image-to-embedding conversion. Behind the scenes, CLIP uses a Vision Transformer architecture as the encoder. However, when we analyze the embeddings output from CLIP, they do not necessarily have a clear representation to the human eye. This makes it really challenging to inject domain-specific knowledge through the embedding to help perform specific tasks.

          The project, FlowchartQA, contributed by creating a dataset of flowcharts and code, revealing insights into the relationships forming code. However, no research has tried to understand the way visual language models interpret graphs with nodes and edges, specifically for domain specific questions. Through various changes to text and image inputs, we can learn about the way a visual language model understands graphical structures to improve future performance of VLMs.

          Procedure

          Dataset Creation

          Data Augmentation Pipeline for the Inputs

          To learn more about the graphical understanding of VLMs, a dataset had to be curated to test various conditions. The original images of the flowcharts are sampled from the BizGraphQA dataset. Due to the compute and time constraints, only a sample of ten images were utilized for the analysis. This subset of images is passed through the Segment Anything Model from Meta to extract a mask of each of the nodes. From there, OCR is performed on each node to retrieve the text and hand coloring is performed to color the image. In the end, we get dataset triples of the original image, colored image, and the list of nodes in the image.

          For example, for this image, we would have the following dataset.

          Original Image
          Colored Image
          Node List
          [Finanze Tyco Roadsafe Insurance, Greyhound Bbbytf Limited, Etablissements Lgac Lthalat Incorporated Indiana, Korte Pro-Cut Grayhawk Insurace North Carolina, Hutchins Sandvik Maryland, Nm Suc. Krsx Limited Michigan]

          Experimentation

          Experimentation Pipeline for the Inputs

          Bard uses Flamingo, a visual language model, to answer queries. We will provide an input image with or without the augmentation and a question about the graph into Flamingo, as illustrated in the figure above. Each image will be paired with a question in a specific category. For this analysis, we will focus on four major types of questions to evaluate the VLM’s understanding of graph connectivity. These questions are to be asked in tandem with the original image, the colored image, and the original image paired with the list of nodes in the image. We ask the following questions:

          1. Based on the image, is there a cycle in the graph?
          2. Based on the image, what is the path from __ to ___? (The ground truth path involves nodes that only have one child node.)
          3. Based on the image, what is the path from __ to ___? (The ground truth path involves nodes that have multiple child nodes.)
          4. Based on the image, how many child/parent nodes does _____ have?

          For the same image from above, here are the questions and relevant answers:

            Question Answer
          1 Is there a cycle in this graph? No
          2 What is the organization hierarchy path from Etablissements Lgac Lthalat Incorporated, Indiana to Nm Suc. Krsx Limited - Michigan? The path is Etablissements Lgac Lthalat Incorporated, Indiana to Korte Pro-Cut Grayhawk Insurance, North Carolina to Nm Suc. Krsx Limited - Michigan
          3 What is the organization hierarchy path from Finanze Tyco Roadsafe Insurance to Nm Suc. Krsx Limited - Michigan? The path is from Finanze Tyco Roadsafe Insurance to Greyhound Bbbytf Limited to Nm Suc. Krsx Limited - Michigan
          4 How many child companies does Greyhound Bbbytf Limited have holdings in? Two

          But, you must be wondering: why ask these questions specifically? Each question tests understanding of graphical elements without background understanding of the topic. This should serve as a baseline for the way that VLMs understand graphical structures and the common questions to be asked.

          Evaluation Metrics

          To evaluate the success of our model, we will conduct both qualitative and quantitative analyses on the dataset, given that quantitative evaluation of generative models can be challenging. The control group will provide a baseline for normalizing the results.

          Qualitatively, we will perform a manual analysis of the generated outputs. By using prompts, images, and answers, we will subjectively compare the prompt, the image, and the resulting answer. Our primary goal is to assess how effectively the visual language model generates the answer based on the prompt while being constrained by the graph.

          Quantitatively, an accuracy score will be employed to evaluate the percentage of questions that have been answered correctly in each metric. Incorporating these metrics will provide a more comprehensive understanding of the model’s performance and allow for better comparisons with other generative models.

          This comparative analysis will facilitate a robust evaluation of the methods employed to represent graphical information within visual language models.

          Results

          The following table provides the accuracy metric of the aforementioned experiment:

            Question 1 Question 2 Question 3 Question 4
          Original Image .7 .4 0 0
          Original Image with Node List 0 .3 0 .1
          Colored Image 1 .3 0.1 0.2

          We clearly see that the Colored Image performs better than the Original Image in most of the scenarios. Also, we unfortunately see that the node list did not provide gains in performance for the VLM. Overall, the performance is quite limited when asking specific questions about path finding and children nodes. Let’s dive a bit deeper to understand why.

          Preface: In the following sections, there are tables with prompts, images, and answers from Bard for various mini explorations.

          Superiority in Extracting Nodes

          When first embarking on this project, I believed that the VLM would struggle to pair the text and nodes together. However, the VLM actually proved to be quite impressive with the extraction of nodes and the names associated with them, making the need for a node list not quite necessary.

          Using the input figure from above without any augmentations, I asked the VLM how many child nodes a specific node in the graph had. The VLM outputs the wrong answer, but it actually provides generated code for its reasoning. Here is a portion of the code:

          # Define the corporate structure as a dictionary
          +corporate_structure = {
          +    "Etablissements Lgac Lthalat Incorporated, Indiana": {
          +        "Nm Suc. Krsx Limited - Michigan": {}
          +    },
          +    "Nm Suc. Krsx Limited - Michigan": {},
          +    "Krsx Incorporated, California": {
          +        "Nm Suc. Krsx Limited - Michigan": {}
          +    },
          +    "Krsx Incorporated, Texas": {},
          +    "Lthalat Holdings, Delaware": {
          +        "Etablissements Lgac Lthalat Incorporated, Indiana": {},
          +        "Krsx Incorporated, California": {},
          +        "Krsx Incorporated, Texas": {}
          +    },
          +    "Greyhound Bbbytf Limited": {
          +        "Krsx Incorporated, California": {},
          +        "Krsx Incorporated, Texas": {}
          +    }
          +}
          +

          The VLM was actually quite impressive with obtaining the specific nodes in the graph, recognizing the text through OCR, and pairing it with the nodes. While all the nodes are not correct, we can see that the VLM can follow a procedure to determine this. While this is a good example of node extraction, the capability is still ambiguous.

          To poke this topic a bit more, I wanted to test out the VLM’s ability to extract the nodes if the colors are the same or different. I designed a basic figure with just nodes to test this. The same prompt was passed into Bard with the images below. The only difference between the two images is the fact that in one image, the colors of the nodes are the same, and in the other image, the colors of the nodes are different. In the results below, we can clearly see that the VLM is able to perform better with the colored nodes, as the VLM is able to distinguish between different nodes.

          Original Image
          Colored Image
          Prompt: What are the names of the nodes in this graph?
          Prompt: What are the names of the nodes in this graph?
          Answer: B1, B2, B3, B4, B5.
          Answer: B1, B2, B3, B4.


          To support this argument, we look at the attention that CLIP places on segments of the image based on a caption. We specifically use CLIP because CLIP is the visual encoder in Flamingo. While this isn’t necessarily a rigorous proof, we can see that the attention on the nodes is placed stronger in the colored graph example rather than the regular graph example.

          Caption: Node B1, B2, B3, B4
          Caption: Node B1, B2, B3, B4

          Through the examples and tests above, we can clearly see the VLM’s ability to extract nodes, especially with a visually distinguishing factor between the nodes like color. Since the VLM can do a pretty decent job of extracting the nodes, it makes sense that providing the VLM with the node list may not allow for great improvements in performance.

          So, if the VLM can extract the nodes relatively well, why is the performance still subpar?

          Difficulties with Edge Detection

          Aside from nodes, most graphs have edges, and for the questions asked in the experiments, understanding the connectivity was crucial to providing the correct answer. We actually observed that the colored graphs had answers that were closer to 100% accuracy in comparison to the regular graphs. To explore how VLMs understand the connections between nodes, I decided to ask Bard about some simple graphs to determine how it responded.

          Image
          Prompt
          Can you please tell me what the edges in the graph are?
          Can you please tell me what the edges in the graph are?
          Answer
          The edges in the graph are: a -> b, b -> c, c -> a
          The edges in the graph are: B -> A, C -> A

          Wow! It’s really surprising that the VLM is creating edges where there aren’t even edges! Also, the direction of the edges is wrong. Why would this be the case?

          Let’s explore a bit more about the individual arrows. Can a VLM accurately describe the arrow direction?

          Image
          Prompt
          Which way does the arrow point?
          Which way does the arrow point?
          Answer
          The arrow in the image points to the right.
          The arrow in the image points to the left.

          Well, if you take a look at the responses above, we notice that the VLM can accurately provide the arrow direction for larger arrows in comparison to smaller arrows. Since most of the graphs involve smaller arrows, we can see why the VLM could not tell the directionality of the arrow.

          Outside information

          Beyond the graphical structure of the nodes, I found that Bard typically incorporated outside information that required background knowledge about the topic. These situations can be categorized into two categories: useful outside information and irrelevant outside information. To distinguish between the two, I found that in the situations that Bard used graph algorithms and definitions, the outside information was useful in generating the answer. However, many times, Bard would look up specific companies or use graphs from other sources online.

          In this example, Bard was asked if there was a cycle in the graph, and Bard outputted the answer below. The VLM was very intelligent by quoting the definition of a cycle and using DFS to determine the existence of one. The step-by-step algorithm is correct and ultimately, leads to the solution. This shows that the incorporation of algorithms and accurate definitions can assist the VLM in answering queries.

          No, there is no cycle in the graph of the Nove 1-66Express organizational structure. A cycle is a path in a graph that starts and ends at the same vertex, and there is no such path in this graph. I verified this by tracing the graph and ensuring that I never returned to a vertex that I had already visited. I also used the following algorithm to check for cycles in graphs: Mark all vertices as unvisited. Choose a vertex and start a depth-first search (DFS). As you traverse the graph, mark each vertex as visited. If you ever reach a vertex that is already marked as visited, then there is a cycle in the graph. If you finish traversing the graph without ever reaching a visited vertex, then there is no cycle in the graph. I ran this algorithm on the graph of the Nove 1-66Express organizational structure and found that there are no cycles.

          However, outside information can be just as harmful as it is beneficial. Pulling from outside sources can be tricky for a VLM. It needs to remember to operate within the ground truths presented in the figure. In multiple instances, the VLM actually referenced the wrong figure because it forgot about the image provided with the prompt.

          Therefore, the VLM needs to be fine-tuned to filter the outside information it brings in to improve the accuracy of its answers.

          Conclusions and Future Steps

          In this study, I delved into the capabilities and limitations of VLMs, like Open Flamingo, in interpreting complex graphical structures within expert domains, such as scientific literature. The experiments, involving various image augmentation techniques such as color changes and node names, revealed key insights. While VLMs demonstrated proficiency in node extraction, they faced challenges in edge detection and understanding the connectivity between nodes. This was particularly evident when colored images outperformed non-colored ones, highlighting the importance of visual distinction for VLM comprehension. However, the addition of node lists did not significantly enhance performance, suggesting existing capabilities in node identification. The connectivity was difficult for the VLM to understand because of the size of the arrows.

          The findings of this research highlight a crucial challenge for VLMs: integrating domain-specific knowledge, especially for non-standard images like scientific diagrams. However, the small dataset size suggests that further research with a larger and more diverse dataset is necessary to validate these findings. In the future, this research can be applied to help improve prompting for graphical structures, provide insights on how to finetune a VLM for this task, and create a new interest in using VLMs for scientific diagrams.

          \ No newline at end of file diff --git a/blog/2023/watermarked-llms/index.html b/blog/2023/watermarked-llms/index.html new file mode 100644 index 00000000..a1441bf2 --- /dev/null +++ b/blog/2023/watermarked-llms/index.html @@ -0,0 +1,32 @@ + Are Watermarked Large Language Models More Prone to Hallucinations? | 6.S898 Deep Learning Blogs 2023

          Are Watermarked Large Language Models More Prone to Hallucinations?

          In this blog post, I investigate whether watermarked LLMs are more likely to “hallucinate,” or make up facts, because of limitations imposed by the watermarking scheme.

          Introduction

          As LLMs grow in capabilities, it is becoming more and more difficult to tell apart human-written from AI-generated content. Current post-hoc AI detection tools like GPTZero, which are easy to bypass and can be biased against non-native English speakers, are neither robust nor fair. Watermarking schemes suggest a more secure and unbiased method of detecting LLM-generated content, but introduce potential quality degradation. In this blog post, I investigate whether watermarked LLMs are more likely to “hallucinate,” or make up facts, because of limitations imposed by the watermarking scheme. I formulate a nuanced research question, explain assumptions made and my experimental setup, present an analysis of my results, and present next steps. Overall, although I do not obtain statistically significant results, I do provide statistical evidence that hallucinations in watermarked LLMs are worth studying, with interpretable qualitative results that I explain with fundamental ML concepts.

          Background

          The need for AI detection algorithms

          Deepfakes. AI-assisted academic plagiarism. Bots on social media spreading fake news. These are just a couple of the real-world problems brought about by the recent advancement in large language model capabilities that make it easy for malicious users to spread misinformation, while making it hard for social media platforms or governments to detect their AI origin. Thus, detecting AI-generated content in the wild is becoming one of the hottest research fields in AI. In fact, the White House recently commissioned an executive order on AI to direct action towards detecting AI-generated content and authenticating official content. But as AI models get more and more powerful, the question arises: will it even be possible to tell apart truth from disinformation?

          Some signs have already appeared that point to the answer being “no.” When ChatGPT first released to the public, coding Q&A site StackOverflow temporarily banned answers generated by ChatGPT, because it was so easy to use ChatGPT to generate answers that seemed to be correct but were wrong on closer inspection. Perhaps you’ve experienced the following: you’re browsing Reddit or an online forum and reading a user’s reply, thinking that it’s a well-composed answer, only to realize that the structure of the reply is strikingly similar to how ChatGPT sounds.

          A Reddit post pointing out a user who writes all their comments using ChatGPT.
          An unexpected comment replying to the Reddit post above.

          Some AI detection tools, such as GPTZero , claim to solve this problem by detecting text generated by large language models (LLMs). However, from playing around with GPTZero, it’s not hard to prompt ChatGPT to generate text that bypasses GPTZero’s filters, or to manually paraphrase a few sentences while preserving the content’s general meaning. This is due to GPTZero’s simplistic detection method, which is just to predict if a piece of text is AI-generated by analyzing its perplexity—a measure of “randomness.” In addition to being inaccurate, AI detection tools like GPTZero frequently misclassify text written by non-native English speakers as AI generated, bringing up issues of fairness and biases.

          So is it all doomed? Will we reach a state of the world in which people can’t trust anything they see on the internet to be human-generated?

          Not quite. New watermarking algorithms make it possible to trace back any text generated by specifically-watermarked LLMs with high accuracy and with low false-positive rates, and with considerable amount of effort required to modify the output of such an LLM without also degrading the quality of the output.

          So what is watermarking?

          Watermarking, in the context of LLMs, is the process of modifying an LLM's generation process such that signals are embedded into generated text that are invisible to humans but algorithmically detectable. The key difference between watermarking and post-hoc detection algorithms like GPTZero is that post-hoc detectors rely on text outputted by LLMs to sound “artificial,” and as LLM capabilities grow, this is unlikely to hold. On the other hand, watermarking schemes work regardless of the capabilities of the underlying LLM, which make them more robust to advancements in AI. The watermarking scheme designed in A Watermark for Large Language Models (Kirchenbauer, Geiping et al.) is specially designed to have negligible impact on text quality and work with a publicly accessible detection algorithm, so that anyone can verify if a piece of text is generated by a particular LLM. The watermarking scheme works by selecting a pool of “green” tokens before text generation and softly preferring to sample from the pool of “green” tokens during text generation. Then, the detection algorithm checks if a piece of text contains a higher proportion of “green” tokens than expected, and if the result is statistically significant, determines that the text was generated by an LLM.

          The existence of an undetectable, unbreakable, and accurate watermarking scheme would be incredible! By watermarking any LLM before its release, any text generated by the LLM would contain statistical signals that prove its AI origin, making it difficult for adversaries to pass off LLM-generated content as human-generated. Furthermore, because watermarking schemes rely on detecting signals associated with each LLM’s watermarking process and not by analyzing the perplexity of text, human-generated content would rarely be flagged as AI-generated. Unfortunately, the recent paper Watermarks in the Sand: Impossibility of Strong Watermarking for Generative Models (Zhang et al.) proves that under natural assumptions, it is impossible to achieve watermarking schemes that can resist all attacks by a computationally-bounded attacker. The key assumption, which is the existence of a quality oracle—an algorithm that can differentiate between higher and lower quality responses—is easily satisfied by using any LLM that is capable of verifying quality. The authors of the paper implement a general attack on any watermarked LLM by perturbing the output of an LLM an arbitrary number of times, and using the quality oracle to ensure the perturbation does not cause the text to degrade. After enough perturbations, the text is unlikely to contain the statistical signals embedded in the original output, and the attacker evades detection.

          So if an attacker is willing to spend lots of time and effort, they can break any watermarking scheme. Still, maybe this barrier is enough to deter most attackers. Then, why wouldn’t we watermark every LLM released to the public?

          Quality degradation in watermarked LLMs

          The truth is, because watermarking schemes force a LLM to preferentially sample from a pool of “green” tokens, the quality of the output of watermarked LLMs may decrease. To understand the intuition behind this, here’s a short clip from “Word Sneak with Steve Carell”: link

          “We weren’t dressed properly for moose-watching or zucchini-finding… I for one had the wrong kind of pantaloons on.”

          Steve and Jimmy were given cards with random words and had to work them into a casual conversation. Similarly, one can imagine an LLM generating odd-sounding sentences in order to adhere to a watermarking scheme.

          The effects of quality degradation are amplified the smaller the space of high-quality outputs is. For example, the prompts “What is 12 times 6?” or “What is the first section of the U.S. Constitution?” have only one accepted answer, forcing a watermarked LLM to either give up on watermarking the output or hallucinate incorrect answers.

          The latter bad outcome is the one I will investigate further in this blog post: Are watermarked LLMs more prone to hallucinations? In particular, I investigate if there are tradeoffs between quality of outputs and watermark security. Lastly, I perform a qualitative analysis of watermarked outputs, and explain any interpretable trends caused by the watermarking scheme.

          Experiment

          Setup

          I investigate my hypothesis by experimenting with unwatermarked and watermarked LLMs. I outline my experiment here: first, I ask an unwatermarked LLM to generate biographies for 100 famous people. I ask an evaluation oracle, aka GPT 3.5, to count the number of mistakes in each generated biography. This serves as my control group. Then, I create three experimental groups, each of which correspond to a watermarked LLM with varying degrees of watermarking security. I ask GPT 3.5 to count the number of mistakes by each of the watermarked LLMs, and perform statistical Z-tests to conclude whether or not watermarked LLMs are more likely to hallucinate.

          I now walk through the steps of my experiment in more depth, with commentary on any decisions or tradeoffs I made in the process. Hopefully anyone reading this can follow what I did to replicate, or even build upon, my results!

          My coding environment was Google Colab Pro, and its V100 GPU was sufficient to run all my code—a complete runthrough of my final Jupyter notebook would take a bit over an hour. The watermarking scheme I sought to replicate can be applied to any LLM where the watermark has access to the last layer of logits, so I looked into a variety of open-source LLMs. Ultimately, I decided on OPT (1.3 billion parameters) , because its small model size allowed me to experiment with different parameters more efficiently, with faster inference times. Other open-source LLMs I considered were Mistral, Llama, and Roberta.

          For my experiment, I needed a dataset of biographies of famous people. Unfortunately, I couldn’t find one publicly available after a few hours of searching, so I did the next best thing: I made my own. Using a list of 100 famous peoples’ biographies I found on a website , I copy-pasted each of their Wikipedia biographies into a CSV.

          Lastly, I needed an evaluation oracle to count up the number of factual mistakes in each generated biography. I decided to make a tradeoff between accuracy and efficiency by letting ChatGPT do the work for me instead of manually cross-checking sample biographies with their Wikipedia biographies. After a bit of research into OpenAI’s APIs and pricing plans, I settled on the GPT 3.5-turbo API, since I expected to generate 600k tokens for my experiment, which would be a bit less than $1 in costs. With more funding, I would have probably used GPT 4, but I checked and was satisfied with the outputs of GPT 3.5-turbo.

          Watermarking scheme implementation

          With the experimental variables of open-source model, dataset, and evaluation oracle decided upon, I began to implement the watermarking scheme detailed in A Watermark for Large Language Models. The watermarking scheme is made up entirely of two components: a watermarking logits processor that influences how tokens are sampled at generation time, and a watermark detector that detects if a given piece of text contains a watermark. There were also several tunable parameters detailed in the watermarking paper, but the two of interest are gamma and delta.

          Gamma represents the breadth of the watermark in terms of vocabulary: a higher gamma includes more words in the “green” pool, making responses sound more natural but may dilute the watermark’s detectability, while a lower gamma focuses on fewer words, increasing its detectability but potentially negatively affecting the output. The authors of the watermarking paper suggested a value for gamma between 0.25 and 0.75.

          Delta represents the intensity of the watermark, or how strongly the watermark prefers “green” tokens to “red” tokens at each step of the generation process. The higher the delta, the more evident the resulting watermark. The watermarking paper suggested a value for delta between 0.5 and 2.0.

          A visualization of the watermarking scheme (figure from original paper).

          Tuning these parameters, I created three different experimental groups, each corresponding to a different level of watermarking strength: strong, medium, and weak. The exact values of gamma and delta I used can be tinkered with; my choices were based on what empirically had the best effects.

          I left the detector algorithm provided by the watermarking paper mostly untouched, except for the Z-threshold. I tuned it down to z=2.3 so the detector would be more likely to say a piece of text was watermarked for comparisons between different watermarking strengths, but this threshold still required 99% confidence. Additionally, the detector algorithm takes as input gamma, which is the same gamma used to generate the watermarked text I am attempting to detect. This is a key parameter that differentiates a watermark detector from a general post-hoc AI detector. The gamma seeds the watermarking scheme, so that during the detection process, we can work backwards to determine if the token sampling adheres to the given gamma value. This ensures that human-written text that sounds bland or like a non-native English speaker won’t be misclassified as AI-generated, resulting in a low false-positive rate.

          Prompting my models

          First, I needed to prompt my open-source model to generate biographies of famous people. Since the version of OPT I used is a Causal LM, not an Instruct LM, I needed to prompt it with a sentence that would make it most likely to continue where I left off and generate a biography of the specified person. After some testing, I settled on the following prompt:

          However, I noticed that the watermarked LLMs were initially outputting repeated phrases, e.g. “Barack Obama was the first African-American president of the United States. Barack Obama was the first African-American president of the United States.” Although this wasn’t technically hallucination, I wanted the output to look like a real biography, so I tuned two hyperparameters used during text generation: no_repeat_ngram_size=3 and repetition_penalty=1.1 to discourage repetitive phrases.

          Next, I needed to prompt my evaluation oracle, GPT 3.5, to evaluate sample biographies. Since GPT 3.5 is an Instruct model, I can directly ask it to evaluate a given biography. I decided on the following prompt:

          I ask GPT 3.5 to clearly list out each detected mistake and their corresponding correction in order to reduce the likelihood of it hallucinating, as well as allowing me to manually verify its evaluations.

          Results

          Quantitative results

          After generating four biographies for each person—one unwatermarked control sample and three watermarked samples with different watermarking parameters—I evaluate them against our GPT 3.5 evaluation oracle.

          On average, the unwatermarked control LLM generates biographies that contain 8.14 mistakes each. The strongest watermarking setting has a mean of 8.64 mistakes, the medium watermark has 8.56 mistakes on average, and the weakest watermark has 8.00 mistakes on average. Clearly the weakest watermarked LLM doesn’t hallucinate any more than the control group LLM—but it does have a 13% detection rate, which is pretty substandard for a watermarking scheme. The medium and strongest watermarks perform slightly worse than the control group LLM, and by performing Z-tests on the measured statistics (never thought AP Stats would come in handy), I conclude that the probabilities of observing the results we got for the strong and medium watermarked LLMs are 26% and 34% respectively. So, although these probabilities aren’t statistically significant, they do slightly imply that watermarked LLMs hallucinate more often, and the effect is especially visible with stronger watermark settings.

          We also see that our unwatermarked biographies had a false positive rate of approximately 1%. This can be attributed to the tuning I made to the Z-threshold, from 4.0 to 2.3. Indeed, I made the change knowing that a Z-threshold of 2.3 reflects 99% confidence, so our FPR of 1% is in line with this change. If I had left the Z-threshold at 4.0, we would have an FPR of approximately 0.003%. However, with a higher Z-threshold, the weakest watermarked LLM would consequently have an even lower successful detection rate, so I made this tradeoff of having one or two false positives in order to catch more watermarks. This also lets us see more clearly how even though weaker watermarks are less detectable, some trace of the watermarking signal still remains.

          Qualitative results

          In addition to quantitative results, I perform a deeper, qualitative analysis on a biography generated for a specific person. I chose the strong watermarked biography for Nelson Mandela because of the interesting and interpretable trends we can see:

          I highlighted the errors pointed out by our evaluation oracle in red text for ease of comparison. Note that there may be additional errors not caught by GPT 3.5. Generally, we see that the errors occur from mixing up dates, names, ages, locations, etc., and are not completely made up facts. In fact, the biography does capture a relatively sound summary of Mandela’s life. I posit that the hallucinations we see are mostly simple fact mismatches because the watermarking schemes we impose on OPT still give it the flexibility to tell a good story of Mandela’s life, but when it comes down to token-by-token sampling, our LLM may be forced to generate the wrong date or name in order to adhere to the “green” token preference scheme.

          I also wanted to highlight the blue text. The sentence “The Nobel Prize is one of only three Nobel Prizes awarded to individuals in history” not only is incorrect but also doesn’t add much substance to the biography. Here are three other hand-picked sections of generated biographies that aren’t informative to the person’s biography:

          In each piece of text, the reported facts may not be incorrect, but they take up valuable space that we would otherwise like to be used to introduce new facts about the person. So even if these facts aren’t flagged as factual inaccuracies by our evaluation oracle, they do demonstrate a degradation in model output, raising the issue of response quality degradations in addition to hallucinations.

          Theoretical underpinnings of results

          Taking a theoretical perspective, what exactly causes LLMs to hallucinate? To answer this question, we cite one of the important topics covered in class: reward misspecification. If, during training time, we give low error loss to outputs that sound similar to our training data, we’re not necessarily training the LLM to be more accurate. Instead, we’re training the LLM to generate output that is more likely to be accepted as “close-enough” to the training data. When we ask ChatGPT to write a poem or reply to an email, being “close-enough” is usually fine. But when we need it to be 100% accurate, such as solving a math problem or generating a biography for a real person, being “close-enough” doesn’t quite make the cut.

          Furthermore, the auto-regressive manner in which LLMs generate text means they sample the “most-likely” token, based on previously seen tokens. If our LLM starts to generate FDR’s most important New Deal measures, the “most-likely” tokens to follow might be explaining each of the New Deal measures in detail. But this isn’t what we want out of a biography of FDR!

          Both of these problems—hallucinating false information and generating uninformative facts—are observed in our experiments. But unfortunately, it’s hard to reduce one issue without exacerbating the other. I attempted to decrease the temperature parameter in OPT’s text generation, but this resulted in OPT generating strings of run-on, non-informative sentences, such as “Marilyn Monroe starred in several films, including Dangerous Years, Scudda Hoo! Scudda Hay!, Ladies of the Chorus, Love Happy…” because each additional film was the most likely follow-up to the previously generated tokens. Similarly, increasing the temperature might generate text that sounds more “human-like,” but upon closer inspection, would be riddled with factual inaccuracies.

          Conclusion

          Further Work

          There are a couple of improvements I would have made if I had more time or more compute to work with. With more time, I would have liked to learn how to scrape data from webpages, so I could create a dataset of a thousand famous people’s biographies. Then, I could run my experiments with a larger sample size, allowing me to produce more statistically significant results. However, even with access to a larger dataset, I would have been bottlenecked by compute. Using Colab Pro’s V100 GPU, I estimate that generating biographies and evaluating them for 1000 people would take at least 10 hours of runtime.

          If I had access to more powerful GPUs, I also would have investigated a more nuanced research question: are bigger, more capable LLMs that have watermarking schemes applied to them less prone to hallucinating? I would have liked to run my experiments using a larger open-source LLM, such as LLaMa 65B, instead of OPT 1.3B, to see if watermarking schemes still negatively affect an LLM’s ability to perform tasks, when the base LLM is much more capable.

          What I learned

          As this project was my first self-driven research experiment, I faced many challenges, but also learned so much. Probably the most important thing I learned is that compute is important, but it’s not an end-all-be-all. There’s tons of open-source models out there that can be run on a V100, and Google Colab Pro offers it at an affordable price. I also learned how important it is to define a well-scoped research problem, and how chatting with others can help you gain fresh insights on roadblocks.

          I found that my work towards this project was structured much differently than how I would approach a problem set. With a pset, much of the starter code is provided, and in particular, the code to import datasets, process them, and visualize results are all provided. In this project, most of my time was spent making design decisions: which dataset should I use, how should I format my results, what hyperparameters should I use. Although the raw number of lines coded in my final notebook might not be the most, I can explain my reasoning behind each line of code clearly, and I think this is a result of the thorough research I performed.

          Lastly, I learned that tackling an unanswered question in research is tractable for most students with some programming experience and interest in a scientific field. I didn’t have the most extensive ML background, nor any prior undergraduate research experience, but just by reading some papers on watermarking and writing down the questions that popped into my head, I came up with some viable research questions that could be tackled by an independent research project.

          I’m very thankful to my friends Franklin Wang and Miles Wang for helping me configure my coding environment and keeping me motivated throughout the project, and also to the TAs I spoke with during the ideation and scoping stage. To other students reading this blog post who may want to get started doing ML research but aren’t sure how to get started, I encourage you to try replicating some papers with code! Papers With Code has several papers accompanied by their codebases, and just trying to obtain the same results as them is an incredible learning experience. If anyone wants to replicate, or even build off of my work, please reach out to me if you have any questions or ideas you’d like to discuss. You can reach me at justin ji [at] college.harvard.edu.

          Supplemental Material

          In this Github repository, you can access the dataset I made of famous people’s biographies, the code I used to generate my results, and the CSV files of results.

          Github Repo

          \ No newline at end of file diff --git a/blog/index.html b/blog/index.html index 91f8d5ed..ba13cf1e 100644 --- a/blog/index.html +++ b/blog/index.html @@ -1,103 +1 @@ ---- -layout: default -title: blog -nav: true -nav_order: 9 -permalink: /blog -pagination: - enabled: true - collection: posts - permalink: /page/:num/ - per_page: 200 - sort_field: date - sort_reverse: true - trail: - before: 1 # The number of links before the current page - after: 3 # The number of links after the current page ---- - -
          - -
          -

          {{ site.blog_name }}

          -

          {{ site.blog_description }}

          -
          - - - -
            - {% for post in paginator.posts %} - - {% if post.external_source == blank %} - {% assign read_time = post.content | number_of_words | divided_by: 180 | plus: 1 %} - {% else %} - {% assign read_time = post.feed_content | strip_html | number_of_words | divided_by: 180 | plus: 1 %} - {% endif %} - {% assign year = post.date | date: "%Y" %} - {% assign tags = post.tags | join: "" %} - {% assign categories = post.categories | join: "" %} - -
          • -

            - {% if post.redirect == blank %} - {{ post.title }} - {% else %} - {% if post.redirect contains '://' %} - {{ post.title }} - - - - {% else %} - {{ post.title }} - {% endif %} - {% endif %} -

            -

            {{ post.description }}

            - - -
          • - - {% endfor %} -
          - - {% include pagination.html %} - -
          + blog | 6.S898 Deep Learning Blogs 2023

          6.S898 Deep Learning Blogs

          Fall 2023

          • Are Watermarked Large Language Models More Prone to Hallucinations?

            In this blog post, I investigate whether watermarked LLMs are more likely to “hallucinate,” or make up facts, because of limitations imposed by the watermarking scheme.

          • Predicting the Future: LSTM vs Transformers for Time Series Modeling

            A comparison analysis between LSTM and Transformer models in the context of time-series forecasting. While LSTMs have long been a cornerstone, the advent of Transformers has sparked significant interest due to their attention mechanisms. In this study, we pinpoint which particular features of time series datasets could lead transformer-based models to outperform LSTM models.

          • Studying the benefits and limitations of sparse auto-encoders for compositional reasoning tasks

          • Solvent Encoding for solubility prediction using GNN

            Evaluation of different solvent-encoding methods on a public available solubility dataset

          • 6.s898 Final Project- Investigating the biological underpinnings of latent embeddings for scRNA-seq

          • Forbidden Facts

            A Mechanistic Interpretability Investigation of Llama 2

          • Modeling Elephantfish Communication through Deep RNNs

            Elephantfish represent a fascinating subject for study within the realms of bioacoustics and animal communication due to their unique use of electric fields for sensing and interaction. This project proposes the development of a deep learning framework to model the electrical communication signals of elephantfish, akin to language models used in natural language processing (NLP).

          • Exploring Image-Supervised Contrastive Diffusion - A Comparative Analysis with Applications in Image-to-Video Generation

            Image-to-image (I2I) and image-to-video (I2V) may be the next frontier of generative deep learning capabilities, but current models struggle with robustness, largely due to the implicit, rather than explicit, representation learning objective during traditional diffusion model training. Hence, we propose a new technique where a custom contrastive loss function is used to leverage the innate latent space of the diffusion model’s variational autoencoder. This enables us to study the creation of lightweight models that lose less contextual information between input conditioning and target output, which we elucidate in this blog.

          • Combining Modalities for Better Molecular Representation Learning

          • Exploring Frobenius and Spectral Normalization in MLPs and Residual networks

            This blog post compares the effects of a spectral view on weight normalization to a frobenius view on weight normalization normalization using a novel algorithm developed by us. We use two network types at multiple sizes to compare the effects of these two methods on the singular values of the weight matrices, the rank of the weight matrices, and the accuracy of the models.

          • Iterated Representation Learning

            Representation learning is a subfield of deep learning focused on learning meaningful lower-dimensional embeddings of input data, and rapidly emerging to popularity for its efficacy with generative models. However, most representation learning techniques, such as autoencoders and variational autoencoders, learn only one embedding from the input data, which is then used to either reconstruct the original data or generate new samples. This project seeks to study the utility of a proposed iterated representation learning framework, which repeatedly trains new latent space embeddings based on the data outputted from the last round of representation. In particular, we seek to examine whether the performance of this iterated approach on a model and input dataset are indicative of any robustness qualities of the model and latent embedding space, and potentially derive a new framework for evaluating representation stability.

          • A Method for Alleviating Catastrophic Forgetting With Explainability

            Using various explainability metrics to target, we freeze layers in CNNs to enable continual learning.

          • Graph Articulated Objects

            Pre-trained large vision-language models (VLMs), such as GPT4-Vision, uniquely encode relationships and contextual information learned about the world through copious amounts of real-world text and image information. Within the context of robotics, the recent explosion of advancements in deep learning have enabled innovation on all fronts when solving the problem of generalized embodied intelligence. Teaching a robot to perform any real-world task requires it to perceive its environment accurately, plan the steps to execute the task at hand, and accurately control the robot to perform the given task. This project explores the use of vision-language models to generate domain descriptions. These can be used for task planning, closing the gap between raw images and semantic understanding of interactions possible within an environment.

          • Physics Loss

            Learning a deep net to optimize an LP, based on predicting the optimal basis vector. Surveys existing approaches in the literature. Demonstrates high accuracy of feasibility and optimality on small problem instances, but documents issues when scaling to larger problems. Benchmarks against a modern optimization solver, with discussions on upfront training vs. variable inference computation times.

          • Diffusion Models on Low-Brightness Images

            Diffusion models have been used with great success for a number of use cases, but they still remain largely unused on dim images. The primary related work has been on using a diffusion model for low-light image enhancement. However, most of these works agree that attempting to generate an image from noise generated on top of an already dim image often results in rgb shift and global degradation of the image. This is because a diffusion model adds noise to the given image and then attempts to denoise the image, so given a dim and low-contrast image, the model has a difficult time denoising. This blog post focuses on methods to improve diffusion model performance in low-light images

          • Semi-Supervised Domain Adaptation using Diffusion Models

            6.S898 Project

          • The Effect of Activation Functions On Superposition in Toy Models

            An in-depth exploration of how different activation functions influence superposition in neural networks.

          • Gradient-Boosted Neural Wavlet Interpolation for Time Series (G-BiTS)

            Your blog post's abstract. This is an example of a distill-style blog post and the main elements it supports.

          • Challenges in Deep Learning Surrogates for Constrained Linear Optimization

            Learning a deep net to optimize an LP, based on predicting the optimal basis vector. Surveys existing approaches in the literature. Demonstrates high accuracy of feasibility and optimality on small problem instances, but documents issues when scaling to larger problems. Benchmarks against a modern optimization solver, with discussions on upfront training vs. variable inference computation times.

          • Activation Patching in Vision Transformers

          • Learning Generals.io

            We explore the application of deep learning to the online game generals.io and discuss what is necessary to achieve superhuman performance in generals.io.

          • A Comparative Study of transformer on long sequence time series data

            This study evaluates Transformer models in traffic flow prediction. Focusing on long sequence time-series data, it evaluates the balance between computational efficiency and accuracy, suggesting potential combinations of methods for improved forecasting.

          • Transfer Resistant Model Training

            This blog post details our work on training neural networks that are resistant to transfer learning techniques.

          • Sparse Autoencoders for a More Interpretable RLHF

            Extending Anthropic's recent monosemanticity results toward a new, more interpretable way to fine-tune.

          • Using Synthetic Data to Minimize Real Data Requirements

            Data acquisition for some tasks in synthetic biology can be cripplingly difficult to perform at a scale necessary for machine learning... so what if we just made our data up?*

          • Applications of Deep Learning in Timbre Transfer

            Exploring musical timbre transfer by leveraging prior art in differential digital signal processing (DDSP) and modern deep learning structures.

          • The Effect of Activation Functions On Superposition in Toy Models

            An in-depth exploration of how different activation functions influence superposition in neural networks.

          • Training Robust Networks

            Exploring ResNet on TinyImageNet, unveiling brittleness and discovering simple robustness enhancement strategies via hyperparameter optimization

          • Imposing uniformity through Poisson flow models

            Uniformity and alignment are used to explain the success of contrastive encoders. Can we use already trained, well-aligned features and impose uniformity to increase their quality and performance on downstream classification tasks?

          • 6-DOF estimation through visual place recognition

            A neural pose-estimation solution is implemented, which could help an agent with a downward-facing camera (such as a drone) to geolocate based on prior satellite imagery of terrain. The neural encoder infers extrinsic camera parameters from camera images, enabling estimation of 6 degrees of freedom (6-DOF), namely 3-space position and orientation. By encoding priors about satellite imagery in a neural network, the need for the agent to carry a satellite imagery dataset onboard is avoided.

          • Tracing the Seeds of Conflict: Advanced Semantic Parsing Techniques for Causality Detection in News Texts

            This blog post outlines a research project aiming to uncover cause-effect-relationships in the sphere of (political) conflicts using a frame-semantic parser.

          • To Encode or Not To Encode: The Case for the Encoder-free Autodecoder Architecture

            While the traditional autoencoder architecture consists of an encoder and a decoder to compress and reconstruct information with only the most prominent features, some recent work have begun to utilize an alternate framework, the autodecoder, in specific applications in the field of representation learning. Skipping the encoder network altogether and learning latent codes directly as parameters, we aim to compare the two architectures on practical reconstruction tasks as well as dive into the theory of autodecoders and why they work, along with certain novel features that they bring.

          • New Synthesis Approach for Personalized LLMS

          • Augmenting Expert Domain Image Inputs for Enhancing Visual Language Models Performance

            This blog post explores enhancing visual language models, particularly for expert domains like scientific literature, where standard models struggle. By integrating domain-specific knowledge and advanced image embeddings, the research aims to refine the performance of visual language models such as OpenFlamingo. Leveraging graphical structured embeddings and graph neural networks, the study tests different methods of representing images to improve the models' interpretive capabilities.

          • Embeddings for Spatio-temporal Forecasting

            An analysis of various embeddings methods for spatio-temporal forecasting.

          • In the pursuit of cheap and robust word embeddings

            A study of how we can train a student word embedding model to mimic the teacher OpenAI word embedding model by using as small a training set as possible. We also investigate preprocessing tricks and robustness against poisoned data.

          • Leveraging Representation Engineering For LLM’s In-Context-Learning

            We present a method to observe model internals whether LLMs are performing in-context learning and control the model outputs based on such Context Vectors.

          • Reasoning with Maps: Assessing Spatial Comprehension on Maps in Pre-trained Models

            Map reasoning is an intuitive skill for humans and a fundamental skill with important applications in many domains. In this project, we aim to evaluate the capabilities of contemporary state-of-the-art Large Vision-Language Models (LVLMs) for reasoning on maps and comparing their capabilities with human participants on the coregistration task. We additionally propose and release a novel dataset to serve as an initial benchmark for map reasoning capabilities. We run an extensive analysis on the performance of open-source LVLMs showing that they struggle to achieve good performance on our dataset. Additionally, we show that coregistration is intuitive to human participants that were able to achieve close to perfect accuracy in a time-constrained manner.

          • Autoen-chorder: Predicting Musical Success With Neural Nets

            In this blog, we discuss deep learning methods and results of predicting song popularity from audio features.

          • Ensemble Learning for Mitigating Double Descent

            Exploring when and why Double Descent occurs, and how to mitigate it through Ensemble Learning.

          • Injecting Node Information via Embedding Initializations

            Graph Neural Networks (GNNs) have revolutionized our approach to complex data structures, enabling a deeper understanding of relationships and patterns that traditional neural networks might miss. This project looks into the potential of embedding initializations in GNNs, particularly in the context of molecular function prediction and protein retrieval tasks. By investigating the effect of intentional, information-rich initializations versus random initializations, we aim to enhance the learning efficiency and accuracy of GNNs in these domains. Our study focuses on a precision medicine knowledge graph (PrimeKG) and employs TxGNN, a GNN model initially designed for disease-drug link prediction, repurposed for protein-molecular function link prediction. We explore the impact of using ESM embeddings for protein nodes, hypothesizing that these embeddings could provide structural information not explicitly present in the graph data. Through comparisons of the latent spaces and performances, we look to see the effectiveness of these embeddings in improving the model's predictive power of protein function.

          • Overparameterization of Neural Networks through Kernel Regression and Gaussian Processes

            In this work, we will explore the successes of overparameterization of neural networks through evaluating the relationship between the Neural Tangent Kernel (NTK), MLPs, and Gaussian processes.

          • Exploring Methods for Generating Music

            Explores various machine learning techniques for generating music. Compares the performance of traditional RNNs, LSTMs, and transformers on generating sample sequences of music.

          • Can Contrastive Learning Recommend Me a Movie?

          • Improving CLIP Spatial Awareness Using Hard Negative Mining

            CLIP struggles to understand and reason spatially. We attempt to solve this issue with introducing hard negative examples during training.

          • Transformer-Based Approaches for Hyperspectral Imagery in Remote Sensing

            This project employs Transformers for a comprehensive spatial-temporal analysis of post-Mountaintop Removal landscape recovery, utilizing satellite imagery and DEMs. It focuses on integrating geomorphological changes to predict ecological succession. Advanced Transformer architectures will be used to enhance the interpretability of complex spatial features over time, aiming to create an accurate 3D simulation environment for interactive exploration and effective restoration planning.

          • Multimodal Commonsense

            6.S898 project for analyzing and evaluating the commonsense reasoning performance of multimodal vs text-only models.

          • Exploring Univariate Time Series Anomaly Detection using VAE's

            In this blog post, we will take a deep dive into DONUT, a method that applies variational autoencoders to the problem of time series anomaly detection. We will begin with an overview of the original authors’ main ideas. Next, we will replicate some results, and perform new experiments to gain further insights into the properties, successes, and limitations of this method. Finally, we will run additional experiments that test extensions on the original formulation, and motivate future areas of exploration.

          • Graph Transformers

            A study of Transformers' understanding of fundamental graph problems, where we propose a new, tailored architecture highlighting the model's potential in graph-related tasks.

          • Learning a Lifted Linearization for Switched Dynamical Systems

            A final project proposal for 6.s898 in fall 2023

          • Sparse Autoencoder Universality - Under What Conditions are Learned Features Consistent?

            This project aims to study the universality of features in LLMs by studying sparse autoencoders trained on similar layers of different models.

          • Optimizations of Transformers for Small-scale Performance

            CNNs generally outperform ViTs in scenarios with limited training data. However, the narrative switches when the available training data is extensive. To bridge this gap and improve upon existing ViT methods, we explore how we can leverage recent progress in the transformer block and exploit the known structure of pre-trained ViTs.

          • Guided Transfer Learning and Learning How to Learn: When Is It Useful?

            For downstream tasks that involve extreme few-shot learning, it's often not enough to predispose a model with only general knowledge using traditional pre-training. In this blog, we explore the nuances and potential applications of Guided Transfer Learning, a meta-learning approach that allows a model to learn inductive biases on top of general knowledge during pre-training.

          • Alive Scene

            Inspired by the captivating Enchanted Portraits of the Harry Potter universe, my project unveils an innovative AI pipeline that transcends traditional scene-capture methods. Rather than merely recording scenes as a sequence of static images, this pipeline is intricately designed to interpret and articulate the dynamic behavior of various elements within a scene by utilizing CLIP semantic embeddings. This nuanced understanding enables the scenes to evolve autonomously and organically, mirroring the fluidity and spontaneity of living entities.

          • Projected fast feedforward networks

            Abstract

          • Understanding Linear Mode Connectivity

            We study the pruning behavior of vision transformers (ViTs), and possible relations to linear mode connectivity. Frankle et al. (2022) showed that linear mode connectivity, the tendency of a neural network to optimize to the same linearly connected minimum when trained with SGD noise, is strongly tied to the existence of "lottery networks," sparse networks that can be trained to full accuracy. We found that when initialized from a pretrained network, the ViT model showed linear mode connectivity when fine tuning on CIFAR-10. Conversely, random initialization resulted in instability during training and a lack of linear mode connectivity. We also found that using the PLATON algorithm (Zhang et al.) to generate a mask was effective for pruning the network, suggesting the existence of lottery ticket networks in ViTs, but the connection between the existence of these trainable subnetworks and linear mode connectivity remains unclear.

          • Transformers vs. RNNs: How do findings from real-world datasets relate to the theory?

            Transformers have rapidly surpassed RNNs in popularity due to their efficiency via parallel computing without sacrificing accuracy. Transformers are seemingly able to perform better than RNNs on memory based tasks without keeping track of that recurrence. This leads researchers to wonder -- why? To contribute towards answering that question, I'll analyze the performance of transformer and RNN based models on datasets in real-world applications. Serving as a bridge between applications and theory-based work, this will hopefully enable future developers to better decide which architecture to use in practice.

          • Exploring the latent space of text-to-image diffusion models

            In this blog post we explore how we can navigate through the latent space of stable diffusion using interpolation techniques.

          • Accelerating large model inference with speculative decoding - 6.s898

            An investigation into methods to speed up autoregressive inference through increased parallelization, specifically through speculative sampling and decoding.

          • Unraveling Social Reasoning in LLMs: A Deep Dive into the Social IQA Benchmark

            In this study, we investigate the challenge of social commonsense reasoning in large language models (LLMs), aiming to understand and categorize common errors LLMs make in social commonsense reasoning tasks.

          • Comparing data augmentation using VAEs and denoising-VAEs for limited noisy datasets

          • Emoji3Vec

            Our project seeks to expand on the previous attempts at "emoji2vec", or generating semantically meaningful embeddings for emojis.

          • Modeling Human Speech Recognition with Different Network Architectures

            Evaluating a neural network's ability to effectively model human speech recognition using CNNs vs. TNNs

          • Analytic, Empirical, and Monte Carlo Bayesian Methods for Uncertainty Estimation

            In the realm of machine learning, the robustness and reliability of predictive models are important, especially when confronted with Out-of-Distribution (OOD) data that deviate from the training distribution. Bayesian models stand out for their probabilistic foundations, being able to offer ways to quantify uncertainty. This project will present a survey of already-established methods of estimating uncertainty, as well as how we adapted/generalized them.

          • Understanding LLM Attention on Useless Numbers in Word Problems (and this Title has 8 Es)

            If Jack starts out with 4 llamas and Jill takes 2 of them, then Jack gets 5 chinchillas, how many llamas does he have?

          • Cross-Lingual Fine-Tuning for Multilingual Text Embeddings

            Exploring contrastively training text embeddings, and presenting a scalable, cheap and data-efficient method to train multilingual embedding models

          • Learning Interpretable Features with Sparse Auto-Encoders

          • How does model size impact catastrophic forgetting in online continual learning?

            Yes, model size matters.

          • VGAE Clustering of the Fruit Fly Connectome

            An exploration of how learned Variational Graph Auto-Encoder (VGAE) embeddings compare to Spectral Embeddings to determine the function of neurons in the fruit fly brain.

          • Robust Image to Video Generation Using Contrastive Diffusion Over Latents

            Image-to-video (I2V) may be the next frontier of generative deep learning capabilities, but current models struggle with robustness, largely due to the implicit, rather than explicit, representation learning objective during traditional diffusion model training. Hence, we propose a new technique where a pre-trained contrastive model is used to train a diffusion model with a custom contrastive loss function to operate within a learned structured latent space for I2V problems, yielding, in theory, more structurally sound videos without loss of contextual information.

          • Adaptive Controller with Neural Net Equations of Motion for High-DOF Robots

            This project aims to develop an adaptive control mechanism using a graph neural network to approximate the equations of motion (EoM) for high-degree-of-freedom (DOF) robotic arms bypassing the need for symbolic EoM to build an adaptive controller.

          • Robustness of self-supervised ViT features in b-mode images

            Vision Transformers (ViT) trained with self-distillation with no labels (DINO) have shown striking properties for several downstream tasks regarding segmentation, classification, and image correspondence. In this work, we assess DINO-vit-s/8 on a new dataset containing b-mode ultrasound images with the ultimate goal of segmenting bone.

          • Investigating the Impact of Symmetric Optimization Algorithms on Learnability

            Recent theoretical papers in machine learning have raised concerns about the impact of symmetric optimization algorithms on learnability, citing hardness results from theoretical computer science. This project aims to empirically investigate and validate these theoretical claims by designing and conducting experiments as understanding the role of optimization algorithms in the learning process is crucial for advancing the field of machine learning.

          • Can CNN learn shapes?

            One widely accepted intuition is that Convolutional Neural Networks that are trained for object classification combine low-level features (e.g. edges) to gradually learn more complex and abstracted patterns that are useful in differentiating images. Yet it remains poorly understood how CNNs actually make their decisions, and how their recognition strategies differ from humans. Specifically, there is a major debate about the question of whether CNNs primarily rely on surface regularities of objects, or whether they are capable of exploiting the spatial arrangement of features, similar to humans.

          • Quantum Circuit Optimization with Graph Neural Nets

            We perform a systematic study of architectural choices of graph neural net-based reinforcement learning agents for quantum circuit optimization.

          • Structural vs Data Inductive Bias

            Class project proposal

          • From Scroll to Misbelief - Modeling the Unobservable Susceptibility to Misinformation on Social Media

          • Examining assumptions in scRNA-seq foundation model pre-training (6.S898 Final Project)

            Final project for MIT's Deep Learning (6.S898) class.

          • Increasing Context Length For Transformers

            How can we make attention more efficient?

          • Zero-Shot Machine-Generated Image Detection using Sinks of Gradient Flows

            How can we detect fake images online? A novel approach of characterizing the behavior of a diffusion model's learned score vectors.

          • A Deeper Look into Equivariance for Materials Data

            A Comparative Analysis of an E(3) Equivariant GNN and a Non-Equivariant GNN in Materials Data Tasks with a Focus on Investigating the Interpretability of Latent Geometry within the Two GNNs.

          • Prompt to Prompt

            Text-based image editing via cross-attention mechanisms - the research of hyperparameters and novel mechanisms to enhance existing frameworks

          • Understanding Bias in Speech to Text Language Models

            Do language models have biases that make them better for Latin-based languages like English? To find out, we generate a custom dataset to test how various language features, like silent letters, letter combinations, and letters out of order, affect how speech2text models learn and compare these results with models trained on real human language.

          • Regularization Techniques for Attention Layers in Transformer Models

            Attention layers are an integral part of the success of transformer models, but can also lead to overfitting on parts of input data when there is limited training data. Therefore, researchers have proposed methods to regularize attention layers to reduce overfitting and increase generalizability. This blog will analyze popular methods and explore novel approaches to regularization in attention layers.

          • Neural PDEs for learning local dynamics and longer temporal rollouts

            6.S898 deep learning project

          • Graph neural networks v.s. transformers for geometric graphs

            With the recent development of graph transformers, in this project we aim to compare their performance on a molecular task of protein-ligand binding affinity prediction against the performance of message passing graph neural networks.

          • An empirical evaluation of autoencoders and diffusion models for 2D small-molecule generation

            We examine the efficacy of autoencoders and diffusion models for generating 2D molecules with certain small-molecule properties. In particular, we evaluate the success of both models in creating new molecules, containing only CHONPS atoms, and only single, double, and aromatic bonds. Secondarily, a natural question that followed was investigating the efficacy of different manners of encoding molecular data for training models - specifically, we trained with both molecular fingerprints and adjacency matrices (derived from graph embeddings of molecules). We find that small autoencoder models are successful in generating both pseudo-fingerprints and pseudo-adjacency matrices that are similar to simple small molecules’ fingerprints and adjacency matrices, but they were not able to produce ‘convincing’ simple organic molecules from the fingerprint or adjacency matrices. We find that diffusion models were considerably faster and more lightweight than autoencoders, and generated molecules that were quantitatively closer in structure to real chemical structures than those the auto-encoders were able to produce.

          • VIVformer

            A deep transformer framework trained on real experimental and synthetic gen-AI data for forecasting non-stationary time-series. Applications and insights drawn from vortex induced vibrations data collected at the MIT Towing Tank.

          • Recovering Latent Variables with VAEs despite Training Bias

            Final Project Blog

          • Recurrent Recommender System with Incentivized Search

            This project considers the use of Recurrent Neural Networks (RNNs) in session-based recommender systems. We input sequences of customers' behavior, such as browsing history, to predict which product they're most likely to buy next. Our model improves upon this by taking into account how previous recommendations influence subsequent search behavior, which then serves as our training data. Our approach introduces a multi-task RNN that not only aims to recommend products with the highest likelihood of purchase but also those that are likely to encourage further customer searches. This additional search activity can enrich our training data, ultimately boosting the model's long-term performance.

          • Sample Blog Post

            Your blog post's abstract. This is an example of a distill-style blog post and the main elements it supports.

          • Understanding Limitations of Vision-Language Models

          • Contrastive Representation Learning for Dynamical Systems

            A deep learning method of learning system underlying parameters from observed trajectories

          \ No newline at end of file diff --git a/dev/data/hellaswag_images/hellaswag_test/_7XW-BFK_ZY.png b/dev/data/hellaswag_images/hellaswag_test/_7XW-BFK_ZY.png deleted file mode 100644 index 94edf3a7..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_test/_7XW-BFK_ZY.png and /dev/null differ diff --git a/dev/data/hellaswag_images/hellaswag_test/_CMIO5R_OGA.png b/dev/data/hellaswag_images/hellaswag_test/_CMIO5R_OGA.png deleted file mode 100644 index f21dade5..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_test/_CMIO5R_OGA.png and /dev/null differ diff --git a/dev/data/hellaswag_images/hellaswag_test/_bj64aqyIyk.png b/dev/data/hellaswag_images/hellaswag_test/_bj64aqyIyk.png deleted file mode 100644 index 0cbe601c..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_test/_bj64aqyIyk.png and /dev/null differ diff --git a/dev/data/hellaswag_images/hellaswag_test/_dp7ZH3Upuw.png b/dev/data/hellaswag_images/hellaswag_test/_dp7ZH3Upuw.png deleted file mode 100644 index 1aadcec2..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_test/_dp7ZH3Upuw.png and /dev/null differ diff --git a/dev/data/hellaswag_images/hellaswag_train/_-_SwG4EnRU.png b/dev/data/hellaswag_images/hellaswag_train/_-_SwG4EnRU.png deleted file mode 100644 index 3a43ecbb..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_train/_-_SwG4EnRU.png and /dev/null differ diff --git a/dev/data/hellaswag_images/hellaswag_train/_4wEUsTft44.png b/dev/data/hellaswag_images/hellaswag_train/_4wEUsTft44.png deleted file mode 100644 index 5770c5ef..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_train/_4wEUsTft44.png and /dev/null differ diff --git a/dev/data/hellaswag_images/hellaswag_train/_86X1xtj67w.png b/dev/data/hellaswag_images/hellaswag_train/_86X1xtj67w.png deleted file mode 100644 index 7d2a363a..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_train/_86X1xtj67w.png and /dev/null differ diff --git 
a/dev/data/hellaswag_images/hellaswag_train/_CYdiU1sMwc.png b/dev/data/hellaswag_images/hellaswag_train/_CYdiU1sMwc.png deleted file mode 100644 index b5cc6adb..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_train/_CYdiU1sMwc.png and /dev/null differ diff --git a/dev/data/hellaswag_images/hellaswag_train/_GQaltSDMAk.png b/dev/data/hellaswag_images/hellaswag_train/_GQaltSDMAk.png deleted file mode 100644 index c3bb51fc..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_train/_GQaltSDMAk.png and /dev/null differ diff --git a/dev/data/hellaswag_images/hellaswag_train/_N9LhaKnZv4.png b/dev/data/hellaswag_images/hellaswag_train/_N9LhaKnZv4.png deleted file mode 100644 index ca4640c0..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_train/_N9LhaKnZv4.png and /dev/null differ diff --git a/dev/data/hellaswag_images/hellaswag_train/_QdPfYK9s6o.png b/dev/data/hellaswag_images/hellaswag_train/_QdPfYK9s6o.png deleted file mode 100644 index d2b963c1..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_train/_QdPfYK9s6o.png and /dev/null differ diff --git a/dev/data/hellaswag_images/hellaswag_train/_XRJk2oFwZw.png b/dev/data/hellaswag_images/hellaswag_train/_XRJk2oFwZw.png deleted file mode 100644 index 33087ab5..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_train/_XRJk2oFwZw.png and /dev/null differ diff --git a/dev/data/hellaswag_images/hellaswag_train/_ajGnJjNUX0.png b/dev/data/hellaswag_images/hellaswag_train/_ajGnJjNUX0.png deleted file mode 100644 index 9659a935..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_train/_ajGnJjNUX0.png and /dev/null differ diff --git a/dev/data/hellaswag_images/hellaswag_train/_ao0UwSJHWA.png b/dev/data/hellaswag_images/hellaswag_train/_ao0UwSJHWA.png deleted file mode 100644 index 85418d65..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_train/_ao0UwSJHWA.png and /dev/null differ diff --git a/dev/data/hellaswag_images/hellaswag_train/_cU5aISTDBo.png 
b/dev/data/hellaswag_images/hellaswag_train/_cU5aISTDBo.png deleted file mode 100644 index a62bf5a4..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_train/_cU5aISTDBo.png and /dev/null differ diff --git a/dev/data/hellaswag_images/hellaswag_train/_dLbtK8_SHo.png b/dev/data/hellaswag_images/hellaswag_train/_dLbtK8_SHo.png deleted file mode 100644 index dae948aa..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_train/_dLbtK8_SHo.png and /dev/null differ diff --git a/dev/data/hellaswag_images/hellaswag_train/_i6kvwg1Oyo.png b/dev/data/hellaswag_images/hellaswag_train/_i6kvwg1Oyo.png deleted file mode 100644 index 925d7cc8..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_train/_i6kvwg1Oyo.png and /dev/null differ diff --git a/dev/data/hellaswag_images/hellaswag_train/_jxpaVW4_cE.png b/dev/data/hellaswag_images/hellaswag_train/_jxpaVW4_cE.png deleted file mode 100644 index 7be712d4..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_train/_jxpaVW4_cE.png and /dev/null differ diff --git a/dev/data/hellaswag_images/hellaswag_train/_kj3B0T_TE0.png b/dev/data/hellaswag_images/hellaswag_train/_kj3B0T_TE0.png deleted file mode 100644 index 0efce844..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_train/_kj3B0T_TE0.png and /dev/null differ diff --git a/dev/data/hellaswag_images/hellaswag_train/_uTNqQhQA9w.png b/dev/data/hellaswag_images/hellaswag_train/_uTNqQhQA9w.png deleted file mode 100644 index c3cbe3c7..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_train/_uTNqQhQA9w.png and /dev/null differ diff --git a/dev/data/hellaswag_images/hellaswag_train/_zAfwnDt4VE.png b/dev/data/hellaswag_images/hellaswag_train/_zAfwnDt4VE.png deleted file mode 100644 index 50bc52ac..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_train/_zAfwnDt4VE.png and /dev/null differ diff --git a/dev/data/hellaswag_images/hellaswag_val/_4CLYKFzmoY.png b/dev/data/hellaswag_images/hellaswag_val/_4CLYKFzmoY.png deleted file mode 
100644 index 189186d8..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_val/_4CLYKFzmoY.png and /dev/null differ diff --git a/dev/data/hellaswag_images/hellaswag_val/_DlDtsPxdyY.png b/dev/data/hellaswag_images/hellaswag_val/_DlDtsPxdyY.png deleted file mode 100644 index 944fb73f..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_val/_DlDtsPxdyY.png and /dev/null differ diff --git a/dev/data/hellaswag_images/hellaswag_val/_MR8G1jwM4o.png b/dev/data/hellaswag_images/hellaswag_val/_MR8G1jwM4o.png deleted file mode 100644 index 7b1d12e5..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_val/_MR8G1jwM4o.png and /dev/null differ diff --git a/dev/data/hellaswag_images/hellaswag_val/_RCe4Q0p1aA.png b/dev/data/hellaswag_images/hellaswag_val/_RCe4Q0p1aA.png deleted file mode 100644 index 127eb836..00000000 Binary files a/dev/data/hellaswag_images/hellaswag_val/_RCe4Q0p1aA.png and /dev/null differ diff --git a/dev/data/raw_data/activity_net.json b/dev/data/raw_data/activity_net.json index b0a5a827..a8859553 100644 --- a/dev/data/raw_data/activity_net.json +++ b/dev/data/raw_data/activity_net.json @@ -1 +1 @@ -{"taxonomy": [{"parentName": "Health-related self care", "nodeName": "Applying sunscreen", "nodeId": 389, "parentId": 269}, {"parentName": "Health-related Self Care", "nodeName": "Health-related self care", "nodeId": 269, "parentId": 270}, {"parentName": "Personal Care", "nodeName": "Health-related Self Care", "nodeId": 270, "parentId": 2}, {"parentName": "Wrestling", "nodeName": "Arm wrestling", "nodeId": 271, "parentId": 272}, {"parentName": "Participating in Sports, Exercise, or Recreation", "nodeName": "Wrestling", "nodeId": 272, "parentId": 17}, {"parentName": "Vehicle repair and maintenance (by self)", "nodeName": "Assembling bicycle", "nodeId": 273, "parentId": 56}, {"parentName": "Biking", "nodeName": "BMX", "nodeId": 274, "parentId": 275}, {"parentName": "Participating in Sports, Exercise, or Recreation", "nodeName": "Biking", 
"nodeId": 275, "parentId": 17}, {"parentName": "Food and drink preparation ", "nodeName": "Baking cookies", "nodeId": 276, "parentId": 31}, {"parentName": "Playing sports", "nodeName": "Baton twirling", "nodeId": 277, "parentId": 64}, {"parentName": "Playing soccer", "nodeName": "Beach soccer", "nodeId": 278, "parentId": 279}, {"parentName": "Participating in Sports, Exercise, or Recreation", "nodeName": "Playing soccer", "nodeId": 279, "parentId": 17}, {"parentName": "Playing games", "nodeName": "Beer pong", "nodeId": 280, "parentId": 48}, {"parentName": "Washing, dressing and grooming oneself", "nodeName": "Blow-drying hair", "nodeId": 281, "parentId": 62}, {"parentName": "Lawn, garden, and houseplant care", "nodeName": "Blowing leaves", "nodeId": 282, "parentId": 66}, {"parentName": "Bowling", "nodeName": "Playing ten pins", "nodeId": 283, "parentId": 284}, {"parentName": "Participating in Sports, Exercise, or Recreation", "nodeName": "Bowling", "nodeId": 284, "parentId": 17}, {"parentName": "Washing, dressing and grooming oneself", "nodeName": "Braiding hair", "nodeId": 285, "parentId": 62}, {"parentName": "Beach activities", "nodeName": "Building sandcastles", "nodeId": 286, "parentId": 287}, {"parentName": "Arts and Entertainment", "nodeName": "Beach activities", "nodeId": 287, "parentId": 21}, {"parentName": "Participating in rodeo competitions", "nodeName": "Bullfighting", "nodeId": 288, "parentId": 289}, {"parentName": "Participating in Sports, Exercise, or Recreation", "nodeName": "Participating in rodeo competitions", "nodeId": 289, "parentId": 17}, {"parentName": "Participating in rodeo competitions", "nodeName": "Calf roping", "nodeId": 290, "parentId": 289}, {"parentName": "Playing sports", "nodeName": "Camel ride", "nodeId": 291, "parentId": 64}, {"parentName": "Boating", "nodeName": "Canoeing", "nodeId": 292, "parentId": 293}, {"parentName": "Participating in Sports, Exercise, or Recreation", "nodeName": "Boating", "nodeId": 293, "parentId": 17}, 
{"parentName": "Participating in martial arts", "nodeName": "Capoeira", "nodeId": 294, "parentId": 27}, {"parentName": "Interior arrangement, decoration, & repairs", "nodeName": "Carving jack-o-lanterns", "nodeId": 295, "parentId": 33}, {"parentName": "Vehicle repair and maintenance (by self)", "nodeName": "Changing car wheel", "nodeId": 296, "parentId": 56}, {"parentName": "Interior arrangement, decoration, & repairs", "nodeName": "Cleaning sink", "nodeId": 297, "parentId": 33}, {"parentName": "Care for animals and pets (not veterinary care)", "nodeName": "Clipping cat claws", "nodeId": 298, "parentId": 65}, {"parentName": "Playing sports", "nodeName": "Croquet", "nodeId": 299, "parentId": 64}, {"parentName": "Playing sports", "nodeName": "Curling", "nodeId": 300, "parentId": 64}, {"parentName": "Lawn, garden, and houseplant care", "nodeName": "Cutting the grass", "nodeId": 301, "parentId": 66}, {"parentName": "Interior arrangement, decoration, & repairs", "nodeName": "Decorating the Christmas tree", "nodeId": 302, "parentId": 33}, {"parentName": "Walking / exercising / playing with animals", "nodeName": "Disc dog", "nodeId": 303, "parentId": 26}, {"parentName": "Playing sports", "nodeName": "Doing a powerbomb", "nodeId": 304, "parentId": 64}, {"parentName": "Working out", "nodeName": "Doing crunches", "nodeId": 305, "parentId": 306}, {"parentName": "Participating in Sports, Exercise, or Recreation", "nodeName": "Working out", "nodeId": 306, "parentId": 17}, {"parentName": "Playing musical instruments", "nodeName": "Drum corps", "nodeId": 307, "parentId": 61}, {"parentName": "Using cardiovascular equipment", "nodeName": "Elliptical trainer", "nodeId": 308, "parentId": 58}, {"parentName": "Fencing", "nodeName": "Doing fencing", "nodeId": 309, "parentId": 310}, {"parentName": "Participating in Sports, Exercise, or Recreation", "nodeName": "Fencing", "nodeId": 310, "parentId": 17}, {"parentName": "Exterior repair, improvements, & decoration", "nodeName": "Fixing the 
roof", "nodeId": 311, "parentId": 312}, {"parentName": "Exterior Maintenance, Repair, & Decoration", "nodeName": "Exterior repair, improvements, & decoration", "nodeId": 312, "parentId": 12}, {"parentName": "Park activities", "nodeName": "Fun sliding down", "nodeId": 313, "parentId": 314}, {"parentName": "Arts and Entertainment", "nodeName": "Park activities", "nodeId": 314, "parentId": 21}, {"parentName": "Playing soccer", "nodeName": "Futsal", "nodeId": 315, "parentId": 279}, {"parentName": "Washing, dressing and grooming oneself", "nodeName": "Gargling mouthwash", "nodeId": 316, "parentId": 62}, {"parentName": "Care for animals and pets (not veterinary care)", "nodeName": "Grooming dog", "nodeId": 317, "parentId": 65}, {"parentName": "Vehicle repair and maintenance (by self)", "nodeName": "Hand car wash", "nodeId": 318, "parentId": 56}, {"parentName": "Interior arrangement, decoration, & repairs", "nodeName": "Hanging wallpaper", "nodeId": 319, "parentId": 33}, {"parentName": "Eating and Drinking", "nodeName": "Having an ice cream", "nodeId": 320, "parentId": 46}, {"parentName": "Playing games", "nodeName": "Hitting a pinata", "nodeId": 321, "parentId": 48}, {"parentName": "Playing games", "nodeName": "Hula hoop", "nodeId": 322, "parentId": 48}, {"parentName": "Playing sports", "nodeName": "Hurling", "nodeId": 323, "parentId": 64}, {"parentName": "Fishing", "nodeName": "Ice fishing", "nodeId": 324, "parentId": 325}, {"parentName": "Participating in Sports, Exercise, or Recreation", "nodeName": "Fishing", "nodeId": 325, "parentId": 17}, {"parentName": "Interior arrangement, decoration, & repairs", "nodeName": "Installing carpet", "nodeId": 326, "parentId": 33}, {"parentName": "Playing games", "nodeName": "Kite flying", "nodeId": 327, "parentId": 48}, {"parentName": "Working out", "nodeName": "Kneeling", "nodeId": 328, "parentId": 306}, {"parentName": "Sewing, repairing, & maintaining textiles", "nodeName": "Knitting", "nodeId": 329, "parentId": 45}, 
{"parentName": "Interior arrangement, decoration, & repairs", "nodeName": "Laying tile", "nodeId": 330, "parentId": 33}, {"parentName": "Playing sports", "nodeName": "Longboarding", "nodeId": 331, "parentId": 64}, {"parentName": "Food and drink preparation ", "nodeName": "Making a cake", "nodeId": 332, "parentId": 31}, {"parentName": "Food and drink preparation ", "nodeName": "Making a lemonade", "nodeId": 333, "parentId": 31}, {"parentName": "Food and drink preparation ", "nodeName": "Making an omelette", "nodeId": 334, "parentId": 31}, {"parentName": "Interior cleaning", "nodeName": "Mooping floor", "nodeId": 335, "parentId": 37}, {"parentName": "Exterior Maintenance, Repair, & Decoration", "nodeName": "Painting fence", "nodeId": 336, "parentId": 12}, {"parentName": "Building and repairing furniture", "nodeName": "Painting furniture", "nodeId": 337, "parentId": 338}, {"parentName": "Interior Maintenance, Repair, & Decoration", "nodeName": "Building and repairing furniture", "nodeId": 338, "parentId": 15}, {"parentName": "Food and drink preparation ", "nodeName": "Peeling potatoes", "nodeId": 339, "parentId": 31}, {"parentName": "Interior arrangement, decoration, & repairs", "nodeName": "Plastering", "nodeId": 340, "parentId": 33}, {"parentName": "Playing volleyball", "nodeName": "Playing beach volleyball", "nodeId": 341, "parentId": 342}, {"parentName": "Participating in Sports, Exercise, or Recreation", "nodeName": "Playing volleyball", "nodeId": 342, "parentId": 17}, {"parentName": "Attending gambling establishments", "nodeName": "Playing blackjack", "nodeId": 343, "parentId": 344}, {"parentName": "Arts and Entertainment", "nodeName": "Attending gambling establishments", "nodeId": 344, "parentId": 21}, {"parentName": "Playing musical instruments", "nodeName": "Playing congas", "nodeId": 345, "parentId": 61}, {"parentName": "Playing musical instruments", "nodeName": "Playing drums", "nodeId": 346, "parentId": 61}, {"parentName": "Playing hockey", "nodeName": 
"Playing ice hockey", "nodeId": 347, "parentId": 60}, {"parentName": "Playing games", "nodeName": "Playing pool", "nodeId": 348, "parentId": 48}, {"parentName": "Playing games", "nodeName": "Playing rubik cube", "nodeId": 349, "parentId": 48}, {"parentName": "Playing sports", "nodeName": "Powerbocking", "nodeId": 350, "parentId": 64}, {"parentName": "Washing, dressing and grooming oneself", "nodeName": "Putting in contact lenses", "nodeId": 351, "parentId": 62}, {"parentName": "Washing, dressing and grooming oneself", "nodeName": "Putting on shoes", "nodeId": 352, "parentId": 62}, {"parentName": "Boating", "nodeName": "Rafting", "nodeId": 353, "parentId": 293}, {"parentName": "Lawn, garden, and houseplant care", "nodeName": "Raking leaves", "nodeId": 354, "parentId": 66}, {"parentName": "Vehicle repair and maintenance (by self)", "nodeName": "Removing ice from car", "nodeId": 355, "parentId": 56}, {"parentName": "Playing games", "nodeName": "Riding bumper cars", "nodeId": 356, "parentId": 48}, {"parentName": "Boating", "nodeName": "River tubing", "nodeId": 357, "parentId": 293}, {"parentName": "Playing games", "nodeName": "Rock-paper-scissors", "nodeId": 358, "parentId": 48}, {"parentName": "Playing sports", "nodeName": "Rollerblading", "nodeId": 359, "parentId": 64}, {"parentName": "Exterior repair, improvements, & decoration", "nodeName": "Roof shingle removal", "nodeId": 360, "parentId": 312}, {"parentName": "Working out", "nodeName": "Rope skipping", "nodeId": 361, "parentId": 306}, {"parentName": "Running", "nodeName": "Running a marathon", "nodeId": 362, "parentId": 363}, {"parentName": "Participating in Sports, Exercise, or Recreation", "nodeName": "Running", "nodeId": 363, "parentId": 17}, {"parentName": "Participating in water sports", "nodeName": "Scuba diving", "nodeId": 364, "parentId": 42}, {"parentName": "Appliance, tool, and toy set-up, repair, & maintenance (by self)", "nodeName": "Sharpening knives", "nodeId": 365, "parentId": 366}, {"parentName": 
"Appliances, Tools, and Toys", "nodeName": "Appliance, tool, and toy set-up, repair, & maintenance (by self)", "nodeId": 366, "parentId": 367}, {"parentName": "Household Activities", "nodeName": "Appliances, Tools, and Toys", "nodeId": 367, "parentId": 1}, {"parentName": "Playing games", "nodeName": "Shuffleboard", "nodeId": 368, "parentId": 48}, {"parentName": "Skiing, ice skating, snowboarding", "nodeName": "Skiing", "nodeId": 369, "parentId": 370}, {"parentName": "Participating in Sports, Exercise, or Recreation", "nodeName": "Skiing, ice skating, snowboarding", "nodeId": 370, "parentId": 17}, {"parentName": "Playing games", "nodeName": "Slacklining", "nodeId": 371, "parentId": 48}, {"parentName": "Skiing, ice skating, snowboarding", "nodeName": "Snow tubing", "nodeId": 372, "parentId": 370}, {"parentName": "Skiing, ice skating, snowboarding", "nodeName": "Snowboarding", "nodeId": 373, "parentId": 370}, {"parentName": "Lawn, garden, and houseplant care", "nodeName": "Spread mulch", "nodeId": 374, "parentId": 66}, {"parentName": "Wrestling", "nodeName": "Sumo", "nodeId": 375, "parentId": 272}, {"parentName": "Participating in water sports", "nodeName": "Surfing", "nodeId": 376, "parentId": 42}, {"parentName": "Participating in water sports", "nodeName": "Swimming", "nodeId": 377, "parentId": 42}, {"parentName": "Park activities", "nodeName": "Swinging at the playground", "nodeId": 378, "parentId": 314}, {"parentName": "Playing games", "nodeName": "Table soccer", "nodeId": 379, "parentId": 48}, {"parentName": "Playing games", "nodeName": "Throwing darts", "nodeId": 380, "parentId": 48}, {"parentName": "Lawn, garden, and houseplant care", "nodeName": "Trimming branches or hedges", "nodeId": 381, "parentId": 66}, {"parentName": "Playing games", "nodeName": "Tug of war", "nodeId": 382, "parentId": 48}, {"parentName": "Park activities", "nodeName": "Using the monkey bar", "nodeId": 383, "parentId": 314}, {"parentName": "Using cardiovascular equipment", "nodeName": 
"Using the rowing machine", "nodeId": 384, "parentId": 58}, {"parentName": "Participating in water sports", "nodeName": "Wakeboarding", "nodeId": 385, "parentId": 42}, {"parentName": "Participating in water sports", "nodeName": "Waterskiing", "nodeId": 386, "parentId": 42}, {"parentName": "Appliance, tool, and toy set-up, repair, & maintenance (by self)", "nodeName": "Waxing skis", "nodeId": 387, "parentId": 366}, {"parentName": "Appliance, tool, and toy set-up, repair, & maintenance (by self)", "nodeName": "Welding", "nodeId": 388, "parentId": 366}, {"nodeId": 68, "parentName": "Eating and Drinking", "nodeName": "Drinking coffee", "parentId": 46}, {"nodeId": 46, "parentName": "Food & Drink Prep., Presentation, & Clean-up", "nodeName": "Eating and Drinking", "parentId": 18}, {"nodeId": 18, "parentName": "Eating and drinking Activities", "nodeName": "Food & Drink Prep., Presentation, & Clean-up", "parentId": 4}, {"nodeId": 4, "parentName": "Root", "nodeName": "Eating and drinking Activities", "parentId": 0}, {"nodeId": 0, "parentName": null, "nodeName": "Root", "parentId": null}, {"nodeId": 69, "parentName": "Doing aerobics", "nodeName": "Zumba", "parentId": 55}, {"nodeId": 55, "parentName": "Participating in Sports, Exercise, or Recreation", "nodeName": "Doing aerobics", "parentId": 17}, {"nodeId": 17, "parentName": "Sports, Exercise, and Recreation", "nodeName": "Participating in Sports, Exercise, or Recreation", "parentId": 5}, {"nodeId": 5, "parentName": "Root", "nodeName": "Sports, Exercise, and Recreation", "parentId": 0}, {"nodeId": 70, "parentName": "Participating in martial arts", "nodeName": "Doing kickboxing", "parentId": 27}, {"nodeId": 27, "parentName": "Participating in Sports, Exercise, or Recreation", "nodeName": "Participating in martial arts", "parentId": 17}, {"nodeId": 99, "parentName": "Participating in martial arts", "nodeName": "Doing karate", "parentId": 27}, {"nodeId": 73, "parentName": "Dancing", "nodeName": "Tango", "parentId": 23}, 
{"nodeId": 23, "parentName": "Arts and Entertainment", "nodeName": "Dancing", "parentId": 21}, {"nodeId": 21, "parentName": "Socializing, Relaxing, and Leisure", "nodeName": "Arts and Entertainment", "parentId": 7}, {"nodeId": 7, "parentName": "Root", "nodeName": "Socializing, Relaxing, and Leisure", "parentId": 0}, {"nodeId": 76, "parentName": "Washing, dressing and grooming oneself", "nodeName": "Putting on makeup", "parentId": 62}, {"nodeId": 62, "parentName": "Grooming", "nodeName": "Washing, dressing and grooming oneself", "parentId": 20}, {"nodeId": 20, "parentName": "Personal Care", "nodeName": "Grooming", "parentId": 2}, {"nodeId": 2, "parentName": "Root", "nodeName": "Personal Care", "parentId": 0}, {"nodeId": 195, "parentName": "Playing sports", "nodeName": "High jump", "parentId": 64}, {"nodeId": 64, "parentName": "Participating in Sports, Exercise, or Recreation", "nodeName": "Playing sports", "parentId": 17}, {"nodeId": 136, "parentName": "Playing musical instruments", "nodeName": "Playing bagpipes", "parentId": 61}, {"nodeId": 61, "parentName": "Arts and Entertainment", "nodeName": "Playing musical instruments", "parentId": 21}, {"nodeId": 140, "parentName": "Dancing", "nodeName": "Cheerleading", "parentId": 23}, {"nodeId": 179, "parentName": "Household & personal organization and planning", "nodeName": "Wrapping presents", "parentId": 41}, {"nodeId": 41, "parentName": "Household Management", "nodeName": "Household & personal organization and planning", "parentId": 19}, {"nodeId": 19, "parentName": "Household Activities", "nodeName": "Household Management", "parentId": 1}, {"nodeId": 1, "parentName": "Root", "nodeName": "Household Activities", "parentId": 0}, {"nodeId": 82, "parentName": "Playing sports", "nodeName": "Cricket", "parentId": 64}, {"nodeId": 84, "parentName": "Weightlifting", "nodeName": "Clean and jerk", "parentId": 35}, {"nodeId": 35, "parentName": "Participating in Sports, Exercise, or Recreation", "nodeName": "Weightlifting", 
"parentId": 17}, {"nodeId": 186, "parentName": "Food and drink preparation ", "nodeName": "Preparing pasta", "parentId": 31}, {"nodeId": 31, "parentName": "Food & Drink Prep., Presentation, & Clean-up", "nodeName": "Food and drink preparation ", "parentId": 18}, {"nodeId": 90, "parentName": "Care for animals and pets (not veterinary care)", "nodeName": "Bathing dog", "parentId": 65}, {"nodeId": 65, "parentName": "Animals and Pets", "nodeName": "Care for animals and pets (not veterinary care)", "parentId": 22}, {"nodeId": 22, "parentName": "Household Activities", "nodeName": "Animals and Pets", "parentId": 1}, {"nodeId": 92, "parentName": "Playing sports", "nodeName": "Discus throw", "parentId": 64}, {"nodeId": 94, "parentName": "Playing hockey", "nodeName": "Playing field hockey", "parentId": 60}, {"nodeId": 60, "parentName": "Participating in Sports, Exercise, or Recreation", "nodeName": "Playing hockey", "parentId": 17}, {"nodeId": 95, "parentName": "Walking / exercising / playing with animals", "nodeName": "Grooming horse", "parentId": 26}, {"nodeId": 26, "parentName": "Animals and Pets", "nodeName": "Walking / exercising / playing with animals", "parentId": 22}, {"nodeId": 96, "parentName": "Food and drink preparation ", "nodeName": "Preparing salad", "parentId": 31}, {"nodeId": 100, "parentName": "Playing musical instruments", "nodeName": "Playing harmonica", "parentId": 61}, {"nodeId": 109, "parentName": "Playing musical instruments", "nodeName": "Playing saxophone", "parentId": 61}, {"nodeId": 110, "parentName": "Heating and cooling", "nodeName": "Chopping wood", "parentId": 34}, {"nodeId": 34, "parentName": "Interior Maintenance, Repair, & Decoration", "nodeName": "Heating and cooling", "parentId": 15}, {"nodeId": 15, "parentName": "Household Activities", "nodeName": "Interior Maintenance, Repair, & Decoration", "parentId": 1}, {"nodeId": 113, "parentName": "Washing, dressing and grooming oneself", "nodeName": "Washing face", "parentId": 62}, {"nodeId": 
114, "parentName": "Doing gymnastics", "nodeName": "Using the pommel horse", "parentId": 24}, {"nodeId": 24, "parentName": "Participating in Sports, Exercise, or Recreation", "nodeName": "Doing gymnastics", "parentId": 17}, {"nodeId": 116, "parentName": "Playing sports", "nodeName": "Javelin throw", "parentId": 64}, {"nodeId": 117, "parentName": "Using cardiovascular equipment", "nodeName": "Spinning", "parentId": 58}, {"nodeId": 58, "parentName": "Participating in Sports, Exercise, or Recreation", "nodeName": "Using cardiovascular equipment", "parentId": 17}, {"nodeId": 119, "parentName": "Playing racquet sports ", "nodeName": "Ping-pong", "parentId": 54}, {"nodeId": 54, "parentName": "Participating in Sports, Exercise, or Recreation", "nodeName": "Playing racquet sports ", "parentId": 17}, {"nodeId": 129, "parentName": "Food and drink preparation ", "nodeName": "Making a sandwich", "parentId": 31}, {"nodeId": 125, "parentName": "Washing, dressing and grooming oneself", "nodeName": "Brushing hair", "parentId": 62}, {"nodeId": 251, "parentName": "Playing musical instruments", "nodeName": "Playing guitarra", "parentId": 61}, {"nodeId": 230, "parentName": "Doing aerobics", "nodeName": "Doing step aerobics", "parentId": 55}, {"nodeId": 135, "parentName": "Eating and Drinking", "nodeName": "Drinking beer", "parentId": 46}, {"nodeId": 75, "parentName": "Participating in equestrian sports", "nodeName": "Playing polo", "parentId": 49}, {"nodeId": 49, "parentName": "Participating in Sports, Exercise, or Recreation", "nodeName": "Participating in equestrian sports", "parentId": 17}, {"nodeId": 79, "parentName": "Weightlifting", "nodeName": "Snatch", "parentId": 35}, {"nodeId": 141, "parentName": "Playing sports", "nodeName": "Paintball", "parentId": 64}, {"nodeId": 80, "parentName": "Playing sports", "nodeName": "Long jump", "parentId": 64}, {"nodeId": 143, "parentName": "Interior cleaning", "nodeName": "Cleaning windows", "parentId": 37}, {"nodeId": 37, "parentName": 
"Housework", "nodeName": "Interior cleaning", "parentId": 16}, {"nodeId": 16, "parentName": "Household Activities", "nodeName": "Housework", "parentId": 1}, {"nodeId": 148, "parentName": "Washing, dressing and grooming oneself", "nodeName": "Brushing teeth", "parentId": 62}, {"nodeId": 149, "parentName": "Playing musical instruments", "nodeName": "Playing flauta", "parentId": 61}, {"nodeId": 152, "parentName": "Playing racquet sports ", "nodeName": "Tennis serve with ball bouncing", "parentId": 54}, {"nodeId": 154, "parentName": "Playing sports", "nodeName": "Bungee jumping", "parentId": 64}, {"nodeId": 155, "parentName": "Playing sports", "nodeName": "Triple jump", "parentId": 64}, {"nodeId": 175, "parentName": "Participating in equestrian sports", "nodeName": "Horseback riding", "parentId": 49}, {"nodeId": 159, "parentName": "Playing basketball", "nodeName": "Layup drill in basketball", "parentId": 52}, {"nodeId": 52, "parentName": "Participating in Sports, Exercise, or Recreation", "nodeName": "Playing basketball", "parentId": 17}, {"nodeId": 160, "parentName": "Interior cleaning", "nodeName": "Vacuuming floor", "parentId": 37}, {"nodeId": 134, "parentName": "Sewing, repairing, & maintaining textiles", "nodeName": "Cleaning shoes", "parentId": 45}, {"nodeId": 45, "parentName": "Housework", "nodeName": "Sewing, repairing, & maintaining textiles", "parentId": 16}, {"nodeId": 167, "parentName": "Washing, dressing and grooming oneself", "nodeName": "Doing nails", "parentId": 62}, {"nodeId": 169, "parentName": "Playing sports", "nodeName": "Shot put", "parentId": 64}, {"nodeId": 171, "parentName": "Vehicle repair and maintenance (by self)", "nodeName": "Fixing bicycle", "parentId": 56}, {"nodeId": 56, "parentName": "Vehicles", "nodeName": "Vehicle repair and maintenance (by self)", "parentId": 13}, {"nodeId": 13, "parentName": "Household Activities", "nodeName": "Vehicles", "parentId": 1}, {"nodeId": 173, "parentName": "Washing, dressing and grooming oneself", 
"nodeName": "Washing hands", "parentId": 62}, {"nodeId": 83, "parentName": "Laundry", "nodeName": "Ironing clothes", "parentId": 30}, {"nodeId": 30, "parentName": "Housework", "nodeName": "Laundry", "parentId": 16}, {"nodeId": 180, "parentName": "Doing gymnastics", "nodeName": "Using the balance beam", "parentId": 24}, {"nodeId": 183, "parentName": "Exterior cleaning", "nodeName": "Shoveling snow", "parentId": 51}, {"nodeId": 51, "parentName": "Exterior Maintenance, Repair, & Decoration", "nodeName": "Exterior cleaning", "parentId": 12}, {"nodeId": 12, "parentName": "Household Activities", "nodeName": "Exterior Maintenance, Repair, & Decoration", "parentId": 1}, {"nodeId": 258, "parentName": "Doing gymnastics", "nodeName": "Tumbling", "parentId": 24}, {"nodeId": 88, "parentName": "Doing gymnastics", "nodeName": "Using parallel bars", "parentId": 24}, {"nodeId": 187, "parentName": "Washing, dressing and grooming", "nodeName": "Getting a tattoo", "parentId": 38}, {"nodeId": 38, "parentName": "Grooming", "nodeName": "Washing, dressing and grooming", "parentId": 20}, {"nodeId": 188, "parentName": "Climbing, spelunking, caving", "nodeName": "Rock climbing", "parentId": 50}, {"nodeId": 50, "parentName": "Participating in Sports, Exercise, or Recreation", "nodeName": "Climbing, spelunking, caving", "parentId": 17}, {"nodeId": 189, "parentName": "Tobacco and drug use", "nodeName": "Smoking hookah", "parentId": 28}, {"nodeId": 28, "parentName": "Relaxing and Leisure", "nodeName": "Tobacco and drug use", "parentId": 11}, {"nodeId": 11, "parentName": "Socializing, Relaxing, and Leisure", "nodeName": "Relaxing and Leisure", "parentId": 7}, {"nodeId": 190, "parentName": "Washing, dressing and grooming oneself", "nodeName": "Shaving", "parentId": 62}, {"nodeId": 214, "parentName": "Washing, dressing and grooming", "nodeName": "Getting a piercing", "parentId": 38}, {"nodeId": 193, "parentName": "Participating in water sports", "nodeName": "Springboard diving", "parentId": 42}, 
{"nodeId": 42, "parentName": "Participating in Sports, Exercise, or Recreation", "nodeName": "Participating in water sports", "parentId": 17}, {"nodeId": 194, "parentName": "Playing racquet sports ", "nodeName": "Playing squash", "parentId": 54}, {"nodeId": 197, "parentName": "Playing musical instruments", "nodeName": "Playing piano", "parentId": 61}, {"nodeId": 161, "parentName": "Playing sports", "nodeName": "Dodgeball", "parentId": 64}, {"nodeId": 201, "parentName": "Tobacco and drug use", "nodeName": "Smoking a cigarette", "parentId": 28}, {"nodeId": 240, "parentName": "Participating in water sports", "nodeName": "Sailing", "parentId": 42}, {"nodeId": 208, "parentName": "Washing, dressing and grooming", "nodeName": "Getting a haircut", "parentId": 38}, {"nodeId": 209, "parentName": "Playing racquet sports ", "nodeName": "Playing lacrosse", "parentId": 54}, {"nodeId": 211, "parentName": "Dancing", "nodeName": "Cumbia", "parentId": 23}, {"nodeId": 263, "parentName": "Participating in martial arts", "nodeName": "Tai chi", "parentId": 27}, {"nodeId": 217, "parentName": "Interior arrangement, decoration, & repairs", "nodeName": "Painting", "parentId": 33}, {"nodeId": 33, "parentName": "Interior Maintenance, Repair, & Decoration", "nodeName": "Interior arrangement, decoration, & repairs", "parentId": 15}, {"nodeId": 218, "parentName": "Lawn, garden, and houseplant care", "nodeName": "Mowing the lawn", "parentId": 66}, {"nodeId": 66, "parentName": "Lawn, Garden, and Houseplants", "nodeName": "Lawn, garden, and houseplant care", "parentId": 10}, {"nodeId": 10, "parentName": "Household Activities", "nodeName": "Lawn, Garden, and Houseplants", "parentId": 1}, {"nodeId": 200, "parentName": "Washing, dressing and grooming oneself", "nodeName": "Shaving legs", "parentId": 62}, {"nodeId": 221, "parentName": "Walking / exercising / playing with animals", "nodeName": "Walking the dog", "parentId": 26}, {"nodeId": 224, "parentName": "Playing sports", "nodeName": "Hammer throw", 
"parentId": 64}, {"nodeId": 176, "parentName": "Playing sports", "nodeName": "Skateboarding", "parentId": 64}, {"nodeId": 227, "parentName": "Sewing, repairing, & maintaining textiles", "nodeName": "Polishing shoes", "parentId": 45}, {"nodeId": 229, "parentName": "Attending arts and entertainment", "nodeName": "Ballet", "parentId": 39}, {"nodeId": 39, "parentName": "Arts and Entertainment", "nodeName": "Attending arts and entertainment", "parentId": 21}, {"nodeId": 232, "parentName": "Laundry", "nodeName": "Hand washing clothes", "parentId": 30}, {"nodeId": 233, "parentName": "Participating in water sports", "nodeName": "Plataform diving", "parentId": 42}, {"nodeId": 234, "parentName": "Playing musical instruments", "nodeName": "Playing violin", "parentId": 61}, {"nodeId": 236, "parentName": "Dancing", "nodeName": "Breakdancing", "parentId": 23}, {"nodeId": 237, "parentName": "Participating in water sports", "nodeName": "Windsurfing", "parentId": 42}, {"nodeId": 241, "parentName": "Playing games", "nodeName": "Hopscotch", "parentId": 48}, {"nodeId": 48, "parentName": "Relaxing and Leisure", "nodeName": "Playing games", "parentId": 11}, {"nodeId": 243, "parentName": "Playing sports", "nodeName": "Doing motocross", "parentId": 64}, {"nodeId": 244, "parentName": "Food and drink preparation ", "nodeName": "Mixing drinks", "parentId": 31}, {"nodeId": 153, "parentName": "Playing sports", "nodeName": "Starting a campfire", "parentId": 64}, {"nodeId": 247, "parentName": "Dancing", "nodeName": "Belly dance", "parentId": 23}, {"nodeId": 248, "parentName": "Washing, dressing and grooming oneself", "nodeName": "Removing curlers", "parentId": 62}, {"nodeId": 250, "parentName": "Playing sports", "nodeName": "Archery", "parentId": 64}, {"nodeId": 123, "parentName": "Playing volleyball", "nodeName": "Volleyball", "parentId": 342}, {"nodeId": 130, "parentName": "Participating in water sports", "nodeName": "Playing water polo", "parentId": 42}, {"nodeId": 253, "parentName": "Playing 
racquet sports ", "nodeName": "Playing racquetball", "parentId": 54}, {"nodeId": 254, "parentName": "Participating in water sports", "nodeName": "Kayaking", "parentId": 42}, {"nodeId": 156, "parentName": "Interior cleaning", "nodeName": "Polishing forniture", "parentId": 37}, {"nodeId": 257, "parentName": "Playing sports", "nodeName": "Playing kickball", "parentId": 64}, {"nodeId": 192, "parentName": "Doing gymnastics", "nodeName": "Using uneven bars", "parentId": 24}, {"nodeId": 213, "parentName": "Kitchen and food clean-up", "nodeName": "Washing dishes", "parentId": 43}, {"nodeId": 43, "parentName": "Food & Drink Prep., Presentation, & Clean-up", "nodeName": "Kitchen and food clean-up", "parentId": 18}, {"nodeId": 106, "parentName": "Playing sports", "nodeName": "Pole vault", "parentId": 64}, {"nodeId": 267, "parentName": "Playing musical instruments", "nodeName": "Playing accordion", "parentId": 61}, {"nodeId": 268, "parentName": "Playing racquet sports ", "nodeName": "Playing badminton", "parentId": 54}], "version": "VERSION 1.3", "database": {"sJFgo9H6zNo": {"duration": 139.042, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=sJFgo9H6zNo", "annotations": [{"segment": [0.01, 123.42336739937599], "label": "Fun sliding down"}]}, "V1zhqaGFY2A": {"duration": 136.98, "subset": "testing", "resolution": "658x480", "url": "https://www.youtube.com/watch?v=V1zhqaGFY2A", "annotations": []}, "JDg--pjY5gg": {"duration": 126.178, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JDg--pjY5gg", "annotations": [{"segment": [61.420442338881465, 64.56014561469672], "label": "Beer pong"}]}, "KsFid_YVsn0": {"duration": 235.13, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KsFid_YVsn0", "annotations": []}, "-TmWR_keSfI": {"duration": 104.955, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-TmWR_keSfI", "annotations": []}, 
"u2uoYvo8J5s": {"duration": 208.864, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=u2uoYvo8J5s", "annotations": [{"segment": [127.33201508846798, 201.39247572684465], "label": "Swinging at the playground"}]}, "0Gr4aKQzGYk": {"duration": 67.43, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0Gr4aKQzGYk", "annotations": [{"segment": [30.096653368174724, 56.67415180499219], "label": "Getting a piercing"}]}, "a-6rpItrRSk": {"duration": 8.38, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=a-6rpItrRSk", "annotations": []}, "--0edUL8zmA": {"duration": 92.18, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=--0edUL8zmA", "annotations": [{"segment": [5.464839815912637, 86.71837918408735], "label": "Dodgeball"}]}, "c_NlYvL96y0": {"duration": 125.27, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=c_NlYvL96y0", "annotations": [{"segment": [12.82082075, 108.5218213728549], "label": "Ballet"}]}, "hHiPEAiYKv0": {"duration": 83.06, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hHiPEAiYKv0", "annotations": [{"segment": [0.01, 83.05780045351474], "label": "Shoveling snow"}]}, "GCtrfXIBbwA": {"duration": 108.03999999999999, "subset": "training", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=GCtrfXIBbwA", "annotations": [{"segment": [2.863770670826833, 20.046394695787832], "label": "Playing harmonica"}]}, "rs7er4e67ec": {"duration": 107.671, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=rs7er4e67ec", "annotations": [{"segment": [28.704790951638067, 44.9876255850234], "label": "Hurling"}]}, "PLek2e8NlKc": {"duration": 184.599, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=PLek2e8NlKc", "annotations": [{"segment": [8.205021060842434, 
80.18757956318254], "label": "Kneeling"}, {"segment": [109.55610062402496, 144.39522854914196], "label": "Kneeling"}]}, "_uOfIm1tFcI": {"duration": 133.56, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=_uOfIm1tFcI", "annotations": [{"segment": [6.459276987519502, 124.18480917940718], "label": "Tumbling"}]}, "0n3VRoYYYGU": {"duration": 232.432, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=0n3VRoYYYGU", "annotations": [{"segment": [95.18451452343808, 177.49645660656358], "label": "Sharpening knives"}]}, "x90zpDHTpjQ": {"duration": 91.58, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=x90zpDHTpjQ", "annotations": []}, "T_q3f10pkOg": {"duration": 142.71, "subset": "validation", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=T_q3f10pkOg", "annotations": [{"segment": [2.1143354134165366, 135.4287472698908], "label": "Playing water polo"}]}, "6ZabZdqdLyA": {"duration": 71.82, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=6ZabZdqdLyA", "annotations": [{"segment": [4.366113884555382, 69.65332605304212], "label": "Swinging at the playground"}]}, "UMVYt6Bgrrs": {"duration": 136.51, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UMVYt6Bgrrs", "annotations": []}, "yUSo_xjyGhM": {"duration": 120.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yUSo_xjyGhM", "annotations": [{"segment": [1.4982488387970894, 84.65105939203553], "label": "Washing dishes"}, {"segment": [106.93750979251169, 117.79981376443057], "label": "Washing dishes"}]}, "Ou24uqaFRPg": {"duration": 50.48, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Ou24uqaFRPg", "annotations": [{"segment": [26.214528861154445, 40.54198907956318], "label": "Polishing forniture"}]}, "Uw_1iNGntZU": {"duration": 64.02, "subset": 
"testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Uw_1iNGntZU", "annotations": []}, "sjyZWmvTGA4": {"duration": 35.968, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sjyZWmvTGA4", "annotations": [{"segment": [5.274515916380655, 34.78936029953198], "label": "Blowing leaves"}]}, "c5dvRUBZw2Q": {"duration": 234.289, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=c5dvRUBZw2Q", "annotations": [{"segment": [66.15323674076616, 152.14434841612578], "label": "Scuba diving"}]}, "BxzOUJbq28s": {"duration": 225.42, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BxzOUJbq28s", "annotations": [{"segment": [5.273658346333853, 218.7004399375975], "label": "Playing congas"}]}, "Z8yNbpjUSLc": {"duration": 166.511, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=Z8yNbpjUSLc", "annotations": [{"segment": [52.46359126365054, 143.36585335413415], "label": "Making a lemonade"}]}, "A1EflBqBv14": {"duration": 97.013, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=A1EflBqBv14", "annotations": [{"segment": [49.77651950078003, 96.981], "label": "Arm wrestling"}]}, "6uhLrPgbpUA": {"duration": 46.162, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6uhLrPgbpUA", "annotations": [{"segment": [6.337222664586584, 12.098334177847114], "label": "BMX"}, {"segment": [21.17208481123245, 24.412710037441496], "label": "BMX"}, {"segment": [29.23764092979719, 38.095349881435254], "label": "BMX"}]}, "jIQFVSymHQs": {"duration": 11.73, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jIQFVSymHQs", "annotations": [{"segment": [0.28354788135725434, 9.759535142199688], "label": "Playing kickball"}]}, "2U0vMYnC49c": {"duration": 50.62, "subset": "training", "resolution": "426x240", "url": 
"https://www.youtube.com/watch?v=2U0vMYnC49c", "annotations": [{"segment": [0.01, 49.15295385862808], "label": "Removing ice from car"}]}, "t2wawfMaTzM": {"duration": 66.2, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=t2wawfMaTzM", "annotations": [{"segment": [1.3425832542901717, 9.294807145085802], "label": "Playing racquetball"}, {"segment": [18.383063020280808, 34.49406207176287], "label": "Playing racquetball"}, {"segment": [39.554568184087366, 55.76884287051482], "label": "Playing racquetball"}]}, "gKkk82LG8H0": {"duration": 121.534, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gKkk82LG8H0", "annotations": [{"segment": [0.01, 110.91567327613106], "label": "Playing congas"}]}, "R-hsYQNtnn0": {"duration": 41.727, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=R-hsYQNtnn0", "annotations": []}, "UvuXGKesWS0": {"duration": 102.795, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=UvuXGKesWS0", "annotations": [{"segment": [9.937507020280812, 33.97986271450858], "label": "Swimming"}, {"segment": [38.94861622464899, 64.92699687987519], "label": "Swimming"}, {"segment": [70.85859906396256, 101.79911856474259], "label": "Swimming"}]}, "thpmgolDsbQ": {"duration": 123.74, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=thpmgolDsbQ", "annotations": []}, "8pR6NBu7Ay0": {"duration": 196.49, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8pR6NBu7Ay0", "annotations": [{"segment": [31.094469578783155, 169.25807332293292], "label": "Paintball"}]}, "mh_0QLZhrSY": {"duration": 100.1, "subset": "validation", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=mh_0QLZhrSY", "annotations": [{"segment": [0, 100.041], "label": "Playing bagpipes"}]}, "jhbqzV3tbDk": {"duration": 57.701, "subset": "testing", "resolution": "1920x1080", 
"url": "https://www.youtube.com/watch?v=jhbqzV3tbDk", "annotations": []}, "NTKUu3gpIr8": {"duration": 119.21000000000001, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=NTKUu3gpIr8", "annotations": []}, "pRwx9my8-is": {"duration": 11.332, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=pRwx9my8-is", "annotations": []}, "S2qF3yvM_PA": {"duration": 89.14, "subset": "testing", "resolution": "300x240", "url": "https://www.youtube.com/watch?v=S2qF3yvM_PA", "annotations": []}, "-YjGbsbDoxs": {"duration": 162.35, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-YjGbsbDoxs", "annotations": [{"segment": [30.768311232449296, 157.386875975039], "label": "Painting"}]}, "p800u2wCKbE": {"duration": 40.473, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=p800u2wCKbE", "annotations": [{"segment": [8.523823970358814, 15.532301457098283], "label": "Making a lemonade"}, {"segment": [33.337622639625586, 36.49459448049922], "label": "Making a lemonade"}]}, "BfnM0eyjB5Q": {"duration": 185.621, "subset": "training", "resolution": "852x480", "url": "https://www.youtube.com/watch?v=BfnM0eyjB5Q", "annotations": [{"segment": [15.492493287486601, 168.67995027964383], "label": "Assembling bicycle"}]}, "RTnNxbG2V5o": {"duration": 235.776, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=RTnNxbG2V5o", "annotations": [{"segment": [60.13928406932196, 190.71693449506688], "label": "Riding bumper cars"}]}, "-Xl95IW5H_s": {"duration": 178.21, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=-Xl95IW5H_s", "annotations": [{"segment": [63.38938945312915, 148.095003], "label": "Starting a campfire"}]}, "8bSCVzFXjzk": {"duration": 175.06, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8bSCVzFXjzk", "annotations": []}, "maXU1lGguxs": 
{"duration": 85.22, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=maXU1lGguxs", "annotations": [{"segment": [10.39562183, 59.04174628], "label": "Playing violin"}]}, "R4ES1QLRvtg": {"duration": 187.39, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=R4ES1QLRvtg", "annotations": [{"segment": [3.3613190327613105, 92.80163416536661], "label": "Shoveling snow"}, {"segment": [101.57029251170046, 105.077755850234], "label": "Shoveling snow"}, {"segment": [113.26183697347894, 119.10760920436817], "label": "Shoveling snow"}, {"segment": [129.92228783151327, 174.35015678627144], "label": "Shoveling snow"}]}, "oD8sGLt2FR0": {"duration": 28.05, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=oD8sGLt2FR0", "annotations": []}, "9pk3tgrILeY": {"duration": 53.64, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9pk3tgrILeY", "annotations": []}, "boWMu522Pfc": {"duration": 12.15, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=boWMu522Pfc", "annotations": []}, "UkA6pgt29VI": {"duration": 236.38, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UkA6pgt29VI", "annotations": [{"segment": [37.239377535101404, 58.255659906396254], "label": "Surfing"}, {"segment": [67.84203432137286, 150.80104368174727], "label": "Surfing"}, {"segment": [161.12483151326052, 185.09076755070205], "label": "Surfing"}, {"segment": [191.72748829953198, 206.84446333853356], "label": "Surfing"}]}, "AwcM25G1h-g": {"duration": 127.01, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=AwcM25G1h-g", "annotations": []}, "jafEh82XETQ": {"duration": 111.6, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=jafEh82XETQ", "annotations": [{"segment": [10.184159699999999, 111.5951019], "label": "Making a 
sandwich"}]}, "0YQPGAsZPgY": {"duration": 166.093, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=0YQPGAsZPgY", "annotations": [{"segment": [0.19999999999999998, 166.09233560090703], "label": "Playing congas"}]}, "akrKZpGXeo4": {"duration": 164.467, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=akrKZpGXeo4", "annotations": []}, "5Bo0gFXxDQk": {"duration": 16.998, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5Bo0gFXxDQk", "annotations": [{"segment": [0.2, 15.382024774143304], "label": "Welding"}]}, "hmT9H0IjiGs": {"duration": 215.551, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hmT9H0IjiGs", "annotations": [{"segment": [62.882906845525525, 181.5870021528861], "label": "Making a lemonade"}]}, "97McCuWAynA": {"duration": 127.316, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=97McCuWAynA", "annotations": [{"segment": [3.423820584947818, 5.604253606754671], "label": "Surfing"}]}, "WEohpavjsIY": {"duration": 136.37, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WEohpavjsIY", "annotations": [{"segment": [12.764816878389421, 134.88156501498156], "label": "Cumbia"}]}, "3joaQzU05MY": {"duration": 40.26, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3joaQzU05MY", "annotations": [{"segment": [0, 29.208238689547578], "label": "Hopscotch"}]}, "kcV3HTdtgxk": {"duration": 11.123, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kcV3HTdtgxk", "annotations": []}, "3X9dtfeV1mQ": {"duration": 144.87, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3X9dtfeV1mQ", "annotations": []}, "br-L1PS2ADg": {"duration": 50.086, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=br-L1PS2ADg", 
"annotations": []}, "w2HnFjJei7k": {"duration": 35.202, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=w2HnFjJei7k", "annotations": [{"segment": [10.983291731669267, 34.432619578783154], "label": "Gargling mouthwash"}]}, "dUbwRd6ZT-Y": {"duration": 128.639, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dUbwRd6ZT-Y", "annotations": []}, "IGcsVPa34Hc": {"duration": 214.437, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=IGcsVPa34Hc", "annotations": [{"segment": [7.35785023400936, 30.769191887675508], "label": "Baking cookies"}, {"segment": [100.66876911076443, 113.71223088923557], "label": "Baking cookies"}, {"segment": [139.79915444617782, 148.4947956318253], "label": "Baking cookies"}, {"segment": [191.6385538221529, 213.71210452418097], "label": "Baking cookies"}]}, "CTyvQHkAKuY": {"duration": 217.27, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CTyvQHkAKuY", "annotations": []}, "qogdv5DWzkQ": {"duration": 221.403, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=qogdv5DWzkQ", "annotations": [{"segment": [0.01, 185.65658993426882], "label": "Braiding hair"}]}, "gSeE_Ms-2TI": {"duration": 158.313, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=gSeE_Ms-2TI", "annotations": [{"segment": [0.01, 152.00033074885772], "label": "Roof shingle removal"}]}, "1PTNnaEu8xo": {"duration": 163.423, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=1PTNnaEu8xo", "annotations": [{"segment": [30.84877837562482, 117.53129481903277], "label": "Capoeira"}]}, "rhOtqArO-3Y": {"duration": 231.248, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rhOtqArO-3Y", "annotations": [{"segment": [33.55073344158256, 165.94986433470945], "label": "Slacklining"}]}, "-1K773_2MIY": 
{"duration": 72.24, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=-1K773_2MIY", "annotations": []}, "SrSmr4qKfJA": {"duration": 74.21000000000001, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SrSmr4qKfJA", "annotations": []}, "lW4OZ8eP3ns": {"duration": 70.03, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=lW4OZ8eP3ns", "annotations": [{"segment": [0.01, 17.699038820592822], "label": "Playing violin"}, {"segment": [35.50733096723868, 60.63559595943837], "label": "Playing violin"}]}, "rWQz-EwA4EA": {"duration": 9.149, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rWQz-EwA4EA", "annotations": [{"segment": [0.01, 8.413348081557242], "label": "Plastering"}]}, "IZnRIj69JKo": {"duration": 15.117, "subset": "testing", "resolution": "608x1080", "url": "https://www.youtube.com/watch?v=IZnRIj69JKo", "annotations": []}, "Yk3pQ18So90": {"duration": 203.825, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Yk3pQ18So90", "annotations": [{"segment": [0.01, 203.8247619047619], "label": "Changing car wheel"}]}, "xr64cSwpNOM": {"duration": 82.66, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xr64cSwpNOM", "annotations": [{"segment": [1.6752152886115446, 80.66805928237129], "label": "Chopping wood"}]}, "xi4puZ1ulP4": {"duration": 226.21, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xi4puZ1ulP4", "annotations": [{"segment": [109.72865990639626, 182.41066614664587], "label": "Removing curlers"}]}, "M1-G6KEhY-M": {"duration": 130.03, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=M1-G6KEhY-M", "annotations": [{"segment": [15.51303042121685, 123.80006630265211], "label": "Horseback riding"}]}, "89f62by1ef0": {"duration": 56.193, "subset": "testing", 
"resolution": "320x240", "url": "https://www.youtube.com/watch?v=89f62by1ef0", "annotations": []}, "KE9T_srKrnI": {"duration": 204.174, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KE9T_srKrnI", "annotations": []}, "l_pp-1qu9Ig": {"duration": 113.18, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=l_pp-1qu9Ig", "annotations": []}, "i0AsepC37Sk": {"duration": 211.604, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=i0AsepC37Sk", "annotations": [{"segment": [19.472853354134166, 189.46566926677065], "label": "Playing congas"}]}, "x99PS_O6JW8": {"duration": 117.08, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=x99PS_O6JW8", "annotations": [{"segment": [3.3768127925117004, 14.511168486739468], "label": "Smoking hookah"}, {"segment": [47.731705148205926, 88.0709282371295], "label": "Smoking hookah"}]}, "D_XMD02v690": {"duration": 194.514, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=D_XMD02v690", "annotations": [{"segment": [10.013958631814662, 192.08593267862713], "label": "Doing a powerbomb"}]}, "sl4qoHiJ8FY": {"duration": 121.975, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=sl4qoHiJ8FY", "annotations": []}, "lVMMPkvnid8": {"duration": 158.175, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=lVMMPkvnid8", "annotations": [{"segment": [25.40789859594384, 49.33572542901717], "label": "Playing ten pins"}, {"segment": [68.32997971918877, 79.43051794071762], "label": "Playing ten pins"}, {"segment": [83.13069734789391, 88.31094851794073], "label": "Playing ten pins"}, {"segment": [98.43722152886114, 110.77271294851793], "label": "Playing ten pins"}, {"segment": [142.3515709828393, 157.89429017160685], "label": "Playing ten pins"}]}, "whJ6ESGNoyY": {"duration": 29.56, "subset": "validation", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=whJ6ESGNoyY", "annotations": [{"segment": [0.01, 19.275604396255847], "label": "Walking the dog"}]}, "qJYp25ni6tg": {"duration": 193.19, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=qJYp25ni6tg", "annotations": [{"segment": [21.39858285487882, 181.43587089547583], "label": "Preparing salad"}]}, "jzNdWjZm92s": {"duration": 194.096, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jzNdWjZm92s", "annotations": [{"segment": [16.348352574102964, 154.40110764430577], "label": "Rafting"}]}, "j4Ru2L4u0Qk": {"duration": 91.371, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=j4Ru2L4u0Qk", "annotations": [{"segment": [1.9956105291830721, 21.38154138410434], "label": "Hurling"}, {"segment": [58.01524895553645, 82.53274927769111], "label": "Hurling"}, {"segment": [84.8134470124805, 91.170453], "label": "Hurling"}]}, "obVMUmZQW_M": {"duration": 142.62, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=obVMUmZQW_M", "annotations": [{"segment": [22.694118439937597, 102.79100705148205], "label": "Removing curlers"}]}, "mQHY2uMOMP4": {"duration": 115.47, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mQHY2uMOMP4", "annotations": [{"segment": [29.183462065013213, 94.03559879251169], "label": "Getting a haircut"}]}, "kQ4rE7o6rrg": {"duration": 128.36, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kQ4rE7o6rrg", "annotations": [{"segment": [29.923556162246488, 60.14734867394696], "label": "Playing beach volleyball"}, {"segment": [69.75491185647425, 78.16152964118564], "label": "Playing beach volleyball"}, {"segment": [97.77697113884554, 112.40599297971919], "label": "Playing beach volleyball"}]}, "a2vaM_aRHv4": {"duration": 177.262, "subset": "training", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=a2vaM_aRHv4", "annotations": [{"segment": [53.911263650546026, 98.69908268330732], "label": "Making a cake"}]}, "XnctHnlJB4g": {"duration": 17.04, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XnctHnlJB4g", "annotations": [{"segment": [2.174854914196568, 16.311411856474262], "label": "Clean and jerk"}]}, "5DhbyYavYp4": {"duration": 234.522, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5DhbyYavYp4", "annotations": [{"segment": [27.354733064175083, 42.308656848307706], "label": "Trimming branches or hedges"}, {"segment": [84.25258941355776, 101.03016243965777], "label": "Trimming branches or hedges"}, {"segment": [145.52720394366216, 148.44504273081], "label": "Trimming branches or hedges"}]}, "YUFPWt0VQ1Y": {"duration": 88.91, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YUFPWt0VQ1Y", "annotations": [{"segment": [53.73328981852345, 82.67725137850468], "label": "Drum corps"}]}, "aoIGBV31OT4": {"duration": 197.79, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aoIGBV31OT4", "annotations": [{"segment": [0, 197.787074], "label": "Windsurfing"}]}, "yVsOoFr61x4": {"duration": 24.08, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yVsOoFr61x4", "annotations": [{"segment": [6.636234789391575, 20.0215015600624], "label": "Plataform diving"}]}, "WMdeqUyHWSo": {"duration": 130.172, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WMdeqUyHWSo", "annotations": [{"segment": [20.307498558445744, 26.196673140395006], "label": "Kite flying"}, {"segment": [38.17809706708268, 45.48879650546022], "label": "Kite flying"}]}, "M-n0vW3p2sE": {"duration": 40.15, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=M-n0vW3p2sE", "annotations": 
[{"segment": [4.13373135725429, 34.00933525741029], "label": "Using parallel bars"}]}, "5iQsKA84vWo": {"duration": 155.64, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5iQsKA84vWo", "annotations": [{"segment": [23.24372562, 136.58451465000002], "label": "Doing kickboxing"}]}, "k89EbTl0yfY": {"duration": 25.15, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=k89EbTl0yfY", "annotations": []}, "2CxYCq5kOZ4": {"duration": 201.619, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=2CxYCq5kOZ4", "annotations": []}, "G9ZEi6eRhi0": {"duration": 132.89, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=G9ZEi6eRhi0", "annotations": [{"segment": [0, 132.8878], "label": "Dodgeball"}]}, "lpeyMIH1YqA": {"duration": 24.78, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=lpeyMIH1YqA", "annotations": [{"segment": [8.928524789391576, 18.398172293291733], "label": "Cleaning shoes"}]}, "btrx-2wYVYw": {"duration": 206.71, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=btrx-2wYVYw", "annotations": []}, "zp86ztwZEKk": {"duration": 88.42, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zp86ztwZEKk", "annotations": [{"segment": [14.484035361414458, 19.72587673030731], "label": "Playing field hockey"}, {"segment": [25.51949087487309, 32.27870737686651], "label": "Playing field hockey"}, {"segment": [52.41841368892852, 58.07408463957606], "label": "Playing field hockey"}]}, "5yEBtodt_9U": {"duration": 169.46, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5yEBtodt_9U", "annotations": []}, "-5xEgcRn1-g": {"duration": 111.572, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-5xEgcRn1-g", "annotations": []}, "br8Ao_UBEZA": {"duration": 82.48, 
"subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=br8Ao_UBEZA", "annotations": [{"segment": [8.84874608, 73.72775803432137], "label": "Playing squash"}]}, "KyrDumISv4A": {"duration": 72.84, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=KyrDumISv4A", "annotations": [{"segment": [18.06820377285491, 66.47735350390016], "label": "Tumbling"}]}, "rAUwt3trNMM": {"duration": 160.91, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rAUwt3trNMM", "annotations": []}, "buhaBimF4M0": {"duration": 209.63, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=buhaBimF4M0", "annotations": [{"segment": [45.08595631825273, 200.92654446177846], "label": "Rollerblading"}]}, "LcmTBkWvV74": {"duration": 180.4, "subset": "training", "resolution": "384x288", "url": "https://www.youtube.com/watch?v=LcmTBkWvV74", "annotations": [{"segment": [53.32115410000001, 177.30421216848674], "label": "Getting a piercing"}]}, "Sd08rsPTroE": {"duration": 234.197, "subset": "validation", "resolution": "853x470", "url": "https://www.youtube.com/watch?v=Sd08rsPTroE", "annotations": [{"segment": [125.85323923962109, 164.88598300379343], "label": "Playing drums"}, {"segment": [184.58474963551402, 222.88790914485983], "label": "Playing drums"}]}, "KaGGtAhiH64": {"duration": 213.113, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KaGGtAhiH64", "annotations": [{"segment": [20.613089100434767, 126.33828803492277], "label": "Playing rubik cube"}, {"segment": [93.42383931003498, 128.66557228819767], "label": "Playing rubik cube"}, {"segment": [131.6577948995511, 162.24495937116396], "label": "Playing rubik cube"}, {"segment": [166.2345895196352, 212.91274376417235], "label": "Playing rubik cube"}]}, "waDC7lUystA": {"duration": 69.195, "subset": "testing", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=waDC7lUystA", "annotations": []}, "0bosp4-pyTM": {"duration": 115.636, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=0bosp4-pyTM", "annotations": [{"segment": [70.17497744807751, 75.76732708268331], "label": "Sharpening knives"}, {"segment": [78.11250625429017, 82.8028645975039], "label": "Sharpening knives"}]}, "IgEc5mY9L44": {"duration": 91.673, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IgEc5mY9L44", "annotations": [{"segment": [19.306975296411856, 91.67238095238095], "label": "Welding"}]}, "amCD-2TIKw0": {"duration": 124.18, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=amCD-2TIKw0", "annotations": [{"segment": [0.01, 124.18031746031745], "label": "Zumba"}]}, "_N1MWv9bW6Q": {"duration": 86.36, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_N1MWv9bW6Q", "annotations": []}, "8L1UjGLOCeI": {"duration": 12.45, "subset": "testing", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=8L1UjGLOCeI", "annotations": []}, "R8rWDSjHG60": {"duration": 38.17, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=R8rWDSjHG60", "annotations": []}, "071ZYjqfq3o": {"duration": 155.55, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=071ZYjqfq3o", "annotations": []}, "LDqNQpdbsXo": {"duration": 119.47, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=LDqNQpdbsXo", "annotations": []}, "0MtST08F2Xc": {"duration": 36.851, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0MtST08F2Xc", "annotations": [{"segment": [0, 36.821], "label": "Raking leaves"}]}, "WUOiJOqrHlI": {"duration": 175.52, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WUOiJOqrHlI", "annotations": [{"segment": 
[6.571718934063485, 24.096302758232778], "label": "Ping-pong"}, {"segment": [39.15649156630265, 48.740248243369734], "label": "Ping-pong"}, {"segment": [115.82654498283931, 121.57679898907956], "label": "Ping-pong"}, {"segment": [145.67310149141966, 157.7212527425897], "label": "Ping-pong"}]}, "V90aT-d_FKo": {"duration": 77.83, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=V90aT-d_FKo", "annotations": [{"segment": [0.01, 77.83331065759637], "label": "Bathing dog"}]}, "I4wkBjpwAAU": {"duration": 93.136, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=I4wkBjpwAAU", "annotations": [{"segment": [3.683435423770798, 88.08340738638762], "label": "Tug of war"}]}, "M9uQCTr2-r0": {"duration": 184.58, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=M9uQCTr2-r0", "annotations": [{"segment": [7.196606864274571, 181.0862542901716], "label": "Ping-pong"}]}, "Jifw8dC5yTM": {"duration": 150.09, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Jifw8dC5yTM", "annotations": [{"segment": [14.283496096306438, 139.0884693978159], "label": "Breakdancing"}]}, "8LV3M7Pw6Fc": {"duration": 121.02, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=8LV3M7Pw6Fc", "annotations": []}, "5o9iv0wC59g": {"duration": 45.396, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5o9iv0wC59g", "annotations": [{"segment": [6.515352641316538, 41.92487663026521], "label": "Playing congas"}]}, "4S5ethZhgqs": {"duration": 17.0, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4S5ethZhgqs", "annotations": [{"segment": [5.038114102964118, 5.780573023400936], "label": "Drinking coffee"}, {"segment": [6.6821302839313566, 7.6367203244929796], "label": "Drinking coffee"}, {"segment": [8.564793975039002, 9.38680206552262], "label": "Drinking coffee"}, 
{"segment": [15.35298981903276, 15.697702889235568], "label": "Drinking coffee"}]}, "Ci9WKE6wkOQ": {"duration": 155.156, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ci9WKE6wkOQ", "annotations": [{"segment": [69.92853198127925, 83.96263182527301], "label": "Kneeling"}, {"segment": [90.0118127925117, 155.121], "label": "Kneeling"}]}, "VT871SvzqTY": {"duration": 154.72, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VT871SvzqTY", "annotations": []}, "IlD4gfHmWIg": {"duration": 93.69, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IlD4gfHmWIg", "annotations": [{"segment": [3.800311304969205, 88.13798911140118], "label": "Using the balance beam"}]}, "Z4OyG8ZzUpg": {"duration": 176.36, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Z4OyG8ZzUpg", "annotations": [{"segment": [130.558155, 147.2557275], "label": "Playing lacrosse"}, {"segment": [12.50989727223089, 114.34487750000001], "label": "Playing lacrosse"}]}, "raxeBxPQqlk": {"duration": 161.519, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=raxeBxPQqlk", "annotations": []}, "BenCx65cYvc": {"duration": 142.269, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BenCx65cYvc", "annotations": []}, "Bmb315fcUpQ": {"duration": 183.97, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=Bmb315fcUpQ", "annotations": []}, "F_EHrfbYsRE": {"duration": 226.883, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=F_EHrfbYsRE", "annotations": [{"segment": [0.01, 0.02], "label": "Running a marathon"}]}, "1FMjfLRpXwM": {"duration": 142.037, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1FMjfLRpXwM", "annotations": [{"segment": [54.953264775488975, 113.89507296210215], "label": 
"Braiding hair"}]}, "yxSBQXuOwuc": {"duration": 144.011, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yxSBQXuOwuc", "annotations": [{"segment": [13.255202259791073, 130.5300423229329], "label": "Scuba diving"}]}, "C0F-xroK_Eg": {"duration": 205.36, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=C0F-xroK_Eg", "annotations": [{"segment": [6.91026305, 152.83489368], "label": "Preparing pasta"}]}, "adz3kDJLCF4": {"duration": 68.85, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=adz3kDJLCF4", "annotations": [{"segment": [1.7287481699999998, 66.23273498829953], "label": "Brushing teeth"}]}, "1YU4MSK80cQ": {"duration": 16.28, "subset": "testing", "resolution": "404x720", "url": "https://www.youtube.com/watch?v=1YU4MSK80cQ", "annotations": []}, "TOP1Fwili-k": {"duration": 139.09, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=TOP1Fwili-k", "annotations": [{"segment": [83.53895592043682, 130.6245492574103], "label": "Playing badminton"}]}, "ZEgmWaMa9PU": {"duration": 229.47, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZEgmWaMa9PU", "annotations": [{"segment": [21.840667607656684, 91.65919520590347], "label": "Bungee jumping"}]}, "1BfYAuxv6Wk": {"duration": 63.58, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=1BfYAuxv6Wk", "annotations": [{"segment": [18.43779407176287, 56.50291731669267], "label": "Mixing drinks"}]}, "kcB7HQPxzDM": {"duration": 77.18, "subset": "training", "resolution": "634x360", "url": "https://www.youtube.com/watch?v=kcB7HQPxzDM", "annotations": [{"segment": [47.34713952, 54.41846734733226], "label": "Discus throw"}, {"segment": [5.20505613237898, 14.207509403407018], "label": "Discus throw"}, {"segment": [28.85150005761263, 35.93342996415468], "label": "Discus throw"}, {"segment": [58.259514076304214, 
65.94160753424813], "label": "Discus throw"}]}, "Iuk7PNmG3tQ": {"duration": 115.032, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=Iuk7PNmG3tQ", "annotations": [{"segment": [5.74008112324493, 106.0305631825273], "label": "Playing ice hockey"}]}, "3C4Nry679BA": {"duration": 14.49, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3C4Nry679BA", "annotations": [{"segment": [1.3845655226209048, 13.991991419656786], "label": "Preparing salad"}]}, "JnYteBN2LDI": {"duration": 61.09, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JnYteBN2LDI", "annotations": [{"segment": [6.0965741029641185, 60.20366926677067], "label": "Mixing drinks"}]}, "8i_pspVcWPY": {"duration": 94.46000000000001, "subset": "testing", "resolution": "1920x818", "url": "https://www.youtube.com/watch?v=8i_pspVcWPY", "annotations": []}, "VCldTcZi0aE": {"duration": 92.439, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VCldTcZi0aE", "annotations": [{"segment": [0.01, 92.361], "label": "Drum corps"}]}, "uJZFC7gHZGI": {"duration": 121.23, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uJZFC7gHZGI", "annotations": [{"segment": [45.46738157020281, 94.43225403042122], "label": "Getting a piercing"}]}, "K8G5CBJtluY": {"duration": 69.684, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=K8G5CBJtluY", "annotations": [{"segment": [35.522386821246066, 39.10422748479747], "label": "Doing crunches"}, {"segment": [50.28391198012457, 59.61840583059186], "label": "Doing crunches"}]}, "SokK_O2s9tQ": {"duration": 110.574, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=SokK_O2s9tQ", "annotations": [{"segment": [0.2, 54.16893761152725], "label": "Camel ride"}]}, "srXhWAGzd-s": {"duration": 218.268, "subset": "training", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=srXhWAGzd-s", "annotations": [{"segment": [29.275017160686424, 49.359040561622464], "label": "Hand car wash"}, {"segment": [54.46514820592824, 90.54830889235569], "label": "Hand car wash"}, {"segment": [105.52622464898595, 152.8428221528861], "label": "Hand car wash"}]}, "b5GrYqt5tRQ": {"duration": 164.096, "subset": "training", "resolution": "400x230", "url": "https://www.youtube.com/watch?v=b5GrYqt5tRQ", "annotations": [{"segment": [0.20000000000000004, 163.841], "label": "Capoeira"}]}, "jCVi9xt3GUY": {"duration": 40.59, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jCVi9xt3GUY", "annotations": [{"segment": [0.34706057956318254, 33.47557044695788], "label": "Washing dishes"}]}, "k_ZXmr8pmrs": {"duration": 182.51, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=k_ZXmr8pmrs", "annotations": [{"segment": [39.007320939157566, 169.69608233385335], "label": "Sailing"}]}, "CHkT4As5jZI": {"duration": 66.6, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CHkT4As5jZI", "annotations": [{"segment": [7.30402992, 56.361093471138844], "label": "Mixing drinks"}]}, "jgNeMnPVfkQ": {"duration": 70.612, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jgNeMnPVfkQ", "annotations": [{"segment": [0.01, 70.29058452472604], "label": "Playing ice hockey"}]}, "SLisp6hn700": {"duration": 177.89, "subset": "validation", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=SLisp6hn700", "annotations": [{"segment": [20.36461824, 131.58197472], "label": "Wrapping presents"}]}, "V1SEaTS9hos": {"duration": 11.15, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=V1SEaTS9hos", "annotations": [{"segment": [0.01, 11.145577], "label": "Hand washing clothes"}]}, "vYHtmvftHoU": {"duration": 166.417, "subset": "validation", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=vYHtmvftHoU", "annotations": [{"segment": [40.424168224299066, 146.94396728971964], "label": "Surfing"}]}, "8qAdvUvNuts": {"duration": 120.19, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8qAdvUvNuts", "annotations": [{"segment": [45.93712007800312, 113.436561825273], "label": "Rock climbing"}]}, "MbCAiWBhAjo": {"duration": 135.35, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MbCAiWBhAjo", "annotations": [{"segment": [11.402234957878315, 121.834992049922], "label": "Making a lemonade"}]}, "r2oMItb5EVM": {"duration": 109.158, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=r2oMItb5EVM", "annotations": [{"segment": [0.01, 109.101], "label": "Making an omelette"}]}, "zDBpa2miW8A": {"duration": 52.385, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=zDBpa2miW8A", "annotations": [{"segment": [5.311743061486869, 20.464875817318905], "label": "Cutting the grass"}]}, "TlDh_RZ3HDk": {"duration": 145.334, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TlDh_RZ3HDk", "annotations": [{"segment": [6.801889055154043, 140.57237366614663], "label": "Braiding hair"}]}, "_xszCju7ER0": {"duration": 128.36, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_xszCju7ER0", "annotations": []}, "gF14lRI0HUM": {"duration": 19.13, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=gF14lRI0HUM", "annotations": [{"segment": [1.2238110468018721, 19.103391950078006], "label": "Doing kickboxing"}]}, "T9SW-T8XE2g": {"duration": 20.57, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=T9SW-T8XE2g", "annotations": []}, "7tdlcmbuLOA": {"duration": 204.22, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7tdlcmbuLOA", 
"annotations": [{"segment": [19.11353645982574, 23.57336163378508], "label": "Wakeboarding"}, {"segment": [157.36811594383775, 158.32379276131044], "label": "Wakeboarding"}, {"segment": [174.25173971918878, 179.6672416848674], "label": "Wakeboarding"}]}, "JG7T6qtO4jg": {"duration": 136.37, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JG7T6qtO4jg", "annotations": []}, "E7NTSqMgx44": {"duration": 18.855, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=E7NTSqMgx44", "annotations": [{"segment": [0.01, 14.971438546021842], "label": "Drum corps"}]}, "0J55EGFLglQ": {"duration": 19.854, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=0J55EGFLglQ", "annotations": [{"segment": [5.20587920245259, 19.853061], "label": "Futsal"}]}, "PaoElimUG3s": {"duration": 132.052, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=PaoElimUG3s", "annotations": []}, "NiaE7amNW7s": {"duration": 189.89, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NiaE7amNW7s", "annotations": [{"segment": [3.258679692667707, 92.72424943681747], "label": "Using the balance beam"}, {"segment": [119.97866141185646, 156.71286885647427], "label": "Using the balance beam"}]}, "KvscV2XxyTE": {"duration": 103.19, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KvscV2XxyTE", "annotations": [{"segment": [7.72713712324493, 101.4186747425897], "label": "Using the balance beam"}]}, "zlXvqnBqVvI": {"duration": 22.64, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zlXvqnBqVvI", "annotations": [{"segment": [1.9602024897322425, 3.8321074375975037], "label": "Shuffleboard"}]}, "YOIvlE8q9pY": {"duration": 48.368, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YOIvlE8q9pY", "annotations": []}, "R0sADQPdso8": 
{"duration": 114.336, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=R0sADQPdso8", "annotations": [{"segment": [19.611404056162247, 63.82620592823714], "label": "Hula hoop"}]}, "3l7quTy4c2s": {"duration": 174.29, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3l7quTy4c2s", "annotations": [{"segment": [21.75210077790867, 26.918224712661978], "label": "Cheerleading"}, {"segment": [35.34716376410159, 69.60672248930774], "label": "Cheerleading"}, {"segment": [87.00840237129485, 95.70924260842433], "label": "Cheerleading"}, {"segment": [99.78776146957878, 102.77867530109204], "label": "Cheerleading"}, {"segment": [104.41008284555382, 121.81176331981278], "label": "Cheerleading"}, {"segment": [122.08366457722308, 127.2497884680187], "label": "Cheerleading"}, {"segment": [129.96880104212167, 154.71181546645866], "label": "Cheerleading"}]}, "ooFuSspUuVY": {"duration": 210.84, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=ooFuSspUuVY", "annotations": []}, "hjsu3SGAdLs": {"duration": 131.38, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hjsu3SGAdLs", "annotations": [{"segment": [4.304128965158606, 51.03467201545204], "label": "Playing piano"}, {"segment": [60.46276403437089, 119.90073390795632], "label": "Playing piano"}]}, "wmfI4aiJdH4": {"duration": 67.71, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wmfI4aiJdH4", "annotations": []}, "dAa10hlgxCY": {"duration": 228.392, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dAa10hlgxCY", "annotations": [{"segment": [6.767705148205929, 220.1285148205928], "label": "Baton twirling"}]}, "P24G3V7FUfg": {"duration": 21.76, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=P24G3V7FUfg", "annotations": []}, "UxlDSfC7S-0": {"duration": 156.34, "subset": 
"training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UxlDSfC7S-0", "annotations": [{"segment": [21.701698907956317, 156.281], "label": "Mooping floor"}]}, "pOyP_kDaskY": {"duration": 197.07, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=pOyP_kDaskY", "annotations": [{"segment": [23.672665577223086, 28.284223806552262], "label": "Playing harmonica"}, {"segment": [36.585028619344776, 51.95688938377534], "label": "Playing harmonica"}, {"segment": [56.875884828393126, 59.33538255070202], "label": "Playing harmonica"}, {"segment": [78.70392711388456, 86.38985749609984], "label": "Playing harmonica"}, {"segment": [150.64423549141966, 157.7152914430577], "label": "Playing harmonica"}]}, "mvQxhjjVWOk": {"duration": 176.5, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=mvQxhjjVWOk", "annotations": [{"segment": [15.143866053961885, 173.74144382839316], "label": "Triple jump"}]}, "nzjivjnk2Ac": {"duration": 233.013, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nzjivjnk2Ac", "annotations": [{"segment": [17.983326833073324, 56.493077223088925], "label": "Longboarding"}, {"segment": [76.111251950078, 203.6661131045242], "label": "Longboarding"}]}, "WBkqGBFsdgI": {"duration": 121.232, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WBkqGBFsdgI", "annotations": [{"segment": [51.22407332293292, 58.59580343213728], "label": "Shuffleboard"}, {"segment": [71.82711388455539, 78.07761778471139], "label": "Shuffleboard"}, {"segment": [82.23671606864275, 97.73880967238689], "label": "Shuffleboard"}, {"segment": [100.19645865834633, 106.62415600624024], "label": "Shuffleboard"}]}, "fN2DiOswmOA": {"duration": 185.48, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=fN2DiOswmOA", "annotations": [{"segment": [98.383067800312, 109.37882243681747], "label": "Preparing 
salad"}, {"segment": [125.00436849921998, 151.33630723400935], "label": "Preparing salad"}]}, "rHctuDlE-Mw": {"duration": 92.21000000000001, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rHctuDlE-Mw", "annotations": []}, "oOu5vC3EB5E": {"duration": 65.13, "subset": "training", "resolution": "626x360", "url": "https://www.youtube.com/watch?v=oOu5vC3EB5E", "annotations": [{"segment": [0.01, 65.13165532879819], "label": "Playing squash"}]}, "jmmOBfS3JIg": {"duration": 105.07, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jmmOBfS3JIg", "annotations": [{"segment": [103.75867394695787, 104.86999999999999], "label": "Polishing shoes"}]}, "UA_RDwuO47s": {"duration": 72.53999999999999, "subset": "training", "resolution": "400x226", "url": "https://www.youtube.com/watch?v=UA_RDwuO47s", "annotations": [{"segment": [0.01, 72.32798751950078], "label": "Mixing drinks"}]}, "fxlTN3mCWDA": {"duration": 169.901, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fxlTN3mCWDA", "annotations": [{"segment": [17.460289719626168, 166.13730218068537], "label": "Playing congas"}]}, "FOF8YgUhRNA": {"duration": 68.27, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=FOF8YgUhRNA", "annotations": [{"segment": [33.65408216328653, 62.196150327613104], "label": "Mixing drinks"}]}, "231pbDe3uQc": {"duration": 10.89, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=231pbDe3uQc", "annotations": [{"segment": [0.01, 10.412480499219969], "label": "Doing motocross"}]}, "TxxUmibEy9Q": {"duration": 189.243, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TxxUmibEy9Q", "annotations": [{"segment": [68.49321093387954, 123.110642066499], "label": "Arm wrestling"}]}, "0UcBldDI0RA": {"duration": 156.573, "subset": "training", "resolution": "854x470", "url": 
"https://www.youtube.com/watch?v=0UcBldDI0RA", "annotations": [{"segment": [0.01, 141.15553510140404], "label": "Welding"}]}, "IGPZFun1Ok4": {"duration": 127.315, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IGPZFun1Ok4", "annotations": []}, "LE4FHZR2avg": {"duration": 152.347, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LE4FHZR2avg", "annotations": []}, "uw9x69DT8_g": {"duration": 27.911, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uw9x69DT8_g", "annotations": []}, "SID0y9y6Uk4": {"duration": 140.179, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SID0y9y6Uk4", "annotations": [{"segment": [0.01, 6.991126365054602], "label": "Doing fencing"}]}, "J65l9OEP3pI": {"duration": 120.164, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=J65l9OEP3pI", "annotations": [{"segment": [0.2, 114.55688802247916], "label": "Removing ice from car"}]}, "h-MWdTHW_Eg": {"duration": 62.07, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=h-MWdTHW_Eg", "annotations": [{"segment": [14.040103155146614, 49.091946894202295], "label": "Playing badminton"}]}, "rDT4ngAfeHs": {"duration": 11.7, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rDT4ngAfeHs", "annotations": [{"segment": [1.8987289734789392, 11.684486753619806], "label": "Vacuuming floor"}]}, "dWBnXy2nauU": {"duration": 30.3, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dWBnXy2nauU", "annotations": [{"segment": [4.129972698907956, 29.665003900156005], "label": "Hammer throw"}]}, "sFKOnFMJF2Q": {"duration": 119.281, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sFKOnFMJF2Q", "annotations": [{"segment": [16.42160838314306, 83.2969354859468], "label": "Longboarding"}, 
{"segment": [91.0990569812739, 110.79012551709944], "label": "Longboarding"}]}, "uPqh5uA1mhA": {"duration": 118.14, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uPqh5uA1mhA", "annotations": [{"segment": [31.332672418096724, 103.5821288174727], "label": "Shot put"}]}, "h5fX5z-unJc": {"duration": 21.85, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=h5fX5z-unJc", "annotations": [{"segment": [0.01, 20.94466551984489], "label": "Playing pool"}]}, "8l7SuE4_lCk": {"duration": 13.7, "subset": "training", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=8l7SuE4_lCk", "annotations": [{"segment": [0, 13.63], "label": "Brushing teeth"}]}, "BbQ39yhaC54": {"duration": 99.87, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BbQ39yhaC54", "annotations": [{"segment": [74.78491727424199, 88.02807958190328], "label": "Playing racquetball"}]}, "q6sLCLnTuik": {"duration": 120.419, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=q6sLCLnTuik", "annotations": [{"segment": [38.48019500780031, 79.78927457098284], "label": "Blow-drying hair"}]}, "zLZTqSaGxJo": {"duration": 136.6, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zLZTqSaGxJo", "annotations": [{"segment": [44.52467962119845, 93.31009413437764], "label": "Cricket"}]}, "5TV-V6Cxero": {"duration": 142.41, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=5TV-V6Cxero", "annotations": [{"segment": [32.75834633385336, 94.27741029641186], "label": "Starting a campfire"}]}, "1XNHcfxTqW0": {"duration": 46.84, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1XNHcfxTqW0", "annotations": []}, "Yojuicji624": {"duration": 146.983, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Yojuicji624", "annotations": 
[{"segment": [8.71345993447738, 135.74653371606865], "label": "Slacklining"}]}, "TzuHnPm0_-E": {"duration": 191.31, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=TzuHnPm0_-E", "annotations": []}, "RLMvrl_vaqc": {"duration": 20.39, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=RLMvrl_vaqc", "annotations": [{"segment": [1.2085967581903276, 10.33668280031201], "label": "Smoking hookah"}]}, "hZJDP-YVPXc": {"duration": 108.856, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=hZJDP-YVPXc", "annotations": []}, "lGwUq5vZGEk": {"duration": 236.449, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=lGwUq5vZGEk", "annotations": [{"segment": [63.80245397815913, 178.1456209048362], "label": "Braiding hair"}]}, "C6VTRxJn56A": {"duration": 107.14, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=C6VTRxJn56A", "annotations": []}, "xizex7lawY4": {"duration": 100.82, "subset": "testing", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=xizex7lawY4", "annotations": []}, "Yp-NJgTAOCc": {"duration": 156.67000000000002, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Yp-NJgTAOCc", "annotations": [{"segment": [71.20224748673947, 137.15287123088922], "label": "Breakdancing"}]}, "TRWDARS8lRE": {"duration": 70.055, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=TRWDARS8lRE", "annotations": [{"segment": [0.01, 59.62279538893277], "label": "Doing fencing"}]}, "R4ouBBtg-B4": {"duration": 121.302, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=R4ouBBtg-B4", "annotations": [{"segment": [7.758725064648844, 13.814315359008917], "label": "Slacklining"}, {"segment": [24.79007276753655, 81.18275665834634], "label": "Slacklining"}, {"segment": [89.31995604368174, 
97.45715542901716], "label": "Slacklining"}, {"segment": [102.75579688923555, 113.16384261466459], "label": "Slacklining"}]}, "G16xScfD5WM": {"duration": 103.747, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=G16xScfD5WM", "annotations": [{"segment": [9.549210558898546, 90.96027515132604], "label": "Capoeira"}]}, "bMCb1DT26aE": {"duration": 182.9, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bMCb1DT26aE", "annotations": []}, "yrPVjR4jbv0": {"duration": 170.83, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=yrPVjR4jbv0", "annotations": []}, "U6M5hbiHn5Q": {"duration": 70.78, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=U6M5hbiHn5Q", "annotations": [{"segment": [5.274078479999999, 58.534315620904835], "label": "Tumbling"}]}, "FLZPaPf027E": {"duration": 102.35, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FLZPaPf027E", "annotations": [{"segment": [15.1694042199688, 102.353559], "label": "Playing harmonica"}]}, "SpwOzrXw9JU": {"duration": 169.692, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SpwOzrXw9JU", "annotations": [{"segment": [88.1182418096724, 163.27013572542904], "label": "Sharpening knives"}]}, "Oent5pguFk4": {"duration": 21.18, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Oent5pguFk4", "annotations": [{"segment": [0.01, 10.15878731279251], "label": "Spinning"}]}, "Vox9KOxC1ZA": {"duration": 64.528, "subset": "testing", "resolution": "512x288", "url": "https://www.youtube.com/watch?v=Vox9KOxC1ZA", "annotations": []}, "WltsIClflnA": {"duration": 68.871, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WltsIClflnA", "annotations": [{"segment": [0.01, 68.841], "label": "Swinging at the playground"}]}, "yxZ4ouqcld4": {"duration": 
137.3, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yxZ4ouqcld4", "annotations": [{"segment": [49.45814508580344, 81.58571138845554], "label": "Running a marathon"}, {"segment": [98.5024336973479, 119.27359906396256], "label": "Running a marathon"}]}, "jL5kxxC791g": {"duration": 141.827, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=jL5kxxC791g", "annotations": []}, "VcbMGRcN9Cc": {"duration": 58.352, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=VcbMGRcN9Cc", "annotations": [{"segment": [0.01, 57.84226330753417], "label": "Table soccer"}]}, "WdCoVsU8Bbs": {"duration": 23.128, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WdCoVsU8Bbs", "annotations": [{"segment": [7.757130904836194, 10.318788087363494], "label": "Playing blackjack"}]}, "C8Toxe4fE30": {"duration": 7.52, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=C8Toxe4fE30", "annotations": [{"segment": [0.01, 7.317870677067083], "label": "Discus throw"}]}, "zsw9WEsSowI": {"duration": 89.33, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=zsw9WEsSowI", "annotations": [{"segment": [12.187207488299531, 84.75332293291731], "label": "Playing flauta"}]}, "hMUoeOFmquI": {"duration": 107.904, "subset": "training", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=hMUoeOFmquI", "annotations": [{"segment": [13.963811232449299, 16.995258970358815], "label": "Running a marathon"}, {"segment": [74.03875195007801, 75.55318096723869], "label": "Running a marathon"}]}, "Wj7VGbIgjXE": {"duration": 43.074, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=Wj7VGbIgjXE", "annotations": []}, "bLqd98mmu3w": {"duration": 94.645, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=bLqd98mmu3w", 
"annotations": [{"segment": [0.2, 75.9220171495327], "label": "Swimming"}]}, "zRBspE-uJUo": {"duration": 172.989, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zRBspE-uJUo", "annotations": [{"segment": [14.258781420486807, 20.446556271225443], "label": "Swimming"}, {"segment": [24.751095297826232, 41.70021771506685], "label": "Swimming"}, {"segment": [45.735723052505094, 55.42093586235686], "label": "Swimming"}, {"segment": [65.10614867220865, 86.35981011605006], "label": "Swimming"}, {"segment": [86.89787749437514, 89.05014642519832], "label": "Swimming"}, {"segment": [116.22254885288105, 117.83675097729785], "label": "Swimming"}, {"segment": [148.23755765381418, 156.30856827589815], "label": "Swimming"}]}, "2GCNxOKMtBo": {"duration": 193.91, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2GCNxOKMtBo", "annotations": [{"segment": [22.680304212168487, 63.50485179407176], "label": "Ice fishing"}, {"segment": [73.4841856474259, 87.39477223088923], "label": "Ice fishing"}, {"segment": [113.40152106084244, 171.765503900156], "label": "Ice fishing"}]}, "uO9qoK8zGQc": {"duration": 189.5, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uO9qoK8zGQc", "annotations": [{"segment": [5.321300901015633, 165.8472114149872], "label": "Tai chi"}]}, "UCOn2HkJJt8": {"duration": 185.16, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UCOn2HkJJt8", "annotations": [{"segment": [7.4247155, 178.29951248049923], "label": "Archery"}]}, "-b2lkhdJXf4": {"duration": 107.671, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=-b2lkhdJXf4", "annotations": [{"segment": [3.862313572542902, 93.36723244929797], "label": "Futsal"}]}, "Ptw9hFDV4ow": {"duration": 103.7, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ptw9hFDV4ow", "annotations": [{"segment": 
[7.7612480499219965, 92.67728609999999], "label": "Doing kickboxing"}]}, "KwcoiO_6dGg": {"duration": 52.617, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KwcoiO_6dGg", "annotations": []}, "7XGimCWcafc": {"duration": 223.353, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7XGimCWcafc", "annotations": [{"segment": [53.29006708268331, 179.04279875195007], "label": "Changing car wheel"}]}, "xYuqZ_RrCC0": {"duration": 102.4, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xYuqZ_RrCC0", "annotations": [{"segment": [11.788288000000001, 73.06035200000001], "label": "Shaving"}]}, "EY4YIa-kNgs": {"duration": 201.78, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EY4YIa-kNgs", "annotations": [{"segment": [43.69971117, 77.55049173], "label": "Washing dishes"}, {"segment": [102.34130539, 178.26779842277693], "label": "Washing dishes"}]}, "C5KcdwGSvbw": {"duration": 109.158, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=C5KcdwGSvbw", "annotations": [{"segment": [10.715012480499219, 68.03182527301092], "label": "Baking cookies"}, {"segment": [75.52917940717629, 82.8439422776911], "label": "Baking cookies"}]}, "CtA4VynDEkI": {"duration": 24.033, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CtA4VynDEkI", "annotations": [{"segment": [0, 24.032652], "label": "Using the monkey bar"}]}, "YcDlkZkPb6g": {"duration": 208.14, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YcDlkZkPb6g", "annotations": [{"segment": [55.09337067, 197.3612893354134], "label": "Layup drill in basketball"}]}, "e0Qj2F3V6fM": {"duration": 214.623, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=e0Qj2F3V6fM", "annotations": [{"segment": [162.7243540624025, 171.09494840717628], "label": 
"Making an omelette"}]}, "s9Q8SZbixV4": {"duration": 234.85, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=s9Q8SZbixV4", "annotations": [{"segment": [92.4588702, 228.34431064820595], "label": "Cumbia"}, {"segment": [15.564768568642746, 90.64188754680187], "label": "Cumbia"}]}, "Obj__zWaZqk": {"duration": 9.149, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Obj__zWaZqk", "annotations": [{"segment": [0, 9.148661], "label": "Spread mulch"}]}, "EcQ7DcVefdw": {"duration": 151.57999999999998, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=EcQ7DcVefdw", "annotations": [{"segment": [8.00033962, 151.57986394557824], "label": "Polishing shoes"}]}, "IaXie5azIcQ": {"duration": 63.53, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IaXie5azIcQ", "annotations": []}, "t13MQSYjlas": {"duration": 7.34, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=t13MQSYjlas", "annotations": []}, "zwx1Kig4nXg": {"duration": 185.16, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zwx1Kig4nXg", "annotations": [{"segment": [5.7770957368907006, 136.62831316848673], "label": "Cumbia"}]}, "1G3rv9ssDY4": {"duration": 105.767, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1G3rv9ssDY4", "annotations": [{"segment": [11.220200862456268, 97.02173648049921], "label": "BMX"}]}, "RztM-NVwV5o": {"duration": 235.43, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RztM-NVwV5o", "annotations": [{"segment": [110.18411920150275, 188.0475634372314], "label": "Paintball"}]}, "KRfdZgxLZPY": {"duration": 143.92000000000002, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KRfdZgxLZPY", "annotations": [{"segment": [4.991623633209419, 134.52934637464244], "label": 
"Skateboarding"}]}, "okvQJRTfGHk": {"duration": 57.888, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=okvQJRTfGHk", "annotations": [{"segment": [2.2576782605304215, 46.146943645241805], "label": "Canoeing"}]}, "hhE8W4Jo88I": {"duration": 201.155, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hhE8W4Jo88I", "annotations": []}, "AIQ_SRvssmE": {"duration": 135.4, "subset": "testing", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=AIQ_SRvssmE", "annotations": []}, "UFJ7-LQC2N4": {"duration": 188.92, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UFJ7-LQC2N4", "annotations": []}, "puK4NxGKNdQ": {"duration": 181.05, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=puK4NxGKNdQ", "annotations": [{"segment": [8.473292156176043, 16.09925509673448], "label": "Mowing the lawn"}, {"segment": [63.832134243192854, 147.43528264586584], "label": "Mowing the lawn"}]}, "MYRsm8m9mk4": {"duration": 141.247, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=MYRsm8m9mk4", "annotations": [{"segment": [7.710975039001561, 47.147104524180975], "label": "Tug of war"}, {"segment": [72.48316536661467, 105.53020124804992], "label": "Tug of war"}]}, "dwAvPDFGKcs": {"duration": 57.261, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=dwAvPDFGKcs", "annotations": []}, "yAT27EJRTXQ": {"duration": 189.75, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yAT27EJRTXQ", "annotations": []}, "LAkaYds-8X0": {"duration": 118.678, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LAkaYds-8X0", "annotations": [{"segment": [8.146328037441497, 10.738341503900157], "label": "Playing ten pins"}, {"segment": [11.664060599063962, 13.330354970358815], "label": "Playing ten pins"}, 
{"segment": [14.441217884555384, 17.77380662714509], "label": "Playing ten pins"}, {"segment": [19.81038863650546, 22.402402102964118], "label": "Playing ten pins"}, {"segment": [30.363586321372853, 33.32588742589704], "label": "Playing ten pins"}, {"segment": [40.36135254914196, 45.175091843993755], "label": "Playing ten pins"}, {"segment": [48.13739294851794, 51.65512551014041], "label": "Playing ten pins"}, {"segment": [55.913433347893914, 59.06087827145086], "label": "Playing ten pins"}, {"segment": [76.64954107956318, 79.05641072698907], "label": "Playing ten pins"}, {"segment": [82.57414328861154, 86.6473073073323], "label": "Playing ten pins"}, {"segment": [88.86903313572543, 91.64619042121684], "label": "Playing ten pins"}, {"segment": [95.90449825897036, 102.56967574414976], "label": "Playing ten pins"}, {"segment": [103.68053865834634, 107.38341503900156], "label": "Playing ten pins"}]}, "8FO4W-SBpxs": {"duration": 177.981, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8FO4W-SBpxs", "annotations": [{"segment": [0.2, 177.961], "label": "Capoeira"}]}, "GTxmHim5JnY": {"duration": 15.14, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GTxmHim5JnY", "annotations": [{"segment": [0, 15.139409], "label": "Swinging at the playground"}]}, "-LtQMRfj0eM": {"duration": 227.695, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-LtQMRfj0eM", "annotations": [{"segment": [16.336171606864273, 213.45438533541343], "label": "Trimming branches or hedges"}]}, "QeL3ScQVelo": {"duration": 170.67000000000002, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=QeL3ScQVelo", "annotations": [{"segment": [72.42017680707228, 124.87155486219447], "label": "Preparing salad"}]}, "IoGpS8NQklE": {"duration": 231.968, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IoGpS8NQklE", 
"annotations": [{"segment": [0.01, 231.881], "label": "Beach soccer"}]}, "aYrhuTGO440": {"duration": 139.85, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aYrhuTGO440", "annotations": [{"segment": [0.7129336345227558, 138.82399234168787], "label": "Walking the dog"}]}, "hz3n1wrXYAc": {"duration": 97.78, "subset": "validation", "resolution": "850x480", "url": "https://www.youtube.com/watch?v=hz3n1wrXYAc", "annotations": [{"segment": [40.17902262090483, 61.52651092043682], "label": "BMX"}]}, "E-6XZrDYRuM": {"duration": 207.91, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=E-6XZrDYRuM", "annotations": [{"segment": [3.567895826037123, 52.221020726543344], "label": "Horseback riding"}, {"segment": [58.38374988060747, 171.58335265366617], "label": "Horseback riding"}]}, "4gYF0U37kr0": {"duration": 215.853, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=4gYF0U37kr0", "annotations": []}, "-XCESzrIWXA": {"duration": 171.154, "subset": "training", "resolution": "488x276", "url": "https://www.youtube.com/watch?v=-XCESzrIWXA", "annotations": [{"segment": [26.392940110623687, 29.3254890118041], "label": "Futsal"}, {"segment": [46.387591709581024, 146.89403896573208], "label": "Futsal"}]}, "nHwqBo0xvog": {"duration": 48.09, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nHwqBo0xvog", "annotations": [{"segment": [1.72780184, 32.37813128], "label": "Playing kickball"}, {"segment": [38.653615280000004, 41.988518080000006], "label": "Playing kickball"}]}, "Po6tK3rYE8E": {"duration": 106.952, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Po6tK3rYE8E", "annotations": [{"segment": [0.2, 86.73291731669266], "label": "Hanging wallpaper"}]}, "wvewX55VADs": {"duration": 216.9, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wvewX55VADs", 
"annotations": [{"segment": [108.61778007332293, 212.15996294695788], "label": "Tango"}]}, "zNrIr2VuyCA": {"duration": 116.91, "subset": "testing", "resolution": "620x480", "url": "https://www.youtube.com/watch?v=zNrIr2VuyCA", "annotations": []}, "ZJbGChNlQ7U": {"duration": 73.78999999999999, "subset": "testing", "resolution": "638x360", "url": "https://www.youtube.com/watch?v=ZJbGChNlQ7U", "annotations": []}, "FgRIl4bNl_M": {"duration": 121.16, "subset": "validation", "resolution": "640x358", "url": "https://www.youtube.com/watch?v=FgRIl4bNl_M", "annotations": [{"segment": [6.42382527301092, 115.25098283931358], "label": "Shoveling snow"}]}, "352jrqIWSFY": {"duration": 7.755, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=352jrqIWSFY", "annotations": [{"segment": [0.7407823712948518, 6.727267550702028], "label": "Doing a powerbomb"}]}, "e_k31tUVWHI": {"duration": 238.05, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=e_k31tUVWHI", "annotations": []}, "RjZ7jJBE1Qw": {"duration": 123.415, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RjZ7jJBE1Qw", "annotations": [{"segment": [59.300250105242306, 90.87570707020281], "label": "Swimming"}]}, "v_Vg4a8igc0": {"duration": 36.73, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=v_Vg4a8igc0", "annotations": [{"segment": [0, 36.733968], "label": "Bathing dog"}]}, "zDdD_qA86rg": {"duration": 172.9, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zDdD_qA86rg", "annotations": []}, "1NAlbF88oUI": {"duration": 64.645, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1NAlbF88oUI", "annotations": [{"segment": [17.99508346333853, 60.94133307332293], "label": "Arm wrestling"}]}, "ATk8OkvNHHQ": {"duration": 179.375, "subset": "validation", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=ATk8OkvNHHQ", "annotations": [{"segment": [14.785112278290011, 173.7948561447324], "label": "BMX"}]}, "RhEunVjB_Ns": {"duration": 154.82999999999998, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=RhEunVjB_Ns", "annotations": [{"segment": [8.212546496099845, 152.4152027196734], "label": "Cheerleading"}]}, "UBQfURrVB_Y": {"duration": 229.58, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UBQfURrVB_Y", "annotations": [{"segment": [0, 180.51298595943837], "label": "Removing ice from car"}]}, "qY3Tn9-of-Q": {"duration": 20.9, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qY3Tn9-of-Q", "annotations": []}, "FGAlwzRvAHg": {"duration": 183.485, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FGAlwzRvAHg", "annotations": [{"segment": [19.748531981279253, 169.43668018720751], "label": "Waxing skis"}]}, "LG7p5JrgMfE": {"duration": 161.32999999999998, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LG7p5JrgMfE", "annotations": [{"segment": [0.01, 155.61884555382215], "label": "Tango"}]}, "dKiy-7TZqeI": {"duration": 17.532, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dKiy-7TZqeI", "annotations": [{"segment": [2.5668536074766353, 13.407714056074767], "label": "Calf roping"}]}, "plZs-3GtoOo": {"duration": 65.2, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=plZs-3GtoOo", "annotations": [{"segment": [4.666435569999999, 58.87015407644306], "label": "Getting a haircut"}]}, "DU36SNYN-rw": {"duration": 110.388, "subset": "training", "resolution": "300x240", "url": "https://www.youtube.com/watch?v=DU36SNYN-rw", "annotations": [{"segment": [34.786745129669136, 61.13512043681748], "label": "Disc dog"}]}, "hq48S65dlyo": {"duration": 105.65, "subset": "testing", 
"resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=hq48S65dlyo", "annotations": []}, "55bimE5eU9E": {"duration": 113.85, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=55bimE5eU9E", "annotations": [{"segment": [15.4262685, 76.4027217], "label": "Snatch"}]}, "fbr7v8RMkf8": {"duration": 202.804, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=fbr7v8RMkf8", "annotations": []}, "TZOhIMjwEE8": {"duration": 82.41, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TZOhIMjwEE8", "annotations": []}, "8d8juWOKb0c": {"duration": 157.083, "subset": "training", "resolution": "582x480", "url": "https://www.youtube.com/watch?v=8d8juWOKb0c", "annotations": [{"segment": [9.799750390015602, 37.23905148205928], "label": "Doing fencing"}, {"segment": [46.548814352574105, 101.68535881435257], "label": "Doing fencing"}, {"segment": [111.97640717628707, 130.10825429017163], "label": "Doing fencing"}, {"segment": [135.4988034321373, 139.41920280811235], "label": "Doing fencing"}]}, "dl1RH5wfGGM": {"duration": 148.655, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=dl1RH5wfGGM", "annotations": []}, "16725zS5kVM": {"duration": 236.216, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=16725zS5kVM", "annotations": []}, "QN6YFgrx_Ig": {"duration": 158.69, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QN6YFgrx_Ig", "annotations": [{"segment": [134.9195274648986, 156.20958134009362], "label": "Breakdancing"}]}, "oVPFTkC4-Lc": {"duration": 70.334, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=oVPFTkC4-Lc", "annotations": [{"segment": [10.53352751326053, 70.133241], "label": "Mooping floor"}]}, "Jz9ZxenBLa8": {"duration": 117.145, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=Jz9ZxenBLa8", "annotations": [{"segment": [0.01, 107.36738585189666], "label": "Riding bumper cars"}]}, "El4QfhJ6RvE": {"duration": 11.332, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=El4QfhJ6RvE", "annotations": [{"segment": [0.2, 10.659587531981279], "label": "Powerbocking"}]}, "o2mqzjWJJNw": {"duration": 72.702, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=o2mqzjWJJNw", "annotations": [{"segment": [11.33244929797192, 61.43878627145086], "label": "Welding"}]}, "o-aSCtwxsTw": {"duration": 41.1, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=o-aSCtwxsTw", "annotations": [{"segment": [0.01, 41.041], "label": "Kite flying"}]}, "unz00-r84NY": {"duration": 49.668, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=unz00-r84NY", "annotations": [{"segment": [0.01, 49.621], "label": "Hurling"}]}, "IOoj4yWCoPg": {"duration": 79.11, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IOoj4yWCoPg", "annotations": [{"segment": [6.091469999999999, 75.28361154446178], "label": "Hand washing clothes"}]}, "BhCNHWQhhEw": {"duration": 191.61, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=BhCNHWQhhEw", "annotations": [{"segment": [2.9892521684867397, 188.32288661466458], "label": "Getting a tattoo"}]}, "QASZ8CTxBSg": {"duration": 41.61, "subset": "validation", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=QASZ8CTxBSg", "annotations": [{"segment": [0.8428907956318253, 34.62336037441498], "label": "Wrapping presents"}]}, "6gZuc4umTPk": {"duration": 233.13, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=6gZuc4umTPk", "annotations": [{"segment": [21.45799116318394, 222.94489123782637], "label": "Doing nails"}]}, "uyGxlvak-Bg": {"duration": 156.29, "subset": 
"validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uyGxlvak-Bg", "annotations": [{"segment": [71.19768007046812, 93.62982584609507], "label": "Cleaning shoes"}]}, "_vUE7PhDBcA": {"duration": 136.417, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_vUE7PhDBcA", "annotations": [{"segment": [0.009999999999999998, 120.02980073634944], "label": "Running a marathon"}]}, "gnVupEzu59g": {"duration": 193.376, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=gnVupEzu59g", "annotations": [{"segment": [12.393933578546754, 56.13723397722937], "label": "Snowboarding"}, {"segment": [56.438911911013385, 183.14364357414104], "label": "Snowboarding"}]}, "usXxdwUHwA0": {"duration": 211.4, "subset": "testing", "resolution": "900x720", "url": "https://www.youtube.com/watch?v=usXxdwUHwA0", "annotations": []}, "8P0N5psQrEI": {"duration": 110.179, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8P0N5psQrEI", "annotations": [{"segment": [15.680510230625904, 95.62709278514303], "label": "Putting on shoes"}]}, "qI_Bo0D6p7I": {"duration": 221.286, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=qI_Bo0D6p7I", "annotations": [{"segment": [2.415928237129485, 211.91141965678628], "label": "Hula hoop"}]}, "Qv97mL6UyBk": {"duration": 23.1, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Qv97mL6UyBk", "annotations": []}, "qsTCTQo-wI8": {"duration": 196.232, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qsTCTQo-wI8", "annotations": [{"segment": [29.64873545679954, 193.78657848909657], "label": "Playing congas"}]}, "UBtVJ-w9O28": {"duration": 236.171, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=UBtVJ-w9O28", "annotations": [{"segment": [13.996627145085803, 225.05103120124807], "label": 
"Capoeira"}]}, "NsYBZUTzgDw": {"duration": 33.066, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NsYBZUTzgDw", "annotations": [{"segment": [8.711277691107643, 33.021], "label": "Clipping cat claws"}]}, "-MFzpFMdWZs": {"duration": 46.97, "subset": "validation", "resolution": "128x96", "url": "https://www.youtube.com/watch?v=-MFzpFMdWZs", "annotations": [{"segment": [0.6954810093603744, 35.908782641185645], "label": "Shaving legs"}]}, "914SKlbnHEs": {"duration": 226.209, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=914SKlbnHEs", "annotations": []}, "W0RYvQUUNe0": {"duration": 11.54, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=W0RYvQUUNe0", "annotations": [{"segment": [0.20000000000000004, 11.301], "label": "Windsurfing"}]}, "w6ofRgXA0xc": {"duration": 124.51, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=w6ofRgXA0xc", "annotations": [{"segment": [39.30498345, 95.36709485], "label": "Starting a campfire"}]}, "w8lqIgJn09Y": {"duration": 61.7, "subset": "training", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=w8lqIgJn09Y", "annotations": [{"segment": [4.48497648, 52.55179258598915], "label": "Hammer throw"}]}, "priCnteGpSE": {"duration": 227.79, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=priCnteGpSE", "annotations": []}, "LMxfRVcBrD8": {"duration": 187.13, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LMxfRVcBrD8", "annotations": [{"segment": [27.865837134165364, 147.4992740452418], "label": "Preparing pasta"}]}, "EwzkySgI6CQ": {"duration": 130.57, "subset": "training", "resolution": "362x272", "url": "https://www.youtube.com/watch?v=EwzkySgI6CQ", "annotations": [{"segment": [3.1543668666146645, 123.63083041731669], "label": "Getting a tattoo"}]}, "mTtBz5d83C4": {"duration": 21.34, "subset": 
"training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=mTtBz5d83C4", "annotations": [{"segment": [0.19999999999999998, 10.952536775351016], "label": "Snow tubing"}]}, "QQBmydn6--I": {"duration": 226.67000000000002, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QQBmydn6--I", "annotations": [{"segment": [12.376851575663027, 115.9887804804992], "label": "Starting a campfire"}]}, "wz_kM0oBW5g": {"duration": 72.934, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=wz_kM0oBW5g", "annotations": [{"segment": [32.4277091491823, 67.81373541341654], "label": "Curling"}]}, "FX4inHmWQtE": {"duration": 156.782, "subset": "validation", "resolution": "1920x960", "url": "https://www.youtube.com/watch?v=FX4inHmWQtE", "annotations": [{"segment": [58.426737909516376, 156.701], "label": "River tubing"}]}, "VBY8feOQgy4": {"duration": 159.289, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VBY8feOQgy4", "annotations": [{"segment": [6.459073322932918, 87.44591575663027], "label": "Doing fencing"}]}, "QMm6gYzsMw8": {"duration": 185.78, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QMm6gYzsMw8", "annotations": [{"segment": [0.5796644929797192, 185.20280550702026], "label": "Zumba"}]}, "g7_YL5C71u8": {"duration": 23.22, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=g7_YL5C71u8", "annotations": []}, "v2zVnmbPmeQ": {"duration": 238.12, "subset": "validation", "resolution": "540x360", "url": "https://www.youtube.com/watch?v=v2zVnmbPmeQ", "annotations": [{"segment": [25.94118744, 223.75675507020281], "label": "Putting on makeup"}]}, "bDkSq9T55RI": {"duration": 40.403, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=bDkSq9T55RI", "annotations": [{"segment": [0.01, 29.96496645111607], "label": "Decorating the Christmas tree"}]}, 
"ObkyDlB5wvs": {"duration": 167.834, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ObkyDlB5wvs", "annotations": [{"segment": [44.904065695961165, 48.30787242510108], "label": "Doing fencing"}, {"segment": [60.875774194233074, 65.58873735765756], "label": "Doing fencing"}, {"segment": [121.35880145818078, 131.57022164560053], "label": "Doing fencing"}]}, "4zFiORtQrlA": {"duration": 106.37, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4zFiORtQrlA", "annotations": [{"segment": [42.44288299531981, 100.13867706708267], "label": "Fixing bicycle"}]}, "tuhHQ-lHIs4": {"duration": 95.67, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tuhHQ-lHIs4", "annotations": [{"segment": [23.730002334787265, 54.026797768509375], "label": "Dodgeball"}, {"segment": [59.99660853042121, 74.62264742589704], "label": "Dodgeball"}]}, "ql7I6qtyhBU": {"duration": 187.78, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ql7I6qtyhBU", "annotations": [{"segment": [35.153682065649974, 80.56052140044785], "label": "Curling"}, {"segment": [87.59125781357785, 132.41220244728157], "label": "Curling"}]}, "PpbBWqlsbxk": {"duration": 123.925, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PpbBWqlsbxk", "annotations": [{"segment": [20.29971027412525, 110.19842720239421], "label": "Rafting"}]}, "jlhW3eBciyM": {"duration": 89.07, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jlhW3eBciyM", "annotations": []}, "fOuFF7dGPtI": {"duration": 145.636, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fOuFF7dGPtI", "annotations": [{"segment": [119.9615934102964, 122.6879932605304], "label": "Playing ten pins"}]}, "P2H4fTMMHNE": {"duration": 90.26, "subset": "training", "resolution": "648x480", "url": 
"https://www.youtube.com/watch?v=P2H4fTMMHNE", "annotations": [{"segment": [15.792235229999998, 88.05534028783151], "label": "Ironing clothes"}]}, "drfE5oASHaU": {"duration": 201.36, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=drfE5oASHaU", "annotations": [{"segment": [14.136274516380656, 20.419063190327613], "label": "Paintball"}]}, "WJ4tCIdAgVo": {"duration": 44.652, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WJ4tCIdAgVo", "annotations": [{"segment": [0.01, 44.65167800453515], "label": "Hitting a pinata"}]}, "oNpkjfX2rTc": {"duration": 58.05, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oNpkjfX2rTc", "annotations": [{"segment": [30.322987519500785, 36.568617784711385], "label": "Sharpening knives"}]}, "JUhfg2uTcvM": {"duration": 184.86, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JUhfg2uTcvM", "annotations": []}, "_j5JUQzOCtc": {"duration": 129.89, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_j5JUQzOCtc", "annotations": [{"segment": [14.88042752, 49.027085803432136], "label": "Snatch"}, {"segment": [58.1436926677067, 125.80917472698907], "label": "Snatch"}]}, "2AjyB3mCW_U": {"duration": 227.02, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=2AjyB3mCW_U", "annotations": []}, "3Y46yorcd5Q": {"duration": 234.034, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3Y46yorcd5Q", "annotations": [{"segment": [10.264006083414445, 195.88966735021017], "label": "Surfing"}]}, "MYzG-TsW8w8": {"duration": 48.437, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MYzG-TsW8w8", "annotations": [{"segment": [11.271747008580574, 21.60795399923478], "label": "Fun sliding down"}]}, "enASD1KDX24": {"duration": 63.809, "subset": "validation", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=enASD1KDX24", "annotations": [{"segment": [46.082716510903424, 60.80057943925234], "label": "Raking leaves"}]}, "7ET-TtUVU7s": {"duration": 130.497, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7ET-TtUVU7s", "annotations": [{"segment": [25.8037492211838, 104.65329439252338], "label": "Canoeing"}]}, "-JqLjPz-07E": {"duration": 222.59, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-JqLjPz-07E", "annotations": [{"segment": [32.42855434, 58.733767820000004], "label": "Getting a haircut"}, {"segment": [69.43125098, 180.86893188], "label": "Getting a haircut"}]}, "PziLzNjG2bI": {"duration": 21.11, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PziLzNjG2bI", "annotations": [{"segment": [0.01, 19.675804212168487], "label": "High jump"}]}, "ZWlwKbuK2fM": {"duration": 174.5, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZWlwKbuK2fM", "annotations": [{"segment": [0, 173.1368206564997], "label": "Doing karate"}]}, "j_ImkPKoA-U": {"duration": 23.31, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=j_ImkPKoA-U", "annotations": [{"segment": [0.2, 22.621813959438377], "label": "Kayaking"}]}, "xBtydVn3MVI": {"duration": 174.13, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=xBtydVn3MVI", "annotations": [{"segment": [18.246044385227883, 168.23345904877928], "label": "Shaving"}]}, "afAw8FA9Wbk": {"duration": 84.57, "subset": "testing", "resolution": "426x180", "url": "https://www.youtube.com/watch?v=afAw8FA9Wbk", "annotations": []}, "p4RUPQAV0ro": {"duration": 34.23, "subset": "training", "resolution": "480x272", "url": "https://www.youtube.com/watch?v=p4RUPQAV0ro", "annotations": [{"segment": [5.392897678627144, 27.071278446177843], "label": "Clean and jerk"}]}, 
"HQdnIIRVqEk": {"duration": 143.904, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HQdnIIRVqEk", "annotations": [{"segment": [27.833453978159127, 125.71694227769112], "label": "Rollerblading"}]}, "PyM_VG5KFa4": {"duration": 93.414, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PyM_VG5KFa4", "annotations": [{"segment": [12.970069968798752, 81.75516014040562], "label": "Playing drums"}]}, "CrCtYGvG9Uw": {"duration": 99.5, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CrCtYGvG9Uw", "annotations": [{"segment": [9.616824141172605, 86.87383348696699], "label": "Getting a haircut"}]}, "J_QIJTz0UYM": {"duration": 112.64, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=J_QIJTz0UYM", "annotations": []}, "Z1img-5JbDo": {"duration": 11.31, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Z1img-5JbDo", "annotations": [{"segment": [1.2699336973478939, 5.443841653666146], "label": "High jump"}]}, "IN8-B39kq_k": {"duration": 66.76, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IN8-B39kq_k", "annotations": [{"segment": [2.916078521060842, 5.832157042121684], "label": "Getting a tattoo"}, {"segment": [7.602633287051481, 9.581400854914197], "label": "Getting a tattoo"}, {"segment": [25.932269705148205, 32.07686373166926], "label": "Getting a tattoo"}, {"segment": [44.78263443057722, 49.156752212168485], "label": "Getting a tattoo"}, {"segment": [54.57232660842433, 57.90498777535101], "label": "Getting a tattoo"}]}, "OMGTFZ9csg0": {"duration": 31.371, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OMGTFZ9csg0", "annotations": [{"segment": [11.76839469578783, 20.60689859594384], "label": "Waxing skis"}]}, "9g1omTdpoYs": {"duration": 125.017, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=9g1omTdpoYs", "annotations": [{"segment": [30.035101899314068, 87.1798074851794], "label": "Making a cake"}]}, "UomiLRs--C8": {"duration": 53.731, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UomiLRs--C8", "annotations": [{"segment": [6.194873634945398, 50.749900156006234], "label": "Welding"}]}, "cwhkG3LhZO8": {"duration": 209.26, "subset": "training", "resolution": "352x288", "url": "https://www.youtube.com/watch?v=cwhkG3LhZO8", "annotations": [{"segment": [25.463560126078512, 209.258231292517], "label": "Playing guitarra"}]}, "fSUwyioi_ws": {"duration": 103.469, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=fSUwyioi_ws", "annotations": [{"segment": [66.15071762870515, 95.19249609984399], "label": "Sharpening knives"}]}, "STAvUAslEYM": {"duration": 157.362, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=STAvUAslEYM", "annotations": [{"segment": [42.63298130841121, 133.79636448598131], "label": "Futsal"}]}, "MDsJa6Yitwc": {"duration": 41.053, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=MDsJa6Yitwc", "annotations": []}, "dojDT4CtenU": {"duration": 76.98, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dojDT4CtenU", "annotations": [{"segment": [64.84563547581902, 69.40884686115444], "label": "Cricket"}, {"segment": [1.2008451222402639, 49.47481817784712], "label": "Cricket"}]}, "_n9eNF1WaFU": {"duration": 201.55, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=_n9eNF1WaFU", "annotations": [{"segment": [22.010053697347896, 102.81839370046802], "label": "Grooming dog"}, {"segment": [135.83347424648986, 183.62673370358814], "label": "Grooming dog"}]}, "5QS_VBDwKzw": {"duration": 142.5, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5QS_VBDwKzw", 
"annotations": [{"segment": [7.33315600624025, 90.88669110764431], "label": "Removing curlers"}]}, "b8S0HFHh_p0": {"duration": 187.43, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=b8S0HFHh_p0", "annotations": [{"segment": [20.154455430000002, 139.70003615288613], "label": "Bungee jumping"}]}, "HdNOiIdGqvE": {"duration": 71.5, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HdNOiIdGqvE", "annotations": [{"segment": [49.4128143525741, 58.3361216848674], "label": "Washing hands"}]}, "IYLJU87AkOg": {"duration": 160.334, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IYLJU87AkOg", "annotations": []}, "v-LmMLpvsbk": {"duration": 224.28, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=v-LmMLpvsbk", "annotations": [{"segment": [18.008261776911077, 28.848186341653665], "label": "Breakdancing"}]}, "hkBbDre6unA": {"duration": 73.03, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=hkBbDre6unA", "annotations": []}, "f7qFnCkFIuQ": {"duration": 34.552, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=f7qFnCkFIuQ", "annotations": [{"segment": [2.6951086206713573, 34.5512925170068], "label": "Painting fence"}]}, "cXfflEWa83E": {"duration": 181.07, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=cXfflEWa83E", "annotations": [{"segment": [12.802588280758012, 169.02799949571127], "label": "Wakeboarding"}]}, "dUa0E0ASviM": {"duration": 92.23, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=dUa0E0ASviM", "annotations": [{"segment": [11.779472698907956, 79.58326677067083], "label": "Tug of war"}]}, "CLbEvGHmK8k": {"duration": 175.03, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CLbEvGHmK8k", "annotations": [{"segment": [0.2, 
166.2937889204368], "label": "Doing step aerobics"}]}, "70rKlFJdkKw": {"duration": 147.656, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=70rKlFJdkKw", "annotations": [{"segment": [0.01, 147.6556916099773], "label": "Swinging at the playground"}]}, "07GFy_Z7Nuc": {"duration": 60.16, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=07GFy_Z7Nuc", "annotations": [{"segment": [2.203109712168487, 27.234186016380654], "label": "Using parallel bars"}, {"segment": [27.195028859999997, 54.98399345475819], "label": "Using parallel bars"}]}, "RrEJ2-TfWCI": {"duration": 67.39, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RrEJ2-TfWCI", "annotations": [{"segment": [0, 67.384307], "label": "Playing lacrosse"}]}, "nyEcPLZQerc": {"duration": 86.379, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nyEcPLZQerc", "annotations": [{"segment": [1.6170651724028144, 5.929238965476986], "label": "Running a marathon"}, {"segment": [16.170651724028144, 19.674292930900908], "label": "Running a marathon"}, {"segment": [21.830379827437994, 24.525488448109353], "label": "Running a marathon"}, {"segment": [49.859508736349454, 52.150351029641186], "label": "Running a marathon"}, {"segment": [62.930785351014045, 64.27833964118565], "label": "Running a marathon"}]}, "sWv11vroZKA": {"duration": 63.53, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sWv11vroZKA", "annotations": []}, "ZmkJColoU0M": {"duration": 156.108, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZmkJColoU0M", "annotations": []}, "5O9myPtSriI": {"duration": 128.24, "subset": "validation", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=5O9myPtSriI", "annotations": [{"segment": [18.40628757566303, 43.81496716380656], "label": "Archery"}]}, "yfeKtTU0aOE": {"duration": 41.1, 
"subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yfeKtTU0aOE", "annotations": [{"segment": [6.658758190327613, 37.537825273010924], "label": "Using the rowing machine"}]}, "NKNgAE07TIQ": {"duration": 169.25, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NKNgAE07TIQ", "annotations": [{"segment": [40.662306416066166, 145.48656263338535], "label": "Mixing drinks"}]}, "l5xW6UZWwBs": {"duration": 87.818, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=l5xW6UZWwBs", "annotations": [{"segment": [8.488583463338534, 32.31138221528861], "label": "Shuffleboard"}, {"segment": [41.484528861154445, 80.79686427457099], "label": "Shuffleboard"}]}, "7xnwE0aWFSI": {"duration": 203.59, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7xnwE0aWFSI", "annotations": []}, "XoCvj2IbVGE": {"duration": 221.73, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=XoCvj2IbVGE", "annotations": [{"segment": [57.305343150000006, 176.54858034321373], "label": "Ironing clothes"}]}, "xIU6DO35R_c": {"duration": 32.46, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=xIU6DO35R_c", "annotations": [{"segment": [9.030204, 21.696984], "label": "Polishing shoes"}]}, "8oI4xGqkzSE": {"duration": 205.009, "subset": "training", "resolution": "855x472", "url": "https://www.youtube.com/watch?v=8oI4xGqkzSE", "annotations": [{"segment": [0.2, 25.58614409882518], "label": "Disc dog"}, {"segment": [30.063719316119585, 71.96103027794581], "label": "Disc dog"}, {"segment": [73.5601642841224, 142.3229254446178], "label": "Disc dog"}, {"segment": [146.16084702964122, 198.29261522620905], "label": "Disc dog"}]}, "qi_6u0mMJQM": {"duration": 172.13, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qi_6u0mMJQM", "annotations": [{"segment": 
[19.91359703563084, 47.26127789170831], "label": "Installing carpet"}]}, "9eqfQyr3J7k": {"duration": 26.425, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9eqfQyr3J7k", "annotations": [{"segment": [0, 26.401], "label": "Snow tubing"}]}, "Uj5OYHrC8B8": {"duration": 129.196, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Uj5OYHrC8B8", "annotations": []}, "23-lat1nVJg": {"duration": 10.147, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=23-lat1nVJg", "annotations": [{"segment": [0, 5.799014040561623], "label": "Knitting"}]}, "Ffi7vDa3C2I": {"duration": 200.273, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ffi7vDa3C2I", "annotations": [{"segment": [10.933595943837755, 200.221], "label": "Rollerblading"}]}, "dmiLuqRB09U": {"duration": 68.15, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dmiLuqRB09U", "annotations": [{"segment": [3.6148217067082684, 66.12997357566302], "label": "Getting a piercing"}]}, "KGIDKn3t2Qk": {"duration": 188.64, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=KGIDKn3t2Qk", "annotations": [{"segment": [52.661464898595945, 173.30107488299532], "label": "Mixing drinks"}]}, "AP9XodmOtsA": {"duration": 100.64, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=AP9XodmOtsA", "annotations": []}, "WhEdTfs7U5E": {"duration": 142.362, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WhEdTfs7U5E", "annotations": [{"segment": [0.2, 7.773251120521011], "label": "Playing beach volleyball"}, {"segment": [54.190664954489336, 130.59061703588142], "label": "Playing beach volleyball"}]}, "M6FdaEstXbI": {"duration": 114.057, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=M6FdaEstXbI", "annotations": [{"segment": 
[12.62725585023401, 29.16718252730109], "label": "Powerbocking"}, {"segment": [34.14694539781591, 47.48559594383776], "label": "Powerbocking"}, {"segment": [50.15332605304212, 67.06071294851795], "label": "Powerbocking"}, {"segment": [75.77682683307333, 101.03576911076443], "label": "Powerbocking"}, {"segment": [101.92516848673948, 106.19428549141965], "label": "Powerbocking"}]}, "0-F3q8Aj9Zk": {"duration": 207.4, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=0-F3q8Aj9Zk", "annotations": [{"segment": [1.6177896639675111, 8.73606418542456], "label": "High jump"}, {"segment": [23.943287026719165, 34.94425674169824], "label": "High jump"}, {"segment": [47.88657405343833, 85.4192942574846], "label": "High jump"}, {"segment": [99.3322853676052, 151.10155461456554], "label": "High jump"}, {"segment": [161.1318505311641, 195.42899140727533], "label": "High jump"}]}, "Fn71KcANJ7o": {"duration": 80.968, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Fn71KcANJ7o", "annotations": []}, "96c3BWVDoq4": {"duration": 123.69, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=96c3BWVDoq4", "annotations": [{"segment": [40.716317262214304, 50.17176534680435], "label": "Getting a tattoo"}, {"segment": [54.8030052249709, 60.5920550726791], "label": "Getting a tattoo"}, {"segment": [72.74905968658346, 78.92404618517942], "label": "Getting a tattoo"}, {"segment": [83.55528605912636, 98.22087899329173], "label": "Getting a tattoo"}]}, "lUmjnFf7IPQ": {"duration": 195.12, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lUmjnFf7IPQ", "annotations": []}, "kBh_98QimD4": {"duration": 55.5, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kBh_98QimD4", "annotations": [{"segment": [0, 55.49569160997732], "label": "Playing saxophone"}]}, "aM2ZdUJshvU": {"duration": 40.89, "subset": "testing", 
"resolution": "320x240", "url": "https://www.youtube.com/watch?v=aM2ZdUJshvU", "annotations": []}, "9rW35YTKYq8": {"duration": 236.98, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9rW35YTKYq8", "annotations": [{"segment": [18.85511031869846, 84.29343436594607], "label": "Playing violin"}, {"segment": [89.46934700245153, 214.06095729173168], "label": "Playing violin"}]}, "eLJYFoCx-gc": {"duration": 89.21000000000001, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eLJYFoCx-gc", "annotations": [{"segment": [8.765059282371293, 34.36459750390016], "label": "Long jump"}]}, "kWMNCeb3tmg": {"duration": 190.752, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kWMNCeb3tmg", "annotations": []}, "4nL_8C15KgQ": {"duration": 92.787, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4nL_8C15KgQ", "annotations": []}, "PlcyBfZi76Q": {"duration": 220.52, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PlcyBfZi76Q", "annotations": []}, "NwfosNrFnwk": {"duration": 127.62, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NwfosNrFnwk", "annotations": [{"segment": [2.389074044594437, 41.01243776553784], "label": "Javelin throw"}, {"segment": [63.70864118918499, 116.2682701702626], "label": "Javelin throw"}]}, "OJ43U4saH74": {"duration": 23.058, "subset": "testing", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=OJ43U4saH74", "annotations": []}, "jwqotL-yNvE": {"duration": 168.18, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jwqotL-yNvE", "annotations": [{"segment": [84.74700231002438, 131.1873100774371], "label": "Triple jump"}, {"segment": [167.65738076443057, 167.9821315192744], "label": "Triple jump"}]}, "cCYLiE1fiEo": {"duration": 203.454, "subset": "training", "resolution": "640x480", 
"url": "https://www.youtube.com/watch?v=cCYLiE1fiEo", "annotations": [{"segment": [42.90898620225079, 191.22069459477416], "label": "Putting on shoes"}]}, "j6D8Ctt94Nc": {"duration": 43.608, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=j6D8Ctt94Nc", "annotations": []}, "tXuNa_h804c": {"duration": 234.89, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=tXuNa_h804c", "annotations": [{"segment": [73.98578242121685, 230.38147100468018], "label": "Breakdancing"}]}, "FnPS6zn5S7M": {"duration": 46.07, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=FnPS6zn5S7M", "annotations": []}, "ns8v94HCtlM": {"duration": 71.588, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ns8v94HCtlM", "annotations": [{"segment": [22.622836969761117, 71.58702947845805], "label": "Grooming dog"}]}, "yWONID7ysSI": {"duration": 50.62, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yWONID7ysSI", "annotations": []}, "x2J5RWf_LJk": {"duration": 108.53, "subset": "testing", "resolution": "1920x816", "url": "https://www.youtube.com/watch?v=x2J5RWf_LJk", "annotations": []}, "r9AepFEEfsQ": {"duration": 59.58, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=r9AepFEEfsQ", "annotations": [{"segment": [6.130586583463338, 58.33346021840873], "label": "Playing accordion"}]}, "zW_8T8w7304": {"duration": 199.553, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zW_8T8w7304", "annotations": [{"segment": [0.01, 23.037237875909593], "label": "Mooping floor"}, {"segment": [31.442716560363092, 46.69710380251944], "label": "Mooping floor"}, {"segment": [95.26209175713967, 118.6106436583994], "label": "Mooping floor"}, {"segment": [150.05336003120124, 157.5248966302652], "label": "Mooping floor"}, {"segment": [169.04351555382215, 
171.84534177847112], "label": "Mooping floor"}]}, "bodOObk5K00": {"duration": 144.1, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=bodOObk5K00", "annotations": [{"segment": [38.66720706379275, 46.76034342598194], "label": "Playing harmonica"}, {"segment": [53.50462372780626, 72.38860857291434], "label": "Playing harmonica"}]}, "Ed7kAmkawTo": {"duration": 233.802, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ed7kAmkawTo", "annotations": [{"segment": [122.34025819032763, 224.0969336973479], "label": "Braiding hair"}]}, "BkjWeWUwG0A": {"duration": 216.828, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=BkjWeWUwG0A", "annotations": [{"segment": [0.01, 46.84971795062278], "label": "Swinging at the playground"}, {"segment": [53.61502018175965, 56.659406185771246], "label": "Swinging at the playground"}, {"segment": [105.03131713839981, 213.61441893332588], "label": "Swinging at the playground"}]}, "2PFU_Ee0x14": {"duration": 150.0, "subset": "validation", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=2PFU_Ee0x14", "annotations": [{"segment": [41.182500000000005, 116.97000000000001], "label": "Playing badminton"}]}, "zPu3JBSPa94": {"duration": 218.76, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zPu3JBSPa94", "annotations": [{"segment": [0.8531793788758354, 59.21064889398297], "label": "Getting a tattoo"}, {"segment": [65.69481217343932, 87.53620427266071], "label": "Getting a tattoo"}, {"segment": [97.09181331607007, 109.03632462033175], "label": "Getting a tattoo"}, {"segment": [188.55264273155962, 213.80675234628433], "label": "Getting a tattoo"}]}, "yeEe8-aYA2E": {"duration": 70.636, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yeEe8-aYA2E", "annotations": [{"segment": [8.595220099843994, 41.212978427457095], "label": "BMX"}, 
{"segment": [47.27371054914196, 69.64332183463338], "label": "BMX"}]}, "poFH53rF9uY": {"duration": 131.054, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=poFH53rF9uY", "annotations": [{"segment": [53.60349692352316, 57.67980871314311], "label": "Hurling"}, {"segment": [90.29030303010266, 93.55135172205827], "label": "Hurling"}, {"segment": [107.00318052143619, 109.44896757586854], "label": "Hurling"}]}, "qu-3Dn-bTSU": {"duration": 183.44, "subset": "testing", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=qu-3Dn-bTSU", "annotations": []}, "Ch_qHjUtOpE": {"duration": 22.06, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ch_qHjUtOpE", "annotations": [{"segment": [0, 16.62467344], "label": "Cheerleading"}]}, "0acEl97ZBME": {"duration": 102.75, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0acEl97ZBME", "annotations": [{"segment": [7.405676078434906, 23.73017202236014], "label": "Smoking a cigarette"}]}, "6VXQoPNphgI": {"duration": 134.79, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6VXQoPNphgI", "annotations": []}, "qNHfEf72V3g": {"duration": 159.52, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=qNHfEf72V3g", "annotations": [{"segment": [22.548293349999998, 157.645639625585], "label": "Paintball"}]}, "Uo9q4riquHM": {"duration": 102.865, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Uo9q4riquHM", "annotations": [{"segment": [0.01, 10.558396221695002], "label": "Cutting the grass"}]}, "-5h2E52tl4Q": {"duration": 114.521, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-5h2E52tl4Q", "annotations": [{"segment": [19.080166666666663, 106.4566308411215], "label": "Cleaning sink"}]}, "-HaFSqzE4Nc": {"duration": 53.52, "subset": "training", "resolution": "480x360", "url": 
"https://www.youtube.com/watch?v=-HaFSqzE4Nc", "annotations": [{"segment": [1.8369483566302653, 52.019037553666145], "label": "Smoking hookah"}]}, "Hxb8D4W-G0I": {"duration": 186.132, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Hxb8D4W-G0I", "annotations": [{"segment": [0.01, 169.22552730109206], "label": "Longboarding"}]}, "a0hFDXB1RgY": {"duration": 114.17, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=a0hFDXB1RgY", "annotations": [{"segment": [7.115195007800312, 114.021], "label": "Kayaking"}]}, "8cKjwHtCGU0": {"duration": 81.71000000000001, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=8cKjwHtCGU0", "annotations": [{"segment": [18.69956235, 68.84081201248051], "label": "Playing racquetball"}]}, "5z5xDI4IMVE": {"duration": 66.897, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5z5xDI4IMVE", "annotations": []}, "iSpQDXpCxiw": {"duration": 60.0, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iSpQDXpCxiw", "annotations": []}, "PQ-qpoPeqM0": {"duration": 191.12, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PQ-qpoPeqM0", "annotations": [{"segment": [6.8577811384564225, 184.8619263409992], "label": "Fixing bicycle"}]}, "0rDb-zfhH0E": {"duration": 201.09, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0rDb-zfhH0E", "annotations": [{"segment": [76.23027794581171, 197.0066430078003], "label": "Doing karate"}]}, "SvM0twHYsus": {"duration": 49.25, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=SvM0twHYsus", "annotations": [{"segment": [0.01, 49.24952380952381], "label": "Ironing clothes"}]}, "qS7TStZg_5c": {"duration": 11.66, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qS7TStZg_5c", "annotations": 
[{"segment": [0, 11.474568636505461], "label": "Playing guitarra"}]}, "slUvIngXl4g": {"duration": 85.496, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=slUvIngXl4g", "annotations": [{"segment": [47.04537285491419, 52.117067082683306], "label": "Sumo"}]}, "YptHsVTHquc": {"duration": 170.481, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YptHsVTHquc", "annotations": [{"segment": [6.64902133500306, 168.08725833385336], "label": "Hula hoop"}]}, "bV_X5E1-T50": {"duration": 175.06, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bV_X5E1-T50", "annotations": [{"segment": [28.621771450858038, 34.047699687987524], "label": "Smoking a cigarette"}, {"segment": [12.88657956318253, 16.68472932917317], "label": "Smoking a cigarette"}, {"segment": [49.511595163806554, 53.85233775351014], "label": "Smoking a cigarette"}, {"segment": [153.96071372854914, 156.9630241809672], "label": "Smoking a cigarette"}]}, "3K62qZ2hGyw": {"duration": 128.62, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3K62qZ2hGyw", "annotations": [{"segment": [6.420705647425897, 125.40440717628704], "label": "Belly dance"}]}, "jIKAVLlyXIQ": {"duration": 13.86, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=jIKAVLlyXIQ", "annotations": [{"segment": [0.625015600624025, 13.060670826833073], "label": "Chopping wood"}]}, "6iuD3pSgBcw": {"duration": 208.214, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6iuD3pSgBcw", "annotations": [{"segment": [17.536184087363495, 198.11296411856475], "label": "Playing drums"}]}, "ZyZt6fhcg0M": {"duration": 31.19, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZyZt6fhcg0M", "annotations": []}, "l8TsSm4Yh4M": {"duration": 163.82, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=l8TsSm4Yh4M", "annotations": [{"segment": [91.49205441809673, 149.76073712012482], "label": "Paintball"}]}, "1dFLeGRT0NQ": {"duration": 151.765, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1dFLeGRT0NQ", "annotations": [{"segment": [18.030482047451635, 128.42688798384015], "label": "Hula hoop"}]}, "XobM2YRX0X8": {"duration": 105.14, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XobM2YRX0X8", "annotations": []}, "BngR6rNiO_g": {"duration": 115.96000000000001, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BngR6rNiO_g", "annotations": [{"segment": [11.21610642384879, 108.36206042433699], "label": "Tango"}]}, "wkJWeSw8FSE": {"duration": 20.95, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wkJWeSw8FSE", "annotations": [{"segment": [4.93385974726989, 20.627058658990805], "label": "Starting a campfire"}]}, "_4S7eaL-cR8": {"duration": 92.72, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_4S7eaL-cR8", "annotations": []}, "mbmMY04yMUA": {"duration": 135.0, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=mbmMY04yMUA", "annotations": [{"segment": [16.84877566302652, 135.000815], "label": "Washing dishes"}]}, "K2qRHJ7P6Cg": {"duration": 160.85, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=K2qRHJ7P6Cg", "annotations": []}, "JsoDi1QN4yk": {"duration": 17.624, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JsoDi1QN4yk", "annotations": []}, "J4vq7MLMAtY": {"duration": 149.56, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=J4vq7MLMAtY", "annotations": [{"segment": [15.816577316482581, 146.30336760793108], "label": "Table soccer"}]}, "YAMhMB1jdio": {"duration": 12.47, "subset": "testing", 
"resolution": "480x360", "url": "https://www.youtube.com/watch?v=YAMhMB1jdio", "annotations": []}, "JGStyHrlN2U": {"duration": 40.33, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JGStyHrlN2U", "annotations": [{"segment": [1.7618185870291956, 38.130787990703304], "label": "Playing polo"}]}, "WaWkDf6b_j4": {"duration": 85.2, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WaWkDf6b_j4", "annotations": [{"segment": [16.57384399375975, 74.48245553822153], "label": "High jump"}]}, "UWgzslG97MQ": {"duration": 220.357, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=UWgzslG97MQ", "annotations": [{"segment": [17.18856237242687, 27.501699795882992], "label": "Surfing"}, {"segment": [33.689582249956665, 44.002719673412784], "label": "Surfing"}, {"segment": [49.84683088003792, 58.09734081880281], "label": "Surfing"}, {"segment": [65.31653701522211, 73.91081820143553], "label": "Surfing"}, {"segment": [81.81755689275191, 89.38052433661971], "label": "Surfing"}, {"segment": [94.53709304834778, 127.19536155595883], "label": "Surfing"}, {"segment": [133.03947276258398, 146.1027801656284], "label": "Surfing"}, {"segment": [151.94689137225353, 172.57316621916578], "label": "Surfing"}, {"segment": [178.76104867323943, 190.10549965397817], "label": "Surfing"}, {"segment": [199.73109457316693, 213.48194445772234], "label": "Surfing"}]}, "-lEsnrNNZFU": {"duration": 102.075, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-lEsnrNNZFU", "annotations": [{"segment": [0.01, 102.074943], "label": "Rope skipping"}]}, "1PQiq8zajCE": {"duration": 87.122, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1PQiq8zajCE", "annotations": [{"segment": [25.64771028037383, 43.560396], "label": "Throwing darts"}]}, "j7vUMNMB4Yo": {"duration": 93.345, "subset": "validation", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=j7vUMNMB4Yo", "annotations": [{"segment": [45.265046016194, 89.01834975451175], "label": "Roof shingle removal"}]}, "pb0k7YrMwZY": {"duration": 6.85, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pb0k7YrMwZY", "annotations": [{"segment": [0.01, 6.849885], "label": "Ping-pong"}]}, "tVC_5_SgseY": {"duration": 137.81, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tVC_5_SgseY", "annotations": [{"segment": [39.236050618895504, 47.62074088813892], "label": "Making a sandwich"}, {"segment": [72.55981963768346, 89.9741763507275], "label": "Making a sandwich"}]}, "uNlryAGT0Tk": {"duration": 79.06, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uNlryAGT0Tk", "annotations": []}, "ABBQqwPOxw4": {"duration": 6.99, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ABBQqwPOxw4", "annotations": [{"segment": [1.6459157566302651, 6.941], "label": "Tennis serve with ball bouncing"}]}, "XuwqWdp-u2M": {"duration": 41.448, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XuwqWdp-u2M", "annotations": [{"segment": [0.2, 41.247641723356004], "label": "Hitting a pinata"}]}, "PDt5sm0EBIY": {"duration": 189.82, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PDt5sm0EBIY", "annotations": []}, "PemokF80u70": {"duration": 185.737, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PemokF80u70", "annotations": []}, "WXQol1TisqE": {"duration": 60.581, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=WXQol1TisqE", "annotations": [{"segment": [19.185500334299086, 58.31257951950078], "label": "Calf roping"}]}, "cMeKc9mp9wI": {"duration": 54.2, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=cMeKc9mp9wI", 
"annotations": [{"segment": [4.183854, 51.87029849531981], "label": "Playing polo"}]}, "7hRls13HJow": {"duration": 132.54, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7hRls13HJow", "annotations": [{"segment": [12.3676040531552, 130.89050458414835], "label": "Mooping floor"}]}, "R52e9UAE60k": {"duration": 77.58, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=R52e9UAE60k", "annotations": [{"segment": [2.2984071762870513, 75.96840561622464], "label": "Horseback riding"}]}, "JEvEoAESqJ0": {"duration": 9.64, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JEvEoAESqJ0", "annotations": [{"segment": [0.3963975421216849, 9.147060264430579], "label": "Triple jump"}]}, "0FTaJwbFdZI": {"duration": 203.66, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0FTaJwbFdZI", "annotations": [{"segment": [6.354508438840955, 188.09344936037442], "label": "Belly dance"}]}, "STgbw-zqZJM": {"duration": 27.516, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=STgbw-zqZJM", "annotations": [{"segment": [0.2, 26.452070202808116], "label": "Blowing leaves"}]}, "2gc7pCOg-NE": {"duration": 155.57, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2gc7pCOg-NE", "annotations": [{"segment": [5.095726989079563, 135.64339937597504], "label": "Playing squash"}]}, "FFyJjF4MjHM": {"duration": 35.202, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FFyJjF4MjHM", "annotations": [{"segment": [0, 35.181], "label": "Skiing"}]}, "UAgb3FEjBN8": {"duration": 234.1, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UAgb3FEjBN8", "annotations": [{"segment": [13.147783119488045, 187.35590945270462], "label": "Doing motocross"}]}, "M37LbI4NxzM": {"duration": 224.65, "subset": "testing", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=M37LbI4NxzM", "annotations": []}, "57buK1yvKPk": {"duration": 74.862, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=57buK1yvKPk", "annotations": [{"segment": [2.8613069856127575, 8.583920956838273], "label": "Peeling potatoes"}, {"segment": [26.102126991202095, 53.54731516692667], "label": "Peeling potatoes"}, {"segment": [60.437809375975036, 73.28449349453979], "label": "Peeling potatoes"}]}, "uOmCwWVJnLQ": {"duration": 107.09, "subset": "training", "resolution": "568x320", "url": "https://www.youtube.com/watch?v=uOmCwWVJnLQ", "annotations": [{"segment": [89.54774643369734, 107.089749], "label": "Using uneven bars"}]}, "4XnXyKYxnQQ": {"duration": 22.62, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=4XnXyKYxnQQ", "annotations": []}, "puPMvwv2kmg": {"duration": 97.71000000000001, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=puPMvwv2kmg", "annotations": [{"segment": [0, 97.709569], "label": "Doing step aerobics"}]}, "2QydTDAYJsA": {"duration": 19.064, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2QydTDAYJsA", "annotations": [{"segment": [0.19999999999999998, 18.801000000000002], "label": "Calf roping"}]}, "ki2kGFwczMg": {"duration": 70.19, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ki2kGFwczMg", "annotations": [{"segment": [2.737672425897036, 68.98934513260531], "label": "Javelin throw"}]}, "MEbEcvzdytY": {"duration": 174.034, "subset": "training", "resolution": "712x480", "url": "https://www.youtube.com/watch?v=MEbEcvzdytY", "annotations": [{"segment": [100.37364649917924, 106.06633304420222], "label": "Elliptical trainer"}, {"segment": [148.8970213545413, 157.84267158351327], "label": "Elliptical trainer"}]}, "hQ4xkr6VpnA": {"duration": 99.544, "subset": "training", "resolution": "1280x640", "url": 
"https://www.youtube.com/watch?v=hQ4xkr6VpnA", "annotations": [{"segment": [14.675268404836194, 36.57170062792512], "label": "Braiding hair"}, {"segment": [38.279932929017164, 66.07753128315133], "label": "Braiding hair"}]}, "30y8Uy0B_uk": {"duration": 172.66, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=30y8Uy0B_uk", "annotations": [{"segment": [7.677259750390015, 39.19443135725429], "label": "Rock climbing"}, {"segment": [54.81832839313572, 163.64685257410298], "label": "Rock climbing"}]}, "1Cdqwg7Giv4": {"duration": 186.36, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=1Cdqwg7Giv4", "annotations": [{"segment": [16.57206023751154, 32.85338257611937], "label": "Discus throw"}, {"segment": [37.79592685748246, 59.601269275260805], "label": "Discus throw"}, {"segment": [67.1604546467573, 86.34915597440224], "label": "Discus throw"}, {"segment": [91.58243815466905, 110.48040158341028], "label": "Discus throw"}, {"segment": [119.49327644942532, 141.5893567661074], "label": "Discus throw"}, {"segment": [147.11337684527788, 176.76864253345644], "label": "Discus throw"}]}, "VViW4wfqxDA": {"duration": 46.649, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=VViW4wfqxDA", "annotations": []}, "7Lkcan0X5VA": {"duration": 146.59, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7Lkcan0X5VA", "annotations": [{"segment": [10.06217354544522, 142.69991408424337], "label": "Playing saxophone"}]}, "hfBTv5b_Cok": {"duration": 192.378, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=hfBTv5b_Cok", "annotations": [{"segment": [24.271905397673088, 50.94103601980771], "label": "Playing drums"}, {"segment": [68.02126574409618, 76.71120667912771], "label": "Playing drums"}]}, "r8RJSDKy9iA": {"duration": 98.918, "subset": "training", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=r8RJSDKy9iA", "annotations": [{"segment": [6.789936500861395, 98.91702947845805], "label": "Putting in contact lenses"}]}, "VRwI8Iydb_o": {"duration": 141.155, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VRwI8Iydb_o", "annotations": [{"segment": [48.95390840891178, 60.588694611625115], "label": "Building sandcastles"}]}, "kMRQmijCc5Y": {"duration": 108.716, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kMRQmijCc5Y", "annotations": [{"segment": [18.646973478939156, 32.54744461778471], "label": "Elliptical trainer"}, {"segment": [56.44947425897036, 108.681], "label": "Elliptical trainer"}]}, "zDPsJ3ECaTI": {"duration": 221.4, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zDPsJ3ECaTI", "annotations": [{"segment": [22.27363104524181, 202.53464508580342], "label": "Zumba"}]}, "AWwPqCohlrE": {"duration": 93.786, "subset": "testing", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=AWwPqCohlrE", "annotations": []}, "YJ4ynq5Adg0": {"duration": 120.814, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=YJ4ynq5Adg0", "annotations": [{"segment": [49.642280373831774, 120.721], "label": "Rafting"}]}, "qm1dudWLlUg": {"duration": 18.3, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qm1dudWLlUg", "annotations": [{"segment": [0.8912641738308821, 9.498181382553623], "label": "Drinking beer"}]}, "F2Igw6lvqwk": {"duration": 49.389, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=F2Igw6lvqwk", "annotations": [{"segment": [2.9250514820592826, 49.141], "label": "Putting on shoes"}]}, "wxoe1M2L4Qo": {"duration": 216.387, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wxoe1M2L4Qo", "annotations": [{"segment": [81.00879875195008, 84.72170202808113], 
"label": "Scuba diving"}, {"segment": [159.31730421216847, 166.06803744149767], "label": "Scuba diving"}]}, "eAOBdYKju70": {"duration": 170.92000000000002, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=eAOBdYKju70", "annotations": [{"segment": [0, 64.79571837285492], "label": "Horseback riding"}, {"segment": [91.46062305304213, 105.05972443993761], "label": "Horseback riding"}, {"segment": [137.32425910296413, 170.72203900000002], "label": "Horseback riding"}]}, "lUds16WLsHI": {"duration": 221.66, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=lUds16WLsHI", "annotations": [{"segment": [99.91960842433697, 155.58416536661466], "label": "Shaving legs"}]}, "Bhz-WgJH8R0": {"duration": 42.354, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Bhz-WgJH8R0", "annotations": [{"segment": [0.2, 40.08531669266771], "label": "Skiing"}]}, "bkRQgwbP0WM": {"duration": 34.621, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bkRQgwbP0WM", "annotations": [{"segment": [0.7561518159126365, 33.270679900156004], "label": "Playing rubik cube"}]}, "zfBQjkv0dfI": {"duration": 203.18, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=zfBQjkv0dfI", "annotations": []}, "bx5ZHOUG-nI": {"duration": 119.82, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=bx5ZHOUG-nI", "annotations": [{"segment": [14.484755999999999, 98.14700468018721], "label": "Hand washing clothes"}]}, "e5e16U5hnzY": {"duration": 151.28, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=e5e16U5hnzY", "annotations": [{"segment": [55.46073432917317, 63.720843697347895], "label": "Discus throw"}, {"segment": [99.35731554290172, 129.56571551794073], "label": "Discus throw"}]}, "4rKTw99bM8g": {"duration": 9.57, "subset": "training", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=4rKTw99bM8g", "annotations": [{"segment": [0, 9.441666], "label": "Kayaking"}]}, "kqVIHq94kgs": {"duration": 76.95, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kqVIHq94kgs", "annotations": [{"segment": [41.09109446567863, 62.92636218174727], "label": "Polishing forniture"}]}, "KptXzRFIvTw": {"duration": 144.475, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KptXzRFIvTw", "annotations": [{"segment": [25.49069670771346, 29.766423496871493], "label": "Decorating the Christmas tree"}, {"segment": [40.79329784785799, 45.519101141137924], "label": "Decorating the Christmas tree"}, {"segment": [63.522161306013835, 65.99758207868427], "label": "Decorating the Christmas tree"}, {"segment": [106.05438960722931, 116.18111082218257], "label": "Decorating the Christmas tree"}]}, "etPTFvpHxoY": {"duration": 75.627, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=etPTFvpHxoY", "annotations": []}, "axoyB4pypWY": {"duration": 138.902, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=axoyB4pypWY", "annotations": [{"segment": [46.1561259511605, 118.09900626365055], "label": "Disc dog"}]}, "32z1yiC0Co0": {"duration": 192.517, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=32z1yiC0Co0", "annotations": [{"segment": [34.53886748667226, 41.74697896215168], "label": "Running a marathon"}]}, "v7OluHKECRA": {"duration": 120.651, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=v7OluHKECRA", "annotations": [{"segment": [0.01, 97.87587563182528], "label": "Baking cookies"}]}, "a50gTER-FsQ": {"duration": 211.488, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=a50gTER-FsQ", "annotations": [{"segment": [22.435475167550702, 25.734809751014044], "label": "Curling"}, {"segment": 
[32.003545459594385, 35.302880043057726], "label": "Curling"}, {"segment": [56.74855483556942, 59.38802250234009], "label": "Curling"}, {"segment": [71.59556046115445, 82.81329804492981], "label": "Curling"}, {"segment": [106.89844050421218, 109.2079747126365], "label": "Curling"}, {"segment": [121.08557921310454, 128.34411529672386], "label": "Curling"}, {"segment": [147.1503224224649, 164.96672917316693], "label": "Curling"}, {"segment": [186.08247050733232, 193.34100659095165], "label": "Curling"}]}, "fGDVlbI90pw": {"duration": 10.89, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fGDVlbI90pw", "annotations": [{"segment": [0.22091887675507021, 8.78138069422777], "label": "Brushing hair"}]}, "qcYRPEEitZU": {"duration": 38.04, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qcYRPEEitZU", "annotations": [{"segment": [0.17785179407176285, 4.683430577223088], "label": "Shaving legs"}, {"segment": [14.228143525741029, 31.24263182527301], "label": "Shaving legs"}]}, "zpJ91CkCiyw": {"duration": 135.651, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zpJ91CkCiyw", "annotations": [{"segment": [23.115602614001205, 33.680320297543986], "label": "Rollerblading"}, {"segment": [38.54009043197367, 92.42014989811052], "label": "Rollerblading"}]}, "ztMUqZAqoNI": {"duration": 82.338, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ztMUqZAqoNI", "annotations": []}, "8Kv7piYg9yc": {"duration": 95.086, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8Kv7piYg9yc", "annotations": [{"segment": [4.649401160516472, 90.53005536612396], "label": "Rock-paper-scissors"}]}, "3lkZPJES45Q": {"duration": 122.695, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3lkZPJES45Q", "annotations": [{"segment": [19.332452800152822, 23.3520716991945], "label": 
"Playing drums"}, {"segment": [54.360560348944574, 113.88920062402497], "label": "Playing drums"}]}, "ILgkiTHnGVI": {"duration": 60.26, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=ILgkiTHnGVI", "annotations": [{"segment": [3.8399068135310475, 55.538183640902744], "label": "Tai chi"}]}, "wjD2BdbSb1o": {"duration": 91.487, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=wjD2BdbSb1o", "annotations": [{"segment": [5.703026521060843, 63.16101872074883], "label": "Painting fence"}]}, "Adjpq4A5WtI": {"duration": 103.17, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Adjpq4A5WtI", "annotations": [{"segment": [0.9656747925117005, 99.46450507816232], "label": "Playing piano"}]}, "KKrHX-pywBQ": {"duration": 193.446, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KKrHX-pywBQ", "annotations": [{"segment": [0.2, 16.59446957878315], "label": "Longboarding"}, {"segment": [19.309928237129487, 21.120234009360377], "label": "Longboarding"}, {"segment": [23.83569266770671, 27.45630421216849], "label": "Longboarding"}, {"segment": [29.266609984399377, 36.20611544461779], "label": "Longboarding"}, {"segment": [38.318138845553825, 63.66241965678628], "label": "Longboarding"}, {"segment": [65.77444305772231, 177.7300608424337], "label": "Longboarding"}, {"segment": [184.06678627145084, 193.221], "label": "Longboarding"}]}, "y-HkIwa-jWM": {"duration": 41.75, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=y-HkIwa-jWM", "annotations": [{"segment": [1.172372209048362, 36.40867026989079], "label": "Playing kickball"}]}, "GuzLtwhxsxA": {"duration": 112.362, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GuzLtwhxsxA", "annotations": [{"segment": [12.791627145085803, 91.30992355694228], "label": "Canoeing"}]}, "72F30o9V8v0": {"duration": 203.36, 
"subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=72F30o9V8v0", "annotations": [{"segment": [41.5200112, 178.08438560000002], "label": "Horseback riding"}]}, "M1_4oDuh2as": {"duration": 59.86, "subset": "training", "resolution": "352x240", "url": "https://www.youtube.com/watch?v=M1_4oDuh2as", "annotations": [{"segment": [18.21045772443143, 51.64299036723374], "label": "Cleaning windows"}]}, "QIUxLD27NuI": {"duration": 83.639, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QIUxLD27NuI", "annotations": []}, "Zv3fY847yN0": {"duration": 156.92000000000002, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Zv3fY847yN0", "annotations": []}, "yPsEMk0KMp4": {"duration": 24.66, "subset": "testing", "resolution": "854x474", "url": "https://www.youtube.com/watch?v=yPsEMk0KMp4", "annotations": []}, "gtd2Ye-raxE": {"duration": 68.06, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gtd2Ye-raxE", "annotations": [{"segment": [12.85188388, 60.37880926], "label": "Using the pommel horse"}]}, "b5E7-GxvCdk": {"duration": 19.087, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=b5E7-GxvCdk", "annotations": [{"segment": [0.01, 17.47886603151326], "label": "Cutting the grass"}]}, "xFqGY-y4OUg": {"duration": 186.711, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xFqGY-y4OUg", "annotations": [{"segment": [21.549007800312012, 186.661], "label": "Playing pool"}]}, "f4Nl8x0F5og": {"duration": 21.177, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=f4Nl8x0F5og", "annotations": []}, "0d7bz-Se1mU": {"duration": 109.738, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0d7bz-Se1mU", "annotations": []}, "PplBDkLlpEw": {"duration": 16.53, "subset": "testing", "resolution": "512x288", "url": 
"https://www.youtube.com/watch?v=PplBDkLlpEw", "annotations": []}, "uDpLB-JDjEA": {"duration": 99.15, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=uDpLB-JDjEA", "annotations": [{"segment": [55.8387685765934, 78.73111549921998], "label": "Calf roping"}]}, "lhnhGI-rrzM": {"duration": 180.768, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=lhnhGI-rrzM", "annotations": [{"segment": [0.20000000000000004, 80.93639402718966], "label": "Longboarding"}, {"segment": [98.4209112037951, 100.11296125314401], "label": "Longboarding"}]}, "HNIs8lFvKkA": {"duration": 221.07999999999998, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HNIs8lFvKkA", "annotations": [{"segment": [0.01, 221.07718820861678], "label": "Ping-pong"}]}, "A6DBt7UgWEg": {"duration": 208.56, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=A6DBt7UgWEg", "annotations": [{"segment": [23.255124024961, 65.21192121684868], "label": "Playing badminton"}, {"segment": [119.20283853354134, 128.63498673946958], "label": "Playing badminton"}]}, "9lX18LyTuAQ": {"duration": 205.31, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=9lX18LyTuAQ", "annotations": [{"segment": [104.6156331868751, 143.6140163644452], "label": "Playing violin"}, {"segment": [162.79354907472558, 177.17819860743586], "label": "Playing violin"}]}, "1oMFr9eRKQE": {"duration": 77.857, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1oMFr9eRKQE", "annotations": []}, "NjlskpV3WuI": {"duration": 77.39, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NjlskpV3WuI", "annotations": []}, "ihMn28mft2s": {"duration": 126.11, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=ihMn28mft2s", "annotations": [{"segment": [52.303749553968345, 
53.678754233096065], "label": "Volleyball"}, {"segment": [70.76809810225495, 73.91096594026118], "label": "Volleyball"}, {"segment": [86.28600805241071, 125.96471450723938], "label": "Volleyball"}]}, "H-S8LvNLvdk": {"duration": 36.92, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=H-S8LvNLvdk", "annotations": []}, "HPrkxpOoep0": {"duration": 233.338, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HPrkxpOoep0", "annotations": [{"segment": [12.732191887675508, 22.55416848673947], "label": "Playing ten pins"}, {"segment": [30.55726053042122, 63.66095943837754], "label": "Playing ten pins"}, {"segment": [68.75383619344774, 94.58199687987519], "label": "Playing ten pins"}, {"segment": [99.6748736349454, 116.04483463338534], "label": "Playing ten pins"}, {"segment": [121.50148829953199, 126.23058814352574], "label": "Playing ten pins"}, {"segment": [130.97092043681747, 184.08690483619344], "label": "Playing ten pins"}, {"segment": [189.54402652106086, 196.45638065522618], "label": "Playing ten pins"}, {"segment": [202.64111856474258, 207.0068159126365], "label": "Playing ten pins"}, {"segment": [212.1001294851794, 224.46960530421217], "label": "Playing ten pins"}]}, "QvM8SFUUvsw": {"duration": 28.282, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=QvM8SFUUvsw", "annotations": [{"segment": [1.897187973478939, 9.662422469578784], "label": "Playing rubik cube"}, {"segment": [15.486348341653668, 22.898617633385335], "label": "Playing rubik cube"}]}, "qRFZMO_mTGU": {"duration": 114.78, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qRFZMO_mTGU", "annotations": [{"segment": [38.88151776, 43.42158687708052], "label": "Playing field hockey"}, {"segment": [27.6644749381812, 48.25615531287918], "label": "Playing field hockey"}, {"segment": [66.87819669521475, 74.2195783940201], "label": "Playing field hockey"}]}, 
"exzuzQLrv7g": {"duration": 196.395, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=exzuzQLrv7g", "annotations": [{"segment": [127.11624804992199, 149.79801716068644], "label": "Doing fencing"}]}, "3HBAcaU552I": {"duration": 123.925, "subset": "training", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=3HBAcaU552I", "annotations": [{"segment": [0, 21.0730325702824], "label": "Gargling mouthwash"}, {"segment": [24.3596523289503, 28.612924957814638], "label": "Gargling mouthwash"}, {"segment": [66.89237861759368, 76.55890731955809], "label": "Gargling mouthwash"}, {"segment": [78.87887420802954, 93.18533596567862], "label": "Gargling mouthwash"}]}, "ELlRh3gjpeE": {"duration": 160.543, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ELlRh3gjpeE", "annotations": [{"segment": [35.063942717628706, 160.5427663], "label": "Futsal"}]}, "wOwWidUOaxc": {"duration": 73.282, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=wOwWidUOaxc", "annotations": [{"segment": [16.75346637916442, 64.1646398239545], "label": "Sumo"}]}, "YrIEvBXoaIA": {"duration": 33.135, "subset": "testing", "resolution": "376x240", "url": "https://www.youtube.com/watch?v=YrIEvBXoaIA", "annotations": []}, "29UfCoftDkA": {"duration": 47.18, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=29UfCoftDkA", "annotations": [{"segment": [1.8389625585023404, 46.26829797191888], "label": "Using parallel bars"}]}, "BurbdCh--Lo": {"duration": 101.704, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BurbdCh--Lo", "annotations": []}, "EGrXaq213Oc": {"duration": 126.387, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EGrXaq213Oc", "annotations": [{"segment": [14.386010920436817, 49.07009204368175], "label": "Snowboarding"}, {"segment": [54.39094539781591, 
75.48924024960999], "label": "Snowboarding"}, {"segment": [81.20513572542902, 83.7674336973479], "label": "Snowboarding"}, {"segment": [92.24272698907956, 99.14122152886115], "label": "Snowboarding"}]}, "kMYZWwZXx3E": {"duration": 60.309, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kMYZWwZXx3E", "annotations": [{"segment": [15.527246489859595, 21.173517940717627], "label": "Painting fence"}, {"segment": [29.92523868954758, 32.183747269890794], "label": "Painting fence"}]}, "rFx-j3NhSgI": {"duration": 69.916, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=rFx-j3NhSgI", "annotations": [{"segment": [11.119909516380655, 63.77595163806553], "label": "Baton twirling"}]}, "BC1ZIeZvFXw": {"duration": 117.447, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BC1ZIeZvFXw", "annotations": [{"segment": [22.74872987404939, 106.68495884601201], "label": "Longboarding"}]}, "uqaiE1bcAXY": {"duration": 96.85, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=uqaiE1bcAXY", "annotations": []}, "XHSNzRWXROM": {"duration": 35.968, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XHSNzRWXROM", "annotations": []}, "GxSh-XQhIjU": {"duration": 233.82999999999998, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GxSh-XQhIjU", "annotations": [{"segment": [35.38380577399967, 211.93805314117327], "label": "Playing lacrosse"}]}, "dfex2oZYqmU": {"duration": 80.643, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=dfex2oZYqmU", "annotations": [{"segment": [0.19999999999999998, 80.44290249433107], "label": "Futsal"}]}, "my9Z4bun_Dk": {"duration": 124.181, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=my9Z4bun_Dk", "annotations": [{"segment": [80.7594336973479, 116.8004414976599], 
"label": "BMX"}]}, "xdoV6-DzsCE": {"duration": 57.632, "subset": "testing", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=xdoV6-DzsCE", "annotations": []}, "xaCOYdzox0g": {"duration": 229.74, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xaCOYdzox0g", "annotations": [{"segment": [31.18123075127087, 219.34383011238816], "label": "Preparing pasta"}]}, "ikHGZpkhfNc": {"duration": 213.02, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ikHGZpkhfNc", "annotations": [{"segment": [21.268753966485193, 73.44366604051918], "label": "Longboarding"}, {"segment": [84.4103673044881, 199.39456843579865], "label": "Longboarding"}]}, "3F8ZTg8dvh8": {"duration": 60.05, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3F8ZTg8dvh8", "annotations": []}, "gSH5ya0pfko": {"duration": 96.967, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=gSH5ya0pfko", "annotations": [{"segment": [14.371014677321787, 82.14169305304212], "label": "Croquet"}]}, "v1FHFQ1UZHw": {"duration": 164.351, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=v1FHFQ1UZHw", "annotations": [{"segment": [90.73785488256395, 134.95658506408978], "label": "Hanging wallpaper"}]}, "5nu47s7JrfY": {"duration": 126.55, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5nu47s7JrfY", "annotations": []}, "6iA4RXGAR_k": {"duration": 64.06, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6iA4RXGAR_k", "annotations": [{"segment": [1.2246627964833579, 63.59174612981669], "label": "Tango"}]}, "dJVEAuAXON0": {"duration": 129.22, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dJVEAuAXON0", "annotations": []}, "JDg7jk6v3OQ": {"duration": 54.5, "subset": "testing", "resolution": "810x1440", "url": 
"https://www.youtube.com/watch?v=JDg7jk6v3OQ", "annotations": []}, "wvFJbY3SmXI": {"duration": 61.19, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=wvFJbY3SmXI", "annotations": [{"segment": [0, 59.13787636973479], "label": "Discus throw"}]}, "Y5uVICaJU-0": {"duration": 6.04, "subset": "validation", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=Y5uVICaJU-0", "annotations": [{"segment": [0.01, 5.706099063962559], "label": "Discus throw"}]}, "x_FAc0KqMVw": {"duration": 119.37, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=x_FAc0KqMVw", "annotations": [{"segment": [71.6790717628705, 114.31415600624025], "label": "Playing guitarra"}]}, "YU0DjX7wvRA": {"duration": 114.893, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YU0DjX7wvRA", "annotations": [{"segment": [0.01, 114.892334], "label": "Clipping cat claws"}]}, "xfyoEDb6kVg": {"duration": 130.89, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=xfyoEDb6kVg", "annotations": [{"segment": [3.4713651076655307, 125.58173771848833], "label": "Playing racquetball"}]}, "UO9bRJmfMzI": {"duration": 215.644, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UO9bRJmfMzI", "annotations": [{"segment": [15.475212271075877, 80.06740261991432], "label": "Removing ice from car"}, {"segment": [92.17843831032152, 203.53268313045447], "label": "Removing ice from car"}]}, "clUgEizjVkM": {"duration": 23.174, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=clUgEizjVkM", "annotations": [{"segment": [0.2, 7.653497659906395], "label": "Playing ice hockey"}, {"segment": [10.505508580343214, 14.260054602184086], "label": "Playing ice hockey"}]}, "TiQcZKml5Bs": {"duration": 78.0, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TiQcZKml5Bs", 
"annotations": []}, "cz20Wqx9d4Q": {"duration": 149.792, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cz20Wqx9d4Q", "annotations": [{"segment": [9.113666691107644, 31.07994025429017], "label": "Arm wrestling"}, {"segment": [66.83355573478939, 85.76194040093604], "label": "Arm wrestling"}, {"segment": [124.31976101716069, 139.976573024961], "label": "Arm wrestling"}]}, "95L-8JtlwRU": {"duration": 126.39, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=95L-8JtlwRU", "annotations": [{"segment": [20.301283931357254, 105.44841653666147], "label": "Cleaning shoes"}]}, "hJn5L1nrkL4": {"duration": 70.73, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hJn5L1nrkL4", "annotations": [{"segment": [0, 15.70760515600624], "label": "Ping-pong"}]}, "aFpWNTroghs": {"duration": 93.159, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aFpWNTroghs", "annotations": [{"segment": [21.243564677726784, 80.59204734597581], "label": "Spread mulch"}]}, "xTk7LrjyKPs": {"duration": 49.087, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xTk7LrjyKPs", "annotations": []}, "xAI1y9izXOM": {"duration": 40.96, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xAI1y9izXOM", "annotations": [{"segment": [8.558648985959438, 38.577790951638065], "label": "Arm wrestling"}]}, "hhN1647pP88": {"duration": 195.118, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=hhN1647pP88", "annotations": [{"segment": [11.869202808112325, 195.081], "label": "Plastering"}]}, "tjuYS-6UC_M": {"duration": 167.463, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=tjuYS-6UC_M", "annotations": [{"segment": [132.80833238138217, 136.19847724119523], "label": "Hitting a pinata"}]}, "WY9zvUkK_4Y": {"duration": 203.593, 
"subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=WY9zvUkK_4Y", "annotations": [{"segment": [20.962728729557345, 188.66455823400935], "label": "Braiding hair"}]}, "CUtkQIV2eto": {"duration": 206.843, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CUtkQIV2eto", "annotations": [{"segment": [15.46026168224299, 200.6807211838006], "label": "Waterskiing"}]}, "teIE_kXbMiw": {"duration": 107.927, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=teIE_kXbMiw", "annotations": [{"segment": [35.06475585023401, 73.08615366614664], "label": "Sharpening knives"}]}, "ZoKZTceDTLA": {"duration": 165.001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZoKZTceDTLA", "annotations": [{"segment": [29.141504130437735, 150.6929262488178], "label": "Laying tile"}]}, "Sn8IRyUFNac": {"duration": 37.2, "subset": "training", "resolution": "854x478", "url": "https://www.youtube.com/watch?v=Sn8IRyUFNac", "annotations": [{"segment": [0.01, 4.874606489859595], "label": "Smoking a cigarette"}, {"segment": [7.253878705148206, 12.360609313572544], "label": "Smoking a cigarette"}]}, "QJmoA3byOzo": {"duration": 189.499, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QJmoA3byOzo", "annotations": [{"segment": [10.346956630265211, 26.90208723868955], "label": "Hand car wash"}, {"segment": [52.91729248049922, 60.6036031201248], "label": "Hand car wash"}, {"segment": [74.49808773790951, 188.01898333853353], "label": "Hand car wash"}]}, "35SpLMcN-m8": {"duration": 49.37, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=35SpLMcN-m8", "annotations": [{"segment": [5.198408099589289, 41.00966258190328], "label": "Playing kickball"}]}, "0PS48XWOsKA": {"duration": 65.09, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=0PS48XWOsKA", 
"annotations": [{"segment": [31.612633385335414, 40.74629953198128], "label": "Cleaning windows"}]}, "vska7f8Zm9s": {"duration": 57.66, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vska7f8Zm9s", "annotations": []}, "UD4RnQp5tPY": {"duration": 220.38, "subset": "training", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=UD4RnQp5tPY", "annotations": [{"segment": [57.05980230577224, 136.1185645366615], "label": "Shaving legs"}]}, "HpjomKhpIdk": {"duration": 63.484, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=HpjomKhpIdk", "annotations": [{"segment": [0.01, 62.03965268515252], "label": "Roof shingle removal"}]}, "mOVPZhGyUrs": {"duration": 67.87, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mOVPZhGyUrs", "annotations": [{"segment": [3.806583444617785, 47.15933934165367], "label": "Ping-pong"}, {"segment": [47.851769839999996, 63.654534268330735], "label": "Ping-pong"}]}, "2F5aJzMxPSM": {"duration": 83.616, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2F5aJzMxPSM", "annotations": []}, "x7QFsngAsJ0": {"duration": 99.126, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=x7QFsngAsJ0", "annotations": []}, "Ecp2ZEu882E": {"duration": 25.75, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ecp2ZEu882E", "annotations": []}, "0AbJgWxIYVI": {"duration": 202.07999999999998, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0AbJgWxIYVI", "annotations": [{"segment": [81.37074078, 144.70961547], "label": "Skateboarding"}]}, "Osw2GBrUR1Q": {"duration": 54.06, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Osw2GBrUR1Q", "annotations": []}, "evkiciK1nRc": {"duration": 58.82, "subset": "validation", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=evkiciK1nRc", "annotations": [{"segment": [0.01, 58.781], "label": "Playing squash"}]}, "vOpFUVofoM4": {"duration": 70.01, "subset": "training", "resolution": "606x480", "url": "https://www.youtube.com/watch?v=vOpFUVofoM4", "annotations": [{"segment": [19.323926677067078, 69.981], "label": "Drinking coffee"}]}, "2Ci0ek9BNvg": {"duration": 187.69, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2Ci0ek9BNvg", "annotations": []}, "zSZc7XDBUGE": {"duration": 194.56, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zSZc7XDBUGE", "annotations": []}, "-wFNUGYf298": {"duration": 224.1, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-wFNUGYf298", "annotations": [{"segment": [16.776861154446177, 224.061], "label": "Doing kickboxing"}]}, "EQrcCmgraa8": {"duration": 35.2, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EQrcCmgraa8", "annotations": []}, "Jz7bt59z6Qg": {"duration": 22.52, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Jz7bt59z6Qg", "annotations": [{"segment": [4.3598295309880815, 18.04210354967967], "label": "Horseback riding"}]}, "xeOHoiH-dmo": {"duration": 186.364, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xeOHoiH-dmo", "annotations": [{"segment": [104.66584736858862, 115.71390903527296], "label": "Fixing the roof"}]}, "5T_P4x0q0VM": {"duration": 22.9, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=5T_P4x0q0VM", "annotations": [{"segment": [1.2655046092043682, 21.549226373634948], "label": "Polishing shoes"}]}, "HtZ0bvriAvQ": {"duration": 64.44, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=HtZ0bvriAvQ", "annotations": [{"segment": [11.067701492580413, 21.505830452081973], "label": "Using the balance 
beam"}, {"segment": [33.34909215613182, 41.579155374200354], "label": "Using the balance beam"}, {"segment": [50.61215158915362, 59.64514780410689], "label": "Using the balance beam"}]}, "9H5QZzyI3iI": {"duration": 94.62, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9H5QZzyI3iI", "annotations": []}, "3xmgPNrmUYM": {"duration": 56.43, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3xmgPNrmUYM", "annotations": [{"segment": [10.298931198064249, 42.3400504809308], "label": "Snatch"}]}, "BnO8AfQS8yE": {"duration": 39.15, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=BnO8AfQS8yE", "annotations": []}, "PJ7HOHdOdy4": {"duration": 56.61, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PJ7HOHdOdy4", "annotations": []}, "eaFvnf8IF-I": {"duration": 66.78, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eaFvnf8IF-I", "annotations": [{"segment": [0.01, 66.780589569161], "label": "Playing accordion"}]}, "Vtsv9iPHDqg": {"duration": 184.41, "subset": "validation", "resolution": "634x360", "url": "https://www.youtube.com/watch?v=Vtsv9iPHDqg", "annotations": [{"segment": [75.00404864000001, 88.55095416], "label": "Long jump"}, {"segment": [105.00603691999999, 148.0194033767551], "label": "Long jump"}]}, "hGziyfXmotc": {"duration": 227.3, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hGziyfXmotc", "annotations": [{"segment": [26.24057732921562, 208.50620808112325], "label": "Playing violin"}]}, "_ekWWP0dQZM": {"duration": 145.87, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=_ekWWP0dQZM", "annotations": [{"segment": [32.76895666847082, 52.79443018809188], "label": "Ballet"}, {"segment": [58.02836076708375, 84.65313806021629], "label": "Ballet"}, {"segment": [109.68497987363493, 112.18816406162246], 
"label": "Ballet"}, {"segment": [116.51184584087363, 130.62070217316693], "label": "Ballet"}]}, "YAbfhT3fhac": {"duration": 75.07, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YAbfhT3fhac", "annotations": [{"segment": [4.503278691107644, 30.236299783151324], "label": "Cheerleading"}, {"segment": [33.74534811388455, 74.77666599999999], "label": "Cheerleading"}]}, "NojaHuKBagQ": {"duration": 225.91, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NojaHuKBagQ", "annotations": []}, "9nh_6Hzg_aw": {"duration": 192.82, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9nh_6Hzg_aw", "annotations": [{"segment": [94.79704152000001, 184.3510628049922], "label": "Cleaning windows"}, {"segment": [20.88026122, 69.73455788], "label": "Cleaning windows"}]}, "Lb7Q8qv3MHU": {"duration": 117.72999999999999, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Lb7Q8qv3MHU", "annotations": []}, "325CAs4hhq8": {"duration": 199.0, "subset": "testing", "resolution": "640x358", "url": "https://www.youtube.com/watch?v=325CAs4hhq8", "annotations": []}, "d8MgSp3oCoA": {"duration": 69.939, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=d8MgSp3oCoA", "annotations": [{"segment": [39.3473104524181, 55.914599063962555], "label": "Sumo"}]}, "mnS-nMZne3I": {"duration": 9.22, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=mnS-nMZne3I", "annotations": [{"segment": [0.06422386661466459, 9.148333], "label": "Springboard diving"}]}, "mUxpNrgAhKk": {"duration": 42.238, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mUxpNrgAhKk", "annotations": []}, "x4qwV0Ti1Og": {"duration": 57.749, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=x4qwV0Ti1Og", "annotations": [{"segment": [0, 11.981940951638064], 
"label": "Snow tubing"}]}, "AXDa8WmY2og": {"duration": 228.879, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AXDa8WmY2og", "annotations": [{"segment": [6.070116350232241, 214.59646626409275], "label": "Grooming dog"}]}, "5LveCNjz_zg": {"duration": 161.519, "subset": "training", "resolution": "328x240", "url": "https://www.youtube.com/watch?v=5LveCNjz_zg", "annotations": [{"segment": [60.957195007800316, 158.96179563182528], "label": "Clipping cat claws"}]}, "F-jZv4hqYds": {"duration": 37.64, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=F-jZv4hqYds", "annotations": [{"segment": [14.43033697347894, 34.78532449297972], "label": "Calf roping"}]}, "e4AIrRnWakA": {"duration": 203.13, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=e4AIrRnWakA", "annotations": [{"segment": [15.349531201248048, 18.197897815912636], "label": "Smoking a cigarette"}, {"segment": [97.3191926677067, 106.49726287051482], "label": "Smoking a cigarette"}, {"segment": [127.38528471138846, 132.44904758190327], "label": "Smoking a cigarette"}, {"segment": [180.2383096723869, 185.61855772230888], "label": "Smoking a cigarette"}]}, "8PfDevDmhv4": {"duration": 109.924, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8PfDevDmhv4", "annotations": []}, "loedAFEIVWE": {"duration": 32.625, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=loedAFEIVWE", "annotations": []}, "ObyxzCOV7BU": {"duration": 61.882, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=ObyxzCOV7BU", "annotations": []}, "79LyHiS908U": {"duration": 87.934, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=79LyHiS908U", "annotations": [{"segment": [0.01, 79.3988751950078], "label": "Playing congas"}]}, "Ls2QBv49HJo": {"duration": 237.378, "subset": "testing", 
"resolution": "320x240", "url": "https://www.youtube.com/watch?v=Ls2QBv49HJo", "annotations": []}, "p4YTDxTASBI": {"duration": 33.762, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=p4YTDxTASBI", "annotations": [{"segment": [0.01, 33.761814058956915], "label": "Canoeing"}]}, "2uMG44uNDJ8": {"duration": 7.082, "subset": "training", "resolution": "360x360", "url": "https://www.youtube.com/watch?v=2uMG44uNDJ8", "annotations": [{"segment": [2.1260109204368174, 3.1504617784711386], "label": "Hand car wash"}]}, "TQepyWudKYQ": {"duration": 31.7, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=TQepyWudKYQ", "annotations": [{"segment": [9.0487182074883, 28.38889455], "label": "Cricket"}]}, "Ttx3kt2fW1o": {"duration": 39.24, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ttx3kt2fW1o", "annotations": [{"segment": [0.57605788, 39.241722], "label": "Spinning"}]}, "SMhW9V-cJsQ": {"duration": 27.493, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SMhW9V-cJsQ", "annotations": [{"segment": [0.01, 25.61884243369735], "label": "Welding"}]}, "1oM26-0yFcM": {"duration": 176.19, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1oM26-0yFcM", "annotations": [{"segment": [10.981799251170047, 31.791509600000005], "label": "Discus throw"}, {"segment": [41.035949120000005, 171.31606831825272], "label": "Discus throw"}]}, "gHSa0An1RO8": {"duration": 195.025, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gHSa0An1RO8", "annotations": []}, "-Tus9Ryuc-k": {"duration": 185.946, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-Tus9Ryuc-k", "annotations": []}, "Hy54nZSDxTk": {"duration": 216.27, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=Hy54nZSDxTk", "annotations": []}, 
"nr34x_-K5c0": {"duration": 154.55, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nr34x_-K5c0", "annotations": [{"segment": [4.581105061889551, 152.62313179874135], "label": "Kayaking"}]}, "8M_RipfsJI8": {"duration": 63.0, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=8M_RipfsJI8", "annotations": [{"segment": [0, 59.09753014022003], "label": "Hand washing clothes"}]}, "aGqPaW5U7g8": {"duration": 109.668, "subset": "testing", "resolution": "425x236", "url": "https://www.youtube.com/watch?v=aGqPaW5U7g8", "annotations": []}, "en4Boe2rAuo": {"duration": 45.047, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=en4Boe2rAuo", "annotations": []}, "LLLuBjEVHI8": {"duration": 45.07, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LLLuBjEVHI8", "annotations": [{"segment": [3.2343476224648984, 4.7812095288611545], "label": "Layup drill in basketball"}, {"segment": [8.296804770670827, 9.703042867394695], "label": "Layup drill in basketball"}, {"segment": [12.585830965678626, 13.499885728549142], "label": "Layup drill in basketball"}, {"segment": [17.78891192355694, 19.687333354134164], "label": "Layup drill in basketball"}, {"segment": [22.921680976599063, 24.749790502340094], "label": "Layup drill in basketball"}, {"segment": [27.843514315132605, 29.531000031201245], "label": "Layup drill in basketball"}, {"segment": [33.2575309875195, 34.804392893915754], "label": "Layup drill in basketball"}, {"segment": [37.61686908736349, 38.81217146957878], "label": "Layup drill in basketball"}, {"segment": [42.398078616224645, 44.08556433229329], "label": "Layup drill in basketball"}]}, "HowkVy5fOyk": {"duration": 105.164, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=HowkVy5fOyk", "annotations": []}, "60tIsGR0Zzg": {"duration": 102.66, "subset": "testing", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=60tIsGR0Zzg", "annotations": []}, "1GXQLnTalcg": {"duration": 110.03999999999999, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1GXQLnTalcg", "annotations": [{"segment": [8.583413812742986, 13.561793824133918], "label": "Brushing teeth"}, {"segment": [35.36366490850111, 61.97224772800436], "label": "Brushing teeth"}, {"segment": [69.35398360696333, 86.00580640368472], "label": "Brushing teeth"}]}, "yNmV_c3rlLA": {"duration": 77.323, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=yNmV_c3rlLA", "annotations": [{"segment": [3.6188353656595247, 77.08119328854787], "label": "Playing congas"}]}, "9vdSDvLUcjc": {"duration": 29.47, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=9vdSDvLUcjc", "annotations": []}, "qXi05KUFOSk": {"duration": 82.524, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qXi05KUFOSk", "annotations": [{"segment": [0.01, 82.481], "label": "Installing carpet"}]}, "j89l589KFrg": {"duration": 182.393, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=j89l589KFrg", "annotations": [{"segment": [31.364737592382042, 51.53590007680572], "label": "Playing blackjack"}, {"segment": [67.16144848023251, 96.13973824658765], "label": "Playing blackjack"}, {"segment": [110.62888312976521, 137.05026497555963], "label": "Playing blackjack"}, {"segment": [148.69840105811414, 153.244015139111], "label": "Playing blackjack"}, {"segment": [164.89215122166553, 174.5515811437839], "label": "Playing blackjack"}]}, "iL__89eDRzg": {"duration": 89.397, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=iL__89eDRzg", "annotations": [{"segment": [8.538712673258559, 87.66782326515887], "label": "Waterskiing"}]}, "pZzTMUJG9Rk": {"duration": 214.205, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=pZzTMUJG9Rk", "annotations": [{"segment": [5.012576013244612, 36.090547295361205], "label": "Futsal"}, {"segment": [38.09557770065905, 63.15845776688211], "label": "Futsal"}, {"segment": [68.50520551434303, 103.5932376070553], "label": "Futsal"}, {"segment": [106.93495494921838, 136.00789582603713], "label": "Futsal"}, {"segment": [140.6863001050654, 175.44015912636507], "label": "Futsal"}, {"segment": [183.1261089547582, 214.20408], "label": "Futsal"}]}, "NH2TVi42xDE": {"duration": 129.196, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=NH2TVi42xDE", "annotations": [{"segment": [36.76957152599247, 49.227023027069826], "label": "Grooming dog"}, {"segment": [82.5808447880189, 115.7337384208836], "label": "Grooming dog"}]}, "nKn2uQTVo-U": {"duration": 223.01, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nKn2uQTVo-U", "annotations": [{"segment": [0, 222.961], "label": "Breakdancing"}]}, "h4m7bYDVLAc": {"duration": 24.916, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=h4m7bYDVLAc", "annotations": [{"segment": [0.01, 23.049300347893915], "label": "Hitting a pinata"}]}, "KEUJ7ulD-_A": {"duration": 230.18, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KEUJ7ulD-_A", "annotations": [{"segment": [143.6376976167482, 230.179410430839], "label": "BMX"}]}, "xfThpRE6bLc": {"duration": 207.564, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xfThpRE6bLc", "annotations": [{"segment": [15.842049151955694, 189.78128269791824], "label": "Surfing"}]}, "Mk9n7isIyBs": {"duration": 136.325, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Mk9n7isIyBs", "annotations": [{"segment": [7.441240249609985, 135.23780967238687], "label": "Baton twirling"}]}, "3xEonDbWxTM": {"duration": 108.46, "subset": "training", 
"resolution": "640x360", "url": "https://www.youtube.com/watch?v=3xEonDbWxTM", "annotations": [{"segment": [31.2883775351014, 96.2346770670827], "label": "Hanging wallpaper"}]}, "loYCYGQpGRw": {"duration": 168.82999999999998, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=loYCYGQpGRw", "annotations": [{"segment": [0.01, 129.5868890920437], "label": "Preparing pasta"}]}, "WBA-ftw3wkQ": {"duration": 208.32999999999998, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WBA-ftw3wkQ", "annotations": []}, "dgts0oITT28": {"duration": 77.69, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dgts0oITT28", "annotations": []}, "wZgSzWl5Hgg": {"duration": 103.793, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=wZgSzWl5Hgg", "annotations": [{"segment": [52.59879095163806, 93.22124180967239], "label": "Tug of war"}]}, "O_FO09g9WDM": {"duration": 111.503, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=O_FO09g9WDM", "annotations": []}, "FL9yhHtoc6c": {"duration": 220.64, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FL9yhHtoc6c", "annotations": [{"segment": [15.48926581553058, 211.34242690523948], "label": "Zumba"}]}, "V1NtrnPz3sA": {"duration": 79.13, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=V1NtrnPz3sA", "annotations": [{"segment": [0.01, 79.13360544217687], "label": "Shot put"}]}, "HzyTD2uZ8jI": {"duration": 149.23, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=HzyTD2uZ8jI", "annotations": [{"segment": [24.2729101, 145.2767858471139], "label": "Triple jump"}]}, "r5oPTDuHhoE": {"duration": 41.31, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=r5oPTDuHhoE", "annotations": [{"segment": [22.580192039999996, 
36.433269890795636], "label": "Playing field hockey"}, {"segment": [5.535797191887675, 21.62823088923557], "label": "Playing field hockey"}]}, "AmWcQz_KJG4": {"duration": 95.18, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AmWcQz_KJG4", "annotations": [{"segment": [1.1869994758190328, 93.77295858970359], "label": "Using the balance beam"}]}, "KwY8nf4MqR4": {"duration": 33.93, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=KwY8nf4MqR4", "annotations": [{"segment": [6.67149384, 32.665033479719185], "label": "Tennis serve with ball bouncing"}]}, "Q43BuVI2r-s": {"duration": 83.917, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Q43BuVI2r-s", "annotations": [{"segment": [6.195735045244628, 82.00852429758108], "label": "Installing carpet"}]}, "EI_6eT-0-X4": {"duration": 103.05, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EI_6eT-0-X4", "annotations": [{"segment": [64.9071726478684, 103.050157], "label": "Rope skipping"}]}, "S2MHi_oC3Lg": {"duration": 217.29, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=S2MHi_oC3Lg", "annotations": [{"segment": [32.54293037129485, 202.37634824648984], "label": "Doing nails"}]}, "CYj2ltxPNDo": {"duration": 190.1, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=CYj2ltxPNDo", "annotations": [{"segment": [40.78807056, 139.9813323307332], "label": "Using parallel bars"}]}, "NO5J7RjTTTo": {"duration": 43.561, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=NO5J7RjTTTo", "annotations": [{"segment": [0.2, 41.871294851794076], "label": "Table soccer"}]}, "modITAJlP_0": {"duration": 69.962, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=modITAJlP_0", "annotations": [{"segment": [4.147457876546355, 27.176762770670823], 
"label": "Elliptical trainer"}, {"segment": [32.41565679875195, 37.76369445241809], "label": "Elliptical trainer"}, {"segment": [48.35062613416536, 61.447861204368174], "label": "Elliptical trainer"}]}, "H8K1E49607E": {"duration": 97.385, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=H8K1E49607E", "annotations": [{"segment": [0.01, 94.95369734789392], "label": "Playing drums"}]}, "AZv1pI1YESM": {"duration": 61.7, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AZv1pI1YESM", "annotations": []}, "3DCGDf3lBy4": {"duration": 162.354, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=3DCGDf3lBy4", "annotations": []}, "LYShFLEK1xs": {"duration": 106.836, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LYShFLEK1xs", "annotations": []}, "mE02JHvCEUM": {"duration": 198.55, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mE02JHvCEUM", "annotations": [{"segment": [0.01, 190.96425594574802], "label": "Playing piano"}]}, "gM7JdDs5f4o": {"duration": 218.152, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gM7JdDs5f4o", "annotations": [{"segment": [23.817581903276132, 211.31535257410297], "label": "Drum corps"}]}, "FeXBmKYV8HY": {"duration": 160.8, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=FeXBmKYV8HY", "annotations": [{"segment": [31.60044617784711, 47.65146645865835], "label": "Playing violin"}, {"segment": [69.47082215288611, 72.22959126365055], "label": "Playing violin"}, {"segment": [88.28061154446178, 98.56329641185648], "label": "Playing violin"}, {"segment": [111.6047503900156, 129.41135101404055], "label": "Playing violin"}, {"segment": [140.44642745709828, 146.7163572542902], "label": "Playing violin"}]}, "s-Rg2hr7DRA": {"duration": 72.888, "subset": "testing", "resolution": "640x360", 
"url": "https://www.youtube.com/watch?v=s-Rg2hr7DRA", "annotations": []}, "tRatWgaZ-a0": {"duration": 22.15, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=tRatWgaZ-a0", "annotations": [{"segment": [0.10367473478939157, 22.1172767550702], "label": "Playing water polo"}]}, "gCDpUPvD3s4": {"duration": 51.57, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gCDpUPvD3s4", "annotations": [{"segment": [16.97595990327613, 39.34239048673947], "label": "Playing polo"}]}, "75xhANnCOEg": {"duration": 137.97, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=75xhANnCOEg", "annotations": [{"segment": [38.95960627815912, 132.59180942475794], "label": "Doing karate"}]}, "BTUgRe4aSsg": {"duration": 27.72, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=BTUgRe4aSsg", "annotations": [{"segment": [8.736933307332293, 15.311259360374413], "label": "Hammer throw"}]}, "mucFmOzqWW8": {"duration": 104.787, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mucFmOzqWW8", "annotations": [{"segment": [42.5008736349454, 48.38560998439938], "label": "Building sandcastles"}]}, "qjacthwabek": {"duration": 11.378, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qjacthwabek", "annotations": [{"segment": [0.08830733229329173, 10.474059282371295], "label": "Kneeling"}]}, "p3vw2UJvLZE": {"duration": 38.058, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=p3vw2UJvLZE", "annotations": [{"segment": [0.2, 26.45454914196568], "label": "Playing blackjack"}]}, "vNyXwHA_k5A": {"duration": 148.352, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vNyXwHA_k5A", "annotations": [{"segment": [60.56696099843994, 81.14123400936039], "label": "Playing drums"}]}, "UeH0TCDAbSU": {"duration": 139.18, 
"subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UeH0TCDAbSU", "annotations": [{"segment": [50.794062402496095, 122.20964586583463], "label": "Ironing clothes"}]}, "XxfatT0sWXw": {"duration": 150.303, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XxfatT0sWXw", "annotations": [{"segment": [0.01, 148.73605549736857], "label": "Baton twirling"}]}, "4XavNhCs-Do": {"duration": 134.142, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4XavNhCs-Do", "annotations": [{"segment": [25.13019555698383, 29.72695087801151], "label": "Slacklining"}, {"segment": [53.33755775419917, 85.51484500139296], "label": "Slacklining"}, {"segment": [105.99129972860659, 110.37911159309256], "label": "Slacklining"}, {"segment": [121.45311296536671, 123.33360376443213], "label": "Slacklining"}]}, "a2JBhm22-M4": {"duration": 74.93, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=a2JBhm22-M4", "annotations": [{"segment": [16.29676279797192, 48.07252968720748], "label": "Using uneven bars"}]}, "0vJfctL116Y": {"duration": 148.12, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0vJfctL116Y", "annotations": [{"segment": [11.784905812559032, 45.059933957878314], "label": "Preparing salad"}, {"segment": [53.37869099625585, 125.47458532886114], "label": "Preparing salad"}]}, "1fbU_MkV7NE": {"duration": 102.958, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1fbU_MkV7NE", "annotations": [{"segment": [38.53243966989054, 46.06980970058081], "label": "Doing crunches"}, {"segment": [54.569397988742814, 79.10594531117272], "label": "Doing crunches"}, {"segment": [84.71888097316648, 93.37883885167115], "label": "Doing crunches"}]}, "iNs17kcwlDk": {"duration": 100.032, "subset": "validation", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=iNs17kcwlDk", "annotations": [{"segment": [0.01, 100.031586], "label": "Playing congas"}]}, "EsYsshh2CyA": {"duration": 68.89, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EsYsshh2CyA", "annotations": [{"segment": [8.490787388455537, 68.89360544217688], "label": "Mixing drinks"}]}, "zSCZphJS2vA": {"duration": 96.94, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zSCZphJS2vA", "annotations": [{"segment": [17.241088010867372, 50.6645998673947], "label": "Hammer throw"}, {"segment": [75.31633054914197, 83.63439918408737], "label": "Hammer throw"}]}, "K1LiXUpoSmI": {"duration": 38.499, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=K1LiXUpoSmI", "annotations": []}, "8yFimnZ8XhA": {"duration": 60.117, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8yFimnZ8XhA", "annotations": [{"segment": [0.2, 59.881], "label": "River tubing"}]}, "u3nOmvgcs84": {"duration": 173.06, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=u3nOmvgcs84", "annotations": [{"segment": [36.447540513865455, 164.9588685479392], "label": "Making a sandwich"}]}, "VYOKYSmoyk0": {"duration": 79.691, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VYOKYSmoyk0", "annotations": [{"segment": [0.009999999999999998, 8.702592527301093], "label": "Croquet"}, {"segment": [22.378095070202807, 79.690883], "label": "Croquet"}, {"segment": [71.36125872386896, 79.19359199843994], "label": "Croquet"}]}, "9DIDpTlfBWs": {"duration": 163.028, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=9DIDpTlfBWs", "annotations": [{"segment": [14.487514820592823, 155.82345241809674], "label": "Making an omelette"}]}, "1B3XsffrM4M": {"duration": 115.5, "subset": "validation", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=1B3XsffrM4M", "annotations": [{"segment": [9.006318252730109, 115.2808736349454], "label": "Tai chi"}]}, "HWuRcUpcsHY": {"duration": 53.94, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=HWuRcUpcsHY", "annotations": [{"segment": [0, 53.781], "label": "Kayaking"}]}, "R2dSuxrGNwM": {"duration": 87.493, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=R2dSuxrGNwM", "annotations": []}, "CsCbJSnAhRs": {"duration": 86.936, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=CsCbJSnAhRs", "annotations": [{"segment": [15.312469578783151, 56.50707800312013], "label": "Baking cookies"}]}, "0gm_g4rYCic": {"duration": 143.569, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0gm_g4rYCic", "annotations": [{"segment": [0.2, 135.70334789391575], "label": "Rafting"}]}, "0A-p1Q0rseE": {"duration": 102.52000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0A-p1Q0rseE", "annotations": [{"segment": [25.81690449219969, 38.12589301170047], "label": "Mixing drinks"}, {"segment": [49.955310549922, 58.427731219188765], "label": "Mixing drinks"}]}, "HFDsuGHojDU": {"duration": 10.94, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=HFDsuGHojDU", "annotations": [{"segment": [0.01, 10.901], "label": "Discus throw"}]}, "5g7bqiT7Y3c": {"duration": 5.85, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5g7bqiT7Y3c", "annotations": [{"segment": [0.01, 5.705369539781591], "label": "Discus throw"}]}, "aYSm25veKTs": {"duration": 184.971, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aYSm25veKTs", "annotations": [{"segment": [12.696859569620882, 172.27329916053785], "label": "BMX"}]}, "nLAm7USuYbA": {"duration": 80.13, "subset": "validation", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nLAm7USuYbA", "annotations": [{"segment": [4.998502340093604, 78.47648673946958], "label": "High jump"}]}, "O09jaT1gJ3w": {"duration": 95.551, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=O09jaT1gJ3w", "annotations": []}, "UWRq_HzBuTw": {"duration": 23.62, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=UWRq_HzBuTw", "annotations": []}, "SeVftOMjNMM": {"duration": 16.138, "subset": "validation", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=SeVftOMjNMM", "annotations": [{"segment": [1.7906720008850097, 14.531027141071926], "label": "Doing a powerbomb"}]}, "ZdwkA72VMKE": {"duration": 16.115, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZdwkA72VMKE", "annotations": [{"segment": [0.2, 9.854823126365055], "label": "Curling"}]}, "g1YGP5x_Zug": {"duration": 63.25, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=g1YGP5x_Zug", "annotations": []}, "1qi8ZXUH_wY": {"duration": 82.57, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=1qi8ZXUH_wY", "annotations": [{"segment": [0, 61.18693381435257], "label": "Wrapping presents"}]}, "YAWoigyJ9sA": {"duration": 9.03, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YAWoigyJ9sA", "annotations": [{"segment": [0, 3.7632886115444615], "label": "Drinking coffee"}]}, "FmLxc-aNh88": {"duration": 80.57, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=FmLxc-aNh88", "annotations": [{"segment": [10.433040566302653, 69.51170401404056], "label": "Grooming horse"}]}, "u_sEVnBQ8is": {"duration": 60.047, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=u_sEVnBQ8is", "annotations": []}, "mbGpp_nDwI4": {"duration": 98.2, "subset": "validation", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mbGpp_nDwI4", "annotations": [{"segment": [11.79585543421737, 53.61752470098804], "label": "Using parallel bars"}, {"segment": [76.59646255850234, 92.52852677067082], "label": "Using parallel bars"}]}, "kkji9hespu0": {"duration": 228.86, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kkji9hespu0", "annotations": []}, "gzs6VcYc0OI": {"duration": 202.13, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=gzs6VcYc0OI", "annotations": [{"segment": [18.91179407176287, 53.268219968798746], "label": "Using the pommel horse"}, {"segment": [64.30009984399375, 123.87225117004681], "label": "Using the pommel horse"}, {"segment": [167.70117316692668, 190.08234477379096], "label": "Using the pommel horse"}]}, "QGnn2h2hreQ": {"duration": 215.06, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QGnn2h2hreQ", "annotations": []}, "dcclQadR-L0": {"duration": 46.39, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dcclQadR-L0", "annotations": [{"segment": [7.961437804450954, 42.55750354446178], "label": "Playing polo"}]}, "4q7LmVSgIPk": {"duration": 203.31, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4q7LmVSgIPk", "annotations": [{"segment": [155.7363872776911, 191.5779590951638], "label": "Fixing bicycle"}]}, "TssNxg5XJ-o": {"duration": 93.461, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TssNxg5XJ-o", "annotations": [{"segment": [0.19999999999999998, 93.241], "label": "Ice fishing"}]}, "gh5di42-RJo": {"duration": 62.323, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gh5di42-RJo", "annotations": [{"segment": [0.19445303900156005, 62.32219954648526], "label": "Playing drums"}]}, "fsSCNCY0iQY": {"duration": 63.51, "subset": "training", 
"resolution": "400x224", "url": "https://www.youtube.com/watch?v=fsSCNCY0iQY", "annotations": [{"segment": [17.931480169381487, 56.80700096960189], "label": "Putting on makeup"}]}, "4oZtb0kglx4": {"duration": 217.62, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4oZtb0kglx4", "annotations": [{"segment": [24.498137433906585, 202.45630387805434], "label": "Wrapping presents"}]}, "cf6y94fjhHw": {"duration": 95.109, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cf6y94fjhHw", "annotations": []}, "2WzsyEvhPPI": {"duration": 223.05, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2WzsyEvhPPI", "annotations": [{"segment": [54.266826833073324, 118.96958190327614], "label": "Wrapping presents"}, {"segment": [152.7261138845554, 176.73090171606867], "label": "Wrapping presents"}]}, "yRup8RDsCzk": {"duration": 179.491, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yRup8RDsCzk", "annotations": [{"segment": [13.440721572542902, 175.28941050858032], "label": "Capoeira"}]}, "QV4E2B0AdGw": {"duration": 177.07999999999998, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QV4E2B0AdGw", "annotations": [{"segment": [8.009021840873634, 50.5396895475819], "label": "Getting a tattoo"}, {"segment": [85.88985491419656, 127.31582995319812], "label": "Getting a tattoo"}]}, "h-_dzZ3cCfc": {"duration": 170.64, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=h-_dzZ3cCfc", "annotations": [{"segment": [49.51588313328452, 50.846955260523345], "label": "Playing lacrosse"}, {"segment": [77.46839780529997, 91.84397677947935], "label": "Playing lacrosse"}, {"segment": [101.96012494649447, 116.86813277156936], "label": "Playing lacrosse"}]}, "1KBpN4TKj8A": {"duration": 162.331, "subset": "testing", "resolution": "720x480", "url": 
"https://www.youtube.com/watch?v=1KBpN4TKj8A", "annotations": []}, "8381XS5ZDNs": {"duration": 68.82, "subset": "training", "resolution": "310x240", "url": "https://www.youtube.com/watch?v=8381XS5ZDNs", "annotations": [{"segment": [31.010466458658346, 51.719878315132604], "label": "Spinning"}]}, "mzxz0EQy1pY": {"duration": 234.94, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mzxz0EQy1pY", "annotations": [{"segment": [20.704179608424337, 63.94476710920436], "label": "Kayaking"}, {"segment": [64.46021343, 198.43032314976597], "label": "Kayaking"}]}, "v7o9uSu9AVI": {"duration": 202.78, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=v7o9uSu9AVI", "annotations": [{"segment": [0, 177.5484601544462], "label": "Using parallel bars"}]}, "p3PEMCN4h_g": {"duration": 77.834, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=p3PEMCN4h_g", "annotations": [{"segment": [19.30701482656366, 21.487229779834685], "label": "Running a marathon"}, {"segment": [24.636429156781727, 40.1505039231844], "label": "Running a marathon"}, {"segment": [49.11591513813768, 72.13521420355825], "label": "Running a marathon"}]}, "BYlpp0-ztfs": {"duration": 106.348, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BYlpp0-ztfs", "annotations": []}, "Zc8zn0sKfwo": {"duration": 205.29, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Zc8zn0sKfwo", "annotations": [{"segment": [70.91340330577223, 192.2505583978159], "label": "Preparing pasta"}]}, "5Qav3tLfiZA": {"duration": 6.688, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5Qav3tLfiZA", "annotations": [{"segment": [0, 5.808890795631824], "label": "Kneeling"}]}, "Zt9qvbUnNhc": {"duration": 208.539, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=Zt9qvbUnNhc", "annotations": 
[]}, "bV1UAhMjq5M": {"duration": 106.046, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bV1UAhMjq5M", "annotations": []}, "_nup1BwPnXc": {"duration": 106.688, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_nup1BwPnXc", "annotations": [{"segment": [19.975226209048362, 82.06488767550702], "label": "Tug of war"}]}, "6UjZaj86bKs": {"duration": 227.86, "subset": "validation", "resolution": "492x360", "url": "https://www.youtube.com/watch?v=6UjZaj86bKs", "annotations": [{"segment": [17.842700681762192, 221.92060349397744], "label": "Belly dance"}]}, "64oq7grVNVs": {"duration": 95.32, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=64oq7grVNVs", "annotations": [{"segment": [0.01, 2.5264227769110765], "label": "Getting a haircut"}, {"segment": [5.201458658346334, 12.334887675507021], "label": "Getting a haircut"}, {"segment": [48.89422464898596, 68.67366926677067], "label": "Getting a haircut"}, {"segment": [74.32215288611545, 78.48419344773792], "label": "Getting a haircut"}, {"segment": [82.49758970358815, 86.65963026521061], "label": "Getting a haircut"}]}, "rhfoyYzp93Q": {"duration": 67.06, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rhfoyYzp93Q", "annotations": [{"segment": [0.01, 67.059228], "label": "Canoeing"}]}, "jyYdVAyz6JA": {"duration": 23.824, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jyYdVAyz6JA", "annotations": []}, "bzZMUbtgZho": {"duration": 182.974, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bzZMUbtgZho", "annotations": [{"segment": [11.189730019198029, 16.60327986343479], "label": "Surfing"}, {"segment": [24.86606646779616, 57.06244185720426], "label": "Surfing"}, {"segment": [65.89507581359055, 68.74431257371516], "label": "Surfing"}, {"segment": [77.57694653010145, 86.6945041625002], "label": 
"Surfing"}, {"segment": [109.20347456748463, 120.60042160798307], "label": "Surfing"}, {"segment": [128.57828453633198, 162.78692231601943], "label": "Surfing"}, {"segment": [165.35151577396334, 167.3461995745864], "label": "Surfing"}]}, "7I_6nIgHK34": {"duration": 14.118, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=7I_6nIgHK34", "annotations": []}, "iAes5SVj_Fg": {"duration": 173.291, "subset": "testing", "resolution": "352x240", "url": "https://www.youtube.com/watch?v=iAes5SVj_Fg", "annotations": []}, "NQOH_mu1-gQ": {"duration": 25.45, "subset": "testing", "resolution": "1280x704", "url": "https://www.youtube.com/watch?v=NQOH_mu1-gQ", "annotations": []}, "YuCMWTdK_DY": {"duration": 30.19, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YuCMWTdK_DY", "annotations": [{"segment": [15.345533541341654, 28.626224648985957], "label": "Discus throw"}]}, "XDOtHC4E6L8": {"duration": 89.56, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=XDOtHC4E6L8", "annotations": [{"segment": [0.01, 89.55936507936508], "label": "Using parallel bars"}]}, "EML3BvJxknA": {"duration": 116.449, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=EML3BvJxknA", "annotations": [{"segment": [7.808491419656786, 99.16671762870514], "label": "Waterskiing"}, {"segment": [105.5235585023401, 113.69663962558502], "label": "Waterskiing"}]}, "xOmfJGR5fBw": {"duration": 200.923, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xOmfJGR5fBw", "annotations": [{"segment": [36.66626677067082, 150.42570982839314], "label": "Clipping cat claws"}]}, "4LF0hL-mgks": {"duration": 225.68, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4LF0hL-mgks", "annotations": []}, "4OsfdT79z8c": {"duration": 8.31, "subset": "testing", "resolution": "176x144", "url": 
"https://www.youtube.com/watch?v=4OsfdT79z8c", "annotations": []}, "wJNYaRkVQfA": {"duration": 17.183, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=wJNYaRkVQfA", "annotations": []}, "SIX51vtOOek": {"duration": 91.092, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SIX51vtOOek", "annotations": [{"segment": [34.674575086404815, 91.09181405895691], "label": "Cutting the grass"}]}, "G6ayznrS0tY": {"duration": 30.116, "subset": "validation", "resolution": "400x224", "url": "https://www.youtube.com/watch?v=G6ayznrS0tY", "annotations": [{"segment": [0.28189965678627144, 2.8659798439937596], "label": "Gargling mouthwash"}, {"segment": [13.296267145085803, 14.658782152886117], "label": "Gargling mouthwash"}]}, "1jgsRbGzCls": {"duration": 194.7, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=1jgsRbGzCls", "annotations": []}, "yCAsVc5Tb_0": {"duration": 163.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yCAsVc5Tb_0", "annotations": [{"segment": [9.920391925881118, 155.4194723447738], "label": "Cheerleading"}]}, "8RntjHIwMNo": {"duration": 35.295, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8RntjHIwMNo", "annotations": [{"segment": [7.045209752056087, 28.080977664828673], "label": "Playing drums"}]}, "QdmUojrWdmE": {"duration": 228.647, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QdmUojrWdmE", "annotations": [{"segment": [8.547547417720981, 22.793459780589284], "label": "Surfing"}, {"segment": [30.984859389238558, 35.25863309809905], "label": "Surfing"}, {"segment": [60.18897973311858, 127.85706345674303], "label": "Surfing"}, {"segment": [129.63780250210155, 162.40340093669866], "label": "Surfing"}]}, "H3PWbSF9ax4": {"duration": 66.828, "subset": "validation", "resolution": "688x720", "url": 
"https://www.youtube.com/watch?v=H3PWbSF9ax4", "annotations": [{"segment": [11.040040561622463, 55.21673946957878], "label": "Swinging at the playground"}]}, "sgwg1C3i-Vw": {"duration": 118.329, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sgwg1C3i-Vw", "annotations": [{"segment": [49.62063806552262, 96.12165522620906], "label": "Canoeing"}]}, "gA7GpvB10UY": {"duration": 187.756, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=gA7GpvB10UY", "annotations": [{"segment": [60.73617452404757, 83.8042425018867], "label": "Putting in contact lenses"}, {"segment": [106.58030961924686, 142.20441459768202], "label": "Putting in contact lenses"}]}, "n_sfeihU3f8": {"duration": 78.159, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=n_sfeihU3f8", "annotations": [{"segment": [0.009999999999999998, 78.158367], "label": "Playing ice hockey"}]}, "SFvaB2idy-4": {"duration": 217.55, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SFvaB2idy-4", "annotations": []}, "TNFoUBRsngY": {"duration": 156.364, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TNFoUBRsngY", "annotations": [{"segment": [0, 57.30957098283932], "label": "Tug of war"}, {"segment": [63.89407488299531, 113.64365990639625], "label": "Tug of war"}, {"segment": [126.81266770670827, 147.78553198127923], "label": "Tug of war"}]}, "AdXXy6WTVi0": {"duration": 114.92, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AdXXy6WTVi0", "annotations": [{"segment": [17.769387505626383, 82.56581658350801], "label": "Windsurfing"}, {"segment": [86.50372111310303, 95.632499795346], "label": "Windsurfing"}]}, "QePg4GKh3rw": {"duration": 65.9, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=QePg4GKh3rw", "annotations": [{"segment": [12.94615600624025, 
58.154954758190335], "label": "Using uneven bars"}]}, "9XjHgUP5QW0": {"duration": 42.93, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9XjHgUP5QW0", "annotations": [{"segment": [17.428980488299533, 27.130332391575664], "label": "Shaving legs"}]}, "AYTIuVLRMAQ": {"duration": 128.128, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=AYTIuVLRMAQ", "annotations": []}, "AV8q7nnOtq4": {"duration": 83.29, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AV8q7nnOtq4", "annotations": [{"segment": [4.981818336277423, 75.16837075372291], "label": "Playing drums"}]}, "c6ie4KwFnAs": {"duration": 215.691, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=c6ie4KwFnAs", "annotations": [{"segment": [16.15149797828648, 127.52953612022031], "label": "Futsal"}, {"segment": [127.86602566143462, 192.13552721528862], "label": "Futsal"}]}, "2M4NxbRxDek": {"duration": 26.889, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2M4NxbRxDek", "annotations": []}, "2giX_iWq5mg": {"duration": 70.078, "subset": "testing", "resolution": "400x224", "url": "https://www.youtube.com/watch?v=2giX_iWq5mg", "annotations": []}, "Zzcj8Eea3Mc": {"duration": 84.289, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Zzcj8Eea3Mc", "annotations": [{"segment": [37.621741316083195, 76.29217309776138], "label": "Elliptical trainer"}]}, "aCknCFmU0sA": {"duration": 95.47999999999999, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aCknCFmU0sA", "annotations": [{"segment": [6.107174093766471, 61.369651869067965], "label": "Brushing hair"}]}, "UElXV5zgUsE": {"duration": 72.725, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=UElXV5zgUsE", "annotations": []}, "TrWAMPr_h6Q": {"duration": 47.973, "subset": "testing", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=TrWAMPr_h6Q", "annotations": []}, "Dl0JNkGbZT4": {"duration": 153.48, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Dl0JNkGbZT4", "annotations": [{"segment": [61.05833784371782, 140.79334260842433], "label": "Getting a piercing"}]}, "mNM01g9wLy4": {"duration": 75.396, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mNM01g9wLy4", "annotations": [{"segment": [12.213496372588494, 70.22760414238385], "label": "Elliptical trainer"}]}, "mFDC1CLt6B4": {"duration": 74.931, "subset": "validation", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=mFDC1CLt6B4", "annotations": [{"segment": [34.36360031104199, 71.5228833592535], "label": "BMX"}]}, "_ENXbB6aaa4": {"duration": 136.371, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_ENXbB6aaa4", "annotations": [{"segment": [8.578455775715481, 110.71303677260022], "label": "Longboarding"}]}, "PDEUAMa8aOU": {"duration": 125.41, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PDEUAMa8aOU", "annotations": [{"segment": [50.7308532, 119.73680800624025], "label": "Getting a haircut"}]}, "hpU50i4WBK8": {"duration": 231.085, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=hpU50i4WBK8", "annotations": [{"segment": [7.928957878315132, 231.021], "label": "Scuba diving"}]}, "oNN6NE8fq0U": {"duration": 196.09, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oNN6NE8fq0U", "annotations": [{"segment": [36.69266770670827, 176.73634945397816], "label": "Playing accordion"}]}, "D4wcmmQsPng": {"duration": 74.444, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=D4wcmmQsPng", "annotations": [{"segment": [0.2, 69.29391107644305], "label": "Playing ice hockey"}]}, "GBTpVjwLxe0": {"duration": 
23.22, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=GBTpVjwLxe0", "annotations": [{"segment": [2.189797971918877, 17.319311232449298], "label": "Kayaking"}]}, "Mvw0hvMeM-w": {"duration": 216.132, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Mvw0hvMeM-w", "annotations": [{"segment": [49.139915887850464, 198.59749221183802], "label": "BMX"}]}, "Pyf_5Bffu5A": {"duration": 198.369, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Pyf_5Bffu5A", "annotations": [{"segment": [6.497255850234009, 192.77136193447737], "label": "Beach soccer"}]}, "ZkJmA1TNdPQ": {"duration": 193.423, "subset": "testing", "resolution": "400x224", "url": "https://www.youtube.com/watch?v=ZkJmA1TNdPQ", "annotations": []}, "uqzy1FAUpPM": {"duration": 56.61, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=uqzy1FAUpPM", "annotations": [{"segment": [10.153569599999999, 15.896793912572829], "label": "Triple jump"}]}, "6J0IdWi4O0Q": {"duration": 58.56, "subset": "validation", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=6J0IdWi4O0Q", "annotations": [{"segment": [3.8308891918876755, 47.79490325117004], "label": "Tennis serve with ball bouncing"}, {"segment": [15.338911900000001, 41.500193149999994], "label": "Tennis serve with ball bouncing"}]}, "ZGK-w7-bkNw": {"duration": 134.212, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ZGK-w7-bkNw", "annotations": [{"segment": [24.51603987311656, 96.22070490738446], "label": "Braiding hair"}]}, "OCdmlTxq1Co": {"duration": 115.264, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OCdmlTxq1Co", "annotations": [{"segment": [9.33092211838006, 17.947196261682244], "label": "Welding"}, {"segment": [52.405813084112154, 68.0198738317757], "label": "Welding"}, {"segment": [90.09492523364486, 103.55532242990655], 
"label": "Welding"}]}, "TiiSh9R89vM": {"duration": 209.793, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TiiSh9R89vM", "annotations": [{"segment": [29.12160530421217, 41.882758190327614], "label": "Snow tubing"}, {"segment": [52.02623868954758, 71.65878159126366], "label": "Snow tubing"}, {"segment": [82.78388923556943, 86.05597971918877], "label": "Snow tubing"}, {"segment": [142.3359360374415, 161.31406084243372], "label": "Snow tubing"}, {"segment": [175.72801404056162, 202.56171450858034], "label": "Snow tubing"}]}, "88qqQjRPFUQ": {"duration": 30.953, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=88qqQjRPFUQ", "annotations": [{"segment": [21.608548053627587, 25.616390485920526], "label": "Fun sliding down"}]}, "uMGfCaGMnEE": {"duration": 73.38, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=uMGfCaGMnEE", "annotations": [{"segment": [12.343057666146645, 65.02962788923557], "label": "Mixing drinks"}]}, "arfBwR8qgPw": {"duration": 5.46, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=arfBwR8qgPw", "annotations": [{"segment": [3.1559531981279254, 5.209], "label": "High jump"}]}, "rFTVKkMqpIQ": {"duration": 230.48, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=rFTVKkMqpIQ", "annotations": [{"segment": [29.84390833697348, 110.74606949141966], "label": "Washing hands"}]}, "L6Uf3epn3zA": {"duration": 187.571, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=L6Uf3epn3zA", "annotations": [{"segment": [11.683551401869158, 16.35697196261682], "label": "Canoeing"}, {"segment": [32.129766355140184, 70.39339719626167], "label": "Canoeing"}, {"segment": [87.33454672897196, 114.20671495327102], "label": "Canoeing"}, {"segment": [125.30608878504673, 137.0042507788162], "label": "Canoeing"}, {"segment": [146.64420872274144, 
157.74476635514017], "label": "Canoeing"}, {"segment": [169.13744392523364, 172.3507632398754], "label": "Canoeing"}]}, "0xtS7g7vZLU": {"duration": 145.195, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0xtS7g7vZLU", "annotations": [{"segment": [53.43531318787103, 58.1846619028243], "label": "Shuffleboard"}, {"segment": [65.87408363179625, 67.90951879534767], "label": "Shuffleboard"}, {"segment": [80.57444870188972, 123.54474659908598], "label": "Shuffleboard"}]}, "qKSmh8Q0zBc": {"duration": 78.414, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=qKSmh8Q0zBc", "annotations": []}, "mRbqt5ugQSE": {"duration": 165.164, "subset": "validation", "resolution": "1908x1080", "url": "https://www.youtube.com/watch?v=mRbqt5ugQSE", "annotations": [{"segment": [0.2, 164.80100000000002], "label": "Surfing"}]}, "nuEK3POl9jA": {"duration": 40.01, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=nuEK3POl9jA", "annotations": [{"segment": [10.610540717628705, 31.769207207488304], "label": "Clean and jerk"}]}, "66BGqvwHbUw": {"duration": 30.26, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=66BGqvwHbUw", "annotations": [{"segment": [17.629432490639626, 21.40548228783151], "label": "Playing guitarra"}, {"segment": [26.975155738689548, 29.00478250468019], "label": "Playing guitarra"}]}, "s0-xTG38cPw": {"duration": 46.12, "subset": "validation", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=s0-xTG38cPw", "annotations": [{"segment": [0, 46.114829], "label": "Smoking hookah"}]}, "jaaWdcA_COY": {"duration": 169.019, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jaaWdcA_COY", "annotations": [{"segment": [30.861724906398496, 47.1808401712079], "label": "Hula hoop"}, {"segment": [63.497946090211016, 86.92088845781227], "label": "Hula hoop"}]}, "3ZR5ameytF4": {"duration": 
100.102, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3ZR5ameytF4", "annotations": []}, "5JkN8t5k0jU": {"duration": 41.75, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5JkN8t5k0jU", "annotations": [{"segment": [7.522721937448926, 34.03136101677067], "label": "Clean and jerk"}]}, "TlDkQ8SNG64": {"duration": 25.844, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TlDkQ8SNG64", "annotations": [{"segment": [5.393656786271451, 25.601], "label": "Camel ride"}]}, "Hz7-mBFGsSs": {"duration": 204.13, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Hz7-mBFGsSs", "annotations": []}, "si2M-Sm9fnI": {"duration": 239.03, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=si2M-Sm9fnI", "annotations": []}, "akUXL2VzFEs": {"duration": 56.982, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=akUXL2VzFEs", "annotations": [{"segment": [0.2, 55.82793059657321], "label": "River tubing"}]}, "cW2R4AuUnK8": {"duration": 193.144, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cW2R4AuUnK8", "annotations": [{"segment": [9.02341121495327, 187.98773364485982], "label": "Rafting"}]}, "exPOJAoyG7Q": {"duration": 88.59, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=exPOJAoyG7Q", "annotations": [{"segment": [14.50288600624025, 77.62497081435257], "label": "Tumbling"}]}, "h4Cf5u1j0TU": {"duration": 101.727, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=h4Cf5u1j0TU", "annotations": [{"segment": [0.20000000000000004, 71.03460470882577], "label": "Having an ice cream"}, {"segment": [75.62258601520212, 96.82222377569978], "label": "Having an ice cream"}]}, "JZz2O0y0ufY": {"duration": 14.28, "subset": "validation", "resolution": "1920x1080", 
"url": "https://www.youtube.com/watch?v=JZz2O0y0ufY", "annotations": [{"segment": [0.01, 14.280272108843537], "label": "Chopping wood"}]}, "8UXuHMmOYGI": {"duration": 60.256, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8UXuHMmOYGI", "annotations": [{"segment": [5.167168486739469, 60.201], "label": "Sharpening knives"}]}, "JvPVYSb_5o8": {"duration": 24.59, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=JvPVYSb_5o8", "annotations": []}, "iEGYd3DJ3Wo": {"duration": 17.508, "subset": "training", "resolution": "384x240", "url": "https://www.youtube.com/watch?v=iEGYd3DJ3Wo", "annotations": [{"segment": [0.01, 17.507346], "label": "Playing ice hockey"}]}, "WvdGPaEAhF8": {"duration": 99.28999999999999, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=WvdGPaEAhF8", "annotations": []}, "faF9-lWmK3Y": {"duration": 71.59, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=faF9-lWmK3Y", "annotations": [{"segment": [13.629219137066993, 70.47674250155298], "label": "Hand washing clothes"}]}, "kdfJW8YV378": {"duration": 231.55, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kdfJW8YV378", "annotations": [{"segment": [7.788577299999999, 168.73783605], "label": "Sailing"}]}, "qlK-c7hRqcA": {"duration": 174.85, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qlK-c7hRqcA", "annotations": []}, "RNqJj9tt3F0": {"duration": 214.762, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RNqJj9tt3F0", "annotations": [{"segment": [45.49455118288229, 94.66880871143888], "label": "Playing ice hockey"}, {"segment": [96.67592126362487, 107.71504030064777], "label": "Playing ice hockey"}, {"segment": [172.94619824669226, 179.30205375700933], "label": "Playing ice hockey"}]}, "eFi84LnF1TI": {"duration": 47.18, "subset": "testing", 
"resolution": "800x480", "url": "https://www.youtube.com/watch?v=eFi84LnF1TI", "annotations": []}, "PVAHm5MjHdw": {"duration": 172.873, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PVAHm5MjHdw", "annotations": [{"segment": [9.424506749740395, 169.10257634267913], "label": "Baton twirling"}]}, "FGtoyNOi6gg": {"duration": 105.16, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FGtoyNOi6gg", "annotations": [{"segment": [13.617072530520268, 22.968556075576355], "label": "Bungee jumping"}, {"segment": [50.85894559591907, 56.108901270336524], "label": "Bungee jumping"}, {"segment": [62.671345503900156, 85.3117792199688], "label": "Bungee jumping"}, {"segment": [99.58509612792511, 101.38976838065523], "label": "Bungee jumping"}]}, "cRpzDPS6zeY": {"duration": 165.47, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cRpzDPS6zeY", "annotations": [{"segment": [0.01, 165.46539682539682], "label": "Cumbia"}]}, "LXHc1X1pKEM": {"duration": 201.36, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LXHc1X1pKEM", "annotations": [{"segment": [135.53610218408738, 195.21596177847115], "label": "Shaving legs"}]}, "VOGF4tBFEuw": {"duration": 30.046, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=VOGF4tBFEuw", "annotations": [{"segment": [0.2, 29.893061224489795], "label": "Cutting the grass"}]}, "KOFXi1RYx_g": {"duration": 79.16, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KOFXi1RYx_g", "annotations": [{"segment": [5.51796476, 65.16792511700469], "label": "Playing polo"}]}, "JI5_cMwgnxI": {"duration": 144.452, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JI5_cMwgnxI", "annotations": [{"segment": [45.07062023977558, 52.056566376940786], "label": "Trimming branches or hedges"}]}, "M_WEOecjwLY": {"duration": 
100.97999999999999, "subset": "validation", "resolution": "384x288", "url": "https://www.youtube.com/watch?v=M_WEOecjwLY", "annotations": [{"segment": [35.15925111, 75.86549841], "label": "Plataform diving"}, {"segment": [82.23623912948518, 98.93555205616225], "label": "Plataform diving"}]}, "qObVIn_GI10": {"duration": 172.223, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qObVIn_GI10", "annotations": []}, "F_DHgp5hERs": {"duration": 133.306, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=F_DHgp5hERs", "annotations": [{"segment": [0.01, 133.201], "label": "Using the rowing machine"}]}, "uA3dbKjT9rM": {"duration": 166.63, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uA3dbKjT9rM", "annotations": [{"segment": [82.14343319855243, 91.24159826801235], "label": "Getting a piercing"}, {"segment": [96.18060216286202, 101.37955363112484], "label": "Getting a piercing"}]}, "GG_Bi89pNlg": {"duration": 34.76, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GG_Bi89pNlg", "annotations": [{"segment": [0.01, 34.67892871216849], "label": "Discus throw"}]}, "FCzT4Knfhqs": {"duration": 142.664, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=FCzT4Knfhqs", "annotations": [{"segment": [38.503545020712394, 78.1199092616766], "label": "Hitting a pinata"}, {"segment": [90.13835684039606, 104.15987825897035], "label": "Hitting a pinata"}]}, "WmPkqkqAxyc": {"duration": 39.358, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WmPkqkqAxyc", "annotations": [{"segment": [0.01, 38.866617720748835], "label": "Cutting the grass"}]}, "tl3wQEusmj8": {"duration": 107.207, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tl3wQEusmj8", "annotations": [{"segment": [12.538338533541342, 100.80824180967238], "label": 
"Canoeing"}]}, "DzxPreFrmFE": {"duration": 195.443, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DzxPreFrmFE", "annotations": [{"segment": [11.99441576251715, 195.441994], "label": "Playing ten pins"}]}, "VeU_EBE-YPE": {"duration": 144.41, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VeU_EBE-YPE", "annotations": [{"segment": [0, 144.404647], "label": "Playing guitarra"}]}, "kdQfR6oqJ_M": {"duration": 80.06, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kdQfR6oqJ_M", "annotations": [{"segment": [0, 24.73066426521061], "label": "Playing polo"}, {"segment": [48.96171915132605, 80.062403], "label": "Playing polo"}]}, "jcRdr9jC9vQ": {"duration": 134.63, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jcRdr9jC9vQ", "annotations": []}, "7Ozl-ud1zTo": {"duration": 155.174, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7Ozl-ud1zTo", "annotations": [{"segment": [8.313923110213235, 26.68189195756526], "label": "Surfing"}, {"segment": [37.07429538435654, 154.961], "label": "Surfing"}]}, "k3WPc9HmtLM": {"duration": 219.87, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=k3WPc9HmtLM", "annotations": []}, "nSR-JSqGoWQ": {"duration": 143.082, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nSR-JSqGoWQ", "annotations": [{"segment": [0.01, 143.041], "label": "Beach soccer"}]}, "ZlHaPkhGr_g": {"duration": 231.09, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=ZlHaPkhGr_g", "annotations": [{"segment": [2.523549, 198.27885000000003], "label": "Playing harmonica"}]}, "ZlwkO1oFBHw": {"duration": 140.97, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZlwkO1oFBHw", "annotations": [{"segment": [11.761792716068642, 
129.15987328393135], "label": "Layup drill in basketball"}]}, "Cdmd9VB8tJI": {"duration": 59.1, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Cdmd9VB8tJI", "annotations": [{"segment": [1.5672563354134166, 58.08067595943838], "label": "Bathing dog"}]}, "YmhbwAv0Xbc": {"duration": 125.551, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YmhbwAv0Xbc", "annotations": [{"segment": [6.265260530421218, 86.53891107644306], "label": "Snow tubing"}, {"segment": [96.71995943837754, 125.301], "label": "Snow tubing"}]}, "rdkrg8Bj9_I": {"duration": 82.62, "subset": "validation", "resolution": "854x478", "url": "https://www.youtube.com/watch?v=rdkrg8Bj9_I", "annotations": [{"segment": [5.542143113104523, 75.52781079719189], "label": "Long jump"}]}, "YIYZHQUOtnA": {"duration": 123.089, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YIYZHQUOtnA", "annotations": [{"segment": [18.818564511700465, 92.55661321060842], "label": "Playing ten pins"}]}, "q2TF-3bWZuU": {"duration": 134.26, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=q2TF-3bWZuU", "annotations": [{"segment": [63.463375525741036, 126.29840079875196], "label": "Horseback riding"}]}, "3cmHM1Yu92s": {"duration": 75.442, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3cmHM1Yu92s", "annotations": [{"segment": [4.354665185647426, 24.24489265522621], "label": "Elliptical trainer"}, {"segment": [48.13670434945398, 66.8499952823713], "label": "Elliptical trainer"}]}, "4Lrq6RXeRHI": {"duration": 9.78, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4Lrq6RXeRHI", "annotations": []}, "NJRrhNrEMsk": {"duration": 226.046, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NJRrhNrEMsk", "annotations": []}, "ZLmoqxkCJL0": {"duration": 158.01, "subset": "validation", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZLmoqxkCJL0", "annotations": [{"segment": [111.40001872074883, 134.8137394695788], "label": "Removing curlers"}]}, "O9crPOB_9tE": {"duration": 227.649, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=O9crPOB_9tE", "annotations": [{"segment": [0.01, 85.59012885179408], "label": "Surfing"}, {"segment": [64.99167460530421, 227.44843400000002], "label": "Surfing"}]}, "H_JLBvfP88U": {"duration": 214.042, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=H_JLBvfP88U", "annotations": []}, "ucR3VztMyeE": {"duration": 30.581, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ucR3VztMyeE", "annotations": [{"segment": [0.2, 28.848773790951636], "label": "Using the monkey bar"}]}, "z6GHmDSskgY": {"duration": 189.73, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=z6GHmDSskgY", "annotations": [{"segment": [0.8877425897035882, 188.51721840873634], "label": "Doing step aerobics"}]}, "j9QPrMZuegY": {"duration": 121.46, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=j9QPrMZuegY", "annotations": [{"segment": [6.821667575663027, 40.93000545397816], "label": "Clean and jerk"}, {"segment": [40.93000545397816, 66.32176809672387], "label": "Clean and jerk"}, {"segment": [71.24852801248049, 96.45079988923558], "label": "Clean and jerk"}, {"segment": [100.61959674102965, 115.96834878627146], "label": "Clean and jerk"}]}, "rScqKu8Mwhk": {"duration": 55.26, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=rScqKu8Mwhk", "annotations": [{"segment": [16.55309458970359, 17.760091070202808], "label": "Vacuuming floor"}, {"segment": [26.55663465, 39.83088385647426], "label": "Vacuuming floor"}, {"segment": [43.1932311950078, 45.089939950078005], "label": "Vacuuming floor"}]}, "X5UoLcloHIM": 
{"duration": 162.099, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=X5UoLcloHIM", "annotations": [{"segment": [12.391305252210088, 153.50045353198126], "label": "Longboarding"}]}, "ymJTN8aKZEw": {"duration": 86.286, "subset": "validation", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=ymJTN8aKZEw", "annotations": [{"segment": [0.01, 86.2853514739229], "label": "Hitting a pinata"}]}, "b1RAYvxWawA": {"duration": 181.859, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=b1RAYvxWawA", "annotations": [{"segment": [0.2, 13.618114553153555], "label": "Canoeing"}, {"segment": [27.80365054602184, 172.49611631825272], "label": "Canoeing"}]}, "HfN967uah8o": {"duration": 154.645, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HfN967uah8o", "annotations": [{"segment": [12.541734789391576, 146.90201092043682], "label": "Snow tubing"}]}, "eQnwUWfah9w": {"duration": 168.252, "subset": "training", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=eQnwUWfah9w", "annotations": [{"segment": [17.323897396712198, 151.7153438681765], "label": "Futsal"}]}, "-YAhgJdZ4TU": {"duration": 152.23, "subset": "testing", "resolution": "1152x720", "url": "https://www.youtube.com/watch?v=-YAhgJdZ4TU", "annotations": []}, "lM3uGxHWZBA": {"duration": 144.2, "subset": "training", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=lM3uGxHWZBA", "annotations": [{"segment": [2.6994555700595373, 10.122958387723264], "label": "Cricket"}, {"segment": [22.945372345506062, 35.31787704161228], "label": "Cricket"}]}, "KEU4Sj6x1p4": {"duration": 183.3, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KEU4Sj6x1p4", "annotations": [{"segment": [12.69888544, 172.0583619344774], "label": "Triple jump"}]}, "p2iL_CeHNwM": {"duration": 126.967, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=p2iL_CeHNwM", "annotations": []}, "FWZ6wgaJ_L4": {"duration": 29.165, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FWZ6wgaJ_L4", "annotations": []}, "v0azz4XzW_s": {"duration": 32.392, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=v0azz4XzW_s", "annotations": []}, "HgFb-8lxeMI": {"duration": 214.99, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=HgFb-8lxeMI", "annotations": []}, "6d9z33GFEGo": {"duration": 165.79, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=6d9z33GFEGo", "annotations": [{"segment": [47.19461388455539, 152.44506786271452], "label": "Preparing pasta"}]}, "9JMbahMzBjk": {"duration": 52.617, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9JMbahMzBjk", "annotations": [{"segment": [0.9343103157107153, 51.91158563035246], "label": "Decorating the Christmas tree"}]}, "O0nOzufJ_OM": {"duration": 181.21, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=O0nOzufJ_OM", "annotations": [{"segment": [21.900500479719188, 36.029855627925116], "label": "Discus throw"}, {"segment": [57.78906255616225, 67.96219826287052], "label": "Discus throw"}, {"segment": [77.57015976365054, 87.46070836739469], "label": "Discus throw"}, {"segment": [115.43683156084242, 122.21892203198128], "label": "Discus throw"}, {"segment": [146.23882578393133, 160.08559382917315], "label": "Discus throw"}, {"segment": [164.32440037363492, 167.71544560920435], "label": "Discus throw"}]}, "NOXPH0_y4Ww": {"duration": 208.747, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=NOXPH0_y4Ww", "annotations": [{"segment": [0.2, 30.60513884555382], "label": "Making a lemonade"}, {"segment": [126.00200780031201, 185.25876599063963], "label": "Making a lemonade"}]}, "RoAtTRjWKEg": {"duration": 
6.97, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RoAtTRjWKEg", "annotations": [{"segment": [0, 6.729598721911814], "label": "Using the balance beam"}]}, "ox2AGCcE9a0": {"duration": 40.06, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ox2AGCcE9a0", "annotations": [{"segment": [14.247125990639626, 33.618218346333855], "label": "Clean and jerk"}]}, "KpOSIph2hHw": {"duration": 62.694, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KpOSIph2hHw", "annotations": [{"segment": [5.4071954760328635, 57.91750388724782], "label": "River tubing"}]}, "xftFhOCEqFs": {"duration": 166.79, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=xftFhOCEqFs", "annotations": [{"segment": [12.745198127925116, 25.750502340093608], "label": "Pole vault"}, {"segment": [28.351563182527304, 33.03347269890796], "label": "Pole vault"}, {"segment": [33.81379095163807, 36.414851794071765], "label": "Pole vault"}, {"segment": [36.9350639625585, 81.67331045241811], "label": "Pole vault"}, {"segment": [86.61532605304212, 141.23760374414977], "label": "Pole vault"}]}, "wohWWAB47KM": {"duration": 222.215, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wohWWAB47KM", "annotations": [{"segment": [0, 221.961], "label": "Wakeboarding"}]}, "USaP2UCPBEc": {"duration": 146.17, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=USaP2UCPBEc", "annotations": [{"segment": [24.37480031201248, 27.56402652106084], "label": "Drum corps"}]}, "K3Z3z8t-RIQ": {"duration": 203.756, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=K3Z3z8t-RIQ", "annotations": [{"segment": [9.853990894329652, 196.4440755351014], "label": "Rope skipping"}]}, "c8VbTNlH47w": {"duration": 63.019, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=c8VbTNlH47w", "annotations": []}, "Ir_Ul8FaXs4": {"duration": 37.733, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Ir_Ul8FaXs4", "annotations": [{"segment": [12.072025159774316, 16.891431468185534], "label": "Building sandcastles"}]}, "9IwS7pfJXu4": {"duration": 211.09, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=9IwS7pfJXu4", "annotations": [{"segment": [0.01, 211.09256235827664], "label": "Preparing salad"}]}, "QXAs-KJj7K8": {"duration": 126.062, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QXAs-KJj7K8", "annotations": [{"segment": [41.08300936037441, 51.30461934477379], "label": "Croquet"}, {"segment": [59.363965678627146, 68.02381591263651], "label": "Croquet"}, {"segment": [75.69124024960999, 81.1960577223089], "label": "Croquet"}, {"segment": [104.98473322932918, 122.67878939157566], "label": "Croquet"}]}, "5i2Vf0seCWI": {"duration": 141.6, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5i2Vf0seCWI", "annotations": []}, "8w9pf_Ecm_U": {"duration": 100.079, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8w9pf_Ecm_U", "annotations": [{"segment": [44.722378504672896, 88.82144859813084], "label": "Baton twirling"}]}, "hZRPw0IZ7Os": {"duration": 27.702, "subset": "testing", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=hZRPw0IZ7Os", "annotations": []}, "DWsO49YhWUI": {"duration": 64.06, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DWsO49YhWUI", "annotations": [{"segment": [25.752102964118567, 45.31571606864275], "label": "Washing face"}]}, "G72bQBMZmzs": {"duration": 197.626, "subset": "training", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=G72bQBMZmzs", "annotations": [{"segment": [19.3628946659464, 175.80282755563934], "label": "Sumo"}]}, 
"PLvg8riZZVw": {"duration": 159.034, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PLvg8riZZVw", "annotations": [{"segment": [0.2, 152.29488317757009], "label": "Ice fishing"}]}, "bIxkfJuPGIU": {"duration": 100.57, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bIxkfJuPGIU", "annotations": []}, "NwfS7eZiMLg": {"duration": 134.72, "subset": "training", "resolution": "810x1440", "url": "https://www.youtube.com/watch?v=NwfS7eZiMLg", "annotations": [{"segment": [69.79507365, 128.6871099843994], "label": "Playing bagpipes"}, {"segment": [16.3045261, 58.672836149999995], "label": "Playing bagpipes"}]}, "tP0viuKibJU": {"duration": 6.22, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=tP0viuKibJU", "annotations": [{"segment": [1.3785623619344773, 5.028840165366614], "label": "Tumbling"}]}, "ykPa3jGYdAM": {"duration": 107.42, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ykPa3jGYdAM", "annotations": []}, "D0pVkTEYQg8": {"duration": 21.41, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=D0pVkTEYQg8", "annotations": [{"segment": [0.01, 20.82845748829953], "label": "Spinning"}]}, "ZY8UyWtoMWg": {"duration": 37.57, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZY8UyWtoMWg", "annotations": [{"segment": [19.258048361934478, 24.760347893915757], "label": "Roof shingle removal"}, {"segment": [31.784560062402495, 33.189402496099845], "label": "Roof shingle removal"}]}, "aYbZ5DFWN0c": {"duration": 234.15, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aYbZ5DFWN0c", "annotations": []}, "-fWbEt6saQ0": {"duration": 176.82, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-fWbEt6saQ0", "annotations": []}, "cU95GxjPb3A": {"duration": 46.58, "subset": "testing", 
"resolution": "320x240", "url": "https://www.youtube.com/watch?v=cU95GxjPb3A", "annotations": []}, "BgXRQDT0byI": {"duration": 70.427, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=BgXRQDT0byI", "annotations": []}, "CqscMsSNiNY": {"duration": 235.27, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CqscMsSNiNY", "annotations": [{"segment": [28.995127581974028, 227.55669647425896], "label": "Windsurfing"}]}, "Hcoc9euI0tk": {"duration": 69.242, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Hcoc9euI0tk", "annotations": [{"segment": [0.2, 69.00099999999999], "label": "Skiing"}]}, "phEu4AcX3go": {"duration": 218.27, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=phEu4AcX3go", "annotations": []}, "8vHbM54afHA": {"duration": 157.989, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=8vHbM54afHA", "annotations": []}, "oT42qP2zIXo": {"duration": 155.156, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oT42qP2zIXo", "annotations": [{"segment": [0.2, 154.95573696145127], "label": "Sharpening knives"}]}, "SXRSA62VbZg": {"duration": 197.7, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SXRSA62VbZg", "annotations": [{"segment": [85.69931848361934, 176.02270091419655], "label": "Getting a tattoo"}]}, "KoP6ZgL6fMY": {"duration": 172.64, "subset": "training", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=KoP6ZgL6fMY", "annotations": [{"segment": [10.0614592, 86.56592286973478], "label": "Long jump"}]}, "0F8F-ON083s": {"duration": 66.78, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0F8F-ON083s", "annotations": [{"segment": [4.579912636505461, 32.37165522620905], "label": "Plataform diving"}, {"segment": [32.996188767550706, 62.869709828393134], 
"label": "Plataform diving"}]}, "diY-XBT5cD4": {"duration": 115.868, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=diY-XBT5cD4", "annotations": []}, "-DpnaHTk8PA": {"duration": 209.607, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=-DpnaHTk8PA", "annotations": [{"segment": [17.715649759108405, 130.33020583387477], "label": "Arm wrestling"}, {"segment": [134.5736528743732, 206.71225256284674], "label": "Arm wrestling"}]}, "-OH1BDqao9w": {"duration": 208.562, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=-OH1BDqao9w", "annotations": [{"segment": [58.24107994523863, 95.33316437963641], "label": "Hand car wash"}]}, "tAgVokWkdnQ": {"duration": 207.03, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tAgVokWkdnQ", "annotations": [{"segment": [60.05420592823713, 85.88397191887677], "label": "Drum corps"}]}, "jt-Vnap5KP4": {"duration": 50.67, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=jt-Vnap5KP4", "annotations": []}, "wsmMniNThlI": {"duration": 140.9, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wsmMniNThlI", "annotations": [{"segment": [1.098603744149766, 140.18183775351014], "label": "Horseback riding"}]}, "JyjONoyBr4Q": {"duration": 172.339, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JyjONoyBr4Q", "annotations": [{"segment": [6.448960998439938, 169.84235881435256], "label": "Disc dog"}]}, "VIROYxBPp70": {"duration": 37.85, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VIROYxBPp70", "annotations": [{"segment": [0.01, 37.801], "label": "Doing kickboxing"}]}, "dEUFGAwRhEM": {"duration": 213.252, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dEUFGAwRhEM", "annotations": [{"segment": 
[15.965129485179407, 58.538808112324496], "label": "Canoeing"}, {"segment": [83.15171606864274, 109.09505148205928], "label": "Canoeing"}, {"segment": [139.02966926677067, 210.89253354134166], "label": "Canoeing"}]}, "J2TQsCCFPF8": {"duration": 61.88, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=J2TQsCCFPF8", "annotations": []}, "Kofjs0rkwxc": {"duration": 131.43, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Kofjs0rkwxc", "annotations": []}, "dSOVaLk3ZmU": {"duration": 8.36, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=dSOVaLk3ZmU", "annotations": [{"segment": [0, 8.359182], "label": "Kayaking"}]}, "60Y0DfZhlHM": {"duration": 169.901, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=60Y0DfZhlHM", "annotations": [{"segment": [5.020353137486762, 169.10676200239513], "label": "Fixing the roof"}]}, "E9EU08OD-M0": {"duration": 38.197, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=E9EU08OD-M0", "annotations": []}, "LChqFAESyCg": {"duration": 140.74, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=LChqFAESyCg", "annotations": [{"segment": [23.07737975442248, 115.36696951143182], "label": "Putting on makeup"}]}, "ZT63nK4lexg": {"duration": 93.18, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZT63nK4lexg", "annotations": [{"segment": [41.94828237129485, 79.97752106084243], "label": "Using parallel bars"}]}, "r_jT3a-_K4I": {"duration": 222.123, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=r_jT3a-_K4I", "annotations": []}, "JKFxKhCQPUs": {"duration": 45.91, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JKFxKhCQPUs", "annotations": [{"segment": [9.1525389, 40.600006205148205], "label": "Using the pommel 
horse"}]}, "I7Sole68Gag": {"duration": 96.43, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=I7Sole68Gag", "annotations": [{"segment": [0.01, 95.37938530421216], "label": "Playing water polo"}]}, "zE8dA1_QKZg": {"duration": 203.291, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zE8dA1_QKZg", "annotations": []}, "D4lIN0Fn3MM": {"duration": 182.695, "subset": "testing", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=D4lIN0Fn3MM", "annotations": []}, "sf6q25u_nac": {"duration": 155.46, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=sf6q25u_nac", "annotations": []}, "6y_gnZgf0N8": {"duration": 11.564, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6y_gnZgf0N8", "annotations": [{"segment": [0.01, 11.521], "label": "Doing a powerbomb"}]}, "TSO5Phe2ZM4": {"duration": 125.23, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TSO5Phe2ZM4", "annotations": [{"segment": [2.5387722308892355, 121.47048673946958], "label": "Playing squash"}]}, "JLipYEVwKTg": {"duration": 28.54, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=JLipYEVwKTg", "annotations": [{"segment": [16.071721689547584, 22.21548233541342], "label": "Smoking hookah"}]}, "4mfnfTpcfck": {"duration": 112.014, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4mfnfTpcfck", "annotations": [{"segment": [24.63232605304212, 97.48112012480499], "label": "Drum corps"}]}, "v0mUPr68x2U": {"duration": 204.15, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=v0mUPr68x2U", "annotations": [{"segment": [71.31691731669267, 75.13746645865834], "label": "Swimming"}, {"segment": [88.19100936037441, 154.09548205928238], "label": "Swimming"}, {"segment": [173.21520124804994, 201.23530733229327], "label": 
"Swimming"}]}, "z9l32VOM6wY": {"duration": 203.361, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=z9l32VOM6wY", "annotations": [{"segment": [10.462235569422777, 162.95724492979718], "label": "Sharpening knives"}]}, "_MWyhJS4KbM": {"duration": 66.57, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=_MWyhJS4KbM", "annotations": [{"segment": [27.812146645865834, 41.195435257410296], "label": "Cleaning windows"}]}, "Gl8HTsP7Jfk": {"duration": 150.09, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Gl8HTsP7Jfk", "annotations": [{"segment": [41.306438629999995, 127.82954070999999], "label": "Playing lacrosse"}]}, "owLQzBg4olo": {"duration": 30.326, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=owLQzBg4olo", "annotations": [{"segment": [5.629806273010921, 30.325259], "label": "Knitting"}]}, "Cx6oTdEq40w": {"duration": 87.05, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Cx6oTdEq40w", "annotations": [{"segment": [0.01, 67.49555406978183], "label": "Rock climbing"}, {"segment": [77.95261174256494, 86.7799980897195], "label": "Rock climbing"}]}, "9GP266ETAuE": {"duration": 145.32999999999998, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9GP266ETAuE", "annotations": [{"segment": [14.84598537129485, 131.80061821996878], "label": "Hammer throw"}]}, "LSCQ1yqocHg": {"duration": 24.59, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LSCQ1yqocHg", "annotations": [{"segment": [7.403832871682214, 14.193876489753467], "label": "Washing hands"}]}, "rdkPwRWW91s": {"duration": 142.85, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rdkPwRWW91s", "annotations": [{"segment": [12.034092775955937, 136.1635309173167], "label": "Windsurfing"}]}, "-ZBsdK10Trs": 
{"duration": 49.25, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-ZBsdK10Trs", "annotations": [{"segment": [8.605220692667707, 41.71995389391575], "label": "Dodgeball"}]}, "kanXjG-zLyU": {"duration": 146.031, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kanXjG-zLyU", "annotations": []}, "P5Y-b-lcBs0": {"duration": 105.744, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=P5Y-b-lcBs0", "annotations": [{"segment": [0.2, 87.48933862470544], "label": "Playing pool"}]}, "w8rTULZCkzk": {"duration": 185.2, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=w8rTULZCkzk", "annotations": [{"segment": [22.82525148673947, 164.1106689173167], "label": "Cleaning shoes"}]}, "BD_ON66t6rM": {"duration": 48.739, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BD_ON66t6rM", "annotations": [{"segment": [1.748794436131187, 48.205897232449296], "label": "Canoeing"}]}, "QwQqY5VDzIk": {"duration": 181.163, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=QwQqY5VDzIk", "annotations": []}, "BOOX9aGlSEs": {"duration": 222.819, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BOOX9aGlSEs", "annotations": [{"segment": [45.54079944763897, 56.64215770309068], "label": "Skiing"}]}, "1RKExOpIGas": {"duration": 215.25, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1RKExOpIGas", "annotations": [{"segment": [5.707358814352574, 201.45491419656787], "label": "Doing motocross"}]}, "a68fUj833qg": {"duration": 103.93, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=a68fUj833qg", "annotations": [{"segment": [21.402639800312013, 85.44841799063963], "label": "Mixing drinks"}]}, "a8IdfA5fXJs": {"duration": 155.551, "subset": "testing", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=a8IdfA5fXJs", "annotations": []}, "xH7qxBIz-tU": {"duration": 190.66, "subset": "training", "resolution": "1280x544", "url": "https://www.youtube.com/watch?v=xH7qxBIz-tU", "annotations": [{"segment": [4.164159175890846, 178.4639644305772], "label": "Tai chi"}]}, "MOOeHWuuxlo": {"duration": 49.738, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=MOOeHWuuxlo", "annotations": [{"segment": [7.230644043360916, 42.91936834242633], "label": "Elliptical trainer"}]}, "pIk9qMEyEd4": {"duration": 108.832, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pIk9qMEyEd4", "annotations": [{"segment": [10.014444617784712, 100.99312792511701], "label": "Assembling bicycle"}]}, "XHQLkc06XIc": {"duration": 189.43, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XHQLkc06XIc", "annotations": [{"segment": [20.981927756021804, 137.12133068724108], "label": "Playing violin"}]}, "PgS2F7NxNBg": {"duration": 175.171, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PgS2F7NxNBg", "annotations": [{"segment": [8.566086709102738, 167.8843795439937], "label": "Painting fence"}]}, "DwaoxjXwC1M": {"duration": 31.998, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=DwaoxjXwC1M", "annotations": []}, "GjcRJzPjbRI": {"duration": 206.45, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=GjcRJzPjbRI", "annotations": []}, "1_YFTTzzLrI": {"duration": 28.909, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=1_YFTTzzLrI", "annotations": [{"segment": [4.2393621653666145, 7.486533185647427], "label": "Playing rubik cube"}, {"segment": [8.74932191575663, 28.708842], "label": "Playing rubik cube"}]}, "TNqc2yWZztE": {"duration": 111.03999999999999, "subset": "testing", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=TNqc2yWZztE", "annotations": []}, "kzeKQBi6Rhg": {"duration": 187.08, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=kzeKQBi6Rhg", "annotations": [{"segment": [23.911078230000005, 70.76414475], "label": "Playing violin"}, {"segment": [79.28016291, 123.45739851634882], "label": "Playing violin"}]}, "LTmYxVYBa90": {"duration": 35.55, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LTmYxVYBa90", "annotations": [{"segment": [0.01, 35.549749], "label": "Camel ride"}]}, "6LOfAoP4Mg8": {"duration": 106.51, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6LOfAoP4Mg8", "annotations": [{"segment": [8.557350228703026, 63.889342969637156], "label": "Hanging wallpaper"}]}, "e0E2EVuhYu0": {"duration": 168.809, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=e0E2EVuhYu0", "annotations": [{"segment": [0, 168.761], "label": "Scuba diving"}]}, "pULb9WvZwU4": {"duration": 118.21000000000001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pULb9WvZwU4", "annotations": [{"segment": [79.2655694227769, 113.36819812792513], "label": "Bungee jumping"}]}, "xf9iLflgRro": {"duration": 205.752, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=xf9iLflgRro", "annotations": [{"segment": [60.325280029653385, 148.7596351698403], "label": "Fixing the roof"}]}, "MnaDFXhhEgs": {"duration": 26.146, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=MnaDFXhhEgs", "annotations": []}, "YJTz7mpI-mY": {"duration": 47.51, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=YJTz7mpI-mY", "annotations": [{"segment": [14.37022464898596, 46.518046801872075], "label": "Shaving"}]}, "1H2bRd91sZw": {"duration": 120.09, "subset": "validation", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=1H2bRd91sZw", "annotations": [{"segment": [44.88836154, 108.93275751], "label": "Making a sandwich"}]}, "mz2-lgoNz1M": {"duration": 143.64, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mz2-lgoNz1M", "annotations": [{"segment": [54.11649171681153, 75.85272234426793], "label": "Getting a tattoo"}]}, "ZGSi-QAkf5A": {"duration": 123.35, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ZGSi-QAkf5A", "annotations": [{"segment": [51.46516380655226, 53.004308892355695], "label": "Cricket"}, {"segment": [62.04678627145086, 63.008751950078], "label": "Cricket"}, {"segment": [72.62840873634946, 81.09370670826834], "label": "Cricket"}, {"segment": [86.48071450858033, 94.17643993759751], "label": "Cricket"}]}, "1pzRdAVCrDU": {"duration": 84.71000000000001, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1pzRdAVCrDU", "annotations": []}, "We0DJIKCBOw": {"duration": 24.96, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=We0DJIKCBOw", "annotations": [{"segment": [10.2163698, 24.216965031201248], "label": "Clean and jerk"}, {"segment": [0, 9.900751165366614], "label": "Clean and jerk"}]}, "NvRH4SoF09c": {"duration": 65.78, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=NvRH4SoF09c", "annotations": []}, "GGv0sCOf_tM": {"duration": 17.42, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GGv0sCOf_tM", "annotations": [{"segment": [1.2089952985874537, 17.41496598639456], "label": "Hand washing clothes"}]}, "RUG5nCMr11w": {"duration": 24.38, "subset": "testing", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=RUG5nCMr11w", "annotations": []}, "6aHGUTy1iho": {"duration": 81.94, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6aHGUTy1iho", 
"annotations": [{"segment": [8.87453198127925, 38.243486739469574], "label": "Javelin throw"}, {"segment": [39.09582473, 74.50776131045241], "label": "Javelin throw"}]}, "vNzTWjEFr-Y": {"duration": 69.52, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vNzTWjEFr-Y", "annotations": [{"segment": [25.08217628705148, 68.20401716068643], "label": "Playing guitarra"}]}, "1oaJBEdY6ao": {"duration": 39.29, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=1oaJBEdY6ao", "annotations": [{"segment": [16.83571450858034, 37.382032761310455], "label": "Mowing the lawn"}]}, "7JoYkshshVI": {"duration": 75.001, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=7JoYkshshVI", "annotations": [{"segment": [10.296474117468101, 12.16856032064412], "label": "Shuffleboard"}, {"segment": [15.093695013106649, 21.29498056112721], "label": "Shuffleboard"}, {"segment": [40.71787409672387, 42.47295487675507], "label": "Shuffleboard"}, {"segment": [49.25926722620904, 56.39659573166926], "label": "Shuffleboard"}, {"segment": [66.81007502652106, 68.33114503588143], "label": "Shuffleboard"}]}, "vWB4t0Vl64A": {"duration": 9.1, "subset": "testing", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=vWB4t0Vl64A", "annotations": []}, "nt9tMdFVQ1c": {"duration": 14.931, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nt9tMdFVQ1c", "annotations": [{"segment": [0.2, 14.729954648526078], "label": "Playing beach volleyball"}]}, "my4UPLGI6w4": {"duration": 76.533, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=my4UPLGI6w4", "annotations": [{"segment": [19.470815446621906, 72.0205677830705], "label": "Blowing leaves"}]}, "vbLxVuLLPek": {"duration": 60.19, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=vbLxVuLLPek", "annotations": [{"segment": [6.051361120904836, 
47.83342536], "label": "Playing bagpipes"}]}, "pYvqbfVY-s8": {"duration": 212.6, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=pYvqbfVY-s8", "annotations": [{"segment": [15.413915756630264, 200.71238689547582], "label": "Playing water polo"}]}, "zEdAxKm9SLA": {"duration": 122.555, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zEdAxKm9SLA", "annotations": [{"segment": [74.34148049921997, 96.52590483619345], "label": "Trimming branches or hedges"}]}, "-SYV9IB-36s": {"duration": 220.17000000000002, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-SYV9IB-36s", "annotations": [{"segment": [13.39577512324493, 197.15833130109203], "label": "Putting on makeup"}]}, "VuEy38XdUgA": {"duration": 99.126, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VuEy38XdUgA", "annotations": [{"segment": [0, 98.92600907029478], "label": "Shuffleboard"}]}, "MP31A6fHsh4": {"duration": 150.768, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MP31A6fHsh4", "annotations": [{"segment": [2.8420735569643876, 16.248826186253233], "label": "Curling"}, {"segment": [18.836094237519504, 20.717743729349515], "label": "Curling"}, {"segment": [32.007640680329594, 150.76716553287983], "label": "Curling"}, {"segment": [38.35820771525589, 45.17918712313969], "label": "Curling"}, {"segment": [48.707279920320964, 60.702795430737304], "label": "Curling"}, {"segment": [81.1657336543887, 84.22341407861246], "label": "Curling"}, {"segment": [98.33578526733757, 122.79722866112775], "label": "Curling"}, {"segment": [127.26614620422403, 133.14630039559316], "label": "Curling"}, {"segment": [139.9672797793685, 145.84743442055415], "label": "Curling"}]}, "3ZUy7h-dN38": {"duration": 190.92, "subset": "training", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=3ZUy7h-dN38", "annotations": [{"segment": 
[0, 184.32970202808113], "label": "Playing guitarra"}]}, "SK7VENBeVqE": {"duration": 89.26, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SK7VENBeVqE", "annotations": []}, "Kic33dtLja8": {"duration": 210.65, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Kic33dtLja8", "annotations": []}, "nh1GBPeyyMo": {"duration": 136.84, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=nh1GBPeyyMo", "annotations": [{"segment": [1.4943000767649754, 132.35229251346922], "label": "Wrapping presents"}]}, "xfVx1-emCV8": {"duration": 90.558, "subset": "training", "resolution": "642x480", "url": "https://www.youtube.com/watch?v=xfVx1-emCV8", "annotations": [{"segment": [54.56639719626168, 76.56215420560748], "label": "Curling"}]}, "xE5KichXWrA": {"duration": 110.81, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=xE5KichXWrA", "annotations": [{"segment": [13.304823152886115, 105.05626593447738], "label": "Hammer throw"}]}, "6X1N1i7lfLs": {"duration": 179.96, "subset": "testing", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=6X1N1i7lfLs", "annotations": []}, "xVPTVGpOkGE": {"duration": 29.75, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xVPTVGpOkGE", "annotations": [{"segment": [14.941985522620904, 27.192557503900154], "label": "Using parallel bars"}]}, "5yGj9JooT_Q": {"duration": 52.52, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5yGj9JooT_Q", "annotations": [{"segment": [0.6964878385335415, 48.87705831591264], "label": "Archery"}]}, "K4cNyYu2LLg": {"duration": 190.85, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=K4cNyYu2LLg", "annotations": [{"segment": [36.62076644698441, 53.293635723660245], "label": "Brushing teeth"}, {"segment": [81.87569734081882, 88.72348293659638], "label": 
"Brushing teeth"}, {"segment": [128.9170062371295, 135.7647917878315], "label": "Brushing teeth"}]}, "SBJWn8AjK-4": {"duration": 60.303, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=SBJWn8AjK-4", "annotations": [{"segment": [9.595669572542901, 60.302198], "label": "Swinging at the playground"}]}, "eAFFf5cSX5c": {"duration": 85.71000000000001, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=eAFFf5cSX5c", "annotations": [{"segment": [0, 85.704874], "label": "Cumbia"}]}, "ZpwXqrCxcGg": {"duration": 50.666, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZpwXqrCxcGg", "annotations": [{"segment": [17.993519825375632, 30.383793411214953], "label": "Plastering"}]}, "R2izNBHhctM": {"duration": 71.216, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=R2izNBHhctM", "annotations": [{"segment": [0.01, 68.59770487580433], "label": "Clipping cat claws"}]}, "0Lw6MFCE4xw": {"duration": 215.575, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0Lw6MFCE4xw", "annotations": []}, "P1Ve5WrfTb8": {"duration": 78.95, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=P1Ve5WrfTb8", "annotations": [{"segment": [41.00744021, 64.88969718], "label": "Dodgeball"}, {"segment": [0, 36.43088262], "label": "Dodgeball"}]}, "eLh51DwDp-E": {"duration": 203.988, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eLh51DwDp-E", "annotations": []}, "SKdouCRLoKE": {"duration": 93.089, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=SKdouCRLoKE", "annotations": [{"segment": [1.742688580343214, 64.04380532761311], "label": "Bullfighting"}, {"segment": [78.56621016380656, 83.50382780811232], "label": "Bullfighting"}]}, "_i_ERtwvcJE": {"duration": 31.719, "subset": "training", "resolution": "1920x1080", 
"url": "https://www.youtube.com/watch?v=_i_ERtwvcJE", "annotations": [{"segment": [3.463794056162247, 27.80931799375975], "label": "Beer pong"}]}, "vfI78g82VfE": {"duration": 157.618, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vfI78g82VfE", "annotations": []}, "uevUOX7Wpz4": {"duration": 38.313, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uevUOX7Wpz4", "annotations": [{"segment": [0.009999999999999998, 38.312403], "label": "Canoeing"}]}, "QJm_B5Hx4DI": {"duration": 200.83, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=QJm_B5Hx4DI", "annotations": [{"segment": [114.98344041516764, 131.58867725429016], "label": "Applying sunscreen"}]}, "RmuhtC-TCck": {"duration": 176.32999999999998, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=RmuhtC-TCck", "annotations": [{"segment": [53.078346333853354, 57.49005304212169], "label": "Breakdancing"}, {"segment": [70.72517316692668, 74.86114820592823], "label": "Breakdancing"}, {"segment": [99.95273010920437, 118.426751950078], "label": "Breakdancing"}, {"segment": [137.72796879875196, 165.02540405616224], "label": "Breakdancing"}]}, "0VVNybUx7DE": {"duration": 224.98, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0VVNybUx7DE", "annotations": [{"segment": [55.42782986, 165.921275], "label": "Cricket"}]}, "5Foo5NSjEXQ": {"duration": 120.02, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=5Foo5NSjEXQ", "annotations": [{"segment": [39.20885647425897, 99.47258814352574], "label": "Preparing pasta"}]}, "xr0tV2KNqgI": {"duration": 98.291, "subset": "training", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=xr0tV2KNqgI", "annotations": [{"segment": [28.350023400936035, 88.57647113884555], "label": "Bullfighting"}]}, "sSasqxIi5UU": {"duration": 72.26, "subset": "testing", 
"resolution": "640x360", "url": "https://www.youtube.com/watch?v=sSasqxIi5UU", "annotations": []}, "8ErMpNsK3_c": {"duration": 32.996, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8ErMpNsK3_c", "annotations": [{"segment": [0.2, 13.413201248049923], "label": "Using the monkey bar"}]}, "YDSSJ6Tp47g": {"duration": 106.836, "subset": "validation", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=YDSSJ6Tp47g", "annotations": [{"segment": [0.01, 106.83501133786848], "label": "Rollerblading"}]}, "zOj_cfNTppg": {"duration": 49.97, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zOj_cfNTppg", "annotations": []}, "tY9dmG6IyPo": {"duration": 8.66, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=tY9dmG6IyPo", "annotations": []}, "SibfKtVX3CQ": {"duration": 234.104, "subset": "validation", "resolution": "450x360", "url": "https://www.youtube.com/watch?v=SibfKtVX3CQ", "annotations": [{"segment": [11.32170213067026, 14.973864108305827], "label": "Beach soccer"}, {"segment": [24.469485250158304, 43.0955113360997], "label": "Beach soccer"}, {"segment": [51.13026768689795, 91.66926563865276], "label": "Beach soccer"}, {"segment": [100.43445438497812, 110.66050792235771], "label": "Beach soccer"}, {"segment": [118.69526427315594, 144.6256143143685], "label": "Beach soccer"}, {"segment": [154.12123545622097, 173.84291013545305], "label": "Beach soccer"}, {"segment": [182.24288268401483, 191.00807143034018], "label": "Beach soccer"}, {"segment": [199.408043978902, 210.36452991180872], "label": "Beach soccer"}]}, "z6l9utYlIrw": {"duration": 26.053, "subset": "testing", "resolution": "544x360", "url": "https://www.youtube.com/watch?v=z6l9utYlIrw", "annotations": []}, "b39hqdpOn7k": {"duration": 84.52, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=b39hqdpOn7k", "annotations": [{"segment": [13.443154446177845, 
45.73308424336973], "label": "Playing squash"}, {"segment": [51.2685007800312, 74.85991887675506], "label": "Playing squash"}]}, "xootaiXcXZw": {"duration": 171.898, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xootaiXcXZw", "annotations": [{"segment": [6.959292834890966, 166.77477102803738], "label": "Capoeira"}]}, "SY5WpDrtmEE": {"duration": 186.85, "subset": "training", "resolution": "400x300", "url": "https://www.youtube.com/watch?v=SY5WpDrtmEE", "annotations": [{"segment": [10.202447745709827, 181.02057285959438], "label": "Playing flauta"}]}, "LHe5cTzHB3o": {"duration": 62.3, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LHe5cTzHB3o", "annotations": [{"segment": [0.9968568332348762, 48.54604239398254], "label": "Doing kickboxing"}]}, "DW7Zm9DzEDk": {"duration": 23.92, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DW7Zm9DzEDk", "annotations": [{"segment": [0.2, 23.014397035881434], "label": "Long jump"}]}, "_uNQ1KITuGE": {"duration": 166.395, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=_uNQ1KITuGE", "annotations": []}, "IVz9QyL_O1I": {"duration": 5.109, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IVz9QyL_O1I", "annotations": []}, "0q9yZPTBbus": {"duration": 119.885, "subset": "testing", "resolution": "722x480", "url": "https://www.youtube.com/watch?v=0q9yZPTBbus", "annotations": []}, "pf9u7M6vafU": {"duration": 19.528, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pf9u7M6vafU", "annotations": [{"segment": [0.2, 15.367274622125462], "label": "Knitting"}]}, "0ynXG7NB3fM": {"duration": 186.39, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0ynXG7NB3fM", "annotations": []}, "_B2kFN9MfwU": {"duration": 141.155, "subset": "training", "resolution": "640x360", "url": 
"https://www.youtube.com/watch?v=_B2kFN9MfwU", "annotations": [{"segment": [66.7233909601282, 73.77008571499324], "label": "Applying sunscreen"}, {"segment": [96.89205287939409, 102.17707394554287], "label": "Applying sunscreen"}]}, "bD7VxDypbPQ": {"duration": 80.3, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bD7VxDypbPQ", "annotations": []}, "V4tqzi5uw9Y": {"duration": 17.253, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=V4tqzi5uw9Y", "annotations": [{"segment": [0.2, 12.488494851794071], "label": "Cutting the grass"}]}, "JHuLY-ygFkc": {"duration": 128.569, "subset": "validation", "resolution": "293x240", "url": "https://www.youtube.com/watch?v=JHuLY-ygFkc", "annotations": [{"segment": [8.424170566822674, 120.74644301716067], "label": "Playing blackjack"}]}, "61W153jXL3U": {"duration": 208.63, "subset": "training", "resolution": "328x240", "url": "https://www.youtube.com/watch?v=61W153jXL3U", "annotations": [{"segment": [23.486469195231702, 45.2594599257613], "label": "Washing face"}, {"segment": [74.18179089616629, 131.70148282607283], "label": "Washing face"}, {"segment": [195.7205749740479, 197.02045501766162], "label": "Washing face"}]}, "f0NDjR9C28o": {"duration": 28.282, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=f0NDjR9C28o", "annotations": []}, "t8_XO-nWaYg": {"duration": 74.931, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=t8_XO-nWaYg", "annotations": [{"segment": [0.01, 54.06472901716068], "label": "Playing rubik cube"}]}, "xQljKBB3498": {"duration": 90.23, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=xQljKBB3498", "annotations": [{"segment": [34.76988035984024, 52.084436166562305], "label": "Washing face"}]}, "1MQkI4UafqE": {"duration": 79.74, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1MQkI4UafqE", 
"annotations": [{"segment": [1.9242971918876755, 69.58506942277691], "label": "Tumbling"}]}, "Kq115s7yL3I": {"duration": 141.9, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Kq115s7yL3I", "annotations": [{"segment": [55.67415199465121, 60.10151994651214], "label": "Cleaning windows"}, {"segment": [62.53657232003565, 66.0784666815244], "label": "Cleaning windows"}, {"segment": [84.00930688656116, 90.87172721194561], "label": "Cleaning windows"}, {"segment": [115.88635613995987, 120.97782928459996], "label": "Cleaning windows"}]}, "1J3lfO_RmoM": {"duration": 186.619, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1J3lfO_RmoM", "annotations": []}, "klqHoK_8ar8": {"duration": 125.179, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=klqHoK_8ar8", "annotations": [{"segment": [8.006722737909516, 123.03012987519502], "label": "Painting fence"}]}, "7xLL5okHSIU": {"duration": 127.478, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7xLL5okHSIU", "annotations": [{"segment": [25.853481482059284, 33.21178005772231], "label": "Raking leaves"}, {"segment": [40.768951567862715, 48.32612307800312], "label": "Raking leaves"}, {"segment": [59.06526153978159, 60.05962621216849], "label": "Raking leaves"}, {"segment": [62.8438472948518, 63.440466098283935], "label": "Raking leaves"}, {"segment": [67.41792478783152, 68.4122894602184], "label": "Raking leaves"}]}, "zlXs92vjuSU": {"duration": 219.85, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zlXs92vjuSU", "annotations": [{"segment": [140.27607121372856, 201.32531492043682], "label": "Getting a piercing"}]}, "pwaSQyDNyWs": {"duration": 115.33, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=pwaSQyDNyWs", "annotations": [{"segment": [0.01, 115.33351473922903], "label": "Doing step aerobics"}]}, 
"Fy3Gt8X6XUY": {"duration": 233.22, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Fy3Gt8X6XUY", "annotations": []}, "JFYuFs_akM4": {"duration": 74.58, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JFYuFs_akM4", "annotations": []}, "2eep4G5Tgho": {"duration": 26.843, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2eep4G5Tgho", "annotations": [{"segment": [0, 26.64183673469388], "label": "Cutting the grass"}]}, "xMNmGr96O9U": {"duration": 179.978, "subset": "testing", "resolution": "658x480", "url": "https://www.youtube.com/watch?v=xMNmGr96O9U", "annotations": []}, "qyssgQ2xq2M": {"duration": 195.54, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qyssgQ2xq2M", "annotations": [{"segment": [27.4494383775351, 195.481], "label": "Doing nails"}]}, "28P5f3p_32g": {"duration": 139.529, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=28P5f3p_32g", "annotations": [{"segment": [0.01, 139.501], "label": "Sharpening knives"}]}, "DIF8qouivIU": {"duration": 222.703, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DIF8qouivIU", "annotations": [{"segment": [0, 222.7024036281179], "label": "Doing fencing"}]}, "nhW0O7NC0GM": {"duration": 69.24, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=nhW0O7NC0GM", "annotations": [{"segment": [55.63118859594385, 68.70181737285492], "label": "Kayaking"}]}, "FRjVP5r9rqc": {"duration": 80.74, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FRjVP5r9rqc", "annotations": [{"segment": [12.123167599999997, 67.13287349859594], "label": "Hand washing clothes"}]}, "FWKz_eRNswI": {"duration": 64.854, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FWKz_eRNswI", "annotations": []}, "QLACTCzs0R0": 
{"duration": 47.183, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=QLACTCzs0R0", "annotations": [{"segment": [0.01, 47.18224489795919], "label": "Tug of war"}]}, "AAfFlwaXW3c": {"duration": 60.93, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=AAfFlwaXW3c", "annotations": [{"segment": [0, 54.214712948517935], "label": "Snatch"}]}, "OeiFr5qqFJ4": {"duration": 206.19, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OeiFr5qqFJ4", "annotations": []}, "kJrHvEECvEA": {"duration": 70.61, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kJrHvEECvEA", "annotations": [{"segment": [32.638590483619346, 37.70225039001561], "label": "Smoking a cigarette"}, {"segment": [44.63726287051483, 46.72877457098284], "label": "Smoking a cigarette"}, {"segment": [7.1021443057722315, 11.50657488299532], "label": "Smoking a cigarette"}, {"segment": [18.113220748829956, 21.85698673946958], "label": "Smoking a cigarette"}]}, "7lNAmkaMyyg": {"duration": 13.654, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7lNAmkaMyyg", "annotations": [{"segment": [0, 13.653105], "label": "Removing ice from car"}]}, "vIQPDP8jW8A": {"duration": 98.59, "subset": "testing", "resolution": "1920x1440", "url": "https://www.youtube.com/watch?v=vIQPDP8jW8A", "annotations": []}, "dIj05gBdBbI": {"duration": 127.22, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=dIj05gBdBbI", "annotations": []}, "1p_HYu0EZAk": {"duration": 45.54, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1p_HYu0EZAk", "annotations": [{"segment": [10.501073322932918, 29.884432137285486], "label": "Washing dishes"}]}, "P0N68OQDhNs": {"duration": 228.37, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=P0N68OQDhNs", "annotations": 
[{"segment": [89.06718173488845, 91.20479409652577], "label": "Long jump"}, {"segment": [98.33016863531684, 100.82404972389372], "label": "Long jump"}, {"segment": [146.78271549909618, 148.92032786073347], "label": "Long jump"}, {"segment": [155.33316494564545, 157.11450858034323], "label": "Long jump"}, {"segment": [168.51510784240895, 170.2964514771067], "label": "Long jump"}, {"segment": [174.9279449273209, 177.06555728895825], "label": "Long jump"}]}, "yH018Jl5GMQ": {"duration": 23.755, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yH018Jl5GMQ", "annotations": [{"segment": [0, 23.754012], "label": "Hand car wash"}]}, "mo8CBVntUjE": {"duration": 166.232, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mo8CBVntUjE", "annotations": [{"segment": [0, 150.3846801872075], "label": "Playing drums"}]}, "ZSdnDVGXU3k": {"duration": 88.68, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=ZSdnDVGXU3k", "annotations": []}, "TbLBu2TDey8": {"duration": 118.074, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=TbLBu2TDey8", "annotations": [{"segment": [15.472966347225318, 109.41597502652107], "label": "Disc dog"}]}, "QZi1yBFRZzc": {"duration": 122.137, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QZi1yBFRZzc", "annotations": [{"segment": [15.23631825273011, 122.081], "label": "Trimming branches or hedges"}]}, "VNROQWtYhlw": {"duration": 212.347, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VNROQWtYhlw", "annotations": [{"segment": [13.913487753510141, 212.34632653061223], "label": "Having an ice cream"}]}, "pAAGm-KEFW0": {"duration": 229.065, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=pAAGm-KEFW0", "annotations": [{"segment": [0.2, 21.798683321482518], "label": "Applying sunscreen"}, 
{"segment": [127.21854528602913, 189.0410406076107], "label": "Applying sunscreen"}]}, "en6lOb4wRUU": {"duration": 181.16, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=en6lOb4wRUU", "annotations": [{"segment": [3.9567382314340196, 18.08794620084123], "label": "Smoking a cigarette"}, {"segment": [24.3056777073804, 29.110288416978857], "label": "Smoking a cigarette"}, {"segment": [39.56738231434019, 70.09079152825977], "label": "Smoking a cigarette"}, {"segment": [80.26526126623295, 98.63583162646233], "label": "Smoking a cigarette"}, {"segment": [114.4627845521984, 136.2248448250855], "label": "Smoking a cigarette"}, {"segment": [158.83477742901715, 169.0092471575663], "label": "Smoking a cigarette"}]}, "iBpyYbLv1WU": {"duration": 95.09, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iBpyYbLv1WU", "annotations": [{"segment": [3.6325103876755067, 91.25755688221528], "label": "Playing guitarra"}]}, "r-BJYixThME": {"duration": 20.898, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=r-BJYixThME", "annotations": [{"segment": [0.2, 16.243116887850466], "label": "Snow tubing"}]}, "t1urvYx1X_w": {"duration": 41.1, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=t1urvYx1X_w", "annotations": [{"segment": [0.01, 30.904635375975033], "label": "Removing ice from car"}]}, "tjEMbP2SODQ": {"duration": 180.44, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=tjEMbP2SODQ", "annotations": [{"segment": [6.049717628705149, 170.93969578783154], "label": "Tai chi"}]}, "Lr5GuPjfU7Q": {"duration": 90.19, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Lr5GuPjfU7Q", "annotations": [{"segment": [35.85952418096724, 41.48454758190328], "label": "Wrapping presents"}, {"segment": [43.73455694227769, 54.42210140405617], "label": "Wrapping presents"}]}, 
"0-ReTddxkhg": {"duration": 74.955, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0-ReTddxkhg", "annotations": [{"segment": [8.416297838038112, 74.86075726261043], "label": "Gargling mouthwash"}]}, "_I8sqYLhRKM": {"duration": 48.205, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_I8sqYLhRKM", "annotations": [{"segment": [0.01, 46.582786297748314], "label": "Raking leaves"}]}, "mfK2jcMGe1Y": {"duration": 95.92, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mfK2jcMGe1Y", "annotations": [{"segment": [41.486791710000006, 90.23618761778472], "label": "Brushing hair"}, {"segment": [9.067412130000001, 31.297103880000005], "label": "Brushing hair"}]}, "BgsxbT9RCeg": {"duration": 195.58, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BgsxbT9RCeg", "annotations": [{"segment": [27.460765993759747, 175.7489023600624], "label": "Ballet"}]}, "3I6LMFdA2lc": {"duration": 86.4, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3I6LMFdA2lc", "annotations": [{"segment": [0.9433182527301093, 84.89864274570984], "label": "Brushing hair"}]}, "HWcWElJfEjw": {"duration": 172.73, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HWcWElJfEjw", "annotations": [{"segment": [98.83782260000001, 121.14456222], "label": "Wrapping presents"}]}, "40Sf-iICgzw": {"duration": 37.269, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=40Sf-iICgzw", "annotations": [{"segment": [0.2, 35.58457934267913], "label": "Playing congas"}]}, "bON69f83fSY": {"duration": 129.41, "subset": "validation", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=bON69f83fSY", "annotations": [{"segment": [64.73182839313573, 113.12945709828394], "label": "Fixing bicycle"}]}, "7rf06_5zNJk": {"duration": 237.7, "subset": "training", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7rf06_5zNJk", "annotations": [{"segment": [159.69058062, 166.8430338], "label": "Belly dance"}, {"segment": [53.007546, 122.50210272000001], "label": "Belly dance"}]}, "ORKAMBnsX64": {"duration": 72.33, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=ORKAMBnsX64", "annotations": [{"segment": [35.75572074882996, 40.831453978159125], "label": "Plataform diving"}]}, "jQHGyqk21GI": {"duration": 178.214, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jQHGyqk21GI", "annotations": [{"segment": [7.784639080801328, 8.89673037805866], "label": "Rafting"}, {"segment": [12.233004269830658, 15.569278161602655], "label": "Rafting"}, {"segment": [22.797871593775312, 30.026465025947978], "label": "Rafting"}, {"segment": [37.25505845812064, 42.53749212009297], "label": "Rafting"}, {"segment": [48.09794860637963, 51.156199563182525], "label": "Rafting"}, {"segment": [54.77049627145085, 61.72106686427457], "label": "Rafting"}, {"segment": [66.16943204368175, 89.24532641185647], "label": "Rafting"}, {"segment": [98.97612524180967, 114.26738054602183], "label": "Rafting"}, {"segment": [115.65749466458658, 134.00700102964117], "label": "Rafting"}, {"segment": [143.73779985959436, 153.19057586583463], "label": "Rafting"}]}, "5Z5DAI4Grb0": {"duration": 139.32, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5Z5DAI4Grb0", "annotations": [{"segment": [21.734746645865837, 118.67171668642746], "label": "Painting"}]}, "rtbMQGNS5ko": {"duration": 193.33, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rtbMQGNS5ko", "annotations": []}, "Mv2uecqTSdY": {"duration": 6.061, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Mv2uecqTSdY", "annotations": [{"segment": [1.8436498900156006, 6.013135025897036], "label": "Canoeing"}]}, 
"B2HIj_QWaG0": {"duration": 108.78999999999999, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=B2HIj_QWaG0", "annotations": [{"segment": [0, 105.90038048049922], "label": "Playing violin"}]}, "juIOpLYnW64": {"duration": 74.63, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=juIOpLYnW64", "annotations": [{"segment": [21.874428524180967, 57.594904890795625], "label": "Getting a piercing"}]}, "D_xht_r3n54": {"duration": 21.758, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=D_xht_r3n54", "annotations": [{"segment": [0.01, 21.757096], "label": "Removing ice from car"}]}, "RjFoJggnfj4": {"duration": 136.557, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RjFoJggnfj4", "annotations": [{"segment": [10.866721329173165, 93.96517855226209], "label": "Futsal"}, {"segment": [110.87813203725982, 127.65853948360508], "label": "Futsal"}]}, "1v5HE_Nm99g": {"duration": 226.178, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1v5HE_Nm99g", "annotations": [{"segment": [0.01, 226.201], "label": "Sumo"}]}, "oYmE833AO2w": {"duration": 178.33, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oYmE833AO2w", "annotations": [{"segment": [0, 178.301], "label": "Plastering"}]}, "pXSlIEoZhj4": {"duration": 39.99, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pXSlIEoZhj4", "annotations": []}, "2LzWCwYtUzs": {"duration": 55.31, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2LzWCwYtUzs", "annotations": [{"segment": [12.931591263650546, 20.690546021840877], "label": "Peeling potatoes"}]}, "0ERgbWePjWk": {"duration": 4.16, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0ERgbWePjWk", "annotations": [{"segment": [0.2, 2.5527285491419653], 
"label": "Springboard diving"}]}, "c689s-L_nDM": {"duration": 21.154, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=c689s-L_nDM", "annotations": []}, "Y_nyVFn70Lk": {"duration": 119.7, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Y_nyVFn70Lk", "annotations": []}, "vAHR3iJhBXU": {"duration": 93.53, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=vAHR3iJhBXU", "annotations": []}, "Vq0-j_C-kZc": {"duration": 166.557, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Vq0-j_C-kZc", "annotations": [{"segment": [9.96013497521944, 134.98057422755588], "label": "Bullfighting"}]}, "5j6j4NJkUXA": {"duration": 67.92, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=5j6j4NJkUXA", "annotations": [{"segment": [0.24043609008439681, 29.756361501299352], "label": "Using the pommel horse"}, {"segment": [44.14405274117475, 60.22441353868253], "label": "Using the pommel horse"}]}, "dcmfRooP6dg": {"duration": 237.87, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dcmfRooP6dg", "annotations": [{"segment": [11.8789781, 196.22653871138846], "label": "Painting"}]}, "pPsqB3tY7hk": {"duration": 98.825, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pPsqB3tY7hk", "annotations": [{"segment": [19.579819230864477, 26.517550454399128], "label": "Shuffleboard"}, {"segment": [46.714056905133354, 60.435347547235224], "label": "Shuffleboard"}, {"segment": [79.86099497313226, 88.57227853186966], "label": "Shuffleboard"}]}, "XxyTLG8B-Ns": {"duration": 144.173, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XxyTLG8B-Ns", "annotations": [{"segment": [28.700004971888962, 111.6609656393456], "label": "Riding bumper cars"}]}, "RbbjYXAxaPI": {"duration": 133.86, "subset": "training", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=RbbjYXAxaPI", "annotations": [{"segment": [5.42970203161868, 133.86303854875283], "label": "Belly dance"}]}, "insSTTMSC38": {"duration": 177.587, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=insSTTMSC38", "annotations": [{"segment": [8.031371294851795, 95.26868018720748], "label": "Elliptical trainer"}, {"segment": [115.49859126365054, 119.37624180967238], "label": "Elliptical trainer"}, {"segment": [143.47307020280812, 166.18502340093602], "label": "Elliptical trainer"}]}, "lS_bztQjpGM": {"duration": 161.03, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=lS_bztQjpGM", "annotations": [{"segment": [14.570611968798753, 129.3769855850234], "label": "Removing curlers"}]}, "N7ppHQNikv8": {"duration": 66.224, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=N7ppHQNikv8", "annotations": [{"segment": [0.01, 66.22331065759637], "label": "Building sandcastles"}]}, "uQDTcusxDCg": {"duration": 179.119, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uQDTcusxDCg", "annotations": [{"segment": [0.2, 7.265321475444052], "label": "Futsal"}, {"segment": [14.251207509524871, 27.38467325359681], "label": "Futsal"}, {"segment": [105.34716139393876, 110.93587022120342], "label": "Futsal"}]}, "x-rGfBaFQek": {"duration": 170.27, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=x-rGfBaFQek", "annotations": [{"segment": [0, 170.07192743764173], "label": "Shoveling snow"}]}, "WGUzmra3h_w": {"duration": 10.12, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WGUzmra3h_w", "annotations": []}, "gPVq0g5UZjg": {"duration": 156.81, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=gPVq0g5UZjg", "annotations": []}, "HYYQ56oZSDM": {"duration": 214.25, "subset": "training", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=HYYQ56oZSDM", "annotations": [{"segment": [1.671217733063064, 209.5707023775351], "label": "Tango"}]}, "gRqvh21b8Hw": {"duration": 156.34, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gRqvh21b8Hw", "annotations": [{"segment": [19.02420664989865, 27.80460971908264], "label": "Plataform diving"}, {"segment": [50.73121773306307, 91.7064320559217], "label": "Plataform diving"}]}, "JAZu9CWEVbQ": {"duration": 200.249, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=JAZu9CWEVbQ", "annotations": [{"segment": [0, 4.9984122031547935], "label": "Surfing"}, {"segment": [12.808431270584157, 200.248887], "label": "Surfing"}]}, "e8gJpLlqzA8": {"duration": 195.61, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=e8gJpLlqzA8", "annotations": [{"segment": [0.01, 193.77360232449297], "label": "Cheerleading"}]}, "mZxvYV53uf0": {"duration": 225.49, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mZxvYV53uf0", "annotations": [{"segment": [11.256860135629916, 217.74988824859116], "label": "Cumbia"}]}, "33SI8z8PovA": {"duration": 227.51, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=33SI8z8PovA", "annotations": [{"segment": [40.46183930733229, 103.28416875819033], "label": "Washing face"}]}, "Y53B2WdEbAg": {"duration": 12.7, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Y53B2WdEbAg", "annotations": [{"segment": [0.6364471100000001, 12.327623180187208], "label": "Preparing salad"}]}, "CtYhHrsnW3c": {"duration": 216.57, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CtYhHrsnW3c", "annotations": []}, "3knQ7DMA9WI": {"duration": 117.192, "subset": "testing", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=3knQ7DMA9WI", 
"annotations": []}, "bXrdB5AASAM": {"duration": 179.096, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bXrdB5AASAM", "annotations": [{"segment": [11.176006877009774, 100.58406189308795], "label": "Capoeira"}, {"segment": [101.42226240886369, 171.83110573402527], "label": "Capoeira"}]}, "N1-TZnH0jy4": {"duration": 15.163, "subset": "training", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=N1-TZnH0jy4", "annotations": [{"segment": [0, 14.736845346333853], "label": "Having an ice cream"}]}, "OYPyiUC4h68": {"duration": 135.953, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=OYPyiUC4h68", "annotations": [{"segment": [2.1209490327613105, 135.52864319344772], "label": "River tubing"}]}, "_SzFi60-OGA": {"duration": 199.51, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=_SzFi60-OGA", "annotations": [{"segment": [128.54277096798157, 194.83722669723113], "label": "Doing karate"}]}, "eH5qh83OcNI": {"duration": 121.51, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eH5qh83OcNI", "annotations": []}, "T60xwc6nKJI": {"duration": 132.26, "subset": "validation", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=T60xwc6nKJI", "annotations": [{"segment": [97.80288243369735, 122.7694410296412], "label": "Polishing forniture"}]}, "VWGV8I5byKI": {"duration": 126.74, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VWGV8I5byKI", "annotations": []}, "Th--fToJeyI": {"duration": 33.85, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=Th--fToJeyI", "annotations": []}, "zTHkqpNFGno": {"duration": 105.91, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=zTHkqpNFGno", "annotations": []}, "G1aYtrFJqUc": {"duration": 69.753, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=G1aYtrFJqUc", "annotations": [{"segment": [23.778795317355716, 41.02137942950525], "label": "Mooping floor"}, {"segment": [48.50401027062674, 53.92620653230899], "label": "Mooping floor"}]}, "p8uTRV3u3JM": {"duration": 79.83, "subset": "training", "resolution": "464x348", "url": "https://www.youtube.com/watch?v=p8uTRV3u3JM", "annotations": [{"segment": [0.01, 78.95842231201247], "label": "Volleyball"}]}, "8nj8tlvLyUo": {"duration": 190.799, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8nj8tlvLyUo", "annotations": [{"segment": [0.01, 190.741], "label": "Building sandcastles"}]}, "uUzmPV8Vgqg": {"duration": 116.03, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uUzmPV8Vgqg", "annotations": [{"segment": [13.480122166146646, 94.8127542], "label": "Bathing dog"}]}, "mqwC7rqeXsk": {"duration": 91.208, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mqwC7rqeXsk", "annotations": [{"segment": [0.2, 83.83192594288697], "label": "Drum corps"}]}, "5yrLDF_ZmN0": {"duration": 124.529, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5yrLDF_ZmN0", "annotations": [{"segment": [0.2, 120.5316065318818], "label": "Scuba diving"}]}, "pHhcYS_wPys": {"duration": 5.02, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pHhcYS_wPys", "annotations": [{"segment": [1.3497238689547584, 3.2299750390015602], "label": "Javelin throw"}]}, "G5gvb71BB5I": {"duration": 211.441, "subset": "training", "resolution": "400x200", "url": "https://www.youtube.com/watch?v=G5gvb71BB5I", "annotations": [{"segment": [25.977960105237663, 196.64332591239162], "label": "Beach soccer"}]}, "NURIoQoKk1E": {"duration": 192.169, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NURIoQoKk1E", "annotations": [{"segment": [0.01, 191.22183775351016], 
"label": "Blow-drying hair"}]}, "SHfFrzy_fKM": {"duration": 78.321, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=SHfFrzy_fKM", "annotations": [{"segment": [18.68485647425897, 68.40648985959439], "label": "Camel ride"}]}, "IuY073Pr4E4": {"duration": 42.539, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=IuY073Pr4E4", "annotations": [{"segment": [10.585058026299935, 35.12916366549123], "label": "Using the monkey bar"}]}, "y4Y8gD1mpTw": {"duration": 156.94, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=y4Y8gD1mpTw", "annotations": [{"segment": [96.58987397455083, 114.70812544227769], "label": "Doing nails"}]}, "9fnmcyUz5jg": {"duration": 75.23, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=9fnmcyUz5jg", "annotations": [{"segment": [3.9877597503900155, 74.0278174726989], "label": "Kayaking"}]}, "Cgquef_qgcs": {"duration": 6.78, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Cgquef_qgcs", "annotations": [{"segment": [0.3222464898595944, 4.88838], "label": "Pole vault"}, {"segment": [5.3438604000000005, 5.377488299531982], "label": "Pole vault"}]}, "uINj6L2EEUs": {"duration": 200.53, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uINj6L2EEUs", "annotations": [{"segment": [47.238154103034866, 98.85600461297363], "label": "Chopping wood"}, {"segment": [64.44410427301446, 81.96288990063003], "label": "Chopping wood"}, {"segment": [88.53243451098588, 99.48167552824562], "label": "Chopping wood"}, {"segment": [133.2679049529328, 173.93651327925116], "label": "Chopping wood"}]}, "0UR4hXxt68E": {"duration": 207.84, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=0UR4hXxt68E", "annotations": [{"segment": [12.321355591638632, 200.38415044305773], "label": "Cheerleading"}]}, "QhDPo3amF6o": 
{"duration": 113.987, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QhDPo3amF6o", "annotations": [{"segment": [2.13391745465737, 112.20849147581903], "label": "Riding bumper cars"}]}, "8kGiDHAnTqY": {"duration": 174.057, "subset": "training", "resolution": "543x360", "url": "https://www.youtube.com/watch?v=8kGiDHAnTqY", "annotations": [{"segment": [33.121001560062396, 98.54855382215287], "label": "Fun sliding down"}, {"segment": [114.83757098283931, 118.63834165366613], "label": "Fun sliding down"}, {"segment": [164.24758970358815, 174.021], "label": "Fun sliding down"}]}, "KRGiJIHSd9E": {"duration": 236.84, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KRGiJIHSd9E", "annotations": [{"segment": [11.636653633385334, 225.16001236661464], "label": "Getting a tattoo"}]}, "JhoGvKrhs8s": {"duration": 96.74000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JhoGvKrhs8s", "annotations": [{"segment": [2.8181986, 89.50115512792512], "label": "Playing bagpipes"}]}, "aj6uPa3mnn4": {"duration": 219.174, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=aj6uPa3mnn4", "annotations": []}, "DYahQgHG1Gk": {"duration": 112.803, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DYahQgHG1Gk", "annotations": [{"segment": [20.574659906396256, 53.63479719188768], "label": "Playing drums"}, {"segment": [62.07568330733229, 82.84089079563182], "label": "Playing drums"}, {"segment": [92.51445553822153, 100.07742433697348], "label": "Playing drums"}]}, "al_NNsjwU-Q": {"duration": 55.66, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=al_NNsjwU-Q", "annotations": [{"segment": [0.01, 55.65823129251701], "label": "Using the balance beam"}]}, "MBouLt-hXDU": {"duration": 234.174, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=MBouLt-hXDU", "annotations": [{"segment": [61.00085335413416, 104.83380187207487], "label": "Making a cake"}]}, "WGEKoGRIJGk": {"duration": 134.35, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=WGEKoGRIJGk", "annotations": [{"segment": [63.48503884399376, 91.35140902964119], "label": "Playing flauta"}]}, "qfCqVXhNPE8": {"duration": 218.38, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qfCqVXhNPE8", "annotations": [{"segment": [124.93203576127657, 142.2802713874448], "label": "Removing curlers"}]}, "PeMJjLrDjhQ": {"duration": 189.87, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=PeMJjLrDjhQ", "annotations": [{"segment": [28.58410830582883, 107.6717447582257], "label": "Washing hands"}]}, "IkjulgI7gzg": {"duration": 143.18, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IkjulgI7gzg", "annotations": [{"segment": [24.686320592823712, 133.26145007800312], "label": "Snatch"}]}, "8fLHaW3YvJY": {"duration": 213.81, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8fLHaW3YvJY", "annotations": [{"segment": [78.85116614664587, 185.54196177847115], "label": "Shoveling snow"}]}, "i5kuZ8zlhMg": {"duration": 203.06, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=i5kuZ8zlhMg", "annotations": [{"segment": [85.67000858034321, 93.5877542901716], "label": "Playing water polo"}, {"segment": [100.87208034321372, 104.67259828393135], "label": "Playing water polo"}]}, "7SxEQiFHGm8": {"duration": 185.481, "subset": "validation", "resolution": "562x480", "url": "https://www.youtube.com/watch?v=7SxEQiFHGm8", "annotations": [{"segment": [0.01, 178.04812081791388], "label": "Futsal"}]}, "AOteP9srRpw": {"duration": 11.1, "subset": "validation", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=AOteP9srRpw", "annotations": [{"segment": [0.021927794447569086, 10.703890411269999], "label": "Windsurfing"}]}, "RRFC2NrZSf4": {"duration": 52.849, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RRFC2NrZSf4", "annotations": [{"segment": [0.2, 51.09321028037383], "label": "Playing congas"}]}, "WyMTj1vWm10": {"duration": 39.61, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WyMTj1vWm10", "annotations": []}, "6mes_Rv4QqU": {"duration": 2.624, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=6mes_Rv4QqU", "annotations": []}, "djQnCU5MNJg": {"duration": 8.36, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=djQnCU5MNJg", "annotations": [{"segment": [0.2, 8.15918367346939], "label": "Swinging at the playground"}]}, "wr_1p7obP-g": {"duration": 218.663, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wr_1p7obP-g", "annotations": [{"segment": [34.53629482344446, 199.72468473777468], "label": "Laying tile"}]}, "6Ke30NtYOC0": {"duration": 79.99, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6Ke30NtYOC0", "annotations": [{"segment": [13.033970358814353, 21.88958658346334], "label": "Playing guitarra"}, {"segment": [31.618291731669267, 69.53529641185648], "label": "Playing guitarra"}]}, "4Lu8ECLHvK4": {"duration": 124.23, "subset": "validation", "resolution": "408x720", "url": "https://www.youtube.com/watch?v=4Lu8ECLHvK4", "annotations": [{"segment": [0.01, 124.22675736961452], "label": "Playing kickball"}]}, "P49Ci0Ph8eU": {"duration": 54.64, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=P49Ci0Ph8eU", "annotations": [{"segment": [7.756515182527302, 45.94243608112325], "label": "Using parallel bars"}]}, "EhxFw5Eb1VE": {"duration": 67.11, "subset": "training", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EhxFw5Eb1VE", "annotations": [{"segment": [2.458456826053042, 57.17219810374416], "label": "Using parallel bars"}]}, "UpnFBPGFgnw": {"duration": 122.253, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UpnFBPGFgnw", "annotations": [{"segment": [30.888926677067083, 36.41842589703588], "label": "Changing car wheel"}, {"segment": [52.0535616224649, 54.34163026521061], "label": "Changing car wheel"}, {"segment": [66.73533541341654, 76.84097191887676], "label": "Changing car wheel"}]}, "X6CpfuJLx0U": {"duration": 100.89, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=X6CpfuJLx0U", "annotations": [{"segment": [51.5527722, 76.7994858], "label": "Playing accordion"}, {"segment": [22.942386, 40.4609256], "label": "Playing accordion"}]}, "UtqKByc2YH8": {"duration": 12.307, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UtqKByc2YH8", "annotations": []}, "9G172wz5Yqo": {"duration": 207.308, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=9G172wz5Yqo", "annotations": []}, "GE2q5qDJ-xU": {"duration": 122.14, "subset": "validation", "resolution": "480x272", "url": "https://www.youtube.com/watch?v=GE2q5qDJ-xU", "annotations": [{"segment": [0.01, 122.1368253968254], "label": "Walking the dog"}]}, "jbrcRk6BtXw": {"duration": 50.434, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jbrcRk6BtXw", "annotations": []}, "YgDcGyka5_o": {"duration": 173.27, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YgDcGyka5_o", "annotations": []}, "BB2vrBWgXkU": {"duration": 196.72, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BB2vrBWgXkU", "annotations": [{"segment": [81.15390898127924, 173.81357065366615], "label": "Doing nails"}]}, "Zev1KxIua7o": 
{"duration": 168.577, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Zev1KxIua7o", "annotations": [{"segment": [0.2, 163.33341820203975], "label": "Drum corps"}]}, "FiFzHgBjryA": {"duration": 157.76, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FiFzHgBjryA", "annotations": [{"segment": [12.83865423435437, 153.0814128013326], "label": "Vacuuming floor"}]}, "EETygPQ-j08": {"duration": 41.146, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EETygPQ-j08", "annotations": []}, "A0XGYLim9IU": {"duration": 64.761, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=A0XGYLim9IU", "annotations": [{"segment": [7.678306500967522, 56.47596360062402], "label": "Croquet"}]}, "S7CW-si2JjA": {"duration": 190.891, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=S7CW-si2JjA", "annotations": []}, "-CEi03j4-Bw": {"duration": 10.264, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-CEi03j4-Bw", "annotations": [{"segment": [0.01, 10.263219954648527], "label": "Building sandcastles"}]}, "4j7_nh61bvo": {"duration": 111.224, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4j7_nh61bvo", "annotations": [{"segment": [11.260947753971786, 15.072345455316082], "label": "Sumo"}, {"segment": [40.19292121417622, 46.776243785046724], "label": "Sumo"}, {"segment": [78.48014235046729, 86.62267367601245], "label": "Sumo"}, {"segment": [101.86826424299065, 103.77396306386294], "label": "Sumo"}]}, "TOBHIXCu4Ic": {"duration": 123.62, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TOBHIXCu4Ic", "annotations": [{"segment": [7.036641965678627, 107.86304602184089], "label": "Cheerleading"}]}, "dB4SbhaQQV8": {"duration": 22.57, "subset": "training", "resolution": "426x240", "url": 
"https://www.youtube.com/watch?v=dB4SbhaQQV8", "annotations": [{"segment": [5.0702813354134175, 20.914910508580345], "label": "Playing kickball"}]}, "_CYdiU1sMwc": {"duration": 24.056, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_CYdiU1sMwc", "annotations": [{"segment": [1.7262908274570983, 24.0554874], "label": "Blowing leaves"}]}, "jmPV_y8YntQ": {"duration": 160.984, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jmPV_y8YntQ", "annotations": [{"segment": [38.41016068642745, 157.17482995319813], "label": "Rope skipping"}]}, "IfKGdI5egKc": {"duration": 112.55, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IfKGdI5egKc", "annotations": [{"segment": [19.752809704224905, 98.58846791911077], "label": "Bathing dog"}]}, "vSAHWgFjQAY": {"duration": 157.99, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=vSAHWgFjQAY", "annotations": [{"segment": [17.51770944, 110.19663], "label": "Cumbia"}]}, "64UBH371Jj8": {"duration": 54.06, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=64UBH371Jj8", "annotations": [{"segment": [7.589773572542902, 26.14255341653666], "label": "Washing dishes"}, {"segment": [36.76823641809673, 40.22579993447738], "label": "Washing dishes"}, {"segment": [46.04462634009361, 48.15289677691108], "label": "Washing dishes"}]}, "A-SbpY2MpCM": {"duration": 233.802, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=A-SbpY2MpCM", "annotations": []}, "HOTCR1uIaBM": {"duration": 173.988, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HOTCR1uIaBM", "annotations": [{"segment": [38.257193447737905, 83.30715600624025], "label": "Baking cookies"}]}, "vr0Wi359kWM": {"duration": 172.69, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vr0Wi359kWM", 
"annotations": [{"segment": [64.65652455538222, 163.2577245023401], "label": "Cleaning shoes"}]}, "YnvefOl6a7k": {"duration": 145.404, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YnvefOl6a7k", "annotations": [{"segment": [36.953702028081125, 134.4574305772231], "label": "Decorating the Christmas tree"}]}, "K98WGaMR4eM": {"duration": 105.395, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=K98WGaMR4eM", "annotations": [{"segment": [0.01, 104.27784978312793], "label": "Doing a powerbomb"}]}, "fM5egP_1DvQ": {"duration": 142.32, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=fM5egP_1DvQ", "annotations": []}, "krUhklOpyQU": {"duration": 74.07, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=krUhklOpyQU", "annotations": [{"segment": [36.40027853120125, 64.13382407878315], "label": "Doing karate"}]}, "aljYWkDQzN8": {"duration": 60.326, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aljYWkDQzN8", "annotations": [{"segment": [6.39957632449298, 59.19608100156006], "label": "Skiing"}]}, "YRMbCxetWtg": {"duration": 82.13, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YRMbCxetWtg", "annotations": [{"segment": [7.972164960000001, 52.10857344], "label": "Playing racquetball"}]}, "gr53ih2aFkE": {"duration": 134.49, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gr53ih2aFkE", "annotations": []}, "M_OPqcaCDYI": {"duration": 190.451, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=M_OPqcaCDYI", "annotations": [{"segment": [58.52119074937218, 181.33478601925583], "label": "Rafting"}]}, "YaC68rE4lwA": {"duration": 51.595, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YaC68rE4lwA", "annotations": []}, "TsPh4-BsCYk": 
{"duration": 161.32999999999998, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TsPh4-BsCYk", "annotations": [{"segment": [43.0018067, 114.1542973], "label": "Hammer throw"}]}, "c-aDlEaZmFk": {"duration": 58.91, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=c-aDlEaZmFk", "annotations": [{"segment": [3.627200946957878, 51.522989579999994], "label": "Using uneven bars"}]}, "L_8Gyi8FMk4": {"duration": 19.691, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=L_8Gyi8FMk4", "annotations": [{"segment": [0.21502907956318254, 2.518912074882995], "label": "Peeling potatoes"}]}, "xVQkf_TrX7U": {"duration": 139.62, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xVQkf_TrX7U", "annotations": [{"segment": [46.58623088923557, 65.55485192], "label": "Making a sandwich"}, {"segment": [75.55018252730109, 121.9253666146646], "label": "Making a sandwich"}]}, "qK2iRSxxRQU": {"duration": 236.57, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qK2iRSxxRQU", "annotations": [{"segment": [1.8452768314814227, 232.13582363806552], "label": "Belly dance"}]}, "8YTuNZ-mIaY": {"duration": 225.35, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=8YTuNZ-mIaY", "annotations": [{"segment": [44.12071679737938, 211.8145954602184], "label": "Paintball"}]}, "5-ydqbn30kA": {"duration": 30.488, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5-ydqbn30kA", "annotations": [{"segment": [0.2, 30.207], "label": "Knitting"}]}, "MzmOJ324rcM": {"duration": 52.454, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=MzmOJ324rcM", "annotations": []}, "XkOO6lc5vxw": {"duration": 70.427, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XkOO6lc5vxw", "annotations": 
[{"segment": [15.637429797191887, 29.903155226209044], "label": "Trimming branches or hedges"}, {"segment": [40.328108424336975, 47.570707488299526], "label": "Trimming branches or hedges"}]}, "QvBG_Da97h8": {"duration": 58.143, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QvBG_Da97h8", "annotations": [{"segment": [5.801035881435258, 53.04334633385336], "label": "Hand car wash"}]}, "Tv3v-UpBJ2Y": {"duration": 238.26, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Tv3v-UpBJ2Y", "annotations": [{"segment": [0.01, 238.201], "label": "Tug of war"}]}, "b4b6YkxsHk4": {"duration": 7.942, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=b4b6YkxsHk4", "annotations": [{"segment": [0.2, 6.150700468018721], "label": "Shuffleboard"}]}, "Pv4FwH2TC6Y": {"duration": 35.39, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Pv4FwH2TC6Y", "annotations": []}, "5Ra6milBrOM": {"duration": 234.963, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5Ra6milBrOM", "annotations": [{"segment": [128.58448560507944, 222.2601491564813], "label": "Rock-paper-scissors"}]}, "o7pWqZoV_LM": {"duration": 126.851, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=o7pWqZoV_LM", "annotations": []}, "RWjCwIOuzrc": {"duration": 207.424, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RWjCwIOuzrc", "annotations": [{"segment": [29.032883007369612, 44.51708990319171], "label": "Hanging wallpaper"}, {"segment": [74.19515312018405, 83.87278243007285], "label": "Hanging wallpaper"}, {"segment": [164.5196933458129, 183.8749503032087], "label": "Hanging wallpaper"}]}, "mUmSJps9WJI": {"duration": 109.25, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=mUmSJps9WJI", "annotations": []}, "reJ45Z3HU9s": 
{"duration": 35.201, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=reJ45Z3HU9s", "annotations": []}, "b_mWxzi_aps": {"duration": 99.173, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=b_mWxzi_aps", "annotations": []}, "6uNhwOSZXco": {"duration": 161.495, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=6uNhwOSZXco", "annotations": [{"segment": [27.204037441497665, 51.13351482059283], "label": "Snow tubing"}, {"segment": [98.74058034321374, 124.93706084243371], "label": "Snow tubing"}, {"segment": [128.9672886115445, 146.85142433697348], "label": "Snow tubing"}]}, "fO2fWjYEhMc": {"duration": 26.216, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fO2fWjYEhMc", "annotations": [{"segment": [0.2, 26.2153287], "label": "Croquet"}]}, "XxbuqeqOGaM": {"duration": 86.495, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XxbuqeqOGaM", "annotations": [{"segment": [17.33926535564824, 45.81074387737414], "label": "Sharpening knives"}]}, "M1hPI8WvySo": {"duration": 223.32999999999998, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=M1hPI8WvySo", "annotations": [{"segment": [46.154166528861154, 215.09583269110766], "label": "Wrapping presents"}]}, "DCCD1fufXfY": {"duration": 52.13, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=DCCD1fufXfY", "annotations": [{"segment": [11.873283273010921, 29.276588892355694], "label": "Drinking beer"}]}, "Z9gstJONME4": {"duration": 35.295, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Z9gstJONME4", "annotations": [{"segment": [7.601625063618575, 11.120062945238512], "label": "Doing crunches"}, {"segment": [15.463134705363123, 20.575864752092098], "label": "Doing crunches"}, {"segment": [25.908497166422315, 31.790885499755646], 
"label": "Doing crunches"}]}, "5ICSEb7X5-k": {"duration": 59.049, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=5ICSEb7X5-k", "annotations": []}, "rgfxN6n4ZLo": {"duration": 103.78999999999999, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rgfxN6n4ZLo", "annotations": []}, "AG2g8djPKV0": {"duration": 44.44, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=AG2g8djPKV0", "annotations": [{"segment": [5.4080395881435255, 37.85627711700467], "label": "Hand washing clothes"}]}, "DyVqsDJYXdM": {"duration": 212.18, "subset": "training", "resolution": "462x360", "url": "https://www.youtube.com/watch?v=DyVqsDJYXdM", "annotations": [{"segment": [3.805884023400936, 83.89492173322932], "label": "Cricket"}, {"segment": [85.21870748049922, 207.99983553978157], "label": "Cricket"}]}, "IROb83YwQ8Y": {"duration": 144.73, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=IROb83YwQ8Y", "annotations": [{"segment": [20.09511385625493, 107.47499096154324], "label": "Shaving"}]}, "61sN9tqZHwk": {"duration": 81.757, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=61sN9tqZHwk", "annotations": [{"segment": [18.608964118564742, 57.35639625585023], "label": "Grooming dog"}]}, "Po8gmt7hVTY": {"duration": 92.09, "subset": "validation", "resolution": "480x270", "url": "https://www.youtube.com/watch?v=Po8gmt7hVTY", "annotations": [{"segment": [11.636983667101786, 84.61954690639627], "label": "Preparing pasta"}]}, "3JgFuceFs3c": {"duration": 61.719, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3JgFuceFs3c", "annotations": []}, "erXZ0pZEtGE": {"duration": 61.069, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=erXZ0pZEtGE", "annotations": [{"segment": [0.01, 61.06848], "label": "Powerbocking"}]}, "pMDFkrK0KRc": {"duration": 
185.087, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pMDFkrK0KRc", "annotations": [{"segment": [97.8681448374202, 163.20952590328565], "label": "Putting in contact lenses"}]}, "E4ItlgO16LI": {"duration": 85.53999999999999, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=E4ItlgO16LI", "annotations": [{"segment": [34.02994851014041, 78.3356069625585], "label": "Using parallel bars"}]}, "uRwr0x--Ai4": {"duration": 96.18, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=uRwr0x--Ai4", "annotations": []}, "J2gGPC98yec": {"duration": 21.9, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=J2gGPC98yec", "annotations": [{"segment": [7.093741029641186, 21.861], "label": "Shot put"}]}, "iYegrB5DT7Y": {"duration": 105.72, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iYegrB5DT7Y", "annotations": []}, "DozJpP2cClI": {"duration": 118.68, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=DozJpP2cClI", "annotations": [{"segment": [1.4811400686427458, 45.91534212792512], "label": "Playing badminton"}]}, "E33xUgVqEH0": {"duration": 32.485, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=E33xUgVqEH0", "annotations": [{"segment": [0.2, 27.27878159126365], "label": "Cleaning sink"}]}, "Ca0dN5O5ogM": {"duration": 71.56, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=Ca0dN5O5ogM", "annotations": []}, "IKWlTZnjebw": {"duration": 153.39, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IKWlTZnjebw", "annotations": []}, "DbF6oI705UA": {"duration": 90.28, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DbF6oI705UA", "annotations": [{"segment": [10.844766609712009, 14.084112480145466], "label": 
"Cheerleading"}, {"segment": [16.760093851373107, 20.562804221012378], "label": "Cheerleading"}, {"segment": [38.02710369639276, 41.68897294123058], "label": "Cheerleading"}, {"segment": [44.22411318765676, 59.85747804061823], "label": "Cheerleading"}, {"segment": [63.37850616065459, 82.11037575924806], "label": "Cheerleading"}]}, "ICzctKJ6nqc": {"duration": 209.352, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ICzctKJ6nqc", "annotations": [{"segment": [30.404294193802592, 41.813312885391376], "label": "Hula hoop"}, {"segment": [95.9246586797839, 124.28421942744745], "label": "Hula hoop"}]}, "DAd-joUpMIk": {"duration": 100.45, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DAd-joUpMIk", "annotations": [{"segment": [10.1855286, 79.33454575507021], "label": "Bungee jumping"}]}, "SqEHpHNuy-w": {"duration": 117.052, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SqEHpHNuy-w", "annotations": [{"segment": [71.03427410920436, 74.86902926677068], "label": "Arm wrestling"}]}, "RIaNmXzBQnE": {"duration": 168.624, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=RIaNmXzBQnE", "annotations": [{"segment": [0.01, 156.25937473946956], "label": "Having an ice cream"}]}, "44FeihJUKvM": {"duration": 59.1, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=44FeihJUKvM", "annotations": [{"segment": [0.01, 59.09478458049887], "label": "Drinking coffee"}]}, "aQHGZzqZLxs": {"duration": 120.37, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aQHGZzqZLxs", "annotations": [{"segment": [6.289272230889235, 118.74521450858033], "label": "Starting a campfire"}]}, "0e7d39SwHCA": {"duration": 49.436, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0e7d39SwHCA", "annotations": []}, "SIj-ti_70HQ": {"duration": 18.32, 
"subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SIj-ti_70HQ", "annotations": [{"segment": [2.5797441497659905, 17.773154446177845], "label": "Brushing hair"}]}, "5co1E0umtJQ": {"duration": 188.361, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5co1E0umtJQ", "annotations": [{"segment": [16.74439469578783, 175.10033697347893], "label": "Scuba diving"}]}, "aPEqCGdCsp0": {"duration": 213.25, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aPEqCGdCsp0", "annotations": [{"segment": [0, 209.92519675819034], "label": "Getting a tattoo"}]}, "ZkIGGQ9iOSA": {"duration": 48.67, "subset": "validation", "resolution": "854x478", "url": "https://www.youtube.com/watch?v=ZkIGGQ9iOSA", "annotations": [{"segment": [0.20000000000000004, 45.30500024906282], "label": "Doing crunches"}]}, "LvaU_4xbxb0": {"duration": 72.26, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LvaU_4xbxb0", "annotations": []}, "wPCQfs0Rgx0": {"duration": 96.247, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wPCQfs0Rgx0", "annotations": [{"segment": [6.906910907956319, 80.9309778127925], "label": "Baking cookies"}]}, "b7_ok8iTlT8": {"duration": 167.18, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=b7_ok8iTlT8", "annotations": [{"segment": [65.9777425897036, 167.141], "label": "Preparing salad"}]}, "ykcLgz3DlYg": {"duration": 133.167, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ykcLgz3DlYg", "annotations": [{"segment": [68.14132864898595, 109.06767542901716], "label": "Making a cake"}]}, "gsQ-OEvrxjo": {"duration": 57.632, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gsQ-OEvrxjo", "annotations": []}, "2Ua6E2a6kT4": {"duration": 44.676, "subset": "training", "resolution": "640x360", 
"url": "https://www.youtube.com/watch?v=2Ua6E2a6kT4", "annotations": [{"segment": [10.095257410296412, 38.98858034321373], "label": "Painting fence"}]}, "apPeChgjzs4": {"duration": 25.891, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=apPeChgjzs4", "annotations": [{"segment": [0.2, 25.690248], "label": "Playing congas"}]}, "awag66wjX4k": {"duration": 161.217, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=awag66wjX4k", "annotations": [{"segment": [26.399227769110766, 143.57933073322934], "label": "Assembling bicycle"}]}, "Lbdp-HCjWw8": {"duration": 73.17, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Lbdp-HCjWw8", "annotations": [{"segment": [5.935469592933377, 10.729502711388456], "label": "Playing squash"}, {"segment": [17.121546879875194, 19.860994380655228], "label": "Playing squash"}, {"segment": [22.02972365210608, 40.977568865834634], "label": "Playing squash"}, {"segment": [43.71701636661467, 52.962651681747275], "label": "Playing squash"}, {"segment": [58.44154668330734, 65.51845272698908], "label": "Playing squash"}]}, "yEFS8M4pgMU": {"duration": 161.727, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yEFS8M4pgMU", "annotations": [{"segment": [0.01, 161.661], "label": "Tug of war"}]}, "FmaW2KK4wWU": {"duration": 215.342, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FmaW2KK4wWU", "annotations": [{"segment": [161.4226639968455, 204.62499525994403], "label": "Hanging wallpaper"}]}, "1JEQbXJC85w": {"duration": 223.86, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1JEQbXJC85w", "annotations": [{"segment": [51.70339848, 193.61363696879874], "label": "Preparing salad"}]}, "vth3IYGHu5k": {"duration": 161.05, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vth3IYGHu5k", 
"annotations": [{"segment": [15.83481903276131, 106.06815288611546], "label": "High jump"}]}, "9ddD5Ob93J0": {"duration": 112.455, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=9ddD5Ob93J0", "annotations": [{"segment": [25.08725965678627, 98.7701201872075], "label": "Changing car wheel"}]}, "r96PJ548pn8": {"duration": 143.035, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=r96PJ548pn8", "annotations": [{"segment": [0.01, 0.02], "label": "Doing crunches"}]}, "OwSdSL_4sxU": {"duration": 36.22, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=OwSdSL_4sxU", "annotations": [{"segment": [1.5257791825273013, 35.997086639625586], "label": "Playing water polo"}]}, "zdWZ4wn1_fQ": {"duration": 143.059, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zdWZ4wn1_fQ", "annotations": []}, "s69uPXLvzIg": {"duration": 92.207, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=s69uPXLvzIg", "annotations": [{"segment": [9.637802411856475, 92.20643990929705], "label": "Welding"}]}, "D9EQ6jCH0_M": {"duration": 210.442, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=D9EQ6jCH0_M", "annotations": [{"segment": [102.1023426406444, 170.3894399694355], "label": "Painting furniture"}]}, "_mupI51ie9A": {"duration": 69.15, "subset": "training", "resolution": "240x176", "url": "https://www.youtube.com/watch?v=_mupI51ie9A", "annotations": [{"segment": [0.2, 68.94902494331066], "label": "Playing racquetball"}, {"segment": [24.48803154602184, 28.695226393135727], "label": "Playing racquetball"}, {"segment": [34.3048195226209, 47.03428085491419], "label": "Playing racquetball"}, {"segment": [59.11648144149766, 68.949023], "label": "Playing racquetball"}]}, "hCFCYe_Jp-k": {"duration": 99.057, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=hCFCYe_Jp-k", "annotations": [{"segment": [13.135394313731732, 98.747257900156], "label": "Baton twirling"}]}, "F1-PnXa9SwQ": {"duration": 33.066, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=F1-PnXa9SwQ", "annotations": [{"segment": [11.967441310452418, 32.86521541950113], "label": "Cutting the grass"}]}, "uRBE-ANigvs": {"duration": 98.5, "subset": "training", "resolution": "512x288", "url": "https://www.youtube.com/watch?v=uRBE-ANigvs", "annotations": [{"segment": [8.758841152886115, 22.742254221528864], "label": "Discus throw"}, {"segment": [34.26704521216848, 49.3261054399376], "label": "Discus throw"}, {"segment": [56.240980034321375, 76.37094829797192], "label": "Discus throw"}]}, "nGOEmEDesPI": {"duration": 208.632, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nGOEmEDesPI", "annotations": [{"segment": [21.156059303596635, 31.896827873114923], "label": "Snowboarding"}, {"segment": [43.61402994895306, 64.7700892525497], "label": "Snowboarding"}, {"segment": [73.5579908094283, 99.59621764462416], "label": "Snowboarding"}, {"segment": [111.3134197204623, 139.6299914037378], "label": "Snowboarding"}, {"segment": [145.81407027709682, 164.0408290617339], "label": "Snowboarding"}, {"segment": [179.66376385647425, 195.93765551014042], "label": "Snowboarding"}]}, "TM4-Miytfv4": {"duration": 24.08, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=TM4-Miytfv4", "annotations": [{"segment": [3.4101575663026527, 23.12161778471139], "label": "High jump"}]}, "UfjR8ewF8xo": {"duration": 213.76, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UfjR8ewF8xo", "annotations": [{"segment": [22.66394383775351, 71.99135101404056], "label": "Layup drill in basketball"}, {"segment": [71.65805772230888, 197.97621528861154], "label": "Layup drill in basketball"}]}, "q53Ajkll_kw": 
{"duration": 208.1, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=q53Ajkll_kw", "annotations": [{"segment": [93.36896196000001, 202.48361934477379], "label": "Playing bagpipes"}, {"segment": [4.542901716068643, 91.83151326053043], "label": "Playing bagpipes"}]}, "WnLYDJ8Jwv8": {"duration": 134.33, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=WnLYDJ8Jwv8", "annotations": [{"segment": [91.75519188767552, 134.261], "label": "Washing face"}]}, "mDqiAEB4Ads": {"duration": 186.317, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mDqiAEB4Ads", "annotations": [{"segment": [58.242141636126746, 80.55380952881725], "label": "Laying tile"}, {"segment": [101.41666781808631, 141.11405095183437], "label": "Laying tile"}, {"segment": [167.48238573410495, 172.40833838573795], "label": "Laying tile"}]}, "yfjnahzAPSc": {"duration": 207.73, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yfjnahzAPSc", "annotations": [{"segment": [14.906957736812874, 200.2717351825273], "label": "Cricket"}]}, "UwGJfYcU5nk": {"duration": 186.83, "subset": "testing", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=UwGJfYcU5nk", "annotations": []}, "yWCEDAQvhzA": {"duration": 171.13, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=yWCEDAQvhzA", "annotations": [{"segment": [6.138631825273011, 93.69181123244931], "label": "Doing karate"}, {"segment": [102.50044305772231, 154.28452106084242], "label": "Doing karate"}]}, "j_r7zJWxI8w": {"duration": 34.62, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=j_r7zJWxI8w", "annotations": []}, "5Q_FrGFVGNY": {"duration": 143.2, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5Q_FrGFVGNY", "annotations": [{"segment": [96.72764568, 97.93956716000001], "label": "Cumbia"}, 
{"segment": [22.7575102, 138.28412604290173], "label": "Cumbia"}]}, "lZmfH8ksz8A": {"duration": 160.358, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lZmfH8ksz8A", "annotations": []}, "FtbrPGaINt0": {"duration": 85.17, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FtbrPGaINt0", "annotations": [{"segment": [13.6680816, 45.6110901], "label": "Plataform diving"}]}, "Jdu5AqaWfz8": {"duration": 110.13, "subset": "testing", "resolution": "1200x720", "url": "https://www.youtube.com/watch?v=Jdu5AqaWfz8", "annotations": []}, "61iCOJ3Meus": {"duration": 7.152, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=61iCOJ3Meus", "annotations": [{"segment": [1.1045598395364387, 4.317824827278805], "label": "Doing a powerbomb"}]}, "P1gGM89_T2g": {"duration": 63.955, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=P1gGM89_T2g", "annotations": [{"segment": [0.01, 63.941], "label": "Scuba diving"}]}, "QylENMzsW9w": {"duration": 64.506, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QylENMzsW9w", "annotations": [{"segment": [2.6164288367453064, 61.687341420187416], "label": "Mooping floor"}]}, "kUQ4bTeoG-Y": {"duration": 137.161, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kUQ4bTeoG-Y", "annotations": [{"segment": [60.34175109045178, 63.55141860046801], "label": "Running a marathon"}]}, "029IFU5pBqI": {"duration": 222.076, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=029IFU5pBqI", "annotations": []}, "5ayMRPi7Lg4": {"duration": 21.46, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=5ayMRPi7Lg4", "annotations": [{"segment": [1.6386955, 21.365], "label": "Springboard diving"}]}, "SIf4H2dqbpg": {"duration": 100.566, "subset": "validation", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=SIf4H2dqbpg", "annotations": [{"segment": [0.01, 100.565645], "label": "Decorating the Christmas tree"}]}, "OJJMEEsOdPQ": {"duration": 12.38, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=OJJMEEsOdPQ", "annotations": [{"segment": [0, 12.376234], "label": "Using uneven bars"}]}, "j8i-9T0UeRQ": {"duration": 133.701, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=j8i-9T0UeRQ", "annotations": [{"segment": [46.1079293660286, 56.52074080420719], "label": "Hurling"}, {"segment": [83.59405054347152, 86.92615020368868], "label": "Hurling"}, {"segment": [92.96558083783228, 105.25269686574887], "label": "Hurling"}]}, "tFjGMdff3WM": {"duration": 123.089, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tFjGMdff3WM", "annotations": [{"segment": [17.907232115292476, 27.493544654233283], "label": "Welding"}, {"segment": [36.88813094239528, 43.79027597043266], "label": "Welding"}, {"segment": [55.67730351871926, 60.853912289747306], "label": "Welding"}, {"segment": [72.93266608881272, 76.76719110438906], "label": "Welding"}, {"segment": [88.46249240189684, 93.44737492214605], "label": "Welding"}, {"segment": [104.5674974673174, 109.55237998756661], "label": "Welding"}]}, "mi_zRluISrE": {"duration": 164.932, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mi_zRluISrE", "annotations": [{"segment": [18.783132081745855, 143.57517236817472], "label": "Laying tile"}]}, "xZUa0OQ8l0Q": {"duration": 194.096, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xZUa0OQ8l0Q", "annotations": []}, "-2dxp-mv2zo": {"duration": 126.456, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-2dxp-mv2zo", "annotations": [{"segment": [41.82315891419657, 50.10887907644306], "label": "Baking cookies"}]}, "_QyQSAtMdj8": 
{"duration": 183.972, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_QyQSAtMdj8", "annotations": [{"segment": [24.675828393135724, 99.56409828393134], "label": "Snowboarding"}]}, "x18x9BKMAlk": {"duration": 205.89, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=x18x9BKMAlk", "annotations": [{"segment": [14.611344230109205, 27.456482014820594], "label": "Tai chi"}, {"segment": [46.40306024726989, 55.07352825195008], "label": "Tai chi"}, {"segment": [173.56992431591266, 188.34183276833073], "label": "Tai chi"}]}, "9ntzCNcFTKM": {"duration": 92.56, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9ntzCNcFTKM", "annotations": [{"segment": [2.021476859074363, 88.22302396255851], "label": "Doing karate"}]}, "MmOVjM5-D-U": {"duration": 93.159, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MmOVjM5-D-U", "annotations": [{"segment": [0.2, 67.28918188923556], "label": "Grooming dog"}]}, "81LATpIw6zE": {"duration": 40.31, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=81LATpIw6zE", "annotations": []}, "qX7UMKN6ygM": {"duration": 87.633, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=qX7UMKN6ygM", "annotations": [{"segment": [0.19999999999999998, 77.75883177570094], "label": "Beach soccer"}]}, "kgzplQb0__4": {"duration": 196.674, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=kgzplQb0__4", "annotations": []}, "F4eF_nO2UFs": {"duration": 215.017, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=F4eF_nO2UFs", "annotations": [{"segment": [26.830702028081124, 203.2425678627145], "label": "Assembling bicycle"}]}, "dLRZeBxrGIU": {"duration": 67.11, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dLRZeBxrGIU", 
"annotations": []}, "28_jzEjO7lM": {"duration": 122.323, "subset": "testing", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=28_jzEjO7lM", "annotations": []}, "Qo3riKtRg2c": {"duration": 10.821, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Qo3riKtRg2c", "annotations": [{"segment": [5.806943787831512, 9.301238450858033], "label": "Playing ten pins"}]}, "sWEbq5Ry63Q": {"duration": 234.2, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=sWEbq5Ry63Q", "annotations": [{"segment": [71.61077483099324, 227.6199613151326], "label": "Fixing bicycle"}]}, "YODfHuzK2As": {"duration": 165.837, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=YODfHuzK2As", "annotations": [{"segment": [24.569726989079566, 62.070889235569425], "label": "Throwing darts"}, {"segment": [69.8297503900156, 72.67466614664586], "label": "Throwing darts"}, {"segment": [112.77571918876754, 116.65561778471138], "label": "Throwing darts"}, {"segment": [123.12211544461779, 126.22603432137284], "label": "Throwing darts"}, {"segment": [136.5724305772231, 140.96964898595942], "label": "Throwing darts"}, {"segment": [147.69480655226207, 151.83336505460218], "label": "Throwing darts"}]}, "ZOczr9WCVig": {"duration": 226.186, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZOczr9WCVig", "annotations": [{"segment": [0.01, 217.71645752418098], "label": "Laying tile"}]}, "Gg8Ha8OPtZ0": {"duration": 21.433, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Gg8Ha8OPtZ0", "annotations": [{"segment": [1.4690234009360374, 12.987502340093602], "label": "Playing ten pins"}]}, "6EKVtCJ2nrs": {"duration": 62.09, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=6EKVtCJ2nrs", "annotations": [{"segment": [21.6700309, 51.3416001], "label": "Bungee jumping"}, {"segment": 
[4.667305300000001, 16.9418774], "label": "Bungee jumping"}]}, "ZJCbT3300OA": {"duration": 124.877, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZJCbT3300OA", "annotations": [{"segment": [27.46895037098284, 123.90249954570982], "label": "Snow tubing"}]}, "Uw_0h2UrfyY": {"duration": 139.04, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Uw_0h2UrfyY", "annotations": [{"segment": [19.07183775351014, 117.91560686427458], "label": "Ballet"}]}, "NbdAUGbrWrE": {"duration": 130.82, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=NbdAUGbrWrE", "annotations": [{"segment": [0, 127.75990736661466], "label": "Brushing hair"}]}, "rhDxMg72ofo": {"duration": 214.785, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rhDxMg72ofo", "annotations": [{"segment": [34.177842638377534, 193.00428784024962], "label": "Blow-drying hair"}]}, "orwTrxIwCpo": {"duration": 132.192, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=orwTrxIwCpo", "annotations": [{"segment": [0, 40.111058896777635], "label": "Hand car wash"}, {"segment": [43.20445675514095, 58.05276647528486], "label": "Hand car wash"}, {"segment": [83.41862891386404, 91.25523625975039], "label": "Hand car wash"}, {"segment": [105.07241327535102, 129.20091642199688], "label": "Hand car wash"}]}, "3dHFCjBbGKA": {"duration": 60.233, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=3dHFCjBbGKA", "annotations": []}, "sN1MPx9hb1I": {"duration": 36.827, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sN1MPx9hb1I", "annotations": [{"segment": [0.2, 8.894968798751949], "label": "Knitting"}, {"segment": [9.583611544461776, 34.432137285491415], "label": "Knitting"}]}, "79gvzh0wn0Q": {"duration": 90.77, "subset": "training", "resolution": "600x480", "url": 
"https://www.youtube.com/watch?v=79gvzh0wn0Q", "annotations": [{"segment": [0, 60.32239816848674], "label": "Doing karate"}]}, "lq20hEghHtU": {"duration": 187.548, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lq20hEghHtU", "annotations": [{"segment": [0.2, 152.54646546539837], "label": "Camel ride"}]}, "18QVPZgjy8w": {"duration": 132.38, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=18QVPZgjy8w", "annotations": [{"segment": [62.731662639999996, 74.5806384], "label": "Playing badminton"}, {"segment": [86.5209536, 112.09864432], "label": "Playing badminton"}]}, "kTStFRAp99E": {"duration": 10.75, "subset": "training", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=kTStFRAp99E", "annotations": [{"segment": [2.730698907956318, 9.731659906396255], "label": "Shaving legs"}]}, "r3dM-5cZ7e8": {"duration": 67.269, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=r3dM-5cZ7e8", "annotations": [{"segment": [16.47598829797192, 67.268207], "label": "Swimming"}]}, "v1Iv-ySc2LM": {"duration": 39.405, "subset": "training", "resolution": "432x720", "url": "https://www.youtube.com/watch?v=v1Iv-ySc2LM", "annotations": [{"segment": [0.01, 39.404263], "label": "Mooping floor"}]}, "vlwOSDuiKQA": {"duration": 201.97, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=vlwOSDuiKQA", "annotations": [{"segment": [9.294900753853282, 190.46669510862068], "label": "Playing racquetball"}]}, "PMDpnLURLp8": {"duration": 91.3, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PMDpnLURLp8", "annotations": [{"segment": [1.242593, 82.05007020280812], "label": "Pole vault"}]}, "045Tkq12H_c": {"duration": 231.92000000000002, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=045Tkq12H_c", "annotations": [{"segment": [6.14919968798752, 
113.21761778471138], "label": "Rock climbing"}, {"segment": [138.53785179407177, 145.77220436817473], "label": "Rock climbing"}, {"segment": [183.7525553822153, 202.20015444617783], "label": "Rock climbing"}, {"segment": [210.1579422776911, 217.0305772230889], "label": "Rock climbing"}]}, "95UgspVYJSM": {"duration": 46.557, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=95UgspVYJSM", "annotations": [{"segment": [0, 31.44891091575663], "label": "Decorating the Christmas tree"}]}, "72_TVgyRzC0": {"duration": 171.062, "subset": "testing", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=72_TVgyRzC0", "annotations": []}, "yzN9jN3qncA": {"duration": 150.35, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yzN9jN3qncA", "annotations": [{"segment": [0.01, 146.10467706708266], "label": "Playing piano"}]}, "BSwChZlOb0k": {"duration": 30.98, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=BSwChZlOb0k", "annotations": []}, "ODx-nocNBos": {"duration": 174.429, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ODx-nocNBos", "annotations": [{"segment": [36.72610764430577, 46.79178159126365], "label": "Sumo"}, {"segment": [82.42970826833073, 91.40720124804993], "label": "Sumo"}, {"segment": [103.10514664586583, 118.88377067082682], "label": "Sumo"}, {"segment": [134.93443993759752, 145.54420436817472], "label": "Sumo"}]}, "MXN34b2OUjQ": {"duration": 24.85, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=MXN34b2OUjQ", "annotations": [{"segment": [8.39785845, 22.334707488299532], "label": "Playing field hockey"}, {"segment": [5.45720425, 6.7911323], "label": "Playing field hockey"}]}, "n9EdHGz7ArI": {"duration": 110.551, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=n9EdHGz7ArI", "annotations": []}, "RX8YG2zlyUo": {"duration": 
85.47, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RX8YG2zlyUo", "annotations": [{"segment": [28.602671606864273, 75.22435647425897], "label": "Horseback riding"}]}, "H5Z__A99EG4": {"duration": 118.49000000000001, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=H5Z__A99EG4", "annotations": [{"segment": [30.870621796300423, 32.53430756942278], "label": "Smoking hookah"}, {"segment": [64.88376111856475, 68.39598750390014], "label": "Smoking hookah"}]}, "apdhN0kHZwo": {"duration": 154.901, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=apdhN0kHZwo", "annotations": []}, "n0Th8ZqMeGk": {"duration": 214.901, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=n0Th8ZqMeGk", "annotations": [{"segment": [0.2, 2.3468093009434665], "label": "Snowboarding"}, {"segment": [11.063529561590627, 214.900679], "label": "Snowboarding"}]}, "b4xvXZq5uD8": {"duration": 215.71, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=b4xvXZq5uD8", "annotations": []}, "03c6QhTMDSs": {"duration": 161.91, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=03c6QhTMDSs", "annotations": []}, "EwEV5_sHGJk": {"duration": 204.07999999999998, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=EwEV5_sHGJk", "annotations": [{"segment": [70.04311950078004, 189.11642265210608], "label": "Ironing clothes"}]}, "E2oIRY0ZjJQ": {"duration": 39.87, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=E2oIRY0ZjJQ", "annotations": []}, "lIXHKxbq59c": {"duration": 71.634, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lIXHKxbq59c", "annotations": [{"segment": [23.004190327613106, 69.92546957878315], "label": "Snow tubing"}]}, "HX3ltWwf2xk": {"duration": 42.098, "subset": "testing", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=HX3ltWwf2xk", "annotations": []}, "_rX3VLTtzNg": {"duration": 217.409, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_rX3VLTtzNg", "annotations": []}, "UhNgPK81rKM": {"duration": 32.9, "subset": "validation", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=UhNgPK81rKM", "annotations": [{"segment": [0.6968643600000001, 32.363106096107266], "label": "Playing bagpipes"}]}, "Zkz4ef53YjA": {"duration": 43.68, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Zkz4ef53YjA", "annotations": [{"segment": [19.69190445273011, 36.67430044], "label": "Preparing pasta"}, {"segment": [8.926088177535101, 11.924163595943838], "label": "Preparing pasta"}]}, "xwSeXFkTNlE": {"duration": 162.15, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xwSeXFkTNlE", "annotations": [{"segment": [19.219463338533544, 94.32710296411855], "label": "Using the pommel horse"}]}, "UojTppgtyCE": {"duration": 171.526, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UojTppgtyCE", "annotations": [{"segment": [70.6255600624025, 159.17503120124806], "label": "Trimming branches or hedges"}]}, "Ck0IhZZI5CA": {"duration": 80.667, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ck0IhZZI5CA", "annotations": [{"segment": [0.01, 80.66612244897959], "label": "Painting fence"}]}, "0NrS_wVpo-o": {"duration": 122.068, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0NrS_wVpo-o", "annotations": []}, "cYLIJC6QjQA": {"duration": 236.728, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cYLIJC6QjQA", "annotations": [{"segment": [11.445383775351015, 21.784990639625587], "label": "Slacklining"}, {"segment": [62.031837753510146, 67.20115756630265], "label": 
"Slacklining"}, {"segment": [94.52470514820594, 96.74012792511701], "label": "Slacklining"}, {"segment": [120.74054134165367, 223.75770046801873], "label": "Slacklining"}]}, "ACki-MP9qdQ": {"duration": 81.32, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=ACki-MP9qdQ", "annotations": [{"segment": [27.52410402, 48.9461248049922], "label": "Mixing drinks"}, {"segment": [56.42752730109205, 59.34400624024961], "label": "Mixing drinks"}]}, "F6FyJl5OQ0A": {"duration": 182.945, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=F6FyJl5OQ0A", "annotations": [{"segment": [19.405035881435257, 169.81262870514823], "label": "Wakeboarding"}]}, "Paus1tL8KjE": {"duration": 199.135, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Paus1tL8KjE", "annotations": [{"segment": [59.646937714243265, 193.23122351014038], "label": "Waterskiing"}]}, "LoA7h0bZmDI": {"duration": 188.5, "subset": "training", "resolution": "378x218", "url": "https://www.youtube.com/watch?v=LoA7h0bZmDI", "annotations": [{"segment": [42.09559668, 186.14702101404058], "label": "Preparing pasta"}]}, "OtcRXishIxw": {"duration": 84.103, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OtcRXishIxw", "annotations": [{"segment": [0.01, 84.102675], "label": "Hitting a pinata"}]}, "Mkljhl3D9-Q": {"duration": 148.1, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Mkljhl3D9-Q", "annotations": [{"segment": [66.40801482059283, 74.26148439937597], "label": "Preparing pasta"}]}, "KHOTebb8UC8": {"duration": 148.144, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=KHOTebb8UC8", "annotations": [{"segment": [85.9739649994163, 128.72983324960998], "label": "Elliptical trainer"}]}, "yrCBN0tYjPE": {"duration": 52.15, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=yrCBN0tYjPE", "annotations": [{"segment": [16.460214240000003, 49.87392577379095], "label": "Using parallel bars"}]}, "fVzpG-QQ1n8": {"duration": 179.932, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=fVzpG-QQ1n8", "annotations": [{"segment": [13.468530421216848, 19.080418096723868], "label": "Clipping cat claws"}, {"segment": [62.57254758190327, 72.11275663026521], "label": "Clipping cat claws"}]}, "4o4Vh49v-q4": {"duration": 89.351, "subset": "training", "resolution": "482x360", "url": "https://www.youtube.com/watch?v=4o4Vh49v-q4", "annotations": [{"segment": [12.534066025387657, 85.34837666302374], "label": "Gargling mouthwash"}]}, "jBlG0L_M96g": {"duration": 20.922, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jBlG0L_M96g", "annotations": []}, "xZOzVaTezm8": {"duration": 80.968, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=xZOzVaTezm8", "annotations": [{"segment": [45.825776911076446, 67.41312636505461], "label": "Snow tubing"}]}, "dZMMhVx7yY4": {"duration": 164.026, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dZMMhVx7yY4", "annotations": [{"segment": [31.721753510140406, 40.675474258970354], "label": "Rock-paper-scissors"}, {"segment": [45.280244929797185, 52.69904212168487], "label": "Rock-paper-scissors"}, {"segment": [69.83902184087363, 77.76946021840874], "label": "Rock-paper-scissors"}, {"segment": [98.74674882995319, 104.11898127925117], "label": "Rock-paper-scissors"}]}, "5QbiJmDyoM0": {"duration": 97.69, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5QbiJmDyoM0", "annotations": [{"segment": [3.962316816482183, 84.73262115246514], "label": "Using the balance beam"}]}, "P1oGwMXGYVI": {"duration": 37.27, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=P1oGwMXGYVI", 
"annotations": []}, "rZmNsUX-7SU": {"duration": 16.417, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rZmNsUX-7SU", "annotations": [{"segment": [0.01, 16.416], "label": "Swinging at the playground"}]}, "c1T9LbXOVUs": {"duration": 105.07, "subset": "training", "resolution": "400x224", "url": "https://www.youtube.com/watch?v=c1T9LbXOVUs", "annotations": [{"segment": [1.802232449297972, 4.587500780031202], "label": "Rock-paper-scissors"}, {"segment": [6.553572542901716, 8.519644305772232], "label": "Rock-paper-scissors"}, {"segment": [12.124109204368175, 13.926341653666146], "label": "Rock-paper-scissors"}, {"segment": [22.282146645865836, 24.739736349453977], "label": "Rock-paper-scissors"}, {"segment": [28.835719188767552, 29.32723712948518], "label": "Rock-paper-scissors"}, {"segment": [46.202686427457095, 48.16875819032761], "label": "Rock-paper-scissors"}, {"segment": [57.99911700468018, 59.47367082683307], "label": "Rock-paper-scissors"}, {"segment": [81.42813884555382, 83.72188923556942], "label": "Rock-paper-scissors"}]}, "tC5eMgePakk": {"duration": 231.23, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tC5eMgePakk", "annotations": []}, "dJgea9sOlBY": {"duration": 20.713, "subset": "validation", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=dJgea9sOlBY", "annotations": [{"segment": [0.2, 20.301590868595518], "label": "Doing crunches"}]}, "KPv8ZGrb78Y": {"duration": 200.82999999999998, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=KPv8ZGrb78Y", "annotations": [{"segment": [1.5665318857652266, 194.56325851170047], "label": "Zumba"}]}, "XgcPVh41wxA": {"duration": 231.82999999999998, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=XgcPVh41wxA", "annotations": []}, "wTVZdpVurlc": {"duration": 172.78, "subset": "testing", "resolution": "720x480", "url": 
"https://www.youtube.com/watch?v=wTVZdpVurlc", "annotations": []}, "SXZ5bETIdGk": {"duration": 47.346, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=SXZ5bETIdGk", "annotations": []}, "bz4QwqHiVlo": {"duration": 184.37, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bz4QwqHiVlo", "annotations": []}, "QBy64z68s0w": {"duration": 97.1, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QBy64z68s0w", "annotations": [{"segment": [8.941215288611545, 89.10906084243369], "label": "Using the balance beam"}]}, "pD-zyfLtC6w": {"duration": 93.07, "subset": "training", "resolution": "440x330", "url": "https://www.youtube.com/watch?v=pD-zyfLtC6w", "annotations": [{"segment": [3.7748797659906397, 10.889076248049921], "label": "Doing motocross"}, {"segment": [12.486140764430576, 22.213715546021838], "label": "Doing motocross"}, {"segment": [24.10115542901716, 34.264293260530415], "label": "Doing motocross"}, {"segment": [38.47473607644305, 72.30346628705148], "label": "Doing motocross"}, {"segment": [73.90053080343213, 90.01636365054601], "label": "Doing motocross"}]}, "vr7iD3pjmSU": {"duration": 79.37, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vr7iD3pjmSU", "annotations": [{"segment": [14.096811299999999, 52.6269315], "label": "Archery"}]}, "AFnEr0_wStU": {"duration": 180.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AFnEr0_wStU", "annotations": [{"segment": [15.167812410455602, 46.626978891400555], "label": "Getting a piercing"}, {"segment": [57.30062466172116, 72.46843707217676], "label": "Getting a piercing"}]}, "VbhCWRXsEHQ": {"duration": 184.251, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VbhCWRXsEHQ", "annotations": [{"segment": [57.200879153533485, 160.10497241653667], "label": "Baking cookies"}]}, "4mzM3JjBJ74": 
{"duration": 166.139, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4mzM3JjBJ74", "annotations": [{"segment": [46.64302652106084, 117.38495007800311], "label": "Clipping cat claws"}]}, "90LkAH4tZC4": {"duration": 161.449, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=90LkAH4tZC4", "annotations": [{"segment": [0.2, 152.409267464373], "label": "Playing drums"}]}, "6e8GkCHe0Cw": {"duration": 6.53, "subset": "testing", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=6e8GkCHe0Cw", "annotations": []}, "iVgPP9Aq1Gs": {"duration": 209.189, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iVgPP9Aq1Gs", "annotations": []}, "mNiFmkm5OZA": {"duration": 61.77, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=mNiFmkm5OZA", "annotations": []}, "bRVazNYfpGE": {"duration": 94.67, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bRVazNYfpGE", "annotations": [{"segment": [50.939366029999995, 90.07335381669267], "label": "Using the balance beam"}, {"segment": [4.057358280031202, 39.212018070000006], "label": "Using the balance beam"}]}, "cC1nvRqyXWs": {"duration": 6.43, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cC1nvRqyXWs", "annotations": [{"segment": [0, 6.431926], "label": "Ironing clothes"}]}, "ILARi15rMWI": {"duration": 55.543, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ILARi15rMWI", "annotations": [{"segment": [0.2, 55.300999999999995], "label": "Powerbocking"}]}, "dmdllfN9Yv0": {"duration": 126.224, "subset": "testing", "resolution": "1152x720", "url": "https://www.youtube.com/watch?v=dmdllfN9Yv0", "annotations": []}, "n9xULD7oYXc": {"duration": 52.361, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=n9xULD7oYXc", "annotations": 
[{"segment": [0, 52.360996], "label": "Knitting"}]}, "Tm5O7cRr9wU": {"duration": 87.84, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Tm5O7cRr9wU", "annotations": []}, "pleP5-tuJ_Q": {"duration": 142.548, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pleP5-tuJ_Q", "annotations": [{"segment": [0.2, 115.50099708382709], "label": "Knitting"}, {"segment": [124.81201823468247, 140.3303868194414], "label": "Knitting"}]}, "KApqxr02rcQ": {"duration": 45.23, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=KApqxr02rcQ", "annotations": []}, "JErW7ISPd0U": {"duration": 60.233, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=JErW7ISPd0U", "annotations": []}, "JdTP5XEst5k": {"duration": 48.51, "subset": "testing", "resolution": "398x226", "url": "https://www.youtube.com/watch?v=JdTP5XEst5k", "annotations": []}, "SHZ9fsusMGc": {"duration": 53.615, "subset": "training", "resolution": "352x264", "url": "https://www.youtube.com/watch?v=SHZ9fsusMGc", "annotations": [{"segment": [0.01, 36.509973574453184], "label": "Playing rubik cube"}]}, "yaWwad6WXVY": {"duration": 192.47, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yaWwad6WXVY", "annotations": [{"segment": [17.261482059282372, 31.671067082683308], "label": "Layup drill in basketball"}, {"segment": [103.71899219968799, 105.82039001560062], "label": "Layup drill in basketball"}, {"segment": [123.23197191887675, 133.43876131045243], "label": "Layup drill in basketball"}]}, "R_HDifqMHSY": {"duration": 161.751, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=R_HDifqMHSY", "annotations": [{"segment": [15.390555324492977, 139.52421466302653], "label": "Playing drums"}]}, "BmlBdeOzwI8": {"duration": 121.255, "subset": "testing", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=BmlBdeOzwI8", "annotations": []}, "YZacstLO0XA": {"duration": 130.172, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=YZacstLO0XA", "annotations": [{"segment": [15.828889235569422, 108.36701092043681], "label": "Making an omelette"}]}, "euF5okzyaaA": {"duration": 91.44, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=euF5okzyaaA", "annotations": [{"segment": [6.276706676430323, 10.413626985895762], "label": "Horseback riding"}, {"segment": [27.103960327613105, 52.21078673634945], "label": "Horseback riding"}, {"segment": [63.05237086739469, 84.02227701560062], "label": "Horseback riding"}]}, "ZLG1Exv8HrY": {"duration": 22.2, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZLG1Exv8HrY", "annotations": []}, "xYW64moSLKg": {"duration": 151.813, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xYW64moSLKg", "annotations": [{"segment": [5.684071001560063, 22.26261142277691], "label": "Swimming"}, {"segment": [31.262390508580342, 47.13042205460218], "label": "Swimming"}, {"segment": [55.41969226521061, 59.919581808112326], "label": "Swimming"}, {"segment": [69.39303347737909, 76.02444964586583], "label": "Swimming"}, {"segment": [81.47168435569422, 82.4190295226209], "label": "Swimming"}, {"segment": [88.33993681591262, 93.55033523400935], "label": "Swimming"}, {"segment": [98.28706106864273, 135.70719516224648], "label": "Swimming"}, {"segment": [138.07555807956317, 142.10177503900155], "label": "Swimming"}, {"segment": [142.10177503900155, 143.9964653728549], "label": "Swimming"}]}, "gf3JIm2C59g": {"duration": 103.49000000000001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gf3JIm2C59g", "annotations": [{"segment": [52.79511307799251, 80.72647113884555], "label": "Cumbia"}]}, "xXGc5_0_2QI": {"duration": 42.191, "subset": "training", 
"resolution": "426x240", "url": "https://www.youtube.com/watch?v=xXGc5_0_2QI", "annotations": [{"segment": [0.2, 39.62767222429906], "label": "Calf roping"}]}, "UajYunTsr70": {"duration": 21.57, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UajYunTsr70", "annotations": [{"segment": [0.01, 21.57124716553288], "label": "Washing face"}]}, "ux_qqONPSrc": {"duration": 123.51, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ux_qqONPSrc", "annotations": [{"segment": [34.778474959406545, 116.66684922542902], "label": "Preparing salad"}]}, "I2Y-5EEXAE0": {"duration": 201.46, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=I2Y-5EEXAE0", "annotations": [{"segment": [135.73359126365057, 163.68096879875196], "label": "Making a sandwich"}]}, "SwIxaPdYIJE": {"duration": 25.12, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SwIxaPdYIJE", "annotations": [{"segment": [0.01, 25.123989], "label": "Playing lacrosse"}]}, "on2DKKcvWhE": {"duration": 213.717, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=on2DKKcvWhE", "annotations": [{"segment": [55.33186583463339, 96.99742745709828], "label": "Mooping floor"}, {"segment": [95.99745397815913, 108.99710920436817], "label": "Mooping floor"}]}, "U_qCCwgS0OM": {"duration": 160.427, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=U_qCCwgS0OM", "annotations": [{"segment": [12.84207421301013, 146.75942000740267], "label": "Mooping floor"}]}, "Ofnuo7FTHfM": {"duration": 10.473, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ofnuo7FTHfM", "annotations": [{"segment": [0.01, 10.441], "label": "Doing a powerbomb"}]}, "UEtVbSLGhfY": {"duration": 8.43, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=UEtVbSLGhfY", 
"annotations": [{"segment": [0.12174199211370164, 7.290196403328655], "label": "Walking the dog"}]}, "ho8cKYrtufU": {"duration": 176.31, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ho8cKYrtufU", "annotations": [{"segment": [70.3784336973479, 176.221], "label": "Archery"}]}, "a_sm5Hv7HNw": {"duration": 149.12, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=a_sm5Hv7HNw", "annotations": [{"segment": [15.27564792, 99.68337753978159], "label": "Rock climbing"}]}, "Ox16PeB954Q": {"duration": 44.65, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ox16PeB954Q", "annotations": [{"segment": [2.087597503900156, 40.22104524180967], "label": "Smoking hookah"}]}, "BR4dNQG5QBc": {"duration": 23.36, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BR4dNQG5QBc", "annotations": []}, "j3OHA8wXvqo": {"duration": 23.778, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=j3OHA8wXvqo", "annotations": []}, "tnavaGow7BI": {"duration": 5.9, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tnavaGow7BI", "annotations": [{"segment": [0.42459774, 1.54671972], "label": "Preparing salad"}, {"segment": [1.8803030399999998, 5.836666], "label": "Preparing salad"}]}, "bAsbqk-CqKU": {"duration": 160.8, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bAsbqk-CqKU", "annotations": []}, "w4pO-1-FG5w": {"duration": 9.99, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=w4pO-1-FG5w", "annotations": []}, "OQWqRMr0PSA": {"duration": 53.128, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OQWqRMr0PSA", "annotations": []}, "qfb0Qe7s9oo": {"duration": 155.11, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=qfb0Qe7s9oo", "annotations": [{"segment": [38.94667862714508, 111.53251326053042], "label": "Making a lemonade"}]}, "U5wliityRuU": {"duration": 197.208, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=U5wliityRuU", "annotations": [{"segment": [0.01, 6.4863985823463555], "label": "Scuba diving"}, {"segment": [36.94427753423912, 40.32848630667166], "label": "Scuba diving"}, {"segment": [54.4806320822986, 58.48015154062794], "label": "Scuba diving"}, {"segment": [72.63229731625489, 112.93514724249684], "label": "Scuba diving"}, {"segment": [115.98606429155126, 161.2113997049678], "label": "Scuba diving"}]}, "lU-PEm5L5EU": {"duration": 103.89, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lU-PEm5L5EU", "annotations": [{"segment": [12.755610288984853, 23.573661610913312], "label": "Spinning"}, {"segment": [41.657568298316114, 54.41318105103773], "label": "Spinning"}, {"segment": [78.55411076443058, 96.04657254290171], "label": "Spinning"}]}, "1HjQpe8PiqA": {"duration": 146.448, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1HjQpe8PiqA", "annotations": []}, "2Mj26IwwEiY": {"duration": 211.07, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=2Mj26IwwEiY", "annotations": [{"segment": [0.2, 210.86938775510205], "label": "Hanging wallpaper"}, {"segment": [181.1979748854382, 188.09137629445843], "label": "Hanging wallpaper"}]}, "FMlWHXByLL0": {"duration": 69.428, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=FMlWHXByLL0", "annotations": [{"segment": [0.010000000000000002, 69.401], "label": "Rope skipping"}]}, "Tz5eN8gV6F4": {"duration": 163.19, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Tz5eN8gV6F4", "annotations": [{"segment": [32.58705098680138, 163.18984126984128], "label": "Grooming dog"}]}, 
"TqcoukXhXeA": {"duration": 176.519, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TqcoukXhXeA", "annotations": [{"segment": [0.01, 174.70342242514835], "label": "Kite flying"}]}, "lwo7fssfLiw": {"duration": 67.152, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lwo7fssfLiw", "annotations": [{"segment": [30.680893915756627, 50.47164118564742], "label": "Using the rowing machine"}, {"segment": [54.86958502340093, 63.246620904836185], "label": "Using the rowing machine"}]}, "J8WxSDLZwu0": {"duration": 41.006, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=J8WxSDLZwu0", "annotations": [{"segment": [0.2, 40.741], "label": "Hurling"}]}, "ZjbBmS8Q8kM": {"duration": 135.257, "subset": "training", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=ZjbBmS8Q8kM", "annotations": [{"segment": [23.55940334191608, 125.58004608733073], "label": "Playing pool"}]}, "c5Io6wg8D60": {"duration": 34.48, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=c5Io6wg8D60", "annotations": [{"segment": [17.03757566302652, 22.472209048361933], "label": "Triple jump"}]}, "aQQr1ad0v1I": {"duration": 188.99, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aQQr1ad0v1I", "annotations": [{"segment": [81.64024024960999, 111.40798127925117], "label": "Doing motocross"}]}, "X74SOIQRD4M": {"duration": 16.21, "subset": "training", "resolution": "160x120", "url": "https://www.youtube.com/watch?v=X74SOIQRD4M", "annotations": [{"segment": [9.67087897, 15.406614028081124], "label": "Playing lacrosse"}, {"segment": [1.510452355694228, 8.8522634], "label": "Playing lacrosse"}]}, "vZzl0RbIQPM": {"duration": 178.516, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vZzl0RbIQPM", "annotations": [{"segment": [42.052544635118736, 138.1328607301092], "label": "Making a 
lemonade"}]}, "iXLXRHHFp60": {"duration": 172.46, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iXLXRHHFp60", "annotations": [{"segment": [80.15696098, 166.06350721918878], "label": "Playing polo"}, {"segment": [10.89161464352574, 65.09454457000001], "label": "Playing polo"}]}, "GqkvSUNfZFk": {"duration": 145.032, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GqkvSUNfZFk", "annotations": [{"segment": [14.933072898426142, 136.20772552806875], "label": "Playing ice hockey"}]}, "0z7qHOca3D4": {"duration": 115.5, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0z7qHOca3D4", "annotations": [{"segment": [16.3914992199688, 30.981734789391574], "label": "Horseback riding"}, {"segment": [34.58426209048362, 39.44767394695788], "label": "Horseback riding"}, {"segment": [42.32969578783152, 48.093739469578786], "label": "Horseback riding"}, {"segment": [49.895003120124805, 82.1376224648986], "label": "Horseback riding"}, {"segment": [86.28052886115444, 94.38621528861154], "label": "Horseback riding"}]}, "PNuJUYo3Hf8": {"duration": 220.73, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PNuJUYo3Hf8", "annotations": []}, "7hEUl8lziZs": {"duration": 23.058, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=7hEUl8lziZs", "annotations": [{"segment": [0.2, 22.857414965986397], "label": "Laying tile"}]}, "mg0n3DNtUZU": {"duration": 115.497, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mg0n3DNtUZU", "annotations": [{"segment": [1.4402620904836194, 6.481179407176287], "label": "Tug of war"}, {"segment": [50.22914040561622, 68.77251482059282], "label": "Tug of war"}]}, "5Owjrx-YjjM": {"duration": 72.888, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5Owjrx-YjjM", "annotations": [{"segment": 
[44.80120032449298, 72.54610610920436], "label": "Kite flying"}]}, "D9hS68pULz0": {"duration": 168.9, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=D9hS68pULz0", "annotations": [{"segment": [50.06454059522925, 57.70597047555372], "label": "Drinking beer"}]}, "iuXjatyFcwo": {"duration": 53.34, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=iuXjatyFcwo", "annotations": []}, "dUzqM38vwPI": {"duration": 193.794, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dUzqM38vwPI", "annotations": [{"segment": [0.01, 129.87152444796698], "label": "Snowboarding"}]}, "aj-klsonETc": {"duration": 35.62, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aj-klsonETc", "annotations": [{"segment": [1.3975337004202826, 34.29375240631138], "label": "Playing lacrosse"}]}, "am4Z43QlUrg": {"duration": 216.32, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=am4Z43QlUrg", "annotations": [{"segment": [44.526489790951636, 170.68487753198127], "label": "Fixing bicycle"}]}, "CQweu0ZYNHU": {"duration": 96.46000000000001, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=CQweu0ZYNHU", "annotations": [{"segment": [35.71762870514821, 45.492979719188774], "label": "Skateboarding"}, {"segment": [51.358190327613116, 76.47332293291733], "label": "Skateboarding"}]}, "EzmOoUkzJf0": {"duration": 131.26, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EzmOoUkzJf0", "annotations": []}, "LTPrtyWIcA4": {"duration": 130.66, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LTPrtyWIcA4", "annotations": [{"segment": [63.58732885999999, 121.48603470826833], "label": "Cleaning windows"}]}, "MbEtgOmOY-4": {"duration": 171.78, "subset": "validation", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=MbEtgOmOY-4", "annotations": [{"segment": [11.244784260000001, 127.69696329077655], "label": "Getting a piercing"}]}, "8aPE_PF1pgA": {"duration": 124.413, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8aPE_PF1pgA", "annotations": []}, "Sd4C8_FMdjA": {"duration": 119.0, "subset": "validation", "resolution": "352x264", "url": "https://www.youtube.com/watch?v=Sd4C8_FMdjA", "annotations": [{"segment": [12.28443115575124, 19.32817898752694], "label": "Breakdancing"}, {"segment": [28.596268239863388, 52.69330029593815], "label": "Breakdancing"}]}, "kC8DS6b76yI": {"duration": 65.852, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=kC8DS6b76yI", "annotations": [{"segment": [3.7951980065130932, 63.184917146417455], "label": "Painting furniture"}]}, "cJYtpnf5aOM": {"duration": 90.349, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cJYtpnf5aOM", "annotations": [{"segment": [0.01, 90.23643397201415], "label": "Cutting the grass"}]}, "4naz02RpAH4": {"duration": 39.916, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4naz02RpAH4", "annotations": []}, "MZQmJZ_31O0": {"duration": 223.778, "subset": "training", "resolution": "3840x2160", "url": "https://www.youtube.com/watch?v=MZQmJZ_31O0", "annotations": [{"segment": [27.882990654205607, 162.41842056074765], "label": "Hula hoop"}]}, "Nj_rPQwzllA": {"duration": 164.79, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Nj_rPQwzllA", "annotations": [{"segment": [80.46771031657592, 105.9191586275696], "label": "Playing harmonica"}, {"segment": [125.9718148725949, 140.1115083787025], "label": "Playing harmonica"}]}, "XB139ATiXuk": {"duration": 207.32999999999998, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XB139ATiXuk", "annotations": [{"segment": 
[76.78141575663027, 104.26108034321375], "label": "Playing saxophone"}, {"segment": [110.40359360374417, 125.92152184087365], "label": "Playing saxophone"}, {"segment": [130.12429407176288, 134.97364664586584], "label": "Playing saxophone"}, {"segment": [139.8229992199688, 151.4614453978159], "label": "Playing saxophone"}, {"segment": [160.19028003120127, 164.71634243369735], "label": "Playing saxophone"}, {"segment": [166.9793736349454, 170.85885569422777], "label": "Playing saxophone"}, {"segment": [175.38491809672388, 184.1137527301092], "label": "Playing saxophone"}]}, "liI1E_ZZV5w": {"duration": 12.655, "subset": "training", "resolution": "480x264", "url": "https://www.youtube.com/watch?v=liI1E_ZZV5w", "annotations": [{"segment": [1.9544723634945398, 9.515713931357253], "label": "Putting on shoes"}]}, "u2329Chp6IY": {"duration": 165.187, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=u2329Chp6IY", "annotations": [{"segment": [15.59094979853616, 153.97673561364223], "label": "Riding bumper cars"}]}, "EjaS2rW81S4": {"duration": 172.618, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=EjaS2rW81S4", "annotations": [{"segment": [42.41599279327154, 44.83209388191851], "label": "Building sandcastles"}, {"segment": [71.40920585703515, 76.50986371084541], "label": "Building sandcastles"}, {"segment": [147.65031262220694, 150.87177407788346], "label": "Building sandcastles"}]}, "E2sbaDQabkA": {"duration": 165.536, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=E2sbaDQabkA", "annotations": [{"segment": [14.954849782072746, 157.28376347352025], "label": "Rope skipping"}]}, "9dSOQrpovQI": {"duration": 60.98, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9dSOQrpovQI", "annotations": [{"segment": [12.223657904139294, 18.597082066302654], "label": "Washing dishes"}, {"segment": [33.817200381435256, 
43.329774328393135], "label": "Washing dishes"}]}, "U0p4tW1LoPg": {"duration": 60.07, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=U0p4tW1LoPg", "annotations": [{"segment": [4.5425054602184085, 33.38975663026521], "label": "Playing water polo"}, {"segment": [42.474767550702026, 53.43297659906396], "label": "Playing water polo"}]}, "J9DxW6O6lrg": {"duration": 56.4, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=J9DxW6O6lrg", "annotations": []}, "wNvLv7PtvQg": {"duration": 18.95, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wNvLv7PtvQg", "annotations": [{"segment": [0, 18.947482], "label": "Kayaking"}]}, "d3Rx-nw5Zlw": {"duration": 200.64, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=d3Rx-nw5Zlw", "annotations": [{"segment": [7.8237519500780035, 184.03299219968798], "label": "Smoking a cigarette"}]}, "2r7qhNGm44I": {"duration": 141.294, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2r7qhNGm44I", "annotations": [{"segment": [25.7766879875195, 55.51902028081123], "label": "Snowboarding"}, {"segment": [93.63326833073324, 112.5961794071763], "label": "Snowboarding"}, {"segment": [117.22341965678628, 137.05444929797193], "label": "Snowboarding"}]}, "pVkwQOp1xZo": {"duration": 45.628, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pVkwQOp1xZo", "annotations": [{"segment": [0.2, 45.400999999999996], "label": "Painting fence"}]}, "dbMPw8PfXHo": {"duration": 116.57, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=dbMPw8PfXHo", "annotations": [{"segment": [7.816541341653665, 94.36002964118565], "label": "Making a sandwich"}]}, "ssXkWpxq3Yc": {"duration": 136.0, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ssXkWpxq3Yc", "annotations": []}, 
"jwOZsKS25oc": {"duration": 56.19, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jwOZsKS25oc", "annotations": [{"segment": [33.1228593, 54.719052262090486], "label": "Spinning"}, {"segment": [9.316673166926678, 28.39511457], "label": "Spinning"}]}, "mNkOMZXvJrk": {"duration": 179.56, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mNkOMZXvJrk", "annotations": []}, "mk3srKjFB3A": {"duration": 49.6, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mk3srKjFB3A", "annotations": [{"segment": [3.5779275800000003, 47.88802652106084], "label": "Using the pommel horse"}]}, "5olJd75WWFw": {"duration": 71.36, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5olJd75WWFw", "annotations": [{"segment": [12.201306552262091, 50.86663416536661], "label": "Polishing forniture"}, {"segment": [53.429465678627146, 62.12080733229329], "label": "Polishing forniture"}]}, "oOnKQgQZOZ0": {"duration": 31.23, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=oOnKQgQZOZ0", "annotations": [{"segment": [3.7515983244929796, 31.230838], "label": "Playing water polo"}]}, "D_GSISuQw3Y": {"duration": 201.9, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=D_GSISuQw3Y", "annotations": [{"segment": [62.013510140405614, 145.11790951638065], "label": "Doing nails"}]}, "TDWbRqIEcts": {"duration": 127.478, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TDWbRqIEcts", "annotations": [{"segment": [83.70298855858381, 99.166828337744], "label": "Baking cookies"}]}, "CoHVA7nr82A": {"duration": 205.776, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CoHVA7nr82A", "annotations": [{"segment": [125.12926120584315, 133.44987736521514], "label": "Welding"}, {"segment": [150.09110968395913, 
167.05236481200475], "label": "Welding"}]}, "hQqoh0YxJYs": {"duration": 188.268, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hQqoh0YxJYs", "annotations": []}, "iaKlx11RAiY": {"duration": 135.307, "subset": "validation", "resolution": "362x240", "url": "https://www.youtube.com/watch?v=iaKlx11RAiY", "annotations": [{"segment": [8.72630779826084, 118.96444331227953], "label": "Rock-paper-scissors"}]}, "48xSiJdaH2g": {"duration": 173.291, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=48xSiJdaH2g", "annotations": [{"segment": [14.82266880342728, 22.099253689325653], "label": "Hanging wallpaper"}, {"segment": [53.36161838429644, 54.43963095998508], "label": "Hanging wallpaper"}, {"segment": [97.29063084360882, 101.87218429028557], "label": "Hanging wallpaper"}, {"segment": [112.92181319109423, 132.59554269741204], "label": "Hanging wallpaper"}]}, "Iqaw9bsIa18": {"duration": 238.93, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Iqaw9bsIa18", "annotations": []}, "HJWwqjLf1I4": {"duration": 193.027, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=HJWwqjLf1I4", "annotations": []}, "w--HmpjK-s8": {"duration": 90.187, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=w--HmpjK-s8", "annotations": [{"segment": [49.607789391575665, 69.28226677067083], "label": "Playing congas"}]}, "w-st-23ZQSs": {"duration": 234.62, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=w-st-23ZQSs", "annotations": [{"segment": [33.66554134165367, 57.450978159126365], "label": "Pole vault"}, {"segment": [73.91781903276132, 118.5612542901716], "label": "Pole vault"}, {"segment": [131.0028673946958, 192.49548517940715], "label": "Pole vault"}, {"segment": [193.59336817472698, 203.47431513260528], "label": "Pole vault"}, {"segment": [208.96373010920436, 
214.0871840873635], "label": "Pole vault"}, {"segment": [219.9425600624025, 229.0915850234009], "label": "Pole vault"}]}, "i_16EfqIrFg": {"duration": 4.714, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=i_16EfqIrFg", "annotations": [{"segment": [0.2, 4.713649], "label": "Roof shingle removal"}]}, "mFSdzT5gXQs": {"duration": 221.75, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mFSdzT5gXQs", "annotations": [{"segment": [25.945840717628705, 51.19979234945398], "label": "Discus throw"}, {"segment": [92.3671929547582, 133.53459356006238], "label": "Discus throw"}, {"segment": [161.21015699219967, 205.49105848361933], "label": "Discus throw"}]}, "le0tJsyuPks": {"duration": 125.133, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=le0tJsyuPks", "annotations": [{"segment": [16.39799696723869, 20.107067709828396], "label": "Playing ten pins"}, {"segment": [43.33756341341654, 55.636061138845555], "label": "Playing ten pins"}, {"segment": [59.93077463026521, 79.64741384087363], "label": "Playing ten pins"}, {"segment": [108.14869428393136, 124.932334], "label": "Playing ten pins"}]}, "t1s7ST4FRJc": {"duration": 192.1, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=t1s7ST4FRJc", "annotations": [{"segment": [8.990571704500834, 189.4013772414842], "label": "Zumba"}]}, "38iF2VtR_2Y": {"duration": 150.16, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=38iF2VtR_2Y", "annotations": []}, "7RDn5qTQquE": {"duration": 226.65, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7RDn5qTQquE", "annotations": [{"segment": [3.1822929733515872, 213.5672173227065], "label": "Playing squash"}]}, "TBFAAb62ou4": {"duration": 54.38, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TBFAAb62ou4", "annotations": 
[{"segment": [4.238767550702028, 48.40672542901716], "label": "Playing guitarra"}]}, "dN9ZPc7NMVQ": {"duration": 31.49, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=dN9ZPc7NMVQ", "annotations": [{"segment": [5.599739934477379, 16.94658138065523], "label": "Washing face"}]}, "D-y_N4u0uRQ": {"duration": 73.03, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=D-y_N4u0uRQ", "annotations": [{"segment": [23.959563182527297, 41.7158190327613], "label": "Smoking a cigarette"}, {"segment": [44.675195007800305, 54.00861154446177], "label": "Smoking a cigarette"}, {"segment": [55.488299531981276, 67.43962558502339], "label": "Smoking a cigarette"}]}, "_dLbtK8_SHo": {"duration": 213.23, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_dLbtK8_SHo", "annotations": [{"segment": [48.82494744, 97.95267864], "label": "Tennis serve with ball bouncing"}, {"segment": [113.07480840000001, 173.38634820000001], "label": "Tennis serve with ball bouncing"}]}, "1R25VGmqS9o": {"duration": 72.19, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=1R25VGmqS9o", "annotations": [{"segment": [0, 72.190838], "label": "Playing field hockey"}]}, "2WyRPSKFUi8": {"duration": 216.062, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2WyRPSKFUi8", "annotations": [{"segment": [6.234038221528862, 185.50475897035884], "label": "Blowing leaves"}]}, "KpmdpL5btYo": {"duration": 97.06, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=KpmdpL5btYo", "annotations": [{"segment": [15.310631371429654, 83.6449732187059], "label": "Swimming"}]}, "Xf8y5_b2dq4": {"duration": 165.164, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Xf8y5_b2dq4", "annotations": [{"segment": [15.435812829804817, 117.31217659813083], "label": "Painting furniture"}]}, 
"gCxLG4hinNw": {"duration": 88.1, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=gCxLG4hinNw", "annotations": [{"segment": [3.298465195750687, 43.15491918564743], "label": "Playing bagpipes"}]}, "j1oB2NAlYsQ": {"duration": 202.29, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=j1oB2NAlYsQ", "annotations": [{"segment": [14.038789605304212, 25.39601265678627], "label": "Plataform diving"}, {"segment": [32.65201627301092, 60.414117065522625], "label": "Plataform diving"}]}, "d0FP6xp9O4c": {"duration": 131.588, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=d0FP6xp9O4c", "annotations": [{"segment": [20.528468485678204, 131.3874829931973], "label": "Disc dog"}]}, "lG5d8bCHLM4": {"duration": 33.484, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=lG5d8bCHLM4", "annotations": [{"segment": [0.009999999999999998, 33.483173], "label": "Using the monkey bar"}]}, "ewTlNriXY3c": {"duration": 218.616, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ewTlNriXY3c", "annotations": [{"segment": [2.3867815912636505, 115.58842277691109], "label": "Tug of war"}, {"segment": [117.97520436817473, 195.71609048361935], "label": "Tug of war"}]}, "6Bm-_hI5A9A": {"duration": 171.94, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=6Bm-_hI5A9A", "annotations": [{"segment": [4.8283740329205, 162.82350211015242], "label": "Playing violin"}]}, "Usowsx0PDbA": {"duration": 58.93, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Usowsx0PDbA", "annotations": [{"segment": [13.331006606864275, 21.237672594383774], "label": "Javelin throw"}, {"segment": [31.075036090483618, 41.8317793525741], "label": "Javelin throw"}]}, "l_T3zfRQhic": {"duration": 24.01, "subset": "validation", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=l_T3zfRQhic", "annotations": [{"segment": [0.01, 23.981], "label": "Trimming branches or hedges"}]}, "LNlGgtg70uQ": {"duration": 78.83, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=LNlGgtg70uQ", "annotations": [{"segment": [5.532051482059282, 78.801], "label": "Mixing drinks"}]}, "jqKK2KH6l4Q": {"duration": 122.695, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jqKK2KH6l4Q", "annotations": [{"segment": [5.933730813178106, 17.609781768141474], "label": "Capoeira"}, {"segment": [26.988904666390738, 62.78270021685221], "label": "Capoeira"}, {"segment": [72.73605512928, 78.09555392827959], "label": "Capoeira"}, {"segment": [88.62314085488589, 112.93229486739469], "label": "Capoeira"}]}, "aAZSPRL18Ss": {"duration": 78.23, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=aAZSPRL18Ss", "annotations": [{"segment": [2.36483244, 33.52695624], "label": "Spinning"}, {"segment": [35.552995319812794, 69.45945397815913], "label": "Spinning"}]}, "3zjhYw-TVQw": {"duration": 63.577, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3zjhYw-TVQw", "annotations": [{"segment": [44.03720547189235, 48.40125146957878], "label": "Spread mulch"}]}, "fjoaurNKg60": {"duration": 225.117, "subset": "training", "resolution": "362x268", "url": "https://www.youtube.com/watch?v=fjoaurNKg60", "annotations": [{"segment": [23.17902087512072, 225.1174603174603], "label": "Putting in contact lenses"}]}, "HJHRPfkWx4A": {"duration": 33.623, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HJHRPfkWx4A", "annotations": []}, "NDvp3JeVWcM": {"duration": 125.81, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NDvp3JeVWcM", "annotations": [{"segment": [7.850567954690977, 29.832158186583463], "label": "Using uneven bars"}, {"segment": 
[47.888464457410294, 69.870054700156], "label": "Using uneven bars"}]}, "IZNrdIkMCoc": {"duration": 89.653, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IZNrdIkMCoc", "annotations": [{"segment": [0.01, 88.01460942685188], "label": "Capoeira"}]}, "RfpUYx-RCnE": {"duration": 205.71, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RfpUYx-RCnE", "annotations": []}, "ZsicrMkZEN8": {"duration": 86.309, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ZsicrMkZEN8", "annotations": [{"segment": [0, 86.308594], "label": "Baton twirling"}]}, "Mgvj2B6MyjI": {"duration": 17.67, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Mgvj2B6MyjI", "annotations": [{"segment": [13.535348742589703, 17.670384], "label": "Springboard diving"}]}, "bxRZ-Lvjgzk": {"duration": 236.84, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bxRZ-Lvjgzk", "annotations": [{"segment": [25.125367313572543, 224.28085234321372], "label": "Painting"}]}, "IVFGb72s3oY": {"duration": 157.48, "subset": "validation", "resolution": "448x336", "url": "https://www.youtube.com/watch?v=IVFGb72s3oY", "annotations": [{"segment": [70.50854765619196, 135.12091014252817], "label": "Playing piano"}]}, "jXARa4T2G8o": {"duration": 166.65, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=jXARa4T2G8o", "annotations": [{"segment": [41.32536505460218, 50.682051482059286], "label": "Snowboarding"}, {"segment": [54.84057878315133, 71.994503900156], "label": "Snowboarding"}, {"segment": [74.07376755070203, 155.9634945397816], "label": "Snowboarding"}]}, "L2jO-evVIRE": {"duration": 62.19, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=L2jO-evVIRE", "annotations": [{"segment": [0.2, 40.35459594383775], "label": "Raking leaves"}, {"segment": [48.30910764430577, 
55.390563182527295], "label": "Raking leaves"}]}, "BV3tS9BTLmI": {"duration": 163.678, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BV3tS9BTLmI", "annotations": []}, "520b6oA9s-U": {"duration": 234.29, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=520b6oA9s-U", "annotations": [{"segment": [59.942919262348724, 204.68313894460542], "label": "Making a cake"}]}, "LAicExwwM54": {"duration": 212.858, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LAicExwwM54", "annotations": [{"segment": [44.48444617784711, 192.54914196567864], "label": "Fixing the roof"}]}, "bAD_-DEgubk": {"duration": 58.03, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bAD_-DEgubk", "annotations": [{"segment": [2.4438159126365058, 53.877776310452425], "label": "Playing lacrosse"}]}, "ikUNgSw4Gys": {"duration": 196.98, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ikUNgSw4Gys", "annotations": []}, "Y7VWbYGI0Oc": {"duration": 24.289, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Y7VWbYGI0Oc", "annotations": [{"segment": [0.01, 24.261], "label": "Kite flying"}]}, "DSBeb2sl0Tg": {"duration": 123.043, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=DSBeb2sl0Tg", "annotations": []}, "jLPePkIH8CI": {"duration": 49.64, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jLPePkIH8CI", "annotations": []}, "iQqPRgiXrmA": {"duration": 44.977, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=iQqPRgiXrmA", "annotations": []}, "c0OPXHtIcqg": {"duration": 153.6, "subset": "testing", "resolution": "854x476", "url": "https://www.youtube.com/watch?v=c0OPXHtIcqg", "annotations": []}, "DthkauVLag0": {"duration": 52.62, "subset": "training", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=DthkauVLag0", "annotations": [{"segment": [15.99578003120125, 45.69051014040562], "label": "Brushing teeth"}]}, "T-vwsIOVNTw": {"duration": 6.55, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=T-vwsIOVNTw", "annotations": [{"segment": [0.6794071762870515, 6.327613104524181], "label": "Playing kickball"}]}, "EWt03pUigc0": {"duration": 120.98, "subset": "training", "resolution": "524x360", "url": "https://www.youtube.com/watch?v=EWt03pUigc0", "annotations": [{"segment": [17.079651326053042, 19.910532761310453], "label": "Discus throw"}, {"segment": [39.72670280811232, 42.935035101404054], "label": "Discus throw"}]}, "So3EfVyUP64": {"duration": 63.88, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=So3EfVyUP64", "annotations": [{"segment": [3.7274765990639627, 59.291727769110764], "label": "Ironing clothes"}]}, "zYN_cbLO3MU": {"duration": 202.966, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zYN_cbLO3MU", "annotations": [{"segment": [12.97678783151326, 174.395368174727], "label": "Surfing"}, {"segment": [180.72550858034322, 186.106127925117], "label": "Surfing"}]}, "Mcutz37Bowg": {"duration": 72.05, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Mcutz37Bowg", "annotations": [{"segment": [2.4996593783218053, 66.24607288610997], "label": "Wrapping presents"}]}, "uICwWvS_AOo": {"duration": 190.869, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=uICwWvS_AOo", "annotations": [{"segment": [75.632571925117, 146.20312131981277], "label": "Playing blackjack"}]}, "fNYGe8C3gMM": {"duration": 226.14, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=fNYGe8C3gMM", "annotations": []}, "HBJzxWj7CaY": {"duration": 173.546, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=HBJzxWj7CaY", "annotations": [{"segment": [75.72711388455538, 90.33162870514819], "label": "Swimming"}]}, "kWtY5wkkAMY": {"duration": 56.773, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kWtY5wkkAMY", "annotations": [{"segment": [2.9227800889235573, 50.48438335413417], "label": "Trimming branches or hedges"}]}, "7LvEI5zhENk": {"duration": 232.13, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=7LvEI5zhENk", "annotations": [{"segment": [15.209749647128742, 169.48006749657742], "label": "High jump"}]}, "RSJYnStAgio": {"duration": 136.09, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RSJYnStAgio", "annotations": []}, "99Mk5a8Tvyk": {"duration": 143.477, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=99Mk5a8Tvyk", "annotations": [{"segment": [22.598316692667705, 126.8638174726989], "label": "Disc dog"}]}, "8Zgys8Vn-4Y": {"duration": 178.167, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8Zgys8Vn-4Y", "annotations": [{"segment": [0.2, 169.17183931357255], "label": "Waterskiing"}]}, "lX006hvkTGo": {"duration": 63.02, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lX006hvkTGo", "annotations": []}, "V3uCGRAWG2M": {"duration": 104.21000000000001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=V3uCGRAWG2M", "annotations": [{"segment": [4.848937830000001, 94.41325502808111], "label": "Fixing bicycle"}]}, "pON-g7IJKEY": {"duration": 26.378, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pON-g7IJKEY", "annotations": [{"segment": [0.2, 26.177868480725625], "label": "Hitting a pinata"}]}, "XqDVvCRa_Gc": {"duration": 43.14, "subset": "testing", "resolution": "2560x810", "url": "https://www.youtube.com/watch?v=XqDVvCRa_Gc", 
"annotations": []}, "KjkD7CZcXK8": {"duration": 82.547, "subset": "validation", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=KjkD7CZcXK8", "annotations": [{"segment": [0.01, 82.546937], "label": "Removing ice from car"}]}, "uokQq9Xrjf8": {"duration": 26.053, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=uokQq9Xrjf8", "annotations": [{"segment": [3.5723057722308895, 21.717995319812793], "label": "Kneeling"}]}, "bDD56LnztiQ": {"duration": 144.8, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bDD56LnztiQ", "annotations": [{"segment": [55.89096601, 113.53979188000001], "label": "Skateboarding"}]}, "HWFosaUWoSI": {"duration": 95.365, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=HWFosaUWoSI", "annotations": []}, "p4qZGZenAoU": {"duration": 213.42000000000002, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=p4qZGZenAoU", "annotations": [{"segment": [200.39838065522622, 206.7232464898596], "label": "Washing face"}]}, "K5X_ytiMuqc": {"duration": 111.271, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=K5X_ytiMuqc", "annotations": [{"segment": [40.78836693105965, 108.01813017093504], "label": "Applying sunscreen"}]}, "o0d1PXKkJqM": {"duration": 96.02000000000001, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=o0d1PXKkJqM", "annotations": [{"segment": [11.68351583775351, 89.72341008736349], "label": "Playing bagpipes"}]}, "XPN_PSadJ_A": {"duration": 82.896, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XPN_PSadJ_A", "annotations": [{"segment": [29.336828393135725, 74.84636349453979], "label": "BMX"}]}, "_8m1tlowwKM": {"duration": 130.15, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=_8m1tlowwKM", "annotations": [{"segment": [0.01, 
130.14784580498866], "label": "Playing piano"}]}, "hAjmf3OLu-c": {"duration": 78.204, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hAjmf3OLu-c", "annotations": []}, "EfQtRtIFGVo": {"duration": 163.52, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EfQtRtIFGVo", "annotations": [{"segment": [21.682948627145084, 51.78398319188768], "label": "Doing nails"}, {"segment": [57.906227510140404, 91.32347774726989], "label": "Doing nails"}, {"segment": [96.68044152574102, 116.57773556006241], "label": "Doing nails"}, {"segment": [121.679605825273, 142.34218039937596], "label": "Doing nails"}]}, "bCtiTOEf9KI": {"duration": 14.304, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bCtiTOEf9KI", "annotations": [{"segment": [0.612007556179634, 13.515990448870147], "label": "Doing crunches"}]}, "kTBEGydNpgg": {"duration": 86.448, "subset": "validation", "resolution": "512x290", "url": "https://www.youtube.com/watch?v=kTBEGydNpgg", "annotations": [{"segment": [71.09699535889122, 77.69501197363681], "label": "Throwing darts"}]}, "rBsRSdPFOww": {"duration": 77.184, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rBsRSdPFOww", "annotations": [{"segment": [50.451716135725434, 64.29884586271451], "label": "Sharpening knives"}, {"segment": [71.28261563806554, 73.08876299375976], "label": "Sharpening knives"}]}, "ptPJ7W5g4Dg": {"duration": 88.24, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ptPJ7W5g4Dg", "annotations": []}, "JeUjZe3sOwE": {"duration": 123.04, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=JeUjZe3sOwE", "annotations": []}, "PTwe2IIo3To": {"duration": 188.8, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PTwe2IIo3To", "annotations": []}, "5ry-UTd0y_o": {"duration": 156.457, "subset": 
"validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5ry-UTd0y_o", "annotations": [{"segment": [3.752155200435365, 154.0819947642983], "label": "Snow tubing"}]}, "B69Fkd_L9gA": {"duration": 181.09, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=B69Fkd_L9gA", "annotations": [{"segment": [49.29230265210609, 54.09441809672388], "label": "Shoveling snow"}, {"segment": [90.25152262090484, 173.01739469578783], "label": "Shoveling snow"}]}, "bKec2fA5RZ8": {"duration": 181.51, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bKec2fA5RZ8", "annotations": [{"segment": [5.2728655, 48.579336399999995], "label": "Vacuuming floor"}, {"segment": [58.972599, 106.84054822932917], "label": "Vacuuming floor"}]}, "p8h3G0Tblqc": {"duration": 101.91, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=p8h3G0Tblqc", "annotations": [{"segment": [27.337803432137285, 101.861], "label": "Layup drill in basketball"}]}, "VLQQVNIZQqo": {"duration": 52.8, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=VLQQVNIZQqo", "annotations": [{"segment": [21.911667004680186, 35.585865210608425], "label": "Hammer throw"}, {"segment": [43.82333400936037, 50.24855967238689], "label": "Hammer throw"}]}, "WSDytipClpI": {"duration": 48.902, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WSDytipClpI", "annotations": []}, "7KYj4FQUESo": {"duration": 29.49, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=7KYj4FQUESo", "annotations": [{"segment": [0.6884165288611545, 28.638127600624028], "label": "Vacuuming floor"}]}, "ZZVrH3Hv1fM": {"duration": 58.98, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZZVrH3Hv1fM", "annotations": [{"segment": [0, 0], "label": "Washing hands"}]}, "I6riV09yqm8": {"duration": 60.07, 
"subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=I6riV09yqm8", "annotations": [{"segment": [6.926792511700468, 45.30496723868954], "label": "Using uneven bars"}]}, "n2L9F6cMNaE": {"duration": 153.99, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=n2L9F6cMNaE", "annotations": [{"segment": [65.57153354134165, 79.02210452418097], "label": "Fun sliding down"}, {"segment": [112.42294539781591, 135.0036224648986], "label": "Fun sliding down"}]}, "_wITx73-BXw": {"duration": 187.71, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_wITx73-BXw", "annotations": [{"segment": [6.0017134578783145, 172.2930912176287], "label": "Playing field hockey"}]}, "sYPd_0tC318": {"duration": 213.972, "subset": "training", "resolution": "632x480", "url": "https://www.youtube.com/watch?v=sYPd_0tC318", "annotations": [{"segment": [0.01, 213.881], "label": "Waterskiing"}]}, "UUjXw1xjprU": {"duration": 191.1, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UUjXw1xjprU", "annotations": [{"segment": [37.58305160830251, 105.44359366437727], "label": "Skateboarding"}, {"segment": [119.43238961453304, 164.67275098524954], "label": "Skateboarding"}]}, "-pLiSHAz4fo": {"duration": 111.433, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-pLiSHAz4fo", "annotations": [{"segment": [0.01, 111.43256235827664], "label": "Using the rowing machine"}]}, "MU2DUVy_wqQ": {"duration": 52.59, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=MU2DUVy_wqQ", "annotations": [{"segment": [0.01, 52.541], "label": "Getting a haircut"}]}, "_8KsVaJLOYI": {"duration": 101.17, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_8KsVaJLOYI", "annotations": [{"segment": [11.995116992199689, 44.98168872074883], "label": "Playing field hockey"}, 
{"segment": [49.40094234945398, 94.85612253042122], "label": "Playing field hockey"}]}, "IhWxuvzIHkc": {"duration": 29.21, "subset": "validation", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=IhWxuvzIHkc", "annotations": [{"segment": [7.200007744149766, 25.701293466458658], "label": "Vacuuming floor"}]}, "4DFKpHF2aq4": {"duration": 60.094, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4DFKpHF2aq4", "annotations": [{"segment": [10.968657391193608, 60.09324263038549], "label": "Slacklining"}]}, "Zezsxn3vcwQ": {"duration": 210.188, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Zezsxn3vcwQ", "annotations": []}, "YT-iFs0NE1s": {"duration": 42.89, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YT-iFs0NE1s", "annotations": []}, "SbX8KDWMhio": {"duration": 68.45, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SbX8KDWMhio", "annotations": [{"segment": [9.105664779670954, 18.915046742217136], "label": "Smoking hookah"}, {"segment": [24.13960887444282, 26.91182551603196], "label": "Smoking hookah"}, {"segment": [45.57097598826655, 48.449816131774995], "label": "Smoking hookah"}]}, "ItpDcSURusI": {"duration": 54.43, "subset": "training", "resolution": "626x360", "url": "https://www.youtube.com/watch?v=ItpDcSURusI", "annotations": [{"segment": [14.434697745709828, 47.294862613884554], "label": "Plataform diving"}]}, "IcR28Yqt4mY": {"duration": 118.28, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=IcR28Yqt4mY", "annotations": []}, "IWhEUNOUIyc": {"duration": 227.301, "subset": "validation", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=IWhEUNOUIyc", "annotations": [{"segment": [12.765659524340158, 119.50075721396202], "label": "Playing drums"}, {"segment": [166.66277712332985, 211.69718711197427], "label": "Playing drums"}]}, 
"Rn_frSrkkbM": {"duration": 61.39, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=Rn_frSrkkbM", "annotations": [{"segment": [0.09577778485289072, 15.707556715874077], "label": "Cricket"}, {"segment": [28.35022431645565, 48.65511463338534], "label": "Cricket"}]}, "7Iy7Cjv2SAE": {"duration": 195.466, "subset": "training", "resolution": "352x240", "url": "https://www.youtube.com/watch?v=7Iy7Cjv2SAE", "annotations": [{"segment": [38.1173122074883, 131.42849249141963], "label": "Grooming dog"}]}, "WtNvqSFTgxI": {"duration": 237.66, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=WtNvqSFTgxI", "annotations": [{"segment": [81.12862872000001, 132.1754260102377], "label": "Playing lacrosse"}]}, "bYNQ0GrCt64": {"duration": 35.3, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bYNQ0GrCt64", "annotations": []}, "EmlmKZ6Gi3Y": {"duration": 205.242, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=EmlmKZ6Gi3Y", "annotations": [{"segment": [41.86307009345794, 196.8714890965732], "label": "River tubing"}]}, "rliYO1sEYts": {"duration": 196.232, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rliYO1sEYts", "annotations": [{"segment": [54.17166458658346, 56.31404680187208], "label": "Rock-paper-scissors"}, {"segment": [64.57752106084244, 69.47439469578784], "label": "Rock-paper-scissors"}, {"segment": [82.94079719188768, 90.2953120124805], "label": "Rock-paper-scissors"}, {"segment": [93.662255850234, 96.11094227769111], "label": "Rock-paper-scissors"}, {"segment": [111.72131825273011, 115.39434789391576], "label": "Rock-paper-scissors"}, {"segment": [122.74040717628705, 124.27083619344774], "label": "Rock-paper-scissors"}, {"segment": [141.41164118564743, 147.22727145085804], "label": "Rock-paper-scissors"}]}, "yJgC3-t_ciw": {"duration": 61.255, "subset": "training", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=yJgC3-t_ciw", "annotations": [{"segment": [0.2, 61.001], "label": "Table soccer"}]}, "kF-J9mKwA8A": {"duration": 140.388, "subset": "training", "resolution": "1264x720", "url": "https://www.youtube.com/watch?v=kF-J9mKwA8A", "annotations": [{"segment": [0.009999999999999998, 140.387845], "label": "Playing drums"}]}, "nc4twXSueZo": {"duration": 63.69, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=nc4twXSueZo", "annotations": [{"segment": [1.1923682171776668, 61.506327202747975], "label": "Doing kickboxing"}]}, "dth80jcqPxU": {"duration": 107.28, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dth80jcqPxU", "annotations": [{"segment": [18.74404573104524, 99.5777431097244], "label": "Shot put"}]}, "kr4RUFi1UGs": {"duration": 36.32, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=kr4RUFi1UGs", "annotations": [{"segment": [0.8498286037441498, 35.46618039625585], "label": "Rock climbing"}]}, "AXw2bkQyRPo": {"duration": 112.00999999999999, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=AXw2bkQyRPo", "annotations": [{"segment": [1.922221019453023, 107.81912445477411], "label": "Cumbia"}]}, "dLOwmXV_yTw": {"duration": 115.38, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dLOwmXV_yTw", "annotations": [{"segment": [0.01, 109.4396111076443], "label": "Wrapping presents"}]}, "FfVfjZOHsl4": {"duration": 78.39, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=FfVfjZOHsl4", "annotations": [{"segment": [7.020292511700468, 28.38640015600624], "label": "Cutting the grass"}]}, "VWpRBfhoFVg": {"duration": 18.251, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=VWpRBfhoFVg", "annotations": [{"segment": [0.2, 17.879889235569422], "label": "Hitting a pinata"}]}, 
"Po3-9ktM5Do": {"duration": 148.678, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Po3-9ktM5Do", "annotations": [{"segment": [10.669474142230994, 101.59194944124295], "label": "Blow-drying hair"}]}, "Z5h4QgKfn9k": {"duration": 137.068, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=Z5h4QgKfn9k", "annotations": []}, "mC_8ckG6WpU": {"duration": 216.573, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mC_8ckG6WpU", "annotations": [{"segment": [1.1040211796490242, 5.826777447265106], "label": "Hula hoop"}]}, "UlGX-5x0Mak": {"duration": 25.148, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UlGX-5x0Mak", "annotations": [{"segment": [0.2, 23.504622434345322], "label": "Having an ice cream"}]}, "jkaevzzYdP8": {"duration": 223.45, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jkaevzzYdP8", "annotations": [{"segment": [60.654444196567866, 88.89013373634945], "label": "Walking the dog"}, {"segment": [106.31957172386895, 193.81535042121683], "label": "Walking the dog"}, {"segment": [206.01595701248047, 221.35386244149765], "label": "Walking the dog"}]}, "-oJb3Acw-_s": {"duration": 191.8, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-oJb3Acw-_s", "annotations": [{"segment": [20.34660896723869, 165.46580527769112], "label": "Getting a piercing"}]}, "zMWhT5Rv6WE": {"duration": 192.703, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zMWhT5Rv6WE", "annotations": [{"segment": [70.92915569422777, 81.44831014040561], "label": "Playing congas"}, {"segment": [93.79578803120125, 97.4033183400936], "label": "Playing congas"}, {"segment": [113.6372047301092, 132.5767388517941], "label": "Playing congas"}, {"segment": [162.63949142589703, 174.96521998127926], "label": "Playing congas"}]}, 
"TUhitcbJ5C4": {"duration": 69.92, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=TUhitcbJ5C4", "annotations": [{"segment": [1.1990743572542903, 23.436453346333852], "label": "Washing hands"}, {"segment": [23.82074534, 63.55094093447738], "label": "Washing hands"}]}, "83a5nOzZU1g": {"duration": 92.22999999999999, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=83a5nOzZU1g", "annotations": [{"segment": [37.96539457797458, 82.78728444713344], "label": "Mixing drinks"}]}, "MyPQ_e4dGDw": {"duration": 217.571, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MyPQ_e4dGDw", "annotations": [{"segment": [24.777919527665457, 206.0300962230889], "label": "Making a lemonade"}]}, "APAxAnwS9oM": {"duration": 102.49000000000001, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=APAxAnwS9oM", "annotations": [{"segment": [35.17696280811232, 93.21895144149767], "label": "Walking the dog"}]}, "K-r_Tj7uFGw": {"duration": 60.12, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=K-r_Tj7uFGw", "annotations": [{"segment": [2.3446357644305773, 59.647533847113884], "label": "Getting a tattoo"}]}, "hjDGz9OhwvM": {"duration": 129.45, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hjDGz9OhwvM", "annotations": [{"segment": [9.18541599297972, 119.81416245787831], "label": "Ironing clothes"}]}, "M0cKfbLMlSA": {"duration": 209.77, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=M0cKfbLMlSA", "annotations": [{"segment": [15.03018251401869, 205.8481518224299], "label": "Doing kickboxing"}]}, "p07Y--q4JQw": {"duration": 136.26, "subset": "testing", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=p07Y--q4JQw", "annotations": []}, "X5wlZXJ6SCY": {"duration": 70.054, "subset": "testing", "resolution": "640x354", "url": 
"https://www.youtube.com/watch?v=X5wlZXJ6SCY", "annotations": []}, "0pegrKSh4iw": {"duration": 177.169, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0pegrKSh4iw", "annotations": [{"segment": [0.01, 177.16757369614513], "label": "Playing drums"}]}, "qhnWJ4G5JMA": {"duration": 167.0, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qhnWJ4G5JMA", "annotations": [{"segment": [126.50857735, 139.87334726], "label": "Spinning"}, {"segment": [8.63875481, 116.35515975000001], "label": "Spinning"}]}, "VEzhpSPnBrY": {"duration": 77.764, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VEzhpSPnBrY", "annotations": [{"segment": [0.01, 73.13232917316692], "label": "Kneeling"}]}, "piKE8obU4JY": {"duration": 69.498, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=piKE8obU4JY", "annotations": [{"segment": [2.9249719188767553, 66.42682215288612], "label": "Drum corps"}]}, "j62IuPoWMSE": {"duration": 158.871, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=j62IuPoWMSE", "annotations": []}, "0gwhdJGq2eg": {"duration": 124.44, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0gwhdJGq2eg", "annotations": [{"segment": [3.7850382215288616, 120.05364820592824], "label": "Playing water polo"}]}, "r-3RwNYrXBU": {"duration": 63.088, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=r-3RwNYrXBU", "annotations": []}, "AR-VPPtV7ag": {"duration": 18.02, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=AR-VPPtV7ag", "annotations": [{"segment": [9.220168524180966, 14.392458184087364], "label": "Drinking beer"}]}, "X4P9YA6Oabg": {"duration": 231.062, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=X4P9YA6Oabg", "annotations": [{"segment": 
[49.30756401833838, 186.7928885074279], "label": "Braiding hair"}]}, "wDlbcTFI90o": {"duration": 94.575, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wDlbcTFI90o", "annotations": [{"segment": [20.213287555382216, 69.04977062714508], "label": "Laying tile"}]}, "9BR5LChbBLQ": {"duration": 116.867, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9BR5LChbBLQ", "annotations": [{"segment": [44.93745638629284, 86.6001183800623], "label": "Hand car wash"}]}, "z6pmp8TrAVo": {"duration": 11.49, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=z6pmp8TrAVo", "annotations": []}, "BjjN_aq_wq4": {"duration": 124.2, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=BjjN_aq_wq4", "annotations": []}, "XFZ5_uNU7jE": {"duration": 96.131, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XFZ5_uNU7jE", "annotations": [{"segment": [7.048548499219969, 88.18184079875195], "label": "Peeling potatoes"}]}, "wBgU5jXb_V4": {"duration": 76.81, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=wBgU5jXb_V4", "annotations": [{"segment": [14.527101905504793, 61.78277495000001], "label": "Playing polo"}]}, "bfZZdTPYmos": {"duration": 228.206, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=bfZZdTPYmos", "annotations": [{"segment": [8.54436366926677, 213.25307657878315], "label": "Playing congas"}]}, "qz82piU5PZE": {"duration": 46.46, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=qz82piU5PZE", "annotations": []}, "QvP8tPp8Wog": {"duration": 64.81, "subset": "training", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=QvP8tPp8Wog", "annotations": [{"segment": [0, 56.33335473], "label": "Playing guitarra"}]}, "5ASU6NFr4Ck": {"duration": 227.951, "subset": "testing", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=5ASU6NFr4Ck", "annotations": []}, "KuAVv3uS4zc": {"duration": 172.548, "subset": "training", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=KuAVv3uS4zc", "annotations": [{"segment": [14.262953198127924, 158.23804680187206], "label": "BMX"}]}, "0D9EmyHxhcg": {"duration": 194.439, "subset": "training", "resolution": "544x360", "url": "https://www.youtube.com/watch?v=0D9EmyHxhcg", "annotations": [{"segment": [25.710226973370997, 183.29882294675258], "label": "Rollerblading"}]}, "ksPQdczmNKs": {"duration": 225.373, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ksPQdczmNKs", "annotations": [{"segment": [0.2, 217.6069875195008], "label": "Hula hoop"}]}, "iKJB-RU_PKY": {"duration": 175.125, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iKJB-RU_PKY", "annotations": [{"segment": [12.567465376166068, 131.13876914260243], "label": "Shuffleboard"}, {"segment": [135.23685567830876, 160.64499219968798], "label": "Shuffleboard"}]}, "SV-8M3HNk1s": {"duration": 208.49, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=SV-8M3HNk1s", "annotations": [{"segment": [0.2, 3.414465678627145], "label": "Windsurfing"}, {"segment": [9.593022620904836, 13.82045631825273], "label": "Windsurfing"}, {"segment": [28.453880655226207, 128.93672776911077], "label": "Windsurfing"}, {"segment": [146.49683697347893, 155.92726599063963], "label": "Windsurfing"}, {"segment": [160.4798868954758, 171.21106474258968], "label": "Windsurfing"}]}, "TmnVK-cYrzg": {"duration": 147.61, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TmnVK-cYrzg", "annotations": [{"segment": [31.288133054602184, 125.38259202028081], "label": "Vacuuming floor"}]}, "ZW0Wi1_IJgc": {"duration": 179.51, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=ZW0Wi1_IJgc", 
"annotations": []}, "igwT-3gprOI": {"duration": 114.17, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=igwT-3gprOI", "annotations": [{"segment": [104.3107425897036, 114.081], "label": "Drinking coffee"}]}, "sa-BrKIzFKw": {"duration": 84.521, "subset": "testing", "resolution": "847x480", "url": "https://www.youtube.com/watch?v=sa-BrKIzFKw", "annotations": []}, "N3WVKg7Q2io": {"duration": 102.87, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=N3WVKg7Q2io", "annotations": [{"segment": [0.01, 64.67137658970358], "label": "Playing water polo"}]}, "kh-LnD1XeEg": {"duration": 203.15, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kh-LnD1XeEg", "annotations": []}, "PwRqqR1YmPM": {"duration": 200.25, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PwRqqR1YmPM", "annotations": []}, "G_Nzm0WkGEI": {"duration": 127.06, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=G_Nzm0WkGEI", "annotations": [{"segment": [7.5210556224649, 115.388827049922], "label": "Ironing clothes"}]}, "BioBrxuKOsw": {"duration": 68.45, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BioBrxuKOsw", "annotations": [{"segment": [14.065547839999999, 42.682676009999994], "label": "Playing polo"}]}, "qwJhmfZKdNQ": {"duration": 45.187, "subset": "training", "resolution": "350x196", "url": "https://www.youtube.com/watch?v=qwJhmfZKdNQ", "annotations": [{"segment": [5.4929765990639625, 7.816928237129486], "label": "Trimming branches or hedges"}, {"segment": [28.802915756630266, 35.36789703588144], "label": "Trimming branches or hedges"}, {"segment": [37.69287831513261, 38.96104992199688], "label": "Trimming branches or hedges"}]}, "iEdRtsNQh04": {"duration": 213.484, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iEdRtsNQh04", 
"annotations": []}, "Fhk2t6beztU": {"duration": 14.42, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Fhk2t6beztU", "annotations": [{"segment": [0.2, 14.161000000000001], "label": "Rollerblading"}]}, "n50dwUENM1E": {"duration": 65.411, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=n50dwUENM1E", "annotations": [{"segment": [11.318363494539783, 49.148209048361934], "label": "Cleaning sink"}]}, "COeXmBZpLSE": {"duration": 110.85, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=COeXmBZpLSE", "annotations": [{"segment": [10.203232053091646, 108.77682985414654], "label": "Tango"}]}, "TFp9J_gQqlk": {"duration": 91.95, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TFp9J_gQqlk", "annotations": []}, "bqBJ1tcHCHk": {"duration": 80.88, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bqBJ1tcHCHk", "annotations": [{"segment": [38.56603364723419, 60.72805233882297], "label": "Grooming horse"}]}, "0je8CeAJABY": {"duration": 65.066, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0je8CeAJABY", "annotations": [{"segment": [24.46883151326053, 32.18514352574103], "label": "Doing a powerbomb"}, {"segment": [35.84023868954758, 39.29227301092044], "label": "Doing a powerbomb"}]}, "4iFlM_Zu7xs": {"duration": 220.24, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4iFlM_Zu7xs", "annotations": []}, "rnPfQuWS4Rk": {"duration": 31.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rnPfQuWS4Rk", "annotations": [{"segment": [23.084269890795632, 30.05310608424337], "label": "Hammer throw"}]}, "GEmuTigZlYA": {"duration": 91.743, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=GEmuTigZlYA", "annotations": [{"segment": [0.01, 91.601], "label": 
"Grooming dog"}]}, "D5xp0LuEcKw": {"duration": 190.66, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=D5xp0LuEcKw", "annotations": [{"segment": [29.000394260668386, 107.52453872032433], "label": "Tango"}]}, "VQPfRCsSmQk": {"duration": 82.34, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VQPfRCsSmQk", "annotations": [{"segment": [32.86464898595944, 51.09340198000001], "label": "Polishing forniture"}]}, "xUHXFXela-0": {"duration": 132.03, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xUHXFXela-0", "annotations": [{"segment": [6.797107410827046, 23.89286241381628], "label": "Rock climbing"}, {"segment": [35.83929362072442, 102.98647592162189], "label": "Rock climbing"}]}, "nDggAkf1ulc": {"duration": 61.42, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=nDggAkf1ulc", "annotations": [{"segment": [7.09293384, 34.281797039999994], "label": "Hopscotch"}]}, "l12QfUhRLLc": {"duration": 45.84, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=l12QfUhRLLc", "annotations": [{"segment": [18.277840873634947, 36.555681747269894], "label": "Cricket"}]}, "UaiKJ_7mKIA": {"duration": 30.12, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UaiKJ_7mKIA", "annotations": [{"segment": [0.01, 25.793818595943836], "label": "Shoveling snow"}]}, "RnAFO7-y0kI": {"duration": 61.86, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RnAFO7-y0kI", "annotations": []}, "Xm23RMCpDd8": {"duration": 49.53, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Xm23RMCpDd8", "annotations": [{"segment": [6.876765082683307, 14.83526849297972], "label": "Discus throw"}, {"segment": [24.957248558502343, 44.73760654914197], "label": "Discus throw"}]}, "nJMS8jN3uU4": {"duration": 51.39, "subset": 
"training", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=nJMS8jN3uU4", "annotations": [{"segment": [32.4659368174727, 35.188111544461776], "label": "Playing kickball"}, {"segment": [20.53640639625585, 21.49717394695788], "label": "Playing kickball"}, {"segment": [40.15207722308892, 48.63885725429017], "label": "Playing kickball"}]}, "-UwqKYkkKlU": {"duration": 64.0, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-UwqKYkkKlU", "annotations": [{"segment": [9.776557360374415, 55.46699890171607], "label": "Spinning"}]}, "YS3Q47uz3HI": {"duration": 159.777, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YS3Q47uz3HI", "annotations": [{"segment": [7.727069205217189, 142.07836925721926], "label": "Making a cake"}]}, "oLEagb_RGq8": {"duration": 18.3, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=oLEagb_RGq8", "annotations": [{"segment": [0, 18.297323], "label": "Mowing the lawn"}]}, "8P1vKpL3Zcs": {"duration": 189.011, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8P1vKpL3Zcs", "annotations": [{"segment": [19.756098853477948, 172.79214818116535], "label": "Waterskiing"}]}, "IhmtmKxEU40": {"duration": 222.22, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IhmtmKxEU40", "annotations": []}, "RNAUncQEASo": {"duration": 122.95, "subset": "validation", "resolution": "1280x714", "url": "https://www.youtube.com/watch?v=RNAUncQEASo", "annotations": [{"segment": [0.2, 4.98702940770692], "label": "Grooming dog"}, {"segment": [26.469617625521348, 122.74945578231292], "label": "Grooming dog"}]}, "jLwUrFMRGzY": {"duration": 78.62, "subset": "training", "resolution": "854x428", "url": "https://www.youtube.com/watch?v=jLwUrFMRGzY", "annotations": [{"segment": [11.652363276131046, 78.622788], "label": "Tai chi"}]}, "cQAIzEOeOKI": {"duration": 30.09, "subset": 
"testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cQAIzEOeOKI", "annotations": []}, "l7nsRRyzD4k": {"duration": 32.555, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=l7nsRRyzD4k", "annotations": [{"segment": [0.01, 18.32578064590971], "label": "Building sandcastles"}]}, "KV0L45xelkE": {"duration": 68.85, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KV0L45xelkE", "annotations": [{"segment": [28.17800312012481, 37.73168798751951], "label": "Layup drill in basketball"}, {"segment": [48.78819968798753, 60.48878003120126], "label": "Layup drill in basketball"}]}, "-FWGLSfI13Q": {"duration": 223.051, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-FWGLSfI13Q", "annotations": [{"segment": [0.2, 170.15894194539783], "label": "Carving jack-o-lanterns"}]}, "4R5KDUqBPcU": {"duration": 10.24, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4R5KDUqBPcU", "annotations": [{"segment": [1.9905094984179346, 9.776319467265287], "label": "Shot put"}]}, "KoHzXi7Usl8": {"duration": 159.057, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KoHzXi7Usl8", "annotations": [{"segment": [0.2, 16.34589719626168], "label": "Snowboarding"}, {"segment": [32.19646417445483, 74.54719781931465], "label": "Snowboarding"}, {"segment": [77.76684423676012, 89.15943925233644], "label": "Snowboarding"}, {"segment": [93.61741121495326, 113.92595015576323], "label": "Snowboarding"}, {"segment": [121.85123364485982, 148.84673052959502], "label": "Snowboarding"}]}, "-r-4Ncgbj8E": {"duration": 57.516, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-r-4Ncgbj8E", "annotations": []}, "Vhn4SuPhu-0": {"duration": 166.859, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Vhn4SuPhu-0", "annotations": []}, 
"MT8qyL2n5wg": {"duration": 18.3, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=MT8qyL2n5wg", "annotations": []}, "ygKWviTozbc": {"duration": 187.55, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ygKWviTozbc", "annotations": []}, "BfSqo9ONd6g": {"duration": 115.08, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BfSqo9ONd6g", "annotations": [{"segment": [21.2612908900156, 92.2868021], "label": "Doing motocross"}]}, "IVHmmp3Nsd4": {"duration": 202.46, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=IVHmmp3Nsd4", "annotations": [{"segment": [14.367636505460219, 191.51585803432135], "label": "Painting"}]}, "ZJk05q3y5iM": {"duration": 56.36, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=ZJk05q3y5iM", "annotations": [{"segment": [8.176285497659906, 48.4422936474259], "label": "Doing nails"}]}, "QeIsJBPG8EM": {"duration": 12.539, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=QeIsJBPG8EM", "annotations": [{"segment": [0.2, 12.501], "label": "Hurling"}]}, "y8RpTBtGG1g": {"duration": 37.57, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=y8RpTBtGG1g", "annotations": [{"segment": [2.3956110031201248, 34.356567556942274], "label": "Vacuuming floor"}]}, "ZjHIN-M-dQY": {"duration": 66.29, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZjHIN-M-dQY", "annotations": [{"segment": [10.818929017160686, 12.889537441497659], "label": "Spinning"}, {"segment": [14.44249375975039, 55.54407098283931], "label": "Spinning"}]}, "_-JNaelSKO8": {"duration": 27.35, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=_-JNaelSKO8", "annotations": []}, "gma-VLqBCcQ": {"duration": 20.875, "subset": "testing", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=gma-VLqBCcQ", "annotations": []}, "VwmYoF9Rh_8": {"duration": 132.56, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VwmYoF9Rh_8", "annotations": [{"segment": [22.111274570982843, 128.3602814595651], "label": "Kayaking"}]}, "r5mwKEhEsHA": {"duration": 126.363, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=r5mwKEhEsHA", "annotations": [{"segment": [89.84907332293291, 120.80307800312012], "label": "Putting in contact lenses"}]}, "F4ERtlGMFt4": {"duration": 61.951, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=F4ERtlGMFt4", "annotations": [{"segment": [0.2, 3.9501958797947245], "label": "Bullfighting"}]}, "m-B1tlnywNY": {"duration": 198.07, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=m-B1tlnywNY", "annotations": [{"segment": [1.5449722386895475, 193.73951873166928], "label": "Cheerleading"}]}, "d_A-ylxNbFU": {"duration": 10.22, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=d_A-ylxNbFU", "annotations": []}, "BV4zqyN8hLs": {"duration": 161.519, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BV4zqyN8hLs", "annotations": [{"segment": [0.01, 161.51791383219955], "label": "Wakeboarding"}]}, "HtHsvOu96rI": {"duration": 35.899, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=HtHsvOu96rI", "annotations": []}, "g7glOdM6BYo": {"duration": 80.458, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=g7glOdM6BYo", "annotations": [{"segment": [0.6764385088982984, 77.21418149955251], "label": "Clipping cat claws"}]}, "CjPN7fw0B48": {"duration": 34.761, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CjPN7fw0B48", "annotations": [{"segment": [1.897970608424337, 32.8077776599064], 
"label": "Cutting the grass"}]}, "OjQHQptSXk0": {"duration": 134.537, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OjQHQptSXk0", "annotations": [{"segment": [0.01, 133.36259752287512], "label": "Drum corps"}]}, "1BUnQWRBpYg": {"duration": 218.175, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1BUnQWRBpYg", "annotations": [{"segment": [0.2, 195.4983665422945], "label": "Longboarding"}]}, "cvEOK7xAPSs": {"duration": 67.129, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cvEOK7xAPSs", "annotations": [{"segment": [6.911869048361935, 7.644946068642746], "label": "Slacklining"}, {"segment": [12.357584056162247, 65.03440422776912], "label": "Slacklining"}]}, "0vIFg1eE7Tw": {"duration": 203.73, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0vIFg1eE7Tw", "annotations": [{"segment": [8.737434968798752, 195.24195593915758], "label": "Putting on makeup"}]}, "iM8ZD2PwKDg": {"duration": 127.52, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iM8ZD2PwKDg", "annotations": [{"segment": [69.58802587, 92.89540457999999], "label": "Doing step aerobics"}, {"segment": [99.98823383999999, 114.99003955999999], "label": "Doing step aerobics"}]}, "3vYwSm6B_u0": {"duration": 150.81, "subset": "testing", "resolution": "384x288", "url": "https://www.youtube.com/watch?v=3vYwSm6B_u0", "annotations": []}, "eCXiGAChev4": {"duration": 83.96000000000001, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eCXiGAChev4", "annotations": [{"segment": [0.01, 69.48887247425897], "label": "Washing dishes"}]}, "HTYzbAFUghU": {"duration": 147.888, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=HTYzbAFUghU", "annotations": []}, "M2Y2Cw2kwDo": {"duration": 11.03, "subset": "testing", "resolution": "272x480", "url": 
"https://www.youtube.com/watch?v=M2Y2Cw2kwDo", "annotations": []}, "St1RRi-aeNM": {"duration": 31.21, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=St1RRi-aeNM", "annotations": []}, "siGEHA6fs80": {"duration": 61.07, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=siGEHA6fs80", "annotations": [{"segment": [7.431104293291733, 15.433831993759751], "label": "Chopping wood"}, {"segment": [27.05684127301092, 39.72782679875195], "label": "Chopping wood"}]}, "OAot8XBeLrs": {"duration": 223.887, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=OAot8XBeLrs", "annotations": [{"segment": [0.2, 9.779755837852562], "label": "Futsal"}, {"segment": [38.42046936299221, 50.99444115451693], "label": "Futsal"}, {"segment": [61.47275098078753, 69.85539884180402], "label": "Futsal"}, {"segment": [89.41491051750914, 107.57731421637817], "label": "Futsal"}, {"segment": [125.73971791524721, 135.17019675889077], "label": "Futsal"}, {"segment": [141.10790547893916, 145.99778339157564], "label": "Futsal"}, {"segment": [160.6674171294852, 173.93994289235567], "label": "Futsal"}, {"segment": [183.7196987176287, 192.10234656786272], "label": "Futsal"}, {"segment": [205.3748723307332, 220.3937830624025], "label": "Futsal"}]}, "01m9yM04RwY": {"duration": 73.17, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=01m9yM04RwY", "annotations": []}, "TaDgnM-XKiw": {"duration": 236.22, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TaDgnM-XKiw", "annotations": []}, "EMz_XwbJ8l8": {"duration": 31.022, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EMz_XwbJ8l8", "annotations": []}, "zahB34blHyk": {"duration": 40.171, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zahB34blHyk", "annotations": [{"segment": [6.1370015600624015, 
34.88071294851794], "label": "Painting fence"}]}, "iXF01UxOtLI": {"duration": 215.295, "subset": "validation", "resolution": "368x208", "url": "https://www.youtube.com/watch?v=iXF01UxOtLI", "annotations": [{"segment": [22.127579439252337, 197.8071495327103], "label": "Futsal"}]}, "htBt3oP9zuo": {"duration": 165.396, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=htBt3oP9zuo", "annotations": [{"segment": [59.676226612890595, 151.76282223368742], "label": "Hanging wallpaper"}]}, "UE2mDvY9rew": {"duration": 15.837, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=UE2mDvY9rew", "annotations": [{"segment": [0.01, 15.56472340204114], "label": "Roof shingle removal"}]}, "lSgkR94_h8Q": {"duration": 230.11, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lSgkR94_h8Q", "annotations": [{"segment": [16.872320003120123, 221.4940732324493], "label": "Belly dance"}]}, "Qhs0AjFvcOA": {"duration": 74.26, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Qhs0AjFvcOA", "annotations": [{"segment": [26.91519222, 41.736889420000004], "label": "Tennis serve with ball bouncing"}, {"segment": [48.46531619, 64.37859129], "label": "Tennis serve with ball bouncing"}]}, "1uiEkwykOxo": {"duration": 200.51, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1uiEkwykOxo", "annotations": [{"segment": [74.5473872, 172.33719807999998], "label": "Playing polo"}]}, "GAqzjkkb98Q": {"duration": 71.518, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GAqzjkkb98Q", "annotations": [{"segment": [5.687255850234009, 58.99134009360374], "label": "Calf roping"}]}, "JFGJYIZ7ONE": {"duration": 156.224, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=JFGJYIZ7ONE", "annotations": [{"segment": [0.01, 143.08269297794934], "label": 
"Futsal"}]}, "82EtiBkAsPg": {"duration": 78.3, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=82EtiBkAsPg", "annotations": []}, "ztEGh6HkkV8": {"duration": 72.098, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ztEGh6HkkV8", "annotations": []}, "YBK6SfHd-0Y": {"duration": 201.039, "subset": "validation", "resolution": "712x480", "url": "https://www.youtube.com/watch?v=YBK6SfHd-0Y", "annotations": [{"segment": [5.643772230889236, 194.39659906396255], "label": "Roof shingle removal"}]}, "FCiKVtVqTAA": {"duration": 126.433, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FCiKVtVqTAA", "annotations": [{"segment": [7.283640186915888, 112.02830062305296], "label": "Playing congas"}]}, "Ouy1exEw_dI": {"duration": 129.266, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ouy1exEw_dI", "annotations": [{"segment": [21.352302652106086, 94.6753042121685], "label": "Waterskiing"}]}, "Ff8QLpH5T1c": {"duration": 217.826, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ff8QLpH5T1c", "annotations": [{"segment": [7.3061727884081344, 49.10427757790584], "label": "Canoeing"}, {"segment": [65.75555509567322, 67.79448703662432], "label": "Canoeing"}, {"segment": [74.9307488299532, 110.95187978675611], "label": "Canoeing"}, {"segment": [186.3923610101404, 200.66488455148206], "label": "Canoeing"}]}, "JQn65jxJgbE": {"duration": 62.439, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=JQn65jxJgbE", "annotations": []}, "7pSlF1snFmQ": {"duration": 31.39, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=7pSlF1snFmQ", "annotations": []}, "vOJtbsRKkzc": {"duration": 236.31, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vOJtbsRKkzc", "annotations": []}, "5gBRDguUe8U": {"duration": 
143.054, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=5gBRDguUe8U", "annotations": [{"segment": [0.20000000000000004, 124.53672074882996], "label": "Assembling bicycle"}]}, "37TrAhYUirU": {"duration": 304.879, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=37TrAhYUirU", "annotations": []}, "kXCtmH4V0TI": {"duration": 12.59, "subset": "training", "resolution": "426x236", "url": "https://www.youtube.com/watch?v=kXCtmH4V0TI", "annotations": [{"segment": [3.84820896099844, 12.585215419501134], "label": "Hammer throw"}]}, "y-7ZKlq2UFw": {"duration": 147.66, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=y-7ZKlq2UFw", "annotations": [{"segment": [12.43440561622465, 24.8688112324493], "label": "Playing guitarra"}, {"segment": [65.39576287051482, 133.11222776911077], "label": "Playing guitarra"}]}, "p1_6T9d4ZuY": {"duration": 38.55, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=p1_6T9d4ZuY", "annotations": [{"segment": [0, 36.04405226209048], "label": "Using uneven bars"}]}, "5ohc1_X2W80": {"duration": 60.49, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=5ohc1_X2W80", "annotations": [{"segment": [5.75957214, 31.093276166420807], "label": "Mowing the lawn"}, {"segment": [40.4354125273011, 54.109250720000006], "label": "Mowing the lawn"}]}, "TtrDWeSW3mg": {"duration": 204.22, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TtrDWeSW3mg", "annotations": [{"segment": [22.455698054602184, 173.75295445085803], "label": "Preparing salad"}]}, "b6QSzGwI9m4": {"duration": 126.735, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=b6QSzGwI9m4", "annotations": []}, "kkEjwEXkz3w": {"duration": 78.23, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kkEjwEXkz3w", 
"annotations": [{"segment": [33.31686207800312, 36.856015925117], "label": "Doing nails"}, {"segment": [40.151090196567864, 43.32412468018721], "label": "Doing nails"}, {"segment": [46.37511937597504, 50.28039258658347], "label": "Doing nails"}, {"segment": [54.42974537285491, 64.5590477628705], "label": "Doing nails"}]}, "kWN4zFblj6o": {"duration": 84.15, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=kWN4zFblj6o", "annotations": [{"segment": [2.82041358, 21.410989800000003], "label": "Doing step aerobics"}, {"segment": [28.416316780000002, 62.23108798], "label": "Doing step aerobics"}]}, "KmJFUfqGKZA": {"duration": 219.544, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KmJFUfqGKZA", "annotations": [{"segment": [8.902505460218409, 14.380970358814352], "label": "Scuba diving"}, {"segment": [33.55559750390015, 43.48531513260531], "label": "Scuba diving"}, {"segment": [109.22689391575663, 115.04776287051482], "label": "Scuba diving"}, {"segment": [139.35845085803433, 144.83691575663028], "label": "Scuba diving"}]}, "rhHvxiCxEis": {"duration": 28.282, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=rhHvxiCxEis", "annotations": []}, "7hDCO9fJb90": {"duration": 113.45, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7hDCO9fJb90", "annotations": [{"segment": [20.70821484287943, 98.58526088767552], "label": "Using uneven bars"}]}, "96kej9yK8To": {"duration": 129.24, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=96kej9yK8To", "annotations": [{"segment": [0, 119.56369345865835], "label": "Playing violin"}]}, "-nlAKyoyIuU": {"duration": 213.671, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-nlAKyoyIuU", "annotations": [{"segment": [54.32172074882996, 62.98653510140406], "label": "Sumo"}, {"segment": [79.98290171606865, 
90.98055070202808], "label": "Sumo"}, {"segment": [139.97007800312014, 147.968368174727], "label": "Sumo"}]}, "3vs3ofTw1vY": {"duration": 220.915, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3vs3ofTw1vY", "annotations": [{"segment": [16.883290171606863, 210.17973478939155], "label": "Snow tubing"}]}, "j7hSNqcWIO4": {"duration": 116.84, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=j7hSNqcWIO4", "annotations": [{"segment": [6.379872718718273, 26.795465418616743], "label": "Pole vault"}, {"segment": [32.44620982662436, 45.93508357477156], "label": "Pole vault"}, {"segment": [52.86180252652283, 55.04918745865481], "label": "Pole vault"}, {"segment": [57.23657239078679, 59.423957322918774], "label": "Pole vault"}, {"segment": [61.611342255050744, 73.8242414594543], "label": "Pole vault"}, {"segment": [76.37619054694161, 80.75096041120557], "label": "Pole vault"}, {"segment": [83.12062742101521, 87.31311461622464], "label": "Pole vault"}, {"segment": [90.0473457628705, 97.70319297347893], "label": "Pole vault"}, {"segment": [102.07796280811233, 116.842811], "label": "Pole vault"}]}, "-od9zDziq9U": {"duration": 180.071, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=-od9zDziq9U", "annotations": [{"segment": [29.488619344773788, 171.64312178745655], "label": "Baton twirling"}]}, "ysiKsmRwfx0": {"duration": 130.66, "subset": "testing", "resolution": "638x360", "url": "https://www.youtube.com/watch?v=ysiKsmRwfx0", "annotations": []}, "AWXdK-ix3gQ": {"duration": 47.16, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AWXdK-ix3gQ", "annotations": [{"segment": [29.046171279999996, 44.84936817472699], "label": "Using the pommel horse"}, {"segment": [1.9467784711388456, 21.163072839999998], "label": "Using the pommel horse"}]}, "iqAyzgsha_M": {"duration": 134.91, "subset": "testing", "resolution": "1920x1080", 
"url": "https://www.youtube.com/watch?v=iqAyzgsha_M", "annotations": []}, "tDG8VoRQTuM": {"duration": 174.197, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=tDG8VoRQTuM", "annotations": [{"segment": [43.68469168768233, 164.9707611559681], "label": "Powerbocking"}]}, "7coVNZXiU6U": {"duration": 189.73, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=7coVNZXiU6U", "annotations": [{"segment": [75.4777123329831, 92.6451920008773], "label": "Getting a haircut"}, {"segment": [103.00487800736519, 111.88460887006909], "label": "Getting a haircut"}, {"segment": [122.54028590531375, 158.05920935612934], "label": "Getting a haircut"}]}, "mtU66vCjVVs": {"duration": 69.36, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mtU66vCjVVs", "annotations": [{"segment": [6.432683307332294, 63.29976599063963], "label": "Playing flauta"}]}, "fVL37oteUQY": {"duration": 208.121, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fVL37oteUQY", "annotations": []}, "5aMigcn2cU8": {"duration": 196.02, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5aMigcn2cU8", "annotations": [{"segment": [14.975704577223087, 183.07034779095164], "label": "Zumba"}]}, "5w841zW9R5A": {"duration": 102.469, "subset": "testing", "resolution": "400x300", "url": "https://www.youtube.com/watch?v=5w841zW9R5A", "annotations": []}, "5OhIRnEFMyw": {"duration": 217.5, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=5OhIRnEFMyw", "annotations": [{"segment": [68.70973478939158, 89.06817472698908], "label": "Playing violin"}, {"segment": [101.62254602184088, 124.69544461778473], "label": "Playing violin"}, {"segment": [140.6428892355694, 202.05751638065524], "label": "Playing violin"}]}, "V35ubrbe8gA": {"duration": 223.562, "subset": "validation", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=V35ubrbe8gA", "annotations": [{"segment": [27.894040561622468, 223.501], "label": "Trimming branches or hedges"}]}, "5rw6n16ILgY": {"duration": 155.57, "subset": "validation", "resolution": "1920x1214", "url": "https://www.youtube.com/watch?v=5rw6n16ILgY", "annotations": [{"segment": [8.25195892189429, 38.832747867737844], "label": "High jump"}, {"segment": [50.725276902232544, 86.64556859906396], "label": "High jump"}, {"segment": [97.32457425273012, 135.91461741029642], "label": "High jump"}]}, "ciW2Eb82vrY": {"duration": 128.38, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ciW2Eb82vrY", "annotations": []}, "fUivipdikFU": {"duration": 170.13, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fUivipdikFU", "annotations": [{"segment": [48.64584276000001, 152.69517132000001], "label": "Doing step aerobics"}]}, "BY5mz30u5_8": {"duration": 32.69, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BY5mz30u5_8", "annotations": [{"segment": [7.140588829953199, 32.28566235257411], "label": "Windsurfing"}]}, "T7fzZX0qKKQ": {"duration": 50.09, "subset": "validation", "resolution": "480x720", "url": "https://www.youtube.com/watch?v=T7fzZX0qKKQ", "annotations": [{"segment": [6.1522602399999995, 45.57738138923557], "label": "Playing bagpipes"}]}, "e5VrEH2X8LI": {"duration": 93.948, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=e5VrEH2X8LI", "annotations": [{"segment": [0.20000000000000004, 93.747686], "label": "Installing carpet"}]}, "Xpb6vHnzZJ0": {"duration": 79.41, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Xpb6vHnzZJ0", "annotations": []}, "iiY3TE2O1QE": {"duration": 38.801, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iiY3TE2O1QE", "annotations": [{"segment": [6.779402982839314, 25.362230801872077], 
"label": "Beach soccer"}]}, "jbCQY7V7O1s": {"duration": 165.745, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=jbCQY7V7O1s", "annotations": []}, "cnDeHeGJo_A": {"duration": 157.57, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cnDeHeGJo_A", "annotations": []}, "altXks0a0qY": {"duration": 35.69, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=altXks0a0qY", "annotations": [{"segment": [3.3302051390561935, 32.136485582322805], "label": "Camel ride"}]}, "xuoWaq6XPZo": {"duration": 79.715, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=xuoWaq6XPZo", "annotations": [{"segment": [0.01, 77.60002281435257], "label": "Playing drums"}]}, "HGduo1zU6Ok": {"duration": 216.225, "subset": "validation", "resolution": "655x480", "url": "https://www.youtube.com/watch?v=HGduo1zU6Ok", "annotations": [{"segment": [15.849558502340093, 192.57309048361935], "label": "Hurling"}]}, "SndKvA_2DcE": {"duration": 162.47, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SndKvA_2DcE", "annotations": [{"segment": [37.639279505874114, 41.694690092365605], "label": "Rafting"}, {"segment": [79.71416434072329, 86.55766884165368], "label": "Rafting"}, {"segment": [123.30982691497661, 129.64640589313575], "label": "Rafting"}]}, "e1tC-0BSwV4": {"duration": 87.4, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=e1tC-0BSwV4", "annotations": [{"segment": [5.999338612792513, 70.9012745148206], "label": "Making a lemonade"}]}, "HZWdMK6zhec": {"duration": 79.6, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=HZWdMK6zhec", "annotations": [{"segment": [0, 79.5980045351474], "label": "Playing lacrosse"}]}, "iUMDlxU14bM": {"duration": 134.189, "subset": "validation", "resolution": "1280x684", "url": 
"https://www.youtube.com/watch?v=iUMDlxU14bM", "annotations": [{"segment": [0.2, 122.50143571986209], "label": "Camel ride"}]}, "E-bv464MTsQ": {"duration": 27.38, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=E-bv464MTsQ", "annotations": [{"segment": [12.940758931357255, 23.703370319812795], "label": "Playing polo"}]}, "cXw6os9Xk5c": {"duration": 34.087, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cXw6os9Xk5c", "annotations": [{"segment": [0, 34.061], "label": "Playing drums"}]}, "Vk9Sh8kQF80": {"duration": 7.8, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Vk9Sh8kQF80", "annotations": []}, "oKJ1BnvHbbA": {"duration": 34.06, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oKJ1BnvHbbA", "annotations": []}, "YTBmMSIczEc": {"duration": 13.654, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YTBmMSIczEc", "annotations": [{"segment": [1.1753947364236708, 5.524779471626164], "label": "Playing ice hockey"}, {"segment": [8.176843334554512, 10.086329315862923], "label": "Playing ice hockey"}]}, "cGtK7bs-TE8": {"duration": 60.442, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cGtK7bs-TE8", "annotations": [{"segment": [5.940434429017161, 39.32001836349454], "label": "Raking leaves"}]}, "EZZMYzY6Pug": {"duration": 33.089, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EZZMYzY6Pug", "annotations": [{"segment": [3.8715016380655225, 33.08843537414966], "label": "Waxing skis"}]}, "RGVDeXqN1ao": {"duration": 164.42000000000002, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RGVDeXqN1ao", "annotations": [{"segment": [14.155690268822797, 161.9289879806608], "label": "Wrapping presents"}]}, "Aa33vHLEXJA": {"duration": 73.491, "subset": "validation", 
"resolution": "512x288", "url": "https://www.youtube.com/watch?v=Aa33vHLEXJA", "annotations": [{"segment": [3.554174493510353, 10.54787248049922], "label": "Bullfighting"}, {"segment": [24.076665444617788, 35.08314107644306], "label": "Bullfighting"}, {"segment": [37.94941077223089, 68.90512348673948], "label": "Bullfighting"}]}, "iiqaJGokpEw": {"duration": 60.697, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=iiqaJGokpEw", "annotations": [{"segment": [26.173048286604363, 28.06279906542056], "label": "Hitting a pinata"}]}, "WiJMIl5Tp3A": {"duration": 168.0, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=WiJMIl5Tp3A", "annotations": [{"segment": [6.290035764695895, 129.99407178783153], "label": "Tumbling"}]}, "uID_HFDKFKw": {"duration": 69.34, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=uID_HFDKFKw", "annotations": [{"segment": [0, 67.06737974882995], "label": "Playing kickball"}]}, "ILeNuidJc00": {"duration": 76.91, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ILeNuidJc00", "annotations": [{"segment": [6.06080424, 76.904488], "label": "Plataform diving"}]}, "iiyYRmc1YiA": {"duration": 89.049, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=iiyYRmc1YiA", "annotations": [{"segment": [19.726697819032758, 80.4349157550702], "label": "Arm wrestling"}]}, "b16KVkNtcwc": {"duration": 142.014, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=b16KVkNtcwc", "annotations": []}, "ae-aDWHvAGc": {"duration": 127.501, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ae-aDWHvAGc", "annotations": [{"segment": [2.3857909516380658, 31.810546021840874], "label": "Shuffleboard"}, {"segment": [41.751341653666145, 125.25402496099845], "label": "Shuffleboard"}]}, "OO3NO29L50U": {"duration": 204.59, "subset": 
"training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OO3NO29L50U", "annotations": [{"segment": [0, 195.65386569578783], "label": "Doing nails"}]}, "WuoK1WKaF1U": {"duration": 78.37, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WuoK1WKaF1U", "annotations": []}, "WwDGutCfElc": {"duration": 106.16, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WwDGutCfElc", "annotations": [{"segment": [0, 97.38383623712949], "label": "Playing flauta"}]}, "M7GusD6qyGU": {"duration": 96.9, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=M7GusD6qyGU", "annotations": []}, "FZix27qFZTc": {"duration": 20.736, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FZix27qFZTc", "annotations": [{"segment": [8.287717122243594, 16.123963738106735], "label": "Painting fence"}]}, "YozbZM_nA0c": {"duration": 35.83, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YozbZM_nA0c", "annotations": [{"segment": [10.06838456, 33.201346436817474], "label": "Hopscotch"}]}, "9Oga_D8AnNI": {"duration": 224.027, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9Oga_D8AnNI", "annotations": []}, "xCedPpnP6Wg": {"duration": 15.117, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xCedPpnP6Wg", "annotations": [{"segment": [0.7287739165556153, 15.116189], "label": "Powerbocking"}]}, "xUKQWr2aLTU": {"duration": 194.328, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=xUKQWr2aLTU", "annotations": [{"segment": [77.55283779131216, 100.24921629598505], "label": "BMX"}]}, "2AE847UXu3Q": {"duration": 121.37, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=2AE847UXu3Q", "annotations": [{"segment": [14.200940282509258, 30.67403101022], "label": "Brushing 
hair"}]}, "o8gBXh-SuTI": {"duration": 16.58, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=o8gBXh-SuTI", "annotations": []}, "t7WI6H6UVG4": {"duration": 117.539, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=t7WI6H6UVG4", "annotations": [{"segment": [28.15096299587746, 105.10911380248834], "label": "Making a cake"}]}, "6jxqb8FyWnQ": {"duration": 40.125, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6jxqb8FyWnQ", "annotations": [{"segment": [4.950305566825331, 21.976436661989137], "label": "Arm wrestling"}]}, "Y6UKk3t8Hj8": {"duration": 37.9, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Y6UKk3t8Hj8", "annotations": [{"segment": [11.884695220000001, 26.498137379999996], "label": "Doing kickboxing"}]}, "rBnygEUFOvE": {"duration": 54.15, "subset": "training", "resolution": "612x360", "url": "https://www.youtube.com/watch?v=rBnygEUFOvE", "annotations": [{"segment": [5.989365054602184, 39.394837753510146], "label": "Using uneven bars"}]}, "fmRio4-6Xqc": {"duration": 10.06, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fmRio4-6Xqc", "annotations": [{"segment": [0, 10.054239], "label": "Doing motocross"}]}, "6d8bmara2Wc": {"duration": 150.93, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=6d8bmara2Wc", "annotations": []}, "UCZGbQFg6io": {"duration": 117.82, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=UCZGbQFg6io", "annotations": [{"segment": [0, 117.701666], "label": "Long jump"}]}, "jdYq4TB888Y": {"duration": 67.08, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jdYq4TB888Y", "annotations": []}, "yBjsQN4fzVY": {"duration": 164.79, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=yBjsQN4fzVY", 
"annotations": [{"segment": [20.823952352574103, 34.70658725429017], "label": "Brushing teeth"}, {"segment": [82.78163774726988, 83.55289524180967], "label": "Brushing teeth"}, {"segment": [82.26746608424338, 83.80998107332294], "label": "Brushing teeth"}, {"segment": [88.95169770358815, 94.60758599687989], "label": "Brushing teeth"}, {"segment": [116.7169675070203, 118.00239666458658], "label": "Brushing teeth"}]}, "rpDBtiySZ3o": {"duration": 33.948, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rpDBtiySZ3o", "annotations": [{"segment": [8.837452418096722, 33.815162246489855], "label": "Playing beach volleyball"}]}, "a74RMGL_c8E": {"duration": 31.998, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=a74RMGL_c8E", "annotations": [{"segment": [5.840343575663026, 18.269792723868953], "label": "Hurling"}]}, "VOTBtV0CQ8c": {"duration": 222.331, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VOTBtV0CQ8c", "annotations": [{"segment": [22.99497518952707, 217.27480462063875], "label": "Waterskiing"}]}, "p_88hssiQ9c": {"duration": 116.495, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=p_88hssiQ9c", "annotations": []}, "6skP3w9WDIM": {"duration": 95.78, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=6skP3w9WDIM", "annotations": [{"segment": [6.3068218976161345, 89.55686958024059], "label": "Wrapping presents"}]}, "WGewgs24UZg": {"duration": 193.03, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WGewgs24UZg", "annotations": []}, "soeRPsbkfas": {"duration": 63.32, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=soeRPsbkfas", "annotations": [{"segment": [7.369814799999999, 62.73042608658346], "label": "Long jump"}]}, "hG4VsZQQ2_c": {"duration": 49.06, "subset": "testing", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=hG4VsZQQ2_c", "annotations": []}, "bJkemB0CcKo": {"duration": 110.133, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=bJkemB0CcKo", "annotations": [{"segment": [8.645895973907269, 105.56913163590104], "label": "Making an omelette"}]}, "rRybNmeLtpA": {"duration": 191.89, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rRybNmeLtpA", "annotations": []}, "kOIj7AgonHM": {"duration": 177.47, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kOIj7AgonHM", "annotations": [{"segment": [93.66926901872075, 168.66002796723868], "label": "Brushing hair"}]}, "5qublDzEgsw": {"duration": 130.845, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5qublDzEgsw", "annotations": [{"segment": [2.8577569769457445, 82.46670042433698], "label": "Futsal"}, {"segment": [88.18221431513261, 118.80103873010923], "label": "Futsal"}, {"segment": [122.67942315600627, 129.21143903120128], "label": "Futsal"}]}, "2fzIi1lPd8U": {"duration": 23.78, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=2fzIi1lPd8U", "annotations": []}, "SSLcbqaBiRM": {"duration": 208.93, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SSLcbqaBiRM", "annotations": [{"segment": [22.034074179999998, 116.62431126999999], "label": "Doing motocross"}, {"segment": [137.11228125, 193.73342111856473], "label": "Doing motocross"}]}, "BVzW_OoUmnU": {"duration": 84.27, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BVzW_OoUmnU", "annotations": [{"segment": [12.9443751950078, 22.800498439937595], "label": "Starting a campfire"}, {"segment": [25.823042901716068, 69.32140015600623], "label": "Starting a campfire"}]}, "W5BD9287ZCY": {"duration": 43.7, "subset": "testing", "resolution": "480x360", "url": 
"https://www.youtube.com/watch?v=W5BD9287ZCY", "annotations": []}, "wbrnTczxvVw": {"duration": 234.847, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wbrnTczxvVw", "annotations": [{"segment": [38.13052262689465, 88.22877652097566], "label": "Hula hoop"}]}, "U-N92yQynQA": {"duration": 39.59, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=U-N92yQynQA", "annotations": [{"segment": [22.350138600000005, 36.83436037441498], "label": "Playing badminton"}, {"segment": [5.446684867394696, 20.5004938], "label": "Playing badminton"}]}, "9svdYGBSMvM": {"duration": 62.369, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=9svdYGBSMvM", "annotations": [{"segment": [5.055681747269891, 57.08918408736349], "label": "Carving jack-o-lanterns"}]}, "52PO939EtGw": {"duration": 64.25, "subset": "validation", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=52PO939EtGw", "annotations": [{"segment": [0.01, 64.201], "label": "Painting fence"}]}, "U0Q10ZdqnJo": {"duration": 13.82, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=U0Q10ZdqnJo", "annotations": [{"segment": [1.638075425897036, 3.5563479641185647], "label": "Tennis serve with ball bouncing"}, {"segment": [10.79836563650546, 12.587316430577223], "label": "Tennis serve with ball bouncing"}]}, "fYuJdh9gqr0": {"duration": 63.786, "subset": "training", "resolution": "990x720", "url": "https://www.youtube.com/watch?v=fYuJdh9gqr0", "annotations": [{"segment": [16.208709828393136, 26.658265210608423], "label": "Plastering"}]}, "UMKGRC2QMFo": {"duration": 164.1, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UMKGRC2QMFo", "annotations": []}, "9SEc3QVBCo4": {"duration": 24.032, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=9SEc3QVBCo4", "annotations": [{"segment": [0, 24.032652], 
"label": "Roof shingle removal"}]}, "cFOaL4Ir4hg": {"duration": 126.55, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=cFOaL4Ir4hg", "annotations": []}, "5z6q_Ix3kWI": {"duration": 146.704, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=5z6q_Ix3kWI", "annotations": [{"segment": [4.57600624024961, 76.20089391575664], "label": "Skiing"}, {"segment": [86.72714352574103, 137.5277394695788], "label": "Skiing"}]}, "ISJodiRZ_uo": {"duration": 130.85, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ISJodiRZ_uo", "annotations": [{"segment": [10.4544356, 118.53245939625586], "label": "Grooming horse"}]}, "Jv7AjsdOk2I": {"duration": 190.4, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Jv7AjsdOk2I", "annotations": [{"segment": [2.9699375975039, 138.69608580343214], "label": "Skateboarding"}]}, "PBzlHfEMU5s": {"duration": 19.575, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PBzlHfEMU5s", "annotations": [{"segment": [0, 17.803255631825273], "label": "Calf roping"}]}, "3gpgeAMy6a8": {"duration": 36.827, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=3gpgeAMy6a8", "annotations": [{"segment": [0.01, 36.826848], "label": "Kite flying"}]}, "QlTddnlIJpA": {"duration": 74.65, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=QlTddnlIJpA", "annotations": [{"segment": [8.963833073322933, 13.736783151326051], "label": "Horseback riding"}, {"segment": [36.67022620904836, 44.120684867394694], "label": "Horseback riding"}, {"segment": [53.31734477379095, 74.621], "label": "Horseback riding"}]}, "9nE6C-efQdc": {"duration": 142.014, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9nE6C-efQdc", "annotations": [{"segment": [16.610101404056163, 30.12389391575663], "label": 
"Rock-paper-scissors"}]}, "hlwSgE7Epwo": {"duration": 117.168, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=hlwSgE7Epwo", "annotations": [{"segment": [23.04534127781366, 31.988061854137648], "label": "Baton twirling"}, {"segment": [44.033358956941385, 51.33353901924668], "label": "Baton twirling"}, {"segment": [59.72874609089777, 71.40903419058624], "label": "Baton twirling"}, {"segment": [84.36685380117814, 86.55690781986972], "label": "Baton twirling"}]}, "ch_jiSTNiEE": {"duration": 133.376, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ch_jiSTNiEE", "annotations": []}, "wSGrM1nrWq4": {"duration": 144.939, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=wSGrM1nrWq4", "annotations": [{"segment": [15.601853190327612, 125.49316696567861], "label": "Blow-drying hair"}]}, "cROJALtLB1k": {"duration": 33.414, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cROJALtLB1k", "annotations": [{"segment": [6.661790951638065, 10.51313884555382], "label": "Using the rowing machine"}, {"segment": [12.907219968798751, 16.550386895475818], "label": "Using the rowing machine"}, {"segment": [19.100603744149765, 26.7512542901716], "label": "Using the rowing machine"}]}, "EN63ldqfGsI": {"duration": 229.88, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=EN63ldqfGsI", "annotations": [{"segment": [28.689789329173166, 225.573468600624], "label": "Zumba"}]}, "8ZyBER6U9Wc": {"duration": 85.589, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8ZyBER6U9Wc", "annotations": [{"segment": [0.01, 79.457805128845], "label": "Rope skipping"}]}, "vS0ppdYTwTc": {"duration": 110.295, "subset": "validation", "resolution": "632x480", "url": "https://www.youtube.com/watch?v=vS0ppdYTwTc", "annotations": [{"segment": [52.65242370982839, 87.58197277223088], "label": 
"Braiding hair"}]}, "assDWZW6zTA": {"duration": 99.15, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=assDWZW6zTA", "annotations": [{"segment": [48.85478315132605, 78.09064742589703], "label": "Layup drill in basketball"}]}, "L5nqeFWufrE": {"duration": 52.39, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=L5nqeFWufrE", "annotations": [{"segment": [35.71279624336974, 43.6398928923557], "label": "Plataform diving"}]}, "ush7vIeB2PA": {"duration": 173.62, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ush7vIeB2PA", "annotations": []}, "4efxBizoKyQ": {"duration": 26.866, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=4efxBizoKyQ", "annotations": [{"segment": [4.652213644305773, 25.230924449297973], "label": "Painting furniture"}]}, "98SSjNuSxvM": {"duration": 193.446, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=98SSjNuSxvM", "annotations": [{"segment": [27.14054602184087, 161.33546801872075], "label": "Using the rowing machine"}]}, "ubNDaGOws0E": {"duration": 122.39, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ubNDaGOws0E", "annotations": [{"segment": [1.909397518757893, 122.392379], "label": "Cumbia"}]}, "dL9mlqbG5CU": {"duration": 219.127, "subset": "training", "resolution": "384x216", "url": "https://www.youtube.com/watch?v=dL9mlqbG5CU", "annotations": [{"segment": [27.689958926677065, 207.1619149329173], "label": "Curling"}]}, "j56eH9M0ObY": {"duration": 141.94, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=j56eH9M0ObY", "annotations": [{"segment": [3.3184399375975038, 137.825872074883], "label": "Breakdancing"}]}, "9E5oElxdPXA": {"duration": 20.62, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=9E5oElxdPXA", "annotations": []}, 
"ERGoTBC8NkA": {"duration": 60.19, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ERGoTBC8NkA", "annotations": [{"segment": [43.472970394695786, 59.71665191505619], "label": "Brushing teeth"}]}, "xmvwhxB1q30": {"duration": 192.262, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xmvwhxB1q30", "annotations": [{"segment": [0.2, 192.001], "label": "Canoeing"}]}, "STFtlC29FLo": {"duration": 12.214, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=STFtlC29FLo", "annotations": []}, "7UZWLp3MvLE": {"duration": 11.75, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7UZWLp3MvLE", "annotations": []}, "uhiQp0GCeKg": {"duration": 187.27, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=uhiQp0GCeKg", "annotations": [{"segment": [65.41813416536661, 181.08739469578782], "label": "Washing face"}]}, "ko89yQozE-4": {"duration": 218.41, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ko89yQozE-4", "annotations": [{"segment": [55.53214196567863, 198.96178471138845], "label": "Windsurfing"}]}, "sbr3HKm2Y9I": {"duration": 196.581, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=sbr3HKm2Y9I", "annotations": [{"segment": [0.2, 176.70811046722426], "label": "Drum corps"}]}, "lgXtDr9pNAk": {"duration": 63.995, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lgXtDr9pNAk", "annotations": [{"segment": [0.01, 56.10723493973772], "label": "Fixing the roof"}]}, "ZwK52lXCbhA": {"duration": 148.32999999999998, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ZwK52lXCbhA", "annotations": []}, "w3N0Pyz2-m0": {"duration": 144.2, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=w3N0Pyz2-m0", "annotations": 
[{"segment": [5.397603744149766, 13.943809672386896], "label": "Playing violin"}, {"segment": [31.036221528861155, 48.353533541341655], "label": "Playing violin"}, {"segment": [58.923840873634944, 69.269248049922], "label": "Playing violin"}, {"segment": [76.24115288611544, 80.06445553822152], "label": "Playing violin"}, {"segment": [91.30946333853355, 107.72717472698908], "label": "Playing violin"}]}, "WpKQV53ENHE": {"duration": 206.518, "subset": "validation", "resolution": "400x226", "url": "https://www.youtube.com/watch?v=WpKQV53ENHE", "annotations": [{"segment": [38.045918930818694, 58.311731124855186], "label": "Rafting"}, {"segment": [196.9556193288883, 200.8157740017855], "label": "Rafting"}]}, "UhgVO1QaP2s": {"duration": 166.673, "subset": "validation", "resolution": "1280x712", "url": "https://www.youtube.com/watch?v=UhgVO1QaP2s", "annotations": [{"segment": [40.823143184087364, 102.96792803120125], "label": "Sumo"}]}, "7OcxT66BxX0": {"duration": 130.17, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7OcxT66BxX0", "annotations": []}, "_kj3B0T_TE0": {"duration": 65.2, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_kj3B0T_TE0", "annotations": [{"segment": [0.5348369437600964, 64.64149114936757], "label": "Cleaning shoes"}]}, "uBkJa679ffE": {"duration": 208.376, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uBkJa679ffE", "annotations": [{"segment": [93.9797822628922, 120.55337667875041], "label": "Putting in contact lenses"}]}, "kL-MVsMzDUU": {"duration": 186.503, "subset": "testing", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=kL-MVsMzDUU", "annotations": []}, "_QTQEw1b_-U": {"duration": 157.501, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_QTQEw1b_-U", "annotations": [{"segment": [58.70265054602184, 144.17947113884557], "label": "Using the monkey bar"}]}, 
"c7Rxe5IzUQw": {"duration": 171.39, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=c7Rxe5IzUQw", "annotations": [{"segment": [3.340893135725429, 171.321], "label": "Playing guitarra"}]}, "zvoDDqvr2DU": {"duration": 18.762, "subset": "testing", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=zvoDDqvr2DU", "annotations": []}, "Dxgmm4BqjX0": {"duration": 211.232, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Dxgmm4BqjX0", "annotations": []}, "8_jbsmj5Z9w": {"duration": 105.0, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8_jbsmj5Z9w", "annotations": [{"segment": [24.243516331129438, 98.44833320951886], "label": "Shaving"}]}, "iXaMOysrAuA": {"duration": 13.259, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=iXaMOysrAuA", "annotations": []}, "mOISOUKHpNM": {"duration": 50.43, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=mOISOUKHpNM", "annotations": [{"segment": [3.068511573115986, 47.36522992348266], "label": "Using uneven bars"}]}, "Ek2OvKMdvys": {"duration": 215.969, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ek2OvKMdvys", "annotations": [{"segment": [126.55130255555959, 143.70406423780258], "label": "Doing crunches"}]}, "WqGRUZ1E3CY": {"duration": 54.59, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=WqGRUZ1E3CY", "annotations": []}, "d0mKlsMsmZM": {"duration": 208.167, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=d0mKlsMsmZM", "annotations": []}, "1t0yXLlX1XE": {"duration": 84.869, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1t0yXLlX1XE", "annotations": [{"segment": [0.2, 83.0182301246106], "label": "Playing ten pins"}]}, "h67ctuwV-Nw": {"duration": 190.195, "subset": 
"validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=h67ctuwV-Nw", "annotations": [{"segment": [18.391173166926677, 142.67990795631823], "label": "Canoeing"}]}, "DeoqC3oVV38": {"duration": 201.758, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DeoqC3oVV38", "annotations": [{"segment": [7.868884007060963, 193.88930006864274], "label": "River tubing"}]}, "huI9TvxZxmM": {"duration": 107.32, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=huI9TvxZxmM", "annotations": [{"segment": [7.368455538221529, 103.32584243369735], "label": "Tango"}]}, "fgEMvRrOCRI": {"duration": 97.524, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=fgEMvRrOCRI", "annotations": [{"segment": [0.3042849709828393, 96.00190847633905], "label": "Hurling"}]}, "xBH0D39dyf8": {"duration": 48.252, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=xBH0D39dyf8", "annotations": []}, "o0cVs7THLi8": {"duration": 75.047, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=o0cVs7THLi8", "annotations": [{"segment": [33.015949806552264, 40.04061997815913], "label": "Curling"}, {"segment": [41.91386535725429, 43.787110736349454], "label": "Curling"}, {"segment": [45.42620044305772, 48.82145769266771], "label": "Curling"}, {"segment": [58.773073769110766, 65.91482177691108], "label": "Curling"}]}, "vX8L1RcZ4cc": {"duration": 52.57, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vX8L1RcZ4cc", "annotations": [{"segment": [4.590165366614665, 36.55738845553822], "label": "Doing karate"}]}, "PV6OCwbp6nQ": {"duration": 119.746, "subset": "training", "resolution": "836x480", "url": "https://www.youtube.com/watch?v=PV6OCwbp6nQ", "annotations": [{"segment": [69.4359996438704, 77.08328482633591], "label": "Rope skipping"}, {"segment": [97.60039315966924, 
113.08148399300258], "label": "Rope skipping"}]}, "Gms3Yt6RrV4": {"duration": 149.026, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Gms3Yt6RrV4", "annotations": [{"segment": [32.3019953198128, 141.77599063962558], "label": "Longboarding"}]}, "FHExUjhAej4": {"duration": 43.54, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FHExUjhAej4", "annotations": []}, "Rd18n3PeZvk": {"duration": 3.37, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Rd18n3PeZvk", "annotations": [{"segment": [0.12715132605304214, 2.7708393135725427], "label": "Smoking hookah"}]}, "2-AMrzL7V-U": {"duration": 114.38, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2-AMrzL7V-U", "annotations": []}, "ysoLva-xTGo": {"duration": 210.211, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ysoLva-xTGo", "annotations": []}, "KsKUIIHiMg0": {"duration": 164.91, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=KsKUIIHiMg0", "annotations": [{"segment": [27.141663712807016, 64.95990604250021], "label": "Hand washing clothes"}]}, "86Unahb4sJM": {"duration": 187.55, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=86Unahb4sJM", "annotations": []}, "RjFwq94RgpM": {"duration": 43.1, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=RjFwq94RgpM", "annotations": []}, "_4CLYKFzmoY": {"duration": 231.78, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_4CLYKFzmoY", "annotations": [{"segment": [0, 231.7815873015873], "label": "Playing violin"}]}, "ZIBWRRBft8g": {"duration": 170.365, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZIBWRRBft8g", "annotations": [{"segment": [31.09622824024961, 164.51765197191887], "label": "Laying 
tile"}]}, "oDD_TyfKjNc": {"duration": 76.139, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=oDD_TyfKjNc", "annotations": []}, "lVu-4SKcb4c": {"duration": 17.88, "subset": "validation", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=lVu-4SKcb4c", "annotations": [{"segment": [0, 17.87936507936508], "label": "Throwing darts"}]}, "V0e5tItt1RM": {"duration": 224.793, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=V0e5tItt1RM", "annotations": [{"segment": [52.311498077267835, 62.81581485296877], "label": "Rock-paper-scissors"}, {"segment": [113.23653537633325, 120.93970101184728], "label": "Rock-paper-scissors"}]}, "OkXrN2_hI68": {"duration": 154.947, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OkXrN2_hI68", "annotations": []}, "w8j8UXYSOx8": {"duration": 185.783, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=w8j8UXYSOx8", "annotations": []}, "PBxI7l0AqAY": {"duration": 154.0, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PBxI7l0AqAY", "annotations": [{"segment": [88.64907765290202, 112.43297653538794], "label": "Washing hands"}]}, "4-_ZQGwppfI": {"duration": 224.05, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4-_ZQGwppfI", "annotations": [{"segment": [108.35459617024136, 189.0962468648406], "label": "Braiding hair"}]}, "HTSxgkgecS0": {"duration": 190.427, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HTSxgkgecS0", "annotations": []}, "BJuL-yK-I8g": {"duration": 105.836, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BJuL-yK-I8g", "annotations": [{"segment": [11.062479018720747, 33.187437056162246], "label": "Swimming"}, {"segment": [38.305897497659906, 61.75174855226209], "label": "Swimming"}]}, "7hvq4VqEGCE": 
{"duration": 180.164, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7hvq4VqEGCE", "annotations": [{"segment": [0.01, 180.101], "label": "Putting in contact lenses"}]}, "A4L4ObzZ5VE": {"duration": 9.36, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=A4L4ObzZ5VE", "annotations": [{"segment": [0, 9.35764], "label": "Doing motocross"}]}, "x2HPIysha58": {"duration": 216.39, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=x2HPIysha58", "annotations": []}, "xf_exEkpJe8": {"duration": 167.76, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xf_exEkpJe8", "annotations": [{"segment": [0.01, 163.0531646380655], "label": "Cumbia"}]}, "ND9mMyNjm5M": {"duration": 78.09, "subset": "validation", "resolution": "854x468", "url": "https://www.youtube.com/watch?v=ND9mMyNjm5M", "annotations": [{"segment": [4.045642395292859, 76.9998480027695], "label": "Grooming horse"}]}, "JksdZ0YMkoQ": {"duration": 127.223, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JksdZ0YMkoQ", "annotations": [{"segment": [0.2, 118.45094695787832], "label": "Changing car wheel"}]}, "u00zk79cJDY": {"duration": 232.13, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=u00zk79cJDY", "annotations": []}, "JgfVp6Estgw": {"duration": 186.29, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JgfVp6Estgw", "annotations": [{"segment": [43.57277691107644, 137.98046021840872], "label": "Archery"}, {"segment": [157.75039469578783, 161.817624024961], "label": "Archery"}]}, "pfSWDHsAhME": {"duration": 83.801, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=pfSWDHsAhME", "annotations": []}, "HzSCfBOefA4": {"duration": 30.02, "subset": "validation", "resolution": "272x480", "url": 
"https://www.youtube.com/watch?v=HzSCfBOefA4", "annotations": [{"segment": [4.7306763454211636, 10.023413246733952], "label": "Washing dishes"}, {"segment": [15.784533229329172, 27.587804368174726], "label": "Washing dishes"}]}, "aQ-F9wr0HQ4": {"duration": 30.98, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=aQ-F9wr0HQ4", "annotations": []}, "rojFvhZw_z8": {"duration": 161.75, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rojFvhZw_z8", "annotations": [{"segment": [0.7566973478939159, 129.89971138845556], "label": "Bathing dog"}]}, "bGql7ldp84A": {"duration": 189.661, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bGql7ldp84A", "annotations": [{"segment": [39.64823524492979, 180.4882350702028], "label": "Baking cookies"}]}, "ti3EHJLR2mU": {"duration": 207.42000000000002, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ti3EHJLR2mU", "annotations": [{"segment": [51.45134727838093, 186.7133797460742], "label": "Preparing pasta"}]}, "P7UbKv72LAs": {"duration": 218.059, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=P7UbKv72LAs", "annotations": [{"segment": [38.0413746724027, 68.61033646272631], "label": "Applying sunscreen"}, {"segment": [88.31033406093486, 130.7672254363843], "label": "Applying sunscreen"}, {"segment": [137.89998248909657, 187.1499762352025], "label": "Applying sunscreen"}]}, "bpg2PC2neQM": {"duration": 218.66, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=bpg2PC2neQM", "annotations": [{"segment": [5.627375195007801, 96.8891322], "label": "Playing field hockey"}, {"segment": [107.40458778, 210.60025351014042], "label": "Playing field hockey"}]}, "wRKFbuczjsw": {"duration": 143.778, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=wRKFbuczjsw", "annotations": [{"segment": 
[14.90191341653666, 16.91871372854914], "label": "Disc dog"}, {"segment": [22.520936817472695, 24.313648205928235], "label": "Disc dog"}, {"segment": [46.49845163806552, 51.652496879875194], "label": "Disc dog"}, {"segment": [54.341563962558496, 55.9101864274571], "label": "Disc dog"}, {"segment": [63.97738767550702, 67.11463260530421], "label": "Disc dog"}, {"segment": [0.2, 143.441], "label": "Disc dog"}, {"segment": [87.07066848673946, 90.65659048361934], "label": "Disc dog"}, {"segment": [108.8103205928237, 113.74096333853353], "label": "Disc dog"}, {"segment": [126.5158104524181, 128.30877145085805], "label": "Disc dog"}]}, "fWq7Ar4tsfo": {"duration": 72.679, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fWq7Ar4tsfo", "annotations": []}, "qGtUXGG78tM": {"duration": 66.247, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qGtUXGG78tM", "annotations": []}, "gEOVtyRAaFY": {"duration": 135.396, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gEOVtyRAaFY", "annotations": [{"segment": [75.61844276755072, 76.88579097035883], "label": "Clipping cat claws"}, {"segment": [94.6286658096724, 98.00826101716069], "label": "Clipping cat claws"}, {"segment": [114.27256295319815, 120.6093039672387], "label": "Clipping cat claws"}]}, "pCd9P32wX3I": {"duration": 192.889, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pCd9P32wX3I", "annotations": []}, "yWCrbhU_X_U": {"duration": 117.192, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yWCrbhU_X_U", "annotations": [{"segment": [1.6418831775700933, 116.92099999999999], "label": "Having an ice cream"}]}, "iLHVaeiPpuw": {"duration": 199.228, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=iLHVaeiPpuw", "annotations": [{"segment": [3.7296825716068644, 9.324206429017162], "label": "Futsal"}, 
{"segment": [40.40489452574103, 103.49869136209048], "label": "Futsal"}, {"segment": [117.48500100561623, 169.07894324617786], "label": "Futsal"}]}, "oKdzcTnENks": {"duration": 32.16, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oKdzcTnENks", "annotations": [{"segment": [6.973774421216848, 27.192703138845552], "label": "Hopscotch"}]}, "hwP4uLVR-uc": {"duration": 110.295, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=hwP4uLVR-uc", "annotations": [{"segment": [0.2, 110.09478299999999], "label": "Playing congas"}]}, "FaYFlKz-E6U": {"duration": 18.251, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FaYFlKz-E6U", "annotations": []}, "sc_L4zUEb7E": {"duration": 128.221, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sc_L4zUEb7E", "annotations": [{"segment": [0.01, 127.98102964118566], "label": "Snow tubing"}]}, "f5nGfktddZI": {"duration": 216.039, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=f5nGfktddZI", "annotations": []}, "SrcZRhXkr2k": {"duration": 94.13, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SrcZRhXkr2k", "annotations": [{"segment": [0.01, 71.74034321372856], "label": "Playing racquetball"}]}, "MFvdMVCzfB0": {"duration": 190.64, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=MFvdMVCzfB0", "annotations": [{"segment": [51.74825899158416, 166.5461203120125], "label": "Brushing teeth"}]}, "e9p0xmsU3h0": {"duration": 228.508, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=e9p0xmsU3h0", "annotations": [{"segment": [11.016689421200052, 105.19166147326608], "label": "Fun sliding down"}]}, "CMTiL1ctmDs": {"duration": 46.44, "subset": "validation", "resolution": "608x1080", "url": "https://www.youtube.com/watch?v=CMTiL1ctmDs", 
"annotations": [{"segment": [8.05374414976599, 17.154836193447736], "label": "Smoking hookah"}, {"segment": [33.767940717628704, 42.79680187207488], "label": "Smoking hookah"}]}, "ak8MVjE2p3Y": {"duration": 81.92, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ak8MVjE2p3Y", "annotations": [{"segment": [0.01, 81.92], "label": "Smoking a cigarette"}]}, "wd7W8NTi_58": {"duration": 97.013, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wd7W8NTi_58", "annotations": [{"segment": [9.212805295950155, 72.81128037383176], "label": "Canoeing"}]}, "QBI5ZH_cdik": {"duration": 123.09, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QBI5ZH_cdik", "annotations": [{"segment": [23.235166575663026, 66.82510717628706], "label": "Doing motocross"}, {"segment": [72.39386610764431, 123.088775], "label": "Doing motocross"}]}, "AS43SCztbKY": {"duration": 226.19, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AS43SCztbKY", "annotations": []}, "Mv2L6HxB2dk": {"duration": 33.251, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Mv2L6HxB2dk", "annotations": [{"segment": [0.2, 33], "label": "Rollerblading"}]}, "lu1bVIcDn4c": {"duration": 65.038, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lu1bVIcDn4c", "annotations": [{"segment": [0.01, 64.18946650977448], "label": "Making an omelette"}]}, "Ktun1-2Y540": {"duration": 179.096, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Ktun1-2Y540", "annotations": [{"segment": [5.6546069620792, 143.7233686443222], "label": "Hula hoop"}]}, "qCnHgcP3tM8": {"duration": 138.345, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qCnHgcP3tM8", "annotations": [{"segment": [0.01, 127.29733229329172], "label": "Beer pong"}]}, "q3NOVbRyFfw": 
{"duration": 208.608, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=q3NOVbRyFfw", "annotations": [{"segment": [22.448251170046802, 208.341], "label": "River tubing"}]}, "pH7Wwl7oyAc": {"duration": 112.106, "subset": "testing", "resolution": "608x342", "url": "https://www.youtube.com/watch?v=pH7Wwl7oyAc", "annotations": []}, "qKPXW7jD9N4": {"duration": 87.099, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qKPXW7jD9N4", "annotations": []}, "EQK_o1qHx7M": {"duration": 178.237, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EQK_o1qHx7M", "annotations": [{"segment": [21.966729139913895, 87.58885669712502], "label": "Making a lemonade"}, {"segment": [116.22902253777225, 163.49919744149764], "label": "Making a lemonade"}]}, "s4Ryxk3TxKA": {"duration": 52.199, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=s4Ryxk3TxKA", "annotations": [{"segment": [0.2, 4.870772353833212], "label": "Painting fence"}, {"segment": [9.092109467363539, 29.22464031650817], "label": "Painting fence"}]}, "2oizmWFx4PA": {"duration": 161.89, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2oizmWFx4PA", "annotations": [{"segment": [1.60593888, 47.006090040000004], "label": "Fixing bicycle"}, {"segment": [28.28646905876235, 142.19001857217145], "label": "Fixing bicycle"}]}, "o4zmTkyTu8g": {"duration": 78.67, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=o4zmTkyTu8g", "annotations": [{"segment": [8.24293782, 77.56464517940718], "label": "Volleyball"}]}, "7X3wPRKuAsU": {"duration": 91.39, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7X3wPRKuAsU", "annotations": [{"segment": [3.4896255850234006, 87.6679407176287], "label": "Cricket"}]}, "aKhD_bFB7MA": {"duration": 159.777, "subset": "training", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=aKhD_bFB7MA", "annotations": [{"segment": [4.237442488175717, 31.905449322734814], "label": "Powerbocking"}, {"segment": [66.80203451947601, 71.28873833048559], "label": "Powerbocking"}, {"segment": [75.52618081866132, 77.27100962558502], "label": "Powerbocking"}, {"segment": [80.26214548205928, 82.25623605304213], "label": "Powerbocking"}, {"segment": [88.23850776599063, 141.0819078970359], "label": "Powerbocking"}]}, "VTwQgMELGVE": {"duration": 52.41, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VTwQgMELGVE", "annotations": [{"segment": [13.03204869, 48.27575663026521], "label": "Drinking beer"}]}, "8wKBvYGGldQ": {"duration": 82.199, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8wKBvYGGldQ", "annotations": [{"segment": [7.695735329626222, 67.1395411383354], "label": "Carving jack-o-lanterns"}]}, "62h9hnNTKWM": {"duration": 135.56, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=62h9hnNTKWM", "annotations": [{"segment": [55.31969266770671, 128.51515600624026], "label": "Using the balance beam"}]}, "J3DxJ8gI95U": {"duration": 134.77, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=J3DxJ8gI95U", "annotations": [{"segment": [9.671361640860193, 13.035313515941999], "label": "Preparing pasta"}, {"segment": [17.870994336372092, 117.31782164347797], "label": "Preparing pasta"}]}, "zk7ZAIPkKHo": {"duration": 191.356, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zk7ZAIPkKHo", "annotations": []}, "duGxOdcqg3I": {"duration": 198.16, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=duGxOdcqg3I", "annotations": [{"segment": [91.04164213371256, 95.67873765834987], "label": "Drinking beer"}, {"segment": [109.89916393390429, 110.82658303883176], "label": "Drinking beer"}, 
{"segment": [116.39109766839653, 118.24593587825144], "label": "Drinking beer"}, {"segment": [171.41796304446177, 172.34538214118564], "label": "Drinking beer"}]}, "qknJO39WUGk": {"duration": 33.228, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=qknJO39WUGk", "annotations": [{"segment": [0.01, 33.227754], "label": "Cleaning sink"}]}, "1BrN3RGFXDs": {"duration": 114.78, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=1BrN3RGFXDs", "annotations": []}, "-ApIBtH_pVc": {"duration": 114.312, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-ApIBtH_pVc", "annotations": [{"segment": [0.01, 114.281], "label": "Changing car wheel"}]}, "FAaC_ZqY1MA": {"duration": 35.016, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=FAaC_ZqY1MA", "annotations": []}, "DbzOayCsDM4": {"duration": 158.24, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DbzOayCsDM4", "annotations": []}, "sR0hZeYm06I": {"duration": 72.1, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=sR0hZeYm06I", "annotations": [{"segment": [11.352652106084243, 66.99188767550702], "label": "Getting a tattoo"}]}, "FCe1NVTbaZ4": {"duration": 10.937, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=FCe1NVTbaZ4", "annotations": [{"segment": [0.2, 4.4300780031201255], "label": "Doing crunches"}]}, "5Lv0g7ISQVU": {"duration": 59.07, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5Lv0g7ISQVU", "annotations": [{"segment": [9.302872074882995, 59.041], "label": "Playing kickball"}]}, "kSdWy3subNE": {"duration": 26.8, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kSdWy3subNE", "annotations": [{"segment": [3.4696623369734794, 14.254877794071763], "label": "Springboard diving"}]}, 
"avBlEff5U5U": {"duration": 58.608, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=avBlEff5U5U", "annotations": [{"segment": [0.2, 57.23570300090915], "label": "Cutting the grass"}]}, "GKBYgS99oiI": {"duration": 165.257, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GKBYgS99oiI", "annotations": [{"segment": [21.398256833674708, 69.86659761356441], "label": "Changing car wheel"}, {"segment": [78.37433828237484, 144.11596964742589], "label": "Changing car wheel"}]}, "k5lyc7zsox8": {"duration": 128.57, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=k5lyc7zsox8", "annotations": [{"segment": [17.650642833073324, 111.11881965366614], "label": "Archery"}]}, "J332Dy1gQqU": {"duration": 160.241, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=J332Dy1gQqU", "annotations": []}, "s6fEIuHPJ4Y": {"duration": 49.51, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=s6fEIuHPJ4Y", "annotations": []}, "na4vSFfVi2s": {"duration": 112.849, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=na4vSFfVi2s", "annotations": [{"segment": [0.01, 112.849001], "label": "Using the monkey bar"}]}, "GQdkuWJGYFg": {"duration": 165.094, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GQdkuWJGYFg", "annotations": [{"segment": [39.406107166735644, 54.34437001425635], "label": "Making a lemonade"}, {"segment": [58.980382622107605, 70.57041414173574], "label": "Making a lemonade"}, {"segment": [73.66108921363656, 81.64533314938039], "label": "Making a lemonade"}]}, "7B1FZR0IA6M": {"duration": 78.25, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7B1FZR0IA6M", "annotations": [{"segment": [2.9298235113077995, 59.084772673946965], "label": "Skateboarding"}]}, "4-rJZdNgMX8": {"duration": 
137.9, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4-rJZdNgMX8", "annotations": [{"segment": [31.185405616224646, 85.61097971918876], "label": "Washing hands"}]}, "IRz9PYgpb8U": {"duration": 10.171, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=IRz9PYgpb8U", "annotations": [{"segment": [0.01, 10.170339], "label": "Trimming branches or hedges"}]}, "7AtINKDyH2Q": {"duration": 90.14, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7AtINKDyH2Q", "annotations": [{"segment": [2.0218288946145075, 25.75023493511295], "label": "Hurling"}, {"segment": [33.89371156439644, 40.21192619056466], "label": "Hurling"}, {"segment": [46.67054558620329, 60.71102253324379], "label": "Hurling"}, {"segment": [69.83733254882011, 80.9293093369821], "label": "Hurling"}]}, "LpTVTiu8oro": {"duration": 89.165, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=LpTVTiu8oro", "annotations": [{"segment": [7.511528542774363, 81.83148829953198], "label": "Cutting the grass"}]}, "0RIc6mwDRaQ": {"duration": 158.52, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0RIc6mwDRaQ", "annotations": [{"segment": [0, 88.0406488673947], "label": "Playing flauta"}]}, "27OICfYnvnk": {"duration": 8.685, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=27OICfYnvnk", "annotations": []}, "Zzj03Cew2vk": {"duration": 27.377, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Zzj03Cew2vk", "annotations": [{"segment": [6.49325185271379, 9.989916416576719], "label": "Applying sunscreen"}, {"segment": [10.885403682931859, 13.44393872966083], "label": "Applying sunscreen"}]}, "RBkieLmOUlQ": {"duration": 36.781, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RBkieLmOUlQ", "annotations": [{"segment": 
[3.7830046801872075, 36.721], "label": "Capoeira"}]}, "VhzPqd0Su5I": {"duration": 95.295, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VhzPqd0Su5I", "annotations": [{"segment": [18.721319812792512, 95.261], "label": "Cutting the grass"}]}, "bHxWHW2PGy4": {"duration": 185.06, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=bHxWHW2PGy4", "annotations": [{"segment": [49.702369909999994, 144.59342316], "label": "Playing water polo"}]}, "trUjrNzz4dg": {"duration": 66.13, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=trUjrNzz4dg", "annotations": []}, "unM25XJGBJg": {"duration": 22.199, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=unM25XJGBJg", "annotations": []}, "7pfHxav4b0U": {"duration": 57.01, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=7pfHxav4b0U", "annotations": []}, "91XkPU8A5hs": {"duration": 77.32, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=91XkPU8A5hs", "annotations": [{"segment": [10.39671612, 54.25343213728549], "label": "Playing violin"}]}, "dGxJGvw_sUg": {"duration": 102.31, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dGxJGvw_sUg", "annotations": [{"segment": [15.660435630068164, 23.15020913941396], "label": "Cheerleading"}, {"segment": [81.63418526558218, 91.51431202259153], "label": "Cheerleading"}]}, "W6Sz8ajVsjc": {"duration": 149.12, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=W6Sz8ajVsjc", "annotations": [{"segment": [10.701175105507623, 20.704447486743007], "label": "Paintball"}, {"segment": [35.825673179308126, 42.10679769775825], "label": "Paintball"}]}, "NaTBiY3G3zc": {"duration": 212.138, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NaTBiY3G3zc", "annotations": 
[{"segment": [20.511360374414977, 70.46644773790952], "label": "Curling"}, {"segment": [84.03041185647427, 131.338872074883], "label": "Curling"}, {"segment": [211.7301716068643, 211.86100000000002], "label": "Curling"}]}, "ElXkXNbsZac": {"duration": 192.169, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=ElXkXNbsZac", "annotations": [{"segment": [0.01, 192.16834467120182], "label": "Hanging wallpaper"}]}, "1ekMEuW7HLE": {"duration": 139.598, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1ekMEuW7HLE", "annotations": []}, "QWFedtlPRYM": {"duration": 86.01, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QWFedtlPRYM", "annotations": [{"segment": [10.460152886115445, 52.703078003120126], "label": "Hand washing clothes"}]}, "3S7EwcX-Rd0": {"duration": 75.117, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3S7EwcX-Rd0", "annotations": [{"segment": [10.663927395191047, 43.66697208892356], "label": "BMX"}, {"segment": [38.86711724804993, 43.66697208892356], "label": "BMX"}, {"segment": [52.21305509828393, 66.144341099844], "label": "BMX"}]}, "dW6OZV43ulU": {"duration": 190.89, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dW6OZV43ulU", "annotations": []}, "5vv5e_E93gM": {"duration": 101.402, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=5vv5e_E93gM", "annotations": [{"segment": [6.96048025866613, 42.87023028237129], "label": "Snowboarding"}, {"segment": [46.1922776474259, 98.23768636661467], "label": "Snowboarding"}]}, "EMOfWVKMqDM": {"duration": 96.433, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EMOfWVKMqDM", "annotations": [{"segment": [11.884762895277715, 27.229646633484386], "label": "Curling"}, {"segment": [34.60120842928956, 58.37073331669267], "label": "Curling"}, {"segment": 
[69.05197575351013, 81.38805856786271], "label": "Curling"}]}, "ugDN2gDN99E": {"duration": 7.524, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ugDN2gDN99E", "annotations": [{"segment": [0.01, 7.52281], "label": "Using the rowing machine"}]}, "TXq9MO-dYhI": {"duration": 121.44, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TXq9MO-dYhI", "annotations": [{"segment": [10.419992051482058, 118.59845498595944], "label": "Playing congas"}]}, "alcSAjjsH9w": {"duration": 162.934, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=alcSAjjsH9w", "annotations": [{"segment": [63.78023556942278, 140.51980187207488], "label": "Making an omelette"}]}, "9bUNAiuRXyo": {"duration": 17.25, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9bUNAiuRXyo", "annotations": [{"segment": [9.985413488299532, 14.130302106084244], "label": "Discus throw"}]}, "zb6WUBWwXfk": {"duration": 199.86, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=zb6WUBWwXfk", "annotations": [{"segment": [24.624460218408736, 184.52760062402496], "label": "Spinning"}]}, "ywB6YQE7ynA": {"duration": 37.57, "subset": "testing", "resolution": "293x240", "url": "https://www.youtube.com/watch?v=ywB6YQE7ynA", "annotations": []}, "U81x8t2iMhw": {"duration": 182.63, "subset": "testing", "resolution": "512x288", "url": "https://www.youtube.com/watch?v=U81x8t2iMhw", "annotations": []}, "jfFnL_o8QS0": {"duration": 236.194, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jfFnL_o8QS0", "annotations": [{"segment": [0.2, 16.581409787003725], "label": "Hurling"}, {"segment": [20.634643290493525, 35.005198213728555], "label": "Hurling"}, {"segment": [65.58868717940717, 74.43210567550702], "label": "Hurling"}]}, "PSUGaj7_5LI": {"duration": 181.859, "subset": "training", "resolution": "1920x1080", 
"url": "https://www.youtube.com/watch?v=PSUGaj7_5LI", "annotations": [{"segment": [26.38509694673501, 33.76157566302652], "label": "Playing ten pins"}, {"segment": [66.38830844662358, 73.19736572320035], "label": "Playing ten pins"}, {"segment": [105.54038778694004, 111.49831290394474], "label": "Playing ten pins"}]}, "CbfgZlo0Ut4": {"duration": 16.115, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CbfgZlo0Ut4", "annotations": [{"segment": [7.565349891571652, 13.062402372880063], "label": "Hand car wash"}]}, "mhYFpct97UE": {"duration": 155.32, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=mhYFpct97UE", "annotations": [{"segment": [15.507597044017817, 139.08376098853478], "label": "Playing field hockey"}]}, "VIQMJQWkDcY": {"duration": 118.306, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VIQMJQWkDcY", "annotations": []}, "rzsp_r9fq_A": {"duration": 201.085, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=rzsp_r9fq_A", "annotations": [{"segment": [8.456837688346365, 171.64248345236328], "label": "Painting fence"}]}, "mwUP1yZQsh0": {"duration": 160.149, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mwUP1yZQsh0", "annotations": [{"segment": [22.415738732993198, 30.63484432857513], "label": "Fun sliding down"}, {"segment": [95.88956148137714, 129.76223908741179], "label": "Fun sliding down"}]}, "iGax3fokst8": {"duration": 22.965, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iGax3fokst8", "annotations": [{"segment": [0, 21.24488090795632], "label": "Doing crunches"}]}, "wh6P2C2avqc": {"duration": 102.49000000000001, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=wh6P2C2avqc", "annotations": []}, "yxDsp8EBZtY": {"duration": 8.34, "subset": "validation", "resolution": "800x480", "url": 
"https://www.youtube.com/watch?v=yxDsp8EBZtY", "annotations": [{"segment": [2.00616, 4.55941655], "label": "Discus throw"}]}, "sgLSdMLbA0Y": {"duration": 64.552, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=sgLSdMLbA0Y", "annotations": []}, "P5lLZ_jsA2o": {"duration": 62.86, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=P5lLZ_jsA2o", "annotations": [{"segment": [0, 43.93086462402496], "label": "Using uneven bars"}]}, "wy_oDiDK6lk": {"duration": 121.023, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wy_oDiDK6lk", "annotations": [{"segment": [0, 110.03420124804992], "label": "Clipping cat claws"}]}, "4CsTbXdERSU": {"duration": 188.756, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4CsTbXdERSU", "annotations": [{"segment": [6.1838614820592825, 179.92092216848675], "label": "Fixing the roof"}]}, "4M4MXPP9oRY": {"duration": 181.3, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4M4MXPP9oRY", "annotations": [{"segment": [0.01, 179.60435685454627], "label": "Playing water polo"}]}, "Cnf7cFBF1eQ": {"duration": 145.07999999999998, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Cnf7cFBF1eQ", "annotations": []}, "5qY9yPt-5Go": {"duration": 31.35, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=5qY9yPt-5Go", "annotations": []}, "zAvcY-MoWeQ": {"duration": 149.049, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=zAvcY-MoWeQ", "annotations": []}, "zXThQYb_axY": {"duration": 135.489, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=zXThQYb_axY", "annotations": []}, "CCL8kqQMCRw": {"duration": 37.06, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=CCL8kqQMCRw", "annotations": 
[{"segment": [0.010000000000000002, 36.447167758485385], "label": "Hitting a pinata"}]}, "v6go4RA0ZB4": {"duration": 13.514, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=v6go4RA0ZB4", "annotations": [{"segment": [0.01, 13.481], "label": "Swimming"}]}, "PpiV66Wi4Vc": {"duration": 29.722, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PpiV66Wi4Vc", "annotations": []}, "xaicDAewb6o": {"duration": 95.2, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=xaicDAewb6o", "annotations": [{"segment": [5.08228323808159, 94.64909796082303], "label": "Washing dishes"}]}, "WVM8itzRVjs": {"duration": 105.698, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WVM8itzRVjs", "annotations": [{"segment": [29.42426580900192, 41.91725123979507], "label": "Hurling"}, {"segment": [44.87611621024609, 55.06776221957734], "label": "Hurling"}]}, "Taw5ILRt9hI": {"duration": 48.739, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Taw5ILRt9hI", "annotations": [{"segment": [0.01, 45.2785479215087], "label": "Rollerblading"}]}, "3DQKqC14UcI": {"duration": 61.65, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3DQKqC14UcI", "annotations": []}, "WR_L7CdwPKo": {"duration": 19.18, "subset": "testing", "resolution": "1152x720", "url": "https://www.youtube.com/watch?v=WR_L7CdwPKo", "annotations": []}, "RgMAHuMVRcU": {"duration": 222.517, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RgMAHuMVRcU", "annotations": [{"segment": [3.8175834633385337, 16.658546021840873], "label": "Trimming branches or hedges"}, {"segment": [153.05039157566304, 156.86797503900158], "label": "Trimming branches or hedges"}, {"segment": [182.21922776911077, 200.26760842433697], "label": "Trimming branches or hedges"}]}, "jJ5t3ZUxP7Y": {"duration": 
36.433, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jJ5t3ZUxP7Y", "annotations": [{"segment": [8.555595457953787, 14.844807897496368], "label": "Beer pong"}]}, "EbQezcKi1pc": {"duration": 58.93, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=EbQezcKi1pc", "annotations": []}, "69DNcmkoapw": {"duration": 146.07999999999998, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=69DNcmkoapw", "annotations": [{"segment": [16.35695574968265, 34.53135487876508], "label": "Hand washing clothes"}, {"segment": [41.24787624492979, 86.82564005148205], "label": "Hand washing clothes"}, {"segment": [92.75074934633385, 139.92373488611545], "label": "Hand washing clothes"}]}, "Dutm5TiFBqQ": {"duration": 184.181, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Dutm5TiFBqQ", "annotations": [{"segment": [23.273970199624312, 62.351253497758954], "label": "Arm wrestling"}, {"segment": [110.9105246549998, 125.85183885722776], "label": "Arm wrestling"}]}, "bYxVbXzQToI": {"duration": 89.72, "subset": "testing", "resolution": "480x320", "url": "https://www.youtube.com/watch?v=bYxVbXzQToI", "annotations": []}, "0kykNR9FUWU": {"duration": 97.66, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0kykNR9FUWU", "annotations": [{"segment": [0.4568845553822153, 97.401], "label": "Using the balance beam"}]}, "CjuTFlxFvH0": {"duration": 152.672, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=CjuTFlxFvH0", "annotations": [{"segment": [17.744159476583146, 132.06894407722308], "label": "Powerbocking"}]}, "ehO8oXanK1Y": {"duration": 54.06, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ehO8oXanK1Y", "annotations": [{"segment": [1.8552779946957878, 52.62243051354707], "label": "Doing karate"}]}, "dWJIJM3qmyQ": {"duration": 
178.538, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=dWJIJM3qmyQ", "annotations": [{"segment": [60.1626565634054, 80.21687541787387], "label": "Painting furniture"}, {"segment": [105.8417106208058, 112.8049810563851], "label": "Painting furniture"}, {"segment": [126.73152192754377, 134.25185399796945], "label": "Painting furniture"}]}, "mMebSMn5D0E": {"duration": 166.67000000000002, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mMebSMn5D0E", "annotations": []}, "CKSlgK_2FP4": {"duration": 224.12, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=CKSlgK_2FP4", "annotations": []}, "8M8Rp_CC5k8": {"duration": 235.033, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8M8Rp_CC5k8", "annotations": []}, "Ax8GCogz6Yg": {"duration": 196.929, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ax8GCogz6Yg", "annotations": [{"segment": [14.743039001560064, 185.84259750390018], "label": "Rope skipping"}]}, "CaHsFgD2lIg": {"duration": 19.2, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CaHsFgD2lIg", "annotations": []}, "DJE9nX2qKYs": {"duration": 13.54, "subset": "validation", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=DJE9nX2qKYs", "annotations": [{"segment": [0.38014072399630683, 13.135973363494541], "label": "Clean and jerk"}]}, "cdcn6XP1N6A": {"duration": 144.03, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cdcn6XP1N6A", "annotations": [{"segment": [4.267935497659907, 139.26947413416536], "label": "Playing flauta"}]}, "K_ia4VP3w2c": {"duration": 146.73, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=K_ia4VP3w2c", "annotations": []}, "CkC1wxAaDjM": {"duration": 121.88, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=CkC1wxAaDjM", "annotations": [{"segment": [5.323998712329446, 5.894427145793315], "label": "Archery"}, {"segment": [17.49313862622532, 89.55726405382745], "label": "Archery"}, {"segment": [94.31083282371296, 117.12796979719188], "label": "Archery"}]}, "4W4mrswC2tA": {"duration": 16.46, "subset": "validation", "resolution": "568x320", "url": "https://www.youtube.com/watch?v=4W4mrswC2tA", "annotations": [{"segment": [0.6017979719188767, 16.415], "label": "Clean and jerk"}]}, "fnZIksNLUXA": {"duration": 167.602, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fnZIksNLUXA", "annotations": [{"segment": [18.818957878315132, 56.71824804992199], "label": "Scuba diving"}, {"segment": [66.38910140405616, 84.94668486739468], "label": "Scuba diving"}, {"segment": [92.78791731669267, 105.85663806552262], "label": "Scuba diving"}, {"segment": [115.00474258970358, 145.34152262090484], "label": "Scuba diving"}]}, "HddRC-twMLc": {"duration": 14.118, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=HddRC-twMLc", "annotations": [{"segment": [0.01, 14.117459], "label": "Throwing darts"}]}, "8r4bF9RF0Kc": {"duration": 27.33, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8r4bF9RF0Kc", "annotations": []}, "CktICaG32Ps": {"duration": 151.0, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=CktICaG32Ps", "annotations": []}, "9Ufy_JGbnys": {"duration": 223.45, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9Ufy_JGbnys", "annotations": []}, "mi3LGXT_Cqw": {"duration": 95.18, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mi3LGXT_Cqw", "annotations": []}, "Z9piz8SUX_s": {"duration": 39.57, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Z9piz8SUX_s", "annotations": []}, "1ILXipBgbTs": 
{"duration": 60.349, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=1ILXipBgbTs", "annotations": [{"segment": [0.01, 54.276194631596184], "label": "River tubing"}]}, "4YJ_L7jqgoA": {"duration": 169.11, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4YJ_L7jqgoA", "annotations": [{"segment": [15.04221996879875, 153.85288143525742], "label": "Clean and jerk"}]}, "YnFDzvuDJSQ": {"duration": 204.22, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YnFDzvuDJSQ", "annotations": [{"segment": [93.0297883479965, 161.20915288611545], "label": "Decorating the Christmas tree"}]}, "6lIi6-Y6q10": {"duration": 138.74, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=6lIi6-Y6q10", "annotations": [{"segment": [11.250252730109207, 126.79997815912637], "label": "Making a lemonade"}]}, "PLCfV5XnJTg": {"duration": 30.047, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=PLCfV5XnJTg", "annotations": []}, "NTZ_mWvnHVY": {"duration": 162.59, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NTZ_mWvnHVY", "annotations": [{"segment": [23.588938202426053, 138.99718424655353], "label": "Windsurfing"}]}, "MuOkEmd4Gp0": {"duration": 223.376, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MuOkEmd4Gp0", "annotations": [{"segment": [0.2, 128.5892833264351], "label": "BMX"}, {"segment": [151.5889925392934, 205.60345956318253], "label": "BMX"}]}, "PJcpWZrN03U": {"duration": 26.12, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PJcpWZrN03U", "annotations": []}, "5mWZY2XkWSQ": {"duration": 24.59, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5mWZY2XkWSQ", "annotations": []}, "a7DEFV0_nCE": {"duration": 62.787, "subset": "training", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=a7DEFV0_nCE", "annotations": [{"segment": [0, 0.19590195007800312], "label": "Kite flying"}]}, "WdtytUagoJs": {"duration": 195.1, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WdtytUagoJs", "annotations": []}, "UcttD5hL6Bs": {"duration": 87.96000000000001, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UcttD5hL6Bs", "annotations": []}, "wHGDq_8dCuc": {"duration": 232.43, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wHGDq_8dCuc", "annotations": [{"segment": [18.85561746279375, 228.8056647347894], "label": "Tai chi"}]}, "x0HMqVvqHJE": {"duration": 183.74, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=x0HMqVvqHJE", "annotations": [{"segment": [6.306191926588629, 69.0814661049027], "label": "Playing lacrosse"}]}, "eVTMUEYhwDE": {"duration": 13.1, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eVTMUEYhwDE", "annotations": [{"segment": [0.3677468209048362, 10.848531216692669], "label": "Clean and jerk"}]}, "fgQ2HYMl3pA": {"duration": 239.073, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fgQ2HYMl3pA", "annotations": [{"segment": [28.7185558279474, 223.40798618876755], "label": "Beach soccer"}]}, "SkVe9zHoPXc": {"duration": 120.419, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SkVe9zHoPXc", "annotations": []}, "U0IJmOHoa1M": {"duration": 230.55, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=U0IJmOHoa1M", "annotations": [{"segment": [16.54499651550688, 197.10126223400937], "label": "Cricket"}]}, "WiGOn6O-S1g": {"duration": 15.67, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=WiGOn6O-S1g", "annotations": [{"segment": [0.4631929079563183, 
15.090337369734788], "label": "Plataform diving"}]}, "6hsOVkC7hxA": {"duration": 6.43, "subset": "training", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=6hsOVkC7hxA", "annotations": [{"segment": [0.6438599613518879, 6.393975593750642], "label": "Chopping wood"}]}, "dVLIfsEMD8Y": {"duration": 17.137, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dVLIfsEMD8Y", "annotations": [{"segment": [4.695438377535101, 13.019170046801872], "label": "Throwing darts"}]}, "jVp2BsFOIVA": {"duration": 115.311, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jVp2BsFOIVA", "annotations": [{"segment": [13.22200728029121, 15.200811090947036], "label": "Swinging at the playground"}, {"segment": [18.79863620123036, 24.01548261114118], "label": "Swinging at the playground"}, {"segment": [92.5540517646906, 100.1094845618082], "label": "Swinging at the playground"}, {"segment": [106.04589604525773, 107.30513484477733], "label": "Swinging at the playground"}]}, "BLamvR0GIE8": {"duration": 49.9, "subset": "validation", "resolution": "482x360", "url": "https://www.youtube.com/watch?v=BLamvR0GIE8", "annotations": [{"segment": [14.946550315132606, 40.63593366926677], "label": "Washing hands"}]}, "MrWfTznlveQ": {"duration": 18.37, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MrWfTznlveQ", "annotations": []}, "DLJqhYP-C0k": {"duration": 186.968, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DLJqhYP-C0k", "annotations": [{"segment": [11.083851549980366, 16.62577732497055], "label": "Playing ten pins"}, {"segment": [22.459383403907584, 27.709628874950916], "label": "Playing ten pins"}, {"segment": [28.876350090738324, 32.959874345994244], "label": "Playing ten pins"}, {"segment": [35.29331677756906, 42.877004680187206], "label": "Playing ten pins"}, {"segment": [45.793807719655724, 49.29397136701794], "label": 
"Playing ten pins"}, {"segment": [51.04405319069905, 54.83589714200812], "label": "Playing ten pins"}, {"segment": [58.62774109331719, 62.12790474067942], "label": "Playing ten pins"}, {"segment": [63.58630626041368, 68.25319112356331], "label": "Playing ten pins"}, {"segment": [69.12823203540385, 72.33671537881924], "label": "Playing ten pins"}, {"segment": [75.25351841828774, 79.33704267354368], "label": "Playing ten pins"}, {"segment": [80.50376388933107, 84.29560784064014], "label": "Playing ten pins"}, {"segment": [85.46232905642756, 87.21241088010868], "label": "Playing ten pins"}, {"segment": [89.25417300773663, 113.46363773166925], "label": "Playing ten pins"}, {"segment": [114.6303589422777, 120.46396499531981], "label": "Playing ten pins"}, {"segment": [122.21404681123245, 131.25613619344773], "label": "Playing ten pins"}, {"segment": [133.8812589173167, 143.5067089048362], "label": "Playing ten pins"}, {"segment": [144.3817498127925, 154.007199800312], "label": "Playing ten pins"}, {"segment": [171.21633765678627, 178.2166649204368], "label": "Playing ten pins"}]}, "ne7uJQ0MUtE": {"duration": 223.423, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ne7uJQ0MUtE", "annotations": [{"segment": [109.6951872074883, 223.241], "label": "Making a lemonade"}]}, "BZK8b0xCkB0": {"duration": 218.872, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BZK8b0xCkB0", "annotations": [{"segment": [46.77904379848663, 204.87172466490495], "label": "Decorating the Christmas tree"}]}, "-b_3CyA0NeA": {"duration": 41.66, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-b_3CyA0NeA", "annotations": []}, "hS7VEMlJ4N0": {"duration": 70.19, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hS7VEMlJ4N0", "annotations": [{"segment": [56.22994253042122, 70.123333], "label": "High jump"}]}, "TRXLUcm2CuQ": {"duration": 
168.57999999999998, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TRXLUcm2CuQ", "annotations": [{"segment": [16.83138801622465, 63.11770506084244], "label": "Shaving legs"}, {"segment": [98.88440459531981, 115.97878304929799], "label": "Shaving legs"}]}, "Nq6cd5Xue9Q": {"duration": 121.05, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Nq6cd5Xue9Q", "annotations": [{"segment": [30.113898595943837, 97.51616692667707], "label": "Pole vault"}]}, "WzDnorAzWVU": {"duration": 154.2, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WzDnorAzWVU", "annotations": [{"segment": [20.44823080343214, 31.75489960062403], "label": "Painting"}]}, "h2HRRkwHoKw": {"duration": 111.47999999999999, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=h2HRRkwHoKw", "annotations": [{"segment": [98.08752767550702, 100.17449634945397], "label": "Drinking coffee"}]}, "-ucwgrxTBEg": {"duration": 193.03, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=-ucwgrxTBEg", "annotations": [{"segment": [64.27648985959438, 183.79764430577222], "label": "Tai chi"}]}, "_Zq8ugolzlA": {"duration": 140.9, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_Zq8ugolzlA", "annotations": [{"segment": [66.80016609048361, 89.65285448985959], "label": "Snatch"}]}, "MT852hP9wVk": {"duration": 91.98, "subset": "validation", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=MT852hP9wVk", "annotations": [{"segment": [1.2913703078735395, 35.15396949211302], "label": "Chopping wood"}, {"segment": [42.32824898029935, 91.77426303854875], "label": "Chopping wood"}]}, "VNnYIsTljo8": {"duration": 209.4, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VNnYIsTljo8", "annotations": []}, "GU_8hjy79VQ": {"duration": 24.17, "subset": "training", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GU_8hjy79VQ", "annotations": [{"segment": [3.6201391825273013, 13.801780633385334], "label": "Javelin throw"}, {"segment": [21.60770574570983, 23.342355770670824], "label": "Javelin throw"}]}, "eMkBTRYwBAU": {"duration": 195.93, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eMkBTRYwBAU", "annotations": [{"segment": [23.767550920121806, 191.05457666043347], "label": "Wakeboarding"}]}, "pcFn1KBwn6Y": {"duration": 163.585, "subset": "training", "resolution": "320x180", "url": "https://www.youtube.com/watch?v=pcFn1KBwn6Y", "annotations": [{"segment": [0.19999999999999998, 163.37900000000002], "label": "Playing pool"}]}, "ct3uprBbPHw": {"duration": 158.62, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ct3uprBbPHw", "annotations": [{"segment": [36.36434469734789, 142.2554489], "label": "Brushing hair"}]}, "xPhJuUiRAbc": {"duration": 6.641, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xPhJuUiRAbc", "annotations": []}, "kUTDgJmoRE8": {"duration": 77.76, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=kUTDgJmoRE8", "annotations": [{"segment": [3.274581903276131, 77.61971918876755], "label": "Cumbia"}]}, "cdLH2Ue3uAE": {"duration": 208.731, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cdLH2Ue3uAE", "annotations": []}, "hI11n4_r178": {"duration": 134.88, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=hI11n4_r178", "annotations": []}, "uG2sRzp5Bk4": {"duration": 192.053, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uG2sRzp5Bk4", "annotations": [{"segment": [42.83329641185647, 163.24578003120126], "label": "Playing blackjack"}]}, "Z2GWtB7MGVs": {"duration": 223.678, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=Z2GWtB7MGVs", "annotations": [{"segment": [0.01, 212.7301248049922], "label": "Surfing"}]}, "CL6TbOgnLzA": {"duration": 207.49, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=CL6TbOgnLzA", "annotations": [{"segment": [17.31555460218409, 197.591515600624], "label": "Playing water polo"}]}, "yPEf9ajKwI8": {"duration": 216.945, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=yPEf9ajKwI8", "annotations": [{"segment": [14.553187995319814, 184.4531966848674], "label": "Installing carpet"}]}, "3TLhUYQ8geM": {"duration": 200.342, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=3TLhUYQ8geM", "annotations": [{"segment": [4.6785280373831775, 171.85792990654207], "label": "Changing car wheel"}]}, "Pu85El6jhao": {"duration": 119.885, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Pu85El6jhao", "annotations": [{"segment": [16.800140186915886, 40.50700467289719], "label": "River tubing"}]}, "XH-YlSbgxkY": {"duration": 151.418, "subset": "validation", "resolution": "562x480", "url": "https://www.youtube.com/watch?v=XH-YlSbgxkY", "annotations": [{"segment": [11.334365054602184, 44.86519500780031], "label": "Baton twirling"}, {"segment": [53.12983619344773, 147.11061310452416], "label": "Baton twirling"}]}, "2Tuht3F2uc8": {"duration": 46.35, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2Tuht3F2uc8", "annotations": [{"segment": [2.1647425741029642, 44.305064683307336], "label": "Shoveling snow"}]}, "T97WL2cKD6M": {"duration": 58.65, "subset": "validation", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=T97WL2cKD6M", "annotations": [{"segment": [0.01, 58.47059743525741], "label": "Playing accordion"}]}, "3jliBZqVjMU": {"duration": 82.28999999999999, "subset": "testing", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=3jliBZqVjMU", "annotations": []}, "7-5sFmNf4i4": {"duration": 207.192, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=7-5sFmNf4i4", "annotations": [{"segment": [92.44411374102964, 123.79753693291734], "label": "Playing ten pins"}, {"segment": [133.81770310764432, 138.3429394446178], "label": "Playing ten pins"}]}, "ubgCjUUmP9I": {"duration": 107.16, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ubgCjUUmP9I", "annotations": [{"segment": [24.582178951620236, 75.15773577871244], "label": "Using the pommel horse"}]}, "2zVpWu1i5qM": {"duration": 96.18, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2zVpWu1i5qM", "annotations": [{"segment": [12.85020897, 93.02618605304211], "label": "Making a sandwich"}]}, "d4prmED8v-w": {"duration": 105.42, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=d4prmED8v-w", "annotations": [{"segment": [5.18234888, 7.3184279275932935], "label": "Pole vault"}, {"segment": [41.854829383651534, 50.24224116583711], "label": "Pole vault"}, {"segment": [44.32171520194141, 50.406700220389766], "label": "Pole vault"}]}, "9Bo7Hr77DgA": {"duration": 51.8, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=9Bo7Hr77DgA", "annotations": [{"segment": [12.526640276495412, 51.561267718735955], "label": "Brushing teeth"}]}, "ETZOazHvdZg": {"duration": 102.562, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ETZOazHvdZg", "annotations": [{"segment": [0.01, 15.373689414740547], "label": "Playing ten pins"}, {"segment": [45.454387728387374, 56.6546472127001], "label": "Playing ten pins"}, {"segment": [30.894049427988705, 61.93476974000119], "label": "Playing ten pins"}, {"segment": [67.85490711909635, 82.57524871036], "label": "Playing ten pins"}]}, "UBCZIKByREw": {"duration": 108.205, 
"subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=UBCZIKByREw", "annotations": []}, "XCXcwMyrqYM": {"duration": 75.63, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=XCXcwMyrqYM", "annotations": [{"segment": [39.377218533558654, 69.0550137], "label": "Archery"}]}, "cAiig_569fI": {"duration": 131.193, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cAiig_569fI", "annotations": []}, "cAhu8H9qsAI": {"duration": 59.351, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=cAhu8H9qsAI", "annotations": [{"segment": [24.0808127925117, 43.64070670826833], "label": "Putting on shoes"}]}, "wZ9PIXtmrq0": {"duration": 221.681, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wZ9PIXtmrq0", "annotations": []}, "HvDf9BN0blE": {"duration": 141.41, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HvDf9BN0blE", "annotations": []}, "5deGCvJOGg0": {"duration": 198.926, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=5deGCvJOGg0", "annotations": [{"segment": [25.447548801872074, 52.136441447737916], "label": "Curling"}, {"segment": [59.58450451170047, 95.89381194851795], "label": "Curling"}, {"segment": [107.68657846645866, 143.06487802028082], "label": "Curling"}, {"segment": [175.33981796411857, 188.0635923650546], "label": "Curling"}]}, "BaJwohtZMtA": {"duration": 197.161, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BaJwohtZMtA", "annotations": [{"segment": [5.227858034321374, 156.2365491419657], "label": "Hula hoop"}]}, "_0mR06FcftE": {"duration": 124.506, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=_0mR06FcftE", "annotations": [{"segment": [0, 118.8492542901716], "label": "Cleaning sink"}]}, "KhAtzEJxz9M": {"duration": 203.59, 
"subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=KhAtzEJxz9M", "annotations": [{"segment": [0.2, 199.1456995600624], "label": "Zumba"}]}, "aVDyPmUgHU0": {"duration": 76.092, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aVDyPmUgHU0", "annotations": [{"segment": [33.640083463338534, 64.25433931357254], "label": "Trimming branches or hedges"}]}, "aAtM2Gaf1Xg": {"duration": 105.094, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aAtM2Gaf1Xg", "annotations": []}, "gjfa7WGhM6U": {"duration": 115.8, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=gjfa7WGhM6U", "annotations": [{"segment": [10.658468276131044, 115.797935], "label": "Shaving legs"}]}, "IMto8gJvRek": {"duration": 195.489, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IMto8gJvRek", "annotations": [{"segment": [15.543542901716068, 195.361], "label": "Making a lemonade"}]}, "1jX8p54Dfjs": {"duration": 201.596, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=1jX8p54Dfjs", "annotations": [{"segment": [55.34280187207489, 172.6318081123245], "label": "Clipping cat claws"}]}, "JnpcJP82WLI": {"duration": 64.65, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=JnpcJP82WLI", "annotations": [{"segment": [9.6311008049922, 53.19796517940718], "label": "Ping-pong"}]}, "C8IHSB9mfeE": {"duration": 34.203, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=C8IHSB9mfeE", "annotations": [{"segment": [0.01, 11.644584243369735], "label": "Throwing darts"}, {"segment": [19.26552496099844, 29.870890015600626], "label": "Throwing darts"}]}, "j5V7kD_HeMw": {"duration": 179.468, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=j5V7kD_HeMw", "annotations": [{"segment": [0.2, 
169.9477160624025], "label": "Snowboarding"}]}, "lQj3qYaI6xo": {"duration": 225.65, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lQj3qYaI6xo", "annotations": []}, "D99jYbtGO38": {"duration": 8.8, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=D99jYbtGO38", "annotations": [{"segment": [2.727296, 7.939360000000001], "label": "Playing lacrosse"}]}, "9cxGx2BsKkM": {"duration": 83.31, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=9cxGx2BsKkM", "annotations": [{"segment": [0.12997346832648818, 59.657821104524174], "label": "Archery"}, {"segment": [64.72678629641185, 82.14323080187206], "label": "Archery"}]}, "W840XGSymlQ": {"duration": 181.534, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=W840XGSymlQ", "annotations": []}, "Lnwcg2H0Gt4": {"duration": 226.91, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Lnwcg2H0Gt4", "annotations": []}, "HD_vpQCUSCQ": {"duration": 161.03, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HD_vpQCUSCQ", "annotations": [{"segment": [66.78816203929976, 71.05220651482621], "label": "Volleyball"}]}, "P4TNzMk6OWo": {"duration": 121.069, "subset": "testing", "resolution": "544x360", "url": "https://www.youtube.com/watch?v=P4TNzMk6OWo", "annotations": []}, "n637bl2-DDo": {"duration": 61.09, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=n637bl2-DDo", "annotations": [{"segment": [40.600723516380654, 53.848377433697344], "label": "Spinning"}, {"segment": [59.37617547113884, 61.091699], "label": "Spinning"}]}, "dTSVM5fF1L8": {"duration": 127.548, "subset": "testing", "resolution": "600x360", "url": "https://www.youtube.com/watch?v=dTSVM5fF1L8", "annotations": []}, "rVLkm3MLzns": {"duration": 218.082, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=rVLkm3MLzns", "annotations": [{"segment": [24.829224648985956, 71.42653666146646], "label": "Snowboarding"}, {"segment": [88.09272854914197, 203.75438221528862], "label": "Snowboarding"}]}, "eZ_qerXno64": {"duration": 67.11, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=eZ_qerXno64", "annotations": [{"segment": [2.35220356474259, 62.986784344773795], "label": "Ping-pong"}]}, "3kJuuyL4tGQ": {"duration": 177.703, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3kJuuyL4tGQ", "annotations": []}, "rjjmrZvHMnA": {"duration": 78.81, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rjjmrZvHMnA", "annotations": []}, "tS-MHCJKMgM": {"duration": 40.566, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tS-MHCJKMgM", "annotations": [{"segment": [3.982563182527301, 26.676851794071766], "label": "Welding"}, {"segment": [30.027262090483617, 34.2626864274571], "label": "Welding"}]}, "A9TcWJnbcWs": {"duration": 7.06, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=A9TcWJnbcWs", "annotations": [{"segment": [0.24355629, 4.56718308], "label": "Playing racquetball"}]}, "iZtie23nW_A": {"duration": 52.245, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=iZtie23nW_A", "annotations": [{"segment": [5.619140405616225, 43.66685803432137], "label": "Hand car wash"}]}, "Nc9yr4urskA": {"duration": 160.728, "subset": "testing", "resolution": "300x240", "url": "https://www.youtube.com/watch?v=Nc9yr4urskA", "annotations": []}, "iPXyExgGD5Q": {"duration": 39.08, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=iPXyExgGD5Q", "annotations": [{"segment": [21.73403965665395, 31.585506946373577], "label": "Gargling mouthwash"}]}, "ndUSRZq0QHk": {"duration": 119.119, "subset": "testing", "resolution": 
"480x360", "url": "https://www.youtube.com/watch?v=ndUSRZq0QHk", "annotations": []}, "rpbVkkhUcuw": {"duration": 126.608, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rpbVkkhUcuw", "annotations": []}, "T4ZeB_TvS68": {"duration": 64.412, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=T4ZeB_TvS68", "annotations": [{"segment": [9.667178346164503, 63.61885747389036], "label": "Playing blackjack"}]}, "wjcCl0jPxO4": {"duration": 84.41, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wjcCl0jPxO4", "annotations": [{"segment": [5.267038003120125, 13.694298808112325], "label": "Washing face"}, {"segment": [37.9226736224649, 42.79468377535101], "label": "Washing face"}, {"segment": [58.595797784711394, 64.91624338845554], "label": "Washing face"}]}, "k4WHaby5jd8": {"duration": 133.051, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=k4WHaby5jd8", "annotations": [{"segment": [0.01, 97.97154447425898], "label": "Assembling bicycle"}]}, "3bi2XM3scQA": {"duration": 52.06, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3bi2XM3scQA", "annotations": [{"segment": [5.075955635725429, 14.821790456318254], "label": "Clean and jerk"}, {"segment": [28.62838978549142, 49.500719359594385], "label": "Clean and jerk"}]}, "rF1n8L571vM": {"duration": 166.56, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rF1n8L571vM", "annotations": [{"segment": [7.535328090674648, 159.02140498595944], "label": "Spinning"}]}, "moGDCWEoaK8": {"duration": 95.156, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=moGDCWEoaK8", "annotations": [{"segment": [0.01, 95.101], "label": "Hurling"}]}, "w3OuSKoSsC0": {"duration": 67.13, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=w3OuSKoSsC0", 
"annotations": []}, "1oyWMusaDTI": {"duration": 61.72, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=1oyWMusaDTI", "annotations": [{"segment": [0.01, 61.71852607709751], "label": "Windsurfing"}]}, "tPiuksisb4E": {"duration": 165.23, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tPiuksisb4E", "annotations": [{"segment": [32.479536438600405, 153.11781345397816], "label": "Smoking hookah"}]}, "FMtUqoxfR50": {"duration": 64.78, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FMtUqoxfR50", "annotations": [{"segment": [9.34187457878315, 56.50571704680188], "label": "Volleyball"}]}, "bJ1vEQKX-hE": {"duration": 215.32, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bJ1vEQKX-hE", "annotations": [{"segment": [92.28314162000001, 178.32421442], "label": "Dodgeball"}]}, "RcDEaYPwI6I": {"duration": 120.98, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RcDEaYPwI6I", "annotations": [{"segment": [32.63978315132605, 39.4318767550702], "label": "Cheerleading"}, {"segment": [61.12884243369734, 117.16361466458659], "label": "Cheerleading"}]}, "c0Cn3HFXpLs": {"duration": 139.57999999999998, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=c0Cn3HFXpLs", "annotations": [{"segment": [0.2, 5.985990639625585], "label": "Playing racquetball"}, {"segment": [17.304954758190327, 133.10666458658346], "label": "Playing racquetball"}]}, "7x5oY2Myd7I": {"duration": 209.096, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7x5oY2Myd7I", "annotations": [{"segment": [6.837789719626167, 20.18776012461059], "label": "Playing ice hockey"}, {"segment": [45.25965576323988, 60.56328037383177], "label": "Playing ice hockey"}, {"segment": [168.66547975077881, 206.78151869158881], "label": "Playing ice hockey"}]}, "ur873jaQO3A": 
{"duration": 130.38, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ur873jaQO3A", "annotations": [{"segment": [49.40483392675172, 89.81857875385451], "label": "Archery"}]}, "FADShG5lyB8": {"duration": 12.585, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=FADShG5lyB8", "annotations": []}, "vWGkTOLx57s": {"duration": 66.15, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vWGkTOLx57s", "annotations": [{"segment": [12.693996306791048, 58.92903949297972], "label": "Using parallel bars"}]}, "Nn4sVR3__DQ": {"duration": 39.126, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Nn4sVR3__DQ", "annotations": [{"segment": [19.897477133733105, 37.4827100388653], "label": "Fun sliding down"}]}, "f_BzYUCp4J0": {"duration": 214.065, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=f_BzYUCp4J0", "annotations": [{"segment": [9.000042056074767, 201.3342741433022], "label": "Blowing leaves"}]}, "VRRLOIP6EmA": {"duration": 85.287, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VRRLOIP6EmA", "annotations": [{"segment": [0.01, 84.68919968798753], "label": "Surfing"}]}, "drarkco9LfQ": {"duration": 71.564, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=drarkco9LfQ", "annotations": [{"segment": [0.2, 60.36327769110765], "label": "Arm wrestling"}]}, "wP3gExRMMy4": {"duration": 108.6, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=wP3gExRMMy4", "annotations": []}, "lSX_yj2ohls": {"duration": 237.541, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lSX_yj2ohls", "annotations": [{"segment": [0.01, 32.337954005695885], "label": "Beer pong"}, {"segment": [41.217941220649145, 52.31792523934074], "label": "Beer pong"}, {"segment": 
[80.43788475335943, 212.89769404307907], "label": "Beer pong"}]}, "w_TcHZn4Qy8": {"duration": 44.3, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=w_TcHZn4Qy8", "annotations": []}, "sLtQvJXnrOE": {"duration": 21.316, "subset": "training", "resolution": "1444x1080", "url": "https://www.youtube.com/watch?v=sLtQvJXnrOE", "annotations": [{"segment": [0.2, 21.115849], "label": "Changing car wheel"}]}, "3SjuIcAfeWk": {"duration": 94.53, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3SjuIcAfeWk", "annotations": [{"segment": [7.547233746953472, 90.12521038246749], "label": "Wrapping presents"}]}, "MQ7k1wig4jk": {"duration": 16.162, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=MQ7k1wig4jk", "annotations": [{"segment": [0.2, 15.961088435374151], "label": "Table soccer"}]}, "HXn5ZpSEMqg": {"duration": 111.27, "subset": "training", "resolution": "2276x720", "url": "https://www.youtube.com/watch?v=HXn5ZpSEMqg", "annotations": [{"segment": [62.984747269890796, 66.81401716068642], "label": "Waxing skis"}]}, "vWEbHvnkogA": {"duration": 190.92, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vWEbHvnkogA", "annotations": [{"segment": [79.37395505304212, 88.30910946801872], "label": "Playing saxophone"}, {"segment": [129.7086582574103, 140.13300507488302], "label": "Playing saxophone"}, {"segment": [174.68226881279253, 179.74552298127924], "label": "Playing saxophone"}]}, "vbWdGj1sfO8": {"duration": 11.076, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vbWdGj1sfO8", "annotations": [{"segment": [1.3232486668048988, 10.938], "label": "Doing crunches"}]}, "pKOwZrmQan4": {"duration": 118.074, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pKOwZrmQan4", "annotations": [{"segment": [15.472966165366616, 113.1000145897036], "label": "Disc dog"}]}, 
"XTErpg4IdiE": {"duration": 201.55, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=XTErpg4IdiE", "annotations": [{"segment": [108.16369191887675, 188.65760218408735], "label": "Installing carpet"}]}, "u3uYs6SZFKo": {"duration": 40.078, "subset": "validation", "resolution": "630x480", "url": "https://www.youtube.com/watch?v=u3uYs6SZFKo", "annotations": [{"segment": [6.66921781160624, 36.649537669979864], "label": "Swinging at the playground"}]}, "7n1L0evoi34": {"duration": 83.616, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7n1L0evoi34", "annotations": [{"segment": [8.721602184087363, 11.58541185647426], "label": "Grooming dog"}, {"segment": [20.176840873634944, 33.71485023400936], "label": "Grooming dog"}, {"segment": [38.0105647425897, 63.13398595943838], "label": "Grooming dog"}]}, "NQOPahBcpSE": {"duration": 171.22, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NQOPahBcpSE", "annotations": [{"segment": [46.47886805616225, 118.06700965990639], "label": "Walking the dog"}, {"segment": [120.47108904212169, 145.84748252106084], "label": "Walking the dog"}, {"segment": [150.6556412854914, 155.73091998127924], "label": "Walking the dog"}]}, "NzxZdC-63LE": {"duration": 187.85, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NzxZdC-63LE", "annotations": [{"segment": [12.076415117697174, 173.29921423296196], "label": "Tumbling"}]}, "R5_Xq9OJuoQ": {"duration": 150.57999999999998, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=R5_Xq9OJuoQ", "annotations": []}, "uZC3WPkta-I": {"duration": 95.411, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uZC3WPkta-I", "annotations": [{"segment": [3.2644416865696515, 15.135146961940556], "label": "Croquet"}, {"segment": [22.2575701271631, 52.082716230399456], "label": "Croquet"}, 
{"segment": [57.57291732526727, 73.59836916974626], "label": "Croquet"}, {"segment": [84.28200373273228, 90.81089152122372], "label": "Croquet"}]}, "KjUxjcpIG_Y": {"duration": 68.337, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KjUxjcpIG_Y", "annotations": [{"segment": [0.2, 53.753669376947045], "label": "Gargling mouthwash"}]}, "SXcxyuXxY8M": {"duration": 237.29, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SXcxyuXxY8M", "annotations": []}, "nE0_PaRBXeA": {"duration": 36.224, "subset": "training", "resolution": "300x240", "url": "https://www.youtube.com/watch?v=nE0_PaRBXeA", "annotations": [{"segment": [0.010000000000000002, 36.201], "label": "Using the rowing machine"}]}, "CXbVcrVgNzQ": {"duration": 46.39, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=CXbVcrVgNzQ", "annotations": [{"segment": [1.36461831, 4.234037360686427], "label": "Ironing clothes"}, {"segment": [27.466960314196566, 43.89647280780031], "label": "Ironing clothes"}]}, "IN23CO8Kotk": {"duration": 60.56, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IN23CO8Kotk", "annotations": []}, "pLcrLi5oAvU": {"duration": 174.29, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=pLcrLi5oAvU", "annotations": [{"segment": [9.788461905823171, 112.02350766302654], "label": "Getting a haircut"}]}, "Uw_4gKpmAhI": {"duration": 48.159, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Uw_4gKpmAhI", "annotations": [{"segment": [0.19999999999999998, 12.23161154446178], "label": "Plastering"}, {"segment": [34.593698907956316, 40.8220655226209], "label": "Plastering"}]}, "FDvZUUc5tw4": {"duration": 104.513, "subset": "validation", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=FDvZUUc5tw4", "annotations": [{"segment": [18.415121684867394, 91.4412496099844], 
"label": "Clipping cat claws"}]}, "txsupdxCToQ": {"duration": 88.817, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=txsupdxCToQ", "annotations": [{"segment": [10.497728555360712, 14.917825676635353], "label": "Rollerblading"}, {"segment": [20.02856297310915, 53.86993062950301], "label": "Rollerblading"}, {"segment": [69.75465438533504, 81.35740912872541], "label": "Rollerblading"}]}, "1-Ud-q4y1oc": {"duration": 80.02, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=1-Ud-q4y1oc", "annotations": [{"segment": [10.98023088923557, 76.61206552262091], "label": "Cleaning windows"}]}, "8Kjuph57CRE": {"duration": 112.176, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=8Kjuph57CRE", "annotations": []}, "fmtW5lcdT_0": {"duration": 13.4, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=fmtW5lcdT_0", "annotations": [{"segment": [0.6270474492449085, 10.8485], "label": "Playing kickball"}]}, "_tPDUYSu1IQ": {"duration": 181.232, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_tPDUYSu1IQ", "annotations": [{"segment": [8.762263650546023, 10.740839313572543], "label": "Arm wrestling"}, {"segment": [67.27157254290172, 101.75531981279251], "label": "Arm wrestling"}, {"segment": [113.9220015600624, 117.87958970358814], "label": "Arm wrestling"}, {"segment": [124.66402652106085, 127.7735600624025], "label": "Arm wrestling"}, {"segment": [132.01383307332293, 137.66753042121684], "label": "Arm wrestling"}, {"segment": [152.64982839313572, 171.02434477379094], "label": "Arm wrestling"}]}, "U4UHEGNFyz8": {"duration": 187.873, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=U4UHEGNFyz8", "annotations": [{"segment": [11.430597613104524, 187.87213151927438], "label": "BMX"}]}, "OXbfnzs-qUU": {"duration": 58.074, "subset": "validation", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=OXbfnzs-qUU", "annotations": [{"segment": [0, 58.073105], "label": "Knitting"}]}, "WMVJqLMtaws": {"duration": 109.47999999999999, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WMVJqLMtaws", "annotations": [{"segment": [5.807162074882996, 63.19558728549141], "label": "Hopscotch"}, {"segment": [85.74104004680187, 108.28649280811233], "label": "Hopscotch"}]}, "yB4-L2LA5So": {"duration": 156.294, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yB4-L2LA5So", "annotations": []}, "0x4TP4MPelY": {"duration": 147.8, "subset": "validation", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=0x4TP4MPelY", "annotations": [{"segment": [2.9974027260410145, 139.95564909516378], "label": "Doing nails"}]}, "aMQVH3dF0QM": {"duration": 147.17000000000002, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aMQVH3dF0QM", "annotations": [{"segment": [37.186960998439936, 111.56088299531982], "label": "Horseback riding"}]}, "OkvWTZ4XLyM": {"duration": 12.59, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=OkvWTZ4XLyM", "annotations": []}, "yauw3CrvbtY": {"duration": 186.41, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yauw3CrvbtY", "annotations": [{"segment": [90.76328237129486, 108.21775975039002], "label": "Plataform diving"}]}, "arTumJHoLB0": {"duration": 12.214, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=arTumJHoLB0", "annotations": [{"segment": [0.01, 12.213696145124716], "label": "Applying sunscreen"}]}, "do_AQlxV9SI": {"duration": 73.7, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=do_AQlxV9SI", "annotations": [{"segment": [42.99016224648986, 57.24358502340093], "label": "Shot put"}]}, "Fsn1rBifmjc": {"duration": 121.255, "subset": 
"testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Fsn1rBifmjc", "annotations": []}, "NkJA_0tErGY": {"duration": 194.72, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NkJA_0tErGY", "annotations": [{"segment": [52.3588143525741, 145.27301092043683], "label": "Making a sandwich"}]}, "F1RpG1Vwi28": {"duration": 130.33, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=F1RpG1Vwi28", "annotations": [{"segment": [57.135314754086764, 63.4384989440394], "label": "Shaving"}]}, "gA0m7YUH408": {"duration": 11.053, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gA0m7YUH408", "annotations": [{"segment": [0.2, 10.852697000000001], "label": "Croquet"}]}, "Jz2xJx1ICCM": {"duration": 29.188, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Jz2xJx1ICCM", "annotations": [{"segment": [2.178847182438294, 25.73763983169084], "label": "Arm wrestling"}]}, "TI3WqIauOYs": {"duration": 89.75, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=TI3WqIauOYs", "annotations": [{"segment": [4.97234012, 76.19554212168487], "label": "Doing karate"}]}, "OrBcDTsll-A": {"duration": 81.78, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OrBcDTsll-A", "annotations": []}, "wEgt41AJaU4": {"duration": 28.61, "subset": "training", "resolution": "352x264", "url": "https://www.youtube.com/watch?v=wEgt41AJaU4", "annotations": [{"segment": [0, 28.606983], "label": "Paintball"}]}, "gKLbdLKEG6U": {"duration": 231.364, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gKLbdLKEG6U", "annotations": [{"segment": [14.786928237129485, 149.67256630265211], "label": "Rafting"}]}, "6PaTEwG8Ym4": {"duration": 20.81, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6PaTEwG8Ym4", 
"annotations": [{"segment": [1.653402496099844, 17.798391575663025], "label": "Getting a haircut"}]}, "BiRD6i3hVlw": {"duration": 175.358, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BiRD6i3hVlw", "annotations": []}, "b3CAq_EiavU": {"duration": 65.69, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=b3CAq_EiavU", "annotations": []}, "BkDbDnvoL28": {"duration": 211.07, "subset": "testing", "resolution": "854x468", "url": "https://www.youtube.com/watch?v=BkDbDnvoL28", "annotations": []}, "vLEz1mHahdM": {"duration": 49.32, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vLEz1mHahdM", "annotations": [{"segment": [8.641181990000002, 45.23615513416537], "label": "Playing bagpipes"}]}, "xD19KSWY72A": {"duration": 62.532, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xD19KSWY72A", "annotations": []}, "hiVs1hNyPpw": {"duration": 64.55, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hiVs1hNyPpw", "annotations": [{"segment": [0.01, 64.551473], "label": "Brushing hair"}]}, "vwZ-8NnLahA": {"duration": 20.875, "subset": "testing", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=vwZ-8NnLahA", "annotations": []}, "iH8eHkIHrg4": {"duration": 188.221, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iH8eHkIHrg4", "annotations": [{"segment": [10.277246613674071, 179.41136151326054], "label": "Surfing"}]}, "TfIGKODkpPY": {"duration": 122.88, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TfIGKODkpPY", "annotations": [{"segment": [77.44698844929798, 122.879999], "label": "Kite flying"}]}, "3lsq0a5FQfc": {"duration": 193.167, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3lsq0a5FQfc", "annotations": []}, "kpAwQpA1nPs": {"duration": 181.05, 
"subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kpAwQpA1nPs", "annotations": [{"segment": [0.01, 123.28640014352574], "label": "Pole vault"}]}, "5LGh56euaZs": {"duration": 122.904, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5LGh56euaZs", "annotations": []}, "lARaqx1e7wM": {"duration": 98.825, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lARaqx1e7wM", "annotations": [{"segment": [4.617949859071355, 91.89720219551997], "label": "Decorating the Christmas tree"}]}, "9WXl-2sINno": {"duration": 119.35, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=9WXl-2sINno", "annotations": [{"segment": [21.57848, 77.37460499999999], "label": "Putting on makeup"}]}, "L9MTwigRhmk": {"duration": 174.568, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=L9MTwigRhmk", "annotations": [{"segment": [27.778310675284153, 166.94219942901717], "label": "BMX"}]}, "_1vYKA7mNLI": {"duration": 61.858, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_1vYKA7mNLI", "annotations": [{"segment": [11.4975506142233, 52.7115506142233], "label": "Kneeling"}]}, "gUQFX_IydG8": {"duration": 113.8, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=gUQFX_IydG8", "annotations": [{"segment": [17.22102150480577, 111.49279902080437], "label": "Doing karate"}]}, "QEfXMtnI6rw": {"duration": 78.3, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QEfXMtnI6rw", "annotations": [{"segment": [49.68447737909516, 70.43720748829953], "label": "Shaving"}]}, "Iy53PoamjTk": {"duration": 127.01, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Iy53PoamjTk", "annotations": []}, "FIaXCUPjFY0": {"duration": 29.03, "subset": "validation", "resolution": "360x480", "url": 
"https://www.youtube.com/watch?v=FIaXCUPjFY0", "annotations": [{"segment": [0.05754027321477396, 22.02969262835496], "label": "Bathing dog"}]}, "R37pbIySnjg": {"duration": 120.093, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=R37pbIySnjg", "annotations": [{"segment": [0.01, 119.23309768325275], "label": "Painting fence"}]}, "sAJ-hixMlO8": {"duration": 106.464, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=sAJ-hixMlO8", "annotations": []}, "nqt4s3j9L6o": {"duration": 85.845, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nqt4s3j9L6o", "annotations": []}, "eUCVKv4R-7A": {"duration": 140.88, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eUCVKv4R-7A", "annotations": [{"segment": [19.335425897035883, 140.18183775351014], "label": "Kayaking"}]}, "uWnlmM2uOs4": {"duration": 30.395, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=uWnlmM2uOs4", "annotations": [{"segment": [0.2, 30.194919000000002], "label": "Applying sunscreen"}]}, "uY0ngQlbIZc": {"duration": 59.56, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uY0ngQlbIZc", "annotations": [{"segment": [0.01, 45.017822916998306], "label": "Playing squash"}]}, "86hyAYM5d3E": {"duration": 168.902, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=86hyAYM5d3E", "annotations": [{"segment": [13.414191588785044, 141.24354672897195], "label": "Cutting the grass"}]}, "GZiG-aSzIJI": {"duration": 137.09, "subset": "training", "resolution": "938x720", "url": "https://www.youtube.com/watch?v=GZiG-aSzIJI", "annotations": [{"segment": [42.11067082683308, 119.91921996879876], "label": "Paintball"}]}, "qdMjXJTsX94": {"duration": 172.803, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qdMjXJTsX94", "annotations": 
[{"segment": [9.797539053701026, 160.7980659415515], "label": "Playing drums"}]}, "bqv3vCT0T5s": {"duration": 132.31, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bqv3vCT0T5s", "annotations": [{"segment": [8.505728329853236, 131.33293976910556], "label": "Belly dance"}]}, "gdYr4E3qobI": {"duration": 100.496, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gdYr4E3qobI", "annotations": [{"segment": [48.58488299531981, 67.3919344773791], "label": "Ice fishing"}]}, "vgC8jB2FhAg": {"duration": 183.09, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vgC8jB2FhAg", "annotations": [{"segment": [2.2850463775351013, 173.09226309828392], "label": "Ballet"}]}, "GYsCbanlHSc": {"duration": 146.333, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=GYsCbanlHSc", "annotations": []}, "dDYQNtOtb-Q": {"duration": 33.48, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dDYQNtOtb-Q", "annotations": [{"segment": [2.8729711622464897, 5.171348092043682], "label": "Drinking coffee"}]}, "ED7SKNfAKyI": {"duration": 13.77, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=ED7SKNfAKyI", "annotations": [{"segment": [0, 13.769433], "label": "Baton twirling"}]}, "iAIl5eawd6I": {"duration": 215.273, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=iAIl5eawd6I", "annotations": [{"segment": [5.363763239875389, 211.19817757009346], "label": "Skiing"}]}, "sx_npA4wRrw": {"duration": 203.66, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=sx_npA4wRrw", "annotations": [{"segment": [32.2462663, 196.34945397815912], "label": "Preparing pasta"}]}, "62s1ZSNLJ6g": {"duration": 135.49, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=62s1ZSNLJ6g", 
"annotations": [{"segment": [75.03866025999999, 115.13402939], "label": "Cleaning shoes"}]}, "hbOzx9P3l40": {"duration": 127.547, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=hbOzx9P3l40", "annotations": []}, "vygPNS-eAYM": {"duration": 49.37, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vygPNS-eAYM", "annotations": [{"segment": [0, 49.36562358276644], "label": "Hopscotch"}]}, "LOINRpWGrlw": {"duration": 146.472, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=LOINRpWGrlw", "annotations": [{"segment": [6.169599477856665, 143.95731950078002], "label": "Playing ten pins"}]}, "WrFNI5GQFPM": {"duration": 214.019, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WrFNI5GQFPM", "annotations": [{"segment": [47.07735320025046, 148.9113434726989], "label": "Braiding hair"}]}, "5FSdOfvJnek": {"duration": 96.316, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5FSdOfvJnek", "annotations": [{"segment": [38.76697253354134, 68.96914880967239], "label": "Rafting"}]}, "DHiFKO68gT8": {"duration": 181.302, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DHiFKO68gT8", "annotations": [{"segment": [15.264982839313573, 45.79494851794072], "label": "BMX"}, {"segment": [46.07763338533542, 78.58639313572542], "label": "BMX"}, {"segment": [101.76655226209047, 118.16227457098283], "label": "BMX"}, {"segment": [131.44846333853354, 137.95021528861153], "label": "BMX"}, {"segment": [151.80177379095164, 163.97531981279252], "label": "BMX"}]}, "Vh4TxFOCNM8": {"duration": 57.656, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=Vh4TxFOCNM8", "annotations": [{"segment": [0.01, 57.621], "label": "Futsal"}]}, "ULZpTYSwlTs": {"duration": 27.89, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=ULZpTYSwlTs", "annotations": [{"segment": [2.3653753400000004, 26.341591263650546], "label": "Snatch"}]}, "KXXnSU0PccI": {"duration": 235.613, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KXXnSU0PccI", "annotations": []}, "m1pNOYN-DoI": {"duration": 68.987, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=m1pNOYN-DoI", "annotations": [{"segment": [3.3259408871441267, 8.368498907760356], "label": "Beer pong"}, {"segment": [16.415134047041573, 17.70259566932657], "label": "Beer pong"}, {"segment": [26.82211500365442, 28.109576602410254], "label": "Beer pong"}, {"segment": [34.97603846244136, 41.62792338934649], "label": "Beer pong"}]}, "IC-Rd7ouQmE": {"duration": 46.115, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IC-Rd7ouQmE", "annotations": [{"segment": [0.01, 46.081], "label": "BMX"}]}, "rVxxAI6wlXk": {"duration": 224.17000000000002, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rVxxAI6wlXk", "annotations": [{"segment": [103.84389547581904, 123.0742464898596], "label": "Shaving"}, {"segment": [127.9692449297972, 143.00388299531983], "label": "Shaving"}, {"segment": [164.33209048361937, 168.17816068642747], "label": "Shaving"}, {"segment": [173.7724446177847, 189.8560109204368], "label": "Shaving"}]}, "aAz3YL2RMr4": {"duration": 207.66, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aAz3YL2RMr4", "annotations": []}, "WcQV3OzK_sc": {"duration": 218.5, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WcQV3OzK_sc", "annotations": [{"segment": [39.20042711388456, 105.67071656786271], "label": "Getting a haircut"}, {"segment": [106.07689452, 176.91323193135727], "label": "Getting a haircut"}]}, "bi_xkH87Rnw": {"duration": 87.7, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=bi_xkH87Rnw", "annotations": [{"segment": [5.746417056682268, 82.22848996567863], "label": "Using the balance beam"}]}, "-fBxVUu0KcQ": {"duration": 145.172, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=-fBxVUu0KcQ", "annotations": [{"segment": [8.815761682242991, 131.3322445482866], "label": "Beach soccer"}]}, "LoBjzA2z2Ls": {"duration": 229.53, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LoBjzA2z2Ls", "annotations": [{"segment": [23.633214046929226, 221.29282103588145], "label": "Preparing salad"}]}, "F03y7m3Nwuw": {"duration": 49.99, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=F03y7m3Nwuw", "annotations": [{"segment": [9.507435257410297, 23.612728549141966], "label": "Discus throw"}, {"segment": [39.43247737909517, 45.90065054602184], "label": "Discus throw"}]}, "g0XO-rWHmzs": {"duration": 73.98, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=g0XO-rWHmzs", "annotations": [{"segment": [0, 73.978774], "label": "Walking the dog"}]}, "ouKsBHHEnYc": {"duration": 102.22, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ouKsBHHEnYc", "annotations": []}, "jDlF26-Uu2I": {"duration": 75.3, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jDlF26-Uu2I", "annotations": [{"segment": [19.48544773790952, 68.9582792152886], "label": "Cheerleading"}]}, "3JcvtncHhLw": {"duration": 104.746, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3JcvtncHhLw", "annotations": [{"segment": [18.62858171606864, 84.48225216848672], "label": "Running a marathon"}]}, "QRn9v8F8Nn0": {"duration": 375.537, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QRn9v8F8Nn0", "annotations": [{"segment": [100.75845865834631, 330.4116131045242], "label": 
"Removing ice from car"}]}, "2rHsoF35eQw": {"duration": 136.534, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2rHsoF35eQw", "annotations": [{"segment": [24.49505956318253, 127.80031076443058], "label": "Rafting"}]}, "dth916VxJUw": {"duration": 141.13, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dth916VxJUw", "annotations": [{"segment": [23.55850198633796, 127.70029023400937], "label": "Using parallel bars"}]}, "Te9e32TDiZ8": {"duration": 147.63, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Te9e32TDiZ8", "annotations": [{"segment": [1.8425269266770672, 123.21898822152887], "label": "Getting a piercing"}]}, "xIG7FQWBWZU": {"duration": 33.53, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xIG7FQWBWZU", "annotations": [{"segment": [21.394090042121686, 25.787986285491417], "label": "Curling"}]}, "aXm3sPITCKM": {"duration": 180.84, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aXm3sPITCKM", "annotations": [{"segment": [31.091305409999997, 131.63848578], "label": "Getting a haircut"}]}, "wCexKIuAj3A": {"duration": 133.63, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=wCexKIuAj3A", "annotations": []}, "qwbxSLVwkfA": {"duration": 129.22, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qwbxSLVwkfA", "annotations": []}, "doZb3RlLSts": {"duration": 223.655, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=doZb3RlLSts", "annotations": [{"segment": [35.758428237129486, 188.56029719188768], "label": "Installing carpet"}]}, "YgrWck47C-g": {"duration": 86.31, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=YgrWck47C-g", "annotations": [{"segment": [28.865810452418096, 49.32083697347893], "label": "Washing 
hands"}]}, "EWlt9TTOw30": {"duration": 82.501, "subset": "validation", "resolution": "540x360", "url": "https://www.youtube.com/watch?v=EWlt9TTOw30", "annotations": [{"segment": [12.475471138845554, 74.099120124805], "label": "Decorating the Christmas tree"}]}, "J_dUIu1pXYU": {"duration": 93.832, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=J_dUIu1pXYU", "annotations": []}, "6mhc4G2odkc": {"duration": 38.64, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6mhc4G2odkc", "annotations": []}, "C8hdbmjUMRc": {"duration": 154.738, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=C8hdbmjUMRc", "annotations": [{"segment": [0.2, 154.7374829931973], "label": "Laying tile"}]}, "iFwmJi-I5sg": {"duration": 78.51, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iFwmJi-I5sg", "annotations": [{"segment": [3.306809194816772, 78.26114917628705], "label": "Discus throw"}]}, "ivBgqENFmt0": {"duration": 166.72, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ivBgqENFmt0", "annotations": [{"segment": [16.120096723868954, 42.125307332293296], "label": "Fixing the roof"}, {"segment": [108.43366146645866, 119.09500468018722], "label": "Fixing the roof"}, {"segment": [123.77559438377537, 129.4963151326053], "label": "Fixing the roof"}]}, "7kO_qcJEiu0": {"duration": 186.573, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7kO_qcJEiu0", "annotations": [{"segment": [45.04472824436109, 178.14463370872275], "label": "Braiding hair"}]}, "gYBIX_tGaXo": {"duration": 33.3, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gYBIX_tGaXo", "annotations": [{"segment": [1.24297701, 20.95013943], "label": "Hammer throw"}]}, "ygHElEUxVwA": {"duration": 222.68, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=ygHElEUxVwA", "annotations": [{"segment": [16.674885966867247, 183.0763521778966], "label": "Zumba"}]}, "VCg6_fuipp8": {"duration": 98.59, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=VCg6_fuipp8", "annotations": [{"segment": [11.997145441497661, 20.918099744149767], "label": "High jump"}, {"segment": [42.605247273010924, 67.36858594071764], "label": "High jump"}]}, "3jZq0UaDIks": {"duration": 130.636, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3jZq0UaDIks", "annotations": [{"segment": [45.75289467143411, 114.4228707305321], "label": "River tubing"}]}, "gY0qgtM0Gt8": {"duration": 101.1, "subset": "validation", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=gY0qgtM0Gt8", "annotations": [{"segment": [20.018889942002815, 32.60219212268817], "label": "Blowing leaves"}, {"segment": [52.106310502750475, 66.89169056505577], "label": "Blowing leaves"}, {"segment": [75.54271081427694, 99.45098495757914], "label": "Blowing leaves"}]}, "UPZm0tx77Vw": {"duration": 120.21, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=UPZm0tx77Vw", "annotations": [{"segment": [27.004988353198126, 112.33325027150747], "label": "Ping-pong"}]}, "P9qhbSYblG4": {"duration": 115.706, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=P9qhbSYblG4", "annotations": [{"segment": [16.239453978159126, 105.93564274570983], "label": "Playing ten pins"}]}, "DcoHV4WiHQk": {"duration": 17.137, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DcoHV4WiHQk", "annotations": [{"segment": [0.010000000000000002, 17.136325], "label": "Building sandcastles"}]}, "-zlGTPxjils": {"duration": 101.03, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-zlGTPxjils", "annotations": []}, "KxXlMXEkTv8": {"duration": 147.19, "subset": 
"testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KxXlMXEkTv8", "annotations": []}, "ksk7okDk_wY": {"duration": 50.875, "subset": "training", "resolution": "636x360", "url": "https://www.youtube.com/watch?v=ksk7okDk_wY", "annotations": [{"segment": [0.19999999999999998, 50.620999999999995], "label": "Beach soccer"}]}, "vI-O6QWIIGU": {"duration": 85.68, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vI-O6QWIIGU", "annotations": []}, "s7mP374Twwo": {"duration": 37.83, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=s7mP374Twwo", "annotations": []}, "HoViId2LHl8": {"duration": 135.19, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HoViId2LHl8", "annotations": [{"segment": [0.2, 126.53922964118566], "label": "Cumbia"}]}, "-lV2zo5XsCg": {"duration": 30.67, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-lV2zo5XsCg", "annotations": [{"segment": [10.240470889235569, 17.226960374414976], "label": "Grooming horse"}]}, "uPyciwjOz3E": {"duration": 183.58, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uPyciwjOz3E", "annotations": [{"segment": [5.726708268330733, 158.1748127159879], "label": "Walking the dog"}]}, "LQ7X62seYYI": {"duration": 120.628, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LQ7X62seYYI", "annotations": [{"segment": [13.173062922516902, 104.06719704680188], "label": "Snowboarding"}]}, "KtUshVJ28us": {"duration": 104.17, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KtUshVJ28us", "annotations": [{"segment": [17.87533757132598, 71.1763433822153], "label": "Tumbling"}]}, "gDdkyHysoJs": {"duration": 9.71, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=gDdkyHysoJs", "annotations": []}, "Cu8kOOnAjaw": 
{"duration": 121.63, "subset": "training", "resolution": "240x180", "url": "https://www.youtube.com/watch?v=Cu8kOOnAjaw", "annotations": [{"segment": [0.01, 120.86714508580343], "label": "Washing face"}]}, "vALXRFRlVRk": {"duration": 40.47, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=vALXRFRlVRk", "annotations": []}, "po9uigsunAU": {"duration": 139.669, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=po9uigsunAU", "annotations": [{"segment": [57.50381279251171, 128.3128689547582], "label": "Running a marathon"}]}, "L1XpfS1RCzE": {"duration": 25.08, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=L1XpfS1RCzE", "annotations": [{"segment": [8.21573434365946, 23.942997115132606], "label": "Kayaking"}]}, "nOm1JLcBD30": {"duration": 178.655, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nOm1JLcBD30", "annotations": [{"segment": [14.447935442256217, 169.20759703421285], "label": "Riding bumper cars"}]}, "0al9HOh_jjA": {"duration": 50.78, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=0al9HOh_jjA", "annotations": []}, "Marb8CMxPV0": {"duration": 175.938, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Marb8CMxPV0", "annotations": [{"segment": [0.01, 38.97516996190051], "label": "Disc dog"}, {"segment": [77.1269208400989, 175.73721088435374], "label": "Disc dog"}]}, "8CwhsT1vwKE": {"duration": 202.734, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=8CwhsT1vwKE", "annotations": []}, "s3heOnMEtHo": {"duration": 86.495, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=s3heOnMEtHo", "annotations": []}, "W_5Mx-mfNmU": {"duration": 87.15, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=W_5Mx-mfNmU", "annotations": 
[{"segment": [5.33408424, 72.89759954602184], "label": "Starting a campfire"}]}, "tjUuEqmLGeI": {"duration": 161.031, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tjUuEqmLGeI", "annotations": [{"segment": [14.384064868356582, 150.95749944779584], "label": "Baton twirling"}]}, "0Z_mhRoqG2E": {"duration": 108.35, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=0Z_mhRoqG2E", "annotations": [{"segment": [9.296313512404442, 58.14421542303869], "label": "Rock climbing"}]}, "hThdSxhMhDo": {"duration": 42.26, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hThdSxhMhDo", "annotations": [{"segment": [2.24157687675507, 6.724730630265211], "label": "Washing face"}, {"segment": [9.361879897035882, 13.185746333853356], "label": "Washing face"}, {"segment": [15.229537015600624, 20.174191890795633], "label": "Washing face"}, {"segment": [25.44849042433697, 27.228566179407178], "label": "Washing face"}]}, "T3mpSZst-84": {"duration": 78.367, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=T3mpSZst-84", "annotations": []}, "eS1r2Qi0qUM": {"duration": 61.49, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eS1r2Qi0qUM", "annotations": [{"segment": [0.01, 60.23944502814126], "label": "Playing badminton"}]}, "5wchVLM0f5Y": {"duration": 40.914, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=5wchVLM0f5Y", "annotations": [{"segment": [2.489280500780031, 35.61585947269891], "label": "Playing rubik cube"}]}, "kfO5PGlsMJw": {"duration": 194.05, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kfO5PGlsMJw", "annotations": []}, "7YOV0kZQIAo": {"duration": 84.38, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7YOV0kZQIAo", "annotations": [{"segment": [1.974477379095164, 
82.40152262090484], "label": "Playing saxophone"}]}, "22n4w0uferc": {"duration": 189.475, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=22n4w0uferc", "annotations": [{"segment": [7.092954758190328, 177.61940873634947], "label": "Waxing skis"}]}, "Xk3KngL2ZH0": {"duration": 62.601, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Xk3KngL2ZH0", "annotations": [{"segment": [12.403005447737911, 40.52950599063962], "label": "Playing congas"}]}, "a_EqAwUOxpM": {"duration": 54.29, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=a_EqAwUOxpM", "annotations": []}, "rWdXyKZnL2U": {"duration": 151.49, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rWdXyKZnL2U", "annotations": [{"segment": [24.57823143401927, 31.90443503454424], "label": "Playing violin"}, {"segment": [54.35570413292723, 58.13697050739173], "label": "Playing violin"}, {"segment": [72.78937770844168, 85.55115172225938], "label": "Playing violin"}]}, "4z0meF-_b1I": {"duration": 105.09, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4z0meF-_b1I", "annotations": []}, "UnZtNrtvSPA": {"duration": 93.321, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UnZtNrtvSPA", "annotations": [{"segment": [9.608714096723869, 82.25641613104523], "label": "Braiding hair"}]}, "k25ECDpOD0E": {"duration": 183.09, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=k25ECDpOD0E", "annotations": [{"segment": [12.277539781591262, 54.820642745709826], "label": "Brushing teeth"}]}, "cfwJd80WCZU": {"duration": 17.88, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cfwJd80WCZU", "annotations": [{"segment": [0.01, 17.861], "label": "Powerbocking"}]}, "eepvgF5Fi_Q": {"duration": 139.065, "subset": "training", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=eepvgF5Fi_Q", "annotations": [{"segment": [0.01, 137.8083560936592], "label": "Playing blackjack"}]}, "5HCYb6qfkdk": {"duration": 30.7, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=5HCYb6qfkdk", "annotations": [{"segment": [5.156996919999999, 25.868408736349455], "label": "Shaving legs"}]}, "STaS53CNlZI": {"duration": 184.55, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=STaS53CNlZI", "annotations": [{"segment": [39.73199167966718, 71.11450684694054], "label": "Bungee jumping"}, {"segment": [76.29694054428843, 178.21813659213035], "label": "Bungee jumping"}]}, "lKSlIMfWZXI": {"duration": 187.71, "subset": "validation", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=lKSlIMfWZXI", "annotations": [{"segment": [10.09988845553822, 95.5830023400936], "label": "Washing dishes"}]}, "KWpATsiXF_M": {"duration": 170.76, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=KWpATsiXF_M", "annotations": []}, "TGVWaA_xTGE": {"duration": 122.973, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=TGVWaA_xTGE", "annotations": [{"segment": [4.407711388455538, 17.63084555382215], "label": "Playing congas"}, {"segment": [21.08035881435257, 29.517307332293292], "label": "Playing congas"}, {"segment": [37.56748205928237, 119.02758346333854], "label": "Playing congas"}]}, "qcsGJTJstZ4": {"duration": 30.93, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qcsGJTJstZ4", "annotations": [{"segment": [0.5790136287051483, 27.88915644929797], "label": "Dodgeball"}]}, "ss6XN-JP_x8": {"duration": 214.367, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ss6XN-JP_x8", "annotations": [{"segment": [16.721265210608426, 168.21592801872075], "label": "Making a cake"}]}, "rLCRgksVNE8": {"duration": 
113.55, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rLCRgksVNE8", "annotations": [{"segment": [0, 113.54523809523809], "label": "Getting a piercing"}]}, "UKnr1rv3jJ8": {"duration": 142.41, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UKnr1rv3jJ8", "annotations": []}, "NjD6iWI3NvM": {"duration": 51.711, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NjD6iWI3NvM", "annotations": []}, "3gc3nddr8TE": {"duration": 58.14, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=3gc3nddr8TE", "annotations": [{"segment": [3.918996967373845, 56.53729364182867], "label": "Playing field hockey"}]}, "n1NqFiDdlEU": {"duration": 65.6, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=n1NqFiDdlEU", "annotations": [{"segment": [9.721720773592848, 15.657087140628482], "label": "Doing motocross"}, {"segment": [23.843799371022456, 27.52781987469975], "label": "Doing motocross"}, {"segment": [32.43984634945398, 45.33391776911076], "label": "Doing motocross"}, {"segment": [57.61398578783152, 63.65168589703588], "label": "Doing motocross"}]}, "--1DO2V4K74": {"duration": 211.53, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=--1DO2V4K74", "annotations": [{"segment": [30.025882995319815, 205.2318595943838], "label": "Rock climbing"}]}, "C7yd6yEkxXE": {"duration": 8.52, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=C7yd6yEkxXE", "annotations": []}, "m978SIFnHS8": {"duration": 217.2, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=m978SIFnHS8", "annotations": [{"segment": [10.843030553875217, 214.48869814384412], "label": "Sailing"}]}, "8ZqRn0ovdAI": {"duration": 157.571, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8ZqRn0ovdAI", 
"annotations": [{"segment": [2.210560062402496, 142.96770982839317], "label": "Playing congas"}]}, "vt46wY2Q9JQ": {"duration": 54.66, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vt46wY2Q9JQ", "annotations": [{"segment": [0.2, 53.46781557067271], "label": "Decorating the Christmas tree"}]}, "TUfYisuVrs0": {"duration": 104.56, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=TUfYisuVrs0", "annotations": [{"segment": [7.5000571544461785, 100.92468214352574], "label": "Plataform diving"}]}, "HKgnushHynQ": {"duration": 234.7, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HKgnushHynQ", "annotations": [{"segment": [133.3432506021841, 191.5893408377535], "label": "Playing water polo"}]}, "yUYTlwiP16E": {"duration": 121.12, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yUYTlwiP16E", "annotations": [{"segment": [17.47085928627145, 42.402247673166926], "label": "Shoveling snow"}]}, "IxaoK4TbALQ": {"duration": 102.28, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IxaoK4TbALQ", "annotations": [{"segment": [59.67890577223089, 88.08223525741029], "label": "Polishing forniture"}]}, "f-4PjtdFbP4": {"duration": 12.702, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=f-4PjtdFbP4", "annotations": [{"segment": [0, 12.661], "label": "Blowing leaves"}]}, "lbbe4PnEGRY": {"duration": 159.289, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lbbe4PnEGRY", "annotations": []}, "7_5VT6QCqNI": {"duration": 184.088, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7_5VT6QCqNI", "annotations": [{"segment": [61.458329151326055, 159.10240350390015], "label": "Blow-drying hair"}]}, "3AgR6BJTQiI": {"duration": 7.08, "subset": "testing", "resolution": "426x240", "url": 
"https://www.youtube.com/watch?v=3AgR6BJTQiI", "annotations": []}, "NGvote9Y6gI": {"duration": 79.761, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=NGvote9Y6gI", "annotations": [{"segment": [0.01, 79.721], "label": "Playing blackjack"}]}, "-wWtZjqIH9o": {"duration": 19.32, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-wWtZjqIH9o", "annotations": [{"segment": [0.2, 12.987631825273011], "label": "Fun sliding down"}]}, "IsHM24qWmpI": {"duration": 208.028, "subset": "validation", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=IsHM24qWmpI", "annotations": [{"segment": [0.01, 0.02], "label": "Assembling bicycle"}]}, "ccKJg_f1UDo": {"duration": 33.02, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ccKJg_f1UDo", "annotations": [{"segment": [0.01, 33.01877551020408], "label": "Using the balance beam"}]}, "XXIDYn4H_1A": {"duration": 78.554, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XXIDYn4H_1A", "annotations": [{"segment": [34.25993176086634, 76.22834695327103], "label": "BMX"}]}, "ypf6WHYpeRU": {"duration": 156.712, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ypf6WHYpeRU", "annotations": [{"segment": [0.2, 106.59284604056161], "label": "Kite flying"}]}, "fc3yCFrmkCU": {"duration": 202.827, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fc3yCFrmkCU", "annotations": [{"segment": [12.656856287051482, 198.07980089235568], "label": "Snowboarding"}]}, "pzZ4TGcMK1k": {"duration": 184.37, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pzZ4TGcMK1k", "annotations": [{"segment": [0, 184.36643990929704], "label": "Plataform diving"}]}, "xe_TqEhok74": {"duration": 23.66, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xe_TqEhok74", 
"annotations": []}, "wfc_R5VnoHU": {"duration": 38.174, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wfc_R5VnoHU", "annotations": [{"segment": [0.01, 30.252689683740478], "label": "Scuba diving"}]}, "TtQPtLPTqBA": {"duration": 169.07, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=TtQPtLPTqBA", "annotations": [{"segment": [10.151807332293291, 29.928055382215287], "label": "Washing hands"}]}, "Scv939uhNCc": {"duration": 43.584, "subset": "training", "resolution": "423x240", "url": "https://www.youtube.com/watch?v=Scv939uhNCc", "annotations": [{"segment": [0.01, 40.92582293291731], "label": "Playing rubik cube"}]}, "O5dU3oaxWz8": {"duration": 35.39, "subset": "testing", "resolution": "160x112", "url": "https://www.youtube.com/watch?v=O5dU3oaxWz8", "annotations": []}, "fhOmyyI0eQE": {"duration": 55.752, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fhOmyyI0eQE", "annotations": []}, "ghHZ9M-jx0w": {"duration": 26.03, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ghHZ9M-jx0w", "annotations": []}, "HgKZ4KAuhdI": {"duration": 55.29, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=HgKZ4KAuhdI", "annotations": [{"segment": [0.2, 55.286371], "label": "Grooming horse"}]}, "HKkzII7ap7E": {"duration": 209.3, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HKkzII7ap7E", "annotations": [{"segment": [40.16286934742696, 86.52975916315563], "label": "Playing flauta"}, {"segment": [124.08040936603452, 160.65147738970785], "label": "Playing flauta"}, {"segment": [177.30437443620193, 209.1040589569161], "label": "Playing flauta"}]}, "vpu4Bwq-xHI": {"duration": 132.33, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vpu4Bwq-xHI", "annotations": [{"segment": [21.883050364191437, 126.75653701522211], "label": 
"Playing flauta"}]}, "qMcb8794F6U": {"duration": 114.428, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qMcb8794F6U", "annotations": []}, "4mSPGxeKK2k": {"duration": 60.442, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4mSPGxeKK2k", "annotations": [{"segment": [0.01, 60.441541950113375], "label": "Tug of war"}]}, "ktfxYSZjJyM": {"duration": 18.74, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ktfxYSZjJyM", "annotations": [{"segment": [1.2277958546913303, 18.738502], "label": "Windsurfing"}]}, "NSavdMErlYA": {"duration": 104.50999999999999, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NSavdMErlYA", "annotations": [{"segment": [43.04436223163212, 83.96911571700964], "label": "Preparing pasta"}]}, "C03QJbrKzaw": {"duration": 126.015, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=C03QJbrKzaw", "annotations": [{"segment": [72.12952730109205, 95.51757566302652], "label": "Trimming branches or hedges"}]}, "HsklqPvsMEQ": {"duration": 54.8, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HsklqPvsMEQ", "annotations": [{"segment": [10.58223489, 47.61793173790952], "label": "Paintball"}]}, "Pt5jMqQXTZ8": {"duration": 10.26, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Pt5jMqQXTZ8", "annotations": [{"segment": [0.7845519154099497, 9.975017210212219], "label": "Chopping wood"}]}, "pZN1qyF9nXE": {"duration": 31.719, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pZN1qyF9nXE", "annotations": []}, "Jxhz1ln7UXk": {"duration": 204.89, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Jxhz1ln7UXk", "annotations": [{"segment": [7.032204923571093, 145.11913796823984], "label": "Cricket"}]}, "r4bI22hGTg4": {"duration": 
23.48, "subset": "training", "resolution": "224x128", "url": "https://www.youtube.com/watch?v=r4bI22hGTg4", "annotations": [{"segment": [6.73916746723869, 13.643600621684868], "label": "Dodgeball"}]}, "Ji3qvOdmOZA": {"duration": 173.06, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ji3qvOdmOZA", "annotations": [{"segment": [11.609216608120107, 166.57875923744436], "label": "Getting a tattoo"}]}, "Yd98o-oEGvI": {"duration": 164.305, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Yd98o-oEGvI", "annotations": [{"segment": [0.19999999999999998, 103.29883625004864], "label": "Disc dog"}, {"segment": [113.03917226677068, 157.38333735101403], "label": "Disc dog"}]}, "Bg526A61c1w": {"duration": 238.93, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Bg526A61c1w", "annotations": [{"segment": [70.82690919000001, 193.63369253], "label": "Snatch"}]}, "G4kjV4ulJzo": {"duration": 54.1, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=G4kjV4ulJzo", "annotations": [{"segment": [0.01, 0.02], "label": "Walking the dog"}]}, "R6MnhM2omiE": {"duration": 51.15, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=R6MnhM2omiE", "annotations": [{"segment": [9.3089399, 36.02327846], "label": "Starting a campfire"}]}, "DgcoDX3HbKY": {"duration": 58.84, "subset": "validation", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=DgcoDX3HbKY", "annotations": [{"segment": [0.2, 49.35247737909517], "label": "Making an omelette"}]}, "AMMECm7Huhk": {"duration": 166.789, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AMMECm7Huhk", "annotations": [{"segment": [112.83514220000193, 119.8387019891282], "label": "Having an ice cream"}, {"segment": [156.4128525054579, 162.37884868897268], "label": "Having an ice cream"}]}, "I4B52tt3mNM": {"duration": 
62.276, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=I4B52tt3mNM", "annotations": [{"segment": [9.423948290171607, 23.70560188455538], "label": "Gargling mouthwash"}, {"segment": [30.79785162870515, 53.240450134165364], "label": "Gargling mouthwash"}]}, "XZ1IuZjsk8A": {"duration": 62.72, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XZ1IuZjsk8A", "annotations": [{"segment": [9.882101167039878, 55.18321839812368], "label": "Mixing drinks"}]}, "ndGz8-hHSt4": {"duration": 37.431, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ndGz8-hHSt4", "annotations": [{"segment": [13.934432223564398, 33.057835443925235], "label": "Welding"}]}, "jqYzz6YoMEY": {"duration": 54.586, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jqYzz6YoMEY", "annotations": [{"segment": [30.143017160686426, 52.38629485179407], "label": "Fun sliding down"}]}, "XSu5dteR2Ko": {"duration": 98.06, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XSu5dteR2Ko", "annotations": [{"segment": [19.733849851953263, 32.88974975325544], "label": "Playing harmonica"}, {"segment": [59.96642627145086, 78.47647111544461], "label": "Playing harmonica"}]}, "_EZnGbfMqsc": {"duration": 94.319, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=_EZnGbfMqsc", "annotations": [{"segment": [0.2, 94.11918367346938], "label": "Rope skipping"}, {"segment": [68.35573808994826, 69.08916753032659], "label": "Rope skipping"}]}, "iRwRwpVLE_Y": {"duration": 196.65, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iRwRwpVLE_Y", "annotations": [{"segment": [27.910594383775354, 174.21118252730108], "label": "Baking cookies"}]}, "8EDq3cjHhz0": {"duration": 190.194, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=8EDq3cjHhz0", "annotations": [{"segment": [64.3758767550702, 90.77888611544462], "label": "Playing ten pins"}]}, "YPXeQVi39nU": {"duration": 137.741, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YPXeQVi39nU", "annotations": [{"segment": [24.066943001560062, 33.09204662714509], "label": "Surfing"}, {"segment": [42.3320336723869, 74.13477978159128], "label": "Surfing"}, {"segment": [89.1766191575663, 92.61475387207489], "label": "Surfing"}]}, "XRiqpcV0Qmc": {"duration": 18.83, "subset": "testing", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=XRiqpcV0Qmc", "annotations": []}, "2Of52x_q5gI": {"duration": 178.492, "subset": "testing", "resolution": "1916x1080", "url": "https://www.youtube.com/watch?v=2Of52x_q5gI", "annotations": []}, "Lg-mJS-BZZo": {"duration": 15.047, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Lg-mJS-BZZo", "annotations": [{"segment": [0.5249879634878212, 13.837181845107757], "label": "Cutting the grass"}]}, "o1bO6f-Uo8A": {"duration": 141.97, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=o1bO6f-Uo8A", "annotations": [{"segment": [16.826708268330734, 115.96634676], "label": "Mowing the lawn"}]}, "gmBGjj6NoUg": {"duration": 171.433, "subset": "testing", "resolution": "512x288", "url": "https://www.youtube.com/watch?v=gmBGjj6NoUg", "annotations": []}, "idTzZaMtGy8": {"duration": 52.11, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=idTzZaMtGy8", "annotations": [{"segment": [0.8941674680187207, 52.105577], "label": "Playing lacrosse"}]}, "Q-HdDcyC9d8": {"duration": 65.67, "subset": "training", "resolution": "562x480", "url": "https://www.youtube.com/watch?v=Q-HdDcyC9d8", "annotations": [{"segment": [3.2269578783151327, 61.41464279523562], "label": "Cleaning shoes"}]}, "MpeN-IVIBc4": {"duration": 113.24000000000001, "subset": "testing", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MpeN-IVIBc4", "annotations": []}, "Z5lRHCRBA_E": {"duration": 55.84, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=Z5lRHCRBA_E", "annotations": [{"segment": [4.6569760234009365, 27.00653813], "label": "Using uneven bars"}, {"segment": [31.037306299999997, 49.57285692199688], "label": "Using uneven bars"}]}, "JguB9cLEXGY": {"duration": 83.17, "subset": "training", "resolution": "426x234", "url": "https://www.youtube.com/watch?v=JguB9cLEXGY", "annotations": [{"segment": [3.89269317711484, 81.3572858845554], "label": "Javelin throw"}]}, "yFJVEplkVHA": {"duration": 102.7, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=yFJVEplkVHA", "annotations": [{"segment": [4.3259677162596715, 4.4861887427878075], "label": "Playing guitarra"}, {"segment": [14.2596713610041, 23.232048846579715], "label": "Playing guitarra"}, {"segment": [35.08840480966178, 40.05525663203399], "label": "Playing guitarra"}]}, "W4XaqnwD6gU": {"duration": 186.97, "subset": "training", "resolution": "450x360", "url": "https://www.youtube.com/watch?v=W4XaqnwD6gU", "annotations": [{"segment": [31.273970090000002, 155.58084971000002], "label": "Playing violin"}]}, "10uSu442cOE": {"duration": 141.2, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=10uSu442cOE", "annotations": [{"segment": [7.269294788118056, 126.88223630169698], "label": "Playing flauta"}]}, "2oNsMva04MM": {"duration": 199.82999999999998, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2oNsMva04MM", "annotations": [{"segment": [69.34675897035882, 76.82685881435258], "label": "Painting"}, {"segment": [78.69688377535103, 83.99528783151327], "label": "Painting"}, {"segment": [111.11064976599064, 117.96740795631825], "label": "Painting"}]}, "AyWXuUCDSjw": {"duration": 74.35, "subset": "training", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=AyWXuUCDSjw", "annotations": [{"segment": [51.57111904122016, 63.24978092768983], "label": "Drinking beer"}]}, "X2LI_I_qw-A": {"duration": 32.648, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=X2LI_I_qw-A", "annotations": [{"segment": [0.2, 31.17914953271028], "label": "Peeling potatoes"}]}, "No7CCXJOmT0": {"duration": 200.04, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=No7CCXJOmT0", "annotations": []}, "WJwLjPDvLrY": {"duration": 177.006, "subset": "training", "resolution": "488x360", "url": "https://www.youtube.com/watch?v=WJwLjPDvLrY", "annotations": [{"segment": [0.01, 144.87133385335414], "label": "Playing beach volleyball"}]}, "n9oQEa-d5rU": {"duration": 219.66, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=n9oQEa-d5rU", "annotations": []}, "9bnt6lDcF6Y": {"duration": 83.616, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9bnt6lDcF6Y", "annotations": [{"segment": [48.18666190843724, 74.97468788667175], "label": "Doing crunches"}]}, "D4Y6DyRD0kY": {"duration": 61.12, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=D4Y6DyRD0kY", "annotations": [{"segment": [5.243869804992199, 59.01737107800312], "label": "Kayaking"}]}, "y4hcu0hOAEw": {"duration": 27.423, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=y4hcu0hOAEw", "annotations": []}, "OMa1i3ITBbo": {"duration": 141.107, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OMa1i3ITBbo", "annotations": [{"segment": [44.70604417463095, 126.68910391917923], "label": "Rock-paper-scissors"}]}, "I8nK8c7k9ko": {"duration": 146.843, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=I8nK8c7k9ko", "annotations": [{"segment": [0.01, 95.45935989946153], 
"label": "River tubing"}]}, "cjFHlsWZY4c": {"duration": 105.35, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cjFHlsWZY4c", "annotations": [{"segment": [3.75776316, 69.85346939625586], "label": "Doing nails"}]}, "ok6LTNirAPo": {"duration": 60.79, "subset": "training", "resolution": "293x240", "url": "https://www.youtube.com/watch?v=ok6LTNirAPo", "annotations": [{"segment": [0.01, 60.07020963980986], "label": "Having an ice cream"}]}, "0fw8it7Gj7k": {"duration": 163.19, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0fw8it7Gj7k", "annotations": [{"segment": [3.42533711, 85.15912867394695], "label": "Polishing forniture"}]}, "NxvmOPe6Fx4": {"duration": 88.98, "subset": "testing", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=NxvmOPe6Fx4", "annotations": []}, "8OCTuqJVToU": {"duration": 119.723, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8OCTuqJVToU", "annotations": [{"segment": [0.01, 119.661], "label": "Swimming"}]}, "ksKlcjeIBi0": {"duration": 5.06, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ksKlcjeIBi0", "annotations": [{"segment": [0.17372212480499222, 4.903701795631825], "label": "Using the balance beam"}]}, "lZ6zN5Q447M": {"duration": 222.54, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lZ6zN5Q447M", "annotations": [{"segment": [91.00740218492095, 151.56366190498318], "label": "Mixing drinks"}]}, "YrXzg15ZECw": {"duration": 194.398, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YrXzg15ZECw", "annotations": []}, "DjT4-5H3xDQ": {"duration": 105.535, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DjT4-5H3xDQ", "annotations": [{"segment": [22.743518007759736, 94.22784043766629], "label": "Sharpening knives"}]}, "Gi1N3FtCZGo": {"duration": 14.86, 
"subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=Gi1N3FtCZGo", "annotations": [{"segment": [3.8021314820592824, 14.86077], "label": "Clean and jerk"}]}, "3TxZTZEEg44": {"duration": 156.81, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3TxZTZEEg44", "annotations": [{"segment": [76.4219315522621, 114.08265942121686], "label": "Tennis serve with ball bouncing"}]}, "lUw8OA9R4mQ": {"duration": 219.22, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lUw8OA9R4mQ", "annotations": [{"segment": [30.77958228533223, 219.21902494331067], "label": "Zumba"}]}, "aFSaGCvYXXY": {"duration": 220.961, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aFSaGCvYXXY", "annotations": [{"segment": [0, 13.786021840873635], "label": "Waterskiing"}, {"segment": [149.9229875195008, 155.78204680187207], "label": "Waterskiing"}]}, "2gV2apTrRHs": {"duration": 70.26, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2gV2apTrRHs", "annotations": [{"segment": [4.265054602184088, 63.32944716000001], "label": "Breakdancing"}]}, "iHzyX1djHU4": {"duration": 177.238, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iHzyX1djHU4", "annotations": [{"segment": [47.76554290171607, 170.07846489859594], "label": "Wakeboarding"}]}, "LElk0AlBpbI": {"duration": 175.194, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LElk0AlBpbI", "annotations": [{"segment": [24.59358814352574, 122.14815444617784], "label": "Longboarding"}, {"segment": [131.4390655226209, 148.65457722308892], "label": "Longboarding"}, {"segment": [151.38719812792513, 156.30591575663027], "label": "Longboarding"}]}, "0EbXjHsMBmM": {"duration": 208.144, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0EbXjHsMBmM", "annotations": 
[{"segment": [0.01, 164.89077691107647], "label": "Waxing skis"}]}, "j8NwT9JBQJk": {"duration": 157.01, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=j8NwT9JBQJk", "annotations": [{"segment": [1.7143010920436816, 155.2666989079563], "label": "Sailing"}]}, "wXsQbzBQ6ck": {"duration": 122.49, "subset": "testing", "resolution": "400x228", "url": "https://www.youtube.com/watch?v=wXsQbzBQ6ck", "annotations": []}, "-UfNSW7yeSo": {"duration": 153.484, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-UfNSW7yeSo", "annotations": [{"segment": [149.1730171856474, 153.483], "label": "Welding"}]}, "mDopaDI7U3c": {"duration": 207.7, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mDopaDI7U3c", "annotations": []}, "MzNI-qdQfQc": {"duration": 107.021, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MzNI-qdQfQc", "annotations": [{"segment": [0.01, 101.51114219032762], "label": "Playing rubik cube"}]}, "SjfrdNchlqw": {"duration": 121.464, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=SjfrdNchlqw", "annotations": [{"segment": [0, 16.864678418429254], "label": "Playing blackjack"}, {"segment": [58.5526475426364, 69.35362136118098], "label": "Playing blackjack"}]}, "n9P4ltD0g2k": {"duration": 213.37, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=n9P4ltD0g2k", "annotations": [{"segment": [43.75750944, 193.02116424336972], "label": "Mixing drinks"}]}, "WglqrQ9uR-A": {"duration": 212.51, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WglqrQ9uR-A", "annotations": [{"segment": [11.271929560175606, 62.65866726097616], "label": "Longboarding"}, {"segment": [128.9641352620091, 158.80159441029642], "label": "Longboarding"}]}, "SgkdciuEuKs": {"duration": 68.499, "subset": "training", "resolution": "720x480", 
"url": "https://www.youtube.com/watch?v=SgkdciuEuKs", "annotations": [{"segment": [30.915716510903426, 49.692875389408094], "label": "Cleaning sink"}]}, "2XOTxAZZhsQ": {"duration": 58.84, "subset": "validation", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=2XOTxAZZhsQ", "annotations": [{"segment": [37.03385499, 50.489745899999996], "label": "Washing dishes"}, {"segment": [11.51596908, 32.366745509999994], "label": "Washing dishes"}]}, "gtAl_FkXdR8": {"duration": 144.24, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gtAl_FkXdR8", "annotations": [{"segment": [10.576272759753927, 128.7154886177847], "label": "Paintball"}]}, "B5Zi054Fa5k": {"duration": 120.675, "subset": "training", "resolution": "1916x1080", "url": "https://www.youtube.com/watch?v=B5Zi054Fa5k", "annotations": [{"segment": [0, 103.16596615973482], "label": "Playing rubik cube"}]}, "iFA1XhZ6VM8": {"duration": 144.521, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=iFA1XhZ6VM8", "annotations": [{"segment": [13.73174589046418, 98.59843652336448], "label": "Making a cake"}]}, "IPVMq_-SzfQ": {"duration": 63.693, "subset": "testing", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=IPVMq_-SzfQ", "annotations": []}, "3A26j50_awY": {"duration": 26.61, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=3A26j50_awY", "annotations": [{"segment": [12.824695007800312, 22.31372620904836], "label": "Long jump"}]}, "47SHPAe0s0k": {"duration": 170.04, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=47SHPAe0s0k", "annotations": [{"segment": [20.23526025398764, 163.78906704214964], "label": "Bullfighting"}]}, "LgUCZHYBBEc": {"duration": 65.11, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LgUCZHYBBEc", "annotations": [{"segment": [1.2188845741029641, 62.670981851794075], "label": 
"Playing polo"}]}, "qeyFjCAA_dg": {"duration": 144.661, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qeyFjCAA_dg", "annotations": [{"segment": [41.52173103876156, 52.78811090327227], "label": "Doing crunches"}, {"segment": [74.1942326458426, 127.82220080091356], "label": "Doing crunches"}]}, "wu0G4yQIwKo": {"duration": 143.035, "subset": "validation", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=wu0G4yQIwKo", "annotations": [{"segment": [3.7816358534076997, 17.351047271681594], "label": "Painting fence"}, {"segment": [23.35718019452414, 25.804123237163694], "label": "Painting fence"}, {"segment": [29.363313117366683, 32.922502997569666], "label": "Painting fence"}, {"segment": [74.07563598741673, 80.30421827777195], "label": "Painting fence"}, {"segment": [97.43281957624885, 106.33079330242612], "label": "Painting fence"}]}, "TpB_zMG3XBA": {"duration": 163.051, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TpB_zMG3XBA", "annotations": [{"segment": [10.143109301327563, 145.30009121469075], "label": "Waterskiing"}]}, "sV6JEbmqDUw": {"duration": 57.563, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sV6JEbmqDUw", "annotations": [{"segment": [0.01, 9.967880695787832], "label": "Elliptical trainer"}, {"segment": [25.683007918876754, 35.56108788767551], "label": "Elliptical trainer"}, {"segment": [37.89590678939158, 40.230725691107644], "label": "Elliptical trainer"}, {"segment": [44.09215695163807, 47.59438530421217], "label": "Elliptical trainer"}]}, "l8aaaDY4ehA": {"duration": 186.457, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=l8aaaDY4ehA", "annotations": [{"segment": [0.2, 23.852435784506206], "label": "Cutting the grass"}, {"segment": [29.088336322568548, 123.62542937091634], "label": "Cutting the grass"}, {"segment": [128.86132864586583, 134.3881124929797], "label": 
"Cutting the grass"}, {"segment": [139.9148963400936, 171.91206598127923], "label": "Cutting the grass"}]}, "Mno1JV_6y_M": {"duration": 183.35, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=Mno1JV_6y_M", "annotations": [{"segment": [52.61108268330734, 88.3523073322933], "label": "Ironing clothes"}, {"segment": [120.38958034321371, 162.1398861154446], "label": "Ironing clothes"}]}, "rNXlLVuSlMA": {"duration": 15.42, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=rNXlLVuSlMA", "annotations": []}, "ds3mAmUPxYA": {"duration": 141.43, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ds3mAmUPxYA", "annotations": []}, "F1xZKduLnWg": {"duration": 130.636, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=F1xZKduLnWg", "annotations": [{"segment": [21.189553822152888, 23.634502340093604], "label": "Doing fencing"}, {"segment": [33.00680499219969, 42.786599063962555], "label": "Doing fencing"}, {"segment": [46.45402184087363, 53.992613104524175], "label": "Doing fencing"}, {"segment": [65.60611856474259, 68.2548127925117], "label": "Doing fencing"}, {"segment": [80.88704680187207, 92.5005522620905], "label": "Doing fencing"}, {"segment": [99.22416068642745, 103.9103120124805], "label": "Doing fencing"}, {"segment": [115.52381747269891, 117.76502028081123], "label": "Doing fencing"}]}, "Ggw7rtn3J-8": {"duration": 61.417, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ggw7rtn3J-8", "annotations": [{"segment": [22.116721952693855, 53.466267124033415], "label": "Welding"}]}, "I0w8zmcO_Cw": {"duration": 94.53, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=I0w8zmcO_Cw", "annotations": [{"segment": [34.21300091107644, 58.692992942277684], "label": "Fixing bicycle"}]}, "qxlVkDpdyOA": {"duration": 118.747, "subset": "training", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=qxlVkDpdyOA", "annotations": [{"segment": [0.2, 118.50099999999999], "label": "Powerbocking"}]}, "YuppPIPojmw": {"duration": 114.382, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YuppPIPojmw", "annotations": []}, "ROvaMfVEMSw": {"duration": 14.072, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ROvaMfVEMSw", "annotations": [{"segment": [0.01, 14.041], "label": "Calf roping"}]}, "CJWSed5v4jE": {"duration": 77.02, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=CJWSed5v4jE", "annotations": [{"segment": [1.3523865051855641, 68.65542368275565], "label": "Mowing the lawn"}]}, "mi8tpPUeAWA": {"duration": 42.82, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=mi8tpPUeAWA", "annotations": []}, "Jd5tpIdMGh8": {"duration": 21.34, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Jd5tpIdMGh8", "annotations": [{"segment": [0.01, 20.62733944773791], "label": "Smoking a cigarette"}]}, "s7HB851cCrg": {"duration": 123.11, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=s7HB851cCrg", "annotations": [{"segment": [32.30581992, 123.041666], "label": "Playing badminton"}, {"segment": [0.3839053541341654, 32.05609707020281], "label": "Playing badminton"}]}, "eRN5gqZFXHI": {"duration": 186.02, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=eRN5gqZFXHI", "annotations": [{"segment": [77.91738334023157, 132.4740614332056], "label": "Cleaning shoes"}]}, "6asjaqo0OHc": {"duration": 158.128, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6asjaqo0OHc", "annotations": [{"segment": [43.52806535387501, 151.24159106091366], "label": "Assembling bicycle"}]}, "JvYDila36v4": {"duration": 122.3, "subset": "testing", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=JvYDila36v4", "annotations": []}, "qMJi2nXWOkg": {"duration": 5.25, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qMJi2nXWOkg", "annotations": [{"segment": [0.01, 0.02], "label": "Playing kickball"}]}, "GUpr-QSMZFs": {"duration": 43.7, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GUpr-QSMZFs", "annotations": []}, "EoYvQ_pn8KE": {"duration": 45.535, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=EoYvQ_pn8KE", "annotations": [{"segment": [0.01, 32.53467019968799], "label": "Curling"}]}, "7ghaFHKMUZ0": {"duration": 70.264, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7ghaFHKMUZ0", "annotations": [{"segment": [0.01, 70.221], "label": "Having an ice cream"}]}, "hxQhQsl-N1k": {"duration": 9.335, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=hxQhQsl-N1k", "annotations": [{"segment": [0, 8.977645158346334], "label": "Kneeling"}]}, "HX6ajS7kbrc": {"duration": 148.515, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HX6ajS7kbrc", "annotations": []}, "SCGQIhvDcIo": {"duration": 213.833, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SCGQIhvDcIo", "annotations": [{"segment": [9.67274414976599, 196.4567691107644], "label": "Playing pool"}]}, "Jp8L9h4aaV4": {"duration": 183.83, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Jp8L9h4aaV4", "annotations": [{"segment": [4.01402496099844, 175.75695007800311], "label": "Doing step aerobics"}]}, "5rtrGkZNfLo": {"duration": 70.4, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=5rtrGkZNfLo", "annotations": [{"segment": [4.279717628705148, 12.290471138845554], "label": "Smoking hookah"}]}, "RpVkFIpEhIE": {"duration": 
14.88, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RpVkFIpEhIE", "annotations": [{"segment": [4.564563182527301, 14.181505460218409], "label": "Hammer throw"}]}, "vbHLA5l_BRc": {"duration": 154.691, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vbHLA5l_BRc", "annotations": [{"segment": [59.58865366614664, 131.49804212168488], "label": "Waterskiing"}]}, "wVHD_Y5J3qE": {"duration": 141.108, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=wVHD_Y5J3qE", "annotations": [{"segment": [54.79592667706708, 138.6599531981279], "label": "Blow-drying hair"}]}, "ssBiSN8XofA": {"duration": 176.12, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ssBiSN8XofA", "annotations": [{"segment": [40.11546015819033, 52.47981431653666], "label": "Layup drill in basketball"}, {"segment": [62.92082449469579, 120.62114390031202], "label": "Layup drill in basketball"}]}, "y1IjkACdnfs": {"duration": 28.84, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=y1IjkACdnfs", "annotations": [{"segment": [0.1919345474405972, 26.290945048998232], "label": "Doing motocross"}]}, "B3fiiuk0EqE": {"duration": 93.55, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=B3fiiuk0EqE", "annotations": [{"segment": [0.01, 93.501], "label": "Shoveling snow"}]}, "-9B2XtqICFw": {"duration": 184.3, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=-9B2XtqICFw", "annotations": [{"segment": [13.513181976599062, 149.79505978315132], "label": "Playing badminton"}]}, "9TiMh2Qw0KM": {"duration": 71.61, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9TiMh2Qw0KM", "annotations": []}, "2DvMNOas7jY": {"duration": 170.179, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=2DvMNOas7jY", "annotations": [{"segment": [0.19999999999999998, 155.86490134994807], "label": "Longboarding"}]}, "WYyqptHdgkk": {"duration": 139.97, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WYyqptHdgkk", "annotations": [{"segment": [0.2, 130.53472386895476], "label": "Slacklining"}]}, "uAKyRSEaY1s": {"duration": 41.82, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=uAKyRSEaY1s", "annotations": [{"segment": [7.816661368597283, 30.550117216510902], "label": "Playing rubik cube"}]}, "FgO-dLvr3rs": {"duration": 60.256, "subset": "testing", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=FgO-dLvr3rs", "annotations": []}, "jdz4PzF1pO0": {"duration": 191.217, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jdz4PzF1pO0", "annotations": [{"segment": [10.885142745709828, 12.67448127925117], "label": "Rock-paper-scissors"}, {"segment": [19.23538923556942, 20.726504680187205], "label": "Rock-paper-scissors"}, {"segment": [79.47645319812793, 82.4586840873635], "label": "Rock-paper-scissors"}, {"segment": [89.91426131045242, 96.77339235569423], "label": "Rock-paper-scissors"}, {"segment": [103.33430031201247, 105.42186193447738], "label": "Rock-paper-scissors"}, {"segment": [107.21120046801872, 108.70231591263651], "label": "Rock-paper-scissors"}, {"segment": [112.28099297971917, 115.26322386895477], "label": "Rock-paper-scissors"}, {"segment": [116.15789313572544, 118.84190093603743], "label": "Rock-paper-scissors"}, {"segment": [127.50370904836194, 137.04784633385336], "label": "Rock-paper-scissors"}, {"segment": [140.32864352574103, 147.188492199688], "label": "Rock-paper-scissors"}, {"segment": [159.7151723868955, 162.99596957878316], "label": "Rock-paper-scissors"}]}, "CDecXoRSpIc": {"duration": 55.937, "subset": "testing", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=CDecXoRSpIc", "annotations": []}, "Vl4gId1_zxo": {"duration": 223.446, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Vl4gId1_zxo", "annotations": [{"segment": [32.7673767051482, 177.78044808112324], "label": "Kite flying"}]}, "Ix8WFQ6-yx8": {"duration": 69.24, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ix8WFQ6-yx8", "annotations": [{"segment": [0.972195205928237, 67.08146920904835], "label": "Playing squash"}]}, "od1jHUzgrAU": {"duration": 169.37, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=od1jHUzgrAU", "annotations": [{"segment": [91.52699279953198, 135.90371658112326], "label": "Polishing shoes"}]}, "kyafh7Ownao": {"duration": 39.962, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kyafh7Ownao", "annotations": [{"segment": [21.68403744149766, 31.65370982839314], "label": "Hand car wash"}]}, "nreNuE_HuU4": {"duration": 238.887, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nreNuE_HuU4", "annotations": []}, "BsxCumewgFQ": {"duration": 88.352, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BsxCumewgFQ", "annotations": []}, "uJuGXnGqozs": {"duration": 53.82, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uJuGXnGqozs", "annotations": [{"segment": [4.7942269059772435, 37.18950260691182], "label": "Using uneven bars"}]}, "v0NEZPgjcXI": {"duration": 27.075, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=v0NEZPgjcXI", "annotations": [{"segment": [0.01, 26.24745344254153], "label": "Doing a powerbomb"}]}, "U0d68z5HTwE": {"duration": 127.385, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=U0d68z5HTwE", "annotations": [{"segment": [100.65191213422037, 
109.97754627440727], "label": "Baking cookies"}]}, "pExl_cwmT8M": {"duration": 52.315, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=pExl_cwmT8M", "annotations": [{"segment": [16.027873128617895, 42.38843721889062], "label": "Rollerblading"}]}, "nypZiLC1_Z8": {"duration": 153.95, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nypZiLC1_Z8", "annotations": [{"segment": [7.12317396, 138.71248509828393], "label": "Fixing bicycle"}]}, "3TVKXCyNObQ": {"duration": 216.248, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3TVKXCyNObQ", "annotations": [{"segment": [0.01, 216.201], "label": "Longboarding"}]}, "vHnZ3NrZER0": {"duration": 90.465, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=vHnZ3NrZER0", "annotations": []}, "RoTbDaCV3Ek": {"duration": 237.43, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=RoTbDaCV3Ek", "annotations": []}, "RatbnDSuY7w": {"duration": 52.29, "subset": "training", "resolution": "626x360", "url": "https://www.youtube.com/watch?v=RatbnDSuY7w", "annotations": [{"segment": [22.048914976599065, 36.39497425897036], "label": "High jump"}]}, "DHjoAOus3xM": {"duration": 65.225, "subset": "testing", "resolution": "328x240", "url": "https://www.youtube.com/watch?v=DHjoAOus3xM", "annotations": []}, "SO5KnbKienU": {"duration": 202.27, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=SO5KnbKienU", "annotations": [{"segment": [41.65290373247583, 192.80245591320252], "label": "Baton twirling"}]}, "oXh-7Uff1jQ": {"duration": 18.413, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=oXh-7Uff1jQ", "annotations": []}, "p1yiPvIkRec": {"duration": 6.758, "subset": "training", "resolution": "540x360", "url": "https://www.youtube.com/watch?v=p1yiPvIkRec", "annotations": [{"segment": [0, 
4.770756630265211], "label": "Playing rubik cube"}]}, "5tFfEA1JWjA": {"duration": 56.96, "subset": "testing", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=5tFfEA1JWjA", "annotations": []}, "gqJE_SZyoIc": {"duration": 105.907, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=gqJE_SZyoIc", "annotations": [{"segment": [0.01, 105.881], "label": "Elliptical trainer"}]}, "FmfAB-ppHBs": {"duration": 25.87, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=FmfAB-ppHBs", "annotations": []}, "gVixuVE0-ek": {"duration": 135.68, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gVixuVE0-ek", "annotations": [{"segment": [9.099162246489858, 131.40883151326054], "label": "Doing kickboxing"}]}, "z6huZTFj0FU": {"duration": 157.827, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=z6huZTFj0FU", "annotations": []}, "fVg3StD8LL0": {"duration": 60.256, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fVg3StD8LL0", "annotations": [{"segment": [21.620639048361937, 48.78744202652106], "label": "Laying tile"}]}, "h91i2x9Gouk": {"duration": 142.06, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=h91i2x9Gouk", "annotations": [{"segment": [36.33098907956318, 45.63526677067083], "label": "Mooping floor"}, {"segment": [70.67815756630264, 79.54062246489859], "label": "Mooping floor"}, {"segment": [86.85215600624024, 90.17558034321372], "label": "Mooping floor"}]}, "HHDMFrN7vFs": {"duration": 4.44, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HHDMFrN7vFs", "annotations": [{"segment": [0.01, 4.267195202808113], "label": "Spinning"}]}, "nvsK9ggeics": {"duration": 73.259, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nvsK9ggeics", "annotations": [{"segment": 
[7.767594383775352, 73.201], "label": "Playing drums"}]}, "TwQQPCjT_A4": {"duration": 53.499, "subset": "training", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=TwQQPCjT_A4", "annotations": [{"segment": [3.3642125881230722, 51.32922193391746], "label": "Beach soccer"}]}, "zX9DZ_x9rJ4": {"duration": 173.988, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zX9DZ_x9rJ4", "annotations": [{"segment": [9.771507529688943, 157.70127322776912], "label": "Making a cake"}]}, "06Eq9tgprBw": {"duration": 172.59, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=06Eq9tgprBw", "annotations": [{"segment": [0.01, 172.593672], "label": "Zumba"}]}, "QIC1jlFIjtg": {"duration": 140.39, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QIC1jlFIjtg", "annotations": []}, "isUCIXYjOXE": {"duration": 45.047, "subset": "validation", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=isUCIXYjOXE", "annotations": [{"segment": [17.4804992199688, 25.765003120124806], "label": "Playing pool"}]}, "IjBMVPd2Rcs": {"duration": 84.242, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IjBMVPd2Rcs", "annotations": [{"segment": [41.531432779047336, 42.97258592001428], "label": "Using the rowing machine"}, {"segment": [66.9481456431869, 73.62985900866126], "label": "Using the rowing machine"}]}, "50oy8wP2Wo8": {"duration": 95.9, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=50oy8wP2Wo8", "annotations": [{"segment": [9.26566302652106, 18.232433697347894], "label": "Getting a haircut"}, {"segment": [20.922464898595944, 34.07372854914197], "label": "Getting a haircut"}, {"segment": [82.7931825273011, 87.42601404056163], "label": "Getting a haircut"}]}, "39FIjDeot-s": {"duration": 105.84, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=39FIjDeot-s", "annotations": [{"segment": [13.367021840873635, 49.681904836193446], "label": "Doing karate"}, {"segment": [70.47898127925117, 105.601], "label": "Doing karate"}]}, "VpCf0THwEJA": {"duration": 138.113, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VpCf0THwEJA", "annotations": []}, "EoQgUlLb3U4": {"duration": 52.11, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=EoQgUlLb3U4", "annotations": [{"segment": [0.24365522620904836, 26.39598283931357], "label": "Discus throw"}]}, "dq1mR2tK7Vk": {"duration": 20.78, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dq1mR2tK7Vk", "annotations": []}, "zEvCa6PJDrA": {"duration": 142.733, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zEvCa6PJDrA", "annotations": []}, "bCD6_kGsF9A": {"duration": 134.258, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bCD6_kGsF9A", "annotations": [{"segment": [13.989505988313635, 127.36716989368917], "label": "Rollerblading"}]}, "1FlnQzPzS2Q": {"duration": 81.015, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1FlnQzPzS2Q", "annotations": [{"segment": [0.20000000000000004, 80.81442176870748], "label": "Running a marathon"}]}, "OtD2B81S2og": {"duration": 15.54, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=OtD2B81S2og", "annotations": []}, "Yjkz62cLVdQ": {"duration": 142.943, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Yjkz62cLVdQ", "annotations": [{"segment": [0.01, 132.20014508580343], "label": "Welding"}]}, "Ap7GCrt9C4w": {"duration": 143.5, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ap7GCrt9C4w", "annotations": [{"segment": [12.814008030421217, 135.82848512246488], "label": "Clean 
and jerk"}]}, "3nvVkcxxaJ4": {"duration": 10.96, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3nvVkcxxaJ4", "annotations": [{"segment": [0, 10.39185647425897], "label": "Playing piano"}]}, "feKK7p9XR-o": {"duration": 147.94, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=feKK7p9XR-o", "annotations": []}, "4uwtqRBE4Kk": {"duration": 223.028, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=4uwtqRBE4Kk", "annotations": [{"segment": [25.74732252963588, 58.45338087809227], "label": "Surfing"}, {"segment": [65.7600534878538, 77.24196758890764], "label": "Surfing"}, {"segment": [86.2883241533743, 90.4635656446666], "label": "Surfing"}, {"segment": [116.55882496524352, 155.8756823415794], "label": "Surfing"}, {"segment": [164.57410211510503, 172.5766483067486], "label": "Surfing"}, {"segment": [175.01220584333578, 180.23125770745116], "label": "Surfing"}, {"segment": [185.4503087457098, 213.98112547581903], "label": "Surfing"}]}, "EzX0FZI6pCg": {"duration": 4.44, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EzX0FZI6pCg", "annotations": [{"segment": [0.1190600624024961, 3.1154987999999997], "label": "Discus throw"}]}, "CSruNOwxCRY": {"duration": 99.057, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CSruNOwxCRY", "annotations": [{"segment": [12.953401869158878, 88.36070560747665], "label": "Playing beach volleyball"}]}, "QqR_8CdDFKI": {"duration": 31.649, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QqR_8CdDFKI", "annotations": []}, "NzMZlzy5Q9A": {"duration": 100.102, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=NzMZlzy5Q9A", "annotations": [{"segment": [12.629590883082203, 19.646030262572317], "label": "Sumo"}, {"segment": [52.2334931584264, 62.212428037383184], "label": "Sumo"}]}, 
"fZBfB3PWVds": {"duration": 234.94, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fZBfB3PWVds", "annotations": []}, "svZ7DPi7rLI": {"duration": 53.92, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=svZ7DPi7rLI", "annotations": [{"segment": [10.548665399999999, 45.28620504], "label": "Javelin throw"}]}, "rBJBnf4F9sA": {"duration": 209.49, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=rBJBnf4F9sA", "annotations": [{"segment": [81.05089991898996, 188.90088605616225], "label": "Polishing forniture"}]}, "rwQDoaheIFs": {"duration": 33.46, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rwQDoaheIFs", "annotations": []}, "M2ntILX6VP0": {"duration": 213.276, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=M2ntILX6VP0", "annotations": [{"segment": [20.296087816301768, 56.2301449336885], "label": "Calf roping"}, {"segment": [60.555540697818394, 98.1532115706397], "label": "Calf roping"}]}, "-ap649M020k": {"duration": 212.927, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-ap649M020k", "annotations": [{"segment": [9.965381472401754, 10.961919619641929], "label": "Longboarding"}, {"segment": [15.280251591016023, 32.88575885892579], "label": "Longboarding"}, {"segment": [37.86844959512666, 44.84421662580789], "label": "Longboarding"}, {"segment": [54.80959809820965, 60.456647599237314], "label": "Longboarding"}, {"segment": [72.7472847485328, 74.07600227818637], "label": "Longboarding"}, {"segment": [83.04484560334795, 84.37356313300153], "label": "Longboarding"}, {"segment": [89.35625386920239, 91.34933016368275], "label": "Longboarding"}, {"segment": [96.66420028229702, 104.9686848426318], "label": "Longboarding"}, {"segment": [112.9409900205532, 119.58457766882104], "label": "Longboarding"}, {"segment": [123.90290964019513, 
129.8821385236362], "label": "Longboarding"}, {"segment": [139.18316123121116, 142.50495505534508], "label": "Longboarding"}, {"segment": [146.82328702671919, 148.81636332119953], "label": "Longboarding"}, {"segment": [162.4357180001486, 165.09315305945572], "label": "Longboarding"}, {"segment": [167.08622925429017, 174.7263550452418], "label": "Longboarding"}, {"segment": [180.04122516068642, 190.33878600936038], "label": "Longboarding"}]}, "H0JTf557Fwc": {"duration": 199.739, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=H0JTf557Fwc", "annotations": []}, "rnhtmtW_a8o": {"duration": 198.93, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rnhtmtW_a8o", "annotations": [{"segment": [9.930750702028082, 190.8566150546022], "label": "Volleyball"}]}, "AyicWbHhUWc": {"duration": 54.27, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AyicWbHhUWc", "annotations": [{"segment": [9.33737855, 53.54429485179407], "label": "Clean and jerk"}]}, "smGijLg8Cho": {"duration": 124.274, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=smGijLg8Cho", "annotations": [{"segment": [27.135319812792513, 113.58069578783152], "label": "Throwing darts"}]}, "SHzmpIt0Euw": {"duration": 179.211, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=SHzmpIt0Euw", "annotations": [{"segment": [18.73194675270004, 165.51212653131975], "label": "Installing carpet"}]}, "F_uouNToDWo": {"duration": 106.418, "subset": "training", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=F_uouNToDWo", "annotations": [{"segment": [10.127015080603224, 100.44006602964119], "label": "Using the rowing machine"}]}, "KYjWkaXVnbw": {"duration": 169.76, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KYjWkaXVnbw", "annotations": [{"segment": [58.737306, 99.18178951872075], 
"label": "Mixing drinks"}]}, "f7ndXtwTep0": {"duration": 125.09, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=f7ndXtwTep0", "annotations": [{"segment": [46.931602975039, 78.9482516], "label": "Hand washing clothes"}]}, "FTmGHtBdWi0": {"duration": 123.182, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FTmGHtBdWi0", "annotations": [{"segment": [19.40927798472483, 28.441318235042328], "label": "Arm wrestling"}, {"segment": [31.900397479844774, 42.277634758190324], "label": "Arm wrestling"}, {"segment": [43.23849009360374, 82.05704564430576], "label": "Arm wrestling"}]}, "jpvgI6bNO1g": {"duration": 199.413, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jpvgI6bNO1g", "annotations": [{"segment": [37.04642771175224, 190.72818618289108], "label": "Slacklining"}]}, "uzdhP_UT3B4": {"duration": 88.72, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uzdhP_UT3B4", "annotations": []}, "G43ExfqGB9o": {"duration": 143.9, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=G43ExfqGB9o", "annotations": []}, "8zznGr9DEsg": {"duration": 11.796, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8zznGr9DEsg", "annotations": []}, "LAU_pVRs9RQ": {"duration": 25.868, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=LAU_pVRs9RQ", "annotations": [{"segment": [5.6395319812792515, 13.4543120124805], "label": "Arm wrestling"}]}, "EpWZ_-hNKKs": {"duration": 32.113, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EpWZ_-hNKKs", "annotations": [{"segment": [3.9577885865834634, 21.993280879875197], "label": "Surfing"}]}, "-gHcnPAhXWY": {"duration": 14.91, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=-gHcnPAhXWY", "annotations": []}, 
"PUzd4mfmzqo": {"duration": 194.4, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=PUzd4mfmzqo", "annotations": [{"segment": [72.17866676571826, 194.39716553287982], "label": "Layup drill in basketball"}]}, "K7oJNZsI1Cc": {"duration": 163.352, "subset": "training", "resolution": "640x272", "url": "https://www.youtube.com/watch?v=K7oJNZsI1Cc", "annotations": [{"segment": [10.445149765990639, 23.69265678627145], "label": "Doing fencing"}, {"segment": [51.461469578783145, 109.5466926677067], "label": "Doing fencing"}, {"segment": [158.98955382215289, 161.7922542901716], "label": "Doing fencing"}]}, "BKRKHkkZq6g": {"duration": 38.987, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BKRKHkkZq6g", "annotations": [{"segment": [2.155800224018542, 32.108213267044974], "label": "Doing crunches"}]}, "n7dHbqoEiek": {"duration": 224.607, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=n7dHbqoEiek", "annotations": [{"segment": [8.760008580343214, 104.4193022776911], "label": "Trimming branches or hedges"}, {"segment": [128.59692595943838, 160.13295684867396], "label": "Trimming branches or hedges"}, {"segment": [195.52339151326052, 220.05141553822153], "label": "Trimming branches or hedges"}]}, "mZYWfmsYQPA": {"duration": 104.31, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mZYWfmsYQPA", "annotations": []}, "plMBtIbzX6w": {"duration": 20.318, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=plMBtIbzX6w", "annotations": [{"segment": [6.783052274471931, 13.40762114976599], "label": "Playing ten pins"}]}, "SJ7jk5XAjng": {"duration": 123.02, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SJ7jk5XAjng", "annotations": []}, "xddZ9YjTGgo": {"duration": 153.019, "subset": "training", "resolution": "384x288", "url": 
"https://www.youtube.com/watch?v=xddZ9YjTGgo", "annotations": [{"segment": [7.150443978214339, 139.19530944257247], "label": "Beach soccer"}]}, "2TOuajpiof8": {"duration": 192.84, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=2TOuajpiof8", "annotations": []}, "i9t0zZzzbhE": {"duration": 156.18, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=i9t0zZzzbhE", "annotations": [{"segment": [65.29726553960117, 152.76636283619345], "label": "Playing polo"}]}, "39dTxOhrW68": {"duration": 41.634, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=39dTxOhrW68", "annotations": [{"segment": [11.171514979719188, 40.9838718151326], "label": "Trimming branches or hedges"}]}, "FNHoSA0hLgM": {"duration": 75.37, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=FNHoSA0hLgM", "annotations": [{"segment": [4.170834633385335, 71.49163026521062], "label": "Playing racquetball"}]}, "F3FjEM9ls0o": {"duration": 56.053, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=F3FjEM9ls0o", "annotations": [{"segment": [5.154538221528861, 56.021], "label": "Playing drums"}]}, "8C6iIFY47Kc": {"duration": 220.938, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8C6iIFY47Kc", "annotations": [{"segment": [42.91226930709881, 101.5073358308482], "label": "Arm wrestling"}]}, "Sx5MlpX6NIY": {"duration": 142.664, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Sx5MlpX6NIY", "annotations": [{"segment": [44.27703432137286, 117.27270982839312], "label": "Drum corps"}]}, "qnozppP4rm0": {"duration": 163.12, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qnozppP4rm0", "annotations": [{"segment": [22.775750757921475, 91.23024186273574], "label": "Hopscotch"}]}, "ox6cIfguQ00": {"duration": 103.82, "subset": 
"validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ox6cIfguQ00", "annotations": [{"segment": [1.6195970015671375, 102.19656902652106], "label": "Horseback riding"}]}, "GH0Umpw6tME": {"duration": 133.31, "subset": "testing", "resolution": "606x480", "url": "https://www.youtube.com/watch?v=GH0Umpw6tME", "annotations": []}, "W6JJ1L_EEBY": {"duration": 99.568, "subset": "validation", "resolution": "636x480", "url": "https://www.youtube.com/watch?v=W6JJ1L_EEBY", "annotations": [{"segment": [16.143687987519503, 52.32227301092043], "label": "BMX"}, {"segment": [64.43247269890796, 99.321], "label": "BMX"}]}, "wdvQC41PGPk": {"duration": 120.95, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wdvQC41PGPk", "annotations": []}, "CevHfmrd_x4": {"duration": 201.78, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=CevHfmrd_x4", "annotations": []}, "DlE6Rtuo__o": {"duration": 82.71000000000001, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=DlE6Rtuo__o", "annotations": [{"segment": [0, 82.32238116380655], "label": "Getting a tattoo"}]}, "diBZlwUO8rc": {"duration": 119.978, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=diBZlwUO8rc", "annotations": [{"segment": [33.12951404586796, 38.93185831378833], "label": "Sharpening knives"}, {"segment": [58.3977874706825, 87.78385238065522], "label": "Sharpening knives"}, {"segment": [94.14771374726989, 96.01943767862714], "label": "Sharpening knives"}, {"segment": [98.45267878939157, 99.57571314820592], "label": "Sharpening knives"}]}, "Y-lL9JiLhz0": {"duration": 35.086, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Y-lL9JiLhz0", "annotations": [{"segment": [11.086266355140186, 31.12892523364486], "label": "Waterskiing"}]}, "6ITE3hona-4": {"duration": 208.42000000000002, "subset": "training", "resolution": 
"854x468", "url": "https://www.youtube.com/watch?v=6ITE3hona-4", "annotations": [{"segment": [36.7338143525741, 181.0684477379095], "label": "Vacuuming floor"}]}, "yH0xeA_OvWg": {"duration": 70.055, "subset": "validation", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=yH0xeA_OvWg", "annotations": [{"segment": [7.868847650546022, 70.054602], "label": "Cleaning sink"}]}, "rRoRvt4p2LU": {"duration": 168.322, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rRoRvt4p2LU", "annotations": [{"segment": [0, 168.301], "label": "Rafting"}]}, "q6bl7g5JGng": {"duration": 81.32, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=q6bl7g5JGng", "annotations": [{"segment": [14.842441123244932, 74.46592255850234], "label": "Playing squash"}]}, "AJtJcV2_l1Q": {"duration": 182.904, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=AJtJcV2_l1Q", "annotations": [{"segment": [0.01, 182.9035827664399], "label": "Roof shingle removal"}]}, "kTqt7ry3eVM": {"duration": 97.39, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=kTqt7ry3eVM", "annotations": []}, "Jp7KeCimrMI": {"duration": 89.0, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Jp7KeCimrMI", "annotations": [{"segment": [4.889917615268743, 9.326131447044444], "label": "Ironing clothes"}, {"segment": [13.346450232091174, 37.88425798910052], "label": "Ironing clothes"}, {"segment": [50.22247770872669, 52.99511135358651], "label": "Ironing clothes"}]}, "-5KAycAQlC4": {"duration": 139.436, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-5KAycAQlC4", "annotations": [{"segment": [5.438182527301094, 126.60089831513261], "label": "Wakeboarding"}]}, "sZf6ijFwPDs": {"duration": 82.76, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=sZf6ijFwPDs", "annotations": 
[{"segment": [18.338507999999997, 72.29846102964119], "label": "Mixing drinks"}]}, "7FY8MUnu7L0": {"duration": 136.162, "subset": "testing", "resolution": "492x360", "url": "https://www.youtube.com/watch?v=7FY8MUnu7L0", "annotations": []}, "Ggtcmy29TxE": {"duration": 153.136, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=Ggtcmy29TxE", "annotations": [{"segment": [9.556043738348173, 143.340655975039], "label": "Swimming"}]}, "yxaYVPa4zXY": {"duration": 125.46, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=yxaYVPa4zXY", "annotations": []}, "bNh4SQ2nR80": {"duration": 166.255, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bNh4SQ2nR80", "annotations": [{"segment": [36.80502652106084, 151.98965710465154], "label": "Blow-drying hair"}]}, "OMRu1rPRBHo": {"duration": 238.4, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OMRu1rPRBHo", "annotations": [{"segment": [91.79942251747346, 133.00791317167906], "label": "Snow tubing"}]}, "kistIwzeQOU": {"duration": 95.365, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=kistIwzeQOU", "annotations": []}, "cIRMaWUTHuk": {"duration": 143.87, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cIRMaWUTHuk", "annotations": [{"segment": [21.32251124058568, 131.7506747181452], "label": "Preparing pasta"}]}, "WV_MFi9EZeQ": {"duration": 56.471, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=WV_MFi9EZeQ", "annotations": [{"segment": [12.261761145744288, 37.24268027970067], "label": "Removing ice from car"}]}, "qYN_YrwUCBs": {"duration": 164.514, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=qYN_YrwUCBs", "annotations": [{"segment": [15.137595943837754, 26.939789391575665], "label": "Playing beach volleyball"}, {"segment": 
[33.09745553822153, 52.85330109204369], "label": "Playing beach volleyball"}, {"segment": [55.4189953198128, 58.49782839313573], "label": "Playing beach volleyball"}, {"segment": [77.99710452418097, 105.9760577223089], "label": "Playing beach volleyball"}, {"segment": [114.95707956318253, 129.58331513260532], "label": "Playing beach volleyball"}, {"segment": [146.26235569422778, 155.49997815912636], "label": "Playing beach volleyball"}]}, "lHnSteuHdZ4": {"duration": 226.581, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lHnSteuHdZ4", "annotations": [{"segment": [40.64308112324493, 185.20743213728548], "label": "Ice fishing"}]}, "xKPF_SpZ1gQ": {"duration": 14.397, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xKPF_SpZ1gQ", "annotations": [{"segment": [0.01, 14.396098], "label": "Calf roping"}]}, "QX-uieRfKgg": {"duration": 195.652, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QX-uieRfKgg", "annotations": [{"segment": [0.01, 183.41376131045243], "label": "Waxing skis"}]}, "hE-PN3XfQmg": {"duration": 69.15, "subset": "testing", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=hE-PN3XfQmg", "annotations": []}, "ilE77hVgjjo": {"duration": 109.924, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ilE77hVgjjo", "annotations": [{"segment": [0.01, 109.881], "label": "Fun sliding down"}]}, "jCB1EC3RzWI": {"duration": 63.76, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jCB1EC3RzWI", "annotations": [{"segment": [7.66981069, 16.037166720000002], "label": "Playing water polo"}, {"segment": [15.11985426521061, 17.20878149921997], "label": "Playing water polo"}, {"segment": [24.86818135725429, 49.13952636193448], "label": "Playing water polo"}]}, "HuUIIKA3o_A": {"duration": 193.585, "subset": "validation", "resolution": "1440x1080", "url": 
"https://www.youtube.com/watch?v=HuUIIKA3o_A", "annotations": [{"segment": [25.963815912636502, 62.49430109204368], "label": "Removing ice from car"}]}, "4P0zJEfqCoM": {"duration": 112.00999999999999, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4P0zJEfqCoM", "annotations": [{"segment": [0.34949473080964055, 57.14238786271451], "label": "Getting a piercing"}]}, "K_IqYFJKIgk": {"duration": 233.2, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=K_IqYFJKIgk", "annotations": [{"segment": [0.01, 28.37666825856708], "label": "Smoking a cigarette"}]}, "zT6ITDwPi0w": {"duration": 41.43, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=zT6ITDwPi0w", "annotations": [{"segment": [11.244688380655226, 40.45502831201247], "label": "Washing hands"}]}, "ujl7QIOkrZw": {"duration": 121.851, "subset": "testing", "resolution": "854x474", "url": "https://www.youtube.com/watch?v=ujl7QIOkrZw", "annotations": []}, "CYPfbnL0bCI": {"duration": 182.35, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CYPfbnL0bCI", "annotations": [{"segment": [121.16809048361935, 136.2429937597504], "label": "Starting a campfire"}]}, "zHXpo62bCaQ": {"duration": 130.828, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zHXpo62bCaQ", "annotations": [{"segment": [43.05356457067948, 108.06000070775112], "label": "Hula hoop"}]}, "Bdpqtvdx2T8": {"duration": 107.393, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Bdpqtvdx2T8", "annotations": [{"segment": [0, 23.28787546177847], "label": "Painting fence"}, {"segment": [29.319267667706708, 105.04674758658348], "label": "Painting fence"}]}, "NjXCZYO3zwU": {"duration": 43.236, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NjXCZYO3zwU", "annotations": []}, "gYARGADLcmQ": {"duration": 112.687, 
"subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=gYARGADLcmQ", "annotations": [{"segment": [16.173404517940718, 52.036171057722306], "label": "Building sandcastles"}, {"segment": [66.97899044929797, 112.48643799999999], "label": "Building sandcastles"}]}, "p-lwebxSYyo": {"duration": 57.307, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=p-lwebxSYyo", "annotations": [{"segment": [5.72174458134788, 56.23401991107644], "label": "Painting fence"}]}, "dFlmzpAb6AQ": {"duration": 202.85, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=dFlmzpAb6AQ", "annotations": [{"segment": [6.721431991449706, 187.13729934970516], "label": "Sailing"}]}, "-L-LiCO1v-s": {"duration": 162.49, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=-L-LiCO1v-s", "annotations": [{"segment": [75.28938075074022, 127.25679844064511], "label": "Bathing dog"}]}, "A20uQSBtYMs": {"duration": 136.84, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=A20uQSBtYMs", "annotations": []}, "pKxZtNYsSNQ": {"duration": 38.662, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=pKxZtNYsSNQ", "annotations": []}, "p1gH8y8X0kA": {"duration": 97.06, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=p1gH8y8X0kA", "annotations": [{"segment": [0.3018932146049013, 84.83263822238095], "label": "Playing bagpipes"}]}, "pp_J6AzUu4s": {"duration": 216.62, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pp_J6AzUu4s", "annotations": [{"segment": [113.20959520748832, 203.77727137347895], "label": "Preparing pasta"}]}, "0xBP-TSsqb0": {"duration": 94.16, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=0xBP-TSsqb0", "annotations": []}, "q0P0EvJOfRQ": {"duration": 68.22, "subset": "training", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=q0P0EvJOfRQ", "annotations": [{"segment": [0.2, 65.45234115444617], "label": "Playing accordion"}]}, "17nr8u_ze0o": {"duration": 57.284, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=17nr8u_ze0o", "annotations": [{"segment": [44.89904593048809, 51.49922350058155], "label": "Blowing leaves"}]}, "C9yUg3e1dEg": {"duration": 174.59, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=C9yUg3e1dEg", "annotations": [{"segment": [49.15457098283932, 67.12798751950078], "label": "Snatch"}, {"segment": [72.57447737909516, 84.55675507020281], "label": "Snatch"}, {"segment": [90.2755694227769, 112.06152886115444], "label": "Snatch"}, {"segment": [118.324992199688, 158.62901716068643], "label": "Snatch"}]}, "nOvrsb0XyGk": {"duration": 135.419, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nOvrsb0XyGk", "annotations": [{"segment": [0.2, 13.943274857524914], "label": "Throwing darts"}, {"segment": [20.069865325225255, 28.097811455315355], "label": "Throwing darts"}, {"segment": [34.435663663281225, 40.77351587124709], "label": "Throwing darts"}, {"segment": [46.05505937788532, 53.66048202744437], "label": "Throwing darts"}, {"segment": [58.519502053551534, 70.98394472921774], "label": "Throwing darts"}, {"segment": [72.88530039160752, 81.54703174249418], "label": "Throwing darts"}, {"segment": [86.19479002833583, 92.95516571683275], "label": "Throwing darts"}, {"segment": [97.18040052214334, 105.84213069266771], "label": "Throwing darts"}, {"segment": [109.4335802371295, 121.89802277379097], "label": "Throwing darts"}, {"segment": [126.33451926989082, 135.21877400000002], "label": "Throwing darts"}]}, "CcKeoeSZVnE": {"duration": 222.87, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=CcKeoeSZVnE", "annotations": [{"segment": [0.01, 218.3452391918806], "label": 
"Smoking hookah"}]}, "tEnhhhWqnWQ": {"duration": 188.338, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tEnhhhWqnWQ", "annotations": [{"segment": [20.65982917035164, 181.72819792784574], "label": "Hula hoop"}]}, "tCN1GlR4cf4": {"duration": 231.85, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=tCN1GlR4cf4", "annotations": []}, "SYAatoDZalo": {"duration": 209.375, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SYAatoDZalo", "annotations": []}, "CuZpm0Il6YM": {"duration": 18.18, "subset": "validation", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=CuZpm0Il6YM", "annotations": [{"segment": [5.670825385988037, 15.855741404650555], "label": "Walking the dog"}]}, "-OLPVREPy6Y": {"duration": 171.085, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-OLPVREPy6Y", "annotations": [{"segment": [9.875400840873635, 170.884647], "label": "Painting furniture"}]}, "CTIVIXvCI3c": {"duration": 187.29, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CTIVIXvCI3c", "annotations": [{"segment": [70.41699594242273, 173.55890285803432], "label": "Ballet"}]}, "4j_LRkB4T6c": {"duration": 223.45, "subset": "testing", "resolution": "568x320", "url": "https://www.youtube.com/watch?v=4j_LRkB4T6c", "annotations": []}, "oAJlaJ8xcwY": {"duration": 23.569, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=oAJlaJ8xcwY", "annotations": [{"segment": [12.243726320481391, 19.303172126885077], "label": "Applying sunscreen"}]}, "PHgD2MRUxtk": {"duration": 53.17, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PHgD2MRUxtk", "annotations": []}, "gee7iCW34_E": {"duration": 116.25999999999999, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gee7iCW34_E", "annotations": 
[{"segment": [91.79698734, 97.25200038], "label": "Starting a campfire"}]}, "nXMZ43OChxE": {"duration": 20.225, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nXMZ43OChxE", "annotations": []}, "fHEUdQY1cdc": {"duration": 176.147, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fHEUdQY1cdc", "annotations": [{"segment": [0.2, 175.881], "label": "Bullfighting"}]}, "R_UR3iHl1TQ": {"duration": 140.23, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=R_UR3iHl1TQ", "annotations": []}, "nlXavqvgf6A": {"duration": 150.35, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nlXavqvgf6A", "annotations": [{"segment": [121.02993805928237, 142.13981097659908], "label": "BMX"}]}, "4yZ1agUX004": {"duration": 114.01, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=4yZ1agUX004", "annotations": [{"segment": [49.53475613854486, 62.34086903612198], "label": "Playing beach volleyball"}, {"segment": [66.43170988455539, 80.12713601872075], "label": "Playing beach volleyball"}]}, "fwU91k-Vmb8": {"duration": 117.922, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fwU91k-Vmb8", "annotations": [{"segment": [0.2, 117.721], "label": "Riding bumper cars"}]}, "vZuy4OyWBsQ": {"duration": 164.14, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vZuy4OyWBsQ", "annotations": [{"segment": [105.16791107644305, 124.61501872074882], "label": "Washing dishes"}]}, "68kccxCgbys": {"duration": 216.74, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=68kccxCgbys", "annotations": [{"segment": [111.4103765351014, 165.50949715912637], "label": "Playing saxophone"}]}, "ujS0VNOXeVg": {"duration": 166.023, "subset": "validation", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=ujS0VNOXeVg", "annotations": [{"segment": [30.002852898870795, 139.73790233800779], "label": "Braiding hair"}]}, "V6BpJThrTok": {"duration": 113.71000000000001, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=V6BpJThrTok", "annotations": []}, "AEQ9zIG9LqA": {"duration": 80.18, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AEQ9zIG9LqA", "annotations": [{"segment": [18.762520296730237, 77.67683393026522], "label": "Tumbling"}]}, "c9bvuUO9Q0Q": {"duration": 43.166, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=c9bvuUO9Q0Q", "annotations": [{"segment": [13.380809672386894, 14.99457566302652], "label": "Hitting a pinata"}, {"segment": [23.803048361934476, 24.811652106084242], "label": "Hitting a pinata"}, {"segment": [39.06658502340093, 41.01655226209048], "label": "Hitting a pinata"}]}, "5BVcwR8hEo0": {"duration": 11.94, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=5BVcwR8hEo0", "annotations": [{"segment": [0.4914833455538221, 11.888333], "label": "Tennis serve with ball bouncing"}]}, "fynO9wmckLA": {"duration": 106.25999999999999, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fynO9wmckLA", "annotations": [{"segment": [52.364798751950076, 59.98752262090484], "label": "Triple jump"}]}, "tT-HAk-DEDk": {"duration": 174.8, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tT-HAk-DEDk", "annotations": []}, "mWsH6LOjzcM": {"duration": 73.61, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mWsH6LOjzcM", "annotations": []}, "Cj3W6CW3Zbc": {"duration": 61.649, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Cj3W6CW3Zbc", "annotations": [{"segment": [0.2, 59.602215288611546], "label": "Roof shingle removal"}]}, "aGKySEwCMnI": 
{"duration": 200.017, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aGKySEwCMnI", "annotations": [{"segment": [0.2, 50.08193457560578], "label": "Building sandcastles"}, {"segment": [60.96931268911141, 68.74601134161544], "label": "Building sandcastles"}, {"segment": [128.15998904674623, 132.51494029214848], "label": "Building sandcastles"}, {"segment": [176.37551950853427, 185.08542194088264], "label": "Building sandcastles"}]}, "xTEXsZO-PVI": {"duration": 62.578, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=xTEXsZO-PVI", "annotations": [{"segment": [0, 62.5778], "label": "Waterskiing"}]}, "UMOU6hHyPuQ": {"duration": 81.5, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UMOU6hHyPuQ", "annotations": []}, "6fgk5PKVS2o": {"duration": 224.955, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6fgk5PKVS2o", "annotations": [{"segment": [71.57535725429017, 210.53447737909516], "label": "Snow tubing"}]}, "EGLJPCJnG64": {"duration": 89.68, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EGLJPCJnG64", "annotations": [{"segment": [15.729114664586582, 76.56544071762872], "label": "Playing racquetball"}]}, "9uHffl5yny4": {"duration": 45.67, "subset": "training", "resolution": "638x360", "url": "https://www.youtube.com/watch?v=9uHffl5yny4", "annotations": [{"segment": [4.667323870000001, 35.03530157], "label": "Cleaning shoes"}]}, "t3eRbi1Uk5E": {"duration": 186.201, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=t3eRbi1Uk5E", "annotations": [{"segment": [0.01, 173.41948073634944], "label": "Elliptical trainer"}]}, "ZNBXTNwkEhU": {"duration": 150.98, "subset": "testing", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=ZNBXTNwkEhU", "annotations": []}, "ZuuY0xffLYE": {"duration": 194.932, "subset": "training", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZuuY0xffLYE", "annotations": [{"segment": [66.59906820762627, 106.4368663026521], "label": "Blow-drying hair"}]}, "0e5zwJywyqk": {"duration": 130.61, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0e5zwJywyqk", "annotations": []}, "DFAodsf1dWk": {"duration": 231.712, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=DFAodsf1dWk", "annotations": [{"segment": [57.157480924434005, 59.68337967832809], "label": "Beer pong"}, {"segment": [69.06528933564896, 74.11708684343712], "label": "Beer pong"}, {"segment": [181.2873625443717, 220.27738746653762], "label": "Beer pong"}]}, "DZ_FojmVFss": {"duration": 58.05, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=DZ_FojmVFss", "annotations": []}, "xWY7A8Sf-HE": {"duration": 60.07, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=xWY7A8Sf-HE", "annotations": [{"segment": [0.6061063, 42.73311946333853], "label": "Playing kickball"}]}, "1926p23ooUM": {"duration": 61.26, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1926p23ooUM", "annotations": [{"segment": [0.01, 51.55479871303695], "label": "Layup drill in basketball"}]}, "cbB57ODSydw": {"duration": 5.74, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cbB57ODSydw", "annotations": []}, "2HmhRdKRVb4": {"duration": 215.78, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2HmhRdKRVb4", "annotations": [{"segment": [48.128088923556945, 181.7424336973479], "label": "Putting on makeup"}]}, "t6f_O8a4sSg": {"duration": 218.52, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=t6f_O8a4sSg", "annotations": [{"segment": [14.999980897195073, 30.681779107899008], "label": "Skateboarding"}, {"segment": 
[34.431774332197776, 34.77268298895221], "label": "Skateboarding"}, {"segment": [36.47722627272438, 37.159043586233246], "label": "Skateboarding"}, {"segment": [38.86358687000541, 40.227221497023145], "label": "Skateboarding"}, {"segment": [41.59085612404088, 43.63630806456748], "label": "Skateboarding"}, {"segment": [45.681760005094084, 46.36357731860295], "label": "Skateboarding"}, {"segment": [48.06812060237512, 49.43175522939285], "label": "Skateboarding"}, {"segment": [51.136298513165016, 53.18175045369162], "label": "Skateboarding"}, {"segment": [54.88629373746378, 55.90901970772708], "label": "Skateboarding"}, {"segment": [57.95447164825369, 59.659014932025855], "label": "Skateboarding"}, {"segment": [61.363558215798015, 63.40901015632462], "label": "Skateboarding"}, {"segment": [66.47718806711453, 92.7271546372059], "label": "Skateboarding"}, {"segment": [96.8180585182591, 102.27259702633003], "label": "Skateboarding"}, {"segment": [105.68168359387437, 110.45440478843643], "label": "Skateboarding"}, {"segment": [118.2953038937884, 127.49983762615813], "label": "Skateboarding"}, {"segment": [131.5907415072113, 143.86345315037093], "label": "Skateboarding"}, {"segment": [148.63617434493298, 157.84070807730268], "label": "Skateboarding"}, {"segment": [162.61342927186476, 177.95431882581425], "label": "Skateboarding"}]}, "FbvTQ1-FCag": {"duration": 234.69, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=FbvTQ1-FCag", "annotations": [{"segment": [16.42745709828393, 212.46177847113884], "label": "Archery"}]}, "ueKSGag6w4Q": {"duration": 102.19, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ueKSGag6w4Q", "annotations": [{"segment": [41.370558767550705, 63.05223889235569], "label": "Playing saxophone"}]}, "EPql2nTJb9I": {"duration": 73.31, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=EPql2nTJb9I", "annotations": [{"segment": [32.06453196, 
59.34086068], "label": "Preparing salad"}, {"segment": [23.76051, 28.033718], "label": "Preparing salad"}]}, "4OIkfJ_IkpA": {"duration": 88.7, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=4OIkfJ_IkpA", "annotations": [{"segment": [0.2, 19.649659828393137], "label": "Cutting the grass"}, {"segment": [26.845309906396256, 34.04095998439938], "label": "Cutting the grass"}]}, "MldT9eNxLrQ": {"duration": 60.047, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MldT9eNxLrQ", "annotations": []}, "JhqQGe0TkBU": {"duration": 133.93, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=JhqQGe0TkBU", "annotations": [{"segment": [14.834941258970359, 17.969083778471138], "label": "Plataform diving"}]}, "zoSNO_k9RoM": {"duration": 45.001, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=zoSNO_k9RoM", "annotations": [{"segment": [0.01, 35.98619813094231], "label": "Putting in contact lenses"}]}, "dXTfrVz9A9o": {"duration": 110.75999999999999, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dXTfrVz9A9o", "annotations": [{"segment": [9.184136279999999, 87.02002353], "label": "Shoveling snow"}]}, "gwKy0W1xof4": {"duration": 75.88, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=gwKy0W1xof4", "annotations": [{"segment": [0.01, 65.32803744149766], "label": "Playing piano"}]}, "a6Wwa7qvlrA": {"duration": 153.67, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=a6Wwa7qvlrA", "annotations": [{"segment": [17.50057505304212, 143.3608750920437], "label": "Snowboarding"}]}, "fh68-PXZ9Oo": {"duration": 93.345, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fh68-PXZ9Oo", "annotations": [{"segment": [0.01, 79.76434945397816], "label": "Disc dog"}]}, "Jk3oA2mhPD8": {"duration": 170.2, 
"subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Jk3oA2mhPD8", "annotations": [{"segment": [50.36447382, 105.2811218299072], "label": "Tumbling"}]}, "e6Dhfn1d3ic": {"duration": 99.34, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=e6Dhfn1d3ic", "annotations": [{"segment": [17.35650318876755, 92.05145441185647], "label": "Shaving legs"}]}, "ZpqkapUghHo": {"duration": 169.65, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZpqkapUghHo", "annotations": []}, "nVk5nIE-6bM": {"duration": 197.626, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nVk5nIE-6bM", "annotations": [{"segment": [62.488912624239724, 193.93110714953272], "label": "Drum corps"}]}, "5v9p5jBN_Hg": {"duration": 112.72999999999999, "subset": "training", "resolution": "640x354", "url": "https://www.youtube.com/watch?v=5v9p5jBN_Hg", "annotations": [{"segment": [26.02877179576979, 27.96334267248241], "label": "Long jump"}, {"segment": [33.06357498381568, 37.108586816942065], "label": "Long jump"}, {"segment": [38.16380729514894, 42.736429367378776], "label": "Long jump"}, {"segment": [47.6607915990109, 51.35406327273499], "label": "Long jump"}, {"segment": [54.87146486675793, 66.12714996763135], "label": "Long jump"}, {"segment": [68.9410712428497, 79.84501618432084], "label": "Long jump"}, {"segment": [82.13132715600624, 104.46682726053041], "label": "Long jump"}]}, "3gsF785TAmg": {"duration": 108.18, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=3gsF785TAmg", "annotations": [{"segment": [5.482663806552262, 20.834122464898595], "label": "Volleyball"}]}, "6WlnhNQOWFA": {"duration": 127.15, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6WlnhNQOWFA", "annotations": [{"segment": [3.767424336973479, 101.72045709828393], "label": "Hopscotch"}, {"segment": 
[104.89302496099843, 126.901], "label": "Hopscotch"}]}, "aK0238Rz6UA": {"duration": 215.67000000000002, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aK0238Rz6UA", "annotations": [{"segment": [74.01225787999999, 143.96136832], "label": "Archery"}]}, "-zHX3Gdx6I4": {"duration": 50.34, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=-zHX3Gdx6I4", "annotations": [{"segment": [1.6084451271450857, 45.38953688065522], "label": "High jump"}]}, "q8-iXvYyCGg": {"duration": 15.77, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=q8-iXvYyCGg", "annotations": [{"segment": [2.270795631825273, 15.257291731669268], "label": "Hopscotch"}]}, "gvHcfC9snJA": {"duration": 6.73, "subset": "training", "resolution": "360x360", "url": "https://www.youtube.com/watch?v=gvHcfC9snJA", "annotations": [{"segment": [3.2881040639625585, 4.737811287051483], "label": "Removing curlers"}]}, "fY2IeYSxY4U": {"duration": 185.95, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fY2IeYSxY4U", "annotations": [{"segment": [19.870919746489857, 166.60114165], "label": "Rock climbing"}]}, "u20epr7tSEU": {"duration": 146.008, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=u20epr7tSEU", "annotations": []}, "IWDWVvf_R8w": {"duration": 84.27, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IWDWVvf_R8w", "annotations": []}, "1NbXNvyT-rY": {"duration": 141.433, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1NbXNvyT-rY", "annotations": []}, "JKCwcCMAnxg": {"duration": 207.98, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=JKCwcCMAnxg", "annotations": [{"segment": [92.46419297999999, 154.89384975], "label": "Tango"}]}, "INmaUkmVK24": {"duration": 22.01, "subset": "training", "resolution": "176x144", 
"url": "https://www.youtube.com/watch?v=INmaUkmVK24", "annotations": [{"segment": [2.040358034321373, 11.53916770670827], "label": "Shaving legs"}]}, "qQT-Fx20yiE": {"duration": 63.76, "subset": "testing", "resolution": "1920x1072", "url": "https://www.youtube.com/watch?v=qQT-Fx20yiE", "annotations": []}, "BjLF4jvF1is": {"duration": 139.529, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=BjLF4jvF1is", "annotations": []}, "3nvKiZeVfGY": {"duration": 15.117, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3nvKiZeVfGY", "annotations": [{"segment": [0.01, 11.472835549862586], "label": "Curling"}]}, "7WrW0JyhHXU": {"duration": 212.05, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7WrW0JyhHXU", "annotations": []}, "rBmFxH3BUOw": {"duration": 127.223, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rBmFxH3BUOw", "annotations": [{"segment": [27.380620904836192, 110.51453510140405], "label": "Waxing skis"}]}, "WMLB3GIdrp4": {"duration": 38.592, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WMLB3GIdrp4", "annotations": []}, "RQwgREUcreI": {"duration": 217.46, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=RQwgREUcreI", "annotations": [{"segment": [24.086184879875194, 217.45414965986396], "label": "Removing curlers"}]}, "4pg401NKSEo": {"duration": 74.931, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4pg401NKSEo", "annotations": []}, "GSbq_ankn7w": {"duration": 45.558, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GSbq_ankn7w", "annotations": []}, "7uk4sqUBhks": {"duration": 185.783, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7uk4sqUBhks", "annotations": [{"segment": [11.557250832314548, 16.75801569079044], 
"label": "Skiing"}, {"segment": [23.11450607337209, 30.626721980059493], "label": "Skiing"}, {"segment": [38.42786926777334, 41.02825169701128], "label": "Skiing"}, {"segment": [45.651153793434304, 51.14085003293663], "label": "Skiing"}, {"segment": [58.653065939624035, 82.345439183792], "label": "Skiing"}, {"segment": [86.96834128021503, 114.99468523977956], "label": "Skiing"}, {"segment": [126.26300909981067, 145.33248024755562], "label": "Skiing"}, {"segment": [154.8672158214281, 179.42638320867536], "label": "Skiing"}]}, "RPr1ZbIGLwU": {"duration": 223.423, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=RPr1ZbIGLwU", "annotations": [{"segment": [63.4248705148206, 138.34985491419658], "label": "Installing carpet"}]}, "E0QSccoRKWg": {"duration": 139.09, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=E0QSccoRKWg", "annotations": []}, "HCsfxpQ8C1M": {"duration": 8.963, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HCsfxpQ8C1M", "annotations": [{"segment": [0.01, 8.097036469848728], "label": "Kneeling"}]}, "EHA-wy50jB0": {"duration": 88.422, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EHA-wy50jB0", "annotations": []}, "LiJ4bdfa8R4": {"duration": 125.388, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=LiJ4bdfa8R4", "annotations": []}, "q92zSoMudWU": {"duration": 37.292, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=q92zSoMudWU", "annotations": [{"segment": [4.770487007800312, 17.62753126053042], "label": "Beach soccer"}, {"segment": [24.492378418096724, 35.08053250858034], "label": "Beach soccer"}]}, "qOy73DJ8J6E": {"duration": 223.446, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qOy73DJ8J6E", "annotations": []}, "Q684UQhx7yI": {"duration": 29.095, "subset": "training", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Q684UQhx7yI", "annotations": [{"segment": [14.275111259457054, 27.77981846573209], "label": "Calf roping"}]}, "GsPE54bMFwE": {"duration": 134.653, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GsPE54bMFwE", "annotations": []}, "9FYVaOGQV6o": {"duration": 90.907, "subset": "validation", "resolution": "1920x1072", "url": "https://www.youtube.com/watch?v=9FYVaOGQV6o", "annotations": [{"segment": [0.2, 9.360068770097742], "label": "Playing ice hockey"}, {"segment": [34.60389060460377, 53.891305039956706], "label": "Playing ice hockey"}, {"segment": [87.6442789048362, 90.906121], "label": "Playing ice hockey"}]}, "MjHlAvy2qVY": {"duration": 120.141, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=MjHlAvy2qVY", "annotations": [{"segment": [7.309612661466459, 8.62159442121685], "label": "Applying sunscreen"}, {"segment": [12.744965666146646, 20.99170815600624], "label": "Applying sunscreen"}, {"segment": [23.80309764118565, 25.864783263650548], "label": "Applying sunscreen"}]}, "oQBrINtoodw": {"duration": 207.49, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=oQBrINtoodw", "annotations": []}, "60Fyun_Szw4": {"duration": 16.58, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=60Fyun_Szw4", "annotations": [{"segment": [0, 14.855288012480502], "label": "Discus throw"}]}, "nlYl30Dqfc0": {"duration": 158.36, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=nlYl30Dqfc0", "annotations": [{"segment": [134.88970204680186, 150.45389843681747], "label": "Cumbia"}]}, "TxgvL4ZJZbo": {"duration": 10.4, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TxgvL4ZJZbo", "annotations": [{"segment": [0.7599186000000001, 8.389584000000001], "label": "Windsurfing"}]}, "Eaq95CTpfZI": 
{"duration": 66.386, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Eaq95CTpfZI", "annotations": [{"segment": [8.160995327102803, 10.9534984423676], "label": "Shuffleboard"}, {"segment": [43.09064953271028, 59.210892523364485], "label": "Shuffleboard"}]}, "PqCIr0WeMNM": {"duration": 85.264, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=PqCIr0WeMNM", "annotations": []}, "q4QPF-qNBTY": {"duration": 129.1, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=q4QPF-qNBTY", "annotations": [{"segment": [6.2420817, 115.2689898595944], "label": "Using uneven bars"}]}, "Gp5Vos--_0k": {"duration": 38.453, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Gp5Vos--_0k", "annotations": [{"segment": [0.01, 38.421], "label": "Welding"}]}, "19YCgLDhfoE": {"duration": 106.209, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=19YCgLDhfoE", "annotations": [{"segment": [9.637782754190987, 12.780211103100642], "label": "Baton twirling"}, {"segment": [15.426466554814038, 23.03445097849005], "label": "Baton twirling"}, {"segment": [39.2427656202346, 47.84309583830313], "label": "Baton twirling"}, {"segment": [50.98552418721279, 54.12795253612245], "label": "Baton twirling"}, {"segment": [66.532274966029, 84.06371733363024], "label": "Baton twirling"}, {"segment": [88.69466437412868, 90.34857403144954], "label": "Baton twirling"}]}, "FBtrv1Vg8hU": {"duration": 80.71000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FBtrv1Vg8hU", "annotations": [{"segment": [31.101408584234523, 50.996235889235564], "label": "Horseback riding"}]}, "q_DajTs5gIA": {"duration": 135.489, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=q_DajTs5gIA", "annotations": [{"segment": [23.24260530421217, 135.441], "label": "Clipping cat 
claws"}]}, "IpCYVZ-sMTc": {"duration": 110.133, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IpCYVZ-sMTc", "annotations": [{"segment": [0.01, 110.081], "label": "Waxing skis"}]}, "NttiEaiT1Nc": {"duration": 234.313, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NttiEaiT1Nc", "annotations": []}, "2SKZB0bfqF8": {"duration": 164.82, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2SKZB0bfqF8", "annotations": [{"segment": [7.968160686427457, 13.108909516380654], "label": "Shoveling snow"}, {"segment": [20.562995319812792, 26.988931357254287], "label": "Shoveling snow"}, {"segment": [30.07338065522621, 43.95340249609985], "label": "Shoveling snow"}, {"segment": [96.90311544461778, 115.15277379095164], "label": "Shoveling snow"}]}, "fqFqQjH8M20": {"duration": 183.299, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=fqFqQjH8M20", "annotations": [{"segment": [0.01, 171.66757130142753], "label": "Longboarding"}]}, "-vnSFKJNB94": {"duration": 106.37, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-vnSFKJNB94", "annotations": []}, "HCntSYltlmA": {"duration": 215.04, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HCntSYltlmA", "annotations": [{"segment": [120.08973166926677, 131.83034789391576], "label": "Archery"}, {"segment": [170.07121216848674, 197.57779875195007], "label": "Archery"}]}, "F2x2fynkbAQ": {"duration": 102.35, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=F2x2fynkbAQ", "annotations": [{"segment": [28.567673946957882, 59.86017940717628], "label": "Polishing shoes"}, {"segment": [62.25458658346334, 95.29740561622464], "label": "Cleaning shoes"}]}, "N9iuF02-CwQ": {"duration": 228.3, "subset": "testing", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=N9iuF02-CwQ", "annotations": []}, "vkk-sOo8Hqw": {"duration": 200.06, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=vkk-sOo8Hqw", "annotations": [{"segment": [153.246484224805, 191.32402205663027], "label": "Mowing the lawn"}]}, "FYv-2ehoGWo": {"duration": 128.94, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FYv-2ehoGWo", "annotations": [{"segment": [13.678545639784776, 118.68149878315131], "label": "Playing violin"}]}, "kNkYgYnJTXk": {"duration": 202.525, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=kNkYgYnJTXk", "annotations": [{"segment": [5.371158903498998, 19.904882995319813], "label": "Canoeing"}, {"segment": [44.233073322932924, 187.99056003120126], "label": "Canoeing"}]}, "iJqLgrShN-w": {"duration": 211.047, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iJqLgrShN-w", "annotations": [{"segment": [1.6462259438377536, 206.10748816848673], "label": "Skiing"}]}, "8-WiWyjqAv4": {"duration": 31.02, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8-WiWyjqAv4", "annotations": [{"segment": [1.6671411856474259, 29.25953588143526], "label": "Kayaking"}]}, "e9AsyRGUzTc": {"duration": 53.151, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=e9AsyRGUzTc", "annotations": [{"segment": [34.82558489859594, 45.02450619032761], "label": "Applying sunscreen"}]}, "Hev5Vj0Jjuk": {"duration": 165.51, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Hev5Vj0Jjuk", "annotations": [{"segment": [92.43854549828252, 133.4936536944471], "label": "Bungee jumping"}]}, "4cd0sNdLmT4": {"duration": 5.457, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4cd0sNdLmT4", "annotations": [{"segment": [0.31291263650546025, 5.201], "label": "Roof 
shingle removal"}]}, "UGCn1zgYboQ": {"duration": 23.94, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UGCn1zgYboQ", "annotations": [{"segment": [0.2, 22.74510140405616], "label": "Table soccer"}]}, "FEGf7ejk8Fc": {"duration": 58.89, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=FEGf7ejk8Fc", "annotations": [{"segment": [5.328202664586584, 47.770092854914196], "label": "Putting on makeup"}]}, "jVw5KawqUIg": {"duration": 215.598, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jVw5KawqUIg", "annotations": []}, "9JrRZ9i1sXo": {"duration": 201.85, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9JrRZ9i1sXo", "annotations": [{"segment": [20.783417067294938, 185.1613504399376], "label": "Shoveling snow"}]}, "u024Wa07GNs": {"duration": 17.79, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=u024Wa07GNs", "annotations": [{"segment": [12.606193447737908, 12.619700580000002], "label": "Preparing salad"}]}, "XztphLXwZjU": {"duration": 126.967, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=XztphLXwZjU", "annotations": [{"segment": [0.01, 68.33621785687754], "label": "Laying tile"}, {"segment": [77.05156158355177, 120.82635619344774], "label": "Laying tile"}]}, "RJ5U2y6ccvA": {"duration": 52.222, "subset": "training", "resolution": "638x480", "url": "https://www.youtube.com/watch?v=RJ5U2y6ccvA", "annotations": [{"segment": [4.969613567862714, 16.86409850078003], "label": "Waterskiing"}, {"segment": [23.544562641185646, 42.20098078939158], "label": "Waterskiing"}]}, "AK-9sj8btp8": {"duration": 210.63, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AK-9sj8btp8", "annotations": [{"segment": [25.30167248594706, 88.72015027539877], "label": "Cricket"}, {"segment": [95.29201325876164, 
119.60790629720428], "label": "Cricket"}, {"segment": [122.23665149054942, 126.17976928056716], "label": "Cricket"}, {"segment": [135.70897060644333, 146.55254452899206], "label": "Cricket"}, {"segment": [150.8242554681779, 172.83999646244354], "label": "Cricket"}]}, "sbT8DfccM7I": {"duration": 182.28, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=sbT8DfccM7I", "annotations": [{"segment": [53.175869619818805, 179.71737753863894], "label": "Fixing bicycle"}]}, "tCfu0LplM64": {"duration": 197.86, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tCfu0LplM64", "annotations": [{"segment": [107.41678542243731, 125.9369208400989], "label": "Polishing shoes"}]}, "kpu2zZSmphc": {"duration": 57.66, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kpu2zZSmphc", "annotations": []}, "RWU7pmO_3Jg": {"duration": 234.17000000000002, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RWU7pmO_3Jg", "annotations": []}, "B-lmXhRr6bE": {"duration": 188.8, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=B-lmXhRr6bE", "annotations": [{"segment": [5.007214775665857, 180.84881482059282], "label": "Painting"}]}, "q5tYHwZLRYU": {"duration": 24.08, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=q5tYHwZLRYU", "annotations": [{"segment": [0.57596968, 7.51890854], "label": "Javelin throw"}]}, "96ADEnJy4GU": {"duration": 63.62, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=96ADEnJy4GU", "annotations": []}, "pPt_fygNjtQ": {"duration": 205.68, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=pPt_fygNjtQ", "annotations": []}, "IQGg87yZZjs": {"duration": 198.949, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IQGg87yZZjs", "annotations": 
[{"segment": [79.14490751058614, 149.28902161800758], "label": "Croquet"}]}, "FCKkV9mYuI4": {"duration": 140.81, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FCKkV9mYuI4", "annotations": [{"segment": [58.6987884, 87.88203270000001], "label": "Tai chi"}, {"segment": [95.94311895000001, 117.7017156], "label": "Tai chi"}]}, "0yi-nkwLEnI": {"duration": 115.17, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0yi-nkwLEnI", "annotations": [{"segment": [0.01, 90.01662793042334], "label": "Getting a tattoo"}]}, "15IRaGI4Ml0": {"duration": 183.995, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=15IRaGI4Ml0", "annotations": [{"segment": [17.47726012461059, 163.90294704049845], "label": "Elliptical trainer"}]}, "t9j3GNVm8jw": {"duration": 215.64, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=t9j3GNVm8jw", "annotations": [{"segment": [40.46400019734789, 201.3125984921997], "label": "Playing saxophone"}]}, "e9l3PzP4uGQ": {"duration": 145.079, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=e9l3PzP4uGQ", "annotations": [{"segment": [37.10877379095164, 94.12957254290171], "label": "Baking cookies"}, {"segment": [101.3703088923557, 126.48661310452417], "label": "Baking cookies"}]}, "Df7AnfIEhYw": {"duration": 5.55, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Df7AnfIEhYw", "annotations": []}, "VshQp9mHeh4": {"duration": 106.86, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=VshQp9mHeh4", "annotations": [{"segment": [20.12579994, 87.47697870000002], "label": "Playing kickball"}]}, "uTVjevTM6V0": {"duration": 188.06, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uTVjevTM6V0", "annotations": [{"segment": [24.031931819999997, 115.31340443999999], "label": 
"Snatch"}]}, "g_Cz69Q5bKM": {"duration": 44.24, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=g_Cz69Q5bKM", "annotations": [{"segment": [3.8153868954758194, 26.638962558502342], "label": "Playing squash"}, {"segment": [30.901196567862716, 40.80057878315133], "label": "Playing squash"}]}, "uEaxyzKonCY": {"duration": 163.47, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=uEaxyzKonCY", "annotations": [{"segment": [0.010000000000000002, 163.34097021023697], "label": "Playing badminton"}]}, "yTv7XBpDKBQ": {"duration": 30.26, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=yTv7XBpDKBQ", "annotations": []}, "_BWSmg81iMM": {"duration": 34.3, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_BWSmg81iMM", "annotations": [{"segment": [0.01, 27.714916842433702], "label": "Playing bagpipes"}]}, "nKQsVIJCvUQ": {"duration": 194.026, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nKQsVIJCvUQ", "annotations": []}, "ZJKwP17_gww": {"duration": 30.21, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZJKwP17_gww", "annotations": []}, "GOxmnVFdMfY": {"duration": 161.727, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GOxmnVFdMfY", "annotations": [{"segment": [0.2, 132.2991145042672], "label": "Riding bumper cars"}]}, "8iHklV25LaE": {"duration": 29.91, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8iHklV25LaE", "annotations": [{"segment": [0.6998588143525741, 1.7263184087363495], "label": "Washing dishes"}, {"segment": [13.11068845553822, 24.495058502340093], "label": "Washing dishes"}]}, "qisMX7KNi7s": {"duration": 149.44, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=qisMX7KNi7s", "annotations": [{"segment": [65.94321818, 
130.09909808], "label": "Hand washing clothes"}, {"segment": [65.94321818, 130.09909808], "label": "Hand washing clothes"}]}, "AFs_aK0DY7E": {"duration": 56.66, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AFs_aK0DY7E", "annotations": [{"segment": [14.76342048, 51.23273010920437], "label": "Using the pommel horse"}]}, "DDZFQKi1v2U": {"duration": 112.34, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DDZFQKi1v2U", "annotations": [{"segment": [16.298669152886117, 77.98825562402496], "label": "Getting a piercing"}]}, "VpZzLvzU7_k": {"duration": 130.868, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VpZzLvzU7_k", "annotations": []}, "rLlm6h0Nfvo": {"duration": 216.898, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rLlm6h0Nfvo", "annotations": [{"segment": [56.153837753510146, 173.19897659906397], "label": "Hula hoop"}]}, "UUohrz_ldJ4": {"duration": 15.117, "subset": "testing", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=UUohrz_ldJ4", "annotations": []}, "xizEiQZfJa4": {"duration": 157.15, "subset": "testing", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=xizEiQZfJa4", "annotations": []}, "-wcSkAAeOK8": {"duration": 224.82, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=-wcSkAAeOK8", "annotations": [{"segment": [69.04558034321373, 221.52635257410296], "label": "Spinning"}]}, "2Vf8yqNHaGM": {"duration": 29.002, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=2Vf8yqNHaGM", "annotations": [{"segment": [0.01, 28.941], "label": "Hitting a pinata"}]}, "J80e9M4AQ5w": {"duration": 189.59, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=J80e9M4AQ5w", "annotations": [{"segment": [16.705107845553822, 89.7345173650546], "label": "Shoveling snow"}]}, 
"5P_Scs7Vo2c": {"duration": 193.79, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=5P_Scs7Vo2c", "annotations": [{"segment": [6.953597588801512, 53.81479873072474], "label": "Hand washing clothes"}, {"segment": [102.48998185233532, 191.67742770670827], "label": "Hand washing clothes"}]}, "ZbS9R9faBQk": {"duration": 44.351, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZbS9R9faBQk", "annotations": [{"segment": [0.01, 43.796327831197004], "label": "Making an omelette"}]}, "4Y94njisJd0": {"duration": 48.76, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=4Y94njisJd0", "annotations": [{"segment": [6.385965678627145, 45.46199375975039], "label": "Playing racquetball"}]}, "-nl4G-00PtA": {"duration": 6.22, "subset": "testing", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=-nl4G-00PtA", "annotations": []}, "4WKwnpn38ws": {"duration": 16.463, "subset": "testing", "resolution": "476x480", "url": "https://www.youtube.com/watch?v=4WKwnpn38ws", "annotations": []}, "-sd2XAFkeC0": {"duration": 230.62, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-sd2XAFkeC0", "annotations": [{"segment": [19.81801815252882, 190.3549283799583], "label": "Rafting"}, {"segment": [195.03210102332662, 210.50274847803385], "label": "Rafting"}]}, "Q3FkUH9kImU": {"duration": 59.931, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Q3FkUH9kImU", "annotations": [{"segment": [43.10148699063963, 51.04861582839313], "label": "Shuffleboard"}]}, "ejkGxGrEdO4": {"duration": 133.887, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ejkGxGrEdO4", "annotations": [{"segment": [0.2, 124.2547503900156], "label": "Disc dog"}]}, "i1c4-zdkOto": {"duration": 229.832, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=i1c4-zdkOto", 
"annotations": []}, "uVcyJg_3Fj8": {"duration": 81.131, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uVcyJg_3Fj8", "annotations": [{"segment": [8.986376021027235, 81.130521], "label": "Decorating the Christmas tree"}]}, "X8o3FbH0gyo": {"duration": 170.226, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=X8o3FbH0gyo", "annotations": [{"segment": [0.01, 170.22548752834467], "label": "Wakeboarding"}]}, "8lw8uAimUXI": {"duration": 9.66, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8lw8uAimUXI", "annotations": [{"segment": [4.008466873634946, 6.871657497659906], "label": "Springboard diving"}]}, "J5wZTdD4XeU": {"duration": 180.187, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=J5wZTdD4XeU", "annotations": [{"segment": [5.72474248427394, 157.2620322039001], "label": "Doing fencing"}]}, "Ym_hy49DaS4": {"duration": 220.01, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ym_hy49DaS4", "annotations": [{"segment": [81.83898673946958, 212.91862168486742], "label": "Shaving legs"}]}, "goSGYjLTGnQ": {"duration": 65.13, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=goSGYjLTGnQ", "annotations": [{"segment": [0, 65.1314739229025], "label": "Breakdancing"}]}, "Fky1ioAUt38": {"duration": 54.103, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Fky1ioAUt38", "annotations": [{"segment": [0.01, 21.590664586583465], "label": "Mooping floor"}, {"segment": [34.57879875195008, 54.061], "label": "Mooping floor"}]}, "tqVzDmYHY8U": {"duration": 134.096, "subset": "testing", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=tqVzDmYHY8U", "annotations": []}, "SzQHOcBrSso": {"duration": 121.185, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SzQHOcBrSso", 
"annotations": []}, "JGRpVcxltKE": {"duration": 172.409, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JGRpVcxltKE", "annotations": [{"segment": [26.089822758190326, 58.63485939469579], "label": "Making a lemonade"}, {"segment": [68.3176802121685, 79.07637000936037], "label": "Making a lemonade"}, {"segment": [92.52473225585022, 100.59374960374416], "label": "Making a lemonade"}, {"segment": [121.30422746333855, 153.5802968549142], "label": "Making a lemonade"}]}, "UISTzYOTSKU": {"duration": 101.216, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UISTzYOTSKU", "annotations": []}, "wyyO9ynxqmE": {"duration": 102.447, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wyyO9ynxqmE", "annotations": []}, "XOUyGnr6iTM": {"duration": 124.04, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XOUyGnr6iTM", "annotations": [{"segment": [0.01, 115.3329697597504], "label": "Playing racquetball"}]}, "QpJ5npI8qO0": {"duration": 145.94, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=QpJ5npI8qO0", "annotations": [{"segment": [3.6413104524180966, 140.41803432137286], "label": "Tai chi"}]}, "sq0cKsoX7mg": {"duration": 11.564, "subset": "validation", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=sq0cKsoX7mg", "annotations": [{"segment": [1.028270840873635, 2.1106611996879874], "label": "Baton twirling"}, {"segment": [5.0150753291731665, 10.589385677067083], "label": "Baton twirling"}]}, "sa5ZuxFDZNw": {"duration": 124.9, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=sa5ZuxFDZNw", "annotations": [{"segment": [7.44356867071847, 124.861], "label": "Dodgeball"}]}, "GavbA_SHlVM": {"duration": 127.32, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GavbA_SHlVM", "annotations": [{"segment": 
[25.209995319812794, 30.966608424336975], "label": "Bungee jumping"}, {"segment": [73.84345085803432, 79.99707176287052], "label": "Bungee jumping"}]}, "w6xH78z3l08": {"duration": 70.519, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=w6xH78z3l08", "annotations": [{"segment": [18.446805476194534, 63.96062480745425], "label": "Changing car wheel"}]}, "zxzBoK5t0XY": {"duration": 232.34, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zxzBoK5t0XY", "annotations": [{"segment": [40.45469256, 205.91419926], "label": "Horseback riding"}]}, "qgJzdtHCebM": {"duration": 122.625, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=qgJzdtHCebM", "annotations": [{"segment": [0.01, 119.79333269009932], "label": "Bullfighting"}]}, "Y39-bbpK_TY": {"duration": 50.132, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=Y39-bbpK_TY", "annotations": []}, "Son8UiVlzu0": {"duration": 126.317, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Son8UiVlzu0", "annotations": []}, "HpJ2pr0ykqo": {"duration": 45.93, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HpJ2pr0ykqo", "annotations": [{"segment": [0.01, 38.19062991731669], "label": "Painting fence"}]}, "C1djGYqkhYI": {"duration": 107.184, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=C1djGYqkhYI", "annotations": [{"segment": [0.01, 99.99173503501393], "label": "Baton twirling"}]}, "aYT4LSpcT88": {"duration": 184.204, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aYT4LSpcT88", "annotations": []}, "r7E7cVoJ0JE": {"duration": 56.17, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=r7E7cVoJ0JE", "annotations": [{"segment": [0.01, 56.101], "label": "Roof shingle removal"}]}, "uBls-XJdcBs": 
{"duration": 14.17, "subset": "validation", "resolution": "240x176", "url": "https://www.youtube.com/watch?v=uBls-XJdcBs", "annotations": [{"segment": [0.07220479525121053, 14.059875222042487], "label": "Playing water polo"}]}, "gGs1qNxsFTg": {"duration": 19.9, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gGs1qNxsFTg", "annotations": [{"segment": [6.568692667706708, 18.063904836193448], "label": "Playing rubik cube"}]}, "4l8r_wBuJ6Y": {"duration": 39.94, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4l8r_wBuJ6Y", "annotations": [{"segment": [0.010000000000000002, 39.938321995464854], "label": "Cleaning windows"}]}, "7EeO9V2B574": {"duration": 173.99, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7EeO9V2B574", "annotations": []}, "jAlYwQevBxA": {"duration": 219.865, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jAlYwQevBxA", "annotations": [{"segment": [18.833730529595016, 143.47878348909657], "label": "Snowboarding"}]}, "cMndipw7_As": {"duration": 109.97, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=cMndipw7_As", "annotations": [{"segment": [10.976948517940718, 101.19374414976599], "label": "Welding"}]}, "c05oRliUOxk": {"duration": 221.32999999999998, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=c05oRliUOxk", "annotations": [{"segment": [13.530188854074925, 207.8387309101497], "label": "Horseback riding"}]}, "eZbdiuUu0S8": {"duration": 193.678, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eZbdiuUu0S8", "annotations": [{"segment": [91.24895369692338, 176.75707918112644], "label": "Swimming"}]}, "YmVdnkDo0xQ": {"duration": 64.0, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YmVdnkDo0xQ", "annotations": [{"segment": [11.79281432, 
31.21794851794072], "label": "Washing hands"}]}, "hWZ3B28_fAs": {"duration": 103.47, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=hWZ3B28_fAs", "annotations": [{"segment": [0.6453728549141966, 102.7756271450858], "label": "Playing guitarra"}]}, "ym2trHhFDUs": {"duration": 3.69, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ym2trHhFDUs", "annotations": []}, "Iq9cAZxki9Y": {"duration": 39.66, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Iq9cAZxki9Y", "annotations": [{"segment": [6.310799831513259, 31.987093263650543], "label": "Rock-paper-scissors"}]}, "F67zl57FSXE": {"duration": 101.63, "subset": "validation", "resolution": "658x480", "url": "https://www.youtube.com/watch?v=F67zl57FSXE", "annotations": [{"segment": [1.36391486, 97.04976599063963], "label": "Snatch"}]}, "-VcxQ6i6Ejk": {"duration": 102.75, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=-VcxQ6i6Ejk", "annotations": [{"segment": [0.01, 37.34845401707224], "label": "Cricket"}, {"segment": [83.51306670770232, 102.54832199546485], "label": "Cricket"}]}, "m3yLm_dJU94": {"duration": 140.13, "subset": "validation", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=m3yLm_dJU94", "annotations": [{"segment": [10.274920397815913, 14.647226950078004], "label": "Cheerleading"}, {"segment": [20.112610140405618, 23.610455382215292], "label": "Cheerleading"}, {"segment": [46.56506478159127, 50.937371333853356], "label": "Cheerleading"}, {"segment": [69.95690483619346, 80.88767121684867], "label": "Cheerleading"}, {"segment": [91.16259161466459, 94.00459087363495], "label": "Cheerleading"}, {"segment": [118.70812289391576, 120.89427617004682], "label": "Cheerleading"}, {"segment": [132.91811918876758, 133.5739651716069], "label": "Cheerleading"}]}, "rYXmAD8u7N8": {"duration": 195.88, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=rYXmAD8u7N8", "annotations": [{"segment": [126.51448346957878, 173.26983605616223], "label": "Playing violin"}, {"segment": [86.48212275819033, 103.28960244617784], "label": "Playing violin"}]}, "tl2hDYGBfqU": {"duration": 227.51, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tl2hDYGBfqU", "annotations": [{"segment": [8.873210438621626, 134.87279866704873], "label": "Beach soccer"}]}, "Ws5jA8cMKas": {"duration": 182.463, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Ws5jA8cMKas", "annotations": [{"segment": [0.01, 3.1214368665423464], "label": "Blow-drying hair"}]}, "d3VVwwVkJBs": {"duration": 15.186, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=d3VVwwVkJBs", "annotations": [{"segment": [3.624703427457098, 11.750672549141965], "label": "Blowing leaves"}]}, "fJ4xMCc5SKk": {"duration": 28.33, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fJ4xMCc5SKk", "annotations": [{"segment": [1.2574531684867394, 20.22955360530421], "label": "Tumbling"}]}, "GrqBqKMW4ps": {"duration": 61.49, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GrqBqKMW4ps", "annotations": [{"segment": [9.549205928237129, 44.19506864274571], "label": "Using parallel bars"}]}, "h1t5QZjERms": {"duration": 175.1, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=h1t5QZjERms", "annotations": [{"segment": [2.8018257325007, 79.4429120549306], "label": "Playing badminton"}, {"segment": [93.35286010633249, 150.62911678857546], "label": "Playing badminton"}]}, "IJAR9ERJt4s": {"duration": 31.19, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IJAR9ERJt4s", "annotations": [{"segment": [3.602690064795663, 12.188292868533981], "label": "Playing bagpipes"}]}, "wAqQ-8OxhXY": {"duration": 86.08, 
"subset": "training", "resolution": "1262x720", "url": "https://www.youtube.com/watch?v=wAqQ-8OxhXY", "annotations": [{"segment": [7.922786006240249, 84.73352491419656], "label": "Walking the dog"}]}, "vWiNs8nEF4g": {"duration": 174.27, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=vWiNs8nEF4g", "annotations": []}, "yA3AD9jU7QU": {"duration": 207.89, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yA3AD9jU7QU", "annotations": [{"segment": [14.65991957834211, 40.56499795444852], "label": "Washing hands"}, {"segment": [57.4032988989177, 84.2798177141281], "label": "Washing hands"}, {"segment": [99.17523778038928, 177.21428638840985], "label": "Washing hands"}]}, "SEebwJMNMQs": {"duration": 237.56, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SEebwJMNMQs", "annotations": [{"segment": [27.05479705928237, 232.7453784301032], "label": "Playing violin"}]}, "w6Avae5on_0": {"duration": 37.85, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=w6Avae5on_0", "annotations": [{"segment": [0, 37.730432878315135], "label": "Playing badminton"}]}, "EmM2yXiiz5c": {"duration": 32.323, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EmM2yXiiz5c", "annotations": [{"segment": [0.2, 31.467492303297675], "label": "Running a marathon"}]}, "L4rKeN_4CLk": {"duration": 113.732, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=L4rKeN_4CLk", "annotations": [{"segment": [10.468230185647425, 59.97053902964118], "label": "Surfing"}, {"segment": [66.53536134945398, 97.05291375507021], "label": "Surfing"}]}, "ZYcZZJ0XItM": {"duration": 140.39, "subset": "validation", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=ZYcZZJ0XItM", "annotations": [{"segment": [7.008441510140406, 17.521103775351015], "label": "Springboard diving"}, {"segment": 
[23.65349009672387, 32.63305578159127], "label": "Springboard diving"}, {"segment": [38.54642830577223, 51.46824234009361], "label": "Springboard diving"}, {"segment": [55.62950448673947, 71.83652547893915], "label": "Springboard diving"}, {"segment": [77.53088420592823, 84.97735331045241], "label": "Springboard diving"}, {"segment": [84.75833951326054, 101.403388099844], "label": "Springboard diving"}, {"segment": [105.34563644929797, 116.95336770046802], "label": "Springboard diving"}, {"segment": [120.895616049922, 136.66460944773792], "label": "Springboard diving"}]}, "AtbTioB1_Kw": {"duration": 9.43, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AtbTioB1_Kw", "annotations": []}, "OW5Tyr8MICg": {"duration": 15.12, "subset": "training", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=OW5Tyr8MICg", "annotations": [{"segment": [3.85584822, 14.82247074102964], "label": "Brushing hair"}]}, "Vvi0HQ6Pu7c": {"duration": 8.104, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Vvi0HQ6Pu7c", "annotations": [{"segment": [0.01, 6.7225224490655915], "label": "Doing a powerbomb"}]}, "sahQxLbmM0U": {"duration": 37.32, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sahQxLbmM0U", "annotations": [{"segment": [11.060450472440667, 27.30184870982839], "label": "Clean and jerk"}]}, "k-I4zLbgdpM": {"duration": 95.78, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=k-I4zLbgdpM", "annotations": []}, "BII4aKnegaU": {"duration": 106.33, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BII4aKnegaU", "annotations": [{"segment": [1.6582215288611546, 6.467063962558503], "label": "Spinning"}, {"segment": [34.49100780031201, 62.349129485179404], "label": "Spinning"}, {"segment": [79.76045553822154, 86.72498595943837], "label": "Spinning"}]}, "4Oug7S32B-4": {"duration": 113.175, 
"subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4Oug7S32B-4", "annotations": [{"segment": [14.83092190202808, 59.676804796255844], "label": "Running a marathon"}, {"segment": [84.74812515444619, 110.87879707706708], "label": "Running a marathon"}]}, "vQ82ItCG1x4": {"duration": 172.27, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=vQ82ItCG1x4", "annotations": []}, "mZYqH82delk": {"duration": 53.569, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=mZYqH82delk", "annotations": [{"segment": [5.181346190327614, 40.94934892355695], "label": "Kneeling"}]}, "XptFota2__4": {"duration": 194.89, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XptFota2__4", "annotations": [{"segment": [2.63679405, 31.76381559828393], "label": "Smoking hookah"}, {"segment": [32.06442905, 132.67852161388456], "label": "Smoking hookah"}]}, "YPSV5JEqhBw": {"duration": 99.13, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YPSV5JEqhBw", "annotations": [{"segment": [0, 4.950315132605304], "label": "Shot put"}, {"segment": [6.033196567862714, 13.14927457098284], "label": "Shot put"}, {"segment": [15.624432137285492, 29.392496099843996], "label": "Shot put"}, {"segment": [32.02235101404056, 44.398138845553824], "label": "Shot put"}, {"segment": [48.884361934477376, 52.13300624024961], "label": "Shot put"}, {"segment": [57.08332137285491, 62.652425897035876], "label": "Shot put"}, {"segment": [63.58060998439937, 77.19397659906397], "label": "Shot put"}, {"segment": [80.59731825273012, 82.91777847113885], "label": "Shot put"}, {"segment": [83.38187051482059, 96.2217503900156], "label": "Shot put"}]}, "qJdn48DsMXc": {"duration": 61.88, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qJdn48DsMXc", "annotations": []}, "rBCf1qjOwCc": {"duration": 147.052, "subset": 
"training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rBCf1qjOwCc", "annotations": [{"segment": [16.97630381949972, 103.23427998344424], "label": "Making a lemonade"}, {"segment": [113.0988877301092, 117.68707790171605], "label": "Making a lemonade"}]}, "ao6SHL-wo5k": {"duration": 75.698, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ao6SHL-wo5k", "annotations": []}, "I-1UKECfQko": {"duration": 88.143, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=I-1UKECfQko", "annotations": [{"segment": [37.521954758190326, 88.121], "label": "Snow tubing"}]}, "6RxF2UHMYQM": {"duration": 210.57999999999998, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6RxF2UHMYQM", "annotations": [{"segment": [145.97883859126364, 193.59848222620903], "label": "Skateboarding"}]}, "HAFNv8zvte4": {"duration": 152.79, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HAFNv8zvte4", "annotations": []}, "4ivePL3RW0Q": {"duration": 163.84, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=4ivePL3RW0Q", "annotations": [{"segment": [57.33125548211511, 139.37865778071543], "label": "Baking cookies"}]}, "zuNQFkkyBNo": {"duration": 41.01, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zuNQFkkyBNo", "annotations": [{"segment": [24.306716559999998, 36.55221528861155], "label": "Smoking a cigarette"}, {"segment": [5.485782680000001, 20.336105579999998], "label": "Smoking a cigarette"}]}, "BSg989GP5ro": {"duration": 235.358, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=BSg989GP5ro", "annotations": [{"segment": [56.90289391575663, 235.301], "label": "Making a lemonade"}]}, "8EfkFxoXI_4": {"duration": 170.74, "subset": "validation", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=8EfkFxoXI_4", "annotations": [{"segment": [27.03953666146646, 84.31540249609985], "label": "Fixing bicycle"}, {"segment": [101.63136193447738, 158.37442901716068], "label": "Fixing bicycle"}]}, "ORL5yMppl8E": {"duration": 165.98, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ORL5yMppl8E", "annotations": [{"segment": [49.96411700468019, 160.7653712948518], "label": "Preparing pasta"}]}, "ejIEsnkvLWY": {"duration": 227.81, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ejIEsnkvLWY", "annotations": [{"segment": [14.038460218408735, 27.5438143525741], "label": "Playing accordion"}, {"segment": [36.07351170046802, 50.289673946957876], "label": "Playing accordion"}, {"segment": [64.15043213728549, 75.52336193447738], "label": "Playing accordion"}, {"segment": [84.7638673946958, 94.0043728549142], "label": "Playing accordion"}, {"segment": [109.99755538221528, 113.19619188767551], "label": "Playing accordion"}, {"segment": [122.08129329173165, 129.9001825273011], "label": "Playing accordion"}, {"segment": [137.7190717628705, 146.2487691107644], "label": "Playing accordion"}, {"segment": [170.4162449297972, 174.3256895475819], "label": "Playing accordion"}, {"segment": [182.14457878315133, 203.11341809672385], "label": "Playing accordion"}]}, "SB0dU5oDvaE": {"duration": 157.66, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=SB0dU5oDvaE", "annotations": []}, "hFzMCWZ3Jps": {"duration": 103.91, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hFzMCWZ3Jps", "annotations": [{"segment": [17.34518792667707, 93.04801747581904], "label": "Braiding hair"}]}, "QRcwvzq7QUM": {"duration": 127.27, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QRcwvzq7QUM", "annotations": [{"segment": [28.789302106084243, 60.35826096723869], "label": "Playing polo"}, 
{"segment": [69.49141887675506, 88.5519223400936], "label": "Playing polo"}, {"segment": [107.81097271450857, 118.33395900156006], "label": "Playing polo"}]}, "f1YrGAkQd0U": {"duration": 166.719, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=f1YrGAkQd0U", "annotations": [{"segment": [46.54027925117005, 63.18037909516381], "label": "Playing ten pins"}, {"segment": [88.66053198127925, 93.87182683307334], "label": "Playing ten pins"}, {"segment": [102.97297347893917, 105.83333385335415], "label": "Playing ten pins"}, {"segment": [109.21375975039003, 112.07412012480499], "label": "Playing ten pins"}, {"segment": [127.41605304212169, 132.3566755070203], "label": "Playing ten pins"}, {"segment": [139.8976255850234, 148.73873946957877], "label": "Playing ten pins"}]}, "G3cINUdHdGo": {"duration": 52.292, "subset": "training", "resolution": "855x470", "url": "https://www.youtube.com/watch?v=G3cINUdHdGo", "annotations": [{"segment": [24.840424043681747, 26.96144547269891], "label": "Throwing darts"}, {"segment": [20.02733695475819, 21.006269921996882], "label": "Throwing darts"}, {"segment": [25.08515728549142, 26.798289978159126], "label": "Throwing darts"}]}, "Qm6HmQv5uOo": {"duration": 32.76, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Qm6HmQv5uOo", "annotations": [{"segment": [0, 18.80938306396256], "label": "Playing accordion"}]}, "UmRGPupwDA0": {"duration": 176.217, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=UmRGPupwDA0", "annotations": []}, "iE3I2ICIvHE": {"duration": 100.68, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iE3I2ICIvHE", "annotations": [{"segment": [0.3561350562906711, 97.1963500095617], "label": "Using the balance beam"}]}, "h8ywxC9bqCE": {"duration": 111.874, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=h8ywxC9bqCE", "annotations": []}, 
"eHxRr7Zhsek": {"duration": 196.51, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eHxRr7Zhsek", "annotations": [{"segment": [8.276374287624567, 189.43701028081125], "label": "Doing nails"}]}, "N-LaOcSqZaM": {"duration": 17.14, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=N-LaOcSqZaM", "annotations": [{"segment": [0.01, 16.221603744149768], "label": "Volleyball"}, {"segment": [0.010000000000000002, 17.136326530612244], "label": "Playing beach volleyball"}]}, "-zIN0_GSaWM": {"duration": 76.72, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=-zIN0_GSaWM", "annotations": []}, "znIFzXChO1M": {"duration": 7.756, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=znIFzXChO1M", "annotations": [{"segment": [0, 7.755463], "label": "Hitting a pinata"}]}, "FqYhC6_CMfI": {"duration": 90.53999999999999, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=FqYhC6_CMfI", "annotations": []}, "HWV_ccmZVPA": {"duration": 50.318, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HWV_ccmZVPA", "annotations": [{"segment": [13.89426252730109, 34.539409672386896], "label": "Running a marathon"}]}, "bCdIcqRYJvo": {"duration": 124.02, "subset": "training", "resolution": "640x264", "url": "https://www.youtube.com/watch?v=bCdIcqRYJvo", "annotations": [{"segment": [10.540946177847115, 23.70001021450858], "label": "Playing piano"}, {"segment": [28.92368593525741, 53.88124771216849], "label": "Playing piano"}, {"segment": [56.58982030811233, 68.39145804758189], "label": "Playing piano"}, {"segment": [91.99473352652106, 106.69841333307332], "label": "Playing piano"}]}, "6O8QXbjvYKs": {"duration": 75.837, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=6O8QXbjvYKs", "annotations": [{"segment": [37.9771584524181, 
45.54892836193448], "label": "Playing ten pins"}]}, "AonniE_CsPY": {"duration": 28.097, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AonniE_CsPY", "annotations": [{"segment": [0.01, 19.37362815600624], "label": "Playing blackjack"}]}, "MR0vMF_5hp8": {"duration": 118.608, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=MR0vMF_5hp8", "annotations": [{"segment": [4.345886895475819, 104.02388845553823], "label": "Removing ice from car"}]}, "MeHEDK_Y8sE": {"duration": 149.31, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MeHEDK_Y8sE", "annotations": [{"segment": [84.55110393135725, 112.26895893915756], "label": "Removing curlers"}]}, "D_C28uJcrPU": {"duration": 67.965, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=D_C28uJcrPU", "annotations": []}, "mt3G8TrI-uY": {"duration": 153.57999999999998, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mt3G8TrI-uY", "annotations": [{"segment": [28.750723057722308, 147.34745567082683], "label": "Doing nails"}]}, "iUe1t0sN4Jo": {"duration": 54.103, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=iUe1t0sN4Jo", "annotations": [{"segment": [23.44611232449298, 28.08473166926677], "label": "Playing ten pins"}]}, "R0YS8JS_0rk": {"duration": 62.67, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=R0YS8JS_0rk", "annotations": [{"segment": [2.0012818299531983, 59.89201964274571], "label": "Using the pommel horse"}]}, "V8UMJTrfXdw": {"duration": 7.895, "subset": "training", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=V8UMJTrfXdw", "annotations": [{"segment": [0.2, 7.641], "label": "Table soccer"}]}, "C-M4L_WDzK8": {"duration": 209.56, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=C-M4L_WDzK8", 
"annotations": [{"segment": [0.01, 206.9446761543931], "label": "Playing field hockey"}]}, "Nq3b9OReeEI": {"duration": 91.417, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Nq3b9OReeEI", "annotations": [{"segment": [0.7130808112324494, 18.112252605304214], "label": "Hurling"}, {"segment": [26.526606177847114, 88.7072529173167], "label": "Hurling"}]}, "Vc9S_2EL9mQ": {"duration": 156.735, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Vc9S_2EL9mQ", "annotations": []}, "ZLXhlWhrkno": {"duration": 235.22, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZLXhlWhrkno", "annotations": [{"segment": [142.35604992199688, 212.06648673946958], "label": "Archery"}]}, "zCI36lTmBqY": {"duration": 165.56, "subset": "training", "resolution": "328x240", "url": "https://www.youtube.com/watch?v=zCI36lTmBqY", "annotations": [{"segment": [1.807968699700369, 160.1343689547582], "label": "Smoking hookah"}]}, "vYk5m-xNk-Y": {"duration": 99.52000000000001, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=vYk5m-xNk-Y", "annotations": []}, "kbK-9Me0BnA": {"duration": 90.883, "subset": "training", "resolution": "658x480", "url": "https://www.youtube.com/watch?v=kbK-9Me0BnA", "annotations": [{"segment": [0.01, 90.841], "label": "Camel ride"}]}, "sfPGQnxbJ3U": {"duration": 30.05, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=sfPGQnxbJ3U", "annotations": [{"segment": [2.437479410920437, 29.99974659594384], "label": "Doing kickboxing"}]}, "Tssba1kYoo4": {"duration": 140.41, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=Tssba1kYoo4", "annotations": [{"segment": [23.8764526798759, 129.89666295163806], "label": "Removing curlers"}]}, "bM5VpCdPOrw": {"duration": 177.029, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=bM5VpCdPOrw", "annotations": [{"segment": [163.26307201133434, 175.10170991543325], "label": "Camel ride"}]}, "j4-w606GnYw": {"duration": 195.05, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=j4-w606GnYw", "annotations": [{"segment": [2.4337098283931358, 193.19553042121683], "label": "Zumba"}]}, "0_ZLjaAEx5s": {"duration": 163.98, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0_ZLjaAEx5s", "annotations": [{"segment": [0.01, 156.0489828549142], "label": "Braiding hair"}]}, "y0Kio7VOk5o": {"duration": 28.58, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=y0Kio7VOk5o", "annotations": [{"segment": [14.626035384711388, 27.022492204680187], "label": "Plataform diving"}]}, "IOTpJ2QgoxU": {"duration": 116.22, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IOTpJ2QgoxU", "annotations": []}, "wuHLveuf6PQ": {"duration": 46.812, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wuHLveuf6PQ", "annotations": [{"segment": [0.01, 37.06175016646856], "label": "Rollerblading"}]}, "wtG2efnMFh0": {"duration": 226.3, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=wtG2efnMFh0", "annotations": []}, "fvUckFYBQks": {"duration": 106.464, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fvUckFYBQks", "annotations": [{"segment": [19.590761310452418, 90.98082371294852], "label": "Braiding hair"}]}, "_rkRndMV8J8": {"duration": 96.11, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=_rkRndMV8J8", "annotations": []}, "Igm1Mx4Ng1k": {"duration": 139.087, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Igm1Mx4Ng1k", "annotations": [{"segment": [0.2, 122.35921400312012], "label": "Cutting the grass"}]}, 
"MYi6p113py8": {"duration": 220.78, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MYi6p113py8", "annotations": [{"segment": [9.299426915852145, 163.256605856071], "label": "Tango"}]}, "Bu0YfgUJmhc": {"duration": 105.721, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Bu0YfgUJmhc", "annotations": []}, "5Yq5GMPBguI": {"duration": 118.05, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=5Yq5GMPBguI", "annotations": [{"segment": [0.60178558887736, 41.97453605616708], "label": "Mowing the lawn"}, {"segment": [54.84605842376832, 117.18100246115151], "label": "Mowing the lawn"}]}, "YMOYyt3kSOs": {"duration": 66.43, "subset": "testing", "resolution": "562x480", "url": "https://www.youtube.com/watch?v=YMOYyt3kSOs", "annotations": []}, "iHQ-Kg4Fms8": {"duration": 50.62, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iHQ-Kg4Fms8", "annotations": [{"segment": [26.61272386583463, 43.74910688923557], "label": "Drinking coffee"}]}, "aeEMrTpNUss": {"duration": 56.15, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aeEMrTpNUss", "annotations": [{"segment": [38.88658845, 50.0409156], "label": "Using parallel bars"}]}, "OeWhSE4s6Fw": {"duration": 233.384, "subset": "testing", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=OeWhSE4s6Fw", "annotations": []}, "fZ729TTJq8g": {"duration": 165.489, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=fZ729TTJq8g", "annotations": [{"segment": [70.97702808112325, 128.27484711388456], "label": "BMX"}]}, "F79Tzy2i7bI": {"duration": 57.075, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=F79Tzy2i7bI", "annotations": [{"segment": [9.784165366614664, 47.94409204368175], "label": "Putting in contact lenses"}]}, "eu3CLlQ9c2s": {"duration": 22.73, "subset": 
"training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eu3CLlQ9c2s", "annotations": [{"segment": [0.6738080026602424, 15.213874628705149], "label": "Fixing bicycle"}]}, "rMwNMDgXpYs": {"duration": 143.895, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rMwNMDgXpYs", "annotations": []}, "WwwfCw1dYlg": {"duration": 142.849, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WwwfCw1dYlg", "annotations": [{"segment": [18.05112197140947, 28.07952306663695], "label": "Swimming"}, {"segment": [38.776484234879604, 54.821925987243574], "label": "Swimming"}, {"segment": [57.27331292163251, 65.96459387082966], "label": "Swimming"}, {"segment": [80.45006211949159, 98.94688992823713], "label": "Swimming"}, {"segment": [108.30673086739469, 117.44371845085801], "label": "Swimming"}]}, "GJDl-whUpq4": {"duration": 109.645, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GJDl-whUpq4", "annotations": [{"segment": [0.01, 94.74264274570983], "label": "Knitting"}]}, "Iiwz1JtC7rk": {"duration": 106.116, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Iiwz1JtC7rk", "annotations": [{"segment": [6.9493946957878325, 12.90601872074883], "label": "Braiding hair"}, {"segment": [24.81926677067083, 26.639346333853354], "label": "Braiding hair"}, {"segment": [33.75483931357254, 45.33738221528861], "label": "Braiding hair"}, {"segment": [49.8049344773791, 52.4523728549142], "label": "Braiding hair"}, {"segment": [54.768881435257406, 60.394687987519504], "label": "Braiding hair"}, {"segment": [62.88733229329174, 70.50000936037442], "label": "Braiding hair"}, {"segment": [72.1549391575663, 82.08451794071763], "label": "Braiding hair"}, {"segment": [83.07747581903277, 86.05634945397816], "label": "Braiding hair"}]}, "1U8y7e22SQg": {"duration": 141.062, "subset": "validation", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=1U8y7e22SQg", "annotations": [{"segment": [14.150066941498979, 113.46419902249586], "label": "Mooping floor"}]}, "6qojVSLbyUU": {"duration": 139.134, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=6qojVSLbyUU", "annotations": [{"segment": [26.904904836193445, 118.26840093603744], "label": "Trimming branches or hedges"}]}, "gUFRtx51OJc": {"duration": 65.13, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gUFRtx51OJc", "annotations": [{"segment": [36.68249828, 48.14215736], "label": "Getting a piercing"}, {"segment": [12.02778068798752, 33.13541444], "label": "Getting a piercing"}]}, "jmerKGN0VPs": {"duration": 47.183, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=jmerKGN0VPs", "annotations": [{"segment": [1.030516801624446, 46.52047093915757], "label": "Hand car wash"}]}, "OrVgspx6Axs": {"duration": 186.921, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=OrVgspx6Axs", "annotations": [{"segment": [13.797756518937346, 164.3098515345612], "label": "BMX"}]}, "KNyM0KvDHMM": {"duration": 189.87, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KNyM0KvDHMM", "annotations": [{"segment": [43.82777935725429, 170.27684547581902], "label": "Fixing bicycle"}, {"segment": [134.23168693, 154.11476861], "label": "Fixing bicycle"}]}, "B7Q1egUKmZg": {"duration": 83.2, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=B7Q1egUKmZg", "annotations": [{"segment": [3.49609984399376, 77.82059282371296], "label": "Layup drill in basketball"}]}, "fjoMc-eo-jA": {"duration": 128.45, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fjoMc-eo-jA", "annotations": []}, "WgL0EbRWjF8": {"duration": 29.304, "subset": "testing", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=WgL0EbRWjF8", "annotations": []}, "IsVpeZoK5FI": {"duration": 70.66, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IsVpeZoK5FI", "annotations": [{"segment": [7.271429017160687, 63.790263650546024], "label": "Doing step aerobics"}]}, "59LX-HXwcVk": {"duration": 20.95, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=59LX-HXwcVk", "annotations": []}, "gV6W0rAHyZg": {"duration": 118.42, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=gV6W0rAHyZg", "annotations": [{"segment": [22.3638408, 104.1821928], "label": "Playing lacrosse"}]}, "QLVgy-1ydr0": {"duration": 215.389, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QLVgy-1ydr0", "annotations": [{"segment": [0.01, 103.80222411870945], "label": "Bullfighting"}, {"segment": [107.15717640076453, 190.35999166301542], "label": "Bullfighting"}]}, "sEENChh3zno": {"duration": 69.89, "subset": "training", "resolution": "300x240", "url": "https://www.youtube.com/watch?v=sEENChh3zno", "annotations": [{"segment": [0.8722878427457098, 13.629497542901715], "label": "Washing dishes"}]}, "R58R070r2GE": {"duration": 84.66, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=R58R070r2GE", "annotations": [{"segment": [4.490529183072085, 72.37676316380656], "label": "Using the balance beam"}, {"segment": [75.67862279719189, 77.52766419188768], "label": "Using the balance beam"}]}, "dtvIQ9Pd-Bg": {"duration": 169.901, "subset": "training", "resolution": "854x468", "url": "https://www.youtube.com/watch?v=dtvIQ9Pd-Bg", "annotations": [{"segment": [14.131903415961757, 158.89128445801782], "label": "Sharpening knives"}]}, "UPfQNZl0_dg": {"duration": 127.59, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=UPfQNZl0_dg", "annotations": [{"segment": [29.646815912636505, 
48.35017628705148], "label": "Putting on makeup"}, {"segment": [69.043255850234, 97.51152886115446], "label": "Putting on makeup"}, {"segment": [104.07863182527302, 108.05869422776911], "label": "Putting on makeup"}]}, "FI_1dsErIXE": {"duration": 151.57999999999998, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FI_1dsErIXE", "annotations": [{"segment": [27.76017806, 121.76795807], "label": "Polishing forniture"}]}, "mFWRIp164r4": {"duration": 161.449, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=mFWRIp164r4", "annotations": [{"segment": [77.30125897035882, 158.3990780031201], "label": "Tug of war"}]}, "PaAJ-6HT6bw": {"duration": 33.76, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PaAJ-6HT6bw", "annotations": [{"segment": [16.912283931357255, 32.879160686427454], "label": "Hammer throw"}]}, "Ho0eNR4jvEg": {"duration": 141.061, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Ho0eNR4jvEg", "annotations": [{"segment": [10.78315132605304, 16.28475914546786], "label": "Scuba diving"}, {"segment": [25.52746028208475, 29.928746537616604], "label": "Scuba diving"}, {"segment": [87.3655321723073, 141.061223], "label": "Scuba diving"}]}, "wO9DGYTAX70": {"duration": 150.28, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=wO9DGYTAX70", "annotations": [{"segment": [14.54828838, 56.19340433], "label": "Grooming horse"}, {"segment": [64.5284554, 135.42573002808112], "label": "Grooming horse"}]}, "ps0a-GGomX4": {"duration": 32.6, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ps0a-GGomX4", "annotations": [{"segment": [11.757516, 16.454524], "label": "Javelin throw"}]}, "CNdCnkKhitI": {"duration": 195.91, "subset": "validation", "resolution": "642x480", "url": "https://www.youtube.com/watch?v=CNdCnkKhitI", "annotations": [{"segment": 
[33.00762966028845, 76.1009238776911], "label": "Cricket"}, {"segment": [83.1303264848674, 136.61491153946957], "label": "Cricket"}, {"segment": [176.0406913797192, 186.4319821903276], "label": "Cricket"}]}, "uR-o9wILN6U": {"duration": 191.472, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uR-o9wILN6U", "annotations": [{"segment": [7.766400070202808, 152.04221675897037], "label": "Slacklining"}, {"segment": [164.58793994929798, 172.65304771450857], "label": "Slacklining"}]}, "KhpFBHrVsCE": {"duration": 16.72, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=KhpFBHrVsCE", "annotations": []}, "xSWpGhhM1H8": {"duration": 66.386, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xSWpGhhM1H8", "annotations": [{"segment": [50.521322932917315, 60.770525741029644], "label": "Tug of war"}]}, "PvrozrgcGf0": {"duration": 53.36, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PvrozrgcGf0", "annotations": [{"segment": [0.3551231727694574, 52.13509568211525], "label": "Putting on makeup"}]}, "6TUA9ipKk9I": {"duration": 65.57, "subset": "validation", "resolution": "418x240", "url": "https://www.youtube.com/watch?v=6TUA9ipKk9I", "annotations": [{"segment": [9.244836193447739, 43.568205928237134], "label": "Plataform diving"}]}, "NBXH7A2EO7Q": {"duration": 100.195, "subset": "validation", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=NBXH7A2EO7Q", "annotations": [{"segment": [0.01, 100.141], "label": "Assembling bicycle"}]}, "Yf49q7bGpkk": {"duration": 188.036, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Yf49q7bGpkk", "annotations": [{"segment": [0.01, 182.2614060543096], "label": "Bullfighting"}]}, "u0WgmhffTyI": {"duration": 224.282, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=u0WgmhffTyI", "annotations": 
[{"segment": [3.6738745264096275, 97.79504096490389], "label": "Table soccer"}]}, "WXEq3OeD68o": {"duration": 78.07, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=WXEq3OeD68o", "annotations": [{"segment": [1.2178702964118566, 62.23317214664587], "label": "Snatch"}]}, "0UTrTg3DF5g": {"duration": 86.52, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0UTrTg3DF5g", "annotations": []}, "98ukqD62X5s": {"duration": 173.66, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=98ukqD62X5s", "annotations": []}, "bqsWU8HqpxE": {"duration": 232.8, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bqsWU8HqpxE", "annotations": [{"segment": [0, 45.398452418096724], "label": "Pole vault"}]}, "4KgCkxpsN2g": {"duration": 190.845, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4KgCkxpsN2g", "annotations": [{"segment": [8.779172386895475, 14.433554602184087], "label": "Rafting"}, {"segment": [45.68145631825273, 48.35984789391575], "label": "Rafting"}, {"segment": [92.99970748829953, 119.48602418096723], "label": "Rafting"}, {"segment": [150.74972932917316, 160.57152808112323], "label": "Rafting"}, {"segment": [168.60754524180967, 179.91749531981282], "label": "Rafting"}]}, "zU6SnkNIdrw": {"duration": 177.935, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zU6SnkNIdrw", "annotations": [{"segment": [13.04664208064921, 17.76564028003297], "label": "Baking cookies"}, {"segment": [28.313989196302543, 76.05914744889115], "label": "Baking cookies"}]}, "Cb3IonOw0bs": {"duration": 72.33, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Cb3IonOw0bs", "annotations": [{"segment": [13.084112324492981, 69.70673634945398], "label": "Shaving legs"}]}, "Fp_uU-qMDSc": {"duration": 120.19, "subset": "training", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=Fp_uU-qMDSc", "annotations": [{"segment": [31.78098133853354, 51.46831491107644], "label": "Tai chi"}]}, "BI_3bkY3mO4": {"duration": 227.347, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BI_3bkY3mO4", "annotations": []}, "2imjxY43yYM": {"duration": 188.36, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2imjxY43yYM", "annotations": [{"segment": [0.01, 181.89551087621737], "label": "Cumbia"}]}, "T-Ngg5bptUc": {"duration": 126.48, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=T-Ngg5bptUc", "annotations": [{"segment": [35.30879719188768, 117.76174258970359], "label": "Decorating the Christmas tree"}]}, "L1oWKMy5ofM": {"duration": 136.44, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=L1oWKMy5ofM", "annotations": [{"segment": [39.2152277, 121.22088], "label": "Cumbia"}]}, "W2Mh01F3K5c": {"duration": 41.03, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=W2Mh01F3K5c", "annotations": []}, "vu65aIIJHtU": {"duration": 132.029, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vu65aIIJHtU", "annotations": [{"segment": [27.178614664586583, 75.56478471138846], "label": "Snowboarding"}]}, "k9VKFjPYT3o": {"duration": 160.31, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=k9VKFjPYT3o", "annotations": [{"segment": [20.126674425117002, 92.85475819999998], "label": "Rock climbing"}, {"segment": [103.63362172932916, 150.88755298829952], "label": "Rock climbing"}]}, "tN8mcdcwCwc": {"duration": 46.905, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tN8mcdcwCwc", "annotations": [{"segment": [0, 42.511422776911076], "label": "Clipping cat claws"}]}, "gJKw6jGjiDE": {"duration": 105.49000000000001, "subset": 
"training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gJKw6jGjiDE", "annotations": [{"segment": [17.855656092910383, 99.81064903548523], "label": "Using the balance beam"}]}, "WQAVlmS5IWs": {"duration": 92.16, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=WQAVlmS5IWs", "annotations": [{"segment": [13.51488299531981, 27.317316692667706], "label": "Washing face"}, {"segment": [52.478003120124804, 56.07238689547582], "label": "Washing face"}]}, "c4AVX65S8Qc": {"duration": 67.571, "subset": "testing", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=c4AVX65S8Qc", "annotations": []}, "cuyD4bT4Bvc": {"duration": 89.003, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cuyD4bT4Bvc", "annotations": [{"segment": [15.963112324492982, 82.45294539781591], "label": "Rollerblading"}]}, "xDD9rWISPpk": {"duration": 12.841, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=xDD9rWISPpk", "annotations": [{"segment": [0.01, 12.726566708751928], "label": "Powerbocking"}]}, "cT4EquMmRiw": {"duration": 176.82, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cT4EquMmRiw", "annotations": [{"segment": [23.520849797204278, 78.05396661963418], "label": "Canoeing"}, {"segment": [132.31166366013264, 155.17150556044416], "label": "Canoeing"}]}, "pgmfi9ADZuU": {"duration": 11.076, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pgmfi9ADZuU", "annotations": []}, "29k1TypoU4w": {"duration": 86.704, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=29k1TypoU4w", "annotations": [{"segment": [3.5827090483619344, 82.55655304212168], "label": "Kneeling"}]}, "T84iinm8-V0": {"duration": 165.536, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=T84iinm8-V0", "annotations": [{"segment": 
[9.125756468122251, 22.273032169056833], "label": "Doing fencing"}, {"segment": [27.94440600083253, 67.38623310363627], "label": "Doing fencing"}, {"segment": [75.11992469242132, 90.32951815036525], "label": "Doing fencing"}, {"segment": [97.80542001952412, 115.33512095410356], "label": "Doing fencing"}]}, "vxwoaEGTzc0": {"duration": 143.407, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vxwoaEGTzc0", "annotations": []}, "pEmtlUeJaI8": {"duration": 207.866, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=pEmtlUeJaI8", "annotations": []}, "VR19Scunfhg": {"duration": 137.672, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VR19Scunfhg", "annotations": [{"segment": [0.01, 113.28583272589329], "label": "Slacklining"}]}, "S24MDjduWIY": {"duration": 206.81, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=S24MDjduWIY", "annotations": [{"segment": [17.429055366296286, 40.02227528556925], "label": "Layup drill in basketball"}, {"segment": [45.509200123106964, 53.25544695257198], "label": "Layup drill in basketball"}, {"segment": [59.06513207467074, 64.8748171967695], "label": "Layup drill in basketball"}, {"segment": [71.33002288799035, 76.49418692199689], "label": "Layup drill in basketball"}, {"segment": [82.94939256942277, 92.95496132293292], "label": "Layup drill in basketball"}, {"segment": [101.02396838221529, 107.1564137472699], "label": "Layup drill in basketball"}, {"segment": [113.6116193946958, 140.07796254914194], "label": "Layup drill in basketball"}, {"segment": [145.56488734945398, 152.66561356162248], "label": "Layup drill in basketball"}, {"segment": [157.8297780795632, 174.9360730452418], "label": "Layup drill in basketball"}]}, "DzfQl845nUw": {"duration": 197.9, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=DzfQl845nUw", "annotations": []}, "INMUrGdA_RA": 
{"duration": 201.503, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=INMUrGdA_RA", "annotations": []}, "BLTOTjVYiuE": {"duration": 18.81, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BLTOTjVYiuE", "annotations": [{"segment": [0.11694851794071763, 10.028335413416537], "label": "Triple jump"}]}, "OsrRpGbIpKA": {"duration": 183.345, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OsrRpGbIpKA", "annotations": [{"segment": [34.46649290189294, 160.8913050399567], "label": "Hanging wallpaper"}]}, "3hZjxdMcG6o": {"duration": 11.24, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=3hZjxdMcG6o", "annotations": [{"segment": [9.362459550702027, 10.677411734789391], "label": "Layup drill in basketball"}]}, "8RMrbKCQheM": {"duration": 36.363, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8RMrbKCQheM", "annotations": [{"segment": [1.0179215035547657, 3.95858611776748], "label": "Painting fence"}, {"segment": [12.724028717824611, 13.45919487137779], "label": "Painting fence"}, {"segment": [19.62328031270829, 33.195578532151586], "label": "Painting fence"}]}, "p7VTBhv3OLY": {"duration": 138.368, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=p7VTBhv3OLY", "annotations": [{"segment": [19.744729329173165, 27.513147425897035], "label": "Hurling"}, {"segment": [29.23946255850234, 33.33946099843994], "label": "Hurling"}, {"segment": [62.039450078003114, 65.27629095163806], "label": "Hurling"}, {"segment": [76.49733931357255, 79.73418018720749], "label": "Hurling"}]}, "wU-8acM-IUM": {"duration": 210.03, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wU-8acM-IUM", "annotations": [{"segment": [8.846585369578783, 26.86740741872075], "label": "Playing saxophone"}, {"segment": [46.52648601778471, 
65.20261068689547], "label": "Playing saxophone"}, {"segment": [107.469629674883, 118.60977421435258], "label": "Playing saxophone"}, {"segment": [157.9279314124805, 202.16085826037443], "label": "Playing saxophone"}]}, "s9PkkDEl6aY": {"duration": 204.87, "subset": "testing", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=s9PkkDEl6aY", "annotations": []}, "gSkE0KCvves": {"duration": 14.722, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gSkE0KCvves", "annotations": [{"segment": [0.2, 12.26404810296412], "label": "Calf roping"}]}, "S9QG5MAga68": {"duration": 119.7, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=S9QG5MAga68", "annotations": [{"segment": [82.27344912081915, 94.95176388094376], "label": "Removing curlers"}]}, "0N8iIUS660o": {"duration": 128.987, "subset": "validation", "resolution": "620x348", "url": "https://www.youtube.com/watch?v=0N8iIUS660o", "annotations": [{"segment": [12.251403426791278, 98.42817757009347], "label": "Bullfighting"}]}, "ac1DjqY4xHs": {"duration": 30.604, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ac1DjqY4xHs", "annotations": [{"segment": [0.01, 30.6039], "label": "Hitting a pinata"}]}, "RPKcM8q74FE": {"duration": 75.256, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RPKcM8q74FE", "annotations": [{"segment": [0.2, 67.64835685048385], "label": "Riding bumper cars"}]}, "wHxB-5jKjbQ": {"duration": 196.719, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=wHxB-5jKjbQ", "annotations": [{"segment": [5.522461778471139, 191.15797659906397], "label": "Canoeing"}]}, "wLKePf07V14": {"duration": 197.23, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wLKePf07V14", "annotations": [{"segment": [22.6696162, 98.5597756], "label": "Wrapping presents"}, {"segment": [22.6696162, 
98.5597756], "label": "Wrapping presents"}]}, "0vCynjlkM5A": {"duration": 13.143, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0vCynjlkM5A", "annotations": [{"segment": [0.01, 13.142494], "label": "Changing car wheel"}]}, "D32TzYSHM08": {"duration": 63.09, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=D32TzYSHM08", "annotations": [{"segment": [4.183365279999999, 47.80430112], "label": "Playing kickball"}]}, "p9lhwYsBWdc": {"duration": 55.54, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=p9lhwYsBWdc", "annotations": []}, "GI8tylrKKlA": {"duration": 199.367, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GI8tylrKKlA", "annotations": [{"segment": [15.96174438443798, 182.41105831590218], "label": "Clipping cat claws"}]}, "y0ca-LbQErY": {"duration": 77.65, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=y0ca-LbQErY", "annotations": []}, "KPRymTsPCC0": {"duration": 105.257, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KPRymTsPCC0", "annotations": []}, "8ao9LZLn0Hs": {"duration": 138.37, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8ao9LZLn0Hs", "annotations": [{"segment": [0.01, 138.36721088435374], "label": "Playing violin"}]}, "L61yQxW8HVE": {"duration": 105.74000000000001, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=L61yQxW8HVE", "annotations": [{"segment": [16.991573115985865, 25.40487630933809], "label": "Playing flauta"}, {"segment": [46.68558438664078, 56.2536546849629], "label": "Playing flauta"}, {"segment": [87.7622999777134, 94.5259345647426], "label": "Playing flauta"}]}, "V3ZDyR7mVpc": {"duration": 213.531, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=V3ZDyR7mVpc", "annotations": 
[{"segment": [37.963422776911074, 68.93358346333854], "label": "Waterskiing"}, {"segment": [98.90470670826834, 116.55436817472699], "label": "Waterskiing"}, {"segment": [157.84939469578782, 172.50208112324492], "label": "Waterskiing"}, {"segment": [186.50446177847115, 196.8288159126365], "label": "Waterskiing"}]}, "UGjF8G0HLZA": {"duration": 72.73, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UGjF8G0HLZA", "annotations": [{"segment": [23.598725990639625, 72.72492], "label": "Paintball"}]}, "YULJjXegG1w": {"duration": 116.36, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YULJjXegG1w", "annotations": [{"segment": [6.353224024960999, 116.3547619047619], "label": "Layup drill in basketball"}]}, "N1JcXEim40g": {"duration": 47.53, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=N1JcXEim40g", "annotations": [{"segment": [13.866369737909517, 29.66068393135726], "label": "Using uneven bars"}]}, "ziL8fjQI4O4": {"duration": 19.83, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ziL8fjQI4O4", "annotations": []}, "iwKW9WiQlLA": {"duration": 13.05, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=iwKW9WiQlLA", "annotations": []}, "iiQQ8xZvZok": {"duration": 26.1, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=iiQQ8xZvZok", "annotations": [{"segment": [2.117254148669348, 22.516182658346334], "label": "Throwing darts"}]}, "cFcrXdvfxoo": {"duration": 15.047, "subset": "training", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=cFcrXdvfxoo", "annotations": [{"segment": [0, 14.997566302652107], "label": "Kneeling"}]}, "pcaPtKtS03E": {"duration": 231.225, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pcaPtKtS03E", "annotations": [{"segment": [1.6232595752809706, 10.28064397677948], 
"label": "Decorating the Christmas tree"}, {"segment": [21.463098828715054, 40.220765031961825], "label": "Decorating the Christmas tree"}, {"segment": [48.87814943346033, 53.92829033433447], "label": "Decorating the Christmas tree"}, {"segment": [100.10100714232651, 106.95476979351284], "label": "Decorating the Christmas tree"}]}, "Kkkrap77n5M": {"duration": 37.57, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Kkkrap77n5M", "annotations": [{"segment": [10.667268439937597, 12.132552566302651], "label": "Chopping wood"}]}, "YeZz5PZiiwU": {"duration": 174.87, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YeZz5PZiiwU", "annotations": [{"segment": [57.56234002285262, 67.92901737293981], "label": "Table soccer"}, {"segment": [99.84747131926093, 116.21590924045125], "label": "Table soccer"}]}, "w4QM4nztkkE": {"duration": 131.983, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=w4QM4nztkkE", "annotations": [{"segment": [36.021333853354136, 84.6114992199688], "label": "Tug of war"}]}, "BP9MfTepAv4": {"duration": 132.38, "subset": "validation", "resolution": "240x180", "url": "https://www.youtube.com/watch?v=BP9MfTepAv4", "annotations": [{"segment": [19.402722308892358, 127.195624024961], "label": "Playing accordion"}]}, "BSlVLi81VGM": {"duration": 76.63, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=BSlVLi81VGM", "annotations": [{"segment": [0.01, 46.85245949453978], "label": "Shaving legs"}]}, "yVx2NK2OOts": {"duration": 36.13, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yVx2NK2OOts", "annotations": []}, "bFm6E4cz5tM": {"duration": 179.769, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bFm6E4cz5tM", "annotations": [{"segment": [10.192504300787466, 12.15260119954136], "label": "Wakeboarding"}, {"segment": 
[22.51311337866909, 138.7188580905071], "label": "Wakeboarding"}]}, "7ezC4N3ArmQ": {"duration": 111.712, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=7ezC4N3ArmQ", "annotations": [{"segment": [0.01, 111.71120181405895], "label": "Beer pong"}]}, "FkRzwWhf8zQ": {"duration": 165.512, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FkRzwWhf8zQ", "annotations": []}, "bivzXu38CTE": {"duration": 219.61, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=bivzXu38CTE", "annotations": []}, "mdXSm8Yr6aU": {"duration": 120.071, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mdXSm8Yr6aU", "annotations": [{"segment": [0.2, 114.84181077429446], "label": "Swinging at the playground"}]}, "UgTqwLydrmU": {"duration": 25.287, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UgTqwLydrmU", "annotations": []}, "wOHj64rrjAo": {"duration": 52.269, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wOHj64rrjAo", "annotations": []}, "kW_2ihjROgw": {"duration": 117.38, "subset": "training", "resolution": "640x320", "url": "https://www.youtube.com/watch?v=kW_2ihjROgw", "annotations": [{"segment": [20.325705583325373, 55.84991173796611], "label": "Smoking a cigarette"}, {"segment": [72.8795557878315, 108.22064691107643], "label": "Smoking a cigarette"}]}, "DEEgoxzTvdk": {"duration": 140.597, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DEEgoxzTvdk", "annotations": [{"segment": [33.984173166926674, 76.95770826833073], "label": "Wakeboarding"}, {"segment": [101.29476131045242, 124.99184087363496], "label": "Wakeboarding"}]}, "VFC-V06hVj0": {"duration": 108.02, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VFC-V06hVj0", "annotations": []}, "Gi55CA6ktE8": {"duration": 109.134, "subset": 
"training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Gi55CA6ktE8", "annotations": [{"segment": [0.01, 96.84628549141966], "label": "Sumo"}]}, "dfjl7sS1IGo": {"duration": 206.17, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=dfjl7sS1IGo", "annotations": [{"segment": [19.93488611544462, 50.158745709828395], "label": "Blow-drying hair"}, {"segment": [59.483127925116996, 104.49738689547581], "label": "Blow-drying hair"}, {"segment": [107.0696302652106, 118.64472542901716], "label": "Blow-drying hair"}, {"segment": [125.40903276131047, 164.6395507020281], "label": "Blow-drying hair"}]}, "pV1b0Fs6qW8": {"duration": 31.324, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pV1b0Fs6qW8", "annotations": [{"segment": [0.2, 14.318017287051482], "label": "Kite flying"}]}, "R8Boo7g8o7I": {"duration": 64.67, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=R8Boo7g8o7I", "annotations": []}, "fUa3pwpNZ6I": {"duration": 154.02, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fUa3pwpNZ6I", "annotations": [{"segment": [18.880016100000002, 60.640743900000004], "label": "Using the balance beam"}, {"segment": [120.37909041809671, 147.29018448361933], "label": "Using the balance beam"}]}, "fV3gpTBX2Q8": {"duration": 181.79, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fV3gpTBX2Q8", "annotations": []}, "o4Y6P16eQoY": {"duration": 53.15, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=o4Y6P16eQoY", "annotations": []}, "NJQROeaBiVE": {"duration": 88.654, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NJQROeaBiVE", "annotations": [{"segment": [7.88336368556783, 71.08857674882995], "label": "Kneeling"}]}, "_hzMRy2_q4g": {"duration": 159.08, "subset": "training", "resolution": "640x480", 
"url": "https://www.youtube.com/watch?v=_hzMRy2_q4g", "annotations": [{"segment": [10.91563806552262, 121.32768954758191], "label": "Making a lemonade"}]}, "wjZhqbB0i_Y": {"duration": 44.4, "subset": "testing", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=wjZhqbB0i_Y", "annotations": []}, "1wjnveHAhGE": {"duration": 131.076, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1wjnveHAhGE", "annotations": [{"segment": [66.7349953271028, 128.57200934579438], "label": "Elliptical trainer"}]}, "jdAJ7cJBxnQ": {"duration": 9.59, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jdAJ7cJBxnQ", "annotations": [{"segment": [5.999260906396255, 8.138648212168487], "label": "Playing water polo"}]}, "z2GmaQXZU90": {"duration": 82.62, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=z2GmaQXZU90", "annotations": []}, "IToczVhbOc0": {"duration": 15.12, "subset": "testing", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=IToczVhbOc0", "annotations": []}, "kYrztBFfaZ8": {"duration": 33.76, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kYrztBFfaZ8", "annotations": [{"segment": [0, 7.742485413416537], "label": "Cricket"}, {"segment": [17.697109516380657, 33.603440093603744], "label": "Cricket"}]}, "RVKYgN2S9KA": {"duration": 79.715, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RVKYgN2S9KA", "annotations": [{"segment": [0.2, 76.94623868954758], "label": "Doing crunches"}]}, "jTpyOsXB2kk": {"duration": 59.37, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jTpyOsXB2kk", "annotations": []}, "-tQr3fi7nWE": {"duration": 39.13, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-tQr3fi7nWE", "annotations": []}, "aKLKf4Qd_4U": {"duration": 21.94, "subset": "training", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=aKLKf4Qd_4U", "annotations": [{"segment": [2.1020335413416538, 21.276680967238686], "label": "Mowing the lawn"}]}, "PlUAKvaRd8s": {"duration": 29.118, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PlUAKvaRd8s", "annotations": [{"segment": [0.2, 28.573563644859814], "label": "Snow tubing"}]}, "ZGL-PmMopeM": {"duration": 133.144, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZGL-PmMopeM", "annotations": [{"segment": [42.80592496378426, 67.93904448720464], "label": "Skiing"}, {"segment": [71.88556738757644, 82.89428916229778], "label": "Skiing"}, {"segment": [89.33335284185176, 91.20275777230664], "label": "Skiing"}, {"segment": [93.07216334328949, 121.94409382846891], "label": "Skiing"}]}, "bnItfkwBogY": {"duration": 73.05, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bnItfkwBogY", "annotations": [{"segment": [12.86913104524181, 54.09590483619345], "label": "Hula hoop"}]}, "QQtzcCmoHOA": {"duration": 107.97, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QQtzcCmoHOA", "annotations": []}, "G-fsWVYrAgc": {"duration": 169.645, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=G-fsWVYrAgc", "annotations": [{"segment": [34.39240249609984, 151.34441809672387], "label": "Making a cake"}]}, "slFZFCU1de0": {"duration": 183.786, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=slFZFCU1de0", "annotations": []}, "xzbIb0CCjTc": {"duration": 144.07999999999998, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=xzbIb0CCjTc", "annotations": []}, "iyGv338SnrQ": {"duration": 70.06, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iyGv338SnrQ", "annotations": [{"segment": [1.8847882914860095, 60.59098124475704], 
"label": "Playing harmonica"}]}, "eKiRykHu734": {"duration": 151.302, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eKiRykHu734", "annotations": [{"segment": [43.18942745709828, 118.23990795631825], "label": "Wakeboarding"}]}, "0D7IzGA1yNE": {"duration": 90.349, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=0D7IzGA1yNE", "annotations": []}, "12qgT1JASV4": {"duration": 159.31, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=12qgT1JASV4", "annotations": [{"segment": [2.9824419752300293, 27.3390514396086], "label": "Doing kickboxing"}, {"segment": [32.309788064991984, 54.42956575975039], "label": "Doing kickboxing"}, {"segment": [74.31251215600624, 137.93794062402495], "label": "Doing kickboxing"}]}, "YvMAOuGCGFA": {"duration": 114.381, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=YvMAOuGCGFA", "annotations": []}, "3zpODTqnYLs": {"duration": 31.42, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3zpODTqnYLs", "annotations": []}, "r9OvUc28Qi8": {"duration": 69.567, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=r9OvUc28Qi8", "annotations": [{"segment": [9.659066282371294, 38.31067862558502], "label": "Arm wrestling"}, {"segment": [44.49682219968798, 61.4273204024961], "label": "Arm wrestling"}]}, "9Zn0zErRckc": {"duration": 23.04, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9Zn0zErRckc", "annotations": [{"segment": [2.239274570982839, 14.38509984399376], "label": "Long jump"}]}, "dvzwVrZkDiI": {"duration": 10.98, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dvzwVrZkDiI", "annotations": [{"segment": [0.01, 9.560864274570983], "label": "Drinking coffee"}]}, "Alv7N6Ynm1Y": {"duration": 197.77, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=Alv7N6Ynm1Y", "annotations": [{"segment": [86.07840045846731, 111.37742855020323], "label": "Playing saxophone"}, {"segment": [117.54792320672419, 133.28268458085262], "label": "Playing saxophone"}]}, "z8lxaUC1Shk": {"duration": 75.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=z8lxaUC1Shk", "annotations": [{"segment": [10.120904836193448, 69.32527301092044], "label": "Playing bagpipes"}]}, "o4_Vi7asvxg": {"duration": 88.561, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=o4_Vi7asvxg", "annotations": [{"segment": [0.01, 73.29986419554477], "label": "BMX"}]}, "9I4H8O6B7yM": {"duration": 139.71, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9I4H8O6B7yM", "annotations": [{"segment": [13.07653666146646, 135.34215444617786], "label": "Doing motocross"}]}, "IcDadC2tw5c": {"duration": 30.256, "subset": "training", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=IcDadC2tw5c", "annotations": [{"segment": [0.2, 30.05560090702948], "label": "Blowing leaves"}]}, "IIAaKRUze8A": {"duration": 188.268, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IIAaKRUze8A", "annotations": [{"segment": [52.27859302011016, 54.624598205154065], "label": "Fixing the roof"}, {"segment": [98.02569412846628, 105.06370968359799], "label": "Fixing the roof"}, {"segment": [130.57651607095045, 138.78753340786784], "label": "Fixing the roof"}]}, "4HC2-Unzuu4": {"duration": 71.38, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4HC2-Unzuu4", "annotations": [{"segment": [13.682358814352575, 61.181279251170054], "label": "Mixing drinks"}]}, "PmyAt4zuLRY": {"duration": 15.535, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PmyAt4zuLRY", "annotations": [{"segment": [0.01, 15.180879250628083], "label": "Beer 
pong"}]}, "K_AFdv3PKp0": {"duration": 78.53, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=K_AFdv3PKp0", "annotations": [{"segment": [2.327718900156006, 77.30476978939157], "label": "Plastering"}]}, "KRz3aBw_TlQ": {"duration": 188.338, "subset": "training", "resolution": "568x480", "url": "https://www.youtube.com/watch?v=KRz3aBw_TlQ", "annotations": [{"segment": [9.989769174440447, 18.21663790633258], "label": "Throwing darts"}, {"segment": [30.263124263746064, 36.72709255308988], "label": "Throwing darts"}, {"segment": [45.24777802540673, 52.005563055175266], "label": "Throwing darts"}, {"segment": [61.99533222961572, 71.39746792320673], "label": "Throwing darts"}, {"segment": [77.56761947212583, 88.43883886784043], "label": "Throwing darts"}, {"segment": [99.89769174440447, 108.41837686427458], "label": "Throwing darts"}, {"segment": [114.58852839313573, 122.22776361934478], "label": "Throwing darts"}, {"segment": [131.33608254290172, 139.85676798751953], "label": "Throwing darts"}, {"segment": [146.32073625585022, 153.07852126365057], "label": "Throwing darts"}, {"segment": [167.7693582371295, 173.64569302652106], "label": "Throwing darts"}]}, "HQUjviIH9PU": {"duration": 193.585, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HQUjviIH9PU", "annotations": []}, "C_fqFJyf5SU": {"duration": 172.85, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=C_fqFJyf5SU", "annotations": [{"segment": [89.19032796705103, 167.53781403312857], "label": "Hula hoop"}]}, "jwyLw2n_YVw": {"duration": 171.875, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jwyLw2n_YVw", "annotations": [{"segment": [34.589310070362, 103.76792924180967], "label": "Baking cookies"}]}, "e0a1lp4ZWu8": {"duration": 89.86, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=e0a1lp4ZWu8", "annotations": [{"segment": 
[5.06187013, 75.59172098829953], "label": "Horseback riding"}]}, "FNlEHAIh6LQ": {"duration": 173.825, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FNlEHAIh6LQ", "annotations": [{"segment": [71.31960167114168, 112.53853495636424], "label": "Braiding hair"}]}, "763huDov1lM": {"duration": 184.065, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=763huDov1lM", "annotations": []}, "FrvK75jGdjE": {"duration": 140.34, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=FrvK75jGdjE", "annotations": [{"segment": [0.2, 136.18149801248052], "label": "Playing violin"}]}, "-ROljbRVlos": {"duration": 7.222, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=-ROljbRVlos", "annotations": [{"segment": [0.01, 6.880207805277031], "label": "Powerbocking"}]}, "uZw4m7-9A7o": {"duration": 61.88, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=uZw4m7-9A7o", "annotations": []}, "CHaTWk6uqd8": {"duration": 230.76, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CHaTWk6uqd8", "annotations": [{"segment": [9.89599581123245, 188.02392041341653], "label": "Fixing bicycle"}]}, "_LAABOgXU1s": {"duration": 129.684, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_LAABOgXU1s", "annotations": [{"segment": [27.51474545512433, 129.48346938775512], "label": "Changing car wheel"}]}, "RZqM8Z0ocjE": {"duration": 62.67, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RZqM8Z0ocjE", "annotations": [{"segment": [14.701807332293292, 24.958882215288607], "label": "Dodgeball"}, {"segment": [34.141406396255846, 42.933184867394694], "label": "Dodgeball"}]}, "chMp_uvII5g": {"duration": 45.419, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=chMp_uvII5g", 
"annotations": [{"segment": [3.3301979121684866, 45.4182311], "label": "Snow tubing"}]}, "s1YjWVUu6pM": {"duration": 197.091, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=s1YjWVUu6pM", "annotations": [{"segment": [59.640926677067085, 181.9970546021841], "label": "Kneeling"}]}, "rX2tRvv2ZfU": {"duration": 217.64, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=rX2tRvv2ZfU", "annotations": [{"segment": [0, 217.640633], "label": "Preparing salad"}]}, "K-ihmOEk7bM": {"duration": 45.999, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=K-ihmOEk7bM", "annotations": []}, "tRFK1BMKaRo": {"duration": 36.08, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=tRFK1BMKaRo", "annotations": []}, "ZrSK-fisXGg": {"duration": 100.45, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZrSK-fisXGg", "annotations": []}, "y4PXBhxpZHk": {"duration": 139.32, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=y4PXBhxpZHk", "annotations": [{"segment": [10.845872274143302, 44.69141121495327], "label": "Hurling"}, {"segment": [55.32189252336449, 67.68796261682243], "label": "Hurling"}, {"segment": [73.11167757009346, 90.90146261682244], "label": "Hurling"}, {"segment": [100.01330373831776, 107.38955607476636], "label": "Hurling"}]}, "wgAFLxYOmro": {"duration": 120.582, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wgAFLxYOmro", "annotations": []}, "DfiPwDSjjHE": {"duration": 203.06, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DfiPwDSjjHE", "annotations": [{"segment": [40.03491528, 182.143026], "label": "Wrapping presents"}]}, "RYv3eIxMouY": {"duration": 9.706, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=RYv3eIxMouY", "annotations": 
[{"segment": [6.253593166926677, 9.191116349453978], "label": "Hitting a pinata"}]}, "Cl96RZAFcZo": {"duration": 17.137, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Cl96RZAFcZo", "annotations": [{"segment": [2.4194032808112325, 16.45461568330733], "label": "Hand car wash"}]}, "N9LTq3gYmsI": {"duration": 35.55, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=N9LTq3gYmsI", "annotations": [{"segment": [18.645758566978195, 33.75048286604361], "label": "Snow tubing"}]}, "yipNcBb7q3Y": {"duration": 131.03, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yipNcBb7q3Y", "annotations": []}, "I5o7mRBAXdE": {"duration": 56.38, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=I5o7mRBAXdE", "annotations": []}, "VOyKKN3NdXM": {"duration": 227.185, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VOyKKN3NdXM", "annotations": [{"segment": [74.78288869786084, 78.6815227058062], "label": "Hand car wash"}, {"segment": [80.0992077996045, 81.87131416685239], "label": "Hand car wash"}, {"segment": [87.18763326859606, 90.73184600309183], "label": "Hand car wash"}, {"segment": [116.25017769146139, 119.4399691525076], "label": "Hand car wash"}, {"segment": [132.90797754359156, 149.2113561222721], "label": "Hand car wash"}, {"segment": [189.26095978783152, 207.69086598439938], "label": "Hand car wash"}]}, "KRSBbX-itrY": {"duration": 43.329, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KRSBbX-itrY", "annotations": [{"segment": [1.4847688275725657, 42.5183800623053], "label": "River tubing"}]}, "5Y1AJsAE9UE": {"duration": 200.32, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=5Y1AJsAE9UE", "annotations": [{"segment": [104.90504397168264, 135.47753306825584], "label": "Spinning"}, {"segment": [171.35341313056114, 
192.8789411679443], "label": "Spinning"}]}, "r9X01daYa5o": {"duration": 35.619, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=r9X01daYa5o", "annotations": [{"segment": [0.2, 35.361], "label": "Welding"}]}, "pCcB9E2CoD0": {"duration": 147.052, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pCcB9E2CoD0", "annotations": []}, "Pu5p7SC3sqg": {"duration": 28.89, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Pu5p7SC3sqg", "annotations": [{"segment": [0.2703749297971919, 8.42668531201248], "label": "Shoveling snow"}]}, "Zts8FynhoJs": {"duration": 20.09, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Zts8FynhoJs", "annotations": [{"segment": [0.01, 20.069591871294854], "label": "Brushing teeth"}]}, "5aUWT0CmJlc": {"duration": 79.39, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5aUWT0CmJlc", "annotations": [{"segment": [26.008684118564744, 45.20557001560063], "label": "Curling"}]}, "nR7tqWpNpe8": {"duration": 168.067, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nR7tqWpNpe8", "annotations": [{"segment": [19.73853539849746, 71.04825451565048], "label": "Rafting"}, {"segment": [80.99605722904613, 106.65091685832961], "label": "Rafting"}, {"segment": [113.71909247047915, 135.1854035888592], "label": "Rafting"}]}, "mHbsKzezfFQ": {"duration": 179.63, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=mHbsKzezfFQ", "annotations": [{"segment": [28.443590372186318, 147.5423677926709], "label": "Getting a haircut"}]}, "z6g5QbIPatk": {"duration": 204.986, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=z6g5QbIPatk", "annotations": [{"segment": [6.586366826928051, 170.41115912636505], "label": "Carving jack-o-lanterns"}]}, "WoB4lSNBDww": {"duration": 155.783, 
"subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WoB4lSNBDww", "annotations": [{"segment": [17.49352886115445, 139.23721216848674], "label": "Hanging wallpaper"}]}, "A1U24_iC6ww": {"duration": 91.046, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=A1U24_iC6ww", "annotations": [{"segment": [10.08458889703588, 90.90333653666147], "label": "Plastering"}]}, "Z5apXICOLTc": {"duration": 64.18, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Z5apXICOLTc", "annotations": []}, "teZ99ahe1zY": {"duration": 40.82, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=teZ99ahe1zY", "annotations": [{"segment": [0.4546524, 36.40900468018721], "label": "Playing polo"}]}, "7OcfzMIZsxA": {"duration": 176.263, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=7OcfzMIZsxA", "annotations": [{"segment": [0.01, 11.821377535101403], "label": "Having an ice cream"}, {"segment": [53.60857254290172, 56.082814352574104], "label": "Having an ice cream"}, {"segment": [91.27203120124805, 94.57102028081123], "label": "Having an ice cream"}, {"segment": [140.7568673946958, 146.27178159126368], "label": "Having an ice cream"}, {"segment": [151.49577379095166, 157.26965990639627], "label": "Having an ice cream"}, {"segment": [164.41828081123248, 166.8928034321373], "label": "Having an ice cream"}]}, "HV_yqsiFoKA": {"duration": 30.79, "subset": "training", "resolution": "640x358", "url": "https://www.youtube.com/watch?v=HV_yqsiFoKA", "annotations": [{"segment": [0.01, 29.917707861273456], "label": "Doing a powerbomb"}]}, "k5zENHXMSmI": {"duration": 206.774, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=k5zENHXMSmI", "annotations": []}, "5czN-C_DwmU": {"duration": 126.967, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=5czN-C_DwmU", 
"annotations": []}, "g0RK-2ydod0": {"duration": 186.55, "subset": "training", "resolution": "562x480", "url": "https://www.youtube.com/watch?v=g0RK-2ydod0", "annotations": [{"segment": [38.68411856474259, 181.49541341653668], "label": "Preparing salad"}]}, "FaiSWZFSHSE": {"duration": 180.512, "subset": "training", "resolution": "638x360", "url": "https://www.youtube.com/watch?v=FaiSWZFSHSE", "annotations": [{"segment": [26.752934290171606, 82.79329138221527], "label": "Making a cake"}, {"segment": [91.80480609048362, 150.66126152886113], "label": "Making a cake"}]}, "B7Ddfw2PXOI": {"duration": 80.69, "subset": "validation", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=B7Ddfw2PXOI", "annotations": [{"segment": [7.426946140405616, 74.14358096099843], "label": "Playing bagpipes"}]}, "jlqC3lFqxhQ": {"duration": 201.688, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jlqC3lFqxhQ", "annotations": [{"segment": [7.86616716369335, 15.7323343273867], "label": "Snowboarding"}, {"segment": [20.45203462560271, 40.274775878109956], "label": "Snowboarding"}, {"segment": [44.67982948977823, 49.71417647454197], "label": "Snowboarding"}, {"segment": [60.726810503712656, 68.27833098085827], "label": "Snowboarding"}, {"segment": [76.7737915176471, 191.9344787207488], "label": "Snowboarding"}]}, "TIEzvhv6xaI": {"duration": 19.25, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=TIEzvhv6xaI", "annotations": [{"segment": [0.5778618576693313, 18.987667736174007], "label": "Volleyball"}]}, "GsR4fagoV-Q": {"duration": 118.0, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=GsR4fagoV-Q", "annotations": [{"segment": [13.991091120124807, 17.857050508580343], "label": "Ping-pong"}, {"segment": [29.45492867394696, 43.2619264898596], "label": "Ping-pong"}, {"segment": [56.884831001560066, 62.22353682371295], "label": "Ping-pong"}, {"segment": 
[66.64177612480499, 78.42374759438378], "label": "Ping-pong"}, {"segment": [86.5238529797192, 111.37644904836195], "label": "Ping-pong"}]}, "Nb6AQxxrihA": {"duration": 149.769, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Nb6AQxxrihA", "annotations": []}, "RAluocUocdw": {"duration": 87.261, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RAluocUocdw", "annotations": [{"segment": [0.01, 87.201], "label": "Surfing"}]}, "wLcpMJv83XY": {"duration": 168.995, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=wLcpMJv83XY", "annotations": [{"segment": [28.200759750390016, 166.041856474259], "label": "Playing ten pins"}]}, "iwHZqcYwZYU": {"duration": 11.59, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=iwHZqcYwZYU", "annotations": [{"segment": [3.2536912324492975, 6.67006702652106], "label": "Springboard diving"}]}, "TxHDsb8V0Gg": {"duration": 208.724, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TxHDsb8V0Gg", "annotations": []}, "MsalIjwP3no": {"duration": 63.414, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MsalIjwP3no", "annotations": [{"segment": [3.0572684492731774, 30.474075874624887], "label": "Plastering"}, {"segment": [41.02658810635117, 60.0605587673154], "label": "Plastering"}]}, "7mDiIJ9r4EU": {"duration": 152.184, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7mDiIJ9r4EU", "annotations": [{"segment": [45.814466458658345, 140.52934789391577], "label": "Wakeboarding"}]}, "6-QyMZa-hFM": {"duration": 95.62, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6-QyMZa-hFM", "annotations": []}, "9Rd8PpfRNmY": {"duration": 73.63, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9Rd8PpfRNmY", "annotations": 
[{"segment": [3.9055166677067086, 36.41319951950078], "label": "Doing nails"}, {"segment": [48.244617659906396, 73.430476], "label": "Doing nails"}]}, "-5c9WHk408g": {"duration": 142.5, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-5c9WHk408g", "annotations": [{"segment": [0.01, 142.5007029478458], "label": "Paintball"}]}, "TMAXjnsN0Mk": {"duration": 207.98, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TMAXjnsN0Mk", "annotations": []}, "tnXjdv_GM9g": {"duration": 56.17, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=tnXjdv_GM9g", "annotations": [{"segment": [13.032331379999999, 49.401758879999996], "label": "Long jump"}]}, "0AjYz-s4Rek": {"duration": 81.851, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0AjYz-s4Rek", "annotations": [{"segment": [29.734216848673945, 37.01730109204368], "label": "Ice fishing"}, {"segment": [45.186636505460214, 51.696575663026515], "label": "Ice fishing"}, {"segment": [57.56828549141966, 63.05705772230889], "label": "Ice fishing"}]}, "9_wuEvN2OnQ": {"duration": 184.6, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9_wuEvN2OnQ", "annotations": [{"segment": [1.7279123818013946, 182.2947548424337], "label": "Playing badminton"}]}, "7D5ItfgJFVw": {"duration": 47.69, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=7D5ItfgJFVw", "annotations": [{"segment": [0.01, 11.011981560062402], "label": "Washing face"}]}, "R9qRR8CcSJA": {"duration": 70.06, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=R9qRR8CcSJA", "annotations": [{"segment": [14.53551048708615, 70.05460317460317], "label": "Cleaning windows"}]}, "GMwV9roiBYo": {"duration": 166.488, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GMwV9roiBYo", "annotations": 
[{"segment": [0, 166.487073], "label": "Playing ice hockey"}]}, "4bUxtqX_oxM": {"duration": 19.784, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4bUxtqX_oxM", "annotations": [{"segment": [0.2, 19.58278911564626], "label": "Camel ride"}]}, "T3ngvDyxYiE": {"duration": 122.486, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=T3ngvDyxYiE", "annotations": [{"segment": [0.2, 119.76678159126365], "label": "Tug of war"}]}, "svWiQtzgtOc": {"duration": 82.62, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=svWiQtzgtOc", "annotations": [{"segment": [9.349408400936037, 68.7987500951638], "label": "Breakdancing"}]}, "wsQCNAdwFSY": {"duration": 170.62, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=wsQCNAdwFSY", "annotations": []}, "LbGr5TjfTBY": {"duration": 225.582, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LbGr5TjfTBY", "annotations": [{"segment": [134.7496770670827, 166.76591887675505], "label": "Sharpening knives"}]}, "0r-_a6m5k-0": {"duration": 46.65, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=0r-_a6m5k-0", "annotations": [{"segment": [0.6913641599063963, 29.874209225429016], "label": "Preparing salad"}]}, "GcjzvBatqb4": {"duration": 57.423, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GcjzvBatqb4", "annotations": []}, "6URUyopL8qM": {"duration": 79.691, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6URUyopL8qM", "annotations": [{"segment": [7.832333274570982, 77.45307349297971], "label": "River tubing"}]}, "8YkCDiVc7RI": {"duration": 107.75999999999999, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8YkCDiVc7RI", "annotations": [{"segment": [19.33360049921997, 76.66192893603744], "label": 
"Tumbling"}, {"segment": [82.54606821840873, 91.12009974414977], "label": "Tumbling"}]}, "kDurnkUZCXs": {"duration": 29.95, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kDurnkUZCXs", "annotations": [{"segment": [0.6074861466458659, 29.01914592823713], "label": "Cleaning shoes"}]}, "ry-AEtNb4c0": {"duration": 188.64, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ry-AEtNb4c0", "annotations": [{"segment": [45.61471302652106, 168.33300549141967], "label": "Painting"}]}, "5SzzJMuyC_M": {"duration": 91.557, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=5SzzJMuyC_M", "annotations": [{"segment": [3.7828026521060845, 81.45511778471139], "label": "Plastering"}]}, "kZMDKbfIis0": {"duration": 76.394, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kZMDKbfIis0", "annotations": [{"segment": [4.2904403196536025, 56.13325976911076], "label": "Curling"}]}, "73LZVxxlJV8": {"duration": 62.97, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=73LZVxxlJV8", "annotations": [{"segment": [16.16065912792512, 52.7063442074883], "label": "Cleaning windows"}]}, "9SIPJd4Hls4": {"duration": 96.87, "subset": "testing", "resolution": "2276x720", "url": "https://www.youtube.com/watch?v=9SIPJd4Hls4", "annotations": []}, "7ZbH4vHTmVs": {"duration": 105.582, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7ZbH4vHTmVs", "annotations": [{"segment": [0.01, 18.694943346033163], "label": "Raking leaves"}, {"segment": [24.624617006448965, 28.083593308358182], "label": "Raking leaves"}, {"segment": [39.613514314722245, 96.43955447379024], "label": "Raking leaves"}]}, "k3oPZS_Id3M": {"duration": 131.5, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=k3oPZS_Id3M", "annotations": [{"segment": [52.92606492831142, 
100.10821583339525], "label": "Hand washing clothes"}]}, "YIb8sArOPww": {"duration": 79.67, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YIb8sArOPww", "annotations": [{"segment": [3.1071401326053043, 76.18707605148207], "label": "Using the balance beam"}]}, "EHXpUlacN88": {"duration": 6.665, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=EHXpUlacN88", "annotations": [{"segment": [0.01, 6.52295546260755], "label": "Doing a powerbomb"}]}, "RW6jDO5QmPI": {"duration": 31.46, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RW6jDO5QmPI", "annotations": []}, "IV5llWEEfAw": {"duration": 73.86, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=IV5llWEEfAw", "annotations": [{"segment": [3.0516341653666146, 68.92086973478939], "label": "Sailing"}]}, "7pcnWE7jpQY": {"duration": 85.87, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7pcnWE7jpQY", "annotations": [{"segment": [0, 85.781], "label": "High jump"}]}, "g8Yx4qWzNH0": {"duration": 14.98, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=g8Yx4qWzNH0", "annotations": [{"segment": [5.140267394695788, 12.944127893915757], "label": "Clean and jerk"}]}, "nN1u9BVysQs": {"duration": 140.899, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nN1u9BVysQs", "annotations": [{"segment": [37.14801497035881, 76.93375881435257], "label": "Capoeira"}]}, "jIYRVlrXJgc": {"duration": 10.03, "subset": "training", "resolution": "326x240", "url": "https://www.youtube.com/watch?v=jIYRVlrXJgc", "annotations": [{"segment": [0.9389409360374416, 3.53667752574103], "label": "Playing racquetball"}]}, "i5-OVkjT0nM": {"duration": 201.643, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=i5-OVkjT0nM", "annotations": [{"segment": 
[25.165924841075277, 165.1513817695565], "label": "Making a lemonade"}]}, "Vwfi0Nya4UQ": {"duration": 188.55, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Vwfi0Nya4UQ", "annotations": []}, "66nA52ux2Sk": {"duration": 216.341, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=66nA52ux2Sk", "annotations": [{"segment": [0.2, 30.70731825273011], "label": "Playing drums"}, {"segment": [52.64111700468018, 75.5872449297972], "label": "Playing drums"}, {"segment": [137.00188143525742, 145.77540093603744], "label": "Playing drums"}]}, "k6pN0WRr2Ws": {"duration": 210.65, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=k6pN0WRr2Ws", "annotations": []}, "roTIeCtx94Q": {"duration": 104.328, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=roTIeCtx94Q", "annotations": [{"segment": [16.387327041852252, 72.20159701880625], "label": "Baking cookies"}]}, "w87EDMJo5NM": {"duration": 88.35, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=w87EDMJo5NM", "annotations": [{"segment": [3.4458638889773283, 12.40511000031838], "label": "Shaving legs"}, {"segment": [34.59647344533238, 47.13941800120985], "label": "Shaving legs"}, {"segment": [68.36593869578783, 78.29002657098285], "label": "Shaving legs"}]}, "n9TuUTNpKwg": {"duration": 88.47, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=n9TuUTNpKwg", "annotations": [{"segment": [13.104516380655229, 19.44986115444618], "label": "Triple jump"}, {"segment": [44.14152886115445, 62.34990951638066], "label": "Triple jump"}]}, "DhYJl8KAZN4": {"duration": 230.76, "subset": "training", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=DhYJl8KAZN4", "annotations": [{"segment": [11.15999342014497, 217.79987069422776], "label": "Playing congas"}]}, "_AdJtwjeITM": {"duration": 60.12, "subset": 
"training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_AdJtwjeITM", "annotations": [{"segment": [6.283623848673947, 31.32433381279251], "label": "Removing curlers"}]}, "m73Cod-PmMQ": {"duration": 105.396, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=m73Cod-PmMQ", "annotations": []}, "N5XBi-uPkAU": {"duration": 12.377, "subset": "validation", "resolution": "642x480", "url": "https://www.youtube.com/watch?v=N5XBi-uPkAU", "annotations": [{"segment": [0.6353400936037441, 10.45423244929797], "label": "Doing a powerbomb"}]}, "bscUMcCmyYw": {"duration": 14.4, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=bscUMcCmyYw", "annotations": [{"segment": [0.30361164, 12.838502340093605], "label": "Hopscotch"}]}, "e-44ig51evc": {"duration": 197.022, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=e-44ig51evc", "annotations": [{"segment": [5.839931265277822, 196.82083900226758], "label": "Futsal"}]}, "-ypKYhjrlXw": {"duration": 212.811, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-ypKYhjrlXw", "annotations": [{"segment": [45.4837612652106, 212.810883], "label": "Applying sunscreen"}]}, "uLT7PmD_AJA": {"duration": 120.884, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uLT7PmD_AJA", "annotations": [{"segment": [8.10633853354134, 92.955496099844], "label": "Braiding hair"}]}, "evj6y2xZCnM": {"duration": 214.69, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=evj6y2xZCnM", "annotations": [{"segment": [14.397397251170046, 205.91626301092043], "label": "Skateboarding"}]}, "aSPPD0fDI3g": {"duration": 197.79, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aSPPD0fDI3g", "annotations": [{"segment": [12.031383210000001, 180.75189746645867], "label": "Putting on makeup"}]}, 
"GfSGZZSGH6g": {"duration": 7.756, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=GfSGZZSGH6g", "annotations": [{"segment": [0.2, 6.558894462616823], "label": "Roof shingle removal"}]}, "Zf3RcqHO82M": {"duration": 211.57999999999998, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Zf3RcqHO82M", "annotations": [{"segment": [2.697645, 196.39662234108414], "label": "Tango"}]}, "loUYsxA2RHg": {"duration": 86.7, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=loUYsxA2RHg", "annotations": [{"segment": [8.584552716068643, 76.31464579875195], "label": "Mowing the lawn"}]}, "kxwTPAhlhpw": {"duration": 116.22, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=kxwTPAhlhpw", "annotations": [{"segment": [41.33731484555382, 63.3116077], "label": "Mixing drinks"}]}, "oqVNFPUANfs": {"duration": 124.9, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oqVNFPUANfs", "annotations": [{"segment": [6.428101404056163, 86.68197347893916], "label": "Using the balance beam"}, {"segment": [109.0829329173167, 117.84852574102965], "label": "Using the balance beam"}]}, "xxXspvK4tEY": {"duration": 95.76, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=xxXspvK4tEY", "annotations": [{"segment": [1.9427191887675508, 93.39996099843994], "label": "Knitting"}]}, "Io8EwPbNUbE": {"duration": 84.15, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Io8EwPbNUbE", "annotations": [{"segment": [4.726003338533541, 63.01337784711388], "label": "Washing dishes"}]}, "FrDKwP9XaOo": {"duration": 99.243, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FrDKwP9XaOo", "annotations": [{"segment": [0.01, 98.04642141962498], "label": "Tug of war"}]}, "CQXhtaNkhrw": {"duration": 136.98, "subset": "validation", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=CQXhtaNkhrw", "annotations": [{"segment": [0.010000000000000002, 117.50007800312014], "label": "Playing accordion"}]}, "Ve0Ufrv-T0s": {"duration": 76.3, "subset": "training", "resolution": "400x224", "url": "https://www.youtube.com/watch?v=Ve0Ufrv-T0s", "annotations": [{"segment": [0, 70.46775026521061], "label": "Walking the dog"}]}, "3j4OdbgaoAM": {"duration": 127.66, "subset": "training", "resolution": "426x238", "url": "https://www.youtube.com/watch?v=3j4OdbgaoAM", "annotations": [{"segment": [33.144409516380655, 38.91730967238689], "label": "Playing polo"}, {"segment": [49.46778237129485, 77.33695553822153], "label": "Playing polo"}]}, "6IoiW8zpps0": {"duration": 197.18, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=6IoiW8zpps0", "annotations": []}, "Z-gDuGMpClo": {"duration": 187.571, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Z-gDuGMpClo", "annotations": [{"segment": [24.872882153381376, 158.601200099844], "label": "Hand car wash"}]}, "OiNsqyHkg7E": {"duration": 149.37, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=OiNsqyHkg7E", "annotations": []}, "Ygt3z-K-ZMQ": {"duration": 99.06, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=Ygt3z-K-ZMQ", "annotations": [{"segment": [32.334249609984404, 89.13140405616225], "label": "Mixing drinks"}]}, "p771liKjycc": {"duration": 46.186, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=p771liKjycc", "annotations": [{"segment": [4.2913266411237405, 14.027950112580966], "label": "Using the monkey bar"}, {"segment": [34.00605901327468, 42.01172720091729], "label": "Using the monkey bar"}]}, "BdAPzcdrk9g": {"duration": 55.29, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BdAPzcdrk9g", "annotations": [{"segment": 
[1.7250143837753509, 53.64794733541341], "label": "Playing badminton"}]}, "ZpyCrs-q-so": {"duration": 45.28, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZpyCrs-q-so", "annotations": [{"segment": [8.885063962558503, 11.000555382215289], "label": "Chopping wood"}, {"segment": [20.167684867394694, 36.5980015600624], "label": "Chopping wood"}]}, "BGeB6KEQM94": {"duration": 212.184, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BGeB6KEQM94", "annotations": [{"segment": [1.489589926454201, 9.10304955055345], "label": "Longboarding"}, {"segment": [41.54300794888938, 53.79074734417947], "label": "Longboarding"}, {"segment": [105.42986479459178, 114.03638436966048], "label": "Longboarding"}, {"segment": [171.3028415422331, 190.8330192074883], "label": "Longboarding"}]}, "VopRFLr6Zlg": {"duration": 206.635, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VopRFLr6Zlg", "annotations": []}, "9UpVdljXQ4E": {"duration": 46.464, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9UpVdljXQ4E", "annotations": [{"segment": [5.865992199687987, 42.963358814352574], "label": "Rollerblading"}]}, "ShT4biEuQDo": {"duration": 22.29, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=ShT4biEuQDo", "annotations": [{"segment": [0, 3.0602521684867394], "label": "Shoveling snow"}]}, "iubDO1DSMZk": {"duration": 77.927, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iubDO1DSMZk", "annotations": [{"segment": [7.271444707625315, 33.44865148203528], "label": "Decorating the Christmas tree"}, {"segment": [40.11414399369936, 45.082965684212574], "label": "Decorating the Christmas tree"}, {"segment": [53.32393824408816, 66.04896940271958], "label": "Decorating the Christmas tree"}]}, "pYUqRy9Ujaw": {"duration": 71.17, "subset": "training", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=pYUqRy9Ujaw", "annotations": [{"segment": [24.4252008, 56.9195236474259], "label": "Windsurfing"}, {"segment": [1.775267794071763, 21.03115119], "label": "Windsurfing"}]}, "j_q_MOjw80Q": {"duration": 139.761, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=j_q_MOjw80Q", "annotations": [{"segment": [22.239645112335104, 122.53608295163806], "label": "Baking cookies"}]}, "v6P6bBPBXYw": {"duration": 223.005, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=v6P6bBPBXYw", "annotations": []}, "rSTXxDLYxTQ": {"duration": 214.74, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=rSTXxDLYxTQ", "annotations": [{"segment": [8.375122487892712, 211.05308650858035], "label": "Polishing shoes"}]}, "eGLD-0b1LV0": {"duration": 112.014, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=eGLD-0b1LV0", "annotations": [{"segment": [32.30169266770671, 79.98280499219969], "label": "Elliptical trainer"}]}, "kpGgixgrvn8": {"duration": 226.743, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=kpGgixgrvn8", "annotations": []}, "G122-qoH3dA": {"duration": 117.283, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=G122-qoH3dA", "annotations": []}, "qYVMKC4LA1U": {"duration": 105.094, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qYVMKC4LA1U", "annotations": []}, "R7BkxDWH74A": {"duration": 215.34, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=R7BkxDWH74A", "annotations": []}, "rBgMeunuviE": {"duration": 92.37, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rBgMeunuviE", "annotations": [{"segment": [12.762486559999997, 30.34473536], "label": "Playing accordion"}, {"segment": [12.762486559999997, 
30.344735359999998], "label": "Playing accordion"}]}, "qRPq2PEiyM0": {"duration": 56.59, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qRPq2PEiyM0", "annotations": [{"segment": [0.6179550639625585, 53.32069409048361], "label": "Playing saxophone"}]}, "wQeY9FcRTLY": {"duration": 54.13, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=wQeY9FcRTLY", "annotations": []}, "Cy2wqpjppy8": {"duration": 64.18, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Cy2wqpjppy8", "annotations": [{"segment": [17.709112324492978, 61.23150702028081], "label": "Building sandcastles"}]}, "Y-CZasxVlx4": {"duration": 238.05, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Y-CZasxVlx4", "annotations": [{"segment": [7.798862640219895, 223.19602058814354], "label": "Playing guitarra"}]}, "5hXH-TorJ6M": {"duration": 70.33, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5hXH-TorJ6M", "annotations": [{"segment": [4.715979719188767, 68.87523868954759], "label": "Mixing drinks"}]}, "Eb_9_Bcij0Q": {"duration": 56.12, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Eb_9_Bcij0Q", "annotations": [{"segment": [3.430613484546032, 53.92098420105694], "label": "Shaving"}]}, "H2l7GraYFhI": {"duration": 189.1, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=H2l7GraYFhI", "annotations": [{"segment": [16.815738865834632, 179.0728682730109], "label": "Playing saxophone"}]}, "Gk_sOXqbwZE": {"duration": 17.16, "subset": "testing", "resolution": "608x1080", "url": "https://www.youtube.com/watch?v=Gk_sOXqbwZE", "annotations": []}, "NCKTeRW3OhQ": {"duration": 66.78, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NCKTeRW3OhQ", "annotations": []}, "U1nvAxorOPQ": {"duration": 115.28999999999999, 
"subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=U1nvAxorOPQ", "annotations": [{"segment": [59.47310469000001, 85.99718478000001], "label": "Grooming horse"}, {"segment": [8.63960778, 50.107188810000004], "label": "Grooming horse"}]}, "RCyZgp1F06s": {"duration": 36.642, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RCyZgp1F06s", "annotations": [{"segment": [0.2, 34.361704148416756], "label": "Swinging at the playground"}]}, "rfH9VLQAuwY": {"duration": 39.242, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rfH9VLQAuwY", "annotations": [{"segment": [2.0202446583463343, 12.243907020280812], "label": "Shuffleboard"}, {"segment": [18.182201925117006, 25.95708288299532], "label": "Shuffleboard"}]}, "bMJlN9iPpCI": {"duration": 189.522, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bMJlN9iPpCI", "annotations": [{"segment": [0, 7.687291116134442], "label": "Applying sunscreen"}, {"segment": [96.97813408046525, 99.63911946681948], "label": "Applying sunscreen"}, {"segment": [115.9006968278731, 117.37902204251435], "label": "Applying sunscreen"}]}, "-D1gdv_gQyw": {"duration": 18.07, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-D1gdv_gQyw", "annotations": []}, "7LmSZAoD6-c": {"duration": 109.58, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7LmSZAoD6-c", "annotations": [{"segment": [0.01, 109.57430839002268], "label": "High jump"}]}, "svNcNLEPKMc": {"duration": 79.51, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=svNcNLEPKMc", "annotations": []}, "ZxCdhz4ep0s": {"duration": 217.76, "subset": "testing", "resolution": "480x270", "url": "https://www.youtube.com/watch?v=ZxCdhz4ep0s", "annotations": []}, "tJLm2D4J3X0": {"duration": 210.1, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=tJLm2D4J3X0", "annotations": [{"segment": [14.7491991575663, 210.09414965986394], "label": "Triple jump"}]}, "Gkv5ibx0tYw": {"duration": 97.2, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Gkv5ibx0tYw", "annotations": [{"segment": [33.80172074882995, 65.17820592823713], "label": "Removing curlers"}]}, "kGxikShxkpQ": {"duration": 77.184, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kGxikShxkpQ", "annotations": [{"segment": [23.02809906396256, 72.71069344773791], "label": "Tug of war"}]}, "EVSuDXeNUYQ": {"duration": 169.9, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EVSuDXeNUYQ", "annotations": [{"segment": [14.31292941513579, 165.12879556318254], "label": "Cheerleading"}]}, "vlIAZsrj6yk": {"duration": 154.02, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=vlIAZsrj6yk", "annotations": [{"segment": [0.7205663026521062, 146.53418096723868], "label": "Shaving"}]}, "UAAk2SxqSvs": {"duration": 217.965, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UAAk2SxqSvs", "annotations": [{"segment": [21.08243666770671, 73.44848903588144], "label": "Longboarding"}, {"segment": [78.88911785335414, 199.94310904212168], "label": "Longboarding"}]}, "_4u2dthCcMs": {"duration": 19.78, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_4u2dthCcMs", "annotations": []}, "I0j_h1bjj7w": {"duration": 54.149, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=I0j_h1bjj7w", "annotations": []}, "k5wY4N61bzU": {"duration": 160.15, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=k5wY4N61bzU", "annotations": [{"segment": [10.24347748205928, 144.90773023400934], "label": "Walking the dog"}]}, "sVeC5VlDsjk": {"duration": 174.243, "subset": "training", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sVeC5VlDsjk", "annotations": [{"segment": [15.766095482059283, 150.04973631201247], "label": "Surfing"}]}, "teDceMxV9PA": {"duration": 112.849, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=teDceMxV9PA", "annotations": [{"segment": [12.90204853176616, 42.08106776541102], "label": "Powerbocking"}, {"segment": [50.166820083168034, 55.26435958784093], "label": "Powerbocking"}, {"segment": [65.81099304578485, 70.20542365326149], "label": "Powerbocking"}, {"segment": [103.95465071868205, 112.648978], "label": "Powerbocking"}]}, "7c18sUik4zM": {"duration": 103.19, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7c18sUik4zM", "annotations": []}, "ersl2OEJ-eI": {"duration": 138.46, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=ersl2OEJ-eI", "annotations": [{"segment": [2.590970358814353, 8.420653666146647], "label": "Throwing darts"}, {"segment": [15.113993759750391, 24.182390015600628], "label": "Throwing darts"}, {"segment": [27.852931357254292, 34.546271450858036], "label": "Throwing darts"}, {"segment": [39.29638377535102, 48.364780031201256], "label": "Throwing darts"}, {"segment": [96.29773166926678, 103.00595475819033], "label": "Throwing darts"}, {"segment": [108.40458970358814, 115.96267862714508], "label": "Throwing darts"}, {"segment": [119.20185959438376, 125.68022152886115], "label": "Throwing darts"}]}, "4ImpZRtbzYw": {"duration": 192.494, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4ImpZRtbzYw", "annotations": [{"segment": [26.685225450512498, 155.01417397352026], "label": "Painting furniture"}]}, "3UrypnvwAOY": {"duration": 46.09, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3UrypnvwAOY", "annotations": [{"segment": [3.807979719188767, 43.61214508580343], "label": "Playing violin"}]}, 
"JkZZvDHTty4": {"duration": 119.189, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JkZZvDHTty4", "annotations": [{"segment": [0.01, 118.28734202771767], "label": "Roof shingle removal"}]}, "oO3bpAF-_R4": {"duration": 177.61, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=oO3bpAF-_R4", "annotations": []}, "g_AwwSsBj0s": {"duration": 232.154, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=g_AwwSsBj0s", "annotations": [{"segment": [0.2, 198.2146019683149], "label": "Rollerblading"}]}, "M2OoQFcDflU": {"duration": 30.186, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=M2OoQFcDflU", "annotations": [{"segment": [0.01, 30.141], "label": "Playing blackjack"}]}, "9AfQ2Q-96oE": {"duration": 98.8, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=9AfQ2Q-96oE", "annotations": []}, "aCiN9mqkXoA": {"duration": 163.28, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aCiN9mqkXoA", "annotations": [{"segment": [0.3236555148092014, 120.09924897275312], "label": "Washing dishes"}]}, "Jt-DIHv8YK8": {"duration": 72.622, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Jt-DIHv8YK8", "annotations": [{"segment": [0.2, 70.46842745709827], "label": "Longboarding"}]}, "yGCklVOLgk8": {"duration": 62.393, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yGCklVOLgk8", "annotations": [{"segment": [19.360123244929795, 33.66131981279251], "label": "Waterskiing"}]}, "tuhEnqKmUKg": {"duration": 37.43, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tuhEnqKmUKg", "annotations": [{"segment": [9.465223439937597, 29.036393137285494], "label": "Skateboarding"}]}, "Mil3AyFfjX4": {"duration": 6.966, "subset": "training", "resolution": "1080x1920", "url": 
"https://www.youtube.com/watch?v=Mil3AyFfjX4", "annotations": [{"segment": [0.2, 6.765986394557823], "label": "Shuffleboard"}]}, "k0w1lMRoqYk": {"duration": 181.65, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=k0w1lMRoqYk", "annotations": [{"segment": [53.73892523364485, 91.63900934579438], "label": "Raking leaves"}]}, "jl7aBkPfcS8": {"duration": 145.45, "subset": "validation", "resolution": "406x720", "url": "https://www.youtube.com/watch?v=jl7aBkPfcS8", "annotations": [{"segment": [44.70132132449298, 76.01493727769112], "label": "Cleaning shoes"}]}, "O337_rGYZPc": {"duration": 155.25, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=O337_rGYZPc", "annotations": [{"segment": [78.95639480545209, 149.4358743447738], "label": "Starting a campfire"}]}, "p35FXT4drA0": {"duration": 82.94, "subset": "training", "resolution": "318x240", "url": "https://www.youtube.com/watch?v=p35FXT4drA0", "annotations": [{"segment": [5.3012808112324485, 37.50591263650546], "label": "Hammer throw"}]}, "r4F2X_MlL-U": {"duration": 179.235, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=r4F2X_MlL-U", "annotations": [{"segment": [26.481035938772866, 163.34619980263383], "label": "Hanging wallpaper"}]}, "PwvSxX9E2ms": {"duration": 112.966, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PwvSxX9E2ms", "annotations": []}, "4MKbQ816r9A": {"duration": 86.82, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4MKbQ816r9A", "annotations": [{"segment": [3.9278682684722352, 82.48523258658346], "label": "Mixing drinks"}]}, "_fKxAnsi55M": {"duration": 138.44, "subset": "testing", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=_fKxAnsi55M", "annotations": []}, "_0CqozZun3U": {"duration": 294.29, "subset": "training", "resolution": "640x360", "url": 
"https://www.youtube.com/watch?v=_0CqozZun3U", "annotations": [{"segment": [120.74129047141814, 286.68034475179195], "label": "Removing ice from car"}]}, "Fi2Al65EH0g": {"duration": 187.13, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Fi2Al65EH0g", "annotations": [{"segment": [18.68091107644306, 97.4910046801872], "label": "Disc dog"}, {"segment": [109.75035257410296, 179.511879875195], "label": "Disc dog"}]}, "SmBEf-g82Ew": {"duration": 90.047, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=SmBEf-g82Ew", "annotations": [{"segment": [17.559864299333878, 90.04698412698413], "label": "Plastering"}]}, "T0nCDJuikuA": {"duration": 219.267, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=T0nCDJuikuA", "annotations": []}, "lGvI5pyjpFg": {"duration": 25.496, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lGvI5pyjpFg", "annotations": [{"segment": [13.416121684867393, 18.496783151326053], "label": "Making a lemonade"}]}, "5laHmakSAAg": {"duration": 182.765, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5laHmakSAAg", "annotations": [{"segment": [17.67765114820593, 166.5120688798752], "label": "Beer pong"}]}, "haUWdHvwpUo": {"duration": 205.172, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=haUWdHvwpUo", "annotations": [{"segment": [122.34583021806853, 185.293738317757], "label": "Playing congas"}]}, "y3Ll1puxM8c": {"duration": 231.62, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=y3Ll1puxM8c", "annotations": [{"segment": [3.9747402902918836, 203.07309315132605], "label": "Zumba"}]}, "OrF46J3HmSw": {"duration": 35.02, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=OrF46J3HmSw", "annotations": [{"segment": [5.6388156, 33.4315168174727], "label": 
"Hopscotch"}]}, "O-zE1-Yw3w0": {"duration": 136.19, "subset": "training", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=O-zE1-Yw3w0", "annotations": [{"segment": [115.9905023400936, 133.62322822308892], "label": "Using the balance beam"}]}, "sqEsAtdFfrg": {"duration": 76.63, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sqEsAtdFfrg", "annotations": [{"segment": [3.8838260530421214, 14.161027301092044], "label": "Javelin throw"}, {"segment": [33.75941107644306, 42.84158892355694], "label": "Javelin throw"}, {"segment": [52.16277145085803, 63.51549375975039], "label": "Javelin throw"}]}, "hFzbUNtTKPY": {"duration": 9.01, "subset": "testing", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=hFzbUNtTKPY", "annotations": []}, "BGHQbw5HZ9Y": {"duration": 204.219, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BGHQbw5HZ9Y", "annotations": [{"segment": [82.51675351014042, 84.7469360374415], "label": "Rock-paper-scissors"}, {"segment": [141.77588923556942, 143.68747425897035], "label": "Rock-paper-scissors"}, {"segment": [144.96186427457098, 147.52509048361935], "label": "Rock-paper-scissors"}]}, "z93quI8Gxx4": {"duration": 59.42, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=z93quI8Gxx4", "annotations": [{"segment": [9.980215049778604, 56.92419454639844], "label": "Camel ride"}]}, "lZ2X-e33E0A": {"duration": 157.153, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=lZ2X-e33E0A", "annotations": [{"segment": [41.17467706708268, 147.07113884555383], "label": "Plastering"}]}, "hYAE418i-ZY": {"duration": 219.36, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hYAE418i-ZY", "annotations": [{"segment": [31.396710540000004, 138.5965040826833], "label": "Getting a haircut"}]}, "ThWgMXhkS2E": {"duration": 85.87, "subset": "training", 
"resolution": "648x480", "url": "https://www.youtube.com/watch?v=ThWgMXhkS2E", "annotations": [{"segment": [9.17615644851794, 78.83457036427457], "label": "Cleaning windows"}]}, "IIKwRyf4Oyg": {"duration": 141.99, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IIKwRyf4Oyg", "annotations": [{"segment": [4.483683909133092, 111.30795204464712], "label": "Playing guitarra"}]}, "59oIJsSLsh0": {"duration": 45.09, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=59oIJsSLsh0", "annotations": []}, "oJU8LkbcNEg": {"duration": 208.26, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=oJU8LkbcNEg", "annotations": []}, "94lnTGe5xU4": {"duration": 88.56, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=94lnTGe5xU4", "annotations": []}, "ThYidZUtnuo": {"duration": 53.685, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ThYidZUtnuo", "annotations": [{"segment": [11.966936037441497, 20.837531981279252], "label": "Roof shingle removal"}, {"segment": [32.21867394695788, 41.92611856474259], "label": "Roof shingle removal"}]}, "cWpT8nb2a9s": {"duration": 126.11, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=cWpT8nb2a9s", "annotations": [{"segment": [32.67306263, 72.39849752901681], "label": "Cheerleading"}]}, "g4tKl5yXmGg": {"duration": 120.58, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=g4tKl5yXmGg", "annotations": []}, "Cu-p0FZOqi8": {"duration": 137.07, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Cu-p0FZOqi8", "annotations": [{"segment": [6.414990327613104, 115.25599288611544], "label": "Plataform diving"}]}, "g0EkYCOp9OU": {"duration": 154.599, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=g0EkYCOp9OU", "annotations": []}, 
"A4PdcfWqrN8": {"duration": 188.406, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=A4PdcfWqrN8", "annotations": [{"segment": [20.28085113608626, 179.8823318157216], "label": "Disc dog"}]}, "OJiLPJkzel4": {"duration": 96.781, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OJiLPJkzel4", "annotations": [{"segment": [47.25800517190755, 51.03260622397684], "label": "Curling"}]}, "02WlEYafQ9Q": {"duration": 80.086, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=02WlEYafQ9Q", "annotations": [{"segment": [9.745173166926678, 69.59053146125845], "label": "Rafting"}]}, "YJOoLvwqWCw": {"duration": 41.309, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YJOoLvwqWCw", "annotations": [{"segment": [7.3143398174727, 34.57394988611544], "label": "Arm wrestling"}]}, "08uNrWxxHGY": {"duration": 192.31, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=08uNrWxxHGY", "annotations": [{"segment": [5.473497837750076, 183.40301780971268], "label": "Painting"}]}, "ajYA9O6R-AY": {"duration": 52.73, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=ajYA9O6R-AY", "annotations": [{"segment": [31.343351943837753, 46.973894907956314], "label": "Hopscotch"}]}, "6KXVjADefBY": {"duration": 133.42, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6KXVjADefBY", "annotations": [{"segment": [0, 0], "label": "Vacuuming floor"}]}, "aotVhoXjqS0": {"duration": 72.49, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aotVhoXjqS0", "annotations": [{"segment": [31.92514430577223, 35.654453198127925], "label": "Playing racquetball"}, {"segment": [45.59927691107644, 49.55460452418097], "label": "Playing racquetball"}]}, "0df62FHrMT0": {"duration": 23.57, "subset": "testing", "resolution": 
"586x480", "url": "https://www.youtube.com/watch?v=0df62FHrMT0", "annotations": []}, "QrBIWtD4JyI": {"duration": 83.151, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QrBIWtD4JyI", "annotations": []}, "gBOWkZBlc8g": {"duration": 126.224, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gBOWkZBlc8g", "annotations": [{"segment": [0.01, 124.66438941259753], "label": "Beach soccer"}]}, "aDe5hUB3AdY": {"duration": 133.93, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aDe5hUB3AdY", "annotations": []}, "Cqbs_wM3oc4": {"duration": 15.05, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Cqbs_wM3oc4", "annotations": [{"segment": [0.01, 13.45553822152886], "label": "Playing saxophone"}]}, "Sfdj_kMDUh8": {"duration": 151.348, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Sfdj_kMDUh8", "annotations": [{"segment": [10.620351014040562, 136.43054290171605], "label": "Installing carpet"}]}, "9_uq9j0r0aY": {"duration": 187.99, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=9_uq9j0r0aY", "annotations": []}, "74cccfEVUSU": {"duration": 178.562, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=74cccfEVUSU", "annotations": []}, "8kkKxLpiMus": {"duration": 34.06, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8kkKxLpiMus", "annotations": [{"segment": [0.01, 33.239980897195075], "label": "Doing step aerobics"}]}, "Ye3u6ujLEl0": {"duration": 199.692, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ye3u6ujLEl0", "annotations": [{"segment": [56.684340093603744, 71.32260374414976], "label": "Swimming"}, {"segment": [100.5991310452418, 105.27091731669265], "label": "Swimming"}, {"segment": [119.90918096723868, 152.3154898595944], "label": 
"Swimming"}, {"segment": [177.2341794071763, 190.62797503900157], "label": "Swimming"}]}, "kTJDylL6pCU": {"duration": 181.21, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kTJDylL6pCU", "annotations": [{"segment": [41.516564880000004, 165.6601494570983], "label": "Bathing dog"}]}, "r8ZIHLqFPfg": {"duration": 99.916, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=r8ZIHLqFPfg", "annotations": [{"segment": [30.611734138996447, 39.31354756317379], "label": "Putting in contact lenses"}, {"segment": [64.95281925941062, 67.74982946098137], "label": "Putting in contact lenses"}]}, "bEniqIC5Ric": {"duration": 61.905, "subset": "training", "resolution": "1920x1440", "url": "https://www.youtube.com/watch?v=bEniqIC5Ric", "annotations": [{"segment": [0.01, 60.9556767825902], "label": "Camel ride"}]}, "ebmi7XJA8Oo": {"duration": 212.648, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ebmi7XJA8Oo", "annotations": [{"segment": [15.035815770146261, 206.46095284179736], "label": "Cleaning sink"}]}, "_nTjmHQ0-gQ": {"duration": 43.1, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_nTjmHQ0-gQ", "annotations": [{"segment": [2.518428217628705, 40.395588610764435], "label": "Triple jump"}]}, "m0ZPQ8q4Qq4": {"duration": 11.75, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=m0ZPQ8q4Qq4", "annotations": [{"segment": [0.2829576021840874, 11.701666], "label": "Hopscotch"}]}, "cu8TBL7msJg": {"duration": 209.0, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=cu8TBL7msJg", "annotations": [{"segment": [0.01, 4.401763348084944], "label": "Tennis serve with ball bouncing"}, {"segment": [5.05387643669012, 7.988385335413415], "label": "Tennis serve with ball bouncing"}, {"segment": [16.13979894297813, 19.074307841701422], "label": "Tennis serve with ball 
bouncing"}, {"segment": [34.072908879620485, 39.61587013276449], "label": "Tennis serve with ball bouncing"}, {"segment": [64.39616691497659, 66.67856270436818], "label": "Tennis serve with ball bouncing"}, {"segment": [70.91729774180968, 82.32927668876755], "label": "Tennis serve with ball bouncing"}, {"segment": [109.39196962012481, 112.65253503354134], "label": "Tennis serve with ball bouncing"}, {"segment": [127.32507939391576, 168.73426014430575], "label": "Tennis serve with ball bouncing"}, {"segment": [171.66876901638065, 181.45046525663028], "label": "Tennis serve with ball bouncing"}, {"segment": [195.796953075663, 199.05751848907954], "label": "Tennis serve with ball bouncing"}]}, "IQp6JF5v1qc": {"duration": 62.07, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=IQp6JF5v1qc", "annotations": [{"segment": [1.7583297800000002, 60.03652106084244], "label": "Breakdancing"}]}, "WZSILJ9rBuc": {"duration": 120.094, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=WZSILJ9rBuc", "annotations": []}, "ilXO32we9uo": {"duration": 104.24000000000001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ilXO32we9uo", "annotations": [{"segment": [13.496806747269892, 85.85920436817473], "label": "Sailing"}]}, "ctWolbJDJyc": {"duration": 17.07, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ctWolbJDJyc", "annotations": [{"segment": [1.065002496099844, 14.643784321372856], "label": "Springboard diving"}]}, "dPKc7ZCEC30": {"duration": 150.21, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dPKc7ZCEC30", "annotations": []}, "JowEwlQllT8": {"duration": 7.593, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=JowEwlQllT8", "annotations": []}, "4Q2uDqvjFy0": {"duration": 145.868, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=4Q2uDqvjFy0", "annotations": []}, "U3-MtKZeUXM": {"duration": 23.29, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=U3-MtKZeUXM", "annotations": []}, "VHUC47iq1Wg": {"duration": 212.02, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=VHUC47iq1Wg", "annotations": [{"segment": [9.258783151326053, 206.9999375975039], "label": "Cricket"}]}, "vg-FrXO1coA": {"duration": 231.53, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vg-FrXO1coA", "annotations": [{"segment": [39.4288778, 215.9501452948518], "label": "Brushing teeth"}]}, "Lvia-mF_Hnk": {"duration": 76.116, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=Lvia-mF_Hnk", "annotations": [{"segment": [11.83748404344836, 47.11319124817537], "label": "Welding"}]}, "dXLhYjRSNcs": {"duration": 49.55, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dXLhYjRSNcs", "annotations": [{"segment": [0.20000000000000004, 45.79421684867395], "label": "Using parallel bars"}]}, "2q3Z7UQZnBY": {"duration": 110.086, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2q3Z7UQZnBY", "annotations": []}, "wItTq-gpK90": {"duration": 224.77, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wItTq-gpK90", "annotations": []}, "Fdu_dKh2PUE": {"duration": 110.527, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Fdu_dKh2PUE", "annotations": []}, "2FIQwmB362w": {"duration": 174.27, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2FIQwmB362w", "annotations": [{"segment": [23.383547581903272, 135.95085803432136], "label": "Skateboarding"}, {"segment": [147.64263182527299, 163.68483307332292], "label": "Skateboarding"}]}, "V3Sz4WWJ2_M": {"duration": 208.77, "subset": 
"training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=V3Sz4WWJ2_M", "annotations": [{"segment": [132.47030577034468, 182.47135333812332], "label": "Hanging wallpaper"}]}, "F2BFV1930tQ": {"duration": 39.428, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=F2BFV1930tQ", "annotations": []}, "1aRRlW0k-_4": {"duration": 93.368, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1aRRlW0k-_4", "annotations": [{"segment": [31.658295631825276, 80.56490405616225], "label": "Hurling"}]}, "rzZutQSmyJo": {"duration": 139.46, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rzZutQSmyJo", "annotations": [{"segment": [4.132159126365055, 20.878277691107645], "label": "Playing bagpipes"}, {"segment": [25.227918876755073, 51.97821216848674], "label": "Playing bagpipes"}, {"segment": [94.60469578783152, 130.92419968798754], "label": "Playing bagpipes"}]}, "mvWKOkRzfos": {"duration": 80.272, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mvWKOkRzfos", "annotations": [{"segment": [5.001330875029139, 35.63448248458262], "label": "Playing beach volleyball"}, {"segment": [69.51849782554517, 74.76989514330218], "label": "Playing beach volleyball"}]}, "V7C34-4tBok": {"duration": 109.32, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=V7C34-4tBok", "annotations": [{"segment": [31.28861154446178, 101.34789391575663], "label": "Washing dishes"}]}, "rS8T1dAdiCs": {"duration": 155.92000000000002, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rS8T1dAdiCs", "annotations": [{"segment": [30.610410719999997, 152.39452207612115], "label": "Wrapping presents"}]}, "JNvwkiz8Bzw": {"duration": 82.38, "subset": "training", "resolution": "400x224", "url": "https://www.youtube.com/watch?v=JNvwkiz8Bzw", "annotations": [{"segment": [6.552903276131045, 
72.4849984399376], "label": "Tug of war"}]}, "H2fG7JRRMAs": {"duration": 60.094, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=H2fG7JRRMAs", "annotations": []}, "nwhCE8TfhDA": {"duration": 125.69, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=nwhCE8TfhDA", "annotations": []}, "3M290KZV5lo": {"duration": 87.19, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=3M290KZV5lo", "annotations": []}, "PzJsoZT2hXk": {"duration": 105.95, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=PzJsoZT2hXk", "annotations": []}, "jDeBuorU4hY": {"duration": 123.507, "subset": "validation", "resolution": "426x238", "url": "https://www.youtube.com/watch?v=jDeBuorU4hY", "annotations": [{"segment": [69.30477379095163, 79.52086271450858], "label": "Curling"}, {"segment": [90.11080811232449, 118.41484399375976], "label": "Curling"}]}, "T8XtEP8LaNM": {"duration": 102.122, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=T8XtEP8LaNM", "annotations": []}, "bj-MDk8ADdg": {"duration": 78.35, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bj-MDk8ADdg", "annotations": []}, "g4uvBcIE1Os": {"duration": 28.979, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=g4uvBcIE1Os", "annotations": [{"segment": [7.088513260530421, 26.45776287051482], "label": "Snow tubing"}]}, "di8Vr1fzUh8": {"duration": 121.487, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=di8Vr1fzUh8", "annotations": [{"segment": [0.01, 121.48680272108844], "label": "Throwing darts"}]}, "pw0A0Uv2TZM": {"duration": 58.07, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pw0A0Uv2TZM", "annotations": [{"segment": [2.717930031201248, 16.30758018720749], "label": "Washing face"}]}, "w4CiGbHLTBg": 
{"duration": 139.72, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=w4CiGbHLTBg", "annotations": [{"segment": [71.69982766000001, 129.6437851326053], "label": "Painting"}, {"segment": [19.827873020280812, 56.97257492], "label": "Painting"}]}, "MbCIH5bay4Q": {"duration": 31.63, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=MbCIH5bay4Q", "annotations": [{"segment": [8.288762770670827, 31.625577], "label": "High jump"}]}, "4gQpW3zR-Aw": {"duration": 116.031, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4gQpW3zR-Aw", "annotations": [{"segment": [59.52846957878315, 93.01796255850233], "label": "Camel ride"}]}, "SNuH7bpgHuY": {"duration": 66.061, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=SNuH7bpgHuY", "annotations": [{"segment": [0.2, 12.225864289306962], "label": "Bullfighting"}, {"segment": [26.30102078010442, 28.766741625207622], "label": "Bullfighting"}, {"segment": [35.958427423425306, 43.15011322164298], "label": "Bullfighting"}]}, "T8mTuW85NNo": {"duration": 32.09, "subset": "training", "resolution": "638x360", "url": "https://www.youtube.com/watch?v=T8mTuW85NNo", "annotations": [{"segment": [15.855439937597504, 19.356641185647426], "label": "Washing hands"}, {"segment": [23.20796255850234, 28.959936037441498], "label": "Washing hands"}]}, "uuhcDXyGrEI": {"duration": 170.69, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uuhcDXyGrEI", "annotations": [{"segment": [52.57936817472699, 129.2520670826833], "label": "Making a sandwich"}]}, "6QbIJ2pnXXo": {"duration": 162.981, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6QbIJ2pnXXo", "annotations": [{"segment": [0.010000000000000002, 150.38902568269282], "label": "Making a cake"}]}, "BadeHG8y4PQ": {"duration": 126.48, "subset": "training", "resolution": "656x480", 
"url": "https://www.youtube.com/watch?v=BadeHG8y4PQ", "annotations": [{"segment": [42.42278469370068, 51.49928746537617], "label": "Volleyball"}, {"segment": [63.732834679373575, 88.00261382972326], "label": "Volleyball"}, {"segment": [99.24958465549507, 113.25896936829855], "label": "Volleyball"}]}, "fgkiHZ0nENg": {"duration": 44.652, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=fgkiHZ0nENg", "annotations": [{"segment": [0.2, 43.746392804992205], "label": "Croquet"}]}, "V1CX5B7ih6c": {"duration": 51.851, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=V1CX5B7ih6c", "annotations": [{"segment": [2.6693529455463936, 5.662263634945399], "label": "Doing a powerbomb"}, {"segment": [28.06864973322933, 31.223339472698907], "label": "Doing a powerbomb"}, {"segment": [36.642934666146644, 41.738971937597505], "label": "Doing a powerbomb"}]}, "e2fXKrG2BkY": {"duration": 148.24, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=e2fXKrG2BkY", "annotations": [{"segment": [41.699807400000005, 100.27814827457097], "label": "Doing karate"}, {"segment": [101.33845576000002, 142.7923862527301], "label": "Doing karate"}]}, "fSA-eTz3GkU": {"duration": 126.69, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=fSA-eTz3GkU", "annotations": [{"segment": [11.66083648829953, 17.39243408424337], "label": "Shot put"}, {"segment": [73.12727967238689, 82.02113801092042], "label": "Shot put"}]}, "TEkIJQljABs": {"duration": 56.82, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=TEkIJQljABs", "annotations": [{"segment": [1.2390491025070463, 54.87217289719626], "label": "Having an ice cream"}]}, "34ZxyyFjbHQ": {"duration": 176.59, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=34ZxyyFjbHQ", "annotations": []}, "wipGHu-nmNY": {"duration": 128.41, "subset": "training", 
"resolution": "320x240", "url": "https://www.youtube.com/watch?v=wipGHu-nmNY", "annotations": [{"segment": [11.418349198127926, 20.63315732293292], "label": "Mixing drinks"}, {"segment": [24.038629890795633, 35.45697908892355], "label": "Mixing drinks"}, {"segment": [41.86728039313572, 50.68144468642746], "label": "Mixing drinks"}, {"segment": [56.49078024336974, 67.70880752574102], "label": "Mixing drinks"}, {"segment": [73.51814308268332, 78.7265128923557], "label": "Mixing drinks"}, {"segment": [85.53745802808112, 94.15130040561624], "label": "Mixing drinks"}]}, "Q-879RNVOdg": {"duration": 202.246, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Q-879RNVOdg", "annotations": [{"segment": [35.227876412164676, 36.48601502686406], "label": "Wakeboarding"}, {"segment": [40.57496552463704, 42.46217344668611], "label": "Wakeboarding"}, {"segment": [46.236589290784245, 56.616232862054126], "label": "Wakeboarding"}, {"segment": [65.10866851127493, 68.88308435537306], "label": "Wakeboarding"}, {"segment": [80.20633188766747, 82.09353980971655], "label": "Wakeboarding"}, {"segment": [93.10225268833611, 95.93306457140972], "label": "Wakeboarding"}, {"segment": [117.32142102129914, 120.15223290437277], "label": "Wakeboarding"}, {"segment": [121.724906172747, 138.3952419190891], "label": "Wakeboarding"}, {"segment": [148.46035077134414, 155.69464775890248], "label": "Wakeboarding"}, {"segment": [157.89639032033327, 195.01147921302376], "label": "Wakeboarding"}]}, "gCuRcP4txc0": {"duration": 55.543, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gCuRcP4txc0", "annotations": [{"segment": [0.01, 45.630307332293285], "label": "Drum corps"}]}, "f5xSylPY_bQ": {"duration": 202.41, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=f5xSylPY_bQ", "annotations": [{"segment": [22.25662629173167, 193.68000326209048], "label": "Playing flauta"}]}, "Eaqf6cekoZ4": 
{"duration": 52.36, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Eaqf6cekoZ4", "annotations": []}, "zPl4spxrvg4": {"duration": 208.17000000000002, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zPl4spxrvg4", "annotations": [{"segment": [51.67372608, 160.78148015], "label": "Archery"}]}, "2oc0OBWkYfg": {"duration": 43.03, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=2oc0OBWkYfg", "annotations": [{"segment": [0.01, 15.012892355694229], "label": "Removing curlers"}]}, "XXJ-UtODmIs": {"duration": 152.254, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XXJ-UtODmIs", "annotations": []}, "Jgtpkfgjytg": {"duration": 88.747, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Jgtpkfgjytg", "annotations": []}, "yyUmEgvq3B0": {"duration": 83.267, "subset": "training", "resolution": "480x352", "url": "https://www.youtube.com/watch?v=yyUmEgvq3B0", "annotations": [{"segment": [31.1113703432491, 42.41278671010786], "label": "Playing ten pins"}, {"segment": [57.35143958502341, 62.287690330733234], "label": "Playing ten pins"}, {"segment": [65.40532238065522, 68.65285576599064], "label": "Playing ten pins"}]}, "_7XW-BFK_ZY": {"duration": 169.065, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_7XW-BFK_ZY", "annotations": [{"segment": [31.641996879875194, 169.021], "label": "Disc dog"}]}, "MbEoZXWVc-I": {"duration": 114.707, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=MbEoZXWVc-I", "annotations": [{"segment": [13.73624354420358, 17.12570690860585], "label": "Beer pong"}, {"segment": [23.54784802010488, 99.0080060802185], "label": "Beer pong"}]}, "UrPn5-8yNgA": {"duration": 191.15, "subset": "training", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=UrPn5-8yNgA", "annotations": 
[{"segment": [1.2721459520438008, 173.36369169970735], "label": "Shaving legs"}]}, "FBUtGL5_tto": {"duration": 152.138, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FBUtGL5_tto", "annotations": [{"segment": [0.2, 146.92352171339562], "label": "Rafting"}]}, "zqqiiZCmPLw": {"duration": 181.77, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=zqqiiZCmPLw", "annotations": [{"segment": [2.9084589847751143, 95.49010732012171], "label": "Ballet"}]}, "KTNJc_NVOmg": {"duration": 114.637, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KTNJc_NVOmg", "annotations": [{"segment": [0.01, 106.40996708268331], "label": "Clipping cat claws"}]}, "vMcXLYBcHsM": {"duration": 135.77, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vMcXLYBcHsM", "annotations": []}, "oYhVH1vIXFM": {"duration": 134.42, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oYhVH1vIXFM", "annotations": [{"segment": [68.15382710546515, 94.36683753064406], "label": "Preparing pasta"}, {"segment": [104.85204170071565, 126.45156229106306], "label": "Preparing pasta"}]}, "7GOPv-XegSc": {"duration": 180.838, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=7GOPv-XegSc", "annotations": [{"segment": [91.82909999257114, 103.67801612064483], "label": "Cutting the grass"}]}, "1ioKX0iuico": {"duration": 177.145, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1ioKX0iuico", "annotations": [{"segment": [8.938817243403737, 170.3341209817215], "label": "Wakeboarding"}]}, "BLmAF2wbTz8": {"duration": 169.99, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BLmAF2wbTz8", "annotations": [{"segment": [21.481136438377533, 169.99269841269842], "label": "Mixing drinks"}]}, "EiPofuIoUic": {"duration": 61.278, 
"subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EiPofuIoUic", "annotations": [{"segment": [0.2, 60.41842920093458], "label": "River tubing"}]}, "x08PpU3uQ6Y": {"duration": 118.817, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=x08PpU3uQ6Y", "annotations": [{"segment": [0.01, 110.07162870514821], "label": "Spread mulch"}]}, "MrUvJIAiyvk": {"duration": 55.496, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=MrUvJIAiyvk", "annotations": []}, "A0b_pqaKCX8": {"duration": 155.11, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=A0b_pqaKCX8", "annotations": []}, "UAFIstY4a8Q": {"duration": 168.949, "subset": "training", "resolution": "853x468", "url": "https://www.youtube.com/watch?v=UAFIstY4a8Q", "annotations": [{"segment": [16.604911867394694, 152.87061719188767], "label": "Playing blackjack"}]}, "08afjR_3SwI": {"duration": 61.184, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=08afjR_3SwI", "annotations": [{"segment": [6.872526926677067, 59.84825531981279], "label": "Canoeing"}]}, "YfWKbMPFmcU": {"duration": 166.093, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YfWKbMPFmcU", "annotations": [{"segment": [64.83541788718469, 69.48497005019607], "label": "Painting fence"}, {"segment": [127.08775395588196, 130.96238072104526], "label": "Painting fence"}, {"segment": [142.32795256552427, 147.75243003675288], "label": "Painting fence"}]}, "Oz4q98BhqKo": {"duration": 203.9, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=Oz4q98BhqKo", "annotations": [{"segment": [6.997923242099752, 195.94185077879305], "label": "Doing step aerobics"}]}, "no9vPN7D-1s": {"duration": 149.862, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=no9vPN7D-1s", "annotations": [{"segment": 
[0.01, 149.721], "label": "Welding"}]}, "LxwKUIJdt_4": {"duration": 86.564, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LxwKUIJdt_4", "annotations": [{"segment": [0.01, 0.02], "label": "Blowing leaves"}]}, "H7k8ATbTjzs": {"duration": 109.6, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=H7k8ATbTjzs", "annotations": [{"segment": [0, 109.598184], "label": "Walking the dog"}]}, "bW5HfDWUP3U": {"duration": 137.463, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=bW5HfDWUP3U", "annotations": [{"segment": [6.431560062402496, 133.99083463338533], "label": "Putting on shoes"}]}, "1JKgr3KfoHo": {"duration": 206.862, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1JKgr3KfoHo", "annotations": [{"segment": [0.01, 196.56144234945398], "label": "Playing ice hockey"}]}, "EOradtej9GU": {"duration": 121.139, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EOradtej9GU", "annotations": [{"segment": [18.142427906396257, 120.5715525274072], "label": "Skiing"}]}, "Pt4BlfBqduE": {"duration": 109.831, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Pt4BlfBqduE", "annotations": [{"segment": [35.98187356065671, 109.830384], "label": "Fixing the roof"}]}, "x44fn0snUvw": {"duration": 100.8, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=x44fn0snUvw", "annotations": [{"segment": [1.7269656786271452, 57.76760087000002], "label": "Washing hands"}, {"segment": [62.344718799999995, 97.02407176287052], "label": "Washing hands"}]}, "mISzl7h3qIA": {"duration": 189.452, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mISzl7h3qIA", "annotations": []}, "91RXe65HfKE": {"duration": 114.1, "subset": "testing", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=91RXe65HfKE", "annotations": []}, "x3z3dVUxmJ0": {"duration": 131.054, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=x3z3dVUxmJ0", "annotations": [{"segment": [0.01, 115.71955915444619], "label": "Braiding hair"}]}, "L-rPS8tlAy4": {"duration": 108.322, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=L-rPS8tlAy4", "annotations": [{"segment": [18.69928330245395, 46.83244312937072], "label": "Bullfighting"}, {"segment": [94.67565978406125, 99.56105873118412], "label": "Bullfighting"}]}, "7p99ez6MEeo": {"duration": 18.25, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=7p99ez6MEeo", "annotations": [{"segment": [0.1993076146645866, 5.55214069422777], "label": "Volleyball"}]}, "fhEvf47Jqi8": {"duration": 196.256, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fhEvf47Jqi8", "annotations": [{"segment": [78.6477285036105, 190.47783130734882], "label": "Trimming branches or hedges"}]}, "rKTpKq7bh_U": {"duration": 150.14, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=rKTpKq7bh_U", "annotations": [{"segment": [0, 150.14022675736962], "label": "Playing flauta"}]}, "EOSvdsejPQQ": {"duration": 143.8, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EOSvdsejPQQ", "annotations": [{"segment": [20.19045790951638, 129.89194588455538], "label": "Playing harmonica"}]}, "paRAKx8svJ8": {"duration": 103.074, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=paRAKx8svJ8", "annotations": []}, "8UJ5PGzu1PE": {"duration": 59.19, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8UJ5PGzu1PE", "annotations": [{"segment": [31.199106084243372, 53.65509984399376], "label": "Archery"}]}, "lcUtnIptSjM": {"duration": 228.88, "subset": "training", 
"resolution": "640x360", "url": "https://www.youtube.com/watch?v=lcUtnIptSjM", "annotations": [{"segment": [36.77767632065827, 128.54333471298034], "label": "Washing hands"}, {"segment": [183.1742505631825, 206.02639878159127], "label": "Washing hands"}]}, "YwUW6Qh_5TA": {"duration": 87.91, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YwUW6Qh_5TA", "annotations": [{"segment": [15.70400546021841, 49.03215678627146], "label": "Using the pommel horse"}]}, "4LoAbHdHWqM": {"duration": 54.869, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=4LoAbHdHWqM", "annotations": []}, "DRK5oUhjwTg": {"duration": 102.424, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DRK5oUhjwTg", "annotations": [{"segment": [68.3352407429081, 92.70655124533614], "label": "Rollerblading"}]}, "i1CVl-0-gJE": {"duration": 72.749, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=i1CVl-0-gJE", "annotations": [{"segment": [20.088013838920904, 28.372900902430658], "label": "Raking leaves"}, {"segment": [39.495077560062406, 43.01331722776911], "label": "Raking leaves"}]}, "FB5RmmpV25g": {"duration": 153.345, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FB5RmmpV25g", "annotations": []}, "Fia1JynWFBA": {"duration": 7.64, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Fia1JynWFBA", "annotations": [{"segment": [0.01, 7.639364], "label": "Baking cookies"}]}, "4Ex-sB0vtwk": {"duration": 9.196, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=4Ex-sB0vtwk", "annotations": [{"segment": [4.590400125495946, 7.7364688968800825], "label": "Shuffleboard"}]}, "MiTIi_QYt2k": {"duration": 113.45, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MiTIi_QYt2k", "annotations": [{"segment": 
[66.37509259999999, 106.19606703042122], "label": "Polishing forniture"}, {"segment": [36.27854604, 57.38655079329173], "label": "Polishing forniture"}]}, "6PnPu_cLCvE": {"duration": 128.92, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=6PnPu_cLCvE", "annotations": [{"segment": [4.020624024960998, 18.293839313572544], "label": "Smoking a cigarette"}, {"segment": [82.63664742589704, 96.10782839313572], "label": "Smoking a cigarette"}]}, "00KMCm2oGhk": {"duration": 31.556, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=00KMCm2oGhk", "annotations": [{"segment": [3.1619197010860263, 30.607774841272946], "label": "Drum corps"}]}, "ApIHaCeRF5s": {"duration": 236.031, "subset": "testing", "resolution": "480x272", "url": "https://www.youtube.com/watch?v=ApIHaCeRF5s", "annotations": []}, "DPw74KWZzGY": {"duration": 128.66, "subset": "training", "resolution": "240x180", "url": "https://www.youtube.com/watch?v=DPw74KWZzGY", "annotations": [{"segment": [38.47221222, 95.86402449], "label": "Playing accordion"}, {"segment": [2.4086446559903214, 11.441061964118564], "label": "Playing accordion"}, {"segment": [42.15128092043682, 47.57073132449298], "label": "Playing accordion"}, {"segment": [52.3880205725429, 68.24493101404056], "label": "Playing accordion"}, {"segment": [74.26654257410296, 87.31336762090484], "label": "Playing accordion"}, {"segment": [91.52849571294851, 97.95154804368174], "label": "Playing accordion"}, {"segment": [104.97676153042121, 125.65096121996879], "label": "Playing accordion"}]}, "2DMOP9Fy91U": {"duration": 132.619, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=2DMOP9Fy91U", "annotations": [{"segment": [9.763880063957144, 105.60011682408175], "label": "Hula hoop"}]}, "XkzEXA4b20k": {"duration": 232.99, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=XkzEXA4b20k", "annotations": 
[{"segment": [20.12325993, 213.13617333697348], "label": "Getting a haircut"}]}, "uo6WV2gLQgk": {"duration": 85.68, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uo6WV2gLQgk", "annotations": []}, "Z6xXj6tbMCo": {"duration": 162.842, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Z6xXj6tbMCo", "annotations": [{"segment": [0.01, 162.841541], "label": "Capoeira"}]}, "k_ZCg9tqGQo": {"duration": 23.52, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=k_ZCg9tqGQo", "annotations": [{"segment": [3.4617074882995325, 5.879407956318253], "label": "Getting a tattoo"}]}, "penDFcEdu7U": {"duration": 53.546, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=penDFcEdu7U", "annotations": []}, "vSiu8qzHV6c": {"duration": 212.28, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vSiu8qzHV6c", "annotations": []}, "U7iNqQs1kRk": {"duration": 69.753, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=U7iNqQs1kRk", "annotations": [{"segment": [0.01, 69.752743], "label": "Playing congas"}]}, "NtlJJ5V9_iY": {"duration": 198.229, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NtlJJ5V9_iY", "annotations": []}, "5eM2Hcvj6R8": {"duration": 189.406, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5eM2Hcvj6R8", "annotations": [{"segment": [28.147208677061094, 147.74011889454363], "label": "Hula hoop"}]}, "tRgK4fFA8sE": {"duration": 161.1, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tRgK4fFA8sE", "annotations": [{"segment": [10.577381760000002, 154.8825165803432], "label": "Fixing bicycle"}]}, "bjzG4cFArvI": {"duration": 167.25, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=bjzG4cFArvI", "annotations": []}, 
"inEuUt5xEG0": {"duration": 225.327, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=inEuUt5xEG0", "annotations": []}, "owdkZsM5IFg": {"duration": 130.06, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=owdkZsM5IFg", "annotations": []}, "_TeU8A-CGbU": {"duration": 64.32, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_TeU8A-CGbU", "annotations": [{"segment": [3.01026244773791, 60.60661728112324], "label": "Installing carpet"}]}, "0Q9RHSEFeMI": {"duration": 166.57999999999998, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0Q9RHSEFeMI", "annotations": [{"segment": [11.667193160000002, 158.71702028081123], "label": "Doing step aerobics"}]}, "EDEFxLnf_Wk": {"duration": 32.63, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EDEFxLnf_Wk", "annotations": [{"segment": [1.651703034321373, 31.58564417940718], "label": "Spinning"}]}, "73ZdOEJcsUw": {"duration": 103.747, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=73ZdOEJcsUw", "annotations": [{"segment": [8.736037441497661, 71.18252730109204], "label": "Knitting"}]}, "5vk8OuKkttU": {"duration": 144.73, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=5vk8OuKkttU", "annotations": [{"segment": [15.124753510140405, 20.542575663026522], "label": "Installing carpet"}, {"segment": [25.734655226209046, 33.63564586583463], "label": "Installing carpet"}, {"segment": [39.50495319812792, 50.79208268330733], "label": "Installing carpet"}, {"segment": [54.62970670826833, 69.30297503900157], "label": "Installing carpet"}, {"segment": [71.78614352574102, 102.48713572542901], "label": "Installing carpet"}, {"segment": [109.03367082683307, 123.7069391575663], "label": "Installing carpet"}]}, "RIr3Y2XS5NA": {"duration": 153.368, "subset": "validation", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RIr3Y2XS5NA", "annotations": [{"segment": [28.233036461594516, 129.20203112012481], "label": "Rafting"}]}, "yUHN5TIprwk": {"duration": 56.31, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yUHN5TIprwk", "annotations": [{"segment": [4.123755070202808, 50.888892355694225], "label": "Spinning"}]}, "GAS3NRYiLiI": {"duration": 186.689, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=GAS3NRYiLiI", "annotations": []}, "FWmOjpmojyM": {"duration": 77.78999999999999, "subset": "testing", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=FWmOjpmojyM", "annotations": []}, "-YreL-4QCLg": {"duration": 41.75, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=-YreL-4QCLg", "annotations": [{"segment": [20.081376755070206, 38.43777301092044], "label": "Playing lacrosse"}]}, "aSc-UUhmYf8": {"duration": 194.56, "subset": "testing", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=aSc-UUhmYf8", "annotations": []}, "ir1jpnsK5p8": {"duration": 147.006, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ir1jpnsK5p8", "annotations": [{"segment": [16.479364485981307, 116.9736308411215], "label": "Fixing the roof"}]}, "1G5cu1NvDFw": {"duration": 43.19, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1G5cu1NvDFw", "annotations": [{"segment": [18.394115634945397, 32.07179136349454], "label": "Powerbocking"}, {"segment": [38.20316324180967, 42.043692879875195], "label": "Powerbocking"}]}, "LdPxd-qdtis": {"duration": 160.27, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=LdPxd-qdtis", "annotations": []}, "EM4FVYEFQRc": {"duration": 50.806, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=EM4FVYEFQRc", "annotations": []}, "iSH43hQoxio": 
{"duration": 184.924, "subset": "validation", "resolution": "426x222", "url": "https://www.youtube.com/watch?v=iSH43hQoxio", "annotations": [{"segment": [34.90759746852459, 37.79252287914645], "label": "Hand car wash"}, {"segment": [44.13935878251456, 45.58182148782549], "label": "Hand car wash"}, {"segment": [51.35167230906923, 54.52509026075328], "label": "Hand car wash"}, {"segment": [75.5850457582929, 77.60449354572822], "label": "Hand car wash"}, {"segment": [96.64500125583255, 99.52992666645441], "label": "Hand car wash"}, {"segment": [126.07124044417557, 127.51370314948652], "label": "Hand car wash"}, {"segment": [113.0890749828393, 116.83947797971919], "label": "Hand car wash"}, {"segment": [128.37917950858036, 130.97561235257413], "label": "Hand car wash"}, {"segment": [133.8605377347894, 143.09229895787834], "label": "Hand car wash"}, {"segment": [173.38401547113887, 183.19276177067084], "label": "Hand car wash"}]}, "JU_o9ZtH-VM": {"duration": 220.57, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JU_o9ZtH-VM", "annotations": [{"segment": [41.454694425117005, 176.99950441263653], "label": "Getting a haircut"}]}, "33eH3ozXLmU": {"duration": 178.864, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=33eH3ozXLmU", "annotations": [{"segment": [0.20000000000000004, 18.13317472698908], "label": "Scuba diving"}, {"segment": [31.523826833073322, 48.26214196567863], "label": "Scuba diving"}, {"segment": [53.56260842433697, 86.48129485179406], "label": "Scuba diving"}, {"segment": [94.57148049921997, 163.47754446177848], "label": "Scuba diving"}]}, "uRz8FNXP3eU": {"duration": 69.591, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uRz8FNXP3eU", "annotations": []}, "NQyAC1aweec": {"duration": 77.416, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NQyAC1aweec", "annotations": [{"segment": [0.01, 77.381], 
"label": "Clipping cat claws"}]}, "LNKdVrX_0Fg": {"duration": 226.72, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=LNKdVrX_0Fg", "annotations": [{"segment": [60.482034407176286, 170.83521999219968], "label": "Wrapping presents"}]}, "UaCSf-kW2Ho": {"duration": 95.133, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UaCSf-kW2Ho", "annotations": [{"segment": [0.01, 95.13215419501134], "label": "Playing congas"}]}, "TX8FGTL1flw": {"duration": 116.681, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TX8FGTL1flw", "annotations": [{"segment": [4.173630603582802, 9.436037899316025], "label": "Ice fishing"}, {"segment": [12.520897348538949, 14.879907515591773], "label": "Ice fishing"}, {"segment": [19.416465529154898, 21.231088734580144], "label": "Ice fishing"}, {"segment": [27.58226995356852, 29.941280120621343], "label": "Ice fishing"}, {"segment": [49.902135380299086, 55.890391958202414], "label": "Ice fishing"}, {"segment": [84.01705084385748, 109.2403131595651], "label": "Ice fishing"}]}, "RAaRFD5M8h4": {"duration": 106.65, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RAaRFD5M8h4", "annotations": [{"segment": [0.2, 106.28832917316691], "label": "Grooming dog"}]}, "UBgNp8hNG2s": {"duration": 197.509, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=UBgNp8hNG2s", "annotations": []}, "W4aPcuQSxFI": {"duration": 111.526, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=W4aPcuQSxFI", "annotations": [{"segment": [72.30636848926247, 106.86200556091356], "label": "Mooping floor"}]}, "IaT8-cA_AVU": {"duration": 182.602, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IaT8-cA_AVU", "annotations": [{"segment": [29.93853134122613, 67.47803671346848], "label": "Arm wrestling"}, {"segment": 
[77.43169344095698, 116.6775399664831], "label": "Arm wrestling"}, {"segment": [119.80583208083661, 174.12435879370244], "label": "Arm wrestling"}]}, "5QDwobEnN3g": {"duration": 84.359, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5QDwobEnN3g", "annotations": [{"segment": [0.2, 10.101978298803969], "label": "Building sandcastles"}, {"segment": [22.040682290509515, 28.8627988571984], "label": "Building sandcastles"}]}, "E15z95ZcEYU": {"duration": 56.5, "subset": "validation", "resolution": "608x1080", "url": "https://www.youtube.com/watch?v=E15z95ZcEYU", "annotations": [{"segment": [0.01, 56.49414965986394], "label": "Getting a tattoo"}]}, "jikOPvJPU-c": {"duration": 210.19, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jikOPvJPU-c", "annotations": [{"segment": [20.658007577445954, 67.22050084724478], "label": "Making a sandwich"}]}, "kTSuw4T-_Nk": {"duration": 118.47, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=kTSuw4T-_Nk", "annotations": [{"segment": [71.1264976599064, 97.54491107644307], "label": "Shaving legs"}]}, "ZoCRdAYWtKg": {"duration": 148.445, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=ZoCRdAYWtKg", "annotations": [{"segment": [14.313527650270839, 120.04896760541308], "label": "Riding bumper cars"}]}, "RJpWgi0EaUE": {"duration": 210.03, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RJpWgi0EaUE", "annotations": [{"segment": [51.97463928, 160.95819312], "label": "Breakdancing"}]}, "QsxIJ-L-JJU": {"duration": 147.099, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QsxIJ-L-JJU", "annotations": []}, "9fw8ODTEso4": {"duration": 153.09, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=9fw8ODTEso4", "annotations": [{"segment": [0, 152.61130014851793], "label": "Beach 
soccer"}]}, "CFbkVO5b2g0": {"duration": 110.18, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CFbkVO5b2g0", "annotations": []}, "XN7mRI1qGlQ": {"duration": 62.067, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=XN7mRI1qGlQ", "annotations": [{"segment": [2.224684867394696, 62.021], "label": "Blow-drying hair"}]}, "KanNWuKvkxs": {"duration": 126.665, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KanNWuKvkxs", "annotations": [{"segment": [30.017035881435255, 71.29947113884556], "label": "Putting in contact lenses"}]}, "Dty6AUdBdrE": {"duration": 206.263, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Dty6AUdBdrE", "annotations": [{"segment": [0.01, 206.26285714285714], "label": "Raking leaves"}]}, "FwV1XbjLJHY": {"duration": 113.36, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=FwV1XbjLJHY", "annotations": [{"segment": [7.250784028640057, 103.27945885803433], "label": "Mixing drinks"}]}, "GPWXB0wy5dY": {"duration": 23.41, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=GPWXB0wy5dY", "annotations": [{"segment": [0, 23.369198627145085], "label": "Windsurfing"}]}, "0AX436vpIBY": {"duration": 83.499, "subset": "testing", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=0AX436vpIBY", "annotations": []}, "OqjwVXLOSSw": {"duration": 197.39, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OqjwVXLOSSw", "annotations": []}, "A1k7CSI7aRs": {"duration": 15.419, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=A1k7CSI7aRs", "annotations": [{"segment": [0.01, 14.539075206738435], "label": "Doing a powerbomb"}]}, "RK0vNzp_CwI": {"duration": 194.188, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RK0vNzp_CwI", 
"annotations": []}, "CU15ndFYics": {"duration": 142.269, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=CU15ndFYics", "annotations": []}, "ZwIu2pPxB3Y": {"duration": 39.103, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZwIu2pPxB3Y", "annotations": [{"segment": [4.401861013196533, 16.278579795854313], "label": "Elliptical trainer"}, {"segment": [27.85076744382939, 31.32242373822191], "label": "Elliptical trainer"}]}, "l88zD-xVQn0": {"duration": 7.849, "subset": "testing", "resolution": "426x224", "url": "https://www.youtube.com/watch?v=l88zD-xVQn0", "annotations": []}, "4EloxAiCydc": {"duration": 195.466, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4EloxAiCydc", "annotations": [{"segment": [0.01, 181.09215912636503], "label": "Waxing skis"}]}, "_7a80bvsbk8": {"duration": 53.151, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_7a80bvsbk8", "annotations": [{"segment": [26.664714508580342, 51.444182527301095], "label": "Bullfighting"}]}, "hr8zkCXbTTk": {"duration": 180.094, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=hr8zkCXbTTk", "annotations": [{"segment": [22.995890965732084, 139.93840965732088], "label": "Curling"}]}, "L9cxYlPkUYs": {"duration": 182.649, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=L9cxYlPkUYs", "annotations": [{"segment": [72.09046069597886, 81.20862173262441], "label": "Running a marathon"}, {"segment": [86.05264478334234, 93.46115062561685], "label": "Running a marathon"}, {"segment": [120.53069120315833, 148.4550591622465], "label": "Running a marathon"}]}, "37pwbUp8t1I": {"duration": 200.04, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=37pwbUp8t1I", "annotations": [{"segment": [17.784118535101403, 190.63327061310454], "label": "Breakdancing"}]}, 
"IytPbGtfo14": {"duration": 81.94, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IytPbGtfo14", "annotations": [{"segment": [10.284994716068642, 79.66079758346334], "label": "Mixing drinks"}]}, "g6tF83rCXh0": {"duration": 41.15, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=g6tF83rCXh0", "annotations": [{"segment": [15.02044831825273, 33.12201424024961], "label": "Clean and jerk"}]}, "16Mq9vM7Nck": {"duration": 77.462, "subset": "training", "resolution": "320x180", "url": "https://www.youtube.com/watch?v=16Mq9vM7Nck", "annotations": [{"segment": [11.926463709122304, 20.359318136533496], "label": "Croquet"}, {"segment": [38.42972048098605, 50.47665447593606], "label": "Croquet"}, {"segment": [67.58330172321443, 69.99268865945083], "label": "Croquet"}]}, "immCYvN8pwQ": {"duration": 198.717, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=immCYvN8pwQ", "annotations": [{"segment": [28.600222158280822, 174.69659267541476], "label": "Longboarding"}]}, "Xa1yH3vxlAk": {"duration": 137.57999999999998, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Xa1yH3vxlAk", "annotations": [{"segment": [18.666558311312045, 126.8038600577223], "label": "Playing bagpipes"}]}, "LQLDFUFlXAc": {"duration": 32.56, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LQLDFUFlXAc", "annotations": []}, "G_rVqf_hwXw": {"duration": 17.787, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=G_rVqf_hwXw", "annotations": []}, "zUnv3MIOxGM": {"duration": 198.183, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zUnv3MIOxGM", "annotations": []}, "dygVFom6V5c": {"duration": 201.94, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dygVFom6V5c", "annotations": [{"segment": [91.42767382, 
174.2963728549142], "label": "Paintball"}, {"segment": [28.092290730000006, 89.90354914196567], "label": "Paintball"}]}, "8PkoDfbK5Bc": {"duration": 157.641, "subset": "training", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=8PkoDfbK5Bc", "annotations": [{"segment": [13.238842796838798, 148.8143979020218], "label": "Capoeira"}]}, "CvhSPJ0sBCk": {"duration": 60.09, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=CvhSPJ0sBCk", "annotations": [{"segment": [0, 60.09324263038549], "label": "Windsurfing"}]}, "atw5LkvnAyo": {"duration": 146.588, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=atw5LkvnAyo", "annotations": [{"segment": [0.2, 141.28289859594383], "label": "Hanging wallpaper"}]}, "hEoASC7qC28": {"duration": 118.19, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hEoASC7qC28", "annotations": [{"segment": [12.348591263650546, 44.04945241809673], "label": "Wakeboarding"}, {"segment": [56.02942901716069, 105.44164118564743], "label": "Wakeboarding"}]}, "YC7ye0ZqmRI": {"duration": 142.385, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YC7ye0ZqmRI", "annotations": []}, "YIEv6_HQtAc": {"duration": 169.92000000000002, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YIEv6_HQtAc", "annotations": [{"segment": [71.83978661459383, 82.17835369197081], "label": "Layup drill in basketball"}, {"segment": [85.35945125424064, 89.86600613412293], "label": "Layup drill in basketball"}, {"segment": [89.60091460842433, 117.43551825897036], "label": "Layup drill in basketball"}]}, "s6pFNNnub80": {"duration": 210.47, "subset": "training", "resolution": "1916x1080", "url": "https://www.youtube.com/watch?v=s6pFNNnub80", "annotations": [{"segment": [38.566946177847115, 167.5610725429017], "label": "Doing nails"}]}, "2bEr09bbqAA": {"duration": 102.05, "subset": 
"training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=2bEr09bbqAA", "annotations": [{"segment": [71.00633151856687, 99.34518131745678], "label": "Kayaking"}]}, "HW9c7WFW6q8": {"duration": 225.16, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HW9c7WFW6q8", "annotations": [{"segment": [39.69342379563182, 218.4894654945398], "label": "Paintball"}]}, "RrScnC2xQpw": {"duration": 173.732, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RrScnC2xQpw", "annotations": [{"segment": [24.100882069196263, 72.1945721486495], "label": "Baking cookies"}]}, "vJkL_4niC4w": {"duration": 169.09, "subset": "training", "resolution": "384x288", "url": "https://www.youtube.com/watch?v=vJkL_4niC4w", "annotations": [{"segment": [0.01, 165.13032329728563], "label": "Playing piano"}]}, "aMcqZWGHCbM": {"duration": 101.17, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aMcqZWGHCbM", "annotations": []}, "IRDoo2NEeF4": {"duration": 48.786, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IRDoo2NEeF4", "annotations": []}, "0EDEA8dZeGo": {"duration": 134.212, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0EDEA8dZeGo", "annotations": [{"segment": [18.244491983077488, 25.979396335264884], "label": "Waterskiing"}, {"segment": [69.67115335167476, 82.21424149035703], "label": "Waterskiing"}, {"segment": [103.1193883881608, 121.72496814414457], "label": "Waterskiing"}]}, "Fb-t6zr7K5c": {"duration": 80.085, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Fb-t6zr7K5c", "annotations": [{"segment": [11.743547581903275, 67.35481903276131], "label": "Baton twirling"}]}, "Av0VsTxJd78": {"duration": 202.664, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Av0VsTxJd78", "annotations": []}, "zh2Thxc6NVI": 
{"duration": 102.935, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zh2Thxc6NVI", "annotations": [{"segment": [58.59417316692667, 78.01854290171607], "label": "Playing ice hockey"}]}, "zacXKdNZHrI": {"duration": 157.34, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=zacXKdNZHrI", "annotations": [{"segment": [21.35482664204528, 130.3380798497246], "label": "Playing accordion"}]}, "0_xap_BBDrw": {"duration": 48.02, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0_xap_BBDrw", "annotations": [{"segment": [1.1228003120124805, 47.382173166926684], "label": "Layup drill in basketball"}]}, "kqzIDPXbATw": {"duration": 156.34, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kqzIDPXbATw", "annotations": [{"segment": [36.79617940717628, 156.221], "label": "Cutting the grass"}]}, "dFGqzBVhCBI": {"duration": 88.26, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=dFGqzBVhCBI", "annotations": []}, "7IIi_92vRLo": {"duration": 178.54, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=7IIi_92vRLo", "annotations": [{"segment": [94.97870659282371, 140.65761533541342], "label": "Getting a haircut"}]}, "MD6p1s-N9rM": {"duration": 69.99, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MD6p1s-N9rM", "annotations": [{"segment": [1.2009896667975564, 36.79395546645866], "label": "Ironing clothes"}]}, "DTWZhe352y8": {"duration": 238.96, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DTWZhe352y8", "annotations": [{"segment": [77.16693644929796, 197.5771802808112], "label": "Wrapping presents"}]}, "lJeJ2LFna5o": {"duration": 35.481, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lJeJ2LFna5o", "annotations": [{"segment": 
[2.7122064914196566, 34.15166133073323], "label": "Playing beach volleyball"}]}, "x2VS9ipZN1A": {"duration": 34.18, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=x2VS9ipZN1A", "annotations": []}, "V6B8zFv1DdA": {"duration": 126.271, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=V6B8zFv1DdA", "annotations": [{"segment": [0.2, 74.14927218654856], "label": "Beach soccer"}]}, "eBEnQDNyzFk": {"duration": 196.49, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eBEnQDNyzFk", "annotations": [{"segment": [58.54723139, 177.12713589], "label": "Preparing pasta"}]}, "5QZpCDyXNx4": {"duration": 88.469, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5QZpCDyXNx4", "annotations": [{"segment": [9.793901716068643, 14.345996879875196], "label": "BMX"}, {"segment": [16.966900156006243, 20.691341653666147], "label": "BMX"}, {"segment": [22.898418096723873, 27.58845553822153], "label": "BMX"}, {"segment": [28.554051482059283, 31.17495475819033], "label": "BMX"}, {"segment": [33.79585803432138, 52.69395007800312], "label": "BMX"}, {"segment": [57.52192979719189, 60.970486739469585], "label": "BMX"}, {"segment": [62.625794071762876, 63.59139001560063], "label": "BMX"}, {"segment": [65.24669734789393, 68.14348517940718], "label": "BMX"}, {"segment": [71.31615756630265, 71.72998439937598], "label": "BMX"}, {"segment": [76.42002184087364, 81.2480015600624], "label": "BMX"}, {"segment": [86.21392355694229, 86.90363494539783], "label": "BMX"}]}, "31KEa5VhvPs": {"duration": 212.672, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=31KEa5VhvPs", "annotations": [{"segment": [0.01, 0.02], "label": "Curling"}]}, "a1WhnMcTbrY": {"duration": 57.54, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=a1WhnMcTbrY", "annotations": [{"segment": [40.680073, 
49.68858496723869], "label": "Shoveling snow"}, {"segment": [8.789677485179407, 38.07298091], "label": "Shoveling snow"}]}, "8Kj5Whf2JyA": {"duration": 60.19, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8Kj5Whf2JyA", "annotations": [{"segment": [0.01, 60.186122], "label": "Dodgeball"}]}, "VCdZ9gfzRWo": {"duration": 39.01, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=VCdZ9gfzRWo", "annotations": [{"segment": [0.8520020624024962, 33.77579604524181], "label": "Decorating the Christmas tree"}]}, "NgBfcoVZ254": {"duration": 111.572, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NgBfcoVZ254", "annotations": [{"segment": [10.791664101938228, 100.4320988096724], "label": "Longboarding"}]}, "86iCOCtA4Ww": {"duration": 129.057, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=86iCOCtA4Ww", "annotations": [{"segment": [31.811120521011315, 47.515344575687784], "label": "Rope skipping"}, {"segment": [67.44762895277717, 93.82267191640045], "label": "Rope skipping"}]}, "-c71seS8bWk": {"duration": 213.37, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=-c71seS8bWk", "annotations": [{"segment": [196.5089956493727, 201.16188391728548], "label": "Drinking coffee"}, {"segment": [207.80886715716088, 210.13531129111723], "label": "Drinking coffee"}]}, "dG8fhIBgx9k": {"duration": 81.55, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=dG8fhIBgx9k", "annotations": [{"segment": [20.03389716, 74.2861506], "label": "Getting a haircut"}]}, "YApgx2YL9wM": {"duration": 48.995, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YApgx2YL9wM", "annotations": [{"segment": [0.01, 48.961], "label": "Slacklining"}]}, "bz9R-Xa5xqM": {"duration": 124.76, "subset": "training", "resolution": "648x480", "url": 
"https://www.youtube.com/watch?v=bz9R-Xa5xqM", "annotations": [{"segment": [28.41666019293833, 35.61814257060078], "label": "Playing accordion"}, {"segment": [72.20945843548029, 75.5182476360279], "label": "Playing accordion"}, {"segment": [79.99484458034321, 81.74655650546022], "label": "Playing accordion"}, {"segment": [97.31732917316693, 105.68661948205929], "label": "Playing accordion"}]}, "uyBGDfYo0qE": {"duration": 133.66, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=uyBGDfYo0qE", "annotations": [{"segment": [0.01, 128.54559648508388], "label": "Vacuuming floor"}]}, "PI1ZaFQ28Ao": {"duration": 68.17, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=PI1ZaFQ28Ao", "annotations": [{"segment": [15.88408482, 55.53296968000001], "label": "Washing dishes"}]}, "OqLUp37WKMA": {"duration": 69.126, "subset": "validation", "resolution": "540x360", "url": "https://www.youtube.com/watch?v=OqLUp37WKMA", "annotations": [{"segment": [0.01, 69.12580498866213], "label": "Futsal"}]}, "yE6IDGXoopw": {"duration": 228.53, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yE6IDGXoopw", "annotations": []}, "jLcYOkRvdic": {"duration": 111.34, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jLcYOkRvdic", "annotations": [{"segment": [0.914429861435079, 29.703200677634456], "label": "Shoveling snow"}, {"segment": [56.93113452187122, 111.339704], "label": "Shoveling snow"}]}, "D5jLypnn6Ps": {"duration": 236.844, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=D5jLypnn6Ps", "annotations": [{"segment": [0.01, 2.5864348859668675], "label": "Hurling"}, {"segment": [8.498286053891135, 16.627081409787003], "label": "Hurling"}, {"segment": [33.62365351756928, 48.77277213537521], "label": "Hurling"}, {"segment": [73.52864890105808, 113.4336442845469], "label": "Hurling"}, {"segment": 
[126.7353094123765, 161.0979443259363], "label": "Hurling"}, {"segment": [169.59623037982743, 188.070764950078], "label": "Hurling"}, {"segment": [210.97918818564742, 222.43339980343214], "label": "Hurling"}]}, "Fe-DUewB8ok": {"duration": 20.48, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Fe-DUewB8ok", "annotations": [{"segment": [3.067207488299532, 19.201996879875196], "label": "Doing karate"}]}, "_CIoHeD7KQc": {"duration": 122.787, "subset": "testing", "resolution": "2276x720", "url": "https://www.youtube.com/watch?v=_CIoHeD7KQc", "annotations": []}, "s0ekpZ8H3k8": {"duration": 183.37, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=s0ekpZ8H3k8", "annotations": []}, "7qBA7XPDsC4": {"duration": 32.23, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=7qBA7XPDsC4", "annotations": [{"segment": [5.0204368174726985, 25.403410296411856], "label": "Powerbocking"}]}, "E5uAH7_WTFs": {"duration": 9.43, "subset": "training", "resolution": "384x226", "url": "https://www.youtube.com/watch?v=E5uAH7_WTFs", "annotations": [{"segment": [3.7482676771339944, 9.283903722305334], "label": "Playing water polo"}]}, "LFOlEafI35c": {"duration": 16.069, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LFOlEafI35c", "annotations": [{"segment": [2.230917444773791, 9.049002219812794], "label": "Arm wrestling"}]}, "-fjUWhSM6Hc": {"duration": 198.14, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-fjUWhSM6Hc", "annotations": [{"segment": [157.4063694, 169.5282687], "label": "Javelin throw"}, {"segment": [31.275609749999997, 148.40509634999998], "label": "Javelin throw"}]}, "Lm4oeMdqOgw": {"duration": 81.36, "subset": "validation", "resolution": "318x240", "url": "https://www.youtube.com/watch?v=Lm4oeMdqOgw", "annotations": [{"segment": [23.122550702028082, 31.611377535101404], "label": "Javelin 
throw"}, {"segment": [45.54825741029641, 51.12300936037442], "label": "Javelin throw"}]}, "cukeWRPJzf8": {"duration": 74.142, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=cukeWRPJzf8", "annotations": [{"segment": [4.857933210608424, 47.538346418096715], "label": "Swimming"}]}, "7O9kkDxEvaY": {"duration": 82.39, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=7O9kkDxEvaY", "annotations": [{"segment": [3.853510109204368, 71.77870768], "label": "Brushing hair"}]}, "sBwUwgTjhAU": {"duration": 65.46, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sBwUwgTjhAU", "annotations": []}, "YufXlj-WpEw": {"duration": 144.73, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YufXlj-WpEw", "annotations": [{"segment": [15.80514572963871, 139.08528230140405], "label": "Triple jump"}]}, "U7QjLGMeGOo": {"duration": 73.259, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=U7QjLGMeGOo", "annotations": [{"segment": [0.01, 5.600114369734789], "label": "Playing ten pins"}, {"segment": [10.057348255850235, 12.00024507800312], "label": "Playing ten pins"}, {"segment": [22.74332162402496, 25.029082591263652], "label": "Playing ten pins"}, {"segment": [53.258230536661465, 56.6868719875195], "label": "Playing ten pins"}, {"segment": [68.45854096879876, 72.11575851638065], "label": "Playing ten pins"}]}, "cvatsuUhqhk": {"duration": 212.301, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cvatsuUhqhk", "annotations": [{"segment": [8.609650546021841, 195.72241965678626], "label": "Table soccer"}]}, "7J6cZ_Gz8q4": {"duration": 184.2, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=7J6cZ_Gz8q4", "annotations": [{"segment": [1.436845773150654, 101.15394242980604], "label": "Playing saxophone"}, {"segment": 
[117.82135268330734, 179.60572055382215], "label": "Playing saxophone"}]}, "IZCzbslH8jo": {"duration": 163.17000000000002, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IZCzbslH8jo", "annotations": [{"segment": [65.49809572, 149.97149735335412], "label": "Playing field hockey"}, {"segment": [28.15755662, 52.404024220000004], "label": "Playing field hockey"}]}, "AE2gHOwcV14": {"duration": 66.9, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AE2gHOwcV14", "annotations": []}, "jnnHUCs-t3o": {"duration": 26.52, "subset": "training", "resolution": "270x360", "url": "https://www.youtube.com/watch?v=jnnHUCs-t3o", "annotations": [{"segment": [6.304204260000001, 24.76342109516381], "label": "Playing water polo"}, {"segment": [1.7881197, 4.97052672], "label": "Playing water polo"}]}, "zdDWwJTRiv0": {"duration": 166.162, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=zdDWwJTRiv0", "annotations": []}, "yw9W5dZ6T18": {"duration": 100.64, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=yw9W5dZ6T18", "annotations": [{"segment": [7.3788740312012475, 75.5157108299532], "label": "Preparing salad"}]}, "RI-l0tK8Ok0": {"duration": 74.68, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RI-l0tK8Ok0", "annotations": [{"segment": [8.504371652106084, 67.45248200780031], "label": "Playing piano"}]}, "5Eb_NVjFah0": {"duration": 110.898, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=5Eb_NVjFah0", "annotations": [{"segment": [11.297137383205115, 103.19433841747302], "label": "Playing ten pins"}]}, "Y-2nhi8JdO8": {"duration": 187.08, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Y-2nhi8JdO8", "annotations": [{"segment": [8.609912091721764, 154.24876408389667], "label": "Playing guitarra"}]}, "RTbvrcdPcrs": 
{"duration": 129.82, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=RTbvrcdPcrs", "annotations": [{"segment": [17.417719054340402, 129.8227664399093], "label": "Doing step aerobics"}]}, "rRyjcL-OpIM": {"duration": 31.046, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rRyjcL-OpIM", "annotations": []}, "MpGK4SjpuGY": {"duration": 26.332, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MpGK4SjpuGY", "annotations": []}, "DgIDfZB16Do": {"duration": 96.085, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DgIDfZB16Do", "annotations": []}, "ctHNLEOtgPY": {"duration": 237.01, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ctHNLEOtgPY", "annotations": [{"segment": [20.45598786, 132.48872406], "label": "Breakdancing"}, {"segment": [157.45967622, 208.84433118564743], "label": "Breakdancing"}]}, "u1ZFiwJJXSQ": {"duration": 108.53, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=u1ZFiwJJXSQ", "annotations": [{"segment": [9.7904913, 94.43634916380655], "label": "Doing step aerobics"}]}, "TEgPIueNqTI": {"duration": 147.45, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=TEgPIueNqTI", "annotations": []}, "ez9pf35BMtc": {"duration": 127.85, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ez9pf35BMtc", "annotations": [{"segment": [6.3820780031201245, 44.27566614664587], "label": "Using parallel bars"}, {"segment": [86.57047425897035, 98.93768486739471], "label": "Using parallel bars"}]}, "JxEnKQ7iXUI": {"duration": 129.75, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JxEnKQ7iXUI", "annotations": [{"segment": [3.6369765899999997, 127.58198582371297], "label": "Ballet"}]}, "CG-7jcSB5_c": {"duration": 36.92, "subset": "training", 
"resolution": "320x240", "url": "https://www.youtube.com/watch?v=CG-7jcSB5_c", "annotations": [{"segment": [4.03229318, 17.09977323], "label": "Playing lacrosse"}, {"segment": [18.91877236, 28.287337799999996], "label": "Playing lacrosse"}]}, "w-Rccu4-xyA": {"duration": 223.21, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=w-Rccu4-xyA", "annotations": []}, "IoOkg-_S4cY": {"duration": 78.0, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IoOkg-_S4cY", "annotations": [{"segment": [4.254656786271451, 74.6579937597504], "label": "Using the balance beam"}]}, "i_tMiGS11fs": {"duration": 156.248, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=i_tMiGS11fs", "annotations": [{"segment": [0.01, 132.79039781591266], "label": "Doing fencing"}]}, "EJxRNuLtEP4": {"duration": 16.371, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EJxRNuLtEP4", "annotations": []}, "x2CZHFxoZaM": {"duration": 167.904, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=x2CZHFxoZaM", "annotations": [{"segment": [39.03947229460357, 42.43937760044346], "label": "Running a marathon"}, {"segment": [47.40846997051714, 50.80837527635702], "label": "Running a marathon"}, {"segment": [68.59249533767334, 70.42321357927943], "label": "Running a marathon"}, {"segment": [90.82264541431874, 95.26867542964783], "label": "Running a marathon"}, {"segment": [152.28247039804566, 160.38993680614536], "label": "Running a marathon"}]}, "fQ1OuD7x3to": {"duration": 83.17, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fQ1OuD7x3to", "annotations": []}, "aEWVDbV76_Q": {"duration": 157.687, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aEWVDbV76_Q", "annotations": [{"segment": [29.507831513260534, 125.91605616224649], "label": "Wakeboarding"}]}, 
"5P9Tbut04xE": {"duration": 141.74, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5P9Tbut04xE", "annotations": []}, "Gba1MksaNBQ": {"duration": 192.958, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Gba1MksaNBQ", "annotations": [{"segment": [0, 192.957822], "label": "Playing pool"}]}, "2Dlfwnvz76Q": {"duration": 77.53, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=2Dlfwnvz76Q", "annotations": [{"segment": [5.862623479719189, 68.59873865444618], "label": "Playing harmonica"}]}, "QHmZWkRK528": {"duration": 202.06, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QHmZWkRK528", "annotations": [{"segment": [2.679421574141877, 188.66280142634275], "label": "Cheerleading"}]}, "vdq_xoRyxCU": {"duration": 70.752, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vdq_xoRyxCU", "annotations": [{"segment": [22.678106846563903, 57.732709030651264], "label": "Snow tubing"}]}, "lxlXHOHSOlk": {"duration": 103.56, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lxlXHOHSOlk", "annotations": [{"segment": [16.31772323868955, 94.19042225897036], "label": "Sailing"}]}, "bFRqNqyVznA": {"duration": 151.98, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bFRqNqyVznA", "annotations": [{"segment": [41.253642084243374, 75.15749736037442], "label": "Polishing forniture"}, {"segment": [90.80543056474261, 104.31955469578783], "label": "Polishing forniture"}, {"segment": [114.75151016536662, 124.70928584087363], "label": "Polishing forniture"}]}, "b1wnLw3H1vo": {"duration": 53.57, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=b1wnLw3H1vo", "annotations": [{"segment": [0.08357009984399377, 53.15058350078003], "label": "Doing kickboxing"}]}, "Lp3c3nwHrqM": {"duration": 
215.62, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Lp3c3nwHrqM", "annotations": [{"segment": [38.8827546, 194.44180360000001], "label": "Washing dishes"}]}, "m22TDwfa8go": {"duration": 183.86, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=m22TDwfa8go", "annotations": [{"segment": [89.565620124805, 147.17422698907956], "label": "Mixing drinks"}]}, "a39_RoOBkX0": {"duration": 11.43, "subset": "validation", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=a39_RoOBkX0", "annotations": [{"segment": [0.08911244929797192, 10.746961385335412], "label": "Washing dishes"}]}, "UYzgXZIRUR8": {"duration": 193.748, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UYzgXZIRUR8", "annotations": [{"segment": [14.508378159126366, 62.26512293291732], "label": "Longboarding"}, {"segment": [93.39768439937598, 169.86892761310452], "label": "Longboarding"}]}, "FDBimzPAudY": {"duration": 86.5, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FDBimzPAudY", "annotations": []}, "8lMncYsbrWw": {"duration": 123.32, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=8lMncYsbrWw", "annotations": []}, "POvVSjY_8HU": {"duration": 82.199, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=POvVSjY_8HU", "annotations": [{"segment": [1.1515093457943926, 80.24135046728972], "label": "Hand car wash"}]}, "v-mNvC8zVhE": {"duration": 141.456, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=v-mNvC8zVhE", "annotations": [{"segment": [0.2, 136.78786271450858], "label": "Hanging wallpaper"}]}, "NSMAftE6fb4": {"duration": 38.546, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NSMAftE6fb4", "annotations": [{"segment": [1.1718556942277691, 31.399723088923558], "label": "Surfing"}]}, 
"s7xcZopLHO8": {"duration": 69.94, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=s7xcZopLHO8", "annotations": []}, "w5J3Gt5WLwU": {"duration": 142.99, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=w5J3Gt5WLwU", "annotations": [{"segment": [5.353702866482006, 139.1962738221529], "label": "Playing water polo"}]}, "Q_Rufuciohg": {"duration": 63.484, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Q_Rufuciohg", "annotations": [{"segment": [10.993217655236823, 47.14208577223089], "label": "Peeling potatoes"}]}, "OBDq689jDDY": {"duration": 61.25, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OBDq689jDDY", "annotations": [{"segment": [18.67266936, 58.440717628705144], "label": "Doing step aerobics"}]}, "Riww8oJ_gdE": {"duration": 53.76, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Riww8oJ_gdE", "annotations": []}, "P7Z4i1gmzjs": {"duration": 12.33, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=P7Z4i1gmzjs", "annotations": []}, "XI6sjZg9Yeg": {"duration": 236.983, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XI6sjZg9Yeg", "annotations": [{"segment": [0, 168.21685486467078], "label": "Fixing the roof"}, {"segment": [192.98724887771021, 236.982426], "label": "Fixing the roof"}]}, "4hbMYlgO8_o": {"duration": 3.576, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4hbMYlgO8_o", "annotations": [{"segment": [0, 3.575872], "label": "Cutting the grass"}]}, "D2JvqkKa-qM": {"duration": 14.12, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=D2JvqkKa-qM", "annotations": [{"segment": [0.022024541341653665, 11.71705599375975], "label": "Springboard diving"}]}, "Y_ITwC4_pEY": {"duration": 124.065, "subset": "training", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Y_ITwC4_pEY", "annotations": [{"segment": [15.67735009048362, 64.06423308580342], "label": "Playing drums"}, {"segment": [72.19322942901715, 96.77376599063963], "label": "Playing drums"}]}, "Nx4rK_jvvR4": {"duration": 28.19, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Nx4rK_jvvR4", "annotations": [{"segment": [4.301297971918877, 23.701029641185645], "label": "Ballet"}]}, "cNR7JAcbjH8": {"duration": 153.066, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cNR7JAcbjH8", "annotations": []}, "Oe1tNvkhrI8": {"duration": 61.91, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Oe1tNvkhrI8", "annotations": [{"segment": [1.1588966864274572, 60.938650761310456], "label": "Mowing the lawn"}]}, "eoSwoeLmeyQ": {"duration": 30.14, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eoSwoeLmeyQ", "annotations": []}, "-6dz6tBH77I": {"duration": 8.06, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-6dz6tBH77I", "annotations": []}, "V9mLJJn8Lvw": {"duration": 134.12, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=V9mLJJn8Lvw", "annotations": []}, "ZdUUBA1czgY": {"duration": 18.48, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZdUUBA1czgY", "annotations": [{"segment": [7.900725411329378, 16.118633229682928], "label": "Discus throw"}]}, "6h-WE-0eBlA": {"duration": 194.723, "subset": "training", "resolution": "566x360", "url": "https://www.youtube.com/watch?v=6h-WE-0eBlA", "annotations": [{"segment": [2.725500822355654, 39.06557339882014], "label": "Playing pool"}, {"segment": [60.56678300656163, 81.46232473802871], "label": "Playing pool"}, {"segment": [110.83721673733751, 115.07689187125835], "label": "Playing pool"}]}, "JfF80Uho8U8": {"duration": 
143.27, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JfF80Uho8U8", "annotations": [{"segment": [46.70071359714527, 134.62464585571226], "label": "Making a sandwich"}]}, "H4spfNy_LG4": {"duration": 149.258, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=H4spfNy_LG4", "annotations": [{"segment": [10.711173089100434, 139.47810036349455], "label": "Rafting"}]}, "YKLZAgdkFrk": {"duration": 13.561, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=YKLZAgdkFrk", "annotations": [{"segment": [0.5906209048361934, 12.909285491419658], "label": "Swinging at the playground"}]}, "eFcpy2RClJQ": {"duration": 670.895, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eFcpy2RClJQ", "annotations": []}, "NG2WCtJ8DMA": {"duration": 22.733, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NG2WCtJ8DMA", "annotations": []}, "kzdRUWpBXd4": {"duration": 129.08, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kzdRUWpBXd4", "annotations": [{"segment": [15.69698144, 68.94167983999999], "label": "Dodgeball"}, {"segment": [75.51782636, 109.70190443999999], "label": "Dodgeball"}]}, "3uWV7MOoNk0": {"duration": 106.023, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3uWV7MOoNk0", "annotations": []}, "4KMG6lEDSIM": {"duration": 84.24, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4KMG6lEDSIM", "annotations": []}, "fbIEeQknsuo": {"duration": 223.585, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fbIEeQknsuo", "annotations": [{"segment": [0.01, 219.54486709838503], "label": "Camel ride"}]}, "42XFIWVIWpw": {"duration": 172.687, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=42XFIWVIWpw", "annotations": 
[{"segment": [22.62116068642746, 158.3481248049922], "label": "Baton twirling"}]}, "8uV6u0QcTSs": {"duration": 64.13, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8uV6u0QcTSs", "annotations": [{"segment": [0, 63.13274506084243], "label": "Putting on makeup"}]}, "jEOtcfk220s": {"duration": 41.22, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=jEOtcfk220s", "annotations": []}, "4aBJ_L0u7Lo": {"duration": 141.828, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=4aBJ_L0u7Lo", "annotations": [{"segment": [31.408583463338534, 76.53077379095164], "label": "Shuffleboard"}, {"segment": [87.59013416536662, 132.95226365054603], "label": "Shuffleboard"}]}, "iA2Q4t-o58w": {"duration": 219.27, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iA2Q4t-o58w", "annotations": [{"segment": [44.18867698, 169.84344359999997], "label": "Cheerleading"}, {"segment": [69.40426697999999, 161.84242726], "label": "Tumbling"}]}, "LnqAgjTpi0k": {"duration": 129.73, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LnqAgjTpi0k", "annotations": [{"segment": [37.643751893915756, 71.23978853042122], "label": "Running a marathon"}, {"segment": [77.7161329422777, 90.26405024024962], "label": "Running a marathon"}, {"segment": [99.5737953322933, 103.01435330109204], "label": "Running a marathon"}]}, "KNyWPCoHEng": {"duration": 164.723, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=KNyWPCoHEng", "annotations": [{"segment": [0.01, 164.681], "label": "Hand car wash"}]}, "py26bxAfOEg": {"duration": 227.185, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=py26bxAfOEg", "annotations": [{"segment": [8.15168928934028, 36.50539095943838], "label": "River tubing"}, {"segment": [44.65708020280811, 77.61825844773792], "label": 
"River tubing"}, {"segment": [90.02300294851794, 131.8447129797192], "label": "River tubing"}, {"segment": [137.86987459438376, 166.93241885335414], "label": "River tubing"}, {"segment": [192.09632912636505, 195.64054184087362], "label": "River tubing"}, {"segment": [205.2099161700468, 220.80445211388454], "label": "River tubing"}]}, "EkwX8Swnut8": {"duration": 231.294, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EkwX8Swnut8", "annotations": [{"segment": [114.89899921996881, 121.75325663026521], "label": "Skiing"}, {"segment": [125.72151092043683, 138.34777457098286], "label": "Skiing"}, {"segment": [149.89178705148208, 163.9610522620905], "label": "Skiing"}, {"segment": [169.3723081123245, 184.52382449297974], "label": "Skiing"}]}, "OVRYoPA6h-Y": {"duration": 197.463, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OVRYoPA6h-Y", "annotations": [{"segment": [7.197231571107155, 11.503267273030708], "label": "Croquet"}, {"segment": [51.48788450517798, 66.25143548320159], "label": "Croquet"}, {"segment": [73.63321097221339, 79.47711656768107], "label": "Croquet"}, {"segment": [92.70279765216056, 119.76930777853717], "label": "Croquet"}, {"segment": [132.9949879665169, 141.6070593123113], "label": "Croquet"}, {"segment": [154.8327403076384, 162.21451574689073], "label": "Croquet"}]}, "udIYvmhOlOM": {"duration": 107.09, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=udIYvmhOlOM", "annotations": []}, "V9xuy-rVj9w": {"duration": 193.678, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=V9xuy-rVj9w", "annotations": [{"segment": [22.082043825278053, 23.290639238482335], "label": "Snowboarding"}, {"segment": [32.05295598421339, 186.14887023932252], "label": "Snowboarding"}]}, "pq2xsK79FcQ": {"duration": 42.87, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=pq2xsK79FcQ", 
"annotations": [{"segment": [22.757192667706708, 26.7004360374415], "label": "Long jump"}]}, "nrvB8pcrY7o": {"duration": 175.404, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nrvB8pcrY7o", "annotations": [{"segment": [58.27792979719189, 65.9388783151326], "label": "Waxing skis"}, {"segment": [79.34553822152886, 128.320887675507], "label": "Waxing skis"}, {"segment": [141.18033697347894, 154.31339157566302], "label": "Waxing skis"}, {"segment": [163.88957722308893, 170.72970982839314], "label": "Waxing skis"}]}, "_-_SwG4EnRU": {"duration": 23.847, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_-_SwG4EnRU", "annotations": [{"segment": [0.2, 22.933132643594192], "label": "Mooping floor"}]}, "5nvximBXhFI": {"duration": 190.938, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5nvximBXhFI", "annotations": [{"segment": [8.905800933125972, 155.2740637636081], "label": "Longboarding"}]}, "cNvTm72aGcM": {"duration": 195.23, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cNvTm72aGcM", "annotations": [{"segment": [0.6091525075969025, 192.18761614682276], "label": "Belly dance"}]}, "x9VlxLDK-VM": {"duration": 121.39, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=x9VlxLDK-VM", "annotations": [{"segment": [1.3256746443057723, 120.06824635569424], "label": "Washing dishes"}]}, "wYHU4F2dLAc": {"duration": 97.22, "subset": "testing", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=wYHU4F2dLAc", "annotations": []}, "TP8lUusp66Y": {"duration": 191.89, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=TP8lUusp66Y", "annotations": [{"segment": [47.59822610920436, 168.24027090171606], "label": "Playing field hockey"}]}, "L4mSCZwXelk": {"duration": 148.655, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=L4mSCZwXelk", "annotations": [{"segment": [10.426591263650545, 141.12553666146647], "label": "Futsal"}]}, "5koLOwu786I": {"duration": 60.628, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=5koLOwu786I", "annotations": [{"segment": [10.309478641185647, 44.6428799875195], "label": "Blow-drying hair"}, {"segment": [47.574933546021846, 58.92481828861155], "label": "Blow-drying hair"}]}, "z4_OuffZPAo": {"duration": 28.352, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=z4_OuffZPAo", "annotations": [{"segment": [0.01, 22.955478496099843], "label": "Peeling potatoes"}]}, "pFh-8ICMTFY": {"duration": 72.865, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pFh-8ICMTFY", "annotations": [{"segment": [16.141532186457525, 62.40634510296412], "label": "Elliptical trainer"}]}, "tY6UFSLtIoE": {"duration": 231.97, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tY6UFSLtIoE", "annotations": [{"segment": [11.36406333, 99.63678551000001], "label": "Cumbia"}]}, "aMexiSP5Vjc": {"duration": 91.185, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=aMexiSP5Vjc", "annotations": [{"segment": [0.01, 88.07457404839585], "label": "Elliptical trainer"}]}, "saMBbZcDAjM": {"duration": 22.08, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=saMBbZcDAjM", "annotations": [{"segment": [0.01, 22.082175], "label": "Ping-pong"}]}, "GfqUICvzbzk": {"duration": 196.93, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=GfqUICvzbzk", "annotations": []}, "bESksIhdEvo": {"duration": 23.592, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=bESksIhdEvo", "annotations": [{"segment": [0.01, 23.521], "label": "Cleaning sink"}]}, "-doxoUNGLJE": {"duration": 206.565, "subset": 
"validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-doxoUNGLJE", "annotations": [{"segment": [0.01, 194.96316976599064], "label": "Changing car wheel"}]}, "JZqQjiUpOSw": {"duration": 91.56, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JZqQjiUpOSw", "annotations": []}, "7j8cTyXi5a4": {"duration": 83.92, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7j8cTyXi5a4", "annotations": [{"segment": [35.134963338533545, 71.25135023400937], "label": "Cleaning shoes"}]}, "-9dlY4JGGR0": {"duration": 145.357, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=-9dlY4JGGR0", "annotations": []}, "lSVj34-DR0o": {"duration": 141.22, "subset": "testing", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=lSVj34-DR0o", "annotations": []}, "xb8iMASjw1A": {"duration": 131.96, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xb8iMASjw1A", "annotations": [{"segment": [6.5876568994732585, 11.528399574078202], "label": "Snowboarding"}, {"segment": [13.998770911380674, 15.233956580031911], "label": "Snowboarding"}, {"segment": [17.292599361117304, 24.29198481680764], "label": "Snowboarding"}, {"segment": [95.72688912636505, 111.99016706396256], "label": "Snowboarding"}]}, "fBov3cwPEho": {"duration": 103.399, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fBov3cwPEho", "annotations": [{"segment": [0.01, 95.24898384357246], "label": "Decorating the Christmas tree"}]}, "agIcVWDyeH0": {"duration": 86.936, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=agIcVWDyeH0", "annotations": [{"segment": [37.112252730109205, 75.44352106084243], "label": "Putting on shoes"}]}, "ozgrP6Znupc": {"duration": 94.32, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ozgrP6Znupc", "annotations": 
[{"segment": [20.45304852730109, 94.31945578231293], "label": "Playing lacrosse"}]}, "K-E0UXNscjU": {"duration": 44.397, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=K-E0UXNscjU", "annotations": [{"segment": [0.01, 44.396552], "label": "Throwing darts"}]}, "kMkHZ_nGsPQ": {"duration": 19.6, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kMkHZ_nGsPQ", "annotations": []}, "3bTGq7QDjUg": {"duration": 63.53, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3bTGq7QDjUg", "annotations": [{"segment": [0.01, 63.144919777094756], "label": "River tubing"}]}, "IY1bVy1yZv4": {"duration": 72.75, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IY1bVy1yZv4", "annotations": [{"segment": [49.1279657, 55.6442359], "label": "Fixing bicycle"}, {"segment": [49.1279657, 55.6442359], "label": "Fixing bicycle"}]}, "iEaiLh3GZA8": {"duration": 225.12, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iEaiLh3GZA8", "annotations": [{"segment": [32.661347405616226, 45.65564691107644], "label": "Polishing shoes"}]}, "gWz4P3Jnis8": {"duration": 174.06, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gWz4P3Jnis8", "annotations": [{"segment": [44.80401318093541, 48.334026340645465], "label": "Smoking hookah"}, {"segment": [81.46184214715528, 86.07801320216075], "label": "Smoking hookah"}, {"segment": [124.36507849609985, 127.08047322308893], "label": "Smoking hookah"}]}, "UYUvjnlU9Ho": {"duration": 202.827, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UYUvjnlU9Ho", "annotations": [{"segment": [29.110769524658537, 37.33772586271451], "label": "Croquet"}, {"segment": [73.09334469734789, 158.84354561934478], "label": "Croquet"}]}, "pSWcVR96xlc": {"duration": 4.06, "subset": "validation", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=pSWcVR96xlc", "annotations": [{"segment": [0.10776809204368175, 3.638757931357254], "label": "Playing water polo"}]}, "ePaIRTk2geU": {"duration": 185.249, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ePaIRTk2geU", "annotations": []}, "ypfWA4aDL6k": {"duration": 111.57, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ypfWA4aDL6k", "annotations": []}, "oN_5o1t_Lvk": {"duration": 217.5, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=oN_5o1t_Lvk", "annotations": [{"segment": [0.01, 217.481], "label": "Tumbling"}]}, "rV9RzL8o7tk": {"duration": 47.926, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=rV9RzL8o7tk", "annotations": [{"segment": [11.30217165440693, 43.25282832107359], "label": "Playing pool"}]}, "7gSltoRSFCU": {"duration": 198.461, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7gSltoRSFCU", "annotations": []}, "3q_MOQNfSmA": {"duration": 210.745, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3q_MOQNfSmA", "annotations": [{"segment": [20.15529715166712, 194.13424218281978], "label": "Croquet"}]}, "uHLEUps_ahs": {"duration": 234.62, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uHLEUps_ahs", "annotations": [{"segment": [35.86509516380655, 47.21017628705148], "label": "Playing saxophone"}, {"segment": [125.52783307332292, 135.40903276131044], "label": "Playing saxophone"}, {"segment": [147.48605460218408, 211.53086739469578], "label": "Playing saxophone"}]}, "g4G1gg-9y7w": {"duration": 114.82, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=g4G1gg-9y7w", "annotations": [{"segment": [26.332189287571502, 94.58092335725429], "label": "Getting a piercing"}]}, "lCIJJgxTs2U": {"duration": 62.58, "subset": 
"validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lCIJJgxTs2U", "annotations": [{"segment": [3.93984792, 60.02497367706708], "label": "High jump"}]}, "GHBeLaysVaA": {"duration": 136.14, "subset": "validation", "resolution": "852x480", "url": "https://www.youtube.com/watch?v=GHBeLaysVaA", "annotations": [{"segment": [0, 133.16520719344774], "label": "Ballet"}]}, "DFaolyuIgZ4": {"duration": 109.22999999999999, "subset": "testing", "resolution": "240x180", "url": "https://www.youtube.com/watch?v=DFaolyuIgZ4", "annotations": []}, "1FKoT6uFsjY": {"duration": 184.947, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1FKoT6uFsjY", "annotations": [{"segment": [47.02359282371295, 160.97647113884554], "label": "Putting in contact lenses"}]}, "0jdSl5-lMpY": {"duration": 173.779, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0jdSl5-lMpY", "annotations": []}, "L21zcZlFfIY": {"duration": 201.76, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=L21zcZlFfIY", "annotations": [{"segment": [0, 201.721], "label": "Dodgeball"}]}, "qpqFF-Hs1vU": {"duration": 127.32, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=qpqFF-Hs1vU", "annotations": []}, "TxYZLJQOHvY": {"duration": 24.498, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TxYZLJQOHvY", "annotations": [{"segment": [0.2, 3.3199797191887677], "label": "Fun sliding down"}, {"segment": [18.50793291731669, 23.621464898595942], "label": "Fun sliding down"}]}, "Pzrh78MZPJg": {"duration": 226.024, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Pzrh78MZPJg", "annotations": []}, "SiBSwSu_REQ": {"duration": 222.006, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SiBSwSu_REQ", "annotations": [{"segment": [20.43424835768941, 
219.2352394773791], "label": "Baton twirling"}]}, "qYpb_3oAM0k": {"duration": 95.32, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=qYpb_3oAM0k", "annotations": [{"segment": [25.322752032761308, 92.45403601404057], "label": "Playing accordion"}]}, "mL7Ipu-HJ5M": {"duration": 123.74, "subset": "training", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=mL7Ipu-HJ5M", "annotations": [{"segment": [17.7520576349454, 121.17708907332293], "label": "Preparing pasta"}]}, "KMB-MNnnINk": {"duration": 119.142, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=KMB-MNnnINk", "annotations": []}, "mQCFphhDFw8": {"duration": 140.27, "subset": "training", "resolution": "1920x1072", "url": "https://www.youtube.com/watch?v=mQCFphhDFw8", "annotations": [{"segment": [16.631283460862246, 124.73462595646683], "label": "Fixing bicycle"}]}, "iNhoeDq_EBQ": {"duration": 144.73, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=iNhoeDq_EBQ", "annotations": [{"segment": [4.514227769110765, 137.92872230889236], "label": "Tumbling"}]}, "V26HGC2a0ME": {"duration": 88.561, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=V26HGC2a0ME", "annotations": []}, "t1MXansbY5s": {"duration": 45.001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=t1MXansbY5s", "annotations": [{"segment": [13.51402013223386, 14.637263364003948], "label": "Putting on shoes"}, {"segment": [23.974222728092798, 26.08030378766171], "label": "Putting on shoes"}]}, "2tpwfPdSEVo": {"duration": 204.754, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=2tpwfPdSEVo", "annotations": [{"segment": [0.01, 189.3908783151326], "label": "Clipping cat claws"}]}, "asWvAGhlbdQ": {"duration": 180.93, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=asWvAGhlbdQ", 
"annotations": [{"segment": [37.54082483619345, 132.94532705148208], "label": "Sharpening knives"}]}, "QESBvmN1hcM": {"duration": 114.57, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QESBvmN1hcM", "annotations": [{"segment": [1.429856552085213, 6.970550691415412], "label": "Playing water polo"}, {"segment": [9.115335519543232, 16.264618279969298], "label": "Playing water polo"}, {"segment": [20.19672379820363, 21.805312419299494], "label": "Playing water polo"}, {"segment": [53.97708484121678, 60.232707256589585], "label": "Playing water polo"}, {"segment": [67.73945342433697, 82.93167912636505], "label": "Playing water polo"}]}, "QxPi0RyYnqY": {"duration": 215.853, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=QxPi0RyYnqY", "annotations": []}, "AJ9GUwEPkdI": {"duration": 83.336, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=AJ9GUwEPkdI", "annotations": []}, "dcARAWmvWSo": {"duration": 215.643, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dcARAWmvWSo", "annotations": [{"segment": [0.01, 215.601], "label": "Roof shingle removal"}]}, "MnzVdvbvnMc": {"duration": 52.18, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MnzVdvbvnMc", "annotations": [{"segment": [20.837536587177773, 51.11708184711389], "label": "Polishing shoes"}]}, "PNdG3SUdJzc": {"duration": 52.25, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PNdG3SUdJzc", "annotations": [{"segment": [14.868103034321372, 51.52917900936037], "label": "Starting a campfire"}]}, "rjnulRGsve8": {"duration": 60.86, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rjnulRGsve8", "annotations": []}, "V08Q-DbT0GQ": {"duration": 186.85, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=V08Q-DbT0GQ", 
"annotations": []}, "7uhJ_0oKV6s": {"duration": 61.998, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=7uhJ_0oKV6s", "annotations": [{"segment": [0.01, 52.43009485257234], "label": "Rope skipping"}]}, "bsF9yyz1jH0": {"duration": 185.156, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=bsF9yyz1jH0", "annotations": []}, "YCrSiqY-TVw": {"duration": 203.779, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YCrSiqY-TVw", "annotations": [{"segment": [30.51906211544462, 34.65185177691108], "label": "Curling"}, {"segment": [66.12463458346335, 79.79463115600625], "label": "Curling"}, {"segment": [129.70601399063963, 138.9253140046802], "label": "Curling"}]}, "s1WQ4zNikKA": {"duration": 93.3, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=s1WQ4zNikKA", "annotations": [{"segment": [6.911441759999999, 35.10206328], "label": "Shaving legs"}, {"segment": [41.04321624, 86.48537441497659], "label": "Shaving legs"}]}, "sBumUzHPg2s": {"duration": 134.86, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sBumUzHPg2s", "annotations": []}, "eDm8bUVWxAM": {"duration": 50.81, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eDm8bUVWxAM", "annotations": [{"segment": [9.667683449999998, 46.20512840171607], "label": "Archery"}]}, "wfyHIL89Hmg": {"duration": 35.6, "subset": "testing", "resolution": "400x300", "url": "https://www.youtube.com/watch?v=wfyHIL89Hmg", "annotations": []}, "84OwFujqHyw": {"duration": 60.84, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=84OwFujqHyw", "annotations": [{"segment": [0.01, 10.718429017160688], "label": "Playing polo"}]}, "sOyox_6AG1w": {"duration": 117.981, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sOyox_6AG1w", "annotations": 
[{"segment": [7.546306995163806, 21.35052710826833], "label": "Running a marathon"}, {"segment": [27.9765527625585, 36.811253634945395], "label": "Running a marathon"}, {"segment": [42.14888541201248, 53.00820523432137], "label": "Running a marathon"}, {"segment": [58.89800581591263, 73.62250726989079], "label": "Running a marathon"}, {"segment": [78.96013904695788, 88.89917752839314], "label": "Running a marathon"}, {"segment": [93.86869676911076, 110.98592970936036], "label": "Running a marathon"}]}, "iuc48VIWLJY": {"duration": 47.903, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=iuc48VIWLJY", "annotations": []}, "cduejHfXPDc": {"duration": 166.46, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cduejHfXPDc", "annotations": [{"segment": [37.006309497277854, 108.16230109204369], "label": "Painting"}]}, "6cPXFUqRB1s": {"duration": 56.19, "subset": "validation", "resolution": "854x478", "url": "https://www.youtube.com/watch?v=6cPXFUqRB1s", "annotations": [{"segment": [3.7277685000000003, 54.75916536661467], "label": "Hammer throw"}]}, "J2gJYNO2qh8": {"duration": 139.274, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=J2gJYNO2qh8", "annotations": [{"segment": [14.33064898595944, 17.153352574102964], "label": "Making a cake"}, {"segment": [24.752939157566303, 39.08920436817472], "label": "Making a cake"}, {"segment": [51.68461466458658, 62.54272698907956], "label": "Making a cake"}, {"segment": [70.79489235569423, 92.51111700468019], "label": "Making a cake"}, {"segment": [96.20287519500779, 103.1520670826833], "label": "Making a cake"}]}, "AguxVxEWEs0": {"duration": 118.98, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AguxVxEWEs0", "annotations": []}, "ytvIoI5uwFA": {"duration": 98.407, "subset": "training", "resolution": "320x180", "url": "https://www.youtube.com/watch?v=ytvIoI5uwFA", "annotations": 
[{"segment": [11.05342290426311, 58.79806798439938], "label": "Arm wrestling"}]}, "e0yLhjXQIXI": {"duration": 157.78, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=e0yLhjXQIXI", "annotations": []}, "QVdsLRKpCT0": {"duration": 60.65, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QVdsLRKpCT0", "annotations": [{"segment": [3.0254394009360377, 60.03606311232449], "label": "Using parallel bars"}]}, "bzHSF5GJc8o": {"duration": 72.73, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bzHSF5GJc8o", "annotations": [{"segment": [1.1345541185647425, 71.02308782215289], "label": "Volleyball"}]}, "9ZGy24Geu5k": {"duration": 114.893, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9ZGy24Geu5k", "annotations": [{"segment": [3.2262975070202806, 113.27889024648985], "label": "Surfing"}]}, "BJ9r8_JnG0k": {"duration": 224.027, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BJ9r8_JnG0k", "annotations": [{"segment": [80.74710253795179, 114.94423303293274], "label": "Using the monkey bar"}, {"segment": [125.41274236813099, 149.8392629642316], "label": "Using the monkey bar"}, {"segment": [179.8489894875961, 192.76015089881105], "label": "Using the monkey bar"}]}, "XPiMnbCr-sc": {"duration": 83.43, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XPiMnbCr-sc", "annotations": []}, "VOnP9N7FAT8": {"duration": 190.1, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VOnP9N7FAT8", "annotations": [{"segment": [7.710836102964119, 167.85897054914196], "label": "Playing accordion"}]}, "JW5P5FdOrcw": {"duration": 207.889, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JW5P5FdOrcw", "annotations": [{"segment": [159.9638589724571, 181.98317559214755], "label": "Mooping floor"}]}, 
"HZ0tf9Cp340": {"duration": 155.39, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=HZ0tf9Cp340", "annotations": [{"segment": [37.2773413, 120.01625719], "label": "Hammer throw"}]}, "ZCZoL2b6euQ": {"duration": 22.849, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=ZCZoL2b6euQ", "annotations": [{"segment": [0.19999999999999998, 22.648434], "label": "Throwing darts"}]}, "SwbvD590YtQ": {"duration": 60.26, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SwbvD590YtQ", "annotations": [{"segment": [9.974371280589729, 47.61077113648224], "label": "Sailing"}]}, "UXc2y7lR5t0": {"duration": 18.46, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=UXc2y7lR5t0", "annotations": [{"segment": [0.2, 18.259863945578232], "label": "Using the monkey bar"}]}, "WhhbKOSW7m0": {"duration": 69.28999999999999, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WhhbKOSW7m0", "annotations": [{"segment": [14.325089703588143, 54.54345475819033], "label": "Playing squash"}]}, "Ht2gV7oaqbo": {"duration": 5.48, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ht2gV7oaqbo", "annotations": [{"segment": [0.01, 4.914725429017161], "label": "Doing a powerbomb"}]}, "MdOAr_4FJvc": {"duration": 233.13, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=MdOAr_4FJvc", "annotations": [{"segment": [0.01, 230.21858344305775], "label": "Playing piano"}]}, "oUdEoaKDHpA": {"duration": 213.53, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oUdEoaKDHpA", "annotations": [{"segment": [19.654152065402343, 144.90773000780032], "label": "Breakdancing"}]}, "1ebIpLiTCvw": {"duration": 184.79, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1ebIpLiTCvw", "annotations": 
[{"segment": [57.65495381720031, 89.65345318574647], "label": "Making a sandwich"}]}, "HktZZPJMU8s": {"duration": 211.63, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=HktZZPJMU8s", "annotations": [{"segment": [15.845316692667707, 200.70734477379096], "label": "Tai chi"}]}, "zyi4NVylqB0": {"duration": 100.195, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zyi4NVylqB0", "annotations": [{"segment": [10.14446177847114, 89.89553822152887], "label": "Beach soccer"}]}, "fdYyIOWkycU": {"duration": 24.567, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fdYyIOWkycU", "annotations": [{"segment": [0.01, 11.13414955922302], "label": "Throwing darts"}]}, "Gfu8e6R6jUU": {"duration": 199.14, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Gfu8e6R6jUU", "annotations": []}, "gZB_XrTnDCI": {"duration": 174.707, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gZB_XrTnDCI", "annotations": []}, "MlxMV7WH9-g": {"duration": 101.936, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=MlxMV7WH9-g", "annotations": [{"segment": [22.882970358814354, 85.81113884555381], "label": "Baking cookies"}]}, "CwImmV7q1MY": {"duration": 197.3, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CwImmV7q1MY", "annotations": []}, "IxrncvpPWV8": {"duration": 23.847, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IxrncvpPWV8", "annotations": [{"segment": [0.01, 23.821], "label": "Tug of war"}]}, "KylUHdDXuCg": {"duration": 104.397, "subset": "training", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=KylUHdDXuCg", "annotations": [{"segment": [19.860496099843992, 91.81585647425896], "label": "Bullfighting"}]}, "ArzhjEk4j_Y": {"duration": 19.32, "subset": "validation", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ArzhjEk4j_Y", "annotations": [{"segment": [3.7914638411214954, 15.045491433021807], "label": "Building sandcastles"}]}, "CKjHXMoXye4": {"duration": 55.68, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=CKjHXMoXye4", "annotations": [{"segment": [26.851029641185647, 44.347507020280815], "label": "High jump"}]}, "knXbIp4x5Vo": {"duration": 188.85, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=knXbIp4x5Vo", "annotations": []}, "VdeYnCIbRJ4": {"duration": 7.97, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VdeYnCIbRJ4", "annotations": [{"segment": [0, 7.916666], "label": "Triple jump"}]}, "0pGXfomqurw": {"duration": 156.666, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=0pGXfomqurw", "annotations": [{"segment": [47.41500935683686, 126.35855450078002], "label": "Elliptical trainer"}, {"segment": [142.73384106084242, 156.465055], "label": "Elliptical trainer"}]}, "KlgrI3Ngwv0": {"duration": 54.64, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KlgrI3Ngwv0", "annotations": []}, "el-ogdlS5nc": {"duration": 8.01, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=el-ogdlS5nc", "annotations": [{"segment": [2.924409706708268, 8.010883], "label": "Starting a campfire"}]}, "yw1IZdbEzck": {"duration": 175.52, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yw1IZdbEzck", "annotations": [{"segment": [0.2, 27.108337100830973], "label": "Getting a piercing"}, {"segment": [36.14444946777463, 128.42232424535075], "label": "Getting a piercing"}]}, "E_Z4BceegCQ": {"duration": 89.3, "subset": "training", "resolution": "484x268", "url": "https://www.youtube.com/watch?v=E_Z4BceegCQ", "annotations": [{"segment": [1.1145501653666148, 
87.49218798127924], "label": "Breakdancing"}]}, "qenGkKGoq6o": {"duration": 66.131, "subset": "validation", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=qenGkKGoq6o", "annotations": [{"segment": [10.296105919003114, 34.6978769470405], "label": "Drum corps"}]}, "p3-tF0riSpQ": {"duration": 110.179, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=p3-tF0riSpQ", "annotations": [{"segment": [57.40257811701015, 61.17229983669855], "label": "Having an ice cream"}, {"segment": [83.1052248426232, 84.47603271820641], "label": "Having an ice cream"}]}, "1q8BSnedfqA": {"duration": 132.66, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1q8BSnedfqA", "annotations": []}, "0RUMAGGab1k": {"duration": 124.48, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0RUMAGGab1k", "annotations": [{"segment": [22.43004252028081, 94.08965888377534], "label": "Playing harmonica"}]}, "Gd6YooP1_CU": {"duration": 31.63, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Gd6YooP1_CU", "annotations": [{"segment": [0.01, 16.751232449297973], "label": "Drinking coffee"}]}, "Jy93pLzV2uw": {"duration": 70.53999999999999, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Jy93pLzV2uw", "annotations": []}, "aAlbRFeu32E": {"duration": 165.12, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aAlbRFeu32E", "annotations": [{"segment": [146.18401244929797, 161.12440491107645], "label": "Volleyball"}]}, "ItukN-TWrJM": {"duration": 150.37, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ItukN-TWrJM", "annotations": [{"segment": [25.45305499839041, 137.58725001950077], "label": "Wrapping presents"}]}, "0W11dYp51EM": {"duration": 152.486, "subset": "testing", "resolution": "480x360", "url": 
"https://www.youtube.com/watch?v=0W11dYp51EM", "annotations": []}, "4llkVfMzsN4": {"duration": 200.37, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4llkVfMzsN4", "annotations": [{"segment": [8.750371294851794, 200.101], "label": "Playing racquetball"}]}, "1T66cuSjizE": {"duration": 12.143, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=1T66cuSjizE", "annotations": [{"segment": [1.2521466458658346, 12.161], "label": "Doing a powerbomb"}]}, "079MEwdDNjg": {"duration": 162.795, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=079MEwdDNjg", "annotations": [{"segment": [96.1861739070928, 145.87644805039497], "label": "Rock-paper-scissors"}]}, "tj0sI8M3tro": {"duration": 211.51, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=tj0sI8M3tro", "annotations": [{"segment": [72.90701496567863, 202.55614112636508], "label": "Painting"}]}, "rvWlRHPYrZI": {"duration": 154.9, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rvWlRHPYrZI", "annotations": []}, "QjKmQO_9TaA": {"duration": 164.0, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QjKmQO_9TaA", "annotations": [{"segment": [14.99962292, 153.98521372854916], "label": "Doing step aerobics"}]}, "jnOqi_9KJiE": {"duration": 35.225, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=jnOqi_9KJiE", "annotations": [{"segment": [0.01, 12.915712417053866], "label": "Peeling potatoes"}]}, "IPehEfMBLM0": {"duration": 173.99, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=IPehEfMBLM0", "annotations": [{"segment": [0, 173.941], "label": "Chopping wood"}]}, "ZHVzukkU9sk": {"duration": 111.758, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZHVzukkU9sk", "annotations": [{"segment": 
[8.746436682549593, 59.549590180144506], "label": "Baking cookies"}]}, "8BsIeOSzK_U": {"duration": 54.22, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8BsIeOSzK_U", "annotations": [{"segment": [11.165139482059283, 48.12851791887676], "label": "Using parallel bars"}]}, "1VAugQRO05g": {"duration": 110.086, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1VAugQRO05g", "annotations": [{"segment": [10.61480284528923, 43.657664995851896], "label": "Drum corps"}, {"segment": [56.6693620085087, 60.94952549951422], "label": "Drum corps"}, {"segment": [67.96899362476329, 85.2608541284256], "label": "Drum corps"}]}, "Lshue6AmrIw": {"duration": 38.78, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Lshue6AmrIw", "annotations": []}, "9TjPn_ejmFU": {"duration": 37.454, "subset": "testing", "resolution": "432x720", "url": "https://www.youtube.com/watch?v=9TjPn_ejmFU", "annotations": []}, "I116ZNTNUdE": {"duration": 184.924, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=I116ZNTNUdE", "annotations": []}, "NFLa94V-knM": {"duration": 85.148, "subset": "testing", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=NFLa94V-knM", "annotations": []}, "hsJJbTqSQR4": {"duration": 13.52, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=hsJJbTqSQR4", "annotations": [{"segment": [0.5530373224445949, 10.614871802195372], "label": "Tennis serve with ball bouncing"}]}, "e4XYZAs7tcs": {"duration": 10.84, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=e4XYZAs7tcs", "annotations": [{"segment": [0.084584375975039, 10.742215748829953], "label": "Playing racquetball"}]}, "iesGbWk_nqQ": {"duration": 139.46, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=iesGbWk_nqQ", "annotations": []}, "3UbQ0UDmbbw": 
{"duration": 425.645, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3UbQ0UDmbbw", "annotations": [{"segment": [73.85802523564284, 365.57733096773006], "label": "Removing ice from car"}]}, "FS_xWK5tBtY": {"duration": 99.17, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=FS_xWK5tBtY", "annotations": []}, "2YeImloBCA8": {"duration": 167.79, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2YeImloBCA8", "annotations": [{"segment": [3.1411056278985856, 164.64628539625585], "label": "Doing step aerobics"}]}, "XfF30DaqAGU": {"duration": 51.67, "subset": "training", "resolution": "160x120", "url": "https://www.youtube.com/watch?v=XfF30DaqAGU", "annotations": [{"segment": [4.829953198127925, 26.001248049921998], "label": "Snatch"}, {"segment": [30.267706708268328, 50.71450858034322], "label": "Snatch"}]}, "xLWlqef2_yM": {"duration": 182.51, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=xLWlqef2_yM", "annotations": []}, "MWWDqMI-rxU": {"duration": 137.7, "subset": "validation", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=MWWDqMI-rxU", "annotations": [{"segment": [0.01, 0.02], "label": "Breakdancing"}]}, "Y_QRRCIujIc": {"duration": 169.46, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Y_QRRCIujIc", "annotations": [{"segment": [0.01, 169.4592290249433], "label": "Ping-pong"}]}, "Xu54UPG1cME": {"duration": 190.056, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Xu54UPG1cME", "annotations": [{"segment": [22.244929797191887, 169.65466458658346], "label": "Hanging wallpaper"}]}, "oSoi5owiybU": {"duration": 61.324, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oSoi5owiybU", "annotations": [{"segment": [36.870756211103334, 56.26133412928349], "label": "Running a marathon"}]}, 
"yKLX0iXyLsQ": {"duration": 235.36, "subset": "validation", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=yKLX0iXyLsQ", "annotations": [{"segment": [0, 235.301], "label": "Discus throw"}]}, "E3UCEbGZmz0": {"duration": 237.19, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=E3UCEbGZmz0", "annotations": [{"segment": [14.79195007800312, 28.10470514820593], "label": "Painting"}, {"segment": [29.214101404056162, 100.58526053042122], "label": "Painting"}, {"segment": [107.2506864274571, 117.97575507020281], "label": "Painting"}, {"segment": [123.52320436817473, 188.61327613104524], "label": "Painting"}]}, "MzYA5oo-mzk": {"duration": 7.06, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MzYA5oo-mzk", "annotations": [{"segment": [1.2124119, 6.27699383775351], "label": "Hammer throw"}]}, "HIIUd1osd_I": {"duration": 235.52, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HIIUd1osd_I", "annotations": [{"segment": [20.943276042121685, 40.78427439781592], "label": "Walking the dog"}, {"segment": [55.11388432137286, 143.29609923556944], "label": "Walking the dog"}, {"segment": [155.78857968174728, 198.40998355694228], "label": "Walking the dog"}]}, "PZjWkB_q2lE": {"duration": 225.164, "subset": "training", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=PZjWkB_q2lE", "annotations": [{"segment": [18.683948916842827, 25.347677913178163], "label": "Hand car wash"}, {"segment": [0.2, 224.963899], "label": "Hand car wash"}, {"segment": [106.3645948430847, 118.99060787112208], "label": "Hand car wash"}, {"segment": [139.33251774962676, 141.08613067018752], "label": "Hand car wash"}]}, "LTFtillWvs4": {"duration": 75.91, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LTFtillWvs4", "annotations": []}, "z1YFE1yRVo0": {"duration": 94.599, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=z1YFE1yRVo0", "annotations": []}, "-pkfcMUIEMo": {"duration": 104.49000000000001, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-pkfcMUIEMo", "annotations": []}, "s0YJOzALDNg": {"duration": 64.482, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=s0YJOzALDNg", "annotations": [{"segment": [0.01, 64.48151927437642], "label": "Riding bumper cars"}]}, "81dGQTVec_s": {"duration": 223.21, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=81dGQTVec_s", "annotations": [{"segment": [85.64369110764432, 145.52464586583463], "label": "Cleaning shoes"}]}, "Gr9gtmq4XVY": {"duration": 193.469, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Gr9gtmq4XVY", "annotations": []}, "ow9bWn5gOvg": {"duration": 223.934, "subset": "validation", "resolution": "854x468", "url": "https://www.youtube.com/watch?v=ow9bWn5gOvg", "annotations": [{"segment": [27.947986599736097, 223.93324263038548], "label": "Sharpening knives"}]}, "nB0JECwGK0c": {"duration": 231.991, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nB0JECwGK0c", "annotations": [{"segment": [50.51115991474999, 79.01388897319555], "label": "Sumo"}, {"segment": [143.2352278643767, 170.65557480667877], "label": "Sumo"}]}, "ngE2y5ExVqw": {"duration": 60.814, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ngE2y5ExVqw", "annotations": [{"segment": [20.492395806552263, 23.907795107644304], "label": "Hitting a pinata"}, {"segment": [27.038577800312012, 30.738593709828393], "label": "Hitting a pinata"}, {"segment": [35.387331647425896, 36.90528689235569], "label": "Hitting a pinata"}, {"segment": [42.02838584399376, 43.26172448049922], "label": "Hitting a pinata"}]}, "lvlVfgZ53NQ": {"duration": 67.29, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=lvlVfgZ53NQ", "annotations": [{"segment": [3.5676661466458657, 66.94620592823712], "label": "Mixing drinks"}]}, "NNiJfr_ChH4": {"duration": 100.08, "subset": "testing", "resolution": "2560x810", "url": "https://www.youtube.com/watch?v=NNiJfr_ChH4", "annotations": []}, "b3e7PrPNb30": {"duration": 144.568, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=b3e7PrPNb30", "annotations": [{"segment": [8.79583473951203, 34.281202061687914], "label": "Applying sunscreen"}, {"segment": [39.017420767579004, 115.69905564430576], "label": "Applying sunscreen"}]}, "-deoJLN27hE": {"duration": 227.742, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=-deoJLN27hE", "annotations": [{"segment": [11.724591889798042, 217.79317643525738], "label": "Futsal"}]}, "BpiExjaNwvI": {"duration": 138.57, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BpiExjaNwvI", "annotations": [{"segment": [11.850438089999999, 63.735765529999995], "label": "Doing motocross"}, {"segment": [76.70848292, 129.1072276677067], "label": "Doing motocross"}]}, "p4KNKI2UZLc": {"duration": 187.2, "subset": "training", "resolution": "470x360", "url": "https://www.youtube.com/watch?v=p4KNKI2UZLc", "annotations": [{"segment": [15.170046801872076, 161.61934477379094], "label": "Using the balance beam"}]}, "C1YNGahOoiA": {"duration": 230.16, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=C1YNGahOoiA", "annotations": [{"segment": [9.688872074882996, 219.97328081123243], "label": "Sailing"}]}, "pdXrJJyTogA": {"duration": 74.049, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pdXrJJyTogA", "annotations": []}, "9PGFsuE3Ye0": {"duration": 143.778, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9PGFsuE3Ye0", "annotations": [{"segment": [18.1613120124805, 
30.268853354134166], "label": "Playing ten pins"}, {"segment": [46.41224180967239, 47.30909672386896], "label": "Playing ten pins"}, {"segment": [60.98613416536662, 71.97260686427457], "label": "Playing ten pins"}, {"segment": [100.01323868954759, 106.74058658346335], "label": "Playing ten pins"}]}, "6azNzt8rXAs": {"duration": 202.92000000000002, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=6azNzt8rXAs", "annotations": [{"segment": [31.340064312776594, 183.60845759000426], "label": "Ping-pong"}, {"segment": [183.92502274726988, 190.2563488315133], "label": "Ping-pong"}]}, "Tbo7I63oIms": {"duration": 191.008, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Tbo7I63oIms", "annotations": [{"segment": [10.131434939030214, 190.8073469387755], "label": "Skiing"}]}, "XO2YGnweswY": {"duration": 172.27, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XO2YGnweswY", "annotations": []}, "ykdRdg1XvFM": {"duration": 94.13, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ykdRdg1XvFM", "annotations": [{"segment": [8.517559416536661, 94.13371882086167], "label": "Playing accordion"}]}, "x03YF3JbWjs": {"duration": 171.735, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=x03YF3JbWjs", "annotations": [{"segment": [0.01, 157.66430118729983], "label": "Braiding hair"}]}, "vvdmMyyAtN0": {"duration": 222.1, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vvdmMyyAtN0", "annotations": [{"segment": [88.3544085725429, 156.9590081700468], "label": "Getting a haircut"}]}, "BLLeQHgNmYc": {"duration": 92.21000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BLLeQHgNmYc", "annotations": [{"segment": [0.01, 88.75411435382216], "label": "Tai chi"}]}, "T5GVJRbvNb8": {"duration": 162.19, "subset": "training", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=T5GVJRbvNb8", "annotations": [{"segment": [6.828809672386896, 154.28051482059283], "label": "High jump"}]}, "vZVnMzH4pkc": {"duration": 70.102, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=vZVnMzH4pkc", "annotations": [{"segment": [2.4059640372009437, 48.666090000000004], "label": "Beer pong"}]}, "UqSjGwxBuqA": {"duration": 204.243, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UqSjGwxBuqA", "annotations": [{"segment": [36.9536911076443, 47.1478127925117], "label": "BMX"}, {"segment": [57.6605007800312, 182.5563697347894], "label": "BMX"}, {"segment": [190.5213073322933, 204.221], "label": "BMX"}]}, "Pu92wJ-7UTQ": {"duration": 62.044, "subset": "training", "resolution": "320x180", "url": "https://www.youtube.com/watch?v=Pu92wJ-7UTQ", "annotations": [{"segment": [0, 14.76312247945258], "label": "Playing congas"}, {"segment": [22.19292995845725, 60.01740439702645], "label": "Playing congas"}]}, "Upd7zpT6tuc": {"duration": 201.48, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Upd7zpT6tuc", "annotations": [{"segment": [7.229375249609984, 193.9358490873635], "label": "Playing kickball"}]}, "pMtu7fOHdII": {"duration": 52.269, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pMtu7fOHdII", "annotations": []}, "5BCWB7Pf2Tk": {"duration": 97.153, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5BCWB7Pf2Tk", "annotations": [{"segment": [8.942212741570886, 55.775156443057725], "label": "Longboarding"}, {"segment": [58.200163244929804, 66.38456120124806], "label": "Longboarding"}, {"segment": [77.14552888455539, 86.8455560920437], "label": "Longboarding"}]}, "DU2hdNIS2WI": {"duration": 23.99, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=DU2hdNIS2WI", "annotations": 
[{"segment": [3.741998751950078, 23.986212], "label": "Clean and jerk"}]}, "rosZj4ZcPBA": {"duration": 74.513, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=rosZj4ZcPBA", "annotations": [{"segment": [0.01, 72.4701137677532], "label": "Blowing leaves"}]}, "xlCOq0ryx-Y": {"duration": 180.79, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xlCOq0ryx-Y", "annotations": [{"segment": [15.347715751950076, 151.92830547113883], "label": "Playing guitarra"}]}, "MmOQhq95Z_g": {"duration": 182.86, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MmOQhq95Z_g", "annotations": [{"segment": [15.119235215667128, 181.1455538673947], "label": "Pole vault"}]}, "Z7BPvGPizYE": {"duration": 171.341, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Z7BPvGPizYE", "annotations": [{"segment": [29.927413416536663, 81.50827613104524], "label": "Waxing skis"}]}, "34SBoGK8mT8": {"duration": 57.4, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=34SBoGK8mT8", "annotations": []}, "dRqbDamDLT0": {"duration": 219.48, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=dRqbDamDLT0", "annotations": [{"segment": [7.528770670826833, 211.49001248049922], "label": "Fixing bicycle"}]}, "zlAwW2Pxc1A": {"duration": 149.049, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zlAwW2Pxc1A", "annotations": [{"segment": [14.603542341670986, 139.31318809940385], "label": "Hanging wallpaper"}]}, "JhihdPxI_Xc": {"duration": 134.003, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JhihdPxI_Xc", "annotations": [{"segment": [16.933215159126362, 133.16614884399374], "label": "Capoeira"}]}, "FeWZkO6kZl0": {"duration": 214.37, "subset": "validation", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=FeWZkO6kZl0", "annotations": [{"segment": [5.1796437776911075, 206.6844952581903], "label": "Playing field hockey"}]}, "pmn8_4zKo7I": {"duration": 74.91, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pmn8_4zKo7I", "annotations": [{"segment": [28.131323849999998, 72.40748829953198], "label": "Smoking a cigarette"}, {"segment": [1.6973926199999998, 27.678346333853355], "label": "Smoking a cigarette"}]}, "4QqoWbK2ELc": {"duration": 90.26, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4QqoWbK2ELc", "annotations": [{"segment": [22.37658346333853, 69.2407488299532], "label": "Bungee jumping"}]}, "RfXFS9fy-mI": {"duration": 50.016, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=RfXFS9fy-mI", "annotations": [{"segment": [4.447528684867395, 44.319233210608424], "label": "Futsal"}]}, "VyQ4ZbgqMXk": {"duration": 77.555, "subset": "training", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=VyQ4ZbgqMXk", "annotations": [{"segment": [13.49039912480853, 17.120102925205444], "label": "Shuffleboard"}, {"segment": [19.90287578705148, 21.95970793525741], "label": "Shuffleboard"}, {"segment": [32.24386867628705, 34.663671203588144], "label": "Shuffleboard"}, {"segment": [66.96803494305773, 70.35575848127925], "label": "Shuffleboard"}]}, "N2zoVF76Pgg": {"duration": 186.062, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=N2zoVF76Pgg", "annotations": [{"segment": [20.894714508580343, 160.48301560062401], "label": "Futsal"}]}, "9dqz5ojw-ys": {"duration": 8.87, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=9dqz5ojw-ys", "annotations": [{"segment": [0.01, 8.524076343213729], "label": "Playing water polo"}]}, "VTFIAQVjHv8": {"duration": 192.19, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=VTFIAQVjHv8", "annotations": [{"segment": [70.46663806552263, 80.06209516380656], "label": "Skateboarding"}, {"segment": [95.65471294851794, 100.45244149765992], "label": "Skateboarding"}, {"segment": [110.64761466458658, 117.84420748829955], "label": "Skateboarding"}, {"segment": [129.83852886115446, 137.93469578783152], "label": "Skateboarding"}, {"segment": [143.93185647425898, 152.3278814352574], "label": "Skateboarding"}, {"segment": [159.52447425897037, 168.22035725429018], "label": "Skateboarding"}, {"segment": [170.91907956318252, 176.01666614664586], "label": "Skateboarding"}]}, "d0woFZ26Cx0": {"duration": 30.094, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=d0woFZ26Cx0", "annotations": [{"segment": [1.2642476635514017, 7.491838006230529], "label": "Playing pool"}, {"segment": [9.458445482866043, 11.237757009345794], "label": "Playing pool"}, {"segment": [12.455180685358256, 13.157540498442367], "label": "Playing pool"}, {"segment": [15.451915887850465, 17.605819314641742], "label": "Playing pool"}, {"segment": [23.177873831775702, 24.488945482866043], "label": "Playing pool"}]}, "ndJqptBTxAY": {"duration": 108.809, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ndJqptBTxAY", "annotations": [{"segment": [5.974826512274341, 10.209561714766554], "label": "Hand car wash"}]}, "hRsn4nesCQo": {"duration": 234.75, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hRsn4nesCQo", "annotations": []}, "89H7D34qzPs": {"duration": 234.847, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=89H7D34qzPs", "annotations": []}, "E2nAOID5DLM": {"duration": 28.47, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=E2nAOID5DLM", "annotations": [{"segment": [4.675202808112324, 17.498616224648988], "label": "Long jump"}]}, "cwQQj3VZLC8": {"duration": 
73.93, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cwQQj3VZLC8", "annotations": [{"segment": [6.366284520000001, 59.723008920000005], "label": "Triple jump"}]}, "_qzzQdKJ7F0": {"duration": 97.11, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=_qzzQdKJ7F0", "annotations": [{"segment": [21.81473585522621, 24.99605150078003], "label": "Doing nails"}, {"segment": [45.750348807488294, 65.29271634446178], "label": "Doing nails"}]}, "4L0mci9CTPg": {"duration": 43.72, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4L0mci9CTPg", "annotations": [{"segment": [0.01, 43.723174603174606], "label": "Tango"}]}, "_3I4nm2zF5Y": {"duration": 66.08, "subset": "validation", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=_3I4nm2zF5Y", "annotations": [{"segment": [4.840291187207488, 58.984364140000004], "label": "Getting a haircut"}]}, "6UPfqdssD6g": {"duration": 77.74, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6UPfqdssD6g", "annotations": [{"segment": [5.821434533541342, 60.76122294383775], "label": "Washing dishes"}]}, "M6xFXdXOzzc": {"duration": 209.12, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=M6xFXdXOzzc", "annotations": [{"segment": [30.547957439999998, 145.67996352], "label": "Playing badminton"}]}, "zSOK9jmWE1E": {"duration": 186.038, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zSOK9jmWE1E", "annotations": [{"segment": [56.57768330733229, 167.42991731669267], "label": "Scuba diving"}, {"segment": [173.81372698907958, 185.80100000000002], "label": "Scuba diving"}]}, "lTFCUuLtUJs": {"duration": 216.039, "subset": "training", "resolution": "540x360", "url": "https://www.youtube.com/watch?v=lTFCUuLtUJs", "annotations": [{"segment": [175.21079563182525, 196.7934227769111], "label": "Running a marathon"}]}, 
"Sma-ydx49eQ": {"duration": 78.623, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Sma-ydx49eQ", "annotations": [{"segment": [5.7632558502340085, 75.4128159126365], "label": "Rope skipping"}]}, "f2JqhNe3F5E": {"duration": 38.662, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=f2JqhNe3F5E", "annotations": []}, "UBqFQKI1YW8": {"duration": 78.16, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UBqFQKI1YW8", "annotations": [{"segment": [4.380655226209049, 71.15598127925117], "label": "Using parallel bars"}]}, "n4iI127TShc": {"duration": 14.05, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=n4iI127TShc", "annotations": []}, "S2jbh7QMrKc": {"duration": 227.37, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=S2jbh7QMrKc", "annotations": [{"segment": [30.859862836193454, 213.53606238377537], "label": "Surfing"}]}, "WMVw1i62o4E": {"duration": 15.976, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=WMVw1i62o4E", "annotations": []}, "6NQl2Vcf0P0": {"duration": 15.326, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6NQl2Vcf0P0", "annotations": [{"segment": [0.2, 7.1313084112149525], "label": "Calf roping"}]}, "AhWkUdag6NA": {"duration": 136.32999999999998, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=AhWkUdag6NA", "annotations": [{"segment": [3.0824375975039, 136.265], "label": "Windsurfing"}]}, "VgQmPHpRFXQ": {"duration": 64.459, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=VgQmPHpRFXQ", "annotations": [{"segment": [19.508529260898328, 32.68181392355694], "label": "Putting on shoes"}]}, "dla6qDuAXTw": {"duration": 49.97, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=dla6qDuAXTw", "annotations": [{"segment": [0.20000000000000004, 49.736], "label": "Swinging at the playground"}]}, "8nQGd6hiduA": {"duration": 167.77, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=8nQGd6hiduA", "annotations": [{"segment": [118.2986209822153, 165.6704141063602], "label": "Tennis serve with ball bouncing"}]}, "b_v5uGSheEw": {"duration": 34.505, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=b_v5uGSheEw", "annotations": [{"segment": [5.6449375975039, 21.020672386895473], "label": "River tubing"}, {"segment": [24.461396255850232, 33.49329641185647], "label": "River tubing"}]}, "Qnm0p06p9ok": {"duration": 172.07999999999998, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Qnm0p06p9ok", "annotations": []}, "wdb4-V33wIo": {"duration": 112.85, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wdb4-V33wIo", "annotations": [{"segment": [11.086525741029641, 50.659217199200775], "label": "Shoveling snow"}]}, "Ck5i8CVEkJI": {"duration": 131.658, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Ck5i8CVEkJI", "annotations": [{"segment": [12.939781408736348, 113.99331241029641], "label": "Throwing darts"}]}, "CneLYCPg7Es": {"duration": 40.1, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CneLYCPg7Es", "annotations": [{"segment": [3.691030887675507, 35.47143242901716], "label": "Brushing teeth"}]}, "28LdZLfdmMQ": {"duration": 204.011, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=28LdZLfdmMQ", "annotations": []}, "HPJuOvOmEW8": {"duration": 211.02, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HPJuOvOmEW8", "annotations": [{"segment": [0.01, 191.59955537597503], "label": "Skateboarding"}]}, "NCD5yDOzc5k": {"duration": 
149.54, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NCD5yDOzc5k", "annotations": []}, "6k5m4u5AchQ": {"duration": 157.664, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6k5m4u5AchQ", "annotations": [{"segment": [17.674878504672897, 84.44664174454827], "label": "Ice fishing"}]}, "yWfTd8pRLgM": {"duration": 110.47999999999999, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yWfTd8pRLgM", "annotations": [{"segment": [0.34456578159126366, 109.22735276443058], "label": "Tumbling"}]}, "tnk1skdLN0Q": {"duration": 113.24000000000001, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=tnk1skdLN0Q", "annotations": [{"segment": [5.733385335413416, 60.18299234999999], "label": "Shot put"}, {"segment": [68.63771473, 93.05725429017161], "label": "Shot put"}]}, "vBdm15wRqig": {"duration": 147.029, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vBdm15wRqig", "annotations": [{"segment": [24.887082188049426, 26.72207442311298], "label": "Surfing"}, {"segment": [44.38387468559967, 54.01758391968332], "label": "Surfing"}, {"segment": [82.68933759255131, 126.95852526345953], "label": "Surfing"}]}, "AIZWoAbhUTY": {"duration": 155.226, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AIZWoAbhUTY", "annotations": [{"segment": [0.01, 155.161], "label": "Building sandcastles"}]}, "Oya9LWABkf4": {"duration": 100.404, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Oya9LWABkf4", "annotations": [{"segment": [6.916751736792449, 71.77674437347602], "label": "Hula hoop"}]}, "4Mo-IYfNKBo": {"duration": 37.57, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4Mo-IYfNKBo", "annotations": [{"segment": [0.2, 37.369410430839], "label": "Mooping floor"}]}, "HDHS_7pOiDk": {"duration": 
23.85, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HDHS_7pOiDk", "annotations": [{"segment": [8.762558502340093, 21.275195007800313], "label": "Hopscotch"}]}, "EhjiQFHfDmY": {"duration": 166.0, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EhjiQFHfDmY", "annotations": [{"segment": [18.170147350071257, 151.59026906152297], "label": "Hula hoop"}]}, "R-VaPP7ToKQ": {"duration": 6.6, "subset": "training", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=R-VaPP7ToKQ", "annotations": [{"segment": [0.76074978, 5.0208034800000005], "label": "Drinking beer"}]}, "V-46E7jMm-Y": {"duration": 118.53999999999999, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=V-46E7jMm-Y", "annotations": []}, "EbGq9gXcXLQ": {"duration": 127.594, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EbGq9gXcXLQ", "annotations": [{"segment": [6.568783801872075, 127.39365079365079], "label": "Playing congas"}]}, "ND41BOzqVoA": {"duration": 191.89, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ND41BOzqVoA", "annotations": []}, "L23hItTmPNY": {"duration": 150.56, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=L23hItTmPNY", "annotations": []}, "-zZJmRT9udU": {"duration": 161.751, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=-zZJmRT9udU", "annotations": [{"segment": [26.80235926287642, 147.46335458997922], "label": "Playing congas"}]}, "hLnVjkmioH8": {"duration": 98.314, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=hLnVjkmioH8", "annotations": [{"segment": [30.73245860708202, 91.27998899715406], "label": "Arm wrestling"}]}, "zCijq4G1B3Q": {"duration": 172.78, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zCijq4G1B3Q", 
"annotations": [{"segment": [53.08650339157566, 54.97282584711388], "label": "Painting furniture"}, {"segment": [66.02128594383775, 80.03396704212167], "label": "Painting furniture"}, {"segment": [85.15398513572542, 126.6530791575663], "label": "Painting furniture"}, {"segment": [141.74365880187207, 166.80479999687986], "label": "Painting furniture"}]}, "pW6U4hxCago": {"duration": 44.95, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=pW6U4hxCago", "annotations": []}, "o00AfbAIOlA": {"duration": 101.797, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=o00AfbAIOlA", "annotations": [{"segment": [29.679904836193447, 80.4706287051482], "label": "Assembling bicycle"}]}, "3ekMufBfUdw": {"duration": 127.199, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3ekMufBfUdw", "annotations": [{"segment": [25.00321818587029, 25.796971144151886], "label": "Blow-drying hair"}, {"segment": [34.131377206108645, 98.62380462870514], "label": "Blow-drying hair"}]}, "tnB7LNIcXC0": {"duration": 115.682, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=tnB7LNIcXC0", "annotations": [{"segment": [5.77506716050955, 96.55190408976904], "label": "Fixing the roof"}]}, "Pw0ANcAM-zc": {"duration": 156.97, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Pw0ANcAM-zc", "annotations": [{"segment": [24.819463920000004, 141.53956876131045], "label": "Snatch"}]}, "IkbEC202hYg": {"duration": 153.79, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=IkbEC202hYg", "annotations": [{"segment": [6.115955524960999, 135.390466425117], "label": "Cheerleading"}]}, "G-EDozcMT_4": {"duration": 206.17000000000002, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=G-EDozcMT_4", "annotations": [{"segment": [12.21949207764229, 13.184188820614049], 
"label": "Skateboarding"}, {"segment": [20.58019718339754, 154.9946100374627], "label": "Skateboarding"}, {"segment": [157.24556802496102, 166.5709698096724], "label": "Skateboarding"}, {"segment": [175.25324043681746, 184.90020780031202], "label": "Skateboarding"}]}, "RAF_yKyoam8": {"duration": 155.51, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RAF_yKyoam8", "annotations": []}, "KFS_lGlO-Ew": {"duration": 58.654, "subset": "validation", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=KFS_lGlO-Ew", "annotations": [{"segment": [38.84314336661466, 43.601314049922], "label": "Gargling mouthwash"}]}, "TVmuh_sR1KI": {"duration": 134.7, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TVmuh_sR1KI", "annotations": [{"segment": [0, 129.82017096411857], "label": "Playing bagpipes"}]}, "U89hsv1dzuQ": {"duration": 27.17, "subset": "training", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=U89hsv1dzuQ", "annotations": [{"segment": [7.372852130000002, 22.7251955], "label": "Smoking a cigarette"}]}, "Jtmz74DAzqc": {"duration": 102.66, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Jtmz74DAzqc", "annotations": []}, "bWQ3vk499Uw": {"duration": 160.07999999999998, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bWQ3vk499Uw", "annotations": []}, "8lH1h46S7Jg": {"duration": 194.44, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8lH1h46S7Jg", "annotations": []}, "L2ZlwZl_5h8": {"duration": 196.84, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=L2ZlwZl_5h8", "annotations": []}, "rklYNAAaI3Y": {"duration": 153.23, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=rklYNAAaI3Y", "annotations": []}, "UW1wC80VNd8": {"duration": 123.35, "subset": "training", "resolution": "480x360", 
"url": "https://www.youtube.com/watch?v=UW1wC80VNd8", "annotations": [{"segment": [15.393996766673387, 113.91557506396256], "label": "Preparing pasta"}]}, "TQ5Z-aKYLug": {"duration": 35.67, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=TQ5Z-aKYLug", "annotations": []}, "9UGNhbRWM14": {"duration": 116.63, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=9UGNhbRWM14", "annotations": [{"segment": [0, 116.586666], "label": "Doing kickboxing"}]}, "DAPX3S1Nmqg": {"duration": 81.6, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=DAPX3S1Nmqg", "annotations": [{"segment": [0, 81.594806], "label": "Walking the dog"}]}, "q8TBL7DBgXY": {"duration": 144.568, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=q8TBL7DBgXY", "annotations": [{"segment": [0.01, 120.4132511700468], "label": "Rollerblading"}]}, "e51NGAPMp1s": {"duration": 105.907, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=e51NGAPMp1s", "annotations": [{"segment": [10.078401538221529, 37.504871297971924], "label": "Swimming"}, {"segment": [44.939757678627146, 71.20968955694228], "label": "Swimming"}, {"segment": [75.8358410826833, 100.6187956848674], "label": "Swimming"}]}, "kI0AyrGe-NU": {"duration": 37.43, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=kI0AyrGe-NU", "annotations": [{"segment": [0.01, 37.430566893424036], "label": "Washing face"}]}, "i-G50GikqTg": {"duration": 129.754, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=i-G50GikqTg", "annotations": [{"segment": [43.63771962616822, 75.3674968847352], "label": "Playing drums"}]}, "ZTwmb1d44bc": {"duration": 50.69, "subset": "training", "resolution": "640x320", "url": "https://www.youtube.com/watch?v=ZTwmb1d44bc", "annotations": [{"segment": [22.796338006230528, 37.54690965732087], 
"label": "Curling"}]}, "uDs0wOnwaAM": {"duration": 213.09, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=uDs0wOnwaAM", "annotations": [{"segment": [1.329731806552262, 213.089522], "label": "Ping-pong"}]}, "JTQsElq5UN4": {"duration": 50.95, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=JTQsElq5UN4", "annotations": [{"segment": [2.3048249469578783, 36.797722429017156], "label": "Ping-pong"}]}, "G3H3Gflf1SM": {"duration": 156.39, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=G3H3Gflf1SM", "annotations": [{"segment": [6.343286814465776, 148.0913486926677], "label": "Shaving"}]}, "4cktlXntujs": {"duration": 161.98, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4cktlXntujs", "annotations": []}, "mTmcz5NWNQA": {"duration": 184.599, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mTmcz5NWNQA", "annotations": [{"segment": [0.20000000000000004, 51.82118564742589], "label": "BMX"}, {"segment": [101.91499843993759, 146.53879719188768], "label": "BMX"}]}, "k5Yl5DV5wys": {"duration": 132.89, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=k5Yl5DV5wys", "annotations": []}, "aEopPs2eY-Q": {"duration": 217.97, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aEopPs2eY-Q", "annotations": [{"segment": [71.40842433697348, 151.9979318029864], "label": "Doing motocross"}, {"segment": [174.4405784227769, 208.1045496973479], "label": "Doing motocross"}]}, "jYyN-nJcm0M": {"duration": 220.753, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jYyN-nJcm0M", "annotations": [{"segment": [17.047159731287213, 198.8835301983508], "label": "Playing congas"}]}, "8iQ_ZjJGQkE": {"duration": 60.047, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=8iQ_ZjJGQkE", "annotations": [{"segment": [7.706941579476865, 50.73112921187562], "label": "Rollerblading"}]}, "5ya20wcGE-8": {"duration": 95.388, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=5ya20wcGE-8", "annotations": [{"segment": [26.711503196182267, 32.81273790951638], "label": "Doing fencing"}, {"segment": [63.91415455729072, 70.16419993891171], "label": "Doing fencing"}]}, "KVmuW9kwZV4": {"duration": 12.47, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KVmuW9kwZV4", "annotations": [{"segment": [0.43601791731669265, 11.656212322932918], "label": "Washing dishes"}]}, "mS7SAG1nW1o": {"duration": 61.26, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mS7SAG1nW1o", "annotations": [{"segment": [45.36657566302652, 56.92311388455538], "label": "Grooming horse"}]}, "cyfPRx3neNg": {"duration": 29.23, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=cyfPRx3neNg", "annotations": []}, "Pzt4s_Jtu4o": {"duration": 101.332, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Pzt4s_Jtu4o", "annotations": []}, "6TSzQRNOlTY": {"duration": 109.55, "subset": "testing", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=6TSzQRNOlTY", "annotations": []}, "mHVmDOxtVt0": {"duration": 126.6, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mHVmDOxtVt0", "annotations": [{"segment": [5.922355694227769, 117.08373322932918], "label": "Doing kickboxing"}]}, "RPkLocpR8VQ": {"duration": 77.671, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RPkLocpR8VQ", "annotations": [{"segment": [3.4201193135817105, 71.7675180674758], "label": "Playing rubik cube"}]}, "WAvz9zHwWo4": {"duration": 32.63, "subset": "testing", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=WAvz9zHwWo4", "annotations": []}, "__dXUJsj3yo": {"duration": 29.98, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=__dXUJsj3yo", "annotations": [{"segment": [0.14012948517940718, 9.528804992199687], "label": "Shoveling snow"}]}, "vLL-voBPWM4": {"duration": 82.18, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=vLL-voBPWM4", "annotations": [{"segment": [7.884224971918877, 73.26560278783151], "label": "Kayaking"}]}, "d7-tFV0RSCk": {"duration": 116.03, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=d7-tFV0RSCk", "annotations": [{"segment": [28.58296411856474, 84.66346333853353], "label": "Cleaning windows"}]}, "NpfdyZntPbg": {"duration": 222.61, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=NpfdyZntPbg", "annotations": [{"segment": [106.41957332293292, 202.96192589703585], "label": "Waxing skis"}]}, "0RUs80IYF38": {"duration": 29.629, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=0RUs80IYF38", "annotations": [{"segment": [0.2, 2.1724603229329174], "label": "Calf roping"}, {"segment": [13.173429617784713, 29.25888051950078], "label": "Calf roping"}]}, "15vwVfNA99o": {"duration": 175.752, "subset": "training", "resolution": "400x224", "url": "https://www.youtube.com/watch?v=15vwVfNA99o", "annotations": [{"segment": [0.01, 168.44034839272425], "label": "Having an ice cream"}]}, "EpV0Zmg50nQ": {"duration": 69.03, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=EpV0Zmg50nQ", "annotations": [{"segment": [32.041276131045244, 68.38339469578783], "label": "Springboard diving"}]}, "rQc7UhpiDU8": {"duration": 126.74, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rQc7UhpiDU8", "annotations": [{"segment": [10.176753510140406, 43.17709984399376], "label": 
"Archery"}]}, "-nskwoky7vk": {"duration": 160.43, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=-nskwoky7vk", "annotations": [{"segment": [119.5223712948518, 147.02751950078002], "label": "Playing accordion"}]}, "rBaR5xF8orE": {"duration": 128.175, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rBaR5xF8orE", "annotations": [{"segment": [0, 52.17597659906396], "label": "Kite flying"}]}, "Rewtuc-f6wU": {"duration": 46.84, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Rewtuc-f6wU", "annotations": [{"segment": [1.5150799, 37.66290117940717], "label": "Pole vault"}]}, "NLpWaffN3XM": {"duration": 216.67000000000002, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=NLpWaffN3XM", "annotations": [{"segment": [9.464323106257583, 213.28528130686428], "label": "Playing guitarra"}]}, "3wU13sUaXao": {"duration": 113.34, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=3wU13sUaXao", "annotations": []}, "jBFn08ZRKSE": {"duration": 24.707, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jBFn08ZRKSE", "annotations": [{"segment": [0.2, 24.35914346333853], "label": "Playing congas"}]}, "JcAAwYpJhm8": {"duration": 100.06, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=JcAAwYpJhm8", "annotations": []}, "f_mbX87xabo": {"duration": 19.11, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=f_mbX87xabo", "annotations": [{"segment": [0.01, 19.081], "label": "Playing flauta"}]}, "vo2M4sY0E38": {"duration": 132.68, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=vo2M4sY0E38", "annotations": []}, "daDd48y4x0g": {"duration": 19.39, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=daDd48y4x0g", "annotations": 
[{"segment": [0, 19.388662131519276], "label": "Preparing salad"}]}, "Vi2gzCrS-fE": {"duration": 75.651, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Vi2gzCrS-fE", "annotations": [{"segment": [17.015414215939437, 74.6368647953787], "label": "Sharpening knives"}]}, "ADUmfTuiDH8": {"duration": 117.447, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ADUmfTuiDH8", "annotations": [{"segment": [13.558512245251716, 105.53652617784711], "label": "Making a lemonade"}]}, "RlC3n0VSGOU": {"duration": 217.897, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RlC3n0VSGOU", "annotations": [{"segment": [12.917367916485368, 57.44829415489544], "label": "Surfing"}, {"segment": [65.26670105171554, 197.83968583151326], "label": "Surfing"}]}, "nIIFlJnOXXs": {"duration": 122.81, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=nIIFlJnOXXs", "annotations": [{"segment": [53.02036387051482, 65.6091266], "label": "Ironing clothes"}, {"segment": [72.8511165, 118.48232937129485], "label": "Ironing clothes"}]}, "cqVvHj1oC-8": {"duration": 224.77, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=cqVvHj1oC-8", "annotations": [{"segment": [23.839357254290174, 25.942829953198128], "label": "Removing ice from car"}]}, "701qhmCLPxU": {"duration": 36.015, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=701qhmCLPxU", "annotations": [{"segment": [6.405012280811232, 32.867826177847114], "label": "Slacklining"}]}, "G1tvQndOiB8": {"duration": 113.871, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=G1tvQndOiB8", "annotations": []}, "27Xuhwv1mQ8": {"duration": 30.929, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=27Xuhwv1mQ8", "annotations": []}, "Cu8lmHiXkUU": {"duration": 178.77, "subset": 
"training", "resolution": "1080x720", "url": "https://www.youtube.com/watch?v=Cu8lmHiXkUU", "annotations": [{"segment": [12.825699404056163, 169.522287775351], "label": "Ping-pong"}]}, "d44RiNHK6KA": {"duration": 95.44, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=d44RiNHK6KA", "annotations": [{"segment": [0, 92.60525829468553], "label": "Spinning"}]}, "brY1jVHquE0": {"duration": 67.04, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=brY1jVHquE0", "annotations": [{"segment": [1.4641250015600626, 20.706910736349457], "label": "Playing field hockey"}, {"segment": [44.23749111856475, 54.590946486739476], "label": "Playing field hockey"}]}, "K-eZfCs8yOc": {"duration": 182.46, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=K-eZfCs8yOc", "annotations": [{"segment": [6.686745945397815, 131.6008510530421], "label": "Playing violin"}]}, "NZskwwovSZI": {"duration": 63.65, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=NZskwwovSZI", "annotations": []}, "PuylbPlzbH0": {"duration": 45.09, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=PuylbPlzbH0", "annotations": [{"segment": [0.20000000000000004, 39.324603510140406], "label": "Playing piano"}]}, "W_scepAxfPA": {"duration": 12.4, "subset": "training", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=W_scepAxfPA", "annotations": [{"segment": [4.139599307332293, 7.234626826833074], "label": "Plataform diving"}]}, "A8RQcVFtovg": {"duration": 69.8, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=A8RQcVFtovg", "annotations": []}, "P0j0bBKsNAo": {"duration": 204.38, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=P0j0bBKsNAo", "annotations": [{"segment": [26.145596485083892, 123.07561272246808], "label": "Rock climbing"}, {"segment": 
[127.53949504918972, 146.35157056894522], "label": "Rock climbing"}, {"segment": [153.68509153427362, 171.54062084116018], "label": "Rock climbing"}]}, "A0LLegTPpWk": {"duration": 119.72, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=A0LLegTPpWk", "annotations": [{"segment": [11.21308341, 114.26485245709829], "label": "Grooming horse"}]}, "UsqhZb0co6o": {"duration": 38.569, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UsqhZb0co6o", "annotations": [{"segment": [3.4855195007800313, 29.822677067082683], "label": "Putting on shoes"}]}, "IDVWoE02zjM": {"duration": 25.89, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IDVWoE02zjM", "annotations": [{"segment": [13.53078483619345, 23.668775862714508], "label": "Polishing shoes"}]}, "P-YyUw0iuBQ": {"duration": 126.642, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=P-YyUw0iuBQ", "annotations": [{"segment": [38.32822644606465, 39.90877186652092], "label": "Rock-paper-scissors"}, {"segment": [58.87531691199621, 62.233975930465796], "label": "Rock-paper-scissors"}, {"segment": [78.82970284525666, 81.59565733105515], "label": "Rock-paper-scissors"}, {"segment": [111.23088237129485, 112.81142776911076], "label": "Rock-paper-scissors"}]}, "mnNgZ-D5cKc": {"duration": 168.461, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=mnNgZ-D5cKc", "annotations": []}, "KfX3APC8lp4": {"duration": 15.813, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KfX3APC8lp4", "annotations": [{"segment": [0.01, 15.600965643813707], "label": "Kneeling"}]}, "InVpvGiubi0": {"duration": 201.53, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=InVpvGiubi0", "annotations": [{"segment": [115.38227302153312, 190.52222606864274], "label": "Starting a campfire"}]}, "Of2Umm79Kxg": 
{"duration": 170.88, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Of2Umm79Kxg", "annotations": []}, "ZREM_AcXHX8": {"duration": 227.63, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZREM_AcXHX8", "annotations": [{"segment": [28.934534321372855, 32.129758970358814], "label": "Archery"}, {"segment": [38.16518330733229, 53.43125663026521], "label": "Archery"}, {"segment": [71.89255460218408, 73.31265444617785], "label": "Archery"}, {"segment": [104.55485101404057, 109.88022542901716], "label": "Archery"}]}, "9FQ-VLkCZo8": {"duration": 166.859, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=9FQ-VLkCZo8", "annotations": []}, "k3vao2RnhiA": {"duration": 123.646, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=k3vao2RnhiA", "annotations": []}, "bjYah4IvnII": {"duration": 62.81, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bjYah4IvnII", "annotations": [{"segment": [18.617621591263653, 32.53184404368175], "label": "River tubing"}, {"segment": [54.383052542901716, 59.478401609984395], "label": "River tubing"}]}, "NoVa1C5KBno": {"duration": 151.84, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=NoVa1C5KBno", "annotations": [{"segment": [29.60906463469423, 125.06868782527302], "label": "Tennis serve with ball bouncing"}]}, "bzndhMU3izM": {"duration": 192.98, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bzndhMU3izM", "annotations": [{"segment": [9.028549141965678, 100.81879875195008], "label": "Shoveling snow"}]}, "_bz66SOrklQ": {"duration": 227.626, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_bz66SOrklQ", "annotations": [{"segment": [20.70609477403195, 214.64845000456154], "label": "Drum corps"}]}, "jwlYaPt2DIs": {"duration": 79.157, "subset": 
"training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=jwlYaPt2DIs", "annotations": [{"segment": [0.2, 79.156847], "label": "Making a lemonade"}]}, "9v3xU3UkO0M": {"duration": 155.319, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9v3xU3UkO0M", "annotations": []}, "r1x3hjFWGYM": {"duration": 174.336, "subset": "training", "resolution": "582x360", "url": "https://www.youtube.com/watch?v=r1x3hjFWGYM", "annotations": [{"segment": [0.01, 161.5095989578783], "label": "Putting in contact lenses"}]}, "JMpwIWxoB5s": {"duration": 166.673, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JMpwIWxoB5s", "annotations": [{"segment": [48.62361805616225, 127.66949981591263], "label": "Making a lemonade"}]}, "YIq3Tar9x6s": {"duration": 50.09, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YIq3Tar9x6s", "annotations": []}, "_K3Cpdks6NQ": {"duration": 60.744, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_K3Cpdks6NQ", "annotations": [{"segment": [0.2, 55.247117316692666], "label": "Powerbocking"}]}, "gvFZb_DoqrY": {"duration": 232.46, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gvFZb_DoqrY", "annotations": []}, "Wyr2o0lsSTU": {"duration": 48.95, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Wyr2o0lsSTU", "annotations": [{"segment": [0.07636140873634946, 34.82080238377535], "label": "Walking the dog"}]}, "T7kOKW76EsA": {"duration": 103.7, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=T7kOKW76EsA", "annotations": [{"segment": [3.1546898447366467, 103.70031746031746], "label": "Vacuuming floor"}]}, "8d7FyPzxzjg": {"duration": 164.4, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8d7FyPzxzjg", "annotations": [{"segment": 
[4.18225968, 155.76072542901716], "label": "Doing motocross"}]}, "-IdOL3LhhCM": {"duration": 26.541, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-IdOL3LhhCM", "annotations": []}, "M1dEZrQWeRY": {"duration": 90.814, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=M1dEZrQWeRY", "annotations": [{"segment": [10.058871943837755, 22.526206184087364], "label": "Snowboarding"}, {"segment": [39.10209375351014, 78.20418750702028], "label": "Snowboarding"}]}, "N2hi_TNBk94": {"duration": 222.447, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=N2hi_TNBk94", "annotations": [{"segment": [22.209997559086037, 52.054681779107895], "label": "River tubing"}, {"segment": [63.506711770511636, 146.44717140522357], "label": "River tubing"}, {"segment": [155.4699823525741, 192.94935309828392], "label": "River tubing"}, {"segment": [198.84888367862715, 207.87169515444617], "label": "River tubing"}]}, "slHv7r8A4OI": {"duration": 198.09, "subset": "validation", "resolution": "1920x1072", "url": "https://www.youtube.com/watch?v=slHv7r8A4OI", "annotations": [{"segment": [0.2, 198.041], "label": "Skiing"}]}, "WXH-Tj0-g4w": {"duration": 55.171, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=WXH-Tj0-g4w", "annotations": []}, "mlYYpBaT68I": {"duration": 132.214, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=mlYYpBaT68I", "annotations": []}, "pdHvH20ZH1s": {"duration": 47.207, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pdHvH20ZH1s", "annotations": []}, "LBjl6fb279E": {"duration": 159.68, "subset": "testing", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=LBjl6fb279E", "annotations": []}, "Cq7Myyfbtn0": {"duration": 117.401, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Cq7Myyfbtn0", 
"annotations": [{"segment": [7.687179407176287, 105.99132449297971], "label": "Powerbocking"}]}, "VLjfzOpn-AQ": {"duration": 167.14, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VLjfzOpn-AQ", "annotations": [{"segment": [20.59881925897036, 35.20051392355694], "label": "Kayaking"}, {"segment": [41.45838306552262, 151.2318375975039], "label": "Kayaking"}]}, "crbkEVcbF2M": {"duration": 100.15, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=crbkEVcbF2M", "annotations": [{"segment": [0, 81.86798036193447], "label": "Windsurfing"}]}, "K1OsjA-f17E": {"duration": 31.092, "subset": "testing", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=K1OsjA-f17E", "annotations": []}, "1OLKg-G9TDQ": {"duration": 126.34, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=1OLKg-G9TDQ", "annotations": []}, "HUvBb3Hmir8": {"duration": 236.29, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HUvBb3Hmir8", "annotations": [{"segment": [40.69973622, 212.32587212480502], "label": "Washing dishes"}]}, "J-uW8raljqE": {"duration": 119.374, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=J-uW8raljqE", "annotations": [{"segment": [26.07207488299532, 94.97684330733229], "label": "Disc dog"}]}, "rIqITS6qMB0": {"duration": 27.93, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=rIqITS6qMB0", "annotations": [{"segment": [5.207590761310452, 7.996593344773792], "label": "Starting a campfire"}]}, "_HQTWGmXXsc": {"duration": 104.25999999999999, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_HQTWGmXXsc", "annotations": [{"segment": [0.01, 104.25734693877551], "label": "Preparing salad"}]}, "CAa74sedG9A": {"duration": 80.55, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=CAa74sedG9A", "annotations": [{"segment": [0.01, 80.55004535147393], "label": "Playing kickball"}]}, "1ZuUq5rVwPs": {"duration": 126.48, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=1ZuUq5rVwPs", "annotations": [{"segment": [3.8208031042580295, 91.85682646874402], "label": "Plastering"}]}, "sVma83g_wmg": {"duration": 222.123, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=sVma83g_wmg", "annotations": [{"segment": [35.69197897983947, 184.35080405120965], "label": "Braiding hair"}]}, "_nBOWe_VhXg": {"duration": 185.11, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_nBOWe_VhXg", "annotations": [{"segment": [23.67105039937597, 180.1309201123245], "label": "Painting"}]}, "lzcVvTHZlQo": {"duration": 61.231, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=lzcVvTHZlQo", "annotations": [{"segment": [0.01, 4.970853899125593], "label": "Arm wrestling"}, {"segment": [8.018120835361985, 9.25607302820802], "label": "Arm wrestling"}, {"segment": [11.065387771598377, 11.731977413900088], "label": "Arm wrestling"}, {"segment": [18.778782203946744, 25.920814085750788], "label": "Arm wrestling"}]}, "Cy3tUZIN8nk": {"duration": 100.87, "subset": "validation", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=Cy3tUZIN8nk", "annotations": [{"segment": [0, 63.07210764430577], "label": "Playing harmonica"}, {"segment": [66.0605616224649, 69.67816380655226], "label": "Playing harmonica"}, {"segment": [72.66661778471139, 75.96964586583464], "label": "Playing harmonica"}, {"segment": [89.18175819032761, 100.50642589703588], "label": "Playing harmonica"}]}, "82s4QeuHioo": {"duration": 182.022, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=82s4QeuHioo", "annotations": [{"segment": [42.026715201941414, 118.41310900156006], "label": 
"Rafting"}]}, "sEvP3GFbLdw": {"duration": 151.046, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sEvP3GFbLdw", "annotations": [{"segment": [31.802082683307333, 95.87739001560062], "label": "Putting on shoes"}]}, "i3DJXbrg0vk": {"duration": 65.13, "subset": "validation", "resolution": "652x480", "url": "https://www.youtube.com/watch?v=i3DJXbrg0vk", "annotations": [{"segment": [0.01, 53.24360811856474], "label": "Hammer throw"}]}, "_T4bZFH4E_A": {"duration": 210.19, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=_T4bZFH4E_A", "annotations": []}, "SidTquTAtyk": {"duration": 200.574, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SidTquTAtyk", "annotations": []}, "r8MwPAJWPDk": {"duration": 132.45, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=r8MwPAJWPDk", "annotations": [{"segment": [3.7192498965264735, 82.85662269483977], "label": "Rock climbing"}, {"segment": [82.85662187207488, 122.52862037441497], "label": "Rock climbing"}]}, "zLeCGU8SVVc": {"duration": 104.1, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zLeCGU8SVVc", "annotations": [{"segment": [7.632554734789391, 103.12068631045241], "label": "Cheerleading"}]}, "DOgmd5jNhXY": {"duration": 150.419, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=DOgmd5jNhXY", "annotations": [{"segment": [12.43234477379095, 145.9041216848674], "label": "Scuba diving"}]}, "fqOp0uhdEyM": {"duration": 90.187, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fqOp0uhdEyM", "annotations": [{"segment": [0.20000000000000004, 14.366977134636304], "label": "Hula hoop"}, {"segment": [19.845584378156765, 25.043237404060793], "label": "Hula hoop"}, {"segment": [32.62900127970451, 33.75281815017024], "label": "Hula hoop"}]}, "Lh9NHhMANKA": {"duration": 226.859, 
"subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Lh9NHhMANKA", "annotations": []}, "bQovpBODTuM": {"duration": 98.801, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bQovpBODTuM", "annotations": []}, "L9gznB55hhE": {"duration": 238.61, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=L9gznB55hhE", "annotations": [{"segment": [50.99157098283931, 210.2937051482059], "label": "Starting a campfire"}]}, "CKczltkk_7g": {"duration": 111.712, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=CKczltkk_7g", "annotations": []}, "fSVDG4h0lzM": {"duration": 32.35, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=fSVDG4h0lzM", "annotations": [{"segment": [0.01, 32.321], "label": "Cricket"}]}, "HP2pyzpWnxI": {"duration": 157.85, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HP2pyzpWnxI", "annotations": []}, "1Ao43T5yRjI": {"duration": 182.09, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1Ao43T5yRjI", "annotations": [{"segment": [14.456125100000001, 145.09841649999998], "label": "Shot put"}]}, "89S-sQUWIcg": {"duration": 66.084, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=89S-sQUWIcg", "annotations": [{"segment": [0, 66.083604], "label": "Hurling"}]}, "Q47ZEz1iuNU": {"duration": 156.039, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Q47ZEz1iuNU", "annotations": []}, "F53s94562zc": {"duration": 10.496, "subset": "training", "resolution": "400x230", "url": "https://www.youtube.com/watch?v=F53s94562zc", "annotations": [{"segment": [0.01, 8.267774791218411], "label": "Rock-paper-scissors"}]}, "6f1HnAlpphA": {"duration": 31.603, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6f1HnAlpphA", 
"annotations": [{"segment": [10.323691588785046, 15.731339563862928], "label": "Calf roping"}]}, "NAl-SP-92dI": {"duration": 229.6, "subset": "training", "resolution": "512x288", "url": "https://www.youtube.com/watch?v=NAl-SP-92dI", "annotations": [{"segment": [3.7585192600000004, 206.0951385070203], "label": "Cheerleading"}]}, "BLSycXOF3Ro": {"duration": 184.228, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BLSycXOF3Ro", "annotations": [{"segment": [10.697060444845775, 161.3690665704784], "label": "Hula hoop"}]}, "wsrSku74OpM": {"duration": 169.808, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=wsrSku74OpM", "annotations": [{"segment": [0.20000000000000004, 157.41602184087364], "label": "Beer pong"}]}, "T1fJmCDf4z0": {"duration": 21.061, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=T1fJmCDf4z0", "annotations": []}, "bBXbFpJMveI": {"duration": 175.73, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bBXbFpJMveI", "annotations": [{"segment": [0.01, 5.756195007800312], "label": "Skateboarding"}, {"segment": [12.608808112324493, 27.958661466458654], "label": "Skateboarding"}, {"segment": [40.293365054602184, 43.58261934477379], "label": "Skateboarding"}, {"segment": [46.323664586583455, 54.54680031201248], "label": "Skateboarding"}, {"segment": [61.12530889235569, 68.80023556942278], "label": "Skateboarding"}, {"segment": [88.53576131045241, 96.75889703588143], "label": "Skateboarding"}, {"segment": [104.98203276131046, 112.1087503900156], "label": "Skateboarding"}, {"segment": [106.90076443057723, 113.20516848673947], "label": "Skateboarding"}]}, "7univ6G4RXA": {"duration": 83.62, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=7univ6G4RXA", "annotations": [{"segment": [1.108780303593096, 54.59112435926009], "label": "Brushing teeth"}]}, "3_hJrb_aDWU": {"duration": 
43.19, "subset": "training", "resolution": "160x128", "url": "https://www.youtube.com/watch?v=3_hJrb_aDWU", "annotations": [{"segment": [13.45965344, 29.82921184], "label": "Kayaking"}]}, "So4BX0I1VRI": {"duration": 70.659, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=So4BX0I1VRI", "annotations": [{"segment": [0.01, 7.504917572849672], "label": "Playing ten pins"}, {"segment": [17.756433652412856, 21.724762457405056], "label": "Playing ten pins"}, {"segment": [40.24363021403532, 44.98357850888711], "label": "Playing ten pins"}, {"segment": [65.37637931231924, 70.658299], "label": "Playing ten pins"}]}, "vnpatTiKhBE": {"duration": 206.19, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=vnpatTiKhBE", "annotations": []}, "SlFGykD5Nsg": {"duration": 142.39, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SlFGykD5Nsg", "annotations": []}, "mAfp5ABlKyM": {"duration": 42.89, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=mAfp5ABlKyM", "annotations": [{"segment": [0.01, 0.02], "label": "Ping-pong"}]}, "ZMTi498qnPc": {"duration": 100.5, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZMTi498qnPc", "annotations": [{"segment": [5.12186676, 43.0049238], "label": "Doing nails"}]}, "5YEX9xzF8sQ": {"duration": 14.61, "subset": "testing", "resolution": "404x720", "url": "https://www.youtube.com/watch?v=5YEX9xzF8sQ", "annotations": []}, "xXXQyLS1uuY": {"duration": 60.674, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xXXQyLS1uuY", "annotations": [{"segment": [1.2275280373831776, 56.65514018691589], "label": "Snow tubing"}]}, "Jhcis4S2Vsg": {"duration": 72.238, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Jhcis4S2Vsg", "annotations": []}, "eJc8bebOF2A": {"duration": 6.64, "subset": "testing", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=eJc8bebOF2A", "annotations": []}, "FRJLhGFpCGE": {"duration": 201.875, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=FRJLhGFpCGE", "annotations": [{"segment": [41.54416848673947, 200.16735725429018], "label": "Running a marathon"}]}, "Scjxbdh6b54": {"duration": 47.439, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Scjxbdh6b54", "annotations": [{"segment": [3.104527301092044, 47.181], "label": "Hand car wash"}]}, "otWcr0Rxy5g": {"duration": 202.618, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=otWcr0Rxy5g", "annotations": [{"segment": [26.54727613104524, 190.25547893915757], "label": "Drum corps"}]}, "k8C1Jb_xvh0": {"duration": 129.684, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=k8C1Jb_xvh0", "annotations": [{"segment": [26.494494539781588, 103.95549765990638], "label": "Throwing darts"}]}, "04ketA7YN-0": {"duration": 112.85, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=04ketA7YN-0", "annotations": []}, "eM2miz5uf8Q": {"duration": 76.255, "subset": "validation", "resolution": "847x480", "url": "https://www.youtube.com/watch?v=eM2miz5uf8Q", "annotations": [{"segment": [22.781130107789348, 63.82285186427456], "label": "Removing ice from car"}]}, "Qre7RVxEn78": {"duration": 119.77000000000001, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Qre7RVxEn78", "annotations": [{"segment": [25.61934477379095, 107.62351076443058], "label": "Getting a piercing"}]}, "JsPIzRsUtlE": {"duration": 186.921, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JsPIzRsUtlE", "annotations": []}, "a42c_maArv4": {"duration": 10.89, "subset": "training", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=a42c_maArv4", 
"annotations": [{"segment": [1.1438003120124804, 9.621379095163807], "label": "Volleyball"}]}, "XKuD0F-p6YA": {"duration": 133.167, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XKuD0F-p6YA", "annotations": [{"segment": [49.212070202808114, 130.21352106084245], "label": "Assembling bicycle"}]}, "QazS2ixUp5A": {"duration": 142.408, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QazS2ixUp5A", "annotations": []}, "p42wxuN8MZE": {"duration": 118.0, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=p42wxuN8MZE", "annotations": [{"segment": [58.978333, 117.956666], "label": "Playing badminton"}]}, "maE7PmL7Zjk": {"duration": 140.342, "subset": "validation", "resolution": "1920x608", "url": "https://www.youtube.com/watch?v=maE7PmL7Zjk", "annotations": [{"segment": [22.988841839387863, 96.55313572542903], "label": "Playing pool"}]}, "JKa3jnnowNo": {"duration": 140.342, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=JKa3jnnowNo", "annotations": [{"segment": [20.321079439252337, 29.279834890965734], "label": "Fixing the roof"}, {"segment": [58.12265732087228, 77.35120560747663], "label": "Fixing the roof"}, {"segment": [89.58755451713395, 120.63263551401867], "label": "Fixing the roof"}]}, "b0bM4vKxoXQ": {"duration": 129.846, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=b0bM4vKxoXQ", "annotations": [{"segment": [55.07936349453978, 129.801], "label": "Bullfighting"}]}, "lviFcaF4HUo": {"duration": 36.97, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lviFcaF4HUo", "annotations": [{"segment": [0, 19.363769110764434], "label": "Using uneven bars"}, {"segment": [20.32094952, 35.32735257410297], "label": "Using uneven bars"}]}, "L61Le9sOGK0": {"duration": 16.603, "subset": "validation", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=L61Le9sOGK0", "annotations": [{"segment": [0.01, 0.02], "label": "Table soccer"}]}, "KYN1NR7l2zQ": {"duration": 110.133, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KYN1NR7l2zQ", "annotations": [{"segment": [25.256536661466455, 48.451314393135725], "label": "Swimming"}, {"segment": [62.36818129329173, 74.73872964898597], "label": "Swimming"}, {"segment": [86.42202531825274, 92.7791126677067], "label": "Swimming"}]}, "DVcAKbTkIXQ": {"duration": 166.813, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=DVcAKbTkIXQ", "annotations": []}, "1jl5qtS4mNQ": {"duration": 118.631, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1jl5qtS4mNQ", "annotations": [{"segment": [29.796348215833394, 64.77466926677067], "label": "Installing carpet"}]}, "9-U2WOLIqB8": {"duration": 87.075, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9-U2WOLIqB8", "annotations": [{"segment": [24.95602602411681, 77.03816729183885], "label": "Playing ten pins"}]}, "EFEI0-awheU": {"duration": 19.78, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EFEI0-awheU", "annotations": [{"segment": [6.682990639625585, 9.885898595943837], "label": "Ironing clothes"}, {"segment": [11.949310452418096, 19.58701404056162], "label": "Ironing clothes"}]}, "9J72KkOrAq4": {"duration": 7.059, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9J72KkOrAq4", "annotations": [{"segment": [0.01, 3.160521458658346], "label": "Shuffleboard"}]}, "_rzxmgPv6Co": {"duration": 192.54, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_rzxmgPv6Co", "annotations": [{"segment": [76.29504749169558, 132.76539760365924], "label": "Doing motocross"}]}, "qeYKXF8tsp4": {"duration": 118.93, "subset": "validation", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=qeYKXF8tsp4", "annotations": [{"segment": [1.8546489859594384, 116.47195631825272], "label": "Playing field hockey"}]}, "5iIlhXLDsf8": {"duration": 122.439, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=5iIlhXLDsf8", "annotations": [{"segment": [6.491347893915757, 111.49844617784711], "label": "Doing crunches"}]}, "E-XerA_sOjw": {"duration": 54.567, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=E-XerA_sOjw", "annotations": [{"segment": [12.513780224648986, 36.009041054602186], "label": "Disc dog"}]}, "I41kmlYZmt4": {"duration": 168.229, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=I41kmlYZmt4", "annotations": [{"segment": [0.01, 57.213461109872966], "label": "Trimming branches or hedges"}, {"segment": [72.96028526855359, 145.65812223088923], "label": "Trimming branches or hedges"}]}, "QV2QXt6eDdI": {"duration": 30.79, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QV2QXt6eDdI", "annotations": [{"segment": [0, 30.185131045241807], "label": "Washing dishes"}]}, "TvlCdfubl9Y": {"duration": 25.1, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TvlCdfubl9Y", "annotations": [{"segment": [0, 23.416943904967084], "label": "Fixing bicycle"}]}, "DZx2G-OZAPk": {"duration": 32.07, "subset": "validation", "resolution": "1270x720", "url": "https://www.youtube.com/watch?v=DZx2G-OZAPk", "annotations": [{"segment": [9.85580576, 29.840940260000004], "label": "Playing piano"}]}, "o9MKXpyIvEM": {"duration": 152.068, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=o9MKXpyIvEM", "annotations": []}, "WjX-xi2Ocbo": {"duration": 23.36, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=WjX-xi2Ocbo", "annotations": [{"segment": [2.5175294748131902, 23.359274], 
"label": "Calf roping"}]}, "OmlzSz6AKfI": {"duration": 67.803, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OmlzSz6AKfI", "annotations": [{"segment": [2.643564742589704, 67.781], "label": "BMX"}]}, "aDJQFTO0dPY": {"duration": 46.81, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aDJQFTO0dPY", "annotations": []}, "GejFc4J2mfc": {"duration": 181.63, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GejFc4J2mfc", "annotations": [{"segment": [42.77493135725428, 114.4441872074883], "label": "Shaving legs"}]}, "9c6p0S1appo": {"duration": 148.933, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9c6p0S1appo", "annotations": []}, "IkXuooaGnCc": {"duration": 188.686, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IkXuooaGnCc", "annotations": [{"segment": [74.2416656979666, 173.71963346950238], "label": "Mooping floor"}]}, "IQvSj-3BGPo": {"duration": 204.034, "subset": "training", "resolution": "853x474", "url": "https://www.youtube.com/watch?v=IQvSj-3BGPo", "annotations": [{"segment": [6.0478019656786275, 198.30424340093603], "label": "Disc dog"}]}, "Qtj7QFDjH1A": {"duration": 59.44, "subset": "training", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=Qtj7QFDjH1A", "annotations": [{"segment": [3.9140517677024533, 53.912905786394035], "label": "Playing bagpipes"}]}, "Doy6s1y58uc": {"duration": 8.545, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=Doy6s1y58uc", "annotations": [{"segment": [0.2, 8.344420000000001], "label": "Throwing darts"}]}, "3ve9a8YKP90": {"duration": 184.25, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=3ve9a8YKP90", "annotations": [{"segment": [0.01, 12.6440624024961], "label": "Kayaking"}, {"segment": [17.529268330733228, 32.4722511700468], "label": 
"Kayaking"}, {"segment": [36.78272698907956, 90.80735725429017], "label": "Kayaking"}, {"segment": [95.6925631825273, 108.04926053042121], "label": "Kayaking"}, {"segment": [112.35973634945397, 117.5323073322933], "label": "Kayaking"}, {"segment": [122.70487831513259, 129.60163962558502], "label": "Kayaking"}, {"segment": [134.7742106084243, 142.82043213728548], "label": "Kayaking"}, {"segment": [146.55617784711387, 157.47604992199686], "label": "Kayaking"}, {"segment": [161.21179563182525, 173.56849297971917], "label": "Kayaking"}]}, "fOgfpA9MTOQ": {"duration": 32.416, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fOgfpA9MTOQ", "annotations": [{"segment": [14.626931464174454, 23.907467289719627], "label": "Calf roping"}]}, "p31a9wkiSHA": {"duration": 206.217, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=p31a9wkiSHA", "annotations": [{"segment": [21.32830283328609, 89.10347992688673], "label": "Longboarding"}, {"segment": [102.5942734969802, 171.33307883031353], "label": "Longboarding"}, {"segment": [187.07233799542254, 189.96322233187115], "label": "Longboarding"}]}, "u35hesPTsNE": {"duration": 131.82, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=u35hesPTsNE", "annotations": [{"segment": [0.2, 66.83525245771735], "label": "Playing congas"}]}, "Zl6o69-R2DE": {"duration": 81.78, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Zl6o69-R2DE", "annotations": [{"segment": [19.39261061054687, 22.709767640873633], "label": "Shoveling snow"}, {"segment": [30.747494390171603, 35.34048110405616], "label": "Shoveling snow"}, {"segment": [44.65403749609984, 62.0053206374415], "label": "Shoveling snow"}]}, "59R_1aBnFn4": {"duration": 6.177, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=59R_1aBnFn4", "annotations": [{"segment": [0.19999999999999998, 5.976506], "label": 
"Doing a powerbomb"}]}, "C0gGikr-Dw8": {"duration": 65.225, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=C0gGikr-Dw8", "annotations": [{"segment": [14.5378127925117, 64.95768954758191], "label": "Hitting a pinata"}]}, "ITyNMYw_pxc": {"duration": 65.62, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ITyNMYw_pxc", "annotations": [{"segment": [16.369672386895477, 59.85161466458659], "label": "Throwing darts"}]}, "EVQlh2Et5tc": {"duration": 24.24, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EVQlh2Et5tc", "annotations": [{"segment": [5.256677062402496, 21.59397555850234], "label": "Using uneven bars"}]}, "8KH35bzOO_0": {"duration": 31.09, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=8KH35bzOO_0", "annotations": []}, "C0t3fbC2RCg": {"duration": 133.21, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=C0t3fbC2RCg", "annotations": [{"segment": [10.90664976599064, 75.93105694227769], "label": "Playing flauta"}, {"segment": [95.45915366614663, 130.98367004680188], "label": "Playing flauta"}]}, "eEBiomp_9tQ": {"duration": 30.05, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eEBiomp_9tQ", "annotations": [{"segment": [5.812450670826833, 25.031037566302654], "label": "Doing step aerobics"}]}, "uE9MHR27_gc": {"duration": 238.934, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uE9MHR27_gc", "annotations": [{"segment": [0.01, 213.21351935101404], "label": "Sharpening knives"}]}, "gzuLIo5ZR9A": {"duration": 50.46, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gzuLIo5ZR9A", "annotations": [{"segment": [0.01, 50.45696145124717], "label": "Walking the dog"}]}, "S0Kl5D5mrvQ": {"duration": 50.574, "subset": "validation", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=S0Kl5D5mrvQ", "annotations": [{"segment": [0.19999999999999998, 49.23391939252337], "label": "Cleaning sink"}]}, "1BWF1U1dJ_w": {"duration": 105.91, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1BWF1U1dJ_w", "annotations": [{"segment": [23.290516380655227, 26.924497659906397], "label": "Making a sandwich"}]}, "YZhNtSJMzq8": {"duration": 49.34, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YZhNtSJMzq8", "annotations": [{"segment": [0.7691263650546022, 43.07107644305773], "label": "Mowing the lawn"}]}, "uOk4EFDsDP4": {"duration": 54.15, "subset": "validation", "resolution": "350x240", "url": "https://www.youtube.com/watch?v=uOk4EFDsDP4", "annotations": [{"segment": [1.0309779200000002, 38.57170259282372], "label": "Preparing pasta"}]}, "chpGQpd9Gls": {"duration": 33.72, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=chpGQpd9Gls", "annotations": []}, "SlogVhfmTVs": {"duration": 169.53, "subset": "training", "resolution": "1480x1080", "url": "https://www.youtube.com/watch?v=SlogVhfmTVs", "annotations": [{"segment": [142.79335413416536, 167.64997503900156], "label": "Shaving"}]}, "XThYcZoFMMs": {"duration": 217.989, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XThYcZoFMMs", "annotations": [{"segment": [61.89389396528243, 217.78893424036283], "label": "Making a cake"}]}, "pajUG7Y6Gtk": {"duration": 61.092, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pajUG7Y6Gtk", "annotations": [{"segment": [0, 61.061], "label": "Beach soccer"}]}, "e-k2J91a954": {"duration": 61.16, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=e-k2J91a954", "annotations": [{"segment": [0.01, 58.67952574102964], "label": "Volleyball"}]}, "uJAL31wAD2A": {"duration": 188.268, "subset": "testing", "resolution": "1920x1080", 
"url": "https://www.youtube.com/watch?v=uJAL31wAD2A", "annotations": []}, "JN1wuOpCZvk": {"duration": 220.06, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JN1wuOpCZvk", "annotations": []}, "HCraAphAW1A": {"duration": 28.561, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HCraAphAW1A", "annotations": [{"segment": [1.0854786491899222, 24.796957494984316], "label": "Clipping cat claws"}]}, "xbWSMxwcP_8": {"duration": 12.12, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=xbWSMxwcP_8", "annotations": [{"segment": [0, 11.39817042], "label": "Ironing clothes"}]}, "JpUgQNMC5tw": {"duration": 124.6, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=JpUgQNMC5tw", "annotations": []}, "TKgU9QJXw2w": {"duration": 40.87, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TKgU9QJXw2w", "annotations": []}, "Z-sWZUYL0ZU": {"duration": 126.39, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Z-sWZUYL0ZU", "annotations": []}, "lmv-due_RPs": {"duration": 131.61, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lmv-due_RPs", "annotations": []}, "UrQ7Jq1s95o": {"duration": 21.27, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UrQ7Jq1s95o", "annotations": [{"segment": [4.03068819, 15.180632960218409], "label": "Clean and jerk"}]}, "5zYETEiYiCQ": {"duration": 65.18, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5zYETEiYiCQ", "annotations": [{"segment": [0.01, 65.177913], "label": "Playing piano"}]}, "vWULU8NttX8": {"duration": 116.704, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vWULU8NttX8", "annotations": [{"segment": [85.38804307176287, 116.703061], "label": "BMX"}]}, "DCjklOgbzGs": 
{"duration": 158.407, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DCjklOgbzGs", "annotations": [{"segment": [7.413722152886116, 155.68816521060845], "label": "Skiing"}]}, "0_9_HIvxNMw": {"duration": 198.415, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0_9_HIvxNMw", "annotations": [{"segment": [21.600331019363423, 183.9114238389344], "label": "Snowboarding"}]}, "oZnbAhzN3jg": {"duration": 172.66, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oZnbAhzN3jg", "annotations": [{"segment": [13.25172397129974, 158.75173918002247], "label": "Cleaning windows"}]}, "smZ0pWbxFWo": {"duration": 30.44, "subset": "testing", "resolution": "328x240", "url": "https://www.youtube.com/watch?v=smZ0pWbxFWo", "annotations": []}, "pYYA4rARL1Y": {"duration": 192.38, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pYYA4rARL1Y", "annotations": [{"segment": [9.571180506087758, 14.0591354045245], "label": "Using the pommel horse"}, {"segment": [17.34957241852139, 20.340878794882197], "label": "Using the pommel horse"}, {"segment": [23.93044644651517, 27.81914473578422], "label": "Using the pommel horse"}, {"segment": [31.408712387417193, 33.90722776911077], "label": "Using the pommel horse"}, {"segment": [37.808059282371296, 40.80869890795632], "label": "Using the pommel horse"}, {"segment": [44.709530421216854, 50.4107457098284], "label": "Using the pommel horse"}, {"segment": [59.41266458658347, 63.01343213728549], "label": "Using the pommel horse"}, {"segment": [72.31541497659906, 76.81637441497661], "label": "Using the pommel horse"}, {"segment": [80.71720592823714, 170.73639469578785], "label": "Using the pommel horse"}]}, "zcdJNPYkIE0": {"duration": 66.084, "subset": "training", "resolution": "624x360", "url": "https://www.youtube.com/watch?v=zcdJNPYkIE0", "annotations": [{"segment": [0.01, 66.08399092970521], "label": 
"Scuba diving"}]}, "nZ40a3LSFeU": {"duration": 112.11, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=nZ40a3LSFeU", "annotations": [{"segment": [12.242458444617784, 104.93535809672386], "label": "Sailing"}]}, "jqRJM8b4pgE": {"duration": 78.65, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=jqRJM8b4pgE", "annotations": [{"segment": [62.327863307332294, 78.645985], "label": "Smoking hookah"}]}, "uCLUexLKpnU": {"duration": 14.98, "subset": "training", "resolution": "562x480", "url": "https://www.youtube.com/watch?v=uCLUexLKpnU", "annotations": [{"segment": [0.023218928237129486, 14.883333], "label": "Windsurfing"}]}, "xs4MvE_uAtU": {"duration": 216.11, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xs4MvE_uAtU", "annotations": [{"segment": [65.74259478159127, 115.63953851326053], "label": "Cleaning shoes"}, {"segment": [117.66238758346334, 167.5593313151326], "label": "Cleaning shoes"}]}, "iVrhxY1zxMo": {"duration": 234.685, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=iVrhxY1zxMo", "annotations": []}, "O8vPTn6Ho7w": {"duration": 182.23, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=O8vPTn6Ho7w", "annotations": [{"segment": [14.214513886677915, 30.13476943975718], "label": "Kayaking"}, {"segment": [58.27950693537945, 65.10247360098485], "label": "Kayaking"}, {"segment": [91.54146943020577, 96.37440415167627], "label": "Kayaking"}, {"segment": [100.63875831767963, 108.59888609421927], "label": "Kayaking"}, {"segment": [129.63636664650258, 133.04784997930528], "label": "Kayaking"}, {"segment": [139.87081662402497, 147.26236384399377], "label": "Kayaking"}, {"segment": [153.51674995319812, 158.34968467394694], "label": "Kayaking"}]}, "ymEh5T11kSQ": {"duration": 35.248, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ymEh5T11kSQ", 
"annotations": [{"segment": [0.2, 20.34589594383775], "label": "Beach soccer"}]}, "a1XSqgYWcDM": {"duration": 70.06, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=a1XSqgYWcDM", "annotations": []}, "20i6YGSnt-c": {"duration": 54.61, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=20i6YGSnt-c", "annotations": [{"segment": [5.39521827, 49.842095074883], "label": "Shaving"}]}, "-2DajzzRwn0": {"duration": 191.147, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-2DajzzRwn0", "annotations": [{"segment": [0.01, 186.17924251032323], "label": "Tug of war"}]}, "H0gSWEElh6A": {"duration": 145.752, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=H0gSWEElh6A", "annotations": [{"segment": [15.21882527301092, 126.53806084243371], "label": "Fixing the roof"}]}, "hD97MAqtyjk": {"duration": 146.8, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hD97MAqtyjk", "annotations": []}, "BzxK6r4UG5k": {"duration": 56.96, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=BzxK6r4UG5k", "annotations": [{"segment": [4.520647399375975, 45.56103457410296], "label": "Clean and jerk"}]}, "aEG-_-m49mY": {"duration": 220.637, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=aEG-_-m49mY", "annotations": [{"segment": [0.01, 19.2755327736919], "label": "Painting fence"}]}, "R2qPyJVTUlI": {"duration": 91.93, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=R2qPyJVTUlI", "annotations": [{"segment": [44.28244773790952, 91.861], "label": "Putting on makeup"}]}, "IGZvj1J9tNc": {"duration": 179.28, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IGZvj1J9tNc", "annotations": []}, "huFuRGoZt9I": {"duration": 23.36, "subset": "training", "resolution": "192x144", "url": 
"https://www.youtube.com/watch?v=huFuRGoZt9I", "annotations": [{"segment": [0, 23.359273], "label": "Playing blackjack"}]}, "CfGFk5dvKGg": {"duration": 110.34, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=CfGFk5dvKGg", "annotations": []}, "G9oSp_hnm_U": {"duration": 83.639, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=G9oSp_hnm_U", "annotations": []}, "hKE7vI--XGU": {"duration": 103.42, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hKE7vI--XGU", "annotations": [{"segment": [30.49400713171384, 98.90384302355695], "label": "Playing flauta"}]}, "dID-dQpaLbc": {"duration": 235.13, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dID-dQpaLbc", "annotations": [{"segment": [9.898595943837753, 221.801872074883], "label": "Tai chi"}]}, "fFWHlFZEm3M": {"duration": 169.46, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=fFWHlFZEm3M", "annotations": [{"segment": [7.00572471450858, 144.68239961], "label": "Sailing"}]}, "vWNDj8KxmBg": {"duration": 88.03, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vWNDj8KxmBg", "annotations": [{"segment": [5.699053809771439, 78.2074963900156], "label": "Breakdancing"}]}, "AdLAUGIklAo": {"duration": 119.398, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AdLAUGIklAo", "annotations": []}, "BqvU8S76g1E": {"duration": 11.77, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=BqvU8S76g1E", "annotations": []}, "CRKA3xPEKm8": {"duration": 50.95, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CRKA3xPEKm8", "annotations": [{"segment": [9.21627904, 41.12915008437072], "label": "Using parallel bars"}]}, "qmP7dbzrhnw": {"duration": 229.716, "subset": "testing", "resolution": "426x240", "url": 
"https://www.youtube.com/watch?v=qmP7dbzrhnw", "annotations": []}, "TCYrINZQn6g": {"duration": 158.57, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TCYrINZQn6g", "annotations": []}, "93jSVyXW5gc": {"duration": 111.55, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=93jSVyXW5gc", "annotations": [{"segment": [46.72968816, 100.52036472], "label": "Archery"}, {"segment": [21.57672964, 34.03217932], "label": "Archery"}]}, "WjEh7acrr5o": {"duration": 12.446, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WjEh7acrr5o", "annotations": [{"segment": [0.01, 12.445895691609977], "label": "Knitting"}]}, "CpTIytatsno": {"duration": 97.22, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CpTIytatsno", "annotations": [{"segment": [6.5936092043681755, 10.989348673946958], "label": "Shot put"}, {"segment": [22.054485959438377, 26.75337987519501], "label": "Shot put"}, {"segment": [34.64252496099844, 48.89371684867395], "label": "Shot put"}]}, "vREBYOCUCj0": {"duration": 111.526, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vREBYOCUCj0", "annotations": [{"segment": [62.582169112315434, 94.35946350483879], "label": "Skiing"}]}, "4wiLmu1yTZI": {"duration": 125.97, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4wiLmu1yTZI", "annotations": [{"segment": [55.2009375975039, 84.4711856474259], "label": "Brushing hair"}]}, "pesi_zocmrM": {"duration": 112.733, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pesi_zocmrM", "annotations": []}, "B_U7vUtoCKY": {"duration": 145.07999999999998, "subset": "testing", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=B_U7vUtoCKY", "annotations": []}, "czmYE1FzBXM": {"duration": 192.01, "subset": "validation", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=czmYE1FzBXM", "annotations": [{"segment": [84.77006282371295, 175.231401950078], "label": "Playing lacrosse"}]}, "1izVZnE0l7k": {"duration": 208.32999999999998, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=1izVZnE0l7k", "annotations": [{"segment": [7.475159066226595, 162.8284648773706], "label": "Washing dishes"}]}, "PT4RBe2UA4k": {"duration": 34.529, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PT4RBe2UA4k", "annotations": []}, "-F7QWQA8Eh8": {"duration": 104.31, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=-F7QWQA8Eh8", "annotations": [{"segment": [37.26296723161444, 84.12643693774962], "label": "Tango"}]}, "_z7M8BENWVE": {"duration": 103.306, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_z7M8BENWVE", "annotations": []}, "8OA30kYcAUQ": {"duration": 168.368, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8OA30kYcAUQ", "annotations": [{"segment": [8.928102964118565, 33.61168174726989], "label": "Raking leaves"}, {"segment": [88.23066458658346, 121.84234633385336], "label": "Raking leaves"}]}, "bOk4SVceq0A": {"duration": 146.91, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bOk4SVceq0A", "annotations": []}, "qbUQVhjQJ48": {"duration": 16.905, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=qbUQVhjQJ48", "annotations": [{"segment": [0.26117004680187206, 5.223400936037441], "label": "Shuffleboard"}]}, "QNsonWAaFk4": {"duration": 186.41, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=QNsonWAaFk4", "annotations": [{"segment": [0, 150.93086284867394], "label": "Volleyball"}]}, "OaG9uH7BgjI": {"duration": 84.73, "subset": "validation", "resolution": "854x470", "url": 
"https://www.youtube.com/watch?v=OaG9uH7BgjI", "annotations": [{"segment": [53.755112324492984, 62.604234009360376], "label": "Clipping cat claws"}]}, "0czF2CCgq6I": {"duration": 47.091, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0czF2CCgq6I", "annotations": [{"segment": [0.01, 17.796089521714773], "label": "Cutting the grass"}]}, "lMbDEY-CVwE": {"duration": 176.263, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lMbDEY-CVwE", "annotations": [{"segment": [0.01, 175.54883804447897], "label": "Disc dog"}]}, "FXb8rBMvol4": {"duration": 139.947, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FXb8rBMvol4", "annotations": [{"segment": [17.237159126365054, 26.401218408736348], "label": "Snowboarding"}, {"segment": [32.29239937597504, 40.801882995319815], "label": "Snowboarding"}, {"segment": [47.129447737909516, 53.89339625585023], "label": "Snowboarding"}, {"segment": [58.257234009360374, 74.8398174726989], "label": "Snowboarding"}, {"segment": [77.23992823712948, 82.27010452418098], "label": "Snowboarding"}, {"segment": [86.19812012480499, 94.70882059282371], "label": "Snowboarding"}, {"segment": [98.85505928237129, 105.40175195007801], "label": "Snowboarding"}, {"segment": [109.32976755070203, 121.55026053042123], "label": "Snowboarding"}]}, "hhHiBeL08-M": {"duration": 39.73, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hhHiBeL08-M", "annotations": [{"segment": [0.01, 38.94898382713582], "label": "Painting fence"}]}, "VFsRRXYbuHs": {"duration": 19.389, "subset": "validation", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=VFsRRXYbuHs", "annotations": [{"segment": [0.2, 19.141000000000002], "label": "Waterskiing"}]}, "45P3UDcb4Gc": {"duration": 165.19, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=45P3UDcb4Gc", "annotations": [{"segment": 
[15.977473760174897, 165.186462585034], "label": "Smoking a cigarette"}]}, "1WSFrupQzQI": {"duration": 62.462, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1WSFrupQzQI", "annotations": [{"segment": [34.66751326053042, 62.401], "label": "Bullfighting"}]}, "Nn-KZMYbOv4": {"duration": 141.46, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=Nn-KZMYbOv4", "annotations": [{"segment": [6.620403837753511, 131.74603637129485], "label": "Spinning"}]}, "0E0t_6jIkvw": {"duration": 231.921, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0E0t_6jIkvw", "annotations": []}, "3-FGPutRFAs": {"duration": 220.01, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3-FGPutRFAs", "annotations": []}, "TtAEG3yXDnI": {"duration": 105.16, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TtAEG3yXDnI", "annotations": [{"segment": [4.263702521060843, 83.96214195319813], "label": "Using uneven bars"}]}, "uMQEaWGQvrc": {"duration": 17.09, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uMQEaWGQvrc", "annotations": [{"segment": [5.678703613104524, 9.624469503900157], "label": "Springboard diving"}]}, "d6gGZFQOxOg": {"duration": 127.36, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=d6gGZFQOxOg", "annotations": []}, "jsfPXYQuikQ": {"duration": 41.12, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jsfPXYQuikQ", "annotations": [{"segment": [0.01, 41.12253968253968], "label": "Springboard diving"}]}, "YoJxVl_hUWg": {"duration": 56.379, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YoJxVl_hUWg", "annotations": [{"segment": [15.557499219968797, 20.831227769110765], "label": "Changing car wheel"}]}, "BodF651KcIg": {"duration": 236.87, "subset": 
"validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BodF651KcIg", "annotations": [{"segment": [0.01, 226.15008681622467], "label": "Belly dance"}]}, "MHhMO3yhcfU": {"duration": 139.971, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MHhMO3yhcfU", "annotations": [{"segment": [10.335046127093545, 130.47450251338637], "label": "Bullfighting"}]}, "kkjBLmM6KEg": {"duration": 125.64, "subset": "training", "resolution": "624x352", "url": "https://www.youtube.com/watch?v=kkjBLmM6KEg", "annotations": [{"segment": [10.97662679840527, 119.76283881831463], "label": "Tango"}]}, "llcbN-s26vs": {"duration": 30.651, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=llcbN-s26vs", "annotations": [{"segment": [6.816477339251428, 25.26386782630372], "label": "Hanging wallpaper"}]}, "XnUi1SG9jxA": {"duration": 69.17, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=XnUi1SG9jxA", "annotations": [{"segment": [1.2393148666146647, 63.42059121762871], "label": "High jump"}]}, "LB9-RIKxk6E": {"duration": 158.895, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LB9-RIKxk6E", "annotations": [{"segment": [0, 0.743651705148206], "label": "Running a marathon"}]}, "qtl0pZQk-E8": {"duration": 160.659, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qtl0pZQk-E8", "annotations": []}, "sNnNDCx6RRA": {"duration": 178.01, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sNnNDCx6RRA", "annotations": [{"segment": [0.01, 170.7426131045242], "label": "Ballet"}]}, "ral5Oaib_vk": {"duration": 207.15, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ral5Oaib_vk", "annotations": [{"segment": [36.8323151326053, 132.7901887675507], "label": "Playing harmonica"}, {"segment": [150.23707488299533, 
189.0261856474259], "label": "Playing harmonica"}]}, "uSuHnQPWfNY": {"duration": 169.645, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=uSuHnQPWfNY", "annotations": [{"segment": [39.169201366614665, 131.26975593135725], "label": "Grooming dog"}]}, "P-ENnGQ_FuU": {"duration": 74.165, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=P-ENnGQ_FuU", "annotations": [{"segment": [0.2, 54.94067862714509], "label": "Hand car wash"}]}, "C3K-OgdK8FY": {"duration": 192.75, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=C3K-OgdK8FY", "annotations": [{"segment": [18.48876131045242, 182.93351638065525], "label": "Zumba"}]}, "SyOdA4ZKEtQ": {"duration": 87.93, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SyOdA4ZKEtQ", "annotations": [{"segment": [10.356574102964117, 25.85714196567863], "label": "Tumbling"}, {"segment": [32.990146645865835, 35.87078315132605], "label": "Tumbling"}, {"segment": [39.84880499219968, 41.632056162246485], "label": "Tumbling"}, {"segment": [44.10117316692667, 49.45092667706707], "label": "Tumbling"}, {"segment": [61.796511700468024, 65.63736037441497], "label": "Tumbling"}]}, "k_7hLIwul48": {"duration": 33.414, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=k_7hLIwul48", "annotations": [{"segment": [6.249173166926678, 22.809482059282374], "label": "Throwing darts"}]}, "p74gAY-kWaY": {"duration": 59.63, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=p74gAY-kWaY", "annotations": [{"segment": [6.883828989079563, 35.349392106084245], "label": "Brushing hair"}, {"segment": [40.4657514898596, 59.628843], "label": "Brushing hair"}]}, "1kkc9hDshP4": {"duration": 220.07999999999998, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=1kkc9hDshP4", "annotations": []}, "DJTvj9lfSWU": 
{"duration": 152.14, "subset": "testing", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=DJTvj9lfSWU", "annotations": []}, "lgu-DBDWlEs": {"duration": 18.785, "subset": "validation", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=lgu-DBDWlEs", "annotations": [{"segment": [0.5861136349453978, 17.905771547581907], "label": "Peeling potatoes"}]}, "8hkaaBJ4994": {"duration": 98.62, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=8hkaaBJ4994", "annotations": []}, "dO01vfB9z4g": {"duration": 128.15, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dO01vfB9z4g", "annotations": []}, "B2p4QMI3sfk": {"duration": 200.528, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=B2p4QMI3sfk", "annotations": []}, "oB5eeXXprts": {"duration": 108.3, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oB5eeXXprts", "annotations": []}, "gMLA3a0FMS0": {"duration": 107.03999999999999, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gMLA3a0FMS0", "annotations": [{"segment": [47.521585800000004, 79.7974518], "label": "Playing lacrosse"}]}, "PVAV7PdHhgE": {"duration": 143.824, "subset": "training", "resolution": "202x360", "url": "https://www.youtube.com/watch?v=PVAV7PdHhgE", "annotations": [{"segment": [0.2, 132.32015600624027], "label": "Swinging at the playground"}]}, "v5peBgLKWL8": {"duration": 191.704, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=v5peBgLKWL8", "annotations": [{"segment": [42.46793664943876, 53.8325957528097], "label": "Skiing"}, {"segment": [57.12236549325918, 61.30934516292216], "label": "Skiing"}, {"segment": [67.29074469101212, 83.73959339325954], "label": "Skiing"}, {"segment": [114.84287093932737, 124.4131089672387], "label": "Skiing"}, {"segment": [136.97404785335414, 183.32989374258972], "label": 
"Skiing"}]}, "J4FBxa5FWSo": {"duration": 12.98, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=J4FBxa5FWSo", "annotations": [{"segment": [0, 1.2959704461778472], "label": "Powerbocking"}, {"segment": [0.2, 12.27122016224649], "label": "Powerbocking"}]}, "2PAVJbmj2lQ": {"duration": 57.82, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=2PAVJbmj2lQ", "annotations": [{"segment": [0.8184733700000001, 55.14683307332293], "label": "Layup drill in basketball"}]}, "7LimgSQsHm0": {"duration": 133.585, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7LimgSQsHm0", "annotations": [{"segment": [32.30843311667541, 65.39241980792512], "label": "Playing ice hockey"}, {"segment": [72.88313377575525, 77.25271692365615], "label": "Playing ice hockey"}]}, "s04x6lhUmtY": {"duration": 63.762, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=s04x6lhUmtY", "annotations": [{"segment": [6.564609494847706, 41.96590402198983], "label": "Slacklining"}]}, "abjQ1EyqJuw": {"duration": 37.779, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=abjQ1EyqJuw", "annotations": [{"segment": [0.009999999999999998, 37.741], "label": "Trimming branches or hedges"}]}, "DmAOCYuMgtE": {"duration": 205.01, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DmAOCYuMgtE", "annotations": [{"segment": [3.677159374414977, 203.52277754914198], "label": "Zumba"}]}, "-bzI8_hCbWk": {"duration": 219.41, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-bzI8_hCbWk", "annotations": [{"segment": [16.94699297971919, 43.993709048361936], "label": "Dodgeball"}, {"segment": [69.32860764430578, 80.28423946957878], "label": "Dodgeball"}, {"segment": [99.11423166926679, 108.70040951638066], "label": "Dodgeball"}, {"segment": [132.3234906396256, 
141.5673049921997], "label": "Dodgeball"}]}, "G3TPqsLNlrw": {"duration": 138.925, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=G3TPqsLNlrw", "annotations": [{"segment": [0.2, 3.4677064252638132], "label": "Waterskiing"}, {"segment": [22.323360112635797, 73.03856658211906], "label": "Waterskiing"}, {"segment": [78.67358952317275, 126.35455135881435], "label": "Waterskiing"}]}, "lCX7y_KAihU": {"duration": 58.77, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lCX7y_KAihU", "annotations": [{"segment": [17.50316848673947, 32.07386895475819], "label": "Shaving"}]}, "m11-00dRe40": {"duration": 204.615, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=m11-00dRe40", "annotations": []}, "uDmEOkAXTfo": {"duration": 127.85, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uDmEOkAXTfo", "annotations": [{"segment": [102.6794305772231, 111.25266458658346], "label": "Getting a haircut"}]}, "3osNjmYjTGg": {"duration": 28.38, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=3osNjmYjTGg", "annotations": [{"segment": [0.01, 28.01172230889236], "label": "Pole vault"}]}, "nEj34gf508E": {"duration": 72.26, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nEj34gf508E", "annotations": [{"segment": [25.138987255850235, 54.674479009360375], "label": "Using parallel bars"}]}, "y-OZ45FQZ0g": {"duration": 60.98, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=y-OZ45FQZ0g", "annotations": [{"segment": [4.27376045, 10.64582417472699], "label": "Playing guitarra"}, {"segment": [11.032662850000001, 55.32026490795632], "label": "Playing guitarra"}]}, "W6fPv5eoXC4": {"duration": 45.02, "subset": "testing", "resolution": "644x480", "url": "https://www.youtube.com/watch?v=W6fPv5eoXC4", "annotations": []}, "kKWHrXgGPDk": 
{"duration": 55.19, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kKWHrXgGPDk", "annotations": []}, "rMdojBVP-aM": {"duration": 29.351, "subset": "training", "resolution": "202x360", "url": "https://www.youtube.com/watch?v=rMdojBVP-aM", "annotations": [{"segment": [0.01, 23.901265965678625], "label": "Slacklining"}]}, "kZXVFSjRt5g": {"duration": 141.782, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kZXVFSjRt5g", "annotations": [{"segment": [20.349229117004683, 128.06743107332295], "label": "Rafting"}]}, "ILIpCfCWyT0": {"duration": 139.16, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ILIpCfCWyT0", "annotations": [{"segment": [12.58636193447738, 15.62444929797192], "label": "Skateboarding"}, {"segment": [23.219667706708268, 128.05209048361934], "label": "Skateboarding"}]}, "J_jcark5INI": {"duration": 157.223, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=J_jcark5INI", "annotations": []}, "eGHua_oQFpA": {"duration": 126.69, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eGHua_oQFpA", "annotations": []}, "wyOf_L4cNHc": {"duration": 235.38, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=wyOf_L4cNHc", "annotations": [{"segment": [16.70795198474606, 35.43554649750389], "label": "Playing piano"}, {"segment": [60.772880262558495, 70.68748912714508], "label": "Playing piano"}, {"segment": [83.90696761326053, 91.25112232776911], "label": "Playing piano"}, {"segment": [99.69690024945396, 106.30663949251169], "label": "Playing piano"}, {"segment": [114.75241741419656, 136.41767382199689], "label": "Playing piano"}, {"segment": [141.55858212215287, 163.95825400140404], "label": "Playing piano"}, {"segment": [169.09916230156006, 189.29558776645865], "label": "Playing piano"}, {"segment": [197.3741579524181, 199.21019663104522], 
"label": "Playing piano"}, {"segment": [202.14785851684866, 204.71831266692666], "label": "Playing piano"}, {"segment": [212.42967511716066, 230.79006190343213], "label": "Playing piano"}]}, "tBNOJJx4Z9k": {"duration": 127.85, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=tBNOJJx4Z9k", "annotations": [{"segment": [52.43341965678627, 100.28140717628705], "label": "Hand washing clothes"}]}, "Yzb_4XMgcM4": {"duration": 143.268, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Yzb_4XMgcM4", "annotations": [{"segment": [0.01, 136.31358814352575], "label": "Baton twirling"}]}, "TcxOTZ4xnQ4": {"duration": 39.61, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TcxOTZ4xnQ4", "annotations": [{"segment": [6.79860567855835, 33.00098106796645], "label": "Tumbling"}]}, "qF3EbR8y8go": {"duration": 204.1, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qF3EbR8y8go", "annotations": [{"segment": [2.865726384157407, 9.55242128052469], "label": "Painting"}, {"segment": [14.647045963471191, 17.831186390312755], "label": "Painting"}, {"segment": [26.109951500100816, 28.02043575620576], "label": "Painting"}, {"segment": [30.249334054994854, 32.478232353783945], "label": "Painting"}, {"segment": [35.025544695257196, 37.57285703673045], "label": "Painting"}, {"segment": [40.75699739157566, 44.57796589703588], "label": "Painting"}, {"segment": [52.538316950078, 55.722457371294844], "label": "Painting"}, {"segment": [60.817082045241804, 64.00122246645866], "label": "Painting"}, {"segment": [68.45901905616225, 72.27998756162246], "label": "Painting"}, {"segment": [75.14571394071763, 87.56386158346334], "label": "Painting"}, {"segment": [95.20579859438378, 102.84773560530421], "label": "Painting"}, {"segment": [106.98711815288611, 112.40015686895475], "label": "Painting"}, {"segment": [121.31575004836193, 130.5497572698908], 
"label": "Painting"}, {"segment": [142.01266278627145, 148.06252958658345], "label": "Painting"}, {"segment": [150.60984192355696, 153.47556830265208], "label": "Painting"}, {"segment": [157.2965368081123, 159.8438491450858], "label": "Painting"}, {"segment": [166.84895807176287, 170.66992657722307], "label": "Painting"}, {"segment": [172.5804108299532, 178.6302776302652], "label": "Painting"}, {"segment": [183.40648826209048, 197.0982920733229], "label": "Painting"}]}, "StTr5O_wGXI": {"duration": 44.049, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=StTr5O_wGXI", "annotations": [{"segment": [11.888218309684769, 39.65030037391972], "label": "Beach soccer"}]}, "DPctp-VRHxk": {"duration": 147.447, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DPctp-VRHxk", "annotations": []}, "aaZtnSeatP0": {"duration": 6.57, "subset": "testing", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=aaZtnSeatP0", "annotations": []}, "uxMOn-NmmZo": {"duration": 91.67, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uxMOn-NmmZo", "annotations": [{"segment": [32.16025741029641, 91.601], "label": "Starting a campfire"}]}, "StvwTXdXG2U": {"duration": 183.35, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=StvwTXdXG2U", "annotations": [{"segment": [0, 130.71523077249623], "label": "Skateboarding"}]}, "boQbuyDjWnc": {"duration": 207.261, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=boQbuyDjWnc", "annotations": [{"segment": [12.286942171606865, 162.9636540655226], "label": "Playing blackjack"}]}, "4WikrzXQ3Bo": {"duration": 75.047, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=4WikrzXQ3Bo", "annotations": [{"segment": [7.492983822754271, 73.29074694227769], "label": "Rope skipping"}]}, "V9ktV85lA9E": {"duration": 88.96000000000001, 
"subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=V9ktV85lA9E", "annotations": [{"segment": [18.734808229329172, 76.18822013260531], "label": "Wrapping presents"}]}, "crTQ2qXSW5c": {"duration": 227.417, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=crTQ2qXSW5c", "annotations": []}, "CCMtNNSzHw8": {"duration": 122.53, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=CCMtNNSzHw8", "annotations": [{"segment": [4.00273576, 68.30952387], "label": "Putting on makeup"}]}, "bJ6SpcLM7GE": {"duration": 85.1, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=bJ6SpcLM7GE", "annotations": [{"segment": [9.293415460218409, 23.100775572542904], "label": "Playing saxophone"}, {"segment": [30.535507940717633, 59.07956971138846], "label": "Playing saxophone"}]}, "U6-j4rUn3dk": {"duration": 169.065, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=U6-j4rUn3dk", "annotations": [{"segment": [0.01, 169.06448979591838], "label": "Trimming branches or hedges"}]}, "CIJp-SG3sWE": {"duration": 28.58, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CIJp-SG3sWE", "annotations": [{"segment": [16.4299984399376, 21.684035881435257], "label": "Plataform diving"}]}, "auO4r3De6vc": {"duration": 180.652, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=auO4r3De6vc", "annotations": []}, "bF3iEQf3TUc": {"duration": 192.587, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bF3iEQf3TUc", "annotations": []}, "Tz3OXEWBSVA": {"duration": 122.904, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Tz3OXEWBSVA", "annotations": [{"segment": [71.26716038685399, 74.90448309579219], "label": "Spread mulch"}, {"segment": [79.11611991666798, 98.06848561060912], "label": 
"Spread mulch"}, {"segment": [103.62018869267268, 106.10888317773565], "label": "Spread mulch"}]}, "Ib3XqmBqy10": {"duration": 201.875, "subset": "training", "resolution": "854x478", "url": "https://www.youtube.com/watch?v=Ib3XqmBqy10", "annotations": [{"segment": [10.705014040561622, 29.91106864274571], "label": "Rope skipping"}, {"segment": [38.097255850234006, 51.321096723868955], "label": "Rope skipping"}, {"segment": [56.3587503900156, 61.711257410296405], "label": "Rope skipping"}, {"segment": [71.47171138845555, 96.98444305772232], "label": "Rope skipping"}, {"segment": [103.28213416536661, 141.38316536661466], "label": "Rope skipping"}, {"segment": [152.08924024961, 158.0720468018721], "label": "Rope skipping"}, {"segment": [163.42508424336975, 175.39069734789393], "label": "Rope skipping"}, {"segment": [188.93073322932918, 191.13492511700468], "label": "Rope skipping"}]}, "PKQ5iJjiDDg": {"duration": 6.781, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PKQ5iJjiDDg", "annotations": []}, "WUYSGCwm6OM": {"duration": 156.294, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=WUYSGCwm6OM", "annotations": [{"segment": [17.555592346142827, 149.4663626136882], "label": "Putting in contact lenses"}]}, "UgXPt2LydrY": {"duration": 26.43, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UgXPt2LydrY", "annotations": [{"segment": [2.0611785015618316, 25.06393057899187], "label": "Volleyball"}]}, "H_Spgfds1iY": {"duration": 119.049, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=H_Spgfds1iY", "annotations": [{"segment": [55.28620592823713, 94.07657878315132], "label": "Playing pool"}]}, "PmeBYO3ARvk": {"duration": 93.879, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=PmeBYO3ARvk", "annotations": [{"segment": [0.2, 91.97706908722742], "label": "Baton twirling"}]}, 
"VlfnvORj6KY": {"duration": 145.102, "subset": "training", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=VlfnvORj6KY", "annotations": [{"segment": [5.740776003749705, 135.47326530281512], "label": "Playing congas"}]}, "ogQozSI5V8U": {"duration": 36.55, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=ogQozSI5V8U", "annotations": [{"segment": [0.01, 36.548207], "label": "Walking the dog"}]}, "SEyQMOMPT7A": {"duration": 120.628, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SEyQMOMPT7A", "annotations": []}, "FiJi18WpPe8": {"duration": 30.0, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=FiJi18WpPe8", "annotations": [{"segment": [0, 5.66306068798752], "label": "Brushing teeth"}, {"segment": [9.36043088923557, 30.000181], "label": "Brushing teeth"}]}, "_vK_sDOdgbM": {"duration": 62.253, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=_vK_sDOdgbM", "annotations": [{"segment": [3.8185474999092075, 9.8274291198469], "label": "Putting in contact lenses"}, {"segment": [13.80104438464441, 19.325338777167772], "label": "Putting in contact lenses"}, {"segment": [22.232862141653754, 41.5194337927441], "label": "Putting in contact lenses"}, {"segment": [44.233122266264346, 52.0834353503765], "label": "Putting in contact lenses"}]}, "p1uwQVtHHOQ": {"duration": 92.718, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=p1uwQVtHHOQ", "annotations": []}, "vGcH8N8sJlM": {"duration": 181.163, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=vGcH8N8sJlM", "annotations": [{"segment": [23.457805229215975, 61.32944258722729], "label": "Croquet"}, {"segment": [87.04824109154842, 94.96171755441647], "label": "Croquet"}, {"segment": [100.04895242340305, 133.39860237129483], "label": "Croquet"}]}, "a6kF1_4rs2E": {"duration": 101.425, 
"subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=a6kF1_4rs2E", "annotations": [{"segment": [7.800968153730731, 88.81101955559988], "label": "Waxing skis"}]}, "56sQMnSUWaI": {"duration": 95.59, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=56sQMnSUWaI", "annotations": [{"segment": [2.1624453978159126, 91.19554212168487], "label": "Using the balance beam"}]}, "zRqJZ0u6f88": {"duration": 91.78999999999999, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zRqJZ0u6f88", "annotations": []}, "NRdxsuSCJyM": {"duration": 62.114, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NRdxsuSCJyM", "annotations": [{"segment": [0.29070234009360374, 49.12869547581903], "label": "Scuba diving"}]}, "bY0vSkxH0PE": {"duration": 20.2, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=bY0vSkxH0PE", "annotations": [{"segment": [0.3463198127925117, 8.689478939157567], "label": "Shoveling snow"}]}, "6fuOwhx91zM": {"duration": 92.67, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6fuOwhx91zM", "annotations": [{"segment": [74.32558502340093, 84.43790951638064], "label": "Skateboarding"}]}, "zBg0FEtpC-o": {"duration": 223.07999999999998, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zBg0FEtpC-o", "annotations": [{"segment": [63.09368325, 190.82743074], "label": "Starting a campfire"}]}, "Oskk7eLgerM": {"duration": 78.67, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Oskk7eLgerM", "annotations": []}, "THOVGzbo8w8": {"duration": 122.32, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=THOVGzbo8w8", "annotations": [{"segment": [0, 63.14044461778471], "label": "Playing lacrosse"}, {"segment": [103.39009360374415, 122.075], "label": "Playing 
lacrosse"}]}, "qm8sJxsZ5VY": {"duration": 223.7, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qm8sJxsZ5VY", "annotations": [{"segment": [28.616953073634942, 150.4134972528861], "label": "Getting a haircut"}]}, "HCub1SN4X20": {"duration": 232.107, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=HCub1SN4X20", "annotations": [{"segment": [203.46591887675507, 231.86700000000002], "label": "Swinging at the playground"}]}, "NPt1niJMbvE": {"duration": 16.86, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NPt1niJMbvE", "annotations": [{"segment": [4.080054602184087, 16.451833073322934], "label": "Long jump"}]}, "G77y1JRjZDU": {"duration": 73.52, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=G77y1JRjZDU", "annotations": [{"segment": [15.935558502340092, 33.132204368174726], "label": "Hand washing clothes"}, {"segment": [35.42509048361934, 39.89621840873635], "label": "Hand washing clothes"}, {"segment": [41.730527301092046, 62.59579095163806], "label": "Hand washing clothes"}]}, "FOZdNPQ7d5Q": {"duration": 86.28999999999999, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FOZdNPQ7d5Q", "annotations": []}, "JMemapZ0KE4": {"duration": 20.2, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JMemapZ0KE4", "annotations": []}, "HCZpS7mhvrE": {"duration": 220.219, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HCZpS7mhvrE", "annotations": [{"segment": [40.25997155130689, 82.44035005597979], "label": "Waxing skis"}, {"segment": [92.38531734569942, 107.13130332700783], "label": "Waxing skis"}, {"segment": [132.85104631766202, 146.22531267280223], "label": "Waxing skis"}]}, "K1EW6wZDnOs": {"duration": 56.19, "subset": "training", "resolution": "600x480", "url": 
"https://www.youtube.com/watch?v=K1EW6wZDnOs", "annotations": [{"segment": [0, 56.141], "label": "Grooming horse"}]}, "IajP-SB2D5c": {"duration": 182.07, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IajP-SB2D5c", "annotations": [{"segment": [30.6054627, 105.51510918000001], "label": "Long jump"}]}, "am1_JLFDFMw": {"duration": 172.76, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=am1_JLFDFMw", "annotations": []}, "1w9MbbXJZVs": {"duration": 72.35, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1w9MbbXJZVs", "annotations": []}, "3YiGMRp-7B4": {"duration": 23.96, "subset": "training", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=3YiGMRp-7B4", "annotations": [{"segment": [3.275769110764431, 23.173138845553822], "label": "Doing karate"}]}, "phDLuo6dYrk": {"duration": 206.07999999999998, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=phDLuo6dYrk", "annotations": [{"segment": [9.325401848727012, 150.49269087051482], "label": "Playing field hockey"}, {"segment": [152.10051876443057, 180.71985527613103], "label": "Playing field hockey"}, {"segment": [183.93551106396254, 197.76283095163808], "label": "Playing field hockey"}]}, "K90dEpCDiM0": {"duration": 174.498, "subset": "testing", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=K90dEpCDiM0", "annotations": []}, "BHQasKzBDQU": {"duration": 94.715, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=BHQasKzBDQU", "annotations": [{"segment": [0.01, 94.71419501133786], "label": "Arm wrestling"}]}, "ZA_XYgP4fMA": {"duration": 70.473, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ZA_XYgP4fMA", "annotations": [{"segment": [4.617546898595944, 69.81291144305771], "label": "Arm wrestling"}]}, "89rUyq2rCJ8": {"duration": 41.448, "subset": "training", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=89rUyq2rCJ8", "annotations": [{"segment": [0.01, 36.468731850234015], "label": "Surfing"}]}, "RjztFj0Zl40": {"duration": 109.2, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RjztFj0Zl40", "annotations": []}, "h2e8XAsjPEA": {"duration": 125.76, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=h2e8XAsjPEA", "annotations": []}, "iC1C5wfb0iU": {"duration": 25.101, "subset": "training", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=iC1C5wfb0iU", "annotations": [{"segment": [3.9550356762569825, 18.482939001913817], "label": "Table soccer"}]}, "0lzqfDIWXtw": {"duration": 84.312, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0lzqfDIWXtw", "annotations": [{"segment": [5.129726299256052, 61.81977750390016], "label": "Curling"}, {"segment": [69.5801325522621, 84.311654], "label": "Curling"}]}, "KfzVxgHEyzI": {"duration": 20.04, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KfzVxgHEyzI", "annotations": [{"segment": [10.663357254290172, 17.647544461778473], "label": "Playing kickball"}]}, "v0cihSAXQbI": {"duration": 115.15, "subset": "validation", "resolution": "634x360", "url": "https://www.youtube.com/watch?v=v0cihSAXQbI", "annotations": [{"segment": [9.874344773790952, 26.570964118564746], "label": "Javelin throw"}, {"segment": [36.4453088923557, 53.51029329173167], "label": "Javelin throw"}, {"segment": [70.3893790951638, 85.4728174726989], "label": "Javelin throw"}]}, "K757XUwinrc": {"duration": 28.56, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=K757XUwinrc", "annotations": [{"segment": [7.21942875, 27.15465678627145], "label": "Brushing hair"}, {"segment": [0.3986817472698908, 6.910483619344774], "label": "Brushing hair"}]}, "vrMWYB0UuGM": {"duration": 33.07, "subset": "training", 
"resolution": "320x240", "url": "https://www.youtube.com/watch?v=vrMWYB0UuGM", "annotations": [{"segment": [4.64254174726989, 31.105029706708265], "label": "Playing squash"}]}, "RXDVJlb97OI": {"duration": 96.92, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RXDVJlb97OI", "annotations": []}, "W3a7BIqWov4": {"duration": 34.064, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=W3a7BIqWov4", "annotations": [{"segment": [3.3479117450412303, 15.41102231844376], "label": "Playing ten pins"}]}, "fJ7gcHxxJMM": {"duration": 5.3, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fJ7gcHxxJMM", "annotations": [{"segment": [0.1476170046801872, 5.207], "label": "Springboard diving"}]}, "H8ddGMpqBSw": {"duration": 125.04, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=H8ddGMpqBSw", "annotations": []}, "tik7rHU_DM8": {"duration": 44.397, "subset": "validation", "resolution": "712x480", "url": "https://www.youtube.com/watch?v=tik7rHU_DM8", "annotations": [{"segment": [0.2, 44.141], "label": "Bullfighting"}]}, "fgP3D3Gqn6U": {"duration": 105.535, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=fgP3D3Gqn6U", "annotations": [{"segment": [5.350823012512337, 15.064624789073195], "label": "Calf roping"}, {"segment": [31.03477347257156, 39.9253717087459], "label": "Calf roping"}, {"segment": [47.169562864147224, 52.6027062306982], "label": "Calf roping"}]}, "ll4HKY9ZBOA": {"duration": 125.458, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ll4HKY9ZBOA", "annotations": [{"segment": [1.5657711696222951, 37.96995086334066], "label": "Drum corps"}, {"segment": [43.2544285608159, 123.50020100395852], "label": "Drum corps"}]}, "l5QQ1vVctOo": {"duration": 25.473, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=l5QQ1vVctOo", 
"annotations": [{"segment": [0.2, 22.8973291653557], "label": "Putting in contact lenses"}]}, "A9VCuZ53ObQ": {"duration": 24.823, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=A9VCuZ53ObQ", "annotations": []}, "KO7wfRb38J0": {"duration": 219.034, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KO7wfRb38J0", "annotations": []}, "LIaNG_UliKk": {"duration": 206.728, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LIaNG_UliKk", "annotations": [{"segment": [58.83527917666007, 195.1530958485108], "label": "Skiing"}]}, "z08g5S7J-CY": {"duration": 57.214, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=z08g5S7J-CY", "annotations": [{"segment": [6.509755070202808, 45.40581747269891], "label": "Hand car wash"}]}, "fAJDaOuzgQI": {"duration": 169.738, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fAJDaOuzgQI", "annotations": []}, "GHcNgllmcpM": {"duration": 168.438, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GHcNgllmcpM", "annotations": [{"segment": [21.280001560062402, 122.4256879875195], "label": "Playing drums"}, {"segment": [126.10371294851795, 149.48544305772234], "label": "Playing drums"}]}, "oOURAJ7nraU": {"duration": 93.11, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oOURAJ7nraU", "annotations": [{"segment": [13.363971402393512, 25.85637945245701], "label": "Long jump"}, {"segment": [24.54903442396199, 26.727942804787023], "label": "Long jump"}, {"segment": [38.494048061242175, 55.344272872955734], "label": "Long jump"}, {"segment": [68.70824343369735, 85.70372859594383], "label": "Long jump"}]}, "r34VM4v0ShI": {"duration": 136.4, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=r34VM4v0ShI", "annotations": [{"segment": [11.379947942277692, 
107.31184554914196], "label": "Polishing shoes"}]}, "wqc2KnHfPHk": {"duration": 130.148, "subset": "validation", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=wqc2KnHfPHk", "annotations": [{"segment": [0.01, 130.101], "label": "Drum corps"}]}, "pieT6K771WQ": {"duration": 65.13, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=pieT6K771WQ", "annotations": [{"segment": [4.60801825, 52.189843993759744], "label": "Hopscotch"}]}, "WpQHQeY43zo": {"duration": 198.25, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WpQHQeY43zo", "annotations": [{"segment": [9.739036628705149, 184.1141686474259], "label": "Playing accordion"}]}, "VkRjs03YEjE": {"duration": 234.499, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=VkRjs03YEjE", "annotations": [{"segment": [0.4648788119555858, 13.614300898547231], "label": "Futsal"}]}, "Cdiv-IzHcjQ": {"duration": 151.905, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=Cdiv-IzHcjQ", "annotations": [{"segment": [35.413774570982845, 71.65663416536663], "label": "Waxing skis"}, {"segment": [83.97446879875196, 94.87101482059283], "label": "Waxing skis"}]}, "-ew9R4QppTk": {"duration": 184.506, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-ew9R4QppTk", "annotations": [{"segment": [0.01, 46.097706798328154], "label": "Playing congas"}, {"segment": [78.86041580716534, 176.57375720861913], "label": "Playing congas"}]}, "Gr5jGxLEp70": {"duration": 96.363, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Gr5jGxLEp70", "annotations": [{"segment": [9.771579978845413, 16.23585596485084], "label": "Croquet"}, {"segment": [24.20345194760171, 34.87702327613105], "label": "Croquet"}]}, "LnDz1rvDaPY": {"duration": 73.955, "subset": "validation", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=LnDz1rvDaPY", "annotations": [{"segment": [26.77627457098284, 73.981], "label": "Removing ice from car"}]}, "r-GVVgxtoMk": {"duration": 34.761, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=r-GVVgxtoMk", "annotations": [{"segment": [10.19476721372855, 32.4822636224649], "label": "Hitting a pinata"}]}, "mlUg8iMtew0": {"duration": 203.62, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mlUg8iMtew0", "annotations": [{"segment": [40.97723805278742, 70.5189678117737], "label": "Painting"}, {"segment": [77.82498699947998, 86.71927122799197], "label": "Painting"}, {"segment": [90.84876033408683, 99.42539155443768], "label": "Painting"}, {"segment": [105.14314570133826, 114.03742992985025], "label": "Painting"}, {"segment": [120.70814310123426, 129.60242732974623], "label": "Painting"}, {"segment": [135.3201814766468, 145.48507773780338], "label": "Painting"}, {"segment": [150.88517887654282, 158.50885107241024], "label": "Painting"}, {"segment": [164.54425822747197, 171.21497139885594], "label": "Painting"}, {"segment": [176.61507146957877, 187.09762067550702], "label": "Painting"}, {"segment": [194.40363981903278, 196.30955785647427], "label": "Painting"}]}, "rP2MviNn52g": {"duration": 49.064, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rP2MviNn52g", "annotations": [{"segment": [9.94591263650546, 35.422750390015594], "label": "Tug of war"}]}, "LV0nevBELso": {"duration": 162.01, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LV0nevBELso", "annotations": [{"segment": [0.01, 161.961], "label": "Cleaning windows"}]}, "yuYqFaAnTxI": {"duration": 106.0, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=yuYqFaAnTxI", "annotations": [{"segment": [14.57450015336807, 105.99909297052154], "label": "Pole vault"}]}, "fxEcsM0EaA4": {"duration": 
20.295, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=fxEcsM0EaA4", "annotations": [{"segment": [0, 20.2942403], "label": "Rope skipping"}]}, "N3rlaQHKrXY": {"duration": 126.828, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=N3rlaQHKrXY", "annotations": []}, "xAMZGWqRmqE": {"duration": 63.577, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=xAMZGWqRmqE", "annotations": [{"segment": [0.2, 62.28685639937598], "label": "Disc dog"}]}, "CRNycmwvGXs": {"duration": 152.86, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=CRNycmwvGXs", "annotations": [{"segment": [89.54413179407175, 131.75268393135724], "label": "Smoking a cigarette"}]}, "Qs0hIRhHPVM": {"duration": 60.77, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=Qs0hIRhHPVM", "annotations": [{"segment": [26.89804717394696, 39.56707466536662], "label": "Doing kickboxing"}, {"segment": [41.93069919734789, 49.7146234], "label": "Doing kickboxing"}]}, "j7Tk8I_DCtw": {"duration": 27.93, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=j7Tk8I_DCtw", "annotations": [{"segment": [0, 27.086033541341653], "label": "Playing bagpipes"}]}, "UudvO6y1jUw": {"duration": 160.52, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UudvO6y1jUw", "annotations": []}, "Flh-37A4HWY": {"duration": 18.04, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Flh-37A4HWY", "annotations": [{"segment": [0.01, 12.01855316848674], "label": "Drinking coffee"}]}, "5sdUW55eS4M": {"duration": 95.481, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5sdUW55eS4M", "annotations": [{"segment": [0.01, 59.40015866151346], "label": "Mooping floor"}]}, "xlGJt4qHLhM": {"duration": 41.1, "subset": "testing", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xlGJt4qHLhM", "annotations": []}, "ZmUk9OQ1zfk": {"duration": 124.227, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZmUk9OQ1zfk", "annotations": [{"segment": [0.01, 95.10616380655226], "label": "Beach soccer"}]}, "S_1_ZSMxRfg": {"duration": 212.74, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=S_1_ZSMxRfg", "annotations": [{"segment": [14.51531843, 157.82403826], "label": "Making a sandwich"}]}, "N8BlpYSpgg4": {"duration": 52.153, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=N8BlpYSpgg4", "annotations": [{"segment": [0.01, 50.52976710207217], "label": "Bullfighting"}]}, "ig8RaXkocJ0": {"duration": 215.9, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=ig8RaXkocJ0", "annotations": [{"segment": [6.522903005261287, 214.31433291180335], "label": "Doing a powerbomb"}]}, "BH-kBRn84i8": {"duration": 22.106, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=BH-kBRn84i8", "annotations": [{"segment": [0.19999999999999998, 19.832877757009346], "label": "Cutting the grass"}]}, "jpclX7wgcZU": {"duration": 32.207, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jpclX7wgcZU", "annotations": [{"segment": [0.2, 31.7044237258567], "label": "River tubing"}]}, "x8cuLOUppmU": {"duration": 45.23, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=x8cuLOUppmU", "annotations": [{"segment": [33.94199386895476, 35.35330338533541], "label": "Hammer throw"}]}, "WtBr459dWY4": {"duration": 96.97, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WtBr459dWY4", "annotations": [{"segment": [23.75647581903276, 82.6180624024961], "label": "Dodgeball"}]}, "AGDsfpZQBIs": {"duration": 20.69, "subset": "training", 
"resolution": "400x226", "url": "https://www.youtube.com/watch?v=AGDsfpZQBIs", "annotations": [{"segment": [2.130222383775351, 20.65670190327613], "label": "Shot put"}]}, "V2PhVadSbpQ": {"duration": 72.749, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=V2PhVadSbpQ", "annotations": [{"segment": [5.257796514337343, 66.10772400031865], "label": "Raking leaves"}]}, "coK0QVLhuj0": {"duration": 92.648, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=coK0QVLhuj0", "annotations": [{"segment": [3.6134016789243, 7.949483680967238], "label": "Grooming dog"}, {"segment": [15.031750960374415, 47.841438152730106], "label": "Grooming dog"}, {"segment": [65.33030225085803, 66.1975186524181], "label": "Grooming dog"}, {"segment": [78.33854827425897, 84.9872073528861], "label": "Grooming dog"}]}, "FARJEomZRrc": {"duration": 139.576, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FARJEomZRrc", "annotations": [{"segment": [70.30465366614665, 93.39015444617785], "label": "Painting fence"}]}, "ZH8hnmjRDsI": {"duration": 59.049, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZH8hnmjRDsI", "annotations": [{"segment": [6.540456088923557, 59.04834467120181], "label": "Building sandcastles"}]}, "v5patZyuYys": {"duration": 213.23, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=v5patZyuYys", "annotations": [{"segment": [28.607925966018232, 39.58538592972291], "label": "Cricket"}, {"segment": [48.234293779914466, 49.897545289566686], "label": "Cricket"}, {"segment": [82.1646245768198, 97.13388816368982], "label": "Cricket"}, {"segment": [102.78894329650737, 135.72132318762138], "label": "Cricket"}]}, "1QIUV7WYKXg": {"duration": 10.08, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1QIUV7WYKXg", "annotations": []}, "mn1ozn0j7D8": {"duration": 131.43, 
"subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mn1ozn0j7D8", "annotations": [{"segment": [22.542449297971917, 31.969291731669266], "label": "Shaving"}, {"segment": [59.22516224648985, 100.22669110764431], "label": "Shaving"}]}, "zc7_D9RiOY0": {"duration": 66.526, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=zc7_D9RiOY0", "annotations": [{"segment": [0.01, 58.845196291731675], "label": "Scuba diving"}]}, "4WUFEnFE5sY": {"duration": 219.75, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4WUFEnFE5sY", "annotations": [{"segment": [10.625439385335415, 29.819781500780035], "label": "Belly dance"}]}, "6Dt510HMEWA": {"duration": 31.09, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6Dt510HMEWA", "annotations": [{"segment": [0, 12.816902496099845], "label": "Kayaking"}, {"segment": [10.297133385335414, 12.962273790951638], "label": "Kayaking"}]}, "in5nO2Rh6Fc": {"duration": 69.57, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=in5nO2Rh6Fc", "annotations": []}, "VhADj7cF6h8": {"duration": 134.235, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VhADj7cF6h8", "annotations": []}, "TdqEtrrPX_Q": {"duration": 27.214, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=TdqEtrrPX_Q", "annotations": [{"segment": [6.186436817472699, 26.18642433697348], "label": "Disc dog"}]}, "-sXrwv7C5CI": {"duration": 54.15, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-sXrwv7C5CI", "annotations": [{"segment": [5.23749439625585, 49.16486675195008], "label": "Using parallel bars"}]}, "Zi2Pah5-BXI": {"duration": 156.34, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Zi2Pah5-BXI", "annotations": [{"segment": [29.02403855070203, 
134.14471599063964], "label": "Getting a tattoo"}]}, "Irmh66trRY4": {"duration": 238.469, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Irmh66trRY4", "annotations": [{"segment": [19.34124180967239, 39.798324492979724], "label": "Baking cookies"}]}, "ZqkJAD0CGBU": {"duration": 71.425, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ZqkJAD0CGBU", "annotations": [{"segment": [2.4492074882995323, 70.49012948517941], "label": "Hitting a pinata"}]}, "QZxtQ49Ug7U": {"duration": 193.86, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QZxtQ49Ug7U", "annotations": []}, "Y-SehqCvzME": {"duration": 238.47, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Y-SehqCvzME", "annotations": [{"segment": [13.392923047534147, 207.96233287698854], "label": "Playing guitarra"}]}, "Yr2unlRQZa0": {"duration": 118.608, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Yr2unlRQZa0", "annotations": [{"segment": [0.2, 48.93047507788162], "label": "Elliptical trainer"}, {"segment": [54.28513084112149, 62.23529127725857], "label": "Elliptical trainer"}, {"segment": [68.69889719626168, 72.7617352024922], "label": "Elliptical trainer"}, {"segment": [79.41001557632399, 83.28817912772587], "label": "Elliptical trainer"}, {"segment": [102.86367133956388, 115.23685981308412], "label": "Elliptical trainer"}]}, "-cApkmTI3F8": {"duration": 19.04, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=-cApkmTI3F8", "annotations": [{"segment": [0.01, 18.06012400624025], "label": "Drinking beer"}]}, "1UgjxeAPq_A": {"duration": 179.03, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1UgjxeAPq_A", "annotations": [{"segment": [10.672597615272744, 36.048535903360566], "label": "Painting"}, {"segment": [60.866761262039866, 158.74537918491345], 
"label": "Painting"}]}, "G71xFbDSSno": {"duration": 30.33, "subset": "validation", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=G71xFbDSSno", "annotations": [{"segment": [3.973980898595944, 26.587824583463338], "label": "Washing face"}]}, "NA5siWQEegU": {"duration": 225.745, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NA5siWQEegU", "annotations": []}, "c1eUdyyT4zg": {"duration": 32.253, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=c1eUdyyT4zg", "annotations": [{"segment": [0.2, 26.734977505234312], "label": "Riding bumper cars"}]}, "Y7BBrdCwIJw": {"duration": 130.13, "subset": "validation", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=Y7BBrdCwIJw", "annotations": [{"segment": [3.6364440200000003, 128.749540860546], "label": "Playing accordion"}]}, "_Z7DaKYQ9J8": {"duration": 191.08, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_Z7DaKYQ9J8", "annotations": []}, "hXSee4C6pyE": {"duration": 75.024, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hXSee4C6pyE", "annotations": [{"segment": [10.7601248049922, 65.73208112324492], "label": "Assembling bicycle"}]}, "4-KiAur4dI8": {"duration": 114.68, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4-KiAur4dI8", "annotations": []}, "V-kMGMX-l2g": {"duration": 90.02, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=V-kMGMX-l2g", "annotations": [{"segment": [8.145677893915757, 80.75456532761311], "label": "Zumba"}]}, "kHTcsP6nQdY": {"duration": 52.129, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kHTcsP6nQdY", "annotations": [{"segment": [0.01, 0.02], "label": "Fun sliding down"}]}, "3f6G-qzwzfg": {"duration": 205.36, "subset": "validation", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=3f6G-qzwzfg", "annotations": [{"segment": [24.988873020280813, 140.32213311388458], "label": "Cricket"}, {"segment": [108.9258567550702, 128.14806677067085], "label": "Cricket"}, {"segment": [136.47769111076443, 140.64250328081124], "label": "Cricket"}, {"segment": [151.53508895631828, 199.5906139953198], "label": "Cricket"}]}, "tOW6_7jmht8": {"duration": 137.741, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tOW6_7jmht8", "annotations": []}, "Wdp8yduPUX4": {"duration": 161.96, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Wdp8yduPUX4", "annotations": [{"segment": [17.433983889967845, 30.572638415740713], "label": "Longboarding"}, {"segment": [32.84663631443217, 37.394632111815085], "label": "Longboarding"}, {"segment": [40.93196217644624, 50.280620204400016], "label": "Longboarding"}, {"segment": [54.828616001782926, 65.18793976248847], "label": "Longboarding"}, {"segment": [68.72526982711962, 84.64325511795984], "label": "Longboarding"}, {"segment": [96.77124391098094, 145.7885304430577], "label": "Longboarding"}]}, "20ooSJixdyg": {"duration": 75.61, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=20ooSJixdyg", "annotations": [{"segment": [0, 75.561], "label": "Mixing drinks"}]}, "dD2R0yyk87g": {"duration": 104.119, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dD2R0yyk87g", "annotations": []}, "j-qAgWm_Ylw": {"duration": 185.9, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=j-qAgWm_Ylw", "annotations": [{"segment": [4.783738689547582, 8.262821372854914], "label": "Long jump"}, {"segment": [14.351216068642747, 18.410145865834636], "label": "Long jump"}, {"segment": [23.048922776911077, 28.84739391575663], "label": "Long jump"}, {"segment": [32.326476599063966, 42.18387753510141], "label": "Long jump"}, {"segment": 
[48.272272230889236, 53.49089625585024], "label": "Long jump"}, {"segment": [71.75608034321372, 90.3111879875195], "label": "Long jump"}, {"segment": [103.93759516380656, 111.47560764430578], "label": "Long jump"}, {"segment": [122.49270280811233, 140.46796333853354], "label": "Long jump"}, {"segment": [148.8757464898596, 163.66184789391576], "label": "Long jump"}, {"segment": [176.70840795631827, 185.64100000000002], "label": "Long jump"}]}, "jrXoqceGS-k": {"duration": 202.966, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jrXoqceGS-k", "annotations": [{"segment": [0.01, 184.2615631825273], "label": "Rollerblading"}]}, "fBbjlXgtd50": {"duration": 29.12, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=fBbjlXgtd50", "annotations": [{"segment": [0, 29.117822], "label": "Cumbia"}]}, "kW4ajodPtWU": {"duration": 194.58, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kW4ajodPtWU", "annotations": [{"segment": [24.09326706, 101.97900446999999], "label": "Tumbling"}, {"segment": [112.94959401000001, 123.04066839], "label": "Tumbling"}]}, "Zz2Cv6Epkes": {"duration": 113.128, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Zz2Cv6Epkes", "annotations": []}, "I5RVQUYA1w4": {"duration": 33.72, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=I5RVQUYA1w4", "annotations": [{"segment": [15.643422850000002, 26.466275000000003], "label": "Hopscotch"}]}, "SNQ3dLJKgro": {"duration": 236.89, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=SNQ3dLJKgro", "annotations": []}, "s_VFaQTlskE": {"duration": 121.998, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=s_VFaQTlskE", "annotations": [{"segment": [9.8922496099844, 119.1069984399376], "label": "Playing congas"}]}, "f4OyAeQAOi4": {"duration": 34.854, 
"subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=f4OyAeQAOi4", "annotations": []}, "lBhNeACY8y4": {"duration": 206.61, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=lBhNeACY8y4", "annotations": [{"segment": [38.15691948, 197.35845620124803], "label": "Doing kickboxing"}]}, "eQc-8npRq18": {"duration": 4.09, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eQc-8npRq18", "annotations": [{"segment": [0.01, 3.7169305975039], "label": "Pole vault"}]}, "Lu1dzpAN-lE": {"duration": 130.566, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Lu1dzpAN-lE", "annotations": []}, "J959vSGFedM": {"duration": 28.259, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=J959vSGFedM", "annotations": [{"segment": [0.2, 26.643892129036495], "label": "Doing crunches"}]}, "KVIC0mnlhVk": {"duration": 231.921, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=KVIC0mnlhVk", "annotations": []}, "kyvxaxRFLG8": {"duration": 103.59, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kyvxaxRFLG8", "annotations": [{"segment": [7.918294331773271, 63.83114776131045], "label": "Chopping wood"}, {"segment": [72.3958334102964, 77.72856221060842], "label": "Chopping wood"}, {"segment": [0.2, 103.38421699999999], "label": "Chopping wood"}, {"segment": [82.41489964118564, 83.70768238065523], "label": "Chopping wood"}]}, "_8-4M5XVuwI": {"duration": 25.635, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_8-4M5XVuwI", "annotations": [{"segment": [0.2, 16.091644367601244], "label": "Raking leaves"}]}, "lkCpF06uEmE": {"duration": 46.56, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=lkCpF06uEmE", "annotations": [{"segment": [0, 22.442558644305773], "label": "Clean and 
jerk"}, {"segment": [30.577078282371293, 43.650413414976605], "label": "Clean and jerk"}]}, "yN3e-giBxns": {"duration": 35.202, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yN3e-giBxns", "annotations": [{"segment": [8.731717174726988, 14.992193639625583], "label": "Calf roping"}]}, "wBij_rEXdCk": {"duration": 183.58, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wBij_rEXdCk", "annotations": [{"segment": [46.39699824, 77.973906], "label": "Breakdancing"}, {"segment": [5.3328828, 31.031687039999998], "label": "Breakdancing"}]}, "y76trNI6U0Y": {"duration": 171.36, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=y76trNI6U0Y", "annotations": [{"segment": [0.01, 144.36218808112324], "label": "Bathing dog"}]}, "fOGXvBAmTsY": {"duration": 199.924, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fOGXvBAmTsY", "annotations": [{"segment": [40.85792964507696, 46.47199609828393], "label": "Running a marathon"}, {"segment": [64.87365898283932, 68.61637007800311], "label": "Running a marathon"}, {"segment": [115.08836617628704, 118.83107727145085], "label": "Running a marathon"}]}, "5qh_gx81Bd8": {"duration": 179.77, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=5qh_gx81Bd8", "annotations": [{"segment": [94.79705944, 127.64966144], "label": "Breakdancing"}, {"segment": [52.36821608, 81.24974295999999], "label": "Breakdancing"}, {"segment": [134.02021271138844, 179.521666], "label": "Breakdancing"}, {"segment": [12.05621160374415, 32.80411064274571], "label": "Breakdancing"}]}, "rgAALWYnRrg": {"duration": 28.491, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rgAALWYnRrg", "annotations": [{"segment": [12.000839953198128, 28.290884353741497], "label": "Hitting a pinata"}]}, "NCvNIKw4EZ4": {"duration": 23.267, "subset": "validation", 
"resolution": "544x360", "url": "https://www.youtube.com/watch?v=NCvNIKw4EZ4", "annotations": [{"segment": [6.636917316692667, 10.722117004680186], "label": "Playing pool"}]}, "XbN3TvLEm2A": {"duration": 20.086, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XbN3TvLEm2A", "annotations": [{"segment": [0.01, 19.222654876245517], "label": "Kneeling"}]}, "QjMNQxu3Zf8": {"duration": 70.59, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=QjMNQxu3Zf8", "annotations": [{"segment": [4.071789391575663, 67.91909048361936], "label": "Brushing teeth"}]}, "uatIP3FGQXk": {"duration": 65.09, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uatIP3FGQXk", "annotations": [{"segment": [5.483024452418097, 63.054781202808115], "label": "Playing kickball"}]}, "RFhzBUJw6w0": {"duration": 208.353, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RFhzBUJw6w0", "annotations": []}, "4rvACiBclFI": {"duration": 227.927, "subset": "training", "resolution": "416x320", "url": "https://www.youtube.com/watch?v=4rvACiBclFI", "annotations": [{"segment": [0.2, 39.46943073790952], "label": "Changing car wheel"}, {"segment": [72.8939937051482, 92.45091884555382], "label": "Changing car wheel"}, {"segment": [170.323038950078, 188.8132227191888], "label": "Changing car wheel"}, {"segment": [204.10318237441498, 227.927073], "label": "Changing car wheel"}]}, "DZVJtWJrWTU": {"duration": 209.607, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=DZVJtWJrWTU", "annotations": [{"segment": [0.01, 51.3479608550488], "label": "Blow-drying hair"}, {"segment": [75.83470508545125, 166.27241377640428], "label": "Blow-drying hair"}]}, "dL-ybVv7Sgs": {"duration": 33.83, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dL-ybVv7Sgs", "annotations": [{"segment": [11.816895475819033, 
32.92413416536662], "label": "Starting a campfire"}]}, "C4qzKKhLR8g": {"duration": 116.17, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=C4qzKKhLR8g", "annotations": []}, "o8n1YnT8ZZ8": {"duration": 203.848, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=o8n1YnT8ZZ8", "annotations": [{"segment": [38.79789982347593, 136.4286792823713], "label": "Making a lemonade"}]}, "GtfngouN8xs": {"duration": 84.17, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=GtfngouN8xs", "annotations": []}, "umn2TO6ACKs": {"duration": 19.761, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=umn2TO6ACKs", "annotations": []}, "w5xTzAe_O9s": {"duration": 114.428, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=w5xTzAe_O9s", "annotations": []}, "EbBlHnunlSI": {"duration": 123.32, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EbBlHnunlSI", "annotations": [{"segment": [10.388974497755422, 43.86455899052289], "label": "Shaving"}, {"segment": [39.63201382477068, 79.07163923291625], "label": "Shaving"}, {"segment": [90.42255581379719, 109.85378589293232], "label": "Shaving"}]}, "lsgB3WU-1r0": {"duration": 204.592, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lsgB3WU-1r0", "annotations": [{"segment": [0.01, 181.5837893915757], "label": "Doing fencing"}]}, "nnWON1EzK0o": {"duration": 20.155, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nnWON1EzK0o", "annotations": [{"segment": [3.6788229687987517, 14.998278257410297], "label": "Calf roping"}]}, "p4pj27t5B6g": {"duration": 154.16, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=p4pj27t5B6g", "annotations": [{"segment": [110.86818342937799, 113.27313317838835], "label": "Playing saxophone"}]}, 
"kuMevlNUDCs": {"duration": 144.17000000000002, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=kuMevlNUDCs", "annotations": [{"segment": [11.470840523416854, 96.93984828549141], "label": "Washing dishes"}]}, "0GWJ-VHFlTk": {"duration": 66.25, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0GWJ-VHFlTk", "annotations": [{"segment": [6.2008850546021845, 7.441062065522621], "label": "Layup drill in basketball"}, {"segment": [22.323186196567864, 24.28679979719189], "label": "Layup drill in basketball"}, {"segment": [28.007330829953197, 29.350855925117003], "label": "Layup drill in basketball"}, {"segment": [31.72786186271451, 33.48477929485179], "label": "Layup drill in basketball"}, {"segment": [35.75843714820593, 37.618702664586586], "label": "Layup drill in basketball"}, {"segment": [40.71914519188768, 41.95932220280812], "label": "Layup drill in basketball"}, {"segment": [45.36980898283932, 46.50663790951638], "label": "Layup drill in basketball"}, {"segment": [49.40038426833073, 50.5372131950078], "label": "Layup drill in basketball"}]}, "mMm1LfVb8Pg": {"duration": 82.99, "subset": "testing", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=mMm1LfVb8Pg", "annotations": []}, "lDriXPZt_Yg": {"duration": 83.38, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lDriXPZt_Yg", "annotations": [{"segment": [3.642308842433698, 70.11444521684868], "label": "Doing nails"}]}, "eUecHAdv1uU": {"duration": 14.188, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=eUecHAdv1uU", "annotations": [{"segment": [0.2, 9.610039001560063], "label": "Calf roping"}]}, "BWAedc31WdY": {"duration": 155.39, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BWAedc31WdY", "annotations": []}, "coEvniePQLA": {"duration": 143.5, "subset": "validation", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=coEvniePQLA", "annotations": [{"segment": [9.846184087363495, 135.18009984399376], "label": "Making a lemonade"}]}, "LvkqCFRC5Vk": {"duration": 117.05, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=LvkqCFRC5Vk", "annotations": [{"segment": [13.147782964118566, 53.13895614664587], "label": "Playing lacrosse"}, {"segment": [62.99979336973479, 73.77367107644305], "label": "Playing lacrosse"}, {"segment": [81.99103542901716, 96.41707507020281], "label": "Playing lacrosse"}]}, "MkKUQ4MMHd8": {"duration": 37.547, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MkKUQ4MMHd8", "annotations": [{"segment": [0.2, 36.90434169064286], "label": "Camel ride"}]}, "9R2wP-iceaw": {"duration": 151.7, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9R2wP-iceaw", "annotations": [{"segment": [9.229544716482536, 150.2759203837541], "label": "Dodgeball"}]}, "SBj7yuFEwQI": {"duration": 131.82, "subset": "validation", "resolution": "1280x564", "url": "https://www.youtube.com/watch?v=SBj7yuFEwQI", "annotations": [{"segment": [58.99355078050132, 124.06298535994057], "label": "Painting fence"}]}, "Oh83skxSJkQ": {"duration": 152.79, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Oh83skxSJkQ", "annotations": [{"segment": [1.6679875195007803, 152.74], "label": "Playing piano"}]}, "ey6s7PdS4JA": {"duration": 26.96, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ey6s7PdS4JA", "annotations": []}, "ldAHwbF8uWI": {"duration": 187.269, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ldAHwbF8uWI", "annotations": []}, "FEqLmpNzxdg": {"duration": 173.941, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FEqLmpNzxdg", "annotations": [{"segment": [46.40221146805056, 61.32689883307332], 
"label": "Ice fishing"}, {"segment": [86.29183110140404, 136.76441155694226], "label": "Ice fishing"}, {"segment": [148.4328038127925, 164.44292341965678], "label": "Ice fishing"}]}, "1cccnJAK7TI": {"duration": 66.08, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1cccnJAK7TI", "annotations": [{"segment": [0.6185653666146645, 15.051757254290171], "label": "Getting a tattoo"}, {"segment": [24.12404929797192, 35.67060280811232], "label": "Getting a tattoo"}]}, "Y9wrTRY3mUI": {"duration": 228.717, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=Y9wrTRY3mUI", "annotations": [{"segment": [48.510414976599066, 60.281324492979714], "label": "Ice fishing"}, {"segment": [87.74678003120125, 98.80430109204367], "label": "Ice fishing"}, {"segment": [134.11702964118564, 186.21067394695788], "label": "Ice fishing"}]}, "6pY2zz2_CCU": {"duration": 76.417, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6pY2zz2_CCU", "annotations": [{"segment": [5.305072944414374, 34.63199304162642], "label": "Clipping cat claws"}]}, "COwvUuj-KDE": {"duration": 224.31, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=COwvUuj-KDE", "annotations": []}, "IclzwpxVvIk": {"duration": 169.79, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IclzwpxVvIk", "annotations": []}, "D7ZeRbotot0": {"duration": 17.067, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=D7ZeRbotot0", "annotations": [{"segment": [0, 15.176285569422777], "label": "Welding"}]}, "wLXty2wzuJg": {"duration": 109.529, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wLXty2wzuJg", "annotations": []}, "bX8QXK9mjKA": {"duration": 64.088, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bX8QXK9mjKA", "annotations": [{"segment": [0.01, 
64.08668934240363], "label": "Riding bumper cars"}]}, "9bFevj6bomQ": {"duration": 58.213, "subset": "training", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=9bFevj6bomQ", "annotations": [{"segment": [10.171281921996881, 16.709963157566303], "label": "Futsal"}]}, "JD2Mb8pM6E8": {"duration": 63.0, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=JD2Mb8pM6E8", "annotations": []}, "3s4tAYvAr5A": {"duration": 67.45, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3s4tAYvAr5A", "annotations": []}, "dmoZPkjzhQ4": {"duration": 31.858, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dmoZPkjzhQ4", "annotations": []}, "7UrJuhFvlQ4": {"duration": 90.952, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=7UrJuhFvlQ4", "annotations": [{"segment": [0.20000000000000004, 90.721], "label": "Blowing leaves"}]}, "iKH8FgONbVw": {"duration": 25.588, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=iKH8FgONbVw", "annotations": [{"segment": [0.01, 25.587844], "label": "Kneeling"}]}, "I5F59PkcDWM": {"duration": 167.0, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=I5F59PkcDWM", "annotations": [{"segment": [2.6053198127925117, 163.13706685179406], "label": "Playing guitarra"}]}, "VnBZR-yBLXA": {"duration": 117.586, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VnBZR-yBLXA", "annotations": [{"segment": [16.533949158044297, 74.59425154589476], "label": "Peeling potatoes"}, {"segment": [87.2319829804742, 108.11171317673588], "label": "Peeling potatoes"}]}, "pUIicfDCZC0": {"duration": 157.87, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pUIicfDCZC0", "annotations": [{"segment": [24.006142745709827, 145.63726599063963], "label": "Preparing pasta"}]}, 
"U_ia-tINzpw": {"duration": 129.08, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=U_ia-tINzpw", "annotations": [{"segment": [87.99933356942277, 107.93510936661467], "label": "Playing saxophone"}]}, "Ocd1VWIIeVs": {"duration": 64.668, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ocd1VWIIeVs", "annotations": [{"segment": [6.2548716043879855, 7.36460670826833], "label": "Hand car wash"}]}, "FEOdYNR4gls": {"duration": 177.587, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=FEOdYNR4gls", "annotations": []}, "ZJ6BFrKcRe0": {"duration": 154.878, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZJ6BFrKcRe0", "annotations": [{"segment": [0.2, 154.621], "label": "Kite flying"}]}, "mc5XVTo5PVk": {"duration": 16.09, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mc5XVTo5PVk", "annotations": [{"segment": [1.7070469641185648, 10.99539073946958], "label": "Using uneven bars"}]}, "Fi0PQwhsGs4": {"duration": 45.07, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Fi0PQwhsGs4", "annotations": [{"segment": [37.12468655480913, 42.25745571863691], "label": "Rock-paper-scissors"}]}, "m2X4HZxeaGk": {"duration": 153.066, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=m2X4HZxeaGk", "annotations": []}, "cfcYZBrKVP8": {"duration": 55.87, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cfcYZBrKVP8", "annotations": []}, "8QEG_1GhoEc": {"duration": 148.028, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8QEG_1GhoEc", "annotations": [{"segment": [40.87490846572638, 114.77304660374415], "label": "Spread mulch"}]}, "Mh-qSztuLtE": {"duration": 23.34, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=Mh-qSztuLtE", "annotations": [{"segment": [7.0311368, 15.24667792979719], "label": "Zumba"}, {"segment": [1.0909579999999999, 5.30357272], "label": "Zumba"}]}, "_uPVFvEQocU": {"duration": 114.8, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_uPVFvEQocU", "annotations": [{"segment": [51.88226006000001, 81.76329177000001], "label": "Ping-pong"}, {"segment": [22.66821054, 41.36667166], "label": "Ping-pong"}]}, "4o7Abew2ExY": {"duration": 107.695, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4o7Abew2ExY", "annotations": [{"segment": [3.360191856474259, 85.01285396879875], "label": "Ice fishing"}]}, "vK0Jp1KbThQ": {"duration": 208.38, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=vK0Jp1KbThQ", "annotations": []}, "70GQ4Nnrk4E": {"duration": 148.794, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=70GQ4Nnrk4E", "annotations": [{"segment": [52.69285109363558, 146.24006995319812], "label": "Rope skipping"}]}, "YO8EY7miuk8": {"duration": 58.027, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YO8EY7miuk8", "annotations": [{"segment": [19.010296193447736, 56.75931292043682], "label": "Capoeira"}]}, "l3k_7ydV74A": {"duration": 8.963, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=l3k_7ydV74A", "annotations": [{"segment": [0.47537882371294854, 7.913659241809673], "label": "Doing a powerbomb"}]}, "P62B_iDlQXM": {"duration": 30.67, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=P62B_iDlQXM", "annotations": [{"segment": [0, 30.533333], "label": "Mowing the lawn"}]}, "OHNH7IV0768": {"duration": 18.07, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OHNH7IV0768", "annotations": [{"segment": [0.7497308892355694, 
17.3286856474259], "label": "Playing water polo"}]}, "1nXLVjfupto": {"duration": 229.37, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1nXLVjfupto", "annotations": [{"segment": [125.04684711388457, 171.9170530421217], "label": "Wrapping presents"}]}, "gjkK_eSIs2g": {"duration": 202.8, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gjkK_eSIs2g", "annotations": [{"segment": [21.5142117086044, 89.22070149744766], "label": "Snatch"}]}, "ZW46Rcuhqac": {"duration": 33.58, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZW46Rcuhqac", "annotations": [{"segment": [7.82052192, 29.97799584], "label": "Drinking beer"}]}, "wUsmKmxxHYQ": {"duration": 176.728, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wUsmKmxxHYQ", "annotations": [{"segment": [52.38400033960543, 58.72522143334713], "label": "Swimming"}, {"segment": [104.49229541426556, 111.10922177295257], "label": "Swimming"}, {"segment": [137.3012212979719, 144.4695581528861], "label": "Swimming"}]}, "Y1j_e1DXW6I": {"duration": 39.61, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Y1j_e1DXW6I", "annotations": [{"segment": [3.120855960218409, 16.160471952418096], "label": "Dodgeball"}]}, "Lv6ypQZWIEg": {"duration": 118.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Lv6ypQZWIEg", "annotations": [{"segment": [5.890845553822153, 7.731734789391576], "label": "Javelin throw"}, {"segment": [53.38578783151326, 61.680241809672395], "label": "Javelin throw"}, {"segment": [73.27980967238689, 83.95877691107644], "label": "Javelin throw"}, {"segment": [96.29482527301091, 110.47207488299532], "label": "Javelin throw"}]}, "qhubAySHZic": {"duration": 193.07, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qhubAySHZic", "annotations": []}, "W30cufYc_ZI": 
{"duration": 220.729, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=W30cufYc_ZI", "annotations": [{"segment": [0.01, 213.8418702449298], "label": "Scuba diving"}]}, "uBhuULfs3Hc": {"duration": 144.27, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uBhuULfs3Hc", "annotations": [{"segment": [9.002532182919971, 18.23012767041294], "label": "Discus throw"}, {"segment": [45.46278752374585, 81.02278964627973], "label": "Discus throw"}, {"segment": [104.65443573322933, 115.68253756318252], "label": "Discus throw"}]}, "owNzsQ9mxXE": {"duration": 224.189, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=owNzsQ9mxXE", "annotations": []}, "0K1SrDmREzs": {"duration": 52.62, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=0K1SrDmREzs", "annotations": [{"segment": [1.0251170046801872, 51.296854914196565], "label": "Discus throw"}]}, "0w4OkBenR_k": {"duration": 140.83, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0w4OkBenR_k", "annotations": []}, "BFICeyu5oMM": {"duration": 103.19, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=BFICeyu5oMM", "annotations": [{"segment": [20.403281931464175, 100.2686355140187], "label": "Making a lemonade"}]}, "krFle3KU4Ts": {"duration": 65.09, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=krFle3KU4Ts", "annotations": [{"segment": [0.01, 63.96862045910409], "label": "Playing accordion"}]}, "R7iFa9OpoTY": {"duration": 103.72, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=R7iFa9OpoTY", "annotations": [{"segment": [22.239534438377536, 26.283086154446174], "label": "Spinning"}, {"segment": [39.22245164586583, 50.54439645085803], "label": "Spinning"}, {"segment": [67.04208745241809, 73.83525433541341], "label": "Spinning"}]}, 
"WebWWFKJ4b0": {"duration": 140.922, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WebWWFKJ4b0", "annotations": [{"segment": [81.12352918408736, 88.59832591107644], "label": "Welding"}, {"segment": [95.853275675507, 106.62577684087361], "label": "Welding"}]}, "uhxQoWHBbaw": {"duration": 43.375, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=uhxQoWHBbaw", "annotations": []}, "4VVIFV6XmKo": {"duration": 114.85, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=4VVIFV6XmKo", "annotations": [{"segment": [6.629170266130374, 111.0833935600624], "label": "Doing kickboxing"}]}, "yslzj3NGuLU": {"duration": 232.99, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yslzj3NGuLU", "annotations": [{"segment": [13.99388059331897, 222.9933959480828], "label": "Removing ice from car"}]}, "_asXSQ6Ql24": {"duration": 190.61, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_asXSQ6Ql24", "annotations": []}, "UArl5DMLpSI": {"duration": 224.63, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=UArl5DMLpSI", "annotations": []}, "h1Jpycw4mT8": {"duration": 51.618, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=h1Jpycw4mT8", "annotations": []}, "RhsCYNiAaV8": {"duration": 206.518, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RhsCYNiAaV8", "annotations": [{"segment": [44.362271028037384, 175.85879595015578], "label": "Rafting"}]}, "skSPijG9sgU": {"duration": 220.033, "subset": "training", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=skSPijG9sgU", "annotations": [{"segment": [0.2, 89.57104680187207], "label": "Curling"}]}, "PeZRmM_-_mY": {"duration": 28.47, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PeZRmM_-_mY", 
"annotations": []}, "PjFWE2F8tBE": {"duration": 91.65, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PjFWE2F8tBE", "annotations": []}, "JGurKGNFYpU": {"duration": 124.483, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JGurKGNFYpU", "annotations": []}, "hokqvyeqhmg": {"duration": 102.89, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hokqvyeqhmg", "annotations": [{"segment": [26.163258407887337, 100.80077472486654], "label": "Rock climbing"}]}, "IAtxK0w_ybY": {"duration": 177.703, "subset": "validation", "resolution": "402x226", "url": "https://www.youtube.com/watch?v=IAtxK0w_ybY", "annotations": [{"segment": [18.374907587366312, 173.08675649707118], "label": "Making a lemonade"}]}, "0FuZcOAxfJ8": {"duration": 60.023, "subset": "testing", "resolution": "240x180", "url": "https://www.youtube.com/watch?v=0FuZcOAxfJ8", "annotations": []}, "vvk6f13VO5c": {"duration": 62.276, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vvk6f13VO5c", "annotations": [{"segment": [17.29346837129485, 48.18854107956319], "label": "Beach soccer"}]}, "dRwQj921TS4": {"duration": 200.969, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dRwQj921TS4", "annotations": []}, "_nGlzZystmo": {"duration": 139.018, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_nGlzZystmo", "annotations": [{"segment": [5.421890753181148, 8.675025205089836], "label": "Futsal"}, {"segment": [10.410030246107803, 11.494408396744033], "label": "Futsal"}, {"segment": [16.48254788967069, 42.5076235049402], "label": "Futsal"}, {"segment": [44.676379806212665, 51.61639997028453], "label": "Futsal"}, {"segment": [54.86953442219322, 62.89393273690131], "label": "Futsal"}, {"segment": [65.06268903817377, 74.82209239389985], "label": "Futsal"}, {"segment": [77.20772432529954, 
86.75025205089837], "label": "Futsal"}, {"segment": [88.48525709191632, 97.59403355726066], "label": "Futsal"}, {"segment": [102.14842178993283, 113.64282944149765], "label": "Futsal"}, {"segment": [114.07658069890796, 138.81727800000002], "label": "Futsal"}]}, "ZuHwSBCjfds": {"duration": 228.25, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=ZuHwSBCjfds", "annotations": [{"segment": [19.22873051794072, 228.25215419501134], "label": "Playing water polo"}]}, "mbB7UFoTwpo": {"duration": 60.12, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=mbB7UFoTwpo", "annotations": [{"segment": [18.004292760358567, 21.46894861238349], "label": "Washing face"}, {"segment": [31.11380138964206, 37.10671962017165], "label": "Washing face"}]}, "tFAcBYp3o0w": {"duration": 49.111, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tFAcBYp3o0w", "annotations": []}, "AwIts-6Vtms": {"duration": 69.06, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=AwIts-6Vtms", "annotations": []}, "cRDXBF2RcYI": {"duration": 30.65, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cRDXBF2RcYI", "annotations": [{"segment": [1.1218408736349454, 25.94555382215289], "label": "Shaving legs"}]}, "IoJoUIxzdac": {"duration": 53.383, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IoJoUIxzdac", "annotations": [{"segment": [1.2492045631825273, 9.077553159126365], "label": "Playing rubik cube"}, {"segment": [10.743159243369735, 52.71643256630266], "label": "Playing rubik cube"}]}, "6Y8wppTQFPo": {"duration": 100.218, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6Y8wppTQFPo", "annotations": [{"segment": [17.030527301092043, 91.72859438377535], "label": "Assembling bicycle"}]}, "MWdPh6J-YXM": {"duration": 171.503, "subset": "training", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MWdPh6J-YXM", "annotations": [{"segment": [9.079429906542057, 142.3845440373832], "label": "Canoeing"}]}, "jwYgWG2fqOA": {"duration": 169.297, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=jwYgWG2fqOA", "annotations": [{"segment": [2.7419231437255287, 169.061], "label": "Baton twirling"}]}, "araMJB23T1s": {"duration": 75.81, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=araMJB23T1s", "annotations": []}, "YaIuavIbk6A": {"duration": 171.317, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YaIuavIbk6A", "annotations": [{"segment": [23.78657950127529, 26.459228883441057], "label": "Slacklining"}, {"segment": [36.61529653567095, 40.89153554713617], "label": "Slacklining"}, {"segment": [51.04760319936607, 53.987517519748415], "label": "Slacklining"}, {"segment": [68.95435405987668, 72.69606319490876], "label": "Slacklining"}, {"segment": [79.11042171210659, 83.92119060000496], "label": "Slacklining"}, {"segment": [86.59383998217072, 94.611788128668], "label": "Slacklining"}, {"segment": [105.30238565733106, 112.51853898917861], "label": "Slacklining"}, {"segment": [118.13110269172672, 127.75264017160686], "label": "Slacklining"}, {"segment": [133.36520386115444, 140.04682730109204], "label": "Slacklining"}, {"segment": [153.94460405616223, 159.55716774570982], "label": "Slacklining"}]}, "e4V0JFm4hhI": {"duration": 75.814, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=e4V0JFm4hhI", "annotations": []}, "f_EIRRM0EZE": {"duration": 91.19, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=f_EIRRM0EZE", "annotations": []}, "SXwCedd4MKg": {"duration": 173.825, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SXwCedd4MKg", "annotations": [{"segment": [0.01, 173.36785150342564], 
"label": "Disc dog"}]}, "u2TTS37kXXY": {"duration": 214.86, "subset": "testing", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=u2TTS37kXXY", "annotations": []}, "JbW8efAOOt0": {"duration": 210.77, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=JbW8efAOOt0", "annotations": []}, "gF0ymMmpS_M": {"duration": 137.741, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gF0ymMmpS_M", "annotations": [{"segment": [36.60057800312012, 51.8418744149766], "label": "Elliptical trainer"}]}, "vcl8rFSCH4w": {"duration": 70.473, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vcl8rFSCH4w", "annotations": [{"segment": [6.912434579439252, 46.74122429906542], "label": "Snow tubing"}]}, "MWjdJIAkMyI": {"duration": 36.22, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=MWjdJIAkMyI", "annotations": []}, "mU3ujj4Z7lE": {"duration": 56.43, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mU3ujj4Z7lE", "annotations": [{"segment": [3.8715195007800314, 12.758416536661468], "label": "Shoveling snow"}, {"segment": [15.222110764430578, 30.356232449297977], "label": "Shoveling snow"}, {"segment": [35.107642745709825, 41.882801872074886], "label": "Shoveling snow"}]}, "woRQ2JxcVHA": {"duration": 29.49, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=woRQ2JxcVHA", "annotations": [{"segment": [0.01, 25.69391099609984], "label": "Playing saxophone"}]}, "YAhMxt-3ciU": {"duration": 102.424, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=YAhMxt-3ciU", "annotations": [{"segment": [13.097101404056161, 58.936956318252726], "label": "Croquet"}, {"segment": [86.72836661466458, 93.75607956318252], "label": "Croquet"}]}, "YRGiRmcHdUY": {"duration": 35.666, "subset": "testing", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=YRGiRmcHdUY", "annotations": []}, "CvVqbdk6owc": {"duration": 70.52, "subset": "training", "resolution": "642x480", "url": "https://www.youtube.com/watch?v=CvVqbdk6owc", "annotations": [{"segment": [4.398190327613104, 70.461], "label": "Breakdancing"}]}, "lxNV7FI-LQQ": {"duration": 146.263, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lxNV7FI-LQQ", "annotations": [{"segment": [17.060163052941498, 131.70448209315478], "label": "Camel ride"}]}, "Qos-Lzt5U6M": {"duration": 150.54, "subset": "testing", "resolution": "854x476", "url": "https://www.youtube.com/watch?v=Qos-Lzt5U6M", "annotations": []}, "sJUUm0n4cXA": {"duration": 201.06, "subset": "training", "resolution": "852x480", "url": "https://www.youtube.com/watch?v=sJUUm0n4cXA", "annotations": [{"segment": [6.273370336173992, 194.7881486723869], "label": "Playing lacrosse"}]}, "yNyv7Ykn5oo": {"duration": 211.361, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yNyv7Ykn5oo", "annotations": [{"segment": [3.9451483356432258, 119.01212110712612], "label": "Building sandcastles"}]}, "dkG9YRNJllQ": {"duration": 34.018, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=dkG9YRNJllQ", "annotations": [{"segment": [0.2, 30.620815700468018], "label": "Blowing leaves"}]}, "Xg_MSiPhJns": {"duration": 93.39, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=Xg_MSiPhJns", "annotations": [{"segment": [16.125651299999998, 72.99332083619345], "label": "Making a sandwich"}]}, "O2Y6rn4gFd4": {"duration": 24.66, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=O2Y6rn4gFd4", "annotations": [{"segment": [0.19999999999999998, 24.10109204368175], "label": "Shaving legs"}]}, "iCkDtX_mTII": {"duration": 33.995, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iCkDtX_mTII", 
"annotations": [{"segment": [2.7550265210608424, 33.941], "label": "Using the rowing machine"}]}, "QYU2srH753Q": {"duration": 178.93, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=QYU2srH753Q", "annotations": [{"segment": [42.83751560062402, 163.95466068642745], "label": "Doing motocross"}]}, "i1PpX1IOcIs": {"duration": 184.02, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=i1PpX1IOcIs", "annotations": [{"segment": [6.36334244, 119.3448739], "label": "Snatch"}, {"segment": [126.8767550702028, 178.8330733229329], "label": "Snatch"}]}, "W1Cp1UyY8S8": {"duration": 186.991, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=W1Cp1UyY8S8", "annotations": [{"segment": [16.59578971962617, 170.6346199376947], "label": "Painting fence"}]}, "lYP05IRcOPE": {"duration": 26.077, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lYP05IRcOPE", "annotations": []}, "2WwWzzO6rZQ": {"duration": 90.19, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2WwWzzO6rZQ", "annotations": [{"segment": [4.2208878559224, 87.23168152886115], "label": "Using the balance beam"}]}, "OYIAhO9nJmk": {"duration": 139.738, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=OYIAhO9nJmk", "annotations": [{"segment": [18.31195899264542, 130.36370706396255], "label": "Capoeira"}]}, "iDz8nKDpumY": {"duration": 112.176, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iDz8nKDpumY", "annotations": [{"segment": [0.01, 112.1756], "label": "Waterskiing"}]}, "dPdeb0SgIw8": {"duration": 192.122, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dPdeb0SgIw8", "annotations": [{"segment": [16.184996508580344, 30.871382229329175], "label": "Playing ice hockey"}]}, "kgz7c2kb4Rg": {"duration": 87.61, "subset": 
"testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=kgz7c2kb4Rg", "annotations": []}, "I6VBFfeBqF0": {"duration": 168.48, "subset": "training", "resolution": "608x1080", "url": "https://www.youtube.com/watch?v=I6VBFfeBqF0", "annotations": [{"segment": [0.5256910793438541, 19.450569935722598], "label": "Shaving legs"}, {"segment": [27.598781665552337, 44.15805066488374], "label": "Shaving legs"}, {"segment": [48.626424839306495, 64.13431167995019], "label": "Shaving legs"}, {"segment": [69.65406801306065, 94.62439428189371], "label": "Shaving legs"}, {"segment": [119.33187364430579, 126.9543942074883], "label": "Shaving legs"}]}, "sEGceBU8icE": {"duration": 110.44, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sEGceBU8icE", "annotations": [{"segment": [2.9288289980578814, 98.71876418096724], "label": "Painting"}]}, "xGLwosapbIw": {"duration": 162.215, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xGLwosapbIw", "annotations": []}, "5xgBlI9Xx0I": {"duration": 65.18, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5xgBlI9Xx0I", "annotations": [{"segment": [6.812720131045241, 55.51858494851793], "label": "Using parallel bars"}]}, "Z3DhUZhoRDA": {"duration": 110.829, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Z3DhUZhoRDA", "annotations": [{"segment": [0.01, 110.82884353741497], "label": "Hanging wallpaper"}]}, "7OZHg_OOfxw": {"duration": 175.01, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7OZHg_OOfxw", "annotations": [{"segment": [13.373024960998439, 29.748157566302652], "label": "Cheerleading"}, {"segment": [27.83772542901716, 29.475238689547584], "label": "Cheerleading"}, {"segment": [147.3761934477379, 174.941], "label": "Cheerleading"}]}, "v1hQ0vf66QY": {"duration": 212.045, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=v1hQ0vf66QY", "annotations": [{"segment": [16.818453723249913, 36.275102087105566], "label": "Sumo"}, {"segment": [39.57283909792856, 60.0188085650311], "label": "Sumo"}, {"segment": [66.28450888559479, 77.82658842347526], "label": "Sumo"}, {"segment": [83.10296764079204, 89.69844166243803], "label": "Sumo"}, {"segment": [95.63436828191942, 107.17644781979989], "label": "Sumo"}, {"segment": [110.80395853170516, 120.3673948436359], "label": "Sumo"}, {"segment": [125.64377401626422, 138.17517455125645], "label": "Sumo"}, {"segment": [140.48359043928133, 147.40883810335598], "label": "Sumo"}, {"segment": [151.3661224828272, 168.5143547938692], "label": "Sumo"}, {"segment": [175.7693761562331, 194.23670326043217], "label": "Sumo"}]}, "vFO70WwrQgE": {"duration": 183.65, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vFO70WwrQgE", "annotations": []}, "hyW_cjYi8P4": {"duration": 189.45, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=hyW_cjYi8P4", "annotations": [{"segment": [85.71133680721378, 189.45160997732427], "label": "Polishing forniture"}]}, "HPTb6DO0P2k": {"duration": 177.96, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=HPTb6DO0P2k", "annotations": []}, "oKauZV0DHHk": {"duration": 160.59, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oKauZV0DHHk", "annotations": [{"segment": [0.01, 102.66054602184087], "label": "Blow-drying hair"}]}, "vXOT_VXWOqo": {"duration": 207.401, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vXOT_VXWOqo", "annotations": [{"segment": [13.91033229329173, 195.73401716068642], "label": "Hurling"}]}, "uWWo9qZh3cU": {"duration": 107.32, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=uWWo9qZh3cU", "annotations": [{"segment": [77.0177992823713, 88.06817917940718], "label": 
"Playing saxophone"}]}, "4CRacVTadPQ": {"duration": 101.866, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4CRacVTadPQ", "annotations": [{"segment": [83.21953822152885, 101.821], "label": "Mooping floor"}]}, "Gr2I0xdGN5A": {"duration": 53.52, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Gr2I0xdGN5A", "annotations": []}, "K9kVsnTQh-g": {"duration": 102.12, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=K9kVsnTQh-g", "annotations": [{"segment": [2.867682496099844, 50.34375937597503], "label": "Washing dishes"}, {"segment": [82.84416099843993, 88.89815737909515], "label": "Washing dishes"}]}, "NnLjEbG31ZE": {"duration": 46.51, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NnLjEbG31ZE", "annotations": [{"segment": [0.01, 42.373771781591266], "label": "River tubing"}]}, "tIAdhrfT70I": {"duration": 153.647, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=tIAdhrfT70I", "annotations": [{"segment": [25.40047737909516, 29.713765990639626], "label": "Beer pong"}, {"segment": [74.28441497659907, 86.50539937597505], "label": "Beer pong"}, {"segment": [120.54814820592824, 123.4240483619345], "label": "Beer pong"}, {"segment": [127.01892355694228, 128.9361903276131], "label": "Beer pong"}, {"segment": [141.63808268330735, 145.472616224649], "label": "Beer pong"}]}, "uxRBnIWE79s": {"duration": 63.786, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uxRBnIWE79s", "annotations": [{"segment": [3.3312379095163807, 53.16732371294852], "label": "Doing fencing"}]}, "7bUu05RIksU": {"duration": 199.228, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7bUu05RIksU", "annotations": [{"segment": [25.47769422776911, 60.276496099843996], "label": "Hand car wash"}, {"segment": [66.49056786271451, 108.43555226209048], 
"label": "Hand car wash"}]}, "CUJJViubZ9c": {"duration": 147.238, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CUJJViubZ9c", "annotations": [{"segment": [15.389903240249609, 135.75272858190326], "label": "Doing a powerbomb"}]}, "w2fsq9BOoZo": {"duration": 149.861, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=w2fsq9BOoZo", "annotations": [{"segment": [39.27729555070203, 139.10708840873633], "label": "Rock-paper-scissors"}]}, "O_e0pqEMZMw": {"duration": 111.71000000000001, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=O_e0pqEMZMw", "annotations": [{"segment": [0, 111.581], "label": "Drinking coffee"}]}, "QLsdBUBpz_8": {"duration": 29.188, "subset": "training", "resolution": "1906x1080", "url": "https://www.youtube.com/watch?v=QLsdBUBpz_8", "annotations": [{"segment": [18.922483619344774, 26.291335413416537], "label": "Fun sliding down"}]}, "6_NquDQUFm0": {"duration": 97.32, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6_NquDQUFm0", "annotations": [{"segment": [17.610709566302653, 46.91128668954758], "label": "Pole vault"}]}, "mtSJG4q2vP4": {"duration": 111.13, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mtSJG4q2vP4", "annotations": [{"segment": [10.922353465567193, 19.24414658218982], "label": "Triple jump"}, {"segment": [29.6463879779681, 63.97378458403642], "label": "Triple jump"}]}, "qq1wRySmDTc": {"duration": 97.06, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qq1wRySmDTc", "annotations": [{"segment": [42.82571450858035, 87.63675351014041], "label": "Starting a campfire"}]}, "L9dtiopWGe4": {"duration": 77.137, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=L9dtiopWGe4", "annotations": [{"segment": [5.655888199687988, 10.108395931357256], "label": "Making a 
lemonade"}, {"segment": [37.184456461778474, 51.62502207800313], "label": "Making a lemonade"}]}, "TIue1jR51j8": {"duration": 238.03, "subset": "training", "resolution": "480x272", "url": "https://www.youtube.com/watch?v=TIue1jR51j8", "annotations": [{"segment": [162.8317786723869, 220.3891929064918], "label": "High jump"}]}, "rze0k4LklN4": {"duration": 142.594, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rze0k4LklN4", "annotations": [{"segment": [97.35404407651475, 117.97802100185412], "label": "Assembling bicycle"}, {"segment": [126.84854915115427, 132.39262924446686], "label": "Assembling bicycle"}]}, "K0XdsHfUpco": {"duration": 148.283, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=K0XdsHfUpco", "annotations": [{"segment": [8.09655548126687, 26.14030769666161], "label": "Croquet"}, {"segment": [37.2441552138276, 102.94191969039305], "label": "Croquet"}, {"segment": [110.57581457098284, 138.5667634477379], "label": "Croquet"}]}, "3KmMvfdidvQ": {"duration": 96.34, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3KmMvfdidvQ", "annotations": [{"segment": [26.36966540670503, 34.460358903388304], "label": "Sumo"}, {"segment": [41.35243114130369, 61.878819122831636], "label": "Sumo"}, {"segment": [80.30762065626864, 86.30072684600427], "label": "Sumo"}]}, "__mIAEE03bE": {"duration": 207.331, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=__mIAEE03bE", "annotations": [{"segment": [4.197131671858775, 185.31950612668743], "label": "Snowboarding"}]}, "OeIQ0P0FCCg": {"duration": 227.486, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OeIQ0P0FCCg", "annotations": [{"segment": [7.452720402496101, 223.58161207488303], "label": "BMX"}]}, "5DBzvN5Qe_0": {"duration": 210.28, "subset": "training", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=5DBzvN5Qe_0", "annotations": [{"segment": [12.281770404636712, 190.5127965497199], "label": "Hula hoop"}]}, "M9og58TMAm0": {"duration": 142.48, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=M9og58TMAm0", "annotations": [{"segment": [18.58042557, 37.86642745709828], "label": "Doing step aerobics"}, {"segment": [38.494435859999996, 134.4757878315133], "label": "Doing step aerobics"}]}, "e_y8_3siD0c": {"duration": 15.79, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=e_y8_3siD0c", "annotations": [{"segment": [0.5911850733229329, 14.336238028081123], "label": "Hopscotch"}]}, "_yWADgOFxP0": {"duration": 238.377, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_yWADgOFxP0", "annotations": [{"segment": [31.97710764430577, 190.003511700468], "label": "Snowboarding"}]}, "cRP9tyF1N4I": {"duration": 26.889, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cRP9tyF1N4I", "annotations": [{"segment": [0.01, 26.841], "label": "Snow tubing"}]}, "ysbUc6WS12k": {"duration": 59.68, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=ysbUc6WS12k", "annotations": [{"segment": [10.936848673946958, 51.24029953198128], "label": "Starting a campfire"}]}, "H8MY7XGrN6Q": {"duration": 53.267, "subset": "training", "resolution": "240x136", "url": "https://www.youtube.com/watch?v=H8MY7XGrN6Q", "annotations": [{"segment": [10.79365054602184, 24.410255850234005], "label": "Rollerblading"}]}, "DgVo1IAVrDM": {"duration": 149.165, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=DgVo1IAVrDM", "annotations": [{"segment": [0.009999999999999998, 149.164987], "label": "Longboarding"}]}, "q3-S2tg4ULw": {"duration": 170.923, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=q3-S2tg4ULw", "annotations": 
[{"segment": [35.99763125218886, 61.86259592968753], "label": "Playing drums"}, {"segment": [66.92892921703263, 163.98920887675507], "label": "Playing drums"}]}, "O-YKLVm0ciI": {"duration": 202.39, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=O-YKLVm0ciI", "annotations": [{"segment": [156.53190721996882, 165.68397437597505], "label": "Triple jump"}, {"segment": [3.7870622714508584, 137.28100734009362], "label": "Triple jump"}, {"segment": [146.74866301872075, 152.74484494851794], "label": "Triple jump"}]}, "WINtiw-_3vg": {"duration": 39.87, "subset": "testing", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=WINtiw-_3vg", "annotations": []}, "QZWyv6SShks": {"duration": 159.82, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=QZWyv6SShks", "annotations": [{"segment": [7.480013159710062, 89.5108241445304], "label": "Playing field hockey"}]}, "yxcikJ3Hp8w": {"duration": 172.11, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yxcikJ3Hp8w", "annotations": [{"segment": [32.35379878732564, 163.91696398060003], "label": "Cheerleading"}]}, "x0zCL2FQV4E": {"duration": 88.422, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=x0zCL2FQV4E", "annotations": []}, "Aen-RfnlK3A": {"duration": 188.082, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Aen-RfnlK3A", "annotations": []}, "-0i3BpAQAgE": {"duration": 64.831, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=-0i3BpAQAgE", "annotations": [{"segment": [1.6182170007888752, 3.9444039394228834], "label": "Rock-paper-scissors"}]}, "ogOrhXUgna0": {"duration": 189.963, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ogOrhXUgna0", "annotations": [{"segment": [26.079088134789394, 189.9624488], "label": "Surfing"}]}, "dzR4voNDZ7g": 
{"duration": 129.15, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=dzR4voNDZ7g", "annotations": [{"segment": [6.0444330421216845, 111.62053017784712], "label": "Canoeing"}]}, "uDNLf3ty9ao": {"duration": 174.482, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uDNLf3ty9ao", "annotations": [{"segment": [80.29220508764392, 141.72090602222337], "label": "Rock-paper-scissors"}]}, "9iJ8snVY2s0": {"duration": 125.81, "subset": "validation", "resolution": "1920x812", "url": "https://www.youtube.com/watch?v=9iJ8snVY2s0", "annotations": [{"segment": [17.467564073991532, 112.85223980387786], "label": "Wrapping presents"}]}, "Q_AmwG-xRK4": {"duration": 19.482, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=Q_AmwG-xRK4", "annotations": []}, "VtIMPJjcdn4": {"duration": 82.27, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VtIMPJjcdn4", "annotations": [{"segment": [21.176707313190487, 72.00080370982839], "label": "Cleaning shoes"}]}, "8xS6cDOq5do": {"duration": 125.74, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8xS6cDOq5do", "annotations": [{"segment": [13.73092637991234, 111.61281585957317], "label": "Removing curlers"}]}, "3S_aifr5rG4": {"duration": 148.98, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3S_aifr5rG4", "annotations": [{"segment": [31.862714508580343, 71.89659252], "label": "Horseback riding"}, {"segment": [82.98934583999997, 131.14508580343212], "label": "Horseback riding"}]}, "ajSnAVhwWww": {"duration": 134.792, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ajSnAVhwWww", "annotations": []}, "GTOP7XXi_vI": {"duration": 218.059, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GTOP7XXi_vI", "annotations": [{"segment": [77.5209917393622, 
144.42314835222192], "label": "Capoeira"}]}, "j5M9l0qxwnU": {"duration": 79.506, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=j5M9l0qxwnU", "annotations": [{"segment": [0.4958564742589704, 20.95520904836193], "label": "Doing fencing"}, {"segment": [27.77495163806552, 34.7186895475819], "label": "Doing fencing"}, {"segment": [39.43051170046802, 46.49824492979719], "label": "Doing fencing"}, {"segment": [52.82200624024961, 58.27780031201248], "label": "Doing fencing"}, {"segment": [73.15723868954758, 76.62910764430576], "label": "Doing fencing"}]}, "AK4p_3Pojzs": {"duration": 23.96, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=AK4p_3Pojzs", "annotations": []}, "6d7oOG3dS-Y": {"duration": 60.303, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6d7oOG3dS-Y", "annotations": [{"segment": [0.2, 60.041], "label": "Installing carpet"}]}, "QUJXOFPJ_YI": {"duration": 27.12, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QUJXOFPJ_YI", "annotations": [{"segment": [1.6982544, 25.890722510140407], "label": "Playing squash"}]}, "tPZYyaX63yE": {"duration": 31.417, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=tPZYyaX63yE", "annotations": [{"segment": [8.920157492013967, 28.96600441029641], "label": "Blowing leaves"}]}, "ZH5b3e8ivvM": {"duration": 66.29, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZH5b3e8ivvM", "annotations": []}, "JSbVBXPP28Y": {"duration": 174.777, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=JSbVBXPP28Y", "annotations": []}, "_oalsZ5Gtdk": {"duration": 100.71000000000001, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=_oalsZ5Gtdk", "annotations": []}, "AHXcJCpvkAQ": {"duration": 81.34, "subset": "training", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=AHXcJCpvkAQ", "annotations": [{"segment": [8.816199687987519, 66.66061778471139], "label": "Doing step aerobics"}]}, "V9_mEvC24nk": {"duration": 152.741, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=V9_mEvC24nk", "annotations": [{"segment": [10.246266806552262, 133.91632430889234], "label": "Hurling"}]}, "0dkIbKXXFzI": {"duration": 15.49, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0dkIbKXXFzI", "annotations": [{"segment": [0.01, 15.415222627145086], "label": "Using the balance beam"}]}, "_9v34KLET0g": {"duration": 192.98, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_9v34KLET0g", "annotations": [{"segment": [34.02005888611544, 186.05660523556944], "label": "Mixing drinks"}]}, "5gNXNT-s_ww": {"duration": 194.862, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=5gNXNT-s_ww", "annotations": []}, "BfsCKU69hHs": {"duration": 27.21, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=BfsCKU69hHs", "annotations": [{"segment": [0.69719706, 26.151991700468017], "label": "Ironing clothes"}]}, "o8wdvkauJQQ": {"duration": 45.813, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=o8wdvkauJQQ", "annotations": [{"segment": [2.8499292634125006, 23.654420107239126], "label": "Using the rowing machine"}]}, "_Y4czVoIxvE": {"duration": 189.127, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_Y4czVoIxvE", "annotations": [{"segment": [74.02385491419658, 84.64082215288612], "label": "Installing carpet"}, {"segment": [109.41374570982839, 146.5731310452418], "label": "Installing carpet"}]}, "Xd_xJnlUHm0": {"duration": 185.02, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Xd_xJnlUHm0", "annotations": [{"segment": 
[25.977372727562162, 168.85292272915407], "label": "Putting on makeup"}]}, "CUU2_Msh7YU": {"duration": 33.762, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=CUU2_Msh7YU", "annotations": []}, "RNrxxPOyHo4": {"duration": 20.25, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=RNrxxPOyHo4", "annotations": [{"segment": [0, 1.4530402496099846], "label": "Ironing clothes"}]}, "EVDFLo3vufo": {"duration": 111.67, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EVDFLo3vufo", "annotations": []}, "bF55ewgaJLA": {"duration": 201.29, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bF55ewgaJLA", "annotations": [{"segment": [86.67251976609676, 121.21591532504836], "label": "Playing saxophone"}]}, "aPzHheM0Egw": {"duration": 64.297, "subset": "validation", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=aPzHheM0Egw", "annotations": [{"segment": [4.814659067995374, 60.082931422776916], "label": "Fixing the roof"}]}, "NQ51aXtD6Q0": {"duration": 96.99, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NQ51aXtD6Q0", "annotations": [{"segment": [23.465158174330572, 93.07544010579474], "label": "Doing a powerbomb"}]}, "ivivR6ZdgRM": {"duration": 49.78, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ivivR6ZdgRM", "annotations": [{"segment": [0.2, 49.513332999999996], "label": "Playing harmonica"}]}, "QJ5xv6oj1sk": {"duration": 75.14, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QJ5xv6oj1sk", "annotations": []}, "HgOHqD0lWTE": {"duration": 137.56, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HgOHqD0lWTE", "annotations": [{"segment": [83.4772213572543, 100.85936770670828], "label": "Starting a campfire"}]}, "yqFdxxWgvh8": {"duration": 92.14, "subset": 
"training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=yqFdxxWgvh8", "annotations": [{"segment": [16.44143779797192, 79.1917287823713], "label": "Mixing drinks"}]}, "uc1RbyLfuSs": {"duration": 7.76, "subset": "training", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=uc1RbyLfuSs", "annotations": [{"segment": [0.04839602496099844, 7.694967968798752], "label": "Windsurfing"}]}, "3VkqckKyshc": {"duration": 66.711, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3VkqckKyshc", "annotations": [{"segment": [0.01, 66.681], "label": "Kite flying"}]}, "V2UitlmX7Uo": {"duration": 78.53, "subset": "testing", "resolution": "270x360", "url": "https://www.youtube.com/watch?v=V2UitlmX7Uo", "annotations": []}, "WTfeKnRJ17g": {"duration": 91.719, "subset": "validation", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=WTfeKnRJ17g", "annotations": [{"segment": [29.332852225653653, 86.99694532293292], "label": "Rope skipping"}]}, "3SL_Au0H0cw": {"duration": 120.09, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=3SL_Au0H0cw", "annotations": [{"segment": [7.205085803432136, 117.43354134165367], "label": "Tai chi"}]}, "hoYF0DhYVOI": {"duration": 72.03, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hoYF0DhYVOI", "annotations": [{"segment": [4.270008305772231, 67.42118377535103], "label": "Mixing drinks"}]}, "rA3fOmbAfi0": {"duration": 194.259, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rA3fOmbAfi0", "annotations": []}, "uxsZFuXKs3A": {"duration": 12.934, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uxsZFuXKs3A", "annotations": [{"segment": [0.2, 12.733514739229026], "label": "Trimming branches or hedges"}]}, "Al5u0b_QmiQ": {"duration": 230.71, "subset": "testing", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=Al5u0b_QmiQ", "annotations": []}, "t2zLJ5mKTT8": {"duration": 85.311, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=t2zLJ5mKTT8", "annotations": [{"segment": [0.01, 85.309794], "label": "Wakeboarding"}]}, "ERYwCKVUTSQ": {"duration": 60.26, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=ERYwCKVUTSQ", "annotations": []}, "-ux1hQSR1AM": {"duration": 53.64, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-ux1hQSR1AM", "annotations": [{"segment": [13.625121684867397, 50.256163806552266], "label": "Using the pommel horse"}]}, "XlEmG7nM0jw": {"duration": 66.22, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XlEmG7nM0jw", "annotations": [{"segment": [35.15579314, 54.46102443], "label": "Mixing drinks"}, {"segment": [0.7876966200000001, 34.54586583463338], "label": "Mixing drinks"}]}, "GyOLWizKXaE": {"duration": 123.25, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GyOLWizKXaE", "annotations": [{"segment": [0.2, 4.034751950078004], "label": "Mowing the lawn"}, {"segment": [16.139007800312015, 18.636711388455538], "label": "Mowing the lawn"}, {"segment": [43.99800936037442, 51.87538221528861], "label": "Mowing the lawn"}, {"segment": [69.3593073322933, 76.85241809672388], "label": "Mowing the lawn"}, {"segment": [97.79470202808113, 105.67207488299533], "label": "Mowing the lawn"}]}, "uqd8A4iJ6Bo": {"duration": 114.08, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uqd8A4iJ6Bo", "annotations": [{"segment": [14.059736730109202, 93.4349592823713], "label": "Baking cookies"}]}, "9B-CgqaLGeI": {"duration": 138.86, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9B-CgqaLGeI", "annotations": []}, "zKgFb1QojnQ": {"duration": 29.629, "subset": "testing", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=zKgFb1QojnQ", "annotations": []}, "aUbSDHBZNEQ": {"duration": 35.85, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=aUbSDHBZNEQ", "annotations": [{"segment": [0.8926302652106085, 31.018901716068644], "label": "Walking the dog"}]}, "x-WGuz3_2Fc": {"duration": 87.122, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=x-WGuz3_2Fc", "annotations": [{"segment": [2.6326366544182322, 86.49703482264253], "label": "Peeling potatoes"}]}, "Mngqx9_VIEA": {"duration": 99.68, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Mngqx9_VIEA", "annotations": []}, "cdufbM2OCwM": {"duration": 100.1, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=cdufbM2OCwM", "annotations": [{"segment": [21.550653634308638, 93.85465682215289], "label": "Playing saxophone"}]}, "4zYY4abpCgI": {"duration": 120.373, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4zYY4abpCgI", "annotations": [{"segment": [40.54498595943838, 79.60153510140405], "label": "Painting furniture"}, {"segment": [100.44061622464898, 114.89655538221528], "label": "Painting furniture"}]}, "lXTcGMN9I8g": {"duration": 29.745, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lXTcGMN9I8g", "annotations": [{"segment": [0, 22.691401137285492], "label": "Slacklining"}]}, "J8pZtBhpqMI": {"duration": 81.549, "subset": "validation", "resolution": "482x360", "url": "https://www.youtube.com/watch?v=J8pZtBhpqMI", "annotations": [{"segment": [0.2, 78.20765990639626], "label": "Using the monkey bar"}]}, "2DiyTT1vZG4": {"duration": 127.061, "subset": "testing", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=2DiyTT1vZG4", "annotations": []}, "X9Z9uqrb9EY": {"duration": 70.52, "subset": "validation", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=X9Z9uqrb9EY", "annotations": [{"segment": [0, 70.519023], "label": "Playing congas"}]}, "FpxVS1Xpl1U": {"duration": 237.05, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=FpxVS1Xpl1U", "annotations": [{"segment": [17.51577228, 178.31288492000002], "label": "Plataform diving"}]}, "ZdaS-WZHUZY": {"duration": 154.622, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZdaS-WZHUZY", "annotations": [{"segment": [0.01, 154.581], "label": "Hurling"}]}, "Vq7Mm98I5Qs": {"duration": 229.18, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Vq7Mm98I5Qs", "annotations": [{"segment": [5.363034480562896, 46.837167796915956], "label": "Doing motocross"}, {"segment": [65.78655629490486, 100.82504823458244], "label": "Doing motocross"}, {"segment": [103.32779765884511, 149.0923585596485], "label": "Doing motocross"}, {"segment": [154.81292867224892, 226.32005395787834], "label": "Doing motocross"}]}, "fhpTXBJgFbo": {"duration": 38.06, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fhpTXBJgFbo", "annotations": []}, "X7f-hwiYZ1Q": {"duration": 167.77, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=X7f-hwiYZ1Q", "annotations": [{"segment": [104.66209048361935, 145.21865054602185], "label": "Brushing teeth"}]}, "G0g7vEwI0z8": {"duration": 106.046, "subset": "training", "resolution": "852x480", "url": "https://www.youtube.com/watch?v=G0g7vEwI0z8", "annotations": [{"segment": [25.82816909933833, 86.43518934855952], "label": "Making an omelette"}]}, "__c8enCfzqw": {"duration": 172.803, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=__c8enCfzqw", "annotations": [{"segment": [14.825202808112325, 153.37346177847115], "label": "Blow-drying hair"}]}, "nibek2g971I": {"duration": 168.624, "subset": "validation", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=nibek2g971I", "annotations": [{"segment": [26.36375803307506, 160.28324401438346], "label": "Slacklining"}]}, "EQMDnhIKU4w": {"duration": 113.22, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EQMDnhIKU4w", "annotations": [{"segment": [46.45387996723869, 93.08439065678627], "label": "Throwing darts"}]}, "NwlIZ1T93xE": {"duration": 84.01, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=NwlIZ1T93xE", "annotations": []}, "1dM62Xpm9Ns": {"duration": 187.153, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=1dM62Xpm9Ns", "annotations": [{"segment": [73.57846625353982, 143.2506268251909], "label": "Sharpening knives"}]}, "1opIvQF0m_M": {"duration": 116.15, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1opIvQF0m_M", "annotations": [{"segment": [58.0245, 112.89240795631827], "label": "Kayaking"}]}, "ybhc3-s7aYo": {"duration": 19.62, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ybhc3-s7aYo", "annotations": [{"segment": [0.01, 19.62086], "label": "Chopping wood"}]}, "kPM3RAn0Mk4": {"duration": 232.29, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kPM3RAn0Mk4", "annotations": [{"segment": [31.89035810684128, 196.41561421840873], "label": "Doing nails"}]}, "RM5ywQYztnM": {"duration": 5.016, "subset": "testing", "resolution": "406x720", "url": "https://www.youtube.com/watch?v=RM5ywQYztnM", "annotations": []}, "xtfTn-bdIPU": {"duration": 57.098, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xtfTn-bdIPU", "annotations": []}, "g5CYoFJFkPk": {"duration": 105.675, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=g5CYoFJFkPk", "annotations": [{"segment": [0.01, 105.621], "label": "Baton twirling"}]}, 
"dgbGv3G4k5M": {"duration": 119.769, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dgbGv3G4k5M", "annotations": [{"segment": [10.276550599438943, 115.09736663962558], "label": "Decorating the Christmas tree"}]}, "dwCeFVAaP9c": {"duration": 144.266, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dwCeFVAaP9c", "annotations": [{"segment": [4.2725101404056165, 138.06953822152886], "label": "Installing carpet"}]}, "lztbD1NRU4M": {"duration": 23.011, "subset": "validation", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=lztbD1NRU4M", "annotations": [{"segment": [7.41486271450858, 15.438675507020278], "label": "Doing a powerbomb"}]}, "2HUGDgn1Dp0": {"duration": 156.271, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=2HUGDgn1Dp0", "annotations": []}, "MubE2kOK6z0": {"duration": 93.69, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MubE2kOK6z0", "annotations": [{"segment": [13.466146528577928, 17.4064869117555], "label": "Ballet"}, {"segment": [29.227508061288212, 54.91268981829756], "label": "Ballet"}, {"segment": [82.34913396783026, 85.41384315474616], "label": "Ballet"}]}, "vrWcBuRPDBw": {"duration": 132.77, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vrWcBuRPDBw", "annotations": [{"segment": [28.637376989999996, 51.60808769999999], "label": "Horseback riding"}, {"segment": [59.30482257, 97.82168967], "label": "Horseback riding"}]}, "RK8VF2pL72I": {"duration": 34.157, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=RK8VF2pL72I", "annotations": [{"segment": [0.20000000000000004, 33.11118847352025], "label": "Knitting"}]}, "lKi-hl_KGJ4": {"duration": 207.17000000000002, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=lKi-hl_KGJ4", "annotations": [{"segment": 
[44.12264064, 195.6508093026521], "label": "Zumba"}, {"segment": [2.99979264, 19.93992], "label": "Zumba"}]}, "6I1aP4O04R8": {"duration": 235.427, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6I1aP4O04R8", "annotations": [{"segment": [13.773037452110328, 100.8186341494476], "label": "Blowing leaves"}, {"segment": [129.09927105111416, 139.3831390153565], "label": "Blowing leaves"}]}, "ivjnwAR91Sk": {"duration": 119.72, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ivjnwAR91Sk", "annotations": [{"segment": [8.589027665106604, 54.521653874154964], "label": "High jump"}]}, "TADJh_DZOfE": {"duration": 64.13, "subset": "training", "resolution": "448x338", "url": "https://www.youtube.com/watch?v=TADJh_DZOfE", "annotations": [{"segment": [21.911449839313573, 24.913018310452415], "label": "High jump"}, {"segment": [39.22049468954758, 43.12253370202807], "label": "High jump"}, {"segment": [46.324206737909506, 49.42582749141965], "label": "High jump"}]}, "ZoKode_Gexg": {"duration": 47.81, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZoKode_Gexg", "annotations": [{"segment": [4.582597471918877, 29.470200002340093], "label": "Washing dishes"}]}, "MERZe5qdfWc": {"duration": 128.082, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MERZe5qdfWc", "annotations": [{"segment": [1.7971216848673948, 11.980811232449298], "label": "Snow tubing"}, {"segment": [16.773135725429018, 21.565460218408738], "label": "Snow tubing"}, {"segment": [55.5485066801872, 73.13220663650546], "label": "Snow tubing"}, {"segment": [77.52813162558502, 94.91201680967238], "label": "Snow tubing"}, {"segment": [97.70942362090483, 115.69275312168486], "label": "Snow tubing"}, {"segment": [117.89071561622464, 124.88423264430577], "label": "Snow tubing"}]}, "K-kJ-0nkKZk": {"duration": 130.15, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=K-kJ-0nkKZk", "annotations": [{"segment": [63.14505467293522, 94.41302386789349], "label": "Starting a campfire"}]}, "htKhaMfxxrM": {"duration": 188.245, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=htKhaMfxxrM", "annotations": [{"segment": [124.81054793212137, 175.90945324180967], "label": "Arm wrestling"}]}, "Va3NsrY1DJ8": {"duration": 85.612, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Va3NsrY1DJ8", "annotations": [{"segment": [6.410880823712948, 74.7936096099844], "label": "Slacklining"}]}, "Ej_a4iCd45I": {"duration": 174.94, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ej_a4iCd45I", "annotations": [{"segment": [5.72933073322933, 12.82278783151326], "label": "Playing beach volleyball"}, {"segment": [22.91732293291732, 32.19338221528861], "label": "Playing beach volleyball"}, {"segment": [36.285761310452415, 44.19769422776911], "label": "Playing beach volleyball"}, {"segment": [47.19877223088923, 54.83787987519501], "label": "Playing beach volleyball"}, {"segment": [58.384608424336975, 153.87345397815912], "label": "Playing beach volleyball"}]}, "ouqYXdtEuCI": {"duration": 100.15, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ouqYXdtEuCI", "annotations": [{"segment": [29.422860016380657, 36.91515328315133], "label": "Springboard diving"}, {"segment": [50.182755943057714, 67.5086841224649], "label": "Springboard diving"}, {"segment": [74.5327090600624, 99.0387516201248], "label": "Springboard diving"}]}, "GXEuuWhEMCY": {"duration": 181.84, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=GXEuuWhEMCY", "annotations": [{"segment": [98.40959680343212, 150.025004925117], "label": "Snatch"}, {"segment": [9.7572661, 95.2124427], "label": "Snatch"}]}, "6cha8sVi65k": {"duration": 161.008, "subset": "training", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=6cha8sVi65k", "annotations": [{"segment": [44.69743837753511, 129.08616848673947], "label": "Playing ice hockey"}]}, "I3WRp3QB8eg": {"duration": 123.93, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=I3WRp3QB8eg", "annotations": [{"segment": [44.64606552262091, 114.80416848673947], "label": "Getting a haircut"}]}, "WlYhEPyLcBc": {"duration": 90.0, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WlYhEPyLcBc", "annotations": [{"segment": [4.139802652106084, 37.39855616224649], "label": "Playing badminton"}, {"segment": [50.87045631825273, 75.00761076443058], "label": "Playing badminton"}]}, "TscC5kgurqY": {"duration": 170.133, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TscC5kgurqY", "annotations": [{"segment": [59.156229329173165, 101.33488611544462], "label": "Making an omelette"}]}, "2ZJLCKVgPRA": {"duration": 36.69, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=2ZJLCKVgPRA", "annotations": []}, "DACWnbWrd18": {"duration": 74.142, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=DACWnbWrd18", "annotations": [{"segment": [0, 10.294195117464563], "label": "Cutting the grass"}, {"segment": [25.33065989578359, 73.79434166302651], "label": "Cutting the grass"}]}, "ID44l9VqqGQ": {"duration": 110.85, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ID44l9VqqGQ", "annotations": [{"segment": [0, 110.85201814058956], "label": "Playing accordion"}]}, "ULwRERVQD6A": {"duration": 41.52, "subset": "testing", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=ULwRERVQD6A", "annotations": []}, "FXQxGgdXTXE": {"duration": 27.888, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FXQxGgdXTXE", "annotations": []}, "P17kaxPrbdc": {"duration": 
136.77, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=P17kaxPrbdc", "annotations": [{"segment": [80.65085803432137, 136.76507936507937], "label": "Doing karate"}]}, "rOtmhurweqo": {"duration": 84.2, "subset": "testing", "resolution": "634x480", "url": "https://www.youtube.com/watch?v=rOtmhurweqo", "annotations": []}, "EuIBTFqVOt8": {"duration": 230.435, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EuIBTFqVOt8", "annotations": [{"segment": [14.716241584899793, 202.79698769435083], "label": "Arm wrestling"}]}, "clSku91LoQQ": {"duration": 126.27, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=clSku91LoQQ", "annotations": [{"segment": [0, 20.7231768934908], "label": "Playing accordion"}, {"segment": [61.82980213959672, 80.5146317969176], "label": "Playing accordion"}, {"segment": [98.60941420190201, 121.03120979068706], "label": "Playing accordion"}]}, "zfU85oBVpfA": {"duration": 44.931, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zfU85oBVpfA", "annotations": [{"segment": [6.448650599085188, 36.30870573790951], "label": "Kneeling"}]}, "fHa6VNVy7m8": {"duration": 32.462, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=fHa6VNVy7m8", "annotations": []}, "VvfY0GWxd7Y": {"duration": 124.51, "subset": "testing", "resolution": "320x178", "url": "https://www.youtube.com/watch?v=VvfY0GWxd7Y", "annotations": []}, "6_uxd3nj2d0": {"duration": 189.94, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6_uxd3nj2d0", "annotations": [{"segment": [4.739619344773791, 45.9150624024961], "label": "Putting on makeup"}]}, "1aCwFDS0j2A": {"duration": 75.744, "subset": "training", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=1aCwFDS0j2A", "annotations": [{"segment": [0.010000000000000002, 75.681], "label": "Kneeling"}]}, 
"224E-VtB4k4": {"duration": 225.49, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=224E-VtB4k4", "annotations": [{"segment": [32.363472889936006, 103.77417937533828], "label": "Playing violin"}, {"segment": [108.69905568467637, 207.54835820592822], "label": "Playing violin"}]}, "_Af_9cK5x4E": {"duration": 232.29, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_Af_9cK5x4E", "annotations": [{"segment": [21.199840332388806, 202.75744728156474], "label": "Doing motocross"}]}, "5Jo_AWt9fB0": {"duration": 65.04, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5Jo_AWt9fB0", "annotations": [{"segment": [3.887910434865506, 64.13027024794962], "label": "Kneeling"}]}, "ru7UAr2488M": {"duration": 203.756, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ru7UAr2488M", "annotations": [{"segment": [21.581510868106328, 31.737515982509304], "label": "Playing ice hockey"}, {"segment": [100.6079256645545, 109.17705497983201], "label": "Playing ice hockey"}, {"segment": [136.78869388461513, 159.32233023219672], "label": "Playing ice hockey"}, {"segment": [175.19108822345137, 177.41271294236762], "label": "Playing ice hockey"}]}, "vMdSEzQkRTg": {"duration": 8.08, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vMdSEzQkRTg", "annotations": [{"segment": [6.21483260374415, 8.080543], "label": "Starting a campfire"}]}, "3am00ii0n-o": {"duration": 213.09, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=3am00ii0n-o", "annotations": [{"segment": [88.42717529653567, 100.06232994081668], "label": "Playing flauta"}]}, "03KTIy44yTo": {"duration": 96.433, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=03KTIy44yTo", "annotations": [{"segment": [0, 96.401], "label": "Blowing leaves"}]}, "n0Az4cLruf4": {"duration": 
148.515, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=n0Az4cLruf4", "annotations": [{"segment": [84.9152784410696, 96.03651545381543], "label": "Using the rowing machine"}]}, "6C-hZvDrIK0": {"duration": 105.44, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6C-hZvDrIK0", "annotations": []}, "HQVSLlKhupk": {"duration": 79.495, "subset": "testing", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=HQVSLlKhupk", "annotations": []}, "4FdWr8WykqE": {"duration": 135.977, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4FdWr8WykqE", "annotations": []}, "CAh5HrKU9jE": {"duration": 132.447, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CAh5HrKU9jE", "annotations": []}, "XKvzx2cD9KY": {"duration": 146.426, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=XKvzx2cD9KY", "annotations": [{"segment": [90.17777797063616, 137.77160611311245], "label": "Sumo"}]}, "StGxFfLiYNU": {"duration": 69.985, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=StGxFfLiYNU", "annotations": []}, "1Y1pKGFm-pQ": {"duration": 32.81, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1Y1pKGFm-pQ", "annotations": [{"segment": [0.25592523020648716, 5.42561463026521], "label": "Putting on makeup"}, {"segment": [13.717591706708268, 20.187377699999995], "label": "Putting on makeup"}]}, "j_Xr8nLEjLQ": {"duration": 141.991, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=j_Xr8nLEjLQ", "annotations": [{"segment": [0.19999999999999998, 19.265003120124806], "label": "Slacklining"}, {"segment": [25.908107644305773, 120.70007020280812], "label": "Slacklining"}]}, "1uYv7kTOfCE": {"duration": 110.226, "subset": "testing", "resolution": "720x480", "url": 
"https://www.youtube.com/watch?v=1uYv7kTOfCE", "annotations": []}, "NNqghz7Fd0M": {"duration": 132.703, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NNqghz7Fd0M", "annotations": [{"segment": [0.01, 132.701881], "label": "Playing drums"}]}, "gyCXP8w8GRA": {"duration": 122.21, "subset": "training", "resolution": "1906x1080", "url": "https://www.youtube.com/watch?v=gyCXP8w8GRA", "annotations": [{"segment": [42.7598794, 115.6803744149766], "label": "Doing karate"}, {"segment": [16.97074722, 36.18397454], "label": "Doing karate"}]}, "yNHc-xADBm8": {"duration": 83.13, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yNHc-xADBm8", "annotations": [{"segment": [2.657004680187207, 19.765522620904836], "label": "Starting a campfire"}]}, "cJiwi-wB1FY": {"duration": 79.529, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cJiwi-wB1FY", "annotations": [{"segment": [49.2259567715111, 59.244303754276835], "label": "Throwing darts"}]}, "nuaTROuaZPY": {"duration": 165.93, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nuaTROuaZPY", "annotations": [{"segment": [92.85890639625585, 139.67634945397813], "label": "Starting a campfire"}]}, "wqZzLAPmr9k": {"duration": 184.53, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wqZzLAPmr9k", "annotations": [{"segment": [20.727093508230123, 165.87406448], "label": "Preparing pasta"}]}, "E8H-67ILaqc": {"duration": 193.59, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=E8H-67ILaqc", "annotations": []}, "_jT57LdAHUs": {"duration": 208.167, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_jT57LdAHUs", "annotations": [{"segment": [47.7939564062876, 60.43957567731564], "label": "Playing drums"}, {"segment": [85.73081421937171, 101.29465332217545], "label": "Playing 
drums"}, {"segment": [92.86424047482343, 102.59163991407577], "label": "Playing drums"}, {"segment": [154.14685694211315, 173.60165582061782], "label": "Playing drums"}]}, "uIcMgjypbqk": {"duration": 177.05, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uIcMgjypbqk", "annotations": []}, "bPZRYmr7p1k": {"duration": 143.73, "subset": "validation", "resolution": "576x320", "url": "https://www.youtube.com/watch?v=bPZRYmr7p1k", "annotations": [{"segment": [37.51527543525741, 40.8450336099844], "label": "Archery"}, {"segment": [78.13832516692668, 78.3603090452418], "label": "Archery"}, {"segment": [55.71795345709828, 58.60374387519501], "label": "Archery"}, {"segment": [63.04342144149766, 67.92706676443058], "label": "Archery"}]}, "ULPw4uZcx5I": {"duration": 81.131, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ULPw4uZcx5I", "annotations": [{"segment": [3.5876456756235284, 78.49869396222789], "label": "Snow tubing"}]}, "gGSs4U8CTeM": {"duration": 114.00999999999999, "subset": "testing", "resolution": "854x468", "url": "https://www.youtube.com/watch?v=gGSs4U8CTeM", "annotations": []}, "iWj81FBROQQ": {"duration": 222.22, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iWj81FBROQQ", "annotations": [{"segment": [11.957577985179407, 202.58563282137283], "label": "Using the balance beam"}]}, "-mX18jJkPDk": {"duration": 189.62, "subset": "validation", "resolution": "208x160", "url": "https://www.youtube.com/watch?v=-mX18jJkPDk", "annotations": [{"segment": [14.490622464898596, 109.71471294851796], "label": "Brushing hair"}, {"segment": [127.16260530421216, 187.2149344773791], "label": "Brushing hair"}]}, "UNPQdg_tWh4": {"duration": 132.099, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UNPQdg_tWh4", "annotations": [{"segment": [0.01, 132.041], "label": "Croquet"}]}, "ghxiG0Dnrt0": {"duration": 62.79, "subset": 
"testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ghxiG0Dnrt0", "annotations": []}, "_JE5T2RKZvo": {"duration": 118.213, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_JE5T2RKZvo", "annotations": [{"segment": [98.4799201123245, 109.36066034945398], "label": "Painting furniture"}]}, "OFn-c_XvIhU": {"duration": 89.722, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OFn-c_XvIhU", "annotations": [{"segment": [31.744940717628708, 45.30995943837754], "label": "Scuba diving"}]}, "p9yq0kAqGU4": {"duration": 141.433, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=p9yq0kAqGU4", "annotations": [{"segment": [0.01, 141.381], "label": "Tug of war"}]}, "jCxQQ9DMpT0": {"duration": 188.99, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jCxQQ9DMpT0", "annotations": []}, "Rw4swFpJOCc": {"duration": 136.952, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Rw4swFpJOCc", "annotations": []}, "Kb8SPjD121k": {"duration": 58.05, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Kb8SPjD121k", "annotations": [{"segment": [6.339246630265211, 23.998576528861154], "label": "Arm wrestling"}, {"segment": [33.77912847269891, 56.60041634165367], "label": "Arm wrestling"}]}, "rmMCA60HqZ0": {"duration": 128.52, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rmMCA60HqZ0", "annotations": [{"segment": [66.52482371294853, 67.52670358814352], "label": "Washing face"}]}, "KMK3_VeODlk": {"duration": 217.43, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KMK3_VeODlk", "annotations": []}, "utgRhyBvtRU": {"duration": 32.183, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=utgRhyBvtRU", "annotations": []}, "rEJb170mQuA": 
{"duration": 161.077, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rEJb170mQuA", "annotations": []}, "UIH5arBo3-Y": {"duration": 122.0, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UIH5arBo3-Y", "annotations": [{"segment": [15.184168799999997, 108.83259282371294], "label": "Playing guitarra"}]}, "aEol4D2AncU": {"duration": 85.543, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aEol4D2AncU", "annotations": []}, "_kdhjXHUfes": {"duration": 113.99000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_kdhjXHUfes", "annotations": [{"segment": [10.669587273286849, 99.2271616415677], "label": "Tango"}]}, "gHk4mjlJ5cg": {"duration": 210.883, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gHk4mjlJ5cg", "annotations": [{"segment": [55.2705920808261, 89.15672841965679], "label": "Rope skipping"}, {"segment": [103.30336798439937, 203.97480302652104], "label": "Rope skipping"}]}, "mBsIWxXJPY0": {"duration": 209.375, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mBsIWxXJPY0", "annotations": [{"segment": [2.2793423369542216, 108.10618097231922], "label": "Snowboarding"}, {"segment": [125.36409619782465, 177.78908395832232], "label": "Snowboarding"}]}, "WQmJrfjOF7o": {"duration": 187.084, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WQmJrfjOF7o", "annotations": [{"segment": [30.131388092313923, 135.0375733259588], "label": "Laying tile"}, {"segment": [144.071161498856, 177.58285955960363], "label": "Laying tile"}]}, "oXPgInpuHDA": {"duration": 82.55, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oXPgInpuHDA", "annotations": [{"segment": [0, 82.54696145124717], "label": "Cheerleading"}]}, "uCvjzcVCtEs": {"duration": 19.41, "subset": "testing", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uCvjzcVCtEs", "annotations": []}, "Ds-LrRrLj9c": {"duration": 95.46000000000001, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Ds-LrRrLj9c", "annotations": [{"segment": [11.535530421216848, 78.8379363], "label": "Brushing teeth"}]}, "zlOErVVHKEk": {"duration": 185.51, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zlOErVVHKEk", "annotations": [{"segment": [2.8939815551805745, 58.747825570165666], "label": "Cricket"}, {"segment": [67.14037208018934, 176.8222730215331], "label": "Cricket"}]}, "l9LHfuoU8js": {"duration": 107.09, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=l9LHfuoU8js", "annotations": [{"segment": [3.5084079934625954, 97.23302123868956], "label": "Fixing the roof"}]}, "JLDZdxTf5TA": {"duration": 125.69, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JLDZdxTf5TA", "annotations": [{"segment": [37.42659090320195, 43.88374510880943], "label": "Playing bagpipes"}]}, "nd5l829R8mw": {"duration": 13.073, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nd5l829R8mw", "annotations": [{"segment": [0.2, 11.193638065522622], "label": "Drum corps"}]}, "cfhc7tf8vU4": {"duration": 151.74, "subset": "training", "resolution": "634x360", "url": "https://www.youtube.com/watch?v=cfhc7tf8vU4", "annotations": [{"segment": [2.6069275600000004, 133.82764820592823], "label": "Triple jump"}]}, "LbXhdPZakpo": {"duration": 50.67, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=LbXhdPZakpo", "annotations": [{"segment": [0.01, 49.915040733229326], "label": "Playing field hockey"}]}, "0zjA3KPnLK8": {"duration": 180.234, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=0zjA3KPnLK8", "annotations": [{"segment": [0.01, 180.2332879818594], 
"label": "Sumo"}]}, "IlKOWIBAEFE": {"duration": 226.627, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IlKOWIBAEFE", "annotations": [{"segment": [73.99348367029549, 177.23201088646968], "label": "Scuba diving"}]}, "k1aFJ-F8xTs": {"duration": 27.05, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=k1aFJ-F8xTs", "annotations": [{"segment": [2.5494001560062403, 17.930078783151327], "label": "Shaving"}, {"segment": [21.427602964118567, 24.714432917316692], "label": "Shaving"}]}, "EAp9xQx2o6k": {"duration": 68.15, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EAp9xQx2o6k", "annotations": [{"segment": [0.13492700640954705, 68.061], "label": "Doing step aerobics"}]}, "maHLwXvNN3w": {"duration": 43.886, "subset": "validation", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=maHLwXvNN3w", "annotations": [{"segment": [0.01, 43.680320742277694], "label": "Playing congas"}]}, "Gq8-XVrlAt4": {"duration": 173.29, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Gq8-XVrlAt4", "annotations": [{"segment": [8.379188767550701, 92.17107644305771], "label": "Tennis serve with ball bouncing"}, {"segment": [117.30864274570982, 169.74614664586582], "label": "Tennis serve with ball bouncing"}]}, "ShozMaLUS20": {"duration": 14.072, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ShozMaLUS20", "annotations": [{"segment": [0.01, 14.071292517006803], "label": "Raking leaves"}]}, "Tc8L-74Ilck": {"duration": 233.34, "subset": "training", "resolution": "624x352", "url": "https://www.youtube.com/watch?v=Tc8L-74Ilck", "annotations": [{"segment": [9.464531256080175, 169.63352064898595], "label": "Rock climbing"}]}, "5JVHUcOW0GE": {"duration": 129.06, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5JVHUcOW0GE", "annotations": [{"segment": 
[7.6507758215090504, 26.57637916945249], "label": "Making a sandwich"}]}, "63lmoqLxB8c": {"duration": 237.03, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=63lmoqLxB8c", "annotations": [{"segment": [18.8190135, 203.69364274570984], "label": "Volleyball"}]}, "aGlfi9PqRdY": {"duration": 122.532, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aGlfi9PqRdY", "annotations": [{"segment": [0.01, 122.531699], "label": "Peeling potatoes"}]}, "GbykXyc8LA8": {"duration": 59.86, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GbykXyc8LA8", "annotations": [{"segment": [12.42032451326053, 38.661762018720744], "label": "Getting a piercing"}]}, "eA5ANAdLvFE": {"duration": 121.836, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eA5ANAdLvFE", "annotations": [{"segment": [0.01, 114.3200596278904], "label": "Making an omelette"}]}, "tBC1HnUZzco": {"duration": 31.533, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=tBC1HnUZzco", "annotations": []}, "axzmwzPQ134": {"duration": 98.5, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=axzmwzPQ134", "annotations": [{"segment": [0.20000000000000004, 11.057335413416537], "label": "Croquet"}, {"segment": [34.554173166926674, 40.859010920436816], "label": "Croquet"}, {"segment": [77.26346801872074, 82.48604836193448], "label": "Croquet"}]}, "xuq9oRm8QZo": {"duration": 92.834, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xuq9oRm8QZo", "annotations": [{"segment": [25.69881802390474, 31.906959988829644], "label": "Painting fence"}, {"segment": [41.724486059836835, 48.07700330400479], "label": "Painting fence"}, {"segment": [69.15581052328939, 80.27271570058333], "label": "Painting fence"}]}, "sFrtQ8j8_jY": {"duration": 20.922, "subset": "testing", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=sFrtQ8j8_jY", "annotations": []}, "jhuRtkqUSSI": {"duration": 38.75, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=jhuRtkqUSSI", "annotations": [{"segment": [1.2982735085803432, 37.106468419656785], "label": "Plataform diving"}]}, "-_Ud3LXSjdE": {"duration": 66.6, "subset": "training", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=-_Ud3LXSjdE", "annotations": [{"segment": [0.5194604368174728, 65.97147547581903], "label": "Smoking a cigarette"}]}, "h49mHiWjXBA": {"duration": 218.732, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=h49mHiWjXBA", "annotations": [{"segment": [18.796388473954746, 28.350983194703577], "label": "Curling"}, {"segment": [18.11391742247269, 29.37468977192667], "label": "Curling"}, {"segment": [47.11893711046021, 70.66418838659126], "label": "Curling"}, {"segment": [98.64550149735568, 119.4608685675585], "label": "Curling"}, {"segment": [129.35669881404837, 197.26256843651328], "label": "Curling"}]}, "l866oFamRY4": {"duration": 18.11, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=l866oFamRY4", "annotations": [{"segment": [1.4127585023400937, 15.964171076443058], "label": "Walking the dog"}]}, "weB3srg6o4c": {"duration": 128.13, "subset": "training", "resolution": "624x480", "url": "https://www.youtube.com/watch?v=weB3srg6o4c", "annotations": [{"segment": [48.65441497659907, 110.39656786271452], "label": "Brushing teeth"}]}, "GZsplhHXQy0": {"duration": 11.03, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=GZsplhHXQy0", "annotations": [{"segment": [0.22270358814352575, 10.929606864274572], "label": "Hammer throw"}]}, "XCwPsQwzfiA": {"duration": 176.263, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XCwPsQwzfiA", "annotations": [{"segment": [27.223096564678915, 
160.58877007176287], "label": "Making a cake"}]}, "JFA1iR09BUA": {"duration": 232.339, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JFA1iR09BUA", "annotations": [{"segment": [19.573009010156326, 217.11541362714507], "label": "Capoeira"}]}, "Cw5NDI8NnZU": {"duration": 67.013, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Cw5NDI8NnZU", "annotations": [{"segment": [0.2, 21.849723388455537], "label": "Surfing"}, {"segment": [33.87229845865835, 64.60827298595945], "label": "Surfing"}]}, "jmL68eFmLjk": {"duration": 126.781, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jmL68eFmLjk", "annotations": []}, "lE3Hs4bsPhg": {"duration": 236.45, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=lE3Hs4bsPhg", "annotations": [{"segment": [33.81915744, 236.44836734693877], "label": "Tango"}]}, "V1yT7gh-59E": {"duration": 130.682, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=V1yT7gh-59E", "annotations": []}, "CEQqdt0vV0o": {"duration": 219.45, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CEQqdt0vV0o", "annotations": [{"segment": [64.01454107537302, 190.83169697879978], "label": "Breakdancing"}]}, "dAjhBWalWRM": {"duration": 32.09, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dAjhBWalWRM", "annotations": [{"segment": [1.101372031201248, 31.43916525429017], "label": "Playing blackjack"}]}, "Pd9qzQ2MOE8": {"duration": 123.856, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Pd9qzQ2MOE8", "annotations": [{"segment": [9.270589703588143, 104.50415132605305], "label": "Plastering"}]}, "YHqFNnhaFAY": {"duration": 63.925, "subset": "testing", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=YHqFNnhaFAY", "annotations": []}, "Qp_zbvHLwLk": 
{"duration": 70.4, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Qp_zbvHLwLk", "annotations": [{"segment": [0.01, 70.341], "label": "Mowing the lawn"}]}, "tokwDYW6rS0": {"duration": 128.76, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tokwDYW6rS0", "annotations": [{"segment": [54.21945397815912, 111.25028705148206], "label": "Painting"}]}, "oQDz6oh5vPI": {"duration": 68.73, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=oQDz6oh5vPI", "annotations": []}, "cyJAifECfH0": {"duration": 17.6, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=cyJAifECfH0", "annotations": [{"segment": [2.5278042121684865, 16.08230031201248], "label": "Clean and jerk"}]}, "vJyJG6Lvy9Q": {"duration": 179.77, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vJyJG6Lvy9Q", "annotations": [{"segment": [17.667599040000002, 95.02312792511701], "label": "Tumbling"}, {"segment": [96.30351528, 158.37187987519502], "label": "Tumbling"}]}, "tzsgg_o6xHo": {"duration": 98.2, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=tzsgg_o6xHo", "annotations": [{"segment": [69.33043658190327, 95.96067050078004], "label": "Tango"}, {"segment": [5.03161428, 68.08194404000001], "label": "Tango"}]}, "GiHxeCLGNy8": {"duration": 37.06, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=GiHxeCLGNy8", "annotations": [{"segment": [0.2, 36.859047619047615], "label": "Starting a campfire"}]}, "e-VWzL3rU_c": {"duration": 171.41, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=e-VWzL3rU_c", "annotations": []}, "0bzSBV3jHIY": {"duration": 152.184, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0bzSBV3jHIY", "annotations": [{"segment": [0.01, 152.183445], "label": "Playing 
pool"}]}, "SPhcOskjsDs": {"duration": 170.95, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=SPhcOskjsDs", "annotations": []}, "1MBVaveQDd8": {"duration": 131.45, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1MBVaveQDd8", "annotations": [{"segment": [0.01, 97.18183151326052], "label": "Cumbia"}]}, "ym_OhvcJ--w": {"duration": 76.533, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ym_OhvcJ--w", "annotations": [{"segment": [20.297247427312058, 28.893728690644227], "label": "Skiing"}, {"segment": [33.43076046851398, 46.44487711232449], "label": "Skiing"}]}, "Lomlff9wClo": {"duration": 55.43, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=Lomlff9wClo", "annotations": [{"segment": [5.630636989878718, 54.54189399922451], "label": "Chopping wood"}]}, "YrdpvaBDDlE": {"duration": 77.69, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YrdpvaBDDlE", "annotations": [{"segment": [0, 77.69399], "label": "Preparing salad"}]}, "TjLoGNBzNRA": {"duration": 110.319, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TjLoGNBzNRA", "annotations": [{"segment": [30.612945397815913, 96.99832137285492], "label": "Hanging wallpaper"}]}, "XjV0D7nJx0Q": {"duration": 101.03, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XjV0D7nJx0Q", "annotations": [{"segment": [17.7310437, 72.42323778393312], "label": "Fixing bicycle"}]}, "3ImTO0bzXPA": {"duration": 190.31, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3ImTO0bzXPA", "annotations": [{"segment": [56.57916300000001, 164.73658317472697], "label": "Getting a piercing"}]}, "KgGQ6dHKl8U": {"duration": 24.242, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KgGQ6dHKl8U", 
"annotations": [{"segment": [0.2, 24.001], "label": "Calf roping"}]}, "FSfxYKauc9w": {"duration": 117.981, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FSfxYKauc9w", "annotations": [{"segment": [1.4724566560893728, 117.780589569161], "label": "Playing ice hockey"}]}, "1krxpAYQVj0": {"duration": 88.445, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1krxpAYQVj0", "annotations": []}, "pwOTvCDsALk": {"duration": 107.63, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pwOTvCDsALk", "annotations": []}, "Ju64Ld8S7kA": {"duration": 20.202, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ju64Ld8S7kA", "annotations": []}, "W9H2qVnIWXs": {"duration": 35.39, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=W9H2qVnIWXs", "annotations": []}, "0fvL6IHKYF0": {"duration": 166.488, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0fvL6IHKYF0", "annotations": [{"segment": [86.4766193447738, 142.309872074883], "label": "Elliptical trainer"}]}, "lC12SCZ_-1o": {"duration": 102.77000000000001, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=lC12SCZ_-1o", "annotations": [{"segment": [6.252858196567863, 21.484179444617784], "label": "Shoveling snow"}, {"segment": [35.11220371918877, 92.34990567238691], "label": "Shoveling snow"}]}, "KePjkCySBCs": {"duration": 40.751, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=KePjkCySBCs", "annotations": [{"segment": [1.7165006115444619, 40.24240322620905], "label": "Rollerblading"}]}, "SaG9e90z1j8": {"duration": 99.99000000000001, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SaG9e90z1j8", "annotations": [{"segment": [0.01, 90.89797659906397], "label": "Polishing forniture"}]}, 
"0Bd8Js6Pf6E": {"duration": 68.151, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0Bd8Js6Pf6E", "annotations": [{"segment": [5.42227592199688, 67.19369377847113], "label": "Playing congas"}]}, "A_SU2jjfMJk": {"duration": 62.97, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=A_SU2jjfMJk", "annotations": [{"segment": [12.13391496, 55.573987360000004], "label": "Cleaning shoes"}]}, "t_PZmjzVSoE": {"duration": 125.04, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=t_PZmjzVSoE", "annotations": []}, "6DXH6kwMe-Q": {"duration": 180.68, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=6DXH6kwMe-Q", "annotations": [{"segment": [16.770874873090165, 164.18545569033648], "label": "Painting"}]}, "U7OZcgwLRcc": {"duration": 199.599, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=U7OZcgwLRcc", "annotations": [{"segment": [7.161868572702092, 175.9328582706708], "label": "Playing drums"}]}, "HlYwtqJALns": {"duration": 14.884, "subset": "validation", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=HlYwtqJALns", "annotations": [{"segment": [0.7894531076443059, 13.420702829953198], "label": "Baton twirling"}]}, "8C1EFngZC3Q": {"duration": 57.24, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8C1EFngZC3Q", "annotations": [{"segment": [4.330738889419522, 26.029080129191563], "label": "Doing step aerobics"}]}, "9MN6GNhvwzA": {"duration": 12.515, "subset": "testing", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=9MN6GNhvwzA", "annotations": []}, "gMV5Mso7758": {"duration": 177.146, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gMV5Mso7758", "annotations": [{"segment": [81.22885179407176, 84.82060374414976], "label": "Rock-paper-scissors"}, {"segment": 
[89.79379875195006, 94.21441653666146], "label": "Rock-paper-scissors"}, {"segment": [136.4865741029641, 139.54150546021842], "label": "Rock-paper-scissors"}, {"segment": [145.89686115444619, 150.87061778471138], "label": "Rock-paper-scissors"}]}, "_n0cR3Oshxg": {"duration": 218.454, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_n0cR3Oshxg", "annotations": [{"segment": [65.7491542999529, 84.11933187004637], "label": "Baking cookies"}, {"segment": [98.06705928437658, 102.48950944013981], "label": "Baking cookies"}, {"segment": [113.3755405927878, 160.33623133197676], "label": "Baking cookies"}]}, "nXVNITiaaTQ": {"duration": 116.49000000000001, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nXVNITiaaTQ", "annotations": [{"segment": [33.439798315132606, 116.49408163265306], "label": "Doing motocross"}]}, "EF74-5YIhAk": {"duration": 44.676, "subset": "validation", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=EF74-5YIhAk", "annotations": [{"segment": [10.45441287051482, 37.91467067706708], "label": "Canoeing"}]}, "LaWlIUKH3PU": {"duration": 176.728, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LaWlIUKH3PU", "annotations": [{"segment": [10.198435257410297, 24.531371294851795], "label": "Baton twirling"}, {"segment": [29.49277223088924, 165.67441653666148], "label": "Baton twirling"}]}, "spJaetMCD20": {"duration": 114.61, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=spJaetMCD20", "annotations": [{"segment": [12.337511753531366, 82.78649191137713], "label": "Springboard diving"}]}, "-8awLlFLcQc": {"duration": 161.12, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-8awLlFLcQc", "annotations": [{"segment": [95.26632926053043, 161.12326530612245], "label": "Vacuuming floor"}]}, "NHznDFD3V3k": {"duration": 61.21, "subset": "validation", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NHznDFD3V3k", "annotations": [{"segment": [12.213117004680187, 21.754614664586583], "label": "Vacuuming floor"}]}, "B42CY1Z6eV8": {"duration": 127.826, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=B42CY1Z6eV8", "annotations": [{"segment": [0.01, 127.761], "label": "Putting on shoes"}]}, "4sVEFT3jdlU": {"duration": 43.98, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=4sVEFT3jdlU", "annotations": []}, "B5hzlU0OepQ": {"duration": 154.6, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=B5hzlU0OepQ", "annotations": [{"segment": [49.68360936886456, 116.97354541341653], "label": "Preparing salad"}]}, "Rj_SwlpOhNk": {"duration": 109.552, "subset": "validation", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=Rj_SwlpOhNk", "annotations": [{"segment": [64.20768587669362, 102.77262977355048], "label": "Canoeing"}]}, "s84BStnbfK8": {"duration": 169.09, "subset": "training", "resolution": "480x720", "url": "https://www.youtube.com/watch?v=s84BStnbfK8", "annotations": [{"segment": [133.98622966999997, 142.53188665], "label": "Playing squash"}, {"segment": [71.70641496, 115.92266545999999], "label": "Playing squash"}]}, "AjaR1IOAwcw": {"duration": 65.852, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AjaR1IOAwcw", "annotations": []}, "bdC2YPVHGZM": {"duration": 141.039, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bdC2YPVHGZM", "annotations": [{"segment": [50.58600624024961, 115.48443837753511], "label": "Hurling"}]}, "5n8wY8hwy3Y": {"duration": 175.334, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5n8wY8hwy3Y", "annotations": [{"segment": [139.06701237110872, 162.79020930890033], "label": "Changing car wheel"}]}, "vlqrUu4gi0Q": {"duration": 
220.056, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vlqrUu4gi0Q", "annotations": [{"segment": [26.87282295292533, 77.25936500900009], "label": "Slacklining"}, {"segment": [79.31595856230928, 200.65497710962316], "label": "Slacklining"}]}, "ImXRDdhcCgs": {"duration": 213.82999999999998, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ImXRDdhcCgs", "annotations": [{"segment": [13.60826848, 210.4498205928237], "label": "Chopping wood"}]}, "MtFX7uTHwFQ": {"duration": 234.894, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MtFX7uTHwFQ", "annotations": [{"segment": [0.01, 186.86991950078004], "label": "Playing ten pins"}]}, "DbZ0ns9s2X0": {"duration": 181.02, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DbZ0ns9s2X0", "annotations": [{"segment": [18.634549141965678, 162.36439157566303], "label": "Painting"}]}, "WaXfGbfUYJg": {"duration": 223.77, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=WaXfGbfUYJg", "annotations": [{"segment": [58.64810043830325, 80.99023393860925], "label": "Doing nails"}, {"segment": [100.88869658731927, 141.3838135566239], "label": "Doing nails"}, {"segment": [164.0750428928722, 166.51871374446816], "label": "Doing nails"}, {"segment": [184.3226013775245, 186.0680805572359], "label": "Doing nails"}, {"segment": [196.89004985335413, 201.77739151638065], "label": "Doing nails"}]}, "CIPSw35iF3k": {"duration": 205.45, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CIPSw35iF3k", "annotations": [{"segment": [114.5838460137045, 138.62241510748865], "label": "Preparing salad"}]}, "T84P-XmTJUE": {"duration": 149.63, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=T84P-XmTJUE", "annotations": []}, "pYb8jzB5vs0": {"duration": 194.653, "subset": "training", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=pYb8jzB5vs0", "annotations": [{"segment": [24.900992426091598, 78.65069558972836], "label": "Scuba diving"}, {"segment": [83.81309645855221, 99.90764034370899], "label": "Scuba diving"}, {"segment": [102.9443467371348, 126.63065660585606], "label": "Scuba diving"}, {"segment": [138.4738115402167, 152.746331589318], "label": "Scuba diving"}, {"segment": [167.9298619875195, 188.5794652698908], "label": "Scuba diving"}]}, "xGpaEqbVgBI": {"duration": 80.39, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=xGpaEqbVgBI", "annotations": [{"segment": [19.52090592, 55.441201920000005], "label": "Playing field hockey"}, {"segment": [63.04884384, 66.26261728], "label": "Playing field hockey"}]}, "JpEhNDnRZZY": {"duration": 20.02, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JpEhNDnRZZY", "annotations": []}, "MCnvxOLnbsg": {"duration": 53.613, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=MCnvxOLnbsg", "annotations": [{"segment": [2.4555412489394906, 52.485103554235444], "label": "Hand car wash"}]}, "ekgfptN7tQk": {"duration": 159.591, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ekgfptN7tQk", "annotations": [{"segment": [10.207832568174727, 19.170807506084245], "label": "Surfing"}, {"segment": [23.901266501092046, 26.390981761622466], "label": "Surfing"}, {"segment": [29.627611600312015, 32.36629838689548], "label": "Surfing"}, {"segment": [38.34161501216849, 59.25522320062403], "label": "Surfing"}, {"segment": [62.242881513260535, 64.23465372168486], "label": "Surfing"}, {"segment": [67.72025508642746, 72.20174255538222], "label": "Surfing"}, {"segment": [74.94042934196568, 80.91574596723869], "label": "Surfing"}, {"segment": [89.62974937909516, 92.61740769173167], "label": "Surfing"}, {"segment": [98.34375279095164, 106.06187009859595], "label": "Surfing"}, 
{"segment": [110.54335756755071, 112.03718672386896], "label": "Surfing"}, {"segment": [116.51867419282372, 128.71827896942278], "label": "Surfing"}, {"segment": [131.7059372820593, 135.68948169890797], "label": "Surfing"}]}, "xYeqvN8cihg": {"duration": 36.061, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xYeqvN8cihg", "annotations": [{"segment": [11.417654353316406, 24.72974057761547], "label": "Playing rubik cube"}]}, "p9hJmlWGvFI": {"duration": 50.759, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=p9hJmlWGvFI", "annotations": [{"segment": [0, 45.61166976599064], "label": "Beach soccer"}]}, "F51cKkjt6tk": {"duration": 31.42, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=F51cKkjt6tk", "annotations": [{"segment": [3.945454069422776, 22.91304071372855], "label": "Putting on makeup"}]}, "ksyr3tHc0Vs": {"duration": 38.69, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ksyr3tHc0Vs", "annotations": [{"segment": [11.821460218408737, 27.38236193447738], "label": "Shot put"}]}, "Z9pWpZK4k2M": {"duration": 228.438, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Z9pWpZK4k2M", "annotations": [{"segment": [0.01, 211.81130692705227], "label": "Powerbocking"}]}, "GVmGXTEm46s": {"duration": 77.462, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GVmGXTEm46s", "annotations": [{"segment": [0.2, 76.01388350467289], "label": "Cleaning sink"}]}, "UF2v4emX7rc": {"duration": 69.24, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=UF2v4emX7rc", "annotations": [{"segment": [18.03946870202808, 69.241314], "label": "Paintball"}]}, "yUvSujxnFE0": {"duration": 79.53, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=yUvSujxnFE0", "annotations": []}, "nQUfEEVHMTs": 
{"duration": 19.67, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nQUfEEVHMTs", "annotations": []}, "Tix8XrlBZxg": {"duration": 200.249, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Tix8XrlBZxg", "annotations": [{"segment": [37.47538221528861, 149.91650546021842], "label": "Removing ice from car"}]}, "cByxAZfw_hM": {"duration": 51.06, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=cByxAZfw_hM", "annotations": [{"segment": [10.992782960298005, 38.30832621], "label": "Long jump"}]}, "FRag7CCTP4k": {"duration": 62.35, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=FRag7CCTP4k", "annotations": [{"segment": [21.00052418096724, 54.3109423], "label": "High jump"}]}, "bh6VHVHMoo8": {"duration": 26.587, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=bh6VHVHMoo8", "annotations": [{"segment": [4.109307539717059, 22.630129969623603], "label": "Kneeling"}]}, "M0AAcy8nYOU": {"duration": 84.85, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=M0AAcy8nYOU", "annotations": []}, "C_FKTQZgANw": {"duration": 208.19, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=C_FKTQZgANw", "annotations": []}, "SnFWB9NLFjY": {"duration": 216.782, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SnFWB9NLFjY", "annotations": [{"segment": [45.676889516107565, 204.0418571272434], "label": "Skiing"}]}, "cnMvMvVBXiI": {"duration": 144.893, "subset": "testing", "resolution": "1200x720", "url": "https://www.youtube.com/watch?v=cnMvMvVBXiI", "annotations": []}, "DNlnKpM-QSw": {"duration": 128.012, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DNlnKpM-QSw", "annotations": []}, "09G6PvM5q9Y": {"duration": 203.22, "subset": "training", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=09G6PvM5q9Y", "annotations": [{"segment": [88.79335153, 188.75151295085806], "label": "Cleaning shoes"}, {"segment": [46.73066895, 67.88394284], "label": "Cleaning shoes"}]}, "hVTYRjSDyC4": {"duration": 136.07, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hVTYRjSDyC4", "annotations": []}, "hzU9--vcDMY": {"duration": 124.55, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hzU9--vcDMY", "annotations": [{"segment": [11.952203588143528, 108.5415561622465], "label": "Paintball"}]}, "7Xl0K-revJc": {"duration": 136.44, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=7Xl0K-revJc", "annotations": [{"segment": [0, 136.44045351473923], "label": "Playing saxophone"}]}, "0-kEI13BtcY": {"duration": 158.244, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0-kEI13BtcY", "annotations": [{"segment": [57.52082595475819, 62.45823590795632], "label": "Spread mulch"}, {"segment": [105.90744349609984, 109.61050096099845], "label": "Spread mulch"}, {"segment": [115.7822634024961, 117.01661589079563], "label": "Spread mulch"}, {"segment": [139.2349606801872, 149.85039207956316], "label": "Spread mulch"}]}, "14X-i7vqXWU": {"duration": 211.14, "subset": "training", "resolution": "612x360", "url": "https://www.youtube.com/watch?v=14X-i7vqXWU", "annotations": [{"segment": [84.60762008, 201.05641263650546], "label": "Cheerleading"}]}, "hPJw9_nPo_s": {"duration": 204.289, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=hPJw9_nPo_s", "annotations": [{"segment": [6.491422364049953, 196.46097439676024], "label": "Waterskiing"}]}, "n5RVQbgz1Rs": {"duration": 214.414, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=n5RVQbgz1Rs", "annotations": [{"segment": [5.852289391575663, 57.686852574102964], "label": 
"Hurling"}]}, "-EIsT868Trw": {"duration": 157.9, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-EIsT868Trw", "annotations": []}, "lQq_JPRFEMw": {"duration": 171.8, "subset": "training", "resolution": "346x240", "url": "https://www.youtube.com/watch?v=lQq_JPRFEMw", "annotations": [{"segment": [13.93345808424337, 147.10516323556942], "label": "Playing field hockey"}]}, "E4yf1mz4ij0": {"duration": 89.49, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=E4yf1mz4ij0", "annotations": [{"segment": [17.15495007800312, 39.74927457098284], "label": "Shot put"}, {"segment": [44.07287987519501, 57.32263806552262], "label": "Shot put"}, {"segment": [58.71734945397815, 71.13028081123244], "label": "Shot put"}, {"segment": [73.64076131045242, 85.7747503900156], "label": "Shot put"}]}, "ztogHoQpE4A": {"duration": 52.942, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ztogHoQpE4A", "annotations": [{"segment": [0.01, 52.901], "label": "Welding"}]}, "CBoitanoH4s": {"duration": 226.49, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=CBoitanoH4s", "annotations": [{"segment": [69.6069036121989, 226.487437], "label": "Starting a campfire"}]}, "V1IHwwpyFUE": {"duration": 134.19, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=V1IHwwpyFUE", "annotations": [{"segment": [21.980892950003714, 99.19713712], "label": "Playing kickball"}]}, "nypn_gYS8Ik": {"duration": 61.12, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=nypn_gYS8Ik", "annotations": [{"segment": [1.00043618, 59.84222776911077], "label": "Brushing hair"}]}, "nfIM66dU_J0": {"duration": 94.22999999999999, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=nfIM66dU_J0", "annotations": [{"segment": [37.474656786271446, 86.7060686427457], "label": "Doing 
kickboxing"}]}, "Keuj_3QyLq0": {"duration": 8.987, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=Keuj_3QyLq0", "annotations": [{"segment": [3.6168786552262095, 6.140282368174728], "label": "Doing a powerbomb"}]}, "9wH6BIpe9X4": {"duration": 92.3, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9wH6BIpe9X4", "annotations": [{"segment": [11.087440507144095, 75.74017800984149], "label": "Horseback riding"}]}, "jelxK3R-heg": {"duration": 18.019, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=jelxK3R-heg", "annotations": [{"segment": [1.2367989516380655, 17.5119487925117], "label": "Blowing leaves"}]}, "wzhUvh_-eok": {"duration": 153.647, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wzhUvh_-eok", "annotations": []}, "0yy1Df51_QE": {"duration": 111.456, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=0yy1Df51_QE", "annotations": []}, "IjwOh2YmT9U": {"duration": 176.26, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IjwOh2YmT9U", "annotations": [{"segment": [6.09161472, 61.433443605304205], "label": "Skateboarding"}, {"segment": [62.252213160000004, 163.13534129641184], "label": "Skateboarding"}]}, "m4NBHYqtBP8": {"duration": 146.635, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=m4NBHYqtBP8", "annotations": []}, "aAY_M6M26TI": {"duration": 72.73, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=aAY_M6M26TI", "annotations": [{"segment": [2.097580593603744, 50.10029084000001], "label": "Ping-pong"}]}, "FPbPkjLAEzs": {"duration": 78.9, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FPbPkjLAEzs", "annotations": [{"segment": [0.49206448049922, 78.48428463962559], "label": "Windsurfing"}, {"segment": [34.88844418, 
43.102038279999995], "label": "Windsurfing"}]}, "XxM72xEvsjE": {"duration": 83.92, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XxM72xEvsjE", "annotations": [{"segment": [2.667066, 63.18430320000001], "label": "Mowing the lawn"}]}, "XilAaJ_r4tA": {"duration": 105.187, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=XilAaJ_r4tA", "annotations": []}, "XQ0_P0LpCFM": {"duration": 165.233, "subset": "training", "resolution": "488x272", "url": "https://www.youtube.com/watch?v=XQ0_P0LpCFM", "annotations": [{"segment": [0.2, 18.559735107771658], "label": "Hurling"}, {"segment": [40.728307597610026, 56.710301718191175], "label": "Hurling"}, {"segment": [60.31913910025789, 76.30113322083905], "label": "Hurling"}, {"segment": [93.3142237362964, 165.0331972789116], "label": "Hurling"}]}, "V50vtrHboIE": {"duration": 219.569, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=V50vtrHboIE", "annotations": [{"segment": [119.70190959257064, 147.70191270299054], "label": "Hula hoop"}]}, "LLFhSU-XuTI": {"duration": 59.98, "subset": "validation", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=LLFhSU-XuTI", "annotations": [{"segment": [0.01, 5.801221290795632], "label": "Spinning"}, {"segment": [15.62587025101404, 33.21667029407177], "label": "Spinning"}]}, "onBAyGhqubg": {"duration": 77.37, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=onBAyGhqubg", "annotations": [{"segment": [0, 62.16065024180968], "label": "Springboard diving"}]}, "s-kFpQRPcyE": {"duration": 163.56, "subset": "training", "resolution": "1920x800", "url": "https://www.youtube.com/watch?v=s-kFpQRPcyE", "annotations": [{"segment": [1.48513388, 57.06806851], "label": "Getting a haircut"}, {"segment": [64.4921023, 147.05996329875194], "label": "Getting a haircut"}]}, "0pcrpO0Gd8M": {"duration": 167.95, "subset": "validation", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=0pcrpO0Gd8M", "annotations": [{"segment": [86.9881082916786, 167.7499319727891], "label": "Putting on makeup"}]}, "674t-jyyKeg": {"duration": 17.532, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=674t-jyyKeg", "annotations": [{"segment": [0.7636006240249611, 12.572138845553823], "label": "Having an ice cream"}]}, "UaV-pj2Lu2Y": {"duration": 13.329, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UaV-pj2Lu2Y", "annotations": []}, "E5YBb4rzV30": {"duration": 78.81, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=E5YBb4rzV30", "annotations": []}, "gHCTTjqJ36c": {"duration": 91.045, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=gHCTTjqJ36c", "annotations": []}, "aEAmMKa8iuM": {"duration": 40.612, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aEAmMKa8iuM", "annotations": [{"segment": [0.01, 40.611678], "label": "BMX"}]}, "U_ZW0tTvf0k": {"duration": 85.8, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=U_ZW0tTvf0k", "annotations": [{"segment": [20.31844554, 68.56638849000001], "label": "Getting a piercing"}, {"segment": [4.76087553, 13.009399109999999], "label": "Getting a piercing"}]}, "im-nQ-KEzaQ": {"duration": 129.38, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=im-nQ-KEzaQ", "annotations": [{"segment": [55.85292846, 62.186965889999996], "label": "Cleaning windows"}, {"segment": [5.18192353, 39.45795931], "label": "Cleaning windows"}]}, "0-igBOtXYeE": {"duration": 154.344, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0-igBOtXYeE", "annotations": [{"segment": [0.01, 131.20896255850235], "label": "Doing a powerbomb"}]}, "sJK-NjgOB6g": {"duration": 97.55, "subset": "training", "resolution": 
"854x470", "url": "https://www.youtube.com/watch?v=sJK-NjgOB6g", "annotations": [{"segment": [20.834258970358814, 71.93141965678628], "label": "Brushing teeth"}]}, "GasbPKADdTo": {"duration": 235.96, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=GasbPKADdTo", "annotations": []}, "rq-Xyoo5Rs0": {"duration": 130.64, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=rq-Xyoo5Rs0", "annotations": [{"segment": [0.01, 130.581], "label": "Tumbling"}]}, "7vQCa7Vg1cQ": {"duration": 26.332, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=7vQCa7Vg1cQ", "annotations": []}, "TrRsujPlt5w": {"duration": 63.414, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TrRsujPlt5w", "annotations": [{"segment": [0.01, 63.413696], "label": "Elliptical trainer"}]}, "ktYgm8bRK-g": {"duration": 151.418, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ktYgm8bRK-g", "annotations": [{"segment": [19.205157515993093, 118.34459037571033], "label": "Hula hoop"}]}, "0_BvBpwDMpU": {"duration": 57.17, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0_BvBpwDMpU", "annotations": [{"segment": [0, 57.167528344671204], "label": "Using the pommel horse"}]}, "aEFEaypwmFM": {"duration": 75.21000000000001, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aEFEaypwmFM", "annotations": []}, "ZzarNWOJnro": {"duration": 79.25, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=ZzarNWOJnro", "annotations": [{"segment": [0.01, 43.02480029953198], "label": "Chopping wood"}]}, "E1-eVfnNfXI": {"duration": 49.691, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=E1-eVfnNfXI", "annotations": [{"segment": [0.01, 49.28531694041457], "label": "Having an ice cream"}]}, "abWGVKYPffM": 
{"duration": 22.036, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=abWGVKYPffM", "annotations": [{"segment": [0.2, 7.494212867394696], "label": "Cutting the grass"}, {"segment": [12.547649984399376, 21.107553672386896], "label": "Cutting the grass"}]}, "x2DTmwZa8yE": {"duration": 16.069, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=x2DTmwZa8yE", "annotations": [{"segment": [0.01, 16.068208616780044], "label": "Peeling potatoes"}]}, "ol4ScGsyhbI": {"duration": 222.308, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ol4ScGsyhbI", "annotations": [{"segment": [17.340705601013156, 37.45592409818842], "label": "Arm wrestling"}, {"segment": [40.57725110637079, 50.28804624293815], "label": "Arm wrestling"}, {"segment": [48.9007897948571, 57.22432848334341], "label": "Arm wrestling"}, {"segment": [64.85423894778921, 90.17166912526841], "label": "Arm wrestling"}, {"segment": [97.8015795897142, 111.32732995850446], "label": "Arm wrestling"}, {"segment": [116.52954163880841, 128.66803555951762], "label": "Arm wrestling"}, {"segment": [134.21706135184183, 180.34333825053682], "label": "Arm wrestling"}, {"segment": [182.4242229226584, 201.84581155694227], "label": "Arm wrestling"}, {"segment": [208.08846552262088, 212.2502348330733], "label": "Arm wrestling"}]}, "otWTm1_aAqI": {"duration": 40.59, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=otWTm1_aAqI", "annotations": [{"segment": [3.2293450624024955, 36.599244040561615], "label": "Using the pommel horse"}]}, "tNFK-TZ7jv8": {"duration": 84.707, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tNFK-TZ7jv8", "annotations": []}, "BD4i-g3U5Kw": {"duration": 181.604, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BD4i-g3U5Kw", "annotations": [{"segment": [0.01, 181.603174], "label": 
"Playing ice hockey"}]}, "eQ0sW4hJ9zU": {"duration": 179.49, "subset": "testing", "resolution": "852x480", "url": "https://www.youtube.com/watch?v=eQ0sW4hJ9zU", "annotations": []}, "00Dk03Jr70M": {"duration": 152.347, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=00Dk03Jr70M", "annotations": [{"segment": [13.091624758230125, 152.3461224489796], "label": "Plastering"}]}, "aoY0XhAXm7M": {"duration": 138.252, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=aoY0XhAXm7M", "annotations": [{"segment": [0.2, 117.73582839313572], "label": "Making a cake"}]}, "f0kwKCAseXU": {"duration": 23.99, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=f0kwKCAseXU", "annotations": []}, "r1dvt537JQU": {"duration": 178.66, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=r1dvt537JQU", "annotations": [{"segment": [20.206613108061738, 100.75435363536991], "label": "Using parallel bars"}]}, "34KalqGygZ0": {"duration": 104.97999999999999, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=34KalqGygZ0", "annotations": [{"segment": [33.245476897035886, 37.99483073946958], "label": "Horseback riding"}, {"segment": [41.597788826833074, 43.235497048361935], "label": "Horseback riding"}, {"segment": [46.019601024961, 54.04437131045242], "label": "Horseback riding"}, {"segment": [57.31978775351014, 99.24511822464899], "label": "Horseback riding"}]}, "iYH0Ol3xIZU": {"duration": 141.25, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=iYH0Ol3xIZU", "annotations": [{"segment": [9.911302652106084, 133.91271138845556], "label": "Tai chi"}]}, "ssktVpcv9WI": {"duration": 76.86, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ssktVpcv9WI", "annotations": [{"segment": [6.528805223868955, 21.503129131825272], "label": "Washing 
hands"}]}, "KiZQNharaCE": {"duration": 13.909, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=KiZQNharaCE", "annotations": []}, "nSuPseBeQI0": {"duration": 30.04, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nSuPseBeQI0", "annotations": [{"segment": [0.01, 1.2206233385335414], "label": "Skateboarding"}, {"segment": [25.914772418096724, 28.30907204368175], "label": "Skateboarding"}]}, "shLUZZS9oYc": {"duration": 52.408, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=shLUZZS9oYc", "annotations": [{"segment": [0, 52.361], "label": "Trimming branches or hedges"}]}, "lmqLciDIUgk": {"duration": 89.65, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lmqLciDIUgk", "annotations": []}, "yu1XjQUctiM": {"duration": 94.32, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yu1XjQUctiM", "annotations": [{"segment": [24.616315565522623, 88.83918065288611], "label": "Brushing teeth"}]}, "ul9kg5QdvJo": {"duration": 72.981, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ul9kg5QdvJo", "annotations": [{"segment": [5.797053042121685, 55.924511700468024], "label": "Having an ice cream"}]}, "pwKZRo19Vf0": {"duration": 129.196, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pwKZRo19Vf0", "annotations": [{"segment": [18.333589703588142, 120.27640717628704], "label": "Making an omelette"}]}, "9fh_ray_B80": {"duration": 56.17, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9fh_ray_B80", "annotations": [{"segment": [2.278308623501403, 56.16907], "label": "Rock climbing"}]}, "op58Lalekrk": {"duration": 205.59, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=op58Lalekrk", "annotations": [{"segment": [3.5279625585023404, 
71.84214664586584], "label": "Playing flauta"}, {"segment": [83.06748205928238, 99.42439937597504], "label": "Playing flauta"}, {"segment": [102.63163806552262, 172.22871762870517], "label": "Playing flauta"}, {"segment": [188.26491107644307, 200.13169422776912], "label": "Playing flauta"}]}, "zy7rd78yBnY": {"duration": 209.63, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zy7rd78yBnY", "annotations": [{"segment": [8.173985959438378, 209.581], "label": "Painting"}]}, "Qm-XTJ-uG5s": {"duration": 70.36, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Qm-XTJ-uG5s", "annotations": [{"segment": [0.6565125263545153, 70.35643633039807], "label": "Playing racquetball"}]}, "1RQ27XZKU1E": {"duration": 227.161, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1RQ27XZKU1E", "annotations": [{"segment": [71.12044249475491, 148.96371288702397], "label": "Braiding hair"}]}, "E5FiPYZARLE": {"duration": 189.684, "subset": "validation", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=E5FiPYZARLE", "annotations": [{"segment": [0, 2.6632672164029416], "label": "Playing beach volleyball"}, {"segment": [17.755114776019614, 20.418381992422553], "label": "Playing beach volleyball"}, {"segment": [33.73471807443726, 39.06125250724315], "label": "Playing beach volleyball"}, {"segment": [65.98984325087288, 74.57148205928236], "label": "Playing beach volleyball"}, {"segment": [100.02047990491047, 102.38782854171309], "label": "Playing beach volleyball"}, {"segment": [126.35723348933956, 129.61233786494316], "label": "Playing beach volleyball"}, {"segment": [154.469498124805, 160.387869700468], "label": "Playing beach volleyball"}]}, "eS0v1PJ-6Y8": {"duration": 52.106, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eS0v1PJ-6Y8", "annotations": []}, "gNFrILoB8LY": {"duration": 16.463, "subset": "training", 
"resolution": "328x240", "url": "https://www.youtube.com/watch?v=gNFrILoB8LY", "annotations": [{"segment": [0.01, 9.541422776911075], "label": "Mooping floor"}, {"segment": [13.388770670826833, 16.441], "label": "Mooping floor"}]}, "3abD7z6vRPM": {"duration": 78.03999999999999, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3abD7z6vRPM", "annotations": [{"segment": [5.478786322932917, 18.50612269079563], "label": "Snatch"}, {"segment": [58.92739067332293, 73.17223511294853], "label": "Snatch"}]}, "Kwhp0DNop9I": {"duration": 195.466, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Kwhp0DNop9I", "annotations": [{"segment": [14.941958745016466, 71.965351800312], "label": "Scuba diving"}, {"segment": [84.46780698595944, 111.91222080811234], "label": "Scuba diving"}, {"segment": [117.4011035725429, 140.271448424337], "label": "Scuba diving"}]}, "qHU7T2LBToI": {"duration": 235.78, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qHU7T2LBToI", "annotations": [{"segment": [21.328889235569424, 235.00545865834633], "label": "Cheerleading"}]}, "iXaW8uyzBNs": {"duration": 191.844, "subset": "training", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=iXaW8uyzBNs", "annotations": [{"segment": [85.24790171606864, 133.1063728549142], "label": "Painting fence"}, {"segment": [160.02676287051483, 168.10287987519501], "label": "Painting fence"}]}, "qCe2NUqGiPg": {"duration": 53.94, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qCe2NUqGiPg", "annotations": []}, "c975-fpWTS4": {"duration": 18.576, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=c975-fpWTS4", "annotations": []}, "m731tx8N5-Y": {"duration": 99.568, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=m731tx8N5-Y", "annotations": []}, "DDwBAbeGejE": {"duration": 143.43, 
"subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=DDwBAbeGejE", "annotations": [{"segment": [23.606597684315535, 27.186745200420262], "label": "Raking leaves"}, {"segment": [47.548834198265894, 53.59033313169261], "label": "Raking leaves"}, {"segment": [56.05168454901461, 58.73679518609316], "label": "Raking leaves"}, {"segment": [70.81979305294661, 76.63753276661679], "label": "Raking leaves"}]}, "15yU0R2MBQM": {"duration": 58.26, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=15yU0R2MBQM", "annotations": []}, "HPKHDze6XfA": {"duration": 57.331, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HPKHDze6XfA", "annotations": []}, "2n6MvpNewpI": {"duration": 77.091, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=2n6MvpNewpI", "annotations": [{"segment": [0.2, 75.52923192122125], "label": "Mooping floor"}]}, "hvVu0YlsiIA": {"duration": 166.72, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hvVu0YlsiIA", "annotations": []}, "MNSDK-vCwTM": {"duration": 84.15, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=MNSDK-vCwTM", "annotations": [{"segment": [37.93931509369218, 64.72000708424336], "label": "Gargling mouthwash"}]}, "eWKFbwBJK2w": {"duration": 49.85, "subset": "testing", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=eWKFbwBJK2w", "annotations": []}, "MJgqA2vwCi4": {"duration": 90.883, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MJgqA2vwCi4", "annotations": []}, "g5-CU3qfSWY": {"duration": 70.71000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=g5-CU3qfSWY", "annotations": [{"segment": [2.867899858851497, 4.52245746972736], "label": "Pole vault"}, {"segment": [10.589168709605529, 13.67767624990714], "label": "Pole vault"}, 
{"segment": [17.648614516009214, 20.185602594383777], "label": "Pole vault"}, {"segment": [26.142009917316695, 29.782036614664587], "label": "Pole vault"}, {"segment": [32.42932875819033, 34.52510170514821], "label": "Pole vault"}, {"segment": [39.488774474258975, 44.34214340405616], "label": "Pole vault"}, {"segment": [50.29855072698908, 51.29128528081124], "label": "Pole vault"}, {"segment": [57.247692603744156, 59.12285787207489], "label": "Pole vault"}, {"segment": [62.873188408736354, 70.59445716068643], "label": "Pole vault"}]}, "1HsdK9wc-ZU": {"duration": 48.089, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1HsdK9wc-ZU", "annotations": []}, "kgvbU_3jEy8": {"duration": 163.886, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=kgvbU_3jEy8", "annotations": [{"segment": [12.783653659071533, 52.41298000219329], "label": "Futsal"}, {"segment": [59.57182605127334, 136.52942107888398], "label": "Futsal"}, {"segment": [143.17691931357254, 156.98326510452418], "label": "Futsal"}]}, "GqCS5MNl6a4": {"duration": 194.793, "subset": "training", "resolution": "426x236", "url": "https://www.youtube.com/watch?v=GqCS5MNl6a4", "annotations": [{"segment": [31.604350628446905, 160.75674502354244], "label": "Removing ice from car"}]}, "wdsZn1Yu_5M": {"duration": 119.212, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wdsZn1Yu_5M", "annotations": [{"segment": [1.668583834324295, 3.893366052032529], "label": "Wakeboarding"}, {"segment": [19.09604453970546, 24.287203047691342], "label": "Wakeboarding"}, {"segment": [31.703143773385456, 32.815534882239575], "label": "Wakeboarding"}, {"segment": [44.68104004335015, 48.01821336991251], "label": "Wakeboarding"}, {"segment": [53.023973359756035, 54.87795854117956], "label": "Wakeboarding"}, {"segment": [60.6253126035925, 66.92886222043249], "label": "Wakeboarding"}, {"segment": [69.89523851071014, 77.86737402951904], 
"label": "Wakeboarding"}, {"segment": [81.57534435611313, 83.05853248675076], "label": "Wakeboarding"}, {"segment": [99.55900044009448, 103.08157225035886], "label": "Wakeboarding"}]}, "c3SJUucMqJQ": {"duration": 183.113, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=c3SJUucMqJQ", "annotations": [{"segment": [50.27734018204266, 79.12967744560123], "label": "Clipping cat claws"}, {"segment": [87.41401190741507, 125.97901716068644], "label": "Clipping cat claws"}]}, "jVxsUaM12rs": {"duration": 191.91, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=jVxsUaM12rs", "annotations": []}, "dsVvnFkGAn0": {"duration": 82.8, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dsVvnFkGAn0", "annotations": [{"segment": [26.61037926142896, 65.23418217000788], "label": "Playing violin"}]}, "1wLF2M60OrI": {"duration": 87.87, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1wLF2M60OrI", "annotations": []}, "0DFz3sgfda0": {"duration": 92.03999999999999, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0DFz3sgfda0", "annotations": []}, "mfq5Y7S3BWs": {"duration": 101.36, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mfq5Y7S3BWs", "annotations": [{"segment": [20.778436037441498, 91.25130655226208], "label": "Spinning"}]}, "FFCmoQp_xVA": {"duration": 68.11, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FFCmoQp_xVA", "annotations": [{"segment": [10.35905207636877, 56.895101404056156], "label": "Mixing drinks"}]}, "xE9anOqq2XQ": {"duration": 153.51, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xE9anOqq2XQ", "annotations": [{"segment": [7.42390128802431, 12.692476395654465], "label": "Javelin throw"}, {"segment": [26.103394851440317, 28.977163091965856], "label": "Javelin 
throw"}, {"segment": [65.37822739469578, 68.01251494539781], "label": "Javelin throw"}, {"segment": [83.81824024960997, 87.88941191887676], "label": "Javelin throw"}, {"segment": [105.85046340093604, 108.72423163806552], "label": "Javelin throw"}, {"segment": [125.24839900156005, 127.88268655226209], "label": "Javelin throw"}]}, "kdGhUyT0fiM": {"duration": 236.473, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=kdGhUyT0fiM", "annotations": []}, "nxs4o4S43oY": {"duration": 172.43, "subset": "training", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=nxs4o4S43oY", "annotations": [{"segment": [18.554864274570985, 167.2626895475819], "label": "Shaving"}]}, "qY7LG7r_IA8": {"duration": 228.65, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=qY7LG7r_IA8", "annotations": [{"segment": [12.206156900090251, 214.14196338607158], "label": "Cumbia"}]}, "BNa85xIhNqA": {"duration": 109.041, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BNa85xIhNqA", "annotations": [{"segment": [0, 94.20679251170047], "label": "Kite flying"}]}, "NbO4k5EtU4Y": {"duration": 117.12, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NbO4k5EtU4Y", "annotations": [{"segment": [7.674104368174727, 117.12145], "label": "Hammer throw"}]}, "fevlUD03yCM": {"duration": 87.354, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=fevlUD03yCM", "annotations": [{"segment": [0, 6.81384305772231], "label": "Applying sunscreen"}, {"segment": [41.97327323556942, 62.278525547581914], "label": "Applying sunscreen"}]}, "nc_NMp3vAH0": {"duration": 66.432, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=nc_NMp3vAH0", "annotations": []}, "zdaD55zVNdQ": {"duration": 51.85, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zdaD55zVNdQ", 
"annotations": [{"segment": [8.677512388140242, 16.50414322926174], "label": "Discus throw"}]}, "9n_cwQLpo_c": {"duration": 61.626, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=9n_cwQLpo_c", "annotations": [{"segment": [0.01, 61.625758], "label": "Snowboarding"}]}, "pcBmq0G-q30": {"duration": 13.166, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pcBmq0G-q30", "annotations": []}, "JHHHuKeA-WQ": {"duration": 107.602, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JHHHuKeA-WQ", "annotations": [{"segment": [18.102383775351015, 86.3376209048362], "label": "Hitting a pinata"}]}, "EEJsmkVet3I": {"duration": 13.422, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EEJsmkVet3I", "annotations": []}, "tOVv0cAyjcg": {"duration": 185.64, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tOVv0cAyjcg", "annotations": [{"segment": [18.53535580655226, 175.79626522776908], "label": "Getting a haircut"}]}, "eQ9NP0__xvM": {"duration": 14.54, "subset": "testing", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=eQ9NP0__xvM", "annotations": []}, "_4oBeoCuU7M": {"duration": 124.761, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=_4oBeoCuU7M", "annotations": [{"segment": [58.36790746158826, 119.5726037232705], "label": "Welding"}]}, "fq3x7MPEtY0": {"duration": 30.651, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fq3x7MPEtY0", "annotations": []}, "E22gU_8tafI": {"duration": 82.55, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=E22gU_8tafI", "annotations": [{"segment": [0.01, 82.546937], "label": "Triple jump"}]}, "t1-GV2bAL4I": {"duration": 5.32, "subset": "validation", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=t1-GV2bAL4I", 
"annotations": [{"segment": [0.11959672386895476, 3.212615444617785], "label": "Playing harmonica"}, {"segment": [4.029172386895476, 5.1426591263650545], "label": "Playing harmonica"}]}, "3NFWgG4_6NI": {"duration": 190.149, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3NFWgG4_6NI", "annotations": [{"segment": [9.32832216597419, 170.1433517149633], "label": "Hula hoop"}]}, "D_y9uXMbImA": {"duration": 49.598, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=D_y9uXMbImA", "annotations": [{"segment": [0.20000000000000004, 47.125656417445484], "label": "Cutting the grass"}]}, "P_sjOVZiv6A": {"duration": 57.28, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=P_sjOVZiv6A", "annotations": [{"segment": [12.243867394695787, 32.26303744149766], "label": "Doing nails"}]}, "aTXsp_UF3VY": {"duration": 231.11, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=aTXsp_UF3VY", "annotations": []}, "5WJL9eJ8YDw": {"duration": 83.2, "subset": "training", "resolution": "634x360", "url": "https://www.youtube.com/watch?v=5WJL9eJ8YDw", "annotations": [{"segment": [3.5632735023400937, 68.09091729017162], "label": "Shot put"}]}, "gdr6iVHHYcU": {"duration": 134.98, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gdr6iVHHYcU", "annotations": [{"segment": [52.85394179304588, 87.17741793753383], "label": "Putting on makeup"}, {"segment": [100.2329722620905, 114.13082135725429], "label": "Putting on makeup"}]}, "hkpYSfc5Ewc": {"duration": 47.65, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hkpYSfc5Ewc", "annotations": [{"segment": [1.9678627145085803, 46.597503900156006], "label": "Using parallel bars"}]}, "6t1HVCqCbU0": {"duration": 140.853, "subset": "testing", "resolution": "300x170", "url": "https://www.youtube.com/watch?v=6t1HVCqCbU0", "annotations": []}, 
"HVU5EmblAP8": {"duration": 174.359, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=HVU5EmblAP8", "annotations": [{"segment": [16.014640186915887, 174.281], "label": "Playing congas"}]}, "c-KBzCiSPcU": {"duration": 167.09, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=c-KBzCiSPcU", "annotations": []}, "3Hbm8FdirRc": {"duration": 73.4, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3Hbm8FdirRc", "annotations": [{"segment": [1.2023118639031276, 60.51636246099844], "label": "Mowing the lawn"}]}, "Ls8ha6c0ye8": {"duration": 214.57999999999998, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ls8ha6c0ye8", "annotations": [{"segment": [117.52312324492982, 184.82268954758192], "label": "Horseback riding"}]}, "7Sfl-Fmr_3E": {"duration": 71.216, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=7Sfl-Fmr_3E", "annotations": [{"segment": [27.611808112324493, 43.3705007800312], "label": "Painting fence"}]}, "0Qed9-9yp9Q": {"duration": 209.63, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0Qed9-9yp9Q", "annotations": [{"segment": [47.09311089178261, 67.69634690693749], "label": "Hammer throw"}, {"segment": [129.17901946009812, 202.7620037129485], "label": "Hammer throw"}, {"segment": [88.95365324180968, 107.26764067394696], "label": "Hammer throw"}]}, "AKShRE_4eTA": {"duration": 202.99, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AKShRE_4eTA", "annotations": [{"segment": [24.308121350916316, 177.73343716429113], "label": "Mowing the lawn"}]}, "szdKUpvx9Sk": {"duration": 167.3, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=szdKUpvx9Sk", "annotations": [{"segment": [95.7858949921997, 132.06447647425898], "label": "Playing blackjack"}]}, "2CTWueZkvUg": 
{"duration": 151.58, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2CTWueZkvUg", "annotations": []}, "ZYrQ-TSPQfU": {"duration": 147.006, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZYrQ-TSPQfU", "annotations": [{"segment": [0.2, 136.88890654205608], "label": "Rafting"}]}, "HVKveVRZ-JY": {"duration": 27.1, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=HVKveVRZ-JY", "annotations": [{"segment": [0.01, 27.097686], "label": "Playing water polo"}]}, "L0K-WJuI1-I": {"duration": 136.49, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=L0K-WJuI1-I", "annotations": [{"segment": [18.305502320000002, 35.558987834343306], "label": "Vacuuming floor"}, {"segment": [73.0343283064656, 81.55145114103885], "label": "Vacuuming floor"}, {"segment": [107.10281964475858, 136.2868934240363], "label": "Vacuuming floor"}]}, "AY2usHrPYL0": {"duration": 226.488, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=AY2usHrPYL0", "annotations": [{"segment": [29.66877379095164, 203.46099219968798], "label": "River tubing"}]}, "NLdyQ1oMmAo": {"duration": 151.12, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NLdyQ1oMmAo", "annotations": [{"segment": [38.883400650000006, 141.7591299524181], "label": "Skateboarding"}, {"segment": [4.12090605, 37.590326230109206], "label": "Skateboarding"}]}, "SIKxtWHWybU": {"duration": 9.126, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SIKxtWHWybU", "annotations": [{"segment": [1.2052496099843995, 8.011365054602184], "label": "Doing crunches"}]}, "UYFDYX4i2EY": {"duration": 121.37, "subset": "training", "resolution": "426x214", "url": "https://www.youtube.com/watch?v=UYFDYX4i2EY", "annotations": [{"segment": [7.664040561622465, 111.55436817472699], "label": "Breakdancing"}]}, 
"36lphp9tnMk": {"duration": 151.35, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=36lphp9tnMk", "annotations": [{"segment": [0.01, 143.08375004368176], "label": "Drinking beer"}]}, "7xYnGM84888": {"duration": 221.171, "subset": "testing", "resolution": "300x240", "url": "https://www.youtube.com/watch?v=7xYnGM84888", "annotations": []}, "XNzQVEmoJrY": {"duration": 81.78, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=XNzQVEmoJrY", "annotations": [{"segment": [9.053995319812792, 10.839290171606864], "label": "Playing squash"}, {"segment": [20.148327613104524, 78.93553666146646], "label": "Playing squash"}]}, "l8boztVhl7w": {"duration": 230.37, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=l8boztVhl7w", "annotations": [{"segment": [51.219354100000004, 121.1074878], "label": "Volleyball"}, {"segment": [14.375361560062403, 213.47411916692667], "label": "Volleyball"}]}, "wEehiYq9ttE": {"duration": 18.72, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=wEehiYq9ttE", "annotations": []}, "Ivq9_wPFNs4": {"duration": 20.69, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ivq9_wPFNs4", "annotations": []}, "gY-TqRhcWY8": {"duration": 34.087, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gY-TqRhcWY8", "annotations": [{"segment": [0.2, 32.253191887675506], "label": "Table soccer"}]}, "okC18bPTils": {"duration": 89.977, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=okC18bPTils", "annotations": [{"segment": [0.01, 32.82607488299532], "label": "Scuba diving"}, {"segment": [34.790028081123246, 67.61610296411857], "label": "Scuba diving"}, {"segment": [73.22739781591264, 84.87083463338534], "label": "Scuba diving"}]}, "UYcpD3r6Ol0": {"duration": 233.43, "subset": "training", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=UYcpD3r6Ol0", "annotations": [{"segment": [19.47895213026521, 208.80708498517942], "label": "Doing nails"}]}, "deuSw3RnNLU": {"duration": 211.79, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=deuSw3RnNLU", "annotations": [{"segment": [11.564153232795979, 191.63453806552263], "label": "Making a lemonade"}]}, "ADy-2Y0jmXQ": {"duration": 233.62, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=ADy-2Y0jmXQ", "annotations": [{"segment": [25.86582059282371, 33.51627457098284], "label": "Drinking coffee"}, {"segment": [95.81282839313572, 97.63436505460217], "label": "Drinking coffee"}]}, "E3h0UHRsSQk": {"duration": 112.849, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=E3h0UHRsSQk", "annotations": [{"segment": [4.4012756428624495, 45.24511360862597], "label": "Sharpening knives"}, {"segment": [58.096838485784325, 89.43391988767551], "label": "Sharpening knives"}]}, "WRXSn7DyaoI": {"duration": 141.43, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WRXSn7DyaoI", "annotations": [{"segment": [6.1709204368174735, 140.82921996879875], "label": "Cumbia"}]}, "P7lUYSLtErY": {"duration": 150.93, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=P7lUYSLtErY", "annotations": [{"segment": [9.301048361934477, 140.2221341653666], "label": "Tango"}]}, "9wukG78Z0u8": {"duration": 85.845, "subset": "training", "resolution": "1273x720", "url": "https://www.youtube.com/watch?v=9wukG78Z0u8", "annotations": [{"segment": [12.983926677067084, 22.353770670826833], "label": "Kneeling"}, {"segment": [32.928308892355695, 40.8257488299532], "label": "Kneeling"}, {"segment": [50.32944773790951, 59.43158190327614], "label": "Kneeling"}]}, "memk6ryZCjE": {"duration": 182.81, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=memk6ryZCjE", "annotations": [{"segment": [12.263420218408735, 178.53258271450858], "label": "Doing karate"}]}, "qn22hNGngQE": {"duration": 137.88, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=qn22hNGngQE", "annotations": []}, "6Ni6csyQbzw": {"duration": 73.955, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6Ni6csyQbzw", "annotations": [{"segment": [19.882172386895476, 59.4159992199688], "label": "BMX"}]}, "xMImWgn3owc": {"duration": 95.09, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=xMImWgn3owc", "annotations": [{"segment": [80.8336602, 84.50108864999999], "label": "Using the balance beam"}, {"segment": [3.780776117784711, 71.95367205], "label": "Using the balance beam"}]}, "vUuC72xikqw": {"duration": 214.599, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vUuC72xikqw", "annotations": [{"segment": [0.2, 106.79722910276955], "label": "Carving jack-o-lanterns"}, {"segment": [155.00663659743668, 192.50284075663023], "label": "Carving jack-o-lanterns"}]}, "KIanyNsP7wY": {"duration": 81.03999999999999, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=KIanyNsP7wY", "annotations": []}, "HLmiBuyTTmM": {"duration": 217.27, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HLmiBuyTTmM", "annotations": []}, "lQH1mZe3ZmA": {"duration": 114.173, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=lQH1mZe3ZmA", "annotations": []}, "zh0haUMeZV0": {"duration": 144.498, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zh0haUMeZV0", "annotations": [{"segment": [24.7904992199688, 119.8958689547582], "label": "Kneeling"}]}, "9PP5_HGpu4c": {"duration": 187.15, "subset": "validation", "resolution": "640x360", "url": 
"https://www.youtube.com/watch?v=9PP5_HGpu4c", "annotations": [{"segment": [6.817947359999999, 178.79266770670824], "label": "Preparing salad"}]}, "avGe67wRvxA": {"duration": 42.82, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=avGe67wRvxA", "annotations": [{"segment": [0, 27.658681747269885], "label": "Putting on makeup"}]}, "mzxp9dHn0Rs": {"duration": 220.102, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mzxp9dHn0Rs", "annotations": [{"segment": [18.195371294851792, 209.4184243369735], "label": "Futsal"}]}, "rCLGy2NA0aE": {"duration": 30.558, "subset": "testing", "resolution": "423x240", "url": "https://www.youtube.com/watch?v=rCLGy2NA0aE", "annotations": []}, "XKpx9fyNINg": {"duration": 194.89, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XKpx9fyNINg", "annotations": [{"segment": [13.070457098283931, 176.31728549141965], "label": "Snatch"}]}, "CUG8vpMIFEQ": {"duration": 197.97, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CUG8vpMIFEQ", "annotations": [{"segment": [8.45540472, 133.04717176], "label": "Plataform diving"}]}, "qoVYcplxgFE": {"duration": 68.76, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=qoVYcplxgFE", "annotations": [{"segment": [45.96591107644306, 67.93097347893915], "label": "Using the balance beam"}]}, "SSJjjggYBxc": {"duration": 53.499, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=SSJjjggYBxc", "annotations": [{"segment": [13.604212805246904, 40.72917583775351], "label": "Removing ice from car"}]}, "ahVeDMNS9ws": {"duration": 208.12, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ahVeDMNS9ws", "annotations": [{"segment": [90.58596896723868, 208.12045351473924], "label": "Cleaning shoes"}]}, "7DY1vm9RiIk": {"duration": 197.277, "subset": "training", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7DY1vm9RiIk", "annotations": [{"segment": [9.2303120124805, 17.229915756630266], "label": "Beach soccer"}, {"segment": [51.997424336973474, 62.458444617784714], "label": "Beach soccer"}, {"segment": [77.84229797191888, 153.22317940717627], "label": "Beach soccer"}]}, "7OYvyg32iqw": {"duration": 192.912, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7OYvyg32iqw", "annotations": [{"segment": [11.731698907956318, 169.05678939157565], "label": "Waterskiing"}]}, "NHYHcPe_Te8": {"duration": 105.07, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NHYHcPe_Te8", "annotations": []}, "JcMOzfurtK4": {"duration": 235.566, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JcMOzfurtK4", "annotations": [{"segment": [121.05706630265212, 125.09842511700468], "label": "Canoeing"}, {"segment": [194.16892121684867, 197.12483073322932], "label": "Canoeing"}]}, "uRQjuRNAABI": {"duration": 146.495, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uRQjuRNAABI", "annotations": []}, "3sAcnQxBchE": {"duration": 67.52, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3sAcnQxBchE", "annotations": [{"segment": [0.26307852184087366, 59.19266741419657], "label": "Smoking a cigarette"}]}, "685wnEW1Uq4": {"duration": 7.34, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=685wnEW1Uq4", "annotations": [{"segment": [1.4423177934477378, 6.994096601560063], "label": "Javelin throw"}]}, "f2W1Mt04CIM": {"duration": 58.747, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=f2W1Mt04CIM", "annotations": [{"segment": [4.765705512574245, 58.746485260770974], "label": "Riding bumper cars"}]}, "Bi3vlu17qp8": {"duration": 28.445, "subset": "training", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=Bi3vlu17qp8", "annotations": [{"segment": [0.2, 25.38810140405616], "label": "Beach soccer"}]}, "blCvgADz8LY": {"duration": 55.54, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=blCvgADz8LY", "annotations": [{"segment": [17.87035729953198, 31.024324900156007], "label": "Clean and jerk"}, {"segment": [32.149335287051485, 55.42839636973479], "label": "Clean and jerk"}]}, "oVt9uNNAGEo": {"duration": 34.78, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=oVt9uNNAGEo", "annotations": []}, "i0Z8I2WCLNk": {"duration": 190.985, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=i0Z8I2WCLNk", "annotations": [{"segment": [3.5753645982574, 184.13127681025608], "label": "Trimming branches or hedges"}]}, "iXaA7PVRhIY": {"duration": 71.7, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iXaA7PVRhIY", "annotations": [{"segment": [11.121852330000001, 38.46396177847114], "label": "Ping-pong"}]}, "yE5whKJ-DE4": {"duration": 150.7, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=yE5whKJ-DE4", "annotations": [{"segment": [3.8754836021840875, 125.07242534321374], "label": "Mowing the lawn"}]}, "ByDsWCKyWlo": {"duration": 226.39, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ByDsWCKyWlo", "annotations": []}, "Zu-tYiGypek": {"duration": 35.039, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Zu-tYiGypek", "annotations": []}, "SfQku6CicrU": {"duration": 5.57, "subset": "validation", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=SfQku6CicrU", "annotations": [{"segment": [0.01, 5.016378589703589], "label": "Washing face"}]}, "AXyRIyXqnzI": {"duration": 130.22, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=AXyRIyXqnzI", "annotations": [{"segment": [2.8419344773790955, 13.194695787831513], "label": "Discus throw"}, {"segment": [21.314508580343215, 25.983400936037444], "label": "Discus throw"}, {"segment": [32.073260530421216, 36.74215288611545], "label": "Discus throw"}, {"segment": [42.02003120124805, 62.5225585023401], "label": "Discus throw"}, {"segment": [69.62739469578783, 73.0783151326053], "label": "Discus throw"}, {"segment": [98.85872074882995, 116.9253042121685], "label": "Discus throw"}, {"segment": [122.2031825273011, 124.43613104524181], "label": "Discus throw"}]}, "rXgxlwrRFTg": {"duration": 36.69, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rXgxlwrRFTg", "annotations": [{"segment": [13.507420237129486, 36.6875283446712], "label": "Horseback riding"}]}, "Sf27WmcLb2w": {"duration": 124.3, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Sf27WmcLb2w", "annotations": []}, "cXkwVOPNisA": {"duration": 168.0, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cXkwVOPNisA", "annotations": []}, "dyLGepr7VR0": {"duration": 178.7, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dyLGepr7VR0", "annotations": [{"segment": [12.824080288381603, 176.1917117881994], "label": "Zumba"}]}, "VFvHqc5Bg-0": {"duration": 174.289, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VFvHqc5Bg-0", "annotations": [{"segment": [19.57689053354134, 152.53660540717627], "label": "Snowboarding"}]}, "yqBRp0txs-c": {"duration": 59.815, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yqBRp0txs-c", "annotations": [{"segment": [9.395449468019454, 14.697832642128912], "label": "Beer pong"}, {"segment": [20.000215423486168, 24.279331585228004], "label": "Beer pong"}, {"segment": [31.907321264854755, 33.02361243748305], "label": 
"Beer pong"}, {"segment": [36.74458301291074, 39.256238151324425], "label": "Beer pong"}, {"segment": [47.34934915287963, 49.58193149813624], "label": "Beer pong"}]}, "4j7sZBThR7s": {"duration": 130.311, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4j7sZBThR7s", "annotations": [{"segment": [14.834716068642745, 114.63102028081124], "label": "Making an omelette"}]}, "Y5zJT3BjIxM": {"duration": 185.74, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Y5zJT3BjIxM", "annotations": [{"segment": [41.857885335413414, 146.7198541341654], "label": "Getting a haircut"}]}, "GxOjqC_IDX4": {"duration": 83.69, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GxOjqC_IDX4", "annotations": [{"segment": [10.770624430577223, 71.47778031201248], "label": "Triple jump"}]}, "I2XqSiNerwg": {"duration": 170.203, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=I2XqSiNerwg", "annotations": []}, "DUb48prwNZk": {"duration": 89.56, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DUb48prwNZk", "annotations": [{"segment": [8.790678561622464, 75.62774254602184], "label": "Smoking hookah"}]}, "8OEts-YLeW0": {"duration": 57.31, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8OEts-YLeW0", "annotations": [{"segment": [7.248843732209964, 56.750175507910896], "label": "Doing karate"}]}, "Y-1QkIGm81w": {"duration": 56.87, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Y-1QkIGm81w", "annotations": [{"segment": [7.822445397815912, 16.40478549141966], "label": "Playing guitarra"}, {"segment": [24.808326833073323, 42.95640015600624], "label": "Playing guitarra"}]}, "V9MTU7xLukc": {"duration": 114.521, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=V9MTU7xLukc", "annotations": 
[{"segment": [0, 16.436685026584737], "label": "Fixing the roof"}, {"segment": [33.5880085325862, 37.697179789232386], "label": "Fixing the roof"}, {"segment": [48.95273584004585, 107.37443028861155], "label": "Fixing the roof"}]}, "Ms6qkgx5fyk": {"duration": 179.4, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Ms6qkgx5fyk", "annotations": []}, "0xxl3iG3VKY": {"duration": 46.86, "subset": "training", "resolution": "634x360", "url": "https://www.youtube.com/watch?v=0xxl3iG3VKY", "annotations": [{"segment": [0.657904422299341, 6.140441274793849], "label": "Javelin throw"}, {"segment": [19.15232873804748, 34.06482897683254], "label": "Javelin throw"}]}, "8_RfK2rp2To": {"duration": 94.18, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8_RfK2rp2To", "annotations": [{"segment": [18.79263650546022, 32.00620904836193], "label": "Pole vault"}, {"segment": [57.69926677067083, 63.71878315132605], "label": "Pole vault"}]}, "RpyIg_j4I3E": {"duration": 184.9, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RpyIg_j4I3E", "annotations": [{"segment": [0, 183.11081903276133], "label": "Kayaking"}]}, "zGwcX0S4DVA": {"duration": 51.688, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zGwcX0S4DVA", "annotations": [{"segment": [0.2, 50.32098151370285], "label": "Camel ride"}]}, "Y76wuHBZgdU": {"duration": 47.485, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Y76wuHBZgdU", "annotations": [{"segment": [8.43013104524181, 15.233394695787833], "label": "Installing carpet"}, {"segment": [28.76597347893916, 40.74563338533542], "label": "Installing carpet"}]}, "enESbo2pA9U": {"duration": 124.32, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=enESbo2pA9U", "annotations": [{"segment": [46.33132449297972, 51.961478939157566], "label": "Snowboarding"}]}, 
"q4jeW0iSA9Y": {"duration": 82.27, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=q4jeW0iSA9Y", "annotations": [{"segment": [47.58851495475819, 76.19293229953198], "label": "Triple jump"}]}, "SipyRTPgdfY": {"duration": 22.2, "subset": "validation", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=SipyRTPgdfY", "annotations": [{"segment": [14.960368923556942, 19.39307082683307], "label": "Snatch"}]}, "oQ_PwsBgozM": {"duration": 162.262, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oQ_PwsBgozM", "annotations": [{"segment": [1.3122191739578155, 161.30211235466192], "label": "Decorating the Christmas tree"}]}, "a8_pA-cEAao": {"duration": 205.938, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=a8_pA-cEAao", "annotations": []}, "21krmbuT21s": {"duration": 58.06, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=21krmbuT21s", "annotations": [{"segment": [0, 51.592588143525745], "label": "Using uneven bars"}]}, "_AaSNARQPfE": {"duration": 200.74, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_AaSNARQPfE", "annotations": [{"segment": [24.1134284228512, 169.73348184399376], "label": "Snatch"}]}, "x_BIktHkEc4": {"duration": 69.48, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=x_BIktHkEc4", "annotations": [{"segment": [18.426088979229025, 52.08100096063633], "label": "Playing harmonica"}]}, "7orO93DICYw": {"duration": 76.86, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7orO93DICYw", "annotations": []}, "PJysE5c1WDs": {"duration": 40.47, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PJysE5c1WDs", "annotations": [{"segment": [16.037417347893918, 34.347854477379094], "label": "Clean and jerk"}]}, "6rOmYOU7748": {"duration": 59.49, 
"subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6rOmYOU7748", "annotations": [{"segment": [7.888548057722309, 49.28022374882996], "label": "Roof shingle removal"}]}, "ta4QlTBHTTw": {"duration": 91.02, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ta4QlTBHTTw", "annotations": [{"segment": [3.763006325273011, 19.80903329719189], "label": "Tai chi"}]}, "DfOqhNeHDgM": {"duration": 57.772, "subset": "validation", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=DfOqhNeHDgM", "annotations": [{"segment": [6.573530421216848, 51.97590795631825], "label": "Playing drums"}]}, "fVBXRUWEyoA": {"duration": 219.824, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fVBXRUWEyoA", "annotations": [{"segment": [0.2, 219.6233106575964], "label": "Clipping cat claws"}, {"segment": [138.54698518825109, 210.90691893135724], "label": "Clipping cat claws"}]}, "Zr1xfVeUGeo": {"duration": 213.856, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Zr1xfVeUGeo", "annotations": [{"segment": [4.754364748486354, 206.6180184896047], "label": "Making a cake"}]}, "LW8UDBQ1yJc": {"duration": 184.95, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LW8UDBQ1yJc", "annotations": [{"segment": [4.327920341303448, 156.38218699219968], "label": "Skateboarding"}]}, "QFz-keqof9g": {"duration": 43.909, "subset": "training", "resolution": "718x480", "url": "https://www.youtube.com/watch?v=QFz-keqof9g", "annotations": [{"segment": [3.1510311450858035, 43.908934], "label": "Slacklining"}]}, "hJKX5ZulTgI": {"duration": 76.324, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hJKX5ZulTgI", "annotations": [{"segment": [21.74580829716589, 61.15415224581927], "label": "Blow-drying hair"}]}, "WZ4u6Epo9lg": {"duration": 221.914, "subset": "testing", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=WZ4u6Epo9lg", "annotations": []}, "Q8EoKJNm4mc": {"duration": 65.09, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Q8EoKJNm4mc", "annotations": [{"segment": [4.366089372826613, 16.956672643057722], "label": "Playing violin"}, {"segment": [50.159259195631826, 63.46060120904836], "label": "Playing violin"}]}, "Bq4Dg1t_zUA": {"duration": 21.827, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Bq4Dg1t_zUA", "annotations": []}, "-h3NbdE8N9M": {"duration": 179.4, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-h3NbdE8N9M", "annotations": [{"segment": [8.393681747269891, 154.34337414], "label": "Playing lacrosse"}]}, "XP5Oqr1giQ4": {"duration": 146.542, "subset": "training", "resolution": "854x468", "url": "https://www.youtube.com/watch?v=XP5Oqr1giQ4", "annotations": [{"segment": [16.460158553280905, 139.22550606552264], "label": "Bullfighting"}]}, "KlJqjBqpnjw": {"duration": 112.896, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KlJqjBqpnjw", "annotations": [{"segment": [8.630071069509446, 72.3869226442527], "label": "Rope skipping"}]}, "Q_kqplOxytU": {"duration": 85.984, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Q_kqplOxytU", "annotations": [{"segment": [17.410918614590177, 65.62576869158877], "label": "Swimming"}]}, "pibXLs9IOX8": {"duration": 214.692, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pibXLs9IOX8", "annotations": [{"segment": [17.416487418680422, 37.177501989875516], "label": "Croquet"}, {"segment": [62.63236821717767, 89.09203179555753], "label": "Croquet"}]}, "vQ8NJRCSyb4": {"duration": 185.969, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vQ8NJRCSyb4", "annotations": [{"segment": [3.5919106432576093, 
178.55269786572708], "label": "Kneeling"}]}, "ZLJ2BmBkLJg": {"duration": 75.81, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZLJ2BmBkLJg", "annotations": []}, "IGmzP4DnNbQ": {"duration": 159.2, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IGmzP4DnNbQ", "annotations": []}, "xyJCmMW4bEo": {"duration": 143.76, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xyJCmMW4bEo", "annotations": []}, "JBnwqVLJRBc": {"duration": 182.86, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JBnwqVLJRBc", "annotations": []}, "w64fHbGOl4E": {"duration": 200.435, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=w64fHbGOl4E", "annotations": []}, "Jmip8PuILhM": {"duration": 36.154, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=Jmip8PuILhM", "annotations": [{"segment": [0.2, 32.29509968847352], "label": "Painting furniture"}]}, "ywFa_D5QZ-k": {"duration": 219.777, "subset": "validation", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=ywFa_D5QZ-k", "annotations": [{"segment": [24.337271450858033, 219.721], "label": "Waxing skis"}]}, "er6fi7nYsuw": {"duration": 16.09, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=er6fi7nYsuw", "annotations": [{"segment": [0.3012325366614665, 16.065735288611545], "label": "Playing field hockey"}]}, "iGuOr5KwLQ0": {"duration": 116.332, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iGuOr5KwLQ0", "annotations": [{"segment": [0.2, 112.35173780495815], "label": "Rollerblading"}]}, "8VYre-lXqIo": {"duration": 177.38, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8VYre-lXqIo", "annotations": [{"segment": [7.748147487804275, 174.33331847559617], "label": "Tai chi"}]}, "deLpSUF1wio": 
{"duration": 14.65, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=deLpSUF1wio", "annotations": [{"segment": [2.8343558252730108, 7.154462687987519], "label": "Starting a campfire"}]}, "n5m7OWnwMC0": {"duration": 206.263, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=n5m7OWnwMC0", "annotations": []}, "evOhhiSN84I": {"duration": 50.178, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=evOhhiSN84I", "annotations": []}, "75cjK13ylJM": {"duration": 222.87, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=75cjK13ylJM", "annotations": [{"segment": [166.54039740909363, 217.6498721880848], "label": "Preparing salad"}]}, "ObU5as8-u-s": {"duration": 32.79, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ObU5as8-u-s", "annotations": [{"segment": [0.01, 32.741], "label": "Playing field hockey"}]}, "EtrCRyT4tkI": {"duration": 140.16, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EtrCRyT4tkI", "annotations": []}, "L2MfC4jcRo0": {"duration": 38.453, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=L2MfC4jcRo0", "annotations": [{"segment": [27.9866334514359, 34.68428953852765], "label": "Table soccer"}]}, "pVuuVHA3RVw": {"duration": 30.163, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pVuuVHA3RVw", "annotations": []}, "O_L0CSZ7nnA": {"duration": 83.151, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=O_L0CSZ7nnA", "annotations": [{"segment": [4.784717048658499, 81.21090310920945], "label": "Carving jack-o-lanterns"}]}, "_wl0vFvb8Bc": {"duration": 160.241, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=_wl0vFvb8Bc", "annotations": [{"segment": [57.756621469950545, 115.66300043568263], 
"label": "Hand car wash"}]}, "9lAT3oA9Ixs": {"duration": 54.22, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=9lAT3oA9Ixs", "annotations": []}, "6GGQUpJR5qs": {"duration": 133.12, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=6GGQUpJR5qs", "annotations": [{"segment": [64.0400384, 125.49583431903275], "label": "Bathing dog"}, {"segment": [1.8184192000000001, 56.9780224], "label": "Bathing dog"}]}, "UzCeMCr0lM8": {"duration": 58.584, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UzCeMCr0lM8", "annotations": [{"segment": [1.0053317697333743, 55.38464015288612], "label": "Peeling potatoes"}]}, "PoAB1ktiNQM": {"duration": 47.903, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PoAB1ktiNQM", "annotations": []}, "FJ7yrh2UiQ8": {"duration": 207.7, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=FJ7yrh2UiQ8", "annotations": [{"segment": [32.23132527301092, 187.41324258970357], "label": "Paintball"}]}, "uIOIcv5MhuA": {"duration": 27.14, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=uIOIcv5MhuA", "annotations": [{"segment": [1.754308329173167, 26.821894815912636], "label": "Smoking a cigarette"}]}, "ZN9kbYULUtw": {"duration": 97.72999999999999, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=ZN9kbYULUtw", "annotations": [{"segment": [12.19748031201248, 97.73231292517006], "label": "Doing karate"}]}, "wvQS6xeOhOI": {"duration": 205.683, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wvQS6xeOhOI", "annotations": []}, "qlbuekEsXL4": {"duration": 27.029, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qlbuekEsXL4", "annotations": [{"segment": [8.406850369494638, 13.78723501646198], "label": "Calf roping"}]}, 
"Uk_VxavVjP0": {"duration": 63.066, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Uk_VxavVjP0", "annotations": [{"segment": [0, 23.00610608424337], "label": "River tubing"}, {"segment": [28.315207488299535, 34.607475819032764], "label": "River tubing"}, {"segment": [45.422312012480496, 59.67823244929797], "label": "River tubing"}]}, "C4QrTmNDADY": {"duration": 235.68, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=C4QrTmNDADY", "annotations": [{"segment": [0.01, 97.43505930713418], "label": "Breakdancing"}, {"segment": [106.62704603422232, 235.48253968253968], "label": "Breakdancing"}]}, "B6W_0AvgGTY": {"duration": 23.197, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=B6W_0AvgGTY", "annotations": [{"segment": [4.624894144573718, 19.077686953271026], "label": "Disc dog"}]}, "IEPoIqIrprg": {"duration": 233.57, "subset": "validation", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=IEPoIqIrprg", "annotations": [{"segment": [43.57696833, 227.85211745943838], "label": "Volleyball"}]}, "aM-fUKjbCd0": {"duration": 60.21, "subset": "testing", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=aM-fUKjbCd0", "annotations": []}, "iZg4cUU6Hkc": {"duration": 102.08, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=iZg4cUU6Hkc", "annotations": [{"segment": [41.88097300624025, 93.63502710140405], "label": "Playing kickball"}]}, "8miqQQJEsO0": {"duration": 135.77, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8miqQQJEsO0", "annotations": [{"segment": [9.633790951638066, 128.83872074882996], "label": "Clean and jerk"}]}, "dQyZheN0Fw0": {"duration": 149.142, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dQyZheN0Fw0", "annotations": [{"segment": [0, 6.494505754707989], "label": "Curling"}]}, "Ba3uRADSg50": 
{"duration": 145.961, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ba3uRADSg50", "annotations": [{"segment": [49.03187389685008, 145.0526318193118], "label": "Using the monkey bar"}]}, "RiUvp2idW3U": {"duration": 33.553, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RiUvp2idW3U", "annotations": []}, "BeusKDfs3cs": {"duration": 230.25, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BeusKDfs3cs", "annotations": []}, "bV4xdc71azY": {"duration": 211.883, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bV4xdc71azY", "annotations": [{"segment": [17.84966092521252, 208.90714152886113], "label": "Making an omelette"}]}, "7WfF6FrZEuU": {"duration": 36.2, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7WfF6FrZEuU", "annotations": [{"segment": [0.8729570982839314, 34.60852496099844], "label": "Polishing shoes"}]}, "wJV-kQRmaGc": {"duration": 24.1, "subset": "training", "resolution": "384x240", "url": "https://www.youtube.com/watch?v=wJV-kQRmaGc", "annotations": [{"segment": [0, 24.061], "label": "Springboard diving"}]}, "43gst-Mw43s": {"duration": 75.372, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=43gst-Mw43s", "annotations": [{"segment": [22.684575663026518, 28.914018720748828], "label": "Roof shingle removal"}]}, "NLdhDlsMnxQ": {"duration": 41.31, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NLdhDlsMnxQ", "annotations": [{"segment": [0.7080670826833074, 39.587386895475824], "label": "Walking the dog"}]}, "8CAvjawFn3w": {"duration": 89.88, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=8CAvjawFn3w", "annotations": [{"segment": [29.70306664, 37.15894444], "label": "Horseback riding"}]}, "oqX-n2KHWAM": {"duration": 213.09, "subset": "training", 
"resolution": "854x480", "url": "https://www.youtube.com/watch?v=oqX-n2KHWAM", "annotations": [{"segment": [42.218984858034325, 190.15164833697347], "label": "Having an ice cream"}]}, "G_kJ-Af89ew": {"duration": 165.884, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=G_kJ-Af89ew", "annotations": [{"segment": [0.2, 165.561], "label": "Playing pool"}]}, "eIs6m8Xt4rw": {"duration": 55.241, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=eIs6m8Xt4rw", "annotations": []}, "uGVkH6PjXLs": {"duration": 32.3, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=uGVkH6PjXLs", "annotations": [{"segment": [0.01, 2.5698077953198126], "label": "Chopping wood"}, {"segment": [29.326041899531983, 32.098956799999996], "label": "Chopping wood"}]}, "RxXBMgsu6uU": {"duration": 177.447, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=RxXBMgsu6uU", "annotations": [{"segment": [95.76858970358813, 136.7331887675507], "label": "BMX"}]}, "VcyO9tZ3NkY": {"duration": 124.018, "subset": "training", "resolution": "448x360", "url": "https://www.youtube.com/watch?v=VcyO9tZ3NkY", "annotations": [{"segment": [52.99655850234009, 120.69289235569423], "label": "Fixing the roof"}]}, "Nkcc9kLDQz8": {"duration": 161.495, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Nkcc9kLDQz8", "annotations": [{"segment": [9.555697819314641, 150.3951370716511], "label": "Skiing"}]}, "DHaH6wgdotw": {"duration": 66.161, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DHaH6wgdotw", "annotations": []}, "jZh3iGJszMw": {"duration": 41.796, "subset": "testing", "resolution": "400x224", "url": "https://www.youtube.com/watch?v=jZh3iGJszMw", "annotations": []}, "zV08tITKFMM": {"duration": 121.997, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=zV08tITKFMM", 
"annotations": []}, "D0RDF1ez-8Y": {"duration": 178.887, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=D0RDF1ez-8Y", "annotations": [{"segment": [39.070380748829955, 178.8865306122449], "label": "Putting in contact lenses"}]}, "hrcSCXaNhmY": {"duration": 40.96, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hrcSCXaNhmY", "annotations": [{"segment": [0.01, 40.96], "label": "Disc dog"}]}, "uXPm6SMQH1M": {"duration": 35.83, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uXPm6SMQH1M", "annotations": [{"segment": [7.28214430577223, 34.45765600624025], "label": "Grooming horse"}]}, "pIv2jZdHP0k": {"duration": 448.401, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pIv2jZdHP0k", "annotations": [{"segment": [0.01, 448.40054421768707], "label": "Putting on shoes"}]}, "tJ2xOG_EWOg": {"duration": 216.666, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tJ2xOG_EWOg", "annotations": [{"segment": [0.01, 140.612799900156], "label": "Carving jack-o-lanterns"}]}, "IucYlH_gAic": {"duration": 64.064, "subset": "training", "resolution": "636x360", "url": "https://www.youtube.com/watch?v=IucYlH_gAic", "annotations": [{"segment": [0.01, 0.02], "label": "Swimming"}]}, "Gg32cIypcdc": {"duration": 237.354, "subset": "validation", "resolution": "290x240", "url": "https://www.youtube.com/watch?v=Gg32cIypcdc", "annotations": [{"segment": [6.664240249609985, 212.14498127925117], "label": "Bullfighting"}]}, "36JC5C6I4sg": {"duration": 187.687, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=36JC5C6I4sg", "annotations": [{"segment": [0.01, 136.73912485335413], "label": "Hand car wash"}]}, "J__1J4MmH4w": {"duration": 112.524, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=J__1J4MmH4w", "annotations": [{"segment": 
[19.074810898504037, 22.399778401316464], "label": "Beer pong"}, {"segment": [48.64952131418594, 53.72447224360196], "label": "Beer pong"}, {"segment": [59.84941172787178, 68.59932611356385], "label": "Beer pong"}, {"segment": [89.42412235151097, 90.82410865322171], "label": "Beer pong"}]}, "gIzsM5cv7XM": {"duration": 181.79, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=gIzsM5cv7XM", "annotations": [{"segment": [24.241584243369736, 26.793329953198132], "label": "Rock-paper-scissors"}, {"segment": [30.195657566302653, 40.68616770670827], "label": "Rock-paper-scissors"}, {"segment": [56.56369656786271, 61.38366068642746], "label": "Rock-paper-scissors"}, {"segment": [77.2611895475819, 82.93173556942278], "label": "Rock-paper-scissors"}, {"segment": [95.12340951638068, 98.24220982839314], "label": "Rock-paper-scissors"}]}, "dHzX9NqyBZw": {"duration": 12.8, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dHzX9NqyBZw", "annotations": [{"segment": [0.43911430265210605, 11.676448502340094], "label": "Springboard diving"}]}, "8LiO75NtXNw": {"duration": 224.14, "subset": "training", "resolution": "596x480", "url": "https://www.youtube.com/watch?v=8LiO75NtXNw", "annotations": [{"segment": [7.691248049921997, 207.31409516380654], "label": "Long jump"}]}, "kGLMybXl5Kk": {"duration": 115.68, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kGLMybXl5Kk", "annotations": [{"segment": [19.6079295, 104.63073322932918], "label": "Snatch"}]}, "H0r_ZPXJLtg": {"duration": 98.128, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=H0r_ZPXJLtg", "annotations": []}, "lQWij22wbNU": {"duration": 32.3, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=lQWij22wbNU", "annotations": [{"segment": [0.01, 32.298955], "label": "Playing kickball"}]}, "Tvzd_WIvBU4": {"duration": 68.03999999999999, 
"subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Tvzd_WIvBU4", "annotations": [{"segment": [5.428389235569423, 65.08796801872074], "label": "Chopping wood"}]}, "Czd1PFeumIo": {"duration": 164.12, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Czd1PFeumIo", "annotations": [{"segment": [21.763001918876757, 149.78066026521063], "label": "Tango"}]}, "_6fbXk6y8X0": {"duration": 95.27000000000001, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=_6fbXk6y8X0", "annotations": [{"segment": [2.601657566302652, 65.9334360374415], "label": "Windsurfing"}]}, "SHT08nPhIb4": {"duration": 44.93, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=SHT08nPhIb4", "annotations": [{"segment": [8.03294472, 43.07974258970359], "label": "Layup drill in basketball"}]}, "L6BxxvCbwpQ": {"duration": 41.425, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=L6BxxvCbwpQ", "annotations": [{"segment": [0.01, 29.642318542692543], "label": "Installing carpet"}]}, "zvFff-07npY": {"duration": 181.836, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zvFff-07npY", "annotations": []}, "l5qCvYE1-tA": {"duration": 27.4, "subset": "training", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=l5qCvYE1-tA", "annotations": [{"segment": [3.559757410296412, 8.718208268330734], "label": "High jump"}, {"segment": [10.039794851794072, 15.581932137285492], "label": "High jump"}, {"segment": [16.94615054602184, 20.228801092043682], "label": "High jump"}, {"segment": [21.294596723868956, 26.879365834633386], "label": "High jump"}]}, "P79nFNUaysw": {"duration": 73.82, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=P79nFNUaysw", "annotations": [{"segment": [28.20100935898772, 48.89707991973538], "label": "Preparing pasta"}]}, 
"kTf-Id-lWX8": {"duration": 66.061, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kTf-Id-lWX8", "annotations": [{"segment": [14.325190585854724, 65.03018172427576], "label": "Shuffleboard"}]}, "CnrvRF_N7fU": {"duration": 153.577, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=CnrvRF_N7fU", "annotations": [{"segment": [0.01, 153.521], "label": "Using the rowing machine"}]}, "JRg3W4xKvJI": {"duration": 181.7, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=JRg3W4xKvJI", "annotations": [{"segment": [12.18630733229329, 34.575104524180965], "label": "Polishing forniture"}, {"segment": [87.85477379095164, 89.5551887675507], "label": "Polishing forniture"}]}, "KuyaO3brwBc": {"duration": 181.07, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=KuyaO3brwBc", "annotations": [{"segment": [55.37271089, 159.74196212338288], "label": "Cleaning shoes"}]}, "9SY9ufDznFQ": {"duration": 221.15, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9SY9ufDznFQ", "annotations": [{"segment": [12.5899375975039, 218.51302652106085], "label": "Playing field hockey"}]}, "j6OcN4OQZVM": {"duration": 157.15, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=j6OcN4OQZVM", "annotations": []}, "QgeqX3cWWRA": {"duration": 116.611, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QgeqX3cWWRA", "annotations": []}, "dIC0nm4nrI8": {"duration": 113.08, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=dIC0nm4nrI8", "annotations": [{"segment": [3.2587995319812793, 109.30189781591264], "label": "Ballet"}]}, "QkX8GFbwN7c": {"duration": 32.694, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QkX8GFbwN7c", "annotations": [{"segment": [0.2, 19.82079407176287], 
"label": "Welding"}]}, "7MWFfqhKgIY": {"duration": 33.16, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7MWFfqhKgIY", "annotations": [{"segment": [14.615483399999999, 29.123255728549143], "label": "Using parallel bars"}]}, "0vssmM38x-0": {"duration": 135.42, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0vssmM38x-0", "annotations": []}, "Mn_wH5lwlkc": {"duration": 66.735, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Mn_wH5lwlkc", "annotations": []}, "eSZh5MONfwQ": {"duration": 110.017, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eSZh5MONfwQ", "annotations": []}, "Swz9TXxW-kE": {"duration": 15.14, "subset": "testing", "resolution": "864x720", "url": "https://www.youtube.com/watch?v=Swz9TXxW-kE", "annotations": []}, "-4WdzYpCJPU": {"duration": 203.36, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-4WdzYpCJPU", "annotations": [{"segment": [99.4414976599064, 160.21370094383778], "label": "Archery"}]}, "4uitkT3dDIM": {"duration": 131.1, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=4uitkT3dDIM", "annotations": [{"segment": [7.567386834009361, 127.4184325087289], "label": "Javelin throw"}]}, "A8KtrGjBodw": {"duration": 110.41, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=A8KtrGjBodw", "annotations": [{"segment": [0, 19.627385335413415], "label": "Drinking beer"}]}, "DXhVbxfmrYM": {"duration": 80.04, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DXhVbxfmrYM", "annotations": [{"segment": [34.853763201709185, 42.07347168693469], "label": "Sumo"}]}, "6kUeZVukTEI": {"duration": 125.18, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=6kUeZVukTEI", "annotations": [{"segment": [108.79077613104523, 
124.20686817472699], "label": "High jump"}]}, "0IIVlMtO520": {"duration": 206.24, "subset": "testing", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=0IIVlMtO520", "annotations": []}, "0PbmXOVIwGc": {"duration": 120.84, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=0PbmXOVIwGc", "annotations": [{"segment": [4.999354924314431, 117.91840157673253], "label": "Grooming horse"}]}, "eMgC7I-GcgA": {"duration": 118.445, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eMgC7I-GcgA", "annotations": []}, "Cg_jN5G1ZpY": {"duration": 70.775, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Cg_jN5G1ZpY", "annotations": [{"segment": [0.2, 63.22308580343214], "label": "Bullfighting"}]}, "Ai6RYjJ97Zg": {"duration": 208.32999999999998, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ai6RYjJ97Zg", "annotations": [{"segment": [26.975574021600323, 179.0788106735154], "label": "Making a sandwich"}]}, "7VWp-Dfvqkg": {"duration": 186.39, "subset": "training", "resolution": "352x288", "url": "https://www.youtube.com/watch?v=7VWp-Dfvqkg", "annotations": [{"segment": [63.970439470640045, 147.10142278], "label": "Playing harmonica"}]}, "Q5HdVXF390c": {"duration": 25.57, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=Q5HdVXF390c", "annotations": []}, "p4Bm_NB_QoI": {"duration": 185.342, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=p4Bm_NB_QoI", "annotations": [{"segment": [22.229412797309994, 176.39183401501828], "label": "Baton twirling"}]}, "mhPpeG3LrUk": {"duration": 90.396, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mhPpeG3LrUk", "annotations": [{"segment": [45.543, 78.114], "label": "Applying sunscreen"}]}, "j5mhELw7XaM": {"duration": 126.2, "subset": "validation", "resolution": 
"192x144", "url": "https://www.youtube.com/watch?v=j5mhELw7XaM", "annotations": [{"segment": [15.5535688049922, 124.03478920436818], "label": "Skateboarding"}]}, "et029cxyEOs": {"duration": 108.507, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=et029cxyEOs", "annotations": [{"segment": [0.01, 108.506847], "label": "Playing rubik cube"}]}, "7t666Y8cLGk": {"duration": 34.99, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=7t666Y8cLGk", "annotations": [{"segment": [0, 34.25196255850234], "label": "Smoking hookah"}]}, "FyxltEKCfZ0": {"duration": 68.43, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FyxltEKCfZ0", "annotations": [{"segment": [49.21310782839314, 67.25435560062404], "label": "Drinking coffee"}]}, "WhOsURGm0Kg": {"duration": 16.301, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=WhOsURGm0Kg", "annotations": [{"segment": [2.45894207042261, 8.492223042428854], "label": "Beer pong"}]}, "X1E7i3ocxKY": {"duration": 213.72, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=X1E7i3ocxKY", "annotations": [{"segment": [35.00815689770448, 37.342034024218115], "label": "Painting"}, {"segment": [41.00955522302525, 61.347627325501186], "label": "Painting"}, {"segment": [69.01608074118883, 191.37792385023403], "label": "Painting"}]}, "uG7GIj313aA": {"duration": 107.555, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=uG7GIj313aA", "annotations": []}, "_RfQQ3F37jY": {"duration": 24.1, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_RfQQ3F37jY", "annotations": []}, "aVHmi3Kjr_Q": {"duration": 56.169, "subset": "training", "resolution": "1920x1440", "url": "https://www.youtube.com/watch?v=aVHmi3Kjr_Q", "annotations": [{"segment": [3.1092129485179405, 17.91081825273011], "label": "Decorating the 
Christmas tree"}, {"segment": [26.31883073322933, 46.112693447737904], "label": "Decorating the Christmas tree"}]}, "tJSF2GAp9TU": {"duration": 80.85, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tJSF2GAp9TU", "annotations": [{"segment": [5.802158187207489, 50.45354945397816], "label": "Using parallel bars"}]}, "8ZA8UGBEx74": {"duration": 104.65, "subset": "validation", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=8ZA8UGBEx74", "annotations": [{"segment": [0.4882665709675126, 104.1640624621028], "label": "Cumbia"}]}, "mEjU4uJZccw": {"duration": 69.451, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=mEjU4uJZccw", "annotations": [{"segment": [5.200690248018084, 69.4508843537415], "label": "Playing congas"}]}, "zGT6EplIOHY": {"duration": 164.537, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zGT6EplIOHY", "annotations": [{"segment": [11.805063962558503, 123.69653978159126], "label": "Futsal"}, {"segment": [129.3424399375975, 160.39489079563182], "label": "Futsal"}]}, "QHd9bJOuvIU": {"duration": 86.239, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=QHd9bJOuvIU", "annotations": [{"segment": [0, 81.35975819032761], "label": "Fixing the roof"}]}, "kmWf36zfL7o": {"duration": 24.06, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=kmWf36zfL7o", "annotations": [{"segment": [12.799424949999999, 21.655754299999998], "label": "Getting a tattoo"}]}, "gmFioIDX4X0": {"duration": 111.53, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=gmFioIDX4X0", "annotations": [{"segment": [6.694955758190328, 92.59906340873636], "label": "Kayaking"}]}, "goXkohySkU8": {"duration": 26.75, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=goXkohySkU8", "annotations": [{"segment": 
[11.297021840873635, 18.925638065522623], "label": "Disc dog"}]}, "0e-qdFlRmPU": {"duration": 167.65, "subset": "validation", "resolution": "480x352", "url": "https://www.youtube.com/watch?v=0e-qdFlRmPU", "annotations": [{"segment": [20.138692023871428, 161.63261894414975], "label": "Tango"}]}, "NOacQAV81Hw": {"duration": 46.53, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=NOacQAV81Hw", "annotations": []}, "N5Dt3ycaun4": {"duration": 48.204, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=N5Dt3ycaun4", "annotations": []}, "tXLvsYsWCoY": {"duration": 70.496, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=tXLvsYsWCoY", "annotations": [{"segment": [10.227937024960998, 64.11706758658346], "label": "Drum corps"}]}, "TQ22icNBoSI": {"duration": 66.479, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TQ22icNBoSI", "annotations": [{"segment": [5.0089478214656085, 8.113667447633832], "label": "Roof shingle removal"}, {"segment": [16.599901092493646, 19.497639410250656], "label": "Roof shingle removal"}, {"segment": [25.18962539155907, 40.29926090557776], "label": "Roof shingle removal"}, {"segment": [46.71568146632542, 53.23559268127869], "label": "Roof shingle removal"}]}, "SV3v-nA6hXM": {"duration": 173.45, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SV3v-nA6hXM", "annotations": []}, "l9jSJaV0DA0": {"duration": 197.602, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=l9jSJaV0DA0", "annotations": [{"segment": [111.90202044000127, 148.8944228096724], "label": "Longboarding"}]}, "8--Dvot-OcQ": {"duration": 65.295, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8--Dvot-OcQ", "annotations": []}, "KKEyKFmatq0": {"duration": 135.884, "subset": "training", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=KKEyKFmatq0", "annotations": [{"segment": [0.01, 135.883174], "label": "Futsal"}]}, "BrnUW2LSJDI": {"duration": 42.772, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BrnUW2LSJDI", "annotations": [{"segment": [0.01, 0.02], "label": "Shuffleboard"}, {"segment": [0.2, 20.646668553658362], "label": "Shuffleboard"}, {"segment": [30.761297837147456, 33.02378070319107], "label": "Shuffleboard"}]}, "ds3ZFTS5dC0": {"duration": 81.688, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ds3ZFTS5dC0", "annotations": []}, "FNB1BMEtIvw": {"duration": 183.949, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FNB1BMEtIvw", "annotations": [{"segment": [16.062739469578784, 24.95461310452418], "label": "Snowboarding"}, {"segment": [42.743009360374415, 170.1114399375975], "label": "Snowboarding"}]}, "JuC7EY32Spg": {"duration": 26.634, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JuC7EY32Spg", "annotations": [{"segment": [2.8011973478939156, 21.93233775351014], "label": "Putting on shoes"}]}, "l5EP4OdpeJs": {"duration": 46.95, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=l5EP4OdpeJs", "annotations": []}, "Wy0u0amd4Ko": {"duration": 60.67, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Wy0u0amd4Ko", "annotations": [{"segment": [0.6625837604932768, 57.73944189391575], "label": "Playing squash"}]}, "Gl6EMAgTNKo": {"duration": 62.95, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=Gl6EMAgTNKo", "annotations": [{"segment": [1.887676646805998, 35.98414620219436], "label": "Rafting"}]}, "Snj5CuEUbPI": {"duration": 178.144, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Snj5CuEUbPI", "annotations": [{"segment": [61.50502534020896, 
106.66412362636758], "label": "Rafting"}, {"segment": [122.73300522512343, 143.23468174767396], "label": "Rafting"}]}, "brCkKw_gNWQ": {"duration": 167.788, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=brCkKw_gNWQ", "annotations": []}, "SQiIQDKhnh0": {"duration": 155.27, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=SQiIQDKhnh0", "annotations": [{"segment": [84.05511192667707, 132.74409606864276], "label": "Polishing forniture"}]}, "3dR4MEUDHa0": {"duration": 153.76, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3dR4MEUDHa0", "annotations": [{"segment": [3.8380614190554017, 79.39989560670861], "label": "Ping-pong"}]}, "WXST-TXQjoU": {"duration": 14.86, "subset": "validation", "resolution": "496x480", "url": "https://www.youtube.com/watch?v=WXST-TXQjoU", "annotations": [{"segment": [3.1544834836193445, 7.476010307332293], "label": "Getting a haircut"}, {"segment": [11.936195745709828, 14.270282425897037], "label": "Getting a haircut"}]}, "3B39HkjJnbc": {"duration": 131.031, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3B39HkjJnbc", "annotations": []}, "qyRFJiORdH0": {"duration": 52.04, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qyRFJiORdH0", "annotations": []}, "huUb8mM5fv4": {"duration": 29.65, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=huUb8mM5fv4", "annotations": [{"segment": [15.462480499219968, 28.887987519500783], "label": "Triple jump"}]}, "54chD8QFKyk": {"duration": 104.374, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=54chD8QFKyk", "annotations": [{"segment": [38.571102964118566, 48.17319188767551], "label": "Clipping cat claws"}, {"segment": [55.98506084243369, 62.82044617784711], "label": "Clipping cat claws"}, {"segment": [79.25792043681747, 
83.65209672386895], "label": "Clipping cat claws"}]}, "GKpeX_lWMgg": {"duration": 38.569, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GKpeX_lWMgg", "annotations": [{"segment": [0.19999999999999998, 37.8], "label": "Waterskiing"}]}, "jDfTrTtPs5s": {"duration": 40.47, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=jDfTrTtPs5s", "annotations": [{"segment": [0.2525577472698908, 39.71470575819032], "label": "Playing racquetball"}]}, "fHXgxSFDmJc": {"duration": 38.917, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=fHXgxSFDmJc", "annotations": [{"segment": [0.7909516380655226, 38.21377535101404], "label": "Bullfighting"}]}, "aYxni8ohTfU": {"duration": 199.44, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aYxni8ohTfU", "annotations": []}, "jhy6npIdn5o": {"duration": 187.896, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jhy6npIdn5o", "annotations": []}, "HxenZkvSkPc": {"duration": 228.51, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HxenZkvSkPc", "annotations": [{"segment": [9.98158532055568, 72.01000838400883], "label": "Walking the dog"}, {"segment": [89.12129750496142, 221.02081781230433], "label": "Walking the dog"}]}, "ZPLbrQ391jg": {"duration": 187.13, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZPLbrQ391jg", "annotations": [{"segment": [27.441717837420978, 167.86157187784107], "label": "BMX"}]}, "2aw1pVJsnKs": {"duration": 93.78999999999999, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2aw1pVJsnKs", "annotations": [{"segment": [1.8270605694227768, 82.6583476], "label": "Doing motocross"}]}, "TjRibiXhHrg": {"duration": 209.63, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TjRibiXhHrg", 
"annotations": []}, "GvJxJf4m6_M": {"duration": 92.114, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GvJxJf4m6_M", "annotations": [{"segment": [51.28561913986046, 62.74609317913921], "label": "Putting on shoes"}]}, "WCS8dzRT7Xk": {"duration": 113.55, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=WCS8dzRT7Xk", "annotations": []}, "CqAJxW84FkU": {"duration": 220.31, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CqAJxW84FkU", "annotations": [{"segment": [71.83304884304215, 91.42388034569002], "label": "Chopping wood"}, {"segment": [92.11127794227414, 133.01143390951637], "label": "Chopping wood"}]}, "SSqVGeD2XaQ": {"duration": 118.144, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SSqVGeD2XaQ", "annotations": [{"segment": [18.78980031201248, 20.263510140405614], "label": "Rock-paper-scissors"}, {"segment": [23.579357254290173, 27.632059282371294], "label": "Rock-paper-scissors"}, {"segment": [40.711234009360375, 42.73758502340094], "label": "Rock-paper-scissors"}, {"segment": [63.01176599063963, 64.30148049921996], "label": "Rock-paper-scissors"}, {"segment": [69.09184867394696, 70.93429797191888], "label": "Rock-paper-scissors"}, {"segment": [85.48964742589703, 87.14785179407177], "label": "Rock-paper-scissors"}, {"segment": [92.12246489859594, 93.2279344773791], "label": "Rock-paper-scissors"}, {"segment": [102.62442589703588, 105.20385491419657], "label": "Rock-paper-scissors"}, {"segment": [109.99422308892355, 111.65242745709828], "label": "Rock-paper-scissors"}]}, "K7iw2_1BrIk": {"duration": 205.729, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=K7iw2_1BrIk", "annotations": []}, "eGA5tbX8oH0": {"duration": 78.97, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eGA5tbX8oH0", "annotations": [{"segment": 
[20.943819656786268, 61.47627063962558], "label": "Shoveling snow"}]}, "VQ1t1jMI0LE": {"duration": 131.913, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VQ1t1jMI0LE", "annotations": []}, "hoyQ36EH1a8": {"duration": 98.24000000000001, "subset": "validation", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=hoyQ36EH1a8", "annotations": [{"segment": [7.5067847113884545, 16.851965678627142], "label": "Horseback riding"}, {"segment": [20.52875819032761, 30.48673790951638], "label": "Horseback riding"}, {"segment": [40.75111700468018, 43.81511076443057], "label": "Horseback riding"}, {"segment": [50.55589703588143, 69.70585803432137], "label": "Horseback riding"}, {"segment": [73.99544929797192, 90.3878159126365], "label": "Horseback riding"}]}, "bM7Tgb1mJfc": {"duration": 65.34, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bM7Tgb1mJfc", "annotations": [{"segment": [1.7800312012480501, 55.38439937597504], "label": "Archery"}]}, "bfBTnUiGVUo": {"duration": 69.59, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=bfBTnUiGVUo", "annotations": [{"segment": [0.4631464276311405, 63.98317595722616], "label": "Brushing teeth"}]}, "v9vc_d-qFnk": {"duration": 48.65, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=v9vc_d-qFnk", "annotations": [{"segment": [0.7585179407176288, 44.752558502340094], "label": "Using parallel bars"}]}, "8klHUzPHtQw": {"duration": 179.653, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=8klHUzPHtQw", "annotations": [{"segment": [43.98963650546022, 91.34153822152886], "label": "BMX"}, {"segment": [99.74720124804992, 175.97814040561624], "label": "BMX"}]}, "FCYdwCXRNVI": {"duration": 83.244, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FCYdwCXRNVI", "annotations": [{"segment": [0.01, 
76.74850546021841], "label": "Welding"}]}, "6eqIYm3Y-jY": {"duration": 158.848, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=6eqIYm3Y-jY", "annotations": []}, "pxt1-L_-H74": {"duration": 224.26, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pxt1-L_-H74", "annotations": [{"segment": [0.01, 168.63087398127925], "label": "Zumba"}]}, "_tRAypMWUdc": {"duration": 215.584, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_tRAypMWUdc", "annotations": [{"segment": [21.48894704049844, 120.87532710280374], "label": "Canoeing"}]}, "2nDJpvD-eZU": {"duration": 70.705, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2nDJpvD-eZU", "annotations": []}, "IiG80Vp4WyY": {"duration": 202.27, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IiG80Vp4WyY", "annotations": [{"segment": [46.21758825117005, 125.20046562], "label": "Playing saxophone"}, {"segment": [125.40267119344773, 191.0221821575663], "label": "Playing saxophone"}]}, "jzl0Ar3H5f8": {"duration": 170.481, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jzl0Ar3H5f8", "annotations": [{"segment": [18.34915600624025, 123.92328549141966], "label": "Doing fencing"}]}, "OPp3DqFq0O0": {"duration": 238.63, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OPp3DqFq0O0", "annotations": [{"segment": [0.01, 230.44131227457098], "label": "Playing violin"}]}, "I637SOXSbHY": {"duration": 57.61, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=I637SOXSbHY", "annotations": [{"segment": [19.457102, 48.73406368], "label": "Clean and jerk"}]}, "4XTJzFjjFp0": {"duration": 165.048, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=4XTJzFjjFp0", "annotations": [{"segment": [0.01, 
153.46064252106083], "label": "Assembling bicycle"}]}, "WlsVDIIo2dk": {"duration": 236.31, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WlsVDIIo2dk", "annotations": [{"segment": [5.529862990437985, 228.5676702714367], "label": "Cumbia"}]}, "m8SFyH4vhik": {"duration": 105.77000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=m8SFyH4vhik", "annotations": [{"segment": [8.25241809672387, 91.76688923556942], "label": "Using the balance beam"}]}, "wPYr19iFxhw": {"duration": 56.611, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wPYr19iFxhw", "annotations": [{"segment": [0.01, 56.541], "label": "Welding"}]}, "kCOMyIb7f0Q": {"duration": 182.44, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kCOMyIb7f0Q", "annotations": [{"segment": [49.807889458435476, 96.20038078257824], "label": "Skiing"}, {"segment": [103.6004100735458, 135.7620745928237], "label": "Skiing"}]}, "fvslbZDJ3C8": {"duration": 34.44, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fvslbZDJ3C8", "annotations": [{"segment": [2.3077113884555382, 33.32764586583464], "label": "Dodgeball"}]}, "sOUNe30PXY4": {"duration": 4.598, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=sOUNe30PXY4", "annotations": [{"segment": [0.01, 4.561], "label": "Doing a powerbomb"}]}, "sRol1BJ9EUk": {"duration": 13.54, "subset": "validation", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=sRol1BJ9EUk", "annotations": [{"segment": [0, 13.537232], "label": "Layup drill in basketball"}]}, "8A7nbBMC4eA": {"duration": 204.27, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8A7nbBMC4eA", "annotations": [{"segment": [49.40331759558379, 174.12644670600264], "label": "Tango"}]}, "i7rJN34TruM": {"duration": 45.98, "subset": 
"training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=i7rJN34TruM", "annotations": [{"segment": [0.01, 45.97551020408163], "label": "Playing racquetball"}]}, "yfPa5IKWTvU": {"duration": 225.35, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yfPa5IKWTvU", "annotations": [{"segment": [23.20289626521061, 209.17762542121687], "label": "Cheerleading"}]}, "MonWdYi3a0I": {"duration": 78.14, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=MonWdYi3a0I", "annotations": []}, "BnkUgUQBED0": {"duration": 124.088, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BnkUgUQBED0", "annotations": [{"segment": [7.356198817472699, 124.08746031746031], "label": "Clipping cat claws"}]}, "S7MZle1gPzI": {"duration": 31.092, "subset": "training", "resolution": "1252x720", "url": "https://www.youtube.com/watch?v=S7MZle1gPzI", "annotations": [{"segment": [0.01, 31.091451247165534], "label": "Waterskiing"}]}, "WdNtrGxSm1Y": {"duration": 109.835, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WdNtrGxSm1Y", "annotations": [{"segment": [0.01, 109.821], "label": "Disc dog"}]}, "6tzQ5-VBgVU": {"duration": 159.591, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6tzQ5-VBgVU", "annotations": [{"segment": [0.2, 154.5243697347894], "label": "Rollerblading"}]}, "WttP_X-aCEA": {"duration": 23.639, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WttP_X-aCEA", "annotations": [{"segment": [0.2, 16.891697347893913], "label": "Using the monkey bar"}]}, "u1VIetb75rs": {"duration": 148.03, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=u1VIetb75rs", "annotations": [{"segment": [28.581053159999996, 79.40908414999998], "label": "Snatch"}]}, "LbVnSPZZWj4": {"duration": 68.035, "subset": "testing", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=LbVnSPZZWj4", "annotations": []}, "jUmfhYsA5r0": {"duration": 66.76, "subset": "validation", "resolution": "482x360", "url": "https://www.youtube.com/watch?v=jUmfhYsA5r0", "annotations": [{"segment": [0.01, 56.01984243369734], "label": "Playing guitarra"}]}, "aYHErLYOPrs": {"duration": 59.33, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=aYHErLYOPrs", "annotations": []}, "xhyCKLKaG0c": {"duration": 47.485, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xhyCKLKaG0c", "annotations": [{"segment": [6.830835626822023, 43.57245089161953], "label": "Rock-paper-scissors"}]}, "-nzBW4S8UqA": {"duration": 164.56, "subset": "testing", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=-nzBW4S8UqA", "annotations": []}, "mc-1msYF8Xo": {"duration": 114.34, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mc-1msYF8Xo", "annotations": [{"segment": [24.030930299999998, 98.10340132605305], "label": "Making a sandwich"}]}, "XQaaA2UZYh8": {"duration": 64.157, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=XQaaA2UZYh8", "annotations": [{"segment": [6.230377722446049, 53.55728426450212], "label": "Welding"}]}, "dI1RqUHyWYo": {"duration": 90.42, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dI1RqUHyWYo", "annotations": [{"segment": [10.579387909516381, 79.19803038], "label": "Springboard diving"}]}, "Ntyl5alZXeM": {"duration": 70.68, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=Ntyl5alZXeM", "annotations": []}, "dJVWRkiRXHM": {"duration": 72.56, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dJVWRkiRXHM", "annotations": [{"segment": [0, 0], "label": "Playing racquetball"}]}, "ApH2nZIq2tU": {"duration": 42.098, "subset": "training", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=ApH2nZIq2tU", "annotations": [{"segment": [4.5211684867394695, 26.799390015600626], "label": "Drum corps"}]}, "8qzR38WgsS8": {"duration": 126.735, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8qzR38WgsS8", "annotations": [{"segment": [69.48936221554719, 120.97425473891167], "label": "BMX"}]}, "6YNLwicEaG4": {"duration": 147.725, "subset": "testing", "resolution": "384x288", "url": "https://www.youtube.com/watch?v=6YNLwicEaG4", "annotations": []}, "f025sQGYfJM": {"duration": 200.064, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=f025sQGYfJM", "annotations": [{"segment": [13.399866912497084, 189.77951045838896], "label": "Decorating the Christmas tree"}]}, "DnHpfQKDNnc": {"duration": 72.888, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=DnHpfQKDNnc", "annotations": []}, "1hiyhNqakMI": {"duration": 78.948, "subset": "validation", "resolution": "320x192", "url": "https://www.youtube.com/watch?v=1hiyhNqakMI", "annotations": [{"segment": [0, 48.649607457098284], "label": "Cutting the grass"}]}, "pniQHSjY7dc": {"duration": 85.38, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=pniQHSjY7dc", "annotations": [{"segment": [24.22368618395321, 84.2516799534236], "label": "Kneeling"}]}, "IdEcXDZ4Cos": {"duration": 214.97, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IdEcXDZ4Cos", "annotations": [{"segment": [68.87074150519462, 72.88262965703389], "label": "Wakeboarding"}, {"segment": [129.71771180809029, 139.74743218768847], "label": "Wakeboarding"}, {"segment": [163.4844362221041, 182.2065808317464], "label": "Wakeboarding"}, {"segment": [198.25413335429695, 201.59737346316163], "label": "Wakeboarding"}]}, "tElXd9QahbU": {"duration": 16.951, "subset": "testing", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=tElXd9QahbU", "annotations": []}, "P3BkKdkf43I": {"duration": 72.75, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=P3BkKdkf43I", "annotations": [{"segment": [11.235672259543444, 62.07992652495216], "label": "Playing squash"}]}, "J8mSgO4r-kQ": {"duration": 190.799, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=J8mSgO4r-kQ", "annotations": [{"segment": [10.117307332293294, 129.15799375975038], "label": "Ice fishing"}]}, "Vh8RINfopdk": {"duration": 56.98, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Vh8RINfopdk", "annotations": [{"segment": [14.93437887051482, 49.07010200312012], "label": "Doing nails"}]}, "pt6XC-p_CFE": {"duration": 121.58, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pt6XC-p_CFE", "annotations": [{"segment": [28.634985352574102, 102.02398754758191], "label": "Making a cake"}]}, "fh21bbDSVmA": {"duration": 52.32, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=fh21bbDSVmA", "annotations": []}, "74EoyLUbtgI": {"duration": 171.22, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=74EoyLUbtgI", "annotations": [{"segment": [2.7390901028047843, 99.52834711215058], "label": "Polishing shoes"}]}, "IAj0JIDDaOQ": {"duration": 65.13, "subset": "validation", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=IAj0JIDDaOQ", "annotations": [{"segment": [29.051368174726992, 42.43629173166927], "label": "Cleaning windows"}]}, "0WVkoTBmhA0": {"duration": 153.902, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0WVkoTBmhA0", "annotations": [{"segment": [28.615270459148366, 125.43745737503622], "label": "Sumo"}]}, "9fC5mvyEkzE": {"duration": 200.667, "subset": "testing", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=9fC5mvyEkzE", "annotations": []}, "cfaBPxE-A5k": {"duration": 95.2, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cfaBPxE-A5k", "annotations": [{"segment": [9.950858448517941, 86.88436108034323], "label": "Chopping wood"}]}, "TET92DxNsJY": {"duration": 39.474, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TET92DxNsJY", "annotations": []}, "prtLoXM3Oik": {"duration": 148.933, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=prtLoXM3Oik", "annotations": []}, "VSONGdnvKiM": {"duration": 190.776, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VSONGdnvKiM", "annotations": [{"segment": [10.51639204208279, 15.863709799092137], "label": "Scuba diving"}, {"segment": [69.93103378663108, 75.57542475236316], "label": "Scuba diving"}, {"segment": [82.70518176170896, 89.53786556233202], "label": "Scuba diving"}, {"segment": [110.03591696420118, 115.08616151248778], "label": "Scuba diving"}, {"segment": [127.26616307012019, 143.62024873994966], "label": "Scuba diving"}, {"segment": [154.01890138792473, 161.44651042219263], "label": "Scuba diving"}]}, "UL_3QfD3ERM": {"duration": 90.953, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UL_3QfD3ERM", "annotations": [{"segment": [18.293648985959436, 31.34028237129485], "label": "Disc dog"}, {"segment": [36.30367550702028, 46.66616068642747], "label": "Disc dog"}, {"segment": [53.190912636505466, 77.871496099844], "label": "Disc dog"}]}, "r_j3snNLxks": {"duration": 51.432, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=r_j3snNLxks", "annotations": []}, "Z-kXPcLsxBg": {"duration": 130.334, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Z-kXPcLsxBg", "annotations": [{"segment": [17.486255897035882, 123.01377695007801], 
"label": "Slacklining"}]}, "SEZvtGus0HU": {"duration": 206.054, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SEZvtGus0HU", "annotations": [{"segment": [0, 13.501190003120126], "label": "Surfing"}]}, "Ck-9AHZNkq4": {"duration": 61.26, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ck-9AHZNkq4", "annotations": [{"segment": [9.74716439625585, 52.65379982683307], "label": "Spinning"}]}, "hNvTMSL2jOA": {"duration": 138.23, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hNvTMSL2jOA", "annotations": []}, "OmE9IhdWFa0": {"duration": 147.749, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=OmE9IhdWFa0", "annotations": [{"segment": [61.446835186915884, 70.42221560747663], "label": "Surfing"}, {"segment": [74.10442295950155, 81.00856174454827], "label": "Surfing"}, {"segment": [83.77021725856697, 89.98394216510904], "label": "Surfing"}, {"segment": [94.58670135514019, 109.31553076323988], "label": "Surfing"}, {"segment": [112.07718627725858, 116.2196695482866], "label": "Surfing"}, {"segment": [119.21146302180685, 127.26629160436137], "label": "Surfing"}, {"segment": [119.21146302180685, 127.26629160436137], "label": "Surfing"}, {"segment": [131.17863691588784, 138.5430516199377], "label": "Surfing"}]}, "puGw7gu96AM": {"duration": 54.103, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=puGw7gu96AM", "annotations": [{"segment": [0, 51.54990795631826], "label": "Calf roping"}]}, "D-ZEf_qoDIY": {"duration": 93.46, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=D-ZEf_qoDIY", "annotations": []}, "LYTWYSqsTCE": {"duration": 7.617, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=LYTWYSqsTCE", "annotations": []}, "xF7etZzLFbY": {"duration": 47.601, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=xF7etZzLFbY", "annotations": [{"segment": [0, 47.600906], "label": "Cutting the grass"}]}, "y0VZOcxbjRM": {"duration": 76.139, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=y0VZOcxbjRM", "annotations": [{"segment": [0.2, 76.101], "label": "Shuffleboard"}]}, "kNAgK0nC9Ig": {"duration": 233.663, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=kNAgK0nC9Ig", "annotations": [{"segment": [19.319980331186034, 233.66240362811791], "label": "Surfing"}]}, "Ed9xVend710": {"duration": 237.471, "subset": "training", "resolution": "634x480", "url": "https://www.youtube.com/watch?v=Ed9xVend710", "annotations": [{"segment": [28.89656340539336, 221.54031833073324], "label": "Waterskiing"}]}, "OCZqTCabREw": {"duration": 38.43, "subset": "testing", "resolution": "328x240", "url": "https://www.youtube.com/watch?v=OCZqTCabREw", "annotations": []}, "N3UOyPU2t0E": {"duration": 196.975, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=N3UOyPU2t0E", "annotations": [{"segment": [25.119594991357484, 38.90474114935412], "label": "Beer pong"}, {"segment": [58.81661893312704, 84.54889176138742], "label": "Beer pong"}, {"segment": [119.77759860960104, 157.763334689414], "label": "Beer pong"}]}, "p6ea3QpXryw": {"duration": 138.484, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=p6ea3QpXryw", "annotations": [{"segment": [13.174572542901716, 31.10063026521061], "label": "Throwing darts"}, {"segment": [56.36989235569423, 85.52673322932918], "label": "Throwing darts"}, {"segment": [102.58888455538221, 137.7930702028081], "label": "Throwing darts"}]}, "Relgb880pRI": {"duration": 127.46, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Relgb880pRI", "annotations": [{"segment": [6.957464898595944, 111.71700780031202], "label": "Preparing salad"}]}, "v8XB6emr_MU": 
{"duration": 180.09, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=v8XB6emr_MU", "annotations": []}, "K7f0co9akMI": {"duration": 191.078, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=K7f0co9akMI", "annotations": [{"segment": [0.20000000000000004, 175.54889408099686], "label": "Playing beach volleyball"}]}, "UadYaZOC6B8": {"duration": 75.26, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UadYaZOC6B8", "annotations": [{"segment": [35.439535101404054, 56.79713572542902], "label": "Washing hands"}]}, "CZGDWYYkO-U": {"duration": 232.455, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CZGDWYYkO-U", "annotations": [{"segment": [158.4387472698908, 221.52419812792513], "label": "Putting in contact lenses"}]}, "dpUDMvzlwmI": {"duration": 111.735, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dpUDMvzlwmI", "annotations": [{"segment": [0.01, 110.8773828228383], "label": "Beach soccer"}]}, "09Kr5TQ9DHQ": {"duration": 122.254, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=09Kr5TQ9DHQ", "annotations": [{"segment": [30.507581903276133, 94.76417628705148], "label": "Skiing"}]}, "YQiTt8b6N60": {"duration": 198.531, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YQiTt8b6N60", "annotations": [{"segment": [22.915872074882998, 26.322285491419656], "label": "Scuba diving"}, {"segment": [46.76076599063963, 62.244463338533535], "label": "Scuba diving"}, {"segment": [145.23708112324493, 147.4047987519501], "label": "Scuba diving"}, {"segment": [168.4626271450858, 174.3464321372855], "label": "Scuba diving"}]}, "Y-UOZRZ01hI": {"duration": 42.191, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Y-UOZRZ01hI", "annotations": [{"segment": [2.497032761310453, 42.121], 
"label": "Welding"}]}, "7qjiHcYuq2Y": {"duration": 23.081, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7qjiHcYuq2Y", "annotations": [{"segment": [0.01, 20.11091887675507], "label": "Playing beach volleyball"}]}, "GSXP6BYeS-o": {"duration": 182.765, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GSXP6BYeS-o", "annotations": [{"segment": [27.080335413416538, 182.721], "label": "Clipping cat claws"}]}, "xlr_sSnttZo": {"duration": 188.38, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xlr_sSnttZo", "annotations": [{"segment": [0.01, 166.63563088455538], "label": "Brushing teeth"}]}, "liK3LME0gNY": {"duration": 236.565, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=liK3LME0gNY", "annotations": [{"segment": [0.2, 234.71964044276055], "label": "Plastering"}]}, "A_Sr0mCX2rI": {"duration": 32.137, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=A_Sr0mCX2rI", "annotations": []}, "OHOi0ZSuDUg": {"duration": 224.166, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=OHOi0ZSuDUg", "annotations": [{"segment": [9.791938191813388, 217.87062459594384], "label": "Playing congas"}]}, "w1qa9NOiFbE": {"duration": 142.76, "subset": "training", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=w1qa9NOiFbE", "annotations": [{"segment": [20.93461513154404, 25.834205907011793], "label": "Hand washing clothes"}, {"segment": [37.41505683084467, 40.53297823341506], "label": "Hand washing clothes"}, {"segment": [44.09631697920979, 57.013419932715685], "label": "Hand washing clothes"}, {"segment": [60.57675867851041, 93.3149334054995], "label": "Hand washing clothes"}, {"segment": [105.56390997191887, 117.8128868673947], "label": "Hand washing clothes"}, {"segment": [132.73436781279253, 134.96145452106083], "label": "Hand washing 
clothes"}]}, "zKYiTA6WBS4": {"duration": 87.167, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zKYiTA6WBS4", "annotations": []}, "iJzt1rvGiOc": {"duration": 189.01, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iJzt1rvGiOc", "annotations": [{"segment": [27.127787107644306, 42.46088416848674], "label": "Cleaning shoes"}, {"segment": [65.4605297597504, 173.3819436879875], "label": "Cleaning shoes"}]}, "h1bdAd1cXSw": {"duration": 27.08, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=h1bdAd1cXSw", "annotations": [{"segment": [0, 15.790150416536662], "label": "Springboard diving"}]}, "sNTOUuE7ZfM": {"duration": 146.52, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sNTOUuE7ZfM", "annotations": []}, "otq24Pdm3sc": {"duration": 211.14, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=otq24Pdm3sc", "annotations": [{"segment": [0.01, 211.101], "label": "Tug of war"}]}, "tHhdS8t9ytI": {"duration": 106.21000000000001, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=tHhdS8t9ytI", "annotations": [{"segment": [28.48625897035881, 58.95993135725429], "label": "Drinking coffee"}]}, "5bZ2FdbAKiQ": {"duration": 110.14, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5bZ2FdbAKiQ", "annotations": [{"segment": [0, 110.2018820861678], "label": "Using the balance beam"}]}, "gik__XL3xL4": {"duration": 139.436, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gik__XL3xL4", "annotations": []}, "Ni7Lqloy5Qc": {"duration": 217.94, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Ni7Lqloy5Qc", "annotations": [{"segment": [122.4014008723614, 205.36235035251747], "label": "Getting a tattoo"}]}, "1o-Fx2dGfpc": {"duration": 77.95, "subset": 
"training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1o-Fx2dGfpc", "annotations": [{"segment": [28.21257028393136, 54.35784015912636], "label": "Playing violin"}, {"segment": [59.34368232137285, 71.13945524180969], "label": "Playing violin"}]}, "ynpvos7UFZo": {"duration": 200.621, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ynpvos7UFZo", "annotations": [{"segment": [2.8080566223384316, 9.672206670123227], "label": "Knitting"}, {"segment": [31.512684094893032, 38.06482732232398], "label": "Knitting"}, {"segment": [48.36105239400117, 63.96139341169389], "label": "Knitting"}, {"segment": [77.37768668690963, 81.43377535150974], "label": "Knitting"}, {"segment": [87.98591857894068, 98.90615729132557], "label": "Knitting"}, {"segment": [107.6423482612335, 108.57836872229505], "label": "Knitting"}, {"segment": [133.5389143506034, 149.13925536829612], "label": "Knitting"}, {"segment": [159.74748607585676, 166.92364289078674], "label": "Knitting"}]}, "DvYEsBlxfhM": {"duration": 223.66, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DvYEsBlxfhM", "annotations": []}, "06xJ8-Dg_j8": {"duration": 124.181, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=06xJ8-Dg_j8", "annotations": [{"segment": [0, 116.30053198127925], "label": "Playing drums"}]}, "WXkuikumRDg": {"duration": 65.295, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=WXkuikumRDg", "annotations": []}, "rlhs3UzfcVE": {"duration": 184.32, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=rlhs3UzfcVE", "annotations": []}, "cip9fHzktRE": {"duration": 14.56, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cip9fHzktRE", "annotations": []}, "-Cefl3S_L-w": {"duration": 82.919, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=-Cefl3S_L-w", "annotations": [{"segment": [0.01, 82.861], "label": "Tug of war"}]}, "MtmQjudesdM": {"duration": 26.12, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MtmQjudesdM", "annotations": [{"segment": [6.505085803432138, 10.245510140405615], "label": "High jump"}]}, "qVqlImNflY8": {"duration": 97.59, "subset": "validation", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=qVqlImNflY8", "annotations": [{"segment": [13.185790230000002, 91.23124222620905], "label": "Bathing dog"}]}, "S_CnkpTkjYQ": {"duration": 160.265, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=S_CnkpTkjYQ", "annotations": [{"segment": [12.2493120124805, 62.74647581903277], "label": "Playing congas"}, {"segment": [66.49626521060843, 83.99528237129486], "label": "Playing congas"}, {"segment": [120.49323244929798, 160.241], "label": "Playing congas"}]}, "le7JdKQB65I": {"duration": 143.82999999999998, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=le7JdKQB65I", "annotations": []}, "lGPUCwHjiK0": {"duration": 179.49, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lGPUCwHjiK0", "annotations": [{"segment": [18.20103933409037, 147.00839344773792], "label": "High jump"}]}, "G8OyFOhVGCI": {"duration": 143.5, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=G8OyFOhVGCI", "annotations": [{"segment": [55.519158627569595, 67.16027253335031], "label": "Getting a piercing"}, {"segment": [113.94859442433697, 123.35103247113884], "label": "Getting a piercing"}]}, "aO_1pB_MXUw": {"duration": 69.962, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aO_1pB_MXUw", "annotations": []}, "Z86tpjRaiK8": {"duration": 231.526, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Z86tpjRaiK8", 
"annotations": [{"segment": [0.01, 204.79771625897035], "label": "Doing fencing"}]}, "0HhNhRExwSQ": {"duration": 133.86, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0HhNhRExwSQ", "annotations": [{"segment": [55.793611000640986, 78.69397502863475], "label": "Rock climbing"}]}, "G4-qjGC_3VA": {"duration": 84.8, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=G4-qjGC_3VA", "annotations": []}, "f6wAW0Jv2Eo": {"duration": 223.56, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=f6wAW0Jv2Eo", "annotations": [{"segment": [57.03573790951638, 187.15396567862715], "label": "Kayaking"}]}, "i4SvqrGYH-Q": {"duration": 172.084, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=i4SvqrGYH-Q", "annotations": [{"segment": [4.2947644305772235, 23.621204368174727], "label": "Beer pong"}, {"segment": [51.2687503900156, 66.30042589703588], "label": "Beer pong"}, {"segment": [71.40045865834634, 82.13736973478939], "label": "Beer pong"}, {"segment": [106.83226521060843, 119.98498127925116], "label": "Beer pong"}, {"segment": [134.74823400936037, 145.21672230889234], "label": "Beer pong"}]}, "GG9FU9MRkeA": {"duration": 120.28, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=GG9FU9MRkeA", "annotations": []}, "eJvOgEahkys": {"duration": 120.37, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eJvOgEahkys", "annotations": [{"segment": [6.197009880395216, 46.00810365747963], "label": "Ballet"}, {"segment": [57.650970705494885, 116.42866929797191], "label": "Ballet"}]}, "LCt-h6opmyE": {"duration": 57.563, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LCt-h6opmyE", "annotations": []}, "jjuitTS2cuM": {"duration": 36.06, "subset": "training", "resolution": "482x360", "url": "https://www.youtube.com/watch?v=jjuitTS2cuM", 
"annotations": [{"segment": [0.688591797623679, 35.69965721818443], "label": "Walking the dog"}]}, "QbXjUvFrvTA": {"duration": 119.003, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=QbXjUvFrvTA", "annotations": []}, "mi6wetIKq2w": {"duration": 139.994, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mi6wetIKq2w", "annotations": [{"segment": [22.268302652106083, 36.240570982839316], "label": "Baking cookies"}, {"segment": [38.64205460218408, 72.04450858034322], "label": "Baking cookies"}, {"segment": [74.6643088923557, 88.21254914196568], "label": "Baking cookies"}, {"segment": [92.79785491419658, 103.06020592823714], "label": "Baking cookies"}, {"segment": [103.71524960998441, 117.68951482059283], "label": "Baking cookies"}]}, "AjqnGUbsduw": {"duration": 148.144, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AjqnGUbsduw", "annotations": []}, "81F42Yyw_iY": {"duration": 66.67, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=81F42Yyw_iY", "annotations": [{"segment": [4.472005476137412, 5.824007131713841], "label": "Pole vault"}, {"segment": [11.23201375401955, 54.704066987169284], "label": "Pole vault"}]}, "CD8FFj8AJE0": {"duration": 119.05, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=CD8FFj8AJE0", "annotations": [{"segment": [79.11778187644306, 84.88955736], "label": "Getting a tattoo"}]}, "9IvKkq9k81o": {"duration": 49.436, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9IvKkq9k81o", "annotations": [{"segment": [5.398548894336726, 35.16768891107644], "label": "Carving jack-o-lanterns"}]}, "krs42Q-HhQY": {"duration": 167.649, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=krs42Q-HhQY", "annotations": []}, "aZQWCEejbk8": {"duration": 21.85, "subset": "training", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=aZQWCEejbk8", "annotations": [{"segment": [10.228382063134461, 21.65], "label": "Fun sliding down"}]}, "cVuHOF56B64": {"duration": 40.334, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cVuHOF56B64", "annotations": [{"segment": [14.286187967824501, 34.766826693681196], "label": "Beer pong"}]}, "8WFB_LUOx4k": {"duration": 153.716, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8WFB_LUOx4k", "annotations": [{"segment": [15.701709048361934, 16.660592043681746], "label": "Croquet"}, {"segment": [72.27580577223088, 74.19357176287052], "label": "Croquet"}, {"segment": [78.74826599063962, 81.86463572542903], "label": "Croquet"}, {"segment": [88.81653744149767, 92.66412870514822], "label": "Croquet"}]}, "lgB0Ynn38-k": {"duration": 107.091, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lgB0Ynn38-k", "annotations": [{"segment": [5.403078759131031, 39.255700254458134], "label": "Beer pong"}, {"segment": [58.099770347916085, 68.60575632922449], "label": "Beer pong"}, {"segment": [73.27508343202823, 97.2887656750189], "label": "Beer pong"}]}, "OnfvTk_DECY": {"duration": 218.85, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OnfvTk_DECY", "annotations": [{"segment": [38.60916416, 198.44934867394693], "label": "Getting a haircut"}]}, "CQ4dPckD_Xc": {"duration": 105.164, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CQ4dPckD_Xc", "annotations": [{"segment": [4.92183336973479, 10.828033413416538], "label": "Playing ten pins"}, {"segment": [33.46846691419657, 49.218333697347894], "label": "Playing ten pins"}, {"segment": [65.78850604212168, 73.66343943369735], "label": "Playing ten pins"}, {"segment": [92.03828401404056, 103.35850076443059], "label": "Playing ten pins"}]}, "UeuZeIASSPw": {"duration": 129.03, "subset": 
"training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=UeuZeIASSPw", "annotations": [{"segment": [10.064999062547537, 15.902698518825106], "label": "Triple jump"}, {"segment": [27.779397412631198, 33.41579688765783], "label": "Triple jump"}, {"segment": [42.272996062699654, 53.34449503150194], "label": "Triple jump"}, {"segment": [61.59779426279092, 67.4354937190685], "label": "Triple jump"}, {"segment": [78.30569270661982, 89.37719161872076], "label": "Triple jump"}, {"segment": [101.05259052386896, 108.50068982542902], "label": "Triple jump"}]}, "jsxrJJkUl2E": {"duration": 223.864, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jsxrJJkUl2E", "annotations": [{"segment": [76.46926521060843, 94.27717628705147], "label": "Rock-paper-scissors"}]}, "9Tip00S7cFY": {"duration": 60.81, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=9Tip00S7cFY", "annotations": [{"segment": [25.498346333853355, 41.51775351014041], "label": "Playing racquetball"}]}, "GcE-0A4Titg": {"duration": 119.096, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=GcE-0A4Titg", "annotations": [{"segment": [8.70517658721237, 10.742559003112891], "label": "Welding"}, {"segment": [17.22513941734183, 31.857249495172855], "label": "Welding"}, {"segment": [51.30499073785967, 63.158852066735435], "label": "Welding"}, {"segment": [67.23361689853648, 84.273541823897], "label": "Welding"}, {"segment": [92.79350457972905, 96.31261963105098], "label": "Welding"}]}, "BwwjQZDn3NE": {"duration": 65.968, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=BwwjQZDn3NE", "annotations": [{"segment": [0.8233122141212179, 55.676487503900155], "label": "Hand car wash"}]}, "T3dcZ5GT1qQ": {"duration": 95.062, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=T3dcZ5GT1qQ", "annotations": []}, "C6YSkEP3t9U": 
{"duration": 159.452, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=C6YSkEP3t9U", "annotations": []}, "nXr1r26HZis": {"duration": 47.276, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nXr1r26HZis", "annotations": [{"segment": [0, 41.492485179407176], "label": "Table soccer"}]}, "tETVZtWqdDs": {"duration": 93.159, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tETVZtWqdDs", "annotations": [{"segment": [35.38851850672666, 39.893832977808906], "label": "Applying sunscreen"}]}, "cA2ZLOZcCyc": {"duration": 20.23, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=cA2ZLOZcCyc", "annotations": [{"segment": [0.010000000000000002, 18.48924070826833], "label": "Doing motocross"}]}, "85DumZo8-ik": {"duration": 18.97, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=85DumZo8-ik", "annotations": [{"segment": [5.821180967238689, 12.144697347893915], "label": "Triple jump"}]}, "TbFqsEFrCB4": {"duration": 197.858, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TbFqsEFrCB4", "annotations": [{"segment": [120.68950764289076, 129.94957216792073], "label": "Swimming"}, {"segment": [162.35979800552568, 167.2984990702028], "label": "Swimming"}]}, "zxm4KYn7r8c": {"duration": 195.05, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=zxm4KYn7r8c", "annotations": [{"segment": [20.086446177847115, 25.564567862714508], "label": "Archery"}]}, "mdv8UW_871g": {"duration": 123.314, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mdv8UW_871g", "annotations": [{"segment": [0.01, 113.79339655478645], "label": "Wakeboarding"}]}, "oeSyeZqlC1g": {"duration": 81.46000000000001, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=oeSyeZqlC1g", "annotations": 
[{"segment": [12.707567862714509, 79.04107210608424], "label": "Kayaking"}]}, "GhvNhbL2ivg": {"duration": 150.652, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=GhvNhbL2ivg", "annotations": [{"segment": [0.01, 149.49552466826566], "label": "Playing congas"}]}, "SMLSQG3tsro": {"duration": 103.654, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=SMLSQG3tsro", "annotations": []}, "K1G84xfLr9s": {"duration": 126.97, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=K1G84xfLr9s", "annotations": [{"segment": [7.328811711388456, 124.05413577862917], "label": "Doing karate"}]}, "RHtpBRwZ9hM": {"duration": 138.531, "subset": "validation", "resolution": "852x476", "url": "https://www.youtube.com/watch?v=RHtpBRwZ9hM", "annotations": [{"segment": [20.955783151326052, 46.664424336973475], "label": "Curling"}, {"segment": [74.96553354134164, 81.67453666146646], "label": "Curling"}]}, "Y03qa1Qlnbc": {"duration": 52.5, "subset": "testing", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=Y03qa1Qlnbc", "annotations": []}, "B8WIh6PUjE0": {"duration": 107.718, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=B8WIh6PUjE0", "annotations": [{"segment": [28.720797191887677, 94.07388455538222], "label": "Spread mulch"}]}, "PJgy8J1f3jg": {"duration": 186.02, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PJgy8J1f3jg", "annotations": [{"segment": [0.01, 160.76810277191888], "label": "Paintball"}]}, "bTC_MdhURLg": {"duration": 68.337, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bTC_MdhURLg", "annotations": [{"segment": [13.954463338533541, 56.047310452418095], "label": "Playing blackjack"}]}, "8tCRp3NKtkI": {"duration": 89.05, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8tCRp3NKtkI", 
"annotations": []}, "y1jyeWW-G4E": {"duration": 104.56, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=y1jyeWW-G4E", "annotations": [{"segment": [13.538899890689507, 18.4324781644327], "label": "Building sandcastles"}, {"segment": [49.42514056480626, 55.623672096723865], "label": "Building sandcastles"}]}, "hUzsmIWojH0": {"duration": 117.38, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=hUzsmIWojH0", "annotations": [{"segment": [9.885285491419657, 112.94853978159126], "label": "High jump"}]}, "RW-nnJiVPsU": {"duration": 161.054, "subset": "training", "resolution": "160x120", "url": "https://www.youtube.com/watch?v=RW-nnJiVPsU", "annotations": [{"segment": [24.62286341029641, 49.99948794539782], "label": "Welding"}, {"segment": [90.20008126833073, 121.8580485101404], "label": "Welding"}]}, "k1WnO7UeBJ8": {"duration": 82.083, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=k1WnO7UeBJ8", "annotations": [{"segment": [15.878681623455416, 67.3563416349454], "label": "Installing carpet"}]}, "GZ30mkTroI8": {"duration": 119.33, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GZ30mkTroI8", "annotations": []}, "4mRkWNLE66E": {"duration": 195.19, "subset": "testing", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=4mRkWNLE66E", "annotations": []}, "Bkhr72sazvQ": {"duration": 212.74, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Bkhr72sazvQ", "annotations": []}, "MkL-tApJgXc": {"duration": 67.06, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MkL-tApJgXc", "annotations": [{"segment": [13.17417472698908, 20.493160686427455], "label": "Drinking coffee"}]}, "crSMHcUkPcA": {"duration": 90.326, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=crSMHcUkPcA", "annotations": [{"segment": 
[14.514105202808112, 77.5025035101404], "label": "Hanging wallpaper"}]}, "FhGOGbL-A84": {"duration": 47.95, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=FhGOGbL-A84", "annotations": [{"segment": [0.2, 47.74920634920635], "label": "Blowing leaves"}]}, "sVw0kD_9vxA": {"duration": 138.72, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sVw0kD_9vxA", "annotations": []}, "sPSfixKrDc0": {"duration": 130.775, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sPSfixKrDc0", "annotations": [{"segment": [28.721564837773116, 95.33115022429907], "label": "Making a cake"}]}, "KqmxWapIzSY": {"duration": 237.82, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=KqmxWapIzSY", "annotations": []}, "ySvPn8Yb7Xw": {"duration": 24.92, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ySvPn8Yb7Xw", "annotations": []}, "d28kk1Lzusk": {"duration": 28.7, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=d28kk1Lzusk", "annotations": []}, "NpBZn7OHUKo": {"duration": 119.839, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NpBZn7OHUKo", "annotations": [{"segment": [30.65111388455538, 62.42360998439938], "label": "Kneeling"}]}, "Koxtbgzexmw": {"duration": 12.73, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Koxtbgzexmw", "annotations": [{"segment": [0.01, 12.724534], "label": "Ping-pong"}]}, "xC5RVs9mXyM": {"duration": 57.331, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xC5RVs9mXyM", "annotations": [{"segment": [8.31777883151326, 57.330068027210885], "label": "Drum corps"}]}, "2RfasAe0-tA": {"duration": 85.75, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=2RfasAe0-tA", "annotations": [{"segment": 
[7.625311959417152, 82.00554791443358], "label": "Cumbia"}]}, "gLfIPN_WM48": {"duration": 192.192, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gLfIPN_WM48", "annotations": [{"segment": [14.48919664620064, 158.48306786738445], "label": "Baking cookies"}]}, "OUpTPRtEITY": {"duration": 175.82, "subset": "validation", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=OUpTPRtEITY", "annotations": [{"segment": [13.988887067082683, 18.37755751950078], "label": "Playing badminton"}]}, "H33jSILKmfI": {"duration": 15.05, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=H33jSILKmfI", "annotations": []}, "rMes9poepAI": {"duration": 219.36, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=rMes9poepAI", "annotations": [{"segment": [5.817631039935475, 14.715184395130906], "label": "Vacuuming floor"}, {"segment": [104.3751451282541, 110.87720334935847], "label": "Vacuuming floor"}]}, "K8XNOs0AwaE": {"duration": 139.3, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=K8XNOs0AwaE", "annotations": [{"segment": [0, 35.421588741029645], "label": "Doing motocross"}]}, "-5xWaBSwnjY": {"duration": 143.337, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=-5xWaBSwnjY", "annotations": [{"segment": [16.99468842971406, 143.336779], "label": "Disc dog"}]}, "2VoWT4gnQDg": {"duration": 130.751, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=2VoWT4gnQDg", "annotations": [{"segment": [53.22649141965679, 87.89508736349454], "label": "Roof shingle removal"}]}, "35sykahTJV0": {"duration": 56.262, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=35sykahTJV0", "annotations": []}, "TJ98vfFChfI": {"duration": 126.76, "subset": "training", "resolution": "160x120", "url": 
"https://www.youtube.com/watch?v=TJ98vfFChfI", "annotations": [{"segment": [80.05666249999999, 104.64086175], "label": "Chopping wood"}, {"segment": [2.9662495553822157, 78.11123829173167], "label": "Chopping wood"}]}, "sYKtWOfH6VQ": {"duration": 115.72999999999999, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sYKtWOfH6VQ", "annotations": [{"segment": [36.2749416, 90.49119504], "label": "Washing hands"}, {"segment": [16.332355694227772, 18.678439937597503], "label": "Washing hands"}, {"segment": [20.844056162246492, 23.009672386895478], "label": "Washing hands"}, {"segment": [28.965117004680188, 33.83775351014041], "label": "Washing hands"}, {"segment": [35.46196567862715, 38.71039001560063], "label": "Washing hands"}, {"segment": [43.76349453978159, 46.83145085803432], "label": "Washing hands"}, {"segment": [52.245491419656794, 54.591575663026525], "label": "Washing hands"}, {"segment": [61.44936037441498, 64.15638065522622], "label": "Washing hands"}, {"segment": [67.76574102964119, 70.29229329173168], "label": "Washing hands"}, {"segment": [72.63837753510141, 77.15007800312013], "label": "Washing hands"}, {"segment": [78.41335413416537, 81.30084243369735], "label": "Washing hands"}]}, "qRT0Z6Sz_Ck": {"duration": 218.709, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=qRT0Z6Sz_Ck", "annotations": [{"segment": [22.17795465560119, 218.70875283446713], "label": "Rollerblading"}]}, "isGfZVCL4gE": {"duration": 139.23, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=isGfZVCL4gE", "annotations": [{"segment": [12.577223088923557, 33.88360618408736], "label": "Ironing clothes"}]}, "ej_fSoVO0R8": {"duration": 229.716, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ej_fSoVO0R8", "annotations": []}, "_Boy9qJLibI": {"duration": 171.11, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=_Boy9qJLibI", "annotations": [{"segment": [32.55382527301092, 53.10009204368175], "label": "Paintball"}, {"segment": [79.7835553822153, 133.68415132605304], "label": "Paintball"}]}, "nPlMYHjlonk": {"duration": 55.984, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nPlMYHjlonk", "annotations": []}, "s2VpBgSWIPg": {"duration": 203.29, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=s2VpBgSWIPg", "annotations": [{"segment": [10.465824020715932, 15.22301675740499], "label": "Hopscotch"}, {"segment": [29.177448785026233, 49.474804461566215], "label": "Hopscotch"}, {"segment": [89.75236963220026, 140.81290500599616], "label": "Hopscotch"}]}, "IbYlWVA8Dh8": {"duration": 86.193, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IbYlWVA8Dh8", "annotations": [{"segment": [31.098845124373582, 44.23542635274262], "label": "Assembling bicycle"}]}, "amgpZtvd9h0": {"duration": 222.68, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=amgpZtvd9h0", "annotations": []}, "PLnfT1PoVHw": {"duration": 112.69, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=PLnfT1PoVHw", "annotations": [{"segment": [37.26914954134165, 47.4654263026521], "label": "Shaving legs"}, {"segment": [73.30771395631825, 91.59069297659907], "label": "Shaving legs"}]}, "D84XQFQ4tsU": {"duration": 76.07, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=D84XQFQ4tsU", "annotations": [{"segment": [10.2729834, 28.24328772], "label": "Hand washing clothes"}, {"segment": [54.47031767550701, 65.12257548], "label": "Hand washing clothes"}]}, "tQmsDeu1d6M": {"duration": 131.913, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=tQmsDeu1d6M", "annotations": [{"segment": [86.46959657320872, 128.38882398753896], "label": 
"Building sandcastles"}]}, "kBTqTp0SKq8": {"duration": 39.521, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kBTqTp0SKq8", "annotations": []}, "TGvY7GtyTK4": {"duration": 106.301, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TGvY7GtyTK4", "annotations": [{"segment": [0, 10.28183925429017], "label": "Kneeling"}, {"segment": [14.261906062402495, 62.68605222776911], "label": "Kneeling"}, {"segment": [72.8020553650546, 73.96290818408735], "label": "Kneeling"}, {"segment": [78.44048334321373, 86.2347808424337], "label": "Kneeling"}]}, "wmCcT0gcUxw": {"duration": 170.06, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wmCcT0gcUxw", "annotations": []}, "4WrU5OdkvY0": {"duration": 81.131, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4WrU5OdkvY0", "annotations": [{"segment": [0.01, 75.00941185647426], "label": "Doing fencing"}]}, "VOxaxr5LPiQ": {"duration": 226.604, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VOxaxr5LPiQ", "annotations": [{"segment": [173.54360530421218, 189.44882371294852], "label": "Hula hoop"}]}, "zSRhiHJP0QY": {"duration": 110.017, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zSRhiHJP0QY", "annotations": []}, "KMBeFIRSzMY": {"duration": 180.211, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KMBeFIRSzMY", "annotations": []}, "6vylz7u_tHw": {"duration": 123.252, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6vylz7u_tHw", "annotations": [{"segment": [98.59924024960998, 120.70238377535101], "label": "Camel ride"}]}, "W74AfHdwWhM": {"duration": 107.764, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=W74AfHdwWhM", "annotations": [{"segment": [32.345899834758946, 35.195020770847215], 
"label": "Having an ice cream"}, {"segment": [65.52977897272828, 71.22801983772129], "label": "Having an ice cream"}]}, "UCndppz9rWc": {"duration": 11.52, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=UCndppz9rWc", "annotations": [{"segment": [0.5390152886115445, 11.445091294851794], "label": "Mowing the lawn"}]}, "dAdCETrsDQw": {"duration": 207.68, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dAdCETrsDQw", "annotations": [{"segment": [82.61792161482377, 99.78948963672832], "label": "Playing polo"}, {"segment": [133.16065013212773, 143.20439746569454], "label": "Playing polo"}]}, "Gl3okOVEwgg": {"duration": 129.66, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Gl3okOVEwgg", "annotations": [{"segment": [30.53003276131045, 128.18570046801872], "label": "Rock climbing"}]}, "VvD2fdPNWEg": {"duration": 188.29, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=VvD2fdPNWEg", "annotations": [{"segment": [10.638385, 173.4129300101404], "label": "Triple jump"}]}, "Ct9HYiWaXt0": {"duration": 38.69, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Ct9HYiWaXt0", "annotations": []}, "lFlQ_xWVt8M": {"duration": 126.015, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lFlQ_xWVt8M", "annotations": [{"segment": [58.205850079577594, 82.70326298782021], "label": "Welding"}]}, "AIJpVd5JLdY": {"duration": 215.41, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=AIJpVd5JLdY", "annotations": []}, "FNLPN4yftWc": {"duration": 191.936, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FNLPN4yftWc", "annotations": []}, "oA_uJ9gLvUQ": {"duration": 70.705, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=oA_uJ9gLvUQ", "annotations": [{"segment": 
[24.258689547581902, 40.02683775351014], "label": "Sharpening knives"}]}, "VEDRmPt_-Ms": {"duration": 232.07999999999998, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=VEDRmPt_-Ms", "annotations": [{"segment": [15.568780241809671, 21.723879407176288], "label": "Tumbling"}, {"segment": [21.723879407176288, 27.878978572542902], "label": "Tumbling"}, {"segment": [29.32723719968799, 34.75820705148206], "label": "Tumbling"}, {"segment": [34.75820705148206, 39.82711224648986], "label": "Tumbling"}, {"segment": [40.189176903276135, 49.24079332293292], "label": "Tumbling"}, {"segment": [49.60285797971919, 55.757957145085804], "label": "Tumbling"}, {"segment": [57.56828042901716, 64.80957356474259], "label": "Tumbling"}, {"segment": [65.17163822152887, 77.4818365522621], "label": "Tumbling"}, {"segment": [77.4818365522621, 85.80932365834634], "label": "Tumbling"}, {"segment": [85.44725900156007, 93.41268145085805], "label": "Tumbling"}, {"segment": [93.05061679407176, 98.8436513026521], "label": "Tumbling"}, {"segment": [93.41268145085805, 108.98146169266771], "label": "Tumbling"}, {"segment": [108.98146169266771, 118.75720742589704], "label": "Tumbling"}, {"segment": [130.70534109984402, 136.86044026521063], "label": "Tumbling"}, {"segment": [137.94663423556943, 149.17063859594387], "label": "Tumbling"}, {"segment": [149.53270325273013, 158.94638432917318], "label": "Tumbling"}, {"segment": [159.30844898595944, 166.5497421216849], "label": "Tumbling"}, {"segment": [166.5497421216849, 177.7737464820593], "label": "Tumbling"}, {"segment": [177.41168182527304, 190.80807412636506], "label": "Tumbling"}, {"segment": [190.4460094695788, 200.94588451638066], "label": "Tumbling"}]}, "4-8qvijRPCM": {"duration": 129.96, "subset": "testing", "resolution": "1920x1440", "url": "https://www.youtube.com/watch?v=4-8qvijRPCM", "annotations": []}, "Dys_KC7uIzs": {"duration": 234.267, "subset": "training", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=Dys_KC7uIzs", "annotations": [{"segment": [13.522381483014422, 74.55583304148492], "label": "Playing ten pins"}, {"segment": [89.17462383393294, 223.30202797347894], "label": "Playing ten pins"}]}, "MMVfzKCnpnI": {"duration": 83.151, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=MMVfzKCnpnI", "annotations": [{"segment": [10.50734009204368, 82.95068027210884], "label": "Baton twirling"}]}, "ayDqRzRN8_M": {"duration": 124.924, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ayDqRzRN8_M", "annotations": [{"segment": [28.20975770419614, 34.62106640808205], "label": "Curling"}, {"segment": [42.97519593132733, 50.35791504489292], "label": "Curling"}, {"segment": [67.84330204722256, 81.05448354644496], "label": "Curling"}]}, "MWnYL4JiMP0": {"duration": 208.4, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MWnYL4JiMP0", "annotations": [{"segment": [12.839143224648986, 204.93872919344773], "label": "Zumba"}]}, "of4bEaqQzOk": {"duration": 199.14, "subset": "training", "resolution": "658x480", "url": "https://www.youtube.com/watch?v=of4bEaqQzOk", "annotations": [{"segment": [8.54581752, 142.01175849453978], "label": "Snatch"}]}, "n2sTs3k9G-U": {"duration": 133.585, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=n2sTs3k9G-U", "annotations": [{"segment": [11.319301331828061, 131.3787992227938], "label": "Assembling bicycle"}]}, "gjz9pSK0Y9I": {"duration": 124.39, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gjz9pSK0Y9I", "annotations": [{"segment": [0.01, 124.321], "label": "Putting on shoes"}]}, "O2Vd29Slt7g": {"duration": 70.589, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=O2Vd29Slt7g", "annotations": [{"segment": [5.497557590014199, 57.284548906542064], "label": "Capoeira"}]}, 
"--veKG73Di4": {"duration": 115.45, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=--veKG73Di4", "annotations": [{"segment": [33.50020015494497, 109.86624771294852], "label": "Playing congas"}]}, "kK9k01CtXnE": {"duration": 80.411, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kK9k01CtXnE", "annotations": [{"segment": [6.127717020265024, 73.157458768321], "label": "Fixing the roof"}]}, "q0L4clHNIbc": {"duration": 126.69, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=q0L4clHNIbc", "annotations": [{"segment": [9.581965678627146, 105.32080192], "label": "Shoveling snow"}]}, "Nogr8GCIL9k": {"duration": 118.376, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Nogr8GCIL9k", "annotations": [{"segment": [30.45704368174727, 72.92464118564742], "label": "Wakeboarding"}]}, "Bm6HrokioXU": {"duration": 105.97999999999999, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Bm6HrokioXU", "annotations": [{"segment": [15.872199687987521, 59.19007800312013], "label": "Dodgeball"}]}, "kmtuO9Xdp-Y": {"duration": 207.12, "subset": "training", "resolution": "1906x1080", "url": "https://www.youtube.com/watch?v=kmtuO9Xdp-Y", "annotations": [{"segment": [13.245430577223088, 206.4548190327613], "label": "Playing guitarra"}]}, "FAPMunnTNsE": {"duration": 92.973, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=FAPMunnTNsE", "annotations": [{"segment": [0.01, 92.9726984126984], "label": "Canoeing"}]}, "vt81bZ6_GcQ": {"duration": 215.92000000000002, "subset": "training", "resolution": "852x480", "url": "https://www.youtube.com/watch?v=vt81bZ6_GcQ", "annotations": [{"segment": [11.61928783151326, 196.51752028081123], "label": "Playing water polo"}]}, "gUR1wXosHMo": {"duration": 50.41, "subset": "validation", "resolution": "640x360", "url": 
"https://www.youtube.com/watch?v=gUR1wXosHMo", "annotations": [{"segment": [2.908396255850234, 9.039609984399377], "label": "Drinking beer"}, {"segment": [0.2, 50.186], "label": "Drinking beer"}, {"segment": [0.2, 50.186], "label": "Drinking beer"}, {"segment": [0.2, 50.186], "label": "Drinking beer"}, {"segment": [39.302652106084246, 43.94036505460218], "label": "Drinking beer"}]}, "HXCLDIk4pgI": {"duration": 119.82, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HXCLDIk4pgI", "annotations": [{"segment": [6.915996476593758, 118.5065342205525], "label": "Shoveling snow"}]}, "GA8XzZTETPE": {"duration": 158.29, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GA8XzZTETPE", "annotations": [{"segment": [5.926630795844078, 71.61345461778471], "label": "Ballet"}, {"segment": [76.05842766302652, 156.31488542433698], "label": "Ballet"}]}, "arhhcCWkAoM": {"duration": 76.28, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=arhhcCWkAoM", "annotations": [{"segment": [0, 3.331937696555481], "label": "Hammer throw"}, {"segment": [6.187884293603037, 7.020868717741907], "label": "Hammer throw"}, {"segment": [9.162828665527574, 11.423786388190223], "label": "Hammer throw"}, {"segment": [23.79955444617785, 26.298507663026523], "label": "Hammer throw"}, {"segment": [64.49679254914197, 74.01661432761311], "label": "Hammer throw"}]}, "DEVD2oszH48": {"duration": 86.98, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DEVD2oszH48", "annotations": [{"segment": [20.965227957098286, 72.8015853913068], "label": "Tumbling"}]}, "MAT310OaVLI": {"duration": 128.082, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MAT310OaVLI", "annotations": []}, "Woelo4XJkHE": {"duration": 75.93, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Woelo4XJkHE", "annotations": []}, 
"vrwJEvpeHyM": {"duration": 134.75, "subset": "validation", "resolution": "636x480", "url": "https://www.youtube.com/watch?v=vrwJEvpeHyM", "annotations": [{"segment": [6.306336824901567, 29.009149394547208], "label": "Discus throw"}, {"segment": [33.21337394448159, 58.018298789094416], "label": "Discus throw"}, {"segment": [62.64294579402224, 72.52287348636803], "label": "Discus throw"}, {"segment": [74.41477407800312, 85.34575784087365], "label": "Discus throw"}, {"segment": [87.44787010296412, 103.0035008424337], "label": "Discus throw"}, {"segment": [117.71828667706708, 133.69433986895476], "label": "Discus throw"}]}, "8rypSctbfjU": {"duration": 226.57999999999998, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8rypSctbfjU", "annotations": [{"segment": [18.91588065522621, 209.13539079563185], "label": "Painting"}]}, "-DTxZliHCTE": {"duration": 46.91, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-DTxZliHCTE", "annotations": [{"segment": [0, 2.1952093759750393], "label": "Mowing the lawn"}, {"segment": [8.634490212168487, 20.781315425897038], "label": "Mowing the lawn"}, {"segment": [24.879039594383777, 28.24502730421217], "label": "Mowing the lawn"}, {"segment": [38.269816787831516, 46.704307], "label": "Mowing the lawn"}]}, "TW1qn3wVAtw": {"duration": 221.31, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TW1qn3wVAtw", "annotations": []}, "6Lh3yNFvskc": {"duration": 54.43, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=6Lh3yNFvskc", "annotations": [{"segment": [5.7739077940717625, 49.163126658346336], "label": "Layup drill in basketball"}]}, "i9DEbIo8HGs": {"duration": 69.5, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=i9DEbIo8HGs", "annotations": []}, "3Wjig_NRlnU": {"duration": 110.97, "subset": "training", "resolution": "1280x544", "url": 
"https://www.youtube.com/watch?v=3Wjig_NRlnU", "annotations": [{"segment": [27.35252698271196, 29.42993375975039], "label": "Tai chi"}, {"segment": [57.821163975039, 59.03298477691108], "label": "Tai chi"}]}, "rG-WlnAoc3M": {"duration": 129.823, "subset": "training", "resolution": "1444x1080", "url": "https://www.youtube.com/watch?v=rG-WlnAoc3M", "annotations": [{"segment": [0.2, 104.10781152647976], "label": "Hand car wash"}]}, "AZaSkc1gnhY": {"duration": 189.87, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=AZaSkc1gnhY", "annotations": [{"segment": [16.29146059282371, 20.438377834633382], "label": "Drinking coffee"}, {"segment": [75.53313547581902, 90.04734582215288], "label": "Drinking coffee"}, {"segment": [99.52601380343214, 101.007055675507], "label": "Drinking coffee"}]}, "c7HroaL0WDc": {"duration": 143.453, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=c7HroaL0WDc", "annotations": [{"segment": [12.716659347966766, 13.609056734085549], "label": "Wakeboarding"}, {"segment": [82.32365546523172, 83.88535089093959], "label": "Wakeboarding"}, {"segment": [89.68593390071166, 93.70172213824618], "label": "Wakeboarding"}, {"segment": [105.97218485393903, 114.0037612271894], "label": "Wakeboarding"}]}, "9gU5be5YCVw": {"duration": 58.65, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9gU5be5YCVw", "annotations": [{"segment": [6.171528790951637, 52.617997321372854], "label": "Using parallel bars"}]}, "hoisfXCLJDQ": {"duration": 15.186, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hoisfXCLJDQ", "annotations": [{"segment": [0, 9.626407176287051], "label": "River tubing"}]}, "Yu18MvEn-To": {"duration": 82.501, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Yu18MvEn-To", "annotations": [{"segment": [0.12830363630220643, 79.42116078404715], "label": "Knitting"}]}, 
"sOL5ksIFX50": {"duration": 53.824, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sOL5ksIFX50", "annotations": [{"segment": [12.763222552262091, 46.1827131825273], "label": "Curling"}]}, "MOQQCJBDEyA": {"duration": 52.663, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MOQQCJBDEyA", "annotations": []}, "T3mcQnlsmq4": {"duration": 159.847, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=T3mcQnlsmq4", "annotations": []}, "yQ2AirKmnTM": {"duration": 39.52, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=yQ2AirKmnTM", "annotations": [{"segment": [0.01, 38.84880289762265], "label": "Riding bumper cars"}]}, "VcQHv5PHb-M": {"duration": 160.868, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VcQHv5PHb-M", "annotations": [{"segment": [0.01, 12.866092905311941], "label": "Throwing darts"}, {"segment": [17.87235458755493, 22.628303185685773], "label": "Throwing darts"}, {"segment": [45.65710692400353, 52.41556019503157], "label": "Throwing darts"}, {"segment": [59.92495271839605, 60.93378760940932], "label": "Throwing darts"}, {"segment": [84.96683433838128, 89.47303060006352], "label": "Throwing darts"}, {"segment": [98.485423123428, 102.99161938511025], "label": "Throwing darts"}]}, "VlLq4bAHCXI": {"duration": 44.56, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VlLq4bAHCXI", "annotations": [{"segment": [10.557241809672387, 38.82564586583463], "label": "Running a marathon"}]}, "8zq6C0SRyDQ": {"duration": 144.405, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=8zq6C0SRyDQ", "annotations": [{"segment": [15.799323293858228, 52.89788460793896], "label": "Making a cake"}]}, "J9OSz5iCwNU": {"duration": 48.084, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=J9OSz5iCwNU", "annotations": [{"segment": [0.5985897886889333, 43.32301651768745], "label": "Rollerblading"}]}, "ORVjNqVSLe0": {"duration": 187.11, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ORVjNqVSLe0", "annotations": [{"segment": [3.9101066399999995, 172.67770510062402], "label": "Shaving legs"}]}, "-bqaXU4s8Qs": {"duration": 55.357, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-bqaXU4s8Qs", "annotations": [{"segment": [0.01, 55.321], "label": "Hand car wash"}]}, "_jxpaVW4_cE": {"duration": 126.201, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=_jxpaVW4_cE", "annotations": [{"segment": [23.724110216109324, 85.15085269520237], "label": "Camel ride"}]}, "h9WOFfVGeAQ": {"duration": 33.92, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=h9WOFfVGeAQ", "annotations": [{"segment": [0, 11.590379232449298], "label": "Playing bagpipes"}, {"segment": [22.069352237129483, 23.23368257098284], "label": "Playing bagpipes"}]}, "kkIClKG5xY8": {"duration": 165.721, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=kkIClKG5xY8", "annotations": [{"segment": [45.76066223057085, 104.44806520424082], "label": "Surfing"}, {"segment": [107.80901779744659, 157.1891661778471], "label": "Surfing"}]}, "NOxiMgQMeuk": {"duration": 130.8, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=NOxiMgQMeuk", "annotations": [{"segment": [6.325644525100731, 124.88046464274571], "label": "Painting"}]}, "tES39s-LJsQ": {"duration": 100.055, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tES39s-LJsQ", "annotations": [{"segment": [19.606378429903582, 20.228802897799646], "label": "Snowboarding"}, {"segment": [22.874108202620796, 25.052594924238214], "label": "Snowboarding"}, {"segment": 
[27.23108164585563, 28.32032500666434], "label": "Snowboarding"}, {"segment": [35.9450285323253, 36.87866569873277], "label": "Snowboarding"}, {"segment": [54.30655947167211, 64.57656830215423], "label": "Snowboarding"}, {"segment": [68.77793555098782, 71.26763466140773], "label": "Snowboarding"}, {"segment": [76.24703288224754, 80.91521871428488], "label": "Snowboarding"}, {"segment": [83.24931163030352, 84.80537357431598], "label": "Snowboarding"}]}, "P1P0Q9kW0GU": {"duration": 17.81, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=P1P0Q9kW0GU", "annotations": []}, "pr9GgpH34m4": {"duration": 35.945, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pr9GgpH34m4", "annotations": []}, "LrputIUn4oY": {"duration": 84.17, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LrputIUn4oY", "annotations": [{"segment": [33.55011748, 70.74519890795632], "label": "Plataform diving"}]}, "jEppv00aBBc": {"duration": 235.85, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=jEppv00aBBc", "annotations": [{"segment": [18.764585097689622, 235.84507936507936], "label": "Walking the dog"}]}, "ZYv-6yZ6te4": {"duration": 38.41, "subset": "testing", "resolution": "1920x1440", "url": "https://www.youtube.com/watch?v=ZYv-6yZ6te4", "annotations": []}, "e4kS-n7_LRU": {"duration": 41.1, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=e4kS-n7_LRU", "annotations": []}, "idSdnubrlds": {"duration": 24.544, "subset": "testing", "resolution": "712x480", "url": "https://www.youtube.com/watch?v=idSdnubrlds", "annotations": []}, "ivkkCEynzPs": {"duration": 76.394, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=ivkkCEynzPs", "annotations": [{"segment": [4.448444729778255, 73.67290267370349], "label": "Welding"}]}, "ig867kFeLic": {"duration": 17.415, "subset": "validation", 
"resolution": "320x240", "url": "https://www.youtube.com/watch?v=ig867kFeLic", "annotations": [{"segment": [0.01, 17.41496598639456], "label": "Powerbocking"}]}, "iWSKl7vOd2s": {"duration": 67.45, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=iWSKl7vOd2s", "annotations": [{"segment": [0.01, 40.83017035257411], "label": "Ballet"}]}, "qTAG23IVSeM": {"duration": 107.53, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qTAG23IVSeM", "annotations": [{"segment": [47.889173166926675, 99.38471138845554], "label": "Playing polo"}]}, "JxN0fchWCNI": {"duration": 105.37, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=JxN0fchWCNI", "annotations": [{"segment": [10.685163841927826, 46.3571723603638], "label": "Preparing salad"}, {"segment": [60.823240330973775, 105.37215419501133], "label": "Preparing salad"}]}, "BsceC-igcTY": {"duration": 124.738, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BsceC-igcTY", "annotations": [{"segment": [47.676503120124806, 124.7373], "label": "Playing ice hockey"}]}, "WMx0-3GZGUI": {"duration": 182.509, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WMx0-3GZGUI", "annotations": [{"segment": [6.833398495123478, 70.89650868486738], "label": "Making a cake"}, {"segment": [89.97307929485179, 140.65411763182527], "label": "Making a cake"}]}, "Vp63nbOfxgs": {"duration": 162.01, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Vp63nbOfxgs", "annotations": [{"segment": [9.60127403120125, 138.20781302808115], "label": "Playing violin"}]}, "ZtfdFFYa8vY": {"duration": 74.0, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ZtfdFFYa8vY", "annotations": [{"segment": [0.01, 71.8084871575663], "label": "Walking the dog"}]}, "Y05GAiavfOg": {"duration": 107.86, "subset": "testing", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Y05GAiavfOg", "annotations": []}, "jWuq6clN5q4": {"duration": 78.972, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=jWuq6clN5q4", "annotations": [{"segment": [0.01, 78.921], "label": "Drum corps"}]}, "kLflIBGCNrc": {"duration": 122.32, "subset": "training", "resolution": "562x480", "url": "https://www.youtube.com/watch?v=kLflIBGCNrc", "annotations": [{"segment": [11.731532761310454, 56.94085413416537], "label": "Playing water polo"}, {"segment": [65.33414586583464, 72.20138455538222], "label": "Playing water polo"}, {"segment": [77.92408346333853, 108.44514430577223], "label": "Playing water polo"}]}, "7_xkhnbnI7M": {"duration": 235.962, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7_xkhnbnI7M", "annotations": [{"segment": [7.129077507767722, 209.60956193144372], "label": "Clipping cat claws"}]}, "SL7iKDqir6g": {"duration": 44.14, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SL7iKDqir6g", "annotations": [{"segment": [14.94325373478939, 23.61998170982839], "label": "Preparing pasta"}, {"segment": [28.440386140405618, 33.26079057098284], "label": "Preparing pasta"}]}, "yt0K2HWC0WI": {"duration": 235.869, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yt0K2HWC0WI", "annotations": [{"segment": [26.677771763931784, 35.14106487524807], "label": "Slacklining"}, {"segment": [113.8864877370605, 121.98181158266738], "label": "Slacklining"}, {"segment": [136.70058221104355, 145.89981385377865], "label": "Slacklining"}, {"segment": [184.16861748755665, 194.10378657566304], "label": "Slacklining"}]}, "3G3Zear4vog": {"duration": 15.26, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=3G3Zear4vog", "annotations": [{"segment": [0.30938764898595944, 15.255191], "label": "Putting on makeup"}]}, "Dx9wjBdbZ1M": 
{"duration": 43.213, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Dx9wjBdbZ1M", "annotations": [{"segment": [10.853598491419655, 36.80785575351014], "label": "Fixing the roof"}]}, "fLvPz8W00l4": {"duration": 67.153, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fLvPz8W00l4", "annotations": [{"segment": [5.021728549141966, 66.13914508580343], "label": "Kneeling"}]}, "cxVbaN4GxxU": {"duration": 179.31, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cxVbaN4GxxU", "annotations": [{"segment": [31.329333630488076, 178.46530942901714], "label": "Kayaking"}]}, "HjRevGoqqz0": {"duration": 176.84, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=HjRevGoqqz0", "annotations": [{"segment": [9.656024812421068, 91.04251965997007], "label": "Dodgeball"}]}, "1sp7zRy1Rxo": {"duration": 132.54, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1sp7zRy1Rxo", "annotations": [{"segment": [16.955110858658344, 18.609268015600623], "label": "Skiing"}, {"segment": [25.84620557722309, 28.534210957254288], "label": "Skiing"}, {"segment": [37.63207532043682, 62.2376630299532], "label": "Skiing"}, {"segment": [70.71521845928237, 130.47164575382214], "label": "Skiing"}]}, "sMO2IlNgDuw": {"duration": 175.52, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=sMO2IlNgDuw", "annotations": [{"segment": [4.928779012384986, 20.810400274514382], "label": "Putting on makeup"}, {"segment": [34.77527414293851, 47.37104273014458], "label": "Putting on makeup"}, {"segment": [64.07412716100481, 72.83640096080033], "label": "Putting on makeup"}, {"segment": [76.12225363572365, 88.71802222292973], "label": "Putting on makeup"}]}, "6EN1q4TmPtI": {"duration": 99.289, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6EN1q4TmPtI", 
"annotations": [{"segment": [32.528222271748014, 99.288525], "label": "River tubing"}]}, "VO49rhXzhk4": {"duration": 155.69, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VO49rhXzhk4", "annotations": [{"segment": [0.01, 153.62526664331878], "label": "Wrapping presents"}]}, "PjcTk1hcf4k": {"duration": 183.252, "subset": "validation", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=PjcTk1hcf4k", "annotations": [{"segment": [72.76675233644859, 157.5186168224299], "label": "Decorating the Christmas tree"}]}, "mXBWTtd4pgs": {"duration": 70.798, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mXBWTtd4pgs", "annotations": []}, "sQtT9sUyUwE": {"duration": 9.706, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=sQtT9sUyUwE", "annotations": [{"segment": [0, 9.705941], "label": "Canoeing"}]}, "9_zC7CdvYu4": {"duration": 91.812, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9_zC7CdvYu4", "annotations": [{"segment": [10.885630657879377, 81.6422284399376], "label": "Snowboarding"}]}, "mIC02-VKqUE": {"duration": 61.858, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mIC02-VKqUE", "annotations": [{"segment": [0.01, 45.489471174970966], "label": "Rock-paper-scissors"}]}, "djE5A2S1Ezg": {"duration": 204.545, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=djE5A2S1Ezg", "annotations": [{"segment": [91.09278193146417, 175.19556074766356], "label": "Painting fence"}]}, "TJFl99tLNWk": {"duration": 216.2, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TJFl99tLNWk", "annotations": [{"segment": [6.30223, 195.5833590327613], "label": "Doing nails"}]}, "Likt_9dbMqE": {"duration": 230.877, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Likt_9dbMqE", 
"annotations": [{"segment": [15.463654890824449, 216.13154859036035], "label": "Futsal"}]}, "fI12XNNqldA": {"duration": 220.985, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fI12XNNqldA", "annotations": [{"segment": [5.860736483881124, 94.11653294703216], "label": "Spread mulch"}]}, "nnWJGghixr0": {"duration": 45.535, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nnWJGghixr0", "annotations": [{"segment": [14.917643508580344, 21.310919297971918], "label": "Throwing darts"}, {"segment": [26.780721917784714, 28.41455906396256], "label": "Throwing darts"}]}, "gbuRv8phs1Y": {"duration": 165.048, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gbuRv8phs1Y", "annotations": [{"segment": [6.437107552329304, 22.14364975975039], "label": "Canoeing"}, {"segment": [42.484909422776916, 67.71837077691107], "label": "Canoeing"}, {"segment": [98.35900242121684, 106.34101570670828], "label": "Canoeing"}, {"segment": [133.11938285803433, 163.50253020280812], "label": "Canoeing"}]}, "KHxBqZmSaJU": {"duration": 158.9, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KHxBqZmSaJU", "annotations": [{"segment": [8.428051549272855, 12.889961193005544], "label": "Cheerleading"}, {"segment": [15.864567622160669, 26.52357399329987], "label": "Cheerleading"}, {"segment": [37.67834784399376, 56.021754031201255], "label": "Cheerleading"}, {"segment": [67.42441193135726, 73.62150861622465], "label": "Cheerleading"}, {"segment": [90.72549546645867, 97.17047601872075], "label": "Cheerleading"}, {"segment": [119.48002408424337, 128.15595944305772], "label": "Cheerleading"}, {"segment": [130.8826819843994, 137.82343027145086], "label": "Cheerleading"}, {"segment": [152.20069458034322, 158.69355900000002], "label": "Cheerleading"}]}, "z4qaGiWt-bo": {"duration": 79.67, "subset": "training", "resolution": "720x480", "url": 
"https://www.youtube.com/watch?v=z4qaGiWt-bo", "annotations": [{"segment": [1.21173507, 75.78708236583464], "label": "Cricket"}]}, "meMZuh8iNDs": {"duration": 178.492, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=meMZuh8iNDs", "annotations": [{"segment": [88.54947987307247, 137.00108117316694], "label": "Braiding hair"}]}, "IJUokB_p3Yo": {"duration": 111.317, "subset": "training", "resolution": "854x478", "url": "https://www.youtube.com/watch?v=IJUokB_p3Yo", "annotations": [{"segment": [56.07451326053042, 108.15610452418098], "label": "Having an ice cream"}]}, "cxFt88Cs-fs": {"duration": 183.0, "subset": "training", "resolution": "1906x1080", "url": "https://www.youtube.com/watch?v=cxFt88Cs-fs", "annotations": [{"segment": [150.5231200750182, 179.26193967066362], "label": "Bungee jumping"}]}, "xYt__nSK8NA": {"duration": 46.95, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xYt__nSK8NA", "annotations": [{"segment": [0.01, 5.929174726989079], "label": "Smoking a cigarette"}, {"segment": [13.32234321372855, 34.403853354134164], "label": "Smoking a cigarette"}]}, "xMQcYD044z4": {"duration": 95.62, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xMQcYD044z4", "annotations": [{"segment": [40.43918748, 84.76355664664587], "label": "Clean and jerk"}, {"segment": [22.524011639999998, 39.287945780811235], "label": "Clean and jerk"}]}, "kUxz_SPi72U": {"duration": 72.167, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kUxz_SPi72U", "annotations": [{"segment": [18.238930242924, 69.35296831201248], "label": "Changing car wheel"}]}, "qZk7okgCU2M": {"duration": 124.622, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qZk7okgCU2M", "annotations": [{"segment": [0.2, 23.913242276629845], "label": "Surfing"}, {"segment": [36.16148832075732, 42.382819644758584], "label": 
"Surfing"}, {"segment": [67.46256154463866, 93.70880225273011], "label": "Surfing"}, {"segment": [111.59512965366615, 124.421042], "label": "Surfing"}]}, "nOJUyDGq_l0": {"duration": 196.952, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nOJUyDGq_l0", "annotations": [{"segment": [34.66586573985773, 150.62778691588787], "label": "Capoeira"}]}, "PRkEpM8ffak": {"duration": 50.179, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PRkEpM8ffak", "annotations": [{"segment": [3.1312525426187117, 49.317227546244716], "label": "Elliptical trainer"}]}, "iM-HdlplAhw": {"duration": 22.733, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iM-HdlplAhw", "annotations": [{"segment": [0.20000000000000004, 19.444453115290997], "label": "Using the monkey bar"}]}, "H-hgVm5G54Y": {"duration": 146.87, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=H-hgVm5G54Y", "annotations": [{"segment": [10.31042058008851, 133.11898557410294], "label": "Brushing hair"}]}, "p7j6yY99vEg": {"duration": 130.31, "subset": "validation", "resolution": "328x240", "url": "https://www.youtube.com/watch?v=p7j6yY99vEg", "annotations": [{"segment": [53.66916658707164, 55.498797266176354], "label": "Layup drill in basketball"}, {"segment": [59.56464321974239, 60.987689303490505], "label": "Layup drill in basketball"}, {"segment": [66.27328904312635, 67.69633512687446], "label": "Layup drill in basketball"}, {"segment": [72.1687656757971, 73.59181175954521], "label": "Layup drill in basketball"}, {"segment": [78.6741181076443, 80.70704105616225], "label": "Layup drill in basketball"}, {"segment": [85.5860561326053, 87.21239449141966], "label": "Layup drill in basketball"}, {"segment": [91.88811727301092, 93.71774792667706], "label": "Layup drill in basketball"}, {"segment": [97.78359382371295, 98.80005529797192], "label": "Layup drill in basketball"}, 
{"segment": [103.88236266926677, 104.89882414352574], "label": "Layup drill in basketball"}, {"segment": [109.98113151482059, 110.59100839937598], "label": "Layup drill in basketball"}, {"segment": [116.6897772449298, 118.51940789859594], "label": "Layup drill in basketball"}]}, "fgBFlwM466w": {"duration": 191.495, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=fgBFlwM466w", "annotations": [{"segment": [88.72699672068515, 136.52605219310814], "label": "Swimming"}, {"segment": [155.64567438207735, 187.0138045358549], "label": "Swimming"}]}, "xzoquwJYEZw": {"duration": 107.95, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=xzoquwJYEZw", "annotations": [{"segment": [21.5562322193568, 22.9034967330666], "label": "Playing squash"}, {"segment": [73.59432406139783, 75.61522001872075], "label": "Playing squash"}]}, "iOnlcEk50CM": {"duration": 83.24, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iOnlcEk50CM", "annotations": [{"segment": [10.778804376664864, 12.207320619355386], "label": "Skateboarding"}, {"segment": [47.920226686618484, 53.504426544408716], "label": "Skateboarding"}]}, "legp5cXwuHc": {"duration": 169.53, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=legp5cXwuHc", "annotations": []}, "3YDQ9wZtIgw": {"duration": 235.75, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3YDQ9wZtIgw", "annotations": [{"segment": [1.1033644284546893, 234.28104697521235], "label": "Playing lacrosse"}]}, "AElVyfjSHIo": {"duration": 150.814, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AElVyfjSHIo", "annotations": [{"segment": [82.34752223088923, 123.05072607644306], "label": "Playing drums"}]}, "nezTU6Bq5hM": {"duration": 45.187, "subset": "validation", "resolution": "320x480", "url": "https://www.youtube.com/watch?v=nezTU6Bq5hM", 
"annotations": [{"segment": [1.2688744773790952, 38.55968550702028], "label": "Cutting the grass"}]}, "nwBb-WkaCYU": {"duration": 55.078, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=nwBb-WkaCYU", "annotations": [{"segment": [2.663665666146646, 50.867421753510136], "label": "Playing rubik cube"}]}, "UJe1UjxgcdQ": {"duration": 90.373, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UJe1UjxgcdQ", "annotations": []}, "tp0L6sR1qts": {"duration": 62.323, "subset": "training", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=tp0L6sR1qts", "annotations": [{"segment": [7.479210608424337, 56.43404368174727], "label": "Assembling bicycle"}]}, "Y16uL6dYDzQ": {"duration": 76.6, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Y16uL6dYDzQ", "annotations": []}, "TUPCQpyoSbI": {"duration": 164.816, "subset": "validation", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=TUPCQpyoSbI", "annotations": [{"segment": [17.478546021840874, 154.2411856474259], "label": "Knitting"}]}, "Fr7rhb2Vw_k": {"duration": 153.229, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Fr7rhb2Vw_k", "annotations": [{"segment": [67.76953866726097, 129.20435855257338], "label": "Hanging wallpaper"}]}, "sGwra7t-ARo": {"duration": 99.5, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sGwra7t-ARo", "annotations": [{"segment": [11.020716730577222, 79.3181161876755], "label": "Long jump"}]}, "nqbYEJlRwoQ": {"duration": 186.94, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=nqbYEJlRwoQ", "annotations": [{"segment": [76.70240154134164, 97.11748940405616], "label": "Vacuuming floor"}, {"segment": [135.90615634321372, 154.2797354196568], "label": "Vacuuming floor"}]}, "ocvKEzb7OaE": {"duration": 53.267, "subset": "training", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=ocvKEzb7OaE", "annotations": [{"segment": [0.2, 53.06657596371882], "label": "Playing rubik cube"}]}, "SqIVJrXxO3g": {"duration": 146.077, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SqIVJrXxO3g", "annotations": [{"segment": [0.01, 146.07673469387754], "label": "Rafting"}]}, "3lla2AaJgNA": {"duration": 21.64, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=3lla2AaJgNA", "annotations": [{"segment": [0, 7.697545703588142], "label": "Ironing clothes"}]}, "y3Zq6RZZNtc": {"duration": 25.124, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=y3Zq6RZZNtc", "annotations": [{"segment": [7.747329173166927, 10.408028081123245], "label": "Putting on shoes"}]}, "E5SWUUNrHDU": {"duration": 146.89, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=E5SWUUNrHDU", "annotations": [{"segment": [30.00288299531981, 133.2952511700468], "label": "Shaving"}]}, "19SHLOheCmo": {"duration": 231.13, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=19SHLOheCmo", "annotations": [{"segment": [0.01, 227.70592377980833], "label": "Tai chi"}]}, "Vq2mQvMp_c4": {"duration": 109.37, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Vq2mQvMp_c4", "annotations": [{"segment": [0.01, 97.76397723088924], "label": "Horseback riding"}]}, "ntJk6Rj-lRA": {"duration": 179.7, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ntJk6Rj-lRA", "annotations": [{"segment": [100.33271138845554, 110.70229329173165], "label": "Installing carpet"}, {"segment": [139.0084492979719, 159.46912948517942], "label": "Installing carpet"}]}, "v-qFJJx0bTs": {"duration": 122.579, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=v-qFJJx0bTs", "annotations": [{"segment": 
[9.943916287263736, 120.66559842433698], "label": "Tug of war"}]}, "RhOV_K2XzZA": {"duration": 63.62, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RhOV_K2XzZA", "annotations": [{"segment": [2.42527064, 59.01267550702028], "label": "Volleyball"}]}, "dgRYwmcRpuo": {"duration": 161.379, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dgRYwmcRpuo", "annotations": [{"segment": [0.2, 145.96907956318253], "label": "Cleaning sink"}]}, "J6-rLsT975U": {"duration": 215.06, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=J6-rLsT975U", "annotations": [{"segment": [45.9561263650546, 120.77185647425897], "label": "Shoveling snow"}]}, "ANaMmXJFE54": {"duration": 197.35, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ANaMmXJFE54", "annotations": []}, "7o7hL0VccJQ": {"duration": 93.0, "subset": "validation", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=7o7hL0VccJQ", "annotations": [{"segment": [0.5803177433219778, 78.05273647680602], "label": "Playing bagpipes"}]}, "MdFHY1UBH5w": {"duration": 265.799, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=MdFHY1UBH5w", "annotations": []}, "xs5imfBbWmw": {"duration": 232.363, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xs5imfBbWmw", "annotations": [{"segment": [0.01, 232.36208616780044], "label": "Rafting"}]}, "z8VqGGu5vPc": {"duration": 215.04, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=z8VqGGu5vPc", "annotations": [{"segment": [0, 29.51651794071763], "label": "Hand car wash"}, {"segment": [67.75382527301092, 168.3783182527301], "label": "Hand car wash"}, {"segment": [194.89422932917316, 196.57146021840873], "label": "Hand car wash"}, {"segment": [204.62216848673944, 207.30573790951638], "label": "Hand car wash"}]}, 
"hTx_y1ils70": {"duration": 218.175, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=hTx_y1ils70", "annotations": [{"segment": [92.57958928969404, 195.02979487675506], "label": "Elliptical trainer"}]}, "f59PJGzAFA8": {"duration": 13.45, "subset": "testing", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=f59PJGzAFA8", "annotations": []}, "DM4WamWf9N4": {"duration": 84.568, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=DM4WamWf9N4", "annotations": []}, "KPeXy0Mr5p4": {"duration": 85.171, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KPeXy0Mr5p4", "annotations": []}, "4JmFOPoKluM": {"duration": 200.18, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4JmFOPoKluM", "annotations": []}, "stDgqxV-VGg": {"duration": 40.473, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=stDgqxV-VGg", "annotations": []}, "nGABbRHJ2Ug": {"duration": 113.34, "subset": "validation", "resolution": "406x720", "url": "https://www.youtube.com/watch?v=nGABbRHJ2Ug", "annotations": [{"segment": [28.801159126365054, 64.84678159126365], "label": "Spinning"}, {"segment": [83.39967550702028, 111.5137457098284], "label": "Spinning"}]}, "3LrLgh2rlAM": {"duration": 15.21, "subset": "training", "resolution": "568x320", "url": "https://www.youtube.com/watch?v=3LrLgh2rlAM", "annotations": [{"segment": [1.115173626677067, 14.876890721840875], "label": "Clean and jerk"}]}, "yISeNkFiVAg": {"duration": 141.062, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=yISeNkFiVAg", "annotations": [{"segment": [0.01, 136.01548527410574], "label": "Playing congas"}]}, "saB1t3Znhk0": {"duration": 90.19, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=saB1t3Znhk0", "annotations": [{"segment": [8.462685314035863, 77.54270400562464], 
"label": "Doing karate"}]}, "4lmYUyqQWzo": {"duration": 75.42, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=4lmYUyqQWzo", "annotations": []}, "EqSXihtiv5g": {"duration": 93.02000000000001, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EqSXihtiv5g", "annotations": [{"segment": [14.87432618393171, 80.75670586193449], "label": "Wrapping presents"}]}, "AI0q-ros2CA": {"duration": 67.08, "subset": "testing", "resolution": "240x176", "url": "https://www.youtube.com/watch?v=AI0q-ros2CA", "annotations": []}, "X5Dsemc20pg": {"duration": 190.61, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=X5Dsemc20pg", "annotations": [{"segment": [30.319460218408736, 48.451686427457105], "label": "Mixing drinks"}, {"segment": [62.422418096723874, 77.58214820592823], "label": "Mixing drinks"}, {"segment": [90.6611310452418, 115.92734789391577], "label": "Mixing drinks"}, {"segment": [130.78982839313574, 161.10928861154446], "label": "Mixing drinks"}]}, "ptY-0n53uq0": {"duration": 103.538, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ptY-0n53uq0", "annotations": [{"segment": [0.2, 103.33755102040816], "label": "Playing drums"}]}, "H-5nHSHwFOk": {"duration": 745.5, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=H-5nHSHwFOk", "annotations": [{"segment": [113.96757878315132, 138.38920280811234], "label": "Waxing skis"}, {"segment": [251.19384711388457, 391.9089188767551], "label": "Waxing skis"}, {"segment": [483.7807425897036, 695.4534758190329], "label": "Waxing skis"}]}, "7_HWPDDW7Cw": {"duration": 162.145, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7_HWPDDW7Cw", "annotations": [{"segment": [3.3834168961783435, 152.35474088371728], "label": "Mooping floor"}]}, "Zq9jr2S5wGM": {"duration": 145.334, "subset": "training", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=Zq9jr2S5wGM", "annotations": [{"segment": [17.23145227305691, 25.39371893915757], "label": "Making a lemonade"}, {"segment": [32.87579684087363, 42.39844144305773], "label": "Making a lemonade"}, {"segment": [50.78743787831514, 51.46762677847114], "label": "Making a lemonade"}, {"segment": [58.49624541341654, 75.95442718408738], "label": "Making a lemonade"}, {"segment": [78.90191241809673, 94.7729867550702], "label": "Making a lemonade"}, {"segment": [105.88273879095165, 112.91135742589704], "label": "Making a lemonade"}, {"segment": [118.3528686271451, 125.60821689547582], "label": "Making a lemonade"}, {"segment": [128.32897249609985, 130.5962688299532], "label": "Making a lemonade"}]}, "RWTLd_0BeAg": {"duration": 220.219, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RWTLd_0BeAg", "annotations": [{"segment": [64.58813319607614, 211.28564848716397], "label": "Baton twirling"}]}, "dH_gpSfaA6E": {"duration": 194.33, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=dH_gpSfaA6E", "annotations": [{"segment": [94.03289203, 110.64007745], "label": "Wrapping presents"}, {"segment": [7.60595878, 83.27494931], "label": "Wrapping presents"}]}, "2KhMB_ZWJXM": {"duration": 177.053, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2KhMB_ZWJXM", "annotations": []}, "KB9sqtTJTZw": {"duration": 189.06, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=KB9sqtTJTZw", "annotations": [{"segment": [6.728503039999999, 171.4653900156006], "label": "Breakdancing"}]}, "4_CcDjbCXBQ": {"duration": 172.548, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4_CcDjbCXBQ", "annotations": [{"segment": [35.208254639342755, 160.18412034388], "label": "Futsal"}]}, "QosVN26lb1g": {"duration": 33.46, "subset": "training", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=QosVN26lb1g", "annotations": [{"segment": [0.01, 32.073606117220926], "label": "Hitting a pinata"}]}, "84g4J1rxkQc": {"duration": 174.591, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=84g4J1rxkQc", "annotations": [{"segment": [0.01, 164.63727152243624], "label": "Slacklining"}]}, "FzqZsQx20jY": {"duration": 67.55, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FzqZsQx20jY", "annotations": []}, "EwjDShmfFHM": {"duration": 89.82, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=EwjDShmfFHM", "annotations": [{"segment": [3.3628000466957455, 86.87233446177846], "label": "Washing hands"}]}, "gDexNGeZln0": {"duration": 59.95, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gDexNGeZln0", "annotations": [{"segment": [13.655651706340363, 42.08933611544462], "label": "Starting a campfire"}]}, "8KxL1itwI3Q": {"duration": 188.87, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8KxL1itwI3Q", "annotations": [{"segment": [13.848545569033643, 171.33810507], "label": "Wrapping presents"}]}, "A32TgJfp2z8": {"duration": 90.56, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=A32TgJfp2z8", "annotations": [{"segment": [0.01, 90.43040639625586], "label": "Ping-pong"}]}, "xNiYjTyCTtg": {"duration": 104.72, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xNiYjTyCTtg", "annotations": [{"segment": [41.823322179095165, 80.7059420174727], "label": "Getting a piercing"}]}, "uwGcEaW3z3Q": {"duration": 11.36, "subset": "training", "resolution": "812x480", "url": "https://www.youtube.com/watch?v=uwGcEaW3z3Q", "annotations": [{"segment": [0.25576701560062404, 10.821590625585022], "label": "Clean and jerk"}]}, "BNQPVf2Ia5Y": {"duration": 145.822, "subset": "training", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=BNQPVf2Ia5Y", "annotations": [{"segment": [0.01, 145.781], "label": "Trimming branches or hedges"}]}, "l6_QM53dI5s": {"duration": 210.91, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=l6_QM53dI5s", "annotations": []}, "iBefG1qFbsE": {"duration": 183.229, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=iBefG1qFbsE", "annotations": [{"segment": [0, 183.228683], "label": "Knitting"}]}, "Y9B22Ii7-eE": {"duration": 206.797, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Y9B22Ii7-eE", "annotations": [{"segment": [50.69187183098481, 196.90603694001908], "label": "Making an omelette"}]}, "EA58Fo2e0CI": {"duration": 78.809, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=EA58Fo2e0CI", "annotations": [{"segment": [6.675524174782347, 78.781], "label": "Running a marathon"}]}, "Rai5nKbB6wU": {"duration": 199.39, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Rai5nKbB6wU", "annotations": [{"segment": [33.28346850336599, 34.838770769878415], "label": "Longboarding"}, {"segment": [45.414826182162926, 50.391793435002704], "label": "Longboarding"}, {"segment": [69.05542063315185, 76.83193196571399], "label": "Longboarding"}, {"segment": [95.80661961716564, 99.53934505679547], "label": "Longboarding"}, {"segment": [103.2720704964253, 124.11312086769183], "label": "Longboarding"}, {"segment": [125.35736268090179, 160.1961334507802], "label": "Longboarding"}]}, "XU8FzYcRat4": {"duration": 106.255, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=XU8FzYcRat4", "annotations": [{"segment": [1.491873030023242, 16.576366770670827], "label": "Playing beach volleyball"}]}, "E6_Vin5HBiw": {"duration": 91.16, "subset": "testing", "resolution": "256x144", "url": 
"https://www.youtube.com/watch?v=E6_Vin5HBiw", "annotations": []}, "v7ND9FPFEhk": {"duration": 55.334, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=v7ND9FPFEhk", "annotations": []}, "DrQZ28PEXpM": {"duration": 42.4, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=DrQZ28PEXpM", "annotations": []}, "HVmZOfkh8Pk": {"duration": 56.29, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=HVmZOfkh8Pk", "annotations": [{"segment": [21.59158502340094, 33.00177223088924], "label": "Playing field hockey"}]}, "TrFoFWXLMFg": {"duration": 54.61, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=TrFoFWXLMFg", "annotations": [{"segment": [0.01, 22.871714508580343], "label": "Shaving legs"}, {"segment": [26.69783775351014, 47.443928237129484], "label": "Shaving legs"}]}, "sLzAJGfkswM": {"duration": 157.641, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sLzAJGfkswM", "annotations": []}, "S5bjFaZUnOM": {"duration": 119.723, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=S5bjFaZUnOM", "annotations": [{"segment": [11.9434383775351, 62.340240249609984], "label": "Doing fencing"}, {"segment": [99.29643057722309, 105.45579563182527], "label": "Doing fencing"}]}, "AntmYynwbhQ": {"duration": 122.81, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AntmYynwbhQ", "annotations": [{"segment": [36.860296411856474, 91.24120124804992], "label": "Getting a haircut"}]}, "vA6Ykf9pV64": {"duration": 58.42, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=vA6Ykf9pV64", "annotations": []}, "P90sZu5CbcE": {"duration": 99.359, "subset": "testing", "resolution": "638x360", "url": "https://www.youtube.com/watch?v=P90sZu5CbcE", "annotations": []}, "hc4DBHpRuGM": {"duration": 63.832, "subset": 
"validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hc4DBHpRuGM", "annotations": [{"segment": [4.38157999375975, 6.8711140811232445], "label": "Elliptical trainer"}, {"segment": [11.352275438377534, 27.982363141965678], "label": "Elliptical trainer"}, {"segment": [32.46352449921997, 45.3095203900156], "label": "Elliptical trainer"}, {"segment": [49.79068174726989, 54.27184310452418], "label": "Elliptical trainer"}]}, "RjPoQP1HxAE": {"duration": 85.36, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RjPoQP1HxAE", "annotations": []}, "UkZiyYfO57g": {"duration": 169.785, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UkZiyYfO57g", "annotations": [{"segment": [17.5557673816265, 23.108043082561082], "label": "Skiing"}, {"segment": [38.97168794237416, 42.673205076330554], "label": "Skiing"}, {"segment": [53.77775647819971, 61.180790746112486], "label": "Skiing"}, {"segment": [77.04443560592557, 80.48155865888508], "label": "Skiing"}, {"segment": [87.62019884580096, 96.09213031002294], "label": "Skiing"}]}, "IeqBzYDyHGQ": {"duration": 168.81, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IeqBzYDyHGQ", "annotations": [{"segment": [34.2358488897379, 114.82177012250558], "label": "Beach soccer"}, {"segment": [125.35587747319418, 158.2749617301092], "label": "Beach soccer"}]}, "fRLDhkPqrTE": {"duration": 36.11, "subset": "training", "resolution": "270x360", "url": "https://www.youtube.com/watch?v=fRLDhkPqrTE", "annotations": [{"segment": [1.4082304212168486, 36.050698783151326], "label": "Playing piano"}]}, "FP0tI2Tjigs": {"duration": 48.37, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FP0tI2Tjigs", "annotations": [{"segment": [2.808062533136814, 47.70966649575364], "label": "Vacuuming floor"}]}, "ouv2veXexVw": {"duration": 42.052, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=ouv2veXexVw", "annotations": [{"segment": [0.01, 42.001], "label": "Beach soccer"}]}, "gXvRxyT5rWw": {"duration": 47.81, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gXvRxyT5rWw", "annotations": [{"segment": [0, 47.809387], "label": "Using parallel bars"}]}, "dP2DgvNt12Y": {"duration": 72.794, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=dP2DgvNt12Y", "annotations": [{"segment": [17.547675068807504, 64.41695373754779], "label": "Blowing leaves"}]}, "Pi5fonaOYTI": {"duration": 210.838, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Pi5fonaOYTI", "annotations": [{"segment": [68.70534488927538, 72.3167031447271], "label": "Hula hoop"}]}, "MvKhjWRNTvI": {"duration": 166.19, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MvKhjWRNTvI", "annotations": [{"segment": [33.24530925, 117.44419201872076], "label": "Paintball"}]}, "Zest0gXEWEc": {"duration": 56.82, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Zest0gXEWEc", "annotations": [{"segment": [18.969289847113888, 47.137042400000006], "label": "Washing dishes"}]}, "ojIhxWrBM0s": {"duration": 83.31, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ojIhxWrBM0s", "annotations": []}, "5P-4_nS8euM": {"duration": 111.22, "subset": "training", "resolution": "4096x2160", "url": "https://www.youtube.com/watch?v=5P-4_nS8euM", "annotations": [{"segment": [14.57531238392393, 20.127812339704473], "label": "Getting a haircut"}, {"segment": [27.762499778902722, 33.31499973468327], "label": "Getting a haircut"}, {"segment": [38.86749969046381, 42.858359033681076], "label": "Getting a haircut"}, {"segment": [46.155155882425774, 52.74874957991518], "label": "Getting a haircut"}, {"segment": [59.342343277404574, 63.50671782215288], "label": "Getting a 
haircut"}, {"segment": [69.05921774102964, 71.83546770046802], "label": "Getting a haircut"}, {"segment": [78.42906135413416, 82.59343629329173], "label": "Getting a haircut"}]}, "uqiooW1OAXU": {"duration": 158.268, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=uqiooW1OAXU", "annotations": [{"segment": [32.7364535746901, 133.40721333530766], "label": "Camel ride"}]}, "FUrLg7FETWo": {"duration": 29.93, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FUrLg7FETWo", "annotations": [{"segment": [0.01, 3.7354785075756776], "label": "Chopping wood"}, {"segment": [17.0431206099844, 18.210457638065524], "label": "Chopping wood"}]}, "thgW2RT30qQ": {"duration": 203.62, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=thgW2RT30qQ", "annotations": [{"segment": [56.72766, 123.152095], "label": "Hammer throw"}]}, "qHRCGBIiNFg": {"duration": 189.011, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qHRCGBIiNFg", "annotations": [{"segment": [0, 186.0617493447738], "label": "Ice fishing"}]}, "a6lCnOqNaMY": {"duration": 37.94, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=a6lCnOqNaMY", "annotations": [{"segment": [0.7091419656786272, 32.443244929797196], "label": "Javelin throw"}]}, "lOCw2uO3UK8": {"duration": 172.2, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=lOCw2uO3UK8", "annotations": [{"segment": [30.08784488522398, 172.19918367346938], "label": "Playing racquetball"}]}, "VJh9W3Gcpmo": {"duration": 220.659, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=VJh9W3Gcpmo", "annotations": []}, "Vxw3yFCV2iA": {"duration": 195.118, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=Vxw3yFCV2iA", "annotations": []}, "2wUKu5UIHmI": {"duration": 100.87, "subset": "training", 
"resolution": "426x240", "url": "https://www.youtube.com/watch?v=2wUKu5UIHmI", "annotations": [{"segment": [3.656482075306758, 7.584343562580797], "label": "Putting on makeup"}, {"segment": [33.66534383808041, 60.217687492052896], "label": "Putting on makeup"}]}, "J7GJKAoU0rE": {"duration": 211.627, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=J7GJKAoU0rE", "annotations": [{"segment": [29.71357251950078, 211.6266665], "label": "Kite flying"}]}, "pWotXONgXtc": {"duration": 206.57, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=pWotXONgXtc", "annotations": [{"segment": [5.557532912201035, 193.4606381458459], "label": "Playing squash"}]}, "uVHuVnv5XAk": {"duration": 56.52, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=uVHuVnv5XAk", "annotations": [{"segment": [11.852180070000001, 55.47613104524181], "label": "Triple jump"}]}, "_CMIO5R_OGA": {"duration": 35.06, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=_CMIO5R_OGA", "annotations": [{"segment": [9.900539048361935, 30.303307363494543], "label": "Playing field hockey"}]}, "1L_4N307nBk": {"duration": 93.0, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1L_4N307nBk", "annotations": [{"segment": [64.7170804, 83.88550801950078], "label": "Using the balance beam"}, {"segment": [12.246511550000001, 57.38070485], "label": "Using the balance beam"}]}, "UySzNwvkQKQ": {"duration": 199.6, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UySzNwvkQKQ", "annotations": [{"segment": [10.121223315132605, 178.91208598595944], "label": "Dodgeball"}]}, "dkP1In7K63Y": {"duration": 174.034, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=dkP1In7K63Y", "annotations": [{"segment": [13.930921597358967, 146.1933639649602], "label": "Cleaning sink"}]}, 
"J9XPELRRXT0": {"duration": 230.157, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=J9XPELRRXT0", "annotations": []}, "WiVzjWuu_Dk": {"duration": 231.573, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WiVzjWuu_Dk", "annotations": []}, "-vY2uUmv6Dk": {"duration": 142.85, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-vY2uUmv6Dk", "annotations": []}, "fpVaXCwccNk": {"duration": 39.1, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fpVaXCwccNk", "annotations": [{"segment": [21.419565522620907, 23.991222308892358], "label": "Hammer throw"}]}, "nxyENrsY4mo": {"duration": 138.55, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nxyENrsY4mo", "annotations": [{"segment": [13.123740159999999, 27.73138295], "label": "Playing polo"}, {"segment": [34.03554445, 129.21526718876754], "label": "Playing polo"}]}, "DgJ-GG1Agyw": {"duration": 222.77, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=DgJ-GG1Agyw", "annotations": [{"segment": [0.34753863650546024, 45.87510001872075], "label": "Getting a piercing"}, {"segment": [86.88465912636505, 221.03457281747268], "label": "Getting a piercing"}]}, "L67RSiR2X78": {"duration": 300.327, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=L67RSiR2X78", "annotations": [{"segment": [0.01, 300.3268707482993], "label": "Waxing skis"}]}, "_E_9te0nq3A": {"duration": 167.67000000000002, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_E_9te0nq3A", "annotations": [{"segment": [55.43916850881153, 120.58698333541342], "label": "Starting a campfire"}]}, "t6FuJ4L8sHY": {"duration": 86.332, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=t6FuJ4L8sHY", "annotations": [{"segment": 
[1.2121468315132604, 86.331791], "label": "Futsal"}]}, "6czh95dpwAA": {"duration": 42.772, "subset": "validation", "resolution": "626x360", "url": "https://www.youtube.com/watch?v=6czh95dpwAA", "annotations": [{"segment": [4.60407142698908, 9.741948236817473], "label": "Hand car wash"}, {"segment": [12.811329188143526, 16.547966868018722], "label": "Hand car wash"}, {"segment": [21.6191180049922, 23.68761386349454], "label": "Hand car wash"}, {"segment": [26.957171833385335, 30.760535186115447], "label": "Hand car wash"}]}, "qC4ynfF8JrE": {"duration": 152.184, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qC4ynfF8JrE", "annotations": []}, "aHx8nMBV6rc": {"duration": 148.86, "subset": "testing", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=aHx8nMBV6rc", "annotations": []}, "vqyQF-1Zc_E": {"duration": 78.136, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vqyQF-1Zc_E", "annotations": []}, "HqxTRzf11tc": {"duration": 23.59, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HqxTRzf11tc", "annotations": [{"segment": [2.76038291, 23.545], "label": "Long jump"}]}, "EnMS9O1U6E0": {"duration": 106.22999999999999, "subset": "testing", "resolution": "562x480", "url": "https://www.youtube.com/watch?v=EnMS9O1U6E0", "annotations": []}, "wkSm7bUCgGQ": {"duration": 134.35, "subset": "validation", "resolution": "288x208", "url": "https://www.youtube.com/watch?v=wkSm7bUCgGQ", "annotations": [{"segment": [0.01, 134.35068027210883], "label": "Shaving legs"}]}, "joehK-w2bCc": {"duration": 74.05, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=joehK-w2bCc", "annotations": [{"segment": [7.21301568, 70.52509048127925], "label": "Ballet"}]}, "f4CSejhkTd8": {"duration": 120.814, "subset": "validation", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=f4CSejhkTd8", "annotations": [{"segment": 
[99.15433415732367, 108.20120522128623], "label": "Elliptical trainer"}]}, "PjNF7HoQ6yY": {"duration": 97.594, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=PjNF7HoQ6yY", "annotations": [{"segment": [7.003553248049922, 86.1741551825273], "label": "Playing ten pins"}]}, "rSGgal9LtDk": {"duration": 212.39, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rSGgal9LtDk", "annotations": [{"segment": [0.9933276131045243, 192.04333853354134], "label": "Making a sandwich"}]}, "TV8putYbiCU": {"duration": 123.554, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TV8putYbiCU", "annotations": [{"segment": [44.629644859813084, 117.55659034267913], "label": "Cleaning sink"}]}, "vgXU0u-rN9c": {"duration": 59.26, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vgXU0u-rN9c", "annotations": [{"segment": [0.01, 37.25538403900156], "label": "Mixing drinks"}]}, "7ftCKS5SCCk": {"duration": 184.367, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7ftCKS5SCCk", "annotations": [{"segment": [15.527822152886117, 22.141524180967238], "label": "Putting on shoes"}, {"segment": [35.36892823712949, 41.69507800312012], "label": "Putting on shoes"}, {"segment": [48.596332293291724, 59.52331825273011], "label": "Putting on shoes"}, {"segment": [69.58764742589703, 76.20134945397815], "label": "Putting on shoes"}, {"segment": [168.21807332293292, 184.121], "label": "Putting on shoes"}]}, "WKoHUS5B2u4": {"duration": 180.28, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WKoHUS5B2u4", "annotations": [{"segment": [34.72482176443057, 114.29668054446178], "label": "Horseback riding"}, {"segment": [132.2917298798752, 177.56052586427455], "label": "Horseback riding"}]}, "8slE1UskRS4": {"duration": 216.41, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=8slE1UskRS4", "annotations": [{"segment": [0, 0], "label": "Capoeira"}]}, "moMisK3nGuY": {"duration": 85.45, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=moMisK3nGuY", "annotations": [{"segment": [1.0664518662379148, 61.18767582540036], "label": "Grooming horse"}]}, "SpEdr2o2TP0": {"duration": 198.6, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SpEdr2o2TP0", "annotations": [{"segment": [111.5163182527301, 182.47146489859594], "label": "Horseback riding"}]}, "nSgBX7D4XLA": {"duration": 72.006, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=nSgBX7D4XLA", "annotations": [{"segment": [0.01, 70.3675795125717], "label": "Shuffleboard"}]}, "dU4sCY0kHtk": {"duration": 149.91, "subset": "training", "resolution": "576x324", "url": "https://www.youtube.com/watch?v=dU4sCY0kHtk", "annotations": [{"segment": [12.94155764, 143.3597814945398], "label": "Cumbia"}]}, "Auy0KGsXAIg": {"duration": 133.75, "subset": "validation", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=Auy0KGsXAIg", "annotations": [{"segment": [31.923936735725427, 129.99093193697345], "label": "Fixing bicycle"}]}, "y3xcwZpcLvI": {"duration": 59.54, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=y3xcwZpcLvI", "annotations": [{"segment": [0, 52.01269778471139], "label": "Using the balance beam"}]}, "Z0noyvIqi_U": {"duration": 219.313, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Z0noyvIqi_U", "annotations": []}, "dJO_4TrLr7E": {"duration": 98.00999999999999, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dJO_4TrLr7E", "annotations": [{"segment": [6.788043398595944, 94.95633742979719], "label": "Tango"}]}, "unqP4N-QYgQ": {"duration": 15.56, "subset": "testing", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=unqP4N-QYgQ", "annotations": []}, "yzmPtZyuo4s": {"duration": 68.52, "subset": "training", "resolution": "448x338", "url": "https://www.youtube.com/watch?v=yzmPtZyuo4s", "annotations": [{"segment": [20.096961289859593, 48.4251248101404], "label": "High jump"}]}, "jKPgBtesr74": {"duration": 110.736, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jKPgBtesr74", "annotations": [{"segment": [42.238076323987535, 82.4222398753894], "label": "Making a lemonade"}]}, "PG-AK5fawJI": {"duration": 232.13, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PG-AK5fawJI", "annotations": [{"segment": [128.89365990639624, 142.28991107644305], "label": "Rock-paper-scissors"}]}, "qI1ZayfiGHI": {"duration": 95.03999999999999, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qI1ZayfiGHI", "annotations": [{"segment": [16.309337380297933, 91.77708944004019], "label": "Playing racquetball"}]}, "0Jhynsxxv_M": {"duration": 114.47999999999999, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0Jhynsxxv_M", "annotations": []}, "i6wR9_tkg6A": {"duration": 217.362, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=i6wR9_tkg6A", "annotations": []}, "PgZ2e0H1ZVE": {"duration": 186.16, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PgZ2e0H1ZVE", "annotations": [{"segment": [61.2770256225215, 121.39240054602182], "label": "Playing harmonica"}, {"segment": [140.26920924336974, 152.17611934477378], "label": "Playing harmonica"}]}, "TIfAkOBMf5A": {"duration": 221.68, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TIfAkOBMf5A", "annotations": [{"segment": [11.066753549053526, 171.18884396192172], "label": "Doing step aerobics"}]}, "UZPSbNS1LU0": {"duration": 123.089, "subset": "training", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UZPSbNS1LU0", "annotations": [{"segment": [6.335433697347894, 105.97452730109205], "label": "Rollerblading"}]}, "cNCkHqOnJV0": {"duration": 234.219, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cNCkHqOnJV0", "annotations": [{"segment": [10.596509988290688, 234.219409], "label": "River tubing"}]}, "K4F8I74OM-Y": {"duration": 137.695, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=K4F8I74OM-Y", "annotations": [{"segment": [19.32556942277691, 24.26432605304212], "label": "Running a marathon"}, {"segment": [108.65264586583463, 114.89646645865835], "label": "Running a marathon"}, {"segment": [125.63445397815913, 135.29864274570983], "label": "Running a marathon"}]}, "sGTDKZuG1vE": {"duration": 214.46, "subset": "training", "resolution": "854x392", "url": "https://www.youtube.com/watch?v=sGTDKZuG1vE", "annotations": [{"segment": [0.01, 214.45950113378686], "label": "Cleaning sink"}]}, "D_zM6YmlUnw": {"duration": 224.073, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=D_zM6YmlUnw", "annotations": [{"segment": [0.01, 205.13040093603743], "label": "Sharpening knives"}]}, "O8JVA-scYlM": {"duration": 135.26, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=O8JVA-scYlM", "annotations": [{"segment": [7.220544001829618, 86.22516841771747], "label": "Making a sandwich"}]}, "knY08LrNyHg": {"duration": 224.75, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=knY08LrNyHg", "annotations": [{"segment": [15.37597503900156, 56.61154446177847], "label": "Sailing"}, {"segment": [57.006569250000005, 213.86583463338533], "label": "Sailing"}]}, "xxvBR4zdV48": {"duration": 142.107, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=xxvBR4zdV48", "annotations": [{"segment": 
[34.356560062402494, 61.39849765990639], "label": "Playing blackjack"}]}, "OtMNVvCSxVo": {"duration": 71.19, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OtMNVvCSxVo", "annotations": [{"segment": [3.331936614664587, 59.19740718720749], "label": "Washing dishes"}]}, "qJrJeICcevI": {"duration": 32.439, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qJrJeICcevI", "annotations": [{"segment": [1.5134491514010997, 29.00778791606719], "label": "Camel ride"}]}, "ASKSqeyhpgc": {"duration": 82.68, "subset": "training", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=ASKSqeyhpgc", "annotations": [{"segment": [11.164257580343214, 71.56740841965679], "label": "Putting on makeup"}]}, "nLddpveoSHE": {"duration": 72.89, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nLddpveoSHE", "annotations": [{"segment": [7.554303102964118, 9.599076875195006], "label": "Getting a tattoo"}, {"segment": [25.162077252730107, 29.365223340093603], "label": "Getting a tattoo"}, {"segment": [34.13636214196568, 42.20185868798752], "label": "Getting a tattoo"}, {"segment": [47.31379311856474, 52.19853046333853], "label": "Getting a tattoo"}]}, "GLsIq3P-J3A": {"duration": 92.927, "subset": "training", "resolution": "284x240", "url": "https://www.youtube.com/watch?v=GLsIq3P-J3A", "annotations": [{"segment": [12.896113884555382, 76.08896255850235], "label": "Playing blackjack"}]}, "1OP64qG2v7I": {"duration": 85.26, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=1OP64qG2v7I", "annotations": []}, "AbVuOBhG634": {"duration": 155.04, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AbVuOBhG634", "annotations": [{"segment": [17.89849165667307, 24.18715088739604], "label": "Walking the dog"}]}, "IZXFAoiSj0c": {"duration": 72.42, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=IZXFAoiSj0c", "annotations": [{"segment": [3.97095309, 59.466318252730105], "label": "Hammer throw"}]}, "BCC6fxrmA9M": {"duration": 102.82, "subset": "training", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=BCC6fxrmA9M", "annotations": [{"segment": [54.37631607800312, 97.8452885179407], "label": "Playing piano"}]}, "c6wINALtTvs": {"duration": 217.014, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=c6wINALtTvs", "annotations": [{"segment": [36.225299471842824, 205.16384406552262], "label": "Baking cookies"}]}, "MGF7I9aCPPM": {"duration": 65.16, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MGF7I9aCPPM", "annotations": [{"segment": [9.300624237214386, 15.704332728411178], "label": "Spinning"}, {"segment": [15.80597889493811, 19.97347118798752], "label": "Spinning"}, {"segment": [21.90474830031201, 29.01997976677067], "label": "Spinning"}, {"segment": [39.89611929407176, 41.624104078783155], "label": "Spinning"}, {"segment": [49.24756636427457, 53.92328989937598], "label": "Spinning"}]}, "M_E1i4S8Vp0": {"duration": 101.308, "subset": "validation", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=M_E1i4S8Vp0", "annotations": [{"segment": [10.10830577223089, 96.3638221528861], "label": "Swinging at the playground"}]}, "DR0hc7lT0hE": {"duration": 131.565, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DR0hc7lT0hE", "annotations": [{"segment": [35.89978603840766, 121.10591687952916], "label": "Hula hoop"}]}, "MY6o5ZObFLE": {"duration": 91.952, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MY6o5ZObFLE", "annotations": [{"segment": [0.01, 91.95102040816326], "label": "Applying sunscreen"}]}, "5HEunlL4nF0": {"duration": 62.32, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5HEunlL4nF0", 
"annotations": [{"segment": [0.6800702028081124, 60.040483619344776], "label": "Playing racquetball"}]}, "GO_36Qd9bb0": {"duration": 191.47, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=GO_36Qd9bb0", "annotations": [{"segment": [43.312610681298, 190.27677933784017], "label": "Tango"}]}, "oRtMsdNQ1LE": {"duration": 71.796, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=oRtMsdNQ1LE", "annotations": [{"segment": [54.883132636505465, 71.796098], "label": "Playing congas"}]}, "bQ0L24YbGGI": {"duration": 80.88, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bQ0L24YbGGI", "annotations": [{"segment": [9.210419680187208, 62.07570524180968], "label": "Ballet"}]}, "paY7wXmzcTQ": {"duration": 53.964, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=paY7wXmzcTQ", "annotations": [{"segment": [39.97187987519501, 50.15419032761311], "label": "Snow tubing"}]}, "PRTcRz--03E": {"duration": 179.398, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PRTcRz--03E", "annotations": [{"segment": [17.045544464930313, 172.13205558027988], "label": "Playing congas"}]}, "jkWqhnD8hyQ": {"duration": 170.505, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jkWqhnD8hyQ", "annotations": [{"segment": [93.89695603878577, 102.94085548728073], "label": "Painting furniture"}, {"segment": [109.32478367082682, 119.43267121216847], "label": "Painting furniture"}, {"segment": [124.4866149828393, 133.53051436193448], "label": "Painting furniture"}, {"segment": [139.11645221372854, 148.1603515928237], "label": "Painting furniture"}]}, "6aWTCiY3eww": {"duration": 35.829, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=6aWTCiY3eww", "annotations": [{"segment": [4.244736349453978, 34.963223088923556], "label": "Camel ride"}]}, 
"7knqgoHxuGE": {"duration": 38.128, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7knqgoHxuGE", "annotations": [{"segment": [0.01, 38.12716553287982], "label": "Beer pong"}]}, "DpoIgaZ1m_U": {"duration": 216.57, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=DpoIgaZ1m_U", "annotations": [{"segment": [12.030574600000001, 190.6838728549142], "label": "Doing karate"}]}, "P3vu94B0KGY": {"duration": 235.497, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=P3vu94B0KGY", "annotations": []}, "In5hkMlOG58": {"duration": 180.745, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=In5hkMlOG58", "annotations": []}, "N49yT-kvXuw": {"duration": 146.565, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=N49yT-kvXuw", "annotations": [{"segment": [28.123810113884556, 130.32985174726988], "label": "Snowboarding"}]}, "nmMs1pWkiAU": {"duration": 55.06, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nmMs1pWkiAU", "annotations": [{"segment": [6.3065522620904835, 49.808892355694226], "label": "Doing karate"}]}, "Hk-wwGuHuC0": {"duration": 98.43, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Hk-wwGuHuC0", "annotations": [{"segment": [17.036335413416538, 89.32565054602185], "label": "Mooping floor"}]}, "jlKyDZ6nWcM": {"duration": 26.378, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jlKyDZ6nWcM", "annotations": []}, "SQncxXAlBFE": {"duration": 125.6, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SQncxXAlBFE", "annotations": [{"segment": [4.57671824, 117.61743369734789], "label": "Clean and jerk"}]}, "yFPevKUgKUI": {"duration": 70.496, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=yFPevKUgKUI", "annotations": []}, "QZCBVJHF81w": {"duration": 120.21, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=QZCBVJHF81w", "annotations": [{"segment": [13.871940717628705, 105.55679095163806], "label": "Hand car wash"}]}, "7BFbmEY3BNw": {"duration": 97.083, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7BFbmEY3BNw", "annotations": []}, "6eQuZddoEcI": {"duration": 99.03, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=6eQuZddoEcI", "annotations": [{"segment": [3.0918102600000004, 13.577672386895475], "label": "Discus throw"}, {"segment": [22.218009360374413, 28.241096723868957], "label": "Discus throw"}, {"segment": [39.198024960998445, 46.75984867394696], "label": "Discus throw"}, {"segment": [50.30927613104525, 56.327870514820596], "label": "Discus throw"}]}, "e6Ti5g_zQ_4": {"duration": 90.837, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=e6Ti5g_zQ_4", "annotations": [{"segment": [0.01, 90.781], "label": "Playing pool"}]}, "xAoQ6JisbhI": {"duration": 60.163, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=xAoQ6JisbhI", "annotations": [{"segment": [8.428177570093457, 59.921], "label": "Playing rubik cube"}]}, "8ycO15nH8YM": {"duration": 186.642, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8ycO15nH8YM", "annotations": [{"segment": [0.01, 186.64201814058956], "label": "Making a cake"}]}, "xATeffo_kP4": {"duration": 231.805, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=xATeffo_kP4", "annotations": [{"segment": [0.01, 227.97749978521196], "label": "Table soccer"}]}, "uC27rJLCn70": {"duration": 67.362, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uC27rJLCn70", "annotations": [{"segment": 
[14.598469578783149, 67.321], "label": "Snow tubing"}]}, "gJxR-KzawO4": {"duration": 194.44, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=gJxR-KzawO4", "annotations": [{"segment": [7.429245957098283, 141.45728250000002], "label": "Washing dishes"}, {"segment": [164.82155338, 193.00877762012482], "label": "Washing dishes"}]}, "QR2BmWR5FGM": {"duration": 51.224, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QR2BmWR5FGM", "annotations": [{"segment": [3.514577223088924, 51.181], "label": "Fun sliding down"}]}, "B3xMfP_UOt4": {"duration": 50.74, "subset": "testing", "resolution": "622x360", "url": "https://www.youtube.com/watch?v=B3xMfP_UOt4", "annotations": []}, "OQPBLjX1LHk": {"duration": 48.809, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=OQPBLjX1LHk", "annotations": [{"segment": [0.01, 38.111171606864275], "label": "Using the monkey bar"}]}, "oW0G_C86fz0": {"duration": 205.799, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=oW0G_C86fz0", "annotations": [{"segment": [22.15303203276131, 42.05865501872075], "label": "Plastering"}, {"segment": [54.900992429017165, 61.96427800468019], "label": "Plastering"}, {"segment": [73.20132323868955, 86.36471908424338], "label": "Plastering"}, {"segment": [109.1598679875195, 127.4601987971919], "label": "Plastering"}, {"segment": [135.48665967862715, 177.54531469734792], "label": "Plastering"}]}, "EGJII8_NOXo": {"duration": 224.514, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EGJII8_NOXo", "annotations": []}, "NtX9Q0QX3sw": {"duration": 183.79, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NtX9Q0QX3sw", "annotations": [{"segment": [13.546792349999999, 165.14930021840874], "label": "Cleaning windows"}]}, "ddLFSNa3ci0": {"duration": 158.198, "subset": "validation", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=ddLFSNa3ci0", "annotations": [{"segment": [0.01, 149.5596182527301], "label": "Spread mulch"}]}, "Sh8JODtAMhc": {"duration": 182.974, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Sh8JODtAMhc", "annotations": [{"segment": [0.01, 172.69705273790953], "label": "Riding bumper cars"}]}, "TJsdyRfYItg": {"duration": 188.454, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=TJsdyRfYItg", "annotations": []}, "REZTMjILEQ4": {"duration": 199.14, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=REZTMjILEQ4", "annotations": []}, "4xgUlwGdjJ4": {"duration": 19.18, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4xgUlwGdjJ4", "annotations": []}, "xQcJ8gTPRSU": {"duration": 165.7, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=xQcJ8gTPRSU", "annotations": [{"segment": [29.20036349453978, 160.49217004680187], "label": "Preparing salad"}]}, "YZvdzvM-124": {"duration": 6.618, "subset": "validation", "resolution": "360x360", "url": "https://www.youtube.com/watch?v=YZvdzvM-124", "annotations": [{"segment": [4.496845553822153, 6.361], "label": "Rock-paper-scissors"}]}, "xAPz4YOcIX0": {"duration": 169.09, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xAPz4YOcIX0", "annotations": [{"segment": [43.26097926638154, 154.57886493963161], "label": "Painting"}]}, "ykov_joUUTk": {"duration": 204.27, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ykov_joUUTk", "annotations": [{"segment": [2.5485304212168485, 204.201], "label": "Doing karate"}]}, "2c9-_NElJLw": {"duration": 226.05, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2c9-_NElJLw", "annotations": []}, "eOsASILOUZM": {"duration": 12.68, "subset": "training", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=eOsASILOUZM", "annotations": [{"segment": [3.88225716, 8.21952774], "label": "Springboard diving"}]}, "GP2S0V5NiPs": {"duration": 216.318, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GP2S0V5NiPs", "annotations": [{"segment": [85.5481813068441, 91.28514021105062], "label": "Ice fishing"}, {"segment": [143.59270569110765, 149.66713272386895], "label": "Ice fishing"}]}, "88Mt7VfUQBU": {"duration": 172.222, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=88Mt7VfUQBU", "annotations": [{"segment": [4.566953198127925, 172.181], "label": "Kneeling"}]}, "O8jThut7tAQ": {"duration": 155.296, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=O8jThut7tAQ", "annotations": [{"segment": [53.76613066981726, 56.9107346214706], "label": "Playing ten pins"}, {"segment": [103.59600867293959, 108.67575351791807], "label": "Playing ten pins"}]}, "Lwy92HbuZII": {"duration": 180.16, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Lwy92HbuZII", "annotations": [{"segment": [10.961594868954759, 52.278375528861154], "label": "Sailing"}, {"segment": [73.92049873166927, 89.09809162714508], "label": "Sailing"}, {"segment": [103.71355145241809, 105.11888412792511], "label": "Sailing"}, {"segment": [137.722602199688, 144.18713250702027], "label": "Sailing"}]}, "e5XATG2IK6Q": {"duration": 156.503, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=e5XATG2IK6Q", "annotations": [{"segment": [9.27419344773791, 100.55178159126366], "label": "Hand car wash"}, {"segment": [110.80220592823714, 115.45406084243372], "label": "Hand car wash"}, {"segment": [117.89495007800313, 151.8233104524181], "label": "Hand car wash"}]}, "d-NsZZkxTa0": {"duration": 52.106, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=d-NsZZkxTa0", 
"annotations": [{"segment": [0.2, 49.508414283489095], "label": "Shuffleboard"}]}, "YTuQrhSKkNE": {"duration": 100.287, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YTuQrhSKkNE", "annotations": [{"segment": [14.233620904836194, 65.85004836193447], "label": "Curling"}, {"segment": [76.48616068642745, 99.32251950078003], "label": "Curling"}]}, "ukXHzfVXkDo": {"duration": 164.63, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ukXHzfVXkDo", "annotations": []}, "He7CDkVm_aE": {"duration": 197.556, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=He7CDkVm_aE", "annotations": [{"segment": [60.21905181785, 190.48884641608745], "label": "Blow-drying hair"}]}, "aVJRaXUjsEE": {"duration": 6.595, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aVJRaXUjsEE", "annotations": [{"segment": [0, 6.594466], "label": "Beer pong"}]}, "agZNSscDJww": {"duration": 111.062, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=agZNSscDJww", "annotations": [{"segment": [8.83477535101404, 107.21060686427457], "label": "Hurling"}]}, "2ehPAKS6Gpo": {"duration": 30.233, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2ehPAKS6Gpo", "annotations": [{"segment": [0.2, 17.49799158970359], "label": "Playing ice hockey"}]}, "4zLTW7lT3fs": {"duration": 106.35, "subset": "validation", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=4zLTW7lT3fs", "annotations": [{"segment": [13.438545321404693, 26.877090187207486], "label": "Snatch"}, {"segment": [41.31108306552262, 58.73141929797191], "label": "Snatch"}, {"segment": [84.77896966458657, 85.27669355694226], "label": "Snatch"}, {"segment": [0.2, 106.147005], "label": "Snatch"}]}, "TyLHQAu66KA": {"duration": 171.11, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=TyLHQAu66KA", "annotations": []}, "ZU4Mgdd3omA": {"duration": 34.78, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZU4Mgdd3omA", "annotations": [{"segment": [0.01, 32.50438649926949], "label": "Playing flauta"}]}, "KBG7wrKsZAI": {"duration": 181.26, "subset": "validation", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=KBG7wrKsZAI", "annotations": [{"segment": [6.926429017160686, 160.43871294851792], "label": "Doing karate"}]}, "DrQgYA5_8VA": {"duration": 29.676, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=DrQgYA5_8VA", "annotations": [{"segment": [10.40440717628705, 29.641], "label": "Waterskiing"}]}, "UmU8dx36O9w": {"duration": 85.961, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=UmU8dx36O9w", "annotations": [{"segment": [0, 85.721], "label": "Putting in contact lenses"}]}, "U0jlI70N_DQ": {"duration": 103.78999999999999, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=U0jlI70N_DQ", "annotations": [{"segment": [8.901820015600624, 92.57892816224648], "label": "Playing water polo"}]}, "99A2KqvfBLI": {"duration": 126.39, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=99A2KqvfBLI", "annotations": [{"segment": [29.09532106, 49.29259483304502], "label": "Playing harmonica"}, {"segment": [52.84166166102426, 53.82751355768516], "label": "Playing harmonica"}, {"segment": [76.69927756021805, 116.33052380598625], "label": "Playing harmonica"}]}, "rBEQh8V2TI8": {"duration": 137.973, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rBEQh8V2TI8", "annotations": [{"segment": [27.919828660436135, 128.21644392523365], "label": "Baton twirling"}]}, "nhQTxZlzG1Q": {"duration": 125.16, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nhQTxZlzG1Q", 
"annotations": []}, "Mo9-1cuMVtQ": {"duration": 95.342, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Mo9-1cuMVtQ", "annotations": [{"segment": [28.649677570093456, 82.23793457943925], "label": "Futsal"}]}, "KwBuRjh_v9M": {"duration": 108.3, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KwBuRjh_v9M", "annotations": [{"segment": [27.549673830000003, 91.98422289], "label": "Getting a piercing"}]}, "-mWhX_yXOS8": {"duration": 76.7, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-mWhX_yXOS8", "annotations": []}, "IsHMvAfUOGs": {"duration": 11.87, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=IsHMvAfUOGs", "annotations": [{"segment": [0, 9.956784711388455], "label": "Hopscotch"}]}, "NulnzF8avMI": {"duration": 92.56, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NulnzF8avMI", "annotations": [{"segment": [15.8822664, 77.65373154000001], "label": "Using the pommel horse"}]}, "jhakEVLCqNM": {"duration": 145.891, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jhakEVLCqNM", "annotations": [{"segment": [47.55842277691107, 145.841], "label": "Swimming"}]}, "cr2lbZ6or1Q": {"duration": 125.76, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cr2lbZ6or1Q", "annotations": [{"segment": [27.27065785803432, 116.734110975039], "label": "Playing violin"}]}, "eKdtOKqLMRE": {"duration": 149.235, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eKdtOKqLMRE", "annotations": []}, "_ajGnJjNUX0": {"duration": 121.023, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_ajGnJjNUX0", "annotations": [{"segment": [24.741064773286244, 89.70048066113672], "label": "Installing carpet"}]}, "extd66Y1tJ8": {"duration": 88.352, "subset": 
"training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=extd66Y1tJ8", "annotations": [{"segment": [17.80177186100802, 60.44898369901425], "label": "Putting in contact lenses"}]}, "ZnKB0U96ZI4": {"duration": 72.49, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZnKB0U96ZI4", "annotations": []}, "YveUW4bLL5A": {"duration": 69.75, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YveUW4bLL5A", "annotations": [{"segment": [2.1518491999999996, 63.07160686427457], "label": "Vacuuming floor"}]}, "iuqJ_h0tOwk": {"duration": 71.77, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=iuqJ_h0tOwk", "annotations": []}, "0EewuppFjEw": {"duration": 55.148, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=0EewuppFjEw", "annotations": [{"segment": [18.83229173166927, 49.53150702028081], "label": "Snow tubing"}]}, "7CeAzJF2z4o": {"duration": 185.53, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=7CeAzJF2z4o", "annotations": [{"segment": [0, 185.481], "label": "Playing guitarra"}]}, "TUTcdyeBEQs": {"duration": 141.94, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TUTcdyeBEQs", "annotations": []}, "7Ql7Hw6c1-o": {"duration": 175.241, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7Ql7Hw6c1-o", "annotations": [{"segment": [63.67733697347894, 149.78182215288612], "label": "Painting furniture"}]}, "YPNLMBhuDS8": {"duration": 175.07999999999998, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YPNLMBhuDS8", "annotations": [{"segment": [51.34906409547581, 148.58452589329173], "label": "Cleaning shoes"}]}, "7c9ku5U1Xj4": {"duration": 127.432, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7c9ku5U1Xj4", "annotations": 
[]}, "oWsEp7hsmZg": {"duration": 98.058, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oWsEp7hsmZg", "annotations": [{"segment": [82.82506066101901, 86.33800958313739], "label": "Doing crunches"}]}, "JmtcnoHa66U": {"duration": 94.25, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=JmtcnoHa66U", "annotations": [{"segment": [4.03102973, 90.06176521060843], "label": "Using the balance beam"}]}, "DlgFhgRivlE": {"duration": 132.8, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DlgFhgRivlE", "annotations": []}, "lM4FQ_FqEhQ": {"duration": 161.87, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lM4FQ_FqEhQ", "annotations": [{"segment": [34.34290468761608, 151.51281435257408], "label": "Skateboarding"}]}, "pMVo7PaXD1c": {"duration": 237.32999999999998, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pMVo7PaXD1c", "annotations": [{"segment": [28.509304410271646, 176.97983776766034], "label": "Paintball"}]}, "iDofsMwceTo": {"duration": 126.062, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iDofsMwceTo", "annotations": [{"segment": [30.458588143525738, 31.4411232449298], "label": "Plastering"}, {"segment": [52.07436037441497, 53.64641653666146], "label": "Plastering"}, {"segment": [86.65959594383776, 88.24566146645866], "label": "Plastering"}]}, "5vlGKyxl22M": {"duration": 90.907, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5vlGKyxl22M", "annotations": [{"segment": [9.826918712011937, 29.509075353756486], "label": "Running a marathon"}, {"segment": [35.59780007026739, 40.97853354067237], "label": "Running a marathon"}, {"segment": [72.41334486777518, 81.19243631948858], "label": "Running a marathon"}]}, "Z_1Zoc6lINU": {"duration": 210.61, "subset": "training", "resolution": "654x480", 
"url": "https://www.youtube.com/watch?v=Z_1Zoc6lINU", "annotations": [{"segment": [105.46677196099843, 196.80559627613104], "label": "Polishing shoes"}]}, "aBdrTqSnWbw": {"duration": 121.21, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=aBdrTqSnWbw", "annotations": [{"segment": [10.40007046801872, 17.396481510140408], "label": "Paintball"}, {"segment": [18.90921903276131, 20.421956555382216], "label": "Paintball"}, {"segment": [38.76389901716069, 42.16755844305772], "label": "Paintball"}, {"segment": [54.08036643369734, 57.29493366926677], "label": "Paintball"}, {"segment": [77.52779803432138, 81.49873403120124], "label": "Paintball"}, {"segment": [102.4879671575663, 110.42983915132605], "label": "Paintball"}]}, "LLD4SapVrgQ": {"duration": 206.496, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LLD4SapVrgQ", "annotations": [{"segment": [12.885806969693753, 34.147388469688444], "label": "BMX"}, {"segment": [36.08025951514251, 111.14008511360862], "label": "BMX"}, {"segment": [113.0729561590627, 153.34110293935566], "label": "BMX"}, {"segment": [174.28053868330736, 185.23347457098282], "label": "BMX"}]}, "mbVZlDl_kMU": {"duration": 64.74, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mbVZlDl_kMU", "annotations": []}, "HEw5wIWVpWE": {"duration": 236.03, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HEw5wIWVpWE", "annotations": [{"segment": [100.15661186991699, 101.99772605870221], "label": "Vacuuming floor"}]}, "EPsSfOEd3BU": {"duration": 46.67, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EPsSfOEd3BU", "annotations": []}, "o2hmponBzIE": {"duration": 137.625, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=o2hmponBzIE", "annotations": [{"segment": [15.565972815912637, 113.04116810452419], "label": "Baton twirling"}]}, 
"EQNJfWiAS28": {"duration": 157.82999999999998, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EQNJfWiAS28", "annotations": [{"segment": [3.017065754618695, 155.68058431069346], "label": "Tango"}]}, "qpeovDXc1cg": {"duration": 69.73, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qpeovDXc1cg", "annotations": [{"segment": [0.01, 8.256967238689548], "label": "Drinking coffee"}]}, "DWvFgDSAUzE": {"duration": 19.83, "subset": "validation", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=DWvFgDSAUzE", "annotations": [{"segment": [0.2, 18.842973464106773], "label": "Knitting"}]}, "GPl7nFwqSgk": {"duration": 202.061, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=GPl7nFwqSgk", "annotations": [{"segment": [0.01, 185.0203853354134], "label": "Drum corps"}]}, "-K_cgWfJxiU": {"duration": 101.03, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=-K_cgWfJxiU", "annotations": [{"segment": [28.756134945397818, 83.57319890795632], "label": "Kayaking"}]}, "ciIaZrpHqgs": {"duration": 173.779, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ciIaZrpHqgs", "annotations": [{"segment": [0.01, 162.12040283931358], "label": "Curling"}]}, "PwbZimjxzps": {"duration": 190.218, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PwbZimjxzps", "annotations": [{"segment": [61.82819846030688, 63.60317079449717], "label": "Bullfighting"}, {"segment": [90.5235845297165, 102.9483908690485], "label": "Bullfighting"}, {"segment": [113.89405359655525, 130.75629077136298], "label": "Bullfighting"}, {"segment": [139.03949391524992, 177.49722752333702], "label": "Bullfighting"}]}, "jCCdvbdbwB8": {"duration": 225.095, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jCCdvbdbwB8", "annotations": []}, "cUEGoRjraxM": 
{"duration": 166.65, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cUEGoRjraxM", "annotations": [{"segment": [11.439287007800312, 25.738395767550703], "label": "Walking the dog"}, {"segment": [34.57784481903276, 83.71478219344772], "label": "Walking the dog"}, {"segment": [90.47436087987519, 93.85415022308892], "label": "Walking the dog"}, {"segment": [109.97314555226208, 113.87290248673946], "label": "Walking the dog"}, {"segment": [119.85252978627145, 123.49230292511699], "label": "Walking the dog"}, {"segment": [129.47193022464896, 133.37168715912637], "label": "Walking the dog"}, {"segment": [149.4906824882995, 153.65042321840872], "label": "Walking the dog"}]}, "_yk7gjnHkjs": {"duration": 230.13, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=_yk7gjnHkjs", "annotations": [{"segment": [17.213873600000003, 189.35851014040563], "label": "Hammer throw"}]}, "e1hN5VLndNI": {"duration": 12.45, "subset": "testing", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=e1hN5VLndNI", "annotations": []}, "VoMTywbceKI": {"duration": 168.299, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VoMTywbceKI", "annotations": []}, "8onOVVuN_Is": {"duration": 64.41, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=8onOVVuN_Is", "annotations": [{"segment": [0.01, 0.02], "label": "Long jump"}]}, "10fX73-AXcg": {"duration": 184.135, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=10fX73-AXcg", "annotations": []}, "IKeV3kdX-_I": {"duration": 35.782, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IKeV3kdX-_I", "annotations": [{"segment": [0.01, 35.781950113378684], "label": "Calf roping"}]}, "cg0sa6wYA1U": {"duration": 192.73, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=cg0sa6wYA1U", 
"annotations": [{"segment": [3.9834929864116924, 189.16133878080421], "label": "Kayaking"}]}, "T98RJsOiQWc": {"duration": 88.654, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=T98RJsOiQWc", "annotations": [{"segment": [7.080253362758916, 9.013514006580309], "label": "Hitting a pinata"}, {"segment": [49.33580658685569, 52.37378754635724], "label": "Hitting a pinata"}, {"segment": [55.4117685058588, 57.345029116450704], "label": "Hitting a pinata"}]}, "QsfIM28uvHM": {"duration": 26.216, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=QsfIM28uvHM", "annotations": [{"segment": [0.2, 25.961000000000002], "label": "Kite flying"}]}, "25dJIdZj6Xs": {"duration": 6.897, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=25dJIdZj6Xs", "annotations": [{"segment": [0.01, 6.234746745278902], "label": "Scuba diving"}]}, "w6kBE7BHHeU": {"duration": 186.108, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=w6kBE7BHHeU", "annotations": [{"segment": [24.35057113187954, 162.33714087919694], "label": "Arm wrestling"}]}, "ghWwS5ev6xk": {"duration": 95.47999999999999, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=ghWwS5ev6xk", "annotations": [{"segment": [42.93519110764431, 91.89767940717628], "label": "Kayaking"}]}, "TdFvHyfq97o": {"duration": 59.26, "subset": "training", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=TdFvHyfq97o", "annotations": [{"segment": [3.8487120000000004, 15.623225564742588], "label": "Mowing the lawn"}]}, "ZwDfAhQVKLQ": {"duration": 55.821, "subset": "training", "resolution": "1920x960", "url": "https://www.youtube.com/watch?v=ZwDfAhQVKLQ", "annotations": [{"segment": [20.537154446177844, 52.49298439937598], "label": "Using the rowing machine"}]}, "zVMDHCnT-d4": {"duration": 61.6, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=zVMDHCnT-d4", "annotations": [{"segment": [20.936502340093604, 27.371115444617786], "label": "Drinking beer"}]}, "8UKLht_ZKr4": {"duration": 44.91, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8UKLht_ZKr4", "annotations": []}, "lnx8WSmcLF8": {"duration": 97.03999999999999, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lnx8WSmcLF8", "annotations": [{"segment": [12.408864274570982, 76.87442745709829], "label": "Using parallel bars"}]}, "EOvotFy4YX0": {"duration": 44.676, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EOvotFy4YX0", "annotations": [{"segment": [0, 44.641], "label": "Camel ride"}]}, "A-FI48u86dQ": {"duration": 85.636, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=A-FI48u86dQ", "annotations": []}, "w-6Oo3bgms4": {"duration": 114.428, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=w-6Oo3bgms4", "annotations": [{"segment": [0.01, 111.8256800523414], "label": "Futsal"}]}, "uvnrLngXHh8": {"duration": 199.0, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uvnrLngXHh8", "annotations": [{"segment": [16.44984399375975, 182.81053042121684], "label": "Shaving"}]}, "2D22fVcAcyo": {"duration": 215.784, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2D22fVcAcyo", "annotations": [{"segment": [10.433652106084244, 25.242706708268333], "label": "Slacklining"}, {"segment": [38.368914196567864, 66.30417628705149], "label": "Slacklining"}, {"segment": [74.71841185647428, 91.2103135725429], "label": "Slacklining"}, {"segment": [103.66338221528862, 126.8866723868955], "label": "Slacklining"}, {"segment": [132.27178315132608, 180.0855070202808], "label": "Slacklining"}]}, "-Pa52Pf_Raw": {"duration": 221.1, "subset": "training", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=-Pa52Pf_Raw", "annotations": [{"segment": [1.3797218773104665, 85.88768686257653], "label": "Smoking a cigarette"}, {"segment": [113.13719393945824, 221.10043083900226], "label": "Smoking a cigarette"}]}, "kIjYdddY3Xk": {"duration": 59.35, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kIjYdddY3Xk", "annotations": [{"segment": [0.6481301528861154, 45.92465083307332], "label": "Cleaning shoes"}]}, "wBcP3SQ3Qg4": {"duration": 229.86, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=wBcP3SQ3Qg4", "annotations": [{"segment": [36.39659047581904, 206.72546216068645], "label": "Washing hands"}]}, "sPEed6XrTZA": {"duration": 124.854, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=sPEed6XrTZA", "annotations": [{"segment": [49.45312636505461, 118.97914352574104], "label": "Hand car wash"}]}, "lHy_OZ8GygM": {"duration": 204.62, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lHy_OZ8GygM", "annotations": [{"segment": [5.584893377535102, 175.36565205460218], "label": "Getting a haircut"}]}, "q2JRY6-riYA": {"duration": 42.052, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=q2JRY6-riYA", "annotations": [{"segment": [0.01, 42.051336], "label": "Riding bumper cars"}]}, "IiiQJFM7FAc": {"duration": 71.448, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=IiiQJFM7FAc", "annotations": []}, "Bg-0ibLZrgg": {"duration": 212.115, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=Bg-0ibLZrgg", "annotations": [{"segment": [44.011232093603745, 198.21600018096723], "label": "Surfing"}]}, "cwVZNpqc-mQ": {"duration": 236.31, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cwVZNpqc-mQ", "annotations": [{"segment": [7.717709714759485, 208.74581835375682], 
"label": "Playing drums"}]}, "7a7UMZtTraw": {"duration": 60.14, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7a7UMZtTraw", "annotations": []}, "AlLg4ZaxDoQ": {"duration": 179.909, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AlLg4ZaxDoQ", "annotations": [{"segment": [71.27097347893915, 134.98090639625585], "label": "River tubing"}]}, "rwxSphRRIL8": {"duration": 88.52, "subset": "validation", "resolution": "640x348", "url": "https://www.youtube.com/watch?v=rwxSphRRIL8", "annotations": [{"segment": [37.974225363572366, 40.59789911596464], "label": "Preparing pasta"}, {"segment": [42.39304431496988, 47.08803945082973], "label": "Preparing pasta"}, {"segment": [68.21551756219908, 70.97727940682253], "label": "Preparing pasta"}, {"segment": [75.67227454268239, 83.81947198432155], "label": "Preparing pasta"}]}, "n33Uv--Lbl4": {"duration": 37.18, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=n33Uv--Lbl4", "annotations": [{"segment": [0.01, 35.2578159126365], "label": "Brushing teeth"}]}, "PRT0Z9HPF4U": {"duration": 141.247, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PRT0Z9HPF4U", "annotations": [{"segment": [0.01, 141.246983], "label": "Playing pool"}]}, "rXgC48CLncg": {"duration": 201.97, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=rXgC48CLncg", "annotations": [{"segment": [0.9452436131045242, 167.3081195195008], "label": "Getting a haircut"}]}, "VTtRk9vvZoY": {"duration": 203.8, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VTtRk9vvZoY", "annotations": [{"segment": [16.848228669999997, 142.63624387999997], "label": "Starting a campfire"}]}, "6dc-fQCzOiw": {"duration": 174.289, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=6dc-fQCzOiw", "annotations": [{"segment": [0.01, 
140.30126778159126], "label": "Longboarding"}]}, "feWO_gqAcGk": {"duration": 68.221, "subset": "validation", "resolution": "240x184", "url": "https://www.youtube.com/watch?v=feWO_gqAcGk", "annotations": [{"segment": [5.623127491730234, 7.108482351144191], "label": "Bullfighting"}, {"segment": [17.187676040024616, 19.627901880490402], "label": "Bullfighting"}, {"segment": [45.30332042619281, 46.682578474404316], "label": "Bullfighting"}]}, "oS7Twj3Pou0": {"duration": 37.92, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=oS7Twj3Pou0", "annotations": [{"segment": [26.50132048673947, 28.867509815912634], "label": "Hopscotch"}]}, "l8gg9TYh_B0": {"duration": 139.76, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=l8gg9TYh_B0", "annotations": []}, "j_1kNiGcepU": {"duration": 91.49, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=j_1kNiGcepU", "annotations": []}, "U2u9JboK97A": {"duration": 88.05, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=U2u9JboK97A", "annotations": [{"segment": [4.389807575663027, 64.0637543073323], "label": "Layup drill in basketball"}]}, "8j3UI3vaC2g": {"duration": 79.807, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8j3UI3vaC2g", "annotations": []}, "k-071peyck4": {"duration": 40.38, "subset": "training", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=k-071peyck4", "annotations": [{"segment": [2.11295631825273, 38.56933697347894], "label": "Hand washing clothes"}]}, "W04glwJ_IYI": {"duration": 96.387, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=W04glwJ_IYI", "annotations": [{"segment": [15.951494045253307, 83.01543766147796], "label": "Grooming dog"}]}, "BUqr7or97JA": {"duration": 52.918, "subset": "training", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=BUqr7or97JA", "annotations": [{"segment": [0.2, 52.71809523809524], "label": "Arm wrestling"}]}, "tAleUKVZCD8": {"duration": 136.464, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=tAleUKVZCD8", "annotations": [{"segment": [0.2, 6.38675538858289], "label": "Playing ice hockey"}, {"segment": [14.050861854882358, 23.630994937756693], "label": "Playing ice hockey"}, {"segment": [26.398588939475946, 56.41633926581552], "label": "Playing ice hockey"}, {"segment": [59.396825113820874, 70.25430927441178], "label": "Playing ice hockey"}, {"segment": [73.87347066127542, 97.50446559903212], "label": "Playing ice hockey"}, {"segment": [103.03965360247062, 126.24486348829953], "label": "Playing ice hockey"}]}, "j_e8c6tOQG8": {"duration": 116.89, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=j_e8c6tOQG8", "annotations": [{"segment": [14.855631825273012, 86.12620904836193], "label": "Kayaking"}, {"segment": [99.61475819032762, 113.46786271450858], "label": "Kayaking"}]}, "PqP3JIJEU5I": {"duration": 54.103, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=PqP3JIJEU5I", "annotations": [{"segment": [0.01, 54.061], "label": "Fixing the roof"}]}, "8itO1pQI9ww": {"duration": 12.8, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8itO1pQI9ww", "annotations": [{"segment": [0.16486430920588413, 9.332043810763516], "label": "Drinking beer"}]}, "9Zy5ylJYiA4": {"duration": 197.02, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9Zy5ylJYiA4", "annotations": [{"segment": [10.906419656786271, 34.56259750390016], "label": "Shot put"}, {"segment": [60.06211388455539, 85.56163026521061], "label": "Shot put"}, {"segment": [95.39276911076443, 119.66339313572544], "label": "Shot put"}, {"segment": [161.4457332293292, 184.48746489859593], "label": "Shot put"}]}, 
"qZTAv1s_eBQ": {"duration": 84.266, "subset": "training", "resolution": "660x480", "url": "https://www.youtube.com/watch?v=qZTAv1s_eBQ", "annotations": [{"segment": [8.939211620165487, 84.265214], "label": "Powerbocking"}]}, "jFZRNe7xFY8": {"duration": 21.36, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=jFZRNe7xFY8", "annotations": [{"segment": [0.01, 21.195723950078], "label": "Using the balance beam"}]}, "7PSicg5Q2ZI": {"duration": 229.367, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=7PSicg5Q2ZI", "annotations": [{"segment": [31.4887216349454, 229.3667120181406], "label": "Making a lemonade"}]}, "8BAn48zfc68": {"duration": 8.82, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8BAn48zfc68", "annotations": [{"segment": [0.82036254, 8.753333], "label": "Starting a campfire"}]}, "sb0r71oGs3M": {"duration": 62.95, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sb0r71oGs3M", "annotations": [{"segment": [17.27060881901679, 38.54379021241137], "label": "Waterskiing"}, {"segment": [50.20671700368863, 57.0672621750282], "label": "Waterskiing"}]}, "OTwG3OCsPoY": {"duration": 102.192, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OTwG3OCsPoY", "annotations": [{"segment": [0.2, 101.901], "label": "Using the rowing machine"}]}, "6uRQHl4dKlk": {"duration": 151.05, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=6uRQHl4dKlk", "annotations": []}, "C-6kvesNmU4": {"duration": 39.96, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=C-6kvesNmU4", "annotations": [{"segment": [8.100358814352575, 17.010753510140407], "label": "Kayaking"}]}, "sPK-sbHIb3w": {"duration": 214.07, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sPK-sbHIb3w", "annotations": []}, 
"9af8-f0Xir8": {"duration": 43.422, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9af8-f0Xir8", "annotations": []}, "hFi6S_guB7I": {"duration": 201.02, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hFi6S_guB7I", "annotations": []}, "uIl5Tj74sLw": {"duration": 38.57, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uIl5Tj74sLw", "annotations": [{"segment": [0.6771977896129573, 26.50901511048523], "label": "Washing face"}]}, "jtyWcZGp4VA": {"duration": 217.2, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jtyWcZGp4VA", "annotations": [{"segment": [74.86452574102964, 217.161], "label": "Grooming dog"}]}, "sByOncVAMqg": {"duration": 99.71000000000001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sByOncVAMqg", "annotations": [{"segment": [6.63643136, 95.97273580467028], "label": "Using the balance beam"}]}, "Z6l2Yu9Q0mU": {"duration": 210.002, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Z6l2Yu9Q0mU", "annotations": [{"segment": [30.174331860907724, 111.27448224323784], "label": "Hula hoop"}, {"segment": [144.95708660460855, 172.4263950158235], "label": "Hula hoop"}]}, "TKHKTbbSynM": {"duration": 121.84, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TKHKTbbSynM", "annotations": []}, "TNwMZhQ7SFc": {"duration": 163.1, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=TNwMZhQ7SFc", "annotations": [{"segment": [83.45679150703442, 85.74676444472745], "label": "Washing face"}, {"segment": [86.76453019481322, 95.16109763302097], "label": "Washing face"}, {"segment": [98.46883604836194, 99.99548466926677], "label": "Washing face"}]}, "aYSJn94g_Io": {"duration": 81.572, "subset": "validation", "resolution": "640x360", "url": 
"https://www.youtube.com/watch?v=aYSJn94g_Io", "annotations": [{"segment": [29.71449626964671, 33.02317652106085], "label": "Playing ten pins"}, {"segment": [42.56744719188767, 46.63966934477379], "label": "Playing ten pins"}, {"segment": [52.36623174726989, 55.54765530421217], "label": "Playing ten pins"}, {"segment": [62.80130101404057, 64.32838432137287], "label": "Playing ten pins"}]}, "b02GzZM5iGI": {"duration": 84.94, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=b02GzZM5iGI", "annotations": [{"segment": [0.01, 10.335705689452068], "label": "Volleyball"}, {"segment": [23.719119098283933, 30.609589450858035], "label": "Volleyball"}, {"segment": [37.10253266770671, 60.55663367550703], "label": "Volleyball"}, {"segment": [71.42237538533541, 84.738298], "label": "Volleyball"}]}, "FSU0YVLCoSc": {"duration": 163.72, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FSU0YVLCoSc", "annotations": [{"segment": [73.64751709000001, 136.74636129], "label": "Fixing bicycle"}, {"segment": [73.64751709000001, 136.74636129], "label": "Fixing bicycle"}]}, "poERMbR_nY4": {"duration": 186.619, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=poERMbR_nY4", "annotations": [{"segment": [35.11791982733996, 170.9459072587044], "label": "Snow tubing"}]}, "64vbnwtL7I8": {"duration": 127.432, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=64vbnwtL7I8", "annotations": [{"segment": [0.5964014560582424, 4.7712116068642745], "label": "Playing ten pins"}, {"segment": [44.33250784711389, 50.49532283931358], "label": "Playing ten pins"}, {"segment": [56.459337347893914, 69.77896975039002], "label": "Playing ten pins"}, {"segment": [72.76097700468019, 78.52619102964118], "label": "Playing ten pins"}, {"segment": [88.46621521060842, 101.38824664586583], "label": "Playing ten pins"}, {"segment": [110.53306889235569, 115.70188146645867], 
"label": "Playing ten pins"}]}, "8AUTKSqcmiM": {"duration": 98.8, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=8AUTKSqcmiM", "annotations": []}, "ennVaOEePHk": {"duration": 237.587, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ennVaOEePHk", "annotations": [{"segment": [38.16961466458659, 93.01527457098284], "label": "BMX"}, {"segment": [97.83279875195008, 217.17745085803432], "label": "BMX"}]}, "mSonugqhYuE": {"duration": 194.328, "subset": "validation", "resolution": "384x288", "url": "https://www.youtube.com/watch?v=mSonugqhYuE", "annotations": [{"segment": [21.457625438951585, 82.50609403323222], "label": "Assembling bicycle"}]}, "OZftVCeblMk": {"duration": 6.67, "subset": "training", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=OZftVCeblMk", "annotations": [{"segment": [0.22872198408736352, 2.1520659411856475], "label": "Washing dishes"}]}, "0h4UT-2XTAw": {"duration": 170.16, "subset": "validation", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=0h4UT-2XTAw", "annotations": [{"segment": [25.99690795631825, 124.41377379095162], "label": "Removing curlers"}]}, "bgYVKA4VCbw": {"duration": 64.32, "subset": "testing", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=bgYVKA4VCbw", "annotations": []}, "jETKNHeojN8": {"duration": 111.03999999999999, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jETKNHeojN8", "annotations": [{"segment": [24.078404703535078, 105.66781920256402], "label": "Ballet"}]}, "le1aEgEms9Y": {"duration": 144.92000000000002, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=le1aEgEms9Y", "annotations": [{"segment": [33.46892779520622, 138.65698608809763], "label": "Brushing teeth"}]}, "gaNiULmtQg8": {"duration": 109.72, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gaNiULmtQg8", 
"annotations": [{"segment": [23.79139737018052, 105.26409627813683], "label": "Using the balance beam"}]}, "a9qztQPPsJg": {"duration": 113.45, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=a9qztQPPsJg", "annotations": [{"segment": [30.442784905954056, 40.8853681004383], "label": "Cleaning shoes"}, {"segment": [43.18627626193483, 53.45186652091933], "label": "Cleaning shoes"}, {"segment": [77.69989868438275, 98.58506450390016], "label": "Cleaning shoes"}]}, "wC-k0Zfx-cM": {"duration": 87.702, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wC-k0Zfx-cM", "annotations": [{"segment": [7.251472154446178, 30.100450452418098], "label": "Sumo"}, {"segment": [71.00969902184087, 87.501767], "label": "Sumo"}]}, "O_StqLLpAVI": {"duration": 191.66, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=O_StqLLpAVI", "annotations": [{"segment": [18.329848468594122, 188.19442172432414], "label": "Fixing bicycle"}]}, "z3MP0yJBtGg": {"duration": 17.83, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=z3MP0yJBtGg", "annotations": []}, "GLL1vOrV5Qo": {"duration": 228.137, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=GLL1vOrV5Qo", "annotations": [{"segment": [12.934817254373819, 221.17115223568223], "label": "Hanging wallpaper"}]}, "elgmPvU19K8": {"duration": 213.111, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=elgmPvU19K8", "annotations": [{"segment": [22.871505823331148, 26.18621754524127], "label": "Wakeboarding"}, {"segment": [36.46182388316265, 81.87337447333132], "label": "Wakeboarding"}]}, "OrDUCd94ONQ": {"duration": 180.42, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OrDUCd94ONQ", "annotations": []}, "0T7yANM5I5Y": {"duration": 22.22, "subset": "training", "resolution": "720x480", "url": 
"https://www.youtube.com/watch?v=0T7yANM5I5Y", "annotations": [{"segment": [12.44542387675507, 21.66682429797192], "label": "Clean and jerk"}]}, "X-xQLFHZIOU": {"duration": 138.995, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=X-xQLFHZIOU", "annotations": [{"segment": [0.009999999999999998, 138.994647], "label": "Bullfighting"}]}, "Y5puaQ9oNVw": {"duration": 48.74, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Y5puaQ9oNVw", "annotations": [{"segment": [1.4432886115444619, 46.0333104524181], "label": "Tumbling"}]}, "xynscQyItDQ": {"duration": 67.571, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=xynscQyItDQ", "annotations": [{"segment": [36.156837723868954, 64.30224784711388], "label": "Putting on shoes"}]}, "LWt2FuU0uC4": {"duration": 153.23, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LWt2FuU0uC4", "annotations": []}, "V4U5SaPDL0E": {"duration": 194.1, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=V4U5SaPDL0E", "annotations": [{"segment": [0.01, 190.7647871629151], "label": "Cumbia"}]}, "qY8t0S0WWoA": {"duration": 45.651, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qY8t0S0WWoA", "annotations": [{"segment": [0.01, 45.650429], "label": "Painting fence"}]}, "dUFgilgnJmk": {"duration": 225.884, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dUFgilgnJmk", "annotations": []}, "Z2wZr0Beq9c": {"duration": 132.725, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=Z2wZr0Beq9c", "annotations": [{"segment": [0.2, 6.211790675708662], "label": "Playing blackjack"}, {"segment": [34.99308747315879, 131.6899605678627], "label": "Playing blackjack"}]}, "_Z71mu4aQy4": {"duration": 47.07, "subset": "training", "resolution": "400x224", "url": 
"https://www.youtube.com/watch?v=_Z71mu4aQy4", "annotations": [{"segment": [25.148810015600624, 40.0578726], "label": "Cricket"}]}, "Lfj9JC7uWmE": {"duration": 81.061, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Lfj9JC7uWmE", "annotations": [{"segment": [4.4228315132605305, 55.222210608424334], "label": "Tug of war"}]}, "4ehqHRSGpI4": {"duration": 146.379, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=4ehqHRSGpI4", "annotations": []}, "qcaaF0SqVss": {"duration": 48.925, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qcaaF0SqVss", "annotations": [{"segment": [5.037406263597482, 6.563892845553822], "label": "Doing a powerbomb"}, {"segment": [17.783570151326053, 26.713517394695785], "label": "Doing a powerbomb"}]}, "JDfFAljt4JM": {"duration": 233.64, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JDfFAljt4JM", "annotations": [{"segment": [10.383826053042121, 206.7938789], "label": "Ballet"}]}, "i7NKJhCdKx0": {"duration": 195.118, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=i7NKJhCdKx0", "annotations": [{"segment": [8.216297971918877, 189.9072449297972], "label": "Snowboarding"}]}, "s9MNW35YCMw": {"duration": 201.689, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=s9MNW35YCMw", "annotations": [{"segment": [42.162622815116684, 101.63079977076634], "label": "Gargling mouthwash"}]}, "qakxXmFvFy0": {"duration": 58.422, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qakxXmFvFy0", "annotations": [{"segment": [0.2, 55.4237503900156], "label": "Building sandcastles"}]}, "J8VMY1SPJaY": {"duration": 225.37, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=J8VMY1SPJaY", "annotations": [{"segment": [25.66649021335003, 174.74309090458857], "label": 
"Kayaking"}, {"segment": [190.56489840249608, 213.06702662714508], "label": "Kayaking"}]}, "8-QvFzI1c0U": {"duration": 113.801, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8-QvFzI1c0U", "annotations": []}, "IDIALY6Ayk8": {"duration": 166.69, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=IDIALY6Ayk8", "annotations": []}, "39FOmbBNcI0": {"duration": 54.149, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=39FOmbBNcI0", "annotations": []}, "9pavfCL1HvU": {"duration": 28.86, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9pavfCL1HvU", "annotations": []}, "3G1T_V102GA": {"duration": 57.31, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3G1T_V102GA", "annotations": [{"segment": [40.549929797191886, 41.53567394695788], "label": "Shaving"}, {"segment": [42.342191887675504, 43.14870982839314], "label": "Shaving"}]}, "dgXPoqHrKRM": {"duration": 152.997, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dgXPoqHrKRM", "annotations": [{"segment": [29.73192068429238, 143.20765474339035], "label": "Snow tubing"}]}, "rBsE1WGXbz0": {"duration": 65.62, "subset": "training", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=rBsE1WGXbz0", "annotations": [{"segment": [5.732756780031202, 7.677799258970358], "label": "Shot put"}]}, "hrN0hOmQrD4": {"duration": 99.521, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hrN0hOmQrD4", "annotations": [{"segment": [15.985246489859595, 69.69726833073324], "label": "River tubing"}]}, "lkC_md7KKq0": {"duration": 32.95, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lkC_md7KKq0", "annotations": [{"segment": [12.859736349453978, 22.300366614664586], "label": "Sharpening knives"}]}, "IRj0d3cLi6g": {"duration": 
30.91, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IRj0d3cLi6g", "annotations": [{"segment": [0.01, 30.905758], "label": "Playing bagpipes"}]}, "gUGfWzgDyKA": {"duration": 114.243, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gUGfWzgDyKA", "annotations": []}, "w-KZEq6JhnQ": {"duration": 116.332, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=w-KZEq6JhnQ", "annotations": [{"segment": [0.01, 110.52444670670826], "label": "Knitting"}]}, "7c5vvEn3uLk": {"duration": 145.311, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7c5vvEn3uLk", "annotations": [{"segment": [35.13747859594383, 37.404412698907954], "label": "Calf roping"}, {"segment": [42.391667725429016, 55.31319211232449], "label": "Calf roping"}, {"segment": [58.94028667706708, 62.56738124180967], "label": "Calf roping"}, {"segment": [72.31519788455537, 112.89331832761309], "label": "Calf roping"}]}, "qL7kMgxpFJY": {"duration": 54.08, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=qL7kMgxpFJY", "annotations": [{"segment": [10.817454758190326, 53.961], "label": "Plastering"}]}, "sLq6p6NKApk": {"duration": 74.885, "subset": "training", "resolution": "647x480", "url": "https://www.youtube.com/watch?v=sLq6p6NKApk", "annotations": [{"segment": [42.05673525117004, 45.32781465959438], "label": "Croquet"}]}, "JjGhHZgdWVI": {"duration": 112.919, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JjGhHZgdWVI", "annotations": [{"segment": [15.453845389410716, 103.61102318351647], "label": "Welding"}]}, "cK0-WcGXacQ": {"duration": 13.38, "subset": "testing", "resolution": "426x236", "url": "https://www.youtube.com/watch?v=cK0-WcGXacQ", "annotations": []}, "LFiGhD1OEws": {"duration": 129.568, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=LFiGhD1OEws", "annotations": [{"segment": [6.266127543060906, 8.893858448215479], "label": "Hurling"}, {"segment": [17.383450603330253, 20.819714094686237], "label": "Hurling"}, {"segment": [28.298640517049254, 29.915705689452068], "label": "Hurling"}, {"segment": [51.139686077239006, 53.565283835843225], "label": "Hurling"}, {"segment": [62.054875990958, 82.26819064599319], "label": "Hurling"}, {"segment": [107.33269921216848, 110.97109579563181], "label": "Hurling"}]}, "43dxxkLDw6M": {"duration": 131.634, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=43dxxkLDw6M", "annotations": []}, "S-DOW63629o": {"duration": 84.428, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=S-DOW63629o", "annotations": [{"segment": [0.2, 82.6696848673947], "label": "Playing congas"}]}, "3AWvyAJv20g": {"duration": 135.91, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3AWvyAJv20g", "annotations": [{"segment": [72.92209672386895, 105.56745397815912], "label": "Ironing clothes"}]}, "ku65ME0vW8s": {"duration": 32.764, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ku65ME0vW8s", "annotations": [{"segment": [1.2738396036594009, 27.413042883473803], "label": "Using the monkey bar"}]}, "ICl9CT-9fKY": {"duration": 126.944, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ICl9CT-9fKY", "annotations": [{"segment": [69.40344936105284, 120.6156800623053], "label": "Rope skipping"}]}, "r-1TiwT38ck": {"duration": 39.27, "subset": "testing", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=r-1TiwT38ck", "annotations": []}, "2ErgL7-MXHw": {"duration": 11.052, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=2ErgL7-MXHw", "annotations": [{"segment": [0.20000000000000004, 10.488003120124805], "label": "Trimming 
branches or hedges"}]}, "mdJHGKABLhU": {"duration": 60.35, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mdJHGKABLhU", "annotations": [{"segment": [25.231577453978158, 27.585269380655227], "label": "Shaving"}, {"segment": [42.931340742589704, 47.45042924180967], "label": "Shaving"}]}, "aXnllTmgeqg": {"duration": 88.051, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aXnllTmgeqg", "annotations": [{"segment": [7.950246105919003, 85.12246261682245], "label": "Playing drums"}]}, "OjV4UScwkU0": {"duration": 161.077, "subset": "validation", "resolution": "160x120", "url": "https://www.youtube.com/watch?v=OjV4UScwkU0", "annotations": [{"segment": [23.32547196261682, 51.16555140186915], "label": "Welding"}, {"segment": [90.29214953271027, 121.89440186915887], "label": "Welding"}]}, "ErEr4Sxdprw": {"duration": 142.15, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ErEr4Sxdprw", "annotations": [{"segment": [7.983600878729027, 133.947081409787], "label": "Clean and jerk"}]}, "aRurjtzwmS8": {"duration": 65.62, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aRurjtzwmS8", "annotations": [{"segment": [12.756528737661473, 29.288991999793698], "label": "Sumo"}, {"segment": [38.47369273518588, 44.39272262632118], "label": "Sumo"}]}, "ueDoskgg2yw": {"duration": 15.09, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=ueDoskgg2yw", "annotations": [{"segment": [0.02349609984399376, 9.492424336973478], "label": "Shaving"}]}, "qrvPTE0kb5U": {"duration": 218.384, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qrvPTE0kb5U", "annotations": [{"segment": [117.02775637556114, 123.50090738323411], "label": "Croquet"}]}, "DIATqGGmG2g": {"duration": 187.46, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=DIATqGGmG2g", "annotations": [{"segment": [42.98882995319813, 47.66788627463466], "label": "Tumbling"}, {"segment": [69.01608074118883, 73.11025468018721], "label": "Tumbling"}, {"segment": [86.8549825600624, 92.99624395319812], "label": "Tumbling"}, {"segment": [106.74097183307332, 113.1746742449298], "label": "Tumbling"}, {"segment": [125.16475601248051, 133.93798657410295], "label": "Tumbling"}, {"segment": [153.53153482839312, 158.50303214664586], "label": "Tumbling"}]}, "X4J9UBXb0FM": {"duration": 203.73, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=X4J9UBXb0FM", "annotations": []}, "pxW19lG4iaI": {"duration": 47.81, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=pxW19lG4iaI", "annotations": []}, "Wir90-9HmAI": {"duration": 122.35, "subset": "training", "resolution": "480x272", "url": "https://www.youtube.com/watch?v=Wir90-9HmAI", "annotations": [{"segment": [41.131857995408254, 106.59927002522278], "label": "Breakdancing"}]}, "DfVEiJ4ioNU": {"duration": 55.125, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DfVEiJ4ioNU", "annotations": []}, "WreRcthWXv8": {"duration": 155.667, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WreRcthWXv8", "annotations": [{"segment": [0.01, 145.2947470988455], "label": "Rollerblading"}]}, "GoVqtUSnshE": {"duration": 224.26, "subset": "training", "resolution": "352x288", "url": "https://www.youtube.com/watch?v=GoVqtUSnshE", "annotations": [{"segment": [112.12916099773243, 114.92801681046832], "label": "Drinking beer"}, {"segment": [148.51428656329927, 152.01285632921918], "label": "Drinking beer"}]}, "HlECGLpmqLc": {"duration": 44.165, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HlECGLpmqLc", "annotations": []}, "T8nteSe3U9Q": {"duration": 145.636, "subset": "training", "resolution": "854x480", 
"url": "https://www.youtube.com/watch?v=T8nteSe3U9Q", "annotations": [{"segment": [4.089609940717629, 20.448049703588143], "label": "Applying sunscreen"}, {"segment": [24.310459092043686, 79.97459439625587], "label": "Applying sunscreen"}, {"segment": [104.28505348829954, 140.40994129797193], "label": "Applying sunscreen"}]}, "p9JNte3VMKQ": {"duration": 218.082, "subset": "training", "resolution": "3414x1080", "url": "https://www.youtube.com/watch?v=p9JNte3VMKQ", "annotations": [{"segment": [0.01, 218.041], "label": "Sharpening knives"}]}, "MiOJxYa5Nt4": {"duration": 17.28, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=MiOJxYa5Nt4", "annotations": [{"segment": [0.01, 12.653020019344773], "label": "Hammer throw"}]}, "Dzj5X11anrk": {"duration": 88.492, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Dzj5X11anrk", "annotations": [{"segment": [28.290959438377534, 77.00661154446178], "label": "Spread mulch"}]}, "_CFadTE9L4I": {"duration": 125.37, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_CFadTE9L4I", "annotations": [{"segment": [0, 125.364036], "label": "Hand washing clothes"}]}, "d3crFny-e3E": {"duration": 81.28999999999999, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=d3crFny-e3E", "annotations": [{"segment": [14.608352100000001, 17.79097305], "label": "Rock climbing"}, {"segment": [48.92754740212496, 59.547480590305355], "label": "Rock climbing"}, {"segment": [54.616797324364455, 59.67390836635512], "label": "Rock climbing"}, {"segment": [67.89171380958996, 72.3166859713318], "label": "Rock climbing"}]}, "GChGC-VJZHs": {"duration": 103.35, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GChGC-VJZHs", "annotations": []}, "894d9_pty3o": {"duration": 60.349, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=894d9_pty3o", 
"annotations": [{"segment": [6.773279251170047, 17.12702340093604], "label": "Skiing"}, {"segment": [23.14971294851794, 24.655385335413413], "label": "Skiing"}, {"segment": [29.64292511700468, 30.301656786271447], "label": "Skiing"}, {"segment": [34.34815132605304, 35.94792823712948], "label": "Skiing"}, {"segment": [39.712109204368176, 41.02957254290171], "label": "Skiing"}, {"segment": [50.8164430577223, 52.51032449297972], "label": "Skiing"}]}, "_3lgJ6yp7o8": {"duration": 22.9, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=_3lgJ6yp7o8", "annotations": [{"segment": [0.21245631999999998, 11.17955625897036], "label": "Kayaking"}]}, "XFySa1Gc-yM": {"duration": 79.13, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=XFySa1Gc-yM", "annotations": [{"segment": [4.920744623661783, 18.206760175761314], "label": "Removing curlers"}]}, "I9wra8bj2sg": {"duration": 23.081, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=I9wra8bj2sg", "annotations": [{"segment": [2.0524119828393133, 15.483107940717627], "label": "Calf roping"}]}, "xI_XIVYtERk": {"duration": 123.81, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=xI_XIVYtERk", "annotations": [{"segment": [11.778900156006241, 116.24422776911078], "label": "Smoking a cigarette"}]}, "FRX4LTw9650": {"duration": 162.87, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FRX4LTw9650", "annotations": [{"segment": [41.528234789391576, 47.116131825273015], "label": "Brushing hair"}, {"segment": [61.0858744149766, 93.34328003120126], "label": "Brushing hair"}, {"segment": [105.5350553822153, 118.74281201248051], "label": "Brushing hair"}, {"segment": [138.04645631825275, 150.74622230889236], "label": "Brushing hair"}]}, "Q6KyDc24uSk": {"duration": 224.863, "subset": "validation", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=Q6KyDc24uSk", "annotations": [{"segment": [80.6837269422777, 88.40130082371294], "label": "Playing pool"}, {"segment": [104.5380462121685, 112.25562009360375], "label": "Playing pool"}, {"segment": [143.8275132449298, 146.9847025600624], "label": "Playing pool"}, {"segment": [165.226240825273, 176.45180283463338], "label": "Playing pool"}]}, "rtJTJ10ppRc": {"duration": 20.48, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=rtJTJ10ppRc", "annotations": [{"segment": [0.010000000000000002, 20.479999], "label": "Playing beach volleyball"}]}, "rtePSWZ-SWA": {"duration": 83.732, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rtePSWZ-SWA", "annotations": []}, "Mzojo2EeWu8": {"duration": 161.797, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Mzojo2EeWu8", "annotations": [{"segment": [10.086801872074883, 20.677943837753507], "label": "Beach soccer"}, {"segment": [25.973514820592825, 31.016915756630265], "label": "Beach soccer"}, {"segment": [37.82550702028081, 57.494770670826824], "label": "Beach soccer"}, {"segment": [60.77298127925116, 62.286001560062395], "label": "Beach soccer"}, {"segment": [63.04251170046801, 79.68573478939157], "label": "Beach soccer"}, {"segment": [82.96394539781592, 87.25083619344774], "label": "Beach soccer"}, {"segment": [92.79857722308893, 117.52578159126364], "label": "Beach soccer"}, {"segment": [140.98049765990638, 151.82515132605306], "label": "Beach soccer"}]}, "nHafujMomWg": {"duration": 120.048, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=nHafujMomWg", "annotations": [{"segment": [8.240366951638066, 102.81730582839313], "label": "Peeling potatoes"}]}, "87hsTxVtn-A": {"duration": 235.89, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=87hsTxVtn-A", "annotations": [{"segment": [3.680048861154446, 
231.84307825273012], "label": "Belly dance"}]}, "ZVKUKjVYwF8": {"duration": 116.379, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ZVKUKjVYwF8", "annotations": [{"segment": [0, 116.378389], "label": "Installing carpet"}]}, "LKf1eTzmK14": {"duration": 196.325, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LKf1eTzmK14", "annotations": [{"segment": [0.01, 7.682491900653917], "label": "Running a marathon"}, {"segment": [12.276674000687764, 24.22154751706842], "label": "Running a marathon"}, {"segment": [28.203172022528637, 46.57990050926811], "label": "Running a marathon"}, {"segment": [97.11590384780165, 108.14194093984534], "label": "Running a marathon"}, {"segment": [115.1863535264288, 155.61515619725563], "label": "Running a marathon"}]}, "e8MK2naV6E8": {"duration": 49.32, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=e8MK2naV6E8", "annotations": [{"segment": [0.6924690140405616, 2.1543480436817473], "label": "Hitting a pinata"}, {"segment": [8.232687166926677, 45.54907292355694], "label": "Hitting a pinata"}]}, "vpvhtd7CqUA": {"duration": 215.064, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vpvhtd7CqUA", "annotations": [{"segment": [142.14909051185276, 149.8418650980999], "label": "Bullfighting"}]}, "JH31FxBZ5Xw": {"duration": 138.299, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JH31FxBZ5Xw", "annotations": [{"segment": [0.01, 131.49085468882566], "label": "Doing a powerbomb"}]}, "6YIZ00dNpMU": {"duration": 179.536, "subset": "validation", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=6YIZ00dNpMU", "annotations": [{"segment": [28.75937188878312, 41.04493063217348], "label": "Playing congas"}, {"segment": [66.17448260729012, 78.46004135068048], "label": "Playing congas"}, {"segment": [92.14168631491064, 99.68055190744565], "label": 
"Playing congas"}, {"segment": [0.2, 179.336688], "label": "Playing congas"}, {"segment": [130.6736660100895, 155.2447834968702], "label": "Playing congas"}, {"segment": [172.27703539111593, 179.336688], "label": "Playing congas"}]}, "9DVsv84awMg": {"duration": 213.51, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9DVsv84awMg", "annotations": [{"segment": [29.977649718233625, 39.970199624311505], "label": "Paintball"}, {"segment": [46.964984558566016, 164.87707345028494], "label": "Paintball"}, {"segment": [171.87185838453945, 201.8495081027731], "label": "Paintball"}]}, "jGo19SQpsng": {"duration": 231.132, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jGo19SQpsng", "annotations": []}, "GldxuTsExZM": {"duration": 228.346, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GldxuTsExZM", "annotations": [{"segment": [53.078080393889245, 91.48850633557043], "label": "Carving jack-o-lanterns"}, {"segment": [100.37873219226825, 153.36447829818724], "label": "Carving jack-o-lanterns"}]}, "efkbBgZ2Mzo": {"duration": 60.256, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=efkbBgZ2Mzo", "annotations": [{"segment": [7.54605422590576, 53.34797452881405], "label": "Elliptical trainer"}]}, "2fndjkCHsEY": {"duration": 193.144, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2fndjkCHsEY", "annotations": [{"segment": [0.19999999999999998, 192.901], "label": "Table soccer"}]}, "l-PDSOCk7z0": {"duration": 140.09, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=l-PDSOCk7z0", "annotations": [{"segment": [56.4010227, 128.0661266926677], "label": "Painting"}]}, "QixK0AeqcsI": {"duration": 13.4, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=QixK0AeqcsI", "annotations": [{"segment": [0.18811420904836193, 
13.23069936973479], "label": "Using uneven bars"}]}, "TSPOjp7vDeo": {"duration": 225.05, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TSPOjp7vDeo", "annotations": []}, "4w_46rTInXI": {"duration": 221.89, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=4w_46rTInXI", "annotations": [{"segment": [0.01, 221.88988662131518], "label": "Beer pong"}]}, "sNeT1WZ9UwA": {"duration": 99.82, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sNeT1WZ9UwA", "annotations": [{"segment": [14.168316692667709, 60.098574102964115], "label": "Horseback riding"}]}, "b8eqn-GTdcc": {"duration": 178.56, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=b8eqn-GTdcc", "annotations": [{"segment": [100.5626886895476, 155.99752262090485], "label": "Polishing forniture"}]}, "EjraNJWCZVs": {"duration": 183.229, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EjraNJWCZVs", "annotations": []}, "sEcXR5b8HyY": {"duration": 193.59, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sEcXR5b8HyY", "annotations": []}, "CBvXpTgK3cA": {"duration": 66.13, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=CBvXpTgK3cA", "annotations": []}, "WwcbpTANbeU": {"duration": 77.83, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WwcbpTANbeU", "annotations": [{"segment": [9.913589210000001, 64.0876922], "label": "Bungee jumping"}]}, "X7-1DRvX-LU": {"duration": 228.35, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=X7-1DRvX-LU", "annotations": [{"segment": [12.64366742823713, 218.8601023845554], "label": "Wrapping presents"}]}, "w9M6GUCtoQQ": {"duration": 232.71, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=w9M6GUCtoQQ", 
"annotations": [{"segment": [101.6663448, 203.365269], "label": "Getting a tattoo"}]}, "FK73jqIGUzE": {"duration": 17.299, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FK73jqIGUzE", "annotations": [{"segment": [0.01, 17.298366], "label": "Welding"}]}, "ckzLOLHjP44": {"duration": 32.462, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ckzLOLHjP44", "annotations": [{"segment": [1.468616850796481, 31.094162137285494], "label": "Hurling"}]}, "kkcTQHFNXAg": {"duration": 33.74, "subset": "validation", "resolution": "408x720", "url": "https://www.youtube.com/watch?v=kkcTQHFNXAg", "annotations": [{"segment": [0.01, 20.23789236895476], "label": "Using parallel bars"}]}, "LwJFjFdJy5c": {"duration": 33.112, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LwJFjFdJy5c", "annotations": [{"segment": [0.826499977005883, 33.111654], "label": "Disc dog"}]}, "1jjsTfZS5DY": {"duration": 172.9, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1jjsTfZS5DY", "annotations": [{"segment": [83.07619047619048, 108.1608843537415], "label": "Wrapping presents"}]}, "pOGc06VJF4k": {"duration": 92.253, "subset": "testing", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=pOGc06VJF4k", "annotations": []}, "shpZ47Mvxfg": {"duration": 179.05, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=shpZ47Mvxfg", "annotations": [{"segment": [30.38643213728549, 46.06975195007801], "label": "Putting on makeup"}, {"segment": [48.310226209048366, 69.31467238689547], "label": "Putting on makeup"}, {"segment": [71.27508736349453, 91.15929641185649], "label": "Putting on makeup"}, {"segment": [93.67982995319812, 123.64617316692669], "label": "Putting on makeup"}, {"segment": [126.72682527301092, 146.33097503900157], "label": "Putting on makeup"}, {"segment": [148.01133073322933, 160.8940577223089], 
"label": "Putting on makeup"}]}, "mV07bEBkIcM": {"duration": 42.052, "subset": "testing", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=mV07bEBkIcM", "annotations": []}, "obUkL-Ya8dE": {"duration": 211.81, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=obUkL-Ya8dE", "annotations": [{"segment": [130.3296294851794, 183.51865444617783], "label": "Fixing bicycle"}, {"segment": [29.850665160000002, 126.58308744000001], "label": "Fixing bicycle"}]}, "OCDRmpS8N5s": {"duration": 166.627, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OCDRmpS8N5s", "annotations": []}, "e1_oskOyQoU": {"duration": 162.099, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=e1_oskOyQoU", "annotations": [{"segment": [0.009999999999999998, 161.962], "label": "Sharpening knives"}]}, "Q3tPDohXUYc": {"duration": 58.35, "subset": "validation", "resolution": "632x360", "url": "https://www.youtube.com/watch?v=Q3tPDohXUYc", "annotations": [{"segment": [21.574460878315133, 24.760562695787833], "label": "Triple jump"}, {"segment": [41.78344954914196, 48.88390502808113], "label": "Triple jump"}]}, "EVfTWz5GRGw": {"duration": 126.53, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EVfTWz5GRGw", "annotations": [{"segment": [102.64128542901715, 121.98521999063962], "label": "Washing face"}]}, "eksIn1NfWJg": {"duration": 45.442, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eksIn1NfWJg", "annotations": [{"segment": [4.03721840873635, 43.20531981279251], "label": "Putting in contact lenses"}]}, "7pYrfYFoxdc": {"duration": 189.22, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7pYrfYFoxdc", "annotations": [{"segment": [33.652126036061844, 58.743623074883], "label": "Slacklining"}, {"segment": [66.12347521996881, 130.475785925117], "label": 
"Slacklining"}, {"segment": [148.48262515912637, 155.56728321840873], "label": "Slacklining"}]}, "4dyn1oKJO34": {"duration": 79.74, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4dyn1oKJO34", "annotations": [{"segment": [0, 3.3562979719188766], "label": "Playing lacrosse"}]}, "29kX1NaPGP0": {"duration": 99.61, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=29kX1NaPGP0", "annotations": [{"segment": [37.28461778471139, 99.581], "label": "Tennis serve with ball bouncing"}]}, "CBPGCziyRP8": {"duration": 7.501, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CBPGCziyRP8", "annotations": []}, "a4gI4LC1rJk": {"duration": 82.175, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=a4gI4LC1rJk", "annotations": []}, "yL7tvoBkkkI": {"duration": 26.05, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yL7tvoBkkkI", "annotations": [{"segment": [0.01, 4.745892355694228], "label": "Hand washing clothes"}]}, "_CJaovqcgtM": {"duration": 8.174, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_CJaovqcgtM", "annotations": [{"segment": [1.9050702028081123, 7.315469578783151], "label": "Doing a powerbomb"}]}, "4E89GQ2UnoQ": {"duration": 174.08, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=4E89GQ2UnoQ", "annotations": [{"segment": [0.2, 14.933315132605303], "label": "Rafting"}, {"segment": [23.350274570982837, 37.469045241809674], "label": "Rafting"}, {"segment": [47.24357878315133, 100.46048361934476], "label": "Rafting"}, {"segment": [108.06289859594384, 128.96953978159127], "label": "Rafting"}, {"segment": [133.04226209048363, 159.1076848673947], "label": "Rafting"}]}, "Y8WY_c3onSY": {"duration": 5.016, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Y8WY_c3onSY", 
"annotations": [{"segment": [0.2, 4.70320551782897], "label": "Fun sliding down"}]}, "3HHAEmr0Q34": {"duration": 12.7, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3HHAEmr0Q34", "annotations": [{"segment": [0.9283416536661465, 12.28571294851794], "label": "Tennis serve with ball bouncing"}]}, "EvJqfGXb5Fo": {"duration": 158.175, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=EvJqfGXb5Fo", "annotations": [{"segment": [0.2, 11.844567692911799], "label": "Raking leaves"}, {"segment": [105.61406188455538, 128.56291178003121], "label": "Raking leaves"}]}, "XQ4owd3yQ_4": {"duration": 30.349, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XQ4owd3yQ_4", "annotations": [{"segment": [4.261093806552262, 28.1705646099844], "label": "Surfing"}]}, "TPqgLJpxyvY": {"duration": 221.415, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TPqgLJpxyvY", "annotations": [{"segment": [13.817223088923557, 203.45860998439937], "label": "Rollerblading"}]}, "9rHQ4INoBdQ": {"duration": 44.47, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=9rHQ4INoBdQ", "annotations": []}, "aIXUWoP-L-E": {"duration": 58.375, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=aIXUWoP-L-E", "annotations": [{"segment": [0.01, 54.74355147599475], "label": "Cutting the grass"}]}, "mLijl36SjJU": {"duration": 144.173, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mLijl36SjJU", "annotations": [{"segment": [10.346246687962758, 56.00468315875492], "label": "Installing carpet"}]}, "LPV3n9LeQ80": {"duration": 125.95, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LPV3n9LeQ80", "annotations": [{"segment": [57.33691334691083, 60.27955430485476], "label": "Shoveling snow"}]}, "0hdwFR5qWz4": {"duration": 
228.74, "subset": "validation", "resolution": "242x144", "url": "https://www.youtube.com/watch?v=0hdwFR5qWz4", "annotations": [{"segment": [133.40188409905824, 145.1412503774514], "label": "Table soccer"}, {"segment": [155.45766316755447, 164.35112246936748], "label": "Table soccer"}]}, "hSYfK-W2UJQ": {"duration": 96.22, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hSYfK-W2UJQ", "annotations": [{"segment": [29.117757472926865, 41.25810460243287], "label": "Spinning"}, {"segment": [52.798928416901546, 68.68629626539088], "label": "Spinning"}, {"segment": [78.12878761372514, 89.51973047353823], "label": "Spinning"}]}, "JkcoGLKl_0A": {"duration": 30.047, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JkcoGLKl_0A", "annotations": [{"segment": [0.2, 29.846621315192746], "label": "Swinging at the playground"}]}, "x6E92fGgdH4": {"duration": 134.93, "subset": "validation", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=x6E92fGgdH4", "annotations": [{"segment": [12.000763140000002, 126.3610631302652], "label": "Washing hands"}]}, "OCT2h1kmZoA": {"duration": 173.43, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=OCT2h1kmZoA", "annotations": [{"segment": [14.610314241141074, 160.44289410296412], "label": "Snowboarding"}]}, "5mxY_p7N_Yw": {"duration": 189.011, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5mxY_p7N_Yw", "annotations": [{"segment": [0, 36.56364028710808], "label": "Playing ice hockey"}, {"segment": [45.11481422522207, 163.94664515832332], "label": "Playing ice hockey"}, {"segment": [170.4337426286167, 189.01043083900225], "label": "Playing ice hockey"}]}, "0VoNAs7Ia0A": {"duration": 148.283, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0VoNAs7Ia0A", "annotations": [{"segment": [0.01, 148.221], "label": "Doing a powerbomb"}]}, 
"K8ivAsscK98": {"duration": 205.636, "subset": "training", "resolution": "424x320", "url": "https://www.youtube.com/watch?v=K8ivAsscK98", "annotations": [{"segment": [16.36102104492343, 87.57958324047247], "label": "Playing drums"}]}, "M_LSDRzVN38": {"duration": 43.584, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=M_LSDRzVN38", "annotations": [{"segment": [0.2, 43.383852999999995], "label": "Hitting a pinata"}]}, "88s9AYMyhTk": {"duration": 161.89, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=88s9AYMyhTk", "annotations": []}, "gWyBBQtsDhc": {"duration": 217.5, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gWyBBQtsDhc", "annotations": [{"segment": [66.54660596000001, 183.69887647347892], "label": "Brushing hair"}]}, "znhZPUF6xQw": {"duration": 75.47, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=znhZPUF6xQw", "annotations": [{"segment": [3.588547826053042, 63.00548724102964], "label": "Tumbling"}]}, "SHWgBc4Ahc8": {"duration": 181.697, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SHWgBc4Ahc8", "annotations": []}, "-PbWrGLuHT8": {"duration": 53.13, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=-PbWrGLuHT8", "annotations": [{"segment": [7.459364976599063, 53.127256235827666], "label": "Windsurfing"}]}, "TPAG_7k1MrY": {"duration": 153.461, "subset": "testing", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=TPAG_7k1MrY", "annotations": []}, "Cb_R59_-0zU": {"duration": 99.99000000000001, "subset": "training", "resolution": "638x360", "url": "https://www.youtube.com/watch?v=Cb_R59_-0zU", "annotations": [{"segment": [62.45925585023401, 64.79855382215288], "label": "Washing hands"}, {"segment": [68.22952418096725, 70.56882215288611], "label": "Washing hands"}, {"segment": [73.37597971918876, 
74.93551170046801], "label": "Washing hands"}, {"segment": [76.96290327613104, 78.36648205928238], "label": "Washing hands"}]}, "lBfyQsXSvUk": {"duration": 126.2, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lBfyQsXSvUk", "annotations": [{"segment": [20.06318247425897, 93.2347891450858], "label": "Getting a tattoo"}, {"segment": [14.87898, 17.242706000000002], "label": "Getting a tattoo"}]}, "qkHBMmMlEr8": {"duration": 71.31, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qkHBMmMlEr8", "annotations": [{"segment": [27.143919319812795, 71.308411], "label": "Layup drill in basketball"}]}, "ybcStSlBGPc": {"duration": 235.335, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ybcStSlBGPc", "annotations": [{"segment": [99.4713744149766, 165.5409219968799], "label": "Carving jack-o-lanterns"}]}, "kbdBKIWKOWk": {"duration": 52.94, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=kbdBKIWKOWk", "annotations": [{"segment": [4.955496692667707, 32.82289059000001], "label": "Doing step aerobics"}, {"segment": [37.308581520000004, 40.46988965678627], "label": "Doing step aerobics"}]}, "792896eLpfM": {"duration": 137.72, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=792896eLpfM", "annotations": []}, "nXBuKxYvgbk": {"duration": 77.044, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=nXBuKxYvgbk", "annotations": []}, "UnpBbcIa3zA": {"duration": 72.05, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=UnpBbcIa3zA", "annotations": [{"segment": [9.491551482059283, 36.67445631825273], "label": "Discus throw"}]}, "XkVicWlqTV4": {"duration": 64.784, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XkVicWlqTV4", "annotations": [{"segment": [0.01, 16.574917999375973], "label": 
"Playing beach volleyball"}, {"segment": [33.85730201092044, 47.29915624212168], "label": "Playing beach volleyball"}]}, "Vx4ZW7Xseyg": {"duration": 164.4, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Vx4ZW7Xseyg", "annotations": [{"segment": [17.54937975, 126.93256766999997], "label": "Springboard diving"}]}, "MNSzt39_UbE": {"duration": 82.198, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=MNSzt39_UbE", "annotations": [{"segment": [0.01, 82.141], "label": "Tug of war"}]}, "hDPLy21Yyuk": {"duration": 76.23, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hDPLy21Yyuk", "annotations": [{"segment": [21.392480499219968, 76.161], "label": "Cleaning shoes"}]}, "_uKKSGTNJAY": {"duration": 37.036, "subset": "validation", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=_uKKSGTNJAY", "annotations": [{"segment": [11.544773790951638, 30.24730733229329], "label": "Fun sliding down"}]}, "Vkf3eEsHWOY": {"duration": 120.93, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Vkf3eEsHWOY", "annotations": []}, "fny1HWXezlE": {"duration": 144.8, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fny1HWXezlE", "annotations": [{"segment": [0.19999999999999998, 136.69237733987504], "label": "Camel ride"}]}, "8jUdeuAOEJg": {"duration": 73.91, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8jUdeuAOEJg", "annotations": [{"segment": [2.1907544900435476, 58.57385689169064], "label": "Breakdancing"}]}, "KDPa4AvVt0s": {"duration": 191.57, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KDPa4AvVt0s", "annotations": [{"segment": [34.51749495842434, 159.73680567340094], "label": "Dodgeball"}]}, "eWK9ZfcDvyM": {"duration": 91.74, "subset": "testing", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=eWK9ZfcDvyM", "annotations": []}, "PCTqA_ov8RA": {"duration": 182.022, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PCTqA_ov8RA", "annotations": [{"segment": [7.099111719570824, 23.285086249609982], "label": "Surfing"}, {"segment": [48.27395929797191, 49.97774609672387], "label": "Surfing"}, {"segment": [56.792893291731666, 66.73164961778471], "label": "Surfing"}, {"segment": [96.83188306240248, 104.21495919032762], "label": "Surfing"}, {"segment": [110.46217745241809, 121.82075611076444], "label": "Surfing"}, {"segment": [124.66040077535101, 128.63590330577222], "label": "Surfing"}, {"segment": [138.57465963182526, 150.78513168954757], "label": "Surfing"}, {"segment": [160.72388801560064, 177.19382707020281], "label": "Surfing"}]}, "XFijgUPprk4": {"duration": 55.87, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XFijgUPprk4", "annotations": [{"segment": [2.8761590171606866, 48.72039062402496], "label": "Using parallel bars"}]}, "gRg50bxJxcI": {"duration": 23.615, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=gRg50bxJxcI", "annotations": []}, "89dNrk4VM1c": {"duration": 80.9, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=89dNrk4VM1c", "annotations": []}, "xFx7UXyjW4Y": {"duration": 57.145, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=xFx7UXyjW4Y", "annotations": []}, "Fr9F2xRLd0A": {"duration": 70.543, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Fr9F2xRLd0A", "annotations": [{"segment": [10.244309337009893, 65.70935606598185], "label": "Canoeing"}]}, "xQxT2_meU50": {"duration": 2.56, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=xQxT2_meU50", "annotations": [{"segment": [0, 2.436666], "label": "Chopping wood"}]}, "K5wPwCFVkhU": {"duration": 
15.79, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=K5wPwCFVkhU", "annotations": [{"segment": [0.01, 15.789568], "label": "Washing face"}]}, "-lGMxgEoYVE": {"duration": 24.03, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-lGMxgEoYVE", "annotations": []}, "vwWlNsCCQQw": {"duration": 50.829, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=vwWlNsCCQQw", "annotations": []}, "Xj0LbNWxJpE": {"duration": 52.872, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Xj0LbNWxJpE", "annotations": []}, "cgWhl3ZDuTQ": {"duration": 138.97, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=cgWhl3ZDuTQ", "annotations": [{"segment": [0, 86.96142644999999], "label": "Discus throw"}]}, "NgFjrooP-FA": {"duration": 202.386, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=NgFjrooP-FA", "annotations": []}, "zOI9SjQpc_Q": {"duration": 84.683, "subset": "testing", "resolution": "484x352", "url": "https://www.youtube.com/watch?v=zOI9SjQpc_Q", "annotations": []}, "8874vqLm958": {"duration": 178.91, "subset": "training", "resolution": "852x480", "url": "https://www.youtube.com/watch?v=8874vqLm958", "annotations": [{"segment": [45.48264476981713, 82.2536914987891], "label": "Elliptical trainer"}]}, "xi1oCOBPAzQ": {"duration": 230.644, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xi1oCOBPAzQ", "annotations": [{"segment": [77.34666926677068, 179.53179251170047], "label": "Ice fishing"}]}, "w--X02F3MHM": {"duration": 35.83, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=w--X02F3MHM", "annotations": [{"segment": [0.22357809048361937, 5.086401558502341], "label": "Hopscotch"}, {"segment": [15.92993894695788, 22.469598093603743], "label": "Hopscotch"}, {"segment": [26.829370858034324, 
29.512307943837754], "label": "Hopscotch"}, {"segment": [30.909671009360377, 32.86597930109204], "label": "Hopscotch"}]}, "dQs2-z3TIes": {"duration": 58.75, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=dQs2-z3TIes", "annotations": [{"segment": [13.27542108, 49.43475900000001], "label": "Ballet"}, {"segment": [6.63771054, 10.60835268], "label": "Ballet"}]}, "8oG9huHwmLw": {"duration": 225.326, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8oG9huHwmLw", "annotations": []}, "zFb7PKk_-vA": {"duration": 94.25, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zFb7PKk_-vA", "annotations": [{"segment": [0.19999999999999998, 94.00099999999999], "label": "River tubing"}]}, "o-RbNz6gD5k": {"duration": 41.33, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=o-RbNz6gD5k", "annotations": [{"segment": [8.509339466879302, 32.95541423323444], "label": "Ping-pong"}]}, "9_7Z6Nhkzx8": {"duration": 196.51, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9_7Z6Nhkzx8", "annotations": [{"segment": [63.76614432449298, 184.55393693915755], "label": "Doing motocross"}]}, "2FcdD7WapJM": {"duration": 56.448, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2FcdD7WapJM", "annotations": [{"segment": [5.9871440633342505, 24.861572673554907], "label": "Gargling mouthwash"}]}, "r0XjfYHrAkk": {"duration": 71.727, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=r0XjfYHrAkk", "annotations": []}, "OLUOk4xfYWE": {"duration": 161.73, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=OLUOk4xfYWE", "annotations": [{"segment": [0.01, 161.661], "label": "Playing piano"}]}, "D3d3CkKm2b4": {"duration": 71.216, "subset": "testing", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=D3d3CkKm2b4", "annotations": []}, "T4NXGL9efME": {"duration": 212.74, "subset": "testing", "resolution": "3414x1080", "url": "https://www.youtube.com/watch?v=T4NXGL9efME", "annotations": []}, "t7J7SugZPlE": {"duration": 74.47, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=t7J7SugZPlE", "annotations": [{"segment": [0.01, 74.466393], "label": "Removing curlers"}]}, "JdxO6sLceBc": {"duration": 145.172, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JdxO6sLceBc", "annotations": []}, "Et4GHTvGbg0": {"duration": 59.77, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Et4GHTvGbg0", "annotations": [{"segment": [20.932843009360372, 38.46235074102964], "label": "Shoveling snow"}]}, "8b3ZBE0n3V8": {"duration": 37.803, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8b3ZBE0n3V8", "annotations": [{"segment": [16.92542651326053, 31.020119672386894], "label": "Arm wrestling"}]}, "f1d7_TKXNYQ": {"duration": 55.54, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=f1d7_TKXNYQ", "annotations": []}, "OD4MrhX85-M": {"duration": 32.926, "subset": "validation", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=OD4MrhX85-M", "annotations": [{"segment": [10.117686427457098, 18.078302652106085], "label": "Shuffleboard"}]}, "LoS0yfHXQjU": {"duration": 91.116, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LoS0yfHXQjU", "annotations": []}, "iEWe6IYVIuk": {"duration": 50.62, "subset": "training", "resolution": "320x180", "url": "https://www.youtube.com/watch?v=iEWe6IYVIuk", "annotations": [{"segment": [19.41174102964119, 27.855059282371297], "label": "Spinning"}, {"segment": [40.16494383775351, 48.371533541341655], "label": "Spinning"}]}, "vAzTt06gpgE": {"duration": 146.124, "subset": "training", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=vAzTt06gpgE", "annotations": [{"segment": [0.2, 142.6625678627145], "label": "Running a marathon"}]}, "UGCqUiaia7k": {"duration": 8.57, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UGCqUiaia7k", "annotations": []}, "HT2xgemwHgQ": {"duration": 130.543, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=HT2xgemwHgQ", "annotations": []}, "RX-9yj3PkYI": {"duration": 106.7, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RX-9yj3PkYI", "annotations": [{"segment": [55.64952829410874, 59.135168481024635], "label": "Drinking coffee"}, {"segment": [83.36866692339223, 85.19447844987198], "label": "Drinking coffee"}, {"segment": [104.2336146645866, 104.89858190327614], "label": "Drinking coffee"}]}, "FRsHew2Pcko": {"duration": 61.673, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FRsHew2Pcko", "annotations": [{"segment": [0.2, 50.5033738317757], "label": "Playing drums"}]}, "lfuNoeRYWz4": {"duration": 104.97999999999999, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=lfuNoeRYWz4", "annotations": [{"segment": [46.249348673946955, 78.66482137285492], "label": "Washing hands"}]}, "fo0gFCcKdtg": {"duration": 51.27, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fo0gFCcKdtg", "annotations": [{"segment": [1.6363148399999998, 42.027184087363494], "label": "Kayaking"}]}, "E9R1H8xRIW8": {"duration": 24.96, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=E9R1H8xRIW8", "annotations": [{"segment": [0.01, 20.113243252730108], "label": "Mowing the lawn"}]}, "3Lt8fMFoS30": {"duration": 133.56, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=3Lt8fMFoS30", "annotations": []}, "Fu46pdVz4qY": {"duration": 106.39, "subset": 
"training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=Fu46pdVz4qY", "annotations": [{"segment": [57.24578003120126, 76.99142589703588], "label": "Ironing clothes"}]}, "4lxS8OJRsa8": {"duration": 170.458, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=4lxS8OJRsa8", "annotations": [{"segment": [27.676288346085595, 142.86948772303265], "label": "Capoeira"}]}, "GYkKMTl-0Os": {"duration": 62.69, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=GYkKMTl-0Os", "annotations": [{"segment": [11.489746109999999, 51.87030741], "label": "Preparing salad"}]}, "5RLeo8LymY4": {"duration": 12.63, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=5RLeo8LymY4", "annotations": [{"segment": [0, 12.631654], "label": "Kayaking"}]}, "XdqHO4x2FL0": {"duration": 68.128, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XdqHO4x2FL0", "annotations": [{"segment": [30.396679663026518, 49.102328686427455], "label": "Arm wrestling"}]}, "s_6odqEAcyE": {"duration": 141.155, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=s_6odqEAcyE", "annotations": []}, "4DCxc6mO864": {"duration": 166.21, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4DCxc6mO864", "annotations": [{"segment": [0.01, 164.13407112611037], "label": "Zumba"}]}, "uhAIe9rcjSo": {"duration": 76.811, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=uhAIe9rcjSo", "annotations": [{"segment": [32.11450187831513, 76.81117913832199], "label": "Cleaning sink"}]}, "m16Cn9VA3Lk": {"duration": 29.652, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=m16Cn9VA3Lk", "annotations": [{"segment": [0.2, 29.501], "label": "Using the monkey bar"}]}, "ncXZIWMNKZQ": {"duration": 30.14, "subset": "validation", "resolution": 
"852x480", "url": "https://www.youtube.com/watch?v=ncXZIWMNKZQ", "annotations": [{"segment": [0.2, 6.9372403423137285], "label": "Knitting"}, {"segment": [11.296452628472359, 12.42141063780362], "label": "Knitting"}, {"segment": [18.42118668757034, 24.702202239669873], "label": "Knitting"}]}, "WJn0HtVCrJU": {"duration": 81.78, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=WJn0HtVCrJU", "annotations": []}, "uMCyo89mK_E": {"duration": 104.746, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=uMCyo89mK_E", "annotations": [{"segment": [0.2, 94.12362443681748], "label": "Decorating the Christmas tree"}]}, "Z-6dR4H2dns": {"duration": 159.684, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Z-6dR4H2dns", "annotations": [{"segment": [12.70488893841468, 25.90800881559072], "label": "Hand car wash"}, {"segment": [48.577516529232604, 62.52798281455068], "label": "Hand car wash"}, {"segment": [81.46075848748235, 91.17626179332888], "label": "Hand car wash"}, {"segment": [97.65326399722655, 99.14795565054601], "label": "Hand car wash"}, {"segment": [113.59665270514819, 134.27323642121684], "label": "Hand car wash"}, {"segment": [139.50466121684866, 148.72193347581901], "label": "Hand car wash"}]}, "SgnBsgrqfj0": {"duration": 237.355, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SgnBsgrqfj0", "annotations": [{"segment": [90.14476365054601, 224.17763728549141], "label": "Making a lemonade"}]}, "h-N9TvaBax8": {"duration": 63.39, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=h-N9TvaBax8", "annotations": [{"segment": [17.9490392925117, 25.86046157020281], "label": "Dodgeball"}]}, "c1RR1cmS9LU": {"duration": 150.744, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=c1RR1cmS9LU", "annotations": [{"segment": [80.66329608736349, 
111.47056077379095], "label": "Rollerblading"}, {"segment": [138.0447661903276, 141.80748554134166], "label": "Rollerblading"}]}, "Ig_AvatvYrk": {"duration": 232.36, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ig_AvatvYrk", "annotations": [{"segment": [19.21245453355549, 138.83717144059915], "label": "Getting a haircut"}]}, "c-3fceLb7n4": {"duration": 76.069, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=c-3fceLb7n4", "annotations": [{"segment": [5.755580053134098, 9.79041967801161], "label": "Shuffleboard"}, {"segment": [16.31736613001935, 19.75884698653252], "label": "Shuffleboard"}, {"segment": [25.21774765548445, 27.235167467923205], "label": "Shuffleboard"}, {"segment": [34.47414444196815, 37.79695354480846], "label": "Shuffleboard"}, {"segment": [44.917258765180534, 47.40936559231077], "label": "Shuffleboard"}, {"segment": [51.08818995616967, 53.10560976860843], "label": "Shuffleboard"}, {"segment": [61.412632525709185, 64.7354416285495], "label": "Shuffleboard"}]}, "3V3bOyZzzSk": {"duration": 117.61, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=3V3bOyZzzSk", "annotations": []}, "i4yQ54eWfy4": {"duration": 137.37, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=i4yQ54eWfy4", "annotations": [{"segment": [40.712806754183546, 118.17023432427699], "label": "Preparing salad"}]}, "uM7QSskmloo": {"duration": 147.215, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uM7QSskmloo", "annotations": []}, "EznRcs1DffA": {"duration": 162.17000000000002, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EznRcs1DffA", "annotations": []}, "Pl4zAGveukQ": {"duration": 129.103, "subset": "training", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=Pl4zAGveukQ", "annotations": [{"segment": [52.75016603883405, 
97.378736132292], "label": "Removing ice from car"}]}, "u9ec3Exc5mI": {"duration": 60.581, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=u9ec3Exc5mI", "annotations": [{"segment": [0.01, 56.10949453631962], "label": "Hitting a pinata"}]}, "vKnQegYrYrg": {"duration": 182.21, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vKnQegYrYrg", "annotations": []}, "-lER_VO9LUo": {"duration": 25.45, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=-lER_VO9LUo", "annotations": [{"segment": [0.3279333720137338, 12.774985811266072], "label": "Walking the dog"}, {"segment": [16.104770540238036, 25.22203825051841], "label": "Walking the dog"}]}, "utEbOY3UDDI": {"duration": 8.499, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=utEbOY3UDDI", "annotations": []}, "ZblmMtkVXIc": {"duration": 45.488, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZblmMtkVXIc", "annotations": [{"segment": [25.69327570093458, 40.132471962616826], "label": "Mooping floor"}]}, "LURZ8QDfowU": {"duration": 172.06, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=LURZ8QDfowU", "annotations": [{"segment": [0.01, 172.001], "label": "Hand car wash"}]}, "q2VG0zzPJMw": {"duration": 31.138, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=q2VG0zzPJMw", "annotations": [{"segment": [0.8732948517940718, 28.624664586583464], "label": "Welding"}]}, "aOrDnKEJAwQ": {"duration": 144.986, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aOrDnKEJAwQ", "annotations": [{"segment": [57.787813084112145, 118.300753894081], "label": "Scuba diving"}]}, "qkk2tK19sx8": {"duration": 51.11, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qkk2tK19sx8", "annotations": [{"segment": 
[8.690590745709828, 42.73538201560063], "label": "Using the pommel horse"}]}, "Y7aacWy3WDo": {"duration": 173.41, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Y7aacWy3WDo", "annotations": []}, "ay_YB-S4qR0": {"duration": 34.621, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ay_YB-S4qR0", "annotations": [{"segment": [0, 34.42095238095238], "label": "Throwing darts"}]}, "q2KR3lzTrq4": {"duration": 195.581, "subset": "validation", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=q2KR3lzTrq4", "annotations": [{"segment": [3.8139952808996713, 28.528684701129542], "label": "Fun sliding down"}, {"segment": [87.72189146069243, 93.51916428765993], "label": "Fun sliding down"}, {"segment": [159.11988393270684, 167.66323340560382], "label": "Fun sliding down"}]}, "WRv7Kpf1KT0": {"duration": 30.186, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=WRv7Kpf1KT0", "annotations": [{"segment": [0.2, 30.18594], "label": "Applying sunscreen"}]}, "PwyvQ3BKziA": {"duration": 91.02, "subset": "validation", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=PwyvQ3BKziA", "annotations": [{"segment": [20.860010920436817, 42.28764118564742], "label": "Washing face"}]}, "Mm-bwu8Q2IU": {"duration": 82.501, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Mm-bwu8Q2IU", "annotations": [{"segment": [6.845837758851854, 15.841221727600308], "label": "Disc dog"}, {"segment": [27.27820991643763, 30.747857443985982], "label": "Disc dog"}, {"segment": [37.68715351875233, 41.28530703900156], "label": "Disc dog"}, {"segment": [54.392866291338, 72.12662292685202], "label": "Disc dog"}]}, "4YF9R3Sz2bA": {"duration": 64.993, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4YF9R3Sz2bA", "annotations": []}, "ODblEia5mcI": {"duration": 119.235, "subset": "training", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=ODblEia5mcI", "annotations": [{"segment": [0.2, 114.36601404056162], "label": "Assembling bicycle"}]}, "KpoH4pBp9fg": {"duration": 222.982, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KpoH4pBp9fg", "annotations": [{"segment": [52.86114196567863, 197.55146333853355], "label": "Making a cake"}]}, "tTBbFGv9emI": {"duration": 231.9, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=tTBbFGv9emI", "annotations": [{"segment": [4.341298333853354, 87.54951639937597], "label": "Chopping wood"}, {"segment": [108.53245834633385, 225.0239636380655], "label": "Chopping wood"}]}, "TkhUtx0Eyfw": {"duration": 188.45, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TkhUtx0Eyfw", "annotations": []}, "zpaS2iYrJjk": {"duration": 60.02, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=zpaS2iYrJjk", "annotations": []}, "aq-pRXpvrD8": {"duration": 209.468, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=aq-pRXpvrD8", "annotations": []}, "eLeGJBfqURA": {"duration": 151.464, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eLeGJBfqURA", "annotations": [{"segment": [9.451711263650546, 29.300304917316694], "label": "Disc dog"}, {"segment": [35.443917238689544, 45.36821406552262], "label": "Disc dog"}, {"segment": [47.731141881435256, 73.72334785647426], "label": "Disc dog"}, {"segment": [77.97661792511701, 122.8722464274571], "label": "Disc dog"}, {"segment": [123.34483199063963, 125.70775980655226], "label": "Disc dog"}, {"segment": [130.90620100156008, 148.8644524024961], "label": "Disc dog"}]}, "cxDPCkefl1A": {"duration": 73.816, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cxDPCkefl1A", "annotations": [{"segment": [15.776636986567897, 34.66253733853354], "label": 
"Making a cake"}]}, "tNIfKjCEPq4": {"duration": 50.968, "subset": "testing", "resolution": "524x352", "url": "https://www.youtube.com/watch?v=tNIfKjCEPq4", "annotations": []}, "gyQ3NBwXhDU": {"duration": 208.237, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gyQ3NBwXhDU", "annotations": [{"segment": [0.01, 155.90714508580342], "label": "Hurling"}]}, "X1vyuNazaIc": {"duration": 13.445, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=X1vyuNazaIc", "annotations": [{"segment": [0.2, 7.265346333853353], "label": "Decorating the Christmas tree"}]}, "M7tUnCF9lU4": {"duration": 40.426, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=M7tUnCF9lU4", "annotations": [{"segment": [0.01, 39.35636364742529], "label": "Fun sliding down"}]}, "Db6sq4DjW7E": {"duration": 43.07, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Db6sq4DjW7E", "annotations": [{"segment": [24.172792511700465, 26.25433853354134], "label": "Layup drill in basketball"}, {"segment": [29.813110764430576, 36.594921996879876], "label": "Layup drill in basketball"}]}, "sfT9Siql3P8": {"duration": 34.529, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sfT9Siql3P8", "annotations": [{"segment": [0.2, 34.297], "label": "Riding bumper cars"}]}, "BnswTxq-Hbg": {"duration": 10.148, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BnswTxq-Hbg", "annotations": []}, "5wOmHw6Boj8": {"duration": 39.59, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=5wOmHw6Boj8", "annotations": [{"segment": [1.6355171606864274, 6.140904056162246], "label": "Washing dishes"}, {"segment": [18.546147425897036, 25.396804212168487], "label": "Washing dishes"}]}, "wvnP2VwQb6E": {"duration": 221.425, "subset": "testing", "resolution": "432x344", "url": 
"https://www.youtube.com/watch?v=wvnP2VwQb6E", "annotations": []}, "t6iq95QPZIg": {"duration": 108.763, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=t6iq95QPZIg", "annotations": [{"segment": [0, 93.49143302028081], "label": "Scuba diving"}]}, "ZOKC86lF6E8": {"duration": 83.128, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=ZOKC86lF6E8", "annotations": [{"segment": [8.429459355244958, 32.93973348049569], "label": "Disc dog"}, {"segment": [33.458468780031204, 72.36366503588144], "label": "Disc dog"}, {"segment": [72.62303301092044, 78.45881244929798], "label": "Disc dog"}]}, "cFCN9QE1M0c": {"duration": 42.98, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=cFCN9QE1M0c", "annotations": [{"segment": [1.18186773, 40.16871754853673], "label": "Playing flauta"}]}, "F1zAvnkqa0c": {"duration": 59.838, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=F1zAvnkqa0c", "annotations": []}, "q_eRxmeGJBY": {"duration": 98.03999999999999, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=q_eRxmeGJBY", "annotations": [{"segment": [16.976358793127236, 95.43466513572544], "label": "Using the pommel horse"}]}, "sW5Fp_XgoiY": {"duration": 109.111, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sW5Fp_XgoiY", "annotations": []}, "yHXZZEucV1I": {"duration": 201.341, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=yHXZZEucV1I", "annotations": [{"segment": [9.108996218352136, 148.8849721123245], "label": "Making a lemonade"}]}, "X3476L-FelA": {"duration": 123.577, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=X3476L-FelA", "annotations": []}, "Z_lah8-xYx8": {"duration": 27.423, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=Z_lah8-xYx8", "annotations": [{"segment": [5.948883587824385, 15.644942494366441], "label": "Shuffleboard"}, {"segment": [20.727898485020646, 25.938996223338403], "label": "Shuffleboard"}]}, "lAa8tS962Qg": {"duration": 220.94, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=lAa8tS962Qg", "annotations": [{"segment": [111.25107005630447, 220.937867], "label": "Skateboarding"}]}, "RpH774VD6Hw": {"duration": 100.4, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RpH774VD6Hw", "annotations": [{"segment": [20.51919035945111, 29.44738769142602], "label": "Archery"}, {"segment": [41.19501575981406, 73.61846922856506], "label": "Archery"}, {"segment": [79.57059981279251, 92.10140293291731], "label": "Archery"}]}, "R6kXT4Spiwo": {"duration": 157.52, "subset": "validation", "resolution": "1440x1440", "url": "https://www.youtube.com/watch?v=R6kXT4Spiwo", "annotations": [{"segment": [40.05095631825274, 130.96417004680188], "label": "Plataform diving"}]}, "ZpuZEui-Bu0": {"duration": 151.16, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=ZpuZEui-Bu0", "annotations": [{"segment": [98.96964399, 120.90950919], "label": "Playing squash"}, {"segment": [31.09054176, 85.03057674], "label": "Playing squash"}]}, "8lRi0udDXqk": {"duration": 224.886, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=8lRi0udDXqk", "annotations": []}, "efaYmJsTDJc": {"duration": 74.026, "subset": "training", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=efaYmJsTDJc", "annotations": [{"segment": [26.51993647975078, 37.24321514330218], "label": "Gargling mouthwash"}]}, "lk2niPrG3y8": {"duration": 28.724, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=lk2niPrG3y8", "annotations": [{"segment": [0.2, 26.703714508580347], "label": "Rope skipping"}]}, "w0d32MVTY9Q": 
{"duration": 43.027, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=w0d32MVTY9Q", "annotations": [{"segment": [20.125273010920434, 29.181645865834632], "label": "Calf roping"}]}, "DWal5PJry6k": {"duration": 109.83, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=DWal5PJry6k", "annotations": [{"segment": [2.7382464898595944, 25.15763962558502], "label": "Washing hands"}, {"segment": [70.33870670826832, 79.58028861154446], "label": "Washing hands"}, {"segment": [96.52318876755069, 97.89231201248049], "label": "Washing hands"}]}, "yOFIXQCbSqc": {"duration": 37.57, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yOFIXQCbSqc", "annotations": [{"segment": [5.985007209237535, 37.46880491951791], "label": "Playing water polo"}]}, "EbRPwTlRLr0": {"duration": 13.306, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EbRPwTlRLr0", "annotations": []}, "PSh-caJvSHU": {"duration": 194.746, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=PSh-caJvSHU", "annotations": [{"segment": [16.65787990675793, 125.08553457256411], "label": "Carving jack-o-lanterns"}]}, "bUCMSDMB7MU": {"duration": 210.954, "subset": "training", "resolution": "854x474", "url": "https://www.youtube.com/watch?v=bUCMSDMB7MU", "annotations": [{"segment": [5.265604692214899, 52.32694662888556], "label": "Disc dog"}, {"segment": [61.2126545469982, 193.51097243889754], "label": "Disc dog"}]}, "y47RXYfefvQ": {"duration": 12.562, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=y47RXYfefvQ", "annotations": [{"segment": [0.01, 12.561994], "label": "Removing ice from car"}]}, "iyGve9RCjV4": {"duration": 233.5, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=iyGve9RCjV4", "annotations": [{"segment": [32.733622607921674, 178.21638878504672], "label": 
"Capoeira"}]}, "cyznGwlE9hM": {"duration": 233.78, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=cyznGwlE9hM", "annotations": [{"segment": [50.694558459889414, 106.4950436711346], "label": "Discus throw"}, {"segment": [8.023599180701922, 45.22392265486538], "label": "Shot put"}, {"segment": [117.07160622751441, 159.01314739936538], "label": "Discus throw"}]}, "Kzpxiedbf2Y": {"duration": 136.63, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Kzpxiedbf2Y", "annotations": []}, "CJCONq_i1cQ": {"duration": 238.632, "subset": "training", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=CJCONq_i1cQ", "annotations": [{"segment": [0.01, 238.541], "label": "Removing ice from car"}]}, "mB90PudOrnU": {"duration": 27.38, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mB90PudOrnU", "annotations": [{"segment": [0.01, 27.119377414196567], "label": "Bathing dog"}]}, "RdfM7bCMDeM": {"duration": 96.387, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RdfM7bCMDeM", "annotations": []}, "9ZboVy59qrw": {"duration": 115.03, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9ZboVy59qrw", "annotations": [{"segment": [46.830270410000004, 94.84420981], "label": "Playing polo"}]}, "Qmo9D73hWXY": {"duration": 203.709, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Qmo9D73hWXY", "annotations": [{"segment": [25.7416562131873, 195.445907199688], "label": "Playing ice hockey"}]}, "3fAVDJyrSQs": {"duration": 75.93, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3fAVDJyrSQs", "annotations": [{"segment": [6.041174606864274, 74.86318336349453], "label": "Chopping wood"}]}, "DuLweixeP8I": {"duration": 91.069, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=DuLweixeP8I", 
"annotations": [{"segment": [0.01, 25.145759406318547], "label": "Doing fencing"}, {"segment": [38.32814725678584, 59.461382459325584], "label": "Doing fencing"}, {"segment": [66.69202264624147, 70.80356314468384], "label": "Doing fencing"}, {"segment": [73.6391083160234, 83.84707093284584], "label": "Doing fencing"}, {"segment": [88.95105224125705, 91.021], "label": "Doing fencing"}]}, "K_hvDDWcjDQ": {"duration": 182.393, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=K_hvDDWcjDQ", "annotations": []}, "KSbIw1m2KoU": {"duration": 222.59, "subset": "training", "resolution": "470x360", "url": "https://www.youtube.com/watch?v=KSbIw1m2KoU", "annotations": [{"segment": [14.235570982839313, 101.38504212168486], "label": "Using the balance beam"}, {"segment": [124.6592496099844, 156.95259282371293], "label": "Using the balance beam"}]}, "USwMjvs45MU": {"duration": 122.37, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=USwMjvs45MU", "annotations": [{"segment": [26.645865834633387, 105.3352352], "label": "Long jump"}]}, "ReOsTqcQBmY": {"duration": 187.734, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ReOsTqcQBmY", "annotations": [{"segment": [0.01, 146.27435146254484], "label": "Rope skipping"}, {"segment": [148.02614015616848, 172.84314664917002], "label": "Rope skipping"}]}, "IRua6QGP-Ow": {"duration": 175.54, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IRua6QGP-Ow", "annotations": []}, "5OWJ7WqKWMU": {"duration": 150.95, "subset": "validation", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=5OWJ7WqKWMU", "annotations": [{"segment": [0.01, 26.846542073927854], "label": "Brushing teeth"}, {"segment": [83.60107400214376, 103.38273658293271], "label": "Brushing teeth"}]}, "pmIJcJiydpk": {"duration": 12.54, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=pmIJcJiydpk", "annotations": [{"segment": [4.6096446, 10.42615685179407], "label": "Hopscotch"}]}, "UZm05CvtxkA": {"duration": 51.18, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UZm05CvtxkA", "annotations": []}, "N-92_rikI8U": {"duration": 123.391, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=N-92_rikI8U", "annotations": [{"segment": [21.55967378471139, 91.05112232293291], "label": "Snowboarding"}]}, "H50kgN8JkrQ": {"duration": 95.39, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=H50kgN8JkrQ", "annotations": []}, "jB4jEtHC5RI": {"duration": 219.127, "subset": "testing", "resolution": "1920x1440", "url": "https://www.youtube.com/watch?v=jB4jEtHC5RI", "annotations": []}, "b7Q-Jj9ZEH0": {"duration": 90.907, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=b7Q-Jj9ZEH0", "annotations": [{"segment": [0.01, 90.841], "label": "Fun sliding down"}]}, "_I7igpv-A0A": {"duration": 30.09, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_I7igpv-A0A", "annotations": []}, "S8oIiWRiIfE": {"duration": 28.75, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=S8oIiWRiIfE", "annotations": [{"segment": [21.48124595631825, 23.813239254290174], "label": "Drinking coffee"}]}, "qp9OcGPufEc": {"duration": 185.737, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qp9OcGPufEc", "annotations": [{"segment": [0, 173.85624056162246], "label": "Disc dog"}]}, "Ie7DREiXp08": {"duration": 161.008, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ie7DREiXp08", "annotations": [{"segment": [13.061423600624025, 143.9268408299532], "label": "Putting in contact lenses"}]}, "w_X7cLnFCYw": {"duration": 104.96000000000001, "subset": "training", "resolution": 
"720x480", "url": "https://www.youtube.com/watch?v=w_X7cLnFCYw", "annotations": [{"segment": [33.39139469578783, 83.80585335413417], "label": "Triple jump"}, {"segment": [9.982778471138845, 13.094664586583464], "label": "Triple jump"}]}, "VqZr2oZk4z4": {"duration": 30.117, "subset": "training", "resolution": "711x480", "url": "https://www.youtube.com/watch?v=VqZr2oZk4z4", "annotations": [{"segment": [0, 3.8924539781591267], "label": "Scuba diving"}, {"segment": [5.158673946957878, 7.222143525741029], "label": "Scuba diving"}, {"segment": [8.1131872074883, 9.613892355694228], "label": "Scuba diving"}, {"segment": [10.317347893915755, 10.88011232449298], "label": "Scuba diving"}, {"segment": [12.005641185647427, 14.022213728549143], "label": "Scuba diving"}, {"segment": [15.52291887675507, 16.13258034321373], "label": "Scuba diving"}, {"segment": [17.680182527301092, 17.91466770670827], "label": "Scuba diving"}, {"segment": [22.979547581903276, 29.861], "label": "Scuba diving"}]}, "ywWHBghVyJ4": {"duration": 30.279, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=ywWHBghVyJ4", "annotations": [{"segment": [8.833290953406843, 15.210264770670825], "label": "Waxing skis"}, {"segment": [25.03552897035881, 27.208423937597505], "label": "Waxing skis"}]}, "gzVpwbiB9fE": {"duration": 34.34, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gzVpwbiB9fE", "annotations": [{"segment": [4.9290059112568585, 16.769335328515183], "label": "Tennis serve with ball bouncing"}]}, "8NRv-75RlPI": {"duration": 81.131, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=8NRv-75RlPI", "annotations": [{"segment": [31.377609984399378, 69.46091887675507], "label": "Elliptical trainer"}]}, "qokr0bO828E": {"duration": 188.85, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qokr0bO828E", "annotations": [{"segment": [38.899101733281974, 
173.00074659309504], "label": "Shaving"}]}, "RLdlDewtKbE": {"duration": 100.94, "subset": "training", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=RLdlDewtKbE", "annotations": [{"segment": [4.094174274882995, 74.32501002897259], "label": "Cricket"}]}, "IwViXKNNBEU": {"duration": 9.85, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IwViXKNNBEU", "annotations": [{"segment": [2.00237455, 6.79590505], "label": "Discus throw"}]}, "5X7zeOps9uA": {"duration": 109.575, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=5X7zeOps9uA", "annotations": [{"segment": [0.01, 109.57496598639456], "label": "Hitting a pinata"}]}, "ZlVydB9uUe4": {"duration": 71.332, "subset": "testing", "resolution": "626x360", "url": "https://www.youtube.com/watch?v=ZlVydB9uUe4", "annotations": []}, "Rvs3tRNxEjE": {"duration": 113.5, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Rvs3tRNxEjE", "annotations": []}, "o8EOFXW5GJo": {"duration": 185.34, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=o8EOFXW5GJo", "annotations": [{"segment": [55.35724258970359, 177.92367004680185], "label": "Preparing salad"}]}, "tLkNfGM7GOQ": {"duration": 178.32999999999998, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tLkNfGM7GOQ", "annotations": []}, "dTZZq23pYFY": {"duration": 91.93, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=dTZZq23pYFY", "annotations": [{"segment": [41.30297365366614, 52.48919568486739], "label": "Drinking coffee"}]}, "xvWw66PvUNg": {"duration": 72.261, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xvWw66PvUNg", "annotations": []}, "SYh-j9bK_ls": {"duration": 185.2, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SYh-j9bK_ls", "annotations": []}, "8GJWokVCzUM": 
{"duration": 42.68, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=8GJWokVCzUM", "annotations": [{"segment": [22.50430154134165, 29.095798146645862], "label": "Long jump"}]}, "zJX_Oh4yVnc": {"duration": 216.2, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zJX_Oh4yVnc", "annotations": [{"segment": [27.03272758978317, 70.13814108643832], "label": "Doing motocross"}, {"segment": [131.76541194493748, 168.4723644029283], "label": "Doing motocross"}]}, "qzt9U8tEdJQ": {"duration": 183.646, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qzt9U8tEdJQ", "annotations": [{"segment": [0.01, 183.64662131519273], "label": "Skiing"}]}, "ClW3USojCoE": {"duration": 176.1, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ClW3USojCoE", "annotations": [{"segment": [23.89866926677067, 92.29830889235569], "label": "Rock climbing"}, {"segment": [106.3078736349454, 142.01852886115444], "label": "Rock climbing"}, {"segment": [153.5558174726989, 157.4015803432137], "label": "Rock climbing"}]}, "MidLOhRnpnU": {"duration": 102.75, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MidLOhRnpnU", "annotations": []}, "qkN9uA8izVE": {"duration": 53.848, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qkN9uA8izVE", "annotations": [{"segment": [0.01, 53.847073], "label": "Arm wrestling"}]}, "wRNmTUPzVq0": {"duration": 230.18, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wRNmTUPzVq0", "annotations": []}, "PdNb0g36a6U": {"duration": 69.06, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PdNb0g36a6U", "annotations": [{"segment": [41.47382565210608, 61.054205078003115], "label": "Using the pommel horse"}]}, "y5j9TqTy9Xw": {"duration": 44.84, "subset": "training", "resolution": "854x480", 
"url": "https://www.youtube.com/watch?v=y5j9TqTy9Xw", "annotations": [{"segment": [0, 39.6611767327836], "label": "High jump"}]}, "awrIHVi-1OM": {"duration": 135.187, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=awrIHVi-1OM", "annotations": []}, "evr-Bzs4UJE": {"duration": 79.064, "subset": "testing", "resolution": "224x128", "url": "https://www.youtube.com/watch?v=evr-Bzs4UJE", "annotations": []}, "KlmlCbJup5A": {"duration": 48.879, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=KlmlCbJup5A", "annotations": [{"segment": [20.67623028021907, 25.769272583485012], "label": "Hanging wallpaper"}, {"segment": [41.12441505004799, 46.59753513713976], "label": "Hanging wallpaper"}]}, "3H7ZS0E90pY": {"duration": 107.93, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3H7ZS0E90pY", "annotations": [{"segment": [3.33412038, 95.78439157566304], "label": "Using the balance beam"}]}, "mouMMOraPyw": {"duration": 176.12, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mouMMOraPyw", "annotations": [{"segment": [6.040561622464899, 162.82059282371296], "label": "Kayaking"}]}, "HMs703DhjQU": {"duration": 74.53999999999999, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HMs703DhjQU", "annotations": []}, "rWHm1-o_zL0": {"duration": 162.378, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rWHm1-o_zL0", "annotations": [{"segment": [93.28515420560747, 117.06327570093458], "label": "Mooping floor"}]}, "wJlRDfqjNOM": {"duration": 129.289, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=wJlRDfqjNOM", "annotations": []}, "lTDkfbr7znU": {"duration": 184.32, "subset": "validation", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=lTDkfbr7znU", "annotations": [{"segment": [0.8590360661531867, 
179.82579345340048], "label": "Cumbia"}]}, "P0A0rMgjXGQ": {"duration": 158.593, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=P0A0rMgjXGQ", "annotations": [{"segment": [22.493963251033705, 137.1835823490119], "label": "Installing carpet"}]}, "9qUbxwjh-4A": {"duration": 14.95, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9qUbxwjh-4A", "annotations": [{"segment": [0.27304177999999996, 5.70441997], "label": "Smoking a cigarette"}, {"segment": [7.34296971, 12.387501511076444], "label": "Smoking a cigarette"}]}, "-aGpz-Ndctw": {"duration": 51.433, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=-aGpz-Ndctw", "annotations": []}, "shBiO7aGy6k": {"duration": 159.382, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=shBiO7aGy6k", "annotations": [{"segment": [5.967492917316693, 145.20899432137284], "label": "Slacklining"}]}, "R7DhZaY3A08": {"duration": 83.06, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=R7DhZaY3A08", "annotations": [{"segment": [0.01, 83.05780045351474], "label": "Playing squash"}]}, "Ry1jXcxnsDs": {"duration": 140.899, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ry1jXcxnsDs", "annotations": []}, "cudeag10U7Q": {"duration": 168.484, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cudeag10U7Q", "annotations": [{"segment": [3.1492334753215925, 29.130409646724733], "label": "Elliptical trainer"}, {"segment": [35.166440474424455, 42.51465142990654], "label": "Elliptical trainer"}, {"segment": [45.139012629283485, 46.97606546884735], "label": "Elliptical trainer"}, {"segment": [49.07555442834891, 54.84914906697819], "label": "Elliptical trainer"}, {"segment": [59.5729992258567, 75.84403866199378], "label": "Elliptical trainer"}, {"segment": [81.61763330062305, 
91.85264197819315], "label": "Elliptical trainer"}, {"segment": [95.00187541744548, 97.10136437694703], "label": "Elliptical trainer"}, {"segment": [109.69829813395638, 123.8698486105919], "label": "Elliptical trainer"}, {"segment": [134.3672934080997, 138.8287074470405], "label": "Elliptical trainer"}, {"segment": [147.22666328504675, 155.62461912305298], "label": "Elliptical trainer"}]}, "DV1ITGBfo5w": {"duration": 155.783, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=DV1ITGBfo5w", "annotations": [{"segment": [61.924952706417194, 149.93888261295925], "label": "Kneeling"}]}, "Eeq-noKrGwY": {"duration": 45.953, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Eeq-noKrGwY", "annotations": [{"segment": [0.01, 44.46580823913646], "label": "Gargling mouthwash"}]}, "QElXIVBV-gk": {"duration": 213.86, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QElXIVBV-gk", "annotations": [{"segment": [89.69708651282784, 196.95808570597427], "label": "Preparing pasta"}]}, "-5K3KZ6fHuI": {"duration": 39.591, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-5K3KZ6fHuI", "annotations": [{"segment": [16.181880658346333, 33.90783389859594], "label": "Snow tubing"}]}, "wUg9S2xx850": {"duration": 30.91, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wUg9S2xx850", "annotations": [{"segment": [0.01, 22.85386785023401], "label": "Playing kickball"}]}, "HE2alYPoG60": {"duration": 156.155, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HE2alYPoG60", "annotations": []}, "wUXpeZHrTWw": {"duration": 215.85, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=wUXpeZHrTWw", "annotations": [{"segment": [12.456531981279252, 201.34314820592826], "label": "Tai chi"}]}, "sqjfhVDIN24": {"duration": 163.585, "subset": "testing", 
"resolution": "720x480", "url": "https://www.youtube.com/watch?v=sqjfhVDIN24", "annotations": []}, "zQ7SL6hxS-c": {"duration": 221.194, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zQ7SL6hxS-c", "annotations": []}, "MyMYuXh8E0Y": {"duration": 226.349, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MyMYuXh8E0Y", "annotations": [{"segment": [85.06484021394776, 103.39833247563], "label": "Gargling mouthwash"}]}, "J4UFrrwKUQA": {"duration": 172.97, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=J4UFrrwKUQA", "annotations": [{"segment": [0.01, 21.58107120124805], "label": "Sailing"}, {"segment": [68.78966445397815, 134.3421682277691], "label": "Sailing"}]}, "gLfvk2SSj1c": {"duration": 138.02, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=gLfvk2SSj1c", "annotations": [{"segment": [5.813588143525741, 8.182087016814005], "label": "Rock climbing"}, {"segment": [40.91043508407003, 47.80061362454498], "label": "Rock climbing"}, {"segment": [56.84397259282372, 62.87287877691108], "label": "Rock climbing"}, {"segment": [100.98417858346333, 103.99863167550703], "label": "Rock climbing"}]}, "okh34JcrQQc": {"duration": 55.57, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=okh34JcrQQc", "annotations": []}, "1pOmOTz-bzk": {"duration": 63.53, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=1pOmOTz-bzk", "annotations": [{"segment": [4.24310191, 59.56537807634755], "label": "Kayaking"}]}, "lGldNWWnjzQ": {"duration": 89.722, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lGldNWWnjzQ", "annotations": []}, "kkLRLb8TGwI": {"duration": 126.87, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kkLRLb8TGwI", "annotations": [{"segment": [15.834487638008921, 
19.001385165610706], "label": "Playing water polo"}, {"segment": [21.178627215836933, 103.91382496099844], "label": "Playing water polo"}]}, "gLFCNQe9akU": {"duration": 29.211, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gLFCNQe9akU", "annotations": []}, "dSdZz_Royyc": {"duration": 202.386, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dSdZz_Royyc", "annotations": [{"segment": [32.828741372784165, 172.6665531818552], "label": "Wakeboarding"}]}, "PzrtfuUx7GY": {"duration": 130.404, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PzrtfuUx7GY", "annotations": []}, "AXA-G5DdNrU": {"duration": 90.047, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AXA-G5DdNrU", "annotations": [{"segment": [0, 14.469328182651116], "label": "Rafting"}, {"segment": [64.479821707154, 69.81802035569422], "label": "Rafting"}, {"segment": [79.79202326365053, 83.72543286115445], "label": "Rafting"}]}, "0wwx4wnHv-U": {"duration": 8.78, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0wwx4wnHv-U", "annotations": [{"segment": [0.6385951638065523, 6.255485959438378], "label": "Tennis serve with ball bouncing"}]}, "zO5JMTzLDHw": {"duration": 33.251, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zO5JMTzLDHw", "annotations": []}, "3-KLYPzd1zU": {"duration": 117.702, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=3-KLYPzd1zU", "annotations": [{"segment": [8.247266355140185, 49.48359813084112], "label": "River tubing"}, {"segment": [72.39267133956386, 106.13286448598132], "label": "River tubing"}]}, "waTjpd7yDkc": {"duration": 71.541, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=waTjpd7yDkc", "annotations": [{"segment": [0.01, 71.501], "label": "Rollerblading"}]}, "G0WjPZqVwPo": 
{"duration": 212.39, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=G0WjPZqVwPo", "annotations": []}, "KWhXvv1WtFM": {"duration": 14.188, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KWhXvv1WtFM", "annotations": [{"segment": [0.5523010920436817, 3.512634945397816], "label": "Doing fencing"}]}, "LCyLWiw7n5Q": {"duration": 116.263, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=LCyLWiw7n5Q", "annotations": [{"segment": [0.01, 109.27208510093801], "label": "Having an ice cream"}]}, "dm5ZnhOeBBQ": {"duration": 106.67, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=dm5ZnhOeBBQ", "annotations": [{"segment": [49.75799124180968, 57.579481503900155], "label": "Vacuuming floor"}, {"segment": [86.53563694227769, 95.35561532293292], "label": "Vacuuming floor"}]}, "M-bUoaIqtDk": {"duration": 29.7, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=M-bUoaIqtDk", "annotations": [{"segment": [0.23165617781173833, 29.37400334652842], "label": "Windsurfing"}]}, "dukaFaotZGc": {"duration": 117.401, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dukaFaotZGc", "annotations": [{"segment": [0.2, 115.56810577691108], "label": "Fixing the roof"}]}, "9AqHhUuE9bE": {"duration": 33.67, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=9AqHhUuE9bE", "annotations": [{"segment": [0.5777820204368175, 33.248728994227776], "label": "Polishing shoes"}]}, "Tfqln6lL7q0": {"duration": 122.346, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Tfqln6lL7q0", "annotations": [{"segment": [12.976330733229329, 117.55029017160686], "label": "Skiing"}]}, "G0DPDo44wt8": {"duration": 21.53, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=G0DPDo44wt8", 
"annotations": [{"segment": [0.01, 21.524897959183672], "label": "Clean and jerk"}]}, "z-iuSgXKUcw": {"duration": 169.18, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=z-iuSgXKUcw", "annotations": [{"segment": [0.01, 169.18061224489796], "label": "Playing violin"}]}, "YGBldj7DUq4": {"duration": 215.97, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YGBldj7DUq4", "annotations": [{"segment": [135.2203143525741, 201.56751716068644], "label": "Kayaking"}]}, "APQWGGPPjes": {"duration": 149.583, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=APQWGGPPjes", "annotations": []}, "CiCNA-Nr1QQ": {"duration": 82.013, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=CiCNA-Nr1QQ", "annotations": [{"segment": [0.2, 6.781084897817681], "label": "Playing ten pins"}, {"segment": [8.188479876610032, 18.296134724300536], "label": "Playing ten pins"}, {"segment": [27.89200957970292, 47.21170428857971], "label": "Playing ten pins"}, {"segment": [65.63578401095228, 82.01274376417234], "label": "Playing ten pins"}]}, "94w7SEcPDho": {"duration": 230.993, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=94w7SEcPDho", "annotations": [{"segment": [21.256659906396255, 64.49054446177847], "label": "BMX"}, {"segment": [71.69619188767551, 112.04781747269891], "label": "BMX"}, {"segment": [123.93713572542902, 161.76678471138845], "label": "BMX"}, {"segment": [180.14118564742589, 220.51190639625585], "label": "BMX"}]}, "PbzmcZ_IORE": {"duration": 77.37, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PbzmcZ_IORE", "annotations": [{"segment": [0, 75.53114820592823], "label": "Walking the dog"}]}, "Af_0zV79zsk": {"duration": 83.17, "subset": "testing", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=Af_0zV79zsk", "annotations": []}, "WOkHZ33CgAs": 
{"duration": 20.922, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WOkHZ33CgAs", "annotations": [{"segment": [5.342408736349453, 11.053726989079564], "label": "Doing a powerbomb"}]}, "cYvFlrAnlzA": {"duration": 111.503, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cYvFlrAnlzA", "annotations": []}, "jmS3NFo4XCc": {"duration": 39.173, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=jmS3NFo4XCc", "annotations": [{"segment": [3.422208224648986, 34.16097138533541], "label": "Throwing darts"}]}, "1uC0xzyx4Bc": {"duration": 211.975, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1uC0xzyx4Bc", "annotations": [{"segment": [118.64734536354564, 182.03156966261108], "label": "Laying tile"}]}, "NB4IBjUmrcI": {"duration": 26.91, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NB4IBjUmrcI", "annotations": [{"segment": [0.24273722000000003, 7.58459624], "label": "Doing kickboxing"}, {"segment": [9.58677464, 21.660394939157566], "label": "Doing kickboxing"}]}, "rt1kYX3slRI": {"duration": 69.68, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rt1kYX3slRI", "annotations": [{"segment": [11.730215288611545, 63.66556318252731], "label": "Using parallel bars"}]}, "Ur_ToogEkCA": {"duration": 14.44, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ur_ToogEkCA", "annotations": []}, "df5rlVZD0Zc": {"duration": 194.212, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=df5rlVZD0Zc", "annotations": []}, "sgUMHHuAhZg": {"duration": 203.663, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=sgUMHHuAhZg", "annotations": [{"segment": [8.26086097049324, 203.6619954648526], "label": "Capoeira"}]}, "4OCbTYrThtc": {"duration": 125.968, "subset": 
"validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4OCbTYrThtc", "annotations": [{"segment": [2.15752492211838, 18.437031152647975], "label": "Bullfighting"}, {"segment": [25.498021806853583, 30.20534890965732], "label": "Bullfighting"}, {"segment": [77.2786199376947, 122.60611370716512], "label": "Bullfighting"}]}, "wE9sQbGdeAk": {"duration": 109.459, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wE9sQbGdeAk", "annotations": []}, "m1ECYUCexX8": {"duration": 189.591, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=m1ECYUCexX8", "annotations": []}, "KA6YYIl2z4E": {"duration": 105.001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KA6YYIl2z4E", "annotations": [{"segment": [34.78237327662507, 77.72962598589956], "label": "Putting in contact lenses"}]}, "1KVxpHAC08g": {"duration": 137.207, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1KVxpHAC08g", "annotations": [{"segment": [1.580983815095135, 136.17822680574935], "label": "Having an ice cream"}]}, "wKThOOUV6lY": {"duration": 167.347, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wKThOOUV6lY", "annotations": [{"segment": [58.54546723868955, 167.181], "label": "Tug of war"}]}, "03BMaaMEcNo": {"duration": 186.781, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=03BMaaMEcNo", "annotations": [{"segment": [12.817042121684867, 159.33913728549143], "label": "Surfing"}]}, "IlCsGkFnRkc": {"duration": 174.986, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IlCsGkFnRkc", "annotations": [{"segment": [0, 174.78557823129253], "label": "Decorating the Christmas tree"}]}, "wnnoaLzYqVQ": {"duration": 180.512, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wnnoaLzYqVQ", 
"annotations": [{"segment": [33.51157046281851, 154.32219003045836], "label": "Rafting"}]}, "YnB0VW34wlY": {"duration": 26.08, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=YnB0VW34wlY", "annotations": [{"segment": [0.01, 26.005], "label": "Chopping wood"}]}, "T-rV3KKFOgg": {"duration": 51.526, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=T-rV3KKFOgg", "annotations": [{"segment": [10.4565770693984, 11.258461804600891], "label": "Hand car wash"}, {"segment": [21.76315183575354, 24.569748408962262], "label": "Hand car wash"}, {"segment": [30.503695449460707, 36.99896180460089], "label": "Hand car wash"}]}, "G6FhQuR3_88": {"duration": 134.073, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=G6FhQuR3_88", "annotations": [{"segment": [1.045803736349454, 132.60791376911075], "label": "Playing congas"}]}, "UZBHdQZc0ZM": {"duration": 133.26, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UZBHdQZc0ZM", "annotations": [{"segment": [8.93939274305666, 24.947142538762776], "label": "Snow tubing"}, {"segment": [29.10499962855657, 37.42071380814416], "label": "Snow tubing"}, {"segment": [55.50739214874718, 61.53628492894818], "label": "Snow tubing"}, {"segment": [78.16771328812335, 98.12542731913358], "label": "Snow tubing"}, {"segment": [101.24381958346335, 107.6884980374415], "label": "Snow tubing"}, {"segment": [112.26214081123244, 127.43831910608425], "label": "Snow tubing"}]}, "Fyi7pbkKk7w": {"duration": 145.57, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Fyi7pbkKk7w", "annotations": [{"segment": [27.02393383354382, 137.84477013728548], "label": "Chopping wood"}]}, "PMvs8Dbbxts": {"duration": 117.981, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PMvs8Dbbxts", "annotations": []}, "s2PFIKG5PYI": {"duration": 17.53, "subset": 
"training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=s2PFIKG5PYI", "annotations": [{"segment": [7.244513200000001, 16.095723200000002], "label": "Polishing forniture"}]}, "aYmIsBUmHTo": {"duration": 142.2, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aYmIsBUmHTo", "annotations": []}, "iKAs4UYyzSI": {"duration": 32.555, "subset": "testing", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=iKAs4UYyzSI", "annotations": []}, "WzSZJ-yJX48": {"duration": 78.25, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WzSZJ-yJX48", "annotations": [{"segment": [59.93972286782627, 68.1188693728549], "label": "Cricket"}]}, "Wm1CL_g91G4": {"duration": 157.223, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Wm1CL_g91G4", "annotations": [{"segment": [17.65991658441848, 29.43319430736413], "label": "Snow tubing"}, {"segment": [35.07455654960892, 78.73379477219905], "label": "Snow tubing"}, {"segment": [81.92239082216349, 146.6754174352574], "label": "Snow tubing"}]}, "UMUGmKRkaxo": {"duration": 126.18, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UMUGmKRkaxo", "annotations": [{"segment": [0.01, 64.5649486677067], "label": "Ping-pong"}]}, "OT98MiVje0g": {"duration": 90.605, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=OT98MiVje0g", "annotations": [{"segment": [7.770444617784712, 87.75475975039002], "label": "Playing drums"}]}, "eZdao9O8jrw": {"duration": 227.28, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=eZdao9O8jrw", "annotations": [{"segment": [118.42486423400936, 165.58207065054603], "label": "Washing face"}]}, "oW2v7KlfmJk": {"duration": 7.593, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oW2v7KlfmJk", "annotations": [{"segment": [2.0374148642745706, 
6.112244592823713], "label": "Shuffleboard"}]}, "48zOi9j1E0A": {"duration": 164.77, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=48zOi9j1E0A", "annotations": [{"segment": [65.03355061005162, 118.75691765054603], "label": "Paintball"}]}, "AjPT7Eki3AQ": {"duration": 232.64, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=AjPT7Eki3AQ", "annotations": []}, "VFOpGMVouVg": {"duration": 203.523, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VFOpGMVouVg", "annotations": [{"segment": [32.38582359620915, 175.8046833073323], "label": "Braiding hair"}]}, "xSiT1pgUEm8": {"duration": 233.756, "subset": "training", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=xSiT1pgUEm8", "annotations": [{"segment": [0, 222.0531809672387], "label": "Rafting"}]}, "jw2Y3L3iSOw": {"duration": 11.66, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jw2Y3L3iSOw", "annotations": []}, "0T14tLwBafQ": {"duration": 38.429, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=0T14tLwBafQ", "annotations": [{"segment": [2.5869946605527883, 28.592812417562133], "label": "Playing beach volleyball"}]}, "F559bkkKSp8": {"duration": 21.502, "subset": "validation", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=F559bkkKSp8", "annotations": [{"segment": [0.2, 21.299], "label": "Camel ride"}]}, "J7B7DAIg-MA": {"duration": 59.676, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=J7B7DAIg-MA", "annotations": []}, "VM-ldOw7e4Y": {"duration": 222.82, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VM-ldOw7e4Y", "annotations": [{"segment": [16.670527301092044, 213.26298595943837], "label": "Long jump"}]}, "CCRPXH8ui-s": {"duration": 229.692, "subset": "validation", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=CCRPXH8ui-s", "annotations": [{"segment": [37.50798166324645, 63.227744255686225], "label": "Fun sliding down"}, {"segment": [206.47253313857996, 211.8308154372967], "label": "Fun sliding down"}]}, "U37UAWdI-vY": {"duration": 88.03, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=U37UAWdI-vY", "annotations": [{"segment": [2.8817176287051485, 82.90253354134164], "label": "Playing harmonica"}]}, "7m--xUKvqkc": {"duration": 99.521, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=7m--xUKvqkc", "annotations": [{"segment": [14.86071028037383, 79.56671962616822], "label": "Mooping floor"}]}, "yrvV4Ncqyf0": {"duration": 120.72, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yrvV4Ncqyf0", "annotations": [{"segment": [36.7302672, 67.2181032], "label": "Washing dishes"}, {"segment": [29.944721274570984, 94.16579017160687], "label": "Washing dishes"}]}, "5SyvdU5LWPk": {"duration": 73.863, "subset": "training", "resolution": "640x358", "url": "https://www.youtube.com/watch?v=5SyvdU5LWPk", "annotations": [{"segment": [8.757509135739578, 11.63826871986444], "label": "Hurling"}, {"segment": [17.39978788811416, 20.050086234009363], "label": "Hurling"}, {"segment": [22.930845750390013, 27.07913945397816], "label": "Hurling"}, {"segment": [36.87372180967239, 60.61118022464898], "label": "Hurling"}]}, "J3jICOa0WCk": {"duration": 213.07, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=J3jICOa0WCk", "annotations": [{"segment": [21.93818650705212, 213.06632653061226], "label": "Dodgeball"}]}, "zfi8f5NnYaY": {"duration": 45.95, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=zfi8f5NnYaY", "annotations": [{"segment": [0, 14.758360419656787], "label": "Playing polo"}]}, "H2KVO7LWU5w": {"duration": 51.571, "subset": "training", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=H2KVO7LWU5w", "annotations": [{"segment": [0.2, 51.541], "label": "Blowing leaves"}]}, "JXucFXh58VE": {"duration": 144.893, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JXucFXh58VE", "annotations": [{"segment": [18.309350823012515, 32.549957018688914], "label": "Ice fishing"}, {"segment": [37.52286711876639, 50.407225105330745], "label": "Ice fishing"}, {"segment": [68.49053456015793, 76.40198244305772], "label": "Ice fishing"}, {"segment": [78.88843749297972, 85.21759580187208], "label": "Ice fishing"}, {"segment": [124.32275249609985, 131.55607627769112], "label": "Ice fishing"}]}, "eGl_I8h832w": {"duration": 73.19, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eGl_I8h832w", "annotations": [{"segment": [29.686683576186585, 60.40098312231809], "label": "Getting a haircut"}]}, "G7tMapFIcHg": {"duration": 140.574, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=G7tMapFIcHg", "annotations": []}, "MfFM_a26bR8": {"duration": 107.88, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MfFM_a26bR8", "annotations": [{"segment": [42.247573980000006, 107.33628393135724], "label": "Horseback riding"}, {"segment": [1.4542089200000001, 42.22007956318253], "label": "Horseback riding"}]}, "x9BN93AXNTQ": {"duration": 196.26, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=x9BN93AXNTQ", "annotations": [{"segment": [0.01, 196.201], "label": "Drinking coffee"}]}, "r-_JFgDJRrQ": {"duration": 184.924, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=r-_JFgDJRrQ", "annotations": [{"segment": [54.52509694673501, 79.04696594394387], "label": "Changing car wheel"}, {"segment": [104.434312670466, 114.24306026934956], "label": "Changing car wheel"}, {"segment": [121.16688210385558, 157.5169463120125], "label": "Changing 
car wheel"}]}, "lZcTesK6CfA": {"duration": 225.72, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lZcTesK6CfA", "annotations": [{"segment": [61.78804680187208, 68.12528237129486], "label": "Washing hands"}, {"segment": [73.4063120124805, 77.2790670826833], "label": "Washing hands"}, {"segment": [83.26423400936038, 93.12215600624025], "label": "Washing hands"}, {"segment": [103.68421528861154, 104.74042121684867], "label": "Washing hands"}, {"segment": [112.838, 118.47109828393135], "label": "Washing hands"}, {"segment": [126.92074570982838, 133.96211856474258], "label": "Washing hands"}, {"segment": [223.7396224648986, 225.476], "label": "Washing hands"}]}, "7x_1tRem1gA": {"duration": 58.724, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7x_1tRem1gA", "annotations": [{"segment": [0.01, 57.02195921728315], "label": "Using the monkey bar"}]}, "qA_KTu8oTW4": {"duration": 130.961, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qA_KTu8oTW4", "annotations": [{"segment": [38.818230998439944, 128.1001622948518], "label": "Baton twirling"}]}, "a9ItMklOTyE": {"duration": 100.15, "subset": "training", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=a9ItMklOTyE", "annotations": [{"segment": [20.51653009282371, 98.5261502176287], "label": "Cricket"}]}, "yduDG8EcM0A": {"duration": 67.942, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=yduDG8EcM0A", "annotations": [{"segment": [0, 67.941586], "label": "Making a lemonade"}]}, "sL7KD7QBm3U": {"duration": 14.954, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sL7KD7QBm3U", "annotations": []}, "s0N0PzdwVik": {"duration": 42.7, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=s0N0PzdwVik", "annotations": [{"segment": [0.01, 40.89732371294852], "label": "Playing bagpipes"}]}, 
"uty4eGb_vvU": {"duration": 210.63, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=uty4eGb_vvU", "annotations": []}, "Z6b4oFjIznU": {"duration": 68.87, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Z6b4oFjIznU", "annotations": []}, "dc9m-G9CKmQ": {"duration": 137.463, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dc9m-G9CKmQ", "annotations": [{"segment": [42.89590126119596, 124.87758817708381], "label": "Sharpening knives"}]}, "AWryNQMbcd8": {"duration": 3.251, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AWryNQMbcd8", "annotations": [{"segment": [0.2, 3.001], "label": "Swinging at the playground"}]}, "r1y_ASZDdEo": {"duration": 188.31, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=r1y_ASZDdEo", "annotations": [{"segment": [56.99357785959438, 60.51895380967239], "label": "Cricket"}, {"segment": [60.51895380967239, 62.869204443057725], "label": "Cricket"}, {"segment": [67.86348703900155, 72.27020697659907], "label": "Cricket"}, {"segment": [73.44533229329173, 78.43961488923557], "label": "Cricket"}, {"segment": [86.37171077691107, 112.81203040249609], "label": "Cricket"}]}, "zTzF5R_VhGk": {"duration": 203.8, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zTzF5R_VhGk", "annotations": [{"segment": [30.19769890795632, 197.39758970358815], "label": "Playing harmonica"}]}, "q_QR5HoROhI": {"duration": 166.14, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=q_QR5HoROhI", "annotations": []}, "m4EcgRjCpi8": {"duration": 186.039, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=m4EcgRjCpi8", "annotations": [{"segment": [26.991515934477377, 163.9807150858034], "label": "Using the rowing machine"}]}, "IWoCFfIwv2g": {"duration": 76.53, "subset": "testing", 
"resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=IWoCFfIwv2g", "annotations": []}, "aKvCtSitxJY": {"duration": 96.061, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aKvCtSitxJY", "annotations": [{"segment": [18.88190324729848, 71.06881913514894], "label": "Hula hoop"}]}, "Hxr8DZcDuLI": {"duration": 122.602, "subset": "testing", "resolution": "240x180", "url": "https://www.youtube.com/watch?v=Hxr8DZcDuLI", "annotations": []}, "wfUKi83iJCU": {"duration": 116.31, "subset": "training", "resolution": "630x360", "url": "https://www.youtube.com/watch?v=wfUKi83iJCU", "annotations": [{"segment": [26.491540789391575, 95.98647313416537], "label": "Playing water polo"}]}, "y_Ak7a3oXRY": {"duration": 34.041, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=y_Ak7a3oXRY", "annotations": [{"segment": [5.514341379334703, 21.686206959501558], "label": "Building sandcastles"}]}, "rgrzN8ELIxI": {"duration": 208.562, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rgrzN8ELIxI", "annotations": [{"segment": [25.053425252730108, 208.56163265306122], "label": "Surfing"}]}, "lh8ths6sKAE": {"duration": 32.369, "subset": "validation", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=lh8ths6sKAE", "annotations": [{"segment": [4.746723572542901, 18.633414875195005], "label": "Mooping floor"}]}, "GVa7TvByL5g": {"duration": 47.11, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GVa7TvByL5g", "annotations": []}, "9r5orsK1Tpw": {"duration": 73.38, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9r5orsK1Tpw", "annotations": []}, "LANB732DHbo": {"duration": 99.68, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LANB732DHbo", "annotations": [{"segment": [1.6317316638065522, 94.56273499297971], "label": "Using the balance 
beam"}]}, "dsYLNeumBig": {"duration": 79.02, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dsYLNeumBig", "annotations": [{"segment": [32.05079152825977, 62.005954266146645], "label": "Vacuuming floor"}]}, "R4oYA0Zu-m0": {"duration": 215.737, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=R4oYA0Zu-m0", "annotations": [{"segment": [149.97526302191642, 210.3679866658263], "label": "Wakeboarding"}]}, "-ceM4HHukhE": {"duration": 210.977, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=-ceM4HHukhE", "annotations": [{"segment": [122.10964699843994, 207.0268678221529], "label": "Using the rowing machine"}]}, "0kfJ7Lu4tvo": {"duration": 221.589, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0kfJ7Lu4tvo", "annotations": [{"segment": [55.483155520664724, 61.686241481858566], "label": "Changing car wheel"}, {"segment": [102.35091810394255, 124.06172002929246], "label": "Changing car wheel"}, {"segment": [145.0832901474884, 149.90791279756618], "label": "Changing car wheel"}, {"segment": [171.9633306264931, 174.37564195153197], "label": "Changing car wheel"}]}, "OzRxQDkrhTg": {"duration": 65.434, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OzRxQDkrhTg", "annotations": [{"segment": [0, 53.0391887675507], "label": "Hurling"}]}, "aa0MLYA8F7s": {"duration": 90.33, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aa0MLYA8F7s", "annotations": [{"segment": [23.098414976599063, 51.70119656786272], "label": "Using uneven bars"}]}, "pCEg_GoXoU4": {"duration": 220.079, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pCEg_GoXoU4", "annotations": [{"segment": [28.143653666146644, 57.66016848673947], "label": "Scuba diving"}, {"segment": [73.44807176287053, 112.57461466458658], "label": "Scuba diving"}, 
{"segment": [149.98508112324492, 204.5563120124805], "label": "Scuba diving"}]}, "jzCnWUUUviE": {"duration": 15.74, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=jzCnWUUUviE", "annotations": [{"segment": [0.6254063962558501, 13.378791731669267], "label": "Clean and jerk"}]}, "u2hzK9YyIXY": {"duration": 43.82, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=u2hzK9YyIXY", "annotations": [{"segment": [2.1825612, 39.180219112324494], "label": "Cleaning shoes"}]}, "B_9S_qzlD38": {"duration": 15.419, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=B_9S_qzlD38", "annotations": [{"segment": [0.01, 9.046235569422779], "label": "Building sandcastles"}]}, "hRuHqoXEvsI": {"duration": 28.004, "subset": "validation", "resolution": "400x224", "url": "https://www.youtube.com/watch?v=hRuHqoXEvsI", "annotations": [{"segment": [0, 26.69265977379095], "label": "Baton twirling"}]}, "anfZnOQGPk0": {"duration": 122.28, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=anfZnOQGPk0", "annotations": [{"segment": [7.530573322932917, 37.73804188], "label": "Breakdancing"}, {"segment": [43.285703999999996, 110.67083073322932], "label": "Breakdancing"}]}, "hyv8OkQcgEA": {"duration": 137.532, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=hyv8OkQcgEA", "annotations": [{"segment": [0.01, 87.24261621460958], "label": "Blowing leaves"}, {"segment": [99.66301808376846, 137.281], "label": "Blowing leaves"}]}, "4IMiN1lM7JA": {"duration": 79.529, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4IMiN1lM7JA", "annotations": []}, "Kcy0A0DuckE": {"duration": 118.56, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Kcy0A0DuckE", "annotations": [{"segment": [10.81886919, 46.853323849999995], "label": "Dodgeball"}, {"segment": 
[10.81886919, 46.853323849999995], "label": "Dodgeball"}]}, "yB35e1u3pJs": {"duration": 176.727, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yB35e1u3pJs", "annotations": [{"segment": [62.115585287153465, 122.30724058068611], "label": "Putting in contact lenses"}]}, "hgLDMHCcw4k": {"duration": 93.229, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hgLDMHCcw4k", "annotations": [{"segment": [3.9777376046938318, 92.09768620282466], "label": "Kneeling"}]}, "aXI9v77XV7A": {"duration": 141.27, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aXI9v77XV7A", "annotations": [{"segment": [6.608424336973479, 132.38876755070203], "label": "Rock climbing"}]}, "eTflVlhe0YA": {"duration": 13.84, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eTflVlhe0YA", "annotations": []}, "aWKZSqbOi2Q": {"duration": 191.22, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aWKZSqbOi2Q", "annotations": [{"segment": [5.814914176287052, 104.28155775999998], "label": "Cheerleading"}, {"segment": [110.78157007644306, 188.31375909360375], "label": "Cheerleading"}]}, "1YWyQUYAyvs": {"duration": 37.97, "subset": "testing", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=1YWyQUYAyvs", "annotations": []}, "7o-2My6U3GU": {"duration": 107.88, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7o-2My6U3GU", "annotations": [{"segment": [10.43456208424337, 95.59405264274571], "label": "Shoveling snow"}]}, "HnM44lX65cQ": {"duration": 58.956, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HnM44lX65cQ", "annotations": [{"segment": [0, 56.62298907956318], "label": "Sharpening knives"}]}, "u1upxlAgsqM": {"duration": 47.07, "subset": "training", "resolution": "640x360", "url": 
"https://www.youtube.com/watch?v=u1upxlAgsqM", "annotations": [{"segment": [6.300405616224649, 41.68524180967239], "label": "Layup drill in basketball"}]}, "E3dV8LdAPx8": {"duration": 168.16, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=E3dV8LdAPx8", "annotations": [{"segment": [8.2733736, 162.38746686193977], "label": "Volleyball"}]}, "YMbZ7RFRuYI": {"duration": 179.491, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YMbZ7RFRuYI", "annotations": [{"segment": [27.861590980646028, 37.66215062207931], "label": "BMX"}, {"segment": [58.383333863966804, 60.90347777176394], "label": "BMX"}, {"segment": [84.98485289071427, 87.50499609984399], "label": "BMX"}, {"segment": [111.30635503900156, 122.2269785522621], "label": "BMX"}]}, "Ia3Aa4AO-_E": {"duration": 99.99000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ia3Aa4AO-_E", "annotations": [{"segment": [35.40814808268331, 89.22229384711387], "label": "Brushing teeth"}]}, "OhydQlKoSUw": {"duration": 74.629, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OhydQlKoSUw", "annotations": [{"segment": [18.919122615244746, 61.065106533513045], "label": "Surfing"}]}, "Ieb7EkMxpJk": {"duration": 29.931, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ieb7EkMxpJk", "annotations": [{"segment": [11.234510140405616, 24.33366926677067], "label": "Grooming dog"}]}, "ean7ZR6XmG4": {"duration": 121.603, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ean7ZR6XmG4", "annotations": [{"segment": [38.70045523244929, 42.68432562402496], "label": "Rock-paper-scissors"}]}, "xgb-qIgWHw0": {"duration": 68.801, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xgb-qIgWHw0", "annotations": [{"segment": [0.2, 59.75019812792512], "label": "Playing drums"}]}, 
"RZkwDTyxMZI": {"duration": 109.37, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=RZkwDTyxMZI", "annotations": [{"segment": [34.3609368174727, 78.52710374414977], "label": "Ironing clothes"}]}, "3xUAyMuqBrQ": {"duration": 29.443, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3xUAyMuqBrQ", "annotations": [{"segment": [4.543960998439938, 8.812530421216849], "label": "Making an omelette"}, {"segment": [9.868198127925117, 13.03520124804992], "label": "Making an omelette"}]}, "mfENhAPlES8": {"duration": 31.464, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=mfENhAPlES8", "annotations": [{"segment": [0.2, 28.03870826833073], "label": "Putting in contact lenses"}]}, "pbsFHiYd57w": {"duration": 142.85, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pbsFHiYd57w", "annotations": []}, "i5jZVSzLbYQ": {"duration": 150.0, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=i5jZVSzLbYQ", "annotations": [{"segment": [63.06, 121.78747502808113], "label": "Breakdancing"}, {"segment": [8.882771691107646, 62.41315898751951], "label": "Breakdancing"}]}, "tqqWTxQ5-kY": {"duration": 50.02, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tqqWTxQ5-kY", "annotations": [{"segment": [20.744296648341848, 27.44422992871568], "label": "Getting a piercing"}]}, "NVR52Aed_7s": {"duration": 21.572, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NVR52Aed_7s", "annotations": [{"segment": [0.01, 21.47378939157566], "label": "Kneeling"}]}, "vwiaVgMqw5o": {"duration": 79.529, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=vwiaVgMqw5o", "annotations": []}, "H80bs53Arrw": {"duration": 164.189, "subset": "validation", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=H80bs53Arrw", "annotations": [{"segment": [28.372602803738317, 122.69233644859813], "label": "Hula hoop"}]}, "Rx9SIyFMfcY": {"duration": 160.31, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Rx9SIyFMfcY", "annotations": [{"segment": [1.2493759750390014, 160.17], "label": "Spinning"}]}, "5jfQNanBKAM": {"duration": 69.126, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5jfQNanBKAM", "annotations": [{"segment": [4.205782206798475, 68.26308043342142], "label": "Canoeing"}]}, "M4npKXFKxPA": {"duration": 28.75, "subset": "validation", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=M4npKXFKxPA", "annotations": [{"segment": [14.171344380655226, 25.472543063962558], "label": "Using uneven bars"}]}, "5_M10vevgJg": {"duration": 206.89, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=5_M10vevgJg", "annotations": [{"segment": [0.01, 204.30770798178867], "label": "Waxing skis"}]}, "cp52LdlmlUk": {"duration": 129.96, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=cp52LdlmlUk", "annotations": [{"segment": [0, 129.962107], "label": "Doing step aerobics"}]}, "EKyV_WFsJH0": {"duration": 129.08, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EKyV_WFsJH0", "annotations": [{"segment": [12.283718254852642, 89.61073153130208], "label": "Layup drill in basketball"}]}, "ELLVAzDqshU": {"duration": 44.79, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ELLVAzDqshU", "annotations": []}, "f4IL30BPe2w": {"duration": 132.656, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=f4IL30BPe2w", "annotations": [{"segment": [0.01, 132.621], "label": "Disc dog"}]}, "PZj0ODkGrUg": {"duration": 211.581, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=PZj0ODkGrUg", "annotations": []}, "rBQO0Nf05vg": {"duration": 76.719, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rBQO0Nf05vg", "annotations": [{"segment": [7.767474975452279, 67.87577970093457], "label": "Installing carpet"}]}, "WXMb7XF6k8o": {"duration": 236.473, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WXMb7XF6k8o", "annotations": [{"segment": [55.33657019750177, 205.8520411347066], "label": "Blow-drying hair"}]}, "KgM8_YBJbM4": {"duration": 58.31, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=KgM8_YBJbM4", "annotations": [{"segment": [0, 58.305306], "label": "Spinning"}]}, "1Z3mjpu1I2A": {"duration": 152.6, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1Z3mjpu1I2A", "annotations": [{"segment": [7.258857254290172, 140.77423166926678], "label": "Washing dishes"}]}, "Gfsk28SzgXk": {"duration": 139.065, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Gfsk28SzgXk", "annotations": [{"segment": [99.79653956226278, 138.4134613928775], "label": "Waterskiing"}]}, "oEdRUyN6Kfo": {"duration": 41.471, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oEdRUyN6Kfo", "annotations": [{"segment": [22.56304056162247, 29.868552262090486], "label": "Hula hoop"}]}, "sD4B_NfJ-II": {"duration": 176.031, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sD4B_NfJ-II", "annotations": []}, "wD-9KvI1-AI": {"duration": 115.589, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wD-9KvI1-AI", "annotations": [{"segment": [40.93399066770671, 90.52362693915757], "label": "Gargling mouthwash"}]}, "5M-WpzuvSRw": {"duration": 28.91, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5M-WpzuvSRw", 
"annotations": []}, "_0sDAdyMtnk": {"duration": 126.25, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=_0sDAdyMtnk", "annotations": [{"segment": [7.2872385460218405, 17.528762989079564], "label": "Playing violin"}, {"segment": [21.07390606552262, 24.816001535101403], "label": "Playing violin"}, {"segment": [28.164192218408733, 59.873527513260534], "label": "Playing violin"}, {"segment": [79.1748620405616, 82.52305272386896], "label": "Playing violin"}, {"segment": [90.5981008424337, 122.50438853042121], "label": "Playing violin"}]}, "lGAK_3Jp2I8": {"duration": 40.404, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lGAK_3Jp2I8", "annotations": [{"segment": [0.2, 40.201], "label": "Doing a powerbomb"}, {"segment": [13.11633073322933, 15.134227769110765], "label": "Doing a powerbomb"}]}, "38wV6msixSE": {"duration": 218.13, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=38wV6msixSE", "annotations": []}, "hvrKRg166eQ": {"duration": 186.06, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=hvrKRg166eQ", "annotations": []}, "_akwfjIbsUA": {"duration": 26.216, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=_akwfjIbsUA", "annotations": [{"segment": [8.84716321802827, 26.01532879818594], "label": "Fun sliding down"}]}, "AUPs7Ukfc1I": {"duration": 136.32999999999998, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=AUPs7Ukfc1I", "annotations": [{"segment": [9.570353939157567, 112.29215288611545], "label": "Shaving"}]}, "3CA0DKDv060": {"duration": 192.633, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3CA0DKDv060", "annotations": []}, "nD4VObLUOuE": {"duration": 92.02000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nD4VObLUOuE", "annotations": 
[{"segment": [5.02704104524181, 90.48673881435258], "label": "Hammer throw"}]}, "jEhjwT1aiLM": {"duration": 192.006, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jEhjwT1aiLM", "annotations": []}, "ycYPyXN0CPg": {"duration": 47.091, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ycYPyXN0CPg", "annotations": [{"segment": [0.01, 46.45899624836966], "label": "Painting fence"}]}, "NK0WynwKc34": {"duration": 19.714, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=NK0WynwKc34", "annotations": [{"segment": [0, 19.51374149659864], "label": "Playing congas"}]}, "-EKpX0t44pQ": {"duration": 132.82, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-EKpX0t44pQ", "annotations": [{"segment": [20.71154446177847, 97.56607020280812], "label": "Getting a tattoo"}]}, "k8YJwiYJI9g": {"duration": 101.75, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=k8YJwiYJI9g", "annotations": []}, "RogHmanNFRU": {"duration": 49.78, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=RogHmanNFRU", "annotations": []}, "v_UmAH95xls": {"duration": 117.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=v_UmAH95xls", "annotations": [{"segment": [0.01, 117.05179138321995], "label": "Shaving legs"}]}, "H1_5M9mQ79w": {"duration": 166.63, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=H1_5M9mQ79w", "annotations": [{"segment": [6.758620904836194, 159.08753822152886], "label": "Playing harmonica"}]}, "DHfiz3MNbcc": {"duration": 70.87, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=DHfiz3MNbcc", "annotations": [{"segment": [0, 63.4599535101404], "label": "Mixing drinks"}]}, "s3EHHOAkQYE": {"duration": 238.32999999999998, "subset": "training", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=s3EHHOAkQYE", "annotations": [{"segment": [31.04598948991973, 215.09107688525228], "label": "Fixing bicycle"}]}, "8LB2tdMOZ6g": {"duration": 40.96, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8LB2tdMOZ6g", "annotations": [{"segment": [11.363397815912636, 25.025010920436817], "label": "Playing beach volleyball"}, {"segment": [27.642422776911076, 39.00582059282372], "label": "Playing beach volleyball"}]}, "FKtYeu2bJpA": {"duration": 151.51, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FKtYeu2bJpA", "annotations": []}, "e4YsOfQR3sI": {"duration": 160.5, "subset": "training", "resolution": "320x176", "url": "https://www.youtube.com/watch?v=e4YsOfQR3sI", "annotations": [{"segment": [5.5084542647012, 13.771135661753], "label": "Playing piano"}]}, "yowQI5JxGqo": {"duration": 121.766, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=yowQI5JxGqo", "annotations": []}, "Ye6_xb7h7jc": {"duration": 103.747, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ye6_xb7h7jc", "annotations": [{"segment": [0.01, 96.6770195611615], "label": "Decorating the Christmas tree"}]}, "rzm4V_McRhQ": {"duration": 69.2, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rzm4V_McRhQ", "annotations": [{"segment": [7.281610729329172, 62.5139765577223], "label": "Mixing drinks"}]}, "rA7qVOp47Qg": {"duration": 154.82999999999998, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=rA7qVOp47Qg", "annotations": [{"segment": [5.190075380655227, 152.44337687831515], "label": "Horseback riding"}]}, "zL7Rz4I8UyA": {"duration": 27.56, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zL7Rz4I8UyA", "annotations": [{"segment": [4.127863139015357, 14.146530879875197], "label": "Playing 
polo"}]}, "bLpeUxij7YY": {"duration": 66.64, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bLpeUxij7YY", "annotations": [{"segment": [2.2862496099843996, 64.95027301092044], "label": "Snatch"}]}, "BB8YBkGNgVU": {"duration": 237.98, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BB8YBkGNgVU", "annotations": []}, "xFDQTAemadA": {"duration": 114.498, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xFDQTAemadA", "annotations": [{"segment": [0.01, 114.49759637188208], "label": "Calf roping"}]}, "ZNsX4KYJlVQ": {"duration": 10.566, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZNsX4KYJlVQ", "annotations": [{"segment": [0.8570734914196568, 2.307505553822153], "label": "Throwing darts"}, {"segment": [5.735799519500781, 7.730143605304212], "label": "Throwing darts"}]}, "8r167TmBebg": {"duration": 78.623, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8r167TmBebg", "annotations": [{"segment": [0.01, 78.561], "label": "Tug of war"}]}, "eLcqu3-3lw4": {"duration": 86.472, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=eLcqu3-3lw4", "annotations": []}, "Rvx2EoMScKg": {"duration": 130.66, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Rvx2EoMScKg", "annotations": [{"segment": [12.736056942277692, 87.72596021840874], "label": "Playing piano"}]}, "UGWOtDmIcns": {"duration": 89.05, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=UGWOtDmIcns", "annotations": []}, "I8jhEprzTN8": {"duration": 202.246, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=I8jhEprzTN8", "annotations": [{"segment": [9.149966216335729, 171.95626165182662], "label": "Beach soccer"}]}, "E0DbrJVJUho": {"duration": 184.62, "subset": "training", "resolution": 
"654x480", "url": "https://www.youtube.com/watch?v=E0DbrJVJUho", "annotations": [{"segment": [46.05329173166927, 152.55152886115445], "label": "Cleaning windows"}]}, "lMFp8FPeDHI": {"duration": 124.483, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=lMFp8FPeDHI", "annotations": [{"segment": [0.01, 124.441], "label": "Using the monkey bar"}]}, "IWXIEfaltJ4": {"duration": 228.88, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IWXIEfaltJ4", "annotations": [{"segment": [44.99016714954312, 76.05480637184671], "label": "Plastering"}, {"segment": [90.33739911773341, 228.878548], "label": "Plastering"}]}, "TkNCytAQ5cw": {"duration": 100.53999999999999, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=TkNCytAQ5cw", "annotations": [{"segment": [12.184113199999999, 29.88478236], "label": "Polishing shoes"}, {"segment": [63.45105304212168, 98.54496879875194], "label": "Polishing shoes"}]}, "ukPz_13Agis": {"duration": 38.71, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ukPz_13Agis", "annotations": [{"segment": [0.01, 34.1853143525741], "label": "Layup drill in basketball"}]}, "f14r3tC-Y4E": {"duration": 203.11, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=f14r3tC-Y4E", "annotations": [{"segment": [0.950569157460176, 195.18353366515612], "label": "Sailing"}]}, "DOI6tsATsE4": {"duration": 6.46, "subset": "testing", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=DOI6tsATsE4", "annotations": []}, "qxQWNu_MN94": {"duration": 6.97, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qxQWNu_MN94", "annotations": [{"segment": [1.7240561622464898, 4.525647425897036], "label": "Plataform diving"}]}, "wJaFA5a6YAg": {"duration": 52.87, "subset": "testing", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=wJaFA5a6YAg", 
"annotations": []}, "TVbPV7X49tw": {"duration": 94.0, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TVbPV7X49tw", "annotations": [{"segment": [3.114512102395946, 93.15585263977913], "label": "Doing motocross"}]}, "Q-dFr9ZS29s": {"duration": 96.92, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=Q-dFr9ZS29s", "annotations": [{"segment": [0.01, 96.920112], "label": "Removing curlers"}]}, "W01QffZYsxQ": {"duration": 18.391, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=W01QffZYsxQ", "annotations": [{"segment": [1.174160686427457, 17.09692511700468], "label": "Doing crunches"}]}, "eQ9cmxPKvT8": {"duration": 225.13, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eQ9cmxPKvT8", "annotations": [{"segment": [25.565561955147533, 202.77346573081996], "label": "Rollerblading"}]}, "cpy_BFGQoQ8": {"duration": 93.6, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cpy_BFGQoQ8", "annotations": [{"segment": [8.514359634301728, 54.731002332121044], "label": "Slacklining"}]}, "yAa24V7sXJM": {"duration": 203.06, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yAa24V7sXJM", "annotations": [{"segment": [22.17487556645123, 24.075579186432766], "label": "Washing hands"}, {"segment": [28.827338236386602, 32.311961539686074], "label": "Washing hands"}, {"segment": [34.212665159667615, 71.27638574930752], "label": "Washing hands"}, {"segment": [161.5598076984304, 165.04443067550704], "label": "Washing hands"}, {"segment": [176.76543630889236, 186.58573832605305], "label": "Washing hands"}]}, "pNcQf-aDNP0": {"duration": 144.196, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pNcQf-aDNP0", "annotations": []}, "vBpYwyXfE0o": {"duration": 200.458, "subset": "validation", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=vBpYwyXfE0o", "annotations": [{"segment": [84.0912464898596, 198.5251716068643], "label": "Skiing"}]}, "EZyLBq2jJtE": {"duration": 62.63, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EZyLBq2jJtE", "annotations": []}, "p8tlnMtsTf8": {"duration": 123.136, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=p8tlnMtsTf8", "annotations": [{"segment": [29.19903863365419, 106.23071292375505], "label": "Surfing"}]}, "vZmDiMGYCo0": {"duration": 134.003, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vZmDiMGYCo0", "annotations": [{"segment": [54.135002499125825, 116.74294642435947], "label": "Fun sliding down"}]}, "P06Slwn3JG0": {"duration": 65.667, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=P06Slwn3JG0", "annotations": [{"segment": [10.654083153802343, 26.942537206250154], "label": "Raking leaves"}, {"segment": [29.093842458460244, 36.572189287571504], "label": "Raking leaves"}]}, "kLpYpfT5bok": {"duration": 215.04, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=kLpYpfT5bok", "annotations": [{"segment": [30.48069945112001, 192.93277894335304], "label": "Capoeira"}]}, "0Q9lJXWwjFg": {"duration": 93.75999999999999, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0Q9lJXWwjFg", "annotations": []}, "Jj7Xcisw62E": {"duration": 99.683, "subset": "training", "resolution": "320x218", "url": "https://www.youtube.com/watch?v=Jj7Xcisw62E", "annotations": [{"segment": [0, 62.51547166989079], "label": "Arm wrestling"}]}, "ExNd4o46kao": {"duration": 170.203, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ExNd4o46kao", "annotations": [{"segment": [0, 47.529182204368176], "label": "Shuffleboard"}, {"segment": [54.69838845865835, 58.68128082215289], "label": "Shuffleboard"}, 
{"segment": [89.48231509984399, 93.73073362090484], "label": "Shuffleboard"}, {"segment": [142.32202045553822, 147.63254360686426], "label": "Shuffleboard"}, {"segment": [153.47411907332292, 158.2535899095164], "label": "Shuffleboard"}, {"segment": [161.43990380031204, 166.48490079407176], "label": "Shuffleboard"}]}, "lVOBMs6op7o": {"duration": 11.01, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=lVOBMs6op7o", "annotations": [{"segment": [0.01, 10.961], "label": "Drinking coffee"}]}, "cUdIbmXb2yI": {"duration": 102.49000000000001, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=cUdIbmXb2yI", "annotations": [{"segment": [33.73790556786272, 71.95287917316692], "label": "Cumbia"}, {"segment": [73.23204146957879, 73.71172733073324], "label": "Cumbia"}]}, "b7KUDaXZTfU": {"duration": 116.449, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=b7KUDaXZTfU", "annotations": []}, "6KG4p_pfMQc": {"duration": 12.26, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6KG4p_pfMQc", "annotations": [{"segment": [0.01, 12.260135], "label": "Playing kickball"}]}, "7QA5qyCXwyg": {"duration": 238.353, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7QA5qyCXwyg", "annotations": [{"segment": [0.01, 238.35283446712018], "label": "Playing congas"}]}, "zg6BRB4a3Fo": {"duration": 84.68, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zg6BRB4a3Fo", "annotations": [{"segment": [30.081154297005725, 41.15044401663189], "label": "Spinning"}, {"segment": [53.142174546226904, 61.83947361164746], "label": "Spinning"}, {"segment": [70.27321815993407, 82.00139417239512], "label": "Spinning"}]}, "02fTz-0Kra8": {"duration": 170.226, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=02fTz-0Kra8", "annotations": []}, "jr7JA5eKkwY": 
{"duration": 186.016, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jr7JA5eKkwY", "annotations": [{"segment": [0.01, 173.50489547581904], "label": "Blow-drying hair"}]}, "I3DxLmTo9Y4": {"duration": 196.12, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=I3DxLmTo9Y4", "annotations": []}, "rrKGM5hck1A": {"duration": 99.15, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rrKGM5hck1A", "annotations": [{"segment": [6.032480330733229, 99.14922902494331], "label": "Tango"}]}, "U-mwyUsNOMU": {"duration": 204.847, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=U-mwyUsNOMU", "annotations": [{"segment": [0.2, 162.66277214040562], "label": "Ice fishing"}]}, "PPJ6gSuk33o": {"duration": 69.96000000000001, "subset": "training", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=PPJ6gSuk33o", "annotations": [{"segment": [0, 69.961722], "label": "Shaving legs"}]}, "BC1VpNRFtXo": {"duration": 155.806, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BC1VpNRFtXo", "annotations": [{"segment": [24.811650550592727, 51.25535615806937], "label": "Arm wrestling"}]}, "ZKo1lM6TJTY": {"duration": 12.03, "subset": "training", "resolution": "252x240", "url": "https://www.youtube.com/watch?v=ZKo1lM6TJTY", "annotations": [{"segment": [0.2908471021840874, 10.423584855694227], "label": "Hammer throw"}]}, "OfwGarnPTcU": {"duration": 107.207, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OfwGarnPTcU", "annotations": []}, "qPZwXF1Xcpw": {"duration": 109.41, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=qPZwXF1Xcpw", "annotations": [{"segment": [20.99489609672387, 25.944912249609985], "label": "Shot put"}, {"segment": [61.61916659282372, 64.52090019968799], "label": "Shot put"}, {"segment": [92.85547541965678, 
96.95204051170046], "label": "Shot put"}]}, "Y_ZFCDVhS-4": {"duration": 210.791, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Y_ZFCDVhS-4", "annotations": []}, "3AZgr5HkSDY": {"duration": 226.79, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3AZgr5HkSDY", "annotations": [{"segment": [13.090801857924657, 206.97619057722312], "label": "Doing step aerobics"}]}, "LjPKKqe-VO0": {"duration": 43.1, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LjPKKqe-VO0", "annotations": []}, "u7OvguFW-Hs": {"duration": 17.462, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=u7OvguFW-Hs", "annotations": [{"segment": [5.448020592823713, 14.682415497659907], "label": "Doing a powerbomb"}]}, "B9ZdRhm2yQs": {"duration": 168.972, "subset": "testing", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=B9ZdRhm2yQs", "annotations": []}, "rggwT0-G5sk": {"duration": 78.55, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=rggwT0-G5sk", "annotations": [{"segment": [0.612738728549142, 78.35310657596372], "label": "Playing squash"}]}, "L2J_QI2_qms": {"duration": 142.57, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=L2J_QI2_qms", "annotations": [{"segment": [12.2125462, 127.69554542433697], "label": "Tumbling"}]}, "tmBTO0cA_qw": {"duration": 203.663, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tmBTO0cA_qw", "annotations": [{"segment": [31.44848517940718, 48.919865834633384], "label": "Sharpening knives"}, {"segment": [60.03801716068643, 77.50939781591264], "label": "Sharpening knives"}, {"segment": [93.71013260530421, 121.02901872074884], "label": "Sharpening knives"}, {"segment": [122.9349875195008, 160.73670202808114], "label": "Sharpening knives"}, {"segment": [162.00734789391575, 
169.94888455538222], "label": "Sharpening knives"}, {"segment": [177.57275975039002, 200.44438533541341], "label": "Sharpening knives"}]}, "wB9LBEHR5-c": {"duration": 176.923, "subset": "validation", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=wB9LBEHR5-c", "annotations": [{"segment": [4.139648985959439, 169.74479719188767], "label": "Riding bumper cars"}]}, "_1Zoq6-OrRQ": {"duration": 116.1, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=_1Zoq6-OrRQ", "annotations": []}, "zB8knKX0W8Q": {"duration": 147.96, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zB8knKX0W8Q", "annotations": []}, "iJWmjVjBNzE": {"duration": 94.831, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iJWmjVjBNzE", "annotations": [{"segment": [18.16966100787127, 72.29519473406262], "label": "Decorating the Christmas tree"}]}, "pn41XETdQB4": {"duration": 229.72, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=pn41XETdQB4", "annotations": [{"segment": [19.706131045241808, 36.187622464898595], "label": "Doing karate"}, {"segment": [76.67476443057723, 112.14580031201247], "label": "Doing karate"}, {"segment": [160.51539469578785, 173.41395319812793], "label": "Doing karate"}, {"segment": [187.7456848673947, 218.91720124804993], "label": "Doing karate"}]}, "pi2AaadXnnI": {"duration": 144.96, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pi2AaadXnnI", "annotations": [{"segment": [1.3565148205928237, 141.9818845553822], "label": "Hand washing clothes"}]}, "hR6VFvMXCN4": {"duration": 147.63, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hR6VFvMXCN4", "annotations": [{"segment": [16.574918017128848, 76.19858051482059], "label": "Long jump"}, {"segment": [118.09628943837754, 123.85146923556944], "label": "Long jump"}]}, "jBnNbI48KjQ": 
{"duration": 174.336, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=jBnNbI48KjQ", "annotations": [{"segment": [43.508553430991014, 57.081840035352386], "label": "Surfing"}, {"segment": [61.42529174874802, 67.66900358675426], "label": "Surfing"}, {"segment": [80.15642726276671, 85.31427617242403], "label": "Surfing"}, {"segment": [111.38776753466597, 114.9172270362236], "label": "Surfing"}, {"segment": [141.79541862500864, 147.2253563197127], "label": "Surfing"}, {"segment": [154.01277843809277, 159.714213017532], "label": "Surfing"}]}, "ihdkXBpzKbE": {"duration": 193.52, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=ihdkXBpzKbE", "annotations": [{"segment": [9.056869050272216, 119.55067146359325], "label": "Washing dishes"}, {"segment": [128.60754051386544, 166.34449485491422], "label": "Washing dishes"}]}, "Z2KHO87wHzg": {"duration": 110.226, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Z2KHO87wHzg", "annotations": [{"segment": [87.0107835226209, 89.93407071606863], "label": "Applying sunscreen"}]}, "aIwFZCRFHx8": {"duration": 148.33, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aIwFZCRFHx8", "annotations": [{"segment": [25.90816848673947, 135.09259282371295], "label": "Riding bumper cars"}]}, "CteuM2BUmHo": {"duration": 106.0, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CteuM2BUmHo", "annotations": [{"segment": [4.523949079648085, 102.26766097769911], "label": "Kneeling"}]}, "W1krUTxgsMc": {"duration": 143.78, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=W1krUTxgsMc", "annotations": [{"segment": [34.98220904836194, 136.78940717628706], "label": "Playing guitarra"}]}, "pqVWGi0d4RU": {"duration": 203.268, "subset": "validation", "resolution": "1920x1072", "url": "https://www.youtube.com/watch?v=pqVWGi0d4RU", 
"annotations": [{"segment": [12.045644305772232, 194.96171606864274], "label": "Playing ice hockey"}]}, "fYIr-1aIHmA": {"duration": 117.029, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fYIr-1aIHmA", "annotations": [{"segment": [4.199137649859736, 50.20708059614902], "label": "Waterskiing"}, {"segment": [55.136503054680006, 112.82900237129485], "label": "Waterskiing"}]}, "djgM0IWIBKc": {"duration": 235.985, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=djgM0IWIBKc", "annotations": [{"segment": [0.01, 78.04788238332254], "label": "Canoeing"}, {"segment": [92.4057475387451, 94.24649948174799], "label": "Canoeing"}, {"segment": [158.30466709824856, 205.79606627457096], "label": "Canoeing"}, {"segment": [217.9450290421217, 235.984398], "label": "Canoeing"}]}, "5YLDQMvskH8": {"duration": 185.203, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=5YLDQMvskH8", "annotations": []}, "CE93IieQkAg": {"duration": 148.004, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=CE93IieQkAg", "annotations": [{"segment": [15.008205008472448, 144.77145355694228], "label": "Cutting the grass"}]}, "f4mDKdD0z3U": {"duration": 81.64, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=f4mDKdD0z3U", "annotations": [{"segment": [58.97019796879874, 73.99931969734789], "label": "Bungee jumping"}]}, "bSZnvk2Cx28": {"duration": 101.17, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bSZnvk2Cx28", "annotations": [{"segment": [14.274010140405617, 38.24803822152886], "label": "Capoeira"}, {"segment": [37.62458424336973, 70.91090405616224], "label": "Capoeira"}]}, "cqkcRy3k4v0": {"duration": 29.652, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cqkcRy3k4v0", "annotations": [{"segment": [12.107179407176286, 29.401], "label": 
"Camel ride"}]}, "pmlK-IV4vko": {"duration": 19.97, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pmlK-IV4vko", "annotations": [{"segment": [0.3033835, 18.733229329173167], "label": "Pole vault"}]}, "NL6UDkBKrgM": {"duration": 41.66, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NL6UDkBKrgM", "annotations": []}, "QoRlOa6R1LY": {"duration": 62.69, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QoRlOa6R1LY", "annotations": []}, "dSww-S8qyCM": {"duration": 94.25, "subset": "validation", "resolution": "328x240", "url": "https://www.youtube.com/watch?v=dSww-S8qyCM", "annotations": [{"segment": [9.995363494539783, 82.4617488299532], "label": "Long jump"}]}, "UKboc2GEA6U": {"duration": 61.394, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UKboc2GEA6U", "annotations": []}, "mmgoptOJM0s": {"duration": 13.79, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mmgoptOJM0s", "annotations": [{"segment": [0.5259664586583462, 8.082709048361933], "label": "Ping-pong"}]}, "Jvxk68D3F2w": {"duration": 183.41, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Jvxk68D3F2w", "annotations": [{"segment": [4.578201410296412, 173.3993784149766], "label": "Paintball"}]}, "FnbVnRX6WxQ": {"duration": 94.227, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FnbVnRX6WxQ", "annotations": [{"segment": [11.650125143255824, 86.04170834698833], "label": "Doing crunches"}]}, "1ftLLKrC81s": {"duration": 220.32999999999998, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=1ftLLKrC81s", "annotations": [{"segment": [35.06096856786271, 197.64761692667705], "label": "Tango"}]}, "L0QdLXym4F4": {"duration": 30.697, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=L0QdLXym4F4", "annotations": [{"segment": [0.2, 18.128734789391576], "label": "Using the monkey bar"}]}, "cam-KNnddec": {"duration": 166.093, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cam-KNnddec", "annotations": []}, "fXdw7jwiP8g": {"duration": 57.56, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=fXdw7jwiP8g", "annotations": [{"segment": [4.796498932137286, 49.44428338455538], "label": "Cumbia"}]}, "hlFJj2dT3sU": {"duration": 41.68, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=hlFJj2dT3sU", "annotations": [{"segment": [0, 38.717684867394695], "label": "Tennis serve with ball bouncing"}]}, "P9jIpcRGeOk": {"duration": 65.62, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=P9jIpcRGeOk", "annotations": [{"segment": [13.410560844202475, 29.380388347893916], "label": "Sumo"}, {"segment": [38.798491929797194, 46.066810998439934], "label": "Sumo"}]}, "BZAzrFF5emE": {"duration": 183.206, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BZAzrFF5emE", "annotations": [{"segment": [85.43871332808997, 99.7070358180413], "label": "Longboarding"}, {"segment": [110.83632689377544, 116.8290223143362], "label": "Longboarding"}, {"segment": [168.19498306199975, 175.32914427695303], "label": "Longboarding"}]}, "jHbf_E8aY1k": {"duration": 141.04, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jHbf_E8aY1k", "annotations": [{"segment": [43.78558907488299, 73.9294368299532], "label": "Bungee jumping"}, {"segment": [76.12971768798752, 78.77005471762871], "label": "Bungee jumping"}]}, "TEh6gfRUFZQ": {"duration": 109.088, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TEh6gfRUFZQ", "annotations": [{"segment": [48.98291419656786, 57.15721684867395], "label": "Removing ice 
from car"}, {"segment": [63.79153666146646, 87.60704368174727], "label": "Removing ice from car"}]}, "psgIH8U1adg": {"duration": 121.069, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=psgIH8U1adg", "annotations": [{"segment": [4.5995523125597995, 44.37436695430435], "label": "Swinging at the playground"}, {"segment": [49.65254140913301, 63.413496237793446], "label": "Swinging at the playground"}]}, "jVC3DZdphYM": {"duration": 167.3, "subset": "validation", "resolution": "160x120", "url": "https://www.youtube.com/watch?v=jVC3DZdphYM", "annotations": [{"segment": [6.785950240730718, 80.12641245785886], "label": "Playing harmonica"}, {"segment": [104.39923447278028, 136.7629969609984], "label": "Playing harmonica"}]}, "sMITf5WBIxM": {"duration": 158.45, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=sMITf5WBIxM", "annotations": [{"segment": [21.506097196567865, 132.99172749141965], "label": "Brushing hair"}]}, "oJ9BFy1KNlY": {"duration": 42.08, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oJ9BFy1KNlY", "annotations": [{"segment": [20.34806968798752, 40.23666683463338], "label": "Clean and jerk"}]}, "ORI5ZNZARw8": {"duration": 23.94, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ORI5ZNZARw8", "annotations": [{"segment": [3.355834633385335, 22.85696255850234], "label": "Using the rowing machine"}]}, "BYLxSOPFOuc": {"duration": 489.384, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BYLxSOPFOuc", "annotations": [{"segment": [345.04382527301095, 413.7472418096724], "label": "Removing ice from car"}]}, "1jPEEuKPfKs": {"duration": 82.57, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1jPEEuKPfKs", "annotations": []}, "n25mDmcBC6E": {"duration": 185.64, "subset": "validation", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=n25mDmcBC6E", "annotations": [{"segment": [0, 185.652], "label": "Playing flauta"}]}, "55rDKtgbGqI": {"duration": 187.11, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=55rDKtgbGqI", "annotations": []}, "xunKd050v7U": {"duration": 63.066, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xunKd050v7U", "annotations": [{"segment": [5.9031381663429805, 14.167531207488299], "label": "Making a lemonade"}]}, "N-1b20gDnCM": {"duration": 184.32, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=N-1b20gDnCM", "annotations": [{"segment": [27.024046801872075, 164.15670982839313], "label": "Canoeing"}]}, "nEcOF04KK0g": {"duration": 37.48, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nEcOF04KK0g", "annotations": [{"segment": [9.112006240249611, 12.733444617784713], "label": "Polishing shoes"}, {"segment": [15.770780031201248, 18.57447425897036], "label": "Polishing shoes"}]}, "L35c6QI1l_0": {"duration": 105.373, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=L35c6QI1l_0", "annotations": [{"segment": [19.059650546021842, 100.90385959438377], "label": "Camel ride"}]}, "2fMpsSrmeIA": {"duration": 58.701, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=2fMpsSrmeIA", "annotations": [{"segment": [4.578742355694228, 55.402782503900156], "label": "Playing pool"}]}, "EE_cxG4E-Q0": {"duration": 93.856, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=EE_cxG4E-Q0", "annotations": []}, "c1tbdVxIhH4": {"duration": 68.639, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=c1tbdVxIhH4", "annotations": [{"segment": [0.2, 68.43818594104309], "label": "Carving jack-o-lanterns"}]}, "gVMG_FHDrvo": {"duration": 7.849, "subset": "validation", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gVMG_FHDrvo", "annotations": [{"segment": [0.01, 6.81474674756059], "label": "Doing a powerbomb"}]}, "9FK7tjzBKio": {"duration": 50.6, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9FK7tjzBKio", "annotations": [{"segment": [7.104001872074883, 9.550935850234008], "label": "Playing squash"}, {"segment": [11.445336349453978, 32.046941778471144], "label": "Playing squash"}]}, "DxtISmKol2I": {"duration": 128.382, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DxtISmKol2I", "annotations": [{"segment": [38.1493709616179, 46.3385036023644], "label": "Sumo"}, {"segment": [117.24440817468167, 124.434866103142], "label": "Sumo"}]}, "ZyDAF0AU3Ck": {"duration": 15.65, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=ZyDAF0AU3Ck", "annotations": []}, "_Dkkr7L00zA": {"duration": 102.563, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=_Dkkr7L00zA", "annotations": []}, "FYhB2rQwfCc": {"duration": 103.61, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FYhB2rQwfCc", "annotations": [{"segment": [12.924929797191888, 96.93697347893917], "label": "Playing racquetball"}]}, "sWtwatYMbX0": {"duration": 112.47999999999999, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=sWtwatYMbX0", "annotations": [{"segment": [28.338704568642747, 83.08592330187207], "label": "Mixing drinks"}]}, "ABB755sPZfY": {"duration": 12.818, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ABB755sPZfY", "annotations": [{"segment": [0.2, 12.617414965986395], "label": "Rope skipping"}]}, "3hSPAtKORgk": {"duration": 81.131, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=3hSPAtKORgk", "annotations": [{"segment": [42.28448436520879, 
81.130521], "label": "Futsal"}]}, "zmmiX3_TJ84": {"duration": 210.61, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=zmmiX3_TJ84", "annotations": [{"segment": [6.898254290171607, 199.06390951638065], "label": "Zumba"}]}, "Xf0c2abFH3Y": {"duration": 93.11, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Xf0c2abFH3Y", "annotations": [{"segment": [27.44189219911522, 31.21152771313391], "label": "Painting"}, {"segment": [49.62474733930214, 57.30900434864793], "label": "Painting"}]}, "VxBRMCxxcG0": {"duration": 157.618, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VxBRMCxxcG0", "annotations": []}, "UqU_mAjgknQ": {"duration": 77.787, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UqU_mAjgknQ", "annotations": [{"segment": [8.980072820592824, 68.44271717316693], "label": "Sumo"}]}, "ER0UPx6-VcM": {"duration": 78.07, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ER0UPx6-VcM", "annotations": [{"segment": [22.378820592823715, 73.23644617784711], "label": "Playing racquetball"}]}, "CRDBKk44RWg": {"duration": 143.5, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CRDBKk44RWg", "annotations": [{"segment": [63.55352886115445, 139.41495943837754], "label": "Tennis serve with ball bouncing"}]}, "nkrA8sJydF0": {"duration": 216.13, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nkrA8sJydF0", "annotations": [{"segment": [38.09228237129485, 56.295673946957876], "label": "Vacuuming floor"}, {"segment": [102.81545241809671, 108.54614976599062], "label": "Vacuuming floor"}]}, "-kky8-yaEPg": {"duration": 37.803, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=-kky8-yaEPg", "annotations": [{"segment": [0.01, 36.17694872536044], "label": "Making an omelette"}]}, 
"4kNx9rVwmAg": {"duration": 191.91, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=4kNx9rVwmAg", "annotations": [{"segment": [7.15256024, 99.52172496000001], "label": "Doing step aerobics"}, {"segment": [111.73500464, 178.6948595943838], "label": "Doing step aerobics"}]}, "udSHsodv3gY": {"duration": 165.65, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=udSHsodv3gY", "annotations": []}, "FhbsTTHT2Zs": {"duration": 71.01, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FhbsTTHT2Zs", "annotations": [{"segment": [0.01, 70.96], "label": "Grooming horse"}]}, "Jd3D2d-RwL0": {"duration": 114.497, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Jd3D2d-RwL0", "annotations": [{"segment": [22.483628705148206, 40.86310296411857], "label": "Blowing leaves"}]}, "xQqladBKjFU": {"duration": 220.38, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xQqladBKjFU", "annotations": []}, "0gw1Qq3WRbU": {"duration": 114.8, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0gw1Qq3WRbU", "annotations": [{"segment": [0.01, 114.79945578231292], "label": "Springboard diving"}]}, "wnNiWoJxfSQ": {"duration": 228.07, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wnNiWoJxfSQ", "annotations": []}, "zXe-F6ebHGM": {"duration": 192.656, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=zXe-F6ebHGM", "annotations": []}, "VTuf9nQkbCk": {"duration": 128.732, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VTuf9nQkbCk", "annotations": []}, "8Q-P5KEvXN0": {"duration": 46.139, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8Q-P5KEvXN0", "annotations": [{"segment": [0.01, 7.479725429017161], "label": "Futsal"}, 
{"segment": [9.277736349453978, 46.101], "label": "Futsal"}]}, "GbvZWitR2_g": {"duration": 160.775, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GbvZWitR2_g", "annotations": [{"segment": [0, 2.508190150735281], "label": "Kite flying"}, {"segment": [7.524570452205843, 159.52089292979718], "label": "Kite flying"}]}, "cZWgq6ATrRI": {"duration": 60.16, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cZWgq6ATrRI", "annotations": [{"segment": [37.78232995319813, 50.048712168486745], "label": "Putting on makeup"}]}, "I1P5GuVHFQA": {"duration": 205.567, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=I1P5GuVHFQA", "annotations": [{"segment": [132.56141903490368, 154.9751855383898], "label": "Welding"}]}, "Of_UWPP-SSA": {"duration": 46.12, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Of_UWPP-SSA", "annotations": []}, "WzAGE-xKDpw": {"duration": 56.123, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WzAGE-xKDpw", "annotations": [{"segment": [21.452481685726383, 54.758901699745074], "label": "Using the rowing machine"}]}, "IRvFx8K0gAI": {"duration": 166.53, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IRvFx8K0gAI", "annotations": [{"segment": [0.01, 166.53310657596373], "label": "Tango"}]}, "6Pgz2EgPES0": {"duration": 140.295, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6Pgz2EgPES0", "annotations": []}, "Gpocjp7hSzU": {"duration": 197.86, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Gpocjp7hSzU", "annotations": [{"segment": [87.48296957878314, 174.81164820592824], "label": "Preparing pasta"}]}, "JB3h1trZ1cE": {"duration": 108.531, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=JB3h1trZ1cE", 
"annotations": [{"segment": [15.739053042121684, 24.70862090483619], "label": "Waterskiing"}, {"segment": [29.108786271450853, 105.26549453978159], "label": "Waterskiing"}]}, "ZgVmS-AhYgA": {"duration": 152.741, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZgVmS-AhYgA", "annotations": [{"segment": [26.6150031152648, 75.80523208722742], "label": "Rafting"}, {"segment": [79.13210747663551, 112.16322741433022], "label": "Rafting"}]}, "3k5dJSs2zkg": {"duration": 59.049, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3k5dJSs2zkg", "annotations": [{"segment": [27.846266355140184, 36.22161059190031], "label": "Running a marathon"}, {"segment": [38.060271028037384, 41.27792679127726], "label": "Running a marathon"}]}, "aG5kKRuw0Fk": {"duration": 212.881, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aG5kKRuw0Fk", "annotations": [{"segment": [11.955849597249196, 38.19229732454605], "label": "Playing drums"}, {"segment": [52.805002387850614, 205.9062986192917], "label": "Playing drums"}]}, "P6ijX0vabJs": {"duration": 218.99, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=P6ijX0vabJs", "annotations": [{"segment": [40.648803432137285, 50.21322776911076], "label": "Playing flauta"}, {"segment": [70.70842277691108, 90.5204446177847], "label": "Playing flauta"}, {"segment": [130.48607488299533, 153.71396255850232], "label": "Playing flauta"}, {"segment": [177.62502340093602, 202.56084399375973], "label": "Playing flauta"}]}, "pizl41xmw7k": {"duration": 172.084, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pizl41xmw7k", "annotations": [{"segment": [49.64789653250436, 98.95473765399969], "label": "Mooping floor"}]}, "TXSOSSGJYO0": {"duration": 127.942, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TXSOSSGJYO0", "annotations": []}, 
"y_vaJ59chHo": {"duration": 49.51, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=y_vaJ59chHo", "annotations": []}, "6ypZ2Qeip0M": {"duration": 44.03, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=6ypZ2Qeip0M", "annotations": []}, "r1wtXxEQihE": {"duration": 170.37, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=r1wtXxEQihE", "annotations": []}, "yZErFOSkogc": {"duration": 92.207, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yZErFOSkogc", "annotations": [{"segment": [0.01, 92.06245541341654], "label": "Scuba diving"}]}, "QriYk3MKSnM": {"duration": 37.97, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=QriYk3MKSnM", "annotations": []}, "rdZ0TZcP0NU": {"duration": 39.13, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rdZ0TZcP0NU", "annotations": [{"segment": [12.110231305772231, 37.79122432605304], "label": "Playing polo"}]}, "C53reDr47oU": {"duration": 18.93, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=C53reDr47oU", "annotations": [{"segment": [0.8801392499999999, 16.16385872854914], "label": "Putting on makeup"}]}, "c6BcS5NdI6E": {"duration": 178.933, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=c6BcS5NdI6E", "annotations": [{"segment": [0.2, 178.73297052154197], "label": "Playing beach volleyball"}]}, "kyx4MMmBr3A": {"duration": 44.258, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kyx4MMmBr3A", "annotations": []}, "f98FHTGx64Q": {"duration": 173.477, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=f98FHTGx64Q", "annotations": [{"segment": [76.56496567862715, 121.4898736349454], "label": "Kite flying"}]}, "yATxB9giT34": {"duration": 168.879, "subset": "training", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yATxB9giT34", "annotations": [{"segment": [28.136833333333332, 31.555327102803737], "label": "Beer pong"}, {"segment": [62.32177102803738, 76.7846292834891], "label": "Beer pong"}, {"segment": [80.2031230529595, 84.4105], "label": "Beer pong"}]}, "qXQBHaML9Xg": {"duration": 8.267, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qXQBHaML9Xg", "annotations": [{"segment": [0.6383494516380657, 4.945596256630266], "label": "Doing a powerbomb"}]}, "2vAaAy_WC7Y": {"duration": 179.862, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2vAaAy_WC7Y", "annotations": [{"segment": [15.238863978840483, 171.82939201622366], "label": "Making a cake"}]}, "wEwp63lUsrE": {"duration": 158.059, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=wEwp63lUsrE", "annotations": [{"segment": [126.71262714508579, 141.7505070202808], "label": "Welding"}]}, "adS4bgldY64": {"duration": 93.25, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=adS4bgldY64", "annotations": []}, "1ErQKcUju8o": {"duration": 104.1, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1ErQKcUju8o", "annotations": [{"segment": [97.96126203666147, 102.01930934555382], "label": "Ballet"}]}, "26U3SkeZU6M": {"duration": 30.325, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=26U3SkeZU6M", "annotations": []}, "YS8swiRbbIE": {"duration": 79.273, "subset": "validation", "resolution": "562x480", "url": "https://www.youtube.com/watch?v=YS8swiRbbIE", "annotations": [{"segment": [8.038598455502846, 72.96573812792512], "label": "Baton twirling"}]}, "h0PcCIFuBF8": {"duration": 217.037, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=h0PcCIFuBF8", "annotations": []}, "k42CKebiSLA": {"duration": 72.841, 
"subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=k42CKebiSLA", "annotations": [{"segment": [53.66622488161995, 63.42372031464175], "label": "Sumo"}]}, "AdnLY0a6yn0": {"duration": 84.38, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AdnLY0a6yn0", "annotations": [{"segment": [8.161687157566304, 15.401893507020281], "label": "Triple jump"}, {"segment": [22.642099856474257, 29.487385859594387], "label": "Triple jump"}, {"segment": [32.646748630265215, 42.25647705772231], "label": "Triple jump"}, {"segment": [43.30959798127925, 51.33964502340094], "label": "Triple jump"}, {"segment": [53.709167101404056, 64.10873622152886], "label": "Triple jump"}, {"segment": [65.95169783775351, 73.58682453354135], "label": "Triple jump"}]}, "vcCwvRYqU2I": {"duration": 156.177, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vcCwvRYqU2I", "annotations": [{"segment": [11.203691107644307, 143.71792511700468], "label": "Raking leaves"}]}, "VFrQxkhhW80": {"duration": 121.51, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VFrQxkhhW80", "annotations": []}, "WAm4oWlM6to": {"duration": 183.531, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WAm4oWlM6to", "annotations": []}, "r0PXIqB_urk": {"duration": 196.836, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=r0PXIqB_urk", "annotations": []}, "_6TamBiyYWA": {"duration": 162.42000000000002, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_6TamBiyYWA", "annotations": [{"segment": [21.538228369734792, 26.859437731669267], "label": "Archery"}, {"segment": [47.13071149141966, 51.184966243369736], "label": "Archery"}, {"segment": [88.68682269890796, 90.713950074883], "label": "Archery"}]}, "wmI-4NI4QNc": {"duration": 111.18, "subset": "testing", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=wmI-4NI4QNc", "annotations": []}, "yy6PwbO6chc": {"duration": 148.469, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yy6PwbO6chc", "annotations": []}, "vxjoR-VO5iI": {"duration": 196.093, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=vxjoR-VO5iI", "annotations": []}, "0x6ZV9Y8g_c": {"duration": 44.03, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0x6ZV9Y8g_c", "annotations": []}, "F8K9WQfHth4": {"duration": 112.083, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=F8K9WQfHth4", "annotations": [{"segment": [0.2, 103.8644862402496], "label": "Croquet"}]}, "No5ZwqHdEQU": {"duration": 180.16, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=No5ZwqHdEQU", "annotations": [{"segment": [0.01, 180.16362811791385], "label": "Volleyball"}]}, "0WRxp0X0edY": {"duration": 159.243, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0WRxp0X0edY", "annotations": [{"segment": [24.517888180641897, 29.223342193083578], "label": "Curling"}, {"segment": [61.17089838281919, 84.20285749634952], "label": "Curling"}, {"segment": [138.68706185093737, 143.39251586337906], "label": "Curling"}, {"segment": [152.80342388826242, 159.04247], "label": "Curling"}]}, "sS-KyhAzeUY": {"duration": 153.79, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sS-KyhAzeUY", "annotations": [{"segment": [18.947798751950078, 24.70409204368175], "label": "Washing hands"}, {"segment": [30.940076443057727, 35.25729641185647], "label": "Washing hands"}, {"segment": [44.371427457098285, 48.688647425897045], "label": "Washing hands"}, {"segment": [51.32694851794073, 53.725404056162255], "label": "Washing hands"}, {"segment": [58.762160686427464, 68.83567394695788], "label": "Washing hands"}, {"segment": 
[80.10841497659906, 82.74671606864275], "label": "Washing hands"}, {"segment": [92.34053822152887, 110.08910920436819], "label": "Washing hands"}, {"segment": [117.76416692667708, 124.47984243369736], "label": "Washing hands"}]}, "1QjVfj0S8qQ": {"duration": 62.857, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1QjVfj0S8qQ", "annotations": [{"segment": [8.531189574113577, 60.30668316692668], "label": "Fixing the roof"}]}, "gIf0VWXI_DY": {"duration": 203.87, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=gIf0VWXI_DY", "annotations": [{"segment": [126.42558814352573, 156.6405085803432], "label": "Getting a tattoo"}]}, "Zrv8_-Y-jSM": {"duration": 27.168, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Zrv8_-Y-jSM", "annotations": [{"segment": [3.8992134664586584, 23.903873859594384], "label": "Rope skipping"}]}, "9x7FyIJdVsI": {"duration": 99.266, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9x7FyIJdVsI", "annotations": [{"segment": [15.021337762354031, 22.76429501560062], "label": "Swimming"}]}, "tyeLIzY0MJ4": {"duration": 20.25, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tyeLIzY0MJ4", "annotations": [{"segment": [2.401415677067083, 20.121666], "label": "Drinking beer"}]}, "PhJVHb-J7zM": {"duration": 96.92, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PhJVHb-J7zM", "annotations": [{"segment": [3.6288333492523375, 78.3223192199688], "label": "Polishing shoes"}]}, "sg5z7jus30w": {"duration": 208.01, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sg5z7jus30w", "annotations": [{"segment": [1.6224988455538223, 198.59385869578784], "label": "Getting a tattoo"}]}, "gzyu1S2LNIw": {"duration": 90.535, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=gzyu1S2LNIw", "annotations": [{"segment": [5.5083455909664965, 85.16749721417428], "label": "Baton twirling"}]}, "yjd9UOJdes8": {"duration": 123.04, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=yjd9UOJdes8", "annotations": [{"segment": [22.26666864769829, 109.4138028378278], "label": "Playing violin"}]}, "nI_XzNfxjlY": {"duration": 215.504, "subset": "validation", "resolution": "352x288", "url": "https://www.youtube.com/watch?v=nI_XzNfxjlY", "annotations": [{"segment": [63.20556528383584, 203.40088768330736], "label": "Swimming"}]}, "vjMuhHo6wMY": {"duration": 162.842, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vjMuhHo6wMY", "annotations": [{"segment": [16.512792865456113, 158.77685257410297], "label": "Disc dog"}]}, "c51KKeTaJl4": {"duration": 95.06, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=c51KKeTaJl4", "annotations": []}, "tb8KKdC7r-A": {"duration": 55.636, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tb8KKdC7r-A", "annotations": [{"segment": [11.022849360232915, 14.147436580456418], "label": "Shuffleboard"}, {"segment": [18.313552874087755, 21.52493418376191], "label": "Shuffleboard"}, {"segment": [27.60052044530761, 30.898695844432417], "label": "Shuffleboard"}, {"segment": [36.36672347982355, 44.09139744093166], "label": "Shuffleboard"}, {"segment": [47.1291905717045, 53.11798274379955], "label": "Shuffleboard"}]}, "9jivQgF6J1g": {"duration": 152.045, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9jivQgF6J1g", "annotations": [{"segment": [0.01, 17.261618359503046], "label": "Playing ice hockey"}]}, "kYkwA_lvqYc": {"duration": 13.98, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kYkwA_lvqYc", "annotations": [{"segment": [2.342134165366615, 10.908273010920437], 
"label": "Polishing shoes"}]}, "7hxCoi0XU6I": {"duration": 116.31, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7hxCoi0XU6I", "annotations": [{"segment": [47.90251286786165, 96.71226877223089], "label": "Brushing hair"}]}, "HGqaDftZe5s": {"duration": 38.43, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HGqaDftZe5s", "annotations": [{"segment": [0, 35.071730171606866], "label": "Doing motocross"}]}, "RVYINuYPY6o": {"duration": 233.2, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RVYINuYPY6o", "annotations": [{"segment": [7.276068784247968, 230.6513789422777], "label": "Cricket"}]}, "2VA80MQNvs4": {"duration": 41.634, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2VA80MQNvs4", "annotations": []}, "g0vAi9iuVPA": {"duration": 13.26, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=g0vAi9iuVPA", "annotations": [{"segment": [1.1550327613104525, 13.221], "label": "Tumbling"}]}, "Rq1MoqtH8fM": {"duration": 237.52, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Rq1MoqtH8fM", "annotations": [{"segment": [19.642523975788965, 60.41002656704908], "label": "Mowing the lawn"}, {"segment": [100.06568817854755, 209.39671785510876], "label": "Mowing the lawn"}]}, "zB8kwWJqQ8Q": {"duration": 159.243, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zB8kwWJqQ8Q", "annotations": [{"segment": [12.664946957878316, 147.27955226209048], "label": "Table soccer"}]}, "OH5UXwa3F7E": {"duration": 186.97, "subset": "training", "resolution": "820x480", "url": "https://www.youtube.com/watch?v=OH5UXwa3F7E", "annotations": [{"segment": [16.475875975039, 33.09755616224649], "label": "Pole vault"}, {"segment": [60.80035647425897, 72.75630187207489], "label": "Pole vault"}, {"segment": [83.25420514820593, 
96.08497581903276], "label": "Pole vault"}, {"segment": [115.331131825273, 124.37099297971918], "label": "Pole vault"}, {"segment": [150.32414274570982, 162.57169656786272], "label": "Pole vault"}]}, "GjUaqfAtEqQ": {"duration": 128.57, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GjUaqfAtEqQ", "annotations": []}, "ko9efCldvgs": {"duration": 14.88, "subset": "training", "resolution": "420x240", "url": "https://www.youtube.com/watch?v=ko9efCldvgs", "annotations": [{"segment": [5.015509553822153, 14.883989], "label": "Walking the dog"}]}, "S-4ftlylgig": {"duration": 158.36, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=S-4ftlylgig", "annotations": [{"segment": [30.881355304212168, 52.86888028081123], "label": "Triple jump"}, {"segment": [66.70372745709828, 70.16243925117004], "label": "Triple jump"}, {"segment": [82.76203221528861, 102.03199792511701], "label": "Triple jump"}, {"segment": [109.93762488299532, 114.13748920436817], "label": "Triple jump"}, {"segment": [126.24298048361933, 142.05423439937596], "label": "Triple jump"}]}, "7vXt9RxeXWY": {"duration": 79.622, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7vXt9RxeXWY", "annotations": [{"segment": [4.8419017160686435, 74.01278627145086], "label": "Kneeling"}]}, "O_IrzZbXiCc": {"duration": 98.08, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=O_IrzZbXiCc", "annotations": [{"segment": [5.352834095163806, 94.66869442589703], "label": "Playing field hockey"}]}, "Q711Ki5aFHw": {"duration": 126.2, "subset": "training", "resolution": "400x224", "url": "https://www.youtube.com/watch?v=Q711Ki5aFHw", "annotations": [{"segment": [20.151616, 95.60659600000001], "label": "Breakdancing"}]}, "EHianByJXXM": {"duration": 28.236, "subset": "validation", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=EHianByJXXM", "annotations": [{"segment": [0, 
28.235463], "label": "Calf roping"}]}, "1PpVSeE2BJ8": {"duration": 48.252, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=1PpVSeE2BJ8", "annotations": [{"segment": [0.2, 3.9021066745679507], "label": "Having an ice cream"}, {"segment": [22.28703774766282, 30.766618201784123], "label": "Having an ice cream"}, {"segment": [39.771482400851, 47.95090071500341], "label": "Having an ice cream"}]}, "_gO4UzCTqhk": {"duration": 32.508, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_gO4UzCTqhk", "annotations": [{"segment": [12.041242990654206, 28.48411682242991], "label": "Spread mulch"}]}, "j1IYrdjnY6M": {"duration": 26.22, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=j1IYrdjnY6M", "annotations": [{"segment": [0, 26.21532879818594], "label": "Washing face"}]}, "KOiRoi5_SnQ": {"duration": 134.885, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KOiRoi5_SnQ", "annotations": [{"segment": [0.01, 134.0194336973479], "label": "Skiing"}]}, "Q6uc1kl008o": {"duration": 120.3, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Q6uc1kl008o", "annotations": [{"segment": [0.01, 105.8248159126365], "label": "Layup drill in basketball"}]}, "XeC4nqBB5BM": {"duration": 139.57999999999998, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XeC4nqBB5BM", "annotations": [{"segment": [42.46027834633386, 107.13054844305773], "label": "Playing flauta"}]}, "toeGFZLoeHU": {"duration": 40.54, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=toeGFZLoeHU", "annotations": [{"segment": [6.876015355694227, 38.56272206396256], "label": "Playing bagpipes"}]}, "peaOnHl5YS8": {"duration": 46.301, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=peaOnHl5YS8", "annotations": [{"segment": [0.01, 
46.300589569161], "label": "Mooping floor"}]}, "W2wyahjyTFc": {"duration": 25.45, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=W2wyahjyTFc", "annotations": [{"segment": [8.377150638065524, 17.191024769110765], "label": "Hopscotch"}]}, "gOe72cTd0IA": {"duration": 187.153, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gOe72cTd0IA", "annotations": [{"segment": [20.432246489859594, 77.93442589703588], "label": "Scuba diving"}, {"segment": [88.4424383775351, 93.6964446177847], "label": "Scuba diving"}, {"segment": [99.8261185647426, 119.09080811232448], "label": "Scuba diving"}, {"segment": [136.60416224648986, 146.5440592823713], "label": "Scuba diving"}]}, "IAZV246DKis": {"duration": 81.967, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IAZV246DKis", "annotations": [{"segment": [5.882138877391831, 11.636405170492536], "label": "Peeling potatoes"}, {"segment": [15.600455283517464, 77.23503946957878], "label": "Peeling potatoes"}]}, "M6O-QNDUdwU": {"duration": 80.57, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=M6O-QNDUdwU", "annotations": [{"segment": [27.276745165044698, 70.391599875195], "label": "Playing water polo"}]}, "JM7YQFgxu2I": {"duration": 38.149, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JM7YQFgxu2I", "annotations": [{"segment": [2.0791419240477755, 35.34542721176162], "label": "Fun sliding down"}]}, "CQtu83639js": {"duration": 139.23, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CQtu83639js", "annotations": [{"segment": [15.29992979719189, 122.07390795631827], "label": "Spinning"}]}, "7NG6UrY2Foo": {"duration": 65.09, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7NG6UrY2Foo", "annotations": [{"segment": [0, 65.041], "label": "Mixing drinks"}]}, "77w2Am_ttbg": 
{"duration": 21.061, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=77w2Am_ttbg", "annotations": [{"segment": [0, 18.47576443057722], "label": "Using the rowing machine"}]}, "UU8Xtm8Gl3I": {"duration": 109.204, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UU8Xtm8Gl3I", "annotations": [{"segment": [0.2, 39.161357254290174], "label": "Snowboarding"}, {"segment": [56.35830109204368, 94.68567238689548], "label": "Snowboarding"}]}, "A1SuSeqDTEI": {"duration": 222.89, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=A1SuSeqDTEI", "annotations": [{"segment": [33.38488256151431, 67.96432181385076], "label": "Kayaking"}, {"segment": [70.38488256151432, 77.64656480450498], "label": "Kayaking"}, {"segment": [81.10450872973861, 97.05198795837458], "label": "Kayaking"}, {"segment": [106.42578916398206, 138.3661488275335], "label": "Kayaking"}, {"segment": [148.78148350043065, 156.0722177714587], "label": "Kayaking"}]}, "EIl8Yiu-txE": {"duration": 175.06, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=EIl8Yiu-txE", "annotations": []}, "jIVP2bEWmJE": {"duration": 175.45, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jIVP2bEWmJE", "annotations": [{"segment": [74.7236252349454, 165.04888650795633], "label": "Making a sandwich"}]}, "ABCRXwB5-JQ": {"duration": 49.691, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ABCRXwB5-JQ", "annotations": [{"segment": [0.2, 49.490702947845804], "label": "Canoeing"}]}, "3fg_CB3rBSk": {"duration": 156.062, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3fg_CB3rBSk", "annotations": [{"segment": [0, 142.1470577223089], "label": "Hitting a pinata"}]}, "TZ5Vd9eL5WA": {"duration": 67.41, "subset": "training", "resolution": "648x480", "url": 
"https://www.youtube.com/watch?v=TZ5Vd9eL5WA", "annotations": [{"segment": [23.398088545241812, 62.307269496879876], "label": "Preparing salad"}]}, "BCzsVKHrN5Y": {"duration": 27.35, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BCzsVKHrN5Y", "annotations": [{"segment": [1.536211825273011, 18.173333200000002], "label": "Shoveling snow"}, {"segment": [20.995068680000003, 24.835424508580346], "label": "Shoveling snow"}]}, "E4oP6Ex9IPo": {"duration": 161.611, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=E4oP6Ex9IPo", "annotations": [{"segment": [37.05065054602184, 151.4791903276131], "label": "Waxing skis"}]}, "Bgi2Y85Mbwk": {"duration": 199.46, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Bgi2Y85Mbwk", "annotations": []}, "EQWdo0FcKAo": {"duration": 185.34, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EQWdo0FcKAo", "annotations": []}, "nfe_V3ei6ag": {"duration": 32.286, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=nfe_V3ei6ag", "annotations": [{"segment": [0, 32.229296], "label": "Playing beach volleyball"}]}, "IeTk_XgEsk4": {"duration": 219.476, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IeTk_XgEsk4", "annotations": []}, "wt-PGxOkL_s": {"duration": 172.36, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=wt-PGxOkL_s", "annotations": [{"segment": [20.167112398781665, 45.17433177327093], "label": "Hand washing clothes"}, {"segment": [49.47664908501102, 126.3805710323651], "label": "Hand washing clothes"}]}, "lV86EXbNQkI": {"duration": 73.4, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=lV86EXbNQkI", "annotations": []}, "yj7YkvTZ4zw": {"duration": 31.16, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=yj7YkvTZ4zw", "annotations": [{"segment": [0.01, 3.743222382215288], "label": "High jump"}]}, "bWCIcijlgCo": {"duration": 146.007, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bWCIcijlgCo", "annotations": [{"segment": [120.95092029875195, 122.31759736427458], "label": "Longboarding"}, {"segment": [11.844534567862715, 43.733666096723866], "label": "Longboarding"}, {"segment": [0.2, 145.8066665], "label": "Longboarding"}, {"segment": [0.2, 145.8066665], "label": "Longboarding"}]}, "51wFW1g42VQ": {"duration": 108.809, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=51wFW1g42VQ", "annotations": [{"segment": [13.232134165366615, 22.05355694227769], "label": "Hurling"}, {"segment": [38.169617784711384, 106.89458658346334], "label": "Hurling"}]}, "eRco7yXc-40": {"duration": 8.43, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=eRco7yXc-40", "annotations": []}, "6HmKyms-U2s": {"duration": 82.06, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6HmKyms-U2s", "annotations": [{"segment": [0.01, 82.05934240362812], "label": "Playing squash"}]}, "MCsGSMze_6Q": {"duration": 172.25, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=MCsGSMze_6Q", "annotations": [{"segment": [77.92704687987519, 138.1189727457098], "label": "Playing piano"}]}, "_jV5sAOOHLk": {"duration": 209.51, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_jV5sAOOHLk", "annotations": []}, "pYbOylWZx-s": {"duration": 140.85, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=pYbOylWZx-s", "annotations": [{"segment": [93.18580689079563, 128.9674005850234], "label": "Spinning"}]}, "fSBX-gjPblw": {"duration": 34.02, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=fSBX-gjPblw", "annotations": []}, "2x-Xqt98Ek4": {"duration": 139.436, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2x-Xqt98Ek4", "annotations": [{"segment": [80.48521165366614, 133.34441822620906], "label": "Playing congas"}]}, "3ArMGKnxlcU": {"duration": 152.82999999999998, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3ArMGKnxlcU", "annotations": [{"segment": [0, 119.93037749265072], "label": "Kayaking"}, {"segment": [124.69897975039002, 147.58827622464898], "label": "Kayaking"}]}, "Jo2-IPbuapw": {"duration": 110.46000000000001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Jo2-IPbuapw", "annotations": [{"segment": [0.6892812667706709, 92.70833038065523], "label": "Painting"}]}, "rzD3EEqJy-U": {"duration": 137.63, "subset": "testing", "resolution": "852x480", "url": "https://www.youtube.com/watch?v=rzD3EEqJy-U", "annotations": []}, "pZ8jUGWdKGw": {"duration": 155.992, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pZ8jUGWdKGw", "annotations": []}, "Lf3oTCD4d08": {"duration": 39.41, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Lf3oTCD4d08", "annotations": [{"segment": [0.24589243057722313, 3.1351284898595946], "label": "Ironing clothes"}, {"segment": [30.429188283931357, 39.404262], "label": "Ironing clothes"}]}, "2SYTRqm4Ym4": {"duration": 15.21, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=2SYTRqm4Ym4", "annotations": [{"segment": [0.2, 14.921000000000001], "label": "Fun sliding down"}]}, "Y97KgwAmdrU": {"duration": 143.36, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Y97KgwAmdrU", "annotations": [{"segment": [20.15261315398647, 119.06518084021596], "label": "Baking cookies"}]}, "H9E7PaqXpps": {"duration": 236.333, "subset": "testing", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=H9E7PaqXpps", "annotations": []}, "yCEuQR_jKeo": {"duration": 204.1, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yCEuQR_jKeo", "annotations": []}, "ahY7nqwwJEg": {"duration": 140.81, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ahY7nqwwJEg", "annotations": [{"segment": [7.666832250000001, 137.29110729055012], "label": "Skateboarding"}]}, "oMtB-y49Yo8": {"duration": 222.378, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=oMtB-y49Yo8", "annotations": []}, "KNpeiKm3xyM": {"duration": 78.159, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=KNpeiKm3xyM", "annotations": [{"segment": [22.6791871825273, 78.157844], "label": "Beach soccer"}]}, "v8jzbjQqNhE": {"duration": 211.28, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=v8jzbjQqNhE", "annotations": [{"segment": [110.25365229711228, 149.14731738249122], "label": "Kayaking"}, {"segment": [160.02435931314804, 178.81197719337345], "label": "Kayaking"}]}, "QhaxzjLAVd4": {"duration": 237.12, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=QhaxzjLAVd4", "annotations": []}, "HtCQ-OmHJl4": {"duration": 7.41, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HtCQ-OmHJl4", "annotations": [{"segment": [0.2642667706708268, 7.24930497], "label": "Hammer throw"}]}, "hIYN8cC3pEA": {"duration": 12.586, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=hIYN8cC3pEA", "annotations": []}, "s2cXljpR-xk": {"duration": 173.988, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=s2cXljpR-xk", "annotations": [{"segment": [138.3929953198128, 173.921], "label": "Rock-paper-scissors"}]}, "NzL_uQyQjIw": {"duration": 182.996, 
"subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NzL_uQyQjIw", "annotations": [{"segment": [22.62788735074195, 182.22003688345225], "label": "Wakeboarding"}]}, "jqrLD8zs7R4": {"duration": 173.8, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jqrLD8zs7R4", "annotations": [{"segment": [25.30379502, 151.70114231999997], "label": "Tango"}]}, "6O4S2CpRB0A": {"duration": 136.72, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6O4S2CpRB0A", "annotations": []}, "3tfGg2rVv6o": {"duration": 187.2, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=3tfGg2rVv6o", "annotations": [{"segment": [23.982243900845724, 113.7632843992881], "label": "Baton twirling"}]}, "nv22QK8brp4": {"duration": 94.041, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=nv22QK8brp4", "annotations": [{"segment": [13.49728096099844, 89.34613157878314], "label": "River tubing"}]}, "MMB5Cn3JCGI": {"duration": 20.23, "subset": "validation", "resolution": "632x360", "url": "https://www.youtube.com/watch?v=MMB5Cn3JCGI", "annotations": [{"segment": [0.26724388533541343, 18.785673116224647], "label": "High jump"}]}, "lmYmYYXs1mo": {"duration": 193.423, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lmYmYYXs1mo", "annotations": [{"segment": [0.01, 193.42222222222222], "label": "Sharpening knives"}]}, "YTWXDIMEjpg": {"duration": 226.163, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YTWXDIMEjpg", "annotations": [{"segment": [46.07661355016399, 55.573321265909705], "label": "Wakeboarding"}, {"segment": [68.5873281356353, 106.22242908322013], "label": "Wakeboarding"}, {"segment": [123.10546502232363, 189.58241903254358], "label": "Wakeboarding"}]}, "SYifUxnoov8": {"duration": 10.1, "subset": "testing", "resolution": "380x294", "url": 
"https://www.youtube.com/watch?v=SYifUxnoov8", "annotations": []}, "p066OEa7IeY": {"duration": 88.074, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=p066OEa7IeY", "annotations": []}, "lzb6HHN59K4": {"duration": 150.814, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lzb6HHN59K4", "annotations": [{"segment": [0.01, 94.79679095163807], "label": "Grooming dog"}]}, "f6vZaMgqDvQ": {"duration": 161.24, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=f6vZaMgqDvQ", "annotations": []}, "yQe9_PLHcIs": {"duration": 138.35, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yQe9_PLHcIs", "annotations": []}, "q7Pp1TAgv3I": {"duration": 226.395, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=q7Pp1TAgv3I", "annotations": [{"segment": [3.531896377895932, 63.92732443991637], "label": "Arm wrestling"}, {"segment": [68.87197936897068, 113.37387373045942], "label": "Arm wrestling"}, {"segment": [123.61637322635762, 162.82042302100248], "label": "Arm wrestling"}, {"segment": [176.24162925700702, 190.7224028705148], "label": "Arm wrestling"}, {"segment": [197.78619556942277, 219.33076330109205], "label": "Arm wrestling"}]}, "rzIaKwWJDZI": {"duration": 201.875, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rzIaKwWJDZI", "annotations": [{"segment": [37.156377535101406, 175.39069734789393], "label": "Making a cake"}]}, "HwTSF0VgmMU": {"duration": 56.15, "subset": "validation", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=HwTSF0VgmMU", "annotations": [{"segment": [1.489047512920925, 56.145849], "label": "Doing kickboxing"}]}, "Sx7YgFLnwfM": {"duration": 226.673, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Sx7YgFLnwfM", "annotations": [{"segment": [0.01, 0.02], "label": 
"Rock-paper-scissors"}, {"segment": [24.748003120124807, 44.192862714508585], "label": "Rock-paper-scissors"}, {"segment": [80.25423868954759, 82.72903900156007], "label": "Rock-paper-scissors"}, {"segment": [183.1513853354134, 186.33355226209048], "label": "Rock-paper-scissors"}]}, "H_dERoTis5Y": {"duration": 124.9, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=H_dERoTis5Y", "annotations": [{"segment": [24.55135278627145, 31.371173004680184], "label": "Paintball"}, {"segment": [45.205665447737914, 54.55856174726989], "label": "Paintball"}, {"segment": [70.34157425273011, 97.81570713260531], "label": "Paintball"}]}, "xBR7YEKPgDA": {"duration": 68.917, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xBR7YEKPgDA", "annotations": [{"segment": [0.2, 66.3940171339564], "label": "Hurling"}]}, "5kmGgH4xFW0": {"duration": 179.375, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5kmGgH4xFW0", "annotations": [{"segment": [14.547101404056162, 91.75863962558502], "label": "Snow tubing"}, {"segment": [142.409624024961, 165.91140873634947], "label": "Snow tubing"}]}, "ChCIKwDB4aw": {"duration": 130.5, "subset": "testing", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=ChCIKwDB4aw", "annotations": []}, "0YHCiC7IIg8": {"duration": 128.08, "subset": "validation", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=0YHCiC7IIg8", "annotations": [{"segment": [7.1933317441497655, 121.28756579719187], "label": "Playing harmonica"}]}, "MDWaKr7Gu5Q": {"duration": 155.504, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MDWaKr7Gu5Q", "annotations": []}, "5n7NCViB5TU": {"duration": 121.44, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=5n7NCViB5TU", "annotations": [{"segment": [24.250181653524645, 38.08036312324492], "label": "Discus throw"}, {"segment": 
[62.14109007176286, 71.61381721684867], "label": "Discus throw"}, {"segment": [97.00072596567863, 106.28399856786271], "label": "Discus throw"}]}, "N8nv95D4-pw": {"duration": 66.94, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=N8nv95D4-pw", "annotations": [{"segment": [18.79838262918272, 64.95885528549142], "label": "Doing step aerobics"}]}, "SthTqCtPg7s": {"duration": 216.81, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SthTqCtPg7s", "annotations": [{"segment": [76.43957655449076, 158.29080454646757], "label": "Bungee jumping"}, {"segment": [173.17284463026522, 190.7607116630265], "label": "Bungee jumping"}]}, "LfMnWmwQBpI": {"duration": 223.399, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LfMnWmwQBpI", "annotations": []}, "Vshvpb9eecg": {"duration": 108.972, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Vshvpb9eecg", "annotations": [{"segment": [9.350107011083164, 27.37031325062526], "label": "Baking cookies"}, {"segment": [85.680980610653, 92.14105454558317], "label": "Baking cookies"}]}, "02yDi9BaDO8": {"duration": 77.81, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=02yDi9BaDO8", "annotations": [{"segment": [7.757212655226209, 73.32990088143525], "label": "Doing step aerobics"}]}, "-0NfmccuL9Q": {"duration": 215.691, "subset": "training", "resolution": "1906x1080", "url": "https://www.youtube.com/watch?v=-0NfmccuL9Q", "annotations": [{"segment": [20.86238664784687, 212.6617477651487], "label": "Changing car wheel"}]}, "WuO75Sb0Kgg": {"duration": 135.906, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=WuO75Sb0Kgg", "annotations": [{"segment": [0.01, 126.32317628705147], "label": "Putting in contact lenses"}]}, "uLhOFVNmRu8": {"duration": 87.96000000000001, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=uLhOFVNmRu8", "annotations": [{"segment": [22.229297971918875, 25.522527301092044], "label": "Triple jump"}, {"segment": [45.69355694227769, 81.91907801092043], "label": "Triple jump"}]}, "Wr7YbcQ_Q9g": {"duration": 40.194, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Wr7YbcQ_Q9g", "annotations": [{"segment": [1.2530733229329174, 26.25188611544462], "label": "Cutting the grass"}]}, "xhp9YNqXV20": {"duration": 104.188, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xhp9YNqXV20", "annotations": []}, "A9-AGBIG_x4": {"duration": 167.56, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=A9-AGBIG_x4", "annotations": []}, "LdzaFk5VrD0": {"duration": 161.17, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LdzaFk5VrD0", "annotations": [{"segment": [48.90406811918735, 55.18993805738624], "label": "Putting in contact lenses"}]}, "ULBhK8jXNws": {"duration": 184.622, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ULBhK8jXNws", "annotations": [{"segment": [34.56259458541607, 38.882918908593076], "label": "Surfing"}, {"segment": [43.779286474860356, 46.371481068766556], "label": "Surfing"}, {"segment": [81.5101188972729, 167.34056174570983], "label": "Surfing"}]}, "U01xasUtlvw": {"duration": 188.9, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=U01xasUtlvw", "annotations": [{"segment": [90.46863114252461, 110.21259950261955], "label": "Fixing bicycle"}]}, "T3XGYHEFVIQ": {"duration": 26.1, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=T3XGYHEFVIQ", "annotations": [{"segment": [2.7601043635418123, 21.79671078966934], "label": "Welding"}]}, "QtiqvB4uw2Y": {"duration": 36.99, "subset": "training", "resolution": "640x360", "url": 
"https://www.youtube.com/watch?v=QtiqvB4uw2Y", "annotations": [{"segment": [6.544576443057722, 29.09020982839313], "label": "Blowing leaves"}]}, "JGKjMJm0HRI": {"duration": 41.239, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JGKjMJm0HRI", "annotations": [{"segment": [0.2, 37.0057454692608], "label": "Riding bumper cars"}]}, "PDMdNcUsPZ0": {"duration": 150.814, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=PDMdNcUsPZ0", "annotations": []}, "NA8fCmbHwU8": {"duration": 83.894, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NA8fCmbHwU8", "annotations": [{"segment": [12.989148615282895, 36.38006980687168], "label": "Beach soccer"}, {"segment": [40.43101146575019, 64.47531034425485], "label": "Beach soccer"}]}, "1SQAnbh_lcM": {"duration": 224.12, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=1SQAnbh_lcM", "annotations": [{"segment": [5.942865834633385, 214.99191107644305], "label": "Sharpening knives"}]}, "7H5oYHs7EJw": {"duration": 146.635, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=7H5oYHs7EJw", "annotations": [{"segment": [21.46971461066254, 142.979163258242], "label": "Skiing"}]}, "o7O6XJYVDRE": {"duration": 151.57999999999998, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=o7O6XJYVDRE", "annotations": [{"segment": [17.262605188767548, 129.114827850234], "label": "Wrapping presents"}]}, "m7XKITsRtVw": {"duration": 79.74, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=m7XKITsRtVw", "annotations": [{"segment": [51.499612670826835, 55.97783985959438], "label": "Drinking coffee"}]}, "S2VKbwm0uE8": {"duration": 137.65, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=S2VKbwm0uE8", "annotations": [{"segment": [94.98976989079563, 118.60304602184087], 
"label": "Bungee jumping"}]}, "bJx4unQR6nU": {"duration": 71.26, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bJx4unQR6nU", "annotations": [{"segment": [40.68941534804249, 50.472661697347895], "label": "Removing curlers"}]}, "cQnoiDzv1vk": {"duration": 22.27, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cQnoiDzv1vk", "annotations": []}, "orR865dd2l4": {"duration": 59.653, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=orR865dd2l4", "annotations": [{"segment": [0.01, 59.651132], "label": "Shuffleboard"}]}, "qNxA4UTadGo": {"duration": 180.09, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=qNxA4UTadGo", "annotations": [{"segment": [32.59110813956368, 94.9637461307976], "label": "Springboard diving"}, {"segment": [113.22600500210486, 159.30308781435258], "label": "Springboard diving"}]}, "bH6KL0ai3Ww": {"duration": 55.66, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bH6KL0ai3Ww", "annotations": [{"segment": [0.8672126833073323, 52.37964607176286], "label": "Using the pommel horse"}]}, "MVUqd8iVUEk": {"duration": 42.77, "subset": "validation", "resolution": "292x240", "url": "https://www.youtube.com/watch?v=MVUqd8iVUEk", "annotations": [{"segment": [6.5345054602184085, 40.873998439937594], "label": "Using the pommel horse"}]}, "mgoEYrTMKtE": {"duration": 82.153, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mgoEYrTMKtE", "annotations": []}, "2YSsqivrvR4": {"duration": 74.61, "subset": "training", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=2YSsqivrvR4", "annotations": [{"segment": [5.470309770202808, 68.32067734274571], "label": "Playing guitarra"}]}, "cn7MtTHk9ZA": {"duration": 41.43, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cn7MtTHk9ZA", "annotations": 
[]}, "T_CvR3zm6K8": {"duration": 193.144, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=T_CvR3zm6K8", "annotations": [{"segment": [10.244745419748764, 19.28422667246826], "label": "Beach soccer"}, {"segment": [23.80396729882801, 54.5382035580743], "label": "Beach soccer"}, {"segment": [59.961892309705995, 85.5737558590779], "label": "Beach soccer"}, {"segment": [90.39481252719497, 118.11588836886808], "label": "Beach soccer"}, {"segment": [131.67511024794734, 136.4961669160644], "label": "Beach soccer"}, {"segment": [140.4132754589095, 169.94224599687988], "label": "Beach soccer"}, {"segment": [179.5843592449298, 184.40541586895478], "label": "Beach soccer"}]}, "2VYQ4q24D-4": {"duration": 92.95, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2VYQ4q24D-4", "annotations": [{"segment": [0.2, 44.880358814352576], "label": "Longboarding"}, {"segment": [45.169909516380656, 90.79370826833073], "label": "Longboarding"}]}, "_1JGrGGCY9k": {"duration": 184.51, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_1JGrGGCY9k", "annotations": [{"segment": [16.98256227347894, 181.6270644648915], "label": "Zumba"}]}, "CbQPrRwG2BM": {"duration": 96.71000000000001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CbQPrRwG2BM", "annotations": [{"segment": [16.052534990639625, 33.235530191887676], "label": "Getting a haircut"}]}, "9x-Ffd1kP1g": {"duration": 152.393, "subset": "testing", "resolution": "638x360", "url": "https://www.youtube.com/watch?v=9x-Ffd1kP1g", "annotations": []}, "0GpNcvAVWVg": {"duration": 194.096, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0GpNcvAVWVg", "annotations": [{"segment": [44.97704754818594, 53.73097045068027], "label": "Fun sliding down"}]}, "UeMcMSTO-Yg": {"duration": 128.57, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=UeMcMSTO-Yg", "annotations": [{"segment": [0, 2.5071935842433697], "label": "Grooming horse"}]}, "Xq2LIzE5eDs": {"duration": 119.0, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Xq2LIzE5eDs", "annotations": [{"segment": [3.7130192199687992, 44.92753256162247], "label": "Playing water polo"}, {"segment": [57.366146948517944, 118.44531311700469], "label": "Playing water polo"}]}, "AUHORHUgC-s": {"duration": 144.057, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AUHORHUgC-s", "annotations": [{"segment": [59.33053010920437, 68.54474122464897], "label": "Sharpening knives"}, {"segment": [104.5026382605304, 108.77263853354133], "label": "Sharpening knives"}, {"segment": [117.98684964898595, 127.42579762090483], "label": "Sharpening knives"}]}, "4XEowfJ-1W0": {"duration": 193.144, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4XEowfJ-1W0", "annotations": [{"segment": [21.9934992199688, 170.5249391575663], "label": "Having an ice cream"}]}, "cWrOETjOOTc": {"duration": 4.55, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cWrOETjOOTc", "annotations": [{"segment": [0.01, 4.416210673946957], "label": "Fixing bicycle"}]}, "YeikEC85CGk": {"duration": 206.937, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=YeikEC85CGk", "annotations": [{"segment": [15.49600517898267, 43.90534800711757], "label": "Beach soccer"}, {"segment": [50.68485027292248, 69.08635642296441], "label": "Beach soccer"}, {"segment": [74.89735836508291, 161.093887173174], "label": "Beach soccer"}, {"segment": [167.22772255652131, 184.33789331357255], "label": "Beach soccer"}]}, "tSk1GWyofaU": {"duration": 37.13, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tSk1GWyofaU", "annotations": [{"segment": [0.8388208377535101, 
36.58994343993759], "label": "Springboard diving"}, {"segment": [33.88598304, 34.40131968], "label": "Springboard diving"}]}, "egowZOcbEkI": {"duration": 163.63, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=egowZOcbEkI", "annotations": []}, "CIgdBoHjGXU": {"duration": 223.98, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CIgdBoHjGXU", "annotations": [{"segment": [34.59280238148302, 104.82667388328186], "label": "Getting a haircut"}]}, "S3OJBVHL9y8": {"duration": 176.217, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=S3OJBVHL9y8", "annotations": [{"segment": [0, 164.34364742589705], "label": "Tug of war"}]}, "DfFqlrv7F2g": {"duration": 78.461, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DfFqlrv7F2g", "annotations": [{"segment": [22.132544461778473, 27.757390015600624], "label": "Putting on shoes"}, {"segment": [36.072379095163804, 41.08582839313573], "label": "Putting on shoes"}]}, "8r54fZNICBQ": {"duration": 40.566, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8r54fZNICBQ", "annotations": []}, "eGTZNTfgg24": {"duration": 94.34, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=eGTZNTfgg24", "annotations": [{"segment": [32.52688196235332, 39.591543379095164], "label": "Long jump"}, {"segment": [56.07575474882996, 71.38252244929797], "label": "Long jump"}]}, "bFgXdHQnTtQ": {"duration": 165.12, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bFgXdHQnTtQ", "annotations": [{"segment": [4.894267212865386, 164.6019339781591], "label": "Cheerleading"}]}, "guwcWEMMlBA": {"duration": 193.423, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=guwcWEMMlBA", "annotations": []}, "k3MFZmhXynI": {"duration": 29.0, "subset": "training", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=k3MFZmhXynI", "annotations": [{"segment": [3.7576595700000004, 17.78863338], "label": "Mowing the lawn"}, {"segment": [3.75765957, 17.78863338], "label": "Mowing the lawn"}]}, "0-4oritGq84": {"duration": 199.46, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0-4oritGq84", "annotations": []}, "8kyg5u6o21k": {"duration": 165.257, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8kyg5u6o21k", "annotations": [{"segment": [14.69518842794528, 153.1393308954758], "label": "Longboarding"}]}, "oBZ_oAUGO0E": {"duration": 119.19, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=oBZ_oAUGO0E", "annotations": [{"segment": [17.292405149267193, 119.18743764172335], "label": "Getting a tattoo"}]}, "EE1UoJSNyms": {"duration": 35.434, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EE1UoJSNyms", "annotations": [{"segment": [0.01, 35.433649], "label": "Raking leaves"}]}, "MYWnPFGvxwQ": {"duration": 46.14, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=MYWnPFGvxwQ", "annotations": [{"segment": [0, 46.137686], "label": "Mowing the lawn"}]}, "MEuGzNPI1pw": {"duration": 79.6, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MEuGzNPI1pw", "annotations": [{"segment": [19.247572882507136, 56.5009388923557], "label": "Using parallel bars"}]}, "qhjDVfcIxqk": {"duration": 180.442, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qhjDVfcIxqk", "annotations": []}, "LQAFcEvifrU": {"duration": 205.265, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=LQAFcEvifrU", "annotations": [{"segment": [117.1571249378445, 182.59911745554254], "label": "Camel ride"}]}, "k1GP0cJOXbQ": {"duration": 192.33, "subset": "testing", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=k1GP0cJOXbQ", "annotations": []}, "8teo_0gxzgk": {"duration": 70.752, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8teo_0gxzgk", "annotations": []}, "HsGz6S2MBU4": {"duration": 13.468, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=HsGz6S2MBU4", "annotations": [{"segment": [0.2, 10.933229329173168], "label": "Putting in contact lenses"}]}, "LbS-C68GTX8": {"duration": 164.07, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LbS-C68GTX8", "annotations": [{"segment": [6.652957878315132, 160.20228705148205], "label": "Dodgeball"}]}, "yHtapvYRcMw": {"duration": 146.541, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=yHtapvYRcMw", "annotations": [{"segment": [14.63125186895476, 142.6547057223089], "label": "Capoeira"}]}, "AP_zXlTlZVk": {"duration": 15.84, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=AP_zXlTlZVk", "annotations": []}, "3bC-rAjOXJE": {"duration": 7.2, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3bC-rAjOXJE", "annotations": []}, "RMkaNGdydws": {"duration": 173.105, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RMkaNGdydws", "annotations": [{"segment": [52.76593333812053, 141.87575504142106], "label": "Hanging wallpaper"}]}, "YKZnKmOizms": {"duration": 235.404, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YKZnKmOizms", "annotations": []}, "XD3yFrJHiv8": {"duration": 157.316, "subset": "training", "resolution": "636x360", "url": "https://www.youtube.com/watch?v=XD3yFrJHiv8", "annotations": [{"segment": [74.85357363954422, 112.89391434160768], "label": "Beach soccer"}]}, "YcjLd_XBK5Y": {"duration": 62.741, "subset": "validation", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=YcjLd_XBK5Y", "annotations": [{"segment": [0.2, 62.501], "label": "Carving jack-o-lanterns"}]}, "d4B9z_WJAKs": {"duration": 238.49, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=d4B9z_WJAKs", "annotations": [{"segment": [8.273923288585532, 227.81993808079525], "label": "Playing badminton"}]}, "65mQtjchFdQ": {"duration": 142.525, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=65mQtjchFdQ", "annotations": []}, "A904-cbaYDw": {"duration": 156.81, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=A904-cbaYDw", "annotations": []}, "YZJghuPmff8": {"duration": 150.071, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YZJghuPmff8", "annotations": [{"segment": [10.76949466005851, 150.070566], "label": "Braiding hair"}]}, "n-1KBnDiWQU": {"duration": 30.3, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=n-1KBnDiWQU", "annotations": [{"segment": [2.690915756630265, 30.024954758190326], "label": "Kayaking"}]}, "mhHoL-9mY1E": {"duration": 123.37, "subset": "training", "resolution": "568x320", "url": "https://www.youtube.com/watch?v=mhHoL-9mY1E", "annotations": [{"segment": [16.3337908, 104.39438907], "label": "Cleaning shoes"}]}, "4eHP5IvDl6o": {"duration": 71.24, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4eHP5IvDl6o", "annotations": [{"segment": [9.219474258970358, 71.201], "label": "Grooming horse"}]}, "kuJO1VapxuQ": {"duration": 198.74, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kuJO1VapxuQ", "annotations": [{"segment": [6.304001079999999, 190.1674525600624], "label": "Belly dance"}]}, "ReKUs0km4X8": {"duration": 132.82, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ReKUs0km4X8", "annotations": [{"segment": 
[65.47655963806552, 84.53935548205928], "label": "Shot put"}, {"segment": [104.22376423400937, 118.52086111700467], "label": "Shot put"}]}, "MSr04jhuTes": {"duration": 132.98, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MSr04jhuTes", "annotations": []}, "bUHE8XcPZgs": {"duration": 74.68, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bUHE8XcPZgs", "annotations": [{"segment": [1.5457725, 70.72810030811233], "label": "Using parallel bars"}]}, "OEQM6wYtYlk": {"duration": 52.687, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=OEQM6wYtYlk", "annotations": [{"segment": [0.01, 50.47909576379176], "label": "Fun sliding down"}]}, "xc0Wm-TH5K8": {"duration": 25.01, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xc0Wm-TH5K8", "annotations": [{"segment": [6.398199032761309, 17.517020522620903], "label": "Clean and jerk"}]}, "rqnzzNYt2cE": {"duration": 177.494, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rqnzzNYt2cE", "annotations": [{"segment": [45.43816303855682, 158.77507581119002], "label": "Longboarding"}]}, "CGIrDfEP5lE": {"duration": 95.09, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=CGIrDfEP5lE", "annotations": [{"segment": [0, 22.547631853573463], "label": "Mowing the lawn"}, {"segment": [44.20522560766376, 92.56396234624896], "label": "Mowing the lawn"}]}, "56kqqlco5H0": {"duration": 105.21000000000001, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=56kqqlco5H0", "annotations": [{"segment": [15.161630265210608, 91.70737441497661], "label": "Using the balance beam"}]}, "VXCV3KUtCdk": {"duration": 236.12, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VXCV3KUtCdk", "annotations": [{"segment": [43.63856468096724, 53.58152878549142], 
"label": "Rock climbing"}, {"segment": [68.31184597737911, 92.24861141419657], "label": "Rock climbing"}, {"segment": [126.49659888533542, 133.86175748127926], "label": "Rock climbing"}, {"segment": [157.06200705850233, 189.10044695085804], "label": "Rock climbing"}]}, "uzXbaoWOm5o": {"duration": 58.7, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=uzXbaoWOm5o", "annotations": [{"segment": [50.56698907956318, 51.849485179407175], "label": "Drinking coffee"}]}, "34_5muoYtiQ": {"duration": 73.93, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=34_5muoYtiQ", "annotations": []}, "xIAq1OdWQM0": {"duration": 151.44, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xIAq1OdWQM0", "annotations": [{"segment": [16.537968947329322, 21.735616330775677], "label": "High jump"}, {"segment": [42.99871926305624, 46.30631305252209], "label": "High jump"}]}, "JAEMwQqh51c": {"duration": 76.44, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JAEMwQqh51c", "annotations": [{"segment": [13.637660400000001, 58.46295866302653], "label": "Chopping wood"}]}, "lfH_S2LTEXA": {"duration": 218.29, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lfH_S2LTEXA", "annotations": [{"segment": [19.3055676, 35.757462298491944], "label": "Rock climbing"}, {"segment": [42.56840749820469, 48.698258177946165], "label": "Rock climbing"}, {"segment": [69.47164103707006, 196.83631627169848], "label": "Rock climbing"}]}, "1FFbrlvDQ7c": {"duration": 196.279, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1FFbrlvDQ7c", "annotations": [{"segment": [47.14225897035882, 99.18241497659906], "label": "Capoeira"}, {"segment": [116.3250546021841, 147.86129485179407], "label": "Capoeira"}]}, "Zfo0XXcWXiU": {"duration": 65.597, "subset": "training", "resolution": "720x1280", "url": 
"https://www.youtube.com/watch?v=Zfo0XXcWXiU", "annotations": [{"segment": [0.01, 65.59637], "label": "Blow-drying hair"}]}, "lNvX6h3o4EA": {"duration": 32.79, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lNvX6h3o4EA", "annotations": [{"segment": [3.0177824399375974, 28.745656461778474], "label": "Playing guitarra"}]}, "3RRZMS4Qcvc": {"duration": 141.503, "subset": "testing", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=3RRZMS4Qcvc", "annotations": []}, "AsTfp_4Um1A": {"duration": 140.27, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=AsTfp_4Um1A", "annotations": [{"segment": [18.047679407176286, 43.64257020280811], "label": "Ballet"}]}, "An9hDoF1BPU": {"duration": 79.552, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=An9hDoF1BPU", "annotations": []}, "3JHIcli-Wlg": {"duration": 48.65, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3JHIcli-Wlg", "annotations": [{"segment": [13.414722308892355, 48.561], "label": "Using the pommel horse"}]}, "yrf93aLQXBE": {"duration": 130.845, "subset": "training", "resolution": "510x360", "url": "https://www.youtube.com/watch?v=yrf93aLQXBE", "annotations": [{"segment": [15.513537703588145, 125.33305460530423], "label": "Surfing"}]}, "bJ5YjjFLGyA": {"duration": 218.78, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bJ5YjjFLGyA", "annotations": [{"segment": [129.6970309288562, 183.28238318104152], "label": "Preparing pasta"}]}, "JouywOCeGHI": {"duration": 206.17000000000002, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JouywOCeGHI", "annotations": [{"segment": [127.86973981486527, 174.10248747841666], "label": "Rock climbing"}]}, "1lu7Sbu3a6o": {"duration": 61.719, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=1lu7Sbu3a6o", "annotations": [{"segment": [32.23578003120125, 61.681], "label": "Snow tubing"}]}, "0FgX7ryQ8hI": {"duration": 182.579, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0FgX7ryQ8hI", "annotations": []}, "y3E6Qx4ZF1k": {"duration": 201.735, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=y3E6Qx4ZF1k", "annotations": []}, "fQDj7pnGdVY": {"duration": 164.24, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fQDj7pnGdVY", "annotations": [{"segment": [4.611895953389156, 162.18500648049923], "label": "Doing kickboxing"}]}, "k8ZXFB6xqZs": {"duration": 216.06, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=k8ZXFB6xqZs", "annotations": [{"segment": [45.487628705148204, 59.97628081123245], "label": "Tennis serve with ball bouncing"}]}, "okxCZZIE-qU": {"duration": 53.85, "subset": "testing", "resolution": "240x176", "url": "https://www.youtube.com/watch?v=okxCZZIE-qU", "annotations": []}, "ySQBZ8hqBo4": {"duration": 85.87, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ySQBZ8hqBo4", "annotations": [{"segment": [0, 85.821], "label": "Using the balance beam"}]}, "XQWebGbAjxU": {"duration": 33.135, "subset": "training", "resolution": "293x240", "url": "https://www.youtube.com/watch?v=XQWebGbAjxU", "annotations": [{"segment": [10.40599375975039, 14.063546021840873], "label": "Curling"}]}, "xMChLTVPzNI": {"duration": 183.44, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xMChLTVPzNI", "annotations": [{"segment": [45.16035503, 147.75666913], "label": "Preparing pasta"}]}, "YonmpJvwmKM": {"duration": 232.39, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YonmpJvwmKM", "annotations": [{"segment": [11.681780907385745, 214.7473257812175], "label": "Tai chi"}]}, 
"UZNw4utsywI": {"duration": 170.02, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=UZNw4utsywI", "annotations": [{"segment": [19.355932917316693, 149.04836193447738], "label": "Skateboarding"}]}, "aw4ehW-wTKA": {"duration": 143.662, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aw4ehW-wTKA", "annotations": [{"segment": [17.929717067082684, 137.6105784898596], "label": "Slacklining"}]}, "kbRciA51ouY": {"duration": 122.88, "subset": "training", "resolution": "424x236", "url": "https://www.youtube.com/watch?v=kbRciA51ouY", "annotations": [{"segment": [0.01, 122.88], "label": "Knitting"}]}, "4R37E4Kevs4": {"duration": 26.4, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4R37E4Kevs4", "annotations": [{"segment": [0.28787363494539786, 23.52338845553822], "label": "Smoking hookah"}]}, "2DvHUEFc5VA": {"duration": 191.426, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=2DvHUEFc5VA", "annotations": [{"segment": [17.918116887940826, 20.90446970259763], "label": "Making a cake"}, {"segment": [27.17581061337692, 182.16752034321374], "label": "Making a cake"}]}, "5RKzXZ-0xQ8": {"duration": 14.4, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5RKzXZ-0xQ8", "annotations": [{"segment": [0, 9.073531800312013], "label": "Doing nails"}]}, "4Q5YJKHa5W0": {"duration": 131.36, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=4Q5YJKHa5W0", "annotations": [{"segment": [13.524873828393135, 113.11712656474258], "label": "Making a sandwich"}]}, "hKa9wV1orM8": {"duration": 100.008, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=hKa9wV1orM8", "annotations": [{"segment": [0.20000000000000004, 97.32939781591263], "label": "Riding bumper cars"}]}, "tkQK_2celDY": {"duration": 203.5, "subset": "training", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tkQK_2celDY", "annotations": [{"segment": [94.48908312, 116.85366432000002], "label": "Paintball"}, {"segment": [61.11299391965679, 127.14677436271451], "label": "Paintball"}]}, "-lJS58hyo1c": {"duration": 35.3, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=-lJS58hyo1c", "annotations": [{"segment": [1.1838191809672387, 32.67891553042122], "label": "Clean and jerk"}]}, "rteN6_HkMkI": {"duration": 121.3, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rteN6_HkMkI", "annotations": [{"segment": [0, 6.097683167722007], "label": "Walking the dog"}, {"segment": [30.471238742488364, 36.13950748080612], "label": "Walking the dog"}, {"segment": [76.95104239669396, 81.48565738734817], "label": "Walking the dog"}]}, "xJNaedisXfQ": {"duration": 184.646, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xJNaedisXfQ", "annotations": []}, "H1cKUnazzFM": {"duration": 30.4, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=H1cKUnazzFM", "annotations": [{"segment": [11.644127925117006, 23.47759126365055], "label": "Hammer throw"}]}, "iMiKGoQ7RjE": {"duration": 127.896, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iMiKGoQ7RjE", "annotations": [{"segment": [10.973830926026157, 31.724347586148344], "label": "Croquet"}, {"segment": [55.86677562340589, 94.57446917316693], "label": "Croquet"}]}, "VAG6ECk5WYo": {"duration": 214.437, "subset": "validation", "resolution": "852x480", "url": "https://www.youtube.com/watch?v=VAG6ECk5WYo", "annotations": [{"segment": [55.178728549141965, 177.2407644305772], "label": "Curling"}]}, "45WfFPM5yvw": {"duration": 170.16, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=45WfFPM5yvw", "annotations": [{"segment": [23.890833837435128, 169.9558276643991], 
"label": "Windsurfing"}]}, "oy1XjDer7o4": {"duration": 77.346, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=oy1XjDer7o4", "annotations": [{"segment": [11.342422452418097, 20.392227600624025], "label": "Cutting the grass"}, {"segment": [27.39074358190328, 33.90660328861154], "label": "Cutting the grass"}, {"segment": [36.56121279875195, 38.25050975975039], "label": "Cutting the grass"}, {"segment": [39.215822308892356, 41.02578333853354], "label": "Cutting the grass"}, {"segment": [44.16304912324493, 45.61101794695788], "label": "Cutting the grass"}, {"segment": [55.74679971294852, 59.366721772230896], "label": "Cutting the grass"}, {"segment": [65.03793299843994, 70.22648795007801], "label": "Cutting the grass"}]}, "8eGaK2mVEhY": {"duration": 78.46000000000001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8eGaK2mVEhY", "annotations": [{"segment": [23.99095803432137, 77.48100222308892], "label": "Pole vault"}]}, "nHuu6aInr1w": {"duration": 190.13, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nHuu6aInr1w", "annotations": [{"segment": [131.39683303794737, 174.40482564430576], "label": "Starting a campfire"}]}, "NHDjJ8auZQ0": {"duration": 111.74000000000001, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NHDjJ8auZQ0", "annotations": [{"segment": [0.6972506708268331, 110.68854399375975], "label": "Ping-pong"}]}, "M5UrBI13R3s": {"duration": 40.543, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=M5UrBI13R3s", "annotations": [{"segment": [28.71462670514821, 35.98815549609985], "label": "Peeling potatoes"}]}, "rFM3OUUL5fI": {"duration": 227.788, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rFM3OUUL5fI", "annotations": [{"segment": [39.8006685981725, 179.10300869177624], "label": "Slacklining"}]}, "jimvzigX1ak": 
{"duration": 32.183, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jimvzigX1ak", "annotations": [{"segment": [0.01, 32.141], "label": "Playing blackjack"}]}, "Tywf5EzWntY": {"duration": 157.14, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Tywf5EzWntY", "annotations": [{"segment": [0.01, 157.121], "label": "Wakeboarding"}]}, "NhM6jEV08s4": {"duration": 13.42, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=NhM6jEV08s4", "annotations": [{"segment": [5.488478159126365, 10.42998138], "label": "Chopping wood"}]}, "o_e0KLnKpj8": {"duration": 132.31, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=o_e0KLnKpj8", "annotations": [{"segment": [3.970533069999999, 81.51963497999999], "label": "Mowing the lawn"}, {"segment": [122.5406858096724, 132.03666600000003], "label": "Mowing the lawn"}]}, "tTIsHfF0UgA": {"duration": 76.859, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=tTIsHfF0UgA", "annotations": [{"segment": [0.01, 76.85804988662132], "label": "Having an ice cream"}]}, "0uh3HP895OA": {"duration": 131.634, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0uh3HP895OA", "annotations": [{"segment": [58.690680187207484, 91.31941497659906], "label": "Knitting"}]}, "1o9RGnujlkI": {"duration": 130.5, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1o9RGnujlkI", "annotations": [{"segment": [23.402051482059285, 96.47194071762871], "label": "Archery"}, {"segment": [5.49523712948518, 15.264547581903278], "label": "Archery"}, {"segment": [106.44477847113885, 130.461], "label": "Archery"}]}, "pouxwDABDrg": {"duration": 222.59, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pouxwDABDrg", "annotations": [{"segment": [2.7779881916365095, 217.7248227051482], 
"label": "Doing step aerobics"}]}, "yJ5zO0T9jgk": {"duration": 152.21, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=yJ5zO0T9jgk", "annotations": [{"segment": [12.822415750390016, 39.179603681747274], "label": "Playing badminton"}, {"segment": [119.91333248049924, 123.47511463338535], "label": "Playing badminton"}]}, "XGwP_Y-x-dI": {"duration": 93.09, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=XGwP_Y-x-dI", "annotations": [{"segment": [19.97241554, 83.77065080999999], "label": "Shoveling snow"}]}, "LvFnL9lr-Yc": {"duration": 90.721, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LvFnL9lr-Yc", "annotations": []}, "eVwRbDXyBbc": {"duration": 7.547, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eVwRbDXyBbc", "annotations": [{"segment": [0.2, 6.876469065420562], "label": "Playing pool"}]}, "2QO41PbOUJ0": {"duration": 85.52, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=2QO41PbOUJ0", "annotations": [{"segment": [3.6668811466458657, 84.07158410764431], "label": "Smoking a cigarette"}]}, "aKacWW7Mn2c": {"duration": 22.78, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aKacWW7Mn2c", "annotations": [{"segment": [0.7462565569422778, 17.66140518096724], "label": "Plataform diving"}]}, "7yndv2xOmn0": {"duration": 106.069, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=7yndv2xOmn0", "annotations": [{"segment": [0.2, 106.021], "label": "Making a cake"}]}, "XHuKkrKjnhQ": {"duration": 228.3, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=XHuKkrKjnhQ", "annotations": [{"segment": [110.40961649350328, 131.42305963258937], "label": "Wrapping presents"}]}, "NVnjzpoY1O8": {"duration": 59.56, "subset": "training", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=NVnjzpoY1O8", "annotations": [{"segment": [0, 9.477436296411856], "label": "Breakdancing"}, {"segment": [14.587818613104524, 24.158170951638063], "label": "Breakdancing"}, {"segment": [27.503148468018722, 32.42769870046802], "label": "Breakdancing"}, {"segment": [37.445164975039, 41.16180665990639], "label": "Breakdancing"}]}, "h_Kt_rqmODs": {"duration": 72.49, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=h_Kt_rqmODs", "annotations": [{"segment": [0.01, 72.461], "label": "Hopscotch"}]}, "7wBrvMGZROQ": {"duration": 5.04, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7wBrvMGZROQ", "annotations": [{"segment": [0.01, 5.038730158730159], "label": "Using parallel bars"}]}, "rMZtiiLAqoY": {"duration": 118.144, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rMZtiiLAqoY", "annotations": [{"segment": [65.9612692119612, 98.85003715286322], "label": "Table soccer"}]}, "7A7XJLhRVVE": {"duration": 184.69, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7A7XJLhRVVE", "annotations": [{"segment": [18.293151326053042, 159.74098283931357], "label": "Sailing"}]}, "w24Pub_hNSw": {"duration": 48.135, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=w24Pub_hNSw", "annotations": [{"segment": [0.01, 48.081], "label": "Using the monkey bar"}]}, "qHNbc4EYQR0": {"duration": 102.49000000000001, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qHNbc4EYQR0", "annotations": []}, "y_HCdBEJbNE": {"duration": 121.603, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=y_HCdBEJbNE", "annotations": [{"segment": [0.01, 121.561], "label": "Plastering"}]}, "PcSnU_nGi5E": {"duration": 82.28999999999999, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=PcSnU_nGi5E", "annotations": [{"segment": [3.976366614664587, 10.646400936037443], "label": "Tennis serve with ball bouncing"}, {"segment": [55.68267394695788, 57.09398595943838], "label": "Tennis serve with ball bouncing"}, {"segment": [74.67123556942278, 77.87876287051482], "label": "Tennis serve with ball bouncing"}]}, "993xtlhuVII": {"duration": 180.47, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=993xtlhuVII", "annotations": [{"segment": [22.21343685, 95.3360502], "label": "Playing violin"}, {"segment": [106.15492695000002, 161.4195225826833], "label": "Playing violin"}]}, "itlF0DEBetk": {"duration": 54.103, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=itlF0DEBetk", "annotations": [{"segment": [7.0054710124805, 41.442003218408736], "label": "Spread mulch"}, {"segment": [44.56492403120125, 53.902493], "label": "Spread mulch"}]}, "6QhZNQlsGys": {"duration": 85.472, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6QhZNQlsGys", "annotations": [{"segment": [3.598076443057723, 5.996794071762871], "label": "Slacklining"}, {"segment": [11.860326053042122, 19.056478939157568], "label": "Slacklining"}, {"segment": [38.64600624024961, 52.23873946957878], "label": "Slacklining"}, {"segment": [57.835747269890796, 63.832541341653666], "label": "Slacklining"}, {"segment": [77.8250608424337, 82.48923400936037], "label": "Slacklining"}]}, "047dUA_39x8": {"duration": 81.131, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=047dUA_39x8", "annotations": [{"segment": [27.069163806552265, 75.00941185647426], "label": "Calf roping"}]}, "Z6WJ0A9VvxQ": {"duration": 218.663, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Z6WJ0A9VvxQ", "annotations": [{"segment": [53.882929797191885, 218.601], "label": "Running a marathon"}]}, "-7wfTI8Qv1Q": 
{"duration": 8.081, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-7wfTI8Qv1Q", "annotations": [{"segment": [0.2, 7.880544217687076], "label": "Swinging at the playground"}]}, "wb8TkqxxEuo": {"duration": 234.197, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wb8TkqxxEuo", "annotations": [{"segment": [0.01, 0.02], "label": "Tug of war"}]}, "gdyEfPbUEjw": {"duration": 156.62, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gdyEfPbUEjw", "annotations": [{"segment": [10.866400156006241, 152.00750780031203], "label": "Windsurfing"}]}, "9PFlQcBl_jU": {"duration": 72.865, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9PFlQcBl_jU", "annotations": [{"segment": [0.01, 72.86421768707483], "label": "Bullfighting"}]}, "lAsPxkZD6Xc": {"duration": 132.17, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=lAsPxkZD6Xc", "annotations": [{"segment": [11.132003120124805, 83.2838751950078], "label": "Getting a tattoo"}]}, "U2eN4DbVLfE": {"duration": 191.89, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=U2eN4DbVLfE", "annotations": []}, "LM8C4FSpN0w": {"duration": 61.02, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=LM8C4FSpN0w", "annotations": [{"segment": [0.47599095800566715, 58.64208428081123], "label": "Playing water polo"}]}, "j6Kf_y7k-yU": {"duration": 156.782, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=j6Kf_y7k-yU", "annotations": [{"segment": [72.87796879875195, 86.09502652106084], "label": "Sumo"}, {"segment": [143.0840639625585, 151.15547269890797], "label": "Sumo"}]}, "_DlDtsPxdyY": {"duration": 189.615, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_DlDtsPxdyY", "annotations": [{"segment": 
[53.74414641744548, 129.0450109034268], "label": "Playing pool"}]}, "6BKkSQ7uGco": {"duration": 61.927, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6BKkSQ7uGco", "annotations": [{"segment": [6.762766510660427, 16.327250575737313], "label": "Elliptical trainer"}, {"segment": [18.162858614664586, 31.88161352574103], "label": "Elliptical trainer"}, {"segment": [33.81383252730109, 44.92409178627145], "label": "Elliptical trainer"}, {"segment": [45.69697938689548, 61.727619], "label": "Elliptical trainer"}]}, "HzAlvJ1fNWU": {"duration": 48.507, "subset": "validation", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=HzAlvJ1fNWU", "annotations": [{"segment": [1.5134628393135725, 48.27946457410297], "label": "Snow tubing"}]}, "L5l0tTAhVJM": {"duration": 112.896, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=L5l0tTAhVJM", "annotations": []}, "QGzrtgTrwiQ": {"duration": 170.25, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=QGzrtgTrwiQ", "annotations": [{"segment": [16.467113528861155, 153.78159247113885], "label": "Triple jump"}]}, "c-8tNd5MDT8": {"duration": 211.023, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=c-8tNd5MDT8", "annotations": []}, "wJk3KOUAwXA": {"duration": 133.19, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wJk3KOUAwXA", "annotations": []}, "f07eWOCKLI8": {"duration": 129.823, "subset": "validation", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=f07eWOCKLI8", "annotations": [{"segment": [19.433784711388455, 122.89714040561623], "label": "Decorating the Christmas tree"}]}, "oNqtuh6qa2E": {"duration": 59.33, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=oNqtuh6qa2E", "annotations": [{"segment": [7.867072778149222, 26.92095228], "label": "Playing bagpipes"}]}, "len7R78v5NY": 
{"duration": 98.291, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=len7R78v5NY", "annotations": [{"segment": [0.01, 98.221], "label": "Using the rowing machine"}]}, "9VtxNvLx6zA": {"duration": 26.84, "subset": "testing", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=9VtxNvLx6zA", "annotations": []}, "C4td7rprQGM": {"duration": 54.64, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=C4td7rprQGM", "annotations": [{"segment": [14.252743022620905, 44.82009624], "label": "Hopscotch"}]}, "4KE6dUAGZ94": {"duration": 208.59, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4KE6dUAGZ94", "annotations": [{"segment": [6.833494539781592, 205.98104800624026], "label": "Using the pommel horse"}]}, "RPkH81M6-NE": {"duration": 143.45, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RPkH81M6-NE", "annotations": [{"segment": [11.861158399375974, 135.39624210608426], "label": "Sailing"}]}, "SlT6OBmQqpc": {"duration": 52.315, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SlT6OBmQqpc", "annotations": [{"segment": [28.564890717628703, 30.360398134165365], "label": "Throwing darts"}, {"segment": [31.99267760374415, 33.298501179407175], "label": "Throwing darts"}, {"segment": [36.31821819812793, 43.25540594383775], "label": "Throwing darts"}, {"segment": [47.58094653822153, 49.13161203432137], "label": "Throwing darts"}]}, "bPvxr2-L_HU": {"duration": 10.798, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bPvxr2-L_HU", "annotations": [{"segment": [1.3405304212168487, 6.8326474258970356], "label": "Doing a powerbomb"}]}, "TnqjltyMoVA": {"duration": 54.753, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TnqjltyMoVA", "annotations": []}, "iUVz4A5oblQ": {"duration": 203.222, "subset": 
"validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=iUVz4A5oblQ", "annotations": [{"segment": [35.50120436817473, 177.8405007800312], "label": "Installing carpet"}]}, "bGDwE1v16HU": {"duration": 185.53, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bGDwE1v16HU", "annotations": [{"segment": [91.72773322932917, 155.09799687987518], "label": "Spinning"}]}, "bW4AfWvteIo": {"duration": 56.982, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bW4AfWvteIo", "annotations": [{"segment": [27.560168204475026, 41.73903357125419], "label": "Table soccer"}]}, "CIcVR4m7nOc": {"duration": 87.563, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CIcVR4m7nOc", "annotations": [{"segment": [9.410917413694449, 82.78879521902219], "label": "River tubing"}]}, "saJTy2rnf2Y": {"duration": 121.998, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=saJTy2rnf2Y", "annotations": []}, "UhB739uudDo": {"duration": 81.549, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UhB739uudDo", "annotations": [{"segment": [8.014896367283262, 72.64294961458322], "label": "Disc dog"}]}, "QYb2ektVZek": {"duration": 150.527, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QYb2ektVZek", "annotations": [{"segment": [15.22206700704517, 55.03363621741464], "label": "Wakeboarding"}, {"segment": [76.57872073126165, 120.60563104558338], "label": "Wakeboarding"}, {"segment": [122.24493093360826, 133.01747305491466], "label": "Wakeboarding"}]}, "Chj1_JbBc4A": {"duration": 169.88, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Chj1_JbBc4A", "annotations": [{"segment": [9.008726989079564, 125.85721528861154], "label": "Paintball"}]}, "AeOUzM7nl5w": {"duration": 79.715, "subset": "validation", "resolution": "360x480", "url": 
"https://www.youtube.com/watch?v=AeOUzM7nl5w", "annotations": [{"segment": [36.68589823865063, 68.14871832137285], "label": "Putting in contact lenses"}]}, "p2C4MeV02GU": {"duration": 232.85, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=p2C4MeV02GU", "annotations": [{"segment": [166.51990405616223, 184.67910842433696], "label": "Clipping cat claws"}]}, "WSpfyZuoi3A": {"duration": 89.676, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WSpfyZuoi3A", "annotations": [{"segment": [0.20000000000000004, 20.005607982142415], "label": "Snow tubing"}, {"segment": [53.30165483354028, 85.75830554582727], "label": "Snow tubing"}]}, "9JiZDwpdjb8": {"duration": 210.86, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9JiZDwpdjb8", "annotations": []}, "r2K5IcpZEU4": {"duration": 214.04, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=r2K5IcpZEU4", "annotations": []}, "SGAUpoTMSnA": {"duration": 198.82999999999998, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SGAUpoTMSnA", "annotations": []}, "rRSTE1EsAUM": {"duration": 81.67, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rRSTE1EsAUM", "annotations": [{"segment": [31.314877440000004, 70.3290368], "label": "High jump"}]}, "1HOnhMOjDEg": {"duration": 54.103, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=1HOnhMOjDEg", "annotations": []}, "_KOVk8iGbrA": {"duration": 183.02, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_KOVk8iGbrA", "annotations": [{"segment": [81.37380580937524, 112.49571750489068], "label": "Getting a piercing"}, {"segment": [144.18867334165367, 165.03178849797192], "label": "Getting a piercing"}]}, "DCYz8p4zH6o": {"duration": 34.482, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=DCYz8p4zH6o", "annotations": [{"segment": [6.724187012480499, 25.390530159126364], "label": "Kneeling"}]}, "swId_MFHywI": {"duration": 223.93, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=swId_MFHywI", "annotations": [{"segment": [9.432434970868222, 211.35641323612128], "label": "Breakdancing"}]}, "JQpx7CcTstU": {"duration": 49.877, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JQpx7CcTstU", "annotations": [{"segment": [6.528804992199688, 44.164879875195005], "label": "Snow tubing"}]}, "gCx-ucvPhDY": {"duration": 5.6, "subset": "training", "resolution": "640x354", "url": "https://www.youtube.com/watch?v=gCx-ucvPhDY", "annotations": [{"segment": [2.12743132, 5.488109120000001], "label": "Plataform diving"}]}, "unI7FhokvbM": {"duration": 124.993, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=unI7FhokvbM", "annotations": [{"segment": [41.63183914023227, 49.626712230393984], "label": "Installing carpet"}, {"segment": [85.5061426838026, 92.72102676599063], "label": "Installing carpet"}]}, "3gQsAKZ71tU": {"duration": 106.46000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3gQsAKZ71tU", "annotations": [{"segment": [20.42894934572893, 22.255928555509566], "label": "Playing squash"}, {"segment": [32.38735871883855, 39.52918653889013], "label": "Playing squash"}, {"segment": [44.51185659906396, 48.99626006240249], "label": "Playing squash"}, {"segment": [52.31804040561622, 92.34549354134165], "label": "Playing squash"}]}, "5rVXCKLihyg": {"duration": 92.0, "subset": "validation", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=5rVXCKLihyg", "annotations": [{"segment": [0.01, 91.941], "label": "Ironing clothes"}]}, "yX_DJiboktI": {"duration": 158.059, "subset": "validation", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=yX_DJiboktI", "annotations": [{"segment": [10.35638956986851, 158.058231292517], "label": "Baton twirling"}]}, "jSEsIZ9ucgM": {"duration": 83.55, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jSEsIZ9ucgM", "annotations": []}, "Xmq-gWQw02A": {"duration": 149.583, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Xmq-gWQw02A", "annotations": []}, "YVbMt3qsB3o": {"duration": 61.49, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YVbMt3qsB3o", "annotations": [{"segment": [6.71459029789763, 10.5514987675507], "label": "Shot put"}, {"segment": [15.539480003120124, 24.652138029641183], "label": "Shot put"}]}, "zto8JvkVLVw": {"duration": 47.39, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zto8JvkVLVw", "annotations": [{"segment": [4.657864900156007, 42.43832464586583], "label": "Using the pommel horse"}]}, "uM6ErLnAGW4": {"duration": 193.26, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uM6ErLnAGW4", "annotations": [{"segment": [8.441911631825274, 45.224526599063964], "label": "Carving jack-o-lanterns"}]}, "sdMr24Qyrw8": {"duration": 119.47, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sdMr24Qyrw8", "annotations": [{"segment": [50.50775789671042, 78.09133047498771], "label": "Getting a piercing"}, {"segment": [82.9370929875195, 93.56049590951639], "label": "Getting a piercing"}]}, "LjOgqXGS5ug": {"duration": 71.75, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LjOgqXGS5ug", "annotations": []}, "86S2O0fpX4g": {"duration": 18.042, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=86S2O0fpX4g", "annotations": [{"segment": [0.2, 17.411610449436388], "label": "Doing crunches"}]}, "Qfi8dqC4E4s": {"duration": 181.93, "subset": 
"training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Qfi8dqC4E4s", "annotations": [{"segment": [27.816711388455534, 42.29275507020281], "label": "Triple jump"}, {"segment": [45.98272698907956, 60.74261466458658], "label": "Triple jump"}, {"segment": [65.85180655226209, 82.8824461778471], "label": "Triple jump"}, {"segment": [85.72088611544461, 107.0091856474259], "label": "Triple jump"}, {"segment": [122.90444929797191, 134.25820904836195], "label": "Triple jump"}, {"segment": [138.79971294851794, 161.7910764430577], "label": "Triple jump"}]}, "0MyaFtXcTKI": {"duration": 217.892, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0MyaFtXcTKI", "annotations": [{"segment": [47.587113884555386, 49.6265616224649], "label": "Fun sliding down"}, {"segment": [54.385273010920436, 62.88297191887675], "label": "Fun sliding down"}, {"segment": [72.06048673946958, 77.83892199687988], "label": "Fun sliding down"}, {"segment": [79.8783697347894, 90.41551638065522], "label": "Fun sliding down"}, {"segment": [102.65220280811232, 104.35174258970359], "label": "Fun sliding down"}, {"segment": [113.52925741029641, 118.6278767550702], "label": "Fun sliding down"}, {"segment": [164.17554290171608, 169.27416224648985], "label": "Fun sliding down"}, {"segment": [179.1479360374415, 185.60678003120125], "label": "Fun sliding down"}, {"segment": [197.16471138845554, 200.90404212168488], "label": "Fun sliding down"}]}, "0_GpcrCLdag": {"duration": 84.103, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0_GpcrCLdag", "annotations": [{"segment": [16.480464290349435, 19.488803374330775], "label": "Having an ice cream"}, {"segment": [21.712358349447413, 24.459102730473855], "label": "Having an ice cream"}, {"segment": [30.21418619548163, 37.5388378782188], "label": "Having an ice cream"}, {"segment": [44.34030015504617, 55.719669733584276], "label": "Having an ice cream"}, {"segment": 
[72.98492012860761, 76.25485391554385], "label": "Having an ice cream"}]}, "CTJej0rZMiI": {"duration": 96.5, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CTJej0rZMiI", "annotations": []}, "ofZURf7w9wk": {"duration": 164.7, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ofZURf7w9wk", "annotations": [{"segment": [115.26377874306151, 138.31653416312693], "label": "Doing fencing"}]}, "xmSN6La-2vQ": {"duration": 116.913, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xmSN6La-2vQ", "annotations": [{"segment": [54.5470291021087, 60.18355572934851], "label": "Putting in contact lenses"}]}, "Pr6zL1ToSC4": {"duration": 145.29, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Pr6zL1ToSC4", "annotations": [{"segment": [43.461153180000004, 136.85647425897037], "label": "Rock climbing"}, {"segment": [7.81934634, 28.48932783], "label": "Rock climbing"}]}, "H-fl49tiqGE": {"duration": 59.049, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=H-fl49tiqGE", "annotations": [{"segment": [0.2, 40.51363494539781], "label": "Snow tubing"}]}, "ZbFyGkfGFcE": {"duration": 15.117, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ZbFyGkfGFcE", "annotations": [{"segment": [0.01, 15.041], "label": "Building sandcastles"}]}, "YPlsA_vTltk": {"duration": 215.41, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YPlsA_vTltk", "annotations": [{"segment": [12.097994770670827, 215.411518], "label": "Zumba"}]}, "WXaFTEWJThM": {"duration": 76.56, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=WXaFTEWJThM", "annotations": [{"segment": [37.50178418096724, 46.100919407176285], "label": "Washing face"}, {"segment": [56.13324383775351, 65.21010879875195], "label": "Washing face"}]}, 
"kCb2Km85Yn4": {"duration": 24.707, "subset": "validation", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=kCb2Km85Yn4", "annotations": [{"segment": [21.061633385335416, 24.481], "label": "Fun sliding down"}]}, "oMegp6a547Q": {"duration": 117.702, "subset": "training", "resolution": "357x240", "url": "https://www.youtube.com/watch?v=oMegp6a547Q", "annotations": [{"segment": [5.600482805706786, 8.538440998864445], "label": "Capoeira"}, {"segment": [31.674861769981007, 33.51108564070454], "label": "Capoeira"}, {"segment": [36.99991099507926, 38.46889009165809], "label": "Capoeira"}, {"segment": [57.565617802652106, 60.687198353354134], "label": "Capoeira"}, {"segment": [82.17101743759751, 86.94519945631825], "label": "Capoeira"}, {"segment": [89.88315762168487, 92.45387101638066], "label": "Capoeira"}]}, "2PBdNJcQcp4": {"duration": 217.85, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=2PBdNJcQcp4", "annotations": [{"segment": [21.789256979999998, 199.6232118174727], "label": "Belly dance"}]}, "aPXqTfbtgYI": {"duration": 41.19, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aPXqTfbtgYI", "annotations": [{"segment": [4.57638606, 18.18508068], "label": "Playing saxophone"}]}, "xpEbejnuGEY": {"duration": 147.59, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xpEbejnuGEY", "annotations": [{"segment": [46.50917069063715, 75.51984151747023], "label": "Polishing forniture"}, {"segment": [43.28576282098903, 120.41730766458657], "label": "Painting furniture"}]}, "dRa5x7LsBJI": {"duration": 222.959, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dRa5x7LsBJI", "annotations": []}, "9Ds9DvdPQkM": {"duration": 59.14, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=9Ds9DvdPQkM", "annotations": [{"segment": [25.187810925117002, 29.616437021840873], "label": "Playing 
saxophone"}, {"segment": [36.720691385335414, 57.01856099531982], "label": "Playing saxophone"}]}, "T8ae3_Pm5eE": {"duration": 211.79, "subset": "validation", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=T8ae3_Pm5eE", "annotations": [{"segment": [39.318120741029645, 192.95615556942278], "label": "Ballet"}]}, "RxuL7k7-hYQ": {"duration": 235.43, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RxuL7k7-hYQ", "annotations": [{"segment": [11.018429906396257, 196.12805233385336], "label": "Using uneven bars"}]}, "hHmn5tf5d5I": {"duration": 181.65, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hHmn5tf5d5I", "annotations": [{"segment": [0.2, 181.35666600000002], "label": "Belly dance"}]}, "jpWevi1HBYo": {"duration": 16.95, "subset": "validation", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=jpWevi1HBYo", "annotations": [{"segment": [6.8223539719188775, 15.91882593447738], "label": "Playing lacrosse"}]}, "w50et2-vEOQ": {"duration": 92.206, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=w50et2-vEOQ", "annotations": [{"segment": [19.15412077983963, 61.36784975180225], "label": "Wakeboarding"}, {"segment": [74.29041984526019, 87.21298993871814], "label": "Wakeboarding"}]}, "FJnCSFPoWVU": {"duration": 213.462, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FJnCSFPoWVU", "annotations": [{"segment": [153.85179704331028, 199.14150345397815], "label": "Slacklining"}]}, "fFn51O3yHCQ": {"duration": 134.79, "subset": "testing", "resolution": "180x102", "url": "https://www.youtube.com/watch?v=fFn51O3yHCQ", "annotations": []}, "ako4JDxckE0": {"duration": 220.219, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ako4JDxckE0", "annotations": []}, "BtrGC6PUPJk": {"duration": 26.59, "subset": "validation", "resolution": "426x240", "url": 
"https://www.youtube.com/watch?v=BtrGC6PUPJk", "annotations": [{"segment": [0, 26.38684807256236], "label": "Doing karate"}]}, "DbuQn8EOJx0": {"duration": 104.35, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=DbuQn8EOJx0", "annotations": [{"segment": [46.23328430280068, 54.861326795928974], "label": "Smoking a cigarette"}, {"segment": [58.60557165143748, 65.11730183493054], "label": "Smoking a cigarette"}]}, "9mpPAwSmUmk": {"duration": 127.455, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=9mpPAwSmUmk", "annotations": [{"segment": [5.4382174368053215, 120.35528753026327], "label": "Fixing the roof"}]}, "vzxT-k8dsVs": {"duration": 51.967, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=vzxT-k8dsVs", "annotations": [{"segment": [1.7807751619937693, 49.78076021028038], "label": "Powerbocking"}]}, "LA5UXJ_hVU4": {"duration": 22.501, "subset": "validation", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=LA5UXJ_hVU4", "annotations": [{"segment": [6.17788447048086, 22.500135], "label": "Calf roping"}]}, "W7JxYa-PiB8": {"duration": 45.86, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=W7JxYa-PiB8", "annotations": [{"segment": [0.07154350663822472, 10.731525585023402], "label": "Tennis serve with ball bouncing"}, {"segment": [26.971900970358817, 40.064362184087365], "label": "Tennis serve with ball bouncing"}]}, "A72gcVxGdwU": {"duration": 184.112, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=A72gcVxGdwU", "annotations": []}, "Z7gECmnHeKk": {"duration": 69.66, "subset": "training", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=Z7gECmnHeKk", "annotations": [{"segment": [0, 61.5836302652106], "label": "Using the rowing machine"}]}, "vfLKyUMzFHg": {"duration": 114.776, "subset": "training", "resolution": "352x288", "url": 
"https://www.youtube.com/watch?v=vfLKyUMzFHg", "annotations": [{"segment": [15.040879578040265, 109.04637520436818], "label": "Playing blackjack"}]}, "tVbrnWNOmFY": {"duration": 200.34, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tVbrnWNOmFY", "annotations": [{"segment": [24.222288728283825, 56.10194615131544], "label": "Fixing bicycle"}, {"segment": [100.79597567576174, 189.24639774162395], "label": "Fixing bicycle"}]}, "YVSD611Zpvo": {"duration": 123.42, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=YVSD611Zpvo", "annotations": [{"segment": [34.46352032012481, 109.55163721872076], "label": "Cleaning windows"}]}, "VVezUub0ocQ": {"duration": 193.144, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VVezUub0ocQ", "annotations": [{"segment": [70.34178393135726, 88.41676053042121], "label": "Clipping cat claws"}, {"segment": [111.31173088923556, 123.37449219968799], "label": "Clipping cat claws"}]}, "HJws_Cwu3qE": {"duration": 22.83, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=HJws_Cwu3qE", "annotations": [{"segment": [2.8590686427457093, 20.794840873634943], "label": "Playing racquetball"}]}, "7IW2BELXDHA": {"duration": 50.272, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=7IW2BELXDHA", "annotations": [{"segment": [3.5270592823712947, 49.53558814352574], "label": "Beach soccer"}]}, "KgEHEyz3oKw": {"duration": 188.78, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KgEHEyz3oKw", "annotations": [{"segment": [31.21761703121186, 176.70349262950108], "label": "Shoveling snow"}]}, "-KWToNMY1Lc": {"duration": 86.379, "subset": "training", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=-KWToNMY1Lc", "annotations": [{"segment": [16.952736982643522, 82.47640964174455], "label": "Calf roping"}]}, "ecWUNT_HcN8": 
{"duration": 233.918, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ecWUNT_HcN8", "annotations": [{"segment": [0.2, 209.7817082683307], "label": "Roof shingle removal"}]}, "mkF3Rlj3JSs": {"duration": 178.32999999999998, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=mkF3Rlj3JSs", "annotations": [{"segment": [32.21216848673947, 163.5600624024961], "label": "Brushing hair"}]}, "LGku-onF4VI": {"duration": 64.227, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LGku-onF4VI", "annotations": [{"segment": [0, 64.226393], "label": "Playing congas"}]}, "eWG5T3TC_cg": {"duration": 108.02, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eWG5T3TC_cg", "annotations": [{"segment": [0.01, 107.961], "label": "Drum corps"}]}, "YF_HHxbHA18": {"duration": 23.917, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=YF_HHxbHA18", "annotations": []}, "ke9gaIRnaEo": {"duration": 126.874, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ke9gaIRnaEo", "annotations": [{"segment": [0.2, 3.5627597185520075], "label": "Running a marathon"}, {"segment": [6.135863959728457, 14.053107778732917], "label": "Running a marathon"}, {"segment": [35.627597185520074, 44.93035824804992], "label": "Running a marathon"}, {"segment": [66.50484745085804, 81.1517483775351], "label": "Running a marathon"}, {"segment": [87.68347446645865, 91.24623415132605], "label": "Running a marathon"}, {"segment": [107.47658382683306, 125.48831334477379], "label": "Running a marathon"}]}, "JSxnQ-tgE3g": {"duration": 36.39, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=JSxnQ-tgE3g", "annotations": []}, "XsqslHC79FQ": {"duration": 238.05, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XsqslHC79FQ", "annotations": 
[{"segment": [0, 233.9653428393136], "label": "Playing violin"}]}, "W3KCFRADoX8": {"duration": 177.842, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=W3KCFRADoX8", "annotations": [{"segment": [84.32241861022554, 164.37885374885482], "label": "Braiding hair"}]}, "-Jp86pFKlsw": {"duration": 27.609, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-Jp86pFKlsw", "annotations": [{"segment": [4.953167511700468, 27.408526077097505], "label": "Disc dog"}]}, "c2ejr7Lci8s": {"duration": 27.73, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=c2ejr7Lci8s", "annotations": [{"segment": [6.278099800000001, 15.801571039999999], "label": "Playing accordion"}]}, "z48kSSKMoXo": {"duration": 85.938, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=z48kSSKMoXo", "annotations": [{"segment": [4.941830482115086, 77.93884914463452], "label": "Rope skipping"}]}, "TQR1P0ZjByU": {"duration": 221.96, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=TQR1P0ZjByU", "annotations": []}, "YtgiDWEY_1A": {"duration": 755.113, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YtgiDWEY_1A", "annotations": [{"segment": [190.83170358814354, 267.39997971918876], "label": "Removing ice from car"}, {"segment": [355.7479906396256, 371.06164586583463], "label": "Removing ice from car"}, {"segment": [414.6466645865835, 581.9188985959438], "label": "Removing ice from car"}]}, "6pBqSAsP0DU": {"duration": 17.62, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6pBqSAsP0DU", "annotations": []}, "2zohqWPmeQU": {"duration": 185.18, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2zohqWPmeQU", "annotations": [{"segment": [48.24466893777792, 152.8229321404056], "label": "Painting furniture"}]}, 
"m_sBF4vLRRc": {"duration": 91.74, "subset": "training", "resolution": "852x480", "url": "https://www.youtube.com/watch?v=m_sBF4vLRRc", "annotations": [{"segment": [15.886687255245315, 28.624661721162727], "label": "Zumba"}, {"segment": [29.340277683307335, 38.07079933541342], "label": "Zumba"}, {"segment": [42.65074512012481, 51.953759995319814], "label": "Zumba"}, {"segment": [56.67682908580344, 60.11178842433698], "label": "Zumba"}, {"segment": [63.97611768018721, 71.56165288611545], "label": "Zumba"}]}, "asLRIsN6wLQ": {"duration": 155.02, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=asLRIsN6wLQ", "annotations": [{"segment": [15.71367394695788, 25.141878315132608], "label": "Shot put"}, {"segment": [32.87784087363495, 39.168365054602184], "label": "Shot put"}, {"segment": [49.56490639625585, 58.51076755070203], "label": "Shot put"}, {"segment": [64.07170826833072, 82.68877067082684], "label": "Shot put"}, {"segment": [87.52437129485179, 95.98667238689548], "label": "Shot put"}, {"segment": [102.03117316692668, 110.00991419656786], "label": "Shot put"}, {"segment": [122.82425585023401, 133.4625772230889], "label": "Shot put"}, {"segment": [148.4529391575663, 154.781], "label": "Shot put"}]}, "_fwExctFJpg": {"duration": 55.171, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_fwExctFJpg", "annotations": [{"segment": [26.337296358814353, 41.657684436817476], "label": "Calf roping"}]}, "8gTDyNpuHos": {"duration": 72.17, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=8gTDyNpuHos", "annotations": []}, "GD9SfOn3irM": {"duration": 97.269, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GD9SfOn3irM", "annotations": [{"segment": [4.8524680187207485, 97.201], "label": "Calf roping"}]}, "aorWXCqqyJs": {"duration": 141.897, "subset": "training", "resolution": "720x480", "url": 
"https://www.youtube.com/watch?v=aorWXCqqyJs", "annotations": [{"segment": [123.1216838122573, 136.6180490062749], "label": "Surfing"}]}, "MIAhrxkYadk": {"duration": 155.6, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MIAhrxkYadk", "annotations": [{"segment": [10.436187207488299, 13.834015600624024], "label": "Getting a haircut"}, {"segment": [106.30348829953199, 120.137503900156], "label": "Getting a haircut"}]}, "sP5Y_UMakiQ": {"duration": 23.267, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sP5Y_UMakiQ", "annotations": []}, "i0jy3fjw8xQ": {"duration": 206.84, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=i0jy3fjw8xQ", "annotations": [{"segment": [126.1331840873635, 168.0863042121685], "label": "Bungee jumping"}]}, "JWN0cMm-8ug": {"duration": 217.618, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JWN0cMm-8ug", "annotations": [{"segment": [95.77873268241979, 194.60343745702014], "label": "Bullfighting"}]}, "Nb87GFizCB8": {"duration": 483.37, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Nb87GFizCB8", "annotations": [{"segment": [165.88240249609984, 183.22465366614668], "label": "Removing ice from car"}]}, "OyvOOO8DxKg": {"duration": 223.21, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=OyvOOO8DxKg", "annotations": []}, "KlqEMsj4nKM": {"duration": 150.05, "subset": "testing", "resolution": "632x360", "url": "https://www.youtube.com/watch?v=KlqEMsj4nKM", "annotations": []}, "oEC5UG-rBFc": {"duration": 198.601, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=oEC5UG-rBFc", "annotations": [{"segment": [85.99824867018458, 198.40027210884355], "label": "Futsal"}]}, "Oa26_SgrY8w": {"duration": 129.08, "subset": "validation", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=Oa26_SgrY8w", "annotations": [{"segment": [10.265321372854913, 18.517834633385334], "label": "Curling"}, {"segment": [31.601087363494536, 34.01645709828392], "label": "Curling"}, {"segment": [38.84719656786271, 42.872812792511695], "label": "Curling"}, {"segment": [46.294586583463335, 49.11251794071762], "label": "Curling"}, {"segment": [69.441879875195, 75.08938065522621], "label": "Curling"}, {"segment": [82.33661310452419, 87.57072542901716], "label": "Curling"}, {"segment": [96.02582995319813, 99.04551014040563], "label": "Curling"}, {"segment": [108.70848673946956, 112.93603900156006], "label": "Curling"}]}, "vF-XFROSmv4": {"duration": 117.19, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=vF-XFROSmv4", "annotations": [{"segment": [21.55025299, 42.73604197], "label": "Kayaking"}, {"segment": [65.99998613277864, 112.62047388455538], "label": "Kayaking"}]}, "I9kOPQ3J5HA": {"duration": 92.63, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=I9kOPQ3J5HA", "annotations": [{"segment": [2.600997166417269, 3.9014957496259033], "label": "Rock climbing"}]}, "KR1-rdV18pI": {"duration": 221.937, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KR1-rdV18pI", "annotations": [{"segment": [4.845638065522621, 217.3810733229329], "label": "Waterskiing"}]}, "UYGiq0CsYEs": {"duration": 83.546, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UYGiq0CsYEs", "annotations": [{"segment": [10.035874282371294, 77.28926557722309], "label": "Playing drums"}]}, "Lou-YFz3kb0": {"duration": 148.61, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Lou-YFz3kb0", "annotations": [{"segment": [40.8033649237126, 135.1611447176287], "label": "Playing violin"}]}, "xND_kp-aYXw": {"duration": 39.567, "subset": "testing", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=xND_kp-aYXw", "annotations": []}, "OCQ9OyMKQKw": {"duration": 55.798, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OCQ9OyMKQKw", "annotations": [{"segment": [39.25809638689548, 49.61666283931358], "label": "Trimming branches or hedges"}]}, "uYLYJABDubw": {"duration": 21.363, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uYLYJABDubw", "annotations": [{"segment": [0.01, 19.059177847113887], "label": "Arm wrestling"}]}, "9P-LVflVPTo": {"duration": 120.117, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9P-LVflVPTo", "annotations": []}, "InA8Xbg-hvo": {"duration": 165.51, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=InA8Xbg-hvo", "annotations": [{"segment": [54.22373471138845, 138.14141938377534], "label": "Putting on makeup"}]}, "Cf7xy_HQ9H0": {"duration": 180.489, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Cf7xy_HQ9H0", "annotations": [{"segment": [45.321374414976596, 154.56020124804994], "label": "Rollerblading"}]}, "hHpcr5tYWso": {"duration": 41.425, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hHpcr5tYWso", "annotations": [{"segment": [12.207159126365056, 40.10923712948519], "label": "Kite flying"}]}, "zzz_3yWpTXo": {"duration": 21.53, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zzz_3yWpTXo", "annotations": [{"segment": [0.23458190327613107, 21.481], "label": "Washing dishes"}]}, "07G2P4soRZw": {"duration": 197.927, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=07G2P4soRZw", "annotations": []}, "54K2F3zAZ0o": {"duration": 37.76, "subset": "training", "resolution": "384x288", "url": "https://www.youtube.com/watch?v=54K2F3zAZ0o", "annotations": [{"segment": [1.4136083494539782, 24.738146115444614], 
"label": "Playing saxophone"}, {"segment": [30.51038020904836, 37.755123], "label": "Playing saxophone"}]}, "91Wdp_uaWrY": {"duration": 72.261, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=91Wdp_uaWrY", "annotations": [{"segment": [0.010000000000000002, 72.221], "label": "Doing fencing"}]}, "jzBR9ihZBbI": {"duration": 223.79, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=jzBR9ihZBbI", "annotations": [{"segment": [119.0541894573742, 176.66105532384563], "label": "Playing squash"}]}, "rApbE1VkEjE": {"duration": 19.69, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rApbE1VkEjE", "annotations": []}, "F9HKxxoNNSQ": {"duration": 44.722, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=F9HKxxoNNSQ", "annotations": [{"segment": [3.2791212709732878, 33.209822708268334], "label": "Shuffleboard"}]}, "JY-H1u1hJZw": {"duration": 81.873, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JY-H1u1hJZw", "annotations": [{"segment": [5.105834633385335, 42.38878627145086], "label": "Canoeing"}, {"segment": [45.453035881435255, 81.841], "label": "Canoeing"}]}, "jJNDLjhemaE": {"duration": 78.205, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=jJNDLjhemaE", "annotations": [{"segment": [0.2, 78.181], "label": "Playing ice hockey"}]}, "bRaUAmvOhT4": {"duration": 147.49, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=bRaUAmvOhT4", "annotations": []}, "Qf6gZtm9BIg": {"duration": 54.149, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Qf6gZtm9BIg", "annotations": [{"segment": [12.322536661466458, 42.80709360374415], "label": "Slacklining"}]}, "BWKKwqX62Y4": {"duration": 43.329, "subset": "validation", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=BWKKwqX62Y4", "annotations": [{"segment": [0.2, 37.94683619344774], "label": "Swinging at the playground"}]}, "pxb95-aSDYo": {"duration": 168.72, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=pxb95-aSDYo", "annotations": [{"segment": [5.376384984667055, 13.75021676036799], "label": "Playing harmonica"}, {"segment": [66.34834760148948, 83.09601115289135], "label": "Playing harmonica"}, {"segment": [116.32965601270443, 164.74087096597546], "label": "Playing harmonica"}]}, "7gaC3b5nJqs": {"duration": 213.51, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7gaC3b5nJqs", "annotations": []}, "vuizEjJ1XCM": {"duration": 201.71, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=vuizEjJ1XCM", "annotations": []}, "lR4vyq1WbyU": {"duration": 75.14, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lR4vyq1WbyU", "annotations": [{"segment": [18.16944170283818, 68.22332203432137], "label": "Tai chi"}]}, "BD7txKlwoj8": {"duration": 20.88, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=BD7txKlwoj8", "annotations": [{"segment": [8.59739599375975, 16.76154825], "label": "Hammer throw"}]}, "PUGP8PSlJEA": {"duration": 176.87, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PUGP8PSlJEA", "annotations": [{"segment": [91.10721392, 149.27413200156005], "label": "Rock climbing"}, {"segment": [25.93672556698186, 90.50261687202182], "label": "Rock climbing"}]}, "aZfNeGBMU1Y": {"duration": 19.97, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aZfNeGBMU1Y", "annotations": [{"segment": [0.19999999999999998, 19.721], "label": "Hitting a pinata"}]}, "qEU4vKowVo4": {"duration": 90.558, "subset": "validation", "resolution": "698x480", "url": "https://www.youtube.com/watch?v=qEU4vKowVo4", 
"annotations": [{"segment": [0.01, 89.28636279056605], "label": "Changing car wheel"}]}, "ccfffP3pXrc": {"duration": 70.125, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ccfffP3pXrc", "annotations": [{"segment": [0.010000000000000002, 70.12428571428572], "label": "Using the monkey bar"}]}, "Xfu6WQuFEks": {"duration": 7.01, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=Xfu6WQuFEks", "annotations": [{"segment": [0.01, 7.012425], "label": "Chopping wood"}]}, "DJ5tenTs0A0": {"duration": 80.689, "subset": "training", "resolution": "568x320", "url": "https://www.youtube.com/watch?v=DJ5tenTs0A0", "annotations": [{"segment": [0.7552826684495951, 18.63030582175668], "label": "Skiing"}, {"segment": [31.72187165054602, 55.890916717628706], "label": "Skiing"}, {"segment": [60.548493110764426, 76.28354849297972], "label": "Skiing"}]}, "tBGeBbO8gh0": {"duration": 166.766, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tBGeBbO8gh0", "annotations": [{"segment": [19.244733229329174, 152.13741809672388], "label": "Ice fishing"}]}, "CdjU2OZri4c": {"duration": 104.885, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=CdjU2OZri4c", "annotations": [{"segment": [0.01, 93.26705831513262], "label": "Installing carpet"}]}, "b0LY869JBrE": {"duration": 169.46, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=b0LY869JBrE", "annotations": [{"segment": [42.57407644305771, 50.50713416536661], "label": "Doing nails"}, {"segment": [57.38245085803431, 145.17495631825273], "label": "Doing nails"}, {"segment": [152.57914352574102, 163.1565538221529], "label": "Doing nails"}]}, "4TgVsc3GVdw": {"duration": 190.99, "subset": "testing", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=4TgVsc3GVdw", "annotations": []}, "v5KAaaZD6uk": {"duration": 226.256, "subset": "training", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=v5KAaaZD6uk", "annotations": [{"segment": [41.3644824690893, 123.4592379208027], "label": "Running a marathon"}, {"segment": [154.4649824690893, 170.33525193954182], "label": "Running a marathon"}]}, "E1Xsc4kfFRE": {"duration": 138.669, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=E1Xsc4kfFRE", "annotations": [{"segment": [23.136204368174727, 28.32563338533541], "label": "Playing ten pins"}, {"segment": [52.97542121684867, 59.89465990639626], "label": "Playing ten pins"}, {"segment": [82.59841185647426, 90.61185491419658], "label": "Playing ten pins"}, {"segment": [113.31888299531981, 118.07654602184087], "label": "Playing ten pins"}, {"segment": [128.02438689547583, 134.07959438377534], "label": "Playing ten pins"}]}, "B0sXYJeZ8Xk": {"duration": 50.783, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=B0sXYJeZ8Xk", "annotations": [{"segment": [0.20000000000000004, 23.133160187207487], "label": "Cutting the grass"}, {"segment": [38.26478209048362, 50.58204], "label": "Cutting the grass"}]}, "nEmuDmbOp1E": {"duration": 152.068, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nEmuDmbOp1E", "annotations": [{"segment": [0.2, 139.75036378504672], "label": "Plastering"}]}, "1y9s_l_DIEk": {"duration": 20.78, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=1y9s_l_DIEk", "annotations": [{"segment": [0.5835779157566303, 20.74943700468019], "label": "Doing karate"}]}, "JFVZdzzJVr8": {"duration": 62.764, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JFVZdzzJVr8", "annotations": [{"segment": [8.980162495305276, 36.70153744681873], "label": "Welding"}]}, "oFc4uYTxEqs": {"duration": 214.62, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=oFc4uYTxEqs", "annotations": 
[{"segment": [11.384008308892357, 193.19331747737908], "label": "Tango"}]}, "NkPkbeEnIXU": {"duration": 168.14, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NkPkbeEnIXU", "annotations": []}, "HGSZ9_CVuM4": {"duration": 148.469, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HGSZ9_CVuM4", "annotations": [{"segment": [40.070098910078855, 136.42363051170048], "label": "Assembling bicycle"}]}, "MqQ9mgUb664": {"duration": 114.45, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=MqQ9mgUb664", "annotations": [{"segment": [10.177403928810213, 82.13343509828394], "label": "Preparing salad"}]}, "34eCEU2Y4T0": {"duration": 154.62, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=34eCEU2Y4T0", "annotations": [{"segment": [31.358535101404055, 35.459266614664585], "label": "Washing face"}, {"segment": [44.38438814352575, 47.279022152886114], "label": "Washing face"}, {"segment": [65.61170421216849, 70.19487472698908], "label": "Washing face"}, {"segment": [78.1551182527301, 85.39170327613104], "label": "Washing face"}]}, "cyhWzLsM29E": {"duration": 171.526, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cyhWzLsM29E", "annotations": [{"segment": [33.46961704819479, 42.553533062112336], "label": "Baking cookies"}, {"segment": [62.59158364622449, 149.6903101851653], "label": "Baking cookies"}]}, "JY9oOSfjj_E": {"duration": 55.821, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JY9oOSfjj_E", "annotations": []}, "4NSWcmO_u4I": {"duration": 73.422, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4NSWcmO_u4I", "annotations": [{"segment": [6.394405426439627, 7.993007218633953], "label": "Painting fence"}, {"segment": [42.02038730898428, 63.14476767445861], "label": "Painting fence"}]}, "aNQRlsp5yv8": 
{"duration": 213.438, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=aNQRlsp5yv8", "annotations": [{"segment": [16.98179247986246, 186.13376463221795], "label": "Building sandcastles"}]}, "-E9YQ_Uhu50": {"duration": 62.09, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-E9YQ_Uhu50", "annotations": [{"segment": [3.342355694227769, 57.788845553822156], "label": "Playing violin"}]}, "d1doBRc_hAM": {"duration": 191.286, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=d1doBRc_hAM", "annotations": [{"segment": [20.28765678627145, 35.8017472698908], "label": "Elliptical trainer"}, {"segment": [97.26141341653667, 107.10689391575664], "label": "Elliptical trainer"}, {"segment": [148.87559906396257, 177.5355616224649], "label": "Elliptical trainer"}]}, "mKm75VWThAI": {"duration": 212.51, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mKm75VWThAI", "annotations": [{"segment": [3.3142121684867396, 81.86104056162246], "label": "Sumo"}, {"segment": [145.82533541341652, 167.7149235569423], "label": "Sumo"}]}, "Erje-PwY9hE": {"duration": 58.44, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Erje-PwY9hE", "annotations": []}, "ZoMYVoG76O0": {"duration": 175.845, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZoMYVoG76O0", "annotations": []}, "MXDeLfF5rok": {"duration": 167.74, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MXDeLfF5rok", "annotations": [{"segment": [21.719970112324493, 151.77810439937596], "label": "Windsurfing"}]}, "uy7WAwrrNiA": {"duration": 164.096, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uy7WAwrrNiA", "annotations": [{"segment": [10.495946915427638, 154.36721920436815], "label": "Kite flying"}]}, "NVeTGvwk8y0": {"duration": 
87.61, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=NVeTGvwk8y0", "annotations": [{"segment": [9.493815580343213, 85.71754354914196], "label": "Mixing drinks"}]}, "wMDyshGO2og": {"duration": 173.731, "subset": "training", "resolution": "352x240", "url": "https://www.youtube.com/watch?v=wMDyshGO2og", "annotations": [{"segment": [0.01, 116.80772386895475], "label": "Futsal"}]}, "FuVi170R3cs": {"duration": 129.103, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FuVi170R3cs", "annotations": []}, "oPuZJt4J6jY": {"duration": 68.59, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=oPuZJt4J6jY", "annotations": []}, "j05b3qqgRxw": {"duration": 56.75, "subset": "validation", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=j05b3qqgRxw", "annotations": [{"segment": [0.01, 24.85644461778471], "label": "Getting a tattoo"}, {"segment": [30.51769890795632, 44.31700624024961], "label": "Getting a tattoo"}]}, "Znb6Y9m_PsQ": {"duration": 50.574, "subset": "testing", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=Znb6Y9m_PsQ", "annotations": []}, "9uZoTZsTwv0": {"duration": 11.401, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9uZoTZsTwv0", "annotations": [{"segment": [1.5118325429017159, 11.400996], "label": "Calf roping"}]}, "OGJsBzZX04o": {"duration": 199.762, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OGJsBzZX04o", "annotations": []}, "HkzMA1jrm00": {"duration": 72.957, "subset": "validation", "resolution": "480x320", "url": "https://www.youtube.com/watch?v=HkzMA1jrm00", "annotations": [{"segment": [13.999533042546192, 68.85949179463778], "label": "Playing blackjack"}]}, "oIz8QNVb4P8": {"duration": 153.972, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=oIz8QNVb4P8", "annotations": []}, "yGO1YnMQNRI": 
{"duration": 102.7, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yGO1YnMQNRI", "annotations": [{"segment": [15.293110764430576, 21.378327613104524], "label": "Discus throw"}, {"segment": [49.7226271450858, 58.85045241809672], "label": "Discus throw"}, {"segment": [88.63598751950077, 93.27996879875195], "label": "Discus throw"}]}, "RfbEuBH1Pow": {"duration": 85.4, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RfbEuBH1Pow", "annotations": [{"segment": [7.423995860000001, 68.37721138845554], "label": "Playing water polo"}]}, "lwXIgNoLGhM": {"duration": 204.99, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lwXIgNoLGhM", "annotations": [{"segment": [0, 204.9857596371882], "label": "Paintball"}, {"segment": [15.455869, 122.37399515000001], "label": "Paintball"}]}, "Re-SsHmajds": {"duration": 204.99, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Re-SsHmajds", "annotations": [{"segment": [4.477068073199119, 190.59518368761962], "label": "Playing racquetball"}]}, "vBCnsp-NEAg": {"duration": 121.42, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=vBCnsp-NEAg", "annotations": [{"segment": [10.319290171606864, 121.17], "label": "Preparing pasta"}]}, "_aEHpGmhHe8": {"duration": 162.22, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=_aEHpGmhHe8", "annotations": [{"segment": [91.86255998811382, 153.10426664685636], "label": "Painting"}]}, "Mpph0kFsyZ0": {"duration": 120.44, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Mpph0kFsyZ0", "annotations": [{"segment": [20.377518720748828, 41.7879992199688], "label": "Shot put"}, {"segment": [68.8328166926677, 97.9435577223089], "label": "Shot put"}]}, "FFUh8Fx4FsA": {"duration": 59.258, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=FFUh8Fx4FsA", "annotations": [{"segment": [24.577272659175517, 32.69977176197926], "label": "Hand car wash"}, {"segment": [53.0060195189886, 56.97496794422225], "label": "Hand car wash"}]}, "JgAlMwG3fWw": {"duration": 122.9, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JgAlMwG3fWw", "annotations": [{"segment": [3.115219103590943, 121.80680223736366], "label": "Sailing"}]}, "1WR4jgFCqRw": {"duration": 81.294, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1WR4jgFCqRw", "annotations": [{"segment": [0.01, 81.293082], "label": "Painting fence"}]}, "pA8QJ2ZoeBM": {"duration": 44.188, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=pA8QJ2ZoeBM", "annotations": [{"segment": [0.2, 10.20243511237048], "label": "Elliptical trainer"}, {"segment": [12.753043400936038, 20.818481659906396], "label": "Elliptical trainer"}]}, "1fqn2LtVdKA": {"duration": 95.992, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1fqn2LtVdKA", "annotations": []}, "Fok7z0mLNbU": {"duration": 112.66, "subset": "validation", "resolution": "480x272", "url": "https://www.youtube.com/watch?v=Fok7z0mLNbU", "annotations": [{"segment": [0.01, 112.48745830105314], "label": "Ballet"}]}, "OZX6nSa3HQE": {"duration": 36.525, "subset": "testing", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=OZX6nSa3HQE", "annotations": []}, "wy3CaE40RXY": {"duration": 218.77, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wy3CaE40RXY", "annotations": []}, "mrT7FqQ0lCs": {"duration": 136.63, "subset": "training", "resolution": "352x288", "url": "https://www.youtube.com/watch?v=mrT7FqQ0lCs", "annotations": [{"segment": [29.369124960000004, 110.14241616], "label": "Breakdancing"}]}, "h-NNy3iaytg": {"duration": 219.684, "subset": "testing", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=h-NNy3iaytg", "annotations": []}, "w83HV_L4AiU": {"duration": 62.555, "subset": "training", "resolution": "1916x1080", "url": "https://www.youtube.com/watch?v=w83HV_L4AiU", "annotations": [{"segment": [8.418557217835946, 19.331501566745604], "label": "Rock-paper-scissors"}, {"segment": [24.30078872562411, 36.18810310568641], "label": "Rock-paper-scissors"}, {"segment": [39.01377619602909, 55.480629722508844], "label": "Rock-paper-scissors"}]}, "fTQYGNJpSUk": {"duration": 182.207, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fTQYGNJpSUk", "annotations": [{"segment": [24.15286271450858, 155.14662402496097], "label": "Curling"}]}, "rse-MW1helQ": {"duration": 129.41, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rse-MW1helQ", "annotations": [{"segment": [11.910894780031201, 62.17890834321373], "label": "Playing water polo"}, {"segment": [67.42777722932918, 122.13714138845555], "label": "Playing water polo"}]}, "d1JA5V3hO3Q": {"duration": 120.001, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=d1JA5V3hO3Q", "annotations": [{"segment": [31.102991677113042, 86.61734530328127], "label": "Hand car wash"}]}, "JzadrhtPWzs": {"duration": 92.86, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JzadrhtPWzs", "annotations": []}, "rA_RAZx2_aA": {"duration": 128.22, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rA_RAZx2_aA", "annotations": [{"segment": [0, 128.26702947845806], "label": "Triple jump"}]}, "ss9mNDN7jYc": {"duration": 140.644, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ss9mNDN7jYc", "annotations": []}, "K6UXN8usLJM": {"duration": 20.457, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=K6UXN8usLJM", "annotations": []}, "8vDSV9ruYlc": {"duration": 30.419, 
"subset": "testing", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=8vDSV9ruYlc", "annotations": []}, "PpC4kPd5KfA": {"duration": 175.45, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=PpC4kPd5KfA", "annotations": [{"segment": [1.475747889823115, 11.860637513377627], "label": "Playing ten pins"}, {"segment": [20.605807722686688, 24.97839282734122], "label": "Playing ten pins"}, {"segment": [37.54957500322299, 48.75432433390023], "label": "Playing ten pins"}, {"segment": [60.50564680265928, 72.53025584045925], "label": "Playing ten pins"}, {"segment": [90.02059625907735, 96.85275975407386], "label": "Playing ten pins"}, {"segment": [115.436246308591, 121.17526421513304], "label": "Playing ten pins"}, {"segment": [129.92043435843522, 135.6594522649773], "label": "Playing ten pins"}, {"segment": [147.4107746450396, 154.5162253864726], "label": "Playing ten pins"}, {"segment": [164.90111493164395, 169.27370000329503], "label": "Playing ten pins"}]}, "dea_92hDJnU": {"duration": 214.251, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dea_92hDJnU", "annotations": [{"segment": [56.18081110095681, 180.2856896056297], "label": "Clipping cat claws"}]}, "-_gDSRlC1kg": {"duration": 65.225, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-_gDSRlC1kg", "annotations": [{"segment": [0.30526452028081125, 51.99672328783151], "label": "Curling"}, {"segment": [63.08800085803431, 65.0248525], "label": "Curling"}]}, "aCzKnKvZesg": {"duration": 199.07, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aCzKnKvZesg", "annotations": []}, "ouEWLFNLLos": {"duration": 69.24, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ouEWLFNLLos", "annotations": [{"segment": [10.910194268330734, 19.011823675507024], "label": "Cleaning windows"}]}, "cKHdMUfKsQ8": {"duration": 237.66, 
"subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cKHdMUfKsQ8", "annotations": [{"segment": [23.1239288, 217.6030062402496], "label": "Painting"}]}, "eDuV8t5R-aI": {"duration": 222.169, "subset": "testing", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=eDuV8t5R-aI", "annotations": []}, "79FMLEeVp7Q": {"duration": 135.094, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=79FMLEeVp7Q", "annotations": [{"segment": [66.5787192027042, 100.87812241921512], "label": "Canoeing"}]}, "qRI4UJ2HR2g": {"duration": 162.84, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=qRI4UJ2HR2g", "annotations": [{"segment": [25.912386238689546, 145.82068334321374], "label": "Getting a tattoo"}]}, "jwD-G9U01mk": {"duration": 46.3, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=jwD-G9U01mk", "annotations": []}, "uHmoFLB-PLc": {"duration": 118.53999999999999, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=uHmoFLB-PLc", "annotations": [{"segment": [0, 118.15166599999999], "label": "Playing guitarra"}]}, "jdfe-CV_cpE": {"duration": 226.32999999999998, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jdfe-CV_cpE", "annotations": [{"segment": [0, 5.472746169710734], "label": "Getting a haircut"}, {"segment": [87.74047891439467, 133.64093066035565], "label": "Getting a haircut"}]}, "hLLYYzwnEjA": {"duration": 83.43, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=hLLYYzwnEjA", "annotations": []}, "hfk93bEIjwc": {"duration": 86.054, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hfk93bEIjwc", "annotations": [{"segment": [13.819193447737911, 85.801], "label": "Rafting"}]}, "5pqVrMgiMcs": {"duration": 190.776, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=5pqVrMgiMcs", "annotations": [{"segment": [7.210777181405516, 100.78883792906906], "label": "Longboarding"}, {"segment": [104.94786285118745, 155.4666096651428], "label": "Longboarding"}]}, "73iABxDLM7I": {"duration": 14.072, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=73iABxDLM7I", "annotations": []}, "9VWoQpg9wqE": {"duration": 128.012, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9VWoQpg9wqE", "annotations": [{"segment": [12.357604361370717, 56.40648442367601], "label": "Grooming dog"}]}, "LcXB-fSLTKY": {"duration": 151.23, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LcXB-fSLTKY", "annotations": [{"segment": [7.8491758800000015, 141.043104524181], "label": "Starting a campfire"}]}, "FQVs9_IbgOY": {"duration": 117.819, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FQVs9_IbgOY", "annotations": [{"segment": [14.84177331734923, 20.338727433365776], "label": "Bullfighting"}, {"segment": [30.416476646062776, 36.46312617368098], "label": "Bullfighting"}, {"segment": [41.96008028969753, 51.30490228692565], "label": "Bullfighting"}, {"segment": [0.2, 117.61804988662131], "label": "Bullfighting"}, {"segment": [75.49150039739847, 80.80522141561315], "label": "Bullfighting"}]}, "B-60jGEds7M": {"duration": 107.16, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=B-60jGEds7M", "annotations": [{"segment": [0, 106.908849475039], "label": "Sailing"}]}, "cmM0li5O1t0": {"duration": 26.75, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cmM0li5O1t0", "annotations": [{"segment": [0.01, 25.198502340093604], "label": "Waterskiing"}]}, "QilmFv8ZIx0": {"duration": 137.973, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QilmFv8ZIx0", "annotations": []}, 
"UTL1NSue01M": {"duration": 155.57, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UTL1NSue01M", "annotations": []}, "mYHezmI0U6U": {"duration": 136.023, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mYHezmI0U6U", "annotations": [{"segment": [12.518032761310453, 123.69513728549143], "label": "Skiing"}]}, "AFb77tjPuwQ": {"duration": 238.841, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=AFb77tjPuwQ", "annotations": [{"segment": [32.40865366614665, 40.60394539781591], "label": "Croquet"}, {"segment": [57.36704212168487, 71.15003276131046], "label": "Croquet"}, {"segment": [119.94926989079562, 131.49718096723868], "label": "Croquet"}]}, "7Nj1Y-fyq_c": {"duration": 98.62, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7Nj1Y-fyq_c", "annotations": [{"segment": [44.33193302730109, 73.37434322542902], "label": "Washing dishes"}]}, "NcEWimHnC54": {"duration": 148.376, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=NcEWimHnC54", "annotations": [{"segment": [0.01, 126.81882683307333], "label": "Snowboarding"}]}, "mpLYUgMhacA": {"duration": 61.324, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mpLYUgMhacA", "annotations": [{"segment": [0.2, 61.037782930437885], "label": "Rollerblading"}]}, "p28XWij4U1c": {"duration": 220.637, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=p28XWij4U1c", "annotations": [{"segment": [75.7009360374415, 158.97196567862713], "label": "Using the monkey bar"}]}, "Pv6oIFroaCQ": {"duration": 173.709, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Pv6oIFroaCQ", "annotations": [{"segment": [33.332516582295945, 73.98192631357254], "label": "Baking cookies"}]}, "leIVsOMWwGk": {"duration": 76.069, "subset": "testing", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=leIVsOMWwGk", "annotations": []}, "QIkDYInr17A": {"duration": 145.566, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=QIkDYInr17A", "annotations": [{"segment": [0.2, 101.96425454841322], "label": "Making a lemonade"}]}, "yEBiJguscMY": {"duration": 74.05, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yEBiJguscMY", "annotations": [{"segment": [3.7561583463338533, 24.328348673946955], "label": "Playing squash"}, {"segment": [40.04642667706708, 52.87515210608424], "label": "Playing squash"}]}, "g9lNpSAtHgI": {"duration": 159.87, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=g9lNpSAtHgI", "annotations": []}, "m3AbqJNPn-o": {"duration": 135.72, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=m3AbqJNPn-o", "annotations": [{"segment": [0.01, 135.7206575963719], "label": "Getting a piercing"}]}, "uE6_kwnfDCQ": {"duration": 190.57, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uE6_kwnfDCQ", "annotations": [{"segment": [10.40532767550702, 185.80942277691108], "label": "Tango"}]}, "87JvCGMC514": {"duration": 113.662, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=87JvCGMC514", "annotations": [{"segment": [7.092751193040919, 105.85931149235569], "label": "Kite flying"}]}, "LtdO87DWEHM": {"duration": 15.059, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LtdO87DWEHM", "annotations": [{"segment": [2.188039001560062, 11.31663182527301], "label": "Rollerblading"}]}, "VXLyTLY1PAw": {"duration": 6.32, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=VXLyTLY1PAw", "annotations": [{"segment": [1.67063325, 6.287656050000001], "label": "High jump"}]}, "QPKJDlQSO6c": {"duration": 136.627, "subset": 
"training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QPKJDlQSO6c", "annotations": [{"segment": [33.14725842449473, 39.521731821940804], "label": "Beer pong"}, {"segment": [49.93337170443605, 50.995783937343724], "label": "Beer pong"}, {"segment": [58.64515201427901, 59.70756424718669], "label": "Beer pong"}, {"segment": [62.68231849932819, 64.16969562539894], "label": "Beer pong"}, {"segment": [70.11920412968193, 72.03154614891577], "label": "Beer pong"}, {"segment": [84.35552805064482, 85.630422008164], "label": "Beer pong"}, {"segment": [92.4298602414455, 97.74192136119666], "label": "Beer pong"}, {"segment": [105.60377181842838, 112.82817494128999], "label": "Beer pong"}]}, "OxZDfLtnTvc": {"duration": 35.295, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OxZDfLtnTvc", "annotations": []}, "4MBGT228QiQ": {"duration": 150.094, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4MBGT228QiQ", "annotations": [{"segment": [78.39635456226623, 136.36384365883944], "label": "Making a lemonade"}]}, "C8fyaj592GA": {"duration": 110.92, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=C8fyaj592GA", "annotations": [{"segment": [0, 110.92165532879818], "label": "Doing step aerobics"}]}, "9qVcdqGeAzE": {"duration": 155.179, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9qVcdqGeAzE", "annotations": [{"segment": [35.829072205065074, 75.28946929577863], "label": "Blow-drying hair"}, {"segment": [82.06794241565582, 130.96978420905546], "label": "Blow-drying hair"}]}, "injaml6fr7s": {"duration": 224.31, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=injaml6fr7s", "annotations": [{"segment": [18.88834627769111, 187.48432601560063], "label": "Playing polo"}]}, "yhNC43Jco4M": {"duration": 158.43, "subset": "testing", "resolution": "426x240", "url": 
"https://www.youtube.com/watch?v=yhNC43Jco4M", "annotations": []}, "OD7lx6blG9M": {"duration": 28.422, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OD7lx6blG9M", "annotations": [{"segment": [0.01, 28.421224489795918], "label": "Making an omelette"}]}, "8wxFQrFEo3M": {"duration": 145.98, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8wxFQrFEo3M", "annotations": [{"segment": [23.255711659412125, 139.67897137280778], "label": "Smoking hookah"}]}, "_86X1xtj67w": {"duration": 209.31, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_86X1xtj67w", "annotations": [{"segment": [4.733575127925117, 202.56437012948518], "label": "Dodgeball"}]}, "SaBcTNvRA6g": {"duration": 142.57, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SaBcTNvRA6g", "annotations": [{"segment": [0.01, 103.86963001560062], "label": "Washing face"}]}, "lUZCcRbMSNE": {"duration": 16.35, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lUZCcRbMSNE", "annotations": []}, "-01K1HxqPB8": {"duration": 215.714, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-01K1HxqPB8", "annotations": [{"segment": [88.46852262090484, 215.601], "label": "Sumo"}]}, "w30TFlJiRKA": {"duration": 233.69, "subset": "training", "resolution": "480x272", "url": "https://www.youtube.com/watch?v=w30TFlJiRKA", "annotations": [{"segment": [16.406115444617782, 91.14508580343214], "label": "Playing piano"}, {"segment": [98.80127301092044, 145.46755694227767], "label": "Playing piano"}, {"segment": [155.31122620904836, 228.22729485179406], "label": "Playing piano"}]}, "hW6aZXhKl9M": {"duration": 196.93, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=hW6aZXhKl9M", "annotations": [{"segment": [0.30722064586583464, 195.39233077067084], "label": "Horseback 
riding"}]}, "MXbwIfqSq_Q": {"duration": 227.02, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=MXbwIfqSq_Q", "annotations": [{"segment": [34.708434737389496, 41.791788765428166], "label": "Tai chi"}, {"segment": [58.791838432720986, 70.4793725789848], "label": "Tai chi"}, {"segment": [78.27106145865834, 108.37531586583462], "label": "Tai chi"}, {"segment": [119.35451453198127, 176.37551405616222], "label": "Tai chi"}, {"segment": [179.20885564742588, 189.47971891575662], "label": "Tai chi"}]}, "Npj77L31bhw": {"duration": 61.98, "subset": "testing", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=Npj77L31bhw", "annotations": []}, "GyBIC-DBoss": {"duration": 78.09, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GyBIC-DBoss", "annotations": [{"segment": [9.623975788963532, 72.24072965639715], "label": "Clean and jerk"}]}, "WhBnR7yIvJc": {"duration": 35.086, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=WhBnR7yIvJc", "annotations": [{"segment": [0.2, 34.8853514739229], "label": "Swinging at the playground"}]}, "3boxQwSpv-8": {"duration": 30.09, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=3boxQwSpv-8", "annotations": [{"segment": [3.0985055110764432, 21.642591524336975], "label": "Washing hands"}, {"segment": [27.13539674851794, 29.8930611], "label": "Washing hands"}]}, "UJiQEkByWcM": {"duration": 15.302, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=UJiQEkByWcM", "annotations": [{"segment": [3.1033595943837753, 10.193342667706707], "label": "Throwing darts"}]}, "8aYk3so14fE": {"duration": 198.763, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8aYk3so14fE", "annotations": [{"segment": [60.747020747092655, 176.51895136167104], "label": "Playing congas"}]}, "u-YaDbGZoPk": {"duration": 68.73, "subset": 
"training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=u-YaDbGZoPk", "annotations": [{"segment": [15.669293379999997, 48.39079797191887], "label": "Cleaning windows"}]}, "cWU5059m_1Q": {"duration": 95.109, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=cWU5059m_1Q", "annotations": [{"segment": [5.48990858954086, 88.87716699687986], "label": "Rope skipping"}]}, "dMryzJswHY8": {"duration": 143.593, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dMryzJswHY8", "annotations": [{"segment": [0.2, 129.8991887675507], "label": "Assembling bicycle"}]}, "KEMMmoIdT3g": {"duration": 160.752, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KEMMmoIdT3g", "annotations": [{"segment": [46.39479612324492, 144.20004200468017], "label": "Arm wrestling"}]}, "_Xydl5mYkQ8": {"duration": 56.98, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_Xydl5mYkQ8", "annotations": [{"segment": [31.61291107644306, 48.39617004680187], "label": "Drinking beer"}]}, "ZUM89wyBcYY": {"duration": 199.51, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZUM89wyBcYY", "annotations": [{"segment": [49.45529445, 173.82272135], "label": "Tango"}]}, "EwYgRPVDQWQ": {"duration": 122.23, "subset": "validation", "resolution": "854x468", "url": "https://www.youtube.com/watch?v=EwYgRPVDQWQ", "annotations": [{"segment": [5.91099444, 117.01441497659907], "label": "Making a sandwich"}]}, "5UlxCwq-LOs": {"duration": 123.46, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=5UlxCwq-LOs", "annotations": [{"segment": [1.3642394695787832, 118.45452396], "label": "Long jump"}]}, "4At1Vd-0lWE": {"duration": 162.89, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=4At1Vd-0lWE", "annotations": [{"segment": [0.01, 
143.82932399375974], "label": "Playing racquetball"}]}, "FkbpD1zWdPw": {"duration": 183.21, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FkbpD1zWdPw", "annotations": []}, "mBHsAuDJmj4": {"duration": 176.426, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=mBHsAuDJmj4", "annotations": [{"segment": [20.359928237129484, 80.6143104524181], "label": "Welding"}, {"segment": [94.64615288611543, 151.06578627145086], "label": "Welding"}]}, "y-X0DjEHD_k": {"duration": 71.332, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=y-X0DjEHD_k", "annotations": [{"segment": [60.23806305531212, 62.789583293259234], "label": "Having an ice cream"}]}, "zm8gZpwZkRE": {"duration": 60.883, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zm8gZpwZkRE", "annotations": [{"segment": [21.27570908550628, 34.19310327613104], "label": "Playing drums"}, {"segment": [46.3506511076443, 60.31283494539782], "label": "Playing drums"}]}, "dSF2i1OQtMc": {"duration": 227.928, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dSF2i1OQtMc", "annotations": [{"segment": [16.712281617795323, 22.401568977044796], "label": "Rope skipping"}, {"segment": [37.33594829507466, 60.804258651978735], "label": "Rope skipping"}, {"segment": [68.27144831099366, 107.74087936578687], "label": "Rope skipping"}, {"segment": [122.31967822386363, 156.81098283931357], "label": "Rope skipping"}, {"segment": [161.78910927865687, 169.96745985757798], "label": "Rope skipping"}, {"segment": [176.7234885966867, 191.65786637597503], "label": "Rope skipping"}]}, "GdEKSyad_rk": {"duration": 72.261, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GdEKSyad_rk", "annotations": [{"segment": [4.171034936037442, 42.95038677379095], "label": "Waterskiing"}, {"segment": [45.43046160062403, 53.65980079875195], 
"label": "Waterskiing"}, {"segment": [57.60537438689547, 61.438217301092045], "label": "Waterskiing"}]}, "eUxFTEeNIGg": {"duration": 84.8, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eUxFTEeNIGg", "annotations": [{"segment": [7.672920527301093, 76.86149700624026], "label": "Rope skipping"}]}, "jDL2tRtoxN4": {"duration": 48.072, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jDL2tRtoxN4", "annotations": [{"segment": [0.2, 47.861], "label": "Riding bumper cars"}]}, "hrwcr7BxS5I": {"duration": 15.09, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hrwcr7BxS5I", "annotations": [{"segment": [0.38768564742589706, 7.060578003120125], "label": "Tennis serve with ball bouncing"}]}, "F4wq7O-V76I": {"duration": 144.29, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=F4wq7O-V76I", "annotations": []}, "aIvWwX--xKw": {"duration": 83.62, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aIvWwX--xKw", "annotations": [{"segment": [4.9399742, 24.671441899999998], "label": "Preparing salad"}, {"segment": [29.8237982, 67.16374875], "label": "Preparing salad"}]}, "OGzYaA3k7vo": {"duration": 152.765, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=OGzYaA3k7vo", "annotations": []}, "iUOCQ9sE2_A": {"duration": 65.9, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=iUOCQ9sE2_A", "annotations": [{"segment": [1.644885616224649, 62.19723736349454], "label": "Doing kickboxing"}]}, "W1JPhjmZgc8": {"duration": 80.18, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=W1JPhjmZgc8", "annotations": []}, "y9kk0ptXevk": {"duration": 94.85, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=y9kk0ptXevk", "annotations": [{"segment": 
[51.15619043999999, 73.91653666146645], "label": "Cleaning shoes"}]}, "Tv8r1w-rLME": {"duration": 5.32, "subset": "validation", "resolution": "360x360", "url": "https://www.youtube.com/watch?v=Tv8r1w-rLME", "annotations": [{"segment": [0.10784053666146647, 4.877710427457098], "label": "Smoking hookah"}]}, "AjizhJc_reI": {"duration": 98.616, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=AjizhJc_reI", "annotations": []}, "xCVZBvwhoWE": {"duration": 16.44, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xCVZBvwhoWE", "annotations": [{"segment": [0.76941, 15.849846000000001], "label": "Playing racquetball"}]}, "xsBFnpdLWkU": {"duration": 38.453, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=xsBFnpdLWkU", "annotations": [{"segment": [0.2, 38.25224489795918], "label": "Playing blackjack"}]}, "ALjodjboELk": {"duration": 200.203, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ALjodjboELk", "annotations": [{"segment": [8.745192779139737, 167.09564692667706], "label": "Scuba diving"}]}, "MF3VcmCCgzY": {"duration": 206.773, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=MF3VcmCCgzY", "annotations": [{"segment": [9.032216526756308, 206.5732426303855], "label": "Capoeira"}]}, "8xvoAyY70I8": {"duration": 34.58, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8xvoAyY70I8", "annotations": [{"segment": [8.36552504, 32.228190830733226], "label": "Washing dishes"}]}, "uFMlVrTUoR8": {"duration": 96.781, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=uFMlVrTUoR8", "annotations": [{"segment": [20.533829516380656, 75.19005219968798], "label": "Spread mulch"}]}, "Nl_DJpK-WzY": {"duration": 150.419, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Nl_DJpK-WzY", 
"annotations": []}, "-eJT11AuOa0": {"duration": 69.03, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-eJT11AuOa0", "annotations": []}, "bvL1xS5OOE8": {"duration": 6.6, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bvL1xS5OOE8", "annotations": []}, "L3X8OmIbj2c": {"duration": 222.633, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=L3X8OmIbj2c", "annotations": [{"segment": [11.458928237129484, 73.26769266770671], "label": "Arm wrestling"}, {"segment": [81.60145865834633, 213.91921372854912], "label": "Arm wrestling"}]}, "5kIl4TspdR0": {"duration": 141.74, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=5kIl4TspdR0", "annotations": [{"segment": [3.757530421216849, 137.05868954758188], "label": "Brushing teeth"}]}, "d4sX1jnL4BY": {"duration": 115.961, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=d4sX1jnL4BY", "annotations": []}, "vjCKf3el7pM": {"duration": 218.268, "subset": "training", "resolution": "730x480", "url": "https://www.youtube.com/watch?v=vjCKf3el7pM", "annotations": [{"segment": [17.47500550443907, 127.6287334296727], "label": "Wakeboarding"}, {"segment": [132.04848177235186, 149.3874945013238], "label": "Wakeboarding"}, {"segment": [155.84712669447023, 165.70656530506213], "label": "Wakeboarding"}, {"segment": [183.38555867577864, 198.34470691253875], "label": "Wakeboarding"}]}, "8kC5L_Iu3KY": {"duration": 6.316, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8kC5L_Iu3KY", "annotations": [{"segment": [0.01, 6.187737492667707], "label": "Playing congas"}]}, "dFGS7YCDk3Y": {"duration": 86.63, "subset": "testing", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=dFGS7YCDk3Y", "annotations": []}, "Vbkq1ldn3A8": {"duration": 72.35, "subset": "training", "resolution": "480x360", "url": 
"https://www.youtube.com/watch?v=Vbkq1ldn3A8", "annotations": [{"segment": [0.7895585023400936, 70.40297035881436], "label": "Smoking hookah"}]}, "2KzuD0SpFOc": {"duration": 163.283, "subset": "testing", "resolution": "350x240", "url": "https://www.youtube.com/watch?v=2KzuD0SpFOc", "annotations": []}, "j5Fq7Y8EJk8": {"duration": 197.881, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=j5Fq7Y8EJk8", "annotations": [{"segment": [53.62045794392523, 61.01638317757009], "label": "Waterskiing"}, {"segment": [80.43068691588785, 177.21195482866042], "label": "Waterskiing"}]}, "vrY1ZMqjMog": {"duration": 51.851, "subset": "validation", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=vrY1ZMqjMog", "annotations": [{"segment": [0.2, 51.65015873015873], "label": "Hanging wallpaper"}]}, "PtPIxxX-5lY": {"duration": 47.6, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PtPIxxX-5lY", "annotations": []}, "f0CViWbj3V4": {"duration": 27.632, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=f0CViWbj3V4", "annotations": [{"segment": [0.01, 26.213630882951936], "label": "Playing congas"}]}, "X52XHYKeLiE": {"duration": 192.87, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=X52XHYKeLiE", "annotations": []}, "M_6oQOAMe0s": {"duration": 161.12, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=M_6oQOAMe0s", "annotations": []}, "x1wy8QmHZL8": {"duration": 120.117, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=x1wy8QmHZL8", "annotations": [{"segment": [0.01, 119.367288958225], "label": "Baton twirling"}]}, "18k_K9cFFJg": {"duration": 192.318, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=18k_K9cFFJg", "annotations": [{"segment": [0.20000000000000004, 184.2201154446178], "label": "Rollerblading"}]}, 
"Ip1EWQCXvRM": {"duration": 130.287, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ip1EWQCXvRM", "annotations": [{"segment": [11.179085920436815, 21.341891302652105], "label": "Rafting"}, {"segment": [23.78096459438377, 88.61966293291731], "label": "Rafting"}, {"segment": [94.31083394695787, 118.70156686427455], "label": "Rafting"}]}, "_WMRdq7yFpA": {"duration": 10.542, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=_WMRdq7yFpA", "annotations": [{"segment": [0.20000000000000004, 10.461], "label": "Scuba diving"}]}, "_D0od3oWcD8": {"duration": 18.182, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=_D0od3oWcD8", "annotations": []}, "NXFnVeZen1U": {"duration": 52.08, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NXFnVeZen1U", "annotations": [{"segment": [2.3948385335413414, 47.20673244929797], "label": "Playing badminton"}]}, "V4S9ppnrXzc": {"duration": 145.752, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=V4S9ppnrXzc", "annotations": [{"segment": [24.3297786848674, 41.61074298439938], "label": "Swimming"}, {"segment": [61.62028059438378, 63.89409168642746], "label": "Swimming"}, {"segment": [92.31673033697349, 95.50006586583464], "label": "Swimming"}]}, "qNE6ju5dRc0": {"duration": 120.0, "subset": "validation", "resolution": "400x224", "url": "https://www.youtube.com/watch?v=qNE6ju5dRc0", "annotations": [{"segment": [35.56963737909517, 39.12660111700468], "label": "Kayaking"}, {"segment": [49.048657859594385, 59.53234045553822], "label": "Kayaking"}]}, "duZnMXDWkGw": {"duration": 231.945, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=duZnMXDWkGw", "annotations": [{"segment": [63.30604524180967, 70.54102184087364], "label": "Running a marathon"}, {"segment": [85.74186739469579, 91.53034789391576], "label": "Running a 
marathon"}, {"segment": [121.91987051482059, 139.2853120124805], "label": "Running a marathon"}, {"segment": [146.15913260530422, 152.30939313572543], "label": "Running a marathon"}, {"segment": [157.73609360374417, 175.46331513260532], "label": "Running a marathon"}, {"segment": [180.89001560062403, 189.57273634945398], "label": "Running a marathon"}]}, "aNE5ZWD5E34": {"duration": 140.737, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=aNE5ZWD5E34", "annotations": [{"segment": [32.72284255663493, 36.230284860659346], "label": "Applying sunscreen"}, {"segment": [49.60240864475242, 52.013775228769205], "label": "Applying sunscreen"}, {"segment": [61.44002642083481, 75.03136534892943], "label": "Applying sunscreen"}, {"segment": [89.71877907331229, 96.95287875088238], "label": "Applying sunscreen"}, {"segment": [103.09090271972973, 110.32500239729983], "label": "Applying sunscreen"}, {"segment": [116.90145664963627, 135.09631341443378], "label": "Applying sunscreen"}]}, "3V9tzjyr51I": {"duration": 8.92, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=3V9tzjyr51I", "annotations": []}, "l_0HvMt2tGg": {"duration": 209.31, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=l_0HvMt2tGg", "annotations": []}, "EeGunBjmpFw": {"duration": 70.798, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=EeGunBjmpFw", "annotations": [{"segment": [47.45529266369099, 61.76895305087835], "label": "Roof shingle removal"}]}, "_LmtW0w6ad4": {"duration": 143.71, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_LmtW0w6ad4", "annotations": [{"segment": [30.266178205928238, 36.09521993447738], "label": "Tai chi"}, {"segment": [46.63233382839314, 50.21943643057723], "label": "Tai chi"}, {"segment": [55.37589642121685, 56.27267207176288], "label": "Tai chi"}, {"segment": [70.17269465522621, 
73.3114094321373], "label": "Tai chi"}, {"segment": [74.88076682059283, 76.89851203432139], "label": "Tai chi"}, {"segment": [83.62432941341653, 86.3146563650546], "label": "Tai chi"}, {"segment": [90.57434070514822, 91.91950418096725], "label": "Tai chi"}, {"segment": [93.26466765678629, 95.05821895787832], "label": "Tai chi"}, {"segment": [96.85177025897038, 109.85501719188768], "label": "Tai chi"}]}, "E88Sr9H3Wi8": {"duration": 169.854, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=E88Sr9H3Wi8", "annotations": [{"segment": [31.532834113364537, 139.38042532293292], "label": "Making a lemonade"}]}, "JKE9D1anR7I": {"duration": 50.16, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JKE9D1anR7I", "annotations": [{"segment": [4.147312012480499, 46.559446177847114], "label": "Using parallel bars"}]}, "nIymjHWIz7Y": {"duration": 15.23, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=nIymjHWIz7Y", "annotations": [{"segment": [5.132877416536662, 13.521329861154447], "label": "Plataform diving"}]}, "SGiMk9KdOQw": {"duration": 136.63, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=SGiMk9KdOQw", "annotations": [{"segment": [0.01, 0.02], "label": "Playing water polo"}]}, "r2vO_RnVJYQ": {"duration": 208.7, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=r2vO_RnVJYQ", "annotations": []}, "vWz90CichbM": {"duration": 78.089, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vWz90CichbM", "annotations": [{"segment": [11.073464898595946, 52.21686271450858], "label": "Fixing the roof"}]}, "y9FLYBhbdM0": {"duration": 120.117, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=y9FLYBhbdM0", "annotations": []}, "qp3OfC0dCOs": {"duration": 138.42000000000002, "subset": "training", "resolution": "320x240", 
"url": "https://www.youtube.com/watch?v=qp3OfC0dCOs", "annotations": [{"segment": [13.810421216848674, 116.52542901716068], "label": "Playing lacrosse"}]}, "mGA8SbtKhc8": {"duration": 72.1, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=mGA8SbtKhc8", "annotations": []}, "qr5vqi5tTL8": {"duration": 41.73, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qr5vqi5tTL8", "annotations": [{"segment": [1.3670068595943836, 41.01020578783151], "label": "Bathing dog"}]}, "-QuFk_ThRNg": {"duration": 135.373, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-QuFk_ThRNg", "annotations": [{"segment": [0.01, 127.76922041341653], "label": "Arm wrestling"}]}, "t0y6dkIwEvc": {"duration": 34.06, "subset": "validation", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=t0y6dkIwEvc", "annotations": [{"segment": [23.966795744149763, 34.063672], "label": "Triple jump"}]}, "vzUeFzhVYLg": {"duration": 8.85, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vzUeFzhVYLg", "annotations": []}, "Kd_qpMbvRtI": {"duration": 152.811, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Kd_qpMbvRtI", "annotations": [{"segment": [117.63781654066283, 120.96494590870803], "label": "Wakeboarding"}, {"segment": [129.9957279957998, 134.27346687915906], "label": "Wakeboarding"}, {"segment": [143.77955328662406, 146.1560748884903], "label": "Wakeboarding"}]}, "YWu9gYCBAas": {"duration": 216.109, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=YWu9gYCBAas", "annotations": [{"segment": [17.868533930472864, 192.84530788143525], "label": "Cleaning sink"}]}, "GWJw2jR2mTY": {"duration": 4.598, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=GWJw2jR2mTY", "annotations": [{"segment": [0.19999999999999998, 4.39755], "label": "Mooping 
floor"}]}, "eCd8x8KqxKQ": {"duration": 56.1, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eCd8x8KqxKQ", "annotations": [{"segment": [28.18529251170047, 30.02061388455538], "label": "Playing blackjack"}, {"segment": [31.418953978159127, 35.87616302652106], "label": "Playing blackjack"}]}, "2Xfigl8xrXc": {"duration": 57.73, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2Xfigl8xrXc", "annotations": [{"segment": [3.644262849453978, 7.9633891895475815], "label": "Shaving legs"}, {"segment": [30.402076319999995, 41.16667292901716], "label": "Shaving legs"}]}, "ACnOpQNrhqs": {"duration": 189.29, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ACnOpQNrhqs", "annotations": [{"segment": [1.7718165585023402, 189.289069], "label": "Volleyball"}]}, "6g80a1NnftU": {"duration": 128.43, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=6g80a1NnftU", "annotations": [{"segment": [3.0552371243041683, 73.07108470997395], "label": "Washing hands"}]}, "GBNj2k2OVGo": {"duration": 226.81, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GBNj2k2OVGo", "annotations": [{"segment": [4.599470923054609, 225.0202687301092], "label": "Playing saxophone"}]}, "fK8JkYW4cM8": {"duration": 148.38, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=fK8JkYW4cM8", "annotations": []}, "x39KxoQHEp4": {"duration": 238.26, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=x39KxoQHEp4", "annotations": [{"segment": [0.01, 238.241], "label": "River tubing"}]}, "FqlXey9KFZA": {"duration": 49.552, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FqlXey9KFZA", "annotations": [{"segment": [0, 49.501], "label": "Fixing the roof"}]}, "qlvW6jW2mfQ": {"duration": 123.206, "subset": "training", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=qlvW6jW2mfQ", "annotations": [{"segment": [9.41817299358641, 40.36359854394176], "label": "Playing drums"}, {"segment": [41.1324289923978, 44.39995839833593], "label": "Playing drums"}, {"segment": [45.937619295248, 48.43631825273011], "label": "Playing drums"}, {"segment": [51.70384765866825, 65.73500334299086], "label": "Playing drums"}, {"segment": [69.77136299063962, 79.18953595631825], "label": "Playing drums"}, {"segment": [80.34278162558502, 81.49602729485179], "label": "Playing drums"}, {"segment": [81.88044251794072, 83.61031102184087], "label": "Playing drums"}, {"segment": [83.9947262449298, 92.06744592979719], "label": "Playing drums"}, {"segment": [113.40249081123244, 118.39988871138846], "label": "Playing drums"}]}, "yE5euaQmcXw": {"duration": 100.705, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yE5euaQmcXw", "annotations": [{"segment": [11.149658346333853, 36.589723868954756], "label": "Curling"}]}, "FkKgmF1qeIA": {"duration": 182.068, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FkKgmF1qeIA", "annotations": []}, "bvnXdr-Hre4": {"duration": 89.14, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bvnXdr-Hre4", "annotations": [{"segment": [4.865273010920437, 86.04582839313572], "label": "Springboard diving"}]}, "gEqT50dX-iY": {"duration": 76.86, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=gEqT50dX-iY", "annotations": [{"segment": [54.25626633385335, 57.49365681123245], "label": "Cleaning windows"}]}, "JAkb4JGhs94": {"duration": 187.04, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JAkb4JGhs94", "annotations": []}, "9wpV8Gknh1E": {"duration": 126.619, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=9wpV8Gknh1E", "annotations": [{"segment": 
[5.827183220662159, 106.37078607150285], "label": "Camel ride"}]}, "kWdIYqh6kEo": {"duration": 193.167, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=kWdIYqh6kEo", "annotations": [{"segment": [7.522071757051729, 61.380105537542114], "label": "Playing ice hockey"}, {"segment": [68.60129442431177, 75.52160044079938], "label": "Playing ice hockey"}, {"segment": [78.22954627333799, 93.27368978744146], "label": "Playing ice hockey"}, {"segment": [103.80459024731388, 124.26462542649459], "label": "Playing ice hockey"}, {"segment": [127.57433699959734, 137.80435458918768], "label": "Playing ice hockey"}, {"segment": [155.85732680611184, 170.60058592990654], "label": "Playing ice hockey"}, {"segment": [156.45909115264797, 192.966801], "label": "Playing ice hockey"}]}, "bp1blshSKJU": {"duration": 118.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bp1blshSKJU", "annotations": [{"segment": [40.591263650546026, 98.39469578783152], "label": "Playing piano"}]}, "u9IwWaXtQlw": {"duration": 144.03, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=u9IwWaXtQlw", "annotations": []}, "UyThFVEvNsI": {"duration": 61.12, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=UyThFVEvNsI", "annotations": [{"segment": [13.054753510140406, 24.108413416536663], "label": "Playing badminton"}]}, "ffyPN2-EDg4": {"duration": 108.16, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ffyPN2-EDg4", "annotations": [{"segment": [6.997918595163807, 91.20315192], "label": "Shoveling snow"}]}, "Q_v6MdHJ7Go": {"duration": 37.872, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Q_v6MdHJ7Go", "annotations": [{"segment": [10.804316912321287, 32.30691037026521], "label": "Kneeling"}]}, "HwM3ionUBsg": {"duration": 27.56, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=HwM3ionUBsg", "annotations": [{"segment": [6.09526159, 22.144264859594383], "label": "Using uneven bars"}]}, "FT_34R0dZnA": {"duration": 152.95, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FT_34R0dZnA", "annotations": [{"segment": [68.44820503630595, 111.79397139144614], "label": "Canoeing"}]}, "SaiY5Fnk2kw": {"duration": 63.577, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SaiY5Fnk2kw", "annotations": []}, "hGKKovntCUQ": {"duration": 66.71, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hGKKovntCUQ", "annotations": []}, "X5_KlfVvi6E": {"duration": 146.47, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=X5_KlfVvi6E", "annotations": [{"segment": [0.010000000000000002, 136.7054297971919], "label": "Playing flauta"}]}, "DAxBgyhuhyU": {"duration": 31.626, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=DAxBgyhuhyU", "annotations": []}, "9a6C5rvS56A": {"duration": 78.74, "subset": "training", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=9a6C5rvS56A", "annotations": [{"segment": [19.2435672, 61.00384026], "label": "Hopscotch"}]}, "swmNnPkPBek": {"duration": 169.135, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=swmNnPkPBek", "annotations": [{"segment": [5.404836193447737, 9.623244929797192], "label": "Kite flying"}, {"segment": [24.12402496099844, 30.18798751950078], "label": "Kite flying"}, {"segment": [143.55772230889235, 155.4219968798752], "label": "Kite flying"}]}, "gkwLhml3mnA": {"duration": 236.17000000000002, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gkwLhml3mnA", "annotations": []}, "SWbUJh4XuMQ": {"duration": 73.724, "subset": "training", "resolution": "854x468", "url": "https://www.youtube.com/watch?v=SWbUJh4XuMQ", 
"annotations": [{"segment": [0.2, 72.60605304212169], "label": "Welding"}]}, "ZTtPztql5jM": {"duration": 125.53, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ZTtPztql5jM", "annotations": [{"segment": [18.408026452418095, 21.149647413416538], "label": "Discus throw"}, {"segment": [41.51597455226209, 45.23674585647426], "label": "Discus throw"}, {"segment": [58.55319052418097, 76.96121697659908], "label": "Discus throw"}, {"segment": [83.81526937909517, 87.73187075195008], "label": "Discus throw"}]}, "NnMMEFglHBQ": {"duration": 158.9, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NnMMEFglHBQ", "annotations": [{"segment": [20.21216848673947, 129.53674456], "label": "Vacuuming floor"}]}, "inw7wXFL9dw": {"duration": 204.87, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=inw7wXFL9dw", "annotations": [{"segment": [46.02369002515203, 178.02218866926677], "label": "Beer pong"}]}, "WPYQ27GjFKc": {"duration": 194.514, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WPYQ27GjFKc", "annotations": []}, "NxnEKnqWLCI": {"duration": 183.09, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NxnEKnqWLCI", "annotations": [{"segment": [28.70586689590032, 167.52229785517952], "label": "Windsurfing"}]}, "arRoR5Hmegg": {"duration": 199.46, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=arRoR5Hmegg", "annotations": []}, "fnPX_0Rs4eE": {"duration": 102.05, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=fnPX_0Rs4eE", "annotations": [{"segment": [3.5404513516368383, 95.89564929801652], "label": "Using the balance beam"}]}, "oumu91uHB9A": {"duration": 152.05, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oumu91uHB9A", "annotations": []}, "dBzWXTH5j00": {"duration": 
142.571, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=dBzWXTH5j00", "annotations": [{"segment": [0.01, 142.570497], "label": "Rafting"}]}, "2G8vCitCz_0": {"duration": 141.18, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=2G8vCitCz_0", "annotations": []}, "HytB88Fhqw8": {"duration": 96.5, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HytB88Fhqw8", "annotations": [{"segment": [0.3010986942879076, 6.774720514820592], "label": "Wrapping presents"}, {"segment": [56.30545494539781, 85.81312652106084], "label": "Wrapping presents"}]}, "4fEY6zIq8bQ": {"duration": 219.661, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=4fEY6zIq8bQ", "annotations": [{"segment": [15.760633385335415, 219.621], "label": "Plastering"}]}, "9-yA1F4il_A": {"duration": 16.835, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=9-yA1F4il_A", "annotations": [{"segment": [0.01, 14.756572542901715], "label": "Blowing leaves"}]}, "mjbzWcSeiwQ": {"duration": 88.747, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mjbzWcSeiwQ", "annotations": [{"segment": [20.75686427457098, 28.09095631825273], "label": "Playing ice hockey"}, {"segment": [31.96557098283931, 53.137572542901715], "label": "Playing ice hockey"}, {"segment": [57.1505663026521, 67.11386115444617], "label": "Playing ice hockey"}]}, "hg8WbkmC2nU": {"duration": 214.901, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hg8WbkmC2nU", "annotations": [{"segment": [33.22659137723755, 37.24299250466672], "label": "Making a cake"}]}, "JkxO6IhuhOs": {"duration": 166.73, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=JkxO6IhuhOs", "annotations": [{"segment": [98.87643527509809, 136.6056011700468], "label": "Bungee jumping"}]}, 
"6hjRnngC73o": {"duration": 119.165, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=6hjRnngC73o", "annotations": [{"segment": [29.92465054602184, 100.18252574102965], "label": "Waxing skis"}]}, "Ny8NDMWfGJk": {"duration": 196.418, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ny8NDMWfGJk", "annotations": [{"segment": [43.833691001456515, 55.45963183261764], "label": "Changing car wheel"}, {"segment": [86.97205040129121, 93.70285825091081], "label": "Changing car wheel"}, {"segment": [101.04555772322311, 104.71690745937926], "label": "Changing car wheel"}]}, "pBaeRTgaNBM": {"duration": 123.647, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pBaeRTgaNBM", "annotations": [{"segment": [0.01, 123.541], "label": "Using the rowing machine"}]}, "zCND0HJq6Iw": {"duration": 75.63, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zCND0HJq6Iw", "annotations": [{"segment": [7.314973962870514, 66.54266637191887], "label": "Playing bagpipes"}]}, "M4db4WfqDoE": {"duration": 163.074, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=M4db4WfqDoE", "annotations": [{"segment": [36.88875576443058, 125.42176959906398], "label": "Snowboarding"}, {"segment": [128.2202269329173, 143.48453966302654], "label": "Snowboarding"}]}, "xpKAvKrrBDs": {"duration": 153.856, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xpKAvKrrBDs", "annotations": [{"segment": [82.79000435419113, 106.47847485281048], "label": "Fun sliding down"}]}, "EZsumzBA68I": {"duration": 127.733, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=EZsumzBA68I", "annotations": [{"segment": [0.2, 102.30558154113409], "label": "Painting fence"}]}, "fFoYCI-Si6s": {"duration": 81.34, "subset": "training", "resolution": "424x240", "url": 
"https://www.youtube.com/watch?v=fFoYCI-Si6s", "annotations": [{"segment": [30.074051704925342, 71.44173305148206], "label": "Doing kickboxing"}]}, "PiDMuIRtgZo": {"duration": 49.947, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PiDMuIRtgZo", "annotations": [{"segment": [0.5454230624024962, 48.85432287519501], "label": "Playing ice hockey"}]}, "COuOkc3U-sQ": {"duration": 180.98, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=COuOkc3U-sQ", "annotations": []}, "Ogx1hUoOSrE": {"duration": 176.588, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Ogx1hUoOSrE", "annotations": [{"segment": [19.829878315132607, 147.63915132605305], "label": "Gargling mouthwash"}]}, "Prt07JFztMI": {"duration": 9.683, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Prt07JFztMI", "annotations": [{"segment": [0.2, 9.441], "label": "Powerbocking"}]}, "ZY0cdXr_1MA": {"duration": 60.12, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZY0cdXr_1MA", "annotations": []}, "FGKdmKX1z64": {"duration": 108.09, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=FGKdmKX1z64", "annotations": [{"segment": [26.338883839999998, 63.436847199999995], "label": "Cleaning windows"}]}, "5R3h6lxne90": {"duration": 197.649, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=5R3h6lxne90", "annotations": [{"segment": [36.75009299301433, 62.60442944161246], "label": "Putting in contact lenses"}, {"segment": [75.53159766591152, 85.68865841357508], "label": "Putting in contact lenses"}, {"segment": [99.53919579675264, 117.69878925469658], "label": "Putting in contact lenses"}, {"segment": [136.16617243226668, 141.10508832016467], "label": "Putting in contact lenses"}, {"segment": [144.49111791518024, 149.41625187156654], "label": "Putting in contact 
lenses"}, {"segment": [161.113445017984, 168.50114595256343], "label": "Putting in contact lenses"}]}, "jEGTpBJrD4s": {"duration": 188.105, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jEGTpBJrD4s", "annotations": [{"segment": [7.042928247742155, 16.726954588387617], "label": "Playing congas"}, {"segment": [29.05207902193639, 33.74736452043116], "label": "Playing congas"}, {"segment": [52.23505117075431, 179.30121497376902], "label": "Playing congas"}]}, "GhVERbQMZWQ": {"duration": 142.78, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GhVERbQMZWQ", "annotations": [{"segment": [32.641655502370725, 37.970905933491856], "label": "Wakeboarding"}, {"segment": [45.964781580173536, 46.18683331470876], "label": "Wakeboarding"}, {"segment": [52.40429210164499, 55.06891729604624], "label": "Wakeboarding"}, {"segment": [58.177646689514354, 63.72894917785028], "label": "Wakeboarding"}, {"segment": [66.1715222727181, 68.16999116851902], "label": "Wakeboarding"}, {"segment": [71.0566684624537, 77.27412724938993], "label": "Wakeboarding"}, {"segment": [79.71670034425775, 82.60337763819243], "label": "Wakeboarding"}, {"segment": [105.03063969106957, 117.2435051654086], "label": "Wakeboarding"}]}, "FofBEedm80o": {"duration": 205.99, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=FofBEedm80o", "annotations": [{"segment": [9.31069466996367, 201.3036389063963], "label": "Tai chi"}]}, "3j2d27w3x5Q": {"duration": 194.44, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3j2d27w3x5Q", "annotations": [{"segment": [49.44509742901716, 188.68006503588143], "label": "Preparing pasta"}]}, "HrKO4BfXVbk": {"duration": 180.42, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=HrKO4BfXVbk", "annotations": [{"segment": [49.917926900000005, 150.0846264], "label": "Playing saxophone"}]}, 
"4KzG9JBE9V8": {"duration": 25.542, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=4KzG9JBE9V8", "annotations": [{"segment": [0.2383271028037383, 23.17561214953271], "label": "Snow tubing"}]}, "jfhKC2WFDTo": {"duration": 179.31, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jfhKC2WFDTo", "annotations": [{"segment": [26.294178742589704, 176.78639324804993], "label": "Cheerleading"}]}, "Snw8Jf1DQwg": {"duration": 101.286, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Snw8Jf1DQwg", "annotations": [{"segment": [9.32268489703588, 101.08544217687074], "label": "Baton twirling"}]}, "3TsNntqwbSQ": {"duration": 43.422, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3TsNntqwbSQ", "annotations": [{"segment": [5.073439937597503, 25.10819188767551], "label": "Camel ride"}]}, "IdhpB7doBOE": {"duration": 166.91, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=IdhpB7doBOE", "annotations": [{"segment": [26.3976948, 84.4372395], "label": "Ballet"}, {"segment": [107.80393950000001, 140.77936035000002], "label": "Ballet"}]}, "DrEsyzI2u4c": {"duration": 37.501, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DrEsyzI2u4c", "annotations": [{"segment": [0.009999999999999998, 37.461], "label": "Kneeling"}]}, "LgoMRWkBDkQ": {"duration": 20.016, "subset": "training", "resolution": "240x176", "url": "https://www.youtube.com/watch?v=LgoMRWkBDkQ", "annotations": [{"segment": [3.091332762870515, 7.775170282371295], "label": "Removing ice from car"}, {"segment": [10.179540209048364, 19.234959413416536], "label": "Removing ice from car"}]}, "pJnMBfgK9Gw": {"duration": 154.227, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pJnMBfgK9Gw", "annotations": []}, "xbA294b3Z5c": {"duration": 159.869, "subset": "training", 
"resolution": "854x480", "url": "https://www.youtube.com/watch?v=xbA294b3Z5c", "annotations": [{"segment": [52.60417628705148, 79.52953666146645], "label": "Putting in contact lenses"}]}, "FmDGejzydo8": {"duration": 203.965, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FmDGejzydo8", "annotations": [{"segment": [21.921373259584207, 192.5268418691589], "label": "Longboarding"}]}, "4oi936sw1Mo": {"duration": 159.498, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4oi936sw1Mo", "annotations": []}, "fkmYC91-biI": {"duration": 43.98, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fkmYC91-biI", "annotations": [{"segment": [3.324002809672387, 19.30810112], "label": "Smoking hookah"}, {"segment": [22.00571164, 42.93789196411856], "label": "Smoking hookah"}]}, "2qcdjyT7nDY": {"duration": 225.75, "subset": "training", "resolution": "658x480", "url": "https://www.youtube.com/watch?v=2qcdjyT7nDY", "annotations": [{"segment": [7.788168, 201.20485149687988], "label": "Cheerleading"}]}, "bu7zk-YxEoY": {"duration": 128.85, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bu7zk-YxEoY", "annotations": [{"segment": [5.426157566302652, 94.85727301092044], "label": "Using the balance beam"}]}, "-4VuHlphgL4": {"duration": 157.525, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-4VuHlphgL4", "annotations": [{"segment": [56.90902180685358, 157.281], "label": "Painting furniture"}]}, "l4YcjFl6SDc": {"duration": 172.85, "subset": "training", "resolution": "1280x716", "url": "https://www.youtube.com/watch?v=l4YcjFl6SDc", "annotations": [{"segment": [0.01, 172.84918367346938], "label": "Playing pool"}]}, "lKDTjsH9XtU": {"duration": 232.641, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lKDTjsH9XtU", "annotations": [{"segment": [147.2546995007503, 
159.91788052225166], "label": "Fun sliding down"}]}, "RotRDzNBDRM": {"duration": 222.099, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RotRDzNBDRM", "annotations": []}, "Wzpyu5NEqZ4": {"duration": 202.32, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Wzpyu5NEqZ4", "annotations": [{"segment": [36.44484477379095, 39.91578237129485], "label": "Playing squash"}, {"segment": [53.799532761310445, 56.639390795631826], "label": "Playing squash"}]}, "_pYDg0B6_tw": {"duration": 19.85, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_pYDg0B6_tw", "annotations": [{"segment": [3.784886895475819, 19.29519890795632], "label": "Springboard diving"}]}, "PtoVL69LEh4": {"duration": 148.4, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PtoVL69LEh4", "annotations": [{"segment": [35.7683615099706, 57.0673211146133], "label": "Getting a tattoo"}]}, "u_HDCcby_B0": {"duration": 117.517, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=u_HDCcby_B0", "annotations": [{"segment": [7.0431941869032695, 13.984901807432042], "label": "Disc dog"}, {"segment": [35.17537770167777, 66.96109154304635], "label": "Disc dog"}]}, "3MJQEQ98168": {"duration": 24.103, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3MJQEQ98168", "annotations": [{"segment": [0.2, 15.008424336973478], "label": "Using the monkey bar"}]}, "OaFYMXKxTbk": {"duration": 144.823, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OaFYMXKxTbk", "annotations": [{"segment": [7.455778916870961, 124.94077904524183], "label": "Skiing"}]}, "86lVBoc5Ihs": {"duration": 119.745, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=86lVBoc5Ihs", "annotations": []}, "fMyTTjKV-1U": {"duration": 186.665, "subset": "testing", 
"resolution": "320x180", "url": "https://www.youtube.com/watch?v=fMyTTjKV-1U", "annotations": []}, "Ocid5pcnSVY": {"duration": 234.03, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ocid5pcnSVY", "annotations": []}, "4o8MaHTb7E4": {"duration": 188.45, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=4o8MaHTb7E4", "annotations": [{"segment": [6.900421216848674, 169.63150828], "label": "Triple jump"}]}, "zxpQHLwZCug": {"duration": 34.81, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zxpQHLwZCug", "annotations": []}, "4YGuIycW_IA": {"duration": 100.357, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4YGuIycW_IA", "annotations": [{"segment": [56.5190956236889, 100.35662131519274], "label": "Applying sunscreen"}]}, "gHYcZYkrxxQ": {"duration": 137.764, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gHYcZYkrxxQ", "annotations": [{"segment": [3.0088859173414555, 137.54907050703798], "label": "Removing ice from car"}]}, "-s1MQAiOMeY": {"duration": 125.9, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=-s1MQAiOMeY", "annotations": [{"segment": [31.823237459999994, 71.64603384], "label": "Sailing"}, {"segment": [31.823237459999998, 71.64603384], "label": "Sailing"}]}, "tTkavaWq0QM": {"duration": 181.98, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tTkavaWq0QM", "annotations": [{"segment": [24.48642144, 169.19964369731252], "label": "Tango"}]}, "yvzlX5St_Ok": {"duration": 30.303, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=yvzlX5St_Ok", "annotations": [{"segment": [0.01, 30.302039], "label": "Riding bumper cars"}]}, "5TjIJOFGupI": {"duration": 67.62, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5TjIJOFGupI", 
"annotations": [{"segment": [0, 67.581], "label": "Making a sandwich"}]}, "ybAEMliC7p4": {"duration": 18.414, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=ybAEMliC7p4", "annotations": [{"segment": [1.0772283346333853, 16.503138086583466], "label": "Playing rubik cube"}]}, "osLHgm_yuMc": {"duration": 132.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=osLHgm_yuMc", "annotations": [{"segment": [8.077940186841674, 128.81696533793203], "label": "Paintball"}]}, "aTl4KzTuJoU": {"duration": 41.402, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=aTl4KzTuJoU", "annotations": [{"segment": [15.824163198127925, 41.401178], "label": "Waterskiing"}]}, "3zPoB-_JxEc": {"duration": 42.12, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3zPoB-_JxEc", "annotations": [{"segment": [3.1470826333853354, 37.96168426521061], "label": "Smoking a cigarette"}]}, "iOpoR0UeJNg": {"duration": 77.78999999999999, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=iOpoR0UeJNg", "annotations": [{"segment": [69.25134321372855, 77.741], "label": "Using uneven bars"}]}, "WCCkmuFrSQ0": {"duration": 185.85, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=WCCkmuFrSQ0", "annotations": [{"segment": [3.479298714805735, 122.35533813733502], "label": "Hopscotch"}]}, "62BPME-ikJU": {"duration": 168.902, "subset": "validation", "resolution": "854x270", "url": "https://www.youtube.com/watch?v=62BPME-ikJU", "annotations": [{"segment": [72.08587902035165, 156.53685304517134], "label": "Cutting the grass"}]}, "j1XZ3FA8EYY": {"duration": 199.32, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=j1XZ3FA8EYY", "annotations": [{"segment": [0.01, 199.40266666666668], "label": "Futsal"}]}, "c7pbA5pQPYE": {"duration": 157.339, "subset": 
"testing", "resolution": "704x480", "url": "https://www.youtube.com/watch?v=c7pbA5pQPYE", "annotations": []}, "ufP69gKRisM": {"duration": 90.58, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=ufP69gKRisM", "annotations": []}, "Geh_BZchxFY": {"duration": 58.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Geh_BZchxFY", "annotations": [{"segment": [0, 58.049885], "label": "Peeling potatoes"}]}, "NpsOCOnQS6c": {"duration": 49.111, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=NpsOCOnQS6c", "annotations": [{"segment": [6.9917019145424035, 45.239523565632744], "label": "Painting fence"}]}, "8FSKFy1tPQc": {"duration": 123.02, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=8FSKFy1tPQc", "annotations": [{"segment": [15.7372347628599, 31.4744695257198], "label": "Playing polo"}, {"segment": [37.23199443896123, 58.34291912084647], "label": "Playing polo"}, {"segment": [63.716609039871805, 71.20139142708565], "label": "Playing polo"}, {"segment": [76.57508094383775, 87.13054322932918], "label": "Playing polo"}, {"segment": [92.31231562402496, 106.32229283931358], "label": "Playing polo"}]}, "iPiP-7UwAa4": {"duration": 227.09, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iPiP-7UwAa4", "annotations": [{"segment": [11.161860374414976, 221.2883112324493], "label": "Cheerleading"}]}, "NcTZ3wgdNOQ": {"duration": 212.23, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=NcTZ3wgdNOQ", "annotations": [{"segment": [29.791965678627143, 206.5576287051482], "label": "Grooming horse"}]}, "dsCJ4xdRq-Q": {"duration": 28.863, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dsCJ4xdRq-Q", "annotations": [{"segment": [4.097470717876335, 19.406700876755067], "label": "Playing rubik cube"}]}, "4ACqWG_p1bI": 
{"duration": 192.285, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4ACqWG_p1bI", "annotations": [{"segment": [12.898956491592996, 15.298762350494018], "label": "Playing drums"}, {"segment": [35.39713641879008, 43.49648119258104], "label": "Playing drums"}, {"segment": [53.69565609291038, 59.695170740162936], "label": "Playing drums"}, {"segment": [98.69201594730455, 128.38961345120472], "label": "Playing drums"}, {"segment": [161.38694363806553, 167.9864097347894], "label": "Playing drums"}, {"segment": [183.58514778159127, 192.08444400000002], "label": "Playing drums"}]}, "sYhUC2uQR7I": {"duration": 230.41, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sYhUC2uQR7I", "annotations": [{"segment": [28.037603288611546, 217.1116972605304], "label": "Doing kickboxing"}]}, "0ysVELHeEyc": {"duration": 165.443, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0ysVELHeEyc", "annotations": [{"segment": [13.417865834633387, 155.83482683307332], "label": "Painting fence"}]}, "URYKdg-MG6g": {"duration": 200.11, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=URYKdg-MG6g", "annotations": []}, "h1d-jcsWap8": {"duration": 87.052, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=h1d-jcsWap8", "annotations": [{"segment": [30.98359282371295, 50.872624024961], "label": "Peeling potatoes"}]}, "dcEdjqyHj8M": {"duration": 62.53, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dcEdjqyHj8M", "annotations": [{"segment": [0, 52.19053283151326], "label": "Chopping wood"}]}, "peoMXSP3VgY": {"duration": 84.986, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=peoMXSP3VgY", "annotations": [{"segment": [10.208810516380655, 15.379506751950078], "label": "Longboarding"}, {"segment": [27.17930072542902, 
53.297945812792506], "label": "Longboarding"}, {"segment": [69.8706901575663, 79.6817548096724], "label": "Longboarding"}]}, "ZFJkIiqOErk": {"duration": 124.18, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZFJkIiqOErk", "annotations": [{"segment": [0, 123.98031746031745], "label": "Dodgeball"}]}, "R3YoG3Hi4iI": {"duration": 169.054, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=R3YoG3Hi4iI", "annotations": [{"segment": [31.956560175213365, 39.07132653035355], "label": "Longboarding"}, {"segment": [59.888605865763736, 74.1181385760441], "label": "Longboarding"}, {"segment": [102.31369413159968, 111.009519676771], "label": "Longboarding"}, {"segment": [125.50256225205656, 129.19170036212924], "label": "Longboarding"}, {"segment": [134.98891739224348, 142.10368374738368], "label": "Longboarding"}]}, "HUZ9PuMm8yM": {"duration": 207.84, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=HUZ9PuMm8yM", "annotations": [{"segment": [0.01, 187.04791731669266], "label": "Springboard diving"}]}, "1UQYk6ohuRE": {"duration": 177.076, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1UQYk6ohuRE", "annotations": [{"segment": [0.01, 165.1272979719189], "label": "Trimming branches or hedges"}]}, "2uBPhFis_4Y": {"duration": 48.739, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2uBPhFis_4Y", "annotations": [{"segment": [0.2, 48.480999999999995], "label": "Playing congas"}]}, "-uZnWuW8fB8": {"duration": 52.52, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=-uZnWuW8fB8", "annotations": []}, "X5_bftRM7BQ": {"duration": 114.31, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=X5_bftRM7BQ", "annotations": []}, "ih3LAZtQM50": {"duration": 52.06, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=ih3LAZtQM50", "annotations": [{"segment": [0.01, 46.25222858268331], "label": "Doing karate"}]}, "DrigU09Wf7k": {"duration": 150.977, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DrigU09Wf7k", "annotations": [{"segment": [41.68918197544228, 54.87897966258787], "label": "Surfing"}, {"segment": [134.25329788701754, 150.7760770975057], "label": "Surfing"}]}, "3zT7x5jZEfI": {"duration": 230.57999999999998, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3zT7x5jZEfI", "annotations": [{"segment": [19.017160686427456, 225.33541341653665], "label": "Volleyball"}]}, "22IqnOqbTaE": {"duration": 15.883, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=22IqnOqbTaE", "annotations": []}, "YAiCO8en_ls": {"duration": 216.41, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YAiCO8en_ls", "annotations": [{"segment": [10.972424747330027, 158.1717228961267], "label": "Disc dog"}]}, "CcBPELJaRlk": {"duration": 220.055, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CcBPELJaRlk", "annotations": [{"segment": [18.538217691107644, 191.56158280811232], "label": "Rafting"}]}, "PUWg7fXnCf0": {"duration": 133.468, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PUWg7fXnCf0", "annotations": [{"segment": [10.378540456359033, 108.351992263513], "label": "Rope skipping"}]}, "_I1DKeGg_w8": {"duration": 135.489, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=_I1DKeGg_w8", "annotations": [{"segment": [0.2, 131.6896941954352], "label": "Throwing darts"}]}, "pv6aKuS_TkU": {"duration": 122.439, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pv6aKuS_TkU", "annotations": []}, "KTZcBhW4RHY": {"duration": 18.553, "subset": "testing", "resolution": 
"720x480", "url": "https://www.youtube.com/watch?v=KTZcBhW4RHY", "annotations": []}, "C1v3-D1kfwU": {"duration": 53.453, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=C1v3-D1kfwU", "annotations": []}, "QJKFphS_3UQ": {"duration": 118.31, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QJKFphS_3UQ", "annotations": [{"segment": [96.52708867550702, 100.95663002964119], "label": "Playing flauta"}]}, "tD30qafrkhM": {"duration": 162.07999999999998, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=tD30qafrkhM", "annotations": [{"segment": [63.45366146645866, 76.34663650546021], "label": "Smoking a cigarette"}]}, "rVcgTALWCRA": {"duration": 25.751, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=rVcgTALWCRA", "annotations": [{"segment": [0.01, 25.75092970521542], "label": "Building sandcastles"}]}, "Rte9HqaQ1tw": {"duration": 80.9, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Rte9HqaQ1tw", "annotations": [{"segment": [23.85301537775797, 79.13143196748278], "label": "Washing dishes"}]}, "P_zz379qSuo": {"duration": 123.925, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=P_zz379qSuo", "annotations": [{"segment": [19.33301849080766, 90.2851952402496], "label": "Slacklining"}]}, "VTmGlaJABNA": {"duration": 236.89, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VTmGlaJABNA", "annotations": []}, "ClE77VhGQsk": {"duration": 60.001, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=ClE77VhGQsk", "annotations": [{"segment": [0.2, 56.85992056074767], "label": "Elliptical trainer"}]}, "7eL_xnoyYco": {"duration": 183.654, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7eL_xnoyYco", "annotations": []}, "dRRht9vdYDE": {"duration": 
101.797, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dRRht9vdYDE", "annotations": [{"segment": [6.669959063962558, 20.48630283931357], "label": "Curling"}, {"segment": [26.203410608424335, 29.538390140405614], "label": "Curling"}, {"segment": [26.203410608424335, 32.238135475819035], "label": "Curling"}, {"segment": [90.99729865834634, 101.59628], "label": "Curling"}]}, "R246xMs2aig": {"duration": 191.01, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=R246xMs2aig", "annotations": [{"segment": [0.01, 186.88708268330734], "label": "Belly dance"}]}, "QP2IYVyoq5I": {"duration": 84.94, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QP2IYVyoq5I", "annotations": []}, "dhsSq0K1xEg": {"duration": 20.32, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=dhsSq0K1xEg", "annotations": []}, "tgPD2SZo_DY": {"duration": 36.943, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tgPD2SZo_DY", "annotations": []}, "IAozAC1YFr4": {"duration": 23.755, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IAozAC1YFr4", "annotations": []}, "QRdQ8KIVf40": {"duration": 195.582, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=QRdQ8KIVf40", "annotations": [{"segment": [0.01, 195.581677], "label": "Playing pool"}]}, "H8aW-6HhMBA": {"duration": 137.09, "subset": "training", "resolution": "490x360", "url": "https://www.youtube.com/watch?v=H8aW-6HhMBA", "annotations": [{"segment": [13.3964348, 88.985119], "label": "Drinking beer"}, {"segment": [0.2, 136.89013500000001], "label": "Drinking beer"}, {"segment": [89.39731112324493, 92.17761027301091], "label": "Drinking beer"}]}, "EmlhHfW5xCM": {"duration": 192.215, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EmlhHfW5xCM", "annotations": []}, 
"FIw076A69Oc": {"duration": 61.07, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FIw076A69Oc", "annotations": [{"segment": [3.04865823650546, 52.3988134399376], "label": "Using parallel bars"}]}, "NVTXHst6uh4": {"duration": 88.329, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NVTXHst6uh4", "annotations": []}, "Hub83eTB8mM": {"duration": 21.224, "subset": "training", "resolution": "204x360", "url": "https://www.youtube.com/watch?v=Hub83eTB8mM", "annotations": [{"segment": [13.588287051482059, 21.141], "label": "Plastering"}]}, "w3DvA9405_o": {"duration": 68.01, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=w3DvA9405_o", "annotations": [{"segment": [3.0227784711388455, 64.64503432137286], "label": "Playing harmonica"}]}, "NyNuB328oi0": {"duration": 33.46, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NyNuB328oi0", "annotations": [{"segment": [3.0330333400000002, 12.539392355694227], "label": "Playing kickball"}]}, "c8HFew22GnI": {"duration": 165.011, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=c8HFew22GnI", "annotations": [{"segment": [38.101572542901714, 135.67249141965678], "label": "Wakeboarding"}]}, "JMrfe-UVA0Y": {"duration": 72.934, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=JMrfe-UVA0Y", "annotations": [{"segment": [0.01, 63.37623858346334], "label": "Clipping cat claws"}]}, "D-0MV6LRvbs": {"duration": 186.619, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=D-0MV6LRvbs", "annotations": [{"segment": [143.90758558810464, 152.04333013639123], "label": "Changing car wheel"}]}, "N5LZMH9mZVU": {"duration": 191.29, "subset": "testing", "resolution": "468x270", "url": "https://www.youtube.com/watch?v=N5LZMH9mZVU", "annotations": []}, "frWLB7ZxMNI": {"duration": 76.116, 
"subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=frWLB7ZxMNI", "annotations": [{"segment": [4.747644305772231, 69.7903712948518], "label": "Playing pool"}]}, "4R0tKohd7OI": {"duration": 62.091, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4R0tKohd7OI", "annotations": []}, "NzrOOXRyDPM": {"duration": 221.263, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NzrOOXRyDPM", "annotations": [{"segment": [7.937726989079564, 210.54135725429018], "label": "Bullfighting"}]}, "an1R4BP97JY": {"duration": 141.224, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=an1R4BP97JY", "annotations": []}, "h4N22DApP_Y": {"duration": 213.067, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=h4N22DApP_Y", "annotations": [{"segment": [97.05974132917316, 113.679560049922], "label": "Playing rubik cube"}, {"segment": [121.32467666146647, 124.9810367800312], "label": "Playing rubik cube"}, {"segment": [129.96698239625584, 139.2740808798752], "label": "Playing rubik cube"}, {"segment": [145.92200836817474, 157.8882778471139], "label": "Playing rubik cube"}, {"segment": [171.18413282371296, 182.15321317940717], "label": "Playing rubik cube"}, {"segment": [186.4743660468019, 194.45187903276133], "label": "Playing rubik cube"}]}, "NYRlfaKwTag": {"duration": 170.92000000000002, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NYRlfaKwTag", "annotations": [{"segment": [0.01, 168.7888932047078], "label": "Getting a tattoo"}]}, "f0CVl853HUg": {"duration": 42.4, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=f0CVl853HUg", "annotations": [{"segment": [3.9727863, 12.19098447], "label": "Hand washing clothes"}, {"segment": [15.79998735, 34.75361232], "label": "Hand washing clothes"}]}, "QZ9y5S-lnes": {"duration": 19.85, "subset": 
"testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=QZ9y5S-lnes", "annotations": []}, "I4T6Bf528Zk": {"duration": 92.973, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=I4T6Bf528Zk", "annotations": []}, "JDM9Akcs96g": {"duration": 14.07, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=JDM9Akcs96g", "annotations": [{"segment": [0.01, 13.65420272321097], "label": "Bathing dog"}]}, "n0meVEbpXak": {"duration": 25.08, "subset": "training", "resolution": "544x360", "url": "https://www.youtube.com/watch?v=n0meVEbpXak", "annotations": [{"segment": [0, 13.966482711388455], "label": "Doing motocross"}]}, "sRNqK_QZbMs": {"duration": 54.312, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=sRNqK_QZbMs", "annotations": []}, "pcLGX5qvUcI": {"duration": 31.022, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=pcLGX5qvUcI", "annotations": []}, "FuaxI8PkeHQ": {"duration": 24.94, "subset": "training", "resolution": "638x360", "url": "https://www.youtube.com/watch?v=FuaxI8PkeHQ", "annotations": [{"segment": [10.582213042121685, 16.884854633385334], "label": "Drinking coffee"}]}, "an5XI45pIl8": {"duration": 190.334, "subset": "validation", "resolution": "636x480", "url": "https://www.youtube.com/watch?v=an5XI45pIl8", "annotations": [{"segment": [86.39145104677033, 96.47144185050864], "label": "Playing drums"}, {"segment": [102.69731852340585, 125.52553299069557], "label": "Playing drums"}, {"segment": [158.7302085794806, 176.51842764490118], "label": "Playing drums"}]}, "rfWlK9njyzg": {"duration": 45.49, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rfWlK9njyzg", "annotations": [{"segment": [28.882326411856475, 41.726800811232444], "label": "Hopscotch"}]}, "2fs09c8gFsk": {"duration": 101.286, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=2fs09c8gFsk", "annotations": [{"segment": [0.01, 96.30794708523035], "label": "Riding bumper cars"}]}, "u0PEB9bXWpY": {"duration": 167.56, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=u0PEB9bXWpY", "annotations": [{"segment": [36.715945064742584, 142.3815368], "label": "Doing nails"}]}, "cMeMbnjX-pg": {"duration": 28.143, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cMeMbnjX-pg", "annotations": [{"segment": [0, 12.150724988955204], "label": "Curling"}]}, "43hQNmW-6_k": {"duration": 170.37, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=43hQNmW-6_k", "annotations": [{"segment": [24.701076360000002, 164.91606311778472], "label": "Breakdancing"}]}, "6LLDsbc8XMM": {"duration": 101.216, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6LLDsbc8XMM", "annotations": [{"segment": [0.2, 69.73342389521905], "label": "Capoeira"}]}, "BEA4YVQv7Wk": {"duration": 157.15, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BEA4YVQv7Wk", "annotations": [{"segment": [10.051907956318251, 14.464940717628703], "label": "Springboard diving"}, {"segment": [23.04583775351014, 24.762017160686426], "label": "Springboard diving"}, {"segment": [30.15572386895476, 36.039767550702024], "label": "Springboard diving"}, {"segment": [45.356170046801864, 75.51189391575663], "label": "Springboard diving"}, {"segment": [89.48649765990639, 141.46221684867393], "label": "Springboard diving"}]}, "civPiYFf2vE": {"duration": 103.31, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=civPiYFf2vE", "annotations": [{"segment": [19.323744149765993, 42.99533073322933], "label": "Vacuuming floor"}]}, "U40FhqwfBvs": {"duration": 101.03, "subset": "validation", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=U40FhqwfBvs", 
"annotations": [{"segment": [15.368683599999999, 97.24730570514822], "label": "Preparing salad"}]}, "HxJnPUqF9i8": {"duration": 121.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HxJnPUqF9i8", "annotations": [{"segment": [10.477378315132606, 14.253010140405616], "label": "Javelin throw"}, {"segment": [39.17218018720749, 55.029833853354134], "label": "Javelin throw"}, {"segment": [82.02560140405618, 87.68904914196568], "label": "Javelin throw"}, {"segment": [114.87424380655227, 115.62872464898595], "label": "Javelin throw"}]}, "2R_TVXvocQs": {"duration": 169.23, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2R_TVXvocQs", "annotations": [{"segment": [5.808084519299139, 97.94542530272639], "label": "Zumba"}, {"segment": [106.95654080999999, 160.7783396478716], "label": "Zumba"}]}, "wZeV6W1VEoM": {"duration": 88.19, "subset": "training", "resolution": "270x360", "url": "https://www.youtube.com/watch?v=wZeV6W1VEoM", "annotations": [{"segment": [0.2, 5.639003120124805], "label": "Snatch"}]}, "LmWcpkqUurg": {"duration": 187.316, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LmWcpkqUurg", "annotations": [{"segment": [0.01, 187.31489795918367], "label": "Changing car wheel"}]}, "66jfSBGjD2g": {"duration": 133.79, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=66jfSBGjD2g", "annotations": []}, "pbeQcGC3nXY": {"duration": 121.232, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pbeQcGC3nXY", "annotations": [{"segment": [29.288229329173163, 49.128642745709826], "label": "Doing fencing"}, {"segment": [82.00704212168486, 97.32857254290172], "label": "Doing fencing"}, {"segment": [104.32111076443059, 107.72288611544462], "label": "Doing fencing"}, {"segment": [113.77048673946959, 115.47137441497661], "label": "Doing fencing"}]}, "EL47wkhiOq8": {"duration": 43.52, "subset": 
"training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EL47wkhiOq8", "annotations": [{"segment": [1.8335569422776912, 41.017347893915755], "label": "Playing badminton"}]}, "uBPWqgUiQWA": {"duration": 39.567, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uBPWqgUiQWA", "annotations": [{"segment": [9.814541901716067, 18.82663698127925], "label": "Throwing darts"}, {"segment": [25.616571630265206, 26.9745585600624], "label": "Throwing darts"}]}, "P-6ITEpg0mw": {"duration": 27.15, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=P-6ITEpg0mw", "annotations": [{"segment": [3.8980197121094315, 26.16089665914993], "label": "Using uneven bars"}]}, "ep2Kyk8CHT8": {"duration": 152.393, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ep2Kyk8CHT8", "annotations": [{"segment": [41.47192589703589, 70.22896333853355], "label": "Plastering"}]}, "DXOKFXlx84M": {"duration": 5.27, "subset": "training", "resolution": "342x240", "url": "https://www.youtube.com/watch?v=DXOKFXlx84M", "annotations": [{"segment": [0.002217932573360075, 5.198820736311678], "label": "Bathing dog"}]}, "zfqach0dDYo": {"duration": 7.106, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=zfqach0dDYo", "annotations": [{"segment": [0.01, 6.9004227582153135], "label": "Doing a powerbomb"}]}, "j6HDZh7W6Z4": {"duration": 85.06, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=j6HDZh7W6Z4", "annotations": [{"segment": [32.37651373810054, 42.062928804992204], "label": "Discus throw"}, {"segment": [48.03400702652106, 54.80122901092044], "label": "Discus throw"}, {"segment": [61.83383224960998, 75.766348099844], "label": "Discus throw"}, {"segment": [81.87011694851795, 83.59509510140407], "label": "Discus throw"}]}, "fuSTjQ46_Aw": {"duration": 176.31, "subset": "testing", "resolution": "480x360", "url": 
"https://www.youtube.com/watch?v=fuSTjQ46_Aw", "annotations": []}, "AmW-O5MmJHk": {"duration": 70.102, "subset": "training", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=AmW-O5MmJHk", "annotations": [{"segment": [13.779616324492977, 70.101064], "label": "Hitting a pinata"}]}, "_wo35butdOc": {"duration": 19.85, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_wo35butdOc", "annotations": [{"segment": [6.411206583463338, 8.73410751950078], "label": "Discus throw"}]}, "12IrPzohRSw": {"duration": 113.593, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=12IrPzohRSw", "annotations": [{"segment": [0.2, 105.64212988163662], "label": "Riding bumper cars"}]}, "NJuf9o45A8U": {"duration": 67.5, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NJuf9o45A8U", "annotations": [{"segment": [3.8962505975039003, 59.07558338377535], "label": "Snowboarding"}]}, "xJNqBSJ7rqU": {"duration": 177.87, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=xJNqBSJ7rqU", "annotations": [{"segment": [20.533539926631082, 176.19996939937596], "label": "Playing field hockey"}]}, "oR8o_PuKS28": {"duration": 29.91, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=oR8o_PuKS28", "annotations": []}, "kXP1tgr476c": {"duration": 65.2, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kXP1tgr476c", "annotations": [{"segment": [5.214775979999999, 15.704964869999998], "label": "Polishing shoes"}, {"segment": [20.55657128, 60.63175897035882], "label": "Polishing shoes"}]}, "2GSSuYAWFc8": {"duration": 125.78, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2GSSuYAWFc8", "annotations": [{"segment": [5.24259376, 96.70414196567863], "label": "Getting a piercing"}]}, "stqHuZqLLuI": {"duration": 67.292, "subset": "testing", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=stqHuZqLLuI", "annotations": []}, "wz8d9DKMrTc": {"duration": 137.811, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wz8d9DKMrTc", "annotations": [{"segment": [0.01, 137.03768343782252], "label": "Drum corps"}]}, "u9oARzRT3fI": {"duration": 23.38, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=u9oARzRT3fI", "annotations": [{"segment": [0, 23.335], "label": "Using uneven bars"}]}, "slQuWp_rMTE": {"duration": 7.872, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=slQuWp_rMTE", "annotations": [{"segment": [0.2, 7.641], "label": "Canoeing"}]}, "WX1TLuHXCd8": {"duration": 89.72, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WX1TLuHXCd8", "annotations": [{"segment": [41.006085840000004, 53.189300429999996], "label": "Playing lacrosse"}, {"segment": [14.97174327, 28.67034555], "label": "Playing lacrosse"}]}, "nMTyJxJIZUs": {"duration": 68.71000000000001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nMTyJxJIZUs", "annotations": [{"segment": [0.01, 68.70784580498866], "label": "Doing nails"}]}, "kyObhFkHrak": {"duration": 7.361, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=kyObhFkHrak", "annotations": [{"segment": [0.01, 6.903634596839501], "label": "Doing a powerbomb"}]}, "DFOoFr2H-24": {"duration": 66.69, "subset": "training", "resolution": "854x478", "url": "https://www.youtube.com/watch?v=DFOoFr2H-24", "annotations": [{"segment": [0.3118923556942278, 3.0149594383775353], "label": "Playing saxophone"}, {"segment": [15.911283931357255, 16.847241809672386], "label": "Playing saxophone"}, {"segment": [35.46240405616224, 46.58990327613105], "label": "Playing saxophone"}]}, "N-KiVTbKKyI": {"duration": 169.924, "subset": "testing", "resolution": "1920x1080", 
"url": "https://www.youtube.com/watch?v=N-KiVTbKKyI", "annotations": []}, "e07y3QI4Kbg": {"duration": 30.929, "subset": "training", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=e07y3QI4Kbg", "annotations": [{"segment": [0.2, 30.728979591836737], "label": "Arm wrestling"}]}, "Nsl_tnIRNEo": {"duration": 156.851, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Nsl_tnIRNEo", "annotations": [{"segment": [7.427183665007309, 69.971885073107], "label": "Playing ten pins"}, {"segment": [79.98880990799796, 143.75508751547457], "label": "Playing ten pins"}]}, "RhokmoZJrco": {"duration": 26.29, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RhokmoZJrco", "annotations": [{"segment": [0.2870435397815913, 26.12096212012481], "label": "Windsurfing"}]}, "NhcOmldkGIo": {"duration": 175.195, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=NhcOmldkGIo", "annotations": [{"segment": [5.991033039284922, 160.39640073535247], "label": "Hurling"}]}, "5c5PFa8GR-A": {"duration": 57.49, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=5c5PFa8GR-A", "annotations": []}, "53dqgEMV5e4": {"duration": 60.47, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=53dqgEMV5e4", "annotations": [{"segment": [8.432407956318253, 40.18348595943838], "label": "Mixing drinks"}]}, "qnl1q5Q7_qQ": {"duration": 194.328, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=qnl1q5Q7_qQ", "annotations": []}, "sVDRluetSyg": {"duration": 125.46, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sVDRluetSyg", "annotations": [{"segment": [0.01, 5.480184235940866], "label": "Cleaning windows"}, {"segment": [19.18064482579303, 24.465108196164582], "label": "Cleaning windows"}, {"segment": [31.119617625521347, 39.14417311386333], "label": 
"Cleaning windows"}, {"segment": [48.93021567862714, 58.12909622620904], "label": "Cleaning windows"}, {"segment": [63.21783865678627, 72.41671920436816], "label": "Cleaning windows"}, {"segment": [73.98248610608424, 91.59736375039002], "label": "Cleaning windows"}, {"segment": [92.96740978939157, 104.71066155226208], "label": "Cleaning windows"}]}, "B8imoIn6NUE": {"duration": 124.51, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=B8imoIn6NUE", "annotations": [{"segment": [11.45446, 106.2471951068519], "label": "Skateboarding"}]}, "Tu1oKxyfHTs": {"duration": 69.43, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Tu1oKxyfHTs", "annotations": [{"segment": [4.6988391599999995, 63.048127925117], "label": "Springboard diving"}]}, "-3baQGb2zIY": {"duration": 104.537, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-3baQGb2zIY", "annotations": [{"segment": [0.01, 0.02], "label": "Baton twirling"}]}, "e-OJAoH_eeM": {"duration": 88.538, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=e-OJAoH_eeM", "annotations": []}, "KeP8tUI5GZg": {"duration": 114.27000000000001, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=KeP8tUI5GZg", "annotations": []}, "r7gYDRQvA8w": {"duration": 102.12, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=r7gYDRQvA8w", "annotations": []}, "8gKV8f3qSOg": {"duration": 86.05, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8gKV8f3qSOg", "annotations": [{"segment": [48.06065777847114, 68.60054783463337], "label": "Washing face"}]}, "aTQaYDmcMDY": {"duration": 201.74, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aTQaYDmcMDY", "annotations": [{"segment": [120.90927290000002, 149.1217728], "label": "Doing motocross"}, {"segment": 
[48.78143822932917, 197.0140666552262], "label": "Doing motocross"}]}, "0HrPpZa_xv8": {"duration": 20.991, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=0HrPpZa_xv8", "annotations": [{"segment": [2.521520441770052, 19.680957313572545], "label": "Calf roping"}]}, "jbh_K0yyJFo": {"duration": 23.662, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=jbh_K0yyJFo", "annotations": [{"segment": [11.728296411856473, 18.219429017160685], "label": "Sumo"}]}, "P3wH4l2KKrE": {"duration": 144.336, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=P3wH4l2KKrE", "annotations": []}, "bNuRrXSjJl0": {"duration": 228.76, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bNuRrXSjJl0", "annotations": [{"segment": [38.5434391416473, 41.75539240345124], "label": "Plataform diving"}, {"segment": [75.30245980451463, 78.15752937056257], "label": "Plataform diving"}, {"segment": [120.9835728612818, 124.19552612308574], "label": "Plataform diving"}, {"segment": [161.31143048170907, 164.88026743926898], "label": "Plataform diving"}, {"segment": [214.84398392511702, 219.12658825585024], "label": "Plataform diving"}]}, "1a8PCm9e1YU": {"duration": 135.326, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1a8PCm9e1YU", "annotations": [{"segment": [2.111168419525897, 134.90366032137285], "label": "Playing congas"}]}, "LZ-Cy0GqCiU": {"duration": 96.084, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LZ-Cy0GqCiU", "annotations": [{"segment": [8.36813714566957, 44.53045395004814], "label": "Bullfighting"}, {"segment": [59.02526606356439, 74.56609620975412], "label": "Bullfighting"}]}, "aBr9bdWrl0I": {"duration": 166.302, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aBr9bdWrl0I", "annotations": [{"segment": [0.2, 
30.873411088824508], "label": "Raking leaves"}, {"segment": [77.57268827613105, 113.11602705148205], "label": "Raking leaves"}]}, "s__e_v6aaq0": {"duration": 30.02, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=s__e_v6aaq0", "annotations": [{"segment": [0.01, 29.57976911076443], "label": "Vacuuming floor"}]}, "Ac_8KQGAe0c": {"duration": 306.62, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ac_8KQGAe0c", "annotations": [{"segment": [0.009999999999999998, 306.61950113378686], "label": "Clipping cat claws"}]}, "x7lP6GKepco": {"duration": 76.092, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=x7lP6GKepco", "annotations": [{"segment": [16.449079068738694, 25.206143706931968], "label": "Welding"}, {"segment": [53.96244957843146, 60.47107858309709], "label": "Welding"}]}, "4KMbeat6yoE": {"duration": 162.42000000000002, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4KMbeat6yoE", "annotations": [{"segment": [3.8008638299531987, 159.63628085803435], "label": "Playing accordion"}]}, "onFddYAkyyc": {"duration": 62.996, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=onFddYAkyyc", "annotations": [{"segment": [0.01, 62.99573696145125], "label": "Mooping floor"}]}, "zemqddZ_YO4": {"duration": 161.07999999999998, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=zemqddZ_YO4", "annotations": [{"segment": [5.5271482059282375, 157.021255850234], "label": "Rock climbing"}]}, "BbJeZoks_c4": {"duration": 216.062, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=BbJeZoks_c4", "annotations": [{"segment": [92.69400159906397, 191.1181778424337], "label": "Slacklining"}]}, "3uPBTEy9oOw": {"duration": 182.42, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=3uPBTEy9oOw", 
"annotations": []}, "mA4wEXlASpc": {"duration": 42.82, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mA4wEXlASpc", "annotations": [{"segment": [3.12307198, 32.3525252], "label": "Long jump"}]}, "cMoy7UJtlyA": {"duration": 98.66, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=cMoy7UJtlyA", "annotations": [{"segment": [12.621295099423024, 81.57666344749028], "label": "Sailing"}]}, "LnEed8TnWwk": {"duration": 178.771, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LnEed8TnWwk", "annotations": [{"segment": [0.01, 162.2590577223089], "label": "Tug of war"}]}, "8XB_0x_erho": {"duration": 59.119, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8XB_0x_erho", "annotations": [{"segment": [29.300152886115445, 49.58748517940718], "label": "Fun sliding down"}]}, "KNLGluuewIU": {"duration": 226.0, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=KNLGluuewIU", "annotations": [{"segment": [45.4701856474259, 99.75242277691109], "label": "Brushing hair"}, {"segment": [185.05308112324494, 221.37832761310455], "label": "Brushing hair"}]}, "qiw2I1oQIVQ": {"duration": 135.07, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qiw2I1oQIVQ", "annotations": [{"segment": [9.972493335048581, 126.10785501729156], "label": "Drum corps"}]}, "Vrac4j7C79Y": {"duration": 228.76, "subset": "testing", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=Vrac4j7C79Y", "annotations": []}, "Mx-rOsiQTos": {"duration": 180.025, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=Mx-rOsiQTos", "annotations": [{"segment": [28.49616458658347, 82.68098985959439], "label": "Blowing leaves"}]}, "YYddEHI8KRY": {"duration": 166.906, "subset": "testing", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=YYddEHI8KRY", "annotations": []}, "f0lxilPpXeM": {"duration": 101.1, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=f0lxilPpXeM", "annotations": [{"segment": [33.108907956318255, 35.158507020280815], "label": "Gargling mouthwash"}, {"segment": [86.87146801872076, 89.86703588143527], "label": "Gargling mouthwash"}]}, "EbeVfU-rWFE": {"duration": 183.833, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EbeVfU-rWFE", "annotations": []}, "gT_8511vwVE": {"duration": 232.55, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gT_8511vwVE", "annotations": [{"segment": [10.883674530654696, 226.01764108659583], "label": "Cumbia"}]}, "c0wxdAaqCOg": {"duration": 68.105, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=c0wxdAaqCOg", "annotations": []}, "CeXtKbFnPIQ": {"duration": 9.99, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=CeXtKbFnPIQ", "annotations": []}, "Oyycph_Ukfg": {"duration": 66.572, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Oyycph_Ukfg", "annotations": []}, "TH269r3O-Zo": {"duration": 77.16, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=TH269r3O-Zo", "annotations": []}, "U_4goX5w_1c": {"duration": 111.55, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=U_4goX5w_1c", "annotations": [{"segment": [8.70114360374415, 15.836081358814353], "label": "Triple jump"}]}, "nDHupsLdar0": {"duration": 122.93, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nDHupsLdar0", "annotations": []}, "qJ4ObH27qjc": {"duration": 102.238, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qJ4ObH27qjc", "annotations": [{"segment": [3.019463452566096, 
95.15156920684294], "label": "Playing congas"}]}, "QrQN-Hm5xew": {"duration": 90.12, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QrQN-Hm5xew", "annotations": [{"segment": [0.01, 0.02], "label": "Paintball"}, {"segment": [31.98415756630265, 63.19507176287051], "label": "Paintball"}, {"segment": [68.39689079563182, 81.75291263650546], "label": "Paintball"}]}, "zhPqZtWuhow": {"duration": 33.298, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zhPqZtWuhow", "annotations": [{"segment": [5.592722096809174, 21.853046045984488], "label": "Table soccer"}]}, "laeOL4ipHck": {"duration": 184.86, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=laeOL4ipHck", "annotations": [{"segment": [12.400486508580343, 155.727039875195], "label": "Volleyball"}]}, "-YPp9kc3XDs": {"duration": 167.42000000000002, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=-YPp9kc3XDs", "annotations": [{"segment": [29.77430462714508, 106.8218473026521], "label": "Playing flauta"}]}, "Rzf7AoRxwTw": {"duration": 147.447, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Rzf7AoRxwTw", "annotations": []}, "1RJgvoFfbkI": {"duration": 96.735, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1RJgvoFfbkI", "annotations": [{"segment": [7.696491239241405, 93.11245283552839], "label": "Capoeira"}]}, "6VT2jBflMAM": {"duration": 194.12, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6VT2jBflMAM", "annotations": [{"segment": [134.91409468623644, 179.73403912090484], "label": "Playing kickball"}]}, "iA0CccCcqts": {"duration": 85.496, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iA0CccCcqts", "annotations": []}, "xoSA8_kTiBY": {"duration": 82.55, "subset": "validation", "resolution": "1920x1080", 
"url": "https://www.youtube.com/watch?v=xoSA8_kTiBY", "annotations": [{"segment": [46.621270710000005, 76.27898045085803], "label": "Spinning"}, {"segment": [13.579941060000001, 41.13461772], "label": "Spinning"}]}, "44htu_ovvLs": {"duration": 56.96, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=44htu_ovvLs", "annotations": [{"segment": [24.03174632, 40.63929168], "label": "Horseback riding"}]}, "QdTUPBtxAV0": {"duration": 7.059, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QdTUPBtxAV0", "annotations": [{"segment": [0.2, 6.858412698412698], "label": "Fun sliding down"}]}, "LYqfB7HsQwQ": {"duration": 78.9, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LYqfB7HsQwQ", "annotations": [{"segment": [12.924567268405022, 74.71630773775351], "label": "Playing harmonica"}]}, "aoEszrK9wTM": {"duration": 75.0, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aoEszrK9wTM", "annotations": []}, "AQ3eFWxZ67U": {"duration": 72.888, "subset": "testing", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=AQ3eFWxZ67U", "annotations": []}, "W70Urhy8Yl4": {"duration": 185.249, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=W70Urhy8Yl4", "annotations": [{"segment": [0.19999999999999998, 147.4484126984127], "label": "Ice fishing"}]}, "eic6dpU0ytM": {"duration": 128.314, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eic6dpU0ytM", "annotations": [{"segment": [32.92633078563199, 52.881684622345524], "label": "Knitting"}, {"segment": [100.1758732153566, 111.74997742161767], "label": "Knitting"}]}, "E6ePEI4UOtU": {"duration": 105.489, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=E6ePEI4UOtU", "annotations": [{"segment": [51.523400942346605, 95.4629748846243], "label": "Mooping floor"}]}, 
"NbTlXQLScRo": {"duration": 180.16, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=NbTlXQLScRo", "annotations": [{"segment": [21.923187060842434, 180.16362811791385], "label": "Preparing pasta"}]}, "lnvsb4-71BQ": {"duration": 128.04, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lnvsb4-71BQ", "annotations": [{"segment": [32.85747269890796, 106.96155863026522], "label": "Ping-pong"}]}, "6l0tenUYS-w": {"duration": 112.15, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=6l0tenUYS-w", "annotations": [{"segment": [10.354469455062832, 85.47201538964227], "label": "Breakdancing"}]}, "98OypfeTKEc": {"duration": 41.193, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=98OypfeTKEc", "annotations": [{"segment": [0.01, 38.718097077624606], "label": "Swinging at the playground"}]}, "Px08sPeSsG0": {"duration": 86.26, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Px08sPeSsG0", "annotations": [{"segment": [3.0952090872750557, 9.420201569967562], "label": "Getting a tattoo"}, {"segment": [22.743058076064536, 28.79890194247226], "label": "Getting a tattoo"}]}, "a4hE5hlE1tQ": {"duration": 9.66, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=a4hE5hlE1tQ", "annotations": []}, "Ue2EbbpegkE": {"duration": 160.149, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ue2EbbpegkE", "annotations": [{"segment": [34.7278873359016, 46.22056947584025], "label": "Snowboarding"}, {"segment": [82.19766139216998, 92.94082008112323], "label": "Snowboarding"}, {"segment": [120.67316155694228, 133.91473000936037], "label": "Snowboarding"}]}, "0cscG-qOaQY": {"duration": 160.73, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0cscG-qOaQY", "annotations": [{"segment": 
[20.310463243019516, 49.64779903849215], "label": "Wrapping presents"}, {"segment": [54.411981774509066, 76.97916315564187], "label": "Wrapping presents"}, {"segment": [82.74633173082024, 121.110540078746], "label": "Wrapping presents"}, {"segment": [123.11651120124804, 146.1851853978159], "label": "Wrapping presents"}]}, "tgdLeBX7aXk": {"duration": 64.691, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tgdLeBX7aXk", "annotations": [{"segment": [13.01884099843994, 17.257533416536663], "label": "Croquet"}, {"segment": [32.19387812792512, 33.10216936037442], "label": "Croquet"}, {"segment": [35.726121809672385, 39.66205048361935], "label": "Croquet"}, {"segment": [42.891530421216856, 44.40534914196568], "label": "Croquet"}]}, "IfSQS2WQrdY": {"duration": 34.761, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=IfSQS2WQrdY", "annotations": []}, "4QvpJ71d8Nk": {"duration": 144.986, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=4QvpJ71d8Nk", "annotations": [{"segment": [9.246491419656786, 139.6757566302652], "label": "Rope skipping"}]}, "Mmdcsw_SEzc": {"duration": 41.425, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Mmdcsw_SEzc", "annotations": [{"segment": [1.417319048148031, 39.813806114951554], "label": "Building sandcastles"}]}, "UVJ0mQndDhE": {"duration": 138.159, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=UVJ0mQndDhE", "annotations": [{"segment": [0.01, 138.15873015873015], "label": "Shuffleboard"}]}, "0mNZfG6CL74": {"duration": 89.68, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0mNZfG6CL74", "annotations": []}, "iDhzxzLmwoI": {"duration": 185.172, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iDhzxzLmwoI", "annotations": [{"segment": [15.889165366614664, 
57.194817472698915], "label": "Roof shingle removal"}]}, "MvZFYjs80Y4": {"duration": 209.979, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=MvZFYjs80Y4", "annotations": [{"segment": [0, 82.52744461778471], "label": "Rock-paper-scissors"}, {"segment": [126.08359594383775, 168.67334633385337], "label": "Rock-paper-scissors"}]}, "jWODw4vy45M": {"duration": 3.07, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jWODw4vy45M", "annotations": []}, "Tu9QF2ALd2s": {"duration": 19.23, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Tu9QF2ALd2s", "annotations": [{"segment": [0.4854565, 19.016163360374414], "label": "Preparing salad"}]}, "gU81ZXdYh7o": {"duration": 5.09, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=gU81ZXdYh7o", "annotations": [{"segment": [0.007933182527301092, 1.5073046801872076], "label": "Doing kickboxing"}, {"segment": [2.633816599063963, 4.88517], "label": "Doing kickboxing"}]}, "Launtf-qjDM": {"duration": 20.179, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Launtf-qjDM", "annotations": [{"segment": [1.2253057722308893, 19.939], "label": "Welding"}]}, "jibbgUBuuSA": {"duration": 96.02000000000001, "subset": "testing", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=jibbgUBuuSA", "annotations": []}, "eU1ORbzgRjE": {"duration": 89.82, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eU1ORbzgRjE", "annotations": []}, "Vg043D46E7Q": {"duration": 35.69, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Vg043D46E7Q", "annotations": [{"segment": [0.2, 35.661], "label": "Snow tubing"}]}, "St9FULDo1zY": {"duration": 65.55, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=St9FULDo1zY", "annotations": []}, "bY-4XBIGiwI": {"duration": 
182.927, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=bY-4XBIGiwI", "annotations": [{"segment": [10.556313572542901, 40.79872542901716], "label": "Rope skipping"}, {"segment": [48.50198127925117, 125.24923400936038], "label": "Rope skipping"}, {"segment": [133.2377956318253, 162.64207488299533], "label": "Rope skipping"}]}, "PwOMgya8qYI": {"duration": 152.114, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PwOMgya8qYI", "annotations": [{"segment": [56.17874807321006, 124.53980578871351], "label": "Hula hoop"}]}, "K_I5LyzD8m0": {"duration": 54.13, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=K_I5LyzD8m0", "annotations": [{"segment": [15.0305125, 53.449795160686435], "label": "Shaving"}]}, "eL0LrJio1XE": {"duration": 135.326, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eL0LrJio1XE", "annotations": [{"segment": [29.968645865834635, 113.96527301092044], "label": "BMX"}]}, "8HvJ1B9w4GI": {"duration": 180.09, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8HvJ1B9w4GI", "annotations": []}, "zmaDLAZu4kA": {"duration": 30.14, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=zmaDLAZu4kA", "annotations": [{"segment": [2.4412056074766353, 17.93347196261682], "label": "Calf roping"}]}, "IXUh06YCtjw": {"duration": 83.13, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IXUh06YCtjw", "annotations": [{"segment": [11.408561622464898, 71.69243837753511], "label": "Drinking coffee"}]}, "PMBnZ-2oKxs": {"duration": 213.206, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=PMBnZ-2oKxs", "annotations": []}, "XZZRyOhxQBE": {"duration": 66.18, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XZZRyOhxQBE", "annotations": 
[{"segment": [11.046684840873635, 17.550807691107643], "label": "Pole vault"}, {"segment": [32.933574432137284, 40.263617644305775], "label": "Pole vault"}]}, "af05Mqm03Y4": {"duration": 225.44, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=af05Mqm03Y4", "annotations": []}, "DFQi5aWn87I": {"duration": 39.869, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DFQi5aWn87I", "annotations": []}, "TKGjHAmCaRo": {"duration": 136.882, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TKGjHAmCaRo", "annotations": [{"segment": [0.20000000000000004, 129.83468432614856], "label": "Disc dog"}]}, "TSVdiNxLu50": {"duration": 219.777, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TSVdiNxLu50", "annotations": [{"segment": [29.257787118221266, 195.2043072732606], "label": "Surfing"}, {"segment": [198.52066274450286, 201.25930797814772], "label": "Surfing"}]}, "jYphKtLFIUk": {"duration": 138.995, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jYphKtLFIUk", "annotations": [{"segment": [0.2, 138.741], "label": "Building sandcastles"}]}, "L1B_cE8waag": {"duration": 199.831, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=L1B_cE8waag", "annotations": [{"segment": [15.58738647425897, 188.60737633853356], "label": "Longboarding"}]}, "TcoSgl3qiHU": {"duration": 211.93, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=TcoSgl3qiHU", "annotations": []}, "xzuQIbnXt2U": {"duration": 26.843, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=xzuQIbnXt2U", "annotations": [{"segment": [0.01, 26.16927483010218], "label": "Doing a powerbomb"}]}, "VfhDSTKBwgc": {"duration": 136.952, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=VfhDSTKBwgc", 
"annotations": []}, "KFo88zRw5CM": {"duration": 71.843, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KFo88zRw5CM", "annotations": [{"segment": [0.010000000000000002, 71.05798709204369], "label": "Waterskiing"}]}, "ZluOmpTPdWw": {"duration": 29.606, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZluOmpTPdWw", "annotations": [{"segment": [1.1971744548286605, 28.70554984423676], "label": "Beach soccer"}]}, "LUDZ7e0RdEE": {"duration": 94.97, "subset": "validation", "resolution": "640x358", "url": "https://www.youtube.com/watch?v=LUDZ7e0RdEE", "annotations": [{"segment": [19.40877116380655, 74.52375492667706], "label": "Tai chi"}, {"segment": [89.63592789391575, 91.26567203744149], "label": "Tai chi"}]}, "fzHg9PhhlAg": {"duration": 235.17000000000002, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=fzHg9PhhlAg", "annotations": [{"segment": [11.00647585087077, 203.61980178627144], "label": "Cricket"}]}, "nwNJccohnow": {"duration": 131.77, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nwNJccohnow", "annotations": []}, "RMrL2txm79c": {"duration": 102.772, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RMrL2txm79c", "annotations": [{"segment": [17.944636505460217, 92.94567862714509], "label": "Carving jack-o-lanterns"}]}, "C84V-jw-Sjg": {"duration": 29.35, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=C84V-jw-Sjg", "annotations": []}, "jX26w60MxmU": {"duration": 67.919, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jX26w60MxmU", "annotations": [{"segment": [6.981070202808112, 45.90582527301092], "label": "Canoeing"}]}, "nIpT_lGpjck": {"duration": 24.73, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=nIpT_lGpjck", "annotations": [{"segment": 
[6.58360167, 20.14473798], "label": "Preparing salad"}]}, "yLQe0SqFIMk": {"duration": 68.198, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yLQe0SqFIMk", "annotations": []}, "DMUFb7gvUx4": {"duration": 181.557, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DMUFb7gvUx4", "annotations": [{"segment": [26.8576246105919, 170.19252647975077], "label": "Skiing"}]}, "mOrhfrmmxAQ": {"duration": 34.018, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mOrhfrmmxAQ", "annotations": [{"segment": [0.2, 8.789712948517941], "label": "Waterskiing"}]}, "LygR7ds26JY": {"duration": 20.69, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=LygR7ds26JY", "annotations": [{"segment": [0.8694617503900156, 9.55537344], "label": "Pole vault"}]}, "kzbQWKUMyS0": {"duration": 236.264, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kzbQWKUMyS0", "annotations": [{"segment": [17.29197352024922, 218.19167133956387], "label": "Slacklining"}]}, "nuVlFxONEdI": {"duration": 96.712, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nuVlFxONEdI", "annotations": []}, "OS-h1xzAZno": {"duration": 79.92, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=OS-h1xzAZno", "annotations": [{"segment": [1.8087901716068644, 23.763760530421216], "label": "Using the balance beam"}, {"segment": [29.127758970358816, 78.02746567862714], "label": "Using the balance beam"}]}, "z9PXpyWbXk8": {"duration": 7.06, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=z9PXpyWbXk8", "annotations": [{"segment": [5.2198159282371295, 6.937730031201248], "label": "High jump"}]}, "aDDq5ofaFHI": {"duration": 157.153, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aDDq5ofaFHI", 
"annotations": [{"segment": [21.507668706128822, 40.326882087155255], "label": "Longboarding"}, {"segment": [47.65904314469803, 119.02541077144764], "label": "Longboarding"}, {"segment": [127.09078793474468, 141.2662993126607], "label": "Longboarding"}]}, "sbIh_M0oGs8": {"duration": 33.55, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sbIh_M0oGs8", "annotations": [{"segment": [11.620482451951139, 32.34891015912636], "label": "Doing motocross"}]}, "E3IP4Y8e_ho": {"duration": 89.119, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=E3IP4Y8e_ho", "annotations": [{"segment": [18.351950927016674, 30.169494427457096], "label": "Slacklining"}, {"segment": [45.60181646177847, 70.34914368798752], "label": "Slacklining"}]}, "ZlwU7HKcoYs": {"duration": 29.768, "subset": "validation", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=ZlwU7HKcoYs", "annotations": [{"segment": [0.01, 27.745893915756632], "label": "Waterskiing"}]}, "InavXU3iWBE": {"duration": 25.96, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=InavXU3iWBE", "annotations": [{"segment": [10.263422776911078, 14.476939157566303], "label": "Doing a powerbomb"}]}, "2QeU2xYPdWE": {"duration": 76.23, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=2QeU2xYPdWE", "annotations": [{"segment": [41.25094217628705, 70.57175575195008], "label": "Discus throw"}, {"segment": [11.692713106084245, 43.38153748], "label": "Discus throw"}]}, "w9NZm9-cxbg": {"duration": 49.81, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=w9NZm9-cxbg", "annotations": []}, "fxxeCpqgRfk": {"duration": 64.67, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fxxeCpqgRfk", "annotations": [{"segment": [3.4251065865834636, 58.02533511388456], "label": "Using parallel bars"}]}, "9L-aeZsgwZs": 
{"duration": 177.772, "subset": "validation", "resolution": "640x270", "url": "https://www.youtube.com/watch?v=9L-aeZsgwZs", "annotations": [{"segment": [0, 173.30440717628707], "label": "Tug of war"}]}, "6mrjcnKVJyY": {"duration": 166.98, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6mrjcnKVJyY", "annotations": [{"segment": [2.3444187971600496, 163.06735135101403], "label": "Doing kickboxing"}]}, "Z1siDLDwbiY": {"duration": 239.677, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Z1siDLDwbiY", "annotations": [{"segment": [0.01, 222.4436739469579], "label": "Removing ice from car"}]}, "CpEJfaG-WhE": {"duration": 223.864, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=CpEJfaG-WhE", "annotations": []}, "1rf7t4sYtIA": {"duration": 38.174, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1rf7t4sYtIA", "annotations": [{"segment": [0.2, 37.940999999999995], "label": "Blowing leaves"}]}, "VQ44ZEIaQXA": {"duration": 78.252, "subset": "testing", "resolution": "852x480", "url": "https://www.youtube.com/watch?v=VQ44ZEIaQXA", "annotations": []}, "ZbfYwmb39H0": {"duration": 192.22, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZbfYwmb39H0", "annotations": [{"segment": [64.36862432, 133.88858384], "label": "Playing lacrosse"}]}, "aj2cioE_yG0": {"duration": 226.093, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aj2cioE_yG0", "annotations": [{"segment": [0.01, 196.28796758684348], "label": "Table soccer"}]}, "TEvkTLNFpR8": {"duration": 205.5, "subset": "testing", "resolution": "352x262", "url": "https://www.youtube.com/watch?v=TEvkTLNFpR8", "annotations": []}, "7S7THbo8uds": {"duration": 78.228, "subset": "training", "resolution": "352x288", "url": "https://www.youtube.com/watch?v=7S7THbo8uds", "annotations": [{"segment": [0.2, 
78.02802721088435], "label": "Arm wrestling"}]}, "A7ER02-zr54": {"duration": 237.96, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=A7ER02-zr54", "annotations": [{"segment": [51.91529686, 140.22626982], "label": "Fixing bicycle"}]}, "bsVJlftOY8Q": {"duration": 98.87, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bsVJlftOY8Q", "annotations": []}, "UJebyFqxDXI": {"duration": 141.29, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UJebyFqxDXI", "annotations": [{"segment": [41.8810461261988, 51.35938814423325], "label": "Mowing the lawn"}, {"segment": [57.751758342442535, 77.59014861274723], "label": "Mowing the lawn"}, {"segment": [90.15446245060687, 99.63280446864133], "label": "Mowing the lawn"}]}, "9FaSaHgQSO8": {"duration": 228.485, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9FaSaHgQSO8", "annotations": [{"segment": [0, 197.11676545397816], "label": "Riding bumper cars"}]}, "hpQkElxsSYQ": {"duration": 105.49000000000001, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hpQkElxsSYQ", "annotations": []}, "NrKBxbZXXg4": {"duration": 92.787, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NrKBxbZXXg4", "annotations": [{"segment": [0, 6.800042121684867], "label": "Rafting"}, {"segment": [10.851131045241809, 79.71964274570983], "label": "Rafting"}]}, "ZQSa_8wofFw": {"duration": 91.208, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=ZQSa_8wofFw", "annotations": [{"segment": [39.5566597700468, 47.24032749516381], "label": "Shuffleboard"}]}, "ZhPL4xb8JD0": {"duration": 22.106, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ZhPL4xb8JD0", "annotations": [{"segment": [7.66641173825574, 22.105396825396827], "label": "Calf roping"}]}, "AoyoL-921Ws": 
{"duration": 72.89, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=AoyoL-921Ws", "annotations": [{"segment": [0, 72.68743764172335], "label": "Mowing the lawn"}]}, "P6lwZ6gKvUw": {"duration": 136.557, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=P6lwZ6gKvUw", "annotations": [{"segment": [6.39110232449298, 131.23063439625585], "label": "Peeling potatoes"}]}, "smYAy6TYS3M": {"duration": 67.11, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=smYAy6TYS3M", "annotations": [{"segment": [7.658904935142421, 48.42404372018915], "label": "Hopscotch"}, {"segment": [54.48655153950379, 62.744105293397865], "label": "Hopscotch"}]}, "4EoFt8F3_nw": {"duration": 5.06, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4EoFt8F3_nw", "annotations": [{"segment": [0.01, 5.041], "label": "Walking the dog"}]}, "KZ-6u3MUp44": {"duration": 68.43, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=KZ-6u3MUp44", "annotations": [{"segment": [14.828329173166926, 20.162260530421218], "label": "Discus throw"}, {"segment": [30.83914040561622, 39.696056162246485], "label": "Discus throw"}]}, "8Ztw2YrUHRE": {"duration": 161.797, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8Ztw2YrUHRE", "annotations": [{"segment": [0.010000000000000002, 156.35301897017894], "label": "Baton twirling"}]}, "uWguPHhQON4": {"duration": 38.27, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uWguPHhQON4", "annotations": [{"segment": [33.78912627769111, 38.26648526077098], "label": "Starting a campfire"}]}, "IV47aK7qJdY": {"duration": 54.567, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IV47aK7qJdY", "annotations": [{"segment": [16.174193525564153, 44.09595893603744], "label": "Blowing leaves"}]}, 
"oXKrreio9hg": {"duration": 130.45, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=oXKrreio9hg", "annotations": [{"segment": [48.19899687987519, 122.8362620904836], "label": "Tumbling"}]}, "Tab-dSCaMC8": {"duration": 74.3, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Tab-dSCaMC8", "annotations": [{"segment": [10.441967369947106, 73.71419011138013], "label": "Grooming horse"}]}, "hru57dsi0N0": {"duration": 160.683, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hru57dsi0N0", "annotations": [{"segment": [1.8518561450922602, 160.661], "label": "Putting on shoes"}]}, "69X7tP6p7E0": {"duration": 70.06, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=69X7tP6p7E0", "annotations": [{"segment": [4.1226779, 67.7677632652106], "label": "Dodgeball"}]}, "HW5QhCSKTsw": {"duration": 230.67000000000002, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HW5QhCSKTsw", "annotations": [{"segment": [67.65272533541341, 73.05054916536662], "label": "Layup drill in basketball"}, {"segment": [186.15288234, 191.97030408], "label": "Layup drill in basketball"}]}, "Xjw9vUwILOE": {"duration": 102.89, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Xjw9vUwILOE", "annotations": [{"segment": [3.208861154446178, 19.253166926677068], "label": "Long jump"}, {"segment": [23.424686427457097, 38.66677691107644], "label": "Long jump"}, {"segment": [85.83703588143526, 96.10539157566302], "label": "Long jump"}]}, "QnQ2D-tJ9pM": {"duration": 123.79, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QnQ2D-tJ9pM", "annotations": [{"segment": [1.8336185647425898, 13.414367394695788], "label": "Chopping wood"}]}, "xV7uPiqNuwQ": {"duration": 194.096, "subset": "validation", "resolution": "640x360", "url": 
"https://www.youtube.com/watch?v=xV7uPiqNuwQ", "annotations": [{"segment": [20.281976599063963, 175.2906692667707], "label": "Assembling bicycle"}]}, "z3kkSRbahfE": {"duration": 32.95, "subset": "testing", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=z3kkSRbahfE", "annotations": []}, "TVPiI9551As": {"duration": 137.834, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TVPiI9551As", "annotations": [{"segment": [0.01, 112.03015776755072], "label": "Arm wrestling"}]}, "2EQMXueAmCA": {"duration": 27.28, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2EQMXueAmCA", "annotations": [{"segment": [0.38307491326053045, 26.432169123499634], "label": "Playing squash"}]}, "5iIJl-wjE9U": {"duration": 111.707, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5iIJl-wjE9U", "annotations": [{"segment": [46.963212948517935, 53.41085257410296], "label": "Rock-paper-scissors"}, {"segment": [59.16145007800312, 60.72979485179407], "label": "Rock-paper-scissors"}]}, "t_Creyg6ANs": {"duration": 173.686, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=t_Creyg6ANs", "annotations": [{"segment": [14.360871795416033, 86.97811030808579], "label": "Shuffleboard"}]}, "ZPVrC5185NM": {"duration": 51.758, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZPVrC5185NM", "annotations": [{"segment": [0.2, 50.3082415608518], "label": "Swinging at the playground"}]}, "PUJqlmTdlak": {"duration": 223.562, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PUJqlmTdlak", "annotations": [{"segment": [25.111457225635966, 220.77156109048363], "label": "Playing blackjack"}]}, "hh6Yt_LJPwQ": {"duration": 120.535, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hh6Yt_LJPwQ", "annotations": [{"segment": [20.49655436349454, 
24.257390026521062], "label": "Gargling mouthwash"}]}, "CTWo9EfQ4Hc": {"duration": 67.01, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CTWo9EfQ4Hc", "annotations": [{"segment": [19.7562496099844, 51.11537597503901], "label": "Using parallel bars"}]}, "S633fHu_NrU": {"duration": 159.24, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=S633fHu_NrU", "annotations": [{"segment": [85.26945397815912, 136.65453978159127], "label": "Preparing salad"}]}, "bTlfN4vJkiY": {"duration": 22.87, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bTlfN4vJkiY", "annotations": [{"segment": [2.013286271450858, 13.202169266770673], "label": "Mowing the lawn"}]}, "5WHnYEinw4A": {"duration": 72.261, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=5WHnYEinw4A", "annotations": [{"segment": [0.01, 46.86256131353526], "label": "Croquet"}, {"segment": [63.270075751776545, 70.35003113435819], "label": "Croquet"}]}, "lrxLP-R_ILw": {"duration": 344.421, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lrxLP-R_ILw", "annotations": [{"segment": [152.57490171606864, 344.368], "label": "Putting on shoes"}]}, "NYhBaOJrxCM": {"duration": 66.851, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NYhBaOJrxCM", "annotations": []}, "mpyN1mrMl3U": {"duration": 65.388, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mpyN1mrMl3U", "annotations": [{"segment": [2.979070022440402, 5.828615193779966], "label": "Baking cookies"}]}, "u-yFENQQxAo": {"duration": 30.163, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=u-yFENQQxAo", "annotations": [{"segment": [3.500802652106084, 11.583192667706708], "label": "Raking leaves"}]}, "s1E6EKs1MZw": {"duration": 152.74, "subset": "training", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=s1E6EKs1MZw", "annotations": [{"segment": [10.484541231989416, 28.117633303971616], "label": "Playing saxophone"}, {"segment": [62.43067733593698, 76.72777901592254], "label": "Playing saxophone"}, {"segment": [127.48248918876754, 141.06473570046802], "label": "Playing saxophone"}]}, "RVbejE3s3m4": {"duration": 38.78, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RVbejE3s3m4", "annotations": [{"segment": [1.8148513572542901, 37.083462733229325], "label": "Doing nails"}]}, "pwPid8YHHpU": {"duration": 188.848, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pwPid8YHHpU", "annotations": [{"segment": [13.548901716068642, 28.27596879875195], "label": "Bullfighting"}, {"segment": [79.52616224648985, 83.06065834633385], "label": "Bullfighting"}, {"segment": [98.96589079563182, 105.44580031201248], "label": "Bullfighting"}, {"segment": [134.31085179407174, 138.72897191887674], "label": "Bullfighting"}, {"segment": [148.44883619344773, 154.0614399375975], "label": "Bullfighting"}, {"segment": [161.4257535101404, 168.79006708268332], "label": "Bullfighting"}]}, "Vckc3ZOollk": {"duration": 159.521, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Vckc3ZOollk", "annotations": [{"segment": [4.571944964812923, 145.9543617716665], "label": "Playing blackjack"}]}, "vy91mJTl7rQ": {"duration": 137.602, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vy91mJTl7rQ", "annotations": [{"segment": [0.2, 85.6520875120719], "label": "Rafting"}]}, "Zcq_xLi2NGo": {"duration": 131.29, "subset": "validation", "resolution": "540x360", "url": "https://www.youtube.com/watch?v=Zcq_xLi2NGo", "annotations": [{"segment": [11.059937584399377, 128.8277914923557], "label": "Playing saxophone"}]}, "mDvWGOr_sws": {"duration": 194.723, "subset": "validation", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=mDvWGOr_sws", "annotations": [{"segment": [0.01, 0.02], "label": "Rope skipping"}]}, "i0rC2mfp7mI": {"duration": 24.03, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=i0rC2mfp7mI", "annotations": [{"segment": [0, 21.965275301872076], "label": "Playing accordion"}]}, "U3uwdU8QH8k": {"duration": 206.565, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=U3uwdU8QH8k", "annotations": []}, "U7vWTmVzWSc": {"duration": 197.091, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=U7vWTmVzWSc", "annotations": [{"segment": [21.215721070202807, 178.94999511388454], "label": "Surfing"}]}, "93JbDUNFeWs": {"duration": 71.1, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=93JbDUNFeWs", "annotations": []}, "7GSCDxBSFsw": {"duration": 49.48, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7GSCDxBSFsw", "annotations": [{"segment": [1.9684363783151326, 35.47045160140406], "label": "Javelin throw"}]}, "N_zbcVs419E": {"duration": 237.63, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=N_zbcVs419E", "annotations": [{"segment": [2.3644483500000004, 221.84087363494538], "label": "Painting"}]}, "yO0mTE_231M": {"duration": 27.33, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=yO0mTE_231M", "annotations": []}, "51XRujaBAbE": {"duration": 159.41, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=51XRujaBAbE", "annotations": [{"segment": [142.7433112917317, 159.15630527301093], "label": "Doing motocross"}]}, "PKdQIdYuXo8": {"duration": 116.4, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PKdQIdYuXo8", "annotations": [{"segment": [67.8252873299532, 97.60667186037442], "label": "Playing harmonica"}]}, "OOKHi7EBNKk": 
{"duration": 173.523, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=OOKHi7EBNKk", "annotations": [{"segment": [0.01, 172.7106230826833], "label": "Snow tubing"}]}, "j6ElczD-Pw4": {"duration": 27.05, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=j6ElczD-Pw4", "annotations": []}, "y56qXoJh6U0": {"duration": 131.658, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=y56qXoJh6U0", "annotations": [{"segment": [5.132644305772231, 13.757577223088925], "label": "Playing ten pins"}, {"segment": [24.84577379095164, 33.67526365054602], "label": "Playing ten pins"}, {"segment": [54.20896099843994, 57.90502652106085], "label": "Playing ten pins"}, {"segment": [110.26595475819035, 116.42606396255852], "label": "Playing ten pins"}]}, "ISa7BQJasm4": {"duration": 68.221, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ISa7BQJasm4", "annotations": []}, "TGffgRXc1eQ": {"duration": 207.889, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=TGffgRXc1eQ", "annotations": []}, "bc7r5_gSAVg": {"duration": 167.254, "subset": "validation", "resolution": "632x360", "url": "https://www.youtube.com/watch?v=bc7r5_gSAVg", "annotations": [{"segment": [0.01, 127.73392313887696], "label": "Playing blackjack"}]}, "p1fpQ4yR1co": {"duration": 112.501, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=p1fpQ4yR1co", "annotations": [{"segment": [30.352685762957993, 32.98027922090192], "label": "Blow-drying hair"}]}, "QuEHZ2Y3H40": {"duration": 107.114, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QuEHZ2Y3H40", "annotations": [{"segment": [4.344703464329049, 9.023614887452641], "label": "Surfing"}, {"segment": [10.026238763836268, 15.206462125151674], "label": "Surfing"}, {"segment": [16.209086001535297, 18.882749671891638], 
"label": "Surfing"}, {"segment": [19.718269568877993, 29.74450833271426], "label": "Surfing"}, {"segment": [35.76025159101602, 42.277306787509595], "label": "Surfing"}, {"segment": [45.619386375455015, 48.085720960998444], "label": "Surfing"}, {"segment": [53.473273010920444, 75.86520608424338], "label": "Surfing"}, {"segment": [76.53362199687989, 92.74270787831513], "label": "Surfing"}, {"segment": [95.75057948517941, 98.59134711388457], "label": "Surfing"}]}, "5BbHu0WQZqw": {"duration": 118.25999999999999, "subset": "validation", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=5BbHu0WQZqw", "annotations": [{"segment": [1.18259, 116.4143111324493], "label": "Shaving legs"}]}, "5lFEpP89xQ4": {"duration": 45.46, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=5lFEpP89xQ4", "annotations": []}, "ggVQ4_B_sw0": {"duration": 42.493, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ggVQ4_B_sw0", "annotations": []}, "jIOlMUtxhH0": {"duration": 125.02, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jIOlMUtxhH0", "annotations": []}, "Bc0_vvpfMAE": {"duration": 226.79, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Bc0_vvpfMAE", "annotations": [{"segment": [55.59039524295843, 211.36103231465705], "label": "Baking cookies"}]}, "32vYs9wKXE8": {"duration": 91.301, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=32vYs9wKXE8", "annotations": []}, "b82y7f7TFbw": {"duration": 167.718, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=b82y7f7TFbw", "annotations": [{"segment": [42.38732628393136, 55.208184234009366], "label": "Playing pool"}, {"segment": [60.44118747893916, 150.7104934539782], "label": "Playing pool"}]}, "PlSk7Q9v15A": {"duration": 140.16, "subset": "training", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=PlSk7Q9v15A", "annotations": [{"segment": [23.758885173241953, 113.99906717946278], "label": "Mixing drinks"}]}, "KCOzA0Suy5U": {"duration": 66.67, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KCOzA0Suy5U", "annotations": [{"segment": [8.262129402496099, 63.239065929797185], "label": "Doing motocross"}]}, "gN8F0o1baAo": {"duration": 23.66, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=gN8F0o1baAo", "annotations": [{"segment": [0.6974938, 23.070528178627146], "label": "Mowing the lawn"}]}, "q-ID2mgEIow": {"duration": 89.58, "subset": "training", "resolution": "160x120", "url": "https://www.youtube.com/watch?v=q-ID2mgEIow", "annotations": [{"segment": [2.236071331288626, 83.71291980187208], "label": "Playing accordion"}]}, "-8FS4LbXAgE": {"duration": 169.23, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-8FS4LbXAgE", "annotations": [{"segment": [33.52860022428108, 49.63288852098301], "label": "Getting a tattoo"}, {"segment": [84.74551636193446, 120.38615408424336], "label": "Getting a tattoo"}]}, "WUdefl_j2-8": {"duration": 124.181, "subset": "training", "resolution": "2560x1080", "url": "https://www.youtube.com/watch?v=WUdefl_j2-8", "annotations": [{"segment": [26.5367503900156, 85.61491731669267], "label": "Snowboarding"}, {"segment": [101.11082995319813, 123.961], "label": "Snowboarding"}]}, "26qGsfI9tZ8": {"duration": 49.227, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=26qGsfI9tZ8", "annotations": [{"segment": [33.608624925141896, 45.704668029340965], "label": "Fun sliding down"}]}, "Lr0Vm9zBXKE": {"duration": 193.26, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Lr0Vm9zBXKE", "annotations": [{"segment": [81.69887560149644, 88.62250908812058], "label": "Skiing"}, {"segment": [101.86772097557545, 106.68416166192269], 
"label": "Skiing"}, {"segment": [115.41396040592703, 139.49616272630976], "label": "Skiing"}, {"segment": [167.19069645216646, 177.4256328291135], "label": "Skiing"}]}, "PJdE8Ui2eno": {"duration": 184.11, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PJdE8Ui2eno", "annotations": [{"segment": [42.907588248049926, 158.1145268], "label": "Rock climbing"}]}, "V-Yz865cbn4": {"duration": 27.26, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=V-Yz865cbn4", "annotations": [{"segment": [4.841176287051482, 26.456603744149763], "label": "Cumbia"}]}, "OauFXDlQfWM": {"duration": 7.97, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=OauFXDlQfWM", "annotations": []}, "pzkwJYJol7o": {"duration": 159.498, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=pzkwJYJol7o", "annotations": [{"segment": [0.01, 127.6480601101595], "label": "Canoeing"}]}, "MlGh0geYhSg": {"duration": 61.93, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MlGh0geYhSg", "annotations": [{"segment": [0.19322147579780743, 45.50365755038365], "label": "Horseback riding"}]}, "Ra21PsRb6dU": {"duration": 124.505, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ra21PsRb6dU", "annotations": []}, "IUnqrqZ_x_A": {"duration": 59.026, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IUnqrqZ_x_A", "annotations": [{"segment": [0.01, 59.024580498866214], "label": "Doing fencing"}]}, "13vEnzRCKzo": {"duration": 131.31, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=13vEnzRCKzo", "annotations": [{"segment": [5.030409479999999, 124.97154056162246], "label": "Ironing clothes"}]}, "2AcaLeBzcnY": {"duration": 136.232, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2AcaLeBzcnY", 
"annotations": []}, "fVtiM88rOJI": {"duration": 175.54, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fVtiM88rOJI", "annotations": [{"segment": [23.00405614352574, 175.542857], "label": "Paintball"}]}, "APCbvR0SBl0": {"duration": 33.41, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=APCbvR0SBl0", "annotations": [{"segment": [0, 31.67876723400936], "label": "Javelin throw"}]}, "te5xo60oVZM": {"duration": 123.716, "subset": "validation", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=te5xo60oVZM", "annotations": [{"segment": [0.01, 123.715578], "label": "Grooming dog"}]}, "mwDQENGsvd8": {"duration": 43.097, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mwDQENGsvd8", "annotations": [{"segment": [0.2, 38.142943925233645], "label": "Playing drums"}]}, "x7yOoVf5-Do": {"duration": 96.897, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=x7yOoVf5-Do", "annotations": [{"segment": [14.80564430577223, 87.79444773790952], "label": "BMX"}]}, "wfy5UXaAVnQ": {"duration": 76.092, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wfy5UXaAVnQ", "annotations": [{"segment": [7.359894651568376, 13.414001542374619], "label": "Rafting"}, {"segment": [19.3494004399376, 22.791931806552263], "label": "Rafting"}, {"segment": [24.572551478939157, 46.65223541653666], "label": "Rafting"}, {"segment": [48.67027104524181, 54.486961975039], "label": "Rafting"}, {"segment": [56.861121538221525, 65.52680394383775], "label": "Rafting"}]}, "K5v9-h2S5pw": {"duration": 147.91, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=K5v9-h2S5pw", "annotations": [{"segment": [115.14453024961, 139.83484034321373], "label": "Playing saxophone"}]}, "K-Kkuz4oI38": {"duration": 120.56, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=K-Kkuz4oI38", "annotations": []}, "ahpoDWYqtfw": {"duration": 80.6, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ahpoDWYqtfw", "annotations": [{"segment": [16.901677067082684, 70.4341263650546], "label": "Chopping wood"}]}, "g4lQoscuTFY": {"duration": 197.05, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=g4lQoscuTFY", "annotations": []}, "DW-pIg-YKtk": {"duration": 48.089, "subset": "testing", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=DW-pIg-YKtk", "annotations": []}, "ZoCeeRbpPsU": {"duration": 150.373, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ZoCeeRbpPsU", "annotations": []}, "VkHUry3ZBok": {"duration": 93.58, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VkHUry3ZBok", "annotations": []}, "ofrkSTRhCE4": {"duration": 114.243, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=ofrkSTRhCE4", "annotations": [{"segment": [12.70085914993286, 69.62347597236275], "label": "Running a marathon"}, {"segment": [76.02727036488612, 108.22412550507303], "label": "Running a marathon"}]}, "VrynprPwU2M": {"duration": 110.504, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VrynprPwU2M", "annotations": []}, "pbTzltaBByk": {"duration": 57.981, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pbTzltaBByk", "annotations": [{"segment": [6.241241257813577, 35.63839164430578], "label": "Trimming branches or hedges"}, {"segment": [37.26654151638066, 42.69370775663027], "label": "Trimming branches or hedges"}, {"segment": [51.377173741029644, 56.080717815912635], "label": "Trimming branches or hedges"}]}, "Ka210U3btNE": {"duration": 180.61, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ka210U3btNE", "annotations": 
[]}, "5Chl3x_mx40": {"duration": 208.098, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5Chl3x_mx40", "annotations": [{"segment": [67.98160608424338, 72.20003510140405], "label": "Curling"}, {"segment": [91.34521294851794, 95.56364196567863], "label": "Curling"}, {"segment": [124.76815054602184, 128.33759048361935], "label": "Curling"}, {"segment": [168.89940795631824, 174.0913205928237], "label": "Curling"}]}, "3mlxwa9Ve5I": {"duration": 110.27000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3mlxwa9Ve5I", "annotations": [{"segment": [38.001312012480504, 79.09775351014041], "label": "Shaving"}]}, "yDCZNNI3tDo": {"duration": 111.36, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=yDCZNNI3tDo", "annotations": [{"segment": [23.10648007800312, 84.95540419656787], "label": "Getting a haircut"}]}, "I6B4g85H2iI": {"duration": 119.096, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=I6B4g85H2iI", "annotations": [{"segment": [16.57952271288578, 60.717401217558674], "label": "BMX"}, {"segment": [66.46645261942784, 76.29547598391382], "label": "BMX"}, {"segment": [85.38268626428766, 92.24445729232504], "label": "BMX"}]}, "0YPfoB84Fes": {"duration": 86.192, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=0YPfoB84Fes", "annotations": [{"segment": [5.378601248049923, 82.02366903276132], "label": "Making a cake"}]}, "PSAfQ1qjtOA": {"duration": 111.5, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PSAfQ1qjtOA", "annotations": [{"segment": [20.178249263303865, 105.06605629953198], "label": "Using the balance beam"}]}, "vmFfa1yv72k": {"duration": 219.986, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=vmFfa1yv72k", "annotations": [{"segment": [30.279203637916353, 100.83933759430265], 
"label": "Beer pong"}, {"segment": [105.97721142607836, 159.75362419866403], "label": "Beer pong"}, {"segment": [191.60844195567338, 198.45894039804097], "label": "Beer pong"}]}, "j6BCgvUusWs": {"duration": 43.12, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=j6BCgvUusWs", "annotations": []}, "-SCRtjT7dto": {"duration": 126.57, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=-SCRtjT7dto", "annotations": [{"segment": [77.97279884, 111.09516383], "label": "Preparing salad"}]}, "aeKIYkn99_Q": {"duration": 219.104, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aeKIYkn99_Q", "annotations": [{"segment": [48.87454446177846, 219.061], "label": "Snow tubing"}]}, "S47vfJ9g3Mw": {"duration": 187.34, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=S47vfJ9g3Mw", "annotations": [{"segment": [28.933729822662297, 179.44757582215288], "label": "Painting"}]}, "ZO8ValsY3rE": {"duration": 107.39, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=ZO8ValsY3rE", "annotations": [{"segment": [53.40721039186323, 56.58548685759533], "label": "Washing hands"}, {"segment": [73.31325772986946, 90.87741714575732], "label": "Washing hands"}]}, "7H4-gDM3r0w": {"duration": 117.00999999999999, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7H4-gDM3r0w", "annotations": [{"segment": [40.52417784711389, 47.09566614664587], "label": "Mixing drinks"}, {"segment": [60.056101404056164, 80.68327301092044], "label": "Mixing drinks"}, {"segment": [84.69918252730109, 96.19928705148206], "label": "Mixing drinks"}]}, "bb-DPA34qvw": {"duration": 124.785, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bb-DPA34qvw", "annotations": [{"segment": [6.424139153321235, 11.096240355736677], "label": "Playing congas"}, {"segment": 
[17.909721275925868, 69.88684715279767], "label": "Playing congas"}, {"segment": [81.95644108424337, 110.1837188923557], "label": "Playing congas"}]}, "8ofNg0y8w60": {"duration": 81.69, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8ofNg0y8w60", "annotations": [{"segment": [67.18424336973479, 80.81213728549142], "label": "Tennis serve with ball bouncing"}, {"segment": [0.2, 5.540312012480499], "label": "Tennis serve with ball bouncing"}]}, "8TNzCiy8iaY": {"duration": 159.07999999999998, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=8TNzCiy8iaY", "annotations": [{"segment": [53.60551179598204, 140.71446846445286], "label": "Tango"}]}, "TfpCjzGqA7w": {"duration": 26.285, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TfpCjzGqA7w", "annotations": [{"segment": [0.01, 26.284988662131518], "label": "Hitting a pinata"}]}, "QSoT1qBELhs": {"duration": 108.972, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QSoT1qBELhs", "annotations": []}, "W0M9tXQV3Xg": {"duration": 191.75, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=W0M9tXQV3Xg", "annotations": [{"segment": [77.75703588143526, 188.73003276131047], "label": "Spinning"}]}, "0lYwrXV1R3A": {"duration": 146.193, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=0lYwrXV1R3A", "annotations": [{"segment": [91.08587817266054, 145.9651183068536], "label": "Playing ice hockey"}]}, "wSgONQANu6M": {"duration": 19.064, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wSgONQANu6M", "annotations": []}, "MleQSQt6Utw": {"duration": 180.977, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MleQSQt6Utw", "annotations": [{"segment": [0.01, 180.97632653061225], "label": "Building sandcastles"}]}, "oM-f4_Z2gf4": {"duration": 
151.604, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oM-f4_Z2gf4", "annotations": []}, "H5kerJ_FoBU": {"duration": 60.047, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=H5kerJ_FoBU", "annotations": [{"segment": [8.099557722308893, 60.021], "label": "Plastering"}]}, "HdZjxdQhtZo": {"duration": 157.943, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HdZjxdQhtZo", "annotations": [{"segment": [28.089552251477812, 32.77114429339078], "label": "Shuffleboard"}, {"segment": [36.95993717299712, 47.062320000283], "label": "Shuffleboard"}, {"segment": [56.425504084108944, 61.10709612602191], "label": "Shuffleboard"}, {"segment": [72.934276021381, 82.29746010520692], "label": "Shuffleboard"}, {"segment": [116.30060230436428, 122.21419225204382], "label": "Shuffleboard"}, {"segment": [140.94055906396255, 142.91175569422776], "label": "Shuffleboard"}]}, "k-5moAuYOcE": {"duration": 103.236, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=k-5moAuYOcE", "annotations": []}, "NOEpoA4Mjrw": {"duration": 174.266, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NOEpoA4Mjrw", "annotations": []}, "osaJxzwVt6Q": {"duration": 98.848, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=osaJxzwVt6Q", "annotations": [{"segment": [7.093570870514821, 94.68375031513261], "label": "Playing congas"}]}, "jgHYzuDU2Iw": {"duration": 38.755, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=jgHYzuDU2Iw", "annotations": [{"segment": [0.01, 34.16025819032761], "label": "Mooping floor"}]}, "Nf-7lKa387k": {"duration": 63.72, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Nf-7lKa387k", "annotations": [{"segment": [1.2730257, 38.0697125], "label": "Smoking hookah"}]}, "9yktj7NCdFI": {"duration": 
233.55, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9yktj7NCdFI", "annotations": [{"segment": [82.70672539010404, 96.91625089765495], "label": "Hopscotch"}, {"segment": [112.21881682886362, 170.51430609061097], "label": "Hopscotch"}]}, "qW926_opnTE": {"duration": 109.0, "subset": "validation", "resolution": "384x240", "url": "https://www.youtube.com/watch?v=qW926_opnTE", "annotations": [{"segment": [52.37175643216205, 57.13282519872224], "label": "Playing violin"}, {"segment": [65.63473371043685, 71.92614600910568], "label": "Playing violin"}]}, "8O1cLqQrn4o": {"duration": 129.8, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8O1cLqQrn4o", "annotations": [{"segment": [83.83309242573785, 122.50971236128358], "label": "Playing badminton"}]}, "NfTfLk1D-tI": {"duration": 228.856, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NfTfLk1D-tI", "annotations": [{"segment": [13.240134629698264, 228.85503401360543], "label": "Playing congas"}]}, "eBajcta4sD4": {"duration": 122.346, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eBajcta4sD4", "annotations": [{"segment": [40.82392199687988, 57.22979719188768], "label": "Doing fencing"}, {"segment": [69.05728861154446, 73.25414040561623], "label": "Doing fencing"}, {"segment": [76.11563026521061, 79.56242901716068], "label": "Doing fencing"}, {"segment": [79.75322620904836, 106.27403588143527], "label": "Doing fencing"}]}, "UoBFbIxneZ0": {"duration": 36.27, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UoBFbIxneZ0", "annotations": []}, "xH8l5rCWrMU": {"duration": 94.07, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xH8l5rCWrMU", "annotations": [{"segment": [36.53969534321373, 89.22142477379096], "label": "Preparing pasta"}]}, "DkouAjobzac": {"duration": 177.94, "subset": "training", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DkouAjobzac", "annotations": [{"segment": [33.86585536346624, 36.641745147356914], "label": "Sailing"}, {"segment": [56.07297363459164, 142.40314591359166], "label": "Sailing"}]}, "FjZKKAEEHbY": {"duration": 179.07, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FjZKKAEEHbY", "annotations": [{"segment": [27.40630449815554, 112.4647235012708], "label": "Grooming horse"}]}, "1TWdrO8cCxA": {"duration": 82.965, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1TWdrO8cCxA", "annotations": [{"segment": [0.01, 82.96489795918367], "label": "Rope skipping"}]}, "xOTW_-tiEjY": {"duration": 171.967, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xOTW_-tiEjY", "annotations": [{"segment": [0, 171.966983], "label": "Having an ice cream"}]}, "zWiu-wdKeWs": {"duration": 53.43, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zWiu-wdKeWs", "annotations": [{"segment": [1.2809586938994697, 52.02022660667206], "label": "Spread mulch"}]}, "hiz0rH1bO7k": {"duration": 192.26, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hiz0rH1bO7k", "annotations": [{"segment": [91.57584444, 174.94065666], "label": "Playing field hockey"}]}, "IfYiYObrUlY": {"duration": 208.42000000000002, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=IfYiYObrUlY", "annotations": [{"segment": [94.29402612839208, 181.43471153822153], "label": "Preparing salad"}]}, "ANwaFSIHdW0": {"duration": 38.755, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ANwaFSIHdW0", "annotations": [{"segment": [0.01, 38.081681400441674], "label": "Fun sliding down"}]}, "5rO2DwFhdwo": {"duration": 199.07, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=5rO2DwFhdwo", "annotations": [{"segment": [0, 135.20610452418097], "label": "Playing racquetball"}]}, "PFrFwE3CfjE": {"duration": 180.35, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=PFrFwE3CfjE", "annotations": [{"segment": [82.43739565092808, 151.9324000624025], "label": "Hanging wallpaper"}]}, "vGKdr_au240": {"duration": 39.103, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vGKdr_au240", "annotations": [{"segment": [17.324621166926676, 27.023968932917313], "label": "Curling"}]}, "LrmtSSUVRaQ": {"duration": 107.904, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=LrmtSSUVRaQ", "annotations": [{"segment": [5.05004503900156, 99.31755243369736], "label": "Playing blackjack"}]}, "iqyc2JM0uIU": {"duration": 109.69, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iqyc2JM0uIU", "annotations": []}, "ELiXlJUBzzw": {"duration": 136.627, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ELiXlJUBzzw", "annotations": [{"segment": [7.673235060014646, 11.509852590021968], "label": "Hand car wash"}, {"segment": [19.18308765003661, 60.959589643449675], "label": "Hand car wash"}, {"segment": [72.04315139680416, 74.60089641680905], "label": "Hand car wash"}, {"segment": [78.01122311014889, 85.25816723868955], "label": "Hand car wash"}, {"segment": [92.9314022901716, 101.883509850234], "label": "Hand car wash"}, {"segment": [103.16238235881436, 121.49288831513262], "label": "Hand car wash"}, {"segment": [123.83748791419657, 128.95297794851794], "label": "Hand car wash"}]}, "Q0FbJovQ0Lw": {"duration": 148.701, "subset": "validation", "resolution": "400x224", "url": "https://www.youtube.com/watch?v=Q0FbJovQ0Lw", "annotations": [{"segment": [0.2, 141.25110591900312], "label": "Playing drums"}]}, "e7r6SP4_CUQ": {"duration": 121.069, "subset": "testing", 
"resolution": "426x240", "url": "https://www.youtube.com/watch?v=e7r6SP4_CUQ", "annotations": []}, "TqG43Oee7gc": {"duration": 178.423, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=TqG43Oee7gc", "annotations": []}, "QHF28_yGOV8": {"duration": 113.407, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QHF28_yGOV8", "annotations": [{"segment": [26.36120482527301, 32.022671633385336], "label": "Surfing"}, {"segment": [38.21490095475819, 112.87549448673947], "label": "Surfing"}]}, "opAcl-kukdU": {"duration": 179.24, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=opAcl-kukdU", "annotations": [{"segment": [0.01, 7.270055504260987], "label": "Cheerleading"}, {"segment": [13.701258450338013, 168.6093623775351], "label": "Cheerleading"}]}, "P3HENCwV5iY": {"duration": 23.546, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=P3HENCwV5iY", "annotations": []}, "Ufki4AjZHFI": {"duration": 12.144, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=Ufki4AjZHFI", "annotations": [{"segment": [0.01, 10.882467941581274], "label": "Powerbocking"}]}, "im6KJzbZzKE": {"duration": 135.35, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=im6KJzbZzKE", "annotations": [{"segment": [4.85650751200116, 107.68777380655226], "label": "Long jump"}]}, "62M1FRXsUiM": {"duration": 91.49, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=62M1FRXsUiM", "annotations": []}, "zufK6CufVhA": {"duration": 169.228, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=zufK6CufVhA", "annotations": [{"segment": [50.80708658346334, 77.728244149766], "label": "Shuffleboard"}, {"segment": [95.41174960998441, 101.49420358814352], "label": "Shuffleboard"}]}, "dL--vW-AJJo": {"duration": 213.229, "subset": "training", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dL--vW-AJJo", "annotations": [{"segment": [26.612024154435566, 37.92213442007068], "label": "Changing car wheel"}, {"segment": [45.573091364470905, 50.89549619535802], "label": "Changing car wheel"}, {"segment": [56.55055092043681, 74.84631739469577], "label": "Changing car wheel"}, {"segment": [96.13593656474258, 104.7848443525741], "label": "Changing car wheel"}, {"segment": [117.42555573478938, 153.68443838377533], "label": "Changing car wheel"}]}, "EoguY2ethBo": {"duration": 54.405, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EoguY2ethBo", "annotations": []}, "hhoewBggtuw": {"duration": 198.322, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=hhoewBggtuw", "annotations": [{"segment": [16.18531573611108, 157.96126433424192], "label": "Changing car wheel"}]}, "nt8MurhX7HY": {"duration": 91.069, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nt8MurhX7HY", "annotations": [{"segment": [16.2552488299532, 82.99808814352573], "label": "Tug of war"}]}, "VgQ6a7oVx7g": {"duration": 62.67, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VgQ6a7oVx7g", "annotations": [{"segment": [8.6365527, 59.62137285491419], "label": "Using parallel bars"}]}, "hsJct3UsbAs": {"duration": 211.023, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hsJct3UsbAs", "annotations": [{"segment": [20.410955953884415, 202.7927228580343], "label": "Waterskiing"}]}, "TrAU4xsihas": {"duration": 80.968, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=TrAU4xsihas", "annotations": []}, "8NcRuVMSQLs": {"duration": 221.64, "subset": "training", "resolution": "582x480", "url": "https://www.youtube.com/watch?v=8NcRuVMSQLs", "annotations": [{"segment": [32.49687051482059, 38.719675507020284], "label": "High 
jump"}, {"segment": [48.05388299531981, 64.99374102964119], "label": "High jump"}, {"segment": [100.60201404056163, 105.7876848673947], "label": "High jump"}, {"segment": [120.65327457098284, 140.01311232449297], "label": "High jump"}, {"segment": [136.90170982839314, 144.8530717628705], "label": "High jump"}, {"segment": [202.24116224648986, 220.5638658346334], "label": "High jump"}]}, "I9_eKiGZPxc": {"duration": 170.946, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=I9_eKiGZPxc", "annotations": [{"segment": [34.12687675507021, 94.64875975039003], "label": "Rafting"}]}, "6ECElQXTBIY": {"duration": 77.578, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6ECElQXTBIY", "annotations": [{"segment": [13.252296093603746, 70.25532312636506], "label": "Baton twirling"}]}, "lu7viu9nf6g": {"duration": 84.17, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=lu7viu9nf6g", "annotations": [{"segment": [14.0301872074883, 21.373088923556942], "label": "Archery"}, {"segment": [26.486895475819033, 44.05741029641186], "label": "Archery"}, {"segment": [48.25335413416536, 68.05296411856474], "label": "Archery"}]}, "M_CU9guT3C0": {"duration": 98.27000000000001, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=M_CU9guT3C0", "annotations": [{"segment": [30.459215049999997, 88.6805249], "label": "Washing dishes"}]}, "feqYwvoENzk": {"duration": 84.75, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=feqYwvoENzk", "annotations": [{"segment": [46.673394561622466, 75.10053289235569], "label": "Playing piano"}]}, "3L0MnbQkLWM": {"duration": 151.42000000000002, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3L0MnbQkLWM", "annotations": [{"segment": [22.204724433697347, 151.417323], "label": "Hopscotch"}]}, "p-l6as8o1f4": {"duration": 123.53, "subset": 
"validation", "resolution": "638x360", "url": "https://www.youtube.com/watch?v=p-l6as8o1f4", "annotations": [{"segment": [5.8503808, 116.42990639625584], "label": "Horseback riding"}]}, "16ZtFro1khg": {"duration": 90.489, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=16ZtFro1khg", "annotations": []}, "nxDAt9SkPoA": {"duration": 182.811, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nxDAt9SkPoA", "annotations": [{"segment": [7.697358814352575, 79.5393744149766], "label": "Rope skipping"}, {"segment": [92.65339313572542, 142.27416380655225], "label": "Rope skipping"}]}, "Ls-0SqAeXW0": {"duration": 96.6, "subset": "validation", "resolution": "160x120", "url": "https://www.youtube.com/watch?v=Ls-0SqAeXW0", "annotations": [{"segment": [4.141474258970359, 38.02626365054603], "label": "Playing squash"}, {"segment": [48.417599063962555, 65.13409516380656], "label": "Playing squash"}, {"segment": [75.52543057722309, 92.84432293291732], "label": "Playing squash"}]}, "Kj2SF5bCxJ8": {"duration": 114.28999999999999, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Kj2SF5bCxJ8", "annotations": [{"segment": [13.57801198654076, 97.95912504261553], "label": "Making a sandwich"}]}, "WYjtYDyHUxY": {"duration": 140.16, "subset": "training", "resolution": "592x360", "url": "https://www.youtube.com/watch?v=WYjtYDyHUxY", "annotations": [{"segment": [0, 22.52110988299532], "label": "Paintball"}]}, "dnZ5wcF69lI": {"duration": 78.67, "subset": "testing", "resolution": "416x240", "url": "https://www.youtube.com/watch?v=dnZ5wcF69lI", "annotations": []}, "ibDWgL2D2l4": {"duration": 24.566, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ibDWgL2D2l4", "annotations": [{"segment": [0, 4.785686427457098], "label": "Hitting a pinata"}]}, "qKA-mFjzpok": {"duration": 200.876, "subset": "testing", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=qKA-mFjzpok", "annotations": []}, "SYp68jyl0-Q": {"duration": 97.28999999999999, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SYp68jyl0-Q", "annotations": []}, "QTPz2j16KFk": {"duration": 95.272, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QTPz2j16KFk", "annotations": [{"segment": [0.20000000000000004, 19.7676502488671], "label": "Playing rubik cube"}, {"segment": [25.26692137073238, 40.724332091651014], "label": "Playing rubik cube"}]}, "dZSjgAYlpS0": {"duration": 62.067, "subset": "training", "resolution": "638x360", "url": "https://www.youtube.com/watch?v=dZSjgAYlpS0", "annotations": [{"segment": [0.2, 50.350714134165365], "label": "Peeling potatoes"}]}, "jE0XAclET5w": {"duration": 167.161, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jE0XAclET5w", "annotations": [{"segment": [0.01, 158.81482333385335], "label": "Playing rubik cube"}]}, "r1BcU3OOz34": {"duration": 191.1, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=r1BcU3OOz34", "annotations": []}, "qn7LRqyyjVE": {"duration": 223.957, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qn7LRqyyjVE", "annotations": []}, "lIo8HfzzSNs": {"duration": 163.05, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lIo8HfzzSNs", "annotations": []}, "Y6eNXuhsFzM": {"duration": 63.32, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Y6eNXuhsFzM", "annotations": []}, "nTNkGOtp7aQ": {"duration": 34.67, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=nTNkGOtp7aQ", "annotations": [{"segment": [5.8409956755070205, 25.58139772698908], "label": "Springboard diving"}]}, "qKG1mU0Feug": {"duration": 80.92, "subset": "validation", "resolution": "294x240", "url": 
"https://www.youtube.com/watch?v=qKG1mU0Feug", "annotations": [{"segment": [0.01, 80.921563], "label": "Playing water polo"}]}, "at_ahUtnF9Q": {"duration": 236.19, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=at_ahUtnF9Q", "annotations": [{"segment": [163.60327464527154, 224.03331125117006], "label": "Ping-pong"}]}, "iHzwizJqnUc": {"duration": 64.92, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=iHzwizJqnUc", "annotations": []}, "I-ji2Cw-rD8": {"duration": 186.921, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=I-ji2Cw-rD8", "annotations": []}, "spZ_RrpyNJw": {"duration": 112.083, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=spZ_RrpyNJw", "annotations": [{"segment": [8.715605511936502, 29.63306483631947], "label": "Playing pool"}, {"segment": [36.256926982509206, 106.50472816446876], "label": "Playing pool"}]}, "Da9uVKDLwVY": {"duration": 238.4, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Da9uVKDLwVY", "annotations": [{"segment": [17.077392523364484, 222.00610280373832], "label": "Grooming dog"}]}, "cxF2YQeHDZE": {"duration": 113.105, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=cxF2YQeHDZE", "annotations": []}, "arbdFQpe0fk": {"duration": 128.732, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=arbdFQpe0fk", "annotations": [{"segment": [0.01, 47.20153197263022], "label": "Hitting a pinata"}, {"segment": [98.53369895082336, 111.36674069537166], "label": "Hitting a pinata"}, {"segment": [113.97345229973301, 128.53145], "label": "Hitting a pinata"}]}, "HjNVTXM5JKk": {"duration": 174.382, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HjNVTXM5JKk", "annotations": []}, "980U7SiE0Nk": {"duration": 125.179, "subset": "training", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=980U7SiE0Nk", "annotations": [{"segment": [0.01, 125.141], "label": "Baton twirling"}]}, "ArGF2CfOxkY": {"duration": 57.934, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ArGF2CfOxkY", "annotations": [{"segment": [18.02067004680187, 30.39576677067083], "label": "Calf roping"}]}, "iyOyZJm7fVU": {"duration": 103.654, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=iyOyZJm7fVU", "annotations": [{"segment": [83.198001875195, 89.82796898283931], "label": "Baking cookies"}]}, "U9b8U-EymNw": {"duration": 160.938, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=U9b8U-EymNw", "annotations": [{"segment": [0.009999999999999998, 160.93750566893425], "label": "Wakeboarding"}]}, "sTMbpIfI-6o": {"duration": 150.234, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=sTMbpIfI-6o", "annotations": [{"segment": [40.766761310452424, 127.9403213728549], "label": "Making a lemonade"}]}, "dZZE8HI0OBE": {"duration": 13.17, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dZZE8HI0OBE", "annotations": [{"segment": [1.9306973822152886, 11.527676855532135], "label": "Putting on makeup"}]}, "CSCB9NjpUCk": {"duration": 181.882, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=CSCB9NjpUCk", "annotations": []}, "diZvvLb46O8": {"duration": 201.131, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=diZvvLb46O8", "annotations": [{"segment": [11.606859594383774, 192.31655694227769], "label": "Rope skipping"}]}, "stkWKPWbQpE": {"duration": 158.244, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=stkWKPWbQpE", "annotations": [{"segment": [11.812924165550855, 154.30636564446033], "label": "Baton twirling"}]}, "O4LdjwlsGKI": {"duration": 94.83, 
"subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=O4LdjwlsGKI", "annotations": [{"segment": [41.541093603744145, 56.02873478939157], "label": "Skateboarding"}, {"segment": [80.12552574102963, 86.18668174726989], "label": "Skateboarding"}]}, "e8w3gmiv-mM": {"duration": 148.515, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=e8w3gmiv-mM", "annotations": [{"segment": [16.676368174726992, 28.025563182527303], "label": "Fun sliding down"}, {"segment": [51.41880187207489, 58.13567238689548], "label": "Fun sliding down"}, {"segment": [88.70901404056164, 90.79356006240249], "label": "Fun sliding down"}, {"segment": [125.99922620904837, 133.19278471138844], "label": "Fun sliding down"}]}, "nxNir9CE3zI": {"duration": 62.32, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nxNir9CE3zI", "annotations": [{"segment": [10.007711388455538, 58.97748361934477], "label": "Kayaking"}]}, "FklvvNrpsUk": {"duration": 172.153, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FklvvNrpsUk", "annotations": [{"segment": [0.19999999999999998, 172.121], "label": "Blow-drying hair"}]}, "y-rgla4aNUo": {"duration": 226.12, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=y-rgla4aNUo", "annotations": [{"segment": [0, 226.11532879818594], "label": "Playing guitarra"}]}, "9Om-YIcAE7k": {"duration": 182.56, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9Om-YIcAE7k", "annotations": []}, "0CZuZZ17mck": {"duration": 198.74, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0CZuZZ17mck", "annotations": []}, "makIgB4X3q8": {"duration": 151.16, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=makIgB4X3q8", "annotations": [{"segment": [58.918096723868956, 72.82276755070203], "label": "Brushing 
teeth"}]}, "FsQWaEejutU": {"duration": 19.95, "subset": "training", "resolution": "136x240", "url": "https://www.youtube.com/watch?v=FsQWaEejutU", "annotations": [{"segment": [0.010000000000000002, 17.59014976599064], "label": "Smoking hookah"}]}, "WqnnGmL-lmU": {"duration": 224.236, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WqnnGmL-lmU", "annotations": [{"segment": [139.22850442725192, 152.171857853906], "label": "Braiding hair"}]}, "1kMPW5huHWQ": {"duration": 56.309, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1kMPW5huHWQ", "annotations": [{"segment": [0.2, 6.924023400936037], "label": "Playing rubik cube"}, {"segment": [10.429858034321374, 21.297945397815912], "label": "Playing rubik cube"}, {"segment": [28.747843993759748, 48.46816380655226], "label": "Playing rubik cube"}]}, "aEUbl9oT_0s": {"duration": 209.3, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aEUbl9oT_0s", "annotations": [{"segment": [0, 3.264914196567863], "label": "Javelin throw"}, {"segment": [55.830032761310456, 209.301], "label": "Javelin throw"}]}, "lEGetBydfl4": {"duration": 16.86, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=lEGetBydfl4", "annotations": [{"segment": [0.33465328, 15.964637946957879], "label": "Chopping wood"}]}, "53tCYiyAyf0": {"duration": 28.213, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=53tCYiyAyf0", "annotations": [{"segment": [10.563086302652106, 20.377953992199686], "label": "Throwing darts"}]}, "0y4mO86t4Z0": {"duration": 169.88, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=0y4mO86t4Z0", "annotations": [{"segment": [0, 164.8418257628705], "label": "Horseback riding"}]}, "PN99KIY7jRY": {"duration": 5.13, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=PN99KIY7jRY", 
"annotations": [{"segment": [0.816574287051482, 3.378375971918877], "label": "Tennis serve with ball bouncing"}]}, "S-VSs8o98Ho": {"duration": 49.784, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=S-VSs8o98Ho", "annotations": [{"segment": [7.5101191249810855, 46.06723162575869], "label": "Playing blackjack"}]}, "nd50oUKdygI": {"duration": 128.059, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nd50oUKdygI", "annotations": [{"segment": [30.96567508958862, 41.953494664586586], "label": "Sharpening knives"}, {"segment": [49.3453008673947, 62.13112781279252], "label": "Sharpening knives"}, {"segment": [73.71828348205929, 80.91031113884556], "label": "Sharpening knives"}]}, "72_9wLYUDGM": {"duration": 48.37, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=72_9wLYUDGM", "annotations": []}, "AKQqaMyZtf8": {"duration": 153.855, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AKQqaMyZtf8", "annotations": [{"segment": [18.032742732003467, 132.32265309736067], "label": "Blowing leaves"}]}, "X3c6Mshh7Lw": {"duration": 197.602, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=X3c6Mshh7Lw", "annotations": []}, "R1Q-KP8GHFE": {"duration": 175.567, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=R1Q-KP8GHFE", "annotations": [{"segment": [17.74773302085868, 137.8862608103799], "label": "Wakeboarding"}]}, "gPtpPiea0iQ": {"duration": 47.83, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=gPtpPiea0iQ", "annotations": [{"segment": [16.33138689547582, 27.29355070202808], "label": "Hammer throw"}]}, "njsO-Nlwkmg": {"duration": 36.9, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=njsO-Nlwkmg", "annotations": [{"segment": [17.71118688, 29.62970176], "label": "Vacuuming 
floor"}, {"segment": [2.1835052800000003, 15.1937728], "label": "Vacuuming floor"}]}, "CsvEXvHlO3M": {"duration": 88.817, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CsvEXvHlO3M", "annotations": [{"segment": [79.18853666146646, 88.541], "label": "Snow tubing"}, {"segment": [0.2, 82.2342496099844], "label": "Snow tubing"}]}, "UcQ63-sQ9bI": {"duration": 225.77, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=UcQ63-sQ9bI", "annotations": [{"segment": [3.6098718098554285, 216.91175187216072], "label": "Playing guitarra"}]}, "KLN5w_G9TwI": {"duration": 233.663, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KLN5w_G9TwI", "annotations": []}, "mpRRbDm7FUM": {"duration": 193.864, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mpRRbDm7FUM", "annotations": []}, "zz69LCrUQHU": {"duration": 95.16, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zz69LCrUQHU", "annotations": []}, "p54LLMnyg54": {"duration": 232.25, "subset": "testing", "resolution": "300x240", "url": "https://www.youtube.com/watch?v=p54LLMnyg54", "annotations": []}, "8Tyu4-JnfDc": {"duration": 178.91, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8Tyu4-JnfDc", "annotations": [{"segment": [112.0624463971756, 129.08812318479133], "label": "Baking cookies"}, {"segment": [138.57784467297057, 144.71825269473362], "label": "Baking cookies"}]}, "iVVatZsgnGo": {"duration": 160.416, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iVVatZsgnGo", "annotations": [{"segment": [18.267196567862715, 94.10030577223088], "label": "Scuba diving"}, {"segment": [113.12058034321372, 141.9012589703588], "label": "Scuba diving"}]}, "c_RXwivt4JM": {"duration": 80.202, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=c_RXwivt4JM", "annotations": []}, "ajA_cE6b9AY": {"duration": 124.552, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ajA_cE6b9AY", "annotations": [{"segment": [22.382747024928296, 105.20278908100306], "label": "Rafting"}]}, "ujiWstJ3tt0": {"duration": 58.375, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ujiWstJ3tt0", "annotations": [{"segment": [0.2, 58.174965986394554], "label": "Plastering"}]}, "CGpjeyRKB6o": {"duration": 219.801, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CGpjeyRKB6o", "annotations": []}, "A92F-HvSZx0": {"duration": 61.696, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=A92F-HvSZx0", "annotations": [{"segment": [0.01, 61.69544217687075], "label": "Playing congas"}]}, "ozVKcVAgRMY": {"duration": 129.73, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ozVKcVAgRMY", "annotations": []}, "_OY-1VtINQQ": {"duration": 57.772, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_OY-1VtINQQ", "annotations": [{"segment": [0.8104352574102964, 46.75129329173167], "label": "Using the rowing machine"}]}, "ZIfB1zKleJU": {"duration": 204.27, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZIfB1zKleJU", "annotations": []}, "s0swzu1jIpc": {"duration": 91.0, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=s0swzu1jIpc", "annotations": [{"segment": [32.08389114820593, 42.16334367706708], "label": "Shaving"}]}, "c-X500da7JU": {"duration": 57.26, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=c-X500da7JU", "annotations": [{"segment": [8.932980992709096, 44.3075855975039], "label": "Wrapping presents"}]}, "hRoHfDR4-e0": {"duration": 237.042, "subset": "training", 
"resolution": "854x480", "url": "https://www.youtube.com/watch?v=hRoHfDR4-e0", "annotations": [{"segment": [4.578748850041089, 227.23884698088222], "label": "Bullfighting"}]}, "lGESoAdgps8": {"duration": 231.36, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lGESoAdgps8", "annotations": [{"segment": [12.813617004680186, 29.056230109204364], "label": "Discus throw"}, {"segment": [61.180509360374415, 71.28702418096724], "label": "Discus throw"}, {"segment": [79.9497511700468, 87.1686903276131], "label": "Discus throw"}, {"segment": [118.5710756630265, 127.59474960998439], "label": "Discus throw"}, {"segment": [0.2, 231.167], "label": "Discus throw"}, {"segment": [0.2, 231.167], "label": "Discus throw"}, {"segment": [186.42910374414976, 191.12141419656786], "label": "Discus throw"}, {"segment": [201.58887597503897, 207.72497425897035], "label": "Discus throw"}]}, "9pJBfTZOcxI": {"duration": 63.623, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9pJBfTZOcxI", "annotations": [{"segment": [9.81984243369735, 54.95144149765991], "label": "Wakeboarding"}]}, "UjiJbwQqgW0": {"duration": 15.372, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UjiJbwQqgW0", "annotations": []}, "yslrb52Di5w": {"duration": 51.29, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=yslrb52Di5w", "annotations": [{"segment": [5.294882496070726, 20.954370208081805], "label": "Playing piano"}]}, "9xOUoTQLea4": {"duration": 226.74, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9xOUoTQLea4", "annotations": [{"segment": [102.58257135725428, 223.9129919625585], "label": "Drinking coffee"}]}, "RzFqIN5hWJQ": {"duration": 175.358, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=RzFqIN5hWJQ", "annotations": [{"segment": [25.441825379137615, 149.91527212653133], "label": 
"Baton twirling"}]}, "E0U3v4WLuEA": {"duration": 119.978, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=E0U3v4WLuEA", "annotations": [{"segment": [55.60388002897505, 76.68857280449234], "label": "Assembling bicycle"}]}, "ETHVjrG7S4k": {"duration": 67.176, "subset": "validation", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=ETHVjrG7S4k", "annotations": [{"segment": [26.744763837202843, 57.98181253497022], "label": "Using the monkey bar"}]}, "vlX9sU9bM9s": {"duration": 4.51, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vlX9sU9bM9s", "annotations": [{"segment": [0, 4.411666], "label": "Javelin throw"}]}, "yb2CXXx7_QU": {"duration": 40.055, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yb2CXXx7_QU", "annotations": []}, "ik8uSCSBv1U": {"duration": 9.73, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ik8uSCSBv1U", "annotations": [{"segment": [0, 9.751666], "label": "Tennis serve with ball bouncing"}]}, "27Pg4wHah2I": {"duration": 93.55, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=27Pg4wHah2I", "annotations": [{"segment": [0, 1.7513816634298025], "label": "Playing piano"}, {"segment": [8.173114429339078, 11.675877756198684], "label": "Playing piano"}, {"segment": [25.103137175827168, 27.730209670971874], "label": "Playing piano"}]}, "rua32XjJh54": {"duration": 185.876, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=rua32XjJh54", "annotations": [{"segment": [0.01, 13.918931940951108], "label": "Curling"}, {"segment": [64.08508247812907, 80.90379190677831], "label": "Curling"}, {"segment": [122.37060998086183, 127.8801872074883], "label": "Curling"}, {"segment": [155.71805108939054, 165.5772945475642], "label": "Curling"}]}, "jRXF5_vNUWE": {"duration": 131.12, "subset": "validation", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=jRXF5_vNUWE", "annotations": [{"segment": [0.01, 129.69104890171607], "label": "Cheerleading"}]}, "96vBhCFBbQk": {"duration": 138.345, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=96vBhCFBbQk", "annotations": []}, "8DIfyvX7H8Y": {"duration": 227.486, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8DIfyvX7H8Y", "annotations": [{"segment": [177.8006164899657, 209.3859555470654], "label": "Kite flying"}]}, "S6VgTNGiIkg": {"duration": 235.358, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=S6VgTNGiIkg", "annotations": [{"segment": [13.582117004680187, 211.458496099844], "label": "Playing pool"}]}, "8sTJGmY-SGQ": {"duration": 46.65, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8sTJGmY-SGQ", "annotations": [{"segment": [9.824648588143525, 46.648887], "label": "Using parallel bars"}]}, "GaxyzK2mHqw": {"duration": 156.99, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GaxyzK2mHqw", "annotations": [{"segment": [43.10480520445308, 51.18695618028802], "label": "Making a sandwich"}, {"segment": [88.65874706825008, 95.76124338034745], "label": "Making a sandwich"}, {"segment": [105.80270368365755, 111.43571800014858], "label": "Making a sandwich"}, {"segment": [113.63993964430577, 149.39733444617784], "label": "Making a sandwich"}]}, "GaIvG8u1tzo": {"duration": 122.93, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GaIvG8u1tzo", "annotations": [{"segment": [32.85566128, 104.35556918], "label": "Getting a piercing"}]}, "OJYajhN1RHs": {"duration": 237.26, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OJYajhN1RHs", "annotations": [{"segment": [17.948404836193447, 186.70041731669266], "label": "Brushing hair"}]}, "-zp0Sban5Wk": {"duration": 
186.55, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-zp0Sban5Wk", "annotations": [{"segment": [80.88578091, 167.05022064898597], "label": "Preparing pasta"}]}, "zymyLkiT-W0": {"duration": 84.73, "subset": "testing", "resolution": "408x720", "url": "https://www.youtube.com/watch?v=zymyLkiT-W0", "annotations": []}, "DfYDYCWkB-o": {"duration": 73.63, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DfYDYCWkB-o", "annotations": [{"segment": [7.020690252226833, 34.863872495217485], "label": "Bathing dog"}]}, "kdjLJTGj4H0": {"duration": 89.165, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kdjLJTGj4H0", "annotations": [{"segment": [29.072396904836193, 84.99155267394697], "label": "Curling"}]}, "z5bc9KKiAGI": {"duration": 40.589, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=z5bc9KKiAGI", "annotations": [{"segment": [0.2, 34.5729883151326], "label": "Throwing darts"}]}, "WZrKIuJaqTM": {"duration": 50.713, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=WZrKIuJaqTM", "annotations": []}, "NKZzGiS5hv0": {"duration": 131.774, "subset": "training", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=NKZzGiS5hv0", "annotations": [{"segment": [51.23376086982879, 56.76700738149287], "label": "Sumo"}, {"segment": [69.26804579673393, 85.45791521975103], "label": "Sumo"}]}, "oEd7WT4mVSE": {"duration": 65.97, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=oEd7WT4mVSE", "annotations": [{"segment": [2.518072542901716, 24.718222308892354], "label": "Smoking hookah"}]}, "_p0eoCHZ8BA": {"duration": 190.99, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_p0eoCHZ8BA", "annotations": [{"segment": [70.91808872, 177.63788611544462], "label": "Grooming horse"}, {"segment": [3.27346576, 
69.23857254290171], "label": "Grooming horse"}]}, "rWoSJ9YBy5E": {"duration": 28.515, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rWoSJ9YBy5E", "annotations": [{"segment": [7.432498281284088, 23.624594780504058], "label": "Curling"}]}, "7toItxBIVtk": {"duration": 227.35, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=7toItxBIVtk", "annotations": [{"segment": [0, 224.86382961713028], "label": "Belly dance"}]}, "Hv3We5wjaJE": {"duration": 126.06, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Hv3We5wjaJE", "annotations": [{"segment": [0.5897082683307333, 83.94846645865834], "label": "Hand washing clothes"}]}, "cht49ZCSDmY": {"duration": 19.39, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cht49ZCSDmY", "annotations": [{"segment": [1.9963363900156006, 12.219998508580343], "label": "Washing hands"}]}, "yOcWUk9cOws": {"duration": 30.3, "subset": "training", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=yOcWUk9cOws", "annotations": [{"segment": [19.269578783151328, 29.06926677067083], "label": "Playing field hockey"}]}, "AnPLZtJs7zE": {"duration": 95.713, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AnPLZtJs7zE", "annotations": [{"segment": [25.04630173564753, 82.74224588785047], "label": "Painting fence"}]}, "3Vj0GcB_qh4": {"duration": 66.08, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3Vj0GcB_qh4", "annotations": [{"segment": [6.185708979379583, 26.279226610000002], "label": "Playing violin"}]}, "5U8RCiiZJM4": {"duration": 97.13, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5U8RCiiZJM4", "annotations": []}, "ZksAXEVmFrg": {"duration": 88.33, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ZksAXEVmFrg", "annotations": 
[{"segment": [25.553290399999998, 80.75905065678627], "label": "Using the balance beam"}]}, "vxlbVkMj2Js": {"duration": 177.63, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vxlbVkMj2Js", "annotations": []}, "WHYEBsWp5qY": {"duration": 21.11, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=WHYEBsWp5qY", "annotations": [{"segment": [3.62209527301092, 19.55931447425897], "label": "Long jump"}]}, "KgccYb6ufPY": {"duration": 84.03, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KgccYb6ufPY", "annotations": [{"segment": [3.341653666146646, 14.61154446177847], "label": "Long jump"}, {"segment": [17.88767550702028, 24.833073322932915], "label": "Long jump"}, {"segment": [36.234009360374415, 44.35881435257411], "label": "Long jump"}, {"segment": [55.10452418096724, 77.6443057722309], "label": "Long jump"}]}, "sZ95YHZtVCc": {"duration": 85.15, "subset": "validation", "resolution": "324x240", "url": "https://www.youtube.com/watch?v=sZ95YHZtVCc", "annotations": [{"segment": [10.626845079563182, 12.951467440717629], "label": "Polishing shoes"}, {"segment": [22.78129913931357, 25.43801040920437], "label": "Polishing shoes"}, {"segment": [36.33052661575663, 37.26037556021841], "label": "Polishing shoes"}, {"segment": [54.528998814508576, 57.58421677488299], "label": "Polishing shoes"}]}, "JA-B3KK9A-I": {"duration": 238.03, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JA-B3KK9A-I", "annotations": [{"segment": [12.254158999013022, 13.368173453468753], "label": "Plataform diving"}, {"segment": [18.566907574262157, 23.394303543570313], "label": "Plataform diving"}, {"segment": [25.622332452481775, 28.593037664363717], "label": "Plataform diving"}, {"segment": [33.42043363367188, 40.84719666337674], "label": "Plataform diving"}, {"segment": [51.24466490496355, 62.0134712980356], "label": "Plataform diving"}, {"segment": 
[62.384809449520844, 85.40777484160591], "label": "Plataform diving"}, {"segment": [106.57404947626478, 121.05623738418926], "label": "Plataform diving"}, {"segment": [131.0823674742908, 195.32386768123786], "label": "Plataform diving"}, {"segment": [202.75063062402495, 225.03091970358815], "label": "Plataform diving"}]}, "x0PE_98UO3s": {"duration": 123.44, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=x0PE_98UO3s", "annotations": [{"segment": [12.489633385335413, 119.5161840873635], "label": "Archery"}]}, "OVpvpvmV2pg": {"duration": 106.53, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=OVpvpvmV2pg", "annotations": [{"segment": [15.539547122020936, 85.34285885335413], "label": "Fixing bicycle"}]}, "eC90hOqQ0yk": {"duration": 38.058, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eC90hOqQ0yk", "annotations": [{"segment": [9.594084112149531, 29.72981619937695], "label": "Sumo"}]}, "KKAPxOMogDE": {"duration": 6.99, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KKAPxOMogDE", "annotations": [{"segment": [0.01, 6.989205], "label": "Drinking coffee"}]}, "8hkVzhEKqpY": {"duration": 68.11, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8hkVzhEKqpY", "annotations": [{"segment": [16.29750114040562, 58.87273213260531], "label": "Volleyball"}]}, "bUKp023w6QM": {"duration": 198.57999999999998, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bUKp023w6QM", "annotations": []}, "fRmHJKlQmmw": {"duration": 23.89, "subset": "training", "resolution": "414x240", "url": "https://www.youtube.com/watch?v=fRmHJKlQmmw", "annotations": [{"segment": [2.45715612, 12.162257410296412], "label": "Using the pommel horse"}]}, "JBnL0T2XzGs": {"duration": 114.428, "subset": "training", "resolution": "426x240", "url": 
"https://www.youtube.com/watch?v=JBnL0T2XzGs", "annotations": [{"segment": [0.2, 13.735190327613104], "label": "Curling"}, {"segment": [18.55142589703588, 21.227112324492982], "label": "Curling"}, {"segment": [26.043347893915755, 27.827138845553822], "label": "Curling"}, {"segment": [31.03796255850234, 40.49205460218409], "label": "Curling"}, {"segment": [50.490113884555385, 58.69698751950079], "label": "Curling"}, {"segment": [67.08227145085803, 72.43458034321374], "label": "Curling"}, {"segment": [107.58140873634946, 114.161], "label": "Curling"}]}, "J9gOPvgsFdg": {"duration": 207.262, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=J9gOPvgsFdg", "annotations": []}, "OWyqpSBJH8M": {"duration": 60.37, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=OWyqpSBJH8M", "annotations": [{"segment": [11.678804022909215, 50.10583654290171], "label": "Wrapping presents"}]}, "2qN85UKyL2s": {"duration": 77.16, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=2qN85UKyL2s", "annotations": [{"segment": [0.01, 77.081], "label": "Playing pool"}]}, "xmb7j1e0wts": {"duration": 199.692, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xmb7j1e0wts", "annotations": [{"segment": [5.296016471138846, 191.27965372230892], "label": "Baking cookies"}]}, "0IFJNGqjsUQ": {"duration": 118.1, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0IFJNGqjsUQ", "annotations": [{"segment": [12.712384755070202, 20.634595544461778], "label": "Walking the dog"}, {"segment": [89.53940566614665, 94.32957963182527], "label": "Walking the dog"}]}, "gwpQuO5DPOA": {"duration": 22.083, "subset": "validation", "resolution": "274x240", "url": "https://www.youtube.com/watch?v=gwpQuO5DPOA", "annotations": [{"segment": [0.01, 22.022], "label": "Welding"}]}, "vynLNpomc30": {"duration": 64.85, "subset": "validation", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=vynLNpomc30", "annotations": [{"segment": [0, 64.57751273616387], "label": "Playing bagpipes"}]}, "Gxjfc5O6AAc": {"duration": 133.98, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Gxjfc5O6AAc", "annotations": [{"segment": [12.549453978159127, 80.52566302652106], "label": "Getting a haircut"}]}, "uwxuvGci98g": {"duration": 151.63, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uwxuvGci98g", "annotations": [{"segment": [70.49085490795632, 87.52220240249609], "label": "Cricket"}]}, "nYxjWwJrHwk": {"duration": 71.982, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nYxjWwJrHwk", "annotations": [{"segment": [14.257483619344775, 61.857271450858036], "label": "Snow tubing"}]}, "sRYDJ379UYc": {"duration": 69.753, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sRYDJ379UYc", "annotations": [{"segment": [7.853595804149117, 30.199461847762826], "label": "Using the rowing machine"}, {"segment": [34.00585281354245, 57.22618614687578], "label": "Using the rowing machine"}]}, "Z5eMyGnyuKg": {"duration": 182.649, "subset": "testing", "resolution": "1920x816", "url": "https://www.youtube.com/watch?v=Z5eMyGnyuKg", "annotations": []}, "aYfWH960W0Q": {"duration": 99.428, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aYfWH960W0Q", "annotations": [{"segment": [0, 62.82066706708268], "label": "Running a marathon"}]}, "KEXm-3H6eTg": {"duration": 69.06, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=KEXm-3H6eTg", "annotations": [{"segment": [16.67035881435257, 61.5190015600624], "label": "Mixing drinks"}]}, "cHHVpBYfwdU": {"duration": 228.49, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cHHVpBYfwdU", "annotations": [{"segment": [3.2080486240249613, 
227.41500245865834], "label": "Walking the dog"}]}, "H-RuFL---Kc": {"duration": 118.771, "subset": "testing", "resolution": "1916x1080", "url": "https://www.youtube.com/watch?v=H-RuFL---Kc", "annotations": []}, "CphkHGSqvYY": {"duration": 77.23, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CphkHGSqvYY", "annotations": []}, "CrnNXizH0IQ": {"duration": 102.49000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CrnNXizH0IQ", "annotations": [{"segment": [40.34290104, 82.35334691999999], "label": "Polishing forniture"}]}, "mIi5fkvHDAo": {"duration": 75.743, "subset": "training", "resolution": "320x208", "url": "https://www.youtube.com/watch?v=mIi5fkvHDAo", "annotations": [{"segment": [12.849266355140186, 36.55344236760124], "label": "Waterskiing"}]}, "tGX22SWP82Y": {"duration": 190.892, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tGX22SWP82Y", "annotations": []}, "SOyOank50IY": {"duration": 61.77, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=SOyOank50IY", "annotations": [{"segment": [2.601649151326053, 61.37964849609985], "label": "Washing dishes"}]}, "asgrHR8b--E": {"duration": 221.844, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=asgrHR8b--E", "annotations": []}, "2Is_nJdG2to": {"duration": 18.72, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=2Is_nJdG2to", "annotations": [{"segment": [2.91149255, 14.8604586], "label": "Brushing hair"}]}, "Tw1vg9qWLx0": {"duration": 150.094, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Tw1vg9qWLx0", "annotations": [{"segment": [0.2, 3.8044164529778683], "label": "Croquet"}, {"segment": [50.79639644746788, 54.53705157140738], "label": "Croquet"}, {"segment": [116.72544300690163, 150.093785], "label": "Croquet"}]}, "nNoy9buW2Xg": 
{"duration": 109.621, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=nNoy9buW2Xg", "annotations": []}, "HCFF0svChQY": {"duration": 31.23, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HCFF0svChQY", "annotations": [{"segment": [3.453302652106084, 29.961048361934477], "label": "Triple jump"}]}, "h3qKte2gv14": {"duration": 90.05, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=h3qKte2gv14", "annotations": [{"segment": [28.938663652668556, 89.3446102652106], "label": "Smoking a cigarette"}]}, "ZSUgpFz2HQc": {"duration": 236.66, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZSUgpFz2HQc", "annotations": []}, "mcDVNexy6Ps": {"duration": 41.727, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mcDVNexy6Ps", "annotations": [{"segment": [4.23121170826833, 21.741918624024958], "label": "Decorating the Christmas tree"}]}, "3VoIt9XkPEM": {"duration": 168.345, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3VoIt9XkPEM", "annotations": [{"segment": [17.84347581903276, 19.680304212168487], "label": "River tubing"}, {"segment": [22.566748829953195, 36.474163806552255], "label": "River tubing"}, {"segment": [40.14782059282371, 83.96929797191888], "label": "River tubing"}, {"segment": [86.85574258970358, 126.21635101404055], "label": "River tubing"}, {"segment": [131.98924024961, 136.18770514820594], "label": "River tubing"}, {"segment": [139.07414976599063, 160.3288783151326], "label": "River tubing"}]}, "yweAN9o4QYI": {"duration": 35.18, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=yweAN9o4QYI", "annotations": [{"segment": [1.4524976599063961, 33.90074726989079], "label": "Putting on makeup"}]}, "h7Tq3Gsj6lY": {"duration": 64.13, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=h7Tq3Gsj6lY", "annotations": [{"segment": [0.20010457016920133, 5.803032534906839], "label": "Skateboarding"}]}, "BAf6PXfx05c": {"duration": 233.802, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=BAf6PXfx05c", "annotations": []}, "B2RXamsVZ9A": {"duration": 48.948, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=B2RXamsVZ9A", "annotations": []}, "msiX-xky6Ac": {"duration": 191.94, "subset": "validation", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=msiX-xky6Ac", "annotations": [{"segment": [26.18390912, 167.19131666224646], "label": "Hopscotch"}]}, "XvM1rCVQWWY": {"duration": 107.973, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=XvM1rCVQWWY", "annotations": [{"segment": [18.154934579439253, 91.46402492211838], "label": "Fixing the roof"}]}, "MAXfslyf7Dw": {"duration": 153.808, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MAXfslyf7Dw", "annotations": [{"segment": [102.69395943837753, 120.94506084243369], "label": "Sumo"}]}, "ynispYtdDq8": {"duration": 67.64, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=ynispYtdDq8", "annotations": [{"segment": [13.516301369999999, 65.83067862714508], "label": "Smoking a cigarette"}]}, "8ltNqT5N-xs": {"duration": 180.14, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8ltNqT5N-xs", "annotations": [{"segment": [0.01, 180.021], "label": "Belly dance"}]}, "8ohisLftwZ4": {"duration": 83.28999999999999, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8ohisLftwZ4", "annotations": [{"segment": [0, 83.289772], "label": "Playing squash"}]}, "wrf4MfW4MGg": {"duration": 16.7, "subset": "training", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=wrf4MfW4MGg", "annotations": [{"segment": 
[0.2344092457575854, 16.695146], "label": "Putting on makeup"}]}, "-kAzTQi_pQA": {"duration": 91.67, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=-kAzTQi_pQA", "annotations": []}, "BR4dhPz42mQ": {"duration": 100.868, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=BR4dhPz42mQ", "annotations": [{"segment": [4.392360784904942, 7.686633168819627], "label": "Drum corps"}, {"segment": [18.510671001682166, 58.041939608658396], "label": "Drum corps"}, {"segment": [66.82666596576422, 93.49458526412121], "label": "Drum corps"}]}, "wlcU-u-xsH4": {"duration": 34.761, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wlcU-u-xsH4", "annotations": [{"segment": [0.01, 17.840297191887675], "label": "Tug of war"}]}, "pI-rY6fLM1Q": {"duration": 126.549, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pI-rY6fLM1Q", "annotations": [{"segment": [15.17223831775701, 115.26960280373832], "label": "Ice fishing"}]}, "VcthLhKIntA": {"duration": 234.902, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VcthLhKIntA", "annotations": [{"segment": [69.99752679832974, 220.54905361952999], "label": "Hula hoop"}]}, "5oD3-y66g_8": {"duration": 146.703, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=5oD3-y66g_8", "annotations": [{"segment": [36.85249474001049, 138.3111100365964], "label": "Hula hoop"}]}, "b3bawTEPLtA": {"duration": 187.43, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=b3bawTEPLtA", "annotations": [{"segment": [7.602524683300257, 177.7821138471139], "label": "Ballet"}]}, "eMQ5sazq-q0": {"duration": 140.853, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eMQ5sazq-q0", "annotations": [{"segment": [5.9215373831775695, 120.62390965732087], "label": "Cutting the 
grass"}]}, "CvMXJpJZ6YA": {"duration": 222.053, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CvMXJpJZ6YA", "annotations": []}, "jto8_gMKUjE": {"duration": 109.576, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jto8_gMKUjE", "annotations": [{"segment": [11.467731860294819, 55.74523712948518], "label": "Rafting"}, {"segment": [58.53678029017161, 94.30925713416536], "label": "Rafting"}]}, "-Vu1wUnFZvU": {"duration": 190.776, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=-Vu1wUnFZvU", "annotations": []}, "LPeyqO2pRd4": {"duration": 84.82, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LPeyqO2pRd4", "annotations": [{"segment": [1.36395594, 75.95917646957878], "label": "Mixing drinks"}]}, "VyD5Z0S9tXU": {"duration": 139.738, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VyD5Z0S9tXU", "annotations": [{"segment": [13.733969244484063, 33.57192481984993], "label": "Peeling potatoes"}, {"segment": [44.90789943434472, 90.68779691595827], "label": "Peeling potatoes"}]}, "js9SDSngI7A": {"duration": 113.34, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=js9SDSngI7A", "annotations": [{"segment": [55.519020238360554, 57.640766234731025], "label": "Layup drill in basketball"}, {"segment": [64.71325252418096, 66.4813741778471], "label": "Layup drill in basketball"}, {"segment": [71.07849047737909, 72.49298780031201], "label": "Layup drill in basketball"}, {"segment": [86.81477319500779, 87.69883402184087], "label": "Layup drill in basketball"}]}, "sd2MhdKdQTo": {"duration": 222.94, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sd2MhdKdQTo", "annotations": [{"segment": [131.11765021349154, 147.02742352], "label": "Playing harmonica"}]}, "auxBRPzLiIo": {"duration": 18.09, "subset": "training", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=auxBRPzLiIo", "annotations": [{"segment": [0.5285517223088924, 17.831666], "label": "Spinning"}]}, "EMDTvPUEr7E": {"duration": 59.24, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EMDTvPUEr7E", "annotations": [{"segment": [0.01, 17.280292131413148], "label": "Hopscotch"}, {"segment": [26.059050166088277, 58.77147484266717], "label": "Hopscotch"}]}, "DzCVpvVJCo8": {"duration": 50.83, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=DzCVpvVJCo8", "annotations": [{"segment": [2.2995723728549144, 21.96488094071763], "label": "Getting a piercing"}, {"segment": [32.51119561622465, 45.7535606599064], "label": "Getting a piercing"}]}, "0X34KkUlCAI": {"duration": 101.611, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0X34KkUlCAI", "annotations": [{"segment": [0.01, 95.87598283931358], "label": "Tug of war"}]}, "Ta70WnVT6Lc": {"duration": 172.687, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ta70WnVT6Lc", "annotations": [{"segment": [4.57769422776911, 17.772224648985958], "label": "Longboarding"}, {"segment": [23.69629953198128, 31.77458346333853], "label": "Longboarding"}, {"segment": [35.81372542901716, 79.97501092043682], "label": "Longboarding"}, {"segment": [92.90026521060842, 158.36555382215286], "label": "Longboarding"}]}, "ydMOC0TQO7k": {"duration": 52.45, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ydMOC0TQO7k", "annotations": []}, "1GQARL4coRo": {"duration": 117.819, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1GQARL4coRo", "annotations": [{"segment": [0, 106.60558436817472], "label": "Surfing"}]}, "CIAZbDkD0TE": {"duration": 118.53999999999999, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=CIAZbDkD0TE", 
"annotations": [{"segment": [17.189911076443057, 41.41064586583464], "label": "Ironing clothes"}, {"segment": [0.2, 86.70353978159126], "label": "Ironing clothes"}]}, "f0Z-qud2N74": {"duration": 17.253, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=f0Z-qud2N74", "annotations": [{"segment": [5.1985390015600625, 12.479180187207488], "label": "Doing a powerbomb"}]}, "wfB4vYKRvv0": {"duration": 77.49, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wfB4vYKRvv0", "annotations": []}, "0QyHaXliXKs": {"duration": 158.57, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=0QyHaXliXKs", "annotations": [{"segment": [46.49289859594383, 54.653886115444614], "label": "Sumo"}, {"segment": [100.65217940717628, 116.24691107644306], "label": "Sumo"}]}, "3nqzJlFsXXA": {"duration": 79.67, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3nqzJlFsXXA", "annotations": []}, "g_qHPA0RkAY": {"duration": 101.727, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=g_qHPA0RkAY", "annotations": [{"segment": [6.506694967118412, 71.41494476105575], "label": "Carving jack-o-lanterns"}]}, "6w4OWjY6k_k": {"duration": 53.731, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=6w4OWjY6k_k", "annotations": [{"segment": [0.01, 52.67644099312611], "label": "Shuffleboard"}]}, "U36rsW_WhUA": {"duration": 133.515, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=U36rsW_WhUA", "annotations": [{"segment": [21.498342061800376, 126.07925016148884], "label": "Clipping cat claws"}]}, "HguqDEvSN68": {"duration": 32.114, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=HguqDEvSN68", "annotations": [{"segment": [1.4514929797191887, 30.731609984399377], "label": "Riding bumper cars"}]}, "POafWgyeV0I": {"duration": 
169.81, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=POafWgyeV0I", "annotations": [{"segment": [92.28272714664588, 112.93711084087364], "label": "Dodgeball"}, {"segment": [142.85948721840873, 160.07147363026522], "label": "Dodgeball"}, {"segment": [14.431588606864276, 37.733970210608426], "label": "Dodgeball"}]}, "1HWajOhmPVg": {"duration": 26.7, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1HWajOhmPVg", "annotations": [{"segment": [4.69741584, 13.758739540000002], "label": "Hammer throw"}]}, "USOc5S2-3zA": {"duration": 153.3, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=USOc5S2-3zA", "annotations": [{"segment": [4.065629271450858, 153.29814058956916], "label": "Belly dance"}]}, "6qghJRLpHpo": {"duration": 35.09, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6qghJRLpHpo", "annotations": [{"segment": [0, 35.08535], "label": "Kayaking"}]}, "09MaNbzc2TA": {"duration": 143.616, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=09MaNbzc2TA", "annotations": [{"segment": [8.286672386895477, 19.260914196567864], "label": "Beach soccer"}, {"segment": [28.891371294851794, 38.969756630265216], "label": "Beach soccer"}, {"segment": [43.225074882995315, 52.40760374414977], "label": "Beach soccer"}, {"segment": [62.0380608424337, 88.91375507020281], "label": "Beach soccer"}, {"segment": [96.30457098283931, 125.88536973478938], "label": "Beach soccer"}]}, "G9m5zxl2f08": {"duration": 98.15, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=G9m5zxl2f08", "annotations": [{"segment": [2.7561838963354455, 94.78210176842448], "label": "Washing face"}]}, "Vhf-vNRYQEg": {"duration": 86.94, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Vhf-vNRYQEg", "annotations": [{"segment": [5.696212762796226, 
7.052453896795328], "label": "Playing accordion"}, {"segment": [11.528049638992362, 15.054276587390028], "label": "Playing accordion"}, {"segment": [38.24599967550702, 40.95848192199688], "label": "Playing accordion"}, {"segment": [44.21346061778472, 46.79031875195008], "label": "Playing accordion"}, {"segment": [50.1809215600624, 66.18456681435258], "label": "Playing accordion"}, {"segment": [68.49017672386896, 81.91696384399376], "label": "Playing accordion"}]}, "FPf7mAgdy0I": {"duration": 133.167, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FPf7mAgdy0I", "annotations": []}, "wEI_iPEfLNk": {"duration": 214.76, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wEI_iPEfLNk", "annotations": [{"segment": [125.64042063182528, 131.6711608221529], "label": "Washing face"}]}, "JyfelXz6GaA": {"duration": 123.902, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JyfelXz6GaA", "annotations": [{"segment": [13.890953271028037, 118.07310280373832], "label": "Futsal"}]}, "W40iaV960sw": {"duration": 16.974, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=W40iaV960sw", "annotations": [{"segment": [0.2, 16.721], "label": "Camel ride"}]}, "XFlKGUFgBnc": {"duration": 167.0, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XFlKGUFgBnc", "annotations": [{"segment": [10.421086666595915, 157.8794627794072], "label": "Zumba"}]}, "RNA9HtKreug": {"duration": 91.116, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=RNA9HtKreug", "annotations": [{"segment": [43.36115059748627, 90.12317705395628], "label": "Table soccer"}]}, "R_YZNqP1gSE": {"duration": 119.4, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=R_YZNqP1gSE", "annotations": [{"segment": [42.74801521856792, 52.06130829450865], "label": "Getting a 
haircut"}, {"segment": [57.463017684867395, 83.7265038876755], "label": "Getting a haircut"}]}, "Amnp8434e90": {"duration": 193.49, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Amnp8434e90", "annotations": [{"segment": [6.337347893915757, 105.62246489859594], "label": "Windsurfing"}, {"segment": [117.99538221528861, 170.8066146645866], "label": "Windsurfing"}]}, "ADN1EPKzwlE": {"duration": 19.598, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ADN1EPKzwlE", "annotations": []}, "CV3pIbE8BnE": {"duration": 217.11, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=CV3pIbE8BnE", "annotations": [{"segment": [93.6576, 203.10202808112325], "label": "Shaving legs"}]}, "sVT71OQjHE0": {"duration": 55.449, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sVT71OQjHE0", "annotations": [{"segment": [13.40803899375975, 55.44873015873016], "label": "Plastering"}]}, "8tddzer_NfY": {"duration": 189.081, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8tddzer_NfY", "annotations": [{"segment": [20.64837183963549, 173.15134606864277], "label": "Baton twirling"}]}, "R547-iOgb6g": {"duration": 141.851, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=R547-iOgb6g", "annotations": [{"segment": [35.62092538683882, 49.535976788707984], "label": "Mooping floor"}, {"segment": [73.16947678870797, 89.73501417188555], "label": "Mooping floor"}]}, "opCMfcAl7yw": {"duration": 211.162, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=opCMfcAl7yw", "annotations": [{"segment": [0.2, 187.07226209048363], "label": "Swimming"}]}, "yMK2zxDDs2A": {"duration": 65.85, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yMK2zxDDs2A", "annotations": []}, "2l50doS2wB4": {"duration": 203.66, "subset": 
"training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2l50doS2wB4", "annotations": [{"segment": [11.30527762, 151.5200546021841], "label": "Cleaning shoes"}]}, "Zp9mSiw8Vkw": {"duration": 49.807, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Zp9mSiw8Vkw", "annotations": [{"segment": [0.2, 48.30543213728549], "label": "Table soccer"}]}, "nfjIQXyL7_Y": {"duration": 11.75, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nfjIQXyL7_Y", "annotations": [{"segment": [0.7148557628705148, 11.749296], "label": "Doing a powerbomb"}]}, "D74hnUdBs04": {"duration": 128.36, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=D74hnUdBs04", "annotations": [{"segment": [5.807234585982079, 128.35990929705216], "label": "Volleyball"}]}, "mAWuiYWXmb8": {"duration": 23.592, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=mAWuiYWXmb8", "annotations": [{"segment": [0.01, 20.768230109204367], "label": "Using the rowing machine"}]}, "2TEJnQzCPUM": {"duration": 72.818, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2TEJnQzCPUM", "annotations": [{"segment": [3.4053510140405616, 67.3309407176287], "label": "Rollerblading"}]}, "eSQnn2RFxeY": {"duration": 124.6, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eSQnn2RFxeY", "annotations": [{"segment": [14.861805732449298, 17.970157258190326], "label": "Sailing"}]}, "3IdaN49b9xg": {"duration": 36.99, "subset": "training", "resolution": "484x480", "url": "https://www.youtube.com/watch?v=3IdaN49b9xg", "annotations": [{"segment": [0.01, 27.98728893915757], "label": "Carving jack-o-lanterns"}]}, "bsl3oM4svdQ": {"duration": 49.273, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=bsl3oM4svdQ", "annotations": [{"segment": [0.01, 43.249115444617786], 
"label": "Kneeling"}]}, "ERubh80hIuE": {"duration": 194.05, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ERubh80hIuE", "annotations": []}, "qemvXlRMQgM": {"duration": 98.52000000000001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qemvXlRMQgM", "annotations": [{"segment": [79.77075827457098, 87.14840065834633], "label": "Smoking hookah"}, {"segment": [90.68352096723869, 95.60194922308892], "label": "Smoking hookah"}]}, "JUfowIpmwaE": {"duration": 187.78, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JUfowIpmwaE", "annotations": [{"segment": [0.01, 187.779794], "label": "Windsurfing"}]}, "EVcn8N-YxtI": {"duration": 175.2, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EVcn8N-YxtI", "annotations": [{"segment": [9.29267421581217, 170.27458930738183], "label": "Tango"}]}, "y7tActqdm9A": {"duration": 146.936, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=y7tActqdm9A", "annotations": [{"segment": [5.583153620176227, 144.01789661083043], "label": "Bullfighting"}]}, "IjKWgD0y4rc": {"duration": 147.378, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IjKWgD0y4rc", "annotations": [{"segment": [12.645417979984504, 147.37659863945578], "label": "Rafting"}]}, "RrVsNvO6Yd4": {"duration": 116.5, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=RrVsNvO6Yd4", "annotations": [{"segment": [7.1224431599999996, 111.67862714508581], "label": "Breakdancing"}]}, "Ol2rfUv3zHI": {"duration": 157.339, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ol2rfUv3zHI", "annotations": [{"segment": [10.55083151326053, 126.62607800312011], "label": "Playing drums"}]}, "KZ9RARLLAys": {"duration": 154.437, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=KZ9RARLLAys", "annotations": []}, "2xQVZorob1Q": {"duration": 136.02, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2xQVZorob1Q", "annotations": [{"segment": [37.336046451069485, 127.99004645106947], "label": "Shoveling snow"}]}, "5VHqMj5iikc": {"duration": 148.655, "subset": "training", "resolution": "1280x544", "url": "https://www.youtube.com/watch?v=5VHqMj5iikc", "annotations": [{"segment": [39.64238845553822, 66.99795475819032], "label": "BMX"}, {"segment": [109.43699219968799, 136.79624024960998], "label": "BMX"}]}, "lp_Rwbp8S1k": {"duration": 170.3, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=lp_Rwbp8S1k", "annotations": [{"segment": [43.55415288611544, 167.04611076443058], "label": "Shoveling snow"}]}, "SLrhe0T_ep0": {"duration": 23.081, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=SLrhe0T_ep0", "annotations": []}, "-xiKk3ZKDt8": {"duration": 38.96, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-xiKk3ZKDt8", "annotations": []}, "T1T3UvwZcKU": {"duration": 6.223, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=T1T3UvwZcKU", "annotations": [{"segment": [0.23785058068989423, 3.150306670770232], "label": "Fun sliding down"}]}, "i7L1KNGeEqU": {"duration": 158.267, "subset": "training", "resolution": "2560x1080", "url": "https://www.youtube.com/watch?v=i7L1KNGeEqU", "annotations": [{"segment": [0.2, 13.082235569422776], "label": "Skiing"}, {"segment": [21.96828237129485, 140.96039157566304], "label": "Skiing"}, {"segment": [142.19472074882998, 152.06935413416537], "label": "Skiing"}]}, "5imysrEC7ok": {"duration": 65.64, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=5imysrEC7ok", "annotations": []}, "3FAvxuTw4NI": {"duration": 148.422, "subset": "validation", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=3FAvxuTw4NI", "annotations": [{"segment": [5.325592553822154, 139.85469141341653], "label": "Kite flying"}]}, "PfyOv5VFk74": {"duration": 79.37, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=PfyOv5VFk74", "annotations": []}, "ECjumI_otNw": {"duration": 160.59, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ECjumI_otNw", "annotations": [{"segment": [11.519447737909516, 158.03646021840873], "label": "Cheerleading"}]}, "LlZPcEaTNcM": {"duration": 70.84, "subset": "testing", "resolution": "3840x2160", "url": "https://www.youtube.com/watch?v=LlZPcEaTNcM", "annotations": []}, "eS-VU_XV1K8": {"duration": 123.9, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eS-VU_XV1K8", "annotations": []}, "6YmZNDXap64": {"duration": 142.339, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=6YmZNDXap64", "annotations": [{"segment": [4.663189956169676, 142.13832199546488], "label": "Playing rubik cube"}]}, "j81ndpMC9gI": {"duration": 91.766, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=j81ndpMC9gI", "annotations": []}, "S5MD51gg-vA": {"duration": 193.03, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=S5MD51gg-vA", "annotations": [{"segment": [6.3255865834633385, 30.121840873634945], "label": "Tennis serve with ball bouncing"}, {"segment": [40.06204836193447, 50.303474258970354], "label": "Tennis serve with ball bouncing"}, {"segment": [106.028879875195, 180.12860842433696], "label": "Tennis serve with ball bouncing"}]}, "FC5FsugGt8o": {"duration": 137.788, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=FC5FsugGt8o", "annotations": [{"segment": [0, 137.441], "label": "Playing beach volleyball"}]}, "bb9AIdvKkZU": {"duration": 31.16, "subset": "validation", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=bb9AIdvKkZU", "annotations": [{"segment": [0.01, 12.785319522620904], "label": "Shoveling snow"}, {"segment": [21.827408614664584, 31.161178], "label": "Shoveling snow"}]}, "ZJnXQw84xvo": {"duration": 79.413, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZJnXQw84xvo", "annotations": []}, "aS6Qv9N5mG0": {"duration": 26.7, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aS6Qv9N5mG0", "annotations": [{"segment": [0, 26.702946], "label": "Playing badminton"}]}, "J8Ziy8QR8WQ": {"duration": 51.74, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=J8Ziy8QR8WQ", "annotations": [{"segment": [5.242691107644306, 48.232758190327615], "label": "Cricket"}]}, "U6KrVIJSeDo": {"duration": 66.224, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=U6KrVIJSeDo", "annotations": [{"segment": [29.950530421216847, 63.515780031201245], "label": "Using the rowing machine"}]}, "cK1ssoaX768": {"duration": 208.887, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=cK1ssoaX768", "annotations": [{"segment": [0.01, 208.841], "label": "Beach soccer"}]}, "5LHIoaSvKCU": {"duration": 70.148, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=5LHIoaSvKCU", "annotations": [{"segment": [16.404290171606863, 52.83688455538221], "label": "Making a lemonade"}]}, "98YZQ0gNjpQ": {"duration": 142.73, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=98YZQ0gNjpQ", "annotations": [{"segment": [13.137659764893998, 138.05676363108947], "label": "Doing motocross"}]}, "ZXm8ed-ZHRs": {"duration": 169.23, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZXm8ed-ZHRs", "annotations": [{"segment": [6.940193000000001, 161.921887675507], "label": "Using uneven bars"}]}, 
"0y_5NIIvUzI": {"duration": 202.896, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0y_5NIIvUzI", "annotations": [{"segment": [28.39912400418955, 194.06068069529525], "label": "Skiing"}]}, "59nQKYriHQc": {"duration": 50.55, "subset": "training", "resolution": "576x360", "url": "https://www.youtube.com/watch?v=59nQKYriHQc", "annotations": [{"segment": [13.201142745709829, 25.180687207488297], "label": "Bathing dog"}]}, "NEthbdTgx-M": {"duration": 237.517, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NEthbdTgx-M", "annotations": [{"segment": [0.01, 237.51691609977325], "label": "Snowboarding"}]}, "28WDlVOgslg": {"duration": 141.07999999999998, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=28WDlVOgslg", "annotations": []}, "rCSFBiXxbVs": {"duration": 56.24, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rCSFBiXxbVs", "annotations": [{"segment": [24.390588049921995, 44.39437968798752], "label": "Getting a haircut"}]}, "uMw7YplUbAM": {"duration": 126.78, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uMw7YplUbAM", "annotations": [{"segment": [18.888581828987444, 96.0251987222346], "label": "Playing accordion"}]}, "4WGjeXTgpis": {"duration": 93.46, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4WGjeXTgpis", "annotations": []}, "pe0MhPhhVIk": {"duration": 52.29, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pe0MhPhhVIk", "annotations": [{"segment": [0, 52.291336], "label": "Playing badminton"}]}, "pXcFBfv5Sf4": {"duration": 84.382, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pXcFBfv5Sf4", "annotations": [{"segment": [4.212483694227769, 9.083167965678626], "label": "BMX"}, {"segment": [17.37649523868955, 24.221781241809673], "label": 
"BMX"}, {"segment": [42.9146776349454, 47.12716132917317], "label": "BMX"}, {"segment": [50.023243868954765, 63.58217575975039], "label": "BMX"}, {"segment": [74.7715855725429, 77.27274776599064], "label": "BMX"}]}, "EBhAGOXvPcA": {"duration": 79.715, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EBhAGOXvPcA", "annotations": [{"segment": [56.406659461390134, 64.09284188751766], "label": "Beer pong"}, {"segment": [68.30784515345857, 73.01873115656899], "label": "Beer pong"}]}, "m--b-Ltjm_Y": {"duration": 143.152, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=m--b-Ltjm_Y", "annotations": [{"segment": [53.76480655226209, 144], "label": "Powerbocking"}]}, "4k-F7EXpcrM": {"duration": 11.03, "subset": "training", "resolution": "426x226", "url": "https://www.youtube.com/watch?v=4k-F7EXpcrM", "annotations": [{"segment": [0.2, 10.781], "label": "Using the rowing machine"}]}, "2NAs35b7fck": {"duration": 158.616, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=2NAs35b7fck", "annotations": [{"segment": [0.2, 158.4149433106576], "label": "Futsal"}]}, "4_MR2Tpc7SM": {"duration": 11.982, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=4_MR2Tpc7SM", "annotations": [{"segment": [7.046838712948519, 9.046869859594384], "label": "Croquet"}]}, "6kgJx6ahgq0": {"duration": 72.66, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=6kgJx6ahgq0", "annotations": [{"segment": [0, 0], "label": "Playing kickball"}]}, "5K7V1uJEYFM": {"duration": 17.95, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5K7V1uJEYFM", "annotations": []}, "-fMxoShIXiM": {"duration": 117.168, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=-fMxoShIXiM", "annotations": [{"segment": [44.4176003693209, 95.41558519188767], "label": "Fixing the 
roof"}]}, "cy-RLevyo_8": {"duration": 142.292, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=cy-RLevyo_8", "annotations": [{"segment": [22.856198127925122, 124.71227145085803], "label": "Roof shingle removal"}]}, "Z0eBz6QsI-c": {"duration": 41.19, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Z0eBz6QsI-c", "annotations": [{"segment": [0.01, 40.632236134561246], "label": "Playing racquetball"}]}, "8fVB8gAjTO4": {"duration": 75.698, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8fVB8gAjTO4", "annotations": [{"segment": [6.652490558412063, 70.06765651844003], "label": "Playing drums"}]}, "SXZjg8ap1Uk": {"duration": 186.18, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SXZjg8ap1Uk", "annotations": [{"segment": [17.18549043049322, 177.81183622488575], "label": "Cheerleading"}]}, "rsJUOsr1u3o": {"duration": 98.36, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=rsJUOsr1u3o", "annotations": [{"segment": [3.5292890219010125, 35.292890219010125], "label": "Making an omelette"}, {"segment": [41.27733682136401, 78.2581466302652], "label": "Making an omelette"}]}, "ChH3zlLeWug": {"duration": 159.38, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ChH3zlLeWug", "annotations": [{"segment": [10.494511324141003, 109.05263978702659], "label": "Shaving"}]}, "iksziMLKsBQ": {"duration": 43.05, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=iksziMLKsBQ", "annotations": [{"segment": [4.753533395801711, 7.833288088762359], "label": "Playing pool"}, {"segment": [15.934381395202232, 26.177912948856974], "label": "Playing pool"}]}, "ROrLMf0zXko": {"duration": 49.71, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ROrLMf0zXko", "annotations": []}, "UqE_T7oG1Gc": 
{"duration": 196.047, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=UqE_T7oG1Gc", "annotations": [{"segment": [23.94082968464575, 180.59446985287005], "label": "Braiding hair"}]}, "1dvrNvxw43Q": {"duration": 47.323, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=1dvrNvxw43Q", "annotations": [{"segment": [0.2, 41.77518224299066], "label": "Ice fishing"}]}, "AKk_3aRSVec": {"duration": 145.8, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AKk_3aRSVec", "annotations": []}, "03JdaRepHkA": {"duration": 123.48, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=03JdaRepHkA", "annotations": [{"segment": [21.191903276131043, 86.30884243369735], "label": "Mixing drinks"}]}, "efRG5AMu1dU": {"duration": 30.14, "subset": "testing", "resolution": "308x232", "url": "https://www.youtube.com/watch?v=efRG5AMu1dU", "annotations": []}, "V4ZKb2ne34U": {"duration": 68.36, "subset": "testing", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=V4ZKb2ne34U", "annotations": []}, "uZNgbdDLclM": {"duration": 24.521, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=uZNgbdDLclM", "annotations": []}, "tmOndbOAHvM": {"duration": 193.91, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=tmOndbOAHvM", "annotations": []}, "9OVSvoKwa0A": {"duration": 111.316, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=9OVSvoKwa0A", "annotations": [{"segment": [0.2, 107.78951794071763], "label": "Riding bumper cars"}]}, "87fIi0-1JVM": {"duration": 208.73, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=87fIi0-1JVM", "annotations": [{"segment": [45.739581237129485, 119.63911818252731], "label": "Getting a piercing"}]}, "sk6hmmlS8kY": {"duration": 82.431, "subset": "testing", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=sk6hmmlS8kY", "annotations": []}, "SSldR9yOJq8": {"duration": 236.217, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=SSldR9yOJq8", "annotations": [{"segment": [9.949841128339012, 233.6370101987753], "label": "Canoeing"}]}, "soVddfhLIy0": {"duration": 224.79, "subset": "testing", "resolution": "844x480", "url": "https://www.youtube.com/watch?v=soVddfhLIy0", "annotations": []}, "nEAypudaSFs": {"duration": 126.78, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nEAypudaSFs", "annotations": [{"segment": [0.01, 122.62744224054676], "label": "Playing harmonica"}]}, "Eo3PUIfZSJU": {"duration": 101.19, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Eo3PUIfZSJU", "annotations": [{"segment": [7.103995553822153, 99.14020461778472], "label": "Using the balance beam"}]}, "FXN6qiDsClw": {"duration": 87.377, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FXN6qiDsClw", "annotations": [{"segment": [20.415114332337296, 29.80606692521245], "label": "Welding"}, {"segment": [44.913251531142045, 53.35149796884735], "label": "Welding"}, {"segment": [68.73088386292835, 72.67780590654206], "label": "Welding"}, {"segment": [80.02724695327103, 81.38825455451713], "label": "Welding"}]}, "wZZ1W6D1nwA": {"duration": 58.561, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wZZ1W6D1nwA", "annotations": [{"segment": [4.012923556942278, 45.160834633385335], "label": "Rope skipping"}]}, "PXBcPu2_KOo": {"duration": 164.47, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PXBcPu2_KOo", "annotations": [{"segment": [18.88149966, 91.7252418096724], "label": "Using the balance beam"}]}, "ReumF9EcX7c": {"duration": 196.882, "subset": "testing", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=ReumF9EcX7c", "annotations": []}, "gvHQDDF7EGU": {"duration": 172.94, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gvHQDDF7EGU", "annotations": [{"segment": [1.8886026298194787, 168.3554336349454], "label": "Playing saxophone"}]}, "g_KbloQigw8": {"duration": 17.903, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=g_KbloQigw8", "annotations": [{"segment": [6.66701872074883, 16.988344773790953], "label": "Welding"}]}, "d7gY7YJ3Fdc": {"duration": 12.354, "subset": "validation", "resolution": "144x176", "url": "https://www.youtube.com/watch?v=d7gY7YJ3Fdc", "annotations": [{"segment": [0.7130444898595945, 5.5694556099843995], "label": "Putting on shoes"}]}, "KcgpoXEh44M": {"duration": 98.85, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KcgpoXEh44M", "annotations": [{"segment": [28.1357488299532, 85.79476287051483], "label": "Wrapping presents"}]}, "l1gyH11zqNk": {"duration": 131.193, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=l1gyH11zqNk", "annotations": []}, "7dXqXFoju2M": {"duration": 79.32, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=7dXqXFoju2M", "annotations": []}, "yai74Fw-j24": {"duration": 115.473, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=yai74Fw-j24", "annotations": []}, "UlWLcqIvLKk": {"duration": 131.17, "subset": "validation", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=UlWLcqIvLKk", "annotations": [{"segment": [3.6833875132605307, 130.14635880187208], "label": "Ironing clothes"}]}, "ytXLI-2wgaQ": {"duration": 185.876, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ytXLI-2wgaQ", "annotations": [{"segment": [17.6886426749587, 169.3470052815718], "label": "Playing ice hockey"}]}, "FhWxgGQtCJY": {"duration": 
117.586, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FhWxgGQtCJY", "annotations": [{"segment": [0.8058846028376427, 108.31817956078157], "label": "Spread mulch"}]}, "QuU6I9w1btI": {"duration": 82.94, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QuU6I9w1btI", "annotations": [{"segment": [10.092724820592824, 41.7942322698908], "label": "Vacuuming floor"}]}, "fJEXzQ0L1uU": {"duration": 185.02, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fJEXzQ0L1uU", "annotations": [{"segment": [96.40491650670826, 119.78455194695788], "label": "Hopscotch"}]}, "exCENNu1qBU": {"duration": 123.554, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=exCENNu1qBU", "annotations": [{"segment": [0.2, 119.51191302689301], "label": "Powerbocking"}]}, "i-8vfNV-klk": {"duration": 100.264, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=i-8vfNV-klk", "annotations": [{"segment": [0, 5.318202779811873], "label": "Trimming branches or hedges"}]}, "CbGBQPE9t6g": {"duration": 56.402, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CbGBQPE9t6g", "annotations": []}, "Fn0-GpW-Ufw": {"duration": 65.2, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=Fn0-GpW-Ufw", "annotations": []}, "CvkyjR4yk60": {"duration": 197.93, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CvkyjR4yk60", "annotations": [{"segment": [13.895023619344773, 149.75747678627144], "label": "Breakdancing"}]}, "xiICsWY0xOk": {"duration": 111.967, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xiICsWY0xOk", "annotations": [{"segment": [8.266684691912529, 19.079647442952716], "label": "Making a lemonade"}, {"segment": [21.695686818204372, 93.54956727172475], "label": "Making a 
lemonade"}]}, "Kv-A_H9TXug": {"duration": 91.626, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Kv-A_H9TXug", "annotations": [{"segment": [33.85328705148206, 67.42089235569424], "label": "Doing crunches"}]}, "kS_wMOFfMDk": {"duration": 23.69, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=kS_wMOFfMDk", "annotations": [{"segment": [1.551860817472699, 22.834523457098285], "label": "Washing hands"}]}, "a2IvS0xbNGQ": {"duration": 176.867, "subset": "testing", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=a2IvS0xbNGQ", "annotations": []}, "JGxf-jke0e8": {"duration": 27.12, "subset": "testing", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=JGxf-jke0e8", "annotations": []}, "Pho9Hgdvc0g": {"duration": 120.93, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Pho9Hgdvc0g", "annotations": []}, "TmA7xnrULME": {"duration": 64.134, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TmA7xnrULME", "annotations": [{"segment": [0.2, 32.04209672386895], "label": "Building sandcastles"}, {"segment": [52.69816536661466, 64.38799999999999], "label": "Building sandcastles"}]}, "zoWqv-sIdBY": {"duration": 100.5, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=zoWqv-sIdBY", "annotations": []}, "L0arB1mOuSE": {"duration": 91.14, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=L0arB1mOuSE", "annotations": [{"segment": [0.01, 11.090146348711091], "label": "Playing kickball"}, {"segment": [18.62575861129683, 27.867547235222737], "label": "Playing kickball"}, {"segment": [46.066761756184526, 47.34639402718966], "label": "Playing kickball"}, {"segment": [55.45073174355546, 60.42707946413095], "label": "Playing kickball"}, {"segment": [70.2375935418369, 85.45099943045341], "label": "Playing kickball"}]}, "vKYHz3PyXGM": 
{"duration": 191.519, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vKYHz3PyXGM", "annotations": []}, "AjtrZW0HR1g": {"duration": 131.17, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AjtrZW0HR1g", "annotations": []}, "mZe-MuljtiA": {"duration": 171.248, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mZe-MuljtiA", "annotations": []}, "l-YtPYZ_534": {"duration": 104.328, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=l-YtPYZ_534", "annotations": [{"segment": [45.66335788161994, 54.60102579439252], "label": "Croquet"}, {"segment": [61.10114791277258, 74.91390741433023], "label": "Croquet"}, {"segment": [82.87655700934579, 84.01407838006232], "label": "Croquet"}]}, "BSKolF3MMe4": {"duration": 178.051, "subset": "validation", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=BSKolF3MMe4", "annotations": [{"segment": [0.01, 175.94284718081568], "label": "Blowing leaves"}]}, "x86YIU9TIPw": {"duration": 37.99, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=x86YIU9TIPw", "annotations": [{"segment": [2.0123120124804994, 34.56441809672387], "label": "Playing lacrosse"}]}, "gCf7wYa4AFA": {"duration": 130.2, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gCf7wYa4AFA", "annotations": [{"segment": [23.580737279999997, 54.70882074], "label": "Playing field hockey"}, {"segment": [63.25605684, 101.74921488], "label": "Playing field hockey"}]}, "KBnNTxNygkI": {"duration": 223.21, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KBnNTxNygkI", "annotations": []}, "DzjOS3pQTXs": {"duration": 172.9, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=DzjOS3pQTXs", "annotations": []}, "Bg_CMSihJl0": {"duration": 38.36, "subset": "training", "resolution": 
"320x240", "url": "https://www.youtube.com/watch?v=Bg_CMSihJl0", "annotations": [{"segment": [3.3512080873634944, 27.22856570982839], "label": "Knitting"}]}, "iixZLnyLuN8": {"duration": 61.673, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=iixZLnyLuN8", "annotations": []}, "FZk40J_drws": {"duration": 151.302, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FZk40J_drws", "annotations": [{"segment": [23.095825545171337, 113.59375309345793], "label": "Running a marathon"}]}, "5dN-MpXG9OE": {"duration": 9.5, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5dN-MpXG9OE", "annotations": []}, "dufQjqa3v6w": {"duration": 55.59, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dufQjqa3v6w", "annotations": [{"segment": [29.398635304212167, 55.58857], "label": "Playing harmonica"}]}, "TY_xri4VWPg": {"duration": 10.26, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TY_xri4VWPg", "annotations": []}, "eyBSKNXo6Vo": {"duration": 124.714, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eyBSKNXo6Vo", "annotations": [{"segment": [18.53227634702235, 74.86728740232714], "label": "Waterskiing"}, {"segment": [79.52949521380063, 81.47208180191458], "label": "Waterskiing"}, {"segment": [86.13428961338809, 92.15630803654136], "label": "Waterskiing"}, {"segment": [96.42999853039207, 104.78312085928209], "label": "Waterskiing"}]}, "crz2vi_dguk": {"duration": 172.478, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=crz2vi_dguk", "annotations": [{"segment": [14.261036857800843, 23.40962654016365], "label": "Cleaning sink"}, {"segment": [25.83131204431851, 163.0601571575663], "label": "Cleaning sink"}]}, "Q9UovyMsTLc": {"duration": 154.32, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=Q9UovyMsTLc", "annotations": [{"segment": [34.3199556396057, 115.67985915904583], "label": "Making a cake"}]}, "uaLMHEtFlNA": {"duration": 175.61, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uaLMHEtFlNA", "annotations": [{"segment": [4.6361568, 173.19086680577223], "label": "Mowing the lawn"}]}, "J7Q3b1uFHyc": {"duration": 115.056, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=J7Q3b1uFHyc", "annotations": [{"segment": [24.78000233478727, 106.66174918017128], "label": "Snow tubing"}]}, "NlbnZy3zyVk": {"duration": 181.26, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NlbnZy3zyVk", "annotations": []}, "4w_ScRrkkmU": {"duration": 183.531, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=4w_ScRrkkmU", "annotations": []}, "a5Xc9ZgN2yo": {"duration": 19.48, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=a5Xc9ZgN2yo", "annotations": [{"segment": [0.5457566302652106, 18.88924336973479], "label": "Hopscotch"}]}, "yggwFa68Cp4": {"duration": 110.2, "subset": "testing", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=yggwFa68Cp4", "annotations": []}, "pYaZLHpl_Ww": {"duration": 197.6, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pYaZLHpl_Ww", "annotations": []}, "Acogo5mgX4Q": {"duration": 86.286, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Acogo5mgX4Q", "annotations": [{"segment": [26.030146417445483, 74.08235514018692], "label": "Raking leaves"}]}, "G0VWk5WqDp0": {"duration": 6.29, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=G0VWk5WqDp0", "annotations": []}, "OhVBwILhXvs": {"duration": 37.872, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=OhVBwILhXvs", 
"annotations": []}, "6QrVxwNUbBk": {"duration": 184.878, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6QrVxwNUbBk", "annotations": [{"segment": [40.95558774731941, 81.33433623059209], "label": "Skiing"}, {"segment": [108.73420127281283, 177.0896529828393], "label": "Skiing"}]}, "eQMT-WmxKM8": {"duration": 67.129, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=eQMT-WmxKM8", "annotations": [{"segment": [2.5639383775351012, 53.738055382215286], "label": "Baton twirling"}]}, "At7fzSxdiCg": {"duration": 43.61, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=At7fzSxdiCg", "annotations": [{"segment": [2.480405616224649, 42.234851794071766], "label": "Triple jump"}]}, "ygmYkKbS-4U": {"duration": 113.035, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ygmYkKbS-4U", "annotations": [{"segment": [0.01, 38.089648190009235], "label": "Mooping floor"}]}, "VOLBBe1br3Q": {"duration": 12.28, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=VOLBBe1br3Q", "annotations": [{"segment": [0.01, 4.733211677067083], "label": "Cricket"}]}, "Yx5dmoGLD-4": {"duration": 160.497, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=Yx5dmoGLD-4", "annotations": []}, "hnO1Fse52oA": {"duration": 217.386, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hnO1Fse52oA", "annotations": []}, "_-4ngMPCA9A": {"duration": 154.25, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=_-4ngMPCA9A", "annotations": [{"segment": [30.0031675, 88.09988750000001], "label": "Washing dishes"}]}, "XOtnVABMQhM": {"duration": 168.322, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XOtnVABMQhM", "annotations": []}, "GlJ4DvArV6Q": {"duration": 133.61, "subset": "validation", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=GlJ4DvArV6Q", "annotations": [{"segment": [13.76033073322933, 118.63073010920436], "label": "Playing polo"}]}, "kN2ZNcn241g": {"duration": 40.19, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=kN2ZNcn241g", "annotations": [{"segment": [4.138513010920437, 29.91016221528861], "label": "Using uneven bars"}]}, "ZBP_mLWROZU": {"duration": 160.381, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZBP_mLWROZU", "annotations": []}, "oHUsS3B4P4g": {"duration": 66.34, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=oHUsS3B4P4g", "annotations": []}, "B8KJJecq2F0": {"duration": 166.84, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=B8KJJecq2F0", "annotations": [{"segment": [5.59586859392743, 84.71884778248274], "label": "Cricket"}, {"segment": [85.00076415000002, 159.1569137296104], "label": "Cricket"}]}, "AB5cWhlQ0Kk": {"duration": 90.976, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AB5cWhlQ0Kk", "annotations": []}, "I-slndrzKIc": {"duration": 126.48, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=I-slndrzKIc", "annotations": []}, "1X4hgrBjw-U": {"duration": 78.136, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1X4hgrBjw-U", "annotations": [{"segment": [68.74917682371296, 78.135146], "label": "Having an ice cream"}]}, "KRES3eBM2l4": {"duration": 40.705, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KRES3eBM2l4", "annotations": [{"segment": [0.2, 20.384040341653666], "label": "Disc dog"}]}, "_qv7KzCGLMc": {"duration": 92.0, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_qv7KzCGLMc", "annotations": []}, "Y7yGeNl7POk": {"duration": 21.32, "subset": 
"training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Y7yGeNl7POk", "annotations": [{"segment": [0, 21.315917], "label": "Hopscotch"}]}, "m5NK0eErs90": {"duration": 75.81, "subset": "validation", "resolution": "632x360", "url": "https://www.youtube.com/watch?v=m5NK0eErs90", "annotations": [{"segment": [6.918423556942278, 68.88857644305773], "label": "Playing water polo"}]}, "BTEv-FJHs8c": {"duration": 71.054, "subset": "testing", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=BTEv-FJHs8c", "annotations": []}, "jPaeFy4Phz4": {"duration": 204.429, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jPaeFy4Phz4", "annotations": [{"segment": [52.667399904257174, 193.41099448058117], "label": "Braiding hair"}]}, "-npRRmY2wBs": {"duration": 114.661, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-npRRmY2wBs", "annotations": [{"segment": [12.159482059282372, 106.59287987519501], "label": "Bullfighting"}]}, "JNr0oI927ng": {"duration": 12.68, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JNr0oI927ng", "annotations": [{"segment": [7.5867177, 11.891611544461778], "label": "Springboard diving"}]}, "RG0wtfLnNrE": {"duration": 52.222, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=RG0wtfLnNrE", "annotations": [{"segment": [2.6963601092256395, 26.73619943737494], "label": "Rafting"}, {"segment": [29.57874800658178, 52.221678], "label": "Rafting"}]}, "ursegSrjG1M": {"duration": 115.264, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ursegSrjG1M", "annotations": [{"segment": [79.83931442155647, 95.48350296879876], "label": "Spread mulch"}]}, "5BYayNkmlBo": {"duration": 29.977, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=5BYayNkmlBo", "annotations": [{"segment": [3.1878642294967507, 
5.755983855664975], "label": "Calf roping"}]}, "ltlJ2sOx4as": {"duration": 183.25, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ltlJ2sOx4as", "annotations": []}, "byO53jHoAwI": {"duration": 135.63, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=byO53jHoAwI", "annotations": [{"segment": [4.866518474258971, 135.627754], "label": "Playing bagpipes"}]}, "cAmGHiqmnaw": {"duration": 25.078, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cAmGHiqmnaw", "annotations": [{"segment": [5.655123432887954, 19.539427942996817], "label": "Calf roping"}]}, "bWBcYdYEup4": {"duration": 146.449, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bWBcYdYEup4", "annotations": [{"segment": [0.2, 131.55534477379098], "label": "Doing fencing"}]}, "2dFLbFsFyuY": {"duration": 97.71000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2dFLbFsFyuY", "annotations": [{"segment": [4.420535102964119, 22.712404494539783], "label": "Shot put"}, {"segment": [30.638881230889233, 40.394544906396256], "label": "Shot put"}, {"segment": [49.235615112324496, 63.10694940093604], "label": "Shot put"}]}, "ecUypvzBAOQ": {"duration": 157.73, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=ecUypvzBAOQ", "annotations": [{"segment": [4.673851794071763, 16.481477379095164], "label": "Preparing pasta"}, {"segment": [40.09672854914197, 149.33620436817472], "label": "Preparing pasta"}]}, "exxji9vqjSw": {"duration": 185.621, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=exxji9vqjSw", "annotations": [{"segment": [23.992559190031148, 143.39267289719626], "label": "Snowboarding"}, {"segment": [153.51110747663552, 173.74797663551402], "label": "Snowboarding"}]}, "g2GZd1160m4": {"duration": 52.919, "subset": "training", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=g2GZd1160m4", "annotations": [{"segment": [12.79615074102964, 49.86370998439937], "label": "Capoeira"}]}, "iJ6rHJf_Hgo": {"duration": 134.165, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iJ6rHJf_Hgo", "annotations": [{"segment": [7.30289236828974, 118.51555891883407], "label": "Longboarding"}]}, "p1pkFRHG7UY": {"duration": 35.13, "subset": "training", "resolution": "624x352", "url": "https://www.youtube.com/watch?v=p1pkFRHG7UY", "annotations": [{"segment": [8.19451248049922, 18.77338143525741], "label": "Bungee jumping"}]}, "GQ1eaxmDlzQ": {"duration": 106.67, "subset": "training", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=GQ1eaxmDlzQ", "annotations": [{"segment": [15.367415, 92.4443716], "label": "Tango"}]}, "-ibmoYKRxQk": {"duration": 121.905, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-ibmoYKRxQk", "annotations": [{"segment": [17.490190327613107, 113.51475351014041], "label": "Snow tubing"}]}, "TFIlTCvL4oQ": {"duration": 221.96, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TFIlTCvL4oQ", "annotations": [{"segment": [72.71685610281554, 136.43067287861584], "label": "Snow tubing"}]}, "mixxaL3Bdv8": {"duration": 99.614, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mixxaL3Bdv8", "annotations": []}, "MW76ZGSmOR0": {"duration": 33.182, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=MW76ZGSmOR0", "annotations": []}, "CQvmcZSfyhI": {"duration": 199.82999999999998, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CQvmcZSfyhI", "annotations": [{"segment": [0.01, 7.947698894695788], "label": "Cheerleading"}, {"segment": [32.258307278471136, 196.19907663572542], "label": "Cheerleading"}]}, "5XiBb9XGsg8": {"duration": 125.829, "subset": "training", 
"resolution": "272x480", "url": "https://www.youtube.com/watch?v=5XiBb9XGsg8", "annotations": [{"segment": [10.583742132366964, 70.55828088244643], "label": "Playing rubik cube"}]}, "f9eW4rFQG-A": {"duration": 195.35, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=f9eW4rFQG-A", "annotations": [{"segment": [27.269016380655227, 31.534560842433695], "label": "Sailing"}]}, "dZ4y7J3ASEE": {"duration": 29.03, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dZ4y7J3ASEE", "annotations": [{"segment": [0.75810688, 29.024399], "label": "Playing kickball"}]}, "1lagsBNqNe4": {"duration": 163.68, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=1lagsBNqNe4", "annotations": [{"segment": [31.27094383775351, 140.46269109], "label": "Ironing clothes"}]}, "G5frRzhSNJ8": {"duration": 232.18, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=G5frRzhSNJ8", "annotations": [{"segment": [0.2, 11.952908326053043], "label": "Getting a haircut"}, {"segment": [23.543607308892355, 153.21455217940718], "label": "Getting a haircut"}, {"segment": [164.08083247581902, 192.33316124648985], "label": "Getting a haircut"}]}, "adLA8Fu4Wlc": {"duration": 57.656, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=adLA8Fu4Wlc", "annotations": []}, "_O6ONcIPR_s": {"duration": 40.148, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_O6ONcIPR_s", "annotations": [{"segment": [0.1876801872074883, 38.849798751950075], "label": "Kite flying"}]}, "ZUZ7EVzHS5E": {"duration": 51.41, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZUZ7EVzHS5E", "annotations": [{"segment": [0, 50.68716707644305], "label": "Playing polo"}]}, "_UlKLq9w_nw": {"duration": 167.161, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=_UlKLq9w_nw", "annotations": [{"segment": [19.069284519219323, 43.012356170309666], "label": "Shuffleboard"}]}, "K68iNoSnZMg": {"duration": 77.903, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=K68iNoSnZMg", "annotations": [{"segment": [0.01, 28.18493735830534], "label": "Peeling potatoes"}, {"segment": [41.89682660752652, 76.35856595955144], "label": "Peeling potatoes"}]}, "adUapXmi-xA": {"duration": 121.116, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=adUapXmi-xA", "annotations": [{"segment": [8.691580299531982, 103.54317400312011], "label": "Playing rubik cube"}]}, "ot-Y1sa-ujc": {"duration": 136.09, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ot-Y1sa-ujc", "annotations": [{"segment": [74.73391241185648, 82.37715345397815], "label": "Bungee jumping"}, {"segment": [87.04802297971919, 107.64231134321372], "label": "Bungee jumping"}]}, "PHIZza_xVog": {"duration": 69.15, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PHIZza_xVog", "annotations": [{"segment": [14.347613599782088, 40.88530491967978], "label": "Applying sunscreen"}]}, "wfFnIYdSjCI": {"duration": 26.12, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=wfFnIYdSjCI", "annotations": [{"segment": [0.996855694227769, 24.73690717628705], "label": "Playing lacrosse"}]}, "hJf7uOUiEFo": {"duration": 185.04, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hJf7uOUiEFo", "annotations": [{"segment": [100.13792043681747, 131.01618408736348], "label": "Blow-drying hair"}, {"segment": [154.11939781591263, 172.8792496099844], "label": "Blow-drying hair"}]}, "ibIRzsC9NjY": {"duration": 37.872, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ibIRzsC9NjY", "annotations": [{"segment": 
[0.19999999999999998, 36.59547581903276], "label": "Camel ride"}]}, "35WvCw9Qcqk": {"duration": 182.486, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=35WvCw9Qcqk", "annotations": [{"segment": [8.540662442824244, 101.91857181770264], "label": "Longboarding"}, {"segment": [107.32765803149132, 113.02143299337415], "label": "Longboarding"}, {"segment": [126.11711540570465, 130.95682412330507], "label": "Longboarding"}, {"segment": [142.3443736349454, 170.81324836193448], "label": "Longboarding"}]}, "yBL1hCKmX7s": {"duration": 15.47, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yBL1hCKmX7s", "annotations": [{"segment": [0.2171250278582572, 9.915376272193745], "label": "Javelin throw"}]}, "pG7TQ7HfWfg": {"duration": 115.33, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pG7TQ7HfWfg", "annotations": []}, "wVCDyGGog4I": {"duration": 233.27, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wVCDyGGog4I", "annotations": [{"segment": [28.90244538906081, 225.47206426600783], "label": "Tai chi"}]}, "g0L0LnF3JiY": {"duration": 83.337, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=g0L0LnF3JiY", "annotations": []}, "JYRVSy54UUY": {"duration": 134.72, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JYRVSy54UUY", "annotations": [{"segment": [36.47289391575663, 62.96041341653666], "label": "Playing accordion"}, {"segment": [69.68740249609984, 122.87265990639625], "label": "Playing accordion"}]}, "zrwpgILg7VI": {"duration": 159.75, "subset": "training", "resolution": "400x224", "url": "https://www.youtube.com/watch?v=zrwpgILg7VI", "annotations": [{"segment": [30.40546198718697, 152.52576013244612], "label": "Breakdancing"}]}, "1abFB1HE4T8": {"duration": 69.915, "subset": "testing", "resolution": "1080x1920", "url": 
"https://www.youtube.com/watch?v=1abFB1HE4T8", "annotations": []}, "z9uVyf0D8eM": {"duration": 176.7, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=z9uVyf0D8eM", "annotations": [{"segment": [94.70927394, 96.10346061], "label": "Playing bagpipes"}]}, "TfFVoogTDJY": {"duration": 60.163, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TfFVoogTDJY", "annotations": [{"segment": [0.2, 57.819220839813376], "label": "BMX"}]}, "Yl85vnsndx0": {"duration": 219.99, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Yl85vnsndx0", "annotations": [{"segment": [0.01, 199.39434987363495], "label": "Belly dance"}]}, "e3DB_y78IHc": {"duration": 60.02, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=e3DB_y78IHc", "annotations": []}, "OApnN8Jw8Z0": {"duration": 28.44, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=OApnN8Jw8Z0", "annotations": []}, "0IxId05Eo6k": {"duration": 85.171, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0IxId05Eo6k", "annotations": [{"segment": [25.701690327613107, 43.89875273010921], "label": "Tug of war"}, {"segment": [49.34458892355695, 67.27600078003121], "label": "Tug of war"}]}, "-DaX_1bBNAo": {"duration": 22.872, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-DaX_1bBNAo", "annotations": [{"segment": [8.017511700468019, 17.10402496099844], "label": "Throwing darts"}]}, "dnQcp43wbRY": {"duration": 143.337, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dnQcp43wbRY", "annotations": [{"segment": [0.2, 134.9916443057722], "label": "Assembling bicycle"}]}, "OyKEEws65l8": {"duration": 104.978, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OyKEEws65l8", "annotations": [{"segment": [4.897855127762614, 
96.81431644323749], "label": "Camel ride"}]}, "M7Lc8nh9auA": {"duration": 48.089, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=M7Lc8nh9auA", "annotations": [{"segment": [0.01, 43.13713241029641], "label": "Rollerblading"}]}, "uVL5BdceGR4": {"duration": 203.175, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uVL5BdceGR4", "annotations": []}, "K2kgYzeLl7U": {"duration": 211.14, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=K2kgYzeLl7U", "annotations": []}, "BhAQhPasmhU": {"duration": 154.51, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BhAQhPasmhU", "annotations": [{"segment": [0, 154.505578], "label": "Discus throw"}]}, "Vf02ZDnbduk": {"duration": 72.029, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Vf02ZDnbduk", "annotations": [{"segment": [27.020904984423673, 64.4868769470405], "label": "Elliptical trainer"}]}, "bpXFhOs6Yhg": {"duration": 63.136, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bpXFhOs6Yhg", "annotations": []}, "BshSmo_rRII": {"duration": 43.47, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=BshSmo_rRII", "annotations": [{"segment": [27.853745709828395, 34.76635413416537], "label": "Polishing shoes"}]}, "ScWdvpS6epQ": {"duration": 149.12, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ScWdvpS6epQ", "annotations": [{"segment": [7.444251576865796, 149.11766439909297], "label": "Cheerleading"}]}, "z1tV0-C3IBw": {"duration": 15.28, "subset": "validation", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=z1tV0-C3IBw", "annotations": [{"segment": [0.01, 15.278729], "label": "Mowing the lawn"}]}, "yAd6iTYWX_g": {"duration": 198.717, "subset": "testing", "resolution": "1440x1080", "url": 
"https://www.youtube.com/watch?v=yAd6iTYWX_g", "annotations": []}, "68m6uCPwwNo": {"duration": 9.823, "subset": "training", "resolution": "632x480", "url": "https://www.youtube.com/watch?v=68m6uCPwwNo", "annotations": [{"segment": [0.19999999999999998, 9.622040816326532], "label": "Croquet"}]}, "QudbWIbFWGs": {"duration": 102.17, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=QudbWIbFWGs", "annotations": []}, "5vwd9hV4PRY": {"duration": 212.57999999999998, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5vwd9hV4PRY", "annotations": []}, "0bbL30qENMg": {"duration": 201.99, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0bbL30qENMg", "annotations": [{"segment": [16.386117260530423, 24.894293530421216], "label": "Washing face"}, {"segment": [79.72476282527302, 132.03429100312013], "label": "Washing face"}, {"segment": [144.9541142277691, 159.44952565054604], "label": "Washing face"}]}, "ruNII4WvE3k": {"duration": 162.192, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ruNII4WvE3k", "annotations": [{"segment": [19.446817757009345, 159.11032710280372], "label": "Grooming dog"}]}, "aqpeiIMe47Q": {"duration": 173.5, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=aqpeiIMe47Q", "annotations": [{"segment": [75.51694305772232, 173.49950113378685], "label": "Preparing salad"}]}, "PxGggNnMGtQ": {"duration": 63.46, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PxGggNnMGtQ", "annotations": [{"segment": [1.5816952511700468, 56.34789332293292], "label": "Mixing drinks"}]}, "YAm1JXAtqh4": {"duration": 27.08, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YAm1JXAtqh4", "annotations": [{"segment": [0.3579414976599064, 19.307785491419654], "label": "Javelin throw"}]}, "j9ww01sQkvY": {"duration": 9.637, 
"subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=j9ww01sQkvY", "annotations": [{"segment": [0.7054597507788161, 9.200996323987539], "label": "Applying sunscreen"}]}, "ZBAQx9DxYTo": {"duration": 86.08, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZBAQx9DxYTo", "annotations": [{"segment": [58.85135179407176, 76.96978393135726], "label": "Archery"}]}, "2vy0dMXhlWI": {"duration": 110.9, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2vy0dMXhlWI", "annotations": [{"segment": [3.4587103276131046, 108.25763325429016], "label": "Chopping wood"}]}, "AItKK-a8eEI": {"duration": 237.378, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AItKK-a8eEI", "annotations": [{"segment": [74.76812897345285, 110.62322710429396], "label": "Surfing"}]}, "ri5WJu1K0ak": {"duration": 176.01, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=ri5WJu1K0ak", "annotations": [{"segment": [75.5101333305033, 155.3085768], "label": "Putting on makeup"}]}, "EYkkKSVHbmM": {"duration": 183.183, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=EYkkKSVHbmM", "annotations": []}, "LJdI1neOr2c": {"duration": 89.68, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LJdI1neOr2c", "annotations": [{"segment": [2.307125576443058, 88.71946534867395], "label": "Using the balance beam"}]}, "sX8Rr1o4XWM": {"duration": 74.792, "subset": "training", "resolution": "293x240", "url": "https://www.youtube.com/watch?v=sX8Rr1o4XWM", "annotations": [{"segment": [20.87151170046802, 50.03505304212168], "label": "Plastering"}, {"segment": [55.28348517940717, 68.57951326053042], "label": "Plastering"}]}, "hiYPv3MrrUw": {"duration": 56.8, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hiYPv3MrrUw", "annotations": 
[{"segment": [10.24773442, 44.50767321000001], "label": "Windsurfing"}]}, "Gp2bIEiDWTI": {"duration": 186.155, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Gp2bIEiDWTI", "annotations": [{"segment": [32.38098747351254, 46.61119721523554], "label": "Braiding hair"}, {"segment": [50.38655898344777, 93.65801309603405], "label": "Braiding hair"}, {"segment": [99.75667441391532, 140.99524142054116], "label": "Braiding hair"}]}, "O-hUa9e9_DE": {"duration": 207.66, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=O-hUa9e9_DE", "annotations": [{"segment": [29.793216848673946, 189.46425117004682], "label": "Preparing salad"}]}, "EBdkGs_jFDw": {"duration": 219.66, "subset": "training", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=EBdkGs_jFDw", "annotations": [{"segment": [47.633146904107456, 200.47043840937312], "label": "Cleaning windows"}]}, "8inrvRctXQ0": {"duration": 197.39, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8inrvRctXQ0", "annotations": [{"segment": [11.391209048361935, 183.79869734789392], "label": "Playing squash"}]}, "cIKAwgMLKw4": {"duration": 98.94, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cIKAwgMLKw4", "annotations": [{"segment": [57.57364106864274, 75.01552160686428], "label": "Shaving legs"}]}, "YaHTgeeDs7s": {"duration": 39.66, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YaHTgeeDs7s", "annotations": [{"segment": [0.2, 37.56201120431481], "label": "Riding bumper cars"}]}, "WNgI2qBvPrY": {"duration": 60.16, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WNgI2qBvPrY", "annotations": []}, "ySu0ymIa8yo": {"duration": 211.766, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ySu0ymIa8yo", "annotations": []}, "bWZkD_s1940": {"duration": 
159.893, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=bWZkD_s1940", "annotations": []}, "OIA7lPraPSM": {"duration": 218.383, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OIA7lPraPSM", "annotations": [{"segment": [0.2, 212.89099063962558], "label": "Longboarding"}]}, "KnHUAc20WEU": {"duration": 159.73, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=KnHUAc20WEU", "annotations": [{"segment": [84.640927, 131.06804879999999], "label": "Hammer throw"}]}, "lyjz4sNglQg": {"duration": 111.874, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lyjz4sNglQg", "annotations": [{"segment": [0.7667361786694393, 106.01866602290619], "label": "Decorating the Christmas tree"}]}, "YZQ_qh9wC4w": {"duration": 184.575, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=YZQ_qh9wC4w", "annotations": [{"segment": [46.647765547581905, 157.22024684555382], "label": "Swimming"}]}, "8yeUJm0Pl24": {"duration": 60.651, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8yeUJm0Pl24", "annotations": [{"segment": [17.50444024960998, 22.51922583463339], "label": "Beer pong"}]}, "uHPrH22rPgU": {"duration": 12.121, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uHPrH22rPgU", "annotations": [{"segment": [0.01, 12.120816], "label": "Throwing darts"}]}, "nQjxgOz1WYo": {"duration": 33.019, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nQjxgOz1WYo", "annotations": [{"segment": [0.01, 27.816128705148202], "label": "Playing blackjack"}]}, "D2Trvi7Im_s": {"duration": 42.261, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=D2Trvi7Im_s", "annotations": [{"segment": [0.01, 39.458198127925115], "label": "Rollerblading"}]}, "uOUjBTlwoxg": {"duration": 55.543, 
"subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uOUjBTlwoxg", "annotations": [{"segment": [10.657850487051483, 47.05051068673947], "label": "Snowboarding"}]}, "3nLA8R3ULSE": {"duration": 128.06, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3nLA8R3ULSE", "annotations": [{"segment": [0, 116.4371965678627], "label": "Doing motocross"}]}, "3JBUWRKezUI": {"duration": 123.88, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3JBUWRKezUI", "annotations": []}, "1Xt1gZfZ2Rw": {"duration": 160.218, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1Xt1gZfZ2Rw", "annotations": [{"segment": [15.36837987519501, 17.867303432137287], "label": "Waxing skis"}, {"segment": [52.602340873634944, 59.59932683307333], "label": "Waxing skis"}, {"segment": [63.347712168486744, 82.58942355694228], "label": "Waxing skis"}]}, "Htp7EK8IB18": {"duration": 38.36, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Htp7EK8IB18", "annotations": [{"segment": [0, 38.359364], "label": "Snow tubing"}]}, "2bS6JfPF1mk": {"duration": 23.453, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=2bS6JfPF1mk", "annotations": [{"segment": [0.20000000000000004, 22.11600624024961], "label": "Using the monkey bar"}]}, "VtS4vy8Z0RQ": {"duration": 14.652, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VtS4vy8Z0RQ", "annotations": [{"segment": [0, 13.480516380655226], "label": "Beer pong"}]}, "b9AaHQu2bA8": {"duration": 83.848, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=b9AaHQu2bA8", "annotations": []}, "DAv8CEings8": {"duration": 47.137, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=DAv8CEings8", "annotations": [{"segment": [7.206517450858034, 
8.677235297971919], "label": "Shuffleboard"}, {"segment": [16.766183457098283, 47.136507], "label": "Shuffleboard"}]}, "gLPJ7_VhWVU": {"duration": 216.76, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gLPJ7_VhWVU", "annotations": [{"segment": [57.63706829407176, 205.36374773400937], "label": "Putting on makeup"}]}, "fwwo0GsYB7c": {"duration": 81.874, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=fwwo0GsYB7c", "annotations": [{"segment": [8.467919692412986, 67.76886195876813], "label": "Roof shingle removal"}]}, "F3jJVS3NHf8": {"duration": 207.355, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=F3jJVS3NHf8", "annotations": [{"segment": [46.246556942277685, 175.9479313572543], "label": "Making a cake"}]}, "Sl-9a4w9UMo": {"duration": 130.845, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=Sl-9a4w9UMo", "annotations": []}, "vifcEi_nBio": {"duration": 139.2, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vifcEi_nBio", "annotations": []}, "MRzsZN5p9QY": {"duration": 65.85, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MRzsZN5p9QY", "annotations": [{"segment": [21.984719778471142, 54.448137769110765], "label": "Snatch"}, {"segment": [60.50934555850234, 64.618638975039], "label": "Snatch"}]}, "oxp7V1AKJWk": {"duration": 136.58, "subset": "training", "resolution": "568x320", "url": "https://www.youtube.com/watch?v=oxp7V1AKJWk", "annotations": [{"segment": [27.273340054690625, 69.03564201343563], "label": "Playing ten pins"}, {"segment": [73.72324733533559, 110.15872406240248], "label": "Playing ten pins"}, {"segment": [131.46602078627146, 136.379772], "label": "Playing ten pins"}]}, "DPkrjs9-pwQ": {"duration": 173.41, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=DPkrjs9-pwQ", 
"annotations": []}, "je6wJ_Ky5wg": {"duration": 77.49, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=je6wJ_Ky5wg", "annotations": [{"segment": [0, 77.441], "label": "Doing step aerobics"}]}, "uF9othvTXn8": {"duration": 98.105, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uF9othvTXn8", "annotations": [{"segment": [0.20000000000000004, 97.877], "label": "Riding bumper cars"}]}, "oGU7m7rCZ-Y": {"duration": 120.65, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=oGU7m7rCZ-Y", "annotations": []}, "C3Mdjku7ZmM": {"duration": 127.153, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=C3Mdjku7ZmM", "annotations": [{"segment": [46.9566879875195, 125.01970514820593], "label": "Cleaning sink"}]}, "xWIKFgygROg": {"duration": 94.09, "subset": "training", "resolution": "624x360", "url": "https://www.youtube.com/watch?v=xWIKFgygROg", "annotations": [{"segment": [13.21451915, 22.73236007], "label": "Archery"}, {"segment": [27.18737952, 34.053429429017164], "label": "Archery"}]}, "FcfoTk3UK5g": {"duration": 166.464, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FcfoTk3UK5g", "annotations": [{"segment": [15.054755070202809, 166.381], "label": "Doing fencing"}]}, "ZQs-OWTZA6o": {"duration": 31.649, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZQs-OWTZA6o", "annotations": [{"segment": [0.01, 31.058149959685352], "label": "Snow tubing"}]}, "Kd3YFwF-2UM": {"duration": 213.462, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Kd3YFwF-2UM", "annotations": [{"segment": [5.994225858830272, 22.644853244469914], "label": "Playing ten pins"}, {"segment": [56.27912056346199, 72.59673540138884], "label": "Playing ten pins"}, {"segment": [147.85757118448004, 179.82677576490812], "label": "Playing ten pins"}, 
{"segment": [199.80752761310453, 203.13765307332292], "label": "Playing ten pins"}]}, "Fi_ja3Urdtg": {"duration": 55.66, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=Fi_ja3Urdtg", "annotations": [{"segment": [24.354063962558502, 33.45433697347894], "label": "Mixing drinks"}]}, "PgoRelvwBUI": {"duration": 45.883, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PgoRelvwBUI", "annotations": [{"segment": [9.448528904836195, 45.682629], "label": "Changing car wheel"}]}, "PL1JmxPH7y4": {"duration": 47.764, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=PL1JmxPH7y4", "annotations": [{"segment": [15.71824561358487, 34.46651318102607], "label": "Doing crunches"}]}, "a0Zlu4AvdnI": {"duration": 52.18, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=a0Zlu4AvdnI", "annotations": [{"segment": [29.75047919578783, 42.69254812090484], "label": "Mowing the lawn"}]}, "aVH9QsSATKM": {"duration": 136.42000000000002, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aVH9QsSATKM", "annotations": [{"segment": [33.944693842175454, 43.73438611013828], "label": "Getting a tattoo"}, {"segment": [64.59068702884169, 73.52910170828602], "label": "Getting a tattoo"}, {"segment": [82.46751638773034, 109.06994102893367], "label": "Getting a tattoo"}]}, "Qxquum8p8S8": {"duration": 40.64, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Qxquum8p8S8", "annotations": [{"segment": [1.0142881747269892, 40.43492063492063], "label": "Washing hands"}]}, "kB0h-fHb_WU": {"duration": 33.298, "subset": "training", "resolution": "427x240", "url": "https://www.youtube.com/watch?v=kB0h-fHb_WU", "annotations": [{"segment": [0.01, 28.17585491419657], "label": "Powerbocking"}]}, "x0cSfzhQRAs": {"duration": 28.909, "subset": "training", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=x0cSfzhQRAs", "annotations": [{"segment": [0.2, 17.724141819032763], "label": "Slacklining"}]}, "wOZdXVdiB04": {"duration": 134.49, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wOZdXVdiB04", "annotations": [{"segment": [7.131566302652106, 126.48066389859595], "label": "Dodgeball"}]}, "7xpkFhlxo2Q": {"duration": 186.06, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7xpkFhlxo2Q", "annotations": [{"segment": [2.757282371294852, 7.691366614664586], "label": "Playing squash"}, {"segment": [16.39857410296412, 36.71539157566303], "label": "Playing squash"}, {"segment": [45.13235881435258, 67.77109828393137], "label": "Playing squash"}, {"segment": [72.12470202808113, 156.58461466458658], "label": "Playing squash"}]}, "Ha5KU-pp4gk": {"duration": 80.458, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=Ha5KU-pp4gk", "annotations": [{"segment": [4.769691560451534, 80.25716553287981], "label": "Trimming branches or hedges"}]}, "cbd1LfrWYms": {"duration": 157.827, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=cbd1LfrWYms", "annotations": []}, "pmbHoeudjKc": {"duration": 15.12, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pmbHoeudjKc", "annotations": [{"segment": [2.79086708, 14.07556572], "label": "Long jump"}]}, "agu58IVL7IQ": {"duration": 85.36, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=agu58IVL7IQ", "annotations": []}, "5c8HvpeRWrc": {"duration": 40.73, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5c8HvpeRWrc", "annotations": [{"segment": [0, 37.99566989079563], "label": "Ballet"}]}, "Cp4a_gN4pDQ": {"duration": 65.179, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Cp4a_gN4pDQ", "annotations": [{"segment": [0, 
64.9779365079365], "label": "Playing congas"}]}, "FZtIVsmpYcI": {"duration": 192.819, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FZtIVsmpYcI", "annotations": [{"segment": [30.38169866386492, 185.89989740405616], "label": "Cutting the grass"}]}, "QPEAp7nf2Tk": {"duration": 82.849, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QPEAp7nf2Tk", "annotations": []}, "yYdqlon7sR4": {"duration": 179.351, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yYdqlon7sR4", "annotations": []}, "rFXOInUQjKo": {"duration": 73.53999999999999, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rFXOInUQjKo", "annotations": [{"segment": [0.01, 25.525920261310453], "label": "Cleaning windows"}]}, "O-6zE9_S2KM": {"duration": 16.603, "subset": "training", "resolution": "632x480", "url": "https://www.youtube.com/watch?v=O-6zE9_S2KM", "annotations": [{"segment": [1.5799348299531981, 11.991964365054601], "label": "Playing blackjack"}]}, "LITdMW0xh7o": {"duration": 107.86, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=LITdMW0xh7o", "annotations": [{"segment": [24.70745859, 50.14297531], "label": "Chopping wood"}, {"segment": [57.510101410000004, 89.67555813999999], "label": "Chopping wood"}]}, "NGiDXRIx1gk": {"duration": 175.45, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NGiDXRIx1gk", "annotations": [{"segment": [0.2, 152.80214712729605], "label": "Doing fencing"}]}, "0qQvcJJekN8": {"duration": 25.101, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0qQvcJJekN8", "annotations": [{"segment": [0.2, 24.861], "label": "Swinging at the playground"}]}, "qy-LbstiMYg": {"duration": 192.77, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=qy-LbstiMYg", "annotations": 
[{"segment": [10.826512146200134, 103.15260103900157], "label": "Doing motocross"}, {"segment": [125.1063616099844, 147.06012218096726], "label": "Doing motocross"}]}, "MEU83dUao4g": {"duration": 63.25, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MEU83dUao4g", "annotations": [{"segment": [21.70866518876755, 54.27166297191888], "label": "Getting a piercing"}]}, "fJMVz_TBWU0": {"duration": 76.78999999999999, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fJMVz_TBWU0", "annotations": [{"segment": [25.484527200000002, 61.78774800000001], "label": "Playing bagpipes"}, {"segment": [10.2728208, 21.2423376], "label": "Playing bagpipes"}]}, "UKo5IFacUyE": {"duration": 38.57, "subset": "validation", "resolution": "300x240", "url": "https://www.youtube.com/watch?v=UKo5IFacUyE", "annotations": [{"segment": [0.6969237599999999, 15.53198127925117], "label": "Polishing shoes"}, {"segment": [15.69298127925117, 16.655003120124803], "label": "Polishing shoes"}]}, "xyZ87TEcDSI": {"duration": 63.182, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xyZ87TEcDSI", "annotations": [{"segment": [0.01, 59.63308030421217], "label": "Surfing"}]}, "8rqI8xwXVac": {"duration": 106.812, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=8rqI8xwXVac", "annotations": []}, "uC7y97DXEH8": {"duration": 23.36, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=uC7y97DXEH8", "annotations": []}, "eufCY8KHuMA": {"duration": 145.59, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eufCY8KHuMA", "annotations": [{"segment": [0.22712757489891433, 8.630847846158744], "label": "Horseback riding"}, {"segment": [15.217547518227258, 22.2585023400936], "label": "Horseback riding"}, {"segment": [29.072329587061034, 34.75051895953389], "label": "Horseback riding"}, {"segment": 
[42.47285650609698, 137.1850551351014], "label": "Horseback riding"}]}, "yToUeIIlkOg": {"duration": 190.474, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=yToUeIIlkOg", "annotations": [{"segment": [118.26578567360382, 173.53572571202167], "label": "Making an omelette"}]}, "J4rzLO4u_pI": {"duration": 147.796, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=J4rzLO4u_pI", "annotations": [{"segment": [68.22361310452419, 90.81116068642747], "label": "Trimming branches or hedges"}, {"segment": [109.26476443057722, 135.543631825273], "label": "Trimming branches or hedges"}]}, "frbNKAZALzI": {"duration": 201.9, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=frbNKAZALzI", "annotations": [{"segment": [22.993007665884864, 115.59498374492803], "label": "Kayaking"}, {"segment": [122.83935602322052, 130.71367328393134], "label": "Kayaking"}, {"segment": [154.33662628705147, 160.9510531279251], "label": "Kayaking"}, {"segment": [163.78580748829953, 187.72373319812792], "label": "Kayaking"}]}, "7kQ3JRkEQr0": {"duration": 66.55, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7kQ3JRkEQr0", "annotations": [{"segment": [1.6574352574102964, 62.77536037441497], "label": "Doing karate"}]}, "VDj-dpVbgaI": {"duration": 174.29, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=VDj-dpVbgaI", "annotations": []}, "5Vd6PEUWo64": {"duration": 168.02, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5Vd6PEUWo64", "annotations": [{"segment": [92.65936532699402, 121.49254831417747], "label": "Surfing"}]}, "IC61TQ-sd8o": {"duration": 159.45, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IC61TQ-sd8o", "annotations": [{"segment": [0.7462596645865835, 156.21702312012482], "label": "Cheerleading"}]}, "bjKd--KFl0E": 
{"duration": 230.923, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bjKd--KFl0E", "annotations": [{"segment": [0, 230.881], "label": "Surfing"}]}, "sIYRsGZm2XY": {"duration": 107.81, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=sIYRsGZm2XY", "annotations": [{"segment": [2.8579360374414975, 9.414377535101403], "label": "Smoking hookah"}, {"segment": [26.56199375975039, 29.58804368174727], "label": "Smoking hookah"}, {"segment": [44.88640717628705, 53.12398751950078], "label": "Smoking hookah"}, {"segment": [64.05139001560062, 65.73252886115445], "label": "Smoking hookah"}, {"segment": [87.75544773790952, 96.16114196567862], "label": "Smoking hookah"}]}, "zN9COeDCm9Q": {"duration": 209.352, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zN9COeDCm9Q", "annotations": [{"segment": [0, 205.38272854914194], "label": "Snowboarding"}]}, "U20a95sHi8E": {"duration": 86.123, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=U20a95sHi8E", "annotations": [{"segment": [13.435696067298474, 73.22454204368175], "label": "Hand car wash"}]}, "Ux9b6Yq6kTc": {"duration": 162.56, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ux9b6Yq6kTc", "annotations": []}, "DhgdEfKAvO0": {"duration": 156.689, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=DhgdEfKAvO0", "annotations": [{"segment": [83.09594806092474, 106.73321187884174], "label": "Hitting a pinata"}]}, "JPZoLIUBIoc": {"duration": 5.39, "subset": "testing", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=JPZoLIUBIoc", "annotations": []}, "JQcN61A1MEU": {"duration": 65.434, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JQcN61A1MEU", "annotations": [{"segment": [4.396176271713518, 53.64963612863419], "label": "Futsal"}]}, 
"zcl7tr-pLFU": {"duration": 118.53999999999999, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zcl7tr-pLFU", "annotations": []}, "7-uMJ_5WsZM": {"duration": 133.701, "subset": "training", "resolution": "627x360", "url": "https://www.youtube.com/watch?v=7-uMJ_5WsZM", "annotations": [{"segment": [3.3363120124804992, 84.46303432137286], "label": "Cutting the grass"}]}, "hhQ1Xbytds4": {"duration": 60.233, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hhQ1Xbytds4", "annotations": [{"segment": [0.2, 60.001999999999995], "label": "Roof shingle removal"}]}, "TFwELfVs19g": {"duration": 73.53999999999999, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TFwELfVs19g", "annotations": [{"segment": [11.142059927930802, 56.8453315868093], "label": "Getting a piercing"}]}, "6DzBNkTen1g": {"duration": 80.76, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6DzBNkTen1g", "annotations": [{"segment": [20.630025501373964, 36.47992257009346], "label": "Hitting a pinata"}]}, "3oS_28utt2Y": {"duration": 89.257, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=3oS_28utt2Y", "annotations": [{"segment": [0.20000000000000004, 64.4085418171872], "label": "Surfing"}]}, "cqxrPBGP4nU": {"duration": 62.253, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cqxrPBGP4nU", "annotations": [{"segment": [8.934864611544462, 47.68498395943838], "label": "Cleaning sink"}]}, "es0HLQRbv1E": {"duration": 141.158, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=es0HLQRbv1E", "annotations": []}, "Q6SooTA1os4": {"duration": 32.14, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Q6SooTA1os4", "annotations": [{"segment": [6.367094710999325, 28.676993501508765], "label": "Bathing dog"}]}, 
"s4pnHlWlt5k": {"duration": 53.453, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=s4pnHlWlt5k", "annotations": [{"segment": [3.7525039469578783, 51.45099856162247], "label": "Hitting a pinata"}]}, "X_p-lKYPPhM": {"duration": 84.1, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=X_p-lKYPPhM", "annotations": []}, "q8lLTxHen0I": {"duration": 75.419, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=q8lLTxHen0I", "annotations": [{"segment": [14.236548722308893, 26.23760632293292], "label": "Beach soccer"}, {"segment": [30.473273711388458, 44.82747986115445], "label": "Beach soccer"}]}, "GgiaxJ1JeSM": {"duration": 99.986, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GgiaxJ1JeSM", "annotations": [{"segment": [15.394139246636227, 95.78576770309915], "label": "Riding bumper cars"}]}, "TwJvPxAq9_o": {"duration": 149.31, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TwJvPxAq9_o", "annotations": [{"segment": [1.5140062473247229, 6.405411046373827], "label": "Smoking a cigarette"}]}, "VXiaPftjWqQ": {"duration": 220.195, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VXiaPftjWqQ", "annotations": [{"segment": [12.141584497181688, 194.2653441046583], "label": "Laying tile"}]}, "_l251ZuOCYU": {"duration": 128.477, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_l251ZuOCYU", "annotations": []}, "c9A384zijnk": {"duration": 175.43, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=c9A384zijnk", "annotations": [{"segment": [87.57638090483618, 106.18636184711387], "label": "Preparing pasta"}, {"segment": [107.82841898907955, 121.2385523151326], "label": "Preparing pasta"}, {"segment": [135.19603802184088, 139.84853325741028], "label": "Preparing pasta"}, {"segment": 
[143.67999992199688, 149.15352372854915], "label": "Preparing pasta"}, {"segment": [163.11100943525742, 165.3004189578783], "label": "Preparing pasta"}]}, "zhH7wxXrGSY": {"duration": 176.4, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zhH7wxXrGSY", "annotations": [{"segment": [0.01, 168.9323775351014], "label": "Painting"}]}, "9pNfaRJ0K4o": {"duration": 149.47, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9pNfaRJ0K4o", "annotations": [{"segment": [54.79673837293628, 132.67805919344775], "label": "Ballet"}]}, "S4GiCywMi38": {"duration": 199.62, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=S4GiCywMi38", "annotations": [{"segment": [40.02424180967238, 188.59671918876754], "label": "Putting on makeup"}]}, "06dIgOEgYp0": {"duration": 237.703, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=06dIgOEgYp0", "annotations": [{"segment": [52.27800468018721, 99.36528549141966], "label": "Snowboarding"}, {"segment": [106.40983931357255, 137.18341653666147], "label": "Snowboarding"}, {"segment": [147.19409828393137, 179.4507394695788], "label": "Snowboarding"}, {"segment": [196.50597503900156, 199.47210296411856], "label": "Snowboarding"}, {"segment": [210.2243166926677, 218.01040249609983], "label": "Snowboarding"}]}, "jherly5DNjg": {"duration": 35.899, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jherly5DNjg", "annotations": [{"segment": [6.318304212168487, 35.861], "label": "Calf roping"}]}, "n1z0OF9tt4M": {"duration": 225.559, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=n1z0OF9tt4M", "annotations": []}, "AEZroPZf0_4": {"duration": 33.344, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=AEZroPZf0_4", "annotations": []}, "otMw5GJWYWs": {"duration": 51.851, "subset": "testing", 
"resolution": "352x240", "url": "https://www.youtube.com/watch?v=otMw5GJWYWs", "annotations": []}, "B6kbmiYicYg": {"duration": 76.7, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=B6kbmiYicYg", "annotations": []}, "VoQ2VhCqir0": {"duration": 150.884, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=VoQ2VhCqir0", "annotations": [{"segment": [29.1880263618708, 41.663553758476866], "label": "Fixing the roof"}, {"segment": [48.96056034894457, 62.377636983030335], "label": "Fixing the roof"}]}, "V4wwal5FQZE": {"duration": 155.34, "subset": "validation", "resolution": "344x194", "url": "https://www.youtube.com/watch?v=V4wwal5FQZE", "annotations": [{"segment": [9.451354707249514, 14.782888131851804], "label": "Vacuuming floor"}, {"segment": [26.172982266229425, 37.32073397221603], "label": "Vacuuming floor"}, {"segment": [65.43245566557356, 70.0369618050028], "label": "Vacuuming floor"}, {"segment": [83.36579536650854, 94.513546099844], "label": "Vacuuming floor"}, {"segment": [103.9649007098284, 107.3576946723869], "label": "Vacuuming floor"}, {"segment": [134.2577039469579, 141.770319149766], "label": "Vacuuming floor"}]}, "bdDzTqaiB3E": {"duration": 67.06, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bdDzTqaiB3E", "annotations": [{"segment": [0, 7.005315132605304], "label": "Hand washing clothes"}]}, "Tm0jUAQf6zY": {"duration": 131.054, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Tm0jUAQf6zY", "annotations": [{"segment": [0.01, 131.001], "label": "Baton twirling"}]}, "a_EkYuPdaJ0": {"duration": 232.827, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=a_EkYuPdaJ0", "annotations": [{"segment": [154.8549987637838, 168.63600476635514], "label": "Painting fence"}]}, "OFe8toY6Ch4": {"duration": 190.225, "subset": "validation", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=OFe8toY6Ch4", "annotations": [{"segment": [10.683238689547583, 25.82054134165367], "label": "Curling"}, {"segment": [35.911327613104525, 61.7318689547582], "label": "Curling"}, {"segment": [68.85477691107646, 97.0496209048362], "label": "Curling"}, {"segment": [99.72071138845556, 136.2256146645866], "label": "Curling"}, {"segment": [138.59991731669268, 170.0594274570983], "label": "Curling"}]}, "N1_pNhVqG20": {"duration": 200.67000000000002, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=N1_pNhVqG20", "annotations": []}, "1VwNfMlb4JU": {"duration": 29.35, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1VwNfMlb4JU", "annotations": [{"segment": [0.01, 24.74674102964119], "label": "Getting a haircut"}]}, "9ctFBfPUcI8": {"duration": 74.861, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9ctFBfPUcI8", "annotations": []}, "_a527xEzhkw": {"duration": 161.32999999999998, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_a527xEzhkw", "annotations": []}, "GNubfPDmTnE": {"duration": 96.108, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GNubfPDmTnE", "annotations": []}, "eEfvYiuGULM": {"duration": 163.54, "subset": "training", "resolution": "634x360", "url": "https://www.youtube.com/watch?v=eEfvYiuGULM", "annotations": [{"segment": [5.863062402496101, 161.87150546021843], "label": "Javelin throw"}]}, "JSqJmZPqDy8": {"duration": 34.78, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=JSqJmZPqDy8", "annotations": [{"segment": [0.8939087363494539, 16.82173712948518], "label": "Mowing the lawn"}, {"segment": [27.44028939157566, 30.961748049921994], "label": "Mowing the lawn"}]}, "_5UtmlGMIUc": {"duration": 182.65, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=_5UtmlGMIUc", 
"annotations": [{"segment": [28.209310707122164, 133.06816250811232], "label": "Shaving legs"}]}, "GlcDmblxOcQ": {"duration": 69.636, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GlcDmblxOcQ", "annotations": []}, "C8m_dPhMGbg": {"duration": 128.36, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=C8m_dPhMGbg", "annotations": [{"segment": [11.196502991643179, 116.96346757009346], "label": "Snowboarding"}]}, "_s20ZN8WZbo": {"duration": 67.5, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_s20ZN8WZbo", "annotations": [{"segment": [6.682194454758191, 58.66695], "label": "Mixing drinks"}]}, "reit7blyxIo": {"duration": 72.08, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=reit7blyxIo", "annotations": [{"segment": [10.55581373478939, 57.72008786895476], "label": "Shaving"}]}, "furUOKw0Qzs": {"duration": 137.602, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=furUOKw0Qzs", "annotations": [{"segment": [10.37215048276004, 129.95189814631144], "label": "Changing car wheel"}]}, "aYC7F72n924": {"duration": 84.034, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=aYC7F72n924", "annotations": [{"segment": [21.641893285105358, 84.033038], "label": "Mooping floor"}]}, "MAZlsi4Bon8": {"duration": 10.612, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=MAZlsi4Bon8", "annotations": [{"segment": [0.01, 10.5949646421231], "label": "Hitting a pinata"}]}, "CsyHOM8ngZA": {"duration": 238.377, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CsyHOM8ngZA", "annotations": [{"segment": [25.841964898595943, 40.715038221528864], "label": "Grooming dog"}, {"segment": [52.24167004680187, 66.74291653666147], "label": "Grooming dog"}, {"segment": [75.66676053042121, 
118.42684633385336], "label": "Grooming dog"}, {"segment": [139.24914898595944, 158.95597113884554], "label": "Grooming dog"}]}, "wFmczV8lv-I": {"duration": 154.07, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=wFmczV8lv-I", "annotations": [{"segment": [63.09999248, 76.83017616], "label": "Using parallel bars"}, {"segment": [18.578577759999998, 51.007509119999995], "label": "Using parallel bars"}]}, "crfrKqFp0Zg": {"duration": 129.43, "subset": "training", "resolution": "320x214", "url": "https://www.youtube.com/watch?v=crfrKqFp0Zg", "annotations": [{"segment": [4.541419656786271, 128.26987519500778], "label": "Playing flauta"}]}, "MQ9Ojge7CyA": {"duration": 63.0, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=MQ9Ojge7CyA", "annotations": []}, "_B3Q8bTJWG4": {"duration": 56.68, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_B3Q8bTJWG4", "annotations": [{"segment": [9.539887675507021, 21.02308580343214], "label": "Calf roping"}]}, "D9rHZpZoeRA": {"duration": 202.502, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=D9rHZpZoeRA", "annotations": [{"segment": [95.38431348410398, 175.60820491107643], "label": "Changing car wheel"}]}, "ikZwamRM5mo": {"duration": 193.074, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ikZwamRM5mo", "annotations": [{"segment": [0.6013115264797507, 190.01444236760122], "label": "Drum corps"}]}, "W9goHnSz8H8": {"duration": 143.709, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=W9goHnSz8H8", "annotations": []}, "qgQVbGtIn0M": {"duration": 168.995, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qgQVbGtIn0M", "annotations": [{"segment": [78.0379327176287, 83.31076600936036], "label": "Slacklining"}, {"segment": [105.98394916380654, 138.4118739079563], "label": 
"Slacklining"}]}, "PcvK0XvNHgQ": {"duration": 45.3, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PcvK0XvNHgQ", "annotations": []}, "hiifjzLG8Io": {"duration": 19.6, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=hiifjzLG8Io", "annotations": [{"segment": [7.9491213978159125, 19.291906161622464], "label": "Grooming horse"}]}, "y7WO-8cQfO0": {"duration": 191.728, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=y7WO-8cQfO0", "annotations": [{"segment": [8.671090483619345, 155.48162246489858], "label": "Rope skipping"}, {"segment": [164.16984243369734, 184.20514196567862], "label": "Rope skipping"}]}, "y9xNMLhgTjY": {"duration": 166.023, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=y9xNMLhgTjY", "annotations": [{"segment": [6.276983246731974, 9.380210830600413], "label": "Longboarding"}, {"segment": [15.328063699681586, 26.447962541876823], "label": "Longboarding"}, {"segment": [56.44582918593839, 74.80659239049332], "label": "Longboarding"}, {"segment": [138.681360158452, 150.57706432124786], "label": "Longboarding"}]}, "4qstg-tiRvQ": {"duration": 23.66, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4qstg-tiRvQ", "annotations": [{"segment": [0.01, 23.661132], "label": "Cheerleading"}]}, "wyNM_7YDgfo": {"duration": 139.06, "subset": "validation", "resolution": "352x262", "url": "https://www.youtube.com/watch?v=wyNM_7YDgfo", "annotations": [{"segment": [67.85806416043219, 127.85908593613311], "label": "Paintball"}]}, "-IMXSEIabMM": {"duration": 126.39, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=-IMXSEIabMM", "annotations": []}, "q1yuDuO01tg": {"duration": 95.39, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=q1yuDuO01tg", "annotations": [{"segment": [43.89897127857903, 
73.21455453978159], "label": "Bungee jumping"}]}, "JJzBlV3p1Wc": {"duration": 131.89, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JJzBlV3p1Wc", "annotations": [{"segment": [0.2, 29.628807029832217], "label": "Hand washing clothes"}, {"segment": [82.50799735390777, 131.68934240362813], "label": "Hand washing clothes"}]}, "JfifgnVgJEU": {"duration": 35.597, "subset": "validation", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=JfifgnVgJEU", "annotations": [{"segment": [15.200801872074884, 19.805424336973477], "label": "Having an ice cream"}]}, "ETbbYC0OhL0": {"duration": 27.121, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ETbbYC0OhL0", "annotations": []}, "f38Jt5D0z4A": {"duration": 20.76, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=f38Jt5D0z4A", "annotations": [{"segment": [0.7772344960998441, 20.208096898595947], "label": "Grooming horse"}]}, "2SBTnunPQrQ": {"duration": 229.9, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2SBTnunPQrQ", "annotations": [{"segment": [15.422319540400663, 212.68454621994402], "label": "Polishing shoes"}]}, "SvYeqLg4dQU": {"duration": 113.825, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SvYeqLg4dQU", "annotations": [{"segment": [11.506333456983105, 28.323286303440508], "label": "Playing pool"}, {"segment": [41.59982802432793, 50.45085583825288], "label": "Playing pool"}, {"segment": [71.69332259167275, 86.20900692874075], "label": "Playing pool"}, {"segment": [104.26510340152457, 109.92976111847636], "label": "Playing pool"}]}, "kYfewEkkBf4": {"duration": 91.65, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kYfewEkkBf4", "annotations": []}, "OVf4tUiUqA0": {"duration": 111.712, "subset": "training", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=OVf4tUiUqA0", "annotations": [{"segment": [0.01, 98.94453666146646], "label": "Hand car wash"}]}, "ygkcjkVqxAA": {"duration": 45.047, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ygkcjkVqxAA", "annotations": [{"segment": [6.822549000318541, 39.289685171479675], "label": "Disc dog"}]}, "eCh_SqpkjtA": {"duration": 84.45, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eCh_SqpkjtA", "annotations": [{"segment": [53.7684705, 74.287287], "label": "Cheerleading"}, {"segment": [4.545943500000001, 41.280848999999996], "label": "Cheerleading"}]}, "EXPYu9bfNcE": {"duration": 234.127, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EXPYu9bfNcE", "annotations": []}, "Y75Fvr71aRQ": {"duration": 144.71, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Y75Fvr71aRQ", "annotations": [{"segment": [76.84357254290171, 120.17386895475819], "label": "Playing saxophone"}]}, "g16MLNJnyF4": {"duration": 102.17, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=g16MLNJnyF4", "annotations": []}, "m3SlyuAh1Wk": {"duration": 184.738, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=m3SlyuAh1Wk", "annotations": [{"segment": [48.89953357587203, 76.76824584918272], "label": "Raking leaves"}, {"segment": [152.3298060622465, 160.3743828149681], "label": "Raking leaves"}]}, "3j52keiQuiw": {"duration": 99.71000000000001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3j52keiQuiw", "annotations": [{"segment": [0, 99.70587301587301], "label": "Using the balance beam"}]}, "fpIcr1RaEDc": {"duration": 88.56, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=fpIcr1RaEDc", "annotations": [{"segment": [13.954132268330733, 75.29705035881435], "label": "Using 
parallel bars"}]}, "I5g6I-FOguQ": {"duration": 73.259, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=I5g6I-FOguQ", "annotations": [{"segment": [5.600122931357254, 73.25875283446712], "label": "Surfing"}]}, "1AiQt87brik": {"duration": 75.303, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1AiQt87brik", "annotations": [{"segment": [0.01, 57.00525199294758], "label": "Playing congas"}, {"segment": [59.818747320050385, 68.37646227332142], "label": "Playing congas"}]}, "0iqx7o5WYW8": {"duration": 182.72, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=0iqx7o5WYW8", "annotations": []}, "kG7inHibWLU": {"duration": 171.06, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kG7inHibWLU", "annotations": [{"segment": [70.43610608424336, 92.04718408736349], "label": "Making a sandwich"}]}, "OCy35F-1WgQ": {"duration": 16.184, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=OCy35F-1WgQ", "annotations": []}, "mb1LoOTM_NI": {"duration": 60.233, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=mb1LoOTM_NI", "annotations": [{"segment": [0.01, 56.63214196567863], "label": "Arm wrestling"}]}, "Wi9bm9omdoM": {"duration": 146.657, "subset": "training", "resolution": "400x224", "url": "https://www.youtube.com/watch?v=Wi9bm9omdoM", "annotations": [{"segment": [0.01, 145.14954117847535], "label": "Rock-paper-scissors"}]}, "2mOQx0l1158": {"duration": 182.393, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2mOQx0l1158", "annotations": [{"segment": [30.437577223088926, 172.1191965678627], "label": "Assembling bicycle"}]}, "RSyk6rS8ay0": {"duration": 211.47, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RSyk6rS8ay0", "annotations": [{"segment": [78.97521840873634, 
173.94333073322935], "label": "Clean and jerk"}, {"segment": [23.88062952, 48.967932917316695], "label": "Clean and jerk"}]}, "qtOP38458F4": {"duration": 54.36, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qtOP38458F4", "annotations": [{"segment": [6.572101560062403, 47.44633319812793], "label": "Using parallel bars"}]}, "nw32dno_RcM": {"duration": 34.97, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nw32dno_RcM", "annotations": [{"segment": [1.6888793229329175, 33.94102639313573], "label": "Spinning"}]}, "tT7ljH8GCl8": {"duration": 80.11, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tT7ljH8GCl8", "annotations": [{"segment": [0.2, 79.90884199999999], "label": "Mowing the lawn"}]}, "gM7x-gjE8dg": {"duration": 121.534, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gM7x-gjE8dg", "annotations": []}, "38nAdby_fTw": {"duration": 148.0, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=38nAdby_fTw", "annotations": [{"segment": [14.546414085134835, 132.99578418720748], "label": "Doing motocross"}]}, "b7B0NRizzYo": {"duration": 26.216, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=b7B0NRizzYo", "annotations": [{"segment": [0.3476248049921997, 24.190596723868957], "label": "Raking leaves"}]}, "hPhmxNbqqcs": {"duration": 17.067, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hPhmxNbqqcs", "annotations": []}, "HF_a81qOmVQ": {"duration": 65.835, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HF_a81qOmVQ", "annotations": [{"segment": [4.724484611544462, 28.552320043681746], "label": "Hurling"}]}, "f3spxjpfuxM": {"duration": 183.485, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=f3spxjpfuxM", "annotations": 
[{"segment": [9.4461383679837, 183.48408], "label": "Installing carpet"}]}, "4usf67inE3w": {"duration": 159.289, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4usf67inE3w", "annotations": [{"segment": [0.01, 155.66642982376254], "label": "Playing ten pins"}]}, "h3jponlpQNs": {"duration": 21.874, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=h3jponlpQNs", "annotations": []}, "V8gr2EMHNwQ": {"duration": 120.26, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=V8gr2EMHNwQ", "annotations": [{"segment": [7.500842433697348, 15.75176911076443], "label": "Sailing"}, {"segment": [42.379759750390015, 65.26818720748831], "label": "Sailing"}, {"segment": [101.27822152886115, 118.1579251170047], "label": "Sailing"}]}, "d-uGSELb0N0": {"duration": 67.66, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=d-uGSELb0N0", "annotations": [{"segment": [37.9516158, 61.41409092000001], "label": "Volleyball"}]}, "hwY9jvSjC9I": {"duration": 92.857, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hwY9jvSjC9I", "annotations": [{"segment": [0.01, 85.88339336688793], "label": "Putting on shoes"}]}, "s_QH-5G33Fw": {"duration": 158.895, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=s_QH-5G33Fw", "annotations": [{"segment": [44.60433697347894, 158.861], "label": "Playing beach volleyball"}]}, "LSaUJwsU4GQ": {"duration": 122.83, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LSaUJwsU4GQ", "annotations": [{"segment": [8.814875849455747, 117.08454660907525], "label": "Breakdancing"}]}, "9cJi1iD7Iyo": {"duration": 180.65, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9cJi1iD7Iyo", "annotations": [{"segment": [16.345980734467474, 179.24213357105714], "label": "Tai chi"}]}, 
"ozkDsQ1UY1k": {"duration": 224.839, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=ozkDsQ1UY1k", "annotations": [{"segment": [0, 202.74077594695788], "label": "Playing pool"}]}, "LPRLV0eVgfY": {"duration": 146.913, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LPRLV0eVgfY", "annotations": []}, "JwdW7Tlc4gs": {"duration": 179.61, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JwdW7Tlc4gs", "annotations": [{"segment": [5.87925717, 135.20315571999998], "label": "Playing guitarra"}]}, "DG3VlOODYaI": {"duration": 179.63, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DG3VlOODYaI", "annotations": []}, "g1na24Bd5Rs": {"duration": 180.84, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=g1na24Bd5Rs", "annotations": []}, "5JG8Dc2wsdc": {"duration": 111.64, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5JG8Dc2wsdc", "annotations": [{"segment": [18.290148205928237, 91.27654914196567], "label": "Doing nails"}]}, "pElJ7U46XnQ": {"duration": 43.58, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pElJ7U46XnQ", "annotations": [{"segment": [11.218874344773791, 25.70142122620905], "label": "Clean and jerk"}]}, "O36pn7cHNVc": {"duration": 227.25, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=O36pn7cHNVc", "annotations": [{"segment": [46.96549648205928, 206.46616808999997], "label": "Tai chi"}]}, "kF6tSW-ZOFg": {"duration": 15.42, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kF6tSW-ZOFg", "annotations": [{"segment": [1.2267089672386895, 13.421639288611544], "label": "Hopscotch"}]}, "LqCg09IRp-o": {"duration": 45.953, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LqCg09IRp-o", 
"annotations": [{"segment": [15.722267432083369, 35.44656794219223], "label": "Rollerblading"}]}, "PQunZdghRls": {"duration": 62.462, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PQunZdghRls", "annotations": []}, "ABQYqpWF1LA": {"duration": 132.447, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ABQYqpWF1LA", "annotations": [{"segment": [34.2879344773791, 132.401], "label": "Applying sunscreen"}]}, "0nPeqy-DA2E": {"duration": 202.9, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=0nPeqy-DA2E", "annotations": [{"segment": [0.01, 202.89596371882087], "label": "Removing curlers"}]}, "63d_t0U1pXw": {"duration": 210.327, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=63d_t0U1pXw", "annotations": [{"segment": [22.968556075576355, 188.6702820493772], "label": "Powerbocking"}]}, "pF-r_m8LVPs": {"duration": 216.016, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=pF-r_m8LVPs", "annotations": [{"segment": [0.01, 199.115368174727], "label": "Snow tubing"}]}, "LryRyt_SHhw": {"duration": 25.59, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LryRyt_SHhw", "annotations": []}, "lo8CcE7B9cI": {"duration": 112.408, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lo8CcE7B9cI", "annotations": []}, "MoSuxL57xRY": {"duration": 225.745, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MoSuxL57xRY", "annotations": [{"segment": [28.878378527301088, 172.2137451201248], "label": "Snowboarding"}]}, "uowNtTu74nA": {"duration": 226.024, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=uowNtTu74nA", "annotations": [{"segment": [13.399181692437764, 200.63511512121687], "label": "Surfing"}]}, "C3BBG_9Vo48": {"duration": 210.141, "subset": 
"training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=C3BBG_9Vo48", "annotations": [{"segment": [7.660365920081689, 184.76539551509725], "label": "Making a cake"}]}, "Ku76zinwWkg": {"duration": 53.92, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ku76zinwWkg", "annotations": []}, "NdcZrdvh24E": {"duration": 56.797, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NdcZrdvh24E", "annotations": []}, "LMMimz1-fa0": {"duration": 113.871, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LMMimz1-fa0", "annotations": [{"segment": [30.393064756553862, 104.15919559767535], "label": "Wakeboarding"}]}, "1IhbkbuDPpc": {"duration": 103.027, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1IhbkbuDPpc", "annotations": [{"segment": [0.01, 90.44977067082684], "label": "Rollerblading"}]}, "mUyMYnGXKgk": {"duration": 55.75, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mUyMYnGXKgk", "annotations": [{"segment": [20.699973884555384, 22.87434088923557], "label": "Layup drill in basketball"}, {"segment": [25.483581294851795, 26.7012268174727], "label": "Layup drill in basketball"}, {"segment": [28.527695101404056, 30.180214024961], "label": "Layup drill in basketball"}, {"segment": [38.355833962558506, 40.18230224648986], "label": "Layup drill in basketball"}, {"segment": [44.18313753510141, 45.052884336973484], "label": "Layup drill in basketball"}]}, "H5Sgn5ndjp0": {"duration": 180.37, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=H5Sgn5ndjp0", "annotations": []}, "THOLslLjRqs": {"duration": 150.698, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=THOLslLjRqs", "annotations": [{"segment": [0, 138.43889079563183], "label": "Painting furniture"}]}, "CGz-ghbUXvk": {"duration": 
79.09, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CGz-ghbUXvk", "annotations": [{"segment": [0.37014284087363497, 76.37280616692668], "label": "Playing bagpipes"}]}, "-y5p8UMdM20": {"duration": 141.11, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=-y5p8UMdM20", "annotations": [{"segment": [15.40229329173167, 17.38258814352574], "label": "Springboard diving"}, {"segment": [27.287931357254294, 30.368826833073324], "label": "Springboard diving"}, {"segment": [32.78953042121685, 34.109914196567864], "label": "Springboard diving"}, {"segment": [40.49176911076444, 47.09368798751951], "label": "Springboard diving"}, {"segment": [52.815351014040566, 55.23605460218409], "label": "Springboard diving"}, {"segment": [62.71822932917317, 65.35899687987519], "label": "Springboard diving"}, {"segment": [71.96091575663027, 74.16155538221528], "label": "Springboard diving"}, {"segment": [87.58545709828394, 89.12590483619344], "label": "Springboard diving"}, {"segment": [100.34916692667707, 105.63070202808113], "label": "Springboard diving"}, {"segment": [121.69537129485181, 124.33613884555382], "label": "Springboard diving"}]}, "R4kvr-667Ag": {"duration": 86.68, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=R4kvr-667Ag", "annotations": []}, "Y5hT9Sk5EY4": {"duration": 121.302, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Y5hT9Sk5EY4", "annotations": [{"segment": [9.651097031636366, 27.628630718017835], "label": "Grooming dog"}, {"segment": [43.33531804401428, 62.82674874258971], "label": "Grooming dog"}, {"segment": [74.18098044305772, 83.83207738845553], "label": "Grooming dog"}]}, "cgPt46YiXNo": {"duration": 15.395, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=cgPt46YiXNo", "annotations": [{"segment": [3.770652573041697, 14.9865408299532], "label": "Powerbocking"}]}, 
"knZfggvbS50": {"duration": 95.527, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=knZfggvbS50", "annotations": []}, "XBO6AIdaCzU": {"duration": 107.16, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XBO6AIdaCzU", "annotations": [{"segment": [0, 3.0091756661466458], "label": "Playing harmonica"}, {"segment": [8.191644868954757, 96.62797416848673], "label": "Playing harmonica"}, {"segment": [106.6585597223089, 106.960089], "label": "Playing harmonica"}]}, "1gM0xfKN-Kc": {"duration": 220.985, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1gM0xfKN-Kc", "annotations": [{"segment": [0.2, 189.02234252342072], "label": "Using the monkey bar"}]}, "_hdjRMpleWs": {"duration": 58.677, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_hdjRMpleWs", "annotations": [{"segment": [0.2, 58.47625850340136], "label": "Calf roping"}]}, "VIYMKDJQ80g": {"duration": 206.518, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=VIYMKDJQ80g", "annotations": []}, "xMbfjxCCCFo": {"duration": 48.55, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xMbfjxCCCFo", "annotations": []}, "3kEAg-JtDBY": {"duration": 29.17, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3kEAg-JtDBY", "annotations": [{"segment": [0.3639845621035726, 26.02489619040544], "label": "Doing karate"}]}, "7lUaR1veDJU": {"duration": 13.52, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=7lUaR1veDJU", "annotations": [{"segment": [0.021016805276114315, 13.514012], "label": "Playing water polo"}]}, "iDMzTPfELoc": {"duration": 222.84, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=iDMzTPfELoc", "annotations": [{"segment": [4.86706184399376, 189.12011736661466], "label": 
"Hopscotch"}]}, "qJfhu80Ldgo": {"duration": 15.12, "subset": "training", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=qJfhu80Ldgo", "annotations": [{"segment": [0.9099831999999999, 11.70930708], "label": "Clean and jerk"}]}, "NIJTz15ikgA": {"duration": 136.46, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NIJTz15ikgA", "annotations": [{"segment": [16.160492979719187, 115.67510764430577], "label": "Putting on makeup"}]}, "IjdTjpaPPNg": {"duration": 150.26, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IjdTjpaPPNg", "annotations": [{"segment": [18.049511923334077, 58.60231143939635], "label": "Horseback riding"}, {"segment": [63.52490560030565, 89.30992172386895], "label": "Horseback riding"}, {"segment": [98.45188221528862, 126.81540066302652], "label": "Horseback riding"}]}, "05BGDQvQ2YM": {"duration": 173.755, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=05BGDQvQ2YM", "annotations": [{"segment": [20.58532917316693, 165.76607176287052], "label": "Riding bumper cars"}]}, "eT9oCTQUMhA": {"duration": 178.86, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eT9oCTQUMhA", "annotations": [{"segment": [29.856967252839777, 167.4222462776062], "label": "Plataform diving"}]}, "98iRGAxo05k": {"duration": 64.97, "subset": "testing", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=98iRGAxo05k", "annotations": []}, "aHzNMe-VKfE": {"duration": 18.99, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=aHzNMe-VKfE", "annotations": [{"segment": [2.326088817394696, 12.037763399999998], "label": "Playing field hockey"}]}, "Trzd5ijRN1A": {"duration": 162.819, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Trzd5ijRN1A", "annotations": [{"segment": [0, 153.13095631825274], "label": "Doing fencing"}]}, 
"fM4pJeHb8hQ": {"duration": 185.412, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fM4pJeHb8hQ", "annotations": [{"segment": [41.36313989974565, 169.79135049755732], "label": "Braiding hair"}]}, "nGsRDNIsc2o": {"duration": 134.28, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=nGsRDNIsc2o", "annotations": [{"segment": [11.731257212193249, 24.928921575910657], "label": "Spinning"}, {"segment": [102.4390137697348, 120.66435977784712], "label": "Spinning"}]}, "_ye90Ou8SnE": {"duration": 90.465, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=_ye90Ou8SnE", "annotations": [{"segment": [10.302557078003119, 44.45623944617785], "label": "Installing carpet"}, {"segment": [60.12177144149766, 82.70271846177847], "label": "Installing carpet"}]}, "suyh4tGuScw": {"duration": 113.11, "subset": "validation", "resolution": "638x360", "url": "https://www.youtube.com/watch?v=suyh4tGuScw", "annotations": [{"segment": [4.940589778471138, 103.39948607800312], "label": "Tango"}]}, "IgyBIt3GTAU": {"duration": 194.03, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=IgyBIt3GTAU", "annotations": [{"segment": [0.01, 194.0259410430839], "label": "Polishing forniture"}]}, "m-C1TSa1PGE": {"duration": 177.703, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=m-C1TSa1PGE", "annotations": []}, "QacSWR8c-8Y": {"duration": 16.93, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QacSWR8c-8Y", "annotations": [{"segment": [4.146011177847114, 8.027945210608424], "label": "Pole vault"}]}, "biAQgXDFskw": {"duration": 93.6, "subset": "training", "resolution": "448x336", "url": "https://www.youtube.com/watch?v=biAQgXDFskw", "annotations": [{"segment": [23.782742589703588, 91.4833104524181], "label": "Cumbia"}]}, "OWkly270P00": {"duration": 202.734, "subset": "testing", 
"resolution": "655x480", "url": "https://www.youtube.com/watch?v=OWkly270P00", "annotations": []}, "DF8nlagyN6k": {"duration": 218.5, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DF8nlagyN6k", "annotations": [{"segment": [27.941531981279248, 212.9689937597504], "label": "Doing fencing"}]}, "fT7dpRY0DTo": {"duration": 138.484, "subset": "training", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=fT7dpRY0DTo", "annotations": [{"segment": [18.205662846899624, 120.23502743101179], "label": "Laying tile"}]}, "_S6D21MV8Ks": {"duration": 238.26, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_S6D21MV8Ks", "annotations": [{"segment": [3.902090478159127, 234.68287018642746], "label": "Playing squash"}]}, "TokZDNwr664": {"duration": 185.714, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=TokZDNwr664", "annotations": [{"segment": [0.2, 173.00498030530252], "label": "Riding bumper cars"}]}, "sCTmSj_tsDQ": {"duration": 132.447, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=sCTmSj_tsDQ", "annotations": [{"segment": [46.69724870083239, 129.34724511700466], "label": "Bullfighting"}]}, "xS4twW2gXqc": {"duration": 57.21, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xS4twW2gXqc", "annotations": []}, "M8aDrPK7D94": {"duration": 82.39, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=M8aDrPK7D94", "annotations": [{"segment": [14.535678354084265, 44.17866268118707], "label": "Bungee jumping"}]}, "-Sqc9fQ7vpk": {"duration": 27.12, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=-Sqc9fQ7vpk", "annotations": []}, "fh9d3gpT-8I": {"duration": 183.6, "subset": "testing", "resolution": "512x288", "url": "https://www.youtube.com/watch?v=fh9d3gpT-8I", "annotations": []}, "2bnO6YWXCiU": {"duration": 
55.287, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=2bnO6YWXCiU", "annotations": [{"segment": [16.885048361934476, 21.709347893915755], "label": "Spread mulch"}]}, "p0menuS7Mlk": {"duration": 38.01, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=p0menuS7Mlk", "annotations": [{"segment": [8.271631825273012, 31.515213728549146], "label": "Plataform diving"}]}, "veNIfCkd1Pw": {"duration": 91.65, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=veNIfCkd1Pw", "annotations": [{"segment": [15.004953088923555, 66.02179359126364], "label": "Using uneven bars"}]}, "zyh1cftKtQM": {"duration": 233.59, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zyh1cftKtQM", "annotations": [{"segment": [62.30939192607322, 115.06070812545016], "label": "Brushing hair"}, {"segment": [142.7096738575374, 144.89248694164954], "label": "Brushing hair"}]}, "NNuIaUFYGCE": {"duration": 52.083, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=NNuIaUFYGCE", "annotations": [{"segment": [14.462807758568847, 44.363442936037444], "label": "Plastering"}]}, "N3xWCui6fNY": {"duration": 50.16, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=N3xWCui6fNY", "annotations": [{"segment": [0.3909204368174727, 49.25597503900156], "label": "Playing accordion"}]}, "T-oY05uN3hY": {"duration": 55.06, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=T-oY05uN3hY", "annotations": []}, "hG2kfPRWjO0": {"duration": 213.531, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hG2kfPRWjO0", "annotations": [{"segment": [75.59144137620002, 81.57828351492466], "label": "Snowboarding"}, {"segment": [93.88457013341421, 108.85167548022584], "label": "Snowboarding"}, {"segment": [145.7705353356945, 149.0965587460971], 
"label": "Snowboarding"}, {"segment": [169.38530154955285, 179.36337014459218], "label": "Snowboarding"}]}, "py9diWBvxiM": {"duration": 69.01, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=py9diWBvxiM", "annotations": []}, "vvvjTjsXbzE": {"duration": 156.29, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vvvjTjsXbzE", "annotations": [{"segment": [67.90599665347159, 136.90921766938706], "label": "Mixing drinks"}]}, "TYRDXDR5l9U": {"duration": 50.2, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=TYRDXDR5l9U", "annotations": [{"segment": [3.720082917316693, 47.65622010920437], "label": "Washing hands"}]}, "gBihqIq4vEc": {"duration": 191.59, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=gBihqIq4vEc", "annotations": []}, "DCyLmohIwkQ": {"duration": 155.92000000000002, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DCyLmohIwkQ", "annotations": [{"segment": [96.56923426052688, 126.73191700185015], "label": "Archery"}]}, "rEDbnmvTblI": {"duration": 58.584, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rEDbnmvTblI", "annotations": [{"segment": [0.2, 45.100427457098284], "label": "Snow tubing"}]}, "0rr7iGHamw0": {"duration": 60.09, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0rr7iGHamw0", "annotations": []}, "5iWfvSjZBnE": {"duration": 38.615, "subset": "testing", "resolution": "426x234", "url": "https://www.youtube.com/watch?v=5iWfvSjZBnE", "annotations": []}, "5OGNGIF22SY": {"duration": 207.262, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5OGNGIF22SY", "annotations": [{"segment": [97.64864309946547, 144.85626526013422], "label": "Using the rowing machine"}]}, "iA8ylJWzzVc": {"duration": 196.023, "subset": "validation", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=iA8ylJWzzVc", "annotations": [{"segment": [16.81943392021395, 180.7324625382215], "label": "Decorating the Christmas tree"}]}, "zo8XBVxxkmo": {"duration": 190.59, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zo8XBVxxkmo", "annotations": [{"segment": [39.247736635996056, 43.410375370116846], "label": "Longboarding"}, {"segment": [61.844918335508936, 68.08887643669013], "label": "Longboarding"}, {"segment": [83.55010602056736, 88.30740743099112], "label": "Longboarding"}, {"segment": [91.57805215065746, 93.95670285586934], "label": "Longboarding"}, {"segment": [95.14602820847529, 104.36329969117133], "label": "Longboarding"}, {"segment": [106.14728772008023, 113.28323983571589], "label": "Longboarding"}, {"segment": [113.87790206084243, 118.33787211544461], "label": "Longboarding"}, {"segment": [119.52719746333851, 123.68983618096723], "label": "Longboarding"}, {"segment": [124.28449885491418, 129.6364629204368], "label": "Longboarding"}, {"segment": [137.36707768174725, 142.12437907332293], "label": "Longboarding"}, {"segment": [154.31496388923557, 155.50428923712948], "label": "Longboarding"}]}, "ZZ71FIfxX-c": {"duration": 151.813, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ZZ71FIfxX-c", "annotations": [{"segment": [0.01, 118.30406951962527], "label": "Beer pong"}]}, "4_yy5vswBn4": {"duration": 116.36, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4_yy5vswBn4", "annotations": [{"segment": [9.617639625585023, 108.51600936037443], "label": "Tango"}]}, "U2aNQHCnVfI": {"duration": 171.62, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=U2aNQHCnVfI", "annotations": [{"segment": [80.5231656, 156.08992155850234], "label": "Playing violin"}]}, "9PY28-zQhm4": {"duration": 38.755, "subset": "validation", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=9PY28-zQhm4", "annotations": [{"segment": [1.5114705148205927, 29.262069166926675], "label": "Peeling potatoes"}]}, "5MLEO5JWRYI": {"duration": 122.88, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=5MLEO5JWRYI", "annotations": [{"segment": [6.517807425897036, 122.87984126984126], "label": "Cleaning sink"}]}, "8lVgQlJoglo": {"duration": 28.42, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=8lVgQlJoglo", "annotations": []}, "lWFGSRpuopM": {"duration": 154.018, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lWFGSRpuopM", "annotations": [{"segment": [0, 150.65409458010976], "label": "Riding bumper cars"}]}, "3b1AhVN3_MI": {"duration": 162.31, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=3b1AhVN3_MI", "annotations": []}, "zNMLz9GIQLk": {"duration": 138.23, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=zNMLz9GIQLk", "annotations": [{"segment": [66.98390652, 123.35881404], "label": "Cleaning windows"}]}, "LCe0toF3058": {"duration": 152.72, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LCe0toF3058", "annotations": [{"segment": [7.909213430000001, 141.35670046801872], "label": "Snatch"}]}, "GqjFT65KAA0": {"duration": 130.08, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GqjFT65KAA0", "annotations": []}, "RpyRgnHdUXc": {"duration": 171.851, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=RpyRgnHdUXc", "annotations": [{"segment": [83.04759750390015, 120.55296411856473], "label": "Using the rowing machine"}]}, "KI6FNa3BwMM": {"duration": 214.53, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=KI6FNa3BwMM", "annotations": [{"segment": [28.20289955693791, 214.52916099773242], "label": 
"Futsal"}]}, "28Dgwq9xNx8": {"duration": 95.829, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=28Dgwq9xNx8", "annotations": []}, "qN0a8-A-5Pg": {"duration": 164.56, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=qN0a8-A-5Pg", "annotations": [{"segment": [13.088223088923556, 164.501], "label": "Hammer throw"}]}, "1fF1OQtpctg": {"duration": 213.81, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1fF1OQtpctg", "annotations": [{"segment": [7.6667862714508574, 191.3363182527301], "label": "Cheerleading"}]}, "MRpg1K7YWSE": {"duration": 32.09, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MRpg1K7YWSE", "annotations": [{"segment": [6.737593457943924, 18.577401869158876], "label": "Throwing darts"}]}, "XulEpl5Bi9A": {"duration": 50.643, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XulEpl5Bi9A", "annotations": []}, "BqjNfptu17Q": {"duration": 93.25, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BqjNfptu17Q", "annotations": [{"segment": [12.870191887675507, 20.868616224648985], "label": "Playing field hockey"}, {"segment": [23.631708268330733, 26.2493744149766], "label": "Playing field hockey"}, {"segment": [29.012466458658345, 33.956946957878316], "label": "Playing field hockey"}, {"segment": [36.865464898595945, 39.04685335413417], "label": "Playing field hockey"}, {"segment": [57.5159422776911, 58.243071762870514], "label": "Playing field hockey"}, {"segment": [61.29701560062403, 68.13203276131046], "label": "Playing field hockey"}, {"segment": [70.60427301092044, 73.51279095163807], "label": "Playing field hockey"}, {"segment": [76.56673478939157, 80.92951170046803], "label": "Playing field hockey"}]}, "IfpcrV_cwHI": {"duration": 97.06, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=IfpcrV_cwHI", "annotations": [{"segment": [0.01, 94.88241341653666], "label": "Playing pool"}]}, "mlNP3uaTB3Q": {"duration": 65.06, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mlNP3uaTB3Q", "annotations": [{"segment": [37.98925663026521, 50.8721287051482], "label": "Cleaning windows"}]}, "qTvZtsi-3KU": {"duration": 133.933, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qTvZtsi-3KU", "annotations": [{"segment": [14.99709841817049, 111.43679340658576], "label": "Swimming"}]}, "XeRiPVEZ6pY": {"duration": 118.97999999999999, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=XeRiPVEZ6pY", "annotations": [{"segment": [0, 112.29608233308925], "label": "Playing accordion"}]}, "2C_YKYbhm3s": {"duration": 126.062, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=2C_YKYbhm3s", "annotations": []}, "iKclcQEl4zI": {"duration": 26.91, "subset": "testing", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=iKclcQEl4zI", "annotations": []}, "71vVRQ4l8OI": {"duration": 49.13, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=71vVRQ4l8OI", "annotations": [{"segment": [1.09173526, 47.861413806552264], "label": "Using parallel bars"}]}, "3CeZS6-0NfU": {"duration": 18.019, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=3CeZS6-0NfU", "annotations": [{"segment": [0.5043890731695884, 11.713044645299945], "label": "Welding"}]}, "BdKxwCdax_w": {"duration": 146.24, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=BdKxwCdax_w", "annotations": [{"segment": [73.68978688767551, 109.96432594383775], "label": "Breakdancing"}, {"segment": [127.75938283931357, 140.30717936817473], "label": "Breakdancing"}]}, "Ujm7CiWkOBY": {"duration": 137.091, "subset": "validation", 
"resolution": "320x240", "url": "https://www.youtube.com/watch?v=Ujm7CiWkOBY", "annotations": [{"segment": [104.77361934477379, 118.67216068642746], "label": "Playing pool"}]}, "xfV4V_AOVSM": {"duration": 156.34, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=xfV4V_AOVSM", "annotations": []}, "H8f7pTaBFKk": {"duration": 169.297, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=H8f7pTaBFKk", "annotations": [{"segment": [16.90325746021841, 169.29668934240362], "label": "Removing ice from car"}]}, "QnATCZ_P9DE": {"duration": 94.75999999999999, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=QnATCZ_P9DE", "annotations": [{"segment": [5.318620904836194, 92.947751950078], "label": "Kayaking"}]}, "J6T9pokWX1E": {"duration": 101.263, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=J6T9pokWX1E", "annotations": [{"segment": [10.573770670826834, 27.933692667706705], "label": "Grooming dog"}, {"segment": [79.87142901716068, 88.07956006240249], "label": "Grooming dog"}]}, "bX-PSFlwV5s": {"duration": 142.734, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bX-PSFlwV5s", "annotations": [{"segment": [57.42854602184088, 126.0043151326053], "label": "Roof shingle removal"}]}, "Jdtr90RIkHk": {"duration": 142.15, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Jdtr90RIkHk", "annotations": [{"segment": [98.63785127999999, 120.581008274571], "label": "Skateboarding"}, {"segment": [45.88284689859594, 54.52744124180968], "label": "Skateboarding"}, {"segment": [72.48159872386896, 80.46122427145086], "label": "Skateboarding"}]}, "2AQg1DDVYHI": {"duration": 192.773, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2AQg1DDVYHI", "annotations": [{"segment": [16.239768219300203, 87.51430651511775], "label": 
"Powerbocking"}, {"segment": [109.76880370452913, 185.25365180655226], "label": "Powerbocking"}]}, "D88jUp91KPk": {"duration": 49.85, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=D88jUp91KPk", "annotations": []}, "dFkdab5i1no": {"duration": 154.23, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=dFkdab5i1no", "annotations": []}, "XNigP28gH58": {"duration": 82.59, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=XNigP28gH58", "annotations": []}, "jWPr92KwXeY": {"duration": 150.373, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jWPr92KwXeY", "annotations": [{"segment": [7.976072517940717, 150.372426], "label": "Snowboarding"}]}, "PzNpPDd-VWE": {"duration": 99.103, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PzNpPDd-VWE", "annotations": [{"segment": [15.151436770670827, 71.27359542121685], "label": "Slacklining"}]}, "80g-pLQce54": {"duration": 71.007, "subset": "testing", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=80g-pLQce54", "annotations": []}, "15HUSdsIHxg": {"duration": 116.867, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=15HUSdsIHxg", "annotations": [{"segment": [0.01, 116.8656], "label": "Baton twirling"}]}, "qhei_KanLyo": {"duration": 46.44, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qhei_KanLyo", "annotations": []}, "RZ2P53zcZk0": {"duration": 120.326, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RZ2P53zcZk0", "annotations": []}, "_ekSmyvWdRQ": {"duration": 15.07, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_ekSmyvWdRQ", "annotations": [{"segment": [0, 15.069749], "label": "Preparing salad"}]}, "yDH9iAn82Q8": {"duration": 178.54, "subset": "validation", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=yDH9iAn82Q8", "annotations": [{"segment": [8.939397660000001, 39.09268048], "label": "Preparing pasta"}, {"segment": [8.12169362, 128.57949684000002], "label": "Preparing salad"}, {"segment": [51.243976759999995, 163.96896255850234], "label": "Preparing pasta"}]}, "cdP8_KujTCE": {"duration": 110.67, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cdP8_KujTCE", "annotations": [{"segment": [27.27812124180967, 30.73104798127925], "label": "Drinking beer"}]}, "BFvMwb0Ld2g": {"duration": 76.278, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BFvMwb0Ld2g", "annotations": []}, "7-_Nur_xiV4": {"duration": 185.9, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7-_Nur_xiV4", "annotations": [{"segment": [4.350209600220744, 179.51864765210607], "label": "Breakdancing"}]}, "CnPYJnV1gCU": {"duration": 148.469, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=CnPYJnV1gCU", "annotations": []}, "6LX02yo9iGo": {"duration": 155.621, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=6LX02yo9iGo", "annotations": [{"segment": [115.20561773998931, 148.89907297964987], "label": "BMX"}]}, "EbQJuDQdW8U": {"duration": 124.13, "subset": "validation", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=EbQJuDQdW8U", "annotations": [{"segment": [21.490188767550702, 124.081], "label": "Ironing clothes"}]}, "65rZgGe7Zr4": {"duration": 26.355, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=65rZgGe7Zr4", "annotations": [{"segment": [0.2, 24.791875581903277], "label": "Raking leaves"}]}, "U9Ofeof4rlA": {"duration": 22.04, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=U9Ofeof4rlA", "annotations": [{"segment": [0.8485678499999999, 20.981817472698907], "label": 
"Hammer throw"}]}, "WPVb8fYLFUM": {"duration": 177.656, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WPVb8fYLFUM", "annotations": [{"segment": [33.812727632914836, 105.87263898174974], "label": "Baking cookies"}]}, "l7EktV5FYGk": {"duration": 197.02, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=l7EktV5FYGk", "annotations": [{"segment": [7.991484889327546, 189.3367189163757], "label": "Sailing"}]}, "CGR-z8vaawI": {"duration": 42.73, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CGR-z8vaawI", "annotations": []}, "_cA6yS9SeEc": {"duration": 134.165, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=_cA6yS9SeEc", "annotations": [{"segment": [24.69806233882008, 37.46570473431182], "label": "Changing car wheel"}, {"segment": [45.00070680378235, 60.070710942723416], "label": "Changing car wheel"}, {"segment": [89.37349676844215, 105.69933458562832], "label": "Changing car wheel"}]}, "cdb7VSVuUl8": {"duration": 207.712, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cdb7VSVuUl8", "annotations": [{"segment": [75.82220592823712, 90.72742589703589], "label": "Sumo"}]}, "jkZIHh5TYIg": {"duration": 34.134, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jkZIHh5TYIg", "annotations": []}, "jcaOoRM6J80": {"duration": 215.273, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jcaOoRM6J80", "annotations": [{"segment": [27.87455644348223, 138.36526812909253], "label": "Making a cake"}, {"segment": [169.26236683753064, 207.88373938221528], "label": "Making a cake"}]}, "OLWpT2NYgaQ": {"duration": 211.93, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=OLWpT2NYgaQ", "annotations": []}, "k7MXH55q28U": {"duration": 189.38, "subset": "validation", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=k7MXH55q28U", "annotations": [{"segment": [15.206158346333854, 181.73573712948516], "label": "Rock climbing"}]}, "ub0pmYL8i5s": {"duration": 59.629, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ub0pmYL8i5s", "annotations": [{"segment": [0.01, 59.6288434], "label": "Cleaning sink"}]}, "z-EPAHsmTMA": {"duration": 30.93, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=z-EPAHsmTMA", "annotations": []}, "SzCK4QuEG3c": {"duration": 105.21, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SzCK4QuEG3c", "annotations": [{"segment": [6.893609588143526, 75.33730478471139], "label": "Sharpening knives"}]}, "ZX8sFpq471A": {"duration": 111.83, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZX8sFpq471A", "annotations": [{"segment": [13.258764249609984, 19.713688950078], "label": "Archery"}, {"segment": [36.81051653510141, 41.52086699219969], "label": "Archery"}, {"segment": [60.18781139625585, 65.94490639937597], "label": "Archery"}]}, "jiM0HUFn0e8": {"duration": 99.66, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jiM0HUFn0e8", "annotations": []}, "rfxXEgc9RTQ": {"duration": 66.64, "subset": "training", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=rfxXEgc9RTQ", "annotations": [{"segment": [0, 66.641291], "label": "Playing squash"}]}, "DVXOr56dlKg": {"duration": 131.15, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DVXOr56dlKg", "annotations": [{"segment": [39.21971898907957, 110.55316827], "label": "Pole vault"}]}, "CoP3xaSZt7A": {"duration": 119.35, "subset": "validation", "resolution": "360x242", "url": "https://www.youtube.com/watch?v=CoP3xaSZt7A", "annotations": [{"segment": [5.772024295937824, 10.24068826698646], "label": "Capoeira"}, {"segment": 
[24.172090483619343, 25.845542901716065], "label": "Capoeira"}, {"segment": [33.469048361934476, 39.97691887675507], "label": "Capoeira"}, {"segment": [0.2, 118.987], "label": "Capoeira"}, {"segment": [50.76139001560062, 51.13326833073323], "label": "Capoeira"}, {"segment": [54.66611232449298, 56.3395647425897], "label": "Capoeira"}, {"segment": [77.90850702028081, 81.18072819968799], "label": "Capoeira"}, {"segment": [83.60125449921996, 93.65574835881435], "label": "Capoeira"}, {"segment": [98.12441229641185, 107.06174017160686], "label": "Capoeira"}]}, "hRIXXCe0Hi0": {"duration": 133.004, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=hRIXXCe0Hi0", "annotations": [{"segment": [25.106820479621906, 28.011741857429396], "label": "Rock-paper-scissors"}, {"segment": [41.29138244169222, 46.271247660790785], "label": "Rock-paper-scissors"}, {"segment": [57.683438787891646, 62.66330400699021], "label": "Rock-paper-scissors"}, {"segment": [77.6028996642859, 84.65770872467552], "label": "Rock-paper-scissors"}, {"segment": [92.12750655332336, 99.38980999784208], "label": "Rock-paper-scissors"}, {"segment": [108.72705628081121, 112.876943925117], "label": "Rock-paper-scissors"}]}, "w5lQ5vNAksM": {"duration": 101.66, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=w5lQ5vNAksM", "annotations": [{"segment": [0.01, 101.65696145124717], "label": "Playing flauta"}]}, "4SLvbRa2NI0": {"duration": 166.789, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4SLvbRa2NI0", "annotations": [{"segment": [32.52514294071763, 154.03907696723869], "label": "Blow-drying hair"}]}, "0ixNLmIRd30": {"duration": 87.702, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=0ixNLmIRd30", "annotations": []}, "zPGc84TC74A": {"duration": 60.0, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zPGc84TC74A", "annotations": 
[{"segment": [4.680215507940046, 14.695876694931743], "label": "Playing racquetball"}, {"segment": [17.129588759060567, 52.98003954988132], "label": "Playing racquetball"}]}, "nARkYpOZWyg": {"duration": 59.862, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nARkYpOZWyg", "annotations": [{"segment": [0.01, 59.821], "label": "Blowing leaves"}]}, "JBnHqQjeVMk": {"duration": 34.529, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=JBnHqQjeVMk", "annotations": [{"segment": [11.457804992199689, 25.712820592823714], "label": "Calf roping"}]}, "wKYPRyIgKuA": {"duration": 21.572, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wKYPRyIgKuA", "annotations": []}, "5L3ZcYo6SkY": {"duration": 53.99, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5L3ZcYo6SkY", "annotations": []}, "R-RQx5pbMvo": {"duration": 142.07999999999998, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=R-RQx5pbMvo", "annotations": [{"segment": [99.5245281575663, 107.06090668174727], "label": "Washing hands"}]}, "kp-mNHSU5Rc": {"duration": 172.01, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kp-mNHSU5Rc", "annotations": [{"segment": [38.78100468018721, 98.62989079563184], "label": "Grooming horse"}, {"segment": [107.21807176287052, 126.80985959438378], "label": "Grooming horse"}]}, "jXORdfzz4oE": {"duration": 75.61, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=jXORdfzz4oE", "annotations": [{"segment": [11.603023400936037, 26.563266770670825], "label": "Volleyball"}, {"segment": [37.16501404056162, 48.00235569422777], "label": "Volleyball"}]}, "Y4svcMuQgBw": {"duration": 164.79, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=Y4svcMuQgBw", "annotations": [{"segment": [51.91526053042122, 
143.42689235569424], "label": "Ironing clothes"}, {"segment": [150.10986583463338, 158.84913884555382], "label": "Ironing clothes"}]}, "GjHanmMGvuw": {"duration": 129.196, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=GjHanmMGvuw", "annotations": [{"segment": [3.8278923556942277, 75.76370670826833], "label": "Baton twirling"}]}, "LA0tW9OQW80": {"duration": 165.77, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LA0tW9OQW80", "annotations": []}, "RTwa2d6Oqvo": {"duration": 45.07, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RTwa2d6Oqvo", "annotations": [{"segment": [0, 29.10912923047534], "label": "Polishing shoes"}]}, "lBXRkMZfIXk": {"duration": 191.75, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lBXRkMZfIXk", "annotations": [{"segment": [101.55884146794443, 135.66107394908042], "label": "Sharpening knives"}, {"segment": [159.59246424102963, 164.0796000678627], "label": "Sharpening knives"}]}, "r3lWJy0FYIw": {"duration": 23.546, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=r3lWJy0FYIw", "annotations": []}, "KdiaNSVVe7Q": {"duration": 143.59, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KdiaNSVVe7Q", "annotations": []}, "PG0ao4HkF8M": {"duration": 78.948, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PG0ao4HkF8M", "annotations": [{"segment": [7.0143790951638065, 62.63717472698908], "label": "Hanging wallpaper"}]}, "pznmOdbp7E0": {"duration": 210.397, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=pznmOdbp7E0", "annotations": [{"segment": [111.43437968947329, 162.63839509906927], "label": "Making a cake"}]}, "SGHjq0KLpvk": {"duration": 20.62, "subset": "testing", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=SGHjq0KLpvk", "annotations": []}, "ZeUNeRiU6YY": {"duration": 121.255, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZeUNeRiU6YY", "annotations": [{"segment": [0.2, 115.16922395023327], "label": "Snowboarding"}]}, "kS_5cRXUTLA": {"duration": 166.93, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kS_5cRXUTLA", "annotations": []}, "_MYAaRrTeMQ": {"duration": 5.02, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=_MYAaRrTeMQ", "annotations": [{"segment": [0.01, 5.015509], "label": "Chopping wood"}]}, "mek9Zodu8LM": {"duration": 126.781, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=mek9Zodu8LM", "annotations": []}, "CP9NQpJD0-Y": {"duration": 97.32, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CP9NQpJD0-Y", "annotations": [{"segment": [45.81872045347681, 48.698755270751214], "label": "Drinking coffee"}, {"segment": [60.370470434785226, 62.94734282730859], "label": "Drinking coffee"}, {"segment": [74.31589750020578, 76.74118916375718], "label": "Drinking coffee"}, {"segment": [85.98761363104691, 88.26132456562634], "label": "Drinking coffee"}]}, "ZYAY3TEUjGI": {"duration": 178.03, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZYAY3TEUjGI", "annotations": [{"segment": [8.614227769110766, 170.61728549141966], "label": "Getting a haircut"}]}, "HhtSc4kz3tk": {"duration": 74.281, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HhtSc4kz3tk", "annotations": []}, "w_wIOJrztdU": {"duration": 140.063, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=w_wIOJrztdU", "annotations": [{"segment": [0.01, 131.08361934477378], "label": "Building sandcastles"}]}, "RmTDHszp1v0": {"duration": 127.36, "subset": "training", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=RmTDHszp1v0", "annotations": [{"segment": [43.51350631825274, 66.56175624024962], "label": "Using parallel bars"}, {"segment": [77.68849758190328, 119.01639399375976], "label": "Using parallel bars"}]}, "mYrTrVCYSY0": {"duration": 79.761, "subset": "training", "resolution": "426x236", "url": "https://www.youtube.com/watch?v=mYrTrVCYSY0", "annotations": [{"segment": [0.01, 79.721], "label": "Running a marathon"}]}, "uN8lcRwsBno": {"duration": 165.559, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uN8lcRwsBno", "annotations": [{"segment": [17.040664586583464, 156.22496879875195], "label": "Using the rowing machine"}]}, "N0DA6RpIf5Y": {"duration": 37.872, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=N0DA6RpIf5Y", "annotations": [{"segment": [0.2, 23.98740790951638], "label": "Raking leaves"}]}, "bhAVJ2uUw0U": {"duration": 63.93, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bhAVJ2uUw0U", "annotations": [{"segment": [0.8975363588143527, 24.931565522620904], "label": "Washing face"}]}, "Jv-bPV8eswU": {"duration": 105.05, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Jv-bPV8eswU", "annotations": [{"segment": [34.168978318316405, 75.63061627771233], "label": "Shaving"}]}, "ZK4Y6nOton8": {"duration": 117.49000000000001, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZK4Y6nOton8", "annotations": []}, "f2DS_AVorrY": {"duration": 18.34, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=f2DS_AVorrY", "annotations": []}, "MBTSe-NHK-I": {"duration": 133.747, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MBTSe-NHK-I", "annotations": [{"segment": [11.051533541341653, 95.93332293291733], "label": "River tubing"}, {"segment": 
[105.73520592823714, 121.37650858034323], "label": "River tubing"}, {"segment": [129.3014352574103, 132.6382464898596], "label": "River tubing"}]}, "WW0Sm6oDc9M": {"duration": 22.919, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WW0Sm6oDc9M", "annotations": [{"segment": [4.736001557632399, 8.910046728971963], "label": "Playing ten pins"}]}, "yRjaK8wT4Rc": {"duration": 103.097, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yRjaK8wT4Rc", "annotations": [{"segment": [7.395953198127926, 79.10454290171607], "label": "Cleaning sink"}]}, "fnf7FbZkL6k": {"duration": 76.58, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fnf7FbZkL6k", "annotations": [{"segment": [0.01, 76.26866498232083], "label": "Painting fence"}]}, "C1IuvUSmcvA": {"duration": 238.469, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=C1IuvUSmcvA", "annotations": [{"segment": [34.216680187207494, 142.44552730109206], "label": "Carving jack-o-lanterns"}, {"segment": [162.52923088923558, 192.28609204368175], "label": "Carving jack-o-lanterns"}]}, "ybT1q6cKpNY": {"duration": 88.49, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=ybT1q6cKpNY", "annotations": []}, "9hPFweZeIWs": {"duration": 82.176, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9hPFweZeIWs", "annotations": [{"segment": [23.9731723037629, 77.81666090044962], "label": "Calf roping"}]}, "WjueR8K-nnI": {"duration": 122.602, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WjueR8K-nnI", "annotations": [{"segment": [17.160374149659862, 100.67420790241326], "label": "Riding bumper cars"}]}, "jTMdMnbW9OI": {"duration": 123.9, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jTMdMnbW9OI", "annotations": [{"segment": 
[92.39469829329174, 120.03579004212169], "label": "Washing face"}]}, "YSrnHPcdGL4": {"duration": 187.76, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YSrnHPcdGL4", "annotations": [{"segment": [14.645557956318253, 129.75964349297973], "label": "Archery"}]}, "pGKTRM1vcfw": {"duration": 43.98, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pGKTRM1vcfw", "annotations": [{"segment": [2.5385459817472698, 21.063070713416536], "label": "Playing lacrosse"}]}, "ZeEpbvUaTrQ": {"duration": 234.336, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZeEpbvUaTrQ", "annotations": []}, "OYMGPkucZYI": {"duration": 72.56, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OYMGPkucZYI", "annotations": [{"segment": [24.564791683307334, 49.80879419656787], "label": "Cleaning shoes"}]}, "YjvAy19whWU": {"duration": 202.386, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YjvAy19whWU", "annotations": []}, "6VD_qwIgTzU": {"duration": 109.37, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6VD_qwIgTzU", "annotations": [{"segment": [0.6824670826833074, 7.677754680187207], "label": "Getting a tattoo"}]}, "iWes42ows7w": {"duration": 61.16, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=iWes42ows7w", "annotations": []}, "uS5QIHQztyA": {"duration": 166.05, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uS5QIHQztyA", "annotations": []}, "xrbKvttgcMA": {"duration": 38.82, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=xrbKvttgcMA", "annotations": [{"segment": [0.01, 38.823763], "label": "Playing squash"}]}, "hi4aLY1ajTY": {"duration": 140.203, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=hi4aLY1ajTY", 
"annotations": [{"segment": [15.7167476635514, 32.74789719626168], "label": "Swimming"}, {"segment": [38.64251869158878, 107.8497414330218], "label": "Swimming"}, {"segment": [123.56873208722742, 140.161], "label": "Swimming"}]}, "CikGafc6H_8": {"duration": 190.66, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CikGafc6H_8", "annotations": [{"segment": [4.75904014976599, 23.200320730109205], "label": "Playing water polo"}, {"segment": [60.97520191887675, 76.44208240561622], "label": "Playing water polo"}, {"segment": [103.21168324804991, 106.18608334165366], "label": "Playing water polo"}, {"segment": [113.62208357566301, 121.9504038377535], "label": "Playing water polo"}]}, "k2vkwy2vdP4": {"duration": 89.629, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=k2vkwy2vdP4", "annotations": [{"segment": [20.908580715396578, 74.9921094992224], "label": "Surfing"}]}, "l9mvKfKACio": {"duration": 154.645, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=l9mvKfKACio", "annotations": [{"segment": [28.701277691107645, 47.996254290171606], "label": "Snow tubing"}]}, "yVKmkR78Jn0": {"duration": 37.106, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yVKmkR78Jn0", "annotations": [{"segment": [7.92099375975039, 36.840999999999994], "label": "Hand car wash"}]}, "8ma-p7ap2MQ": {"duration": 224.189, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=8ma-p7ap2MQ", "annotations": [{"segment": [69.25016392511701, 162.28321243057724], "label": "Trimming branches or hedges"}]}, "IJ76Wtgg2g4": {"duration": 109.88, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IJ76Wtgg2g4", "annotations": [{"segment": [4.091782240000001, 106.74585023400935], "label": "Using the balance beam"}]}, "RN2QwhcAsUY": {"duration": 121.35, "subset": "validation", 
"resolution": "320x240", "url": "https://www.youtube.com/watch?v=RN2QwhcAsUY", "annotations": [{"segment": [11.354134165366615, 113.1628705148206], "label": "Bathing dog"}]}, "MoVZoCmkdjY": {"duration": 160.17000000000002, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MoVZoCmkdjY", "annotations": [{"segment": [0.01, 152.1464414976599], "label": "Mixing drinks"}]}, "lUrHCNqVl-A": {"duration": 176.54, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=lUrHCNqVl-A", "annotations": [{"segment": [15.285558491727425, 18.315128643240968], "label": "Discus throw"}, {"segment": [27.954670034420424, 35.390887679044575], "label": "Discus throw"}, {"segment": [43.10252079198814, 48.6108301583764], "label": "Discus throw"}, {"segment": [0.2, 176.34131519274376], "label": "Discus throw"}, {"segment": [69.5424057506518, 73.9490532437624], "label": "Discus throw"}, {"segment": [81.93610182502537, 85.79191838149715], "label": "Discus throw"}, {"segment": [94.88062883603779, 98.46102992419017], "label": "Discus throw"}, {"segment": [106.44807850545314, 110.85472599856375], "label": "Discus throw"}, {"segment": [119.94343645310438, 124.07466847789559], "label": "Discus throw"}, {"segment": [143.07833579193507, 146.93415234840685], "label": "Discus throw"}, {"segment": [157.12452467622512, 167.039481535724], "label": "Discus throw"}]}, "3TNDCTlLlGk": {"duration": 214.691, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3TNDCTlLlGk", "annotations": [{"segment": [46.74462449372621, 50.083526607189526], "label": "Wakeboarding"}, {"segment": [59.0985623135405, 72.12028055604745], "label": "Wakeboarding"}, {"segment": [85.14199879855441, 89.48257154605673], "label": "Wakeboarding"}, {"segment": [141.2355543047382, 149.91669979974287], "label": "Wakeboarding"}, {"segment": [154.25727254724518, 160.93507677417182], "label": "Wakeboarding"}, {"segment": 
[166.27732015571314, 187.31240181659277], "label": "Wakeboarding"}]}, "3ZJQHfrfR2M": {"duration": 40.055, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3ZJQHfrfR2M", "annotations": [{"segment": [0.01, 0.02], "label": "Raking leaves"}]}, "HxrtuZSCwvY": {"duration": 165.72, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HxrtuZSCwvY", "annotations": []}, "nY8a1E4QH5Q": {"duration": 236.01, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nY8a1E4QH5Q", "annotations": [{"segment": [8.468592823712948, 232.7022028081123], "label": "Zumba"}]}, "i8hNrK3r_MY": {"duration": 219.267, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=i8hNrK3r_MY", "annotations": []}, "SjbKlrUx7Do": {"duration": 112.66, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SjbKlrUx7Do", "annotations": []}, "9xHLzVojpBc": {"duration": 96.69, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=9xHLzVojpBc", "annotations": [{"segment": [14.613809051482058, 76.83549088923556], "label": "Tennis serve with ball bouncing"}]}, "mSPUMHD4F-E": {"duration": 169.553, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mSPUMHD4F-E", "annotations": [{"segment": [5.554729217740139, 33.59288622157131], "label": "Installing carpet"}, {"segment": [49.19902975039001, 160.0291021450858], "label": "Installing carpet"}]}, "ujWG6rjlN3s": {"duration": 46.81, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ujWG6rjlN3s", "annotations": [{"segment": [4.673839825273011, 18.549301806552265], "label": "Cheerleading"}, {"segment": [20.594106730109207, 26.071262775351013], "label": "Cheerleading"}, {"segment": [27.458808973478938, 35.418942425897036], "label": "Cheerleading"}]}, "dc8pLGl9Ccc": {"duration": 208.376, "subset": 
"validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dc8pLGl9Ccc", "annotations": [{"segment": [114.68490610265286, 178.29056965405474], "label": "Rock-paper-scissors"}]}, "RQajzyqAcQM": {"duration": 177.454, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RQajzyqAcQM", "annotations": [{"segment": [57.69223010627268, 58.79638770088128], "label": "Building sandcastles"}, {"segment": [123.94168578278902, 136.915536490928], "label": "Building sandcastles"}, {"segment": [142.98840321565584, 149.06126994038365], "label": "Building sandcastles"}]}, "r5NqGSZsN0E": {"duration": 56.26, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=r5NqGSZsN0E", "annotations": [{"segment": [11.147063439000146, 40.55073471510289], "label": "Hand washing clothes"}]}, "KZLEUd2ALVI": {"duration": 42.03, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KZLEUd2ALVI", "annotations": [{"segment": [3.143663026521061, 11.788736349453979], "label": "Hammer throw"}, {"segment": [22.59507800312013, 29.733812792511703], "label": "Hammer throw"}]}, "8WJWtfmEipI": {"duration": 15.95, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8WJWtfmEipI", "annotations": [{"segment": [1.5429495070202808, 15.752108843537416], "label": "Shoveling snow"}]}, "Yg03CCu9GMo": {"duration": 150.74, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Yg03CCu9GMo", "annotations": []}, "dN14VPSHimI": {"duration": 110.964, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=dN14VPSHimI", "annotations": [{"segment": [14.54489262402496, 31.167627051482057], "label": "Calf roping"}, {"segment": [36.362231560062405, 48.48297541341654], "label": "Calf roping"}, {"segment": [55.58226824180967, 62.68156107020281], "label": "Calf roping"}, {"segment": [65.27886332449299, 
76.70699324336974], "label": "Calf roping"}, {"segment": [80.86267685023401, 86.74989529329173], "label": "Calf roping"}]}, "m7cHlmcFk9Y": {"duration": 181.627, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=m7cHlmcFk9Y", "annotations": [{"segment": [19.91168641288258, 174.340387347462], "label": "Clipping cat claws"}]}, "cinmiQ4tHYk": {"duration": 70.26, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cinmiQ4tHYk", "annotations": [{"segment": [14.578870940717628, 62.042413176287056], "label": "Tai chi"}]}, "MhO48WnApoM": {"duration": 143.686, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MhO48WnApoM", "annotations": [{"segment": [10.759569125622168, 137.40866273634944], "label": "Ice fishing"}]}, "-hiRu8Sh7Tk": {"duration": 68.8, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-hiRu8Sh7Tk", "annotations": []}, "BJ-1HVacaF4": {"duration": 226.372, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BJ-1HVacaF4", "annotations": [{"segment": [59.58996277223246, 183.35373009345795], "label": "Scuba diving"}, {"segment": [188.64278000000002, 191.11100328971963], "label": "Scuba diving"}, {"segment": [195.34224321495327, 196.7526565233645], "label": "Scuba diving"}, {"segment": [199.22087981308414, 201.33649977570096], "label": "Scuba diving"}, {"segment": [204.50992971962617, 206.625549682243], "label": "Scuba diving"}]}, "L7dH-I2qLU8": {"duration": 125.063, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=L7dH-I2qLU8", "annotations": []}, "XoFikALe8Q0": {"duration": 184.715, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=XoFikALe8Q0", "annotations": [{"segment": [42.648650174578414, 117.86012109055791], "label": "Hand car wash"}]}, "msd9vrplD-I": {"duration": 99.567, "subset": "validation", 
"resolution": "3840x2160", "url": "https://www.youtube.com/watch?v=msd9vrplD-I", "annotations": [{"segment": [70.13194335350272, 75.25786612352617], "label": "Swinging at the playground"}]}, "ybFvomdhW2Y": {"duration": 183.09, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ybFvomdhW2Y", "annotations": [{"segment": [4.8557235894877975, 57.98305227447193], "label": "Making an omelette"}, {"segment": [67.40886865406588, 79.97662382685783], "label": "Making an omelette"}, {"segment": [87.68865541016199, 103.68398017553355], "label": "Making an omelette"}, {"segment": [114.53795055203568, 114.8235813514173], "label": "Making an omelette"}, {"segment": [123.10687453348473, 167.37964715444616], "label": "Making an omelette"}]}, "t8hx8ihRPGc": {"duration": 172.78, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=t8hx8ihRPGc", "annotations": [{"segment": [8.179636547635278, 19.21138265355428], "label": "Swinging at the playground"}, {"segment": [28.897793868507552, 34.27913343237048], "label": "Swinging at the playground"}, {"segment": [39.929539974426554, 45.310879538289484], "label": "Swinging at the playground"}, {"segment": [58.49516146975366, 67.64343872832065], "label": "Swinging at the playground"}, {"segment": [92.3976007220901, 95.89547143860102], "label": "Swinging at the playground"}, {"segment": [101.00774402427079, 105.043748697168], "label": "Swinging at the playground"}, {"segment": [112.57762408657611, 119.03523156321162], "label": "Swinging at the playground"}, {"segment": [157.2427424666384, 162.3550150523082], "label": "Swinging at the playground"}]}, "mO1T8zhIliY": {"duration": 233.849, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mO1T8zhIliY", "annotations": [{"segment": [5.609436451127097, 231.4438686006598], "label": "Doing fencing"}]}, "jRj8deMhPrQ": {"duration": 123.67, "subset": "training", "resolution": "640x360", "url": 
"https://www.youtube.com/watch?v=jRj8deMhPrQ", "annotations": [{"segment": [7.331419939790789, 19.87200667890661], "label": "Playing ice hockey"}, {"segment": [35.306574973203006, 45.53197646817437], "label": "Playing ice hockey"}, {"segment": [49.39061854174847, 61.159476866149475], "label": "Playing ice hockey"}, {"segment": [67.14037208018934, 80.64561933769866], "label": "Playing ice hockey"}, {"segment": [84.31132930759406, 98.78123591887676], "label": "Playing ice hockey"}, {"segment": [107.07731627925116, 116.91685345085803], "label": "Playing ice hockey"}]}, "mRgS35iyhYE": {"duration": 55.682, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mRgS35iyhYE", "annotations": [{"segment": [0.2, 53.20285335413416], "label": "Riding bumper cars"}]}, "PgfUSPHXluA": {"duration": 50.783, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=PgfUSPHXluA", "annotations": [{"segment": [0, 31.66365054602184], "label": "Arm wrestling"}]}, "Lb1PFRh8cYs": {"duration": 22.06, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Lb1PFRh8cYs", "annotations": []}, "CZveW9zX0s8": {"duration": 141.41, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=CZveW9zX0s8", "annotations": [{"segment": [11.247131045241808, 12.349790951638065], "label": "Grooming dog"}, {"segment": [44.76799219968799, 50.06075975039001], "label": "Grooming dog"}, {"segment": [63.07214664586583, 66.8211903276131], "label": "Grooming dog"}, {"segment": [75.64246957878315, 81.15576911076442], "label": "Grooming dog"}, {"segment": [104.98807488299532, 121.7508767550702], "label": "Grooming dog"}, {"segment": [129.25002496099845, 135.20523088923557], "label": "Grooming dog"}]}, "ITVfOVR34Jo": {"duration": 137.69, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ITVfOVR34Jo", "annotations": [{"segment": [7.303599463706439, 
122.65750769110765], "label": "Paintball"}]}, "uElCsF1fOgE": {"duration": 221.01, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=uElCsF1fOgE", "annotations": [{"segment": [120.73170396, 184.46128248000002], "label": "Cumbia"}]}, "pMmlJGSucss": {"duration": 74.24, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pMmlJGSucss", "annotations": [{"segment": [31.44054068018721, 71.89711876131045], "label": "Plataform diving"}]}, "WJfMz7joX4s": {"duration": 87.099, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WJfMz7joX4s", "annotations": [{"segment": [0.01, 87.061], "label": "Playing ice hockey"}]}, "OtcJMIJTTx4": {"duration": 8.08, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OtcJMIJTTx4", "annotations": [{"segment": [0.5463696, 5.382383775351014], "label": "Springboard diving"}]}, "aEpRYY_wi0M": {"duration": 39.34, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aEpRYY_wi0M", "annotations": [{"segment": [10.371145457957972, 15.02757809347199], "label": "Fixing bicycle"}, {"segment": [19.806548429920586, 24.03409911216358], "label": "Fixing bicycle"}]}, "YFIkhFEtD8w": {"duration": 87.656, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YFIkhFEtD8w", "annotations": [{"segment": [0.01, 4.9229201821134065], "label": "Playing rubik cube"}, {"segment": [26.255574304604835, 81.6384256099844], "label": "Playing rubik cube"}]}, "huo2ldsQHUI": {"duration": 15.09, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=huo2ldsQHUI", "annotations": [{"segment": [0.07041784867740233, 14.341841102176623], "label": "Spinning"}]}, "3CO7fRiJOy4": {"duration": 24.196, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=3CO7fRiJOy4", "annotations": [{"segment": [0.01, 
23.833393808907992], "label": "Riding bumper cars"}]}, "1V0TqgcXSVk": {"duration": 104.84, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=1V0TqgcXSVk", "annotations": [{"segment": [27.75900564, 87.73158353999999], "label": "Doing step aerobics"}]}, "5AoshxUQv0U": {"duration": 16.069, "subset": "training", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=5AoshxUQv0U", "annotations": [{"segment": [4.229218408736349, 13.688653666146648], "label": "Doing a powerbomb"}]}, "hZ0jI9U5Nws": {"duration": 26.36, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hZ0jI9U5Nws", "annotations": [{"segment": [2.795812786271451, 24.298902304212167], "label": "Using uneven bars"}]}, "Nosx28FNB5E": {"duration": 73.027, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Nosx28FNB5E", "annotations": [{"segment": [15.607902608424338, 32.013289291731674], "label": "Playing pool"}, {"segment": [27.228384842433698, 32.69684707020281], "label": "Playing pool"}, {"segment": [30.987952624024963, 63.570873397815916], "label": "Playing pool"}]}, "TXSNkjjTzdE": {"duration": 225.327, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=TXSNkjjTzdE", "annotations": [{"segment": [48.51021639232916, 191.931724099844], "label": "Blow-drying hair"}]}, "Fdjw9ld-hbA": {"duration": 6.084, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Fdjw9ld-hbA", "annotations": [{"segment": [0.01, 5.717851310097941], "label": "Doing a powerbomb"}]}, "ho-aTcEykAc": {"duration": 12.7, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ho-aTcEykAc", "annotations": []}, "9VGxxRNOdbM": {"duration": 68.57, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9VGxxRNOdbM", "annotations": [{"segment": [23.838039001560063, 62.641663026521066], "label": 
"Using the pommel horse"}]}, "ZgIKmwnCqr4": {"duration": 17.206, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=ZgIKmwnCqr4", "annotations": [{"segment": [0.2, 17.005986394557823], "label": "Hitting a pinata"}]}, "HAEyXtEdERI": {"duration": 194.978, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HAEyXtEdERI", "annotations": [{"segment": [30.417778343786814, 35.2846228787927], "label": "BMX"}, {"segment": [40.455645197236464, 51.40604540099971], "label": "BMX"}, {"segment": [76.65280142634276, 90.34080159594383], "label": "BMX"}, {"segment": [93.99093499375975, 97.64106839157566], "label": "BMX"}, {"segment": [110.11235750078004, 121.67111326053042], "label": "BMX"}, {"segment": [125.92960222464897, 170.03538078159124], "label": "BMX"}]}, "tIbSsad0z9U": {"duration": 158.918, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tIbSsad0z9U", "annotations": [{"segment": [19.21381274298591, 23.676375702647153], "label": "Swimming"}, {"segment": [53.426795433722106, 58.88103905108585], "label": "Swimming"}]}, "CB0yYGVNcOk": {"duration": 143.27, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CB0yYGVNcOk", "annotations": [{"segment": [29.84959893, 135.22091574882995], "label": "Playing saxophone"}, {"segment": [4.7327193, 7.64882995], "label": "Playing saxophone"}]}, "QKXh-onHFIM": {"duration": 17.903, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QKXh-onHFIM", "annotations": []}, "CvbL36Y8hN0": {"duration": 175.27, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CvbL36Y8hN0", "annotations": []}, "gGBDLkTjbpE": {"duration": 89.49, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=gGBDLkTjbpE", "annotations": []}, "Rvsc5eymWIg": {"duration": 68.592, "subset": "training", "resolution": "1920x1080", 
"url": "https://www.youtube.com/watch?v=Rvsc5eymWIg", "annotations": [{"segment": [5.655066978193147, 55.91047352024923], "label": "Changing car wheel"}]}, "BlfFKVGQbi8": {"duration": 66.293, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=BlfFKVGQbi8", "annotations": [{"segment": [7.988475858420968, 59.07756464346769], "label": "Shuffleboard"}]}, "ks018ZYzrx4": {"duration": 226.49, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ks018ZYzrx4", "annotations": []}, "AZx_lm2XLHk": {"duration": 48.44, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AZx_lm2XLHk", "annotations": [{"segment": [15.566105906396258, 17.07737832449298], "label": "High jump"}, {"segment": [21.762322820592825, 25.61606748673947], "label": "High jump"}]}, "RodH5r1FjzU": {"duration": 61.39, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=RodH5r1FjzU", "annotations": []}, "7NMds32-lMc": {"duration": 237.49, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7NMds32-lMc", "annotations": [{"segment": [4.51474193, 225.71151002652107], "label": "Clean and jerk"}, {"segment": [90.76744966999999, 199.53686874000002], "label": "Clean and jerk"}]}, "5I0K3y27EUM": {"duration": 15.024, "subset": "validation", "resolution": "270x360", "url": "https://www.youtube.com/watch?v=5I0K3y27EUM", "annotations": [{"segment": [5.887829014738347, 15.023310657596372], "label": "Fun sliding down"}]}, "cY0E0B1b3KA": {"duration": 60.024, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=cY0E0B1b3KA", "annotations": []}, "8VlAzVqm2HQ": {"duration": 119.305, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8VlAzVqm2HQ", "annotations": []}, "G98dfNXqu4Q": {"duration": 26.912, "subset": "training", "resolution": "426x240", "url": 
"https://www.youtube.com/watch?v=G98dfNXqu4Q", "annotations": [{"segment": [0.01, 24.900179773326354], "label": "Calf roping"}]}, "IL8__uSFpqg": {"duration": 115.4, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=IL8__uSFpqg", "annotations": []}, "gS4dNLjHouo": {"duration": 186.782, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gS4dNLjHouo", "annotations": [{"segment": [0.01, 186.78131519274376], "label": "Drum corps"}]}, "EVtM8DKW4bc": {"duration": 126.294, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EVtM8DKW4bc", "annotations": [{"segment": [0.01, 126.29333333333334], "label": "Riding bumper cars"}]}, "AZrI6X2XAUU": {"duration": 107.602, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=AZrI6X2XAUU", "annotations": [{"segment": [16.90159659736108, 32.63179778722012], "label": "Putting in contact lenses"}, {"segment": [69.78184621171843, 75.63883591622854], "label": "Putting in contact lenses"}]}, "RclRzty0YVE": {"duration": 100.473, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RclRzty0YVE", "annotations": [{"segment": [4.536151090342679, 80.41538006230529], "label": "Beach soccer"}]}, "1Uur858vdY8": {"duration": 30.81, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=1Uur858vdY8", "annotations": [{"segment": [3.8801551599999993, 23.91843124], "label": "Long jump"}]}, "H6O2o-WLyK4": {"duration": 46.12, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=H6O2o-WLyK4", "annotations": []}, "Sh8r9g_lp7U": {"duration": 69.428, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Sh8r9g_lp7U", "annotations": [{"segment": [34.226150783151326, 56.863066965678634], "label": "Laying tile"}]}, "-G-sh-NhYtk": {"duration": 20.83, "subset": "training", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=-G-sh-NhYtk", "annotations": [{"segment": [0, 20.828253], "label": "Shoveling snow"}]}, "_15t4WTR19s": {"duration": 238.28, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_15t4WTR19s", "annotations": [{"segment": [27.872971918876754, 223.74583775351016], "label": "Preparing salad"}]}, "hIjOvSJf9c0": {"duration": 170.528, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=hIjOvSJf9c0", "annotations": [{"segment": [138.4374193829948, 154.0845599703881], "label": "Hanging wallpaper"}]}, "87hjft6OBiU": {"duration": 71.28999999999999, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=87hjft6OBiU", "annotations": [{"segment": [22.797938312090306, 65.50236910156679], "label": "Snatch"}]}, "1XQUDJhMcj8": {"duration": 121.0, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=1XQUDJhMcj8", "annotations": [{"segment": [7.3595616224648985, 20.56903120124805], "label": "Running a marathon"}, {"segment": [34.91074102964118, 45.478316692667704], "label": "Running a marathon"}, {"segment": [56.04589235569423, 69.4440686427457], "label": "Running a marathon"}, {"segment": [80.20035101404056, 101.52420904836194], "label": "Running a marathon"}]}, "dxgeEdlt7do": {"duration": 130.26, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dxgeEdlt7do", "annotations": [{"segment": [1.4225389396528243, 106.08076092839632], "label": "Doing motocross"}]}, "qaB0igbuKuQ": {"duration": 160.868, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qaB0igbuKuQ", "annotations": [{"segment": [73.45615132605305, 76.46459438377535], "label": "Clipping cat claws"}, {"segment": [89.75188455538222, 96.01947425897036], "label": "Clipping cat claws"}, {"segment": [97.02228861154447, 127.85882995319812], "label": "Clipping cat 
claws"}]}, "KXMZHfCwTO0": {"duration": 54.823, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=KXMZHfCwTO0", "annotations": []}, "yjQ-0AGqFS4": {"duration": 202.339, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yjQ-0AGqFS4", "annotations": [{"segment": [0.2, 201.9654290171607], "label": "Playing drums"}]}, "F54PZypvzCc": {"duration": 80.062, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=F54PZypvzCc", "annotations": [{"segment": [0, 6.744728555382216], "label": "Throwing darts"}, {"segment": [18.36064995631825, 26.479304698907956], "label": "Throwing darts"}, {"segment": [34.597959441497665, 42.34190704212169], "label": "Throwing darts"}, {"segment": [52.583902255850234, 59.078826049922], "label": "Throwing darts"}, {"segment": [67.44728555382216, 73.31769744461779], "label": "Throwing darts"}]}, "ZT_gv2W65Zc": {"duration": 28.79, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=ZT_gv2W65Zc", "annotations": [{"segment": [4.668072390155257, 28.43983407239825], "label": "Brushing hair"}]}, "sjHCZWdopsQ": {"duration": 193.07, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=sjHCZWdopsQ", "annotations": [{"segment": [26.45486246, 142.88367365], "label": "Playing accordion"}]}, "YbEX3klOS3Q": {"duration": 68.99, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=YbEX3klOS3Q", "annotations": [{"segment": [6.49957636657905, 18.09161381891839], "label": "Javelin throw"}, {"segment": [23.03457061642618, 30.663916977796898], "label": "Javelin throw"}, {"segment": [32.59811746377821, 40.01255266003989], "label": "Javelin throw"}, {"segment": [53.01467814913646, 61.933491501161384], "label": "Javelin throw"}]}, "5T7vTuJmYeQ": {"duration": 11.355, "subset": "training", "resolution": "272x480", "url": 
"https://www.youtube.com/watch?v=5T7vTuJmYeQ", "annotations": [{"segment": [0.20000000000000004, 7.994283931357254], "label": "Using the monkey bar"}]}, "ayLeSjJz53I": {"duration": 146.01, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ayLeSjJz53I", "annotations": [{"segment": [5.238930384426261, 140.76778163371432], "label": "Ballet"}]}, "n04zQqQI3wQ": {"duration": 68.128, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=n04zQqQI3wQ", "annotations": [{"segment": [12.827696261682243, 36.055358255451715], "label": "Snow tubing"}]}, "LBC8Fa1oCGI": {"duration": 16.6, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=LBC8Fa1oCGI", "annotations": [{"segment": [0, 16.555], "label": "Windsurfing"}]}, "J8B2dX3FLTo": {"duration": 204.544, "subset": "validation", "resolution": "480x272", "url": "https://www.youtube.com/watch?v=J8B2dX3FLTo", "annotations": [{"segment": [60.75896514618973, 125.01713770917574], "label": "Playing drums"}]}, "0VklWRtiNyw": {"duration": 179.513, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=0VklWRtiNyw", "annotations": []}, "KlxUgvLHP7M": {"duration": 168.995, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KlxUgvLHP7M", "annotations": [{"segment": [27.678656860560135, 158.6909650826833], "label": "Rope skipping"}]}, "PxEu0AAH9LI": {"duration": 233.11, "subset": "training", "resolution": "492x360", "url": "https://www.youtube.com/watch?v=PxEu0AAH9LI", "annotations": [{"segment": [3.6365854089945913, 227.65024492667706], "label": "Belly dance"}]}, "ljxNDSS_AcA": {"duration": 179.28, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ljxNDSS_AcA", "annotations": [{"segment": [93.36802374726989, 153.4701947223089], "label": "Painting"}, {"segment": [24.0408683900156, 85.62101998], "label": "Painting"}]}, "c0c9G-qotQE": 
{"duration": 53.66, "subset": "testing", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=c0c9G-qotQE", "annotations": []}, "l0aksT_7wKo": {"duration": 30.953, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=l0aksT_7wKo", "annotations": [{"segment": [10.817347893915757, 19.84790171606864], "label": "Doing crunches"}]}, "Gda1xYAMb10": {"duration": 140.16, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Gda1xYAMb10", "annotations": []}, "TlwJfzlZZsE": {"duration": 120.51, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=TlwJfzlZZsE", "annotations": [{"segment": [22.551223429999997, 94.75177375], "label": "Discus throw"}]}, "9fQ2wWFJJGo": {"duration": 213.415, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9fQ2wWFJJGo", "annotations": [{"segment": [55.832785046728965, 106.3581308411215], "label": "Building sandcastles"}]}, "UYhKDweME3A": {"duration": 91.86, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UYhKDweME3A", "annotations": [{"segment": [16.479989281203903, 70.64899752724803], "label": "Preparing pasta"}]}, "weqZ0sgHXNc": {"duration": 193.608, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=weqZ0sgHXNc", "annotations": []}, "rt3t2n6K_ww": {"duration": 210.838, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rt3t2n6K_ww", "annotations": [{"segment": [0.2, 210.58100000000002], "label": "Playing drums"}]}, "XLsuG0cNl4o": {"duration": 54.17, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=XLsuG0cNl4o", "annotations": [{"segment": [0.01, 13.810521060842433], "label": "Shaving"}, {"segment": [24.369023400936037, 45.232624024961], "label": "Shaving"}]}, "F8EK6W0XAKY": {"duration": 160.427, "subset": "testing", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=F8EK6W0XAKY", "annotations": []}, "afORRx0QZgI": {"duration": 131.96, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=afORRx0QZgI", "annotations": [{"segment": [20.99815636707101, 123.3127014024961], "label": "Belly dance"}]}, "y21yGd1uMU4": {"duration": 217.873, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=y21yGd1uMU4", "annotations": []}, "TdNiQsZNJTU": {"duration": 198.23, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=TdNiQsZNJTU", "annotations": [{"segment": [0.01, 198.22875283446712], "label": "Playing water polo"}]}, "-GRvxWH4axc": {"duration": 21.967, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-GRvxWH4axc", "annotations": [{"segment": [0.6151682242990654, 20.71066355140187], "label": "Hitting a pinata"}]}, "3Ay2SFDtYVo": {"duration": 67.45, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=3Ay2SFDtYVo", "annotations": []}, "Yc9pZ8Vy-3s": {"duration": 218.71, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Yc9pZ8Vy-3s", "annotations": [{"segment": [8.871181828393135, 58.00388118564742], "label": "Kayaking"}, {"segment": [98.60659801560062, 209.83757017160687], "label": "Kayaking"}]}, "M30w7UiD8Ac": {"duration": 80.899, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=M30w7UiD8Ac", "annotations": []}, "gR9qUDlytF8": {"duration": 16.045, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=gR9qUDlytF8", "annotations": [{"segment": [0.01, 16.001], "label": "Camel ride"}]}, "Us795clHJmw": {"duration": 30.953, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Us795clHJmw", "annotations": [{"segment": [0.01, 30.95219954648526], "label": "Hitting a pinata"}]}, "KezLNkSQF1Y": 
{"duration": 210.025, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KezLNkSQF1Y", "annotations": []}, "HwYZEZYgeBI": {"duration": 72.53999999999999, "subset": "training", "resolution": "854x478", "url": "https://www.youtube.com/watch?v=HwYZEZYgeBI", "annotations": [{"segment": [25.108165366614667, 64.01451170046802], "label": "Polishing forniture"}]}, "tZswexUR6Q0": {"duration": 192.818, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tZswexUR6Q0", "annotations": [{"segment": [18.945308892355694, 192.761], "label": "Wakeboarding"}]}, "7gRbYNWSUjk": {"duration": 100.613, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7gRbYNWSUjk", "annotations": [{"segment": [0.01, 90.09562934898348], "label": "Capoeira"}]}, "wriCGl_odCY": {"duration": 69.684, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wriCGl_odCY", "annotations": []}, "VRiUhsafjRM": {"duration": 198.067, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=VRiUhsafjRM", "annotations": [{"segment": [35.21390639625585, 80.62945553822152], "label": "Baking cookies"}]}, "t7JiXZlKhpA": {"duration": 161.89, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=t7JiXZlKhpA", "annotations": []}, "-KGo1Xq3wQ8": {"duration": 76.51, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-KGo1Xq3wQ8", "annotations": []}, "YDwHdB6MBrE": {"duration": 94.16, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YDwHdB6MBrE", "annotations": [{"segment": [18.54967356, 57.529316], "label": "Playing field hockey"}, {"segment": [62.681532319999995, 83.1514762698908], "label": "Playing field hockey"}]}, "laVTxf8Vwvg": {"duration": 201.76, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=laVTxf8Vwvg", 
"annotations": []}, "mua8hNPuQHw": {"duration": 109.343, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mua8hNPuQHw", "annotations": [{"segment": [0.01, 107.66659278314656], "label": "Doing fencing"}]}, "ZjvmWr5LoFw": {"duration": 226.233, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZjvmWr5LoFw", "annotations": [{"segment": [10.411613939387507, 197.82066484836264], "label": "Riding bumper cars"}]}, "1494UwmvAJM": {"duration": 42.075, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1494UwmvAJM", "annotations": []}, "p0O-EsMFcL4": {"duration": 24.614, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=p0O-EsMFcL4", "annotations": [{"segment": [0.01, 23.729995815912634], "label": "River tubing"}]}, "_032TQam_mY": {"duration": 69.08, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_032TQam_mY", "annotations": [{"segment": [3.0912852499999994, 65.50947056240248], "label": "Grooming horse"}]}, "Wms7wsENt0A": {"duration": 106.464, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Wms7wsENt0A", "annotations": []}, "onW5hJXnI5s": {"duration": 204.57, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=onW5hJXnI5s", "annotations": [{"segment": [11.488987151326054, 27.765052282371297], "label": "Horseback riding"}, {"segment": [33.19040732605305, 64.14684492823713], "label": "Horseback riding"}, {"segment": [86.48654216692668, 93.50758987051482], "label": "Horseback riding"}, {"segment": [103.72002289391575, 187.97259533697348], "label": "Horseback riding"}]}, "A5wdaSwVeTo": {"duration": 53.15, "subset": "testing", "resolution": "318x240", "url": "https://www.youtube.com/watch?v=A5wdaSwVeTo", "annotations": []}, "C8IEqXTBod0": {"duration": 75.09, "subset": "training", "resolution": "316x240", "url": 
"https://www.youtube.com/watch?v=C8IEqXTBod0", "annotations": [{"segment": [22.834469578783153, 29.50916068642746], "label": "Shoveling snow"}, {"segment": [61.243218408736354, 72.13350390015601], "label": "Shoveling snow"}]}, "JHKAcpYlxkQ": {"duration": 17.35, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JHKAcpYlxkQ", "annotations": []}, "kzvFQv5UCx4": {"duration": 217.73, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kzvFQv5UCx4", "annotations": [{"segment": [8.489209555382216, 59.4244668876755], "label": "Getting a tattoo"}, {"segment": [59.64577802000001, 205.0993028580343], "label": "Getting a tattoo"}]}, "x5MPJnwJrfE": {"duration": 131.1, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=x5MPJnwJrfE", "annotations": [{"segment": [0, 109.21572269578783], "label": "Getting a tattoo"}]}, "QJVtvcYcwac": {"duration": 206.426, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QJVtvcYcwac", "annotations": [{"segment": [41.864662287171754, 168.42475538377533], "label": "Braiding hair"}]}, "u1TCzaAQkQ0": {"duration": 67.97, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=u1TCzaAQkQ0", "annotations": []}, "pne20gW35tA": {"duration": 52.55, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=pne20gW35tA", "annotations": []}, "vpWkDgOwyvM": {"duration": 151.07, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=vpWkDgOwyvM", "annotations": []}, "0WJX2A6PSnA": {"duration": 22.036, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0WJX2A6PSnA", "annotations": [{"segment": [6.732194227769111, 17.2844375975039], "label": "Cleaning sink"}]}, "MnZ9L54twws": {"duration": 136.626, "subset": "validation", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=MnZ9L54twws", "annotations": [{"segment": [0.01, 124.24084711388456], "label": "Using the rowing machine"}]}, "jzDXtd0__DM": {"duration": 180.58, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jzDXtd0__DM", "annotations": [{"segment": [144.23991311761316, 152.40975194654044], "label": "Bungee jumping"}]}, "FRoL-CpfbJ4": {"duration": 209.54, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FRoL-CpfbJ4", "annotations": [{"segment": [28.81958144, 123.40413184], "label": "Sailing"}, {"segment": [148.19224064, 171.83209216], "label": "Sailing"}, {"segment": [36.61174652700394, 193.84612223672622], "label": "Sailing"}]}, "QTD0vCdofTE": {"duration": 32.18, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QTD0vCdofTE", "annotations": [{"segment": [1.5541107644305772, 14.006571860000001], "label": "Hopscotch"}, {"segment": [15.795247420000003, 31.232613104524177], "label": "Hopscotch"}]}, "lc-piYwzqsA": {"duration": 48.6, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lc-piYwzqsA", "annotations": [{"segment": [9.552966315444618, 42.154359436962515], "label": "Using the pommel horse"}]}, "moUL_qLnNDM": {"duration": 4.83, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=moUL_qLnNDM", "annotations": [{"segment": [0, 4.829749], "label": "Walking the dog"}]}, "_D9oML1HvVw": {"duration": 133.237, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_D9oML1HvVw", "annotations": [{"segment": [11.016384495319812, 20.16206218954758], "label": "Slacklining"}, {"segment": [23.279906858034323, 35.751285531981274], "label": "Slacklining"}, {"segment": [37.41413602184087, 132.19661394383775], "label": "Slacklining"}]}, "r43Wv6V-HLo": {"duration": 120.535, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=r43Wv6V-HLo", "annotations": []}, "mj0lRelI0xw": {"duration": 131.193, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mj0lRelI0xw", "annotations": [{"segment": [0.2, 119.70231669266771], "label": "Drum corps"}]}, "WUcjqCvYfQ0": {"duration": 150.54, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WUcjqCvYfQ0", "annotations": [{"segment": [92.24842008, 107.75730889235571], "label": "Shaving"}]}, "3D4Cdc3pDrs": {"duration": 206.658, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3D4Cdc3pDrs", "annotations": []}, "rInf05YA7ww": {"duration": 66.177, "subset": "training", "resolution": "300x240", "url": "https://www.youtube.com/watch?v=rInf05YA7ww", "annotations": [{"segment": [0.01, 66.121], "label": "Futsal"}]}, "anol2rdzkdM": {"duration": 129.73, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=anol2rdzkdM", "annotations": []}, "vwpaEsh0-1U": {"duration": 205.683, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=vwpaEsh0-1U", "annotations": [{"segment": [91.7707600440072, 133.80561866556295], "label": "Futsal"}]}, "ElmBEYKRpZA": {"duration": 121.72, "subset": "testing", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=ElmBEYKRpZA", "annotations": []}, "5ytocb1ypRU": {"duration": 78.763, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5ytocb1ypRU", "annotations": [{"segment": [5.899357254290172, 74.23357878315133], "label": "Trimming branches or hedges"}]}, "VjMMwpgOCPo": {"duration": 46.88, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=VjMMwpgOCPo", "annotations": []}, "z4B1JZQnHl4": {"duration": 231.225, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=z4B1JZQnHl4", "annotations": [{"segment": 
[44.398210696824165, 54.842922446529236], "label": "Curling"}, {"segment": [63.84698429972327, 75.37218347181162], "label": "Curling"}, {"segment": [87.25754511802775, 103.46485645377699], "label": "Curling"}, {"segment": [136.23964159940328, 147.0445158232361], "label": "Curling"}, {"segment": [154.24776530579135, 156.76890262468567], "label": "Curling"}, {"segment": [167.5737768485185, 176.57783764020127], "label": "Curling"}, {"segment": [196.0266111261826, 206.11116034113587], "label": "Curling"}, {"segment": [224.11928393926672, 231.02430700000002], "label": "Curling"}]}, "EU-IdUKbLSg": {"duration": 79.3, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=EU-IdUKbLSg", "annotations": [{"segment": [5.690518914196568, 11.752158627145086], "label": "Playing water polo"}, {"segment": [14.96853888299532, 68.78105470202807], "label": "Playing water polo"}]}, "tbW6ErTogio": {"duration": 235.265, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=tbW6ErTogio", "annotations": []}, "Jm1y_W7RLSI": {"duration": 163.75, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Jm1y_W7RLSI", "annotations": [{"segment": [59.52118366146645, 72.54942557878314], "label": "Washing face"}]}, "zLVNSgR4IoA": {"duration": 60.56, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zLVNSgR4IoA", "annotations": [{"segment": [22.616013289999998, 53.9811232449298], "label": "Using parallel bars"}, {"segment": [0.69732758, 22.58249609984399], "label": "Using parallel bars"}]}, "vB00ah6E5E4": {"duration": 37.76, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vB00ah6E5E4", "annotations": [{"segment": [0.3828575546021841, 37.755645], "label": "Playing violin"}]}, "rczR9C00KOM": {"duration": 212.788, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rczR9C00KOM", "annotations": 
[{"segment": [0, 83.820342364715], "label": "Plastering"}, {"segment": [109.04943551211434, 137.59814617890837], "label": "Plastering"}, {"segment": [143.2414959618793, 145.2332664735161], "label": "Plastering"}, {"segment": [175.44178508112324, 183.74082884087366], "label": "Plastering"}]}, "ObrXN_FjVD8": {"duration": 62.46, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ObrXN_FjVD8", "annotations": []}, "NK5FWZ2BOQs": {"duration": 92.787, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NK5FWZ2BOQs", "annotations": [{"segment": [13.290627145085804, 83.95132761310451], "label": "Disc dog"}]}, "H5F01WIyJWI": {"duration": 51.735, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=H5F01WIyJWI", "annotations": [{"segment": [0.2, 51.7340589569161], "label": "Playing congas"}]}, "PdxtuqiUsZ4": {"duration": 125.41, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PdxtuqiUsZ4", "annotations": []}, "z_-uG3dJKsk": {"duration": 75.44, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=z_-uG3dJKsk", "annotations": []}, "3mymOHc5-Gs": {"duration": 83.31, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3mymOHc5-Gs", "annotations": [{"segment": [30.218905409277593, 55.563793817058794], "label": "Cleaning shoes"}]}, "hyk4wNZwwxc": {"duration": 224.31, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hyk4wNZwwxc", "annotations": [{"segment": [0, 224.30448979591836], "label": "Doing step aerobics"}]}, "9voJIuym_f8": {"duration": 59.61, "subset": "training", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=9voJIuym_f8", "annotations": [{"segment": [25.924878477379096, 58.81881030889236], "label": "Bathing dog"}]}, "09ocXmGF-T4": {"duration": 47.531, "subset": "testing", "resolution": "600x480", "url": 
"https://www.youtube.com/watch?v=09ocXmGF-T4", "annotations": []}, "jkay2K3RA1M": {"duration": 10.68, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=jkay2K3RA1M", "annotations": [{"segment": [2.8827519359277773, 10.281259794609472], "label": "Doing nails"}]}, "dx5VK79QWlg": {"duration": 24.103, "subset": "training", "resolution": "240x176", "url": "https://www.youtube.com/watch?v=dx5VK79QWlg", "annotations": [{"segment": [0.01, 23.85453028999565], "label": "Knitting"}]}, "IlN_XipVf44": {"duration": 65.713, "subset": "validation", "resolution": "293x240", "url": "https://www.youtube.com/watch?v=IlN_XipVf44", "annotations": [{"segment": [0.2, 56.90986498442367], "label": "Painting furniture"}]}, "yj2WJBqmEHk": {"duration": 18.878, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yj2WJBqmEHk", "annotations": [{"segment": [0.2, 18.877277], "label": "Snow tubing"}]}, "R8RrOgz9hj8": {"duration": 186.851, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=R8RrOgz9hj8", "annotations": [{"segment": [15.443670826833072, 160.28166926677068], "label": "Rafting"}]}, "YnG1Uhh2h5E": {"duration": 176.797, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=YnG1Uhh2h5E", "annotations": [{"segment": [6.341720748829954, 169.57209828393138], "label": "Playing pool"}]}, "SrKGO2Xu670": {"duration": 209.584, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SrKGO2Xu670", "annotations": [{"segment": [0.01, 206.77580722803546], "label": "Making an omelette"}]}, "yGPzIGJ_2B8": {"duration": 100.125, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yGPzIGJ_2B8", "annotations": [{"segment": [29.347063962558504, 35.122815912636504], "label": "Cutting the grass"}, {"segment": [50.10855070202808, 62.596663026521064], "label": "Cutting the grass"}, {"segment": 
[68.84071918876757, 72.7432542901716], "label": "Cutting the grass"}]}, "ICBrXUuwvgg": {"duration": 193.93, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ICBrXUuwvgg", "annotations": [{"segment": [97.72276269009946, 167.61117811243062], "label": "Rock climbing"}]}, "7KT-z20qxI4": {"duration": 228.462, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=7KT-z20qxI4", "annotations": [{"segment": [49.897907535349034, 63.44162529494377], "label": "Playing ice hockey"}, {"segment": [85.18285643534585, 188.54280710920435], "label": "Playing ice hockey"}]}, "2IRQ5fPwHV8": {"duration": 164.84, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=2IRQ5fPwHV8", "annotations": [{"segment": [0, 0], "label": "Getting a haircut"}]}, "btMVEIvr0oc": {"duration": 34.53, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=btMVEIvr0oc", "annotations": [{"segment": [2.2861154446177845, 31.252005959999998], "label": "Dodgeball"}]}, "LB1A7BobPwg": {"duration": 218.48, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LB1A7BobPwg", "annotations": [{"segment": [118.95213275741914, 124.74636271981491], "label": "Drinking beer"}]}, "O_bTFogdpc4": {"duration": 209.79, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=O_bTFogdpc4", "annotations": []}, "lV7tpgelpAY": {"duration": 101.57, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lV7tpgelpAY", "annotations": [{"segment": [7.28853003916075, 29.31256646184214], "label": "Paintball"}, {"segment": [79.69849023400937, 87.14546645865835], "label": "Paintball"}]}, "MmipoQF8EJs": {"duration": 76.208, "subset": "validation", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=MmipoQF8EJs", "annotations": [{"segment": [0.01, 76.161], "label": "Camel ride"}]}, "bWquUXC2Te8": 
{"duration": 62.76, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=bWquUXC2Te8", "annotations": [{"segment": [43.52260782, 46.705295549999995], "label": "Shaving"}, {"segment": [43.52260782, 46.705295549999995], "label": "Shaving"}]}, "9cD-Z6Aw7bE": {"duration": 149.89, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9cD-Z6Aw7bE", "annotations": [{"segment": [71.31774190695519, 79.50174507660579], "label": "Hammer throw"}, {"segment": [87.45191958426636, 95.86975120124805], "label": "Hammer throw"}, {"segment": [109.43181356630265, 113.64072947269892], "label": "Hammer throw"}, {"segment": [124.39684790015602, 130.94405042121687], "label": "Hammer throw"}]}, "i5K-DXt9djA": {"duration": 164.86, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=i5K-DXt9djA", "annotations": [{"segment": [29.06292315366084, 150.7156894851794], "label": "Fixing bicycle"}]}, "W80KSM1I47Q": {"duration": 214.205, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=W80KSM1I47Q", "annotations": [{"segment": [12.030182431787068, 108.27164188608361], "label": "Tug of war"}]}, "oD0RWEO8D1g": {"duration": 179.7, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oD0RWEO8D1g", "annotations": [{"segment": [83.41179166578365, 162.34509787300175], "label": "Playing ice hockey"}]}, "uptOE6bfBgA": {"duration": 84.684, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=uptOE6bfBgA", "annotations": [{"segment": [0.2, 83.36208529797192], "label": "Blowing leaves"}]}, "Tg1G0EtWw9s": {"duration": 231.387, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Tg1G0EtWw9s", "annotations": [{"segment": [11.31705194527774, 201.97693054808147], "label": "Playing blackjack"}]}, "zTAJDgpAQ4Q": {"duration": 173.268, "subset": "testing", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=zTAJDgpAQ4Q", "annotations": []}, "J27dBmSpRW4": {"duration": 151.42000000000002, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=J27dBmSpRW4", "annotations": [{"segment": [98.71647113884555, 124.93065366614664], "label": "Smoking hookah"}, {"segment": [139.10048205928237, 151.181], "label": "Smoking hookah"}]}, "NtUIIxVvDsA": {"duration": 167.16, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NtUIIxVvDsA", "annotations": [{"segment": [29.46814740405616, 39.899350024960995], "label": "Rafting"}, {"segment": [63.369555921996884, 82.4065007051482], "label": "Rafting"}, {"segment": [116.0471291575663, 119.17648994383777], "label": "Rafting"}, {"segment": [0.2, 166.960022], "label": "Rafting"}, {"segment": [135.08407394071764, 160.64052036193448], "label": "Rafting"}]}, "4mRdgV8t4KY": {"duration": 189.31, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4mRdgV8t4KY", "annotations": [{"segment": [12.99166926677067, 172.73014820592823], "label": "Horseback riding"}]}, "MZz0OTfvjrY": {"duration": 102.424, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=MZz0OTfvjrY", "annotations": [{"segment": [22.6929433278473, 27.478036785791225], "label": "Arm wrestling"}, {"segment": [52.839032112894024, 87.45120812535508], "label": "Arm wrestling"}]}, "U7vH9pEfGVw": {"duration": 164.096, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=U7vH9pEfGVw", "annotations": [{"segment": [28.662390015600625, 95.2116879875195], "label": "Playing blackjack"}, {"segment": [101.61032293291733, 110.05652106084244], "label": "Playing blackjack"}, {"segment": [133.6034976599064, 146.40076755070203], "label": "Playing blackjack"}]}, "BbFCkDdq3S0": {"duration": 87.96000000000001, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=BbFCkDdq3S0", "annotations": [{"segment": [2.880407176287052, 87.64667550702028], "label": "Triple jump"}]}, "2FKWKDEg5KE": {"duration": 204.475, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=2FKWKDEg5KE", "annotations": [{"segment": [12.759728457165497, 129.5112438402298], "label": "Fixing the roof"}, {"segment": [140.35701302882046, 196.4998177347894], "label": "Fixing the roof"}]}, "_QdPfYK9s6o": {"duration": 173.616, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_QdPfYK9s6o", "annotations": [{"segment": [0.20000000000000004, 169.51904212168486], "label": "Surfing"}]}, "XkkOVpXegS8": {"duration": 172.084, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=XkkOVpXegS8", "annotations": [{"segment": [4.663930721149123, 165.75715587068183], "label": "Playing ice hockey"}]}, "vMy5o7pvy4c": {"duration": 6.94, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=vMy5o7pvy4c", "annotations": [{"segment": [2.97339744, 6.848333], "label": "Hammer throw"}, {"segment": [0, 2.735059669266771], "label": "Hammer throw"}]}, "r0P0egQt-jU": {"duration": 185.273, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=r0P0egQt-jU", "annotations": [{"segment": [10.389066056329073, 99.27329787158892], "label": "Surfing"}]}, "H9PaZiWLgXU": {"duration": 186.57, "subset": "testing", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=H9PaZiWLgXU", "annotations": []}, "Ydep68S6ViE": {"duration": 115.28999999999999, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Ydep68S6ViE", "annotations": [{"segment": [4.586289757712757, 9.981924766786591], "label": "Hammer throw"}, {"segment": [13.938723773440733, 17.895522780094876], "label": "Hammer throw"}, {"segment": [20.413485784329332, 30.66519230156961], "label": "Hammer 
throw"}, {"segment": [36.60039081155083, 42.53558932153204], "label": "Hammer throw"}, {"segment": [48.650642331815725, 60.16133035117323], "label": "Hammer throw"}, {"segment": [75.98852526131046, 81.02445119578783], "label": "Hammer throw"}]}, "z3-tII3XcUs": {"duration": 6.06, "subset": "validation", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=z3-tII3XcUs", "annotations": [{"segment": [0.01, 4.655622464898596], "label": "Playing guitarra"}]}, "NAfDr0se-9I": {"duration": 125.69, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=NAfDr0se-9I", "annotations": []}, "8kaQq1mR2ys": {"duration": 207.122, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8kaQq1mR2ys", "annotations": []}, "mWNTl9Bh7kI": {"duration": 33.37, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mWNTl9Bh7kI", "annotations": [{"segment": [0, 27.745164496099846], "label": "Playing polo"}]}, "mdwbRGlPn3A": {"duration": 29.211, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mdwbRGlPn3A", "annotations": [{"segment": [2.00510272074883, 27.251168795631827], "label": "Powerbocking"}]}, "vYxBAbbvSxc": {"duration": 87.958, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vYxBAbbvSxc", "annotations": [{"segment": [4.847980582798754, 80.99140114354643], "label": "Wakeboarding"}]}, "cgfJnog1WLE": {"duration": 184.25, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cgfJnog1WLE", "annotations": [{"segment": [15.97079, 117.15720499999999], "label": "Snatch"}, {"segment": [126.36970500000001, 130.70326500000002], "label": "Snatch"}]}, "eHC1rys4tT4": {"duration": 84.939, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=eHC1rys4tT4", "annotations": []}, "KovhYgNTBI0": {"duration": 16.742, "subset": "testing", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=KovhYgNTBI0", "annotations": []}, "55FFh89yqVw": {"duration": 211.53, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=55FFh89yqVw", "annotations": [{"segment": [20.13036037087742, 39.60070892631624], "label": "Washing face"}, {"segment": [53.13095114280761, 65.01116382070249], "label": "Washing face"}, {"segment": [77.55138831403598, 86.4615478224571], "label": "Washing face"}, {"segment": [92.07164825368525, 108.90194954736965], "label": "Washing face"}]}, "OD5rJOPicoM": {"duration": 30.58, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OD5rJOPicoM", "annotations": [{"segment": [1.4052001926677067, 23.55496594149766], "label": "Windsurfing"}]}, "f-JfdEfNQlE": {"duration": 112.548, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=f-JfdEfNQlE", "annotations": [{"segment": [51.00614416957631, 52.93753000732274], "label": "Fun sliding down"}]}, "N9xp9VbpklQ": {"duration": 23.778, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=N9xp9VbpklQ", "annotations": [{"segment": [0.01, 21.98076391935497], "label": "Croquet"}]}, "ma0HmFqdFqg": {"duration": 107.37, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ma0HmFqdFqg", "annotations": [{"segment": [18.33325675, 89.78632995319813], "label": "Cleaning windows"}]}, "DLfOiuxMstg": {"duration": 56.05, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DLfOiuxMstg", "annotations": [{"segment": [4.819661346233523, 46.15142770137371], "label": "Wrapping presents"}]}, "n18TxTWnbFI": {"duration": 54.71, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=n18TxTWnbFI", "annotations": [{"segment": [7.336296411856473, 53.913248049922], "label": "Springboard diving"}]}, "nhFbFMZrHhc": {"duration": 142.385, "subset": 
"testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nhFbFMZrHhc", "annotations": []}, "tLxv2E7gj5c": {"duration": 30.72, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tLxv2E7gj5c", "annotations": [{"segment": [0, 14.312447999999998], "label": "Playing racquetball"}, {"segment": [16.0103424, 29.186395305772233], "label": "Playing racquetball"}]}, "fIG7aXzSyMU": {"duration": 91.487, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fIG7aXzSyMU", "annotations": [{"segment": [2.6404094695787834, 3.924932995319813], "label": "Raking leaves"}, {"segment": [32.755349906396255, 34.75349761310453], "label": "Raking leaves"}, {"segment": [50.16777992199688, 57.304021731669266], "label": "Raking leaves"}]}, "LMePtW22Wjg": {"duration": 100.357, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LMePtW22Wjg", "annotations": []}, "44BfVJnhBgc": {"duration": 169.924, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=44BfVJnhBgc", "annotations": [{"segment": [83.71342781804879, 160.17733591773725], "label": "Running a marathon"}]}, "cDnJjAQtf-g": {"duration": 87.45, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=cDnJjAQtf-g", "annotations": [{"segment": [11.323006215486716, 36.15176683257806], "label": "Playing water polo"}, {"segment": [60.025574290171605, 72.84921970670827], "label": "Playing water polo"}, {"segment": [59.34346549141966, 72.84921970670827], "label": "Playing water polo"}]}, "gHHlX92NYBY": {"duration": 202.9, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gHHlX92NYBY", "annotations": []}, "93bg_XU3OTg": {"duration": 174.311, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=93bg_XU3OTg", "annotations": [{"segment": [34.6658775351014, 52.338677847113885], "label": "Waxing 
skis"}, {"segment": [84.96538611544462, 136.0805624024961], "label": "Waxing skis"}]}, "Rzkne_2st5k": {"duration": 25.22, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Rzkne_2st5k", "annotations": []}, "YEwdGCEgsNo": {"duration": 79.924, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YEwdGCEgsNo", "annotations": []}, "tPHfkxAwyp8": {"duration": 230.064, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tPHfkxAwyp8", "annotations": [{"segment": [13.273995319812792, 218.87390015600624], "label": "Capoeira"}]}, "AfCRHmcLo9Y": {"duration": 219.615, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AfCRHmcLo9Y", "annotations": []}, "WOUkPgHtt4E": {"duration": 47.3, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WOUkPgHtt4E", "annotations": []}, "3TwqeiVbpS8": {"duration": 151.395, "subset": "training", "resolution": "392x240", "url": "https://www.youtube.com/watch?v=3TwqeiVbpS8", "annotations": [{"segment": [11.615239755683971, 58.76973423319071], "label": "Waterskiing"}, {"segment": [66.54997255094771, 97.67092582197576], "label": "Waterskiing"}]}, "-0r0HEwAYiQ": {"duration": 43.68, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=-0r0HEwAYiQ", "annotations": [{"segment": [27.52112112, 39.94781664], "label": "Vacuuming floor"}]}, "Hv6LHoTM9mU": {"duration": 7.895, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Hv6LHoTM9mU", "annotations": []}, "Ynvd8OuXKGw": {"duration": 158.34, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ynvd8OuXKGw", "annotations": []}, "ZYwfvPJv4Rk": {"duration": 61.58, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZYwfvPJv4Rk", "annotations": [{"segment": [0.2, 61.37922902494331], "label": 
"Cutting the grass"}]}, "qZaWIb28kfU": {"duration": 88.63, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qZaWIb28kfU", "annotations": [{"segment": [6.78985374, 62.17068414], "label": "Starting a campfire"}]}, "hchuVbHYK_k": {"duration": 46.557, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hchuVbHYK_k", "annotations": [{"segment": [2.103789391575663, 46.300999999999995], "label": "Canoeing"}]}, "Zk4EvGw7cAw": {"duration": 23.777, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Zk4EvGw7cAw", "annotations": [{"segment": [9.000871395990485, 20.79741189443285], "label": "Doing a powerbomb"}]}, "CocYQOgnegg": {"duration": 157.223, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CocYQOgnegg", "annotations": [{"segment": [0.01, 154.97409048361936], "label": "Blowing leaves"}]}, "zHzKPtrgv1k": {"duration": 101.936, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zHzKPtrgv1k", "annotations": [{"segment": [0.01, 101.93560090702948], "label": "Riding bumper cars"}]}, "tN1_lOJlUlc": {"duration": 182.137, "subset": "validation", "resolution": "432x720", "url": "https://www.youtube.com/watch?v=tN1_lOJlUlc", "annotations": [{"segment": [13.946811346093298, 176.7622351200234], "label": "Playing congas"}]}, "4I5h_JoKdx0": {"duration": 150.907, "subset": "testing", "resolution": "316x240", "url": "https://www.youtube.com/watch?v=4I5h_JoKdx0", "annotations": []}, "xS5JDBeMxOs": {"duration": 96.15, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xS5JDBeMxOs", "annotations": []}, "PWw-DAcSszM": {"duration": 159.73, "subset": "training", "resolution": "384x288", "url": "https://www.youtube.com/watch?v=PWw-DAcSszM", "annotations": [{"segment": [4.485381896908529, 159.729432], "label": "Tai chi"}]}, "TgDzZIop_uw": {"duration": 50.248, "subset": 
"training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=TgDzZIop_uw", "annotations": [{"segment": [0.19999999999999998, 42.33057304212168], "label": "Changing car wheel"}]}, "KkBMOQOGTdk": {"duration": 24.056, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KkBMOQOGTdk", "annotations": [{"segment": [3.7153376396255853, 15.6869811450858], "label": "Raking leaves"}]}, "ESlUzrtqC98": {"duration": 107.718, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ESlUzrtqC98", "annotations": [{"segment": [27.1387437541437, 31.494345013828497], "label": "Beer pong"}, {"segment": [41.88077878692301, 47.7440874593521], "label": "Beer pong"}, {"segment": [54.109966128092374, 58.29804419963204], "label": "Beer pong"}, {"segment": [65.66906160554183, 70.69475529138943], "label": "Beer pong"}, {"segment": [94.98560810631943, 100.01130179216703], "label": "Beer pong"}]}, "yJ5OnuQuvRI": {"duration": 181.581, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yJ5OnuQuvRI", "annotations": []}, "p2RMXdo5avw": {"duration": 34.854, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=p2RMXdo5avw", "annotations": [{"segment": [6.400121495327102, 33.89894859813084], "label": "Shuffleboard"}]}, "_IqOAGFrYfk": {"duration": 163.82, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_IqOAGFrYfk", "annotations": []}, "boRX-UKXzy8": {"duration": 118.747, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=boRX-UKXzy8", "annotations": [{"segment": [0.01, 118.701], "label": "Riding bumper cars"}]}, "O5vpeIfQxLQ": {"duration": 158.732, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=O5vpeIfQxLQ", "annotations": [{"segment": [14.855007800312013, 31.690683307332293], "label": "Having an ice cream"}, {"segment": 
[34.166517940717625, 106.70847269890794], "label": "Having an ice cream"}]}, "xwu60huH3ds": {"duration": 176.495, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xwu60huH3ds", "annotations": [{"segment": [0.01, 167.33848361934477], "label": "Grooming dog"}]}, "00ZRoqhhb8g": {"duration": 72.56, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=00ZRoqhhb8g", "annotations": [{"segment": [0.01, 7.8109246818852345], "label": "Javelin throw"}]}, "eXUcFgDxslw": {"duration": 24.87, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eXUcFgDxslw", "annotations": [{"segment": [0, 19.979734215852258], "label": "Ballet"}]}, "reEjJfSUGa8": {"duration": 218.036, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=reEjJfSUGa8", "annotations": [{"segment": [0.01, 164.809934162948], "label": "BMX"}, {"segment": [177.71223634363335, 214.4016070601918], "label": "BMX"}]}, "EsOajUNpEzU": {"duration": 72.91, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=EsOajUNpEzU", "annotations": [{"segment": [21.952825729355705, 70.29454041835143], "label": "Playing bagpipes"}]}, "JXazqQitVdQ": {"duration": 12.493, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JXazqQitVdQ", "annotations": [{"segment": [0.2, 12.181261117240396], "label": "Using the monkey bar"}]}, "ctFlPn4EfkU": {"duration": 76.51, "subset": "training", "resolution": "854x476", "url": "https://www.youtube.com/watch?v=ctFlPn4EfkU", "annotations": [{"segment": [9.071358695787831, 76.509749], "label": "Croquet"}]}, "U0HiAZCgmd8": {"duration": 67.06, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=U0HiAZCgmd8", "annotations": [{"segment": [6.695461244300112, 67.0592290249433], "label": "Mixing drinks"}]}, "wQbVxdVTN-I": {"duration": 50.34, "subset": "training", 
"resolution": "480x360", "url": "https://www.youtube.com/watch?v=wQbVxdVTN-I", "annotations": [{"segment": [8.638837417442275, 16.02111644929797], "label": "Chopping wood"}, {"segment": [17.35620948673947, 30.86420963026521], "label": "Chopping wood"}, {"segment": [32.82758174414976, 35.34069804992199], "label": "Chopping wood"}, {"segment": [37.0684655101404, 39.42451204680187], "label": "Chopping wood"}, {"segment": [40.91667485335413, 49.476977269890796], "label": "Chopping wood"}]}, "RHfjDkr6mcc": {"duration": 135.19, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=RHfjDkr6mcc", "annotations": [{"segment": [11.000196480000001, 118.94731474446179], "label": "Ironing clothes"}]}, "mgNfayAiTQc": {"duration": 11.146, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=mgNfayAiTQc", "annotations": [{"segment": [4.2339282363494535, 10.893453963338532], "label": "Laying tile"}]}, "xm7pETlJrwA": {"duration": 217.53, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xm7pETlJrwA", "annotations": [{"segment": [55.31435137133377, 151.77519576], "label": "Hand washing clothes"}]}, "Q2PC60uwjdY": {"duration": 169.55, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Q2PC60uwjdY", "annotations": [{"segment": [31.906380030855622, 146.611149152162], "label": "Preparing salad"}]}, "Ce0t7gfJl5w": {"duration": 176.96, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ce0t7gfJl5w", "annotations": [{"segment": [16.564050643658398, 45.27507175933295], "label": "Longboarding"}, {"segment": [57.69810974207675, 60.45878484935316], "label": "Longboarding"}, {"segment": [89.16980596502772, 93.3108186259423], "label": "Longboarding"}, {"segment": [124.23037982743799, 126.71498742398673], "label": "Longboarding"}, {"segment": [138.30982179875195, 142.17476691887677], "label": "Longboarding"}, 
{"segment": [145.48757702184085, 152.94139975351013], "label": "Longboarding"}]}, "Zhfm2dWtnpc": {"duration": 89.862, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Zhfm2dWtnpc", "annotations": [{"segment": [5.6050546021840875, 84.21594539781591], "label": "Rope skipping"}]}, "D8dHEAYIawg": {"duration": 59.56, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=D8dHEAYIawg", "annotations": [{"segment": [0, 59.559182], "label": "Playing beach volleyball"}]}, "-eCMRApm6n4": {"duration": 196.975, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-eCMRApm6n4", "annotations": [{"segment": [84.50560172066747, 147.5006866397105], "label": "Making a lemonade"}]}, "jFp6ld_IGuY": {"duration": 30.117, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jFp6ld_IGuY", "annotations": [{"segment": [3.98890015600624, 9.854929797191888], "label": "Doing a powerbomb"}, {"segment": [12.717552262090484, 24.21497035881435], "label": "Doing a powerbomb"}]}, "Kc1onvwa3rE": {"duration": 206.01, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Kc1onvwa3rE", "annotations": []}, "6Ik0MQ53FBU": {"duration": 80.388, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6Ik0MQ53FBU", "annotations": [{"segment": [0.01, 80.301], "label": "Playing beach volleyball"}]}, "LNTalSYEOpQ": {"duration": 99.475, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LNTalSYEOpQ", "annotations": []}, "7-u3OI6HDns": {"duration": 196.12, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=7-u3OI6HDns", "annotations": [{"segment": [53.847690647761965, 59.354840827646704], "label": "Belly dance"}, {"segment": [67.9215188852452, 95.7632225724403], "label": "Belly dance"}, {"segment": [108.00133408329529, 
159.09544886115444], "label": "Belly dance"}, {"segment": [166.1323629453978, 182.65381340405617], "label": "Belly dance"}]}, "jNPOEMYJlgc": {"duration": 79.668, "subset": "validation", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=jNPOEMYJlgc", "annotations": [{"segment": [0.009999999999999998, 70.34617462901717], "label": "Playing rubik cube"}]}, "iABuDhagUm8": {"duration": 98.06, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=iABuDhagUm8", "annotations": []}, "Z8u205eKUTo": {"duration": 206.542, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Z8u205eKUTo", "annotations": []}, "MYLqqR528eU": {"duration": 167.09, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MYLqqR528eU", "annotations": []}, "4WNso9fVJG8": {"duration": 110.899, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=4WNso9fVJG8", "annotations": [{"segment": [60.53712752986109, 79.33640626411065], "label": "Putting in contact lenses"}]}, "vAh8YJnIytk": {"duration": 88.77, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vAh8YJnIytk", "annotations": []}, "cj6QcTKVGpA": {"duration": 203.454, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cj6QcTKVGpA", "annotations": []}, "esuEWVNHfsM": {"duration": 220.82, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=esuEWVNHfsM", "annotations": [{"segment": [2.7561809672386897, 214.9821154446178], "label": "Ping-pong"}]}, "RtbfdVDciWQ": {"duration": 126.619, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RtbfdVDciWQ", "annotations": [{"segment": [0.01, 126.261], "label": "Putting on shoes"}]}, "BPpq8m93LSI": {"duration": 102.82, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BPpq8m93LSI", 
"annotations": [{"segment": [9.944930964118564, 15.398602783151325], "label": "Ping-pong"}, {"segment": [26.305946421216845, 99.12850541653665], "label": "Ping-pong"}]}, "VNR1j2U7gE4": {"duration": 40.68, "subset": "training", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=VNR1j2U7gE4", "annotations": [{"segment": [1.0789127971918877, 40.42749716536662], "label": "Playing squash"}]}, "NouHsmaE-xA": {"duration": 50.272, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NouHsmaE-xA", "annotations": [{"segment": [18.116454943912043, 27.762878555382212], "label": "Calf roping"}]}, "w8gNl6HWctA": {"duration": 150.98, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=w8gNl6HWctA", "annotations": [{"segment": [34.62323452938118, 97.51033398070618], "label": "Playing racquetball"}]}, "L_fvvmvV-Tc": {"duration": 58.17, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=L_fvvmvV-Tc", "annotations": [{"segment": [1.0889107956318254, 45.462025717628705], "label": "Mowing the lawn"}]}, "oHOR69nFt-4": {"duration": 214.181, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oHOR69nFt-4", "annotations": [{"segment": [15.70436895298941, 190.12310349453978], "label": "Making a cake"}]}, "0rDLcTmgzGQ": {"duration": 134.1, "subset": "training", "resolution": "854x468", "url": "https://www.youtube.com/watch?v=0rDLcTmgzGQ", "annotations": [{"segment": [3.768683307332293, 130.2289453978159], "label": "Playing polo"}]}, "FUCYJSSqQpo": {"duration": 105.418, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FUCYJSSqQpo", "annotations": []}, "_kG3DxvGnnQ": {"duration": 208.68, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_kG3DxvGnnQ", "annotations": [{"segment": [0, 198.58567224648985], "label": "Getting a piercing"}]}, "4dw_Hk9Wpc4": {"duration": 
98.151, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4dw_Hk9Wpc4", "annotations": [{"segment": [11.024733038301125, 25.418134504972038], "label": "Scuba diving"}, {"segment": [31.84922877731436, 65.53591306101224], "label": "Scuba diving"}, {"segment": [86.20728636661467, 88.35098442901716], "label": "Scuba diving"}]}, "YZp-m0_kQxc": {"duration": 54.61, "subset": "testing", "resolution": "3414x1080", "url": "https://www.youtube.com/watch?v=YZp-m0_kQxc", "annotations": []}, "5rftu62ML_c": {"duration": 105.535, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5rftu62ML_c", "annotations": [{"segment": [0.2, 9.8784424024961], "label": "Canoeing"}, {"segment": [14.81766360374415, 21.074010458658346], "label": "Canoeing"}, {"segment": [35.891674062402494, 39.18448819656787], "label": "Canoeing"}, {"segment": [66.0209233900156, 78.69825780655226], "label": "Canoeing"}, {"segment": [84.29604183463339, 85.94244890171608], "label": "Canoeing"}]}, "ADbpTpH4U7s": {"duration": 102.7, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ADbpTpH4U7s", "annotations": [{"segment": [25.955806297558027, 92.12709025367818], "label": "Preparing salad"}]}, "aT5S8PcnlXo": {"duration": 95.46000000000001, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=aT5S8PcnlXo", "annotations": [{"segment": [1.9339984399375976, 14.87691107644306], "label": "Hammer throw"}, {"segment": [26.183363494539783, 37.34104680187208], "label": "Hammer throw"}]}, "wkMQ_e674IM": {"duration": 167.07, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wkMQ_e674IM", "annotations": []}, "qaml4eEk9hY": {"duration": 57.84, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qaml4eEk9hY", "annotations": []}, "OqA83jGQtfg": {"duration": 224.584, "subset": "validation", "resolution": "1920x1080", 
"url": "https://www.youtube.com/watch?v=OqA83jGQtfg", "annotations": [{"segment": [20.31728237129485, 224.541], "label": "Ice fishing"}]}, "gl2eHq4nRus": {"duration": 141.828, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gl2eHq4nRus", "annotations": []}, "rBVbsbJJcyM": {"duration": 132.73, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rBVbsbJJcyM", "annotations": [{"segment": [41.524457098283925, 90.60822932917317], "label": "Ironing clothes"}, {"segment": [96.20005148205928, 102.62029173166927], "label": "Ironing clothes"}, {"segment": [110.49026365054601, 119.80996723868954], "label": "Ironing clothes"}]}, "ovTKJgSWMKQ": {"duration": 208.03, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=ovTKJgSWMKQ", "annotations": [{"segment": [39.917927557918645, 43.48782351838291], "label": "Plataform diving"}, {"segment": [89.8964710044184, 114.56120673126243], "label": "Plataform diving"}]}, "XnrQRuchl7E": {"duration": 65.06, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XnrQRuchl7E", "annotations": [{"segment": [0.7105091076443059, 62.22029756942278], "label": "Playing piano"}]}, "8jJdHFW-lys": {"duration": 102.8, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=8jJdHFW-lys", "annotations": [{"segment": [9.63888449, 90.81763884555382], "label": "Using the balance beam"}]}, "s0Pjp1qD18A": {"duration": 150.512, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=s0Pjp1qD18A", "annotations": []}, "1LdbczjQPII": {"duration": 163.33, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1LdbczjQPII", "annotations": [{"segment": [16.307435631825275, 163.32916099773243], "label": "Playing ten pins"}]}, "cLTDcBhgRw8": {"duration": 30.07, "subset": "validation", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=cLTDcBhgRw8", "annotations": [{"segment": [5.101574102964118, 28.99063806552262], "label": "Powerbocking"}]}, "-02DygXbn6w": {"duration": 109.668, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-02DygXbn6w", "annotations": [{"segment": [1.3678751950078003, 101.24123556942277], "label": "Snowboarding"}]}, "_bj64aqyIyk": {"duration": 8.127, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_bj64aqyIyk", "annotations": [{"segment": [0.2, 7.8709999999999996], "label": "Using the monkey bar"}]}, "DOfpOkDHe_8": {"duration": 111.94, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=DOfpOkDHe_8", "annotations": [{"segment": [11.870932917316694, 106.83839625585023], "label": "Doing karate"}]}, "wlP0gzQtU20": {"duration": 112.176, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=wlP0gzQtU20", "annotations": []}, "rcqXzX-7ULg": {"duration": 210.61, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rcqXzX-7ULg", "annotations": [{"segment": [36.14125392226573, 184.32039385647425], "label": "Ironing clothes"}]}, "54Hp_Z-cu-s": {"duration": 61.603, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=54Hp_Z-cu-s", "annotations": [{"segment": [19.065170228375177, 58.72839013066854], "label": "Drum corps"}]}, "cQgP0-Id-p4": {"duration": 133.677, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=cQgP0-Id-p4", "annotations": []}, "ResZdSbk9kE": {"duration": 96.99000000000001, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=ResZdSbk9kE", "annotations": [{"segment": [4.387991835319672, 96.98975056689342], "label": "Kayaking"}]}, "ivkF2jbavhc": {"duration": 66.06, "subset": "validation", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=ivkF2jbavhc", "annotations": [{"segment": [26.737785000000002, 59.054337000000004], "label": "Playing piano"}]}, "dS9TsX_zyPg": {"duration": 11.45, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dS9TsX_zyPg", "annotations": [{"segment": [0, 11.447346], "label": "Washing dishes"}]}, "EAMThMTRRy4": {"duration": 64.41, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EAMThMTRRy4", "annotations": []}, "omLZQlz1PAo": {"duration": 172.36, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=omLZQlz1PAo", "annotations": [{"segment": [3.629014820592824, 159.542244149766], "label": "Tango"}]}, "s9ATMSQLb80": {"duration": 182.556, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=s9ATMSQLb80", "annotations": []}, "ZBVaJaLg0G8": {"duration": 215.691, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ZBVaJaLg0G8", "annotations": []}, "2cal3nR0PnY": {"duration": 101.53999999999999, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2cal3nR0PnY", "annotations": []}, "5YsdlM3Stjo": {"duration": 150.32999999999998, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5YsdlM3Stjo", "annotations": [{"segment": [14.301058210608424, 107.60960194539781], "label": "Grooming horse"}, {"segment": [107.68531375, 144.1827999921997], "label": "Grooming horse"}]}, "PKLLNgcEuRI": {"duration": 8.73, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PKLLNgcEuRI", "annotations": [{"segment": [0, 6.2233525741029645], "label": "Using the pommel horse"}]}, "ZGzXbvRHNAE": {"duration": 58.979, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZGzXbvRHNAE", "annotations": [{"segment": [23.539619344773794, 45.516060842433696], "label": 
"Cleaning sink"}]}, "NdZn0wf7MCc": {"duration": 195.79, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NdZn0wf7MCc", "annotations": []}, "0pxONuRdIAY": {"duration": 147.076, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0pxONuRdIAY", "annotations": [{"segment": [78.21241965678625, 106.1945756630265], "label": "Applying sunscreen"}]}, "j0cDmSTkVaA": {"duration": 101.425, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=j0cDmSTkVaA", "annotations": []}, "iqe_HmjojQ8": {"duration": 8.59, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=iqe_HmjojQ8", "annotations": [{"segment": [0.01, 8.591382], "label": "Playing kickball"}]}, "deU1mGcNh1Q": {"duration": 33.53, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=deU1mGcNh1Q", "annotations": [{"segment": [0.7834022082353189, 33.32961451247165], "label": "Having an ice cream"}]}, "shZRGB58YBU": {"duration": 221.89, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=shZRGB58YBU", "annotations": [{"segment": [0.2, 214.28618177570092], "label": "Playing congas"}]}, "Hxgjh9Yb408": {"duration": 29.247, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Hxgjh9Yb408", "annotations": [{"segment": [8.365921226209048, 20.89194535725429], "label": "Calf roping"}]}, "xekPSA9h_jg": {"duration": 60.558, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xekPSA9h_jg", "annotations": [{"segment": [3.2101622464898596, 60.501], "label": "Table soccer"}]}, "Uc0Z2tuIJVA": {"duration": 90.349, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Uc0Z2tuIJVA", "annotations": [{"segment": [0.2, 4.074829057497805], "label": "Beer pong"}, {"segment": [11.521934356849272, 13.77011708872896], "label": "Beer 
pong"}, {"segment": [24.167962223672514, 24.87051932738492], "label": "Beer pong"}, {"segment": [29.788419053371737, 30.490975638236517], "label": "Beer pong"}, {"segment": [57.46916796172019, 58.874282145234965], "label": "Beer pong"}]}, "zrR9hGDeQhg": {"duration": 236.89, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zrR9hGDeQhg", "annotations": [{"segment": [0.2, 92.73334009360374], "label": "Snow tubing"}, {"segment": [101.96972854914196, 236.621], "label": "Snow tubing"}]}, "pA5W1NYnmyY": {"duration": 91.86, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pA5W1NYnmyY", "annotations": [{"segment": [51.998475819032755, 57.155349453978154], "label": "Cleaning windows"}]}, "IZdVPuT8Hfs": {"duration": 93.65, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IZdVPuT8Hfs", "annotations": [{"segment": [52.155458864274564, 56.246083088923555], "label": "Bungee jumping"}, {"segment": [78.4523288798752, 91.16248272074883], "label": "Bungee jumping"}]}, "q2-2jIQGfvg": {"duration": 76.487, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=q2-2jIQGfvg", "annotations": [{"segment": [2.8637710951638065, 54.65029839937598], "label": "Playing rubik cube"}]}, "YAzDGbY1dAM": {"duration": 144.532, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YAzDGbY1dAM", "annotations": []}, "6Zr1lLVoC4w": {"duration": 55.08, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6Zr1lLVoC4w", "annotations": []}, "QWRGRAod0no": {"duration": 227.44, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QWRGRAod0no", "annotations": []}, "_mfa4AcWV6k": {"duration": 72.702, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_mfa4AcWV6k", "annotations": []}, "YPKvzHQfPYU": {"duration": 192.749, "subset": 
"testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YPKvzHQfPYU", "annotations": []}, "xcz61rIR5lQ": {"duration": 63.14, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xcz61rIR5lQ", "annotations": []}, "NiPqyUecGdc": {"duration": 56.309, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NiPqyUecGdc", "annotations": [{"segment": [0.21039639544858368, 53.77377801538628], "label": "Waterskiing"}]}, "QXN6odBnVmI": {"duration": 48.67, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=QXN6odBnVmI", "annotations": []}, "Pr3iVnKpv0c": {"duration": 84.2, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=Pr3iVnKpv0c", "annotations": [{"segment": [0.9537871654433728, 81.60840665142469], "label": "Playing violin"}]}, "98wL6zakU9o": {"duration": 139.204, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=98wL6zakU9o", "annotations": [{"segment": [0.01, 133.08220436817473], "label": "Waxing skis"}]}, "6gyD-Mte2ZM": {"duration": 188.245, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6gyD-Mte2ZM", "annotations": [{"segment": [21.43810386973302, 32.59766478822418], "label": "Playing ten pins"}, {"segment": [36.12173665722139, 39.93948118196837], "label": "Playing ten pins"}, {"segment": [36.41540931297116, 40.5268264934679], "label": "Playing ten pins"}, {"segment": [0.2, 188.0441723356009], "label": "Playing ten pins"}, {"segment": [0.2, 188.0441723356009], "label": "Playing ten pins"}, {"segment": [0.2, 188.0441723356009], "label": "Playing ten pins"}, {"segment": [0.2, 188.0441723356009], "label": "Playing ten pins"}, {"segment": [0.2, 188.0441723356009], "label": "Playing ten pins"}, {"segment": [0.2, 188.0441723356009], "label": "Playing ten pins"}, {"segment": [0.2, 188.0441723356009], "label": "Playing ten pins"}, 
{"segment": [0.2, 188.0441723356009], "label": "Playing ten pins"}, {"segment": [0.2, 188.0441723356009], "label": "Playing ten pins"}, {"segment": [0.2, 188.0441723356009], "label": "Playing ten pins"}, {"segment": [0.2, 188.0441723356009], "label": "Playing ten pins"}, {"segment": [0.2, 188.0441723356009], "label": "Playing ten pins"}, {"segment": [0.2, 188.044172], "label": "Playing ten pins"}, {"segment": [0.2, 188.044172], "label": "Playing ten pins"}, {"segment": [176.79093844617785, 180.02133765366614], "label": "Playing ten pins"}]}, "bpB0GiH6uDw": {"duration": 95.922, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bpB0GiH6uDw", "annotations": [{"segment": [17.33163865753986, 83.07233586292836], "label": "Cutting the grass"}]}, "5K-soRkdGDs": {"duration": 225.19, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5K-soRkdGDs", "annotations": [{"segment": [50.234494539781586, 66.74513260530422], "label": "Bathing dog"}, {"segment": [73.41964586583462, 90.63286427457098], "label": "Bathing dog"}, {"segment": [98.00995787831513, 117.33091731669266], "label": "Bathing dog"}, {"segment": [124.00543057722308, 143.32639001560062], "label": "Bathing dog"}, {"segment": [151.75735413416535, 164.40380031201246], "label": "Bathing dog"}, {"segment": [173.8886349453978, 191.10185335413416], "label": "Bathing dog"}]}, "wtA-5-6DRcY": {"duration": 87.656, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wtA-5-6DRcY", "annotations": [{"segment": [0, 87.655327], "label": "River tubing"}]}, "sYAGVE3luRo": {"duration": 159.1, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sYAGVE3luRo", "annotations": [{"segment": [22.39692931, 64.52422165], "label": "Playing bagpipes"}, {"segment": [71.76818124, 131.23133646], "label": "Playing bagpipes"}]}, "al_769KF6Qc": {"duration": 124.645, "subset": "training", "resolution": 
"720x480", "url": "https://www.youtube.com/watch?v=al_769KF6Qc", "annotations": [{"segment": [9.330496099843995, 104.19053978159126], "label": "Spread mulch"}]}, "_L8VrJYsOAQ": {"duration": 70.52, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_L8VrJYsOAQ", "annotations": [{"segment": [14.411839517940717, 34.214367099844], "label": "Washing hands"}, {"segment": [44.775715143525744, 63.148060177847114], "label": "Washing hands"}]}, "rTiSrV2wdWo": {"duration": 115.52, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=rTiSrV2wdWo", "annotations": []}, "q4FZTmaqEx8": {"duration": 203.523, "subset": "training", "resolution": "548x360", "url": "https://www.youtube.com/watch?v=q4FZTmaqEx8", "annotations": [{"segment": [0.2, 203.501], "label": "Roof shingle removal"}]}, "6UqWORrn3KI": {"duration": 59.049, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=6UqWORrn3KI", "annotations": [{"segment": [10.3125616224649, 50.734120124804996], "label": "Making a lemonade"}]}, "54VcTlhF2H8": {"duration": 70.64, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=54VcTlhF2H8", "annotations": [{"segment": [0, 70.581], "label": "Using parallel bars"}]}, "dgas2Fku3No": {"duration": 87.05, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dgas2Fku3No", "annotations": [{"segment": [3.57605508, 81.64353744149766], "label": "Playing badminton"}]}, "rNQbSptv1cM": {"duration": 72.52, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=rNQbSptv1cM", "annotations": [{"segment": [20.466553822152886, 27.024897035881434], "label": "Cheerleading"}]}, "Pn2_AuY2rxg": {"duration": 73.352, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Pn2_AuY2rxg", "annotations": []}, "5_cLvl6mV3s": {"duration": 128.011, "subset": "testing", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=5_cLvl6mV3s", "annotations": []}, "KYl67H9-4TA": {"duration": 235.43, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KYl67H9-4TA", "annotations": [{"segment": [6.232449297971919, 206.4040561622465], "label": "Playing violin"}]}, "I8m878YGmU8": {"duration": 192.7, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=I8m878YGmU8", "annotations": [{"segment": [0.01, 192.7022448979592], "label": "Triple jump"}]}, "nX-GvQmf5Tc": {"duration": 81.758, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nX-GvQmf5Tc", "annotations": [{"segment": [0.01, 81.757345], "label": "Riding bumper cars"}]}, "oppGT2l_wZ4": {"duration": 193.817, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oppGT2l_wZ4", "annotations": []}, "RS65b_z37_4": {"duration": 62.439, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RS65b_z37_4", "annotations": []}, "bQa8S4Z-1cE": {"duration": 43.86, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bQa8S4Z-1cE", "annotations": [{"segment": [12.168702028081123, 17.090873634945396], "label": "Polishing shoes"}]}, "pi5k3Vt09K0": {"duration": 22.11, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pi5k3Vt09K0", "annotations": []}, "7s7YqryNMAE": {"duration": 84.823, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=7s7YqryNMAE", "annotations": [{"segment": [9.498006683589713, 13.05975994476977], "label": "Putting in contact lenses"}, {"segment": [23.613102940858823, 28.362107289098894], "label": "Putting in contact lenses"}]}, "LnMvFpR0xCY": {"duration": 144.313, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LnMvFpR0xCY", "annotations": [{"segment": [37.82280632761311, 
41.65011411076443], "label": "Mooping floor"}, {"segment": [62.13746753822153, 66.86531832917316], "label": "Mooping floor"}, {"segment": [68.66640434477378, 75.87074840717628], "label": "Mooping floor"}, {"segment": [77.22156291887676, 87.57780750858034], "label": "Mooping floor"}, {"segment": [96.1329660826833, 119.99735578939156], "label": "Mooping floor"}]}, "Vg5Vfb16Kb8": {"duration": 98.36, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Vg5Vfb16Kb8", "annotations": [{"segment": [23.630755515934922, 34.37200802317807], "label": "Putting on makeup"}]}, "KzZlSbM16aY": {"duration": 99.87, "subset": "validation", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=KzZlSbM16aY", "annotations": [{"segment": [24.90483619344774, 95.10534321372856], "label": "Playing flauta"}]}, "XftM9eALPy0": {"duration": 68.66, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XftM9eALPy0", "annotations": [{"segment": [5.51759796, 13.914838260000002], "label": "Using parallel bars"}]}, "Qlh-VSBxcJs": {"duration": 222.47, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Qlh-VSBxcJs", "annotations": [{"segment": [0.01, 213.27309043369735], "label": "Belly dance"}]}, "c9BlG8bO6pQ": {"duration": 134.05, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=c9BlG8bO6pQ", "annotations": []}, "EO_BnsrWMnI": {"duration": 212.86, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=EO_BnsrWMnI", "annotations": [{"segment": [27.229797950078, 195.92171695787832], "label": "Using the balance beam"}]}, "iZlG0UOtImQ": {"duration": 158.941, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iZlG0UOtImQ", "annotations": [{"segment": [2.4787363494539782, 100.88837909516381], "label": "Playing pool"}]}, "lmcBk-gqMzA": {"duration": 68.03, "subset": "validation", 
"resolution": "480x360", "url": "https://www.youtube.com/watch?v=lmcBk-gqMzA", "annotations": [{"segment": [7.532092043681748, 67.981], "label": "Grooming horse"}]}, "cPIsE2ZCWzM": {"duration": 85.06, "subset": "testing", "resolution": "1920x792", "url": "https://www.youtube.com/watch?v=cPIsE2ZCWzM", "annotations": []}, "SymvoBsqt3Y": {"duration": 72.493, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=SymvoBsqt3Y", "annotations": [{"segment": [0.2, 66.92185959438378], "label": "Drum corps"}]}, "DVZCBD8-y2I": {"duration": 110.11, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DVZCBD8-y2I", "annotations": [{"segment": [49.96529017160687, 95.29462558502341], "label": "Removing ice from car"}]}, "a2HjLtnVDaY": {"duration": 36.04, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=a2HjLtnVDaY", "annotations": [{"segment": [0.5760408205406689, 33.49192867100796], "label": "Walking the dog"}]}, "7_7KsHDtvJk": {"duration": 191.194, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=7_7KsHDtvJk", "annotations": []}, "afI4V541WAA": {"duration": 65.23, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=afI4V541WAA", "annotations": []}, "O4P07fipvIA": {"duration": 219.78, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=O4P07fipvIA", "annotations": [{"segment": [0, 200.20084867394698], "label": "Cheerleading"}]}, "tqanHyXTFFw": {"duration": 81.67, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tqanHyXTFFw", "annotations": [{"segment": [20.766471327613107, 78.73422871450859], "label": "Fixing bicycle"}]}, "bNVM1vgxEKA": {"duration": 37.69, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bNVM1vgxEKA", "annotations": []}, "9hODmjEMkYE": {"duration": 73.468, "subset": 
"testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=9hODmjEMkYE", "annotations": []}, "Jd6dM5p91M4": {"duration": 194.77, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Jd6dM5p91M4", "annotations": [{"segment": [6.683778471138846, 186.5381809672387], "label": "Kayaking"}]}, "Ki1bs5X_S5k": {"duration": 23.987, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=Ki1bs5X_S5k", "annotations": []}, "Cgi8ib7_7BY": {"duration": 105.605, "subset": "testing", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=Cgi8ib7_7BY", "annotations": []}, "Ie09KEAPP2I": {"duration": 16.49, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=Ie09KEAPP2I", "annotations": []}, "I4_jFA46Uio": {"duration": 35.782, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=I4_jFA46Uio", "annotations": [{"segment": [0.01, 35.67030596325894], "label": "Peeling potatoes"}]}, "eYgXvnnlPQA": {"duration": 140.99, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eYgXvnnlPQA", "annotations": [{"segment": [22.10553105797701, 114.9267646450858], "label": "Rock climbing"}]}, "1iP5k9jHBRU": {"duration": 50.829, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=1iP5k9jHBRU", "annotations": [{"segment": [2.375162650730074, 37.21088026479751], "label": "Plastering"}]}, "EbCiL9JYTMQ": {"duration": 231.434, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EbCiL9JYTMQ", "annotations": []}, "aDBSNU2trmo": {"duration": 59.536, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aDBSNU2trmo", "annotations": [{"segment": [0.2, 50.03282215288611], "label": "Snow tubing"}]}, "-AjZCBMb4qU": {"duration": 116.7, "subset": "training", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=-AjZCBMb4qU", "annotations": [{"segment": [33.0969708, 107.83006958658346], "label": "Playing accordion"}, {"segment": [15.39662679, 25.03512756], "label": "Playing accordion"}]}, "7rvrBulkd5c": {"duration": 98.08, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=7rvrBulkd5c", "annotations": [{"segment": [14.842148262090484, 50.34089462090484], "label": "Doing step aerobics"}, {"segment": [71.76255190639625, 80.17820298283931], "label": "Doing step aerobics"}, {"segment": [86.91072384399376, 95.63239859594384], "label": "Doing step aerobics"}]}, "mQoYAZ2BLDM": {"duration": 68.081, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mQoYAZ2BLDM", "annotations": [{"segment": [50.71597870667189, 61.93913766718507], "label": "Arm wrestling"}]}, "ZMopjyYvcqw": {"duration": 172.73, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZMopjyYvcqw", "annotations": [{"segment": [10.244171606864274, 17.52292511700468], "label": "Archery"}, {"segment": [86.53629173166927, 126.43464430577222], "label": "Archery"}]}, "Eilil6FZhK8": {"duration": 218.848, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Eilil6FZhK8", "annotations": [{"segment": [17.35808454327169, 19.74056744830514], "label": "Powerbocking"}, {"segment": [26.88801616340549, 30.291563170596127], "label": "Powerbocking"}, {"segment": [38.46007598785367, 41.18291359360619], "label": "Powerbocking"}, {"segment": [45.267170002234955, 47.99000760798747], "label": "Powerbocking"}, {"segment": [64.66738794322161, 70.79377255616477], "label": "Powerbocking"}, {"segment": [75.55873836623167, 85.08866998636546], "label": "Powerbocking"}, {"segment": [89.5132810957133, 94.95895630721833], "label": "Powerbocking"}, {"segment": [116.40130245251936, 124.56981526977691], "label": "Powerbocking"}, {"segment": [129.6751357805629, 139.20506740069666], 
"label": "Powerbocking"}, {"segment": [166.09308757176277, 168.47557045978763], "label": "Powerbocking"}, {"segment": [179.02656610675498, 201.48997619384673], "label": "Powerbocking"}]}, "OpNcCJErjP8": {"duration": 82.5, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=OpNcCJErjP8", "annotations": []}, "pbZVsdQNWNU": {"duration": 211.743, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=pbZVsdQNWNU", "annotations": [{"segment": [0.2, 211.54276643990931], "label": "Playing ice hockey"}]}, "5AG9Q5bF4pM": {"duration": 121.6, "subset": "validation", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=5AG9Q5bF4pM", "annotations": [{"segment": [50.613369734789394, 118.66655226209049], "label": "Preparing salad"}]}, "vc820BteGzY": {"duration": 156.689, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=vc820BteGzY", "annotations": [{"segment": [8.05404581770403, 156.687799], "label": "Making a cake"}]}, "E3QtX6r9QX0": {"duration": 103.329, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=E3QtX6r9QX0", "annotations": [{"segment": [0.01, 103.32879818594104], "label": "Blowing leaves"}]}, "tOEomEC1rY8": {"duration": 86.611, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tOEomEC1rY8", "annotations": [{"segment": [38.75664118564743, 58.60760374414976], "label": "Blow-drying hair"}]}, "LeSq1XE93mE": {"duration": 228.044, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=LeSq1XE93mE", "annotations": [{"segment": [33.79735013260531, 228.0431746031746], "label": "Playing drums"}]}, "QuaoqO4JgKY": {"duration": 17.83, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=QuaoqO4JgKY", "annotations": [{"segment": [7.984099680000001, 17.66014430577223], "label": "Springboard diving"}, {"segment": [0, 
6.283996799999999], "label": "Springboard diving"}]}, "uoy7NjeESZc": {"duration": 101.518, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uoy7NjeESZc", "annotations": [{"segment": [16.148302652106082, 87.54913104524181], "label": "BMX"}]}, "c8iet_4ndU0": {"duration": 35.09, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=c8iet_4ndU0", "annotations": [{"segment": [1.6967954690976756, 28.626581623809173], "label": "Using uneven bars"}]}, "Qg3Lih9PTBM": {"duration": 62.927, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Qg3Lih9PTBM", "annotations": [{"segment": [0.2, 62.681], "label": "Sumo"}]}, "uDlyfvy0NOs": {"duration": 80.179, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=uDlyfvy0NOs", "annotations": [{"segment": [20.638771965678625, 69.42132388455538], "label": "Baking cookies"}]}, "Ue90f5r-2Qw": {"duration": 156.53, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Ue90f5r-2Qw", "annotations": [{"segment": [36.38429210140406, 142.36269996723868], "label": "Playing violin"}]}, "zuqNxHmtBD8": {"duration": 54.73, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=zuqNxHmtBD8", "annotations": [{"segment": [6.0918849900000005, 31.91412177], "label": "Long jump"}, {"segment": [35.15736231, 54.681666], "label": "Long jump"}]}, "CSsilC4QbB4": {"duration": 139.018, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CSsilC4QbB4", "annotations": [{"segment": [0.01, 132.35667229294108], "label": "Scuba diving"}]}, "KHsl04qla5A": {"duration": 195.47, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KHsl04qla5A", "annotations": []}, "Mdt2E8KYpCg": {"duration": 51.688, "subset": "validation", "resolution": "1252x720", "url": 
"https://www.youtube.com/watch?v=Mdt2E8KYpCg", "annotations": [{"segment": [0.01, 51.3171803561821], "label": "Doing a powerbomb"}]}, "ifmHO5lQq18": {"duration": 150.56, "subset": "training", "resolution": "2560x810", "url": "https://www.youtube.com/watch?v=ifmHO5lQq18", "annotations": [{"segment": [2.2307659906396258, 9.979742589703587], "label": "High jump"}, {"segment": [14.910909516380656, 21.955433697347896], "label": "High jump"}, {"segment": [27.591053042121686, 32.52221996879875], "label": "High jump"}, {"segment": [37.21856942277691, 43.55864118564743], "label": "High jump"}, {"segment": [50.36834789391576, 57.647689547581905], "label": "High jump"}, {"segment": [61.40476911076443, 68.68411076443059], "label": "High jump"}, {"segment": [73.38046021840874, 79.25089703588144], "label": "High jump"}, {"segment": [84.88651638065522, 93.33994539781591], "label": "High jump"}, {"segment": [113.06461310452418, 119.4046848673947], "label": "High jump"}, {"segment": [123.39658190327613, 131.38037597503902], "label": "High jump"}, {"segment": [136.07672542901716, 144.99978939157566], "label": "High jump"}]}, "UdBFm97tOJs": {"duration": 15.14, "subset": "training", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=UdBFm97tOJs", "annotations": [{"segment": [1.9603290904836197, 12.966514104524181], "label": "Drinking coffee"}]}, "nxFbmoV3Idk": {"duration": 229.506, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=nxFbmoV3Idk", "annotations": [{"segment": [13.939219626168224, 229.26100000000002], "label": "Changing car wheel"}]}, "5UZ2ft8Y3sI": {"duration": 151.487, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5UZ2ft8Y3sI", "annotations": []}, "uy9Z2i8AmyY": {"duration": 184.74, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uy9Z2i8AmyY", "annotations": [{"segment": [34.89669594016411, 62.23331291835726], "label": "Paintball"}, 
{"segment": [109.42494643860648, 164.67368812084945], "label": "Paintball"}]}, "aiTAx3oo97U": {"duration": 24.196, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aiTAx3oo97U", "annotations": [{"segment": [0.2, 23.961000000000002], "label": "Elliptical trainer"}]}, "JKZ-3N1fYL8": {"duration": 11.378, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JKZ-3N1fYL8", "annotations": [{"segment": [0.2, 11.177777777777779], "label": "Raking leaves"}]}, "2Sr1ODuJ0kE": {"duration": 122.834, "subset": "testing", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=2Sr1ODuJ0kE", "annotations": []}, "K65lcuHQn-E": {"duration": 178.77, "subset": "testing", "resolution": "634x480", "url": "https://www.youtube.com/watch?v=K65lcuHQn-E", "annotations": []}, "ZrrPDWi3lFE": {"duration": 176.217, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZrrPDWi3lFE", "annotations": []}, "0t49I3VyI0g": {"duration": 234.99, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0t49I3VyI0g", "annotations": []}, "Y5VEl3e9Hbo": {"duration": 107.49000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Y5VEl3e9Hbo", "annotations": [{"segment": [16.76396255850234, 35.874879875195006], "label": "Polishing shoes"}, {"segment": [56.82983307332293, 69.23516536661467], "label": "Polishing shoes"}, {"segment": [82.98161466458657, 89.68719968798752], "label": "Polishing shoes"}]}, "Hy8bl_lnrX0": {"duration": 10.06, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Hy8bl_lnrX0", "annotations": []}, "UUdqzU0g9fM": {"duration": 159.057, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=UUdqzU0g9fM", "annotations": []}, "zUuT2FBf-Oc": {"duration": 35.99, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=zUuT2FBf-Oc", "annotations": [{"segment": [0, 35.941], "label": "Wrapping presents"}]}, "QsRW8dj-F7M": {"duration": 159.36, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QsRW8dj-F7M", "annotations": [{"segment": [33.87961917, 77.39700741], "label": "Playing lacrosse"}, {"segment": [84.63928005000001, 95.00382648000001], "label": "Playing lacrosse"}]}, "twJ2uE1GS2I": {"duration": 198.903, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=twJ2uE1GS2I", "annotations": [{"segment": [15.514986857977721, 45.92436109961405], "label": "Making a cake"}, {"segment": [53.99215426576247, 91.8487221992281], "label": "Making a cake"}, {"segment": [94.33112009650455, 99.91651536537651], "label": "Making a cake"}, {"segment": [103.95041194845074, 116.67270117199246], "label": "Making a cake"}, {"segment": [122.56839617802399, 166.0103581123245], "label": "Making a cake"}]}, "DIhyfNJAm-M": {"duration": 213.81, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DIhyfNJAm-M", "annotations": []}, "73zt29iKyL4": {"duration": 201.921, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=73zt29iKyL4", "annotations": [{"segment": [0.01, 201.881], "label": "Ice fishing"}]}, "h-A3CAx_eyU": {"duration": 157.083, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=h-A3CAx_eyU", "annotations": [{"segment": [144.33947258034323, 157.08251700680273], "label": "Rock-paper-scissors"}]}, "R_TRpIHkgMs": {"duration": 96.781, "subset": "validation", "resolution": "852x480", "url": "https://www.youtube.com/watch?v=R_TRpIHkgMs", "annotations": [{"segment": [0.01, 96.780361], "label": "Playing pool"}]}, "j55LAXY-T0E": {"duration": 147.87, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=j55LAXY-T0E", "annotations": [{"segment": 
[31.824177847113884, 33.66905772230889], "label": "Drinking coffee"}, {"segment": [43.81589703588144, 45.66077691107645], "label": "Drinking coffee"}]}, "_pMQwrt-cf4": {"duration": 70.52, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=_pMQwrt-cf4", "annotations": [{"segment": [32.43665366614664, 65.86290015600623], "label": "Installing carpet"}]}, "5MfhJjbNNJE": {"duration": 48.902, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=5MfhJjbNNJE", "annotations": [{"segment": [13.960879862714508, 15.562948043681747], "label": "Rope skipping"}, {"segment": [21.437198040561622, 46.30739932605304], "label": "Rope skipping"}]}, "ugK_mLTN18U": {"duration": 230.41, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ugK_mLTN18U", "annotations": []}, "MC0L0ljTUiw": {"duration": 235.06, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=MC0L0ljTUiw", "annotations": [{"segment": [0, 235.05528344671202], "label": "Belly dance"}]}, "juKQ_gU42EM": {"duration": 45.14, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=juKQ_gU42EM", "annotations": [{"segment": [0.01, 45.101], "label": "Bullfighting"}]}, "bmIWsU8sNlw": {"duration": 208.0, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=bmIWsU8sNlw", "annotations": [{"segment": [30.219721848334945, 197.4001701287088], "label": "Ballet"}]}, "7TgvbmIn03A": {"duration": 122.21, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7TgvbmIn03A", "annotations": [{"segment": [14.870679953728763, 119.72803834633386], "label": "Mowing the lawn"}]}, "UcI4miTi0Cg": {"duration": 74.304, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UcI4miTi0Cg", "annotations": [{"segment": [8.705255815131641, 68.49790931272655], "label": "Futsal"}]}, 
"GN0TdqHETvc": {"duration": 165.32999999999998, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GN0TdqHETvc", "annotations": [{"segment": [21.340687210602233, 160.69797041574537], "label": "Playing squash"}]}, "eZ_IOp638kQ": {"duration": 222.15, "subset": "training", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=eZ_IOp638kQ", "annotations": [{"segment": [37.08197758970359, 222.14530612244897], "label": "Cricket"}]}, "e_0bMJEFiN8": {"duration": 92.857, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=e_0bMJEFiN8", "annotations": [{"segment": [7.532830327613105, 48.09422439937598], "label": "Cleaning sink"}, {"segment": [54.323295631825275, 81.55737450858035], "label": "Cleaning sink"}]}, "GCbDiIoN_RU": {"duration": 212.39, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GCbDiIoN_RU", "annotations": []}, "sjE-hHKFghc": {"duration": 213.462, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sjE-hHKFghc", "annotations": [{"segment": [30.637151135024997, 194.47930592199688], "label": "Rollerblading"}]}, "iBz_YrU-T80": {"duration": 168.97, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=iBz_YrU-T80", "annotations": [{"segment": [7.117368907001178, 15.816375348891507], "label": "Drinking coffee"}, {"segment": [20.2976816977441, 22.670138000077824], "label": "Drinking coffee"}, {"segment": [35.05963202337617, 35.58684453500589], "label": "Drinking coffee"}, {"segment": [38.48651334896933, 47.976338558304235], "label": "Drinking coffee"}, {"segment": [54.566494953675694, 59.83862006997286], "label": "Drinking coffee"}, {"segment": [65.11074518627002, 100.96119597709078], "label": "Drinking coffee"}, {"segment": [105.70610858175823, 128.1126403260212], "label": "Drinking coffee"}, {"segment": [145.77425946561672, 166.33554741917567], "label": "Drinking 
coffee"}]}, "h8J08sJ2no0": {"duration": 45.51, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=h8J08sJ2no0", "annotations": [{"segment": [1.436174362360874, 30.623358162983926], "label": "Rock climbing"}]}, "eAIJhHmdjmk": {"duration": 124.44, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eAIJhHmdjmk", "annotations": [{"segment": [14.004360797097748, 60.50940752606971], "label": "Shoveling snow"}, {"segment": [70.58550098401363, 112.24627201205102], "label": "Shoveling snow"}]}, "DlJPKOVd0bA": {"duration": 126.29, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=DlJPKOVd0bA", "annotations": [{"segment": [5.799365559282371, 46.264387], "label": "Tai chi"}, {"segment": [52.08588398, 115.49583952808112], "label": "Tai chi"}]}, "UWTpfygMUQw": {"duration": 173.941, "subset": "validation", "resolution": "450x360", "url": "https://www.youtube.com/watch?v=UWTpfygMUQw", "annotations": [{"segment": [33.31364953271028, 145.45924766355142], "label": "Fixing the roof"}]}, "dnJLvsqqSgQ": {"duration": 176.216, "subset": "validation", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=dnJLvsqqSgQ", "annotations": [{"segment": [10.688047573983546, 169.09045404846015], "label": "Capoeira"}]}, "gefe0G9DHEk": {"duration": 66.46, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gefe0G9DHEk", "annotations": []}, "mo_PhyaQHh0": {"duration": 59.56, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mo_PhyaQHh0", "annotations": [{"segment": [7.426021840873635, 59.300999999999995], "label": "Installing carpet"}]}, "JO_CW4PR3I8": {"duration": 40.5, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=JO_CW4PR3I8", "annotations": [{"segment": [24.856483619344775, 36.620620904836194], "label": "Polishing shoes"}]}, "27uzQVdrvrk": {"duration": 89.28, 
"subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=27uzQVdrvrk", "annotations": []}, "lJ8gPeDG6kk": {"duration": 219.29, "subset": "testing", "resolution": "320x180", "url": "https://www.youtube.com/watch?v=lJ8gPeDG6kk", "annotations": []}, "X1pGJqP89Nk": {"duration": 212.49, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=X1pGJqP89Nk", "annotations": [{"segment": [3.977429017160687, 210.47228549141965], "label": "Hand washing clothes"}]}, "lHObolD8eIg": {"duration": 26.22, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lHObolD8eIg", "annotations": []}, "p5H5ZmAwdH8": {"duration": 84.591, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=p5H5ZmAwdH8", "annotations": [{"segment": [0.2, 80.90675575015463], "label": "Hanging wallpaper"}]}, "d_6BVfxlAsI": {"duration": 181.488, "subset": "training", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=d_6BVfxlAsI", "annotations": [{"segment": [12.54963968362541, 179.31286398269083], "label": "Playing blackjack"}]}, "qHu0fz_anTQ": {"duration": 123.62, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=qHu0fz_anTQ", "annotations": [{"segment": [6.942947562800471, 121.69444185709828], "label": "Wrapping presents"}]}, "SdbxPbXeQJ0": {"duration": 171.39, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SdbxPbXeQJ0", "annotations": [{"segment": [21.122506808112323, 25.40048287051482], "label": "Drinking beer"}, {"segment": [123.79393230577222, 143.04482458658347], "label": "Drinking beer"}]}, "6fI4gF_2VCk": {"duration": 163.21, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6fI4gF_2VCk", "annotations": [{"segment": [2.8008481645388263, 156.33825092043685], "label": "Smoking a cigarette"}]}, "HHxPjC6T2e0": {"duration": 19.087, "subset": "training", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=HHxPjC6T2e0", "annotations": [{"segment": [0.2, 18.886802721088436], "label": "Hitting a pinata"}]}, "HpQrpwNgCjc": {"duration": 11.15, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HpQrpwNgCjc", "annotations": [{"segment": [2.3054547, 7.917408], "label": "Plataform diving"}]}, "RP_PlV5Jzuc": {"duration": 25.66, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RP_PlV5Jzuc", "annotations": [{"segment": [11.147842267432194, 15.230713360374416], "label": "Grooming horse"}, {"segment": [15.030572580343215, 23.356429029641188], "label": "Grooming horse"}]}, "UBDcmeET6ys": {"duration": 30.535, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=UBDcmeET6ys", "annotations": [{"segment": [0.23791731669266772, 30.301000000000002], "label": "Using the rowing machine"}]}, "17ODXa4CEmI": {"duration": 112.339, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=17ODXa4CEmI", "annotations": [{"segment": [11.742051970358814, 101.82286857878314], "label": "Making a lemonade"}]}, "uOxKf-kNEyg": {"duration": 72.7, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uOxKf-kNEyg", "annotations": [{"segment": [12.242414976599065, 34.57348673946958], "label": "Polishing shoes"}]}, "RUv9YPd_0Zc": {"duration": 210.466, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=RUv9YPd_0Zc", "annotations": [{"segment": [54.50436980200297, 177.3033716450699], "label": "Changing car wheel"}]}, "flrXQtlMzTk": {"duration": 46.35, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=flrXQtlMzTk", "annotations": []}, "ULH_AqrP3to": {"duration": 50.852, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ULH_AqrP3to", "annotations": [{"segment": 
[4.704964110581668, 45.576368465721856], "label": "Mooping floor"}]}, "UjG_uHlFf3s": {"duration": 136.069, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UjG_uHlFf3s", "annotations": [{"segment": [35.22074570982839, 56.86157254290172], "label": "Sumo"}]}, "AGgJVF3nT6I": {"duration": 65.003, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AGgJVF3nT6I", "annotations": [{"segment": [0.2, 61.6220006317697], "label": "Fun sliding down"}]}, "QQfH0FDsYPc": {"duration": 11.56, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QQfH0FDsYPc", "annotations": [{"segment": [0.8298325366614664, 9.777592062402496], "label": "Hammer throw"}]}, "2Voht8wf3dQ": {"duration": 117.888, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2Voht8wf3dQ", "annotations": [{"segment": [0.01, 117.8877097505669], "label": "Sharpening knives"}]}, "l1aHbJlvI3U": {"duration": 120.141, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=l1aHbJlvI3U", "annotations": []}, "kWmf0_XSfBU": {"duration": 39.43, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kWmf0_XSfBU", "annotations": [{"segment": [13.409034439937596, 18.51430902028081], "label": "Ironing clothes"}]}, "ibWb6iRQiD4": {"duration": 182.277, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ibWb6iRQiD4", "annotations": [{"segment": [108.91100569192837, 145.59382484142904], "label": "Blow-drying hair"}, {"segment": [156.39961652887885, 166.3523193988984], "label": "Blow-drying hair"}]}, "cTZw5NUK1-c": {"duration": 155.783, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cTZw5NUK1-c", "annotations": [{"segment": [42.75317316692668, 58.78561310452418], "label": "Surfing"}, {"segment": [78.95471918876756, 88.18634789391577], "label": 
"Surfing"}]}, "q-FOdt9a7Js": {"duration": 148.75, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=q-FOdt9a7Js", "annotations": [{"segment": [1.48449506, 144.25451170046802], "label": "Shoveling snow"}]}, "Imui46JTe7s": {"duration": 122.37, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Imui46JTe7s", "annotations": [{"segment": [23.08966926677067, 108.96212168486738], "label": "Doing crunches"}]}, "dyvB6XCWPZs": {"duration": 220.637, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dyvB6XCWPZs", "annotations": [{"segment": [24.094413490825346, 74.34847591454678], "label": "Rafting"}, {"segment": [100.50812484744287, 113.24374340687913], "label": "Rafting"}, {"segment": [148.35274565678625, 172.10295319812792], "label": "Rafting"}]}, "530VjKtBVKc": {"duration": 187.69, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=530VjKtBVKc", "annotations": []}, "DCKwTvFrA6Q": {"duration": 185.481, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=DCKwTvFrA6Q", "annotations": [{"segment": [59.60855721684868, 111.69370429953197], "label": "Playing pool"}]}, "p5Ynl_rGoEU": {"duration": 180.838, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=p5Ynl_rGoEU", "annotations": [{"segment": [7.61715744149766, 161.3708909828393], "label": "Wakeboarding"}]}, "ZWEUIJcB6_Q": {"duration": 126.201, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZWEUIJcB6_Q", "annotations": []}, "FG1-LeVFscs": {"duration": 90.905, "subset": "testing", "resolution": "480x270", "url": "https://www.youtube.com/watch?v=FG1-LeVFscs", "annotations": []}, "z2HRvo3APwU": {"duration": 85.984, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=z2HRvo3APwU", "annotations": []}, "D0fHzHqABqk": {"duration": 8.55, 
"subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=D0fHzHqABqk", "annotations": []}, "rvcSqYeUZ9s": {"duration": 51.688, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=rvcSqYeUZ9s", "annotations": [{"segment": [12.244461804838567, 44.22146803536816], "label": "Applying sunscreen"}]}, "2U_BUV8PLtk": {"duration": 89.397, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2U_BUV8PLtk", "annotations": [{"segment": [8.341847837826162, 80.91594121914886], "label": "Assembling bicycle"}]}, "0w7cO4tscBc": {"duration": 98.8, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=0w7cO4tscBc", "annotations": [{"segment": [25.790819032761313, 32.25777067082683], "label": "Grooming horse"}, {"segment": [72.90718096723869, 94.00176131045241], "label": "Grooming horse"}]}, "5MpJCKOxRVs": {"duration": 60.65, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5MpJCKOxRVs", "annotations": [{"segment": [9.083385210608425, 55.635734414976596], "label": "Fixing bicycle"}]}, "AUSqTMtjD0Y": {"duration": 44.68, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=AUSqTMtjD0Y", "annotations": [{"segment": [0.22774103891881306, 43.99829421493127], "label": "Shoveling snow"}]}, "KFk4ZpyYeuE": {"duration": 96.53, "subset": "training", "resolution": "400x300", "url": "https://www.youtube.com/watch?v=KFk4ZpyYeuE", "annotations": [{"segment": [2.55914976599064, 93.93585023400937], "label": "Doing kickboxing"}]}, "7jS5kDQ8cQU": {"duration": 211.98, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7jS5kDQ8cQU", "annotations": []}, "4w3QwJdzwsE": {"duration": 69.66, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4w3QwJdzwsE", "annotations": [{"segment": [0, 69.659229], "label": "Sailing"}]}, "hDpYoAAFwOA": 
{"duration": 47.79, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hDpYoAAFwOA", "annotations": [{"segment": [2.6838064586583465, 29.44732086583463], "label": "Doing motocross"}]}, "9029wVyGkvg": {"duration": 112.15, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=9029wVyGkvg", "annotations": [{"segment": [54.281568, 105.59006728861155], "label": "Layup drill in basketball"}, {"segment": [15.908437290171609, 49.1898672], "label": "Layup drill in basketball"}]}, "bXT72HACH4I": {"duration": 201.851, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=bXT72HACH4I", "annotations": [{"segment": [11.000521806853582, 181.99809813084113], "label": "Rope skipping"}]}, "vCAGiXqYXBk": {"duration": 93.67, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vCAGiXqYXBk", "annotations": [{"segment": [3.8685452418096724, 88.39260920436818], "label": "Playing accordion"}]}, "6aj1njn541k": {"duration": 196.4, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6aj1njn541k", "annotations": [{"segment": [79.96695755993505, 85.4819201502754], "label": "Getting a piercing"}, {"segment": [96.20545852038164, 107.5417705116368], "label": "Getting a piercing"}]}, "AcLZk6JyXUM": {"duration": 225.745, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AcLZk6JyXUM", "annotations": [{"segment": [0.01, 225.743037], "label": "Kneeling"}]}, "ej9TkJVjK9M": {"duration": 42.42, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=ej9TkJVjK9M", "annotations": []}, "YH536Kl_lTo": {"duration": 180.05, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=YH536Kl_lTo", "annotations": []}, "YGVIW3fNihQ": {"duration": 25.659, "subset": "testing", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=YGVIW3fNihQ", "annotations": []}, "oUTfQ4c5iuQ": {"duration": 187.641, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=oUTfQ4c5iuQ", "annotations": []}, "RrKCACSu9xU": {"duration": 49.018, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RrKCACSu9xU", "annotations": [{"segment": [0.840204368174727, 40.57552418096724], "label": "Swinging at the playground"}]}, "bmf7otW5TQo": {"duration": 84.17, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bmf7otW5TQo", "annotations": [{"segment": [16.66275663026521, 28.208603744149766], "label": "Using the pommel horse"}, {"segment": [49.988269890795635, 78.08423556942277], "label": "Using the pommel horse"}]}, "ke3R1rOeQzE": {"duration": 80.25, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ke3R1rOeQzE", "annotations": [{"segment": [0.010000000000000002, 80.248162], "label": "Volleyball"}]}, "McFsP60TqH4": {"duration": 62.88, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=McFsP60TqH4", "annotations": [{"segment": [0.2942884735797595, 30.802192985959437], "label": "Brushing hair"}]}, "ot7hBY4lQ2c": {"duration": 164.304, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ot7hBY4lQ2c", "annotations": [{"segment": [0.01, 164.30439909297053], "label": "Capoeira"}]}, "lz2FMoj9tpo": {"duration": 69.13, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=lz2FMoj9tpo", "annotations": [{"segment": [3.1273765127475848, 50.361545912176624], "label": "Discus throw"}]}, "eudqfH7Vz_c": {"duration": 175.334, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eudqfH7Vz_c", "annotations": [{"segment": [50.056317173166924, 55.800484717628706], "label": "Croquet"}, {"segment": [72.75945556318253, 
77.95655953198128], "label": "Croquet"}]}, "0lbv0AE5gJQ": {"duration": 92.3, "subset": "testing", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=0lbv0AE5gJQ", "annotations": []}, "xww6n1FoJIM": {"duration": 154.97, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=xww6n1FoJIM", "annotations": [{"segment": [15.714549792876069, 33.363198021798425], "label": "Hurling"}, {"segment": [42.308403288512494, 49.803034728191854], "label": "Hurling"}, {"segment": [58.74823999490592, 62.6164368669985], "label": "Hurling"}, {"segment": [74.46278978778199, 88.96852737597504], "label": "Hurling"}, {"segment": [99.12254408736351, 104.44131474570983], "label": "Hurling"}, {"segment": [111.45242152262091, 114.35356915444619], "label": "Hurling"}, {"segment": [121.36467593135727, 132.24397955070205], "label": "Hurling"}]}, "OBfVj8mCVUw": {"duration": 75.047, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OBfVj8mCVUw", "annotations": [{"segment": [0.2, 71.3062369470405], "label": "Knitting"}]}, "fCLnOf-YjEI": {"duration": 116.5, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fCLnOf-YjEI", "annotations": [{"segment": [1.1382217855178156, 92.0474897010454], "label": "Shoveling snow"}]}, "HzmT-lvHrpM": {"duration": 210.51, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HzmT-lvHrpM", "annotations": [{"segment": [53.30584864, 110.24934463999999], "label": "Fixing bicycle"}, {"segment": [135.9170728, 193.84091362948516], "label": "Fixing bicycle"}]}, "o_crN4ZgdmM": {"duration": 45.303, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=o_crN4ZgdmM", "annotations": []}, "cIN4HNSRoAg": {"duration": 66.595, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=cIN4HNSRoAg", "annotations": [{"segment": [0.2, 66.261], "label": "Table soccer"}]}, 
"LCLDhKiMAPA": {"duration": 215.226, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=LCLDhKiMAPA", "annotations": [{"segment": [0.2, 208.80278003120125], "label": "Table soccer"}]}, "EwMAJPldfKs": {"duration": 31.86, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EwMAJPldfKs", "annotations": [{"segment": [1.6898040312012481, 21.023150152886117], "label": "Washing hands"}]}, "fLCWIE8a4AU": {"duration": 70.565, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=fLCWIE8a4AU", "annotations": [{"segment": [0.19999999999999998, 55.38571450858035], "label": "Bullfighting"}]}, "fBxpQxIqGN8": {"duration": 164.51, "subset": "training", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=fBxpQxIqGN8", "annotations": [{"segment": [1.2832556839688554, 145.77784569886197], "label": "Tai chi"}]}, "7BUr2cN8CLs": {"duration": 32.834, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=7BUr2cN8CLs", "annotations": []}, "sxQ9H3c5bRM": {"duration": 129.71, "subset": "validation", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=sxQ9H3c5bRM", "annotations": [{"segment": [30.548878315132605, 117.13775195007801], "label": "Doing step aerobics"}]}, "D7tSpFiMXYM": {"duration": 53.06, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=D7tSpFiMXYM", "annotations": []}, "_8Zk9dfBgPg": {"duration": 122.35, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_8Zk9dfBgPg", "annotations": [{"segment": [0.01, 101.44585858264263], "label": "Painting"}]}, "0rRk3C1yor8": {"duration": 180.63, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0rRk3C1yor8", "annotations": []}, "RUAWJc2OIJY": {"duration": 173.64, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RUAWJc2OIJY", 
"annotations": [{"segment": [32.2355580124805, 44.42547490795632], "label": "Breakdancing"}, {"segment": [118.91941149141967, 143.8410193666147], "label": "Breakdancing"}]}, "o4Q7F1KqmII": {"duration": 65.62, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=o4Q7F1KqmII", "annotations": []}, "UvYRPf1xV3o": {"duration": 195.768, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UvYRPf1xV3o", "annotations": []}, "e4ZTZDDFtYY": {"duration": 204.383, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=e4ZTZDDFtYY", "annotations": [{"segment": [131.36550390639627, 135.5105319422777], "label": "Rock-paper-scissors"}]}, "J3qC2hqRhPc": {"duration": 84.68, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=J3qC2hqRhPc", "annotations": [{"segment": [14.664274040349369, 17.43859615609114], "label": "Brushing teeth"}, {"segment": [20.609250002653166, 22.590908656754433], "label": "Brushing teeth"}, {"segment": [26.157894234136712, 31.045985580919837], "label": "Brushing teeth"}, {"segment": [34.216639096723874, 36.46251888299532], "label": "Brushing teeth"}, {"segment": [44.12493227145086, 49.40935529797192], "label": "Brushing teeth"}, {"segment": [52.58000911388455, 61.695638834633385], "label": "Brushing teeth"}, {"segment": [71.07548970670827, 73.18925891731669], "label": "Brushing teeth"}, {"segment": [79.53056654914197, 83.6259943946958], "label": "Brushing teeth"}]}, "KEWlDtEt2cQ": {"duration": 97.18, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=KEWlDtEt2cQ", "annotations": [{"segment": [8.641191907956319, 87.77631780187208], "label": "Tango"}]}, "zvglI3yW_Hs": {"duration": 199.39, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=zvglI3yW_Hs", "annotations": []}, "EXr5QXCpkYY": {"duration": 234.754, "subset": "validation", "resolution": "852x480", 
"url": "https://www.youtube.com/watch?v=EXr5QXCpkYY", "annotations": [{"segment": [0, 234.721], "label": "Installing carpet"}]}, "85HxidMLf5Y": {"duration": 111.18, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=85HxidMLf5Y", "annotations": [{"segment": [10.666761578003122, 103.97924497581904], "label": "Polishing forniture"}]}, "QjoEYtjPa3w": {"duration": 131.124, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=QjoEYtjPa3w", "annotations": [{"segment": [23.85908375795399, 39.96906549040851], "label": "Putting in contact lenses"}, {"segment": [62.80853326300225, 65.86739055397463], "label": "Putting in contact lenses"}]}, "cCqjsuJa2vk": {"duration": 49.923, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=cCqjsuJa2vk", "annotations": [{"segment": [0.01, 48.85544735087532], "label": "Baton twirling"}]}, "suL3ZeuQ3DA": {"duration": 24.13, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=suL3ZeuQ3DA", "annotations": [{"segment": [4.497661395475819, 20.98281362324493], "label": "Playing kickball"}]}, "5cNNxBAPxQI": {"duration": 22.94, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=5cNNxBAPxQI", "annotations": []}, "s3HhNlysFDs": {"duration": 187.687, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=s3HhNlysFDs", "annotations": [{"segment": [16.82319218580511, 169.39885521232617], "label": "Swimming"}]}, "g9b-kCC79kQ": {"duration": 133.655, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=g9b-kCC79kQ", "annotations": [{"segment": [19.904673166926678, 23.0310616224649], "label": "Playing blackjack"}, {"segment": [42.206244149765986, 48.87587285491419], "label": "Playing blackjack"}, {"segment": [58.463464118564744, 80.34818330733229], "label": "Playing blackjack"}, {"segment": [81.80716458658345, 
85.78009594383775], "label": "Playing blackjack"}, {"segment": [96.82832215288612, 99.74672152886116], "label": "Playing blackjack"}, {"segment": [115.17254680187207, 121.42625975039003], "label": "Playing blackjack"}]}, "ZVNRQ_MPZAs": {"duration": 206.66, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZVNRQ_MPZAs", "annotations": [{"segment": [39.65495876977936, 147.33590372186316], "label": "Doing motocross"}]}, "6SOluodeJ7s": {"duration": 17.067, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6SOluodeJ7s", "annotations": [{"segment": [9.953065580345301, 11.465930618336895], "label": "Sumo"}]}, "JTGuJPi88Wk": {"duration": 93.484, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JTGuJPi88Wk", "annotations": []}, "wUvC0TXK1PM": {"duration": 81.27, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=wUvC0TXK1PM", "annotations": [{"segment": [8.238166926677067, 25.85517004680187], "label": "Cleaning windows"}, {"segment": [31.17829329173167, 76.04461778471139], "label": "Cleaning windows"}]}, "l9SPN6pgNow": {"duration": 204.99, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=l9SPN6pgNow", "annotations": []}, "IaSMwUYheOs": {"duration": 170.156, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IaSMwUYheOs", "annotations": []}, "zqXJkZgKmSk": {"duration": 10.45, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zqXJkZgKmSk", "annotations": [{"segment": [3.12562368, 10.383773769110766], "label": "Playing kickball"}]}, "6iTymXuGFhg": {"duration": 121.603, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6iTymXuGFhg", "annotations": [{"segment": [3.4135694227769116, 107.54512792511701], "label": "Swinging at the playground"}]}, "CgWVpLVd16o": {"duration": 
22.036, "subset": "validation", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=CgWVpLVd16o", "annotations": [{"segment": [1.6818237129485178, 11.94438065522621], "label": "Fun sliding down"}]}, "mCdKLZGnKxk": {"duration": 75.047, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mCdKLZGnKxk", "annotations": [{"segment": [46.47990034945398, 74.846892], "label": "Fixing the roof"}]}, "tKQHLe1UpwE": {"duration": 18.34, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tKQHLe1UpwE", "annotations": []}, "-yOwB5rvMAo": {"duration": 72.075, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-yOwB5rvMAo", "annotations": [{"segment": [0.01, 57.04578471138846], "label": "Knitting"}]}, "8HdfGmQaabM": {"duration": 231.9, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8HdfGmQaabM", "annotations": [{"segment": [22.132404822667, 25.38330697792162], "label": "Preparing pasta"}, {"segment": [129.41217594606965, 225.85560655195687], "label": "Preparing pasta"}]}, "ycA2gqWhPGk": {"duration": 18.53, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ycA2gqWhPGk", "annotations": [{"segment": [1.5898965834633385, 2.6016489547581902], "label": "Washing face"}]}, "wONwHYy59Tc": {"duration": 191.124, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wONwHYy59Tc", "annotations": [{"segment": [94.52138901727746, 130.48707650118757], "label": "Blow-drying hair"}]}, "kl_JsmJ84PI": {"duration": 91.58, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kl_JsmJ84PI", "annotations": [{"segment": [1.1411764539668412, 4.99264698610493], "label": "Futsal"}]}, "vT6bb2y34UE": {"duration": 57.75, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vT6bb2y34UE", "annotations": 
[{"segment": [15.225298586583463, 55.67594394383776], "label": "Wrapping presents"}]}, "3eF0whKP6EY": {"duration": 234.986, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3eF0whKP6EY", "annotations": []}, "KrwFx7kw2pY": {"duration": 223.66, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KrwFx7kw2pY", "annotations": []}, "3jReEwvx48A": {"duration": 81.57, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=3jReEwvx48A", "annotations": []}, "a2Y7AK4VD0o": {"duration": 30.19, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=a2Y7AK4VD0o", "annotations": [{"segment": [0, 30.18594], "label": "Playing accordion"}]}, "FzdLe1mjk7c": {"duration": 48.228, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=FzdLe1mjk7c", "annotations": [{"segment": [13.95082153670529, 36.90217497994012], "label": "Table soccer"}]}, "5cTnHXcD7DE": {"duration": 215.946, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5cTnHXcD7DE", "annotations": [{"segment": [6.3977831513260535, 14.815918876755072], "label": "Running a marathon"}, {"segment": [35.019444617784714, 69.36543837753511], "label": "Running a marathon"}, {"segment": [84.18135725429018, 91.92604212168487], "label": "Running a marathon"}, {"segment": [107.0786864274571, 113.8131950078003], "label": "Running a marathon"}, {"segment": [131.32291731669267, 136.71052418096724], "label": "Running a marathon"}, {"segment": [156.24059906396258, 169.38858502340094], "label": "Running a marathon"}]}, "rWDMssiL7hE": {"duration": 138.95, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rWDMssiL7hE", "annotations": [{"segment": [0.9751287004680188, 3.1420813681747273], "label": "Brushing teeth"}, {"segment": [8.99285357098284, 11.15980623868955], "label": "Brushing teeth"}, 
{"segment": [28.495427580343215, 33.47941871606864], "label": "Brushing teeth"}, {"segment": [39.54688618564743, 51.89851639157566], "label": "Brushing teeth"}, {"segment": [56.882507527301094, 71.83448093447737], "label": "Brushing teeth"}, {"segment": [79.41881527145085, 83.10263480655227], "label": "Brushing teeth"}, {"segment": [95.45426501248049, 99.35477981435258], "label": "Brushing teeth"}]}, "Z-zG845frso": {"duration": 184.135, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Z-zG845frso", "annotations": []}, "c4ctwOucndQ": {"duration": 117.865, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=c4ctwOucndQ", "annotations": [{"segment": [3.861395141519947, 112.16433335413416], "label": "Using the rowing machine"}]}, "4BGapLta7Pk": {"duration": 6.83, "subset": "testing", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=4BGapLta7Pk", "annotations": []}, "MrVj3D-DuJI": {"duration": 81.967, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MrVj3D-DuJI", "annotations": [{"segment": [14.334261386765245, 79.3201129874475], "label": "Hula hoop"}]}, "ymLLzMVY6V4": {"duration": 101.57, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ymLLzMVY6V4", "annotations": []}, "wbXUEc4sBAo": {"duration": 88.35, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=wbXUEc4sBAo", "annotations": []}, "9PuaRdU6ViM": {"duration": 218.849, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9PuaRdU6ViM", "annotations": [{"segment": [17.385127259626593, 213.39391499071073], "label": "Making a lemonade"}]}, "Ty0BvWyYPVA": {"duration": 17.09, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ty0BvWyYPVA", "annotations": [{"segment": [0.009999999999999998, 14.385317570213543], "label": "Shuffleboard"}]}, 
"UZ-Twd5hujM": {"duration": 112.014, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UZ-Twd5hujM", "annotations": []}, "BAgUUUOGaxI": {"duration": 165.19, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BAgUUUOGaxI", "annotations": [{"segment": [0, 56.58446430000001], "label": "Playing accordion"}, {"segment": [67.58585190000001, 160.37386895475817], "label": "Playing accordion"}]}, "6YvF2YHee5I": {"duration": 85.64, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6YvF2YHee5I", "annotations": [{"segment": [3.338078510140406, 49.21785990000001], "label": "Shoveling snow"}, {"segment": [53.43024555, 81.4491156474259], "label": "Shoveling snow"}]}, "cd_-gvb82Bs": {"duration": 183.51, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cd_-gvb82Bs", "annotations": [{"segment": [20.93876627013542, 80.09207935424757], "label": "Cleaning windows"}, {"segment": [134.97363302178545, 173.5560862928135], "label": "Cleaning windows"}]}, "gFv3PrFkeL0": {"duration": 67.71, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gFv3PrFkeL0", "annotations": [{"segment": [12.253180907332293, 13.309489606240248], "label": "Ironing clothes"}, {"segment": [51.23097189703588, 58.62513278939157], "label": "Ironing clothes"}]}, "nMK_jVy5ZuI": {"duration": 183.53, "subset": "training", "resolution": "2276x720", "url": "https://www.youtube.com/watch?v=nMK_jVy5ZuI", "annotations": [{"segment": [38.4605468, 114.62549680000001], "label": "Long jump"}, {"segment": [22.5796959, 160.972113299532], "label": "Triple jump"}]}, "N5x5VUK7Kx8": {"duration": 71.657, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=N5x5VUK7Kx8", "annotations": [{"segment": [0, 71.621], "label": "Kite flying"}]}, "EWlpD9yoALg": {"duration": 25.635, "subset": "testing", "resolution": "320x240", 
"url": "https://www.youtube.com/watch?v=EWlpD9yoALg", "annotations": []}, "fcSJAHXHDvM": {"duration": 73.33, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=fcSJAHXHDvM", "annotations": [{"segment": [10.982132934296963, 55.82584182215288], "label": "Tennis serve with ball bouncing"}]}, "3ohvA6Raf4w": {"duration": 231.32, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=3ohvA6Raf4w", "annotations": [{"segment": [11.15241804, 62.82312966], "label": "Cumbia"}, {"segment": [74.61157320000001, 139.00973328], "label": "Cumbia"}]}, "UQFMy9Tz8dY": {"duration": 210.81, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=UQFMy9Tz8dY", "annotations": []}, "IRmk1AM-yJE": {"duration": 158.639, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IRmk1AM-yJE", "annotations": [{"segment": [0.2, 153.4496116962617], "label": "Playing pool"}]}, "xslLJNiCmKg": {"duration": 148.051, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=xslLJNiCmKg", "annotations": []}, "-PpuDhbMdoY": {"duration": 56.17, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-PpuDhbMdoY", "annotations": []}, "xywGFiZ0hqs": {"duration": 89.722, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=xywGFiZ0hqs", "annotations": [{"segment": [31.843577743109726, 49.20007726023327], "label": "Putting in contact lenses"}]}, "LIJBolW8k5o": {"duration": 131.26, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LIJBolW8k5o", "annotations": [{"segment": [8.086839313572543, 36.74905460218409], "label": "Windsurfing"}, {"segment": [40.43419656786271, 108.81405304212169], "label": "Windsurfing"}]}, "OqajYZVt1Wc": {"duration": 75.698, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=OqajYZVt1Wc", 
"annotations": [{"segment": [0.2, 74.2445694227769], "label": "Knitting"}]}, "UGd9cVzHQOU": {"duration": 34.134, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=UGd9cVzHQOU", "annotations": [{"segment": [3.4313798751950078, 29.392827613104522], "label": "Blowing leaves"}]}, "-qGmUrF_7v4": {"duration": 204.963, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-qGmUrF_7v4", "annotations": [{"segment": [11.899552935936185, 17.64616619806347], "label": "Hitting a pinata"}, {"segment": [33.289724522743285, 37.75931261550895], "label": "Hitting a pinata"}, {"segment": [46.059976216359466, 50.52956430912513], "label": "Hitting a pinata"}, {"segment": [55.956921278912006, 63.619072295081715], "label": "Hitting a pinata"}, {"segment": [70.0041981418898, 76.7085802810383], "label": "Hitting a pinata"}, {"segment": [109.91123468444037, 118.8504108699717], "label": "Hitting a pinata"}, {"segment": [125.23553671677978, 131.30140627124746], "label": "Hitting a pinata"}]}, "nqmw4BCi6io": {"duration": 153.624, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=nqmw4BCi6io", "annotations": [{"segment": [38.82521315546499, 44.81675839550589], "label": "Mooping floor"}, {"segment": [57.51883430439259, 60.874099638815494], "label": "Mooping floor"}, {"segment": [102.57525450950013, 107.84781432073609], "label": "Mooping floor"}]}, "ksvK_P-Eas4": {"duration": 238.32999999999998, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ksvK_P-Eas4", "annotations": [{"segment": [20.447043681747267, 228.6351248049922], "label": "Belly dance"}]}, "gaILpaBa7M8": {"duration": 79.41, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gaILpaBa7M8", "annotations": [{"segment": [14.608631520000005, 79.412266], "label": "Ping-pong"}]}, "zxr6UZKPDh4": {"duration": 238.07, "subset": "testing", "resolution": "1920x1080", 
"url": "https://www.youtube.com/watch?v=zxr6UZKPDh4", "annotations": []}, "5HLW2AI1Ink": {"duration": 146.866, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5HLW2AI1Ink", "annotations": [{"segment": [3.0644885109518176, 139.59429692216676], "label": "Swimming"}]}, "VuR8vLoumyE": {"duration": 216.735, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VuR8vLoumyE", "annotations": []}, "twM3ZBS1irA": {"duration": 55.61, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=twM3ZBS1irA", "annotations": []}, "m0_F34CgAco": {"duration": 122.439, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=m0_F34CgAco", "annotations": [{"segment": [0.2, 111.27114352574102], "label": "Baking cookies"}]}, "UNKf15ALDh8": {"duration": 218.547, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UNKf15ALDh8", "annotations": []}, "0xJPQ1I8-e0": {"duration": 27.981, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0xJPQ1I8-e0", "annotations": [{"segment": [0.01, 27.718141302652107], "label": "Canoeing"}]}, "_gF1wR7uHIA": {"duration": 34.65, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_gF1wR7uHIA", "annotations": []}, "3OXh6OV2Zrc": {"duration": 176.73, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3OXh6OV2Zrc", "annotations": [{"segment": [111.66063114664585, 159.63334675039002], "label": "Bungee jumping"}]}, "VYjsuhjRu6A": {"duration": 53.13, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=VYjsuhjRu6A", "annotations": []}, "S_ONutgLuV8": {"duration": 97.616, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=S_ONutgLuV8", "annotations": [{"segment": [16.7516122413604, 81.62603770358814], "label": 
"Surfing"}]}, "Sw7iRtHxMSs": {"duration": 90.791, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Sw7iRtHxMSs", "annotations": [{"segment": [0.01, 90.79004535147392], "label": "Beer pong"}]}, "6S9NCdsUxfE": {"duration": 49.97, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=6S9NCdsUxfE", "annotations": [{"segment": [6.230390015600624, 16.822053042121684], "label": "Waxing skis"}, {"segment": [25.77823868954758, 44.39152886115445], "label": "Waxing skis"}]}, "HcPhIfhQLmA": {"duration": 31.858, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=HcPhIfhQLmA", "annotations": [{"segment": [15.407037614837925, 27.136264736349453], "label": "Doing a powerbomb"}]}, "BcMHGhxdMl4": {"duration": 42.029, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BcMHGhxdMl4", "annotations": [{"segment": [0.01, 40.22503953219353], "label": "Riding bumper cars"}]}, "C27eI-S1Vkw": {"duration": 181.975, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=C27eI-S1Vkw", "annotations": []}, "BCRFFkvfB_Q": {"duration": 24.59, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=BCRFFkvfB_Q", "annotations": [{"segment": [15.721252362961339, 24.541], "label": "Long jump"}]}, "Ynmg-Br3m_4": {"duration": 219.684, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ynmg-Br3m_4", "annotations": []}, "aKJKZHvP53E": {"duration": 142.548, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aKJKZHvP53E", "annotations": [{"segment": [0.01, 133.75466314253882], "label": "Playing blackjack"}]}, "KrLVIvCoW90": {"duration": 213.693, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KrLVIvCoW90", "annotations": [{"segment": [37.671307658346336, 210.35924915363964], "label": 
"Longboarding"}]}, "MVVhOW7t3u0": {"duration": 34.85, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MVVhOW7t3u0", "annotations": [{"segment": [24.35914509578783, 33.656943904967086], "label": "Javelin throw"}]}, "dSob6koudZY": {"duration": 40.71, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=dSob6koudZY", "annotations": []}, "2Ot4ZPYpNwI": {"duration": 207.52, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2Ot4ZPYpNwI", "annotations": [{"segment": [2.2661733712948515, 198.77577856786272], "label": "Playing harmonica"}]}, "whcxrkF2hws": {"duration": 205.95, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=whcxrkF2hws", "annotations": [{"segment": [21.207004680187207, 202.1091809672387], "label": "Volleyball"}]}, "0wXKWpGzMJQ": {"duration": 72.772, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0wXKWpGzMJQ", "annotations": []}, "weKPXw4nxKA": {"duration": 141.967, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=weKPXw4nxKA", "annotations": [{"segment": [12.070500387362435, 128.78891541575663], "label": "Playing congas"}]}, "t-8wEopB3AQ": {"duration": 194.723, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=t-8wEopB3AQ", "annotations": [{"segment": [25.821199514647255, 30.985439417576703], "label": "Scuba diving"}, {"segment": [49.21216848673947, 57.7179753856821], "label": "Scuba diving"}, {"segment": [74.72958909828394, 106.32258611544461], "label": "Scuba diving"}, {"segment": [112.7019412823713, 131.53622796567862], "label": "Scuba diving"}]}, "9YnYdsmjkIY": {"duration": 115.13, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9YnYdsmjkIY", "annotations": [{"segment": [106.32406260218409, 110.27529465834634], "label": "Playing harmonica"}]}, 
"r2H-cnti8GI": {"duration": 121.371, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=r2H-cnti8GI", "annotations": []}, "lCuMD23jOxo": {"duration": 157.8, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=lCuMD23jOxo", "annotations": [{"segment": [10.958385335413416, 15.144734789391576], "label": "Smoking hookah"}, {"segment": [62.67211388455538, 70.05978939157566], "label": "Smoking hookah"}]}, "TxLBvNWVk-4": {"duration": 60.813, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=TxLBvNWVk-4", "annotations": [{"segment": [0.01, 59.240635228650106], "label": "Playing ten pins"}]}, "0tJ7eW6TKKM": {"duration": 159.243, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0tJ7eW6TKKM", "annotations": [{"segment": [0.2, 123.22038317628706], "label": "Running a marathon"}]}, "ie9PgZt9svA": {"duration": 207.053, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ie9PgZt9svA", "annotations": [{"segment": [0.01, 200.9150796474259], "label": "Rope skipping"}]}, "PB5RsoruLmE": {"duration": 89.7, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PB5RsoruLmE", "annotations": []}, "J9Sl_H-1BOA": {"duration": 15.14, "subset": "training", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=J9Sl_H-1BOA", "annotations": [{"segment": [0.6849188783151325, 14.52500379875195], "label": "Playing piano"}]}, "YAjqqbSsFTg": {"duration": 148.097, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YAjqqbSsFTg", "annotations": [{"segment": [88.85043427281349, 134.2944896270424], "label": "Spread mulch"}]}, "AHd87Cn-NkU": {"duration": 83.616, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=AHd87Cn-NkU", "annotations": [{"segment": [42.86803276131046, 75.05163182527302], "label": "Cleaning 
sink"}]}, "4IC1_C_dtvk": {"duration": 62.555, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4IC1_C_dtvk", "annotations": [{"segment": [0.2, 62.554307], "label": "Playing congas"}]}, "rIh_pfcVySw": {"duration": 51.108, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=rIh_pfcVySw", "annotations": []}, "rI-Iq496Sgk": {"duration": 62.764, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rI-Iq496Sgk", "annotations": [{"segment": [0.01, 48.27211192043682], "label": "Trimming branches or hedges"}, {"segment": [52.580373430577225, 61.588556588143526], "label": "Trimming branches or hedges"}]}, "n6A8YtBAJX0": {"duration": 179.816, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=n6A8YtBAJX0", "annotations": [{"segment": [14.58441809672387, 64.50800312012481], "label": "Shuffleboard"}, {"segment": [82.45805616224649, 179.781], "label": "Shuffleboard"}]}, "FA9n39rLSpU": {"duration": 85.98, "subset": "testing", "resolution": "256x144", "url": "https://www.youtube.com/watch?v=FA9n39rLSpU", "annotations": []}, "V1AqyQ3qaQY": {"duration": 197.02, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=V1AqyQ3qaQY", "annotations": [{"segment": [11.438481278834855, 14.814228003844502], "label": "Archery"}, {"segment": [32.6136203402931, 39.978886134685624], "label": "Archery"}, {"segment": [58.69893669543329, 60.84713921879778], "label": "Archery"}, {"segment": [66.98486071412488, 72.20192398515292], "label": "Archery"}, {"segment": [91.84263277019964, 93.99083529356412], "label": "Archery"}, {"segment": [101.96987323748937, 103.50430361132113], "label": "Archery"}, {"segment": [115.16597445244263, 118.54172127487254], "label": "Archery"}, {"segment": [129.28273389169496, 132.04470856459218], "label": "Archery"}, {"segment": [147.6958983776763, 151.6854173496389], "label": "Archery"}, 
{"segment": [150.15098697580711, 151.6854173496389], "label": "Archery"}, {"segment": [159.35756921879778, 167.64349323748937], "label": "Archery"}, {"segment": [173.47432865805013, 177.4638476300127], "label": "Archery"}]}, "0-zXxER0-g0": {"duration": 105.024, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0-zXxER0-g0", "annotations": []}, "rb-PUa4uGLQ": {"duration": 52.64, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rb-PUa4uGLQ", "annotations": [{"segment": [0, 52.63963718820862], "label": "Playing accordion"}]}, "0H_9zlnmW4U": {"duration": 230.16, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0H_9zlnmW4U", "annotations": [{"segment": [144.25717768, 220.954257350234], "label": "Tai chi"}, {"segment": [14.538826031981278, 131.49732903999998], "label": "Tai chi"}]}, "eFbZ0_TJLE0": {"duration": 113.383, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=eFbZ0_TJLE0", "annotations": [{"segment": [0.01, 113.321], "label": "Sumo"}]}, "Pmt3R5olRP0": {"duration": 32.69, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Pmt3R5olRP0", "annotations": [{"segment": [9.84381144305772, 14.434189836193447], "label": "Drinking coffee"}, {"segment": [27.848295585023397, 29.225409102964118], "label": "Drinking coffee"}]}, "W06Ojnv-Wxg": {"duration": 44.14, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=W06Ojnv-Wxg", "annotations": [{"segment": [0.9640754773048066, 20.943138859999998], "label": "Using parallel bars"}, {"segment": [23.36780399, 41.248656656786274], "label": "Using parallel bars"}]}, "RNxHKDlWAGQ": {"duration": 192.679, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=RNxHKDlWAGQ", "annotations": [{"segment": [10.520704256741698, 183.96202712012482], "label": "BMX"}]}, "r8qq4rU0tPE": 
{"duration": 180.814, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=r8qq4rU0tPE", "annotations": [{"segment": [149.91004142657883, 157.23271807766918], "label": "Skiing"}]}, "_rDRANqiA-0": {"duration": 129.986, "subset": "testing", "resolution": "423x240", "url": "https://www.youtube.com/watch?v=_rDRANqiA-0", "annotations": []}, "mXqmRAUsn9U": {"duration": 192.842, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mXqmRAUsn9U", "annotations": []}, "jYA_3BMF2d4": {"duration": 186.55, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jYA_3BMF2d4", "annotations": [{"segment": [18.334742589703588, 175.4896790728772], "label": "Tai chi"}]}, "LxLvhGHwca8": {"duration": 154.901, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LxLvhGHwca8", "annotations": [{"segment": [0.01, 135.35327884185648], "label": "Powerbocking"}]}, "9RAW6QibWRs": {"duration": 47.02, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9RAW6QibWRs", "annotations": [{"segment": [0, 47.02040816326531], "label": "Hand washing clothes"}]}, "Zfak-LkgQfQ": {"duration": 132.59, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Zfak-LkgQfQ", "annotations": [{"segment": [15.856474258970358, 114.4155516], "label": "Kayaking"}]}, "BMy7CBVxH5M": {"duration": 16.49, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BMy7CBVxH5M", "annotations": []}, "gitnWhi2idI": {"duration": 62.9, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gitnWhi2idI", "annotations": []}, "byiKkWjH-nw": {"duration": 105.84, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=byiKkWjH-nw", "annotations": []}, "BOckSaGr-uI": {"duration": 135.51, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=BOckSaGr-uI", "annotations": [{"segment": [92.72958894596617, 123.288204544722], "label": "Painting"}]}, "bmc11KqVbEE": {"duration": 97.99000000000001, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=bmc11KqVbEE", "annotations": [{"segment": [41.00895788, 96.33731733697348], "label": "Cleaning windows"}, {"segment": [8.938513979719188, 40.56710190795632], "label": "Cleaning windows"}]}, "6iBXtHrJ4gc": {"duration": 145.473, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6iBXtHrJ4gc", "annotations": [{"segment": [95.9982292698908, 111.88446106396258], "label": "Trimming branches or hedges"}, {"segment": [134.80602408112324, 138.89105511388456], "label": "Trimming branches or hedges"}]}, "kD_EJjQCLoY": {"duration": 192.169, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kD_EJjQCLoY", "annotations": []}, "pu-2w-UxdYg": {"duration": 135.721, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pu-2w-UxdYg", "annotations": [{"segment": [3.4669984615151592, 132.84522169516], "label": "Painting furniture"}]}, "m6H1tLAkyjQ": {"duration": 24.056, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=m6H1tLAkyjQ", "annotations": [{"segment": [0.2, 23.855873015873016], "label": "Surfing"}]}, "BiZF6o-AU64": {"duration": 140.899, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=BiZF6o-AU64", "annotations": [{"segment": [7.473562779245864, 129.24867261154446], "label": "Playing pool"}]}, "EpLRCz5dwb0": {"duration": 32.02, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EpLRCz5dwb0", "annotations": [{"segment": [3.2576287051482056, 26.881653666146647], "label": "Javelin throw"}]}, "DR2sW3Wd8fo": {"duration": 179.4, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=DR2sW3Wd8fo", "annotations": [{"segment": [17.352009862707433, 24.068916906336113], "label": "Ballet"}, {"segment": [27.707241554968324, 36.38324648632204], "label": "Ballet"}, {"segment": [39.18195775450066, 43.10015352995072], "label": "Ballet"}, {"segment": [55.974225363572366, 74.72559086036911], "label": "Ballet"}, {"segment": [78.92365776263703, 90.95811621580508], "label": "Ballet"}, {"segment": [98.51463663988737, 108.86986833214824], "label": "Ballet"}, {"segment": [139.9355634089309, 151.97002186209897], "label": "Ballet"}]}, "lgwDuuJy2zY": {"duration": 151.91, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lgwDuuJy2zY", "annotations": [{"segment": [0.2, 9.71622882330259], "label": "Kayaking"}, {"segment": [91.23775846271946, 138.16003396099845], "label": "Kayaking"}]}, "2belnHaa36g": {"duration": 22.71, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2belnHaa36g", "annotations": [{"segment": [0.2, 18.77221684867395], "label": "Playing drums"}]}, "PFn7a6eEhb4": {"duration": 111.99, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PFn7a6eEhb4", "annotations": [{"segment": [13.841943256663384, 19.246228303392357], "label": "Bullfighting"}, {"segment": [25.34784045292507, 28.48581241554189], "label": "Bullfighting"}, {"segment": [41.21203204171012, 46.441985312738154], "label": "Bullfighting"}, {"segment": [76.9500460604017, 79.21635914451386], "label": "Bullfighting"}, {"segment": [82.17999933142974, 86.18963017255125], "label": "Bullfighting"}, {"segment": [91.24525166787835, 93.51156475199049], "label": "Bullfighting"}]}, "etfSrbmOvUE": {"duration": 188.66, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=etfSrbmOvUE", "annotations": [{"segment": [7.546480000000002, 153.97815912636506], "label": "Rock climbing"}]}, "67NwPB79MTo": {"duration": 34.44, "subset": 
"training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=67NwPB79MTo", "annotations": [{"segment": [3.0909097200000004, 28.33368577], "label": "Dodgeball"}]}, "xckyFTyrkmw": {"duration": 172.525, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xckyFTyrkmw", "annotations": []}, "4T8uFygBeNo": {"duration": 221.959, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4T8uFygBeNo", "annotations": [{"segment": [26.26865420560748, 210.8595171339564], "label": "Longboarding"}]}, "-voGnJbk3CI": {"duration": 77.07, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-voGnJbk3CI", "annotations": [{"segment": [13.758041341653666, 25.9464976], "label": "Plataform diving"}, {"segment": [29.553344199999998, 38.6406887675507], "label": "Plataform diving"}, {"segment": [41.64540795631825, 48.37597893915757], "label": "Plataform diving"}, {"segment": [67.36580421216848, 70.61090093603744], "label": "Plataform diving"}]}, "4BRCZaaITn0": {"duration": 25.66, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4BRCZaaITn0", "annotations": [{"segment": [0.24016893603744152, 25.337822751950082], "label": "Mowing the lawn"}]}, "3LyyqeVeYkI": {"duration": 16.626, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3LyyqeVeYkI", "annotations": [{"segment": [0.9855982340093603, 13.175892180967239], "label": "Calf roping"}]}, "12qA1Z4EsUI": {"duration": 209.26, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=12qA1Z4EsUI", "annotations": []}, "f1HMhfSlPSw": {"duration": 202.06, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=f1HMhfSlPSw", "annotations": [{"segment": [10.40246139641504, 193.5488271939041], "label": "Tumbling"}]}, "t9UsqWspqZM": {"duration": 82.76, "subset": "training", "resolution": "1920x1080", 
"url": "https://www.youtube.com/watch?v=t9UsqWspqZM", "annotations": [{"segment": [1.5813276169797714, 79.66376686931622], "label": "Playing violin"}]}, "WxefBZ7Iaow": {"duration": 151.58, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=WxefBZ7Iaow", "annotations": [{"segment": [67.13717098413524, 128.58274951767658], "label": "Rafting"}, {"segment": [24.1252660106563, 28.851848974774867], "label": "Rafting"}, {"segment": [33.3421027906875, 58.86565079692775], "label": "Rafting"}]}, "2KsL5MBiZcc": {"duration": 32.14, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=2KsL5MBiZcc", "annotations": [{"segment": [0.8748179836193447, 31.868369403276127], "label": "Playing field hockey"}]}, "6DLfwAf8new": {"duration": 212.93, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6DLfwAf8new", "annotations": []}, "E7rhlhVA0SY": {"duration": 234.011, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=E7rhlhVA0SY", "annotations": [{"segment": [60.413304891379326, 106.77275940017194], "label": "Making a cake"}]}, "hC8ZZZhabp4": {"duration": 226.743, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=hC8ZZZhabp4", "annotations": []}, "c4f6d1GRDEw": {"duration": 179.05, "subset": "training", "resolution": "423x240", "url": "https://www.youtube.com/watch?v=c4f6d1GRDEw", "annotations": [{"segment": [66.88102886115445, 134.73944227769113], "label": "Waxing skis"}]}, "0k6GFx2ZCg8": {"duration": 48.95, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0k6GFx2ZCg8", "annotations": []}, "A0F6uT95xhA": {"duration": 76.65, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=A0F6uT95xhA", "annotations": [{"segment": [16.80498361934477, 75.41311154446178], "label": "Breakdancing"}]}, "h61C_KfvG0Y": {"duration": 125.992, "subset": 
"training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=h61C_KfvG0Y", "annotations": [{"segment": [17.689910405616224, 113.41198115600623], "label": "Fixing the roof"}]}, "GqnJBjH_b24": {"duration": 220.22, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=GqnJBjH_b24", "annotations": []}, "UgSLUt8X1Lc": {"duration": 65.992, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UgSLUt8X1Lc", "annotations": [{"segment": [34.78130733229329, 53.71551014040561], "label": "Mooping floor"}]}, "RXT17X7lRoQ": {"duration": 123.6, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RXT17X7lRoQ", "annotations": []}, "xld0M1MXNiE": {"duration": 6.59, "subset": "training", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=xld0M1MXNiE", "annotations": [{"segment": [1.0319610000000001, 2.8530919200000002], "label": "Smoking hookah"}]}, "rYumFn7S1YA": {"duration": 70.87, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rYumFn7S1YA", "annotations": [{"segment": [12.975108899999999, 61.062311205148205], "label": "Using the balance beam"}]}, "jVM8v6uJx8c": {"duration": 85.96000000000001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jVM8v6uJx8c", "annotations": [{"segment": [25.211437061564094, 50.42287347269891], "label": "Rock climbing"}, {"segment": [54.98238862714509, 71.07479505460219], "label": "Rock climbing"}]}, "b3oobKr2RQ4": {"duration": 212.277, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=b3oobKr2RQ4", "annotations": []}, "O0hvsRrzHNI": {"duration": 170.435, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=O0hvsRrzHNI", "annotations": []}, "VWAIjYs9Lws": {"duration": 64.018, "subset": "testing", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=VWAIjYs9Lws", "annotations": []}, "PCSlAOYPMOs": {"duration": 88.51, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PCSlAOYPMOs", "annotations": [{"segment": [1.2427928143525742, 64.76331443681747], "label": "Playing badminton"}]}, "MhWIIdVyWVs": {"duration": 238.56, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=MhWIIdVyWVs", "annotations": [{"segment": [17.119839890194246, 191.66777268369646], "label": "Tai chi"}]}, "OkEqcSlWMJg": {"duration": 213.368, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OkEqcSlWMJg", "annotations": [{"segment": [31.951816782875397, 47.262062324669856], "label": "Swimming"}, {"segment": [95.18978749898294, 116.49099868756655], "label": "Swimming"}, {"segment": [140.4548612747231, 144.78166958658346], "label": "Swimming"}, {"segment": [153.76811804368174, 155.43227516536663], "label": "Swimming"}, {"segment": [182.39162053666146, 189.04824902340093], "label": "Swimming"}, {"segment": [197.3690346318253, 201.36301172386896], "label": "Swimming"}]}, "4Sf9C_vtYIs": {"duration": 14.14, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4Sf9C_vtYIs", "annotations": [{"segment": [0.01, 13.41294686053042], "label": "Washing hands"}]}, "rlHxg7nDKgI": {"duration": 106.092, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rlHxg7nDKgI", "annotations": []}, "-FbWTFJ3kV0": {"duration": 49.0, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-FbWTFJ3kV0", "annotations": [{"segment": [0.01, 47.413979719188774], "label": "Sailing"}]}, "rYcac4QmSms": {"duration": 235.96, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rYcac4QmSms", "annotations": [{"segment": [146.48448985959436, 235.921], "label": "Bungee jumping"}]}, "ngMxE3CnGVg": {"duration": 
216.364, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ngMxE3CnGVg", "annotations": []}, "AR6_PW1um-I": {"duration": 206.705, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AR6_PW1um-I", "annotations": [{"segment": [11.92895007800312, 196.34407020280813], "label": "Rollerblading"}]}, "G1hRHCymRGE": {"duration": 93.21000000000001, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=G1hRHCymRGE", "annotations": [{"segment": [6.107015823490083, 87.97010805772231], "label": "Using the balance beam"}]}, "zEyFuCsJh3Y": {"duration": 61.07, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zEyFuCsJh3Y", "annotations": [{"segment": [0.1210646476798607, 55.19680185328734], "label": "Spinning"}]}, "U7MAQwyFeDQ": {"duration": 236.19, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=U7MAQwyFeDQ", "annotations": [{"segment": [65.40456274033274, 73.87952016584065], "label": "Bungee jumping"}, {"segment": [77.19580785408287, 95.61962834431745], "label": "Bungee jumping"}, {"segment": [107.04239704826287, 121.41297703064585], "label": "Bungee jumping"}, {"segment": [134.67812778361474, 142.78460879931797], "label": "Bungee jumping"}, {"segment": [192.89739915678626, 205.79407340795632], "label": "Bungee jumping"}]}, "470dhR3Yrjs": {"duration": 180.28, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=470dhR3Yrjs", "annotations": [{"segment": [12.093647886739468, 175.77976579563182], "label": "Braiding hair"}]}, "z1vuU-3I52Y": {"duration": 167.718, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=z1vuU-3I52Y", "annotations": []}, "8XxsgEw49p0": {"duration": 190.03, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8XxsgEw49p0", "annotations": [{"segment": 
[35.2789715023401, 109.9873817425897], "label": "Painting"}]}, "GFE2wAi9fjo": {"duration": 17.113, "subset": "training", "resolution": "352x240", "url": "https://www.youtube.com/watch?v=GFE2wAi9fjo", "annotations": [{"segment": [0.01, 17.061], "label": "Putting on shoes"}]}, "7DO3wdJxP8Y": {"duration": 80.527, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=7DO3wdJxP8Y", "annotations": []}, "Ocvdvqvu7I8": {"duration": 114.61, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Ocvdvqvu7I8", "annotations": [{"segment": [0.01, 65.4424665276131], "label": "Grooming horse"}]}, "EEaLID3z9tM": {"duration": 46.115, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EEaLID3z9tM", "annotations": [{"segment": [1.870476627145086, 38.344770856474256], "label": "Gargling mouthwash"}]}, "riYmZxbHN_0": {"duration": 213.0, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=riYmZxbHN_0", "annotations": []}, "K-t4tUTq_Ik": {"duration": 12.7, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=K-t4tUTq_Ik", "annotations": [{"segment": [0.4619344773790951, 10.42792511700468], "label": "Playing badminton"}]}, "2zl0O1VRDC4": {"duration": 132.935, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2zl0O1VRDC4", "annotations": [{"segment": [3.1107814108482708, 62.008242789575526], "label": "Fixing the roof"}]}, "q4M7SsGjFro": {"duration": 168.972, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=q4M7SsGjFro", "annotations": [{"segment": [20.034075420904838, 158.16375332293293], "label": "Baton twirling"}]}, "uQ20ZKxr6iM": {"duration": 205.544, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uQ20ZKxr6iM", "annotations": []}, "-5Q7iNtaWCU": {"duration": 76.77, "subset": "validation", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=-5Q7iNtaWCU", "annotations": [{"segment": [0.01, 58.81207722308891], "label": "Dodgeball"}]}, "9IKaAzeBkG8": {"duration": 58.422, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=9IKaAzeBkG8", "annotations": []}, "C7UEAqu1KtY": {"duration": 192.099, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=C7UEAqu1KtY", "annotations": [{"segment": [61.103954752925034, 88.62384883392191], "label": "Raking leaves"}]}, "xwSHzGCP6iA": {"duration": 79.13, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xwSHzGCP6iA", "annotations": [{"segment": [14.252808112324491, 37.08198127925117], "label": "Bungee jumping"}]}, "9Cs3CYCMH2w": {"duration": 67.129, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9Cs3CYCMH2w", "annotations": [{"segment": [0, 38.224718322066224], "label": "Rope skipping"}, {"segment": [44.50823307332294, 54.142956468018724], "label": "Rope skipping"}]}, "RYl-eG9hasI": {"duration": 201.549, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=RYl-eG9hasI", "annotations": [{"segment": [40.24695533229329, 177.65257627145084], "label": "Baton twirling"}]}, "Db2Dnt2OOo0": {"duration": 128.22, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Db2Dnt2OOo0", "annotations": []}, "WFL8DhccHr0": {"duration": 237.332, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WFL8DhccHr0", "annotations": [{"segment": [33.32256041262058, 206.59987345085804], "label": "Making a cake"}]}, "haoaTAxA6n0": {"duration": 34.296, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=haoaTAxA6n0", "annotations": []}, "VmjCJxgPSiE": {"duration": 44.72, "subset": "training", "resolution": "272x480", "url": 
"https://www.youtube.com/watch?v=VmjCJxgPSiE", "annotations": [{"segment": [0.13947269890795633, 43.79442745709829], "label": "Starting a campfire"}]}, "BWqsgYhgUbI": {"duration": 70.13, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BWqsgYhgUbI", "annotations": [{"segment": [0.9396616, 57.34042327145087], "label": "Starting a campfire"}]}, "BgT03Dy-jD4": {"duration": 151.627, "subset": "testing", "resolution": "540x360", "url": "https://www.youtube.com/watch?v=BgT03Dy-jD4", "annotations": []}, "sODu6d-3zAQ": {"duration": 89.885, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sODu6d-3zAQ", "annotations": [{"segment": [35.16474520611931, 49.14366268145677], "label": "Hula hoop"}]}, "a68k87VXX1c": {"duration": 126.71, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=a68k87VXX1c", "annotations": [{"segment": [0, 126.711292], "label": "Cumbia"}]}, "DuDF7hkrjYo": {"duration": 148.77, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DuDF7hkrjYo", "annotations": [{"segment": [43.96045241809672, 144.64032761310452], "label": "Polishing forniture"}]}, "VufybWyvP98": {"duration": 95.43, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VufybWyvP98", "annotations": [{"segment": [12.437461778471137, 16.608107644305772], "label": "Playing water polo"}]}, "iAQY-FHckIM": {"duration": 75.837, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iAQY-FHckIM", "annotations": [{"segment": [9.199386907636377, 36.32578883800109], "label": "Putting in contact lenses"}, {"segment": [57.555147002853346, 58.852496668483205], "label": "Putting in contact lenses"}]}, "dWyE0o2NetQ": {"duration": 187.15, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=dWyE0o2NetQ", "annotations": [{"segment": [22.621899375975037, 
100.55799141965677], "label": "Tumbling"}]}, "xuvp0jKMqRU": {"duration": 80.736, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xuvp0jKMqRU", "annotations": []}, "j2ESEJmy7aA": {"duration": 75.071, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=j2ESEJmy7aA", "annotations": [{"segment": [10.419109204368175, 13.93116848673947], "label": "Grooming dog"}, {"segment": [20.13580655226209, 22.594248049921994], "label": "Grooming dog"}, {"segment": [25.75510140405616, 31.14025897035881], "label": "Grooming dog"}, {"segment": [34.65231825273011, 37.34489703588143], "label": "Grooming dog"}, {"segment": [43.783672386895475, 47.41280031201248], "label": "Grooming dog"}]}, "hmPeCPjaxAM": {"duration": 150.257, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hmPeCPjaxAM", "annotations": [{"segment": [0.01, 81.80882676939731], "label": "Shuffleboard"}, {"segment": [111.34439173485308, 150.2563264], "label": "Shuffleboard"}]}, "eChLCFAGyx0": {"duration": 196.93, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eChLCFAGyx0", "annotations": [{"segment": [13.206903276131044, 28.56376755070203], "label": "Playing flauta"}, {"segment": [47.29914196567863, 55.591848673946956], "label": "Playing flauta"}, {"segment": [114.56220748829952, 120.3978159126365], "label": "Playing flauta"}, {"segment": [170.4611934477379, 178.1396255850234], "label": "Playing flauta"}]}, "AO-0r8H2DOo": {"duration": 51.46, "subset": "validation", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=AO-0r8H2DOo", "annotations": [{"segment": [14.404172509999999, 42.81902226], "label": "Shaving legs"}]}, "7y31RbZFHGk": {"duration": 28.793, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=7y31RbZFHGk", "annotations": []}, "iRp-pWnPoYw": {"duration": 187.06, "subset": "training", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=iRp-pWnPoYw", "annotations": [{"segment": [0.01, 167.19889703588143], "label": "Longboarding"}]}, "2tf414bkudE": {"duration": 192.656, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2tf414bkudE", "annotations": [{"segment": [15.178043943526449, 34.413584782847096], "label": "Sharpening knives"}, {"segment": [46.4357978074225, 134.7990635380517], "label": "Sharpening knives"}]}, "4XGQR2VmWpw": {"duration": 188.94, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4XGQR2VmWpw", "annotations": [{"segment": [30.212743516543384, 41.11880702983221], "label": "Doing motocross"}, {"segment": [43.77163328982139, 96.23864154294063], "label": "Doing motocross"}, {"segment": [101.54429406291896, 155.779853156031], "label": "Doing motocross"}, {"segment": [161.6750214196568, 174.0548772074883], "label": "Doing motocross"}]}, "vL8s-b4eJiU": {"duration": 192.285, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vL8s-b4eJiU", "annotations": [{"segment": [9.704075150471613, 178.92636532959935], "label": "Hanging wallpaper"}]}, "Np4nVolWFxI": {"duration": 62.04, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Np4nVolWFxI", "annotations": []}, "cgIv0CwYHrA": {"duration": 197.277, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=cgIv0CwYHrA", "annotations": []}, "6hm-Y7CyhMU": {"duration": 24.614, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=6hm-Y7CyhMU", "annotations": []}, "wfLV0MnRVZM": {"duration": 158.17000000000002, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wfLV0MnRVZM", "annotations": []}, "icjyAZ3KVBw": {"duration": 62.996, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=icjyAZ3KVBw", "annotations": [{"segment": 
[43.34013911232449, 55.5264820826833], "label": "Elliptical trainer"}]}, "jmxzDxfSbZM": {"duration": 194.491, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jmxzDxfSbZM", "annotations": [{"segment": [91.3285367223089, 152.92220102340093], "label": "Decorating the Christmas tree"}]}, "36K1e9WgEpU": {"duration": 110.667, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=36K1e9WgEpU", "annotations": [{"segment": [0.2, 110.42099999999999], "label": "River tubing"}]}, "NSdBL6HqlWk": {"duration": 190.613, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NSdBL6HqlWk", "annotations": [{"segment": [0.2, 179.34778183745325], "label": "Drum corps"}]}, "78UikTQkGgg": {"duration": 180.35, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=78UikTQkGgg", "annotations": [{"segment": [30.105124009041997, 135.33237857410296], "label": "Making a sandwich"}]}, "eg2s5lOfKGI": {"duration": 86.47, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=eg2s5lOfKGI", "annotations": []}, "CZZE6SDPBmU": {"duration": 14.768, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CZZE6SDPBmU", "annotations": []}, "k6AzbT12a9c": {"duration": 8.151, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=k6AzbT12a9c", "annotations": [{"segment": [0.01, 7.38088479226826], "label": "Kneeling"}]}, "zW8qDxX6Nmw": {"duration": 76.835, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=zW8qDxX6Nmw", "annotations": []}, "hcRf6HmOAzo": {"duration": 172.64, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=hcRf6HmOAzo", "annotations": [{"segment": [0, 129.5180670826833], "label": "Hand washing clothes"}]}, "hIJ6VTEKji8": {"duration": 17.65, "subset": "validation", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=hIJ6VTEKji8", "annotations": [{"segment": [0.8809816661466459, 16.38302512], "label": "Hopscotch"}]}, "RAw8sshR51c": {"duration": 86.43, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RAw8sshR51c", "annotations": [{"segment": [7.280705530262027, 11.595197696343227], "label": "Snatch"}, {"segment": [13.213132258623679, 18.60624746622518], "label": "Snatch"}, {"segment": [23.99936267382668, 30.20144516256841], "label": "Snatch"}, {"segment": [37.75180645321051, 46.38079078537291], "label": "Snatch"}, {"segment": [51.639077992199695, 62.155652622464906], "label": "Snatch"}]}, "j-rX65T0aBo": {"duration": 27.45, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=j-rX65T0aBo", "annotations": []}, "RgLFbpTUT7U": {"duration": 8.17, "subset": "testing", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=RgLFbpTUT7U", "annotations": []}, "xfNYfCAlkM4": {"duration": 38.615, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xfNYfCAlkM4", "annotations": [{"segment": [6.19944305772231, 12.45907488299532], "label": "Gargling mouthwash"}, {"segment": [18.177007800312012, 26.001547581903278], "label": "Gargling mouthwash"}]}, "r7kQ1sko74M": {"duration": 148.864, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=r7kQ1sko74M", "annotations": [{"segment": [0.010000000000000002, 64.06022771771933], "label": "Playing drums"}, {"segment": [87.71128902697825, 130.83969375003272], "label": "Playing drums"}]}, "9Dr3VuYqkx8": {"duration": 222.31, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9Dr3VuYqkx8", "annotations": []}, "FtAQcl1h7O8": {"duration": 115.5, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FtAQcl1h7O8", "annotations": [{"segment": [36.025273010920436, 39.26754758190327], "label": 
"Spinning"}, {"segment": [44.31108580343214, 47.91361310452418], "label": "Spinning"}, {"segment": [66.6467550702028, 68.62814508580344], "label": "Spinning"}]}, "lRRBeGRTpdc": {"duration": 200.627, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lRRBeGRTpdc", "annotations": [{"segment": [16.963763281346807, 19.776849618685482], "label": "Hula hoop"}, {"segment": [26.028152590549197, 31.029194968040176], "label": "Hula hoop"}, {"segment": [43.844366060360805, 62.910840124545146], "label": "Hula hoop"}, {"segment": [83.85270508028862, 87.60348686340683], "label": "Hula hoop"}]}, "rqRDdzQ4RjM": {"duration": 156.06, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=rqRDdzQ4RjM", "annotations": []}, "AtZEyAZuSVM": {"duration": 72.68, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AtZEyAZuSVM", "annotations": []}, "FrRKm_V0lZU": {"duration": 40.01, "subset": "validation", "resolution": "644x480", "url": "https://www.youtube.com/watch?v=FrRKm_V0lZU", "annotations": [{"segment": [8.80050791700468, 11.546765706708268], "label": "Drinking beer"}]}, "hj7rkE0fPsE": {"duration": 47.0, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hj7rkE0fPsE", "annotations": [{"segment": [30.427195951638062, 42.8913485101404], "label": "Brushing hair"}]}, "mplyiAfIOCc": {"duration": 85.798, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mplyiAfIOCc", "annotations": []}, "r4QPhlrqqP4": {"duration": 141.385, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=r4QPhlrqqP4", "annotations": [{"segment": [20.95646645865835, 115.37086271450859], "label": "Spread mulch"}]}, "eMI2x3HFozQ": {"duration": 40.984, "subset": "validation", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=eMI2x3HFozQ", "annotations": [{"segment": [0.01, 27.812324906396256], 
"label": "Arm wrestling"}]}, "0Mn0gxECBLA": {"duration": 63.07, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0Mn0gxECBLA", "annotations": [{"segment": [10.9142822, 43.01993293], "label": "Mowing the lawn"}]}, "yAkVtmP7654": {"duration": 216.619, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yAkVtmP7654", "annotations": [{"segment": [0.01, 216.61895691609976], "label": "Removing ice from car"}]}, "O1z0Q-3OUg0": {"duration": 151.12, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=O1z0Q-3OUg0", "annotations": [{"segment": [11.315973416536663, 137.44192712168487], "label": "Preparing salad"}]}, "hQXWnoipdFE": {"duration": 153.82999999999998, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hQXWnoipdFE", "annotations": [{"segment": [41.039279678510994, 134.4885944915951], "label": "Rock climbing"}]}, "8AP2he781Cw": {"duration": 51.223, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=8AP2he781Cw", "annotations": [{"segment": [0.2, 51.201], "label": "Playing ice hockey"}]}, "VsFHNZHhNwI": {"duration": 23.62, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=VsFHNZHhNwI", "annotations": [{"segment": [0.01, 23.61469387755102], "label": "Bathing dog"}]}, "dNgXkPmvU-c": {"duration": 172.966, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dNgXkPmvU-c", "annotations": [{"segment": [28.32231669266771, 40.19071606864275], "label": "Putting on shoes"}, {"segment": [61.49988767550702, 67.16435101404056], "label": "Putting on shoes"}]}, "b1QkoG9hxk8": {"duration": 77.53, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=b1QkoG9hxk8", "annotations": [{"segment": [25.09568055, 70.79069500780032], "label": "Mixing drinks"}]}, "PlAVnu-ueM4": {"duration": 120.02, 
"subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PlAVnu-ueM4", "annotations": [{"segment": [4.306631375975039, 8.42601790951638], "label": "Snatch"}, {"segment": [73.77446791887675, 80.51528224648986], "label": "Snatch"}]}, "EP9Ul7UdzYI": {"duration": 211.65, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EP9Ul7UdzYI", "annotations": [{"segment": [37.01325567715729, 48.22212817735155], "label": "Skiing"}, {"segment": [107.8928906048563, 109.87092692841999], "label": "Skiing"}, {"segment": [126.35456295811744, 131.95899920821458], "label": "Skiing"}]}, "C4V6fqELvPY": {"duration": 1.579, "subset": "training", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=C4V6fqELvPY", "annotations": [{"segment": [0, 1.578955], "label": "Using the monkey bar"}]}, "8YKUwWUU-O0": {"duration": 195.837, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8YKUwWUU-O0", "annotations": [{"segment": [16.340535881435258, 89.9493049921997], "label": "Tug of war"}, {"segment": [101.86109750390015, 172.43317394695785], "label": "Tug of war"}]}, "QFxe74w7aNc": {"duration": 118.771, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QFxe74w7aNc", "annotations": [{"segment": [6.666514820592823, 104.08923868954759], "label": "Drum corps"}]}, "a1ltYmbbBYI": {"duration": 31.662, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=a1ltYmbbBYI", "annotations": [{"segment": [0.01, 31.059223439784066], "label": "Sumo"}]}, "AA1y6Zg5fIc": {"duration": 230.55, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AA1y6Zg5fIc", "annotations": []}, "YLT7YEwUCwI": {"duration": 230.086, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YLT7YEwUCwI", "annotations": [{"segment": [17.89163754145908, 39.719442003356434], "label": "Swimming"}, 
{"segment": [45.802600623885205, 85.52204808733778], "label": "Swimming"}, {"segment": [135.97648135172346, 174.26459737505164], "label": "Swimming"}, {"segment": [179.27425741548709, 191.7984075165757], "label": "Swimming"}, {"segment": [193.94540467676234, 196.80806755701119], "label": "Swimming"}]}, "gIhVeU8xbrs": {"duration": 170.16, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gIhVeU8xbrs", "annotations": [{"segment": [9.819776911076444, 166.93620748829954], "label": "Snatch"}]}, "fcJg9MY4OEo": {"duration": 34.25, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fcJg9MY4OEo", "annotations": []}, "Z2b84JjYdtw": {"duration": 30.349, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Z2b84JjYdtw", "annotations": [{"segment": [0.01, 0.02], "label": "Drum corps"}]}, "p-HvSweux4c": {"duration": 152.718, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=p-HvSweux4c", "annotations": []}, "LTBrHLqhRMs": {"duration": 66.22, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=LTBrHLqhRMs", "annotations": [{"segment": [54.65230961154446, 61.98749672386895], "label": "Volleyball"}]}, "ijHyZJCmZJM": {"duration": 225.722, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ijHyZJCmZJM", "annotations": []}, "4KgIfmPLuAY": {"duration": 72.633, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=4KgIfmPLuAY", "annotations": [{"segment": [0.01, 71.43255108197395], "label": "Kneeling"}]}, "We9NR-CaIgM": {"duration": 180.512, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=We9NR-CaIgM", "annotations": [{"segment": [0.01, 180.511903], "label": "Capoeira"}]}, "KUorCsuIe2A": {"duration": 196.46, "subset": "training", "resolution": "448x336", "url": 
"https://www.youtube.com/watch?v=KUorCsuIe2A", "annotations": [{"segment": [75.24480640722227, 99.7644982722309], "label": "Breakdancing"}]}, "kd_XmN-fncQ": {"duration": 202.896, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kd_XmN-fncQ", "annotations": []}, "rduaWxRvcw0": {"duration": 236.15, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rduaWxRvcw0", "annotations": [{"segment": [63.36548123149415, 95.78502976853767], "label": "Removing curlers"}, {"segment": [97.25864561113056, 113.46841987965232], "label": "Removing curlers"}, {"segment": [120.46809513196854, 137.04627336113853], "label": "Removing curlers"}, {"segment": [141.46712088891718, 147.72998821993696], "label": "Removing curlers"}, {"segment": [154.36125951160494, 183.46517240281446], "label": "Removing curlers"}, {"segment": [186.04400012735204, 187.88601993059314], "label": "Removing curlers"}]}, "RAmQyeaBu-k": {"duration": 172.9, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=RAmQyeaBu-k", "annotations": [{"segment": [46.38391887675506, 147.51164898595943], "label": "Rock climbing"}]}, "N_um3L3w1uQ": {"duration": 39.591, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=N_um3L3w1uQ", "annotations": [{"segment": [0.20000000000000004, 38.678735202492206], "label": "Powerbocking"}]}, "SsjGQDAAuT8": {"duration": 98.11, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=SsjGQDAAuT8", "annotations": []}, "O9qI26TB3R0": {"duration": 157.9, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=O9qI26TB3R0", "annotations": [{"segment": [16.25759070046802, 154.4471117620215], "label": "Bathing dog"}]}, "9XyrLUWZl40": {"duration": 20.132, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=9XyrLUWZl40", "annotations": [{"segment": [0.4201943883466275, 
1.235496840060023], "label": "Playing rubik cube"}, {"segment": [5.50015581825317, 8.134209893019525], "label": "Playing rubik cube"}, {"segment": [13.841327055013295, 17.133894648471237], "label": "Playing rubik cube"}]}, "Lml6HGhM4yg": {"duration": 44.4, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Lml6HGhM4yg", "annotations": [{"segment": [7.853633385335414, 9.375923556942277], "label": "Tennis serve with ball bouncing"}]}, "aJbl1eVBzk0": {"duration": 187.827, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aJbl1eVBzk0", "annotations": [{"segment": [11.574314226990849, 165.4101362312996], "label": "Assembling bicycle"}]}, "wzp6sPsgTDY": {"duration": 106.627, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wzp6sPsgTDY", "annotations": []}, "q-RNYziOHjE": {"duration": 142.989, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=q-RNYziOHjE", "annotations": [{"segment": [33.68367113460049, 37.698943190380675], "label": "Blow-drying hair"}, {"segment": [43.05263926475426, 55.09845543209484], "label": "Blow-drying hair"}, {"segment": [74.50560370169909, 80.75158245513495], "label": "Blow-drying hair"}, {"segment": [83.6515011620873, 85.88220785974296], "label": "Blow-drying hair"}, {"segment": [89.67440924575759, 97.4818817878315], "label": "Blow-drying hair"}, {"segment": [110.41998051482058, 120.01201922620903], "label": "Blow-drying hair"}]}, "bG7hnpAeja0": {"duration": 200.265, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bG7hnpAeja0", "annotations": [{"segment": [24.681152886115445, 153.085631825273], "label": "Sumo"}]}, "d6lajCxMhPE": {"duration": 38.081, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=d6lajCxMhPE", "annotations": [{"segment": [7.125304212168487, 36.101541341653665], "label": "Mooping floor"}]}, 
"NH0YKVu9F6U": {"duration": 177.796, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=NH0YKVu9F6U", "annotations": []}, "6gk4PrePHmQ": {"duration": 66.34, "subset": "testing", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=6gk4PrePHmQ", "annotations": []}, "c9mrRiwmhZw": {"duration": 183.72, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=c9mrRiwmhZw", "annotations": [{"segment": [38.978809784711395, 169.6724661216849], "label": "Wrapping presents"}]}, "L149Uf5V7K0": {"duration": 137.253, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=L149Uf5V7K0", "annotations": [{"segment": [0.01, 126.22962558502338], "label": "Rope skipping"}]}, "6tveUnZe7WA": {"duration": 50.62, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6tveUnZe7WA", "annotations": [{"segment": [0.3158780392032008, 48.17139928237129], "label": "Playing bagpipes"}]}, "3W92sMbvoaE": {"duration": 29.699, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3W92sMbvoaE", "annotations": [{"segment": [0.01, 29.561], "label": "Snow tubing"}]}, "TmI9MxCDBMw": {"duration": 191.937, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TmI9MxCDBMw", "annotations": [{"segment": [12.27672691125332, 20.960265458237377], "label": "River tubing"}, {"segment": [27.547777459397697, 172.77247373634947], "label": "River tubing"}]}, "X5kj4fZ38rc": {"duration": 217.32, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=X5kj4fZ38rc", "annotations": []}, "hJiaSHwOkcs": {"duration": 82.664, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=hJiaSHwOkcs", "annotations": [{"segment": [0.2, 80.08384707800313], "label": "Bullfighting"}]}, "KlheP4IiS8w": {"duration": 211.395, "subset": "validation", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=KlheP4IiS8w", "annotations": [{"segment": [0.01, 202.2703142040439], "label": "Disc dog"}]}, "26KzUnEbTUs": {"duration": 143.709, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=26KzUnEbTUs", "annotations": []}, "13hdhdieZuc": {"duration": 53.69, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=13hdhdieZuc", "annotations": [{"segment": [14.740215263650546, 44.220645790951636], "label": "Putting on makeup"}]}, "KvtqF0LaWU0": {"duration": 184.14, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KvtqF0LaWU0", "annotations": []}, "8K_gkiWH5EI": {"duration": 224.746, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8K_gkiWH5EI", "annotations": [{"segment": [2.804941541879362, 7.713589240168246], "label": "Surfing"}, {"segment": [10.869148474782529, 43.476593899130116], "label": "Surfing"}, {"segment": [45.229682362804716, 62.409949306815804], "label": "Surfing"}, {"segment": [66.96797931236976, 90.1087470328745], "label": "Surfing"}, {"segment": [91.51121780381419, 104.48407243500624], "label": "Surfing"}, {"segment": [107.63963166962053, 120.26186860807765], "label": "Surfing"}, {"segment": [123.41742784269194, 137.44213555208873], "label": "Surfing"}, {"segment": [145.155724792257, 157.7779617307141], "label": "Surfing"}, {"segment": [160.58290324180967, 196.69652558658345], "label": "Surfing"}, {"segment": [199.8520848205928, 224.545941], "label": "Surfing"}]}, "W2Wjbhsuacw": {"duration": 20.62, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=W2Wjbhsuacw", "annotations": [{"segment": [4.429767146645866, 17.89068918], "label": "Starting a campfire"}]}, "NRhoHN8x_00": {"duration": 84.15, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NRhoHN8x_00", "annotations": [{"segment": 
[5.644948365054602, 76.40371973166927], "label": "Playing drums"}]}, "xor90CAOc94": {"duration": 73.672, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xor90CAOc94", "annotations": [{"segment": [0.01, 68.62333385335413], "label": "Roof shingle removal"}]}, "bp1wsvCaiB0": {"duration": 109.274, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=bp1wsvCaiB0", "annotations": [{"segment": [6.645271450858035, 11.75701872074883], "label": "Doing fencing"}, {"segment": [23.17325429017161, 31.522441497659905], "label": "Doing fencing"}, {"segment": [43.44985179407176, 65.77114820592823], "label": "Doing fencing"}, {"segment": [71.5775663026521, 74.81559906396257], "label": "Doing fencing"}, {"segment": [82.99589235569422, 88.2789984399376], "label": "Doing fencing"}, {"segment": [94.92548673946958, 102.7649344773791], "label": "Doing fencing"}]}, "sWQ65uwxXbA": {"duration": 126.87, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sWQ65uwxXbA", "annotations": [{"segment": [12.667590110407138, 44.33656538642498], "label": "Snatch"}, {"segment": [47.50346291402677, 60.96277740633435], "label": "Snatch"}, {"segment": [64.92139931583657, 81.34968024027083], "label": "Snatch"}, {"segment": [84.51657763494539, 94.61106348829952], "label": "Snatch"}, {"segment": [98.56968539157565, 113.21658643369733], "label": "Snatch"}]}, "Xq9ueKle4fY": {"duration": 129.289, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Xq9ueKle4fY", "annotations": [{"segment": [0.01, 129.221], "label": "Grooming dog"}]}, "etWdTMJ9_To": {"duration": 40.71, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=etWdTMJ9_To", "annotations": [{"segment": [0.01, 40.704579], "label": "Cricket"}]}, "7qTVbrGZXVY": {"duration": 40.17, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=7qTVbrGZXVY", "annotations": []}, "tZzse87ICr8": {"duration": 21.27, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tZzse87ICr8", "annotations": [{"segment": [1.8249942379095163, 20.307208761819858], "label": "Hopscotch"}]}, "7QvvqWJRwNo": {"duration": 22.431, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7QvvqWJRwNo", "annotations": [{"segment": [0.01, 14.524986945781642], "label": "Snow tubing"}]}, "4x0LdQRN248": {"duration": 22.547, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4x0LdQRN248", "annotations": [{"segment": [0.20000000000000004, 22.301000000000002], "label": "Welding"}]}, "h9YbUJuYzLk": {"duration": 121.67, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=h9YbUJuYzLk", "annotations": [{"segment": [10.42485696, 85.42834463999999], "label": "Getting a haircut"}]}, "nO2tXrQDD8I": {"duration": 55.334, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=nO2tXrQDD8I", "annotations": [{"segment": [0.01, 54.24717242123316], "label": "Table soccer"}]}, "PJsx3P4ubt8": {"duration": 22.57, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=PJsx3P4ubt8", "annotations": []}, "k--dW53UQWs": {"duration": 12.562, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=k--dW53UQWs", "annotations": []}, "wvmuUuLOoEQ": {"duration": 237.308, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wvmuUuLOoEQ", "annotations": [{"segment": [62.740817395517055, 181.57931632809994], "label": "Skiing"}, {"segment": [191.9130988786443, 194.12748085376091], "label": "Skiing"}]}, "5KEq4f4dGto": {"duration": 189.08, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5KEq4f4dGto", "annotations": [{"segment": 
[102.04085179407176, 165.44773946957878], "label": "Doing nails"}]}, "xcSWHI3K0KA": {"duration": 44.47, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=xcSWHI3K0KA", "annotations": [{"segment": [0.01, 43.009820592823715], "label": "Javelin throw"}]}, "8btD4-N4stM": {"duration": 70.566, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8btD4-N4stM", "annotations": [{"segment": [5.15796833563054, 16.68109247162857], "label": "Bullfighting"}]}, "JgJia7VviHs": {"duration": 74.21000000000001, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JgJia7VviHs", "annotations": []}, "g1UBXl7GHCc": {"duration": 101.263, "subset": "testing", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=g1UBXl7GHCc", "annotations": []}, "qb_5_9xDYqc": {"duration": 139.78, "subset": "training", "resolution": "352x288", "url": "https://www.youtube.com/watch?v=qb_5_9xDYqc", "annotations": [{"segment": [58.930138719999995, 83.19445311145779], "label": "Rock climbing"}]}, "g-21fg072ZM": {"duration": 14.91, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=g-21fg072ZM", "annotations": []}, "J1fcLhB-Slg": {"duration": 31.67, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=J1fcLhB-Slg", "annotations": [{"segment": [6.176290366614665, 22.679338226209047], "label": "Drinking beer"}]}, "rnI9vGr5V1k": {"duration": 84.057, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=rnI9vGr5V1k", "annotations": []}, "s07U8_fkUWU": {"duration": 141.69, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=s07U8_fkUWU", "annotations": [{"segment": [10.610034066668788, 19.89381387500398], "label": "Volleyball"}, {"segment": [22.546322391671175, 32.935314081951034], "label": "Volleyball"}, {"segment": [35.366780222229295, 44.42951765417555], 
"label": "Volleyball"}, {"segment": [46.639941418064886, 53.934339838899675], "label": "Volleyball"}, {"segment": [56.36580597917794, 66.97584004584672], "label": "Volleyball"}, {"segment": [69.407306186125, 76.03857747779298], "label": "Volleyball"}, {"segment": [78.24900124168231, 85.32235728612817], "label": "Volleyball"}, {"segment": [87.53278105001752, 96.37447610557483], "label": "Volleyball"}, {"segment": [99.02698462224203, 116.48933235696774], "label": "Volleyball"}, {"segment": [118.47871352262091, 135.27793409672387], "label": "Volleyball"}]}, "1VSqWp5DZiU": {"duration": 209.49, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1VSqWp5DZiU", "annotations": [{"segment": [9.477697475245948, 202.626635677672], "label": "Zumba"}]}, "8UyB9XigBXM": {"duration": 238.98, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=8UyB9XigBXM", "annotations": [{"segment": [39.51918607900779, 143.90948831201248], "label": "Braiding hair"}]}, "ijSmiDjlmlU": {"duration": 212.881, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ijSmiDjlmlU", "annotations": [{"segment": [0.01, 212.88034013605443], "label": "Swimming"}]}, "Chzp7HI7-uA": {"duration": 42.098, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=Chzp7HI7-uA", "annotations": []}, "HQitzJ_OtqY": {"duration": 38.29, "subset": "testing", "resolution": "320x180", "url": "https://www.youtube.com/watch?v=HQitzJ_OtqY", "annotations": []}, "OhPSVjVQZ8o": {"duration": 9.962, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OhPSVjVQZ8o", "annotations": [{"segment": [0.01, 9.941], "label": "Using the monkey bar"}]}, "78je-JBAqxQ": {"duration": 118.515, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=78je-JBAqxQ", "annotations": [{"segment": [10.690227436244017, 15.11377098741254], "label": "Beer 
pong"}, {"segment": [20.827514741005217, 25.066743977541716], "label": "Beer pong"}, {"segment": [29.305973214078218, 31.33343067503046], "label": "Beer pong"}, {"segment": [32.99225950671866, 34.466774023774825], "label": "Beer pong"}, {"segment": [87.54929567125708, 92.7100964239787], "label": "Beer pong"}]}, "YuuWL4EK7Q4": {"duration": 174.52, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YuuWL4EK7Q4", "annotations": [{"segment": [126.83124180967239, 155.69911388455537], "label": "Polishing shoes"}]}, "c1KmEWYsYjQ": {"duration": 164.235, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=c1KmEWYsYjQ", "annotations": []}, "hSZKCOmb7Bk": {"duration": 24.24, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=hSZKCOmb7Bk", "annotations": []}, "2GEZgHcA7zU": {"duration": 99.66, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2GEZgHcA7zU", "annotations": [{"segment": [24.82143395, 75.91943776000001], "label": "Springboard diving"}]}, "Ww-zqYBIiJ0": {"duration": 125.992, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ww-zqYBIiJ0", "annotations": []}, "qxAR5ZVYe00": {"duration": 141.99, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=qxAR5ZVYe00", "annotations": []}, "ppL_HTF9U4M": {"duration": 65.92, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ppL_HTF9U4M", "annotations": []}, "ZSoPIyVJWWk": {"duration": 131.87, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=ZSoPIyVJWWk", "annotations": [{"segment": [0.01, 131.821], "label": "Removing curlers"}]}, "n--IF_1qqYA": {"duration": 10.26, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=n--IF_1qqYA", "annotations": [{"segment": [0, 9.28651120124805], "label": "Mowing the 
lawn"}]}, "SFkpNuLyXdw": {"duration": 56.33, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SFkpNuLyXdw", "annotations": []}, "jTanjLR8QAU": {"duration": 123.02, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=jTanjLR8QAU", "annotations": []}, "Zd22n1caVgM": {"duration": 32.49, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Zd22n1caVgM", "annotations": [{"segment": [0.8615291029641187, 32.02861135725429], "label": "Doing karate"}]}, "BIL1t2eQrDU": {"duration": 151.35, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BIL1t2eQrDU", "annotations": [{"segment": [8.970336690000002, 131.35483619344777], "label": "Volleyball"}]}, "wZZUcxhISyE": {"duration": 61.696, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wZZUcxhISyE", "annotations": [{"segment": [0.01, 44.30086837080483], "label": "Having an ice cream"}]}, "buyWwRYGhig": {"duration": 80.109, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=buyWwRYGhig", "annotations": []}, "j0Iv6dmSw0k": {"duration": 79.34, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=j0Iv6dmSw0k", "annotations": [{"segment": [3.2182391382215294, 74.63839232106085], "label": "Doing karate"}]}, "ICMcCoyuBAw": {"duration": 197.417, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ICMcCoyuBAw", "annotations": [{"segment": [27.632032640245225, 143.37956563869002], "label": "Hanging wallpaper"}]}, "b2uAgV5n3FU": {"duration": 143.48, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=b2uAgV5n3FU", "annotations": [{"segment": [57.52473819968799, 135.41815801872076], "label": "Chopping wood"}]}, "UljyGBNeimQ": {"duration": 139.367, "subset": "testing", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=UljyGBNeimQ", "annotations": []}, "jkmbAc9BWaE": {"duration": 205.78, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jkmbAc9BWaE", "annotations": [{"segment": [163.06060269578785, 191.30732127301093], "label": "Skateboarding"}, {"segment": [31.135587522620906, 88.27099555382216], "label": "Skateboarding"}]}, "zq621OgpFFk": {"duration": 5.11, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zq621OgpFFk", "annotations": [{"segment": [0.5339502703588143, 4.9569711666146645], "label": "Tennis serve with ball bouncing"}]}, "Z-syOvXCc20": {"duration": 29.861, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Z-syOvXCc20", "annotations": [{"segment": [0.01, 29.86086], "label": "Swinging at the playground"}]}, "-4Q_zG9EChY": {"duration": 166.116, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=-4Q_zG9EChY", "annotations": [{"segment": [17.489028861154445, 125.79145943837752], "label": "Slacklining"}]}, "ijgLl3PHHE0": {"duration": 151.418, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ijgLl3PHHE0", "annotations": [{"segment": [34.7517315123965, 50.7837377429261], "label": "Scuba diving"}, {"segment": [55.734798490589654, 96.75787325694479], "label": "Scuba diving"}, {"segment": [98.87975643451487, 125.30196827256866], "label": "Scuba diving"}]}, "Kyaxs5nDQZ4": {"duration": 166.139, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Kyaxs5nDQZ4", "annotations": []}, "zPDbMflNURc": {"duration": 76.069, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zPDbMflNURc", "annotations": [{"segment": [41.09403510140405, 67.659875975039], "label": "Waxing skis"}]}, "WVZ4CxvFYcM": {"duration": 50.32, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=WVZ4CxvFYcM", "annotations": []}, "cdO-xmdjM7Q": {"duration": 180.094, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cdO-xmdjM7Q", "annotations": [{"segment": [53.36628705148206, 178.65646801872074], "label": "Rafting"}]}, "fqWjjWqsJPU": {"duration": 179.77, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fqWjjWqsJPU", "annotations": [{"segment": [42.628503625585026, 45.99391180655226], "label": "Skateboarding"}, {"segment": [70.95402248205929, 73.4780786177847], "label": "Skateboarding"}, {"segment": [86.37880997815913, 89.18331679563182], "label": "Skateboarding"}, {"segment": [104.3276536099844, 114.42387815288612], "label": "Skateboarding"}, {"segment": [124.52010269578784, 129.28776428549142], "label": "Skateboarding"}]}, "bZ4r3Y_qceE": {"duration": 36.85, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=bZ4r3Y_qceE", "annotations": [{"segment": [0.009999999999999998, 33.74779641185647], "label": "Triple jump"}]}, "vSpl69BWMKI": {"duration": 82.777, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vSpl69BWMKI", "annotations": [{"segment": [9.042379188767551, 13.82192247425897], "label": "Trimming branches or hedges"}, {"segment": [48.18296339157566, 60.58394056474258], "label": "Trimming branches or hedges"}]}, "76tyabobb_0": {"duration": 60.512, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=76tyabobb_0", "annotations": [{"segment": [0.01, 57.33269734005319], "label": "Having an ice cream"}]}, "flfLCoLHm2k": {"duration": 212.93, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=flfLCoLHm2k", "annotations": [{"segment": [45.308523539999996, 165.03863182839314], "label": "Making a sandwich"}]}, "2glmVDTPY44": {"duration": 226.186, "subset": "training", "resolution": "720x480", "url": 
"https://www.youtube.com/watch?v=2glmVDTPY44", "annotations": [{"segment": [25.711577881619938, 226.141], "label": "Snowboarding"}]}, "tfPm9xAZ5z0": {"duration": 88.886, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tfPm9xAZ5z0", "annotations": [{"segment": [19.126455538221528, 79.7115054602184], "label": "Clipping cat claws"}]}, "701UoJ5pgeo": {"duration": 142.757, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=701UoJ5pgeo", "annotations": [{"segment": [12.026270177337706, 15.812318196129205], "label": "Cleaning sink"}, {"segment": [57.458846402835704, 95.09661788376296], "label": "Cleaning sink"}, {"segment": [104.45038357724783, 142.75628], "label": "Cleaning sink"}]}, "gNUCdXGxp0w": {"duration": 143.71, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=gNUCdXGxp0w", "annotations": []}, "-X7fBYN5fBc": {"duration": 25.542, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=-X7fBYN5fBc", "annotations": [{"segment": [5.658158361934477, 19.44493859594384], "label": "Calf roping"}]}, "OW_iCiS_U4Y": {"duration": 166.16, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OW_iCiS_U4Y", "annotations": [{"segment": [5.702907800665768, 15.294161829058195], "label": "Plataform diving"}, {"segment": [31.625215985510167, 35.254339131388384], "label": "Plataform diving"}, {"segment": [46.14170856902303, 54.955293351870125], "label": "Plataform diving"}, {"segment": [90.98730172880384, 93.32030946543983], "label": "Plataform diving"}, {"segment": [105.76301739416515, 114.3173790951638], "label": "Plataform diving"}, {"segment": [141.79502537441496, 146.72026391575662], "label": "Plataform diving"}]}, "wh4JNeQh9FQ": {"duration": 158.06, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wh4JNeQh9FQ", "annotations": []}, "zD_wAe6Eoxc": {"duration": 
226.581, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zD_wAe6Eoxc", "annotations": [{"segment": [0.01, 226.579885], "label": "Longboarding"}]}, "K6oVOQG0lOo": {"duration": 41.75, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=K6oVOQG0lOo", "annotations": [{"segment": [0.2, 39.92851946417446], "label": "Changing car wheel"}]}, "swkSdgwCxHs": {"duration": 96.2, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=swkSdgwCxHs", "annotations": [{"segment": [11.326295631825273, 16.12684477379095], "label": "High jump"}, {"segment": [50.48077457098284, 53.63113494539782], "label": "High jump"}]}, "9Me0tmjDgRQ": {"duration": 222.051, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=9Me0tmjDgRQ", "annotations": []}, "XPctbL-V1ww": {"duration": 120.72, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XPctbL-V1ww", "annotations": [{"segment": [2.0708372277691107, 106.7422461950078], "label": "Brushing teeth"}]}, "NYwrSPaSQPI": {"duration": 59.35, "subset": "testing", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=NYwrSPaSQPI", "annotations": []}, "ISEbX4WvBW4": {"duration": 75.14, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ISEbX4WvBW4", "annotations": [{"segment": [0.2, 73.03601666782033], "label": "Swinging at the playground"}]}, "ruHObln9mwI": {"duration": 131.87, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ruHObln9mwI", "annotations": [{"segment": [1.6457550383647999, 8.0230558120284], "label": "Playing field hockey"}]}, "dXSlCw8AXv0": {"duration": 206.17000000000002, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dXSlCw8AXv0", "annotations": [{"segment": [60.95040671286715, 80.24868852169054], "label": "Windsurfing"}, 
{"segment": [110.48266335551381, 118.20197607904318], "label": "Windsurfing"}]}, "9g-5J05BIiQ": {"duration": 126.34, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9g-5J05BIiQ", "annotations": [{"segment": [8.078044700468018, 114.42902247], "label": "Skateboarding"}]}, "ZBG-KhSyVgQ": {"duration": 111.11, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=ZBG-KhSyVgQ", "annotations": [{"segment": [6.933384274146476, 68.12525705], "label": "Horseback riding"}]}, "73gFk8M4qbk": {"duration": 82.176, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=73gFk8M4qbk", "annotations": []}, "Rho3u46ZIEE": {"duration": 40.59, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Rho3u46ZIEE", "annotations": [{"segment": [0.822204368174727, 38.725946957878314], "label": "Playing accordion"}]}, "fpIS8jWoK9w": {"duration": 221.61, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fpIS8jWoK9w", "annotations": [{"segment": [45.184266789999995, 191.94836765], "label": "Getting a tattoo"}]}, "mTlReluteEQ": {"duration": 106.97999999999999, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=mTlReluteEQ", "annotations": [{"segment": [10.00099926, 28.670101740000003], "label": "Hammer throw"}, {"segment": [42.248311560000005, 79.98018084], "label": "Hammer throw"}]}, "GtCXZRGSaqk": {"duration": 149.398, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GtCXZRGSaqk", "annotations": [{"segment": [22.141548954475187, 135.87918970982838], "label": "Waterskiing"}]}, "jWzLhOqcGtE": {"duration": 22.99, "subset": "training", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=jWzLhOqcGtE", "annotations": [{"segment": [0.07157566302652106, 22.367394695787834], "label": "Layup drill in basketball"}]}, "Kud8SsSxj4w": 
{"duration": 143.035, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Kud8SsSxj4w", "annotations": []}, "2ISOAmuzs24": {"duration": 210.304, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2ISOAmuzs24", "annotations": [{"segment": [39.33996879875195, 210.161], "label": "Beach soccer"}]}, "cSCskKqO7CE": {"duration": 172.27, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cSCskKqO7CE", "annotations": [{"segment": [55.36253006038609, 58.58753181147654], "label": "Putting on makeup"}, {"segment": [62.88753414626381, 67.72503677289949], "label": "Putting on makeup"}, {"segment": [72.56253939953515, 77.40004202617085], "label": "Putting on makeup"}, {"segment": [82.77504494465492, 87.0750472794422], "label": "Putting on makeup"}, {"segment": [91.37504961422947, 92.98755048977469], "label": "Putting on makeup"}, {"segment": [98.63130355418299, 103.20005603489446], "label": "Putting on makeup"}, {"segment": [118.51881398283932, 119.32506441809673], "label": "Putting on makeup"}, {"segment": [123.62506673946959, 124.7000673198128], "label": "Putting on makeup"}]}, "airowj9k5ro": {"duration": 60.581, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=airowj9k5ro", "annotations": [{"segment": [0.01, 35.317816966368774], "label": "Fun sliding down"}]}, "pKFBr0pMn7I": {"duration": 60.58, "subset": "validation", "resolution": "1280x686", "url": "https://www.youtube.com/watch?v=pKFBr0pMn7I", "annotations": [{"segment": [9.734521965678628, 16.4447264274571], "label": "Pole vault"}, {"segment": [24.19454003120125, 39.59965731669267], "label": "Pole vault"}]}, "SjwFCog0amE": {"duration": 54.567, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SjwFCog0amE", "annotations": [{"segment": [9.662000377535103, 22.68654714196568], "label": "Putting on shoes"}]}, "wZVuXEYbZxo": {"duration": 
162.68, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=wZVuXEYbZxo", "annotations": []}, "RKDjetk5Kko": {"duration": 212.77, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RKDjetk5Kko", "annotations": [{"segment": [28.848670759999997, 85.33538512], "label": "Preparing pasta"}, {"segment": [99.4565318, 200.2079079024961], "label": "Preparing pasta"}]}, "mhw52v2-f4U": {"duration": 226.581, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mhw52v2-f4U", "annotations": []}, "m5YvKrjGtPM": {"duration": 25.38, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=m5YvKrjGtPM", "annotations": [{"segment": [0.01, 23.597704550358884], "label": "Snatch"}]}, "hToIzZ3o6Ic": {"duration": 129.196, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hToIzZ3o6Ic", "annotations": [{"segment": [0.01, 129.149386], "label": "Painting furniture"}]}, "fKbrGn6c3us": {"duration": 78.03999999999999, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=fKbrGn6c3us", "annotations": [{"segment": [53.157527879999996, 67.29259438377535], "label": "Ironing clothes"}]}, "FLbTQGyoEMk": {"duration": 41.076, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=FLbTQGyoEMk", "annotations": [{"segment": [13.8623841742179, 28.874643342501532], "label": "Putting in contact lenses"}]}, "U8SmnfsEzOc": {"duration": 189.22, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=U8SmnfsEzOc", "annotations": [{"segment": [23.603369734789393, 177.6153572542902], "label": "Assembling bicycle"}]}, "9o_SlEkq-L0": {"duration": 85.53999999999999, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9o_SlEkq-L0", "annotations": [{"segment": [68.8894968798752, 79.95983229329174], "label": "Mowing the 
lawn"}]}, "faqijFZWQlo": {"duration": 158.52, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=faqijFZWQlo", "annotations": []}, "e1bHYT-G__8": {"duration": 168.624, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=e1bHYT-G__8", "annotations": [{"segment": [12.36395569564279, 15.257647454197487], "label": "Playing ten pins"}, {"segment": [18.15133921275218, 19.466653648458863], "label": "Playing ten pins"}, {"segment": [30.515294908394974, 39.19637018405906], "label": "Playing ten pins"}, {"segment": [47.08825679829914, 49.98194855685384], "label": "Playing ten pins"}]}, "ywSeEtroEXo": {"duration": 207.355, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ywSeEtroEXo", "annotations": [{"segment": [0.01, 6.954921926128745], "label": "Longboarding"}, {"segment": [14.071586222632579, 18.276887852384842], "label": "Longboarding"}, {"segment": [22.158704741386934, 27.33446035335413], "label": "Longboarding"}, {"segment": [34.12763984945398, 37.36248722854914], "label": "Longboarding"}, {"segment": [49.00793779329173, 55.47763255148205], "label": "Longboarding"}, {"segment": [66.1526289024961, 70.3579304953198], "label": "Longboarding"}, {"segment": [96.23670952808112, 104.0003432379095], "label": "Longboarding"}, {"segment": [107.88216009282371, 112.08746168564741], "label": "Longboarding"}, {"segment": [167.0798671302652, 178.40183295709826], "label": "Longboarding"}, {"segment": [181.63668033619342, 192.95864616302651], "label": "Longboarding"}]}, "0-CYG7bH54A": {"duration": 125.83, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0-CYG7bH54A", "annotations": [{"segment": [45.73800686739469, 113.65796556318253], "label": "Preparing pasta"}]}, "J_CqwWJZTh4": {"duration": 202.547, "subset": "training", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=J_CqwWJZTh4", "annotations": [{"segment": 
[40.46405162066166, 51.50491610664296], "label": "Throwing darts"}, {"segment": [99.1383600318766, 107.6555983496336], "label": "Throwing darts"}, {"segment": [140.14728526552145, 145.19453760197004], "label": "Throwing darts"}, {"segment": [176.1089581627177, 185.2571030225308], "label": "Throwing darts"}]}, "ux4iT6vQQb0": {"duration": 73.14, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ux4iT6vQQb0", "annotations": [{"segment": [0.4564298159126365, 70.6325140124805], "label": "Doing motocross"}]}, "EnPDoWJj3Ug": {"duration": 90.65, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=EnPDoWJj3Ug", "annotations": []}, "KlIAmoVrptA": {"duration": 113.03999999999999, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=KlIAmoVrptA", "annotations": [{"segment": [31.60656708, 100.13104524180967], "label": "Shaving legs"}]}, "EJxWhTE_ZeE": {"duration": 205.85, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EJxWhTE_ZeE", "annotations": [{"segment": [19.912220488819553, 19.912220488819553], "label": "Cleaning windows"}, {"segment": [46.89006760270411, 72.90442017680708], "label": "Cleaning windows"}, {"segment": [82.86053042121685, 98.59760790431618], "label": "Cleaning windows"}, {"segment": [101.48809152366094, 134.8892355694228], "label": "Cleaning windows"}, {"segment": [147.73582943317734, 160.90358814352575], "label": "Cleaning windows"}, {"segment": [168.29037907020282, 178.88881897347895], "label": "Cleaning windows"}, {"segment": [188.20259949453978, 194.62589640561623], "label": "Cleaning windows"}]}, "kGvs0Nv5zJo": {"duration": 123.322, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=kGvs0Nv5zJo", "annotations": [{"segment": [46.10129060970182, 65.31016169707758], "label": "Surfing"}]}, "iB20nDf5yJs": {"duration": 178.45, "subset": "validation", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=iB20nDf5yJs", "annotations": [{"segment": [11.970594444617783, 167.3099363073323], "label": "Springboard diving"}]}, "BFrsZmuj4Ns": {"duration": 205.218, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BFrsZmuj4Ns", "annotations": [{"segment": [31.695129238689553, 200.41566569110765], "label": "Capoeira"}]}, "AeefhelpxGA": {"duration": 69.893, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=AeefhelpxGA", "annotations": [{"segment": [0.2, 69.892062], "label": "Playing congas"}]}, "BWanQWn1OYQ": {"duration": 138.925, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BWanQWn1OYQ", "annotations": [{"segment": [32.72647899687987, 45.296914638065516], "label": "River tubing"}, {"segment": [60.03466676911076, 87.99304948829953], "label": "River tubing"}]}, "9khzc3a4zz0": {"duration": 43.933, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=9khzc3a4zz0", "annotations": [{"segment": [0.44436271450858034, 37.15555928237129], "label": "Rope skipping"}]}, "z4_gDZk7z3s": {"duration": 20.78, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=z4_gDZk7z3s", "annotations": []}, "vE0kyySqMmc": {"duration": 57.26, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vE0kyySqMmc", "annotations": []}, "6x3qs7jQQko": {"duration": 205.22, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=6x3qs7jQQko", "annotations": [{"segment": [131.26265720016556, 171.60191267394697], "label": "Removing curlers"}]}, "gK71Ut9XQ-Y": {"duration": 69.59, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=gK71Ut9XQ-Y", "annotations": [{"segment": [12.6403276, 53.5028797], "label": "Playing guitarra"}]}, "rKwiUoQDEJ8": {"duration": 202.734, "subset": "testing", "resolution": 
"426x240", "url": "https://www.youtube.com/watch?v=rKwiUoQDEJ8", "annotations": []}, "BgAiDS4fF_I": {"duration": 132.562, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BgAiDS4fF_I", "annotations": [{"segment": [65.48271891970055, 86.74114882624261], "label": "Doing crunches"}]}, "6z6iFou8nW0": {"duration": 118.47, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6z6iFou8nW0", "annotations": [{"segment": [49.31835725429017, 63.55198751950078], "label": "Washing hands"}, {"segment": [71.1264976599064, 104.7499329173167], "label": "Washing hands"}]}, "LlI1IJEkW5E": {"duration": 74.98, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LlI1IJEkW5E", "annotations": [{"segment": [15.030473820313357, 31.873961992493307], "label": "Making a sandwich"}]}, "_dFzOHyZSNk": {"duration": 201.178, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_dFzOHyZSNk", "annotations": [{"segment": [18.203279102592678, 67.47767253547285], "label": "Longboarding"}, {"segment": [83.4840041601664, 86.62250055716515], "label": "Longboarding"}, {"segment": [94.468741549662, 95.72414010846148], "label": "Longboarding"}, {"segment": [131.1891493945472, 133.38609687244633], "label": "Longboarding"}, {"segment": [0.2, 200.97761904761907], "label": "Longboarding"}, {"segment": [147.50932989079564, 169.1649549173167], "label": "Longboarding"}]}, "eH2ORakkiy4": {"duration": 193.841, "subset": "testing", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=eH2ORakkiy4", "annotations": []}, "YqB7UEdhKug": {"duration": 75.98, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=YqB7UEdhKug", "annotations": [{"segment": [7.3486626833073325, 70.04934912636506], "label": "Playing saxophone"}]}, "vaV6_bxiTLI": {"duration": 33.09, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=vaV6_bxiTLI", "annotations": []}, "YMsyF-g2TqE": {"duration": 193.98, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YMsyF-g2TqE", "annotations": [{"segment": [8.622072542901716, 171.70283541341655], "label": "Carving jack-o-lanterns"}]}, "g3InHLh678M": {"duration": 112.594, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=g3InHLh678M", "annotations": [{"segment": [0.01, 29.702170568686036], "label": "Doing fencing"}, {"segment": [44.60585696536309, 108.09178905262456], "label": "Doing fencing"}]}, "N9VOdVOiPNM": {"duration": 135.19, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=N9VOdVOiPNM", "annotations": []}, "ejMpWeFZLec": {"duration": 119.583, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ejMpWeFZLec", "annotations": [{"segment": [3.5880596023630873, 112.12873490681159], "label": "Hula hoop"}]}, "soGmSXFO7wI": {"duration": 119.21000000000001, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=soGmSXFO7wI", "annotations": [{"segment": [24.54792912, 68.9754846], "label": "Shot put"}]}, "VpZ3PaLi2RQ": {"duration": 79.157, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=VpZ3PaLi2RQ", "annotations": [{"segment": [0.19999999999999998, 27.66166704524181], "label": "Cutting the grass"}, {"segment": [35.318021316692665, 63.35015711700468], "label": "Cutting the grass"}, {"segment": [70.14208429329173, 78.956824], "label": "Cutting the grass"}]}, "zgnBeiEB5pE": {"duration": 176.38, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zgnBeiEB5pE", "annotations": [{"segment": [0, 176.3787755102041], "label": "Mixing drinks"}]}, "UvPUywSVy1k": {"duration": 177.42000000000002, "subset": "training", "resolution": "640x360", "url": 
"https://www.youtube.com/watch?v=UvPUywSVy1k", "annotations": [{"segment": [42.62585939628061, 71.68894534829012], "label": "Preparing pasta"}, {"segment": [81.09984937084558, 152.23521212957363], "label": "Preparing pasta"}]}, "PfRGaUpEhu0": {"duration": 20.85, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PfRGaUpEhu0", "annotations": []}, "F7K2oT7ADkk": {"duration": 22.04, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=F7K2oT7ADkk", "annotations": []}, "Z3jo5hbPo1Q": {"duration": 233.57, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Z3jo5hbPo1Q", "annotations": [{"segment": [12.543701230109205, 54.35603866380656], "label": "Ballet"}]}, "qoDCBrRmiF0": {"duration": 112.71, "subset": "testing", "resolution": "642x480", "url": "https://www.youtube.com/watch?v=qoDCBrRmiF0", "annotations": []}, "WDbG2_sDHow": {"duration": 22.36, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=WDbG2_sDHow", "annotations": [{"segment": [0.39443039999999996, 6.795874799999999], "label": "Springboard diving"}, {"segment": [1.2035072035881433, 8.389666158346333], "label": "Springboard diving"}]}, "8nyOw9vBh2E": {"duration": 158.96, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8nyOw9vBh2E", "annotations": [{"segment": [28.51924819849937, 151.02801727301093], "label": "Doing kickboxing"}]}, "fWD0rL_72nw": {"duration": 60.19, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=fWD0rL_72nw", "annotations": [{"segment": [0.01, 60.061], "label": "Doing kickboxing"}]}, "YW3mCNKVaa4": {"duration": 43.143, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YW3mCNKVaa4", "annotations": [{"segment": [15.480211262872285, 40.517768717628705], "label": "Snow tubing"}]}, "wfSh2F8ymIg": {"duration": 23.731, "subset": "training", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=wfSh2F8ymIg", "annotations": [{"segment": [1.2217100405616226, 19.1031024524181], "label": "Snow tubing"}]}, "XEqijKYIJNg": {"duration": 125.5, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XEqijKYIJNg", "annotations": [{"segment": [42.87885149297973, 103.5749426474259], "label": "Preparing salad"}]}, "gE2Fn5yDXeU": {"duration": 203.62, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=gE2Fn5yDXeU", "annotations": []}, "Ule69iMpA3Y": {"duration": 60.93, "subset": "validation", "resolution": "256x192", "url": "https://www.youtube.com/watch?v=Ule69iMpA3Y", "annotations": [{"segment": [0, 60.929183], "label": "Doing step aerobics"}]}, "RFgusQogDyQ": {"duration": 132.029, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RFgusQogDyQ", "annotations": [{"segment": [0.2, 120.94071433033051], "label": "Swinging at the playground"}]}, "NDWJOqVUGeA": {"duration": 149.7, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NDWJOqVUGeA", "annotations": [{"segment": [35.03083478939157, 87.34354807488299], "label": "Fixing the roof"}]}, "liNIRD7YwFQ": {"duration": 195.187, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=liNIRD7YwFQ", "annotations": [{"segment": [51.461142984494884, 59.37824190518642], "label": "Surfing"}, {"segment": [64.85931038874209, 73.08091311407559], "label": "Surfing"}, {"segment": [92.8736604158044, 99.57274411792798], "label": "Surfing"}, {"segment": [102.6177821643478, 141.59426915852146], "label": "Surfing"}, {"segment": [147.07533764207713, 180.87525995733708], "label": "Surfing"}]}, "osjru9UsWsI": {"duration": 221.914, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=osjru9UsWsI", "annotations": [{"segment": [65.91819590961708, 86.62548423707872], "label": 
"Blow-drying hair"}, {"segment": [96.2888854565608, 112.16447317428138], "label": "Blow-drying hair"}, {"segment": [147.36686333096614, 159.10099338319438], "label": "Blow-drying hair"}, {"segment": [180.49852337305558, 185.33022394848328], "label": "Blow-drying hair"}]}, "2UfljrwzsLs": {"duration": 67.27, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=2UfljrwzsLs", "annotations": [{"segment": [0.7325538221528862, 39.66255694227769], "label": "Doing nails"}]}, "xoXTlNWkC7w": {"duration": 181.116, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=xoXTlNWkC7w", "annotations": [{"segment": [18.077853354134167, 172.6060702028081], "label": "Playing congas"}]}, "y3Wfx-RvTpk": {"duration": 82.08, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=y3Wfx-RvTpk", "annotations": [{"segment": [11.188897464898597, 75.63694686271451], "label": "Layup drill in basketball"}]}, "Qq2vKc9hWUk": {"duration": 46.81, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Qq2vKc9hWUk", "annotations": [{"segment": [5.477156045241809, 13.437289497659908], "label": "Playing racquetball"}]}, "K9qWnvl8A1Q": {"duration": 75.419, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=K9qWnvl8A1Q", "annotations": [{"segment": [0.01, 75.418411], "label": "Painting fence"}]}, "yRQpolWhJ6A": {"duration": 180.094, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yRQpolWhJ6A", "annotations": []}, "fUJ88Ir7Dgc": {"duration": 79.9, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=fUJ88Ir7Dgc", "annotations": [{"segment": [51.123580343213725, 78.89595163806553], "label": "High jump"}]}, "LNd1TrbshXM": {"duration": 151.441, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LNd1TrbshXM", "annotations": [{"segment": 
[15.120428751843951, 141.28150501404056], "label": "Braiding hair"}]}, "tSNg5rvDvVc": {"duration": 54.1, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tSNg5rvDvVc", "annotations": []}, "fRUoSevi63M": {"duration": 132.17, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fRUoSevi63M", "annotations": [{"segment": [22.680909576519117, 128.04404328393136], "label": "Painting"}]}, "DgdEQgh8TKU": {"duration": 78.37, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DgdEQgh8TKU", "annotations": [{"segment": [0.8485144499999999, 47.087932516380654], "label": "Chopping wood"}]}, "U_FxyViYYBA": {"duration": 93.888, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=U_FxyViYYBA", "annotations": [{"segment": [5.271444617784712, 86.10026209048362], "label": "Fun sliding down"}]}, "KyMDXMXZLyM": {"duration": 110.969, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=KyMDXMXZLyM", "annotations": [{"segment": [23.370829750390016, 106.98646507956319], "label": "Removing ice from car"}]}, "dcsQy55tjw4": {"duration": 60.12, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dcsQy55tjw4", "annotations": [{"segment": [0, 52.70741198439937], "label": "Shot put"}]}, "7EiY0caSyIM": {"duration": 119.61, "subset": "testing", "resolution": "854x474", "url": "https://www.youtube.com/watch?v=7EiY0caSyIM", "annotations": []}, "nTsXn3oHf_8": {"duration": 44.792, "subset": "training", "resolution": "180x240", "url": "https://www.youtube.com/watch?v=nTsXn3oHf_8", "annotations": [{"segment": [0.01, 44.7912924], "label": "Elliptical trainer"}]}, "myHHyzx6TPc": {"duration": 99.68, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=myHHyzx6TPc", "annotations": [{"segment": [3.2634633276131044, 96.50527268798751], "label": "Using the 
balance beam"}]}, "aEf-rEjboOc": {"duration": 145.38, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aEf-rEjboOc", "annotations": []}, "NE-xkW0byCM": {"duration": 148.91, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=NE-xkW0byCM", "annotations": []}, "qpdREcmjhUw": {"duration": 44.467, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=qpdREcmjhUw", "annotations": [{"segment": [0.01, 44.009082823442455], "label": "Shuffleboard"}]}, "CZISTSKNsSk": {"duration": 103.492, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=CZISTSKNsSk", "annotations": [{"segment": [10.194695099007676, 30.34094191296301], "label": "Skiing"}, {"segment": [56.77023163258918, 61.92716621202844], "label": "Skiing"}, {"segment": [67.7287176138976, 85.77798864193497], "label": "Skiing"}, {"segment": [88.67876434286954, 91.09607742698171], "label": "Skiing"}]}, "CjFqAb8UWEc": {"duration": 34.13, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CjFqAb8UWEc", "annotations": []}, "RgaFRpTYYR4": {"duration": 70.13, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=RgaFRpTYYR4", "annotations": [{"segment": [34.0416958, 47.410135159999996], "label": "Playing accordion"}, {"segment": [2.4248879199999998, 29.797791319999995], "label": "Playing accordion"}]}, "yJN4jnk_S1U": {"duration": 177.42000000000002, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yJN4jnk_S1U", "annotations": [{"segment": [4.9799344773790954, 176.53091731669267], "label": "Belly dance"}]}, "xZPExy2EO0E": {"duration": 162.192, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xZPExy2EO0E", "annotations": []}, "GONk0qrh2Cw": {"duration": 75.651, "subset": "testing", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=GONk0qrh2Cw", "annotations": []}, "U2cCOj3LiAA": {"duration": 25.217, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=U2cCOj3LiAA", "annotations": []}, "npB7FaauqBw": {"duration": 33.669, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=npB7FaauqBw", "annotations": []}, "QxsELAtsSWE": {"duration": 170.57, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=QxsELAtsSWE", "annotations": [{"segment": [48.165134657086966, 57.212729012561866], "label": "Spinning"}, {"segment": [69.98580339676172, 96.59637503051142], "label": "Spinning"}]}, "bMRx3vSVZUQ": {"duration": 175.61, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bMRx3vSVZUQ", "annotations": [{"segment": [19.72558693368143, 31.50614561622465], "label": "Windsurfing"}, {"segment": [88.217207725429, 89.58704014352574], "label": "Windsurfing"}, {"segment": [98.08000113572544, 163.28402423712947], "label": "Windsurfing"}]}, "MM2ZYfEWCQo": {"duration": 73.19, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MM2ZYfEWCQo", "annotations": [{"segment": [0, 63.82656593135725], "label": "Starting a campfire"}]}, "Io0_nTofT7w": {"duration": 196.72, "subset": "training", "resolution": "328x240", "url": "https://www.youtube.com/watch?v=Io0_nTofT7w", "annotations": [{"segment": [3.3748377535101404, 192.99898439937598], "label": "Drum corps"}]}, "A-Oz6mBBBZQ": {"duration": 25.357, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=A-Oz6mBBBZQ", "annotations": [{"segment": [19.165019388950636, 23.463346917722024], "label": "Building sandcastles"}]}, "8yM9Ay7cKwM": {"duration": 50.202, "subset": "testing", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=8yM9Ay7cKwM", "annotations": []}, "kEKIUQBGSmQ": {"duration": 156.025, "subset": "testing", "resolution": 
"426x236", "url": "https://www.youtube.com/watch?v=kEKIUQBGSmQ", "annotations": []}, "2DCeEAz8iO8": {"duration": 164.12, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=2DCeEAz8iO8", "annotations": [{"segment": [59.673304800000004, 98.52216068642744], "label": "Playing polo"}, {"segment": [10.48549902, 51.2786691], "label": "Playing polo"}]}, "ioHDdIBu2go": {"duration": 157.18, "subset": "training", "resolution": "328x240", "url": "https://www.youtube.com/watch?v=ioHDdIBu2go", "annotations": [{"segment": [25.73744929797192, 35.54219188767551], "label": "Vacuuming floor"}, {"segment": [74.2803790951638, 78.69307488299532], "label": "Vacuuming floor"}]}, "-A6e83tl4Y8": {"duration": 215.82999999999998, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-A6e83tl4Y8", "annotations": [{"segment": [10.94299227751423, 102.86412740863376], "label": "Volleyball"}]}, "rVmNL8rzHnU": {"duration": 199.878, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=rVmNL8rzHnU", "annotations": [{"segment": [0.01, 173.02925897035882], "label": "Hanging wallpaper"}]}, "wiflxvRl04M": {"duration": 63.251, "subset": "training", "resolution": "560x340", "url": "https://www.youtube.com/watch?v=wiflxvRl04M", "annotations": [{"segment": [0.01, 63.25115646258504], "label": "Gargling mouthwash"}]}, "3nzXMKByUnk": {"duration": 209.44, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=3nzXMKByUnk", "annotations": [{"segment": [13.233161991785792, 81.19606802367332], "label": "Cumbia"}]}, "n7lk6-nQ8fM": {"duration": 108.507, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=n7lk6-nQ8fM", "annotations": []}, "D1x5KVBdiuI": {"duration": 197.53, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=D1x5KVBdiuI", "annotations": [{"segment": [13.24787162915088, 172.53042009360377], 
"label": "Snatch"}]}, "lPw_BRbPw5s": {"duration": 177.87, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=lPw_BRbPw5s", "annotations": []}, "RO5WKh6dViA": {"duration": 75.93, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RO5WKh6dViA", "annotations": [{"segment": [9.772485722006431, 33.22645145482186], "label": "Using the monkey bar"}]}, "S1bEYnNyLZE": {"duration": 57.38, "subset": "training", "resolution": "1906x1080", "url": "https://www.youtube.com/watch?v=S1bEYnNyLZE", "annotations": [{"segment": [2.2377733204566277, 53.348515959686004], "label": "Brushing teeth"}]}, "I5YCAQXHoFw": {"duration": 114.8, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=I5YCAQXHoFw", "annotations": [{"segment": [19.69034321372855, 94.5301216848674], "label": "Painting furniture"}]}, "bNGldPjMYHc": {"duration": 204.499, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bNGldPjMYHc", "annotations": [{"segment": [11.276049927625465, 17.646699329650033], "label": "Rafting"}, {"segment": [27.839738372889347, 185.8318419028644], "label": "Rafting"}]}, "8i2lkdpB2w0": {"duration": 183.322, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8i2lkdpB2w0", "annotations": [{"segment": [2.647320503876588, 160.24074106462427], "label": "Canoeing"}]}, "Il2jGm3L0-0": {"duration": 74.61, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Il2jGm3L0-0", "annotations": [{"segment": [9.773572455538222, 71.55651262090484], "label": "Using parallel bars"}]}, "AZn294ubbps": {"duration": 104.47, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=AZn294ubbps", "annotations": [{"segment": [51.55049297971919, 96.83023088923557], "label": "Kayaking"}]}, "x4QVVFhamJ4": {"duration": 124.85, "subset": "validation", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=x4QVVFhamJ4", "annotations": [{"segment": [80.054397996328, 85.89778471138845], "label": "Washing hands"}]}, "mxZRW_Xy7ks": {"duration": 169.46, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mxZRW_Xy7ks", "annotations": [{"segment": [6.6091742989447475, 16.126385289425183], "label": "Skiing"}, {"segment": [29.873467831230258, 44.149284316950904], "label": "Skiing"}, {"segment": [48.64352284023334, 103.10311906353806], "label": "Skiing"}, {"segment": [107.86172455877826, 169.19486202808113], "label": "Skiing"}]}, "MduMjuxuIac": {"duration": 180.81, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=MduMjuxuIac", "annotations": [{"segment": [1.2033771398129605, 177.5109013873105], "label": "Cheerleading"}]}, "TfDNYSd4q04": {"duration": 200.2, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=TfDNYSd4q04", "annotations": []}, "FDTkn9SFPfs": {"duration": 37.176, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FDTkn9SFPfs", "annotations": [{"segment": [0.01, 37.081], "label": "Clipping cat claws"}]}, "OhgsnOwo4b4": {"duration": 190.29, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OhgsnOwo4b4", "annotations": [{"segment": [21.366477379095166, 176.27529173166928], "label": "Preparing salad"}]}, "Q1XC-TJe2kM": {"duration": 177.84, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Q1XC-TJe2kM", "annotations": []}, "SZqwvjwqwK4": {"duration": 222.563, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=SZqwvjwqwK4", "annotations": [{"segment": [13.541290712853003, 124.6493170747238], "label": "Playing drums"}, {"segment": [131.59356872234073, 212.4940991700468], "label": "Playing drums"}]}, "XgaEYidtX6E": {"duration": 192.293, "subset": "training", "resolution": "1920x1080", 
"url": "https://www.youtube.com/watch?v=XgaEYidtX6E", "annotations": [{"segment": [15.599351014040563, 192.326], "label": "Longboarding"}]}, "p4s_tEv1JsE": {"duration": 115.288, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=p4s_tEv1JsE", "annotations": []}, "X1bmeq4u6YE": {"duration": 171.46, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=X1bmeq4u6YE", "annotations": [{"segment": [1.84941804, 111.01127507999999], "label": "Making a sandwich"}, {"segment": [15.508982839313575, 70.59261154446179], "label": "Making a sandwich"}, {"segment": [110.70204992199689, 145.4635631825273], "label": "Making a sandwich"}]}, "G4mX4StOvQE": {"duration": 122.973, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=G4mX4StOvQE", "annotations": [{"segment": [16.76300702028081, 106.61272464898596], "label": "Kneeling"}]}, "f11zga3X2L4": {"duration": 132.73, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=f11zga3X2L4", "annotations": [{"segment": [7.968358814352574, 99.24228705148207], "label": "Archery"}]}, "apjGHMrnMV0": {"duration": 223.86, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=apjGHMrnMV0", "annotations": [{"segment": [22.69635725429017, 40.50426833073323], "label": "Snatch"}, {"segment": [47.48776287051482, 65.29567394695788], "label": "Snatch"}, {"segment": [76.12009048361935, 94.27717628705147], "label": "Snatch"}, {"segment": [101.26067082683308, 131.2896973478939], "label": "Snatch"}, {"segment": [142.1141138845554, 162.366248049922], "label": "Snatch"}, {"segment": [171.09561622464898, 196.93454602184087], "label": "Snatch"}]}, "G5HZX5i26us": {"duration": 22.036, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=G5HZX5i26us", "annotations": [{"segment": [0.2, 21.04378315132605], "label": "Knitting"}]}, "8MSrdVG-U_s": {"duration": 
137.7, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8MSrdVG-U_s", "annotations": []}, "3I4EzlMo124": {"duration": 98.66, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=3I4EzlMo124", "annotations": [{"segment": [7.311116063690167, 85.6554954898596], "label": "Using the balance beam"}]}, "cBCi-pOE5NQ": {"duration": 165.95, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cBCi-pOE5NQ", "annotations": [{"segment": [56.7393307, 159.36877731045243], "label": "Playing saxophone"}]}, "V2ltLccVh_Q": {"duration": 52.22, "subset": "training", "resolution": "854x468", "url": "https://www.youtube.com/watch?v=V2ltLccVh_Q", "annotations": [{"segment": [13.19799038492152, 41.54922819032761], "label": "Paintball"}]}, "0hfKWxuVT5k": {"duration": 236.936, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0hfKWxuVT5k", "annotations": [{"segment": [5.544533944623091, 236.9364172335601], "label": "Canoeing"}]}, "3q-MxrLyr0g": {"duration": 136.49, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3q-MxrLyr0g", "annotations": []}, "W0EN9JeU66A": {"duration": 30.372, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=W0EN9JeU66A", "annotations": [{"segment": [0.01, 30.241], "label": "Swinging at the playground"}]}, "W8ayZca_fAY": {"duration": 154.74, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=W8ayZca_fAY", "annotations": [{"segment": [37.76438823, 73.52942145000002], "label": "Ironing clothes"}, {"segment": [96.89659230000001, 146.03633483853355], "label": "Ironing clothes"}]}, "oFh_AGspaEQ": {"duration": 113.55, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oFh_AGspaEQ", "annotations": [{"segment": [23.457254290171605, 95.51085803432137], "label": "Polishing 
forniture"}]}, "LSvjzZHbetU": {"duration": 195.861, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LSvjzZHbetU", "annotations": [{"segment": [77.29761778471139, 92.57477223088922], "label": "Painting fence"}, {"segment": [121.59986583463338, 163.45710608424335], "label": "Painting fence"}]}, "e0G-AY68U64": {"duration": 82.92, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=e0G-AY68U64", "annotations": []}, "PfNtxlM1QHw": {"duration": 92.555, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PfNtxlM1QHw", "annotations": [{"segment": [13.962380237151345, 47.788770668907254], "label": "Bullfighting"}]}, "iazhs1cz_1Q": {"duration": 216.88, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=iazhs1cz_1Q", "annotations": [{"segment": [39.24707878939157, 94.73432811232449], "label": "Playing piano"}, {"segment": [104.20776092355693, 150.22157743525742], "label": "Playing piano"}, {"segment": [157.32665204368175, 177.28852832449297], "label": "Playing piano"}]}, "APlxSpTZVPI": {"duration": 94.204, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=APlxSpTZVPI", "annotations": [{"segment": [0.2, 88.19633414796712], "label": "Capoeira"}]}, "-kmomJph-6E": {"duration": 18.344, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-kmomJph-6E", "annotations": [{"segment": [0.01, 16.97765054602184], "label": "Doing a powerbomb"}]}, "ma9R2AjCRZE": {"duration": 175.079, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ma9R2AjCRZE", "annotations": [{"segment": [8.713076183687575, 151.9343333351707], "label": "Sumo"}]}, "9SiYS0SEKTw": {"duration": 200.3, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9SiYS0SEKTw", "annotations": [{"segment": [43.67031885, 166.31417651326052], 
"label": "Fixing bicycle"}]}, "21biKVGaY1Y": {"duration": 158.152, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=21biKVGaY1Y", "annotations": [{"segment": [138.22849363422327, 153.72385899969763], "label": "Building sandcastles"}]}, "RcYsKjVGqNE": {"duration": 88.051, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RcYsKjVGqNE", "annotations": []}, "k3nRbFeancA": {"duration": 84.777, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=k3nRbFeancA", "annotations": [{"segment": [0.01, 84.7760544217687], "label": "Sumo"}]}, "Mfk4bUp_ZC0": {"duration": 80.736, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Mfk4bUp_ZC0", "annotations": [{"segment": [0.2, 75.33665343052073], "label": "Riding bumper cars"}]}, "1lwG-y_QCkc": {"duration": 101.843, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1lwG-y_QCkc", "annotations": []}, "SmQSId7boic": {"duration": 163.051, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SmQSId7boic", "annotations": []}, "ookecPKcp6w": {"duration": 12.725, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ookecPKcp6w", "annotations": []}, "ojTFTIwsa_c": {"duration": 202.246, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ojTFTIwsa_c", "annotations": []}, "9ZnJyY_-Hzw": {"duration": 126.921, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9ZnJyY_-Hzw", "annotations": [{"segment": [35.118597200622084, 117.3908164852255], "label": "Slacklining"}]}, "IjmeHBgFb3g": {"duration": 66.53, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=IjmeHBgFb3g", "annotations": [{"segment": [12.671682000000002, 45.774522000000005], "label": "Using the pommel horse"}]}, "d-RYiiPzVKg": 
{"duration": 148.91, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=d-RYiiPzVKg", "annotations": []}, "fid8KlncwTA": {"duration": 176.01, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=fid8KlncwTA", "annotations": [{"segment": [3.704882988299532, 97.01304713806552], "label": "Horseback riding"}, {"segment": [113.20475797581904, 163.70094092745708], "label": "Horseback riding"}]}, "aBmKEmXVCqM": {"duration": 21.131, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=aBmKEmXVCqM", "annotations": [{"segment": [0.01, 21.130158], "label": "Elliptical trainer"}]}, "egmJbYMfCeU": {"duration": 13.538, "subset": "testing", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=egmJbYMfCeU", "annotations": []}, "j_vP89LZlp8": {"duration": 130.54, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=j_vP89LZlp8", "annotations": [{"segment": [10.793692592823712, 123.82198295163808], "label": "Brushing hair"}]}, "MO37MVz4uuA": {"duration": 91.78999999999999, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MO37MVz4uuA", "annotations": [{"segment": [63.87819344773791, 72.61489703588144], "label": "Shaving legs"}]}, "Jh07fhoPWEI": {"duration": 144.104, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Jh07fhoPWEI", "annotations": [{"segment": [11.240486583463339, 32.14779162870514], "label": "Playing rubik cube"}, {"segment": [39.11689331045242, 58.675339965678624], "label": "Playing rubik cube"}, {"segment": [64.74520272074884, 88.35022454602185], "label": "Playing rubik cube"}, {"segment": [94.86970676443057, 117.80029939469578], "label": "Playing rubik cube"}]}, "HEfOp_pz_j4": {"duration": 41.123, "subset": "training", "resolution": "852x480", "url": "https://www.youtube.com/watch?v=HEfOp_pz_j4", "annotations": [{"segment": 
[5.954466458658346, 10.116190327613104], "label": "Rock-paper-scissors"}, {"segment": [23.305653666146647, 26.05879407176287], "label": "Rock-paper-scissors"}]}, "Mvl-TR0rDdA": {"duration": 63.09, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=Mvl-TR0rDdA", "annotations": [{"segment": [3.8384648751950077, 48.03002202808112], "label": "Using uneven bars"}]}, "YfxK4HAp8jI": {"duration": 78.69, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=YfxK4HAp8jI", "annotations": [{"segment": [6.929990840873636, 26.281554159999995], "label": "Layup drill in basketball"}, {"segment": [30.1311668, 43.347488199999994], "label": "Layup drill in basketball"}]}, "jxk6KOLu5kU": {"duration": 120.326, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jxk6KOLu5kU", "annotations": [{"segment": [13.66062508265359, 31.812418370212264], "label": "Putting in contact lenses"}, {"segment": [77.09833564432772, 97.12144782751099], "label": "Putting in contact lenses"}]}, "v1ukgczEA5A": {"duration": 96.015, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=v1ukgczEA5A", "annotations": [{"segment": [26.27325351014041, 32.41116458658346], "label": "Waxing skis"}, {"segment": [40.94478471138846, 82.5632496099844], "label": "Waxing skis"}]}, "AA6nXQMyfhU": {"duration": 117.82, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=AA6nXQMyfhU", "annotations": [{"segment": [49.25934067022544, 53.85442841931364], "label": "Polishing forniture"}, {"segment": [74.99183150078002, 102.92996480499221], "label": "Polishing forniture"}]}, "GvvmZ1Bi1xk": {"duration": 22.66, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GvvmZ1Bi1xk", "annotations": [{"segment": [1.6064634399999995, 22.09699258970359], "label": "Mowing the lawn"}]}, "6hu3V1PS4vM": {"duration": 170.71, "subset": 
"validation", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=6hu3V1PS4vM", "annotations": [{"segment": [12.65034721116736, 99.20535444547032], "label": "Playing field hockey"}, {"segment": [104.53181642911973, 167.11774473700035], "label": "Playing field hockey"}]}, "EEMGyhO3OVI": {"duration": 145.333, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EEMGyhO3OVI", "annotations": [{"segment": [23.80661169303915, 145.13369614512473], "label": "BMX"}, {"segment": [53.05473421216849, 69.1525381825273], "label": "BMX"}, {"segment": [84.11669398595944, 88.87801628705148], "label": "BMX"}, {"segment": [107.46984622464899, 129.00916139625588], "label": "BMX"}]}, "sEaPxagod8o": {"duration": 20.782, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sEaPxagod8o", "annotations": []}, "aq9R-_cscTw": {"duration": 101.797, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=aq9R-_cscTw", "annotations": []}, "HDVk1O78gwc": {"duration": 223.61, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HDVk1O78gwc", "annotations": [{"segment": [0, 210.35213991575662], "label": "Using uneven bars"}]}, "Nr49JFixNE4": {"duration": 172.82999999999998, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Nr49JFixNE4", "annotations": [{"segment": [17.12360008, 140.20215744149766], "label": "Dodgeball"}]}, "iwhejKH3DSw": {"duration": 203.87, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=iwhejKH3DSw", "annotations": [{"segment": [9.999872550000001, 175.48318195319814], "label": "Preparing salad"}]}, "0kTxTWwkY6k": {"duration": 52.13, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=0kTxTWwkY6k", "annotations": []}, "v3tNh1and0U": {"duration": 140.319, "subset": "training", "resolution": "720x480", "url": 
"https://www.youtube.com/watch?v=v3tNh1and0U", "annotations": [{"segment": [0.2, 140.1181859410431], "label": "Swimming"}, {"segment": [0, 128.5340752348367], "label": "Swimming"}]}, "MewROhYmQP0": {"duration": 62.04, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MewROhYmQP0", "annotations": [{"segment": [0.01, 60.20882371294852], "label": "Playing polo"}]}, "3fyR5F18WKg": {"duration": 15.12, "subset": "validation", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=3fyR5F18WKg", "annotations": [{"segment": [0, 3.8222317599999993], "label": "Playing piano"}, {"segment": [4.768968847113885, 15.021666], "label": "Playing piano"}]}, "Vjsd6Ec6eKA": {"duration": 152.86, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Vjsd6Ec6eKA", "annotations": [{"segment": [6.198221528861155, 123.01085803432137], "label": "Cheerleading"}]}, "WSaGZOdrjtM": {"duration": 211.07, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=WSaGZOdrjtM", "annotations": [{"segment": [8.56131682001974, 11.195568149256584], "label": "High jump"}, {"segment": [23.049699130822376, 28.31820178929606], "label": "High jump"}, {"segment": [46.75796109395396, 48.40436817472699], "label": "High jump"}, {"segment": [59.27065490782896, 63.22203190168423], "label": "High jump"}, {"segment": [86.27173103250661, 88.24741952943424], "label": "High jump"}, {"segment": [110.63855582794741, 115.57777707026649], "label": "High jump"}, {"segment": [122.1634053933586, 126.44406380336846], "label": "High jump"}, {"segment": [166.28711515807572, 167.60424082269415], "label": "High jump"}, {"segment": [184.7268744627336, 188.34897004043427], "label": "High jump"}, {"segment": [196.25172332605305, 200.20310030577224], "label": "High jump"}]}, "tgj3VBjomI8": {"duration": 88.89, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tgj3VBjomI8", "annotations": []}, 
"bY2dgTJFWko": {"duration": 73.863, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bY2dgTJFWko", "annotations": [{"segment": [4.954889731669266, 68.67707628081124], "label": "Slacklining"}]}, "HnEvJUr8DEM": {"duration": 8.22, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HnEvJUr8DEM", "annotations": [{"segment": [0.09446177457098284, 7.840327289391575], "label": "Playing kickball"}]}, "c0qbyRWSptg": {"duration": 134.91, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=c0qbyRWSptg", "annotations": [{"segment": [13.254570982839315, 128.12751950078004], "label": "Sailing"}]}, "98MoyGZKHXc": {"duration": 187.595, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=98MoyGZKHXc", "annotations": [{"segment": [46.56888424641024, 132.46130480715792], "label": "Changing car wheel"}]}, "PfleCcLgZ7E": {"duration": 180.466, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PfleCcLgZ7E", "annotations": [{"segment": [23.89340566964065, 157.97757630985936], "label": "Baton twirling"}]}, "8HY9CPl9CqE": {"duration": 32.09, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8HY9CPl9CqE", "annotations": [{"segment": [15.469270801872074, 20.17513311700468], "label": "Springboard diving"}]}, "6FeSQ_hXOsg": {"duration": 177.819, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=6FeSQ_hXOsg", "annotations": []}, "K8ZPkLVlAiA": {"duration": 91.98, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=K8ZPkLVlAiA", "annotations": [{"segment": [39.602022774788544, 91.11334847113883], "label": "Vacuuming floor"}]}, "FkSf3pxra3M": {"duration": 16.09, "subset": "training", "resolution": "400x226", "url": "https://www.youtube.com/watch?v=FkSf3pxra3M", "annotations": [{"segment": 
[8.443926677067083, 16.061], "label": "Discus throw"}]}, "2duxXAsCW0k": {"duration": 36.665, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2duxXAsCW0k", "annotations": [{"segment": [2.3165436658282657, 19.762119210608425], "label": "Hand car wash"}, {"segment": [28.055917432137285, 36.664308], "label": "Hand car wash"}]}, "f6Nhpyq1XdU": {"duration": 33.07, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=f6Nhpyq1XdU", "annotations": [{"segment": [2.321270873634945, 29.59219296], "label": "Washing hands"}]}, "y6WVmOiCZlI": {"duration": 63.55, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=y6WVmOiCZlI", "annotations": []}, "qs_VoH8fOhs": {"duration": 192.494, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=qs_VoH8fOhs", "annotations": [{"segment": [45.045263035011196, 179.28014687934456], "label": "Decorating the Christmas tree"}]}, "PKYg6_rs3LQ": {"duration": 24.1, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=PKYg6_rs3LQ", "annotations": [{"segment": [2.6696788642745712, 21.282228692667708], "label": "Chopping wood"}]}, "fTGtoTlJyh0": {"duration": 192.819, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=fTGtoTlJyh0", "annotations": [{"segment": [0.01, 177.47708264793178], "label": "Assembling bicycle"}]}, "Yd5Fka2DvGA": {"duration": 12.86, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Yd5Fka2DvGA", "annotations": []}, "hSSHf_c1q5I": {"duration": 135.977, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hSSHf_c1q5I", "annotations": [{"segment": [15.479302652106083, 120.45941965678627], "label": "Running a marathon"}]}, "Z57GknpXOBg": {"duration": 106.51, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=Z57GknpXOBg", "annotations": [{"segment": [29.07820380624795, 77.35586354369907], "label": "Waterskiing"}]}, "JZN0L8pp5hY": {"duration": 27.33, "subset": "validation", "resolution": "320x176", "url": "https://www.youtube.com/watch?v=JZN0L8pp5hY", "annotations": [{"segment": [0.01, 27.281], "label": "Camel ride"}]}, "vy5IRxiudmI": {"duration": 54.99, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vy5IRxiudmI", "annotations": [{"segment": [16.812841046975212, 19.729354289817852], "label": "Spinning"}, {"segment": [42.54678018096724, 43.74769736349454], "label": "Spinning"}]}, "3SLaaTD8t3Q": {"duration": 21.293, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3SLaaTD8t3Q", "annotations": [{"segment": [1.2072431309811602, 19.017395168364338], "label": "Applying sunscreen"}]}, "EZZzVXqxG6U": {"duration": 192.12, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=EZZzVXqxG6U", "annotations": [{"segment": [108.62615054602183, 114.31965912636505], "label": "Discus throw"}]}, "VTbMcI6nw54": {"duration": 64.53, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VTbMcI6nw54", "annotations": [{"segment": [22.498196567862717, 27.92186895475819], "label": "Using uneven bars"}, {"segment": [40.37622776911076, 45.900338533541344], "label": "Using uneven bars"}, {"segment": [58.957327613104525, 62.27179407176287], "label": "Using uneven bars"}]}, "4zYIek_ET8w": {"duration": 176.47, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4zYIek_ET8w", "annotations": []}, "Y1I-gDeNV9I": {"duration": 178.52, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Y1I-gDeNV9I", "annotations": []}, "spAlIMm8jSg": {"duration": 101.36, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=spAlIMm8jSg", 
"annotations": [{"segment": [12.333382151612595, 39.84631156674838], "label": "Using uneven bars"}, {"segment": [49.96600974243051, 82.06442739342226], "label": "Using uneven bars"}]}, "9pNeqZ8y3yA": {"duration": 175.776, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=9pNeqZ8y3yA", "annotations": []}, "IjJo9dfIh9o": {"duration": 55.59, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=IjJo9dfIh9o", "annotations": []}, "uqlErIm56Jg": {"duration": 230.69, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uqlErIm56Jg", "annotations": [{"segment": [10.796734481624163, 218.0940365288081], "label": "Cleaning shoes"}]}, "-1EC1ZP6aC4": {"duration": 170.18, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-1EC1ZP6aC4", "annotations": []}, "P6t2HLPZ3Dk": {"duration": 79.413, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=P6t2HLPZ3Dk", "annotations": [{"segment": [0.01, 79.412243], "label": "Playing ten pins"}]}, "X5Bxa0OvdwI": {"duration": 213.46, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=X5Bxa0OvdwI", "annotations": [{"segment": [130.70742431357252, 145.35997633853353], "label": "Polishing shoes"}, {"segment": [149.68913943681747, 209.63139772074882], "label": "Polishing shoes"}]}, "D644NPLLTCQ": {"duration": 84.15, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=D644NPLLTCQ", "annotations": []}, "LoigxYt5Hgk": {"duration": 121.42, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LoigxYt5Hgk", "annotations": []}, "sWaU9O4xzFE": {"duration": 172.734, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sWaU9O4xzFE", "annotations": [{"segment": [0.01, 155.9605600624025], "label": "Throwing darts"}]}, "2tlLq9qvG-c": {"duration": 180.164, 
"subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2tlLq9qvG-c", "annotations": [{"segment": [15.709744548286604, 101.27174610591901], "label": "Baking cookies"}, {"segment": [119.50627102803738, 131.86428348909658], "label": "Baking cookies"}]}, "_4LZrf1GL1s": {"duration": 37.013, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_4LZrf1GL1s", "annotations": [{"segment": [17.65395631825273, 30.57711388455538], "label": "Running a marathon"}]}, "YoDlwg4eAEA": {"duration": 125.6, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YoDlwg4eAEA", "annotations": [{"segment": [23.92064667845186, 119.19418090119329], "label": "Preparing salad"}]}, "pXyT_AybrQ8": {"duration": 161.96, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pXyT_AybrQ8", "annotations": [{"segment": [78.77793704413062, 120.90750619366334], "label": "Installing carpet"}]}, "jFqY3M37F-g": {"duration": 129.429, "subset": "training", "resolution": "608x1080", "url": "https://www.youtube.com/watch?v=jFqY3M37F-g", "annotations": [{"segment": [3.8332277691107643, 128.5140046801872], "label": "Playing congas"}]}, "9Rvz-oIAn50": {"duration": 79.41, "subset": "validation", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=9Rvz-oIAn50", "annotations": [{"segment": [13.672349453978159, 22.457297971918877], "label": "Doing step aerobics"}, {"segment": [28.148954758190328, 36.56270826833073], "label": "Doing step aerobics"}, {"segment": [40.64585335413416, 51.41050858034321], "label": "Doing step aerobics"}, {"segment": [54.87499531981279, 69.72279563182528], "label": "Doing step aerobics"}]}, "cQxTLj4XwuU": {"duration": 15.09, "subset": "training", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=cQxTLj4XwuU", "annotations": [{"segment": [7.7949525792843835, 12.697988404829553], "label": "Cleaning shoes"}]}, "i-LI4TpJNQ8": 
{"duration": 161.379, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=i-LI4TpJNQ8", "annotations": [{"segment": [0, 134.44020072074883], "label": "Canoeing"}]}, "VIjOP5ZBvHg": {"duration": 71.216, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=VIjOP5ZBvHg", "annotations": [{"segment": [16.84808722741433, 46.45613551401869], "label": "Grooming dog"}]}, "uE_8WuNO7cA": {"duration": 167.904, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uE_8WuNO7cA", "annotations": [{"segment": [20.685552262090486, 156.07668642745708], "label": "Carving jack-o-lanterns"}]}, "yzmrLFHRaTY": {"duration": 148.306, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yzmrLFHRaTY", "annotations": []}, "DgZ5-oatDg8": {"duration": 174.173, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DgZ5-oatDg8", "annotations": []}, "yVrru7t3478": {"duration": 50.365, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=yVrru7t3478", "annotations": [{"segment": [39.71769699033717, 42.86054146039351], "label": "Fun sliding down"}]}, "vdYFwqfqgJA": {"duration": 118.283, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vdYFwqfqgJA", "annotations": [{"segment": [0.01, 118.221], "label": "Playing ice hockey"}]}, "eXK0iVPl1j8": {"duration": 97.8, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eXK0iVPl1j8", "annotations": [{"segment": [0.9555892348022698, 82.4576475681356], "label": "Breakdancing"}]}, "xlPpP0eVL9A": {"duration": 194.17, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xlPpP0eVL9A", "annotations": [{"segment": [91.13631981279251, 103.85301560062402], "label": "Washing face"}, {"segment": [139.5808751950078, 149.26978627145084], "label": "Washing face"}]}, 
"rkDuiu5H8JY": {"duration": 107.044, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rkDuiu5H8JY", "annotations": [{"segment": [16.67344466343131, 70.02846644859812], "label": "Braiding hair"}]}, "G72MBCYwT8E": {"duration": 50.18, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=G72MBCYwT8E", "annotations": [{"segment": [18.265091263650547, 34.144378315132606], "label": "Hammer throw"}]}, "9snScUyE8_4": {"duration": 96.78, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9snScUyE8_4", "annotations": []}, "knCj92zA0tU": {"duration": 80.62, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=knCj92zA0tU", "annotations": []}, "_dp7ZH3Upuw": {"duration": 55.427, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_dp7ZH3Upuw", "annotations": [{"segment": [0.2, 31.44766926677067], "label": "Using the monkey bar"}]}, "rJ6BLT5atMo": {"duration": 177.19, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rJ6BLT5atMo", "annotations": []}, "ZKo3_ifK2tQ": {"duration": 33.79, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=ZKo3_ifK2tQ", "annotations": [{"segment": [2.5299245429017163, 3.9003003369734794], "label": "Drinking coffee"}, {"segment": [6.113984312012481, 7.589773628705149], "label": "Drinking coffee"}, {"segment": [9.856164365054603, 11.384660443057724], "label": "Drinking coffee"}, {"segment": [14.230825553822154, 16.12826896099844], "label": "Drinking coffee"}, {"segment": [18.02571236817473, 19.659621968798753], "label": "Drinking coffee"}, {"segment": [21.820599182527303, 23.349095260530422], "label": "Drinking coffee"}, {"segment": [25.351952190327616, 27.091275313572545], "label": "Drinking coffee"}, {"segment": [28.93601195943838, 30.4645080374415], "label": "Drinking coffee"}]}, "hKA9Ui60n-Q": 
{"duration": 161.07999999999998, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hKA9Ui60n-Q", "annotations": []}, "8-1h1YXYvhk": {"duration": 127.62, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8-1h1YXYvhk", "annotations": [{"segment": [4.379986203529774, 5.773618177380157], "label": "Breakdancing"}, {"segment": [7.167250151230539, 12.74177804663207], "label": "Breakdancing"}, {"segment": [66.89433474481837, 91.581529710168], "label": "Breakdancing"}, {"segment": [94.16970282371295, 102.33240433697348], "label": "Breakdancing"}, {"segment": [103.72603630265212, 124.03324494539781], "label": "Breakdancing"}]}, "kM7jFLYDUy0": {"duration": 11.541, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kM7jFLYDUy0", "annotations": [{"segment": [0.01, 11.521], "label": "Riding bumper cars"}]}, "ZAztpVmBVIE": {"duration": 42.87, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZAztpVmBVIE", "annotations": []}, "Z1N185E4gsk": {"duration": 50.62, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Z1N185E4gsk", "annotations": [{"segment": [24.40159984399376, 49.11907800312013], "label": "Shoveling snow"}]}, "4NHBmXhDPzA": {"duration": 13.42, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4NHBmXhDPzA", "annotations": []}, "B8d9FYuZglQ": {"duration": 120.05, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=B8d9FYuZglQ", "annotations": [{"segment": [16.676878315132605, 99.87388923556942], "label": "Putting on makeup"}]}, "JOYduGqZSRc": {"duration": 183.67, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=JOYduGqZSRc", "annotations": [{"segment": [62.17833271426095, 137.82386102340092], "label": "Beer pong"}]}, "I3SLLGYL1zY": {"duration": 118.724, "subset": "testing", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=I3SLLGYL1zY", "annotations": []}, "QWhn9Ncvvso": {"duration": 238.52, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=QWhn9Ncvvso", "annotations": [{"segment": [0.01, 203.72413002642554], "label": "Making a sandwich"}]}, "9MGnF59gZfM": {"duration": 107.74000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9MGnF59gZfM", "annotations": [{"segment": [7.056349432137285, 94.92470069422777], "label": "Archery"}]}, "QX5e5Mla660": {"duration": 184.74, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=QX5e5Mla660", "annotations": []}, "bMWWSk_YFY8": {"duration": 121.952, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=bMWWSk_YFY8", "annotations": [{"segment": [24.72251170046802, 97.57484087363495], "label": "Bullfighting"}]}, "HDt5z5Y0opo": {"duration": 40.543, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HDt5z5Y0opo", "annotations": []}, "WxFhomp9xsI": {"duration": 136.81, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=WxFhomp9xsI", "annotations": []}, "bBRzBh0gAZQ": {"duration": 86.98, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bBRzBh0gAZQ", "annotations": []}, "ygDZHsUfnv0": {"duration": 154.32, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ygDZHsUfnv0", "annotations": []}, "O2vrVwjqktg": {"duration": 146.15, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=O2vrVwjqktg", "annotations": [{"segment": [58.139207976482325, 142.9540515070203], "label": "Ping-pong"}]}, "KjmdARv3lg4": {"duration": 59.98, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=KjmdARv3lg4", "annotations": []}, "586OlQMU420": {"duration": 
183.995, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=586OlQMU420", "annotations": [{"segment": [16.935569918034815, 148.40151945125425], "label": "Snowboarding"}, {"segment": [155.5776070171607, 176.53178656006241], "label": "Snowboarding"}]}, "-MbZ-W0AbN0": {"duration": 77.23, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-MbZ-W0AbN0", "annotations": []}, "wiaZCl-poRw": {"duration": 60.42, "subset": "testing", "resolution": "160x120", "url": "https://www.youtube.com/watch?v=wiaZCl-poRw", "annotations": []}, "ViCGpj478Ik": {"duration": 46.81, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ViCGpj478Ik", "annotations": [{"segment": [0.01, 46.8109977324263], "label": "Doing kickboxing"}]}, "Luzz1xyiFaw": {"duration": 138.18, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=Luzz1xyiFaw", "annotations": []}, "E8r9-dSKLwc": {"duration": 49.343, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=E8r9-dSKLwc", "annotations": [{"segment": [0.2, 49.301], "label": "BMX"}]}, "Mf3IyeMF8ug": {"duration": 214.95, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Mf3IyeMF8ug", "annotations": []}, "hV65qrcSBJU": {"duration": 136.95, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hV65qrcSBJU", "annotations": []}, "UPSd8xmShoA": {"duration": 217.014, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UPSd8xmShoA", "annotations": []}, "bCOS_e4HmT4": {"duration": 46.05, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bCOS_e4HmT4", "annotations": []}, "bqnRA6rZcqs": {"duration": 112.664, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=bqnRA6rZcqs", "annotations": [{"segment": [107.91765530757286, 
112.66321995464853], "label": "Hitting a pinata"}]}, "sA8fmJulHMs": {"duration": 103.28, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=sA8fmJulHMs", "annotations": [{"segment": [13.373493584641345, 71.8623872138559], "label": "Smoking a cigarette"}]}, "L5Jl7Tql2V8": {"duration": 50.32, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=L5Jl7Tql2V8", "annotations": []}, "wyARf3xHOgs": {"duration": 60.16, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wyARf3xHOgs", "annotations": [{"segment": [4.09180364, 58.38001170046802], "label": "Using uneven bars"}]}, "wSTU_jI-FRU": {"duration": 50.063, "subset": "training", "resolution": "320x180", "url": "https://www.youtube.com/watch?v=wSTU_jI-FRU", "annotations": [{"segment": [6.32090639625585, 45.729026521060845], "label": "Sumo"}]}, "xm01x3C3RJo": {"duration": 229.51, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xm01x3C3RJo", "annotations": [{"segment": [32.223856219554904, 46.90361294179658], "label": "Bathing dog"}, {"segment": [90.58484026163768, 114.57371100286188], "label": "Bathing dog"}, {"segment": [133.19193904082692, 154.67450985386355], "label": "Bathing dog"}]}, "juLxWt_3omw": {"duration": 28.05, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=juLxWt_3omw", "annotations": [{"segment": [4.196592823712948, 26.053847113884558], "label": "Clean and jerk"}]}, "qRmaZNDyDhA": {"duration": 199.901, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=qRmaZNDyDhA", "annotations": [{"segment": [85.12020748829953, 124.7182527301092], "label": "Changing car wheel"}]}, "YpA8T0ZjRIo": {"duration": 80.249, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YpA8T0ZjRIo", "annotations": [{"segment": [12.417413586257029, 56.89024224669316], "label": "Hand car 
wash"}]}, "KTo5SIY3INk": {"duration": 94.297, "subset": "testing", "resolution": "1280x714", "url": "https://www.youtube.com/watch?v=KTo5SIY3INk", "annotations": []}, "f8SWWURHV1M": {"duration": 38.13, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=f8SWWURHV1M", "annotations": []}, "uFMDSiHu7g4": {"duration": 181.23, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=uFMDSiHu7g4", "annotations": [{"segment": [0, 181.201], "label": "Cumbia"}]}, "6s9jdXxtoW4": {"duration": 193.47, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=6s9jdXxtoW4", "annotations": []}, "SH0t2-GgwiQ": {"duration": 118.747, "subset": "training", "resolution": "852x480", "url": "https://www.youtube.com/watch?v=SH0t2-GgwiQ", "annotations": [{"segment": [51.100491673532076, 55.90955405653305], "label": "Doing crunches"}, {"segment": [62.38329131633624, 77.55033407957612], "label": "Doing crunches"}, {"segment": [97.89636705465401, 105.29492450013687], "label": "Doing crunches"}]}, "KRw3ZeLhvy0": {"duration": 168.74, "subset": "testing", "resolution": "1920x810", "url": "https://www.youtube.com/watch?v=KRw3ZeLhvy0", "annotations": []}, "oRAYA7hFGCc": {"duration": 36.22, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oRAYA7hFGCc", "annotations": []}, "WNzVlf5i1ps": {"duration": 115.78, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WNzVlf5i1ps", "annotations": []}, "qdE6dbQOnt0": {"duration": 42.03, "subset": "validation", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=qdE6dbQOnt0", "annotations": [{"segment": [4.392942277691107, 38.94638377535101], "label": "Washing face"}]}, "MRsur5x9yz0": {"duration": 161.495, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MRsur5x9yz0", "annotations": []}, "j82E082KJSw": {"duration": 231.11, "subset": "training", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=j82E082KJSw", "annotations": [{"segment": [16.94553155850234, 68.50321268330734], "label": "Volleyball"}, {"segment": [73.19027460374416, 141.69348728705148], "label": "Volleyball"}, {"segment": [144.93837630889234, 177.3872665273011], "label": "Volleyball"}, {"segment": [183.15595812168488, 220.29191026053041], "label": "Volleyball"}]}, "Rr5lVqc01vw": {"duration": 132.656, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Rr5lVqc01vw", "annotations": []}, "8-F3l2Y1F54": {"duration": 229.692, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8-F3l2Y1F54", "annotations": []}, "taGuG7DT6zE": {"duration": 102.284, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=taGuG7DT6zE", "annotations": []}, "D-yu30OVvKw": {"duration": 209.14, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=D-yu30OVvKw", "annotations": []}, "Ed08LA1pjIg": {"duration": 185.481, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ed08LA1pjIg", "annotations": [{"segment": [13.889372555382216, 23.72767811544462], "label": "Skiing"}, {"segment": [66.84260542277691, 80.15325412168487], "label": "Skiing"}, {"segment": [87.3873023276131, 88.2553881123245], "label": "Skiing"}, {"segment": [92.30645510764431, 94.04262667706708], "label": "Skiing"}, {"segment": [117.77030479251171, 125.29371492667707], "label": "Skiing"}, {"segment": [131.3703154196568, 154.51926967862715], "label": "Skiing"}]}, "SQs7o8LeU6A": {"duration": 131.29, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SQs7o8LeU6A", "annotations": [{"segment": [3.276405616224649, 128.18936973478938], "label": "Playing bagpipes"}]}, "dzsSiqXxfpw": {"duration": 156.224, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=dzsSiqXxfpw", "annotations": [{"segment": [33.81055919003115, 142.78272118380062], "label": "Disc dog"}]}, "XLOhMBLYy2g": {"duration": 237.889, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XLOhMBLYy2g", "annotations": [{"segment": [57.52360947499124, 59.750329841765094], "label": "Slacklining"}, {"segment": [65.31713075869973, 67.91497118660257], "label": "Slacklining"}, {"segment": [73.11065204240823, 138.42778280110795], "label": "Slacklining"}, {"segment": [156.24154573529879, 161.43722659110446], "label": "Slacklining"}, {"segment": [188.52899105351972, 190.75571142029355], "label": "Slacklining"}]}, "O5CYGnqXJbQ": {"duration": 106.371, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=O5CYGnqXJbQ", "annotations": []}, "rWfQj0X8WU4": {"duration": 128.059, "subset": "testing", "resolution": "586x480", "url": "https://www.youtube.com/watch?v=rWfQj0X8WU4", "annotations": []}, "0ZHZ1ZqmT7s": {"duration": 10.5, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=0ZHZ1ZqmT7s", "annotations": [{"segment": [0.01, 10.495418], "label": "Getting a haircut"}]}, "IBscTNN6qfY": {"duration": 237.03, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IBscTNN6qfY", "annotations": [{"segment": [124.61581209915063, 168.98939369578784], "label": "Braiding hair"}]}, "1xLtBuMxBDY": {"duration": 30.883, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1xLtBuMxBDY", "annotations": []}, "wlAnWPxnQ8k": {"duration": 93.21000000000001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wlAnWPxnQ8k", "annotations": [{"segment": [4.685311012616166, 86.13053252663485], "label": "Using the balance beam"}]}, "YDNEa5ZOf6c": {"duration": 122.741, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=YDNEa5ZOf6c", "annotations": [{"segment": [51.50867394695789, 63.572043681747296], "label": "Removing ice from car"}, {"segment": [67.78464898595945, 103.59175440405616], "label": "Removing ice from car"}]}, "WqDep-4l0yc": {"duration": 93.182, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WqDep-4l0yc", "annotations": [{"segment": [68.21698232087228, 86.06951173676013], "label": "Canoeing"}]}, "C7so_y5I_9s": {"duration": 191.389, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=C7so_y5I_9s", "annotations": [{"segment": [32.83886115444618, 179.13978159126364], "label": "Wakeboarding"}]}, "ntKbTDQUhDA": {"duration": 126.921, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ntKbTDQUhDA", "annotations": [{"segment": [5.611035192436147, 120.59773612701558], "label": "Making a lemonade"}]}, "JNFUZz1bqmg": {"duration": 213.37, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JNFUZz1bqmg", "annotations": [{"segment": [126.65601945302302, 169.26303782355376], "label": "Bungee jumping"}]}, "aNsj5OrEGLY": {"duration": 137.32, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aNsj5OrEGLY", "annotations": []}, "puwMWETdvL0": {"duration": 103.585, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=puwMWETdvL0", "annotations": [{"segment": [5.0426543174458445, 83.90896966722671], "label": "Carving jack-o-lanterns"}]}, "SOkS5d8GjZ4": {"duration": 20.921, "subset": "training", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=SOkS5d8GjZ4", "annotations": [{"segment": [0.01, 18.669133210608422], "label": "Arm wrestling"}]}, "9WmsYbZl1pw": {"duration": 237.19, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9WmsYbZl1pw", "annotations": [{"segment": 
[103.40341645000001, 190.23192582], "label": "Making a sandwich"}]}, "RkhSR7pz9qc": {"duration": 100.566, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=RkhSR7pz9qc", "annotations": [{"segment": [0.01, 43.458155509828394], "label": "Futsal"}, {"segment": [49.57681278377535, 55.85235870577223], "label": "Futsal"}, {"segment": [65.26567758876756, 88.01453155600625], "label": "Futsal"}]}, "Zguc8yykcgk": {"duration": 14.792, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Zguc8yykcgk", "annotations": [{"segment": [3.8304590639625586, 11.560602355694227], "label": "Putting on shoes"}]}, "bKEvJveN6k4": {"duration": 95.13, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=bKEvJveN6k4", "annotations": [{"segment": [41.953212, 89.58516692667706], "label": "Dodgeball"}, {"segment": [6.880897560000002, 36.67909392000001], "label": "Dodgeball"}]}, "BoKEPRjFydk": {"duration": 116.77000000000001, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BoKEPRjFydk", "annotations": [{"segment": [8.013422776911076, 88.32977379095163], "label": "Discus throw"}]}, "by8QQWbyimw": {"duration": 191.38, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=by8QQWbyimw", "annotations": [{"segment": [6.267511689547582, 189.517615374415], "label": "Tumbling"}, {"segment": [100.21891725999998, 151.19053377999998], "label": "Tumbling"}]}, "mK3keyPMe3o": {"duration": 208.61, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mK3keyPMe3o", "annotations": [{"segment": [12.16426480967679, 123.61597820531541], "label": "Skateboarding"}, {"segment": [129.7717414451908, 154.8457682380605], "label": "Skateboarding"}, {"segment": [155.49563599506985, 165.5685862287147], "label": "Skateboarding"}, {"segment": [166.21845398572407, 184.08981730348106], "label": "Skateboarding"}, 
{"segment": [185.0646189389951, 195.13756917263993], "label": "Skateboarding"}, {"segment": [199.036775714696, 204.56065164927546], "label": "Skateboarding"}]}, "_m8WprCWHF4": {"duration": 106.84, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_m8WprCWHF4", "annotations": [{"segment": [19.333636755070202, 21.833675990639623], "label": "Walking the dog"}, {"segment": [48.16742260530421, 50.667461840873635], "label": "Walking the dog"}, {"segment": [73.50115352574103, 83.33464118564743], "label": "Walking the dog"}]}, "ucEqZtmQS-0": {"duration": 78.67, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ucEqZtmQS-0", "annotations": [{"segment": [31.126557198127927, 70.12318653], "label": "Shaving"}]}, "pO86ZQSyYdA": {"duration": 162.68, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pO86ZQSyYdA", "annotations": []}, "wRyUFEWHPFM": {"duration": 15.465, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wRyUFEWHPFM", "annotations": []}, "NfVL1o5HMEo": {"duration": 66.67, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NfVL1o5HMEo", "annotations": [{"segment": [15.268965678627145, 51.223546021840875], "label": "Using parallel bars"}]}, "tt-uDaZQWWc": {"duration": 43.12, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tt-uDaZQWWc", "annotations": []}, "fWVUEOVUzS4": {"duration": 149.4, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=fWVUEOVUzS4", "annotations": [{"segment": [10.138457183185286, 121.54495220761211], "label": "Playing badminton"}]}, "b40ECuGuck8": {"duration": 105.791, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=b40ECuGuck8", "annotations": [{"segment": [8.956595889855025, 17.682690905431347], "label": "Throwing darts"}, {"segment": 
[24.59770959702013, 32.17130149733166], "label": "Throwing darts"}, {"segment": [38.59239028237839, 48.30634511103882], "label": "Throwing darts"}, {"segment": [69.39381551607079, 77.79221271233247], "label": "Throwing darts"}, {"segment": [84.87321426996486, 96.72977501762843], "label": "Throwing darts"}]}, "aFlumCYsZgM": {"duration": 54.846, "subset": "training", "resolution": "160x120", "url": "https://www.youtube.com/watch?v=aFlumCYsZgM", "annotations": [{"segment": [0.01, 54.84553287981859], "label": "Having an ice cream"}]}, "A6rMOyqUm44": {"duration": 9.57, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=A6rMOyqUm44", "annotations": []}, "5yqHTGQm2B0": {"duration": 84.869, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5yqHTGQm2B0", "annotations": [{"segment": [7.811649173449932, 72.02605319188767], "label": "Powerbocking"}]}, "PntiXZ692xM": {"duration": 187.339, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=PntiXZ692xM", "annotations": [{"segment": [3.9077284823622658, 165.17441539825012], "label": "Running a marathon"}]}, "tcGO-GHcQIQ": {"duration": 124.134, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=tcGO-GHcQIQ", "annotations": [{"segment": [11.519515600624025, 68.8266856474259], "label": "Sharpening knives"}]}, "gOKkjA2yvGo": {"duration": 54.15, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gOKkjA2yvGo", "annotations": [{"segment": [4.561688687955682, 48.826963887675504], "label": "Spinning"}]}, "toS-_4Ik5us": {"duration": 113.732, "subset": "training", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=toS-_4Ik5us", "annotations": [{"segment": [12.94878783151326, 106.25101248049921], "label": "Playing blackjack"}]}, "MJpfymVwtGc": {"duration": 133.96, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=MJpfymVwtGc", "annotations": [{"segment": [28.212168486739465, 96.3393309065696], "label": "Getting a piercing"}]}, "FtHP0PNqo2s": {"duration": 190.85, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FtHP0PNqo2s", "annotations": [{"segment": [19.42601076, 161.62791703432137], "label": "Shaving legs"}]}, "tUCGJk6aSeg": {"duration": 43.817, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tUCGJk6aSeg", "annotations": [{"segment": [0.2, 43.23459126365055], "label": "Playing drums"}]}, "MSSb3wPd5hM": {"duration": 159.127, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MSSb3wPd5hM", "annotations": [{"segment": [0.2, 155.36415420560746], "label": "Drum corps"}]}, "wIthr5w6RV4": {"duration": 76.046, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wIthr5w6RV4", "annotations": [{"segment": [19.218949982489097, 36.77700282371295], "label": "Curling"}, {"segment": [40.92924507800312, 44.84421634633385], "label": "Curling"}, {"segment": [51.487803953198124, 53.03006536193447], "label": "Curling"}, {"segment": [65.72406311076443, 70.4694828299532], "label": "Curling"}]}, "PvB98KAatK8": {"duration": 204.963, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PvB98KAatK8", "annotations": [{"segment": [24.2257385511553, 28.05085593247797], "label": "Wakeboarding"}, {"segment": [108.69708072203089, 163.20500340587893], "label": "Wakeboarding"}, {"segment": [174.99911389505422, 179.46175080329684], "label": "Wakeboarding"}]}, "sHxzQu_4edY": {"duration": 165.26, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=sHxzQu_4edY", "annotations": [{"segment": [38.30799336, 93.43739495999999], "label": "Horseback riding"}]}, "tseeGOVN_eY": {"duration": 105.19, "subset": "training", "resolution": "720x480", "url": 
"https://www.youtube.com/watch?v=tseeGOVN_eY", "annotations": [{"segment": [3.444469567862715, 100.21766218876756], "label": "Washing dishes"}]}, "Oc8ACBiwIyE": {"duration": 202.2, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Oc8ACBiwIyE", "annotations": [{"segment": [0, 202.161], "label": "Fixing the roof"}]}, "asM7fbK_F-U": {"duration": 126.596, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=asM7fbK_F-U", "annotations": [{"segment": [7.304238689547582, 120.24282215288612], "label": "Playing congas"}]}, "6HZ55yit9es": {"duration": 80.179, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=6HZ55yit9es", "annotations": []}, "3nX5ZwzHftM": {"duration": 33.112, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=3nX5ZwzHftM", "annotations": [{"segment": [0.01, 33.111655], "label": "River tubing"}]}, "KxJpfKZbNiI": {"duration": 15.395, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KxJpfKZbNiI", "annotations": [{"segment": [0.2, 13.003198127925115], "label": "Using the monkey bar"}]}, "o7ChwZHkdUc": {"duration": 8.43, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=o7ChwZHkdUc", "annotations": []}, "J6ScF5n_Cug": {"duration": 136.19, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=J6ScF5n_Cug", "annotations": [{"segment": [4.512503364628907, 130.30335160294666], "label": "Playing flauta"}]}, "HJZj6z1dyT4": {"duration": 55.961, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HJZj6z1dyT4", "annotations": [{"segment": [0.01, 55.960089], "label": "Table soccer"}]}, "EXUKhI7WTqo": {"duration": 104.72, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=EXUKhI7WTqo", "annotations": [{"segment": [0.2, 100.2908174726989], 
"label": "Cumbia"}]}, "jfIcmcE320Q": {"duration": 25.659, "subset": "training", "resolution": "180x240", "url": "https://www.youtube.com/watch?v=jfIcmcE320Q", "annotations": [{"segment": [0.2, 25.458049886621318], "label": "Hurling"}]}, "FBqtQ3HCk48": {"duration": 181.023, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FBqtQ3HCk48", "annotations": [{"segment": [0.19999999999999998, 16.37959098772114], "label": "Carving jack-o-lanterns"}, {"segment": [28.523080857928193, 148.26353911299307], "label": "Carving jack-o-lanterns"}]}, "KyDcuYjDi_Y": {"duration": 220.03, "subset": "validation", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=KyDcuYjDi_Y", "annotations": [{"segment": [41.70658855741985, 63.332227068674584], "label": "Tumbling"}]}, "I9ZeyASdgTk": {"duration": 48.948, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=I9ZeyASdgTk", "annotations": [{"segment": [35.16904992199688, 40.4495007800312], "label": "Welding"}]}, "tYV23sjRgt8": {"duration": 84.48, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=tYV23sjRgt8", "annotations": [{"segment": [41.561685880000006, 69.60452184087364], "label": "Mixing drinks"}]}, "rnyG80WzNPk": {"duration": 231.13, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rnyG80WzNPk", "annotations": []}, "B1DNoole3Wo": {"duration": 93.41, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=B1DNoole3Wo", "annotations": [{"segment": [14.718846914196568, 17.196276592823715], "label": "Smoking a cigarette"}, {"segment": [23.462716368174725, 31.477930034321375], "label": "Smoking a cigarette"}]}, "Anvhv9FFv1Q": {"duration": 227.719, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Anvhv9FFv1Q", "annotations": [{"segment": [49.73551742069683, 94.85273635881435], "label": "Sharpening knives"}, 
{"segment": [121.49676342589704, 213.15221653666146], "label": "Sharpening knives"}]}, "pfDGqEJR2-E": {"duration": 139.343, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pfDGqEJR2-E", "annotations": []}, "RiQdIJNDTo4": {"duration": 209.03, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=RiQdIJNDTo4", "annotations": [{"segment": [7.497605304212169, 188.26553768000002], "label": "Playing piano"}]}, "NLTd7qCLT7o": {"duration": 155.551, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NLTd7qCLT7o", "annotations": [{"segment": [0.19999999999999998, 77.4112354208454], "label": "Powerbocking"}, {"segment": [84.44862045910409, 146.32907398595944], "label": "Powerbocking"}]}, "ohNtDBYmhX0": {"duration": 154.92000000000002, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ohNtDBYmhX0", "annotations": [{"segment": [9.273690779999999, 117.06356423868955], "label": "Skateboarding"}]}, "l_R7AbA-d04": {"duration": 61.998, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=l_R7AbA-d04", "annotations": [{"segment": [24.506252336448597, 42.368970404984424], "label": "Curling"}, {"segment": [49.124842679127724, 60.22377570093458], "label": "Curling"}]}, "szW1c2SGdew": {"duration": 136.371, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=szW1c2SGdew", "annotations": [{"segment": [10.068498173104263, 116.06386441882447], "label": "Capoeira"}]}, "ItFq_6cIByw": {"duration": 27.539, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=ItFq_6cIByw", "annotations": [{"segment": [7.174712098283932, 27.338866213151928], "label": "Playing drums"}]}, "dKf46wUnn-s": {"duration": 65.876, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dKf46wUnn-s", "annotations": [{"segment": [0.01, 
64.09960358318436], "label": "Croquet"}]}, "Paj9y9YMSg0": {"duration": 91.53, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Paj9y9YMSg0", "annotations": [{"segment": [0.01, 80.39487173166927], "label": "Playing polo"}]}, "pTmlOZY0e4c": {"duration": 124.668, "subset": "validation", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=pTmlOZY0e4c", "annotations": [{"segment": [10.69693678627145, 83.43610693291731], "label": "Rope skipping"}, {"segment": [85.769984049922, 119.61120224648985], "label": "Rope skipping"}]}, "4a0Q-IAqO8U": {"duration": 231.596, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4a0Q-IAqO8U", "annotations": [{"segment": [62.14427825004156, 76.23513203929518], "label": "Swimming"}, {"segment": [126.45638015996829, 129.70811564979604], "label": "Swimming"}]}, "Z2gpcqXWNek": {"duration": 153.79, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Z2gpcqXWNek", "annotations": []}, "JZK-HJ-Tdw4": {"duration": 221.75, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JZK-HJ-Tdw4", "annotations": [{"segment": [11.75843057722309, 22.825188767550703], "label": "Pole vault"}, {"segment": [49.108739469578786, 69.16723868954759], "label": "Pole vault"}, {"segment": [88.87990171606864, 98.56331513260531], "label": "Pole vault"}, {"segment": [136.95113260530422, 186.40570826833073], "label": "Pole vault"}, {"segment": [201.6406911076443, 215.4753868954758], "label": "Pole vault"}]}, "oSDHYvvYo5M": {"duration": 52.083, "subset": "validation", "resolution": "408x720", "url": "https://www.youtube.com/watch?v=oSDHYvvYo5M", "annotations": [{"segment": [0.01, 52.082357], "label": "Plastering"}]}, "lkSkFmHYdtI": {"duration": 21.64, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lkSkFmHYdtI", "annotations": [{"segment": [19.664653666146645, 19.6727076], 
"label": "Discus throw"}]}, "O62LVI0XNHo": {"duration": 171.5, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=O62LVI0XNHo", "annotations": [{"segment": [11.772409889592863, 163.20840983299198], "label": "Playing bagpipes"}]}, "JuS9DLgPQfk": {"duration": 86.63, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=JuS9DLgPQfk", "annotations": []}, "8xm7fWZ0RmE": {"duration": 33.74, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8xm7fWZ0RmE", "annotations": [{"segment": [8.31615942901716, 28.053879592823712], "label": "Playing piano"}]}, "B67jaG6qKWE": {"duration": 155.876, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=B67jaG6qKWE", "annotations": [{"segment": [12.160608424336974, 149.0890592823713], "label": "Table soccer"}]}, "DsnVkCHbEVM": {"duration": 65.9, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=DsnVkCHbEVM", "annotations": [{"segment": [0.01, 65.841], "label": "Playing water polo"}]}, "vh55SaEpuws": {"duration": 71.495, "subset": "validation", "resolution": "422x240", "url": "https://www.youtube.com/watch?v=vh55SaEpuws", "annotations": [{"segment": [0.01, 71.441], "label": "Peeling potatoes"}]}, "_HIJu_hNweY": {"duration": 150.05, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_HIJu_hNweY", "annotations": [{"segment": [4.9095, 87.31304275844504], "label": "Playing badminton"}]}, "rmoa-Ffel2k": {"duration": 94.552, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=rmoa-Ffel2k", "annotations": [{"segment": [28.32124465386779, 86.4387975725429], "label": "Elliptical trainer"}]}, "bRgvAHL3dJk": {"duration": 58.42, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=bRgvAHL3dJk", "annotations": []}, "aELu8QS8T54": {"duration": 88.306, "subset": 
"validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aELu8QS8T54", "annotations": [{"segment": [0, 11.603159711332708], "label": "Building sandcastles"}, {"segment": [18.064634789214324, 87.07868774871588], "label": "Building sandcastles"}]}, "8J_bB1vI0uE": {"duration": 161.82, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8J_bB1vI0uE", "annotations": [{"segment": [12.874903216848674, 141.1190372199688], "label": "Rock climbing"}]}, "0L959X4oxaY": {"duration": 59.095, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0L959X4oxaY", "annotations": [{"segment": [31.160456758190325, 52.364317865834636], "label": "Snow tubing"}]}, "Lm7KLQgzQDk": {"duration": 107.6, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Lm7KLQgzQDk", "annotations": [{"segment": [76.37843516380656, 104.07610945397816], "label": "Breakdancing"}]}, "sicaCRRCkiY": {"duration": 16.045, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sicaCRRCkiY", "annotations": [{"segment": [10.22216770670827, 15.41437987519501], "label": "Baking cookies"}]}, "w7IeqGuuA7Q": {"duration": 233.013, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=w7IeqGuuA7Q", "annotations": [{"segment": [0.01, 24.537170874590085], "label": "Hitting a pinata"}, {"segment": [71.79394441083765, 77.24664904963547], "label": "Hitting a pinata"}, {"segment": [124.13990894329652, 143.04261835779556], "label": "Hitting a pinata"}, {"segment": [176.48587347575534, 179.75749625903404], "label": "Hitting a pinata"}]}, "CecqF_uHeOI": {"duration": 80.13, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=CecqF_uHeOI", "annotations": []}, "tTEAlDsmZrA": {"duration": 193.319, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tTEAlDsmZrA", 
"annotations": [{"segment": [23.825833073322933, 174.92383775351013], "label": "Trimming branches or hedges"}]}, "Q7eK0xT6VSQ": {"duration": 28.027, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Q7eK0xT6VSQ", "annotations": [{"segment": [4.968588785046729, 13.171809968847352], "label": "Changing car wheel"}]}, "jv70BvlA-jI": {"duration": 78.646, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jv70BvlA-jI", "annotations": []}, "dTwH5Fzu4eE": {"duration": 36.02, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dTwH5Fzu4eE", "annotations": [{"segment": [2.0788198711621937, 34.83427828393136], "label": "Ballet"}]}, "4dVVaDvCzn0": {"duration": 11.007, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4dVVaDvCzn0", "annotations": [{"segment": [0.01, 10.944], "label": "Playing blackjack"}]}, "4VWl4OsFyK0": {"duration": 20.57, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4VWl4OsFyK0", "annotations": []}, "LFbW1KVHT8s": {"duration": 79.064, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LFbW1KVHT8s", "annotations": [{"segment": [17.391600786271454, 64.75596037441498], "label": "Wakeboarding"}]}, "Exq_WdPgCu0": {"duration": 223.26, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Exq_WdPgCu0", "annotations": []}, "n_hNYA5kWME": {"duration": 192.447, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=n_hNYA5kWME", "annotations": [{"segment": [15.036482802169232, 81.98761763156114], "label": "Playing rubik cube"}, {"segment": [85.29013997789498, 192.446984], "label": "Playing rubik cube"}]}, "jRnJRqvmZvk": {"duration": 115.357, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jRnJRqvmZvk", "annotations": [{"segment": [0.01, 
9.358112642873063], "label": "Arm wrestling"}, {"segment": [41.211688369575604, 110.13778564118566], "label": "Arm wrestling"}]}, "4fEMDQnD4Xg": {"duration": 55.705, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4fEMDQnD4Xg", "annotations": [{"segment": [0.01, 55.70467], "label": "Playing congas"}]}, "iwGYaSil0Z8": {"duration": 23.29, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=iwGYaSil0Z8", "annotations": []}, "xcBJP14YBvg": {"duration": 61.0, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xcBJP14YBvg", "annotations": [{"segment": [2.85804992199688, 39.53635725429017], "label": "Using the pommel horse"}]}, "rpjxjU-mLiw": {"duration": 21.46, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rpjxjU-mLiw", "annotations": []}, "Zsaa3XXIAhk": {"duration": 52.85, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Zsaa3XXIAhk", "annotations": [{"segment": [20.67558658346334, 52.801], "label": "Playing guitarra"}]}, "n1sWb2K6hBM": {"duration": 167.35, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=n1sWb2K6hBM", "annotations": [{"segment": [32.73703055070203, 145.73828448], "label": "Grooming horse"}]}, "6VygM9-XgAk": {"duration": 172.827, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6VygM9-XgAk", "annotations": [{"segment": [14.821684867394696, 26.948517940717633], "label": "Snowboarding"}, {"segment": [33.955132605304215, 48.23784711388456], "label": "Snowboarding"}, {"segment": [92.1639313572543, 98.09260530421219], "label": "Snowboarding"}, {"segment": [99.70951638065522, 134.27052848049922], "label": "Snowboarding"}]}, "A49K-WtIzsY": {"duration": 52.849, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=A49K-WtIzsY", "annotations": [{"segment": 
[0.01, 49.17720043869251], "label": "Removing ice from car"}]}, "HcQVvWPkogM": {"duration": 126.224, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HcQVvWPkogM", "annotations": []}, "rF1AqlLjECk": {"duration": 182.138, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rF1AqlLjECk", "annotations": [{"segment": [14.771001560062402, 164.46942121684867], "label": "Playing ice hockey"}]}, "w_56T4AZgw0": {"duration": 49.668, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=w_56T4AZgw0", "annotations": []}, "hgpekgU915k": {"duration": 13.654, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=hgpekgU915k", "annotations": [{"segment": [1.1289026458658347, 3.6849086365054604], "label": "Doing a powerbomb"}]}, "yVK-GMdc9UA": {"duration": 53.38, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=yVK-GMdc9UA", "annotations": [{"segment": [4.497136343213729, 53.382674], "label": "Ironing clothes"}]}, "UmH4VPH0KG4": {"duration": 57.632, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=UmH4VPH0KG4", "annotations": [{"segment": [0.2, 34.32696099843994], "label": "Welding"}]}, "x6Z0xTgWoVI": {"duration": 152.207, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=x6Z0xTgWoVI", "annotations": [{"segment": [0.01, 152.206802], "label": "Skiing"}]}, "Zc44Ddk2NG8": {"duration": 7.059, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Zc44Ddk2NG8", "annotations": [{"segment": [0.2, 6.858866213151927], "label": "Fun sliding down"}]}, "CgaWju3yGc4": {"duration": 164.86, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=CgaWju3yGc4", "annotations": [{"segment": [32.66370219434628, 124.08098303999999], "label": "Fixing bicycle"}]}, "fs3_MvZJe-8": 
{"duration": 219.03, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fs3_MvZJe-8", "annotations": []}, "6l0JqBhldeA": {"duration": 126.085, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6l0JqBhldeA", "annotations": [{"segment": [3.9339778761218476, 125.8839909297052], "label": "Croquet"}]}, "4b2_OpAGwW0": {"duration": 238.4, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=4b2_OpAGwW0", "annotations": [{"segment": [17.108178830554586, 168.10645285675372], "label": "Braiding hair"}]}, "mTkzorlDa9U": {"duration": 184.088, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mTkzorlDa9U", "annotations": []}, "agr3ZKq1Iso": {"duration": 90.303, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=agr3ZKq1Iso", "annotations": [{"segment": [0.2, 76.67981863763465], "label": "Riding bumper cars"}]}, "8pt2jxlkNpw": {"duration": 119.978, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8pt2jxlkNpw", "annotations": []}, "OSw73cXwjR4": {"duration": 159.71, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=OSw73cXwjR4", "annotations": [{"segment": [55.6340663026521, 106.91199765990639], "label": "Playing accordion"}]}, "TMGG5x-UQ2s": {"duration": 90.907, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TMGG5x-UQ2s", "annotations": [{"segment": [11.770989560062404, 28.080191962558505], "label": "Snow tubing"}, {"segment": [44.67303266770671, 54.31673495787832], "label": "Snow tubing"}, {"segment": [59.56404355694227, 79.1350864399376], "label": "Snow tubing"}]}, "XSDIekFXkv0": {"duration": 78.6, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XSDIekFXkv0", "annotations": [{"segment": [8.319126365054602, 22.38823712948518], "label": 
"Plataform diving"}, {"segment": [23.489297971918877, 34.010546021840874], "label": "Plataform diving"}, {"segment": [36.579687987519506, 47.1009360374415], "label": "Plataform diving"}, {"segment": [48.201996879875196, 59.45728549141966], "label": "Plataform diving"}, {"segment": [60.55834633385335, 64.22854914196569], "label": "Plataform diving"}, {"segment": [66.30833073322933, 72.42533541341655], "label": "Plataform diving"}]}, "ORcQpMrCLTI": {"duration": 208.334, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ORcQpMrCLTI", "annotations": [{"segment": [10.39980031201248, 192.41477691107644], "label": "Rope skipping"}]}, "pBLKQ9yCSC0": {"duration": 24.06, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=pBLKQ9yCSC0", "annotations": []}, "4mlA78hn4mY": {"duration": 214.14, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4mlA78hn4mY", "annotations": [{"segment": [4.675672386895476, 55.10613884555382], "label": "Playing racquetball"}, {"segment": [63.789530421216845, 200.71993603744153], "label": "Playing racquetball"}]}, "oEfj2MWh5O0": {"duration": 75.3, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=oEfj2MWh5O0", "annotations": []}, "Lg6QZST92xo": {"duration": 52.32, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=Lg6QZST92xo", "annotations": [{"segment": [14.397859080000002, 50.437436284867395], "label": "Chopping wood"}]}, "Tcf9sOzU-7M": {"duration": 224.28, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Tcf9sOzU-7M", "annotations": [{"segment": [80.81071918876755, 207.1178034321373], "label": "Paintball"}]}, "N3rFdrfE3q8": {"duration": 34.505, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=N3rFdrfE3q8", "annotations": [{"segment": [0.01, 27.955881435257407], "label": "Fun sliding 
down"}]}, "R3CGHNqunGA": {"duration": 182.0, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=R3CGHNqunGA", "annotations": [{"segment": [69.56242849713988, 73.253496131682], "label": "Skateboarding"}, {"segment": [87.73383839027031, 91.4249060248124], "label": "Skateboarding"}, {"segment": [105.05346344466022, 108.74453107920236], "label": "Skateboarding"}, {"segment": [126.91594097233275, 128.33558237023357], "label": "Skateboarding"}]}, "Z4yZr5dIMec": {"duration": 160.2, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Z4yZr5dIMec", "annotations": [{"segment": [53.46550159750391, 73.20276620592824], "label": "Playing polo"}, {"segment": [110.01803531999998, 124.16987987831514], "label": "Playing polo"}]}, "4uKoAk5NCkI": {"duration": 62.07, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4uKoAk5NCkI", "annotations": []}, "kBUDMFgWO9I": {"duration": 12.61, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=kBUDMFgWO9I", "annotations": [{"segment": [6.776295652106084, 8.52061248], "label": "Plataform diving"}]}, "5MJmwr1cmnU": {"duration": 124.297, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5MJmwr1cmnU", "annotations": [{"segment": [23.848375975039005, 109.3605678627145], "label": "Surfing"}]}, "XxYoSn6NE_4": {"duration": 220.962, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XxYoSn6NE_4", "annotations": [{"segment": [17.925080497097433, 174.42482141653664], "label": "Braiding hair"}]}, "eUCKEnKxG8w": {"duration": 167.555, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=eUCKEnKxG8w", "annotations": [{"segment": [48.60403432137286, 138.2340546021841], "label": "Gargling mouthwash"}]}, "JXL7LxJSjzw": {"duration": 142.76, "subset": "training", "resolution": "432x720", "url": 
"https://www.youtube.com/watch?v=JXL7LxJSjzw", "annotations": [{"segment": [8.124808892355695, 134.33759360374415], "label": "Chopping wood"}]}, "UciHec7xncc": {"duration": 18.228, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UciHec7xncc", "annotations": [{"segment": [5.981394424366929, 18.02766439909297], "label": "Using the monkey bar"}]}, "DG9-yffcXS4": {"duration": 191.89, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=DG9-yffcXS4", "annotations": [{"segment": [14.66491263650546, 184.07880655226208], "label": "Fixing the roof"}]}, "W5jswt09V3s": {"duration": 192.982, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=W5jswt09V3s", "annotations": [{"segment": [90.6198033826115, 187.86298004368174], "label": "Capoeira"}]}, "LnIxI3Av3W0": {"duration": 103.887, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LnIxI3Av3W0", "annotations": []}, "d4Z5KlP8vlE": {"duration": 128.616, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=d4Z5KlP8vlE", "annotations": []}, "BtYKJOmw-aU": {"duration": 112.66, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=BtYKJOmw-aU", "annotations": [{"segment": [0.01, 0.02], "label": "Brushing teeth"}]}, "pCQNYPvJu1I": {"duration": 83.08, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pCQNYPvJu1I", "annotations": []}, "I5setZ8js9I": {"duration": 113.5, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=I5setZ8js9I", "annotations": [{"segment": [82.25583366429817, 113.32241563630438], "label": "Surfing"}]}, "s0Pw7vKtqpo": {"duration": 204.475, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=s0Pw7vKtqpo", "annotations": [{"segment": [25.20046649757147, 146.09890703655358], "label": "Surfing"}, 
{"segment": [170.9803792823713, 183.74010907956318], "label": "Surfing"}]}, "Y1kCMRoDjWk": {"duration": 164.026, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Y1kCMRoDjWk", "annotations": [{"segment": [29.41578003120125, 36.83367238689548], "label": "Trimming branches or hedges"}, {"segment": [45.0189329173167, 67.01682059282372], "label": "Trimming branches or hedges"}, {"segment": [104.11898127925117, 152.7248939157566], "label": "Trimming branches or hedges"}]}, "cRBnLFujD3g": {"duration": 111.34, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cRBnLFujD3g", "annotations": [{"segment": [39.602881853354134, 52.630145620904834], "label": "Breakdancing"}, {"segment": [60.96759443213729, 64.09413773634945], "label": "Breakdancing"}, {"segment": [68.43655899219969, 77.29509835413417], "label": "Breakdancing"}]}, "85RJm2qymRY": {"duration": 71.472, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=85RJm2qymRY", "annotations": [{"segment": [33.1152775975039, 59.986597129485176], "label": "Changing car wheel"}]}, "fxyqt18d-jo": {"duration": 105.02000000000001, "subset": "training", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=fxyqt18d-jo", "annotations": [{"segment": [42.615117784711394, 66.33562480499221], "label": "Making a sandwich"}]}, "1Se1ZqCSQvk": {"duration": 124.134, "subset": "validation", "resolution": "628x360", "url": "https://www.youtube.com/watch?v=1Se1ZqCSQvk", "annotations": [{"segment": [42.973450858034326, 60.007971918876756], "label": "Sharpening knives"}, {"segment": [75.68747425897035, 123.881], "label": "Sharpening knives"}]}, "MwkRK7A46P4": {"duration": 187.547, "subset": "training", "resolution": "448x252", "url": "https://www.youtube.com/watch?v=MwkRK7A46P4", "annotations": [{"segment": [46.15657356192737, 95.81870967286187], "label": "Futsal"}, {"segment": [113.9307828427321, 134.6720279243577], "label": 
"Futsal"}, {"segment": [146.9414968458827, 166.5142198598131], "label": "Futsal"}]}, "mB-hMTdtq38": {"duration": 92.811, "subset": "testing", "resolution": "400x224", "url": "https://www.youtube.com/watch?v=mB-hMTdtq38", "annotations": []}, "blOgPoTkhks": {"duration": 17.39, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=blOgPoTkhks", "annotations": [{"segment": [0.9224565460218409, 17.309625775351012], "label": "Using uneven bars"}]}, "ZJWLTokAJbo": {"duration": 63.855, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ZJWLTokAJbo", "annotations": [{"segment": [0.2, 9.742146417445483], "label": "Elliptical trainer"}, {"segment": [13.718532710280373, 50.10246728971962], "label": "Elliptical trainer"}, {"segment": [54.675311526479746, 56.166456386292836], "label": "Elliptical trainer"}]}, "dEpUIpmBvow": {"duration": 200.69, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=dEpUIpmBvow", "annotations": []}, "jHyWtrhNTTk": {"duration": 43.93, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jHyWtrhNTTk", "annotations": [{"segment": [9.314408736349455, 18.765794071762873], "label": "High jump"}, {"segment": [24.58729953198128, 35.066009360374416], "label": "High jump"}]}, "nCzB1iXKYk4": {"duration": 208.68, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nCzB1iXKYk4", "annotations": [{"segment": [10.70826833073323, 177.82215288611545], "label": "Cumbia"}]}, "CuLC5G7yWxA": {"duration": 138.508, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=CuLC5G7yWxA", "annotations": []}, "4x3dgSgXQ38": {"duration": 70.171, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=4x3dgSgXQ38", "annotations": [{"segment": [0.01, 70.17070294784581], "label": "Fun sliding down"}]}, "24vWSTx6N5M": {"duration": 20.225, "subset": 
"validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=24vWSTx6N5M", "annotations": [{"segment": [0.2, 19.994], "label": "Swinging at the playground"}]}, "QHTFsw_1Pi8": {"duration": 26.425, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=QHTFsw_1Pi8", "annotations": []}, "ZW3SKczQzJU": {"duration": 136.19, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZW3SKczQzJU", "annotations": [{"segment": [49.9274301950078, 83.70811700780031], "label": "Brushing teeth"}, {"segment": [92.41886014820594, 94.54343164586584], "label": "Brushing teeth"}]}, "Pcro3S-4EnQ": {"duration": 150.28, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Pcro3S-4EnQ", "annotations": [{"segment": [4.923354877052225, 20.865646859888], "label": "Putting on makeup"}, {"segment": [26.023447207276046, 28.602347380970066], "label": "Putting on makeup"}, {"segment": [64.9413952830222, 144.6528537675507], "label": "Putting on makeup"}]}, "AQmWv31K2GI": {"duration": 213.717, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AQmWv31K2GI", "annotations": []}, "yr_ZOq9ZB9E": {"duration": 196.186, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yr_ZOq9ZB9E", "annotations": [{"segment": [24.178855458980262, 102.53058960453657], "label": "Longboarding"}]}, "6r3qgd1y5KE": {"duration": 152.184, "subset": "validation", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=6r3qgd1y5KE", "annotations": [{"segment": [55.14578656573113, 125.91227267287486], "label": "Hanging wallpaper"}]}, "HbF6UM4jUDk": {"duration": 231.596, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HbF6UM4jUDk", "annotations": [{"segment": [174.55738940809968, 191.14755451713395], "label": "Hula hoop"}]}, "QooDDNWaQjU": {"duration": 62.439, "subset": "training", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QooDDNWaQjU", "annotations": [{"segment": [0.01, 62.261], "label": "Disc dog"}]}, "6yD5jZn6SEs": {"duration": 13.21, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=6yD5jZn6SEs", "annotations": [{"segment": [0, 13.212154], "label": "Using uneven bars"}]}, "8cbHNUbu3Tk": {"duration": 76.65, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=8cbHNUbu3Tk", "annotations": [{"segment": [5.73971367017946, 76.44909297052153], "label": "Baton twirling"}]}, "Q8iXOTXdy2Y": {"duration": 137.95, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Q8iXOTXdy2Y", "annotations": [{"segment": [11.617244929797193, 137.881], "label": "Shot put"}]}, "45AIj4-_RBw": {"duration": 196.39, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=45AIj4-_RBw", "annotations": [{"segment": [11.94908691575663, 188.734295900156], "label": "Playing piano"}]}, "fykq7xuc3zk": {"duration": 6.06, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fykq7xuc3zk", "annotations": [{"segment": [0.08509153198127925, 3.5360258845553822], "label": "Springboard diving"}]}, "6G8m-vsGTA8": {"duration": 153.86, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=6G8m-vsGTA8", "annotations": []}, "8j75QYV-iCg": {"duration": 154.018, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8j75QYV-iCg", "annotations": [{"segment": [0.2, 153.76100000000002], "label": "Baton twirling"}]}, "g5BPyMdlurg": {"duration": 154.95, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=g5BPyMdlurg", "annotations": [{"segment": [18.612948848673945, 32.149638920436814], "label": "Tumbling"}, {"segment": [37.467624305772226, 42.302156474258965], "label": "Tumbling"}, {"segment": 
[46.16978220904836, 49.553954726989076], "label": "Tumbling"}, {"segment": [68.6503567925117, 75.1769752199688], "label": "Tumbling"}, {"segment": [84.84603955694229, 92.33956441809671], "label": "Tumbling"}, {"segment": [97.65754980343213, 102.97553518876755], "label": "Tumbling"}, {"segment": [108.05179396567861, 114.57841239313572], "label": "Tumbling"}, {"segment": [134.6417208923557, 142.13524575351013], "label": "Tumbling"}, {"segment": [148.90359078939156, 153.7381229578783], "label": "Tumbling"}]}, "kt3hzGla8r4": {"duration": 226.79, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=kt3hzGla8r4", "annotations": [{"segment": [0.01, 4.599470923054609], "label": "Playing guitarra"}, {"segment": [6.01469274553295, 47.056125597404844], "label": "Playing guitarra"}, {"segment": [61.91595473342743, 154.61298410575878], "label": "Playing guitarra"}]}, "QSV7f5XHohE": {"duration": 223.747, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=QSV7f5XHohE", "annotations": [{"segment": [33.50976344359897, 208.7379014507519], "label": "Making a cake"}]}, "rWfi4xN3XQI": {"duration": 225.21, "subset": "training", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=rWfi4xN3XQI", "annotations": [{"segment": [35.8296879875195, 88.87167706708269], "label": "Grooming horse"}, {"segment": [106.4352496099844, 130.32170826833072], "label": "Grooming horse"}, {"segment": [161.5848673946958, 212.16795631825272], "label": "Grooming horse"}]}, "Ihmu18WVMpk": {"duration": 182.37, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ihmu18WVMpk", "annotations": [{"segment": [0.01, 178.10177337705753], "label": "Futsal"}]}, "TLQFjTXZ58E": {"duration": 127.94, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TLQFjTXZ58E", "annotations": [{"segment": [93.73446645865835, 112.70097191887676], "label": "Mowing the lawn"}]}, 
"d6PdD2O3Yrs": {"duration": 12.75, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=d6PdD2O3Yrs", "annotations": []}, "ZKP5J7OKEvI": {"duration": 173.78, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZKP5J7OKEvI", "annotations": [{"segment": [7.862026276969446, 161.57819506396257], "label": "Tango"}]}, "EXxckPa76vc": {"duration": 70.148, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EXxckPa76vc", "annotations": [{"segment": [0.2, 64.94986895475819], "label": "Table soccer"}]}, "AffS41W1Jgg": {"duration": 58.213, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=AffS41W1Jgg", "annotations": [{"segment": [22.81419990718553, 32.139051301320194], "label": "Hanging wallpaper"}, {"segment": [40.55857777876965, 48.52544154237774], "label": "Hanging wallpaper"}]}, "38ZxXyECPPU": {"duration": 112.176, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=38ZxXyECPPU", "annotations": [{"segment": [5.075027186121458, 13.825074058744661], "label": "Capoeira"}, {"segment": [22.575120931367866, 28.52515280475165], "label": "Capoeira"}, {"segment": [35.70019124030267, 36.75019686501746], "label": "Capoeira"}, {"segment": [41.300221238781525, 43.75023436311602], "label": "Capoeira"}, {"segment": [47.77525592452269, 52.150278474258975], "label": "Capoeira"}, {"segment": [56.00029903276131, 63.700340149766], "label": "Capoeira"}, {"segment": [71.40038126677067, 74.02539528393136], "label": "Capoeira"}, {"segment": [82.0754382698908, 98.17552424180968], "label": "Capoeira"}]}, "MO8Dfz2Lp3w": {"duration": 129.196, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MO8Dfz2Lp3w", "annotations": [{"segment": [0.01, 129.19521541950112], "label": "Playing beach volleyball"}]}, "4L0AI0f-cMs": {"duration": 232.717, "subset": "training", "resolution": 
"600x480", "url": "https://www.youtube.com/watch?v=4L0AI0f-cMs", "annotations": [{"segment": [45.382410296411855, 202.58707956318253], "label": "Making a cake"}]}, "pVehZjbud_M": {"duration": 97.06, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=pVehZjbud_M", "annotations": [{"segment": [10.900081123244929, 97.041], "label": "Brushing hair"}]}, "VgubvtFkE8A": {"duration": 16.09, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VgubvtFkE8A", "annotations": []}, "jl10JmELMqY": {"duration": 208.005, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=jl10JmELMqY", "annotations": [{"segment": [12.652853354134166, 187.19734321372857], "label": "Knitting"}]}, "jcQy1x8lDaQ": {"duration": 128.825, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=jcQy1x8lDaQ", "annotations": [{"segment": [10.512729901144438, 125.47063177030331], "label": "Playing ten pins"}]}, "Hujj6Q1Et3k": {"duration": 136.255, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Hujj6Q1Et3k", "annotations": [{"segment": [0.2, 102.4013135725429], "label": "Playing congas"}]}, "dlrp1gteCz4": {"duration": 59.79, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dlrp1gteCz4", "annotations": []}, "qmOYKPkURiw": {"duration": 34.16, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=qmOYKPkURiw", "annotations": [{"segment": [0, 34.156552], "label": "Playing bagpipes"}]}, "S3i3cTtxpi8": {"duration": 139.738, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=S3i3cTtxpi8", "annotations": []}, "g4vyzwiJYWw": {"duration": 151.14, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=g4vyzwiJYWw", "annotations": [{"segment": [88.65545211856472, 115.29924490951637], "label": "Washing face"}]}, 
"4IRb308UjTA": {"duration": 60.814, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4IRb308UjTA", "annotations": [{"segment": [13.376975976599063, 51.041227485179405], "label": "Changing car wheel"}]}, "riuJrZqkYYU": {"duration": 74.33, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=riuJrZqkYYU", "annotations": [{"segment": [0.01, 73.51541313353214], "label": "Dodgeball"}]}, "-KbDXeEoQ1E": {"duration": 207.1, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-KbDXeEoQ1E", "annotations": [{"segment": [14.21584399375975, 16.800542901716067], "label": "Horseback riding"}, {"segment": [49.43236661466459, 102.41869422776911], "label": "Horseback riding"}, {"segment": [105.6495678627145, 115.34218876755068], "label": "Horseback riding"}, {"segment": [160.89750702028078, 164.77455538221528], "label": "Horseback riding"}]}, "Amgt0yzQido": {"duration": 76.742, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Amgt0yzQido", "annotations": [{"segment": [3.140531040002092, 76.741971], "label": "Rope skipping"}]}, "vgUSEkvJRlI": {"duration": 207.657, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=vgUSEkvJRlI", "annotations": [{"segment": [120.80370202808112, 144.1223790951638], "label": "Doing fencing"}, {"segment": [149.3043073322933, 175.55473010920437], "label": "Doing fencing"}]}, "CaDoaqs_5jE": {"duration": 222.982, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=CaDoaqs_5jE", "annotations": [{"segment": [18.436825113820877, 40.00443185074342], "label": "Ice fishing"}, {"segment": [50.092505969626544, 81.40032220064313], "label": "Ice fishing"}, {"segment": [102.62006431277659, 165.23569641185648], "label": "Ice fishing"}, {"segment": [190.62981396567864, 217.41539001560062], "label": "Ice fishing"}]}, "p-vfyM7ew04": {"duration": 
226.906, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=p-vfyM7ew04", "annotations": [{"segment": [0.01, 226.841], "label": "Waxing skis"}]}, "xfOs5nRsSuQ": {"duration": 135.72, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=xfOs5nRsSuQ", "annotations": [{"segment": [49.01611073966768, 88.18665279944531], "label": "Playing field hockey"}]}, "kU2FVf0ldx4": {"duration": 27.725, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kU2FVf0ldx4", "annotations": [{"segment": [0.01, 27.724625850340136], "label": "Swinging at the playground"}]}, "hQ0Tjmm44lQ": {"duration": 198.694, "subset": "testing", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=hQ0Tjmm44lQ", "annotations": []}, "cGoj8xGxrG0": {"duration": 44.443, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cGoj8xGxrG0", "annotations": [{"segment": [8.741879233256308, 32.602229700546026], "label": "Kneeling"}]}, "0uOMJSUza68": {"duration": 108.46000000000001, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0uOMJSUza68", "annotations": [{"segment": [72.73171606864275, 108.421], "label": "Removing curlers"}]}, "5JCqKshcfHE": {"duration": 18.3, "subset": "validation", "resolution": "1216x720", "url": "https://www.youtube.com/watch?v=5JCqKshcfHE", "annotations": [{"segment": [0.8114274570982839, 16.641380655226207], "label": "Shaving legs"}]}, "V9nOM1VWdnc": {"duration": 126.247, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=V9nOM1VWdnc", "annotations": [{"segment": [12.801947071080122, 121.32306625897036], "label": "Making a lemonade"}]}, "kShrO0yutUQ": {"duration": 223.7, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kShrO0yutUQ", "annotations": [{"segment": [35.5967336723869, 205.90267516380655], "label": "Fixing 
bicycle"}]}, "hYBctolxeqQ": {"duration": 130.636, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hYBctolxeqQ", "annotations": [{"segment": [0.2, 125.70705909797822], "label": "Playing drums"}]}, "iJVJ1G4TlqQ": {"duration": 179.82, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=iJVJ1G4TlqQ", "annotations": []}, "6TgVNz6PATk": {"duration": 35.016, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6TgVNz6PATk", "annotations": [{"segment": [0.2, 32.91047352024922], "label": "Snow tubing"}]}, "zkLld95_bhg": {"duration": 112.34, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zkLld95_bhg", "annotations": [{"segment": [4.729464898595944, 13.487733229329173], "label": "Sailing"}, {"segment": [16.115213728549143, 31.00426989079563], "label": "Sailing"}, {"segment": [32.75592355694228, 76.37209984399377], "label": "Sailing"}, {"segment": [77.59825741029641, 105.44955070202809], "label": "Sailing"}]}, "KX-vl2o8U1w": {"duration": 180.582, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=KX-vl2o8U1w", "annotations": [{"segment": [7.4655414407052465, 18.170846148131638], "label": "Playing drums"}, {"segment": [24.086935591709384, 36.76427011366169], "label": "Playing drums"}, {"segment": [0.2, 180.3815873015873], "label": "Playing drums"}, {"segment": [86.34673402174181, 87.75532674640318], "label": "Playing drums"}, {"segment": [115.36374414976599, 117.89921020436817], "label": "Playing drums"}, {"segment": [142.69044197971917, 150.86027972386896], "label": "Playing drums"}]}, "PbZIYJxoWdY": {"duration": 158.361, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PbZIYJxoWdY", "annotations": [{"segment": [13.660242432318, 57.05740131082267], "label": "Wakeboarding"}, {"segment": [91.58943869404777, 138.9377751426459], "label": "Wakeboarding"}]}, 
"1qKXZ9fThTg": {"duration": 196.19, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=1qKXZ9fThTg", "annotations": [{"segment": [68.3868159126365, 184.96568330733228], "label": "Doing nails"}]}, "OM58jhy61Mc": {"duration": 27.7, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=OM58jhy61Mc", "annotations": [{"segment": [6.115052521060843, 18.993396346333853], "label": "Smoking hookah"}]}, "ck05xSh9-ig": {"duration": 22.454, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=ck05xSh9-ig", "annotations": [{"segment": [2.019684867394696, 22.321], "label": "Hand car wash"}]}, "EYgfoKt1zrw": {"duration": 161.77, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EYgfoKt1zrw", "annotations": []}, "pF7z3Z_JSGk": {"duration": 119.351, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pF7z3Z_JSGk", "annotations": [{"segment": [0.01, 118.12317127899354], "label": "Clipping cat claws"}]}, "n2fFj1mBjhQ": {"duration": 16.65, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=n2fFj1mBjhQ", "annotations": [{"segment": [1.1168109594383775, 13.405802], "label": "Playing lacrosse"}]}, "yCPeF59MOEk": {"duration": 119.955, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=yCPeF59MOEk", "annotations": [{"segment": [0.01, 117.00703206072419], "label": "Hitting a pinata"}]}, "3K_8CdJS9lE": {"duration": 223.562, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=3K_8CdJS9lE", "annotations": [{"segment": [115.23182398753895, 223.30100000000002], "label": "Hitting a pinata"}]}, "NKblxYCeetg": {"duration": 198.183, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NKblxYCeetg", "annotations": [{"segment": [23.49253666146646, 180.21248517940717], "label": 
"Snowboarding"}]}, "nTkMD63Wj14": {"duration": 65.5, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nTkMD63Wj14", "annotations": [{"segment": [32.45608147, 56.36598653], "label": "Shoveling snow"}]}, "YtKUxxMo6Cs": {"duration": 66.93, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YtKUxxMo6Cs", "annotations": [{"segment": [0.2, 63.68961214953272], "label": "Wakeboarding"}]}, "Su6aFxSeeps": {"duration": 107.811, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Su6aFxSeeps", "annotations": []}, "uaBTWbu0jps": {"duration": 40.06, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uaBTWbu0jps", "annotations": [{"segment": [25.317496879875197, 33.9959976599064], "label": "Hammer throw"}]}, "7DDKcs7C-4A": {"duration": 34.16, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=7DDKcs7C-4A", "annotations": []}, "umi5d_a6bfc": {"duration": 61.46, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=umi5d_a6bfc", "annotations": [{"segment": [11.794034406274212, 28.382391741928185], "label": "Fixing bicycle"}, {"segment": [31.450758416731226, 54.65528139492927], "label": "Fixing bicycle"}]}, "tF8Nv-QxbIU": {"duration": 90.74, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tF8Nv-QxbIU", "annotations": []}, "3_Gha8keu-A": {"duration": 212.7, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3_Gha8keu-A", "annotations": [{"segment": [11.945408004075267, 210.37190762732553], "label": "Zumba"}]}, "qGLRri_c6n8": {"duration": 105.117, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qGLRri_c6n8", "annotations": []}, "dsgQVrJi71g": {"duration": 123.14, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=dsgQVrJi71g", "annotations": [{"segment": [32.27262125429017, 45.71954677691108], "label": "Mowing the lawn"}, {"segment": [63.96894570046802, 67.23462761310452], "label": "Mowing the lawn"}, {"segment": [72.61339782215289, 81.83414675195009], "label": "Mowing the lawn"}, {"segment": [92.20748929797192, 120.44603289547582], "label": "Mowing the lawn"}]}, "vRNcq6nOk0E": {"duration": 167.51, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vRNcq6nOk0E", "annotations": [{"segment": [7.93245896513514, 166.05975803055568], "label": "Rock-paper-scissors"}]}, "SX075xQ0j6c": {"duration": 44.374, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=SX075xQ0j6c", "annotations": [{"segment": [5.19188751950078, 44.373333333333335], "label": "Knitting"}]}, "6JZVpl2S2Mg": {"duration": 97.246, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=6JZVpl2S2Mg", "annotations": []}, "EM0N4dzewIg": {"duration": 206.542, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EM0N4dzewIg", "annotations": [{"segment": [53.97134526197375, 127.11460122894711], "label": "Assembling bicycle"}]}, "8Yi308KEGw0": {"duration": 66.11, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8Yi308KEGw0", "annotations": []}, "EQPiYEvFmSo": {"duration": 177.24, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=EQPiYEvFmSo", "annotations": [{"segment": [14.79286785023401, 166.59257719188767], "label": "Cheerleading"}]}, "rQeJGZ0SsIc": {"duration": 70.775, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rQeJGZ0SsIc", "annotations": []}, "JeSxkw4ed-I": {"duration": 45.05, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=JeSxkw4ed-I", "annotations": [{"segment": [1.3352379235569425, 
45.04671201814059], "label": "Bathing dog"}]}, "RJEqnUPQylM": {"duration": 196.835, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=RJEqnUPQylM", "annotations": []}, "sCzauf2u4dc": {"duration": 116.53999999999999, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=sCzauf2u4dc", "annotations": [{"segment": [0, 0], "label": "Grooming horse"}]}, "4UdxT6Zmero": {"duration": 98.221, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4UdxT6Zmero", "annotations": []}, "4_wfCFTnExI": {"duration": 177.703, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4_wfCFTnExI", "annotations": [{"segment": [4.705500349672579, 163.58533568567614], "label": "Playing beach volleyball"}]}, "xXRKrdnuZBQ": {"duration": 96.178, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=xXRKrdnuZBQ", "annotations": [{"segment": [35.709901196047845, 44.412314092563705], "label": "Hurling"}, {"segment": [49.21364534581382, 62.56734789391575], "label": "Hurling"}]}, "nYlAXMmPWw0": {"duration": 177.89, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nYlAXMmPWw0", "annotations": [{"segment": [2.497648444713299, 177.61055450858035], "label": "Zumba"}]}, "0DEF7Mp7ZP4": {"duration": 142.339, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0DEF7Mp7ZP4", "annotations": [{"segment": [0.01, 134.3090561622465], "label": "Playing blackjack"}]}, "t19uSLVQGWA": {"duration": 63.182, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=t19uSLVQGWA", "annotations": [{"segment": [1.182804926677067, 52.33911800546022], "label": "Playing drums"}]}, "exhsUZg_xQA": {"duration": 14.05, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=exhsUZg_xQA", "annotations": [{"segment": 
[3.9743260530421214, 13.508365054602184], "label": "Tumbling"}]}, "CI6cPLeVCTk": {"duration": 8.987, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=CI6cPLeVCTk", "annotations": [{"segment": [0.2, 8.62276286140385], "label": "Swinging at the playground"}]}, "SfYJTxMRKDE": {"duration": 230.064, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SfYJTxMRKDE", "annotations": [{"segment": [69.64516316347924, 227.29801051555168], "label": "River tubing"}]}, "lKlAe8dKOyk": {"duration": 73.78999999999999, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=lKlAe8dKOyk", "annotations": []}, "cjUz6gVQPEs": {"duration": 89.07, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cjUz6gVQPEs", "annotations": [{"segment": [6.250928237129485, 63.8983775351014], "label": "Using parallel bars"}]}, "qVHazdU4_vY": {"duration": 164.93, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=qVHazdU4_vY", "annotations": [{"segment": [53.69988429, 149.06379325039], "label": "Breakdancing"}]}, "dWZEniTrr_Q": {"duration": 46.86, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dWZEniTrr_Q", "annotations": []}, "9WOvWFdA7lY": {"duration": 172.46, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9WOvWFdA7lY", "annotations": [{"segment": [27.831327613104527, 75.15802964118565], "label": "Getting a piercing"}, {"segment": [84.83849141965679, 114.68658190327614], "label": "Getting a piercing"}]}, "VFOjLDa5VtA": {"duration": 48.76, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=VFOjLDa5VtA", "annotations": [{"segment": [0.6401300007800312, 46.804799468798755], "label": "Playing badminton"}]}, "j0MefCTx3p0": {"duration": 158.709, "subset": "testing", "resolution": "853x480", "url": 
"https://www.youtube.com/watch?v=j0MefCTx3p0", "annotations": []}, "Ta_Kf0dCd3U": {"duration": 157.06, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ta_Kf0dCd3U", "annotations": [{"segment": [14.701384953357318, 86.24812505969626], "label": "Laying tile"}, {"segment": [103.64476392116909, 124.4717259384253], "label": "Laying tile"}]}, "K-rEA43wOS0": {"duration": 152.022, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=K-rEA43wOS0", "annotations": [{"segment": [19.44729500156006, 110.28039238689547], "label": "Beer pong"}]}, "G0PzY40uZoE": {"duration": 101.216, "subset": "testing", "resolution": "854x468", "url": "https://www.youtube.com/watch?v=G0PzY40uZoE", "annotations": []}, "o_Davs3OrOw": {"duration": 29.792, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=o_Davs3OrOw", "annotations": [{"segment": [0.01, 29.337151449898087], "label": "Swimming"}]}, "XWG_-4VMTcA": {"duration": 55.546, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XWG_-4VMTcA", "annotations": [{"segment": [6.845146645865834, 9.968042121684869], "label": "Doing a powerbomb"}, {"segment": [20.369477379095166, 26.090266770670826], "label": "Doing a powerbomb"}, {"segment": [36.49170202808112, 40.73895475819033], "label": "Doing a powerbomb"}]}, "CTxuJBTp_CQ": {"duration": 179.932, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=CTxuJBTp_CQ", "annotations": [{"segment": [0.19999999999999998, 179.73142857142858], "label": "Decorating the Christmas tree"}]}, "lxtG6PbeaqY": {"duration": 133.79, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lxtG6PbeaqY", "annotations": [{"segment": [11.162943262090485, 103.17898024492979], "label": "Dodgeball"}]}, "pxfc6AIPrig": {"duration": 94.44, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=pxfc6AIPrig", "annotations": [{"segment": [8.424584286390013, 86.97380253872646], "label": "Windsurfing"}]}, "RuBLAghI1vE": {"duration": 132.261, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RuBLAghI1vE", "annotations": [{"segment": [0, 0], "label": "Curling"}]}, "o_JAjYZDs9Y": {"duration": 16.045, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=o_JAjYZDs9Y", "annotations": [{"segment": [0.2, 15.84498866213152], "label": "Hurling"}]}, "LmioUbGNv04": {"duration": 41.54, "subset": "validation", "resolution": "904x720", "url": "https://www.youtube.com/watch?v=LmioUbGNv04", "annotations": [{"segment": [0.01, 41.54049886621315], "label": "Doing karate"}]}, "iODCh_DsGak": {"duration": 171.62, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iODCh_DsGak", "annotations": [{"segment": [13.782744382995322, 147.86342274960998], "label": "Tango"}]}, "CBN0dqyWB7w": {"duration": 143.128, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CBN0dqyWB7w", "annotations": [{"segment": [0.2, 0.4465041497659907], "label": "Changing car wheel"}]}, "a_tqFOElJxU": {"duration": 95.75999999999999, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=a_tqFOElJxU", "annotations": []}, "zQd3B8WBXgE": {"duration": 115.102, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=zQd3B8WBXgE", "annotations": []}, "1xBdquA6dx8": {"duration": 19.51, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1xBdquA6dx8", "annotations": []}, "orcF8QNkg9g": {"duration": 121.394, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=orcF8QNkg9g", "annotations": []}, "IZx-EMbylmM": {"duration": 134.1, "subset": "validation", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=IZx-EMbylmM", "annotations": [{"segment": [72.98020124804992, 110.60391419656786], "label": "Drinking coffee"}]}, "0T1FKHx3yOk": {"duration": 237.494, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0T1FKHx3yOk", "annotations": [{"segment": [22.899853090406715, 46.16906437678752], "label": "Sumo"}]}, "unLrTQt07kI": {"duration": 43.98, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=unLrTQt07kI", "annotations": [{"segment": [4.013646943057722, 34.33897940171607], "label": "Washing dishes"}]}, "e_E9Kv2odGM": {"duration": 76.65, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=e_E9Kv2odGM", "annotations": []}, "weeD4A7gl2A": {"duration": 27.515, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=weeD4A7gl2A", "annotations": [{"segment": [0.01, 27.515646258503402], "label": "Hand car wash"}]}, "ufBz1xfqQoM": {"duration": 180.09, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ufBz1xfqQoM", "annotations": [{"segment": [20.22896353198128, 61.52976407644306], "label": "Tango"}, {"segment": [96.08757677691108, 179.893967], "label": "Tango"}]}, "-erT3ckPkAg": {"duration": 50.109, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=-erT3ckPkAg", "annotations": [{"segment": [0, 2.1888339095163807], "label": "Hitting a pinata"}, {"segment": [4.143149900156006, 5.6284300530421225], "label": "Hitting a pinata"}, {"segment": [7.9736092418096725, 9.380716755070203], "label": "Hitting a pinata"}, {"segment": [14.696456249609984, 16.572599600624024], "label": "Hitting a pinata"}, {"segment": [20.246713663026522, 21.966511734789393], "label": "Hitting a pinata"}, {"segment": [22.51372021216849, 30.409156814352574], "label": "Hitting a pinata"}, {"segment": [32.3634728049922, 34.00509823712949], "label": "Hitting a 
pinata"}]}, "37Q3so6ERxs": {"duration": 8.22, "subset": "training", "resolution": "632x480", "url": "https://www.youtube.com/watch?v=37Q3so6ERxs", "annotations": [{"segment": [0.09344773010920436, 7.706322809672386], "label": "Drinking beer"}]}, "B0jASjqZl7s": {"duration": 157.618, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=B0jASjqZl7s", "annotations": [{"segment": [14.999438645865833, 107.94677976287052], "label": "Snow tubing"}, {"segment": [123.68389571918877, 156.87937468954757], "label": "Snow tubing"}]}, "A7oh6l1AIvs": {"duration": 89.165, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=A7oh6l1AIvs", "annotations": [{"segment": [0, 82.0302496099844], "label": "Using the rowing machine"}]}, "a5WhYgyH_C0": {"duration": 142.22, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=a5WhYgyH_C0", "annotations": [{"segment": [90.36328393135724, 114.53407644305771], "label": "Vacuuming floor"}]}, "Mx6Gt14tnmY": {"duration": 141.06, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Mx6Gt14tnmY", "annotations": [{"segment": [64.21861130291869, 79.59911848360406], "label": "Doing nails"}, {"segment": [104.64737303500591, 141.061223], "label": "Doing nails"}]}, "FlLDPameKGM": {"duration": 10.08, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FlLDPameKGM", "annotations": [{"segment": [0.01, 7.144457098283932], "label": "Brushing hair"}]}, "mWaGZtvQd_E": {"duration": 24.22, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=mWaGZtvQd_E", "annotations": []}, "mgEX5A1o2jg": {"duration": 121.441, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mgEX5A1o2jg", "annotations": [{"segment": [3.3154545229428227, 112.06236287546741], "label": "Assembling bicycle"}]}, "It9tfX8Ux2o": {"duration": 165.187, 
"subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=It9tfX8Ux2o", "annotations": []}, "prqwtY9cn6s": {"duration": 100.87, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=prqwtY9cn6s", "annotations": [{"segment": [0.01, 86.46181045241809], "label": "Playing piano"}]}, "boqfpo3YAps": {"duration": 222.239, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=boqfpo3YAps", "annotations": [{"segment": [56.04417757009345, 137.70118068535825], "label": "Running a marathon"}]}, "2UhbDOd43zU": {"duration": 137.068, "subset": "testing", "resolution": "293x240", "url": "https://www.youtube.com/watch?v=2UhbDOd43zU", "annotations": []}, "xOGOd2UJaMY": {"duration": 191.379, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xOGOd2UJaMY", "annotations": [{"segment": [16.71952653344229, 138.53321984852184], "label": "Capoeira"}, {"segment": [144.2059161903276, 188.9903622121685], "label": "Capoeira"}]}, "CEnA3D5WX1I": {"duration": 167.625, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CEnA3D5WX1I", "annotations": []}, "kpOAGEYHts8": {"duration": 142.594, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=kpOAGEYHts8", "annotations": []}, "T5CMoNMZ-lo": {"duration": 132.63, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=T5CMoNMZ-lo", "annotations": []}, "Ws12Dt0Ny5Q": {"duration": 219.22, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=Ws12Dt0Ny5Q", "annotations": []}, "T9gKHEOvRKk": {"duration": 79.134, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=T9gKHEOvRKk", "annotations": [{"segment": [14.567500942277691, 36.295298957878316], "label": "Applying sunscreen"}, {"segment": [45.06049020280811, 52.961507663026524], "label": "Applying 
sunscreen"}]}, "lJRwUE12drE": {"duration": 87.91, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lJRwUE12drE", "annotations": [{"segment": [15.759625700000003, 46.580871699999996], "label": "Using parallel bars"}]}, "ggyGuKFjdxk": {"duration": 171.132, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ggyGuKFjdxk", "annotations": [{"segment": [3.571894888980188, 161.37499440299888], "label": "Making an omelette"}]}, "qZp8DmGpH6Y": {"duration": 63.786, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qZp8DmGpH6Y", "annotations": []}, "OmIcycXGkG8": {"duration": 162.26, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OmIcycXGkG8", "annotations": [{"segment": [8.605606864274572, 80.74084087363495], "label": "Ping-pong"}, {"segment": [98.20516068642746, 135.41175507020282], "label": "Ping-pong"}, {"segment": [145.28289235569423, 159.20372698907957], "label": "Ping-pong"}]}, "mEsW__Np9Xo": {"duration": 54.1, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mEsW__Np9Xo", "annotations": [{"segment": [0.8440326521060844, 29.794352619344775], "label": "Playing polo"}]}, "kMsWDe0V1Xg": {"duration": 190.85, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kMsWDe0V1Xg", "annotations": [{"segment": [16.220339313572545, 181.9951833073323], "label": "Playing saxophone"}]}, "8wB0BOjuyes": {"duration": 16.23, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=8wB0BOjuyes", "annotations": [{"segment": [6.861232500000001, 16.183333], "label": "Playing polo"}, {"segment": [0.23984658892355698, 5.6467416], "label": "Playing polo"}]}, "Lb43_7s9t7E": {"duration": 128.523, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Lb43_7s9t7E", "annotations": [{"segment": [0, 
128.52244897959184], "label": "Capoeira"}]}, "Zxddl4l5HUY": {"duration": 155.203, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Zxddl4l5HUY", "annotations": []}, "6Kp_fvkZWTE": {"duration": 64.13, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6Kp_fvkZWTE", "annotations": [{"segment": [5.950928237129485, 61.45958658346334], "label": "High jump"}]}, "sqcJOpPrexQ": {"duration": 44.397, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sqcJOpPrexQ", "annotations": [{"segment": [10.352808112324492, 34.509360374414975], "label": "Roof shingle removal"}]}, "_MR8G1jwM4o": {"duration": 201.55, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_MR8G1jwM4o", "annotations": [{"segment": [45.59225416635713, 48.10768855694228], "label": "Elliptical trainer"}, {"segment": [83.6382036349454, 93.69994241809673], "label": "Elliptical trainer"}, {"segment": [122.62744141965679, 133.31803887675505], "label": "Elliptical trainer"}, {"segment": [150.61165241029641, 154.38480445397815], "label": "Elliptical trainer"}, {"segment": [167.2764072698908, 178.59586340093603], "label": "Elliptical trainer"}]}, "MLU2wreEGIg": {"duration": 19.877, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=MLU2wreEGIg", "annotations": [{"segment": [0.20000000000000004, 17.121432137285492], "label": "Using the monkey bar"}]}, "fkUnZB6NTT8": {"duration": 159.359, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=fkUnZB6NTT8", "annotations": [{"segment": [24.845709828393137, 59.885680187207484], "label": "Snow tubing"}]}, "rdwOiqU_2JA": {"duration": 114.57, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rdwOiqU_2JA", "annotations": []}, "iuEuMQUXLVw": {"duration": 221.844, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=iuEuMQUXLVw", "annotations": [{"segment": [10.035583463338535, 12.457965678627145], "label": "Making a cake"}, {"segment": [33.221241809672385, 36.33573322932917], "label": "Making a cake"}, {"segment": [55.71479095163806, 67.48064742589703], "label": "Making a cake"}, {"segment": [88.93603276131044, 157.80089859594383], "label": "Making a cake"}, {"segment": [184.44710296411856, 192.40635881435256], "label": "Making a cake"}]}, "yeWCfvmeUvM": {"duration": 56.146, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=yeWCfvmeUvM", "annotations": [{"segment": [0.01, 53.080163017160686], "label": "Applying sunscreen"}]}, "JiZCjH6ePq0": {"duration": 230.16, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JiZCjH6ePq0", "annotations": [{"segment": [4.306333853354134, 16.148751950078], "label": "Dodgeball"}, {"segment": [175.12424336973478, 211.01035881435257], "label": "Dodgeball"}]}, "AEH6JEd_03c": {"duration": 106.023, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AEH6JEd_03c", "annotations": [{"segment": [51.37128081123245, 83.10172074882995], "label": "Baking cookies"}]}, "UaZcqOngDzo": {"duration": 142.22, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UaZcqOngDzo", "annotations": [{"segment": [37.71321372854914, 127.78124180967238], "label": "Doing motocross"}]}, "mS5QXB8lv9A": {"duration": 53.08, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=mS5QXB8lv9A", "annotations": []}, "1SXVKikAHdM": {"duration": 179.587, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1SXVKikAHdM", "annotations": [{"segment": [64.1489375975039, 156.32792199687987], "label": "Rollerblading"}]}, "66-sCRx4QRI": {"duration": 213.856, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=66-sCRx4QRI", "annotations": [{"segment": [37.250102968621405, 54.87738623922794], "label": "Surfing"}]}, "bQ3yPbCICRg": {"duration": 10.496, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bQ3yPbCICRg", "annotations": [{"segment": [0.19999999999999998, 10.295419501133788], "label": "Drum corps"}]}, "gSwjTXkXK3Q": {"duration": 208.771, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gSwjTXkXK3Q", "annotations": [{"segment": [54.54659357940299, 65.58578613978639], "label": "Beer pong"}, {"segment": [73.70283949300949, 78.24838937081442], "label": "Beer pong"}, {"segment": [103.24891369874155, 104.22296010112832], "label": "Beer pong"}, {"segment": [142.2107697942124, 146.10695540375949], "label": "Beer pong"}, {"segment": [164.2891549149792, 172.08152510795222], "label": "Beer pong"}]}, "SAEf6byveZU": {"duration": 119.978, "subset": "testing", "resolution": "282x192", "url": "https://www.youtube.com/watch?v=SAEf6byveZU", "annotations": []}, "9xC9rVAJHm8": {"duration": 115.1, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9xC9rVAJHm8", "annotations": [{"segment": [3.4105444617784713, 107.3610577223089], "label": "Smoking a cigarette"}]}, "l2xRpn3K6Rs": {"duration": 18.576, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=l2xRpn3K6Rs", "annotations": []}, "_AKzq9X1Aik": {"duration": 26.49, "subset": "validation", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=_AKzq9X1Aik", "annotations": [{"segment": [0.01, 24.530098803952157], "label": "Using parallel bars"}]}, "Srpn1NaBueI": {"duration": 36.131, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Srpn1NaBueI", "annotations": [{"segment": [14.993056801872076, 34.6643982449298], "label": "Calf roping"}]}, "PTkNBVyhUuo": {"duration": 140.5, "subset": "training", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=PTkNBVyhUuo", "annotations": [{"segment": [35.48861466458658, 133.41090327613105], "label": "Archery"}]}, "zrnxRV3yLR8": {"duration": 69.173, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=zrnxRV3yLR8", "annotations": [{"segment": [30.87471117653963, 40.771831661765134], "label": "Hanging wallpaper"}]}, "TPu-vPWJtIA": {"duration": 101.727, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=TPu-vPWJtIA", "annotations": [{"segment": [31.8986753266049, 38.88146992546368], "label": "Swimming"}, {"segment": [46.49906403330963, 49.990461332739024], "label": "Swimming"}, {"segment": [59.35375332293292, 74.74764121684868], "label": "Swimming"}]}, "lyJpgvmTOpo": {"duration": 8.22, "subset": "validation", "resolution": "632x480", "url": "https://www.youtube.com/watch?v=lyJpgvmTOpo", "annotations": [{"segment": [1.2182322776911076, 5.924455918876754], "label": "Drinking beer"}]}, "4PgIW76SBI0": {"duration": 40.19, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4PgIW76SBI0", "annotations": []}, "on8ok9e-zyM": {"duration": 199.53, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=on8ok9e-zyM", "annotations": []}, "oq54_GlzK6A": {"duration": 166.626, "subset": "training", "resolution": "512x288", "url": "https://www.youtube.com/watch?v=oq54_GlzK6A", "annotations": [{"segment": [33.79309716606351, 44.7108670197148], "label": "Playing ten pins"}, {"segment": [57.70821208358539, 68.62598193723667], "label": "Playing ten pins"}, {"segment": [71.74534475256561, 73.5649730615075], "label": "Playing ten pins"}, {"segment": [89.16178713815219, 97.48008797902936], "label": "Playing ten pins"}, {"segment": [121.13525599527381, 134.91244037129485], "label": "Playing ten pins"}, {"segment": [142.71084732917316, 149.9893604898596], "label": "Playing ten pins"}]}, "Hj3kEemIPic": 
{"duration": 196.256, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Hj3kEemIPic", "annotations": [{"segment": [0.01, 196.2550566893424], "label": "Removing ice from car"}]}, "NNKEE-015ZY": {"duration": 91.14, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NNKEE-015ZY", "annotations": [{"segment": [48.76807176287052, 51.611691107644305], "label": "Walking the dog"}, {"segment": [57.58329173166926, 73.36537909516382], "label": "Walking the dog"}]}, "MKLoBzvca30": {"duration": 169.599, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=MKLoBzvca30", "annotations": [{"segment": [35.18959668174727, 133.6146340171607], "label": "Futsal"}]}, "wj0D-wiqEb0": {"duration": 86.63, "subset": "training", "resolution": "658x480", "url": "https://www.youtube.com/watch?v=wj0D-wiqEb0", "annotations": [{"segment": [1.8245776688210384, 24.12497139885595], "label": "Playing flauta"}]}, "EsGpVb1-W0E": {"duration": 86.63, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=EsGpVb1-W0E", "annotations": [{"segment": [27.52590309, 33.77015054602184], "label": "Playing racquetball"}, {"segment": [68.21570410296412, 72.42172268], "label": "Playing racquetball"}]}, "LZ4dINjqX5U": {"duration": 49.13, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LZ4dINjqX5U", "annotations": [{"segment": [1.00083921, 46.5757225124805], "label": "Using the pommel horse"}]}, "mOTaXxGKUsw": {"duration": 192.7, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=mOTaXxGKUsw", "annotations": []}, "a_c-FIC_W4A": {"duration": 59.98, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=a_c-FIC_W4A", "annotations": [{"segment": [0.01, 58.3980569422777], "label": "Polishing shoes"}]}, "CBDFNn15G90": {"duration": 217.43, "subset": "training", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=CBDFNn15G90", "annotations": [{"segment": [30.69248907956318, 71.72885569422776], "label": "Kayaking"}, {"segment": [78.51172620904835, 125.65267628705149], "label": "Kayaking"}, {"segment": [134.47040795631824, 209.08198361934475], "label": "Kayaking"}]}, "aM9WZ1_8yC0": {"duration": 172.084, "subset": "training", "resolution": "352x240", "url": "https://www.youtube.com/watch?v=aM9WZ1_8yC0", "annotations": [{"segment": [0.01, 168.14808670178232], "label": "Throwing darts"}]}, "ZVjLscGMOs8": {"duration": 105.19, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ZVjLscGMOs8", "annotations": []}, "HfowLX1u38Y": {"duration": 62.25, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=HfowLX1u38Y", "annotations": [{"segment": [7.866565455538221, 45.15991280031201], "label": "Starting a campfire"}]}, "00SfeRtiM2o": {"duration": 175.868, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=00SfeRtiM2o", "annotations": [{"segment": [101.2406685981725, 122.9154990327613], "label": "Snowboarding"}, {"segment": [145.96215510140405, 150.07762939937598], "label": "Snowboarding"}]}, "ff4S4Am-Zoo": {"duration": 234.383, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ff4S4Am-Zoo", "annotations": []}, "c8ntlBKuVmc": {"duration": 142.85, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=c8ntlBKuVmc", "annotations": [{"segment": [20.272840873634944, 142.601], "label": "Trimming branches or hedges"}]}, "B3YfGd4Wt24": {"duration": 101.24000000000001, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=B3YfGd4Wt24", "annotations": [{"segment": [6.001688121946647, 10.423984632854703], "label": "Cheerleading"}, {"segment": [48.32938329778089, 99.65961065653511], "label": "Cheerleading"}]}, "0drl-yrfBAA": {"duration": 
235.683, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0drl-yrfBAA", "annotations": [{"segment": [0.2, 235.441], "label": "Drum corps"}]}, "G2uCtAeVgLM": {"duration": 146.333, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=G2uCtAeVgLM", "annotations": []}, "f2fyhNjY3EA": {"duration": 88.52, "subset": "training", "resolution": "320x180", "url": "https://www.youtube.com/watch?v=f2fyhNjY3EA", "annotations": [{"segment": [14.284453198127924, 82.87743135725428], "label": "Sailing"}]}, "Evygqtr18Us": {"duration": 232.5, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=Evygqtr18Us", "annotations": []}, "DUi-hLMpQQo": {"duration": 96.15, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DUi-hLMpQQo", "annotations": []}, "n8gFxENOyR0": {"duration": 195.86, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=n8gFxENOyR0", "annotations": [{"segment": [97.16727301092044, 103.88953718148726], "label": "Skateboarding"}, {"segment": [141.7786622652106, 147.2786965772231], "label": "Skateboarding"}]}, "IXnGjiNnJ3A": {"duration": 53.894, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IXnGjiNnJ3A", "annotations": []}, "Qn61vl2khuo": {"duration": 155.04, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Qn61vl2khuo", "annotations": [{"segment": [35.30795122620905, 44.01402139157567], "label": "Elliptical trainer"}, {"segment": [45.706868368174725, 58.28230305148205], "label": "Elliptical trainer"}, {"segment": [74.9689375351014, 81.74032544149765], "label": "Elliptical trainer"}, {"segment": [85.12601939469579, 86.81886637129485], "label": "Elliptical trainer"}, {"segment": [103.5055008549142, 110.76055932605304], "label": "Elliptical trainer"}, {"segment": [113.90441799687987, 131.5583936099844], "label": 
"Elliptical trainer"}]}, "jpGyplACWUg": {"duration": 45.21, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=jpGyplACWUg", "annotations": [{"segment": [4.714145085803432, 16.042165366614665], "label": "Shot put"}, {"segment": [18.926940717628707, 21.037751950078004], "label": "Shot put"}, {"segment": [25.89261778471139, 29.34027613104524], "label": "Shot put"}]}, "1Vu0bzAKL8Q": {"duration": 56.007, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1Vu0bzAKL8Q", "annotations": [{"segment": [0.01, 18.52913562470186], "label": "Painting fence"}]}, "CvbBSwz-9XI": {"duration": 32.76, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CvbBSwz-9XI", "annotations": []}, "L7Gt1NO1nx8": {"duration": 73.027, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=L7Gt1NO1nx8", "annotations": [{"segment": [0.2, 63.68480056630265], "label": "Cutting the grass"}]}, "8kOMPCgHQ68": {"duration": 226.488, "subset": "testing", "resolution": "540x360", "url": "https://www.youtube.com/watch?v=8kOMPCgHQ68", "annotations": []}, "J1LIG037WzE": {"duration": 167.44, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=J1LIG037WzE", "annotations": [{"segment": [7.052797605781783, 160.90827130228067], "label": "Doing kickboxing"}]}, "xIB7VSLKvaM": {"duration": 198.37, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xIB7VSLKvaM", "annotations": [{"segment": [1.85679943116092, 41.15905405740039], "label": "Mowing the lawn"}, {"segment": [51.37145092878545, 65.60691323435249], "label": "Mowing the lawn"}, {"segment": [147.30608820543299, 183.82314223712947], "label": "Mowing the lawn"}]}, "12sz7NMvyfI": {"duration": 68.615, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=12sz7NMvyfI", "annotations": [{"segment": [0, 68.614443], "label": 
"Blowing leaves"}]}, "BG4QxmqBaS4": {"duration": 182.18, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BG4QxmqBaS4", "annotations": [{"segment": [13.213036661466457, 28.55720826833073], "label": "Pole vault"}, {"segment": [38.218353354134166, 50.15270904836193], "label": "Pole vault"}, {"segment": [53.278373634945396, 62.087064742589696], "label": "Pole vault"}, {"segment": [124.61403822152887, 133.7078790951638], "label": "Pole vault"}, {"segment": [146.78027535101404, 155.30575117004682], "label": "Pole vault"}, {"segment": [172.64088533541343, 181.961], "label": "Pole vault"}]}, "R586XlFT7Go": {"duration": 69.103, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=R586XlFT7Go", "annotations": [{"segment": [5.28241289651586, 65.11382330109204], "label": "Running a marathon"}]}, "Qyruw9480BU": {"duration": 118.21000000000001, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=Qyruw9480BU", "annotations": [{"segment": [0.2, 118.012788], "label": "Ironing clothes"}]}, "kI6maggAugg": {"duration": 119.95, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=kI6maggAugg", "annotations": [{"segment": [2.244742577223089, 118.41017094851794], "label": "Clean and jerk"}]}, "cTY0q4wS3eI": {"duration": 202.07999999999998, "subset": "testing", "resolution": "640x354", "url": "https://www.youtube.com/watch?v=cTY0q4wS3eI", "annotations": []}, "SMpmoqIk3Ts": {"duration": 25.1, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SMpmoqIk3Ts", "annotations": [{"segment": [0, 25.10077097505669], "label": "Playing water polo"}]}, "NMBu3DIn1eg": {"duration": 202.29, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=NMBu3DIn1eg", "annotations": [{"segment": [35.34591484804992, 180.2010480199688], "label": "Polishing forniture"}]}, "hGPCJb2g1tQ": {"duration": 110.504, 
"subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=hGPCJb2g1tQ", "annotations": []}, "mwGs9TeYpM0": {"duration": 52.66, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=mwGs9TeYpM0", "annotations": [{"segment": [14.874528519104574, 41.7068649677027], "label": "Playing lacrosse"}]}, "43OU5XCzLzo": {"duration": 36.084, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=43OU5XCzLzo", "annotations": [{"segment": [0.2, 32.89233229329173], "label": "River tubing"}]}, "28rFeoIxM4M": {"duration": 113.546, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=28rFeoIxM4M", "annotations": []}, "kElViDpjunQ": {"duration": 120.77, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kElViDpjunQ", "annotations": [{"segment": [34.666341777480625, 84.59340930889235], "label": "Cleaning shoes"}]}, "ubVPP8BVcfs": {"duration": 67.385, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ubVPP8BVcfs", "annotations": [{"segment": [0.01, 7.564043681747269], "label": "Canoeing"}, {"segment": [11.346065522620904, 46.22471138845553], "label": "Canoeing"}]}, "bbDWLOPnWOY": {"duration": 50.945, "subset": "training", "resolution": "286x240", "url": "https://www.youtube.com/watch?v=bbDWLOPnWOY", "annotations": [{"segment": [0.20000000000000004, 39.06909828393136], "label": "Playing pool"}]}, "OYEKKzQjaJI": {"duration": 181.697, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OYEKKzQjaJI", "annotations": []}, "WhYyvKiwLc8": {"duration": 64.343, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=WhYyvKiwLc8", "annotations": [{"segment": [29.912619312935785, 32.221310457098284], "label": "Arm wrestling"}, {"segment": [35.43340371138845, 36.5375607675507], "label": "Arm wrestling"}, {"segment": 
[43.26288101872074, 48.884044213728544], "label": "Arm wrestling"}]}, "jd609r5yKkI": {"duration": 143.709, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jd609r5yKkI", "annotations": [{"segment": [0.01, 128.91131677067082], "label": "Playing drums"}]}, "ynda_PQe_CM": {"duration": 211.232, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ynda_PQe_CM", "annotations": [{"segment": [15.134997916092708, 145.75661036584935], "label": "Futsal"}]}, "8UvKQnfXnCk": {"duration": 78.02, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8UvKQnfXnCk", "annotations": [{"segment": [0.01, 76.71465783221541], "label": "Waterskiing"}]}, "VIQG4W0vYxw": {"duration": 148.121, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VIQG4W0vYxw", "annotations": [{"segment": [12.474842433697347, 148.081], "label": "Running a marathon"}]}, "LXH96dFBVYk": {"duration": 222.819, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LXH96dFBVYk", "annotations": [{"segment": [5.544472094825489, 214.50196170615354], "label": "Bullfighting"}]}, "ctMcsGnutho": {"duration": 216.736, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ctMcsGnutho", "annotations": [{"segment": [78.05562640231123, 185.71961705651682], "label": "Changing car wheel"}]}, "Me3ykrZobJE": {"duration": 134.61, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Me3ykrZobJE", "annotations": [{"segment": [9.866307163806553, 124.27348597815914], "label": "Wrapping presents"}]}, "u_G3ZPXU35A": {"duration": 69.94, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=u_G3ZPXU35A", "annotations": [{"segment": [5.887095163806553, 68.57375663026521], "label": "Cleaning windows"}]}, "wDFpFJ1CP9g": {"duration": 229.459, "subset": "validation", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wDFpFJ1CP9g", "annotations": [{"segment": [0.01, 129.92172074882996], "label": "Wakeboarding"}, {"segment": [130.63754290171607, 219.39948985959438], "label": "Wakeboarding"}]}, "mpC5EaOOe1w": {"duration": 48.832, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mpC5EaOOe1w", "annotations": []}, "rDlPcW_2nG0": {"duration": 143.34, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rDlPcW_2nG0", "annotations": [{"segment": [20.79613189425536, 116.27944715067514], "label": "Making a sandwich"}]}, "nLjD4aESipo": {"duration": 72.238, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=nLjD4aESipo", "annotations": []}, "Lh3dhueuKmQ": {"duration": 145.404, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=Lh3dhueuKmQ", "annotations": [{"segment": [8.846673238689549, 61.24619934477379], "label": "Hanging wallpaper"}, {"segment": [78.48587027145086, 135.64898965990642], "label": "Hanging wallpaper"}]}, "fJNauQt9Di0": {"duration": 182.46, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fJNauQt9Di0", "annotations": [{"segment": [8.539582073078842, 75.4329749788631], "label": "Playing violin"}, {"segment": [92.2274863892515, 172.2149045475819], "label": "Playing violin"}]}, "W4mJsJGa0CY": {"duration": 72.17, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=W4mJsJGa0CY", "annotations": []}, "h15m87WsCHQ": {"duration": 137.509, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=h15m87WsCHQ", "annotations": [{"segment": [7.722790283039894, 117.34350791174505], "label": "Playing ice hockey"}]}, "swbCsf51XVg": {"duration": 98.476, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=swbCsf51XVg", "annotations": 
[{"segment": [6.757260530421218, 81.24070046801872], "label": "Elliptical trainer"}]}, "RZurdKhOxk0": {"duration": 152.21, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RZurdKhOxk0", "annotations": [{"segment": [5.936302758232778, 127.51178180499222], "label": "Tango"}]}, "9oCLvMVaZmw": {"duration": 143.848, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9oCLvMVaZmw", "annotations": []}, "PUqHiigzb7A": {"duration": 25.22, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PUqHiigzb7A", "annotations": [{"segment": [3.815969407176287, 25.21687], "label": "Clean and jerk"}]}, "bWYIL4KG8kM": {"duration": 182.14, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bWYIL4KG8kM", "annotations": [{"segment": [40.06452814820593, 153.72276402964118], "label": "Dodgeball"}]}, "f-Cf16fQTB4": {"duration": 84.66, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=f-Cf16fQTB4", "annotations": [{"segment": [10.970862159999998, 74.29065883385336], "label": "Getting a haircut"}]}, "W2eumNilwuI": {"duration": 33.484, "subset": "testing", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=W2eumNilwuI", "annotations": []}, "a2jpe1QfZdM": {"duration": 40.38, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=a2jpe1QfZdM", "annotations": [{"segment": [12.47291177223089, 33.57607057878315], "label": "Hand washing clothes"}]}, "Q9n6B1AVO4E": {"duration": 83.732, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Q9n6B1AVO4E", "annotations": [{"segment": [9.006168486739469, 59.12886739469579], "label": "Putting in contact lenses"}]}, "U697zTxxDHM": {"duration": 95.76, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=U697zTxxDHM", "annotations": [{"segment": 
[13.43695787831513, 86.16383307332293], "label": "Rollerblading"}]}, "Y1Vop80R9b8": {"duration": 56.286, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Y1Vop80R9b8", "annotations": [{"segment": [14.939241251533259, 19.936522611346344], "label": "Painting fence"}, {"segment": [25.196818779570645, 28.352996480505222], "label": "Painting fence"}, {"segment": [36.769470349664104, 40.01331965340242], "label": "Painting fence"}, {"segment": [46.85170467209401, 50.09555397583233], "label": "Painting fence"}]}, "vbfPAKV3i3w": {"duration": 58.38, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vbfPAKV3i3w", "annotations": []}, "bnQVFmXUx_U": {"duration": 131.01, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bnQVFmXUx_U", "annotations": [{"segment": [30.55840639625585, 39.75658892355694], "label": "Cleaning shoes"}, {"segment": [48.137155226209046, 66.32911622464898], "label": "Cleaning shoes"}, {"segment": [73.4832581903276, 82.88584477379095], "label": "Cleaning shoes"}, {"segment": [88.60915834633384, 96.78532059282371], "label": "Cleaning shoes"}, {"segment": [103.73505850234008, 109.8671801872075], "label": "Cleaning shoes"}, {"segment": [116.40810998439937, 125.60629251170047], "label": "Cleaning shoes"}]}, "inFPa4wxOwQ": {"duration": 29.07, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=inFPa4wxOwQ", "annotations": [{"segment": [13.818260530421217, 17.35211076443058], "label": "Springboard diving"}]}, "ROMy00dG8Ds": {"duration": 222.07999999999998, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ROMy00dG8Ds", "annotations": [{"segment": [6.581558502340093, 196.75395943837754], "label": "Walking the dog"}]}, "JMIvoKPtxRA": {"duration": 107.788, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JMIvoKPtxRA", "annotations": 
[{"segment": [2.921316773939495, 95.42967316957812], "label": "Doing a powerbomb"}]}, "xeOwFqcsITY": {"duration": 83.73, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xeOwFqcsITY", "annotations": []}, "2_hcULoN4Ls": {"duration": 220.89, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2_hcULoN4Ls", "annotations": [{"segment": [31.524105304212167, 212.03638923556943], "label": "Playing polo"}]}, "a7YSE6dZ1yk": {"duration": 125.946, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=a7YSE6dZ1yk", "annotations": [{"segment": [47.70760081571062, 86.14466623627136], "label": "Baking cookies"}]}, "pc_QYeZG9GA": {"duration": 21.78, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=pc_QYeZG9GA", "annotations": [{"segment": [0.01, 0.02], "label": "Brushing teeth"}]}, "z7uCdnvk55Y": {"duration": 52.11, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=z7uCdnvk55Y", "annotations": []}, "yttmx8mcjGw": {"duration": 188.38, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=yttmx8mcjGw", "annotations": []}, "4H9MMsj4mcM": {"duration": 31.696, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=4H9MMsj4mcM", "annotations": []}, "54x_-2w7-7E": {"duration": 109.738, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=54x_-2w7-7E", "annotations": []}, "gl_0jjJBUkc": {"duration": 17.07, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gl_0jjJBUkc", "annotations": [{"segment": [4.762291920000001, 9.949270199999999], "label": "Hand washing clothes"}, {"segment": [4.762291920000001, 9.9492702], "label": "Hand washing clothes"}]}, "1rdecGieY-M": {"duration": 134.537, "subset": "validation", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=1rdecGieY-M", "annotations": [{"segment": [0.01, 133.54577822623727], "label": "Baking cookies"}]}, "lfllVwgOWBk": {"duration": 68.55, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=lfllVwgOWBk", "annotations": [{"segment": [35.18160017829285, 60.418248556942274], "label": "Brushing hair"}]}, "CfWROyOu8Xs": {"duration": 151.186, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=CfWROyOu8Xs", "annotations": []}, "QCBaT1NJ32E": {"duration": 171.29, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QCBaT1NJ32E", "annotations": [{"segment": [2.404319812792512, 171.241], "label": "Smoking a cigarette"}]}, "uBT9n8wOKP4": {"duration": 43.213, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uBT9n8wOKP4", "annotations": [{"segment": [0.2, 41.79867251573527], "label": "Blowing leaves"}]}, "F4FpAZJSV1g": {"duration": 94.16, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=F4FpAZJSV1g", "annotations": [{"segment": [4.625323680187208, 83.62291542433698], "label": "Triple jump"}]}, "0RYLNnfWeBg": {"duration": 168.531, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0RYLNnfWeBg", "annotations": []}, "MJvwDHbFMU0": {"duration": 38.94, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MJvwDHbFMU0", "annotations": []}, "oWaqaK58Tds": {"duration": 199.297, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=oWaqaK58Tds", "annotations": [{"segment": [79.63442768273717, 190.5878304821151], "label": "Curling"}]}, "nDGhaYCgRiI": {"duration": 211.743, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nDGhaYCgRiI", "annotations": [{"segment": [42.970676740632555, 114.65270531701371], "label": "Slacklining"}, {"segment": 
[117.62569267732444, 190.62904896939924], "label": "Slacklining"}]}, "2k-vgnzg7fk": {"duration": 182.15, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2k-vgnzg7fk", "annotations": []}, "IGdB41eBvHg": {"duration": 188.036, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IGdB41eBvHg", "annotations": []}, "gY-ijXvMjuA": {"duration": 133.61, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=gY-ijXvMjuA", "annotations": []}, "9tks__Y0mqI": {"duration": 204.754, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=9tks__Y0mqI", "annotations": [{"segment": [0.19999999999999998, 4.1396472113339895], "label": "Having an ice cream"}, {"segment": [21.335125041657413, 25.474777111920456], "label": "Having an ice cream"}, {"segment": [47.446776561778165, 49.357385209591875], "label": "Having an ice cream"}, {"segment": [53.49703727985492, 58.27355889938921], "label": "Having an ice cream"}]}, "UVxXr_mh5BM": {"duration": 196.511, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UVxXr_mh5BM", "annotations": [{"segment": [9.807725429017161, 176.55702964118564], "label": "Braiding hair"}]}, "-r_bvqjYjYg": {"duration": 66.06, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=-r_bvqjYjYg", "annotations": [{"segment": [5.5781064, 38.9549214], "label": "Shot put"}]}, "1LnkDmSaKg4": {"duration": 70.055, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1LnkDmSaKg4", "annotations": []}, "FAZvfHh5B6E": {"duration": 21.92, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FAZvfHh5B6E", "annotations": []}, "CXSoih6nFME": {"duration": 154.81, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CXSoih6nFME", "annotations": [{"segment": [78.33698620999999, 
123.73259089], "label": "Putting on makeup"}]}, "lly76lvHbB8": {"duration": 17.95, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=lly76lvHbB8", "annotations": [{"segment": [0.2, 16.441540498442368], "label": "Curling"}]}, "UdkzkCJMBaQ": {"duration": 37.988, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UdkzkCJMBaQ", "annotations": []}, "bvBNoeCBDdg": {"duration": 151.232, "subset": "training", "resolution": "712x480", "url": "https://www.youtube.com/watch?v=bvBNoeCBDdg", "annotations": [{"segment": [11.77815923877339, 139.2178422023015], "label": "Playing ice hockey"}]}, "7fV1wG7yZPI": {"duration": 13.468, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=7fV1wG7yZPI", "annotations": []}, "MKLwNTbEK4E": {"duration": 134.537, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MKLwNTbEK4E", "annotations": [{"segment": [12.799627145085804, 59.801536661466464], "label": "Playing blackjack"}]}, "12v5k4Z8lAE": {"duration": 115.8, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=12v5k4Z8lAE", "annotations": [{"segment": [41.53442537234041, 73.27964735364883], "label": "Wrapping presents"}]}, "NctohVMw1Kc": {"duration": 84.126, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=NctohVMw1Kc", "annotations": []}, "wq4nRbBHHwo": {"duration": 30.56, "subset": "testing", "resolution": "202x360", "url": "https://www.youtube.com/watch?v=wq4nRbBHHwo", "annotations": []}, "HqGWA16ECWE": {"duration": 10.426, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=HqGWA16ECWE", "annotations": [{"segment": [0.2, 10.201], "label": "Using the monkey bar"}]}, "uThAYmmxBFk": {"duration": 91.63, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uThAYmmxBFk", "annotations": [{"segment": 
[5.6679224999999995, 19.215734459438377], "label": "Doing step aerobics"}, {"segment": [20.49009875, 79.36312633619345], "label": "Doing step aerobics"}]}, "gi0wjdtxaC4": {"duration": 64.46, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gi0wjdtxaC4", "annotations": [{"segment": [0, 63.9792464898596], "label": "Tumbling"}]}, "8jvjOg-6zhA": {"duration": 85.496, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8jvjOg-6zhA", "annotations": []}, "4CTczKzaPa4": {"duration": 52.617, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4CTczKzaPa4", "annotations": [{"segment": [0.2, 52.361], "label": "Hitting a pinata"}]}, "VSdjLyGA8Oc": {"duration": 238.72, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=VSdjLyGA8Oc", "annotations": [{"segment": [30.16935018625235, 228.69112363406103], "label": "Hurling"}]}, "9jFyJhik9VM": {"duration": 15.51, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9jFyJhik9VM", "annotations": [{"segment": [0, 15.43834566926677], "label": "Windsurfing"}]}, "hsUfo_jHQ60": {"duration": 83.777, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=hsUfo_jHQ60", "annotations": [{"segment": [1.6990729859594385, 80.7713173506532], "label": "Running a marathon"}]}, "SP84bAfiRAo": {"duration": 10.728, "subset": "testing", "resolution": "432x720", "url": "https://www.youtube.com/watch?v=SP84bAfiRAo", "annotations": []}, "SGdhHAZHwmI": {"duration": 137.973, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=SGdhHAZHwmI", "annotations": [{"segment": [40.03583811856475, 42.403548975039], "label": "Putting in contact lenses"}]}, "re4vD9S8ThA": {"duration": 2.415, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=re4vD9S8ThA", "annotations": [{"segment": [0.01, 
1.8860090767676096], "label": "Kneeling"}]}, "vctal2m9Zvg": {"duration": 20.434, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vctal2m9Zvg", "annotations": [{"segment": [3.634049494539781, 16.895142386895476], "label": "Calf roping"}]}, "WOonpO98TdQ": {"duration": 231.67000000000002, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WOonpO98TdQ", "annotations": [{"segment": [27.467347925117004, 228.77409521840875], "label": "Snatch"}]}, "ZRnCnUeqKv4": {"duration": 164.31, "subset": "training", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=ZRnCnUeqKv4", "annotations": [{"segment": [8.24313168, 112.09311792], "label": "Playing field hockey"}, {"segment": [120.33624960000002, 156.41879843447737], "label": "Playing field hockey"}]}, "E5bRptEXtq8": {"duration": 156.758, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=E5bRptEXtq8", "annotations": [{"segment": [0.2, 145.54350088967976], "label": "Riding bumper cars"}]}, "u9aFICSj7zw": {"duration": 58.306, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=u9aFICSj7zw", "annotations": [{"segment": [9.001277691107644, 58.281], "label": "Ice fishing"}]}, "CBckvP5FR4A": {"duration": 132.12, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CBckvP5FR4A", "annotations": [{"segment": [7.310729329173166, 131.49015990639626], "label": "Playing guitarra"}]}, "c3a9H6a60k0": {"duration": 14.58, "subset": "testing", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=c3a9H6a60k0", "annotations": []}, "vBOFzuS-Djo": {"duration": 185.783, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vBOFzuS-Djo", "annotations": []}, "Ar3eaYtLlKI": {"duration": 56.45, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ar3eaYtLlKI", "annotations": 
[{"segment": [0.01, 56.447709750566894], "label": "Walking the dog"}]}, "Jsx38_s3Mnc": {"duration": 194.26, "subset": "validation", "resolution": "480x272", "url": "https://www.youtube.com/watch?v=Jsx38_s3Mnc", "annotations": [{"segment": [15.758830483831598, 173.65018961466458], "label": "Polishing shoes"}]}, "Vyj5eIh3jh0": {"duration": 10.87, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Vyj5eIh3jh0", "annotations": [{"segment": [0.2, 10.666938775510205], "label": "Starting a campfire"}]}, "cTxVTEzQDr8": {"duration": 185.71, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cTxVTEzQDr8", "annotations": []}, "Lk_Xe9yQ8Zc": {"duration": 157.455, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Lk_Xe9yQ8Zc", "annotations": [{"segment": [13.259990639625586, 16.20665522620905], "label": "Rope skipping"}, {"segment": [18.416653666146647, 27.01109204368175], "label": "Rope skipping"}, {"segment": [37.324418096723875, 42.481081123244934], "label": "Rope skipping"}, {"segment": [48.37441029641186, 51.32107488299532], "label": "Rope skipping"}, {"segment": [56.968848673946965, 103.63753822152886], "label": "Rope skipping"}, {"segment": [106.09340405616224, 116.89921372854914], "label": "Rope skipping"}, {"segment": [126.72267706708267, 154.71954758190327], "label": "Rope skipping"}]}, "_-zOtZZ_fwI": {"duration": 182.927, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_-zOtZZ_fwI", "annotations": [{"segment": [60.4999792324493, 182.926824], "label": "Removing ice from car"}]}, "8eqO6e2wWmY": {"duration": 53.66, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=8eqO6e2wWmY", "annotations": [{"segment": [6.643810411076443, 34.97402084321373], "label": "Playing badminton"}, {"segment": [40.32249714898597, 48.679491376755074], "label": "Playing badminton"}]}, "eU27exUJZSM": {"duration": 
151.28, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=eU27exUJZSM", "annotations": [{"segment": [8.966102964118566, 137.79484555382214], "label": "Getting a tattoo"}]}, "F3tKnLz9YyE": {"duration": 22.964, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=F3tKnLz9YyE", "annotations": [{"segment": [1.5166429031786341, 22.620871220935644], "label": "Swimming"}]}, "VxoBV76IkLM": {"duration": 112.47999999999999, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VxoBV76IkLM", "annotations": [{"segment": [9.115007800312013, 69.45421040000001], "label": "Discus throw"}, {"segment": [74.57557919999999, 108.67893915756629], "label": "Discus throw"}]}, "vdTisVMhW7I": {"duration": 187.873, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vdTisVMhW7I", "annotations": [{"segment": [68.07808563100478, 170.04912936127286], "label": "Assembling bicycle"}]}, "lhdro5SnxU0": {"duration": 227.37, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lhdro5SnxU0", "annotations": [{"segment": [48.59541660033748, 207.50597424336976], "label": "Cleaning sink"}]}, "8TDYCXqSHCw": {"duration": 22.71, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8TDYCXqSHCw", "annotations": [{"segment": [0.46055925429017164, 18.13894909204368], "label": "Clean and jerk"}]}, "0gLAhptj34w": {"duration": 91.65, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0gLAhptj34w", "annotations": [{"segment": [0.2, 88.22810869603207], "label": "Table soccer"}]}, "jBKlpn2mE8I": {"duration": 209.47, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=jBKlpn2mE8I", "annotations": [{"segment": [7.514326053042121, 81.02403744149765], "label": "Brushing hair"}, {"segment": [120.89418096723868, 183.30171762870515], 
"label": "Brushing hair"}, {"segment": [200.618992199688, 209.241], "label": "Brushing hair"}]}, "1PNjVTM0Zto": {"duration": 232.13, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=1PNjVTM0Zto", "annotations": []}, "ZazjI26IQUo": {"duration": 67.08, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZazjI26IQUo", "annotations": [{"segment": [3.5811503507653057, 66.79747688137755], "label": "Getting a tattoo"}]}, "Db9VWkgMpbE": {"duration": 115.43, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Db9VWkgMpbE", "annotations": []}, "NogC2IkaaAI": {"duration": 14.536, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NogC2IkaaAI", "annotations": [{"segment": [0.01, 8.798335819032761], "label": "Sumo"}]}, "2cc7BoZHEJI": {"duration": 88.38, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=2cc7BoZHEJI", "annotations": [{"segment": [48.761790000000005, 84.67669364820593], "label": "Playing water polo"}, {"segment": [0.4822919898595944, 15.915635665366615], "label": "Playing water polo"}]}, "tx5o8t-JJ_M": {"duration": 29.77, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tx5o8t-JJ_M", "annotations": []}, "UB2GzjNzo3M": {"duration": 237.332, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UB2GzjNzo3M", "annotations": [{"segment": [0.01, 196.60310643446147], "label": "BMX"}, {"segment": [220.66940006579856, 237.33068], "label": "BMX"}]}, "xbcP38aF5Ok": {"duration": 176.89, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=xbcP38aF5Ok", "annotations": [{"segment": [0, 42.80454880109204], "label": "Tumbling"}]}, "MDucYea4ie8": {"duration": 80.016, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MDucYea4ie8", "annotations": 
[{"segment": [2.6838222748663685, 80.0153524739229], "label": "Riding bumper cars"}]}, "nXpIrzQnprs": {"duration": 31.12, "subset": "testing", "resolution": "160x120", "url": "https://www.youtube.com/watch?v=nXpIrzQnprs", "annotations": []}, "V8AkmiL0o70": {"duration": 168.554, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=V8AkmiL0o70", "annotations": []}, "IlzwpnsIRZM": {"duration": 33.62, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IlzwpnsIRZM", "annotations": [{"segment": [0.01, 22.81713643525741], "label": "Ping-pong"}]}, "6XINT0LDdz8": {"duration": 39.59, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6XINT0LDdz8", "annotations": [{"segment": [1.2954914087363494, 37.75432105460218], "label": "Using parallel bars"}]}, "DepG0r3JiV4": {"duration": 76.07, "subset": "validation", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=DepG0r3JiV4", "annotations": [{"segment": [11.088963761310453, 28.04855539625585], "label": "Playing squash"}]}, "4miOTutCPkE": {"duration": 61.742, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=4miOTutCPkE", "annotations": []}, "3vtYXzE4i1M": {"duration": 41.169, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3vtYXzE4i1M", "annotations": []}, "k85EQoiLckw": {"duration": 80.18, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=k85EQoiLckw", "annotations": [{"segment": [47.03138338845553, 76.5510814726989], "label": "Polishing shoes"}]}, "j8lH0saRXl4": {"duration": 91.301, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=j8lH0saRXl4", "annotations": [{"segment": [0.01, 81.54406132707894], "label": "Playing rubik cube"}]}, "XHQqBdlu4wE": {"duration": 159.986, "subset": "testing", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=XHQqBdlu4wE", "annotations": []}, "2PCZkpF1_wU": {"duration": 169.553, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2PCZkpF1_wU", "annotations": [{"segment": [57.66358771901896, 152.35883561934477], "label": "Making a cake"}]}, "F9Wv_Lxe_QM": {"duration": 43.05, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=F9Wv_Lxe_QM", "annotations": [{"segment": [14.036516296411856, 43.049795918367344], "label": "Bungee jumping"}]}, "Nk4p-ah_PkA": {"duration": 63.18, "subset": "training", "resolution": "240x176", "url": "https://www.youtube.com/watch?v=Nk4p-ah_PkA", "annotations": [{"segment": [0.2, 62.98151927437642], "label": "Playing kickball"}]}, "KDzAXuav8Ag": {"duration": 206.449, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KDzAXuav8Ag", "annotations": []}, "n3wYcW4mnrs": {"duration": 60.442, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=n3wYcW4mnrs", "annotations": [{"segment": [5.2803843201347105, 37.90561601239559], "label": "River tubing"}, {"segment": [55.34974191419657, 60.241541], "label": "River tubing"}]}, "A0R3KFXD_BU": {"duration": 46.673, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=A0R3KFXD_BU", "annotations": [{"segment": [1.8918346333853353, 46.05889703588144], "label": "Playing blackjack"}]}, "AU3_oU1_rfA": {"duration": 103.59, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AU3_oU1_rfA", "annotations": [{"segment": [47.245670522620905, 66.79003337129485], "label": "Shaving"}, {"segment": [15.587033180967238, 24.955405455538223], "label": "Shaving"}, {"segment": [74.54316904680186, 81.00411544305771], "label": "Shaving"}]}, "v4KygkYPdp0": {"duration": 193.237, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=v4KygkYPdp0", "annotations": 
[{"segment": [11.154054691107644, 163.0903672402496], "label": "Playing congas"}]}, "9A9_sNvJ8zQ": {"duration": 230.644, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9A9_sNvJ8zQ", "annotations": [{"segment": [44.97733287831513, 131.3338120046802], "label": "Using the rowing machine"}, {"segment": [205.81627525117005, 212.65282984867395], "label": "Using the rowing machine"}]}, "xFIfGrhYpAg": {"duration": 33.693, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xFIfGrhYpAg", "annotations": [{"segment": [0.2, 33.361], "label": "Playing ice hockey"}]}, "fs2per3zoZQ": {"duration": 105.907, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=fs2per3zoZQ", "annotations": [{"segment": [13.036971918876755, 102.3348205928237], "label": "Carving jack-o-lanterns"}]}, "Yfg0bZbxjVg": {"duration": 229.25, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Yfg0bZbxjVg", "annotations": [{"segment": [109.08180457830558, 185.26024414664587], "label": "Bathing dog"}]}, "zXHyFBrj5Ag": {"duration": 27.15, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zXHyFBrj5Ag", "annotations": []}, "M7MDkkGdkas": {"duration": 51.53, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=M7MDkkGdkas", "annotations": [{"segment": [16.22558892355694, 37.93983385335413], "label": "Washing hands"}]}, "ZkkTMR21XLc": {"duration": 116.077, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZkkTMR21XLc", "annotations": [{"segment": [24.801274570982837, 106.08428393135725], "label": "Rafting"}]}, "yCpdDJAW-jU": {"duration": 245.807, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=yCpdDJAW-jU", "annotations": []}, "fsyMiHRW3z4": {"duration": 54.08, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=fsyMiHRW3z4", "annotations": [{"segment": [25.858497932137286, 42.64753900390016], "label": "Hopscotch"}]}, "8DZj0ggPr74": {"duration": 36.108, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=8DZj0ggPr74", "annotations": [{"segment": [7.322798190327613, 34.98044366302652], "label": "Playing ice hockey"}]}, "AQnp2QAnwI0": {"duration": 198.02, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AQnp2QAnwI0", "annotations": []}, "TbxVdELEiO8": {"duration": 137.532, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TbxVdELEiO8", "annotations": [{"segment": [19.464060524579196, 131.9706257098467], "label": "Mooping floor"}]}, "-wOaPhSf6OE": {"duration": 203.31, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-wOaPhSf6OE", "annotations": []}, "qKp2QCheLwU": {"duration": 131.867, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qKp2QCheLwU", "annotations": []}, "azfkn6EsuJA": {"duration": 16.533, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=azfkn6EsuJA", "annotations": [{"segment": [0.3610776474258971, 15.681086402496101], "label": "Waxing skis"}]}, "OfvVyonfqvg": {"duration": 34.62, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=OfvVyonfqvg", "annotations": []}, "UNJTM62HyqQ": {"duration": 139.07, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UNJTM62HyqQ", "annotations": []}, "P6Z-7k-erfc": {"duration": 67.106, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=P6Z-7k-erfc", "annotations": [{"segment": [12.143927607444432, 22.298763624014345], "label": "Carving jack-o-lanterns"}, {"segment": [25.753501650270092, 31.197331267400354], "label": "Carving jack-o-lanterns"}, {"segment": 
[32.55828867168292, 41.24747825287161], "label": "Carving jack-o-lanterns"}, {"segment": [47.00537496329785, 54.64767265834633], "label": "Carving jack-o-lanterns"}]}, "F9mk2Dn2tFw": {"duration": 40.52, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=F9mk2Dn2tFw", "annotations": []}, "QOsG3BeQeX0": {"duration": 12.33, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=QOsG3BeQeX0", "annotations": []}, "l5ZUUChppHU": {"duration": 24.36, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=l5ZUUChppHU", "annotations": []}, "PwMDJapSo70": {"duration": 85.496, "subset": "training", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=PwMDJapSo70", "annotations": [{"segment": [0.01, 85.441], "label": "Clipping cat claws"}]}, "XXfoSppjX1o": {"duration": 223.539, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XXfoSppjX1o", "annotations": []}, "Z_F01eV0Iqw": {"duration": 228.391, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Z_F01eV0Iqw", "annotations": [{"segment": [57.25770567745651, 187.97008835863846], "label": "Skiing"}]}, "RgzbNJPchqc": {"duration": 69.24, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=RgzbNJPchqc", "annotations": [{"segment": [6.45741566, 37.06781165522621], "label": "Playing bagpipes"}]}, "HrWr7FmBYn4": {"duration": 182.277, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HrWr7FmBYn4", "annotations": [{"segment": [79.89036037441498, 164.04533073322935], "label": "Blow-drying hair"}]}, "LACH47i14lY": {"duration": 101.82, "subset": "validation", "resolution": "854x478", "url": "https://www.youtube.com/watch?v=LACH47i14lY", "annotations": [{"segment": [12.434032779210463, 23.377250190785684], "label": "Futsal"}, {"segment": [36.065038494061305, 56.68269448688418], "label": 
"Futsal"}, {"segment": [65.56414629917711, 96.01483715788821], "label": "Futsal"}]}, "f-aGlEjgZoE": {"duration": 235.73, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=f-aGlEjgZoE", "annotations": [{"segment": [126.50664770536399, 136.4359485427036], "label": "Putting on makeup"}, {"segment": [180.56617448643524, 192.334234738097], "label": "Putting on makeup"}]}, "81k4vwur1Gk": {"duration": 171.48, "subset": "validation", "resolution": "478x360", "url": "https://www.youtube.com/watch?v=81k4vwur1Gk", "annotations": [{"segment": [2.6751850858034323, 8.560592274570983], "label": "Drinking coffee"}, {"segment": [138.0395504274571, 141.2497725304212], "label": "Drinking coffee"}, {"segment": [152.75306839937596, 171.27936400000002], "label": "Drinking coffee"}]}, "X4l1wbSYQFo": {"duration": 101.797, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=X4l1wbSYQFo", "annotations": [{"segment": [61.30173790951638, 87.02941029641187], "label": "Applying sunscreen"}]}, "f6j6lb0AaxM": {"duration": 134.56, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=f6j6lb0AaxM", "annotations": [{"segment": [1.8884695787831514, 18.884695787831514], "label": "Playing badminton"}, {"segment": [26.018914196567863, 44.4905647425897], "label": "Playing badminton"}, {"segment": [50.99626053042122, 57.08223400936037], "label": "Playing badminton"}, {"segment": [64.00765210608424, 131.79280499219968], "label": "Playing badminton"}]}, "J76bFZWXHFY": {"duration": 195.4, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=J76bFZWXHFY", "annotations": [{"segment": [0.01, 16.761528861154446], "label": "Rock climbing"}, {"segment": [24.07565054602184, 185.59583775351015], "label": "Rock climbing"}]}, "9-XGzUTSsuI": {"duration": 91.02, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9-XGzUTSsuI", 
"annotations": [{"segment": [5.8499839399999995, 80.96916926677066], "label": "Doing kickboxing"}]}, "0LebHO3TvzE": {"duration": 18.55, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0LebHO3TvzE", "annotations": [{"segment": [12.590394336973478, 18.321194517940718], "label": "Kayaking"}]}, "bLFudA2DtFs": {"duration": 40.64, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bLFudA2DtFs", "annotations": []}, "PaAeG07keJE": {"duration": 142.014, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PaAeG07keJE", "annotations": []}, "6NqS3vYvf6Q": {"duration": 154.9, "subset": "validation", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=6NqS3vYvf6Q", "annotations": [{"segment": [110.6776048798752, 146.68407458970358], "label": "Horseback riding"}]}, "N4wKdmmX0lk": {"duration": 38.824, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=N4wKdmmX0lk", "annotations": [{"segment": [0.20000000000000004, 38.6237641723356], "label": "Powerbocking"}]}, "Uae2MWoldTU": {"duration": 202.989, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=Uae2MWoldTU", "annotations": []}, "BydyVDyq_mE": {"duration": 32.44, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=BydyVDyq_mE", "annotations": []}, "UKhH_iEifrU": {"duration": 187.85, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UKhH_iEifrU", "annotations": [{"segment": [5.275002069470534, 179.64312430421217], "label": "Making a lemonade"}]}, "NpKBAtQ5M6I": {"duration": 173.76, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NpKBAtQ5M6I", "annotations": [{"segment": [27.670483619344772, 155.44301092043682], "label": "Fixing bicycle"}]}, "Xpb1Lup1GAU": {"duration": 143.732, "subset": "training", "resolution": "810x1440", 
"url": "https://www.youtube.com/watch?v=Xpb1Lup1GAU", "annotations": [{"segment": [0.2, 72.8716533285813], "label": "Knitting"}, {"segment": [102.15443563797992, 106.40155579289437], "label": "Knitting"}]}, "2ZoraV9ts0c": {"duration": 21.67, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2ZoraV9ts0c", "annotations": []}, "Aqx7rM9gZgg": {"duration": 102.35, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Aqx7rM9gZgg", "annotations": [{"segment": [17.076932917316697, 98.63125741029641], "label": "Grooming horse"}]}, "wdecG9VSAl0": {"duration": 139.761, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wdecG9VSAl0", "annotations": [{"segment": [0.20000000000000004, 16.025626625065], "label": "Canoeing"}, {"segment": [111.0892076700468, 126.35170921450859], "label": "Canoeing"}]}, "GxHHcl962mU": {"duration": 131.031, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GxHHcl962mU", "annotations": []}, "GkPmeXaNY0s": {"duration": 108.229, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GkPmeXaNY0s", "annotations": [{"segment": [68.04363193847483, 94.21425819968799], "label": "Applying sunscreen"}]}, "Z8xxgFpK-NM": {"duration": 146.378, "subset": "training", "resolution": "352x288", "url": "https://www.youtube.com/watch?v=Z8xxgFpK-NM", "annotations": [{"segment": [9.132043681747271, 138.57876287051482], "label": "Capoeira"}]}, "AH4v5vqsUlc": {"duration": 56.332, "subset": "validation", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=AH4v5vqsUlc", "annotations": [{"segment": [25.032425897035882, 34.34273166926677], "label": "Fun sliding down"}]}, "D6HdoIsLMFg": {"duration": 80.852, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=D6HdoIsLMFg", "annotations": []}, "du3da-_eL-Q": {"duration": 162.122, "subset": "testing", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=du3da-_eL-Q", "annotations": []}, "VwclmKWo_-M": {"duration": 120.91, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VwclmKWo_-M", "annotations": [{"segment": [58.79725798955661, 113.40082496774976], "label": "Making a sandwich"}]}, "NcjQI0avKHE": {"duration": 220.544, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NcjQI0avKHE", "annotations": [{"segment": [15.307791731669267, 23.563679407176288], "label": "Snowboarding"}, {"segment": [46.26737051482059, 96.1466918876755], "label": "Snowboarding"}, {"segment": [119.53837363494539, 151.5299383775351], "label": "Snowboarding"}, {"segment": [173.20164352574102, 220.521], "label": "Snowboarding"}]}, "Qpo9dRh1-tI": {"duration": 131.04, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Qpo9dRh1-tI", "annotations": []}, "Gs79qz286QE": {"duration": 111.851, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Gs79qz286QE", "annotations": []}, "NbmseYFNIlA": {"duration": 49.13, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=NbmseYFNIlA", "annotations": [{"segment": [8.086702225429018, 42.579744893135725], "label": "Triple jump"}]}, "kuPWb9E4aUQ": {"duration": 173.221, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kuPWb9E4aUQ", "annotations": [{"segment": [15.379406192383493, 130.32023141967068], "label": "Scuba diving"}]}, "Nj_fvFbf56I": {"duration": 32.86, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=Nj_fvFbf56I", "annotations": [{"segment": [0.42338113065323874, 32.51194611196165], "label": "Hopscotch"}]}, "suwVnxHZtI8": {"duration": 103.654, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=suwVnxHZtI8", "annotations": [{"segment": 
[3.3958368620459107, 101.06657176287051], "label": "Snow tubing"}]}, "ULwdDmQ8Z_8": {"duration": 122.834, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ULwdDmQ8Z_8", "annotations": [{"segment": [22.270757718113916, 122.7187615732541], "label": "Waterskiing"}]}, "-1CEVKeAyA8": {"duration": 202.386, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-1CEVKeAyA8", "annotations": [{"segment": [20.522658588143525, 179.33646273946957], "label": "Knitting"}]}, "iJPPWotXvV0": {"duration": 35.46, "subset": "training", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=iJPPWotXvV0", "annotations": [{"segment": [4.25924947425897, 25.83207148673947], "label": "Clean and jerk"}]}, "tlgEi4bU9Fc": {"duration": 86.75, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tlgEi4bU9Fc", "annotations": [{"segment": [1.082680174726989, 21.112263407176286], "label": "Removing curlers"}]}, "dn1qrAHh7k0": {"duration": 105.7, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dn1qrAHh7k0", "annotations": [{"segment": [29.021392873634948, 105.20254916692667], "label": "Making a sandwich"}]}, "63HZk1SInLk": {"duration": 110.388, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=63HZk1SInLk", "annotations": [{"segment": [40.98637149295496, 104.3602568265996], "label": "Making a cake"}]}, "rDYeRbucGt4": {"duration": 82.803, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=rDYeRbucGt4", "annotations": [{"segment": [0.01, 82.741], "label": "Kneeling"}]}, "G8dCenteoT0": {"duration": 27.052, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=G8dCenteoT0", "annotations": [{"segment": [4.459458455330032, 11.443139683134786], "label": "Putting in contact lenses"}]}, "bOp6pObPeZ4": {"duration": 154.37, "subset": 
"training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=bOp6pObPeZ4", "annotations": [{"segment": [10.952297191887675, 154.295], "label": "Tango"}]}, "-2VzSMAdzl4": {"duration": 223.445, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-2VzSMAdzl4", "annotations": [{"segment": [36.23953822152886, 42.511765990639624], "label": "Waterskiing"}, {"segment": [45.29942277691107, 99.31027301092044], "label": "Waterskiing"}, {"segment": [111.85472854914195, 210.8354212168487], "label": "Waterskiing"}]}, "2jpnkf-xgf0": {"duration": 161.681, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2jpnkf-xgf0", "annotations": [{"segment": [9.832198127925118, 141.9541700468019], "label": "Baking cookies"}]}, "zPV8s8ZuLBY": {"duration": 185.78, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=zPV8s8ZuLBY", "annotations": [{"segment": [7.535655031643443, 53.61908387900141], "label": "Playing harmonica"}, {"segment": [58.5462429381529, 60.575073138979974], "label": "Playing harmonica"}, {"segment": [64.34290065480171, 69.84972548561805], "label": "Playing harmonica"}, {"segment": [93.3261892380457, 100.28217849802427], "label": "Playing harmonica"}, {"segment": [169.26240366926677, 182.30488340405617], "label": "Playing harmonica"}]}, "ZGnnFIDJjaY": {"duration": 142.269, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZGnnFIDJjaY", "annotations": [{"segment": [11.759302652106085, 27.956078003120126], "label": "Applying sunscreen"}, {"segment": [40.38100156006241, 57.68714508580343], "label": "Applying sunscreen"}, {"segment": [78.76514040561624, 126.02422464898596], "label": "Applying sunscreen"}]}, "KE2tDW9x9d8": {"duration": 203.06, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KE2tDW9x9d8", "annotations": [{"segment": [69.0588287450858, 190.07017085803432], 
"label": "Making a sandwich"}]}, "AhFc_MuiZIc": {"duration": 83.871, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AhFc_MuiZIc", "annotations": []}, "0z_dyY_89ys": {"duration": 96.43, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=0z_dyY_89ys", "annotations": [{"segment": [66.19359931357255, 82.89243914040561], "label": "Cleaning windows"}]}, "3ROld-bSgPQ": {"duration": 84.568, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3ROld-bSgPQ", "annotations": [{"segment": [0.2, 27.092888603027895], "label": "Having an ice cream"}, {"segment": [40.90237168086245, 52.344514802496796], "label": "Having an ice cream"}, {"segment": [70.7571589062762, 80.48955650398818], "label": "Having an ice cream"}]}, "lU6DMCif3eE": {"duration": 129.82, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lU6DMCif3eE", "annotations": []}, "MjRsR_7ECi8": {"duration": 30.14, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MjRsR_7ECi8", "annotations": [{"segment": [16.572340258268554, 19.798896333034907], "label": "Gargling mouthwash"}]}, "huC1ZNVVmM0": {"duration": 148.515, "subset": "testing", "resolution": "320x208", "url": "https://www.youtube.com/watch?v=huC1ZNVVmM0", "annotations": []}, "kWPvAxLzGw8": {"duration": 206.89, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kWPvAxLzGw8", "annotations": [{"segment": [51.48026538748625, 67.61827961553837], "label": "Getting a tattoo"}]}, "Z7ZODw0C_hY": {"duration": 226.697, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Z7ZODw0C_hY", "annotations": [{"segment": [65.7807512948518, 120.2443840873635], "label": "Trimming branches or hedges"}, {"segment": [137.22006184087363, 202.29349322932916], "label": "Trimming branches or hedges"}]}, "YRp5nE0Hlsc": {"duration": 
161.01, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=YRp5nE0Hlsc", "annotations": [{"segment": [12.810242499495898, 134.13077440648647], "label": "Ballet"}]}, "EIibo7aTpys": {"duration": 170.81, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EIibo7aTpys", "annotations": [{"segment": [23.715651777091495, 163.34488246468635], "label": "Doing kickboxing"}]}, "VEihQG2UWKE": {"duration": 125.202, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VEihQG2UWKE", "annotations": [{"segment": [0.2, 125.0019954648526], "label": "Kite flying"}]}, "ASXqlsSfZ5E": {"duration": 21.734, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ASXqlsSfZ5E", "annotations": [{"segment": [0.2, 21.53387755102041], "label": "Powerbocking"}]}, "KTq7Gblfl-g": {"duration": 68.894, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KTq7Gblfl-g", "annotations": []}, "mhU_KzxJyr4": {"duration": 56.193, "subset": "training", "resolution": "562x480", "url": "https://www.youtube.com/watch?v=mhU_KzxJyr4", "annotations": [{"segment": [30.28773264692533, 51.940612426348075], "label": "Camel ride"}]}, "bXp_reTHyHY": {"duration": 101.94, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=bXp_reTHyHY", "annotations": [{"segment": [7.3151913287415855, 93.6662524352574], "label": "Grooming horse"}]}, "H0l29-F7Edg": {"duration": 40.868, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=H0l29-F7Edg", "annotations": [{"segment": [0.2, 9.818309400936037], "label": "Laying tile"}]}, "5l2_BVOqQX8": {"duration": 219.336, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5l2_BVOqQX8", "annotations": [{"segment": [40.02476911076443, 148.81003900156009], "label": "Using the monkey bar"}]}, "jprf0pE-4uI": 
{"duration": 88.979, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jprf0pE-4uI", "annotations": [{"segment": [13.603546541861675, 88.77829931972789], "label": "Blowing leaves"}]}, "opxLlaEPxqw": {"duration": 117.03, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=opxLlaEPxqw", "annotations": [{"segment": [0, 88.89155538221529], "label": "Javelin throw"}]}, "67FrPviS7ws": {"duration": 135.628, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=67FrPviS7ws", "annotations": []}, "g9XLOpSmr3o": {"duration": 135.42, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=g9XLOpSmr3o", "annotations": []}, "zwm-v9MLOFY": {"duration": 224.7, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zwm-v9MLOFY", "annotations": [{"segment": [22.61013609828393, 190.87161403900157], "label": "Breakdancing"}]}, "NRaKsY2xtqU": {"duration": 13.909, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NRaKsY2xtqU", "annotations": []}, "FIzlf7jdsUY": {"duration": 149.61, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=FIzlf7jdsUY", "annotations": [{"segment": [82.62181398439937, 104.32754477691107], "label": "Removing curlers"}]}, "Isx1Q5wyJZg": {"duration": 215.83, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Isx1Q5wyJZg", "annotations": [{"segment": [0.2, 17.508787644022767], "label": "Rope skipping"}, {"segment": [31.313793286425334, 52.52636293206831], "label": "Rope skipping"}, {"segment": [63.30100148223616, 133.67285951301997], "label": "Rope skipping"}, {"segment": [145.45762042726608, 157.5790887962049], "label": "Rope skipping"}, {"segment": [184.51568517162457, 194.95361585335414], "label": "Rope skipping"}]}, "BUk-fR6TAnk": {"duration": 184.92, "subset": "training", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=BUk-fR6TAnk", "annotations": [{"segment": [45.870314028887684, 166.7486870920437], "label": "Wrapping presents"}]}, "J3coWVRd5do": {"duration": 44.14, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=J3coWVRd5do", "annotations": [{"segment": [7.506012074882996, 34.63783553822153], "label": "High jump"}]}, "1sTTv-XC-RA": {"duration": 93.46000000000001, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1sTTv-XC-RA", "annotations": [{"segment": [2.3654726, 19.6181886], "label": "Kayaking"}, {"segment": [24.106137799999996, 77.13029536817473], "label": "Kayaking"}]}, "NS8qkB39x9U": {"duration": 64.04, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=NS8qkB39x9U", "annotations": [{"segment": [0.01, 64.040633], "label": "Removing curlers"}]}, "ANuV_fDgI54": {"duration": 223.35, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ANuV_fDgI54", "annotations": [{"segment": [7.665772796898271, 220.21674580180488], "label": "Pole vault"}]}, "DvIng_zQPyY": {"duration": 199.95, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DvIng_zQPyY", "annotations": [{"segment": [4.5222808112324495, 165.45310140405618], "label": "Dodgeball"}]}, "i-VlZZWf7pY": {"duration": 125.365, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=i-VlZZWf7pY", "annotations": []}, "EYIYohKR0Qo": {"duration": 108.623, "subset": "validation", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=EYIYohKR0Qo", "annotations": [{"segment": [11.184266080847316, 71.51151827145087], "label": "Swimming"}, {"segment": [81.1706569953198, 106.7589016848674], "label": "Swimming"}]}, "Oj_PLXsWxL4": {"duration": 171.36, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Oj_PLXsWxL4", 
"annotations": [{"segment": [0, 171.3631746031746], "label": "Clean and jerk"}]}, "9mF5s6_dTlk": {"duration": 130.357, "subset": "training", "resolution": "544x360", "url": "https://www.youtube.com/watch?v=9mF5s6_dTlk", "annotations": [{"segment": [0.01, 130.301], "label": "Waxing skis"}]}, "5-EESwowW1Y": {"duration": 67.06, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5-EESwowW1Y", "annotations": []}, "I3BWhaDRxGk": {"duration": 34.947, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=I3BWhaDRxGk", "annotations": [{"segment": [0.01, 32.82138855651903], "label": "Tug of war"}]}, "KYtV2vpwuVw": {"duration": 98.709, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KYtV2vpwuVw", "annotations": [{"segment": [41.243987519500784, 72.64013728549142], "label": "BMX"}]}, "yqkjHCPsdF0": {"duration": 40.57, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=yqkjHCPsdF0", "annotations": [{"segment": [0, 40.565237], "label": "Playing water polo"}]}, "800KXmqsK-w": {"duration": 184.32, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=800KXmqsK-w", "annotations": [{"segment": [15.524452418096725, 184.261], "label": "Riding bumper cars"}]}, "Jb8YAhQ6EMk": {"duration": 54.15, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Jb8YAhQ6EMk", "annotations": [{"segment": [1.0981843109204368, 49.8406725725429], "label": "Washing dishes"}]}, "KmhzUY2ooCw": {"duration": 97.385, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KmhzUY2ooCw", "annotations": [{"segment": [13.990528992162686, 26.423491608985117], "label": "Croquet"}, {"segment": [40.52429067440568, 43.10185609496642], "label": "Croquet"}, {"segment": [52.66482876561958, 56.75945026094669], "label": "Croquet"}, {"segment": [80.41726334505883, 87.5449377998875], 
"label": "Croquet"}]}, "YIT4kdS5IVQ": {"duration": 201.386, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YIT4kdS5IVQ", "annotations": []}, "Hi0L9rcsXUI": {"duration": 30.0, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Hi0L9rcsXUI", "annotations": [{"segment": [10.106262878629739, 26.087667177695156], "label": "Using uneven bars"}]}, "-Sp-gOEGy2o": {"duration": 229.34, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-Sp-gOEGy2o", "annotations": [{"segment": [40.78052106084243, 210.69935881435256], "label": "Starting a campfire"}]}, "hjuvoK5En4s": {"duration": 12.028, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=hjuvoK5En4s", "annotations": [{"segment": [0.19999999999999998, 11.781], "label": "Swinging at the playground"}]}, "o8PA6XW32oI": {"duration": 31.58, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=o8PA6XW32oI", "annotations": [{"segment": [0.01, 29.583887314352573], "label": "Playing rubik cube"}]}, "UFmKIOTYWNs": {"duration": 226.07, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UFmKIOTYWNs", "annotations": []}, "or7N8oeI-SY": {"duration": 218.9, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=or7N8oeI-SY", "annotations": [{"segment": [49.84840873634945, 116.08533541341653], "label": "Cleaning shoes"}]}, "ntS2PA5YWuA": {"duration": 139.088, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ntS2PA5YWuA", "annotations": [{"segment": [18.842004672897193, 132.5437570093458], "label": "Changing car wheel"}]}, "bHgNCJohwSg": {"duration": 41.75, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bHgNCJohwSg", "annotations": []}, "6RePzOd3GvQ": {"duration": 13.52, "subset": "validation", "resolution": 
"480x480", "url": "https://www.youtube.com/watch?v=6RePzOd3GvQ", "annotations": [{"segment": [5.045960998439937, 6.2603416536661465], "label": "Brushing teeth"}]}, "f119wB0sj0w": {"duration": 114.64, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=f119wB0sj0w", "annotations": [{"segment": [34.2131142, 82.30520892], "label": "Cricket"}, {"segment": [34.2131142, 82.30520892], "label": "Cricket"}]}, "XEriJg8cW4g": {"duration": 74.78999999999999, "subset": "validation", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=XEriJg8cW4g", "annotations": [{"segment": [60.14454404681586, 72.37679414027382], "label": "Playing bagpipes"}]}, "YzPsXYD1OgQ": {"duration": 85.821, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=YzPsXYD1OgQ", "annotations": []}, "8Qg395HjqFg": {"duration": 165.86, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8Qg395HjqFg", "annotations": [{"segment": [23.934575015653685, 73.61498971528862], "label": "Making a sandwich"}]}, "tRAntV9Om2o": {"duration": 127.757, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tRAntV9Om2o", "annotations": [{"segment": [42.05391727777955, 122.17559829594383], "label": "Cutting the grass"}]}, "g_T8qK7qTFI": {"duration": 64.34, "subset": "training", "resolution": "400x320", "url": "https://www.youtube.com/watch?v=g_T8qK7qTFI", "annotations": [{"segment": [51.19293514820593, 63.53946788075606], "label": "Cricket"}]}, "r1ZhBGLfRWY": {"duration": 14.258, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=r1ZhBGLfRWY", "annotations": [{"segment": [7.94246177847114, 8.408360374414976], "label": "Hitting a pinata"}]}, "IfiAeg5elK4": {"duration": 128.57, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IfiAeg5elK4", "annotations": []}, "LlhgIOYsxsE": {"duration": 74.978, "subset": 
"training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=LlhgIOYsxsE", "annotations": [{"segment": [15.140186427457099, 26.0130616224649], "label": "Using the rowing machine"}, {"segment": [28.818964898595947, 32.20943135725429], "label": "Using the rowing machine"}, {"segment": [35.95063572542902, 38.28888845553822], "label": "Using the rowing machine"}, {"segment": [40.39331591263651, 42.497743369734785], "label": "Using the rowing machine"}]}, "e4mXFJzcWLs": {"duration": 104.165, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=e4mXFJzcWLs", "annotations": []}, "Vm07Uu4_iCE": {"duration": 68.314, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Vm07Uu4_iCE", "annotations": [{"segment": [0.01, 67.78024303012936], "label": "Changing car wheel"}]}, "S6Sg1l78IW0": {"duration": 95.34, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=S6Sg1l78IW0", "annotations": []}, "Sk1RSHPavBU": {"duration": 9.869, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Sk1RSHPavBU", "annotations": [{"segment": [0.009999999999999998, 9.8684806], "label": "Beach soccer"}]}, "1Y3BV0Awjuo": {"duration": 111.294, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=1Y3BV0Awjuo", "annotations": [{"segment": [10.591077822704744, 109.55688575351014], "label": "Peeling potatoes"}]}, "tgPjcWP7Vig": {"duration": 174.36, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tgPjcWP7Vig", "annotations": [{"segment": [0, 146.70076575], "label": "Spinning"}]}, "or8ypNphr1A": {"duration": 17.74, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=or8ypNphr1A", "annotations": []}, "r9eXOf4hvCE": {"duration": 68.15, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=r9eXOf4hvCE", "annotations": 
[{"segment": [0.6905954204368175, 65.92530128939158], "label": "Using parallel bars"}]}, "2wC0d2XUses": {"duration": 30.906, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2wC0d2XUses", "annotations": [{"segment": [0.01, 30.905668934240364], "label": "Shuffleboard"}]}, "GEiTdrt_boA": {"duration": 167.07, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GEiTdrt_boA", "annotations": []}, "SCM5-6hQCGY": {"duration": 120.024, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=SCM5-6hQCGY", "annotations": [{"segment": [2.24693813875004, 117.9642522843771], "label": "Building sandcastles"}]}, "dhISoJW6Ii0": {"duration": 60.93, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dhISoJW6Ii0", "annotations": []}, "e51ld7ANyQg": {"duration": 167.858, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=e51ld7ANyQg", "annotations": [{"segment": [41.869828393135734, 57.04764118564743], "label": "Curling"}, {"segment": [85.57146177847115, 121.68418876755071], "label": "Curling"}, {"segment": [133.4759906396256, 137.4017550702028], "label": "Curling"}]}, "gRzHp84ptgE": {"duration": 99.335, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gRzHp84ptgE", "annotations": [{"segment": [31.923386715060442, 76.24420380655226], "label": "Surfing"}]}, "G2soQTiGL10": {"duration": 204.96, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=G2soQTiGL10", "annotations": [{"segment": [61.38427774479469, 194.8334068475984], "label": "Preparing pasta"}]}, "nyg5Q5E2-AU": {"duration": 129.614, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nyg5Q5E2-AU", "annotations": []}, "rob5gIT_pAw": {"duration": 60.976, "subset": "training", "resolution": "640x360", "url": 
"https://www.youtube.com/watch?v=rob5gIT_pAw", "annotations": [{"segment": [0.2, 4.267343904837187], "label": "Welding"}, {"segment": [26.078219470584518, 27.974817355498985], "label": "Welding"}, {"segment": [44.75970863699198, 51.87195070542122], "label": "Welding"}]}, "t97xM9sY2yg": {"duration": 130.73, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=t97xM9sY2yg", "annotations": [{"segment": [15.527915604243768, 118.33789223975778], "label": "Preparing pasta"}]}, "QHJTOHgjDMw": {"duration": 96.13, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QHJTOHgjDMw", "annotations": [{"segment": [19.04608202028081, 43.491053432137285], "label": "Playing piano"}, {"segment": [49.03991197347894, 87.88192176287052], "label": "Playing piano"}]}, "SWEWGVoDiz4": {"duration": 176.17000000000002, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SWEWGVoDiz4", "annotations": []}, "fgP2pf2rh4Q": {"duration": 235.52, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fgP2pf2rh4Q", "annotations": [{"segment": [42.25397815912637, 235.52], "label": "Smoking a cigarette"}]}, "fd7VuzALBCM": {"duration": 81.201, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fd7VuzALBCM", "annotations": [{"segment": [16.460109204368173, 60.030879875195005], "label": "Removing ice from car"}]}, "YAPkcWlWmYg": {"duration": 222.56, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YAPkcWlWmYg", "annotations": [{"segment": [15.277342620904838, 219.7854063416537], "label": "Zumba"}]}, "U6S8a3WI19w": {"duration": 117.772, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=U6S8a3WI19w", "annotations": [{"segment": [8.543494327528203, 110.51423307544547], "label": "Beach soccer"}]}, "mLFjww64sX8": {"duration": 47.56, "subset": "testing", 
"resolution": "272x480", "url": "https://www.youtube.com/watch?v=mLFjww64sX8", "annotations": []}, "c_KU8M2YfzA": {"duration": 171.201, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=c_KU8M2YfzA", "annotations": [{"segment": [29.699899783755388, 163.80267704232236], "label": "Tug of war"}]}, "nStBfiFl6RU": {"duration": 171.92000000000002, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nStBfiFl6RU", "annotations": [{"segment": [1.0728259656786272, 168.16547012012484], "label": "Zumba"}]}, "oeFUzyNgxQU": {"duration": 145.706, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=oeFUzyNgxQU", "annotations": [{"segment": [0.01, 145.661], "label": "Slacklining"}]}, "k3qEvYiUthQ": {"duration": 191.054, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=k3qEvYiUthQ", "annotations": []}, "Zv78Or7fW5U": {"duration": 50.85, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=Zv78Or7fW5U", "annotations": [{"segment": [7.166301092043682, 17.302063962558503], "label": "Long jump"}, {"segment": [29.655024960998443, 41.928800312012484], "label": "Long jump"}]}, "FBbN6FHC7L8": {"duration": 65.9, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=FBbN6FHC7L8", "annotations": [{"segment": [3.39572394, 46.29466296], "label": "Ballet"}]}, "9XmzbuByY_E": {"duration": 49.668, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=9XmzbuByY_E", "annotations": [{"segment": [6.27288767550702, 28.886260530421215], "label": "Shuffleboard"}, {"segment": [42.43879563182527, 46.465834633385334], "label": "Shuffleboard"}]}, "Ft3SzVtUsKA": {"duration": 75.09, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Ft3SzVtUsKA", "annotations": [{"segment": [50.94725429017161, 73.31719812792511], "label": 
"Springboard diving"}]}, "xx5TLuZmjcE": {"duration": 6.85, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xx5TLuZmjcE", "annotations": [{"segment": [0.7266649734789391, 5.51410479875195], "label": "Playing squash"}]}, "cS19K2310Lc": {"duration": 205.474, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cS19K2310Lc", "annotations": [{"segment": [38.14553648812619, 179.5084070029468], "label": "Braiding hair"}]}, "gg_F8EtNMW0": {"duration": 10.12, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=gg_F8EtNMW0", "annotations": [{"segment": [0.576477868174727, 10.123899], "label": "Doing kickboxing"}]}, "r_jey4tT7zo": {"duration": 52.41, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=r_jey4tT7zo", "annotations": [{"segment": [14.716597113884555, 49.627635822932916], "label": "Ballet"}]}, "R_ffZ9kGeTI": {"duration": 168.902, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=R_ffZ9kGeTI", "annotations": [{"segment": [8.4157507788162, 168.861], "label": "Longboarding"}]}, "1DvNkSGk-JA": {"duration": 67.431, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1DvNkSGk-JA", "annotations": [{"segment": [0.01, 55.45813162432281], "label": "Waterskiing"}]}, "DmaPpBMsuXg": {"duration": 25.71, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DmaPpBMsuXg", "annotations": [{"segment": [10.185401188767552, 21.052502457098285], "label": "Playing badminton"}]}, "sXdKLXuww-U": {"duration": 184.67, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=sXdKLXuww-U", "annotations": []}, "IKhLjVIYisg": {"duration": 194.375, "subset": "training", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=IKhLjVIYisg", "annotations": [{"segment": [26.98060686427457, 
178.55704992199688], "label": "Rollerblading"}]}, "hXGE1GGTHzM": {"duration": 73.376, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hXGE1GGTHzM", "annotations": [{"segment": [1.1438533541341653, 65.10300936037441], "label": "Playing pool"}]}, "46ac33Z6V4E": {"duration": 79.9, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=46ac33Z6V4E", "annotations": [{"segment": [42.35996879875195, 77.61841341653667], "label": "Removing ice from car"}]}, "yrGietojx78": {"duration": 140.853, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yrGietojx78", "annotations": [{"segment": [34.915755070202806, 126.94448985959438], "label": "Cutting the grass"}]}, "4G2jW3hbiO4": {"duration": 165.153, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4G2jW3hbiO4", "annotations": [{"segment": [25.465668224299062, 89.00122429906541], "label": "Wakeboarding"}]}, "igrjxhf0XyY": {"duration": 57.679, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=igrjxhf0XyY", "annotations": [{"segment": [6.553031201248049, 44.16563494539782], "label": "Applying sunscreen"}]}, "jA-KApGlXyA": {"duration": 66.99, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jA-KApGlXyA", "annotations": [{"segment": [28.518491419656783, 34.159511700468016], "label": "Playing racquetball"}, {"segment": [50.66471918876755, 59.75302964118564], "label": "Playing racquetball"}]}, "o1WPnnvs00I": {"duration": 229.86, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=o1WPnnvs00I", "annotations": [{"segment": [4.303033313169262, 13.626272158369328], "label": "Playing flauta"}, {"segment": [17.92930547153859, 24.025269212168485], "label": "Playing flauta"}, {"segment": [28.68688861154446, 35.50002465678627], "label": "Playing flauta"}, {"segment": 
[39.085885733229325, 40.52023016380655], "label": "Playing flauta"}, {"segment": [45.89902177847114, 48.767710639625584], "label": "Playing flauta"}, {"segment": [59.16670776131045, 63.469741053042114], "label": "Playing flauta"}, {"segment": [68.1313604524181, 95.02531852574101], "label": "Playing flauta"}, {"segment": [98.96976570982838, 99.686937925117], "label": "Playing flauta"}, {"segment": [100.7626962480499, 108.65159061622464], "label": "Playing flauta"}, {"segment": [111.16169336973478, 111.52027947737909], "label": "Playing flauta"}, {"segment": [114.38896833853353, 120.84351827613104], "label": "Playing flauta"}, {"segment": [125.50513767550702, 128.73241264430575], "label": "Playing flauta"}, {"segment": [131.95968761310453, 140.56575419656784], "label": "Playing flauta"}, {"segment": [143.075856950078, 157.06071514820593], "label": "Playing flauta"}, {"segment": [158.49505957878316, 162.0809206552262], "label": "Playing flauta"}, {"segment": [164.59102340873633, 166.3839539469579], "label": "Playing flauta"}, {"segment": [168.894056700468, 171.04557334633384], "label": "Playing flauta"}, {"segment": [173.1970899921997, 190.40922315912636], "label": "Playing flauta"}, {"segment": [193.9950842355694, 194.712256450858], "label": "Playing flauta"}, {"segment": [204.7526674648986, 212.6415618330733], "label": "Playing flauta"}, {"segment": [214.43449237129485, 217.66176734009358], "label": "Playing flauta"}, {"segment": [219.81328398595943, 221.24762841653666], "label": "Playing flauta"}, {"segment": [222.68197284711388, 224.8334894929797], "label": "Playing flauta"}]}, "YJcjkeVD0qk": {"duration": 115.89, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YJcjkeVD0qk", "annotations": [{"segment": [16.445446177847113, 115.841], "label": "Smoking hookah"}]}, "2SMmL6kIx-w": {"duration": 231.97, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2SMmL6kIx-w", "annotations": [{"segment": 
[9.407132605304213, 221.067616224649], "label": "Ballet"}]}, "3dAJEnMn6QA": {"duration": 184.251, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=3dAJEnMn6QA", "annotations": [{"segment": [0.19999999999999998, 61.48944461778472], "label": "Building sandcastles"}, {"segment": [75.28146957878315, 138.79732137285492], "label": "Building sandcastles"}]}, "uz04njTFKP8": {"duration": 45.77, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uz04njTFKP8", "annotations": [{"segment": [7.925249171606865, 8.782032865834633], "label": "Tennis serve with ball bouncing"}]}, "ncTkaZcDcTo": {"duration": 89.908, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ncTkaZcDcTo", "annotations": [{"segment": [0.19999999999999998, 2.5246910121302815], "label": "Longboarding"}, {"segment": [11.781891389941313, 18.935182590977107], "label": "Longboarding"}, {"segment": [21.880655438462437, 50.213298374414975], "label": "Longboarding"}]}, "58he7Xcwfws": {"duration": 36.22, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=58he7Xcwfws", "annotations": [{"segment": [6.131306563182527, 34.83825342121684], "label": "Clean and jerk"}]}, "oR0NlCzEvfw": {"duration": 7.71, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=oR0NlCzEvfw", "annotations": []}, "9aJfBzzywJE": {"duration": 44.327, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9aJfBzzywJE", "annotations": []}, "rKSaO4xIUDY": {"duration": 53.105, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rKSaO4xIUDY", "annotations": []}, "Hm3DAsKWs8E": {"duration": 62.56, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Hm3DAsKWs8E", "annotations": []}, "SqjBRSIWMoA": {"duration": 215.993, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=SqjBRSIWMoA", "annotations": []}, "vV_d5bfNmBQ": {"duration": 106.092, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vV_d5bfNmBQ", "annotations": [{"segment": [2.151631273414202, 96.98891722870515], "label": "Playing beach volleyball"}]}, "1P_y_DLlg-E": {"duration": 26.285, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1P_y_DLlg-E", "annotations": [{"segment": [5.781877235569422, 26.284988], "label": "Blowing leaves"}]}, "E8Ae4lFrJ18": {"duration": 111.921, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=E8Ae4lFrJ18", "annotations": []}, "cMf_8UIl9uw": {"duration": 174.731, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cMf_8UIl9uw", "annotations": []}, "3QAGRGjqQZY": {"duration": 201.201, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3QAGRGjqQZY", "annotations": []}, "ECDCTq_bJwg": {"duration": 16.42, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ECDCTq_bJwg", "annotations": []}, "8W-x068F4tE": {"duration": 58.31, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8W-x068F4tE", "annotations": [{"segment": [7.00099375975039, 54.098588143525745], "label": "Using the pommel horse"}]}, "H6E-PA807Vo": {"duration": 84.173, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=H6E-PA807Vo", "annotations": []}, "afrMWrFkgDg": {"duration": 80.736, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=afrMWrFkgDg", "annotations": [{"segment": [13.593678627145087, 14.856034321372853], "label": "Having an ice cream"}, {"segment": [25.683313572542897, 61.438514820592815], "label": "Having an ice cream"}, {"segment": [77.42763650546021, 80.50099999999999], "label": "Having an ice cream"}]}, "0gf3AgK1YLY": 
{"duration": 133.05, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=0gf3AgK1YLY", "annotations": [{"segment": [10.3698127925117, 122.5711872074883], "label": "Putting on makeup"}]}, "XbkGlZTlixw": {"duration": 124.39, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XbkGlZTlixw", "annotations": [{"segment": [24.64499312324493, 118.37358901716068], "label": "Longboarding"}]}, "b8pCuIPzb3o": {"duration": 46.456, "subset": "validation", "resolution": "3840x2160", "url": "https://www.youtube.com/watch?v=b8pCuIPzb3o", "annotations": [{"segment": [4.67998004818152, 8.59810287921721], "label": "Fun sliding down"}, {"segment": [19.19154650169095, 20.279913978157357], "label": "Fun sliding down"}, {"segment": [26.737561005191363, 32.687303209874386], "label": "Fun sliding down"}, {"segment": [37.984024928677556, 40.37843337690365], "label": "Fun sliding down"}]}, "jN2CFk0xKp8": {"duration": 32.25, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=jN2CFk0xKp8", "annotations": []}, "Lr-FutvMXoA": {"duration": 43.26, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Lr-FutvMXoA", "annotations": [{"segment": [6.97362218, 42.92114094664586], "label": "Pole vault"}]}, "GWRqx0obKzA": {"duration": 64.738, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GWRqx0obKzA", "annotations": [{"segment": [8.281480240249609, 10.402347131045243], "label": "River tubing"}, {"segment": [12.624207683307333, 16.057992173166927], "label": "River tubing"}, {"segment": [21.309662569422777, 26.864313950078003], "label": "River tubing"}, {"segment": [34.84281320592824, 44.33621738377535], "label": "River tubing"}]}, "T-2mwXMMZIk": {"duration": 232.154, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=T-2mwXMMZIk", "annotations": []}, "sAAARH12tdc": {"duration": 211.6, 
"subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=sAAARH12tdc", "annotations": [{"segment": [33.01144399517477, 144.26001025891375], "label": "Putting on makeup"}, {"segment": [160.10550337659765, 176.94133981413677], "label": "Putting on makeup"}, {"segment": [181.2328275335095, 204.67095179407175], "label": "Putting on makeup"}]}, "ruHRz56XGE4": {"duration": 43.63, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ruHRz56XGE4", "annotations": [{"segment": [4.419290171606864, 37.597960998439945], "label": "Mowing the lawn"}]}, "x_luDzL03vw": {"duration": 229.69, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=x_luDzL03vw", "annotations": [{"segment": [12.18330867656475, 225.03287790831362], "label": "Tai chi"}]}, "kUwCHpiuCuA": {"duration": 153.415, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kUwCHpiuCuA", "annotations": [{"segment": [61.26988025210609, 86.40010457425898], "label": "Elliptical trainer"}]}, "G-xh30e4s8E": {"duration": 160.01, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=G-xh30e4s8E", "annotations": [{"segment": [6.4902127840215655, 154.01774183620407], "label": "Playing bagpipes"}]}, "6AwS8vWMwSA": {"duration": 53.964, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6AwS8vWMwSA", "annotations": [{"segment": [1.4300421216848673, 4.712474258970359], "label": "Rock-paper-scissors"}, {"segment": [11.10797503900156, 14.389876755070205], "label": "Rock-paper-scissors"}, {"segment": [21.374436817472702, 28.274845553822153], "label": "Rock-paper-scissors"}, {"segment": [39.21451794071763, 41.654906396255846], "label": "Rock-paper-scissors"}]}, "qF9gCNZO3hk": {"duration": 153.65, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=qF9gCNZO3hk", "annotations": []}, "Ib1xMq2aUl0": 
{"duration": 193.121, "subset": "training", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=Ib1xMq2aUl0", "annotations": [{"segment": [20.482851794071763, 175.92977223088923], "label": "Rollerblading"}]}, "4KqjBUpLQk8": {"duration": 226.349, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=4KqjBUpLQk8", "annotations": [{"segment": [0.2, 198.45148220592824], "label": "Calf roping"}]}, "kDJVAE81rww": {"duration": 95.852, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kDJVAE81rww", "annotations": []}, "rHUCx_JqNk4": {"duration": 156.875, "subset": "training", "resolution": "2276x720", "url": "https://www.youtube.com/watch?v=rHUCx_JqNk4", "annotations": [{"segment": [12.726129028834622, 115.51409426172964], "label": "Braiding hair"}]}, "cDV1vDdAT1w": {"duration": 60.16, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cDV1vDdAT1w", "annotations": [{"segment": [4.129746817083568, 22.244317530421217], "label": "Windsurfing"}, {"segment": [24.121475127925116, 41.109751385335414], "label": "Windsurfing"}, {"segment": [43.36234050234009, 47.39822933697348], "label": "Windsurfing"}, {"segment": [49.932392093603745, 53.87442304836193], "label": "Windsurfing"}]}, "WDvsx_q2h-4": {"duration": 146.89, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WDvsx_q2h-4", "annotations": [{"segment": [27.1550668067539, 98.4227948818633], "label": "Playing violin"}, {"segment": [103.00592852013399, 128.4423192449298], "label": "Playing violin"}]}, "2vOBrR0jKgY": {"duration": 193.21, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2vOBrR0jKgY", "annotations": [{"segment": [37.37319059, 169.70211482200784], "label": "Smoking a cigarette"}]}, "A_g93uJD-_8": {"duration": 106.301, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=A_g93uJD-_8", 
"annotations": [{"segment": [0.01, 106.30095238095238], "label": "Making an omelette"}]}, "xZEl3yh0Cos": {"duration": 159.15, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=xZEl3yh0Cos", "annotations": [{"segment": [21.82091939, 145.13962528081123], "label": "Fixing bicycle"}]}, "VvbWwNDHJf4": {"duration": 123.786, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VvbWwNDHJf4", "annotations": [{"segment": [14.475936037441498, 115.61447581903276], "label": "Rollerblading"}]}, "kC1DyxcRHBg": {"duration": 117.586, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=kC1DyxcRHBg", "annotations": [{"segment": [4.95291314976599, 9.172061388455537], "label": "Cleaning sink"}, {"segment": [11.740238577223089, 19.07788768798752], "label": "Cleaning sink"}, {"segment": [21.09574119344774, 114.46732612792512], "label": "Cleaning sink"}]}, "CdwgIN9FkdY": {"duration": 35.806, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=CdwgIN9FkdY", "annotations": [{"segment": [0.7814882995319813, 34.83204992199688], "label": "Kneeling"}]}, "4CSyAAoO18s": {"duration": 154.72, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=4CSyAAoO18s", "annotations": [{"segment": [43.92818562266301, 68.06455134940092], "label": "Washing face"}]}, "_0IkiqDouX8": {"duration": 149.931, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_0IkiqDouX8", "annotations": [{"segment": [14.501868770670827, 133.55753335569423], "label": "Surfing"}]}, "QnwlpzsfFws": {"duration": 221.57, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QnwlpzsfFws", "annotations": []}, "i9g2MGNUVQI": {"duration": 97.199, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=i9g2MGNUVQI", "annotations": [{"segment": [47.16330066312424, 
52.75638466914759], "label": "Bullfighting"}]}, "gIwTydKpIe4": {"duration": 106.58, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gIwTydKpIe4", "annotations": [{"segment": [8.9736879875195, 9.804585023400936], "label": "Ironing clothes"}, {"segment": [60.833082683307325, 63.8248736349454], "label": "Ironing clothes"}, {"segment": [69.47603432137285, 75.45961622464898], "label": "Ironing clothes"}]}, "zb2YYXs4gtw": {"duration": 206.263, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zb2YYXs4gtw", "annotations": [{"segment": [0.01, 206.2619999999999], "label": "Camel ride"}]}, "Cm8hWFFA16I": {"duration": 209.0, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=Cm8hWFFA16I", "annotations": [{"segment": [27.714881438794965, 128.14056947584027], "label": "Cleaning shoes"}]}, "8EGM3zcvjs8": {"duration": 186.74, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8EGM3zcvjs8", "annotations": [{"segment": [22.570031201248046, 180.4146365054602], "label": "Zumba"}]}, "2xgecBn6YwM": {"duration": 206.1, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2xgecBn6YwM", "annotations": []}, "485DjSbjOV8": {"duration": 148.724, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=485DjSbjOV8", "annotations": [{"segment": [0, 145.47555010296412], "label": "Playing congas"}]}, "O1WvjCFqLz0": {"duration": 63.07, "subset": "validation", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=O1WvjCFqLz0", "annotations": [{"segment": [0.09838595163806553, 34.828626879875195], "label": "Doing kickboxing"}, {"segment": [41.32209968798752, 52.439712223088925], "label": "Doing kickboxing"}]}, "nwznKOuZM7w": {"duration": 31.649, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nwznKOuZM7w", "annotations": 
[{"segment": [1.152541196650805, 30.01532312811498], "label": "Waxing skis"}]}, "Opqg11Nkb7c": {"duration": 192.89, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Opqg11Nkb7c", "annotations": [{"segment": [31.722582683307337, 121.32760296411857], "label": "Starting a campfire"}]}, "0dc-NU85piM": {"duration": 114.52000000000001, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0dc-NU85piM", "annotations": []}, "hW25ecQ1GUc": {"duration": 43.074, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=hW25ecQ1GUc", "annotations": [{"segment": [9.777103801872075, 17.840694566302652], "label": "Putting on shoes"}]}, "-vqefJDOxkw": {"duration": 6.04, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-vqefJDOxkw", "annotations": [{"segment": [0.1412625585023401, 6.03662], "label": "Triple jump"}]}, "3X6eP273RoI": {"duration": 151.57999999999998, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3X6eP273RoI", "annotations": [{"segment": [12.060176227769109, 151.57986394557824], "label": "Making a sandwich"}]}, "f9pbouXN2XA": {"duration": 180.164, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=f9pbouXN2XA", "annotations": []}, "C_2EFIuyDSA": {"duration": 81.967, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=C_2EFIuyDSA", "annotations": [{"segment": [0, 81.766462585034], "label": "Playing drums"}]}, "fBlvOzfFq-k": {"duration": 88.65, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fBlvOzfFq-k", "annotations": [{"segment": [9.690577223088924, 12.874624024960998], "label": "Plataform diving"}, {"segment": [28.656421216848674, 36.68575663026521], "label": "Plataform diving"}, {"segment": [45.68414976599064, 50.667875195007795], "label": "Plataform diving"}, 
{"segment": [56.759095163806556, 60.08157878315132], "label": "Plataform diving"}, {"segment": [66.17279875195008, 69.49528237129485], "label": "Plataform diving"}, {"segment": [76.97087051482059, 84.58489547581904], "label": "Plataform diving"}]}, "ave_VDl3LwE": {"duration": 69.17, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ave_VDl3LwE", "annotations": [{"segment": [17.579842433697348, 22.433173166926675], "label": "Chopping wood"}, {"segment": [28.472873634945397, 32.2476864274571], "label": "Chopping wood"}]}, "cV9xeDlKJok": {"duration": 74.769, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cV9xeDlKJok", "annotations": [{"segment": [20.81418410326752, 71.9775444543183], "label": "Rollerblading"}]}, "Uru0CastJqU": {"duration": 54.61, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Uru0CastJqU", "annotations": [{"segment": [1.5326957878315135, 54.49585023400937], "label": "Springboard diving"}]}, "ARNHE8qiBgw": {"duration": 77.834, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ARNHE8qiBgw", "annotations": []}, "zLjAfrfqRcc": {"duration": 234.59, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zLjAfrfqRcc", "annotations": [{"segment": [66.6077210707476, 96.25181671212427], "label": "Sailing"}, {"segment": [129.18970075809835, 223.9776114121685], "label": "Sailing"}]}, "QBJelFFVJu0": {"duration": 77.14, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QBJelFFVJu0", "annotations": [{"segment": [4.332143007800312, 75.09047880187208], "label": "Playing harmonica"}]}, "s_V2crzeMfI": {"duration": 11.1, "subset": "testing", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=s_V2crzeMfI", "annotations": []}, "HGy8UgabFMw": {"duration": 63.112, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=HGy8UgabFMw", "annotations": [{"segment": [0.01, 63.11163265306122], "label": "Trimming branches or hedges"}]}, "zA2zZPyYLlU": {"duration": 76.232, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zA2zZPyYLlU", "annotations": []}, "IhuwmiSPx0w": {"duration": 183.65, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IhuwmiSPx0w", "annotations": [{"segment": [10.025015600624025, 183.02814196567863], "label": "Cheerleading"}]}, "45WdXofnTkI": {"duration": 67.18, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=45WdXofnTkI", "annotations": [{"segment": [0.2, 66.861], "label": "Playing bagpipes"}]}, "5u-PL0wr3TU": {"duration": 12.052, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5u-PL0wr3TU", "annotations": [{"segment": [0.2, 6.7581903276131055], "label": "Fun sliding down"}]}, "Dv2T0R7HVBQ": {"duration": 17.183, "subset": "training", "resolution": "1276x720", "url": "https://www.youtube.com/watch?v=Dv2T0R7HVBQ", "annotations": [{"segment": [0, 17.161], "label": "Swimming"}]}, "AimG8xzchfI": {"duration": 227.208, "subset": "validation", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=AimG8xzchfI", "annotations": [{"segment": [85.09933576031847, 206.67825745766635], "label": "Curling"}]}, "VthI1KPjEq8": {"duration": 16.95, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=VthI1KPjEq8", "annotations": [{"segment": [1.971624, 13.589153999999999], "label": "Polishing forniture"}]}, "eC4l8AuAmKw": {"duration": 165.0, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eC4l8AuAmKw", "annotations": [{"segment": [9.9396, 159.33793529485177], "label": "Cumbia"}]}, "EDNd6qqEu1M": {"duration": 106.78999999999999, "subset": "testing", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=EDNd6qqEu1M", "annotations": []}, "P_b6_k2ocYw": {"duration": 87.888, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=P_b6_k2ocYw", "annotations": [{"segment": [0.2, 85.0127013504673], "label": "Knitting"}]}, "lO2Y1vkpyNc": {"duration": 138.39, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=lO2Y1vkpyNc", "annotations": [{"segment": [12.306213566302652, 132.77756742589705], "label": "Cricket"}]}, "smk2WJV1Zmo": {"duration": 126.06, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=smk2WJV1Zmo", "annotations": [{"segment": [18.474592823712946, 96.89334321372854], "label": "Walking the dog"}]}, "bDf_xpUpdmU": {"duration": 143.569, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bDf_xpUpdmU", "annotations": [{"segment": [0.2, 57.11393443492842], "label": "Longboarding"}, {"segment": [62.265387344745484, 143.36875283446713], "label": "Longboarding"}]}, "awv0jtZpeoo": {"duration": 28.282, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=awv0jtZpeoo", "annotations": [{"segment": [0.2, 26.610499041139047], "label": "Using the rowing machine"}]}, "iq0h4m3I8hY": {"duration": 217.292, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=iq0h4m3I8hY", "annotations": [{"segment": [11.214904312413994, 43.75790621208182], "label": "Rafting"}, {"segment": [46.469823037054134, 136.98004707050526], "label": "Rafting"}, {"segment": [142.74287032357145, 217.09233560090703], "label": "Rafting"}]}, "H9ejAfleKUg": {"duration": 164.65, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=H9ejAfleKUg", "annotations": [{"segment": [0.01, 146.9287717379095], "label": "Rock climbing"}]}, "wQHMoyzJx_w": {"duration": 88.445, "subset": "training", "resolution": "640x320", "url": 
"https://www.youtube.com/watch?v=wQHMoyzJx_w", "annotations": [{"segment": [35.59868946645866, 79.33816450858033], "label": "Peeling potatoes"}]}, "vdeBWB2GdBY": {"duration": 238.191, "subset": "testing", "resolution": "576x360", "url": "https://www.youtube.com/watch?v=vdeBWB2GdBY", "annotations": []}, "oyljZJZJ7IA": {"duration": 177.49, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=oyljZJZJ7IA", "annotations": [{"segment": [155.8950794321373, 164.47900032449297], "label": "Walking the dog"}]}, "pZqX9rz3W40": {"duration": 73.72, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=pZqX9rz3W40", "annotations": []}, "XdX5ONI3yaQ": {"duration": 128.89, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XdX5ONI3yaQ", "annotations": []}, "OkMH4AlF8hQ": {"duration": 50.248, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OkMH4AlF8hQ", "annotations": [{"segment": [0.19999999999999998, 46.46030109204368], "label": "Having an ice cream"}]}, "2UJ4wqJt_Y8": {"duration": 3.066, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2UJ4wqJt_Y8", "annotations": [{"segment": [0.19999999999999998, 2.865034013605442], "label": "Rollerblading"}]}, "6Kbv1OpIpaA": {"duration": 195.47, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6Kbv1OpIpaA", "annotations": [{"segment": [78.0100815, 89.19451112738388], "label": "Playing guitarra"}, {"segment": [109.32045209971665, 130.6661470703726], "label": "Playing guitarra"}, {"segment": [145.6081335498318, 147.7427030468974], "label": "Playing guitarra"}]}, "PhioVMXx0yY": {"duration": 63.879, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PhioVMXx0yY", "annotations": [{"segment": [0.6471708268330734, 2.3397714508580343], "label": "Elliptical trainer"}, {"segment": [7.5194937597503895, 
9.51141263650546], "label": "Elliptical trainer"}, {"segment": [15.287977379095166, 17.877471918876754], "label": "Elliptical trainer"}, {"segment": [27.93666224648986, 30.72534867394696], "label": "Elliptical trainer"}, {"segment": [56.321506240249605, 57.11827379095164], "label": "Elliptical trainer"}]}, "nM_6V_Z6IwI": {"duration": 97.71000000000001, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=nM_6V_Z6IwI", "annotations": [{"segment": [0.01, 35.82176856474259], "label": "Washing face"}]}, "orrSdYqqr7M": {"duration": 66.084, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=orrSdYqqr7M", "annotations": []}, "-DzTAnE1t3w": {"duration": 149.839, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-DzTAnE1t3w", "annotations": [{"segment": [47.817779972506116, 68.32443055809027], "label": "Decorating the Christmas tree"}, {"segment": [76.71351488855652, 85.80168957989495], "label": "Decorating the Christmas tree"}]}, "JKmWaMLSm6Q": {"duration": 178.632, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JKmWaMLSm6Q", "annotations": []}, "VrUFvuLt9ME": {"duration": 222.494, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VrUFvuLt9ME", "annotations": []}, "bM7JNvFMfTw": {"duration": 68.87, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=bM7JNvFMfTw", "annotations": []}, "LkUnT9fMIXc": {"duration": 21.734, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=LkUnT9fMIXc", "annotations": [{"segment": [0.01, 19.665598533541342], "label": "Curling"}]}, "5GFpN0YZEog": {"duration": 212.02, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5GFpN0YZEog", "annotations": [{"segment": [0, 67.5769856387742], "label": "Playing saxophone"}, {"segment": [74.16986277718789, 
211.30170725619257], "label": "Playing saxophone"}]}, "yqzV0O139Yk": {"duration": 90.976, "subset": "training", "resolution": "600x360", "url": "https://www.youtube.com/watch?v=yqzV0O139Yk", "annotations": [{"segment": [28.568676287051485, 80.4778744149766], "label": "Kneeling"}]}, "iHO42zwYsu0": {"duration": 142.71, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iHO42zwYsu0", "annotations": [{"segment": [5.788542692667708, 13.358175444617785], "label": "Mowing the lawn"}, {"segment": [20.037263166926678, 32.504893581903275], "label": "Mowing the lawn"}, {"segment": [41.63298013572543, 56.549609382215294], "label": "Mowing the lawn"}, {"segment": [60.334425758190335, 91.72613805304213], "label": "Mowing the lawn"}, {"segment": [94.84304565678627, 105.0843134976599], "label": "Mowing the lawn"}]}, "x0TiekqVMj0": {"duration": 220.99, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=x0TiekqVMj0", "annotations": [{"segment": [10.687225352959697, 202.02303284555381], "label": "Belly dance"}]}, "Vpue9WHrcAk": {"duration": 90.26, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=Vpue9WHrcAk", "annotations": [{"segment": [5.3505875101404055, 90.255963], "label": "Smoking a cigarette"}]}, "ssies4ErWus": {"duration": 44.26, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ssies4ErWus", "annotations": [{"segment": [18.99703588143526, 32.92589703588144], "label": "Hand washing clothes"}]}, "G-giwt-TN8A": {"duration": 93.02, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=G-giwt-TN8A", "annotations": [{"segment": [24.436705928237128, 41.83970124804992], "label": "Blowing leaves"}, {"segment": [61.43128471138845, 73.32589001560062], "label": "Blowing leaves"}]}, "RnwidjJiDEE": {"duration": 149.212, "subset": "validation", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=RnwidjJiDEE", "annotations": [{"segment": [7.678085803432137, 90.9736833073323], "label": "Playing drums"}, {"segment": [98.66499843993759, 106.80951482059282], "label": "Playing drums"}, {"segment": [108.9038190327613, 110.53272230889236], "label": "Playing drums"}, {"segment": [116.11753354134166, 122.40044617784712], "label": "Playing drums"}, {"segment": [127.98525741029641, 135.89707332293293], "label": "Playing drums"}]}, "wsc5GIgVwN8": {"duration": 172.734, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wsc5GIgVwN8", "annotations": [{"segment": [23.444293461534382, 148.7499984898596], "label": "Longboarding"}]}, "B0enQXEBfKk": {"duration": 99.59, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=B0enQXEBfKk", "annotations": [{"segment": [5.505176788611545, 29.541864175507023], "label": "Tango"}, {"segment": [31.0372235, 93.58800540639625], "label": "Tango"}]}, "rmGvgGOkqyk": {"duration": 49.3, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rmGvgGOkqyk", "annotations": [{"segment": [3.6888112324492983, 8.760926677067083], "label": "Javelin throw"}, {"segment": [25.283726989079568, 33.19930109204368], "label": "Javelin throw"}]}, "YnsusYE6i_0": {"duration": 224.19, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=YnsusYE6i_0", "annotations": [{"segment": [36.70921216848674, 195.10065210608425], "label": "Ballet"}]}, "mhsotF4wdG8": {"duration": 194.56, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mhsotF4wdG8", "annotations": [{"segment": [29.88821918876755, 32.315688767550704], "label": "Rock-paper-scissors"}, {"segment": [75.09984009360375, 80.25821294851794], "label": "Rock-paper-scissors"}, {"segment": [131.2485686427457, 133.9797527301092], "label": "Rock-paper-scissors"}]}, "C2KzNljiTIU": {"duration": 52.34, "subset": 
"training", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=C2KzNljiTIU", "annotations": [{"segment": [1.3943923599999999, 52.301], "label": "Brushing hair"}]}, "Ad9jrt2bP1o": {"duration": 40.08, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ad9jrt2bP1o", "annotations": [{"segment": [33.63770865392439, 37.63921884555383], "label": "Triple jump"}]}, "Nt6cha3hK_s": {"duration": 187.06, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Nt6cha3hK_s", "annotations": [{"segment": [35.01902396255851, 169.25861581903277], "label": "Preparing salad"}]}, "IdG3UPXRtio": {"duration": 109.993, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IdG3UPXRtio", "annotations": []}, "QeVVKZhoFCg": {"duration": 206.71, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QeVVKZhoFCg", "annotations": [{"segment": [16.364755680000002, 200.85803432137286], "label": "Putting on makeup"}]}, "pOVICBn8QMw": {"duration": 50.06, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pOVICBn8QMw", "annotations": [{"segment": [6.250358814352575, 45.862007800312014], "label": "Using the pommel horse"}]}, "rjgu0ucCW7U": {"duration": 123.86, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rjgu0ucCW7U", "annotations": [{"segment": [13.139089104524182, 118.83146763650547], "label": "Mowing the lawn"}]}, "2ENwSfYEmNo": {"duration": 7.85, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2ENwSfYEmNo", "annotations": []}, "tAEGMVLn0wk": {"duration": 210.907, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tAEGMVLn0wk", "annotations": [{"segment": [116.43507332293291, 172.35022152886114], "label": "Using the monkey bar"}]}, "k3rWocPJ428": {"duration": 70.207, "subset": "training", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=k3rWocPJ428", "annotations": [{"segment": [23.486278277997307, 48.28341911386803], "label": "Table soccer"}]}, "voVkip40EyE": {"duration": 235.78, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=voVkip40EyE", "annotations": []}, "-6prmNqkAN8": {"duration": 47.137, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-6prmNqkAN8", "annotations": [{"segment": [6.397622762053341, 31.69396893291731], "label": "Curling"}]}, "WWhL75t7p8A": {"duration": 100.659, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WWhL75t7p8A", "annotations": [{"segment": [74.74796716380655, 85.89734882059282], "label": "Tug of war"}]}, "T5pq-K73yao": {"duration": 26.96, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=T5pq-K73yao", "annotations": []}, "7Cjzxlb94JM": {"duration": 147.4, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7Cjzxlb94JM", "annotations": []}, "nIfYhQHFWZI": {"duration": 23.64, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nIfYhQHFWZI", "annotations": [{"segment": [10.380668656786272, 19.378477368174725], "label": "Tumbling"}]}, "1scjpxusQx0": {"duration": 210.861, "subset": "validation", "resolution": "320x180", "url": "https://www.youtube.com/watch?v=1scjpxusQx0", "annotations": [{"segment": [35.268186482378994, 184.3534419340924], "label": "Rope skipping"}]}, "nnUYbsoLNqo": {"duration": 195.025, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=nnUYbsoLNqo", "annotations": [{"segment": [11.662415416693761, 177.20184376565095], "label": "Running a marathon"}]}, "RiF_iAc0keQ": {"duration": 11.889, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RiF_iAc0keQ", "annotations": [{"segment": 
[6.751102196468812, 9.774259134165366], "label": "Doing a powerbomb"}]}, "KgfKmcsEMK0": {"duration": 21.2, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=KgfKmcsEMK0", "annotations": [{"segment": [0.01, 21.076736805259383], "label": "Shaving legs"}]}, "GVpKrpEHK80": {"duration": 192.82, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=GVpKrpEHK80", "annotations": []}, "WGHXYbCWvyU": {"duration": 200.3, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WGHXYbCWvyU", "annotations": [{"segment": [50.45073556942278, 179.48509282371293], "label": "Tai chi"}]}, "gWTndmDHZQc": {"duration": 176.15, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gWTndmDHZQc", "annotations": []}, "evGKkyIAfw8": {"duration": 133.75, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=evGKkyIAfw8", "annotations": []}, "11p9goB2rm8": {"duration": 33.9, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=11p9goB2rm8", "annotations": []}, "SzKFCQ7IArg": {"duration": 135.628, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SzKFCQ7IArg", "annotations": []}, "Q8Y9pmhvJqE": {"duration": 53.522, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Q8Y9pmhvJqE", "annotations": [{"segment": [5.427347386895475, 12.77514077223089], "label": "Playing ice hockey"}, {"segment": [19.204459984399378, 47.00917813572543], "label": "Playing ice hockey"}]}, "srARxP_ocyg": {"duration": 30.86, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=srARxP_ocyg", "annotations": [{"segment": [0.01, 30.859318], "label": "Doing kickboxing"}]}, "K8f4LNNiQy8": {"duration": 13.61, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=K8f4LNNiQy8", "annotations": 
[{"segment": [0, 13.42], "label": "Pole vault"}]}, "I2w4N_GnyT8": {"duration": 53.5, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=I2w4N_GnyT8", "annotations": [{"segment": [1.919604650546022, 34.71980585335414], "label": "Cricket"}]}, "gN_Kq57w72s": {"duration": 237.61, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gN_Kq57w72s", "annotations": [{"segment": [48.17929797191888, 70.04528705148206], "label": "Making a cake"}, {"segment": [94.50554602184087, 180.13161622464898], "label": "Making a cake"}, {"segment": [184.57931045241807, 214.6012464898596], "label": "Making a cake"}]}, "zogfsf4O3Vg": {"duration": 100.8, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zogfsf4O3Vg", "annotations": [{"segment": [13.038091949999997, 90.29777691107644], "label": "Drinking beer"}]}, "YXl4cEB7E3Y": {"duration": 19.2, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YXl4cEB7E3Y", "annotations": [{"segment": [0.010000000000000002, 19.202902494331067], "label": "Playing bagpipes"}]}, "cxIfpBvuk0E": {"duration": 191.8, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cxIfpBvuk0E", "annotations": [{"segment": [8.827955538221529, 12.418988299531982], "label": "Smoking a cigarette"}, {"segment": [24.089844773790947, 42.94276677067083], "label": "Smoking a cigarette"}, {"segment": [45.03753588143526, 64.48896333853354], "label": "Smoking a cigarette"}, {"segment": [72.26953432137286, 74.06505070202807], "label": "Smoking a cigarette"}, {"segment": [80.34935803432138, 83.64113806552261], "label": "Smoking a cigarette"}, {"segment": [135.11260764430577, 144.0901895475819], "label": "Smoking a cigarette"}, {"segment": [158.75357332293294, 164.43937519500778], "label": "Smoking a cigarette"}]}, "d40Tv56RrnA": {"duration": 19.11, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=d40Tv56RrnA", "annotations": [{"segment": [0.01, 8.347146645865834], "label": "Hand washing clothes"}, {"segment": [9.683878315132606, 17.793383775351014], "label": "Hand washing clothes"}]}, "W97fdJ2Rn9g": {"duration": 123.97, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=W97fdJ2Rn9g", "annotations": []}, "DF9CGVGiKXY": {"duration": 90.187, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DF9CGVGiKXY", "annotations": [{"segment": [10.530490654205607, 77.9256308411215], "label": "Rope skipping"}]}, "485Wn1DXt5U": {"duration": 119.212, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=485Wn1DXt5U", "annotations": [{"segment": [24.93274043797815, 32.54591977409474], "label": "Hula hoop"}, {"segment": [44.42990703047185, 47.029529242804344], "label": "Hula hoop"}, {"segment": [74.69693634374252, 79.33911882037803], "label": "Hula hoop"}, {"segment": [85.83817428766774, 89.73760756804158], "label": "Hula hoop"}, {"segment": [94.56547734374251, 112.94851995121915], "label": "Hula hoop"}]}, "dZsk5Dp_5FE": {"duration": 38.22, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=dZsk5Dp_5FE", "annotations": []}, "rIr091-LMGY": {"duration": 103.45, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rIr091-LMGY", "annotations": [{"segment": [14.846973089807946, 92.14806124217758], "label": "Putting on makeup"}]}, "g7IZWqeyM20": {"duration": 117.89, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=g7IZWqeyM20", "annotations": [{"segment": [25.829204368174725, 108.92386895475819], "label": "Playing racquetball"}]}, "iDgmPtf4rMI": {"duration": 215.9, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iDgmPtf4rMI", "annotations": []}, "iSIzuN9cEAs": {"duration": 101.007, "subset": 
"validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=iSIzuN9cEAs", "annotations": [{"segment": [0.20000000000000004, 100.721], "label": "Disc dog"}]}, "FMXPFrZYhes": {"duration": 14.4, "subset": "training", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=FMXPFrZYhes", "annotations": [{"segment": [4.21196255850234, 13.263201248049922], "label": "Shot put"}]}, "pud1-V-4LL0": {"duration": 88.84, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pud1-V-4LL0", "annotations": []}, "XOEQfCu6KsE": {"duration": 143.128, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=XOEQfCu6KsE", "annotations": [{"segment": [23.66082059282371, 56.69668330733229], "label": "Disc dog"}, {"segment": [63.16992667706708, 133.48274258970358], "label": "Disc dog"}]}, "DinaQYSgbtg": {"duration": 56.77, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=DinaQYSgbtg", "annotations": [{"segment": [4.428454602184088, 54.91283706708268], "label": "Washing dishes"}]}, "Y4pyUctR49I": {"duration": 42.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Y4pyUctR49I", "annotations": [{"segment": [1.4432596460320997, 17.84393664898596], "label": "Doing nails"}, {"segment": [37.45914642121685, 41.851335999999996], "label": "Doing nails"}]}, "a370DDsJtBA": {"duration": 96.479, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=a370DDsJtBA", "annotations": [{"segment": [1.0535921029641186, 95.57585505460217], "label": "Playing rubik cube"}]}, "fULvroAfMAo": {"duration": 118.25999999999999, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fULvroAfMAo", "annotations": [{"segment": [7.472155226209049, 18.357517160686427], "label": "Shot put"}, {"segment": [31.08785569422777, 35.70029719188768], "label": "Shot put"}, {"segment": 
[47.692645085803434, 64.66642979719188], "label": "Shot put"}, {"segment": [81.27121918876755, 86.25265600624026], "label": "Shot put"}]}, "kXvFkU7gQSM": {"duration": 218.361, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kXvFkU7gQSM", "annotations": [{"segment": [94.69064223861487, 173.9396347697675], "label": "Doing fencing"}]}, "pJ3sECWr5Xg": {"duration": 109.03999999999999, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pJ3sECWr5Xg", "annotations": []}, "6MsWbxRblnY": {"duration": 71.727, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=6MsWbxRblnY", "annotations": []}, "6RdkwoTi-98": {"duration": 13.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6RdkwoTi-98", "annotations": [{"segment": [0.2646507550702028, 11.88892622776911], "label": "Hammer throw"}]}, "jw8yJ6tJXrA": {"duration": 14.93, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jw8yJ6tJXrA", "annotations": [{"segment": [0.9400435200000002, 5.76111744], "label": "Long jump"}, {"segment": [10.295006767550703, 14.730089000000001], "label": "Long jump"}]}, "Zn84iOuIkDs": {"duration": 81.363, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Zn84iOuIkDs", "annotations": [{"segment": [0.01, 81.321], "label": "Clipping cat claws"}]}, "UzmnkYD5YTA": {"duration": 101.22, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UzmnkYD5YTA", "annotations": [{"segment": [28.580437860842434, 92.68904450599793], "label": "Playing flauta"}]}, "nDRaXY5ddPs": {"duration": 87.54, "subset": "training", "resolution": "208x160", "url": "https://www.youtube.com/watch?v=nDRaXY5ddPs", "annotations": [{"segment": [0, 4.779833098085828], "label": "Mooping floor"}, {"segment": [7.511166296992015, 70.60496317160687], "label": "Mooping floor"}, {"segment": 
[76.20419622776912, 82.21312926365056], "label": "Mooping floor"}]}, "fm6UhMKF8DU": {"duration": 62.28, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fm6UhMKF8DU", "annotations": [{"segment": [24.6771964399376, 52.851948280811236], "label": "Bungee jumping"}]}, "pxFR0rLMvS0": {"duration": 82.478, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=pxFR0rLMvS0", "annotations": []}, "30Yk_1Yc7Vk": {"duration": 57.47, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=30Yk_1Yc7Vk", "annotations": [{"segment": [0.01, 57.469386], "label": "Getting a haircut"}]}, "_HeqBIO9TgQ": {"duration": 65.32, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_HeqBIO9TgQ", "annotations": [{"segment": [7.382332293291731, 57.317627009999995], "label": "Washing dishes"}]}, "Lt6waJFJWI0": {"duration": 63.693, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Lt6waJFJWI0", "annotations": [{"segment": [0.01, 53.17703759355634], "label": "Hurling"}]}, "KTk1Rh8Lgdw": {"duration": 176.17000000000002, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=KTk1Rh8Lgdw", "annotations": [{"segment": [0, 176.141], "label": "Removing curlers"}]}, "G6qOkn-K6aw": {"duration": 208.12, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=G6qOkn-K6aw", "annotations": [{"segment": [8.92872453978159, 190.75002425897034], "label": "Cumbia"}]}, "Nns49SsSe_g": {"duration": 107.695, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Nns49SsSe_g", "annotations": []}, "StM48dIvK-M": {"duration": 79.157, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=StM48dIvK-M", "annotations": [{"segment": [0, 74.55395319812791], "label": "Tug of war"}]}, "Ez7s36AwgLk": {"duration": 233.94, "subset": 
"validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ez7s36AwgLk", "annotations": [{"segment": [0.01, 202.18909751950076], "label": "Skateboarding"}]}, "-o3M0VV74gw": {"duration": 14.47, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-o3M0VV74gw", "annotations": []}, "oYH-XLQzMxU": {"duration": 33.3, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oYH-XLQzMxU", "annotations": [{"segment": [0.16974046910430837, 30.92576395014781], "label": "Tennis serve with ball bouncing"}]}, "BK2ECcEE0Rs": {"duration": 135.187, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BK2ECcEE0Rs", "annotations": []}, "pT2fO9vlVn0": {"duration": 82.5, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=pT2fO9vlVn0", "annotations": [{"segment": [1.4154305772230888, 80.55086739469579], "label": "Doing step aerobics"}]}, "xXTfM9xXFQM": {"duration": 152.184, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xXTfM9xXFQM", "annotations": [{"segment": [48.19543198870813, 144.34888004499774], "label": "Powerbocking"}]}, "mlxcljsNEBg": {"duration": 190.45, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mlxcljsNEBg", "annotations": []}, "8bppcsg07Rc": {"duration": 177.099, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8bppcsg07Rc", "annotations": [{"segment": [34.70360832788922, 121.73806439143067], "label": "Using the monkey bar"}, {"segment": [126.9711487880092, 134.1322116464851], "label": "Using the monkey bar"}, {"segment": [151.75944329811807, 161.67476110216163], "label": "Using the monkey bar"}]}, "Yh9eGll8fYQ": {"duration": 56.01, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Yh9eGll8fYQ", "annotations": [{"segment": [6.635001560062403, 
53.254617784711385], "label": "Bungee jumping"}]}, "roavmdw1ORo": {"duration": 151.84, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=roavmdw1ORo", "annotations": [{"segment": [15.637778471138846, 26.773772230889236], "label": "Grooming horse"}, {"segment": [29.143132605304213, 47.150271450858035], "label": "Grooming horse"}, {"segment": [48.57188767550702, 98.80232761310452], "label": "Grooming horse"}, {"segment": [99.03926365054602, 115.86172230889237], "label": "Grooming horse"}, {"segment": [117.04640249609984, 145.95259906396257], "label": "Grooming horse"}]}, "MTJ1EtiizVQ": {"duration": 103.35, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=MTJ1EtiizVQ", "annotations": [{"segment": [7.739293329173168, 96.90240189235571], "label": "Tango"}]}, "RnN4BXyOtxU": {"duration": 121.534, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RnN4BXyOtxU", "annotations": [{"segment": [20.287140345477766, 79.06296710452418], "label": "Making a lemonade"}, {"segment": [83.2341548174727, 91.19733136037442], "label": "Making a lemonade"}]}, "qgUHRwC3194": {"duration": 122.788, "subset": "testing", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=qgUHRwC3194", "annotations": []}, "n_CMnVD7kCg": {"duration": 145.85, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=n_CMnVD7kCg", "annotations": [{"segment": [8.873511626179333, 125.36679246217467], "label": "Tumbling"}]}, "dPZfExDmX9Y": {"duration": 180.07, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=dPZfExDmX9Y", "annotations": [{"segment": [7.303961656786272, 178.94706059126366], "label": "Making a sandwich"}]}, "SHS8pNOsZDU": {"duration": 26.36, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SHS8pNOsZDU", "annotations": [{"segment": [0.20557110764430578, 
26.31310177847114], "label": "Playing polo"}]}, "wvlyV067oOU": {"duration": 143.27, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=wvlyV067oOU", "annotations": [{"segment": [26.261913605795932, 128.69674609999998], "label": "Dodgeball"}]}, "J4hnBPgwDlw": {"duration": 191.054, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=J4hnBPgwDlw", "annotations": [{"segment": [20.054230049026344, 178.3457752203659], "label": "Gargling mouthwash"}]}, "9XanCE5nX2Q": {"duration": 168.82999999999998, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9XanCE5nX2Q", "annotations": [{"segment": [5.133225418876756, 66.73193044539782], "label": "Cumbia"}]}, "YYpeT1sH_m0": {"duration": 142.757, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YYpeT1sH_m0", "annotations": [{"segment": [8.23703120124805, 22.039624024961], "label": "Beach soccer"}, {"segment": [30.276655226209048, 34.06123712948518], "label": "Beach soccer"}, {"segment": [37.845819032761305, 42.74351326053042], "label": "Beach soccer"}, {"segment": [51.64841185647426, 59.885443057722306], "label": "Beach soccer"}, {"segment": [73.68803588143525, 106.42845241809673], "label": "Beach soccer"}, {"segment": [137.3773120124805, 142.721], "label": "Beach soccer"}]}, "t6trrsdhMEc": {"duration": 26.123, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=t6trrsdhMEc", "annotations": [{"segment": [10.915795403295665, 20.721886063858957], "label": "Doing crunches"}]}, "o86qcfpzO0g": {"duration": 139.34, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=o86qcfpzO0g", "annotations": [{"segment": [9.890958076418295, 11.195260240341588], "label": "Smoking a cigarette"}, {"segment": [15.97770150806032, 19.23845691786855], "label": "Smoking a cigarette"}, {"segment": [24.2382818795745, 26.62950251343387], 
"label": "Smoking a cigarette"}, {"segment": [42.49851217450058, 43.80281433842388], "label": "Smoking a cigarette"}, {"segment": [51.193859933989195, 52.932929485886916], "label": "Smoking a cigarette"}, {"segment": [62.7151957153116, 64.45426526720932], "label": "Smoking a cigarette"}, {"segment": [103.14856279693365, 107.27885298269074], "label": "Smoking a cigarette"}]}, "HI_lWd3JhII": {"duration": 71.588, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=HI_lWd3JhII", "annotations": [{"segment": [24.330636505460216, 69.53204836193447], "label": "Waxing skis"}]}, "Q0U51Hqn21w": {"duration": 6.34, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Q0U51Hqn21w", "annotations": [{"segment": [2.195426574102964, 5.814913628705148], "label": "Discus throw"}]}, "HQk5hngL4Us": {"duration": 153.531, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HQk5hngL4Us", "annotations": [{"segment": [0.010000000000000002, 151.32604056162245], "label": "Decorating the Christmas tree"}]}, "oNo8ZpqE_6k": {"duration": 90.698, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oNo8ZpqE_6k", "annotations": [{"segment": [16.13022509471807, 48.10768789391576], "label": "Hurling"}]}, "F7V-FqgG3T8": {"duration": 51.18, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=F7V-FqgG3T8", "annotations": [{"segment": [3.3341163999999996, 49.50016152574103], "label": "Playing badminton"}]}, "qGf6earGAOc": {"duration": 87.4, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qGf6earGAOc", "annotations": [{"segment": [34.08851896286763, 67.44190193015736], "label": "Wakeboarding"}]}, "_4wEUsTft44": {"duration": 113.894, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_4wEUsTft44", "annotations": [{"segment": [0.01, 113.841], "label": 
"Installing carpet"}]}, "Exu0qsz42fQ": {"duration": 197.393, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Exu0qsz42fQ", "annotations": [{"segment": [3.0786427457098284, 184.1214945397816], "label": "Playing drums"}]}, "fAjpgUcycuA": {"duration": 41.146, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fAjpgUcycuA", "annotations": []}, "PAGuZzrzSO4": {"duration": 26.053, "subset": "validation", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=PAGuZzrzSO4", "annotations": [{"segment": [9.278559768995768, 14.667417985169953], "label": "Calf roping"}]}, "7X_wgaRaJYQ": {"duration": 144.265, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7X_wgaRaJYQ", "annotations": [{"segment": [3.685282277463218, 135.36669774475294], "label": "Swimming"}]}, "O_kbj_PTIUQ": {"duration": 35.643, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=O_kbj_PTIUQ", "annotations": []}, "PAiJNr97C6g": {"duration": 122.37, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PAiJNr97C6g", "annotations": [{"segment": [11.245764016925564, 82.91368385360374], "label": "Snowboarding"}]}, "H5TETCI731k": {"duration": 121.21, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=H5TETCI731k", "annotations": [{"segment": [46.13849491122502, 56.9167498699948], "label": "Playing field hockey"}]}, "9o2iojC7gSw": {"duration": 131.24, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9o2iojC7gSw", "annotations": [{"segment": [15.355598713744468, 18.631459748049924], "label": "Pole vault"}, {"segment": [90.90514426521061, 102.98488190405617], "label": "Pole vault"}, {"segment": [108.92238006552262, 122.8447895475819], "label": "Pole vault"}]}, "q8mReXud6fE": {"duration": 29.049, "subset": "training", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=q8mReXud6fE", "annotations": [{"segment": [0.01, 3.715876436817472], "label": "Raking leaves"}, {"segment": [10.286633550702028, 18.3528043525741], "label": "Raking leaves"}]}, "XG6ar8yc2pM": {"duration": 144.03, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XG6ar8yc2pM", "annotations": [{"segment": [78.42054605721646, 133.6969177067083], "label": "Horseback riding"}, {"segment": [5.168116215805095, 77.29098846], "label": "Horseback riding"}]}, "HPNZi_WsUeY": {"duration": 118.1, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HPNZi_WsUeY", "annotations": [{"segment": [13.904485179407175, 99.17304992199688], "label": "Preparing pasta"}]}, "lkJUUh-xJOY": {"duration": 204.754, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lkJUUh-xJOY", "annotations": []}, "uY5UbRJIrxI": {"duration": 153.345, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=uY5UbRJIrxI", "annotations": []}, "o8RIoxL9FDE": {"duration": 23.406, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=o8RIoxL9FDE", "annotations": [{"segment": [4.295247708959529, 9.500506198850665], "label": "Gargling mouthwash"}]}, "Wu3wa5Rc_Qw": {"duration": 17.624, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Wu3wa5Rc_Qw", "annotations": [{"segment": [0.01, 17.348999475819035], "label": "Canoeing"}]}, "JGxSaJ7XoPQ": {"duration": 79.25, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JGxSaJ7XoPQ", "annotations": [{"segment": [0.9887176287051482, 71.18766926677068], "label": "Grooming horse"}]}, "IwSUfrzxFnU": {"duration": 180.3, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IwSUfrzxFnU", "annotations": [{"segment": [26.159397781591263, 165.676185950078], "label": "Plataform 
diving"}]}, "B5VIJnAFlK8": {"duration": 30.4, "subset": "validation", "resolution": "810x1440", "url": "https://www.youtube.com/watch?v=B5VIJnAFlK8", "annotations": [{"segment": [0.9009414852784587, 30.252665088923557], "label": "Cleaning windows"}]}, "D3NZ45e9llI": {"duration": 224.42000000000002, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=D3NZ45e9llI", "annotations": [{"segment": [0.01, 224.42056689342402], "label": "Kayaking"}]}, "0jrThYemNLs": {"duration": 63.995, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0jrThYemNLs", "annotations": [{"segment": [4.190892355694228, 58.57270982839313], "label": "Doing fencing"}]}, "HNBiW2Nq318": {"duration": 40.03, "subset": "training", "resolution": "638x360", "url": "https://www.youtube.com/watch?v=HNBiW2Nq318", "annotations": [{"segment": [0.9589768510140406, 9.002008504680187], "label": "Using the balance beam"}, {"segment": [10.676512800000001, 27.47999736], "label": "Using the balance beam"}]}, "K5jY2chByPw": {"duration": 71.91, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=K5jY2chByPw", "annotations": []}, "0PnAEoMx-v0": {"duration": 95.75999999999999, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0PnAEoMx-v0", "annotations": [{"segment": [5.897988778471138, 66.52035445085804], "label": "Playing guitarra"}]}, "4Dj7wOTfyH4": {"duration": 192.169, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4Dj7wOTfyH4", "annotations": [{"segment": [68.93577223088924, 95.01147737909517], "label": "Installing carpet"}, {"segment": [107.00030733229329, 125.88271450858035], "label": "Installing carpet"}, {"segment": [148.37721528861152, 155.2715101404056], "label": "Installing carpet"}]}, "yRD0WDE5IG0": {"duration": 218.27, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yRD0WDE5IG0", 
"annotations": [{"segment": [0.01, 215.00653900156007], "label": "Doing step aerobics"}]}, "ZMG8WE3Y22k": {"duration": 135.582, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZMG8WE3Y22k", "annotations": [{"segment": [70.40326521060842, 79.29465678627145], "label": "Rafting"}, {"segment": [84.1580624024961, 120.31642589703588], "label": "Rafting"}]}, "9xKOEE8Ni-Q": {"duration": 79.575, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9xKOEE8Ni-Q", "annotations": [{"segment": [0.01, 79.57480725623583], "label": "Camel ride"}]}, "47f7KnSW2HA": {"duration": 15.581, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=47f7KnSW2HA", "annotations": []}, "5j1ytnL-KEI": {"duration": 30.54, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5j1ytnL-KEI", "annotations": []}, "aH39BSM0-Ag": {"duration": 138.37, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=aH39BSM0-Ag", "annotations": []}, "j_YzK7aHTIA": {"duration": 113.871, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=j_YzK7aHTIA", "annotations": [{"segment": [10.83634138377535, 84.91428166302651], "label": "Making a cake"}]}, "URzytLugoVw": {"duration": 54.08, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=URzytLugoVw", "annotations": [{"segment": [0.01, 54.079023], "label": "Playing kickball"}]}, "8cH0mAgsuAc": {"duration": 176.24, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8cH0mAgsuAc", "annotations": [{"segment": [42.05136973478939, 56.618184087363495], "label": "Kneeling"}, {"segment": [75.03283619344774, 95.64625273010921], "label": "Kneeling"}]}, "ouc67LyENuw": {"duration": 131.727, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ouc67LyENuw", "annotations": 
[{"segment": [0.01, 48.05615288611544], "label": "Bullfighting"}, {"segment": [56.27087987519501, 64.48560686427457], "label": "Bullfighting"}]}, "jBvGvVw3R-Q": {"duration": 218.62, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jBvGvVw3R-Q", "annotations": [{"segment": [19.776733229329174, 23.868471138845557], "label": "Snatch"}, {"segment": [26.93727457098284, 29.66509984399376], "label": "Snatch"}, {"segment": [30.006078003120127, 36.82564118564743], "label": "Snatch"}, {"segment": [38.18955382215289, 42.963248049921994], "label": "Snatch"}, {"segment": [42.963248049921994, 50.46476755070203], "label": "Snatch"}, {"segment": [54.55650546021841, 57.62530889235569], "label": "Snatch"}, {"segment": [58.648243369734786, 63.08095943837753], "label": "Snatch"}, {"segment": [64.10389391575663, 66.14976287051482], "label": "Snatch"}, {"segment": [67.85465366614665, 71.60541341653666], "label": "Snatch"}, {"segment": [74.67421684867395, 79.78888923556943], "label": "Snatch"}, {"segment": [81.83475819032762, 87.63138689547581], "label": "Snatch"}, {"segment": [91.72312480499221, 95.13290639625585], "label": "Snatch"}, {"segment": [100.58855694227769, 110.13594539781592], "label": "Snatch"}, {"segment": [117.97844305772232, 124.7980062402496], "label": "Snatch"}, {"segment": [127.86680967238689, 144.23376131045242], "label": "Snatch"}, {"segment": [146.96158658346334, 159.2368003120125], "label": "Snatch"}, {"segment": [164.69245085803433, 178.67255538221528], "label": "Snatch"}, {"segment": [180.3774461778471, 196.0624414976599], "label": "Snatch"}]}, "lzQwtmUrSK4": {"duration": 209.119, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lzQwtmUrSK4", "annotations": [{"segment": [41.75841234465705, 180.40939083277618], "label": "Futsal"}]}, "6Yn2U58qxPs": {"duration": 214.95, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6Yn2U58qxPs", "annotations": 
[{"segment": [43.593019693576856, 95.56931240514926], "label": "Rock climbing"}, {"segment": [155.25821629327757, 198.51590506613462], "label": "Rock climbing"}]}, "Gk5iK0bEWak": {"duration": 208.423, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Gk5iK0bEWak", "annotations": []}, "G7kqlq8WhRo": {"duration": 171.108, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=G7kqlq8WhRo", "annotations": [{"segment": [11.607306552262092, 154.38188533541341], "label": "Tug of war"}]}, "5E2OdhrgG8s": {"duration": 181.21, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5E2OdhrgG8s", "annotations": [{"segment": [6.076405333853353, 164.3455675179407], "label": "Playing harmonica"}]}, "ubR7XPBJQH8": {"duration": 209.14, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ubR7XPBJQH8", "annotations": [{"segment": [2.9364729854500307, 8.156869404027862], "label": "Tai chi"}, {"segment": [44.37336923556943, 182.38759854914196], "label": "Tai chi"}]}, "GeR07RwzkBw": {"duration": 220.52, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=GeR07RwzkBw", "annotations": []}, "ldjIg23Rofw": {"duration": 206.124, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ldjIg23Rofw", "annotations": [{"segment": [25.72519251170047, 174.28817926677067], "label": "Beer pong"}]}, "IKrnz_OzqT8": {"duration": 89.351, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IKrnz_OzqT8", "annotations": [{"segment": [0.13939217704762613, 87.53828670826833], "label": "Playing congas"}]}, "hPOtGLv1GzM": {"duration": 191.123, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hPOtGLv1GzM", "annotations": [{"segment": [17.293535044803154, 55.458577902299766], "label": "BMX"}, {"segment": [57.8438930808933, 
83.78419564809802], "label": "BMX"}, {"segment": [86.16951082669156, 179.79313000156006], "label": "BMX"}]}, "scwBQj4GE7Y": {"duration": 36.711, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=scwBQj4GE7Y", "annotations": [{"segment": [4.409819706864274, 23.480858179407175], "label": "Swimming"}]}, "8TDqnzrHE8c": {"duration": 87.89, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8TDqnzrHE8c", "annotations": []}, "n-i9nC6vxu0": {"duration": 94.808, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=n-i9nC6vxu0", "annotations": [{"segment": [22.333647697347892, 76.91057485179407], "label": "Slacklining"}]}, "UeN7XgwLkOU": {"duration": 192.493, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UeN7XgwLkOU", "annotations": [{"segment": [48.39320680050102, 174.92315151389667], "label": "Rollerblading"}]}, "GKM-K8jbfyE": {"duration": 119.77000000000001, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=GKM-K8jbfyE", "annotations": [{"segment": [0.01, 119.7685260770975], "label": "Cleaning windows"}]}, "dGw6pVNy8Ns": {"duration": 201.898, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dGw6pVNy8Ns", "annotations": []}, "nfTFGY2n_io": {"duration": 228.183, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nfTFGY2n_io", "annotations": [{"segment": [14.94184399375975, 187.1288081123245], "label": "Making a lemonade"}]}, "qiTQ02NDSlQ": {"duration": 138.81, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=qiTQ02NDSlQ", "annotations": []}, "wqThk38AdEA": {"duration": 23.06, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wqThk38AdEA", "annotations": [{"segment": [0.36383945999999995, 22.87737839625585], "label": "Playing flauta"}]}, 
"p7jJNlAY_lw": {"duration": 180.7, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=p7jJNlAY_lw", "annotations": []}, "FrkXeG1YoKg": {"duration": 80.11, "subset": "validation", "resolution": "426x238", "url": "https://www.youtube.com/watch?v=FrkXeG1YoKg", "annotations": [{"segment": [3.3743194567862713, 72.48538092355695], "label": "Pole vault"}]}, "lRoy13g3Iow": {"duration": 170.76, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lRoy13g3Iow", "annotations": []}, "hIQ4Lmazo4g": {"duration": 67.083, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hIQ4Lmazo4g", "annotations": [{"segment": [0.01, 66.70628587372448], "label": "Building sandcastles"}]}, "zTrI4pPFIlw": {"duration": 150.09, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=zTrI4pPFIlw", "annotations": [{"segment": [103.44822649921997, 144.87432625117006], "label": "Using the pommel horse"}, {"segment": [8.425647407176287, 98.80472097], "label": "Using the pommel horse"}]}, "EorPZy_QoHU": {"duration": 134.189, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=EorPZy_QoHU", "annotations": []}, "s60we-9PBhw": {"duration": 79.668, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=s60we-9PBhw", "annotations": [{"segment": [29.30707644305772, 69.06283307332292], "label": "Clipping cat claws"}]}, "JKJ_EvP7JwQ": {"duration": 27.33, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JKJ_EvP7JwQ", "annotations": []}, "1kwIYXB4b8s": {"duration": 61.23, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=1kwIYXB4b8s", "annotations": []}, "QCcueK6xiZ4": {"duration": 73.12, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QCcueK6xiZ4", "annotations": [{"segment": [34.87779407176287, 
56.43540561622465], "label": "Hand car wash"}, {"segment": [63.2760608424337, 69.31863962558504], "label": "Hand car wash"}]}, "k5wjc4OO1XM": {"duration": 65.411, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=k5wjc4OO1XM", "annotations": [{"segment": [5.204247614802551, 61.022353048361936], "label": "Making an omelette"}]}, "UxhKb-zZoWE": {"duration": 126.828, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UxhKb-zZoWE", "annotations": [{"segment": [0.01, 126.82696145124717], "label": "Sumo"}]}, "5kCv4zwc7-I": {"duration": 166.46, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5kCv4zwc7-I", "annotations": [{"segment": [6.36554512, 90.47770374570983], "label": "Doing nails"}]}, "JJwNO3BLQvY": {"duration": 195.88, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=JJwNO3BLQvY", "annotations": []}, "REOP3JWB0yc": {"duration": 149.96, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=REOP3JWB0yc", "annotations": [{"segment": [33.94358744, 121.92609786], "label": "Playing lacrosse"}]}, "NGF0rQiDxNk": {"duration": 38.57, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=NGF0rQiDxNk", "annotations": [{"segment": [0, 38.568343], "label": "Using uneven bars"}]}, "Tg3FU9Ro6KQ": {"duration": 201.55, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=Tg3FU9Ro6KQ", "annotations": [{"segment": [18.236901666542852, 29.87078721244088], "label": "Doing nails"}, {"segment": [32.38622192506748, 72.94760666617141], "label": "Doing nails"}, {"segment": [84.26706272698908, 140.86434366302652], "label": "Doing nails"}, {"segment": [177.3381469329173, 193.68847253666146], "label": "Doing nails"}]}, "tTEZnwfHX8E": {"duration": 29.142, "subset": "testing", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=tTEZnwfHX8E", "annotations": []}, "W3ozAI2ozCs": {"duration": 36.67, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=W3ozAI2ozCs", "annotations": [{"segment": [0.01, 36.664307], "label": "Using parallel bars"}]}, "82cpSdoHdg4": {"duration": 65.04, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=82cpSdoHdg4", "annotations": [{"segment": [7.102553054503132, 58.240935046925685], "label": "Playing congas"}]}, "0U7_h4uasNY": {"duration": 115.311, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0U7_h4uasNY", "annotations": []}, "NURr5XJcwFs": {"duration": 45.396, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NURr5XJcwFs", "annotations": [{"segment": [32.11643843213728, 41.18127662246489], "label": "Hitting a pinata"}]}, "6LrY7uMj8Kg": {"duration": 155.95, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6LrY7uMj8Kg", "annotations": [{"segment": [4.0919967999999995, 144.58901555148205], "label": "Cheerleading"}]}, "hiEVVSgAhls": {"duration": 85.45, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hiEVVSgAhls", "annotations": [{"segment": [1.5313299531981281, 72.05743272], "label": "Ping-pong"}]}, "r7Z5Pm3pKMA": {"duration": 45.628, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=r7Z5Pm3pKMA", "annotations": [{"segment": [22.366557469295188, 29.746386147366724], "label": "Hurling"}]}, "ExMpLopi9ug": {"duration": 41.96, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ExMpLopi9ug", "annotations": [{"segment": [5.172096723868955, 39.47815600624025], "label": "Using the pommel horse"}]}, "ioue7a-6jn0": {"duration": 111.154, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ioue7a-6jn0", 
"annotations": []}, "IsM_xfhJzps": {"duration": 134.607, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IsM_xfhJzps", "annotations": [{"segment": [58.37829864759216, 74.96781516974964], "label": "Using the rowing machine"}, {"segment": [101.84703098283931, 122.63642493603744], "label": "Using the rowing machine"}]}, "kFmOtf_Ew1w": {"duration": 228.717, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kFmOtf_Ew1w", "annotations": [{"segment": [9.260414330218069, 209.08994859813086], "label": "Swimming"}]}, "KsAnLnOjeSQ": {"duration": 14.58, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KsAnLnOjeSQ", "annotations": []}, "gWeunYH-qoA": {"duration": 142.339, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=gWeunYH-qoA", "annotations": []}, "B3ZVUyLeT84": {"duration": 59.49, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=B3ZVUyLeT84", "annotations": [{"segment": [6.125286382883887, 55.31319050234009], "label": "Surfing"}]}, "0IN40z63WAU": {"duration": 232.014, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0IN40z63WAU", "annotations": [{"segment": [0.01, 108.92396411856475], "label": "Elliptical trainer"}, {"segment": [114.71394227769112, 223.29528393135726], "label": "Elliptical trainer"}]}, "5pl_qttD8Fc": {"duration": 75.78999999999999, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=5pl_qttD8Fc", "annotations": [{"segment": [5.320666051131841, 7.685406518301549], "label": "Smoking a cigarette"}, {"segment": [44.575357806148986, 52.14252730109205], "label": "Smoking a cigarette"}, {"segment": [60.77383000626148, 61.60148916977088], "label": "Smoking a cigarette"}]}, "a7FNzxXWvgU": {"duration": 168.57999999999998, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=a7FNzxXWvgU", "annotations": [{"segment": [48.37420280811232, 153.2908003120125], "label": "Making a sandwich"}]}, "X4IE65LtDzQ": {"duration": 155.57, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=X4IE65LtDzQ", "annotations": [{"segment": [20.86821528861154, 94.39227613104524], "label": "Hopscotch"}]}, "ZAX6OojMg38": {"duration": 82.06, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ZAX6OojMg38", "annotations": []}, "8EyYHnijfnI": {"duration": 130.66, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=8EyYHnijfnI", "annotations": [{"segment": [10.290340093603746, 67.54946021840874], "label": "Getting a piercing"}, {"segment": [75.70022464898597, 83.85098907956319], "label": "Getting a piercing"}]}, "LNiep92AJLY": {"duration": 200.81, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LNiep92AJLY", "annotations": []}, "NyL7m4JV8vQ": {"duration": 98.47999999999999, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=NyL7m4JV8vQ", "annotations": [{"segment": [5.607408163026522, 61.854116999999995], "label": "High jump"}]}, "bKVrokdj7HM": {"duration": 123.577, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bKVrokdj7HM", "annotations": [{"segment": [13.30231715522621, 57.45058713416537], "label": "Playing drums"}, {"segment": [70.75290428939158, 117.21462073010922], "label": "Playing drums"}]}, "O8TIRkB99Lo": {"duration": 125.992, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=O8TIRkB99Lo", "annotations": [{"segment": [5.89663790633258, 125.99149659863946], "label": "Hanging wallpaper"}]}, "UnOzWl0EGCA": {"duration": 66.6, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UnOzWl0EGCA", "annotations": [{"segment": [14.329825273010922, 
60.95367706708269], "label": "Mixing drinks"}]}, "fCE3Eo7_Nmw": {"duration": 60.396, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=fCE3Eo7_Nmw", "annotations": [{"segment": [7.160729722308893, 53.045932021840876], "label": "Peeling potatoes"}]}, "UgtzVS_oeq0": {"duration": 41.1, "subset": "training", "resolution": "320x176", "url": "https://www.youtube.com/watch?v=UgtzVS_oeq0", "annotations": [{"segment": [8.711850234009361, 11.081985959438377], "label": "Smoking a cigarette"}, {"segment": [28.5056864274571, 34.91145865834633], "label": "Smoking a cigarette"}]}, "hz0W27EwjQ8": {"duration": 238.725, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=hz0W27EwjQ8", "annotations": [{"segment": [7.43689575518681, 238.724352], "label": "Playing beach volleyball"}]}, "z-_snl6eaPE": {"duration": 20.573, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=z-_snl6eaPE", "annotations": [{"segment": [2.1799949204368176, 19.780248028081125], "label": "Playing ice hockey"}]}, "hzpFVURhKwo": {"duration": 139.018, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hzpFVURhKwo", "annotations": [{"segment": [0.01, 139.01732426303855], "label": "Hanging wallpaper"}]}, "Z-2Gwqt_GCQ": {"duration": 164.793, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Z-2Gwqt_GCQ", "annotations": [{"segment": [20.82003276131045, 148.05356630265211], "label": "Mooping floor"}]}, "SxIJ6MjcgnY": {"duration": 152.834, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=SxIJ6MjcgnY", "annotations": [{"segment": [6.6760448049922, 143.77339347893917], "label": "Disc dog"}]}, "JDn95TW9WoM": {"duration": 148.585, "subset": "validation", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=JDn95TW9WoM", "annotations": [{"segment": [17.15056786271451, 38.47289547581904], 
"label": "Futsal"}, {"segment": [0.2, 148.36100000000002], "label": "Futsal"}, {"segment": [50.29288143525741, 110.3198689547582], "label": "Futsal"}]}, "TeLWp5sSxg0": {"duration": 186.25, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=TeLWp5sSxg0", "annotations": [{"segment": [41.54384243369735, 186.201], "label": "Removing curlers"}]}, "Hfd5sxW8Clg": {"duration": 187.246, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Hfd5sxW8Clg", "annotations": [{"segment": [35.78408736349454, 102.38630710942722], "label": "Playing rubik cube"}]}, "5UAdJuvBEOI": {"duration": 120.79, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5UAdJuvBEOI", "annotations": []}, "fjN9Qe237bw": {"duration": 99.382, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=fjN9Qe237bw", "annotations": [{"segment": [0.01, 98.824125335271], "label": "Playing pool"}]}, "wideKBXF-60": {"duration": 176.73, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wideKBXF-60", "annotations": [{"segment": [33.63596032276665, 55.4166231547221], "label": "Playing polo"}, {"segment": [81.88426402230888, 105.59485899173168], "label": "Playing polo"}]}, "6dNh6AxVLx8": {"duration": 117.517, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=6dNh6AxVLx8", "annotations": [{"segment": [3.482276131045242, 117.461], "label": "BMX"}]}, "I1iUbyjWy7k": {"duration": 31.86, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=I1iUbyjWy7k", "annotations": []}, "FQEGKGn9vnU": {"duration": 128.546, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FQEGKGn9vnU", "annotations": [{"segment": [0.01, 124.99768317150696], "label": "Fun sliding down"}]}, "iIhEO_NA8gk": {"duration": 221.101, "subset": "training", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=iIhEO_NA8gk", "annotations": [{"segment": [86.85058435093057, 116.8022993042016], "label": "Skiing"}]}, "jhiJUgDpwFw": {"duration": 59.792, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jhiJUgDpwFw", "annotations": [{"segment": [4.370441833582837, 59.59138321995464], "label": "Using the monkey bar"}]}, "2Jr1K1wBKfQ": {"duration": 133.49, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2Jr1K1wBKfQ", "annotations": [{"segment": [21.241984144671907, 47.27382745922082], "label": "Archery"}, {"segment": [68.09930211085994, 97.46322121123245], "label": "Archery"}]}, "zUPadKgtYHw": {"duration": 137.834, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=zUPadKgtYHw", "annotations": [{"segment": [11.464630649091536, 124.39338625033763], "label": "Running a marathon"}]}, "WRX7aUqgZJ0": {"duration": 46.603, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WRX7aUqgZJ0", "annotations": [{"segment": [7.524732401540959, 42.78555573243338], "label": "Waxing skis"}]}, "_zAfwnDt4VE": {"duration": 5.83, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=_zAfwnDt4VE", "annotations": [{"segment": [0.01, 5.755468067082684], "label": "Hand washing clothes"}]}, "cKFARm4dsws": {"duration": 11.123, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cKFARm4dsws", "annotations": [{"segment": [0.19999999999999998, 10.861], "label": "Swinging at the playground"}]}, "9GwsrWUq7mY": {"duration": 73.77, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9GwsrWUq7mY", "annotations": [{"segment": [1.8401497659906398, 70.38572854914197], "label": "River tubing"}]}, "RVZprJDJz1U": {"duration": 84.24, "subset": "validation", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=RVZprJDJz1U", "annotations": [{"segment": [41.00390418882061, 48.889269528861156], "label": "Polishing forniture"}, {"segment": [69.39122126677067, 78.065123925117], "label": "Polishing forniture"}]}, "oEZToisRw4w": {"duration": 214.14, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oEZToisRw4w", "annotations": [{"segment": [29.390215288611543, 184.69078471138846], "label": "Shot put"}]}, "eaTRlHkPBfI": {"duration": 12.283, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=eaTRlHkPBfI", "annotations": []}, "OhpaFQeQtKs": {"duration": 110.179, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OhpaFQeQtKs", "annotations": [{"segment": [0.01, 56.87468174726989], "label": "Doing fencing"}]}, "gpmrwsV9skQ": {"duration": 31.069, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=gpmrwsV9skQ", "annotations": [{"segment": [5.223257410296412, 7.544705148205929], "label": "Powerbocking"}, {"segment": [8.850519500780031, 10.833422776911076], "label": "Powerbocking"}, {"segment": [16.44358814352574, 19.200307332293292], "label": "Powerbocking"}, {"segment": [22.489024960998442, 25.584288611544462], "label": "Powerbocking"}, {"segment": [27.03519344773791, 29.550095163806553], "label": "Powerbocking"}]}, "5CtecK635eI": {"duration": 214.07, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=5CtecK635eI", "annotations": [{"segment": [51.09502071762871, 214.0647619047619], "label": "Clean and jerk"}]}, "bOULU-0Nloc": {"duration": 176.31, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bOULU-0Nloc", "annotations": [{"segment": [8.237129485179407, 125.20436817472698], "label": "Shoveling snow"}]}, "D707ZMWH7jQ": {"duration": 200.34, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=D707ZMWH7jQ", "annotations": [{"segment": [21.253104767550703, 47.50694006864275], "label": "Fixing bicycle"}, {"segment": [66.25967956942277, 96.26406277067083], "label": "Fixing bicycle"}, {"segment": [110.95370871294853, 174.4004773572543], "label": "Fixing bicycle"}]}, "ciXArKhjAv8": {"duration": 7.547, "subset": "training", "resolution": "608x1080", "url": "https://www.youtube.com/watch?v=ciXArKhjAv8", "annotations": [{"segment": [0.2, 3.8485741029641183], "label": "Having an ice cream"}]}, "o2qCnRFKD7o": {"duration": 64.81, "subset": "testing", "resolution": "200x160", "url": "https://www.youtube.com/watch?v=o2qCnRFKD7o", "annotations": []}, "uub0z8wJfhU": {"duration": 80.11, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uub0z8wJfhU", "annotations": []}, "i2dFL7sGf9c": {"duration": 8.453, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=i2dFL7sGf9c", "annotations": [{"segment": [0, 6.428051033541342], "label": "Doing a powerbomb"}]}, "c8GFfLcwFrE": {"duration": 122.508, "subset": "training", "resolution": "854x270", "url": "https://www.youtube.com/watch?v=c8GFfLcwFrE", "annotations": [{"segment": [0, 122.30832199546485], "label": "Surfing"}]}, "gsfIHiBB6xE": {"duration": 85.867, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gsfIHiBB6xE", "annotations": [{"segment": [0.2, 85.621], "label": "Cutting the grass"}]}, "ONAP4ATzU8E": {"duration": 199.321, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ONAP4ATzU8E", "annotations": []}, "wuZkli4TSZQ": {"duration": 59.931, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=wuZkli4TSZQ", "annotations": [{"segment": [0.2, 53.91155451713396], "label": "Hitting a pinata"}]}, "Ay6oxBYCSnU": {"duration": 39.451, "subset": "training", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=Ay6oxBYCSnU", "annotations": [{"segment": [0, 39.450702], "label": "Playing ice hockey"}]}, "2u1gdf_53C0": {"duration": 73.561, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=2u1gdf_53C0", "annotations": [{"segment": [7.80361948627605, 51.18256200936037], "label": "Painting furniture"}]}, "sG3JpMuXFnU": {"duration": 126.038, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=sG3JpMuXFnU", "annotations": [{"segment": [36.163032761310454, 123.24902808112326], "label": "River tubing"}]}, "f00gW6PbWDE": {"duration": 48.019, "subset": "training", "resolution": "1906x1080", "url": "https://www.youtube.com/watch?v=f00gW6PbWDE", "annotations": [{"segment": [0.2, 3.745621294851794], "label": "Shuffleboard"}, {"segment": [8.764753829953198, 12.7351124024961], "label": "Shuffleboard"}, {"segment": [14.083536068642744, 15.881434290171606], "label": "Shuffleboard"}, {"segment": [29.06602124804992, 34.08515378315133], "label": "Shuffleboard"}]}, "UYHgieL5IKM": {"duration": 185.18, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UYHgieL5IKM", "annotations": [{"segment": [69.03073166926677, 120.44274102964118], "label": "Sharpening knives"}, {"segment": [142.682767550702, 153.65836505460217], "label": "Sharpening knives"}]}, "jafelg2mzIk": {"duration": 27.17, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=jafelg2mzIk", "annotations": []}, "SiqB6_9kNQU": {"duration": 162.169, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SiqB6_9kNQU", "annotations": [{"segment": [3.540427457098284, 157.80190951638065], "label": "Riding bumper cars"}]}, "8ikOQRbeQL8": {"duration": 132.215, "subset": "training", "resolution": "854x478", "url": "https://www.youtube.com/watch?v=8ikOQRbeQL8", "annotations": [{"segment": [0.01, 126.02653762870514], "label": "Baton 
twirling"}]}, "V6Sy8zpJSuc": {"duration": 59.676, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=V6Sy8zpJSuc", "annotations": []}, "gwyB0U6g0JM": {"duration": 208.051, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=gwyB0U6g0JM", "annotations": []}, "saMB0Mr1qdE": {"duration": 157.71, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=saMB0Mr1qdE", "annotations": []}, "UvIS1-hwgaQ": {"duration": 156.2, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UvIS1-hwgaQ", "annotations": [{"segment": [32.1660419775351, 144.25982462652107], "label": "Rock climbing"}]}, "H_xL4tssqEU": {"duration": 116.82, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=H_xL4tssqEU", "annotations": [{"segment": [16.35104620105285, 106.46349387582], "label": "Knitting"}]}, "Spi0HgoqMs4": {"duration": 32.56, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Spi0HgoqMs4", "annotations": []}, "feio1hW6Dy0": {"duration": 112.22, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=feio1hW6Dy0", "annotations": [{"segment": [12.075645865834632, 47.07751794071763], "label": "Ballet"}, {"segment": [60.37822932917317, 92.05492355694227], "label": "Ballet"}, {"segment": [92.77149765990639, 108.87522932917317], "label": "Ballet"}]}, "G02h5bFOpCE": {"duration": 102.47, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=G02h5bFOpCE", "annotations": [{"segment": [23.33942288299532, 75.29361765678627], "label": "Surfing"}]}, "asyXgaH1Sro": {"duration": 61.63, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=asyXgaH1Sro", "annotations": [{"segment": [24.99640730109204, 61.33733791575663], "label": "Brushing hair"}]}, "PxiYXUk05WA": {"duration": 198.3, "subset": "testing", 
"resolution": "624x360", "url": "https://www.youtube.com/watch?v=PxiYXUk05WA", "annotations": []}, "8_XQPqLdblg": {"duration": 34.62, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8_XQPqLdblg", "annotations": [{"segment": [1.2962492106084242, 33.16237563806552], "label": "Playing guitarra"}]}, "bVq8bimzJR4": {"duration": 89.003, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bVq8bimzJR4", "annotations": []}, "MOBJ_kpzHc0": {"duration": 189.545, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MOBJ_kpzHc0", "annotations": [{"segment": [0.01, 189.54448979591837], "label": "Making a lemonade"}]}, "-fLJHBqizNM": {"duration": 72.7, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-fLJHBqizNM", "annotations": [{"segment": [8.761310452418098, 57.73478939157566], "label": "Using parallel bars"}]}, "XFlWVBU4z9E": {"duration": 87.89, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XFlWVBU4z9E", "annotations": [{"segment": [86.10509665522622, 87.887527], "label": "Drinking coffee"}]}, "74HRnmwIhfM": {"duration": 213.79, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=74HRnmwIhfM", "annotations": [{"segment": [18.66993138, 191.4400797365228], "label": "Preparing pasta"}]}, "31TT2oiYRO4": {"duration": 217.53, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=31TT2oiYRO4", "annotations": [{"segment": [4.748861154446178, 213.35954758190329], "label": "Zumba"}]}, "wZ-teiWX4mg": {"duration": 115.497, "subset": "training", "resolution": "300x240", "url": "https://www.youtube.com/watch?v=wZ-teiWX4mg", "annotations": [{"segment": [10.807581903276132, 22.15554290171607], "label": "Playing ice hockey"}, {"segment": [26.47857566302652, 63.76473322932917], "label": "Playing ice hockey"}]}, "yqOW5LBA81w": 
{"duration": 67.156, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yqOW5LBA81w", "annotations": [{"segment": [0.01, 66.05211137457874], "label": "Trimming branches or hedges"}]}, "GqWH2IYPw6U": {"duration": 97.52000000000001, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GqWH2IYPw6U", "annotations": [{"segment": [4.27345786, 92.24000976911076], "label": "Playing violin"}]}, "UFfi0gxL6_Y": {"duration": 158.99, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=UFfi0gxL6_Y", "annotations": [{"segment": [16.81349200320292, 154.02586271138844], "label": "Playing violin"}]}, "jDlvGWOdcqU": {"duration": 180.118, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jDlvGWOdcqU", "annotations": []}, "4Lbvc9rA4LU": {"duration": 231.178, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4Lbvc9rA4LU", "annotations": []}, "WNd6SHMi30M": {"duration": 51.2, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WNd6SHMi30M", "annotations": [{"segment": [6.864034321372855, 11.09853198127925], "label": "Hand car wash"}, {"segment": [14.531890795631826, 16.767566302652106], "label": "Hand car wash"}, {"segment": [20.20092511700468, 29.223472698907955], "label": "Hand car wash"}]}, "UvOEuhS0V3E": {"duration": 8.17, "subset": "testing", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=UvOEuhS0V3E", "annotations": []}, "Y8-7fr5bv24": {"duration": 195.025, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Y8-7fr5bv24", "annotations": [{"segment": [14.299760931933875, 16.125262327499904], "label": "Braiding hair"}, {"segment": [23.123017677169674, 27.3825209334904], "label": "Braiding hair"}, {"segment": [29.512272561650764, 118.96184094438608], "label": "Braiding hair"}, {"segment": [135.08710327188598, 
157.60162048386698], "label": "Braiding hair"}]}, "wG0OD99mjdY": {"duration": 119.24000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wG0OD99mjdY", "annotations": [{"segment": [2.9762113198127924, 11.90484527925117], "label": "Rock climbing"}, {"segment": [15.997135843993759, 21.205505653666144], "label": "Rock climbing"}, {"segment": [26.599888670826832, 61.75638488611545], "label": "Rock climbing"}]}, "R74yolowFPs": {"duration": 185.11, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=R74yolowFPs", "annotations": []}, "fruimvo90vA": {"duration": 36.99, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fruimvo90vA", "annotations": [{"segment": [0.01, 8.586909516380656], "label": "Triple jump"}]}, "b993qWuMRBA": {"duration": 490.22, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=b993qWuMRBA", "annotations": [{"segment": [149.1129407176287, 329.5778330733229], "label": "Removing ice from car"}]}, "UH9qJ4Y6ENA": {"duration": 198.252, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UH9qJ4Y6ENA", "annotations": [{"segment": [4.329319812792512, 68.03216848673947], "label": "Doing fencing"}, {"segment": [81.94783931357254, 107.30528393135725], "label": "Doing fencing"}, {"segment": [130.18883151326054, 170.0804212168487], "label": "Doing fencing"}]}, "bphckj0BV5U": {"duration": 70.28999999999999, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bphckj0BV5U", "annotations": []}, "BAtlo_AGFWs": {"duration": 81.97, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=BAtlo_AGFWs", "annotations": []}, "M679ctbf49M": {"duration": 198.09, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=M679ctbf49M", "annotations": []}, "n4apOkL24BE": {"duration": 108.07, 
"subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=n4apOkL24BE", "annotations": [{"segment": [42.99025173251828, 56.47738953095539], "label": "Hand washing clothes"}, {"segment": [79.06834416536661, 92.89266020280812], "label": "Hand washing clothes"}]}, "aB5xErksFkI": {"duration": 15.07, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aB5xErksFkI", "annotations": [{"segment": [0.8916661466458659, 14.102404056162248], "label": "Laying tile"}]}, "2BQsHMTk_VQ": {"duration": 119.165, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2BQsHMTk_VQ", "annotations": []}, "wnkihU1fO5w": {"duration": 85.22, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wnkihU1fO5w", "annotations": []}, "Co_cpZWbzFk": {"duration": 57.958, "subset": "testing", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=Co_cpZWbzFk", "annotations": []}, "KMz8f9vDK38": {"duration": 131.634, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KMz8f9vDK38", "annotations": [{"segment": [0, 131.4338321995465], "label": "Playing ice hockey"}]}, "Nqh3RtLRleU": {"duration": 62.578, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Nqh3RtLRleU", "annotations": [{"segment": [0.01, 4.226497656385476], "label": "Running a marathon"}, {"segment": [25.45636993052878, 31.88376245389326], "label": "Running a marathon"}, {"segment": [38.70069391806772, 47.07578114548205], "label": "Running a marathon"}]}, "bEt8fGREAAA": {"duration": 121.46, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bEt8fGREAAA", "annotations": [{"segment": [61.53072654, 115.61720717082683], "label": "Playing saxophone"}]}, "pev7rvOE8eM": {"duration": 619.393, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pev7rvOE8eM", 
"annotations": [{"segment": [1.4494359366211385, 148.32561084756315], "label": "Removing ice from car"}, {"segment": [187.94352644854092, 238.19063891807374], "label": "Removing ice from car"}]}, "2_H3lxMWvPo": {"duration": 137.857, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2_H3lxMWvPo", "annotations": []}, "J98U-PGh1bQ": {"duration": 27.795, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=J98U-PGh1bQ", "annotations": [{"segment": [1.0406316131045241, 27.706816698907954], "label": "Mooping floor"}]}, "fU4EgYmISro": {"duration": 32.16, "subset": "validation", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=fU4EgYmISro", "annotations": [{"segment": [0.01, 32.05929491887675], "label": "Playing badminton"}]}, "iYlglcAh34E": {"duration": 144.498, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iYlglcAh34E", "annotations": []}, "N_FbnDY_2j8": {"duration": 30.186, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=N_FbnDY_2j8", "annotations": [{"segment": [0.01, 30.132812221218124], "label": "Snow tubing"}]}, "-AaOr1DI2no": {"duration": 113.847, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=-AaOr1DI2no", "annotations": [{"segment": [2.131309254290172, 110.1176448049922], "label": "Hanging wallpaper"}]}, "1AxGiLSmGZo": {"duration": 60.16, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=1AxGiLSmGZo", "annotations": [{"segment": [0.01, 56.11544461778471], "label": "Windsurfing"}]}, "nobV1wL__iY": {"duration": 157.73, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nobV1wL__iY", "annotations": [{"segment": [0.49214556337355536, 157.73265306122448], "label": "Paintball"}]}, "pv73L2pQX1s": {"duration": 66.015, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=pv73L2pQX1s", "annotations": [{"segment": [0.2, 65.761], "label": "Welding"}]}, "msz1VkByvuU": {"duration": 130.89, "subset": "testing", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=msz1VkByvuU", "annotations": []}, "CB41iJu4ZVY": {"duration": 95.18, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CB41iJu4ZVY", "annotations": [{"segment": [11.062061794036387, 79.36472384945398], "label": "Using the balance beam"}]}, "SnBTp7iV2kE": {"duration": 146.751, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=SnBTp7iV2kE", "annotations": [{"segment": [0.010000000000000002, 138.5082960374415], "label": "Playing pool"}]}, "Czw85LWCGes": {"duration": 56.053, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Czw85LWCGes", "annotations": [{"segment": [0.2, 50.3213728549142], "label": "Welding"}]}, "H7EmlU0wQ6g": {"duration": 22.62, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=H7EmlU0wQ6g", "annotations": [{"segment": [0.4323409462685035, 21.850830466517724], "label": "Vacuuming floor"}]}, "myGBEXiYGOU": {"duration": 203.76, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=myGBEXiYGOU", "annotations": []}, "sARnRvNdl-Y": {"duration": 126.22, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sARnRvNdl-Y", "annotations": [{"segment": [2.4121263045432784, 111.88691913943424], "label": "Playing guitarra"}]}, "f0cxC1B5cbw": {"duration": 228.74, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=f0cxC1B5cbw", "annotations": [{"segment": [14.983778471138846, 59.578357254290175], "label": "Raking leaves"}]}, "8qbx7yfwTNI": {"duration": 212.533, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=8qbx7yfwTNI", "annotations": []}, 
"bnkO0slLSyw": {"duration": 221.52, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bnkO0slLSyw", "annotations": [{"segment": [30.750356121684867, 60.46418338533541], "label": "Tango"}, {"segment": [61.579788820000005, 213.17943513572544], "label": "Tango"}]}, "7eR0DyDg7wQ": {"duration": 53.29, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=7eR0DyDg7wQ", "annotations": [{"segment": [41.52964118564743, 53.241], "label": "Drinking coffee"}]}, "FfYNkePtHjo": {"duration": 154.79, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FfYNkePtHjo", "annotations": [{"segment": [4.103780533541342, 151.11568317628706], "label": "Playing badminton"}]}, "QBqfrJzcrns": {"duration": 69.43, "subset": "validation", "resolution": "328x240", "url": "https://www.youtube.com/watch?v=QBqfrJzcrns", "annotations": [{"segment": [0, 3.249921996879875], "label": "Ironing clothes"}]}, "SsW-IeF0bzE": {"duration": 235.404, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SsW-IeF0bzE", "annotations": [{"segment": [8.997496966545327, 198.12855565106958], "label": "Riding bumper cars"}]}, "GGSY1Qvo990": {"duration": 18.16, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=GGSY1Qvo990", "annotations": [{"segment": [5.823597503900156, 16.198647425897036], "label": "Clean and jerk"}]}, "gXp3KSWhf1g": {"duration": 234.452, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gXp3KSWhf1g", "annotations": [{"segment": [0.2, 16.089928237129488], "label": "Running a marathon"}, {"segment": [87.39756474258971, 212.84402808112324], "label": "Running a marathon"}]}, "9dhczBPiZoI": {"duration": 93.86, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9dhczBPiZoI", "annotations": []}, "CGO3djF-9cw": {"duration": 192.053, "subset": "testing", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=CGO3djF-9cw", "annotations": []}, "p5ic2-xLyZg": {"duration": 151.255, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=p5ic2-xLyZg", "annotations": []}, "nywOe9C3ajM": {"duration": 53.2, "subset": "testing", "resolution": "640x272", "url": "https://www.youtube.com/watch?v=nywOe9C3ajM", "annotations": []}, "5oyenoaVB-w": {"duration": 47.07, "subset": "testing", "resolution": "450x360", "url": "https://www.youtube.com/watch?v=5oyenoaVB-w", "annotations": []}, "Z90-HZ_geSI": {"duration": 108.252, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Z90-HZ_geSI", "annotations": [{"segment": [9.28834410519278, 63.66737686650324], "label": "Grooming dog"}]}, "ByF8Pg3xXNA": {"duration": 20.25, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ByF8Pg3xXNA", "annotations": [{"segment": [0.10321734897740682, 20.098707327170555], "label": "Hand washing clothes"}]}, "hV_Ud3zrAnA": {"duration": 92.323, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hV_Ud3zrAnA", "annotations": [{"segment": [0.2, 84.80652647975079], "label": "Rope skipping"}]}, "Cx1614E1Mgk": {"duration": 122.254, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Cx1614E1Mgk", "annotations": [{"segment": [0.2, 110.78034579439252], "label": "Playing congas"}]}, "qyu0tJDF8Ic": {"duration": 24.01, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=qyu0tJDF8Ic", "annotations": []}, "dIlpPraDc-I": {"duration": 96.596, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dIlpPraDc-I", "annotations": [{"segment": [5.877076952465854, 96.595034], "label": "Skiing"}]}, "qU4GmCN2e1Y": {"duration": 48.51, "subset": "testing", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=qU4GmCN2e1Y", "annotations": []}, "S9alQwrQ-oo": {"duration": 148.7, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=S9alQwrQ-oo", "annotations": [{"segment": [9.333899, 105.51638065522621], "label": "Doing nails"}]}, "B3eCRzEMtrI": {"duration": 162.63, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=B3eCRzEMtrI", "annotations": []}, "LK1LJ63X81E": {"duration": 89.44, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LK1LJ63X81E", "annotations": []}, "xXDNNCl1m6c": {"duration": 54.707, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xXDNNCl1m6c", "annotations": [{"segment": [11.878507331891853, 47.3265632571255], "label": "Removing ice from car"}]}, "4rv1uLmbyHA": {"duration": 116.05, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=4rv1uLmbyHA", "annotations": [{"segment": [41.09831382370942, 106.27625476131045], "label": "Preparing pasta"}]}, "XhqFJNRt-5g": {"duration": 30.953, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XhqFJNRt-5g", "annotations": [{"segment": [6.03590266059622, 24.674768606864273], "label": "Calf roping"}]}, "Vx6vP1oxiAg": {"duration": 19.97, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Vx6vP1oxiAg", "annotations": [{"segment": [0.19999999999999998, 17.016734789391574], "label": "Camel ride"}]}, "-vKXPND_mD8": {"duration": 8.081, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-vKXPND_mD8", "annotations": [{"segment": [0.2, 7.880544217687076], "label": "Camel ride"}]}, "xpxxH2V7Fe8": {"duration": 151.511, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xpxxH2V7Fe8", "annotations": [{"segment": [0.01, 151.461], "label": "Drum corps"}]}, "CxUFit4ek3U": 
{"duration": 152.857, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=CxUFit4ek3U", "annotations": []}, "nOh81Wzig8U": {"duration": 225.09, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=nOh81Wzig8U", "annotations": []}, "uJ4OZfCjViM": {"duration": 58.677, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uJ4OZfCjViM", "annotations": []}, "tdJS3WJjeVo": {"duration": 175.125, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tdJS3WJjeVo", "annotations": []}, "o1DCCTev0CQ": {"duration": 215.087, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=o1DCCTev0CQ", "annotations": [{"segment": [0.01, 215.041], "label": "Hanging wallpaper"}]}, "YqairWJU2Vw": {"duration": 102.89, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YqairWJU2Vw", "annotations": [{"segment": [12.910260760000002, 45.30629045], "label": "Discus throw"}]}, "fKmsf8B7pDo": {"duration": 156.34, "subset": "testing", "resolution": "568x320", "url": "https://www.youtube.com/watch?v=fKmsf8B7pDo", "annotations": []}, "1cCRZztswFA": {"duration": 175.752, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1cCRZztswFA", "annotations": [{"segment": [0.01, 155.30667601687085], "label": "Baton twirling"}]}, "aOTtBZynDOQ": {"duration": 79.67, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aOTtBZynDOQ", "annotations": [{"segment": [9.321489430577223, 78.17622469110765], "label": "Javelin throw"}]}, "cr9VTwfM_2w": {"duration": 74.328, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cr9VTwfM_2w", "annotations": [{"segment": [0.2, 53.436444617784716], "label": "Longboarding"}, {"segment": [58.304840873634944, 69.54851794071763], "label": "Longboarding"}]}, 
"BFChXuwZ4wo": {"duration": 209.14, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=BFChXuwZ4wo", "annotations": []}, "VejaBBey5DU": {"duration": 63.321, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=VejaBBey5DU", "annotations": []}, "hocTbE5f0uY": {"duration": 183.856, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hocTbE5f0uY", "annotations": []}, "KaGXQh-UHVU": {"duration": 121.789, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KaGXQh-UHVU", "annotations": [{"segment": [0.01, 19.75978293553511], "label": "Mooping floor"}, {"segment": [38.75957421970348, 107.15882272698909], "label": "Mooping floor"}]}, "HysxcMFrTkc": {"duration": 201.689, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HysxcMFrTkc", "annotations": [{"segment": [19.193447879411774, 188.78801092043682], "label": "Rope skipping"}]}, "YK5n3YTf8pI": {"duration": 60.14, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YK5n3YTf8pI", "annotations": [{"segment": [25.09901165, 54.16840008], "label": "Clean and jerk"}]}, "7EZM1DRIOHE": {"duration": 78.48, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=7EZM1DRIOHE", "annotations": []}, "WVxP7f_OyEs": {"duration": 175.264, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=WVxP7f_OyEs", "annotations": [{"segment": [19.352654403518795, 40.06817624522593], "label": "Wakeboarding"}, {"segment": [74.13975822171791, 90.22154491462214], "label": "Wakeboarding"}, {"segment": [95.40042537504893, 101.39702380291152], "label": "Wakeboarding"}, {"segment": [113.93536597026058, 119.11424643068736], "label": "Wakeboarding"}, {"segment": [149.09723713480605, 157.0018440772633], "label": "Wakeboarding"}]}, "niqc-dW54ic": {"duration": 77.03999999999999, "subset": 
"validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=niqc-dW54ic", "annotations": [{"segment": [58.44506449609984, 73.92640601560062], "label": "Using the balance beam"}]}, "G25py7yEVyo": {"duration": 88.03, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=G25py7yEVyo", "annotations": []}, "c2VcfB3YTBo": {"duration": 147.42000000000002, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=c2VcfB3YTBo", "annotations": [{"segment": [0.01, 128.33391858346334], "label": "Tango"}]}, "UuJwtJBJ7oU": {"duration": 159.289, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UuJwtJBJ7oU", "annotations": [{"segment": [13.89435790931118, 148.8681204569055], "label": "Playing congas"}]}, "YTdLk7Nsn_k": {"duration": 120.164, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YTdLk7Nsn_k", "annotations": [{"segment": [41.414572542901716, 95.75948673946957], "label": "Assembling bicycle"}]}, "jQU4NYJzxco": {"duration": 71.91, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=jQU4NYJzxco", "annotations": [{"segment": [0.7853128767550702, 66.6394069703588], "label": "Washing dishes"}]}, "X2zoUDI1Gmc": {"duration": 211.325, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=X2zoUDI1Gmc", "annotations": [{"segment": [78.61414181879917, 101.61995284523775], "label": "Blow-drying hair"}, {"segment": [126.59769053108533, 149.27484711428906], "label": "Blow-drying hair"}, {"segment": [126.59769053108533, 148.94619267105423], "label": "Blow-drying hair"}, {"segment": [161.43506151397804, 191.34261584834815], "label": "Blow-drying hair"}]}, "KPJJfGK5NCw": {"duration": 128.52, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=KPJJfGK5NCw", "annotations": [{"segment": [17.440853354134166, 126.29583463338534], 
"label": "Shot put"}]}, "oO3iQ35xcTs": {"duration": 35.34, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=oO3iQ35xcTs", "annotations": []}, "fkgojk18K0E": {"duration": 96.09, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fkgojk18K0E", "annotations": [{"segment": [36.67334112, 87.9552936], "label": "Belly dance"}]}, "Ujgmih4OtMs": {"duration": 197.06, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ujgmih4OtMs", "annotations": [{"segment": [24.914156911076443, 58.1330327925117], "label": "Archery"}, {"segment": [83.04718970358813, 190.393371950078], "label": "Archery"}]}, "h0p_7Q2Bucc": {"duration": 81.34, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=h0p_7Q2Bucc", "annotations": [{"segment": [2.06113026, 73.49381145085803], "label": "Plataform diving"}]}, "Cy56IkwsQVs": {"duration": 237.657, "subset": "training", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=Cy56IkwsQVs", "annotations": [{"segment": [0.01, 233.73231543650257], "label": "Beer pong"}]}, "Tc0nHNkf0KM": {"duration": 27.12, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Tc0nHNkf0KM", "annotations": [{"segment": [4.357961494539782, 18.362672705148206], "label": "Washing hands"}]}, "2EgLTpVOLys": {"duration": 49.342, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2EgLTpVOLys", "annotations": []}, "dD6-HmQtMkw": {"duration": 187.2, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=dD6-HmQtMkw", "annotations": []}, "plE3KNmuwj4": {"duration": 157.618, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=plE3KNmuwj4", "annotations": [{"segment": [12.25631848788465, 46.08376783776456], "label": "Wakeboarding"}, {"segment": [49.76066450622977, 106.62999964515834], "label": 
"Wakeboarding"}, {"segment": [117.41556182420854, 127.46574593307324], "label": "Wakeboarding"}]}, "zM1-aiWdPvY": {"duration": 72.63, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zM1-aiWdPvY", "annotations": [{"segment": [24.354607802058496, 56.371454864363784], "label": "Grooming horse"}]}, "Q6tYENILn40": {"duration": 31.56, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Q6tYENILn40", "annotations": [{"segment": [1.00060905, 15.4953983], "label": "Playing piano"}, {"segment": [17.163395599999998, 29.68510111076443], "label": "Playing piano"}]}, "C7rGK81C7SU": {"duration": 238.957, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=C7rGK81C7SU", "annotations": [{"segment": [5.963207488299532, 238.921], "label": "Making a cake"}]}, "WHchTZ61VT4": {"duration": 150.79, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WHchTZ61VT4", "annotations": [{"segment": [0.01, 103.83792878705147], "label": "Bathing dog"}, {"segment": [121.20291574804992, 150.218333], "label": "Bathing dog"}]}, "ZWHFbIjh0N8": {"duration": 187.943, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZWHFbIjh0N8", "annotations": [{"segment": [10.262033471138846, 176.21377574014525], "label": "Playing beach volleyball"}]}, "OwaZOx15_x0": {"duration": 108.322, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OwaZOx15_x0", "annotations": []}, "qgasVDGUw3E": {"duration": 172.431, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qgasVDGUw3E", "annotations": [{"segment": [12.603845070363553, 156.87769181789412], "label": "Fixing the roof"}]}, "GnLUmMkyvCo": {"duration": 176.43, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GnLUmMkyvCo", "annotations": [{"segment": [5.225988029641186, 
164.20604493135724], "label": "Hammer throw"}]}, "jvBTmCNtVpE": {"duration": 51.43, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jvBTmCNtVpE", "annotations": []}, "qCZhlIBr8_Q": {"duration": 67.291, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qCZhlIBr8_Q", "annotations": [{"segment": [0.01, 63.61671810608425], "label": "Drum corps"}]}, "-3JfzLB02vE": {"duration": 162.424, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=-3JfzLB02vE", "annotations": []}, "BryW4niZT9Y": {"duration": 111.81, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BryW4niZT9Y", "annotations": [{"segment": [0.01, 43.06238845553822], "label": "Tennis serve with ball bouncing"}, {"segment": [48.815663026521065, 90.1346349453978], "label": "Tennis serve with ball bouncing"}]}, "ztD4lVJATws": {"duration": 14.165, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ztD4lVJATws", "annotations": [{"segment": [0, 13.932153581903277], "label": "Kneeling"}]}, "moqRG7uwZ4Q": {"duration": 104.257, "subset": "training", "resolution": "3840x2160", "url": "https://www.youtube.com/watch?v=moqRG7uwZ4Q", "annotations": [{"segment": [22.120176374414978, 85.22773838377535], "label": "Blowing leaves"}]}, "JMJp9kFudNA": {"duration": 156.711, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JMJp9kFudNA", "annotations": [{"segment": [25.670299383403908, 37.4052933872457], "label": "Disc dog"}, {"segment": [42.53935326392648, 48.89580834934078], "label": "Disc dog"}, {"segment": [52.31851493379464, 59.89736522794245], "label": "Disc dog"}, {"segment": [66.98725743859687, 82.14495802689251], "label": "Disc dog"}, {"segment": [94.124431072481, 111.72692207824369], "label": "Disc dog"}, {"segment": [121.01712566461842, 144.487112049922], "label": "Disc dog"}]}, "OWEwmnZvvJE": 
{"duration": 151.975, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=OWEwmnZvvJE", "annotations": [{"segment": [13.277020528440186, 138.46035591887676], "label": "Swimming"}]}, "2SCc8b-lYmc": {"duration": 4.46, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=2SCc8b-lYmc", "annotations": []}, "O-upcCp0jIs": {"duration": 112.083, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=O-upcCp0jIs", "annotations": [{"segment": [16.786179594383775, 102.99020605304212], "label": "Installing carpet"}]}, "JlgDwIT9KLI": {"duration": 109.78, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JlgDwIT9KLI", "annotations": [{"segment": [0.01, 88.35674882995319], "label": "Making a sandwich"}]}, "di_PcoYYnro": {"duration": 60.488, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=di_PcoYYnro", "annotations": []}, "i_Fs0Qrtu90": {"duration": 224.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=i_Fs0Qrtu90", "annotations": [{"segment": [106.08264496022018, 132.29746661431082], "label": "Washing hands"}]}, "d79uK3AhtTU": {"duration": 186.27, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=d79uK3AhtTU", "annotations": [{"segment": [59.151901200000005, 174.93696346021844], "label": "Bungee jumping"}]}, "N5J9Zb07uuQ": {"duration": 176.333, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=N5J9Zb07uuQ", "annotations": [{"segment": [29.68753198127925, 74.21882995319812], "label": "Hanging wallpaper"}]}, "GlvfqDfvbKA": {"duration": 77.857, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GlvfqDfvbKA", "annotations": [{"segment": [11.409195007800312, 75.99991575663026], "label": "Playing drums"}]}, "8fZbv6OUEm8": {"duration": 64.111, "subset": 
"validation", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=8fZbv6OUEm8", "annotations": [{"segment": [4.597201248049923, 60.0821856474259], "label": "Beach soccer"}]}, "CAW0CEuyvZo": {"duration": 3.93, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CAW0CEuyvZo", "annotations": [{"segment": [0.01, 3.924171], "label": "Putting on makeup"}]}, "kKxNVwk7Evc": {"duration": 88.631, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=kKxNVwk7Evc", "annotations": [{"segment": [36.745684867394694, 87.888], "label": "Drum corps"}]}, "NLkJgnrKaKM": {"duration": 17.93, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NLkJgnrKaKM", "annotations": [{"segment": [0, 17.925804], "label": "Smoking hookah"}]}, "vCaOS3TAjbI": {"duration": 178.56, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vCaOS3TAjbI", "annotations": []}, "Zr8cz8QrBp4": {"duration": 84.5, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Zr8cz8QrBp4", "annotations": [{"segment": [2.89401248049922, 84.321], "label": "Breakdancing"}]}, "esZMkzlZiuY": {"duration": 64.95, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=esZMkzlZiuY", "annotations": [{"segment": [18.60962684, 57.344720159999994], "label": "Playing water polo"}]}, "Xueo66Jb8T4": {"duration": 165.327, "subset": "training", "resolution": "853x474", "url": "https://www.youtube.com/watch?v=Xueo66Jb8T4", "annotations": [{"segment": [19.601843644305774, 97.23546123556943], "label": "Roof shingle removal"}]}, "4mBVik8dq_w": {"duration": 23.04, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4mBVik8dq_w", "annotations": [{"segment": [0.8804021107644305, 20.1055094274571], "label": "Mowing the lawn"}]}, "g-rw2Kyh9xo": {"duration": 238.91, "subset": "validation", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=g-rw2Kyh9xo", "annotations": [{"segment": [31.73289250619364, 237.1509327304927], "label": "Windsurfing"}]}, "A3a6MNgab0c": {"duration": 95.67, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=A3a6MNgab0c", "annotations": [{"segment": [44.367414196567864, 47.35009750390015], "label": "Archery"}, {"segment": [70.61502730109204, 78.37000390015601], "label": "Archery"}, {"segment": [87.4671879875195, 89.70420046801873], "label": "Archery"}]}, "B5uwLKdF8fY": {"duration": 195.164, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=B5uwLKdF8fY", "annotations": []}, "CNSVakEgGxc": {"duration": 34.99, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=CNSVakEgGxc", "annotations": []}, "G-HRgYT6CCY": {"duration": 129.15, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=G-HRgYT6CCY", "annotations": []}, "Ja8QImLWYII": {"duration": 91.069, "subset": "validation", "resolution": "854x270", "url": "https://www.youtube.com/watch?v=Ja8QImLWYII", "annotations": [{"segment": [8.734822932917316, 18.96095709828393], "label": "Hand car wash"}]}, "ZkGN3SMxio4": {"duration": 24.103, "subset": "testing", "resolution": "544x360", "url": "https://www.youtube.com/watch?v=ZkGN3SMxio4", "annotations": []}, "e5_lP2HgtSE": {"duration": 22.2, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=e5_lP2HgtSE", "annotations": [{"segment": [9.835117160686426, 21.921229446177847], "label": "Walking the dog"}]}, "30hcRe-5_3M": {"duration": 35.272, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=30hcRe-5_3M", "annotations": []}, "4XfNexN6t04": {"duration": 37.826, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4XfNexN6t04", "annotations": [{"segment": [6.78613116224649, 
27.734623010920437], "label": "Playing congas"}]}, "P9I3zxNUu74": {"duration": 171.921, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=P9I3zxNUu74", "annotations": [{"segment": [20.920128477379098, 88.24002909048363], "label": "Decorating the Christmas tree"}]}, "OUxgs_QlPHE": {"duration": 187.39, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=OUxgs_QlPHE", "annotations": []}, "PWriIL3pTWY": {"duration": 98.523, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PWriIL3pTWY", "annotations": [{"segment": [18.286457098283933, 27.81385491419657], "label": "Rock-paper-scissors"}]}, "6-beYw2R10s": {"duration": 48.95, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6-beYw2R10s", "annotations": [{"segment": [10.22267394695788, 38.75461466458659], "label": "Bathing dog"}]}, "0CTnYEE7rdo": {"duration": 148.31, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0CTnYEE7rdo", "annotations": [{"segment": [1.8509310327613104, 6.940991372854914], "label": "Skateboarding"}, {"segment": [16.88974567394696, 32.159926694227764], "label": "Skateboarding"}, {"segment": [43.03414651170046, 57.147495636505454], "label": "Skateboarding"}, {"segment": [106.65990076287052, 113.1381593775351], "label": "Skateboarding"}]}, "3qkNnr1_78I": {"duration": 131.147, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3qkNnr1_78I", "annotations": [{"segment": [7.979260296942489, 16.163117011755297], "label": "Slacklining"}, {"segment": [89.61322975975038, 108.0269071076443], "label": "Slacklining"}]}, "iY7bZQnHXlk": {"duration": 205.27, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=iY7bZQnHXlk", "annotations": [{"segment": [1.8494286400000002, 182.00573263806552], "label": "Washing dishes"}]}, "1U0VxGw1cdA": {"duration": 29.05, 
"subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1U0VxGw1cdA", "annotations": [{"segment": [1.4058814352574103, 27.34666146645866], "label": "Playing lacrosse"}]}, "zcDA0s8eWU4": {"duration": 137.57999999999998, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zcDA0s8eWU4", "annotations": [{"segment": [19.619541341653665, 68.722], "label": "Chopping wood"}]}, "dYi4CDe8gHo": {"duration": 101.12, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dYi4CDe8gHo", "annotations": []}, "n--fgqwuTTI": {"duration": 170.574, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=n--fgqwuTTI", "annotations": [{"segment": [17.508344996306473, 161.02375044829074], "label": "Assembling bicycle"}]}, "D9A_9FH7e-A": {"duration": 168.21, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=D9A_9FH7e-A", "annotations": [{"segment": [19.418339538221527, 153.7722563432137], "label": "Doing nails"}]}, "1iUkXSE73W0": {"duration": 50.97, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1iUkXSE73W0", "annotations": []}, "dJAZplo9ke0": {"duration": 30.697, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=dJAZplo9ke0", "annotations": [{"segment": [0.01, 30.696779], "label": "River tubing"}]}, "m8LmntdRXNI": {"duration": 165.791, "subset": "testing", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=m8LmntdRXNI", "annotations": []}, "gwbRqyRZguM": {"duration": 25.589, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=gwbRqyRZguM", "annotations": [{"segment": [3.403765513153005, 21.219727541190387], "label": "Swinging at the playground"}]}, "mpFNy97oV0c": {"duration": 234.1, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=mpFNy97oV0c", "annotations": 
[{"segment": [17.30489376, 218.90323130577224], "label": "Wrapping presents"}]}, "g0ns-bQ4PjE": {"duration": 180.03, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=g0ns-bQ4PjE", "annotations": [{"segment": [0.3568878039473893, 176.73584279304396], "label": "Belly dance"}]}, "m7wfcL6YSMY": {"duration": 43.399, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=m7wfcL6YSMY", "annotations": [{"segment": [1.28637096099844, 43.398094], "label": "Camel ride"}]}, "TY4-gh8DV04": {"duration": 32.229, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=TY4-gh8DV04", "annotations": [{"segment": [3.3436009110764426, 29.790729921996878], "label": "Using the rowing machine"}]}, "I0yNAIWHcQQ": {"duration": 37.269, "subset": "validation", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=I0yNAIWHcQQ", "annotations": [{"segment": [0.2, 37.06802721088435], "label": "Swinging at the playground"}]}, "xpA16yFuDqw": {"duration": 111.062, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xpA16yFuDqw", "annotations": [{"segment": [0.01, 111.06097505668934], "label": "Swimming"}]}, "B2qCBkXrb8s": {"duration": 68.894, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=B2qCBkXrb8s", "annotations": [{"segment": [0.01, 61.12622308892357], "label": "Sumo"}]}, "PmmKHLmG5Ec": {"duration": 98.081, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PmmKHLmG5Ec", "annotations": [{"segment": [6.7298034321372855, 66.5502496099844], "label": "Tug of war"}]}, "fm4ZaId1sL0": {"duration": 15.21, "subset": "training", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=fm4ZaId1sL0", "annotations": [{"segment": [0.7118129485179407, 3.582791840873635], "label": "Ironing clothes"}, {"segment": [5.40977840873635, 10.345014851794073], "label": "Ironing 
clothes"}]}, "U2-GFZxIufw": {"duration": 65.876, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=U2-GFZxIufw", "annotations": [{"segment": [27.70429143620065, 37.349488330218065], "label": "Waterskiing"}]}, "RoHYo2x9gbc": {"duration": 141.224, "subset": "training", "resolution": "632x360", "url": "https://www.youtube.com/watch?v=RoHYo2x9gbc", "annotations": [{"segment": [0, 141.223695], "label": "Cutting the grass"}]}, "x6pYeqq3O08": {"duration": 14.397, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=x6pYeqq3O08", "annotations": []}, "DBGea9pST1A": {"duration": 60.44, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DBGea9pST1A", "annotations": [{"segment": [3.7717016536661467, 44.12890934789392], "label": "Shoveling snow"}]}, "QokthYjtPzM": {"duration": 112.9, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QokthYjtPzM", "annotations": [{"segment": [2.2432236499999996, 67.62297605], "label": "Doing nails"}]}, "SHbvBTSGwYM": {"duration": 191.542, "subset": "testing", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=SHbvBTSGwYM", "annotations": []}, "dG_jxrIaK6w": {"duration": 80.16, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=dG_jxrIaK6w", "annotations": [{"segment": [12.504724406663343, 77.90443291092043], "label": "Playing piano"}]}, "pDuceHUHFt0": {"duration": 188.105, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pDuceHUHFt0", "annotations": []}, "gkhgAlVSKTI": {"duration": 124.88, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gkhgAlVSKTI", "annotations": [{"segment": [0.01, 124.48699067146359], "label": "Playing bagpipes"}]}, "4EQYvkfMUWw": {"duration": 58.93, "subset": "training", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=4EQYvkfMUWw", "annotations": [{"segment": [15.748480060842434, 55.877260157566305], "label": "Playing violin"}]}, "twQbAuWVxlU": {"duration": 19.67, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=twQbAuWVxlU", "annotations": [{"segment": [0.01, 19.57466711076443], "label": "Walking the dog"}]}, "Aj5ncX5G_2w": {"duration": 230.11, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Aj5ncX5G_2w", "annotations": []}, "lcwR7eA-VOY": {"duration": 179.629, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=lcwR7eA-VOY", "annotations": [{"segment": [80.42668350543545, 139.55570769734788], "label": "Arm wrestling"}]}, "veKNTB-N9EQ": {"duration": 151.348, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=veKNTB-N9EQ", "annotations": [{"segment": [109.79192040561624, 115.22248851170048], "label": "Hand car wash"}]}, "ak5mpw8komA": {"duration": 78.35, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ak5mpw8komA", "annotations": [{"segment": [5.520240526371657, 52.23392970792825], "label": "Walking the dog"}]}, "3VAq3wYxnMs": {"duration": 109.32, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3VAq3wYxnMs", "annotations": [{"segment": [59.64742589703587, 89.47113884555382], "label": "Shaving legs"}, {"segment": [94.92461778471139, 99.52599063962558], "label": "Shaving legs"}]}, "eF1fbnVguUc": {"duration": 23.847, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=eF1fbnVguUc", "annotations": []}, "Qe33o8ezVfI": {"duration": 236.24, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Qe33o8ezVfI", "annotations": []}, "IpFsz2xc3sY": {"duration": 236.22, "subset": "training", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=IpFsz2xc3sY", "annotations": [{"segment": [35.734285491419655, 216.98447581903275], "label": "Removing curlers"}]}, "5t_hQVg9NsE": {"duration": 75.09, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5t_hQVg9NsE", "annotations": [{"segment": [28.464752580000003, 68.02374498], "label": "Springboard diving"}]}, "b2eonKWCocE": {"duration": 156.782, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=b2eonKWCocE", "annotations": [{"segment": [0.2, 144.5707651118599], "label": "Raking leaves"}]}, "Ix2vIutdeNQ": {"duration": 45.233, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Ix2vIutdeNQ", "annotations": [{"segment": [15.594970726720227, 28.790714134165366], "label": "Gargling mouthwash"}]}, "kF7p6irju1Q": {"duration": 109.99000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kF7p6irju1Q", "annotations": [{"segment": [1.0295749625903405, 7.721812137285491], "label": "Ballet"}, {"segment": [25.224586315132605, 107.76217827145086], "label": "Ballet"}]}, "ICM-GjDa9QY": {"duration": 126.6, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ICM-GjDa9QY", "annotations": [{"segment": [15.598001560062404, 112.34509984399378], "label": "Cleaning windows"}]}, "Z9o_h64qVeo": {"duration": 165.49, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Z9o_h64qVeo", "annotations": [{"segment": [0.01, 158.73236349453978], "label": "Playing accordion"}]}, "2FRzNpVz6f8": {"duration": 56.5, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2FRzNpVz6f8", "annotations": [{"segment": [0.5288063930720495, 55.87720722620905], "label": "Playing polo"}]}, "KkqsXWg9Gao": {"duration": 75.628, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=KkqsXWg9Gao", 
"annotations": []}, "0iIY3HLF3lU": {"duration": 89.281, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0iIY3HLF3lU", "annotations": [{"segment": [5.276274758069864, 10.413702243320055], "label": "Canoeing"}, {"segment": [20.27200903934069, 54.15125951429692], "label": "Canoeing"}, {"segment": [64.56496367448354, 88.16935977090655], "label": "Canoeing"}]}, "Vvu6wxPyB1E": {"duration": 21.2, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=Vvu6wxPyB1E", "annotations": []}, "cZwFimlaZS4": {"duration": 213.09, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=cZwFimlaZS4", "annotations": []}, "eCNng1eGQ_0": {"duration": 230.39, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eCNng1eGQ_0", "annotations": [{"segment": [20.486955119021086, 132.26665761052212], "label": "Playing field hockey"}]}, "R2EZlSlDCuE": {"duration": 204.336, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=R2EZlSlDCuE", "annotations": [{"segment": [22.08861508160651, 182.18332893986195], "label": "Painting furniture"}]}, "2Lip2nXEzfQ": {"duration": 173.663, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=2Lip2nXEzfQ", "annotations": []}, "bcRri59F0S8": {"duration": 121.302, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bcRri59F0S8", "annotations": []}, "OsiES8BiRac": {"duration": 226.14, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OsiES8BiRac", "annotations": [{"segment": [7.055823348580202, 215.90819415912637], "label": "Belly dance"}]}, "wmmOt79-Azo": {"duration": 87.87, "subset": "training", "resolution": "240x180", "url": "https://www.youtube.com/watch?v=wmmOt79-Azo", "annotations": [{"segment": [1.9860249609984402, 83.48207492121685], "label": "Playing badminton"}]}, 
"34cQhizPKfc": {"duration": 183.461, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=34cQhizPKfc", "annotations": [{"segment": [124.47446957878316, 134.48965678627144], "label": "Skiing"}, {"segment": [137.3661154446178, 140.80026833073325], "label": "Skiing"}, {"segment": [141.94498595943838, 144.80678003120124], "label": "Skiing"}, {"segment": [146.81003588143525, 151.10272698907957], "label": "Skiing"}]}, "VssVjdgvHrQ": {"duration": 98.17, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VssVjdgvHrQ", "annotations": [{"segment": [6.121892854914197, 72.85052497347895], "label": "Washing hands"}]}, "LaWCwEG6n3w": {"duration": 185.087, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LaWCwEG6n3w", "annotations": [{"segment": [7.794238689547582, 90.06675819032762], "label": "Ice fishing"}, {"segment": [99.59304992199688, 108.54199063962558], "label": "Ice fishing"}, {"segment": [129.90397815912635, 161.65828393135726], "label": "Ice fishing"}]}, "Zl3YebXhXC0": {"duration": 16.301, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Zl3YebXhXC0", "annotations": [{"segment": [0.01, 16.221], "label": "Blowing leaves"}]}, "YjE1by2PX08": {"duration": 73.03, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=YjE1by2PX08", "annotations": [{"segment": [6.835577912912435, 72.00142051544462], "label": "Shoveling snow"}]}, "XwrF8edyJz0": {"duration": 214.785, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XwrF8edyJz0", "annotations": [{"segment": [10.384322932917318, 129.3015694227769], "label": "Making a lemonade"}]}, "-zjG7ZdFlrQ": {"duration": 123.159, "subset": "testing", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=-zjG7ZdFlrQ", "annotations": []}, "uBmUiouilQY": {"duration": 38.847, "subset": "validation", "resolution": 
"320x240", "url": "https://www.youtube.com/watch?v=uBmUiouilQY", "annotations": [{"segment": [0.2, 6.480260530421217], "label": "Hula hoop"}]}, "bNRE808ALfM": {"duration": 183.97, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bNRE808ALfM", "annotations": [{"segment": [4.592117333672939, 161.58512867861654], "label": "Tai chi"}]}, "HadTAlDM5YM": {"duration": 131.19, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=HadTAlDM5YM", "annotations": [{"segment": [9.210099016205547, 87.3936062204393], "label": "Shaving"}, {"segment": [97.62704825897036, 106.42780942277692], "label": "Shaving"}]}, "iANrLcieixM": {"duration": 76.49, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iANrLcieixM", "annotations": [{"segment": [28.63770256932726, 65.50874324648986], "label": "Pole vault"}]}, "HGK_K2XqhTA": {"duration": 95.85, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=HGK_K2XqhTA", "annotations": [{"segment": [64.0010200624025, 66.69265174726989], "label": "Drinking coffee"}]}, "rhfqzNjqdmo": {"duration": 162.145, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rhfqzNjqdmo", "annotations": []}, "FHywlXL0QAA": {"duration": 7.06, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=FHywlXL0QAA", "annotations": []}, "0bjHe_5nACw": {"duration": 124.69, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0bjHe_5nACw", "annotations": [{"segment": [21.20333212948518, 24.315747854914196], "label": "Playing violin"}, {"segment": [28.011741528861155, 29.95700135725429], "label": "Playing violin"}, {"segment": [81.50638680967239, 86.75858834633385], "label": "Playing violin"}, {"segment": [102.70971893915757, 108.74002440717628], "label": "Playing violin"}]}, "-n0F3QTuxug": {"duration": 15.3, "subset": "validation", 
"resolution": "640x360", "url": "https://www.youtube.com/watch?v=-n0F3QTuxug", "annotations": [{"segment": [1.2413437566302652, 9.596542118564741], "label": "Washing face"}]}, "nnEPsdA0080": {"duration": 81.874, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nnEPsdA0080", "annotations": []}, "XdsGxg1qwI4": {"duration": 169.3, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XdsGxg1qwI4", "annotations": []}, "SGQQSH88isc": {"duration": 75.767, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SGQQSH88isc", "annotations": [{"segment": [39.82761212735833, 56.67775648039099], "label": "Throwing darts"}]}, "sfeUXTuKMs8": {"duration": 159.99, "subset": "training", "resolution": "608x342", "url": "https://www.youtube.com/watch?v=sfeUXTuKMs8", "annotations": [{"segment": [79.61804677357162, 149.25263940625652], "label": "Horseback riding"}]}, "I4y6q9oIIQo": {"duration": 127.11, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=I4y6q9oIIQo", "annotations": [{"segment": [90.02483855850234, 91.80947192199687], "label": "Washing face"}]}, "fghAmraOM9A": {"duration": 131.495, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fghAmraOM9A", "annotations": [{"segment": [40.18397503900156, 123.42220904836195], "label": "Rollerblading"}]}, "9qgO4EQiQzI": {"duration": 158.5, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=9qgO4EQiQzI", "annotations": [{"segment": [121.01106969823867, 147.91488433998325], "label": "Installing carpet"}]}, "ZOeQh2-ci3M": {"duration": 138.07, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ZOeQh2-ci3M", "annotations": [{"segment": [69.55888143525742, 108.106992199688], "label": "Washing hands"}]}, "r1hot8Kj1ng": {"duration": 105.16, "subset": "testing", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=r1hot8Kj1ng", "annotations": []}, "sf77PM1CtNQ": {"duration": 109.74000000000001, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=sf77PM1CtNQ", "annotations": [{"segment": [86.94170358814353, 99.26414976599064], "label": "Vacuuming floor"}]}, "yScYLIJ_txA": {"duration": 41.262, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yScYLIJ_txA", "annotations": [{"segment": [8.766506230529595, 40.01265576323988], "label": "BMX"}]}, "O9VbYvD4rvg": {"duration": 39.13, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=O9VbYvD4rvg", "annotations": []}, "H9FxfosWRTE": {"duration": 155.946, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=H9FxfosWRTE", "annotations": [{"segment": [2.7969758190327614, 155.921], "label": "Kneeling"}]}, "1aNOjjLWjxc": {"duration": 2.996, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1aNOjjLWjxc", "annotations": [{"segment": [0.2, 2.795374149659864], "label": "Playing beach volleyball"}]}, "bWdufJDosIo": {"duration": 121.046, "subset": "validation", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=bWdufJDosIo", "annotations": [{"segment": [84.0022542901716, 101.38576755070204], "label": "Hanging wallpaper"}]}, "uqixFWxAQSA": {"duration": 209.537, "subset": "training", "resolution": "424x238", "url": "https://www.youtube.com/watch?v=uqixFWxAQSA", "annotations": [{"segment": [31.05460642915513, 185.34696524648984], "label": "Removing ice from car"}]}, "HeMpg3SAUUs": {"duration": 27.66, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=HeMpg3SAUUs", "annotations": [{"segment": [0.01, 27.23595138011396], "label": "Washing face"}]}, "ooWk0EL6I7o": {"duration": 21.29, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ooWk0EL6I7o", 
"annotations": [{"segment": [1.129409825273011, 21.292697], "label": "Removing curlers"}]}, "ciS5DtShxA8": {"duration": 273.601, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=ciS5DtShxA8", "annotations": [{"segment": [10.669305772230889, 221.49478783151324], "label": "Putting on shoes"}]}, "xsh5VD0fggU": {"duration": 66.386, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xsh5VD0fggU", "annotations": []}, "Y0fgqIsaZS8": {"duration": 53.99, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Y0fgqIsaZS8", "annotations": []}, "P2fUelA4BfA": {"duration": 32.555, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=P2fUelA4BfA", "annotations": [{"segment": [0.2, 31.23597352024922], "label": "Peeling potatoes"}]}, "loZjywaPvUk": {"duration": 67.59, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=loZjywaPvUk", "annotations": []}, "l4UJiGsZVfE": {"duration": 211.929, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=l4UJiGsZVfE", "annotations": [{"segment": [17.186851794071764, 124.60467550702029], "label": "BMX"}, {"segment": [130.55397035881435, 196.67581123244932], "label": "BMX"}]}, "e60HGAzBOvo": {"duration": 156.224, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=e60HGAzBOvo", "annotations": [{"segment": [16.324691107644306, 135.73160218408736], "label": "Waxing skis"}]}, "16czjUF1AA4": {"duration": 86.61, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=16czjUF1AA4", "annotations": []}, "RnRUwLtR33g": {"duration": 27.145, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RnRUwLtR33g", "annotations": [{"segment": [4.1370513689859045, 18.57451859849046], "label": "Rope skipping"}]}, "kPbae85fofk": {"duration": 111.967, 
"subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=kPbae85fofk", "annotations": [{"segment": [71.7040522776911, 79.21507358814353], "label": "Throwing darts"}]}, "9wtMJoqGTg0": {"duration": 89.28, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9wtMJoqGTg0", "annotations": [{"segment": [0, 85.66436193447738], "label": "Using the balance beam"}]}, "Cx3QGeQu7xM": {"duration": 113.848, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Cx3QGeQu7xM", "annotations": [{"segment": [7.967547366568452, 33.64076425311571], "label": "Disc dog"}, {"segment": [40.72303098043909, 74.3637968628709], "label": "Disc dog"}, {"segment": [87.99716011637013, 104.81754335120682], "label": "Disc dog"}]}, "AjSFqqfzml0": {"duration": 18.25, "subset": "training", "resolution": "1216x720", "url": "https://www.youtube.com/watch?v=AjSFqqfzml0", "annotations": [{"segment": [2.4564560132605306, 16.96459798], "label": "Shaving legs"}]}, "q66BG9h_7XI": {"duration": 182.347, "subset": "training", "resolution": "657x480", "url": "https://www.youtube.com/watch?v=q66BG9h_7XI", "annotations": [{"segment": [43.4563621336385, 174.1094752741433], "label": "Powerbocking"}]}, "xNy1WgRRkiQ": {"duration": 203.34, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xNy1WgRRkiQ", "annotations": []}, "zLF5DSuDixg": {"duration": 231.74, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zLF5DSuDixg", "annotations": [{"segment": [10.48411230326764, 148.94669892918165], "label": "Shot put"}, {"segment": [167.74579611856473, 219.08179406864272], "label": "Shot put"}]}, "vNMBtwYDH9E": {"duration": 104.21000000000001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vNMBtwYDH9E", "annotations": [{"segment": [5.118234009360374, 21.041628705148206], "label": "Using the balance beam"}, {"segment": 
[27.37848985959438, 99.35873322932918], "label": "Using the balance beam"}]}, "MwQTeFD0OKQ": {"duration": 231.92000000000002, "subset": "validation", "resolution": "854x270", "url": "https://www.youtube.com/watch?v=MwQTeFD0OKQ", "annotations": [{"segment": [37.990160463561395, 217.44844227238477], "label": "Snatch"}]}, "OqoUemFpetc": {"duration": 140.27, "subset": "training", "resolution": "1906x1080", "url": "https://www.youtube.com/watch?v=OqoUemFpetc", "annotations": [{"segment": [2.6209048361934477, 128.42433697347894], "label": "Dodgeball"}]}, "aL4758WHLjQ": {"duration": 61.65, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aL4758WHLjQ", "annotations": []}, "hxbp-zM5JPQ": {"duration": 106.72, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=hxbp-zM5JPQ", "annotations": [{"segment": [19.472377535101405, 41.274783151326055], "label": "Ironing clothes"}, {"segment": [51.59347893915756, 61.745744149766], "label": "Ironing clothes"}, {"segment": [67.40438377535101, 83.21528861154447], "label": "Ironing clothes"}, {"segment": [87.70891419656786, 100.8569297971919], "label": "Ironing clothes"}]}, "b3mJ5rPzDv8": {"duration": 145.543, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=b3mJ5rPzDv8", "annotations": [{"segment": [0.01, 145.481], "label": "Roof shingle removal"}]}, "IcfWEKjl_AY": {"duration": 21.967, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=IcfWEKjl_AY", "annotations": [{"segment": [14.650152886115444, 20.70874414976599], "label": "Fun sliding down"}]}, "c3lo99rni-o": {"duration": 208.42000000000002, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=c3lo99rni-o", "annotations": [{"segment": [16.416439157566302, 190.33317082683308], "label": "Preparing pasta"}]}, "hXjxOvlNxQM": {"duration": 7.129, "subset": "training", "resolution": "480x360", "url": 
"https://www.youtube.com/watch?v=hXjxOvlNxQM", "annotations": [{"segment": [2.031491419656786, 5.788658346333853], "label": "Doing a powerbomb"}]}, "3xoIXUMqmqA": {"duration": 159.614, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=3xoIXUMqmqA", "annotations": [{"segment": [16.67798283931357, 138.9176255850234], "label": "Kneeling"}]}, "a_uamUiKq1o": {"duration": 45.488, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=a_uamUiKq1o", "annotations": [{"segment": [0.01, 34.98522585023401], "label": "Arm wrestling"}]}, "JWFJGkUKVfk": {"duration": 116.356, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JWFJGkUKVfk", "annotations": [{"segment": [28.848453978159124, 59.52151014040561], "label": "Welding"}]}, "daWdp5tXqFI": {"duration": 105.07, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=daWdp5tXqFI", "annotations": []}, "pn1XDTlNMso": {"duration": 140.95, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pn1XDTlNMso", "annotations": [{"segment": [10.55439311450009, 16.711122431291813], "label": "Polishing forniture"}, {"segment": [41.33803948829953, 46.61523601872074], "label": "Polishing forniture"}, {"segment": [54.750914003120116, 64.42577430889236], "label": "Polishing forniture"}]}, "1zyyumIi0iQ": {"duration": 232.18, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1zyyumIi0iQ", "annotations": [{"segment": [22.27057956318253, 99.04070748829955], "label": "Cheerleading"}]}, "hcFw88RcAbo": {"duration": 84.15, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=hcFw88RcAbo", "annotations": [{"segment": [6.432615704628185, 17.459956912562216], "label": "Mooping floor"}, {"segment": [29.40624322115742, 36.88908117628705], "label": "Mooping floor"}, {"segment": [52.24859184399376, 
52.77370331981279], "label": "Mooping floor"}, {"segment": [73.64688448361935, 78.24160989703589], "label": "Mooping floor"}]}, "5wQLpjdsRUg": {"duration": 122.346, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5wQLpjdsRUg", "annotations": [{"segment": [10.084488021073504, 19.217611877147814], "label": "Doing fencing"}, {"segment": [23.02308015051211, 26.067454769203547], "label": "Doing fencing"}, {"segment": [29.111829387894986, 41.47960127632895], "label": "Doing fencing"}, {"segment": [45.85588979069789, 49.66135806406219], "label": "Doing fencing"}, {"segment": [57.65284143812721, 75.91908915027582], "label": "Doing fencing"}, {"segment": [83.91057252434085, 90.37986858906017], "label": "Doing fencing"}, {"segment": [97.22971148111588, 103.12818730483055], "label": "Doing fencing"}, {"segment": [107.88502264653592, 109.78775678321807], "label": "Doing fencing"}]}, "whwserG3XGc": {"duration": 128.709, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=whwserG3XGc", "annotations": [{"segment": [25.17459516380655, 51.25186505460218], "label": "Hurling"}, {"segment": [78.73329563182526, 96.5861957878315], "label": "Hurling"}]}, "bNvAqFcKzqU": {"duration": 233.48, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bNvAqFcKzqU", "annotations": [{"segment": [9.468745709828394, 220.3304290171607], "label": "Playing saxophone"}]}, "Ocv54K1U50g": {"duration": 197.39, "subset": "testing", "resolution": "492x360", "url": "https://www.youtube.com/watch?v=Ocv54K1U50g", "annotations": []}, "pop8YCGQpWk": {"duration": 81.13, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pop8YCGQpWk", "annotations": []}, "HPyLSmDjsHY": {"duration": 171.48, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HPyLSmDjsHY", "annotations": [{"segment": [27.55440655721467, 61.79677587103483], "label": 
"Doing nails"}, {"segment": [80.52307158953025, 143.12240209048363], "label": "Doing nails"}]}, "BMxtjh9E7BY": {"duration": 67.849, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BMxtjh9E7BY", "annotations": [{"segment": [4.128080457098284, 50.48959943681748], "label": "Curling"}]}, "boJA2e8Lt-8": {"duration": 152.79, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=boJA2e8Lt-8", "annotations": []}, "GsNyn-6DDJM": {"duration": 33.16, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GsNyn-6DDJM", "annotations": [{"segment": [1.109827081903276, 30.79124903978159], "label": "Brushing hair"}]}, "xxvBpA0qDS4": {"duration": 183.67, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xxvBpA0qDS4", "annotations": []}, "MgjxjBxGrQU": {"duration": 232.22, "subset": "testing", "resolution": "658x480", "url": "https://www.youtube.com/watch?v=MgjxjBxGrQU", "annotations": []}, "AIBFZrfFUdk": {"duration": 98.662, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AIBFZrfFUdk", "annotations": []}, "62Dwj4l7_qs": {"duration": 178.72, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=62Dwj4l7_qs", "annotations": [{"segment": [1.2543907956318254, 135.0711926677067], "label": "Snatch"}]}, "TS-p15iwQPc": {"duration": 129.06, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TS-p15iwQPc", "annotations": [{"segment": [17.415584864210896, 101.37279744659175], "label": "Doing karate"}]}, "_ja1fre9OTE": {"duration": 114.45, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_ja1fre9OTE", "annotations": []}, "BjQnUdue4qY": {"duration": 120.187, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BjQnUdue4qY", "annotations": []}, "q4Oy6EDTJiM": {"duration": 
74.68, "subset": "validation", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=q4Oy6EDTJiM", "annotations": [{"segment": [1.2814810687665603, 34.25049318252729], "label": "Bathing dog"}]}, "hnwZmUR1FUw": {"duration": 83.89, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hnwZmUR1FUw", "annotations": [{"segment": [1.9631915480700861, 67.40290981707295], "label": "Playing squash"}]}, "7iPb7mLURVc": {"duration": 111.294, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=7iPb7mLURVc", "annotations": [{"segment": [74.6584924024961, 110.59874339625584], "label": "Baton twirling"}]}, "2VTEseqA5SA": {"duration": 31.278, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=2VTEseqA5SA", "annotations": [{"segment": [9.144833527403962, 17.754596621689934], "label": "Welding"}, {"segment": [19.84623411935712, 20.235375979388223], "label": "Welding"}, {"segment": [21.694657954504866, 24.46729370722648], "label": "Welding"}, {"segment": [26.072503879854786, 27.094001262436436], "label": "Welding"}]}, "rvkVdD2u_yA": {"duration": 36.525, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rvkVdD2u_yA", "annotations": [{"segment": [3.640411856474259, 36.481], "label": "Kneeling"}]}, "TeXm2KTfkzo": {"duration": 73.91, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TeXm2KTfkzo", "annotations": [{"segment": [0.01, 73.9088888888889], "label": "Cutting the grass"}]}, "a5R0qO5NynU": {"duration": 238.632, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=a5R0qO5NynU", "annotations": [{"segment": [105.33295319812792, 177.9121965678627], "label": "Hula hoop"}]}, "mcS9LX_P4BA": {"duration": 159.313, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mcS9LX_P4BA", "annotations": [{"segment": [49.61401869158878, 
109.16454828660436], "label": "Carving jack-o-lanterns"}]}, "at8e-jBBU5E": {"duration": 67.15, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=at8e-jBBU5E", "annotations": [{"segment": [1.0476148003120125, 9.638056162870516], "label": "Rock climbing"}, {"segment": [12.990423523868955, 20.323727126053043], "label": "Rock climbing"}, {"segment": [35.61890321060842, 64.42831021918877], "label": "Rock climbing"}]}, "tyjUDi3uLd0": {"duration": 235.915, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=tyjUDi3uLd0", "annotations": [{"segment": [80.96917727049218, 195.79819128861155], "label": "Laying tile"}]}, "NOGtIoZbEVg": {"duration": 136.3, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NOGtIoZbEVg", "annotations": [{"segment": [2.4847672299999997, 129.92161811370414], "label": "Playing guitarra"}]}, "NV9rPVAczy0": {"duration": 103.631, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NV9rPVAczy0", "annotations": []}, "Dx4LpX-X9JY": {"duration": 227.811, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Dx4LpX-X9JY", "annotations": [{"segment": [65.3791326053042, 84.56648673946958], "label": "Running a marathon"}, {"segment": [89.54098595943837, 136.44340717628705], "label": "Running a marathon"}, {"segment": [138.57533541341655, 145.68176287051483], "label": "Running a marathon"}]}, "iosb2TdQ7yY": {"duration": 230.97, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iosb2TdQ7yY", "annotations": [{"segment": [44.091791199999996, 169.06857599999998], "label": "Chopping wood"}]}, "ZeN0mrRQHg8": {"duration": 127.78, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZeN0mrRQHg8", "annotations": [{"segment": [8.545120837895812, 105.72106320099682], "label": "Riding bumper cars"}]}, "j3h8Di7V3nk": 
{"duration": 54.219, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=j3h8Di7V3nk", "annotations": [{"segment": [0.01, 54.21859410430839], "label": "Putting on shoes"}]}, "8hrRE3_sWXo": {"duration": 228.555, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=8hrRE3_sWXo", "annotations": [{"segment": [45.995949144088215, 73.45089553241995], "label": "Kneeling"}, {"segment": [76.30335749484402, 113.74192075166], "label": "Kneeling"}, {"segment": [122.65586438423523, 213.22153006864278], "label": "Kneeling"}]}, "2Mh-OomUNpQ": {"duration": 188.454, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2Mh-OomUNpQ", "annotations": [{"segment": [0.01, 171.11435569422778], "label": "Fun sliding down"}]}, "TIAAUayALPI": {"duration": 212.881, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TIAAUayALPI", "annotations": [{"segment": [7.970566398166132, 10.95952879747843], "label": "Braiding hair"}, {"segment": [13.284277330276886, 14.944811996561494], "label": "Braiding hair"}, {"segment": [16.605346662846106, 22.25116452821378], "label": "Braiding hair"}, {"segment": [61.43978265253059, 206.57051130421215], "label": "Braiding hair"}]}, "VZ5R9goklpo": {"duration": 94.22999999999999, "subset": "testing", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=VZ5R9goklpo", "annotations": []}, "8wNsOZHTsj0": {"duration": 70.08, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8wNsOZHTsj0", "annotations": [{"segment": [0.01, 26.113266770670826], "label": "Chopping wood"}, {"segment": [29.281822152886114, 38.67822776911076], "label": "Chopping wood"}, {"segment": [39.661572542901716, 51.57097035881435], "label": "Chopping wood"}, {"segment": [52.66357566302652, 55.504349453978165], "label": "Chopping wood"}, {"segment": [56.59695475819033, 61.18589703588144], "label": "Chopping 
wood"}]}, "X6BHwIJKl-Q": {"duration": 69.73, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=X6BHwIJKl-Q", "annotations": [{"segment": [22.216165366614664, 66.86576911076443], "label": "Kayaking"}]}, "4-m0cFAVp9k": {"duration": 178.75, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4-m0cFAVp9k", "annotations": []}, "OlyMLJkWQbQ": {"duration": 70.64, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OlyMLJkWQbQ", "annotations": []}, "zE0vlPLBVJo": {"duration": 25.426, "subset": "validation", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=zE0vlPLBVJo", "annotations": [{"segment": [0, 2.221291020280811], "label": "Hitting a pinata"}, {"segment": [3.0939410639625584, 21.379926070202806], "label": "Hitting a pinata"}]}, "0h45uztur-o": {"duration": 171.94, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0h45uztur-o", "annotations": [{"segment": [126.7101009126365, 154.86790111544462], "label": "Wrapping presents"}, {"segment": [27.51947715, 122.71399967000002], "label": "Wrapping presents"}]}, "xcDAjcRX93A": {"duration": 85.96000000000001, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xcDAjcRX93A", "annotations": [{"segment": [9.651032761310454, 31.365856474258972], "label": "Using parallel bars"}, {"segment": [51.484156006240255, 65.42778159126365], "label": "Using parallel bars"}]}, "tZ6Fjq6SJvQ": {"duration": 200.06, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tZ6Fjq6SJvQ", "annotations": [{"segment": [0, 200.063128], "label": "Tango"}]}, "N4Z9eYSu2uM": {"duration": 128.918, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=N4Z9eYSu2uM", "annotations": [{"segment": [5.226811232449298, 119.83319188767551], "label": "Tug of war"}]}, "xtmxgih0yIk": {"duration": 232.62, 
"subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xtmxgih0yIk", "annotations": [{"segment": [64.9587168673947, 71.12798048049922], "label": "Grooming horse"}, {"segment": [74.75695907644307, 84.9180991450858], "label": "Grooming horse"}, {"segment": [93.26474991575662, 101.61140068642746], "label": "Grooming horse"}, {"segment": [129.9174337347894, 161.1266496599064], "label": "Grooming horse"}, {"segment": [193.06166130421215, 215.19843073946956], "label": "Grooming horse"}]}, "sAjMcMdflqs": {"duration": 224.654, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sAjMcMdflqs", "annotations": []}, "UP12y84KU_M": {"duration": 50.574, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UP12y84KU_M", "annotations": []}, "P7bvD_CZH5M": {"duration": 233.221, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=P7bvD_CZH5M", "annotations": [{"segment": [4.72909984399376, 232.961], "label": "Surfing"}]}, "HeOj7jZ0igI": {"duration": 160.752, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=HeOj7jZ0igI", "annotations": [{"segment": [14.039400936037442, 155.18552106084243], "label": "Doing fencing"}]}, "mxY7J50ItrU": {"duration": 30.86, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mxY7J50ItrU", "annotations": [{"segment": [1.2998446305199147, 30.52227625897036], "label": "Doing kickboxing"}]}, "tpDhYD9e_cU": {"duration": 232.99, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tpDhYD9e_cU", "annotations": [{"segment": [0.01, 58.87625897035882], "label": "Rollerblading"}]}, "8IlwMnM_n38": {"duration": 144.94, "subset": "training", "resolution": "434x326", "url": "https://www.youtube.com/watch?v=8IlwMnM_n38", "annotations": [{"segment": [8.253153041767929, 131.259050431405], "label": "Breakdancing"}]}, 
"Dv1Y6_QalWM": {"duration": 198.23, "subset": "testing", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=Dv1Y6_QalWM", "annotations": []}, "CaQkeVwKiUs": {"duration": 104.746, "subset": "validation", "resolution": "640x338", "url": "https://www.youtube.com/watch?v=CaQkeVwKiUs", "annotations": [{"segment": [54.80725233644859, 60.67945794392523], "label": "Playing ten pins"}, {"segment": [92.97658878504673, 99.82749532710281], "label": "Playing ten pins"}]}, "2k-qlZxc9l4": {"duration": 236.17000000000002, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=2k-qlZxc9l4", "annotations": []}, "nKa1e_CpvoY": {"duration": 223.91, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nKa1e_CpvoY", "annotations": [{"segment": [37.02723868954758, 213.08127829953196], "label": "Making a cake"}]}, "mgmwdQixDXY": {"duration": 177.61, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mgmwdQixDXY", "annotations": [{"segment": [17.733185198127927, 154.8882894648986], "label": "Doing nails"}]}, "_3hVtPVXTmg": {"duration": 45.86, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=_3hVtPVXTmg", "annotations": [{"segment": [6.362042121684867, 31.309825273010922], "label": "Using uneven bars"}]}, "dFsFL_WJasg": {"duration": 23.267, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=dFsFL_WJasg", "annotations": [{"segment": [0.01, 21.505989653682068], "label": "Camel ride"}]}, "fD9JNH5FWCk": {"duration": 86.843, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fD9JNH5FWCk", "annotations": [{"segment": [10.69704368174727, 21.52998907956318], "label": "Hula hoop"}]}, "e6r-A-LAe2E": {"duration": 39.24, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=e6r-A-LAe2E", "annotations": [{"segment": [4.407740255850234, 
39.04113378684807], "label": "Playing kickball"}]}, "ObOkhXGu7oY": {"duration": 224.7, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ObOkhXGu7oY", "annotations": [{"segment": [41.35356942277691, 207.13674102964117], "label": "Doing step aerobics"}]}, "of_tqCVh17o": {"duration": 237.425, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=of_tqCVh17o", "annotations": []}, "B7t85SESTXI": {"duration": 128.59, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=B7t85SESTXI", "annotations": [{"segment": [7.422600705388759, 12.036649792522313], "label": "Paintball"}, {"segment": [43.532550082955694, 83.25349313572542], "label": "Paintball"}]}, "8VPiqCCOPWg": {"duration": 198.69, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8VPiqCCOPWg", "annotations": [{"segment": [50.83568941669231, 55.48529515602393], "label": "Plataform diving"}, {"segment": [64.47453291873171, 91.44224620685507], "label": "Plataform diving"}, {"segment": [114.07032747160227, 143.2078567714137], "label": "Plataform diving"}]}, "G00TjQ7JJ8Y": {"duration": 27.52, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=G00TjQ7JJ8Y", "annotations": [{"segment": [1.6371425, 26.075632808892358], "label": "Playing polo"}]}, "mwndGBRpOPg": {"duration": 6.55, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=mwndGBRpOPg", "annotations": [{"segment": [0, 6.501], "label": "Windsurfing"}]}, "qdY2-BpnThc": {"duration": 158.337, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=qdY2-BpnThc", "annotations": []}, "hJ4ZU-FA5XA": {"duration": 170.13, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=hJ4ZU-FA5XA", "annotations": [{"segment": [24.418408492979722, 32.911767968798756], "label": "Getting a haircut"}, 
{"segment": [64.49644851950079, 88.11860456162248], "label": "Getting a haircut"}, {"segment": [99.79697384087363, 100.85864377535103], "label": "Getting a haircut"}, {"segment": [143.06002367082684, 144.6525285725429], "label": "Getting a haircut"}]}, "2OEc8kPVn_Y": {"duration": 212.77, "subset": "training", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=2OEc8kPVn_Y", "annotations": [{"segment": [7.381377819068802, 186.67353305738655], "label": "Tai chi"}]}, "HiEClhlj5_A": {"duration": 154.738, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=HiEClhlj5_A", "annotations": [{"segment": [50.055147952130035, 76.28597260132616], "label": "Hanging wallpaper"}]}, "YpXe04ALJw4": {"duration": 15.465, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=YpXe04ALJw4", "annotations": [{"segment": [0.01, 15.401], "label": "Beach soccer"}]}, "c2mpAjNATAw": {"duration": 98.69, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=c2mpAjNATAw", "annotations": []}, "HCvhz5HkpF4": {"duration": 161.263, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=HCvhz5HkpF4", "annotations": []}, "Lxcm6HMOHaY": {"duration": 31.022, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Lxcm6HMOHaY", "annotations": []}, "fFu37ig0pkY": {"duration": 139.692, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=fFu37ig0pkY", "annotations": [{"segment": [25.10961059431764, 128.2461380148784], "label": "Baton twirling"}]}, "RaYMOYWhba8": {"duration": 152.811, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RaYMOYWhba8", "annotations": [{"segment": [0.20000000000000004, 151.58926053042123], "label": "Beach soccer"}]}, "L4yBWOH-vUM": {"duration": 140.528, "subset": "testing", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=L4yBWOH-vUM", "annotations": []}, "m2IJHzLEcKU": {"duration": 57.122, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=m2IJHzLEcKU", "annotations": []}, "ztnt2EYuK3I": {"duration": 12.33, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ztnt2EYuK3I", "annotations": [{"segment": [0.01, 12.329794], "label": "Removing ice from car"}]}, "UXX8k68S3_g": {"duration": 99.103, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=UXX8k68S3_g", "annotations": [{"segment": [0.01, 99.1027664399093], "label": "Carving jack-o-lanterns"}]}, "E29kZvEtOUA": {"duration": 237.63, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=E29kZvEtOUA", "annotations": [{"segment": [19.455304212168485, 107.07505347], "label": "Mowing the lawn"}, {"segment": [140.99003523000002, 198.44410296411854], "label": "Mowing the lawn"}]}, "zyylgHTPUS8": {"duration": 104.931, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=zyylgHTPUS8", "annotations": [{"segment": [39.942393840873635, 84.46834107332293], "label": "Grooming dog"}]}, "RHpigjSwhVM": {"duration": 122.161, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RHpigjSwhVM", "annotations": [{"segment": [12.768678687283545, 48.40663263537345], "label": "BMX"}, {"segment": [51.646446630654346, 108.43847899531981], "label": "BMX"}]}, "gsyV9UpPvVs": {"duration": 140.946, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=gsyV9UpPvVs", "annotations": []}, "kfW5sm39ZYE": {"duration": 208.91, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kfW5sm39ZYE", "annotations": [{"segment": [55.55753400204794, 98.44405464276272], "label": "Changing car wheel"}, {"segment": [126.71017076562431, 143.27996297557766], "label": "Changing 
car wheel"}, {"segment": [172.84567064431795, 202.08648042658854], "label": "Changing car wheel"}]}, "8B3qhnSB7U8": {"duration": 48.37, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8B3qhnSB7U8", "annotations": [{"segment": [2.716362825941609, 29.35180929953198], "label": "Getting a piercing"}]}, "TakNLlCbjvw": {"duration": 52.48, "subset": "training", "resolution": "1906x1080", "url": "https://www.youtube.com/watch?v=TakNLlCbjvw", "annotations": [{"segment": [3.9261154446177846, 47.35876755070203], "label": "Hopscotch"}]}, "jXIKHEsmVl4": {"duration": 30.07, "subset": "training", "resolution": "144x176", "url": "https://www.youtube.com/watch?v=jXIKHEsmVl4", "annotations": [{"segment": [0.01, 29.988938452196923], "label": "Chopping wood"}]}, "l8-mKyFRzuA": {"duration": 131.634, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=l8-mKyFRzuA", "annotations": []}, "cGxZAOpmpKQ": {"duration": 139.0, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cGxZAOpmpKQ", "annotations": []}, "esNQZCjMZaM": {"duration": 93.86, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=esNQZCjMZaM", "annotations": [{"segment": [1.00894125, 87.10619672854915], "label": "Archery"}]}, "Z8v3jTD-6H0": {"duration": 175.45, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Z8v3jTD-6H0", "annotations": []}, "qvLFSqPCvn8": {"duration": 158.059, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qvLFSqPCvn8", "annotations": [{"segment": [9.847864878038443, 145.00981032911608], "label": "Changing car wheel"}]}, "5tM7hwg_8wU": {"duration": 221.472, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5tM7hwg_8wU", "annotations": []}, "we6Ddq1ABcQ": {"duration": 68.06, "subset": "validation", "resolution": "640x360", "url": 
"https://www.youtube.com/watch?v=we6Ddq1ABcQ", "annotations": [{"segment": [1.9628471138845556, 67.63865054602185], "label": "Cricket"}]}, "eS7ENymCpZE": {"duration": 64.69, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=eS7ENymCpZE", "annotations": [{"segment": [0.01, 0.02], "label": "Sailing"}]}, "Z5d8BZG4d2Y": {"duration": 194.189, "subset": "testing", "resolution": "300x240", "url": "https://www.youtube.com/watch?v=Z5d8BZG4d2Y", "annotations": []}, "rjzQD0vwnP0": {"duration": 236.867, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=rjzQD0vwnP0", "annotations": [{"segment": [0.01, 211.73892541029642], "label": "Removing ice from car"}]}, "GbqYSIQ29mA": {"duration": 191.89, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GbqYSIQ29mA", "annotations": []}, "ewGW8hMlxnA": {"duration": 190.73, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ewGW8hMlxnA", "annotations": [{"segment": [51.1783739267938, 132.11161641567705], "label": "Cleaning shoes"}]}, "tOiPx5Veyd8": {"duration": 104.235, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=tOiPx5Veyd8", "annotations": [{"segment": [36.433323845870774, 56.8905376309175], "label": "Shuffleboard"}]}, "HRbBaePTCR8": {"duration": 183.879, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HRbBaePTCR8", "annotations": [{"segment": [17.158205981232566, 130.97433460168023], "label": "Wakeboarding"}, {"segment": [139.2674696016626, 169.29433599102936], "label": "Wakeboarding"}]}, "M9Z2RKnwiz4": {"duration": 234.313, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=M9Z2RKnwiz4", "annotations": [{"segment": [52.63807922620905, 234.31256235827664], "label": "Croquet"}]}, "gHhgZ0Bd4H4": {"duration": 215.02, "subset": "training", "resolution": "640x360", "url": 
"https://www.youtube.com/watch?v=gHhgZ0Bd4H4", "annotations": [{"segment": [9.893315132605304, 202.05838533541342], "label": "Spinning"}]}, "Sw_zeKg3V9M": {"duration": 15.5, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Sw_zeKg3V9M", "annotations": []}, "PLgcXxX3A2A": {"duration": 169.09, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=PLgcXxX3A2A", "annotations": [{"segment": [9.850279303180253, 135.21749029372356], "label": "Dodgeball"}, {"segment": [137.5878787376488, 138.64138471272668], "label": "Dodgeball"}, {"segment": [141.0117731566519, 141.53852614419085], "label": "Dodgeball"}, {"segment": [142.85540861303818, 143.64553809434662], "label": "Dodgeball"}, {"segment": [145.4891735507329, 160.23825720182325], "label": "Dodgeball"}]}, "JDwd6Ec454A": {"duration": 56.43, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=JDwd6Ec454A", "annotations": []}, "1cU8sp05Bu0": {"duration": 64.807, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1cU8sp05Bu0", "annotations": [{"segment": [11.72792863650546, 53.7867071950078], "label": "Playing congas"}]}, "GKy0MNJZxDc": {"duration": 37.64, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GKy0MNJZxDc", "annotations": [{"segment": [1.5251575663026522, 31.676349453978155], "label": "Using parallel bars"}]}, "oghT33khlYQ": {"duration": 166.395, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oghT33khlYQ", "annotations": [{"segment": [16.950439221647077, 162.35099245529193], "label": "Waterskiing"}]}, "yDSjqGmia-k": {"duration": 86.123, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yDSjqGmia-k", "annotations": []}, "mTsEJFP3W1A": {"duration": 206.194, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=mTsEJFP3W1A", 
"annotations": [{"segment": [62.707862714508586, 180.72727613104524], "label": "Assembling bicycle"}]}, "d_z2CA-o13U": {"duration": 182.07, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=d_z2CA-o13U", "annotations": [{"segment": [10.225321329173168, 160.4807375273011], "label": "Paintball"}]}, "JE50XTpCN78": {"duration": 219.03, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JE50XTpCN78", "annotations": [{"segment": [93.09382215288612, 207.1978096723869], "label": "Doing nails"}]}, "SwXGVQXyXkQ": {"duration": 148.98, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SwXGVQXyXkQ", "annotations": [{"segment": [37.888081985927045, 118.41080377097376], "label": "Getting a haircut"}]}, "Qm7AL0RjHxI": {"duration": 82.97, "subset": "training", "resolution": "352x240", "url": "https://www.youtube.com/watch?v=Qm7AL0RjHxI", "annotations": [{"segment": [8.315132605304212, 66.00936037441498], "label": "Tai chi"}]}, "uklYBuQDwIw": {"duration": 90.047, "subset": "training", "resolution": "470x360", "url": "https://www.youtube.com/watch?v=uklYBuQDwIw", "annotations": [{"segment": [1.2760160894394517, 5.911820257926191], "label": "Slacklining"}, {"segment": [10.688103340609498, 30.636109156522135], "label": "Slacklining"}, {"segment": [38.36244943733337, 59.99620222360482], "label": "Slacklining"}, {"segment": [63.50817507851901, 84.15857546541449], "label": "Slacklining"}]}, "CJKwvmOuhJk": {"duration": 36.27, "subset": "training", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=CJKwvmOuhJk", "annotations": [{"segment": [0.9393670999999999, 13.42497035], "label": "Washing dishes"}]}, "qrvbu5AkWzk": {"duration": 105.93, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=qrvbu5AkWzk", "annotations": []}, "RZr2KLNE6qE": {"duration": 6.107, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=RZr2KLNE6qE", "annotations": [{"segment": [0.01, 6.106438], "label": "Swinging at the playground"}]}, "IEqnfSiCIXc": {"duration": 198.72, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IEqnfSiCIXc", "annotations": [{"segment": [14.877828393135728, 44.9434399375975], "label": "Skateboarding"}, {"segment": [48.35294227769111, 170.47511700468021], "label": "Skateboarding"}]}, "sYpC9oPhmY4": {"duration": 84.707, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sYpC9oPhmY4", "annotations": [{"segment": [0.2, 81.73503900156007], "label": "Drum corps"}]}, "HaprTZfP4sQ": {"duration": 117.31, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HaprTZfP4sQ", "annotations": [{"segment": [8.820465714242847, 69.66672832639237], "label": "Wrapping presents"}]}, "IGXq7a7Jl7s": {"duration": 227.35, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IGXq7a7Jl7s", "annotations": [{"segment": [26.06108190327613, 27.833944617784713], "label": "Javelin throw"}, {"segment": [33.152532761310454, 39.53483853354134], "label": "Javelin throw"}, {"segment": [42.37141887675507, 46.62628939157566], "label": "Javelin throw"}, {"segment": [52.65402262090483, 71.8009399375975], "label": "Javelin throw"}, {"segment": [77.82867316692668, 87.75670436817472], "label": "Javelin throw"}, {"segment": [110.09477457098285, 128.8871193447738], "label": "Javelin throw"}, {"segment": [160.79864820592823, 172.14496957878313], "label": "Javelin throw"}]}, "WHG0Q_0H24k": {"duration": 43.93, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=WHG0Q_0H24k", "annotations": []}, "WSKx2EK_Bqc": {"duration": 177.54, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WSKx2EK_Bqc", "annotations": [{"segment": [59.27224837441498, 151.5042984149766], "label": "Playing 
congas"}]}, "NGBaYycOQT0": {"duration": 29.28, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NGBaYycOQT0", "annotations": [{"segment": [9.27287563650546, 19.8704477925117], "label": "Playing badminton"}]}, "VbhW_K3NvmQ": {"duration": 166.0, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=VbhW_K3NvmQ", "annotations": [{"segment": [0.01, 165.99909297052153], "label": "Playing piano"}]}, "pSdlIsd_vZc": {"duration": 43.375, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pSdlIsd_vZc", "annotations": [{"segment": [2.8610754923716515, 41.256167392683174], "label": "Fun sliding down"}]}, "soE6TfvOiIo": {"duration": 117.89, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=soE6TfvOiIo", "annotations": [{"segment": [90.85261739781592, 116.04858619032761], "label": "Walking the dog"}]}, "yWvyLG3kq2I": {"duration": 45.54, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yWvyLG3kq2I", "annotations": [{"segment": [3.5454758190327618, 44.60208580343214], "label": "Kayaking"}]}, "-CqVpCRxwTY": {"duration": 43.143, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-CqVpCRxwTY", "annotations": []}, "1zEcIngghq4": {"duration": 203.152, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=1zEcIngghq4", "annotations": [{"segment": [12.994082234037661, 200.2990225023401], "label": "Playing congas"}]}, "FiqkrBh1VOI": {"duration": 156.57, "subset": "validation", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=FiqkrBh1VOI", "annotations": [{"segment": [1.9540935542183595, 143.13735190327614], "label": "Tango"}]}, "NM7Vye5nxj8": {"duration": 35.36, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=NM7Vye5nxj8", "annotations": []}, "uz91AvGxjbw": {"duration": 190.59, 
"subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=uz91AvGxjbw", "annotations": [{"segment": [0.01, 190.441], "label": "Croquet"}]}, "EyOX-qduoFE": {"duration": 180.466, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EyOX-qduoFE", "annotations": []}, "CcMic7jNIuE": {"duration": 60.56, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=CcMic7jNIuE", "annotations": [{"segment": [13.78943447, 17.57210770670827], "label": "Clean and jerk"}, {"segment": [43.83579556942278, 58.762639750390015], "label": "Clean and jerk"}]}, "v8Dqgro-f-U": {"duration": 159.289, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=v8Dqgro-f-U", "annotations": [{"segment": [0.01, 153.7949438377535], "label": "Rope skipping"}]}, "syfyM1Pble8": {"duration": 80.71000000000001, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=syfyM1Pble8", "annotations": []}, "1QdkjsSU44I": {"duration": 154.2, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1QdkjsSU44I", "annotations": []}, "xijxN7XB4d8": {"duration": 234.82, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xijxN7XB4d8", "annotations": [{"segment": [5.677126868954758, 179.65359543369735], "label": "Playing violin"}]}, "8QY00KU3gkw": {"duration": 123.786, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8QY00KU3gkw", "annotations": [{"segment": [19.49426053042122, 112.54446645865835], "label": "Elliptical trainer"}]}, "48h91eAy4eI": {"duration": 171.619, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=48h91eAy4eI", "annotations": []}, "bMDyOXygNPM": {"duration": 95.388, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bMDyOXygNPM", "annotations": [{"segment": [0.2, 
93.26191277258566], "label": "Futsal"}]}, "giWOxJiFmjk": {"duration": 148.469, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=giWOxJiFmjk", "annotations": []}, "pVoM_kUQ4_E": {"duration": 51.735, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pVoM_kUQ4_E", "annotations": []}, "DBGsPnuwdnU": {"duration": 28.962, "subset": "training", "resolution": "3840x2160", "url": "https://www.youtube.com/watch?v=DBGsPnuwdnU", "annotations": [{"segment": [0.2, 28.741], "label": "Riding bumper cars"}]}, "PLPUWZ7XZDo": {"duration": 145.799, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PLPUWZ7XZDo", "annotations": [{"segment": [0.01, 145.1706910006443], "label": "Kneeling"}]}, "agX22QLvOcU": {"duration": 57.77, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=agX22QLvOcU", "annotations": [{"segment": [6.759447928937566, 50.831047282371294], "label": "Layup drill in basketball"}]}, "DSMSAIk_xhY": {"duration": 190.4, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DSMSAIk_xhY", "annotations": [{"segment": [0.5940830795631825, 185.35392082371297], "label": "Playing guitarra"}]}, "svSM-UqjNWE": {"duration": 48.11, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=svSM-UqjNWE", "annotations": [{"segment": [0.01, 4.503439566154075], "label": "Smoking hookah"}, {"segment": [12.159286828616, 15.161579872718718], "label": "Smoking hookah"}, {"segment": [25.519490327613102, 44.433936099844], "label": "Smoking hookah"}]}, "dTZEVW_vLWc": {"duration": 204.057, "subset": "testing", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=dTZEVW_vLWc", "annotations": []}, "s946M878MxI": {"duration": 61.324, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=s946M878MxI", "annotations": []}, "FLL06mU-ua8": 
{"duration": 141.317, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FLL06mU-ua8", "annotations": [{"segment": [50.988272549883085, 131.64645037985386], "label": "Using the monkey bar"}]}, "zQCqzIgIpHI": {"duration": 189.452, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zQCqzIgIpHI", "annotations": [{"segment": [53.62914430577223, 71.35778705148206], "label": "Skiing"}, {"segment": [121.5889414976599, 140.49949375975038], "label": "Skiing"}]}, "HWgQhsTgj90": {"duration": 22.06, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HWgQhsTgj90", "annotations": [{"segment": [0.6685366614664586, 21.23889547581903], "label": "Playing water polo"}]}, "2mXGnG6ZBDA": {"duration": 45.79, "subset": "validation", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=2mXGnG6ZBDA", "annotations": [{"segment": [2.285915784930717, 43.93244344773791], "label": "Hitting a pinata"}]}, "Jd0KWW9LN4Q": {"duration": 184.413, "subset": "validation", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=Jd0KWW9LN4Q", "annotations": [{"segment": [9.493955978647309, 68.75925693626384], "label": "Playing ten pins"}, {"segment": [53.79908387900142, 58.40221405046678], "label": "Playing ten pins"}, {"segment": [63.58073549336531, 68.47156130054726], "label": "Playing ten pins"}, {"segment": [81.4178649077936, 117.9552106437999], "label": "Playing ten pins"}, {"segment": [150.46481707644307, 159.095686124805], "label": "Playing ten pins"}, {"segment": [168.30194644305774, 172.6173809672387], "label": "Playing ten pins"}]}, "Pi79Fa4YwDk": {"duration": 77.11, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=Pi79Fa4YwDk", "annotations": [{"segment": [6.075242018720749, 17.985122411856473], "label": "Cricket"}, {"segment": [56.24110185647426, 76.33150615600624], "label": "Cricket"}]}, "5RW4JnGyjro": {"duration": 140.063, 
"subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5RW4JnGyjro", "annotations": []}, "VwIU_FcOI_Y": {"duration": 69.196, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=VwIU_FcOI_Y", "annotations": []}, "j6zAdpBqRu0": {"duration": 142.896, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=j6zAdpBqRu0", "annotations": [{"segment": [5.1272927434104165, 7.579476229389311], "label": "Skiing"}, {"segment": [12.706768972799727, 15.158952458778622], "label": "Skiing"}, {"segment": [33.66179148934664, 37.67445537549393], "label": "Skiing"}, {"segment": [40.7954161758307, 49.712447033935774], "label": "Skiing"}, {"segment": [61.30458714947237, 63.97969640690389], "label": "Skiing"}, {"segment": [77.13231692260887, 78.91572309422988], "label": "Skiing"}, {"segment": [80.92205503730352, 107.89607338307137], "label": "Skiing"}]}, "JjRSGzdwtc0": {"duration": 156.48, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JjRSGzdwtc0", "annotations": [{"segment": [17.321452418096722, 145.17721528861156], "label": "Rope skipping"}]}, "QakbOSMJ0AA": {"duration": 148.66, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=QakbOSMJ0AA", "annotations": [{"segment": [66.64229054212169, 120.41972151872075], "label": "Shaving"}]}, "Ncfysce-svA": {"duration": 34.95, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ncfysce-svA", "annotations": [{"segment": [11.557700904836192, 31.020433088923557], "label": "Playing guitarra"}]}, "KoeytdNfQ_M": {"duration": 73.677, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KoeytdNfQ_M", "annotations": [{"segment": [3.5631581889125905, 70.68846084455623], "label": "Disc dog"}]}, "yPA6klGWEsc": {"duration": 104.24000000000001, "subset": "validation", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=yPA6klGWEsc", "annotations": [{"segment": [30.571080343213726, 36.42511700468019], "label": "Walking the dog"}, {"segment": [94.8028714898596, 99.19339898595943], "label": "Walking the dog"}]}, "eaR8oEBlYWk": {"duration": 133.98, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eaR8oEBlYWk", "annotations": [{"segment": [11.704885719238293, 46.6105270605382], "label": "Hand washing clothes"}, {"segment": [51.62690665449747, 89.66778435725429], "label": "Hand washing clothes"}]}, "4innF37ifyY": {"duration": 19.97, "subset": "training", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=4innF37ifyY", "annotations": [{"segment": [12.025230359999998, 15.274488319999998], "label": "Plataform diving"}]}, "TK5FnYshy10": {"duration": 226.72, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TK5FnYshy10", "annotations": [{"segment": [0.01, 226.581], "label": "Building sandcastles"}]}, "p4cQt5On74M": {"duration": 92.07, "subset": "training", "resolution": "352x288", "url": "https://www.youtube.com/watch?v=p4cQt5On74M", "annotations": [{"segment": [2.333615774202139, 90.24183082871927], "label": "Playing polo"}]}, "adEkjcH5vbA": {"duration": 123.41, "subset": "testing", "resolution": "384x288", "url": "https://www.youtube.com/watch?v=adEkjcH5vbA", "annotations": []}, "iLaye6q55qk": {"duration": 237.32999999999998, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iLaye6q55qk", "annotations": [{"segment": [13.699274762870516, 190.6790946723869], "label": "Skateboarding"}]}, "UzmsAb15Bso": {"duration": 158.43, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UzmsAb15Bso", "annotations": []}, "JvP1O8ZIL3A": {"duration": 228.485, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JvP1O8ZIL3A", "annotations": [{"segment": [9.751490167972934, 
215.45809612435923], "label": "Braiding hair"}]}, "_A5iOie5VkM": {"duration": 145.5, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_A5iOie5VkM", "annotations": [{"segment": [2.3824188767550702, 144.8936794071763], "label": "Doing step aerobics"}]}, "KSu59HUet3E": {"duration": 152.834, "subset": "training", "resolution": "1440x1440", "url": "https://www.youtube.com/watch?v=KSu59HUet3E", "annotations": [{"segment": [23.604587220223504, 36.71824678701434], "label": "Plastering"}, {"segment": [45.06330287497214, 134.47461766614666], "label": "Plastering"}, {"segment": [137.5742099173167, 152.63374100000001], "label": "Plastering"}]}, "Eq0Ibnq9sYA": {"duration": 39.265, "subset": "training", "resolution": "426x238", "url": "https://www.youtube.com/watch?v=Eq0Ibnq9sYA", "annotations": [{"segment": [2.0214400717628704, 27.993882205928237], "label": "Curling"}]}, "2VYZeOa6804": {"duration": 190.29, "subset": "training", "resolution": "632x480", "url": "https://www.youtube.com/watch?v=2VYZeOa6804", "annotations": [{"segment": [69.16847705304212, 90.54242704368176], "label": "Playing water polo"}, {"segment": [166.24183326053043, 172.47590200780033], "label": "Playing water polo"}]}, "tw0I1FEUigE": {"duration": 185.04, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tw0I1FEUigE", "annotations": []}, "2SnFlGUHKnc": {"duration": 83.035, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2SnFlGUHKnc", "annotations": [{"segment": [0.01, 82.981], "label": "Baton twirling"}]}, "iZT4XgzUHm4": {"duration": 134.189, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=iZT4XgzUHm4", "annotations": []}, "mTDRTiTnsjQ": {"duration": 33.855, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mTDRTiTnsjQ", "annotations": [{"segment": [0.2, 33.65469387755102], "label": "Blowing leaves"}]}, "oj6czGQ830c": 
{"duration": 222.726, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oj6czGQ830c", "annotations": [{"segment": [83.39187699219968, 146.97818319875194], "label": "Scuba diving"}, {"segment": [154.27497243556942, 177.20773860842434], "label": "Scuba diving"}, {"segment": [180.68240014976598, 201.53036939781592], "label": "Scuba diving"}]}, "jwndE_xn8sA": {"duration": 8.52, "subset": "validation", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=jwndE_xn8sA", "annotations": [{"segment": [0.5743896540662107, 8.458973560608268], "label": "Smoking hookah"}]}, "aPI0nPvzJlE": {"duration": 34.041, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aPI0nPvzJlE", "annotations": []}, "XNTy5ZTMqVU": {"duration": 133.655, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XNTy5ZTMqVU", "annotations": [{"segment": [13.65729254884481, 23.66569014189139], "label": "Rock-paper-scissors"}, {"segment": [48.061159274942426, 48.686684124507835], "label": "Rock-paper-scissors"}, {"segment": [50.56325867320407, 50.77176695639254], "label": "Rock-paper-scissors"}]}, "X0nwoVCrp6Q": {"duration": 195.79, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=X0nwoVCrp6Q", "annotations": []}, "nDo0nfs9Ee4": {"duration": 17.32, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nDo0nfs9Ee4", "annotations": [{"segment": [0.2501915607698981, 17.221517830240305], "label": "Bathing dog"}]}, "vHFm2rIPCwY": {"duration": 112.69, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=vHFm2rIPCwY", "annotations": []}, "yFPxSn69pcc": {"duration": 93.81, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yFPxSn69pcc", "annotations": [{"segment": [3.5837142230889234, 84.7658119297972], "label": "Cricket"}]}, "QtCNHRtycmg": {"duration": 
110.97, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QtCNHRtycmg", "annotations": [{"segment": [6.86427457098284, 98.33073322932917], "label": "Doing motocross"}]}, "EE2nFMHRacg": {"duration": 136.42000000000002, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EE2nFMHRacg", "annotations": [{"segment": [15.322996418096725, 122.15833255538223], "label": "Playing violin"}]}, "OM0jqPYx700": {"duration": 30.09, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=OM0jqPYx700", "annotations": [{"segment": [0.01, 0.046947051482059285], "label": "Polishing forniture"}]}, "PRCMcOB2PwA": {"duration": 202.57, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PRCMcOB2PwA", "annotations": []}, "UlddctIswqo": {"duration": 159.149, "subset": "training", "resolution": "2276x720", "url": "https://www.youtube.com/watch?v=UlddctIswqo", "annotations": [{"segment": [19.67697763671162, 46.4416318423191], "label": "Baking cookies"}, {"segment": [75.68449477066801, 78.16270349340944], "label": "Baking cookies"}]}, "dpS_S4Zi2Po": {"duration": 62.02, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dpS_S4Zi2Po", "annotations": [{"segment": [0, 1.0643143331175424], "label": "Putting on makeup"}, {"segment": [16.642005936019753, 30.28458056961734], "label": "Putting on makeup"}, {"segment": [45.0882247550702, 50.21628465210609], "label": "Putting on makeup"}]}, "VbbmHEKITMk": {"duration": 101.699, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VbbmHEKITMk", "annotations": []}, "5gaobaT6K1A": {"duration": 66.038, "subset": "training", "resolution": "308x172", "url": "https://www.youtube.com/watch?v=5gaobaT6K1A", "annotations": [{"segment": [0.01, 66.001], "label": "Bullfighting"}]}, "SrA6k_iQNGA": {"duration": 134.235, "subset": "validation", "resolution": 
"320x240", "url": "https://www.youtube.com/watch?v=SrA6k_iQNGA", "annotations": [{"segment": [61.358396390277385, 134.23458049886622], "label": "Carving jack-o-lanterns"}]}, "Es3PxD9oi4I": {"duration": 225.86, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Es3PxD9oi4I", "annotations": []}, "tu-YAMiS5wA": {"duration": 99.75, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=tu-YAMiS5wA", "annotations": []}, "mrqSaH_FIcs": {"duration": 232.293, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=mrqSaH_FIcs", "annotations": [{"segment": [24.745565038009595, 152.4529373121529], "label": "Trimming branches or hedges"}]}, "CtQ25XC45As": {"duration": 135.3, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CtQ25XC45As", "annotations": [{"segment": [19.835466458658345, 59.717414976599066], "label": "Dodgeball"}]}, "90SltIDizo8": {"duration": 82.314, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=90SltIDizo8", "annotations": [{"segment": [5.562298326403182, 81.17879365350599], "label": "Kneeling"}]}, "Jc_cLD6yaRM": {"duration": 134.49, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Jc_cLD6yaRM", "annotations": []}, "xLmdKbYfI5E": {"duration": 21.084, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=xLmdKbYfI5E", "annotations": []}, "fZQclIXmRHE": {"duration": 87.91, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fZQclIXmRHE", "annotations": [{"segment": [31.80571498947972, 33.03810863901243], "label": "Ironing clothes"}, {"segment": [57.13825111876321, 70.83151389134888], "label": "Ironing clothes"}]}, "s24-UW2mnjM": {"duration": 215.736, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=s24-UW2mnjM", "annotations": 
[{"segment": [21.869609984399375, 215.46800000000002], "label": "Rollerblading"}]}, "xPnNcv7ErXk": {"duration": 19.482, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xPnNcv7ErXk", "annotations": [{"segment": [2.212549141965679, 18.155026521060844], "label": "Using the monkey bar"}]}, "l9XzNcLiMKc": {"duration": 229.692, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=l9XzNcLiMKc", "annotations": [{"segment": [61.17958929366139, 193.19870303261493], "label": "Making a cake"}]}, "xI4LLnFt6Io": {"duration": 184.04, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xI4LLnFt6Io", "annotations": [{"segment": [7.465006066909344, 174.2791799953198], "label": "Playing bagpipes"}]}, "SIHG_BRgn2E": {"duration": 187.71, "subset": "training", "resolution": "512x288", "url": "https://www.youtube.com/watch?v=SIHG_BRgn2E", "annotations": [{"segment": [78.92005200208008, 133.09522684580853], "label": "Playing piano"}, {"segment": [29.4010173, 78.62721321914101], "label": "Playing piano"}]}, "xT7eQmyr1-Y": {"duration": 215.04, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xT7eQmyr1-Y", "annotations": [{"segment": [20.128549048361936, 203.96929702340094], "label": "Blowing leaves"}]}, "kH50-giCeDM": {"duration": 30.094, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=kH50-giCeDM", "annotations": [{"segment": [9.15916506217814, 14.784036090215524], "label": "Rock-paper-scissors"}]}, "mr2wnh2GwL8": {"duration": 238.168, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mr2wnh2GwL8", "annotations": [{"segment": [22.664885436233774, 186.89241453822152], "label": "Building sandcastles"}]}, "tSR4Pj_muN0": {"duration": 47.97, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=tSR4Pj_muN0", "annotations": [{"segment": 
[17.59576247041741, 45.62969093075956], "label": "Bungee jumping"}]}, "KoqE2gPCLe4": {"duration": 67.13, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KoqE2gPCLe4", "annotations": [{"segment": [23.667907751638065, 28.380544250858033], "label": "Pole vault"}]}, "45gAK3x_0ds": {"duration": 188.43, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=45gAK3x_0ds", "annotations": [{"segment": [4.260896255850234, 7.199445397815912], "label": "Slacklining"}, {"segment": [21.598336193447736, 31.589403276131044], "label": "Slacklining"}, {"segment": [57.742490639625586, 123.8729976599064], "label": "Slacklining"}, {"segment": [137.68564508580343, 182.94410686427457], "label": "Slacklining"}]}, "eCRYdEWeaXk": {"duration": 87.934, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eCRYdEWeaXk", "annotations": []}, "AOBkrb8yYS4": {"duration": 113.128, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AOBkrb8yYS4", "annotations": [{"segment": [25.9281856474259, 39.50961622464899], "label": "Croquet"}, {"segment": [54.32572230889236, 70.72926833073323], "label": "Croquet"}, {"segment": [78.13732137285491, 100.00871606864276], "label": "Croquet"}]}, "CiryRTSYkC4": {"duration": 172.22, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CiryRTSYkC4", "annotations": [{"segment": [91.08173920426205, 133.53281403120124], "label": "Ping-pong"}]}, "_5VJcnrEgbg": {"duration": 67.32, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_5VJcnrEgbg", "annotations": [{"segment": [0.01, 59.70416536661467], "label": "Playing squash"}]}, "jIsGq431gBs": {"duration": 230.668, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=jIsGq431gBs", "annotations": [{"segment": [59.191378072006955, 117.30655464432422], "label": "Rafting"}, 
{"segment": [132.37345227418425, 136.31954451057615], "label": "Rafting"}, {"segment": [152.46264911399763, 189.05368621508626], "label": "Rafting"}]}, "nOlCltba34g": {"duration": 16.58, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=nOlCltba34g", "annotations": []}, "eS_xTXii3is": {"duration": 160.66, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eS_xTXii3is", "annotations": []}, "dGT_KGb0gyw": {"duration": 29.861, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dGT_KGb0gyw", "annotations": []}, "b_c17BmZCCs": {"duration": 236.055, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=b_c17BmZCCs", "annotations": []}, "NW42KGlKi_c": {"duration": 60.883, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NW42KGlKi_c", "annotations": [{"segment": [6.1262644464962275, 19.898486845596274], "label": "Canoeing"}, {"segment": [20.65833359865007, 22.367988393135725], "label": "Canoeing"}, {"segment": [32.72090021840873, 34.24059369734789], "label": "Canoeing"}, {"segment": [41.2691760374415, 43.45373541341654], "label": "Canoeing"}]}, "-SggX9E_LOA": {"duration": 40.798, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-SggX9E_LOA", "annotations": []}, "mJQS2T-IVWc": {"duration": 146.008, "subset": "training", "resolution": "638x360", "url": "https://www.youtube.com/watch?v=mJQS2T-IVWc", "annotations": [{"segment": [0.01, 146.00707482993198], "label": "Fixing the roof"}]}, "jX9v6NXrLXs": {"duration": 119.49, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jX9v6NXrLXs", "annotations": []}, "U32mlg60EGA": {"duration": 72.353, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=U32mlg60EGA", "annotations": []}, "MOH9q_3Z9zg": {"duration": 154.69, "subset": "training", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=MOH9q_3Z9zg", "annotations": [{"segment": [69.01984809732525, 114.3895384550076], "label": "Getting a haircut"}]}, "CN01Gm2Yc4k": {"duration": 17.555, "subset": "validation", "resolution": "432x720", "url": "https://www.youtube.com/watch?v=CN01Gm2Yc4k", "annotations": [{"segment": [1.6411644199050477, 17.261], "label": "Doing crunches"}]}, "GMHzZXAQzIA": {"duration": 97.687, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=GMHzZXAQzIA", "annotations": [{"segment": [10.178826478264433, 92.36907093893102], "label": "Using the monkey bar"}]}, "tu6EkaMa9c8": {"duration": 174.498, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tu6EkaMa9c8", "annotations": [{"segment": [7.075609984399376, 43.27007644305772], "label": "River tubing"}, {"segment": [53.88349141965679, 146.4274851794072], "label": "River tubing"}, {"segment": [154.86475663026522, 158.40296723868957], "label": "River tubing"}]}, "_Rcb5kuhn90": {"duration": 33.855, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=_Rcb5kuhn90", "annotations": [{"segment": [0, 32.82006697819315], "label": "Playing rubik cube"}]}, "mmoAdwG65kI": {"duration": 189.66, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mmoAdwG65kI", "annotations": []}, "oZjx7F1doGs": {"duration": 144.66, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oZjx7F1doGs", "annotations": [{"segment": [55.17854542753139, 139.58253720625015], "label": "Tennis serve with ball bouncing"}]}, "v-UmNR5yeDc": {"duration": 230.691, "subset": "testing", "resolution": "855x470", "url": "https://www.youtube.com/watch?v=v-UmNR5yeDc", "annotations": []}, "iaXlCCgLBdo": {"duration": 36.36, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=iaXlCCgLBdo", "annotations": [{"segment": 
[19.061687640000002, 33.06251212], "label": "Javelin throw"}]}, "BJWL_X79HRc": {"duration": 170.9, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=BJWL_X79HRc", "annotations": [{"segment": [19.729354149765992, 154.63547847113884], "label": "Playing lacrosse"}]}, "MFow119nrOk": {"duration": 221.61, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=MFow119nrOk", "annotations": [{"segment": [0, 8.813770670826832], "label": "Skateboarding"}]}, "W5cGKWhOTOU": {"duration": 67.71, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=W5cGKWhOTOU", "annotations": [{"segment": [2.5333291731669267, 23.53885023400936], "label": "Putting on shoes"}, {"segment": [31.24439313572543, 54.88879875195008], "label": "Putting on shoes"}]}, "Fq6JJ7a_3vY": {"duration": 148.933, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Fq6JJ7a_3vY", "annotations": []}, "Rvi30WJaH1w": {"duration": 148.143, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Rvi30WJaH1w", "annotations": []}, "kfwwya1qzXM": {"duration": 209.31, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kfwwya1qzXM", "annotations": [{"segment": [18.612063276131046, 182.8553585023401], "label": "Playing guitarra"}]}, "hwGvU9Csz98": {"duration": 19.39, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hwGvU9Csz98", "annotations": [{"segment": [0.01, 0.02], "label": "Horseback riding"}]}, "rHD_vDxPpDA": {"duration": 131.43, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rHD_vDxPpDA", "annotations": [{"segment": [7.381118695787831, 131.424919], "label": "Cleaning shoes"}]}, "dh4bxvmrCNs": {"duration": 85.01, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dh4bxvmrCNs", "annotations": 
[{"segment": [4.376384794167277, 9.018005021528861], "label": "Archery"}, {"segment": [16.444597392199686, 49.59902761840873], "label": "Archery"}]}, "sYAAXumvtck": {"duration": 112.47999999999999, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sYAAXumvtck", "annotations": [{"segment": [9.998653666146646, 91.39120280811233], "label": "Doing nails"}]}, "xQH8YS2_NxY": {"duration": 93.855, "subset": "validation", "resolution": "480x268", "url": "https://www.youtube.com/watch?v=xQH8YS2_NxY", "annotations": [{"segment": [6.179922489495799, 77.52147814394864], "label": "Playing drums"}]}, "WLGdtzCt4A8": {"duration": 155.02, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WLGdtzCt4A8", "annotations": []}, "MMnTMB6AmuU": {"duration": 179.444, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MMnTMB6AmuU", "annotations": [{"segment": [6.1572886115444625, 90.1203151326053], "label": "Rope skipping"}, {"segment": [97.95686427457099, 168.4858065522621], "label": "Rope skipping"}]}, "qPZBSTNEl78": {"duration": 31.09, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=qPZBSTNEl78", "annotations": [{"segment": [0.7725844867394696, 11.588767301092044], "label": "Tennis serve with ball bouncing"}, {"segment": [17.006451, 27.2525428], "label": "Tennis serve with ball bouncing"}]}, "Ox51OBhM5Ak": {"duration": 32.16, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Ox51OBhM5Ak", "annotations": [{"segment": [0.2, 21.506443218169355], "label": "Having an ice cream"}]}, "XLdqEn8pqis": {"duration": 142.06, "subset": "validation", "resolution": "208x160", "url": "https://www.youtube.com/watch?v=XLdqEn8pqis", "annotations": [{"segment": [4.763261808112324, 116.4229804726989], "label": "Playing accordion"}]}, "qwBFQf5STBA": {"duration": 229.46, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=qwBFQf5STBA", "annotations": [{"segment": [21.47335413416537, 51.536049921996884], "label": "Doing nails"}, {"segment": [64.06217316692668, 122.75600780031202], "label": "Doing nails"}]}, "hog7gGdk4i4": {"duration": 13.073, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hog7gGdk4i4", "annotations": [{"segment": [0.2, 12.814], "label": "Swinging at the playground"}]}, "MHAS5TK2eW0": {"duration": 70.8, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MHAS5TK2eW0", "annotations": [{"segment": [1e-06, 69.98034321372855], "label": "Doing step aerobics"}]}, "zPZGjZES31A": {"duration": 110.74000000000001, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=zPZGjZES31A", "annotations": [{"segment": [11.39469571918877, 77.5167147], "label": "Mowing the lawn"}]}, "ah3tGziTbds": {"duration": 127.46, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ah3tGziTbds", "annotations": [{"segment": [7.953468397239291, 115.92180188976266], "label": "Sailing"}]}, "QQQ7jSVSG3A": {"duration": 209.44, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QQQ7jSVSG3A", "annotations": [{"segment": [7.188385070096682, 202.90850452418098], "label": "Belly dance"}]}, "KfP205pf7PU": {"duration": 103.051, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KfP205pf7PU", "annotations": [{"segment": [27.405249720741068, 29.969483338381636], "label": "Knitting"}, {"segment": [49.521768181305426, 71.79855173184974], "label": "Knitting"}, {"segment": [87.9852793477129, 98.56274491847495], "label": "Knitting"}]}, "BzU_kAzMMxo": {"duration": 193.794, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=BzU_kAzMMxo", "annotations": []}, "pbz8c7TAlDs": {"duration": 45.464, "subset": "validation", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=pbz8c7TAlDs", "annotations": [{"segment": [0.992988112324493, 40.00323538221529], "label": "River tubing"}]}, "ac1M42uRXS8": {"duration": 141.735, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ac1M42uRXS8", "annotations": [{"segment": [0.01, 112.10521641341653], "label": "Baton twirling"}]}, "hZD-CjWt0Rg": {"duration": 167.091, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hZD-CjWt0Rg", "annotations": [{"segment": [11.29554614915937, 158.08558771925283], "label": "Futsal"}]}, "h-lKlgls9OI": {"duration": 49.83, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=h-lKlgls9OI", "annotations": [{"segment": [12.658819032761311, 13.746079563182526], "label": "Playing water polo"}, {"segment": [15.8429391575663, 16.85253822152886], "label": "Playing water polo"}, {"segment": [19.104720748829955, 20.890934477379094], "label": "Playing water polo"}, {"segment": [28.11345085803432, 28.967726989079562], "label": "Playing water polo"}, {"segment": [30.210310452418096, 36.65621216848674], "label": "Playing water polo"}]}, "pzTqZWPubk8": {"duration": 212.42000000000002, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=pzTqZWPubk8", "annotations": []}, "D2IDRG8_6To": {"duration": 212.974, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=D2IDRG8_6To", "annotations": [{"segment": [86.68009516380656, 90.00117160686428], "label": "Shuffleboard"}, {"segment": [111.25606084243371, 117.89821372854915], "label": "Shuffleboard"}, {"segment": [144.46682527301093, 151.78745241809673], "label": "Shuffleboard"}]}, "dStkNaVlExA": {"duration": 117.17, "subset": "testing", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=dStkNaVlExA", "annotations": []}, "jsu65VwKf74": {"duration": 145.984, "subset": "training", "resolution": "640x480", 
"url": "https://www.youtube.com/watch?v=jsu65VwKf74", "annotations": [{"segment": [2.2774392336237668, 53.51982199015852], "label": "Futsal"}, {"segment": [62.857322848015954, 145.3006231051963], "label": "Futsal"}]}, "uiCkFmebAZs": {"duration": 235.729, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uiCkFmebAZs", "annotations": [{"segment": [4.780774937119935, 16.181084389391575], "label": "Swimming"}, {"segment": [27.213641927613104, 29.420153435257408], "label": "Swimming"}, {"segment": [77.59565468549143, 82.74418153666146], "label": "Swimming"}, {"segment": [118.41611757691106, 125.4034040177847], "label": "Swimming"}, {"segment": [146.3652633404056, 156.294565124805], "label": "Swimming"}, {"segment": [162.91409964773788, 170.26913800655225], "label": "Swimming"}, {"segment": [180.93394362683307, 182.77270321653666], "label": "Swimming"}, {"segment": [188.65673390358813, 190.86324541123244], "label": "Swimming"}, {"segment": [194.17301267269892, 200.7925471956318], "label": "Swimming"}, {"segment": [209.61859322620904, 215.50262391326052], "label": "Swimming"}]}, "JoiZmVQCLCI": {"duration": 186.32, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=JoiZmVQCLCI", "annotations": [{"segment": [6.245397815912636, 177.0497659906396], "label": "Pole vault"}]}, "_il2j9UtSe4": {"duration": 8.82, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_il2j9UtSe4", "annotations": [{"segment": [2.298659906396256, 4.913901716068643], "label": "Drinking coffee"}]}, "uavot-yVwhg": {"duration": 98.06, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uavot-yVwhg", "annotations": [{"segment": [44.05720143907797, 76.33521939625585], "label": "Cleaning shoes"}]}, "gMwjhe0Njkc": {"duration": 134.7, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gMwjhe0Njkc", "annotations": [{"segment": 
[12.63871334, 44.28331448], "label": "Discus throw"}, {"segment": [46.63766302652107, 92.22492823712949], "label": "Discus throw"}]}, "1sA-lEbrgak": {"duration": 70.17, "subset": "training", "resolution": "470x360", "url": "https://www.youtube.com/watch?v=1sA-lEbrgak", "annotations": [{"segment": [6.91279191944681, 15.762357340007558], "label": "Clean and jerk"}, {"segment": [32.36894924031908, 66.8931797699141], "label": "Clean and jerk"}]}, "0ivHmKR8cUw": {"duration": 92.22999999999999, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=0ivHmKR8cUw", "annotations": [{"segment": [13.520895475819032, 81.26921216848673], "label": "Mixing drinks"}]}, "I58i4uJkHJ0": {"duration": 158.73, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=I58i4uJkHJ0", "annotations": [{"segment": [60.852703469999994, 123.31176466], "label": "Using the balance beam"}, {"segment": [8.21274194, 53.21615506], "label": "Using the balance beam"}]}, "ML6CRJRp3ik": {"duration": 188.918, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ML6CRJRp3ik", "annotations": []}, "wpxozv4Yois": {"duration": 173.779, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wpxozv4Yois", "annotations": [{"segment": [24.94163617472699, 119.82829553510142], "label": "Snowboarding"}, {"segment": [126.33480931981279, 173.23592951794072], "label": "Snowboarding"}]}, "EdtXUHBGnRY": {"duration": 229.16, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=EdtXUHBGnRY", "annotations": []}, "beASFdYJFUU": {"duration": 173.662, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=beASFdYJFUU", "annotations": [{"segment": [0.01, 13.00433378967812], "label": "Curling"}, {"segment": [29.801598268012352, 34.407299818523356], "label": "Curling"}, {"segment": [67.73090515457353, 79.38062084116018], "label": 
"Curling"}, {"segment": [95.63603807825783, 103.76374669680666], "label": "Curling"}, {"segment": [134.10719157566302, 149.54983787831512], "label": "Curling"}]}, "HafSt7EDin0": {"duration": 178.75, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=HafSt7EDin0", "annotations": [{"segment": [69.3153552, 168.29008076241416], "label": "Hopscotch"}]}, "8gUKEh27AFM": {"duration": 25.75, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8gUKEh27AFM", "annotations": [{"segment": [1.0228642745709828, 23.485765990639628], "label": "Hammer throw"}]}, "FMmrxy3OanA": {"duration": 213.276, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FMmrxy3OanA", "annotations": [{"segment": [106.3689584574833, 119.98587590296617], "label": "Hula hoop"}]}, "1ASxtL1Brn4": {"duration": 35.295, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1ASxtL1Brn4", "annotations": []}, "Zre8db8Q3ss": {"duration": 138.229, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Zre8db8Q3ss", "annotations": []}, "dSHw52PBi5I": {"duration": 148.79, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dSHw52PBi5I", "annotations": [{"segment": [7.97084101, 144.33763845865835], "label": "Plataform diving"}]}, "K-PpTJmmjhA": {"duration": 208.56, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=K-PpTJmmjhA", "annotations": [{"segment": [0.01, 208.56108843537416], "label": "Ironing clothes"}]}, "AyaS46-OPPU": {"duration": 15.28, "subset": "testing", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=AyaS46-OPPU", "annotations": []}, "ebBqY4VYHlA": {"duration": 122.346, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ebBqY4VYHlA", "annotations": []}, "oKaNlV45bB8": {"duration": 95.53, "subset": "training", 
"resolution": "480x360", "url": "https://www.youtube.com/watch?v=oKaNlV45bB8", "annotations": [{"segment": [1.8261301541790353, 10.15869394234103], "label": "Brushing teeth"}, {"segment": [17.896074602777166, 20.87199024140645], "label": "Brushing teeth"}, {"segment": [34.11481483330677, 95.12108542520708], "label": "Brushing teeth"}]}, "kRMskyrrRcA": {"duration": 49.087, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kRMskyrrRcA", "annotations": [{"segment": [0.2, 29.023349354134165], "label": "Playing rubik cube"}]}, "kn41Y1h8-Uo": {"duration": 71.31, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=kn41Y1h8-Uo", "annotations": []}, "j0FMv4RDpEk": {"duration": 195.234, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=j0FMv4RDpEk", "annotations": [{"segment": [27.326600437997907, 186.12451187214128], "label": "Fixing the roof"}]}, "zwa44U585FE": {"duration": 143.453, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zwa44U585FE", "annotations": [{"segment": [9.608201022880595, 133.84447471408086], "label": "Croquet"}]}, "mZ1Di2gg-I4": {"duration": 209.05, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mZ1Di2gg-I4", "annotations": [{"segment": [101.56633222932916, 205.5780817675507], "label": "Tai chi"}, {"segment": [6.032029361934478, 99.0683211], "label": "Tai chi"}]}, "rsU73H6VyG8": {"duration": 86.82, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=rsU73H6VyG8", "annotations": []}, "-E2dqOULQgY": {"duration": 68.337, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-E2dqOULQgY", "annotations": [{"segment": [0, 22.909071762870514], "label": "Playing drums"}, {"segment": [31.220269890795635, 67.98133853354135], "label": "Playing drums"}]}, "hgzyQ_jHeJc": {"duration": 143.06, "subset": 
"training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hgzyQ_jHeJc", "annotations": [{"segment": [14.946957878315132, 120.91419656786272], "label": "Using the pommel horse"}]}, "BN3ejP3RbkQ": {"duration": 55.47, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BN3ejP3RbkQ", "annotations": [{"segment": [0.01, 54.74906864274571], "label": "Playing flauta"}]}, "DFzoAQ8AzWA": {"duration": 59.68, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=DFzoAQ8AzWA", "annotations": []}, "YCqbvmEG-Uw": {"duration": 17.276, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YCqbvmEG-Uw", "annotations": [{"segment": [0.24179251170046803, 16.434088923556942], "label": "Removing ice from car"}]}, "2-1MNxfX5Bc": {"duration": 68.383, "subset": "validation", "resolution": "626x480", "url": "https://www.youtube.com/watch?v=2-1MNxfX5Bc", "annotations": [{"segment": [0.01, 6.93428984399376], "label": "Rafting"}]}, "ycBFz2RdgoY": {"duration": 198.76, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ycBFz2RdgoY", "annotations": [{"segment": [3.799629122973647, 10.3012163659643], "label": "Windsurfing"}, {"segment": [31.66357445007645, 184.91527374914187], "label": "Windsurfing"}]}, "oey6DFvL9Xk": {"duration": 61.719, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oey6DFvL9Xk", "annotations": [{"segment": [34.45870905121001, 42.820160276849016], "label": "Putting in contact lenses"}, {"segment": [49.540813465029416, 54.91733601557374], "label": "Putting in contact lenses"}]}, "ze6Bu0bcNbg": {"duration": 233.59, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ze6Bu0bcNbg", "annotations": [{"segment": [65.59546626763029, 204.07478240249608], "label": "Shaving"}]}, "cIpBpGQ0XTI": {"duration": 226.558, "subset": "validation", "resolution": 
"426x240", "url": "https://www.youtube.com/watch?v=cIpBpGQ0XTI", "annotations": [{"segment": [18.698680685358255, 216.97525700934582], "label": "Changing car wheel"}]}, "mH52O_EaYtA": {"duration": 199.69, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mH52O_EaYtA", "annotations": []}, "QXZkejzunpk": {"duration": 71.332, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QXZkejzunpk", "annotations": [{"segment": [14.577915565602215, 71.13163265306122], "label": "BMX"}]}, "2ooY3GqZieg": {"duration": 142.988, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2ooY3GqZieg", "annotations": [{"segment": [23.19934944149766, 142.9882993197279], "label": "Longboarding"}]}, "itgR5a-hH_o": {"duration": 214.57999999999998, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=itgR5a-hH_o", "annotations": [{"segment": [3.012755897991022, 212.23191442433696], "label": "Playing piano"}]}, "rgU4olNYrJs": {"duration": 138.995, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rgU4olNYrJs", "annotations": []}, "PdwWFueQErM": {"duration": 43.05, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PdwWFueQErM", "annotations": []}, "rxEQaswh_Uo": {"duration": 59.65, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=rxEQaswh_Uo", "annotations": [{"segment": [0.01, 59.465940121684866], "label": "Polishing shoes"}]}, "I9ficvPdpZg": {"duration": 187.55, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=I9ficvPdpZg", "annotations": []}, "pKOy4HJwCqo": {"duration": 58.956, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pKOy4HJwCqo", "annotations": []}, "rgwKbJe5_wg": {"duration": 38.55, "subset": "testing", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=rgwKbJe5_wg", "annotations": []}, "x3cOxXOYbwA": {"duration": 37.04, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=x3cOxXOYbwA", "annotations": [{"segment": [9.26468096723869, 22.59889469578783], "label": "Snatch"}]}, "Hj_YKQiU-8Y": {"duration": 121.79, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Hj_YKQiU-8Y", "annotations": [{"segment": [37.71458569907422, 70.39422670784383], "label": "Brushing hair"}]}, "42i3phJnY4M": {"duration": 51.804, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=42i3phJnY4M", "annotations": [{"segment": [0.01, 50.280802926809244], "label": "Bullfighting"}]}, "TaLEPzEyZ34": {"duration": 167.81, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=TaLEPzEyZ34", "annotations": [{"segment": [17.584178476418973, 22.289134290520312], "label": "Drinking coffee"}, {"segment": [33.0059780893067, 34.57429669400715], "label": "Drinking coffee"}, {"segment": [72.99810250916809, 86.06742421500516], "label": "Drinking coffee"}]}, "A_ndiCY-rDc": {"duration": 150.141, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=A_ndiCY-rDc", "annotations": [{"segment": [12.64265522620905, 86.39147737909516], "label": "Doing crunches"}, {"segment": [96.69290015600625, 139.53745397815914], "label": "Doing crunches"}]}, "x4DuEusRR7Q": {"duration": 31.741, "subset": "training", "resolution": "256x144", "url": "https://www.youtube.com/watch?v=x4DuEusRR7Q", "annotations": [{"segment": [1.6799283489096573, 30.63398753894081], "label": "Swimming"}]}, "8vcO5DRxP6c": {"duration": 203.17000000000002, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8vcO5DRxP6c", "annotations": [{"segment": [27.5686848673947, 49.1166224648986], "label": "Shot put"}, {"segment": [88.40992043681747, 99.81765210608425], "label": 
"Shot put"}, {"segment": [148.61739313572542, 176.52033853354135], "label": "Shot put"}]}, "hIHeMA1oRrY": {"duration": 21.061, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hIHeMA1oRrY", "annotations": []}, "WaVrNbTmbU4": {"duration": 107.625, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WaVrNbTmbU4", "annotations": [{"segment": [45.850942081688366, 55.08549228082716], "label": "Painting furniture"}, {"segment": [59.954618928253055, 74.22619703277724], "label": "Painting furniture"}]}, "WKLLTGbDI7w": {"duration": 79.436, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WKLLTGbDI7w", "annotations": [{"segment": [8.548822873381026, 79.434875], "label": "Making a lemonade"}]}, "Pr5K2Jh2X94": {"duration": 109.83, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Pr5K2Jh2X94", "annotations": []}, "l4C-l6XeNRc": {"duration": 238.86, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=l4C-l6XeNRc", "annotations": [{"segment": [20.489287542901714, 181.35673275], "label": "Mowing the lawn"}, {"segment": [198.26823315, 224.6370979703588], "label": "Mowing the lawn"}]}, "pLfUo8hHnzs": {"duration": 20.898, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pLfUo8hHnzs", "annotations": []}, "huv327wdIU8": {"duration": 115.125, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=huv327wdIU8", "annotations": [{"segment": [27.313976941687123, 100.54249152222125], "label": "Futsal"}]}, "sZbkKa2iKrM": {"duration": 59.952, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sZbkKa2iKrM", "annotations": [{"segment": [9.257658346333855, 38.259046801872074], "label": "Doing a powerbomb"}]}, "q0buJ1CKj-Y": {"duration": 48.58, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=q0buJ1CKj-Y", "annotations": [{"segment": [1.364072686427457, 45.31752591575663], "label": "Playing guitarra"}]}, "tD-wp39rjnE": {"duration": 33.81, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=tD-wp39rjnE", "annotations": [{"segment": [1.7644552433697347, 29.94306883151326], "label": "Javelin throw"}]}, "DguywhRJ7ds": {"duration": 4.81, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=DguywhRJ7ds", "annotations": [{"segment": [0.60680556, 2.00251602], "label": "Springboard diving"}]}, "_lxs63ftS9M": {"duration": 18.321, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=_lxs63ftS9M", "annotations": []}, "WgZOr6_3VtI": {"duration": 217.502, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=WgZOr6_3VtI", "annotations": []}, "ESgcgoYgg0o": {"duration": 173.407, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ESgcgoYgg0o", "annotations": [{"segment": [87.91676824242417, 98.70410227401503], "label": "Putting in contact lenses"}, {"segment": [124.32402059904332, 131.60547107036717], "label": "Putting in contact lenses"}]}, "c6qtZXZoOmA": {"duration": 222.29, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=c6qtZXZoOmA", "annotations": []}, "RllrUfp2EIU": {"duration": 155.69, "subset": "validation", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=RllrUfp2EIU", "annotations": [{"segment": [48.33427361584259, 133.62475492], "label": "Starting a campfire"}]}, "YXsAQqmWWtE": {"duration": 210.26, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YXsAQqmWWtE", "annotations": []}, "1aT9uLUKuZc": {"duration": 54.01, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1aT9uLUKuZc", "annotations": []}, "Jy_cnXCPVI4": {"duration": 
236.17000000000002, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Jy_cnXCPVI4", "annotations": []}, "akMN3uX51Ws": {"duration": 92.798, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=akMN3uX51Ws", "annotations": [{"segment": [14.993226145372345, 20.20826132637142], "label": "Rollerblading"}, {"segment": [57.43781803517039, 84.38216737906863], "label": "Rollerblading"}]}, "7R6rLXMQAWc": {"duration": 169.88, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=7R6rLXMQAWc", "annotations": []}, "Ycf5NG0xedg": {"duration": 237.261, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ycf5NG0xedg", "annotations": [{"segment": [78.82694695787832, 112.5041872074883], "label": "Rafting"}, {"segment": [125.45697191887676, 161.73840405616224], "label": "Rafting"}]}, "PBYg9ekd86w": {"duration": 74.327, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PBYg9ekd86w", "annotations": []}, "hQIwXufRVGY": {"duration": 56.87, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=hQIwXufRVGY", "annotations": [{"segment": [0.01, 55.534959052783876], "label": "Walking the dog"}]}, "jHfG1I40Cdw": {"duration": 217.25, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jHfG1I40Cdw", "annotations": []}, "KAbo60mowhw": {"duration": 219.87, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=KAbo60mowhw", "annotations": [{"segment": [33.92080650893717, 203.86678789578946], "label": "Disc dog"}]}, "c-3ed17J3F8": {"duration": 224.0, "subset": "training", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=c-3ed17J3F8", "annotations": [{"segment": [66.04765767773569, 146.77257163806553], "label": "Polishing shoes"}]}, "j46ll2_jR7k": {"duration": 219.917, "subset": "validation", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=j46ll2_jR7k", "annotations": [{"segment": [37.72897035881435, 204.0794305772231], "label": "Hula hoop"}]}, "RDC4sqe8xVk": {"duration": 192.587, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RDC4sqe8xVk", "annotations": [{"segment": [20.430355489049493, 105.45668789200549], "label": "Skiing"}, {"segment": [133.6986498915739, 153.2276652730109], "label": "Skiing"}, {"segment": [162.54150375039, 181.7700735101404], "label": "Skiing"}]}, "oUDqawe5d00": {"duration": 167.556, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oUDqawe5d00", "annotations": []}, "2wWWlklKCSs": {"duration": 41.91, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2wWWlklKCSs", "annotations": []}, "e0-lO2jb8vo": {"duration": 188.756, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=e0-lO2jb8vo", "annotations": [{"segment": [15.015243369734788, 188.701], "label": "Baton twirling"}]}, "2hgXJR5QdkU": {"duration": 197.07, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=2hgXJR5QdkU", "annotations": [{"segment": [5.223322932917316, 193.57020280811233], "label": "Smoking hookah"}]}, "AB480dHyDeM": {"duration": 212.6, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AB480dHyDeM", "annotations": []}, "BpSvtcblt9c": {"duration": 60.233, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BpSvtcblt9c", "annotations": []}, "bwRsZtPzipc": {"duration": 228.86, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=bwRsZtPzipc", "annotations": [{"segment": [13.206921996879874, 214.5419984399376], "label": "Belly dance"}]}, "5qBzjfW-sMs": {"duration": 12.7, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5qBzjfW-sMs", 
"annotations": [{"segment": [4.438524705148206, 6.083156627145086], "label": "Chopping wood"}]}, "k7nuduqYOdQ": {"duration": 195.63, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=k7nuduqYOdQ", "annotations": [{"segment": [11.136865536661468, 178.95264759594386], "label": "Snatch"}]}, "MldEr60j33M": {"duration": 25.87, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=MldEr60j33M", "annotations": [{"segment": [0, 22.679047950078004], "label": "Clean and jerk"}]}, "L2FgftH2VD8": {"duration": 90.77, "subset": "validation", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=L2FgftH2VD8", "annotations": [{"segment": [3.3952324492979717, 60.265375975039], "label": "Hand washing clothes"}, {"segment": [81.50355070202808, 89.4275070202808], "label": "Hand washing clothes"}]}, "YigV1ARspVU": {"duration": 36.2, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YigV1ARspVU", "annotations": [{"segment": [0.2, 33.77907469030251], "label": "Riding bumper cars"}]}, "7RESODKApso": {"duration": 54.103, "subset": "validation", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=7RESODKApso", "annotations": [{"segment": [32.50229088297528, 53.60310757564299], "label": "Ice fishing"}]}, "yiFKPKXevOU": {"duration": 49.25, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yiFKPKXevOU", "annotations": [{"segment": [3.0660426209048364, 44.380966937597506], "label": "Chopping wood"}]}, "SO67XxdevPw": {"duration": 64.18, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SO67XxdevPw", "annotations": [{"segment": [1.2501950078003121, 62.45974258970359], "label": "Using parallel bars"}]}, "hMOh-yKfZiE": {"duration": 198.252, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hMOh-yKfZiE", "annotations": []}, "KnED67CTI5c": {"duration": 
4.46, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KnED67CTI5c", "annotations": []}, "Tovbyt_ZiIc": {"duration": 203.454, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Tovbyt_ZiIc", "annotations": [{"segment": [78.34308716395488, 190.81538537975558], "label": "Braiding hair"}]}, "uuZN3GbSeAw": {"duration": 224.03, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uuZN3GbSeAw", "annotations": []}, "Ok_Tjkob8IA": {"duration": 197.068, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ok_Tjkob8IA", "annotations": [{"segment": [31.899975077881617, 172.09330373831776], "label": "Making a lemonade"}]}, "GwJ9DmiW8dk": {"duration": 97.06, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GwJ9DmiW8dk", "annotations": [{"segment": [0.2, 91.381228099844], "label": "Using the balance beam"}]}, "gt2Sp_iG2hU": {"duration": 214.599, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gt2Sp_iG2hU", "annotations": [{"segment": [78.67496659360374, 85.37070843135726], "label": "Playing drums"}, {"segment": [111.14931450670828, 121.8625014471139], "label": "Playing drums"}, {"segment": [134.91919803073324, 142.2845140522621], "label": "Playing drums"}, {"segment": [165.3848233925117, 178.77630706801872], "label": "Playing drums"}, {"segment": [187.48077145709829, 197.8591713056162], "label": "Playing drums"}]}, "mkEME_iWi9o": {"duration": 88.469, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mkEME_iWi9o", "annotations": [{"segment": [0.19999999999999998, 21.932967510140404], "label": "Hurling"}, {"segment": [36.417002658346334, 50.625151422776916], "label": "Hurling"}]}, "VwK2kBm9BDE": {"duration": 97.92, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=VwK2kBm9BDE", 
"annotations": []}, "4JnXF13ktSs": {"duration": 35.02, "subset": "validation", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=4JnXF13ktSs", "annotations": [{"segment": [4.18577986, 22.29362228], "label": "Mowing the lawn"}, {"segment": [23.12817472698908, 34.965], "label": "Mowing the lawn"}]}, "BxKFlAS3V6g": {"duration": 17.09, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BxKFlAS3V6g", "annotations": []}, "R7QwxoVTtE0": {"duration": 72.77, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=R7QwxoVTtE0", "annotations": [{"segment": [26.28170041495537, 68.40053779702208], "label": "Cleaning windows"}]}, "90vop6PS2Y0": {"duration": 271.233, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=90vop6PS2Y0", "annotations": [{"segment": [0.2, 38.97732258128229], "label": "Raking leaves"}]}, "jZMxjRSL7dw": {"duration": 94.78, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jZMxjRSL7dw", "annotations": []}, "S2V2TgLAMKg": {"duration": 40.055, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=S2V2TgLAMKg", "annotations": [{"segment": [2.593226994385898, 35.2116493450338], "label": "Waxing skis"}]}, "knHJztfWFrA": {"duration": 166.63, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=knHJztfWFrA", "annotations": [{"segment": [0.3303272122994182, 154.49867213753308], "label": "Playing flauta"}]}, "hP8wUI0qdaQ": {"duration": 162.053, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hP8wUI0qdaQ", "annotations": [{"segment": [33.3831728046137, 38.68318165508099], "label": "Elliptical trainer"}, {"segment": [62.91179354293146, 65.43560728124922], "label": "Elliptical trainer"}, {"segment": [77.29753185134267, 80.57848971115575], "label": "Elliptical trainer"}, {"segment": [88.14993092610902, 
91.93565153358567], "label": "Elliptical trainer"}]}, "UF5gg_E5Vd0": {"duration": 87.15, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UF5gg_E5Vd0", "annotations": []}, "AEer099-yUM": {"duration": 47.137, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AEer099-yUM", "annotations": [{"segment": [6.576504680187208, 20.46431903276131], "label": "Throwing darts"}, {"segment": [23.03613650546022, 30.751588923556945], "label": "Throwing darts"}, {"segment": [35.16041887675507, 42.06758580343214], "label": "Throwing darts"}]}, "FFMugev85IY": {"duration": 92.927, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=FFMugev85IY", "annotations": []}, "8QbHeaXOJOA": {"duration": 210.559, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8QbHeaXOJOA", "annotations": [{"segment": [48.616133192579326, 108.62907471058449], "label": "Kite flying"}, {"segment": [162.7349391965658, 203.06840181338825], "label": "Kite flying"}]}, "6DYQHmsezUw": {"duration": 96.363, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=6DYQHmsezUw", "annotations": [{"segment": [0.01, 96.36240362811792], "label": "River tubing"}]}, "LQW4hk1FLgw": {"duration": 39.339, "subset": "training", "resolution": "3840x2160", "url": "https://www.youtube.com/watch?v=LQW4hk1FLgw", "annotations": [{"segment": [4.697529641185647, 34.78628159126365], "label": "Removing ice from car"}]}, "TvNNx-ZFfDA": {"duration": 228.578, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TvNNx-ZFfDA", "annotations": [{"segment": [23.46204343258817, 34.1266110948008], "label": "Beer pong"}, {"segment": [116.59926768257847, 118.02121003754016], "label": "Beer pong"}, {"segment": [142.5497156606292, 153.92525450032267], "label": "Beer pong"}, {"segment": [171.3440483486033, 189.47381208116366], "label": "Beer 
pong"}]}, "8eMjzrhxv2U": {"duration": 76.347, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=8eMjzrhxv2U", "annotations": [{"segment": [0.2, 72.39795794392523], "label": "Ice fishing"}]}, "UFVeN-ThOwU": {"duration": 143.76, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UFVeN-ThOwU", "annotations": [{"segment": [0, 21.978107124284968], "label": "Washing dishes"}, {"segment": [26.01490231037813, 98.67721566005496], "label": "Washing dishes"}, {"segment": [114.15159648829953, 126.37879475232177], "label": "Washing dishes"}]}, "oRR1nIUPbjo": {"duration": 203.211, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oRR1nIUPbjo", "annotations": [{"segment": [0.01, 179.14378315132606], "label": "Rafting"}]}, "9GYLUAFgCXE": {"duration": 180.75, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9GYLUAFgCXE", "annotations": [{"segment": [21.147911232449296, 41.167933865834634], "label": "Playing flauta"}]}, "pqcY83EZ9Vc": {"duration": 31.162, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=pqcY83EZ9Vc", "annotations": []}, "RfsugL4sLso": {"duration": 107.74000000000001, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=RfsugL4sLso", "annotations": [{"segment": [0.01, 24.53997792199688], "label": "Brushing teeth"}, {"segment": [42.02051014040562, 104.21086514820593], "label": "Brushing teeth"}]}, "6gKir83gUhU": {"duration": 128.151, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6gKir83gUhU", "annotations": []}, "3akCh2r4Bnk": {"duration": 40.38, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=3akCh2r4Bnk", "annotations": []}, "D7Oc3SLX0wo": {"duration": 71.727, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=D7Oc3SLX0wo", 
"annotations": [{"segment": [0.01, 8.16853350078003], "label": "Playing beach volleyball"}, {"segment": [12.86823770670827, 20.02969173478939], "label": "Playing beach volleyball"}, {"segment": [37.5976336474259, 63.22221134165367], "label": "Playing beach volleyball"}]}, "7lv1VOxIng4": {"duration": 233.8, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=7lv1VOxIng4", "annotations": [{"segment": [30.638606209048366, 223.22413095163807], "label": "Putting on makeup"}]}, "6sorAV7KJxA": {"duration": 201.225, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6sorAV7KJxA", "annotations": [{"segment": [61.86460702026224, 188.16367711372018], "label": "Fixing the roof"}]}, "5Wp2dxIAocI": {"duration": 213.25, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=5Wp2dxIAocI", "annotations": [{"segment": [20.947745709828393, 180.55620336], "label": "Wrapping presents"}]}, "_xgGaxc1jNE": {"duration": 184.27, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_xgGaxc1jNE", "annotations": [{"segment": [0, 1.7248695070202809], "label": "Playing water polo"}]}, "MWn4qozlaMs": {"duration": 41.22, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=MWn4qozlaMs", "annotations": [{"segment": [9.251269890795632, 41.181], "label": "Removing curlers"}]}, "2gc5VDx2xY8": {"duration": 19.273, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2gc5VDx2xY8", "annotations": [{"segment": [6.0034321372854915, 14.198117004680189], "label": "Calf roping"}]}, "8IrulONeC28": {"duration": 104.769, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=8IrulONeC28", "annotations": []}, "UxlSiLBleX4": {"duration": 230.18, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UxlSiLBleX4", "annotations": [{"segment": 
[15.516366390000002, 207.6929310156006], "label": "Playing guitarra"}]}, "vBOejU7dBzY": {"duration": 107.7, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=vBOejU7dBzY", "annotations": [{"segment": [0, 58.05944773790952], "label": "Grooming horse"}]}, "7n5UbaZ0a0c": {"duration": 215.18, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7n5UbaZ0a0c", "annotations": []}, "6g7FGRgUDYc": {"duration": 50.016, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6g7FGRgUDYc", "annotations": []}, "6QImnhUemH0": {"duration": 62.16, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6QImnhUemH0", "annotations": [{"segment": [0, 1.8413400936037443], "label": "Mooping floor"}, {"segment": [16.184410296411855, 39.44344305772231], "label": "Mooping floor"}, {"segment": [51.169872074882996, 61.53952418096724], "label": "Mooping floor"}]}, "Sjx7K9Ybx9Q": {"duration": 18.321, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Sjx7K9Ybx9Q", "annotations": [{"segment": [0.01, 18.320544], "label": "Cutting the grass"}]}, "RtF6TGqwa7Y": {"duration": 49.9, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=RtF6TGqwa7Y", "annotations": []}, "PKNBIy7FyP8": {"duration": 71.054, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PKNBIy7FyP8", "annotations": []}, "vnZmsOgWs0o": {"duration": 237.819, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vnZmsOgWs0o", "annotations": []}, "4rmo35vpG00": {"duration": 60.49, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4rmo35vpG00", "annotations": [{"segment": [26.799666645865834, 31.234822745709828], "label": "Bathing dog"}, {"segment": [32.08410795631825, 38.78402461778471], "label": "Bathing dog"}, {"segment": 
[45.106481185647425, 56.99647413416536], "label": "Bathing dog"}]}, "IfugNwywj-k": {"duration": 65.09, "subset": "testing", "resolution": "386x240", "url": "https://www.youtube.com/watch?v=IfugNwywj-k", "annotations": []}, "RGMSc1tfkzA": {"duration": 124.11, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RGMSc1tfkzA", "annotations": [{"segment": [3.580611014820593, 109.83766221138845], "label": "Plataform diving"}]}, "ToLMOwlrgm0": {"duration": 218.8, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ToLMOwlrgm0", "annotations": [{"segment": [36.36202418096724, 61.28622854914196], "label": "Brushing hair"}, {"segment": [71.87047971918876, 75.62618174726988], "label": "Brushing hair"}, {"segment": [102.9403783151326, 131.27885725429016], "label": "Brushing hair"}, {"segment": [149.37451248049922, 151.7645046801872], "label": "Brushing hair"}, {"segment": [168.49445007800313, 189.6629524180967], "label": "Brushing hair"}, {"segment": [197.17435647425896, 211.8557371294852], "label": "Brushing hair"}]}, "FjY3eKNvzug": {"duration": 163.237, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FjY3eKNvzug", "annotations": [{"segment": [18.335432386895476, 118.67099294851793], "label": "Using the rowing machine"}]}, "3cjtV-ldvto": {"duration": 212.138, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3cjtV-ldvto", "annotations": [{"segment": [7.942740828000467, 188.3091456973479], "label": "Baking cookies"}]}, "K2B6AMIhaT8": {"duration": 231.527, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=K2B6AMIhaT8", "annotations": []}, "04LdesS7Pxk": {"duration": 185.13, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=04LdesS7Pxk", "annotations": [{"segment": [13.57097503900156, 164.87290951638064], "label": "Shaving"}]}, "VhdidrZKuTU": {"duration": 
215.13, "subset": "testing", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=VhdidrZKuTU", "annotations": []}, "Q5qIsUsM_-A": {"duration": 49.807, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Q5qIsUsM_-A", "annotations": [{"segment": [9.160371294851794, 49.781], "label": "Playing rubik cube"}]}, "fsz79uPkUSs": {"duration": 199.18, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=fsz79uPkUSs", "annotations": [{"segment": [19.88502964118565, 150.06983307332294], "label": "Putting on makeup"}]}, "GuwWFip-AF0": {"duration": 134.258, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=GuwWFip-AF0", "annotations": [{"segment": [6.490219968798752, 133.57291419656787], "label": "Playing pool"}]}, "mbGDPga031E": {"duration": 187.734, "subset": "training", "resolution": "256x192", "url": "https://www.youtube.com/watch?v=mbGDPga031E", "annotations": [{"segment": [7.71114437092576, 177.706817268122], "label": "Shuffleboard"}]}, "9Xrw-WOipSI": {"duration": 74.095, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=9Xrw-WOipSI", "annotations": [{"segment": [33.637455081876745, 49.126866536661474], "label": "Croquet"}]}, "Dk3DiAp2yAU": {"duration": 85.1, "subset": "validation", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=Dk3DiAp2yAU", "annotations": [{"segment": [19.25064648844457, 48.59128597815913], "label": "Cleaning windows"}, {"segment": [57.08812287051482, 76.86982126053043], "label": "Cleaning windows"}]}, "tScqYRQ7zyo": {"duration": 16.21, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=tScqYRQ7zyo", "annotations": [{"segment": [0.9440327223088925, 14.97624042], "label": "Putting on makeup"}]}, "rdszlTqxBXs": {"duration": 15.93, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=rdszlTqxBXs", "annotations": 
[{"segment": [3.155957704974866, 10.610975905702894], "label": "Putting on makeup"}]}, "qmfZw9eEpIU": {"duration": 95.02000000000001, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qmfZw9eEpIU", "annotations": []}, "uLsq6gQXQPM": {"duration": 148.748, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uLsq6gQXQPM", "annotations": []}, "vCeaFAiokrU": {"duration": 141.5, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=vCeaFAiokrU", "annotations": [{"segment": [40.821505460218404, 133.05604212168487], "label": "Shoveling snow"}]}, "Damu3T2Yy0M": {"duration": 99.08, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Damu3T2Yy0M", "annotations": [{"segment": [66.77397648673947, 80.37608280811233], "label": "Arm wrestling"}]}, "feY5JrgSpzE": {"duration": 65.992, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=feY5JrgSpzE", "annotations": [{"segment": [0.01, 45.46945709828393], "label": "Kneeling"}, {"segment": [0.2, 45.880945397815914], "label": "Kneeling"}]}, "wZgBJlWqWWI": {"duration": 54.753, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=wZgBJlWqWWI", "annotations": [{"segment": [0.2, 54.579], "label": "Using the monkey bar"}]}, "QEdbqJijx1w": {"duration": 76.255, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QEdbqJijx1w", "annotations": [{"segment": [31.39211232449298, 37.4564976599064], "label": "Putting in contact lenses"}, {"segment": [64.21113884555382, 74.0806287051482], "label": "Putting in contact lenses"}]}, "Q2OOxx3RWhY": {"duration": 209.84, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Q2OOxx3RWhY", "annotations": [{"segment": [69.40064086372978, 91.0065007552683], "label": "Removing curlers"}, {"segment": [138.80128172745955, 
144.03906594359012], "label": "Removing curlers"}]}, "XToVLTbQEm4": {"duration": 182.65, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XToVLTbQEm4", "annotations": [{"segment": [8.548275912636507, 168.11609294851795], "label": "Layup drill in basketball"}]}, "K-pwTH9ubkE": {"duration": 150.37, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=K-pwTH9ubkE", "annotations": [{"segment": [0, 140.51962960218407], "label": "Playing piano"}]}, "CZp3ZPTQrds": {"duration": 121.98, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=CZp3ZPTQrds", "annotations": [{"segment": [55.373723455060656, 66.02983518524414], "label": "Shaving"}, {"segment": [71.73846542901717, 84.86831719188767], "label": "Shaving"}, {"segment": [89.62550985959437, 104.84852639625585], "label": "Shaving"}, {"segment": [107.5125542901716, 113.79204861154446], "label": "Shaving"}]}, "f4UdgFrorCo": {"duration": 209.17000000000002, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=f4UdgFrorCo", "annotations": [{"segment": [81.57775018483733, 187.62882542512583], "label": "Tango"}]}, "m5_20ftU4iQ": {"duration": 126.549, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=m5_20ftU4iQ", "annotations": []}, "ZQs0S7WwYiM": {"duration": 159.336, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZQs0S7WwYiM", "annotations": [{"segment": [20.378599063962554, 141.9046349453978], "label": "Installing carpet"}]}, "gywpvBoHPmQ": {"duration": 63.507, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gywpvBoHPmQ", "annotations": []}, "evXdygHVzNc": {"duration": 79.81, "subset": "training", "resolution": "634x360", "url": "https://www.youtube.com/watch?v=evXdygHVzNc", "annotations": [{"segment": [2.60914352574103, 66.59528237129486], "label": "Discus throw"}]}, 
"Y6rHCVo28kM": {"duration": 144.22, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Y6rHCVo28kM", "annotations": [{"segment": [12.374496934477378, 128.4697772652106], "label": "Getting a haircut"}]}, "gGYje5UGLdY": {"duration": 238.957, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gGYje5UGLdY", "annotations": [{"segment": [0.01, 238.956553], "label": "Playing congas"}]}, "9-hrNWXnuY8": {"duration": 171.782, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9-hrNWXnuY8", "annotations": [{"segment": [7.503703968798753, 165.88545559594385], "label": "Disc dog"}]}, "cNCrFL2l83o": {"duration": 148.86, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cNCrFL2l83o", "annotations": [{"segment": [9.866939633385336, 129.8953582324493], "label": "Mixing drinks"}]}, "7rwDYzqfGgU": {"duration": 113.569, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7rwDYzqfGgU", "annotations": []}, "NM4nBojCSwQ": {"duration": 74.93, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=NM4nBojCSwQ", "annotations": [{"segment": [21.82228776, 64.10306448], "label": "Ironing clothes"}]}, "bUfhRJjHNoU": {"duration": 192.146, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bUfhRJjHNoU", "annotations": [{"segment": [17.65819681974555, 155.63156519097774], "label": "Slacklining"}]}, "khIjshXJjaY": {"duration": 4.09, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=khIjshXJjaY", "annotations": []}, "Twa9ChH4zZU": {"duration": 127.873, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Twa9ChH4zZU", "annotations": []}, "1nltPeGC5ZQ": {"duration": 84.753, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1nltPeGC5ZQ", 
"annotations": [{"segment": [0.01, 12.858140285860687], "label": "Sumo"}, {"segment": [28.963818831032025, 43.749359790533575], "label": "Sumo"}, {"segment": [56.15865309582953, 60.11906585283887], "label": "Sumo"}, {"segment": [76.48877191514417, 80.44918467215352], "label": "Sumo"}]}, "sUL9HAplalo": {"duration": 109.11, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=sUL9HAplalo", "annotations": [{"segment": [0, 29.107341844694197], "label": "Smoking a cigarette"}, {"segment": [49.53354664798837, 60.08708579635702], "label": "Smoking a cigarette"}]}, "ggw-70xi0dA": {"duration": 7.593, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ggw-70xi0dA", "annotations": [{"segment": [0.01, 1.086719775939656], "label": "Slacklining"}]}, "3s46k9MIq_s": {"duration": 115.43, "subset": "testing", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=3s46k9MIq_s", "annotations": []}, "Flm_SCK10GQ": {"duration": 30.697, "subset": "testing", "resolution": "540x360", "url": "https://www.youtube.com/watch?v=Flm_SCK10GQ", "annotations": []}, "7phIVBx1BzQ": {"duration": 210.257, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7phIVBx1BzQ", "annotations": [{"segment": [0.01, 210.25668934240363], "label": "Disc dog"}]}, "bgAmrgXf500": {"duration": 59.0, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=bgAmrgXf500", "annotations": []}, "LFeoVokXAFY": {"duration": 58.422, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LFeoVokXAFY", "annotations": [{"segment": [0.01, 57.96572263338533], "label": "Hitting a pinata"}]}, "ix40OdQd7iE": {"duration": 99.85, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ix40OdQd7iE", "annotations": [{"segment": [23.18500745, 39.87602477704551], "label": "Pole vault"}]}, "c-C_9InvwKE": {"duration": 73.0, 
"subset": "validation", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=c-C_9InvwKE", "annotations": [{"segment": [22.537095163806555, 53.38332137285491], "label": "Hand washing clothes"}]}, "8wDm37sdBCI": {"duration": 20.6, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8wDm37sdBCI", "annotations": [{"segment": [0.2566115444617785, 4.554854914196568], "label": "Hand washing clothes"}, {"segment": [12.573965678627145, 20.361], "label": "Hand washing clothes"}]}, "nlsHB3c3fCw": {"duration": 173.94, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nlsHB3c3fCw", "annotations": []}, "LrwBGQ9B0Vg": {"duration": 104.374, "subset": "training", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=LrwBGQ9B0Vg", "annotations": [{"segment": [47.856870514820585, 51.27521840873635], "label": "Scuba diving"}, {"segment": [61.85581903276131, 95.55096255850233], "label": "Scuba diving"}]}, "xgIxKDm9M38": {"duration": 265.056, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xgIxKDm9M38", "annotations": [{"segment": [39.38679445030543, 133.51875452405454], "label": "Removing ice from car"}, {"segment": [145.07881979626936, 183.06189140497514], "label": "Removing ice from car"}]}, "Lw4ZiLSPvfE": {"duration": 78.646, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Lw4ZiLSPvfE", "annotations": []}, "6VNFwlzQiuc": {"duration": 219.45, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6VNFwlzQiuc", "annotations": [{"segment": [20.37027536339549, 22.42442077819167], "label": "Getting a tattoo"}, {"segment": [30.298644868243706, 33.379862990437985], "label": "Getting a tattoo"}, {"segment": [148.75436255850235, 156.62858658346335], "label": "Getting a tattoo"}]}, "I16FTpQQiic": {"duration": 212.51, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=I16FTpQQiic", "annotations": []}, "GgnM5RGNtDE": {"duration": 61.255, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GgnM5RGNtDE", "annotations": []}, "pMCPCsvxRBw": {"duration": 55.752, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pMCPCsvxRBw", "annotations": [{"segment": [22.55803738317757, 53.29080062305296], "label": "Making a lemonade"}]}, "KKbfCtmIE0o": {"duration": 122.58, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=KKbfCtmIE0o", "annotations": [{"segment": [3.00070944, 111.86928442277691], "label": "Tai chi"}]}, "VwMSsicKRYI": {"duration": 106.09, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VwMSsicKRYI", "annotations": []}, "QY3q4DuiGq8": {"duration": 18.07, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QY3q4DuiGq8", "annotations": []}, "ryZuSiZNN2o": {"duration": 80.109, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=ryZuSiZNN2o", "annotations": [{"segment": [0.009999999999999998, 77.78793181469842], "label": "Hurling"}]}, "BUhpWV98EjM": {"duration": 112.176, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BUhpWV98EjM", "annotations": [{"segment": [0.2, 99.75053354134165], "label": "Capoeira"}]}, "j4J8WqxAZIM": {"duration": 86.28999999999999, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=j4J8WqxAZIM", "annotations": []}, "3-9pr55kr8g": {"duration": 55.055, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3-9pr55kr8g", "annotations": []}, "GgfyTMpHfnI": {"duration": 214.599, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GgfyTMpHfnI", "annotations": [{"segment": [20.087253122777973, 200.87253042121682], "label": "Braiding 
hair"}]}, "POhazPPwHhQ": {"duration": 154.854, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=POhazPPwHhQ", "annotations": [{"segment": [37.44516502340094, 118.37503781591265], "label": "Slacklining"}, {"segment": [128.52146962870515, 154.65387600000003], "label": "Slacklining"}]}, "XGj9lakgSic": {"duration": 128.57, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XGj9lakgSic", "annotations": [{"segment": [12.524538416536661, 97.36454640000001], "label": "Preparing salad"}]}, "-3Ss9jSRbKk": {"duration": 67.663, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-3Ss9jSRbKk", "annotations": [{"segment": [33.01683345234873, 40.921326803486785], "label": "Doing crunches"}, {"segment": [54.20087699040267, 62.52694417108803], "label": "Doing crunches"}]}, "AS1nLh3xfxI": {"duration": 31.394, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AS1nLh3xfxI", "annotations": [{"segment": [5.97288944198038, 15.742671373444555], "label": "Spread mulch"}]}, "ru4NMIVZqrg": {"duration": 187.22, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ru4NMIVZqrg", "annotations": [{"segment": [50.82158362252858, 177.8755409531981], "label": "Getting a piercing"}]}, "N7z9_g196z0": {"duration": 187.246, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=N7z9_g196z0", "annotations": [{"segment": [33.293157566302646, 172.01464742589704], "label": "Croquet"}]}, "wuknZBoyMRE": {"duration": 98.662, "subset": "validation", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=wuknZBoyMRE", "annotations": [{"segment": [0, 98.66160997732426], "label": "Powerbocking"}]}, "zQ4HbFGX7t0": {"duration": 59.026, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zQ4HbFGX7t0", "annotations": [{"segment": [26.684071762870516, 
37.173672386895475], "label": "Waxing skis"}]}, "tjDOXnDswdA": {"duration": 45.001, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tjDOXnDswdA", "annotations": [{"segment": [0, 42.543158864274574], "label": "Playing rubik cube"}]}, "rj2TaVP_Bwk": {"duration": 79.65, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=rj2TaVP_Bwk", "annotations": []}, "zJquKzopJuI": {"duration": 32.25, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=zJquKzopJuI", "annotations": [{"segment": [0.27646723868954753, 27.77239079563182], "label": "Drinking beer"}]}, "NzRNlTfyYtI": {"duration": 75.372, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=NzRNlTfyYtI", "annotations": [{"segment": [0.23516996190051684, 23.046656266250647], "label": "Elliptical trainer"}, {"segment": [39.27338322620905, 52.67807091419657], "label": "Elliptical trainer"}]}, "j4YrzUjxAp8": {"duration": 38.48, "subset": "training", "resolution": "854x468", "url": "https://www.youtube.com/watch?v=j4YrzUjxAp8", "annotations": [{"segment": [6.962798477379096, 38.275464852607705], "label": "Vacuuming floor"}]}, "HD4aY4ATCFY": {"duration": 159.57, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HD4aY4ATCFY", "annotations": [{"segment": [44.175070202808115, 118.33941341653664], "label": "Making a sandwich"}]}, "57J-q04z1Hs": {"duration": 22.198, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=57J-q04z1Hs", "annotations": [{"segment": [0.01, 22.161], "label": "Baton twirling"}]}, "cdpPn-7R3GQ": {"duration": 237.169, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cdpPn-7R3GQ", "annotations": [{"segment": [0, 7.376934240583221], "label": "Playing pool"}, {"segment": [80.03979194768887, 91.84289573767558], "label": "Playing pool"}, {"segment": 
[141.2683928582449, 145.69455677948991], "label": "Playing pool"}, {"segment": [197.33313586068175, 206.92315613700484], "label": "Playing pool"}]}, "R4yz8nXO5hI": {"duration": 97.64, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=R4yz8nXO5hI", "annotations": [{"segment": [2.2124997399999997, 88.34769298254923], "label": "Painting"}]}, "xn93Nn9y-SI": {"duration": 30.35, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=xn93Nn9y-SI", "annotations": []}, "bYUmtLBL7W4": {"duration": 224.49, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bYUmtLBL7W4", "annotations": [{"segment": [11.553655226209049, 57.06805460218409], "label": "Wakeboarding"}, {"segment": [68.62170982839314, 126.03987519500778], "label": "Wakeboarding"}, {"segment": [135.4928658346334, 201.31368954758187], "label": "Wakeboarding"}]}, "ar3Jzu6hZlY": {"duration": 130.01, "subset": "testing", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=ar3Jzu6hZlY", "annotations": []}, "nuqxJvTbG9w": {"duration": 30.65, "subset": "training", "resolution": "1920x1088", "url": "https://www.youtube.com/watch?v=nuqxJvTbG9w", "annotations": [{"segment": [0, 30.650339], "label": "Playing kickball"}]}, "PdWkg0NiVCA": {"duration": 7.501, "subset": "testing", "resolution": "310x240", "url": "https://www.youtube.com/watch?v=PdWkg0NiVCA", "annotations": []}, "hrpRVbLRoJ4": {"duration": 203.2, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=hrpRVbLRoJ4", "annotations": [{"segment": [44.06317849448671, 181.95873709234084], "label": "Horseback riding"}]}, "eXMF6Skt2To": {"duration": 183.42, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eXMF6Skt2To", "annotations": [{"segment": [83.8383241533743, 114.09876569999999], "label": "Doing motocross"}]}, "55sP2yXNFxY": {"duration": 199.181, "subset": "training", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=55sP2yXNFxY", "annotations": [{"segment": [36.86237745999126, 41.81866414055254], "label": "Playing pool"}, {"segment": [45.22611123343843, 47.08471873864891], "label": "Playing pool"}, {"segment": [51.421469584140034, 54.82891667702592], "label": "Playing pool"}, {"segment": [122.35832269967345, 127.62437729776983], "label": "Playing pool"}]}, "Wxp53duZNAM": {"duration": 193.191, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Wxp53duZNAM", "annotations": []}, "zQFC5vNZ_Qs": {"duration": 31.162, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=zQFC5vNZ_Qs", "annotations": [{"segment": [0.01, 31.161178], "label": "Using the monkey bar"}]}, "_f0S0vQZ9JM": {"duration": 149.305, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_f0S0vQZ9JM", "annotations": [{"segment": [30.280124867394694, 77.33078043057722], "label": "Making a lemonade"}, {"segment": [92.00499478939156, 138.1239542028081], "label": "Making a lemonade"}]}, "bQhCEXZwnMM": {"duration": 226.28, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bQhCEXZwnMM", "annotations": [{"segment": [92.2535406, 208.76135793915756], "label": "Archery"}]}, "AVIMCVsLrVw": {"duration": 212.997, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AVIMCVsLrVw", "annotations": [{"segment": [60.619568585177724, 180.53369786156426], "label": "Powerbocking"}]}, "jRfTdoqG7Tw": {"duration": 135.19, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jRfTdoqG7Tw", "annotations": [{"segment": [10.966746120184943, 111.77645084034654], "label": "Putting on makeup"}]}, "AH-3mF6wWhU": {"duration": 122.3, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=AH-3mF6wWhU", "annotations": [{"segment": 
[46.1723545622097, 113.9045274524181], "label": "Kite flying"}]}, "Kz6I9Zx5E_0": {"duration": 177.54, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Kz6I9Zx5E_0", "annotations": [{"segment": [3.8762527301092047, 7.752505460218409], "label": "Cheerleading"}, {"segment": [41.25440405616225, 175.53887363494542], "label": "Cheerleading"}]}, "fU-kGMQ68jg": {"duration": 201.805, "subset": "validation", "resolution": "423x240", "url": "https://www.youtube.com/watch?v=fU-kGMQ68jg", "annotations": [{"segment": [27.70484723062392, 31.482780943890816], "label": "Snow tubing"}, {"segment": [36.834853704352255, 57.29866131788129], "label": "Snow tubing"}, {"segment": [85.3183363579441, 89.41109788064992], "label": "Snow tubing"}, {"segment": [93.18903159391682, 119.31973977734619], "label": "Snow tubing"}, {"segment": [157.41390405616224, 170.32184418876756], "label": "Snow tubing"}]}, "VmD2k_dtddc": {"duration": 80.88, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VmD2k_dtddc", "annotations": [{"segment": [6.364234216848674, 63.57932994851794], "label": "Playing lacrosse"}]}, "kNUpypAppjk": {"duration": 46.53, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kNUpypAppjk", "annotations": [{"segment": [3.7748907581903275, 44.57274856786271], "label": "Snatch"}]}, "IeTMYNbQSp0": {"duration": 19.134, "subset": "validation", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=IeTMYNbQSp0", "annotations": [{"segment": [0.01, 19.073565828393136], "label": "Hand car wash"}]}, "TtewPltwuXw": {"duration": 141.87, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TtewPltwuXw", "annotations": []}, "FaKQvJBhMPg": {"duration": 89.095, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FaKQvJBhMPg", "annotations": [{"segment": [0, 8.186457098283931], "label": "River tubing"}, 
{"segment": [27.750702028081125, 34.27211700468019], "label": "River tubing"}, {"segment": [47.31494695787831, 50.09001716068643], "label": "River tubing"}, {"segment": [60.77403744149767, 64.65913572542901], "label": "River tubing"}, {"segment": [69.2380015600624, 82.97459906396257], "label": "River tubing"}]}, "Ki13JIVwMdk": {"duration": 20.69, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ki13JIVwMdk", "annotations": []}, "COx5Bf0PsQs": {"duration": 71.889, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=COx5Bf0PsQs", "annotations": [{"segment": [6.504762614664586, 71.88884353741497], "label": "Making a lemonade"}]}, "PZ4miMXdjHE": {"duration": 78.623, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=PZ4miMXdjHE", "annotations": [{"segment": [45.2338736349454, 67.05400780031201], "label": "Sumo"}]}, "G5mKcPzVMG4": {"duration": 73.259, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=G5mKcPzVMG4", "annotations": [{"segment": [4.10796872019836, 71.54712040186917], "label": "Waterskiing"}]}, "m5T_w1mhWgY": {"duration": 212.138, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=m5T_w1mhWgY", "annotations": [{"segment": [19.194954949218378, 212.13734693877552], "label": "Waxing skis"}]}, "9peoWGfb9jc": {"duration": 75.814, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9peoWGfb9jc", "annotations": [{"segment": [0.01, 75.54455382215289], "label": "Waxing skis"}]}, "swopywrINVw": {"duration": 139.13, "subset": "testing", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=swopywrINVw", "annotations": []}, "zKYqEsVfEnU": {"duration": 122.625, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zKYqEsVfEnU", "annotations": [{"segment": [71.97688537445187, 116.01453856263227], "label": 
"Doing crunches"}]}, "ceDvPX6hP_U": {"duration": 142.385, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ceDvPX6hP_U", "annotations": [{"segment": [30.631478939157567, 136.28788455538222], "label": "Kneeling"}]}, "N_o9Qp0FLVY": {"duration": 56.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=N_o9Qp0FLVY", "annotations": [{"segment": [19.962914976599066, 44.52839235569423], "label": "Shaving legs"}]}, "RldXwiaqykc": {"duration": 79.064, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RldXwiaqykc", "annotations": []}, "Z8QrA66UVzI": {"duration": 173.94, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=Z8QrA66UVzI", "annotations": [{"segment": [31.477519465404466, 108.00045471750842], "label": "Cleaning windows"}]}, "MHo5kioyrFM": {"duration": 201.67000000000002, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=MHo5kioyrFM", "annotations": [{"segment": [12.254173146645867, 187.26890244617783], "label": "Wrapping presents"}]}, "bQ6TMowx36o": {"duration": 16.21, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bQ6TMowx36o", "annotations": []}, "aGu-FfGDdVI": {"duration": 197.904, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=aGu-FfGDdVI", "annotations": [{"segment": [0.2, 197.70367346938778], "label": "Canoeing"}]}, "_Yt94ffXcCE": {"duration": 80.597, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=_Yt94ffXcCE", "annotations": [{"segment": [29.58128972276389, 52.26863627361855], "label": "Welding"}]}, "TnOdqLQPQA4": {"duration": 99.00999999999999, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TnOdqLQPQA4", "annotations": [{"segment": [11.893546046603769, 75.99512439313573], "label": "Starting a campfire"}, {"segment": 
[92.52251933229329, 99.009908], "label": "Starting a campfire"}]}, "AxtbfvPM1CE": {"duration": 123.415, "subset": "testing", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=AxtbfvPM1CE", "annotations": []}, "VNMj5YKHEsY": {"duration": 190.009, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=VNMj5YKHEsY", "annotations": [{"segment": [19.85551794071763, 62.24026521060842], "label": "Playing ice hockey"}, {"segment": [74.6883182527301, 94.24954446177847], "label": "Playing ice hockey"}, {"segment": [101.95548205928236, 103.43739313572543], "label": "Playing ice hockey"}]}, "Fm7oIMqCymo": {"duration": 29.58, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Fm7oIMqCymo", "annotations": []}, "u3XYsINR-y4": {"duration": 169.69, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=u3XYsINR-y4", "annotations": [{"segment": [1.853104507020281, 147.98363134633385], "label": "Playing violin"}]}, "T8P52q1gCNA": {"duration": 99.056, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=T8P52q1gCNA", "annotations": [{"segment": [22.491663134616697, 69.16957164172094], "label": "Baking cookies"}]}, "k2U1lOpLqdk": {"duration": 112.966, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=k2U1lOpLqdk", "annotations": [{"segment": [24.936909095411437, 28.990258276997746], "label": "Curling"}, {"segment": [25.994304534086123, 28.637793130772845], "label": "Curling"}, {"segment": [42.73639897976871, 46.96598073446747], "label": "Curling"}, {"segment": [51.54802763539113, 58.068632840551714], "label": "Curling"}, {"segment": [89.26179828145507, 105.12273079215264], "label": "Curling"}]}, "Vc4NSwtI5jM": {"duration": 30.117, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Vc4NSwtI5jM", "annotations": [{"segment": [2.2551972542901715, 16.585096474258968], 
"label": "Hand car wash"}]}, "bNwrAuu0qiQ": {"duration": 171.5, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bNwrAuu0qiQ", "annotations": [{"segment": [26.607065730889236, 160.5783213205928], "label": "Playing badminton"}]}, "ayXuNcjC8wk": {"duration": 37.733, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ayXuNcjC8wk", "annotations": [{"segment": [5.105319615372069, 34.387559057929835], "label": "Using the monkey bar"}]}, "uavao6fYoMM": {"duration": 184.019, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uavao6fYoMM", "annotations": [{"segment": [24.11470173092638, 76.0761423654225], "label": "Futsal"}, {"segment": [84.1143762757313, 140.66909343040388], "label": "Futsal"}, {"segment": [144.40113060304725, 182.582740099844], "label": "Futsal"}]}, "LRhkbJ9dcP8": {"duration": 157.525, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LRhkbJ9dcP8", "annotations": [{"segment": [37.82994383775351, 63.131789391575666], "label": "Blow-drying hair"}, {"segment": [99.24219032761312, 114.22677847113886], "label": "Blow-drying hair"}]}, "VRS48f5tj_c": {"duration": 191.08, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=VRS48f5tj_c", "annotations": [{"segment": [0.01, 191.041], "label": "Playing flauta"}]}, "-qcPtBHelmc": {"duration": 134.699, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-qcPtBHelmc", "annotations": [{"segment": [23.745681952448166, 67.03427029053951], "label": "Disc dog"}, {"segment": [79.22231943427397, 82.58453988771797], "label": "Disc dog"}, {"segment": [88.88870323792544, 102.12744482059284], "label": "Disc dog"}, {"segment": [112.63438358814354, 124.40215500780033], "label": "Disc dog"}]}, "qZtdMXkHRXw": {"duration": 4.784, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=qZtdMXkHRXw", "annotations": []}, "prBiREYQ2vQ": {"duration": 237.865, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=prBiREYQ2vQ", "annotations": []}, "hjtGItcwkQA": {"duration": 78.65, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hjtGItcwkQA", "annotations": [{"segment": [0, 77.86526521060843], "label": "Sailing"}]}, "2cUJTeArPsI": {"duration": 14.536, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=2cUJTeArPsI", "annotations": [{"segment": [0.8852574102964119, 13.619344773790951], "label": "Using the monkey bar"}]}, "b-ex8bAnl5U": {"duration": 204.429, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=b-ex8bAnl5U", "annotations": [{"segment": [14.456469250051565, 196.91366312388334], "label": "Playing blackjack"}]}, "Z_hwYD3_lBY": {"duration": 184.79, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Z_hwYD3_lBY", "annotations": [{"segment": [10.374851794071763, 180.69533541341653], "label": "Mowing the lawn"}]}, "HtG-i1CG2Qs": {"duration": 41.94, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=HtG-i1CG2Qs", "annotations": []}, "GHmxFOXP1Q0": {"duration": 59.82, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GHmxFOXP1Q0", "annotations": [{"segment": [0, 59.813968253968255], "label": "Getting a piercing"}]}, "yo3s6V19YtQ": {"duration": 185.04, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yo3s6V19YtQ", "annotations": [{"segment": [20.777897035881434, 140.2508049921997], "label": "Rock climbing"}]}, "xq7Gr0FUwpo": {"duration": 223.191, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xq7Gr0FUwpo", "annotations": [{"segment": [79.73558502340093, 223.19], "label": "Waterskiing"}]}, 
"PzERiygIpkg": {"duration": 72.24, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=PzERiygIpkg", "annotations": [{"segment": [0.01, 72.23727891156463], "label": "Washing hands"}]}, "kRBqJhxfWHc": {"duration": 80.02, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kRBqJhxfWHc", "annotations": [{"segment": [0.01, 60.72974339001561], "label": "Drinking beer"}]}, "UU8a7vp6tgY": {"duration": 33.925, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=UU8a7vp6tgY", "annotations": [{"segment": [0.2, 32.475257009345796], "label": "Decorating the Christmas tree"}]}, "jMQgOcM9vZA": {"duration": 149.003, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jMQgOcM9vZA", "annotations": [{"segment": [29.98645229074469, 34.86796777993568], "label": "Playing ice hockey"}, {"segment": [45.560811232449296, 50.20987360310739], "label": "Playing ice hockey"}, {"segment": [51.372139195771915, 111.8099500143271], "label": "Playing ice hockey"}, {"segment": [113.20466872552453, 127.6167620745646], "label": "Playing ice hockey"}]}, "ZcgahXg_ELw": {"duration": 194.816, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ZcgahXg_ELw", "annotations": [{"segment": [23.70608846396256, 89.35371805647425], "label": "Scuba diving"}, {"segment": [92.69688437831513, 128.86386549641185], "label": "Scuba diving"}, {"segment": [133.11880445148208, 149.83463606068642], "label": "Scuba diving"}, {"segment": [151.6581813271451, 172.62895189141966], "label": "Scuba diving"}]}, "zSd5dBteQ_s": {"duration": 30.07, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zSd5dBteQ_s", "annotations": []}, "RW7LEc-Ykh8": {"duration": 50.5, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RW7LEc-Ykh8", "annotations": [{"segment": [7.800053546021841, 
46.012437079563185], "label": "Using the pommel horse"}]}, "vRf9sof3dwU": {"duration": 63.414, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vRf9sof3dwU", "annotations": [{"segment": [28.371360374414976, 54.569634945397816], "label": "Hula hoop"}]}, "DQLotF3P9Fc": {"duration": 193.56, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=DQLotF3P9Fc", "annotations": [{"segment": [51.938510476473475, 75.49202104138587], "label": "Shot put"}, {"segment": [108.10457413126457, 114.65976743999998], "label": "Shot put"}, {"segment": [133.77186128533575, 160.64701915756632], "label": "Shot put"}]}, "xqI9M6QiHws": {"duration": 162.981, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xqI9M6QiHws", "annotations": [{"segment": [35.85070436286839, 43.22425348714629], "label": "Making a cake"}, {"segment": [49.32650103827282, 128.65571787831513], "label": "Making a cake"}]}, "M_hfBH8xEig": {"duration": 64.72, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=M_hfBH8xEig", "annotations": [{"segment": [23.859384900000002, 59.672906301092056], "label": "Layup drill in basketball"}, {"segment": [6.50702021372855, 23.859074117004685], "label": "Layup drill in basketball"}]}, "Vn4wrgBpgP8": {"duration": 69.196, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Vn4wrgBpgP8", "annotations": [{"segment": [34.52655226209048, 49.092441497659905], "label": "Waxing skis"}]}, "oXFKaTlg3p0": {"duration": 162.726, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oXFKaTlg3p0", "annotations": [{"segment": [14.97784881191166, 140.6394617254078], "label": "Waterskiing"}]}, "shGGt9TRlkk": {"duration": 37.18, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=shGGt9TRlkk", "annotations": [{"segment": [22.79225105330744, 
27.373898458658342], "label": "Kayaking"}]}, "8jJ870_CgPQ": {"duration": 30.047, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=8jJ870_CgPQ", "annotations": []}, "_ucD-3rUWWE": {"duration": 107.81, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_ucD-3rUWWE", "annotations": [{"segment": [12.5469278, 42.519185900000004], "label": "Kayaking"}, {"segment": [55.64181909999999, 98.30747263026521], "label": "Kayaking"}]}, "yDWOqabreoU": {"duration": 63.577, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yDWOqabreoU", "annotations": [{"segment": [3.8672059282371296, 33.119148205928234], "label": "Hitting a pinata"}, {"segment": [36.589717628705145, 42.53926521060843], "label": "Hitting a pinata"}, {"segment": [51.166109204368176, 63.361], "label": "Hitting a pinata"}]}, "Liha_xwiwtc": {"duration": 24.01, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Liha_xwiwtc", "annotations": [{"segment": [0.2, 23.761], "label": "Canoeing"}]}, "0_IShglbOQ4": {"duration": 78.763, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0_IShglbOQ4", "annotations": [{"segment": [12.145801872074884, 78.661], "label": "Cutting the grass"}]}, "jQR4Hhaf8o8": {"duration": 231.9, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jQR4Hhaf8o8", "annotations": [{"segment": [39.635241809672394, 226.04756630265211], "label": "Breakdancing"}]}, "jGafq3cWPSI": {"duration": 128.92, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=jGafq3cWPSI", "annotations": []}, "LlgGMRw16UY": {"duration": 182.486, "subset": "training", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=LlgGMRw16UY", "annotations": [{"segment": [16.227270739809182, 100.49520300267793], "label": "Longboarding"}, {"segment": [106.75836013032358, 
136.36601200646663], "label": "Longboarding"}]}, "ZjuM-Xv_jqY": {"duration": 100.59, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZjuM-Xv_jqY", "annotations": [{"segment": [51.00058369681725, 65.1238222590128], "label": "Cleaning shoes"}, {"segment": [68.73309396879876, 77.36396193291732], "label": "Cleaning shoes"}, {"segment": [85.52405528081123, 90.23180144305772], "label": "Cleaning shoes"}]}, "N6HUPyM5m2o": {"duration": 8.71, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=N6HUPyM5m2o", "annotations": [{"segment": [5.869478159126366, 8.125411076443058], "label": "Snatch"}]}, "dot-t-Nen_k": {"duration": 106.44, "subset": "training", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=dot-t-Nen_k", "annotations": [{"segment": [31.374755425081823, 42.149467263088056], "label": "Playing lacrosse"}, {"segment": [55.41065106371111, 79.11501710732482], "label": "Playing lacrosse"}, {"segment": [79.94384109486374, 100.66444078333728], "label": "Playing lacrosse"}]}, "Snq0l-gKpWo": {"duration": 33.716, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Snq0l-gKpWo", "annotations": [{"segment": [0.01, 30.186989799677768], "label": "Doing a powerbomb"}]}, "6-cHUULLVGQ": {"duration": 197.439, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=6-cHUULLVGQ", "annotations": [{"segment": [85.14172620904837, 104.2331801872075], "label": "Hand car wash"}, {"segment": [130.09902106084243, 161.8318650546022], "label": "Hand car wash"}]}, "H0Pvn5ejnRQ": {"duration": 65.968, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=H0Pvn5ejnRQ", "annotations": []}, "Yxsw5nTf4Dg": {"duration": 95.017, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Yxsw5nTf4Dg", "annotations": [{"segment": [7.999834633385335, 78.23707956318252], "label": "Hula 
hoop"}]}, "7vmWw_tGXpQ": {"duration": 123.531, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=7vmWw_tGXpQ", "annotations": []}, "ez9uLqGV7AA": {"duration": 154.901, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ez9uLqGV7AA", "annotations": [{"segment": [17.88241054602184, 23.440457067082683], "label": "Longboarding"}, {"segment": [28.5151951950078, 137.259583650546], "label": "Longboarding"}]}, "BUu9nQ5rj7k": {"duration": 176.426, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BUu9nQ5rj7k", "annotations": []}, "G4tUTpJV-so": {"duration": 69.66, "subset": "training", "resolution": "450x360", "url": "https://www.youtube.com/watch?v=G4tUTpJV-so", "annotations": [{"segment": [0.01, 69.65986394557824], "label": "Tai chi"}]}, "50MmJszF2XM": {"duration": 149.98, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=50MmJszF2XM", "annotations": [{"segment": [23.377379095163807, 58.67722152886115], "label": "Cheerleading"}, {"segment": [79.71686271450857, 145.40729797191887], "label": "Cheerleading"}]}, "hj88A5tvA0Y": {"duration": 192.517, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hj88A5tvA0Y", "annotations": [{"segment": [19.221614627145087, 143.26109651794073], "label": "Slacklining"}, {"segment": [157.67730748829953, 166.98777707332292], "label": "Slacklining"}]}, "3ddoLUFrGsE": {"duration": 120.74, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3ddoLUFrGsE", "annotations": [{"segment": [8.001638609999999, 91.32034576], "label": "Putting on makeup"}]}, "FVD4vHnJAIA": {"duration": 229.41, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=FVD4vHnJAIA", "annotations": []}, "_ZiTTLhXjZQ": {"duration": 155.83, "subset": "validation", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=_ZiTTLhXjZQ", "annotations": [{"segment": [0.2, 155.58100000000002], "label": "Disc dog"}]}, "wV2c3sdctbs": {"duration": 50.666, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wV2c3sdctbs", "annotations": [{"segment": [5.29111856474259, 33.655329173166926], "label": "Putting on shoes"}]}, "EbqurNRxneE": {"duration": 34.32, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EbqurNRxneE", "annotations": [{"segment": [0.01, 34.23356260231897], "label": "Applying sunscreen"}]}, "gA1pa5MJwFs": {"duration": 37.27, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=gA1pa5MJwFs", "annotations": []}, "NxWYb3uDzb0": {"duration": 163.051, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NxWYb3uDzb0", "annotations": []}, "VpfLfFtu0d8": {"duration": 171.36, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VpfLfFtu0d8", "annotations": [{"segment": [36.344673946957876, 111.43918408736349], "label": "Making a sandwich"}]}, "VGVIaQuhdcI": {"duration": 65.713, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=VGVIaQuhdcI", "annotations": [{"segment": [0.2, 39.46848822152886], "label": "Plastering"}]}, "IGT0dIKDGCw": {"duration": 70.241, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IGT0dIKDGCw", "annotations": []}, "H8oehT8eeGc": {"duration": 15.09, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=H8oehT8eeGc", "annotations": []}, "nS9PgniAQAE": {"duration": 161.681, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nS9PgniAQAE", "annotations": [{"segment": [17.601301839736177, 125.47216091438627], "label": "Grooming dog"}]}, "qBvQjv1XyfQ": {"duration": 77.0, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=qBvQjv1XyfQ", "annotations": [{"segment": [15.61569085803432, 27.147277953198127], "label": "Using uneven bars"}, {"segment": [32.67283010296412, 46.84707257410297], "label": "Using uneven bars"}, {"segment": [56.336607787831504, 66.42674649609984], "label": "Using uneven bars"}]}, "68oJj9QktGg": {"duration": 181.843, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=68oJj9QktGg", "annotations": [{"segment": [4.2490887850467285, 171.37991433021807], "label": "Bullfighting"}]}, "pOO6H6d5SXI": {"duration": 146.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pOO6H6d5SXI", "annotations": [{"segment": [0, 145.983333], "label": "Playing accordion"}]}, "AsdsR27UnJ0": {"duration": 56.19, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=AsdsR27UnJ0", "annotations": []}, "Q7R3ajucvyE": {"duration": 239.305, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Q7R3ajucvyE", "annotations": [{"segment": [74.43385205293886, 78.89988351684634], "label": "Putting in contact lenses"}, {"segment": [104.20739514565547, 119.09416669201377], "label": "Putting in contact lenses"}]}, "Yr7cZQAhJag": {"duration": 233.18, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Yr7cZQAhJag", "annotations": []}, "q_nBBJS-eJo": {"duration": 220.892, "subset": "validation", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=q_nBBJS-eJo", "annotations": [{"segment": [66.16403329797193, 197.45828687363493], "label": "Painting furniture"}]}, "KaicDn9WDUo": {"duration": 204.75, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KaicDn9WDUo", "annotations": [{"segment": [0.01, 192.56583931357252], "label": "Ballet"}]}, "8X1GZojsjGQ": {"duration": 192.05, "subset": "testing", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=8X1GZojsjGQ", "annotations": []}, "R0B5bBr6t8w": {"duration": 18.3, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=R0B5bBr6t8w", "annotations": [{"segment": [2.711771887675507, 4.338835020280811], "label": "Layup drill in basketball"}, {"segment": [8.106770695787832, 9.36274925429017], "label": "Layup drill in basketball"}, {"segment": [12.845235257410295, 13.673039307332292], "label": "Layup drill in basketball"}]}, "vLJz4a4NvRI": {"duration": 104.699, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vLJz4a4NvRI", "annotations": [{"segment": [11.102882995319813, 95.51744929797192], "label": "Playing blackjack"}]}, "I-C_fOUaknA": {"duration": 18.948, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=I-C_fOUaknA", "annotations": [{"segment": [0.2, 18.180139445186175], "label": "Laying tile"}]}, "lnHdEtuXU8w": {"duration": 77.81, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lnHdEtuXU8w", "annotations": [{"segment": [35.93102393759751, 50.86182104680188], "label": "Ironing clothes"}]}, "i3H8cucgXEc": {"duration": 112.966, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=i3H8cucgXEc", "annotations": [{"segment": [10.569828393135726, 106.95036973478939], "label": "Rollerblading"}]}, "QQe2n2yjJuc": {"duration": 196.581, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QQe2n2yjJuc", "annotations": [{"segment": [0.2, 196.3797052154195], "label": "Futsal"}]}, "vWsPUB8dQMU": {"duration": 178.724, "subset": "training", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=vWsPUB8dQMU", "annotations": [{"segment": [32.343186957878316, 155.02424093603742], "label": "Changing car wheel"}]}, "3FZ47muWIYA": {"duration": 160.68, "subset": "validation", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=3FZ47muWIYA", "annotations": [{"segment": [88.8301786271451, 120.15252652106085], "label": "Washing hands"}]}, "q81H-V1_gGo": {"duration": 160.15, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=q81H-V1_gGo", "annotations": [{"segment": [27.9821823900156, 149.155025775351], "label": "Archery"}]}, "jNJnPpIvtTU": {"duration": 211.95, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jNJnPpIvtTU", "annotations": [{"segment": [27.16175741179035, 183.9796372793916], "label": "Tango"}]}, "Lo0EQXmaeXM": {"duration": 78.902, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Lo0EQXmaeXM", "annotations": [{"segment": [20.310034278922178, 52.43681577467181], "label": "Gargling mouthwash"}]}, "pRkJ_9zq16A": {"duration": 63.368, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=pRkJ_9zq16A", "annotations": [{"segment": [7.6119791497659905, 47.6490123400936], "label": "Futsal"}]}, "huKjjvCG0EE": {"duration": 130.613, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=huKjjvCG0EE", "annotations": [{"segment": [0.2, 23.22900676027041], "label": "Croquet"}, {"segment": [39.53006413589877, 48.08811925810365], "label": "Croquet"}, {"segment": [54.404778991159645, 61.33272837580169], "label": "Croquet"}, {"segment": [96.99129047737911, 101.27031799843994], "label": "Croquet"}]}, "Ckkf840HZE4": {"duration": 220.357, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ckkf840HZE4", "annotations": [{"segment": [5.28583096432363, 22.44761810908808], "label": "Wakeboarding"}, {"segment": [55.05501368414053, 67.06826468547564], "label": "Wakeboarding"}, {"segment": [91.43800243104117, 109.62949680449148], "label": "Wakeboarding"}, {"segment": [112.37538274765379, 152.8772004092979], "label": "Wakeboarding"}, {"segment": 
[158.02573516236546, 178.27664381501344], "label": "Wakeboarding"}, {"segment": [181.70900121376732, 218.435225380434], "label": "Wakeboarding"}]}, "k7U1T2M5ufU": {"duration": 78.371, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=k7U1T2M5ufU", "annotations": []}, "jzGsvw5Douk": {"duration": 194.352, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jzGsvw5Douk", "annotations": [{"segment": [59.088940855153666, 194.152988], "label": "River tubing"}]}, "fL8Gzk7ALxQ": {"duration": 123.182, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fL8Gzk7ALxQ", "annotations": [{"segment": [0, 123.18185941043085], "label": "Capoeira"}]}, "zwFxq1MnaO0": {"duration": 166.23, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zwFxq1MnaO0", "annotations": [{"segment": [7.519505460218409, 156.35385491419655], "label": "Zumba"}]}, "5fgJ5vWTcSQ": {"duration": 180.094, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5fgJ5vWTcSQ", "annotations": [{"segment": [14.040639625585024, 92.38740873634946], "label": "Longboarding"}, {"segment": [97.44203900156006, 164.85542433697347], "label": "Longboarding"}]}, "2yjRePOzYq4": {"duration": 44.77, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2yjRePOzYq4", "annotations": [{"segment": [10.24649984, 16.00635072], "label": "Triple jump"}, {"segment": [11.487792425897036, 44.428332999999995], "label": "Triple jump"}]}, "o1R8glzh0As": {"duration": 227.88, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=o1R8glzh0As", "annotations": [{"segment": [53.30446687255245, 207.53205668642747], "label": "River tubing"}]}, "HdgVytIzu4s": {"duration": 152.25, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=HdgVytIzu4s", "annotations": [{"segment": 
[7.6007859035449865, 146.79017619032763], "label": "Playing racquetball"}]}, "CpxGRgJgRPA": {"duration": 185.76, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CpxGRgJgRPA", "annotations": [{"segment": [10.967238689547582, 172.30109204368173], "label": "Wrapping presents"}]}, "4DMnMu2Cb_c": {"duration": 234.034, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4DMnMu2Cb_c", "annotations": [{"segment": [18.372756971195365, 217.77549028427947], "label": "Braiding hair"}]}, "USCVBB97pOc": {"duration": 145.148, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=USCVBB97pOc", "annotations": [{"segment": [0.01, 131.10866515600623], "label": "Clipping cat claws"}]}, "cC3dwwPPHKE": {"duration": 91.63, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cC3dwwPPHKE", "annotations": [{"segment": [29.67458875, 77.56697625000001], "label": "Ironing clothes"}]}, "HSZz2XqWIqI": {"duration": 154.553, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HSZz2XqWIqI", "annotations": []}, "yuxoNmlNcc8": {"duration": 146.843, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yuxoNmlNcc8", "annotations": [{"segment": [17.868520487758285, 139.05374179095162], "label": "Tug of war"}]}, "fMRgMeDs9Ck": {"duration": 51.67, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fMRgMeDs9Ck", "annotations": [{"segment": [0, 41.347638336973475], "label": "Shaving"}]}, "kUlHkpKeN1g": {"duration": 229.135, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=kUlHkpKeN1g", "annotations": []}, "2CfCDcCuOO4": {"duration": 91.0, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2CfCDcCuOO4", "annotations": [{"segment": [0.01, 90.961], "label": "Disc dog"}]}, 
"kDz85hnSKdo": {"duration": 31.07, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kDz85hnSKdo", "annotations": [{"segment": [5.4284701653666145, 28.596405335413415], "label": "Kayaking"}]}, "G0vkQmcalvk": {"duration": 43.863, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=G0vkQmcalvk", "annotations": [{"segment": [0.2, 40.74943457943925], "label": "Playing drums"}]}, "HM3Rd5ZcME8": {"duration": 225.721, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HM3Rd5ZcME8", "annotations": [{"segment": [5.630726989079563, 225.381], "label": "Snow tubing"}]}, "BTG82umu8Ug": {"duration": 25.47, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=BTG82umu8Ug", "annotations": [{"segment": [1.3113483884555381, 3.775093845553822], "label": "High jump"}, {"segment": [5.6030340234009355, 6.4772662823712945], "label": "High jump"}, {"segment": [11.603446346333852, 12.596892095163806], "label": "High jump"}, {"segment": [19.431798847113885, 20.464982425897034], "label": "High jump"}, {"segment": [24.12086278159126, 25.271949], "label": "High jump"}]}, "1Cv2YWq34ao": {"duration": 49.39, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1Cv2YWq34ao", "annotations": []}, "r46Vy3p19a0": {"duration": 31.742, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=r46Vy3p19a0", "annotations": [{"segment": [2.2778739285625846, 30.008512109204368], "label": "Cutting the grass"}]}, "odMI0DGsn7k": {"duration": 152.277, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=odMI0DGsn7k", "annotations": [{"segment": [0.009999999999999998, 5.224433697347894], "label": "Doing fencing"}, {"segment": [78.3665054602184, 86.91557878315132], "label": "Doing fencing"}, {"segment": [109.47563338533541, 116.14017004680188], "label": "Doing fencing"}]}, 
"Y34oqcxYmWc": {"duration": 219.57, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Y34oqcxYmWc", "annotations": [{"segment": [21.639598198125963, 175.88391642242502], "label": "Tango"}]}, "aR5pR5tR1bw": {"duration": 16.835, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=aR5pR5tR1bw", "annotations": []}, "hIRq3O1EmLg": {"duration": 153.183, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hIRq3O1EmLg", "annotations": []}, "5c1zqvr3yKM": {"duration": 80.318, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=5c1zqvr3yKM", "annotations": [{"segment": [0.01, 58.34927150055339], "label": "Clipping cat claws"}]}, "sdR443ncw-I": {"duration": 23.59, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sdR443ncw-I", "annotations": [{"segment": [0.8492759999999999, 23.498333], "label": "Brushing hair"}]}, "qoSn8oUMBSE": {"duration": 171.991, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qoSn8oUMBSE", "annotations": [{"segment": [16.583812495481066, 69.81251408913045], "label": "Beach soccer"}, {"segment": [71.41739956431586, 90.40854435400986], "label": "Beach soccer"}, {"segment": [109.66717005623475, 145.77709175837862], "label": "Beach soccer"}]}, "a4-5QFOiAiw": {"duration": 59.98, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=a4-5QFOiAiw", "annotations": [{"segment": [0.5459708, 59.921], "label": "Tennis serve with ball bouncing"}]}, "9q6wWG6ql4E": {"duration": 56.17, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9q6wWG6ql4E", "annotations": [{"segment": [5.739585186128533, 53.233557260657776], "label": "Using the monkey bar"}]}, "OFKGyZxazQk": {"duration": 168.02, "subset": "validation", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=OFKGyZxazQk", "annotations": [{"segment": [19.659017433697347, 21.493859060842432], "label": "Applying sunscreen"}, {"segment": [27.522624407176288, 31.978668358814353], "label": "Applying sunscreen"}]}, "XtqTatsTGNY": {"duration": 106.673, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=XtqTatsTGNY", "annotations": [{"segment": [0.01, 106.67247], "label": "Fun sliding down"}]}, "l_Y_Xns6SpE": {"duration": 54.683, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=l_Y_Xns6SpE", "annotations": [{"segment": [6.312820399375975, 18.255994127925117], "label": "Playing pool"}, {"segment": [24.48350614352574, 35.91482956942277], "label": "Playing pool"}, {"segment": [41.88641643369735, 54.682674], "label": "Playing pool"}]}, "oUQPIZu5bVU": {"duration": 113.639, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=oUQPIZu5bVU", "annotations": [{"segment": [35.43868954758191, 102.08139781591264], "label": "Hurling"}]}, "8vVk8oHmvbs": {"duration": 229.692, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8vVk8oHmvbs", "annotations": []}, "5nOc03oiFvk": {"duration": 213.44, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5nOc03oiFvk", "annotations": [{"segment": [61.26764341965679, 168.81899572698907], "label": "Rock climbing"}, {"segment": [175.47852218564742, 199.4528174368175], "label": "Rock climbing"}]}, "AUwam1VhQm0": {"duration": 87.586, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=AUwam1VhQm0", "annotations": [{"segment": [21.452339655226208, 76.38126030109204], "label": "Fun sliding down"}]}, "3xbd33jK63A": {"duration": 90.976, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3xbd33jK63A", "annotations": []}, "G_US7iMc6Y4": {"duration": 161.595, "subset": "validation", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=G_US7iMc6Y4", "annotations": [{"segment": [26.024105368462674, 87.43495272048759], "label": "Sumo"}, {"segment": [94.48209913793308, 149.36765365511457], "label": "Sumo"}]}, "HX6BeHLQh5s": {"duration": 26.29, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HX6BeHLQh5s", "annotations": []}, "vezvHLPR3P0": {"duration": 12.49, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vezvHLPR3P0", "annotations": []}, "5y9Lw8--ulU": {"duration": 93.577, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5y9Lw8--ulU", "annotations": [{"segment": [80.4688481973547, 88.3351005338033], "label": "Using the rowing machine"}]}, "8bX1g0mefzM": {"duration": 140.5, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8bX1g0mefzM", "annotations": [{"segment": [49.739430907952425, 133.998026608887], "label": "Polishing shoes"}]}, "TrQlBM3nKAs": {"duration": 122.32, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=TrQlBM3nKAs", "annotations": []}, "LvTwJTvluHE": {"duration": 227.672, "subset": "testing", "resolution": "840x480", "url": "https://www.youtube.com/watch?v=LvTwJTvluHE", "annotations": []}, "8c-s3TKrtdE": {"duration": 157.69, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8c-s3TKrtdE", "annotations": [{"segment": [8.11803782355376, 27.798129517017415], "label": "Playing field hockey"}, {"segment": [62.48429112674712, 69.12632207329109], "label": "Playing field hockey"}, {"segment": [84.13239198955714, 94.71044127479385], "label": "Playing field hockey"}, {"segment": [103.07448024451591, 117.83454901461364], "label": "Playing field hockey"}, {"segment": [138.25264353822152, 148.09268934165368], "label": "Playing field hockey"}]}, "E_UkQGk7QAE": {"duration": 88.236, "subset": "testing", 
"resolution": "960x720", "url": "https://www.youtube.com/watch?v=E_UkQGk7QAE", "annotations": []}, "ywsH9kD033I": {"duration": 175.567, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ywsH9kD033I", "annotations": [{"segment": [62.40201250905783, 78.80397512588026], "label": "Arm wrestling"}, {"segment": [90.55871500126966, 116.25512310095814], "label": "Arm wrestling"}, {"segment": [130.74352341248462, 141.14751633235895], "label": "Arm wrestling"}, {"segment": [161.6523060519851, 167.39364717348045], "label": "Arm wrestling"}]}, "HqpBHYNuA4Y": {"duration": 193.655, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HqpBHYNuA4Y", "annotations": []}, "02V8Hz-M6BM": {"duration": 55.427, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=02V8Hz-M6BM", "annotations": [{"segment": [31.462676114412847, 41.54797140632333], "label": "Drum corps"}, {"segment": [48.96109516993141, 53.529648187038724], "label": "Drum corps"}]}, "9ovdfIi4R1o": {"duration": 12.261, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=9ovdfIi4R1o", "annotations": []}, "UI98gtpg7FE": {"duration": 192.169, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UI98gtpg7FE", "annotations": [{"segment": [24.80553859283475, 161.9831542674961], "label": "Snowboarding"}]}, "jml_hnhjltg": {"duration": 129.24, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jml_hnhjltg", "annotations": [{"segment": [0, 127.226006], "label": "Shoveling snow"}]}, "YCHmXwLhs3A": {"duration": 213.763, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YCHmXwLhs3A", "annotations": [{"segment": [0.2, 203.78951161316004], "label": "Assembling bicycle"}]}, "--6bJUbfpnQ": {"duration": 26.75, "subset": "validation", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=--6bJUbfpnQ", "annotations": [{"segment": [2.578755070202808, 24.914101404056165], "label": "Drinking beer"}]}, "PBuzvPn0iho": {"duration": 165.396, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PBuzvPn0iho", "annotations": []}, "jdB2QeTdbp0": {"duration": 176.588, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jdB2QeTdbp0", "annotations": []}, "B-6kP8M_GmM": {"duration": 186.572, "subset": "validation", "resolution": "576x324", "url": "https://www.youtube.com/watch?v=B-6kP8M_GmM", "annotations": [{"segment": [35.861333260258824, 41.6735427929691], "label": "Sumo"}, {"segment": [56.204066624744804, 152.97735534437095], "label": "Sumo"}]}, "gpKYclCmQHM": {"duration": 167.556, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gpKYclCmQHM", "annotations": [{"segment": [85.47138516837528, 94.59180779050864], "label": "Beer pong"}, {"segment": [125.60124470576203, 127.94649490561537], "label": "Beer pong"}]}, "Zk4Ewz3v6aQ": {"duration": 124.204, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=Zk4Ewz3v6aQ", "annotations": [{"segment": [56.19181515559942, 103.27668095839479], "label": "Using the rowing machine"}]}, "LxntXVcMxtc": {"duration": 38.08, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LxntXVcMxtc", "annotations": [{"segment": [7.367514484619426, 32.44508925097457], "label": "Doing karate"}]}, "kO_4QDNExkY": {"duration": 70.357, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=kO_4QDNExkY", "annotations": [{"segment": [7.786850234009361, 63.7205631825273], "label": "Blowing leaves"}]}, "KEJP4Uxa5m0": {"duration": 164.305, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KEJP4Uxa5m0", "annotations": [{"segment": [25.140674207455646, 28.7222318398544], 
"label": "Croquet"}]}, "2jHMUtvWP_0": {"duration": 23.36, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=2jHMUtvWP_0", "annotations": [{"segment": [5.063781052313119, 13.927214074119972], "label": "Plastering"}]}, "sO7ZGxFwbm8": {"duration": 233.82999999999998, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sO7ZGxFwbm8", "annotations": [{"segment": [13.856190971448383, 21.513559666196173], "label": "Javelin throw"}, {"segment": [35.36975063764456, 39.01611668276255], "label": "Javelin throw"}, {"segment": [64.54067899858852, 68.91631825273011], "label": "Javelin throw"}, {"segment": [72.56268429784811, 77.66759626521062], "label": "Javelin throw"}, {"segment": [87.87742112636505, 102.09824861154448], "label": "Javelin throw"}, {"segment": [112.30807347269891, 210.7599560624025], "label": "Javelin throw"}]}, "tEAEFVJGGG0": {"duration": 147.401, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tEAEFVJGGG0", "annotations": [{"segment": [4.454151841612094, 139.68586464379277], "label": "Trimming branches or hedges"}]}, "9A0BwCxAKAg": {"duration": 230.95, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9A0BwCxAKAg", "annotations": [{"segment": [14.587265990639626, 211.96558112324493], "label": "Cleaning windows"}]}, "g-l5CI29DPQ": {"duration": 114.55, "subset": "training", "resolution": "432x720", "url": "https://www.youtube.com/watch?v=g-l5CI29DPQ", "annotations": [{"segment": [1.6082625998917508, 110.61272770366597], "label": "Hopscotch"}]}, "2pJTak2Qz8Q": {"duration": 237.657, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2pJTak2Qz8Q", "annotations": [{"segment": [19.645636505460217, 227.61200312012483], "label": "Skiing"}]}, "H306mUUJ5BE": {"duration": 213.9, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=H306mUUJ5BE", 
"annotations": [{"segment": [0.2, 213.64000000000001], "label": "Playing harmonica"}, {"segment": [0.2, 213.64000000000001], "label": "Playing harmonica"}, {"segment": [178.4780031201248, 186.15088923556942], "label": "Playing harmonica"}]}, "l-pltFa_oRw": {"duration": 151.464, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=l-pltFa_oRw", "annotations": []}, "ojVY8I3GPa4": {"duration": 166.023, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ojVY8I3GPa4", "annotations": [{"segment": [52.79844617784711, 165.881], "label": "Bullfighting"}]}, "7LbBmVX33I4": {"duration": 191.91, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7LbBmVX33I4", "annotations": []}, "SPjv6P5fxug": {"duration": 125.226, "subset": "testing", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=SPjv6P5fxug", "annotations": []}, "656VWQU5dgE": {"duration": 91.743, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=656VWQU5dgE", "annotations": [{"segment": [5.009310848624421, 85.15828339313573], "label": "Doing fencing"}]}, "r_n1PfHvKI4": {"duration": 176.472, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=r_n1PfHvKI4", "annotations": [{"segment": [13.351648607644305, 35.374986517160686], "label": "Kite flying"}, {"segment": [56.572449255070204, 171.6443898322933], "label": "Kite flying"}]}, "AuVL_nkYXw0": {"duration": 226.999, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=AuVL_nkYXw0", "annotations": [{"segment": [84.28329087363495, 208.9375698127925], "label": "Elliptical trainer"}]}, "YrWRdVrWPBA": {"duration": 66.02, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YrWRdVrWPBA", "annotations": []}, "TDwSu2nfSXw": {"duration": 113.31, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=TDwSu2nfSXw", "annotations": [{"segment": [77.07417071450858, 113.3131746031746], "label": "Breakdancing"}]}, "brrf2inGs7E": {"duration": 27.45, "subset": "training", "resolution": "352x240", "url": "https://www.youtube.com/watch?v=brrf2inGs7E", "annotations": [{"segment": [4.794444617784711, 25.898196567862712], "label": "Washing hands"}]}, "4_3m_-SGzXw": {"duration": 167.811, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4_3m_-SGzXw", "annotations": [{"segment": [101.78388299531981, 141.8171326053042], "label": "Hanging wallpaper"}]}, "fqoEZaCd7hU": {"duration": 79.83, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=fqoEZaCd7hU", "annotations": [{"segment": [3.7361721528861156, 77.83691985179406], "label": "Playing guitarra"}]}, "VdY1Shdks6o": {"duration": 82.199, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VdY1Shdks6o", "annotations": [{"segment": [26.67288097347894, 67.70808247113885], "label": "Putting in contact lenses"}]}, "x8yuq2i72eo": {"duration": 8.476, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=x8yuq2i72eo", "annotations": [{"segment": [0.2, 8.218], "label": "Doing crunches"}]}, "16g-sBraeKE": {"duration": 225.23, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=16g-sBraeKE", "annotations": [{"segment": [50.3395755, 170.59282371294853], "label": "Doing nails"}]}, "ZdgWC6Jk2Nk": {"duration": 47.648, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ZdgWC6Jk2Nk", "annotations": [{"segment": [15.297669266770672, 21.832595943837752], "label": "Spread mulch"}]}, "feYYvRmqCgI": {"duration": 33.135, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=feYYvRmqCgI", "annotations": [{"segment": [6.5542698907956325, 33.081], "label": "Calf roping"}]}, 
"iZUwLKd5TTk": {"duration": 203.59, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=iZUwLKd5TTk", "annotations": [{"segment": [33.81831903276131, 188.4617121684867], "label": "Ironing clothes"}]}, "sY7m7E-sAYc": {"duration": 136.371, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sY7m7E-sAYc", "annotations": [{"segment": [98.07604946918966, 134.24292091107645], "label": "Peeling potatoes"}]}, "nciIPwJTok8": {"duration": 60.163, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=nciIPwJTok8", "annotations": []}, "oijpI0s_R04": {"duration": 170.3, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oijpI0s_R04", "annotations": [{"segment": [15.143250236131895, 45.42975070839569], "label": "Tango"}, {"segment": [59.510316717430605, 82.62369865678981], "label": "Tango"}, {"segment": [90.85950141679137, 116.62959392389301], "label": "Tango"}, {"segment": [121.94301505937788, 143.99371277164013], "label": "Tango"}, {"segment": [150.3698169048362, 168.43544861778471], "label": "Tango"}]}, "b7fs8OAJzQk": {"duration": 31.394, "subset": "training", "resolution": "657x480", "url": "https://www.youtube.com/watch?v=b7fs8OAJzQk", "annotations": [{"segment": [0.01, 18.73317738299532], "label": "Waterskiing"}, {"segment": [29.458839727769114, 31.193377], "label": "Waterskiing"}]}, "tGHLUWWm_zU": {"duration": 187.548, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tGHLUWWm_zU", "annotations": [{"segment": [2.0480959809820964, 186.9619037223089], "label": "Plastering"}]}, "pkHb81FOGgg": {"duration": 100.822, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pkHb81FOGgg", "annotations": []}, "Oyi0X-rwUg8": {"duration": 4.83, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Oyi0X-rwUg8", "annotations": [{"segment": 
[0.01, 4.6331828773118255], "label": "Doing a powerbomb"}]}, "GvMep9y7d7c": {"duration": 184.81, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=GvMep9y7d7c", "annotations": []}, "iCveBOc8JzM": {"duration": 11.193, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=iCveBOc8JzM", "annotations": [{"segment": [0.20000000000000004, 10.99201814058957], "label": "Applying sunscreen"}]}, "D9DdNf-TIO8": {"duration": 63.37, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=D9DdNf-TIO8", "annotations": [{"segment": [0, 62.67727392852641], "label": "Bathing dog"}]}, "1EzvfTeJjvw": {"duration": 108.67, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=1EzvfTeJjvw", "annotations": []}, "K3sJnHGHQHM": {"duration": 180.002, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=K3sJnHGHQHM", "annotations": [{"segment": [37.62893268383797, 119.34549536271452], "label": "Rafting"}, {"segment": [140.96809099313572, 172.4191391828393], "label": "Rafting"}]}, "K0e_DdvGP54": {"duration": 24.984, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=K0e_DdvGP54", "annotations": [{"segment": [0.01, 12.70671255226209], "label": "Gargling mouthwash"}]}, "ys4_S3JHQjs": {"duration": 173.198, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ys4_S3JHQjs", "annotations": []}, "wC97n9iS0Ak": {"duration": 195.641, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wC97n9iS0Ak", "annotations": []}, "aj1ole7T9hc": {"duration": 30.14, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=aj1ole7T9hc", "annotations": [{"segment": [0, 30.139501133786847], "label": "Mowing the lawn"}]}, "rDWa3Z2IaIw": {"duration": 221.75, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=rDWa3Z2IaIw", "annotations": [{"segment": [45.664702932917315, 221.75056689342404], "label": "Playing field hockey"}]}, "rHWOESWciSc": {"duration": 38.197, "subset": "training", "resolution": "224x128", "url": "https://www.youtube.com/watch?v=rHWOESWciSc", "annotations": [{"segment": [18.025802277691106, 32.26767581279251], "label": "Peeling potatoes"}]}, "I4uZkBmE5eM": {"duration": 82.082, "subset": "validation", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=I4uZkBmE5eM", "annotations": [{"segment": [0.2, 81.821], "label": "Playing congas"}]}, "KXcVYfMuUGw": {"duration": 131.333, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KXcVYfMuUGw", "annotations": []}, "T69Cadlc62E": {"duration": 68.43, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=T69Cadlc62E", "annotations": [{"segment": [30.88346255850234, 46.778578003120124], "label": "Hurling"}]}, "ylo_0z8si1g": {"duration": 163.75, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ylo_0z8si1g", "annotations": [{"segment": [88.82968197936724, 97.24658996534853], "label": "Belly dance"}, {"segment": [104.1331510447878, 115.86581066161021], "label": "Belly dance"}, {"segment": [130.14904845600273, 156.67506150273172], "label": "Belly dance"}]}, "32H1n87WgCM": {"duration": 168.25, "subset": "validation", "resolution": "384x288", "url": "https://www.youtube.com/watch?v=32H1n87WgCM", "annotations": [{"segment": [9.186915286135257, 12.861681400589356], "label": "Doing karate"}, {"segment": [28.610679033964082, 32.0229618545286], "label": "Doing karate"}, {"segment": [56.69639148014901, 80.58237122410067], "label": "Doing karate"}, {"segment": [97.6437853269233, 123.36714812810202], "label": "Doing karate"}, {"segment": [143.3158781372855, 155.39010962870515], "label": "Doing karate"}]}, "QTfz_zprGcQ": {"duration": 181.6, "subset": "training", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QTfz_zprGcQ", "annotations": [{"segment": [73.2174698299532, 77.18282602964119], "label": "Spinning"}, {"segment": [112.02131264118565, 136.37992929641186], "label": "Spinning"}, {"segment": [142.32796359594386, 151.95811436661467], "label": "Spinning"}]}, "jOUwMAGYImE": {"duration": 149.1, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=jOUwMAGYImE", "annotations": [{"segment": [17.208290171606865, 148.57597347893915], "label": "Spinning"}]}, "qmHjHk0Hzz8": {"duration": 20.95, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qmHjHk0Hzz8", "annotations": []}, "gxILsv1RTEI": {"duration": 68.8, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gxILsv1RTEI", "annotations": [{"segment": [0, 64.61472145085804], "label": "Brushing teeth"}]}, "r6l4mxphtX0": {"duration": 76.12, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=r6l4mxphtX0", "annotations": []}, "PMZLlOrkizg": {"duration": 94.691, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PMZLlOrkizg", "annotations": []}, "2Z_2Xh30LBg": {"duration": 103.353, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=2Z_2Xh30LBg", "annotations": []}, "AYhduTuI_zM": {"duration": 229.3, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=AYhduTuI_zM", "annotations": [{"segment": [7.154335806085305, 40.779714094686234], "label": "Playing flauta"}, {"segment": [84.4211625118066, 90.86006473728338], "label": "Playing flauta"}, {"segment": [99.08755091428148, 114.11165610706061], "label": "Playing flauta"}, {"segment": [133.07064599318667, 144.1598664926189], "label": "Playing flauta"}, {"segment": [183.86642894539781, 208.90660409360373], "label": "Playing flauta"}]}, "jPLJAYnjsBw": {"duration": 
202.07999999999998, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=jPLJAYnjsBw", "annotations": [{"segment": [108.13503830265212, 155.10915115132607], "label": "Getting a tattoo"}]}, "LZEiFNEAyyw": {"duration": 127.826, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LZEiFNEAyyw", "annotations": [{"segment": [18.34629989281204, 107.08655368642746], "label": "Applying sunscreen"}]}, "uu4_cV49pMI": {"duration": 18.251, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=uu4_cV49pMI", "annotations": [{"segment": [0.01, 4.071569842433697], "label": "Running a marathon"}]}, "ZrAxt3ZjKdM": {"duration": 43.097, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ZrAxt3ZjKdM", "annotations": []}, "oc4v7GPk05c": {"duration": 9.47, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oc4v7GPk05c", "annotations": [{"segment": [0.5744134165366616, 5.611577223088924], "label": "Playing field hockey"}]}, "f6Id4KERnoI": {"duration": 144.452, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=f6Id4KERnoI", "annotations": [{"segment": [0.01, 140.1693641653666], "label": "Painting furniture"}]}, "LI3wIHFQkAk": {"duration": 67.29, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LI3wIHFQkAk", "annotations": [{"segment": [6.235432917316693, 63.97868564742589], "label": "Playing flauta"}]}, "iUiXNXRs4rU": {"duration": 79.67, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iUiXNXRs4rU", "annotations": []}, "AT_pPlJTiyE": {"duration": 205.752, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=AT_pPlJTiyE", "annotations": [{"segment": [59.06142174394459, 62.913253596810534], "label": "Changing car wheel"}, {"segment": [68.37001538837063, 
71.57987526575893], "label": "Changing car wheel"}, {"segment": [77.67860903279669, 84.09832878757327], "label": "Changing car wheel"}, {"segment": [102.71551607642537, 106.2463619415525], "label": "Changing car wheel"}, {"segment": [121.65368935301629, 125.18453521814342], "label": "Changing car wheel"}, {"segment": [144.12270849473435, 147.9745403476003], "label": "Changing car wheel"}, {"segment": [153.43130213916038, 160.1720077722309], "label": "Changing car wheel"}, {"segment": [164.0238396224649, 166.9127135101404], "label": "Changing car wheel"}, {"segment": [174.2953912230889, 179.11018103588142], "label": "Changing car wheel"}]}, "oY1y_UONCGg": {"duration": 161.36, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=oY1y_UONCGg", "annotations": [{"segment": [69.22421475090296, 99.17942040674824], "label": "Vacuuming floor"}, {"segment": [131.6518702353536, 156.5725875456787], "label": "Vacuuming floor"}]}, "IeMeu-7QihI": {"duration": 76.324, "subset": "training", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=IeMeu-7QihI", "annotations": [{"segment": [7.370852706607046, 60.98786036915888], "label": "Wakeboarding"}]}, "rZ8tVgFyY84": {"duration": 150.81, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rZ8tVgFyY84", "annotations": [{"segment": [50.23198822776911, 115.40417077907607], "label": "Washing hands"}]}, "-cJova7MiO8": {"duration": 29.513, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-cJova7MiO8", "annotations": [{"segment": [0.5507662191841759, 16.339417116148905], "label": "Sumo"}]}, "IWuiVS3-Z7k": {"duration": 119.93, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=IWuiVS3-Z7k", "annotations": [{"segment": [5.3303447737909515, 92.299127925117], "label": "Paintball"}]}, "mSyfGQigb8U": {"duration": 202.85, "subset": "validation", "resolution": "720x480", "url": 
"https://www.youtube.com/watch?v=mSyfGQigb8U", "annotations": [{"segment": [38.23176383325916, 197.16215222429906], "label": "Cleaning sink"}]}, "fllAtkXf7pw": {"duration": 36.43, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fllAtkXf7pw", "annotations": [{"segment": [13.58389012948518, 16.880399031201247], "label": "Drinking coffee"}]}, "zwQYVBQnn4o": {"duration": 162.22, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=zwQYVBQnn4o", "annotations": [{"segment": [99.06895622, 137.10327279999998], "label": "Snatch"}, {"segment": [18.36586908, 83.24984694000001], "label": "Snatch"}]}, "tzwIHzuzG9c": {"duration": 24.57, "subset": "validation", "resolution": "240x176", "url": "https://www.youtube.com/watch?v=tzwIHzuzG9c", "annotations": [{"segment": [0.30660481747269897, 24.375082989079562], "label": "Getting a haircut"}]}, "Po819PkLT9I": {"duration": 103.608, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Po819PkLT9I", "annotations": []}, "gTevMU1l0QE": {"duration": 224.05, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gTevMU1l0QE", "annotations": []}, "hxErUs42bBI": {"duration": 56.704, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hxErUs42bBI", "annotations": []}, "YED3l3gkrk4": {"duration": 159.94, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YED3l3gkrk4", "annotations": [{"segment": [50.15228734633385, 135.2365161279251], "label": "Doing nails"}]}, "HCM-Hrxa5k8": {"duration": 60.51, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HCM-Hrxa5k8", "annotations": []}, "uG_hgODoDes": {"duration": 207.726, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uG_hgODoDes", "annotations": [{"segment": [33.047522620904836, 42.11939157566303], "label": 
"Grooming dog"}, {"segment": [44.387358814352574, 61.23511544461779], "label": "Grooming dog"}, {"segment": [61.88310608424337, 83.91478783151327], "label": "Grooming dog"}, {"segment": [83.91478783151327, 121.4982449297972], "label": "Grooming dog"}, {"segment": [122.79422620904836, 127.65415600624026], "label": "Grooming dog"}, {"segment": [132.51408580343215, 152.29246489859594], "label": "Grooming dog"}, {"segment": [154.56065054602183, 171.41002964118564], "label": "Grooming dog"}, {"segment": [172.706135725429, 186.3152496099844], "label": "Grooming dog"}]}, "mDaZqz7lB0o": {"duration": 221.36, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=mDaZqz7lB0o", "annotations": [{"segment": [33.151574814012974, 146.07412652424466], "label": "Playing piano"}]}, "MleVjHU-wI8": {"duration": 232.32, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=MleVjHU-wI8", "annotations": [{"segment": [23.92017574580534, 201.14693240790854], "label": "Wrapping presents"}]}, "r0685Fhcfjs": {"duration": 64.064, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=r0685Fhcfjs", "annotations": [{"segment": [0.9376721148268085, 60.489816974639886], "label": "Spread mulch"}]}, "c6nEk5N4fSU": {"duration": 95.156, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=c6nEk5N4fSU", "annotations": [{"segment": [24.642421386651385, 30.431905929298395], "label": "Elliptical trainer"}, {"segment": [39.63570186889108, 51.3631191950078], "label": "Elliptical trainer"}, {"segment": [56.26191380031201, 75.4117472574103], "label": "Elliptical trainer"}]}, "BbeZ2t__2iA": {"duration": 9.43, "subset": "training", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=BbeZ2t__2iA", "annotations": [{"segment": [0.009999999999999998, 9.4273], "label": "Drinking coffee"}]}, "JguRZtFN-Ck": {"duration": 25.64, "subset": "testing", "resolution": "426x240", 
"url": "https://www.youtube.com/watch?v=JguRZtFN-Ck", "annotations": []}, "WXwW9nf6suY": {"duration": 118.283, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WXwW9nf6suY", "annotations": []}, "Q0UzDeDra3U": {"duration": 180.164, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Q0UzDeDra3U", "annotations": [{"segment": [0.01, 164.9653151326053], "label": "Changing car wheel"}]}, "YdfWewUrcTw": {"duration": 186.55, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YdfWewUrcTw", "annotations": [{"segment": [48.7409321372855, 69.11027691107644], "label": "Skateboarding"}, {"segment": [71.43820202808112, 81.62287441497661], "label": "Skateboarding"}, {"segment": [91.22556552262091, 99.37330343213729], "label": "Skateboarding"}, {"segment": [107.23005070202808, 138.65703978159127], "label": "Skateboarding"}]}, "BH9zsAu6OMQ": {"duration": 136.07, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BH9zsAu6OMQ", "annotations": [{"segment": [30.567666271450857, 40.96916382215288], "label": "Triple jump"}]}, "2RpAv3irv1c": {"duration": 161.426, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2RpAv3irv1c", "annotations": [{"segment": [52.381319720816045, 154.6256248393136], "label": "Braiding hair"}]}, "Mu8-AwcMqaw": {"duration": 146.309, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Mu8-AwcMqaw", "annotations": [{"segment": [43.82408948602842, 104.7669639275367], "label": "Braiding hair"}]}, "An_CpsJkJMM": {"duration": 28.03, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=An_CpsJkJMM", "annotations": [{"segment": [15.171903195007799, 28.026484], "label": "Washing face"}]}, "3oy4P1gyU4k": {"duration": 225.606, "subset": "testing", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=3oy4P1gyU4k", "annotations": []}, "l3wFDLyBQ9U": {"duration": 29.03, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=l3wFDLyBQ9U", "annotations": []}, "0e7-mNDgIXw": {"duration": 195.42, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0e7-mNDgIXw", "annotations": [{"segment": [5.487579992995638, 7.316773323994185], "label": "Skiing"}, {"segment": [37.498463285470194, 42.071446612966554], "label": "Skiing"}, {"segment": [62.49743880911699, 69.50934657794474], "label": "Skiing"}, {"segment": [80.78937211910245, 86.88668322243093], "label": "Skiing"}, {"segment": [90.54506988442803, 100.91049876008645], "label": "Skiing"}, {"segment": [111.58079319091131, 146.03060092471725], "label": "Skiing"}]}, "Ar6TcWqwK-A": {"duration": 216.62, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=Ar6TcWqwK-A", "annotations": []}, "nn__PD7zCcs": {"duration": 41.332, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nn__PD7zCcs", "annotations": [{"segment": [0.01, 41.331519], "label": "Plastering"}]}, "nOxKfRwdf2w": {"duration": 228.07, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nOxKfRwdf2w", "annotations": [{"segment": [16.722496948857543, 25.973239764430577], "label": "Ballet"}, {"segment": [35.22398269422777, 58.70663782371295], "label": "Ballet"}, {"segment": [64.39940270358815, 101.40237442277692], "label": "Ballet"}, {"segment": [118.1248712574103, 218.1040544602184], "label": "Ballet"}]}, "dKwPfFN7DpM": {"duration": 90.51, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dKwPfFN7DpM", "annotations": [{"segment": [4.374675507020281, 87.77574726989079], "label": "Hammer throw"}]}, "Jy9kiITFsjI": {"duration": 88.77, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Jy9kiITFsjI", 
"annotations": [{"segment": [9.27859969364761, 88.7698866213152], "label": "Bullfighting"}]}, "cIlE202sClg": {"duration": 33.715, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cIlE202sClg", "annotations": []}, "VToS7Ma-V4g": {"duration": 49.23, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=VToS7Ma-V4g", "annotations": [{"segment": [4.377377868954758, 13.90009463650546], "label": "Triple jump"}, {"segment": [27.493004861154446, 40.5483423650546], "label": "Triple jump"}]}, "sS1Zh8mqMOE": {"duration": 108.113, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=sS1Zh8mqMOE", "annotations": [{"segment": [20.661050442017682, 83.23451749498551], "label": "Arm wrestling"}]}, "6-P9SRNbwac": {"duration": 116.216, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=6-P9SRNbwac", "annotations": [{"segment": [14.373115909861912, 90.40219105004883], "label": "Building sandcastles"}]}, "IseUmPHr054": {"duration": 151.3, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IseUmPHr054", "annotations": []}, "0p3JN4KJsUs": {"duration": 97.32, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0p3JN4KJsUs", "annotations": [{"segment": [13.046374414976599, 87.24660686427457], "label": "Ballet"}]}, "Mk1gOZ5EOUk": {"duration": 86.518, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Mk1gOZ5EOUk", "annotations": [{"segment": [0.01, 85.5328159126365], "label": "River tubing"}]}, "tCkHrK6mRME": {"duration": 124.111, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tCkHrK6mRME", "annotations": [{"segment": [0.01, 124.021], "label": "Disc dog"}]}, "tGuPkIKGIK4": {"duration": 94.34, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tGuPkIKGIK4", 
"annotations": [{"segment": [1.6368337, 69.87539001560063], "label": "Bathing dog"}]}, "19fGZFUE7yE": {"duration": 112.176, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=19fGZFUE7yE", "annotations": [{"segment": [35.53974611801617, 111.02239217409094], "label": "Playing blackjack"}]}, "_SJOVswvGRc": {"duration": 40.24, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_SJOVswvGRc", "annotations": []}, "eg3oZ4XCEKk": {"duration": 184.715, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eg3oZ4XCEKk", "annotations": [{"segment": [14.404134165366616, 18.43729173166927], "label": "Running a marathon"}, {"segment": [23.046614664586585, 25.351276131045243], "label": "Running a marathon"}, {"segment": [30.24868174726989, 33.12950858034321], "label": "Running a marathon"}, {"segment": [63.666273010920435, 74.32533229329174], "label": "Running a marathon"}, {"segment": [75.7657457098284, 78.07886271450859], "label": "Running a marathon"}, {"segment": [101.41608736349455, 114.9574399375975], "label": "Running a marathon"}, {"segment": [121.87217316692669, 130.80370358814355], "label": "Running a marathon"}, {"segment": [133.68484243369736, 144.92128393135727], "label": "Running a marathon"}, {"segment": [150.6835616224649, 169.12285023400938], "label": "Running a marathon"}]}, "ZRH7JzcSm1s": {"duration": 61.93, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ZRH7JzcSm1s", "annotations": []}, "NVNUT5iwLtw": {"duration": 101.43, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=NVNUT5iwLtw", "annotations": [{"segment": [0.01, 101.281], "label": "Hand washing clothes"}]}, "S5zweEQSnho": {"duration": 135.4, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=S5zweEQSnho", "annotations": [{"segment": [55.446549290543054, 78.04761890611678], "label": 
"Tennis serve with ball bouncing"}, {"segment": [52.91184921294852, 79.10374263572544], "label": "Tennis serve with ball bouncing"}]}, "bghOBRA0z-Y": {"duration": 204.29, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bghOBRA0z-Y", "annotations": [{"segment": [89.55565955971572, 106.7656439590917], "label": "Playing harmonica"}, {"segment": [119.51377944617784, 129.39358521372856], "label": "Playing harmonica"}, {"segment": [149.7906035725429, 160.94522298751951], "label": "Playing harmonica"}]}, "fdHpRUOSi28": {"duration": 78.53, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fdHpRUOSi28", "annotations": [{"segment": [5.390507037968594, 74.4870048049922], "label": "Clipping cat claws"}]}, "QImpWj3AJcY": {"duration": 155.481, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QImpWj3AJcY", "annotations": []}, "VEwZJ-5u_Xs": {"duration": 153.299, "subset": "training", "resolution": "1906x1080", "url": "https://www.youtube.com/watch?v=VEwZJ-5u_Xs", "annotations": [{"segment": [0.19999999999999998, 142.73850155763242], "label": "Kite flying"}]}, "-uJnucdW6DY": {"duration": 220.07999999999998, "subset": "testing", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=-uJnucdW6DY", "annotations": []}, "kl-2y3jtPNE": {"duration": 99.28999999999999, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kl-2y3jtPNE", "annotations": []}, "08ItAtlb60Q": {"duration": 80.179, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=08ItAtlb60Q", "annotations": [{"segment": [40.96347214275118, 65.69142064174454], "label": "BMX"}]}, "9LraB2HBC0g": {"duration": 184.02, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9LraB2HBC0g", "annotations": [{"segment": [59.874629485179405, 119.89284321372854], "label": "Shaving legs"}]}, "3kcGcDv0NOw": 
{"duration": 73.65, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=3kcGcDv0NOw", "annotations": [{"segment": [1.5495093603744148, 5.107641965678627], "label": "Discus throw"}, {"segment": [26.34165912636505, 33.22836739469579], "label": "Discus throw"}, {"segment": [55.49539079563182, 61.349092823712944], "label": "Discus throw"}]}, "PDjtB578yRk": {"duration": 148.167, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PDjtB578yRk", "annotations": [{"segment": [39.69570602072604, 141.0120719689745], "label": "Capoeira"}]}, "JZDTcGPKw2w": {"duration": 195.33, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JZDTcGPKw2w", "annotations": [{"segment": [9.44410979160255, 12.79524552410668], "label": "Horseback riding"}, {"segment": [19.49751698911494, 22.239355315709226], "label": "Horseback riding"}, {"segment": [35.33924954277083, 40.51827749300448], "label": "Horseback riding"}]}, "_IlyE43ZUH8": {"duration": 230.34, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_IlyE43ZUH8", "annotations": [{"segment": [99.16236505460218, 146.2285600624025], "label": "Cleaning windows"}]}, "CG-itBlFOzc": {"duration": 49.041, "subset": "validation", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=CG-itBlFOzc", "annotations": [{"segment": [0.01, 28.919384889681304], "label": "Knitting"}]}, "leJM3mgm_gU": {"duration": 3.6, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=leJM3mgm_gU", "annotations": [{"segment": [0.8256106, 3.42470043], "label": "Playing kickball"}]}, "JJ0jtr07EGE": {"duration": 81.363, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JJ0jtr07EGE", "annotations": [{"segment": [0.2, 72.12558285989357], "label": "Riding bumper cars"}]}, "zjbxaEVgzyE": {"duration": 51.224, "subset": "training", "resolution": "640x360", 
"url": "https://www.youtube.com/watch?v=zjbxaEVgzyE", "annotations": [{"segment": [0.20000000000000004, 30.01899531981279], "label": "Sumo"}]}, "d6uMS_ZgMg4": {"duration": 110.67, "subset": "training", "resolution": "624x360", "url": "https://www.youtube.com/watch?v=d6uMS_ZgMg4", "annotations": [{"segment": [15.365523990639625, 99.27164375975039], "label": "Spinning"}]}, "YtV4sdy7bqE": {"duration": 203.97, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YtV4sdy7bqE", "annotations": []}, "3C00m8lkJi0": {"duration": 54.242, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3C00m8lkJi0", "annotations": [{"segment": [5.820630741675857, 42.17849015915985], "label": "Spread mulch"}]}, "vlBzrnOuW9U": {"duration": 28.49, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=vlBzrnOuW9U", "annotations": [{"segment": [2.684589152106084, 5.1251247449297965], "label": "Playing kickball"}, {"segment": [17.682789704368172, 18.43713706942278], "label": "Playing kickball"}]}, "qVikbiCtK7k": {"duration": 212.161, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qVikbiCtK7k", "annotations": []}, "df4QVzHEHeo": {"duration": 194.259, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=df4QVzHEHeo", "annotations": []}, "nDG3MsnLE20": {"duration": 60.721, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nDG3MsnLE20", "annotations": [{"segment": [24.281866614664587, 32.99115210608424], "label": "Curling"}, {"segment": [40.375111544461774, 56.373690327613105], "label": "Curling"}]}, "QoTM5tmcJeI": {"duration": 31.092, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QoTM5tmcJeI", "annotations": [{"segment": [2.466045013345389, 28.19028696668909], "label": "Table soccer"}]}, "TypqJox_rts": {"duration": 35.83, "subset": "testing", 
"resolution": "640x352", "url": "https://www.youtube.com/watch?v=TypqJox_rts", "annotations": []}, "KcSo4p-hhBs": {"duration": 90.63, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=KcSo4p-hhBs", "annotations": [{"segment": [18.58125381, 89.47405063260531], "label": "Getting a haircut"}]}, "8A4cZXyDrkU": {"duration": 54.173, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8A4cZXyDrkU", "annotations": []}, "EM7A0cTCrNw": {"duration": 219.267, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EM7A0cTCrNw", "annotations": []}, "HSo1WlL5CcI": {"duration": 64.9, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=HSo1WlL5CcI", "annotations": []}, "ENIllA4z1Aw": {"duration": 27.12, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ENIllA4z1Aw", "annotations": []}, "f9Bo4yVcnDE": {"duration": 129.85, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=f9Bo4yVcnDE", "annotations": [{"segment": [43.14695023719316, 43.95722160314984], "label": "Chopping wood"}, {"segment": [80.62200091268957, 126.60489957098285], "label": "Chopping wood"}]}, "C5SXR9nMY3I": {"duration": 62.39, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=C5SXR9nMY3I", "annotations": []}, "lX2rbXJpScw": {"duration": 123.716, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lX2rbXJpScw", "annotations": []}, "dFSGN8U8EnA": {"duration": 106.418, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=dFSGN8U8EnA", "annotations": [{"segment": [0.2, 106.2166439909297], "label": "Applying sunscreen"}]}, "qpVPY6w9Fp0": {"duration": 234.336, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=qpVPY6w9Fp0", "annotations": [{"segment": [45.68408736349454, 
205.4158221528861], "label": "Building sandcastles"}]}, "caeb6tAThNI": {"duration": 25.5, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=caeb6tAThNI", "annotations": []}, "B39pJK4FU1o": {"duration": 44.24, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=B39pJK4FU1o", "annotations": [{"segment": [17.58386115444618, 44.13204368174727], "label": "Hopscotch"}]}, "IaQGTPWI_KE": {"duration": 229.692, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IaQGTPWI_KE", "annotations": []}, "bd3Df5_QMNw": {"duration": 60.51, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bd3Df5_QMNw", "annotations": [{"segment": [30.774806864274574, 60.51120181405896], "label": "Brushing teeth"}]}, "Hc7A90HiA28": {"duration": 147.192, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Hc7A90HiA28", "annotations": [{"segment": [3.4432371294851793, 112.47907956318252], "label": "Tug of war"}]}, "f4k-dKaEZog": {"duration": 9.03, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=f4k-dKaEZog", "annotations": [{"segment": [0, 8.990286923556942], "label": "Playing lacrosse"}]}, "cptHZTGmOng": {"duration": 152.323, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=cptHZTGmOng", "annotations": []}, "HWymkBdgm1M": {"duration": 143.96, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=HWymkBdgm1M", "annotations": [{"segment": [21.111684074882994, 33.239672798751954], "label": "Playing piano"}, {"segment": [33.4642651825273, 40.20203669578783], "label": "Playing piano"}, {"segment": [50.30869396567863, 87.5910296723869], "label": "Playing piano"}]}, "Kdf1HhynNx8": {"duration": 204.638, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=Kdf1HhynNx8", "annotations": 
[{"segment": [45.33303279563183, 204.637141], "label": "Gargling mouthwash"}]}, "rbnlUn4UNSk": {"duration": 199.622, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rbnlUn4UNSk", "annotations": [{"segment": [0.2, 199.381], "label": "Scuba diving"}]}, "i1s-HuRTr5k": {"duration": 165.791, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=i1s-HuRTr5k", "annotations": [{"segment": [0.01, 156.22066599063962], "label": "Putting on shoes"}]}, "lx5Yfoj7Gk8": {"duration": 101.565, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lx5Yfoj7Gk8", "annotations": [{"segment": [12.555712702350082, 87.19420491418809], "label": "Hanging wallpaper"}]}, "UOF3w4T-bBE": {"duration": 32.39, "subset": "training", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=UOF3w4T-bBE", "annotations": [{"segment": [1.42534256, 30.9067511700468], "label": "Shaving legs"}]}, "FoPNGc6Lg8k": {"duration": 144.45, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FoPNGc6Lg8k", "annotations": [{"segment": [9.458845553822155, 92.78677067082685], "label": "Using the balance beam"}]}, "B5Ea3Bs8hC4": {"duration": 73.52, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=B5Ea3Bs8hC4", "annotations": [{"segment": [2.0643662246489862, 68.58283346333853], "label": "Hand washing clothes"}]}, "86ZBHRSW6Gc": {"duration": 222.22, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=86ZBHRSW6Gc", "annotations": []}, "OESjoURo19U": {"duration": 189.17, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OESjoURo19U", "annotations": []}, "i9Yxt2k2aWg": {"duration": 35.434, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=i9Yxt2k2aWg", "annotations": [{"segment": [14.195344982895897, 18.48242909504543], 
"label": "Doing crunches"}]}, "GdkZLDpwk2o": {"duration": 36.11, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GdkZLDpwk2o", "annotations": []}, "cvuSpqwxRYE": {"duration": 50.504, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=cvuSpqwxRYE", "annotations": [{"segment": [0.01, 50.50340136054422], "label": "Bullfighting"}]}, "r-nPLO3vHfU": {"duration": 18.785, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=r-nPLO3vHfU", "annotations": []}, "cdiazgshHuc": {"duration": 52.01, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cdiazgshHuc", "annotations": [{"segment": [4.047184867394696, 17.70132371294852], "label": "Grooming horse"}]}, "1UIathRb404": {"duration": 128.29, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1UIathRb404", "annotations": [{"segment": [3.1520853, 121.48546105460217], "label": "Playing field hockey"}]}, "mV5DfYFg4H0": {"duration": 87.099, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mV5DfYFg4H0", "annotations": [{"segment": [15.513673727172186, 75.18164880505381], "label": "Clipping cat claws"}]}, "YJxqF6aSs7s": {"duration": 147.121, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YJxqF6aSs7s", "annotations": [{"segment": [33.17663693782334, 121.03752249922239], "label": "Rafting"}]}, "HPULzvqZ6SQ": {"duration": 114.15, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HPULzvqZ6SQ", "annotations": []}, "jQgAdClKvDE": {"duration": 193.957, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jQgAdClKvDE", "annotations": [{"segment": [138.76527678489091, 180.44923005591895], "label": "Wakeboarding"}]}, "56McYSkE4uQ": {"duration": 49.087, "subset": "training", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=56McYSkE4uQ", "annotations": [{"segment": [0.30631502652106085, 49.08698412698413], "label": "Playing rubik cube"}]}, "ZmVy3gSaKv0": {"duration": 105.63, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZmVy3gSaKv0", "annotations": [{"segment": [11.205421234536455, 104.80364398127925], "label": "Springboard diving"}]}, "XxW4XWySzOw": {"duration": 153.693, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=XxW4XWySzOw", "annotations": []}, "R-H_kkrXtmM": {"duration": 192.42, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=R-H_kkrXtmM", "annotations": []}, "Mt42eefIErM": {"duration": 26.63, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Mt42eefIErM", "annotations": []}, "er5jUsRr4y0": {"duration": 119.351, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=er5jUsRr4y0", "annotations": []}, "ga_pm6X1zUQ": {"duration": 59.769, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ga_pm6X1zUQ", "annotations": []}, "E0niuPtg16o": {"duration": 65.02, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=E0niuPtg16o", "annotations": [{"segment": [11.25854765522621, 58.92987556474258], "label": "Layup drill in basketball"}]}, "gVX9h58RsPU": {"duration": 155.876, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=gVX9h58RsPU", "annotations": []}, "_RCe4Q0p1aA": {"duration": 89.53999999999999, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=_RCe4Q0p1aA", "annotations": [{"segment": [42.94683776, 81.19661696], "label": "Clean and jerk"}]}, "u3B3M17akWU": {"duration": 35.899, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=u3B3M17akWU", "annotations": [{"segment": [0.2, 
30.266460218408735], "label": "Having an ice cream"}]}, "FEBEqdo45wM": {"duration": 50.829, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FEBEqdo45wM", "annotations": []}, "EBOT5e2xAoQ": {"duration": 98.31, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EBOT5e2xAoQ", "annotations": [{"segment": [2.60627763, 86.32168954758191], "label": "High jump"}]}, "ZVIi4lPU6h0": {"duration": 69.405, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZVIi4lPU6h0", "annotations": [{"segment": [13.317857429017161, 43.5266559875195], "label": "Carving jack-o-lanterns"}]}, "uQsL4kCWu8U": {"duration": 21.71, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=uQsL4kCWu8U", "annotations": []}, "YotaXgC6Ee8": {"duration": 110.53, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YotaXgC6Ee8", "annotations": [{"segment": [22.581567059999998, 33.86206942277691], "label": "Springboard diving"}, {"segment": [3.5326840873634944, 6.634553042121684], "label": "Springboard diving"}, {"segment": [9.047117784711388, 13.527595163806552], "label": "Springboard diving"}, {"segment": [20.2483112324493, 26.45204914196568], "label": "Springboard diving"}, {"segment": [70.91217082683306, 74.01403978159126], "label": "Springboard diving"}, {"segment": [94.52084009360374, 98.65666536661467], "label": "Springboard diving"}]}, "MFlYmFZ7xlo": {"duration": 34.784, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=MFlYmFZ7xlo", "annotations": [{"segment": [0.2, 3.846772369158878], "label": "Mooping floor"}, {"segment": [12.461375280373831, 16.09142807943925], "label": "Mooping floor"}]}, "1gp-5iOIfVo": {"duration": 169.645, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1gp-5iOIfVo", "annotations": [{"segment": [44.991488285381756, 
162.49866809672386], "label": "Running a marathon"}]}, "rDGbxMgkt7w": {"duration": 189.59, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rDGbxMgkt7w", "annotations": [{"segment": [8.550702028081124, 164.52730109204367], "label": "Zumba"}]}, "sY8TfKQHe5w": {"duration": 28.166, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=sY8TfKQHe5w", "annotations": [{"segment": [5.045109204368175, 17.24111232449298], "label": "Carving jack-o-lanterns"}]}, "ZIHD-3iWCrE": {"duration": 57.911, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZIHD-3iWCrE", "annotations": [{"segment": [2.8006417535101407, 55.380432093603744], "label": "Clipping cat claws"}]}, "afL6f_pwgMs": {"duration": 120.93, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=afL6f_pwgMs", "annotations": [{"segment": [0.01, 120.92952380952381], "label": "River tubing"}]}, "mzVJHw9Jrb4": {"duration": 140.713, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mzVJHw9Jrb4", "annotations": [{"segment": [5.597784114409881, 51.03861986667834], "label": "Rollerblading"}, {"segment": [54.98999688861472, 120.62675964189137], "label": "Rollerblading"}]}, "lKCDcLEby-Y": {"duration": 139.645, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lKCDcLEby-Y", "annotations": [{"segment": [26.8368093661133, 101.64930780848091], "label": "Tug of war"}]}, "aBPtU0R0zx8": {"duration": 123.14, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aBPtU0R0zx8", "annotations": []}, "reL2X6B87Rs": {"duration": 113.03999999999999, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=reL2X6B87Rs", "annotations": []}, "mGLCCohSA0A": {"duration": 169.924, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=mGLCCohSA0A", "annotations": [{"segment": [51.95772531045241, 156.13826636661466], "label": "Laying tile"}]}, "nYisYkLo5xM": {"duration": 98.941, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nYisYkLo5xM", "annotations": []}, "MyqWp3ipXxA": {"duration": 219.081, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=MyqWp3ipXxA", "annotations": [{"segment": [30.464008621117838, 36.94769268664436], "label": "Carving jack-o-lanterns"}, {"segment": [41.042651043819006, 59.811210180869466], "label": "Carving jack-o-lanterns"}, {"segment": [144.44034956247881, 181.63622130681514], "label": "Carving jack-o-lanterns"}, {"segment": [205.5234773501062, 215.0783801351529], "label": "Carving jack-o-lanterns"}]}, "jXZ6WVxXgLA": {"duration": 12.911, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=jXZ6WVxXgLA", "annotations": [{"segment": [0.2, 12.681000000000001], "label": "Raking leaves"}]}, "K9ccE4wrTts": {"duration": 86.89, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=K9ccE4wrTts", "annotations": [{"segment": [23.574106084243372, 55.996945397815914], "label": "Shaving"}]}, "DEduSDgovOQ": {"duration": 170.621, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=DEduSDgovOQ", "annotations": [{"segment": [0.01, 28.037432078179002], "label": "Grooming dog"}, {"segment": [30.699213733118665, 42.14487484935923], "label": "Grooming dog"}, {"segment": [55.45378312405757, 165.65154258842554], "label": "Grooming dog"}]}, "TnvAN5iwpIw": {"duration": 93.46000000000001, "subset": "validation", "resolution": "480x272", "url": "https://www.youtube.com/watch?v=TnvAN5iwpIw", "annotations": [{"segment": [57.00921028393135, 58.02983553198128], "label": "Playing saxophone"}, {"segment": [59.77947881435257, 64.88260505460218], "label": "Playing saxophone"}, {"segment": 
[67.36126637129485, 78.15073327925117], "label": "Playing saxophone"}]}, "AF6tasHTW-8": {"duration": 72.678, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AF6tasHTW-8", "annotations": [{"segment": [7.483275631825273, 18.368040187207487], "label": "Rope skipping"}, {"segment": [20.86246539781591, 65.87550215288611], "label": "Rope skipping"}]}, "1SdH6HFnOZA": {"duration": 123.298, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=1SdH6HFnOZA", "annotations": [{"segment": [0.01, 75.402183975039], "label": "Beach soccer"}]}, "EsdNBkcubkw": {"duration": 143.22, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EsdNBkcubkw", "annotations": []}, "bQ7CWiB6TAg": {"duration": 59.05, "subset": "testing", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=bQ7CWiB6TAg", "annotations": []}, "RzMKERQ9vOU": {"duration": 157.014, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=RzMKERQ9vOU", "annotations": [{"segment": [92.89639225958295, 140.98553222847875], "label": "Doing crunches"}]}, "lb-TlBtrKcU": {"duration": 204.82, "subset": "training", "resolution": "854x478", "url": "https://www.youtube.com/watch?v=lb-TlBtrKcU", "annotations": [{"segment": [49.98234009360374, 56.369859594383776], "label": "Archery"}, {"segment": [143.5595007800312, 162.4026833073323], "label": "Archery"}, {"segment": [186.3558814352574, 193.06277691107644], "label": "Archery"}]}, "BcflqWdlBjI": {"duration": 4.64, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BcflqWdlBjI", "annotations": [{"segment": [0, 4.596666], "label": "Chopping wood"}]}, "oA8ZUG1y4Lc": {"duration": 113.08, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=oA8ZUG1y4Lc", "annotations": [{"segment": [3.881101312079694, 106.90669977819522], "label": "Cumbia"}]}, "vGf06QD6AB4": 
{"duration": 203.01, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=vGf06QD6AB4", "annotations": []}, "KxWvKN_kGMY": {"duration": 106.673, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KxWvKN_kGMY", "annotations": [{"segment": [60.90815074235622, 90.19731612665866], "label": "Canoeing"}]}, "-xQvJmC2jhk": {"duration": 95.086, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=-xQvJmC2jhk", "annotations": [{"segment": [7.445822715158088, 63.28276009989329], "label": "Canoeing"}]}, "bmoS216hsoc": {"duration": 122.3, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bmoS216hsoc", "annotations": [{"segment": [15.672762451581864, 116.63652413382485], "label": "Playing flauta"}]}, "DyXsrXlQMr4": {"duration": 86.96000000000001, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=DyXsrXlQMr4", "annotations": []}, "DXG4djsPk7s": {"duration": 19.48, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=DXG4djsPk7s", "annotations": [{"segment": [0, 19.441], "label": "Windsurfing"}]}, "2-xMWBQA7iM": {"duration": 74.792, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2-xMWBQA7iM", "annotations": [{"segment": [14.695609984399374, 43.27040717628705], "label": "Using the rowing machine"}]}, "b1uGF3c3Vz4": {"duration": 71.448, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=b1uGF3c3Vz4", "annotations": [{"segment": [30.863677067082687, 62.73014508580344], "label": "Clipping cat claws"}]}, "oO1g33vi4hg": {"duration": 220.915, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=oO1g33vi4hg", "annotations": [{"segment": [2.5848047620904837, 39.805993336193445], "label": "Snowboarding"}, {"segment": [54.97018127379095, 205.5781387449298], "label": 
"Snowboarding"}]}, "2zQk4ZEfzf4": {"duration": 37.94, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2zQk4ZEfzf4", "annotations": [{"segment": [0, 37.940883], "label": "Hammer throw"}]}, "0Y7c4uPB1tE": {"duration": 223.59, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0Y7c4uPB1tE", "annotations": []}, "wIK83guBfM0": {"duration": 44.42, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=wIK83guBfM0", "annotations": [{"segment": [6.473671606864275, 11.73569344773791], "label": "Carving jack-o-lanterns"}]}, "Kl9jdsVmB2A": {"duration": 87.958, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Kl9jdsVmB2A", "annotations": []}, "ZjYttT9itfY": {"duration": 184.04, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=ZjYttT9itfY", "annotations": [{"segment": [18.649929797191888, 22.66683775351014], "label": "Shot put"}, {"segment": [32.99602964118565, 39.02139157566303], "label": "Shot put"}, {"segment": [66.56590327613105, 115.62956474258971], "label": "Shot put"}, {"segment": [117.06417472698908, 140.59177847113884], "label": "Shot put"}, {"segment": [148.0517503900156, 159.24170826833074], "label": "Shot put"}]}, "t3UI2gjo8X8": {"duration": 65.69, "subset": "training", "resolution": "630x360", "url": "https://www.youtube.com/watch?v=t3UI2gjo8X8", "annotations": [{"segment": [8.65669398673947, 61.415548361934476], "label": "Playing water polo"}]}, "uFhZhnlYKRw": {"duration": 143.871, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uFhZhnlYKRw", "annotations": [{"segment": [49.602894224648985, 128.6084090078003], "label": "Making a lemonade"}]}, "d_JH9U-UI3c": {"duration": 142.13, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=d_JH9U-UI3c", "annotations": [{"segment": [0.01, 132.26232877342306], "label": 
"Baton twirling"}]}, "x6Gs4PINiiI": {"duration": 157.53, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=x6Gs4PINiiI", "annotations": [{"segment": [1.2287376833073322, 4.914950733229329], "label": "Hopscotch"}, {"segment": [6.389435953198128, 11.304386686427458], "label": "Hopscotch"}, {"segment": [13.516114516380654, 17.93957017628705], "label": "Hopscotch"}, {"segment": [21.871530762870513, 64.14010706864275], "label": "Hopscotch"}]}, "l5HNvNpRTpk": {"duration": 172.46, "subset": "training", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=l5HNvNpRTpk", "annotations": [{"segment": [46.27487011861426, 163.57628395631826], "label": "Making a sandwich"}]}, "k4a5XXRYKK4": {"duration": 45.09, "subset": "training", "resolution": "482x360", "url": "https://www.youtube.com/watch?v=k4a5XXRYKK4", "annotations": [{"segment": [1.055221918876755, 44.03792808112325], "label": "Playing lacrosse"}]}, "Dh3bLRYJkiY": {"duration": 59.0, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Dh3bLRYJkiY", "annotations": [{"segment": [5.303249200000001, 27.9395856], "label": "Long jump"}, {"segment": [31.545300800000003, 51.545605200000004], "label": "Long jump"}]}, "1KONAdZGdWU": {"duration": 200.99, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=1KONAdZGdWU", "annotations": []}, "VEXswbPrwxE": {"duration": 79.016, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VEXswbPrwxE", "annotations": [{"segment": [0.01, 79.001], "label": "Bullfighting"}]}, "q9nloCMmFuo": {"duration": 120.14, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=q9nloCMmFuo", "annotations": []}, "kDj0WKEI-i8": {"duration": 223.03, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kDj0WKEI-i8", "annotations": []}, "GbOJ1Yqxyx0": {"duration": 87.725, "subset": "testing", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=GbOJ1Yqxyx0", "annotations": []}, "xdl0lyO9ub0": {"duration": 222.05, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xdl0lyO9ub0", "annotations": [{"segment": [43.643569422776906, 172.8423900156006], "label": "Clean and jerk"}]}, "V_rril47Z5Q": {"duration": 164.816, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=V_rril47Z5Q", "annotations": [{"segment": [14.141713097095312, 160.70128412636504], "label": "Baton twirling"}]}, "FxpP7lqvm30": {"duration": 97.849, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FxpP7lqvm30", "annotations": []}, "kv8lsSjfguI": {"duration": 120.512, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kv8lsSjfguI", "annotations": [{"segment": [12.215234009360374, 102.79589235569424], "label": "Disc dog"}]}, "Yr6Oyltj6Eo": {"duration": 52.2, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=Yr6Oyltj6Eo", "annotations": [{"segment": [0.01, 52.198298], "label": "High jump"}]}, "VBpE3V49xEg": {"duration": 236.403, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VBpE3V49xEg", "annotations": []}, "9elfMU_LRKc": {"duration": 43.86, "subset": "validation", "resolution": "202x360", "url": "https://www.youtube.com/watch?v=9elfMU_LRKc", "annotations": [{"segment": [13.411932336973479, 22.376029970358815], "label": "Fixing bicycle"}]}, "v13d7g_uUXM": {"duration": 167.49, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=v13d7g_uUXM", "annotations": [{"segment": [19.093290000000003, 64.4013322], "label": "Clean and jerk"}, {"segment": [73.85586045, 159.47129843447738], "label": "Clean and jerk"}]}, "cIaqen3kVIA": {"duration": 145.4, "subset": "validation", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=cIaqen3kVIA", "annotations": [{"segment": [30.04670046801872, 113.49715912636505], "label": "Playing flauta"}, {"segment": [122.34109360374414, 139.5754274570983], "label": "Playing flauta"}]}, "uzgIBU9arcU": {"duration": 25.333, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uzgIBU9arcU", "annotations": [{"segment": [0.19999999999999998, 24.118146645865835], "label": "Swinging at the playground"}]}, "qXUdnj4VN80": {"duration": 150.489, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qXUdnj4VN80", "annotations": [{"segment": [0.2, 143.16443057722307], "label": "Hula hoop"}]}, "Dod5s-yv0es": {"duration": 84.41, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Dod5s-yv0es", "annotations": []}, "AxaksczuL80": {"duration": 150.79, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=AxaksczuL80", "annotations": [{"segment": [60.45729964164553, 135.73487110109204], "label": "Shaving"}]}, "HURRgbZGz8Q": {"duration": 206.87, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HURRgbZGz8Q", "annotations": []}, "sz1772O9AUM": {"duration": 90.837, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sz1772O9AUM", "annotations": [{"segment": [30.467768261750876, 61.644088198127925], "label": "Hand car wash"}]}, "FDIYymZEFi0": {"duration": 24.99, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FDIYymZEFi0", "annotations": [{"segment": [4.443451664597196, 18.436426643460297], "label": "Washing hands"}]}, "XkWO7aqcC8Y": {"duration": 154.32, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XkWO7aqcC8Y", "annotations": [{"segment": [39.71351794071763, 138.63628081123247], "label": "Laying tile"}]}, "Ci__IRtoMOo": {"duration": 208.771, "subset": 
"training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ci__IRtoMOo", "annotations": [{"segment": [0.2, 198.38077781557274], "label": "Riding bumper cars"}]}, "zqxzFxya6T8": {"duration": 42.75, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zqxzFxya6T8", "annotations": []}, "yGTIEmZ7S2w": {"duration": 97.62, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=yGTIEmZ7S2w", "annotations": []}, "2-h36nfbFK8": {"duration": 180.675, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2-h36nfbFK8", "annotations": [{"segment": [0, 106.54438147581902], "label": "Snowboarding"}, {"segment": [133.8851354524181, 166.58129484711387], "label": "Snowboarding"}]}, "7845vSLQoG8": {"duration": 28.77, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7845vSLQoG8", "annotations": [{"segment": [7.9317841028037375, 23.929789327102803], "label": "Painting fence"}]}, "lOZ9bfzq89o": {"duration": 89.1, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=lOZ9bfzq89o", "annotations": []}, "0yGGccaHMnI": {"duration": 46.069, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=0yGGccaHMnI", "annotations": [{"segment": [0.20000000000000004, 45.86839002267573], "label": "Powerbocking"}]}, "u3XOLSC7lg0": {"duration": 33.67, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=u3XOLSC7lg0", "annotations": []}, "84x9A9xyLcE": {"duration": 111.46000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=84x9A9xyLcE", "annotations": [{"segment": [26.416461778471138, 107.5775647425897], "label": "Windsurfing"}]}, "RIvAh7e3XSk": {"duration": 66.665, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RIvAh7e3XSk", "annotations": []}, "Nc71GzZhAHE": 
{"duration": 213.229, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Nc71GzZhAHE", "annotations": [{"segment": [25.605923556942276, 68.17161466458658], "label": "Skiing"}, {"segment": [111.40239469578785, 206.1775663026521], "label": "Skiing"}]}, "jxAZc0VgG4w": {"duration": 99.89, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=jxAZc0VgG4w", "annotations": [{"segment": [3.7400906321967167, 32.88162971294852], "label": "Triple jump"}, {"segment": [50.64705998439938, 79.47692489859594], "label": "Triple jump"}]}, "zSvsPo5jbn8": {"duration": 113.87, "subset": "testing", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=zSvsPo5jbn8", "annotations": []}, "0O589B9zr84": {"duration": 114.92, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0O589B9zr84", "annotations": [{"segment": [60.431869119999995, 85.48866926677067], "label": "Hand washing clothes"}]}, "VLg5XqBRorY": {"duration": 185.899, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=VLg5XqBRorY", "annotations": [{"segment": [11.018280811232449, 152.226248049922], "label": "Making an omelette"}]}, "r0WjqMOxP-Y": {"duration": 45.77, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=r0WjqMOxP-Y", "annotations": []}, "8dhDEj0Dd5s": {"duration": 157.339, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=8dhDEj0Dd5s", "annotations": [{"segment": [0.009999999999999998, 157.281], "label": "Baton twirling"}]}, "kaRZaCGzNzw": {"duration": 151.42000000000002, "subset": "validation", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=kaRZaCGzNzw", "annotations": [{"segment": [24.59298649620823, 56.904782837329726], "label": "Shaving"}, {"segment": [94.40533479527366, 114.924504734526], "label": "Shaving"}]}, "gPjH9C9wdJw": {"duration": 183.81, "subset": "training", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=gPjH9C9wdJw", "annotations": [{"segment": [3.5137692066052795, 178.16110286488686], "label": "Playing guitarra"}]}, "7MWDmMh3zyA": {"duration": 36.525, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=7MWDmMh3zyA", "annotations": [{"segment": [1.0957536503087695, 30.774136827878863], "label": "River tubing"}]}, "XqmMZs2-3ZI": {"duration": 104.537, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XqmMZs2-3ZI", "annotations": [{"segment": [30.170279095163803, 76.64881716068643], "label": "Baking cookies"}]}, "B-j1ZCJHuD0": {"duration": 96.62, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=B-j1ZCJHuD0", "annotations": []}, "eUvvxpAK3_8": {"duration": 75.47, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eUvvxpAK3_8", "annotations": [{"segment": [2.6676523999999997, 51.566060480000004], "label": "Polishing shoes"}, {"segment": [26.58672184, 68.36132832], "label": "Cleaning shoes"}, {"segment": [57.90201792, 63.32863416], "label": "Polishing shoes"}]}, "l4LFSd-7hxU": {"duration": 16.556, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=l4LFSd-7hxU", "annotations": [{"segment": [0.2, 15.448670835735838], "label": "Table soccer"}]}, "PJ72Yl0B1rY": {"duration": 206.332, "subset": "validation", "resolution": "358x240", "url": "https://www.youtube.com/watch?v=PJ72Yl0B1rY", "annotations": [{"segment": [10.941581903276132, 24.457653666146648], "label": "Beach soccer"}, {"segment": [35.39923556942278, 67.25854758190327], "label": "Beach soccer"}, {"segment": [72.72933853354134, 88.49808892355695], "label": "Beach soccer"}, {"segment": [93.32525741029642, 95.8997472698908], "label": "Beach soccer"}, {"segment": [99.43967082683308, 123.89732449297972], "label": "Beach soccer"}, {"segment": [132.90803900156007, 
133.22985023400935], "label": "Beach soccer"}, {"segment": [99.43967082683308, 100.08329329173168], "label": "Beach soccer"}, {"segment": [115.85204368174726, 118.42653354134166], "label": "Beach soccer"}, {"segment": [120.67921216848674, 123.90933697347893], "label": "Beach soccer"}, {"segment": [131.6335553822153, 133.56460998439937], "label": "Beach soccer"}, {"segment": [137.1048767550702, 139.35777379095163], "label": "Beach soccer"}, {"segment": [142.89804056162248, 144.18541029641185], "label": "Beach soccer"}, {"segment": [147.72567706708267, 151.2659438377535], "label": "Beach soccer"}, {"segment": [157.70279251170047, 166.39253822152884], "label": "Beach soccer"}, {"segment": [168.0017503900156, 173.4730717628705], "label": "Beach soccer"}, {"segment": [180.55360530421217, 183.12834477379096], "label": "Beach soccer"}, {"segment": [187.95598127925115, 195.35835725429015], "label": "Beach soccer"}]}, "4bw6ocN0jGU": {"duration": 162.819, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=4bw6ocN0jGU", "annotations": [{"segment": [11.938316887051482, 147.32391052106084], "label": "Waterskiing"}]}, "RZFU1gXLqDs": {"duration": 139.204, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RZFU1gXLqDs", "annotations": [{"segment": [24.030483043560867, 134.87380745471464], "label": "Riding bumper cars"}]}, "7n9It_jjaBg": {"duration": 50.2, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7n9It_jjaBg", "annotations": [{"segment": [16.91156386901832, 49.44091066029558], "label": "Bungee jumping"}]}, "_vVKdZM5Cy8": {"duration": 107.02000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_vVKdZM5Cy8", "annotations": [{"segment": [3.5156069999999997, 59.13176059999999], "label": "Volleyball"}, {"segment": [100.38137254212168, 106.773333], "label": "Volleyball"}]}, "xbEI98IA-XU": {"duration": 111.201, "subset": 
"training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xbEI98IA-XU", "annotations": [{"segment": [17.68546333853354, 81.14506708268331], "label": "Kneeling"}]}, "J7JLo0nQ5pA": {"duration": 179.398, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=J7JLo0nQ5pA", "annotations": [{"segment": [30.011335820776726, 156.59545997030943], "label": "Making a cake"}]}, "7IfBJgi3WCE": {"duration": 47.067, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=7IfBJgi3WCE", "annotations": [{"segment": [14.758870900156007, 45.52487541341654], "label": "Cutting the grass"}]}, "qUFPq8D0jMc": {"duration": 179.281, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qUFPq8D0jMc", "annotations": [{"segment": [0.01, 163.05924999063964], "label": "Rope skipping"}]}, "G8gTBLLf8Bo": {"duration": 102.63, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=G8gTBLLf8Bo", "annotations": [{"segment": [74.29226189079563, 93.02544139860832], "label": "Cleaning windows"}]}, "HEuC9ZEY91E": {"duration": 151.882, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HEuC9ZEY91E", "annotations": [{"segment": [12.55636973478939, 25.823477379095163], "label": "Snow tubing"}, {"segment": [49.9885663026521, 52.35769266770671], "label": "Snow tubing"}, {"segment": [58.04359594383775, 61.360372854914196], "label": "Snow tubing"}, {"segment": [64.20332449297972, 75.33821840873634], "label": "Snow tubing"}, {"segment": [76.99660686427455, 78.89190795631825], "label": "Snow tubing"}]}, "mkK9iEzRrqQ": {"duration": 17.51, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mkK9iEzRrqQ", "annotations": [{"segment": [0.12272152886115446, 15.994705928237131], "label": "Playing field hockey"}]}, "chLoV5zQxVA": {"duration": 195.814, "subset": "training", "resolution": 
"854x470", "url": "https://www.youtube.com/watch?v=chLoV5zQxVA", "annotations": [{"segment": [86.1458868477188, 131.35720334935846], "label": "Kite flying"}, {"segment": [153.65737929953198, 195.20291326521058], "label": "Kite flying"}]}, "32-Bxdbf3mQ": {"duration": 35.41, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=32-Bxdbf3mQ", "annotations": [{"segment": [3.9863057668026243, 35.26000240698954], "label": "Mowing the lawn"}]}, "6kBo1TR--dk": {"duration": 148.7, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6kBo1TR--dk", "annotations": [{"segment": [1.855850517013878, 28.533701377535102], "label": "Running a marathon"}, {"segment": [30.157570561622464, 50.10796339469579], "label": "Running a marathon"}, {"segment": [72.14618803588144, 113.20688026209048], "label": "Running a marathon"}]}, "tJbScqVWPsU": {"duration": 200.76, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tJbScqVWPsU", "annotations": [{"segment": [14.09131825273011, 161.58044929797194], "label": "Skateboarding"}]}, "t6Hc2tHRrs4": {"duration": 120.419, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=t6Hc2tHRrs4", "annotations": [{"segment": [10.892508580343215, 116.24935881435258], "label": "BMX"}]}, "ptukoEe5jvI": {"duration": 180.05, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ptukoEe5jvI", "annotations": [{"segment": [6.739469578783152, 171.29485179407175], "label": "Volleyball"}]}, "drzTgrfN19M": {"duration": 197.72, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=drzTgrfN19M", "annotations": [{"segment": [28.994514396255852, 177.05160918564744], "label": "Preparing pasta"}]}, "npzRJiGHc9c": {"duration": 35.5, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=npzRJiGHc9c", "annotations": [{"segment": 
[10.468214354134165, 29.96457124648986], "label": "Playing field hockey"}]}, "47QZ84A8H0o": {"duration": 165.4, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=47QZ84A8H0o", "annotations": []}, "eMNvlcxXZj8": {"duration": 109.0, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=eMNvlcxXZj8", "annotations": [{"segment": [18.28590093603744, 104.01700858034322], "label": "Doing step aerobics"}]}, "RfYI6hxUM44": {"duration": 194.955, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=RfYI6hxUM44", "annotations": []}, "6FPoGGaox4g": {"duration": 28.003, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=6FPoGGaox4g", "annotations": [{"segment": [0.01, 27.524790951638064], "label": "Fun sliding down"}]}, "C6DH4l7fop0": {"duration": 29.35, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=C6DH4l7fop0", "annotations": [{"segment": [2.838759856474259, 29.349114], "label": "Baking cookies"}]}, "k1a56VMhv6Q": {"duration": 227.28, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=k1a56VMhv6Q", "annotations": []}, "GyLX64aJY80": {"duration": 192.26, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=GyLX64aJY80", "annotations": [{"segment": [4.797836705148206, 172.87340075999998], "label": "Making a sandwich"}]}, "Q1mh4F0L8Ec": {"duration": 222.77, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Q1mh4F0L8Ec", "annotations": [{"segment": [7.381578448844491, 138.5465450710074], "label": "Grooming horse"}, {"segment": [151.7324411864629, 210.02798011804268], "label": "Grooming horse"}]}, "IGamNJ5yIh4": {"duration": 39.591, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=IGamNJ5yIh4", "annotations": [{"segment": [2.0318352860096742, 
11.636879261126316], "label": "Beach soccer"}, {"segment": [29.36926813826473, 33.740794562837046], "label": "Beach soccer"}]}, "6ChRD-1NwSg": {"duration": 107.50999999999999, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6ChRD-1NwSg", "annotations": [{"segment": [6.373325409206845, 9.392269024094297], "label": "Vacuuming floor"}, {"segment": [18.784538048188594, 23.648391649951712], "label": "Vacuuming floor"}, {"segment": [49.47713146621103, 52.49607508109848], "label": "Vacuuming floor"}, {"segment": [75.47359037218631, 77.65393853849392], "label": "Vacuuming floor"}, {"segment": [94.42584706396256, 100.96689153198128], "label": "Vacuuming floor"}]}, "SIeV0DPSpHQ": {"duration": 175.799, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=SIeV0DPSpHQ", "annotations": [{"segment": [12.979499226241245, 156.73977083839077], "label": "Spread mulch"}]}, "dDN37ufNu84": {"duration": 85.26, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dDN37ufNu84", "annotations": [{"segment": [8.437277691107646, 27.731790750000002], "label": "Ping-pong"}, {"segment": [34.64150427, 81.11744929797192], "label": "Ping-pong"}]}, "AvjExlAl18I": {"duration": 148.79, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=AvjExlAl18I", "annotations": [{"segment": [7.42806704524181, 131.84819005304212], "label": "Tango"}]}, "SA2GomtZkvE": {"duration": 161.54, "subset": "training", "resolution": "600x360", "url": "https://www.youtube.com/watch?v=SA2GomtZkvE", "annotations": [{"segment": [0, 161.481], "label": "Playing accordion"}]}, "B-nlhZ0RR4A": {"duration": 109.74000000000001, "subset": "training", "resolution": "626x360", "url": "https://www.youtube.com/watch?v=B-nlhZ0RR4A", "annotations": [{"segment": [8.788836329999999, 96.55797422542902], "label": "Snatch"}]}, "_3xMhj4mbsk": {"duration": 197.602, "subset": "validation", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=_3xMhj4mbsk", "annotations": [{"segment": [15.078643302180685, 143.0932476635514], "label": "Painting furniture"}]}, "skIP_U4EYDQ": {"duration": 17.857, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=skIP_U4EYDQ", "annotations": [{"segment": [10.501975488299532, 13.17621858346334], "label": "Building sandcastles"}]}, "pnFRC2_HPrE": {"duration": 180.489, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=pnFRC2_HPrE", "annotations": [{"segment": [0.01, 89.81010634805556], "label": "Elliptical trainer"}]}, "5x1diXJ8gME": {"duration": 72.31, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5x1diXJ8gME", "annotations": [{"segment": [1.1569929811184014, 71.99977082068227], "label": "Playing polo"}]}, "vaqswUFfvMY": {"duration": 11.91, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vaqswUFfvMY", "annotations": [{"segment": [2.5458605273010924, 10.499351809672387], "label": "Clean and jerk"}]}, "W1FmiUTYt3I": {"duration": 121.72, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=W1FmiUTYt3I", "annotations": [{"segment": [67.00391575663026, 85.03613104524182], "label": "Washing hands"}]}, "qMj2sCoRHqY": {"duration": 154.51, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qMj2sCoRHqY", "annotations": [{"segment": [2.8924600998439938, 152.09519358346333], "label": "Playing harmonica"}]}, "4xmQoAK3QNc": {"duration": 90.88, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4xmQoAK3QNc", "annotations": [{"segment": [0.01, 22.827051195007797], "label": "Cricket"}, {"segment": [38.42317312948518, 43.810924343213735], "label": "Cricket"}, {"segment": [47.92262921684867, 65.36192919812792], "label": "Cricket"}, {"segment": [70.74968041185647, 
73.72712187207489], "label": "Cricket"}, {"segment": [76.98812918564742, 80.1073535725429], "label": "Cricket"}]}, "KCxa27MM7Cg": {"duration": 37.59, "subset": "training", "resolution": "180x240", "url": "https://www.youtube.com/watch?v=KCxa27MM7Cg", "annotations": [{"segment": [8.797138806552262, 37.5931065], "label": "Doing nails"}]}, "n913aoCh1IQ": {"duration": 78.972, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=n913aoCh1IQ", "annotations": [{"segment": [0.01, 25.201963379279913], "label": "Slacklining"}, {"segment": [28.763644139968015, 78.971065], "label": "Slacklining"}]}, "VJyNbaAz6Xs": {"duration": 167.44, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VJyNbaAz6Xs", "annotations": [{"segment": [0.01, 31.248260101227824], "label": "Snowboarding"}, {"segment": [35.41469531118117, 41.14354372486702], "label": "Snowboarding"}, {"segment": [53.64284935472705, 103.64007187416716], "label": "Snowboarding"}, {"segment": [119.00380171087014, 151.03327238738646], "label": "Snowboarding"}]}, "C0MIMsY6okw": {"duration": 217.734, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=C0MIMsY6okw", "annotations": [{"segment": [14.606148414644068, 197.01316399375978], "label": "Making a lemonade"}]}, "eD2SW7ibzmM": {"duration": 202.01, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eD2SW7ibzmM", "annotations": [{"segment": [17.959090483619345, 195.99404368174726], "label": "Zumba"}]}, "DvTZ5mmF8NM": {"duration": 26.61, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DvTZ5mmF8NM", "annotations": [{"segment": [0, 26.610067], "label": "Preparing pasta"}]}, "6_XA3oKwwzU": {"duration": 187.177, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=6_XA3oKwwzU", "annotations": [{"segment": [21.024455410869496, 175.78780774088105], "label": 
"Baton twirling"}]}, "saShR7NwjMg": {"duration": 193.28, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=saShR7NwjMg", "annotations": [{"segment": [15.676336973478941, 140.1826287051482], "label": "Washing hands"}]}, "hFWd7qM_imU": {"duration": 232.85, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hFWd7qM_imU", "annotations": []}, "18yZR3NDaXM": {"duration": 215.95, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=18yZR3NDaXM", "annotations": []}, "FkQ1V0V_XCs": {"duration": 238.03, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FkQ1V0V_XCs", "annotations": [{"segment": [77.60967366041581, 188.26844280301825], "label": "Doing nails"}]}, "SPjlJjW0kBs": {"duration": 180.071, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SPjlJjW0kBs", "annotations": []}, "xVq_Q3zVUYw": {"duration": 80.25, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xVq_Q3zVUYw", "annotations": []}, "CAG75_XxmEE": {"duration": 29.049, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CAG75_XxmEE", "annotations": [{"segment": [0.2, 27.41748205928237], "label": "Cutting the grass"}]}, "59yYq2zQ_TY": {"duration": 149.142, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=59yYq2zQ_TY", "annotations": []}, "oTKYejnHLtE": {"duration": 57.15, "subset": "training", "resolution": "634x480", "url": "https://www.youtube.com/watch?v=oTKYejnHLtE", "annotations": [{"segment": [9.761699679407176, 54.95970641419657], "label": "Playing saxophone"}]}, "7tDDXbiQ8AI": {"duration": 177.029, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7tDDXbiQ8AI", "annotations": [{"segment": [43.63583714505043, 82.576679154241], "label": "Making a lemonade"}, {"segment": 
[108.26106430923903, 148.58278724074134], "label": "Making a lemonade"}]}, "mQ9e5RyHE_k": {"duration": 112.408, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=mQ9e5RyHE_k", "annotations": [{"segment": [11.623206497632083, 26.85240743221152], "label": "Playing ten pins"}, {"segment": [45.232477525669466, 47.50810525152617], "label": "Playing ten pins"}, {"segment": [61.51196817987507, 63.43749933252305], "label": "Playing ten pins"}, {"segment": [72.89010680915857, 77.44136226087197], "label": "Playing ten pins"}]}, "TSIz-6s4OeY": {"duration": 60.74, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=TSIz-6s4OeY", "annotations": [{"segment": [4.6319906396255845, 16.54282371294852], "label": "Ironing clothes"}, {"segment": [30.81691731669267, 49.43941029641186], "label": "Ironing clothes"}, {"segment": [50.95189703588144, 59.27057410296412], "label": "Ironing clothes"}]}, "dF8WhhsAux8": {"duration": 236.75, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dF8WhhsAux8", "annotations": []}, "rAuz_Pf3lp8": {"duration": 174.916, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=rAuz_Pf3lp8", "annotations": [{"segment": [48.693546797814264, 94.39475662674117], "label": "River tubing"}, {"segment": [96.57100471383292, 97.38709774649233], "label": "River tubing"}, {"segment": [115.61317547588581, 151.79329992378626], "label": "River tubing"}]}, "Wzo3_EYrfAY": {"duration": 41.959, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=Wzo3_EYrfAY", "annotations": [{"segment": [5.760287388455539, 41.75845804988662], "label": "Powerbocking"}]}, "YBrcJxnXuVU": {"duration": 15.65, "subset": "validation", "resolution": "206x360", "url": "https://www.youtube.com/watch?v=YBrcJxnXuVU", "annotations": [{"segment": [5.200351801872075, 12.524790959438377], "label": "Drinking beer"}]}, "LHewj6shKGw": 
{"duration": 71.66, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LHewj6shKGw", "annotations": [{"segment": [0, 68.09094383775351], "label": "High jump"}]}, "F8pv5rFrEm4": {"duration": 101.31, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=F8pv5rFrEm4", "annotations": [{"segment": [2.9225285819032765, 90.914335074883], "label": "Doing step aerobics"}]}, "ahOnhEvMgq0": {"duration": 78.39, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ahOnhEvMgq0", "annotations": [{"segment": [5.870120312012481, 28.739130694227768], "label": "Ping-pong"}, {"segment": [27.7931745, 40.968548010920436], "label": "Ping-pong"}]}, "8Da6w-Eg3Ko": {"duration": 226.024, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8Da6w-Eg3Ko", "annotations": [{"segment": [22.20833541341654, 223.14089391575666], "label": "Trimming branches or hedges"}]}, "v6Ui5kgi2OI": {"duration": 82.385, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=v6Ui5kgi2OI", "annotations": [{"segment": [24.815996836982322, 73.01279809602904], "label": "Making a cake"}]}, "MYFVsllwDnc": {"duration": 96.688, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MYFVsllwDnc", "annotations": [{"segment": [0.2, 51.43614741029641], "label": "Cutting the grass"}, {"segment": [79.94474524180967, 96.48789], "label": "Cutting the grass"}]}, "SI0GumzxYt4": {"duration": 133.59, "subset": "training", "resolution": "478x360", "url": "https://www.youtube.com/watch?v=SI0GumzxYt4", "annotations": [{"segment": [10.520439365054601, 116.34980961154446], "label": "Sailing"}]}, "iAPv-QSvZF4": {"duration": 36.76, "subset": "validation", "resolution": "482x360", "url": "https://www.youtube.com/watch?v=iAPv-QSvZF4", "annotations": [{"segment": [0, 34.75016430889236], "label": "Using the balance beam"}]}, 
"ePqfnkzAQjQ": {"duration": 136.32, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=ePqfnkzAQjQ", "annotations": [{"segment": [0.01, 136.32374149659864], "label": "Getting a tattoo"}]}, "chmegNbBTNc": {"duration": 180.33, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=chmegNbBTNc", "annotations": []}, "6HyNydVIji4": {"duration": 224.32999999999998, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6HyNydVIji4", "annotations": [{"segment": [8.049199380220108, 219.77813952574104], "label": "Playing flauta"}]}, "dRF1C_sBE5M": {"duration": 227.23, "subset": "testing", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=dRF1C_sBE5M", "annotations": []}, "oD5lGBt7U7g": {"duration": 211.14, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=oD5lGBt7U7g", "annotations": [{"segment": [40.679662994683056, 179.0234561749817], "label": "Assembling bicycle"}]}, "Zhx6BYVb64g": {"duration": 162.31, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Zhx6BYVb64g", "annotations": []}, "E1b_DF716Q4": {"duration": 178.84, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=E1b_DF716Q4", "annotations": [{"segment": [157.35695906396256, 169.6330338845554], "label": "Removing curlers"}]}, "hsI_BHN5h_0": {"duration": 158.686, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hsI_BHN5h_0", "annotations": [{"segment": [83.51489977474748, 89.93394338845465], "label": "Beach soccer"}]}, "cVFVREeD6hM": {"duration": 86.216, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cVFVREeD6hM", "annotations": []}, "p-JFdMUWVvU": {"duration": 36.293, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=p-JFdMUWVvU", "annotations": [{"segment": [0.01, 36.261], 
"label": "Curling"}]}, "GVn7MSMHXxk": {"duration": 56.495, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GVn7MSMHXxk", "annotations": [{"segment": [5.376198314352574, 54.114520737909515], "label": "Disc dog"}]}, "tr1sNwRTMd8": {"duration": 106.12, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tr1sNwRTMd8", "annotations": [{"segment": [41.071211399999996, 83.11230744], "label": "Triple jump"}]}, "E1AVyl1RwF8": {"duration": 52.04, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=E1AVyl1RwF8", "annotations": [{"segment": [1.4612270321372856, 51.22412540436817], "label": "Doing kickboxing"}]}, "5kXIZCs22l8": {"duration": 105.7, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5kXIZCs22l8", "annotations": [{"segment": [17.14894598439938, 84.42558023088924], "label": "Getting a haircut"}]}, "pOABO1PhzSI": {"duration": 26.75, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=pOABO1PhzSI", "annotations": []}, "g21ESQzeffY": {"duration": 135.54, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=g21ESQzeffY", "annotations": []}, "8xYzQMbI5fM": {"duration": 90.744, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8xYzQMbI5fM", "annotations": [{"segment": [0.01, 81.52152262090485], "label": "Fixing the roof"}]}, "a0bj_vVpK9s": {"duration": 50.156, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=a0bj_vVpK9s", "annotations": [{"segment": [9.78063612340412, 44.36496453510141], "label": "Snow tubing"}]}, "_eanWnL3FtM": {"duration": 238.795, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=_eanWnL3FtM", "annotations": [{"segment": [51.02220492357109, 68.5261730360371], "label": "Rock-paper-scissors"}, {"segment": [83.42316717430602, 
87.8922654157867], "label": "Rock-paper-scissors"}, {"segment": [157.53571301219395, 163.49451066750152], "label": "Rock-paper-scissors"}, {"segment": [181.0058096723869, 185.10264898595943], "label": "Rock-paper-scissors"}]}, "1ATBCBUgflA": {"duration": 68.15, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=1ATBCBUgflA", "annotations": [{"segment": [13.488248000000002, 59.23183798361935], "label": "Polishing forniture"}]}, "yXH7GcOduNo": {"duration": 86.59, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=yXH7GcOduNo", "annotations": [{"segment": [17.94515575, 85.71113104524181], "label": "Hammer throw"}]}, "OccEAILrU7M": {"duration": 8.36, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=OccEAILrU7M", "annotations": []}, "xSIh6JjAR_Y": {"duration": 233.57, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xSIh6JjAR_Y", "annotations": [{"segment": [40.68395505859809, 214.3172869107791], "label": "Camel ride"}]}, "-BVpaPkee2I": {"duration": 96.39, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-BVpaPkee2I", "annotations": [{"segment": [10.676146433612447, 56.68883387988581], "label": "Spinning"}]}, "dbLtukGqkhI": {"duration": 64.622, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dbLtukGqkhI", "annotations": [{"segment": [0.2, 64.381], "label": "Kite flying"}]}, "w9LRKWLmw0A": {"duration": 144.429, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=w9LRKWLmw0A", "annotations": [{"segment": [0.01, 144.42814058956915], "label": "Mooping floor"}]}, "_2txWbQfJrY": {"duration": 142.687, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_2txWbQfJrY", "annotations": []}, "mwjAm0Ofwzc": {"duration": 121.46, "subset": "testing", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=mwjAm0Ofwzc", "annotations": []}, "gTh9bd9yV9E": {"duration": 156.85, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=gTh9bd9yV9E", "annotations": [{"segment": [10.397382995319813, 65.19770748829953], "label": "Putting on makeup"}, {"segment": [72.53703666146646, 152.0464360374415], "label": "Putting on makeup"}]}, "wrn5qM9aVeo": {"duration": 168.95, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wrn5qM9aVeo", "annotations": [{"segment": [34.77736661466459, 159.13279875195008], "label": "Preparing pasta"}]}, "JNBWPj42n-g": {"duration": 180.4, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JNBWPj42n-g", "annotations": [{"segment": [106.3753236, 139.89451855000002], "label": "Wrapping presents"}, {"segment": [106.3753236, 139.89451855000002], "label": "Wrapping presents"}]}, "isqM83F_aUk": {"duration": 98.128, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=isqM83F_aUk", "annotations": []}, "BEtftLo6NKQ": {"duration": 173.059, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BEtftLo6NKQ", "annotations": [{"segment": [54.24836349453978, 64.50427301092044], "label": "Mooping floor"}, {"segment": [75.56985959438379, 80.9770670826833], "label": "Mooping floor"}, {"segment": [132.53246645865832, 138.7407082683307], "label": "Mooping floor"}]}, "AQ2-EcbX4dE": {"duration": 14.98, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AQ2-EcbX4dE", "annotations": [{"segment": [0, 14.976869], "label": "Grooming horse"}]}, "abDf2LeTQfk": {"duration": 36.73, "subset": "testing", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=abDf2LeTQfk", "annotations": []}, "PwTwGnAVFhQ": {"duration": 6.53, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=PwTwGnAVFhQ", 
"annotations": []}, "23ApAC123aQ": {"duration": 133.747, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=23ApAC123aQ", "annotations": [{"segment": [34.222701697501435, 72.3468281450592], "label": "Hand car wash"}, {"segment": [90.47141306561996, 94.84631287403117], "label": "Hand car wash"}]}, "SB08N4L6Ujw": {"duration": 133.17, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SB08N4L6Ujw", "annotations": [{"segment": [48.59606864274571, 71.85572542901717], "label": "Doing kickboxing"}]}, "XCJ2StGMgW4": {"duration": 34.76, "subset": "validation", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=XCJ2StGMgW4", "annotations": [{"segment": [0.010000000000000002, 32.899141649765994], "label": "Playing bagpipes"}]}, "MlnK2sa7mm4": {"duration": 157.53, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MlnK2sa7mm4", "annotations": [{"segment": [29.981203850234007, 43.98881548517941], "label": "Playing harmonica"}, {"segment": [52.09848537909516, 62.66563099843994], "label": "Playing harmonica"}, {"segment": [69.05506788455537, 82.07968922932916], "label": "Playing harmonica"}, {"segment": [94.12132028393137, 97.31603872698908], "label": "Playing harmonica"}, {"segment": [122.13654355382215, 125.08551442433698], "label": "Playing harmonica"}, {"segment": [134.66966975351014, 140.56761149453976], "label": "Playing harmonica"}]}, "9aRUmbcYxUM": {"duration": 160.288, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9aRUmbcYxUM", "annotations": [{"segment": [2.499235569422777, 152.9723120124805], "label": "Arm wrestling"}]}, "VQWUgufKtA0": {"duration": 215.946, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VQWUgufKtA0", "annotations": []}, "qtJhNEdrF8w": {"duration": 116.63, "subset": "testing", "resolution": "426x240", "url": 
"https://www.youtube.com/watch?v=qtJhNEdrF8w", "annotations": []}, "gISiybfqigc": {"duration": 91.91, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gISiybfqigc", "annotations": []}, "rF2b7MLtVv4": {"duration": 168.763, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=rF2b7MLtVv4", "annotations": []}, "G836MDBhEpg": {"duration": 61.09, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=G836MDBhEpg", "annotations": [{"segment": [1.45518762, 41.938211388455535], "label": "Mixing drinks"}]}, "RHHXOAkuWjg": {"duration": 37.199, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RHHXOAkuWjg", "annotations": []}, "5oy5Yi6fzJU": {"duration": 43.58, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=5oy5Yi6fzJU", "annotations": [{"segment": [0.2, 36.92046801872075], "label": "Mowing the lawn"}]}, "IJ2m8Bi3LVE": {"duration": 200.551, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=IJ2m8Bi3LVE", "annotations": [{"segment": [0.01, 183.3428045897036], "label": "Putting in contact lenses"}]}, "SSTom962aPk": {"duration": 85.13, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SSTom962aPk", "annotations": [{"segment": [35.372989859594384, 81.96180577223089], "label": "Getting a haircut"}]}, "NkvwL_M_lDg": {"duration": 158.314, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=NkvwL_M_lDg", "annotations": [{"segment": [0.2, 156.06210747663553], "label": "Playing congas"}]}, "_Hi9gQEaT-Q": {"duration": 16.07, "subset": "training", "resolution": "400x226", "url": "https://www.youtube.com/watch?v=_Hi9gQEaT-Q", "annotations": [{"segment": [0, 16.067619], "label": "Walking the dog"}]}, "Ufm19cvrxGs": {"duration": 158.361, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=Ufm19cvrxGs", "annotations": [{"segment": [59.107961667689366, 118.95477666118288], "label": "Assembling bicycle"}]}, "4MwnHtlaEUY": {"duration": 196.232, "subset": "testing", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=4MwnHtlaEUY", "annotations": []}, "oNZGC8cjquw": {"duration": 237.8, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=oNZGC8cjquw", "annotations": []}, "Fz1NCLAB8bA": {"duration": 122.56, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Fz1NCLAB8bA", "annotations": [{"segment": [59.07873630421217, 92.53756754446178], "label": "Shaving"}]}, "_EKqIcXdW04": {"duration": 54.08, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_EKqIcXdW04", "annotations": [{"segment": [0.01, 53.23291040881623], "label": "Kneeling"}]}, "SllpW4we_zk": {"duration": 82.199, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SllpW4we_zk", "annotations": []}, "2ShsRSJ9cqE": {"duration": 43.817, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=2ShsRSJ9cqE", "annotations": []}, "m7v8AUpPZes": {"duration": 213.25, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=m7v8AUpPZes", "annotations": []}, "YbD0P8dH6Oc": {"duration": 86.356, "subset": "training", "resolution": "426x238", "url": "https://www.youtube.com/watch?v=YbD0P8dH6Oc", "annotations": [{"segment": [14.772951051120366, 62.58359792586782], "label": "Sumo"}]}, "9-xf_gylOR0": {"duration": 38.128, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9-xf_gylOR0", "annotations": [{"segment": [1.3045055364680866, 1.8974629334769004], "label": "Painting fence"}, {"segment": [4.506475711081877, 8.894360837053883], "label": "Painting fence"}, {"segment": [16.365624700195408, 23.777592818391366], "label": "Painting 
fence"}]}, "KnI7IWbLWmE": {"duration": 161.87, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KnI7IWbLWmE", "annotations": [{"segment": [1.010086271450858, 84.84724680187207], "label": "Playing bagpipes"}]}, "SCOViAddWJU": {"duration": 121.302, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SCOViAddWJU", "annotations": []}, "6Q8QqcnT05I": {"duration": 237.89, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6Q8QqcnT05I", "annotations": []}, "Mgy7IRETxSk": {"duration": 176.263, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Mgy7IRETxSk", "annotations": [{"segment": [0.01, 176.2622902494331], "label": "Drum corps"}]}, "scapZQe6_Iw": {"duration": 214.88, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=scapZQe6_Iw", "annotations": [{"segment": [2.6817779594383775, 156.54878934204987], "label": "Cricket"}]}, "bOBQLGfEeyg": {"duration": 59.7, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bOBQLGfEeyg", "annotations": [{"segment": [6.418650510140405, 58.88414163650546], "label": "Dodgeball"}]}, "fPbVku8Ssyw": {"duration": 7.059, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=fPbVku8Ssyw", "annotations": [{"segment": [0.2, 6.858412698412698], "label": "Building sandcastles"}]}, "YCdPa5gJYmc": {"duration": 30.88, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YCdPa5gJYmc", "annotations": [{"segment": [0, 25.72738106396256], "label": "Hammer throw"}]}, "aRZd3O6-PKw": {"duration": 97.849, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=aRZd3O6-PKw", "annotations": [{"segment": [1.8318048861154448, 80.90471580343214], "label": "Hitting a pinata"}]}, "4X8raZBOzvM": {"duration": 97.28999999999999, "subset": "training", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4X8raZBOzvM", "annotations": [{"segment": [8.493566302652107, 90.09247113884555], "label": "Playing bagpipes"}]}, "u1ImXpBa4nM": {"duration": 149.235, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=u1ImXpBa4nM", "annotations": []}, "l0Sz7dqLzqs": {"duration": 146.5, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=l0Sz7dqLzqs", "annotations": [{"segment": [10.738950078003121, 21.477900156006243], "label": "Plataform diving"}, {"segment": [23.077318252730112, 90.93834321372856], "label": "Plataform diving"}, {"segment": [103.73368798751952, 109.902872074883], "label": "Plataform diving"}]}, "-G8N5FxjZbw": {"duration": 116.61, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=-G8N5FxjZbw", "annotations": [{"segment": [0.01, 110.65333561700469], "label": "Playing bagpipes"}]}, "MbjyQGdTtK8": {"duration": 129.99, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MbjyQGdTtK8", "annotations": []}, "PT4x_Y5lu_g": {"duration": 147.94, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=PT4x_Y5lu_g", "annotations": [{"segment": [27.69441455209229, 122.77857118094248], "label": "Springboard diving"}]}, "4ZyPBOe4P0U": {"duration": 126.36, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=4ZyPBOe4P0U", "annotations": [{"segment": [53.029087126365056, 83.58488082371295], "label": "Playing badminton"}]}, "sRgBK2_nb0I": {"duration": 53.11, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sRgBK2_nb0I", "annotations": [{"segment": [27.48716144, 48.2157768], "label": "Using the pommel horse"}]}, "eNq-jGrQuV0": {"duration": 171.526, "subset": "training", "resolution": "854x474", "url": "https://www.youtube.com/watch?v=eNq-jGrQuV0", "annotations": 
[{"segment": [21.94245867249656, 157.87866514820593], "label": "Hurling"}]}, "AJ_a4fE-rR0": {"duration": 121.046, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AJ_a4fE-rR0", "annotations": [{"segment": [11.295078496097025, 115.96283251164911], "label": "Surfing"}]}, "lhKZvm9ukbs": {"duration": 199.831, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lhKZvm9ukbs", "annotations": []}, "epqnpUCVLK8": {"duration": 63.321, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=epqnpUCVLK8", "annotations": [{"segment": [0.2, 4.662537066592864], "label": "Doing crunches"}]}, "DcHywvflgqE": {"duration": 159.48, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DcHywvflgqE", "annotations": []}, "e6mpdQ3BFhA": {"duration": 175.265, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=e6mpdQ3BFhA", "annotations": [{"segment": [17.198791722296395, 35.762566914616315], "label": "Elliptical trainer"}, {"segment": [57.87529912899739, 70.43314705321382], "label": "Elliptical trainer"}, {"segment": [98.8248032297031, 102.10076355775955], "label": "Elliptical trainer"}, {"segment": [146.053231292517, 158.61107781308414], "label": "Elliptical trainer"}]}, "AdFg_21LRg4": {"duration": 207.122, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=AdFg_21LRg4", "annotations": [{"segment": [130.97041433021806, 160.64843925233643], "label": "Bullfighting"}]}, "ObqrS-ZONKM": {"duration": 127.22, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ObqrS-ZONKM", "annotations": [{"segment": [0.01, 0.02], "label": "Cumbia"}]}, "UfYMJBu4HI0": {"duration": 120.187, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UfYMJBu4HI0", "annotations": [{"segment": [47.04391791453972, 56.22779033610324], "label": 
"Laying tile"}, {"segment": [71.40929351794072, 76.09494269890796], "label": "Laying tile"}]}, "T1F9ddqt0I0": {"duration": 66.804, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=T1F9ddqt0I0", "annotations": []}, "lsIiSTG2pn4": {"duration": 41.61, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=lsIiSTG2pn4", "annotations": [{"segment": [2.3341591263650545, 38.059761310452416], "label": "Volleyball"}]}, "GR6Ul2pD8_Y": {"duration": 203.802, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=GR6Ul2pD8_Y", "annotations": [{"segment": [118.59278380655225, 145.61794901716067], "label": "Blowing leaves"}, {"segment": [147.52560773790952, 174.55077294851793], "label": "Blowing leaves"}]}, "WP8YccCA2Jc": {"duration": 73.793, "subset": "training", "resolution": "614x480", "url": "https://www.youtube.com/watch?v=WP8YccCA2Jc", "annotations": [{"segment": [31.414591263650543, 61.56339313572543], "label": "Removing ice from car"}]}, "FGFPyp9nJug": {"duration": 229.72, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FGFPyp9nJug", "annotations": [{"segment": [9.317613510140406, 193.51966521060842], "label": "Walking the dog"}]}, "wS_T-RMSSCE": {"duration": 60.28, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wS_T-RMSSCE", "annotations": [{"segment": [0, 10.344294290171606], "label": "Playing water polo"}, {"segment": [15.140285279251168, 23.321681672386894], "label": "Playing water polo"}]}, "yG4C_s7ItA4": {"duration": 114.47999999999999, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yG4C_s7ItA4", "annotations": [{"segment": [21.241964118564745, 23.919522620904836], "label": "Shaving legs"}, {"segment": [60.155814352574104, 62.8333728549142], "label": "Shaving legs"}, {"segment": [69.2716193447738, 74.44913728549142], "label": "Shaving 
legs"}, {"segment": [84.80417316692667, 88.19634009360374], "label": "Shaving legs"}]}, "rM02KWikOo8": {"duration": 59.119, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rM02KWikOo8", "annotations": []}, "9K97gmkVbyQ": {"duration": 175.96, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9K97gmkVbyQ", "annotations": [{"segment": [22.394429200000005, 164.59594352574103], "label": "Wrapping presents"}]}, "91ozOcjKl5M": {"duration": 186.78, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=91ozOcjKl5M", "annotations": [{"segment": [110.70883432000001, 143.86432963], "label": "Preparing pasta"}]}, "2nDbYhgdw2A": {"duration": 163.4, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=2nDbYhgdw2A", "annotations": []}, "VGvjsCblFY0": {"duration": 88.59, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VGvjsCblFY0", "annotations": [{"segment": [0, 7.4625775132605305], "label": "Tumbling"}, {"segment": [9.95010335101404, 15.616134425897036], "label": "Tumbling"}]}, "5aTek77vxBA": {"duration": 43.45, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5aTek77vxBA", "annotations": [{"segment": [0.01, 2.9127971918876754], "label": "Hand washing clothes"}, {"segment": [31.092416536661464, 43.221], "label": "Hand washing clothes"}]}, "Byd_QaPB50k": {"duration": 66.69, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Byd_QaPB50k", "annotations": []}, "TF591ByrGYk": {"duration": 51.64, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TF591ByrGYk", "annotations": [{"segment": [18.00256901, 37.36897683], "label": "Using parallel bars"}, {"segment": [6.21292871, 13.789696229999997], "label": "Using parallel bars"}]}, "WjFaS7tSmPw": {"duration": 189.87, "subset": "testing", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WjFaS7tSmPw", "annotations": []}, "mY7QdIn51S8": {"duration": 227.79, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mY7QdIn51S8", "annotations": []}, "lKKimizxQJM": {"duration": 39.45, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lKKimizxQJM", "annotations": [{"segment": [3.659203588143526, 36.49978705148206], "label": "Hopscotch"}]}, "cHYZPYLwvks": {"duration": 167.463, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cHYZPYLwvks", "annotations": [{"segment": [10.186301092043681, 66.34155070202809], "label": "Scuba diving"}, {"segment": [80.70684711388455, 120.14611544461779], "label": "Scuba diving"}, {"segment": [128.78067550702028, 150.72302184087363], "label": "Scuba diving"}]}, "D5EgASje8MQ": {"duration": 114.17, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=D5EgASje8MQ", "annotations": [{"segment": [11.07450793269995, 110.48640358192537], "label": "Playing accordion"}]}, "41xqneu4_RA": {"duration": 100.96000000000001, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=41xqneu4_RA", "annotations": [{"segment": [6.767368174726989, 17.626633385335413], "label": "Playing racquetball"}, {"segment": [20.931627145085802, 36.040170046801876], "label": "Playing racquetball"}, {"segment": [38.558260530421215, 55.712751950078], "label": "Playing racquetball"}, {"segment": [58.70298439937597, 78.3755663026521], "label": "Playing racquetball"}, {"segment": [81.36579875195008, 95.05791575663027], "label": "Playing racquetball"}]}, "rC24UspQv14": {"duration": 228.555, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rC24UspQv14", "annotations": [{"segment": [22.106573608424338, 41.360686106084245], "label": "Hitting a pinata"}, {"segment": [52.77053054914197, 
58.118895131825276], "label": "Hitting a pinata"}, {"segment": [96.98367776599065, 106.25417637597505], "label": "Hitting a pinata"}, {"segment": [116.23779026365055, 120.51648192979718], "label": "Hitting a pinata"}, {"segment": [130.5000958174727, 131.2132110951638], "label": "Hitting a pinata"}, {"segment": [165.44274442433698, 171.86078192355694], "label": "Hitting a pinata"}, {"segment": [196.46325900390016, 208.94277636349454], "label": "Hitting a pinata"}]}, "9S9hPRDwi24": {"duration": 8.313, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9S9hPRDwi24", "annotations": [{"segment": [1.693575544568621, 7.394849606777019], "label": "Croquet"}]}, "-ntEh6HpeX0": {"duration": 231.085, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=-ntEh6HpeX0", "annotations": [{"segment": [21.232739875389406, 226.72247663551403], "label": "Cleaning sink"}]}, "xyMCaug7LXM": {"duration": 54.78, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xyMCaug7LXM", "annotations": [{"segment": [9.849708660000001, 15.616666146645866], "label": "Javelin throw"}, {"segment": [32.09843369734789, 43.53776911076443], "label": "Javelin throw"}]}, "iJahVlx_yDE": {"duration": 99.568, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iJahVlx_yDE", "annotations": [{"segment": [2.17462048049922, 39.76448878627145], "label": "Snow tubing"}, {"segment": [42.24976933541342, 66.63658472386896], "label": "Snow tubing"}, {"segment": [72.38379599375975, 99.566552], "label": "Snow tubing"}]}, "OQEKZXv6jSw": {"duration": 41.1, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=OQEKZXv6jSw", "annotations": []}, "dF2p3CMDe_g": {"duration": 146.89, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=dF2p3CMDe_g", "annotations": [{"segment": [9.624553769110765, 136.80615714664586], "label": 
"Capoeira"}]}, "M4DcibAnW_E": {"duration": 84.986, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=M4DcibAnW_E", "annotations": [{"segment": [8.485245204368175, 38.44876733229329], "label": "Surfing"}]}, "VZ2CPdIPa30": {"duration": 230.78, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=VZ2CPdIPa30", "annotations": [{"segment": [133.93342217784712, 230.783128], "label": "Bungee jumping"}]}, "cJyl4yqFuK0": {"duration": 95.85, "subset": "training", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=cJyl4yqFuK0", "annotations": [{"segment": [3.4286063943462297, 19.37908984045878], "label": "Washing face"}, {"segment": [29.813985594735605, 36.52213286534214], "label": "Washing face"}, {"segment": [64.24914158384914, 76.17473673159408], "label": "Washing face"}]}, "Np43Q2E8GCE": {"duration": 15.581, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Np43Q2E8GCE", "annotations": [{"segment": [0.01, 10.78540717628705], "label": "Installing carpet"}]}, "YNo7-L8VQWw": {"duration": 149.165, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YNo7-L8VQWw", "annotations": [{"segment": [12.098801872074883, 110.05256318252731], "label": "Removing ice from car"}]}, "WaFDgdqY1DM": {"duration": 40.13, "subset": "validation", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=WaFDgdqY1DM", "annotations": [{"segment": [10.641331669266771, 40.12408], "label": "Drinking coffee"}]}, "R_EnlXwBvtA": {"duration": 187.83, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=R_EnlXwBvtA", "annotations": [{"segment": [1.171675507020281, 185.7105678627145], "label": "Playing squash"}]}, "YH1n8khU-fQ": {"duration": 213.07, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YH1n8khU-fQ", "annotations": []}, "mzbhfWgJ2sU": {"duration": 200.25, 
"subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=mzbhfWgJ2sU", "annotations": [{"segment": [67.15010140405616, 83.07873010920437], "label": "Preparing salad"}, {"segment": [109.93877067082683, 123.36879095163806], "label": "Preparing salad"}, {"segment": [127.11670358814352, 132.11392043681747], "label": "Preparing salad"}, {"segment": [136.7988112324493, 143.66998439937598], "label": "Preparing salad"}, {"segment": [151.18091107644307, 173.35827613104524], "label": "Preparing salad"}]}, "WSGXoKV9vLs": {"duration": 130.54, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=WSGXoKV9vLs", "annotations": [{"segment": [24.879999780000002, 108.27936731999999], "label": "Mixing drinks"}]}, "y7R4iEhqx9A": {"duration": 58.17, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=y7R4iEhqx9A", "annotations": [{"segment": [9.80019739565093, 13.974355545650399], "label": "Drinking beer"}, {"segment": [0.2, 57.965984999999996], "label": "Drinking beer"}, {"segment": [40.01747173946958, 43.46568925897036], "label": "Drinking beer"}, {"segment": [53.084401287051485, 56.441876240249606], "label": "Drinking beer"}]}, "Lx_FcKpTZIw": {"duration": 77.14, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=Lx_FcKpTZIw", "annotations": [{"segment": [5.41270670826833, 77.081], "label": "Washing hands"}]}, "zJKedNWsZKY": {"duration": 119.467, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zJKedNWsZKY", "annotations": [{"segment": [10.526187987519501, 111.87567940717629], "label": "Using the rowing machine"}]}, "F6BJoTLozoc": {"duration": 232.55, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=F6BJoTLozoc", "annotations": [{"segment": [88.70384061, 207.3270046801872], "label": "Belly dance"}]}, "3Yx9IEOdD7o": {"duration": 51.9, "subset": "testing", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=3Yx9IEOdD7o", "annotations": []}, "Pf-vJP5Yt5Q": {"duration": 238.098, "subset": "testing", "resolution": "1916x1080", "url": "https://www.youtube.com/watch?v=Pf-vJP5Yt5Q", "annotations": []}, "lO1r4df9uUc": {"duration": 95.133, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lO1r4df9uUc", "annotations": [{"segment": [0.19999999999999998, 91.71867481123245], "label": "Waterskiing"}]}, "Em4ekFj4XIs": {"duration": 166.325, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Em4ekFj4XIs", "annotations": []}, "5WCgpt2bEJw": {"duration": 16.069, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5WCgpt2bEJw", "annotations": [{"segment": [3.960650087363495, 11.405669555382216], "label": "Playing rubik cube"}]}, "D5iU1Qfy-wk": {"duration": 184.97, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=D5iU1Qfy-wk", "annotations": [{"segment": [109.65452037441499, 181.795652199688], "label": "Tai chi"}]}, "nhky9RGjzwc": {"duration": 26.889, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nhky9RGjzwc", "annotations": [{"segment": [0, 26.888707], "label": "Arm wrestling"}]}, "yXzLQU_jS44": {"duration": 182.556, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yXzLQU_jS44", "annotations": [{"segment": [97.08711544461778, 166.29058346333852], "label": "Putting in contact lenses"}]}, "NqYEX8tUjYg": {"duration": 216.527, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NqYEX8tUjYg", "annotations": [{"segment": [28.54355542112841, 34.62383941616168], "label": "Snowboarding"}, {"segment": [56.242626954057755, 61.309530283252144], "label": "Snowboarding"}, {"segment": [132.24617689197365, 138.66425444228653], "label": "Snowboarding"}, {"segment": [188.65769893837754, 
193.7246022223089], "label": "Snowboarding"}]}, "uzUVSpklbRs": {"duration": 48.832, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uzUVSpklbRs", "annotations": [{"segment": [0.01, 48.83156462585034], "label": "Camel ride"}]}, "PQkambF3iEk": {"duration": 80.156, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PQkambF3iEk", "annotations": [{"segment": [5.3770314948652365, 34.262944874257556], "label": "Changing car wheel"}]}, "ixfgBUWkb1E": {"duration": 47.903, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ixfgBUWkb1E", "annotations": []}, "yRfnDlO7TBw": {"duration": 100.59, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yRfnDlO7TBw", "annotations": []}, "iMATWwGyAUM": {"duration": 178.098, "subset": "validation", "resolution": "852x480", "url": "https://www.youtube.com/watch?v=iMATWwGyAUM", "annotations": [{"segment": [0.2, 12.5029131777516], "label": "Skiing"}, {"segment": [29.729149111542696, 155.03612239937596], "label": "Skiing"}]}, "IHpBwsyMT9Q": {"duration": 59.327, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IHpBwsyMT9Q", "annotations": [{"segment": [0.01, 59.32698412698413], "label": "Throwing darts"}]}, "LvbM6zv8XVc": {"duration": 184.25, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=LvbM6zv8XVc", "annotations": [{"segment": [2.58697825464039, 56.05119551720845], "label": "Cumbia"}]}, "5TMKHLOACYg": {"duration": 151.046, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=5TMKHLOACYg", "annotations": [{"segment": [31.213155226209047, 39.22256864274571], "label": "Applying sunscreen"}]}, "LYrqEz_vttQ": {"duration": 142.69, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LYrqEz_vttQ", "annotations": [{"segment": [0.01, 
131.77921847113885], "label": "Bathing dog"}]}, "DfpUMDpSbS4": {"duration": 235.8, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DfpUMDpSbS4", "annotations": [{"segment": [69.87761310452419, 73.18760530421217], "label": "Snatch"}, {"segment": [78.33648205928237, 84.22091263650546], "label": "Snatch"}, {"segment": [128.35414196567862, 182.41734789391577], "label": "Snatch"}]}, "QKEFacWrn_8": {"duration": 134.537, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QKEFacWrn_8", "annotations": [{"segment": [21.198359316692667, 35.68040677067083], "label": "Playing ten pins"}, {"segment": [46.17464405616224, 53.31072541029642], "label": "Playing ten pins"}, {"segment": [74.08931523556943, 82.48470506396256], "label": "Playing ten pins"}, {"segment": [102.21387116068644, 112.2883389547582], "label": "Playing ten pins"}]}, "sQwx_m8Vghw": {"duration": 25.96, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sQwx_m8Vghw", "annotations": [{"segment": [0, 24.38044401872075], "label": "Fixing bicycle"}]}, "wHxyzVcKq0c": {"duration": 165.77, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wHxyzVcKq0c", "annotations": [{"segment": [16.29225751170047, 153.0954991575663], "label": "Cheerleading"}]}, "tJ9wPza9ZqE": {"duration": 55.357, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=tJ9wPza9ZqE", "annotations": []}, "15Yf7NIDLtM": {"duration": 155.296, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=15Yf7NIDLtM", "annotations": [{"segment": [7.254252336448599, 134.92909345794394], "label": "Decorating the Christmas tree"}]}, "2AeQkiArbdk": {"duration": 53.267, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=2AeQkiArbdk", "annotations": []}, "ecVUPpBNpdA": {"duration": 84.8, "subset": "training", 
"resolution": "600x480", "url": "https://www.youtube.com/watch?v=ecVUPpBNpdA", "annotations": [{"segment": [0.01, 64.39720280811233], "label": "Arm wrestling"}]}, "7OM1UcqHPwE": {"duration": 57.98, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7OM1UcqHPwE", "annotations": []}, "6Xsfwn5M6qs": {"duration": 91.44, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6Xsfwn5M6qs", "annotations": []}, "l9HcwQPNvWo": {"duration": 196.63, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=l9HcwQPNvWo", "annotations": [{"segment": [88.80023412000001, 130.82875920206874], "label": "Playing harmonica"}, {"segment": [145.24599644121818, 190.64495625811426], "label": "Playing harmonica"}]}, "SjlILJ2st-I": {"duration": 80.69, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SjlILJ2st-I", "annotations": []}, "PV5Y3NeR5yQ": {"duration": 191.24, "subset": "training", "resolution": "854x478", "url": "https://www.youtube.com/watch?v=PV5Y3NeR5yQ", "annotations": [{"segment": [18.251976180983817, 70.083255134933], "label": "Hula hoop"}, {"segment": [84.67941989782673, 149.9153387962393], "label": "Hula hoop"}]}, "edbczrXeEOk": {"duration": 86.53999999999999, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=edbczrXeEOk", "annotations": [{"segment": [50.5194558, 83.86210056084244], "label": "Springboard diving"}, {"segment": [8.16356731123245, 46.0340876], "label": "Springboard diving"}]}, "BSdXxBOJ12A": {"duration": 238.01, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BSdXxBOJ12A", "annotations": [{"segment": [5.569517760302249, 233.5484447486743], "label": "Playing field hockey"}]}, "STwMeZMg_aA": {"duration": 10.124, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=STwMeZMg_aA", "annotations": [{"segment": 
[3.443073294851794, 6.570268305772231], "label": "Using the rowing machine"}]}, "8s3b1f6OMw0": {"duration": 33.07, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=8s3b1f6OMw0", "annotations": [{"segment": [0, 33.065215], "label": "Playing water polo"}]}, "rVYuVW9tB3U": {"duration": 80.133, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=rVYuVW9tB3U", "annotations": [{"segment": [35.228003120124804, 40.22588767550702], "label": "Sumo"}]}, "yVJGQI5MBIk": {"duration": 95.481, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yVJGQI5MBIk", "annotations": [{"segment": [0.19999999999999998, 95.32689342403629], "label": "Capoeira"}]}, "BFxxrjqgF0w": {"duration": 85.543, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=BFxxrjqgF0w", "annotations": [{"segment": [30.938521060842433, 69.89473322932918], "label": "Bullfighting"}]}, "CzyMYAvKE2E": {"duration": 44.258, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=CzyMYAvKE2E", "annotations": [{"segment": [5.674466804111618, 34.710348239369026], "label": "Carving jack-o-lanterns"}]}, "6SWx37_Byzs": {"duration": 33.86, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6SWx37_Byzs", "annotations": [{"segment": [1.5548478939157564, 8.7021707], "label": "Cumbia"}, {"segment": [10.91588376, 22.426704368174725], "label": "Cumbia"}]}, "sqv3M6MPKjQ": {"duration": 79.831, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sqv3M6MPKjQ", "annotations": []}, "sk_pnoqXxhs": {"duration": 93.93, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sk_pnoqXxhs", "annotations": []}, "QgzpNg0Ponc": {"duration": 143.62, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=QgzpNg0Ponc", "annotations": 
[{"segment": [0.01, 141.51215522620905], "label": "Skateboarding"}]}, "eSyrFLhr3b0": {"duration": 127.99, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eSyrFLhr3b0", "annotations": [{"segment": [9.480856684867396, 119.45879422932917], "label": "Preparing salad"}]}, "-YVhOi8vt_Q": {"duration": 21.967, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-YVhOi8vt_Q", "annotations": [{"segment": [0.20000000000000004, 14.034024960998439], "label": "Playing rubik cube"}]}, "1RVu0qNtWCc": {"duration": 136.56, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1RVu0qNtWCc", "annotations": [{"segment": [15.977758498095024, 101.83158082785896], "label": "Shaving"}]}, "T9JSNxOyUQA": {"duration": 228.09, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=T9JSNxOyUQA", "annotations": [{"segment": [37.091205195938, 140.8328887104653], "label": "River tubing"}, {"segment": [152.9123998046226, 227.8895918367347], "label": "River tubing"}]}, "C1vhQIJtLsw": {"duration": 36.572, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=C1vhQIJtLsw", "annotations": [{"segment": [12.02382449297972, 18.845852574102963], "label": "Powerbocking"}]}, "259K1wC2IMk": {"duration": 119.049, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=259K1wC2IMk", "annotations": [{"segment": [5.38597896568924, 54.60268262714508], "label": "Mooping floor"}, {"segment": [64.63174678315133, 119.048706], "label": "Mooping floor"}]}, "hFtmkU7wdx4": {"duration": 87.73, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hFtmkU7wdx4", "annotations": [{"segment": [26.872677847113888, 30.29159360374415], "label": "Triple jump"}, {"segment": [56.15140483619344, 60.528615444617785], "label": "Triple jump"}]}, "V2KVxpDERCk": {"duration": 160.73, "subset": 
"testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=V2KVxpDERCk", "annotations": []}, "xS-F-9E3xKk": {"duration": 71.564, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=xS-F-9E3xKk", "annotations": [{"segment": [1.3364672897196261, 66.50784579439252], "label": "Arm wrestling"}]}, "uJbM9KQBXak": {"duration": 190.985, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uJbM9KQBXak", "annotations": [{"segment": [0.2, 184.36832917316693], "label": "Slacklining"}]}, "JhiX65Okpt0": {"duration": 182.068, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JhiX65Okpt0", "annotations": [{"segment": [0.2, 147.18554064953273], "label": "Capoeira"}]}, "VFUk9WX8y5w": {"duration": 157.547, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VFUk9WX8y5w", "annotations": [{"segment": [4.5153630112110275, 8.196365160743738], "label": "Longboarding"}, {"segment": [16.53997003301788, 28.319176911522554], "label": "Longboarding"}, {"segment": [45.49718694267521, 59.73039525420169], "label": "Longboarding"}, {"segment": [62.1843966872235, 74.20900370903036], "label": "Longboarding"}, {"segment": [92.12321417008954, 134.822839104669], "label": "Longboarding"}, {"segment": [146.84744612647583, 157.34689200000003], "label": "Longboarding"}]}, "ButnmRRcOOM": {"duration": 42.563, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ButnmRRcOOM", "annotations": []}, "MSjSXzk2k50": {"duration": 140.621, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=MSjSXzk2k50", "annotations": [{"segment": [14.474798751950077, 15.571374414976598], "label": "Mooping floor"}, {"segment": [122.37784399375974, 126.34348829953198], "label": "Mooping floor"}]}, "vw64k9rIi_g": {"duration": 15.4, "subset": "training", "resolution": "272x480", "url": 
"https://www.youtube.com/watch?v=vw64k9rIi_g", "annotations": [{"segment": [10.807601560062402, 14.025865135725429], "label": "Washing face"}]}, "Q9Fw56ZvXq4": {"duration": 12.98, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Q9Fw56ZvXq4", "annotations": [{"segment": [0, 12.941], "label": "Washing dishes"}]}, "l5uaH-Qydxk": {"duration": 96.015, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=l5uaH-Qydxk", "annotations": [{"segment": [0.2, 91.3855272863741], "label": "Using the monkey bar"}]}, "g_cdSUwwk8Y": {"duration": 116.448, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=g_cdSUwwk8Y", "annotations": [{"segment": [39.96657697347894, 92.28645955694228], "label": "Blowing leaves"}]}, "szl1InYab_k": {"duration": 61.301, "subset": "validation", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=szl1InYab_k", "annotations": [{"segment": [2.103920336973479, 53.17180487987519], "label": "Painting furniture"}]}, "kl9xvnAKfdE": {"duration": 49.74, "subset": "validation", "resolution": "1920x1088", "url": "https://www.youtube.com/watch?v=kl9xvnAKfdE", "annotations": [{"segment": [0.3103721747269891, 49.34917578159126], "label": "Playing field hockey"}]}, "Fe_YqV7TsaA": {"duration": 82.8, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Fe_YqV7TsaA", "annotations": []}, "2UjVfKEcj4g": {"duration": 152.95, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=2UjVfKEcj4g", "annotations": [{"segment": [27.65559438377535, 152.801], "label": "Cleaning shoes"}]}, "2q_4I3ae0J4": {"duration": 122.973, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=2q_4I3ae0J4", "annotations": [{"segment": [0.2, 25.121140405616224], "label": "Hurling"}, {"segment": [39.126308892355695, 114.69378783151326], "label": "Hurling"}, {"segment": 
[117.37892667706708, 121.40663494539783], "label": "Hurling"}]}, "sW_iLVVMF4w": {"duration": 168.995, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sW_iLVVMF4w", "annotations": [{"segment": [0.01, 168.941], "label": "Blowing leaves"}]}, "6RZsrNf3SgM": {"duration": 211.093, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=6RZsrNf3SgM", "annotations": []}, "Ko7Nt8UDP4Q": {"duration": 136.35, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ko7Nt8UDP4Q", "annotations": []}, "tQT-iQIv-KY": {"duration": 91.58, "subset": "training", "resolution": "1274x720", "url": "https://www.youtube.com/watch?v=tQT-iQIv-KY", "annotations": [{"segment": [18.99347893915757, 84.68521060842434], "label": "Using the balance beam"}]}, "CfqnHsp6olc": {"duration": 91.65, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=CfqnHsp6olc", "annotations": []}, "Wiu7FVCc6DA": {"duration": 106.77000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Wiu7FVCc6DA", "annotations": [{"segment": [2.998078848673947, 97.60412251794072], "label": "Paintball"}]}, "ZREEgMgSz_o": {"duration": 43.65, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZREEgMgSz_o", "annotations": [{"segment": [11.236863720748829, 38.54584767238689], "label": "Washing face"}]}, "3N1tbv5Z6Kk": {"duration": 146.24, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3N1tbv5Z6Kk", "annotations": [{"segment": [4.676718408736349, 141.78441419656787], "label": "Tango"}]}, "5ssP_EapV9Q": {"duration": 101.797, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5ssP_EapV9Q", "annotations": [{"segment": [0.01, 0.02], "label": "Playing ice hockey"}, {"segment": [5.081873631407841, 101.79628], "label": "Playing ice hockey"}]}, 
"rmzMfd9ftU8": {"duration": 131.867, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rmzMfd9ftU8", "annotations": [{"segment": [20.559906396255847, 83.47321996879874], "label": "Cleaning sink"}]}, "U7x4eGLBITE": {"duration": 154.97, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=U7x4eGLBITE", "annotations": [{"segment": [35.286218408736346, 65.49702184087363], "label": "Snow tubing"}]}, "NuSM8LfLR90": {"duration": 162.053, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=NuSM8LfLR90", "annotations": [{"segment": [11.629321249040439, 153.96210088403535], "label": "Baton twirling"}]}, "D7aa0iuPlKc": {"duration": 125.342, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=D7aa0iuPlKc", "annotations": [{"segment": [4.2884899530790035, 116.17916778141348], "label": "Drum corps"}]}, "01yWsvgvmug": {"duration": 83.267, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=01yWsvgvmug", "annotations": []}, "7CM2OYr-Csw": {"duration": 84.057, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7CM2OYr-Csw", "annotations": []}, "MIQiVsnwcWE": {"duration": 208.68, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MIQiVsnwcWE", "annotations": [{"segment": [4.8823946957878315, 205.7115631825273], "label": "Tai chi"}]}, "CIQ-mnURg9E": {"duration": 176.07999999999998, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CIQ-mnURg9E", "annotations": [{"segment": [21.213636479999998, 32.68822623381126], "label": "Preparing salad"}, {"segment": [61.53077840873635, 132.95043191887677], "label": "Preparing salad"}]}, "lBBefqJhytA": {"duration": 213.508, "subset": "testing", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=lBBefqJhytA", "annotations": []}, "wBODR3P39Nw": 
{"duration": 134.98, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wBODR3P39Nw", "annotations": [{"segment": [107.67035101404058, 126.61528081123247], "label": "Playing field hockey"}, {"segment": [8.73571762870515, 107.92221012], "label": "Playing field hockey"}]}, "WGzUnE043iA": {"duration": 84.358, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=WGzUnE043iA", "annotations": [{"segment": [19.201163806552263, 69.58784555382215], "label": "Canoeing"}]}, "VA8SXILXTj0": {"duration": 222.75, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VA8SXILXTj0", "annotations": [{"segment": [28.752979020403803, 83.83399714948622], "label": "Making a lemonade"}, {"segment": [125.40457686954845, 142.03280875757332], "label": "Making a lemonade"}]}, "_Mz7KEe_mz0": {"duration": 179.212, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_Mz7KEe_mz0", "annotations": [{"segment": [14.232415887850467, 169.11458878504672], "label": "Longboarding"}]}, "gmbC7D6HfuM": {"duration": 141.34, "subset": "testing", "resolution": "160x120", "url": "https://www.youtube.com/watch?v=gmbC7D6HfuM", "annotations": []}, "h9LI_6eBzw4": {"duration": 184.367, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=h9LI_6eBzw4", "annotations": [{"segment": [43.076271006845104, 182.93056231464175], "label": "Canoeing"}]}, "APuN4vwgKJ4": {"duration": 209.77, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=APuN4vwgKJ4", "annotations": [{"segment": [18.097420124624378, 191.23157900312907], "label": "Baking cookies"}]}, "IcKNG8J2RjI": {"duration": 27.33, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=IcKNG8J2RjI", "annotations": []}, "UcnzlGqUEyA": {"duration": 59.61, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=UcnzlGqUEyA", "annotations": [{"segment": [22.689191906396257, 35.14964975663027], "label": "Shaving"}]}, "Ke5MPiv-wrY": {"duration": 136.37, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ke5MPiv-wrY", "annotations": [{"segment": [4.68043279875195, 127.86091418408736], "label": "Tumbling"}]}, "7F-mVWMP2vM": {"duration": 137.393, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7F-mVWMP2vM", "annotations": []}, "xgjRCA0_Hb8": {"duration": 190.915, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xgjRCA0_Hb8", "annotations": []}, "DjyO6zWUiDw": {"duration": 62.07, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=DjyO6zWUiDw", "annotations": [{"segment": [1.7913252730109206, 5.083490639625585], "label": "Washing dishes"}, {"segment": [28.031819812792513, 34.42249375975039], "label": "Washing dishes"}, {"segment": [44.39581825273011, 60.37250312012481], "label": "Washing dishes"}]}, "Wr_Eu2M7U9E": {"duration": 219.058, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Wr_Eu2M7U9E", "annotations": [{"segment": [22.213273502340094, 206.41257223712947], "label": "Hanging wallpaper"}]}, "P5Sg_kACPRM": {"duration": 15.63, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=P5Sg_kACPRM", "annotations": [{"segment": [2.94443934, 15.58], "label": "Long jump"}]}, "99xnJSBRzkE": {"duration": 152.25, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=99xnJSBRzkE", "annotations": [{"segment": [11.163654295831698, 23.989980508063862], "label": "Sailing"}, {"segment": [27.07779978137901, 31.35324185212307], "label": "Sailing"}, {"segment": [35.153634803895564, 44.65461718332679], "label": "Sailing"}, {"segment": [54.393124122243805, 59.3811398714452], "label": "Sailing"}, {"segment": 
[63.18153256162247, 65.55677814664587], "label": "Sailing"}, {"segment": [72.20746578471139, 80.04577621528861], "label": "Sailing"}, {"segment": [87.88408664586582, 92.15952869890796], "label": "Sailing"}, {"segment": [95.48487251794072, 97.86011810296412], "label": "Sailing"}, {"segment": [103.0856583900156, 116.38703366614665], "label": "Sailing"}, {"segment": [119.47485292667707, 130.16345805928236], "label": "Sailing"}, {"segment": [132.53870364430577, 136.10157202184087], "label": "Sailing"}]}, "fjGqWAxenL0": {"duration": 39.173, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fjGqWAxenL0", "annotations": [{"segment": [0.01, 39.141], "label": "Welding"}]}, "pf49xhMRrgQ": {"duration": 174.08, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=pf49xhMRrgQ", "annotations": [{"segment": [0.01, 174.079999], "label": "Playing congas"}]}, "x768VAsOQSw": {"duration": 75.372, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=x768VAsOQSw", "annotations": [{"segment": [22.5761669160644, 32.45323911388455], "label": "Ice fishing"}]}, "1f8iBakydSI": {"duration": 13.77, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=1f8iBakydSI", "annotations": [{"segment": [0.01, 13.721], "label": "Using the rowing machine"}]}, "BKrUkWSo4Ig": {"duration": 216.57, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=BKrUkWSo4Ig", "annotations": [{"segment": [10.135969469578782, 197.65140465678627], "label": "Playing violin"}]}, "r9xmgB116dE": {"duration": 205.358, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=r9xmgB116dE", "annotations": [{"segment": [18.5781872074883, 195.71159282371295], "label": "Longboarding"}]}, "GIy6ZbAooOQ": {"duration": 205.474, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=GIy6ZbAooOQ", 
"annotations": [{"segment": [25.64410340093604, 195.53628843213727], "label": "Rope skipping"}]}, "V9LQVhq2ePE": {"duration": 117.679, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=V9LQVhq2ePE", "annotations": [{"segment": [8.995721873634945, 80.77791070202808], "label": "Installing carpet"}]}, "0dgmG3h9RLA": {"duration": 78.41, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0dgmG3h9RLA", "annotations": [{"segment": [1.0391084243369735, 6.29577457098284], "label": "Playing bagpipes"}, {"segment": [17.17585101404056, 21.454532761310457], "label": "Playing bagpipes"}]}, "D9v5e1TTwts": {"duration": 210.28, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=D9v5e1TTwts", "annotations": []}, "DJyfOeZc2lI": {"duration": 41.239, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=DJyfOeZc2lI", "annotations": [{"segment": [8.942529529752617, 19.622096489859594], "label": "Throwing darts"}, {"segment": [26.248574976599066, 36.799472761310454], "label": "Throwing darts"}]}, "VkJ03vm8FJk": {"duration": 91.09, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=VkJ03vm8FJk", "annotations": [{"segment": [35.45899357, 58.552383889999994], "label": "Playing flauta"}]}, "yUbdrBSmUHE": {"duration": 139.343, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=yUbdrBSmUHE", "annotations": [{"segment": [10.64859438377535, 132.58316692667705], "label": "Rope skipping"}]}, "OG7-gtjRaEg": {"duration": 135.141, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OG7-gtjRaEg", "annotations": [{"segment": [50.37307176287052, 56.27451950078003], "label": "Tug of war"}, {"segment": [97.38830265210609, 114.67367238689549], "label": "Tug of war"}]}, "OuEQLjwBIPI": {"duration": 34.32, "subset": "validation", "resolution": "192x144", "url": 
"https://www.youtube.com/watch?v=OuEQLjwBIPI", "annotations": [{"segment": [0, 34.318683], "label": "Cricket"}]}, "n832gDqfWl4": {"duration": 55.8, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=n832gDqfWl4", "annotations": []}, "j59UZGZtuNg": {"duration": 13.49, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=j59UZGZtuNg", "annotations": [{"segment": [0.48563999999999996, 7.254922], "label": "Springboard diving"}]}, "b-p57jzkrQI": {"duration": 25.333, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=b-p57jzkrQI", "annotations": [{"segment": [0.01, 25.332969], "label": "Making an omelette"}]}, "sxf0x55Cvb4": {"duration": 11.657, "subset": "training", "resolution": "3414x1088", "url": "https://www.youtube.com/watch?v=sxf0x55Cvb4", "annotations": [{"segment": [0.01, 11.656416], "label": "Canoeing"}]}, "U9pnR51t6As": {"duration": 200.691, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=U9pnR51t6As", "annotations": [{"segment": [9.815679604717504, 69.52251761094803], "label": "Sumo"}, {"segment": [92.34240857668013, 99.84483848322218], "label": "Sumo"}, {"segment": [117.97571075736548, 134.8561780470851], "label": "Sumo"}, {"segment": [143.60901293805082, 159.23907444988885], "label": "Sumo"}]}, "JgHubY5Vw3Y": {"duration": 143.64, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JgHubY5Vw3Y", "annotations": [{"segment": [13.33626131045242, 14.681094383775351], "label": "Fixing bicycle"}, {"segment": [49.42261544461779, 58.388169266770674], "label": "Fixing bicycle"}]}, "qcA3mFHfo84": {"duration": 60.02, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qcA3mFHfo84", "annotations": [{"segment": [6.64848031825273, 51.97051516380655], "label": "Pole vault"}]}, "FKl0077EUoY": {"duration": 56.982, "subset": "training", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=FKl0077EUoY", "annotations": [{"segment": [12.347580343213728, 27.804263650546023], "label": "Sharpening knives"}]}, "dXONZBWOKHk": {"duration": 58.7, "subset": "validation", "resolution": "482x360", "url": "https://www.youtube.com/watch?v=dXONZBWOKHk", "annotations": [{"segment": [0.01, 58.700044], "label": "Removing curlers"}]}, "8wqlhbw4e30": {"duration": 106.91, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8wqlhbw4e30", "annotations": []}, "78WY5lXk42A": {"duration": 205.451, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=78WY5lXk42A", "annotations": [{"segment": [58.4718132390333, 170.30315935964848], "label": "Hanging wallpaper"}]}, "SKMVLKmgxAw": {"duration": 149.258, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=SKMVLKmgxAw", "annotations": [{"segment": [0.009999999999999998, 144.6008362371295], "label": "Playing congas"}]}, "WEjMCo8OfjE": {"duration": 169.553, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WEjMCo8OfjE", "annotations": [{"segment": [11.63226833073323, 38.86235101404056], "label": "Skiing"}, {"segment": [42.827897035881435, 47.850921996879876], "label": "Skiing"}, {"segment": [52.60957722308893, 84.06957566302653], "label": "Skiing"}, {"segment": [94.90873478939159, 120.5668268330733], "label": "Skiing"}, {"segment": [130.87846333853352, 153.08814196567863], "label": "Skiing"}]}, "_zqj6daJo9U": {"duration": 15.09, "subset": "testing", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=_zqj6daJo9U", "annotations": []}, "FnrvqpjHe6I": {"duration": 191.333, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=FnrvqpjHe6I", "annotations": [{"segment": [14.92452623274999, 191.332425], "label": "Futsal"}]}, "FsMTWgT5t8s": {"duration": 40.1, "subset": "testing", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=FsMTWgT5t8s", "annotations": []}, "BnAG37ecNbA": {"duration": 70.59, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BnAG37ecNbA", "annotations": [{"segment": [11.893245585023402, 61.448435522620905], "label": "Shaving legs"}]}, "lIqAkmaXnXg": {"duration": 183.856, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lIqAkmaXnXg", "annotations": [{"segment": [12.61691887675507, 172.73082059282373], "label": "Painting furniture"}]}, "_7JooVc_sCQ": {"duration": 207.912, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=_7JooVc_sCQ", "annotations": [{"segment": [51.803364485981305, 179.38640809968848], "label": "BMX"}]}, "fxgbk_Kk4Rw": {"duration": 125.83, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fxgbk_Kk4Rw", "annotations": [{"segment": [25.3228276396362, 81.46491062363582], "label": "Vacuuming floor"}]}, "seScyTd2XSc": {"duration": 99.312, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=seScyTd2XSc", "annotations": [{"segment": [37.028872545378, 83.50862801092043], "label": "Cleaning sink"}]}, "9hR1MHvXGv8": {"duration": 112.52000000000001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9hR1MHvXGv8", "annotations": [{"segment": [46.60871138845554, 72.0635631825273], "label": "Rock climbing"}, {"segment": [92.25189391575664, 103.6626895475819], "label": "Rock climbing"}]}, "Rn6_IwckXg4": {"duration": 31.53, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Rn6_IwckXg4", "annotations": []}, "-xDx4qvX3KQ": {"duration": 214.483, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-xDx4qvX3KQ", "annotations": []}, "sqAFVbKbcbM": {"duration": 191.24, "subset": "testing", "resolution": "426x240", "url": 
"https://www.youtube.com/watch?v=sqAFVbKbcbM", "annotations": []}, "qbGml8XLhz8": {"duration": 166.09, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qbGml8XLhz8", "annotations": [{"segment": [9.453651808112324, 157.60403082839315], "label": "Tai chi"}]}, "2wDakH-_GzU": {"duration": 144.707, "subset": "testing", "resolution": "618x360", "url": "https://www.youtube.com/watch?v=2wDakH-_GzU", "annotations": []}, "pLVCuSq560Y": {"duration": 185.853, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=pLVCuSq560Y", "annotations": [{"segment": [17.106549834633384, 174.83473814040562], "label": "Beach soccer"}]}, "5eV7gLIMCas": {"duration": 127.501, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5eV7gLIMCas", "annotations": []}, "ZYPKueJon34": {"duration": 24.34, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ZYPKueJon34", "annotations": [{"segment": [1.48486068, 8.846251827613106], "label": "Bungee jumping"}]}, "2ahuZDlObAQ": {"duration": 196.4, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2ahuZDlObAQ", "annotations": [{"segment": [19.34854396740447, 182.64358135058205], "label": "Rock climbing"}]}, "7PhDA2-meDc": {"duration": 122.509, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7PhDA2-meDc", "annotations": []}, "taHfD8TFfX4": {"duration": 5.53, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=taHfD8TFfX4", "annotations": [{"segment": [0, 5.526349], "label": "Cricket"}]}, "lfbspF18Wrw": {"duration": 66.967, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lfbspF18Wrw", "annotations": []}, "iYdbam6e8cI": {"duration": 188.384, "subset": "training", "resolution": "384x240", "url": "https://www.youtube.com/watch?v=iYdbam6e8cI", "annotations": [{"segment": 
[0.19999999999999998, 172.85576487635464], "label": "Rollerblading"}]}, "LkwkGj27pP8": {"duration": 27.911, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=LkwkGj27pP8", "annotations": [{"segment": [0, 27.861], "label": "Painting fence"}]}, "H-PhEsI9njQ": {"duration": 60.094, "subset": "training", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=H-PhEsI9njQ", "annotations": [{"segment": [4.781209502340094, 24.281044335413416], "label": "Shuffleboard"}, {"segment": [32.0622284274571, 42.74963790327613], "label": "Shuffleboard"}, {"segment": [54.2807902324493, 58.874501322932915], "label": "Shuffleboard"}]}, "-g-qMUjVA-s": {"duration": 62.694, "subset": "validation", "resolution": "160x112", "url": "https://www.youtube.com/watch?v=-g-qMUjVA-s", "annotations": [{"segment": [0.01, 62.641], "label": "Roof shingle removal"}]}, "o3yvGAz5IJ8": {"duration": 207.68, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=o3yvGAz5IJ8", "annotations": [{"segment": [42.73855226209049, 71.55469734789392], "label": "Polishing forniture"}, {"segment": [92.27641965678627, 134.04364118564743], "label": "Polishing forniture"}]}, "pHAIrx_cRFU": {"duration": 145.265, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=pHAIrx_cRFU", "annotations": [{"segment": [0, 64.21916302652106], "label": "Sharpening knives"}, {"segment": [88.9101287051482, 110.2184344773791], "label": "Sharpening knives"}]}, "jQVT4u6NojM": {"duration": 78.298, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jQVT4u6NojM", "annotations": [{"segment": [11.47371918876755, 57.38326053042122], "label": "Arm wrestling"}]}, "YzyCFfrX_4I": {"duration": 18.762, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YzyCFfrX_4I", "annotations": [{"segment": [1.611560062402496, 17.08253666146646], "label": "Rollerblading"}]}, 
"EInkc1uEX3c": {"duration": 228.183, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EInkc1uEX3c", "annotations": [{"segment": [32.74410608424337, 179.73666926677066], "label": "Laying tile"}, {"segment": [191.8545694227769, 200.39725897035882], "label": "Laying tile"}]}, "kxfOrs5ZWkw": {"duration": 203.64, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kxfOrs5ZWkw", "annotations": [{"segment": [7.9687112913938325, 93.74489009364797], "label": "Making a cake"}, {"segment": [100.09868111603716, 115.66546912089068], "label": "Making a cake"}, {"segment": [119.4777437343242, 165.2250390955264], "label": "Making a cake"}]}, "vgO3mzJGbSI": {"duration": 203.22, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vgO3mzJGbSI", "annotations": [{"segment": [112.23116537722733, 123.64450422914877], "label": "Getting a piercing"}]}, "BCdt22s9hlU": {"duration": 97.757, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=BCdt22s9hlU", "annotations": [{"segment": [5.335780031201248, 97.701], "label": "Putting on shoes"}]}, "I5Q0DcmTs9c": {"duration": 227.02, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=I5Q0DcmTs9c", "annotations": [{"segment": [4.12270136, 227.021495], "label": "Playing harmonica"}]}, "zOcXA1S_VPI": {"duration": 159.2, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zOcXA1S_VPI", "annotations": [{"segment": [27.178833739469578, 125.84125408], "label": "Layup drill in basketball"}]}, "P-04xkAdWSY": {"duration": 30.186, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=P-04xkAdWSY", "annotations": [{"segment": [0.01, 20.595922813118435], "label": "Sumo"}]}, "QGZN8aXpXHU": {"duration": 124.13, "subset": "training", "resolution": "720x480", "url": 
"https://www.youtube.com/watch?v=QGZN8aXpXHU", "annotations": [{"segment": [38.687467394695794, 109.67897006396257], "label": "Getting a haircut"}]}, "AIxxMCcmIgM": {"duration": 83.639, "subset": "training", "resolution": "1270x720", "url": "https://www.youtube.com/watch?v=AIxxMCcmIgM", "annotations": [{"segment": [0.19999999999999998, 76.01646957878314], "label": "Wakeboarding"}]}, "YTXolcHh_qo": {"duration": 135.49, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=YTXolcHh_qo", "annotations": []}, "_R-jA9hOFCE": {"duration": 206.66, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_R-jA9hOFCE", "annotations": [{"segment": [4.835981194349815, 8.05996865724969], "label": "Sailing"}, {"segment": [18.054329792239308, 21.60071600142917], "label": "Sailing"}, {"segment": [25.147102210619035, 42.879033256568356], "label": "Sailing"}, {"segment": [142.8226446064645, 145.07943583049442], "label": "Sailing"}, {"segment": [152.81700574145415, 155.71859445806405], "label": "Sailing"}, {"segment": [164.10096077223088, 169.90413816692666], "label": "Sailing"}]}, "n41Ypwpn-P8": {"duration": 102.53999999999999, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=n41Ypwpn-P8", "annotations": [{"segment": [60.62778585288611, 99.4999547192772], "label": "Playing kickball"}]}, "K5R5tbbISAg": {"duration": 43.677, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=K5R5tbbISAg", "annotations": []}, "z-ttrQ38mOc": {"duration": 221.05, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=z-ttrQ38mOc", "annotations": [{"segment": [12.409609984399378, 188.55712948517942], "label": "Playing violin"}]}, "BJGACTBaU_Y": {"duration": 14.583, "subset": "training", "resolution": "426x214", "url": "https://www.youtube.com/watch?v=BJGACTBaU_Y", "annotations": [{"segment": [0.2, 14.281], "label": "Hitting a 
pinata"}]}, "-Rv7tGWehRE": {"duration": 60.071, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=-Rv7tGWehRE", "annotations": [{"segment": [0.2, 58.244677570093465], "label": "Powerbocking"}]}, "HQP20PGfwYM": {"duration": 178.005, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=HQP20PGfwYM", "annotations": [{"segment": [0.2, 171.07206624705958], "label": "Installing carpet"}]}, "qmar8pDP5co": {"duration": 176.054, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qmar8pDP5co", "annotations": [{"segment": [133.99140093603745, 144.16696567862715], "label": "Sumo"}]}, "i7yqaSTEdAA": {"duration": 194.91, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=i7yqaSTEdAA", "annotations": []}, "PtRpSvyZQm4": {"duration": 80.06, "subset": "testing", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=PtRpSvyZQm4", "annotations": []}, "OKTmKPHpzqo": {"duration": 238.539, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OKTmKPHpzqo", "annotations": []}, "MlkXsUDCoww": {"duration": 51.386, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=MlkXsUDCoww", "annotations": [{"segment": [4.89006442901716, 35.59325584399376], "label": "Cutting the grass"}]}, "dnJJWt0SBTc": {"duration": 210.885, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dnJJWt0SBTc", "annotations": [{"segment": [0.01, 43.822731653908704], "label": "Doing fencing"}, {"segment": [54.99194349191494, 75.68783601527943], "label": "Doing fencing"}, {"segment": [99.66879084393986, 109.19547152929812], "label": "Doing fencing"}, {"segment": [130.8768827442514, 133.50493258848815], "label": "Doing fencing"}, {"segment": [136.4614886632545, 141.717588351728], "label": "Doing fencing"}, {"segment": [182.4523609373978, 199.53468492493676], "label": 
"Doing fencing"}]}, "1UqVaZCy8gU": {"duration": 32.694, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1UqVaZCy8gU", "annotations": []}, "GG3JgE6hy4g": {"duration": 106.092, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=GG3JgE6hy4g", "annotations": [{"segment": [0.01, 106.041], "label": "Roof shingle removal"}]}, "LCbKBMILWDk": {"duration": 97.315, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LCbKBMILWDk", "annotations": []}, "DS02_2l_BOE": {"duration": 63.62, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=DS02_2l_BOE", "annotations": []}, "Ygy-zufAejI": {"duration": 58.213, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=Ygy-zufAejI", "annotations": [{"segment": [4.714964118564743, 17.777903276131045], "label": "Slacklining"}, {"segment": [22.494489859594385, 41.995761310452416], "label": "Slacklining"}]}, "YYmx8EHIjAE": {"duration": 38.917, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=YYmx8EHIjAE", "annotations": [{"segment": [0.01, 38.916643], "label": "Laying tile"}]}, "5MnHirHxqkA": {"duration": 198.16, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=5MnHirHxqkA", "annotations": [{"segment": [13.602184228865754, 175.9009733232867], "label": "Ironing clothes"}]}, "VAKQMtYrl4I": {"duration": 73.03, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VAKQMtYrl4I", "annotations": [{"segment": [0.01, 73.02657596371883], "label": "Walking the dog"}]}, "-5Yp-vToI2E": {"duration": 116.47, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-5Yp-vToI2E", "annotations": [{"segment": [52.74244264664586, 105.3941065624025], "label": "Ballet"}, {"segment": [3.18198772, 50.369048660000004], "label": "Ballet"}]}, 
"HUxoNWi1qjs": {"duration": 186.2, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HUxoNWi1qjs", "annotations": []}, "qhsK94t7x7w": {"duration": 235.985, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qhsK94t7x7w", "annotations": [{"segment": [10.306315132605304, 231.9117472698908], "label": "Bullfighting"}]}, "2Mw-3jScc0M": {"duration": 226.209, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2Mw-3jScc0M", "annotations": [{"segment": [103.73980343213728, 165.4896864274571], "label": "Wakeboarding"}]}, "4UIAc9irBLs": {"duration": 63.345, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4UIAc9irBLs", "annotations": [{"segment": [0.01, 62.35560656711983], "label": "Swimming"}]}, "hgTgOjkcvpc": {"duration": 15.697, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hgTgOjkcvpc", "annotations": [{"segment": [0.01, 15.661], "label": "Croquet"}]}, "xcrGhwE7R5E": {"duration": 180.09, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=xcrGhwE7R5E", "annotations": [{"segment": [23.31949962714509, 171.10331654134166], "label": "Preparing pasta"}]}, "qH3HnhEaeok": {"duration": 182.509, "subset": "validation", "resolution": "352x288", "url": "https://www.youtube.com/watch?v=qH3HnhEaeok", "annotations": [{"segment": [21.923820158970358, 180.80033507722308], "label": "Canoeing"}]}, "cdHBwzbNI5Y": {"duration": 26.541, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cdHBwzbNI5Y", "annotations": [{"segment": [1.7804017176287048, 26.08495539781591], "label": "Baton twirling"}]}, "3VjaC7wdMso": {"duration": 51.53, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3VjaC7wdMso", "annotations": []}, "gBTnWp9VLz4": {"duration": 208.144, "subset": "training", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=gBTnWp9VLz4", "annotations": [{"segment": [22.107822220541884, 208.14367346938775], "label": "Snowboarding"}]}, "_K88UOvtQPk": {"duration": 67.99, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_K88UOvtQPk", "annotations": [{"segment": [1.3248829953198127, 65.34322932917316], "label": "Playing saxophone"}]}, "z60D7p37Lws": {"duration": 80.759, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=z60D7p37Lws", "annotations": [{"segment": [28.194296411856474, 76.29454914196567], "label": "Hitting a pinata"}]}, "YK1yqJUcnj8": {"duration": 150.094, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YK1yqJUcnj8", "annotations": []}, "2JyEK7sKsrY": {"duration": 119.21000000000001, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2JyEK7sKsrY", "annotations": []}, "c6uw9Qpp1Tg": {"duration": 207.819, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=c6uw9Qpp1Tg", "annotations": []}, "N0mNYZaCJoQ": {"duration": 31.998, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=N0mNYZaCJoQ", "annotations": [{"segment": [0.2, 30.837015600624024], "label": "Camel ride"}]}, "aN9vCyXMbb8": {"duration": 65.23, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aN9vCyXMbb8", "annotations": [{"segment": [4.578967698907956, 62.78273489391576], "label": "Playing saxophone"}]}, "5C0G3BQ-Nds": {"duration": 213.717, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=5C0G3BQ-Nds", "annotations": [{"segment": [0.2, 207.33559750390017], "label": "Table soccer"}]}, "k_bvz0NHKBo": {"duration": 169.042, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=k_bvz0NHKBo", "annotations": [{"segment": [8.149731708649744, 
19.191309135888137], "label": "Beer pong"}, {"segment": [34.17630707285453, 46.53235800333558], "label": "Beer pong"}, {"segment": [92.27603591618035, 96.48235112655688], "label": "Beer pong"}]}, "PVdd6E1S0Yc": {"duration": 91.12, "subset": "validation", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=PVdd6E1S0Yc", "annotations": [{"segment": [24.306812696997678, 25.870408835401037], "label": "Washing face"}, {"segment": [27.149714766821965, 29.139746215698967], "label": "Washing face"}, {"segment": [33.97267973440026, 40.653499598487336], "label": "Washing face"}, {"segment": [78.32195173322933, 87.2770932199688], "label": "Washing face"}]}, "15npAlupNU4": {"duration": 122.068, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=15npAlupNU4", "annotations": [{"segment": [0.01, 73.12612138771266], "label": "Removing ice from car"}, {"segment": [91.59808315132605, 95.78760046801872], "label": "Removing ice from car"}, {"segment": [107.02312418096723, 110.07004586583463], "label": "Removing ice from car"}]}, "44r5SAczrOg": {"duration": 53.174, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=44r5SAczrOg", "annotations": [{"segment": [0.2, 49.35779893915757], "label": "Playing rubik cube"}]}, "tbPbyTLkYF0": {"duration": 60.628, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tbPbyTLkYF0", "annotations": []}, "5KYUiMysyb0": {"duration": 22.013, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=5KYUiMysyb0", "annotations": [{"segment": [6.440979719188767, 17.042990639625586], "label": "Cleaning sink"}]}, "yNwdhK5UKec": {"duration": 151.51, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yNwdhK5UKec", "annotations": [{"segment": [0.01, 17.257609984399377], "label": "Playing harmonica"}, {"segment": [26.950240249609983, 142.07977535101406], "label": "Playing 
harmonica"}]}, "RDCZQ0nWIPc": {"duration": 115.589, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RDCZQ0nWIPc", "annotations": [{"segment": [6.471539453974955, 10.60613585759602], "label": "Rollerblading"}, {"segment": [58.78317158204176, 61.299882409413456], "label": "Rollerblading"}]}, "py4ekPd5G_k": {"duration": 71.633, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=py4ekPd5G_k", "annotations": [{"segment": [23.20835145273062, 70.2945246728972], "label": "River tubing"}]}, "u1grT0O81Qg": {"duration": 159.336, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=u1grT0O81Qg", "annotations": [{"segment": [0.01, 149.8572589703588], "label": "Doing fencing"}]}, "V44vW_JrDow": {"duration": 49.97, "subset": "testing", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=V44vW_JrDow", "annotations": []}, "FBcsA3MB3TA": {"duration": 200.13, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FBcsA3MB3TA", "annotations": []}, "stVRtmxHVaE": {"duration": 234.29, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=stVRtmxHVaE", "annotations": [{"segment": [78.92626521060843, 181.25356630265213], "label": "Roof shingle removal"}]}, "BQRidRi2V1c": {"duration": 68.035, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BQRidRi2V1c", "annotations": [{"segment": [13.044716068642746, 46.14732449297973], "label": "Rope skipping"}, {"segment": [53.78550234009361, 65.03059750390015], "label": "Rope skipping"}]}, "plhiqYw0P_g": {"duration": 124.923, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=plhiqYw0P_g", "annotations": [{"segment": [13.057480634945398, 112.06046813572543], "label": "Waterskiing"}]}, "gdi_m9LjRAw": {"duration": 104.328, "subset": "training", "resolution": "582x480", "url": 
"https://www.youtube.com/watch?v=gdi_m9LjRAw", "annotations": [{"segment": [41.925906711594294, 78.0016859813084], "label": "Trimming branches or hedges"}]}, "-UWE4jXuLoo": {"duration": 180.58, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-UWE4jXuLoo", "annotations": [{"segment": [96.70473711999999, 171.0998634945398], "label": "Skateboarding"}, {"segment": [40.79144209, 96.74535491419657], "label": "Skateboarding"}]}, "j6S6eWSeGr0": {"duration": 51.897, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=j6S6eWSeGr0", "annotations": [{"segment": [0.01, 51.781], "label": "Painting fence"}]}, "QOlSCBRmfWY": {"duration": 82.73, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QOlSCBRmfWY", "annotations": [{"segment": [6.195294851794072, 77.73085420904837], "label": "Ballet"}]}, "yeLB4QXA3NQ": {"duration": 165.187, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=yeLB4QXA3NQ", "annotations": [{"segment": [10.652230452422097, 16.8274362841978], "label": "Hand car wash"}, {"segment": [31.493550134665085, 93.50290869541274], "label": "Hand car wash"}, {"segment": [144.70565705055293, 164.26047551784265], "label": "Hand car wash"}]}, "GAEavSUmQRk": {"duration": 216.34, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=GAEavSUmQRk", "annotations": [{"segment": [57.51628458, 200.17893915756628], "label": "Playing saxophone"}]}, "fMVKdmMVixQ": {"duration": 182.6, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fMVKdmMVixQ", "annotations": [{"segment": [16.80733489672387, 171.06448483868954], "label": "Tai chi"}]}, "5K6oT9zBjVE": {"duration": 144.777, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5K6oT9zBjVE", "annotations": [{"segment": [0.01, 144.741], "label": "Cleaning sink"}]}, "Kmk0IVi2Xu4": 
{"duration": 178.144, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Kmk0IVi2Xu4", "annotations": [{"segment": [37.23172230889235, 70.57356318252731], "label": "Playing congas"}, {"segment": [115.3071996879875, 119.21046645865836], "label": "Playing congas"}]}, "jSxu2GOiBO0": {"duration": 218.941, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=jSxu2GOiBO0", "annotations": [{"segment": [18.102762021840874, 64.89669404056163], "label": "Skiing"}, {"segment": [84.02414070514821, 104.51783356006241], "label": "Skiing"}, {"segment": [126.71933415288613, 157.8014349828393], "label": "Skiing"}, {"segment": [198.1056975975039, 208.352544024961], "label": "Skiing"}]}, "LAZHNzFbDNY": {"duration": 106.67, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=LAZHNzFbDNY", "annotations": [{"segment": [12.718930577223087, 29.6775046801872], "label": "Discus throw"}, {"segment": [49.62876833073322, 55.94666848673947], "label": "Discus throw"}, {"segment": [72.73898205928238, 77.06175585023401], "label": "Discus throw"}]}, "7YbdzJWLBxs": {"duration": 179.746, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=7YbdzJWLBxs", "annotations": []}, "E15Q3Z9J-Zg": {"duration": 130.473, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=E15Q3Z9J-Zg", "annotations": [{"segment": [0.2, 4.261168629193513], "label": "Elliptical trainer"}, {"segment": [22.52331989716571, 111.39912173561432], "label": "Elliptical trainer"}]}, "tO8yDY0x6yA": {"duration": 218.315, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tO8yDY0x6yA", "annotations": []}, "Boa880LnJ3w": {"duration": 108.925, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Boa880LnJ3w", "annotations": [{"segment": [19.70389391575663, 95.479503900156], "label": "Assembling 
bicycle"}]}, "DVlDRG0SxG8": {"duration": 26.84, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DVlDRG0SxG8", "annotations": [{"segment": [0.6071963803722217, 25.39755887480499], "label": "Kayaking"}]}, "2nPrH4Tv0yc": {"duration": 83.06, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2nPrH4Tv0yc", "annotations": [{"segment": [1.1661781450858035, 83.057799], "label": "Doing kickboxing"}]}, "8PIQD4aLQhI": {"duration": 147.8, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8PIQD4aLQhI", "annotations": []}, "WQzjqCkgZn0": {"duration": 153.693, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WQzjqCkgZn0", "annotations": []}, "qBqUu4_qOnU": {"duration": 16.254, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qBqUu4_qOnU", "annotations": [{"segment": [0.01, 16.253968], "label": "Hitting a pinata"}]}, "dBCiKzkJogg": {"duration": 236.24, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=dBCiKzkJogg", "annotations": [{"segment": [0.2, 227.37105607476636], "label": "Beer pong"}]}, "bSaZYR-4t5k": {"duration": 12.75, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bSaZYR-4t5k", "annotations": []}, "UH_z4C6sv3E": {"duration": 54.1, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=UH_z4C6sv3E", "annotations": [{"segment": [15.783410982697811, 54.102494331065756], "label": "Shaving"}]}, "7KEM_rbhASw": {"duration": 191.217, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7KEM_rbhASw", "annotations": [{"segment": [40.54985335413417, 135.08092511700465], "label": "Rafting"}, {"segment": [143.43029797191886, 172.95129485179407], "label": "Rafting"}]}, "K4qFT8_yqJ0": {"duration": 53.708, "subset": "testing", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=K4qFT8_yqJ0", "annotations": []}, "VNUVKrN4ndc": {"duration": 83.71000000000001, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VNUVKrN4ndc", "annotations": [{"segment": [27.796666146645865, 69.55691575663026], "label": "Using parallel bars"}]}, "6xAe1YVbxuY": {"duration": 204.8, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6xAe1YVbxuY", "annotations": [{"segment": [82.302976, 196.92734757332292], "label": "Snatch"}, {"segment": [14.514176, 68.182016], "label": "Snatch"}]}, "sHx5UcQQ1kU": {"duration": 197.277, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sHx5UcQQ1kU", "annotations": [{"segment": [71.84259516380655, 93.07231279251171], "label": "Sharpening knives"}, {"segment": [114.30203042121686, 126.3014360374415], "label": "Sharpening knives"}]}, "woI_zP55qWo": {"duration": 27.08, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=woI_zP55qWo", "annotations": []}, "OyV4eki18GE": {"duration": 77.58, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=OyV4eki18GE", "annotations": [{"segment": [3.97039086, 27.367614059999998], "label": "Playing kickball"}, {"segment": [9.863644556162248, 36.731486169266766], "label": "Playing kickball"}]}, "E50d5qFvzOI": {"duration": 164.28, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=E50d5qFvzOI", "annotations": [{"segment": [9.349162728549143, 159.96033216380656], "label": "Tango"}]}, "hRMSh7vUmGY": {"duration": 116.57, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=hRMSh7vUmGY", "annotations": [{"segment": [7.819408555934073, 15.093276980058794], "label": "Clean and jerk"}, {"segment": [16.366203954280618, 25.2766927738334], "label": "Clean and jerk"}, {"segment": [28.00439343288017, 
38.73334935846413], "label": "Clean and jerk"}, {"segment": [40.733663175098435, 50.00784541585745], "label": "Clean and jerk"}, {"segment": [52.00815923249174, 61.10049476264765], "label": "Clean and jerk"}, {"segment": [64.55558226410689, 74.01161121546903], "label": "Clean and jerk"}, {"segment": [100.379383825273, 113.47234693291732], "label": "Clean and jerk"}]}, "yeQDfh6K6Sc": {"duration": 41.4, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yeQDfh6K6Sc", "annotations": [{"segment": [2.9710674477379095, 41.401179], "label": "Smoking hookah"}]}, "-jl_v7zi17A": {"duration": 214.553, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=-jl_v7zi17A", "annotations": [{"segment": [7.6984473664660875, 19.748191070499963], "label": "Elliptical trainer"}, {"segment": [23.76477230517792, 27.11192333407622], "label": "Elliptical trainer"}, {"segment": [35.8145160092118, 66.94302057796598], "label": "Elliptical trainer"}, {"segment": [73.9720377386524, 111.79484436520318], "label": "Elliptical trainer"}, {"segment": [127.19173909813537, 141.24977341950822], "label": "Elliptical trainer"}, {"segment": [149.61765099175395, 160.66324938711836], "label": "Elliptical trainer"}, {"segment": [172.7129930911522, 175.05599803432136], "label": "Elliptical trainer"}, {"segment": [192.79589840873635, 201.16377594383778], "label": "Elliptical trainer"}]}, "9PqYL4zTnxY": {"duration": 138.6, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9PqYL4zTnxY", "annotations": [{"segment": [3.890386895475819, 15.779957878315132], "label": "Rope skipping"}, {"segment": [39.3418127925117, 129.04979251170047], "label": "Rope skipping"}]}, "kUlwaBYvyBI": {"duration": 107.14, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kUlwaBYvyBI", "annotations": [{"segment": [63.1787162915088, 92.42812198202213], "label": "Polishing forniture"}, {"segment": 
[65.31487563280717, 93.80682504087524], "label": "Painting furniture"}]}, "J3NhT51f0To": {"duration": 193.26, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=J3NhT51f0To", "annotations": [{"segment": [95.81837736973479, 193.143333], "label": "Playing violin"}]}, "J0EWvkkxh5o": {"duration": 53.57, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=J0EWvkkxh5o", "annotations": [{"segment": [4.798992429797193, 51.620466397035884], "label": "Plataform diving"}]}, "pp1KvMmpgBk": {"duration": 4.99, "subset": "testing", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=pp1KvMmpgBk", "annotations": []}, "MVzypK0eMKc": {"duration": 126.062, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MVzypK0eMKc", "annotations": [{"segment": [21.826132605304213, 83.96178939157566], "label": "Making a lemonade"}]}, "RTM6iJxc-G0": {"duration": 31.719, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=RTM6iJxc-G0", "annotations": [{"segment": [8.3199193258544, 10.147930087536643], "label": "Drum corps"}, {"segment": [16.175425031461877, 19.63382376977963], "label": "Drum corps"}]}, "9WDvq8LXrxU": {"duration": 20.11, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9WDvq8LXrxU", "annotations": [{"segment": [3.42436563, 18.44584491856474], "label": "Polishing shoes"}]}, "LdDB7xXXHQM": {"duration": 192.08, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LdDB7xXXHQM", "annotations": [{"segment": [71.61628096688494, 112.6682913956014], "label": "Doing kickboxing"}, {"segment": [111.17004283980884, 132.44517105460218], "label": "Doing kickboxing"}, {"segment": [24.27162636973479, 47.04500419812793], "label": "Doing kickboxing"}]}, "dE1NAofn3ks": {"duration": 46.231, "subset": "training", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=dE1NAofn3ks", "annotations": [{"segment": [4.9668159126365055, 18.211658346333852], "label": "Curling"}, {"segment": [20.65907488299532, 32.32029485179407], "label": "Curling"}, {"segment": [37.215127925117, 46.141], "label": "Curling"}]}, "nsdB1r31-70": {"duration": 73.05, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nsdB1r31-70", "annotations": [{"segment": [0.2, 67.93761325856158], "label": "Swinging at the playground"}]}, "pS-BoCsMcpQ": {"duration": 131.52, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pS-BoCsMcpQ", "annotations": [{"segment": [5.333082683307333, 109.3281950078003], "label": "Playing water polo"}]}, "7e035QnLp0c": {"duration": 176.17, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=7e035QnLp0c", "annotations": [{"segment": [7.134602249221183, 166.5655235552165], "label": "Slacklining"}]}, "v8XN1ajGeIw": {"duration": 152.28, "subset": "training", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=v8XN1ajGeIw", "annotations": [{"segment": [7.364377652106085, 93.59886435257411], "label": "Vacuuming floor"}, {"segment": [109.2778619344774, 143.48658393135727], "label": "Vacuuming floor"}]}, "meNgZdu8tIk": {"duration": 193.14, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=meNgZdu8tIk", "annotations": []}, "CkE-dOzXddY": {"duration": 173.69, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=CkE-dOzXddY", "annotations": []}, "Ga-204sSRoc": {"duration": 46.371, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ga-204sSRoc", "annotations": [{"segment": [2.242554895475819, 44.2000335850234], "label": "Fixing the roof"}]}, "6iwKUJUUlaQ": {"duration": 14.75, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6iwKUJUUlaQ", "annotations": 
[{"segment": [0.27602531045241807, 12.83517693603744], "label": "Hopscotch"}]}, "k3UfVrIInMY": {"duration": 22.222, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=k3UfVrIInMY", "annotations": []}, "Yi-0wjSu0E0": {"duration": 27.052, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=Yi-0wjSu0E0", "annotations": [{"segment": [1.6849297971918877, 10.66507488299532], "label": "Mooping floor"}]}, "mfjO2ziAT7s": {"duration": 223.887, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mfjO2ziAT7s", "annotations": []}, "pJEOK1DbTfU": {"duration": 16.07, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pJEOK1DbTfU", "annotations": [{"segment": [0.01, 15.366334633385335], "label": "Using uneven bars"}]}, "YMSMASHyl2Y": {"duration": 154.39, "subset": "training", "resolution": "540x360", "url": "https://www.youtube.com/watch?v=YMSMASHyl2Y", "annotations": [{"segment": [50.56413416536662, 97.51654446177847], "label": "Grooming dog"}, {"segment": [111.73713572542901, 115.3493276131045], "label": "Grooming dog"}, {"segment": [123.53696255850232, 142.56117316692666], "label": "Grooming dog"}]}, "3Nd3Aai1bVA": {"duration": 130.822, "subset": "testing", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=3Nd3Aai1bVA", "annotations": []}, "dGHCQVdC27c": {"duration": 67.965, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=dGHCQVdC27c", "annotations": [{"segment": [0.2, 67.721], "label": "Plastering"}]}, "dB50ZkOlDzY": {"duration": 41.87, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dB50ZkOlDzY", "annotations": [{"segment": [0.01, 41.821], "label": "Using parallel bars"}]}, "gGEkmrII58w": {"duration": 130.744, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gGEkmrII58w", "annotations": []}, 
"a2Xwx37YbhE": {"duration": 43.84, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=a2Xwx37YbhE", "annotations": [{"segment": [3.351189042121685, 43.839024], "label": "Disc dog"}]}, "gXdFGYPKClE": {"duration": 67.896, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gXdFGYPKClE", "annotations": [{"segment": [17.457823712948517, 65.08925897035881], "label": "BMX"}]}, "QrsOyEw4_7k": {"duration": 164.514, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=QrsOyEw4_7k", "annotations": [{"segment": [0.01, 164.51337868480726], "label": "Putting in contact lenses"}]}, "xR9VOguQeKM": {"duration": 208.35, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xR9VOguQeKM", "annotations": [{"segment": [18.849638065522623, 25.02451950078003], "label": "Pole vault"}, {"segment": [33.79935101404056, 38.02426989079563], "label": "Pole vault"}, {"segment": [70.19865210608424, 90.67325897035882], "label": "Pole vault"}, {"segment": [104.32299687987519, 108.54791575663026], "label": "Pole vault"}, {"segment": [117.64774102964118, 135.52239781591263], "label": "Pole vault"}, {"segment": [144.62222308892356, 145.59720436817474], "label": "Pole vault"}, {"segment": [155.34701716068642, 163.1468673946958], "label": "Pole vault"}, {"segment": [169.97173634945398, 174.52164898595944], "label": "Pole vault"}]}, "Le6fNx5IWRI": {"duration": 161.59, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Le6fNx5IWRI", "annotations": [{"segment": [16.03266214, 90.92500489999999], "label": "Using the pommel horse"}, {"segment": [98.40971474, 124.92937318], "label": "Using the pommel horse"}]}, "Y9d4iPDeigI": {"duration": 46.9, "subset": "testing", "resolution": "342x240", "url": "https://www.youtube.com/watch?v=Y9d4iPDeigI", "annotations": []}, "z2qG-TOSwqw": {"duration": 49.2, "subset": "validation", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=z2qG-TOSwqw", "annotations": [{"segment": [0, 49.20308390022676], "label": "Getting a piercing"}]}, "TUMk0wpBiP0": {"duration": 165.582, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TUMk0wpBiP0", "annotations": [{"segment": [31.514661628705145, 36.68099960062402], "label": "Snowboarding"}, {"segment": [43.397238964118564, 155.76508985335414], "label": "Snowboarding"}]}, "2WeG8QGOfcI": {"duration": 212.77, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2WeG8QGOfcI", "annotations": [{"segment": [8.961985361934477, 207.12143947581905], "label": "Zumba"}]}, "O7Aalo-T70I": {"duration": 62.741, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=O7Aalo-T70I", "annotations": []}, "SjP2LnPSpS8": {"duration": 174.986, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SjP2LnPSpS8", "annotations": []}, "hbipKjS0G7w": {"duration": 122.42, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=hbipKjS0G7w", "annotations": [{"segment": [6.91032675, 119.5509593978159], "label": "Shot put"}]}, "E4PIKUW4v0k": {"duration": 48.58, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=E4PIKUW4v0k", "annotations": [{"segment": [9.03319296, 22.27986816], "label": "Smoking a cigarette"}, {"segment": [25.978444800000005, 40.95296832], "label": "Smoking a cigarette"}]}, "EBw5-KVV_PU": {"duration": 140.3, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=EBw5-KVV_PU", "annotations": []}, "LnInBoqM37Y": {"duration": 29.049, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LnInBoqM37Y", "annotations": []}, "3CJW0qBwwx4": {"duration": 126.596, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3CJW0qBwwx4", 
"annotations": [{"segment": [0, 99.11641497659907], "label": "Hand car wash"}, {"segment": [104.84226365054603, 122.41469578783152], "label": "Hand car wash"}]}, "BhxKjirVF4Y": {"duration": 106.33, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BhxKjirVF4Y", "annotations": [{"segment": [32.11164196567863, 55.510822932917314], "label": "Spinning"}, {"segment": [65.63387285491419, 71.94003510140405], "label": "Spinning"}, {"segment": [80.73547191887675, 88.53519890795631], "label": "Spinning"}]}, "0LJ1mSpqGJg": {"duration": 192.33, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0LJ1mSpqGJg", "annotations": [{"segment": [44.735958000000004, 116.0230725], "label": "Tennis serve with ball bouncing"}]}, "sTtFSpelQk4": {"duration": 209.537, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=sTtFSpelQk4", "annotations": [{"segment": [0.2, 192.23809386059722], "label": "Canoeing"}]}, "HsfrvjsaIDU": {"duration": 21.92, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=HsfrvjsaIDU", "annotations": [{"segment": [0.2, 11.05997503900156], "label": "Trimming branches or hedges"}]}, "1KKVWzCwRSA": {"duration": 124.041, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1KKVWzCwRSA", "annotations": [{"segment": [0.3869609984399376, 97.32069110764431], "label": "Snowboarding"}, {"segment": [104.47946957878314, 107.38167706708268], "label": "Snowboarding"}]}, "Vkr3r1Cd0mI": {"duration": 163.748, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Vkr3r1Cd0mI", "annotations": [{"segment": [0, 137.83984399375976], "label": "Blow-drying hair"}]}, "5zCY_vezHaU": {"duration": 168.69, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5zCY_vezHaU", "annotations": [{"segment": [21.84738689547582, 25.53248829953198], "label": 
"Skateboarding"}, {"segment": [48.16953978159126, 53.433970358814356], "label": "Skateboarding"}, {"segment": [70.80659126365055, 78.70323712948517], "label": "Skateboarding"}, {"segment": [94.49652886115445, 102.65639625585024], "label": "Skateboarding"}, {"segment": [110.28982059282372, 116.34391575663027], "label": "Skateboarding"}, {"segment": [120.55546021840874, 129.504992199688], "label": "Skateboarding"}, {"segment": [142.40284711388455, 151.08915756630265], "label": "Skateboarding"}]}, "pk7LcugO3zg": {"duration": 161.53, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=pk7LcugO3zg", "annotations": [{"segment": [70.5582527301092, 132.5766177847114], "label": "Drinking coffee"}]}, "RpB5_XYoYhk": {"duration": 63.48, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=RpB5_XYoYhk", "annotations": [{"segment": [15.444486739469578, 54.84772854914196], "label": "Wrapping presents"}]}, "m5z6TfYGMt4": {"duration": 84.53999999999999, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=m5z6TfYGMt4", "annotations": []}, "ufgYKCfK2Vk": {"duration": 180.559, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ufgYKCfK2Vk", "annotations": [{"segment": [13.52074967896675, 170.69946311700468], "label": "Baton twirling"}]}, "S5kuckj4Ud4": {"duration": 230.041, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=S5kuckj4Ud4", "annotations": [{"segment": [26.19801325585023, 43.424104163806554], "label": "Changing car wheel"}, {"segment": [79.67067044929797, 166.87775567082682], "label": "Changing car wheel"}]}, "QrKzEkFKu5g": {"duration": 100.728, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QrKzEkFKu5g", "annotations": []}, "cc9iCNPSiKc": {"duration": 189.73, "subset": "validation", "resolution": "426x240", "url": 
"https://www.youtube.com/watch?v=cc9iCNPSiKc", "annotations": [{"segment": [0.01, 189.681], "label": "Blowing leaves"}]}, "QgYjZzI6sEU": {"duration": 122.741, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QgYjZzI6sEU", "annotations": []}, "MeGHSbUpM-U": {"duration": 229.39, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=MeGHSbUpM-U", "annotations": [{"segment": [0.01, 229.341], "label": "Sailing"}]}, "fG7iLOObw30": {"duration": 181.28, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fG7iLOObw30", "annotations": [{"segment": [42.13798660842434, 50.622144985959444], "label": "Getting a haircut"}, {"segment": [100.67867941341655, 114.25333281747271], "label": "Getting a haircut"}, {"segment": [138.29178155382218, 143.38227658034322], "label": "Getting a haircut"}, {"segment": [143.09947130109205, 156.10851414664586], "label": "Getting a haircut"}]}, "FutaoYIC4Ok": {"duration": 26.796, "subset": "training", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=FutaoYIC4Ok", "annotations": [{"segment": [0.2, 25.916882995319813], "label": "Rollerblading"}]}, "1LrEiYVd3qg": {"duration": 182.77, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1LrEiYVd3qg", "annotations": [{"segment": [75.55776822152886, 96.37179493915755], "label": "Starting a campfire"}]}, "4r0qYLCucEI": {"duration": 213.996, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4r0qYLCucEI", "annotations": [{"segment": [67.99836438001992, 177.66239320858148], "label": "Changing car wheel"}]}, "jCd-VeMUwRg": {"duration": 161.054, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jCd-VeMUwRg", "annotations": [{"segment": [12.060176382565507, 150.75220436817472], "label": "Scuba diving"}]}, "3DnHpUPPSBw": {"duration": 77.37, "subset": "testing", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=3DnHpUPPSBw", "annotations": []}, "w8LDUu66JWI": {"duration": 160.2, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=w8LDUu66JWI", "annotations": [{"segment": [13.495319691107646, 16.99410627769111], "label": "Cheerleading"}, {"segment": [123.20727051326054, 125.45649046177849], "label": "Cheerleading"}]}, "FJZTNJ0LuJ4": {"duration": 146.913, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FJZTNJ0LuJ4", "annotations": [{"segment": [56.61064790346716, 99.92810650858034], "label": "Making a cake"}]}, "75Tc1nPmZFQ": {"duration": 62.253, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=75Tc1nPmZFQ", "annotations": []}, "wW6es5Ew50s": {"duration": 116.17, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wW6es5Ew50s", "annotations": []}, "Pfc7KbwqdYk": {"duration": 37.99, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Pfc7KbwqdYk", "annotations": [{"segment": [30.15747191887676, 36.96435959438378], "label": "Drinking beer"}]}, "YRRcCaxIoR4": {"duration": 97.57, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=YRRcCaxIoR4", "annotations": [{"segment": [10.807313119735674, 90.11168122371154], "label": "Pole vault"}]}, "h8cXVe6N6Oc": {"duration": 4.41, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=h8cXVe6N6Oc", "annotations": [{"segment": [0.6075711399999999, 3.5239479], "label": "Brushing hair"}]}, "Jo3R272P8eM": {"duration": 128.22, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Jo3R272P8eM", "annotations": [{"segment": [0.01, 128.141], "label": "Playing racquetball"}]}, "kuyuhXuMPX4": {"duration": 133.28, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=kuyuhXuMPX4", "annotations": [{"segment": [50.30088923556943, 96.86039001560063], "label": "Shoveling snow"}]}, "ANB_IQfi9kA": {"duration": 132.238, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ANB_IQfi9kA", "annotations": [{"segment": [9.902350706273149, 12.79053632893615], "label": "Surfing"}, {"segment": [34.24562952586131, 67.25346484555382], "label": "Surfing"}, {"segment": [80.45659904836194, 92.83453736349453], "label": "Surfing"}, {"segment": [94.48492913884556, 97.16681577379094], "label": "Surfing"}, {"segment": [102.73688801560061, 107.48176436973479], "label": "Surfing"}, {"segment": [109.33845511700467, 127.08016670202808], "label": "Surfing"}]}, "5FM_xJGb-Tk": {"duration": 34.668, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5FM_xJGb-Tk", "annotations": [{"segment": [0.2, 34.641], "label": "Canoeing"}]}, "qWdjYyFfrTM": {"duration": 184.41, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qWdjYyFfrTM", "annotations": [{"segment": [14.45605668, 177.74971918876756], "label": "Playing saxophone"}]}, "fs8yU4pBNm4": {"duration": 21.502, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=fs8yU4pBNm4", "annotations": [{"segment": [0.01, 17.86447748381457], "label": "Doing a powerbomb"}]}, "nQQ-tcG6wBA": {"duration": 192.912, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=nQQ-tcG6wBA", "annotations": [{"segment": [0, 123.37162246489859], "label": "Kneeling"}]}, "FLJzzot6F-s": {"duration": 232.73, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=FLJzzot6F-s", "annotations": [{"segment": [73.70502637248347, 82.78200006367601], "label": "Tai chi"}, {"segment": [90.76973691192545, 228.01357912275674], "label": "Tai chi"}]}, "2ji02dSx1nM": {"duration": 162.656, "subset": "validation", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2ji02dSx1nM", "annotations": [{"segment": [0.761003120124805, 4.56601872074883], "label": "Surfing"}, {"segment": [5.834357254290172, 7.102695787831514], "label": "Surfing"}, {"segment": [8.371034321372855, 10.146708268330734], "label": "Surfing"}, {"segment": [19.78608112324493, 21.30808736349454], "label": "Surfing"}, {"segment": [25.366770670826835, 29.17178627145086], "label": "Surfing"}, {"segment": [31.70846333853354, 33.991472698907955], "label": "Surfing"}, {"segment": [36.0208143525741, 42.61617472698908], "label": "Surfing"}, {"segment": [44.64551638065523, 53.52388611544462], "label": "Surfing"}, {"segment": [58.34357254290172, 62.40225585023401], "label": "Surfing"}, {"segment": [70.77329017160686, 93.85705148205928], "label": "Surfing"}, {"segment": [103.25545553822154, 117.20889547581903], "label": "Surfing"}, {"segment": [132.43082995319813, 139.53439937597506], "label": "Surfing"}]}, "i_LT0ef1n-Y": {"duration": 83.15, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=i_LT0ef1n-Y", "annotations": []}, "ZXlJIrRiXrA": {"duration": 78.414, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZXlJIrRiXrA", "annotations": [{"segment": [17.465999251206192, 57.772151369374335], "label": "Ice fishing"}]}, "OqLrsVv44MY": {"duration": 38.267, "subset": "training", "resolution": "144x192", "url": "https://www.youtube.com/watch?v=OqLrsVv44MY", "annotations": [{"segment": [0.01, 38.26569], "label": "Removing ice from car"}]}, "81w6SClSFMU": {"duration": 115.38, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=81w6SClSFMU", "annotations": [{"segment": [70.10562629281398, 79.26687862926258], "label": "Hula hoop"}]}, "2uUNiV8xmEo": {"duration": 25.287, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=2uUNiV8xmEo", "annotations": []}, "feUoPtDWe1Y": 
{"duration": 92.28, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=feUoPtDWe1Y", "annotations": [{"segment": [27.809964898595947, 33.41507410296412], "label": "Playing violin"}]}, "ivWTI2J_UnY": {"duration": 128.85, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ivWTI2J_UnY", "annotations": [{"segment": [15.477784216130548, 49.85052578701787], "label": "Using the pommel horse"}, {"segment": [67.53942132917318, 106.53539673946959], "label": "Using the pommel horse"}]}, "e6J_ygZ779A": {"duration": 142.32, "subset": "validation", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=e6J_ygZ779A", "annotations": [{"segment": [142.281, 142.281], "label": "High jump"}]}, "Et8xkGzQOTA": {"duration": 42.12, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Et8xkGzQOTA", "annotations": [{"segment": [11.630914652106082, 35.155589485179405], "label": "Breakdancing"}]}, "40datCdG408": {"duration": 90.86, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=40datCdG408", "annotations": []}, "KUejIghF6K4": {"duration": 32.137, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KUejIghF6K4", "annotations": [{"segment": [0.2, 31.93641723356009], "label": "Hitting a pinata"}]}, "Zw4illqWzFI": {"duration": 24.59, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Zw4illqWzFI", "annotations": [{"segment": [5.055813009345794, 24.389637188208617], "label": "Playing blackjack"}]}, "JSYv9uYZP2o": {"duration": 194.653, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=JSYv9uYZP2o", "annotations": [{"segment": [0.20000000000000004, 129.02562402496102], "label": "Hurling"}]}, "lGWAepvduTI": {"duration": 89.931, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lGWAepvduTI", 
"annotations": [{"segment": [19.79815879581323, 81.057454825267], "label": "Doing crunches"}]}, "5zPTTiJiXUY": {"duration": 14.21, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5zPTTiJiXUY", "annotations": [{"segment": [10.685465519500779, 13.500930500780031], "label": "Smoking a cigarette"}]}, "FrVitupq7sQ": {"duration": 165.12, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FrVitupq7sQ", "annotations": [{"segment": [21.122624017885883, 39.669318277493005], "label": "Tennis serve with ball bouncing"}, {"segment": [103.8099692586343, 108.44664282353607], "label": "Tennis serve with ball bouncing"}, {"segment": [129.82685863338534, 133.17556728861155], "label": "Tennis serve with ball bouncing"}, {"segment": [157.38930679563182, 159.7076435569423], "label": "Tennis serve with ball bouncing"}]}, "5F4jcV8dHVs": {"duration": 180.577, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5F4jcV8dHVs", "annotations": [{"segment": [19.380604890932528, 24.436415980531663], "label": "Wakeboarding"}, {"segment": [31.45837582719713, 148.86554446344374], "label": "Wakeboarding"}]}, "qlq13iS0nEA": {"duration": 212.23, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qlq13iS0nEA", "annotations": [{"segment": [7.334668799999999, 200.64201292979718], "label": "Playing guitarra"}]}, "siMXfU6NPlE": {"duration": 51.67, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=siMXfU6NPlE", "annotations": [{"segment": [0.01, 49.50797971918877], "label": "Cumbia"}]}, "xAxax-pfZsk": {"duration": 148.26, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xAxax-pfZsk", "annotations": [{"segment": [65.43039469578783, 111.45479251170046], "label": "Rollerblading"}]}, "wZEf-Bs__sU": {"duration": 82.78, "subset": "training", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=wZEf-Bs__sU", "annotations": [{"segment": [34.58983851482059, 69.95407639937598], "label": "Long jump"}]}, "xzQRc682Isc": {"duration": 77.25, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=xzQRc682Isc", "annotations": [{"segment": [28.826159126365052, 44.746882995319815], "label": "Hammer throw"}]}, "COThs7Ab2Do": {"duration": 112.687, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=COThs7Ab2Do", "annotations": []}, "E3KLk-55yC0": {"duration": 61.348, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=E3KLk-55yC0", "annotations": [{"segment": [1.6262979719188768, 61.120999999999995], "label": "Beach soccer"}]}, "BSsXKG9dFHI": {"duration": 64.738, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BSsXKG9dFHI", "annotations": [{"segment": [3.0298247269890797, 6.261637769110765], "label": "Elliptical trainer"}, {"segment": [18.481930834633385, 59.58655296411857], "label": "Elliptical trainer"}]}, "kiQVjQe0mlY": {"duration": 150.768, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kiQVjQe0mlY", "annotations": []}, "TXgzRTa2C38": {"duration": 62.903, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TXgzRTa2C38", "annotations": [{"segment": [8.63564965455761, 25.808816112324493], "label": "Playing ten pins"}, {"segment": [35.818318939157564, 56.720516018720744], "label": "Playing ten pins"}]}, "s84PG-JfEwc": {"duration": 222.749, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=s84PG-JfEwc", "annotations": []}, "PveTDlMybvU": {"duration": 106.37, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PveTDlMybvU", "annotations": [{"segment": [5.642102511311337, 99.89840163806554], "label": "Doing motocross"}]}, "Deo_GsHyw8Y": 
{"duration": 88.376, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Deo_GsHyw8Y", "annotations": [{"segment": [5.098092043681747, 88.321], "label": "Waxing skis"}]}, "qGID8CHyClA": {"duration": 123.624, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=qGID8CHyClA", "annotations": [{"segment": [0, 112.99560998439938], "label": "Tug of war"}]}, "Cz2ut3p7nw0": {"duration": 232.53, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Cz2ut3p7nw0", "annotations": [{"segment": [3.9994127999999995, 9.250190603542508], "label": "Brushing teeth"}, {"segment": [24.12304608374811, 169.5868269344774], "label": "Brushing teeth"}]}, "QJfuxpFMn8s": {"duration": 175.96, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=QJfuxpFMn8s", "annotations": [{"segment": [33.20817628705148, 167.98111076443058], "label": "Washing dishes"}]}, "Coctj-3Yzic": {"duration": 43.584, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Coctj-3Yzic", "annotations": [{"segment": [1.4920591900311524, 25.037397196261683], "label": "Beer pong"}]}, "uZpVDAd7da8": {"duration": 22.245, "subset": "training", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=uZpVDAd7da8", "annotations": [{"segment": [2.533329477379095, 22.244715], "label": "Cutting the grass"}]}, "XPwpA67qHjA": {"duration": 220.102, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=XPwpA67qHjA", "annotations": [{"segment": [25.75295826833073, 123.61419968798751], "label": "Slacklining"}, {"segment": [155.20449516380654, 185.76467230889236], "label": "Slacklining"}, {"segment": [193.6622461778471, 212.8911216848674], "label": "Slacklining"}]}, "F-WmsfI8HG0": {"duration": 235.94, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=F-WmsfI8HG0", "annotations": [{"segment": 
[12.514649878315133, 38.28010551014041], "label": "Vacuuming floor"}]}, "nVHL9qP11aA": {"duration": 231.991, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nVHL9qP11aA", "annotations": [{"segment": [0.01, 231.05103989873757], "label": "Beer pong"}]}, "ev-RTtbVjFI": {"duration": 143.38, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ev-RTtbVjFI", "annotations": [{"segment": [62.83747425897036, 137.30323556942278], "label": "Cleaning windows"}]}, "td15Nx9J0a4": {"duration": 142.34, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=td15Nx9J0a4", "annotations": [{"segment": [87.09377543999999, 112.06413077999999], "label": "Shot put"}, {"segment": [2.4553305, 76.97212026], "label": "Shot put"}]}, "i9DlXuEewE8": {"duration": 153.65, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=i9DlXuEewE8", "annotations": [{"segment": [8.389412527301092, 116.0135903775351], "label": "Cumbia"}]}, "4A49mkP6HsM": {"duration": 129.2, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4A49mkP6HsM", "annotations": [{"segment": [26.90727185413947, 115.38886244565428], "label": "Playing violin"}]}, "o9ghRI_Iddk": {"duration": 148.07, "subset": "validation", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=o9ghRI_Iddk", "annotations": [{"segment": [39.733725004915364, 121.61245614977517], "label": "Playing violin"}]}, "1ezOsdHncXo": {"duration": 83.36, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1ezOsdHncXo", "annotations": [{"segment": [39.66410137285491, 64.50293206864275], "label": "Washing hands"}]}, "6H0D8VaIli0": {"duration": 175.427, "subset": "testing", "resolution": "852x480", "url": "https://www.youtube.com/watch?v=6H0D8VaIli0", "annotations": []}, "USWExMIMcik": {"duration": 45.697, "subset": "validation", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=USWExMIMcik", "annotations": [{"segment": [17.25217244929797, 45.62558077833317], "label": "Shuffleboard"}]}, "fgoXpih2Kws": {"duration": 154.67000000000002, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=fgoXpih2Kws", "annotations": [{"segment": [4.46121294851794, 141.432507800312], "label": "Doing karate"}]}, "GOF5guH_kuI": {"duration": 110.55, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GOF5guH_kuI", "annotations": []}, "T3A88zsRuVs": {"duration": 223.748, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=T3A88zsRuVs", "annotations": []}, "D5A6eBnKmD8": {"duration": 170.57, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=D5A6eBnKmD8", "annotations": [{"segment": [50.0278813220556, 166.31609482066358], "label": "Doing kickboxing"}]}, "lAZIxuxjogI": {"duration": 156.32, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lAZIxuxjogI", "annotations": [{"segment": [100.22779950093603, 145.58636569843992], "label": "Bungee jumping"}]}, "lgz1mojrClE": {"duration": 188.9, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=lgz1mojrClE", "annotations": []}, "oqWo2Rl8r3s": {"duration": 163.259, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=oqWo2Rl8r3s", "annotations": []}, "wCG7f6naJsQ": {"duration": 235.428, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wCG7f6naJsQ", "annotations": [{"segment": [14.686614664586585, 235.15300000000002], "label": "Assembling bicycle"}]}, "Pf8eE_zup_k": {"duration": 62.83, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=Pf8eE_zup_k", "annotations": [{"segment": [9.724404472698907, 56.636104441497665], "label": "Mixing drinks"}]}, 
"wIcK3bQNqcA": {"duration": 140.504, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wIcK3bQNqcA", "annotations": [{"segment": [50.913557038581374, 133.07449566597634], "label": "Assembling bicycle"}]}, "cAGABdvv5u8": {"duration": 219.29, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cAGABdvv5u8", "annotations": [{"segment": [19.153659906396257, 90.29582527301093], "label": "Surfing"}, {"segment": [113.55384087363497, 193.9484976599064], "label": "Surfing"}]}, "1y2aqd5HQlU": {"duration": 60.187, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=1y2aqd5HQlU", "annotations": [{"segment": [27.04150210296412, 36.33701845085804], "label": "Carving jack-o-lanterns"}, {"segment": [40.56225315444618, 48.3554638299532], "label": "Carving jack-o-lanterns"}]}, "sRTDDlI1-Yo": {"duration": 167.231, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sRTDDlI1-Yo", "annotations": [{"segment": [43.959865997360986, 54.134553082803585], "label": "Swinging at the playground"}]}, "m2hiQ9EOUUI": {"duration": 97.617, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=m2hiQ9EOUUI", "annotations": [{"segment": [0.01, 0.02], "label": "Capoeira"}, {"segment": [15.22881263650546, 27.716438998439937], "label": "Capoeira"}, {"segment": [40.81321786583463, 59.08779302964118], "label": "Capoeira"}, {"segment": [66.54991122152886, 77.36236819344774], "label": "Capoeira"}]}, "nrC41oAR-F0": {"duration": 8.04, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nrC41oAR-F0", "annotations": [{"segment": [3.446768057722309, 5.514828892355694], "label": "High jump"}]}, "dVCAykynRzs": {"duration": 70.473, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dVCAykynRzs", "annotations": [{"segment": [21.422925117004684, 26.374165366614665], 
"label": "Rock-paper-scissors"}, {"segment": [30.65996723868955, 62.199073322932925], "label": "Rock-paper-scissors"}]}, "DxhdDYQkQU8": {"duration": 53.64, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DxhdDYQkQU8", "annotations": [{"segment": [13.221195975039, 27.27917650546022], "label": "Skateboarding"}]}, "Pjnuoa4o55c": {"duration": 209.79, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Pjnuoa4o55c", "annotations": []}, "UQtJ2osfP9I": {"duration": 229.0, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UQtJ2osfP9I", "annotations": []}, "b0qGo-HT5iQ": {"duration": 25.96, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=b0qGo-HT5iQ", "annotations": []}, "Tp4g0ErB2oQ": {"duration": 21.48, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Tp4g0ErB2oQ", "annotations": [{"segment": [7.337099580000001, 17.009072540000002], "label": "Playing field hockey"}]}, "3EuY86B4uTo": {"duration": 204.8, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3EuY86B4uTo", "annotations": [{"segment": [57.49709955745168, 98.38392590941733], "label": "Paintball"}, {"segment": [127.4519040190179, 164.1861611950078], "label": "Paintball"}, {"segment": [171.8524410951638, 193.254139149766], "label": "Paintball"}]}, "jIs9DOPuZ7o": {"duration": 122.973, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=jIs9DOPuZ7o", "annotations": []}, "GPR8rtfZr4A": {"duration": 78.14, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GPR8rtfZr4A", "annotations": [{"segment": [28.035808736349455, 36.568446177847115], "label": "Playing kickball"}, {"segment": [28.279598377535102, 36.81223581903276], "label": "Playing kickball"}, {"segment": [41.688028642745714, 52.049088393135726], "label": "Playing 
kickball"}]}, "uKzelWWaYB4": {"duration": 83.639, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uKzelWWaYB4", "annotations": [{"segment": [0.01, 0.02], "label": "Fun sliding down"}]}, "v05j8FVewPc": {"duration": 147.61, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=v05j8FVewPc", "annotations": [{"segment": [49.263734789391584, 71.5935585023401], "label": "Ironing clothes"}, {"segment": [95.5348127925117, 121.0874976599064], "label": "Ironing clothes"}, {"segment": [129.14465054602184, 140.88507332293293], "label": "Ironing clothes"}]}, "Ctabf6Se00M": {"duration": 170.133, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ctabf6Se00M", "annotations": []}, "K49OCArHjXI": {"duration": 62.114, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=K49OCArHjXI", "annotations": []}, "QjaqFPDoImc": {"duration": 27.168, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=QjaqFPDoImc", "annotations": [{"segment": [2.336896326047238, 23.259288849411725], "label": "Powerbocking"}]}, "qumU7AgV3Mk": {"duration": 131.542, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qumU7AgV3Mk", "annotations": [{"segment": [35.90115109080615, 48.1909641749183], "label": "Curling"}]}, "rNZXdUBWNgY": {"duration": 187.2, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rNZXdUBWNgY", "annotations": []}, "23KpderJKY4": {"duration": 75.628, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=23KpderJKY4", "annotations": []}, "QWqEi91fWOQ": {"duration": 180.88, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=QWqEi91fWOQ", "annotations": [{"segment": [11.710864152886115, 26.666907528861152], "label": "Playing lacrosse"}, {"segment": [38.51886643057722, 
42.751708895475815], "label": "Playing lacrosse"}, {"segment": [51.499583322932914, 68.7131426801872], "label": "Playing lacrosse"}, {"segment": [92.13487098595944, 168.32603535413415], "label": "Playing lacrosse"}]}, "cZZM3bgmXE4": {"duration": 235.43, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cZZM3bgmXE4", "annotations": [{"segment": [110.52945397815913, 208.57385335413414], "label": "Playing violin"}]}, "brZ1m2qNUzc": {"duration": 175.8, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=brZ1m2qNUzc", "annotations": [{"segment": [44.28064595787832, 150.11550255070202], "label": "Cleaning windows"}]}, "dm5WXFiQZUQ": {"duration": 159.89, "subset": "training", "resolution": "1920x800", "url": "https://www.youtube.com/watch?v=dm5WXFiQZUQ", "annotations": [{"segment": [25.692473530233734, 48.39164873010921], "label": "Doing motocross"}, {"segment": [58.61874975039002, 159.891994], "label": "Doing motocross"}]}, "L69n16haU9Q": {"duration": 20.69, "subset": "testing", "resolution": "270x360", "url": "https://www.youtube.com/watch?v=L69n16haU9Q", "annotations": []}, "sgFp3HCSgCo": {"duration": 216.201, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=sgFp3HCSgCo", "annotations": [{"segment": [63.73545865834633, 75.20109672386896], "label": "Curling"}, {"segment": [89.70175663026521, 105.21409048361934], "label": "Curling"}, {"segment": [184.46188299531983, 211.45941809672388], "label": "Curling"}]}, "QYLxQ_uE8iE": {"duration": 28.68, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QYLxQ_uE8iE", "annotations": []}, "rCmpRDbS_O4": {"duration": 95.34, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=rCmpRDbS_O4", "annotations": [{"segment": [9.51920787035563, 44.02633578783152], "label": "Washing face"}]}, "HM_rHjh-wqQ": {"duration": 121.53, "subset": "validation", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=HM_rHjh-wqQ", "annotations": [{"segment": [8.531974797191888, 51.19184878315132], "label": "Playing violin"}, {"segment": [71.28938941653666, 106.7444846848674], "label": "Playing violin"}]}, "BmZwZLKxHCY": {"duration": 199.62, "subset": "training", "resolution": "480x270", "url": "https://www.youtube.com/watch?v=BmZwZLKxHCY", "annotations": [{"segment": [15.456654030000001, 154.89635335413416], "label": "Putting on makeup"}]}, "zjAZ7m8TQ3E": {"duration": 221.147, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zjAZ7m8TQ3E", "annotations": [{"segment": [58.65306155652214, 62.09771545130955], "label": "Snow tubing"}, {"segment": [76.22079641993791, 86.55475810430013], "label": "Snow tubing"}, {"segment": [96.5442543991836, 113.76752387312065], "label": "Snow tubing"}, {"segment": [119.96790088373798, 141.3247550314199], "label": "Snow tubing"}, {"segment": [189.54990955844357, 213.31802042413142], "label": "Snow tubing"}]}, "9VRLj4IfUzY": {"duration": 214.414, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9VRLj4IfUzY", "annotations": [{"segment": [25.5120073631144, 208.83781577432936], "label": "Painting furniture"}]}, "z6IsPWucZnU": {"duration": 103.538, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=z6IsPWucZnU", "annotations": [{"segment": [6.7975261906309585, 97.41327242048015], "label": "Swimming"}]}, "CwvtmoTWNgQ": {"duration": 153.81, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CwvtmoTWNgQ", "annotations": [{"segment": [5.75629329173167, 147.7448611544462], "label": "Windsurfing"}]}, "NWbWuVtgt9Y": {"duration": 223.19, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NWbWuVtgt9Y", "annotations": [{"segment": [100.62709669203095, 125.00044191155402], "label": "Playing guitarra"}, {"segment": 
[131.26787290327613, 212.39629302652105], "label": "Playing guitarra"}]}, "i887uArWX9w": {"duration": 134.955, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=i887uArWX9w", "annotations": [{"segment": [0.2, 129.84914196567863], "label": "Table soccer"}]}, "RnZ-icC6xbQ": {"duration": 29.977, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RnZ-icC6xbQ", "annotations": [{"segment": [7.763786472487405, 12.526481051926657], "label": "Doing crunches"}, {"segment": [20.604384603328526, 28.822367407066842], "label": "Doing crunches"}]}, "sanasW2azw4": {"duration": 125.44, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sanasW2azw4", "annotations": [{"segment": [4.694664561622465, 108.75972901092044], "label": "Mowing the lawn"}]}, "yDrTpm9c7MY": {"duration": 80.43, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yDrTpm9c7MY", "annotations": [{"segment": [9.91307038676105, 65.12510699063962], "label": "Tumbling"}]}, "hfZQBDePOOE": {"duration": 199.46, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hfZQBDePOOE", "annotations": [{"segment": [26.760465575663027, 177.3658764898596], "label": "Wakeboarding"}]}, "2NITA0d4vqo": {"duration": 158.52, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2NITA0d4vqo", "annotations": [{"segment": [26.206474258970356, 126.65432234], "label": "Ping-pong"}]}, "YQfJWGJ75Pk": {"duration": 89.56, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YQfJWGJ75Pk", "annotations": [{"segment": [0.2, 80.9970109034268], "label": "Painting furniture"}]}, "KLr1ZVJDFDs": {"duration": 228.926, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=KLr1ZVJDFDs", "annotations": [{"segment": [0.2, 228.72555555555556], "label": "Longboarding"}]}, 
"e-Z9xiEQqPQ": {"duration": 153.35, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=e-Z9xiEQqPQ", "annotations": [{"segment": [49.170046801872076, 147.7858783151326], "label": "Horseback riding"}]}, "mzGbmHjdCM8": {"duration": 80.04, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mzGbmHjdCM8", "annotations": [{"segment": [9.614691129485179, 77.16726127301092], "label": "Cutting the grass"}]}, "cCDffwsJvsY": {"duration": 111.5, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cCDffwsJvsY", "annotations": []}, "YDIRaWzfyYY": {"duration": 195.977, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=YDIRaWzfyYY", "annotations": [{"segment": [56.85071138845554, 136.3194477379095], "label": "Sharpening knives"}]}, "OSjsVUg_wXA": {"duration": 108.066, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OSjsVUg_wXA", "annotations": [{"segment": [4.21298751950078, 105.83024648985959], "label": "Drum corps"}]}, "5HW6mjZZvtY": {"duration": 85.8, "subset": "training", "resolution": "480x270", "url": "https://www.youtube.com/watch?v=5HW6mjZZvtY", "annotations": [{"segment": [5.60769192, 80.1314352574103], "label": "Skateboarding"}]}, "d-eoNpp8mNM": {"duration": 42.63, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=d-eoNpp8mNM", "annotations": [{"segment": [0.01, 40.30404533732369], "label": "Playing lacrosse"}]}, "Mw_AIqw6rfM": {"duration": 129.17, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Mw_AIqw6rfM", "annotations": [{"segment": [12.878191999999999, 71.13561609672387], "label": "Playing polo"}]}, "k5vE0ehf5TA": {"duration": 67.66, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=k5vE0ehf5TA", "annotations": [{"segment": [1.688934689547582, 67.662946], "label": 
"Vacuuming floor"}]}, "-9n7u53oAZk": {"duration": 20.527, "subset": "testing", "resolution": "220x176", "url": "https://www.youtube.com/watch?v=-9n7u53oAZk", "annotations": []}, "xPDjkE7llg4": {"duration": 119.885, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=xPDjkE7llg4", "annotations": [{"segment": [2.805412418096724, 113.89974417472699], "label": "Clipping cat claws"}]}, "1stYB5_yR5k": {"duration": 181.929, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1stYB5_yR5k", "annotations": [{"segment": [120.24804817106613, 134.67781446966643], "label": "Painting fence"}]}, "teffBaLEZQg": {"duration": 189.71, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=teffBaLEZQg", "annotations": []}, "Z47QGlaQ1NE": {"duration": 166.3, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Z47QGlaQ1NE", "annotations": [{"segment": [8.302074408736349, 166.04148987728217], "label": "High jump"}]}, "0_-Q1zOC3Kw": {"duration": 39.27, "subset": "validation", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=0_-Q1zOC3Kw", "annotations": [{"segment": [0, 37.36601439937598], "label": "Playing harmonica"}]}, "C_85qE-jaWA": {"duration": 117.842, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=C_85qE-jaWA", "annotations": []}, "GOZ305xZvz8": {"duration": 38.62, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GOZ305xZvz8", "annotations": [{"segment": [0.01, 33.25329342969637], "label": "Cricket"}]}, "uIHWHnItois": {"duration": 147.284, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uIHWHnItois", "annotations": []}, "9k19UUJtldg": {"duration": 182.18, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9k19UUJtldg", "annotations": []}, "NOXkMlnZKYM": {"duration": 67.11, 
"subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NOXkMlnZKYM", "annotations": []}, "84uYA9rFoSw": {"duration": 87.21000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=84uYA9rFoSw", "annotations": [{"segment": [14.558371758190328, 86.94205190171606], "label": "Ballet"}]}, "TnP6coXzbLY": {"duration": 197.44, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TnP6coXzbLY", "annotations": []}, "y8ENWnuzCIE": {"duration": 38.13, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=y8ENWnuzCIE", "annotations": [{"segment": [1.2482386895475819, 20.031258970358813], "label": "Getting a piercing"}, {"segment": [9.546604799999999, 17.88050196], "label": "Getting a piercing"}]}, "AS0hqTk_mIs": {"duration": 41.889, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=AS0hqTk_mIs", "annotations": [{"segment": [10.913305926677065, 25.682210953198126], "label": "Playing rubik cube"}]}, "xcI2e5T_BL0": {"duration": 89.862, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xcI2e5T_BL0", "annotations": [{"segment": [5.035570093457944, 80.16734112149533], "label": "Carving jack-o-lanterns"}]}, "kBDTz-dwQ2k": {"duration": 82.5, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kBDTz-dwQ2k", "annotations": [{"segment": [24.325373300646312, 78.89658112855126], "label": "Cumbia"}]}, "iGXUvIRX77c": {"duration": 91.534, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=iGXUvIRX77c", "annotations": [{"segment": [0, 23.561552421280524], "label": "Drum corps"}, {"segment": [30.130227641758733, 80.25207551975548], "label": "Drum corps"}]}, "64djQm_uhiE": {"duration": 152.18, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=64djQm_uhiE", "annotations": []}, 
"Xj--zKhm6k4": {"duration": 29.722, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=Xj--zKhm6k4", "annotations": [{"segment": [0.2, 16.666284112149533], "label": "Throwing darts"}]}, "UIPTzsWiGSU": {"duration": 55.334, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=UIPTzsWiGSU", "annotations": [{"segment": [0.01, 55.301], "label": "Having an ice cream"}]}, "bI1L2D_erOY": {"duration": 184.181, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=bI1L2D_erOY", "annotations": [{"segment": [66.23025668509734, 69.1035866931276], "label": "Applying sunscreen"}]}, "ggDfJLB8jTk": {"duration": 174.68, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ggDfJLB8jTk", "annotations": [{"segment": [38.15244989079563, 47.96308036267029], "label": "Archery"}, {"segment": [120.99776965366615, 124.54049714352576], "label": "Archery"}]}, "hT_4wWPNYxo": {"duration": 166.07, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hT_4wWPNYxo", "annotations": [{"segment": [10.154691768123353, 152.38574387802885], "label": "Hula hoop"}]}, "OvSf6kk07uE": {"duration": 210.14, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OvSf6kk07uE", "annotations": [{"segment": [23.276100233124968, 201.94475695218284], "label": "Tumbling"}]}, "SzLlB2SgtQo": {"duration": 136.3, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=SzLlB2SgtQo", "annotations": [{"segment": [0.01, 133.7490739738433], "label": "Windsurfing"}]}, "_BJqPK0NNZU": {"duration": 97.39, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=_BJqPK0NNZU", "annotations": [{"segment": [0.01, 69.86904836193449], "label": "Discus throw"}]}, "rMy6sItJID0": {"duration": 93.21000000000001, "subset": "validation", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=rMy6sItJID0", "annotations": [{"segment": [16.818661799999997, 73.88798517940718], "label": "Using the balance beam"}]}, "ZXEc0cahpuw": {"duration": 30.37, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZXEc0cahpuw", "annotations": [{"segment": [1.5399067355694227, 20.634750256630266], "label": "Hand washing clothes"}]}, "JrRtv8puAZI": {"duration": 183.62, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=JrRtv8puAZI", "annotations": [{"segment": [74.33730703513855, 135.0675925513211], "label": "Washing dishes"}]}, "SvDnZ47J37U": {"duration": 55.13, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SvDnZ47J37U", "annotations": [{"segment": [0.8599661622464899, 23.219086380655227], "label": "Playing field hockey"}]}, "EvDrxFO5jk4": {"duration": 25.78, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=EvDrxFO5jk4", "annotations": []}, "DLJFoRluYx4": {"duration": 178.38, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=DLJFoRluYx4", "annotations": [{"segment": [73.98199821040704, 130.1062725686625], "label": "Shaving"}]}, "uE15Mhtj9nE": {"duration": 230.668, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uE15Mhtj9nE", "annotations": [{"segment": [126.84887346514269, 219.69145414523967], "label": "Hanging wallpaper"}]}, "9vqOA8KVcME": {"duration": 207.03, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9vqOA8KVcME", "annotations": [{"segment": [123.21596661961716, 193.94806697425895], "label": "Putting on makeup"}]}, "p7TUDtxbz4c": {"duration": 86.4, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=p7TUDtxbz4c", "annotations": []}, "oO6jZR9Aijc": {"duration": 159.13, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=oO6jZR9Aijc", "annotations": [{"segment": [11.419322911691978, 27.55532267821325], "label": "Paintball"}, {"segment": [82.91421418489392, 100.78793693166928], "label": "Paintball"}]}, "JFBd-R1YuXY": {"duration": 84.684, "subset": "validation", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=JFBd-R1YuXY", "annotations": [{"segment": [15.672315993821822, 29.76423173225035], "label": "Building sandcastles"}]}, "dV7OXaQ8pQo": {"duration": 186.457, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=dV7OXaQ8pQo", "annotations": []}, "ZSLzzrc8pro": {"duration": 175.427, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZSLzzrc8pro", "annotations": [{"segment": [25.17818049320612, 86.4815764766645], "label": "Elliptical trainer"}]}, "um2h6O3zjGM": {"duration": 45.743, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=um2h6O3zjGM", "annotations": [{"segment": [5.406673269035453, 6.900622761740777], "label": "Having an ice cream"}]}, "PHr8BcCBQxw": {"duration": 220.938, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PHr8BcCBQxw", "annotations": []}, "sR3_5j8pUdI": {"duration": 84.475, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=sR3_5j8pUdI", "annotations": [{"segment": [13.205415153966886, 77.39109739695753], "label": "Using the rowing machine"}]}, "lFsPB8BbqPY": {"duration": 30.26, "subset": "testing", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=lFsPB8BbqPY", "annotations": []}, "DTprdtsGCts": {"duration": 237.7, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=DTprdtsGCts", "annotations": [{"segment": [33.74555385023401, 233.6230651170047], "label": "Playing flauta"}]}, "zLbSEo4xhVY": {"duration": 160.194, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=zLbSEo4xhVY", "annotations": [{"segment": [0.01, 5.8388632564444265], "label": "Wakeboarding"}, {"segment": [30.791268708157823, 156.55139218479334], "label": "Wakeboarding"}]}, "kOVQPyzXkUY": {"duration": 128.058, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kOVQPyzXkUY", "annotations": [{"segment": [11.781683307332294, 97.2488096723869], "label": "Slacklining"}, {"segment": [101.04291107644306, 127.00255226209048], "label": "Slacklining"}]}, "86lJVqKyj3A": {"duration": 189.38, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=86lJVqKyj3A", "annotations": [{"segment": [20.090443291201037, 184.65480889235567], "label": "Wrapping presents"}]}, "Tt5ONIePiwI": {"duration": 192.262, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Tt5ONIePiwI", "annotations": []}, "6jTH_gFx6Ik": {"duration": 154.111, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=6jTH_gFx6Ik", "annotations": [{"segment": [8.174365875315285, 139.2046424061044], "label": "Rafting"}]}, "cbR34GknrBs": {"duration": 16.53, "subset": "validation", "resolution": "320x180", "url": "https://www.youtube.com/watch?v=cbR34GknrBs", "annotations": [{"segment": [6.4399375975039, 16.20288299531981], "label": "Triple jump"}]}, "YySTmiavdMc": {"duration": 36.92, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YySTmiavdMc", "annotations": [{"segment": [0.2, 14.629657416536661], "label": "Cutting the grass"}, {"segment": [21.944486124804992, 36.719725999999994], "label": "Cutting the grass"}]}, "EYoyxe8hd3g": {"duration": 194.978, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EYoyxe8hd3g", "annotations": [{"segment": [102.50791292199688, 114.06666868174727], "label": "Croquet"}, {"segment": [117.10844651326053, 118.02097986271451], "label": 
"Croquet"}]}, "HoybOj2Ro7o": {"duration": 218.536, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=HoybOj2Ro7o", "annotations": [{"segment": [31.025321372854915, 202.53546645865833], "label": "Making a cake"}]}, "PvqizTCKl8A": {"duration": 150.63, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PvqizTCKl8A", "annotations": []}, "uj0UBMgR2gk": {"duration": 109.53, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uj0UBMgR2gk", "annotations": [{"segment": [18.624867606949177, 30.927532121684866], "label": "Playing violin"}, {"segment": [40.49627134165367, 63.22202698907956], "label": "Playing violin"}, {"segment": [86.80213435257411, 95.51652185647426], "label": "Playing violin"}]}, "WnYqWQ005Tc": {"duration": 198.55, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WnYqWQ005Tc", "annotations": [{"segment": [12.079085612404088, 20.751249641822405], "label": "Breakdancing"}, {"segment": [44.0061598, 167.57123180000002], "label": "Breakdancing"}]}, "WMoofjkumOs": {"duration": 153.438, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=WMoofjkumOs", "annotations": [{"segment": [3.3447570093457943, 18.63507476635514], "label": "Bullfighting"}, {"segment": [31.297369158878503, 38.7036168224299], "label": "Bullfighting"}, {"segment": [67.6118738317757, 76.45158878504672], "label": "Bullfighting"}, {"segment": [81.70763551401869, 99.63896728971963], "label": "Bullfighting"}, {"segment": [117.0817601246106, 124.96685825545173], "label": "Bullfighting"}, {"segment": [137.15291900311527, 143.60436292834893], "label": "Bullfighting"}]}, "NI-TZyAxJU0": {"duration": 150.373, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=NI-TZyAxJU0", "annotations": [{"segment": [0.4690202808112325, 56.51694383775351], "label": "Rafting"}, {"segment": 
[62.848717628705145, 84.42365054602183], "label": "Rafting"}, {"segment": [87.94130265210609, 150.341], "label": "Rafting"}]}, "45Bscg4Qe-I": {"duration": 226.07, "subset": "training", "resolution": "624x360", "url": "https://www.youtube.com/watch?v=45Bscg4Qe-I", "annotations": [{"segment": [18.683202808112323, 22.20833541341654], "label": "Playing ten pins"}, {"segment": [25.02844149765991, 27.496034321372857], "label": "Playing ten pins"}, {"segment": [30.66865366614665, 33.84127301092044], "label": "Playing ten pins"}, {"segment": [46.5317503900156, 51.11442277691108], "label": "Playing ten pins"}, {"segment": [100.81879251170047, 106.4590046801872], "label": "Playing ten pins"}, {"segment": [114.5668096723869, 118.09194227769113], "label": "Playing ten pins"}, {"segment": [206.59105460218407, 214.6995772230889], "label": "Playing ten pins"}]}, "-crpjVnFv0U": {"duration": 89.65, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-crpjVnFv0U", "annotations": []}, "aEnq40UPxJ0": {"duration": 61.81, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aEnq40UPxJ0", "annotations": [{"segment": [0, 61.71508817472699], "label": "Playing water polo"}]}, "wBy9_ZSfEVA": {"duration": 151.325, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wBy9_ZSfEVA", "annotations": [{"segment": [0.01, 139.16183434844646], "label": "Kite flying"}]}, "X5gh32tHdsc": {"duration": 158.92000000000002, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=X5gh32tHdsc", "annotations": [{"segment": [7.437603790951638, 145.77703430265208], "label": "Ping-pong"}]}, "IgDBDaIoNAM": {"duration": 170.34, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IgDBDaIoNAM", "annotations": []}, "mBZAFJ-Q6Mw": {"duration": 225.65, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=mBZAFJ-Q6Mw", "annotations": [{"segment": [70.08680655226209, 80.30046177847113], "label": "Breakdancing"}, {"segment": [93.683872074883, 126.08581279251172], "label": "Breakdancing"}, {"segment": [131.36873790951637, 158.83994851794074], "label": "Breakdancing"}]}, "ezPArXEbFBg": {"duration": 202.734, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ezPArXEbFBg", "annotations": []}, "q1jvfsKWY5c": {"duration": 89.746, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=q1jvfsKWY5c", "annotations": [{"segment": [21.701239206030827, 64.40367641185648], "label": "Tug of war"}]}, "aVDVMN9hE8A": {"duration": 53.38, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aVDVMN9hE8A", "annotations": [{"segment": [8.97244656, 43.80527179407176], "label": "Making a sandwich"}]}, "Zt8zZhMs4Es": {"duration": 236.47, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Zt8zZhMs4Es", "annotations": [{"segment": [0.01, 225.58877122698908], "label": "Tennis serve with ball bouncing"}]}, "xCplsH6deic": {"duration": 73.19, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xCplsH6deic", "annotations": [{"segment": [10.562460998439935, 17.185409516380656], "label": "Playing kickball"}, {"segment": [20.040128705148206, 44.24814742589703], "label": "Playing kickball"}, {"segment": [47.67381045241809, 53.04068252730109], "label": "Playing kickball"}, {"segment": [54.98189157566302, 65.07098915054601], "label": "Playing kickball"}]}, "TB5AeIdAndA": {"duration": 94.6, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TB5AeIdAndA", "annotations": []}, "_--nxrRXdPg": {"duration": 208.68, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=_--nxrRXdPg", "annotations": [{"segment": [16.015765019419632, 
38.1187024103677], "label": "High jump"}, {"segment": [43.96947995503043, 49.49521430276745], "label": "High jump"}, {"segment": [56.97120783205871, 58.921467013612954], "label": "High jump"}, {"segment": [68.02267652753275, 74.52354046604688], "label": "High jump"}, {"segment": [78.74910148783415, 99.87690914359737], "label": "High jump"}, {"segment": [106.37777303767838, 125.23027833051327], "label": "High jump"}, {"segment": [131.4060990298902, 166.5107640579276], "label": "High jump"}, {"segment": [175.93701670434504, 208.677731], "label": "High jump"}]}, "uktUeF3Fy7o": {"duration": 181.581, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uktUeF3Fy7o", "annotations": []}, "ZOQSDsJYXIA": {"duration": 98.59, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZOQSDsJYXIA", "annotations": [{"segment": [2.307143918409798, 10.459052430124416], "label": "Triple jump"}, {"segment": [18.149532158157076, 65.98431532137286], "label": "Triple jump"}, {"segment": [67.52241124960999, 97.3614722574103], "label": "Triple jump"}]}, "G5ueYVLGtm8": {"duration": 6.827, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=G5ueYVLGtm8", "annotations": [{"segment": [0, 6.826665], "label": "Croquet"}]}, "P3q6n0ZjumU": {"duration": 48.252, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=P3q6n0ZjumU", "annotations": [{"segment": [4.516429329173167, 48.250521541950114], "label": "Throwing darts"}]}, "EzQL-i_MTdU": {"duration": 56.75, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EzQL-i_MTdU", "annotations": [{"segment": [16.20151506468422, 52.76558989703588], "label": "Snatch"}]}, "rxwmIyvVVUU": {"duration": 16.649, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rxwmIyvVVUU", "annotations": [{"segment": [0.01, 16.648706], "label": "Playing pool"}]}, 
"AzNHZyeXHBM": {"duration": 42.56, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AzNHZyeXHBM", "annotations": [{"segment": [11.912283359999998, 37.25234586], "label": "Triple jump"}]}, "3PC5_h68qkc": {"duration": 193.26, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=3PC5_h68qkc", "annotations": [{"segment": [20.495581903276133, 43.7038143525741], "label": "Rafting"}]}, "zBm3FR-CCI0": {"duration": 133.65, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=zBm3FR-CCI0", "annotations": [{"segment": [0.01, 7.811992979719188], "label": "Discus throw"}, {"segment": [10.936790171606862, 15.72814586583463], "label": "Discus throw"}, {"segment": [23.43597893915756, 25.727496879875194], "label": "Discus throw"}, {"segment": [30.31053276131045, 34.06028939157566], "label": "Discus throw"}, {"segment": [38.018365834633386, 41.55980265210608], "label": "Discus throw"}, {"segment": [47.80939703588143, 53.01739235569423], "label": "Discus throw"}, {"segment": [57.600428237129485, 65.72490093603744], "label": "Discus throw"}, {"segment": [69.47465756630264, 111.3469399375975], "label": "Discus throw"}, {"segment": [116.34661544461777, 120.92965132605303], "label": "Discus throw"}]}, "QyTiHhG9qf0": {"duration": 144.754, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=QyTiHhG9qf0", "annotations": [{"segment": [18.736634945397817, 133.65808424336976], "label": "Riding bumper cars"}]}, "dVpHF8tE2qo": {"duration": 36.78, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dVpHF8tE2qo", "annotations": []}, "luDywPbDOEQ": {"duration": 133.237, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=luDywPbDOEQ", "annotations": [{"segment": [33.88057877961377, 35.54342927186475], "label": "Mooping floor"}, {"segment": [39.07698656789809, 45.52053222537065], 
"label": "Mooping floor"}, {"segment": [52.587646817437324, 55.28977886734517], "label": "Mooping floor"}, {"segment": [59.86261772103538, 62.56474977094322], "label": "Mooping floor"}, {"segment": [69.42400805147852, 70.25543329760401], "label": "Mooping floor"}]}, "Z0GbQKiufUE": {"duration": 108.44, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Z0GbQKiufUE", "annotations": [{"segment": [32.64957460883469, 84.24605167862714], "label": "Cleaning windows"}]}, "oSQNry_U7oA": {"duration": 99.66, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oSQNry_U7oA", "annotations": [{"segment": [26.431899815534187, 97.99418674484382], "label": "Playing saxophone"}]}, "qSP3_C7QWpU": {"duration": 30.651, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=qSP3_C7QWpU", "annotations": []}, "rKtktLDSOpA": {"duration": 128.52, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rKtktLDSOpA", "annotations": [{"segment": [14.063411778565877, 81.50593047015467], "label": "Playing polo"}, {"segment": [87.30958934865933, 105.92132299351915], "label": "Playing polo"}]}, "dD3eR9Sb1x0": {"duration": 208.07999999999998, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dD3eR9Sb1x0", "annotations": [{"segment": [25.644067199422672, 194.11584894851796], "label": "Zumba"}]}, "R1qdfsEyrDE": {"duration": 123.18, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=R1qdfsEyrDE", "annotations": []}, "ehGHCYKzyZ8": {"duration": 61.719, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=ehGHCYKzyZ8", "annotations": [{"segment": [43.95990729267573, 45.401932082395355], "label": "Doing crunches"}]}, "fhtNAMK0Vqk": {"duration": 27.7, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fhtNAMK0Vqk", "annotations": 
[{"segment": [0.01, 26.749261310452418], "label": "Grooming horse"}]}, "zRNS_ebpi7o": {"duration": 81.25, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=zRNS_ebpi7o", "annotations": [{"segment": [20.4861789, 54.51931584], "label": "Playing violin"}, {"segment": [54.69249171294852, 77.2539367301092], "label": "Playing violin"}]}, "7A_NgDs7jZY": {"duration": 216.72, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7A_NgDs7jZY", "annotations": [{"segment": [13.192313127145086, 204.6499856903276], "label": "Belly dance"}]}, "bUhhrFqCMmc": {"duration": 2.183, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=bUhhrFqCMmc", "annotations": []}, "Z47Ig2AqKfg": {"duration": 87.08, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Z47Ig2AqKfg", "annotations": [{"segment": [4.346949388179609, 5.9770554087469625], "label": "Playing violin"}, {"segment": [8.558056516380654, 21.463062374414974], "label": "Playing violin"}, {"segment": [32.05875139469578, 34.63975256630265], "label": "Playing violin"}, {"segment": [44.556230751950075, 75.39240264430576], "label": "Playing violin"}]}, "beejEQ04nkY": {"duration": 30.929, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=beejEQ04nkY", "annotations": [{"segment": [12.352291145085804, 16.019377578783153], "label": "Disc dog"}, {"segment": [18.624938992199688, 21.954267464898596], "label": "Disc dog"}]}, "G-zVpNXq224": {"duration": 192.15, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=G-zVpNXq224", "annotations": [{"segment": [60.8509515897036, 164.56735183619344], "label": "Ping-pong"}]}, "xmAcpNTJUxo": {"duration": 123.391, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=xmAcpNTJUxo", "annotations": [{"segment": [27.868606466470283, 39.4004431152648], "label": 
"Carving jack-o-lanterns"}, {"segment": [60.54214429906543, 93.21568249221184], "label": "Carving jack-o-lanterns"}]}, "quoyW7FZqdI": {"duration": 233.849, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=quoyW7FZqdI", "annotations": [{"segment": [73.69310947676003, 128.41571552385906], "label": "Rope skipping"}, {"segment": [195.1772944773791, 224.36268430577223], "label": "Rope skipping"}]}, "O0uVjGlOW8M": {"duration": 220.985, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=O0uVjGlOW8M", "annotations": [{"segment": [28.608585023400938, 215.44559282371296], "label": "Surfing"}]}, "xlTKV-3QSW8": {"duration": 78.07, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=xlTKV-3QSW8", "annotations": [{"segment": [4.3843331232449305, 78.065487], "label": "Kayaking"}]}, "ArT40PO05h8": {"duration": 152.44, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ArT40PO05h8", "annotations": [{"segment": [26.147451840873636, 132.40118795787834], "label": "Cleaning windows"}]}, "rZGxJN2AOQY": {"duration": 238.21, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=rZGxJN2AOQY", "annotations": [{"segment": [13.378606132000382, 226.69304834778424], "label": "Playing racquetball"}]}, "oSyBkE3gYMM": {"duration": 90.326, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oSyBkE3gYMM", "annotations": [{"segment": [0.01, 60.040939272745895], "label": "Longboarding"}]}, "x4Vk5wSH7xE": {"duration": 25.566, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=x4Vk5wSH7xE", "annotations": [{"segment": [0.01, 25.565169], "label": "Playing blackjack"}]}, "vutxJfF0Rlg": {"duration": 183.67, "subset": "validation", "resolution": "426x238", "url": "https://www.youtube.com/watch?v=vutxJfF0Rlg", "annotations": [{"segment": 
[118.05300250105243, 148.71239878166554], "label": "Drinking coffee"}]}, "b_DY76Y6ekg": {"duration": 210.26, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=b_DY76Y6ekg", "annotations": [{"segment": [113.4926772074883, 210.25666666666666], "label": "Bungee jumping"}]}, "CSk99DHanfg": {"duration": 197.161, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CSk99DHanfg", "annotations": [{"segment": [30.435318252730113, 54.41465990639625], "label": "Scuba diving"}, {"segment": [65.17462090483619, 71.93802496099845], "label": "Scuba diving"}, {"segment": [96.53222152886116, 104.21790795631826], "label": "Scuba diving"}, {"segment": [120.8312527301092, 129.1326365054602], "label": "Scuba diving"}, {"segment": [141.7384414976599, 161.41579563182526], "label": "Scuba diving"}]}, "GlAEgSWM2nQ": {"duration": 55.055, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GlAEgSWM2nQ", "annotations": [{"segment": [0.01, 54.11255371332614], "label": "Trimming branches or hedges"}]}, "tvR1ovqyJZ8": {"duration": 91.162, "subset": "training", "resolution": "426x238", "url": "https://www.youtube.com/watch?v=tvR1ovqyJZ8", "annotations": [{"segment": [0.01, 64.69446957878316], "label": "Waterskiing"}]}, "B4LYBLihttQ": {"duration": 186.29, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=B4LYBLihttQ", "annotations": [{"segment": [43.013114853845856, 49.406956251039155], "label": "Preparing pasta"}, {"segment": [52.313247795217926, 56.672685111486096], "label": "Preparing pasta"}, {"segment": [68.87910959703694, 123.5173906275979], "label": "Preparing pasta"}, {"segment": [129.62060287037332, 186.293287], "label": "Preparing pasta"}]}, "NvOo-wtEPPk": {"duration": 68.058, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NvOo-wtEPPk", "annotations": [{"segment": [45.795292771314124, 
55.441985227414335], "label": "Swimming"}]}, "thvpt_lUxTI": {"duration": 52.99, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=thvpt_lUxTI", "annotations": [{"segment": [2.727928010920437, 9.589080280811233], "label": "Shoveling snow"}, {"segment": [13.722304539781593, 32.2391492199688], "label": "Shoveling snow"}, {"segment": [52.73994154446178, 52.787935], "label": "Shoveling snow"}]}, "gxJeNdvNzhU": {"duration": 42.563, "subset": "validation", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=gxJeNdvNzhU", "annotations": [{"segment": [17.595906981279253, 37.914199570982845], "label": "Hitting a pinata"}]}, "FV2TzKt9MdI": {"duration": 198.18, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=FV2TzKt9MdI", "annotations": [{"segment": [33.4569235, 138.94178237129483], "label": "Spinning"}]}, "BxEnhkx2srY": {"duration": 16.3, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=BxEnhkx2srY", "annotations": [{"segment": [1.6669571, 10.4566649], "label": "Pole vault"}, {"segment": [11.6387426, 14.6696481], "label": "Pole vault"}]}, "GvDu_0ktsJ0": {"duration": 188.87, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GvDu_0ktsJ0", "annotations": [{"segment": [13.39473132, 87.0317568], "label": "Preparing salad"}, {"segment": [108.30618624, 157.76205759], "label": "Preparing salad"}, {"segment": [55.667838540000005, 116.51829732000002], "label": "Preparing pasta"}]}, "mH_B3FvFbFc": {"duration": 70.194, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mH_B3FvFbFc", "annotations": []}, "cRTsarldNlc": {"duration": 25.54, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cRTsarldNlc", "annotations": [{"segment": [10.18381775, 12.611586362714508], "label": "Hammer throw"}, {"segment": [18.588641510920436, 20.262216952418097], "label": 
"Hammer throw"}]}, "-V00wKlFJA0": {"duration": 92.72, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=-V00wKlFJA0", "annotations": [{"segment": [4.550152886115445, 84.14171606864275], "label": "Playing bagpipes"}]}, "0Yb2EQlAEYM": {"duration": 70.937, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0Yb2EQlAEYM", "annotations": [{"segment": [19.809229442589704, 32.31449719126365], "label": "River tubing"}, {"segment": [38.9544623675507, 54.00505010046801], "label": "River tubing"}]}, "0Zg9UgCp4a4": {"duration": 109.85, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=0Zg9UgCp4a4", "annotations": [{"segment": [53.577324299999994, 92.32780539999999], "label": "Hopscotch"}]}, "hHMqyl_Dugs": {"duration": 237.773, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hHMqyl_Dugs", "annotations": [{"segment": [9.088003120124805, 24.296498137476515], "label": "Ice fishing"}, {"segment": [78.4535779553631, 232.39322264319142], "label": "Ice fishing"}]}, "32EWNGHBctI": {"duration": 64.6, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=32EWNGHBctI", "annotations": [{"segment": [42.779546467572985, 46.81059913825125], "label": "Tennis serve with ball bouncing"}]}, "_4licRHiRRs": {"duration": 120.233, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_4licRHiRRs", "annotations": [{"segment": [18.194373985959437, 71.46449988299531], "label": "Disc dog"}]}, "J5eSX6Tb2hc": {"duration": 44.4, "subset": "testing", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=J5eSX6Tb2hc", "annotations": []}, "RiU6gIrM4v4": {"duration": 72.981, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RiU6gIrM4v4", "annotations": []}, "7vECSCWxbDk": {"duration": 139.13, "subset": "training", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=7vECSCWxbDk", "annotations": [{"segment": [13.231480499219968, 131.23025741029642], "label": "Playing guitarra"}]}, "zH1MiuiTsx8": {"duration": 115.845, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zH1MiuiTsx8", "annotations": []}, "8IJJGK2td2c": {"duration": 154.692, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=8IJJGK2td2c", "annotations": [{"segment": [14.457134380231844, 132.2827779813084], "label": "Making a lemonade"}]}, "pwoy7UXdnAE": {"duration": 82.385, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pwoy7UXdnAE", "annotations": [{"segment": [17.864952614431107, 78.5286758673947], "label": "Disc dog"}]}, "JX0pGTdQXRw": {"duration": 37.04, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JX0pGTdQXRw", "annotations": []}, "5Qj8hHmZbiI": {"duration": 94.97, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5Qj8hHmZbiI", "annotations": [{"segment": [12.593474623338674, 18.816132672517785], "label": "Beer pong"}, {"segment": [38.22489944495739, 41.632544856474254], "label": "Beer pong"}, {"segment": [48.00336132917317, 51.411007349453975], "label": "Beer pong"}]}, "P8S8PIyPXxY": {"duration": 202.061, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=P8S8PIyPXxY", "annotations": []}, "6u-faCwecyk": {"duration": 45.23, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=6u-faCwecyk", "annotations": []}, "snhPrCNubYA": {"duration": 21.2, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=snhPrCNubYA", "annotations": []}, "hCJTKVzkYFE": {"duration": 62.21, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hCJTKVzkYFE", "annotations": [{"segment": [1.3566094570982838, 
24.03136752574103], "label": "Playing polo"}, {"segment": [24.19688988, 52.669820200000004], "label": "Playing polo"}]}, "4OeZViscNp4": {"duration": 109.99000000000001, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4OeZViscNp4", "annotations": [{"segment": [13.041167252132261, 101.41223378939158], "label": "Cleaning shoes"}]}, "Oheg1qwrESg": {"duration": 65.62, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Oheg1qwrESg", "annotations": [{"segment": [6.239034321372856, 16.369672386895477], "label": "Doing fencing"}, {"segment": [31.102377535101407, 58.930820592823714], "label": "Doing fencing"}]}, "XBMiD_7fdF4": {"duration": 55.26, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=XBMiD_7fdF4", "annotations": [{"segment": [50.1763836599064, 53.797359800312016], "label": "Drinking beer"}]}, "Dn8acaAwWnQ": {"duration": 212.858, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Dn8acaAwWnQ", "annotations": []}, "Bl2Hqx3h9Fw": {"duration": 110.226, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Bl2Hqx3h9Fw", "annotations": [{"segment": [23.644235021101522, 98.96186940171607], "label": "Removing ice from car"}]}, "UdIoEWadRxI": {"duration": 170.3, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=UdIoEWadRxI", "annotations": [{"segment": [55.776362786356394, 132.96621868355265], "label": "Mixing drinks"}]}, "rrpEOrMpBuE": {"duration": 120.744, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rrpEOrMpBuE", "annotations": []}, "CRdgzvZxB8A": {"duration": 74.21000000000001, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=CRdgzvZxB8A", "annotations": [{"segment": [3.180733229329173, 73.04120124804992], "label": "Kayaking"}]}, "b1s0BdtlFZs": {"duration": 112.8, 
"subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=b1s0BdtlFZs", "annotations": [{"segment": [6.863181036505461, 108.40306465591958], "label": "Shaving legs"}]}, "dtYofkPozXM": {"duration": 145.404, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=dtYofkPozXM", "annotations": [{"segment": [18.09057395204871, 104.02081661417317], "label": "Hanging wallpaper"}]}, "MgLQZD76PaI": {"duration": 65.43, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=MgLQZD76PaI", "annotations": []}, "63Zt34YuyRk": {"duration": 169.158, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=63Zt34YuyRk", "annotations": [{"segment": [54.08066302652106, 89.16714196567862], "label": "Grooming dog"}, {"segment": [0.2, 168.901], "label": "Grooming dog"}, {"segment": [117.92222620904835, 168.901], "label": "Grooming dog"}]}, "vyoVLhEyyrQ": {"duration": 175.13, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=vyoVLhEyyrQ", "annotations": []}, "WQlkB6EdnQc": {"duration": 120.814, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WQlkB6EdnQc", "annotations": []}, "-2VNLsk1s34": {"duration": 212.65, "subset": "training", "resolution": "1280x718", "url": "https://www.youtube.com/watch?v=-2VNLsk1s34", "annotations": [{"segment": [16.918966432126673, 31.847466225179623], "label": "Shot put"}, {"segment": [75.30598784495598, 103.50426523183377], "label": "Shot put"}, {"segment": [139.9961536148521, 166.53570880250177], "label": "Shot put"}]}, "Gc1Mk5UyECQ": {"duration": 113.52000000000001, "subset": "training", "resolution": "426x196", "url": "https://www.youtube.com/watch?v=Gc1Mk5UyECQ", "annotations": [{"segment": [34.69933853354134, 44.26716068642746], "label": "Playing lacrosse"}, {"segment": [54.71421060842434, 61.79695631825273], "label": "Playing lacrosse"}, {"segment": 
[77.02485959438378, 93.4922433697348], "label": "Playing lacrosse"}, {"segment": [102.34567550702027, 111.73031357254291], "label": "Playing lacrosse"}]}, "73LjSLUZGZc": {"duration": 130.497, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=73LjSLUZGZc", "annotations": [{"segment": [16.464464047301163, 32.522397757009344], "label": "Playing ten pins"}, {"segment": [98.78678318691587, 104.27493780841121], "label": "Playing ten pins"}]}, "9RcCkU6dVD0": {"duration": 37.222, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9RcCkU6dVD0", "annotations": [{"segment": [0.01, 37.221587], "label": "Hitting a pinata"}]}, "qXD7myRvw0M": {"duration": 28.33, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qXD7myRvw0M", "annotations": [{"segment": [0.01, 28.32755], "label": "Playing badminton"}]}, "vnlxIMiHKRQ": {"duration": 183.345, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vnlxIMiHKRQ", "annotations": [{"segment": [15.159551221528861, 175.6219707550702], "label": "River tubing"}]}, "oXqzXQRqAhI": {"duration": 55.33, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oXqzXQRqAhI", "annotations": [{"segment": [4.920420514820593, 6.042621684867395], "label": "Washing face"}]}, "B1u-Er4jXFA": {"duration": 83.128, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=B1u-Er4jXFA", "annotations": [{"segment": [4.149887682582134, 80.14470586986745], "label": "Making an omelette"}]}, "GwmpYLRpJQw": {"duration": 183.299, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GwmpYLRpJQw", "annotations": [{"segment": [67.48580929173166, 122.10356172698907], "label": "Skiing"}, {"segment": [85.50108889079563, 122.96143218408736], "label": "Skiing"}, {"segment": [146.12393452574102, 150.98520044929796], "label": "Skiing"}]}, 
"L0Fdx2r3qA8": {"duration": 181.906, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=L0Fdx2r3qA8", "annotations": [{"segment": [75.77010612669405, 100.74302499991155], "label": "Peeling potatoes"}, {"segment": [117.20244880271402, 145.01319936606987], "label": "Peeling potatoes"}, {"segment": [150.1212964083189, 180.20231099843994], "label": "Peeling potatoes"}]}, "w_CXbraJr58": {"duration": 80.71000000000001, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=w_CXbraJr58", "annotations": []}, "Uot4XZns2b4": {"duration": 236.264, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Uot4XZns2b4", "annotations": [{"segment": [9.2146270884849, 225.94265472854914], "label": "Fixing the roof"}]}, "IT9uvR9yjxI": {"duration": 110.667, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IT9uvR9yjxI", "annotations": [{"segment": [27.29340587039603, 48.14253515388513], "label": "Roof shingle removal"}, {"segment": [65.37322051214058, 71.40396038752999], "label": "Roof shingle removal"}]}, "J1QB1y3vXLY": {"duration": 124.39, "subset": "testing", "resolution": "480x272", "url": "https://www.youtube.com/watch?v=J1QB1y3vXLY", "annotations": []}, "sCCJpFKVV8g": {"duration": 180.14, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sCCJpFKVV8g", "annotations": [{"segment": [43.84072335954663, 173.39568037285494], "label": "Getting a tattoo"}]}, "3J_-uEjb8UE": {"duration": 219.522, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=3J_-uEjb8UE", "annotations": [{"segment": [150.23168183011205, 177.58383136282234], "label": "Removing ice from car"}]}, "hvCYwmaukDw": {"duration": 60.488, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=hvCYwmaukDw", "annotations": [{"segment": [0.01, 54.11884081666251], "label": "Sumo"}, 
{"segment": [57.88756231198961, 59.58348698488681], "label": "Sumo"}]}, "foZ88hBB77I": {"duration": 70.102, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=foZ88hBB77I", "annotations": [{"segment": [41.79651557632399, 69.84274143302181], "label": "Playing rubik cube"}]}, "vL8Hy6lcnF8": {"duration": 223.31, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vL8Hy6lcnF8", "annotations": [{"segment": [6.270691839918495, 213.5518943261132], "label": "Zumba"}]}, "fIN8MmMloZE": {"duration": 209.91, "subset": "testing", "resolution": "352x262", "url": "https://www.youtube.com/watch?v=fIN8MmMloZE", "annotations": []}, "odbjmsyfJe4": {"duration": 20.88, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=odbjmsyfJe4", "annotations": [{"segment": [0.04138296392829396, 13.14499575832082], "label": "Brushing hair"}]}, "0qoi1RmRMbQ": {"duration": 12.075, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=0qoi1RmRMbQ", "annotations": []}, "f0On10HA3HQ": {"duration": 135.605, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=f0On10HA3HQ", "annotations": [{"segment": [0.01, 119.31506680675389], "label": "Wakeboarding"}]}, "IBkHS9Zl2z8": {"duration": 127.57, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IBkHS9Zl2z8", "annotations": [{"segment": [0.01, 32.62116068642746], "label": "Shoveling snow"}, {"segment": [57.48485023400936, 76.79473010920437], "label": "Shoveling snow"}]}, "mkGUacg0GRc": {"duration": 216.596, "subset": "testing", "resolution": "432x720", "url": "https://www.youtube.com/watch?v=mkGUacg0GRc", "annotations": []}, "l9o9R7UcPuc": {"duration": 153.948, "subset": "validation", "resolution": "426x212", "url": "https://www.youtube.com/watch?v=l9o9R7UcPuc", "annotations": [{"segment": [39.74403629550576, 71.10830878521703], "label": "Roof 
shingle removal"}, {"segment": [78.05154467988595, 99.36009621869742], "label": "Roof shingle removal"}]}, "EqqqGf4wLH0": {"duration": 156.456, "subset": "training", "resolution": "400x224", "url": "https://www.youtube.com/watch?v=EqqqGf4wLH0", "annotations": [{"segment": [0.9763249672386896, 4.393462352574103], "label": "Playing drums"}, {"segment": [55.406441890795634, 94.70352182215288], "label": "Playing drums"}, {"segment": [109.5924775725429, 123.01694587207487], "label": "Playing drums"}, {"segment": [150.3540449547582, 156.256076], "label": "Playing drums"}]}, "4W3atvAalEs": {"duration": 102.192, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=4W3atvAalEs", "annotations": []}, "nrh2jDsmeLQ": {"duration": 226.21, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nrh2jDsmeLQ", "annotations": [{"segment": [79.9177511700468, 170.94989157566303], "label": "Preparing salad"}]}, "a073aSPGTIw": {"duration": 194.653, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=a073aSPGTIw", "annotations": [{"segment": [35.52946480308192, 49.498314212840626], "label": "Elliptical trainer"}, {"segment": [78.95436622907093, 105.67738249121803], "label": "Elliptical trainer"}, {"segment": [107.80307696661609, 116.60952550755091], "label": "Elliptical trainer"}, {"segment": [156.39037926142896, 175.21795726365053], "label": "Elliptical trainer"}, {"segment": [183.41706444929795, 193.7418660904836], "label": "Elliptical trainer"}]}, "IWdJF6lBSnM": {"duration": 91.859, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IWdJF6lBSnM", "annotations": [{"segment": [10.497915171507362, 84.15494788178773], "label": "Roof shingle removal"}]}, "mkwCGf92vqo": {"duration": 49.95, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=mkwCGf92vqo", "annotations": [{"segment": [1.7125792917316693, 
49.431265920436815], "label": "Doing karate"}]}, "hv3tNd_6qB4": {"duration": 121.627, "subset": "training", "resolution": "180x240", "url": "https://www.youtube.com/watch?v=hv3tNd_6qB4", "annotations": [{"segment": [0.01, 4.364119838262918], "label": "Mooping floor"}, {"segment": [10.246194402878157, 106.4465747925117], "label": "Mooping floor"}]}, "QEaQzeB94N4": {"duration": 111.94, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QEaQzeB94N4", "annotations": [{"segment": [3.056171532575589, 105.2196187051482], "label": "Playing violin"}]}, "sV8nkrbJIKE": {"duration": 191.542, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=sV8nkrbJIKE", "annotations": [{"segment": [31.375737315206894, 191.5414058956916], "label": "Making a cake"}]}, "uCdUm-lou7w": {"duration": 13.166, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uCdUm-lou7w", "annotations": [{"segment": [0.2, 11.87959438377535], "label": "Using the monkey bar"}]}, "64JprLAtnzg": {"duration": 15.07, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=64JprLAtnzg", "annotations": [{"segment": [0, 9.59714352574103], "label": "Using the balance beam"}]}, "KiKZEKwn4Aw": {"duration": 207.006, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KiKZEKwn4Aw", "annotations": [{"segment": [17.540684461894525, 22.05483170488518], "label": "Skiing"}, {"segment": [23.02214897124032, 43], "label": "Skiing"}, {"segment": [44.9480070086235, 72.67776864413752], "label": "Skiing"}, {"segment": [84.9304540179693, 103.63192116750201], "label": "Skiing"}, {"segment": [104.59923843385714, 112.98265474226835], "label": "Skiing"}, {"segment": [117.49680198525901, 132.00656098058613], "label": "Skiing"}, {"segment": [138.45534275628705, 139.74509911142724], "label": "Skiing"}, {"segment": [140.38997728899733, 143.93680726563284], "label": 
"Skiing"}, {"segment": [145.5490027095581, 149.7407108637637], "label": "Skiing"}, {"segment": [153.2875408403992, 155.54461446189453], "label": "Skiing"}, {"segment": [158.44656626095997, 160.38120079367025], "label": "Skiing"}, {"segment": [166.82998256937117, 178.43778976563286], "label": "Skiing"}]}, "8eDJXDetgGE": {"duration": 221.61, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8eDJXDetgGE", "annotations": [{"segment": [7.260271896590149, 31.461178218557315], "label": "Playing field hockey"}, {"segment": [35.264177783437866, 80.90017256200451], "label": "Playing field hockey"}, {"segment": [82.62880872785931, 137.94516603521282], "label": "Playing field hockey"}, {"segment": [139.32807496789667, 199.13888499219968], "label": "Playing field hockey"}]}, "JLqjNaSEpV0": {"duration": 230.09, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JLqjNaSEpV0", "annotations": []}, "gSOUhz7yQ-s": {"duration": 70.36, "subset": "training", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=gSOUhz7yQ-s", "annotations": [{"segment": [6.636123244929798, 67.29358034321373], "label": "Cleaning windows"}]}, "AF79fdkjLQw": {"duration": 155.64, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AF79fdkjLQw", "annotations": []}, "cKFTf1vZQOw": {"duration": 127.733, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cKFTf1vZQOw", "annotations": [{"segment": [25.497360374414978, 110.75415912636505], "label": "Rollerblading"}]}, "2O-TyeSMueg": {"duration": 221.356, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2O-TyeSMueg", "annotations": [{"segment": [34.86958034321373, 75.26305460218408], "label": "Scuba diving"}, {"segment": [80.78694851794071, 88.38230265210608], "label": "Scuba diving"}, {"segment": [104.60874102964118, 117.72798907956317], "label": "Scuba diving"}, 
{"segment": [106.33495787831512, 126.01382995319813], "label": "Scuba diving"}, {"segment": [139.8235647425897, 146.38318876755068], "label": "Scuba diving"}, {"segment": [172.63728549141965, 180.92387519500778], "label": "Scuba diving"}]}, "OmzUF102S5w": {"duration": 93.298, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OmzUF102S5w", "annotations": []}, "IaRYt0J7pHI": {"duration": 193.73, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IaRYt0J7pHI", "annotations": [{"segment": [15.111082810659365, 38.684371995287975], "label": "Brushing hair"}, {"segment": [48.65768665032315, 60.74655289885064], "label": "Brushing hair"}, {"segment": [72.53319749116496, 105.77757967461555], "label": "Brushing hair"}, {"segment": [111.51979114266611, 182.54187881435254], "label": "Brushing hair"}]}, "4NPgcPSwdMU": {"duration": 30.256, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=4NPgcPSwdMU", "annotations": []}, "5j5_YV25cFA": {"duration": 121.88, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5j5_YV25cFA", "annotations": [{"segment": [9.126854789391576, 80.62055063962558], "label": "Playing squash"}]}, "8jj-mhVrlGA": {"duration": 71.87, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=8jj-mhVrlGA", "annotations": [{"segment": [37.32928089781591, 68.78213826599064], "label": "Layup drill in basketball"}]}, "XJmBiSBx7Ss": {"duration": 114.266, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=XJmBiSBx7Ss", "annotations": [{"segment": [9.447818365054601, 90.91296917316693], "label": "Trimming branches or hedges"}, {"segment": [94.12166220280811, 105.70860925429017], "label": "Trimming branches or hedges"}]}, "pspJX3Lyys0": {"duration": 96.85, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=pspJX3Lyys0", 
"annotations": [{"segment": [3.5506796530364615, 86.52385299999999], "label": "Discus throw"}]}, "kliGNOFLJSk": {"duration": 163.515, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kliGNOFLJSk", "annotations": [{"segment": [83.79809098595943, 92.98144342277692], "label": "Capoeira"}]}, "S_hFs1vb3C4": {"duration": 40.403, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=S_hFs1vb3C4", "annotations": []}, "osQBvw8UzIg": {"duration": 46.115, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=osQBvw8UzIg", "annotations": []}, "MTC0fs4QyEY": {"duration": 225.26, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=MTC0fs4QyEY", "annotations": [{"segment": [19.850803432137287, 206.41322152886116], "label": "Tai chi"}]}, "O1XzCrHZm34": {"duration": 34.02, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=O1XzCrHZm34", "annotations": [{"segment": [6.609172790000001, 18.0085708], "label": "Doing kickboxing"}]}, "NKQehrewXlM": {"duration": 142.548, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NKQehrewXlM", "annotations": [{"segment": [6.665569422776911, 16.663923556942276], "label": "Running a marathon"}, {"segment": [44.881500780031196, 67.3222511700468], "label": "Running a marathon"}, {"segment": [70.87722152886116, 81.09776131045241], "label": "Running a marathon"}, {"segment": [88.20770202808112, 92.65141497659906], "label": "Running a marathon"}, {"segment": [111.3150093603744, 121.09117784711388], "label": "Running a marathon"}, {"segment": [126.4236333853354, 134.2001310452418], "label": "Running a marathon"}]}, "sdq5E1nLX5A": {"duration": 104.68, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=sdq5E1nLX5A", "annotations": []}, "WVkOtMX9Gg8": {"duration": 199.53, "subset": "training", "resolution": 
"854x480", "url": "https://www.youtube.com/watch?v=WVkOtMX9Gg8", "annotations": [{"segment": [35.48556040200791, 47.002803690378904], "label": "Cricket"}, {"segment": [57.8974932874866, 65.67941442827781], "label": "Cricket"}, {"segment": [159.0624681177723, 160.61885230577224], "label": "Cricket"}, {"segment": [164.6654512979719, 166.84438921684867], "label": "Cricket"}]}, "fEsTL9tYOVc": {"duration": 213.508, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fEsTL9tYOVc", "annotations": [{"segment": [34.90857476635514, 200.16124922118382], "label": "Scuba diving"}]}, "FjfWqx-NL_w": {"duration": 100.287, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=FjfWqx-NL_w", "annotations": [{"segment": [0.19999999999999998, 94.51619105862278], "label": "Rollerblading"}]}, "IDuoF51IMJ8": {"duration": 17.14, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IDuoF51IMJ8", "annotations": []}, "N2fTZV7pnEw": {"duration": 209.978, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=N2fTZV7pnEw", "annotations": [{"segment": [0.01, 208.99531311794073], "label": "Snowboarding"}]}, "_JW114l6IG8": {"duration": 190.311, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_JW114l6IG8", "annotations": []}, "cxxiokwoVgg": {"duration": 199.413, "subset": "training", "resolution": "1267x720", "url": "https://www.youtube.com/watch?v=cxxiokwoVgg", "annotations": [{"segment": [35.153924600521435, 181.68045988234087], "label": "Snow tubing"}]}, "0QjoAi4b1Lw": {"duration": 128.85, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0QjoAi4b1Lw", "annotations": []}, "2fp9Ni7Ms6A": {"duration": 45.976, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2fp9Ni7Ms6A", "annotations": [{"segment": [10.256627035881435, 41.31340680187208], "label": 
"Grooming dog"}]}, "Ly70Hwy52WA": {"duration": 203.13, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ly70Hwy52WA", "annotations": [{"segment": [49.752067586739464, 57.35747919235569], "label": "Ping-pong"}, {"segment": [118.5176641875195, 163.1994573705148], "label": "Ping-pong"}, {"segment": [182.5298785347894, 186.3325843375975], "label": "Ping-pong"}]}, "6lyXvR5VtTQ": {"duration": 80.899, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6lyXvR5VtTQ", "annotations": [{"segment": [21.532415887850462, 80.64099999999999], "label": "Hitting a pinata"}]}, "H3pQJc8d8H8": {"duration": 160.056, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=H3pQJc8d8H8", "annotations": [{"segment": [22.722337617314217, 139.8297699527029], "label": "Capoeira"}]}, "2Sev8z4P7pE": {"duration": 45.279, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=2Sev8z4P7pE", "annotations": [{"segment": [3.49364976599064, 4.2700163806552265], "label": "Raking leaves"}, {"segment": [13.021785491419656, 15.492042901716069], "label": "Raking leaves"}]}, "yirVJRiG4mI": {"duration": 44.815, "subset": "testing", "resolution": "603x480", "url": "https://www.youtube.com/watch?v=yirVJRiG4mI", "annotations": []}, "boqZwv68BiM": {"duration": 21.154, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=boqZwv68BiM", "annotations": [{"segment": [2.967384910490235, 21.02829811422274], "label": "Installing carpet"}]}, "fgeW0L2acbI": {"duration": 106.743, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fgeW0L2acbI", "annotations": [{"segment": [52.91450546021841, 106.641], "label": "Surfing"}]}, "RgEyUF4POcE": {"duration": 45.744, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=RgEyUF4POcE", "annotations": []}, "CEEJ1gzdmAk": {"duration": 171.55, 
"subset": "training", "resolution": "426x238", "url": "https://www.youtube.com/watch?v=CEEJ1gzdmAk", "annotations": [{"segment": [0, 171.501], "label": "Playing ice hockey"}]}, "E-M2Cq0RNTs": {"duration": 142.896, "subset": "validation", "resolution": "853x470", "url": "https://www.youtube.com/watch?v=E-M2Cq0RNTs", "annotations": [{"segment": [13.577307874343923, 113.7377757998319], "label": "Hand car wash"}]}, "zm5n_kHOQcA": {"duration": 128.43, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zm5n_kHOQcA", "annotations": []}, "Y82dz6aTwJ0": {"duration": 37.013, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Y82dz6aTwJ0", "annotations": [{"segment": [0.009999999999999998, 36.966], "label": "Putting on shoes"}]}, "bRXOj9Jm1_E": {"duration": 188.5, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bRXOj9Jm1_E", "annotations": [{"segment": [12.056882811366876, 58.22592186952785], "label": "Surfing"}, {"segment": [60.87255468177911, 75.2819999929249], "label": "Surfing"}, {"segment": [79.1049140550656, 86.75074217934704], "label": "Surfing"}, {"segment": [93.51435936621138, 132.03757030009092], "label": "Surfing"}, {"segment": [136.44862498717637, 146.4470146053042], "label": "Surfing"}, {"segment": [152.03435050390013, 163.20902230109203], "label": "Surfing"}, {"segment": [165.56158478471139, 176.44218627145085], "label": "Surfing"}]}, "IU6LVYI0FZM": {"duration": 62.509, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IU6LVYI0FZM", "annotations": [{"segment": [6.707714052961776, 57.84188205090227], "label": "Capoeira"}]}, "60yOoxs9AFc": {"duration": 74.583, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=60yOoxs9AFc", "annotations": [{"segment": [0.2, 70.74881345327103], "label": "Playing congas"}]}, "IxSk2VZeLeY": {"duration": 235.87, "subset": "testing", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=IxSk2VZeLeY", "annotations": []}, "LDjomH0-hYA": {"duration": 189.75, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LDjomH0-hYA", "annotations": [{"segment": [12.242863559999998, 182.15983295319813], "label": "Belly dance"}]}, "-yn3D1fOfr0": {"duration": 80.736, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=-yn3D1fOfr0", "annotations": [{"segment": [19.48708113309476, 60.16887502016748], "label": "Decorating the Christmas tree"}]}, "1px5LTVIR3A": {"duration": 224.282, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1px5LTVIR3A", "annotations": []}, "6yVhTyPaaLQ": {"duration": 160.43, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6yVhTyPaaLQ", "annotations": [{"segment": [0, 160.4266439909297], "label": "Painting"}]}, "1JeK3U_nUUc": {"duration": 141.967, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1JeK3U_nUUc", "annotations": [{"segment": [0.01, 141.921], "label": "Beach soccer"}]}, "MRxC-Ygp4go": {"duration": 127.316, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=MRxC-Ygp4go", "annotations": [{"segment": [26.416276372854917, 119.96564608424337], "label": "Surfing"}]}, "1RYEOaCeV4k": {"duration": 174.963, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1RYEOaCeV4k", "annotations": [{"segment": [45.98543649033639, 149.38465213574852], "label": "Cutting the grass"}]}, "W19-fYOO9sg": {"duration": 98.15, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=W19-fYOO9sg", "annotations": [{"segment": [13.1236365, 80.0442695], "label": "Fixing bicycle"}]}, "Zk3JXtS2fOY": {"duration": 189.15, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Zk3JXtS2fOY", 
"annotations": [{"segment": [37.757204368174726, 57.52074102964119], "label": "Doing fencing"}, {"segment": [70.79475819032761, 74.92445241809672], "label": "Doing fencing"}, {"segment": [84.06877535101404, 93.50807644305772], "label": "Doing fencing"}, {"segment": [105.89715912636505, 110.02685335413415], "label": "Doing fencing"}, {"segment": [115.04148205928237, 120.94104524180966], "label": "Doing fencing"}, {"segment": [125.07073946957877, 131.8691840873635], "label": "Doing fencing"}, {"segment": [144.84959594383776, 150.1597644305772], "label": "Doing fencing"}, {"segment": [163.1401762870515, 166.38527925117003], "label": "Doing fencing"}]}, "24_3cq6pPsI": {"duration": 215.366, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=24_3cq6pPsI", "annotations": []}, "YiMOKwzEf1k": {"duration": 18.205, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=YiMOKwzEf1k", "annotations": [{"segment": [0.5964014560582424, 17.466041255850236], "label": "Cutting the grass"}]}, "gC4B1Az2Mk0": {"duration": 96.921, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gC4B1Az2Mk0", "annotations": [{"segment": [0.2, 33.8624399375975], "label": "Having an ice cream"}, {"segment": [40.66516224648986, 95.54045553822152], "label": "Having an ice cream"}]}, "41LaEr0i2Dc": {"duration": 176.145, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=41LaEr0i2Dc", "annotations": [{"segment": [29.863927009275717, 75.6187573644349], "label": "Wakeboarding"}, {"segment": [84.93411204752118, 101.09899223287684], "label": "Wakeboarding"}, {"segment": [109.86638487578158, 112.88017609678009], "label": "Wakeboarding"}, {"segment": [130.96292342277113, 164.38860608376714], "label": "Wakeboarding"}, {"segment": [172.0600745627718, 173.42997964830835], "label": "Wakeboarding"}]}, "0OxOI3sAIrM": {"duration": 80.551, "subset": "training", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=0OxOI3sAIrM", "annotations": [{"segment": [26.131422056734817, 63.64615091964711], "label": "Doing crunches"}]}, "N9Zo5lRPHdw": {"duration": 57.122, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=N9Zo5lRPHdw", "annotations": []}, "hZRgO5Kd-rE": {"duration": 170.9, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hZRgO5Kd-rE", "annotations": [{"segment": [27.194508155836438, 135.9725407791822], "label": "Making a sandwich"}]}, "twrPZghmNtA": {"duration": 44.861, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=twrPZghmNtA", "annotations": [{"segment": [0.20000000000000004, 44.601], "label": "Beach soccer"}]}, "kFP91VjB1AI": {"duration": 82.896, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=kFP91VjB1AI", "annotations": [{"segment": [0.01, 74.4249648194519], "label": "Beer pong"}]}, "3OGD7if0zs8": {"duration": 95.156, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3OGD7if0zs8", "annotations": [{"segment": [9.797591475819033, 18.110699394695786], "label": "Baking cookies"}, {"segment": [32.807086608424335, 50.47244093603744], "label": "Baking cookies"}, {"segment": [52.55071791575662, 53.14451133853354], "label": "Baking cookies"}, {"segment": [72.44279757878314, 75.70866140405616], "label": "Baking cookies"}, {"segment": [78.0838350951638, 84.76401110140405], "label": "Baking cookies"}]}, "qeyCTRqPCN4": {"duration": 133.91, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qeyCTRqPCN4", "annotations": [{"segment": [20.61747484300226, 36.86154843511407], "label": "Bullfighting"}, {"segment": [43.31752640120978, 46.02487199989508], "label": "Bullfighting"}, {"segment": [50.39827642854057, 112.66722397154517], "label": "Bullfighting"}]}, "_HMwzNA9DNY": {"duration": 100.13, "subset": 
"validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_HMwzNA9DNY", "annotations": [{"segment": [27.335066341653665, 91.68962252886115], "label": "Archery"}]}, "B2_qciB7U1Q": {"duration": 184.32, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=B2_qciB7U1Q", "annotations": [{"segment": [0.19999999999999998, 163.51071606864275], "label": "Longboarding"}]}, "y_zFbT7xXKI": {"duration": 169.545, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=y_zFbT7xXKI", "annotations": [{"segment": [17.427392523364485, 157.63868691588786], "label": "Rope skipping"}]}, "38qpTblXDTw": {"duration": 15.953, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=38qpTblXDTw", "annotations": [{"segment": [0.004961699716998111, 15.783407559748104], "label": "Cutting the grass"}]}, "F3iZD7tm8Io": {"duration": 74.86, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=F3iZD7tm8Io", "annotations": [{"segment": [10.978079106837741, 13.547416770140194], "label": "Pole vault"}, {"segment": [19.85397285279166, 74.66115646258503], "label": "Pole vault"}, {"segment": [65.75168656542179, 74.861156], "label": "Pole vault"}]}, "leKf6Q87IYg": {"duration": 176.774, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=leKf6Q87IYg", "annotations": [{"segment": [27.9202315855102, 168.07208114288827], "label": "Wakeboarding"}]}, "l7QlSS_li6M": {"duration": 21.15, "subset": "training", "resolution": "480x272", "url": "https://www.youtube.com/watch?v=l7QlSS_li6M", "annotations": [{"segment": [2.7390490340702063, 10.758192591649244], "label": "Tumbling"}, {"segment": [13.101234536456287, 20.953378684807255], "label": "Tumbling"}]}, "vjVtKL3xd8w": {"duration": 141.53, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vjVtKL3xd8w", "annotations": [{"segment": 
[91.61141185647425, 109.71294383775351], "label": "Removing curlers"}]}, "XBbSLB3Ov0w": {"duration": 120.187, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XBbSLB3Ov0w", "annotations": [{"segment": [4.872302652106084, 110.01991731669267], "label": "Rafting"}]}, "IQ4SUx8ythk": {"duration": 60.117, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IQ4SUx8ythk", "annotations": [{"segment": [25.79099340873635, 28.698341756630267], "label": "Applying sunscreen"}, {"segment": [35.26332189703588, 40.6090914399376], "label": "Applying sunscreen"}, {"segment": [43.32886892667707, 49.04978019188768], "label": "Applying sunscreen"}]}, "y7i-jRmrwnI": {"duration": 82.08, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=y7i-jRmrwnI", "annotations": [{"segment": [7.171017500156007, 81.5703241462992], "label": "Washing dishes"}]}, "Fde_qSwXRzY": {"duration": 97.69, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Fde_qSwXRzY", "annotations": [{"segment": [46.176240421216846, 89.45694101404055], "label": "Starting a campfire"}]}, "g_bb4RSu6TQ": {"duration": 2.3, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=g_bb4RSu6TQ", "annotations": [{"segment": [0.01, 2.223464505460218], "label": "Using parallel bars"}]}, "085A7Iv6xzA": {"duration": 160.96, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=085A7Iv6xzA", "annotations": [{"segment": [38.7848829149766, 133.5066624], "label": "Kayaking"}]}, "5JlwYD_GChY": {"duration": 78.136, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5JlwYD_GChY", "annotations": [{"segment": [4.064946769507697, 47.14851235829274], "label": "Beach soccer"}, {"segment": [65.16085051405598, 71.73291984115879], "label": "Beach soccer"}]}, "BBRNbo8c8gA": {"duration": 130.61, "subset": 
"validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BBRNbo8c8gA", "annotations": [{"segment": [13.448369840873637, 110.8471695975039], "label": "Tumbling"}]}, "-c-vnHG9Yho": {"duration": 161.29, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-c-vnHG9Yho", "annotations": []}, "lj-VovhJcPA": {"duration": 213.02, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lj-VovhJcPA", "annotations": [{"segment": [96.01517784711389, 101.99536193447739], "label": "Walking the dog"}, {"segment": [153.50580655226207, 157.16070670826835], "label": "Walking the dog"}]}, "2ttzbjuKoT0": {"duration": 80.086, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=2ttzbjuKoT0", "annotations": []}, "5c1jHhHUwPI": {"duration": 42.17, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=5c1jHhHUwPI", "annotations": [{"segment": [0.01, 10.525373354134166], "label": "Cricket"}]}, "KGi_eGXrW6E": {"duration": 86.45, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=KGi_eGXrW6E", "annotations": []}, "6X5ZOKDZrCI": {"duration": 225.025, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6X5ZOKDZrCI", "annotations": []}, "TnYCazY5WEM": {"duration": 13.491, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TnYCazY5WEM", "annotations": [{"segment": [0.2, 13.027769497659907], "label": "Playing beach volleyball"}]}, "Fv1qhPABYk0": {"duration": 18.321, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Fv1qhPABYk0", "annotations": [{"segment": [0.01, 18.301], "label": "Tug of war"}]}, "lSTqYESahrY": {"duration": 106.788, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lSTqYESahrY", "annotations": [{"segment": [79.43129017160686, 
90.60527925117005], "label": "Putting on shoes"}]}, "mB0MeZjjCmU": {"duration": 214.367, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mB0MeZjjCmU", "annotations": [{"segment": [18.058945525167946, 57.521085746831226], "label": "Making a cake"}, {"segment": [72.90463193493726, 92.97012696290163], "label": "Making a cake"}, {"segment": [101.66517480835287, 165.20590838377532], "label": "Making a cake"}]}, "mw9WwLwcptY": {"duration": 122.091, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mw9WwLwcptY", "annotations": []}, "cGEhbEHCQyE": {"duration": 235.822, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cGEhbEHCQyE", "annotations": [{"segment": [0.01, 235.82185941043085], "label": "Playing ice hockey"}]}, "vVgvNxSSJao": {"duration": 86.82, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vVgvNxSSJao", "annotations": []}, "Qv0fN5A1vp4": {"duration": 28.49, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=Qv0fN5A1vp4", "annotations": [{"segment": [0.01, 8.222797745709828], "label": "Removing curlers"}]}, "gCHo9vxfEzM": {"duration": 163.93, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gCHo9vxfEzM", "annotations": []}, "vb1CG5i1lEs": {"duration": 183.043, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vb1CG5i1lEs", "annotations": [{"segment": [5.700342679127726, 179.01032398753895], "label": "Painting fence"}]}, "2lUqeOw61QY": {"duration": 80.18, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=2lUqeOw61QY", "annotations": [{"segment": [23.336608679999998, 71.90158736349454], "label": "Layup drill in basketball"}, {"segment": [8.058475039001559, 23.175924336973477], "label": "Layup drill in basketball"}]}, "k67UWfSt-iE": {"duration": 86.309, "subset": 
"training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=k67UWfSt-iE", "annotations": [{"segment": [4.847283262759082, 68.93913859594385], "label": "Playing beach volleyball"}]}, "9QnASEwOBT4": {"duration": 99.06, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=9QnASEwOBT4", "annotations": [{"segment": [51.6758143525741, 63.26231981279251], "label": "Snatch"}, {"segment": [79.79240093603744, 90.76095943837753], "label": "Snatch"}]}, "FA4BDINxLmE": {"duration": 166.325, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FA4BDINxLmE", "annotations": [{"segment": [0.19999999999999998, 15.56859923376527], "label": "Slacklining"}, {"segment": [18.94179573441441, 33.731965006491414], "label": "Slacklining"}, {"segment": [86.66520240129333, 118.5808300124805], "label": "Slacklining"}, {"segment": [131.03570931357254, 137.78210226833073], "label": "Slacklining"}, {"segment": [150.2369815694228, 152.57227143837753], "label": "Slacklining"}]}, "2wcD0wSzB5w": {"duration": 150.094, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2wcD0wSzB5w", "annotations": [{"segment": [0.01, 150.041], "label": "Disc dog"}]}, "lq-8Y-YLcNI": {"duration": 44.98, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lq-8Y-YLcNI", "annotations": [{"segment": [12.699552539952942, 31.475019312538613], "label": "Rock climbing"}]}, "QZyR7yfp8hA": {"duration": 124.79, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QZyR7yfp8hA", "annotations": [{"segment": [15.95533229329173, 19.846876755070202], "label": "Playing polo"}, {"segment": [89.50552262090484, 105.26627769110765], "label": "Playing polo"}]}, "0w2XFd-Q9Eg": {"duration": 18.576, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0w2XFd-Q9Eg", "annotations": [{"segment": [3.7673559906396257, 
16.8951426349454], "label": "Playing rubik cube"}]}, "BsSZDCHPjtY": {"duration": 51.06, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=BsSZDCHPjtY", "annotations": [{"segment": [0.01, 51.001], "label": "River tubing"}]}, "KgONRjXP8_Y": {"duration": 24.753, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=KgONRjXP8_Y", "annotations": [{"segment": [0.46194286517462124, 22.519742725205724], "label": "Painting furniture"}]}, "k_gAGeXhmHo": {"duration": 179.978, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=k_gAGeXhmHo", "annotations": [{"segment": [0.01, 22.176815912636506], "label": "Rock-paper-scissors"}]}, "vbnuIUgUVXA": {"duration": 139.134, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vbnuIUgUVXA", "annotations": [{"segment": [46.68139673536073, 112.78086703909905], "label": "Building sandcastles"}]}, "JHYMG87h3XI": {"duration": 76.348, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=JHYMG87h3XI", "annotations": [{"segment": [0.01, 76.301], "label": "Waterskiing"}]}, "A6CBFa22oJc": {"duration": 112.8, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=A6CBFa22oJc", "annotations": [{"segment": [27.08598127925117, 41.691664586583464], "label": "Walking the dog"}]}, "iZImXrduFlA": {"duration": 133.167, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=iZImXrduFlA", "annotations": [{"segment": [0.2, 132.31016692667706], "label": "Blowing leaves"}]}, "NfjeG6AsoJA": {"duration": 55.101, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NfjeG6AsoJA", "annotations": [{"segment": [25.45083628407404, 47.30256552942692], "label": "Fun sliding down"}]}, "3URkGUH1-vk": {"duration": 98.546, "subset": "testing", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=3URkGUH1-vk", "annotations": []}, "X3hQtXddMd4": {"duration": 227.115, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=X3hQtXddMd4", "annotations": [{"segment": [9.564528861154447, 216.44174570982838], "label": "Wakeboarding"}]}, "BUe-oRdwBFw": {"duration": 70.06, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BUe-oRdwBFw", "annotations": []}, "K5mrupCfD1w": {"duration": 123.07, "subset": "testing", "resolution": "432x720", "url": "https://www.youtube.com/watch?v=K5mrupCfD1w", "annotations": []}, "jzwdcO34AoY": {"duration": 222.45, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jzwdcO34AoY", "annotations": []}, "6j-H-tIjJvA": {"duration": 230.714, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6j-H-tIjJvA", "annotations": [{"segment": [25.194918653889015, 40.671797255563696], "label": "Beach soccer"}, {"segment": [43.91114393963514, 115.17677098920693], "label": "Beach soccer"}, {"segment": [119.85582731064345, 160.52762456620715], "label": "Beach soccer"}, {"segment": [165.92653570632623, 193.64094622560413], "label": "Beach soccer"}]}, "jqLFT1c8WS8": {"duration": 187.34, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jqLFT1c8WS8", "annotations": [{"segment": [3.942589703588143, 172.74383775351015], "label": "Discus throw"}]}, "--mFXNrRZ5E": {"duration": 97.803, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=--mFXNrRZ5E", "annotations": [{"segment": [3.659569422776911, 96.23586739469579], "label": "Doing fencing"}]}, "EolA3Rd_Vm4": {"duration": 66.32, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EolA3Rd_Vm4", "annotations": [{"segment": [2.4829774726989084, 23.795200780031205], "label": "Shaving legs"}, {"segment": [26.485093042121687, 
28.968070514820596], "label": "Shaving legs"}, {"segment": [36.10663074882996, 39.52072477379095], "label": "Shaving legs"}, {"segment": [42.93481879875195, 64.45395689547583], "label": "Shaving legs"}]}, "XX2sXEmR4BE": {"duration": 23.45, "subset": "validation", "resolution": "526x360", "url": "https://www.youtube.com/watch?v=XX2sXEmR4BE", "annotations": [{"segment": [0.01, 19.28125527457098], "label": "Spinning"}]}, "s3OxuR2zsZM": {"duration": 112.014, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=s3OxuR2zsZM", "annotations": [{"segment": [40.87917940717629, 111.981], "label": "Cutting the grass"}]}, "J3TwVc0v89A": {"duration": 204.48, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=J3TwVc0v89A", "annotations": [{"segment": [29.02838202340094, 73.6874312901716], "label": "Washing dishes"}, {"segment": [82.30024793447738, 159.17761131513262], "label": "Washing dishes"}]}, "ZEEUm7RwSRQ": {"duration": 193.98, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZEEUm7RwSRQ", "annotations": []}, "5GZNSTv1rVs": {"duration": 118.817, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=5GZNSTv1rVs", "annotations": [{"segment": [24.048177570093458, 111.17657476635515], "label": "Ice fishing"}]}, "X095EDl0-eU": {"duration": 59.862, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=X095EDl0-eU", "annotations": [{"segment": [0.2, 48.808709828393134], "label": "Doing fencing"}]}, "ziJaskA3588": {"duration": 190.056, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ziJaskA3588", "annotations": [{"segment": [0.19999999999999998, 122.71515444617785], "label": "Waterskiing"}, {"segment": [127.45776911076443, 180.51577067082684], "label": "Waterskiing"}]}, "WWYXTYht4k0": {"duration": 18.46, "subset": "testing", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=WWYXTYht4k0", "annotations": []}, "sI7qq85QaA0": {"duration": 78.28, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sI7qq85QaA0", "annotations": [{"segment": [4.666319850352136, 70.62657858243936], "label": "Ping-pong"}]}, "n3MGZcDHr-U": {"duration": 195.095, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=n3MGZcDHr-U", "annotations": [{"segment": [84.61164811218299, 160.09254186895475], "label": "Blow-drying hair"}]}, "1dJy1bMOvuU": {"duration": 56.355, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1dJy1bMOvuU", "annotations": []}, "TkNhbOYp9wc": {"duration": 122.88, "subset": "testing", "resolution": "624x360", "url": "https://www.youtube.com/watch?v=TkNhbOYp9wc", "annotations": []}, "ncgzVLi_hlI": {"duration": 235.056, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ncgzVLi_hlI", "annotations": [{"segment": [5.500521082067773, 88.37503871855554], "label": "Beach soccer"}]}, "A-oC3ReQiuQ": {"duration": 88.33, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=A-oC3ReQiuQ", "annotations": []}, "RhSNiFS9pR4": {"duration": 29.58, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RhSNiFS9pR4", "annotations": []}, "HGk9BqOuxCE": {"duration": 190.89, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HGk9BqOuxCE", "annotations": [{"segment": [0, 190.89120181405895], "label": "Playing saxophone"}]}, "SbY68ygmuEA": {"duration": 119.049, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SbY68ygmuEA", "annotations": [{"segment": [0.01, 119.0487074829932], "label": "Running a marathon"}]}, "-ySxFjhhK4Y": {"duration": 152.37, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=-ySxFjhhK4Y", 
"annotations": [{"segment": [27.79909048361934, 38.966246489859586], "label": "Shot put"}, {"segment": [63.43895007800312, 73.65570982839313], "label": "Shot put"}]}, "J7LhH4IKvnM": {"duration": 102.49000000000001, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=J7LhH4IKvnM", "annotations": [{"segment": [0, 84.71814352574103], "label": "Bathing dog"}]}, "IN4nGNF9gi8": {"duration": 108.02000000000001, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IN4nGNF9gi8", "annotations": [{"segment": [38.744177728988575, 78.28373522120042], "label": "Getting a haircut"}]}, "gDhfInESf_E": {"duration": 104.86, "subset": "training", "resolution": "160x112", "url": "https://www.youtube.com/watch?v=gDhfInESf_E", "annotations": [{"segment": [3.105833073322933, 102.4924914196568], "label": "Brushing hair"}]}, "eS445rQ9SBo": {"duration": 120.09, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=eS445rQ9SBo", "annotations": [{"segment": [4.121777409871905, 93.48940467394696], "label": "Ironing clothes"}]}, "jA05XIX7Yh0": {"duration": 106.161, "subset": "validation", "resolution": "480x270", "url": "https://www.youtube.com/watch?v=jA05XIX7Yh0", "annotations": [{"segment": [16.436800199671605, 101.10120234297379], "label": "Surfing"}]}, "fDPNV463JuE": {"duration": 177.91, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fDPNV463JuE", "annotations": [{"segment": [73.8290231037813, 174.5806584071763], "label": "Paintball"}]}, "Vk-aKj0R65o": {"duration": 180.071, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Vk-aKj0R65o", "annotations": [{"segment": [0, 180.041], "label": "Doing fencing"}]}, "5ObgpPDmVPA": {"duration": 186.085, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5ObgpPDmVPA", "annotations": [{"segment": [26.40291887675507, 71.6650655226209], 
"label": "Scuba diving"}]}, "MN0YWduOEHk": {"duration": 234.708, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MN0YWduOEHk", "annotations": [{"segment": [0.01, 234.661], "label": "Tug of war"}]}, "9xtYwXpaiZ0": {"duration": 121.02, "subset": "training", "resolution": "342x240", "url": "https://www.youtube.com/watch?v=9xtYwXpaiZ0", "annotations": [{"segment": [14.568797921760059, 33.04546147316193], "label": "Polishing shoes"}, {"segment": [45.48892876288156, 92.05766240774136], "label": "Polishing shoes"}]}, "T3DSDybuBsU": {"duration": 85.66, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=T3DSDybuBsU", "annotations": [{"segment": [3.2708837753510136, 5.139960218408736], "label": "Archery"}]}, "FujUrbL1qc0": {"duration": 161.054, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FujUrbL1qc0", "annotations": [{"segment": [36.4502344787918, 95.54969948656785], "label": "Doing crunches"}]}, "kuv1yEeNQzQ": {"duration": 206.983, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kuv1yEeNQzQ", "annotations": [{"segment": [4.843577743109725, 193.09729821840875], "label": "Beach soccer"}]}, "KiE-fFIz5ic": {"duration": 214.76, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=KiE-fFIz5ic", "annotations": [{"segment": [26.13073010920437, 164.82460530421218], "label": "Pole vault"}]}, "MAYrBia9Xnw": {"duration": 22.66, "subset": "testing", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=MAYrBia9Xnw", "annotations": []}, "kAQML4pRtck": {"duration": 6.13, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=kAQML4pRtck", "annotations": [{"segment": [0.28460218408736354, 5.919725429017161], "label": "Hammer throw"}]}, "jN6ljpARJaw": {"duration": 60.024, "subset": "testing", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=jN6ljpARJaw", "annotations": []}, "vDaDT1vUOfI": {"duration": 173.709, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vDaDT1vUOfI", "annotations": [{"segment": [26.47500538185077, 168.03526028111477], "label": "Swimming"}]}, "Ye_uVlSo1HI": {"duration": 111.967, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ye_uVlSo1HI", "annotations": []}, "pfARuqcw3Q8": {"duration": 141.22, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pfARuqcw3Q8", "annotations": []}, "dskswWtslKc": {"duration": 56.68, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=dskswWtslKc", "annotations": []}, "bSBmunE-1KQ": {"duration": 116.704, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=bSBmunE-1KQ", "annotations": [{"segment": [23.30428525429017, 60.263425149765986], "label": "Rafting"}]}, "57cM1GcKktw": {"duration": 134.05, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=57cM1GcKktw", "annotations": [{"segment": [2.8207488229329174, 133.41097210686428], "label": "Playing bagpipes"}]}, "Cjq9wf9XnX0": {"duration": 33.95, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Cjq9wf9XnX0", "annotations": [{"segment": [9.313722308892356, 19.579984399375974], "label": "Snatch"}]}, "aHKZIUD3wPI": {"duration": 28.445, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aHKZIUD3wPI", "annotations": [{"segment": [0.2, 27.67368535825545], "label": "Trimming branches or hedges"}]}, "X1WExPnfJjE": {"duration": 32.21, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=X1WExPnfJjE", "annotations": [{"segment": [0, 20.86528549141966], "label": "Brushing hair"}]}, "P8H4VrbYuTs": {"duration": 180.95, "subset": "testing", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=P8H4VrbYuTs", "annotations": []}, "lEqYTrzaHsc": {"duration": 61.6, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lEqYTrzaHsc", "annotations": [{"segment": [4.901295804992199, 52.18438474726989], "label": "Sailing"}]}, "0cYB-WFrVxk": {"duration": 224.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0cYB-WFrVxk", "annotations": [{"segment": [0, 224.0493422], "label": "Waterskiing"}]}, "FnLY_cMSO88": {"duration": 23.73, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FnLY_cMSO88", "annotations": []}, "xtAMq9qWdX8": {"duration": 24.03, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xtAMq9qWdX8", "annotations": []}, "FGdCWm4gI3M": {"duration": 228.113, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FGdCWm4gI3M", "annotations": [{"segment": [7.827366614664587, 14.231575663026522], "label": "Longboarding"}, {"segment": [17.43368018720749, 54.07998751950078], "label": "Longboarding"}, {"segment": [66.88840561622465, 102.11155538221529], "label": "Longboarding"}, {"segment": [117.05470982839314, 198.1920702028081], "label": "Longboarding"}, {"segment": [201.39445553822154, 220.608767550702], "label": "Longboarding"}]}, "oobYvNJU5ko": {"duration": 70.311, "subset": "validation", "resolution": "206x360", "url": "https://www.youtube.com/watch?v=oobYvNJU5ko", "annotations": [{"segment": [21.58118096723869, 56.4178081123245], "label": "Blow-drying hair"}]}, "ehYH-NSx3jM": {"duration": 152.834, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ehYH-NSx3jM", "annotations": []}, "6q7LQZLC_N8": {"duration": 151.19, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=6q7LQZLC_N8", "annotations": [{"segment": [0.01, 144.10931139625586], "label": "Sailing"}]}, 
"kl7qwEgYLZU": {"duration": 31.417, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kl7qwEgYLZU", "annotations": [{"segment": [0.5374527304117603, 9.576442507963877], "label": "Building sandcastles"}]}, "RpItv2ztook": {"duration": 223.585, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RpItv2ztook", "annotations": [{"segment": [5.232096957347681, 223.5849431], "label": "Skiing"}]}, "bVAUJAAg3TM": {"duration": 174.62, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bVAUJAAg3TM", "annotations": [{"segment": [0.01, 173.11999375975037], "label": "Long jump"}]}, "aoAytgRjR3w": {"duration": 150.93, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aoAytgRjR3w", "annotations": []}, "nHkVfEKklJA": {"duration": 100.775, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=nHkVfEKklJA", "annotations": [{"segment": [8.833112052513266, 98.30583027460126], "label": "Playing drums"}]}, "YOLDcsX7EiA": {"duration": 58.468, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YOLDcsX7EiA", "annotations": [{"segment": [0.2, 50.12722308892356], "label": "Skiing"}]}, "8JgckTuL1WA": {"duration": 124.18, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=8JgckTuL1WA", "annotations": [{"segment": [0.01, 116.43089928861154], "label": "Playing badminton"}]}, "htfrXrKUIzg": {"duration": 128.105, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=htfrXrKUIzg", "annotations": [{"segment": [14.942142486854772, 122.12711125922634], "label": "Grooming dog"}]}, "3cQg4XOkC5Y": {"duration": 106.88, "subset": "validation", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=3cQg4XOkC5Y", "annotations": [{"segment": [6.502927535101405, 100.04503900156006], "label": "Cheerleading"}]}, 
"AgocNx3-De0": {"duration": 152.555, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AgocNx3-De0", "annotations": [{"segment": [12.851755866153015, 31.1774077493712], "label": "Rafting"}, {"segment": [52.12100990162056, 118.99773868954759], "label": "Rafting"}]}, "XyZoiuga6vw": {"duration": 49.58, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XyZoiuga6vw", "annotations": [{"segment": [2.702990101404056, 47.03202776443057], "label": "Using the pommel horse"}]}, "Tko7eefi1BI": {"duration": 62.72, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Tko7eefi1BI", "annotations": [{"segment": [33.26647912012481, 61.640829061733896], "label": "Playing accordion"}]}, "XkTWEDKtP1A": {"duration": 46.74, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=XkTWEDKtP1A", "annotations": [{"segment": [5.104358065522621, 42.220333141965675], "label": "Long jump"}]}, "0dkIuFFlLRM": {"duration": 94.69, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0dkIuFFlLRM", "annotations": [{"segment": [20.223209048361934, 32.17999687987519], "label": "Spinning"}, {"segment": [44.43201404056162, 45.76054602184087], "label": "Spinning"}, {"segment": [57.56971918876755, 62.29338845553822], "label": "Spinning"}]}, "gVKgXyKh4BQ": {"duration": 6.5, "subset": "validation", "resolution": "1920x1440", "url": "https://www.youtube.com/watch?v=gVKgXyKh4BQ", "annotations": [{"segment": [1.8662901840873636, 6.085728861154446], "label": "Triple jump"}]}, "bqpG1zi2APg": {"duration": 120.535, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=bqpG1zi2APg", "annotations": []}, "1DmdX5QwqFI": {"duration": 121.65, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1DmdX5QwqFI", "annotations": [{"segment": [4.8790179899999995, 93.941379859276], 
"label": "Washing hands"}]}, "gdisMpHS668": {"duration": 27.098, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gdisMpHS668", "annotations": [{"segment": [0.01, 24.906270626006414], "label": "Decorating the Christmas tree"}]}, "naCGjbEz1T8": {"duration": 105.651, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=naCGjbEz1T8", "annotations": [{"segment": [3.790902053042122, 86.3666380780031], "label": "Making a lemonade"}]}, "3OcAjx8e4LU": {"duration": 187.478, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=3OcAjx8e4LU", "annotations": [{"segment": [19.840853582554516, 170.9997289719626], "label": "River tubing"}]}, "YB5psJsxEJI": {"duration": 140.365, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YB5psJsxEJI", "annotations": []}, "ys3NrLuOxMs": {"duration": 54.15, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ys3NrLuOxMs", "annotations": [{"segment": [11.644315132605305, 17.302191887675505], "label": "Layup drill in basketball"}, {"segment": [20.76263026521061, 27.345903276131043], "label": "Layup drill in basketball"}, {"segment": [32.83196411856474, 34.09797815912636], "label": "Layup drill in basketball"}, {"segment": [41.94726521060842, 46.42051482059283], "label": "Layup drill in basketball"}]}, "P6z57_sYXm8": {"duration": 81.67, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=P6z57_sYXm8", "annotations": [{"segment": [1.09103104, 11.134977808112323], "label": "Windsurfing"}, {"segment": [11.791464959999999, 81.571666], "label": "Windsurfing"}]}, "-uR5-jYe0Ag": {"duration": 33.28, "subset": "validation", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=-uR5-jYe0Ag", "annotations": [{"segment": [3.9013074038320523, 28.98652531037411], "label": "Putting on makeup"}]}, "s2ra7HNzIF0": {"duration": 60.581, 
"subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=s2ra7HNzIF0", "annotations": [{"segment": [8.201443925233644, 39.700562305295946], "label": "Having an ice cream"}]}, "g0upuaWM74M": {"duration": 86.17, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=g0upuaWM74M", "annotations": [{"segment": [13.30851507529689, 68.6934454024961], "label": "Hand car wash"}]}, "zI6PsewSm7w": {"duration": 134.444, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zI6PsewSm7w", "annotations": [{"segment": [17.195822152886112, 120.79016536661466], "label": "Baton twirling"}]}, "m_BaRLpmkvc": {"duration": 183.647, "subset": "testing", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=m_BaRLpmkvc", "annotations": []}, "S-8ogZOLiWU": {"duration": 61.162, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=S-8ogZOLiWU", "annotations": [{"segment": [11.545280227535631, 31.296296024960995], "label": "Playing ice hockey"}]}, "OBb4013eIc8": {"duration": 163.167, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OBb4013eIc8", "annotations": [{"segment": [41.491667800312015, 161.893870758912], "label": "Wakeboarding"}]}, "x1Mb7cN8WgU": {"duration": 233.477, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=x1Mb7cN8WgU", "annotations": [{"segment": [0.2, 232.02198172897195], "label": "Fixing the roof"}]}, "Y9xPzIiy6mI": {"duration": 49.575, "subset": "validation", "resolution": "426x228", "url": "https://www.youtube.com/watch?v=Y9xPzIiy6mI", "annotations": [{"segment": [18.935327613104523, 21.022078003120125], "label": "Sumo"}]}, "KZPFJiEHn6U": {"duration": 108.6, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=KZPFJiEHn6U", "annotations": []}, "xqYII12ta4E": {"duration": 154.25, "subset": "training", "resolution": 
"606x480", "url": "https://www.youtube.com/watch?v=xqYII12ta4E", "annotations": [{"segment": [2.8876784461778473, 153.28759751794072], "label": "Cheerleading"}]}, "t3zAh4NBVko": {"duration": 24.73, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=t3zAh4NBVko", "annotations": [{"segment": [4.629445741029642, 19.983774115444618], "label": "Arm wrestling"}]}, "J3l1jj070N8": {"duration": 48.391, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=J3l1jj070N8", "annotations": []}, "qi5Oie047pQ": {"duration": 31.02, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qi5Oie047pQ", "annotations": [{"segment": [7.319238689547582, 27.01376911076443], "label": "Doing nails"}]}, "iOgmO3MMeH4": {"duration": 104.89, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=iOgmO3MMeH4", "annotations": [{"segment": [35.67045156908317, 54.32380662090485], "label": "Playing piano"}, {"segment": [69.54101751170047, 70.19552120592824], "label": "Playing piano"}]}, "nEv28s46Kvo": {"duration": 226.14, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nEv28s46Kvo", "annotations": [{"segment": [66.48403822152886, 75.6542503900156], "label": "Doing fencing"}, {"segment": [131.38092433697346, 160.6550631825273], "label": "Doing fencing"}, {"segment": [175.11578237129484, 212.85473244929796], "label": "Doing fencing"}]}, "aWVEB3Ovj_U": {"duration": 46.348, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aWVEB3Ovj_U", "annotations": []}, "4pSWTD4vAz0": {"duration": 169.135, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4pSWTD4vAz0", "annotations": [{"segment": [22.649479750778813, 133.26321806853582], "label": "Waterskiing"}]}, "sGGnEgCnEt8": {"duration": 203.523, "subset": "validation", "resolution": "3840x2160", "url": 
"https://www.youtube.com/watch?v=sGGnEgCnEt8", "annotations": [{"segment": [16.510438267870853, 195.90269877847115], "label": "Arm wrestling"}]}, "UoO7VCPs8Jw": {"duration": 12.49, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UoO7VCPs8Jw", "annotations": [{"segment": [0.01, 12.492175], "label": "Shot put"}]}, "KbbEbeCJTJg": {"duration": 154.14, "subset": "validation", "resolution": "426x238", "url": "https://www.youtube.com/watch?v=KbbEbeCJTJg", "annotations": [{"segment": [6.130581903276131, 9.255976599063962], "label": "Shot put"}, {"segment": [25.363780031201248, 31.374154446177847], "label": "Shot put"}, {"segment": [41.23116848673947, 43.87573322932917], "label": "Shot put"}, {"segment": [51.08818252730109, 54.93482215288612], "label": "Shot put"}, {"segment": [61.6664414976599, 65.0322511700468], "label": "Shot put"}, {"segment": [72.00428549141965, 80.89963962558502], "label": "Shot put"}, {"segment": [87.15042901716068, 97.00744305772231], "label": "Shot put"}]}, "1VmqefkNV_8": {"duration": 171.82999999999998, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1VmqefkNV_8", "annotations": [{"segment": [97.02334165366614, 124.20171040999999], "label": "Doing kickboxing"}]}, "Fk0z6MeCV0I": {"duration": 67.83, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Fk0z6MeCV0I", "annotations": [{"segment": [7.031605529641186, 59.56730625], "label": "Hopscotch"}]}, "Xc70KHd4zhI": {"duration": 60.953, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Xc70KHd4zhI", "annotations": [{"segment": [0.01, 60.95238095238095], "label": "Having an ice cream"}]}, "Pc1uz-MzWRU": {"duration": 194.65, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Pc1uz-MzWRU", "annotations": []}, "hcsH2gglpuw": {"duration": 44.72, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=hcsH2gglpuw", "annotations": []}, "Ti3z17QofU8": {"duration": 24.126, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ti3z17QofU8", "annotations": [{"segment": [10.312629475819032, 24.125531], "label": "Fixing the roof"}]}, "fwqDDFkOaCc": {"duration": 162.98, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fwqDDFkOaCc", "annotations": [{"segment": [8.390580978159127, 81.87172954446177], "label": "Playing violin"}, {"segment": [87.21119016692668, 98.65289150078003], "label": "Playing violin"}, {"segment": [102.21253191575663, 153.57305790327612], "label": "Playing violin"}]}, "r9vcB6tc1mM": {"duration": 183.089, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=r9vcB6tc1mM", "annotations": [{"segment": [28.79616978193146, 163.93859034267913], "label": "Capoeira"}]}, "T35cHr4pjkc": {"duration": 6.64, "subset": "testing", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=T35cHr4pjkc", "annotations": []}, "M0ygYlmTHy4": {"duration": 202.55, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=M0ygYlmTHy4", "annotations": [{"segment": [18.32724576466052, 196.54391147618693], "label": "Playing water polo"}]}, "vFVg-ImCW9w": {"duration": 199.901, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vFVg-ImCW9w", "annotations": [{"segment": [43.686021693291195, 182.77441942784003], "label": "Making a cake"}]}, "eI_LceS_qnQ": {"duration": 148.21, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eI_LceS_qnQ", "annotations": [{"segment": [0, 148.01297052154197], "label": "Zumba"}]}, "kYZSCs-ILNQ": {"duration": 112.8, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kYZSCs-ILNQ", "annotations": [{"segment": [35.72367976623827, 76.1987840577223], "label": "Tennis 
serve with ball bouncing"}]}, "li_mry59KCM": {"duration": 101.24, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=li_mry59KCM", "annotations": []}, "5-KmeeiTUgo": {"duration": 133.19, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5-KmeeiTUgo", "annotations": []}, "t_2jTjX3a_o": {"duration": 213.76, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=t_2jTjX3a_o", "annotations": [{"segment": [7.166778471138844, 191.50298751950075], "label": "Dodgeball"}]}, "BtKLAioC21g": {"duration": 207.261, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BtKLAioC21g", "annotations": [{"segment": [13.25693353992663, 136.77275335095035], "label": "Skiing"}, {"segment": [150.67636657575144, 190.44716719344774], "label": "Skiing"}]}, "SfiAcQAPpQ8": {"duration": 82.27, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SfiAcQAPpQ8", "annotations": [{"segment": [34.36607074871836, 82.241], "label": "Mixing drinks"}]}, "ZHOPn9lONHA": {"duration": 179.282, "subset": "validation", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=ZHOPn9lONHA", "annotations": [{"segment": [98.37334230640086, 102.28288932197718], "label": "Doing crunches"}]}, "V90CMuokf0c": {"duration": 151.139, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=V90CMuokf0c", "annotations": []}, "sDdgFSFaAdY": {"duration": 167.57999999999998, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=sDdgFSFaAdY", "annotations": [{"segment": [54.37786395265334, 157.64351763650546], "label": "Polishing forniture"}]}, "1xtKGlUp4Vg": {"duration": 138.554, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1xtKGlUp4Vg", "annotations": []}, "VJ0LBlxsutw": {"duration": 87.633, "subset": "testing", "resolution": "640x360", 
"url": "https://www.youtube.com/watch?v=VJ0LBlxsutw", "annotations": []}, "uXcCES4BsQ0": {"duration": 50.574, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=uXcCES4BsQ0", "annotations": [{"segment": [19.329796723868956, 50.57306], "label": "Playing rubik cube"}]}, "yUCSKSMVrPo": {"duration": 162.354, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=yUCSKSMVrPo", "annotations": [{"segment": [10.381187207488297, 73.68110920436817], "label": "Playing drums"}, {"segment": [79.50470202808111, 146.87391575663025], "label": "Playing drums"}]}, "UgsqMLsPG0k": {"duration": 157.966, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=UgsqMLsPG0k", "annotations": [{"segment": [17.940830218068538, 150.1622912772586], "label": "Plastering"}]}, "P5D9S9NzQ28": {"duration": 59.89, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=P5D9S9NzQ28", "annotations": []}, "pw9Ve02R5nc": {"duration": 38.731, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pw9Ve02R5nc", "annotations": []}, "cICxG-28hK0": {"duration": 25.06, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=cICxG-28hK0", "annotations": [{"segment": [3.009589205928237, 20.4808408299532], "label": "Dodgeball"}]}, "pMHaNlUQb9s": {"duration": 222.401, "subset": "testing", "resolution": "300x240", "url": "https://www.youtube.com/watch?v=pMHaNlUQb9s", "annotations": []}, "U6Xnw2RGjsI": {"duration": 17.067, "subset": "training", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=U6Xnw2RGjsI", "annotations": [{"segment": [0.01, 16.55438072581306], "label": "Kneeling"}]}, "sx4zAnVDV9Y": {"duration": 150.234, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sx4zAnVDV9Y", "annotations": [{"segment": [28.738211223123255, 33.177447653828246], "label": "Drum 
corps"}, {"segment": [45.5605808552685, 49.29888521796744], "label": "Drum corps"}, {"segment": [56.074561875359265, 62.616594510082415], "label": "Drum corps"}, {"segment": [69.15862714480556, 76.86887989287213], "label": "Drum corps"}, {"segment": [80.84082827823975, 83.1772685049266], "label": "Drum corps"}, {"segment": [90.88752125299317, 96.96226584237894], "label": "Drum corps"}, {"segment": [107.94353490780709, 118.69115870548171], "label": "Drum corps"}, {"segment": [128.9714955950618, 146.96208515182695], "label": "Drum corps"}]}, "4BIKUGTeGQw": {"duration": 34.436, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4BIKUGTeGQw", "annotations": []}, "tLifahMJYkA": {"duration": 216.92000000000002, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tLifahMJYkA", "annotations": []}, "tF4Tl56ntnE": {"duration": 89.072, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=tF4Tl56ntnE", "annotations": [{"segment": [9.310089524180967, 79.76106547581904], "label": "Putting in contact lenses"}]}, "4Gs7Xyn16QI": {"duration": 87.47, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=4Gs7Xyn16QI", "annotations": [{"segment": [9.552059034742342, 13.509340634849885], "label": "Javelin throw"}, {"segment": [37.52594620791635, 42.71134911700467], "label": "Javelin throw"}, {"segment": [56.2206895725429, 60.17797112012481], "label": "Javelin throw"}, {"segment": [70.13940398127924, 74.09668552886114], "label": "Javelin throw"}]}, "Aj0Pd6snB-k": {"duration": 106.301, "subset": "validation", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=Aj0Pd6snB-k", "annotations": [{"segment": [26.036270707971177, 94.19491568234157], "label": "Baking cookies"}]}, "C6OmnTWhsEc": {"duration": 56.68, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=C6OmnTWhsEc", "annotations": [{"segment": 
[5.94052599, 43.10041197], "label": "Playing flauta"}]}, "Y2nPNns1RFY": {"duration": 60.163, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Y2nPNns1RFY", "annotations": []}, "QGY7zA03RkE": {"duration": 76.44, "subset": "training", "resolution": "854x478", "url": "https://www.youtube.com/watch?v=QGY7zA03RkE", "annotations": [{"segment": [10.2131484, 21.34586393447738], "label": "Ping-pong"}]}, "7oeFpnRCJkY": {"duration": 7.059, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7oeFpnRCJkY", "annotations": [{"segment": [0.01, 6.5332483032915825], "label": "Kneeling"}]}, "n1KeC6NXPUA": {"duration": 89.235, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=n1KeC6NXPUA", "annotations": [{"segment": [9.045343213728549, 43.69596567862714], "label": "Using the monkey bar"}]}, "PjxH86Lw23o": {"duration": 136.116, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PjxH86Lw23o", "annotations": []}, "AzUK4Nwd-sY": {"duration": 90.09, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AzUK4Nwd-sY", "annotations": [{"segment": [5.903157269147909, 44.97643633636502], "label": "Horseback riding"}]}, "xmK2kbPSGUs": {"duration": 88.515, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xmK2kbPSGUs", "annotations": [{"segment": [26.082884555382215, 77.69663494539782], "label": "Making a lemonade"}]}, "koEfnIoZB_4": {"duration": 95.47999999999999, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=koEfnIoZB_4", "annotations": [{"segment": [3.2770202454356676, 13.405991913145913], "label": "Bungee jumping"}, {"segment": [21.002720663928596, 32.17438059155019], "label": "Bungee jumping"}, {"segment": [36.04722269979234, 39.324242945228015], "label": "Bungee jumping"}, {"segment": [51.538591132760956, 52.879190039001564], 
"label": "Bungee jumping"}, {"segment": [52.13441271450858, 53.326056433697346], "label": "Bungee jumping"}, {"segment": [66.1362264149766, 75.07355430889235], "label": "Bungee jumping"}, {"segment": [83.11714941341653, 88.77745707956318], "label": "Bungee jumping"}]}, "fFmfx_332I8": {"duration": 61.719, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fFmfx_332I8", "annotations": []}, "x7M1yRFtnxY": {"duration": 184.04, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=x7M1yRFtnxY", "annotations": [{"segment": [36.17261466458658, 159.04467082683306], "label": "Paintball"}]}, "-2veyKpAwXU": {"duration": 158.69, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-2veyKpAwXU", "annotations": []}, "pEr9p5-qUsU": {"duration": 119.862, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pEr9p5-qUsU", "annotations": [{"segment": [13.32143860244751, 92.42913953702696], "label": "Beer pong"}]}, "AJGg44cliq8": {"duration": 136.789, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AJGg44cliq8", "annotations": []}, "ij87GXkcdho": {"duration": 203.872, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ij87GXkcdho", "annotations": [{"segment": [7.315182378723721, 14.948416165218037], "label": "Elliptical trainer"}, {"segment": [27.3524210682713, 43.89109427234232], "label": "Elliptical trainer"}, {"segment": [60.429767476413346, 70.92546393284303], "label": "Elliptical trainer"}, {"segment": [74.74208082609019, 90.32659980684942], "label": "Elliptical trainer"}, {"segment": [100.18619344773792, 109.4096842730852], "label": "Elliptical trainer"}, {"segment": [121.8136891761385, 135.48989971027413], "label": "Elliptical trainer"}, {"segment": [181.92540524478122, 190.51279196411855], "label": "Elliptical trainer"}]}, "6VUsbs84lCc": {"duration": 17.857, 
"subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=6VUsbs84lCc", "annotations": [{"segment": [0.2, 17.106352148579884], "label": "Knitting"}]}, "Dbas61NkTV0": {"duration": 153.46, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Dbas61NkTV0", "annotations": []}, "gd7SO0TQ-sY": {"duration": 120.275, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gd7SO0TQ-sY", "annotations": [{"segment": [0.2, 9.929739875389409], "label": "Playing beach volleyball"}, {"segment": [20.98360124610592, 62.95080373831775], "label": "Playing beach volleyball"}, {"segment": [104.16859190031153, 120.281], "label": "Playing beach volleyball"}]}, "dTkMZlj7jFU": {"duration": 193.817, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dTkMZlj7jFU", "annotations": [{"segment": [8.767003120124805, 171.71233697347893], "label": "Hanging wallpaper"}]}, "uM3RiCL0g2U": {"duration": 190.404, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=uM3RiCL0g2U", "annotations": [{"segment": [106.61403900156006, 141.37497035881435], "label": "Disc dog"}, {"segment": [164.24445085803433, 178.7977566302652], "label": "Disc dog"}]}, "ez8ram5yd70": {"duration": 98.267, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=ez8ram5yd70", "annotations": [{"segment": [0, 98.06687074829932], "label": "Rope skipping"}]}, "_xtOX9UCWmU": {"duration": 89.65, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_xtOX9UCWmU", "annotations": [{"segment": [9.63759, 85.41131825273011], "label": "Playing guitarra"}]}, "HRjSOacCdWI": {"duration": 19.9, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HRjSOacCdWI", "annotations": [{"segment": [0, 4.638299531981279], "label": "Clean and jerk"}, {"segment": [5.2185127499999995, 
14.780714508580344], "label": "Clean and jerk"}]}, "x4c_wI6kQyE": {"duration": 70.36, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=x4c_wI6kQyE", "annotations": [{"segment": [35.78191359126365, 41.489458090483616], "label": "Smoking a cigarette"}, {"segment": [43.57490704212168, 54.221672742589696], "label": "Smoking a cigarette"}]}, "mpC_UTM1tWQ": {"duration": 16.95, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mpC_UTM1tWQ", "annotations": [{"segment": [8.726500797191887, 14.491280277061422], "label": "Discus throw"}]}, "5qsXmDi8d74": {"duration": 186.596, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5qsXmDi8d74", "annotations": [{"segment": [61.402645865834636, 173.44250858034323], "label": "Sumo"}]}, "Yp9WuBrgbI4": {"duration": 64.854, "subset": "training", "resolution": "324x240", "url": "https://www.youtube.com/watch?v=Yp9WuBrgbI4", "annotations": [{"segment": [2.8543124461188865, 45.65982023428087], "label": "Doing crunches"}]}, "oLrkd6M2WRY": {"duration": 100.195, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oLrkd6M2WRY", "annotations": [{"segment": [77.68559936193448, 98.31839436349453], "label": "Shuffleboard"}]}, "aXXes0e-eM0": {"duration": 46.928, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aXXes0e-eM0", "annotations": [{"segment": [11.76843000543935, 22.65970929906542], "label": "Hitting a pinata"}]}, "B9sTAMQfQ1I": {"duration": 120.21, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=B9sTAMQfQ1I", "annotations": []}, "XxMzH75Bhr8": {"duration": 157.455, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XxMzH75Bhr8", "annotations": [{"segment": [0.01, 157.454534], "label": "Futsal"}]}, "8ATFQNEWHL0": {"duration": 59.42, "subset": "training", "resolution": 
"720x480", "url": "https://www.youtube.com/watch?v=8ATFQNEWHL0", "annotations": [{"segment": [10.753052765054603, 59.04909147706709], "label": "Preparing salad"}]}, "WgJy8seQ7z4": {"duration": 56.448, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=WgJy8seQ7z4", "annotations": [{"segment": [0, 55.628817472698906], "label": "Arm wrestling"}]}, "Igpy_o0h3Cs": {"duration": 150.88, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Igpy_o0h3Cs", "annotations": [{"segment": [32.00795007800312, 136.03378783151325], "label": "Removing curlers"}]}, "BrvdzlErtco": {"duration": 39.66, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BrvdzlErtco", "annotations": []}, "5bKIKQaOOYY": {"duration": 59.653, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=5bKIKQaOOYY", "annotations": []}, "8olnQLX0hO8": {"duration": 117.702, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8olnQLX0hO8", "annotations": [{"segment": [4.40691856474259, 110.54020733229329], "label": "River tubing"}]}, "KzogfJrOqJE": {"duration": 222.68, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=KzogfJrOqJE", "annotations": [{"segment": [93.44890671817349, 222.67936507936508], "label": "Hand washing clothes"}]}, "-Fk62y-1WHo": {"duration": 120.56, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-Fk62y-1WHo", "annotations": []}, "WROGzgOpPXc": {"duration": 209.77, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WROGzgOpPXc", "annotations": [{"segment": [23.229627145085804, 90.9554414976599], "label": "Rafting"}, {"segment": [118.11120280811234, 144.61260842433697], "label": "Rafting"}]}, "f5Ja983oTMw": {"duration": 36.02, "subset": "training", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=f5Ja983oTMw", "annotations": [{"segment": [2.00025774, 13.209195021840873], "label": "Using the pommel horse"}, {"segment": [13.78878228, 34.43327158970359], "label": "Using the pommel horse"}]}, "IssfjAvSwo0": {"duration": 33.53, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=IssfjAvSwo0", "annotations": []}, "au3iN1kGMWY": {"duration": 224.03, "subset": "testing", "resolution": "320x180", "url": "https://www.youtube.com/watch?v=au3iN1kGMWY", "annotations": []}, "XHMrvd3sKzE": {"duration": 88.38, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=XHMrvd3sKzE", "annotations": [{"segment": [2.4116146645865837, 84.40651326053042], "label": "Rock climbing"}]}, "FKAlXR9NkKw": {"duration": 49.6, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FKAlXR9NkKw", "annotations": [{"segment": [18.12710608424337, 46.264703588143526], "label": "Cleaning windows"}]}, "Ub88_ql0B78": {"duration": 74.907, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Ub88_ql0B78", "annotations": [{"segment": [0.01, 67.94105185847624], "label": "Baton twirling"}]}, "MVxXCu4zxSM": {"duration": 87.89, "subset": "validation", "resolution": "160x120", "url": "https://www.youtube.com/watch?v=MVxXCu4zxSM", "annotations": [{"segment": [6.0328412592286, 14.396552784711389], "label": "Shaving"}, {"segment": [24.679804773790952, 30.30131586115445], "label": "Shaving"}, {"segment": [36.197047001560065, 39.62479766458659], "label": "Shaving"}, {"segment": [42.22988816848674, 44.14942853978159], "label": "Shaving"}, {"segment": [51.96470005148206, 55.666670767550706], "label": "Shaving"}, {"segment": [61.425291881435264, 77.7413850374415], "label": "Shaving"}]}, "0PmrImNqA2w": {"duration": 54.289, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=0PmrImNqA2w", "annotations": [{"segment": 
[1.0131543067589919, 4.221480357760004], "label": "Painting fence"}, {"segment": [19.503243916475352, 41.792667007640276], "label": "Painting fence"}, {"segment": [50.40448956559036, 52.008652591090865], "label": "Painting fence"}]}, "hKLDr9ZaDb8": {"duration": 148.515, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hKLDr9ZaDb8", "annotations": [{"segment": [6.716870514820593, 134.56721372854915], "label": "Hula hoop"}]}, "LHu41OIGw7Q": {"duration": 99.846, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LHu41OIGw7Q", "annotations": [{"segment": [0.01, 99.701], "label": "Canoeing"}]}, "ymmBQHiNK24": {"duration": 132.17, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ymmBQHiNK24", "annotations": [{"segment": [5.153705148205928, 129.04877691107643], "label": "Playing bagpipes"}]}, "2EfjOGiEeJs": {"duration": 88.19, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=2EfjOGiEeJs", "annotations": [{"segment": [30.357849584278153, 68.27081916666667], "label": "Beach soccer"}]}, "I4mFeQwqjnE": {"duration": 51.92, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=I4mFeQwqjnE", "annotations": [{"segment": [5.507874502340094, 50.704844683307336], "label": "Dodgeball"}]}, "GSrBZXLSwWI": {"duration": 237.169, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GSrBZXLSwWI", "annotations": []}, "pZZVzBD8bZY": {"duration": 136.46, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pZZVzBD8bZY", "annotations": [{"segment": [14.046468018720748, 116.20259906396255], "label": "Doing motocross"}]}, "5sYPXv0RkiM": {"duration": 50.95, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5sYPXv0RkiM", "annotations": [{"segment": [8.027138238689547, 47.924399583463334], "label": "Chopping 
wood"}]}, "A5-gsreb2zY": {"duration": 157.87, "subset": "training", "resolution": "568x320", "url": "https://www.youtube.com/watch?v=A5-gsreb2zY", "annotations": [{"segment": [4.493383177353119, 153.7586485324933], "label": "Getting a piercing"}]}, "IK9kE9IrcOM": {"duration": 116.147, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IK9kE9IrcOM", "annotations": [{"segment": [0.2, 12.857645865834634], "label": "Mooping floor"}, {"segment": [77.70254134165367, 115.881], "label": "Mooping floor"}]}, "hYRNSJwhVPw": {"duration": 234.549, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hYRNSJwhVPw", "annotations": [{"segment": [52.70696244539873, 110.43830739019519], "label": "Doing crunches"}]}, "28GYivx4lyk": {"duration": 11.053, "subset": "training", "resolution": "128x96", "url": "https://www.youtube.com/watch?v=28GYivx4lyk", "annotations": [{"segment": [0.19999999999999998, 10.852698412698414], "label": "Table soccer"}]}, "Ar3Bk2xWK4I": {"duration": 163.399, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ar3Bk2xWK4I", "annotations": []}, "SJHbdA5ZG4o": {"duration": 147.54, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SJHbdA5ZG4o", "annotations": []}, "16bJAOf1SJI": {"duration": 159.893, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=16bJAOf1SJI", "annotations": []}, "1gradpCDbPU": {"duration": 60.21, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1gradpCDbPU", "annotations": [{"segment": [6.011541067082684, 17.56497155538222], "label": "Washing hands"}, {"segment": [23.8583036099844, 29.775914347893917], "label": "Washing hands"}]}, "nlHN3bO_MOQ": {"duration": 126.247, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nlHN3bO_MOQ", "annotations": [{"segment": [10.433062402496098, 
111.81093291731669], "label": "Drum corps"}]}, "2cYfV33Qw4Y": {"duration": 165.419, "subset": "training", "resolution": "1916x1080", "url": "https://www.youtube.com/watch?v=2cYfV33Qw4Y", "annotations": [{"segment": [0.2, 137.00048517940718], "label": "Canoeing"}]}, "q0KrlywYHM8": {"duration": 37.69, "subset": "validation", "resolution": "480x270", "url": "https://www.youtube.com/watch?v=q0KrlywYHM8", "annotations": [{"segment": [14.381904102931513, 31.730511712051822], "label": "Playing water polo"}]}, "jvz9ra2l7Qc": {"duration": 29.885, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=jvz9ra2l7Qc", "annotations": []}, "T3rh5gQVFKA": {"duration": 196.186, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=T3rh5gQVFKA", "annotations": [{"segment": [35.80108736349454, 186.67427457098285], "label": "Longboarding"}]}, "zNjk-Xw_AVU": {"duration": 221.054, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zNjk-Xw_AVU", "annotations": []}, "Wrbf7c58IuU": {"duration": 104.4, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Wrbf7c58IuU", "annotations": [{"segment": [5.537433927289064, 18.07809311556136], "label": "Playing water polo"}, {"segment": [20.03248156048691, 94.78783948829954], "label": "Playing water polo"}]}, "nUghBtcrTPA": {"duration": 128.64, "subset": "training", "resolution": "854x428", "url": "https://www.youtube.com/watch?v=nUghBtcrTPA", "annotations": [{"segment": [9.432115664586583, 116.79768759126365], "label": "Tai chi"}]}, "phg81-nhqH4": {"duration": 27.957, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=phg81-nhqH4", "annotations": [{"segment": [0, 27.921], "label": "Kneeling"}]}, "WRc1Jv1j3nk": {"duration": 38.94, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=WRc1Jv1j3nk", "annotations": [{"segment": 
[6.276188019999999, 22.83110387], "label": "Archery"}, {"segment": [26.85048153822153, 32.317774159126365], "label": "Archery"}]}, "6F9C3dIU4kU": {"duration": 64.389, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6F9C3dIU4kU", "annotations": [{"segment": [0.2, 62.83536037441497], "label": "Swinging at the playground"}]}, "mRe2xY_amWw": {"duration": 180.58, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mRe2xY_amWw", "annotations": [{"segment": [11.550460258970357, 18.029986745709827], "label": "Tumbling"}, {"segment": [26.19982448985959, 30.70732117628705], "label": "Tumbling"}, {"segment": [38.03200329173166, 43.666374149765986], "label": "Tumbling"}, {"segment": [51.55449335101404, 61.69636089547582], "label": "Tumbling"}, {"segment": [69.86619863962558, 74.93713241185647], "label": "Tumbling"}, {"segment": [84.79728141341653, 88.17790392823711], "label": "Tumbling"}, {"segment": [97.47461584399376, 104.51757941653666], "label": "Tumbling"}, {"segment": [112.68741716068642, 120.01209927613104], "label": "Tumbling"}, {"segment": [141.70442707956317, 151.56457608112325], "label": "Tumbling"}]}, "vJiOYQE9tts": {"duration": 136.32999999999998, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vJiOYQE9tts", "annotations": [{"segment": [12.329388455538222, 16.36832605304212], "label": "Grooming horse"}, {"segment": [46.55407020280811, 70.14996879875194], "label": "Grooming horse"}, {"segment": [88.23184867394696, 110.34296255850235], "label": "Grooming horse"}]}, "E7OCzz_XuiI": {"duration": 165.907, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=E7OCzz_XuiI", "annotations": [{"segment": [27.694233429201113, 36.49427021978838], "label": "Rock-paper-scissors"}]}, "UALnEw4XhTY": {"duration": 237.4, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UALnEw4XhTY", 
"annotations": [{"segment": [26.483666926677063, 34.262086583463336], "label": "Rock climbing"}, {"segment": [49.44852496099844, 57.96774648985959], "label": "Rock climbing"}, {"segment": [70.5613783151326, 92.41503354134166], "label": "Rock climbing"}, {"segment": [105.00866536661466, 153.53118798751947], "label": "Rock climbing"}, {"segment": [163.90241419656786, 217.24014898595942], "label": "Rock climbing"}]}, "2rA5pyel_NE": {"duration": 115.055, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2rA5pyel_NE", "annotations": [{"segment": [0.01, 110.28780317003086], "label": "Having an ice cream"}]}, "oZgyW8Yca_M": {"duration": 97.013, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=oZgyW8Yca_M", "annotations": [{"segment": [26.704974052282378, 34.5505034023316], "label": "Building sandcastles"}, {"segment": [81.321930338568, 88.26220659362242], "label": "Building sandcastles"}]}, "keFBEoBy0zY": {"duration": 212.602, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=keFBEoBy0zY", "annotations": [{"segment": [64.33445665418633, 87.51533238973497], "label": "Snowboarding"}, {"segment": [92.48266290449537, 134.8705499637843], "label": "Snowboarding"}, {"segment": [155.07102739047667, 170.9664842080179], "label": "Snowboarding"}]}, "tV_1whw_S4g": {"duration": 40.357, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=tV_1whw_S4g", "annotations": [{"segment": [0.01, 13.15795243232941], "label": "Doing a powerbomb"}]}, "AdP2aMo6OgY": {"duration": 49.53, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AdP2aMo6OgY", "annotations": [{"segment": [4.404220431669267, 48.13735664789392], "label": "Plataform diving"}]}, "tzP0lKSUv_o": {"duration": 19.74, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tzP0lKSUv_o", "annotations": []}, "tlNpR7CkM-4": 
{"duration": 100.25999999999999, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=tlNpR7CkM-4", "annotations": [{"segment": [9.725386352301303, 97.9637447557281], "label": "Playing bagpipes"}]}, "-U4lNtzVQ8s": {"duration": 89.19, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=-U4lNtzVQ8s", "annotations": [{"segment": [6.21365829, 39.40549221], "label": "Shaving legs"}, {"segment": [44.95292361, 65.74687266], "label": "Shaving legs"}]}, "NVWk38z7f4U": {"duration": 226.12, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=NVWk38z7f4U", "annotations": [{"segment": [114.64535642650195, 121.70045528351748], "label": "Getting a tattoo"}, {"segment": [126.9917794262791, 128.75555414053298], "label": "Getting a tattoo"}, {"segment": [158.03421439714734, 162.26727371135664], "label": "Getting a tattoo"}]}, "l-R4wiSZcRc": {"duration": 169.228, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=l-R4wiSZcRc", "annotations": [{"segment": [12.672226856421196, 59.92907284182524], "label": "Swimming"}, {"segment": [64.68115791298318, 94.24968724463263], "label": "Swimming"}, {"segment": [101.641819577545, 111.14598971986089], "label": "Swimming"}]}, "UCFV1JYhq18": {"duration": 86.541, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UCFV1JYhq18", "annotations": [{"segment": [3.3752250770302923, 73.579905850234], "label": "Rope skipping"}]}, "X1lLinc_yvo": {"duration": 201.55, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=X1lLinc_yvo", "annotations": [{"segment": [44.02008270807023, 165.38973931746386], "label": "Making a cake"}]}, "NbrC_xwOMQ4": {"duration": 48.65, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NbrC_xwOMQ4", "annotations": []}, "cB5ozEChN5Q": {"duration": 113.012, "subset": "training", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cB5ozEChN5Q", "annotations": [{"segment": [21.50921271680799, 45.310390723111915], "label": "Snow tubing"}, {"segment": [49.18910121302811, 78.80834495420632], "label": "Snow tubing"}, {"segment": [88.85773122353464, 109.13280878446021], "label": "Snow tubing"}]}, "1AEZ9fDDkz4": {"duration": 94.622, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=1AEZ9fDDkz4", "annotations": [{"segment": [54.49777258566978, 94.581], "label": "BMX"}]}, "MRt7pz2OY5M": {"duration": 45.07, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MRt7pz2OY5M", "annotations": [{"segment": [2.9485834633385335, 40.24498127925117], "label": "Hand car wash"}]}, "CvmhLCrOjhM": {"duration": 31.278, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CvmhLCrOjhM", "annotations": [{"segment": [0.2, 31.077209], "label": "Playing rubik cube"}]}, "otrk9uW9UjU": {"duration": 101.1, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=otrk9uW9UjU", "annotations": [{"segment": [1.9729792764464953, 94.84270089386484], "label": "Doing crunches"}]}, "Lh-cI4M0dNw": {"duration": 51.061, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Lh-cI4M0dNw", "annotations": [{"segment": [4.460839455850234, 50.34375957316693], "label": "Calf roping"}]}, "t6d__c9sIUU": {"duration": 427.062, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=t6d__c9sIUU", "annotations": [{"segment": [18.226609223121475, 396.0628669676698], "label": "Removing ice from car"}]}, "9XMFp9AEiYw": {"duration": 100.85, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=9XMFp9AEiYw", "annotations": []}, "ooCciCGrdcA": {"duration": 183.79, "subset": "validation", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=ooCciCGrdcA", "annotations": [{"segment": [11.038626422776911, 163.28565578627146], "label": "Playing badminton"}]}, "z6U8CyJRNXw": {"duration": 166.324, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=z6U8CyJRNXw", "annotations": [{"segment": [3.880030119528982, 5.690712683897394], "label": "Wakeboarding"}, {"segment": [9.312077812634218, 11.12276037700263], "label": "Wakeboarding"}, {"segment": [12.674774003604124, 16.296139132340947], "label": "Wakeboarding"}, {"segment": [20.952180012145437, 158.30538596637786], "label": "Wakeboarding"}]}, "ywl6JVt7-7I": {"duration": 34.552, "subset": "training", "resolution": "1906x1080", "url": "https://www.youtube.com/watch?v=ywl6JVt7-7I", "annotations": [{"segment": [0.01, 34.55102040816327], "label": "Cutting the grass"}]}, "PMMuX5drbQ4": {"duration": 114.5, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PMMuX5drbQ4", "annotations": []}, "xbWAmySRE88": {"duration": 145.311, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=xbWAmySRE88", "annotations": [{"segment": [0.01, 145.31013605442178], "label": "Braiding hair"}]}, "NMUdgvut9E8": {"duration": 104.84, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NMUdgvut9E8", "annotations": []}, "hSq0yL5AB40": {"duration": 190.543, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hSq0yL5AB40", "annotations": [{"segment": [13.376650004775701, 174.78822672906918], "label": "BMX"}]}, "G1Lht9z3V7o": {"duration": 110.99000000000001, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=G1Lht9z3V7o", "annotations": []}, "fQB76oAKOQc": {"duration": 154.41, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=fQB76oAKOQc", "annotations": [{"segment": [3.9735140405616227, 91.18955072], "label": 
"Getting a tattoo"}]}, "ekbZecn088U": {"duration": 220.45, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ekbZecn088U", "annotations": [{"segment": [26.475627145085806, 67.73634477379096], "label": "Smoking a cigarette"}, {"segment": [116.2176879875195, 136.17272386895476], "label": "Smoking a cigarette"}, {"segment": [144.0817457098284, 194.28684087363493], "label": "Smoking a cigarette"}]}, "BT9O3WA5Xys": {"duration": 48.37, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BT9O3WA5Xys", "annotations": []}, "eeoQE0dbA6U": {"duration": 201.016, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=eeoQE0dbA6U", "annotations": [{"segment": [0.01, 184.70814507644306], "label": "Baton twirling"}]}, "E5zIMqTj4nc": {"duration": 103.956, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=E5zIMqTj4nc", "annotations": [{"segment": [16.704275986005428, 61.140893657515015], "label": "Snowboarding"}, {"segment": [61.30307109427235, 92.27896137160687], "label": "Snowboarding"}]}, "HYAlS44yzdo": {"duration": 116.68, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HYAlS44yzdo", "annotations": [{"segment": [7.099111719570824, 18.202850563002112], "label": "Shot put"}, {"segment": [95.01887903588144, 109.94521635569423], "label": "Shot put"}]}, "vijDPbwU6vA": {"duration": 93.09, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vijDPbwU6vA", "annotations": [{"segment": [63.462894817472694, 67.0934952074883], "label": "Chopping wood"}, {"segment": [79.00186448673948, 82.48724086115445], "label": "Chopping wood"}]}, "6rMF2jWbeUQ": {"duration": 133.074, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6rMF2jWbeUQ", "annotations": [{"segment": [13.909404876875348, 106.0851625684075], "label": "Putting on shoes"}]}, 
"IB068eD8A7Y": {"duration": 225.79, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IB068eD8A7Y", "annotations": [{"segment": [0.01, 225.790838], "label": "Archery"}]}, "H-yBxySDyTw": {"duration": 106.67, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=H-yBxySDyTw", "annotations": [{"segment": [0, 0], "label": "Smoking a cigarette"}]}, "euyYRNOSPE0": {"duration": 180.7, "subset": "validation", "resolution": "534x360", "url": "https://www.youtube.com/watch?v=euyYRNOSPE0", "annotations": [{"segment": [6.201793470378272, 178.72441108580344], "label": "Belly dance"}]}, "hLyHeVmMDGU": {"duration": 232.07999999999998, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hLyHeVmMDGU", "annotations": [{"segment": [19.90648205928237, 186.39705928237132], "label": "Pole vault"}]}, "-YwrMtiqHKg": {"duration": 150.63, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=-YwrMtiqHKg", "annotations": []}, "ripbruSSD8w": {"duration": 6.78, "subset": "validation", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=ripbruSSD8w", "annotations": [{"segment": [2.6867038221528863, 4.167564196567863], "label": "Chopping wood"}]}, "QinlLV8QKBU": {"duration": 219.94, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QinlLV8QKBU", "annotations": [{"segment": [5.146788075604657, 40.48806619475663], "label": "Preparing pasta"}, {"segment": [45.63485427036129, 99.16145025664972], "label": "Preparing pasta"}, {"segment": [130.04217871027765, 182.19629669110768], "label": "Preparing pasta"}]}, "TMnmi7WJr_o": {"duration": 154.55, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TMnmi7WJr_o", "annotations": [{"segment": [57.02270206006241, 78.96378403666147], "label": "Getting a tattoo"}]}, "QHn9KyE-zZo": {"duration": 196.279, "subset": "validation", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QHn9KyE-zZo", "annotations": [{"segment": [0.01, 7.961349719294894], "label": "Slacklining"}, {"segment": [13.779259129548853, 18.37234550606514], "label": "Slacklining"}, {"segment": [33.070221910917255, 35.21366221995819], "label": "Slacklining"}, {"segment": [43.4812176976875, 46.543275282031686], "label": "Slacklining"}, {"segment": [54.50462500132658, 64.30320927122798], "label": "Slacklining"}, {"segment": [68.89629564774427, 72.26455899052287], "label": "Slacklining"}, {"segment": [77.470056883908, 79.61349719294894], "label": "Slacklining"}, {"segment": [90.02449297971918, 94.31137359780105], "label": "Slacklining"}, {"segment": [100.12928292823713, 107.478221124805], "label": "Slacklining"}, {"segment": [115.43957083775352, 123.70712630889236], "label": "Slacklining"}, {"segment": [128.3002126817473, 130.44365298907957], "label": "Slacklining"}, {"segment": [134.1181220873635, 148.20358696411859], "label": "Slacklining"}, {"segment": [151.57185030421218, 161.676640324493], "label": "Slacklining"}, {"segment": [164.73869790639625, 166.57593245553824], "label": "Slacklining"}, {"segment": [169.6379900374415, 172.08763610296413], "label": "Slacklining"}, {"segment": [178.51795702496102, 184.9482779469579], "label": "Slacklining"}, {"segment": [191.07239311076444, 196.277891], "label": "Slacklining"}]}, "VWmSZPIoBlw": {"duration": 18.646, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VWmSZPIoBlw", "annotations": [{"segment": [0.01, 18.601], "label": "Removing ice from car"}]}, "WFqm48C2BPk": {"duration": 133.93, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=WFqm48C2BPk", "annotations": [{"segment": [10.029281628407993, 38.445579575563976], "label": "Sailing"}, {"segment": [57.877312730604466, 96.32289230616844], "label": "Sailing"}, {"segment": [112.82941712948517, 116.79934106552261], "label": "Sailing"}]}, 
"krqxyYFHzBI": {"duration": 63.693, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=krqxyYFHzBI", "annotations": [{"segment": [16.599457160652612, 62.114097347568496], "label": "Doing a powerbomb"}]}, "9AOVI0OCZqg": {"duration": 193.121, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9AOVI0OCZqg", "annotations": [{"segment": [18.98060842433697, 125.33227150038383], "label": "Braiding hair"}]}, "u_RzyIJi8qc": {"duration": 163.05, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=u_RzyIJi8qc", "annotations": [{"segment": [34.21771737184865, 93.23132262114663], "label": "Snatch"}, {"segment": [120.5709003800312, 144.2272162773791], "label": "Snatch"}]}, "m34BYRanODU": {"duration": 114.06, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=m34BYRanODU", "annotations": [{"segment": [26.50410140405616, 108.15096411856474], "label": "Doing step aerobics"}]}, "9PxPcJS47js": {"duration": 225.188, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9PxPcJS47js", "annotations": [{"segment": [19.316154446177848, 209.33546957878315], "label": "Slacklining"}]}, "tM0BAYylvLA": {"duration": 52.52, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=tM0BAYylvLA", "annotations": [{"segment": [0, 52.501], "label": "Getting a haircut"}]}, "ddzfgHw_qlU": {"duration": 59.82, "subset": "testing", "resolution": "1016x720", "url": "https://www.youtube.com/watch?v=ddzfgHw_qlU", "annotations": []}, "HK9tOFFF5II": {"duration": 141.27, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HK9tOFFF5II", "annotations": [{"segment": [18.4555128, 94.91083680000001], "label": "Getting a tattoo"}]}, "syy0FVZ7SkQ": {"duration": 192.308, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=syy0FVZ7SkQ", 
"annotations": [{"segment": [0.01, 181.20722067394695], "label": "Mooping floor"}]}, "a0--cEyaRNM": {"duration": 50.16, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=a0--cEyaRNM", "annotations": []}, "m9eV0XgWzmQ": {"duration": 220.52, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=m9eV0XgWzmQ", "annotations": []}, "YvIfwsbuiKI": {"duration": 71.332, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=YvIfwsbuiKI", "annotations": [{"segment": [0, 71.301], "label": "Kneeling"}]}, "RI8IORq_BbY": {"duration": 114.288, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RI8IORq_BbY", "annotations": [{"segment": [27.713690327613104, 37.515960218408736], "label": "Sharpening knives"}]}, "s5Ng5ZZ9f9k": {"duration": 41.936, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=s5Ng5ZZ9f9k", "annotations": []}, "zi_TdMQffkU": {"duration": 43.097, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=zi_TdMQffkU", "annotations": [{"segment": [9.270542901716068, 17.26470670826833], "label": "Calf roping"}]}, "RZpbFG3j-Bo": {"duration": 160.59, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RZpbFG3j-Bo", "annotations": [{"segment": [0.01, 152.07123462714506], "label": "Putting in contact lenses"}]}, "xv8OYJ7t7-E": {"duration": 94.622, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xv8OYJ7t7-E", "annotations": [{"segment": [79.56433952886115, 80.892872099844], "label": "Having an ice cream"}]}, "zIvC3Yytcv0": {"duration": 188.129, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zIvC3Yytcv0", "annotations": [{"segment": [72.18560543144677, 186.44041851555892], "label": "Drum corps"}]}, "BIy2GQjWUsA": {"duration": 53.546, "subset": "training", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BIy2GQjWUsA", "annotations": [{"segment": [30.072055731669266, 53.5449659], "label": "Grooming dog"}]}, "sf2zGT5nN04": {"duration": 225.93, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sf2zGT5nN04", "annotations": [{"segment": [63.09126069110765, 219.23331927301092], "label": "Preparing salad"}]}, "xbQQhK7wQZQ": {"duration": 217.804, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xbQQhK7wQZQ", "annotations": [{"segment": [5.435531981279251, 25.47905616224649], "label": "Making a cake"}, {"segment": [35.33095787831513, 187.1861326053042], "label": "Making a cake"}]}, "GQVqSLs2Ulg": {"duration": 85.427, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GQVqSLs2Ulg", "annotations": [{"segment": [6.7868190327613105, 80.66228705148205], "label": "Painting fence"}]}, "yVp99wxlW90": {"duration": 60.512, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=yVp99wxlW90", "annotations": [{"segment": [0.01, 2.83203856474259], "label": "Croquet"}, {"segment": [6.513688698907956, 7.835306695787832], "label": "Croquet"}, {"segment": [8.968122121684868, 11.611358115444618], "label": "Croquet"}, {"segment": [12.744173541341652, 16.3314223900156], "label": "Croquet"}, {"segment": [19.918671238689548, 25.582748368174727], "label": "Croquet"}, {"segment": [0.2, 60.311223999999996], "label": "Croquet"}, {"segment": [28.131583076443057, 30.208411357254292], "label": "Croquet"}, {"segment": [37.85491548205928, 40.403750190327614], "label": "Croquet"}]}, "D-wP7_1A_Kw": {"duration": 74.537, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=D-wP7_1A_Kw", "annotations": [{"segment": [7.128500104567875, 66.57136594880464], "label": "Playing beach volleyball"}]}, "Z8yKTJeRMOE": {"duration": 65.829, "subset": "training", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Z8yKTJeRMOE", "annotations": [{"segment": [11.896236002032882, 55.20181225864252], "label": "Painting furniture"}]}, "5Qm6ZTr9DJY": {"duration": 33.72, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5Qm6ZTr9DJY", "annotations": []}, "LyJxXonLzT4": {"duration": 70.43, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LyJxXonLzT4", "annotations": [{"segment": [9.665035881435257, 64.9095023400936], "label": "Mixing drinks"}]}, "3zMCxmdkcRY": {"duration": 202.48, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3zMCxmdkcRY", "annotations": [{"segment": [20.05351794071763, 193.74540561622464], "label": "Ballet"}]}, "7hbcifblzUY": {"duration": 234.383, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7hbcifblzUY", "annotations": []}, "GYh4XH7jUL4": {"duration": 7.99, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=GYh4XH7jUL4", "annotations": [{"segment": [1.7587059282371296, 6.6805967238689545], "label": "Springboard diving"}]}, "wHvOS2brBsI": {"duration": 195.164, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wHvOS2brBsI", "annotations": []}, "Q2jdtN4-RE0": {"duration": 231.23, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Q2jdtN4-RE0", "annotations": [{"segment": [6.8537626099844, 219.3204035195008], "label": "Tai chi"}]}, "EJr0qHpy77g": {"duration": 51.967, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=EJr0qHpy77g", "annotations": [{"segment": [17.59299920654297, 47.95748518785138], "label": "Hitting a pinata"}]}, "p1yEPOr467M": {"duration": 72.098, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=p1yEPOr467M", "annotations": [{"segment": 
[0.009999999999999998, 64.44950157098285], "label": "Waterskiing"}]}, "rSxO9uspxT8": {"duration": 70.427, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rSxO9uspxT8", "annotations": [{"segment": [17.579057223088924, 53.835862745709825], "label": "Trimming branches or hedges"}]}, "_i6yjCO8nzQ": {"duration": 187.87, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_i6yjCO8nzQ", "annotations": [{"segment": [23.74053806233882, 184.06244220904836], "label": "Tumbling"}]}, "wCGpPDYfs-s": {"duration": 197.811, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wCGpPDYfs-s", "annotations": [{"segment": [121.61269786309137, 196.9102203707944], "label": "Playing drums"}]}, "xXj-oQm-NbE": {"duration": 221.82, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=xXj-oQm-NbE", "annotations": [{"segment": [84.76515847893916, 132.8564116567863], "label": "Wrapping presents"}]}, "zJNYqTVWqyY": {"duration": 203.73, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zJNYqTVWqyY", "annotations": [{"segment": [11.442040179566366, 178.94079407644304], "label": "Bathing dog"}]}, "8K4cX9GfaII": {"duration": 68.29, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8K4cX9GfaII", "annotations": [{"segment": [1.5969032761310453, 64.19551170046802], "label": "Hanging wallpaper"}]}, "8J_erRJya-k": {"duration": 118.747, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8J_erRJya-k", "annotations": [{"segment": [5.372322299694709, 115.96805962870515], "label": "Longboarding"}]}, "bFi3hnk5yAM": {"duration": 146.03, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bFi3hnk5yAM", "annotations": []}, "58DHmEWMpug": {"duration": 32.067, "subset": "testing", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=58DHmEWMpug", "annotations": []}, "G3xayqdY0TY": {"duration": 179.003, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=G3xayqdY0TY", "annotations": [{"segment": [0.01, 170.33584995781968], "label": "Wakeboarding"}]}, "xuF9WjXsGKo": {"duration": 18.16, "subset": "testing", "resolution": "786x480", "url": "https://www.youtube.com/watch?v=xuF9WjXsGKo", "annotations": []}, "OHJzD35IfgI": {"duration": 128.848, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=OHJzD35IfgI", "annotations": []}, "rxfkWIGZtlQ": {"duration": 121.743, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=rxfkWIGZtlQ", "annotations": [{"segment": [0.01, 121.701], "label": "Blow-drying hair"}]}, "vvoqG7UQsdc": {"duration": 49.02, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vvoqG7UQsdc", "annotations": [{"segment": [7.9910314, 42.4021714], "label": "Pole vault"}]}, "oFtwNVuasmU": {"duration": 178.237, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oFtwNVuasmU", "annotations": [{"segment": [16.213404539165147, 18.712045584368276], "label": "Waterskiing"}, {"segment": [30.92762402758357, 59.24555587321902], "label": "Waterskiing"}, {"segment": [64.24283796362528, 69.79537361963223], "label": "Waterskiing"}, {"segment": [104.22109468687532, 107.5526160804795], "label": "Waterskiing"}, {"segment": [117.547180261292, 119.49056669652205], "label": "Waterskiing"}, {"segment": [130.87326469184913, 137.81393420119494], "label": "Waterskiing"}, {"segment": [143.08884302829773, 149.19663219652205], "label": "Waterskiing"}, {"segment": [163.63322477596128, 164.18847833670895], "label": "Waterskiing"}]}, "OzXD3WO6jrs": {"duration": 65.97, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OzXD3WO6jrs", "annotations": [{"segment": 
[10.242696089999999, 59.28686427457099], "label": "Using parallel bars"}]}, "OiL6Aj0gC14": {"duration": 152.46, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=OiL6Aj0gC14", "annotations": [{"segment": [1.9028049285236717, 20.930854213760387], "label": "Ping-pong"}, {"segment": [41.623857811455316, 57.559849087841066], "label": "Ping-pong"}, {"segment": [61.60330956095387, 70.16593173931038], "label": "Ping-pong"}, {"segment": [100.37295997962367, 105.12997230093285], "label": "Ping-pong"}]}, "70bS0DkAeDo": {"duration": 32.88, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=70bS0DkAeDo", "annotations": [{"segment": [3.30335313, 23.3966964], "label": "Clean and jerk"}]}, "z08lVo_kgDI": {"duration": 65.667, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=z08lVo_kgDI", "annotations": []}, "cms1BP4eKOA": {"duration": 55.75, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=cms1BP4eKOA", "annotations": []}, "XiG7rgPoKKI": {"duration": 184.506, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=XiG7rgPoKKI", "annotations": [{"segment": [6.401007153607809, 175.38758919026722], "label": "Canoeing"}]}, "sObM6mb3hgs": {"duration": 233.291, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=sObM6mb3hgs", "annotations": [{"segment": [27.9368499774521, 225.30892009943557], "label": "Rollerblading"}]}, "wdU4BKY6Ajg": {"duration": 85.73, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wdU4BKY6Ajg", "annotations": []}, "XORmEz1vOeQ": {"duration": 71.797, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XORmEz1vOeQ", "annotations": [{"segment": [7.052953198127924, 71.741], "label": "Spread mulch"}]}, "tkHJgKq3jTs": {"duration": 107.021, "subset": "training", "resolution": 
"480x360", "url": "https://www.youtube.com/watch?v=tkHJgKq3jTs", "annotations": [{"segment": [0.01, 106.38110679632406], "label": "Decorating the Christmas tree"}]}, "W5g1IIAOZqs": {"duration": 33.0, "subset": "training", "resolution": "312x224", "url": "https://www.youtube.com/watch?v=W5g1IIAOZqs", "annotations": [{"segment": [11.50634635, 18.736777341653667], "label": "Ping-pong"}]}, "U0mKdo2-Gfc": {"duration": 104.769, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=U0mKdo2-Gfc", "annotations": [{"segment": [22.704273010920435, 32.01557566302652], "label": "Running a marathon"}]}, "Pt0CjFgzpxM": {"duration": 173.477, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Pt0CjFgzpxM", "annotations": [{"segment": [9.982301332177505, 151.08353781225424], "label": "Laying tile"}]}, "CMYeHWoB1FM": {"duration": 13.793, "subset": "validation", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=CMYeHWoB1FM", "annotations": [{"segment": [0, 13.792652], "label": "Peeling potatoes"}]}, "AtUAYnNTniM": {"duration": 72.17, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AtUAYnNTniM", "annotations": [{"segment": [2.533184750780031, 70.19736409383775], "label": "Cheerleading"}]}, "Odofg9Xz5ig": {"duration": 230.226, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Odofg9Xz5ig", "annotations": []}, "lbtW7nHTnwA": {"duration": 168.37, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lbtW7nHTnwA", "annotations": [{"segment": [0.3337780099885055, 168.367073], "label": "Ping-pong"}]}, "eCunwZEbEMs": {"duration": 168.32, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eCunwZEbEMs", "annotations": []}, "CBW_uJJpmZY": {"duration": 115.938, "subset": "validation", "resolution": "562x480", "url": "https://www.youtube.com/watch?v=CBW_uJJpmZY", 
"annotations": [{"segment": [38.5251649739459, 109.24506728237128], "label": "Rope skipping"}]}, "padyJHC5Y5Y": {"duration": 148.84, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=padyJHC5Y5Y", "annotations": [{"segment": [31.081519500780033, 59.84352262090484], "label": "Blow-drying hair"}, {"segment": [69.35353978159127, 95.3321232449298], "label": "Blow-drying hair"}]}, "wBiRGLLjFVU": {"duration": 227.672, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wBiRGLLjFVU", "annotations": [{"segment": [4.262174111454254, 215.2397918845554], "label": "Slacklining"}]}, "bPw2rvCjXUE": {"duration": 175.892, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=bPw2rvCjXUE", "annotations": [{"segment": [0.01, 31.616572516181623], "label": "Arm wrestling"}, {"segment": [74.35647933861154, 134.63070690870498], "label": "Arm wrestling"}, {"segment": [157.91847665169564, 169.6993484040321], "label": "Arm wrestling"}]}, "XgbTh1BCciA": {"duration": 64.95, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=XgbTh1BCciA", "annotations": [{"segment": [7.668174219999999, 64.60039488377535], "label": "Playing bagpipes"}]}, "r6r4Q9jLFvs": {"duration": 54.71, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=r6r4Q9jLFvs", "annotations": [{"segment": [8.065072431468687, 13.271098022152179], "label": "Playing squash"}]}, "yVE4t-X5b-M": {"duration": 114.64, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yVE4t-X5b-M", "annotations": [{"segment": [1.6095622043681748, 109.45022989703588], "label": "Tumbling"}]}, "YDNgm6ufrJc": {"duration": 61.16, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YDNgm6ufrJc", "annotations": [{"segment": [0.01, 57.440153070202804], "label": "Washing dishes"}]}, "fXgszraP2tc": {"duration": 
217.06, "subset": "training", "resolution": "2560x1080", "url": "https://www.youtube.com/watch?v=fXgszraP2tc", "annotations": [{"segment": [25.058424321372858, 37.58763648205929], "label": "Walking the dog"}, {"segment": [198.4356304368175, 210.62621524180966], "label": "Walking the dog"}]}, "Mm6iYEyY9dI": {"duration": 233.825, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Mm6iYEyY9dI", "annotations": [{"segment": [5.561104755520449, 10.294663945551601], "label": "Hula hoop"}]}, "JapT-ASJlX0": {"duration": 211.32999999999998, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JapT-ASJlX0", "annotations": []}, "PcGzPNEnVD0": {"duration": 202.339, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PcGzPNEnVD0", "annotations": [{"segment": [60.796007980266474, 166.21351896999082], "label": "Changing car wheel"}]}, "QMHF20eV9N0": {"duration": 34.81, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QMHF20eV9N0", "annotations": [{"segment": [2.9843291731669264, 10.092458658346333], "label": "Javelin throw"}, {"segment": [17.146327613104525, 21.16160686427457], "label": "Javelin throw"}]}, "rcrhyi2EzZc": {"duration": 90.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rcrhyi2EzZc", "annotations": [{"segment": [0, 89.20413131825272], "label": "Washing dishes"}]}, "mpj1rR1wXKU": {"duration": 195.814, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mpj1rR1wXKU", "annotations": [{"segment": [185.78073088268624, 195.55700000000002], "label": "Playing ten pins"}]}, "_vbwjI1QA7g": {"duration": 108.74, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=_vbwjI1QA7g", "annotations": [{"segment": [70.26622274143303, 76.53092211838005], "label": "Throwing darts"}]}, "fsoAdigH0X4": {"duration": 16.86, "subset": 
"training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=fsoAdigH0X4", "annotations": [{"segment": [0.14464473166926678, 16.791938394695787], "label": "Mowing the lawn"}]}, "CNH37tJNzFE": {"duration": 129.986, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CNH37tJNzFE", "annotations": [{"segment": [49.682128541341655, 129.37631860648574], "label": "Changing car wheel"}]}, "618D4x4bwfc": {"duration": 65.71, "subset": "testing", "resolution": "562x480", "url": "https://www.youtube.com/watch?v=618D4x4bwfc", "annotations": []}, "kzBTJEMgoj0": {"duration": 176.983, "subset": "training", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=kzBTJEMgoj0", "annotations": [{"segment": [0.2, 10.491941941623244], "label": "Mooping floor"}, {"segment": [14.357394235905492, 29.819203413034483], "label": "Mooping floor"}, {"segment": [42.519975237104724, 55.772954531786716], "label": "Mooping floor"}]}, "jkn6uvmqwh4": {"duration": 73.236, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jkn6uvmqwh4", "annotations": [{"segment": [34.15934579439252, 64.78889252336448], "label": "Drum corps"}]}, "pGNZ3GqYrVY": {"duration": 152.672, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=pGNZ3GqYrVY", "annotations": []}, "7ih5UMIU7zE": {"duration": 166.42000000000002, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7ih5UMIU7zE", "annotations": [{"segment": [22.58707504218536, 166.41741496598638], "label": "Cheerleading"}]}, "IDr50VT8BK8": {"duration": 238.05, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IDr50VT8BK8", "annotations": [{"segment": [117.6811809672387, 194.5266209048362], "label": "Brushing hair"}]}, "GIXToy60QnQ": {"duration": 98.96000000000001, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GIXToy60QnQ", 
"annotations": []}, "iGtvLylVVQQ": {"duration": 94.25, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iGtvLylVVQQ", "annotations": []}, "3YdmY4oPE5s": {"duration": 61.533, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=3YdmY4oPE5s", "annotations": [{"segment": [0.01, 61.15689758181632], "label": "Croquet"}]}, "nXNczyQpljQ": {"duration": 204.34, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nXNczyQpljQ", "annotations": [{"segment": [20.879846894556053, 25.66149122155362], "label": "Playing saxophone"}, {"segment": [47.65705512574245, 52.11992316427351], "label": "Playing saxophone"}, {"segment": [76.98447366466088, 122.88825920383753], "label": "Playing saxophone"}, {"segment": [149.98424372349044, 192.38149008953556], "label": "Playing saxophone"}]}, "Nh-RdjyfGNA": {"duration": 31.185, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=Nh-RdjyfGNA", "annotations": [{"segment": [7.686638131045242, 10.313716985959438], "label": "Beach soccer"}, {"segment": [15.713823521060842, 21.162579664586584], "label": "Beach soccer"}]}, "c3yTMTETJEY": {"duration": 36.13, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=c3yTMTETJEY", "annotations": []}, "_G4EVIG9YaE": {"duration": 55.937, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_G4EVIG9YaE", "annotations": []}, "_LKFGLH8XxI": {"duration": 39.196, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_LKFGLH8XxI", "annotations": [{"segment": [0.2, 4.1437566302652105], "label": "Blow-drying hair"}, {"segment": [11.273455538221528, 21.632847113884555], "label": "Blow-drying hair"}, {"segment": [25.654728549141964, 32.11411388455538], "label": "Blow-drying hair"}]}, "cEVHZc_uT7c": {"duration": 138.0, "subset": "validation", "resolution": "426x240", "url": 
"https://www.youtube.com/watch?v=cEVHZc_uT7c", "annotations": [{"segment": [37.02861897333035, 74.48780328355991], "label": "Hand washing clothes"}]}, "F035AUh0mSA": {"duration": 36.967, "subset": "testing", "resolution": "328x240", "url": "https://www.youtube.com/watch?v=F035AUh0mSA", "annotations": []}, "_zHSfEhEqkU": {"duration": 194.352, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=_zHSfEhEqkU", "annotations": [{"segment": [5.457587279251171, 66.40064523088924], "label": "Canoeing"}, {"segment": [120.97651802340093, 187.07396396099847], "label": "Canoeing"}]}, "MZ4yIxM6snw": {"duration": 115.706, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=MZ4yIxM6snw", "annotations": [{"segment": [1.6245636600974243, 113.89996228237129], "label": "Plastering"}]}, "65VsS25XcMY": {"duration": 17.624, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=65VsS25XcMY", "annotations": []}, "L2oaUDyzWPA": {"duration": 92.09, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=L2oaUDyzWPA", "annotations": [{"segment": [11.630765990639626, 22.830762870514818], "label": "Playing harmonica"}, {"segment": [48.964088923556936, 61.74357254290171], "label": "Playing harmonica"}, {"segment": [68.49228861154447, 88.30766770670827], "label": "Playing harmonica"}]}, "bQVMoeBK7XU": {"duration": 100.055, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bQVMoeBK7XU", "annotations": [{"segment": [13.55153738317757, 54.82920872274143], "label": "Applying sunscreen"}]}, "VI2qAFwvPSc": {"duration": 101.12, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VI2qAFwvPSc", "annotations": [{"segment": [0.01, 82.59171918876754], "label": "Cumbia"}]}, "LDIemY9nO-4": {"duration": 132.702, "subset": "validation", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=LDIemY9nO-4", "annotations": [{"segment": [17.596994492024578, 30.846496227196027], "label": "Rafting"}, {"segment": [47.61539686077239, 55.27526505141839], "label": "Rafting"}, {"segment": [62.31406284822822, 68.11071985736572], "label": "Rafting"}, {"segment": [85.2936674201662, 88.60604285395907], "label": "Rafting"}, {"segment": [90.0552065522621, 112.20671712948518], "label": "Rafting"}, {"segment": [114.48397522620907, 115.72611600624025], "label": "Rafting"}, {"segment": [119.45253834633387, 121.72979644305772], "label": "Rafting"}]}, "PzAjZbCEbLU": {"duration": 207.192, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PzAjZbCEbLU", "annotations": [{"segment": [98.58573240249609, 192.64621807176286], "label": "Fixing the roof"}]}, "UmuWttAYfco": {"duration": 157.316, "subset": "training", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=UmuWttAYfco", "annotations": [{"segment": [0.2, 157.08100000000002], "label": "Arm wrestling"}]}, "THPFtlx3gXw": {"duration": 215.44, "subset": "training", "resolution": "2048x1024", "url": "https://www.youtube.com/watch?v=THPFtlx3gXw", "annotations": [{"segment": [9.910419886115445, 190.64960289391576], "label": "Doing motocross"}]}, "ou45_7IdWCE": {"duration": 61.998, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ou45_7IdWCE", "annotations": [{"segment": [0.01, 50.65142589703588], "label": "Swinging at the playground"}]}, "0dWysfs67aY": {"duration": 139.74, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=0dWysfs67aY", "annotations": []}, "BzhREL6ncq4": {"duration": 34.67, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=BzhREL6ncq4", "annotations": [{"segment": [0.5941029641185647, 33.431794071762866], "label": "Using uneven bars"}]}, "7eo-KtL5ITg": {"duration": 146.4, "subset": "testing", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=7eo-KtL5ITg", "annotations": []}, "nB90Q8sTBgE": {"duration": 210.44, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nB90Q8sTBgE", "annotations": [{"segment": [105.02639625585024, 188.40900780031203], "label": "Painting"}]}, "yRswMXWCcrI": {"duration": 131.031, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yRswMXWCcrI", "annotations": [{"segment": [3.492091768402909, 127.77652653907373], "label": "Skiing"}]}, "mGsenLq9yEU": {"duration": 235.357, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mGsenLq9yEU", "annotations": [{"segment": [14.07407906392653, 16.639666291340863], "label": "Bullfighting"}, {"segment": [115.23151831626298, 136.1227285966368], "label": "Bullfighting"}]}, "snG89ed-64M": {"duration": 74.17, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=snG89ed-64M", "annotations": [{"segment": [1.7355195432307087, 73.00752211857181], "label": "Playing lacrosse"}]}, "VLmZ3M3wSbo": {"duration": 103.701, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VLmZ3M3wSbo", "annotations": [{"segment": [0.2, 5.805925110777852], "label": "Putting in contact lenses"}, {"segment": [48.5440030687625, 97.73311028562793], "label": "Putting in contact lenses"}]}, "yEn-5y95DI0": {"duration": 56.61, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yEn-5y95DI0", "annotations": [{"segment": [14.82409984399376, 38.309685300000005], "label": "Volleyball"}, {"segment": [39.98626053042122, 50.137297971918876], "label": "Volleyball"}]}, "P6UwokP8BEg": {"duration": 90.23, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=P6UwokP8BEg", "annotations": [{"segment": [22.663756756630264, 28.294503776911075], "label": "Hammer throw"}, {"segment": [42.79367735413417, 
53.91440271918877], "label": "Hammer throw"}, {"segment": [63.627441329173166, 75.17047272074883], "label": "Hammer throw"}]}, "V9LudLaWGOM": {"duration": 63.65, "subset": "validation", "resolution": "642x480", "url": "https://www.youtube.com/watch?v=V9LudLaWGOM", "annotations": [{"segment": [2.083564731669267, 62.60615931825273], "label": "Javelin throw"}]}, "rZu5ZJmAlbI": {"duration": 230.459, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rZu5ZJmAlbI", "annotations": [{"segment": [32.25694695435954, 197.4842241625038], "label": "Painting fence"}]}, "FF7vKwGNjo4": {"duration": 108.93, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FF7vKwGNjo4", "annotations": []}, "xJ23geP1Hss": {"duration": 191.263, "subset": "validation", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=xJ23geP1Hss", "annotations": [{"segment": [7.827094443550394, 166.31898064630687], "label": "Baton twirling"}]}, "bY0bGFJ-k1U": {"duration": 143.152, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bY0bGFJ-k1U", "annotations": [{"segment": [0.01, 136.22776466458657], "label": "Welding"}]}, "RHb_nF11Scc": {"duration": 30.54, "subset": "validation", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=RHb_nF11Scc", "annotations": [{"segment": [1.4766948658346335, 29.581532634945397], "label": "Doing nails"}]}, "x2xC5lm0cZw": {"duration": 158.55, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=x2xC5lm0cZw", "annotations": [{"segment": [0.01, 156.5671176552262], "label": "Mowing the lawn"}]}, "FTCCoZdr5fo": {"duration": 216.745, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FTCCoZdr5fo", "annotations": [{"segment": [0.01, 205.5827269890796], "label": "Rollerblading"}]}, "SkytxdoXLs4": {"duration": 166.836, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=SkytxdoXLs4", "annotations": [{"segment": [0.2, 166.63537414965987], "label": "Futsal"}]}, "7AkyOhKkT6g": {"duration": 190.15, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7AkyOhKkT6g", "annotations": [{"segment": [145.3550868174727, 157.81409425897036], "label": "Removing curlers"}]}, "OlR9S70AA74": {"duration": 76.07, "subset": "testing", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=OlR9S70AA74", "annotations": []}, "OysON6LbF6Y": {"duration": 120.44, "subset": "testing", "resolution": "568x320", "url": "https://www.youtube.com/watch?v=OysON6LbF6Y", "annotations": []}, "wAgOScz_TjA": {"duration": 75.47, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wAgOScz_TjA", "annotations": []}, "VVLeWYKoNUE": {"duration": 17.996, "subset": "training", "resolution": "624x360", "url": "https://www.youtube.com/watch?v=VVLeWYKoNUE", "annotations": [{"segment": [0, 10.866995473432661], "label": "Waterskiing"}]}, "-rKS00dzFxQ": {"duration": 231.434, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-rKS00dzFxQ", "annotations": [{"segment": [42.95528705148206, 48.00885023400936], "label": "Peeling potatoes"}]}, "o_AftOrP4zI": {"duration": 158.29, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=o_AftOrP4zI", "annotations": [{"segment": [79.88604426898164, 83.83713139258971], "label": "High jump"}, {"segment": [116.92748664032761, 124.82966102784712], "label": "High jump"}]}, "TlnvPrGG2BM": {"duration": 78.81, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=TlnvPrGG2BM", "annotations": []}, "7WeokrKMOM8": {"duration": 133.701, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7WeokrKMOM8", "annotations": []}, "rprQvEVVpIc": {"duration": 17.83, "subset": "validation", "resolution": "360x480", "url": 
"https://www.youtube.com/watch?v=rprQvEVVpIc", "annotations": [{"segment": [1.7607316799999997, 17.04973868954758], "label": "Springboard diving"}]}, "OhXBMlKOHMI": {"duration": 42.284, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OhXBMlKOHMI", "annotations": [{"segment": [0, 42.283537], "label": "Shuffleboard"}]}, "MJZjSHG6tBY": {"duration": 224.12, "subset": "training", "resolution": "384x288", "url": "https://www.youtube.com/watch?v=MJZjSHG6tBY", "annotations": [{"segment": [49.648827335406345, 198.24670264], "label": "Drinking beer"}]}, "AJ_QqCET1iY": {"duration": 63.14, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AJ_QqCET1iY", "annotations": [{"segment": [12.698953198127924, 53.45373322932917], "label": "Using the pommel horse"}]}, "BpsmWqCxTDA": {"duration": 236.913, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BpsmWqCxTDA", "annotations": []}, "-7eQ2bHNPUw": {"duration": 197.46, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-7eQ2bHNPUw", "annotations": [{"segment": [70.83748829953198, 137.97910764430577], "label": "Washing hands"}]}, "7uumfT4aGLU": {"duration": 43.68, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7uumfT4aGLU", "annotations": [{"segment": [5.30401344, 40.19269110764431], "label": "Using parallel bars"}]}, "L7kozALmUSY": {"duration": 250.312, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=L7kozALmUSY", "annotations": []}, "gCxLSh-cgng": {"duration": 151.63, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gCxLSh-cgng", "annotations": [{"segment": [72.83455226209048, 88.20548049921996], "label": "Tennis serve with ball bouncing"}]}, "_8aVDfNQtq0": {"duration": 122.23, "subset": "validation", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=_8aVDfNQtq0", "annotations": [{"segment": [19.64067652229899, 28.03086843473739], "label": "Beach soccer"}, {"segment": [32.79802293044103, 96.29652060062402], "label": "Beach soccer"}]}, "kPnqo24kemc": {"duration": 109.924, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=kPnqo24kemc", "annotations": [{"segment": [0, 109.881], "label": "Kneeling"}]}, "xYM6h31PrM0": {"duration": 115.17, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xYM6h31PrM0", "annotations": [{"segment": [14.0018752, 115.170974], "label": "Using the balance beam"}]}, "ejUc7O8NbCE": {"duration": 183.16, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ejUc7O8NbCE", "annotations": []}, "sNQQ2Fpxbzw": {"duration": 149.583, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=sNQQ2Fpxbzw", "annotations": [{"segment": [26.625002348258878, 121.45406591691126], "label": "Hurling"}]}, "kj8L5yu-fGs": {"duration": 229.48, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kj8L5yu-fGs", "annotations": [{"segment": [0.01, 211.58243645664194], "label": "Preparing salad"}]}, "kdOsmDACtqw": {"duration": 131.57, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kdOsmDACtqw", "annotations": []}, "djgnHazfi2Y": {"duration": 46.65, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=djgnHazfi2Y", "annotations": [{"segment": [7.197346333853354, 41.861448720000006], "label": "Using the pommel horse"}]}, "j28N0gvrNqA": {"duration": 60.74, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=j28N0gvrNqA", "annotations": []}, "M_Jib8gkQpg": {"duration": 8.62, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=M_Jib8gkQpg", "annotations": [{"segment": 
[0.1747111170046802, 5.671391644305772], "label": "Springboard diving"}]}, "yLS0UuNYXOI": {"duration": 132.238, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yLS0UuNYXOI", "annotations": [{"segment": [39.3248769470405, 114.0799906542056], "label": "Waterskiing"}]}, "Mzt-E6pxuUI": {"duration": 138.46, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Mzt-E6pxuUI", "annotations": [{"segment": [71.0604412, 86.0985818], "label": "Washing dishes"}, {"segment": [30.073512, 63.6334468], "label": "Washing dishes"}]}, "XoeGnEtEq3U": {"duration": 218.988, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XoeGnEtEq3U", "annotations": [{"segment": [24.86170496999344, 35.75998887868393], "label": "Knitting"}, {"segment": [57.89712806821148, 58.57827081250463], "label": "Knitting"}, {"segment": [119.54054642674204, 128.39540210255305], "label": "Knitting"}, {"segment": [128.73597347469962, 131.46054445187224], "label": "Knitting"}]}, "-byL2szHXXE": {"duration": 205.53, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-byL2szHXXE", "annotations": []}, "gx6QvOXwetM": {"duration": 18.019, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gx6QvOXwetM", "annotations": [{"segment": [0.01, 17.065907889075795], "label": "Cutting the grass"}]}, "knnQ99kDt8w": {"duration": 39.06, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=knnQ99kDt8w", "annotations": [{"segment": [1.9175686427457097, 35.82505226209049], "label": "Dodgeball"}]}, "j30ZbXEi8sc": {"duration": 207.517, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=j30ZbXEi8sc", "annotations": [{"segment": [0.2, 207.26100000000002], "label": "Drum corps"}]}, "MVB2uaxckNg": {"duration": 234.615, "subset": "testing", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=MVB2uaxckNg", "annotations": []}, "oflwFZj9PNY": {"duration": 20.34, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oflwFZj9PNY", "annotations": []}, "QF7wNOCJpMU": {"duration": 87.725, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QF7wNOCJpMU", "annotations": [{"segment": [2.1897038377535103, 80.74532901716069], "label": "Canoeing"}]}, "1tEJRzqqalM": {"duration": 231.23, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1tEJRzqqalM", "annotations": [{"segment": [13.521704368174726, 204.62845943837755], "label": "Grooming horse"}]}, "Fp62gCq6eHE": {"duration": 51.32, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=Fp62gCq6eHE", "annotations": []}, "BXq2SjN9BMY": {"duration": 123.879, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BXq2SjN9BMY", "annotations": []}, "f--wWfsr0zA": {"duration": 104.49000000000001, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=f--wWfsr0zA", "annotations": [{"segment": [8.485551690000001, 59.97136973478939], "label": "Ballet"}]}, "YAKXrBinGD4": {"duration": 222.84, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YAKXrBinGD4", "annotations": []}, "VE-zu8cdL44": {"duration": 73.05, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=VE-zu8cdL44", "annotations": [{"segment": [0.34188756318252733, 33.39101867082683], "label": "Walking the dog"}, {"segment": [37.83555699219969, 72.36620087363495], "label": "Walking the dog"}]}, "_yvbyKO1ymo": {"duration": 70.403, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_yvbyKO1ymo", "annotations": []}, "0fsMeZoZzJI": {"duration": 59.93, "subset": "validation", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=0fsMeZoZzJI", "annotations": [{"segment": [0, 59.9307029478458], "label": "Shaving legs"}]}, "xC9s-_MNO_Q": {"duration": 122.3, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xC9s-_MNO_Q", "annotations": [{"segment": [0.2, 122.09936507936507], "label": "Decorating the Christmas tree"}]}, "LZ1142kuCnI": {"duration": 14.745, "subset": "training", "resolution": "204x360", "url": "https://www.youtube.com/watch?v=LZ1142kuCnI", "annotations": [{"segment": [0.0045861419023285836, 14.72632608274158], "label": "Mooping floor"}]}, "DsUbSyRX-B4": {"duration": 29.513, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=DsUbSyRX-B4", "annotations": [{"segment": [0.01, 14.571824317870675], "label": "Swinging at the playground"}]}, "_7l3OxQodF0": {"duration": 179.17000000000002, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_7l3OxQodF0", "annotations": [{"segment": [71.69377391830368, 110.5453707005423], "label": "Rock climbing"}]}, "vBaiedxy1Uw": {"duration": 93.948, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vBaiedxy1Uw", "annotations": []}, "tLdbdQ9fVZE": {"duration": 56.077, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tLdbdQ9fVZE", "annotations": [{"segment": [23.197924445326997, 24.418867907224357], "label": "Spread mulch"}, {"segment": [31.221267194938196, 32.267790162278786], "label": "Spread mulch"}]}, "sWxKUQuMxhA": {"duration": 15.63, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sWxKUQuMxhA", "annotations": []}, "2i_rotAjuoE": {"duration": 195.14, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=2i_rotAjuoE", "annotations": [{"segment": [27.971367599999997, 157.0057412], "label": "Horseback riding"}]}, "by4dS4tgkY0": {"duration": 189.359, "subset": 
"training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=by4dS4tgkY0", "annotations": [{"segment": [18.020097565807394, 174.02195162955329], "label": "Running a marathon"}]}, "YtFGG_7A_UU": {"duration": 135.88, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YtFGG_7A_UU", "annotations": [{"segment": [40.67704212168486, 94.06565990639626], "label": "Polishing shoes"}]}, "CnHtcWiHKw0": {"duration": 71.541, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CnHtcWiHKw0", "annotations": []}, "Xc2CUi0Rnpw": {"duration": 214.251, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Xc2CUi0Rnpw", "annotations": [{"segment": [8.52125663026521, 117.79384165366615], "label": "Beach soccer"}, {"segment": [143.52469500780032, 205.01140951638064], "label": "Beach soccer"}]}, "HQFey_iCEw8": {"duration": 12.93, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HQFey_iCEw8", "annotations": [{"segment": [0.020126365054602184, 12.881], "label": "Tennis serve with ball bouncing"}]}, "Rd9TrjbCkAE": {"duration": 11.819, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Rd9TrjbCkAE", "annotations": [{"segment": [0, 11.818955], "label": "Painting fence"}]}, "EK7qW-oavQY": {"duration": 51.76, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EK7qW-oavQY", "annotations": []}, "iFDr-o61ewo": {"duration": 94.715, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iFDr-o61ewo", "annotations": [{"segment": [3.83798127925117, 87.70164430577223], "label": "Blowing leaves"}]}, "9KNOOoIK0zw": {"duration": 215.807, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=9KNOOoIK0zw", "annotations": [{"segment": [65.65088987232959, 148.80868267394695], "label": "Blow-drying hair"}, 
{"segment": [169.0089563400936, 201.32939420592822], "label": "Blow-drying hair"}]}, "mSR-HpiJ0No": {"duration": 174.82, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mSR-HpiJ0No", "annotations": []}, "fJWi44w71_o": {"duration": 180.094, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=fJWi44w71_o", "annotations": []}, "TotbMcWIoyE": {"duration": 196.79, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TotbMcWIoyE", "annotations": [{"segment": [14.12215170670827, 165.78178090483618], "label": "Playing racquetball"}]}, "hHWXldYW8rU": {"duration": 8.31, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hHWXldYW8rU", "annotations": [{"segment": [3.0100982839313573, 8.281], "label": "Washing face"}]}, "7EPzlmJ25dA": {"duration": 191.542, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7EPzlmJ25dA", "annotations": [{"segment": [15.833936037441497, 168.5145772230889], "label": "Skiing"}]}, "aTvt_fP243g": {"duration": 45.256, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aTvt_fP243g", "annotations": [{"segment": [0.01, 0.02], "label": "Cleaning sink"}, {"segment": [12.199333853354135, 39.84175507020281], "label": "Cleaning sink"}]}, "xlf4czJSWHM": {"duration": 162.73, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xlf4czJSWHM", "annotations": []}, "E2Vd-sOC_ik": {"duration": 105.419, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=E2Vd-sOC_ik", "annotations": [{"segment": [39.4604245495306, 58.66537314766144], "label": "Powerbocking"}, {"segment": [76.22887314766145, 92.1509245495306], "label": "Powerbocking"}]}, "mt7gThnTtUM": {"duration": 152.138, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mt7gThnTtUM", 
"annotations": []}, "Q_32kySHzCQ": {"duration": 153.322, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=Q_32kySHzCQ", "annotations": [{"segment": [89.42217472698908, 108.78900936037441], "label": "Throwing darts"}, {"segment": [122.67262558502341, 129.36820748829953], "label": "Throwing darts"}, {"segment": [136.06378939157568, 140.60721996879874], "label": "Throwing darts"}]}, "e2IL0BusPNM": {"duration": 113.941, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=e2IL0BusPNM", "annotations": [{"segment": [0.01, 112.02039185950021], "label": "Beer pong"}]}, "gwZleaX_ZR8": {"duration": 149.61, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=gwZleaX_ZR8", "annotations": [{"segment": [12.603327708618549, 31.04152935641235], "label": "Plataform diving"}, {"segment": [38.510167998556675, 49.01294108907213], "label": "Plataform diving"}, {"segment": [54.38102511311336, 71.65225197307213], "label": "Plataform diving"}, {"segment": [62.31645367039172, 64.4170082884948], "label": "Plataform diving"}, {"segment": [67.45114273686593, 71.65225197307213], "label": "Plataform diving"}, {"segment": [73.98620154874222, 77.48712591224738], "label": "Plataform diving"}, {"segment": [78.88749565764942, 84.02218472412365], "label": "Plataform diving"}, {"segment": [109.46223509892775, 121.36537793484527], "label": "Plataform diving"}, {"segment": [131.1679661526597, 136.30265521913392], "label": "Plataform diving"}, {"segment": [137.00284009183497, 142.3709241158762], "label": "Plataform diving"}]}, "WElK913B33U": {"duration": 169.111, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WElK913B33U", "annotations": [{"segment": [0.2, 49.84204212168487], "label": "Elliptical trainer"}, {"segment": [52.742901716068644, 101.2663712948518], "label": "Elliptical trainer"}, {"segment": [119.74055226209049, 130.29038065522622], 
"label": "Elliptical trainer"}]}, "B92SbWzu8yI": {"duration": 213.647, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=B92SbWzu8yI", "annotations": [{"segment": [21.33135003767498, 187.6492192293292], "label": "Playing congas"}]}, "FrV8r4l5ZUM": {"duration": 147.563, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FrV8r4l5ZUM", "annotations": [{"segment": [6.209620904836194, 140.5404851794072], "label": "Rope skipping"}]}, "5wkGeYBS7hQ": {"duration": 189.313, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5wkGeYBS7hQ", "annotations": [{"segment": [19.19386115444618, 176.8788127925117], "label": "Beer pong"}]}, "2N129Bw1jiQ": {"duration": 17.97, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2N129Bw1jiQ", "annotations": [{"segment": [4.34059731, 13.84113159], "label": "Hopscotch"}]}, "JqD5p8LDqWI": {"duration": 202.153, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JqD5p8LDqWI", "annotations": []}, "4avZ79LB5n4": {"duration": 12.98, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4avZ79LB5n4", "annotations": [{"segment": [2.369162751950078, 7.431476324492981], "label": "Plataform diving"}]}, "UiK3kYsvvQ8": {"duration": 217.71, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UiK3kYsvvQ8", "annotations": [{"segment": [16.64244063095857, 203.78498731786004], "label": "Belly dance"}]}, "u9YrRYp2t3I": {"duration": 117.633, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=u9YrRYp2t3I", "annotations": [{"segment": [13.975289212225649, 16.90693507513987], "label": "Canoeing"}, {"segment": [34.13035451976092, 36.32908891694658], "label": "Canoeing"}, {"segment": [61.79776235101387, 62.713901683174576], "label": "Canoeing"}, {"segment": [74.99016873412786, 
80.30377600771213], "label": "Canoeing"}]}, "8HxMXHc8Nr0": {"duration": 182.811, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8HxMXHc8Nr0", "annotations": [{"segment": [15.068359023726549, 42.077689578135974], "label": "Assembling bicycle"}, {"segment": [59.7048316241716, 91.83171954678491], "label": "Assembling bicycle"}]}, "PQgydNnSggw": {"duration": 169.48, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PQgydNnSggw", "annotations": [{"segment": [58.69008112324494, 160.4724290171607], "label": "Getting a tattoo"}]}, "x3l7GB-O0gw": {"duration": 189.545, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=x3l7GB-O0gw", "annotations": []}, "74qSBsksI-E": {"duration": 59.03, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=74qSBsksI-E", "annotations": [{"segment": [9.944950810277309, 48.71184239478423], "label": "Shaving"}]}, "134sVNOzn_w": {"duration": 125.365, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=134sVNOzn_w", "annotations": [{"segment": [22.874503900156007, 97.96543057722307], "label": "Fixing the roof"}]}, "_Iece35DRMQ": {"duration": 180.025, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=_Iece35DRMQ", "annotations": []}, "9bERRZ2eTbo": {"duration": 113.894, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9bERRZ2eTbo", "annotations": [{"segment": [0.01, 72.98433140216241], "label": "Making a lemonade"}]}, "Ivyz2upE-CI": {"duration": 105.25999999999999, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ivyz2upE-CI", "annotations": []}, "FRFjdn_pjYk": {"duration": 58.352, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FRFjdn_pjYk", "annotations": [{"segment": [0, 58.101], "label": "Elliptical trainer"}]}, 
"cc17BiSDn8w": {"duration": 41.59, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=cc17BiSDn8w", "annotations": [{"segment": [1.6676710399999999, 37.46862914352574], "label": "Kayaking"}]}, "-TWiYyvt2Ec": {"duration": 123.21, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-TWiYyvt2Ec", "annotations": [{"segment": [4.274312572135804, 121.33813488195277], "label": "Windsurfing"}]}, "dI1GZfJ-A0E": {"duration": 38.778, "subset": "training", "resolution": "320x180", "url": "https://www.youtube.com/watch?v=dI1GZfJ-A0E", "annotations": [{"segment": [12.945939769563573, 23.351087463338533], "label": "Changing car wheel"}]}, "NLCNBK2YJQU": {"duration": 122.83, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NLCNBK2YJQU", "annotations": [{"segment": [0.01, 121.87524000551858], "label": "Sailing"}]}, "wj_xmkHjItg": {"duration": 212.393, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wj_xmkHjItg", "annotations": [{"segment": [14.241720748829952, 198.40917160686428], "label": "Skiing"}]}, "vKNsvOvC5mA": {"duration": 97.501, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vKNsvOvC5mA", "annotations": [{"segment": [24.337120641288237, 88.06995390327613], "label": "Canoeing"}]}, "Epl3pExUuNs": {"duration": 136.023, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Epl3pExUuNs", "annotations": [{"segment": [5.304134165366614, 124.11673946957877], "label": "Riding bumper cars"}]}, "8VPjByN_v9w": {"duration": 99.89, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8VPjByN_v9w", "annotations": [{"segment": [0.01, 98.72280897902584], "label": "Breakdancing"}]}, "XumLmNQiRjk": {"duration": 97.03999999999999, "subset": "training", "resolution": "648x480", "url": 
"https://www.youtube.com/watch?v=XumLmNQiRjk", "annotations": [{"segment": [10.939838640000001, 69.60780424], "label": "Volleyball"}, {"segment": [16.1979285850234, 18.771431257410296], "label": "Volleyball"}, {"segment": [40.87327773790951, 43.59816292043681], "label": "Volleyball"}, {"segment": [50.56175838689547, 59.03917895475819], "label": "Volleyball"}, {"segment": [64.943096850234, 70.09010219500779], "label": "Volleyball"}]}, "bru03d1k6DY": {"duration": 220.428, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bru03d1k6DY", "annotations": [{"segment": [51.82945638629283, 201.48272118380063], "label": "Ice fishing"}]}, "lsvZBtYMXZM": {"duration": 102.97999999999999, "subset": "training", "resolution": "256x144", "url": "https://www.youtube.com/watch?v=lsvZBtYMXZM", "annotations": [{"segment": [8.125122, 99.1247529625585], "label": "Tango"}]}, "fLR-b-FDjlM": {"duration": 178.1, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fLR-b-FDjlM", "annotations": []}, "a_n3mR1iVDI": {"duration": 189.127, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=a_n3mR1iVDI", "annotations": [{"segment": [25.078447737909517, 34.51974570982839], "label": "Curling"}, {"segment": [41.30567862714509, 56.94282839313573], "label": "Curling"}, {"segment": [69.62957254290171, 83.49647893915757], "label": "Curling"}, {"segment": [96.18322308892357, 131.58809048361934], "label": "Curling"}]}, "mEEovBtzwHc": {"duration": 145.427, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mEEovBtzwHc", "annotations": [{"segment": [0, 117.95400936037441], "label": "Decorating the Christmas tree"}]}, "-VexUX6OJBM": {"duration": 126.549, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=-VexUX6OJBM", "annotations": [{"segment": [4.440362714508581, 6.216507800312012], "label": "Powerbocking"}, {"segment": 
[8.190002340093605, 10.163496879875197], "label": "Powerbocking"}, {"segment": [34.83217862714508, 35.62157644305773], "label": "Powerbocking"}, {"segment": [42.13410842433697, 48.44929095163807], "label": "Powerbocking"}, {"segment": [50.620134945397815, 56.73796801872075], "label": "Powerbocking"}, {"segment": [60.487607644305776, 61.67170436817473], "label": "Powerbocking"}, {"segment": [71.73652652106084, 75.48616614664586], "label": "Powerbocking"}, {"segment": [78.6561911076443, 81.22213962558502], "label": "Powerbocking"}, {"segment": [84.57761076443059, 92.67021762870515], "label": "Powerbocking"}, {"segment": [94.64402418096725, 106.09210218408737], "label": "Powerbocking"}]}, "m9CbLJdYqHw": {"duration": 43.004, "subset": "validation", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=m9CbLJdYqHw", "annotations": [{"segment": [0.01, 0.02], "label": "Kneeling"}, {"segment": [0, 42.981], "label": "Kneeling"}]}, "PU6f-DcDngM": {"duration": 186.57, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=PU6f-DcDngM", "annotations": [{"segment": [0, 186.28127111479017], "label": "Playing polo"}]}, "1buoiCgXG1Q": {"duration": 22.83, "subset": "validation", "resolution": "1906x1080", "url": "https://www.youtube.com/watch?v=1buoiCgXG1Q", "annotations": [{"segment": [3.9740607499999996, 15.2895545], "label": "Kayaking"}]}, "B_PhHrBEeNI": {"duration": 169.92000000000002, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=B_PhHrBEeNI", "annotations": [{"segment": [24.918597407176286, 28.364786410296414], "label": "Playing harmonica"}, {"segment": [31.280792489859596, 41.3542680374415], "label": "Playing harmonica"}, {"segment": [49.307011890795636, 55.40411551170047], "label": "Playing harmonica"}, {"segment": [84.5641763073323, 92.51692016068644], "label": "Playing harmonica"}, {"segment": [113.98932856474259, 120.88170657098284], "label": "Playing harmonica"}]}, "FmugQfHQ4p0": 
{"duration": 82.13, "subset": "training", "resolution": "410x240", "url": "https://www.youtube.com/watch?v=FmugQfHQ4p0", "annotations": [{"segment": [9.219706708268331, 41.10452574102964], "label": "Vacuuming floor"}]}, "2NAbdlHXcTI": {"duration": 21.13, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=2NAbdlHXcTI", "annotations": [{"segment": [0.8184364799999998, 19.798380655226207], "label": "Discus throw"}]}, "m_gr7WdjJmc": {"duration": 186.689, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=m_gr7WdjJmc", "annotations": [{"segment": [15.43601727742579, 64.65652472386896], "label": "Skiing"}, {"segment": [90.86862934165367, 116.20699713884554], "label": "Skiing"}, {"segment": [140.67162811544463, 162.22380302340093], "label": "Skiing"}, {"segment": [176.78608336661466, 186.688434], "label": "Skiing"}]}, "flneQOXwGxw": {"duration": 31.63, "subset": "training", "resolution": "384x288", "url": "https://www.youtube.com/watch?v=flneQOXwGxw", "annotations": [{"segment": [6.093205429797192, 15.270016846333853], "label": "Cricket"}]}, "T0wmRC8Ka2Y": {"duration": 49.343, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=T0wmRC8Ka2Y", "annotations": [{"segment": [6.697018901164211, 46.802153535101404], "label": "Playing beach volleyball"}]}, "ffGPrFNbQkA": {"duration": 153.97, "subset": "training", "resolution": "1280x718", "url": "https://www.youtube.com/watch?v=ffGPrFNbQkA", "annotations": [{"segment": [19.937029631825276, 124.90669166926676], "label": "Paintball"}]}, "vtYbJD2J8Tw": {"duration": 71.12, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vtYbJD2J8Tw", "annotations": [{"segment": [31.067646801872076, 71.12272], "label": "Removing curlers"}]}, "PG3f3Wkkptk": {"duration": 15.256, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=PG3f3Wkkptk", "annotations": [{"segment": 
[0.2, 15.001000000000001], "label": "Camel ride"}]}, "uX-UwKfRvJY": {"duration": 124.994, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uX-UwKfRvJY", "annotations": [{"segment": [55.326026189928854, 70.50662432076999], "label": "Doing crunches"}]}, "xevpFDYTJ0U": {"duration": 29.21, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xevpFDYTJ0U", "annotations": [{"segment": [0.3189936146645866, 18.182636035881433], "label": "Using uneven bars"}]}, "JFfLwtTd8PI": {"duration": 141.944, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JFfLwtTd8PI", "annotations": []}, "yfikZ4E_uko": {"duration": 125.85, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yfikZ4E_uko", "annotations": [{"segment": [4.319418708721138, 118.39133893759751], "label": "Preparing salad"}]}, "-9l1Rh10bO8": {"duration": 92.02000000000001, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-9l1Rh10bO8", "annotations": [{"segment": [12.633104137285493, 16.07849617472699], "label": "Doing karate"}, {"segment": [19.523888212168487, 23.83062825897036], "label": "Doing karate"}, {"segment": [33.30545636193448, 36.176616393135724], "label": "Doing karate"}, {"segment": [41.34470444929797, 42.493168461778474], "label": "Doing karate"}, {"segment": [51.68088056162246, 52.25511256786272], "label": "Doing karate"}, {"segment": [54.839156595943834, 61.29926666614664], "label": "Doing karate"}, {"segment": [64.02686869578783, 68.3336087425897], "label": "Doing karate"}, {"segment": [80.536038875195, 82.25873489391576], "label": "Doing karate"}]}, "EFGtb9IDQao": {"duration": 89.49, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EFGtb9IDQao", "annotations": [{"segment": [29.56641683407365, 76.36619253500822], "label": "Cleaning shoes"}]}, "ZVNnoOcohaU": {"duration": 57.911, 
"subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZVNnoOcohaU", "annotations": []}, "w_OiqLfRJHI": {"duration": 163.47, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=w_OiqLfRJHI", "annotations": [{"segment": [93.08267623221936, 101.24334814820592], "label": "Paintball"}, {"segment": [108.12891590639626, 116.79963086115445], "label": "Paintball"}]}, "jwNny3H4GpI": {"duration": 129.41, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jwNny3H4GpI", "annotations": []}, "aLb79wtvxoQ": {"duration": 231.06, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aLb79wtvxoQ", "annotations": []}, "dJknA-jTNGc": {"duration": 54.614, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dJknA-jTNGc", "annotations": [{"segment": [0.2, 38.851084043681745], "label": "Rafting"}]}, "8GxWehFZVRE": {"duration": 32.81, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8GxWehFZVRE", "annotations": [{"segment": [0.01, 19.348054379318032], "label": "Playing accordion"}]}, "3iHHhCHcT8I": {"duration": 183.3, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=3iHHhCHcT8I", "annotations": [{"segment": [11.435257410296412, 21.15522620904836], "label": "Smoking a cigarette"}, {"segment": [32.87636505460219, 40.30928237129485], "label": "Smoking a cigarette"}, {"segment": [59.74921996879875, 66.89625585023401], "label": "Smoking a cigarette"}, {"segment": [76.33034321372855, 90.33853354134165], "label": "Smoking a cigarette"}, {"segment": [101.77379095163806, 107.49141965678626], "label": "Smoking a cigarette"}, {"segment": [122.35725429017161, 128.07488299531983], "label": "Smoking a cigarette"}, {"segment": [143.22659906396254, 158.95007800312013], "label": "Smoking a cigarette"}, {"segment": [168.95592823712948, 175.81708268330735], 
"label": "Smoking a cigarette"}]}, "S7LNJrgseNk": {"duration": 187.409, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=S7LNJrgseNk", "annotations": []}, "lcVkXTsnR7A": {"duration": 228.25, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=lcVkXTsnR7A", "annotations": [{"segment": [36.426736680000005, 77.40025320000001], "label": "Playing harmonica"}, {"segment": [91.34188536, 186.37335023400937], "label": "Playing harmonica"}]}, "E_6XYa_WO8I": {"duration": 131.43, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=E_6XYa_WO8I", "annotations": [{"segment": [12.299625585023403, 121.56129953198129], "label": "Chopping wood"}]}, "Atdxx_Pxnd4": {"duration": 41.82, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Atdxx_Pxnd4", "annotations": [{"segment": [0.2, 40.58149898909657], "label": "Calf roping"}]}, "xkIhTMJ_ThA": {"duration": 11.4, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xkIhTMJ_ThA", "annotations": [{"segment": [0, 11.400996], "label": "Playing racquetball"}]}, "kcPbEkv5UXE": {"duration": 213.345, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=kcPbEkv5UXE", "annotations": [{"segment": [0.01, 209.3507431575663], "label": "Playing drums"}]}, "Quj1J31xQFM": {"duration": 30.65, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=Quj1J31xQFM", "annotations": [{"segment": [0.01, 20.60888628549142], "label": "Washing face"}]}, "BTtMoBOHrlQ": {"duration": 190.03, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=BTtMoBOHrlQ", "annotations": [{"segment": [21.229329173166928, 173.66770670826833], "label": "Tai chi"}]}, "di5f5hM05Vw": {"duration": 167.02, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=di5f5hM05Vw", 
"annotations": []}, "lidR3AK0ZzQ": {"duration": 17.79, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lidR3AK0ZzQ", "annotations": [{"segment": [4.317622464898596, 13.672471138845554], "label": "Snatch"}]}, "f1tPFhJ1chI": {"duration": 71.84, "subset": "testing", "resolution": "632x360", "url": "https://www.youtube.com/watch?v=f1tPFhJ1chI", "annotations": []}, "lJ02cAklA6o": {"duration": 18.62, "subset": "training", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=lJ02cAklA6o", "annotations": [{"segment": [0.010000000000000002, 18.56], "label": "Polishing shoes"}]}, "cy3a8BJJ-YQ": {"duration": 141.085, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=cy3a8BJJ-YQ", "annotations": [{"segment": [0, 141.084444], "label": "Playing blackjack"}]}, "DXIHbnyfNGA": {"duration": 220.683, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DXIHbnyfNGA", "annotations": [{"segment": [43.72335572606578, 87.79098977223089], "label": "Longboarding"}, {"segment": [102.59496059656786, 113.95614750826833], "label": "Longboarding"}, {"segment": [121.185993724805, 139.43274846177846], "label": "Longboarding"}, {"segment": [151.1382137647426, 169.0406901104524], "label": "Longboarding"}, {"segment": [183.8446609347894, 220.6824488], "label": "Longboarding"}]}, "pF8jt67vTDY": {"duration": 82.06, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pF8jt67vTDY", "annotations": [{"segment": [6.00820327925117, 81.941666], "label": "Volleyball"}]}, "9-yueOtwiL8": {"duration": 230.69, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9-yueOtwiL8", "annotations": [{"segment": [6.295631825273011, 10.612636505460218], "label": "Shot put"}, {"segment": [14.210140405616224, 18.527145085803433], "label": "Shot put"}, {"segment": [21.40514820592824, 23.923400936037442], "label": "Shot put"}, 
{"segment": [27.52090483619345, 31.478159126365053], "label": "Shot put"}, {"segment": [32.197659906396254, 35.79516380655226], "label": "Shot put"}, {"segment": [51.983931357254285, 59.178939157566305], "label": "Shot put"}, {"segment": [67.09344773790951, 74.64820592823712], "label": "Shot put"}, {"segment": [86.51996879875195, 91.19672386895475], "label": "Shot put"}, {"segment": [92.27597503900155, 94.07472698907956], "label": "Shot put"}, {"segment": [96.59297971918878, 99.11123244929797], "label": "Shot put"}, {"segment": [100.90998439937597, 104.50748829953197], "label": "Shot put"}, {"segment": [0.2, 230.4], "label": "Shot put"}, {"segment": [114.5804992199688, 121.0560062402496], "label": "Shot put"}, {"segment": [122.85475819032762, 146.95803432137285], "label": "Shot put"}, {"segment": [173.2198127925117, 179.33556942277693], "label": "Shot put"}, {"segment": [184.01232449297973, 187.2500780031201], "label": "Shot put"}, {"segment": [209.19485179407175, 210.99360374414977], "label": "Shot put"}]}, "uaqcvxz_5fY": {"duration": 135.68, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uaqcvxz_5fY", "annotations": [{"segment": [6.982048361934478, 135.621], "label": "Cleaning windows"}]}, "_jVoRRn_Ujw": {"duration": 29.56, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=_jVoRRn_Ujw", "annotations": []}, "2mVwrM0qOGw": {"duration": 182.9, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2mVwrM0qOGw", "annotations": [{"segment": [85.73860374414977, 95.15415756630264], "label": "Archery"}, {"segment": [133.3870124804992, 175.32902496099842], "label": "Archery"}]}, "QG2xYcN_bl0": {"duration": 27.24, "subset": "testing", "resolution": "480x720", "url": "https://www.youtube.com/watch?v=QG2xYcN_bl0", "annotations": []}, "vP-wI1egoHA": {"duration": 120.605, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=vP-wI1egoHA", "annotations": [{"segment": [0.2, 109.72565810221185], "label": "Riding bumper cars"}]}, "0-auIBOTx9E": {"duration": 151.65, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=0-auIBOTx9E", "annotations": [{"segment": [10.646215975039, 119.94736665210607], "label": "Playing ten pins"}, {"segment": [127.9911742776911, 151.649432], "label": "Playing ten pins"}]}, "PVed6JEd3ZM": {"duration": 38.615, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PVed6JEd3ZM", "annotations": [{"segment": [0.5416989079563184, 27.446078003120125], "label": "Using the monkey bar"}]}, "74MEdLMrXAQ": {"duration": 202.757, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=74MEdLMrXAQ", "annotations": [{"segment": [5.377321265210608, 10.122016499219969], "label": "Futsal"}, {"segment": [13.285146655226209, 24.35610220124805], "label": "Futsal"}, {"segment": [27.835545372854916, 45.23276123088923], "label": "Futsal"}, {"segment": [49.977456464898594, 59.46684693291731], "label": "Futsal"}, {"segment": [64.21154216692668, 74.9661846973479], "label": "Futsal"}, {"segment": [79.078253900156, 95.52653071138845], "label": "Futsal"}, {"segment": [99.95491292979719, 110.7095554602184], "label": "Futsal"}, {"segment": [115.13793767862715, 132.53515353666148], "label": "Futsal"}, {"segment": [136.33090972386896, 148.6671173322933], "label": "Futsal"}, {"segment": [153.09549955070202, 167.3295852527301], "label": "Futsal"}, {"segment": [171.12534143993759, 179.98210587675507], "label": "Futsal"}, {"segment": [183.14523603276132, 188.52255729797193], "label": "Futsal"}, {"segment": [192.3183134851794, 202.556643], "label": "Futsal"}]}, "w4oF8qNhzXg": {"duration": 137.625, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=w4oF8qNhzXg", "annotations": []}, "-rJVr9sQ1h8": {"duration": 107.369, "subset": 
"training", "resolution": "396x224", "url": "https://www.youtube.com/watch?v=-rJVr9sQ1h8", "annotations": [{"segment": [14.884428126390741, 85.12554848909659], "label": "Grooming dog"}]}, "bwsa4iyYmw4": {"duration": 214.576, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bwsa4iyYmw4", "annotations": []}, "u713piB4VWk": {"duration": 110.504, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=u713piB4VWk", "annotations": [{"segment": [0.01, 83.0933132074883], "label": "Sumo"}]}, "J0-OVQ-JB5g": {"duration": 203.198, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=J0-OVQ-JB5g", "annotations": [{"segment": [17.106542901716068, 172.34955382215287], "label": "Grooming dog"}]}, "GZjNt2TsChs": {"duration": 64.552, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GZjNt2TsChs", "annotations": [{"segment": [6.49542912328738, 19.889104679833448], "label": "Fun sliding down"}, {"segment": [43.95743895061924, 55.9412547888804], "label": "Fun sliding down"}]}, "5Blj6-K0J64": {"duration": 63.762, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5Blj6-K0J64", "annotations": []}, "aEyTdUOp-qs": {"duration": 29.095, "subset": "training", "resolution": "380x216", "url": "https://www.youtube.com/watch?v=aEyTdUOp-qs", "annotations": [{"segment": [0.01, 29.049213703588144], "label": "Mooping floor"}]}, "Z-C5yMAUmM4": {"duration": 74.19, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Z-C5yMAUmM4", "annotations": [{"segment": [0.11569578783151327, 1.8511326053042123], "label": "Drinking beer"}]}, "v6DdLg_Xnkg": {"duration": 151.696, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=v6DdLg_Xnkg", "annotations": []}, "cffsp2tCT5k": {"duration": 211.023, "subset": "training", "resolution": "853x480", "url": 
"https://www.youtube.com/watch?v=cffsp2tCT5k", "annotations": [{"segment": [0.01, 207.40164906552263], "label": "Carving jack-o-lanterns"}]}, "5sl-E-AwptI": {"duration": 95.32, "subset": "training", "resolution": "562x480", "url": "https://www.youtube.com/watch?v=5sl-E-AwptI", "annotations": [{"segment": [1.2639663967511081, 47.048471199999994], "label": "Playing field hockey"}, {"segment": [53.56624766, 90.78252767253547], "label": "Playing field hockey"}]}, "VhvGXJ2XSPY": {"duration": 186.92, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=VhvGXJ2XSPY", "annotations": []}, "86sxvTk3YEY": {"duration": 225.629, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=86sxvTk3YEY", "annotations": [{"segment": [65.47091911076443, 190.07686193447736], "label": "Arm wrestling"}]}, "Se7eQXlW-BA": {"duration": 64.529, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Se7eQXlW-BA", "annotations": [{"segment": [34.42849119679073, 36.74385756382636], "label": "Playing ten pins"}]}, "cCimLg-8Dy4": {"duration": 46.626, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cCimLg-8Dy4", "annotations": []}, "Cai2pNQrv7w": {"duration": 216.97, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Cai2pNQrv7w", "annotations": []}, "2P555wnyj_8": {"duration": 48.716, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=2P555wnyj_8", "annotations": [{"segment": [0.01, 47.30408054288769], "label": "Welding"}]}, "xlOc6r6E08w": {"duration": 48.44, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=xlOc6r6E08w", "annotations": [{"segment": [1.1334670202808113, 40.804812730109205], "label": "Javelin throw"}]}, "vi8zTSA6bhY": {"duration": 13.027, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vi8zTSA6bhY", 
"annotations": [{"segment": [0.01, 13.001], "label": "Swinging at the playground"}]}, "69SYd5feEBQ": {"duration": 92.927, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=69SYd5feEBQ", "annotations": [{"segment": [0.2, 92.691], "label": "Knitting"}]}, "jM0l31xZj8E": {"duration": 72.75, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jM0l31xZj8E", "annotations": [{"segment": [2.95078261361747, 69.57037413416538], "label": "Playing saxophone"}]}, "jCNaa108OhI": {"duration": 137.51, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=jCNaa108OhI", "annotations": [{"segment": [9.224443853354133, 47.40935096723869], "label": "Shoveling snow"}]}, "46D3Yvu-BL8": {"duration": 143.11, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=46D3Yvu-BL8", "annotations": [{"segment": [8.531156733333516, 125.11012062274162], "label": "Playing racquetball"}]}, "xM0jxaj73bg": {"duration": 212.92000000000002, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=xM0jxaj73bg", "annotations": []}, "TpURw9nc8ks": {"duration": 52.06, "subset": "training", "resolution": "160x120", "url": "https://www.youtube.com/watch?v=TpURw9nc8ks", "annotations": [{"segment": [6.573636505460218, 25.726453978159128], "label": "Croquet"}, {"segment": [27.10610608424337, 31.001594383775352], "label": "Croquet"}]}, "3HUvtWSu4mA": {"duration": 122.787, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3HUvtWSu4mA", "annotations": [{"segment": [9.577778485289071, 67.23600496672928], "label": "Clipping cat claws"}]}, "o9gsbkp135s": {"duration": 27.377, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=o9gsbkp135s", "annotations": [{"segment": [0.01, 27.376325], "label": "Removing ice from car"}]}, "agM7yjqVKo0": {"duration": 63.18, "subset": "validation", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=agM7yjqVKo0", "annotations": [{"segment": [22.045258970358812, 24.21526209048362], "label": "Layup drill in basketball"}, {"segment": [30.72527145085803, 33.092547581903275], "label": "Layup drill in basketball"}]}, "1fyxOUEihAQ": {"duration": 109.993, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1fyxOUEihAQ", "annotations": [{"segment": [0.2, 30.529638065522622], "label": "Blowing leaves"}, {"segment": [36.18962714508581, 109.741], "label": "Blowing leaves"}]}, "ZTHsS5lQyvQ": {"duration": 153.35, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZTHsS5lQyvQ", "annotations": [{"segment": [16.093452799999998, 128.22573220592824], "label": "Making a sandwich"}]}, "ndET50Ccnr8": {"duration": 154.18, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=ndET50Ccnr8", "annotations": [{"segment": [2.886458658346334, 148.41208268330735], "label": "Playing water polo"}]}, "73AGD3RWPEw": {"duration": 56.239, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=73AGD3RWPEw", "annotations": [{"segment": [8.329321372854915, 43.57550234009361], "label": "Slacklining"}]}, "veaxOUe_8HE": {"duration": 205.892, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=veaxOUe_8HE", "annotations": [{"segment": [0.01, 186.591639625585], "label": "Playing beach volleyball"}]}, "dMjOeGJBF9M": {"duration": 28.352, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dMjOeGJBF9M", "annotations": [{"segment": [0.01, 25.12229618720749], "label": "Fun sliding down"}]}, "L3LdtLFpQl0": {"duration": 100.125, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=L3LdtLFpQl0", "annotations": []}, "F_sbhegCsyg": {"duration": 88.863, "subset": "training", "resolution": "1920x1080", 
"url": "https://www.youtube.com/watch?v=F_sbhegCsyg", "annotations": [{"segment": [23.151306278101462, 46.44124232449299], "label": "Rafting"}, {"segment": [57.393057678627144, 81.23751642433699], "label": "Rafting"}]}, "qVfcSGDJc-M": {"duration": 205.335, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=qVfcSGDJc-M", "annotations": []}, "zyh16NahocQ": {"duration": 199.135, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zyh16NahocQ", "annotations": [{"segment": [142.86624024960997, 176.42647113884556], "label": "Blow-drying hair"}]}, "2MRR5NxbO9k": {"duration": 155.62, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2MRR5NxbO9k", "annotations": [{"segment": [22.942438154669045, 134.1343606397317], "label": "Mowing the lawn"}]}, "fNhw7xE3NM8": {"duration": 16.58, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fNhw7xE3NM8", "annotations": []}, "H_IfIsKQ3Zw": {"duration": 125.644, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=H_IfIsKQ3Zw", "annotations": []}, "-aE1lALKV9M": {"duration": 225.907, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-aE1lALKV9M", "annotations": []}, "AQQT21DBT_E": {"duration": 120.373, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AQQT21DBT_E", "annotations": [{"segment": [5.0702817285491415, 28.919384673946954], "label": "Grooming dog"}, {"segment": [36.8064895850234, 74.73970844305772], "label": "Grooming dog"}]}, "6M5UgOj_eoM": {"duration": 59.862, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=6M5UgOj_eoM", "annotations": []}, "S7Wi1dNHhYo": {"duration": 4.32, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=S7Wi1dNHhYo", "annotations": []}, "2L9pp85KvbQ": {"duration": 
73.96000000000001, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=2L9pp85KvbQ", "annotations": [{"segment": [0.2, 73.75555555555555], "label": "Smoking hookah"}]}, "dFVX_2UQ2WY": {"duration": 55.78, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dFVX_2UQ2WY", "annotations": [{"segment": [25.11324336973479, 40.58091575663027], "label": "Cleaning shoes"}]}, "MFeI2mRg6Ho": {"duration": 192.935, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MFeI2mRg6Ho", "annotations": [{"segment": [8.124472698907956, 37.91420592823713], "label": "Hurling"}, {"segment": [46.64049141965678, 52.35771294851794], "label": "Hurling"}, {"segment": [62.88943681747269, 80.34200780031202], "label": "Hurling"}, {"segment": [92.37826365054603, 94.48460842433697], "label": "Hurling"}, {"segment": [107.72448985959439, 117.05258814352574], "label": "Hurling"}, {"segment": [123.07071606864275, 132.09790795631827], "label": "Hurling"}, {"segment": [148.0612979719189, 162.80724024961], "label": "Hurling"}, {"segment": [168.5250546021841, 182.36818408736352], "label": "Hurling"}]}, "WEoelJQYlxk": {"duration": 64.691, "subset": "training", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=WEoelJQYlxk", "annotations": [{"segment": [1.6634110764430576, 13.861758970358814], "label": "Hand car wash"}, {"segment": [47.230793291731665, 49.75111310452418], "label": "Hand car wash"}]}, "i2e67kStfk4": {"duration": 23.34, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=i2e67kStfk4", "annotations": [{"segment": [4.989725319745109, 18.111711508218626], "label": "Using uneven bars"}]}, "VSeBb4e9ysU": {"duration": 151.84, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VSeBb4e9ysU", "annotations": [{"segment": [21.1255884, 126.33241131903274], "label": "Shoveling snow"}]}, "TNVdpnrtn5A": {"duration": 
158.779, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TNVdpnrtn5A", "annotations": [{"segment": [53.75150360300126, 80.25570123213092], "label": "Kneeling"}, {"segment": [92.64084031116347, 103.04435713755082], "label": "Kneeling"}, {"segment": [106.75989886126057, 145.64923556942279], "label": "Kneeling"}]}, "BhgcXqWQhkQ": {"duration": 190.17, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=BhgcXqWQhkQ", "annotations": []}, "OSndW3d2XxU": {"duration": 4.95, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=OSndW3d2XxU", "annotations": [{"segment": [0.07715090483619345, 2.7234269407176286], "label": "Hammer throw"}]}, "tt5-QtDwm3Y": {"duration": 77.88, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=tt5-QtDwm3Y", "annotations": [{"segment": [11.785231833763147, 25.514419433920214], "label": "Layup drill in basketball"}, {"segment": [47.262424570452204, 70.22540035569423], "label": "Layup drill in basketball"}]}, "nmUJ2GfVkKY": {"duration": 147.17000000000002, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nmUJ2GfVkKY", "annotations": [{"segment": [22.49988543525741, 33.74982815288612], "label": "Triple jump"}, {"segment": [56.93848559126366, 73.69860433385335], "label": "Triple jump"}, {"segment": [113.41778984711388, 131.09627126053041], "label": "Triple jump"}]}, "WWip1_lFvGg": {"duration": 20.388, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WWip1_lFvGg", "annotations": [{"segment": [0.2, 19.439831513260533], "label": "Putting in contact lenses"}]}, "9pQH9u9uar4": {"duration": 170.016, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=9pQH9u9uar4", "annotations": []}, "lxMSX0Y0b8I": {"duration": 201.619, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=lxMSX0Y0b8I", "annotations": [{"segment": [25.751942413517845, 31.090759743149594], "label": "Capoeira"}, {"segment": [36.11552899456771, 41.45434632419946], "label": "Capoeira"}, {"segment": [46.793163653831215, 51.50388482703569], "label": "Capoeira"}, {"segment": [55.58650984381291, 63.43771179915372], "label": "Capoeira"}, {"segment": [68.14843297235821, 76.94177916233991], "label": "Capoeira"}, {"segment": [82.5946445701853, 85.10702919589436], "label": "Capoeira"}, {"segment": [89.5037022908852, 94.84251962051695], "label": "Capoeira"}, {"segment": [100.1813369501487, 106.1482504362077], "label": "Capoeira"}, {"segment": [111.80111584405309, 116.82588509547121], "label": "Capoeira"}, {"segment": [121.5366062686757, 124.99113512902565], "label": "Capoeira"}, {"segment": [139.12329864863912, 146.34640444755263], "label": "Capoeira"}, {"segment": [150.4290294643299, 158.28023046728973], "label": "Capoeira"}, {"segment": [163.30499968847352, 169.2719131386293], "label": "Capoeira"}, {"segment": [173.35453813084112, 179.0074035046729], "label": "Capoeira"}, {"segment": [184.0321727258567, 191.2552784813084], "label": "Capoeira"}]}, "tYZJ4O5Hsz4": {"duration": 157.385, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=tYZJ4O5Hsz4", "annotations": [{"segment": [0.19999999999999998, 153.1746495327103], "label": "Playing drums"}]}, "kW63TeJo4JY": {"duration": 23.592, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kW63TeJo4JY", "annotations": [{"segment": [0.19999999999999998, 21.901117308411216], "label": "Hitting a pinata"}]}, "NWaMWZUuTZc": {"duration": 44.75, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NWaMWZUuTZc", "annotations": [{"segment": [1.203840559526497, 43.99720273086606], "label": "Breakdancing"}]}, "XYW6F_4qKJU": {"duration": 59.769, "subset": "validation", "resolution": "426x240", "url": 
"https://www.youtube.com/watch?v=XYW6F_4qKJU", "annotations": [{"segment": [9.316848673946957, 24.884316692667706], "label": "Clipping cat claws"}, {"segment": [34.94988299531981, 51.166628705148206], "label": "Clipping cat claws"}]}, "hjaUbrqPe-w": {"duration": 223.562, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hjaUbrqPe-w", "annotations": [{"segment": [11.282544430417142, 211.51287597247324], "label": "Rafting"}]}, "QdvC8Xt-ym0": {"duration": 65.25, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QdvC8Xt-ym0", "annotations": [{"segment": [2.6454695787831515, 64.30526053042122], "label": "Playing polo"}]}, "hsPepNAzu_Q": {"duration": 86.05, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hsPepNAzu_Q", "annotations": [{"segment": [9.460968018720749, 60.590454758190326], "label": "Horseback riding"}]}, "7oBW5W6Fdok": {"duration": 44.56, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7oBW5W6Fdok", "annotations": [{"segment": [33.19974726989079, 43.06243369734789], "label": "Hammer throw"}]}, "6tdIiKzMVcg": {"duration": 202.107, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=6tdIiKzMVcg", "annotations": [{"segment": [12.927239107644304, 188.54851186271452], "label": "Arm wrestling"}]}, "cM67XJS7yM8": {"duration": 41.704, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cM67XJS7yM8", "annotations": [{"segment": [10.669731775351014, 20.949107510140404], "label": "Curling"}]}, "bZF4nakRNF4": {"duration": 23.04, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bZF4nakRNF4", "annotations": [{"segment": [3.198195422776911, 5.569890904836193], "label": "High jump"}, {"segment": [12.864651251170047, 16.09878145397816], "label": "High jump"}]}, "CVj_bIIbTnc": {"duration": 75.91, "subset": 
"testing", "resolution": "618x360", "url": "https://www.youtube.com/watch?v=CVj_bIIbTnc", "annotations": []}, "hKn_RK3VSAo": {"duration": 120.141, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=hKn_RK3VSAo", "annotations": [{"segment": [7.68446322886929, 120.14002267573696], "label": "Applying sunscreen"}]}, "wBhtt9x74TA": {"duration": 25.124, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=wBhtt9x74TA", "annotations": [{"segment": [0.19999999999999998, 5.282269890795631], "label": "Applying sunscreen"}, {"segment": [6.221340093603744, 11.551942277691108], "label": "Applying sunscreen"}, {"segment": [15.272059282371295, 24.08286271450858], "label": "Applying sunscreen"}]}, "LAD4qVnywmA": {"duration": 130.57, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LAD4qVnywmA", "annotations": [{"segment": [53.36699009048362, 57.84818773166927], "label": "Triple jump"}, {"segment": [63.95891178783152, 71.29178065522622], "label": "Triple jump"}, {"segment": [110.4004146146646, 130.565804], "label": "Triple jump"}]}, "RgzOQ3Sr1bE": {"duration": 122.277, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=RgzOQ3Sr1bE", "annotations": []}, "hXBeu7o9uUM": {"duration": 219.57, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=hXBeu7o9uUM", "annotations": [{"segment": [35.85507488299532, 62.274603744149765], "label": "Playing saxophone"}, {"segment": [65.70571138845554, 124.37765210608424], "label": "Playing saxophone"}, {"segment": [127.12253822152887, 164.5216115444618], "label": "Playing saxophone"}, {"segment": [166.58027613104522, 186.48070046801874], "label": "Playing saxophone"}, {"segment": [188.88247581903278, 198.8326879875195], "label": "Playing saxophone"}]}, "pi4p4zdoqmw": {"duration": 142.757, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=pi4p4zdoqmw", "annotations": [{"segment": [9.428128929950965, 142.756098], "label": "Surfing"}]}, "A8q6beSMpEE": {"duration": 58.28, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=A8q6beSMpEE", "annotations": [{"segment": [8.268223088923557, 56.98852886115445], "label": "Cleaning windows"}]}, "KqOLTkf5ujs": {"duration": 134.258, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KqOLTkf5ujs", "annotations": [{"segment": [0.01, 134.2577777777778], "label": "Painting fence"}]}, "7BM40jqPYgA": {"duration": 163.306, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7BM40jqPYgA", "annotations": [{"segment": [4.934790475366182, 151.96101059997676], "label": "Welding"}]}, "2Lqo9im9vPU": {"duration": 104.7, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2Lqo9im9vPU", "annotations": []}, "I7K2Wji22v4": {"duration": 120.094, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=I7K2Wji22v4", "annotations": []}, "mJZj3znhBas": {"duration": 73.44, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mJZj3znhBas", "annotations": [{"segment": [0.34373406084243374, 8.707929541341654], "label": "Playing field hockey"}, {"segment": [15.009720656786271, 20.62404365054602], "label": "Playing field hockey"}, {"segment": [23.259338117004685, 26.81125674570983], "label": "Playing field hockey"}, {"segment": [29.217395171606867, 33.685937962558505], "label": "Playing field hockey"}, {"segment": [35.40460826677067, 39.98772907800313], "label": "Playing field hockey"}, {"segment": [47.09156633541342, 55.914073897035884], "label": "Playing field hockey"}, {"segment": [62.10128699219969, 63.70537927613105], "label": "Playing field hockey"}]}, "YjxjsP6A5H8": {"duration": 235.1, "subset": "validation", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=YjxjsP6A5H8", "annotations": [{"segment": [162.81916380655227, 183.721624024961], "label": "Skateboarding"}, {"segment": [185.18846333853355, 209.3913120124805], "label": "Skateboarding"}]}, "iUIfmMljiOg": {"duration": 190.149, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iUIfmMljiOg", "annotations": [{"segment": [39.15688506084244, 79.7969854648986], "label": "Making a lemonade"}, {"segment": [85.72984683775351, 136.75245464430577], "label": "Making a lemonade"}, {"segment": [142.6853160171607, 169.08654912636507], "label": "Making a lemonade"}]}, "TjDlEonao3s": {"duration": 106.12, "subset": "training", "resolution": "1152x720", "url": "https://www.youtube.com/watch?v=TjDlEonao3s", "annotations": [{"segment": [15.718868954758193, 84.07043369734791], "label": "Removing curlers"}]}, "afMQA352X_A": {"duration": 155.02, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=afMQA352X_A", "annotations": []}, "O2JUUFFdWpk": {"duration": 151.464, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=O2JUUFFdWpk", "annotations": [{"segment": [87.4283938377535, 112.71174016380655], "label": "Rock-paper-scissors"}]}, "7uE2pMuAM8I": {"duration": 59.05, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7uE2pMuAM8I", "annotations": [{"segment": [0, 59.048343], "label": "Polishing shoes"}]}, "tJiYIajy41Q": {"duration": 15.024, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=tJiYIajy41Q", "annotations": [{"segment": [0.2, 14.823309], "label": "Futsal"}]}, "OHwE8aA90IE": {"duration": 3.832, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=OHwE8aA90IE", "annotations": [{"segment": [0.44161315616589314, 3.6312925170068024], "label": "Cutting the grass"}]}, "nK_U3gJWba0": {"duration": 174.615, "subset": "testing", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=nK_U3gJWba0", "annotations": []}, "5kdiJ2cC5_w": {"duration": 221.8, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5kdiJ2cC5_w", "annotations": [{"segment": [7.608361934477379, 106.22967315], "label": "Playing violin"}, {"segment": [124.56563114000001, 199.54658346333855], "label": "Playing violin"}]}, "hl4CItu_we8": {"duration": 15.12, "subset": "testing", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=hl4CItu_we8", "annotations": []}, "Ez5uEh7YyIM": {"duration": 147.098, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ez5uEh7YyIM", "annotations": [{"segment": [11.015169749647129, 23.866201124235445], "label": "Swimming"}, {"segment": [26.161028155411927, 34.42240546764727], "label": "Swimming"}, {"segment": [39.929990342470845, 47.04395413911794], "label": "Swimming"}, {"segment": [53.469469826412094, 70.68067256023573], "label": "Swimming"}, {"segment": [84.67911745041229, 92.48152935641235], "label": "Swimming"}, {"segment": [98.21859693435357, 103.03773369982417], "label": "Swimming"}, {"segment": [111.75807641829482, 117.72462669935368], "label": "Swimming"}, {"segment": [123.92065968353018, 135.16531213629497], "label": "Swimming"}]}, "uuFJdgTT5kE": {"duration": 137.49, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uuFJdgTT5kE", "annotations": [{"segment": [23.822025950000004, 69.28144120000002], "label": "Playing polo"}, {"segment": [117.37084633385335, 137.215], "label": "Playing polo"}]}, "II0BlR0BriI": {"duration": 147.31, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=II0BlR0BriI", "annotations": [{"segment": [70.51880703999998, 116.79382801999999], "label": "Cheerleading"}]}, "1hB5jVAhSDE": {"duration": 32.067, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=1hB5jVAhSDE", 
"annotations": [{"segment": [0.2, 31.016208376226206], "label": "Playing congas"}]}, "oTCyC7czSTI": {"duration": 123.21, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=oTCyC7czSTI", "annotations": []}, "Xag8E74bkpw": {"duration": 81.061, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Xag8E74bkpw", "annotations": [{"segment": [0.009999999999999998, 74.1668369851853], "label": "Playing congas"}]}, "r4kC4AHDIH8": {"duration": 206.147, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=r4kC4AHDIH8", "annotations": [{"segment": [13.185674028321676, 177.84579729797193], "label": "Rafting"}]}, "WmOTDAim7XM": {"duration": 16.045, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WmOTDAim7XM", "annotations": [{"segment": [0, 15.988518720748832], "label": "Trimming branches or hedges"}]}, "esTcWwmykKQ": {"duration": 219.43, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=esTcWwmykKQ", "annotations": [{"segment": [19.00306664, 178.44455692], "label": "Playing water polo"}, {"segment": [59.58193632, 192.23398423999998], "label": "Playing polo"}]}, "kMHMdqkGRB4": {"duration": 43.14, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kMHMdqkGRB4", "annotations": [{"segment": [9.086172472698907, 41.05603858034321], "label": "Using the pommel horse"}]}, "hPV-Z73KXak": {"duration": 238.4, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hPV-Z73KXak", "annotations": [{"segment": [31.86694556765163, 90.0502068429238], "label": "Playing drums"}, {"segment": [130.81367496111977, 152.3071399688958], "label": "Playing drums"}]}, "lHaxXbcEMp8": {"duration": 68.01, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lHaxXbcEMp8", "annotations": []}, "JFupxdGxgPQ": {"duration": 45.512, 
"subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JFupxdGxgPQ", "annotations": [{"segment": [7.880960340454434, 22.009889239106975], "label": "Calf roping"}]}, "4o1k3KUHz6E": {"duration": 37.5, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=4o1k3KUHz6E", "annotations": [{"segment": [5.0312315694227765, 26.150703622464896], "label": "Playing kickball"}]}, "Uo0XHGIdNu0": {"duration": 23.987, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Uo0XHGIdNu0", "annotations": []}, "kQ7ensWEW08": {"duration": 122.021, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=kQ7ensWEW08", "annotations": [{"segment": [17.64842983360346, 118.98459195630635], "label": "Knitting"}]}, "bX74cXcY7To": {"duration": 213.16, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bX74cXcY7To", "annotations": []}, "j_IJQAywTuU": {"duration": 72.795, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=j_IJQAywTuU", "annotations": [{"segment": [12.037789593216381, 69.72832665210609], "label": "Shuffleboard"}]}, "IujbHw7SXJE": {"duration": 153.14, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IujbHw7SXJE", "annotations": []}, "FQcbPesIO3c": {"duration": 220.59, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FQcbPesIO3c", "annotations": []}, "Jtlvh2w1aUM": {"duration": 78.554, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Jtlvh2w1aUM", "annotations": [{"segment": [0.2, 76.34724557722309], "label": "Blowing leaves"}]}, "1hTqfvjis9E": {"duration": 39.92, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1hTqfvjis9E", "annotations": [{"segment": [22.149198127925118, 33.286014040561625], "label": "Hammer throw"}]}, "brewO_8bl4g": 
{"duration": 85.45, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=brewO_8bl4g", "annotations": [{"segment": [58.665711081201415, 69.5967961732451], "label": "Arm wrestling"}]}, "mPtCJg-j4SM": {"duration": 191.658, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mPtCJg-j4SM", "annotations": [{"segment": [0.01, 178.7665491419657], "label": "Using the rowing machine"}]}, "W8ILh7ickB4": {"duration": 86.56, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=W8ILh7ickB4", "annotations": [{"segment": [62.62672858658347, 76.25883976599064], "label": "Washing dishes"}]}, "n1iu-AlcS-Q": {"duration": 35.086, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=n1iu-AlcS-Q", "annotations": [{"segment": [5.473533541341654, 35.08535], "label": "Shuffleboard"}]}, "icGrJ2XBxa0": {"duration": 59.536, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=icGrJ2XBxa0", "annotations": [{"segment": [9.473741496598638, 40.774239185647424], "label": "Canoeing"}]}, "zQZe5vxsi40": {"duration": 155.249, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zQZe5vxsi40", "annotations": []}, "Xdre6bEf3m8": {"duration": 15.279, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Xdre6bEf3m8", "annotations": []}, "OM_n35Vcw30": {"duration": 228.392, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OM_n35Vcw30", "annotations": [{"segment": [0.01, 226.0435228306818], "label": "Shuffleboard"}]}, "Id4W6VA0uLc": {"duration": 78.03999999999999, "subset": "testing", "resolution": "240x180", "url": "https://www.youtube.com/watch?v=Id4W6VA0uLc", "annotations": []}, "gxuere-ofCU": {"duration": 136.19, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gxuere-ofCU", "annotations": 
[{"segment": [13.059395475819034, 120.08273400936038], "label": "Playing harmonica"}]}, "4y8mMJkvAR4": {"duration": 170.412, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4y8mMJkvAR4", "annotations": [{"segment": [10.368234996879876, 36.42174857878315], "label": "Canoeing"}, {"segment": [11.165791535101405, 36.953452937597504], "label": "Canoeing"}]}, "6SFW4g4UVKI": {"duration": 32.74, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6SFW4g4UVKI", "annotations": [{"segment": [6.42383775351014, 32.68], "label": "Polishing shoes"}]}, "uMAjCPUZfp0": {"duration": 153.763, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=uMAjCPUZfp0", "annotations": [{"segment": [6.5940366614664585, 146.62739703588144], "label": "Doing fencing"}]}, "nD2n9ivzAYc": {"duration": 96.34, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nD2n9ivzAYc", "annotations": []}, "7fwrkFHTm-Q": {"duration": 123.42, "subset": "validation", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=7fwrkFHTm-Q", "annotations": [{"segment": [7.741776983784817, 113.08586469155743], "label": "Doing kickboxing"}]}, "aOzMA2rpWEw": {"duration": 536.869, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aOzMA2rpWEw", "annotations": [{"segment": [112.2173697347894, 231.9717269890796], "label": "Removing ice from car"}]}, "iqmpDgATXbU": {"duration": 185.76, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=iqmpDgATXbU", "annotations": [{"segment": [101.2572309, 185.711666], "label": "Belly dance"}, {"segment": [7.387905589703588, 91.64977542], "label": "Belly dance"}]}, "KTdFb9_IAso": {"duration": 203.431, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=KTdFb9_IAso", "annotations": []}, "9UvVeUYQLgA": {"duration": 207.98, "subset": 
"testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9UvVeUYQLgA", "annotations": []}, "UD7HJBRK2Z4": {"duration": 119.91, "subset": "training", "resolution": "426x236", "url": "https://www.youtube.com/watch?v=UD7HJBRK2Z4", "annotations": [{"segment": [38.89405304212169, 82.2758814352574], "label": "Tai chi"}, {"segment": [91.64070202808112, 118.57184711388456], "label": "Tai chi"}]}, "Ga-6wy0Nboo": {"duration": 194.584, "subset": "training", "resolution": "1280x716", "url": "https://www.youtube.com/watch?v=Ga-6wy0Nboo", "annotations": [{"segment": [5.767677346549644, 20.642213661335568], "label": "Skiing"}, {"segment": [47.962790566044404, 72.55130978028237], "label": "Skiing"}, {"segment": [85.9080362670289, 87.4258460950683], "label": "Skiing"}, {"segment": [92.2828375447943, 98.96120078816757], "label": "Skiing"}, {"segment": [105.03244010032509, 126.88890162409216], "label": "Skiing"}, {"segment": [143.28124776691746, 152.69166870076162], "label": "Skiing"}, {"segment": [156.33441228805614, 181.22649346790197], "label": "Skiing"}]}, "uWJUoJyvFF0": {"duration": 137.601, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=uWJUoJyvFF0", "annotations": [{"segment": [15.666073322932919, 137.561], "label": "Knitting"}]}, "snUWwTH8eQg": {"duration": 77.416, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=snUWwTH8eQg", "annotations": [{"segment": [0.2, 15.085998439937597], "label": "Futsal"}, {"segment": [24.49966146645866, 37.42294851794072], "label": "Futsal"}, {"segment": [41.889558502340094, 71.94863650546021], "label": "Futsal"}]}, "-Q9WotFs1MI": {"duration": 56.68, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=-Q9WotFs1MI", "annotations": [{"segment": [5.51656707, 44.88863442], "label": "Chopping wood"}, {"segment": [47.55594816, 47.64663456], "label": "Chopping wood"}]}, "EHVkkRI7mBQ": {"duration": 203.314, "subset": 
"training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EHVkkRI7mBQ", "annotations": [{"segment": [43.42985491419657, 50.403992199687984], "label": "Snowboarding"}, {"segment": [53.891060842433696, 174.03642589703588], "label": "Snowboarding"}]}, "AK34mhNU28s": {"duration": 104.397, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AK34mhNU28s", "annotations": [{"segment": [0.2, 31.904580343213723], "label": "Kite flying"}, {"segment": [36.95071294851794, 86.4519360374415], "label": "Kite flying"}]}, "1vTHJMMPZN0": {"duration": 153.6, "subset": "testing", "resolution": "423x240", "url": "https://www.youtube.com/watch?v=1vTHJMMPZN0", "annotations": []}, "aQulBdlcGNU": {"duration": 191.124, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aQulBdlcGNU", "annotations": [{"segment": [8.785006597547326, 179.1018572444935], "label": "Doing crunches"}]}, "wmrrBnxbHjk": {"duration": 60.651, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wmrrBnxbHjk", "annotations": [{"segment": [8.624771795076747, 39.95326400691475], "label": "Table soccer"}]}, "gDNgiybMA1s": {"duration": 94.32, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=gDNgiybMA1s", "annotations": [{"segment": [7.062925063941333, 91.96517010340277], "label": "Cumbia"}]}, "ltE76mknyt4": {"duration": 131.59, "subset": "testing", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=ltE76mknyt4", "annotations": []}, "N75m1Z4RqbA": {"duration": 124.88, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=N75m1Z4RqbA", "annotations": []}, "IyEnvWbx8V0": {"duration": 198.95, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IyEnvWbx8V0", "annotations": []}, "hBjVRKwCUNA": {"duration": 222.61, "subset": "validation", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=hBjVRKwCUNA", "annotations": [{"segment": [11.1116879875195, 222.561], "label": "Using the monkey bar"}]}, "v9APkG4il4Q": {"duration": 30.024, "subset": "validation", "resolution": "416x240", "url": "https://www.youtube.com/watch?v=v9APkG4il4Q", "annotations": [{"segment": [0.2, 22.91839313572543], "label": "Slacklining"}]}, "__wPHayoMgw": {"duration": 137.11, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=__wPHayoMgw", "annotations": [{"segment": [10.15184652, 21.39061344766716], "label": "Smoking hookah"}, {"segment": [39.35872874370757, 48.55669252620446], "label": "Smoking hookah"}, {"segment": [52.407002946784544, 62.24668513271144], "label": "Smoking hookah"}, {"segment": [93.47698076630549, 100.53588320403566], "label": "Smoking hookah"}]}, "yCcqJnlviQI": {"duration": 70.1, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=yCcqJnlviQI", "annotations": [{"segment": [0, 70.10104308390022], "label": "Volleyball"}]}, "s6oXeM9OluM": {"duration": 170.226, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=s6oXeM9OluM", "annotations": [{"segment": [58.67463806552262, 151.33277691107645], "label": "Clipping cat claws"}]}, "H3cFZelOk2Y": {"duration": 202.339, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=H3cFZelOk2Y", "annotations": [{"segment": [13.569552262090482, 104.13842433697349], "label": "Tug of war"}]}, "juP0cn5B60Y": {"duration": 71.1, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=juP0cn5B60Y", "annotations": [{"segment": [24.49307488299532, 28.269196567862718], "label": "Grooming dog"}, {"segment": [33.14701872074883, 39.5768751950078], "label": "Grooming dog"}, {"segment": [53.5451840873635, 58.86644461778472], "label": "Grooming dog"}]}, "u7dfBgc_SqU": {"duration": 177.471, "subset": "training", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=u7dfBgc_SqU", "annotations": [{"segment": [26.09529414890639, 171.22272218628956], "label": "Blow-drying hair"}]}, "4E40jmmc2N8": {"duration": 9.06, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=4E40jmmc2N8", "annotations": [{"segment": [0.9763962558502342, 8.731772230889236], "label": "Discus throw"}]}, "RULNhPrXnfA": {"duration": 187.55, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RULNhPrXnfA", "annotations": [{"segment": [76.3455538221529, 83.65834633385334], "label": "Playing harmonica"}, {"segment": [111.73946957878314, 153.86115444617784], "label": "Playing harmonica"}, {"segment": [162.92901716068644, 177.8471138845554], "label": "Playing harmonica"}]}, "HlFnWOx53XU": {"duration": 77.39, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=HlFnWOx53XU", "annotations": [{"segment": [7.478786271450858, 76.11474414976598], "label": "Playing bagpipes"}]}, "vPZf3F3bNAA": {"duration": 62.79, "subset": "training", "resolution": "1280x698", "url": "https://www.youtube.com/watch?v=vPZf3F3bNAA", "annotations": [{"segment": [7.438870514820593, 48.05901872074883], "label": "Javelin throw"}]}, "fGMZvLvAMK4": {"duration": 224.491, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fGMZvLvAMK4", "annotations": [{"segment": [16.460281837123823, 222.73913278876753], "label": "Skiing"}]}, "TSVDMiy_FC4": {"duration": 113.43, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TSVDMiy_FC4", "annotations": [{"segment": [15.749178756265897, 24.77398905480029], "label": "Polishing shoes"}, {"segment": [26.897473830926028, 40.34621074638904], "label": "Polishing shoes"}, {"segment": [43.70839497525479, 67.59759870666936], "label": "Polishing shoes"}, {"segment": [70.60586852106084, 108.65163727301093], "label": "Polishing shoes"}]}, "45hcc_VI4t4": 
{"duration": 12.35, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=45hcc_VI4t4", "annotations": []}, "zihMznAYlV0": {"duration": 72.795, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zihMznAYlV0", "annotations": [{"segment": [0.2, 27.59607285023401], "label": "Snowboarding"}, {"segment": [33.38784122620905, 72.594579], "label": "Snowboarding"}]}, "3mQ9FPD5OPY": {"duration": 78.18, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3mQ9FPD5OPY", "annotations": []}, "JN1mex2vCjI": {"duration": 46.464, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JN1mex2vCjI", "annotations": [{"segment": [0, 46.46312925170068], "label": "Slacklining"}]}, "75lRDDRD1Lw": {"duration": 64.691, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=75lRDDRD1Lw", "annotations": []}, "RrgUFU70PNQ": {"duration": 98.639, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RrgUFU70PNQ", "annotations": [{"segment": [11.694968108928439, 98.637823], "label": "Capoeira"}]}, "dexjr5WGeqQ": {"duration": 175.52, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=dexjr5WGeqQ", "annotations": [{"segment": [0.2, 175.3196598639456], "label": "Installing carpet"}, {"segment": [144.15378331055066, 146.88773436974068], "label": "Installing carpet"}]}, "JYh2Oejg_lQ": {"duration": 39.707, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=JYh2Oejg_lQ", "annotations": [{"segment": [17.22043980655226, 25.644827625585023], "label": "Playing rubik cube"}]}, "m6yPz9fHJnY": {"duration": 92.39, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=m6yPz9fHJnY", "annotations": [{"segment": [3.2414391575663024, 91.12045631825272], "label": "Chopping wood"}, {"segment": 
[61.78033679999999, 63.53902351999999], "label": "Chopping wood"}]}, "XbSQ_7vh7yc": {"duration": 215.06, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XbSQ_7vh7yc", "annotations": [{"segment": [1.796237078039817, 131.06414985062548], "label": "Washing dishes"}, {"segment": [158.52520904065665, 181.63268567617067], "label": "Washing dishes"}]}, "qpWwrunXZ8g": {"duration": 90.094, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qpWwrunXZ8g", "annotations": []}, "V-6-T-Y_CR0": {"duration": 165.466, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=V-6-T-Y_CR0", "annotations": []}, "Lme4KL45gwk": {"duration": 199.204, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Lme4KL45gwk", "annotations": [{"segment": [34.5213366321619, 55.964448440274225], "label": "Longboarding"}, {"segment": [112.2137707195254, 114.38915887397157], "label": "Longboarding"}]}, "dLLvpM-gB8Q": {"duration": 166.441, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=dLLvpM-gB8Q", "annotations": [{"segment": [66.98315678183535, 162.37712562918736], "label": "Having an ice cream"}]}, "ZfXkzv-hNlg": {"duration": 109.25, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZfXkzv-hNlg", "annotations": [{"segment": [0.01, 109.201], "label": "Rafting"}]}, "FQkvwPpDomw": {"duration": 27.49, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FQkvwPpDomw", "annotations": [{"segment": [7.398507507800312, 23.096210393915758], "label": "Shoveling snow"}]}, "1cWWCiNIYnc": {"duration": 20.09, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=1cWWCiNIYnc", "annotations": [{"segment": [0.01, 20.085259], "label": "Shaving"}]}, "_cLb2ZRS_4M": {"duration": 227.858, "subset": "training", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=_cLb2ZRS_4M", "annotations": [{"segment": [31.992460759654875, 198.35325670986023], "label": "Drum corps"}]}, "cHdZ_dBVgDE": {"duration": 97.87, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cHdZ_dBVgDE", "annotations": [{"segment": [4.806166113884555, 84.39894048], "label": "Cumbia"}]}, "SjdEr8kBCW8": {"duration": 39.985, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SjdEr8kBCW8", "annotations": []}, "OUfVZuWyqJQ": {"duration": 47.509, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=OUfVZuWyqJQ", "annotations": [{"segment": [8.745627251170047, 37.13185807488299], "label": "Powerbocking"}]}, "Vcv9P-4Tty0": {"duration": 45.535, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Vcv9P-4Tty0", "annotations": [{"segment": [0.19999999999999998, 45.281], "label": "Beach soccer"}]}, "rMQh0pUt7vU": {"duration": 65.318, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rMQh0pUt7vU", "annotations": []}, "LL_leiyIdVQ": {"duration": 181.81, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LL_leiyIdVQ", "annotations": [{"segment": [8.646345553822153, 172.21819422776912], "label": "Tumbling"}]}, "bCEdkW675dQ": {"duration": 24.1, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bCEdkW675dQ", "annotations": [{"segment": [14.777235917316695, 22.297457758190326], "label": "Playing violin"}]}, "AkeJ-lJLNkw": {"duration": 193.376, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AkeJ-lJLNkw", "annotations": [{"segment": [0.2, 31.977898479204473], "label": "Capoeira"}, {"segment": [45.25174313094973, 68.17929298396426], "label": "Capoeira"}, {"segment": [83.26320736094749, 116.44781899031062], "label": "Capoeira"}, {"segment": 
[120.36963672832627, 155.96767465800673], "label": "Capoeira"}]}, "cBAlXvu38dg": {"duration": 60.117, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=cBAlXvu38dg", "annotations": [{"segment": [6.681905750274658, 59.18259266616251], "label": "Kneeling"}]}, "qz47x5-R-BU": {"duration": 100.496, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qz47x5-R-BU", "annotations": [{"segment": [8.779676592823714, 90.93236471138846], "label": "Playing drums"}]}, "YdenUU3g0XY": {"duration": 68.03999999999999, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YdenUU3g0XY", "annotations": [{"segment": [35.84292121684867, 65.04826443057722], "label": "Playing badminton"}]}, "bt4cH91_KPI": {"duration": 119.56, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bt4cH91_KPI", "annotations": [{"segment": [50.915233310643906, 107.90155849755979], "label": "Beer pong"}]}, "cpeevuvJKjE": {"duration": 90.77, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cpeevuvJKjE", "annotations": [{"segment": [1.9696222, 13.0612274], "label": "Ballet"}, {"segment": [26.608052900000004, 80.0964567], "label": "Ballet"}]}, "2Tm_2wgv2Zs": {"duration": 70.171, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=2Tm_2wgv2Zs", "annotations": [{"segment": [24.511051482059287, 45.739372854914194], "label": "Table soccer"}]}, "xabaKyhx7cg": {"duration": 121.53, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=xabaKyhx7cg", "annotations": [{"segment": [41.427439534316065, 46.73621897120783], "label": "Cleaning shoes"}]}, "qmKSDwVvxVk": {"duration": 21.13, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qmKSDwVvxVk", "annotations": [{"segment": [0.8800644999999999, 18.669412080343214], "label": "High 
jump"}]}, "LW5lcM_rKBY": {"duration": 10.078, "subset": "training", "resolution": "406x720", "url": "https://www.youtube.com/watch?v=LW5lcM_rKBY", "annotations": [{"segment": [0.01, 10.077460317460318], "label": "Peeling potatoes"}]}, "IFPYE5tCOFw": {"duration": 55.06, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=IFPYE5tCOFw", "annotations": []}, "vH9gqDUaBQ0": {"duration": 213.69, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vH9gqDUaBQ0", "annotations": [{"segment": [70.00862488455537, 202.69163776099845], "label": "Sailing"}]}, "tg6HOGy0BBQ": {"duration": 200.2, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tg6HOGy0BBQ", "annotations": [{"segment": [16.6067559, 184.38615834633384], "label": "Shaving legs"}]}, "JBmG2eQNeNA": {"duration": 203.8, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JBmG2eQNeNA", "annotations": []}, "bG55LSFBA9M": {"duration": 45.233, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=bG55LSFBA9M", "annotations": [{"segment": [8.602998439937597, 37.796780031201244], "label": "River tubing"}]}, "WnoJmKZC_qg": {"duration": 144.68, "subset": "training", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=WnoJmKZC_qg", "annotations": [{"segment": [23.06391703, 78.19103369], "label": "Playing flauta"}, {"segment": [85.91855272, 121.22399840093604], "label": "Playing flauta"}]}, "CHBpVOfPmRA": {"duration": 107.37, "subset": "validation", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=CHBpVOfPmRA", "annotations": [{"segment": [0.01, 107.36907029478458], "label": "Playing beach volleyball"}]}, "m9IDk3YHBfQ": {"duration": 99.266, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=m9IDk3YHBfQ", "annotations": []}, "WdJUMmy2ox8": {"duration": 204.8, "subset": "testing", "resolution": 
"360x480", "url": "https://www.youtube.com/watch?v=WdJUMmy2ox8", "annotations": []}, "f0rWFvJ7uTk": {"duration": 41.22, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=f0rWFvJ7uTk", "annotations": [{"segment": [0, 41.21535], "label": "Playing kickball"}]}, "WgsoxWcUKS4": {"duration": 165.698, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WgsoxWcUKS4", "annotations": []}, "TgantuXBiJg": {"duration": 129.99, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TgantuXBiJg", "annotations": [{"segment": [0.01, 14.291189510920438], "label": "Putting on makeup"}, {"segment": [68.4152689352574, 99.4301908525741], "label": "Putting on makeup"}]}, "Yyv8oxrFGV0": {"duration": 70.87, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Yyv8oxrFGV0", "annotations": [{"segment": [12.164524715423584, 65.0044920051432], "label": "Drinking beer"}]}, "5wBo0Gd81-I": {"duration": 164.003, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5wBo0Gd81-I", "annotations": [{"segment": [6.393954758190328, 163.941], "label": "Riding bumper cars"}]}, "U4ua-VD7hNM": {"duration": 127.641, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=U4ua-VD7hNM", "annotations": [{"segment": [0.5964490219763917, 110.54188393146417], "label": "Slacklining"}]}, "qqZLs6j3YII": {"duration": 159.45, "subset": "testing", "resolution": "624x480", "url": "https://www.youtube.com/watch?v=qqZLs6j3YII", "annotations": []}, "6VW7ZPurE7c": {"duration": 90.21000000000001, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=6VW7ZPurE7c", "annotations": [{"segment": [0, 90.209501], "label": "Walking the dog"}]}, "BahcBS2r0es": {"duration": 200.435, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=BahcBS2r0es", "annotations": 
[{"segment": [19.06901872074883, 200.401], "label": "Scuba diving"}]}, "Qu3_80O0j5w": {"duration": 112.06, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Qu3_80O0j5w", "annotations": [{"segment": [8.622647151073563, 97.64187612303618], "label": "Swimming"}]}, "Bs3TR80BRco": {"duration": 144.638, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Bs3TR80BRco", "annotations": [{"segment": [0.01, 144.6368253968254], "label": "Slacklining"}]}, "mWOa-0w0Bls": {"duration": 125.481, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=mWOa-0w0Bls", "annotations": [{"segment": [7.500620858795545, 123.52584515786096], "label": "Painting fence"}]}, "Hc--KcdMkEA": {"duration": 82.39, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Hc--KcdMkEA", "annotations": [{"segment": [5.78912368, 66.04395744], "label": "Starting a campfire"}]}, "hKezMv52Nw8": {"duration": 201.94, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=hKezMv52Nw8", "annotations": [{"segment": [0, 201.94383219954648], "label": "Long jump"}]}, "06eyqLosXjU": {"duration": 227.788, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=06eyqLosXjU", "annotations": [{"segment": [7.947709127064464, 216.57505950089623], "label": "Playing drums"}]}, "A7PBp9PDW80": {"duration": 32.021, "subset": "training", "resolution": "1920x820", "url": "https://www.youtube.com/watch?v=A7PBp9PDW80", "annotations": [{"segment": [0.01, 27.358109204368173], "label": "Doing fencing"}]}, "eTVzSwuCfd8": {"duration": 226.859, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eTVzSwuCfd8", "annotations": [{"segment": [12.03000624024961, 146.12919344773792], "label": "Blow-drying hair"}]}, "K2Pws9z20Do": {"duration": 228.46, "subset": "validation", "resolution": "1444x1080", "url": 
"https://www.youtube.com/watch?v=K2Pws9z20Do", "annotations": [{"segment": [12.830859856474259, 219.5502686552262], "label": "Zumba"}]}, "cnFw7hjmqJY": {"duration": 120.51, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cnFw7hjmqJY", "annotations": [{"segment": [19.171703588143522, 112.02289547581903], "label": "Layup drill in basketball"}]}, "MFfJ4V9Dqvg": {"duration": 83.407, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MFfJ4V9Dqvg", "annotations": [{"segment": [0.01, 83.361], "label": "Building sandcastles"}]}, "Fp8e9dbWeSg": {"duration": 161.495, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Fp8e9dbWeSg", "annotations": [{"segment": [74.34285189986441, 155.2158233383102], "label": "Riding bumper cars"}]}, "rbLCMaf3phk": {"duration": 215.11, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=rbLCMaf3phk", "annotations": [{"segment": [19.962822351014044, 164.58204699], "label": "Shot put"}, {"segment": [55.86235162090484, 74.31538068486739], "label": "Shot put"}, {"segment": [102.4981887098284, 214.861666], "label": "Shot put"}, {"segment": [0.2, 214.861666], "label": "Shot put"}, {"segment": [140.7462853151326, 214.861666], "label": "Shot put"}, {"segment": [189.05967050078004, 215.061666], "label": "Shot put"}]}, "kXho4FVOvjM": {"duration": 229.088, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=kXho4FVOvjM", "annotations": []}, "lDJpGI4BZ8k": {"duration": 204.85, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=lDJpGI4BZ8k", "annotations": [{"segment": [52.7295828159657, 204.84643990929706], "label": "Playing harmonica"}]}, "I3tqWc5wG78": {"duration": 60.12, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=I3tqWc5wG78", "annotations": []}, "uiG7A8lEdEg": {"duration": 45.79, "subset": 
"training", "resolution": "568x320", "url": "https://www.youtube.com/watch?v=uiG7A8lEdEg", "annotations": [{"segment": [11.858188134328095, 30.529815749999997], "label": "Playing lacrosse"}]}, "ze4292jVUcQ": {"duration": 33.37, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=ze4292jVUcQ", "annotations": [{"segment": [5.1515311299999995, 30.37392682605304], "label": "Playing water polo"}]}, "qWgF9hxNbP8": {"duration": 17.857, "subset": "testing", "resolution": "400x220", "url": "https://www.youtube.com/watch?v=qWgF9hxNbP8", "annotations": []}, "XBBT8UvESiE": {"duration": 142.53, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XBBT8UvESiE", "annotations": [{"segment": [100.81000780031201, 117.25971138845556], "label": "Shaving"}]}, "0DsdxtAvHDk": {"duration": 61.58, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=0DsdxtAvHDk", "annotations": []}, "ovq0Fqbxt1c": {"duration": 42.84, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ovq0Fqbxt1c", "annotations": [{"segment": [31.650037441497663, 41.26523868954759], "label": "Polishing forniture"}]}, "hfUKfKNKwfo": {"duration": 212.579, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=hfUKfKNKwfo", "annotations": []}, "nwVRSQr7ZhQ": {"duration": 202.85, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nwVRSQr7ZhQ", "annotations": []}, "nrKWb9lZm1U": {"duration": 161.727, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nrKWb9lZm1U", "annotations": []}, "J9KjpzKM8oc": {"duration": 221.518, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=J9KjpzKM8oc", "annotations": []}, "etG65un_AiQ": {"duration": 190.473, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=etG65un_AiQ", 
"annotations": [{"segment": [20.501934477379095, 164.0171981279251], "label": "Wakeboarding"}]}, "8liz3Fu7DGU": {"duration": 224.35, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8liz3Fu7DGU", "annotations": [{"segment": [5.598835145085804, 215.20522588923558], "label": "Playing lacrosse"}]}, "oUWcw4FlGPU": {"duration": 141.062, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oUWcw4FlGPU", "annotations": []}, "tbOIyG1RCUI": {"duration": 13.793, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tbOIyG1RCUI", "annotations": [{"segment": [0, 13.792653], "label": "Hitting a pinata"}]}, "Rm84DwISnjc": {"duration": 144.429, "subset": "testing", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=Rm84DwISnjc", "annotations": []}, "fGU3iPxGRCM": {"duration": 75.326, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=fGU3iPxGRCM", "annotations": []}, "y3r4IrZkgBo": {"duration": 34.16, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=y3r4IrZkgBo", "annotations": [{"segment": [0.3996476443057722, 7.273587126365054], "label": "Shaving legs"}]}, "7LhgGs4TrYA": {"duration": 126.55, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=7LhgGs4TrYA", "annotations": [{"segment": [29.4241079166653, 35.73183382320736], "label": "Playing saxophone"}, {"segment": [38.294347472740064, 42.43379259890829], "label": "Playing saxophone"}]}, "FBL7iWMmTHU": {"duration": 215.41, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FBL7iWMmTHU", "annotations": [{"segment": [4.3687203338533545, 213.73124094851792], "label": "Playing badminton"}]}, "p4MMyVArVc0": {"duration": 129.8, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=p4MMyVArVc0", "annotations": [{"segment": [0.01, 
125.39520965420493], "label": "Slacklining"}]}, "WPK5VeqNSh8": {"duration": 145.288, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WPK5VeqNSh8", "annotations": [{"segment": [12.239488024961, 23.572347307332297], "label": "Curling"}, {"segment": [60.290811382215296, 70.49038473634945], "label": "Curling"}, {"segment": [73.21027096411858, 80.23664371918876], "label": "Curling"}, {"segment": [98.36921857098284, 123.98148054914198], "label": "Curling"}]}, "y1F_Hzpux48": {"duration": 107.253, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=y1F_Hzpux48", "annotations": [{"segment": [6.358210420933845, 14.05499145680113], "label": "Ice fishing"}, {"segment": [38.48390517933643, 39.655154467403186], "label": "Ice fishing"}, {"segment": [54.04478857793768, 83.66066302652106], "label": "Ice fishing"}, {"segment": [96.37708380655226, 107.05297], "label": "Ice fishing"}]}, "hUynCsek8I0": {"duration": 203.291, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hUynCsek8I0", "annotations": [{"segment": [49.31059844842773, 192.96140936249697], "label": "Ice fishing"}]}, "kbgY_qeAD1c": {"duration": 34.947, "subset": "testing", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=kbgY_qeAD1c", "annotations": []}, "ALqZpLyQHLQ": {"duration": 32.79, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=ALqZpLyQHLQ", "annotations": []}, "zdoGEZO5Ldc": {"duration": 121.743, "subset": "training", "resolution": "1164x720", "url": "https://www.youtube.com/watch?v=zdoGEZO5Ldc", "annotations": [{"segment": [9.496273184260705, 105.59855780897902], "label": "Playing rubik cube"}]}, "XCejO7RyWdA": {"duration": 54.103, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=XCejO7RyWdA", "annotations": [{"segment": [5.7350202808112325, 6.9157597503900154], "label": "Disc dog"}, {"segment": 
[10.879670826833074, 12.317981279251171], "label": "Disc dog"}, {"segment": [15.017812792511702, 19.15192979719189], "label": "Disc dog"}, {"segment": [21.261173166926678, 25.0578112324493], "label": "Disc dog"}, {"segment": [26.998315132605306, 30.373104524180967], "label": "Disc dog"}, {"segment": [33.832263650546025, 47.753269890795636], "label": "Disc dog"}]}, "rMj2JWNJzkw": {"duration": 110.03999999999999, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=rMj2JWNJzkw", "annotations": []}, "zvXi08rTq3Y": {"duration": 17.741, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zvXi08rTq3Y", "annotations": [{"segment": [0.01, 16.950837322326226], "label": "Riding bumper cars"}]}, "iGxMm7C1q48": {"duration": 88.074, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iGxMm7C1q48", "annotations": [{"segment": [0.2, 66.80912534267914], "label": "Playing ten pins"}]}, "yacDrjVTUak": {"duration": 234.151, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yacDrjVTUak", "annotations": [{"segment": [25.932279251170048, 202.7100702028081], "label": "Welding"}]}, "0BLpSquuZFM": {"duration": 173.36, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0BLpSquuZFM", "annotations": []}, "JHITVq5zJOM": {"duration": 237.541, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JHITVq5zJOM", "annotations": [{"segment": [2.9600016953822026, 237.34013605442178], "label": "Playing beach volleyball"}]}, "V4_SJyhRGqA": {"duration": 129.893, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=V4_SJyhRGqA", "annotations": [{"segment": [0.01, 129.861], "label": "Croquet"}]}, "eRHbpYeYtxo": {"duration": 15.12, "subset": "validation", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=eRHbpYeYtxo", "annotations": 
[{"segment": [0, 9.126091516380656], "label": "Clean and jerk"}]}, "Ey2SmPzJTKM": {"duration": 137.811, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ey2SmPzJTKM", "annotations": [{"segment": [26.608043613707164, 85.6302476635514], "label": "Changing car wheel"}]}, "bL0bXUG2QWI": {"duration": 107.161, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bL0bXUG2QWI", "annotations": [{"segment": [24.32406741167606, 77.31877084633385], "label": "Kneeling"}, {"segment": [80.99663670280812, 90.19130134399376], "label": "Kneeling"}, {"segment": [104.73558904914196, 106.959637], "label": "Kneeling"}]}, "tOPaS3wfoJQ": {"duration": 206.472, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tOPaS3wfoJQ", "annotations": []}, "r1mrueEHDDE": {"duration": 224.68, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=r1mrueEHDDE", "annotations": [{"segment": [21.73380510186394, 218.7402308892356], "label": "Playing flauta"}]}, "VD3kzQGuQnc": {"duration": 63.93, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VD3kzQGuQnc", "annotations": [{"segment": [22.81533073322933, 47.72289703588143], "label": "Mixing drinks"}]}, "pX-ik8n_eNQ": {"duration": 211.79, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=pX-ik8n_eNQ", "annotations": [{"segment": [0.01, 206.57873293681746], "label": "Ping-pong"}]}, "foFFu7bY5ow": {"duration": 138.043, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=foFFu7bY5ow", "annotations": [{"segment": [12.059841022212318, 13.782675453956935], "label": "Surfing"}, {"segment": [31.011019771403102, 58.145661716068645], "label": "Surfing"}]}, "-rwKyNnz89s": {"duration": 177.912, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-rwKyNnz89s", "annotations": 
[{"segment": [0.01, 177.9107709750567], "label": "Futsal"}]}, "GK_DCUGw5lM": {"duration": 41.68, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GK_DCUGw5lM", "annotations": [{"segment": [5.002117004680187, 12.083035881435256], "label": "Baton twirling"}, {"segment": [16.630414976599063, 30.792252730109205], "label": "Baton twirling"}]}, "Xepkoln9NF4": {"duration": 149.82, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Xepkoln9NF4", "annotations": []}, "9N1525JmqZQ": {"duration": 144.219, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=9N1525JmqZQ", "annotations": [{"segment": [48.2605384868456, 125.65739246957877], "label": "Laying tile"}]}, "BBKSKU6dGGk": {"duration": 36.69, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=BBKSKU6dGGk", "annotations": []}, "zvAlL20-K4w": {"duration": 88.78999999999999, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zvAlL20-K4w", "annotations": [{"segment": [10.31995027964384, 22.78700430874378], "label": "Shot put"}, {"segment": [46.058838496397, 51.322705029641185], "label": "Shot put"}]}, "RQO61KXeBos": {"duration": 238.77, "subset": "training", "resolution": "528x360", "url": "https://www.youtube.com/watch?v=RQO61KXeBos", "annotations": [{"segment": [137.24499600000001, 216.19172356708268], "label": "Pole vault"}, {"segment": [5.772561094383775, 124.6689801], "label": "Pole vault"}]}, "xg0OCQW8t2s": {"duration": 67.129, "subset": "training", "resolution": "624x480", "url": "https://www.youtube.com/watch?v=xg0OCQW8t2s", "annotations": [{"segment": [29.61610452418097, 67.081], "label": "Cutting the grass"}]}, "Y2gFwWnli4g": {"duration": 116.94, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Y2gFwWnli4g", "annotations": [{"segment": [40.86364262189535, 116.93569160997733], "label": "Preparing 
salad"}]}, "oCicjtc1t9Y": {"duration": 88.051, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oCicjtc1t9Y", "annotations": [{"segment": [0.20000000000000004, 87.85009070294784], "label": "Raking leaves"}]}, "tcUet_leWUE": {"duration": 34.74, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=tcUet_leWUE", "annotations": []}, "eolJOpzxhx4": {"duration": 104.63, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eolJOpzxhx4", "annotations": []}, "EFtxSXp1pck": {"duration": 220.335, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EFtxSXp1pck", "annotations": [{"segment": [62.888335413416534, 208.95945085803433], "label": "Making an omelette"}]}, "xpmXZBpYrM0": {"duration": 47.07, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=xpmXZBpYrM0", "annotations": [{"segment": [18.19221216848674, 33.52355226209048], "label": "Shaving legs"}]}, "3dWw9GLcOeA": {"duration": 56.936, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3dWw9GLcOeA", "annotations": []}, "nGKc2t-NMYc": {"duration": 30.77, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=nGKc2t-NMYc", "annotations": []}, "hvXMCjJcIqI": {"duration": 33.69, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hvXMCjJcIqI", "annotations": [{"segment": [0, 33.692153], "label": "Mowing the lawn"}]}, "lUk_dSjmIgM": {"duration": 103.376, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lUk_dSjmIgM", "annotations": [{"segment": [0.01, 103.37523809523809], "label": "Cleaning sink"}]}, "gnKpguqHfag": {"duration": 217.502, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=gnKpguqHfag", "annotations": [{"segment": [6.786312486513066, 11.19741560274656], "label": 
"Playing ice hockey"}, {"segment": [19.34099058656224, 21.037568708190506], "label": "Playing ice hockey"}, {"segment": [67.86312486513066, 72.27422798136415], "label": "Playing ice hockey"}, {"segment": [81.77506546248244, 86.86479982736725], "label": "Playing ice hockey"}, {"segment": [90.25795607062378, 149.2416848673947], "label": "Playing ice hockey"}]}, "-8SgY4vW4xM": {"duration": 222.123, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-8SgY4vW4xM", "annotations": []}, "M0sa3xWhFGo": {"duration": 230.02, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=M0sa3xWhFGo", "annotations": [{"segment": [8.610421216848675, 212.74915756630264], "label": "Cheerleading"}]}, "8qOiZzSzSTY": {"duration": 39.27, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=8qOiZzSzSTY", "annotations": []}, "mnqSG5o84NE": {"duration": 139.82999999999998, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=mnqSG5o84NE", "annotations": [{"segment": [1.7451552574102966, 133.2862327847114], "label": "Doing kickboxing"}]}, "eMB9L-LDK1E": {"duration": 129.684, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eMB9L-LDK1E", "annotations": [{"segment": [3.2319862108914177, 128.2694510514019], "label": "Cleaning sink"}]}, "ui7LIgAF8-E": {"duration": 20.36, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=ui7LIgAF8-E", "annotations": [{"segment": [0, 20.316666], "label": "Springboard diving"}]}, "cFzo-Zgxk1M": {"duration": 99.986, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=cFzo-Zgxk1M", "annotations": [{"segment": [82.67100653666147, 86.25861625429017], "label": "Playing ten pins"}, {"segment": [89.22229384711387, 92.65392053354134], "label": "Playing ten pins"}]}, "poxJkbrLcHQ": {"duration": 177.77, "subset": "testing", 
"resolution": "720x480", "url": "https://www.youtube.com/watch?v=poxJkbrLcHQ", "annotations": []}, "lt--z8nFIT0": {"duration": 193.376, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lt--z8nFIT0", "annotations": [{"segment": [0.19999999999999998, 11.762120124804992], "label": "Playing ice hockey"}, {"segment": [30.159282371294854, 46.14370202808112], "label": "Playing ice hockey"}, {"segment": [57.00104368174727, 72.38227769110765], "label": "Playing ice hockey"}, {"segment": [83.23961934477379, 115.20845865834633], "label": "Playing ice hockey"}, {"segment": [127.87535725429017, 136.31995631825274], "label": "Playing ice hockey"}, {"segment": [144.77953198127926, 159.86073322932918], "label": "Playing ice hockey"}, {"segment": [169.51270202808112, 190.92800780031203], "label": "Playing ice hockey"}]}, "VYuQAfG0gKw": {"duration": 25.73, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=VYuQAfG0gKw", "annotations": [{"segment": [0.01, 20.16786739469579], "label": "Brushing hair"}]}, "N2Eo_C-Ng4k": {"duration": 212.765, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=N2Eo_C-Ng4k", "annotations": []}, "V_Nj_BhesEY": {"duration": 83.15, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=V_Nj_BhesEY", "annotations": [{"segment": [20.106633272133607, 76.01604579013093], "label": "Doing karate"}]}, "ir759AX1EYY": {"duration": 108.09, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ir759AX1EYY", "annotations": [{"segment": [13.346574665626276, 100.0533921188973], "label": "Polishing shoes"}]}, "1P_BTFhNZFo": {"duration": 140.969, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1P_BTFhNZFo", "annotations": [{"segment": [0.2, 106.86010296411857], "label": "Baton twirling"}, {"segment": [110.59800780031202, 126.64901092043682], "label": 
"Baton twirling"}]}, "jwv7eQpeGW4": {"duration": 180.95, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=jwv7eQpeGW4", "annotations": [{"segment": [22.84440093603744, 166.13383619344773], "label": "Triple jump"}]}, "zPfyhY0J2yc": {"duration": 87.52, "subset": "testing", "resolution": "640x358", "url": "https://www.youtube.com/watch?v=zPfyhY0J2yc", "annotations": []}, "rVqeQ9D7EWo": {"duration": 232.5, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rVqeQ9D7EWo", "annotations": [{"segment": [39.90580343213728, 162.88823400936036], "label": "Shoveling snow"}]}, "3kxP8qOtym8": {"duration": 26.26, "subset": "testing", "resolution": "432x720", "url": "https://www.youtube.com/watch?v=3kxP8qOtym8", "annotations": []}, "M0mLgp8VxpY": {"duration": 53.755, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=M0mLgp8VxpY", "annotations": [{"segment": [24.12196140043312, 26.96699878361069], "label": "Shuffleboard"}, {"segment": [31.652942708844336, 38.93289130697517], "label": "Shuffleboard"}]}, "Ka0Rtjfi9fI": {"duration": 100.775, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ka0Rtjfi9fI", "annotations": [{"segment": [6.4323380062305295, 50.04672741433022], "label": "Trimming branches or hedges"}]}, "W3TQnn0q9kc": {"duration": 98.105, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=W3TQnn0q9kc", "annotations": [{"segment": [18.201059282371297, 39.163207488299534], "label": "Waterskiing"}, {"segment": [53.23748517940719, 72.66610764430578], "label": "Waterskiing"}, {"segment": [75.11380811232449, 85.51653510140406], "label": "Waterskiing"}, {"segment": [87.81125429017162, 97.75503744149766], "label": "Waterskiing"}]}, "RgiVa9GMYxE": {"duration": 23.66, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=RgiVa9GMYxE", "annotations": []}, 
"a2k-AgrAppg": {"duration": 63.368, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=a2k-AgrAppg", "annotations": [{"segment": [19.414227856221544, 61.29771639672227], "label": "Having an ice cream"}]}, "p0_buoe9M7I": {"duration": 24.75, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=p0_buoe9M7I", "annotations": [{"segment": [0.6178463650546022, 23.47816187207488], "label": "Getting a tattoo"}]}, "3WyJNIqDFQg": {"duration": 66.781, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3WyJNIqDFQg", "annotations": [{"segment": [25.397697347893917, 39.137435257410296], "label": "Kneeling"}]}, "VbWomrZXIus": {"duration": 129.66, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VbWomrZXIus", "annotations": [{"segment": [15.575409454473418, 115.50076361693924], "label": "Skateboarding"}]}, "Z7zc7zs5Zj8": {"duration": 236.82, "subset": "training", "resolution": "352x262", "url": "https://www.youtube.com/watch?v=Z7zc7zs5Zj8", "annotations": [{"segment": [5.577111, 232.7563168174727], "label": "Belly dance"}]}, "e142-8fV9OU": {"duration": 117.61, "subset": "testing", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=e142-8fV9OU", "annotations": []}, "xoETvVbyiBA": {"duration": 58.65, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xoETvVbyiBA", "annotations": [{"segment": [12.76171974, 31.737724829999998], "label": "Using parallel bars"}, {"segment": [38.16433404, 52.47664572542902], "label": "Using parallel bars"}]}, "RTIzzeNaH2Q": {"duration": 156.782, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=RTIzzeNaH2Q", "annotations": [{"segment": [63.57669266770671, 153.33720280811232], "label": "Assembling bicycle"}]}, "drVqGhjI558": {"duration": 128.731, "subset": "training", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=drVqGhjI558", "annotations": [{"segment": [0.2, 9.037307778025406], "label": "Peeling potatoes"}, {"segment": [27.11192333407622, 35.14508580343214], "label": "Peeling potatoes"}, {"segment": [49.203120124805, 53.018872297749056], "label": "Peeling potatoes"}, {"segment": [62.257009137508355, 117.68582965366615], "label": "Peeling potatoes"}]}, "_Wag6CT_0j8": {"duration": 232.5, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=_Wag6CT_0j8", "annotations": [{"segment": [14.871384655226208, 103.0115424898596], "label": "Playing field hockey"}]}, "gsJ953MHtpY": {"duration": 214.855, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=gsJ953MHtpY", "annotations": [{"segment": [0.01, 206.44264586583463], "label": "Powerbocking"}]}, "E4n0KcS_zgI": {"duration": 63.554, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=E4n0KcS_zgI", "annotations": [{"segment": [4.759040190179036, 50.46565386271451], "label": "Playing congas"}]}, "zNpILhjhFbQ": {"duration": 24.265, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zNpILhjhFbQ", "annotations": [{"segment": [4.696977301028797, 18.227844058358272], "label": "Doing crunches"}]}, "x0tjkH_zfXA": {"duration": 45.07, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=x0tjkH_zfXA", "annotations": [{"segment": [0.19999999999999998, 41.13246432917317], "label": "Raking leaves"}]}, "O_tZAD_opA4": {"duration": 74.24, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=O_tZAD_opA4", "annotations": [{"segment": [3.0110593920355457, 4.632399064670071], "label": "Tumbling"}, {"segment": [20.266745907931558, 53.38839922032256], "label": "Tumbling"}, {"segment": [63.46386717628705, 67.1697864274571], "label": "Tumbling"}]}, "_aEA1UuTai8": {"duration": 109.13, "subset": "training", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=_aEA1UuTai8", "annotations": [{"segment": [10.7228751950078, 108.59038689547582], "label": "Using the balance beam"}]}, "Il0IelOoXa4": {"duration": 107.322, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Il0IelOoXa4", "annotations": [{"segment": [0, 100.29014481279252], "label": "Canoeing"}]}, "7VbXyfF3kEw": {"duration": 137.648, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7VbXyfF3kEw", "annotations": [{"segment": [28.130848270665517, 114.24130735101403], "label": "Playing rubik cube"}]}, "E36gtCVNsSc": {"duration": 116.379, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=E36gtCVNsSc", "annotations": []}, "8rHY_Vt2FLA": {"duration": 129.73, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8rHY_Vt2FLA", "annotations": [{"segment": [30.75316142583116, 101.84738161841655], "label": "Changing car wheel"}]}, "gtzg66XV6E0": {"duration": 156.2, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=gtzg66XV6E0", "annotations": [{"segment": [0, 156.2006349206349], "label": "Clipping cat claws"}]}, "SwQbnQX7LCQ": {"duration": 158.268, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SwQbnQX7LCQ", "annotations": [{"segment": [23.69312948517941, 139.70840249609986], "label": "Hand car wash"}]}, "2Q0WGQbJbso": {"duration": 128.27, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2Q0WGQbJbso", "annotations": [{"segment": [9.098107100624027, 74.68446158424338], "label": "Playing violin"}]}, "Fvm9BuMz0yE": {"duration": 127.71, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Fvm9BuMz0yE", "annotations": []}, "6Eh7B3GnQWI": {"duration": 69.1, "subset": "training", "resolution": "648x480", "url": 
"https://www.youtube.com/watch?v=6Eh7B3GnQWI", "annotations": [{"segment": [3.6653488361934476, 57.89095073634945], "label": "Playing accordion"}]}, "2iBb6ArHweI": {"duration": 101.402, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2iBb6ArHweI", "annotations": [{"segment": [61.129105304212175, 76.78701326053041], "label": "Applying sunscreen"}]}, "J-CUMDsQqwE": {"duration": 223.4, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=J-CUMDsQqwE", "annotations": []}, "nSsll1A6Gxw": {"duration": 226.72, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nSsll1A6Gxw", "annotations": [{"segment": [39.61403957110666, 209.03479699843993], "label": "Playing piano"}]}, "B_NQ_U2JEp4": {"duration": 142.13, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=B_NQ_U2JEp4", "annotations": [{"segment": [60.7541956848674, 132.6701525001927], "label": "Wrapping presents"}]}, "baSx0q9LKg0": {"duration": 236.217, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=baSx0q9LKg0", "annotations": [{"segment": [12.158054602184087, 205.96751794071764], "label": "Making a cake"}]}, "s5QkiA-w5YE": {"duration": 158.06, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=s5QkiA-w5YE", "annotations": [{"segment": [9.863194201237437, 150.9068702527301], "label": "Kayaking"}]}, "xYu5luMTycc": {"duration": 116.47, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xYu5luMTycc", "annotations": [{"segment": [4.038961468170736, 12.561994178451112], "label": "Sailing"}, {"segment": [34.68561100088102, 65.51360165508663], "label": "Sailing"}, {"segment": [68.77774184200251, 83.28503156162868], "label": "Sailing"}, {"segment": [91.4453820289184, 99.24305025321748], "label": "Sailing"}, {"segment": [105.22730726256326, 109.21681193546046], 
"label": "Sailing"}]}, "EfjzkyLrnDg": {"duration": 13.89, "subset": "validation", "resolution": "384x216", "url": "https://www.youtube.com/watch?v=EfjzkyLrnDg", "annotations": [{"segment": [5.112301585023401, 9.661383503900156], "label": "Drinking beer"}]}, "firp_OhUMPc": {"duration": 194.189, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=firp_OhUMPc", "annotations": [{"segment": [15.503291628579115, 183.94186243007337], "label": "Disc dog"}]}, "Ol7JKNItQC4": {"duration": 223.66, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ol7JKNItQC4", "annotations": [{"segment": [23.82138754, 183.28892608], "label": "Zumba"}]}, "2syOa4jpZDU": {"duration": 114.289, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2syOa4jpZDU", "annotations": [{"segment": [0.01, 114.241], "label": "Clipping cat claws"}]}, "18qVzfQ9VAs": {"duration": 70.87, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=18qVzfQ9VAs", "annotations": [{"segment": [7.669612324492979, 64.83305382215288], "label": "Cumbia"}]}, "wN2XnDS0aGc": {"duration": 23.27, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wN2XnDS0aGc", "annotations": [{"segment": [1.2438003599999998, 21.16116555226209], "label": "High jump"}]}, "I6IfZiNmlWA": {"duration": 41.169, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=I6IfZiNmlWA", "annotations": [{"segment": [3.725111981279251, 38.664093322932914], "label": "Hurling"}]}, "w1FFMG52FZE": {"duration": 224.816, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=w1FFMG52FZE", "annotations": [{"segment": [23.557537276383027, 213.35488187839206], "label": "Hula hoop"}]}, "PwpGXy4U1Bo": {"duration": 188.78, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PwpGXy4U1Bo", 
"annotations": []}, "gWbIoFrpwTY": {"duration": 118.77000000000001, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gWbIoFrpwTY", "annotations": []}, "L1lXij7Fyvo": {"duration": 107.137, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=L1lXij7Fyvo", "annotations": [{"segment": [12.57852891498934, 93.82181084645352], "label": "Slacklining"}]}, "2OokWbQ3VQE": {"duration": 31.09, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=2OokWbQ3VQE", "annotations": []}, "grvzIljsqcI": {"duration": 214.04, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=grvzIljsqcI", "annotations": [{"segment": [27.04737088923557, 212.70586736349455], "label": "Smoking a cigarette"}]}, "QQNW2ha8WIs": {"duration": 47.067, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QQNW2ha8WIs", "annotations": [{"segment": [5.598025656976432, 12.485648709935933], "label": "Doing a powerbomb"}, {"segment": [15.41655213672721, 23.256718803393877], "label": "Doing a powerbomb"}, {"segment": [26.041077058845588, 36.07942129560572], "label": "Doing a powerbomb"}, {"segment": [39.23014247940634, 42.38086366320696], "label": "Doing a powerbomb"}]}, "N6ERAg1EKcc": {"duration": 24.312, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=N6ERAg1EKcc", "annotations": [{"segment": [5.196017277425791, 8.723240140405617], "label": "Hitting a pinata"}]}, "hRk-3fep5WQ": {"duration": 48.69, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=hRk-3fep5WQ", "annotations": [{"segment": [0, 48.4917006802721], "label": "Brushing hair"}]}, "eLVaojWbwHE": {"duration": 209.0, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=eLVaojWbwHE", "annotations": [{"segment": [134.92675485491418, 189.02782081123246], "label": "Painting"}, 
{"segment": [34.872373839313575, 134.21272432], "label": "Painting"}]}, "R-lGXNY19wE": {"duration": 77.184, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=R-lGXNY19wE", "annotations": [{"segment": [12.288816990730547, 63.4922927766485], "label": "Applying sunscreen"}]}, "iN1DEIADG9o": {"duration": 137.532, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iN1DEIADG9o", "annotations": [{"segment": [0.2, 125.94498464118566], "label": "Disc dog"}]}, "g0jF_Gh8c8g": {"duration": 183.81, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=g0jF_Gh8c8g", "annotations": [{"segment": [88.8703744149766, 156.83027613104525], "label": "Ice fishing"}]}, "aKfUE-KeYWw": {"duration": 42.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aKfUE-KeYWw", "annotations": [{"segment": [0.32801354134165367, 41.32970620904836], "label": "Mowing the lawn"}]}, "AbzyaqyFI0Y": {"duration": 34.53, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AbzyaqyFI0Y", "annotations": [{"segment": [6.302315611544461, 27.4177662074883], "label": "Kayaking"}]}, "hYj38k-VOFM": {"duration": 41.239, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hYj38k-VOFM", "annotations": [{"segment": [0.2, 9.006864274570983], "label": "Raking leaves"}, {"segment": [22.967503900156004, 24.38286744149766], "label": "Raking leaves"}]}, "sax2PjbeLdk": {"duration": 23.128, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sax2PjbeLdk", "annotations": [{"segment": [1.9461060842433697, 5.2256552262090485], "label": "Putting on shoes"}]}, "hHxEBnhLsXs": {"duration": 122.555, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hHxEBnhLsXs", "annotations": [{"segment": [0.01, 108.37660998439938], "label": "Wakeboarding"}]}, 
"ScKbopywnvM": {"duration": 51.595, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ScKbopywnvM", "annotations": [{"segment": [6.519772023556942, 50.62884694836193], "label": "Slacklining"}]}, "ynUBEoobKW0": {"duration": 40.8, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ynUBEoobKW0", "annotations": [{"segment": [11.010858669266772, 35.00562004680188], "label": "Using the balance beam"}]}, "hltWAq_Odxk": {"duration": 9.335, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=hltWAq_Odxk", "annotations": [{"segment": [0.010000000000000002, 9.301], "label": "Doing a powerbomb"}]}, "giSSBgCi16c": {"duration": 153.86, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=giSSBgCi16c", "annotations": []}, "SN_YS2w3JhE": {"duration": 114.428, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SN_YS2w3JhE", "annotations": [{"segment": [31.130132192846034, 83.6212597200622], "label": "Playing beach volleyball"}]}, "xKDHIyd_tWA": {"duration": 111.015, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xKDHIyd_tWA", "annotations": [{"segment": [17.145773859594385, 84.17016258346334], "label": "Rope skipping"}]}, "87F9xGaMwCw": {"duration": 126.805, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=87F9xGaMwCw", "annotations": []}, "zdMvd5Cr5jM": {"duration": 106.53, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zdMvd5Cr5jM", "annotations": [{"segment": [41.050751095163804, 99.3860289672387], "label": "Cheerleading"}]}, "Zo7oziWT-7o": {"duration": 183.833, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Zo7oziWT-7o", "annotations": [{"segment": [0.20000000000000004, 174.31957566302654], "label": "Slacklining"}]}, "h3uTJ7HknzA": 
{"duration": 83.453, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=h3uTJ7HknzA", "annotations": [{"segment": [0.01, 4.426487737060502], "label": "Capoeira"}, {"segment": [13.800226474365095, 25.38720908019994], "label": "Capoeira"}, {"segment": [33.84961210693325, 40.879915706708275], "label": "Capoeira"}, {"segment": [45.95735746645866, 61.18968274570984], "label": "Capoeira"}]}, "YcAs1-6SYb4": {"duration": 233.9, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=YcAs1-6SYb4", "annotations": [{"segment": [0.01, 102.51064118564743], "label": "Hammer throw"}, {"segment": [129.14151950078002, 182.03846957878318], "label": "Hammer throw"}]}, "Kw2z2wqn6RM": {"duration": 86.94, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Kw2z2wqn6RM", "annotations": []}, "ZWLF3bFfkac": {"duration": 167.23, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ZWLF3bFfkac", "annotations": []}, "s_gb6Xi_J5g": {"duration": 12.586, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=s_gb6Xi_J5g", "annotations": [{"segment": [0.2, 12.385215419501135], "label": "Using the monkey bar"}]}, "0PyY_iHBZHk": {"duration": 105.12, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0PyY_iHBZHk", "annotations": [{"segment": [65.31524180967239, 99.85538221528861], "label": "Playing water polo"}]}, "eTIaRqgQ4Qg": {"duration": 87.284, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eTIaRqgQ4Qg", "annotations": [{"segment": [12.110248049921998, 74.44746801872076], "label": "Wakeboarding"}]}, "P3kWD8Oocio": {"duration": 20.527, "subset": "validation", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=P3kWD8Oocio", "annotations": [{"segment": [4.628823442674115, 6.288814887700485], "label": "Fun sliding down"}]}, "J_SD_hhGET8": 
{"duration": 234.92000000000002, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=J_SD_hhGET8", "annotations": [{"segment": [33.3432422823713, 194.56331485647428], "label": "Belly dance"}]}, "EQajiMQAW74": {"duration": 37.83, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=EQajiMQAW74", "annotations": [{"segment": [0.01, 37.825305], "label": "Getting a haircut"}]}, "yR7HKCyK0lg": {"duration": 209.979, "subset": "testing", "resolution": "1252x720", "url": "https://www.youtube.com/watch?v=yR7HKCyK0lg", "annotations": []}, "eLzYgo71GcY": {"duration": 171.74, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eLzYgo71GcY", "annotations": []}, "Tm1ebIrDyz0": {"duration": 52.62, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Tm1ebIrDyz0", "annotations": [{"segment": [6.81302567204729, 12.641035584280514], "label": "High jump"}, {"segment": [13.297712757489892, 21.424092775955938], "label": "High jump"}, {"segment": [22.32702388911883, 45.22864030479587], "label": "High jump"}]}, "Aqq8S79ZAhk": {"duration": 59.792, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Aqq8S79ZAhk", "annotations": [{"segment": [0.2, 43.373926146645864], "label": "Laying tile"}]}, "IqXaLlFSWwc": {"duration": 13.35, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IqXaLlFSWwc", "annotations": [{"segment": [4.51992109828393, 12.97654766926677], "label": "High jump"}]}, "ekn9AFX2XX8": {"duration": 67.06, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ekn9AFX2XX8", "annotations": [{"segment": [7.353488179320255, 24.379319816391906], "label": "Applying sunscreen"}]}, "4fw4nswO748": {"duration": 64.088, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4fw4nswO748", "annotations": [{"segment": 
[0.2, 10.693489859594386], "label": "Snowboarding"}, {"segment": [14.790995319812795, 40.475358814352575], "label": "Snowboarding"}, {"segment": [43.373594383775355, 56.66550234009361], "label": "Snowboarding"}]}, "E0xXymnjDkc": {"duration": 56.12, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=E0xXymnjDkc", "annotations": [{"segment": [11.09932449297972, 56.001], "label": "Volleyball"}]}, "88wc2an-eC4": {"duration": 123.58, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=88wc2an-eC4", "annotations": [{"segment": [5.105983619344774, 118.78637363494539], "label": "Doing kickboxing"}]}, "h3GLEcwSvD4": {"duration": 64.3, "subset": "training", "resolution": "482x360", "url": "https://www.youtube.com/watch?v=h3GLEcwSvD4", "annotations": [{"segment": [12.939459912636506, 55.56946350078003], "label": "Tai chi"}]}, "ZhUC4qTGdHY": {"duration": 164.282, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=ZhUC4qTGdHY", "annotations": [{"segment": [37.93075586969057, 76.11780063039255], "label": "Waterskiing"}, {"segment": [92.77657854613504, 117.38031126989081], "label": "Waterskiing"}]}, "HdOylJP08Lk": {"duration": 183.508, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HdOylJP08Lk", "annotations": [{"segment": [68.42159918777703, 93.90077210707474], "label": "Playing blackjack"}, {"segment": [123.67418725741031, 126.82329850702028], "label": "Playing blackjack"}, {"segment": [133.40780384711388, 137.4157636193448], "label": "Playing blackjack"}, {"segment": [150.584774299532, 177.20907850078004], "label": "Playing blackjack"}]}, "wYIeRRTRtX8": {"duration": 15.37, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=wYIeRRTRtX8", "annotations": [{"segment": [9.808094353706123, 13.045484910552887], "label": "Springboard diving"}]}, "vycd0CJTwoA": {"duration": 111.805, "subset": "training", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vycd0CJTwoA", "annotations": [{"segment": [3.488426884014136, 90.00141360756471], "label": "Longboarding"}, {"segment": [93.83868306864274, 103.25743564430577], "label": "Longboarding"}]}, "dJVTlZWqYOs": {"duration": 71.889, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=dJVTlZWqYOs", "annotations": [{"segment": [0.01, 71.888978], "label": "Playing ten pins"}]}, "d3RF0qC6RJs": {"duration": 172.478, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=d3RF0qC6RJs", "annotations": [{"segment": [0.01, 172.4776643], "label": "Disc dog"}]}, "AwgGYaV1lT0": {"duration": 3.11, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AwgGYaV1lT0", "annotations": []}, "vopKTwCiHrA": {"duration": 162.006, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vopKTwCiHrA", "annotations": [{"segment": [0.01, 161.961], "label": "Making a cake"}]}, "FU0EPNGKsv8": {"duration": 58.05, "subset": "validation", "resolution": "576x360", "url": "https://www.youtube.com/watch?v=FU0EPNGKsv8", "annotations": [{"segment": [17.20667418096724, 51.438899656786276], "label": "Layup drill in basketball"}]}, "8B4M8AsGZmQ": {"duration": 35.063, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8B4M8AsGZmQ", "annotations": [{"segment": [15.976844454546738, 31.57198941212938], "label": "Wakeboarding"}]}, "nAkA1HfRwF8": {"duration": 42.377, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nAkA1HfRwF8", "annotations": [{"segment": [7.710768450061092, 34.072373426732945], "label": "Capoeira"}]}, "oP77DgsbhKQ": {"duration": 214.599, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oP77DgsbhKQ", "annotations": [{"segment": [0.01, 214.575599], "label": "Laying tile"}]}, "vBKIXqRd-eA": 
{"duration": 233.748, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vBKIXqRd-eA", "annotations": [{"segment": [29.544367537966824, 110.1530740304442], "label": "Calf roping"}]}, "X9Y-b-SbkIU": {"duration": 71.12, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=X9Y-b-SbkIU", "annotations": []}, "9sbiZgFefA4": {"duration": 131.286, "subset": "training", "resolution": "324x240", "url": "https://www.youtube.com/watch?v=9sbiZgFefA4", "annotations": [{"segment": [10.646695787831515, 115.49329797191888], "label": "Rollerblading"}]}, "80spFFIdnC4": {"duration": 119.37, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=80spFFIdnC4", "annotations": [{"segment": [2.467925193070845, 34.63546900810465], "label": "Sailing"}, {"segment": [81.86411715272074, 110.49880933488956], "label": "Sailing"}]}, "fk_hkHmnmJo": {"duration": 60.326, "subset": "validation", "resolution": "1920x818", "url": "https://www.youtube.com/watch?v=fk_hkHmnmJo", "annotations": [{"segment": [4.041127925117005, 60.241], "label": "Applying sunscreen"}]}, "PYNTOqgOXWc": {"duration": 63.925, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PYNTOqgOXWc", "annotations": [{"segment": [15.342580343213728, 36.27563806552262], "label": "Mooping floor"}, {"segment": [50.32746489859595, 56.605940717628705], "label": "Mooping floor"}]}, "sRN_crwj3B4": {"duration": 225.443, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=sRN_crwj3B4", "annotations": [{"segment": [8.791770670826834, 225.401], "label": "Hanging wallpaper"}]}, "V6s9eMtUnME": {"duration": 103.17, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=V6s9eMtUnME", "annotations": [{"segment": [31.6868551099902, 86.45984732182819], "label": "Cleaning windows"}]}, "rLCwdyMr8bQ": {"duration": 63.39, "subset": "testing", 
"resolution": "654x480", "url": "https://www.youtube.com/watch?v=rLCwdyMr8bQ", "annotations": []}, "NbHdPDi4L_g": {"duration": 164.584, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NbHdPDi4L_g", "annotations": []}, "RkCSfz8GeNg": {"duration": 154.018, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RkCSfz8GeNg", "annotations": [{"segment": [52.14024475195007, 117.25548128549141], "label": "Blow-drying hair"}]}, "AyYpsQ4xOo8": {"duration": 195.4, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AyYpsQ4xOo8", "annotations": [{"segment": [58.10452418096724, 157.29218903588142], "label": "Getting a tattoo"}]}, "UJwWjTvDEpQ": {"duration": 131.05, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=UJwWjTvDEpQ", "annotations": [{"segment": [2.097003369988427, 130.08843050550246], "label": "Playing guitarra"}]}, "3jEnu5mEYA4": {"duration": 176.84, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3jEnu5mEYA4", "annotations": [{"segment": [118.76908996359855, 152.1513428918109], "label": "Preparing pasta"}]}, "tNY0Isg-K18": {"duration": 50.109, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tNY0Isg-K18", "annotations": []}, "NnEUVHhtLZs": {"duration": 138.21, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NnEUVHhtLZs", "annotations": [{"segment": [57.99873751684761, 109.31360565443025], "label": "Playing harmonica"}]}, "KnF56TCKPYw": {"duration": 226.72, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=KnF56TCKPYw", "annotations": [{"segment": [0.01, 190.6425654359508], "label": "Walking the dog"}]}, "3-gD02qDjPI": {"duration": 103.35, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=3-gD02qDjPI", "annotations": []}, 
"b1D6v3kZrHM": {"duration": 161.96, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=b1D6v3kZrHM", "annotations": [{"segment": [28.852995850000003, 82.11659078608042], "label": "Archery"}, {"segment": [93.9919131459136, 129.36521379222518], "label": "Archery"}]}, "GFVqcC1q9wM": {"duration": 216.155, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GFVqcC1q9wM", "annotations": []}, "OwchMqCYaF4": {"duration": 169.34, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OwchMqCYaF4", "annotations": [{"segment": [0, 16.115278529508526], "label": "Cheerleading"}, {"segment": [36.72170025576533, 145.8300605803432], "label": "Cheerleading"}]}, "opaWopwV74I": {"duration": 203.5, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=opaWopwV74I", "annotations": [{"segment": [0.01, 48.57324715845777], "label": "Hand washing clothes"}, {"segment": [80.95541193076295, 180.6416838768789], "label": "Hand washing clothes"}]}, "nbOsU2Vmp1U": {"duration": 206.1, "subset": "testing", "resolution": "426x238", "url": "https://www.youtube.com/watch?v=nbOsU2Vmp1U", "annotations": []}, "iebP0qmPz1k": {"duration": 188.48, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iebP0qmPz1k", "annotations": []}, "JV2pXP8ODcI": {"duration": 192.91, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JV2pXP8ODcI", "annotations": []}, "tB_B3HVdO2I": {"duration": 9.474, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=tB_B3HVdO2I", "annotations": [{"segment": [0.19999999999999998, 9.309000000000001], "label": "Using the monkey bar"}]}, "tl2RLYJUu3k": {"duration": 45.79, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tl2RLYJUu3k", "annotations": [{"segment": [0.01, 43.3694344773791], "label": "Doing 
karate"}]}, "DmTI8y6Anvo": {"duration": 37.45, "subset": "training", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=DmTI8y6Anvo", "annotations": [{"segment": [6.534964118564743, 16.220714508580343], "label": "Layup drill in basketball"}]}, "rkgeHZjFskY": {"duration": 173.43, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rkgeHZjFskY", "annotations": [{"segment": [12.445823242453507, 159.36064953042123], "label": "Braiding hair"}]}, "BrgYIg6UXhU": {"duration": 267.029, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BrgYIg6UXhU", "annotations": [{"segment": [0.01, 266.981], "label": "Removing ice from car"}]}, "LkA7hS3FBkQ": {"duration": 55.566, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LkA7hS3FBkQ", "annotations": [{"segment": [0.2, 55.340999999999994], "label": "Changing car wheel"}]}, "Bnw54GNBKlU": {"duration": 154.09, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Bnw54GNBKlU", "annotations": [{"segment": [62.019665279251164, 99.51992800624024], "label": "Hopscotch"}, {"segment": [107.69306219032761, 148.07796051170047], "label": "Hopscotch"}]}, "59mz2Nd8GM0": {"duration": 222.91, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=59mz2Nd8GM0", "annotations": [{"segment": [3.84967297, 190.3167508853354], "label": "Making a sandwich"}]}, "tXNycNfALsE": {"duration": 133.38, "subset": "training", "resolution": "628x352", "url": "https://www.youtube.com/watch?v=tXNycNfALsE", "annotations": [{"segment": [47.85694157017981, 128.58951989391576], "label": "Using the balance beam"}]}, "yjazHd6a5SQ": {"duration": 155.34, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yjazHd6a5SQ", "annotations": [{"segment": [50.64957492721477, 92.81716362259934], "label": "Mixing drinks"}]}, "aGvpDyMNATI": {"duration": 4.3, 
"subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=aGvpDyMNATI", "annotations": []}, "LvNPoXWFHWQ": {"duration": 108.74000000000001, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=LvNPoXWFHWQ", "annotations": []}, "FWbCX1wBVoE": {"duration": 168.67, "subset": "validation", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=FWbCX1wBVoE", "annotations": [{"segment": [81.01264898595943, 108.36756942277692], "label": "Mooping floor"}]}, "FWPJWq-uhUw": {"duration": 117.82, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FWPJWq-uhUw", "annotations": [{"segment": [54.038231522620904, 55.87626660842434], "label": "Long jump"}, {"segment": [75.35943851794072, 78.6679016723869], "label": "Long jump"}, {"segment": [80.68974026677067, 83.44679289547582], "label": "Long jump"}]}, "W_hux-Z6Ll0": {"duration": 155.667, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=W_hux-Z6Ll0", "annotations": [{"segment": [0.01, 0.02], "label": "Skiing"}]}, "vD9oh7NZ2PA": {"duration": 119.421, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=vD9oh7NZ2PA", "annotations": [{"segment": [13.600119427906368, 17.326179545140988], "label": "Canoeing"}, {"segment": [100.78992553198128, 106.00640966302653], "label": "Canoeing"}]}, "HtuDZLsOK6M": {"duration": 125.88, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HtuDZLsOK6M", "annotations": [{"segment": [16.10262181903276, 19.244596808112323], "label": "Sailing"}, {"segment": [38.68556705304212, 45.36226390483619], "label": "Sailing"}]}, "bqDxblFvgro": {"duration": 185.6, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bqDxblFvgro", "annotations": [{"segment": [4.920744149765991, 168.19179563182527], "label": "Plataform diving"}]}, "PGNwd5bySwI": {"duration": 94.459, 
"subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PGNwd5bySwI", "annotations": [{"segment": [22.071996879875194, 78.59297035881436], "label": "Wakeboarding"}]}, "P2hrv6QzDPI": {"duration": 173.9, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=P2hrv6QzDPI", "annotations": [{"segment": [0.01, 173.8942403628118], "label": "Playing saxophone"}]}, "0EdDWY0Zuqw": {"duration": 179.93, "subset": "validation", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=0EdDWY0Zuqw", "annotations": [{"segment": [0.01, 179.088683852116], "label": "Playing saxophone"}]}, "WUTHTWQb-5g": {"duration": 15.09, "subset": "training", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=WUTHTWQb-5g", "annotations": [{"segment": [0.2, 14.861], "label": "Playing field hockey"}]}, "siKl-1EFFaQ": {"duration": 10.449, "subset": "testing", "resolution": "293x240", "url": "https://www.youtube.com/watch?v=siKl-1EFFaQ", "annotations": []}, "eyWerZITH1Y": {"duration": 39.43, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=eyWerZITH1Y", "annotations": [{"segment": [1.9067893010920436, 36.536543382215285], "label": "Using the pommel horse"}]}, "-HpCLXdtcas": {"duration": 23.34, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=-HpCLXdtcas", "annotations": []}, "m2DOej6tPNs": {"duration": 206.36, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=m2DOej6tPNs", "annotations": [{"segment": [0.01, 206.35573696145124], "label": "Doing step aerobics"}]}, "8tI9IsSpgeI": {"duration": 21.69, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=8tI9IsSpgeI", "annotations": [{"segment": [6.824682030000001, 17.683146060000002], "label": "Clean and jerk"}]}, "633ZdPm_GjM": {"duration": 70.891, "subset": "validation", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=633ZdPm_GjM", "annotations": [{"segment": [0.01, 70.24289798773412], "label": "Making an omelette"}]}, "_z3MXy3Wc-k": {"duration": 88.38, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=_z3MXy3Wc-k", "annotations": []}, "SqfB4ohs2Ro": {"duration": 237.169, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=SqfB4ohs2Ro", "annotations": [{"segment": [32.18981202028081, 48.469716950077995], "label": "Fixing the roof"}, {"segment": [56.609669414976594, 73.25957218408736], "label": "Fixing the roof"}, {"segment": [74.73956354134165, 97.30943173946957], "label": "Fixing the roof"}, {"segment": [96.93943390015599, 154.28909899375975], "label": "Fixing the roof"}, {"segment": [159.83906658346334, 213.48875328393135], "label": "Fixing the roof"}]}, "lXueRLwe3tk": {"duration": 151.487, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=lXueRLwe3tk", "annotations": [{"segment": [29.524375975039003, 62.827872074883], "label": "Hand car wash"}, {"segment": [74.4014274570983, 79.83391263650546], "label": "Hand car wash"}]}, "2DCHbuwZNSs": {"duration": 137.997, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2DCHbuwZNSs", "annotations": []}, "srR0M_HL3Ao": {"duration": 154.67000000000002, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=srR0M_HL3Ao", "annotations": [{"segment": [50.166939157566304, 54.26712168486739], "label": "Playing polo"}, {"segment": [84.17433541341654, 86.58620748829954], "label": "Playing polo"}, {"segment": [107.56949453978159, 121.31716536661466], "label": "Playing polo"}]}, "MOvLBw1EzmI": {"duration": 37.942, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MOvLBw1EzmI", "annotations": [{"segment": [0.19999999999999998, 37.711], "label": "Removing ice from car"}]}, "_icW_MAe7Po": {"duration": 
62.323, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_icW_MAe7Po", "annotations": [{"segment": [7.389236823712949, 54.544234975039], "label": "Futsal"}]}, "3idPvL1koOA": {"duration": 31.046, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3idPvL1koOA", "annotations": []}, "9PRK4oy1hhY": {"duration": 38.406, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=9PRK4oy1hhY", "annotations": [{"segment": [18.843131639551295, 38.405239095238095], "label": "Table soccer"}]}, "UsexiHSzIQI": {"duration": 91.35, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UsexiHSzIQI", "annotations": [{"segment": [14.813104524180968, 32.47488299531981], "label": "Spinning"}, {"segment": [41.30577223088923, 46.860686427457104], "label": "Spinning"}, {"segment": [66.51653666146646, 83.18127925117004], "label": "Spinning"}]}, "-l5e1zVzQOQ": {"duration": 138.786, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-l5e1zVzQOQ", "annotations": [{"segment": [0.2, 70.1280561622465], "label": "Playing drums"}, {"segment": [78.78584087363495, 138.761], "label": "Playing drums"}]}, "nhWNiPWwH0I": {"duration": 234.17000000000002, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nhWNiPWwH0I", "annotations": []}, "K6Tm5xHkJ5c": {"duration": 114.64, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=K6Tm5xHkJ5c", "annotations": [{"segment": [25.81087088455538, 50.817943021840875], "label": "Playing accordion"}, {"segment": [52.78278440405616, 110.6562942074883], "label": "Playing accordion"}]}, "eM-l2d8e-CA": {"duration": 186.596, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eM-l2d8e-CA", "annotations": [{"segment": [31.429684867394695, 61.11327613104524], "label": "Curling"}, {"segment": 
[65.1874945397816, 90.50585179407176], "label": "Curling"}, {"segment": [92.25194539781592, 126.0097550702028], "label": "Curling"}, {"segment": [137.06834789391576, 142.03083931357256], "label": "Curling"}, {"segment": [148.1428221528861, 159.49364742589705], "label": "Curling"}]}, "MWQ9mb_mB8U": {"duration": 85.01, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=MWQ9mb_mB8U", "annotations": []}, "SlgBcsCTy58": {"duration": 135.141, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SlgBcsCTy58", "annotations": [{"segment": [0.01, 110.89474515444617], "label": "Grooming dog"}]}, "z_ExqQ80T5g": {"duration": 114.55, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=z_ExqQ80T5g", "annotations": [{"segment": [14.3065456, 103.36999419656786], "label": "Sailing"}]}, "bXJMw4gLmGM": {"duration": 159.452, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=bXJMw4gLmGM", "annotations": [{"segment": [18.966835892828826, 147.5639667339503], "label": "Waxing skis"}]}, "Feq-2NIDl94": {"duration": 175.87, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Feq-2NIDl94", "annotations": []}, "DK271OQnNRA": {"duration": 31.208, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DK271OQnNRA", "annotations": [{"segment": [0.01, 31.161], "label": "River tubing"}]}, "FkMk9TZdea0": {"duration": 210.3, "subset": "training", "resolution": "2560x1080", "url": "https://www.youtube.com/watch?v=FkMk9TZdea0", "annotations": [{"segment": [5.738325514820593, 205.1041491154446], "label": "Painting"}]}, "1wqSUOGmpBs": {"duration": 129.73, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=1wqSUOGmpBs", "annotations": [{"segment": [3.833383928902043, 112.98403362438421], "label": "Sumo"}]}, "ipmWSYmt0_Y": {"duration": 148.0, "subset": 
"testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ipmWSYmt0_Y", "annotations": []}, "sYphlW-m1pU": {"duration": 195.698, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=sYphlW-m1pU", "annotations": [{"segment": [0.01, 195.697777], "label": "Playing rubik cube"}]}, "3pjVV7A6Apw": {"duration": 201.29, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=3pjVV7A6Apw", "annotations": [{"segment": [123.4140609921997, 175.54315545709827], "label": "Cleaning windows"}]}, "nwttSIR1qvs": {"duration": 198.972, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nwttSIR1qvs", "annotations": [{"segment": [79.42530126423596, 84.6940246754509], "label": "Cleaning sink"}]}, "t3wyR2VQy20": {"duration": 38.778, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=t3wyR2VQy20", "annotations": [{"segment": [0.01, 38.777323], "label": "Powerbocking"}]}, "l_KhWbeZeRA": {"duration": 221.983, "subset": "validation", "resolution": "658x480", "url": "https://www.youtube.com/watch?v=l_KhWbeZeRA", "annotations": [{"segment": [32.552854920436815, 208.4767942776911], "label": "Futsal"}]}, "k1QAWLupB-I": {"duration": 42.91, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=k1QAWLupB-I", "annotations": []}, "7lxP-hWFA24": {"duration": 171.387, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7lxP-hWFA24", "annotations": []}, "9fTAW_HAN2Y": {"duration": 155.016, "subset": "testing", "resolution": "778x480", "url": "https://www.youtube.com/watch?v=9fTAW_HAN2Y", "annotations": []}, "f-uRpjoKGLQ": {"duration": 212.835, "subset": "training", "resolution": "1280x716", "url": "https://www.youtube.com/watch?v=f-uRpjoKGLQ", "annotations": [{"segment": [31.369429797191888, 191.7020709828393], "label": "Skiing"}]}, "lqOrPn05NpU": {"duration": 204.253, "subset": 
"testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=lqOrPn05NpU", "annotations": []}, "b1U1VmaZz7Y": {"duration": 58.28, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=b1U1VmaZz7Y", "annotations": [{"segment": [0.01, 58.282107], "label": "Starting a campfire"}]}, "vl7OBBTv7KY": {"duration": 235.149, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vl7OBBTv7KY", "annotations": [{"segment": [26.076656228111368, 194.82595596880847], "label": "Making a cake"}]}, "TiQ6P1NkPHU": {"duration": 135.51, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TiQ6P1NkPHU", "annotations": [{"segment": [59.19385822152886, 130.86070799687985], "label": "Cumbia"}]}, "e8KIgIC9RNg": {"duration": 97.39, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=e8KIgIC9RNg", "annotations": []}, "m22vOf2fw1M": {"duration": 139.157, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=m22vOf2fw1M", "annotations": [{"segment": [23.751933697347894, 79.71539391575664], "label": "Trimming branches or hedges"}]}, "mM6F8DppWcQ": {"duration": 166.58, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mM6F8DppWcQ", "annotations": [{"segment": [18.652805898733263, 140.15513535177547], "label": "Rollerblading"}]}, "VUvEWwghANE": {"duration": 109.552, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=VUvEWwghANE", "annotations": [{"segment": [13.629995672273084, 16.696745283470595], "label": "Roof shingle removal"}, {"segment": [27.08961896586216, 50.60136598504308], "label": "Roof shingle removal"}, {"segment": [0.2, 109.35111111111111], "label": "Roof shingle removal"}, {"segment": [69.51298788239926, 72.57973746249256], "label": "Roof shingle removal"}, {"segment": [81.0984862960851, 82.29111113278806], "label": "Roof 
shingle removal"}, {"segment": [88.76536024631838, 93.1951096397865], "label": "Roof shingle removal"}]}, "LBh2kEwx2cQ": {"duration": 146.217, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LBh2kEwx2cQ", "annotations": [{"segment": [0.01, 145.32265996959305], "label": "Laying tile"}]}, "OsB_uEj1PRM": {"duration": 124.993, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=OsB_uEj1PRM", "annotations": [{"segment": [0.2, 111.88246243925133], "label": "Doing crunches"}]}, "s5y4xXcphcc": {"duration": 131.87, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=s5y4xXcphcc", "annotations": [{"segment": [3.0846411856474263, 129.7605725429017], "label": "Windsurfing"}]}, "Gi9aPJOgn6M": {"duration": 30.117, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=Gi9aPJOgn6M", "annotations": [{"segment": [12.130949297971918, 12.5063751950078], "label": "Hand car wash"}, {"segment": [13.210298751950079, 13.398011700468018], "label": "Hand car wash"}, {"segment": [16.917629485179404, 17.76233775351014], "label": "Hand car wash"}]}, "xmStXpxlG_I": {"duration": 9.126, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xmStXpxlG_I", "annotations": [{"segment": [0.2, 8.626335313572543], "label": "Hitting a pinata"}]}, "YfcxIgsqs5M": {"duration": 195.28, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YfcxIgsqs5M", "annotations": [{"segment": [0.6092963446429014, 164.510011825273], "label": "Cheerleading"}, {"segment": [170.29832705616224, 185.83538372854915], "label": "Cheerleading"}]}, "fU2mbWwVSXU": {"duration": 162.61, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fU2mbWwVSXU", "annotations": [{"segment": [0.01, 155.2524282848865], "label": "Sharpening knives"}]}, "yHaTlDD-qHA": {"duration": 66.5, "subset": 
"validation", "resolution": "480x352", "url": "https://www.youtube.com/watch?v=yHaTlDD-qHA", "annotations": [{"segment": [31.869090483619345, 51.86479491], "label": "Cricket"}, {"segment": [2.1799703588143524, 9.550346333853353], "label": "Cricket"}]}, "MxvKscpouzY": {"duration": 160.73, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MxvKscpouzY", "annotations": []}, "Vncj0EkAGio": {"duration": 61.58, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=Vncj0EkAGio", "annotations": [{"segment": [16.91331492, 55.20448517940717], "label": "Mixing drinks"}]}, "SSoHwNbASQQ": {"duration": 120.535, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SSoHwNbASQQ", "annotations": [{"segment": [10.154254513037666, 12.974880766659238], "label": "Slacklining"}, {"segment": [18.428091523660946, 29.89863828838868], "label": "Slacklining"}, {"segment": [36.480099546839014, 45.31806180818661], "label": "Slacklining"}, {"segment": [48.32672981204962, 54.90819107049996], "label": "Slacklining"}, {"segment": [59.985318327018796, 113.57721535101403], "label": "Slacklining"}]}, "hj1II_7r8XQ": {"duration": 30.651, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hj1II_7r8XQ", "annotations": []}, "LlA0gm0EBkg": {"duration": 46.891, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=LlA0gm0EBkg", "annotations": []}, "tghS4UnuWzk": {"duration": 219.29, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tghS4UnuWzk", "annotations": [{"segment": [87.92096294851794, 154.63142121684868], "label": "Cleaning shoes"}, {"segment": [182.34191926677067, 198.42085023400938], "label": "Cleaning shoes"}]}, "Tzm6TEManmQ": {"duration": 204.29, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Tzm6TEManmQ", "annotations": [{"segment": 
[161.26414269087772, 165.08858795631826], "label": "Swimming"}, {"segment": [168.9130340093604, 174.33099925117006], "label": "Swimming"}, {"segment": [176.56192611544463, 181.9798913572543], "label": "Swimming"}, {"segment": [188.99137578783152, 196.00286021840876], "label": "Swimming"}]}, "kF9Pfa9EkDI": {"duration": 53.964, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kF9Pfa9EkDI", "annotations": []}, "AauepSs1kUU": {"duration": 170.78, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AauepSs1kUU", "annotations": [{"segment": [29.307498912201385, 161.19124296411857], "label": "Playing field hockey"}]}, "WygAYInf0g8": {"duration": 185.3, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WygAYInf0g8", "annotations": []}, "HWkGANOfqQE": {"duration": 225.4, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HWkGANOfqQE", "annotations": [{"segment": [6.503139625585023, 130.5900741029641], "label": "Playing guitarra"}]}, "eRQAjBwhDXk": {"duration": 81.061, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eRQAjBwhDXk", "annotations": [{"segment": [0, 81.060792], "label": "Snow tubing"}]}, "51faGoLPJgM": {"duration": 8.96, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=51faGoLPJgM", "annotations": []}, "X6IoTFqaegc": {"duration": 71.77, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=X6IoTFqaegc", "annotations": [{"segment": [1.7915199001560065, 8.28577953822153], "label": "Cleaning windows"}, {"segment": [19.93065888923557, 40.19722775975039], "label": "Cleaning windows"}]}, "I8doQJXPqwo": {"duration": 63.066, "subset": "training", "resolution": "698x480", "url": "https://www.youtube.com/watch?v=I8doQJXPqwo", "annotations": [{"segment": [0.01, 63.001], "label": "Hand car wash"}]}, "JBTiD2-FcR0": 
{"duration": 50.109, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=JBTiD2-FcR0", "annotations": [{"segment": [32.44164552976677, 47.607136581903276], "label": "Plastering"}]}, "d7Et_9J1NHM": {"duration": 145.26, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=d7Et_9J1NHM", "annotations": []}, "8rimo9x4qqw": {"duration": 145.706, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8rimo9x4qqw", "annotations": [{"segment": [0.01, 145.26915297814392], "label": "Snowboarding"}]}, "Jx4GCjGARqs": {"duration": 59.12, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Jx4GCjGARqs", "annotations": [{"segment": [2.534204368174727, 37.496182680000004], "label": "Hammer throw"}]}, "ECZb_tQZ4Pg": {"duration": 104.583, "subset": "testing", "resolution": "160x200", "url": "https://www.youtube.com/watch?v=ECZb_tQZ4Pg", "annotations": []}, "t2DdSm_MGXo": {"duration": 88.631, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=t2DdSm_MGXo", "annotations": [{"segment": [4.472944612522601, 85.09638691781855], "label": "Cleaning sink"}]}, "qtKa7Lr3URA": {"duration": 20.27, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=qtKa7Lr3URA", "annotations": []}, "mI7Hwk_AF-M": {"duration": 120.02, "subset": "training", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=mI7Hwk_AF-M", "annotations": [{"segment": [0, 120.02394557823129], "label": "Playing field hockey"}]}, "G4O_Ar7EO4Y": {"duration": 77.81, "subset": "training", "resolution": "624x360", "url": "https://www.youtube.com/watch?v=G4O_Ar7EO4Y", "annotations": [{"segment": [18.31811388455538, 23.534530421216846], "label": "Hammer throw"}, {"segment": [63.462500780031206, 70.1363775351014], "label": "Hammer throw"}]}, "beRmfL0Uh28": {"duration": 160.659, "subset": "training", "resolution": 
"854x480", "url": "https://www.youtube.com/watch?v=beRmfL0Uh28", "annotations": [{"segment": [22.306769255804245, 101.5083312402496], "label": "Rafting"}]}, "7QxUtHqQdbY": {"duration": 13.89, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=7QxUtHqQdbY", "annotations": [{"segment": [0.6720163806552262, 12.34176944], "label": "Chopping wood"}]}, "6LWkrN1qz8E": {"duration": 232.64, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6LWkrN1qz8E", "annotations": [{"segment": [0, 232.581], "label": "Brushing hair"}]}, "-2xeOGDeDMU": {"duration": 148.097, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=-2xeOGDeDMU", "annotations": []}, "kbe4iowYMqM": {"duration": 139.99, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=kbe4iowYMqM", "annotations": [{"segment": [57.438668666146654, 67.04817977379096], "label": "Preparing pasta"}]}, "EX3szlLBdFM": {"duration": 185.667, "subset": "training", "resolution": "792x480", "url": "https://www.youtube.com/watch?v=EX3szlLBdFM", "annotations": [{"segment": [0.2, 0.5793034539781591], "label": "Futsal"}]}, "rx_WRuQ-X14": {"duration": 63.321, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rx_WRuQ-X14", "annotations": [{"segment": [5.627171606864274, 58.857092043681746], "label": "Hanging wallpaper"}]}, "HeHQxObROwQ": {"duration": 92.7, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HeHQxObROwQ", "annotations": [{"segment": [24.221932407908564, 34.77835668120603], "label": "Shaving legs"}, {"segment": [55.89120522780096, 74.97953405074979], "label": "Shaving legs"}]}, "fhXVqPPDjN8": {"duration": 156.41, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=fhXVqPPDjN8", "annotations": [{"segment": [1.9520700719185229, 153.7255166770671], "label": "Cumbia"}]}, 
"uhnY3lZ9ZCI": {"duration": 158.07999999999998, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=uhnY3lZ9ZCI", "annotations": [{"segment": [3.0805382215288613, 58.776669266770675], "label": "Kayaking"}, {"segment": [62.3949906, 140.34932137285492], "label": "Kayaking"}]}, "aw89asgFHbs": {"duration": 15.12, "subset": "training", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=aw89asgFHbs", "annotations": [{"segment": [0.31770280811232454, 14.626095943837754], "label": "Tumbling"}]}, "AnRYS02tvRA": {"duration": 133.306, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AnRYS02tvRA", "annotations": [{"segment": [51.97386895475819, 55.30019656786271], "label": "Rock-paper-scissors"}, {"segment": [69.22919344773791, 75.25816224648986], "label": "Rock-paper-scissors"}, {"segment": [89.81084555382215, 91.69566458658348], "label": "Rock-paper-scissors"}, {"segment": [105.41882527301092, 112.48833229329173], "label": "Rock-paper-scissors"}]}, "gh0GD6OvLHE": {"duration": 16.21, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=gh0GD6OvLHE", "annotations": [{"segment": [0.2, 8.16697538377535], "label": "Shaving legs"}]}, "6V6XB0KmYkk": {"duration": 155.969, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6V6XB0KmYkk", "annotations": [{"segment": [8.272896718876755, 136.74611635319812], "label": "Playing ice hockey"}]}, "YVqRQDkdA8U": {"duration": 182.625, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YVqRQDkdA8U", "annotations": []}, "TPzxIqu1JJg": {"duration": 225.072, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TPzxIqu1JJg", "annotations": [{"segment": [117.05659501557632, 178.7391121495327], "label": "Making a cake"}]}, "W_ZNdQLFmAA": {"duration": 58.329, "subset": "validation", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=W_ZNdQLFmAA", "annotations": [{"segment": [8.735629490485742, 41.67623159126365], "label": "Plastering"}]}, "cDe2GlvMEQg": {"duration": 68.29, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cDe2GlvMEQg", "annotations": [{"segment": [33.32204836193448, 49.39754134165367], "label": "Plastering"}]}, "aSFtL7YEbDE": {"duration": 105.048, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aSFtL7YEbDE", "annotations": [{"segment": [0.2, 104.84707482993197], "label": "Croquet"}]}, "gh8rmWE-564": {"duration": 15.07, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=gh8rmWE-564", "annotations": [{"segment": [0.2, 14.821000000000002], "label": "Skiing"}]}, "czCM6ZNwHK8": {"duration": 231.95, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=czCM6ZNwHK8", "annotations": [{"segment": [9.408030111680658, 218.91762375256917], "label": "Playing saxophone"}]}, "u0Dc0ZtudXY": {"duration": 87.77, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=u0Dc0ZtudXY", "annotations": []}, "ZgdMnfTqblw": {"duration": 94.09, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZgdMnfTqblw", "annotations": []}, "cVnyxcDd23w": {"duration": 44.82, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=cVnyxcDd23w", "annotations": []}, "Mg8y0rgD16M": {"duration": 185.69, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Mg8y0rgD16M", "annotations": [{"segment": [49.52890951638066, 149.15840303], "label": "Preparing salad"}]}, "Xxng1g1PrdE": {"duration": 203.431, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Xxng1g1PrdE", "annotations": [{"segment": [24.748390015600624, 203.401], "label": "Rafting"}]}, "T9Q072gox7I": {"duration": 
221.333, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=T9Q072gox7I", "annotations": []}, "rMevLL0KHzc": {"duration": 147.63, "subset": "training", "resolution": "1280x2276", "url": "https://www.youtube.com/watch?v=rMevLL0KHzc", "annotations": [{"segment": [79.81716692667707, 125.88768174726991], "label": "Playing polo"}]}, "hmb86jpgWfE": {"duration": 169.3, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hmb86jpgWfE", "annotations": [{"segment": [51.766038839313566, 65.23577343525741], "label": "Drinking coffee"}]}, "aFGLXYcKD2M": {"duration": 143.59, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aFGLXYcKD2M", "annotations": [{"segment": [4.704261941906248, 132.16735916848674], "label": "Playing badminton"}]}, "-qXqnltHWtE": {"duration": 213.462, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-qXqnltHWtE", "annotations": [{"segment": [4.315692517640726, 11.951157472539638], "label": "Calf roping"}, {"segment": [35.52150581157541, 45.148831189491425], "label": "Calf roping"}, {"segment": [86.64592333568116, 92.62150460473248], "label": "Calf roping"}, {"segment": [110.21627167471694, 120.50755052697198], "label": "Calf roping"}, {"segment": [152.0453405580762, 164.66045657051785], "label": "Calf roping"}, {"segment": [191.21859554407928, 195.86626986445253], "label": "Calf roping"}]}, "fDTm1IzQf-U": {"duration": 93.62, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=fDTm1IzQf-U", "annotations": []}, "aPjbJ4ZNcVQ": {"duration": 225.025, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=aPjbJ4ZNcVQ", "annotations": [{"segment": [38.300700577235695, 196.02821027270267], "label": "Baking cookies"}]}, "CfDdbeAk8LE": {"duration": 15.349, "subset": "validation", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=CfDdbeAk8LE", "annotations": [{"segment": [3.71138891575663, 13.744111210608423], "label": "Welding"}]}, "ezugU3qibVQ": {"duration": 220.75, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=ezugU3qibVQ", "annotations": [{"segment": [141.52313728549143, 220.701], "label": "Removing curlers"}]}, "XPcBV3UatRI": {"duration": 61.07, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=XPcBV3UatRI", "annotations": [{"segment": [33.13122204, 52.5612276], "label": "Ironing clothes"}, {"segment": [24.21895812, 28.554175439999998], "label": "Ironing clothes"}]}, "9LUHQzLK010": {"duration": 149.07, "subset": "testing", "resolution": "544x360", "url": "https://www.youtube.com/watch?v=9LUHQzLK010", "annotations": []}, "ipCUC6Sc2Jo": {"duration": 166.56, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ipCUC6Sc2Jo", "annotations": []}, "w46wkDledek": {"duration": 192.42, "subset": "testing", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=w46wkDledek", "annotations": []}, "YOk1cMsyk88": {"duration": 119.77000000000001, "subset": "testing", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=YOk1cMsyk88", "annotations": []}, "Izr9-P7YIKw": {"duration": 49.343, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Izr9-P7YIKw", "annotations": [{"segment": [0.01, 47.528001922881046], "label": "Changing car wheel"}]}, "E0dqgXVU6wA": {"duration": 200.67000000000002, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=E0dqgXVU6wA", "annotations": [{"segment": [26.13990331858172, 41.479487301941056], "label": "Doing nails"}]}, "l6a56j87obQ": {"duration": 126.87, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=l6a56j87obQ", "annotations": [{"segment": [4.353357254290172, 105.27209360374414], "label": "Playing 
bagpipes"}]}, "oTk_XIzeCOs": {"duration": 36.27, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=oTk_XIzeCOs", "annotations": []}, "3DDo37BcChI": {"duration": 162.61, "subset": "training", "resolution": "160x120", "url": "https://www.youtube.com/watch?v=3DDo37BcChI", "annotations": [{"segment": [9.3861866099844, 148.40322072542904], "label": "Chopping wood"}]}, "PCoxnf59j5U": {"duration": 213.02, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PCoxnf59j5U", "annotations": [{"segment": [68.24579577224064, 184.52868941492324], "label": "Assembling bicycle"}]}, "n1yugby5jC0": {"duration": 110.133, "subset": "validation", "resolution": "853x470", "url": "https://www.youtube.com/watch?v=n1yugby5jC0", "annotations": [{"segment": [0.2, 109.821], "label": "Playing beach volleyball"}, {"segment": [16.2550466562986, 71.02244167962674], "label": "Playing beach volleyball"}]}, "L9ons75VGCg": {"duration": 184.855, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=L9ons75VGCg", "annotations": [{"segment": [0.2, 150.01372355607475], "label": "Canoeing"}]}, "T7YcO-sOLuU": {"duration": 93.72, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=T7YcO-sOLuU", "annotations": []}, "9Pv2hUyODK4": {"duration": 111.74000000000001, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=9Pv2hUyODK4", "annotations": [{"segment": [47.58732040710199, 51.073570910608424], "label": "Triple jump"}, {"segment": [65.01857320702028, 84.36726389329174], "label": "Triple jump"}]}, "Oebnyz-Qu9s": {"duration": 20.27, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Oebnyz-Qu9s", "annotations": []}, "NurRl6_NLbg": {"duration": 169.088, "subset": "training", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=NurRl6_NLbg", "annotations": [{"segment": [7.935602301047824, 
153.01866058936574], "label": "BMX"}]}, "9A9wnSoXs48": {"duration": 107.74000000000001, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=9A9wnSoXs48", "annotations": [{"segment": [18.228902699687985, 105.76123732215288], "label": "Kayaking"}]}, "noKDv_a8u-Y": {"duration": 209.77, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=noKDv_a8u-Y", "annotations": [{"segment": [0, 203.17453666146645], "label": "Playing guitarra"}]}, "EG7yGT2z5aA": {"duration": 130.59, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EG7yGT2z5aA", "annotations": []}, "CHMk7efu1ro": {"duration": 76.371, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CHMk7efu1ro", "annotations": [{"segment": [13.127798312285234, 60.33555064873383], "label": "Using the rowing machine"}]}, "t0ajvfx6dgA": {"duration": 147.26, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=t0ajvfx6dgA", "annotations": [{"segment": [18.378902013223385, 21.595209865537477], "label": "Playing field hockey"}, {"segment": [28.02782557016566, 31.93334224797563], "label": "Playing field hockey"}, {"segment": [42.271474630413785, 47.32567268405022], "label": "Playing field hockey"}, {"segment": [80.4076963078523, 87.75925711314166], "label": "Playing field hockey"}, {"segment": [97.8676529672387, 100.6244882620905], "label": "Playing field hockey"}]}, "Ww2_b9f6Kh0": {"duration": 44.21, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Ww2_b9f6Kh0", "annotations": [{"segment": [7.242017410296412, 16.691125840873635], "label": "Plataform diving"}, {"segment": [23.58828527925117, 26.96789340405616], "label": "Plataform diving"}, {"segment": [39.10689401560062, 41.10707025273011], "label": "Plataform diving"}]}, "9ku5v_hSVMw": {"duration": 41.47, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=9ku5v_hSVMw", "annotations": [{"segment": [0.5175767613104525, 39.012348383775354], "label": "Clean and jerk"}]}, "yVbzejdydrw": {"duration": 110.086, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yVbzejdydrw", "annotations": [{"segment": [31.495871294851796, 36.98833931357254], "label": "Doing fencing"}, {"segment": [47.62999609984399, 51.06278861154446], "label": "Doing fencing"}, {"segment": [67.19691341653666, 70.4580663026521], "label": "Doing fencing"}, {"segment": [94.83089313572542, 98.2636856474259], "label": "Doing fencing"}]}, "xqzsv8VpaNM": {"duration": 163.82, "subset": "validation", "resolution": "512x288", "url": "https://www.youtube.com/watch?v=xqzsv8VpaNM", "annotations": [{"segment": [17.11792511700468, 38.323712948517944], "label": "Sailing"}, {"segment": [132.3445553822153, 146.65207488299532], "label": "Sailing"}]}, "WsHzMciP5X8": {"duration": 106.16, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WsHzMciP5X8", "annotations": [{"segment": [19.5429399216785, 23.84901142984495], "label": "Cricket"}, {"segment": [46.20745964532458, 98.70840841796937], "label": "Cricket"}]}, "STu_-YXTjU0": {"duration": 161.402, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=STu_-YXTjU0", "annotations": []}, "NLuNMeYBeoc": {"duration": 97.46000000000001, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NLuNMeYBeoc", "annotations": [{"segment": [6.3854201025183865, 33.29540482027444], "label": "Using parallel bars"}, {"segment": [57.31674711070076, 92.8926577098284], "label": "Using parallel bars"}]}, "Uqte3S_ErTM": {"duration": 9.01, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=Uqte3S_ErTM", "annotations": [{"segment": [0.01, 8.320639425897035], "label": "Doing motocross"}]}, "Otm5TV4XI7w": {"duration": 127.733, "subset": 
"validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Otm5TV4XI7w", "annotations": [{"segment": [16.029797971918875, 22.601019500780033], "label": "Rock-paper-scissors"}, {"segment": [24.791426677067083, 27.778345553822152], "label": "Rock-paper-scissors"}, {"segment": [37.734741809672386, 40.32340483619345], "label": "Rock-paper-scissors"}, {"segment": [48.48764976599064, 53.26671996879875], "label": "Rock-paper-scissors"}]}, "FtRGOgg0Qh8": {"duration": 33.97, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=FtRGOgg0Qh8", "annotations": []}, "i3uRumyN7mg": {"duration": 52.29, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=i3uRumyN7mg", "annotations": [{"segment": [4.731497035881436, 52.2912], "label": "Playing accordion"}]}, "ZVMGA4oF6xo": {"duration": 107.137, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZVMGA4oF6xo", "annotations": [{"segment": [0.01, 107.081], "label": "Painting fence"}]}, "ymwMEUuXygs": {"duration": 178.399, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ymwMEUuXygs", "annotations": [{"segment": [49.80388767550703, 138.56209048361933], "label": "Assembling bicycle"}]}, "QRLye5j54ic": {"duration": 218.593, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=QRLye5j54ic", "annotations": [{"segment": [7.159689547581904, 218.561], "label": "Kite flying"}]}, "3JrxcNxNMU4": {"duration": 31.649, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=3JrxcNxNMU4", "annotations": [{"segment": [0.01, 31.648798], "label": "Croquet"}]}, "j8bfR_d_TlM": {"duration": 97.153, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=j8bfR_d_TlM", "annotations": [{"segment": [11.215708985959438, 85.02719920436817], "label": "Carving jack-o-lanterns"}]}, "mio5dnRbo4w": 
{"duration": 181.975, "subset": "validation", "resolution": "512x288", "url": "https://www.youtube.com/watch?v=mio5dnRbo4w", "annotations": [{"segment": [0.01, 176.4191670044727], "label": "Hand car wash"}]}, "5pPQc85IXtU": {"duration": 175.543, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5pPQc85IXtU", "annotations": [{"segment": [9.673849949691155, 14.046211320407666], "label": "BMX"}, {"segment": [21.151298547821995, 66.78782035467557], "label": "BMX"}, {"segment": [93.06387580657675, 156.49166396857052], "label": "BMX"}]}, "escpZEdyNTI": {"duration": 20.74, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=escpZEdyNTI", "annotations": [{"segment": [13.683405271450857, 17.85635865210608], "label": "Spinning"}]}, "UKiwR9T4eGg": {"duration": 74.3, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=UKiwR9T4eGg", "annotations": []}, "87pCIcWgwVM": {"duration": 212.347, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=87pCIcWgwVM", "annotations": [{"segment": [22.52661619344774, 62.94201583463339], "label": "Rope skipping"}, {"segment": [83.48098942277691, 167.29325261310453], "label": "Rope skipping"}]}, "R9dE9WaGURw": {"duration": 33.414, "subset": "testing", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=R9dE9WaGURw", "annotations": []}, "gmnwqOPcOo0": {"duration": 136.395, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gmnwqOPcOo0", "annotations": [{"segment": [75.95021840873635, 82.33258970358816], "label": "Hula hoop"}]}, "SnZnAVuMn4M": {"duration": 194.56, "subset": "validation", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=SnZnAVuMn4M", "annotations": [{"segment": [0.2, 37.62577847113885], "label": "Playing beach volleyball"}, {"segment": [52.49402964118565, 72.21721996879876], "label": "Playing beach volleyball"}, 
{"segment": [88.90607332293293, 95.5816146645866], "label": "Playing beach volleyball"}, {"segment": [120.76661154446178, 124.42060842433696], "label": "Playing beach volleyball"}, {"segment": [164.47797503900154, 168.42301872074881], "label": "Playing beach volleyball"}]}, "Oh3-DvDCpZ4": {"duration": 33.6, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Oh3-DvDCpZ4", "annotations": []}, "YDz0pyc26Ss": {"duration": 208.098, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YDz0pyc26Ss", "annotations": [{"segment": [24.997579674615555, 189.59203285682446], "label": "Longboarding"}]}, "Fdzw3niNDYY": {"duration": 122.068, "subset": "validation", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=Fdzw3niNDYY", "annotations": [{"segment": [29.897885722308892, 96.5492233198128], "label": "Doing fencing"}]}, "g-Egl6eXlX4": {"duration": 176.01, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=g-Egl6eXlX4", "annotations": [{"segment": [16.749483799760153, 110.10726235580036], "label": "Getting a haircut"}]}, "UGujWA07GkM": {"duration": 103.538, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UGujWA07GkM", "annotations": [{"segment": [14.451387675507021, 42.70829095163807], "label": "Playing blackjack"}, {"segment": [57.88628471138846, 62.89179329173168], "label": "Playing blackjack"}, {"segment": [77.58538299531982, 91.79456864274572], "label": "Playing blackjack"}]}, "jaj2Kq5ggGw": {"duration": 200.342, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jaj2Kq5ggGw", "annotations": []}, "upic9S0oEhg": {"duration": 102.05, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=upic9S0oEhg", "annotations": [{"segment": [0, 101.8924929797192], "label": "Playing guitarra"}]}, "IdHJv8SPo2Y": {"duration": 47.183, "subset": "testing", 
"resolution": "320x480", "url": "https://www.youtube.com/watch?v=IdHJv8SPo2Y", "annotations": []}, "ZNUkzdw2EeI": {"duration": 139.06, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZNUkzdw2EeI", "annotations": [{"segment": [7.1593168970358825, 15.40337877847114], "label": "Discus throw"}, {"segment": [68.55588301404056, 77.45079188611544], "label": "Discus throw"}]}, "r8AXq1Q5bn0": {"duration": 168.32, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=r8AXq1Q5bn0", "annotations": [{"segment": [26.921680967238693, 163.49996489859595], "label": "Playing accordion"}]}, "LAbzDJZtSvQ": {"duration": 98.72999999999999, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=LAbzDJZtSvQ", "annotations": [{"segment": [4.928862053042121, 15.86477473322933], "label": "Chopping wood"}, {"segment": [29.727199257410295, 97.49905248673947], "label": "Chopping wood"}]}, "NSi8DorB6L0": {"duration": 39.73, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NSi8DorB6L0", "annotations": [{"segment": [9.166533541341654, 36.79000624024961], "label": "Waterskiing"}]}, "ComW-O6dMW4": {"duration": 137.602, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=ComW-O6dMW4", "annotations": [{"segment": [0.2, 137.40145124716554], "label": "Hand car wash"}]}, "laKctaVegPg": {"duration": 147.005, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=laKctaVegPg", "annotations": [{"segment": [13.965609172017718, 127.29309176947041], "label": "Mooping floor"}]}, "GrACpo7aonA": {"duration": 213.369, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GrACpo7aonA", "annotations": [{"segment": [24.632179877671298, 43.60561572939108], "label": "Playing drums"}, {"segment": [58.584644033380386, 69.236397493995], "label": "Playing drums"}, {"segment": 
[145.46300819651833, 164.43644288611543], "label": "Playing drums"}, {"segment": [176.41966544461778, 197.39030492199686], "label": "Playing drums"}]}, "gLsHOCeeVUM": {"duration": 215.6, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=gLsHOCeeVUM", "annotations": [{"segment": [66.2600055678627, 102.24894260218409], "label": "Washing face"}]}, "pQof_-tUNtY": {"duration": 150.3, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pQof_-tUNtY", "annotations": [{"segment": [5.8604134165366615, 45.71122464898596], "label": "Wrapping presents"}, {"segment": [59.30738377535101, 88.14061778471138], "label": "Wrapping presents"}]}, "-NndIs9BaS4": {"duration": 106.58, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=-NndIs9BaS4", "annotations": [{"segment": [19.786226877646534, 22.114017893915754], "label": "Rope skipping"}, {"segment": [58.19478393135725, 62.01901258970358], "label": "Rope skipping"}, {"segment": [68.33730341653666, 71.8289904524181], "label": "Rope skipping"}]}, "jQ2UoAENwlw": {"duration": 64.44, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jQ2UoAENwlw", "annotations": [{"segment": [25.44557305928237, 63.68921540873635], "label": "Getting a tattoo"}]}, "39WSVNFZR5Y": {"duration": 230.087, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=39WSVNFZR5Y", "annotations": []}, "gnZssGiQC7A": {"duration": 71.12, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gnZssGiQC7A", "annotations": [{"segment": [18.196764555382213, 28.4047056474259], "label": "Long jump"}]}, "V3dTp7_NyiE": {"duration": 63.786, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=V3dTp7_NyiE", "annotations": [{"segment": [0, 27.843182527301092], "label": "Trimming branches or hedges"}, {"segment": [37.28997659906396, 
58.570123244929796], "label": "Trimming branches or hedges"}]}, "aLv03Fznf5A": {"duration": 33.67, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aLv03Fznf5A", "annotations": [{"segment": [0.01, 33.6687074829932], "label": "Using the pommel horse"}]}, "zu960Glpzo4": {"duration": 100.079, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zu960Glpzo4", "annotations": [{"segment": [44.3403327425614, 70.25756948645292], "label": "Arm wrestling"}]}, "ojQdrhHT_gE": {"duration": 95.899, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=ojQdrhHT_gE", "annotations": [{"segment": [0.2, 95.64099999999999], "label": "Riding bumper cars"}]}, "CR_79ZjQG_w": {"duration": 37.55, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CR_79ZjQG_w", "annotations": [{"segment": [8.259094797191887, 12.769380608424337], "label": "Hopscotch"}, {"segment": [21.204200826833073, 26.65168888455538], "label": "Hopscotch"}, {"segment": [29.873321606864273, 33.153529469578785], "label": "Hopscotch"}]}, "IEtCboPbTXI": {"duration": 73.05, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IEtCboPbTXI", "annotations": [{"segment": [19.024191887675507, 66.41379563182528], "label": "Bathing dog"}]}, "TOfBNYOlv8c": {"duration": 86.01, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TOfBNYOlv8c", "annotations": [{"segment": [21.476786187987518, 79.4172747823713], "label": "Mixing drinks"}]}, "wq4H7L15NMA": {"duration": 16.092, "subset": "validation", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=wq4H7L15NMA", "annotations": [{"segment": [0, 16.091427], "label": "Using the rowing machine"}]}, "Zm32ORZly10": {"duration": 74.769, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Zm32ORZly10", "annotations": [{"segment": 
[12.042063229107187, 20.893096032845506], "label": "Tug of war"}, {"segment": [25.085690518826812, 59.44167311228475], "label": "Tug of war"}]}, "VUlsdTzaKV4": {"duration": 148.98, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VUlsdTzaKV4", "annotations": [{"segment": [0.01, 118.96035484555382], "label": "Snowboarding"}]}, "7gYfKVOTjqA": {"duration": 6.223, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=7gYfKVOTjqA", "annotations": []}, "bqA-WmqF19E": {"duration": 80.69, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=bqA-WmqF19E", "annotations": [{"segment": [0.01, 80.68936507936507], "label": "Playing pool"}]}, "Feo8xSjY5A8": {"duration": 54.103, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=Feo8xSjY5A8", "annotations": [{"segment": [3.966953464898596, 52.414427695787836], "label": "Clipping cat claws"}]}, "nMFEEBtIu-Y": {"duration": 227.09, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nMFEEBtIu-Y", "annotations": [{"segment": [68.37521067917545, 129.98461749], "label": "Playing guitarra"}, {"segment": [32.3377584, 49.519463460000004], "label": "Playing guitarra"}, {"segment": [116.55670628729912, 216.10817883055458], "label": "Playing guitarra"}]}, "6it_yeIb_L0": {"duration": 185.25, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6it_yeIb_L0", "annotations": [{"segment": [24.564973238385317, 148.25683848578433], "label": "Skateboarding"}]}, "3La7NPOBVN8": {"duration": 205.59, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3La7NPOBVN8", "annotations": [{"segment": [68.20959332019412, 198.54435614328804], "label": "Braiding hair"}]}, "WJmvyBeKMUY": {"duration": 73.05, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WJmvyBeKMUY", 
"annotations": []}, "2kx--bbqXHg": {"duration": 225.28, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=2kx--bbqXHg", "annotations": [{"segment": [34.43927925117004, 221.74678783151325], "label": "Starting a campfire"}]}, "zapTJ7B_rJg": {"duration": 172.34, "subset": "testing", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=zapTJ7B_rJg", "annotations": []}, "9z_ylV5c9HA": {"duration": 125.133, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9z_ylV5c9HA", "annotations": [{"segment": [15.373915277983679, 21.795931761093254], "label": "Putting in contact lenses"}, {"segment": [33.083112246558564, 48.262423933908465], "label": "Putting in contact lenses"}, {"segment": [44.37029232758468, 50.20848907719588], "label": "Putting in contact lenses"}, {"segment": [89.71362041623165, 108.00663689834673], "label": "Putting in contact lenses"}]}, "hyYqJlwsTl8": {"duration": 75.88, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hyYqJlwsTl8", "annotations": []}, "HW9SFCj0dVU": {"duration": 187.55, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HW9SFCj0dVU", "annotations": [{"segment": [44.18506732238342, 186.72439442518714], "label": "Starting a campfire"}]}, "oS4w7-0aH8Q": {"duration": 57.21, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=oS4w7-0aH8Q", "annotations": [{"segment": [2.856235544659882, 51.9477839685016], "label": "Using the balance beam"}]}, "XksnxOAUQhY": {"duration": 234.289, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XksnxOAUQhY", "annotations": []}, "woLR9wHe0Vs": {"duration": 84.8, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=woLR9wHe0Vs", "annotations": [{"segment": [0.5289297971918877, 3.305811232449298], "label": "Raking leaves"}, {"segment": 
[37.818480499219966, 55.14093135725429], "label": "Raking leaves"}]}, "T8wY9ZQv8vQ": {"duration": 175.78, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=T8wY9ZQv8vQ", "annotations": [{"segment": [8.9399165, 172.21019429017161], "label": "Tumbling"}]}, "G6hTargh4Lc": {"duration": 160.96, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=G6hTargh4Lc", "annotations": []}, "RZSI1LkU00g": {"duration": 103.353, "subset": "testing", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=RZSI1LkU00g", "annotations": []}, "c_DQ7Y8ZRBQ": {"duration": 122.0, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=c_DQ7Y8ZRBQ", "annotations": [{"segment": [37.30349107297625, 42.44223704056162], "label": "Getting a haircut"}, {"segment": [44.91644816848674, 68.89726371606865], "label": "Getting a haircut"}, {"segment": [72.32309450858035, 117.23954267706708], "label": "Getting a haircut"}]}, "wTBJ4PRnU4k": {"duration": 233.849, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wTBJ4PRnU4k", "annotations": [{"segment": [85.00253048489286, 194.083030978382], "label": "Changing car wheel"}]}, "QgklQUCQcV4": {"duration": 209.723, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=QgklQUCQcV4", "annotations": [{"segment": [81.1403684011306, 209.721677], "label": "Hula hoop"}]}, "Lh50gF5l0dw": {"duration": 15.02, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Lh50gF5l0dw", "annotations": [{"segment": [4.546836109204368, 14.952997101404055], "label": "Discus throw"}, {"segment": [0.36405603, 3.8219231700000003], "label": "Discus throw"}]}, "j5cmc3cnLeU": {"duration": 104.14, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=j5cmc3cnLeU", "annotations": [{"segment": [6.661140614332055, 97.48010655120082], "label": 
"Tango"}]}, "hJJas1Zat1s": {"duration": 60.326, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hJJas1Zat1s", "annotations": [{"segment": [9.599368146645865, 47.526283471138846], "label": "Making a lemonade"}]}, "o4uymxrCKeo": {"duration": 13.352, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=o4uymxrCKeo", "annotations": [{"segment": [0.01, 13.321], "label": "Swinging at the playground"}]}, "i69NhEDno_M": {"duration": 114.382, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=i69NhEDno_M", "annotations": [{"segment": [13.37843213728549, 106.13556162246489], "label": "Canoeing"}]}, "1ItX1KBmkLE": {"duration": 71.913, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1ItX1KBmkLE", "annotations": []}, "WZeMQ-5dFlM": {"duration": 127.803, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WZeMQ-5dFlM", "annotations": [{"segment": [15.745895475819031, 90.10454290171607], "label": "Longboarding"}, {"segment": [93.4934305772231, 113.62741029641187], "label": "Longboarding"}]}, "qNxLTF4Q6yk": {"duration": 229.55, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qNxLTF4Q6yk", "annotations": [{"segment": [0.01, 229.55183673469386], "label": "Playing guitarra"}]}, "iM8rmKLJnt8": {"duration": 56.053, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=iM8rmKLJnt8", "annotations": [{"segment": [0.01, 47.76619234498639], "label": "Painting fence"}]}, "1VDNFR3vQR4": {"duration": 53.22, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1VDNFR3vQR4", "annotations": [{"segment": [1.7435613995988413, 52.38986872127947], "label": "Doing kickboxing"}]}, "zxhhLuJP-PQ": {"duration": 11.75, "subset": "testing", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=zxhhLuJP-PQ", "annotations": []}, "dVPToPJ3xEc": {"duration": 140.063, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=dVPToPJ3xEc", "annotations": []}, "9l-IexiD7tY": {"duration": 36.038, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9l-IexiD7tY", "annotations": []}, "tMheZSxPl3Q": {"duration": 91.07, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tMheZSxPl3Q", "annotations": [{"segment": [47.30826685725429, 51.84759081357254], "label": "Polishing forniture"}]}, "3S5RoKVsAys": {"duration": 150.68, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3S5RoKVsAys", "annotations": [{"segment": [121.48827819344773, 137.93736808424336], "label": "Shaving"}]}, "vPcH4j7DhRk": {"duration": 177.45, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vPcH4j7DhRk", "annotations": []}, "83WXP5z80Y0": {"duration": 140.667, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=83WXP5z80Y0", "annotations": [{"segment": [11.884797518067641, 127.94753668544448], "label": "Hula hoop"}]}, "uLoeu43EkHI": {"duration": 114.59, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uLoeu43EkHI", "annotations": []}, "oB6h3DscMUw": {"duration": 30.86, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=oB6h3DscMUw", "annotations": [{"segment": [2.4986770670826832, 27.022468018720748], "label": "Peeling potatoes"}]}, "sfCfrWpHpu0": {"duration": 154.692, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sfCfrWpHpu0", "annotations": [{"segment": [13.953493858727503, 40.417023656338614], "label": "Having an ice cream"}, {"segment": [46.190884703090134, 55.81398644767598], "label": "Having an ice cream"}, {"segment": 
[60.62553731996892, 64.23420047418861], "label": "Having an ice cream"}, {"segment": [68.8051738028669, 81.31520607082852], "label": "Having an ice cream"}, {"segment": [84.68329168143356, 99.11794429831235], "label": "Having an ice cream"}, {"segment": [115.9583723513376, 128.9495584192802], "label": "Having an ice cream"}, {"segment": [144.34652105691632, 151.56384729330824], "label": "Having an ice cream"}]}, "Lyi8-hf1rCw": {"duration": 53.24, "subset": "training", "resolution": "634x360", "url": "https://www.youtube.com/watch?v=Lyi8-hf1rCw", "annotations": [{"segment": [6.478885647425897, 41.61438088923557], "label": "Triple jump"}]}, "PWrVWeH45Zo": {"duration": 44.443, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=PWrVWeH45Zo", "annotations": [{"segment": [0.01, 42.922390908015174], "label": "Mooping floor"}]}, "zGDecqEw9AU": {"duration": 63.043, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zGDecqEw9AU", "annotations": [{"segment": [0.2, 53.82605460816913], "label": "Table soccer"}]}, "8VfPKyFbroo": {"duration": 111.02000000000001, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=8VfPKyFbroo", "annotations": []}, "fchUWtD6HjQ": {"duration": 7.85, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fchUWtD6HjQ", "annotations": []}, "Z7K29Gk_j8Q": {"duration": 62.3, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Z7K29Gk_j8Q", "annotations": []}, "nYfc0J4XF6g": {"duration": 94.97, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=nYfc0J4XF6g", "annotations": []}, "5O1ym4u2n7k": {"duration": 125.46, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5O1ym4u2n7k", "annotations": []}, "9YYypM0JvUM": {"duration": 116.38, "subset": "training", "resolution": "648x480", "url": 
"https://www.youtube.com/watch?v=9YYypM0JvUM", "annotations": [{"segment": [31.579170299999998, 93.31188039999999], "label": "Fixing bicycle"}]}, "v2iIR1YLdz4": {"duration": 53.62, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=v2iIR1YLdz4", "annotations": [{"segment": [2.718382912187236, 52.06748800624025], "label": "Using parallel bars"}]}, "pPM1jC_NlzI": {"duration": 67.725, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pPM1jC_NlzI", "annotations": [{"segment": [0.01, 62.98539157566303], "label": "Welding"}]}, "37gHYr2uDZo": {"duration": 63.23, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=37gHYr2uDZo", "annotations": [{"segment": [4.238351014040561, 51.55017628705148], "label": "Mixing drinks"}]}, "9Ht1yvo936I": {"duration": 79.645, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9Ht1yvo936I", "annotations": [{"segment": [0.009999999999999998, 79.64444444444445], "label": "Playing blackjack"}]}, "wHRHS7_4J_s": {"duration": 67.04, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wHRHS7_4J_s", "annotations": [{"segment": [10.286667864274571, 61.876656950078], "label": "Playing violin"}]}, "qcV3JtJDoV8": {"duration": 209.119, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qcV3JtJDoV8", "annotations": [{"segment": [14.97944859813084, 191.47642990654205], "label": "Skiing"}]}, "njH63OVW2t0": {"duration": 181.324, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=njH63OVW2t0", "annotations": []}, "94bJbSWNw3o": {"duration": 47.067, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=94bJbSWNw3o", "annotations": [{"segment": [0.01, 47.066848], "label": "Slacklining"}]}, "NvKPIvDjpsg": {"duration": 36.87, "subset": "testing", "resolution": "324x240", 
"url": "https://www.youtube.com/watch?v=NvKPIvDjpsg", "annotations": []}, "YK9BIL9fKA0": {"duration": 135.47, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YK9BIL9fKA0", "annotations": []}, "POYg9zju63U": {"duration": 148.933, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=POYg9zju63U", "annotations": [{"segment": [0.2, 11.849521580863232], "label": "Raking leaves"}, {"segment": [57.85354654186167, 104.78694574449644], "label": "Raking leaves"}, {"segment": [104.55460132605303, 113.61600010764431], "label": "Raking leaves"}]}, "uSMwc0AO4bo": {"duration": 101.25999999999999, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uSMwc0AO4bo", "annotations": []}, "aYrYnmUYSrA": {"duration": 190.033, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aYrYnmUYSrA", "annotations": []}, "Cof9eHf7VCI": {"duration": 219.963, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Cof9eHf7VCI", "annotations": [{"segment": [42.55115461466459, 157.50790296879876], "label": "Surfing"}]}, "m5PO3T2uGzs": {"duration": 24.01, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=m5PO3T2uGzs", "annotations": [{"segment": [0.2, 22.889241256572614], "label": "Swinging at the playground"}]}, "MIBJBSg7Ch0": {"duration": 198.02, "subset": "testing", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=MIBJBSg7Ch0", "annotations": []}, "1_PiNdPt6RU": {"duration": 15.07, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1_PiNdPt6RU", "annotations": [{"segment": [1.5696993769470404, 4.381101246105919], "label": "Gargling mouthwash"}]}, "Or7mIy83LsQ": {"duration": 118.17, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Or7mIy83LsQ", "annotations": [{"segment": [15.034260179999999, 
112.08242775663027], "label": "Long jump"}]}, "Zgiq5MK8fQ8": {"duration": 30.047, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Zgiq5MK8fQ8", "annotations": [{"segment": [0, 28.827880343213728], "label": "Removing ice from car"}]}, "0JpULUYPDX0": {"duration": 100.03, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0JpULUYPDX0", "annotations": [{"segment": [10.727559600000001, 80.91442219999999], "label": "Tennis serve with ball bouncing"}, {"segment": [17.774040561622463, 29.62340093603744], "label": "Tennis serve with ball bouncing"}, {"segment": [56.440374414976596, 72.65528861154445], "label": "Tennis serve with ball bouncing"}, {"segment": [85.28421216848675, 97.60131045241809], "label": "Tennis serve with ball bouncing"}]}, "lUX1nfb5rx0": {"duration": 141.364, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=lUX1nfb5rx0", "annotations": [{"segment": [32.849967238689544, 112.45539781591265], "label": "Cleaning sink"}]}, "NaN4aHalv-k": {"duration": 18.576, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NaN4aHalv-k", "annotations": []}, "IoiDAHNryTk": {"duration": 47.044, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IoiDAHNryTk", "annotations": [{"segment": [0.19999999999999998, 33.874336832303605], "label": "Using the monkey bar"}]}, "QhKoyVQ9blk": {"duration": 52.245, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=QhKoyVQ9blk", "annotations": []}, "QLbWA54l1Sg": {"duration": 94.227, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QLbWA54l1Sg", "annotations": [{"segment": [3.968983067082683, 39.395831925117], "label": "Arm wrestling"}, {"segment": [51.44978049921996, 64.53272468330734], "label": "Arm wrestling"}]}, "8z29qtUWwdU": {"duration": 189.777, "subset": "training", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=8z29qtUWwdU", "annotations": [{"segment": [0.01, 174.94060998439937], "label": "Sharpening knives"}]}, "2g9GrshWQrU": {"duration": 179.514, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2g9GrshWQrU", "annotations": [{"segment": [3.3596443057722314, 120.38725429017161], "label": "Arm wrestling"}]}, "PV8QJ4QJgtY": {"duration": 231.97, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PV8QJ4QJgtY", "annotations": [{"segment": [113.60872698907956, 215.2776833073323], "label": "Horseback riding"}]}, "W5tacjehFRA": {"duration": 154.2, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=W5tacjehFRA", "annotations": []}, "R_JKmGmURvA": {"duration": 202.27, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=R_JKmGmURvA", "annotations": []}, "Z3bPr9odxpw": {"duration": 203.45, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Z3bPr9odxpw", "annotations": [{"segment": [35.231337408598385, 199.00944492823714], "label": "Zumba"}]}, "qpyAkXLIJz0": {"duration": 184.74, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qpyAkXLIJz0", "annotations": [{"segment": [127.24152728694183, 184.73798185941044], "label": "Playing guitarra"}]}, "24NiBYgzKj0": {"duration": 122.184, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=24NiBYgzKj0", "annotations": []}, "uE3YXkZoV5c": {"duration": 89.397, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uE3YXkZoV5c", "annotations": [{"segment": [44.6981179138322, 80.06672048286606], "label": "Shuffleboard"}]}, "jSHcj_pXiI4": {"duration": 164.56, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jSHcj_pXiI4", "annotations": [{"segment": [6.2879001560062395, 
71.47674258970359], "label": "Getting a piercing"}, {"segment": [101.50467394695787, 160.79058970358813], "label": "Getting a piercing"}]}, "eR6lcSS62sA": {"duration": 75.859, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=eR6lcSS62sA", "annotations": []}, "9A3z0W8U124": {"duration": 95.69, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9A3z0W8U124", "annotations": [{"segment": [0, 95.68943310657596], "label": "Using the balance beam"}]}, "XM3SktXBbHU": {"duration": 7.802, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=XM3SktXBbHU", "annotations": [{"segment": [0.2, 7.601904761904762], "label": "Powerbocking"}]}, "bzBnXFQ6rJs": {"duration": 86.08, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=bzBnXFQ6rJs", "annotations": []}, "fNuBBkigydA": {"duration": 56.355, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fNuBBkigydA", "annotations": []}, "PkRvgIFREJU": {"duration": 138.97, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PkRvgIFREJU", "annotations": []}, "rML1aj5XW0M": {"duration": 17.857, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rML1aj5XW0M", "annotations": [{"segment": [4.387302063182528, 9.624208652886116], "label": "Doing a powerbomb"}]}, "MzqovHjEKE4": {"duration": 62.624, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MzqovHjEKE4", "annotations": [{"segment": [16.02235693237253, 54.515091397815915], "label": "Trimming branches or hedges"}]}, "o7z-u08tc-Y": {"duration": 120.047, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=o7z-u08tc-Y", "annotations": [{"segment": [0.01, 119.981], "label": "Tug of war"}]}, "GdrutQ4RrDs": {"duration": 209.77, "subset": "training", "resolution": "640x360", 
"url": "https://www.youtube.com/watch?v=GdrutQ4RrDs", "annotations": [{"segment": [0.01, 58.251005479674966], "label": "Removing curlers"}]}, "7H1KylJRii8": {"duration": 123.37, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=7H1KylJRii8", "annotations": []}, "uyHGyE_vNtY": {"duration": 76.626, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uyHGyE_vNtY", "annotations": []}, "kwaCAq-9LnM": {"duration": 151.418, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=kwaCAq-9LnM", "annotations": [{"segment": [65.43307136973479, 141.0236231326053], "label": "Making a lemonade"}]}, "qiRrR2Nj2SQ": {"duration": 133.86, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=qiRrR2Nj2SQ", "annotations": [{"segment": [5.429702951383362, 73.30098917316693], "label": "Spinning"}, {"segment": [77.06001425897036, 122.37715001560062], "label": "Spinning"}]}, "fX3tcnTdAN4": {"duration": 117.076, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fX3tcnTdAN4", "annotations": [{"segment": [9.370572552321486, 111.82702426571713], "label": "Hurling"}]}, "QwLiAkfSa9A": {"duration": 115.00999999999999, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QwLiAkfSa9A", "annotations": [{"segment": [48.62291098159126, 110.70234731021894], "label": "Getting a piercing"}]}, "wjr5M-6qEWI": {"duration": 122.56, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=wjr5M-6qEWI", "annotations": []}, "hsIudyKGz6A": {"duration": 66.85, "subset": "training", "resolution": "480x720", "url": "https://www.youtube.com/watch?v=hsIudyKGz6A", "annotations": [{"segment": [1.9293628931357256, 63.46039570124805], "label": "Chopping wood"}]}, "xJ3KAhVRr8c": {"duration": 98.27000000000001, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=xJ3KAhVRr8c", "annotations": [{"segment": [0.01, 97.19708737879715], "label": "Doing step aerobics"}]}, "v34qczSoYLo": {"duration": 95.78, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=v34qczSoYLo", "annotations": [{"segment": [0.01, 71.94853194944125], "label": "Bungee jumping"}]}, "2zFmooC8gUg": {"duration": 218.384, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2zFmooC8gUg", "annotations": [{"segment": [0.01, 218.3831746031746], "label": "Roof shingle removal"}]}, "dZa5ir3baH4": {"duration": 37.38, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dZa5ir3baH4", "annotations": [{"segment": [31.998734789391573, 37.361], "label": "Triple jump"}]}, "gN27GB5txG8": {"duration": 89.14, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=gN27GB5txG8", "annotations": [{"segment": [13.243678370000001, 68.0056689], "label": "Washing hands"}]}, "s_H9cTp2tT8": {"duration": 64.81, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=s_H9cTp2tT8", "annotations": [{"segment": [4.4485231055500725, 15.87313891419657], "label": "Playing polo"}, {"segment": [25.376801703588146, 40.946632230889236], "label": "Playing polo"}]}, "u6f9COsww0w": {"duration": 138.07, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=u6f9COsww0w", "annotations": [{"segment": [3.87918, 42.45432], "label": "Playing water polo"}, {"segment": [58.45404, 95.8034321372855], "label": "Playing water polo"}]}, "W34A9wUi-4g": {"duration": 136.186, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=W34A9wUi-4g", "annotations": [{"segment": [0.2, 131.68084243369734], "label": "Futsal"}]}, "jvQU6RyhY7I": {"duration": 47.857, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=jvQU6RyhY7I", "annotations": [{"segment": [0.01, 47.57728237129486], "label": "Hitting a pinata"}]}, "2mmN96TdEdY": {"duration": 218.988, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2mmN96TdEdY", "annotations": [{"segment": [0.01, 218.841], "label": "Playing rubik cube"}]}, "-cwPzzcNpGs": {"duration": 131.89, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-cwPzzcNpGs", "annotations": [{"segment": [23.250387101404055, 48.35257494539782], "label": "Arm wrestling"}]}, "1UIhgxQXcfI": {"duration": 224.909, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1UIhgxQXcfI", "annotations": [{"segment": [23.91044926584547, 119.87062683593894], "label": "Baking cookies"}]}, "mPlfR8V1pFg": {"duration": 88.236, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mPlfR8V1pFg", "annotations": []}, "Tp6MFA0SbFU": {"duration": 21.549, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Tp6MFA0SbFU", "annotations": [{"segment": [6.080855814390094, 20.025927465480436], "label": "Doing a powerbomb"}]}, "keaMf0raxF8": {"duration": 144.196, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=keaMf0raxF8", "annotations": [{"segment": [30.1324399375975, 131.3416911076443], "label": "Curling"}]}, "MIE0KbGIXjM": {"duration": 119.28, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MIE0KbGIXjM", "annotations": [{"segment": [12.64877791575663, 91.9803864], "label": "Washing hands"}]}, "gY-8BUnSgf4": {"duration": 205.799, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gY-8BUnSgf4", "annotations": [{"segment": [6.10011030101068, 13.163395845553822], "label": "Playing ten pins"}, {"segment": [37.56383692511701, 109.1598679875195], "label": "Playing ten 
pins"}, {"segment": [115.90209512792514, 150.25534770046804], "label": "Playing ten pins"}, {"segment": [154.42910735881435, 172.40837973322934], "label": "Playing ten pins"}]}, "FOm0uKw7dXc": {"duration": 203.76, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FOm0uKw7dXc", "annotations": [{"segment": [15.00069690657999, 192.70125765424356], "label": "Tango"}]}, "xIld1Pt1QGs": {"duration": 150.35, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xIld1Pt1QGs", "annotations": [{"segment": [59.080829953198126, 146.31485803432136], "label": "Camel ride"}]}, "Er88I-NQkG4": {"duration": 180.88, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Er88I-NQkG4", "annotations": [{"segment": [11.065087844749788, 156.16629509815996], "label": "Cumbia"}]}, "uvglcAUnQv0": {"duration": 180.03, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uvglcAUnQv0", "annotations": []}, "FzxX2Crg8Ro": {"duration": 177.7, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=FzxX2Crg8Ro", "annotations": []}, "lydctNDJZ5o": {"duration": 96.15, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lydctNDJZ5o", "annotations": [{"segment": [11.25045081903276, 94.65379289079563], "label": "Playing lacrosse"}]}, "12Nafz_Yet4": {"duration": 144.359, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=12Nafz_Yet4", "annotations": []}, "7mmXZeOJT8w": {"duration": 29.86, "subset": "validation", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=7mmXZeOJT8w", "annotations": [{"segment": [0.41926324492979716, 22.50046081123245], "label": "Brushing teeth"}]}, "rLH89pN9I84": {"duration": 195.72, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rLH89pN9I84", "annotations": [{"segment": 
[9.46541974876274, 61.06722418556607], "label": "Walking the dog"}, {"segment": [62.28856866927738, 84.578105497009], "label": "Walking the dog"}, {"segment": [96.7915503341222, 119.08108716185383], "label": "Walking the dog"}, {"segment": [140.45461562680197, 143.8133129570081], "label": "Walking the dog"}, {"segment": [145.64532929953197, 150.22537110140405], "label": "Walking the dog"}, {"segment": [152.9733961825273, 165.1868409875195], "label": "Walking the dog"}, {"segment": [166.71352158814352, 176.17894131201248], "label": "Walking the dog"}]}, "TomBet77rDc": {"duration": 46.19, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TomBet77rDc", "annotations": [{"segment": [1.5851018190327615, 44.67105126365055], "label": "Clean and jerk"}]}, "Fmr6mPyvE-g": {"duration": 102.656, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Fmr6mPyvE-g", "annotations": [{"segment": [36.49458346333853, 79.40720124804992], "label": "Using the monkey bar"}, {"segment": [87.89224492979719, 99.25900156006239], "label": "Using the monkey bar"}]}, "44TbmH7hgDc": {"duration": 187.04, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=44TbmH7hgDc", "annotations": []}, "dAiqJJKezPE": {"duration": 34.134, "subset": "validation", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=dAiqJJKezPE", "annotations": [{"segment": [0.20000000000000004, 33.861], "label": "Changing car wheel"}]}, "Ok3m6ExNn1M": {"duration": 169.41, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ok3m6ExNn1M", "annotations": []}, "9VGbtQrlcN4": {"duration": 113.15, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=9VGbtQrlcN4", "annotations": [{"segment": [55.825947, 87.80100550000002], "label": "Cleaning windows"}]}, "ANeDHelwzK0": {"duration": 53.89, "subset": "validation", "resolution": "136x240", "url": 
"https://www.youtube.com/watch?v=ANeDHelwzK0", "annotations": [{"segment": [12.866621912855866, 53.49645272784022], "label": "Bathing dog"}]}, "6G1wS1p1ig0": {"duration": 222.935, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6G1wS1p1ig0", "annotations": [{"segment": [43.75352024922118, 60.074277802502095], "label": "Rope skipping"}, {"segment": [71.88078326657765, 77.08953567719922], "label": "Rope skipping"}, {"segment": [111.81455174800968, 121.19030608712852], "label": "Rope skipping"}, {"segment": [0.2, 222.7346031746032], "label": "Rope skipping"}, {"segment": [131.60781090837168, 137.51106364040942], "label": "Rope skipping"}, {"segment": [165.29107649705782, 172.58332896261683], "label": "Rope skipping"}, {"segment": [176.7503308691589, 200.0160915140187], "label": "Rope skipping"}]}, "ZoKdN--u6TU": {"duration": 140.365, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ZoKdN--u6TU", "annotations": [{"segment": [3.9921356655489246, 117.16222600822805], "label": "Shuffleboard"}]}, "twL4mmkCQ0s": {"duration": 54.149, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=twL4mmkCQ0s", "annotations": [{"segment": [3.801407239963068, 6.166727300384532], "label": "Grooming dog"}, {"segment": [14.27639607897241, 25.004812067312624], "label": "Grooming dog"}]}, "KeHywadGvd0": {"duration": 37.292, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KeHywadGvd0", "annotations": []}, "6iSqTbL5WXY": {"duration": 21.85, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6iSqTbL5WXY", "annotations": [{"segment": [2.4202001497659906, 20.316046327613105], "label": "Powerbocking"}]}, "KwhqAV_0o40": {"duration": 167.602, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=KwhqAV_0o40", "annotations": [{"segment": [12.284597503900155, 144.29590015600624], 
"label": "Ice fishing"}]}, "3zDw5mwGIW0": {"duration": 126.71, "subset": "training", "resolution": "1920x816", "url": "https://www.youtube.com/watch?v=3zDw5mwGIW0", "annotations": [{"segment": [13.080180497554137, 123.01470048353545], "label": "Painting"}]}, "CmEQjszPDrg": {"duration": 220.845, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=CmEQjszPDrg", "annotations": [{"segment": [0, 220.801], "label": "Running a marathon"}]}, "drjYCuezfA8": {"duration": 95.55, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=drjYCuezfA8", "annotations": [{"segment": [8.198529614653975, 90.48195311232449], "label": "Washing dishes"}]}, "H0puoztHMY8": {"duration": 39.149, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=H0puoztHMY8", "annotations": [{"segment": [0.01, 30.94100076407538], "label": "Roof shingle removal"}]}, "iZk3PH8ghlI": {"duration": 131.216, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iZk3PH8ghlI", "annotations": [{"segment": [9.842902059570097, 112.80956257283061], "label": "Longboarding"}, {"segment": [121.61188146362625, 128.77655963055292], "label": "Longboarding"}]}, "tRK4TZi1SvI": {"duration": 59.1, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tRK4TZi1SvI", "annotations": []}, "FwbnNQBzqHw": {"duration": 208.771, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FwbnNQBzqHw", "annotations": [{"segment": [30.941042376388936, 189.22890127033654], "label": "Making a cake"}]}, "y5wu5pysE14": {"duration": 187.27, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=y5wu5pysE14", "annotations": [{"segment": [22.21185748, 177.10083853354135], "label": "High jump"}]}, "ZIFxB0gZ5tY": {"duration": 83.081, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=ZIFxB0gZ5tY", "annotations": [{"segment": [0.2, 61.887959438377536], "label": "Swinging at the playground"}]}, "CaeZoHNHul0": {"duration": 174.43, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CaeZoHNHul0", "annotations": []}, "hANXaoStVR0": {"duration": 25.03, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hANXaoStVR0", "annotations": [{"segment": [8.240251170046802, 16.49998283931357], "label": "Playing polo"}, {"segment": [19.26621372854914, 23.746728549141967], "label": "Playing polo"}]}, "NDq29HQJVP0": {"duration": 163.56, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NDq29HQJVP0", "annotations": []}, "Ws3wACUl1WM": {"duration": 146.704, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ws3wACUl1WM", "annotations": []}, "rGuOU8U5Bvk": {"duration": 169.9, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rGuOU8U5Bvk", "annotations": [{"segment": [0.01, 152.40676134945397], "label": "Grooming horse"}]}, "r6C3l3NS_4I": {"duration": 70.78, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=r6C3l3NS_4I", "annotations": []}, "UQXB3JOoxYE": {"duration": 197.927, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UQXB3JOoxYE", "annotations": [{"segment": [35.497683307332295, 115.14760218408736], "label": "Waterskiing"}, {"segment": [122.24785647425898, 174.41929017160686], "label": "Waterskiing"}]}, "XJ-knBky6S0": {"duration": 142.71, "subset": "training", "resolution": "636x480", "url": "https://www.youtube.com/watch?v=XJ-knBky6S0", "annotations": [{"segment": [27.606860878516773, 131.80049537597503], "label": "Capoeira"}]}, "WV-Sf5-aCcc": {"duration": 147.284, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WV-Sf5-aCcc", 
"annotations": [{"segment": [19.760434977943333, 85.47536982589703], "label": "Capoeira"}, {"segment": [102.47853479126366, 134.87645722527301], "label": "Capoeira"}]}, "I6nuNE-Qibw": {"duration": 105.024, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=I6nuNE-Qibw", "annotations": [{"segment": [0.01, 9.224387165449862], "label": "Making a cake"}, {"segment": [24.107706480091608, 81.02413638663367], "label": "Making a cake"}]}, "95pMelnv2dQ": {"duration": 13.91, "subset": "training", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=95pMelnv2dQ", "annotations": [{"segment": [0, 13.604971726989081], "label": "Playing badminton"}]}, "zaPxNw11llc": {"duration": 21.85, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=zaPxNw11llc", "annotations": [{"segment": [10.703420380655228, 20.452395631825276], "label": "Hammer throw"}]}, "2mAKLFVhV9Y": {"duration": 111.74000000000001, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2mAKLFVhV9Y", "annotations": [{"segment": [0, 111.734421], "label": "Playing lacrosse"}]}, "Z56Xq8Ud4I0": {"duration": 162.68, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Z56Xq8Ud4I0", "annotations": [{"segment": [40.86009261322834, 123.08785662991146], "label": "Washing hands"}]}, "zEttEkAdHts": {"duration": 48.51, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=zEttEkAdHts", "annotations": []}, "xKePBw5XZHs": {"duration": 210.93, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xKePBw5XZHs", "annotations": [{"segment": [61.53498084413173, 85.55665744149766], "label": "Doing nails"}, {"segment": [124.71528142433698, 149.7241505226209], "label": "Doing nails"}, {"segment": [167.16454607800313, 191.84435110920438], "label": "Doing nails"}]}, "grroBKRxAMY": {"duration": 93.228, "subset": "training", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=grroBKRxAMY", "annotations": [{"segment": [39.560137010920435, 55.26783847113885], "label": "Spread mulch"}]}, "5QBRDpzedoI": {"duration": 117.87, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5QBRDpzedoI", "annotations": [{"segment": [0.2, 17.263297971918877], "label": "Triple jump"}, {"segment": [32.13911856474259, 50.13702496099844], "label": "Triple jump"}, {"segment": [51.78989391575663, 56.19754446177847], "label": "Triple jump"}, {"segment": [57.483109204368176, 63.37074102964119], "label": "Triple jump"}, {"segment": [79.90223868954759, 83.94327145085803], "label": "Triple jump"}, {"segment": [84.31063806552262, 114.43470046801872], "label": "Triple jump"}]}, "ffUtqOyJ7fM": {"duration": 84.8, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ffUtqOyJ7fM", "annotations": [{"segment": [27.10125585023401, 77.20552886115445], "label": "Washing face"}]}, "LuH7I7Rxju8": {"duration": 23.69, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LuH7I7Rxju8", "annotations": []}, "1FxlkazrYaY": {"duration": 158.616, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1FxlkazrYaY", "annotations": []}, "VrNHEv6aR38": {"duration": 14.56, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VrNHEv6aR38", "annotations": [{"segment": [7.467234940000001, 14.036515413416536], "label": "Playing water polo"}, {"segment": [1.158353213728549, 6.586912679999999], "label": "Playing water polo"}]}, "HBIUe_l_zuI": {"duration": 44.072, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=HBIUe_l_zuI", "annotations": []}, "4cqesj6HwTU": {"duration": 75.63, "subset": "validation", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=4cqesj6HwTU", "annotations": [{"segment": 
[42.38903198127925, 65.38169188767552], "label": "Drinking beer"}]}, "KQvqRh1mY8c": {"duration": 13.14, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KQvqRh1mY8c", "annotations": [{"segment": [0.9576708268330733, 9.82121996879875], "label": "Pole vault"}]}, "W50sQxSWDwM": {"duration": 116.28999999999999, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=W50sQxSWDwM", "annotations": [{"segment": [10.340523204601652, 111.02456493361775], "label": "Horseback riding"}]}, "5asz3rt3QyQ": {"duration": 188.454, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5asz3rt3QyQ", "annotations": [{"segment": [0.01, 188.421], "label": "Building sandcastles"}]}, "uK0dxEMBXfY": {"duration": 94.16, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uK0dxEMBXfY", "annotations": [{"segment": [43.92018183040247, 91.21883918622052], "label": "Mowing the lawn"}]}, "Wx-fRCjU9r0": {"duration": 90.674, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Wx-fRCjU9r0", "annotations": [{"segment": [0, 90.601], "label": "Playing ice hockey"}]}, "T4g31MwZ2ds": {"duration": 150.14, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=T4g31MwZ2ds", "annotations": [{"segment": [18.035568168642744, 138.66306955631825], "label": "Breakdancing"}]}, "_rs8dhCdI1w": {"duration": 229.51, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_rs8dhCdI1w", "annotations": [{"segment": [52.99047222841295, 216.61645742020156], "label": "Polishing forniture"}]}, "Kyo1nkGKRqw": {"duration": 93.72, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Kyo1nkGKRqw", "annotations": [{"segment": [37.15706035, 82.5891552], "label": "Grooming horse"}]}, "_b_9BQvJ_v4": {"duration": 114.057, "subset": "validation", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_b_9BQvJ_v4", "annotations": [{"segment": [6.761534801872075, 110.31977834633385], "label": "Disc dog"}]}, "njGsSwBc32M": {"duration": 123.415, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=njGsSwBc32M", "annotations": []}, "klGP18026Ek": {"duration": 86.356, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=klGP18026Ek", "annotations": [{"segment": [10.675837276815253, 57.46665970672179], "label": "Rock-paper-scissors"}]}, "cb5Xy7k7z1w": {"duration": 108.02000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cb5Xy7k7z1w", "annotations": [{"segment": [15.166506415358654, 81.05655095319459], "label": "Using the balance beam"}]}, "bpfak_VIJ8o": {"duration": 235.126, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bpfak_VIJ8o", "annotations": [{"segment": [76.64887519500779, 123.59172698907955], "label": "Playing beach volleyball"}]}, "hSlydQ9rJuk": {"duration": 53.29, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hSlydQ9rJuk", "annotations": [{"segment": [6.567696920436817, 32.83848460218409], "label": "Disc dog"}]}, "XJQlO4G5Fns": {"duration": 34.97, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=XJQlO4G5Fns", "annotations": [{"segment": [3.000481806552262, 32.07787822277691], "label": "Blow-drying hair"}]}, "TN8tBV7CY6U": {"duration": 112.199, "subset": "training", "resolution": "400x224", "url": "https://www.youtube.com/watch?v=TN8tBV7CY6U", "annotations": [{"segment": [0.01, 102.01226677067083], "label": "Playing pool"}]}, "a1nRXQZ6-Fo": {"duration": 187.62, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=a1nRXQZ6-Fo", "annotations": [{"segment": [0.01, 0.02], "label": "Playing saxophone"}, {"segment": [0.2, 
187.381], "label": "Playing saxophone"}, {"segment": [0.2, 187.381], "label": "Playing saxophone"}, {"segment": [0.2, 187.381], "label": "Playing saxophone"}]}, "vm64rRECzZM": {"duration": 70.52, "subset": "testing", "resolution": "320x188", "url": "https://www.youtube.com/watch?v=vm64rRECzZM", "annotations": []}, "3Z2rUp7ojhk": {"duration": 157.734, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3Z2rUp7ojhk", "annotations": []}, "Yn-Ajw7BzRA": {"duration": 158.24, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Yn-Ajw7BzRA", "annotations": []}, "3nrianTc060": {"duration": 225.095, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=3nrianTc060", "annotations": [{"segment": [24.0435, 203.78629095163808], "label": "Skiing"}]}, "6KDIwk_WjXI": {"duration": 126.18, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6KDIwk_WjXI", "annotations": []}, "k3_4Zbb2oE4": {"duration": 140.899, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=k3_4Zbb2oE4", "annotations": []}, "BMANcV_mbVw": {"duration": 48.553, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BMANcV_mbVw", "annotations": []}, "XKOeu8n7R7s": {"duration": 75.3, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XKOeu8n7R7s", "annotations": [{"segment": [7.869834633385335, 14.565067082683308], "label": "Layup drill in basketball"}, {"segment": [26.898390015600626, 35.00314508580343], "label": "Layup drill in basketball"}, {"segment": [43.81266146645866, 47.92376911076443], "label": "Layup drill in basketball"}, {"segment": [56.49836505460218, 67.30470514820593], "label": "Layup drill in basketball"}]}, "5WWvCSCGXmc": {"duration": 231.202, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5WWvCSCGXmc", 
"annotations": [{"segment": [3.7446647238686817, 186.29704322854158], "label": "BMX"}]}, "YvPoAOrjX5I": {"duration": 62.67, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=YvPoAOrjX5I", "annotations": []}, "6xBh3vzhgo4": {"duration": 6.385, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=6xBh3vzhgo4", "annotations": []}, "uBCLX7hfZLQ": {"duration": 149.328, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=uBCLX7hfZLQ", "annotations": [{"segment": [0.01, 140.3585582334858], "label": "Clipping cat claws"}]}, "HImOluKZgp0": {"duration": 32.07, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HImOluKZgp0", "annotations": [{"segment": [0, 27.26424652106084], "label": "Windsurfing"}]}, "qf5wMRlZYnQ": {"duration": 231.69, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qf5wMRlZYnQ", "annotations": [{"segment": [146.03526328, 225.1369078283931], "label": "Belly dance"}, {"segment": [27.818778159999997, 116.76148447999998], "label": "Belly dance"}]}, "V4BNtCtuSQQ": {"duration": 34.88, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=V4BNtCtuSQQ", "annotations": [{"segment": [1.5778700936037442, 34.16904892355694], "label": "Mowing the lawn"}]}, "K8Fq9kjfK_U": {"duration": 150.303, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=K8Fq9kjfK_U", "annotations": [{"segment": [36.696385254049616, 140.80625779589008], "label": "Baton twirling"}]}, "zFsMqb8__9E": {"duration": 100.15, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zFsMqb8__9E", "annotations": []}, "saotMUaxVCw": {"duration": 133.492, "subset": "testing", "resolution": "160x120", "url": "https://www.youtube.com/watch?v=saotMUaxVCw", "annotations": []}, "2_Tgrk8iJpE": {"duration": 70.89, "subset": "training", 
"resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=2_Tgrk8iJpE", "annotations": [{"segment": [40.9829268, 60.690622464898595], "label": "Dodgeball"}, {"segment": [10.1854752, 37.0123779], "label": "Dodgeball"}]}, "V6lFpCjRKC8": {"duration": 64.23, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=V6lFpCjRKC8", "annotations": [{"segment": [16.082049921996877, 62.77510452418096], "label": "Javelin throw"}]}, "5SNtTQZnN4g": {"duration": 186.46, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5SNtTQZnN4g", "annotations": [{"segment": [0.01, 186.4562358276644], "label": "Washing face"}]}, "W6H9iZgobyI": {"duration": 63.41, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=W6H9iZgobyI", "annotations": []}, "rmnuuQrkLac": {"duration": 179.119, "subset": "testing", "resolution": "647x480", "url": "https://www.youtube.com/watch?v=rmnuuQrkLac", "annotations": []}, "DRSH-_Ye9eE": {"duration": 40.64, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=DRSH-_Ye9eE", "annotations": [{"segment": [0, 40.601], "label": "Playing lacrosse"}]}, "XCfrjEJkl3M": {"duration": 111.5, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=XCfrjEJkl3M", "annotations": []}, "jqLwYHwYYPM": {"duration": 210.234, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=jqLwYHwYYPM", "annotations": [{"segment": [113.39270478843869, 142.2097533616474], "label": "Laying tile"}, {"segment": [163.16760686943556, 165.7873385579091], "label": "Laying tile"}]}, "owflykBauOA": {"duration": 25.96, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=owflykBauOA", "annotations": [{"segment": [2.510903276131045, 9.922117784711388], "label": "Skateboarding"}, {"segment": [15.672896255850233, 17.576322932917314], "label": "Skateboarding"}]}, 
"2kKy5PPtn5k": {"duration": 213.531, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=2kKy5PPtn5k", "annotations": []}, "BS9UPqgR89E": {"duration": 140.48, "subset": "validation", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=BS9UPqgR89E", "annotations": [{"segment": [7.4513957429045465, 40.54435872074883], "label": "Ballet"}, {"segment": [60.92611742901716, 130.1802652979719], "label": "Ballet"}]}, "3792wPKkdXI": {"duration": 53.801, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3792wPKkdXI", "annotations": [{"segment": [17.70326209048362, 45.55863182527301], "label": "Arm wrestling"}]}, "ZQU0p5OKC04": {"duration": 22.13, "subset": "training", "resolution": "240x180", "url": "https://www.youtube.com/watch?v=ZQU0p5OKC04", "annotations": [{"segment": [12.669581443057723, 18.52755312], "label": "Bungee jumping"}]}, "7rpq2RXAoKE": {"duration": 30.883, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7rpq2RXAoKE", "annotations": [{"segment": [0.01, 26.33536193447738], "label": "Using the monkey bar"}]}, "gdmGZK_vFAc": {"duration": 36.432, "subset": "validation", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=gdmGZK_vFAc", "annotations": [{"segment": [0, 15.516327167372408], "label": "Peeling potatoes"}]}, "VDX1IQnUMgo": {"duration": 52.15, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VDX1IQnUMgo", "annotations": [{"segment": [21.009879875195008, 46.611861154446174], "label": "Playing water polo"}]}, "0KTued0g034": {"duration": 56.82, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=0KTued0g034", "annotations": [{"segment": [24.286402496099846, 29.338683307332293], "label": "Drum corps"}]}, "Pz9rnMX6PX0": {"duration": 151.21, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Pz9rnMX6PX0", 
"annotations": []}, "5BAvlsHfTLk": {"duration": 151.766, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5BAvlsHfTLk", "annotations": [{"segment": [18.53335846912117, 130.82092353921465], "label": "Assembling bicycle"}]}, "d09u-0k3LdE": {"duration": 10.73, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=d09u-0k3LdE", "annotations": []}, "JH9qiuPCkY4": {"duration": 149.12, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=JH9qiuPCkY4", "annotations": []}, "yl3bjdUZrmM": {"duration": 38.522, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yl3bjdUZrmM", "annotations": [{"segment": [5.378643488596687, 8.08298900702028], "label": "Applying sunscreen"}, {"segment": [8.804147877535103, 11.628686787051484], "label": "Applying sunscreen"}, {"segment": [16.556605735569423, 18.299406339313574], "label": "Applying sunscreen"}, {"segment": [31.580748871294855, 34.58557749843994], "label": "Applying sunscreen"}]}, "r8hXEpP7HH0": {"duration": 75.024, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=r8hXEpP7HH0", "annotations": [{"segment": [46.31223088923557, 72.041248049922], "label": "Sumo"}]}, "f8i_Yk0YCeA": {"duration": 188.55, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=f8i_Yk0YCeA", "annotations": []}, "KBfCXpgGhdw": {"duration": 132.49, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KBfCXpgGhdw", "annotations": [{"segment": [56.63503100468018, 109.34281533385334], "label": "Doing motocross"}]}, "WsqMDMOjuN4": {"duration": 227.77, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WsqMDMOjuN4", "annotations": [{"segment": [29.492131547581906, 193.65315390846925], "label": "Playing water polo"}]}, "Bs3OMhhUlY4": {"duration": 15.186, "subset": "training", 
"resolution": "360x480", "url": "https://www.youtube.com/watch?v=Bs3OMhhUlY4", "annotations": [{"segment": [2.9801653666146644, 10.66709984399376], "label": "Doing a powerbomb"}]}, "g6L-l5sgFEQ": {"duration": 150.234, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=g6L-l5sgFEQ", "annotations": [{"segment": [47.10897014656096, 127.9676486302652], "label": "Painting furniture"}]}, "9WhPG89P-tg": {"duration": 60.05, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=9WhPG89P-tg", "annotations": [{"segment": [14.70093135725429, 60.021], "label": "Getting a haircut"}]}, "vCbul5FHGvM": {"duration": 235.869, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vCbul5FHGvM", "annotations": []}, "oD45QIOK4z0": {"duration": 212.742, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oD45QIOK4z0", "annotations": [{"segment": [3.6394249194623414, 141.60690543985467], "label": "Futsal"}, {"segment": [145.2463353823119, 199.17606998417816], "label": "Futsal"}]}, "yePMTMv3nMA": {"duration": 202.386, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yePMTMv3nMA", "annotations": []}, "nFhBmNJ9zN0": {"duration": 193.77, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nFhBmNJ9zN0", "annotations": [{"segment": [27.3331962, 185.17160686427457], "label": "Playing polo"}]}, "EjRE-fbECtg": {"duration": 80.81, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=EjRE-fbECtg", "annotations": [{"segment": [7.060740900000001, 42.3660615], "label": "Cheerleading"}, {"segment": [49.821130800000006, 74.03815851482058], "label": "Cheerleading"}]}, "Q2wd5aLtZ1E": {"duration": 62.253, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Q2wd5aLtZ1E", "annotations": [{"segment": [12.12082573987539, 
27.44154947507788], "label": "Roof shingle removal"}]}, "0McanL-Gob8": {"duration": 228.72, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0McanL-Gob8", "annotations": [{"segment": [10.880553822152885, 211.7248751950078], "label": "Getting a tattoo"}]}, "PsddM2OmOGo": {"duration": 107.207, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=PsddM2OmOGo", "annotations": [{"segment": [13.212661339106626, 15.721394268330732], "label": "Hanging wallpaper"}, {"segment": [22.57859815132605, 26.425322280811233], "label": "Hanging wallpaper"}, {"segment": [30.773793035881436, 80.11221121840873], "label": "Hanging wallpaper"}]}, "BtdPw9zydjM": {"duration": 185.11, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=BtdPw9zydjM", "annotations": []}, "wcxEkMoOmBk": {"duration": 86.94, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=wcxEkMoOmBk", "annotations": []}, "exc6YVfF940": {"duration": 171.25, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=exc6YVfF940", "annotations": []}, "8AQopjogplo": {"duration": 212.14, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8AQopjogplo", "annotations": [{"segment": [86.53065348, 155.96846502], "label": "Playing guitarra"}, {"segment": [20.21569524, 73.43803790999999], "label": "Playing guitarra"}]}, "y9bLCC26MGQ": {"duration": 36.87, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=y9bLCC26MGQ", "annotations": [{"segment": [8.743743563182527, 11.274827226209046], "label": "Triple jump"}, {"segment": [15.9918467800312, 36.873287], "label": "Triple jump"}]}, "bV_vdBbOubE": {"duration": 154.11, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=bV_vdBbOubE", "annotations": [{"segment": [5.770140617869613, 58.18225123018526], "label": 
"Hopscotch"}]}, "gB_xHRJY7sw": {"duration": 25.542, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gB_xHRJY7sw", "annotations": [{"segment": [24.14050979817471, 25.521], "label": "Kite flying"}]}, "CtooIi6Mk7U": {"duration": 194.909, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=CtooIi6Mk7U", "annotations": [{"segment": [72.34346021840875, 75.38310140405616], "label": "Kite flying"}]}, "cwCNZAsvf8U": {"duration": 69.94, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=cwCNZAsvf8U", "annotations": [{"segment": [5.84891494, 69.938502], "label": "Shaving legs"}]}, "7yg2zbiTJJ4": {"duration": 80.3, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7yg2zbiTJJ4", "annotations": [{"segment": [0.010000000000000002, 79.88536349453977], "label": "Wrapping presents"}]}, "bZpI0-7s2Jo": {"duration": 184.32, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bZpI0-7s2Jo", "annotations": []}, "LfhnPDx1Iy0": {"duration": 91.46000000000001, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LfhnPDx1Iy0", "annotations": []}, "RRIGf87R6jc": {"duration": 113.801, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=RRIGf87R6jc", "annotations": [{"segment": [27.120798525017484, 87.21197824299065], "label": "Baking cookies"}]}, "9qNM5tNMxK4": {"duration": 207.1, "subset": "training", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=9qNM5tNMxK4", "annotations": [{"segment": [0, 207.098775], "label": "Playing squash"}]}, "aR05GUXDiYg": {"duration": 143.453, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=aR05GUXDiYg", "annotations": [{"segment": [86.56254304929594, 96.15581373107744], "label": "Table soccer"}]}, "5gc896my38M": {"duration": 206.867, "subset": "training", 
"resolution": "634x480", "url": "https://www.youtube.com/watch?v=5gc896my38M", "annotations": [{"segment": [5.80904581489382, 9.036293489834831], "label": "Applying sunscreen"}, {"segment": [18.395311747163763, 21.622559422104775], "label": "Applying sunscreen"}, {"segment": [33.563375819386515, 37.11334826182163], "label": "Applying sunscreen"}, {"segment": [49.37688942659747, 51.31323803156207], "label": "Applying sunscreen"}, {"segment": [61.64043059137331, 65.51312780130253], "label": "Applying sunscreen"}, {"segment": [69.06310024373765, 73.58124698865505], "label": "Applying sunscreen"}, {"segment": [95.84925594574803, 101.01285222565365], "label": "Applying sunscreen"}, {"segment": [105.20827420307697, 107.14462280804158], "label": "Applying sunscreen"}, {"segment": [117.1490906003587, 119.73088874031151], "label": "Applying sunscreen"}, {"segment": [143.28979676738092, 147.1624939773101], "label": "Applying sunscreen"}, {"segment": [152.00336548972163, 154.90788839716853], "label": "Applying sunscreen"}]}, "ysM4zvujAh8": {"duration": 29.815, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=ysM4zvujAh8", "annotations": []}, "OhpILjGpQTs": {"duration": 81.642, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OhpILjGpQTs", "annotations": [{"segment": [14.093592078746559, 74.5309845571588], "label": "Camel ride"}]}, "0vQs3ztG7vg": {"duration": 37.965, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=0vQs3ztG7vg", "annotations": [{"segment": [1.1850858034321372, 36.2636255850234], "label": "Using the monkey bar"}]}, "2dA1fAU3o6o": {"duration": 64.366, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2dA1fAU3o6o", "annotations": [{"segment": [4.050428198464616, 64.365713], "label": "Blowing leaves"}]}, "bsU2nEf2Tpc": {"duration": 191.61, "subset": "testing", "resolution": "480x270", "url": 
"https://www.youtube.com/watch?v=bsU2nEf2Tpc", "annotations": []}, "GKouuFdpCck": {"duration": 18.16, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=GKouuFdpCck", "annotations": []}, "8xsLp6lqijo": {"duration": 127.25, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=8xsLp6lqijo", "annotations": [{"segment": [96.47619360374415, 110.96747371294852], "label": "Skateboarding"}]}, "kvqX0QOgYDc": {"duration": 99.242, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kvqX0QOgYDc", "annotations": [{"segment": [0.2, 99.00099999999999], "label": "Scuba diving"}]}, "SOpFjnAT_SE": {"duration": 132.49, "subset": "training", "resolution": "400x266", "url": "https://www.youtube.com/watch?v=SOpFjnAT_SE", "annotations": [{"segment": [82.6226348, 129.96553042121684], "label": "Smoking hookah"}, {"segment": [3.719204368174727, 75.41719968798752], "label": "Smoking hookah"}]}, "PDaGlIxn6Pk": {"duration": 200.37, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=PDaGlIxn6Pk", "annotations": [{"segment": [0, 174.73327259438378], "label": "Playing field hockey"}]}, "5lre5zbUV1Q": {"duration": 77.23, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5lre5zbUV1Q", "annotations": [{"segment": [12.65059771450858, 71.44575661622464], "label": "Cleaning sink"}]}, "G12fEYNjIR0": {"duration": 48.832, "subset": "training", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=G12fEYNjIR0", "annotations": [{"segment": [9.370062296369406, 15.46441142277691], "label": "Curling"}, {"segment": [30.471746645865835, 36.185199141965676], "label": "Curling"}]}, "tnt6Wpv_kHc": {"duration": 184.76, "subset": "validation", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=tnt6Wpv_kHc", "annotations": [{"segment": [105.78370097659908, 166.60212306396255], "label": "Spinning"}]}, "EO9ZKeBSGSw": 
{"duration": 15.54, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EO9ZKeBSGSw", "annotations": [{"segment": [3.1019102652106083, 14.394802324492979], "label": "Hopscotch"}]}, "zYjLA99koBk": {"duration": 105.628, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zYjLA99koBk", "annotations": []}, "rHta6u5VWHk": {"duration": 167.44, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rHta6u5VWHk", "annotations": []}, "CfUOUDzUSu0": {"duration": 102.586, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CfUOUDzUSu0", "annotations": []}, "MiEIeitUNFs": {"duration": 35.74, "subset": "testing", "resolution": "480x352", "url": "https://www.youtube.com/watch?v=MiEIeitUNFs", "annotations": []}, "2vrR_5u2buU": {"duration": 42.59, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2vrR_5u2buU", "annotations": []}, "VXZfnNu9o8E": {"duration": 45.094, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VXZfnNu9o8E", "annotations": []}, "7IwHwDuQdyU": {"duration": 57.17, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7IwHwDuQdyU", "annotations": []}, "9BS1II6O7I0": {"duration": 101.379, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9BS1II6O7I0", "annotations": [{"segment": [0.01, 101.378231], "label": "Playing rubik cube"}]}, "NjTk2naIaac": {"duration": 234.986, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NjTk2naIaac", "annotations": [{"segment": [14.61807506180984, 149.10441693292844], "label": "Riding bumper cars"}]}, "YoXZfvf5Teg": {"duration": 231.11, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YoXZfvf5Teg", "annotations": [{"segment": [94.288496099844, 145.4891326053042], "label": "Playing 
violin"}, {"segment": [204.2616942277691, 209.30964430577222], "label": "Playing violin"}]}, "DUQUohpaGjQ": {"duration": 25.03, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=DUQUohpaGjQ", "annotations": []}, "xcworhtkwNY": {"duration": 51.317, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xcworhtkwNY", "annotations": [{"segment": [0.01, 50.51553583307332], "label": "Playing congas"}]}, "BxyOAXTWkzk": {"duration": 132.61, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BxyOAXTWkzk", "annotations": [{"segment": [40.223469636505456, 116.53431948673948], "label": "Horseback riding"}]}, "Z1POv1Qeno0": {"duration": 43.399, "subset": "training", "resolution": "852x480", "url": "https://www.youtube.com/watch?v=Z1POv1Qeno0", "annotations": [{"segment": [0.2, 7.5828182964118565], "label": "River tubing"}, {"segment": [18.88934200624025, 21.123565254290174], "label": "River tubing"}, {"segment": [33.58105245553822, 43.198094], "label": "River tubing"}]}, "0j9TxzwRr6E": {"duration": 176.867, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=0j9TxzwRr6E", "annotations": [{"segment": [0.20000000000000004, 160.545744149766], "label": "Table soccer"}]}, "7V4Wsi6hklI": {"duration": 15.16, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=7V4Wsi6hklI", "annotations": []}, "Mk7Yjq1U8OI": {"duration": 152.56, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Mk7Yjq1U8OI", "annotations": []}, "BOqca4eckEs": {"duration": 81.9, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=BOqca4eckEs", "annotations": [{"segment": [5.238327517940717, 20.825545985959437], "label": "Bungee jumping"}]}, "VEzeFMRjtyI": {"duration": 120.33, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=VEzeFMRjtyI", 
"annotations": []}, "VLjfvly5xuE": {"duration": 197.718, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VLjfvly5xuE", "annotations": []}, "1mksQqxFtv4": {"duration": 13.978, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1mksQqxFtv4", "annotations": [{"segment": [0.2, 12.942112149532711], "label": "Rope skipping"}]}, "SdsoRu3953g": {"duration": 185.71, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SdsoRu3953g", "annotations": [{"segment": [18.252623007800313, 137.6189829953198], "label": "Removing curlers"}]}, "V66cwBOS80A": {"duration": 174.057, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=V66cwBOS80A", "annotations": [{"segment": [30.683912572829446, 174.0565306122449], "label": "Rafting"}, {"segment": [57.294738875195016, 68.97091788767551], "label": "Rafting"}, {"segment": [80.37555785335414, 90.69404163182527], "label": "Rafting"}]}, "F1gyHVC3zxk": {"duration": 63.39, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=F1gyHVC3zxk", "annotations": []}, "xD2qZzNCu3c": {"duration": 220.287, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=xD2qZzNCu3c", "annotations": []}, "YYUm4o1aDmY": {"duration": 299.608, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YYUm4o1aDmY", "annotations": [{"segment": [62.63236821717766, 184.6252645207849], "label": "Putting on shoes"}]}, "BWsjIONsXlM": {"duration": 74.467, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=BWsjIONsXlM", "annotations": [{"segment": [25.767585023400937, 51.6651248049922], "label": "Waxing skis"}]}, "lcEGoZAC7GI": {"duration": 59.91, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lcEGoZAC7GI", "annotations": [{"segment": [37.2435755626365, 
40.51465497215288], "label": "Spinning"}, {"segment": [49.954055553900155, 53.78589143361935], "label": "Spinning"}]}, "1OJa2iiFxfk": {"duration": 234.34, "subset": "training", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=1OJa2iiFxfk", "annotations": [{"segment": [16.451010772230887, 223.3681684851794], "label": "Polishing shoes"}]}, "JoZYs4Q3LG0": {"duration": 204.591, "subset": "testing", "resolution": "480x272", "url": "https://www.youtube.com/watch?v=JoZYs4Q3LG0", "annotations": []}, "AA46-AbEGdk": {"duration": 173.18, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AA46-AbEGdk", "annotations": [{"segment": [27.968897035881433, 110.92978003120123], "label": "Using the balance beam"}, {"segment": [134.43986739469577, 159.0308783151326], "label": "Using the balance beam"}]}, "scBelfrnHoI": {"duration": 176.077, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=scBelfrnHoI", "annotations": [{"segment": [6.657114697879022, 43.682638766264574], "label": "Powerbocking"}, {"segment": [52.45905930209012, 95.79263569772874], "label": "Powerbocking"}, {"segment": [106.21463508402158, 146.53131692047018], "label": "Powerbocking"}, {"segment": [150.0967377631493, 161.61578971642032], "label": "Powerbocking"}]}, "UZIs0_W2Akw": {"duration": 99.75, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=UZIs0_W2Akw", "annotations": [{"segment": [16.09398768, 87.1982108], "label": "Grooming horse"}]}, "xsRNDo8Upys": {"duration": 204.27, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xsRNDo8Upys", "annotations": [{"segment": [4.77896255850234, 179.33037144999997], "label": "Ballet"}]}, "Il9Ara3IqeY": {"duration": 38.45, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Il9Ara3IqeY", "annotations": []}, "Ifcm-8vzRU4": {"duration": 196.0, "subset": "testing", "resolution": 
"512x272", "url": "https://www.youtube.com/watch?v=Ifcm-8vzRU4", "annotations": []}, "DTI__3fkSzM": {"duration": 22.036, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=DTI__3fkSzM", "annotations": []}, "4kbtdUz2M3M": {"duration": 234.592, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4kbtdUz2M3M", "annotations": [{"segment": [104.66926585090613, 216.29208337909515], "label": "Surfing"}]}, "ZnoSOqba2HM": {"duration": 14.07, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZnoSOqba2HM", "annotations": [{"segment": [3.7977741083765837, 7.567652694046365], "label": "Brushing teeth"}, {"segment": [10.153964747005867, 13.901925433921754], "label": "Brushing teeth"}]}, "QJqUOwlTiUU": {"duration": 87.749, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=QJqUOwlTiUU", "annotations": []}, "PF0c0UIsG0E": {"duration": 80.83, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=PF0c0UIsG0E", "annotations": [{"segment": [4.473867617784712, 75.8037006224649], "label": "Smoking hookah"}]}, "39HCogCoD7Y": {"duration": 184.112, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=39HCogCoD7Y", "annotations": [{"segment": [42.49308580343214, 52.82924180967239], "label": "Spread mulch"}, {"segment": [66.03655226209048, 72.92732293291732], "label": "Spread mulch"}]}, "FgZpY1V-dWo": {"duration": 228.28, "subset": "testing", "resolution": "2048x1556", "url": "https://www.youtube.com/watch?v=FgZpY1V-dWo", "annotations": []}, "-e9e4ke_wJk": {"duration": 28.21, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-e9e4ke_wJk", "annotations": [{"segment": [0.09976284551174841, 21.433043219343524], "label": "Hand washing clothes"}]}, "eyfUkLbsixg": {"duration": 197.649, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=eyfUkLbsixg", "annotations": [{"segment": [28.05919752574103, 39.776225063962556], "label": "Running a marathon"}, {"segment": [87.87770653666146, 90.65279200624025], "label": "Running a marathon"}, {"segment": [96.81964860530421, 102.06147671450857], "label": "Running a marathon"}, {"segment": [106.99496199375974, 112.23679010296412], "label": "Running a marathon"}, {"segment": [145.2294729079563, 154.47975780655224], "label": "Running a marathon"}, {"segment": [163.1133570452418, 168.66352798439937], "label": "Running a marathon"}, {"segment": [176.3720987332293, 177.60547005304213], "label": "Running a marathon"}]}, "IvkpJMxlRKc": {"duration": 29.351, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IvkpJMxlRKc", "annotations": [{"segment": [0.01, 29.349433], "label": "Peeling potatoes"}]}, "GOhmWfR8QBM": {"duration": 52.338, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=GOhmWfR8QBM", "annotations": []}, "Mk9PMED8K4g": {"duration": 210.88, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Mk9PMED8K4g", "annotations": [{"segment": [25.661346323240686, 186.86723989231677], "label": "Doing motocross"}]}, "R3ffxWaI9QU": {"duration": 26.96, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=R3ffxWaI9QU", "annotations": [{"segment": [1.4064599235569422, 25.631157711388454], "label": "High jump"}]}, "ZWudhOEyE_0": {"duration": 160.149, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=ZWudhOEyE_0", "annotations": [{"segment": [17.988545822152886, 151.15375308892354], "label": "Grooming dog"}]}, "7hfaWQgcDyo": {"duration": 54.1, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7hfaWQgcDyo", "annotations": [{"segment": [26.281669559999997, 41.85136063806552], "label": "Making a sandwich"}, {"segment": 
[2.233758422776911, 23.614440959999996], "label": "Making a sandwich"}]}, "fp0dd844oUU": {"duration": 107.6, "subset": "training", "resolution": "614x480", "url": "https://www.youtube.com/watch?v=fp0dd844oUU", "annotations": [{"segment": [17.476445088775492, 91.72770048], "label": "Ping-pong"}]}, "Rnux3rCLdmI": {"duration": 226.67000000000002, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Rnux3rCLdmI", "annotations": [{"segment": [174.82696489859595, 207.72478549141965], "label": "Polishing shoes"}]}, "kuNn6nQt7fM": {"duration": 181.05, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kuNn6nQt7fM", "annotations": []}, "Tsht1n005fI": {"duration": 102.818, "subset": "validation", "resolution": "320x180", "url": "https://www.youtube.com/watch?v=Tsht1n005fI", "annotations": [{"segment": [18.941649628055266, 20.222867693066746], "label": "Hurling"}, {"segment": [46.327692440730296, 48.890129225777024], "label": "Hurling"}]}, "-i3wp-ZNVko": {"duration": 28.52, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=-i3wp-ZNVko", "annotations": []}, "5WqnKjOz1z4": {"duration": 143.476, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5WqnKjOz1z4", "annotations": [{"segment": [5.592082683307333, 18.118347893915757], "label": "Raking leaves"}, {"segment": [45.40771138845554, 143.181], "label": "Raking leaves"}]}, "UXi0Cy16-0Y": {"duration": 101.031, "subset": "validation", "resolution": "426x206", "url": "https://www.youtube.com/watch?v=UXi0Cy16-0Y", "annotations": [{"segment": [0, 80.01277379095164], "label": "Rope skipping"}]}, "gxcTf2r3CRk": {"duration": 161.797, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gxcTf2r3CRk", "annotations": []}, "47OMV7rZrQA": {"duration": 228.79, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=47OMV7rZrQA", 
"annotations": [{"segment": [19.987562982839314, 220.57703434633387], "label": "Tai chi"}]}, "xMEwcb1P6dQ": {"duration": 48.67, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=xMEwcb1P6dQ", "annotations": [{"segment": [3.438172079563183, 43.8650305975039], "label": "Playing guitarra"}]}, "DQ9EaCSFwGI": {"duration": 205.241, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DQ9EaCSFwGI", "annotations": [{"segment": [0, 110.78541019344773], "label": "Longboarding"}, {"segment": [116.54881303588144, 183.46832381747268], "label": "Longboarding"}]}, "Qz_PtO18pXE": {"duration": 8.73, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Qz_PtO18pXE", "annotations": [{"segment": [5.0495663026521065, 8.701], "label": "Springboard diving"}]}, "4LJOkyEM-aY": {"duration": 189.731, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4LJOkyEM-aY", "annotations": []}, "Q3cc0HlO7so": {"duration": 143.57, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Q3cc0HlO7so", "annotations": [{"segment": [48.424050720000004, 113.51634624000002], "label": "Playing violin"}]}, "OnvOEKfeVuY": {"duration": 231.016, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OnvOEKfeVuY", "annotations": []}, "yUVaugWiZ0g": {"duration": 210.327, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yUVaugWiZ0g", "annotations": []}, "RisNywzGXt8": {"duration": 63.53, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RisNywzGXt8", "annotations": [{"segment": [10.005617784711388, 62.41127925117005], "label": "Putting on shoes"}]}, "Irg5qYkjJoY": {"duration": 198.345, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Irg5qYkjJoY", "annotations": [{"segment": [98.11520437679752, 
172.06905684997284], "label": "Slacklining"}]}, "3vVhq-v5-wM": {"duration": 30.349, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=3vVhq-v5-wM", "annotations": [{"segment": [9.838951638065524, 22.18494383775351], "label": "Playing ten pins"}]}, "YX0oF4tqJT8": {"duration": 142.2, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YX0oF4tqJT8", "annotations": []}, "8fqxUtHLyoE": {"duration": 79.645, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8fqxUtHLyoE", "annotations": [{"segment": [0.20000000000000004, 76.43877691107645], "label": "Rope skipping"}]}, "ZsXJw0d3QbU": {"duration": 114.173, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZsXJw0d3QbU", "annotations": [{"segment": [0.01, 114.141], "label": "Riding bumper cars"}]}, "kk1Iqyox4c8": {"duration": 91.67, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=kk1Iqyox4c8", "annotations": [{"segment": [6.290489859594384, 82.92009360374415], "label": "Playing saxophone"}]}, "N7kI8J6vfL8": {"duration": 80.76, "subset": "training", "resolution": "328x240", "url": "https://www.youtube.com/watch?v=N7kI8J6vfL8", "annotations": [{"segment": [0.01, 80.75900226757369], "label": "Fun sliding down"}]}, "ma-A4w_7eAo": {"duration": 78.14, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ma-A4w_7eAo", "annotations": []}, "VqeeqABnpQk": {"duration": 98.13, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VqeeqABnpQk", "annotations": [{"segment": [8.18281053, 91.39131936973479], "label": "Tango"}]}, "JTGS1YulUQw": {"duration": 76.46000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JTGS1YulUQw", "annotations": [{"segment": [49.742902462558504, 71.45323399297972], "label": "Ironing clothes"}]}, "HBklS2vYEPo": 
{"duration": 102.887, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HBklS2vYEPo", "annotations": []}, "FbPUX8cWmeU": {"duration": 131.68, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FbPUX8cWmeU", "annotations": [{"segment": [6.368317078261362, 125.92833275039001], "label": "Ballet"}]}, "a7IHiGxI9EE": {"duration": 90.071, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=a7IHiGxI9EE", "annotations": []}, "0NgQr2-AieQ": {"duration": 59.05, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0NgQr2-AieQ", "annotations": [{"segment": [2.763572995319813, 51.58669591263651], "label": "Washing face"}]}, "UwZbkuVsPno": {"duration": 57.68, "subset": "training", "resolution": "1920x1072", "url": "https://www.youtube.com/watch?v=UwZbkuVsPno", "annotations": [{"segment": [34.46304863962558, 48.950126527301094], "label": "Layup drill in basketball"}]}, "9vzbohxkIKA": {"duration": 50.085, "subset": "training", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=9vzbohxkIKA", "annotations": [{"segment": [0.01, 43.42264586583463], "label": "Playing blackjack"}]}, "Wd8Kz1XRpdg": {"duration": 145.07999999999998, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Wd8Kz1XRpdg", "annotations": [{"segment": [4.526623244929797, 62.4674007800312], "label": "Drinking beer"}, {"segment": [86.45850397815911, 96.19074395475819], "label": "Drinking beer"}]}, "6b8h8ztnj9Q": {"duration": 236.287, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=6b8h8ztnj9Q", "annotations": [{"segment": [12.163202808112326, 230.01457722308893], "label": "Using the rowing machine"}]}, "pCUun9uE3h8": {"duration": 81.363, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pCUun9uE3h8", "annotations": [{"segment": [49.27393925233645, 
79.8206074766355], "label": "Drum corps"}]}, "j7fPZQE3-fQ": {"duration": 74.65, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=j7fPZQE3-fQ", "annotations": [{"segment": [20.306090519999998, 62.34337824], "label": "Getting a piercing"}]}, "aFdYbgmbOeY": {"duration": 75.674, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aFdYbgmbOeY", "annotations": [{"segment": [10.152810482487327, 70.71550540608425], "label": "Playing rubik cube"}]}, "mnMmGnnbvSk": {"duration": 143.66, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=mnMmGnnbvSk", "annotations": [{"segment": [40.137608424336975, 112.34045709828393], "label": "Polishing shoes"}]}, "0tqPeeUvNsE": {"duration": 210.03, "subset": "testing", "resolution": "698x480", "url": "https://www.youtube.com/watch?v=0tqPeeUvNsE", "annotations": []}, "122b9bus5Bs": {"duration": 160.218, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=122b9bus5Bs", "annotations": []}, "rrc9Ph5juXM": {"duration": 213.345, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rrc9Ph5juXM", "annotations": [{"segment": [11.649087487309016, 202.02845961310453], "label": "Making a cake"}]}, "in5XF1bbYr0": {"duration": 95.713, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=in5XF1bbYr0", "annotations": [{"segment": [12.853765562553727, 76.91106374820116], "label": "Disc dog"}]}, "sUy3fUESkGI": {"duration": 31.347, "subset": "testing", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=sUy3fUESkGI", "annotations": []}, "yRgei7gpr-I": {"duration": 229.55, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yRgei7gpr-I", "annotations": [{"segment": [17.896489859594382, 66.2170124804992], "label": "Shot put"}, {"segment": [104.51550078003119, 147.10914664586582], "label": "Shot 
put"}]}, "XIRDxB7dRPA": {"duration": 21.32, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=XIRDxB7dRPA", "annotations": []}, "baktSTMnMv4": {"duration": 95.411, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=baktSTMnMv4", "annotations": [{"segment": [7.419188532949469, 15.580298409766966], "label": "Playing pool"}, {"segment": [19.289893808320375, 29.676760924269914], "label": "Playing pool"}, {"segment": [36.502416457608184, 41.250698567756544], "label": "Playing pool"}, {"segment": [50.15372752428473, 65.14049180738235], "label": "Playing pool"}]}, "hg-wVv7XSFo": {"duration": 228.044, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hg-wVv7XSFo", "annotations": [{"segment": [28.460867196592623, 36.64336644929797], "label": "Braiding hair"}, {"segment": [39.84521407522968, 208.83161267706708], "label": "Braiding hair"}]}, "XELYXH6fqeA": {"duration": 153.484, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XELYXH6fqeA", "annotations": [{"segment": [111.66962012480501, 148.0550054602184], "label": "Tug of war"}]}, "mfJj5gBQg-4": {"duration": 27.074, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=mfJj5gBQg-4", "annotations": [{"segment": [0.2, 26.17791107644306], "label": "Mooping floor"}]}, "wpSDjDdGOrc": {"duration": 108.28, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=wpSDjDdGOrc", "annotations": []}, "c6Cbq-WNq9M": {"duration": 192.773, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=c6Cbq-WNq9M", "annotations": [{"segment": [89.01798853042122, 101.94965578315133], "label": "Trimming branches or hedges"}]}, "MAyYq3HilFc": {"duration": 5.11, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=MAyYq3HilFc", "annotations": [{"segment": [0.01, 
5.0492932917316695], "label": "Javelin throw"}]}, "uqT5jtfx8x0": {"duration": 69.94, "subset": "training", "resolution": "854x478", "url": "https://www.youtube.com/watch?v=uqT5jtfx8x0", "annotations": [{"segment": [9.928866898595945, 45.60732267706708], "label": "Playing polo"}]}, "kU2HJk6l0lY": {"duration": 68.592, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kU2HJk6l0lY", "annotations": []}, "w6n_SrjLypA": {"duration": 120.164, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=w6n_SrjLypA", "annotations": []}, "7ofmJdesasE": {"duration": 189.55, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7ofmJdesasE", "annotations": []}, "nvcMYiZhUuo": {"duration": 174.406, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nvcMYiZhUuo", "annotations": []}, "4p6OjaHHSDs": {"duration": 37.48, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=4p6OjaHHSDs", "annotations": [{"segment": [13.622686684867393, 17.71533933697348], "label": "Smoking a cigarette"}]}, "nF9pKALI9Ik": {"duration": 51.015, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nF9pKALI9Ik", "annotations": []}, "FmRLElPPWV8": {"duration": 178.562, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=FmRLElPPWV8", "annotations": [{"segment": [8.634588143525741, 94.7019344773791], "label": "Canoeing"}, {"segment": [108.35015444617784, 178.561], "label": "Canoeing"}]}, "ZwxvczODMbM": {"duration": 51.363, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZwxvczODMbM", "annotations": [{"segment": [0.2, 47.122266090342684], "label": "Arm wrestling"}]}, "JUvqA51RX0Y": {"duration": 53.87, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JUvqA51RX0Y", "annotations": [{"segment": 
[1.848902473105727, 50.844818010407494], "label": "Hopscotch"}]}, "JmL6BiuXr_g": {"duration": 141.131, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JmL6BiuXr_g", "annotations": [{"segment": [89.7988268330733, 102.56434633385334], "label": "Waxing skis"}]}, "gGg-kio0dmU": {"duration": 8.64, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gGg-kio0dmU", "annotations": [{"segment": [4.2313204711388455, 6.1178964773790945], "label": "Springboard diving"}]}, "b5NP9oI-urM": {"duration": 5.898, "subset": "validation", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=b5NP9oI-urM", "annotations": [{"segment": [0.01, 5.316894717980025], "label": "Decorating the Christmas tree"}]}, "Rx4gq8hrZ8s": {"duration": 224.143, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Rx4gq8hrZ8s", "annotations": [{"segment": [5.376111747634745, 222.1660509999712], "label": "Table soccer"}]}, "eVkp83uGf3Y": {"duration": 17.09, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eVkp83uGf3Y", "annotations": []}, "TH1kq-KBr9Y": {"duration": 38.057, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=TH1kq-KBr9Y", "annotations": [{"segment": [0.2, 26.869755070202807], "label": "Hanging wallpaper"}]}, "lGKUEUBeo8U": {"duration": 145.13, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=lGKUEUBeo8U", "annotations": [{"segment": [4.754475842433698, 117.88786899], "label": "Doing step aerobics"}]}, "uWTge1GL818": {"duration": 14.58, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=uWTge1GL818", "annotations": []}, "icOO9whIgX8": {"duration": 207.63, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=icOO9whIgX8", "annotations": [{"segment": [13.277667706708268, 198.8411700468019], 
"label": "Playing field hockey"}]}, "gCxG1PInEVo": {"duration": 59.722, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gCxG1PInEVo", "annotations": []}, "iTc3UCWgUF0": {"duration": 83.777, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iTc3UCWgUF0", "annotations": [{"segment": [6.140135725429017, 19.204254290171605], "label": "Putting on shoes"}]}, "ThRbaI4pGIs": {"duration": 125.504, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ThRbaI4pGIs", "annotations": [{"segment": [13.874970404984424, 95.57664953271028], "label": "Surfing"}]}, "vXcfhKnUjRc": {"duration": 232.363, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vXcfhKnUjRc", "annotations": [{"segment": [16.664845553822154, 223.88857722308893], "label": "Drum corps"}]}, "EDkYPikPWW8": {"duration": 66.386, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=EDkYPikPWW8", "annotations": [{"segment": [41.11574287675508, 57.27205493915758], "label": "Hitting a pinata"}]}, "702l5pkas6o": {"duration": 44.211, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=702l5pkas6o", "annotations": []}, "pMXz0BKQOLQ": {"duration": 206.658, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pMXz0BKQOLQ", "annotations": [{"segment": [14.61151669046515, 175.53129862192932], "label": "Having an ice cream"}]}, "Vtnn6yJqHqM": {"duration": 32.65, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Vtnn6yJqHqM", "annotations": [{"segment": [2.7883802699999998, 32.53], "label": "Playing piano"}]}, "L73C-2b0yEw": {"duration": 64.761, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=L73C-2b0yEw", "annotations": [{"segment": [30.8647708049922, 39.85647162870515], "label": "Waxing skis"}]}, 
"QRYmJnPbp0U": {"duration": 35.736, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=QRYmJnPbp0U", "annotations": []}, "SXlSHYVNcvM": {"duration": 238.52, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SXlSHYVNcvM", "annotations": [{"segment": [98.6059836427457, 212.09588934477378], "label": "Polishing forniture"}]}, "aXCAudMzN2o": {"duration": 117.54, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aXCAudMzN2o", "annotations": [{"segment": [0.2, 116.62256493603745], "label": "Powerbocking"}]}, "bFHpaULTwsE": {"duration": 74.885, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bFHpaULTwsE", "annotations": [{"segment": [16.939451497659906, 27.10312239625585], "label": "Spread mulch"}, {"segment": [40.77150739781591, 61.098849195007794], "label": "Spread mulch"}]}, "PHIQBuDG-fo": {"duration": 224.561, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PHIQBuDG-fo", "annotations": []}, "Nd209mI-wq0": {"duration": 172.99, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Nd209mI-wq0", "annotations": [{"segment": [135.97909463962557, 172.941666], "label": "Playing guitarra"}, {"segment": [4.316796655226209, 16.997386829953196], "label": "Playing guitarra"}]}, "uLphvdhunTY": {"duration": 24.892, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uLphvdhunTY", "annotations": []}, "qxmrH20IA2Q": {"duration": 5.34, "subset": "validation", "resolution": "352x262", "url": "https://www.youtube.com/watch?v=qxmrH20IA2Q", "annotations": [{"segment": [0, 5.340589], "label": "Curling"}]}, "aTAFgaIASTk": {"duration": 78.809, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aTAFgaIASTk", "annotations": []}, "Ny49eEt1OJg": {"duration": 85.798, "subset": "validation", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=Ny49eEt1OJg", "annotations": [{"segment": [40.02109449141966, 85.597731], "label": "Plastering"}, {"segment": [42.43039114976599, 85.597731], "label": "Plastering"}]}, "TBeZXLchito": {"duration": 180.443, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TBeZXLchito", "annotations": [{"segment": [10.693413416536663, 180.181], "label": "Longboarding"}]}, "Ld2a5ogu9k8": {"duration": 36.131, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ld2a5ogu9k8", "annotations": [{"segment": [7.092365054602184, 28.538326053042123], "label": "Putting on shoes"}]}, "6rG5dBgwBDc": {"duration": 154.018, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=6rG5dBgwBDc", "annotations": [{"segment": [36.263792511700466, 89.8189297971919], "label": "Decorating the Christmas tree"}]}, "rc-DiL5er28": {"duration": 129.82, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rc-DiL5er28", "annotations": []}, "TTWzoRcW5FA": {"duration": 217.9, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TTWzoRcW5FA", "annotations": []}, "-fBTCykx4gM": {"duration": 103.56, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=-fBTCykx4gM", "annotations": [{"segment": [41.19821282647224, 81.42705457722309], "label": "Brushing teeth"}]}, "Q5kU3DUXXZg": {"duration": 133.86, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Q5kU3DUXXZg", "annotations": [{"segment": [19.918876755070205, 122.37715001560062], "label": "Spinning"}]}, "-vPshfXc1fc": {"duration": 46.951, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-vPshfXc1fc", "annotations": [{"segment": [5.197177847113885, 46.901], "label": "Welding"}]}, "AnbC-26sAI4": {"duration": 108.46000000000001, "subset": 
"training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=AnbC-26sAI4", "annotations": [{"segment": [3.4249039716646, 44.98449962300925], "label": "Sailing"}, {"segment": [62.72335075268865, 91.95022055175407], "label": "Sailing"}]}, "KrHpGJBzjJk": {"duration": 76.324, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=KrHpGJBzjJk", "annotations": [{"segment": [8.151713728549142, 67.53427067082684], "label": "Playing congas"}]}, "Xo4-GnduX8A": {"duration": 230.598, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=Xo4-GnduX8A", "annotations": [{"segment": [16.1846255850234, 204.30366302652106], "label": "Futsal"}]}, "DzCk5xjSF9o": {"duration": 42.68, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DzCk5xjSF9o", "annotations": [{"segment": [2.6976763800000003, 3.42533628], "label": "Making a sandwich"}]}, "giN7cA0EmLs": {"duration": 117.632, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=giN7cA0EmLs", "annotations": [{"segment": [0.2, 111.76854606847931], "label": "River tubing"}]}, "YBuNpnthXfE": {"duration": 112.246, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YBuNpnthXfE", "annotations": [{"segment": [0, 87.2045849953198], "label": "Tug of war"}, {"segment": [94.38407894071763, 109.61861487363494], "label": "Tug of war"}]}, "UoX2xhYPD38": {"duration": 81.15, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UoX2xhYPD38", "annotations": [{"segment": [6.48618246, 41.463576360000005], "label": "Hand washing clothes"}]}, "Y4IsLkxb5CI": {"duration": 181.046, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Y4IsLkxb5CI", "annotations": [{"segment": [0.19999999999999998, 149.7436429525081], "label": "Swimming"}]}, "HvzJ-RJOF9Q": {"duration": 82.8, "subset": "training", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=HvzJ-RJOF9Q", "annotations": [{"segment": [67.4303125647426, 81.25223487207488], "label": "Springboard diving"}]}, "-V5_GMuMzc8": {"duration": 216.55, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-V5_GMuMzc8", "annotations": [{"segment": [16.55369041428324, 190.53635404680188], "label": "Getting a haircut"}]}, "M2ntxFBPaug": {"duration": 207.66, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=M2ntxFBPaug", "annotations": [{"segment": [71.59420091905717, 75.15771318199667], "label": "Tennis serve with ball bouncing"}, {"segment": [102.36998864444374, 106.5814122279177], "label": "Tennis serve with ball bouncing"}, {"segment": [131.20204240822693, 139.3009339149076], "label": "Tennis serve with ball bouncing"}]}, "bHAzuAnnvcU": {"duration": 21.943, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=bHAzuAnnvcU", "annotations": [{"segment": [0.01, 21.942857142857143], "label": "Using the monkey bar"}]}, "BD9CAqD1Yf8": {"duration": 137.184, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=BD9CAqD1Yf8", "annotations": []}, "vigHVj40dO4": {"duration": 231.32, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vigHVj40dO4", "annotations": [{"segment": [6.133313572542902, 35.3567488299532], "label": "Mowing the lawn"}, {"segment": [42.933195007800315, 77.92916068642747], "label": "Mowing the lawn"}, {"segment": [116.17217472698907, 163.7955507020281], "label": "Mowing the lawn"}, {"segment": [183.99940717628704, 216.8306739469579], "label": "Mowing the lawn"}]}, "5IkU9svJFRI": {"duration": 69.57, "subset": "training", "resolution": "3414x1080", "url": "https://www.youtube.com/watch?v=5IkU9svJFRI", "annotations": [{"segment": [14.710522620904836, 25.784126365054604], "label": "Starting a campfire"}]}, 
"pRTmUkkIze4": {"duration": 147.61, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=pRTmUkkIze4", "annotations": []}, "crgj7lKtIUA": {"duration": 28.63, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=crgj7lKtIUA", "annotations": []}, "r015El3onHw": {"duration": 84.939, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=r015El3onHw", "annotations": [{"segment": [13.648479239849868, 14.576045790130925], "label": "BMX"}, {"segment": [22.52661622111143, 24.646768336039564], "label": "BMX"}, {"segment": [28.224525029980793, 34.05494334603316], "label": "BMX"}, {"segment": [36.307604968144304, 37.76520950078003], "label": "BMX"}, {"segment": [39.22281407800312, 44.788213372854905], "label": "BMX"}, {"segment": [47.835932034321374, 59.49676865210608], "label": "BMX"}, {"segment": [61.881939778471136, 69.30247217160685], "label": "BMX"}, {"segment": [77.2530425928237, 78.84315667706709], "label": "BMX"}]}, "y-87JMf_-J8": {"duration": 112.97, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=y-87JMf_-J8", "annotations": [{"segment": [54.49372776911076, 109.09481981279251], "label": "Playing guitarra"}]}, "VdGZfI-8RuA": {"duration": 221.32999999999998, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=VdGZfI-8RuA", "annotations": [{"segment": [42.36515812, 197.00982652], "label": "Fixing bicycle"}]}, "RseCMmSvcPY": {"duration": 122.323, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=RseCMmSvcPY", "annotations": [{"segment": [12.401817472698907, 122.281], "label": "Playing pool"}]}, "9uieGqu2TYA": {"duration": 232.48, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=9uieGqu2TYA", "annotations": [{"segment": [33.67211352, 158.4802526], "label": "Using parallel bars"}]}, "DXluU6_7OXQ": {"duration": 148.585, 
"subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=DXluU6_7OXQ", "annotations": [{"segment": [27.03636553484146, 32.35120731603294], "label": "Having an ice cream"}, {"segment": [39.51468971676929, 49.682213124266035], "label": "Having an ice cream"}, {"segment": [85.96178528283397, 92.8941876061272], "label": "Having an ice cream"}, {"segment": [108.60763155953443, 119.69947514273817], "label": "Having an ice cream"}, {"segment": [129.17375820339134, 133.56427962174283], "label": "Having an ice cream"}]}, "THtDZ6g9csA": {"duration": 54.31, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=THtDZ6g9csA", "annotations": [{"segment": [0, 54.311473], "label": "Doing nails"}]}, "cWBbuw_DA2c": {"duration": 47.09, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cWBbuw_DA2c", "annotations": [{"segment": [0.8080900826833074, 45.473432834633385], "label": "Smoking hookah"}]}, "rzomS2gZGQ4": {"duration": 11.866, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rzomS2gZGQ4", "annotations": [{"segment": [0.01, 11.821], "label": "Removing ice from car"}]}, "aQAAzVQ_gSA": {"duration": 170.156, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aQAAzVQ_gSA", "annotations": [{"segment": [13.247741433021806, 157.6481230529595], "label": "Playing ten pins"}]}, "N2nNkWXYAYw": {"duration": 213.67000000000002, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=N2nNkWXYAYw", "annotations": [{"segment": [25.000392668767976, 153.33574170177693], "label": "Vacuuming floor"}]}, "8kjYbv52EcI": {"duration": 43.004, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8kjYbv52EcI", "annotations": [{"segment": [0.01, 2.884768302652106], "label": "Canoeing"}, {"segment": [5.367010795631826, 18.516187244929796], "label": "Canoeing"}, 
{"segment": [22.809795881435257, 27.036316882995322], "label": "Canoeing"}, {"segment": [36.764023950078005, 43.003174], "label": "Canoeing"}]}, "jJxS5NgbZAg": {"duration": 155.737, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jJxS5NgbZAg", "annotations": []}, "PY6WgOIZlhw": {"duration": 223.35, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PY6WgOIZlhw", "annotations": [{"segment": [0.01, 136.59013347200553], "label": "Snatch"}]}, "ifDsS6ZRsRM": {"duration": 91.98, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ifDsS6ZRsRM", "annotations": [{"segment": [17.88066534, 79.66499682215289], "label": "Ballet"}, {"segment": [9.424575780000001, 12.97109322], "label": "Ballet"}]}, "v5O_Yfeqza4": {"duration": 31.789, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=v5O_Yfeqza4", "annotations": [{"segment": [9.696504672897195, 20.530864485981308], "label": "Throwing darts"}]}, "jOH5bDU0MQs": {"duration": 24.196, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jOH5bDU0MQs", "annotations": []}, "nm2sHNDN7Lo": {"duration": 14.304, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nm2sHNDN7Lo", "annotations": [{"segment": [0.01, 14.303491], "label": "Beer pong"}]}, "Y_slRMRs18I": {"duration": 39.056, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Y_slRMRs18I", "annotations": [{"segment": [9.692905744047478, 38.51267251315824], "label": "Fixing the roof"}]}, "46NS8dT15Hw": {"duration": 60.396, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=46NS8dT15Hw", "annotations": []}, "BOVYcAeBxyY": {"duration": 54.475, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BOVYcAeBxyY", "annotations": [{"segment": [12.832411798751949, 
48.86514426677067], "label": "Raking leaves"}]}, "fk2CxZEzN7U": {"duration": 99.94, "subset": "testing", "resolution": "840x480", "url": "https://www.youtube.com/watch?v=fk2CxZEzN7U", "annotations": []}, "z5xZrF421HE": {"duration": 219.2, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=z5xZrF421HE", "annotations": [{"segment": [42.08189148570741, 207.9888167193523], "label": "Rock climbing"}]}, "EiXW33yuAcw": {"duration": 52.5, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EiXW33yuAcw", "annotations": [{"segment": [0.01, 17.840494868941477], "label": "Drinking coffee"}, {"segment": [32.93220436817473, 50.99180031201248], "label": "Drinking coffee"}]}, "85lhWBgqkvE": {"duration": 231.43, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=85lhWBgqkvE", "annotations": []}, "8ZcddBvJ0Co": {"duration": 184.042, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=8ZcddBvJ0Co", "annotations": []}, "NJjoTu1vS7A": {"duration": 174.78, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NJjoTu1vS7A", "annotations": [{"segment": [41.717347257155595, 131.96860129797193], "label": "Playing lacrosse"}, {"segment": [137.42184928549142, 155.9628924430577], "label": "Playing lacrosse"}]}, "9AvEM_n8iJE": {"duration": 104.24000000000001, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=9AvEM_n8iJE", "annotations": []}, "cEa87QoI3Do": {"duration": 208.26, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cEa87QoI3Do", "annotations": []}, "JvFo4uTvmYs": {"duration": 14.1, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=JvFo4uTvmYs", "annotations": [{"segment": [4.551581555382215, 11.433924680187207], "label": "Pole vault"}]}, "f2LGG2ocbu8": {"duration": 211.56, "subset": "training", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=f2LGG2ocbu8", "annotations": [{"segment": [5.940758381355662, 22.772907128530036], "label": "Shot put"}]}, "UL4YwgCFrDI": {"duration": 77.93, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UL4YwgCFrDI", "annotations": [{"segment": [0.01, 77.51837597503899], "label": "Tai chi"}]}, "kIIAJsYMjyc": {"duration": 52.106, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=kIIAJsYMjyc", "annotations": [{"segment": [7.799962558502341, 31.362349453978158], "label": "Kneeling"}]}, "g17h49EYsJY": {"duration": 72.03, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=g17h49EYsJY", "annotations": [{"segment": [21.45957795830251, 69.81502412341155], "label": "Kayaking"}]}, "NttjvRpSdsI": {"duration": 61.46, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NttjvRpSdsI", "annotations": [{"segment": [27.71059355, 53.22458340561622], "label": "Doing kickboxing"}, {"segment": [5.27536929, 23.132214679999997], "label": "Doing kickboxing"}]}, "o3Nuqg4w_b8": {"duration": 120.86, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=o3Nuqg4w_b8", "annotations": [{"segment": [0.01, 117.59917628705148], "label": "Doing step aerobics"}]}, "390iVq_urQM": {"duration": 127.524, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=390iVq_urQM", "annotations": []}, "4A1cjG6k7pI": {"duration": 76.51, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4A1cjG6k7pI", "annotations": []}, "kzm9ckZ5gU0": {"duration": 58.608, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kzm9ckZ5gU0", "annotations": [{"segment": [13.064310452418097, 19.10051326053042], "label": "Doing a powerbomb"}]}, "KRIn0DaH3Hk": {"duration": 152.625, "subset": 
"testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KRIn0DaH3Hk", "annotations": []}, "ZxUbde4YHXE": {"duration": 201.82999999999998, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ZxUbde4YHXE", "annotations": []}, "gIgim1Dp8HU": {"duration": 62.973, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gIgim1Dp8HU", "annotations": [{"segment": [1.8626817472698909, 31.18533229329173], "label": "Sumo"}, {"segment": [38.834564742589706, 62.660999999999994], "label": "Sumo"}]}, "6ciZ58xAV9I": {"duration": 50.11, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6ciZ58xAV9I", "annotations": [{"segment": [5.784775347476485, 39.711700929797196], "label": "Using the pommel horse"}]}, "fy7vTpeBN_U": {"duration": 106.22999999999999, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fy7vTpeBN_U", "annotations": []}, "GjjRDUirW7g": {"duration": 214.51, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=GjjRDUirW7g", "annotations": [{"segment": [64.92078801000001, 178.82680029407177], "label": "Playing harmonica"}]}, "wnsy_i-IXpM": {"duration": 146.82, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wnsy_i-IXpM", "annotations": [{"segment": [21.530512768810073, 74.89869867447759], "label": "Skiing"}, {"segment": [82.91537896073666, 121.39544330733229], "label": "Skiing"}]}, "WA7arPPyLPg": {"duration": 188.31, "subset": "testing", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=WA7arPPyLPg", "annotations": []}, "rDADR0Lg4U8": {"duration": 101.05, "subset": "training", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=rDADR0Lg4U8", "annotations": [{"segment": [13.469251170046803, 98.06560062402497], "label": "Playing field hockey"}]}, "bVOD0M4LCEY": {"duration": 122.299, "subset": "training", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bVOD0M4LCEY", "annotations": [{"segment": [0.2, 120.06265463660192], "label": "Camel ride"}]}, "j4EFi_NoEbQ": {"duration": 82.28999999999999, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=j4EFi_NoEbQ", "annotations": [{"segment": [1.5405588631708533, 70.22380811606864], "label": "Skateboarding"}]}, "72PUOTjZpQU": {"duration": 74.77, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=72PUOTjZpQU", "annotations": [{"segment": [0.01, 50.73976673352649], "label": "Zumba"}]}, "B_heSKsoI9o": {"duration": 64.78, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=B_heSKsoI9o", "annotations": [{"segment": [14.957784393135727, 36.78806431825274], "label": "Archery"}, {"segment": [39.31471708736349, 47.9053365023401], "label": "Archery"}]}, "XgJ6iiK-gQo": {"duration": 57.563, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XgJ6iiK-gQo", "annotations": [{"segment": [3.681829907209894, 21.013370689929637], "label": "Trimming branches or hedges"}, {"segment": [34.57328083599535, 53.611034012480495], "label": "Trimming branches or hedges"}]}, "hvRNXdJa5TE": {"duration": 132.22, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hvRNXdJa5TE", "annotations": []}, "tiP4Q4mGidE": {"duration": 155.39, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tiP4Q4mGidE", "annotations": []}, "3nSOFmmA7FU": {"duration": 83.616, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3nSOFmmA7FU", "annotations": []}, "lVGvv7S6oKo": {"duration": 98.453, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=lVGvv7S6oKo", "annotations": []}, "7-taNK5mW8E": {"duration": 26.59, "subset": "testing", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=7-taNK5mW8E", "annotations": []}, "yWEFVfX-JoI": {"duration": 238.121, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yWEFVfX-JoI", "annotations": [{"segment": [0.2, 222.9000964230171], "label": "Canoeing"}]}, "O_fdvOxYqiY": {"duration": 165.025, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=O_fdvOxYqiY", "annotations": [{"segment": [36.999153156589735, 161.87129327102804], "label": "Playing congas"}]}, "OVpila9fAaQ": {"duration": 230.064, "subset": "training", "resolution": "632x360", "url": "https://www.youtube.com/watch?v=OVpila9fAaQ", "annotations": [{"segment": [6.101523015600624, 160.43416399843994], "label": "Waterskiing"}]}, "hsjURYHGd7A": {"duration": 158.082, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hsjURYHGd7A", "annotations": []}, "T47mErD2KeA": {"duration": 21.061, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=T47mErD2KeA", "annotations": [{"segment": [0.01, 20.281008461176793], "label": "Making an omelette"}]}, "rcMv32d1QkI": {"duration": 121.65, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rcMv32d1QkI", "annotations": [{"segment": [15.18422818, 97.75601014040562], "label": "Skateboarding"}]}, "eS1USHkyb2U": {"duration": 192.8, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eS1USHkyb2U", "annotations": [{"segment": [5.1131354424244995, 8.12086217326244], "label": "Brushing hair"}]}, "3z98x3-Jxls": {"duration": 9.29, "subset": "testing", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=3z98x3-Jxls", "annotations": []}, "ngwH6Zy5vb8": {"duration": 234.89, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ngwH6Zy5vb8", "annotations": [{"segment": [24.971474830000002, 67.05986935803963], "label": "Playing 
piano"}]}, "6rfFmqz6s8M": {"duration": 62.16, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6rfFmqz6s8M", "annotations": [{"segment": [22.449245166146643, 55.226112838533545], "label": "Ping-pong"}]}, "s_XdqaQj0uI": {"duration": 208.214, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=s_XdqaQj0uI", "annotations": [{"segment": [99.09893387996188, 120.5374361857341], "label": "Carving jack-o-lanterns"}]}, "xRfSkg8j7Dg": {"duration": 223.423, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=xRfSkg8j7Dg", "annotations": [{"segment": [90.25846957878315, 160.65310608424338], "label": "Cutting the grass"}, {"segment": [182.2593806552262, 204.92946645865834], "label": "Cutting the grass"}]}, "aBkLboBmHoM": {"duration": 184.251, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aBkLboBmHoM", "annotations": []}, "Au4PfoK7hyw": {"duration": 85.64, "subset": "testing", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=Au4PfoK7hyw", "annotations": []}, "vuntaZJBcfI": {"duration": 124.645, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vuntaZJBcfI", "annotations": [{"segment": [0.7778141341653667, 49.78010458658347], "label": "Running a marathon"}, {"segment": [51.72463992199688, 113.75531712168488], "label": "Running a marathon"}]}, "DqHWMWY-r1o": {"duration": 68.11, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DqHWMWY-r1o", "annotations": [{"segment": [0, 2.1761558271271415], "label": "Hammer throw"}]}, "MfSdXqOC-f0": {"duration": 91.81, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MfSdXqOC-f0", "annotations": [{"segment": [0.2, 27.214083366055732], "label": "Archery"}, {"segment": [43.542533216848675, 81.2125537301092], "label": "Archery"}]}, "zChDaAcfc2o": {"duration": 
75.605, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zChDaAcfc2o", "annotations": [{"segment": [12.825496884735202, 63.909070093457956], "label": "Canoeing"}]}, "LNK_yYs6UOA": {"duration": 14.09, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LNK_yYs6UOA", "annotations": [{"segment": [0.5807124009360374, 14.046666], "label": "Discus throw"}]}, "MxKuqpxmKKk": {"duration": 180.33, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=MxKuqpxmKKk", "annotations": [{"segment": [9.542277691107644, 21.049141965678626], "label": "Shot put"}, {"segment": [23.575039001560064, 34.239937597503896], "label": "Shot put"}, {"segment": [39.57238689547582, 54.447113884555385], "label": "Shot put"}, {"segment": [59.21825273010921, 72.40904836193448], "label": "Shot put"}, {"segment": [79.9867394695788, 91.77425897035882], "label": "Shot put"}, {"segment": [94.01950078003121, 115.91060842433698], "label": "Shot put"}, {"segment": [115.91060842433698, 147.06333853354133], "label": "Shot put"}]}, "NT0dXbWzt7w": {"duration": 50.55, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NT0dXbWzt7w", "annotations": [{"segment": [0.01, 50.54984126984127], "label": "Having an ice cream"}]}, "wqlEUeJzNVc": {"duration": 138.76, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wqlEUeJzNVc", "annotations": [{"segment": [55.16634509466297, 131.0200693937284], "label": "Dodgeball"}]}, "n-BJ753InB0": {"duration": 254.932, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=n-BJ753InB0", "annotations": [{"segment": [13.124419433920215, 233.05787076131045], "label": "Removing ice from car"}]}, "4qnrM4k6qN0": {"duration": 227.37, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=4qnrM4k6qN0", "annotations": [{"segment": 
[18.09026457384826, 218.14730721528863], "label": "Futsal"}]}, "P6Oq6C9X0fw": {"duration": 188.756, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=P6Oq6C9X0fw", "annotations": []}, "d14twk9O2Vw": {"duration": 185.9, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=d14twk9O2Vw", "annotations": [{"segment": [113.93995787831514, 138.01846489859594], "label": "Getting a haircut"}]}, "fdDRoBEXdZI": {"duration": 230.226, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fdDRoBEXdZI", "annotations": []}, "_EFv0igpT0s": {"duration": 7.73, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=_EFv0igpT0s", "annotations": []}, "ZLJJ5LxfE6o": {"duration": 213.856, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZLJJ5LxfE6o", "annotations": [{"segment": [30.360145728549146, 88.41141338533541], "label": "Capoeira"}, {"segment": [93.08220503588144, 177.49008272074883], "label": "Capoeira"}, {"segment": [179.49185057098285, 192.8369695725429], "label": "Capoeira"}]}, "mU7nezFZ-Ew": {"duration": 105.91, "subset": "testing", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=mU7nezFZ-Ew", "annotations": []}, "-Q03gEypilg": {"duration": 202.9, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-Q03gEypilg", "annotations": [{"segment": [7.12026129875195, 196.67743987441497], "label": "Using the balance beam"}]}, "c21Y_pi3-Gg": {"duration": 168.11, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=c21Y_pi3-Gg", "annotations": [{"segment": [11.331285330302515, 159.80444808731187], "label": "Making a sandwich"}]}, "ILmZg0vz0Hg": {"duration": 236.64, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=ILmZg0vz0Hg", "annotations": []}, "biyf6Q-xF0M": {"duration": 166.325, 
"subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=biyf6Q-xF0M", "annotations": [{"segment": [56.565910549347144, 60.458060357788455], "label": "Carving jack-o-lanterns"}, {"segment": [75.76718293765764, 82.25409928505984], "label": "Carving jack-o-lanterns"}]}, "NzJkKwzPqvk": {"duration": 68.081, "subset": "training", "resolution": "328x240", "url": "https://www.youtube.com/watch?v=NzJkKwzPqvk", "annotations": [{"segment": [0.19999999999999998, 67.901], "label": "Playing pool"}]}, "pvFviIF1VGc": {"duration": 61.26, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=pvFviIF1VGc", "annotations": [{"segment": [15.308010753510139, 56.224749776911075], "label": "Mixing drinks"}]}, "xfhwYTFCGYY": {"duration": 104.53999999999999, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xfhwYTFCGYY", "annotations": [{"segment": [62.95005776829712, 85.4555188357194], "label": "Ironing clothes"}]}, "IOQjNOM59Yw": {"duration": 47.067, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=IOQjNOM59Yw", "annotations": []}, "aINlF3UDJ7s": {"duration": 79.88, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=aINlF3UDJ7s", "annotations": [{"segment": [25.939409429999998, 70.17941497659906], "label": "Mixing drinks"}]}, "6ZUHLsyJ6yo": {"duration": 112.918, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=6ZUHLsyJ6yo", "annotations": [{"segment": [0.01, 0.02], "label": "Drum corps"}]}, "Aygp8JaMkqQ": {"duration": 174.847, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Aygp8JaMkqQ", "annotations": [{"segment": [55.91807003120125, 155.47951179407178], "label": "Surfing"}]}, "u8ykXBc2Efs": {"duration": 104.82, "subset": "validation", "resolution": "2276x720", "url": "https://www.youtube.com/watch?v=u8ykXBc2Efs", "annotations": 
[{"segment": [0, 104.81487528344671], "label": "Washing dishes"}]}, "KIIcgQyr0-Y": {"duration": 219.52, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KIIcgQyr0-Y", "annotations": [{"segment": [9.758248829953198, 81.03536986], "label": "Playing guitarra"}, {"segment": [99.00638348, 205.6080148205928], "label": "Playing guitarra"}]}, "9AzESzqOtb0": {"duration": 68.03999999999999, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9AzESzqOtb0", "annotations": [{"segment": [8.64561154446178, 60.731443057722316], "label": "Pole vault"}]}, "2mI7NL54yP8": {"duration": 170.94, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2mI7NL54yP8", "annotations": [{"segment": [45.97565775, 148.35461825], "label": "Doing motocross"}]}, "ZBuPedt-oEE": {"duration": 153.468, "subset": "training", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=ZBuPedt-oEE", "annotations": [{"segment": [17.000235569422777, 126.42428705148205], "label": "Making a cake"}]}, "kYkvyRKxwfY": {"duration": 23.06, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kYkvyRKxwfY", "annotations": []}, "AMU1mHpR6Os": {"duration": 121.789, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AMU1mHpR6Os", "annotations": [{"segment": [0.01, 121.78866213151927], "label": "Grooming dog"}]}, "a_v_1fL2VyM": {"duration": 20.99, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=a_v_1fL2VyM", "annotations": [{"segment": [0.01, 18.89498749453978], "label": "Pole vault"}]}, "_NwkwvaC7Bg": {"duration": 62.02, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=_NwkwvaC7Bg", "annotations": [{"segment": [18.383611208393916, 61.53671777223089], "label": "Chopping wood"}]}, "qgV5UlcDevM": {"duration": 200.528, "subset": "testing", "resolution": "1920x1080", 
"url": "https://www.youtube.com/watch?v=qgV5UlcDevM", "annotations": []}, "9wMEbyzxNHI": {"duration": 162.15, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9wMEbyzxNHI", "annotations": [{"segment": [12.00189888, 121.6809648], "label": "Putting on makeup"}]}, "JnP5FDtEPVE": {"duration": 59.838, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JnP5FDtEPVE", "annotations": [{"segment": [44.08108034321373, 49.398797971918874], "label": "Grooming dog"}]}, "Ccy8WD2k4L8": {"duration": 103.538, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Ccy8WD2k4L8", "annotations": [{"segment": [34.70891575663026, 74.7596926677067], "label": "Playing drums"}]}, "3Z4b34lBnyU": {"duration": 20.318, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=3Z4b34lBnyU", "annotations": [{"segment": [4.691082881410495, 9.572344177847112], "label": "Doing a powerbomb"}]}, "Zhf052cWrv8": {"duration": 98.639, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=Zhf052cWrv8", "annotations": [{"segment": [39.731479799021194, 90.0477960153929], "label": "Using the rowing machine"}]}, "8kO6A3W_kQ8": {"duration": 80.34, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8kO6A3W_kQ8", "annotations": [{"segment": [0, 20.30460057098284], "label": "Brushing hair"}, {"segment": [38.10246033073323, 80.141043], "label": "Brushing hair"}]}, "kt_sGN-1prU": {"duration": 197.25, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kt_sGN-1prU", "annotations": [{"segment": [7.076572542901716, 189.22139625585024], "label": "Using uneven bars"}]}, "Zomv5zlkkEc": {"duration": 182.67, "subset": "validation", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=Zomv5zlkkEc", "annotations": [{"segment": [0.01, 182.671383], "label": "Belly dance"}]}, 
"RnShLAifVno": {"duration": 102.84, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=RnShLAifVno", "annotations": [{"segment": [51.741363673946964, 68.10607404524181], "label": "Washing dishes"}]}, "tFiXLhbKdnk": {"duration": 66.223, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=tFiXLhbKdnk", "annotations": [{"segment": [4.0291886864274575, 64.15708139157566], "label": "Hand car wash"}]}, "ujltXvkQK_g": {"duration": 162.26, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ujltXvkQK_g", "annotations": [{"segment": [33.4906704, 101.80579662], "label": "Playing lacrosse"}, {"segment": [116.2924587, 156.01329141107644], "label": "Playing lacrosse"}]}, "I5N4rwhRa0o": {"duration": 182.3, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=I5N4rwhRa0o", "annotations": [{"segment": [15.926355007800312, 23.03633492199688], "label": "Playing saxophone"}, {"segment": [52.32945216848674, 54.03584734789392], "label": "Playing saxophone"}, {"segment": [109.20929148205929, 120.86965854134165], "label": "Playing saxophone"}, {"segment": [147.034384625585, 149.87837659126365], "label": "Playing saxophone"}]}, "-wXbBZDSIa8": {"duration": 25.264, "subset": "validation", "resolution": "852x480", "url": "https://www.youtube.com/watch?v=-wXbBZDSIa8", "annotations": [{"segment": [0.01, 24.451013144056372], "label": "Doing a powerbomb"}]}, "tCRsEiWgPdk": {"duration": 10.821, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tCRsEiWgPdk", "annotations": []}, "x8AR0FD5Jqo": {"duration": 196.256, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=x8AR0FD5Jqo", "annotations": [{"segment": [16.22698341947283, 43.16989928576735], "label": "BMX"}, {"segment": [51.742645243224686, 102.56678199100755], "label": "BMX"}, {"segment": [106.24081597277495, 
132.57139284210822], "label": "BMX"}, {"segment": [137.16393531931755, 195.02996901560064], "label": "BMX"}]}, "xmGucghu2FI": {"duration": 195.79, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xmGucghu2FI", "annotations": [{"segment": [9.468814953958702, 187.23817957344144], "label": "Javelin throw"}]}, "bQGegLwVc8I": {"duration": 57.38, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bQGegLwVc8I", "annotations": [{"segment": [12.710552031201248, 47.97785837129485], "label": "Springboard diving"}]}, "Jq8b1u0vTRI": {"duration": 205.73, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Jq8b1u0vTRI", "annotations": []}, "x_lN_QxbEjQ": {"duration": 203.709, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=x_lN_QxbEjQ", "annotations": [{"segment": [12.706458658346335, 31.13082371294852], "label": "Surfing"}, {"segment": [37.48405304212169, 65.75592355694228], "label": "Surfing"}, {"segment": [69.88552262090484, 100.38102340093604], "label": "Surfing"}, {"segment": [103.87529953198128, 126.42926365054602], "label": "Surfing"}, {"segment": [130.24120124804992, 149.30088923556943], "label": "Surfing"}, {"segment": [152.15984243369735, 156.62248517940716], "label": "Surfing"}, {"segment": [160.43479719188767, 175.04865990639624], "label": "Surfing"}, {"segment": [178.86097191887674, 196.016375975039], "label": "Surfing"}]}, "HxculAXODvo": {"duration": 160.381, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HxculAXODvo", "annotations": []}, "9V7cMp_w1_0": {"duration": 176.611, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=9V7cMp_w1_0", "annotations": [{"segment": [4.1328621308117635, 176.61097505668934], "label": "Removing ice from car"}]}, "iGOGz6M_zcE": {"duration": 41.31, "subset": "training", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=iGOGz6M_zcE", "annotations": [{"segment": [4.188828970358815, 29.901794496099846], "label": "Washing face"}]}, "uTc2-oAxgbc": {"duration": 176.75, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uTc2-oAxgbc", "annotations": [{"segment": [42.73993600624025, 131.80444781279252], "label": "Getting a piercing"}]}, "Xhrk1wP8tCs": {"duration": 67.594, "subset": "training", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=Xhrk1wP8tCs", "annotations": [{"segment": [15.177510140405618, 67.541], "label": "Elliptical trainer"}]}, "SNja9iUdAiI": {"duration": 237.494, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SNja9iUdAiI", "annotations": []}, "MAUZbZnbM_Y": {"duration": 59.049, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=MAUZbZnbM_Y", "annotations": [{"segment": [39.37240603940732, 44.88838734781854], "label": "Hand car wash"}]}, "oB8Nl31w3xc": {"duration": 75.61, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oB8Nl31w3xc", "annotations": []}, "bULG65Ec5fo": {"duration": 170.44, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bULG65Ec5fo", "annotations": [{"segment": [6.913098510334971, 162.45781487831516], "label": "Rock climbing"}]}, "ZWzPz-LX9Qg": {"duration": 31.26, "subset": "validation", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=ZWzPz-LX9Qg", "annotations": [{"segment": [0.01, 1.9503312921632512], "label": "Washing hands"}, {"segment": [5.070861359624454, 9.55662333159993], "label": "Washing hands"}, {"segment": [16.1389914426509, 17.89428960559783], "label": "Washing hands"}, {"segment": [20.380962003105974, 21.161094519971275], "label": "Washing hands"}, {"segment": [27.694704348718165, 29.64503564088142], "label": "Washing hands"}]}, "Yp9DBq34bFQ": {"duration": 81.571, "subset": "training", 
"resolution": "480x360", "url": "https://www.youtube.com/watch?v=Yp9DBq34bFQ", "annotations": [{"segment": [13.166136505460218, 26.0142503900156], "label": "Applying sunscreen"}]}, "nHE7u40plD0": {"duration": 145.519, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nHE7u40plD0", "annotations": [{"segment": [0.01, 140.33086563437155], "label": "Playing drums"}]}, "ARkMGkD2RDs": {"duration": 41.43, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ARkMGkD2RDs", "annotations": [{"segment": [8.331870514820594, 27.062432137285494], "label": "Snatch"}]}, "PAhJqXTzCOU": {"duration": 236.22, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PAhJqXTzCOU", "annotations": [{"segment": [130.82198520593886, 172.83242432605303], "label": "Drinking coffee"}]}, "MpqXCbsqVNQ": {"duration": 174.197, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MpqXCbsqVNQ", "annotations": [{"segment": [17.935947870567883, 21.197029301580226], "label": "Hurling"}, {"segment": [27.447435377687214, 38.04595002847733], "label": "Hurling"}, {"segment": [43.20932896091353, 54.62311396945673], "label": "Hurling"}, {"segment": [64.13460119188767, 70.38500724024962], "label": "Hurling"}, {"segment": [83.97284647581904, 95.11487464898596], "label": "Hurling"}]}, "sMG6vA7yo5I": {"duration": 71.26, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sMG6vA7yo5I", "annotations": []}, "cHSGL-OcMak": {"duration": 54.242, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=cHSGL-OcMak", "annotations": [{"segment": [0.01, 28.686388318408735], "label": "Powerbocking"}, {"segment": [36.80996731123245, 54.2418139], "label": "Powerbocking"}]}, "cXVWUEcSx20": {"duration": 114.962, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cXVWUEcSx20", 
"annotations": [{"segment": [10.760875332972502, 54.88046365366615], "label": "Clipping cat claws"}]}, "MNpV9WvVPSA": {"duration": 77.51, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MNpV9WvVPSA", "annotations": [{"segment": [30.92008736349454, 71.03792199687987], "label": "Playing violin"}]}, "W84TQm1l90U": {"duration": 7.733, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=W84TQm1l90U", "annotations": [{"segment": [0.01, 7.026465074763967], "label": "Doing a powerbomb"}]}, "II3AiNrQ1uA": {"duration": 40.08, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=II3AiNrQ1uA", "annotations": [{"segment": [11.196580769890796, 34.71251921138846], "label": "Using uneven bars"}]}, "8Uyi1KXdS50": {"duration": 109.552, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8Uyi1KXdS50", "annotations": [{"segment": [0.01, 109.55174603174603], "label": "Knitting"}]}, "5GuyzVay_eM": {"duration": 100.682, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5GuyzVay_eM", "annotations": []}, "gzdasX0KIVg": {"duration": 234.568, "subset": "validation", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=gzdasX0KIVg", "annotations": [{"segment": [0.01, 234.541], "label": "Grooming dog"}]}, "n1w2_7WQpXs": {"duration": 115.938, "subset": "testing", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=n1w2_7WQpXs", "annotations": []}, "CzXF0CQn2Fk": {"duration": 219.71, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CzXF0CQn2Fk", "annotations": [{"segment": [4.455827593647964, 219.0210640262345], "label": "Zumba"}]}, "eVncG9w7jwg": {"duration": 125.086, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eVncG9w7jwg", "annotations": []}, "xPyj6pmYyEk": {"duration": 207.68, "subset": "training", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xPyj6pmYyEk", "annotations": [{"segment": [33.53323674804992, 187.42973390093601], "label": "Preparing pasta"}]}, "YH3571KWDpM": {"duration": 202.76, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=YH3571KWDpM", "annotations": [{"segment": [8.85675648673947, 201.17489734165366], "label": "Zumba"}]}, "g9sYWTE6LTg": {"duration": 216.9, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=g9sYWTE6LTg", "annotations": [{"segment": [67.94949215999999, 160.23482772], "label": "Tango"}, {"segment": [26.034146909999997, 55.916046599999994], "label": "Tango"}]}, "eUKMPNZ3NI4": {"duration": 123.066, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eUKMPNZ3NI4", "annotations": [{"segment": [0.01, 120.62617692795276], "label": "Beer pong"}]}, "KU8VVtam3ig": {"duration": 158.941, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KU8VVtam3ig", "annotations": [{"segment": [29.258849728138784, 63.228869508580345], "label": "Sharpening knives"}, {"segment": [126.45773901716069, 147.53402885335413], "label": "Sharpening knives"}]}, "UtErhcCYBZk": {"duration": 148.86, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=UtErhcCYBZk", "annotations": [{"segment": [13.70191049274624, 143.0572337722309], "label": "Snatch"}]}, "dVcnkTR5EBE": {"duration": 117.749, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dVcnkTR5EBE", "annotations": [{"segment": [75.63775047238285, 95.4458906779903], "label": "Removing ice from car"}]}, "ZFi592_m_NQ": {"duration": 136.4, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZFi592_m_NQ", "annotations": [{"segment": [16.791465340000002, 89.23031874], "label": "Tai chi"}]}, "opGZh9nUlWA": {"duration": 170.34, "subset": 
"training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=opGZh9nUlWA", "annotations": [{"segment": [14.615892826189238, 166.0896899375975], "label": "Ballet"}]}, "oncTwYfvCD8": {"duration": 155.06, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=oncTwYfvCD8", "annotations": []}, "rcDw6If4hjc": {"duration": 48.971, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rcDw6If4hjc", "annotations": [{"segment": [0.01, 48.921], "label": "Playing ten pins"}]}, "dnzcNZBtUG4": {"duration": 188.57, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dnzcNZBtUG4", "annotations": [{"segment": [5.883596661466459, 166.21160568642745], "label": "Tango"}]}, "MGggqAefik4": {"duration": 32.021, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MGggqAefik4", "annotations": []}, "nDM4E9CD_y0": {"duration": 161.68, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nDM4E9CD_y0", "annotations": []}, "HDhG0WkwGaU": {"duration": 221.124, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=HDhG0WkwGaU", "annotations": [{"segment": [8.94123010417879, 13.7557414473371], "label": "Bullfighting"}, {"segment": [22.69697679891682, 26.823700807338227], "label": "Bullfighting"}, {"segment": [29.57485014628583, 31.982105656176078], "label": "Bullfighting"}, {"segment": [40.23555363129272, 42.29891562507188], "label": "Bullfighting"}, {"segment": [49.17678893766908, 59.493598906564884], "label": "Bullfighting"}, {"segment": [64.30811022538292, 70.49819620672041], "label": "Bullfighting"}, {"segment": [78.06385685057732, 81.15889984124607], "label": "Bullfighting"}, {"segment": [85.28562382880439, 90.10013514762242], "label": "Bullfighting"}, {"segment": [101.44862611340781, 103.85588177281683], "label": "Bullfighting"}, {"segment": [107.63871209474529, 
132.74294968572505], "label": "Bullfighting"}, {"segment": [142.715865988991, 200.4900018148075], "label": "Bullfighting"}, {"segment": [207.3678751274047, 210.11902445244357], "label": "Bullfighting"}]}, "7vh_NMNnHbQ": {"duration": 103.515, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=7vh_NMNnHbQ", "annotations": []}, "AG6Rpvxxetg": {"duration": 9.27, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=AG6Rpvxxetg", "annotations": [{"segment": [0, 7.804202152886115], "label": "Using parallel bars"}]}, "1RMNLqb8OWI": {"duration": 163.561, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=1RMNLqb8OWI", "annotations": [{"segment": [3.444740042662931, 149.65481740902288], "label": "Blowing leaves"}]}, "6okx-34bDEg": {"duration": 49.784, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=6okx-34bDEg", "annotations": [{"segment": [0.01, 34.64528229426044], "label": "Running a marathon"}]}, "UIZkQUo8VtA": {"duration": 59.653, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=UIZkQUo8VtA", "annotations": [{"segment": [15.820359656786271, 23.45135666770671], "label": "Roof shingle removal"}]}, "lngs7qR5AXA": {"duration": 6.57, "subset": "training", "resolution": "360x360", "url": "https://www.youtube.com/watch?v=lngs7qR5AXA", "annotations": [{"segment": [0.01, 0.02], "label": "Drinking coffee"}]}, "arnKDX_ToxE": {"duration": 133.051, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=arnKDX_ToxE", "annotations": [{"segment": [13.076710496991307, 76.59216082839313], "label": "Elliptical trainer"}, {"segment": [108.14231921840874, 130.76710385335414], "label": "Elliptical trainer"}]}, "OtokKrYWhlE": {"duration": 78.69, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=OtokKrYWhlE", "annotations": []}, "hK3E6njydQA": 
{"duration": 85.66, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=hK3E6njydQA", "annotations": [{"segment": [2.6382664, 80.04586890171608], "label": "Playing lacrosse"}]}, "P2xT7dlyX8Y": {"duration": 106.78999999999999, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=P2xT7dlyX8Y", "annotations": []}, "ml4aMGCJgP0": {"duration": 151.766, "subset": "validation", "resolution": "638x360", "url": "https://www.youtube.com/watch?v=ml4aMGCJgP0", "annotations": [{"segment": [28.44984522036971, 40.26456484653794], "label": "Playing ten pins"}, {"segment": [51.842990080182794, 61.29476578111738], "label": "Playing ten pins"}, {"segment": [85.39679381850054, 91.78884210515245], "label": "Playing ten pins"}, {"segment": [104.31409599923344, 113.29446671574435], "label": "Playing ten pins"}]}, "MINjkEBIIkA": {"duration": 65.597, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MINjkEBIIkA", "annotations": [{"segment": [1.0227925117004681, 26.081209048361938], "label": "Throwing darts"}, {"segment": [27.513118564742594, 36.41141341653667], "label": "Throwing darts"}, {"segment": [38.6615569422777, 50.0145538221529], "label": "Throwing darts"}, {"segment": [53.594327613104525, 62.08350546021841], "label": "Throwing darts"}]}, "XzbvKQ-o5Ow": {"duration": 44.37, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XzbvKQ-o5Ow", "annotations": [{"segment": [5.537976536661466, 41.604048731669266], "label": "Using uneven bars"}]}, "oNYC_3fcGg0": {"duration": 148.283, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=oNYC_3fcGg0", "annotations": [{"segment": [9.484504512153274, 139.9542724648986], "label": "Surfing"}]}, "6SHSstpZN1I": {"duration": 108.577, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6SHSstpZN1I", "annotations": [{"segment": 
[21.670340093603745, 91.59104680187208], "label": "BMX"}]}, "uwLM5n-rYmA": {"duration": 177.45, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uwLM5n-rYmA", "annotations": [{"segment": [122.6031872074883, 152.25552355694228], "label": "Snatch"}]}, "xh8qQfCYDBY": {"duration": 42.168, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xh8qQfCYDBY", "annotations": [{"segment": [15.214377744099192, 39.347528703850685], "label": "Welding"}]}, "swOYvotIM1Y": {"duration": 69.126, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=swOYvotIM1Y", "annotations": [{"segment": [9.597810677052932, 24.58764982436032], "label": "Arm wrestling"}, {"segment": [36.98931530594557, 57.802545201127764], "label": "Arm wrestling"}]}, "7LkHBg_eAfU": {"duration": 200.81, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7LkHBg_eAfU", "annotations": [{"segment": [22.273401520000004, 54.007839313572546], "label": "Tango"}, {"segment": [56.66745320000001, 190.51460998439939], "label": "Tango"}]}, "f4983HmCNiM": {"duration": 119.723, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=f4983HmCNiM", "annotations": [{"segment": [75.7661872074883, 118.52111544461779], "label": "BMX"}]}, "-faeAVsbBG0": {"duration": 85.78, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-faeAVsbBG0", "annotations": [{"segment": [0.303387943053914, 84.66948140099782], "label": "Doing step aerobics"}]}, "2_KTq85YQcY": {"duration": 148.38, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2_KTq85YQcY", "annotations": [{"segment": [31.48060265811993, 126.61683569111473], "label": "Ironing clothes"}]}, "iQnnCZTl_7U": {"duration": 75.163, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=iQnnCZTl_7U", "annotations": 
[]}, "z_ojmuWzMWo": {"duration": 140.667, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=z_ojmuWzMWo", "annotations": [{"segment": [4.906402826433984, 132.3852532937237], "label": "Grooming dog"}]}, "wFVukrQfQhI": {"duration": 137.67000000000002, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=wFVukrQfQhI", "annotations": []}, "bw96D55q2FI": {"duration": 130.311, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bw96D55q2FI", "annotations": [{"segment": [0.01, 117.29967483307333], "label": "Longboarding"}]}, "9SPxXA1FevM": {"duration": 57.586, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=9SPxXA1FevM", "annotations": []}, "yGwevg8vwuU": {"duration": 179.909, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yGwevg8vwuU", "annotations": [{"segment": [0.01, 162.50621558190326], "label": "Capoeira"}]}, "-KjJxkFUhv4": {"duration": 235.1, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-KjJxkFUhv4", "annotations": [{"segment": [88.02560624873975, 91.69333984243724], "label": "Sailing"}]}, "eeD2_hKG7zE": {"duration": 74.328, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eeD2_hKG7zE", "annotations": [{"segment": [6.667407342552206, 69.86283439885418], "label": "Using the monkey bar"}]}, "0Um-HgjcnXY": {"duration": 91.673, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0Um-HgjcnXY", "annotations": []}, "QwnEXRwaDJc": {"duration": 187.06, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=QwnEXRwaDJc", "annotations": []}, "nFfMY4CQq50": {"duration": 234.15, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nFfMY4CQq50", "annotations": [{"segment": [116.70972269094844, 
124.01549562934898], "label": "Shaving"}]}, "qoS5nkk7Rgk": {"duration": 67.756, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qoS5nkk7Rgk", "annotations": [{"segment": [0.01, 67.755328], "label": "Grooming dog"}]}, "0QDPpdms08g": {"duration": 119.65, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0QDPpdms08g", "annotations": [{"segment": [0, 120], "label": "Doing kickboxing"}, {"segment": [54.81941826, 77.03251775999999], "label": "Doing kickboxing"}]}, "mVAP-WyEc7w": {"duration": 149.444, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mVAP-WyEc7w", "annotations": []}, "a7QrIgqkyao": {"duration": 180.512, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=a7QrIgqkyao", "annotations": [{"segment": [76.07889514962284, 139.8054998345359], "label": "Carving jack-o-lanterns"}]}, "n1JkJu0VjQk": {"duration": 33.07, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=n1JkJu0VjQk", "annotations": [{"segment": [0.01, 33.041], "label": "Doing karate"}]}, "pmmQuksmTvw": {"duration": 143.41, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=pmmQuksmTvw", "annotations": []}, "q3DzEgYmIV0": {"duration": 34.18, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=q3DzEgYmIV0", "annotations": [{"segment": [0.01, 34.121], "label": "Roof shingle removal"}]}, "I9NukwdINyY": {"duration": 202.99, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=I9NukwdINyY", "annotations": [{"segment": [101.33609485179407, 140.92050690327613], "label": "Painting"}, {"segment": [178.92154247269892, 186.52174958658347], "label": "Painting"}]}, "TrhOt1WyU7Y": {"duration": 62.741, "subset": "training", "resolution": "350x240", "url": "https://www.youtube.com/watch?v=TrhOt1WyU7Y", "annotations": 
[{"segment": [0.2, 62.520999999999994], "label": "Kite flying"}]}, "nUoN18FTeug": {"duration": 197.02, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nUoN18FTeug", "annotations": [{"segment": [12.904090483619346, 186.2064992199688], "label": "Playing guitarra"}]}, "aJsAuChVmlY": {"duration": 21.015, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aJsAuChVmlY", "annotations": [{"segment": [0.01, 21.014058], "label": "Blow-drying hair"}]}, "Szze8i9UsRo": {"duration": 225.19, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Szze8i9UsRo", "annotations": [{"segment": [72.36902770260471, 93.44738479563182], "label": "Doing nails"}, {"segment": [134.90148782527302, 152.11547976131044], "label": "Doing nails"}, {"segment": [176.3555908549142, 181.97648618096724], "label": "Doing nails"}]}, "U9Dcet1qdRE": {"duration": 29.33, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=U9Dcet1qdRE", "annotations": [{"segment": [6.176471349453978, 20.47863906], "label": "Clean and jerk"}]}, "VqTyewe2R1A": {"duration": 166.441, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VqTyewe2R1A", "annotations": [{"segment": [2.4369756525975523, 164.72915766194336], "label": "Drum corps"}]}, "N2WxAkVh-C4": {"duration": 177.587, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=N2WxAkVh-C4", "annotations": [{"segment": [90.87094838351356, 151.2668826084243], "label": "Blow-drying hair"}]}, "iaqFvTpoceI": {"duration": 194.89, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iaqFvTpoceI", "annotations": [{"segment": [29.49118965054602, 186.06812439313572], "label": "Cheerleading"}]}, "VzR7cskYnng": {"duration": 127.8, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=VzR7cskYnng", 
"annotations": [{"segment": [0.01, 127.802629], "label": "Doing kickboxing"}]}, "M0UsJ31h64U": {"duration": 224.93, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=M0UsJ31h64U", "annotations": [{"segment": [0.01, 0.02], "label": "Playing water polo"}]}, "gXUm7nobsfU": {"duration": 153.97, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=gXUm7nobsfU", "annotations": [{"segment": [18.495801451105663, 125.62732514820593], "label": "Tango"}]}, "IBte7CKOLSs": {"duration": 161.077, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IBte7CKOLSs", "annotations": [{"segment": [24.375120223856573, 140.7223423400936], "label": "Playing ten pins"}, {"segment": [145.24556048673946, 151.5278079126365], "label": "Playing ten pins"}]}, "Iphl3YB-yYQ": {"duration": 96.944, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Iphl3YB-yYQ", "annotations": [{"segment": [0.01, 96.94285714285714], "label": "Roof shingle removal"}]}, "ktYrQ5S-JE0": {"duration": 183.879, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ktYrQ5S-JE0", "annotations": []}, "QN2wHsiBEC0": {"duration": 163.42000000000002, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QN2wHsiBEC0", "annotations": [{"segment": [126.67297191887675, 134.50284288], "label": "Playing saxophone"}, {"segment": [111.12558502340093, 120.30109204368175], "label": "Playing saxophone"}, {"segment": [8.66575663026521, 13.763260530421217], "label": "Playing saxophone"}]}, "9d8ttpHu3cc": {"duration": 23.36, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9d8ttpHu3cc", "annotations": [{"segment": [0, 21.356360374414976], "label": "Kayaking"}]}, "y6VBDPLK9Vo": {"duration": 160.22, "subset": "testing", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=y6VBDPLK9Vo", 
"annotations": []}, "wVahCLOq4s0": {"duration": 165.466, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=wVahCLOq4s0", "annotations": [{"segment": [63.99289859594384, 88.5063073322933], "label": "Carving jack-o-lanterns"}, {"segment": [91.34470202808112, 100.90422932917316], "label": "Carving jack-o-lanterns"}, {"segment": [110.45271138845554, 131.61421216848674], "label": "Carving jack-o-lanterns"}]}, "291szrilAVE": {"duration": 186.178, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=291szrilAVE", "annotations": [{"segment": [76.3850137182001, 184.26362016679823], "label": "Powerbocking"}]}, "qOi_E_uYCIo": {"duration": 85.36, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=qOi_E_uYCIo", "annotations": [{"segment": [16.24570905013071, 77.36686005148206], "label": "Mixing drinks"}]}, "mShp2gzuPOM": {"duration": 211.79, "subset": "testing", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=mShp2gzuPOM", "annotations": []}, "_gnMuU1UJnM": {"duration": 214.181, "subset": "training", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=_gnMuU1UJnM", "annotations": [{"segment": [54.50255409151594, 63.84203695755956], "label": "Curling"}, {"segment": [106.20326281425737, 141.2263235619209], "label": "Curling"}, {"segment": [153.56778306347857, 172.91385471456888], "label": "Curling"}]}, "MVA568rRjzQ": {"duration": 126.13, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MVA568rRjzQ", "annotations": []}, "7FtSO6hPcxU": {"duration": 222.52, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7FtSO6hPcxU", "annotations": [{"segment": [191.27421220592825, 222.516824], "label": "Getting a haircut"}]}, "-EswFKNXjMo": {"duration": 49.737, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=-EswFKNXjMo", "annotations": []}, 
"qEpr1LBc-RE": {"duration": 144.38, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=qEpr1LBc-RE", "annotations": []}, "gqK_jApRT5E": {"duration": 176.867, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=gqK_jApRT5E", "annotations": [{"segment": [27.59226124147007, 72.01580184023688], "label": "Painting fence"}, {"segment": [80.01755760026319, 91.88222993409532], "label": "Painting fence"}, {"segment": [100.71175264430576, 110.36904399375975], "label": "Painting fence"}, {"segment": [120.57818056318251, 121.68187100312012], "label": "Painting fence"}]}, "PXvqhB91EkA": {"duration": 52.73, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=PXvqhB91EkA", "annotations": [{"segment": [2.00123352, 40.89046411856474], "label": "Spinning"}]}, "0V8mzi_89Fw": {"duration": 119.56, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0V8mzi_89Fw", "annotations": [{"segment": [11.372170046801871, 102.36665990639625], "label": "Roof shingle removal"}]}, "tXUssW4vT3o": {"duration": 223.47, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tXUssW4vT3o", "annotations": []}, "AX2KQGWXg1E": {"duration": 186.2, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=AX2KQGWXg1E", "annotations": [{"segment": [14.07786271450858, 157.17861154446177], "label": "Doing kickboxing"}]}, "fO8b3U8fuGo": {"duration": 18.07, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=fO8b3U8fuGo", "annotations": [{"segment": [0.733232449297972, 17.569377535101406], "label": "Horseback riding"}]}, "H91Dm6jaUPg": {"duration": 145.92000000000002, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=H91Dm6jaUPg", "annotations": [{"segment": [37.54157557722309, 137.65244378315134], "label": "Painting"}]}, "Rokj1EIAHHk": 
{"duration": 39.57, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Rokj1EIAHHk", "annotations": [{"segment": [7.160188131045242, 17.283212730109206], "label": "Tumbling"}, {"segment": [23.702691744149767, 35.43058609672387], "label": "Tumbling"}]}, "_jIXs7AFTxw": {"duration": 285.815, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=_jIXs7AFTxw", "annotations": []}, "KfkNKhAWOII": {"duration": 127.78, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KfkNKhAWOII", "annotations": [{"segment": [56.41409044116867, 68.77336029641187], "label": "Peeling potatoes"}]}, "teemFjum3oU": {"duration": 72.447, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=teemFjum3oU", "annotations": [{"segment": [0.01, 72.44625850340137], "label": "Fun sliding down"}]}, "7ZX3NHDmMZY": {"duration": 218.97, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7ZX3NHDmMZY", "annotations": [{"segment": [0.01, 218.96417233560092], "label": "Playing water polo"}]}, "jfnFrt9nfSo": {"duration": 170.6, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jfnFrt9nfSo", "annotations": [{"segment": [14.903948691280984, 18.363793923185497], "label": "Layup drill in basketball"}, {"segment": [27.678761855236115, 29.009471559814774], "label": "Layup drill in basketball"}, {"segment": [43.11499442834856, 44.179562192011495], "label": "Layup drill in basketball"}, {"segment": [56.15594953321943, 58.285085060545285], "label": "Layup drill in basketball"}, {"segment": [70.26147240175321, 72.39060792907907], "label": "Layup drill in basketball"}, {"segment": [86.7622727385286, 87.82684050219152], "label": "Layup drill in basketball"}, {"segment": [101.13393754797812, 102.73078919347249], "label": "Layup drill in basketball"}, {"segment": [115.77174429834336, 117.36859594383776], 
"label": "Layup drill in basketball"}, {"segment": [126.1512799940569, 128.01427358046703], "label": "Layup drill in basketball"}, {"segment": [151.1686223276131, 152.76547397191888], "label": "Layup drill in basketball"}, {"segment": [149.03948680187207, 152.2331900904836], "label": "Layup drill in basketball"}]}, "gk6NAPqfJoY": {"duration": 194.955, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gk6NAPqfJoY", "annotations": [{"segment": [0.01, 194.901], "label": "Skiing"}]}, "bMFTN7aL6aQ": {"duration": 51.735, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bMFTN7aL6aQ", "annotations": []}, "BK8yiRJkfcU": {"duration": 46.93, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BK8yiRJkfcU", "annotations": [{"segment": [24.78154187936225, 30.63833168836958], "label": "Long jump"}]}, "FMUaey1tigI": {"duration": 107.021, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FMUaey1tigI", "annotations": [{"segment": [9.73522303464805, 99.58599492803677], "label": "Playing congas"}]}, "jF33TElZc_Y": {"duration": 188.825, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jF33TElZc_Y", "annotations": [{"segment": [11.19397426781425, 18.85300929316084], "label": "Snowboarding"}, {"segment": [24.155418156862325, 76.59035025346591], "label": "Snowboarding"}]}, "ZEChBNpLCyU": {"duration": 140.156, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=ZEChBNpLCyU", "annotations": [{"segment": [118.94644443057722, 121.35161150546021], "label": "Spread mulch"}]}, "nqB4Zn6UWdk": {"duration": 69.405, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=nqB4Zn6UWdk", "annotations": [{"segment": [0.2, 25.00313728549142], "label": "Running a marathon"}]}, "WxlJBRUU1A0": {"duration": 226.465, "subset": "validation", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=WxlJBRUU1A0", "annotations": [{"segment": [28.211962616822433, 199.26458722741432], "label": "Knitting"}]}, "WZUxscN9rW8": {"duration": 127.29, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WZUxscN9rW8", "annotations": [{"segment": [2.09139113, 93.18341994851794], "label": "Plataform diving"}]}, "hx0WYKDa7PM": {"duration": 100.2, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hx0WYKDa7PM", "annotations": [{"segment": [39.77306942277691, 87.1256825273011], "label": "Plataform diving"}]}, "WFbUBMgOMn8": {"duration": 206.66, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WFbUBMgOMn8", "annotations": [{"segment": [28.682853354134163, 190.1447581903276], "label": "Preparing pasta"}]}, "lMYtmGRAn8k": {"duration": 201.174, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lMYtmGRAn8k", "annotations": [{"segment": [13.495247009880394, 188.6196151846074], "label": "Rope skipping"}]}, "CjoAnld43C4": {"duration": 217.386, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=CjoAnld43C4", "annotations": [{"segment": [15.934979719188766, 205.46141653666146], "label": "Doing fencing"}]}, "TDZsE3yValQ": {"duration": 19.58, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TDZsE3yValQ", "annotations": [{"segment": [0.01, 18.367644224648988], "label": "Triple jump"}]}, "SfFjpnTKG7s": {"duration": 207.68, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SfFjpnTKG7s", "annotations": [{"segment": [0.6479852554646404, 202.1713983650546], "label": "Pole vault"}]}, "M2pxaQiBbXA": {"duration": 79.51, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=M2pxaQiBbXA", "annotations": [{"segment": [0, 79.458333], "label": "Playing 
water polo"}]}, "pbBJXBPrnmo": {"duration": 155.853, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pbBJXBPrnmo", "annotations": []}, "eQb6kWNJRd0": {"duration": 21.897, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=eQb6kWNJRd0", "annotations": []}, "2CegApogJd4": {"duration": 124.81, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=2CegApogJd4", "annotations": []}, "wPLEmDBfgok": {"duration": 184.76, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=wPLEmDBfgok", "annotations": [{"segment": [0, 183.89601083907303], "label": "Cumbia"}]}, "UMS4-qifK9A": {"duration": 53.8, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UMS4-qifK9A", "annotations": [{"segment": [5.760946177847114, 52.016718408736345], "label": "Playing guitarra"}]}, "8H9WqAFtn60": {"duration": 85.752, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8H9WqAFtn60", "annotations": []}, "sP416nSD4xQ": {"duration": 68.15, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sP416nSD4xQ", "annotations": [{"segment": [0.01, 65.95669763634946], "label": "Playing piano"}]}, "srSwfib_ogk": {"duration": 134.839, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=srSwfib_ogk", "annotations": [{"segment": [3.57605474014879, 134.838299], "label": "Baton twirling"}]}, "M1SUIrWykRs": {"duration": 58.306, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=M1SUIrWykRs", "annotations": [{"segment": [2.4548939157566303, 51.55277223088923], "label": "Surfing"}]}, "LvRP3c5n3P8": {"duration": 230.76, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=LvRP3c5n3P8", "annotations": [{"segment": [142.12810691887677, 186.74553795163808], "label": 
"Washing hands"}]}, "hghdjiQlYko": {"duration": 141.967, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=hghdjiQlYko", "annotations": [{"segment": [15.281917736349454, 109.40967190951638], "label": "Snow tubing"}]}, "0Ffvglar63w": {"duration": 66.827, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0Ffvglar63w", "annotations": []}, "7qeiQKbWpLM": {"duration": 34.92, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=7qeiQKbWpLM", "annotations": [{"segment": [2.833051750390016, 33.397321595943836], "label": "Using uneven bars"}]}, "yKbRi1SZilA": {"duration": 157.15, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yKbRi1SZilA", "annotations": []}, "bwcgMTHFRV8": {"duration": 136.696, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=bwcgMTHFRV8", "annotations": [{"segment": [76.986488299532, 123.2636848673947], "label": "Drum corps"}]}, "9FAPy8NVDgY": {"duration": 158.73, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9FAPy8NVDgY", "annotations": [{"segment": [10.648104294239797, 149.81635009360375], "label": "Preparing pasta"}]}, "5P-Xs2v6lis": {"duration": 217.85, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5P-Xs2v6lis", "annotations": [{"segment": [0.01, 131.76621297871955], "label": "Roof shingle removal"}, {"segment": [143.64009927155442, 207.08057746470084], "label": "Roof shingle removal"}]}, "XzYtm5WdAE8": {"duration": 31.741, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=XzYtm5WdAE8", "annotations": [{"segment": [0.01, 28.077131471138845], "label": "Cutting the grass"}]}, "lsIYvscKoPo": {"duration": 164.47, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lsIYvscKoPo", "annotations": []}, "IgAE9XJVIlk": 
{"duration": 106.12, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IgAE9XJVIlk", "annotations": [{"segment": [22.26589949448318, 89.14637083850701], "label": "Breakdancing"}]}, "kXbc9D0sF5k": {"duration": 191.426, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kXbc9D0sF5k", "annotations": [{"segment": [28.957904836193446, 37.31688767550702], "label": "Skiing"}, {"segment": [39.70516848673947, 48.661221528861155], "label": "Skiing"}, {"segment": [56.721669266770675, 60.00555538221528], "label": "Skiing"}, {"segment": [63.58797659906396, 68.0660031201248], "label": "Skiing"}, {"segment": [75.5293806552262, 90.15760062402495], "label": "Skiing"}, {"segment": [100.90486427457097, 109.56238221528862], "label": "Skiing"}, {"segment": [111.652127925117, 125.68327769110763], "label": "Skiing"}, {"segment": [138.8188221528861, 142.10270826833073], "label": "Skiing"}, {"segment": [151.6716817472699, 153.46307956318253], "label": "Skiing"}, {"segment": [163.61433385335414, 178.24408268330734], "label": "Skiing"}, {"segment": [181.8268783151326, 183.61827613104523], "label": "Skiing"}]}, "uegTBtD59R0": {"duration": 204.68, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uegTBtD59R0", "annotations": [{"segment": [34.31966848673947, 109.02480733229329], "label": "Doing motocross"}, {"segment": [111.57882917316692, 169.36357332293292], "label": "Doing motocross"}, {"segment": [175.4293751950078, 181.81442979719188], "label": "Doing motocross"}]}, "snPtOKtIEA4": {"duration": 72.59, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=snPtOKtIEA4", "annotations": [{"segment": [6.506948748049922, 72.538333], "label": "Layup drill in basketball"}]}, "u10c6Nx4K0A": {"duration": 5.99, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=u10c6Nx4K0A", "annotations": [{"segment": [0.1520861, 
1.1560101], "label": "Pole vault"}, {"segment": [1.429813, 4.5936711], "label": "Pole vault"}, {"segment": [0, 5.743333], "label": "Pole vault"}]}, "9ZQY-ZfimYo": {"duration": 172.99, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9ZQY-ZfimYo", "annotations": [{"segment": [18.935179431812962, 29.174383732432485], "label": "Rock climbing"}, {"segment": [70.67010642441686, 77.67587778799863], "label": "Rock climbing"}, {"segment": [143.4223475077661, 147.73359142381642], "label": "Rock climbing"}]}, "lfGmSfTjnA8": {"duration": 160.497, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=lfGmSfTjnA8", "annotations": [{"segment": [9.26421844773791, 151.23210655226208], "label": "Peeling potatoes"}]}, "8R8r_ohAdPw": {"duration": 100.47, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8R8r_ohAdPw", "annotations": []}, "2xmu01HHhKY": {"duration": 49.598, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=2xmu01HHhKY", "annotations": [{"segment": [3.4045306833073323, 19.576051429017163], "label": "Playing congas"}]}, "hKef3llID08": {"duration": 27.632, "subset": "testing", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=hKef3llID08", "annotations": []}, "l-gHWS0oXiw": {"duration": 106.627, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=l-gHWS0oXiw", "annotations": [{"segment": [6.321043959438378, 16.800669471138846], "label": "Spread mulch"}, {"segment": [24.78514605148206, 29.77544391419657], "label": "Spread mulch"}, {"segment": [54.56058996567862, 74.0227516302652], "label": "Spread mulch"}, {"segment": [88.82730195631825, 99.14058420592823], "label": "Spread mulch"}]}, "vqqoDYma9F8": {"duration": 154.23, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vqqoDYma9F8", "annotations": [{"segment": [62.5064010445069, 
65.86860353671874], "label": "Cleaning windows"}, {"segment": [111.01817986070627, 117.02211288251313], "label": "Cleaning windows"}]}, "XklHV0q3w40": {"duration": 236.19, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XklHV0q3w40", "annotations": [{"segment": [14.002080719963494, 222.92786296411856], "label": "Playing saxophone"}]}, "lca7DhtJjF8": {"duration": 67.52, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=lca7DhtJjF8", "annotations": []}, "HhnveTWVWQ8": {"duration": 229.576, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HhnveTWVWQ8", "annotations": []}, "mtQUYdj2Ecg": {"duration": 23.59, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=mtQUYdj2Ecg", "annotations": [{"segment": [10.010647263650545, 11.44599742277691], "label": "Clean and jerk"}]}, "OgaurUPiWYI": {"duration": 98.082, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=OgaurUPiWYI", "annotations": [{"segment": [11.777156006240249, 77.85158970358815], "label": "BMX"}]}, "NKf6zbwP-wU": {"duration": 111.39, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=NKf6zbwP-wU", "annotations": []}, "h4SzYWJUqVQ": {"duration": 174.661, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=h4SzYWJUqVQ", "annotations": [{"segment": [0.01, 174.621], "label": "Installing carpet"}]}, "dxvKVBrNMDw": {"duration": 117.54, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dxvKVBrNMDw", "annotations": [{"segment": [31.340485179407178, 71.1135600624025], "label": "Hula hoop"}]}, "ZC65qkYMy9Q": {"duration": 52.524, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=ZC65qkYMy9Q", "annotations": [{"segment": [0.2, 17.609539781591263], "label": "Shuffleboard"}, {"segment": 
[21.6228767550702, 45.53908892355694], "label": "Shuffleboard"}]}, "PSB1nM3QXxg": {"duration": 38.8, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=PSB1nM3QXxg", "annotations": []}, "l2MB-KxbVEs": {"duration": 43.515, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=l2MB-KxbVEs", "annotations": [{"segment": [0.2, 40.866684536661474], "label": "Calf roping"}]}, "dR3hrw9dVdw": {"duration": 170.11, "subset": "validation", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=dR3hrw9dVdw", "annotations": [{"segment": [0.01, 165.5979055975039], "label": "Playing piano"}]}, "yPsC9mVpGmk": {"duration": 95.574, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yPsC9mVpGmk", "annotations": []}, "O0-CRPl0TR0": {"duration": 209.12, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=O0-CRPl0TR0", "annotations": [{"segment": [14.744029962498079, 180.83641314006817], "label": "Ping-pong"}]}, "G_npKjzxQLk": {"duration": 95.759, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=G_npKjzxQLk", "annotations": [{"segment": [3.1274331123066124, 82.20687069760511], "label": "Scuba diving"}, {"segment": [93.9719847639678, 95.16338872005515], "label": "Scuba diving"}]}, "sxZTPWMnyFY": {"duration": 198.46, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sxZTPWMnyFY", "annotations": [{"segment": [0, 198.401], "label": "Snatch"}]}, "ZG4ApSk5iKw": {"duration": 147.749, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZG4ApSk5iKw", "annotations": [{"segment": [4.838092043681748, 36.40088299531981], "label": "Doing crunches"}, {"segment": [69.57778471138846, 99.06770670826833], "label": "Doing crunches"}]}, "iPk4GeFFcTQ": {"duration": 78.09, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=iPk4GeFFcTQ", "annotations": [{"segment": [5.601719251170047, 21.797994477379095], "label": "Javelin throw"}]}, "ll91M5topgU": {"duration": 10.31, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ll91M5topgU", "annotations": [{"segment": [0.2, 10.061], "label": "Blowing leaves"}]}, "ngxs6ngJR4k": {"duration": 228.694, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ngxs6ngJR4k", "annotations": [{"segment": [0.01, 211.02481343576062], "label": "Assembling bicycle"}]}, "28QW8P1x_LE": {"duration": 192.56, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=28QW8P1x_LE", "annotations": [{"segment": [0, 189.55897775663027], "label": "Zumba"}]}, "3NSikmqK3Ys": {"duration": 67.59, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3NSikmqK3Ys", "annotations": [{"segment": [53.990270587694255, 56.83741376321719], "label": "Walking the dog"}]}, "OMYnLCWTdEA": {"duration": 143.01, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OMYnLCWTdEA", "annotations": [{"segment": [58.24695018999999, 127.13105855999997], "label": "Breakdancing"}]}, "EnZgkrQFcnA": {"duration": 103.724, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EnZgkrQFcnA", "annotations": []}, "L2XwEKLfqfk": {"duration": 207.84, "subset": "testing", "resolution": "658x480", "url": "https://www.youtube.com/watch?v=L2XwEKLfqfk", "annotations": []}, "9H8OBfrm1Rw": {"duration": 118.47, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9H8OBfrm1Rw", "annotations": [{"segment": [10.904250091092079, 111.07549669061594], "label": "Playing lacrosse"}]}, "BkBbzC6nIvA": {"duration": 226.42000000000002, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BkBbzC6nIvA", "annotations": 
[{"segment": [13.772354134165367, 219.651391575663], "label": "Spinning"}]}, "PD9e9MVHEyU": {"duration": 111.46000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PD9e9MVHEyU", "annotations": [{"segment": [16.170638564318082, 103.63118758190328], "label": "Painting"}]}, "HfjvlIAEsJY": {"duration": 192.517, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HfjvlIAEsJY", "annotations": [{"segment": [15.467405954809486, 26.579911203895907], "label": "Assembling bicycle"}, {"segment": [34.68903665593195, 59.016413012040076], "label": "Assembling bicycle"}, {"segment": [127.79381036449391, 171.6431554014295], "label": "Assembling bicycle"}]}, "giXgqmNJJ2E": {"duration": 151.51, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=giXgqmNJJ2E", "annotations": []}, "PLqTX6ij52U": {"duration": 147.47, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PLqTX6ij52U", "annotations": [{"segment": [69.01868891082174, 121.70295477941566], "label": "Assembling bicycle"}]}, "T9jiC2SRLJQ": {"duration": 70.636, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=T9jiC2SRLJQ", "annotations": [{"segment": [19.993137626162437, 21.531071418652996], "label": "Playing pool"}, {"segment": [23.728119693639506, 25.485758313628715], "label": "Playing pool"}, {"segment": [28.232068657361857, 28.891183139857805], "label": "Playing pool"}, {"segment": [33.83454175857746, 35.2626226177199], "label": "Playing pool"}, {"segment": [42.842439054733894, 44.490225236693455], "label": "Playing pool"}, {"segment": [48.884321721918965, 50.092698255355984], "label": "Playing pool"}]}, "IwzMVj0JgJo": {"duration": 21.99, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IwzMVj0JgJo", "annotations": []}, "0ecdkXfxQUA": {"duration": 9.242, "subset": "testing", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=0ecdkXfxQUA", "annotations": []}, "sCxGclun1E0": {"duration": 110.063, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sCxGclun1E0", "annotations": [{"segment": [0.2, 102.33588153510141], "label": "Baking cookies"}]}, "hN9diyaOK4E": {"duration": 94.227, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hN9diyaOK4E", "annotations": [{"segment": [3.226700934579439, 90.7876308411215], "label": "Playing rubik cube"}]}, "yYvDJjIxvS4": {"duration": 82.94, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yYvDJjIxvS4", "annotations": []}, "vB3VWKEXLuY": {"duration": 75.674, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=vB3VWKEXLuY", "annotations": []}, "VjwKZEMHlpE": {"duration": 11.03, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=VjwKZEMHlpE", "annotations": []}, "j18sB8o2IQw": {"duration": 15.09, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=j18sB8o2IQw", "annotations": [{"segment": [0.01, 10.101222622464899], "label": "Plataform diving"}]}, "26j3-w5G1fU": {"duration": 47.97, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=26j3-w5G1fU", "annotations": []}, "vjUx3k63oZI": {"duration": 72.93, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=vjUx3k63oZI", "annotations": [{"segment": [59.042120124804995, 71.21458034321373], "label": "Bungee jumping"}]}, "xhBvsWa0PCs": {"duration": 25.38, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=xhBvsWa0PCs", "annotations": [{"segment": [1.5045206614664588, 5.028266421216849], "label": "Calf roping"}, {"segment": [7.522603307332294, 11.561053503900157], "label": "Calf roping"}, {"segment": [13.42190800624025, 18.846100917316694], 
"label": "Calf roping"}, {"segment": [20.033880386895476, 25.178888], "label": "Calf roping"}]}, "3HP2E_a9xrU": {"duration": 213.322, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3HP2E_a9xrU", "annotations": [{"segment": [5.988608424336974, 211.28460998439937], "label": "Hitting a pinata"}]}, "OBbHkBVDX8Y": {"duration": 172.85, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OBbHkBVDX8Y", "annotations": [{"segment": [0, 172.781], "label": "Playing saxophone"}]}, "pocqDciTABA": {"duration": 32.09, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pocqDciTABA", "annotations": []}, "Zob68l3snFo": {"duration": 155.876, "subset": "training", "resolution": "1920x1280", "url": "https://www.youtube.com/watch?v=Zob68l3snFo", "annotations": [{"segment": [19.75674398405604, 149.62677980964875], "label": "Scuba diving"}]}, "4VdS1eN3dv4": {"duration": 55.43, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4VdS1eN3dv4", "annotations": []}, "Lo848n58uoM": {"duration": 127.01, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Lo848n58uoM", "annotations": [{"segment": [1.1888861840873637, 104.4238365023401], "label": "Walking the dog"}]}, "jeaaS1NK_d4": {"duration": 186.225, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jeaaS1NK_d4", "annotations": [{"segment": [0.01, 177.50805552574104], "label": "Arm wrestling"}]}, "cQYAi2drreo": {"duration": 71.936, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=cQYAi2drreo", "annotations": [{"segment": [0.01, 41.83942745709828], "label": "Carving jack-o-lanterns"}]}, "Lzvtnr4gT8Y": {"duration": 85.66, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Lzvtnr4gT8Y", "annotations": [{"segment": [6.681625224192642, 
35.54624619270485], "label": "Shaving legs"}, {"segment": [60.401892026701475, 62.54001197129485], "label": "Shaving legs"}]}, "j73Ks4r5tbg": {"duration": 186.32, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=j73Ks4r5tbg", "annotations": [{"segment": [83.98132605304212, 186.27], "label": "Smoking a cigarette"}]}, "5ZmZr2bguy0": {"duration": 43.63, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5ZmZr2bguy0", "annotations": [{"segment": [5.208921996879876, 41.50114976599064], "label": "Shot put"}]}, "QyYbyFXmd-g": {"duration": 205.776, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=QyYbyFXmd-g", "annotations": [{"segment": [22.14469422776911, 143.7800436817473], "label": "Tug of war"}, {"segment": [153.40817160686427, 193.22321684867396], "label": "Tug of war"}]}, "xj-f9hzQ3Lc": {"duration": 54.196, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=xj-f9hzQ3Lc", "annotations": [{"segment": [20.05987239042649, 51.83538657549647], "label": "BMX"}]}, "mvl6wff267o": {"duration": 150.094, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mvl6wff267o", "annotations": [{"segment": [9.366226833073323, 19.20076500780031], "label": "Running a marathon"}]}, "FRzN9ApCxW8": {"duration": 67.71, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=FRzN9ApCxW8", "annotations": [{"segment": [0.2, 54.505527575663024], "label": "Cutting the grass"}]}, "FeKEqUxiReA": {"duration": 236.635, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=FeKEqUxiReA", "annotations": [{"segment": [16.610054602184086, 236.601], "label": "Rope skipping"}]}, "kIDCXgNy8iI": {"duration": 36.48, "subset": "testing", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=kIDCXgNy8iI", "annotations": []}, "E_8IuayRJ2w": {"duration": 
102.447, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=E_8IuayRJ2w", "annotations": []}, "ibjvKk93__g": {"duration": 102.029, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ibjvKk93__g", "annotations": [{"segment": [12.096967111337516, 18.145450667006273], "label": "Elliptical trainer"}, {"segment": [12.096967111337516, 18.145450667006273], "label": "Elliptical trainer"}]}, "B-KSdR2ct00": {"duration": 39.66, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=B-KSdR2ct00", "annotations": [{"segment": [0.01, 32.186583463338536], "label": "Brushing teeth"}]}, "I7slpTgl0jc": {"duration": 20.16, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=I7slpTgl0jc", "annotations": []}, "Cj_r6t5lLx8": {"duration": 207.45, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Cj_r6t5lLx8", "annotations": []}, "Hg1tl2hFWGc": {"duration": 29.56, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Hg1tl2hFWGc", "annotations": [{"segment": [0, 29.461], "label": "Putting on makeup"}]}, "tn8YmTHinZc": {"duration": 237.308, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=tn8YmTHinZc", "annotations": [{"segment": [18.94623135229806, 222.5594734933443], "label": "Making a lemonade"}]}, "7CNEttu-t7g": {"duration": 88.422, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7CNEttu-t7g", "annotations": [{"segment": [38.390855694227774, 45.69683541341654], "label": "Using the rowing machine"}, {"segment": [45.845058502340095, 47.637496879875194], "label": "Using the rowing machine"}]}, "GffGGAwbcHo": {"duration": 54.85, "subset": "training", "resolution": "852x480", "url": "https://www.youtube.com/watch?v=GffGGAwbcHo", "annotations": [{"segment": [17.44693291731669, 26.59802028081123], "label": 
"Walking the dog"}, {"segment": [29.762414976599064, 32.67023712948518], "label": "Walking the dog"}]}, "PAF5AIQNXhs": {"duration": 68.802, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PAF5AIQNXhs", "annotations": [{"segment": [0.2, 65.91173551214808], "label": "Swinging at the playground"}]}, "waFqh-Qkafc": {"duration": 52.315, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=waFqh-Qkafc", "annotations": [{"segment": [4.800964174454829, 44.42926168224299], "label": "Carving jack-o-lanterns"}]}, "lHsPZ49cpG8": {"duration": 103.56, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=lHsPZ49cpG8", "annotations": [{"segment": [0.01, 103.513333], "label": "Playing polo"}]}, "FLImHIKzzm4": {"duration": 152.57999999999998, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=FLImHIKzzm4", "annotations": [{"segment": [35.09254725, 80.5801845], "label": "Playing water polo"}]}, "cw0HRDIQ10I": {"duration": 38.71, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=cw0HRDIQ10I", "annotations": [{"segment": [9.540839613104524, 12.801632898595942], "label": "Clean and jerk"}, {"segment": [20.470535625585022, 26.207116405616222], "label": "Clean and jerk"}]}, "_H-qWt56giQ": {"duration": 126.642, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=_H-qWt56giQ", "annotations": []}, "SsAmEJvdpyU": {"duration": 82.849, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=SsAmEJvdpyU", "annotations": [{"segment": [7.4964591182286755, 82.84879818594105], "label": "Rollerblading"}]}, "JB-ynj70saA": {"duration": 35.759, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=JB-ynj70saA", "annotations": []}, "r-rNYo8ju4k": {"duration": 9.776, "subset": "training", "resolution": "853x480", "url": 
"https://www.youtube.com/watch?v=r-rNYo8ju4k", "annotations": [{"segment": [0, 9.741], "label": "Applying sunscreen"}]}, "PIJd7_6RGpY": {"duration": 79.11, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PIJd7_6RGpY", "annotations": [{"segment": [11.350159126365055, 79.081], "label": "Starting a campfire"}]}, "CuyMHCcxqNc": {"duration": 159.614, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CuyMHCcxqNc", "annotations": [{"segment": [4.731147257410297, 154.6338129921997], "label": "Making a cake"}]}, "WN5EWPfDbog": {"duration": 170.551, "subset": "validation", "resolution": "562x480", "url": "https://www.youtube.com/watch?v=WN5EWPfDbog", "annotations": [{"segment": [0.01, 170.55056689342405], "label": "Playing blackjack"}]}, "rBMQFpHspmo": {"duration": 144.31, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=rBMQFpHspmo", "annotations": [{"segment": [57.732015600000004, 143.47728159126365], "label": "Pole vault"}, {"segment": [3.713529641185647, 42.7307832], "label": "Pole vault"}]}, "Bule85koN3o": {"duration": 48.51, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Bule85koN3o", "annotations": [{"segment": [7.340294770670827, 9.988854739469579], "label": "Shot put"}, {"segment": [23.004635157566305, 24.669444280811234], "label": "Shot put"}]}, "d0h_l9bE1o0": {"duration": 216.55, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=d0h_l9bE1o0", "annotations": [{"segment": [10.134912498540759, 19.594164163845466], "label": "Cheerleading"}, {"segment": [51.01239290932181, 53.039375409029965], "label": "Cheerleading"}, {"segment": [163.50992164312424, 172.6313428918109], "label": "Cheerleading"}]}, "Aoo_Wt4TegA": {"duration": 26.703, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Aoo_Wt4TegA", "annotations": []}, "Fvc0-yNMFyE": 
{"duration": 118.028, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=Fvc0-yNMFyE", "annotations": [{"segment": [0.2, 108.48207250516198], "label": "Riding bumper cars"}]}, "nYl_tYmijlM": {"duration": 185.667, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nYl_tYmijlM", "annotations": []}, "G4ir4oKYhMg": {"duration": 224.49, "subset": "testing", "resolution": "352x288", "url": "https://www.youtube.com/watch?v=G4ir4oKYhMg", "annotations": []}, "IYbnF7LM2II": {"duration": 139.34, "subset": "training", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=IYbnF7LM2II", "annotations": [{"segment": [16.52116074302836, 92.6054536385537], "label": "Playing racquetball"}]}, "ywJQotAB3dw": {"duration": 162.587, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ywJQotAB3dw", "annotations": [{"segment": [11.66763546188106, 153.70841337285492], "label": "Futsal"}]}, "YGl_GO6ezh4": {"duration": 73.35, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=YGl_GO6ezh4", "annotations": []}, "S4ppfXBp5o0": {"duration": 140.16, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=S4ppfXBp5o0", "annotations": [{"segment": [14.435784223205662, 36.08946055801415], "label": "Clean and jerk"}, {"segment": [48.55672875078268, 50.08779677445601], "label": "Clean and jerk"}, {"segment": [75.6785045397816, 81.80277658346336], "label": "Clean and jerk"}, {"segment": [110.0181727847114, 115.26754882215289], "label": "Clean and jerk"}, {"segment": [127.51609290951639, 131.23440093603745], "label": "Clean and jerk"}]}, "JW3hvO3OFlU": {"duration": 197.35, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JW3hvO3OFlU", "annotations": []}, "yINX46xPRf0": {"duration": 159.986, "subset": "validation", "resolution": "640x360", "url": 
"https://www.youtube.com/watch?v=yINX46xPRf0", "annotations": [{"segment": [31.435329173166924, 147.96413884555383], "label": "Scuba diving"}]}, "nyjtOdSyHn4": {"duration": 115.984, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=nyjtOdSyHn4", "annotations": [{"segment": [38.17872057195213, 68.75788539024555], "label": "Hitting a pinata"}]}, "eGW0rygfcrg": {"duration": 92.22999999999999, "subset": "training", "resolution": "854x478", "url": "https://www.youtube.com/watch?v=eGW0rygfcrg", "annotations": [{"segment": [0.8633028749721418, 91.51010474704702], "label": "Tango"}]}, "SDiDPAiCIdU": {"duration": 72.052, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SDiDPAiCIdU", "annotations": []}, "Kq8g8jjodvc": {"duration": 49.71, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Kq8g8jjodvc", "annotations": [{"segment": [0.01, 49.713921], "label": "Snatch"}]}, "cKMGacBQX0E": {"duration": 57.17, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cKMGacBQX0E", "annotations": [{"segment": [8.394608600276637, 46.77343124357882], "label": "Using the pommel horse"}]}, "kH8YpzoflKg": {"duration": 115.171, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kH8YpzoflKg", "annotations": [{"segment": [0.01, 9.163344004680187], "label": "Snowboarding"}, {"segment": [20.123422127925117, 111.93653558658346], "label": "Snowboarding"}]}, "RWir3muDHg0": {"duration": 81.061, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RWir3muDHg0", "annotations": [{"segment": [4.1731872074883, 81.060906], "label": "Tug of war"}]}, "E50qKeeMbgU": {"duration": 70.705, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=E50qKeeMbgU", "annotations": [{"segment": [11.798544461778471, 70.44052886115445], "label": "Elliptical trainer"}]}, 
"U-ApHGUtLMA": {"duration": 67.617, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=U-ApHGUtLMA", "annotations": [{"segment": [23.20691380036154, 53.16492906084243], "label": "Calf roping"}]}, "Wgr9YpWni30": {"duration": 163.864, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=Wgr9YpWni30", "annotations": [{"segment": [8.180380205928238, 163.6632426303855], "label": "Slacklining"}]}, "ng14GLT_hHQ": {"duration": 153.35, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ng14GLT_hHQ", "annotations": [{"segment": [2.5114820592823714, 132.39098283931358], "label": "Painting"}]}, "Ly21QMQ4kGA": {"duration": 37.036, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Ly21QMQ4kGA", "annotations": [{"segment": [26.668097780525883, 29.37522658612464], "label": "Beer pong"}]}, "fKFcbNM89MA": {"duration": 30.0, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=fKFcbNM89MA", "annotations": [{"segment": [0.2, 29.493589703588142], "label": "River tubing"}]}, "O9K2pEzSOG0": {"duration": 13.143, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=O9K2pEzSOG0", "annotations": []}, "atGMJc-DZ7w": {"duration": 82.73, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=atGMJc-DZ7w", "annotations": []}, "aTrEYoCRL2k": {"duration": 236.147, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aTrEYoCRL2k", "annotations": [{"segment": [30.48236148912322, 199.42078016125961], "label": "Blowing leaves"}]}, "2OEa00knM9E": {"duration": 51.502, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2OEa00knM9E", "annotations": [{"segment": [2.1676240249609986, 41.66655070202808], "label": "Calf roping"}]}, "WIarYDce6rc": {"duration": 234.17000000000002, "subset": 
"testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WIarYDce6rc", "annotations": []}, "F1MKU9-yXg0": {"duration": 121.069, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=F1MKU9-yXg0", "annotations": [{"segment": [49.331313773928585, 96.59146827159577], "label": "Spread mulch"}]}, "fKDl_CnA8nY": {"duration": 128.92, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fKDl_CnA8nY", "annotations": [{"segment": [0, 6.636922325872627], "label": "Playing saxophone"}, {"segment": [59.12894435777431, 113.02879733853355], "label": "Playing saxophone"}]}, "8dXbbJWFEJo": {"duration": 38.012, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=8dXbbJWFEJo", "annotations": [{"segment": [0, 37.981], "label": "Canoeing"}]}, "-MB6Wxglgzw": {"duration": 196.604, "subset": "validation", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=-MB6Wxglgzw", "annotations": [{"segment": [9.18707271070422, 196.603355], "label": "Playing beach volleyball"}]}, "d8M7yIj9jbE": {"duration": 64.111, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=d8M7yIj9jbE", "annotations": [{"segment": [7.20112560374415, 56.2087859625585], "label": "Playing pool"}]}, "HVfr9FnEj-Q": {"duration": 188.71, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HVfr9FnEj-Q", "annotations": [{"segment": [0.01, 188.70857142857142], "label": "Belly dance"}]}, "6diEN8nUqsI": {"duration": 121.65, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6diEN8nUqsI", "annotations": [{"segment": [0.2, 99.02866510903428], "label": "Ice fishing"}]}, "XK6Ldhzbc90": {"duration": 224.05, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XK6Ldhzbc90", "annotations": []}, "c_MMN0233ZY": {"duration": 64.97, "subset": "testing", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=c_MMN0233ZY", "annotations": []}, "U-VzZQGWOqA": {"duration": 59.814, "subset": "training", "resolution": "352x262", "url": "https://www.youtube.com/watch?v=U-VzZQGWOqA", "annotations": [{"segment": [0.01, 52.95735181942342], "label": "Having an ice cream"}]}, "rlWN8PZMbfY": {"duration": 171.341, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=rlWN8PZMbfY", "annotations": [{"segment": [58.09040011225459, 111.65081864358626], "label": "Putting in contact lenses"}]}, "LDSEEw5oUM4": {"duration": 155.087, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LDSEEw5oUM4", "annotations": [{"segment": [11.1261872074883, 147.82775731045243], "label": "Baton twirling"}]}, "KT2-cbD1tfk": {"duration": 73.329, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KT2-cbD1tfk", "annotations": []}, "eL4Ce4WPI0Y": {"duration": 39.1, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=eL4Ce4WPI0Y", "annotations": [{"segment": [13.359478998439938, 23.24183332605304], "label": "Shot put"}]}, "rwUlqI6HWyY": {"duration": 201.619, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=rwUlqI6HWyY", "annotations": [{"segment": [24.21247581903276, 161.64217472698905], "label": "Making an omelette"}]}, "bH-S32gOlCA": {"duration": 218.547, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=bH-S32gOlCA", "annotations": [{"segment": [48.40250546021841, 210.3122948517941], "label": "Playing ice hockey"}]}, "yjUMopRteDc": {"duration": 87.31, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=yjUMopRteDc", "annotations": [{"segment": [8.8791219, 47.70105252], "label": "Getting a piercing"}]}, "gpJ7veSnhUs": {"duration": 216.039, "subset": "validation", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=gpJ7veSnhUs", "annotations": [{"segment": [30.616964174454825, 190.76723831775703], "label": "Hurling"}]}, "GycRQhoHnXw": {"duration": 235.01, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GycRQhoHnXw", "annotations": [{"segment": [8.43246599523845, 218.51085796357026], "label": "Belly dance"}]}, "134qxRztr2s": {"duration": 168.6, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=134qxRztr2s", "annotations": []}, "Z4biLo6k5fI": {"duration": 136.16, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=Z4biLo6k5fI", "annotations": [{"segment": [49.2816550104181, 135.96181405895692], "label": "Bathing dog"}]}, "z3xkE5Ox-2A": {"duration": 166.929, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=z3xkE5Ox-2A", "annotations": [{"segment": [7.349147730556975, 19.046343992239215], "label": "Carving jack-o-lanterns"}, {"segment": [63.75562748133579, 81.17145302650712], "label": "Carving jack-o-lanterns"}]}, "TspdPLMqTx0": {"duration": 169.0, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TspdPLMqTx0", "annotations": [{"segment": [3.2125759400000002, 14.89574817550525], "label": "Playing violin"}, {"segment": [28.605109328182646, 32.82337429823723], "label": "Playing violin"}, {"segment": [38.8871301926907, 42.84175360211687], "label": "Playing violin"}, {"segment": [46.26909389028623, 77.6424396050672], "label": "Playing violin"}, {"segment": [78.96064740820925, 141.9709803983996], "label": "Playing violin"}]}, "TjR436qaQw4": {"duration": 150.21, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TjR436qaQw4", "annotations": [{"segment": [41.34859619436867, 109.09488958472636], "label": "Carving jack-o-lanterns"}]}, "K7rfN2W0ts4": {"duration": 181.72, "subset": "training", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=K7rfN2W0ts4", "annotations": [{"segment": [0.01, 174.37147662842946], "label": "Playing blackjack"}]}, "BSIV5tJ3bv4": {"duration": 27.4, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BSIV5tJ3bv4", "annotations": [{"segment": [0.14897295709828393, 25.04873864352574], "label": "Clean and jerk"}]}, "6Z4Qg_fNo0Q": {"duration": 203.732, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6Z4Qg_fNo0Q", "annotations": [{"segment": [0.2, 84.21336193447738], "label": "Scuba diving"}, {"segment": [92.15801872074883, 102.00939313572542], "label": "Scuba diving"}, {"segment": [109.95404992199688, 185.58718252730108], "label": "Scuba diving"}, {"segment": [194.48519812792512, 201.79428237129486], "label": "Scuba diving"}]}, "hSnTYQ9osIg": {"duration": 129.92, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hSnTYQ9osIg", "annotations": [{"segment": [1.2160590913432456, 112.89081884196568], "label": "Tango"}]}, "bDTQVC0ASbE": {"duration": 193.725, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=bDTQVC0ASbE", "annotations": [{"segment": [27.496054602184092, 178.28953198127925], "label": "Waxing skis"}]}, "gUNBEOi-Fug": {"duration": 85.682, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gUNBEOi-Fug", "annotations": []}, "UJGK_TKrl4E": {"duration": 93.113, "subset": "testing", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=UJGK_TKrl4E", "annotations": []}, "H1bmoIihWwo": {"duration": 54.1, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=H1bmoIihWwo", "annotations": [{"segment": [39.04082480044878, 40.641988923501735], "label": "Springboard diving"}]}, "Gn651vGKC2I": {"duration": 99.94, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Gn651vGKC2I", 
"annotations": [{"segment": [0, 99.938048], "label": "Getting a tattoo"}]}, "HkbOI7gEtKk": {"duration": 129.06, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HkbOI7gEtKk", "annotations": []}, "oNc2lJ9hV9E": {"duration": 190.799, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=oNc2lJ9hV9E", "annotations": [{"segment": [9.331883151923991, 173.68000838245356], "label": "Making a cake"}]}, "6LsvBNqJuJY": {"duration": 149.57999999999998, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6LsvBNqJuJY", "annotations": []}, "5k0KMrksf8c": {"duration": 9.68, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=5k0KMrksf8c", "annotations": [{"segment": [0.09722958580343215, 9.588333], "label": "Playing lacrosse"}]}, "IjFlVNAWQec": {"duration": 94.273, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IjFlVNAWQec", "annotations": [{"segment": [4.706298308892356, 42.94497206864274], "label": "Surfing"}, {"segment": [46.76883944461778, 54.857789663026516], "label": "Surfing"}, {"segment": [60.15237526053043, 74.41834200936037], "label": "Surfing"}, {"segment": [77.06563480811232, 86.62530324804992], "label": "Surfing"}, {"segment": [90.0079551575663, 94.073038], "label": "Surfing"}]}, "_79Qlv1SQ9k": {"duration": 120.094, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_79Qlv1SQ9k", "annotations": [{"segment": [0.01, 77.4973186654183], "label": "Snow tubing"}]}, "V5MvrOzQWZo": {"duration": 94.18, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=V5MvrOzQWZo", "annotations": [{"segment": [5.289368015536948, 90.21310999063964], "label": "Using the balance beam"}]}, "ucHq8B0-1BA": {"duration": 208.86, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ucHq8B0-1BA", "annotations": 
[{"segment": [0.01, 10.261333853354135], "label": "Grooming horse"}, {"segment": [74.10963338533541, 202.4577457098284], "label": "Grooming horse"}]}, "X2toGKgWMpE": {"duration": 30.094, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=X2toGKgWMpE", "annotations": []}, "rlBlDYBLTn0": {"duration": 229.994, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=rlBlDYBLTn0", "annotations": []}, "YmAIoN4uuz0": {"duration": 146.936, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YmAIoN4uuz0", "annotations": [{"segment": [34.1551405294307, 67.16413493915756], "label": "Scuba diving"}, {"segment": [84.58554878003119, 91.92088092355694], "label": "Scuba diving"}]}, "zbaxx7CTOSA": {"duration": 213.647, "subset": "testing", "resolution": "854x478", "url": "https://www.youtube.com/watch?v=zbaxx7CTOSA", "annotations": []}, "qAZStAHJ3CQ": {"duration": 129.661, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qAZStAHJ3CQ", "annotations": [{"segment": [35.80235758829568, 83.06146845103716], "label": "Hula hoop"}]}, "vNdBI3RN7M8": {"duration": 43.75, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vNdBI3RN7M8", "annotations": []}, "2QdSai6J_ug": {"duration": 58.492, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2QdSai6J_ug", "annotations": []}, "T62XjoKbN8M": {"duration": 83.267, "subset": "testing", "resolution": "855x470", "url": "https://www.youtube.com/watch?v=T62XjoKbN8M", "annotations": []}, "H0dG_1Vgw4Q": {"duration": 20.32, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=H0dG_1Vgw4Q", "annotations": []}, "kM6v-XN8Ixo": {"duration": 110.527, "subset": "training", "resolution": "1906x1080", "url": "https://www.youtube.com/watch?v=kM6v-XN8Ixo", "annotations": [{"segment": [8.82713394093359, 
20.017560682762202], "label": "Disc dog"}, {"segment": [22.944287442828372, 36.3727993960994], "label": "Disc dog"}, {"segment": [48.59618848176918, 93.87437622164457], "label": "Disc dog"}]}, "JK8pYBpatAY": {"duration": 28.468, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JK8pYBpatAY", "annotations": [{"segment": [0.01, 28.421], "label": "Kneeling"}]}, "XwW5p80hYCg": {"duration": 207.471, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XwW5p80hYCg", "annotations": [{"segment": [16.933713026926114, 36.64662265599061], "label": "Hand car wash"}, {"segment": [51.51209549102286, 57.97534454973253], "label": "Hand car wash"}, {"segment": [61.20696907908737, 65.73124342018414], "label": "Hand car wash"}, {"segment": [68.96286794953897, 78.65774153760348], "label": "Hand car wash"}, {"segment": [86.4136404080551, 137.796470424797], "label": "Hand car wash"}, {"segment": [142.3207435417049, 149.10715499497593], "label": "Hand car wash"}, {"segment": [178.51493795915036, 184.97818696226562], "label": "Hand car wash"}, {"segment": [206.306908672546, 207.270293], "label": "Hand car wash"}]}, "e9R62cyqW4A": {"duration": 67.663, "subset": "training", "resolution": "420x240", "url": "https://www.youtube.com/watch?v=e9R62cyqW4A", "annotations": [{"segment": [11.399307332293292, 66.28486115444618], "label": "Camel ride"}]}, "mRyXubjYWEo": {"duration": 104.003, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=mRyXubjYWEo", "annotations": [{"segment": [5.181856697819314, 95.05468380062305], "label": "Capoeira"}]}, "q4rVY3sLQqA": {"duration": 145.38, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=q4rVY3sLQqA", "annotations": [{"segment": [6.123656276863319, 9.072083373130845], "label": "Rock climbing"}, {"segment": [21.09259384252921, 30.391479299988326], "label": "Rock climbing"}]}, "4d3RtnuVCV4": {"duration": 126.64, 
"subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4d3RtnuVCV4", "annotations": [{"segment": [58.36286349453978, 88.97620982839314], "label": "Playing field hockey"}]}, "CXP20cVQN30": {"duration": 175.54, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=CXP20cVQN30", "annotations": [{"segment": [4.655582794740361, 145.69235557566302], "label": "Playing water polo"}]}, "_yFOkxb22RI": {"duration": 60.42, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_yFOkxb22RI", "annotations": [{"segment": [24.114720748829953, 55.01170670826833], "label": "Polishing forniture"}]}, "E7C91KoML-o": {"duration": 20.09, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=E7C91KoML-o", "annotations": [{"segment": [2.68786895475819, 16.002196567862715], "label": "Clean and jerk"}]}, "hL11sP4Hlrg": {"duration": 117.08, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hL11sP4Hlrg", "annotations": [{"segment": [22.465164411856477, 97.16640217160686], "label": "Doing nails"}]}, "VbzzcpsAPo8": {"duration": 73.03, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=VbzzcpsAPo8", "annotations": [{"segment": [1.5949681796795683, 9.911587973723032], "label": "Ping-pong"}, {"segment": [14.924345109858816, 34.06396318689548], "label": "Ping-pong"}, {"segment": [39.532425504524184, 61.975906266458665], "label": "Ping-pong"}]}, "QfUTGmlNO1s": {"duration": 20.02, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=QfUTGmlNO1s", "annotations": [{"segment": [0, 2.0557815912636506], "label": "Washing dishes"}]}, "USoYnMSrWv0": {"duration": 63.368, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=USoYnMSrWv0", "annotations": [{"segment": [0.2, 60.652854914196574], "label": "Table soccer"}]}, "jQzBXawu29s": 
{"duration": 206.24, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=jQzBXawu29s", "annotations": []}, "vrXqd_Ct298": {"duration": 225.977, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vrXqd_Ct298", "annotations": [{"segment": [8.235233247287548, 225.941], "label": "BMX"}]}, "8jyqeivzs2M": {"duration": 182.95, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8jyqeivzs2M", "annotations": [{"segment": [0, 150.6983020530421], "label": "Preparing pasta"}]}, "U7_kDhChqQM": {"duration": 100.148, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=U7_kDhChqQM", "annotations": []}, "jYU215e-dKg": {"duration": 39.66, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jYU215e-dKg", "annotations": [{"segment": [12.869288062402497, 33.34878012324493], "label": "Drinking beer"}]}, "Kbr2nr94EB0": {"duration": 88.35, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Kbr2nr94EB0", "annotations": [{"segment": [32.82946458, 85.63416770670827], "label": "Layup drill in basketball"}]}, "TGIQtaINzCY": {"duration": 221.24, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TGIQtaINzCY", "annotations": []}, "0gkxTQGR6zI": {"duration": 38.17, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=0gkxTQGR6zI", "annotations": [{"segment": [0.010000000000000002, 38.173604], "label": "Ping-pong"}]}, "rthLJwFnOc0": {"duration": 125.09, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rthLJwFnOc0", "annotations": []}, "pSp7zYRYjHE": {"duration": 32.11, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pSp7zYRYjHE", "annotations": [{"segment": [4.91361013, 28.54139214], "label": "Brushing teeth"}]}, "_cZD6JN-SYg": 
{"duration": 130.217, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_cZD6JN-SYg", "annotations": [{"segment": [47.333351687987516, 130.217504], "label": "Futsal"}]}, "fm9mf-VuWxI": {"duration": 108.22999999999999, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fm9mf-VuWxI", "annotations": [{"segment": [14.155140119999999, 105.52619247269891], "label": "Playing field hockey"}]}, "thhFfqcOfJQ": {"duration": 139.9, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=thhFfqcOfJQ", "annotations": [{"segment": [8.948376438458897, 128.1145586193448], "label": "Zumba"}]}, "4Xvn1xXvYdU": {"duration": 213.21, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4Xvn1xXvYdU", "annotations": [{"segment": [93.46455573597093, 164.64396706708268], "label": "Archery"}]}, "jM3Buw2Kidk": {"duration": 124.18, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jM3Buw2Kidk", "annotations": [{"segment": [6.586787509595622, 22.666298194784932], "label": "Windsurfing"}, {"segment": [81.94738578114553, 111.20047017784712], "label": "Windsurfing"}]}, "1d_U_2HIPmY": {"duration": 36.69, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1d_U_2HIPmY", "annotations": []}, "9qFwSM8QLIE": {"duration": 190.009, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=9qFwSM8QLIE", "annotations": [{"segment": [53.190664231251084, 74.76243544258683], "label": "Bullfighting"}, {"segment": [85.40056919064283, 131.79465248077588], "label": "Bullfighting"}]}, "FHUR9Wane1k": {"duration": 136.84, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FHUR9Wane1k", "annotations": []}, "kpnGg2lOBLM": {"duration": 182.695, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=kpnGg2lOBLM", 
"annotations": [{"segment": [8.265434464997648, 179.27442224336974], "label": "Rope skipping"}]}, "45ymM0irIIY": {"duration": 42.702, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=45ymM0irIIY", "annotations": [{"segment": [10.239678627145086, 25.200248049921996], "label": "Bullfighting"}]}, "oT6WUU8-dgs": {"duration": 17.14, "subset": "testing", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=oT6WUU8-dgs", "annotations": []}, "mtF0AFNSFnI": {"duration": 149.49, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mtF0AFNSFnI", "annotations": [{"segment": [3.7314213291731675, 136.1968785148206], "label": "Playing accordion"}]}, "p1yGu2Bbwxg": {"duration": 198.786, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=p1yGu2Bbwxg", "annotations": []}, "niZ7PxgQM2M": {"duration": 190.8, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=niZ7PxgQM2M", "annotations": []}, "q1BuFA6HLts": {"duration": 104.096, "subset": "testing", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=q1BuFA6HLts", "annotations": []}, "VqUUJoIOQKE": {"duration": 148.52, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VqUUJoIOQKE", "annotations": []}, "vKCxWIzJTm0": {"duration": 219.48, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vKCxWIzJTm0", "annotations": [{"segment": [16.777294158433, 194.4796547344887], "label": "Breakdancing"}]}, "XuG2V9gDD9M": {"duration": 97.99000000000001, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XuG2V9gDD9M", "annotations": [{"segment": [0.6521449473717325, 96.65619087103714], "label": "Windsurfing"}]}, "yvTmIulkl7c": {"duration": 26.657, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=yvTmIulkl7c", "annotations": [{"segment": 
[0.2, 26.122634945397813], "label": "Hitting a pinata"}]}, "Sd850yNArWY": {"duration": 178.84, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Sd850yNArWY", "annotations": []}, "ynvCxrj1UNg": {"duration": 99.57, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ynvCxrj1UNg", "annotations": [{"segment": [1.3640679, 96.8029953198128], "label": "Using the balance beam"}]}, "l7gWFOa7FnI": {"duration": 168.113, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=l7gWFOa7FnI", "annotations": [{"segment": [21.243475755356744, 32.258611332208396], "label": "Laying tile"}, {"segment": [39.864300182891675, 137.16466353666146], "label": "Laying tile"}]}, "S7a1aBxTEkk": {"duration": 220.149, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=S7a1aBxTEkk", "annotations": []}, "KjXvBqmh5C4": {"duration": 217.39, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KjXvBqmh5C4", "annotations": [{"segment": [3.7304795157792707, 23.40028059897906], "label": "Pole vault"}, {"segment": [81.73141484570947, 96.99246677691107], "label": "Pole vault"}]}, "cGNkR-JvaJs": {"duration": 161.588, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cGNkR-JvaJs", "annotations": [{"segment": [21.49468346767251, 24.51501364335649], "label": "Playing pool"}, {"segment": [61.262364114178254, 143.8180543376374], "label": "Playing pool"}]}, "tA4KrZHGoYQ": {"duration": 181.023, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tA4KrZHGoYQ", "annotations": [{"segment": [26.263833508442378, 137.8145242163428], "label": "River tubing"}]}, "aSYCOvkN5uk": {"duration": 108.949, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aSYCOvkN5uk", "annotations": [{"segment": [31.93976287051482, 87.17078471138846], 
"label": "Playing ten pins"}, {"segment": [94.81734477379095, 97.87596879875196], "label": "Playing ten pins"}]}, "xSpWBpHMxqY": {"duration": 87.15, "subset": "training", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=xSpWBpHMxqY", "annotations": [{"segment": [7.51617, 52.58181816], "label": "Putting on makeup"}]}, "BwR1DPCVsP8": {"duration": 189.43, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BwR1DPCVsP8", "annotations": [{"segment": [9.899923659531414, 160.61517850468022], "label": "Layup drill in basketball"}]}, "06ofnvq2Hjs": {"duration": 203.5, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=06ofnvq2Hjs", "annotations": [{"segment": [19.8818523, 192.8188566224649], "label": "Skateboarding"}]}, "ksNvNH4fpdo": {"duration": 18.367, "subset": "validation", "resolution": "640x350", "url": "https://www.youtube.com/watch?v=ksNvNH4fpdo", "annotations": [{"segment": [0.01, 18.301], "label": "Using the rowing machine"}]}, "KjbZvsu0OxE": {"duration": 161.29, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KjbZvsu0OxE", "annotations": [{"segment": [0, 6.598506121519571], "label": "Walking the dog"}, {"segment": [34.47662294394948, 69.13590331778126], "label": "Walking the dog"}, {"segment": [102.28825845796818, 153.7748706075009], "label": "Walking the dog"}]}, "XP8pufIBsnM": {"duration": 46.14, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XP8pufIBsnM", "annotations": [{"segment": [18.282471438377534, 37.932529322932915], "label": "Clean and jerk"}]}, "x4f4jp_eHHo": {"duration": 229.46, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=x4f4jp_eHHo", "annotations": [{"segment": [20.762334196567863, 225.52190592823715], "label": "Belly dance"}]}, "id4XtnLsw7c": {"duration": 106.84, "subset": "validation", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=id4XtnLsw7c", "annotations": [{"segment": [13.939354999999999, 22.17634945397816], "label": "Snatch"}]}, "xUUmAdQJgjg": {"duration": 5.83, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=xUUmAdQJgjg", "annotations": [{"segment": [0.01, 5.399398127925116], "label": "Triple jump"}]}, "jed5hUKCCk0": {"duration": 154.042, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jed5hUKCCk0", "annotations": [{"segment": [17.30259734471012, 139.14171928549143], "label": "Decorating the Christmas tree"}]}, "4tIHbL_1UMs": {"duration": 102.935, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=4tIHbL_1UMs", "annotations": []}, "bXCOtMuaJf4": {"duration": 147.63, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=bXCOtMuaJf4", "annotations": [{"segment": [0.2, 7.600385133772698], "label": "Washing dishes"}, {"segment": [13.58856736038149, 22.11021129824785], "label": "Washing dishes"}, {"segment": [69.32472500804795, 73.240074925446], "label": "Washing dishes"}, {"segment": [79.68888655410163, 98.57469203802167], "label": "Washing dishes"}]}, "UyORfPxpaGo": {"duration": 12.052, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=UyORfPxpaGo", "annotations": [{"segment": [0.01, 12.051155], "label": "Doing a powerbomb"}]}, "vcRCId1-xdA": {"duration": 145.71, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vcRCId1-xdA", "annotations": [{"segment": [10.912249609984398, 41.60295163806552], "label": "Drinking beer"}]}, "GJTW7ZGUgU0": {"duration": 79.85, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=GJTW7ZGUgU0", "annotations": []}, "UyqM2sglj1s": {"duration": 237.866, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UyqM2sglj1s", "annotations": 
[{"segment": [62.82896957878315, 118.42982761310452], "label": "Snowboarding"}, {"segment": [139.55815366614664, 165.50522074882994], "label": "Snowboarding"}]}, "lRB6XvAm_FU": {"duration": 91.069, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=lRB6XvAm_FU", "annotations": [{"segment": [9.945095708590248, 49.299259542901716], "label": "Decorating the Christmas tree"}]}, "ph7d2H77tks": {"duration": 172.664, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ph7d2H77tks", "annotations": [{"segment": [0.01, 163.23577403504302], "label": "Assembling bicycle"}]}, "Ey-0Q6VNJaY": {"duration": 213.67000000000002, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ey-0Q6VNJaY", "annotations": [{"segment": [15.333574049921996, 176.33610157410297], "label": "Skateboarding"}]}, "DEt_Xgg8dzc": {"duration": 153.35, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=DEt_Xgg8dzc", "annotations": [{"segment": [24.1619064245563, 26.793401183666393], "label": "Long jump"}, {"segment": [140.42612903588144, 145.21066494851794], "label": "Long jump"}]}, "akJbB6LWP34": {"duration": 55.91, "subset": "validation", "resolution": "576x360", "url": "https://www.youtube.com/watch?v=akJbB6LWP34", "annotations": [{"segment": [6.79528857098284, 48.002615418096724], "label": "Layup drill in basketball"}]}, "ibKFezOKsBQ": {"duration": 82.408, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ibKFezOKsBQ", "annotations": [{"segment": [2.178737583777679, 63.56792732590908], "label": "Fun sliding down"}]}, "CYSyc4jy9-Q": {"duration": 100.682, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CYSyc4jy9-Q", "annotations": [{"segment": [21.51855865799258, 100.68172335600907], "label": "Powerbocking"}]}, "znoMrA2qCbA": {"duration": 20.597, "subset": "training", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=znoMrA2qCbA", "annotations": [{"segment": [0.2, 20.341], "label": "Kneeling"}]}, "04qggWACGWk": {"duration": 86.53999999999999, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=04qggWACGWk", "annotations": []}, "9mViirdNpDc": {"duration": 111.25, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=9mViirdNpDc", "annotations": []}, "l3EBfLkfAX8": {"duration": 88.42, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=l3EBfLkfAX8", "annotations": [{"segment": [8.408198127925116, 16.816396255850233], "label": "High jump"}, {"segment": [20.81373634945398, 32.39223868954758], "label": "High jump"}, {"segment": [35.011185647425904, 55.68708268330733], "label": "High jump"}, {"segment": [58.85738689547582, 87.52796411856474], "label": "High jump"}]}, "legHHtPK5fA": {"duration": 36.734, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=legHHtPK5fA", "annotations": [{"segment": [0.01, 14.89989341653666], "label": "Throwing darts"}, {"segment": [21.89138186583463, 32.60784366926677], "label": "Throwing darts"}]}, "3oJ62-Ce9ys": {"duration": 60.3, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3oJ62-Ce9ys", "annotations": [{"segment": [10.059168486739472, 45.87732917316693], "label": "Snatch"}]}, "OUIS4bnEhU0": {"duration": 35.991, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=OUIS4bnEhU0", "annotations": [{"segment": [0.2, 32.25015472367445], "label": "Doing crunches"}]}, "HGG_st1NOYM": {"duration": 96.92, "subset": "training", "resolution": "582x358", "url": "https://www.youtube.com/watch?v=HGG_st1NOYM", "annotations": [{"segment": [1.9656180296411858, 9.979291535101405], "label": "Playing beach volleyball"}, {"segment": [29.786673218408737, 47.17483271138846], "label": "Playing beach 
volleyball"}, {"segment": [65.47020052574103, 67.88942271606865], "label": "Playing beach volleyball"}, {"segment": [69.4014365850234, 77.41511009048362], "label": "Playing beach volleyball"}, {"segment": [82.40475585803432, 91.9304432324493], "label": "Playing beach volleyball"}]}, "cfKMu6aeMwU": {"duration": 124.32, "subset": "training", "resolution": "1916x1080", "url": "https://www.youtube.com/watch?v=cfKMu6aeMwU", "annotations": [{"segment": [14.73989083100739, 114.23415394030727], "label": "Elliptical trainer"}]}, "vXvdkqpo7F0": {"duration": 122.09, "subset": "testing", "resolution": "480x320", "url": "https://www.youtube.com/watch?v=vXvdkqpo7F0", "annotations": []}, "wtoKUYBw9f4": {"duration": 61.74, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wtoKUYBw9f4", "annotations": [{"segment": [30.919090226677067, 52.591349731357255], "label": "High jump"}]}, "37siIMJNG9Y": {"duration": 98.174, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=37siIMJNG9Y", "annotations": [{"segment": [49.010416688776395, 62.0288074024961], "label": "Gargling mouthwash"}]}, "w9CC0wf27zs": {"duration": 78.902, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=w9CC0wf27zs", "annotations": [{"segment": [0, 43.69686858814353], "label": "Knitting"}, {"segment": [55.39039680187207, 78.900543], "label": "Knitting"}]}, "YlK_P4Ys6hE": {"duration": 116.1, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YlK_P4Ys6hE", "annotations": [{"segment": [17.975572644036845, 66.26005777765417], "label": "Snowboarding"}, {"segment": [78.01470771654976, 110.56604482547382], "label": "Snowboarding"}]}, "OixwThoO8z4": {"duration": 170.365, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OixwThoO8z4", "annotations": [{"segment": [13.023206698907954, 170.36480725623582], "label": "Rollerblading"}]}, 
"wts5XRikF1Y": {"duration": 155.412, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wts5XRikF1Y", "annotations": [{"segment": [9.940471980783993, 104.73863048985959], "label": "Surfing"}, {"segment": [111.28479489547581, 129.71103544461778], "label": "Surfing"}, {"segment": [137.22700198439938, 142.0760126552262], "label": "Surfing"}]}, "uZCov5TG-Y8": {"duration": 70.64, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uZCov5TG-Y8", "annotations": [{"segment": [19.83513594383775, 32.28719350858034], "label": "Ironing clothes"}, {"segment": [41.98437108112324, 48.37569266302652], "label": "Ironing clothes"}]}, "p3LvCbxC_ZE": {"duration": 113.198, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=p3LvCbxC_ZE", "annotations": [{"segment": [46.652064050250736, 66.54520948883052], "label": "Trimming branches or hedges"}, {"segment": [82.03721517067905, 88.02276325466039], "label": "Trimming branches or hedges"}]}, "dKJMP8EmZjw": {"duration": 152.46, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dKJMP8EmZjw", "annotations": []}, "d7gTlJhEwrM": {"duration": 120.12, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=d7gTlJhEwrM", "annotations": [{"segment": [65.96118939157567, 68.0224765600624], "label": "Long jump"}, {"segment": [83.2010457098284, 87.51100978939158], "label": "Long jump"}]}, "LF5wwjmKr40": {"duration": 100.4, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LF5wwjmKr40", "annotations": []}, "lvd2InghJOo": {"duration": 131.61, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lvd2InghJOo", "annotations": [{"segment": [0.6157301092043682, 130.53478315132605], "label": "Playing racquetball"}]}, "1Om21fCH1sI": {"duration": 30.349, "subset": "training", "resolution": "480x360", "url": 
"https://www.youtube.com/watch?v=1Om21fCH1sI", "annotations": [{"segment": [0.01, 3.818878315132605], "label": "BMX"}, {"segment": [12.022394695787831, 20.697377535101403], "label": "BMX"}, {"segment": [22.583243369734788, 30.021], "label": "BMX"}]}, "unzG2wuyBsU": {"duration": 190.52, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=unzG2wuyBsU", "annotations": []}, "hOdSTik8_nw": {"duration": 166.023, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hOdSTik8_nw", "annotations": [{"segment": [54.650209711388456, 70.70856517160686], "label": "Hurling"}, {"segment": [104.37931049141966, 119.66064875195006], "label": "Hurling"}, {"segment": [145.82022780811232, 165.822675], "label": "Hurling"}]}, "zzE2VrQMvbc": {"duration": 121.44, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zzE2VrQMvbc", "annotations": [{"segment": [6.062545322932917, 114.4305429703588], "label": "Paintball"}]}, "LE05u6TR9MI": {"duration": 42.632, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LE05u6TR9MI", "annotations": [{"segment": [9.211948517940717, 14.056492979719188], "label": "Playing beach volleyball"}, {"segment": [18.233658346333854, 20.156480499219967], "label": "Playing beach volleyball"}, {"segment": [31.030371294851793, 32.95319344773791], "label": "Playing beach volleyball"}]}, "GQzwzOM9db8": {"duration": 75.98, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GQzwzOM9db8", "annotations": [{"segment": [9.126521340093603, 73.13069697191888], "label": "Using the pommel horse"}]}, "uG_G4g6ixms": {"duration": 30.326, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=uG_G4g6ixms", "annotations": [{"segment": [2.6493207537825323, 6.434064312012481], "label": "Calf roping"}, {"segment": [20.34299745709828, 30.125259], "label": "Calf roping"}]}, 
"vULLe2qogZU": {"duration": 8.336, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=vULLe2qogZU", "annotations": [{"segment": [0.2, 8.101], "label": "Hurling"}]}, "ryFSf08mrkA": {"duration": 83.87, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ryFSf08mrkA", "annotations": []}, "cY541XSdz50": {"duration": 73.33, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=cY541XSdz50", "annotations": [{"segment": [38.43747715622911, 62.57529763231346], "label": "Bungee jumping"}]}, "YNVK-pb7fWM": {"duration": 53.755, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=YNVK-pb7fWM", "annotations": [{"segment": [0.01, 53.753832], "label": "Braiding hair"}]}, "43R60vMRook": {"duration": 112.5, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=43R60vMRook", "annotations": [{"segment": [57.86460504929968, 75.03444928605981], "label": "Hula hoop"}]}, "OuVncktxGw0": {"duration": 28.631, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=OuVncktxGw0", "annotations": [{"segment": [0.4007331106341705, 19.99216284624848], "label": "Arm wrestling"}]}, "3MS3CAyl_YA": {"duration": 66.29, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3MS3CAyl_YA", "annotations": [{"segment": [0.01, 39.71372869890796], "label": "Smoking hookah"}, {"segment": [46.6429469875195, 65.46559965210609], "label": "Smoking hookah"}]}, "6gzU9P-5tqE": {"duration": 90.836, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6gzU9P-5tqE", "annotations": [{"segment": [0.2, 6.802106283931358], "label": "Playing ten pins"}, {"segment": [16.580134067082682, 22.10684542277691], "label": "Playing ten pins"}, {"segment": [30.751188825273008, 33.160268134165364], "label": "Playing ten pins"}, {"segment": [48.60671782059282, 
50.16553384399376], "label": "Playing ten pins"}, {"segment": [77.23224843213728, 79.21619609828393], "label": "Playing ten pins"}]}, "z85nM9V4058": {"duration": 196.63, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=z85nM9V4058", "annotations": [{"segment": [11.63829294, 95.2554657], "label": "Hopscotch"}, {"segment": [118.3491894, 194.58659906396258], "label": "Hopscotch"}]}, "qiupddjDHDg": {"duration": 141.108, "subset": "testing", "resolution": "606x480", "url": "https://www.youtube.com/watch?v=qiupddjDHDg", "annotations": []}, "6B7fKzdJAbo": {"duration": 28.42, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6B7fKzdJAbo", "annotations": [{"segment": [8.025337539781592, 13.079970023400936], "label": "Volleyball"}]}, "XuFiQCgRsRo": {"duration": 67.687, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XuFiQCgRsRo", "annotations": [{"segment": [14.456812792511702, 67.661], "label": "Using the rowing machine"}]}, "txMaVvdALeM": {"duration": 177.981, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=txMaVvdALeM", "annotations": [{"segment": [5.274382215288612, 7.772773790951638], "label": "Rafting"}, {"segment": [10.548764430577224, 11.10396255850234], "label": "Rafting"}, {"segment": [12.769556942277692, 16.93354290171607], "label": "Rafting"}, {"segment": [28.03750546021841, 109.38632449297972], "label": "Rafting"}, {"segment": [116.32708112324494, 168.5215709828393], "label": "Rafting"}]}, "SLPD1U5iDjY": {"duration": 129.405, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=SLPD1U5iDjY", "annotations": [{"segment": [0.2, 123.71028549141964], "label": "Playing beach volleyball"}]}, "N84ci0Imc30": {"duration": 67.2, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=N84ci0Imc30", "annotations": [{"segment": [19.70877821528861, 
67.198547], "label": "Drinking coffee"}]}, "bc-DycGxV9E": {"duration": 64.691, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=bc-DycGxV9E", "annotations": [{"segment": [39.740066104137355, 60.86769544774005], "label": "Fun sliding down"}]}, "qwxmpiaT-kk": {"duration": 41.053, "subset": "validation", "resolution": "400x226", "url": "https://www.youtube.com/watch?v=qwxmpiaT-kk", "annotations": [{"segment": [0.01, 41.001], "label": "Arm wrestling"}]}, "0X0u_3xUBPE": {"duration": 224.77, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0X0u_3xUBPE", "annotations": []}, "k2gc3YcVjwM": {"duration": 54.03, "subset": "training", "resolution": "274x360", "url": "https://www.youtube.com/watch?v=k2gc3YcVjwM", "annotations": [{"segment": [18.618129485179406, 50.12573322932917], "label": "Washing dishes"}]}, "rs4UF-p9V_Y": {"duration": 168.531, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rs4UF-p9V_Y", "annotations": []}, "CsaeiOFuij0": {"duration": 51.9, "subset": "training", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=CsaeiOFuij0", "annotations": [{"segment": [0, 51.896597], "label": "Playing squash"}]}, "dJ0kxnyVzFI": {"duration": 164.189, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=dJ0kxnyVzFI", "annotations": [{"segment": [0, 14.109263600850431], "label": "Running a marathon"}, {"segment": [56.88531029819832, 75.83996572097523], "label": "Running a marathon"}, {"segment": [81.21898955716868, 86.59801339336214], "label": "Running a marathon"}, {"segment": [108.11410873813593, 136.2899478801016], "label": "Running a marathon"}]}, "AuVVP8q6tFY": {"duration": 185.273, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AuVVP8q6tFY", "annotations": [{"segment": [0.01, 183.36735184501973], "label": "Rollerblading"}]}, "nMiXX2jqI40": {"duration": 64.18, 
"subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=nMiXX2jqI40", "annotations": [{"segment": [0, 38.5246255850234], "label": "Scuba diving"}, {"segment": [40.038065522620904, 62.45938221528861], "label": "Scuba diving"}]}, "AoRk69eieek": {"duration": 111.13, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=AoRk69eieek", "annotations": [{"segment": [18.72404946645866, 94.1403598174727], "label": "Pole vault"}]}, "GwLhmYF7Wdw": {"duration": 191.286, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=GwLhmYF7Wdw", "annotations": [{"segment": [17.905076039776283, 36.10857001354884], "label": "Elliptical trainer"}, {"segment": [95.79215681280311, 107.13203830466144], "label": "Elliptical trainer"}, {"segment": [148.01529526215063, 176.3649989917964], "label": "Elliptical trainer"}]}, "AAQp3iEJxJc": {"duration": 237.216, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=AAQp3iEJxJc", "annotations": [{"segment": [21.834147183574416, 151.35874912003283], "label": "Canoeing"}, {"segment": [163.94113902243163, 216.49111883775353], "label": "Canoeing"}]}, "Fg_VZJAtByk": {"duration": 198.21, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Fg_VZJAtByk", "annotations": [{"segment": [0, 195.11340103120125], "label": "Playing guitarra"}]}, "rmaHl5HTcwE": {"duration": 73.422, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rmaHl5HTcwE", "annotations": [{"segment": [5.496486739469579, 73.381], "label": "Throwing darts"}]}, "1YGz5CQBflM": {"duration": 82.338, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=1YGz5CQBflM", "annotations": [{"segment": [16.17380031201248, 34.28138377535102], "label": "Arm wrestling"}, {"segment": [61.37266458658346, 73.69855538221529], "label": "Arm wrestling"}]}, "T6KP0pg7qxM": 
{"duration": 112.989, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=T6KP0pg7qxM", "annotations": [{"segment": [35.78256593120868, 39.48421068271302], "label": "Throwing darts"}, {"segment": [44.06719942267078, 57.11108978471139], "label": "Throwing darts"}, {"segment": [81.43618358190328, 94.832612049922], "label": "Throwing darts"}]}, "XKA2W2735WM": {"duration": 131.727, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XKA2W2735WM", "annotations": [{"segment": [8.899095951508139, 124.7513778829723], "label": "Drum corps"}]}, "Vnj0j648Emw": {"duration": 29.258, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Vnj0j648Emw", "annotations": [{"segment": [3.692513260530421, 27.260776911076444], "label": "Sharpening knives"}]}, "OMq736aZeV8": {"duration": 181.4, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OMq736aZeV8", "annotations": [{"segment": [2.263226209048362, 107.5032449297972], "label": "Kayaking"}, {"segment": [144.01364898595943, 166.36547269890795], "label": "Kayaking"}]}, "_N9LhaKnZv4": {"duration": 96.53, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_N9LhaKnZv4", "annotations": [{"segment": [54.81316098358221, 76.34690279856092], "label": "Drinking beer"}]}, "GtYf9VsPHO0": {"duration": 118.14, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GtYf9VsPHO0", "annotations": [{"segment": [53.2655541051574, 99.89595268164467], "label": "Making a sandwich"}]}, "vX0fg06e9co": {"duration": 195.443, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vX0fg06e9co", "annotations": [{"segment": [44.52934135929098, 188.82765271624245], "label": "Hula hoop"}]}, "iEHm8zPBDkI": {"duration": 31.72, "subset": "testing", "resolution": "426x240", "url": 
"https://www.youtube.com/watch?v=iEHm8zPBDkI", "annotations": []}, "Y2UkP0rySHA": {"duration": 105.466, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Y2UkP0rySHA", "annotations": [{"segment": [0.2, 105.26469387755101], "label": "Skiing"}]}, "lwqSz2Q2L-E": {"duration": 50.99, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lwqSz2Q2L-E", "annotations": [{"segment": [6.574130265210608, 45.859539001560066], "label": "Smoking a cigarette"}]}, "McfQMw15TgQ": {"duration": 78.112, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=McfQMw15TgQ", "annotations": []}, "8Ny9NjNpQQA": {"duration": 17.81, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8Ny9NjNpQQA", "annotations": [{"segment": [1.9950701279251173, 7.215850619999999], "label": "Playing water polo"}, {"segment": [5.032823400000001, 13.06717566], "label": "Playing polo"}, {"segment": [8.36791483, 15.0681949], "label": "Playing water polo"}]}, "O7U80T6HruQ": {"duration": 70.71000000000001, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=O7U80T6HruQ", "annotations": []}, "dd1LE0m_KVg": {"duration": 84.126, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dd1LE0m_KVg", "annotations": [{"segment": [5.887502367749848, 80.72422288388915], "label": "Drum corps"}]}, "bDwGZOk7njI": {"duration": 59.58, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=bDwGZOk7njI", "annotations": [{"segment": [2.2308544260137753, 59.117642289365044], "label": "Mowing the lawn"}]}, "rWVpotNVeyM": {"duration": 209.7, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rWVpotNVeyM", "annotations": [{"segment": [32.3782511700468, 203.0999391575663], "label": "Playing flauta"}]}, "V3LvKGRzkeg": {"duration": 115.00999999999999, "subset": 
"validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=V3LvKGRzkeg", "annotations": [{"segment": [8.964196567862714, 110.99606708268331], "label": "Playing guitarra"}]}, "PY4304sV5fY": {"duration": 173.52, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PY4304sV5fY", "annotations": [{"segment": [73.49666629329172, 149.56503914196566], "label": "Playing piano"}]}, "g0Cp28ElIMk": {"duration": 98.894, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=g0Cp28ElIMk", "annotations": [{"segment": [2.4684876555382216, 90.71692134102965], "label": "Slacklining"}]}, "2bQx5igckJc": {"duration": 167.369, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=2bQx5igckJc", "annotations": []}, "A80eMz7rJUM": {"duration": 156.41, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=A80eMz7rJUM", "annotations": [{"segment": [21.47277058346334, 62.4662416973479], "label": "Trimming branches or hedges"}]}, "Yz7FjWlA6U4": {"duration": 233.058, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Yz7FjWlA6U4", "annotations": [{"segment": [76.74696234945486, 125.10391255127247], "label": "Rafting"}, {"segment": [141.82887277144997, 231.99822352371135], "label": "Rafting"}]}, "QDjaaUtepHo": {"duration": 160.265, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=QDjaaUtepHo", "annotations": [{"segment": [6.249648985959439, 144.99185647425898], "label": "Blow-drying hair"}]}, "wwh94C7NB1I": {"duration": 192.865, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wwh94C7NB1I", "annotations": [{"segment": [20.159050236839406, 50.247184918689264], "label": "Longboarding"}, {"segment": [55.963930508240736, 150.74155373166926], "label": "Longboarding"}]}, "PofMK_beoR8": {"duration": 161.519, "subset": "training", 
"resolution": "720x480", "url": "https://www.youtube.com/watch?v=PofMK_beoR8", "annotations": [{"segment": [24.93703432137286, 151.90802340093603], "label": "Powerbocking"}]}, "ssHXm1LqovI": {"duration": 11.59, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=ssHXm1LqovI", "annotations": [{"segment": [2.440268424336973, 10.84563744149766], "label": "Dodgeball"}]}, "LSFmrUdURCs": {"duration": 162.749, "subset": "validation", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=LSFmrUdURCs", "annotations": [{"segment": [30.9626864274571, 153.2906770670827], "label": "Waxing skis"}]}, "7AsHuXeoSpA": {"duration": 14.98, "subset": "validation", "resolution": "636x480", "url": "https://www.youtube.com/watch?v=7AsHuXeoSpA", "annotations": [{"segment": [3.434632984399376, 4.976713099843994], "label": "Washing face"}]}, "u-X4YO91V78": {"duration": 168.832, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=u-X4YO91V78", "annotations": [{"segment": [10.005737909516382, 159.30187987519503], "label": "River tubing"}]}, "kq7KVmvkYPc": {"duration": 149.89, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=kq7KVmvkYPc", "annotations": [{"segment": [9.155588143525742, 141.95807020280813], "label": "Shaving"}]}, "gvFJe2W3ZNU": {"duration": 130.682, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gvFJe2W3ZNU", "annotations": []}, "rQZIJBinOsw": {"duration": 129.011, "subset": "validation", "resolution": "852x480", "url": "https://www.youtube.com/watch?v=rQZIJBinOsw", "annotations": [{"segment": [67.99073010920436, 83.0774305772231], "label": "Sharpening knives"}, {"segment": [95.76511076443059, 108.84227925117005], "label": "Sharpening knives"}]}, "EW3zRMVjkoU": {"duration": 35.759, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EW3zRMVjkoU", "annotations": [{"segment": [0.01, 
35.75873015873016], "label": "Beach soccer"}]}, "9abGikdleAU": {"duration": 135.0, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=9abGikdleAU", "annotations": [{"segment": [11.583533268330731, 124.68093990639625], "label": "Cumbia"}]}, "l1oh52tqdPo": {"duration": 34.598, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=l1oh52tqdPo", "annotations": [{"segment": [13.135333333333334, 27.455], "label": "Decorating the Christmas tree"}]}, "CPk3EvOGcZg": {"duration": 88.28, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CPk3EvOGcZg", "annotations": []}, "m-BZKFg9k18": {"duration": 40.33, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=m-BZKFg9k18", "annotations": [{"segment": [0.2, 39.656583463338535], "label": "Doing step aerobics"}]}, "8TqqLqBb3sk": {"duration": 43.84, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=8TqqLqBb3sk", "annotations": [{"segment": [15.787463343979786, 31.138281100989133], "label": "Kneeling"}]}, "2Py3HAe0L-8": {"duration": 107.93, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2Py3HAe0L-8", "annotations": []}, "fE3j74_s4KY": {"duration": 226.256, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=fE3j74_s4KY", "annotations": [{"segment": [18.001586806329396, 166.2499487408068], "label": "Futsal"}]}, "-VKGwqL83w8": {"duration": 117.957, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=-VKGwqL83w8", "annotations": [{"segment": [16.91608736349454, 93.60597347893916], "label": "Scuba diving"}]}, "WUSEdPfHPoY": {"duration": 211.74, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=WUSEdPfHPoY", "annotations": [{"segment": [0.01, 179.28402964118564], "label": "Playing squash"}]}, "JE0xYYOp5_s": 
{"duration": 82.106, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JE0xYYOp5_s", "annotations": [{"segment": [33.17481892598667, 57.6019213839306], "label": "Sharpening knives"}]}, "9ut_IDtfVzY": {"duration": 150.09, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9ut_IDtfVzY", "annotations": [{"segment": [0.010000000000000002, 109.81901086162247], "label": "Fixing bicycle"}]}, "TFdv2VQ8-Hg": {"duration": 154.669, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TFdv2VQ8-Hg", "annotations": [{"segment": [45.031755451713394, 132.70431619937696], "label": "Surfing"}]}, "o8ja3mhecQI": {"duration": 70.28999999999999, "subset": "validation", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=o8ja3mhecQI", "annotations": [{"segment": [27.88667336, 44.40897723088923], "label": "Using uneven bars"}]}, "j4vMVBSfyu0": {"duration": 81.549, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=j4vMVBSfyu0", "annotations": [{"segment": [27.606895971076934, 38.16621562821696], "label": "Throwing darts"}, {"segment": [62.71981379095164, 74.93300268330734], "label": "Throwing darts"}]}, "zVOj9aaq4L0": {"duration": 169.553, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zVOj9aaq4L0", "annotations": [{"segment": [17.193273712948518, 20.896440358814353], "label": "Blow-drying hair"}, {"segment": [30.418868876755074, 39.1477616848674], "label": "Blow-drying hair"}, {"segment": [46.55409497659907, 56.60554730109205], "label": "Blow-drying hair"}, {"segment": [60.83773775351015, 71.68272578783152], "label": "Blow-drying hair"}, {"segment": [75.12138053042122, 155.53299912636504], "label": "Blow-drying hair"}]}, "8GqAgX-kseE": {"duration": 21.386, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8GqAgX-kseE", "annotations": [{"segment": 
[0.01, 21.385578231292516], "label": "Cutting the grass"}]}, "U7k6GFEOt7g": {"duration": 202.734, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=U7k6GFEOt7g", "annotations": [{"segment": [0.01, 197.62187987519502], "label": "Hurling"}]}, "FqiMsRnatP0": {"duration": 45.79, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FqiMsRnatP0", "annotations": [{"segment": [13.285640902060049, 29.904038903617682], "label": "Hitting a pinata"}]}, "j5SX_9JKtfs": {"duration": 196.465, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=j5SX_9JKtfs", "annotations": [{"segment": [68.04214628705148, 196.46403628117915], "label": "Cleaning sink"}]}, "GEhtyurCwDQ": {"duration": 197.56, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GEhtyurCwDQ", "annotations": [{"segment": [12.319254566101012, 14.475124115168688], "label": "Shaving"}, {"segment": [93.31835333821516, 100.09394334957071], "label": "Shaving"}, {"segment": [119.4967692911798, 153.37471934795758], "label": "Shaving"}]}, "rCwolEeHY80": {"duration": 110.434, "subset": "training", "resolution": "448x252", "url": "https://www.youtube.com/watch?v=rCwolEeHY80", "annotations": [{"segment": [17.442389013614164, 103.10619175589488], "label": "Trimming branches or hedges"}]}, "WUTZxlqZZ38": {"duration": 202.29, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WUTZxlqZZ38", "annotations": []}, "2zwUExKXw9s": {"duration": 42.86, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=2zwUExKXw9s", "annotations": [{"segment": [3.8095741029641186, 39.76660686427457], "label": "Horseback riding"}]}, "KaRFuHnwrDI": {"duration": 92.114, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KaRFuHnwrDI", "annotations": []}, "pPGniqIBs-U": {"duration": 66.92, "subset": "training", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pPGniqIBs-U", "annotations": [{"segment": [16.584865834633387, 61.74994071762871], "label": "Camel ride"}]}, "ew7XlNRrKyM": {"duration": 16.37, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=ew7XlNRrKyM", "annotations": [{"segment": [0.17876829219968798, 15.654994852855339], "label": "Playing field hockey"}]}, "bDK-_jU_KzI": {"duration": 126.805, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bDK-_jU_KzI", "annotations": [{"segment": [39.298160436137074, 109.40291900311527], "label": "Arm wrestling"}]}, "f3Mse2A5edA": {"duration": 116.983, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=f3Mse2A5edA", "annotations": [{"segment": [0.2, 51.099839041180694], "label": "Playing rubik cube"}, {"segment": [0.2, 116.78213151927437], "label": "Playing rubik cube"}, {"segment": [77.19725583463338, 94.89969984399376], "label": "Playing rubik cube"}, {"segment": [92.89220619344773, 111.68964673946958], "label": "Playing rubik cube"}]}, "mYfo8LhPB5Y": {"duration": 81.34, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mYfo8LhPB5Y", "annotations": [{"segment": [4.060626925757303, 67.12723877847114], "label": "Playing accordion"}]}, "WmxNXcXMcdw": {"duration": 153.6, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=WmxNXcXMcdw", "annotations": []}, "ptlSMte8xz0": {"duration": 112.873, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ptlSMte8xz0", "annotations": [{"segment": [0.01, 112.841], "label": "Skiing"}]}, "ZKkjR2VTb7Y": {"duration": 36.897, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZKkjR2VTb7Y", "annotations": [{"segment": [0.01, 36.841], "label": "Cutting the grass"}]}, "EdJgTx0iFeE": {"duration": 160.799, "subset": 
"training", "resolution": "647x480", "url": "https://www.youtube.com/watch?v=EdJgTx0iFeE", "annotations": [{"segment": [19.30838065522621, 160.536], "label": "Hula hoop"}]}, "lm_DRjAHlSs": {"duration": 233.082, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=lm_DRjAHlSs", "annotations": []}, "_wqhA-Ceqwc": {"duration": 221.542, "subset": "training", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=_wqhA-Ceqwc", "annotations": [{"segment": [29.01616848673947, 45.94226677067083], "label": "Sharpening knives"}, {"segment": [188.60509516380654, 198.27715132605303], "label": "Sharpening knives"}]}, "43j1BAyP6Q0": {"duration": 179.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=43j1BAyP6Q0", "annotations": [{"segment": [0.01, 174.30052108206777], "label": "Elliptical trainer"}]}, "wil2yt6Ktu0": {"duration": 71.75, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wil2yt6Ktu0", "annotations": []}, "MssKmHxNPmw": {"duration": 27.981, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=MssKmHxNPmw", "annotations": []}, "UgrOTFBD7tA": {"duration": 202.55, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UgrOTFBD7tA", "annotations": [{"segment": [16.58541341653666, 188.75780031201248], "label": "Rock climbing"}]}, "VFqkLp5mzBM": {"duration": 79.16, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=VFqkLp5mzBM", "annotations": [{"segment": [20.93148466146646, 33.03346399375975], "label": "Playing polo"}]}, "gYqXtgtyFnY": {"duration": 67.872, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gYqXtgtyFnY", "annotations": [{"segment": [0.01, 67.871427], "label": "Putting on shoes"}]}, "r09mVf2dQnk": {"duration": 76.673, "subset": "training", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=r09mVf2dQnk", "annotations": [{"segment": [6.136055322344059, 68.4516347615964], "label": "Cleaning sink"}]}, "X7okoyw18ss": {"duration": 61.67, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=X7okoyw18ss", "annotations": []}, "HoQQlRzybmA": {"duration": 217.2, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HoQQlRzybmA", "annotations": [{"segment": [33.20678079875195, 45.06634536973479], "label": "Slacklining"}, {"segment": [50.14901590015601, 55.23168643057723], "label": "Slacklining"}, {"segment": [78.27312616848674, 86.0665543151326], "label": "Slacklining"}, {"segment": [93.18229305772232, 103.00878941653667], "label": "Slacklining"}, {"segment": [109.78568345709829, 110.46337286115445], "label": "Slacklining"}, {"segment": [138.58748312948518, 151.46358180655224], "label": "Slacklining"}, {"segment": [164.00083578159126, 201.27375300468017], "label": "Slacklining"}]}, "DLVJlI5p4g8": {"duration": 192.564, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DLVJlI5p4g8", "annotations": []}, "8TGG-FZx0cc": {"duration": 165.65, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8TGG-FZx0cc", "annotations": [{"segment": [25.58419129797192, 165.651179138322], "label": "Preparing pasta"}]}, "z7zj8stU-kw": {"duration": 4.64, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=z7zj8stU-kw", "annotations": [{"segment": [0.01, 2.0358212399375977], "label": "Brushing teeth"}]}, "HyTsvX-Qefo": {"duration": 56.96, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=HyTsvX-Qefo", "annotations": []}, "PagM71op4HU": {"duration": 30.558, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=PagM71op4HU", "annotations": [{"segment": [0.01, 24.378708268330733], "label": "Throwing darts"}]}, 
"nAwM_Mh4V8k": {"duration": 160.32999999999998, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nAwM_Mh4V8k", "annotations": []}, "F2yh5HfIj2c": {"duration": 140.852, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=F2yh5HfIj2c", "annotations": [{"segment": [43.947644164269974, 140.19298334477378], "label": "Scuba diving"}]}, "ToD9rw_QoJY": {"duration": 209.723, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ToD9rw_QoJY", "annotations": []}, "3rqesp6F95Q": {"duration": 41.482, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3rqesp6F95Q", "annotations": [{"segment": [0.01, 41.501], "label": "Table soccer"}]}, "JR9LosQmMvg": {"duration": 150.535, "subset": "testing", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=JR9LosQmMvg", "annotations": []}, "5MuLI6Plf-Y": {"duration": 180.63, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5MuLI6Plf-Y", "annotations": [{"segment": [7.467459468446765, 172.03335692667707], "label": "Playing guitarra"}]}, "azCsE98qwus": {"duration": 155.55, "subset": "training", "resolution": "384x288", "url": "https://www.youtube.com/watch?v=azCsE98qwus", "annotations": [{"segment": [9.914196567862714, 57.525501], "label": "Tai chi"}, {"segment": [71.68055100000001, 150.40561622464898], "label": "Tai chi"}]}, "Aq2w52hpwig": {"duration": 30.93, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Aq2w52hpwig", "annotations": [{"segment": [9.987985418096724, 15.102605970358814], "label": "Kayaking"}]}, "jNJg1TYq3c8": {"duration": 49.02, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jNJg1TYq3c8", "annotations": [{"segment": [6.499957173946958, 48.405563424804996], "label": "Cleaning windows"}]}, "08ZbVaYzAl4": {"duration": 63.02, "subset": "testing", 
"resolution": "854x480", "url": "https://www.youtube.com/watch?v=08ZbVaYzAl4", "annotations": []}, "QPxiOzXkUFM": {"duration": 210.071, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QPxiOzXkUFM", "annotations": [{"segment": [62.9229615725429, 191.062951024961], "label": "Rafting"}]}, "suxZhXSVNKY": {"duration": 112.988, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=suxZhXSVNKY", "annotations": [{"segment": [14.95951003454341, 91.51700256426558], "label": "Surfing"}]}, "nw8MpBwL508": {"duration": 100.68, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nw8MpBwL508", "annotations": [{"segment": [4.473373605304212, 96.45221334945398], "label": "Using the balance beam"}]}, "UNXLfqkwKFc": {"duration": 48.51, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=UNXLfqkwKFc", "annotations": []}, "pv6CNBFhgCU": {"duration": 115.497, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pv6CNBFhgCU", "annotations": [{"segment": [11.171225232682778, 115.4960544217687], "label": "Carving jack-o-lanterns"}]}, "WVaYjd1F8kg": {"duration": 167.57999999999998, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WVaYjd1F8kg", "annotations": [{"segment": [100.59311232449298, 167.481], "label": "Preparing pasta"}]}, "1Xav7gMRCOQ": {"duration": 44.37, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=1Xav7gMRCOQ", "annotations": [{"segment": [12.96411856474259, 42.48773790951638], "label": "Doing step aerobics"}]}, "0JHOEr3YdNM": {"duration": 200.46, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=0JHOEr3YdNM", "annotations": [{"segment": [84.03959268, 161.00860374414978], "label": "Shaving legs"}]}, "itXYuf_Cj_Q": {"duration": 31.39, "subset": "testing", "resolution": "640x480", 
"url": "https://www.youtube.com/watch?v=itXYuf_Cj_Q", "annotations": []}, "Sul7NDmB5HM": {"duration": 225.884, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Sul7NDmB5HM", "annotations": [{"segment": [30.29735725429017, 60.59471450858034], "label": "Kneeling"}, {"segment": [70.81126521060841, 184.25020748829954], "label": "Kneeling"}, {"segment": [198.0072418096724, 222.31773946957878], "label": "Kneeling"}]}, "wNmGrltNmlo": {"duration": 32.23, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=wNmGrltNmlo", "annotations": []}, "q-03l2tyn8I": {"duration": 87.981, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=q-03l2tyn8I", "annotations": []}, "6hNV9oxC51k": {"duration": 128.545, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6hNV9oxC51k", "annotations": [{"segment": [6.6852548225497905, 78.34157569482393], "label": "Rafting"}, {"segment": [89.76451183194311, 113.18656323381227], "label": "Rafting"}]}, "nkWMBJC2Qec": {"duration": 94.85, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=nkWMBJC2Qec", "annotations": []}, "nB50V0OBto0": {"duration": 162.796, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nB50V0OBto0", "annotations": [{"segment": [29.206609570505268, 36.82572511063707], "label": "Snowboarding"}, {"segment": [42.66704702473813, 49.778221528861145], "label": "Snowboarding"}, {"segment": [52.82586774491388, 56.38145499697539], "label": "Snowboarding"}, {"segment": [57.14336655098856, 85.33409404947626], "label": "Snowboarding"}, {"segment": [93.46115062561685, 97.01673787767837], "label": "Snowboarding"}, {"segment": [98.54056098570473, 100.06438345397815], "label": "Snowboarding"}]}, "O9phka35v6I": {"duration": 92.28, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=O9phka35v6I", 
"annotations": [{"segment": [14.683560338331901, 89.54092601872075], "label": "Playing accordion"}]}, "3UgGvKnelfY": {"duration": 115.566, "subset": "training", "resolution": "854x478", "url": "https://www.youtube.com/watch?v=3UgGvKnelfY", "annotations": [{"segment": [0.01, 115.5653514739229], "label": "Roof shingle removal"}]}, "t5Br7yOUe4g": {"duration": 95.3, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=t5Br7yOUe4g", "annotations": [{"segment": [2.8236489859594385, 90.95121996879875], "label": "Using the balance beam"}]}, "soDdkeamoMM": {"duration": 10.578, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=soDdkeamoMM", "annotations": [{"segment": [0.2, 10.154261637310093], "label": "Swinging at the playground"}]}, "JqHxPA5deHs": {"duration": 17.346, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=JqHxPA5deHs", "annotations": []}, "hShYj_OGFtc": {"duration": 191.82, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=hShYj_OGFtc", "annotations": [{"segment": [0, 180.4305819032761], "label": "Plastering"}]}, "iPPfX25MUQ0": {"duration": 112.989, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iPPfX25MUQ0", "annotations": [{"segment": [20.438620904836196, 95.16215288611544], "label": "Camel ride"}]}, "mqa1ptnQWWo": {"duration": 133.608, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mqa1ptnQWWo", "annotations": [{"segment": [71.88550195787833, 90.42987782527301], "label": "Fixing the roof"}]}, "txyXUXWybt4": {"duration": 85.891, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=txyXUXWybt4", "annotations": [{"segment": [11.858532095943838, 30.215807769890798], "label": "Sharpening knives"}, {"segment": [43.34728963884556, 48.57308344383776], "label": "Sharpening knives"}, {"segment": 
[52.99490897113885, 58.756681627925126], "label": "Sharpening knives"}, {"segment": [62.91051772932917, 77.51594144071763], "label": "Sharpening knives"}]}, "r8DXz1FOb90": {"duration": 128.128, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=r8DXz1FOb90", "annotations": [{"segment": [2.398647096904284, 100.3434032199688], "label": "Disc dog"}, {"segment": [107.53934448673948, 123.13055056474259], "label": "Disc dog"}]}, "p1JSdZmztGk": {"duration": 224.14, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=p1JSdZmztGk", "annotations": [{"segment": [12.87919932, 222.74324729017164], "label": "Sailing"}]}, "egNXVIVrVY0": {"duration": 195.861, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=egNXVIVrVY0", "annotations": []}, "rtFW05uRr7Q": {"duration": 29.28, "subset": "testing", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=rtFW05uRr7Q", "annotations": []}, "4QRRxub6L5Q": {"duration": 8.29, "subset": "testing", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=4QRRxub6L5Q", "annotations": []}, "sQo4gMcgfT4": {"duration": 193.028, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sQo4gMcgfT4", "annotations": [{"segment": [22.514865130830717, 131.7870327972752], "label": "Riding bumper cars"}]}, "8z8FprjMNbI": {"duration": 332.417, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=8z8FprjMNbI", "annotations": [{"segment": [64.03523166926676, 252.77065132605304], "label": "Removing ice from car"}]}, "IIAg_MFuCoY": {"duration": 200.46, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=IIAg_MFuCoY", "annotations": [{"segment": [0, 200.401], "label": "Cumbia"}]}, "7SJf9PysNv8": {"duration": 130.079, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7SJf9PysNv8", "annotations": 
[]}, "tbKBKWCh6rs": {"duration": 40.91, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=tbKBKWCh6rs", "annotations": [{"segment": [4.08497312948518, 34.08399454914197], "label": "Washing face"}]}, "1-nEQf-TJPE": {"duration": 55.64, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=1-nEQf-TJPE", "annotations": [{"segment": [9.00066176, 11.151960319999999], "label": "Mixing drinks"}, {"segment": [9.00066176, 11.151960319999999], "label": "Mixing drinks"}]}, "Te1d8H4VWYw": {"duration": 15.47, "subset": "training", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=Te1d8H4VWYw", "annotations": [{"segment": [0.48561756000000006, 11.966280886115445], "label": "Playing water polo"}]}, "PrR-kkpy1c8": {"duration": 223.4, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PrR-kkpy1c8", "annotations": [{"segment": [51.92893597376548, 208.4127751076443], "label": "Rafting"}]}, "0KwSF8NdEug": {"duration": 217.78, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0KwSF8NdEug", "annotations": [{"segment": [142.13364641391345, 190.98148670890996], "label": "Baking cookies"}]}, "-1IBHYS3L-Y": {"duration": 252.819, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-1IBHYS3L-Y", "annotations": [{"segment": [0, 184.9797935803432], "label": "Removing ice from car"}]}, "XqxJsWQqKRk": {"duration": 173.291, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XqxJsWQqKRk", "annotations": [{"segment": [42.45146530757578, 55.1378280053951], "label": "Making a cake"}, {"segment": [59.9964349960493, 70.25349419854152], "label": "Making a cake"}, {"segment": [81.05039862221751, 128.8267006969839], "label": "Making a cake"}]}, "ISHKwbnOzXY": {"duration": 192.076, "subset": "validation", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=ISHKwbnOzXY", "annotations": [{"segment": [4.343688767550701, 184.98122854914195], "label": "Doing fencing"}]}, "cXY-ONmtylc": {"duration": 115.055, "subset": "validation", "resolution": "638x360", "url": "https://www.youtube.com/watch?v=cXY-ONmtylc", "annotations": [{"segment": [0.01, 3.654873170573392], "label": "Plastering"}, {"segment": [5.983958840355323, 14.583659774934764], "label": "Plastering"}, {"segment": [22.466718964965914, 37.69535603661701], "label": "Plastering"}, {"segment": [53.461474416679316, 78.72309591200641], "label": "Plastering"}]}, "MaJlWFemO68": {"duration": 15.67, "subset": "validation", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=MaJlWFemO68", "annotations": [{"segment": [0.3667738502340094, 14.817663549453977], "label": "Brushing hair"}]}, "brJIpRGcBFU": {"duration": 145.775, "subset": "testing", "resolution": "400x224", "url": "https://www.youtube.com/watch?v=brJIpRGcBFU", "annotations": []}, "ulJxGEx2r7o": {"duration": 217.71, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ulJxGEx2r7o", "annotations": [{"segment": [15.691629397761812, 23.830313285612277], "label": "Playing flauta"}, {"segment": [31.290773516141872, 56.04593700835371], "label": "Playing flauta"}, {"segment": [81.81843598654686, 121.83363176847831], "label": "Playing flauta"}, {"segment": [126.58119736972442, 178.8044189834316], "label": "Playing flauta"}, {"segment": [183.55198458467768, 185.58665555664032], "label": "Playing flauta"}, {"segment": [191.0124448152073, 209.32448356287085], "label": "Playing flauta"}]}, "b0dgV_N6cNQ": {"duration": 211.766, "subset": "testing", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=b0dgV_N6cNQ", "annotations": []}, "xKLnBh0zmL4": {"duration": 100.45, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xKLnBh0zmL4", "annotations": [{"segment": [1.723782779882624, 91.8306099101107], 
"label": "Using parallel bars"}]}, "7tlXgKBTD_0": {"duration": 51.967, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7tlXgKBTD_0", "annotations": [{"segment": [16.119, 51.941], "label": "Doing crunches"}]}, "Iib8Qt_9HQc": {"duration": 113.825, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Iib8Qt_9HQc", "annotations": [{"segment": [61.594394695787834, 102.43865366614665], "label": "Using the rowing machine"}]}, "-l16smV_uYg": {"duration": 157.85, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=-l16smV_uYg", "annotations": [{"segment": [64.75339001560062, 68.20033853354134], "label": "Removing curlers"}, {"segment": [106.36298283931357, 108.33266770670826], "label": "Removing curlers"}]}, "UxIXOCccW24": {"duration": 63.623, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UxIXOCccW24", "annotations": [{"segment": [16.82092318692852, 18.10722919314936], "label": "Sumo"}, {"segment": [55.311156757690576, 56.89430261150084], "label": "Sumo"}]}, "V--Xz2FtJXA": {"duration": 199.62, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=V--Xz2FtJXA", "annotations": [{"segment": [64.13048860530422, 185.2312656318253], "label": "Getting a haircut"}]}, "5g3D9uF11a8": {"duration": 87.749, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5g3D9uF11a8", "annotations": []}, "aUCdj7acYos": {"duration": 24.265, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aUCdj7acYos", "annotations": [{"segment": [13.001727015576323, 21.241193554517135], "label": "Hitting a pinata"}]}, "JviFZ5Cliug": {"duration": 182.904, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JviFZ5Cliug", "annotations": [{"segment": [68.48184037441497, 71.62059139157567], "label": "Elliptical trainer"}, 
{"segment": [76.18604741653667, 80.18082143837754], "label": "Elliptical trainer"}, {"segment": [93.02116650858035, 97.30128153198127], "label": "Elliptical trainer"}, {"segment": [101.58139655538221, 105.86151157878315], "label": "Elliptical trainer"}, {"segment": [111.28299060842434, 116.70446963806553], "label": "Elliptical trainer"}]}, "OPqqbxGKp6E": {"duration": 138.786, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OPqqbxGKp6E", "annotations": [{"segment": [0.01, 96.34826625585023], "label": "Making an omelette"}]}, "Y_dtU10XIsg": {"duration": 66.06, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Y_dtU10XIsg", "annotations": [{"segment": [9.009496879875195, 66.001], "label": "Shoveling snow"}]}, "bjtjeUcoxkg": {"duration": 121.19, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bjtjeUcoxkg", "annotations": [{"segment": [19.283719478939158, 30.24897173166927], "label": "Shaving"}, {"segment": [45.56251367082683, 54.070036970358814], "label": "Shaving"}, {"segment": [83.94089655538222, 96.79670954134166], "label": "Shaving"}]}, "KFIxTdJtXAE": {"duration": 238.539, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KFIxTdJtXAE", "annotations": [{"segment": [0, 215.4656858437603], "label": "Rafting"}]}, "EhzHU0gkrzA": {"duration": 24.68, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=EhzHU0gkrzA", "annotations": []}, "Iwgbi95VjXU": {"duration": 150.884, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Iwgbi95VjXU", "annotations": [{"segment": [27.065546021840873, 145.21253822152886], "label": "Building sandcastles"}]}, "HwdYYnxEde8": {"duration": 122.21, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HwdYYnxEde8", "annotations": []}, "kA2P40yJxms": {"duration": 197.86, "subset": "testing", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kA2P40yJxms", "annotations": []}, "-MldnTjJ-zE": {"duration": 178.75, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=-MldnTjJ-zE", "annotations": [{"segment": [21.047303432137287, 31.083103744149767], "label": "Grooming horse"}, {"segment": [85.16491653666148, 157.08815210608427], "label": "Grooming horse"}]}, "byVhCu3KH1g": {"duration": 78.41, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=byVhCu3KH1g", "annotations": [{"segment": [37.43310173166927, 64.71278044461779], "label": "Polishing shoes"}]}, "PKEw32TJRWs": {"duration": 230.946, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PKEw32TJRWs", "annotations": [{"segment": [16.207090483619343, 17.293903445933758], "label": "Waterskiing"}, {"segment": [30.264331030384074, 43.23475861483439], "label": "Waterskiing"}, {"segment": [47.197944821194206, 86.10922757454516], "label": "Waterskiing"}, {"segment": [109.52805515758045, 165.01266204661792], "label": "Waterskiing"}, {"segment": [175.46106057566303, 209.68857752574104], "label": "Waterskiing"}]}, "e-Xf13exY4E": {"duration": 93.066, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=e-Xf13exY4E", "annotations": []}, "g49F9coR2VU": {"duration": 76.19, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=g49F9coR2VU", "annotations": [{"segment": [0.01, 66.20069757075998], "label": "Doing kickboxing"}]}, "FsS_NCZEfaI": {"duration": 212.742, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FsS_NCZEfaI", "annotations": [{"segment": [42.13804524180968, 58.39603120124806], "label": "Scuba diving"}, {"segment": [97.216120124805, 103.5202371294852], "label": "Scuba diving"}, {"segment": [108.16537597503901, 116.12847113884555], "label": "Scuba diving"}, {"segment": 
[189.12351014040564, 205.7326365054602], "label": "Scuba diving"}]}, "sY31L_r7dsk": {"duration": 55.61, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=sY31L_r7dsk", "annotations": [{"segment": [13.924636965678626, 51.75106510920437], "label": "Preparing salad"}]}, "j3P7ttoKGeY": {"duration": 94.691, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=j3P7ttoKGeY", "annotations": [{"segment": [7.068666091183727, 23.4149616233935], "label": "Table soccer"}, {"segment": [37.257770452472045, 63.47074887349313], "label": "Table soccer"}]}, "AahkhvTAXwg": {"duration": 106.162, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AahkhvTAXwg", "annotations": [{"segment": [6.125549141965679, 100.67664274570984], "label": "Baton twirling"}]}, "wVNZHyJySiU": {"duration": 151.534, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wVNZHyJySiU", "annotations": []}, "7lwrP2ma2TU": {"duration": 115.43, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=7lwrP2ma2TU", "annotations": [{"segment": [18.907543783151326, 62.84507409828393], "label": "Archery"}, {"segment": [72.74902560374414, 99.03951505460219], "label": "Archery"}]}, "9Pun6fgAg8Q": {"duration": 89.7, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9Pun6fgAg8Q", "annotations": [{"segment": [4.686550702028081, 19.935327613104526], "label": "Volleyball"}, {"segment": [65.68165834633385, 85.12734633385337], "label": "Volleyball"}]}, "Wgz_qspJAbI": {"duration": 75.14, "subset": "testing", "resolution": "370x208", "url": "https://www.youtube.com/watch?v=Wgz_qspJAbI", "annotations": []}, "jcqRPdvb18w": {"duration": 61.069, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jcqRPdvb18w", "annotations": [{"segment": [37.18766228982425, 48.39461392590513], "label": "Hula 
hoop"}]}, "KaVu94VRRvE": {"duration": 102.168, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=KaVu94VRRvE", "annotations": []}, "6B3qNzgqnpc": {"duration": 36.25, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6B3qNzgqnpc", "annotations": [{"segment": [0, 5.846216068642746], "label": "Kayaking"}, {"segment": [18.047014820592825, 23.582562402496098], "label": "Kayaking"}]}, "PBZScfP9ynI": {"duration": 10.43, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PBZScfP9ynI", "annotations": [{"segment": [0.8188837499999999, 9.838467238689548], "label": "Doing karate"}]}, "2fEQZstAbws": {"duration": 106.77000000000001, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2fEQZstAbws", "annotations": []}, "TZ9JbvClnZA": {"duration": 14.165, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TZ9JbvClnZA", "annotations": [{"segment": [0, 13.61174623400936], "label": "Playing ten pins"}]}, "l9_VsufvmsU": {"duration": 200.53, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=l9_VsufvmsU", "annotations": []}, "JXMD8Obk0yg": {"duration": 63.066, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=JXMD8Obk0yg", "annotations": [{"segment": [19.677190639625586, 56.27676522932917], "label": "Arm wrestling"}]}, "ZhZ12nMnxaQ": {"duration": 93.18, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZhZ12nMnxaQ", "annotations": []}, "BQ_BJNFGmTg": {"duration": 65.318, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=BQ_BJNFGmTg", "annotations": [{"segment": [0.2, 63.29990046656298], "label": "Playing drums"}]}, "Eh0OoXQDLVc": {"duration": 111.57, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Eh0OoXQDLVc", 
"annotations": [{"segment": [16.87957659, 102.2167511700468], "label": "Cheerleading"}]}, "Lan3mtnCmlw": {"duration": 53.267, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Lan3mtnCmlw", "annotations": [{"segment": [3.155067082683307, 51.33063650546022], "label": "Scuba diving"}]}, "Ht9WSqhFD34": {"duration": 89.65, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ht9WSqhFD34", "annotations": [{"segment": [9.115025845472536, 80.33409719456137], "label": "Archery"}]}, "eJnSC8C3OIw": {"duration": 233.8, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eJnSC8C3OIw", "annotations": [{"segment": [21.585772377197053, 219.3339578071036], "label": "Skateboarding"}]}, "2cQVuBiTm8w": {"duration": 45.65, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2cQVuBiTm8w", "annotations": []}, "bOUtD3leN0E": {"duration": 222.66, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=bOUtD3leN0E", "annotations": [{"segment": [211.8880621528861, 217.0984243369735], "label": "Removing curlers"}]}, "F12ga-do744": {"duration": 56.54, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=F12ga-do744", "annotations": [{"segment": [17.82525698395921, 29.01007766197081], "label": "Clean and jerk"}, {"segment": [41.25173137461625, 54.10987155218634], "label": "Clean and jerk"}]}, "iHQLWtllKVw": {"duration": 138.438, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iHQLWtllKVw", "annotations": []}, "NU28ZSvyWGc": {"duration": 85.357, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NU28ZSvyWGc", "annotations": []}, "4aiE_-yQWZE": {"duration": 181.999, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4aiE_-yQWZE", "annotations": [{"segment": 
[35.82534373465803, 38.6596272237546], "label": "Doing fencing"}, {"segment": [40.6436256661222, 58.49961164743061], "label": "Doing fencing"}, {"segment": [97.89615214587297, 157.41610541690102], "label": "Doing fencing"}]}, "F99Suh6SvD8": {"duration": 126.967, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=F99Suh6SvD8", "annotations": [{"segment": [9.108215288611545, 119.41563650546021], "label": "Doing fencing"}]}, "836UrVGw6GI": {"duration": 225.466, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=836UrVGw6GI", "annotations": []}, "UcVbSLmILaY": {"duration": 69.359, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UcVbSLmILaY", "annotations": [{"segment": [0.01, 0.02], "label": "Putting on shoes"}]}, "kfiF8A8g7UE": {"duration": 37.2, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kfiF8A8g7UE", "annotations": [{"segment": [6.9757409400000014, 32.96709948], "label": "Using uneven bars"}]}, "t04jE7d0Obw": {"duration": 175.17000000000002, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=t04jE7d0Obw", "annotations": [{"segment": [21.990146822152884, 169.5016907223089], "label": "Tango"}]}, "jkrlHzTRTvo": {"duration": 199.0, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jkrlHzTRTvo", "annotations": []}, "wEn3nAJHhtw": {"duration": 159.777, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wEn3nAJHhtw", "annotations": [{"segment": [0.01, 159.721], "label": "Disc dog"}]}, "X2GUUIGudxs": {"duration": 188.036, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=X2GUUIGudxs", "annotations": [{"segment": [23.394664094720262, 173.12054285887496], "label": "Hanging wallpaper"}]}, "517rgTzIobE": {"duration": 87.42, "subset": "training", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=517rgTzIobE", "annotations": [{"segment": [6.546470107956319, 82.51280031903276], "label": "Playing squash"}]}, "dyobp97TugE": {"duration": 181.139, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=dyobp97TugE", "annotations": [{"segment": [29.389148899289307, 46.62701508060322], "label": "Disc dog"}, {"segment": [61.886765470618826, 89.01521060842434], "label": "Disc dog"}]}, "vzrZJX-Slzg": {"duration": 124.901, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vzrZJX-Slzg", "annotations": [{"segment": [22.362484423676012, 104.82878348909658], "label": "Welding"}]}, "zyPisOj2Tdo": {"duration": 93.02, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zyPisOj2Tdo", "annotations": [{"segment": [0.01, 92.961], "label": "Baton twirling"}]}, "Uofnmkfohkc": {"duration": 132.1, "subset": "training", "resolution": "400x300", "url": "https://www.youtube.com/watch?v=Uofnmkfohkc", "annotations": [{"segment": [30.8250683, 63.712186380000006], "label": "Skateboarding"}, {"segment": [72.38045714, 105.96505266000001], "label": "Skateboarding"}]}, "0_1BQPWzRiw": {"duration": 187.13, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0_1BQPWzRiw", "annotations": []}, "sGFbsMKkoYs": {"duration": 8.36, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sGFbsMKkoYs", "annotations": [{"segment": [0.9726532400000001, 7.56865655], "label": "Clean and jerk"}]}, "RZ8r5B69CLY": {"duration": 104.72, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=RZ8r5B69CLY", "annotations": [{"segment": [11.191361154446177, 25.241829173166927], "label": "Volleyball"}, {"segment": [28.3460023400936, 76.37899765990639], "label": "Volleyball"}, {"segment": [80.13668096723869, 99.25185257410297], "label": "Volleyball"}]}, 
"LtQTtCplKz0": {"duration": 194.38, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LtQTtCplKz0", "annotations": [{"segment": [39.70477698, 107.59656786271451], "label": "Triple jump"}]}, "fZQS02Ypca4": {"duration": 170.99, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fZQS02Ypca4", "annotations": [{"segment": [16.00546764727732, 20.00683455909665], "label": "Putting on makeup"}, {"segment": [28.809841765099176, 43.748278235891334], "label": "Putting on makeup"}, {"segment": [50.417223088923556, 58.15319911844093], "label": "Putting on makeup"}, {"segment": [74.15866676571824, 82.69491617759948], "label": "Putting on makeup"}, {"segment": [97.36659485427036, 102.96850853081742], "label": "Putting on makeup"}, {"segment": [131.24483470767404, 147.2503023276131], "label": "Putting on makeup"}]}, "IAuKFVx94Ec": {"duration": 159.92000000000002, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IAuKFVx94Ec", "annotations": []}, "lx83HHNNrlc": {"duration": 131.402, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=lx83HHNNrlc", "annotations": [{"segment": [8.607117004680187, 121.11443213728548], "label": "Mooping floor"}]}, "smJtFktW640": {"duration": 207.31, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=smJtFktW640", "annotations": [{"segment": [0, 200.51559765990638], "label": "Zumba"}]}, "VN-BUL75x5Y": {"duration": 117.192, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VN-BUL75x5Y", "annotations": [{"segment": [35.62354914196568, 63.40247581903275], "label": "Grooming dog"}]}, "f6JZ3LWJRVE": {"duration": 106.74000000000001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=f6JZ3LWJRVE", "annotations": [{"segment": [66.0348790951638, 71.1977574102964], "label": "Archery"}]}, "RjBXzs2XvbY": 
{"duration": 225.582, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RjBXzs2XvbY", "annotations": [{"segment": [0.01, 225.33021455474807], "label": "Sharpening knives"}]}, "JLA4Ck8_BRI": {"duration": 68.13, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JLA4Ck8_BRI", "annotations": [{"segment": [3.507336014040562, 63.238331162246496], "label": "Mixing drinks"}]}, "8L1xXJvKuv4": {"duration": 76.7, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=8L1xXJvKuv4", "annotations": [{"segment": [0.2, 76.49551020408163], "label": "Playing water polo"}]}, "bj4nkWPdqIY": {"duration": 191.59, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bj4nkWPdqIY", "annotations": [{"segment": [16.729376839999997, 28.235155226209045], "label": "Triple jump"}, {"segment": [30.027863494539776, 58.418510920436816], "label": "Triple jump"}, {"segment": [62.900749609984395, 108.32076833073323], "label": "Triple jump"}, {"segment": [139.69643915756632, 165.39460764430578], "label": "Triple jump"}, {"segment": [167.1875031201248, 188.4034329173167], "label": "Triple jump"}]}, "GV_BDNmUiLY": {"duration": 124.85, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GV_BDNmUiLY", "annotations": [{"segment": [21.0273135725429, 76.33359126365055], "label": "Sailing"}]}, "ePAc9xK9fac": {"duration": 125.44, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ePAc9xK9fac", "annotations": [{"segment": [8.318065522620905, 15.75539469578783], "label": "Rock climbing"}, {"segment": [26.911388455538223, 34.935875195007796], "label": "Rock climbing"}, {"segment": [49.6148143525741, 51.76772542901716], "label": "Rock climbing"}, {"segment": [68.40385647425897, 107.74341341653667], "label": "Rock climbing"}]}, "Gdf7ZWhh9Nw": {"duration": 128.11, "subset": "training", 
"resolution": "640x360", "url": "https://www.youtube.com/watch?v=Gdf7ZWhh9Nw", "annotations": [{"segment": [28.568990639625586, 101.23162392], "label": "Grooming horse"}]}, "fAxUvb5qcvg": {"duration": 30.7, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=fAxUvb5qcvg", "annotations": []}, "V4srMOGRlU8": {"duration": 128.221, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=V4srMOGRlU8", "annotations": [{"segment": [19.40311550078003, 113.01814698907955], "label": "River tubing"}]}, "1AZxtWKkRlo": {"duration": 193.26, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1AZxtWKkRlo", "annotations": [{"segment": [3.3156448143525745, 182.05904253354134], "label": "Cheerleading"}]}, "QX3pwZOJbRs": {"duration": 151.743, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=QX3pwZOJbRs", "annotations": [{"segment": [46.307595015576325, 140.59516355140187], "label": "Grooming dog"}]}, "kPn9NzU4W48": {"duration": 162.03, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kPn9NzU4W48", "annotations": []}, "V2QIntSKqhA": {"duration": 39.73, "subset": "training", "resolution": "400x218", "url": "https://www.youtube.com/watch?v=V2QIntSKqhA", "annotations": [{"segment": [0.2, 39.701], "label": "Camel ride"}]}, "sq1wlclMvY8": {"duration": 77.97, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sq1wlclMvY8", "annotations": []}, "mBAlD4A8swo": {"duration": 13.49, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mBAlD4A8swo", "annotations": [{"segment": [0.01, 13.322197883833722], "label": "Spinning"}]}, "nDbLtdY66dA": {"duration": 94.65, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nDbLtdY66dA", "annotations": [{"segment": [11.07381891575663, 89.91940959594383], "label": "Playing 
saxophone"}]}, "E3UJv-NC1E8": {"duration": 33.066, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=E3UJv-NC1E8", "annotations": [{"segment": [13.247327613104522, 17.88646957878315], "label": "Doing a powerbomb"}]}, "92fD8Cy2zL0": {"duration": 203.69, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=92fD8Cy2zL0", "annotations": [{"segment": [88.0978362, 178.25900145], "label": "Doing nails"}]}, "eDMv3IHGpnA": {"duration": 213.07, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eDMv3IHGpnA", "annotations": [{"segment": [19.278989815374928, 208.08013136037442], "label": "Breakdancing"}]}, "ZdeEZk6PulE": {"duration": 54.335, "subset": "testing", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=ZdeEZk6PulE", "annotations": []}, "esQnfUcBbPE": {"duration": 106.95, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=esQnfUcBbPE", "annotations": [{"segment": [7.7298001769383875, 106.18479488254586], "label": "Bullfighting"}]}, "6fWXqCWuU9Y": {"duration": 193.26, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=6fWXqCWuU9Y", "annotations": [{"segment": [86.5152139789572, 113.60766080138711], "label": "Playing beach volleyball"}]}, "fZc3tkvUJe0": {"duration": 107.068, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=fZc3tkvUJe0", "annotations": [{"segment": [17.5241887675507, 102.6608658346334], "label": "Sharpening knives"}]}, "a25vC5zsf6A": {"duration": 176.45, "subset": "training", "resolution": "480x320", "url": "https://www.youtube.com/watch?v=a25vC5zsf6A", "annotations": [{"segment": [64.13804281150837, 91.11455867214279], "label": "Fixing bicycle"}, {"segment": [105.03420095999999, 143.40105408000002], "label": "Fixing bicycle"}]}, "RlRrktN5iH4": {"duration": 65.341, "subset": "testing", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=RlRrktN5iH4", "annotations": []}, "6TxGVSw6Ayw": {"duration": 70.101, "subset": "training", "resolution": "480x270", "url": "https://www.youtube.com/watch?v=6TxGVSw6Ayw", "annotations": [{"segment": [9.951942153876631, 60.367824000000006], "label": "Playing ice hockey"}]}, "K9cQxJ37xzA": {"duration": 106.7, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=K9cQxJ37xzA", "annotations": [{"segment": [14.974024914196566, 35.27214757566302], "label": "Spinning"}, {"segment": [46.419477234009356, 71.87531958814353], "label": "Spinning"}, {"segment": [86.01745422932916, 93.00533252262089], "label": "Spinning"}]}, "irnvsfchQX8": {"duration": 38.151, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=irnvsfchQX8", "annotations": [{"segment": [8.153826539456137, 30.591727575663025], "label": "Canoeing"}]}, "cdEBDjnH9xk": {"duration": 62.044, "subset": "training", "resolution": "540x360", "url": "https://www.youtube.com/watch?v=cdEBDjnH9xk", "annotations": [{"segment": [0.01, 57.107196240249614], "label": "Roof shingle removal"}]}, "erxnWYOO1ew": {"duration": 6.943, "subset": "training", "resolution": "293x240", "url": "https://www.youtube.com/watch?v=erxnWYOO1ew", "annotations": [{"segment": [0.01, 6.882205072576041], "label": "Powerbocking"}]}, "bmWICdhvyJw": {"duration": 74.84, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bmWICdhvyJw", "annotations": [{"segment": [0.2335036255850234, 73.7871456848674], "label": "Using the balance beam"}]}, "ciEBsDNryN8": {"duration": 119.56, "subset": "training", "resolution": "1280x622", "url": "https://www.youtube.com/watch?v=ciEBsDNryN8", "annotations": [{"segment": [9.69905825273011, 113.21785306552262], "label": "Playing accordion"}]}, "LmEUkoYh0nQ": {"duration": 138.159, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=LmEUkoYh0nQ", 
"annotations": [{"segment": [18.281752336448594, 112.93306074766356], "label": "Carving jack-o-lanterns"}]}, "yUC6HXP9S8A": {"duration": 231.48, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=yUC6HXP9S8A", "annotations": [{"segment": [4.830700550079346, 217.52570055007936], "label": "Ice fishing"}]}, "52tZGZGuWP4": {"duration": 78.159, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=52tZGZGuWP4", "annotations": [{"segment": [0.010000000000000002, 0.020000000000000004], "label": "Playing congas"}]}, "gc04Okw4DTs": {"duration": 170.32, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gc04Okw4DTs", "annotations": [{"segment": [20.72516794549333, 28.430678879875195], "label": "Painting"}, {"segment": [43.575993797191884, 159.15866027145086], "label": "Painting"}]}, "IuntoXkEWPI": {"duration": 29.513, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=IuntoXkEWPI", "annotations": [{"segment": [23.548006240249613, 29.261], "label": "Fun sliding down"}]}, "sDLIA6K6Uq4": {"duration": 147.099, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sDLIA6K6Uq4", "annotations": []}, "P5HJQrDE9b8": {"duration": 215.551, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=P5HJQrDE9b8", "annotations": []}, "JN3o_6Rx-8g": {"duration": 45.163, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JN3o_6Rx-8g", "annotations": []}, "ZBZUrC4M5E8": {"duration": 119.7, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZBZUrC4M5E8", "annotations": [{"segment": [6.81058112324493, 87.1604994], "label": "Shot put"}]}, "EgPk-mmJyS8": {"duration": 81.15, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=EgPk-mmJyS8", "annotations": [{"segment": [17.471476328440893, 
37.60165534633385], "label": "Ironing clothes"}, {"segment": [47.60344245865834, 62.7960304773791], "label": "Ironing clothes"}, {"segment": [66.59417748205928, 73.55744699063962], "label": "Ironing clothes"}, {"segment": [78.49503809672386, 81.153741], "label": "Ironing clothes"}]}, "Omvg9UtGOes": {"duration": 62.23, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Omvg9UtGOes", "annotations": []}, "po_mfZ5LUVo": {"duration": 145.915, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=po_mfZ5LUVo", "annotations": [{"segment": [12.29227087675507, 132.02809460218407], "label": "Swimming"}]}, "o18RrUlR-0Y": {"duration": 135.26, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=o18RrUlR-0Y", "annotations": [{"segment": [0, 1.6880653457430814], "label": "Shaving"}, {"segment": [49.37591136298513, 62.45841779249401], "label": "Shaving"}, {"segment": [82.08217743675733, 119.64162976287052], "label": "Shaving"}]}, "aDaazrgvjJg": {"duration": 23.29, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aDaazrgvjJg", "annotations": [{"segment": [1.3995140405616224, 21.937836973478937], "label": "Clean and jerk"}]}, "svG8RyP-OlU": {"duration": 149.51, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=svG8RyP-OlU", "annotations": [{"segment": [0.6997501777622125, 45.950261673051955], "label": "Shoveling snow"}, {"segment": [103.79627636806153, 149.31328798185942], "label": "Shoveling snow"}]}, "xSMmdsV65Bc": {"duration": 176.402, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=xSMmdsV65Bc", "annotations": [{"segment": [24.759266770670827, 122.69592199687987], "label": "Plastering"}]}, "ElJJgJj9Gm0": {"duration": 84.289, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ElJJgJj9Gm0", "annotations": []}, "E9y9fCPKNf0": 
{"duration": 42.84, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=E9y9fCPKNf0", "annotations": []}, "G9ynahMxUz8": {"duration": 98.59, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=G9ynahMxUz8", "annotations": [{"segment": [18.918580130960343, 97.82290103588143], "label": "Playing kickball"}]}, "Br1Ty6PCrv8": {"duration": 42.121, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Br1Ty6PCrv8", "annotations": [{"segment": [5.568093568492716, 40.41781320261991], "label": "Fun sliding down"}]}, "Z3noRsTXGt4": {"duration": 19.087, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Z3noRsTXGt4", "annotations": [{"segment": [0.2, 18.3384992199688], "label": "Elliptical trainer"}]}, "FNAt8Pew0HA": {"duration": 35.991, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=FNAt8Pew0HA", "annotations": [{"segment": [4.9971803307332285, 27.624862053042122], "label": "Peeling potatoes"}]}, "r1v5fEOypSM": {"duration": 86.634, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=r1v5fEOypSM", "annotations": [{"segment": [0, 84.03404368174726], "label": "Wakeboarding"}]}, "xqRnZiSR0_w": {"duration": 22.965, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xqRnZiSR0_w", "annotations": []}, "C_bwHYiX-Vw": {"duration": 216.62, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=C_bwHYiX-Vw", "annotations": [{"segment": [95.62000468018721, 209.82340249609985], "label": "Fixing bicycle"}]}, "7vgokK5_Pvc": {"duration": 93.414, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=7vgokK5_Pvc", "annotations": [{"segment": [0.19999999999999998, 20.96424336973479], "label": "Swinging at the playground"}]}, "M57ZC-IJKCo": {"duration": 110.37, "subset": 
"testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=M57ZC-IJKCo", "annotations": []}, "0wpStx2JuS4": {"duration": 69.196, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0wpStx2JuS4", "annotations": [{"segment": [0.2, 67.75835881435258], "label": "Playing congas"}]}, "lnOZvhRNC8E": {"duration": 170.899, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lnOZvhRNC8E", "annotations": []}, "JV4ss42WZG8": {"duration": 65.829, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JV4ss42WZG8", "annotations": [{"segment": [5.4377255850234, 10.577699569422778], "label": "Making a lemonade"}, {"segment": [21.463487475819033, 52.6831056224649], "label": "Making a lemonade"}]}, "SLdf2ZUdgEQ": {"duration": 165.837, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=SLdf2ZUdgEQ", "annotations": [{"segment": [0.2, 0.25871537129485184], "label": "Playing ten pins"}]}, "AWAMhmc08Cw": {"duration": 140.76, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AWAMhmc08Cw", "annotations": [{"segment": [27.229580555382217, 73.7833801352054], "label": "Washing face"}]}, "_crwKCjKRjg": {"duration": 190.404, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=_crwKCjKRjg", "annotations": [{"segment": [16.92428705148206, 181.11956318252732], "label": "Riding bumper cars"}]}, "j66O6PR3SFk": {"duration": 67.71, "subset": "testing", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=j66O6PR3SFk", "annotations": []}, "q9T1GR6s0vI": {"duration": 120.047, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=q9T1GR6s0vI", "annotations": [{"segment": [1.1236864024960997, 117.42522906084244], "label": "Sumo"}]}, "YVxuIAwOyoE": {"duration": 177.77, "subset": "validation", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=YVxuIAwOyoE", "annotations": [{"segment": [23.71391185647426, 29.81573712948518], "label": "Cricket"}, {"segment": [42.85145475819033, 51.72683697347893], "label": "Cricket"}, {"segment": [59.77015210608424, 68.09082293291732], "label": "Cricket"}, {"segment": [80.57182917316692, 86.67365444617785], "label": "Cricket"}, {"segment": [99.43201638065523, 105.53384165366614], "label": "Cricket"}, {"segment": [115.7960023400936, 119.12427067082683], "label": "Cricket"}, {"segment": [131.8826326053042, 138.53916926677067], "label": "Cricket"}, {"segment": [148.52397425897036, 162.66911466458657], "label": "Cricket"}]}, "vPh9o_BuJaU": {"duration": 18.367, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vPh9o_BuJaU", "annotations": [{"segment": [3.1760546021840876, 9.44232449297972], "label": "Putting on shoes"}]}, "RAQs7EkXLtE": {"duration": 215.71, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=RAQs7EkXLtE", "annotations": [{"segment": [33.57788558, 201.74753591504205], "label": "Belly dance"}]}, "uqaSFllHrco": {"duration": 141.015, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uqaSFllHrco", "annotations": [{"segment": [19.13929192043682, 123.4154341076443], "label": "Wakeboarding"}]}, "EXvyNKaFkaU": {"duration": 130.89, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EXvyNKaFkaU", "annotations": []}, "nT1-fINd4nA": {"duration": 18.716, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=nT1-fINd4nA", "annotations": [{"segment": [0.2, 17.66582693457944], "label": "Blowing leaves"}]}, "7-jcXxwqf5E": {"duration": 216.016, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7-jcXxwqf5E", "annotations": [{"segment": [16.483004672897195, 179.31132866043615], "label": "Ice fishing"}]}, "JRs2MpyP0SQ": 
{"duration": 106.209, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=JRs2MpyP0SQ", "annotations": [{"segment": [20.062527262008395, 27.341585087553568], "label": "Playing ten pins"}, {"segment": [30.48481460312989, 31.477413397522415], "label": "Playing ten pins"}, {"segment": [41.237968209048894, 45.04293025422023], "label": "Playing ten pins"}, {"segment": [83.5888501031299, 88.05554467789624], "label": "Playing ten pins"}]}, "UoGoOznvKew": {"duration": 142.966, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=UoGoOznvKew", "annotations": [{"segment": [0.01, 136.35057417151194], "label": "Putting in contact lenses"}]}, "sRMrtzg0nwU": {"duration": 92.23, "subset": "training", "resolution": "853x470", "url": "https://www.youtube.com/watch?v=sRMrtzg0nwU", "annotations": [{"segment": [0.01, 83.30884864586584], "label": "Knitting"}]}, "8twDb4CdhE0": {"duration": 35.991, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8twDb4CdhE0", "annotations": [{"segment": [0, 21.430736349453976], "label": "Having an ice cream"}, {"segment": [26.816471138845554, 28.443411856474256], "label": "Having an ice cream"}]}, "5a7Rv4LXvJ0": {"duration": 102.818, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=5a7Rv4LXvJ0", "annotations": []}, "Ti14bsS3RtU": {"duration": 211.093, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ti14bsS3RtU", "annotations": [{"segment": [31.277527301092046, 176.81709360374415], "label": "Fixing the roof"}]}, "d83YCo6xLkk": {"duration": 25.333, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=d83YCo6xLkk", "annotations": [{"segment": [0.01, 25.308930727499202], "label": "Building sandcastles"}]}, "k3yRK68SEDE": {"duration": 41.053, "subset": "training", "resolution": "192x144", "url": 
"https://www.youtube.com/watch?v=k3yRK68SEDE", "annotations": [{"segment": [0, 1.7238368267228528], "label": "Arm wrestling"}, {"segment": [6.767658540564221, 40.09518935860466], "label": "Arm wrestling"}]}, "5IwVWA2HY5M": {"duration": 140.44, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5IwVWA2HY5M", "annotations": [{"segment": [26.5094318684312, 81.0618991018144], "label": "Making a sandwich"}]}, "QwnUZ-5JaOM": {"duration": 118.24000000000001, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=QwnUZ-5JaOM", "annotations": [{"segment": [21.571258970358812, 39.08638377535101], "label": "Long jump"}, {"segment": [52.729744149765985, 66.18873478939157], "label": "Long jump"}, {"segment": [106.01259750390015, 109.69999219968798], "label": "Long jump"}]}, "zwj4jJ9yDHk": {"duration": 191.008, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zwj4jJ9yDHk", "annotations": []}, "DOONG5zq1Yg": {"duration": 12.168, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DOONG5zq1Yg", "annotations": [{"segment": [0.2, 11.921000000000001], "label": "Kneeling"}]}, "-l18hJp8ShE": {"duration": 95.74000000000001, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-l18hJp8ShE", "annotations": [{"segment": [21.26199283151326, 87.95750718720748], "label": "Doing motocross"}]}, "41__Qick6tM": {"duration": 195.698, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=41__Qick6tM", "annotations": [{"segment": [35.10958539781591, 185.6228514945398], "label": "Braiding hair"}]}, "iEZgExTrv70": {"duration": 23.313, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iEZgExTrv70", "annotations": [{"segment": [0.01, 23.312833], "label": "Using the monkey bar"}]}, "gYchIiLd9Uw": {"duration": 89.17, "subset": "training", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gYchIiLd9Uw", "annotations": [{"segment": [15.162159465970474, 77.61912666458659], "label": "Tumbling"}]}, "Sx3YbcKbuYk": {"duration": 80.69, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=Sx3YbcKbuYk", "annotations": [{"segment": [5.66840225, 65.65633097893917], "label": "Starting a campfire"}]}, "ShKrNPaSdhY": {"duration": 235.055, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ShKrNPaSdhY", "annotations": [{"segment": [19.068447331090525, 41.80390376431384], "label": "Snowboarding"}, {"segment": [48.771221058366145, 98.64254484737214], "label": "Snowboarding"}, {"segment": [100.47604939843852, 103.77635759035805], "label": "Snowboarding"}, {"segment": [105.24316123121116, 118.44439399888921], "label": "Snowboarding"}, {"segment": [119.17779581931576, 182.2503523759998], "label": "Snowboarding"}, {"segment": [183.20434165366615, 187.7508656723869], "label": "Snowboarding"}, {"segment": [188.85096840093604, 204.9858084196568], "label": "Snowboarding"}, {"segment": [206.45261205772232, 216.3535366146646], "label": "Snowboarding"}, {"segment": [218.55374207176288, 235.055283], "label": "Snowboarding"}]}, "3hp7kPpZDhs": {"duration": 76.32, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3hp7kPpZDhs", "annotations": [{"segment": [20.75569968798752, 35.14790405616224], "label": "Triple jump"}, {"segment": [44.068691887675506, 55.011524960998436], "label": "Triple jump"}, {"segment": [63.575481279251164, 68.5711224648986], "label": "Triple jump"}]}, "BRApK7rWSAk": {"duration": 187.71, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BRApK7rWSAk", "annotations": [{"segment": [96.34420345553823, 186.24593737909515], "label": "Starting a campfire"}]}, "1F_CSpL_4Dg": {"duration": 124.134, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=1F_CSpL_4Dg", "annotations": [{"segment": [18.389539781591267, 120.79024024961], "label": "Snow tubing"}]}, "b4KrsFjcj-o": {"duration": 148.701, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=b4KrsFjcj-o", "annotations": [{"segment": [38.73071294851794, 146.5934352574103], "label": "Capoeira"}]}, "r40TuTkt9y4": {"duration": 210.141, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=r40TuTkt9y4", "annotations": [{"segment": [41.30688644929797, 73.76229723088923], "label": "Rafting"}, {"segment": [81.63027560218408, 89.82608640561622], "label": "Rafting"}, {"segment": [94.41574045553823, 102.61155125897035], "label": "Rafting"}, {"segment": [107.20120530889236, 140.3122809547582], "label": "Rafting"}, {"segment": [150.47508635101403, 152.44208094383777], "label": "Rafting"}, {"segment": [156.70390256162247, 183.91399442901715], "label": "Rafting"}, {"segment": [196.04379441809675, 202.9282754929797], "label": "Rafting"}]}, "sRDuqREW6PU": {"duration": 76.649, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sRDuqREW6PU", "annotations": []}, "8leZKBY0B6I": {"duration": 114.2, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8leZKBY0B6I", "annotations": [{"segment": [6.057184175802407, 114.195736], "label": "Getting a piercing"}]}, "XHwfkmvFsxA": {"duration": 181.21, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=XHwfkmvFsxA", "annotations": []}, "dj_GBrR5N4E": {"duration": 85.15, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dj_GBrR5N4E", "annotations": [{"segment": [13.671202319999999, 65.8697192], "label": "Using parallel bars"}]}, "pvpfRBndHh8": {"duration": 8.73, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pvpfRBndHh8", "annotations": [{"segment": 
[0.04450655658594767, 8.730701], "label": "Doing kickboxing"}]}, "7uDlPHQDZnw": {"duration": 144.15, "subset": "testing", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=7uDlPHQDZnw", "annotations": []}, "UxR9fdD0Vzw": {"duration": 129.8, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UxR9fdD0Vzw", "annotations": [{"segment": [49.99380187207489, 119.23436661466458], "label": "Trimming branches or hedges"}]}, "0rX2f0H3AaA": {"duration": 83.36, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=0rX2f0H3AaA", "annotations": [{"segment": [9.88351392559104, 13.394762293893118], "label": "Installing carpet"}, {"segment": [19.636981615319034, 62.942378157711346], "label": "Installing carpet"}]}, "37pnsj0hlZ4": {"duration": 111.154, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=37pnsj0hlZ4", "annotations": [{"segment": [40.22510992142865, 96.65103827033829], "label": "Using the rowing machine"}]}, "tfepV4CXF7c": {"duration": 65.97, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=tfepV4CXF7c", "annotations": [{"segment": [0.01, 65.96730158730159], "label": "Drinking beer"}]}, "fPtKNj6jCPU": {"duration": 56.541, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fPtKNj6jCPU", "annotations": [{"segment": [5.6225295950155765, 44.99618691588785], "label": "Beer pong"}]}, "q_AQim3WN3g": {"duration": 189.45, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=q_AQim3WN3g", "annotations": []}, "PJgB6h-fImY": {"duration": 181.58, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PJgB6h-fImY", "annotations": [{"segment": [41.353031773540465, 178.18556136855605], "label": "Fixing bicycle"}]}, "S1gXqf9_gJg": {"duration": 141.015, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=S1gXqf9_gJg", "annotations": []}, "sGiZDgVq9GQ": {"duration": 75.303, "subset": "testing", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=sGiZDgVq9GQ", "annotations": []}, "uh-H5Gmt4PI": {"duration": 105.49000000000001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uh-H5Gmt4PI", "annotations": [{"segment": [21.39380007853375, 102.19653729822662], "label": "Snatch"}]}, "FkWxS_5VxsM": {"duration": 103.19, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FkWxS_5VxsM", "annotations": [{"segment": [7.45643714, 72.16729093], "label": "Doing nails"}]}, "DPsa28MmdoI": {"duration": 198.694, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DPsa28MmdoI", "annotations": [{"segment": [13.948777420484573, 188.15350778159126], "label": "Rafting"}]}, "lvAXZEUE0m8": {"duration": 152.6, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lvAXZEUE0m8", "annotations": [{"segment": [74.6046856474259, 111.50519266770671], "label": "Bungee jumping"}]}, "hzeK-DdGOsc": {"duration": 124.761, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hzeK-DdGOsc", "annotations": [{"segment": [26.776035380034596, 31.626766963492237], "label": "Bullfighting"}, {"segment": [62.089361307606254, 68.29829773443204], "label": "Bullfighting"}, {"segment": [79.74602342348528, 82.85049160388964], "label": "Bullfighting"}, {"segment": [90.41763279362524, 93.13404245147906], "label": "Bullfighting"}]}, "mNM-JUC7ZEA": {"duration": 163.144, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mNM-JUC7ZEA", "annotations": [{"segment": [7.725161070618008, 158.16250068432515], "label": "Fixing the roof"}]}, "0BXBfSWIR2k": {"duration": 6.73, "subset": "training", "resolution": "640x360", "url": 
"https://www.youtube.com/watch?v=0BXBfSWIR2k", "annotations": [{"segment": [0.39395399999999997, 3.788112324492979], "label": "Windsurfing"}]}, "PaPR1XQU0_A": {"duration": 202.037, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PaPR1XQU0_A", "annotations": [{"segment": [0.01, 202.03671201814058], "label": "Hanging wallpaper"}]}, "4eCKiA1czfk": {"duration": 87.122, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4eCKiA1czfk", "annotations": [{"segment": [0, 76.36624336973479], "label": "Blowing leaves"}]}, "YcxPoVlukf4": {"duration": 107.3, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YcxPoVlukf4", "annotations": [{"segment": [0.01, 32.64610523935716], "label": "Playing ice hockey"}, {"segment": [57.87415975649112, 63.0534292268961], "label": "Playing ice hockey"}]}, "R7uL6BvULN8": {"duration": 69.13, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=R7uL6BvULN8", "annotations": [{"segment": [12.666719968798752, 59.668336193447736], "label": "Windsurfing"}]}, "ZNVhz7ctTq0": {"duration": 14.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZNVhz7ctTq0", "annotations": [{"segment": [0.3287380109204368, 14.048072562358277], "label": "Walking the dog"}]}, "3gPjMvTmE2g": {"duration": 207.49, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3gPjMvTmE2g", "annotations": [{"segment": [8.252558475819033, 190.4561044321373], "label": "Sailing"}]}, "-rCYwovSK4s": {"duration": 114.5, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-rCYwovSK4s", "annotations": [{"segment": [11.253273885491419, 104.67330947457098], "label": "Hammer throw"}]}, "HB11MXjwvVo": {"duration": 125.44, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HB11MXjwvVo", "annotations": 
[{"segment": [7.240351350108426, 119.5636398626013], "label": "Vacuuming floor"}]}, "pHq6Eb280uM": {"duration": 187.455, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pHq6Eb280uM", "annotations": [{"segment": [16.079648985959437, 186.83622308892353], "label": "Table soccer"}]}, "AJ15GW-sS5M": {"duration": 177.98, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AJ15GW-sS5M", "annotations": [{"segment": [3.8872594903796154, 173.26070853790952], "label": "Long jump"}]}, "GTV4XlxawgE": {"duration": 98.52000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GTV4XlxawgE", "annotations": [{"segment": [35.043624438855105, 38.425026796990245], "label": "Tumbling"}, {"segment": [50.4136351576512, 61.17264266080847], "label": "Tumbling"}, {"segment": [68.85764802020653, 89.76086259776922], "label": "Tumbling"}]}, "889_kqLFM0E": {"duration": 167.49, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=889_kqLFM0E", "annotations": []}, "GX1EjqXAszM": {"duration": 62.69, "subset": "validation", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=GX1EjqXAszM", "annotations": [{"segment": [12.030182431787068, 18.094176380655224], "label": "Making a sandwich"}, {"segment": [26.309910521060843, 44.208474184087365], "label": "Making a sandwich"}]}, "q8c_0JTe5r8": {"duration": 130.311, "subset": "validation", "resolution": "3414x1080", "url": "https://www.youtube.com/watch?v=q8c_0JTe5r8", "annotations": [{"segment": [0.01, 0.02], "label": "Making a cake"}]}, "ffDlO-IrsxI": {"duration": 73.538, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ffDlO-IrsxI", "annotations": [{"segment": [19.237703588143525, 48.43778939157566], "label": "Playing pool"}, {"segment": [51.415053042121684, 60.57586427457098], "label": "Playing pool"}]}, "9BgDWjUnz60": {"duration": 69.22, 
"subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=9BgDWjUnz60", "annotations": []}, "v8fS--GlXF4": {"duration": 92.323, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=v8fS--GlXF4", "annotations": [{"segment": [0.01, 92.32253968253968], "label": "Bullfighting"}]}, "y2MSbfOsukM": {"duration": 135.72, "subset": "training", "resolution": "496x360", "url": "https://www.youtube.com/watch?v=y2MSbfOsukM", "annotations": [{"segment": [11.001463199999998, 126.97347893915757], "label": "Brushing hair"}]}, "jExOw6W1I3E": {"duration": 36.18, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=jExOw6W1I3E", "annotations": [{"segment": [12.585649647425896, 20.09188912324493], "label": "Drinking beer"}]}, "WU4ISFy651Y": {"duration": 156.805, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=WU4ISFy651Y", "annotations": [{"segment": [13.695188767550702, 114.69720592823712], "label": "Baking cookies"}]}, "aq41GgfAlDo": {"duration": 53.06, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=aq41GgfAlDo", "annotations": [{"segment": [1.2622028270741044, 52.336336331747006], "label": "Playing accordion"}]}, "je5KvCND9xo": {"duration": 189.522, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=je5KvCND9xo", "annotations": [{"segment": [102.56313104524182, 122.07081591263652], "label": "Doing crunches"}]}, "v8u8BiprYKs": {"duration": 80.063, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=v8u8BiprYKs", "annotations": [{"segment": [16.28725507020281, 63.60483541341654], "label": "Trimming branches or hedges"}]}, "twGoVVPO08Q": {"duration": 191.61, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=twGoVVPO08Q", "annotations": [{"segment": [70.85199947, 154.67606364], "label": "Long jump"}, 
{"segment": [3.1213431899999997, 56.214835179999994], "label": "Long jump"}]}, "7MDr4f1r8rI": {"duration": 199.62, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=7MDr4f1r8rI", "annotations": [{"segment": [27.15325663026521, 46.39327847113885], "label": "Pole vault"}, {"segment": [57.254581123244925, 99.76882293291732], "label": "Pole vault"}, {"segment": [104.11334399375976, 184.17666068642745], "label": "Pole vault"}]}, "81S8kHui3Bs": {"duration": 146.402, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=81S8kHui3Bs", "annotations": []}, "gRVQwpbn4Z8": {"duration": 177.865, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gRVQwpbn4Z8", "annotations": []}, "-Ws0UnqETac": {"duration": 73.561, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-Ws0UnqETac", "annotations": []}, "7dlkshlM4nk": {"duration": 24.01, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=7dlkshlM4nk", "annotations": [{"segment": [13.442167081879635, 23.1130599201378], "label": "Spread mulch"}]}, "e__FVkfXB-U": {"duration": 131.1, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=e__FVkfXB-U", "annotations": [{"segment": [12.844926904471503, 19.777111165746803], "label": "Disc dog"}, {"segment": [20.388774493896104, 51.583604229510414], "label": "Disc dog"}, {"segment": [58.51578861520248, 74.62292292313406], "label": "Disc dog"}, {"segment": [84.2056483974731, 118.25490699778412], "label": "Disc dog"}]}, "wmQiRptG3-0": {"duration": 165.048, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wmQiRptG3-0", "annotations": []}, "1yFs3I9ppBI": {"duration": 232.06, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1yFs3I9ppBI", "annotations": []}, "InwY966XXBY": {"duration": 166.464, "subset": "training", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=InwY966XXBY", "annotations": [{"segment": [0.01, 166.361], "label": "Camel ride"}]}, "C1kmoJttp4M": {"duration": 128.5, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=C1kmoJttp4M", "annotations": [{"segment": [0.01, 128.441], "label": "Making an omelette"}]}, "DRWMUsADKFM": {"duration": 183.02, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DRWMUsADKFM", "annotations": [{"segment": [7.66666591, 141.43909594851795], "label": "Wrapping presents"}]}, "RuG5d5pMNmI": {"duration": 59.908, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RuG5d5pMNmI", "annotations": []}, "nNldj5g7W5o": {"duration": 182.231, "subset": "validation", "resolution": "180x240", "url": "https://www.youtube.com/watch?v=nNldj5g7W5o", "annotations": [{"segment": [112.51226786904496, 118.74720486717237], "label": "Having an ice cream"}, {"segment": [150.48870141895455, 155.0232010270416], "label": "Having an ice cream"}, {"segment": [164.94241891973215, 170.32713720433557], "label": "Having an ice cream"}]}, "4sm-tTbfamM": {"duration": 182.51, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4sm-tTbfamM", "annotations": [{"segment": [10.250098932917316, 176.8142065928237], "label": "Fixing bicycle"}]}, "PFeerDZbGn8": {"duration": 169.599, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PFeerDZbGn8", "annotations": []}, "JGPhQX97OSE": {"duration": 53.546, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=JGPhQX97OSE", "annotations": []}, "iuQHLWWhSEY": {"duration": 22.18, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iuQHLWWhSEY", "annotations": [{"segment": [0.01, 17.746962893915757], "label": "Using the balance beam"}]}, "8ClpQ4q6NmM": {"duration": 121.49, 
"subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8ClpQ4q6NmM", "annotations": [{"segment": [0, 115.23241030889236], "label": "Bathing dog"}]}, "CXb3Ce7s6G0": {"duration": 125.55, "subset": "training", "resolution": "480x320", "url": "https://www.youtube.com/watch?v=CXb3Ce7s6G0", "annotations": [{"segment": [58.75988751950078, 64.24414368798752], "label": "Swimming"}, {"segment": [76.58372006708268, 92.4488896973479], "label": "Swimming"}, {"segment": [98.1290121575663, 117.12804245553822], "label": "Swimming"}]}, "G4D6UtJh3AU": {"duration": 78.9, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=G4D6UtJh3AU", "annotations": [{"segment": [43.005863160000004, 76.82356701794073], "label": "Brushing hair"}, {"segment": [13.27437862, 33.12560488], "label": "Brushing hair"}]}, "URgF15eyQvg": {"duration": 133.515, "subset": "validation", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=URgF15eyQvg", "annotations": [{"segment": [24.748021312367104, 99.8239506542056], "label": "Futsal"}]}, "2FjaU9wfxaU": {"duration": 161.565, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2FjaU9wfxaU", "annotations": [{"segment": [55.451135378748475, 70.57417230022534], "label": "Grooming dog"}]}, "0IAcCT2UT8c": {"duration": 66.619, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0IAcCT2UT8c", "annotations": []}, "buBtMl9SUNk": {"duration": 231.411, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=buBtMl9SUNk", "annotations": [{"segment": [8.301564742589704, 43.67344929797191], "label": "Scuba diving"}, {"segment": [51.975014040561625, 78.32345865834633], "label": "Scuba diving"}, {"segment": [88.42971138845553, 175.05473478939155], "label": "Scuba diving"}, {"segment": [191.29692667706706, 227.0493744149766], "label": "Scuba diving"}]}, "Vfb3LmFDols": {"duration": 130.195, "subset": 
"training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=Vfb3LmFDols", "annotations": [{"segment": [23.764011276131047, 118.21072275819034], "label": "BMX"}]}, "Fpwzxyb8fGQ": {"duration": 30.33, "subset": "testing", "resolution": "160x120", "url": "https://www.youtube.com/watch?v=Fpwzxyb8fGQ", "annotations": []}, "as7KugARkLE": {"duration": 8.94, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=as7KugARkLE", "annotations": [{"segment": [0.49509941528861157, 6.575757022776911], "label": "Using uneven bars"}]}, "h2qoTykYXFY": {"duration": 154.97, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=h2qoTykYXFY", "annotations": []}, "YrS64TBX798": {"duration": 22.27, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YrS64TBX798", "annotations": [{"segment": [0.01, 22.094238159126366], "label": "Grooming horse"}]}, "Y6lkU_RH3mo": {"duration": 146.797, "subset": "testing", "resolution": "400x224", "url": "https://www.youtube.com/watch?v=Y6lkU_RH3mo", "annotations": []}, "3HUoSa_3vpc": {"duration": 82.43, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3HUoSa_3vpc", "annotations": [{"segment": [5.143890046801872, 80.11608747893915], "label": "Mixing drinks"}]}, "0XTAq_xvquw": {"duration": 192.03, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0XTAq_xvquw", "annotations": []}, "uZk7ciOa5aE": {"duration": 126.76, "subset": "training", "resolution": "352x264", "url": "https://www.youtube.com/watch?v=uZk7ciOa5aE", "annotations": [{"segment": [13.97242411, 37.855978050000004], "label": "Cumbia"}, {"segment": [46.37278088, 120.21909516380656], "label": "Cumbia"}]}, "q5EezLOfg90": {"duration": 183.76, "subset": "testing", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=q5EezLOfg90", "annotations": []}, "8lhODyltF2Q": {"duration": 121.51, "subset": 
"training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8lhODyltF2Q", "annotations": [{"segment": [0, 121.51004535147392], "label": "Mowing the lawn"}]}, "41Mc82x-sas": {"duration": 197.44, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=41Mc82x-sas", "annotations": []}, "MxZtYALqIrE": {"duration": 232.989, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=MxZtYALqIrE", "annotations": [{"segment": [0.2, 184.23873790951637], "label": "Playing pool"}]}, "u4FZAIj_Fw8": {"duration": 77.28, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=u4FZAIj_Fw8", "annotations": []}, "HMc2apLs7wE": {"duration": 220.47, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HMc2apLs7wE", "annotations": []}, "XlR3u3bROWM": {"duration": 134.58, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XlR3u3bROWM", "annotations": [{"segment": [96.7904783400936, 99.72988549141965], "label": "Playing lacrosse"}]}, "EMgWolnolDg": {"duration": 143.871, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=EMgWolnolDg", "annotations": [{"segment": [0.2, 21.380463675070235], "label": "Slacklining"}]}, "qkUkvD_g3Eg": {"duration": 181.86, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qkUkvD_g3Eg", "annotations": []}, "0ZzKrBk1ac8": {"duration": 57.307, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0ZzKrBk1ac8", "annotations": [{"segment": [4.023101658760228, 53.64135444617784], "label": "Arm wrestling"}]}, "Lk32tXffSyw": {"duration": 88.98, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Lk32tXffSyw", "annotations": []}, "cqHWqXRkUfU": {"duration": 81.34, "subset": "training", "resolution": "960x720", "url": 
"https://www.youtube.com/watch?v=cqHWqXRkUfU", "annotations": [{"segment": [56.681134165366615, 76.84287987519501], "label": "Snow tubing"}]}, "KxAxMZ6dYa4": {"duration": 104.305, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KxAxMZ6dYa4", "annotations": [{"segment": [28.789698907956314, 36.109113884555384], "label": "Shuffleboard"}, {"segment": [60.507163806552256, 60.832471138845555], "label": "Shuffleboard"}]}, "xDc407xoYUM": {"duration": 32.137, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xDc407xoYUM", "annotations": [{"segment": [0.2, 24.127340361370713], "label": "Waterskiing"}]}, "Eucw0oPrFUs": {"duration": 127.014, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Eucw0oPrFUs", "annotations": [{"segment": [20.58284610143954, 23.54582787268614], "label": "Futsal"}]}, "Sar_kHdgjYQ": {"duration": 31.394, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Sar_kHdgjYQ", "annotations": []}, "tgAA4I1xVio": {"duration": 130.334, "subset": "testing", "resolution": "852x480", "url": "https://www.youtube.com/watch?v=tgAA4I1xVio", "annotations": []}, "GmlQwRidCvo": {"duration": 119.44, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=GmlQwRidCvo", "annotations": []}, "9yiXv7Hu5Lw": {"duration": 63.066, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9yiXv7Hu5Lw", "annotations": [{"segment": [0.2, 56.738532710280374], "label": "Ice fishing"}]}, "r8dbfvjwOiQ": {"duration": 36.781, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=r8dbfvjwOiQ", "annotations": []}, "zTnRo6yABtg": {"duration": 205.869, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zTnRo6yABtg", "annotations": [{"segment": [72.67816426927173, 93.48909376289397], "label": "Disc dog"}, 
{"segment": [108.85716477356884, 182.17566852675247], "label": "Disc dog"}]}, "XpFkoI5hz7s": {"duration": 98.92, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XpFkoI5hz7s", "annotations": [{"segment": [8.406263650546022, 98.87], "label": "Doing step aerobics"}]}, "qZIJRg7hJ0o": {"duration": 124.367, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=qZIJRg7hJ0o", "annotations": [{"segment": [9.21589494872312, 58.69069941028934], "label": "Riding bumper cars"}, {"segment": [64.12322695901032, 120.77672950966813], "label": "Riding bumper cars"}]}, "NdnosxA2c5g": {"duration": 37.97, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NdnosxA2c5g", "annotations": [{"segment": [2.024386475405589, 37.700841304066024], "label": "Horseback riding"}]}, "EWSMeiZqUHQ": {"duration": 222.4, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=EWSMeiZqUHQ", "annotations": [{"segment": [7.111165366614665, 214.89594851794072], "label": "Doing step aerobics"}]}, "pOIJeK3YZr8": {"duration": 238.702, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pOIJeK3YZr8", "annotations": [{"segment": [16.568509360374414, 226.93272932917316], "label": "Trimming branches or hedges"}]}, "YKzVHb16eUk": {"duration": 74.58, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YKzVHb16eUk", "annotations": [{"segment": [0, 44.097917669266764], "label": "Preparing salad"}, {"segment": [52.82441852730109, 74.00072727613104], "label": "Preparing salad"}]}, "KyDS9Zg76Jg": {"duration": 152.44, "subset": "training", "resolution": "982x720", "url": "https://www.youtube.com/watch?v=KyDS9Zg76Jg", "annotations": [{"segment": [35.898816795631824, 146.68589379407175], "label": "Preparing salad"}]}, "NsqW8ZwYDEk": {"duration": 187.32, "subset": "validation", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=NsqW8ZwYDEk", "annotations": [{"segment": [94.38824567706709, 141.4362566801872], "label": "Archery"}]}, "K5_Rm4MfF9o": {"duration": 142.524, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=K5_Rm4MfF9o", "annotations": [{"segment": [52.45053978159127, 90.46765522620905], "label": "Waterskiing"}]}, "i2X7z9ywHV8": {"duration": 192.22, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=i2X7z9ywHV8", "annotations": [{"segment": [49.178197614979425, 65.07115172225936], "label": "Smoking hookah"}, {"segment": [106.15293875428486, 122.94549403744858], "label": "Smoking hookah"}, {"segment": [157.13033857347895, 162.22807856536664], "label": "Smoking hookah"}]}, "qhUJT_lNLyE": {"duration": 26.912, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=qhUJT_lNLyE", "annotations": []}, "ksOPSr7LmMY": {"duration": 25.101, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=ksOPSr7LmMY", "annotations": []}, "vkE5YOgatZ8": {"duration": 70.47, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vkE5YOgatZ8", "annotations": []}, "xBO66pI09rE": {"duration": 32.532, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=xBO66pI09rE", "annotations": [{"segment": [5.937823923556942, 19.843497043681747], "label": "Gargling mouthwash"}]}, "hpoReoD1vD0": {"duration": 163.79, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=hpoReoD1vD0", "annotations": [{"segment": [35.7384440873635, 54.88403913416537], "label": "Making a sandwich"}, {"segment": [60.75535494851795, 62.031727951638075], "label": "Making a sandwich"}, {"segment": [84.7511674071763, 147.80399376131047], "label": "Making a sandwich"}]}, "Rn5qprCWXFg": {"duration": 89.815, "subset": "validation", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=Rn5qprCWXFg", "annotations": [{"segment": [0.01, 0.02], "label": "Building sandcastles"}]}, "kee3-2cw7-E": {"duration": 24.451, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=kee3-2cw7-E", "annotations": [{"segment": [2.1360908205928237, 23.1918431950078], "label": "Hitting a pinata"}]}, "v52YAxZZslU": {"duration": 28.491, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=v52YAxZZslU", "annotations": []}, "Zc7uU4Qwolc": {"duration": 67.87, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Zc7uU4Qwolc", "annotations": [{"segment": [0.01, 65.30093135725429], "label": "Playing violin"}]}, "sPOCh7SN42Y": {"duration": 190.82, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sPOCh7SN42Y", "annotations": []}, "xBGgX2PqAo8": {"duration": 237.007, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xBGgX2PqAo8", "annotations": [{"segment": [19.166893301157945, 214.1532304466953], "label": "Playing pool"}]}, "dN8LBey17O8": {"duration": 187.71, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dN8LBey17O8", "annotations": []}, "OlH5t7EKOKM": {"duration": 38.1, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OlH5t7EKOKM", "annotations": [{"segment": [1.0700015919004107, 32.39726907956318], "label": "Using uneven bars"}]}, "RmZhmsu3NSI": {"duration": 66.967, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RmZhmsu3NSI", "annotations": []}, "iS_ms9ajumY": {"duration": 133.028, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iS_ms9ajumY", "annotations": [{"segment": [26.33536193447738, 122.15639469578784], "label": "Making a lemonade"}]}, "U7SRRMoCGks": {"duration": 150.35, "subset": "training", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=U7SRRMoCGks", "annotations": [{"segment": [2.3451014040561624, 29.31376755070203], "label": "Using the pommel horse"}, {"segment": [57.9240046801872, 86.53424180967238], "label": "Using the pommel horse"}, {"segment": [132.26371918876754, 141.40961466458657], "label": "Using the pommel horse"}]}, "3LwQ2yq85LU": {"duration": 131.658, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3LwQ2yq85LU", "annotations": []}, "UuXJdO35a8I": {"duration": 109.366, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UuXJdO35a8I", "annotations": []}, "nbHBjdE8oNo": {"duration": 34.78, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=nbHBjdE8oNo", "annotations": []}, "u02UsNRxclU": {"duration": 231.76, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=u02UsNRxclU", "annotations": [{"segment": [13.55567468798752, 212.7337214368175], "label": "Smoking hookah"}]}, "oY22VETX20w": {"duration": 103.19, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=oY22VETX20w", "annotations": [{"segment": [1.2073651925668865, 53.04357746010521], "label": "Getting a tattoo"}, {"segment": [72.52240154212168, 75.74204201014041], "label": "Getting a tattoo"}]}, "r8V0OneFGoc": {"duration": 112.432, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=r8V0OneFGoc", "annotations": []}, "zUt53fHpqc8": {"duration": 71.054, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zUt53fHpqc8", "annotations": [{"segment": [21.658474208112484, 26.74158543672838], "label": "Beer pong"}]}, "rbKPBMRj9jY": {"duration": 157.2, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rbKPBMRj9jY", "annotations": [{"segment": [18.63588767550702, 137.3170670826833], "label": 
"Rollerblading"}]}, "_8HTgaTPFRo": {"duration": 83.708, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=_8HTgaTPFRo", "annotations": [{"segment": [0.01, 83.70793650793651], "label": "Putting in contact lenses"}]}, "ai80XIxFqqg": {"duration": 236.94, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ai80XIxFqqg", "annotations": [{"segment": [141.94006898235114, 175.5769082463979], "label": "Paintball"}]}, "eWP99kNayuU": {"duration": 38.987, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eWP99kNayuU", "annotations": [{"segment": [0.01, 0.02], "label": "Fixing the roof"}, {"segment": [7.76088034388218, 11.216897784006596], "label": "Fixing the roof"}]}, "eKV-VVNPLVQ": {"duration": 212.72, "subset": "testing", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=eKV-VVNPLVQ", "annotations": []}, "s_hQSJVIN3c": {"duration": 70.589, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=s_hQSJVIN3c", "annotations": [{"segment": [0.01, 69.86298334440771], "label": "Kneeling"}]}, "lroya8XhuPs": {"duration": 188.454, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lroya8XhuPs", "annotations": [{"segment": [0.01, 188.321], "label": "Beach soccer"}]}, "IRbc5J7sH0k": {"duration": 124.599, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IRbc5J7sH0k", "annotations": []}, "2j-DRUk2yCs": {"duration": 81.03999999999999, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=2j-DRUk2yCs", "annotations": [{"segment": [9.098396255850234, 76.07270202808112], "label": "Tumbling"}]}, "Yd0q08-cpJU": {"duration": 125.34, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Yd0q08-cpJU", "annotations": [{"segment": [5.081600624024961, 121.95841497659907], "label": "Belly dance"}]}, 
"zkR_t6p3lto": {"duration": 193.492, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zkR_t6p3lto", "annotations": [{"segment": [3.320453514739229, 193.491881], "label": "Skiing"}]}, "FDLhpMkJwCM": {"duration": 12.28, "subset": "validation", "resolution": "256x360", "url": "https://www.youtube.com/watch?v=FDLhpMkJwCM", "annotations": [{"segment": [0.01, 11.746797451950078], "label": "Playing saxophone"}]}, "oJCB2Zglcq4": {"duration": 44.235, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oJCB2Zglcq4", "annotations": [{"segment": [7.78501248049922, 21.839371294851794], "label": "Calf roping"}]}, "8ZPV7ySIcbM": {"duration": 204.45, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8ZPV7ySIcbM", "annotations": [{"segment": [20.094316750390014, 80.69622441029641], "label": "Fixing bicycle"}, {"segment": [188.5038285631825, 204.251699], "label": "Fixing bicycle"}]}, "-CS7U_XrkPI": {"duration": 102.702, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-CS7U_XrkPI", "annotations": [{"segment": [12.176819475819032, 18.745893140405617], "label": "Throwing darts"}, {"segment": [26.43651596723869, 32.68514701404056], "label": "Throwing darts"}, {"segment": [39.73488460530422, 46.624400887675506], "label": "Throwing darts"}, {"segment": [52.87303193447738, 59.60232690795632], "label": "Throwing darts"}, {"segment": [66.97250711700468, 72.90069554602184], "label": "Throwing darts"}, {"segment": [85.7184002574103, 92.12725261310452], "label": "Throwing darts"}]}, "OgRa0ZcbUZg": {"duration": 104.026, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OgRa0ZcbUZg", "annotations": []}, "xmW27Mi-jbg": {"duration": 123.44, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xmW27Mi-jbg", "annotations": [{"segment": [86.84900456630265, 123.43727891156463], 
"label": "Starting a campfire"}]}, "1Dpm9kvn3M8": {"duration": 84.48, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=1Dpm9kvn3M8", "annotations": [{"segment": [3.9398673600000005, 78.83153244851793], "label": "Smoking a cigarette"}]}, "XSfG1M-Ik_A": {"duration": 51.99, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XSfG1M-Ik_A", "annotations": [{"segment": [17.6175, 25.069499999999998], "label": "Shot put"}]}, "GRGBMJG1Koc": {"duration": 201.828, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GRGBMJG1Koc", "annotations": [{"segment": [7.396854134165366, 98.9919414976599], "label": "Kite flying"}, {"segment": [136.7631115444618, 201.781], "label": "Kite flying"}]}, "w10Nr_wOh8g": {"duration": 194.978, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=w10Nr_wOh8g", "annotations": []}, "movzxpiGX8k": {"duration": 99.4, "subset": "validation", "resolution": "562x480", "url": "https://www.youtube.com/watch?v=movzxpiGX8k", "annotations": [{"segment": [6.0914771199999995, 34.54686616], "label": "Brushing hair"}, {"segment": [41.60753228, 80.00332132], "label": "Brushing hair"}]}, "GoFV8lTD4ug": {"duration": 123.09, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GoFV8lTD4ug", "annotations": [{"segment": [56.305374719999996, 75.27438280187206], "label": "Playing flauta"}]}, "7V1ZHZRHFrg": {"duration": 103.956, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7V1ZHZRHFrg", "annotations": [{"segment": [12.643179407176287, 53.176424336973476], "label": "Elliptical trainer"}]}, "ILF-93buuSY": {"duration": 66.944, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ILF-93buuSY", "annotations": [{"segment": [10.955546021840874, 43.00033073322933], "label": "Cutting the grass"}]}, "v79nh_3vpVk": 
{"duration": 102.49000000000001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=v79nh_3vpVk", "annotations": [{"segment": [0, 6.713513260530421], "label": "Playing field hockey"}, {"segment": [38.52277847113884, 42.19922620904836], "label": "Playing field hockey"}, {"segment": [87.75520904836193, 93.98918564742588], "label": "Playing field hockey"}]}, "K11OHa6PeUc": {"duration": 186.387, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=K11OHa6PeUc", "annotations": []}, "v-YKnFqX_L0": {"duration": 122.415, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=v-YKnFqX_L0", "annotations": [{"segment": [0, 122.415623], "label": "Running a marathon"}]}, "78TRh3fX3vU": {"duration": 15.07, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=78TRh3fX3vU", "annotations": []}, "E46k4xD_fo4": {"duration": 92.95, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=E46k4xD_fo4", "annotations": []}, "_V62aT_K8Zw": {"duration": 190.94, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=_V62aT_K8Zw", "annotations": []}, "ffWMjZCysO0": {"duration": 218.64, "subset": "training", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=ffWMjZCysO0", "annotations": [{"segment": [48.098962852965265, 72.95318870966308], "label": "Running a marathon"}]}, "nlkmPF8TBdQ": {"duration": 174.57, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nlkmPF8TBdQ", "annotations": [{"segment": [48.59658002847519, 148.50464134107239], "label": "Preparing pasta"}]}, "Fwu0dTTQfKM": {"duration": 195.12, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Fwu0dTTQfKM", "annotations": []}, "r97vYbzloD8": {"duration": 33.832, "subset": "validation", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=r97vYbzloD8", "annotations": [{"segment": [9.130802895475819, 31.667524492979716], "label": "Kite flying"}]}, "YNQphOFqDOA": {"duration": 167.07, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YNQphOFqDOA", "annotations": [{"segment": [28.93057799063963, 48.47826582215289], "label": "Tai chi"}]}, "0Lc7aCtT2XU": {"duration": 6.92, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=0Lc7aCtT2XU", "annotations": []}, "OwoQuRQfMNw": {"duration": 167.556, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OwoQuRQfMNw", "annotations": []}, "83CLjJhrEYs": {"duration": 52.849, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=83CLjJhrEYs", "annotations": []}, "_FvFoOncvX4": {"duration": 208.121, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_FvFoOncvX4", "annotations": []}, "0Zg-7EgFiC8": {"duration": 218.22, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0Zg-7EgFiC8", "annotations": [{"segment": [2.5750078, 130.22556396], "label": "Cheerleading"}]}, "eqm20MDaeRQ": {"duration": 13.49, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=eqm20MDaeRQ", "annotations": [{"segment": [2.317052262090484, 5.797872854914197], "label": "Hand washing clothes"}, {"segment": [10.369070982839313, 12.424013260530423], "label": "Hand washing clothes"}]}, "oHDeMg1ZFp8": {"duration": 111.55, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=oHDeMg1ZFp8", "annotations": [{"segment": [15.043487994539783, 107.73920014586584], "label": "Preparing salad"}]}, "mqUJEX9qk8E": {"duration": 109.65, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=mqUJEX9qk8E", "annotations": [{"segment": [0, 5.643514820592824], "label": "Horseback 
riding"}, {"segment": [42.75390015600624, 106.2006879875195], "label": "Horseback riding"}]}, "lue8XE3MEe8": {"duration": 85.357, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lue8XE3MEe8", "annotations": [{"segment": [9.450531981279251, 85.301], "label": "Kneeling"}]}, "xuEhV19bVRI": {"duration": 71.786, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xuEhV19bVRI", "annotations": [{"segment": [15.233909516380656, 52.534585023400936], "label": "Doing a powerbomb"}]}, "G1vQQ4ypw3w": {"duration": 40.8, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=G1vQQ4ypw3w", "annotations": []}, "Q_W0GL3ljUY": {"duration": 166.209, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Q_W0GL3ljUY", "annotations": [{"segment": [0.01, 146.24231928861153], "label": "Sumo"}]}, "MAGeYoFuCgo": {"duration": 113.71000000000001, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=MAGeYoFuCgo", "annotations": []}, "9ZVWD0ZKXZg": {"duration": 234.22, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9ZVWD0ZKXZg", "annotations": []}, "STR26rN9GcA": {"duration": 27.841, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=STR26rN9GcA", "annotations": [{"segment": [0.2, 27.640725623582767], "label": "Playing congas"}]}, "QU5R75IyQow": {"duration": 143.51, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QU5R75IyQow", "annotations": [{"segment": [22.612234996879874, 112.165640925117], "label": "Braiding hair"}]}, "GJz8FEFB70w": {"duration": 144.57, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GJz8FEFB70w", "annotations": [{"segment": [23.87949032449298, 123.6852291], "label": "Clean and jerk"}]}, "x75wqBUD3zo": {"duration": 144.57, "subset": "training", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=x75wqBUD3zo", "annotations": [{"segment": [69.87935079, 105.06117591], "label": "Playing harmonica"}, {"segment": [69.87935079, 105.06117591], "label": "Playing harmonica"}]}, "1WbHAVWplj8": {"duration": 26.24, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1WbHAVWplj8", "annotations": []}, "icm5lX5pZE0": {"duration": 104.281, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=icm5lX5pZE0", "annotations": [{"segment": [40.818238689547584, 82.12434477379095], "label": "Hanging wallpaper"}]}, "o8B6D4Sxlvk": {"duration": 140.69, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=o8B6D4Sxlvk", "annotations": []}, "PhqSGnssOW0": {"duration": 156.04, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PhqSGnssOW0", "annotations": []}, "RYyGvn4MOr0": {"duration": 12.98, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RYyGvn4MOr0", "annotations": [{"segment": [0.2, 12.721], "label": "Riding bumper cars"}]}, "XSnPmf41ToA": {"duration": 78.41, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=XSnPmf41ToA", "annotations": [{"segment": [15.413527969690216, 25.566883695755994], "label": "Hammer throw"}, {"segment": [39.87944522620905, 47.7085387675507], "label": "Hammer throw"}]}, "s56ctLdnOdw": {"duration": 92.53, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=s56ctLdnOdw", "annotations": [{"segment": [0.01, 89.06700061199727], "label": "Volleyball"}]}, "R3MPcPKQYKE": {"duration": 27.261, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=R3MPcPKQYKE", "annotations": [{"segment": [0.01, 27.221], "label": "Installing carpet"}]}, "1r_WSe-AQKg": {"duration": 60.512, "subset": "testing", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=1r_WSe-AQKg", "annotations": []}, "e_X0K2t8API": {"duration": 82.315, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=e_X0K2t8API", "annotations": [{"segment": [6.16397593046579, 62.92392095683828], "label": "Hitting a pinata"}]}, "5Q8ltS84BBg": {"duration": 73.24, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=5Q8ltS84BBg", "annotations": [{"segment": [19.367699780000002, 57.73962062], "label": "Playing guitarra"}]}, "pCWlZ37fGEo": {"duration": 81.57, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=pCWlZ37fGEo", "annotations": [{"segment": [0, 81.57170068027212], "label": "Playing water polo"}]}, "TWhxFETOG8I": {"duration": 129.986, "subset": "training", "resolution": "638x360", "url": "https://www.youtube.com/watch?v=TWhxFETOG8I", "annotations": [{"segment": [13.948653404402195, 124.93144199106548], "label": "Futsal"}]}, "cY3QbnSeu9k": {"duration": 197.068, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cY3QbnSeu9k", "annotations": [{"segment": [7.110375294863047, 195.9031234752674], "label": "Snowboarding"}]}, "mjKcoY18QG0": {"duration": 143.732, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mjKcoY18QG0", "annotations": [{"segment": [68.16585479745721, 92.60690108580343], "label": "Playing blackjack"}]}, "W0dCIs9Y8yg": {"duration": 162.423, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=W0dCIs9Y8yg", "annotations": [{"segment": [0, 162.161], "label": "Skiing"}]}, "k0ruZZZ5Gxw": {"duration": 66.04, "subset": "validation", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=k0ruZZZ5Gxw", "annotations": [{"segment": [8.384370732449298, 65.89189514274571], "label": "Preparing salad"}]}, "R8vqzwGs6aE": {"duration": 206.96, "subset": "validation", "resolution": "720x480", "url": 
"https://www.youtube.com/watch?v=R8vqzwGs6aE", "annotations": [{"segment": [33.739880076835725, 180.96844612636505], "label": "Archery"}]}, "DEYYpOYEy_c": {"duration": 6.758, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=DEYYpOYEy_c", "annotations": []}, "oQ_gE5_0xyc": {"duration": 179.305, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=oQ_gE5_0xyc", "annotations": [{"segment": [20.140285905313764, 174.82887070584863], "label": "Baton twirling"}]}, "5VNv4hoVhLk": {"duration": 20.991, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5VNv4hoVhLk", "annotations": []}, "JLdp1Kz0naI": {"duration": 198.18, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JLdp1Kz0naI", "annotations": [{"segment": [1.3909633385335414, 194.58031591263648], "label": "Doing motocross"}]}, "9Nj9WdaSQCs": {"duration": 83.31, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9Nj9WdaSQCs", "annotations": [{"segment": [16.83307332293292, 58.48821926677067], "label": "Using uneven bars"}]}, "WdX_4wwtkGg": {"duration": 140.85, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=WdX_4wwtkGg", "annotations": [{"segment": [0.01, 80.20447534321373], "label": "Playing harmonica"}, {"segment": [97.56380014352574, 140.652243], "label": "Playing harmonica"}]}, "ybF4RykZxK8": {"duration": 63.716, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ybF4RykZxK8", "annotations": [{"segment": [0.2, 63.681], "label": "Longboarding"}]}, "OFTkwnSh-sQ": {"duration": 95.95, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=OFTkwnSh-sQ", "annotations": []}, "rGOOlcdpfLg": {"duration": 192.17, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rGOOlcdpfLg", "annotations": []}, 
"ufrTSC3Dewg": {"duration": 39.567, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ufrTSC3Dewg", "annotations": []}, "6RqeTmOkxWg": {"duration": 90.81, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6RqeTmOkxWg", "annotations": []}, "oofh5aSJUSc": {"duration": 78.6, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=oofh5aSJUSc", "annotations": [{"segment": [6.356415459822896, 13.079549168998636], "label": "Beer pong"}, {"segment": [14.057459526696926, 25.30342864022725], "label": "Beer pong"}, {"segment": [27.87044332918526, 34.10462185951186], "label": "Beer pong"}, {"segment": [39.11641244271559, 42.29462110523503], "label": "Beer pong"}, {"segment": [44.73939699948075, 60.50820151736567], "label": "Beer pong"}]}, "94q8YdJoPUw": {"duration": 89.14, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=94q8YdJoPUw", "annotations": [{"segment": [70.04699780000001, 87.70815846645866], "label": "Kayaking"}, {"segment": [11.699756250000002, 65.89391861], "label": "Kayaking"}]}, "u6rdzTx2S8c": {"duration": 43.0, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=u6rdzTx2S8c", "annotations": []}, "kHyEIbBnzYM": {"duration": 132.15, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=kHyEIbBnzYM", "annotations": [{"segment": [19.378483025035287, 92.15087140628482], "label": "Doing karate"}]}, "hkiX5j0IJ0o": {"duration": 92.253, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hkiX5j0IJ0o", "annotations": []}, "PRLlYhneNMk": {"duration": 112.94, "subset": "training", "resolution": "540x360", "url": "https://www.youtube.com/watch?v=PRLlYhneNMk", "annotations": [{"segment": [15.51583458, 92.72888845553821], "label": "Wrapping presents"}]}, "RG98kemBdyg": {"duration": 31.02, "subset": "validation", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=RG98kemBdyg", "annotations": [{"segment": [0, 2.1294255787831515], "label": "Playing saxophone"}, {"segment": [6.388276736349454, 22.213780469578783], "label": "Playing saxophone"}]}, "6CD_Jvf2J0Q": {"duration": 35.53, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6CD_Jvf2J0Q", "annotations": []}, "aeg49bWI-wU": {"duration": 50.736, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aeg49bWI-wU", "annotations": []}, "DIVaPURJHPo": {"duration": 55.984, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DIVaPURJHPo", "annotations": []}, "-v9YLmGCYO0": {"duration": 72.261, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-v9YLmGCYO0", "annotations": [{"segment": [0.2, 69.40426833073323], "label": "Swinging at the playground"}]}, "KzVRgHnpCOQ": {"duration": 142.362, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KzVRgHnpCOQ", "annotations": [{"segment": [55.30112940027805, 125.4824806552262], "label": "Putting on shoes"}]}, "D2TQ_RR2Q50": {"duration": 226.372, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=D2TQ_RR2Q50", "annotations": [{"segment": [56.15139270060598, 63.56761437804451], "label": "Running a marathon"}, {"segment": [201.29744538065523, 210.83258753026522], "label": "Running a marathon"}]}, "m49gj6Y6SDo": {"duration": 152.184, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=m49gj6Y6SDo", "annotations": [{"segment": [14.838479691109413, 143.99260692252574], "label": "Assembling bicycle"}]}, "cofT2pt-nFg": {"duration": 56.355, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cofT2pt-nFg", "annotations": []}, "hBLnY5MacSc": {"duration": 26.611, "subset": "testing", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=hBLnY5MacSc", "annotations": []}, "4hmJfJo6UI8": {"duration": 117.94, "subset": "training", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=4hmJfJo6UI8", "annotations": [{"segment": [6.805517940717629, 11.955639625585023], "label": "Long jump"}, {"segment": [18.393291731669265, 31.45252886115445], "label": "Long jump"}, {"segment": [38.99377847113884, 42.304570982839316], "label": "Long jump"}, {"segment": [50.0297535101404, 68.23911232449298], "label": "Long jump"}, {"segment": [74.30889859594383, 78.72328861154446], "label": "Long jump"}, {"segment": [83.68947737909517, 93.98972074882994], "label": "Long jump"}, {"segment": [103.73816536661467, 106.86502496099844], "label": "Long jump"}]}, "Cqv9FqNJXks": {"duration": 81.87, "subset": "testing", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=Cqv9FqNJXks", "annotations": []}, "pZsHrSHAOqs": {"duration": 12.47, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=pZsHrSHAOqs", "annotations": [{"segment": [0.01, 4.7658256162246495], "label": "Playing ten pins"}]}, "KmdmcWOmIQ4": {"duration": 120.63, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=KmdmcWOmIQ4", "annotations": []}, "1Cf8TkmsbMU": {"duration": 136.0, "subset": "training", "resolution": "426x236", "url": "https://www.youtube.com/watch?v=1Cf8TkmsbMU", "annotations": [{"segment": [12.305706573841187, 22.914074309911175], "label": "Spinning"}, {"segment": [62.589369642812926, 74.04640679776851], "label": "Spinning"}, {"segment": [105.02283952418095, 107.3566804024961], "label": "Spinning"}]}, "8AsV0ojyUMU": {"duration": 118.78999999999999, "subset": "validation", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=8AsV0ojyUMU", "annotations": [{"segment": [9.346045241809671, 104.10198907956318], "label": "Fixing bicycle"}]}, "DN3v5LhGsx0": {"duration": 167.231, "subset": "validation", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=DN3v5LhGsx0", "annotations": [{"segment": [0, 152.35898194071763], "label": "Longboarding"}]}, "ZyOPt4sgsbs": {"duration": 106.673, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZyOPt4sgsbs", "annotations": [{"segment": [14.97741411697284, 17.64006551554579], "label": "Clipping cat claws"}, {"segment": [34.448052469037535, 39.440523841361816], "label": "Clipping cat claws"}, {"segment": [47.76130946190228, 49.092634399375974], "label": "Clipping cat claws"}]}, "CFBmZ1g16H8": {"duration": 93.321, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CFBmZ1g16H8", "annotations": [{"segment": [11.210166654285219, 43.53038739780884], "label": "Scuba diving"}, {"segment": [48.189157060842426, 89.68133156942277], "label": "Scuba diving"}]}, "9YxgMsdO9GE": {"duration": 138.67000000000002, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9YxgMsdO9GE", "annotations": [{"segment": [0.01, 138.66956916099772], "label": "Walking the dog"}]}, "ivAvKRQQy1Q": {"duration": 134.189, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=ivAvKRQQy1Q", "annotations": []}, "EaxmojivFrQ": {"duration": 17.95, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EaxmojivFrQ", "annotations": []}, "-1IlTIWPNs4": {"duration": 86.05, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-1IlTIWPNs4", "annotations": [{"segment": [0, 86.05315192743764], "label": "Shoveling snow"}]}, "gusezUdYfNA": {"duration": 139.55, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gusezUdYfNA", "annotations": [{"segment": [59.2960280577223, 136.5440646099844], "label": "Smoking hookah"}]}, "RDq85fMq41s": {"duration": 175.5, "subset": "training", "resolution": "960x720", "url": 
"https://www.youtube.com/watch?v=RDq85fMq41s", "annotations": [{"segment": [12.590676003120125, 159.29942247425896], "label": "Grooming horse"}]}, "wWOH-b4PTq4": {"duration": 148.14, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=wWOH-b4PTq4", "annotations": [{"segment": [0.01, 0.02], "label": "Sailing"}]}, "GXte17Sz13Q": {"duration": 33.553, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GXte17Sz13Q", "annotations": []}, "F4GwrEQUvDc": {"duration": 79.65, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=F4GwrEQUvDc", "annotations": [{"segment": [27.52123021294852, 34.2306949149766], "label": "Javelin throw"}, {"segment": [4.162353102184087, 13.85380211622465], "label": "Javelin throw"}, {"segment": [41.56140634867395, 67.65376907878316], "label": "Javelin throw"}]}, "BVhUKjJrEw8": {"duration": 205.54, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=BVhUKjJrEw8", "annotations": [{"segment": [7.769579236455424, 174.55606904106497], "label": "Playing polo"}]}, "TUdtpLFcUF8": {"duration": 154.02, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TUdtpLFcUF8", "annotations": []}, "b8ftOk8tKxg": {"duration": 110.13, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=b8ftOk8tKxg", "annotations": [{"segment": [29.380052851093637, 97.5898814726989], "label": "Washing face"}]}, "AkG3o8tNptU": {"duration": 206.96, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=AkG3o8tNptU", "annotations": []}, "4YoVu3D3AsA": {"duration": 231.666, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4YoVu3D3AsA", "annotations": []}, "TIjwhYSIRgg": {"duration": 123.438, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TIjwhYSIRgg", "annotations": 
[{"segment": [0, 21.750472698907956], "label": "Doing fencing"}, {"segment": [25.985078003120123, 50.04533541341654], "label": "Doing fencing"}, {"segment": [60.63184867394696, 103.57213416536662], "label": "Doing fencing"}]}, "-y8LMGQt8uI": {"duration": 203.988, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-y8LMGQt8uI", "annotations": [{"segment": [15.69419581186883, 182.80242945672865], "label": "Skiing"}]}, "jN-FG9yxNvE": {"duration": 40.798, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jN-FG9yxNvE", "annotations": [{"segment": [5.0871762870514825, 35.864592823712954], "label": "Using the monkey bar"}]}, "AA1wvSZ4Mno": {"duration": 78.93, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=AA1wvSZ4Mno", "annotations": [{"segment": [7.018260477357869, 10.342699650843176], "label": "Walking the dog"}, {"segment": [21.424163562460866, 24.00983847517166], "label": "Walking the dog"}]}, "eQwLUJ3P7wI": {"duration": 49.71, "subset": "testing", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=eQwLUJ3P7wI", "annotations": []}, "eFZufRRvIz4": {"duration": 90.187, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eFZufRRvIz4", "annotations": [{"segment": [3.798798992199688, 34.751975965678625], "label": "Mooping floor"}]}, "GxDNGtoVjS0": {"duration": 171.341, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GxDNGtoVjS0", "annotations": []}, "uPqp30C6MDE": {"duration": 47.23, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uPqp30C6MDE", "annotations": [{"segment": [0.20000000000000004, 47.001999999999995], "label": "Swinging at the playground"}]}, "SEVVSei-r6w": {"duration": 12.03, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SEVVSei-r6w", "annotations": []}, "xAaHXwTz1Ug": 
{"duration": 47.81, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xAaHXwTz1Ug", "annotations": [{"segment": [1.8273669613451204, 47.809885], "label": "Kite flying"}]}, "F8ISOmP1GTk": {"duration": 137.973, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=F8ISOmP1GTk", "annotations": []}, "BWCMfPoKlMM": {"duration": 51.25, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BWCMfPoKlMM", "annotations": [{"segment": [30.939738291572482, 33.657957595943834], "label": "Long jump"}]}, "_dZLd_Xk4fw": {"duration": 111.247, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=_dZLd_Xk4fw", "annotations": []}, "Hhzuzfdg6LY": {"duration": 9.94, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Hhzuzfdg6LY", "annotations": []}, "ZKtjyZhRB6E": {"duration": 42.865, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZKtjyZhRB6E", "annotations": [{"segment": [1.1332627386418843, 4.733041727755415], "label": "Knitting"}, {"segment": [10.59934822853302, 14.399114939263967], "label": "Knitting"}, {"segment": [24.865139037242194, 42.06408309634018], "label": "Knitting"}]}, "xEnnIWce1ok": {"duration": 13.77, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xEnnIWce1ok", "annotations": [{"segment": [0.36517994383775354, 4.575489884555382], "label": "Doing a powerbomb"}]}, "X5o8NVuMi3I": {"duration": 103.051, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=X5o8NVuMi3I", "annotations": []}, "jC1He93cebg": {"duration": 103.933, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=jC1He93cebg", "annotations": [{"segment": [5.997405616224649, 41.17137909516381], "label": "Elliptical trainer"}]}, "r64pATF3vCI": {"duration": 25.43, "subset": "validation", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=r64pATF3vCI", "annotations": [{"segment": [4.989088923556942, 18.83661975], "label": "Paintball"}]}, "0ZXc2fEDgg8": {"duration": 83.174, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=0ZXc2fEDgg8", "annotations": [{"segment": [6.228309083383744, 25.951287847432265], "label": "Doing a powerbomb"}, {"segment": [30.8820325384444, 47.49085587519501], "label": "Doing a powerbomb"}, {"segment": [51.383548979719194, 67.34359070826834], "label": "Doing a powerbomb"}]}, "L36MIRUpcrI": {"duration": 32.44, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=L36MIRUpcrI", "annotations": [{"segment": [9.55349297971919, 10.918277691107646], "label": "Playing racquetball"}]}, "2VvCuSFicyA": {"duration": 103.19, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2VvCuSFicyA", "annotations": [{"segment": [0.01, 103.141], "label": "Walking the dog"}]}, "R8-iriCwcyE": {"duration": 139.09, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=R8-iriCwcyE", "annotations": [{"segment": [24.3023450461828, 72.9070351385484], "label": "Using parallel bars"}, {"segment": [96.77540973747793, 122.59665122152886], "label": "Using parallel bars"}]}, "pIUpJihiju0": {"duration": 63.995, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pIUpJihiju0", "annotations": [{"segment": [4.889374414976599, 5.787422776911076], "label": "Mooping floor"}, {"segment": [19.757063962558504, 63.961], "label": "Mooping floor"}]}, "G6LjkB0xv2k": {"duration": 101.913, "subset": "training", "resolution": "852x480", "url": "https://www.youtube.com/watch?v=G6LjkB0xv2k", "annotations": [{"segment": [1.8096593085762451, 99.43600367773512], "label": "Using the rowing machine"}]}, "698QI6p6Qgc": {"duration": 69.22, "subset": "testing", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=698QI6p6Qgc", "annotations": []}, "4_jDgRsOGvY": {"duration": 151.813, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4_jDgRsOGvY", "annotations": []}, "-9rt8p7BgnY": {"duration": 91.069, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=-9rt8p7BgnY", "annotations": []}, "n9MONPwq7x0": {"duration": 170.41, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=n9MONPwq7x0", "annotations": [{"segment": [33.76322202964119, 86.13609399687988], "label": "Tennis serve with ball bouncing"}, {"segment": [130.00169742121687, 164.03077159282373], "label": "Tennis serve with ball bouncing"}]}, "HtkuvF7VbSQ": {"duration": 160.15, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HtkuvF7VbSQ", "annotations": [{"segment": [15.233663514820591, 119.87145060842433], "label": "Getting a tattoo"}]}, "offWBoR4Ggg": {"duration": 104.118, "subset": "training", "resolution": "352x240", "url": "https://www.youtube.com/watch?v=offWBoR4Ggg", "annotations": [{"segment": [0.2, 65.2560883748056], "label": "Futsal"}]}, "5rkAMBttgPc": {"duration": 77.81, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5rkAMBttgPc", "annotations": [{"segment": [6.85433434399376, 48.06958785], "label": "Mixing drinks"}, {"segment": [6.85433434399376, 74.54846821918878], "label": "Mixing drinks"}]}, "aivuk1LvDv8": {"duration": 130.92, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aivuk1LvDv8", "annotations": [{"segment": [57.166216379999995, 58.37848001999999], "label": "Playing piano"}, {"segment": [1.8799250399999998, 125.36694715834635], "label": "Playing piano"}]}, "cqgecSy943o": {"duration": 218.69, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cqgecSy943o", "annotations": [{"segment": [25.24597079875195, 
39.23360326833074], "label": "Skateboarding"}, {"segment": [41.621735641185644, 59.703309321372856], "label": "Skateboarding"}, {"segment": [113.94803036193447, 126.91217752886116], "label": "Skateboarding"}, {"segment": [129.6414716692668, 146.3583982792512], "label": "Skateboarding"}, {"segment": [171.60436907800312, 177.06295735881437], "label": "Skateboarding"}, {"segment": [191.73291336349453, 202.65008992511702], "label": "Skateboarding"}]}, "b1PAqOmflgI": {"duration": 34.02, "subset": "training", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=b1PAqOmflgI", "annotations": [{"segment": [10.145616224648988, 14.17207488299532], "label": "Layup drill in basketball"}, {"segment": [24.079282371294852, 28.052761310452418], "label": "Layup drill in basketball"}]}, "CjuFIFZ_MLg": {"duration": 65.156, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CjuFIFZ_MLg", "annotations": []}, "kIFHChO_-vg": {"duration": 50.18, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=kIFHChO_-vg", "annotations": []}, "pASyH2NPOOw": {"duration": 209.258, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pASyH2NPOOw", "annotations": [{"segment": [12.842290449379728, 82.2688833372302], "label": "Capoeira"}, {"segment": [80.63915110981587, 90.74349091978472], "label": "Capoeira"}, {"segment": [95.95863404751057, 106.38892030296229], "label": "Capoeira"}, {"segment": [108.34459897585948, 118.12299234034548], "label": "Capoeira"}, {"segment": [131.48679660514296, 136.70193973286882], "label": "Capoeira"}, {"segment": [141.9170828605947, 149.08790466121775], "label": "Capoeira"}, {"segment": [162.45170892601527, 168.97063783567256], "label": "Capoeira"}, {"segment": [172.88199518146695, 176.46740608177848], "label": "Capoeira"}, {"segment": [166.36306627180963, 169.94847717212116], "label": "Capoeira"}, {"segment": [182.33444210047008, 196.67608570171618], "label": 
"Capoeira"}]}, "FkWlzk8CrxA": {"duration": 83.62, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FkWlzk8CrxA", "annotations": [{"segment": [9.391993160686429, 11.479102751950078], "label": "Cumbia"}, {"segment": [28.175979482059283, 77.61438792511701], "label": "Cumbia"}]}, "wAt_OsQe-qM": {"duration": 128.5, "subset": "training", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=wAt_OsQe-qM", "annotations": [{"segment": [32.26555538221529, 75.35309828393136], "label": "Playing violin"}, {"segment": [90.38363650546023, 120.2443057722309], "label": "Playing violin"}]}, "YWsqpINl8Ic": {"duration": 72.05, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=YWsqpINl8Ic", "annotations": [{"segment": [14.602386895475819, 61.1053728549142], "label": "Cleaning windows"}]}, "dTxVCLxbymM": {"duration": 31.79, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dTxVCLxbymM", "annotations": []}, "Z5DKoX7wx0E": {"duration": 140.504, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Z5DKoX7wx0E", "annotations": []}, "Dqt0kW_gJ_I": {"duration": 41.727, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Dqt0kW_gJ_I", "annotations": []}, "52Xw6TZcysc": {"duration": 42.49, "subset": "testing", "resolution": "562x480", "url": "https://www.youtube.com/watch?v=52Xw6TZcysc", "annotations": []}, "drMXYzlmJQU": {"duration": 101.518, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=drMXYzlmJQU", "annotations": [{"segment": [0.01, 100.63203890171883], "label": "Knitting"}]}, "wr2FC2P3hIs": {"duration": 70.36, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wr2FC2P3hIs", "annotations": [{"segment": [6.972279599999999, 55.593904079999994], "label": "Getting a haircut"}]}, "-oExUcmbTEE": {"duration": 48.9, "subset": 
"validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-oExUcmbTEE", "annotations": [{"segment": [12.282522796099844, 17.69903906021841], "label": "Long jump"}, {"segment": [38.52592554056163, 45.16306518814353], "label": "Long jump"}]}, "iSJ87SnNLPc": {"duration": 35.62, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=iSJ87SnNLPc", "annotations": [{"segment": [10.113467141965678, 17.55964624648986], "label": "Gargling mouthwash"}]}, "c0Hix_5Vm8I": {"duration": 217.804, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=c0Hix_5Vm8I", "annotations": [{"segment": [29.509668224299062, 177.75271651090344], "label": "Snowboarding"}]}, "v-dxQNxdMrU": {"duration": 91.51, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=v-dxQNxdMrU", "annotations": [{"segment": [16.274749063431926, 51.6794663242312], "label": "Doing motocross"}, {"segment": [54.53468545816663, 60.38788468273425], "label": "Doing motocross"}, {"segment": [65.2417570374415, 87.08418335413415], "label": "Doing motocross"}]}, "HwRiUpC5mf4": {"duration": 201.852, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=HwRiUpC5mf4", "annotations": [{"segment": [32.11185959438377, 190.1525803432137], "label": "Camel ride"}]}, "k_xDTGiDp9A": {"duration": 180.51, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=k_xDTGiDp9A", "annotations": [{"segment": [6.052255567862714, 64.322809174727], "label": "Kayaking"}, {"segment": [79.18806705, 169.88540628861153], "label": "Kayaking"}]}, "rRkwB9EcEMs": {"duration": 24.94, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rRkwB9EcEMs", "annotations": [{"segment": [5.17432972698908, 24.937935], "label": "Clean and jerk"}]}, "pLF3UbT_46I": {"duration": 25.45, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=pLF3UbT_46I", "annotations": []}, "90cHokbZy6A": {"duration": 170.737, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=90cHokbZy6A", "annotations": [{"segment": [52.45086583463338, 166.144624024961], "label": "Sharpening knives"}]}, "EJiyr-6l3ps": {"duration": 79.064, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EJiyr-6l3ps", "annotations": [{"segment": [0.2, 54.13260374414976], "label": "Clipping cat claws"}]}, "rPeQxKmIe4I": {"duration": 34.25, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rPeQxKmIe4I", "annotations": [{"segment": [0.01, 34.221], "label": "Scuba diving"}]}, "o67-Z8n-jEE": {"duration": 177.029, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=o67-Z8n-jEE", "annotations": [{"segment": [118.7101872074883, 159.86271294851795], "label": "Cleaning sink"}]}, "dySzHZniFCo": {"duration": 160.4, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dySzHZniFCo", "annotations": [{"segment": [10.243335580000002, 147.59817931357253], "label": "Doing karate"}]}, "45v95EtrJX0": {"duration": 8.336, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=45v95EtrJX0", "annotations": [{"segment": [1.2744530932917315, 8.218921989391575], "label": "Shuffleboard"}]}, "rJKrXQ4v-9s": {"duration": 77.671, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rJKrXQ4v-9s", "annotations": [{"segment": [16.479251120124804, 62.5239800124805], "label": "Waxing skis"}]}, "rR1R-x_UlOM": {"duration": 80.2, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=rR1R-x_UlOM", "annotations": [{"segment": [8.631028081123244, 72.92593291731669], "label": "Mixing drinks"}]}, "T_5ANYuDWOA": {"duration": 104.792, "subset": "validation", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=T_5ANYuDWOA", "annotations": [{"segment": [0.01, 0.02], "label": "Trimming branches or hedges"}]}, "eNE8O5cgYuE": {"duration": 173.988, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eNE8O5cgYuE", "annotations": [{"segment": [36.88358190327613, 120.95645241809673], "label": "Running a marathon"}]}, "BVU-iDSTMDU": {"duration": 139.85, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=BVU-iDSTMDU", "annotations": []}, "ijrjRNwZjqQ": {"duration": 91.07, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ijrjRNwZjqQ", "annotations": []}, "nWpFumm3Z5g": {"duration": 10.5, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=nWpFumm3Z5g", "annotations": [{"segment": [0.11461454914196569, 10.495418], "label": "Playing field hockey"}]}, "-tv6laq1ic0": {"duration": 54.99, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=-tv6laq1ic0", "annotations": [{"segment": [2.487614163806552, 54.984851], "label": "Getting a tattoo"}]}, "laU1Hvm5Dis": {"duration": 8.04, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=laU1Hvm5Dis", "annotations": []}, "hFlDERq1ThU": {"duration": 132.87, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hFlDERq1ThU", "annotations": [{"segment": [26.823912959999998, 64.43904], "label": "Using the pommel horse"}, {"segment": [70.4710656, 124.58994867238691], "label": "Using the pommel horse"}]}, "cGqmHplppAU": {"duration": 209.027, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=cGqmHplppAU", "annotations": [{"segment": [19.829784680798138, 25.35611949881036], "label": "Building sandcastles"}, {"segment": [34.1332395038886, 36.0837106161282], "label": "Building sandcastles"}]}, "UCBAyJpXuCw": {"duration": 144.73, 
"subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UCBAyJpXuCw", "annotations": [{"segment": [0.01, 4.967331515029309], "label": "Playing drums"}, {"segment": [17.837235894877974, 120.11910727613103], "label": "Playing drums"}, {"segment": [120.11910727613103, 129.6021946926677], "label": "Playing drums"}]}, "LbVTFawQnj0": {"duration": 177.31, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LbVTFawQnj0", "annotations": []}, "WPrlU-Im5Ko": {"duration": 85.938, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WPrlU-Im5Ko", "annotations": [{"segment": [16.55722485770179, 20.579222798843926], "label": "Putting in contact lenses"}, {"segment": [24.064954347833776, 26.61221971055713], "label": "Putting in contact lenses"}]}, "9qOfesI3bGE": {"duration": 115.66, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9qOfesI3bGE", "annotations": [{"segment": [1.0820686427457098, 111.29197659906396], "label": "Playing accordion"}]}, "u08gU4eQFHY": {"duration": 9.45, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=u08gU4eQFHY", "annotations": [{"segment": [0.48573, 9.030333073322932], "label": "Snatch"}]}, "s8xM3FUf8hA": {"duration": 102.53999999999999, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=s8xM3FUf8hA", "annotations": []}, "2zvN1cL1uik": {"duration": 191.64, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=2zvN1cL1uik", "annotations": [{"segment": [58.66715776, 147.42604160000002], "label": "Smoking hookah"}]}, "tznMNEWglxY": {"duration": 41.47, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tznMNEWglxY", "annotations": [{"segment": [0, 41.470839], "label": "Using the balance beam"}]}, "KK7g7udEPuo": {"duration": 137.347, "subset": "training", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=KK7g7udEPuo", "annotations": [{"segment": [0.01, 137.34603174603174], "label": "Cutting the grass"}]}, "DFJBJkCR0Bk": {"duration": 89.07, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=DFJBJkCR0Bk", "annotations": [{"segment": [0, 89.071746], "label": "Playing water polo"}]}, "u3qNB8GDOzY": {"duration": 108.136, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=u3qNB8GDOzY", "annotations": [{"segment": [0.01, 108.134806], "label": "Roof shingle removal"}]}, "dI6TWaB6tls": {"duration": 110.67, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=dI6TWaB6tls", "annotations": [{"segment": [4.831201248049922, 108.70202808112325], "label": "Tango"}]}, "nt4Ag91306U": {"duration": 123.11, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nt4Ag91306U", "annotations": [{"segment": [6.527416536661467, 11.518970358814354], "label": "Tennis serve with ball bouncing"}, {"segment": [48.37967550702028, 59.71636661466459], "label": "Tennis serve with ball bouncing"}, {"segment": [63.36463338533542, 67.58894227769112], "label": "Tennis serve with ball bouncing"}, {"segment": [82.18200936037442, 97.15910452418098], "label": "Tennis serve with ball bouncing"}]}, "lOuegsXjFkI": {"duration": 25.45, "subset": "testing", "resolution": "633x360", "url": "https://www.youtube.com/watch?v=lOuegsXjFkI", "annotations": []}, "DEtZBd0XdX0": {"duration": 184.2, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DEtZBd0XdX0", "annotations": []}, "Huz41aVo5eo": {"duration": 157.15, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Huz41aVo5eo", "annotations": [{"segment": [57.49969266770671, 65.83653510140407], "label": "Shaving legs"}, {"segment": [69.26935257410297, 71.96656630265211], "label": "Shaving legs"}, 
{"segment": [84.2266287051482, 86.67864118564742], "label": "Shaving legs"}, {"segment": [101.39071606864275, 108.50155226209048], "label": "Shaving legs"}, {"segment": [113.89597971918877, 121.25201716068642], "label": "Shaving legs"}]}, "AC9mml3mqps": {"duration": 134.699, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AC9mml3mqps", "annotations": [{"segment": [37.808705148205924, 134.661], "label": "Playing ice hockey"}]}, "h2xV6mTpUCM": {"duration": 172.15, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=h2xV6mTpUCM", "annotations": [{"segment": [14.234148846225956, 156.84420615464074], "label": "Washing face"}]}, "ksvIIhvmSRM": {"duration": 209.235, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ksvIIhvmSRM", "annotations": [{"segment": [75.06432137285492, 178.84890483619344], "label": "River tubing"}]}, "nKBjM-kdeeI": {"duration": 141.689, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=nKBjM-kdeeI", "annotations": [{"segment": [28.288787170046803, 37.129033160686426], "label": "Fun sliding down"}]}, "QhiKgeJV3k0": {"duration": 195.05, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=QhiKgeJV3k0", "annotations": [{"segment": [4.10395084399376, 4.10395084399376], "label": "Playing guitarra"}, {"segment": [29.335648625585023, 170.99795183307336], "label": "Playing guitarra"}]}, "1C0Ur7fen8s": {"duration": 71.263, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=1C0Ur7fen8s", "annotations": [{"segment": [0.01, 42.30141483792862], "label": "Table soccer"}]}, "cGj8zXTrm8A": {"duration": 88.77, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cGj8zXTrm8A", "annotations": []}, "BoK5y7jdhzA": {"duration": 82.32, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=BoK5y7jdhzA", "annotations": [{"segment": [16.744921216848674, 23.545540561622467], "label": "Playing lacrosse"}, {"segment": [41.76606786271451, 45.87210218408737], "label": "Playing lacrosse"}, {"segment": [54.46911154446177, 58.96008658346333], "label": "Playing lacrosse"}]}, "IdEXShfpQHs": {"duration": 33.623, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IdEXShfpQHs", "annotations": [{"segment": [0.01, 33.33890171606864], "label": "Grooming dog"}]}, "6180cMhkWJA": {"duration": 66.828, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=6180cMhkWJA", "annotations": [{"segment": [19.21381804043546, 50.490335925349925], "label": "Futsal"}]}, "xl1GN2hruLU": {"duration": 13.306, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=xl1GN2hruLU", "annotations": []}, "RKUEXxnXrm8": {"duration": 148.14, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RKUEXxnXrm8", "annotations": [{"segment": [110.70303433853353, 129.8853972823713], "label": "Making a sandwich"}]}, "B4qwjeJBk0s": {"duration": 198.996, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=B4qwjeJBk0s", "annotations": [{"segment": [34.7697991729193, 160.18943190380676], "label": "Longboarding"}]}, "nQVJyUX8fHw": {"duration": 179.28, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nQVJyUX8fHw", "annotations": [{"segment": [5.593800078533754, 12.865740180627633], "label": "Tennis serve with ball bouncing"}, {"segment": [20.976750294501574, 27.68931038874208], "label": "Tennis serve with ball bouncing"}, {"segment": [47.82699067146359, 89.50080125654006], "label": "Tennis serve with ball bouncing"}, {"segment": [90.89925127617349, 101.2477805647426], "label": "Tennis serve with ball bouncing"}, {"segment": [109.91817061310454, 171.44997095631825], 
"label": "Tennis serve with ball bouncing"}]}, "ADbp9ObJVxE": {"duration": 120.88, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ADbp9ObJVxE", "annotations": [{"segment": [19.235678274570983, 96.36697645397815], "label": "Polishing forniture"}]}, "WTOTYZOu3MQ": {"duration": 128.76, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WTOTYZOu3MQ", "annotations": [{"segment": [1.0574584880090874, 127.20494669673182], "label": "Playing squash"}]}, "j15g2ZTig1k": {"duration": 170.23, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=j15g2ZTig1k", "annotations": [{"segment": [24.29260764430577, 152.01259360374416], "label": "Playing guitarra"}]}, "GNzaxjYL0TM": {"duration": 89.49, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GNzaxjYL0TM", "annotations": [{"segment": [0.01, 81.53196121060843], "label": "Doing motocross"}]}, "oOYaw6-b4SY": {"duration": 130.78, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oOYaw6-b4SY", "annotations": [{"segment": [6.426529975484734, 92.72564678913687], "label": "Doing kickboxing"}]}, "45llr44Pu9g": {"duration": 214.18, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=45llr44Pu9g", "annotations": [{"segment": [34.000839639999995, 203.42405148205927], "label": "Fixing bicycle"}]}, "MmIxh3MWWME": {"duration": 30.651, "subset": "testing", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=MmIxh3MWWME", "annotations": []}, "CmoibYw9WYc": {"duration": 210.79, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CmoibYw9WYc", "annotations": []}, "1XicledQXng": {"duration": 121.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1XicledQXng", "annotations": [{"segment": [56.27390809984399, 89.32066621216849], "label": "Brushing 
hair"}]}, "z1QgzOfUjow": {"duration": 44.978, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=z1QgzOfUjow", "annotations": [{"segment": [0.2, 40.24357878315133], "label": "Rafting"}]}, "AVL31l6H1uI": {"duration": 15.07, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AVL31l6H1uI", "annotations": [{"segment": [4.803907956318253, 8.212714508580342], "label": "Hand car wash"}]}, "Ayix_aDOrzs": {"duration": 135.47, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Ayix_aDOrzs", "annotations": [{"segment": [4.226673135725429, 121.72818630889236], "label": "Getting a haircut"}]}, "ITGlho0dw-k": {"duration": 126.433, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ITGlho0dw-k", "annotations": [{"segment": [67.06256112324493, 93.88758557254292], "label": "Applying sunscreen"}]}, "Z0oPjHAZ_yc": {"duration": 220.31, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Z0oPjHAZ_yc", "annotations": []}, "pjF6uBZHVRE": {"duration": 192.63, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pjF6uBZHVRE", "annotations": [{"segment": [41.9167232, 166.20866856], "label": "Pole vault"}]}, "XDFJFhaG_p0": {"duration": 189.08, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XDFJFhaG_p0", "annotations": [{"segment": [107.3715464428101, 176.1011348845554], "label": "Playing violin"}]}, "oFku30m99do": {"duration": 15.07, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=oFku30m99do", "annotations": [{"segment": [0, 15.046240249609985], "label": "Putting on makeup"}]}, "lPCl1ZYH2xI": {"duration": 360.421, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lPCl1ZYH2xI", "annotations": [{"segment": [59.60145775626943, 337.3667410296412], "label": 
"Removing ice from car"}]}, "nLdRqOTb0Ik": {"duration": 49.9, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nLdRqOTb0Ik", "annotations": [{"segment": [1.6335117004680186, 7.467482059282371], "label": "Playing rubik cube"}, {"segment": [25.747255850234005, 31.114508580343212], "label": "Playing rubik cube"}]}, "iUajMS61fpQ": {"duration": 153.415, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iUajMS61fpQ", "annotations": []}, "XI7qJHE61zo": {"duration": 155.528, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=XI7qJHE61zo", "annotations": []}, "fgIJnjuMyoc": {"duration": 126.224, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fgIJnjuMyoc", "annotations": [{"segment": [3.813550767239009, 22.488153570977325], "label": "Surfing"}, {"segment": [27.40252272985583, 30.547718991538073], "label": "Surfing"}, {"segment": [41.555905907425924, 59.83735917845396], "label": "Surfing"}, {"segment": [64.16200403826704, 71.43527039340722], "label": "Surfing"}, {"segment": [125.88648067378105, 126.00099999999999], "label": "Surfing"}]}, "UIrn2ziRgEM": {"duration": 170.81, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=UIrn2ziRgEM", "annotations": [{"segment": [3.061260530421217, 160.64962870514822], "label": "Smoking a cigarette"}]}, "vJilhGo626U": {"duration": 229.6, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vJilhGo626U", "annotations": []}, "A0H6JceDqlg": {"duration": 114.15, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=A0H6JceDqlg", "annotations": [{"segment": [79.95774134094616, 94.38218911069367], "label": "Tennis serve with ball bouncing"}]}, "n7ED3zaQqws": {"duration": 56.865, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=n7ED3zaQqws", 
"annotations": [{"segment": [6.825851794071763, 17.37489547581903], "label": "Swimming"}, {"segment": [35.63626521060842, 51.947391575663026], "label": "Swimming"}]}, "nFJj9tUHttM": {"duration": 181.417, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nFJj9tUHttM", "annotations": []}, "UjJ8yWaFNGg": {"duration": 183.252, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UjJ8yWaFNGg", "annotations": [{"segment": [34.330497224313135, 37.755762233743695], "label": "Skiing"}, {"segment": [50.02962851753655, 54.88208728089651], "label": "Skiing"}, {"segment": [61.73261729975764, 69.15402482019053], "label": "Skiing"}, {"segment": [73.43560608197873, 76.57543234062342], "label": "Skiing"}, {"segment": [78.2880648453387, 80.85701360241161], "label": "Skiing"}, {"segment": [84.28227861184217, 85.99491111655746], "label": "Skiing"}, {"segment": [87.42210487048686, 89.13473737520214], "label": "Skiing"}, {"segment": [90.56193112913154, 92.84544113541858], "label": "Skiing"}, {"segment": [116.82229620143254, 124.52914247265129], "label": "Skiing"}, {"segment": [138.80108001194532, 142.79722174017266], "label": "Skiing"}, {"segment": [147.93511922615397, 151.07494546758699], "label": "Skiing"}, {"segment": [152.7875779629141, 160.49442419188605], "label": "Skiing"}]}, "hva8RRWhvWA": {"duration": 71.704, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hva8RRWhvWA", "annotations": []}, "JtQSDGSKnsY": {"duration": 167.35, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JtQSDGSKnsY", "annotations": [{"segment": [25.313973478939158, 126.063], "label": "Vacuuming floor"}]}, "49PbyLbrxlE": {"duration": 35.71, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=49PbyLbrxlE", "annotations": []}, "B3ZL18f1x4Y": {"duration": 238.68, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=B3ZL18f1x4Y", "annotations": [{"segment": [96.67134530999999, 190.34252073], "label": "Making a sandwich"}]}, "cEHyZzBsWnw": {"duration": 123.97, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=cEHyZzBsWnw", "annotations": [{"segment": [1.5465959438377537, 119.49381903276131], "label": "Snatch"}]}, "kh42ufAYMZQ": {"duration": 147.076, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=kh42ufAYMZQ", "annotations": [{"segment": [6.653947877642997, 118.39438292633747], "label": "Clipping cat claws"}]}, "EQ7QqxSFWGY": {"duration": 185.88, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EQ7QqxSFWGY", "annotations": []}, "1xQTlp0hscs": {"duration": 234.66, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1xQTlp0hscs", "annotations": [{"segment": [69.00713277691109, 206.65531274570984], "label": "Painting"}]}, "SFDvkbDZTjA": {"duration": 143.802, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SFDvkbDZTjA", "annotations": [{"segment": [26.00879251170047, 70.86139781591264], "label": "Clipping cat claws"}, {"segment": [97.99503432137286, 124.4559360374415], "label": "Clipping cat claws"}]}, "hPIHCG5n7RI": {"duration": 109.25, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=hPIHCG5n7RI", "annotations": [{"segment": [0, 109.2498866213152], "label": "Hurling"}]}, "zf4MXHeiSXs": {"duration": 210.61, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zf4MXHeiSXs", "annotations": [{"segment": [35.66789344000001, 200.5840024621393], "label": "Horseback riding"}]}, "w8PqH7bltJQ": {"duration": 165.05, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=w8PqH7bltJQ", "annotations": [{"segment": [48.40694897410296, 151.91542497191887], "label": 
"Fixing bicycle"}]}, "TABp0GOGqNw": {"duration": 34.74, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=TABp0GOGqNw", "annotations": []}, "D0dyvNTI7yU": {"duration": 127.083, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=D0dyvNTI7yU", "annotations": [{"segment": [7.930284667169, 118.55775577417656], "label": "Capoeira"}]}, "SvIUXZqy8Hs": {"duration": 27.4, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SvIUXZqy8Hs", "annotations": [{"segment": [5.121410557992667, 26.076514010903427], "label": "Hitting a pinata"}]}, "-v-Zail6TNw": {"duration": 32.11, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=-v-Zail6TNw", "annotations": [{"segment": [3.42549371, 17.31019152], "label": "Playing harmonica"}, {"segment": [19.098564489999998, 29.590379], "label": "Playing harmonica"}]}, "optJ47P_5Ys": {"duration": 15.51, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=optJ47P_5Ys", "annotations": [{"segment": [0.12126730000000001, 6.6429282], "label": "Hand washing clothes"}, {"segment": [7.431638120000001, 14.679608373634945], "label": "Hand washing clothes"}]}, "Yb5b_xx1P9I": {"duration": 94.65, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Yb5b_xx1P9I", "annotations": [{"segment": [18.456422610645923, 65.55721231825272], "label": "Making a sandwich"}]}, "zB0mp92uh48": {"duration": 31.487, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=zB0mp92uh48", "annotations": []}, "elW8E-9bCRQ": {"duration": 153.6, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=elW8E-9bCRQ", "annotations": [{"segment": [8.515584, 143.97122173634946], "label": "Tango"}]}, "tO1VJnsd8sg": {"duration": 99.66, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=tO1VJnsd8sg", "annotations": [{"segment": [7.46279516486782, 24.254084285820415], "label": "Playing racquetball"}, {"segment": [43.22202199652612, 71.51845335413417], "label": "Playing racquetball"}]}, "zCcUwVzaEac": {"duration": 125.57, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zCcUwVzaEac", "annotations": [{"segment": [0.2, 125.341], "label": "Hammer throw"}]}, "En9FemmDusk": {"duration": 279.592, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=En9FemmDusk", "annotations": [{"segment": [150.69989840137822, 189.51984328624846], "label": "Clipping cat claws"}]}, "QVe7NojAHjY": {"duration": 171.735, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QVe7NojAHjY", "annotations": [{"segment": [12.053003916075008, 133.11873084399377], "label": "Baking cookies"}]}, "2rFldVRhE0E": {"duration": 111.36, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=2rFldVRhE0E", "annotations": []}, "KRsDvTz5eUc": {"duration": 16.162, "subset": "testing", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=KRsDvTz5eUc", "annotations": []}, "AD-IKY1O9t0": {"duration": 95.62, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=AD-IKY1O9t0", "annotations": []}, "QoRUUJz-PU0": {"duration": 208.376, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=QoRUUJz-PU0", "annotations": [{"segment": [26.981587301587304, 199.27365077847114], "label": "Futsal"}]}, "1fmaEo3wzxg": {"duration": 193.33, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1fmaEo3wzxg", "annotations": [{"segment": [16.129985671606864, 189.18814969968798], "label": "Getting a tattoo"}]}, "RXeD_Arfvcs": {"duration": 220.06, "subset": "testing", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=RXeD_Arfvcs", "annotations": []}, "Pso333FAp2w": {"duration": 130.775, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Pso333FAp2w", "annotations": [{"segment": [12.235975039001561, 16.518566302652108], "label": "Disc dog"}, {"segment": [45.273107644305774, 47.10850390015601], "label": "Disc dog"}, {"segment": [72.60011856474259, 73.61978315132606], "label": "Disc dog"}, {"segment": [89.73048361934477, 95.04727925117005], "label": "Disc dog"}, {"segment": [117.48333229329175, 122.37847113884557], "label": "Disc dog"}]}, "aS0wGPhD48o": {"duration": 80.03999999999999, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aS0wGPhD48o", "annotations": [{"segment": [9.11522665522621, 61.55899645241809], "label": "Doing nails"}]}, "M1J3XjExaW4": {"duration": 99.66, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=M1J3XjExaW4", "annotations": [{"segment": [0.01, 87.06568461778471], "label": "Croquet"}]}, "SNJgJB2PGdI": {"duration": 189.66, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SNJgJB2PGdI", "annotations": []}, "TCFDMNSYTVw": {"duration": 194.259, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=TCFDMNSYTVw", "annotations": [{"segment": [0.2, 186.6249360374415], "label": "Wakeboarding"}]}, "juiMCvZUYwk": {"duration": 144.01, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=juiMCvZUYwk", "annotations": [{"segment": [39.31634551482059, 103.34582249609984], "label": "Wrapping presents"}]}, "uLcuqmzyAGU": {"duration": 159.34, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=uLcuqmzyAGU", "annotations": []}, "s82_J03bqwQ": {"duration": 228.86, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=s82_J03bqwQ", "annotations": [{"segment": 
[78.51391575663025, 204.8499438377535], "label": "Windsurfing"}]}, "rgJ00nNS_r0": {"duration": 48.948, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rgJ00nNS_r0", "annotations": [{"segment": [0.01, 48.947096], "label": "Croquet"}]}, "lgWH94ea3-U": {"duration": 73.61, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lgWH94ea3-U", "annotations": [{"segment": [0, 8.497564074882995], "label": "Smoking hookah"}, {"segment": [15.61714478627145, 19.98075877067083], "label": "Smoking hookah"}, {"segment": [23.999876914196562, 28.93765063338533], "label": "Smoking hookah"}, {"segment": [37.66487860218408, 38.81319807176286], "label": "Smoking hookah"}, {"segment": [43.521307897035875, 46.851434358814345], "label": "Smoking hookah"}, {"segment": [48.6887455101404, 51.215048343213724], "label": "Smoking hookah"}, {"segment": [60.286772152886115, 61.77958746333853], "label": "Smoking hookah"}]}, "c-zbA4zixfE": {"duration": 140.179, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=c-zbA4zixfE", "annotations": [{"segment": [0.20000000000000004, 139.97886621315195], "label": "Throwing darts"}]}, "-Lxv663IEaI": {"duration": 73.82, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-Lxv663IEaI", "annotations": [{"segment": [0.01, 73.8162358276644], "label": "Horseback riding"}]}, "5pzCqex6S48": {"duration": 61.84, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5pzCqex6S48", "annotations": [{"segment": [9.646604992199688, 55.95030895475819], "label": "Using parallel bars"}]}, "4VAhZEpQsv8": {"duration": 195.65, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4VAhZEpQsv8", "annotations": [{"segment": [8.695908706708268, 106.63930150858035], "label": "Doing nails"}]}, "Hh7aXAtWsXg": {"duration": 33.275, "subset": "training", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=Hh7aXAtWsXg", "annotations": [{"segment": [7.163555244250586, 12.821725301092044], "label": "Throwing darts"}]}, "DuU7OKTAmsY": {"duration": 134.188, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=DuU7OKTAmsY", "annotations": [{"segment": [8.897011560734539, 110.00893118037646], "label": "River tubing"}]}, "8LI3rnG6kMA": {"duration": 91.766, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8LI3rnG6kMA", "annotations": [{"segment": [18.765796137992183, 68.1211087351928], "label": "Carving jack-o-lanterns"}]}, "gCku1WVr608": {"duration": 147.958, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gCku1WVr608", "annotations": [{"segment": [75.70994809360374, 141.26368363806552], "label": "Grooming dog"}]}, "UzdI8PLRiXg": {"duration": 220.962, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=UzdI8PLRiXg", "annotations": [{"segment": [0.01, 220.921], "label": "Sharpening knives"}]}, "-u2zAMnrCC4": {"duration": 161.05, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=-u2zAMnrCC4", "annotations": [{"segment": [79.63129017160686, 107.51480187207488], "label": "Kayaking"}]}, "o-S8I8lhQpo": {"duration": 212.23, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=o-S8I8lhQpo", "annotations": [{"segment": [67.06648272, 148.66059763478975], "label": "Windsurfing"}]}, "ienRkMdn_OM": {"duration": 117.35, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ienRkMdn_OM", "annotations": [{"segment": [0.01, 117.301], "label": "Grooming horse"}]}, "PVJK-r4pNg8": {"duration": 75.61, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=PVJK-r4pNg8", "annotations": [{"segment": [23.88431341335286, 60.91974153120124], "label": "Playing 
squash"}]}, "ayDMt_8KajY": {"duration": 33.18, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ayDMt_8KajY", "annotations": [{"segment": [0.7505867568036055, 5.202342519500779], "label": "Shot put"}, {"segment": [18.505845280811233, 27.875238276131043], "label": "Shot put"}]}, "Zjfw0n32DBA": {"duration": 79.23, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Zjfw0n32DBA", "annotations": [{"segment": [21.011463478939156, 70.45020107644307], "label": "Brushing teeth"}]}, "qV8hycDJWto": {"duration": 81.642, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qV8hycDJWto", "annotations": [{"segment": [35.04341330366595, 63.35747706727404], "label": "Welding"}]}, "tdHQb9Uwc4A": {"duration": 186.364, "subset": "training", "resolution": "658x480", "url": "https://www.youtube.com/watch?v=tdHQb9Uwc4A", "annotations": [{"segment": [14.536923166926679, 186.3633560090703], "label": "Powerbocking"}]}, "LfOWLfeyo-w": {"duration": 43.49, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=LfOWLfeyo-w", "annotations": []}, "VFSSZXuMmgg": {"duration": 141.085, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VFSSZXuMmgg", "annotations": [{"segment": [19.747421033863578, 118.04571010454632], "label": "Rollerblading"}, {"segment": [130.11358043891647, 131.86890703300665], "label": "Rollerblading"}]}, "6fyIc1vrK4Q": {"duration": 197.579, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6fyIc1vrK4Q", "annotations": [{"segment": [56.07672698907957, 184.88705148205926], "label": "Decorating the Christmas tree"}]}, "OTtgayeZclk": {"duration": 135.7, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OTtgayeZclk", "annotations": []}, "JKVOi9DE8Hs": {"duration": 73.794, "subset": "training", "resolution": "293x240", "url": 
"https://www.youtube.com/watch?v=JKVOi9DE8Hs", "annotations": [{"segment": [1.1512170963028998, 5.065355163806553], "label": "Throwing darts"}, {"segment": [8.979493244929797, 9.670223494539782], "label": "Throwing darts"}, {"segment": [30.852617815912634, 38.45065056162246], "label": "Throwing darts"}, {"segment": [50.653551638065515, 54.22232459438377], "label": "Throwing darts"}]}, "9mL3eC5iizc": {"duration": 65.76, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9mL3eC5iizc", "annotations": [{"segment": [6.2578681903276125, 37.34203313572543], "label": "Playing harmonica"}]}, "v7OW60YncY4": {"duration": 136.93, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=v7OW60YncY4", "annotations": [{"segment": [48.91797897347894, 65.79361364118564], "label": "Polishing forniture"}, {"segment": [69.63869495787831, 73.27016064586583], "label": "Polishing forniture"}, {"segment": [77.5424732199688, 87.58240776911076], "label": "Polishing forniture"}]}, "YL3MvJVk6u0": {"duration": 45.906, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=YL3MvJVk6u0", "annotations": [{"segment": [0.01, 44.032274875355654], "label": "Trimming branches or hedges"}]}, "4SSbyJ6pMuE": {"duration": 91.51, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4SSbyJ6pMuE", "annotations": [{"segment": [2.141413819818099, 86.22759500156006], "label": "Playing violin"}]}, "zryPDx9WL08": {"duration": 152.74, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zryPDx9WL08", "annotations": [{"segment": [26.8196166, 134.21145865834632], "label": "Breakdancing"}]}, "G4fSvtn1HAw": {"duration": 193.771, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=G4fSvtn1HAw", "annotations": [{"segment": [15.41697772400692, 168.3775798642746], "label": "Baking cookies"}]}, "Ksbk5rtqVd0": 
{"duration": 89.281, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Ksbk5rtqVd0", "annotations": [{"segment": [42.592240249609986, 82.83680967238689], "label": "Applying sunscreen"}]}, "S8RXX1uOGgQ": {"duration": 194.352, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=S8RXX1uOGgQ", "annotations": [{"segment": [44.43172167097788, 46.84977480202812], "label": "Wakeboarding"}, {"segment": [49.872341215840926, 56.82424396761037], "label": "Wakeboarding"}, {"segment": [60.75358030556702, 65.28742992628622], "label": "Wakeboarding"}, {"segment": [68.91450962286159, 71.93707603667438], "label": "Wakeboarding"}, {"segment": [76.16866901601232, 77.98220886429999], "label": "Wakeboarding"}, {"segment": [82.21380184363792, 85.23636825745074], "label": "Wakeboarding"}, {"segment": [100.65145696789602, 103.37176674032754], "label": "Wakeboarding"}, {"segment": [113.34623590590981, 133.59743087845558], "label": "Wakeboarding"}, {"segment": [136.01548400950583, 150.82605943718858], "label": "Wakeboarding"}, {"segment": [152.94185592685753, 174.09981956211323], "label": "Wakeboarding"}]}, "t6v-9gTSfp4": {"duration": 103.051, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=t6v-9gTSfp4", "annotations": []}, "uO2YgcdqoZg": {"duration": 152.23, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uO2YgcdqoZg", "annotations": [{"segment": [29.250641154351637, 38.010765764943535], "label": "Playing squash"}, {"segment": [57.42509598301207, 64.76465984594041], "label": "Playing squash"}, {"segment": [74.4718249549747, 89.38771280544198], "label": "Playing squash"}, {"segment": [107.38148227584696, 129.6369339892426], "label": "Playing squash"}]}, "cSfs5ht9sro": {"duration": 205.544, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=cSfs5ht9sro", "annotations": [{"segment": [61.912401423053204, 
185.16097618940833], "label": "Skiing"}]}, "PqrexMJ5ohk": {"duration": 129.915, "subset": "training", "resolution": "568x320", "url": "https://www.youtube.com/watch?v=PqrexMJ5ohk", "annotations": [{"segment": [0.2, 117.71096255850233], "label": "Playing pool"}]}, "lZsPSqZbfus": {"duration": 73.352, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=lZsPSqZbfus", "annotations": []}, "whP6aYxUklU": {"duration": 25.055, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=whP6aYxUklU", "annotations": []}, "ot4CuJqrN-Y": {"duration": 99.823, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=ot4CuJqrN-Y", "annotations": [{"segment": [12.127977620954923, 97.33479374454829], "label": "Drum corps"}]}, "UJfwaeMrZ7A": {"duration": 25.33, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=UJfwaeMrZ7A", "annotations": [{"segment": [4.12632948, 16.05060852], "label": "Hopscotch"}]}, "OcArjqkv1RU": {"duration": 81.48, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OcArjqkv1RU", "annotations": []}, "GI2a7_XfKaI": {"duration": 102.91, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=GI2a7_XfKaI", "annotations": []}, "bEmDo5nmBe0": {"duration": 33.18, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bEmDo5nmBe0", "annotations": []}, "hGvf2Y3H0GI": {"duration": 232.06, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hGvf2Y3H0GI", "annotations": []}, "woUdHiRWKMg": {"duration": 180.629, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=woUdHiRWKMg", "annotations": [{"segment": [14.456130335691935, 174.4860508964396], "label": "Changing car wheel"}]}, "nP1Bk-vJgVg": {"duration": 197.35, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=nP1Bk-vJgVg", "annotations": []}, "q0o0k2uD0co": {"duration": 29.048, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=q0o0k2uD0co", "annotations": [{"segment": [0.01, 29.0481632], "label": "Hand car wash"}]}, "TxiLjdHoXU4": {"duration": 127.13, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TxiLjdHoXU4", "annotations": [{"segment": [0, 119.15145283307332], "label": "Using the balance beam"}]}, "Z-UR8n33TWc": {"duration": 26.982, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Z-UR8n33TWc", "annotations": [{"segment": [0.01, 24.224589114596], "label": "Sumo"}]}, "1xHdr8MWJOc": {"duration": 181.58, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1xHdr8MWJOc", "annotations": []}, "zET6lJ-0dGo": {"duration": 180.002, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zET6lJ-0dGo", "annotations": []}, "eP3h7IqxRqo": {"duration": 96.41, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eP3h7IqxRqo", "annotations": []}, "Mz-yz0fQ_Hk": {"duration": 101.66, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Mz-yz0fQ_Hk", "annotations": [{"segment": [7.219522620904836, 82.58816536661466], "label": "Sailing"}]}, "JaBA8V-nuDg": {"duration": 177.935, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JaBA8V-nuDg", "annotations": [{"segment": [44.69181982517395, 93.26988408112324], "label": "Knitting"}]}, "2WvbAEMG5DU": {"duration": 59.675, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2WvbAEMG5DU", "annotations": [{"segment": [23.718182527301092, 44.940098283931356], "label": "Building sandcastles"}]}, "aVL9ArQS_Ag": {"duration": 140.81, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=aVL9ArQS_Ag", "annotations": [{"segment": [12.299120124804993, 129.36038845553824], "label": "Cleaning shoes"}]}, "vSExyDlV9JA": {"duration": 45.999, "subset": "training", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=vSExyDlV9JA", "annotations": [{"segment": [0.2, 16.405474258970358], "label": "Playing beach volleyball"}]}, "0hWGSkDnRHA": {"duration": 208.59, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0hWGSkDnRHA", "annotations": [{"segment": [1.627026918876755, 8.135134594383775], "label": "Playing water polo"}]}, "rGFhqcxeVIg": {"duration": 105.91, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=rGFhqcxeVIg", "annotations": []}, "cQMWhfU9kr4": {"duration": 200.53, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cQMWhfU9kr4", "annotations": []}, "NNQQPZpzCOk": {"duration": 63.112, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NNQQPZpzCOk", "annotations": [{"segment": [5.021378527301091, 50.50916048049922], "label": "Shuffleboard"}]}, "_1CHWQx_x-o": {"duration": 177.8, "subset": "testing", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=_1CHWQx_x-o", "annotations": []}, "absXyPrnh6Q": {"duration": 113.66, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=absXyPrnh6Q", "annotations": []}, "nn-ASL9KZUo": {"duration": 128.964, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nn-ASL9KZUo", "annotations": [{"segment": [8.24483775351014, 10.859054602184088], "label": "Throwing darts"}, {"segment": [14.478739469578786, 18.50061154446178], "label": "Throwing darts"}, {"segment": [20.511547581903276, 23.527951638065524], "label": "Throwing darts"}, {"segment": [50.27340093603745, 53.89308580343214], "label": "Throwing darts"}, {"segment": [55.90402184087364, 
59.92589391575664], "label": "Throwing darts"}, {"segment": [61.132455538221535, 67.76854446177848], "label": "Throwing darts"}, {"segment": [69.98057410296413, 71.99151014040562], "label": "Throwing darts"}, {"segment": [73.60025897035882, 77.42103744149766], "label": "Throwing darts"}, {"segment": [78.8286926677067, 83.65493915756632], "label": "Throwing darts"}, {"segment": [86.67134321372855, 93.7096193447738], "label": "Throwing darts"}, {"segment": [98.9380530421217, 110.40038845553822], "label": "Throwing darts"}]}, "dAcdSkaoK64": {"duration": 89.28, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dAcdSkaoK64", "annotations": [{"segment": [14.396400000000002, 43.80438280265211], "label": "Using the pommel horse"}, {"segment": [68.73597110686428, 85.44988505382216], "label": "Using the pommel horse"}]}, "dzpsJQN4o-Y": {"duration": 174.27, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dzpsJQN4o-Y", "annotations": [{"segment": [44.04220446368875, 51.92630279360834], "label": "Shaving legs"}, {"segment": [94.0654490397303, 145.71988637368625], "label": "Shaving legs"}]}, "tkmoslnsmwY": {"duration": 188.685, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tkmoslnsmwY", "annotations": [{"segment": [34.039556395990175, 96.54323218491189], "label": "Rafting"}, {"segment": [146.72223969035514, 169.02402080388546], "label": "Rafting"}]}, "3pBldeB3uaE": {"duration": 124.74, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3pBldeB3uaE", "annotations": [{"segment": [46.703624087363494, 54.48756143525741], "label": "Springboard diving"}, {"segment": [57.01734107332293, 66.94186119188767], "label": "Springboard diving"}, {"segment": [71.61222360062402, 76.67178287675507], "label": "Springboard diving"}, {"segment": [82.8989327550702, 85.23411395943837], "label": "Springboard diving"}, {"segment": 
[88.54228733229328, 91.0720669703588], "label": "Springboard diving"}, {"segment": [92.6288544399376, 95.15863407800312], "label": "Springboard diving"}, {"segment": [96.91001998127925, 103.13716985959438], "label": "Springboard diving"}]}, "-z0ZvJD9gY4": {"duration": 110.5, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-z0ZvJD9gY4", "annotations": [{"segment": [0.01, 107.22827958016279], "label": "Bathing dog"}]}, "59NxymNdzBE": {"duration": 153.18, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=59NxymNdzBE", "annotations": [{"segment": [41.807691355273256, 50.169229626327905], "label": "Polishing forniture"}, {"segment": [56.858460243171635, 63.30878897815913], "label": "Polishing forniture"}, {"segment": [69.042415149766, 78.12065658814353], "label": "Polishing forniture"}, {"segment": [89.11010675039002, 93.88812856006241], "label": "Polishing forniture"}, {"segment": [100.09955691263652, 104.39977654134165], "label": "Polishing forniture"}, {"segment": [116.58373215600625, 129.2454899516381], "label": "Polishing forniture"}, {"segment": [136.8903248471139, 142.62395101872076], "label": "Polishing forniture"}]}, "C7sabT8febk": {"duration": 113.52000000000001, "subset": "training", "resolution": "568x320", "url": "https://www.youtube.com/watch?v=C7sabT8febk", "annotations": [{"segment": [35.42039251170047, 113.32235827664398], "label": "Painting"}]}, "3sL7fLHM09c": {"duration": 31.42, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3sL7fLHM09c", "annotations": [{"segment": [4.656126163413884, 28.769957003120123], "label": "Using uneven bars"}]}, "5VwGzOLPFAQ": {"duration": 141.039, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5VwGzOLPFAQ", "annotations": [{"segment": [13.42171338009983, 36.52466264092741], "label": "Snowboarding"}, {"segment": [88.45129145573986, 124.97595409666727], "label": 
"Snowboarding"}]}, "NVtRGWuiEP4": {"duration": 153.74, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=NVtRGWuiEP4", "annotations": [{"segment": [0.01, 151.64940560711239], "label": "Baton twirling"}]}, "DvDfifKGXXg": {"duration": 60.488, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=DvDfifKGXXg", "annotations": [{"segment": [0.01, 60.48798185941043], "label": "Painting fence"}]}, "aIm5e9Oqob0": {"duration": 105.001, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=aIm5e9Oqob0", "annotations": []}, "ulV37d5wFaw": {"duration": 202.04, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ulV37d5wFaw", "annotations": [{"segment": [39.39875663026521, 151.92160556630265], "label": "Removing curlers"}]}, "S7RWgSxlZ-M": {"duration": 99.243, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=S7RWgSxlZ-M", "annotations": [{"segment": [22.16244789937575, 83.67328590560628], "label": "Sharpening knives"}]}, "fGA7nlbGmHU": {"duration": 69.126, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fGA7nlbGmHU", "annotations": [{"segment": [0.01, 62.34506465017016], "label": "Playing drums"}]}, "7FPvAakfM9Y": {"duration": 79.157, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=7FPvAakfM9Y", "annotations": [{"segment": [0.01, 55.076355616224646], "label": "Mooping floor"}]}, "jmWtHelsblo": {"duration": 214.51, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jmWtHelsblo", "annotations": [{"segment": [0.01, 214.50594], "label": "Zumba"}]}, "w6P9JrTGRlE": {"duration": 102.35, "subset": "training", "resolution": "608x1080", "url": "https://www.youtube.com/watch?v=w6P9JrTGRlE", "annotations": [{"segment": [71.69539468174727, 102.35356009070294], "label": "Drinking coffee"}]}, 
"s43eZJ0hy44": {"duration": 91.49, "subset": "validation", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=s43eZJ0hy44", "annotations": [{"segment": [15.124465520000001, 60.971103744149765], "label": "Dodgeball"}]}, "Y5qDKbTkCaY": {"duration": 107.21000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Y5qDKbTkCaY", "annotations": [{"segment": [19.233620647425898, 22.57859815132605], "label": "Shot put"}, {"segment": [31.777286287051485, 53.5196400624025], "label": "Shot put"}, {"segment": [61.04583944617785, 92.3213791076443], "label": "Shot put"}]}, "rv9jW-t3mT4": {"duration": 173.71, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rv9jW-t3mT4", "annotations": [{"segment": [0.01, 72.0652511700468], "label": "Pole vault"}]}, "HDvXPeEXO0E": {"duration": 188.988, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HDvXPeEXO0E", "annotations": []}, "0EepbsAtiDk": {"duration": 186.55, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0EepbsAtiDk", "annotations": [{"segment": [3.200397934477379, 183.0036739469579], "label": "Horseback riding"}]}, "etqiXPKlthk": {"duration": 199.9, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=etqiXPKlthk", "annotations": [{"segment": [12.474295760946084, 118.50580962870517], "label": "Drinking beer"}]}, "2DtotlMv9Gg": {"duration": 65.62, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2DtotlMv9Gg", "annotations": [{"segment": [6.684575718577875, 59.629684914839565], "label": "Hurling"}]}, "p2P5g7HTJeY": {"duration": 163.33, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=p2P5g7HTJeY", "annotations": [{"segment": [9.42495631825273, 153.11060998439936], "label": "Hurling"}]}, "0BtHd6dvm78": {"duration": 34.0, "subset": "testing", 
"resolution": "176x144", "url": "https://www.youtube.com/watch?v=0BtHd6dvm78", "annotations": []}, "T6TgMYLrPII": {"duration": 113.78, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=T6TgMYLrPII", "annotations": [{"segment": [34.7900849367308, 112.71277341653666], "label": "Pole vault"}]}, "hzuQYOG0a_g": {"duration": 7.85, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hzuQYOG0a_g", "annotations": [{"segment": [0, 7.848343], "label": "Kayaking"}]}, "iMF8-iDLl6U": {"duration": 219.661, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iMF8-iDLl6U", "annotations": [{"segment": [25.696684867394698, 62.69991107644306], "label": "Changing car wheel"}, {"segment": [76.7474321372855, 138.41947581903275], "label": "Changing car wheel"}]}, "sYDP8SiL6_g": {"duration": 125.39, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=sYDP8SiL6_g", "annotations": []}, "R0dqEWnDC7k": {"duration": 89.82, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=R0dqEWnDC7k", "annotations": [{"segment": [15.26085023400936, 86.24480499219969], "label": "Cricket"}]}, "m6T68amiXjs": {"duration": 234.96, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=m6T68amiXjs", "annotations": [{"segment": [117.01342561999999, 209.62773790951636], "label": "Playing field hockey"}, {"segment": [14.093020759999998, 97.83112794], "label": "Playing field hockey"}]}, "uqAFtPVzEEs": {"duration": 112.478, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uqAFtPVzEEs", "annotations": [{"segment": [2.849527026013051, 17.208595561838596], "label": "Beach soccer"}, {"segment": [28.245586924080538, 33.32469751598085], "label": "Beach soccer"}, {"segment": [43.1326352106849, 51.18915545990608], "label": "Beach soccer"}, {"segment": [61.697660132803286, 
68.52818817018645], "label": "Beach soccer"}, {"segment": [77.46041714214907, 86.3926461141117], "label": "Beach soccer"}, {"segment": [92.17232368420514, 97.60171776520204], "label": "Beach soccer"}, {"segment": [103.20625359074721, 107.75993894900266], "label": "Beach soccer"}]}, "PLWeVDUH_Cw": {"duration": 66.46, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=PLWeVDUH_Cw", "annotations": [{"segment": [16.344126365054603, 58.26810764430577], "label": "Ping-pong"}]}, "ipcvgAb5y0U": {"duration": 86.03, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ipcvgAb5y0U", "annotations": [{"segment": [52.69950606405459, 63.82170878056549], "label": "Making a sandwich"}]}, "5fMtHNSIOAE": {"duration": 206.449, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5fMtHNSIOAE", "annotations": [{"segment": [3.3438920793978966, 203.9773874065007], "label": "Tug of war"}]}, "7TOw39_59xo": {"duration": 112.78, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7TOw39_59xo", "annotations": [{"segment": [25.3442057871143, 88.4092770991282], "label": "Hand car wash"}]}, "r82RY124wVY": {"duration": 100.15, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=r82RY124wVY", "annotations": [{"segment": [45.854306890000004, 81.82911223], "label": "Tennis serve with ball bouncing"}, {"segment": [18.418554280811232, 26.22302643369735], "label": "Tennis serve with ball bouncing"}]}, "rBGdFwbG118": {"duration": 170.017, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rBGdFwbG118", "annotations": [{"segment": [12.25409286842333, 32.38065767087598], "label": "Hand car wash"}, {"segment": [54.625808242007864, 75.54684184722784], "label": "Hand car wash"}, {"segment": [91.96588136747705, 96.20305285657362], "label": "Hand car wash"}]}, "l_36uVP3LnI": {"duration": 96.22, 
"subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=l_36uVP3LnI", "annotations": [{"segment": [0, 96.181], "label": "Triple jump"}]}, "rKnQNI9PNFI": {"duration": 159.55, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rKnQNI9PNFI", "annotations": [{"segment": [14.395655120000004, 61.858399680000005], "label": "Tai chi"}]}, "o-BGGr-DU5g": {"duration": 47.532, "subset": "validation", "resolution": "853x470", "url": "https://www.youtube.com/watch?v=o-BGGr-DU5g", "annotations": [{"segment": [4.523254394695788, 9.268963923556942], "label": "Canoeing"}, {"segment": [13.792218318252731, 18.09301757878315], "label": "Canoeing"}, {"segment": [23.357789087363496, 29.66068455538222], "label": "Canoeing"}, {"segment": [33.07166327925117, 38.8554967675507], "label": "Canoeing"}]}, "WYcTPQSXwRc": {"duration": 186.573, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=WYcTPQSXwRc", "annotations": [{"segment": [0.20000000000000004, 114.61286114384193], "label": "Drum corps"}, {"segment": [115.77349775722237, 159.007210241263], "label": "Drum corps"}]}, "4x08i2_AYKo": {"duration": 48.089, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4x08i2_AYKo", "annotations": [{"segment": [0.01, 43.812323219968796], "label": "Powerbocking"}]}, "ChPzol03Hqs": {"duration": 206.565, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ChPzol03Hqs", "annotations": [{"segment": [86.686285105826, 107.31053137635709], "label": "Applying sunscreen"}]}, "4FGdbFzq-8I": {"duration": 74.304, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=4FGdbFzq-8I", "annotations": [{"segment": [12.866970189011644, 50.656448925117004], "label": "Surfing"}, {"segment": [68.39200198127925, 74.103853], "label": "Surfing"}]}, "2ESPauwYUnQ": {"duration": 30.3, "subset": "training", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=2ESPauwYUnQ", "annotations": [{"segment": [2.552745875195008, 24.676543460218408], "label": "Ironing clothes"}]}, "te1znGPS_Vw": {"duration": 158.96, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=te1znGPS_Vw", "annotations": [{"segment": [127.6787566146646, 147.26442995943836], "label": "Paintball"}, {"segment": [2.72780508, 112.48857732], "label": "Paintball"}]}, "ekJtPwfLM-M": {"duration": 154.41, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ekJtPwfLM-M", "annotations": [{"segment": [41.43472982059975, 151.35155618975864], "label": "Breakdancing"}]}, "wlYxVUJSJVI": {"duration": 221.542, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=wlYxVUJSJVI", "annotations": [{"segment": [30.740544461778473, 194.805248049922], "label": "Futsal"}]}, "idkQqlwyff8": {"duration": 51.99, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=idkQqlwyff8", "annotations": []}, "pPrW3iW0DA8": {"duration": 64.18, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pPrW3iW0DA8", "annotations": [{"segment": [1.249739976599064, 48.73985908736349], "label": "Discus throw"}]}, "N6y3yXiVo24": {"duration": 35.063, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=N6y3yXiVo24", "annotations": [{"segment": [0.01, 34.78863583525741], "label": "Hitting a pinata"}]}, "3jt4XE6ilIQ": {"duration": 54.614, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3jt4XE6ilIQ", "annotations": [{"segment": [19.383906542056074, 31.11627102803738], "label": "Bullfighting"}, {"segment": [6.801370716510903, 11.987415887850467], "label": "Bullfighting"}]}, "ykdPTjsmfgY": {"duration": 142.316, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ykdPTjsmfgY", 
"annotations": [{"segment": [9.54687892005476, 13.987287720080232], "label": "Canoeing"}, {"segment": [20.20386004011589, 31.74892292018211], "label": "Canoeing"}, {"segment": [37.7434748002165, 40.62974052023305], "label": "Canoeing"}, {"segment": [59.94551880034385, 63.941886720366774], "label": "Canoeing"}, {"segment": [69.71441816039989, 72.82270429953198], "label": "Canoeing"}, {"segment": [82.81362409672386, 98.57707533229329], "label": "Canoeing"}, {"segment": [114.1185061279251, 121.44518064586583], "label": "Canoeing"}]}, "zL0L9Faa5CA": {"duration": 90.51, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zL0L9Faa5CA", "annotations": [{"segment": [41.4073088, 83.23940669110765], "label": "High jump"}, {"segment": [26.052028048361933, 29.433936640000002], "label": "High jump"}]}, "HbXNXmCRFh4": {"duration": 4.06, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HbXNXmCRFh4", "annotations": [{"segment": [0.02535719812792512, 4.063491], "label": "Playing water polo"}]}, "WT7ZtXsTslM": {"duration": 69.139, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WT7ZtXsTslM", "annotations": [{"segment": [0.2, 53.39281591263651], "label": "Making a cake"}]}, "Pp42uLsicMk": {"duration": 194.096, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Pp42uLsicMk", "annotations": []}, "Xt86M-mRxi8": {"duration": 129.057, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Xt86M-mRxi8", "annotations": [{"segment": [33.412614664586584, 102.85449453978157], "label": "Spread mulch"}]}, "cFJo7Nm2W3Y": {"duration": 210.536, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=cFJo7Nm2W3Y", "annotations": [{"segment": [30.826045026525662, 199.38548102180684], "label": "Beach soccer"}]}, "J_ZY19xnhJU": {"duration": 200.296, "subset": "training", "resolution": 
"426x240", "url": "https://www.youtube.com/watch?v=J_ZY19xnhJU", "annotations": [{"segment": [0.01, 200.261], "label": "Swinging at the playground"}]}, "GkwkHQJifDU": {"duration": 63.69, "subset": "validation", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=GkwkHQJifDU", "annotations": [{"segment": [26.9422684, 48.247736], "label": "Doing step aerobics"}]}, "iuVi8QAWfCI": {"duration": 152.509, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iuVi8QAWfCI", "annotations": [{"segment": [24.26812372230889, 73.99398507488299], "label": "Painting furniture"}, {"segment": [96.59664932605304, 131.80922100156005], "label": "Painting furniture"}]}, "uWQ5tyfrXT0": {"duration": 216.64, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uWQ5tyfrXT0", "annotations": []}, "rrTNLcPCWK8": {"duration": 216.69, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=rrTNLcPCWK8", "annotations": [{"segment": [67.60955257410296, 195.05355917628705], "label": "Dodgeball"}]}, "7S15OsGinjw": {"duration": 72.563, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7S15OsGinjw", "annotations": [{"segment": [42.54279016436351, 45.48145259738532], "label": "Building sandcastles"}, {"segment": [49.66339529053173, 64.24368197744762], "label": "Building sandcastles"}]}, "_UPD2IvdQ_M": {"duration": 49.74, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_UPD2IvdQ_M", "annotations": [{"segment": [8.542967708221436, 45.9615886054177], "label": "Using the pommel horse"}]}, "X_K7Aa3Aa-E": {"duration": 67.13, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=X_K7Aa3Aa-E", "annotations": []}, "W8XwSNt8P5A": {"duration": 110.806, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=W8XwSNt8P5A", "annotations": [{"segment": 
[17.113498041962497, 19.70645229074469], "label": "Kneeling"}, {"segment": [75.71426322308892, 76.75144491107645], "label": "Kneeling"}]}, "97LW-ivu01A": {"duration": 159.2, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=97LW-ivu01A", "annotations": [{"segment": [0.01, 148.02000119812794], "label": "Kayaking"}]}, "A_4yI6vh03I": {"duration": 140.23, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=A_4yI6vh03I", "annotations": [{"segment": [67.02882449297972, 79.93159984399377], "label": "Pole vault"}]}, "LYqq0dPB-U8": {"duration": 83.732, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=LYqq0dPB-U8", "annotations": [{"segment": [7.587182868103979, 74.33698117699633], "label": "Decorating the Christmas tree"}]}, "c1Gby2EHBzs": {"duration": 20.922, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=c1Gby2EHBzs", "annotations": [{"segment": [6.070732127925116, 7.376265918876754], "label": "Hurling"}, {"segment": [10.117886879875194, 11.423420670826832], "label": "Hurling"}, {"segment": [13.283806322932916, 14.45878673478939], "label": "Hurling"}]}, "Bbb8CybvMlk": {"duration": 213.09, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Bbb8CybvMlk", "annotations": [{"segment": [46.873046579006015, 68.81362157343436], "label": "Wrapping presents"}, {"segment": [77.45687838942128, 111.03260678998588], "label": "Wrapping presents"}, {"segment": [180.84352569110763, 187.1597517722309], "label": "Wrapping presents"}]}, "G5ZD9YfZ7aM": {"duration": 206.379, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=G5ZD9YfZ7aM", "annotations": []}, "IAqNteMxXNk": {"duration": 106.278, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IAqNteMxXNk", "annotations": [{"segment": [26.030583099843994, 99.31413552106085], 
"label": "Cleaning sink"}, {"segment": [101.30373422932918, 106.077731], "label": "Cleaning sink"}]}, "TpgtCuYz0RQ": {"duration": 122.416, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TpgtCuYz0RQ", "annotations": [{"segment": [56.32198907956318, 104.24341029641185], "label": "Rock-paper-scissors"}]}, "DR6F2sgfLUc": {"duration": 67.04, "subset": "testing", "resolution": "318x240", "url": "https://www.youtube.com/watch?v=DR6F2sgfLUc", "annotations": []}, "qSXMTcGBqtY": {"duration": 223.77, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qSXMTcGBqtY", "annotations": [{"segment": [11.171080334369837, 189.21017151638065], "label": "Brushing hair"}]}, "i2u80Y3BMYs": {"duration": 214.74, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=i2u80Y3BMYs", "annotations": [{"segment": [35.17551418876755, 184.2526933697348], "label": "Clean and jerk"}]}, "eECoZPqsTYI": {"duration": 159.684, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eECoZPqsTYI", "annotations": [{"segment": [2.4911645709828396, 153.4557375725429], "label": "Playing drums"}]}, "4ELWxQw0PPE": {"duration": 226.767, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=4ELWxQw0PPE", "annotations": [{"segment": [3.1839230793721547, 25.82515374102964], "label": "Arm wrestling"}, {"segment": [29.362846034321375, 67.92369203120126], "label": "Arm wrestling"}, {"segment": [75.35284584711388, 124.52676872386895], "label": "Arm wrestling"}, {"segment": [130.18707639313573, 169.45546084867394], "label": "Arm wrestling"}, {"segment": [171.57807622464898, 212.96907605616224], "label": "Arm wrestling"}]}, "W_iKlOPSDos": {"duration": 78.972, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=W_iKlOPSDos", "annotations": [{"segment": [11.30668535825545, 67.98023831775701], "label": 
"Waterskiing"}]}, "eHoeL0iPDWQ": {"duration": 229.53, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eHoeL0iPDWQ", "annotations": []}, "J4WbF-bJ1T8": {"duration": 177.006, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=J4WbF-bJ1T8", "annotations": [{"segment": [120.20780789694386, 166.2085041586261], "label": "Doing fencing"}]}, "LlpV8R-EaDQ": {"duration": 17.323, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LlpV8R-EaDQ", "annotations": [{"segment": [0.2, 17.122086167800454], "label": "Building sandcastles"}]}, "nKnsvy7-wAI": {"duration": 129.057, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=nKnsvy7-wAI", "annotations": [{"segment": [31.206963856785563, 35.83767462263116], "label": "Rock-paper-scissors"}, {"segment": [54.36051768601356, 62.81659821494901], "label": "Rock-paper-scissors"}, {"segment": [82.95012219656786, 84.56080418096724], "label": "Rock-paper-scissors"}, {"segment": [107.91569295475819, 111.53972741965678], "label": "Rock-paper-scissors"}]}, "m_B1Jb6Wwn0": {"duration": 34.9, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=m_B1Jb6Wwn0", "annotations": [{"segment": [0.01, 29.618372792511703], "label": "Cleaning shoes"}]}, "okSvWjK0okw": {"duration": 121.19, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=okSvWjK0okw", "annotations": [{"segment": [46.86890483619345, 57.64119344773791], "label": "Rock climbing"}, {"segment": [81.64260842433698, 96.95059750390016], "label": "Rock climbing"}]}, "sbvdGKpHy2M": {"duration": 56.75, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sbvdGKpHy2M", "annotations": [{"segment": [16.4669815975039, 52.942231157566304], "label": "Shaving"}]}, "cZ8iVGnxjX4": {"duration": 174.731, "subset": "testing", "resolution": "272x480", "url": 
"https://www.youtube.com/watch?v=cZ8iVGnxjX4", "annotations": []}, "qx1FNJxiUuE": {"duration": 134.699, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qx1FNJxiUuE", "annotations": [{"segment": [22.4848000749962, 30.25991785793881], "label": "Baking cookies"}, {"segment": [45.81015342382403, 61.15025067125135], "label": "Baking cookies"}, {"segment": [63.25163385583043, 66.40370863269905], "label": "Baking cookies"}, {"segment": [70.1861983649414, 74.38896473409957], "label": "Baking cookies"}, {"segment": [90.14933853042123, 104.6488824898596], "label": "Baking cookies"}]}, "Bhc8INrOToY": {"duration": 234.731, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Bhc8INrOToY", "annotations": [{"segment": [8.420691107644306, 222.61810920436818], "label": "Snowboarding"}]}, "Ux4brKS0Sac": {"duration": 119.22999999999999, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ux4brKS0Sac", "annotations": [{"segment": [4.45458224, 11.81847408], "label": "Wrapping presents"}, {"segment": [18.33341984, 90.24344524], "label": "Wrapping presents"}]}, "00S8I27qDU4": {"duration": 37.41, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=00S8I27qDU4", "annotations": [{"segment": [3.5059282371294853, 18.815148205928235], "label": "Javelin throw"}]}, "VuiuqKX8srs": {"duration": 123.949, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VuiuqKX8srs", "annotations": [{"segment": [0.01, 123.881], "label": "Snow tubing"}]}, "PT18KK_6jmU": {"duration": 177.889, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=PT18KK_6jmU", "annotations": [{"segment": [19.1137523364486, 77.57177570093458], "label": "Scuba diving"}, {"segment": [109.15457009345793, 152.37313084112148], "label": "Scuba diving"}]}, "E4AoMNfsuaw": {"duration": 92.95, "subset": "training", 
"resolution": "360x480", "url": "https://www.youtube.com/watch?v=E4AoMNfsuaw", "annotations": [{"segment": [0, 3.9151895475819036], "label": "Vacuuming floor"}, {"segment": [33.351614664586585, 36.54176911076443], "label": "Vacuuming floor"}, {"segment": [42.92207800312013, 92.7495], "label": "Vacuuming floor"}]}, "DZrCkQ2z-u4": {"duration": 50.62, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DZrCkQ2z-u4", "annotations": [{"segment": [5.208028081123246, 42.05877223088924], "label": "Using the pommel horse"}]}, "qlP8qKtGE7U": {"duration": 42.75, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qlP8qKtGE7U", "annotations": [{"segment": [0.06668944617784711, 23.40799560842434], "label": "Drinking coffee"}]}, "FSe9tVYHgBc": {"duration": 84.78, "subset": "validation", "resolution": "624x336", "url": "https://www.youtube.com/watch?v=FSe9tVYHgBc", "annotations": [{"segment": [34.91556629017161, 45.89280872230889], "label": "Bungee jumping"}, {"segment": [62.5570562698908, 70.36015631201248], "label": "Bungee jumping"}]}, "HatKNbfqL-k": {"duration": 45.72, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=HatKNbfqL-k", "annotations": [{"segment": [30.6968652, 45.58], "label": "Playing flauta"}, {"segment": [4.9064274570982835, 28.3331412], "label": "Playing flauta"}]}, "JoQywfQ6B-8": {"duration": 137.88, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JoQywfQ6B-8", "annotations": [{"segment": [73.99493167209681, 122.17767636817472], "label": "Waterskiing"}]}, "X6CBZwwRdQQ": {"duration": 47.532, "subset": "testing", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=X6CBZwwRdQQ", "annotations": []}, "LN8UWHvoELs": {"duration": 202.827, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LN8UWHvoELs", "annotations": [{"segment": [77.77921857079242, 
187.68650829041857], "label": "Playing drums"}]}, "g9bIJ1MIvPk": {"duration": 214.228, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=g9bIJ1MIvPk", "annotations": [{"segment": [0.01, 189.83011918876755], "label": "Fixing the roof"}]}, "PUHGXI6N0DA": {"duration": 57.17, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PUHGXI6N0DA", "annotations": [{"segment": [7.313104736434354, 50.56744210764431], "label": "Using parallel bars"}]}, "0x7ZcIZffyg": {"duration": 144.1, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0x7ZcIZffyg", "annotations": []}, "DD3MU7uM9mM": {"duration": 13.282, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=DD3MU7uM9mM", "annotations": [{"segment": [4.372624024960999, 8.097460218408736], "label": "Doing a powerbomb"}]}, "MOG4eTo4Q4Y": {"duration": 10.73, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=MOG4eTo4Q4Y", "annotations": [{"segment": [4.31386305, 9.78216584], "label": "Dodgeball"}]}, "hu714U34avg": {"duration": 237.75, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hu714U34avg", "annotations": [{"segment": [40.60553822152886, 215.63580343213727], "label": "Doing step aerobics"}]}, "4kriQ6h6ymI": {"duration": 157.62, "subset": "training", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=4kriQ6h6ymI", "annotations": [{"segment": [56.67276852, 138.07564434], "label": "Horseback riding"}, {"segment": [36.76101291, 46.06514442], "label": "Horseback riding"}]}, "-rx6D1uBsUU": {"duration": 220.822, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-rx6D1uBsUU", "annotations": [{"segment": [37.887535101404055, 204.9371216848674], "label": "Rafting"}]}, "mdu7eHlbDwc": {"duration": 161.077, "subset": "validation", "resolution": "854x478", "url": 
"https://www.youtube.com/watch?v=mdu7eHlbDwc", "annotations": [{"segment": [9.268798701150036, 19.038617415495732], "label": "Scuba diving"}, {"segment": [24.549797247533057, 30.311485253753897], "label": "Scuba diving"}, {"segment": [35.572156911607706, 37.32571413089231], "label": "Scuba diving"}, {"segment": [46.84502474986587, 50.10163101425156], "label": "Scuba diving"}, {"segment": [52.85722093027023, 56.61484354302295], "label": "Scuba diving"}, {"segment": [93.1890369738161, 125.5045914434895], "label": "Scuba diving"}, {"segment": [135.02390206246307, 159.57370313244752], "label": "Scuba diving"}]}, "yIIDIQDKVhI": {"duration": 159.057, "subset": "training", "resolution": "854x468", "url": "https://www.youtube.com/watch?v=yIIDIQDKVhI", "annotations": [{"segment": [3.2257811825273013, 159.0558276643991], "label": "Sharpening knives"}]}, "rbDbXXqaZaM": {"duration": 117.68, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=rbDbXXqaZaM", "annotations": [{"segment": [0, 117.621], "label": "Mowing the lawn"}]}, "JMlNfZlOyX8": {"duration": 121.84, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=JMlNfZlOyX8", "annotations": [{"segment": [27.180066441497658, 111.19118089703588], "label": "Washing face"}]}, "XPxxd9vv1MQ": {"duration": 137.07, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XPxxd9vv1MQ", "annotations": []}, "c8LevTVQ5nk": {"duration": 72.145, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=c8LevTVQ5nk", "annotations": [{"segment": [6.6967109836193455, 11.198701560842434], "label": "Playing drums"}, {"segment": [25.830170936817474, 34.834152091263654], "label": "Playing drums"}, {"segment": [35.95964973556943, 39.22359290405617], "label": "Playing drums"}, {"segment": [52.054266049141965, 64.65983966536662], "label": "Playing drums"}]}, "pLJhoqTpAaI": {"duration": 101.52000000000001, "subset": 
"training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pLJhoqTpAaI", "annotations": [{"segment": [18.24565041, 90.25774953], "label": "Fixing bicycle"}]}, "raNO7MvD-98": {"duration": 105.56, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=raNO7MvD-98", "annotations": []}, "QAJ4PWhM35I": {"duration": 136.302, "subset": "testing", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=QAJ4PWhM35I", "annotations": []}, "sSVG3g2iKL8": {"duration": 144.2, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sSVG3g2iKL8", "annotations": [{"segment": [3.488297971918877, 140.09454758190327], "label": "Playing bagpipes"}]}, "54OZLVVOHJM": {"duration": 14.002, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=54OZLVVOHJM", "annotations": []}, "eBlYGGmeBY0": {"duration": 53.546, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eBlYGGmeBY0", "annotations": [{"segment": [0.2, 51.296814745532735], "label": "Roof shingle removal"}]}, "pPn5f_ked3s": {"duration": 9.82, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pPn5f_ked3s", "annotations": [{"segment": [4.71917744149766, 5.82236177847114], "label": "Long jump"}]}, "rqraLuIBvyg": {"duration": 90.84, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rqraLuIBvyg", "annotations": [{"segment": [16.77579173166927, 82.46328003120125], "label": "Shoveling snow"}]}, "cXRWQa9tQLw": {"duration": 156.02, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=cXRWQa9tQLw", "annotations": [{"segment": [4.381072940876819, 156.014875], "label": "Cheerleading"}]}, "1dDAcUliXrQ": {"duration": 135.09, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=1dDAcUliXrQ", "annotations": []}, "DxNIzbD6uD4": {"duration": 91.98, 
"subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=DxNIzbD6uD4", "annotations": [{"segment": [27.975179195370707, 86.2828499648411], "label": "Doing step aerobics"}]}, "eCzDH6PdtlE": {"duration": 95.69, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eCzDH6PdtlE", "annotations": [{"segment": [3.7320230224174957, 90.91208082609019], "label": "Using the balance beam"}]}, "mXb1tduBEj8": {"duration": 83.2, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mXb1tduBEj8", "annotations": [{"segment": [6.290699687987519, 10.05214898595944], "label": "Volleyball"}, {"segment": [32.62084477379096, 37.80905070202808], "label": "Volleyball"}]}, "5PgDTLR7wFQ": {"duration": 106.975, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=5PgDTLR7wFQ", "annotations": [{"segment": [29.205160587375875, 103.30282413416536], "label": "Cleaning sink"}]}, "HppLOtjJY_Q": {"duration": 166.84, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=HppLOtjJY_Q", "annotations": [{"segment": [17.609024029999997, 72.28167816999999], "label": "Kayaking"}]}, "hQfjNwuaZRo": {"duration": 126.387, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=hQfjNwuaZRo", "annotations": []}, "o0O-CwwSaGw": {"duration": 144.73, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=o0O-CwwSaGw", "annotations": [{"segment": [41.970711388455534, 108.31151326053042], "label": "Bathing dog"}]}, "Mgym0F-T7Js": {"duration": 179.96, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Mgym0F-T7Js", "annotations": [{"segment": [25.018751539604683, 31.18426867356107], "label": "Polishing forniture"}, {"segment": [45.19680761437104, 138.80056773898164], "label": "Polishing forniture"}, {"segment": [144.68583409412182, 144.966084872938], 
"label": "Polishing forniture"}]}, "ES6LPFdgoRk": {"duration": 60.61, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ES6LPFdgoRk", "annotations": []}, "gJydcG-d1cs": {"duration": 238.01, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gJydcG-d1cs", "annotations": [{"segment": [0, 6.683434684326149], "label": "Playing flauta"}, {"segment": [119.18791853714964, 120.67312624477768], "label": "Playing flauta"}, {"segment": [133.66869368652297, 139.60952451703508], "label": "Playing flauta"}, {"segment": [156.6894131547575, 161.14503627764157], "label": "Playing flauta"}, {"segment": [197.5326241622465, 200.13173763806552], "label": "Playing flauta"}, {"segment": [215.72641849297972, 224.63766469578783], "label": "Playing flauta"}]}, "pFTqH8DYPuY": {"duration": 95.44, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=pFTqH8DYPuY", "annotations": []}, "lIbRuIm9MxI": {"duration": 135.582, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lIbRuIm9MxI", "annotations": [{"segment": [85.75392113195402, 122.06058241771092], "label": "Playing congas"}]}, "Qu-Y2u1Xn_U": {"duration": 216.712, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Qu-Y2u1Xn_U", "annotations": [{"segment": [36.91511005141168, 209.0982686828269], "label": "River tubing"}]}, "U_Pb3Wm_pb4": {"duration": 99.08, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=U_Pb3Wm_pb4", "annotations": [{"segment": [0.19999999999999998, 91.35105255538221], "label": "Slacklining"}]}, "TPsMocKBQU0": {"duration": 147.679, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TPsMocKBQU0", "annotations": [{"segment": [0.01, 142.9403035815267], "label": "Slacklining"}]}, "eh6b_t3nvMY": {"duration": 21.53, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=eh6b_t3nvMY", "annotations": []}, "47Zc29aynbo": {"duration": 86.263, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=47Zc29aynbo", "annotations": []}, "cau489T9DLE": {"duration": 30.395, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=cau489T9DLE", "annotations": [{"segment": [0.9454088899072199, 22.54801850577036], "label": "Baton twirling"}]}, "Z9k8GiGjkZ8": {"duration": 84.89, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Z9k8GiGjkZ8", "annotations": [{"segment": [1.4550488799999997, 19.07086464898596], "label": "Bungee jumping"}, {"segment": [64.89391443057723, 74.4293467550702], "label": "Bungee jumping"}]}, "8LTh7r2U8Fw": {"duration": 157.11, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8LTh7r2U8Fw", "annotations": [{"segment": [28.8998751950078, 148.4180031201248], "label": "Javelin throw"}]}, "2768Mbhzllc": {"duration": 41.773, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2768Mbhzllc", "annotations": []}, "hPYGgv_75SY": {"duration": 37.92, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hPYGgv_75SY", "annotations": []}, "yinXvETACC4": {"duration": 126.76, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=yinXvETACC4", "annotations": [{"segment": [31.726918876755068, 112.77387675507019], "label": "Spinning"}]}, "40BPDWojssc": {"duration": 237.36, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=40BPDWojssc", "annotations": []}, "sC_hs-OXERg": {"duration": 181.46, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sC_hs-OXERg", "annotations": [{"segment": [95.544589130504, 144.65504507999998], "label": "Polishing forniture"}]}, "7JQ6lw2KKJE": {"duration": 75.88, "subset": 
"testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7JQ6lw2KKJE", "annotations": []}, "-_N_l6HZiak": {"duration": 15.07, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-_N_l6HZiak", "annotations": []}, "mi6f8kGVR70": {"duration": 58.445, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mi6f8kGVR70", "annotations": [{"segment": [0.01, 50.03608268330733], "label": "Camel ride"}]}, "MEwGjGcV_Ik": {"duration": 40.055, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=MEwGjGcV_Ik", "annotations": [{"segment": [8.873210046801871, 40.05442], "label": "Shuffleboard"}]}, "fs-goyuhTi8": {"duration": 12.145, "subset": "validation", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=fs-goyuhTi8", "annotations": [{"segment": [0, 7.900253658346334], "label": "Hitting a pinata"}]}, "u7THn6D6y5w": {"duration": 61.16, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=u7THn6D6y5w", "annotations": []}, "taOJ9kUiwgM": {"duration": 94.413, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=taOJ9kUiwgM", "annotations": [{"segment": [9.72108291678606, 74.38101196567864], "label": "Making an omelette"}]}, "UOL8oxziVwU": {"duration": 42.516, "subset": "training", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=UOL8oxziVwU", "annotations": [{"segment": [10.47969803417987, 32.367674941011245], "label": "Powerbocking"}]}, "GhwvPy4_2KE": {"duration": 57.795, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=GhwvPy4_2KE", "annotations": [{"segment": [0.2, 57.546], "label": "Swinging at the playground"}]}, "6OLIr7E0eHA": {"duration": 56.286, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6OLIr7E0eHA", "annotations": [{"segment": [0.01, 55.33699764151885], "label": "Playing 
drums"}]}, "GcEutQTeAnA": {"duration": 40.241, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=GcEutQTeAnA", "annotations": [{"segment": [0.2, 33.758031201248045], "label": "Hand car wash"}]}, "0k_JEcQzi2w": {"duration": 60.651, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=0k_JEcQzi2w", "annotations": []}, "2rgamh4uty8": {"duration": 120.605, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2rgamh4uty8", "annotations": [{"segment": [5.895619289612101, 110.28938252948748], "label": "Waxing skis"}]}, "J5xPmg3tmtg": {"duration": 125.272, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=J5xPmg3tmtg", "annotations": [{"segment": [75.79679875195008, 109.61029797191888], "label": "Waxing skis"}]}, "j_YetAlzwbk": {"duration": 74.49, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=j_YetAlzwbk", "annotations": [{"segment": [4.21290848, 74.418333], "label": "Washing dishes"}]}, "5cijPqKVc84": {"duration": 142.59, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=5cijPqKVc84", "annotations": []}, "1OmzECVyIww": {"duration": 45.81, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1OmzECVyIww", "annotations": [{"segment": [16.21653176, 37.03991824], "label": "Using the pommel horse"}]}, "3aQnQEL3USQ": {"duration": 211.929, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3aQnQEL3USQ", "annotations": [{"segment": [0.01, 211.92854875283447], "label": "Powerbocking"}]}, "n1LmL6_vxKs": {"duration": 21.41, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=n1LmL6_vxKs", "annotations": [{"segment": [1.3675834633385335, 17.078115444617787], "label": "Clean and jerk"}]}, "2I9xymLVssI": {"duration": 215.133, "subset": "training", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=2I9xymLVssI", "annotations": [{"segment": [115.24589408099686, 117.26690031152647], "label": "Roof shingle removal"}]}, "Yh6xzcNlAjo": {"duration": 72.215, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Yh6xzcNlAjo", "annotations": [{"segment": [1.0702551639480546, 11.209514611876992], "label": "Baking cookies"}]}, "NjFxN3xQqeY": {"duration": 72.53999999999999, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NjFxN3xQqeY", "annotations": [{"segment": [0.01, 68.19734789391575], "label": "Mowing the lawn"}]}, "G7zChU3ma0M": {"duration": 62.23, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=G7zChU3ma0M", "annotations": [{"segment": [2.653830988910711, 4.300368372088281], "label": "Playing ten pins"}, {"segment": [9.821111362742485, 51.08140108236865], "label": "Playing ten pins"}]}, "6koP53YFNjg": {"duration": 39.939, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6koP53YFNjg", "annotations": [{"segment": [0.010000000000000002, 15.763817530516882], "label": "Beer pong"}, {"segment": [31.627169724977218, 39.46553198576939], "label": "Beer pong"}]}, "ggNmtMAdqe0": {"duration": 135.466, "subset": "training", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=ggNmtMAdqe0", "annotations": [{"segment": [8.65291887675507, 42.63145397815913], "label": "Welding"}, {"segment": [53.60588767550703, 54.87216848673947], "label": "Welding"}, {"segment": [73.65533385335414, 77.66522308892357], "label": "Welding"}, {"segment": [83.78558034321374, 84.8408143525741], "label": "Welding"}]}, "gE1nShlv6mo": {"duration": 136.7, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=gE1nShlv6mo", "annotations": [{"segment": [34.75154914196568, 97.21905772230889], "label": "Ironing clothes"}]}, "q9y-83399Ao": {"duration": 139.97, 
"subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=q9y-83399Ao", "annotations": [{"segment": [15.958529028855466, 129.28774876520492], "label": "Swimming"}]}, "nGYqSqf0yCY": {"duration": 198.113, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nGYqSqf0yCY", "annotations": [{"segment": [30.364930310118247, 154.1081911387787], "label": "Rock-paper-scissors"}]}, "3Edxa-t4MxA": {"duration": 158.5, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3Edxa-t4MxA", "annotations": [{"segment": [30.661242743587295, 158.29884353741497], "label": "Cheerleading"}, {"segment": [19.039642486739467, 24.974076508580342], "label": "Cheerleading"}]}, "QT2l100KJe0": {"duration": 34.02, "subset": "training", "resolution": "448x358", "url": "https://www.youtube.com/watch?v=QT2l100KJe0", "annotations": [{"segment": [6.580556580343214, 16.13297742277691], "label": "Javelin throw"}]}, "YAhHfaXnpKg": {"duration": 88.306, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YAhHfaXnpKg", "annotations": [{"segment": [31.249152886115446, 54.11321840873635], "label": "Camel ride"}]}, "rD3W6N2g0q4": {"duration": 116.62, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rD3W6N2g0q4", "annotations": []}, "8v2sAylgkgc": {"duration": 63.414, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8v2sAylgkgc", "annotations": [{"segment": [0.2, 63.160999999999994], "label": "Swinging at the playground"}]}, "9qWtHP_iSqA": {"duration": 84.52, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9qWtHP_iSqA", "annotations": []}, "o6Xr1PiL-Jg": {"duration": 59.606, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=o6Xr1PiL-Jg", "annotations": [{"segment": [4.2151017597367, 56.76460651363078], "label": "Raking 
leaves"}]}, "1tRTSOnylOs": {"duration": 68.848, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=1tRTSOnylOs", "annotations": [{"segment": [0.01, 68.823944], "label": "Bullfighting"}]}, "CmS2nSm5n2Q": {"duration": 232.363, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CmS2nSm5n2Q", "annotations": [{"segment": [22.83744372632048, 30.44992496842731], "label": "Doing fencing"}, {"segment": [75.76231331430128, 158.4121096571754], "label": "Doing fencing"}, {"segment": [160.94960340454435, 176.53706390327613], "label": "Doing fencing"}]}, "TCcopbAANQ0": {"duration": 158.1, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TCcopbAANQ0", "annotations": [{"segment": [18.00526209048362, 147.98845553822153], "label": "Assembling bicycle"}]}, "aFVthcfDK9Q": {"duration": 31.46, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aFVthcfDK9Q", "annotations": [{"segment": [8.063579563182527, 23.94564508580343], "label": "Putting on makeup"}]}, "f3YyN44Dx8M": {"duration": 128.082, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=f3YyN44Dx8M", "annotations": [{"segment": [14.186848717628704, 125.48367598127925], "label": "Disc dog"}]}, "3IPOOpGRl80": {"duration": 191.821, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3IPOOpGRl80", "annotations": []}, "keylpQ6q1QU": {"duration": 55.91, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=keylpQ6q1QU", "annotations": []}, "ognXkeWL7h8": {"duration": 189.41, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ognXkeWL7h8", "annotations": [{"segment": [0.3754848217285132, 180.92994498995282], "label": "Putting on makeup"}]}, "ZvVnXvKlKbQ": {"duration": 143.662, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=ZvVnXvKlKbQ", "annotations": [{"segment": [35.859377885319496, 41.23828456811742], "label": "Playing ten pins"}, {"segment": [50.20312903944729, 55.13379349867873], "label": "Playing ten pins"}, {"segment": [60.51270018147664, 80.23535801840237], "label": "Playing ten pins"}, {"segment": [92.3378980546977, 97.04444140214588], "label": "Playing ten pins"}, {"segment": [102.87159030851029, 116.54297678627145], "label": "Playing ten pins"}]}, "ydRycaBjMVw": {"duration": 237.07999999999998, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ydRycaBjMVw", "annotations": [{"segment": [1.1093026521060843, 84.31411544461778], "label": "Getting a piercing"}, {"segment": [100.9550592823713, 235.93160374414975], "label": "Getting a piercing"}]}, "D-BRqQhL74Q": {"duration": 44.002, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=D-BRqQhL74Q", "annotations": [{"segment": [4.114914196567863, 27.77567082683307], "label": "Tug of war"}]}, "H1FKtaktOEc": {"duration": 206.17000000000002, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=H1FKtaktOEc", "annotations": [{"segment": [38.59656355694228, 41.16966779407176], "label": "Archery"}, {"segment": [44.38604809048361, 49.85389459438377], "label": "Archery"}, {"segment": [52.74863686115444, 53.713550950077995], "label": "Archery"}, {"segment": [67.54398622464898, 82.9826116474259], "label": "Archery"}, {"segment": [86.84226800312011, 88.12882012168487], "label": "Archery"}, {"segment": [108.71365401872073, 111.28675825585023], "label": "Archery"}, {"segment": [113.21658643369734, 117.07624278939159], "label": "Archery"}, {"segment": [122.22245126365056, 123.83064141185646], "label": "Archery"}, {"segment": [136.05288653822151, 144.73711333853353], "label": "Archery"}, {"segment": [149.56168378315132, 151.8131499906396], "label": "Archery"}, {"segment": [11.900607096723869, 
14.473711333853352], "label": "Archery"}, {"segment": [19.61991980811232, 20.58483389703588], "label": "Archery"}, {"segment": [25.08776631201248, 35.380183260530416], "label": "Archery"}]}, "bcOFV26B3jk": {"duration": 20.53, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bcOFV26B3jk", "annotations": [{"segment": [0.21244409999999997, 7.82800062], "label": "Playing kickball"}]}, "sFQ7AaId8zg": {"duration": 45.721, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sFQ7AaId8zg", "annotations": [{"segment": [0.2, 45.52009070294784], "label": "Peeling potatoes"}]}, "taO9hPOvMz0": {"duration": 196.836, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=taO9hPOvMz0", "annotations": [{"segment": [16.270503900156005, 22.410316692667706], "label": "Surfing"}, {"segment": [25.48022308892356, 87.79932293291732], "label": "Surfing"}, {"segment": [88.72029485179407, 171.30077691107644], "label": "Surfing"}, {"segment": [182.37097347893913, 186.3622574102964], "label": "Surfing"}]}, "g5rkuDdjRVI": {"duration": 157.757, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=g5rkuDdjRVI", "annotations": [{"segment": [20.91926553252606, 147.1731857464775], "label": "Longboarding"}]}, "4taBobzpYNU": {"duration": 120.117, "subset": "training", "resolution": "640x320", "url": "https://www.youtube.com/watch?v=4taBobzpYNU", "annotations": [{"segment": [67.43728170360289, 71.54703311686909], "label": "Sharpening knives"}, {"segment": [108.34798769387669, 113.01815970009753], "label": "Sharpening knives"}]}, "Md9vHrFsggM": {"duration": 181.186, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Md9vHrFsggM", "annotations": []}, "dgao8t9JFBw": {"duration": 110.2, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=dgao8t9JFBw", "annotations": []}, "b_iUgvzb1-0": {"duration": 
44.1, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=b_iUgvzb1-0", "annotations": [{"segment": [27.9289312823713, 44.09469387755102], "label": "Shot put"}]}, "IjULOynkK5I": {"duration": 71.63, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IjULOynkK5I", "annotations": [{"segment": [15.07134945397816, 25.342195007800317], "label": "Spinning"}, {"segment": [32.15221216848674, 43.4278143525741], "label": "Spinning"}, {"segment": [50.907669266770675, 71.561], "label": "Spinning"}]}, "4gEDonpfTeY": {"duration": 175.43, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4gEDonpfTeY", "annotations": [{"segment": [36.39899957903078, 90.31330652106085], "label": "Tango"}, {"segment": [96.88154699531981, 103.99714084243371], "label": "Tango"}, {"segment": [141.21717019656788, 145.86967386583464], "label": "Tango"}, {"segment": [151.06953090795633, 160.1008615600624], "label": "Tango"}]}, "vcE8e-fQBhs": {"duration": 179.0, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=vcE8e-fQBhs", "annotations": [{"segment": [92.96726209048363, 178.955], "label": "Plataform diving"}, {"segment": [0, 92.21288030000001], "label": "Plataform diving"}]}, "PJvOR4ue3tU": {"duration": 100.75, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=PJvOR4ue3tU", "annotations": [{"segment": [29.009235430000004, 30.335439510140404], "label": "Shot put"}, {"segment": [45.738926929797195, 48.882495790951644], "label": "Shot put"}]}, "rNb4Jz_t9F4": {"duration": 149.792, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rNb4Jz_t9F4", "annotations": [{"segment": [0.01, 149.09087317506305], "label": "Waterskiing"}]}, "OvGxDaayPcw": {"duration": 41.26, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=OvGxDaayPcw", "annotations": [{"segment": 
[0.7724528985959439, 41.261859], "label": "Hopscotch"}]}, "9nndNUHadcg": {"duration": 47.18, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=9nndNUHadcg", "annotations": [{"segment": [6.850991520000001, 38.847126365054606], "label": "Mixing drinks"}]}, "Ij5-awZLA44": {"duration": 67.756, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ij5-awZLA44", "annotations": []}, "hDf32SV0QN0": {"duration": 86.24, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=hDf32SV0QN0", "annotations": [{"segment": [0.01, 0.02], "label": "Brushing hair"}]}, "ywIG-f5f9Sg": {"duration": 77.068, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ywIG-f5f9Sg", "annotations": []}, "mG8h5rX3OnU": {"duration": 66.363, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=mG8h5rX3OnU", "annotations": [{"segment": [4.348253472996063, 63.36026356942278], "label": "Decorating the Christmas tree"}]}, "rAO-_VxIJng": {"duration": 46.81, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rAO-_VxIJng", "annotations": [{"segment": [1.0406958929684007, 46.24799299577214], "label": "Walking the dog"}]}, "WvpiEwrWmWE": {"duration": 169.854, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WvpiEwrWmWE", "annotations": [{"segment": [1.692920029791716, 162.25576582418424], "label": "Baton twirling"}]}, "lGRZ3F7tW2c": {"duration": 140.76, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lGRZ3F7tW2c", "annotations": [{"segment": [9.662109303419756, 58.41184260703761], "label": "Layup drill in basketball"}, {"segment": [88.93532358814352, 139.22221025897036], "label": "Layup drill in basketball"}]}, "QdmcjM2fluo": {"duration": 160.729, "subset": "testing", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=QdmcjM2fluo", "annotations": []}, "HlAjWgz7zZ4": {"duration": 106.441, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HlAjWgz7zZ4", "annotations": [{"segment": [23.009550681859057, 84.92014028628557], "label": "Slacklining"}]}, "88TLZbT_KkE": {"duration": 126.363, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=88TLZbT_KkE", "annotations": [{"segment": [15.573598225561675, 79.44506436583994], "label": "Laying tile"}, {"segment": [97.5814066032029, 110.19799146333854], "label": "Laying tile"}]}, "dbR5AzEZOPE": {"duration": 191.84, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dbR5AzEZOPE", "annotations": [{"segment": [49.36695919, 136.59154138689547], "label": "Dodgeball"}]}, "jAk-vBePtTU": {"duration": 186.41, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jAk-vBePtTU", "annotations": [{"segment": [29.66271292979719, 181.46600851170047], "label": "Playing congas"}]}, "cFYkr23rRv0": {"duration": 185.6, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cFYkr23rRv0", "annotations": []}, "NSxYVWGnNuY": {"duration": 122.21, "subset": "testing", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=NSxYVWGnNuY", "annotations": []}, "_I1zlicAxpM": {"duration": 13.21, "subset": "validation", "resolution": "400x220", "url": "https://www.youtube.com/watch?v=_I1zlicAxpM", "annotations": [{"segment": [0.01, 8.152064742589705], "label": "Ping-pong"}]}, "cMuQUTKMc0k": {"duration": 160.172, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cMuQUTKMc0k", "annotations": [{"segment": [0.01, 126.68771031657593], "label": "Having an ice cream"}]}, "KdqoLhoXjS8": {"duration": 119.699, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KdqoLhoXjS8", "annotations": 
[{"segment": [0.01, 41.08207626264234], "label": "Grooming dog"}, {"segment": [65.35784859965827, 66.4782688613667], "label": "Grooming dog"}]}, "eElKkhIJXCs": {"duration": 60.19, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=eElKkhIJXCs", "annotations": [{"segment": [11.526308892355695, 54.8202496099844], "label": "Playing lacrosse"}]}, "RZogaNvPuNs": {"duration": 211.836, "subset": "validation", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=RZogaNvPuNs", "annotations": [{"segment": [109.84902336448599, 184.40121339563865], "label": "Making a lemonade"}]}, "Tmh2VNM2HAk": {"duration": 205.06, "subset": "testing", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=Tmh2VNM2HAk", "annotations": []}, "3Hgwyprv8u4": {"duration": 67.34, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3Hgwyprv8u4", "annotations": [{"segment": [16.679703505460218, 43.63997898283932], "label": "Vacuuming floor"}]}, "t-xIv68Ac4c": {"duration": 233.85, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=t-xIv68Ac4c", "annotations": [{"segment": [10.212836193447737, 221.4186380655226], "label": "Zumba"}]}, "z-1HUIQFGIg": {"duration": 175.033, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=z-1HUIQFGIg", "annotations": [{"segment": [75.04912946263006, 87.5903644072829], "label": "Capoeira"}, {"segment": [118.39817991282756, 120.57926423681509], "label": "Capoeira"}]}, "pgBfTJzVB98": {"duration": 54.567, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pgBfTJzVB98", "annotations": [{"segment": [33.96597551794071, 34.561869825273014], "label": "Throwing darts"}]}, "HUzoMRdSrZo": {"duration": 85.938, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HUzoMRdSrZo", "annotations": [{"segment": [5.228620666146646, 73.87102530889236], 
"label": "Futsal"}]}, "z-94IUxC4Xc": {"duration": 139.599, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=z-94IUxC4Xc", "annotations": [{"segment": [13.066929860867905, 131.1048616723869], "label": "Longboarding"}]}, "XKu57UKSqPc": {"duration": 121.069, "subset": "validation", "resolution": "300x200", "url": "https://www.youtube.com/watch?v=XKu57UKSqPc", "annotations": [{"segment": [15.158405895161852, 121.041], "label": "Grooming dog"}]}, "B_-jPPr2RLA": {"duration": 29.63, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=B_-jPPr2RLA", "annotations": [{"segment": [0.6672225599999999, 29.17883619344774], "label": "Hopscotch"}]}, "xICaX0kCCF4": {"duration": 36.43, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=xICaX0kCCF4", "annotations": [{"segment": [0.45469088299531984, 29.10021651170047], "label": "Spinning"}]}, "nPTq63WtemI": {"duration": 7.199, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nPTq63WtemI", "annotations": [{"segment": [0, 7.198184], "label": "Kneeling"}]}, "5MjAgMF_FzA": {"duration": 109.158, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=5MjAgMF_FzA", "annotations": [{"segment": [23.32985237776858, 81.0584651957507], "label": "Making an omelette"}]}, "Sb2SPX38lyQ": {"duration": 167.46, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Sb2SPX38lyQ", "annotations": [{"segment": [32.13395396224013, 140.81464216692666], "label": "Doing motocross"}]}, "7dx3MobORY0": {"duration": 203.082, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=7dx3MobORY0", "annotations": [{"segment": [27.83671915287402, 121.152993585804], "label": "Baking cookies"}]}, "Y0G_wA38HkI": {"duration": 231.29, "subset": "validation", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=Y0G_wA38HkI", "annotations": [{"segment": [0.01, 206.3964278127925], "label": "Windsurfing"}]}, "jMQP5ePI4eQ": {"duration": 181.51, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jMQP5ePI4eQ", "annotations": [{"segment": [3.963419656786271, 171.5594508580343], "label": "Playing guitarra"}]}, "BW7_eGchA_M": {"duration": 14.88, "subset": "testing", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=BW7_eGchA_M", "annotations": []}, "UPwDuuYlLfQ": {"duration": 35.063, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UPwDuuYlLfQ", "annotations": [{"segment": [0.01, 35.062131], "label": "Grooming dog"}]}, "As4WC0IZWos": {"duration": 138.97, "subset": "testing", "resolution": "304x176", "url": "https://www.youtube.com/watch?v=As4WC0IZWos", "annotations": []}, "r4iurK3kOe8": {"duration": 70.055, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=r4iurK3kOe8", "annotations": [{"segment": [5.901637734477379, 68.4152818848674], "label": "Disc dog"}]}, "ieWgalZPc2g": {"duration": 132.45, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=ieWgalZPc2g", "annotations": [{"segment": [17.769727996879872, 122.94172276911077], "label": "Doing motocross"}]}, "_8H6x8OzjyQ": {"duration": 173.082, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_8H6x8OzjyQ", "annotations": []}, "zzxYEZkahBU": {"duration": 56.077, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zzxYEZkahBU", "annotations": [{"segment": [0.2, 30.881271009360372], "label": "Shuffleboard"}]}, "TcrLMpMA1WM": {"duration": 91.44, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=TcrLMpMA1WM", "annotations": [{"segment": [5.704898595943838, 55.90800624024961], "label": "Starting a campfire"}]}, "9pJZsi04xBk": 
{"duration": 95.97, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=9pJZsi04xBk", "annotations": [{"segment": [2.3206008185905667, 60.1110470105879], "label": "Playing lacrosse"}]}, "o0gdMKlKLcU": {"duration": 17.09, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=o0gdMKlKLcU", "annotations": [{"segment": [0.01, 11.11775735101404], "label": "Plastering"}]}, "LS8Nku9D3Xk": {"duration": 75.86, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LS8Nku9D3Xk", "annotations": []}, "H6hb3SIqdL4": {"duration": 15.001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=H6hb3SIqdL4", "annotations": [{"segment": [0, 4.553681994126529], "label": "Throwing darts"}]}, "-t2ikmhg9_w": {"duration": 194.58, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-t2ikmhg9_w", "annotations": [{"segment": [24.89204331981279, 92.88982019344773], "label": "Vacuuming floor"}, {"segment": [118.692548024961, 140.85253780967238], "label": "Vacuuming floor"}]}, "lLHAzwAs_9I": {"duration": 197.115, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lLHAzwAs_9I", "annotations": [{"segment": [39.65814196567863, 85.16604836193447], "label": "Swinging at the playground"}]}, "3dDTV-WVkqk": {"duration": 124.483, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3dDTV-WVkqk", "annotations": []}, "eil9mWGJB8E": {"duration": 29.443, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=eil9mWGJB8E", "annotations": [{"segment": [0.01, 0.02], "label": "Arm wrestling"}]}, "So4269j6yxM": {"duration": 57.26, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=So4269j6yxM", "annotations": []}, "rMWCaPh9UqE": {"duration": 171.154, "subset": "validation", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=rMWCaPh9UqE", "annotations": [{"segment": [18.153532993279274, 166.15008425299933], "label": "Cutting the grass"}]}, "_roK9m9UOvM": {"duration": 37.663, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=_roK9m9UOvM", "annotations": [{"segment": [13.022499219968799, 32.086968798751954], "label": "Putting on shoes"}]}, "Rc_pWU_3WLk": {"duration": 31.533, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Rc_pWU_3WLk", "annotations": [{"segment": [12.217677570093459, 12.855548286604362], "label": "Sumo"}]}, "8eUBLvj3veI": {"duration": 186.92, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8eUBLvj3veI", "annotations": [{"segment": [69.69411665446209, 160.96716389391574], "label": "Removing curlers"}]}, "e4bcTIoiMIk": {"duration": 209.468, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=e4bcTIoiMIk", "annotations": [{"segment": [76.94016458658346, 117.12547347893916], "label": "Sharpening knives"}]}, "gGai6uu5Yjs": {"duration": 115.915, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gGai6uu5Yjs", "annotations": [{"segment": [22.965803007630512, 115.91401360544218], "label": "Elliptical trainer"}]}, "OMlpKhvmMpA": {"duration": 32.508, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OMlpKhvmMpA", "annotations": [{"segment": [0.01, 32.401], "label": "Beer pong"}]}, "FYuYu-WBqgY": {"duration": 23.615, "subset": "testing", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=FYuYu-WBqgY", "annotations": []}, "IGcalXmWUwA": {"duration": 196.26, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IGcalXmWUwA", "annotations": [{"segment": [16.839322416434072, 20.207186899720888], "label": "Layup drill in basketball"}, {"segment": [33.0663058359069, 
59.39688270524018], "label": "Layup drill in basketball"}, {"segment": [71.94983214294558, 82.05342559280602], "label": "Layup drill in basketball"}]}, "QlQrJ5WsWzo": {"duration": 177.076, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QlQrJ5WsWzo", "annotations": [{"segment": [0.01, 177.07539682539684], "label": "Camel ride"}]}, "ZIvekQVbCZ4": {"duration": 174.661, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZIvekQVbCZ4", "annotations": [{"segment": [0.2, 38.964761975513035], "label": "Rope skipping"}, {"segment": [53.9512088891719, 70.57254092068446], "label": "Rope skipping"}, {"segment": [82.56169845161153, 88.01131551112385], "label": "Rope skipping"}, {"segment": [103.27024327775833, 149.86446762870514], "label": "Rope skipping"}, {"segment": [157.4939314352574, 174.460225], "label": "Rope skipping"}]}, "kx0ZSPOOFJ0": {"duration": 9.57, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kx0ZSPOOFJ0", "annotations": [{"segment": [2.863385335413416, 8.812698907956317], "label": "Javelin throw"}]}, "NjzUWVoc8rw": {"duration": 7.34, "subset": "validation", "resolution": "608x1080", "url": "https://www.youtube.com/watch?v=NjzUWVoc8rw", "annotations": [{"segment": [1.1048315132605304, 7.095979719188767], "label": "Tennis serve with ball bouncing"}]}, "bf3ac4bkIIo": {"duration": 84.916, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bf3ac4bkIIo", "annotations": [{"segment": [21.858091333853356, 64.1170679126365], "label": "Making a lemonade"}]}, "MQJYVBmPFmE": {"duration": 29.536, "subset": "testing", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=MQJYVBmPFmE", "annotations": []}, "mvomkmq32vU": {"duration": 78.81, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mvomkmq32vU", "annotations": [{"segment": [9.517628705148205, 76.0796255850234], 
"label": "High jump"}]}, "cmmJ3HqcaZE": {"duration": 129.497, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=cmmJ3HqcaZE", "annotations": []}, "qU_605hliNw": {"duration": 172.385, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qU_605hliNw", "annotations": [{"segment": [8.336869333276733, 165.6616603307332], "label": "Waterskiing"}]}, "5kBKAfEX7XA": {"duration": 45.65, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5kBKAfEX7XA", "annotations": [{"segment": [9.49516146645866, 44.7730647425897], "label": "Using parallel bars"}]}, "-qtdjw3MYcY": {"duration": 15.419, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-qtdjw3MYcY", "annotations": [{"segment": [0.01, 15.418048], "label": "Braiding hair"}]}, "ZSkiPiKaJAs": {"duration": 117.377, "subset": "testing", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=ZSkiPiKaJAs", "annotations": []}, "izSjI7hH1-Y": {"duration": 103.19, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=izSjI7hH1-Y", "annotations": [{"segment": [0.01, 103.161], "label": "Getting a piercing"}]}, "D_yO_40uREE": {"duration": 112.524, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=D_yO_40uREE", "annotations": [{"segment": [6.57923166926677, 104.14582449297971], "label": "Installing carpet"}]}, "w9cJ3FNGw_U": {"duration": 188.245, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=w9cJ3FNGw_U", "annotations": [{"segment": [22.896346333853355, 175.85088767550704], "label": "Calf roping"}]}, "0qTzoVjsMpQ": {"duration": 225.75, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0qTzoVjsMpQ", "annotations": [{"segment": [208.48747188527, 218.7005406093795], "label": "Smoking hookah"}]}, "wJOHmxQZPR4": {"duration": 224.166, 
"subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wJOHmxQZPR4", "annotations": [{"segment": [26.22320592823713, 47.55141341653667], "label": "Sumo"}, {"segment": [71.32712012480499, 99.6481825273011], "label": "Sumo"}]}, "B4Zp5XN97Gc": {"duration": 160.218, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=B4Zp5XN97Gc", "annotations": []}, "w69pipCSlM4": {"duration": 146.867, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=w69pipCSlM4", "annotations": [{"segment": [11.910595943837754, 145.92467550702028], "label": "Waterskiing"}]}, "EDDbcCBnTQM": {"duration": 32.416, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=EDDbcCBnTQM", "annotations": []}, "LRRy4naeNfE": {"duration": 19.92, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=LRRy4naeNfE", "annotations": []}, "ABmZdlZEZOY": {"duration": 187.501, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ABmZdlZEZOY", "annotations": [{"segment": [19.30589115444618, 160.00488578003123], "label": "Scuba diving"}]}, "EgGNBfkTmOo": {"duration": 90.976, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EgGNBfkTmOo", "annotations": []}, "u6HM4rYdllc": {"duration": 199.266, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=u6HM4rYdllc", "annotations": [{"segment": [59.05739469578784, 114.69567706708268], "label": "Hula hoop"}]}, "t05CLPgsJY0": {"duration": 54.1, "subset": "testing", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=t05CLPgsJY0", "annotations": []}, "21qQL15lUNY": {"duration": 65.48, "subset": "validation", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=21qQL15lUNY", "annotations": [{"segment": [1.123686416844429, 60.168299956488056], "label": "Walking the dog"}]}, "rXTwD2K27jo": 
{"duration": 188.175, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rXTwD2K27jo", "annotations": []}, "x2ztI3YNu9o": {"duration": 140.95, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=x2ztI3YNu9o", "annotations": [{"segment": [9.894740361043011, 53.2117143151326], "label": "Playing accordion"}, {"segment": [62.44680522932917, 87.95324680187207], "label": "Playing accordion"}, {"segment": [112.58015590639626, 128.19185721372855], "label": "Playing accordion"}]}, "G8-soqkFiI8": {"duration": 24.776, "subset": "training", "resolution": "3200x1800", "url": "https://www.youtube.com/watch?v=G8-soqkFiI8", "annotations": [{"segment": [0.01, 24.775419501133786], "label": "Building sandcastles"}]}, "R4M90w2zPVU": {"duration": 84.242, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=R4M90w2zPVU", "annotations": [{"segment": [22.26682737169518, 66.9314634525661], "label": "Playing ice hockey"}]}, "WmQC309JCAI": {"duration": 181.046, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=WmQC309JCAI", "annotations": [{"segment": [42.366439555541405, 55.35881435257409], "label": "Hurling"}]}, "YSO5CGeeTmE": {"duration": 106.975, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YSO5CGeeTmE", "annotations": [{"segment": [23.45782122546401, 94.9958653285381], "label": "Drum corps"}]}, "AUFI2wx5Z48": {"duration": 141.32, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=AUFI2wx5Z48", "annotations": [{"segment": [1.2732571600000002, 62.70473552], "label": "Doing nails"}, {"segment": [76.88720928000001, 115.55833267999999], "label": "Doing nails"}]}, "0BHufmWSI6Y": {"duration": 145.311, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0BHufmWSI6Y", "annotations": [{"segment": [22.771794851794073, 
91.20047191887677], "label": "Laying tile"}]}, "xnCw4tvy0uQ": {"duration": 225.861, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xnCw4tvy0uQ", "annotations": [{"segment": [0.01, 72.23307686048939], "label": "Snow tubing"}, {"segment": [122.26769595409667, 223.04164708629165], "label": "Snow tubing"}]}, "r6z6Ct16I_8": {"duration": 66.15, "subset": "validation", "resolution": "432x720", "url": "https://www.youtube.com/watch?v=r6z6Ct16I_8", "annotations": [{"segment": [0.6187301092043682, 8.043491419656787], "label": "Doing nails"}, {"segment": [25.169304212168484, 29.398572542901714], "label": "Doing nails"}, {"segment": [46.21249297971919, 56.01201716068642], "label": "Doing nails"}]}, "JGuVc7z_YOQ": {"duration": 89.17, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JGuVc7z_YOQ", "annotations": [{"segment": [0.01, 88.60823886288786], "label": "Playing kickball"}]}, "PsCJt5fEzvQ": {"duration": 188.616, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PsCJt5fEzvQ", "annotations": []}, "0CyA_OyN7zU": {"duration": 132.958, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=0CyA_OyN7zU", "annotations": []}, "QCj7IGUGs2Y": {"duration": 126.855, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QCj7IGUGs2Y", "annotations": [{"segment": [0.01, 126.26726677067083], "label": "Swinging at the playground"}]}, "3CImIarFM30": {"duration": 10.217, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3CImIarFM30", "annotations": [{"segment": [0.20000000000000004, 10.016757369614513], "label": "Spread mulch"}]}, "bzJv-SZ41II": {"duration": 208.237, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bzJv-SZ41II", "annotations": []}, "8Fe4IEXgkXk": {"duration": 14.54, "subset": "testing", "resolution": "2276x720", 
"url": "https://www.youtube.com/watch?v=8Fe4IEXgkXk", "annotations": []}, "a-6lVnhqU4w": {"duration": 127.32, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=a-6lVnhqU4w", "annotations": [{"segment": [12.334277199999999, 15.97166675], "label": "Javelin throw"}, {"segment": [27.66936895, 100.22364114999999], "label": "Javelin throw"}]}, "JOBSEatasv4": {"duration": 133.701, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JOBSEatasv4", "annotations": [{"segment": [25.435572542901713, 127.80332761310451], "label": "Doing crunches"}]}, "QgjNH6sAziM": {"duration": 90.326, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QgjNH6sAziM", "annotations": [{"segment": [26.76295577583432, 38.57803988798385], "label": "Grooming dog"}, {"segment": [41.109843626301604, 83.16591683502435], "label": "Grooming dog"}]}, "Djlmto9iibw": {"duration": 88.63, "subset": "training", "resolution": "1906x1080", "url": "https://www.youtube.com/watch?v=Djlmto9iibw", "annotations": [{"segment": [0, 88.461], "label": "Bathing dog"}]}, "1VemxWV-g8s": {"duration": 70.64, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1VemxWV-g8s", "annotations": []}, "C91n572ijKA": {"duration": 129.73, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=C91n572ijKA", "annotations": []}, "VgAP_Aq2A8g": {"duration": 49.598, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=VgAP_Aq2A8g", "annotations": []}, "NrlITLsd7Fk": {"duration": 12.21, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=NrlITLsd7Fk", "annotations": [{"segment": [0.1507644305772231, 11.665397815912636], "label": "Using the pommel horse"}]}, "s46pci16zS8": {"duration": 136.696, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=s46pci16zS8", 
"annotations": []}, "KGTPkiDRpfE": {"duration": 40.22, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KGTPkiDRpfE", "annotations": [{"segment": [3.5971138845553825, 39.38057722308892], "label": "Kayaking"}]}, "yFOZHa2pn1g": {"duration": 178.38, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=yFOZHa2pn1g", "annotations": [{"segment": [105.88696750000001, 157.18488010140405], "label": "Bungee jumping"}, {"segment": [8.2730325, 99.59679128549142], "label": "Bungee jumping"}]}, "7-UlUnaBjXU": {"duration": 46.37, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=7-UlUnaBjXU", "annotations": []}, "BR9dr2iOyNc": {"duration": 102.03, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=BR9dr2iOyNc", "annotations": [{"segment": [0, 101.961], "label": "Cleaning shoes"}]}, "MYB_4V-Pddc": {"duration": 206.59, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MYB_4V-Pddc", "annotations": []}, "DJNmAoSQu6k": {"duration": 147.912, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DJNmAoSQu6k", "annotations": [{"segment": [0.01, 142.473398859959], "label": "Rock-paper-scissors"}]}, "nKPkHO9ajs8": {"duration": 52.76, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nKPkHO9ajs8", "annotations": [{"segment": [2.222160527237416, 50.368971950714766], "label": "Playing lacrosse"}]}, "vmlxwwXwlEE": {"duration": 136.905, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=vmlxwwXwlEE", "annotations": []}, "vvHrSeomFtg": {"duration": 214.81, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vvHrSeomFtg", "annotations": [{"segment": [19.441400936037443, 206.48108580343214], "label": "Making a sandwich"}]}, "ZL7xefcDWYc": {"duration": 51.41, "subset": 
"training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZL7xefcDWYc", "annotations": [{"segment": [3.72810816, 40.504402750390014], "label": "Using uneven bars"}]}, "xDRvD_-fvd8": {"duration": 77.3, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xDRvD_-fvd8", "annotations": [{"segment": [24.347460218408735, 29.771399375975037], "label": "Doing fencing"}, {"segment": [35.556934477379095, 37.605978159126366], "label": "Doing fencing"}]}, "twfpk8zB0rc": {"duration": 101.15, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=twfpk8zB0rc", "annotations": []}, "D4LoDBsIFjw": {"duration": 156.085, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=D4LoDBsIFjw", "annotations": [{"segment": [52.81834165366614, 61.824234009360374], "label": "Canoeing"}, {"segment": [72.04713884555382, 86.16448361934476], "label": "Canoeing"}, {"segment": [97.11759594383774, 99.79502340093603], "label": "Canoeing"}, {"segment": [110.27546489859594, 112.46636817472698], "label": "Canoeing"}, {"segment": [125.12492043681746, 145.32991731669267], "label": "Canoeing"}]}, "cH3H-arDsH4": {"duration": 62.65, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cH3H-arDsH4", "annotations": []}, "pbPWMH4Ep2A": {"duration": 233.616, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pbPWMH4Ep2A", "annotations": []}, "oZa-yum3mcU": {"duration": 126.25, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oZa-yum3mcU", "annotations": [{"segment": [31.506021840873636, 42.73004212168487], "label": "Horseback riding"}, {"segment": [49.818897035881434, 61.43674258970359], "label": "Horseback riding"}, {"segment": [69.70707332293291, 101.60692043681748], "label": "Horseback riding"}]}, "0S0fNrI4oVs": {"duration": 85.821, "subset": "training", "resolution": "854x470", 
"url": "https://www.youtube.com/watch?v=0S0fNrI4oVs", "annotations": [{"segment": [51.911494539781586, 64.77067706708269], "label": "Doing fencing"}]}, "fG0nn2IVdDM": {"duration": 55.055, "subset": "validation", "resolution": "328x240", "url": "https://www.youtube.com/watch?v=fG0nn2IVdDM", "annotations": [{"segment": [0.2, 11.76672107428515], "label": "Kite flying"}, {"segment": [28.343195990639625, 40.28169369578783], "label": "Kite flying"}, {"segment": [43.803121076443055, 46.980994566302655], "label": "Kite flying"}]}, "c3vEqTcKPsU": {"duration": 79.784, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=c3vEqTcKPsU", "annotations": [{"segment": [19.416953527120675, 23.026515062402492], "label": "Mooping floor"}, {"segment": [26.63607688299532, 29.623300458658345], "label": "Mooping floor"}]}, "omuYi2Vhgjo": {"duration": 233.64, "subset": "training", "resolution": "480x272", "url": "https://www.youtube.com/watch?v=omuYi2Vhgjo", "annotations": [{"segment": [33.39869505, 157.81379894], "label": "Playing piano"}, {"segment": [169.90695358, 226.78196113104525], "label": "Playing piano"}]}, "mTFMQTutS2w": {"duration": 105.349, "subset": "testing", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=mTFMQTutS2w", "annotations": []}, "5SpWmZxECqc": {"duration": 214.25, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=5SpWmZxECqc", "annotations": [{"segment": [23.731336848673944, 101.61023101404056], "label": "Brushing hair"}, {"segment": [119.3251726053042, 207.23139219968797], "label": "Brushing hair"}]}, "Ds2O0-8Labs": {"duration": 100.15, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ds2O0-8Labs", "annotations": []}, "D1E_KJRxGvQ": {"duration": 96.67, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=D1E_KJRxGvQ", "annotations": [{"segment": [4.524067347893916, 90.4813469578783], "label": "Using 
the balance beam"}]}, "a8dUtKcAunw": {"duration": 117.03, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=a8dUtKcAunw", "annotations": [{"segment": [4.379095151326053, 110.93707716692667], "label": "Playing guitarra"}]}, "iYxj8a1TPYk": {"duration": 157.41, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iYxj8a1TPYk", "annotations": [{"segment": [23.546662719999997, 107.45929343999998], "label": "Layup drill in basketball"}]}, "vruB4ViPGuE": {"duration": 18.02, "subset": "training", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=vruB4ViPGuE", "annotations": [{"segment": [0.8976474258970359, 11.893828393135726], "label": "Drinking coffee"}]}, "B4zPg1ftxf4": {"duration": 213.37, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=B4zPg1ftxf4", "annotations": [{"segment": [30.281421216848674, 198.6780608424337], "label": "Paintball"}]}, "LHiui4s2X1s": {"duration": 69.684, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LHiui4s2X1s", "annotations": [{"segment": [0.01, 69.68308390022676], "label": "Beer pong"}]}, "eiN5XPmwcEM": {"duration": 216.41, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eiN5XPmwcEM", "annotations": []}, "TLJdzU44My4": {"duration": 44.26, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=TLJdzU44My4", "annotations": [{"segment": [2.589151638065523, 25.649862227769113], "label": "Vacuuming floor"}]}, "aaMrQi25WHc": {"duration": 74.095, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aaMrQi25WHc", "annotations": [{"segment": [16.414097672386895, 68.08382766926677], "label": "Surfing"}]}, "sBShp5KeRS8": {"duration": 118.93, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sBShp5KeRS8", "annotations": []}, "Kp7pUEKrb8Q": 
{"duration": 89.63, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Kp7pUEKrb8Q", "annotations": [{"segment": [8.94885631201248, 89.62839002267573], "label": "Making a lemonade"}]}, "JcsnMUVBlac": {"duration": 165.42000000000002, "subset": "validation", "resolution": "480x272", "url": "https://www.youtube.com/watch?v=JcsnMUVBlac", "annotations": [{"segment": [0, 150.19320623400938], "label": "Playing piano"}]}, "IIPKxzPjrWg": {"duration": 103.77, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=IIPKxzPjrWg", "annotations": []}, "TjMdEjgAKLw": {"duration": 51.874, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TjMdEjgAKLw", "annotations": [{"segment": [0.2, 50.791260124610595], "label": "Plastering"}]}, "G6W6wen6n7w": {"duration": 46.95, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=G6W6wen6n7w", "annotations": [{"segment": [1.0954680187207488, 46.15571918876755], "label": "High jump"}]}, "63aZHbMd-KY": {"duration": 99.057, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=63aZHbMd-KY", "annotations": []}, "j_NshVqDA-8": {"duration": 32.3, "subset": "testing", "resolution": "714x480", "url": "https://www.youtube.com/watch?v=j_NshVqDA-8", "annotations": []}, "CRH5U5XKb2Q": {"duration": 225.16, "subset": "validation", "resolution": "854x474", "url": "https://www.youtube.com/watch?v=CRH5U5XKb2Q", "annotations": [{"segment": [4.74100624024961, 217.20832293291733], "label": "Playing guitarra"}]}, "HQGwzM22zfg": {"duration": 226.79, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HQGwzM22zfg", "annotations": []}, "80pIiAmVa5Q": {"duration": 43.84, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=80pIiAmVa5Q", "annotations": [{"segment": [7.31155538221529, 43.18600936037441], "label": "Fun sliding 
down"}]}, "tSUzEX6ps0s": {"duration": 132.89, "subset": "training", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=tSUzEX6ps0s", "annotations": [{"segment": [2.0731280135559165, 25.914100169448954], "label": "Making a sandwich"}, {"segment": [67.99859884463405, 113.40010234150863], "label": "Making a sandwich"}]}, "JH-Q5JNTgwE": {"duration": 215.691, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JH-Q5JNTgwE", "annotations": [{"segment": [7.4027691284522135, 10.767664186839584], "label": "Playing blackjack"}, {"segment": [104.31174681000846, 115.07941099684805], "label": "Playing blackjack"}, {"segment": [119.11728506691287, 123.15515913697773], "label": "Playing blackjack"}, {"segment": [201.89370350324216, 202.5666822870515], "label": "Playing blackjack"}]}, "af51-Bv-4Gs": {"duration": 60.26, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=af51-Bv-4Gs", "annotations": []}, "7gzD-qp0Faw": {"duration": 227.486, "subset": "testing", "resolution": "540x360", "url": "https://www.youtube.com/watch?v=7gzD-qp0Faw", "annotations": []}, "iPc-u9aU_SQ": {"duration": 157.78, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=iPc-u9aU_SQ", "annotations": [{"segment": [22.631282371294855, 157.681], "label": "Changing car wheel"}]}, "pB7kDdqxcBI": {"duration": 83.43, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=pB7kDdqxcBI", "annotations": []}, "kWh_zSLpZuQ": {"duration": 109.25, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kWh_zSLpZuQ", "annotations": [{"segment": [64.23762402496101, 78.72090795631826], "label": "Layup drill in basketball"}, {"segment": [83.32148049921997, 101.5533790951638], "label": "Layup drill in basketball"}]}, "nvsZFU73npI": {"duration": 173.755, "subset": "training", "resolution": "640x352", "url": 
"https://www.youtube.com/watch?v=nvsZFU73npI", "annotations": [{"segment": [0.01, 173.75492063492064], "label": "Sumo"}]}, "Ke29p3rcs1k": {"duration": 151.09, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ke29p3rcs1k", "annotations": [{"segment": [68.46582888, 129.69957431045242], "label": "Polishing forniture"}]}, "u0p_dBCEDs4": {"duration": 140.39, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=u0p_dBCEDs4", "annotations": [{"segment": [59.53859076197593, 132.79391844110367], "label": "Preparing pasta"}]}, "qywEvklc1kM": {"duration": 109.483, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qywEvklc1kM", "annotations": [{"segment": [21.512583463338533, 59.93886271450859], "label": "Making an omelette"}]}, "ijNy8Fh5HfU": {"duration": 110.202, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ijNy8Fh5HfU", "annotations": []}, "LsK452h29ng": {"duration": 164.328, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LsK452h29ng", "annotations": [{"segment": [34.08638533541342, 153.26058970358815], "label": "Using the rowing machine"}]}, "j4ryUBchNiE": {"duration": 36.08, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=j4ryUBchNiE", "annotations": []}, "CrWlXxqj4ac": {"duration": 167.556, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=CrWlXxqj4ac", "annotations": [{"segment": [3.920948348138007, 147.1662598018721], "label": "Decorating the Christmas tree"}]}, "Wr69bnB86d0": {"duration": 95.13, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Wr69bnB86d0", "annotations": [{"segment": [59.632090483619336, 81.43785491419656], "label": "Drinking beer"}]}, "BdRH3lpopLQ": {"duration": 58.91, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=BdRH3lpopLQ", "annotations": []}, "gU67yCVXHnQ": {"duration": 75.12, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gU67yCVXHnQ", "annotations": [{"segment": [3.515595317690259, 72.65563628705148], "label": "Brushing hair"}]}, "sJ0Tc4nthOA": {"duration": 61.905, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sJ0Tc4nthOA", "annotations": [{"segment": [7.575801092043681, 54.86424102964118], "label": "Doing fencing"}]}, "PjSOjB7qhcA": {"duration": 31.58, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PjSOjB7qhcA", "annotations": [{"segment": [7.67559174, 28.9737325], "label": "Dodgeball"}]}, "CZRkT9Lno3k": {"duration": 51.15, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=CZRkT9Lno3k", "annotations": [{"segment": [31.57439309986841, 40.57805691139489], "label": "Pole vault"}]}, "3svMy4Kfjd8": {"duration": 116.983, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3svMy4Kfjd8", "annotations": [{"segment": [0, 33.41260215910337], "label": "Waterskiing"}, {"segment": [34.32509928483874, 116.81483893278437], "label": "Waterskiing"}]}, "vSv1qMMt4Bk": {"duration": 201.99, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vSv1qMMt4Bk", "annotations": []}, "6wTk8QqWxuo": {"duration": 49.13, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=6wTk8QqWxuo", "annotations": [{"segment": [0.15330241508980086, 48.9034703775351], "label": "Mowing the lawn"}]}, "sFMa1EP7d2M": {"duration": 221.101, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sFMa1EP7d2M", "annotations": [{"segment": [53.44767550702028, 208.27540717628705], "label": "Assembling bicycle"}]}, "iPU4muYW14Y": {"duration": 180.26, "subset": "training", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=iPU4muYW14Y", "annotations": [{"segment": [41.45862812, 88.30036875488625], "label": "Tai chi"}, {"segment": [12.970700079999999, 42.181704819213174], "label": "Tai chi"}]}, "FIwKvp1H_UM": {"duration": 83.592, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FIwKvp1H_UM", "annotations": []}, "eJSSRVy5n88": {"duration": 9.196, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eJSSRVy5n88", "annotations": []}, "6Pj1Ocz8MUo": {"duration": 101.402, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6Pj1Ocz8MUo", "annotations": [{"segment": [0.01, 101.241], "label": "Longboarding"}]}, "6LGYaYsMIbc": {"duration": 36.73, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6LGYaYsMIbc", "annotations": []}, "JqFFXs0RAnI": {"duration": 38.546, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=JqFFXs0RAnI", "annotations": [{"segment": [0.2, 16.175722444617783], "label": "Playing rubik cube"}]}, "Si4_ER4lUME": {"duration": 39.43, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Si4_ER4lUME", "annotations": [{"segment": [3.4403744149765996, 36.17624385], "label": "Kayaking"}]}, "sxQbiXWFdKs": {"duration": 171.271, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sxQbiXWFdKs", "annotations": [{"segment": [35.002201865834635, 37.406933291731676], "label": "River tubing"}, {"segment": [40.61324185959439, 42.7507809048362], "label": "River tubing"}, {"segment": [46.758666614664584, 52.10251422776911], "label": "River tubing"}, {"segment": [70.00440373166927, 72.40913515756631], "label": "River tubing"}, {"segment": [82.5624456224649, 86.03594657098283], "label": "River tubing"}, {"segment": [91.37979418408737, 93.51733322932917], "label": "River tubing"}, {"segment": 
[105.80818273946959, 110.61764559126365], "label": "River tubing"}, {"segment": [154.704388399376, 158.17788934789394], "label": "River tubing"}]}, "6E4_XOsT89k": {"duration": 13.52, "subset": "training", "resolution": "136x240", "url": "https://www.youtube.com/watch?v=6E4_XOsT89k", "annotations": [{"segment": [0.5466993, 13.141028990639626], "label": "Plataform diving"}]}, "8v4NoUJMssI": {"duration": 55.26, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8v4NoUJMssI", "annotations": [{"segment": [30.692183444617786, 49.48683510452418], "label": "Drinking coffee"}]}, "lneRTkBTPwg": {"duration": 79.854, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=lneRTkBTPwg", "annotations": [{"segment": [0, 79.821], "label": "Doing fencing"}]}, "cErtrZrWhiQ": {"duration": 84.57, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cErtrZrWhiQ", "annotations": [{"segment": [8.707374131045242, 83.90742344461778], "label": "Playing accordion"}]}, "8Nv52hFr0tg": {"duration": 123.62, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8Nv52hFr0tg", "annotations": [{"segment": [11.15326706, 110.3692472698908], "label": "Using the balance beam"}]}, "apwnQn2dGv0": {"duration": 20.666, "subset": "testing", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=apwnQn2dGv0", "annotations": []}, "2Ks8gsK22PA": {"duration": 109.506, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=2Ks8gsK22PA", "annotations": [{"segment": [8.570090602518242, 81.19263039294803], "label": "Playing drums"}]}, "4Fxj9ObTMw8": {"duration": 17.3, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=4Fxj9ObTMw8", "annotations": [{"segment": [0.01, 1.2198859957490606], "label": "Washing face"}]}, "cy8dcwxvxdY": {"duration": 25.52, "subset": "testing", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=cy8dcwxvxdY", "annotations": []}, "px-_NixBgHM": {"duration": 44.7, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=px-_NixBgHM", "annotations": []}, "13ubLHzEW8A": {"duration": 198.95, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=13ubLHzEW8A", "annotations": []}, "FmHDfTBXEE0": {"duration": 187.13, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FmHDfTBXEE0", "annotations": []}, "N6vpa6BarX4": {"duration": 184.65, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=N6vpa6BarX4", "annotations": [{"segment": [0, 37.44752459486135], "label": "Sailing"}, {"segment": [62.796618166767495, 82.38455410869496], "label": "Sailing"}]}, "djpr7UMlnSw": {"duration": 23.987, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=djpr7UMlnSw", "annotations": [{"segment": [0.19999999999999998, 23.362909516380654], "label": "Laying tile"}]}, "G1LGXWN_9v4": {"duration": 129.452, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=G1LGXWN_9v4", "annotations": [{"segment": [0.2, 129.20100000000002], "label": "Playing congas"}]}, "lVe-7FnfNz8": {"duration": 17.253, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lVe-7FnfNz8", "annotations": [{"segment": [0.009999999999999998, 17.252425], "label": "Using the monkey bar"}]}, "s5oN3d77e50": {"duration": 71.87, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=s5oN3d77e50", "annotations": [{"segment": [0.01, 71.761], "label": "Doing motocross"}]}, "B0rorWq-j44": {"duration": 47.648, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=B0rorWq-j44", "annotations": [{"segment": [18.480045023364486, 27.608741962616822], "label": "Mooping floor"}]}, "Wt7Ca_mHbL0": {"duration": 
25.78, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Wt7Ca_mHbL0", "annotations": [{"segment": [13.156521060842433, 17.78343213728549], "label": "Springboard diving"}]}, "mOvBVmQrexM": {"duration": 229.646, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mOvBVmQrexM", "annotations": []}, "mhqfGS59FG0": {"duration": 79.55, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mhqfGS59FG0", "annotations": [{"segment": [10.11146723868955, 76.85955772230889], "label": "Playing racquetball"}]}, "ankROxowV70": {"duration": 195.257, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ankROxowV70", "annotations": []}, "Jix3poZOeZA": {"duration": 7.9, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=Jix3poZOeZA", "annotations": []}, "aWz9H0jTv3c": {"duration": 170.55, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=aWz9H0jTv3c", "annotations": [{"segment": [2.9946571487100804, 137.9474242243877], "label": "Playing squash"}, {"segment": [149.63624550116077, 161.05941271144115], "label": "Playing squash"}]}, "LsVCwr6qPmY": {"duration": 154.62, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=LsVCwr6qPmY", "annotations": []}, "xaG1pLsiYqo": {"duration": 43.909, "subset": "testing", "resolution": "854x476", "url": "https://www.youtube.com/watch?v=xaG1pLsiYqo", "annotations": []}, "73n-KuFkH94": {"duration": 179.7, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=73n-KuFkH94", "annotations": [{"segment": [16.674851794071763, 22.560093603744146], "label": "Triple jump"}, {"segment": [27.044087363494537, 29.286084243369732], "label": "Triple jump"}, {"segment": [35.73182527301092, 41.617067082683306], "label": "Triple jump"}, {"segment": [79.45076443057722, 86.7372542901716], 
"label": "Triple jump"}, {"segment": [94.02374414976599, 109.15722308892356], "label": "Triple jump"}, {"segment": [112.52021840873634, 147.83166926677066], "label": "Triple jump"}, {"segment": [151.19466458658346, 157.64040561622465], "label": "Triple jump"}]}, "98buk_vE_z4": {"duration": 176.728, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=98buk_vE_z4", "annotations": [{"segment": [9.098274833073322, 172.86722182839313], "label": "Playing congas"}]}, "kgPvfYhmn54": {"duration": 183.019, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=kgPvfYhmn54", "annotations": [{"segment": [8.05991541577041, 11.765921448505416], "label": "Rafting"}, {"segment": [83.32034491805852, 108.69223215949155], "label": "Rafting"}, {"segment": [126.9371847825445, 132.63873247724857], "label": "Rafting"}]}, "N_gMG044zh4": {"duration": 221.078, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=N_gMG044zh4", "annotations": []}, "27e4y89XeyI": {"duration": 20.09, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=27e4y89XeyI", "annotations": [{"segment": [4.324127522620905, 16.16847682371295], "label": "Ironing clothes"}]}, "uyp6t45rOD8": {"duration": 117.68, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uyp6t45rOD8", "annotations": [{"segment": [1.562342433697348, 114.78621762870516], "label": "High jump"}]}, "Gn7Ha7l1uOk": {"duration": 203.268, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Gn7Ha7l1uOk", "annotations": [{"segment": [15.53838793551742, 194.3884041729016], "label": "Playing drums"}]}, "kZJPGcmq3J8": {"duration": 97.75999999999999, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kZJPGcmq3J8", "annotations": []}, "G_qDsLIVhaA": {"duration": 30.14, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=G_qDsLIVhaA", "annotations": []}, "lipavQnHm_g": {"duration": 74.91, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lipavQnHm_g", "annotations": [{"segment": [44.72914196567863, 52.670608424336976], "label": "Playing racquetball"}]}, "zqcJ0N_a6y8": {"duration": 81.108, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zqcJ0N_a6y8", "annotations": [{"segment": [0.01, 81.081], "label": "Tug of war"}]}, "tQ9yPS_BJHQ": {"duration": 60.721, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tQ9yPS_BJHQ", "annotations": [{"segment": [0.2, 58.144171606864276], "label": "Riding bumper cars"}]}, "y0_lTTdKkro": {"duration": 96.99, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=y0_lTTdKkro", "annotations": [{"segment": [24.360920189188516, 86.24673607352457], "label": "Futsal"}]}, "_5by7Is4JEU": {"duration": 82.431, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=_5by7Is4JEU", "annotations": [{"segment": [0.2, 18.382750390015598], "label": "Rope skipping"}, {"segment": [40.750572542901715, 63.889698907956316], "label": "Rope skipping"}, {"segment": [71.98839313572543, 75.84491419656786], "label": "Rope skipping"}]}, "ABBA086Gmq0": {"duration": 200.57, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ABBA086Gmq0", "annotations": [{"segment": [10.63886754446178, 197.1319574414977], "label": "Doing motocross"}]}, "W3fKTlm1BPs": {"duration": 236.66, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=W3fKTlm1BPs", "annotations": [{"segment": [17.35241077398198, 228.16574076131045], "label": "Playing lacrosse"}]}, "PPXrVuZAb74": {"duration": 10.08, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=PPXrVuZAb74", "annotations": [{"segment": 
[1.5139843993759752, 7.317591263650546], "label": "Plataform diving"}]}, "Qdf3iuE0tYA": {"duration": 100.566, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Qdf3iuE0tYA", "annotations": []}, "m12BPEN6Y3s": {"duration": 180.396, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=m12BPEN6Y3s", "annotations": [{"segment": [15.755219968798754, 170.51289547581902], "label": "Baking cookies"}]}, "1IbkFHNA7fg": {"duration": 56.33, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1IbkFHNA7fg", "annotations": []}, "mglEC2-MH14": {"duration": 225.1, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mglEC2-MH14", "annotations": [{"segment": [134.84564381758943, 144.3269781485137], "label": "Removing curlers"}]}, "0DFbXnXdGIQ": {"duration": 130.43, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0DFbXnXdGIQ", "annotations": []}, "Sbgh1UQcG24": {"duration": 196.93, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Sbgh1UQcG24", "annotations": [{"segment": [3.3950387199999996, 190.8922861450858], "label": "Preparing salad"}]}, "R7vb6cuP6GE": {"duration": 79.181, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=R7vb6cuP6GE", "annotations": [{"segment": [24.21105869578783, 75.47426970982839], "label": "Snowboarding"}]}, "enx7eEDrYcc": {"duration": 14.094, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=enx7eEDrYcc", "annotations": [{"segment": [0, 13.84385647425897], "label": "Kneeling"}]}, "N1c3C_Npr-E": {"duration": 65.81, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=N1c3C_Npr-E", "annotations": [{"segment": [0.2, 65.80535], "label": "Horseback riding"}]}, "6ffxjwTIZHk": {"duration": 105.767, "subset": "training", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=6ffxjwTIZHk", "annotations": [{"segment": [31.350453691617055, 55.935809481358845], "label": "Spread mulch"}]}, "306mYrvdzIk": {"duration": 229.39, "subset": "training", "resolution": "384x288", "url": "https://www.youtube.com/watch?v=306mYrvdzIk", "annotations": [{"segment": [0, 207.65668614], "label": "Volleyball"}]}, "6hOaMdO2-bY": {"duration": 49.13, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6hOaMdO2-bY", "annotations": []}, "KoCjfxd302Q": {"duration": 121.95, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=KoCjfxd302Q", "annotations": [{"segment": [2.283013998110945, 119.47773256780611], "label": "Washing dishes"}]}, "-Mpj6_BEodc": {"duration": 79.32, "subset": "testing", "resolution": "400x240", "url": "https://www.youtube.com/watch?v=-Mpj6_BEodc", "annotations": []}, "mWj6lO9PZCs": {"duration": 145.102, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mWj6lO9PZCs", "annotations": [{"segment": [16.07208464665117, 122.01202153666146], "label": "Cutting the grass"}]}, "OTm43dbEEuE": {"duration": 71.19, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=OTm43dbEEuE", "annotations": [{"segment": [11.5507135975039, 70.52599167706708], "label": "Cheerleading"}]}, "mtXxvToU6qA": {"duration": 70.055, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mtXxvToU6qA", "annotations": []}, "seQE5VZt3K0": {"duration": 181.859, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=seQE5VZt3K0", "annotations": [{"segment": [0.01, 181.801], "label": "Laying tile"}]}, "5Ia1MCFut_8": {"duration": 18.762, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=5Ia1MCFut_8", "annotations": [{"segment": [11.494227769110765, 17.09547581903276], "label": "Fun sliding 
down"}]}, "VgQxsmHHD4I": {"duration": 71.53999999999999, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VgQxsmHHD4I", "annotations": []}, "ZIbrxshdMu0": {"duration": 139.785, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=ZIbrxshdMu0", "annotations": [{"segment": [15.513697120666501, 29.44436378733317], "label": "Croquet"}, {"segment": [65.7946971206665, 76.67803045399984], "label": "Croquet"}]}, "VE9MAMmF1wc": {"duration": 110.5, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=VE9MAMmF1wc", "annotations": [{"segment": [65.66620826833073, 96.64895397815913], "label": "Bungee jumping"}]}, "WkWCbz54JSU": {"duration": 143.268, "subset": "training", "resolution": "220x166", "url": "https://www.youtube.com/watch?v=WkWCbz54JSU", "annotations": [{"segment": [26.5144403040506, 90.6838495286427], "label": "Bullfighting"}, {"segment": [100.93313020360381, 104.72090784435032], "label": "Bullfighting"}]}, "OAHL9M7QNSY": {"duration": 90.02, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OAHL9M7QNSY", "annotations": []}, "wBbSb9Ubis4": {"duration": 200.249, "subset": "training", "resolution": "400x224", "url": "https://www.youtube.com/watch?v=wBbSb9Ubis4", "annotations": [{"segment": [0.01, 195.2504766857341], "label": "Bullfighting"}]}, "F6cNWYlfUs8": {"duration": 47.114, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=F6cNWYlfUs8", "annotations": [{"segment": [0.01, 37.11733296542746], "label": "Waxing skis"}]}, "m4ef0fCA2WU": {"duration": 31.719, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=m4ef0fCA2WU", "annotations": [{"segment": [12.257235569422777, 27.18338533541342], "label": "Swinging at the playground"}]}, "MFx2omPfM2o": {"duration": 108.066, "subset": "training", "resolution": "720x480", "url": 
"https://www.youtube.com/watch?v=MFx2omPfM2o", "annotations": [{"segment": [0.2, 96.13306528994438], "label": "Riding bumper cars"}]}, "dOUCAVnJLko": {"duration": 206.29, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=dOUCAVnJLko", "annotations": [{"segment": [6.919111790321953, 109.25760245647922], "label": "Bungee jumping"}]}, "lc_xgozMTWo": {"duration": 54.36, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lc_xgozMTWo", "annotations": []}, "KTIzccovgeg": {"duration": 65.25, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=KTIzccovgeg", "annotations": [{"segment": [26.872840474258968, 46.00956020592823], "label": "Shoveling snow"}]}, "jE2jDJ9WHeE": {"duration": 80.78, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jE2jDJ9WHeE", "annotations": [{"segment": [10.06220592, 68.37630826], "label": "Playing lacrosse"}]}, "AL1W3HkjJ2Y": {"duration": 45.67, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=AL1W3HkjJ2Y", "annotations": []}, "A8xThM3onkc": {"duration": 73.24, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=A8xThM3onkc", "annotations": [{"segment": [8.734645046021841, 65.70964998673948], "label": "Mixing drinks"}]}, "fgFU8vdsA8k": {"duration": 142.73, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=fgFU8vdsA8k", "annotations": [{"segment": [70.36450444140215, 142.5330612244898], "label": "Bungee jumping"}]}, "O7SljQqahy8": {"duration": 37.106, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=O7SljQqahy8", "annotations": [{"segment": [0.20000000000000004, 36.881], "label": "Cleaning sink"}]}, "at3YFTd6nEw": {"duration": 222.89, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=at3YFTd6nEw", "annotations": 
[{"segment": [45.55122855798585, 180.46631772209665], "label": "Getting a piercing"}]}, "n5xfkpQ8PWM": {"duration": 15.23, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=n5xfkpQ8PWM", "annotations": [{"segment": [1.7005004799999999, 7.16650368], "label": "Wrapping presents"}]}, "CF2HhBzVSaA": {"duration": 219.92000000000002, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CF2HhBzVSaA", "annotations": []}, "2aHetC-N-P4": {"duration": 180.35, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2aHetC-N-P4", "annotations": [{"segment": [0.01, 180.34938775510204], "label": "Cutting the grass"}]}, "fg_R9Vrr1KI": {"duration": 50.64, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fg_R9Vrr1KI", "annotations": [{"segment": [1.2640922808112325, 44.00621252574103], "label": "Using parallel bars"}]}, "96krk6Ka9Vc": {"duration": 115.752, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=96krk6Ka9Vc", "annotations": [{"segment": [56.34081008424337, 91.55381638689548], "label": "Hurling"}]}, "MmYNcmba_Ps": {"duration": 30.51, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MmYNcmba_Ps", "annotations": [{"segment": [11.808983619344772, 22.406180187207486], "label": "Plataform diving"}]}, "2Lrcn6XiiSY": {"duration": 209.328, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2Lrcn6XiiSY", "annotations": []}, "KVaTsulE5Z0": {"duration": 110.922, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KVaTsulE5Z0", "annotations": [{"segment": [7.671219046811601, 101.48815146269945], "label": "Slacklining"}]}, "mXbrIyhXbQo": {"duration": 31.231, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mXbrIyhXbQo", "annotations": [{"segment": [0.01, 
31.181], "label": "Powerbocking"}]}, "PfhsPS1-NB8": {"duration": 107.16, "subset": "testing", "resolution": "608x1080", "url": "https://www.youtube.com/watch?v=PfhsPS1-NB8", "annotations": []}, "oYDD-65IT4E": {"duration": 137.509, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=oYDD-65IT4E", "annotations": []}, "x3WVWHn3teI": {"duration": 129.55, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=x3WVWHn3teI", "annotations": [{"segment": [13.742590694103956, 29.101956763984848], "label": "Doing motocross"}]}, "-boJqGIgV9I": {"duration": 67.408, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-boJqGIgV9I", "annotations": [{"segment": [36.38534218720749, 44.1671783775351], "label": "Throwing darts"}]}, "ij9czNGMR5I": {"duration": 55.57, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=ij9czNGMR5I", "annotations": []}, "5PkSdg7BdnU": {"duration": 89.722, "subset": "testing", "resolution": "352x240", "url": "https://www.youtube.com/watch?v=5PkSdg7BdnU", "annotations": []}, "x06fLYM58wM": {"duration": 143.04, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=x06fLYM58wM", "annotations": [{"segment": [2.67771742706443, 133.88587113884554], "label": "Ballet"}]}, "4qONyTNaPOI": {"duration": 204.428, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=4qONyTNaPOI", "annotations": [{"segment": [37.94299977713395, 65.36399121270973], "label": "Waterskiing"}, {"segment": [72.0598147027922, 85.13261294533415], "label": "Waterskiing"}, {"segment": [97.56771371263014, 125.30755388582891], "label": "Waterskiing"}, {"segment": [135.1918647521411, 150.49660415804388], "label": "Waterskiing"}, {"segment": [154.0039402718966, 156.87357828393135], "label": "Waterskiing"}, {"segment": [160.6997631201248, 178.55529235569423], "label": "Waterskiing"}, {"segment": 
[182.38147719188765, 182.70032592823713], "label": "Waterskiing"}]}, "jVoj7XaUoU8": {"duration": 162.24, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jVoj7XaUoU8", "annotations": [{"segment": [15.814937597503901, 48.20392979719189], "label": "Ballet"}, {"segment": [64.14538689547582, 151.19080343213727], "label": "Ballet"}]}, "COIwJdDOmcc": {"duration": 199.251, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=COIwJdDOmcc", "annotations": [{"segment": [135.19677847113886, 145.14228861154447], "label": "Changing car wheel"}]}, "cBMi-cIbI2c": {"duration": 20.76, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cBMi-cIbI2c", "annotations": [{"segment": [5.019639453978159, 19.916633962558503], "label": "Discus throw"}]}, "b4DhjwkO-b4": {"duration": 65.71, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=b4DhjwkO-b4", "annotations": [{"segment": [8.728164452187249, 61.134371056548616], "label": "Dodgeball"}]}, "r10jGF9lQYM": {"duration": 216.573, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=r10jGF9lQYM", "annotations": [{"segment": [0.01, 202.99079719188768], "label": "Ice fishing"}]}, "hDV6wQKvKCg": {"duration": 44.42, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hDV6wQKvKCg", "annotations": [{"segment": [0.2, 44.160999999999994], "label": "Swinging at the playground"}]}, "n9Fqjdwh-L8": {"duration": 88.515, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=n9Fqjdwh-L8", "annotations": [{"segment": [14.913513960966602, 34.79819924225541], "label": "Capoeira"}, {"segment": [36.04099207233595, 82.43859001872075], "label": "Capoeira"}]}, "gIFI03m-ATM": {"duration": 54.5, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gIFI03m-ATM", "annotations": 
[{"segment": [5.648868954758191, 16.94660686427457], "label": "Paintball"}]}, "MpM-4y7oPuU": {"duration": 80.899, "subset": "testing", "resolution": "328x240", "url": "https://www.youtube.com/watch?v=MpM-4y7oPuU", "annotations": []}, "tydn-vo3DaY": {"duration": 221.4, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tydn-vo3DaY", "annotations": [{"segment": [100.85718488331369, 104.3111980642491], "label": "Rock climbing"}, {"segment": [126.76228374032921, 144.37775096309974], "label": "Rock climbing"}, {"segment": [154.73979050590594, 164.06562609443154], "label": "Rock climbing"}]}, "1qO8Z8SnrhU": {"duration": 126.223, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1qO8Z8SnrhU", "annotations": []}, "2dWPKpM7jME": {"duration": 68.36, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2dWPKpM7jME", "annotations": []}, "0w-3O0ZOQFQ": {"duration": 72.539, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=0w-3O0ZOQFQ", "annotations": [{"segment": [35.507799377916015, 47.131287713841374], "label": "Mooping floor"}]}, "q-mpNUe0JNo": {"duration": 137.02, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=q-mpNUe0JNo", "annotations": [{"segment": [0.01, 120.11632137285491], "label": "Skateboarding"}]}, "ijChwOwYDWc": {"duration": 210.234, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ijChwOwYDWc", "annotations": [{"segment": [0.01, 210.181], "label": "Ice fishing"}]}, "0XRZ2F5B7fQ": {"duration": 61.533, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=0XRZ2F5B7fQ", "annotations": [{"segment": [0, 32.44635376599064], "label": "Removing ice from car"}]}, "kmzWLxPtAsU": {"duration": 99.986, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=kmzWLxPtAsU", "annotations": 
[]}, "N4qhryre_TE": {"duration": 180.28, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=N4qhryre_TE", "annotations": []}, "Az7bzVcLMSM": {"duration": 184.48, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Az7bzVcLMSM", "annotations": [{"segment": [10.073150830795138, 177.863063240897], "label": "Belly dance"}]}, "qCy-68g0oXw": {"duration": 107.602, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qCy-68g0oXw", "annotations": [{"segment": [0.01, 106.32748377474533], "label": "Beer pong"}]}, "NIY1f2KcEe0": {"duration": 30.372, "subset": "validation", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=NIY1f2KcEe0", "annotations": [{"segment": [0.2, 30.17170068027211], "label": "Cutting the grass"}]}, "RFc5HiWSToY": {"duration": 16.88, "subset": "testing", "resolution": "406x720", "url": "https://www.youtube.com/watch?v=RFc5HiWSToY", "annotations": []}, "1jWMd8QaN5s": {"duration": 38.035, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1jWMd8QaN5s", "annotations": [{"segment": [0.2, 36.163198127925114], "label": "Table soccer"}]}, "99dCcB5Unmo": {"duration": 181.07, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=99dCcB5Unmo", "annotations": [{"segment": [34.462469594383776, 160.16573983619347], "label": "Ballet"}]}, "YeNlYRWhFzE": {"duration": 211.511, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YeNlYRWhFzE", "annotations": [{"segment": [8.907225317584132, 33.64951786642894], "label": "Surfing"}, {"segment": [46.18561275784365, 141.85581061337692], "label": "Surfing"}, {"segment": [149.44344699502264, 160.65995295049896], "label": "Surfing"}]}, "N88-LuWK_K0": {"duration": 149.653, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=N88-LuWK_K0", "annotations": [{"segment": 
[18.175822429906543, 111.63311370716512], "label": "Calf roping"}]}, "n6k21NjvqXE": {"duration": 94.343, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=n6k21NjvqXE", "annotations": [{"segment": [35.0288485397816, 75.35617837129486], "label": "Rafting"}]}, "OUPEzXXawCs": {"duration": 83.24, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OUPEzXXawCs", "annotations": [{"segment": [38.69981370046802, 83.243559], "label": "Playing polo"}]}, "uFmEm88xyhY": {"duration": 75.256, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=uFmEm88xyhY", "annotations": []}, "NZ5iioSnCO8": {"duration": 68.708, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NZ5iioSnCO8", "annotations": [{"segment": [0.10708424336973481, 68.641], "label": "Shuffleboard"}]}, "AWPlbtK7afY": {"duration": 188.678, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AWPlbtK7afY", "annotations": [{"segment": [6.475790951638066, 113.04396879875195], "label": "Rollerblading"}]}, "Fpivz0Yie_Q": {"duration": 201.29, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Fpivz0Yie_Q", "annotations": [{"segment": [68.92977568354435, 123.25711597171369], "label": "Doing motocross"}, {"segment": [129.85176421478627, 173.50205496655238], "label": "Doing motocross"}]}, "jqZg_FK2OlM": {"duration": 25.426, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=jqZg_FK2OlM", "annotations": [{"segment": [9.312379095163807, 21.35903120124805], "label": "Painting fence"}]}, "KBMvitQaXzE": {"duration": 22.454, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=KBMvitQaXzE", "annotations": [{"segment": [3.4153437974253666, 21.10507414966871], "label": "Camel ride"}]}, "Zg7J_rLXbuk": {"duration": 99.71000000000001, "subset": 
"training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Zg7J_rLXbuk", "annotations": [{"segment": [7.932916573805809, 94.41726100468019], "label": "Using the balance beam"}]}, "Hi4YiQO47EM": {"duration": 79.157, "subset": "training", "resolution": "406x720", "url": "https://www.youtube.com/watch?v=Hi4YiQO47EM", "annotations": [{"segment": [70.2932275212484, 73.98639193804466], "label": "Beer pong"}]}, "RiLEPcZ2t2A": {"duration": 230.459, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RiLEPcZ2t2A", "annotations": []}, "twCNBLY4Y_o": {"duration": 127.57, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=twCNBLY4Y_o", "annotations": []}, "rBy_0F04_50": {"duration": 74.14, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=rBy_0F04_50", "annotations": []}, "l7rzaG3aDyw": {"duration": 229.37, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=l7rzaG3aDyw", "annotations": [{"segment": [0, 229.341], "label": "Belly dance"}]}, "vVvImml1A8g": {"duration": 55.148, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vVvImml1A8g", "annotations": [{"segment": [0.2, 53.260543966971184], "label": "Riding bumper cars"}]}, "yRnM5oD4oJk": {"duration": 20.32, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yRnM5oD4oJk", "annotations": []}, "MoPg7NqyhLI": {"duration": 69.94, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MoPg7NqyhLI", "annotations": [{"segment": [5.455371840873635, 8.837702382215289], "label": "Archery"}, {"segment": [20.403090684867394, 47.13441270514821], "label": "Archery"}]}, "RaQE93FNLQI": {"duration": 98.058, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RaQE93FNLQI", "annotations": [{"segment": [4.130363541341654, 95.76324358814352], 
"label": "Baking cookies"}]}, "Bri_myFFu4A": {"duration": 65.9, "subset": "training", "resolution": "640x346", "url": "https://www.youtube.com/watch?v=Bri_myFFu4A", "annotations": [{"segment": [23.850701728549144, 59.93516856786272], "label": "Playing violin"}]}, "E8cnLtJEQhk": {"duration": 63.646, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=E8cnLtJEQhk", "annotations": []}, "gPofktt3hUw": {"duration": 76.255, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=gPofktt3hUw", "annotations": [{"segment": [0.19999999999999998, 59.004910577223086], "label": "Plastering"}, {"segment": [67.21325499219968, 76.05433], "label": "Plastering"}]}, "OzAqwPQ-APY": {"duration": 63.623, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OzAqwPQ-APY", "annotations": [{"segment": [7.044728549141966, 41.07771294851794], "label": "Carving jack-o-lanterns"}]}, "7jGKlEvH_uw": {"duration": 149.444, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7jGKlEvH_uw", "annotations": [{"segment": [56.62734009360375, 121.88347737909517], "label": "Hula hoop"}]}, "S1cz3vUvi-Y": {"duration": 94.16, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=S1cz3vUvi-Y", "annotations": []}, "6q3EIv2X8BQ": {"duration": 33.34, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=6q3EIv2X8BQ", "annotations": [{"segment": [20.203982194027052, 23.162388736083127], "label": "Washing hands"}]}, "Qx4YLntwZ5s": {"duration": 225.164, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Qx4YLntwZ5s", "annotations": [{"segment": [70.93666458658346, 207.20965678627147], "label": "Blow-drying hair"}]}, "On4V94fVLpg": {"duration": 165.791, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=On4V94fVLpg", "annotations": [{"segment": 
[43.43389703588144, 45.502177847113884], "label": "Carving jack-o-lanterns"}, {"segment": [75.4922496099844, 94.11302340093604], "label": "Carving jack-o-lanterns"}, {"segment": [117.38272698907957, 127.21462090483621], "label": "Carving jack-o-lanterns"}]}, "q7Ap96aMkZ8": {"duration": 17.276, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=q7Ap96aMkZ8", "annotations": [{"segment": [0.2, 16.918738317757008], "label": "Running a marathon"}]}, "QT4d1nTQYSE": {"duration": 198.49, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QT4d1nTQYSE", "annotations": []}, "WL4iqWa_1Z0": {"duration": 40.06, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=WL4iqWa_1Z0", "annotations": [{"segment": [27.284207488299533, 34.33939157566303], "label": "Drinking coffee"}]}, "g4OlXwjgwSs": {"duration": 155.09, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=g4OlXwjgwSs", "annotations": [{"segment": [6.893280016380655, 18.986753729329173], "label": "Shot put"}, {"segment": [22.856665317472697, 36.401355875975035], "label": "Shot put"}, {"segment": [46.07613484633385, 58.653347507800305], "label": "Shot put"}, {"segment": [75.58421070592823, 80.17973071684867], "label": "Shot put"}, {"segment": [89.85450968720748, 94.93376864664586], "label": "Shot put"}, {"segment": [113.31584869032761, 117.66949922698907], "label": "Shot put"}, {"segment": [127.34427819734789, 133.14914557956317], "label": "Shot put"}]}, "Cl0lsFUR5HE": {"duration": 188.291, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Cl0lsFUR5HE", "annotations": [{"segment": [0.2, 188.09009070294786], "label": "Longboarding"}, {"segment": [24.012111100531456, 173.94139016726442], "label": "Longboarding"}]}, "3haFn-jQefE": {"duration": 193.68, "subset": "testing", "resolution": "654x480", "url": 
"https://www.youtube.com/watch?v=3haFn-jQefE", "annotations": []}, "4-BW75h1D1A": {"duration": 216.85, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4-BW75h1D1A", "annotations": [{"segment": [0, 204.82638578636383], "label": "Cumbia"}]}, "Y1UwPTU61uk": {"duration": 99.96000000000001, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Y1UwPTU61uk", "annotations": [{"segment": [1.8713617116113217, 98.09054132137285], "label": "Using the balance beam"}]}, "iSHPVCBsnLw": {"duration": 62.091, "subset": "training", "resolution": "854x468", "url": "https://www.youtube.com/watch?v=iSHPVCBsnLw", "annotations": [{"segment": [46.39800321216848, 59.668413316692664], "label": "Rock-paper-scissors"}]}, "sy8WjZwaNw4": {"duration": 62.23, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=sy8WjZwaNw4", "annotations": []}, "yOKDaH8Go0A": {"duration": 128.151, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yOKDaH8Go0A", "annotations": [{"segment": [37.38553215132605, 128.1504081632653], "label": "Painting furniture"}]}, "fJB7KO1hoUo": {"duration": 147.77, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=fJB7KO1hoUo", "annotations": []}, "tzChPctqAP4": {"duration": 125.481, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tzChPctqAP4", "annotations": [{"segment": [11.90407267520798, 118.84557801969933], "label": "Capoeira"}]}, "-HKsU_uFUWo": {"duration": 119.281, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-HKsU_uFUWo", "annotations": [{"segment": [58.8053791832775, 64.37055650377421], "label": "Fun sliding down"}, {"segment": [101.10073157064824, 113.71513512896861], "label": "Fun sliding down"}]}, "-TIFhuhUePA": {"duration": 64.993, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=-TIFhuhUePA", "annotations": []}, "ZojEQYIV_o8": {"duration": 198.69, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZojEQYIV_o8", "annotations": [{"segment": [90.20209398580025, 120.88940431086631], "label": "Polishing forniture"}]}, "4NPVpI9zUpI": {"duration": 149.05, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=4NPVpI9zUpI", "annotations": []}, "F5n6TMMDLrg": {"duration": 91.14, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=F5n6TMMDLrg", "annotations": []}, "bz62JE0qMSk": {"duration": 192.36, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bz62JE0qMSk", "annotations": [{"segment": [89.27502662011243, 165.49639224882998], "label": "Playing badminton"}]}, "4ksF9BUdQJM": {"duration": 61.603, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4ksF9BUdQJM", "annotations": [{"segment": [4.805183227737273, 39.114191473781396], "label": "Cutting the grass"}, {"segment": [54.68298513165016, 61.602448979591834], "label": "Cutting the grass"}]}, "xRuZMDClaQM": {"duration": 189.708, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xRuZMDClaQM", "annotations": [{"segment": [5.917659906396256, 18.344745709828395], "label": "Doing fencing"}, {"segment": [35.801842433697345, 82.25547269890795], "label": "Doing fencing"}, {"segment": [97.93727145085803, 117.16966614664588], "label": "Doing fencing"}, {"segment": [129.59675195007802, 144.98266770670827], "label": "Doing fencing"}]}, "PMdba6f_cho": {"duration": 120.303, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PMdba6f_cho", "annotations": [{"segment": [13.60676663801246, 93.37057106774067], "label": "Waterskiing"}]}, "Qci4EFEIZuo": {"duration": 57.238, "subset": "validation", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=Qci4EFEIZuo", "annotations": [{"segment": [2.4068751950078004, 57.121], "label": "Drum corps"}]}, "ui_CNb4FUtQ": {"duration": 238.42000000000002, "subset": "training", "resolution": "640x354", "url": "https://www.youtube.com/watch?v=ui_CNb4FUtQ", "annotations": [{"segment": [11.530546729352167, 32.73187458654809], "label": "Playing saxophone"}, {"segment": [43.14656195499521, 57.65273364676083], "label": "Playing saxophone"}]}, "EkriUlmSGkE": {"duration": 6.04, "subset": "testing", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=EkriUlmSGkE", "annotations": []}, "nTY4IaGRFIc": {"duration": 125.88, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nTY4IaGRFIc", "annotations": []}, "BO0vQ6ASVlo": {"duration": 94.134, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BO0vQ6ASVlo", "annotations": [{"segment": [5.433614286068042, 11.014083012300084], "label": "Disc dog"}, {"segment": [12.335772973776095, 21.000184911076442], "label": "Disc dog"}, {"segment": [22.468729310452417, 26.874362508580344], "label": "Disc dog"}, {"segment": [27.60863470826833, 29.95830574726989], "label": "Disc dog"}, {"segment": [30.10516018720749, 31.133141266770668], "label": "Disc dog"}, {"segment": [36.713609984399376, 38.32900882371295], "label": "Disc dog"}, {"segment": [40.238116542901714, 42.88149646177847], "label": "Disc dog"}, {"segment": [51.69276285803432, 61.38515589391575], "label": "Disc dog"}, {"segment": [64.46909913260531, 68.72787789079563], "label": "Disc dog"}, {"segment": [68.87473233073322, 75.18947324804992], "label": "Disc dog"}, {"segment": [75.48318212792512, 82.091631925117], "label": "Disc dog"}, {"segment": [83.85388520436817, 86.79097400312013], "label": "Disc dog"}, {"segment": [89.43435392199687, 93.933696], "label": "Disc dog"}]}, "Y7gywSk5i0M": {"duration": 9.59, "subset": "validation", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=Y7gywSk5i0M", "annotations": [{"segment": [0.01, 7.763875647425897], "label": "Using uneven bars"}]}, "Xmp_twSbkAg": {"duration": 72.47, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Xmp_twSbkAg", "annotations": []}, "1517CiM5c0A": {"duration": 214.27, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1517CiM5c0A", "annotations": [{"segment": [8.101258880597408, 203.68445458153198], "label": "Playing flauta"}]}, "ccirM2NGwMA": {"duration": 16.998, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=ccirM2NGwMA", "annotations": [{"segment": [0.4242622152886116, 16.042415015600625], "label": "Blow-drying hair"}]}, "s3DoV1L9zAM": {"duration": 134.816, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=s3DoV1L9zAM", "annotations": [{"segment": [0.01, 131.0818775351014], "label": "Playing drums"}]}, "DXu_aHrZaUs": {"duration": 119.908, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=DXu_aHrZaUs", "annotations": [{"segment": [9.594747013805069, 113.38227972408544], "label": "Laying tile"}]}, "tBFX7g605Go": {"duration": 146.66, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tBFX7g605Go", "annotations": [{"segment": [95.38370826833074, 134.72662870514822], "label": "Washing face"}]}, "P6cR-26pTSY": {"duration": 171.13, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=P6cR-26pTSY", "annotations": [{"segment": [10.278542951241858, 113.33094760525114], "label": "Doing step aerobics"}]}, "Ovtfld_ZyCs": {"duration": 220.47, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ovtfld_ZyCs", "annotations": [{"segment": [34.39522979719189, 188.48585928861155], "label": "Breakdancing"}]}, "E8HSjhQ7Z2o": {"duration": 73.26, "subset": 
"testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=E8HSjhQ7Z2o", "annotations": []}, "p377oL-EM_E": {"duration": 39.47, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=p377oL-EM_E", "annotations": [{"segment": [8.92935810452418, 37.626467599063965], "label": "Shot put"}]}, "4GrPMa_BE6M": {"duration": 22.291, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=4GrPMa_BE6M", "annotations": [{"segment": [1.0780433775351015, 20.030741466458657], "label": "Arm wrestling"}]}, "bhRHD3bBNZY": {"duration": 188.779, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=bhRHD3bBNZY", "annotations": [{"segment": [15.076551141510581, 23.015798709696732], "label": "Hula hoop"}, {"segment": [66.2405910253769, 102.11422818532914], "label": "Hula hoop"}, {"segment": [124.75578606497115, 184.15311835843374], "label": "Hula hoop"}]}, "aF0gDTbcOxE": {"duration": 214.576, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aF0gDTbcOxE", "annotations": [{"segment": [13.386645865834634, 194.79385647425897], "label": "Sumo"}]}, "RGMaWJNYcKQ": {"duration": 133.492, "subset": "testing", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=RGMaWJNYcKQ", "annotations": []}, "IVnPfvBZxKE": {"duration": 115.59, "subset": "training", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=IVnPfvBZxKE", "annotations": [{"segment": [6.170635602219424, 100.15416056483625], "label": "Making a sandwich"}]}, "YG_jr8Efdxk": {"duration": 78.86, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YG_jr8Efdxk", "annotations": [{"segment": [21.036046521060847, 50.06825107644306], "label": "Snatch"}]}, "bDiwuABU45I": {"duration": 35.156, "subset": "training", "resolution": "328x240", "url": "https://www.youtube.com/watch?v=bDiwuABU45I", "annotations": [{"segment": [0, 
4.579474911856474], "label": "Applying sunscreen"}, {"segment": [7.541051501560063, 29.972251968018725], "label": "Applying sunscreen"}]}, "OUY4TkdjnyA": {"duration": 173.012, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OUY4TkdjnyA", "annotations": [{"segment": [26.36883639889989, 67.2674461611044], "label": "Table soccer"}, {"segment": [144.7595479596395, 160.36559632355863], "label": "Table soccer"}]}, "I52lhI6txNo": {"duration": 60.094, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=I52lhI6txNo", "annotations": [{"segment": [17.00635179407176, 48.30178705148206], "label": "Snow tubing"}]}, "sFQ343Qq4TE": {"duration": 105.42, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=sFQ343Qq4TE", "annotations": [{"segment": [4.111489629653213, 98.34683091107645], "label": "Doing step aerobics"}]}, "YP42JEe3V7o": {"duration": 135.582, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YP42JEe3V7o", "annotations": []}, "iNMxIXAcHYU": {"duration": 40.38, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=iNMxIXAcHYU", "annotations": [{"segment": [2.66801872074883, 40.24], "label": "Putting on makeup"}]}, "vOOXaGo3fis": {"duration": 164.398, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vOOXaGo3fis", "annotations": []}, "TDROfnEk0NQ": {"duration": 220.47, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=TDROfnEk0NQ", "annotations": [{"segment": [25.16307234353994, 40.27340348372686], "label": "Shaving"}, {"segment": [45.08123611924088, 79.07948118466145], "label": "Shaving"}, {"segment": [109.98697669868012, 133.33930664260538], "label": "Shaving"}, {"segment": [137.80372266129694, 199.2752970725119], "label": "Shaving"}]}, "7vcihMnmssI": {"duration": 168.856, "subset": "testing", "resolution": "960x720", 
"url": "https://www.youtube.com/watch?v=7vcihMnmssI", "annotations": []}, "rNJBvIoPlXM": {"duration": 67.385, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rNJBvIoPlXM", "annotations": []}, "sbvk9FNaAH0": {"duration": 178.26, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sbvk9FNaAH0", "annotations": [{"segment": [70.35847113884554, 151.56271450858034], "label": "Windsurfing"}]}, "dUBkspFNj-E": {"duration": 159.568, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dUBkspFNj-E", "annotations": [{"segment": [13.191377535101402, 159.541], "label": "Playing pool"}]}, "WFqr6QPsszQ": {"duration": 150.91, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WFqr6QPsszQ", "annotations": [{"segment": [10.15144662, 121.59619947581903], "label": "Wrapping presents"}]}, "GFQuGc1yY8o": {"duration": 106.14, "subset": "training", "resolution": "638x360", "url": "https://www.youtube.com/watch?v=GFQuGc1yY8o", "annotations": [{"segment": [9.433010920436818, 97.6399375975039], "label": "Tango"}]}, "hs-8typVf2A": {"duration": 106.49000000000001, "subset": "testing", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=hs-8typVf2A", "annotations": []}, "sZ-S-YZBpTs": {"duration": 16.069, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sZ-S-YZBpTs", "annotations": []}, "-zfOvigQLdA": {"duration": 24.29, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-zfOvigQLdA", "annotations": [{"segment": [1.5459312, 13.821716068642745], "label": "Springboard diving"}, {"segment": [16.62009360374415, 19.68318252730109], "label": "Springboard diving"}]}, "vu-3Zi94F0M": {"duration": 18.25, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=vu-3Zi94F0M", "annotations": [{"segment": [0.7402854258970359, 17.53907009048362], 
"label": "Kayaking"}]}, "9uxkazuxmDw": {"duration": 237.77, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9uxkazuxmDw", "annotations": [{"segment": [0.01, 237.77233560090704], "label": "Playing lacrosse"}]}, "l5ZoWgR0vhM": {"duration": 60.35, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=l5ZoWgR0vhM", "annotations": []}, "xafFWMX7ZeY": {"duration": 228.62, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xafFWMX7ZeY", "annotations": [{"segment": [9.273347129803561, 217.56699035308353], "label": "Walking the dog"}]}, "gOUqdgxwNPw": {"duration": 224.12, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=gOUqdgxwNPw", "annotations": [{"segment": [13.983213728549142, 212.8944290171607], "label": "Wakeboarding"}]}, "03xBbhjqQzw": {"duration": 23.708, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=03xBbhjqQzw", "annotations": []}, "oDZlW0OgEgg": {"duration": 134.12, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oDZlW0OgEgg", "annotations": [{"segment": [27.816088923556944, 112.5360031201248], "label": "Polishing forniture"}]}, "-Wmrg_0jxA8": {"duration": 93.159, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-Wmrg_0jxA8", "annotations": [{"segment": [9.301314768236988, 10.173313027759205], "label": "Rock-paper-scissors"}]}, "jWH-2aa1gJY": {"duration": 41.19, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jWH-2aa1gJY", "annotations": []}, "pRzFL4_I-cE": {"duration": 134.328, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pRzFL4_I-cE", "annotations": [{"segment": [19.698563398318242, 119.23917485803433], "label": "Playing blackjack"}]}, "2DwBXRhtX4s": {"duration": 48.414, "subset": "validation", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=2DwBXRhtX4s", "annotations": [{"segment": [14.19930975351014, 38.14176290171607], "label": "Doing fencing"}]}, "Wgh8e4V8hBc": {"duration": 11.936, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Wgh8e4V8hBc", "annotations": [{"segment": [0.48410523556942275, 1.8619432137285492], "label": "Hitting a pinata"}, {"segment": [4.17075279875195, 5.287918726989079], "label": "Hitting a pinata"}]}, "NFErgnaSRRY": {"duration": 78.298, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NFErgnaSRRY", "annotations": [{"segment": [8.672598716068642, 16.245853932917317], "label": "Carving jack-o-lanterns"}]}, "bCsxg077oP8": {"duration": 48.925, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=bCsxg077oP8", "annotations": []}, "qsEnLQ2UnEA": {"duration": 26.89, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qsEnLQ2UnEA", "annotations": []}, "90tOAABCbJI": {"duration": 201.06, "subset": "testing", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=90tOAABCbJI", "annotations": []}, "No2adeap68I": {"duration": 108.368, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=No2adeap68I", "annotations": [{"segment": [25.280136233504546, 78.87402952168392], "label": "Hanging wallpaper"}]}, "MDdCBN1o2sE": {"duration": 43.422, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MDdCBN1o2sE", "annotations": [{"segment": [0.2, 38.30521996879875], "label": "Kite flying"}]}, "5O1ttcUIUKk": {"duration": 95.41, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=5O1ttcUIUKk", "annotations": [{"segment": [24.175571938377534, 64.764308], "label": "Playing lacrosse"}]}, "sCj-ME5RkLY": {"duration": 71.89, "subset": "validation", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=sCj-ME5RkLY", "annotations": [{"segment": [9.932571762870515, 68.51791029641186], "label": "Volleyball"}]}, "o6lTgyb_AkM": {"duration": 171.99, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=o6lTgyb_AkM", "annotations": [{"segment": [29.514699608392494, 163.94074055207105], "label": "Painting"}]}, "SZXscHYG26Y": {"duration": 80.64, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=SZXscHYG26Y", "annotations": [{"segment": [0, 80.26545423400935], "label": "Windsurfing"}]}, "dFgwKTH-FhY": {"duration": 15.674, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dFgwKTH-FhY", "annotations": [{"segment": [0.01, 15.146136403607187], "label": "River tubing"}]}, "w_Z7rx4tmQk": {"duration": 26.262, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=w_Z7rx4tmQk", "annotations": []}, "vHejQlbAL78": {"duration": 189.87, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=vHejQlbAL78", "annotations": [{"segment": [26.9129795282599, 183.3631554517134], "label": "Skiing"}]}, "h3thb-S-3L8": {"duration": 168.23, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=h3thb-S-3L8", "annotations": [{"segment": [38.842045404056165, 131.4855726177847], "label": "Preparing pasta"}]}, "7UwRRkgQSj4": {"duration": 45.86, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7UwRRkgQSj4", "annotations": []}, "sQZaEt-ssCs": {"duration": 104.63, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sQZaEt-ssCs", "annotations": [{"segment": [7.8298408736349465, 23.000157566302654], "label": "Arm wrestling"}, {"segment": [31.319363494539786, 90.22354602184087], "label": "Arm wrestling"}]}, "5Ym-k30X5PY": {"duration": 177.54, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=5Ym-k30X5PY", "annotations": []}, "hYgheCsbtco": {"duration": 48.07, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hYgheCsbtco", "annotations": [{"segment": [0, 48.065282], "label": "Playing violin"}]}, "QBXswoKU4S4": {"duration": 164.79, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=QBXswoKU4S4", "annotations": [{"segment": [4.627965678627145, 21.340063962558503], "label": "Cricket"}, {"segment": [47.822312012480495, 106.95742901716068], "label": "Cricket"}]}, "b0MmqWHdhls": {"duration": 233.733, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=b0MmqWHdhls", "annotations": []}, "-h1jccLMbjM": {"duration": 39.29, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-h1jccLMbjM", "annotations": []}, "JD6SB3k9GkY": {"duration": 162.03, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=JD6SB3k9GkY", "annotations": []}, "_Wq1OGf0MHM": {"duration": 143.175, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_Wq1OGf0MHM", "annotations": [{"segment": [15.406160686427457, 112.77099063962558], "label": "Rafting"}]}, "0ixULg-DK_4": {"duration": 7.38, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0ixULg-DK_4", "annotations": []}, "b87ec6jTm_M": {"duration": 125.992, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=b87ec6jTm_M", "annotations": [{"segment": [25.472543498939526, 27.431970151923164], "label": "Drum corps"}, {"segment": [35.66156209445445, 36.83721808624463], "label": "Drum corps"}, {"segment": [41.53984205340537, 46.046523355267745], "label": "Drum corps"}, {"segment": [103.26178015789718, 107.57251873332487], "label": "Drum corps"}]}, "Xj1R81SK_zs": {"duration": 95.48, "subset": "validation", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=Xj1R81SK_zs", "annotations": [{"segment": [36.94095568174727, 40.66484234321373], "label": "Surfing"}, {"segment": [43.94186260530421, 74.32877776287052], "label": "Surfing"}]}, "ggGw9EGSMs8": {"duration": 19.32, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ggGw9EGSMs8", "annotations": [{"segment": [3.4851294068824736, 18.567328020566237], "label": "Painting fence"}]}, "HgYuRKZ_V90": {"duration": 159.43, "subset": "testing", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=HgYuRKZ_V90", "annotations": []}, "nO7weoDrM5k": {"duration": 25.29, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=nO7weoDrM5k", "annotations": []}, "aA6Bchzww4Y": {"duration": 187.269, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=aA6Bchzww4Y", "annotations": [{"segment": [0.01, 187.26895691609977], "label": "Removing ice from car"}]}, "86Yl3F2HSik": {"duration": 91.37, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=86Yl3F2HSik", "annotations": [{"segment": [5.1315685553822155, 87.94938329641187], "label": "Tennis serve with ball bouncing"}]}, "5Y4YkCkgShc": {"duration": 193.887, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5Y4YkCkgShc", "annotations": [{"segment": [50.95924859731347, 146.847064059098], "label": "Making a cake"}]}, "b5GJUtjiWkQ": {"duration": 42.28, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=b5GJUtjiWkQ", "annotations": [{"segment": [0, 36.61054989079563], "label": "Hopscotch"}]}, "BiIIco4EC2U": {"duration": 180.071, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=BiIIco4EC2U", "annotations": [{"segment": [0.2, 175.58300379341767], "label": "Playing rubik cube"}]}, "DfOiHMcrCbs": {"duration": 81.22, "subset": "validation", "resolution": 
"320x240", "url": "https://www.youtube.com/watch?v=DfOiHMcrCbs", "annotations": [{"segment": [39.4079322199688, 50.305302544461775], "label": "Smoking a cigarette"}]}, "HlhQ3-WOdgI": {"duration": 19.09, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HlhQ3-WOdgI", "annotations": [{"segment": [1.485257410296412, 11.703828393135726], "label": "Drinking coffee"}]}, "pgff9mC5y3s": {"duration": 26.123, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pgff9mC5y3s", "annotations": [{"segment": [5.635286271450858, 13.243939937597505], "label": "Hitting a pinata"}, {"segment": [17.434802652106086, 22.92768096723869], "label": "Hitting a pinata"}]}, "bt4qUkWab3k": {"duration": 113.85, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bt4qUkWab3k", "annotations": []}, "0eR57IP6paM": {"duration": 219.499, "subset": "testing", "resolution": "426x238", "url": "https://www.youtube.com/watch?v=0eR57IP6paM", "annotations": []}, "BJM1rUjXvkQ": {"duration": 223.33, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=BJM1rUjXvkQ", "annotations": [{"segment": [8.000901943331849, 213.241430054888], "label": "Making a cake"}]}, "zz3Mw8FMA70": {"duration": 121.88, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=zz3Mw8FMA70", "annotations": [{"segment": [0, 121.68154], "label": "Smoking hookah"}]}, "KT8DUDFc3Xg": {"duration": 60.07, "subset": "testing", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=KT8DUDFc3Xg", "annotations": []}, "sV6NoTdFaPE": {"duration": 86.24, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sV6NoTdFaPE", "annotations": [{"segment": [18.028103197597293, 45.19819817999999], "label": "Playing field hockey"}]}, "D-XCWhvStd4": {"duration": 130.427, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=D-XCWhvStd4", "annotations": [{"segment": [26.08529710999224, 58.38718333429131], "label": "Waxing skis"}, {"segment": [70.57657436232869, 118.52151240594239], "label": "Waxing skis"}]}, "nI6l7BP0odw": {"duration": 232.97, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nI6l7BP0odw", "annotations": []}, "DmFnPx4z_XA": {"duration": 162.958, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=DmFnPx4z_XA", "annotations": []}, "5yfDyVVkPuU": {"duration": 94.67, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5yfDyVVkPuU", "annotations": [{"segment": [3.4553455, 84.32962173790952], "label": "Cricket"}]}, "HhbTyPnFVIg": {"duration": 83.76, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=HhbTyPnFVIg", "annotations": [{"segment": [6.33515256, 59.044894920000004], "label": "Chopping wood"}]}, "DYwF_1xX4dU": {"duration": 215.02, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DYwF_1xX4dU", "annotations": [{"segment": [46.729584360000004, 192.3327535101404], "label": "Playing flauta"}]}, "fZ_CmvF-_No": {"duration": 226.02, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fZ_CmvF-_No", "annotations": [{"segment": [3.878092043681747, 221.0512464898596], "label": "Cheerleading"}]}, "VvsYrfEvvUw": {"duration": 12.38, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VvsYrfEvvUw", "annotations": []}, "iRyBbZlZv5Q": {"duration": 162.215, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=iRyBbZlZv5Q", "annotations": []}, "r-XQMXUJBqk": {"duration": 87.89, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=r-XQMXUJBqk", "annotations": [{"segment": [0.01, 0.02], "label": "Hopscotch"}]}, "MZiBlBhpq-Y": {"duration": 
163.72, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MZiBlBhpq-Y", "annotations": [{"segment": [1.4849676100000004, 31.5352078798752], "label": "Playing guitarra"}, {"segment": [34.005267100000005, 155.8886187098284], "label": "Playing guitarra"}]}, "ePaFTey15ho": {"duration": 199.46, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ePaFTey15ho", "annotations": [{"segment": [27.374864274570985, 169.24340717628706], "label": "Mixing drinks"}]}, "2Peh_gdQCjg": {"duration": 141.55, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2Peh_gdQCjg", "annotations": [{"segment": [78.7431524, 135.54137741653668], "label": "Playing badminton"}, {"segment": [11.4866202, 66.77102255999999], "label": "Playing badminton"}]}, "fyxXJJhCGBQ": {"duration": 40.54, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fyxXJJhCGBQ", "annotations": [{"segment": [8.348750620904838, 27.829168736349455], "label": "Chopping wood"}]}, "dQR6VEemP24": {"duration": 54.1, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dQR6VEemP24", "annotations": [{"segment": [0.8440326521060844, 37.221839957878316], "label": "Bathing dog"}]}, "AjDBsuCwTyk": {"duration": 14.44, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=AjDBsuCwTyk", "annotations": []}, "YU-dUWGOoD4": {"duration": 97.2, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YU-dUWGOoD4", "annotations": []}, "uwQDvcWzBy0": {"duration": 156.53, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uwQDvcWzBy0", "annotations": [{"segment": [30.974732250000002, 150.23097113884558], "label": "Wrapping presents"}]}, "DQXJ8OU7Ox4": {"duration": 170.46, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=DQXJ8OU7Ox4", "annotations": [{"segment": [20.9691225, 28.17863650546022], "label": "Clean and jerk"}, {"segment": [30.039489859594386, 35.09037753510141], "label": "Clean and jerk"}, {"segment": [45.72382527301092, 54.76225585023402], "label": "Clean and jerk"}, {"segment": [69.3832464898596, 78.95334945397816], "label": "Clean and jerk"}, {"segment": [90.65014196567863, 101.54942589703589], "label": "Clean and jerk"}, {"segment": [125.4746833073323, 140.37798439937598], "label": "Clean and jerk"}, {"segment": [155.00069110764431, 159.2545694227769], "label": "Clean and jerk"}]}, "hPQEkPZmNsU": {"duration": 154.48, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hPQEkPZmNsU", "annotations": [{"segment": [15.173892322932916, 141.3821395803432], "label": "Getting a haircut"}]}, "vfJnUQ9Tax0": {"duration": 32.28, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=vfJnUQ9Tax0", "annotations": [{"segment": [7.613873583249957, 23.114356449293567], "label": "Washing hands"}]}, "Q6Mov3tWyYg": {"duration": 146.054, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Q6Mov3tWyYg", "annotations": []}, "fU-OulK7lZs": {"duration": 144.498, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=fU-OulK7lZs", "annotations": [{"segment": [0.01, 108.40209204368176], "label": "Futsal"}]}, "KC_nNsohkmU": {"duration": 191.01, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=KC_nNsohkmU", "annotations": []}, "TnXeZsrYpUA": {"duration": 10.101, "subset": "testing", "resolution": "240x240", "url": "https://www.youtube.com/watch?v=TnXeZsrYpUA", "annotations": []}, "UTiSAR1o2nU": {"duration": 138.949, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UTiSAR1o2nU", "annotations": [{"segment": [8.453947170131704, 128.76011693915757], 
"label": "Slacklining"}]}, "RYJ3yzxZB8k": {"duration": 154.32, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RYJ3yzxZB8k", "annotations": [{"segment": [7.517136614802983, 10.401619205346988], "label": "Breakdancing"}, {"segment": [67.3701503685911, 139.96296078481546], "label": "Breakdancing"}]}, "bnx74ZKCrBQ": {"duration": 92.3, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bnx74ZKCrBQ", "annotations": []}, "FaDD5MyZj88": {"duration": 141.67000000000002, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FaDD5MyZj88", "annotations": [{"segment": [3.9781106957878314, 110.50307488299532], "label": "Triple jump"}]}, "XazKuBawFCM": {"duration": 134.21, "subset": "training", "resolution": "2560x810", "url": "https://www.youtube.com/watch?v=XazKuBawFCM", "annotations": [{"segment": [6.480499219968799, 123.2958018720749], "label": "Discus throw"}]}, "fjnOZFkhub4": {"duration": 89.815, "subset": "testing", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=fjnOZFkhub4", "annotations": []}, "3JNNjd9fv7U": {"duration": 156.22, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3JNNjd9fv7U", "annotations": [{"segment": [17.54776507956318, 84.81419788455538], "label": "Vacuuming floor"}]}, "fe4r2-Y8k1U": {"duration": 212.161, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fe4r2-Y8k1U", "annotations": []}, "_fLhtWSCaSo": {"duration": 77.021, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_fLhtWSCaSo", "annotations": []}, "jpSbO4LrEnc": {"duration": 13.398, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=jpSbO4LrEnc", "annotations": [{"segment": [0.01, 13.009748226236123], "label": "Rollerblading"}]}, "UaO7bS5Ky6M": {"duration": 182.74, "subset": "testing", "resolution": "960x720", 
"url": "https://www.youtube.com/watch?v=UaO7bS5Ky6M", "annotations": []}, "sfbgZOHLkII": {"duration": 122.23, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sfbgZOHLkII", "annotations": []}, "Q6XjxUlbP2M": {"duration": 37.454, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Q6XjxUlbP2M", "annotations": [{"segment": [0.2, 36.40811232449298], "label": "Riding bumper cars"}]}, "sz0GhFkkXYI": {"duration": 118.144, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sz0GhFkkXYI", "annotations": [{"segment": [0.01, 105.70216010273064], "label": "Swinging at the playground"}]}, "0m63LNVHP1U": {"duration": 136.766, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0m63LNVHP1U", "annotations": [{"segment": [13.222249609984399, 110.68302496099844], "label": "Swinging at the playground"}]}, "2w3FSD235TY": {"duration": 205.73, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2w3FSD235TY", "annotations": [{"segment": [119.84164118564743, 145.51053354134163], "label": "Horseback riding"}]}, "F-Z17rHI_ms": {"duration": 36.8, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=F-Z17rHI_ms", "annotations": []}, "JWb0-VdfZI4": {"duration": 56.45, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JWb0-VdfZI4", "annotations": [{"segment": [10.15425083, 36.16051267], "label": "Hopscotch"}, {"segment": [40.43411504, 53.80436817472699], "label": "Hopscotch"}]}, "Ep5LuX_2gfI": {"duration": 175.59, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ep5LuX_2gfI", "annotations": []}, "jrCsCqylsmA": {"duration": 63.298, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jrCsCqylsmA", "annotations": []}, "sGUkc9ajgiU": {"duration": 31.63, "subset": "validation", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=sGUkc9ajgiU", "annotations": [{"segment": [5.48915125, 25.77785375], "label": "Polishing shoes"}]}, "K6Tg4ZlgF8Y": {"duration": 128.662, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=K6Tg4ZlgF8Y", "annotations": []}, "snvSHNYvRks": {"duration": 35.5, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=snvSHNYvRks", "annotations": [{"segment": [3.397371294851794, 32.627257], "label": "Shot put"}]}, "xxzNNwzKSAo": {"duration": 152.068, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xxzNNwzKSAo", "annotations": []}, "lL2XqxgNIeQ": {"duration": 108.461, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lL2XqxgNIeQ", "annotations": [{"segment": [39.91787207488299, 46.51446957878316], "label": "River tubing"}, {"segment": [62.41396099843994, 65.62768798751951], "label": "River tubing"}, {"segment": [78.1443088923557, 83.40314040561623], "label": "River tubing"}, {"segment": [92.87692511700467, 96.59876911076444], "label": "River tubing"}]}, "sZRUTtoxY_s": {"duration": 45.47, "subset": "validation", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=sZRUTtoxY_s", "annotations": [{"segment": [5.603290062402496, 43.549621497659906], "label": "Doing kickboxing"}]}, "oG4zf8dAQKw": {"duration": 37.57, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=oG4zf8dAQKw", "annotations": [{"segment": [19.869252753510143, 31.064023478939156], "label": "Gargling mouthwash"}]}, "nVJ5SmO86O4": {"duration": 79.2, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=nVJ5SmO86O4", "annotations": []}, "WznRr7xFs08": {"duration": 202.896, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WznRr7xFs08", "annotations": []}, "Rvc6Q33VEjA": {"duration": 196.63, 
"subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Rvc6Q33VEjA", "annotations": []}, "LslQvR_4bkk": {"duration": 169.994, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=LslQvR_4bkk", "annotations": [{"segment": [6.359101404056163, 169.841], "label": "Playing ice hockey"}]}, "vREvYuCYk0E": {"duration": 7.43, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vREvYuCYk0E", "annotations": []}, "SMUWOVJ3bzE": {"duration": 69.96000000000001, "subset": "training", "resolution": "632x360", "url": "https://www.youtube.com/watch?v=SMUWOVJ3bzE", "annotations": [{"segment": [1.5280251294851794, 66.57823778471138], "label": "Playing piano"}]}, "KSP62q7tM68": {"duration": 80.551, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KSP62q7tM68", "annotations": [{"segment": [18.581734789391575, 47.333202808112325], "label": "Surfing"}]}, "hvhny4-sI-w": {"duration": 63.28, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hvhny4-sI-w", "annotations": []}, "HhlWUEbHBUk": {"duration": 90.33, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HhlWUEbHBUk", "annotations": [{"segment": [20.704067082683306, 38.027878315132604], "label": "Mixing drinks"}, {"segment": [67.18258502340095, 87.1824321372855], "label": "Mixing drinks"}]}, "o5j6N8O6HS4": {"duration": 30.024, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=o5j6N8O6HS4", "annotations": [{"segment": [4.121777223088923, 30.0234], "label": "Using the rowing machine"}]}, "vKShqwBJrME": {"duration": 82.896, "subset": "training", "resolution": "704x480", "url": "https://www.youtube.com/watch?v=vKShqwBJrME", "annotations": [{"segment": [4.519210799584632, 63.01070974454828], "label": "Playing rubik cube"}]}, "LOLS94lyERc": {"duration": 169.599, "subset": "testing", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LOLS94lyERc", "annotations": []}, "P4PQ5tC3gX8": {"duration": 80.39, "subset": "validation", "resolution": "854x478", "url": "https://www.youtube.com/watch?v=P4PQ5tC3gX8", "annotations": [{"segment": [8.76057526, 75.97177295553823], "label": "Playing accordion"}]}, "F30odTEdsxo": {"duration": 41.518, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=F30odTEdsxo", "annotations": [{"segment": [0.2, 41.317277], "label": "Playing blackjack"}]}, "Vcxv3nyZpSs": {"duration": 37.385, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=Vcxv3nyZpSs", "annotations": [{"segment": [1.5163323322048528, 32.95106707488299], "label": "Cutting the grass"}]}, "_Ew3g9PXhvo": {"duration": 133.419, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_Ew3g9PXhvo", "annotations": [{"segment": [23.519983347581903, 128.42327190670827], "label": "Slacklining"}]}, "Si6LZFiQT3k": {"duration": 190.92, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Si6LZFiQT3k", "annotations": [{"segment": [7.404078003120125, 179.4065054602184], "label": "Starting a campfire"}]}, "3eQqgH3PJBo": {"duration": 157.06, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3eQqgH3PJBo", "annotations": [{"segment": [0.010000000000000002, 156.38508978130065], "label": "Playing pool"}]}, "ydJsJgdaEq4": {"duration": 109.11, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ydJsJgdaEq4", "annotations": [{"segment": [61.278944461778465, 91.57797811232449], "label": "Cleaning windows"}]}, "WdKelyOqZvU": {"duration": 179.096, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WdKelyOqZvU", "annotations": []}, "AhsujXLZYWA": {"duration": 61.998, "subset": "training", "resolution": "272x480", "url": 
"https://www.youtube.com/watch?v=AhsujXLZYWA", "annotations": [{"segment": [0.2, 17.109355694227766], "label": "Clipping cat claws"}, {"segment": [25.905691107644305, 46.881567862714505], "label": "Clipping cat claws"}]}, "0VHUSC4YFT4": {"duration": 86.52, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0VHUSC4YFT4", "annotations": [{"segment": [64.51698803463339, 79.49896642761311], "label": "Bungee jumping"}]}, "SqZh6ytnQ3g": {"duration": 235.6, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SqZh6ytnQ3g", "annotations": []}, "6nMQRUhOcwM": {"duration": 49.691, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6nMQRUhOcwM", "annotations": [{"segment": [0.01, 49.661], "label": "Disc dog"}]}, "n04N7AuqZsY": {"duration": 231.53, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=n04N7AuqZsY", "annotations": [{"segment": [50.01319266770671, 58.31863260530422], "label": "Playing field hockey"}, {"segment": [66.62407254290171, 85.04048283931357], "label": "Playing field hockey"}, {"segment": [93.70702886115444, 111.04012090483619], "label": "Playing field hockey"}, {"segment": [133.06759204368174, 145.70630499219968], "label": "Playing field hockey"}, {"segment": [163.03939703588142, 173.8725795631825], "label": "Playing field hockey"}, {"segment": [185.42797425897035, 196.62226287051482], "label": "Playing field hockey"}]}, "OjiIamJOwlc": {"duration": 75.976, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OjiIamJOwlc", "annotations": []}, "9OTkoHkDszQ": {"duration": 126.9, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9OTkoHkDszQ", "annotations": []}, "IxaBEVd2PZM": {"duration": 191.36, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=IxaBEVd2PZM", "annotations": []}, "j0JsoWxrGh0": {"duration": 
100.102, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=j0JsoWxrGh0", "annotations": [{"segment": [57.468409691489704, 75.73961603362093], "label": "Sharpening knives"}]}, "VSsZZZHn1L0": {"duration": 198.996, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VSsZZZHn1L0", "annotations": []}, "GfiqDJA-qqU": {"duration": 190.752, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GfiqDJA-qqU", "annotations": [{"segment": [34.817434493298094, 77.66966156197267], "label": "Baton twirling"}, {"segment": [80.34792575376484, 162.18377568642745], "label": "Baton twirling"}]}, "m1aF1CVo-s8": {"duration": 94.506, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=m1aF1CVo-s8", "annotations": [{"segment": [19.123566978193146, 75.17032866043614], "label": "Elliptical trainer"}]}, "Dd5yTxwKzXw": {"duration": 235.265, "subset": "testing", "resolution": "2276x720", "url": "https://www.youtube.com/watch?v=Dd5yTxwKzXw", "annotations": []}, "zzci2xZ011A": {"duration": 48.53, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zzci2xZ011A", "annotations": [{"segment": [17.629848673946956, 20.959090483619345], "label": "Beach soccer"}, {"segment": [24.137003120124803, 27.99589703588143], "label": "Beach soccer"}, {"segment": [41.99384555382215, 45.17175819032761], "label": "Beach soccer"}]}, "suWTtKXXkJU": {"duration": 89.119, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=suWTtKXXkJU", "annotations": [{"segment": [8.337004680187206, 58.7851216848674], "label": "Knitting"}, {"segment": [63.64913884555383, 72.6823135725429], "label": "Knitting"}]}, "79jhPh0PzWQ": {"duration": 14.675, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=79jhPh0PzWQ", "annotations": [{"segment": [2.674439268153776, 7.965267928589912], 
"label": "Croquet"}]}, "Tk3zk2pJ9KA": {"duration": 131.193, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Tk3zk2pJ9KA", "annotations": [{"segment": [69.5811856474259, 92.29739625585024], "label": "Rafting"}, {"segment": [101.0973697347894, 119.31126833073324], "label": "Rafting"}]}, "H4wC2d_Vbog": {"duration": 160.66, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=H4wC2d_Vbog", "annotations": [{"segment": [0, 158.86694851794073], "label": "Doing step aerobics"}]}, "Xrjkjz1l4qw": {"duration": 15.093, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Xrjkjz1l4qw", "annotations": [{"segment": [0.2, 13.841105919003114], "label": "Playing rubik cube"}]}, "K0MzjnMzbj4": {"duration": 175.78, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=K0MzjnMzbj4", "annotations": [{"segment": [12.202767229329172, 162.7492663057722], "label": "Layup drill in basketball"}]}, "xe6-tTvxQxk": {"duration": 75.51, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xe6-tTvxQxk", "annotations": [{"segment": [0.01, 75.51077097505669], "label": "Tennis serve with ball bouncing"}]}, "tzbJ_CETeG8": {"duration": 193.423, "subset": "training", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=tzbJ_CETeG8", "annotations": [{"segment": [47.52574102964119, 82.83057722308892], "label": "Camel ride"}, {"segment": [102.14262437164153, 106.97063615877968], "label": "Camel ride"}]}, "nShRXseBM4Y": {"duration": 71.4, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nShRXseBM4Y", "annotations": [{"segment": [31.00017217, 34.0298208650546], "label": "Making a sandwich"}, {"segment": [44.16636325039002, 48.62198627691107], "label": "Making a sandwich"}]}, "X1MbVBqHFOk": {"duration": 62.369, "subset": "testing", "resolution": "640x360", "url": 
"https://www.youtube.com/watch?v=X1MbVBqHFOk", "annotations": []}, "XOTqx9kP-3w": {"duration": 92.16, "subset": "testing", "resolution": "592x480", "url": "https://www.youtube.com/watch?v=XOTqx9kP-3w", "annotations": []}, "8Mdnvm8olC0": {"duration": 94.97, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8Mdnvm8olC0", "annotations": [{"segment": [18.51975371531868, 20.445808101711823], "label": "Long jump"}, {"segment": [38.372929698140304, 40.891616203423645], "label": "Long jump"}, {"segment": [56.596367354013886, 58.37426371068448], "label": "Long jump"}, {"segment": [62.522688508580345, 65.63400713104524], "label": "Long jump"}, {"segment": [67.70821954602184, 71.70848634633386], "label": "Long jump"}]}, "YYetK3WzRdc": {"duration": 152.37, "subset": "testing", "resolution": "582x480", "url": "https://www.youtube.com/watch?v=YYetK3WzRdc", "annotations": []}, "Sw4QZTF7sOI": {"duration": 78.09, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=Sw4QZTF7sOI", "annotations": [{"segment": [12.883132, 78.0887074829932], "label": "Preparing salad"}]}, "PIr3WhkKB1o": {"duration": 137.439, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=PIr3WhkKB1o", "annotations": [{"segment": [39.75690405616225, 44.47200858034321], "label": "Applying sunscreen"}, {"segment": [51.544665366614666, 54.75950936037441], "label": "Applying sunscreen"}, {"segment": [71.2623751950078, 73.61992745709829], "label": "Applying sunscreen"}]}, "otGlbzsPIi0": {"duration": 66.827, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=otGlbzsPIi0", "annotations": [{"segment": [11.147530421216851, 63.674588143525746], "label": "Making an omelette"}]}, "Ti1ZaH0VGfg": {"duration": 81.55, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ti1ZaH0VGfg", "annotations": [{"segment": [2.4173572542901716, 73.02963494539782], 
"label": "Playing bagpipes"}]}, "ESecNZbZgug": {"duration": 91.557, "subset": "validation", "resolution": "638x360", "url": "https://www.youtube.com/watch?v=ESecNZbZgug", "annotations": [{"segment": [10.426846374535254, 91.35628117913832], "label": "Elliptical trainer"}]}, "fxbEiZrQQzM": {"duration": 65.783, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=fxbEiZrQQzM", "annotations": [{"segment": [3.3865995945960288, 41.562812878315135], "label": "Cutting the grass"}, {"segment": [54.49346577379096, 63.52439795475819], "label": "Cutting the grass"}]}, "CKWWRS9CpTY": {"duration": 85.566, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=CKWWRS9CpTY", "annotations": [{"segment": [6.54089082527301, 83.02926721060841], "label": "Running a marathon"}]}, "vaXGI0UyZ8s": {"duration": 143.34, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vaXGI0UyZ8s", "annotations": []}, "55ziFpzx5oc": {"duration": 68.71000000000001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=55ziFpzx5oc", "annotations": [{"segment": [0.01, 63.02685221840873], "label": "Playing guitarra"}]}, "tww0AR8NanU": {"duration": 203.38, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tww0AR8NanU", "annotations": [{"segment": [10.153314584284052, 44.73804199843994], "label": "Playing harmonica"}]}, "JjUQ6dWZIL0": {"duration": 189.406, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=JjUQ6dWZIL0", "annotations": [{"segment": [28.95742053354134, 166.06194224336974], "label": "Capoeira"}]}, "UYe6JGaUZzg": {"duration": 117.354, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UYe6JGaUZzg", "annotations": [{"segment": [0.2, 117.101], "label": "Baton twirling"}]}, "z9MMLl1isUk": {"duration": 41.053, "subset": "training", "resolution": "640x480", 
"url": "https://www.youtube.com/watch?v=z9MMLl1isUk", "annotations": [{"segment": [3.010117492979719, 31.830391361934474], "label": "Hitting a pinata"}]}, "r-iXUXMP4DY": {"duration": 233.13, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=r-iXUXMP4DY", "annotations": [{"segment": [31.641444596559374, 204.03276197035882], "label": "Ironing clothes"}]}, "RoPO0M2qmBg": {"duration": 87.93, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RoPO0M2qmBg", "annotations": []}, "X82bc2v5kcM": {"duration": 116.309, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=X82bc2v5kcM", "annotations": [{"segment": [20.866624594383776, 107.23630552418096], "label": "Blow-drying hair"}]}, "iMXdwkAGfM4": {"duration": 218.36, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iMXdwkAGfM4", "annotations": [{"segment": [0.01, 123.63576131045241], "label": "Shaving"}, {"segment": [146.45558502340094, 198.56652574102964], "label": "Shaving"}]}, "U7oDqpIYsxI": {"duration": 73.26, "subset": "validation", "resolution": "312x240", "url": "https://www.youtube.com/watch?v=U7oDqpIYsxI", "annotations": [{"segment": [36.22946844602184, 38.85810495787831], "label": "Spinning"}]}, "ranTpEJvqs8": {"duration": 53.546, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ranTpEJvqs8", "annotations": [{"segment": [19.78857566302652, 53.501], "label": "BMX"}]}, "wott7JRSkOk": {"duration": 5.411, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wott7JRSkOk", "annotations": [{"segment": [0.2, 5.210249433106576], "label": "Table soccer"}]}, "vaT6Saf9n3E": {"duration": 224.61, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vaT6Saf9n3E", "annotations": []}, "yl37hI-Bgkk": {"duration": 57.47, "subset": "validation", "resolution": "1920x1080", 
"url": "https://www.youtube.com/watch?v=yl37hI-Bgkk", "annotations": [{"segment": [21.248255913910025, 54.06201821134069], "label": "Getting a haircut"}]}, "upoS4Jct7kE": {"duration": 90.535, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=upoS4Jct7kE", "annotations": [{"segment": [44.63370828390555, 65.19055914549186], "label": "Sharpening knives"}]}, "kgmcYBRYkAk": {"duration": 16.14, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kgmcYBRYkAk", "annotations": [{"segment": [0, 13.98448557], "label": "Hopscotch"}]}, "lrlUN65DM8c": {"duration": 109.03999999999999, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lrlUN65DM8c", "annotations": [{"segment": [8.930807433697348, 14.374347202808112], "label": "Discus throw"}]}, "-aWU5Yj_OPw": {"duration": 15.09, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-aWU5Yj_OPw", "annotations": [{"segment": [0.40027253198127927, 14.645265581903276], "label": "Playing piano"}]}, "s9Wop4PMZaw": {"duration": 42.934, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=s9Wop4PMZaw", "annotations": [{"segment": [0.2, 27.576210021598683], "label": "Camel ride"}]}, "p81NOkb2rww": {"duration": 100.52, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=p81NOkb2rww", "annotations": [{"segment": [35.72290410174314, 94.5762118049574], "label": "Futsal"}]}, "p3vqC_FFyyM": {"duration": 147.308, "subset": "validation", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=p3vqC_FFyyM", "annotations": [{"segment": [0.01, 141.15953212570474], "label": "Roof shingle removal"}]}, "KzxVQ19pRUU": {"duration": 235.4, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KzxVQ19pRUU", "annotations": [{"segment": [55.63758328292315, 200.69926907008255], "label": "Mixing 
drinks"}]}, "eOmL8f-yB40": {"duration": 172.13, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=eOmL8f-yB40", "annotations": [{"segment": [12.406390389732607, 166.84035392286094], "label": "Playing piano"}]}, "om1vXrpctVE": {"duration": 153.368, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=om1vXrpctVE", "annotations": [{"segment": [41.740844073340845, 126.89217220496595], "label": "Sumo"}]}, "bxxSc2m08Sk": {"duration": 44.931, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bxxSc2m08Sk", "annotations": [{"segment": [0.2, 41.38559501557632], "label": "Capoeira"}]}, "R4aCl2dLS1w": {"duration": 163.72, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=R4aCl2dLS1w", "annotations": [{"segment": [10.852197043681748, 136.73768275039004], "label": "Doing kickboxing"}]}, "Xbk1XePzTIA": {"duration": 59.93, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Xbk1XePzTIA", "annotations": [{"segment": [0.9584839219779613, 51.43580635188451], "label": "Starting a campfire"}]}, "CX5QUmM97DI": {"duration": 219.27, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CX5QUmM97DI", "annotations": [{"segment": [54.04964353903655, 184.5014535078839], "label": "Chopping wood"}]}, "ufK2mbJI0to": {"duration": 170.296, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ufK2mbJI0to", "annotations": [{"segment": [1.8596973822152885, 168.96679072698905], "label": "Futsal"}]}, "Ey7w7pu5HZc": {"duration": 86.43, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ey7w7pu5HZc", "annotations": [{"segment": [0.2, 83.37302808112325], "label": "Cumbia"}]}, "DMw9Cb_Xy2A": {"duration": 97.06, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DMw9Cb_Xy2A", 
"annotations": [{"segment": [0.2, 3.4826309001560065], "label": "Archery"}, {"segment": [8.933705352574103, 10.447892700468019], "label": "Archery"}, {"segment": [15.141873478939159, 17.110317031201248], "label": "Archery"}, {"segment": [22.409972748829954, 24.529835035881437], "label": "Archery"}, {"segment": [29.829490753510143, 31.949353040561626], "label": "Archery"}, {"segment": [37.24900875819033, 38.61177737129485], "label": "Archery"}, {"segment": [42.700083210608426, 45.879876641185646], "label": "Archery"}, {"segment": [49.816763745709835, 64.6557997550702], "label": "Archery"}]}, "dRiIYsAmNAc": {"duration": 64.761, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=dRiIYsAmNAc", "annotations": [{"segment": [0.2, 63.750171177847115], "label": "Beach soccer"}]}, "gnmtsqvTO_c": {"duration": 178.7, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=gnmtsqvTO_c", "annotations": [{"segment": [92.55640555962376, 120.99240967733947], "label": "Washing hands"}]}, "5k2Ot6-wOgc": {"duration": 190.543, "subset": "testing", "resolution": "543x360", "url": "https://www.youtube.com/watch?v=5k2Ot6-wOgc", "annotations": []}, "2GACaR0GdD8": {"duration": 146.05, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2GACaR0GdD8", "annotations": [{"segment": [4.441528081123245, 17.42445631825273], "label": "Horseback riding"}, {"segment": [20.841016380655226, 32.68509126365055], "label": "Horseback riding"}, {"segment": [100.57452652106083, 137.70623166926677], "label": "Horseback riding"}]}, "mxIsgAmJHsM": {"duration": 88.074, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mxIsgAmJHsM", "annotations": [{"segment": [13.454131045241809, 80.45021216848674], "label": "Baton twirling"}]}, "PAws7ziV3ns": {"duration": 68.66, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=PAws7ziV3ns", 
"annotations": []}, "rUANGr4sQlY": {"duration": 102.28, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rUANGr4sQlY", "annotations": []}, "2-hF-v79XyQ": {"duration": 118.98, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=2-hF-v79XyQ", "annotations": [{"segment": [19.675019828004004, 113.03855546957878], "label": "Futsal"}]}, "zxOcSuLSrU4": {"duration": 39.358, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=zxOcSuLSrU4", "annotations": [{"segment": [0.01, 38.375412441497666], "label": "Cutting the grass"}]}, "JxbmHo84AC4": {"duration": 156.5, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JxbmHo84AC4", "annotations": [{"segment": [46.38907355694228, 70.31606939157567], "label": "Bungee jumping"}, {"segment": [78.61727202808112, 137.70230255850234], "label": "Bungee jumping"}]}, "t3dHI5TeY7I": {"duration": 168.81, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=t3dHI5TeY7I", "annotations": [{"segment": [99.78224492979719, 108.7336864274571], "label": "Putting on shoes"}, {"segment": [158.49317004680188, 163.75872386895475], "label": "Putting on shoes"}]}, "4vdclFJFrdk": {"duration": 97.71000000000001, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=4vdclFJFrdk", "annotations": [{"segment": [2.12126239, 90.54521697602597], "label": "Tumbling"}]}, "PMy2EyktRmo": {"duration": 84.99, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=PMy2EyktRmo", "annotations": [{"segment": [3.0917543000000003, 81.95687825351014], "label": "Spinning"}]}, "jaRSbVCkxlU": {"duration": 237.239, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jaRSbVCkxlU", "annotations": [{"segment": [10.36298244310725, 81.79353999738221], "label": "Making a cake"}, {"segment": 
[102.51950488359671, 210.5906060452418], "label": "Making a cake"}]}, "FFZOmzFc_us": {"duration": 12.68, "subset": "training", "resolution": "270x360", "url": "https://www.youtube.com/watch?v=FFZOmzFc_us", "annotations": [{"segment": [0.13845033650546024, 11.82761446146646], "label": "Washing dishes"}]}, "RJus6sogyiM": {"duration": 85.589, "subset": "training", "resolution": "144x176", "url": "https://www.youtube.com/watch?v=RJus6sogyiM", "annotations": [{"segment": [17.99880655226209, 45.607647425897035], "label": "Painting fence"}, {"segment": [51.07523088923557, 85.28099999999999], "label": "Painting fence"}]}, "lZKrd84QElk": {"duration": 65.83, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lZKrd84QElk", "annotations": [{"segment": [50.19077691107645, 61.378496099844], "label": "Drinking beer"}]}, "WPaHiE-UJvY": {"duration": 177.66, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=WPaHiE-UJvY", "annotations": []}, "P8CZCX42X-o": {"duration": 227.208, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=P8CZCX42X-o", "annotations": []}, "sH_716hH_rY": {"duration": 63.39, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=sH_716hH_rY", "annotations": []}, "JSDZWq777Mc": {"duration": 171.224, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JSDZWq777Mc", "annotations": [{"segment": [12.421081855519118, 43.67412652424464], "label": "Cutting the grass"}, {"segment": [66.64644995595742, 117.93349761745571], "label": "Cutting the grass"}, {"segment": [150.52214248569942, 158.26862218408735], "label": "Cutting the grass"}]}, "Eu3QFCldg0s": {"duration": 154.53, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Eu3QFCldg0s", "annotations": [{"segment": [0.01, 125.11746105772231], "label": "Walking the dog"}]}, "YD7pb5-CZdI": {"duration": 140.57, 
"subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YD7pb5-CZdI", "annotations": [{"segment": [12.719608474258969, 40.35186136661466], "label": "Shot put"}, {"segment": [63.598042371294845, 90.13377729173166], "label": "Shot put"}]}, "blMN_9ZtxsY": {"duration": 184.44, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=blMN_9ZtxsY", "annotations": [{"segment": [4.315977373788829, 14.098859421043509], "label": "Ping-pong"}, {"segment": [27.334523367329254, 34.527818990310635], "label": "Ping-pong"}, {"segment": [37.980600889341694, 94.66377039843499], "label": "Ping-pong"}, {"segment": [113.9418026680251, 170.91270328549143], "label": "Ping-pong"}]}, "BuaWU70q7cE": {"duration": 147.91, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=BuaWU70q7cE", "annotations": [{"segment": [30.333897487519504, 141.28906624414978], "label": "Wrapping presents"}]}, "GHU3G24jFjI": {"duration": 206.38, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GHU3G24jFjI", "annotations": [{"segment": [7.481310401380128, 205.50223918642686], "label": "Breakdancing"}]}, "GbNPZBkaTnU": {"duration": 127.69, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=GbNPZBkaTnU", "annotations": []}, "eRh6V0bcBpU": {"duration": 66.27, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eRh6V0bcBpU", "annotations": [{"segment": [8.729689199687986, 45.0947850374415], "label": "Playing racquetball"}]}, "Pnszux17JFE": {"duration": 54.5, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Pnszux17JFE", "annotations": []}, "vPhAXENSjiw": {"duration": 207.29, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vPhAXENSjiw", "annotations": [{"segment": [18.109059752866308, 119.6491447957238], "label": "Cheerleading"}]}, 
"7fPzX7HZLEA": {"duration": 35.318, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7fPzX7HZLEA", "annotations": []}, "mmRpNwb0NZ0": {"duration": 128.89, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mmRpNwb0NZ0", "annotations": [{"segment": [25.13522289391576, 100.74197335881436], "label": "Ballet"}]}, "UUjTMDSUvs0": {"duration": 114.684, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UUjTMDSUvs0", "annotations": [{"segment": [6.7950046801872075, 114.621], "label": "Making an omelette"}]}, "iChE4EoYG6k": {"duration": 114.06, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iChE4EoYG6k", "annotations": [{"segment": [0.01, 0.02], "label": "Doing step aerobics"}]}, "UtErFzrzcqk": {"duration": 38.453, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=UtErFzrzcqk", "annotations": [{"segment": [0.01, 38.452243], "label": "Hitting a pinata"}]}, "Cz5fahiO1AA": {"duration": 61.93, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Cz5fahiO1AA", "annotations": [{"segment": [48.595218957057604, 56.71052391211295], "label": "Preparing pasta"}]}, "sORYi-Xpj0U": {"duration": 71.193, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=sORYi-Xpj0U", "annotations": []}, "FaP7unySMOs": {"duration": 170.76, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FaP7unySMOs", "annotations": []}, "sV4vBuWGJng": {"duration": 73.21000000000001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sV4vBuWGJng", "annotations": [{"segment": [18.503007418255915, 63.16149976287051], "label": "Getting a haircut"}]}, "Zxi0V2pBPlA": {"duration": 122.37, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Zxi0V2pBPlA", 
"annotations": [{"segment": [5.03058959, 106.10116426833072], "label": "Shaving legs"}]}, "skk-nX55gvA": {"duration": 84.1, "subset": "testing", "resolution": "630x360", "url": "https://www.youtube.com/watch?v=skk-nX55gvA", "annotations": []}, "sBx1HvNjs6s": {"duration": 154.3, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sBx1HvNjs6s", "annotations": [{"segment": [23.78781432, 114.67444539781592], "label": "Getting a haircut"}]}, "7DJDUzdw_I4": {"duration": 49.13, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7DJDUzdw_I4", "annotations": [{"segment": [0, 49.063333], "label": "Using parallel bars"}]}, "6pnabYJdqxc": {"duration": 79.366, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=6pnabYJdqxc", "annotations": [{"segment": [25.196476310753113, 30.272915878315132], "label": "Trimming branches or hedges"}]}, "a_JlhLnJM98": {"duration": 184.83, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=a_JlhLnJM98", "annotations": []}, "8jx74MCCMIs": {"duration": 40.29, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=8jx74MCCMIs", "annotations": []}, "sIzcPVbn0lg": {"duration": 52.2, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=sIzcPVbn0lg", "annotations": [{"segment": [1.5769015800000001, 52.061], "label": "Cumbia"}]}, "FMYfVG0ZRFo": {"duration": 165.698, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=FMYfVG0ZRFo", "annotations": []}, "mXM5FOi_OoM": {"duration": 40.96, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=mXM5FOi_OoM", "annotations": [{"segment": [15.680211388455538, 17.468585803432138], "label": "Cutting the grass"}, {"segment": [21.3646872074883, 39.05681981279251], "label": "Cutting the grass"}]}, "oq8DE3Cbar0": {"duration": 59.75, 
"subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oq8DE3Cbar0", "annotations": [{"segment": [41.02441248, 57.930050054602184], "label": "Playing accordion"}, {"segment": [4.4203652652106085, 37.992404480000005], "label": "Playing accordion"}]}, "at2XbuHG5WQ": {"duration": 91.696, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=at2XbuHG5WQ", "annotations": [{"segment": [27.72928861154446, 91.601], "label": "Painting fence"}]}, "NGk3v4sKqdg": {"duration": 177.77, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=NGk3v4sKqdg", "annotations": [{"segment": [10.396392355694228, 43.1103736349454], "label": "Fixing bicycle"}, {"segment": [53.09091029641186, 122.40019266770672], "label": "Fixing bicycle"}, {"segment": [133.76691497659908, 156.2231224648986], "label": "Fixing bicycle"}]}, "BSDXE1iuZBQ": {"duration": 81.108, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BSDXE1iuZBQ", "annotations": []}, "29Vrl0rJbSo": {"duration": 214.297, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=29Vrl0rJbSo", "annotations": [{"segment": [8.143253468748194, 212.05831733167653], "label": "Blow-drying hair"}]}, "elD1P_1oiUk": {"duration": 146.936, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=elD1P_1oiUk", "annotations": []}, "ECnyZgaH11s": {"duration": 35.225, "subset": "testing", "resolution": "1906x1080", "url": "https://www.youtube.com/watch?v=ECnyZgaH11s", "annotations": []}, "dffUIntKlGI": {"duration": 84.75, "subset": "training", "resolution": "432x720", "url": "https://www.youtube.com/watch?v=dffUIntKlGI", "annotations": [{"segment": [12.8253151825273, 79.19962674570982], "label": "Hopscotch"}]}, "L5IbDi09Yb4": {"duration": 212.231, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=L5IbDi09Yb4", "annotations": 
[]}, "fnKOW7tJA1A": {"duration": 43.1, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fnKOW7tJA1A", "annotations": [{"segment": [1.4791219157566304, 38.99503232449298], "label": "Using parallel bars"}]}, "eSpPY2yMg70": {"duration": 51.62, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eSpPY2yMg70", "annotations": [{"segment": [5.073206897035881, 46.94729557098284], "label": "Using the pommel horse"}]}, "ofCRJRFgjNs": {"duration": 121.65, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ofCRJRFgjNs", "annotations": []}, "cECycCDw1Nc": {"duration": 61.84, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cECycCDw1Nc", "annotations": []}, "1maBC85X9WI": {"duration": 91.255, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=1maBC85X9WI", "annotations": []}, "nDJgThY8zi8": {"duration": 52.71, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nDJgThY8zi8", "annotations": [{"segment": [12.087737875975039, 32.97403325351014], "label": "Spinning"}]}, "jEN2smSwZ-o": {"duration": 178.35, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jEN2smSwZ-o", "annotations": [{"segment": [51.47443832447175, 146.3543474976599], "label": "Getting a piercing"}]}, "ofrX4WyAM-0": {"duration": 77.81, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ofrX4WyAM-0", "annotations": [{"segment": [32.51161934477379, 63.81011856474259], "label": "Preparing pasta"}]}, "3UOWHwwxczI": {"duration": 64.41, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3UOWHwwxczI", "annotations": [{"segment": [5.325811816146115, 6.933604062529848], "label": "Rock climbing"}, {"segment": [20.59983755850234, 60.19172047581904], "label": "Rock climbing"}]}, 
"F5IWShcFIN4": {"duration": 159.43, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=F5IWShcFIN4", "annotations": [{"segment": [43.51821372854914, 60.67682371294852], "label": "Walking the dog"}]}, "oUWArkSoOAU": {"duration": 89.049, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=oUWArkSoOAU", "annotations": []}, "yvOOFjG-FEo": {"duration": 95.37, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yvOOFjG-FEo", "annotations": [{"segment": [33.68053798595943, 95.316666], "label": "Polishing forniture"}]}, "OkCu0QymO10": {"duration": 35.899, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=OkCu0QymO10", "annotations": [{"segment": [0.01, 35.861], "label": "Roof shingle removal"}]}, "S04LRGuoAg8": {"duration": 161.75, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=S04LRGuoAg8", "annotations": []}, "--tFD65KaK4": {"duration": 86.425, "subset": "testing", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=--tFD65KaK4", "annotations": []}, "OTlQ20IWBmY": {"duration": 197.53, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OTlQ20IWBmY", "annotations": []}, "McwVhHxdr5g": {"duration": 24.313, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=McwVhHxdr5g", "annotations": [{"segment": [0.01, 22.499770670826834], "label": "Rollerblading"}]}, "od9EdcDcByA": {"duration": 224.82, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=od9EdcDcByA", "annotations": [{"segment": [21.043581988177486, 25.953751118752233], "label": "Paintball"}, {"segment": [39.6320794110676, 43.49006944223347], "label": "Paintball"}, {"segment": [46.64660674046009, 50.50459677162597], "label": "Paintball"}, {"segment": [58.92202956689696, 64.53365143041096], "label": "Paintball"}, 
{"segment": [66.6380096292287, 72.24963149274271], "label": "Paintball"}, {"segment": [75.75689515743895, 76.45834789037819], "label": "Paintball"}, {"segment": [78.91343245566557, 89.0844970832847], "label": "Paintball"}, {"segment": [97.85265624502532, 141.6934520537284], "label": "Paintball"}, {"segment": [144.84998935195503, 147.3050739172424], "label": "Paintball"}, {"segment": [153.26742214722603, 155.72250608424338], "label": "Paintball"}, {"segment": [158.87904336973477, 163.4384861154446], "label": "Paintball"}, {"segment": [164.84139157566304, 172.90809797191886], "label": "Paintball"}, {"segment": [174.31100343213728, 179.22117254290168], "label": "Paintball"}, {"segment": [180.2733516380655, 194.3024062402496], "label": "Paintball"}, {"segment": [196.40676443057723, 197.45894352574103], "label": "Paintball"}, {"segment": [198.51112262090484, 200.61548081123243], "label": "Paintball"}, {"segment": [213.24162995319813, 217.45034633385333], "label": "Paintball"}]}, "W07hi9Y37Jg": {"duration": 200.97, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=W07hi9Y37Jg", "annotations": [{"segment": [11.91390144773791, 164.59995421216848], "label": "Playing accordion"}]}, "Lmne5ZF0McI": {"duration": 45.883, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Lmne5ZF0McI", "annotations": [{"segment": [0.01, 45.81105023400936], "label": "Powerbocking"}]}, "L5kxbN9wFAg": {"duration": 48.18, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=L5kxbN9wFAg", "annotations": [{"segment": [21.046479126365057, 47.65524202184088], "label": "Discus throw"}, {"segment": [0, 20.763601949999998], "label": "Discus throw"}]}, "RgWvaKWa7YU": {"duration": 164.235, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=RgWvaKWa7YU", "annotations": [{"segment": [7.662581136621315, 161.425120819161], "label": "Drum corps"}]}, "TsHek6oZYXU": 
{"duration": 115.64, "subset": "testing", "resolution": "854x428", "url": "https://www.youtube.com/watch?v=TsHek6oZYXU", "annotations": []}, "VggDjeV0gHA": {"duration": 176.495, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=VggDjeV0gHA", "annotations": []}, "MKmrqcoCZ-M": {"duration": 70.287, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MKmrqcoCZ-M", "annotations": [{"segment": [23.22402513960366, 55.93817396430138], "label": "Doing crunches"}]}, "B2OfU3a0NRk": {"duration": 192.262, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=B2OfU3a0NRk", "annotations": []}, "Frldvgi9luM": {"duration": 20.318, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Frldvgi9luM", "annotations": []}, "u9JhQ0xSSJI": {"duration": 30.349, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=u9JhQ0xSSJI", "annotations": [{"segment": [0.01, 29.091049879919165], "label": "Drum corps"}]}, "kbWQIXFOK54": {"duration": 139.436, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kbWQIXFOK54", "annotations": [{"segment": [20.2301589875195, 105.9364239453978], "label": "Painting furniture"}]}, "01_BrVxYsE0": {"duration": 208.98, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=01_BrVxYsE0", "annotations": [{"segment": [56.401668311630424, 168.8789837307778], "label": "Mixing drinks"}]}, "ibHRKaD5MbM": {"duration": 13.816, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ibHRKaD5MbM", "annotations": [{"segment": [9.181843170046802, 11.83293873322933], "label": "Hitting a pinata"}]}, "rulzKikXMHo": {"duration": 49.37, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rulzKikXMHo", "annotations": [{"segment": [0.9097969499999999, 12.245138686427458], 
"label": "Shot put"}]}, "tYyTrDyzB6o": {"duration": 26.077, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tYyTrDyzB6o", "annotations": [{"segment": [0.01, 25.967527260588508], "label": "River tubing"}]}, "Gpjx38IDVT0": {"duration": 22.41, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Gpjx38IDVT0", "annotations": [{"segment": [6.501949188767551, 15.17121477379095], "label": "Hammer throw"}]}, "L963epA4MFU": {"duration": 27.052, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=L963epA4MFU", "annotations": [{"segment": [0.2, 21.119377535101403], "label": "Blowing leaves"}]}, "pg-DMwEM3K4": {"duration": 48.322, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=pg-DMwEM3K4", "annotations": [{"segment": [7.682780031201247, 42.724909516380656], "label": "Canoeing"}]}, "g2uL6H3fP1c": {"duration": 195.629, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=g2uL6H3fP1c", "annotations": [{"segment": [24.720509089751346, 174.874711174727], "label": "Capoeira"}]}, "GUqqkqE3jWc": {"duration": 208.608, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GUqqkqE3jWc", "annotations": []}, "tVIY6uyH3aA": {"duration": 3.553, "subset": "training", "resolution": "276x480", "url": "https://www.youtube.com/watch?v=tVIY6uyH3aA", "annotations": [{"segment": [0.2, 3.328], "label": "Painting fence"}]}, "XRb38sJzuY4": {"duration": 31.696, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=XRb38sJzuY4", "annotations": [{"segment": [3.708491196790729, 31.25021905772231], "label": "Hurling"}]}, "Q_HKC8HTTik": {"duration": 113.012, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Q_HKC8HTTik", "annotations": []}, "DYr73-hn2hM": {"duration": 59.862, "subset": "testing", "resolution": 
"3840x2160", "url": "https://www.youtube.com/watch?v=DYr73-hn2hM", "annotations": []}, "QX4915WkFvU": {"duration": 207.24, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QX4915WkFvU", "annotations": []}, "AFtFitXAFks": {"duration": 154.11, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AFtFitXAFks", "annotations": [{"segment": [16.348731750390016, 41.352674427457096], "label": "Tumbling"}, {"segment": [43.276054633385336, 101.69872838845554], "label": "Tumbling"}, {"segment": [104.34337617160686, 126.70267106552261], "label": "Tumbling"}]}, "kMtjk1KFvXQ": {"duration": 90.74, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kMtjk1KFvXQ", "annotations": []}, "V5PYoIqay84": {"duration": 81.13, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=V5PYoIqay84", "annotations": []}, "xzmcOKHP-sM": {"duration": 113.569, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=xzmcOKHP-sM", "annotations": [{"segment": [18.015576488403813, 25.61037947751029], "label": "Knitting"}, {"segment": [31.79219586399231, 32.67531249063259], "label": "Knitting"}, {"segment": [49.10128174614195, 50.337645023438355], "label": "Knitting"}]}, "13Y47Uk_w1o": {"duration": 150.094, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=13Y47Uk_w1o", "annotations": [{"segment": [0.010000000000000002, 115.08890497311309], "label": "Powerbocking"}, {"segment": [124.90812455255234, 145.71551842326886], "label": "Powerbocking"}]}, "JQf_oSGY8q4": {"duration": 198.298, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JQf_oSGY8q4", "annotations": [{"segment": [0.01, 196.22280327495682], "label": "Playing pool"}]}, "_ImvYtXpIzE": {"duration": 135.84, "subset": "testing", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=_ImvYtXpIzE", "annotations": []}, "NsjDjDFRtWA": {"duration": 66.9, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NsjDjDFRtWA", "annotations": []}, "jbPxbFoP2OU": {"duration": 124.95, "subset": "testing", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=jbPxbFoP2OU", "annotations": []}, "uH6n_FGsyk0": {"duration": 124.39, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uH6n_FGsyk0", "annotations": []}, "5_UEjPqtnE0": {"duration": 71.007, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=5_UEjPqtnE0", "annotations": [{"segment": [0.19999999999999998, 70.67099999999999], "label": "Table soccer"}]}, "zE1l4avJZaU": {"duration": 148.28, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zE1l4avJZaU", "annotations": [{"segment": [3.7012824711388452, 134.6341498876755], "label": "Wrapping presents"}]}, "cufmgoo7E_k": {"duration": 62.23, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=cufmgoo7E_k", "annotations": [{"segment": [15.331915756630265, 21.63934945397816], "label": "Calf roping"}]}, "S3EA0yDdaWY": {"duration": 113.407, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=S3EA0yDdaWY", "annotations": [{"segment": [10.438329565835694, 39.453347342056944], "label": "Baton twirling"}, {"segment": [46.70710157878315, 59.976164527301094], "label": "Baton twirling"}, {"segment": [62.98381879563182, 79.61437769110763], "label": "Baton twirling"}]}, "Yezk4k2E5s0": {"duration": 11.56, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Yezk4k2E5s0", "annotations": [{"segment": [0.01, 9.146377535101404], "label": "Javelin throw"}]}, "kq71WnShe8E": {"duration": 229.27, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kq71WnShe8E", 
"annotations": []}, "CQ0r8ldAKl8": {"duration": 229.855, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CQ0r8ldAKl8", "annotations": [{"segment": [54.921580331354946, 216.75048556499982], "label": "Rope skipping"}]}, "96HJ_LqNdkg": {"duration": 167.0, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=96HJ_LqNdkg", "annotations": [{"segment": [17.91209822, 143.11860374414974], "label": "Cleaning shoes"}]}, "xW1M8k9A-gk": {"duration": 210.93, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xW1M8k9A-gk", "annotations": [{"segment": [0, 147.73121676], "label": "Preparing pasta"}, {"segment": [158.24698992, 201.48478939157567], "label": "Preparing pasta"}]}, "nEOpfvJ7g_g": {"duration": 25.96, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nEOpfvJ7g_g", "annotations": [{"segment": [0.01, 25.258733423556944], "label": "Playing racquetball"}]}, "U-kPlTqXc_M": {"duration": 12.12, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=U-kPlTqXc_M", "annotations": [{"segment": [0.01, 0.02], "label": "Drinking beer"}]}, "RKsto2FmBzM": {"duration": 118.49000000000001, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=RKsto2FmBzM", "annotations": [{"segment": [52.86825050144863, 68.95055047916202], "label": "Mowing the lawn"}, {"segment": [73.5719006926677, 93.53613505148206], "label": "Mowing the lawn"}]}, "xkKjgwIPEuM": {"duration": 171.9, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xkKjgwIPEuM", "annotations": [{"segment": [7.240604868954758, 53.09776903900156], "label": "Playing field hockey"}, {"segment": [67.31080822620905, 171.697323], "label": "Playing field hockey"}]}, "WD40PRo1quM": {"duration": 161.5, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=WD40PRo1quM", "annotations": [{"segment": [9.695753510140404, 153.99878627145085], "label": "Cumbia"}]}, "2VfX8cPmZ6Y": {"duration": 69.8, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2VfX8cPmZ6Y", "annotations": []}, "sNJ6_N0j9Ts": {"duration": 86.15, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sNJ6_N0j9Ts", "annotations": []}, "-TubttTNt90": {"duration": 220.52, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-TubttTNt90", "annotations": [{"segment": [19.77614274570983, 43.16358112324493], "label": "Sharpening knives"}]}, "-ZDCHvzbnoU": {"duration": 145.06, "subset": "validation", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=-ZDCHvzbnoU", "annotations": [{"segment": [59.06929710000001, 116.5632969], "label": "Paintball"}]}, "RLBfyIVpocE": {"duration": 13.56, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RLBfyIVpocE", "annotations": []}, "aObyxa8gdAo": {"duration": 128.059, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aObyxa8gdAo", "annotations": [{"segment": [13.98450073322933, 128.0580725623583], "label": "Playing blackjack"}]}, "ap2XL5wma4c": {"duration": 100.636, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ap2XL5wma4c", "annotations": [{"segment": [17.112700377535102, 22.293609666146647], "label": "Applying sunscreen"}, {"segment": [60.91493345397816, 67.19482350078003], "label": "Applying sunscreen"}]}, "xT6ejzSmftg": {"duration": 172.222, "subset": "training", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=xT6ejzSmftg", "annotations": [{"segment": [89.98539001560064, 116.05434009360374], "label": "Drum corps"}]}, "Rg9qviHZ3qc": {"duration": 213.136, "subset": "validation", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=Rg9qviHZ3qc", "annotations": [{"segment": [24.272894464077883, 189.860584825273], "label": "Braiding hair"}]}, "qwdavExYM_Y": {"duration": 179.514, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qwdavExYM_Y", "annotations": [{"segment": [0.20000000000000004, 179.08100000000002], "label": "Snow tubing"}]}, "MkouTZhtvEI": {"duration": 57.54, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MkouTZhtvEI", "annotations": [{"segment": [12.973893720000001, 48.955331980000004], "label": "Archery"}]}, "PD-FXR7O9J0": {"duration": 41.5, "subset": "testing", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=PD-FXR7O9J0", "annotations": []}, "PGJAnrLYOJA": {"duration": 75.628, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PGJAnrLYOJA", "annotations": []}, "MSiXlMKPGKs": {"duration": 234.011, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=MSiXlMKPGKs", "annotations": [{"segment": [43.44348463462348, 58.77647921154941], "label": "Curling"}, {"segment": [80.31568587913584, 92.36303876100622], "label": "Curling"}, {"segment": [101.12474994782104, 112.44196023079017], "label": "Curling"}, {"segment": [124.12424181320993, 138.72709379123464], "label": "Curling"}, {"segment": [179.25000803025318, 190.20214543057725], "label": "Curling"}, {"segment": [198.59878524804995, 225.24898988611548], "label": "Curling"}]}, "FayB7ZswnAw": {"duration": 9.96, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FayB7ZswnAw", "annotations": [{"segment": [6.33916434, 9.685920107644305], "label": "Playing racquetball"}, {"segment": [0, 5.095579320000001], "label": "Playing racquetball"}]}, "MjljlkQaHh4": {"duration": 24.567, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=MjljlkQaHh4", "annotations": [{"segment": [0.01, 
17.169869778471142], "label": "Sumo"}]}, "y8OQkNk8WpY": {"duration": 107.09, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=y8OQkNk8WpY", "annotations": [{"segment": [0.01, 107.09043], "label": "Curling"}]}, "548xC74c6MQ": {"duration": 14.374, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=548xC74c6MQ", "annotations": [{"segment": [0.2, 12.170833073322934], "label": "Kite flying"}]}, "P3_YQbHXEIs": {"duration": 69.96000000000001, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=P3_YQbHXEIs", "annotations": [{"segment": [1.9083463338533544, 62.648283931357255], "label": "Getting a tattoo"}]}, "1sCgzkVUuy0": {"duration": 60.54, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1sCgzkVUuy0", "annotations": []}, "XLspZ5bof7o": {"duration": 169.437, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=XLspZ5bof7o", "annotations": [{"segment": [20.65766144183205, 162.38217267582755], "label": "Braiding hair"}]}, "oni9K1_MCuw": {"duration": 199.97, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=oni9K1_MCuw", "annotations": []}, "daSrmaPxaZA": {"duration": 76.39, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=daSrmaPxaZA", "annotations": [{"segment": [40.10550985307782, 49.73731826429278], "label": "Cleaning shoes"}]}, "319ko2wJlPM": {"duration": 8.57, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=319ko2wJlPM", "annotations": []}, "EeUkryzfwYk": {"duration": 139.669, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=EeUkryzfwYk", "annotations": [{"segment": [22.769592579621552, 124.74250480223292], "label": "Decorating the Christmas tree"}]}, "gXffXyAkcHM": {"duration": 13.47, "subset": "validation", "resolution": "720x480", 
"url": "https://www.youtube.com/watch?v=gXffXyAkcHM", "annotations": [{"segment": [5.2105426770670835, 13.383530989079564], "label": "Using uneven bars"}]}, "gvhjtFg8A-c": {"duration": 104.444, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gvhjtFg8A-c", "annotations": [{"segment": [67.91765522620905, 100.8178143525741], "label": "Snow tubing"}]}, "lQs4uY7EpDc": {"duration": 70.13, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=lQs4uY7EpDc", "annotations": []}, "EjiugNIf6d0": {"duration": 63.414, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=EjiugNIf6d0", "annotations": []}, "iVGGeRlJekQ": {"duration": 102.87, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=iVGGeRlJekQ", "annotations": [{"segment": [18.87713436145233, 83.18738007840412], "label": "Wrapping presents"}]}, "rJpFVvho0o4": {"duration": 167.485, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rJpFVvho0o4", "annotations": [{"segment": [7.929617843497209, 160.0008997749614], "label": "Swimming"}]}, "P4dx0xrr6fM": {"duration": 19.78, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=P4dx0xrr6fM", "annotations": [{"segment": [6.635618120124805, 11.172529113884556], "label": "Discus throw"}]}, "r1fHWOdyQBU": {"duration": 155.249, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=r1fHWOdyQBU", "annotations": []}, "ZDlw4L7oNfs": {"duration": 122.857, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZDlw4L7oNfs", "annotations": [{"segment": [29.132921986267206, 106.18183266458658], "label": "Longboarding"}]}, "Q_LhL-t0Yls": {"duration": 168.34, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=Q_LhL-t0Yls", "annotations": [{"segment": [90.81915200624024, 
154.8650279875195], "label": "Playing flauta"}]}, "S9KUHyvFuGc": {"duration": 33.228, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=S9KUHyvFuGc", "annotations": [{"segment": [7.163117140656587, 32.98967359860051], "label": "Calf roping"}]}, "hOcP06xuKFg": {"duration": 39.103, "subset": "training", "resolution": "544x360", "url": "https://www.youtube.com/watch?v=hOcP06xuKFg", "annotations": [{"segment": [0.2, 35.87432357782158], "label": "Shuffleboard"}]}, "lVhdZ-sTUjI": {"duration": 20.67, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lVhdZ-sTUjI", "annotations": []}, "Jbl0E75cx8o": {"duration": 128.01, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=Jbl0E75cx8o", "annotations": [{"segment": [0.01, 128.01160997732427], "label": "Getting a piercing"}]}, "qbDQNRf-EUI": {"duration": 71.36, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=qbDQNRf-EUI", "annotations": [{"segment": [8.477303955450907, 70.94058473738237], "label": "Fixing bicycle"}]}, "bULPHJydFTQ": {"duration": 89.19, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bULPHJydFTQ", "annotations": [{"segment": [14.16622464898596, 66.66458658346335], "label": "Hammer throw"}]}, "8nhRg9GjNQ0": {"duration": 231.085, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8nhRg9GjNQ0", "annotations": []}, "GKK32IWDlSE": {"duration": 98.38, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=GKK32IWDlSE", "annotations": [{"segment": [0, 20.23588129797192], "label": "Javelin throw"}, {"segment": [32.653353912636504, 47.98356701716069], "label": "Javelin throw"}, {"segment": [55.49537143837754, 80.17701453666146], "label": "Javelin throw"}]}, "qZvP5BvVbcg": {"duration": 227.045, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=qZvP5BvVbcg", "annotations": [{"segment": [16.239573872472786, 213.93873405909798], "label": "Skiing"}]}, "lueciI7K4_o": {"duration": 23.058, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lueciI7K4_o", "annotations": []}, "IvkpOoeCM3A": {"duration": 157.827, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=IvkpOoeCM3A", "annotations": [{"segment": [10.554426875057587, 59.644801853457444], "label": "Playing pool"}, {"segment": [64.30838747640543, 85.41724871711737], "label": "Playing pool"}, {"segment": [95.48077558768935, 101.37161958596138], "label": "Playing pool"}, {"segment": [107.2624645253082, 115.85328006185563], "label": "Playing pool"}, {"segment": [134.01671862484164, 136.96214109451503], "label": "Playing pool"}, {"segment": [147.27111973837197, 153.65286842266434], "label": "Playing pool"}]}, "jcX2QAiqP9U": {"duration": 26.076, "subset": "training", "resolution": "640x278", "url": "https://www.youtube.com/watch?v=jcX2QAiqP9U", "annotations": [{"segment": [0.2, 22.669499956454125], "label": "Futsal"}]}, "JgTElHbqCs8": {"duration": 29.281, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=JgTElHbqCs8", "annotations": []}, "fdd5ixvEXOE": {"duration": 63.18, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=fdd5ixvEXOE", "annotations": [{"segment": [10.842382184087363, 59.6331020124805], "label": "Hopscotch"}]}, "IeBCgi4xPIE": {"duration": 146.495, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IeBCgi4xPIE", "annotations": []}, "qAXE_vqt56g": {"duration": 25.589, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=qAXE_vqt56g", "annotations": [{"segment": [13.013751659906397, 18.682318333853352], "label": "Rock-paper-scissors"}]}, "2bMIjmygXXE": {"duration": 50.04, "subset": "testing", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=2bMIjmygXXE", "annotations": []}, "-gZjpAPEhR4": {"duration": 68.128, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-gZjpAPEhR4", "annotations": [{"segment": [2.8668439937597507, 37.59855538221529], "label": "Rollerblading"}]}, "fvUQQF5S4Dg": {"duration": 107.648, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fvUQQF5S4Dg", "annotations": [{"segment": [93.91969291668168, 99.27696609864127], "label": "Having an ice cream"}]}, "Xr8zB1aIBzg": {"duration": 159.2, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Xr8zB1aIBzg", "annotations": [{"segment": [5.460587600624025, 149.91795048985958], "label": "Doing step aerobics"}]}, "-DphPPTybSY": {"duration": 158.384, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=-DphPPTybSY", "annotations": [{"segment": [0.009999999999999998, 154.36914976599064], "label": "Playing pool"}]}, "X3Nr9Gj6JR0": {"duration": 105.78999999999999, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=X3Nr9Gj6JR0", "annotations": [{"segment": [2.3105484981304016, 67.66606227769111], "label": "Preparing pasta"}, {"segment": [69.81157156942277, 97.86823153822152], "label": "Preparing pasta"}]}, "7Xa3VvDxbPw": {"duration": 37.036, "subset": "testing", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=7Xa3VvDxbPw", "annotations": []}, "b5SYTLjp6HI": {"duration": 149.07, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=b5SYTLjp6HI", "annotations": [{"segment": [16.046702643615948, 20.000237950078002], "label": "Snatch"}, {"segment": [21.860725201248048, 24.186334265210608], "label": "Snatch"}, {"segment": [41.16328043213728, 46.279620372854914], "label": "Snatch"}, {"segment": [47.20986399843994, 51.395960313572544], "label": "Snatch"}, 
{"segment": [84.65216992823713, 86.51265717940717], "label": "Snatch"}, {"segment": [103.95472515912635, 110.23386963182527], "label": "Snatch"}, {"segment": [111.39667416380654, 114.88508775975039], "label": "Snatch"}, {"segment": [115.35020957254291, 117.44325773010921], "label": "Snatch"}, {"segment": [132.09459483307333, 136.28069114820593], "label": "Snatch"}, {"segment": [137.21093477379094, 139.5365438377535], "label": "Snatch"}]}, "AGjhryYGVs4": {"duration": 198.972, "subset": "validation", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=AGjhryYGVs4", "annotations": [{"segment": [37.2395007800312, 105.82224804992198], "label": "Rafting"}, {"segment": [127.54529017160685, 186.50783307332293], "label": "Rafting"}]}, "OjNLFiEYpVo": {"duration": 48.46, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OjNLFiEYpVo", "annotations": []}, "BfLrltipDDU": {"duration": 115.055, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BfLrltipDDU", "annotations": [{"segment": [21.570916784280556, 105.77632176870424], "label": "Wakeboarding"}]}, "ZefWc2tgltY": {"duration": 102.03, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZefWc2tgltY", "annotations": [{"segment": [13.525873634945398, 45.03320280811233], "label": "Playing flauta"}, {"segment": [66.51547269890796, 85.92907956318253], "label": "Playing flauta"}]}, "r5cjo7N6IEY": {"duration": 38.894, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=r5cjo7N6IEY", "annotations": [{"segment": [20.83386254551255, 23.133454445824075], "label": "Trimming branches or hedges"}]}, "O0KUnuhLwj0": {"duration": 112.18, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=O0KUnuhLwj0", "annotations": [{"segment": [31.150166614664588, 107.62557566302652], "label": "Smoking a cigarette"}]}, "8e80cJTrJDs": {"duration": 39.149, 
"subset": "validation", "resolution": "400x300", "url": "https://www.youtube.com/watch?v=8e80cJTrJDs", "annotations": [{"segment": [0.19999999999999998, 37.377677541341654], "label": "BMX"}]}, "Bhq890NxKxc": {"duration": 116.797, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Bhq890NxKxc", "annotations": []}, "czh5nNO_Eow": {"duration": 215.25, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=czh5nNO_Eow", "annotations": [{"segment": [32.23692911798105, 199.4659989175077], "label": "Painting"}]}, "fPCfTJLh46A": {"duration": 184.135, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fPCfTJLh46A", "annotations": [{"segment": [36.184408736349454, 51.979190327613104], "label": "Playing ten pins"}, {"segment": [54.27661310452418, 66.62526053042122], "label": "Playing ten pins"}]}, "jFbIvLKgVNM": {"duration": 17.11, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=jFbIvLKgVNM", "annotations": []}, "lol04SNoopE": {"duration": 19.48, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=lol04SNoopE", "annotations": [{"segment": [1.3980513042121685, 3.2215964836193445], "label": "Springboard diving"}]}, "CV9n0jgThLY": {"duration": 58.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CV9n0jgThLY", "annotations": [{"segment": [0.01, 12.152499427690179], "label": "Fixing the roof"}, {"segment": [25.98681781554065, 48.50149283890513], "label": "Fixing the roof"}]}, "lrM8LSnXmSw": {"duration": 75.72, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=lrM8LSnXmSw", "annotations": [{"segment": [6.165272784296208, 73.51149512230243], "label": "Doing step aerobics"}]}, "QKIkAfzncFc": {"duration": 55.287, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=QKIkAfzncFc", "annotations": 
[{"segment": [0.2, 10.690115444617785], "label": "Playing congas"}, {"segment": [17.500753510140406, 28.53571138845554], "label": "Playing congas"}, {"segment": [34.65666458658346, 52.32983931357254], "label": "Playing congas"}]}, "E9hFBWDR12A": {"duration": 129.45, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=E9hFBWDR12A", "annotations": [{"segment": [71.89457932605305, 105.62040726833074], "label": "Playing lacrosse"}]}, "TqO-bmGQGiY": {"duration": 108.88, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TqO-bmGQGiY", "annotations": [{"segment": [3.0574268143525742, 107.0099385023401], "label": "Walking the dog"}]}, "vEiQlqCy-rI": {"duration": 106.348, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vEiQlqCy-rI", "annotations": [{"segment": [0.01, 7.186080314957094], "label": "Futsal"}, {"segment": [16.12728592243373, 22.41924542399136], "label": "Futsal"}, {"segment": [38.48029994112532, 54.22039807201397], "label": "Futsal"}, {"segment": [61.0103684769984, 70.78130149880525], "label": "Futsal"}, {"segment": [82.53954293182706, 88.335859131204], "label": "Futsal"}, {"segment": [101.08775476983328, 106.121], "label": "Futsal"}]}, "_7AyUgKv3kU": {"duration": 71.866, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=_7AyUgKv3kU", "annotations": [{"segment": [18.274762088299532, 65.92368164368175], "label": "Blowing leaves"}]}, "HLZLkI1NYAs": {"duration": 66.247, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HLZLkI1NYAs", "annotations": [{"segment": [17.645103948121303, 58.30107302959502], "label": "Playing ten pins"}]}, "jTChoOFb05w": {"duration": 184.715, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jTChoOFb05w", "annotations": [{"segment": [40.63141445657826, 90.7723088923557], "label": "Beach soccer"}, {"segment": 
[99.41729069162767, 104.89244583116658], "label": "Beach soccer"}, {"segment": [111.23209915063268, 150.71084818876753], "label": "Beach soccer"}, {"segment": [160.79666020904835, 172.03513646021838], "label": "Beach soccer"}]}, "YniPQWUmrhg": {"duration": 52.99, "subset": "testing", "resolution": "194x144", "url": "https://www.youtube.com/watch?v=YniPQWUmrhg", "annotations": []}, "g_65rfEiXPY": {"duration": 156.18, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=g_65rfEiXPY", "annotations": [{"segment": [0.7309377354686024, 153.00963262476077], "label": "Cleaning windows"}]}, "Hy8WbkpvUlA": {"duration": 220.172, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Hy8WbkpvUlA", "annotations": [{"segment": [0.01, 216.53637831096225], "label": "Waxing skis"}]}, "-06dWmCzbxY": {"duration": 65.527, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=-06dWmCzbxY", "annotations": [{"segment": [3.2712243129180947, 63.379970078003126], "label": "River tubing"}]}, "mvRoVXRGVGw": {"duration": 31.626, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mvRoVXRGVGw", "annotations": []}, "ZN7C8n_6uH8": {"duration": 110.597, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ZN7C8n_6uH8", "annotations": [{"segment": [8.622542901716068, 25.005374414976597], "label": "Futsal"}, {"segment": [28.97174414976599, 72.77557878315133], "label": "Futsal"}, {"segment": [77.94919812792511, 101.24696879875195], "label": "Futsal"}]}, "InHt3sukfeg": {"duration": 175.032, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=InHt3sukfeg", "annotations": [{"segment": [0.01, 175.001], "label": "Painting furniture"}]}, "P9HUphIXgNA": {"duration": 121.511, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=P9HUphIXgNA", "annotations": []}, 
"H6g8SsxL8RU": {"duration": 55.57, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=H6g8SsxL8RU", "annotations": [{"segment": [32.420344617784714, 49.58405647425897], "label": "Starting a campfire"}]}, "RihO8i98QJg": {"duration": 218.92000000000002, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RihO8i98QJg", "annotations": [{"segment": [4.097616224648986, 88.78168486739469], "label": "Dodgeball"}, {"segment": [120.53821060842434, 152.6362043681747], "label": "Dodgeball"}]}, "yyCsQ7QzAJ8": {"duration": 228.717, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yyCsQ7QzAJ8", "annotations": [{"segment": [47.81270682059282, 213.7299357129485], "label": "Making a cake"}]}, "35Xzs3Bz1hQ": {"duration": 91.56, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=35Xzs3Bz1hQ", "annotations": [{"segment": [70.96090015600625, 76.24370358814353], "label": "Drinking coffee"}]}, "o1jPudE8eRE": {"duration": 230.296, "subset": "testing", "resolution": "1920x1072", "url": "https://www.youtube.com/watch?v=o1jPudE8eRE", "annotations": []}, "qZMcbBsQorE": {"duration": 4.528, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qZMcbBsQorE", "annotations": []}, "3-_Eld2NwJ0": {"duration": 70.218, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3-_Eld2NwJ0", "annotations": [{"segment": [0.2, 67.72883619344773], "label": "Building sandcastles"}]}, "EfJO4zg7bYE": {"duration": 229.25, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=EfJO4zg7bYE", "annotations": [{"segment": [116.91397815912637, 216.308736349454], "label": "Polishing forniture"}]}, "vHeh6hl2dFc": {"duration": 164.003, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vHeh6hl2dFc", "annotations": []}, "z0vMpInrDqM": 
{"duration": 24.823, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=z0vMpInrDqM", "annotations": [{"segment": [13.712007800312012, 21.923934477379095], "label": "Putting in contact lenses"}]}, "hY73M6hJqgU": {"duration": 198.322, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=hY73M6hJqgU", "annotations": [{"segment": [17.0166767487026, 177.28283230921073], "label": "Baking cookies"}]}, "EZKrOWEKX_Q": {"duration": 136.209, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=EZKrOWEKX_Q", "annotations": [{"segment": [12.814570070773637, 127.806238992892], "label": "Surfing"}]}, "4J-hDMxEb14": {"duration": 66.549, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4J-hDMxEb14", "annotations": []}, "WZOa0RpD6xE": {"duration": 212.3, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WZOa0RpD6xE", "annotations": []}, "8zfA20hcc0o": {"duration": 130.566, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8zfA20hcc0o", "annotations": [{"segment": [0.2, 3.870125264874541], "label": "Futsal"}, {"segment": [15.887882666327062, 119.36281080086742], "label": "Futsal"}]}, "Ru18FF0tcuA": {"duration": 70.682, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=Ru18FF0tcuA", "annotations": [{"segment": [0.2, 68.25943303593505], "label": "Canoeing"}]}, "Yi3xUQcaOnE": {"duration": 90.071, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Yi3xUQcaOnE", "annotations": [{"segment": [8.264606595359762, 34.31912867029549], "label": "Rafting"}]}, "f6NpnKJZJE0": {"duration": 118.631, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=f6NpnKJZJE0", "annotations": [{"segment": [0, 49.04391256630265], "label": "Elliptical trainer"}, {"segment": 
[54.225910875195005, 62.554122443057715], "label": "Elliptical trainer"}, {"segment": [69.40176306552262, 72.91811906084243], "label": "Elliptical trainer"}, {"segment": [79.5806883151326, 84.20747251950078], "label": "Elliptical trainer"}, {"segment": [101.78925249609985, 115.11439100468019], "label": "Elliptical trainer"}]}, "9eJgsiS3xH8": {"duration": 105.07, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9eJgsiS3xH8", "annotations": [{"segment": [47.134401999999994, 97.2191887675507], "label": "Sailing"}, {"segment": [4.5169593, 41.375515299999996], "label": "Sailing"}]}, "jNGa0jPAMjI": {"duration": 61.7, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=jNGa0jPAMjI", "annotations": [{"segment": [13.2483843, 49.5682308], "label": "Smoking hookah"}]}, "oNrWO_VQQbk": {"duration": 178.19, "subset": "validation", "resolution": "448x360", "url": "https://www.youtube.com/watch?v=oNrWO_VQQbk", "annotations": [{"segment": [117.31070337235259, 126.20630173234142], "label": "Bathing dog"}]}, "kRom61pt8zk": {"duration": 184.39, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=kRom61pt8zk", "annotations": [{"segment": [76.51739395290097, 163.39052378159127], "label": "Brushing hair"}]}, "BSl22Hx2WGM": {"duration": 195.75, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BSl22Hx2WGM", "annotations": [{"segment": [13.131022967238689, 163.679728149766], "label": "Cheerleading"}]}, "zrXTMcKFa5E": {"duration": 237.091, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zrXTMcKFa5E", "annotations": [{"segment": [26.55437784273248, 62.69784423291738], "label": "Fun sliding down"}]}, "3tRTl23mhVQ": {"duration": 67.18, "subset": "training", "resolution": "1152x720", "url": "https://www.youtube.com/watch?v=3tRTl23mhVQ", "annotations": [{"segment": [22.06362875, 55.43759154290172], "label": 
"Javelin throw"}]}, "XvFv0n2mJUk": {"duration": 9.846, "subset": "validation", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=XvFv0n2mJUk", "annotations": [{"segment": [0.01, 9.191975482278307], "label": "Doing a powerbomb"}]}, "iEqyqzKSEZ4": {"duration": 237.425, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=iEqyqzKSEZ4", "annotations": [{"segment": [32.58896723868955, 107.76578939157567], "label": "Rope skipping"}, {"segment": [156.64924024961, 190.3491950078003], "label": "Rope skipping"}]}, "i5qTK0mInTc": {"duration": 215.5, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=i5qTK0mInTc", "annotations": []}, "s7kIK4sQQ8w": {"duration": 115.53999999999999, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=s7kIK4sQQ8w", "annotations": []}, "bXEG4X7pRds": {"duration": 129.73, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bXEG4X7pRds", "annotations": [{"segment": [0.01, 113.49616380655227], "label": "Grooming dog"}]}, "tvzl11XL01k": {"duration": 80.3, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=tvzl11XL01k", "annotations": [{"segment": [3.6670269800000006, 46.55365826], "label": "Wrapping presents"}]}, "4SecbKo1iGE": {"duration": 148.748, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=4SecbKo1iGE", "annotations": [{"segment": [17.630695787831513, 122.9674414976599], "label": "Painting furniture"}]}, "uTQyPHg8r0M": {"duration": 86.448, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=uTQyPHg8r0M", "annotations": [{"segment": [8.631302689547582, 51.65295203276131], "label": "Gargling mouthwash"}]}, "0dZVZtTTgfk": {"duration": 196.86, "subset": "testing", "resolution": "256x144", "url": "https://www.youtube.com/watch?v=0dZVZtTTgfk", "annotations": []}, "yW76UI8lnNc": 
{"duration": 78.414, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yW76UI8lnNc", "annotations": [{"segment": [0.01, 78.381], "label": "Shuffleboard"}]}, "anO5Xhewrsw": {"duration": 72.772, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=anO5Xhewrsw", "annotations": [{"segment": [5.90342683095079, 72.771087], "label": "Playing drums"}]}, "LcCtHQuyDO0": {"duration": 75.116, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LcCtHQuyDO0", "annotations": []}, "fMVuGUBs2cs": {"duration": 231.16, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fMVuGUBs2cs", "annotations": []}, "6LADh__9LUI": {"duration": 65.318, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=6LADh__9LUI", "annotations": [{"segment": [0.009999999999999998, 65.317731], "label": "Hand car wash"}]}, "ux3h_qEusvw": {"duration": 154.181, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ux3h_qEusvw", "annotations": [{"segment": [11.78603050788698, 132.53270993915757], "label": "Peeling potatoes"}]}, "PYeU5d6wK2s": {"duration": 238.07999999999998, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PYeU5d6wK2s", "annotations": []}, "eoOzE3cpqVg": {"duration": 51.74, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eoOzE3cpqVg", "annotations": [{"segment": [5.283019500780031, 43.594992979719194], "label": "Paintball"}]}, "zoA-sfWcVlI": {"duration": 33.11, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zoA-sfWcVlI", "annotations": []}, "eR6obwco54s": {"duration": 136.395, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eR6obwco54s", "annotations": []}, "uRcVktMzlUc": {"duration": 232.966, "subset": "training", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=uRcVktMzlUc", "annotations": [{"segment": [49.99887641315599, 53.25967306151984], "label": "Beer pong"}, {"segment": [86.59226102257259, 92.75154358059321], "label": "Beer pong"}, {"segment": [99.99775835473511, 101.44700130956349], "label": "Beer pong"}, {"segment": [110.86708051594796, 112.67863420948343], "label": "Beer pong"}, {"segment": [125.72182080293885, 128.6203067125956], "label": "Beer pong"}, {"segment": [163.40213762847677, 167.3875557542548], "label": "Beer pong"}, {"segment": [176.08301348322507, 190.2131322928018], "label": "Beer pong"}, {"segment": [196.73472558952952, 198.54627928306496], "label": "Beer pong"}]}, "Vvco5y9PUFM": {"duration": 14.35, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Vvco5y9PUFM", "annotations": [{"segment": [0.2, 14.149931972789116], "label": "Powerbocking"}]}, "jN-jnIgzd6k": {"duration": 120.3, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=jN-jnIgzd6k", "annotations": []}, "NSITtemHdAs": {"duration": 130.659, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=NSITtemHdAs", "annotations": []}, "x2FfKHiN_3s": {"duration": 112.524, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=x2FfKHiN_3s", "annotations": [{"segment": [28.441682634275157, 45.26768640650056], "label": "Ice fishing"}]}, "42ZfZq3MKvI": {"duration": 75.883, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=42ZfZq3MKvI", "annotations": []}, "wlO7PcigY-s": {"duration": 63.53, "subset": "training", "resolution": "320x176", "url": "https://www.youtube.com/watch?v=wlO7PcigY-s", "annotations": [{"segment": [1.632925099843994, 23.40525976443058], "label": "Tennis serve with ball bouncing"}, {"segment": [32.60901941809673, 37.26038182371295], "label": "Tennis serve with ball bouncing"}]}, "R2cL7miVEwA": {"duration": 196.05, 
"subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=R2cL7miVEwA", "annotations": [{"segment": [17.12727029017161, 187.4824408549142], "label": "Playing field hockey"}]}, "erevt6avST0": {"duration": 117.42, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=erevt6avST0", "annotations": [{"segment": [12.177142416536661, 20.234199053042122], "label": "Paintball"}, {"segment": [48.06766743369735, 56.12472407020281], "label": "Paintball"}, {"segment": [77.36605520280811, 84.50753722152886], "label": "Paintball"}]}, "nfBBazBLQFk": {"duration": 207.308, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nfBBazBLQFk", "annotations": [{"segment": [0.01, 0.02], "label": "Kite flying"}, {"segment": [12.285737909516381, 192.3687909516381], "label": "Kite flying"}]}, "d8u6MM00_ig": {"duration": 137.35, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=d8u6MM00_ig", "annotations": [{"segment": [2.87877216, 80.71275036], "label": "Snatch"}]}, "voaGOm-rl9Y": {"duration": 51.8, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=voaGOm-rl9Y", "annotations": [{"segment": [3.5459153499999996, 43.88357170202808], "label": "Shoveling snow"}]}, "RL4V-Sx619M": {"duration": 83.685, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=RL4V-Sx619M", "annotations": [{"segment": [1.8577000336335086, 17.47657853830641], "label": "Shuffleboard"}, {"segment": [22.167546536062368, 52.241083919239934], "label": "Shuffleboard"}]}, "KjiHKzquw3Y": {"duration": 18.934, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KjiHKzquw3Y", "annotations": []}, "4Gx_8vuxzAk": {"duration": 42.84, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4Gx_8vuxzAk", "annotations": []}, "MinmayCk2Nk": {"duration": 38.64, "subset": 
"training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MinmayCk2Nk", "annotations": [{"segment": [5.967492037441499, 34.7199536723869], "label": "Using the pommel horse"}]}, "RD7AUdgtchE": {"duration": 4.55, "subset": "validation", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=RD7AUdgtchE", "annotations": [{"segment": [0, 1.5126403181072483], "label": "Shoveling snow"}]}, "1SyJihd6LGI": {"duration": 128.592, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=1SyJihd6LGI", "annotations": []}, "FXl3qRRs9jw": {"duration": 96.387, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=FXl3qRRs9jw", "annotations": [{"segment": [9.170079563182528, 83.58302028081123], "label": "Bullfighting"}]}, "UodvUEkuVig": {"duration": 203.57, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UodvUEkuVig", "annotations": [{"segment": [60.3402994895306, 182.29121908892355], "label": "Removing curlers"}]}, "4S09Z8HRtdo": {"duration": 175.172, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4S09Z8HRtdo", "annotations": [{"segment": [23.2246255850234, 159.0203775351014], "label": "Painting fence"}]}, "zgdT41KjjrE": {"duration": 236.43, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zgdT41KjjrE", "annotations": [{"segment": [46.7321106, 212.16138165600626], "label": "Horseback riding"}]}, "vGZO5lM61D4": {"duration": 192.633, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vGZO5lM61D4", "annotations": [{"segment": [20.435298853042124, 179.71042226645866], "label": "Snowboarding"}]}, "Hn3-SRXssY4": {"duration": 213.04, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Hn3-SRXssY4", "annotations": [{"segment": [42.708324550702024, 96.88308643213729], "label": "High jump"}]}, "JZ9mgVXPD-I": 
{"duration": 63.112, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JZ9mgVXPD-I", "annotations": [{"segment": [0.01, 50.11532687051482], "label": "Playing drums"}]}, "esmMIjNWVCc": {"duration": 29.49, "subset": "testing", "resolution": "426x228", "url": "https://www.youtube.com/watch?v=esmMIjNWVCc", "annotations": []}, "W5WfWmISKrk": {"duration": 155.899, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=W5WfWmISKrk", "annotations": [{"segment": [0.01, 123.3083906677067], "label": "Sumo"}]}, "_ao0UwSJHWA": {"duration": 222.541, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_ao0UwSJHWA", "annotations": [{"segment": [70.97872776911076, 213.64955616224648], "label": "Kneeling"}]}, "2v4Pjfd4UYw": {"duration": 79.62, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2v4Pjfd4UYw", "annotations": []}, "m210FwMsnTQ": {"duration": 96.69, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=m210FwMsnTQ", "annotations": [{"segment": [4.8570455589294435, 91.99654555892944], "label": "Using the balance beam"}]}, "9qJbSz-eCq0": {"duration": 87.447, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9qJbSz-eCq0", "annotations": [{"segment": [79.26544679631249, 81.9846731726733], "label": "Having an ice cream"}]}, "Ve37zGVerDU": {"duration": 91.859, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ve37zGVerDU", "annotations": [{"segment": [26.224710510140405, 58.61151146801872], "label": "Arm wrestling"}, {"segment": [73.08525879875194, 84.5496131201248], "label": "Arm wrestling"}]}, "bVMxBXeYMr0": {"duration": 174.36, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bVMxBXeYMr0", "annotations": [{"segment": [68.2745998493001, 93.57156314007662], "label": "Shaving"}, 
{"segment": [100.64383244717543, 128.11687860167467], "label": "Shaving"}, {"segment": [154.22987296634722, 155.86193381279253], "label": "Shaving"}]}, "-C1nnsyw7R0": {"duration": 133.56, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-C1nnsyw7R0", "annotations": [{"segment": [8.2731826, 15.62258190327613], "label": "Long jump"}]}, "d7gbNqcKXps": {"duration": 7.059, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=d7gbNqcKXps", "annotations": [{"segment": [0.9140184009360375, 6.695460093603744], "label": "Croquet"}]}, "3Rc2pPAQ-Cs": {"duration": 129.243, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3Rc2pPAQ-Cs", "annotations": [{"segment": [0.01, 0.02], "label": "Playing congas"}]}, "5-vAXCUN8X0": {"duration": 190.24, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5-vAXCUN8X0", "annotations": [{"segment": [31.451627609984396, 159.0384188580343], "label": "Playing water polo"}]}, "5BuDj1xkpiI": {"duration": 126.81, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=5BuDj1xkpiI", "annotations": [{"segment": [36.003680756754086, 64.8857543308535], "label": "Washing face"}, {"segment": [94.16347249921998, 112.56095767238689], "label": "Washing face"}]}, "gkqZ8cV6Kp4": {"duration": 202.664, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gkqZ8cV6Kp4", "annotations": []}, "vB2qMaP_JaI": {"duration": 217.78, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vB2qMaP_JaI", "annotations": [{"segment": [36.42583135863925, 196.1703967792], "label": "Braiding hair"}]}, "Pp5DCsgaALg": {"duration": 153.206, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Pp5DCsgaALg", "annotations": [{"segment": [0.01, 153.181], "label": "Sharpening knives"}]}, "IFgEjS97Pmw": 
{"duration": 83.778, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=IFgEjS97Pmw", "annotations": [{"segment": [45.19105460218409, 83.741], "label": "Elliptical trainer"}]}, "x--RoqrwiLI": {"duration": 132.15, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=x--RoqrwiLI", "annotations": [{"segment": [80.44648812, 113.69740284], "label": "Hammer throw"}, {"segment": [16.30741896, 74.08041948], "label": "Hammer throw"}]}, "4sgfR6N1thU": {"duration": 120.721, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=4sgfR6N1thU", "annotations": []}, "3VzXH3o88mw": {"duration": 129.71, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=3VzXH3o88mw", "annotations": [{"segment": [0.2, 12.54567141052989], "label": "Preparing pasta"}, {"segment": [27.317187748734437, 37.63701423158967], "label": "Preparing pasta"}, {"segment": [41.68400500917996, 43.90984993685461], "label": "Preparing pasta"}, {"segment": [46.9450928673947, 80.13041713572544], "label": "Preparing pasta"}, {"segment": [96.72307926989079, 110.48284786895475], "label": "Preparing pasta"}]}, "z31xpHUGgO0": {"duration": 113.57, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=z31xpHUGgO0", "annotations": [{"segment": [0, 113.568820861678], "label": "Playing badminton"}]}, "K1z2fiB9pUM": {"duration": 202.07999999999998, "subset": "training", "resolution": "352x264", "url": "https://www.youtube.com/watch?v=K1z2fiB9pUM", "annotations": [{"segment": [2.2074134165366615, 195.82910452418096], "label": "Doing motocross"}]}, "YddPa1ujWuk": {"duration": 140.85, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=YddPa1ujWuk", "annotations": [{"segment": [36.682943837753506, 140.801], "label": "Wrapping presents"}]}, "76RoR_LbIzQ": {"duration": 96.688, "subset": "training", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=76RoR_LbIzQ", "annotations": [{"segment": [54.58405384122012, 54.88479169466012], "label": "Braiding hair"}]}, "RLIsHDFYr6k": {"duration": 202.29, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=RLIsHDFYr6k", "annotations": []}, "PyPu-6wATfw": {"duration": 169.65, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=PyPu-6wATfw", "annotations": [{"segment": [5.422479177847114, 143.73596832], "label": "Mixing drinks"}]}, "lW5wvALKE6s": {"duration": 37.29, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lW5wvALKE6s", "annotations": []}, "wtQQ5I0QRuw": {"duration": 180.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wtQQ5I0QRuw", "annotations": [{"segment": [50.540686427457096, 84.79604056162246], "label": "Shaving legs"}, {"segment": [133.371255850234, 150.51565678627145], "label": "Shaving legs"}]}, "rCmQ7TXC5jk": {"duration": 105.907, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rCmQ7TXC5jk", "annotations": []}, "BetFWvm92nM": {"duration": 166.534, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BetFWvm92nM", "annotations": [{"segment": [25.699998439937602, 132.3939313572543], "label": "Gargling mouthwash"}]}, "Z3NtsxP6w-g": {"duration": 218.45, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Z3NtsxP6w-g", "annotations": [{"segment": [56.91373894955798, 212.65971789079563], "label": "Tango"}]}, "r-xtiGmrKxA": {"duration": 221.32999999999998, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=r-xtiGmrKxA", "annotations": [{"segment": [18.127865686056012, 213.21822808892355], "label": "Doing kickboxing"}]}, "PG04Cv8WB2s": {"duration": 44.745, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=PG04Cv8WB2s", "annotations": []}, "D18b2IZpxk0": {"duration": 95.667, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=D18b2IZpxk0", "annotations": [{"segment": [0.01, 86.37216692667707], "label": "Camel ride"}]}, "BDQHEemWnSk": {"duration": 155.11, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BDQHEemWnSk", "annotations": [{"segment": [0.01, 99.33286496085694], "label": "Rollerblading"}, {"segment": [83.60415309129372, 154.90929705215422], "label": "Rollerblading"}]}, "DqT3NrqisLk": {"duration": 161.31, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=DqT3NrqisLk", "annotations": [{"segment": [35.23128544189387, 120.54132518720748], "label": "Cleaning shoes"}]}, "LUGksGa4WJA": {"duration": 144.545, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LUGksGa4WJA", "annotations": [{"segment": [3.8334587750856977, 142.0634716848674], "label": "Surfing"}]}, "7JXae2so5-E": {"duration": 48.6, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=7JXae2so5-E", "annotations": [{"segment": [10.808144831914273, 28.645364064448167], "label": "Welding"}]}, "NNZKinEXYc4": {"duration": 90.14, "subset": "validation", "resolution": "704x480", "url": "https://www.youtube.com/watch?v=NNZKinEXYc4", "annotations": [{"segment": [0.01, 0.02], "label": "Using the balance beam"}]}, "SoWow2cxfac": {"duration": 148.213, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=SoWow2cxfac", "annotations": [{"segment": [11.788160686427457, 64.26570670826833], "label": "Disc dog"}, {"segment": [92.00629953198128, 138.47179251170047], "label": "Disc dog"}]}, "fEBM3nPMen8": {"duration": 48.204, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=fEBM3nPMen8", "annotations": []}, "QryL-hVKAOA": {"duration": 
122.74, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QryL-hVKAOA", "annotations": [{"segment": [63.980622090000004, 116.833127925117], "label": "Washing dishes"}, {"segment": [9.664329173166927, 49.46228721000001], "label": "Washing dishes"}]}, "XncWGxekE30": {"duration": 163.028, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XncWGxekE30", "annotations": [{"segment": [61.548528836391554, 74.26516702572864], "label": "Snow tubing"}]}, "eypKc4FyZ2s": {"duration": 198.067, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eypKc4FyZ2s", "annotations": [{"segment": [19.55337025253201, 107.14259766686223], "label": "Croquet"}, {"segment": [124.41371293165973, 167.60842944261069], "label": "Croquet"}]}, "B73wt5icB-c": {"duration": 167.625, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=B73wt5icB-c", "annotations": [{"segment": [6.4044375975039, 11.371144305772232], "label": "Rope skipping"}, {"segment": [16.337851014040563, 19.2133127925117], "label": "Rope skipping"}, {"segment": [23.134397035881435, 26.532670046801876], "label": "Rope skipping"}, {"segment": [39.080139625585026, 48.229336193447736], "label": "Rope skipping"}, {"segment": [53.457448517940726, 56.85572152886116], "label": "Rope skipping"}, {"segment": [62.345239469578786, 63.39086193447738], "label": "Rope skipping"}, {"segment": [98.94202574102964, 117.25441263650545], "label": "Rope skipping"}, {"segment": [121.69883853354133, 122.48314898595943], "label": "Rope skipping"}]}, "8GZlEny_cYw": {"duration": 5.78, "subset": "testing", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=8GZlEny_cYw", "annotations": []}, "w8kVVzMOC98": {"duration": 220.288, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=w8kVVzMOC98", "annotations": [{"segment": [23.705536661466457, 207.87177067082686], 
"label": "Playing ten pins"}]}, "UOswYEzeJ-M": {"duration": 85.33, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=UOswYEzeJ-M", "annotations": [{"segment": [17.83879348205928, 19.835673349453977], "label": "Layup drill in basketball"}, {"segment": [25.560062302652106, 27.024440872074884], "label": "Layup drill in basketball"}, {"segment": [36.7425895600624, 37.94071748049922], "label": "Layup drill in basketball"}, {"segment": [42.33385318876755, 43.66510643369734], "label": "Layup drill in basketball"}, {"segment": [47.92511681747269, 48.85699408892356], "label": "Layup drill in basketball"}, {"segment": [53.7826310951638, 54.847633691107646], "label": "Layup drill in basketball"}]}, "ZWlh8sQE2Z4": {"duration": 63.298, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=ZWlh8sQE2Z4", "annotations": [{"segment": [0.01, 63.141], "label": "Kneeling"}]}, "9E-FFtZl9qM": {"duration": 97.291, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9E-FFtZl9qM", "annotations": [{"segment": [5.192211518272804, 14.433521487120155], "label": "Running a marathon"}, {"segment": [21.85686883914508, 23.674831455967507], "label": "Running a marathon"}, {"segment": [28.522731767493987, 35.64308535004851], "label": "Running a marathon"}, {"segment": [75.48676603540676, 82.00113207902046], "label": "Running a marathon"}, {"segment": [86.69753550581174, 94.72687039677747], "label": "Running a marathon"}]}, "Q7cgJD7-sEM": {"duration": 206.94, "subset": "validation", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=Q7cgJD7-sEM", "annotations": [{"segment": [22.2695616224649, 188.17984867394696], "label": "Playing flauta"}]}, "hs81dVKvvdw": {"duration": 115.75, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hs81dVKvvdw", "annotations": [{"segment": [17.967662488812476, 51.37487415142864], "label": "Getting a tattoo"}]}, 
"AFdqkU6FyqY": {"duration": 132.052, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AFdqkU6FyqY", "annotations": [{"segment": [78.6560802473539, 80.70976474859698], "label": "Wakeboarding"}, {"segment": [85.84397298452342, 97.75533809183291], "label": "Wakeboarding"}, {"segment": [101.8627053702155, 112.13112356617194], "label": "Wakeboarding"}]}, "MUbcqBcEdQg": {"duration": 135.768, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MUbcqBcEdQg", "annotations": []}, "Yg6jFRYIyXw": {"duration": 102.35, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Yg6jFRYIyXw", "annotations": [{"segment": [24.430724691107645, 91.0164253198128], "label": "Shoveling snow"}]}, "aV5DMcsNMmk": {"duration": 56.73, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aV5DMcsNMmk", "annotations": [{"segment": [0.01, 56.726349206349205], "label": "Drinking coffee"}]}, "6wPVO7I8iIY": {"duration": 150.42000000000002, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=6wPVO7I8iIY", "annotations": []}, "5vd8j0hKIgs": {"duration": 23.31, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=5vd8j0hKIgs", "annotations": [{"segment": [7.445561622464898, 18.450464898595943], "label": "Using parallel bars"}]}, "blbaWHbtqTI": {"duration": 102.31, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=blbaWHbtqTI", "annotations": []}, "D5vmWDHQ86I": {"duration": 139.505, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=D5vmWDHQ86I", "annotations": [{"segment": [56.36805190302851, 66.59700340666689], "label": "Futsal"}, {"segment": [87.05490608424337, 96.41330848829952], "label": "Futsal"}, {"segment": [132.1058199828393, 139.305487], "label": "Futsal"}]}, "hpZ5XnuiRPw": {"duration": 162.22, "subset": "validation", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hpZ5XnuiRPw", "annotations": [{"segment": [52.74843972074883, 158.4983092808112], "label": "Playing harmonica"}]}, "qPCdE01lKlE": {"duration": 94.32, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=qPCdE01lKlE", "annotations": []}, "HmqRZ4HPu1U": {"duration": 32.857, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HmqRZ4HPu1U", "annotations": [{"segment": [0.01, 32.81517539429437], "label": "Cutting the grass"}]}, "NV2scxevYKk": {"duration": 208.794, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=NV2scxevYKk", "annotations": [{"segment": [16.28244929797192, 183.66602808112324], "label": "Canoeing"}]}, "ZmoSxXei954": {"duration": 210.65, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZmoSxXei954", "annotations": [{"segment": [19.183986570000002, 151.95309885], "label": "Playing racquetball"}]}, "3IRjOwcHEpQ": {"duration": 91.928, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3IRjOwcHEpQ", "annotations": []}, "8tlLBffNjf8": {"duration": 32.555, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=8tlLBffNjf8", "annotations": [{"segment": [8.468194488595023, 24.542550623052957], "label": "Decorating the Christmas tree"}]}, "54TIARWo-S8": {"duration": 29.26, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=54TIARWo-S8", "annotations": []}, "JTFEzdAw1L4": {"duration": 44.606, "subset": "training", "resolution": "136x240", "url": "https://www.youtube.com/watch?v=JTFEzdAw1L4", "annotations": [{"segment": [0.01, 44.21644859262093], "label": "Beer pong"}]}, "ssNrAODsmIE": {"duration": 192.192, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ssNrAODsmIE", "annotations": []}, "PZ1FVhgTRWU": 
{"duration": 91.42, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=PZ1FVhgTRWU", "annotations": [{"segment": [4.991560805289354, 86.85315635101405], "label": "Using the balance beam"}]}, "cnjaB6GFpSc": {"duration": 158.9, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=cnjaB6GFpSc", "annotations": [{"segment": [0.01, 158.89414965986396], "label": "Doing step aerobics"}]}, "y2jDV7tFUXg": {"duration": 34.134, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=y2jDV7tFUXg", "annotations": [{"segment": [3.2717808112324493, 28.275634165366615], "label": "Waterskiing"}]}, "91WRZuT4c6E": {"duration": 111.642, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=91WRZuT4c6E", "annotations": [{"segment": [0, 7.399442277691107], "label": "Hand car wash"}, {"segment": [29.33661232449298, 41.17571996879875], "label": "Hand car wash"}, {"segment": [66.76908502340093, 72.35339391575663], "label": "Hand car wash"}, {"segment": [75.31370124804992, 82.62740171606863], "label": "Hand car wash"}]}, "ESsUwNlkJoo": {"duration": 69.27, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ESsUwNlkJoo", "annotations": [{"segment": [8.09918096723869, 62.309698907956324], "label": "Spinning"}]}, "y3kJaNihMkA": {"duration": 35.88, "subset": "testing", "resolution": "328x240", "url": "https://www.youtube.com/watch?v=y3kJaNihMkA", "annotations": []}, "uaGy1W-EYWU": {"duration": 76.023, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=uaGy1W-EYWU", "annotations": [{"segment": [35.90191681957542, 38.86145731801779], "label": "Hula hoop"}]}, "uSsb5pdYWE8": {"duration": 80.295, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uSsb5pdYWE8", "annotations": []}, "tAWTfutrwg0": {"duration": 211.72, "subset": "validation", "resolution": 
"1280x720", "url": "https://www.youtube.com/watch?v=tAWTfutrwg0", "annotations": [{"segment": [44.90821840873635, 176.66434477379096], "label": "Hanging wallpaper"}]}, "fklBsM-H7-Y": {"duration": 103.42, "subset": "validation", "resolution": "562x480", "url": "https://www.youtube.com/watch?v=fklBsM-H7-Y", "annotations": [{"segment": [4.835959422776911, 102.03874382059283], "label": "Playing guitarra"}]}, "w1VJnYDYYY0": {"duration": 33.41, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=w1VJnYDYYY0", "annotations": [{"segment": [18.87003484493121, 23.24871696364453], "label": "Ping-pong"}]}, "K1b_YcfLsHM": {"duration": 189.06, "subset": "testing", "resolution": "424x240", "url": "https://www.youtube.com/watch?v=K1b_YcfLsHM", "annotations": []}, "rXwSSTGmvb8": {"duration": 201.5, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rXwSSTGmvb8", "annotations": [{"segment": [118.11185676000001, 174.42671606864275], "label": "Mixing drinks"}, {"segment": [35.82819032761311, 108.38177207999999], "label": "Mixing drinks"}]}, "AzmaqkS88YM": {"duration": 149.32999999999998, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=AzmaqkS88YM", "annotations": [{"segment": [0.01, 149.327527], "label": "Playing squash"}]}, "Hhc10CrukfA": {"duration": 30.163, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Hhc10CrukfA", "annotations": [{"segment": [0.2, 29.861], "label": "Baking cookies"}]}, "5vDPgcyRtOU": {"duration": 111.22, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5vDPgcyRtOU", "annotations": [{"segment": [41.99080659825032, 99.94505964430577], "label": "Polishing forniture"}]}, "kXfYuIx31Bo": {"duration": 169.437, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kXfYuIx31Bo", "annotations": [{"segment": [131.21309906396257, 
140.46276365054604], "label": "Laying tile"}]}, "lHVn-5OwXMw": {"duration": 184.41, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=lHVn-5OwXMw", "annotations": [{"segment": [0, 160.82158261466458], "label": "Cricket"}, {"segment": [134.35362984087365, 177.50790066770674], "label": "Cricket"}]}, "BCsfauy69xM": {"duration": 41.889, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BCsfauy69xM", "annotations": [{"segment": [0, 24.949533223088924], "label": "Carving jack-o-lanterns"}]}, "LjfF72Hwpyg": {"duration": 238.725, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LjfF72Hwpyg", "annotations": [{"segment": [0.01, 10.051960998439938], "label": "Snow tubing"}, {"segment": [16.380973478939158, 31.272767550702028], "label": "Snow tubing"}, {"segment": [38.718664586583465, 51.3766895475819], "label": "Snow tubing"}, {"segment": [67.75766302652106, 71.48061154446178], "label": "Snow tubing"}, {"segment": [80.04339313572542, 146.68417160686428], "label": "Snow tubing"}, {"segment": [157.85301716068642, 192.4925694227769], "label": "Snow tubing"}, {"segment": [216.69376287051483, 218.55539313572544], "label": "Snow tubing"}]}, "m6w3Skrc89s": {"duration": 27.888, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=m6w3Skrc89s", "annotations": [{"segment": [0.01, 26.948904342916563], "label": "Kneeling"}]}, "ppYlWcoqOYI": {"duration": 170.32, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ppYlWcoqOYI", "annotations": []}, "6O5UcjQMwoQ": {"duration": 36.385, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=6O5UcjQMwoQ", "annotations": [{"segment": [0, 36.361], "label": "Playing drums"}]}, "DVlMzGPhWO4": {"duration": 95.97, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DVlMzGPhWO4", "annotations": 
[{"segment": [14.90958848, 80.39527264], "label": "Sailing"}, {"segment": [2.620033139625585, 93.79718639859594], "label": "Sailing"}]}, "p1WrfcqnINk": {"duration": 133.608, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=p1WrfcqnINk", "annotations": []}, "4U13LEMIdi4": {"duration": 12.15, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4U13LEMIdi4", "annotations": [{"segment": [0.41679995319812796, 11.765125951638067], "label": "Washing face"}]}, "Vre3tO7xV98": {"duration": 170.024, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Vre3tO7xV98", "annotations": [{"segment": [25.990792511700466, 158.06645241809673], "label": "BMX"}]}, "yhnIVL0t4sM": {"duration": 24.474, "subset": "testing", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=yhnIVL0t4sM", "annotations": []}, "GwZOgt1SvfM": {"duration": 55.241, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GwZOgt1SvfM", "annotations": []}, "DLFerdBxdxQ": {"duration": 150.7, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DLFerdBxdxQ", "annotations": [{"segment": [24.68523856474259, 114.49248743837754], "label": "Preparing salad"}]}, "A6nhjnCvpHg": {"duration": 119.072, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=A6nhjnCvpHg", "annotations": []}, "2Iakg-Z-iXM": {"duration": 148.375, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=2Iakg-Z-iXM", "annotations": [{"segment": [13.72820165265274, 148.375509], "label": "Baton twirling"}]}, "lOtplLrtapE": {"duration": 209.47, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lOtplLrtapE", "annotations": [{"segment": [20.584684867394696, 87.89333697347894], "label": "Snatch"}, {"segment": [99.98275507020281, 186.56912792511702], "label": 
"Snatch"}]}, "CSDApI2nHPU": {"duration": 115.59, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=CSDApI2nHPU", "annotations": [{"segment": [0.01, 112.07741341653667], "label": "Washing dishes"}]}, "7aydawW3AfE": {"duration": 18.81, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7aydawW3AfE", "annotations": [{"segment": [2.8021520608424333, 16.035351845553823], "label": "Putting on makeup"}]}, "L-A8RECKhtQ": {"duration": 118.585, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=L-A8RECKhtQ", "annotations": [{"segment": [97.71199242729283, 104.1768678317757], "label": "Cutting the grass"}]}, "YzcgGHmfaKE": {"duration": 162.981, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=YzcgGHmfaKE", "annotations": [{"segment": [7.8791747269890795, 162.721], "label": "Futsal"}]}, "ekt-X2XgFko": {"duration": 140.11, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ekt-X2XgFko", "annotations": []}, "FbmK-7sZ3O4": {"duration": 118.097, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FbmK-7sZ3O4", "annotations": [{"segment": [0.01, 99.36284044477068], "label": "Playing ice hockey"}]}, "CXM1sXEMSWI": {"duration": 95.97, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=CXM1sXEMSWI", "annotations": [{"segment": [17.60724896, 46.86116415444617], "label": "Drinking beer"}]}, "AqTZd5HZKNI": {"duration": 204.24, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AqTZd5HZKNI", "annotations": [{"segment": [0.01, 46.19663806552262], "label": "Vacuuming floor"}, {"segment": [50.657003120124806, 60.53352574102965], "label": "Vacuuming floor"}, {"segment": [65.31248829953198, 162.16612948517943], "label": "Vacuuming floor"}, {"segment": [166.6264945397816, 204.02100000000002], "label": 
"Vacuuming floor"}]}, "ZsVfa3JIUNU": {"duration": 101.588, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZsVfa3JIUNU", "annotations": []}, "oAyygdg493M": {"duration": 107.277, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=oAyygdg493M", "annotations": [{"segment": [29.120213951415202, 42.5088180670084], "label": "Surfing"}, {"segment": [65.60415926365054, 94.55701526521061], "label": "Surfing"}]}, "-hEr3ydGyoM": {"duration": 176.472, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=-hEr3ydGyoM", "annotations": [{"segment": [0.01, 15.220416930118455], "label": "Playing drums"}, {"segment": [89.39934216376331, 92.69618328525864], "label": "Playing drums"}, {"segment": [111.65301973385678, 114.68812721053982], "label": "Playing drums"}, {"segment": [133.64711319184826, 140.24154309839028], "label": "Playing drums"}, {"segment": [140.79107892393546, 144.63782970275165], "label": "Playing drums"}]}, "DzdNjXensv0": {"duration": 136.65, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DzdNjXensv0", "annotations": [{"segment": [0.01, 28.76932137285491], "label": "Polishing shoes"}, {"segment": [32.818336973478935, 134.25683307332295], "label": "Polishing shoes"}]}, "VFVAkH9zPIM": {"duration": 205.335, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VFVAkH9zPIM", "annotations": [{"segment": [35.54787987519501, 77.50078315132606], "label": "Disc dog"}, {"segment": [95.43484867394696, 121.69544461778473], "label": "Disc dog"}, {"segment": [121.69544461778473, 146.03453354134166], "label": "Disc dog"}]}, "I6gc2AjcYOY": {"duration": 44.12, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=I6gc2AjcYOY", "annotations": [{"segment": [0.2, 14.0241248049922], "label": "Hand washing clothes"}, {"segment": [21.24242433697348, 37.122683307332295], 
"label": "Hand washing clothes"}]}, "qVgsPJeQYMY": {"duration": 128.918, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qVgsPJeQYMY", "annotations": [{"segment": [24.567891895056512, 27.980099060165546], "label": "Drum corps"}, {"segment": [55.077038312502, 102.06090435616323], "label": "Drum corps"}]}, "soHEsBIljbI": {"duration": 17.0, "subset": "training", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=soHEsBIljbI", "annotations": [{"segment": [6.599516828048268, 12.371085504060728], "label": "Cleaning shoes"}]}, "LWJzkihZUWA": {"duration": 41.77, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LWJzkihZUWA", "annotations": [{"segment": [6.511856474258971, 19.275095163806554], "label": "Putting on makeup"}]}, "qAMf2UUH1xI": {"duration": 144.707, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qAMf2UUH1xI", "annotations": [{"segment": [7.22403439625585, 8.804291920436818], "label": "Scuba diving"}, {"segment": [13.319313418096723, 19.414592439937596], "label": "Scuba diving"}, {"segment": [24.606867162246488, 27.541631135725424], "label": "Scuba diving"}, {"segment": [34.314163382215284, 40.63519347893915], "label": "Scuba diving"}, {"segment": [58.469528394695786, 70.20858428861153], "label": "Scuba diving"}, {"segment": [75.40085901092043, 79.91588050858033], "label": "Scuba diving"}, {"segment": [100.00772617316692, 107.45751164430577], "label": "Scuba diving"}, {"segment": [116.93905678939157, 130.2583702074883], "label": "Scuba diving"}]}, "HfpCQGe9eA0": {"duration": 42.261, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HfpCQGe9eA0", "annotations": [{"segment": [0.2, 42.06018140589569], "label": "Slacklining"}]}, "glZIkoJTeFY": {"duration": 204.59, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=glZIkoJTeFY", "annotations": []}, 
"-TuxT19bogQ": {"duration": 41.332, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-TuxT19bogQ", "annotations": [{"segment": [2.64830242094279, 41.331519], "label": "Hurling"}]}, "RsHk6sq_9IY": {"duration": 115.543, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RsHk6sq_9IY", "annotations": [{"segment": [6.489106943869592, 107.07026439313572], "label": "Futsal"}]}, "W6y6Vmk5edg": {"duration": 90.65, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=W6y6Vmk5edg", "annotations": [{"segment": [0, 83.29680293334182], "label": "Using the balance beam"}]}, "nRkGD4lF65U": {"duration": 143.01, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nRkGD4lF65U", "annotations": []}, "eyr5ZLPu4gU": {"duration": 126.67, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=eyr5ZLPu4gU", "annotations": []}, "46YBNutTwKg": {"duration": 43.422, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=46YBNutTwKg", "annotations": [{"segment": [5.617196567862715, 36.42704836193448], "label": "Decorating the Christmas tree"}]}, "Is0_Qv6NLm0": {"duration": 199.199, "subset": "training", "resolution": "842x480", "url": "https://www.youtube.com/watch?v=Is0_Qv6NLm0", "annotations": [{"segment": [107.8606113938973, 124.33104353118279], "label": "Arm wrestling"}]}, "J2GQ8168b2Q": {"duration": 220.01, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=J2GQ8168b2Q", "annotations": []}, "TM_5fb15eXQ": {"duration": 144.382, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TM_5fb15eXQ", "annotations": [{"segment": [0.01, 144.381677], "label": "Waxing skis"}]}, "c7SweMMGQE8": {"duration": 138.86, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=c7SweMMGQE8", 
"annotations": [{"segment": [27.287747269890794, 120.41259906396255], "label": "Grooming horse"}]}, "eF7Rla7UOtQ": {"duration": 28.63, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=eF7Rla7UOtQ", "annotations": []}, "eqWQOAjF4-k": {"duration": 23.662, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eqWQOAjF4-k", "annotations": [{"segment": [1.9124804992199689, 19.67648205928237], "label": "Using the monkey bar"}]}, "yZLwR5EJ7Wc": {"duration": 23.27, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yZLwR5EJ7Wc", "annotations": [{"segment": [0.234986996099844, 21.853790637285492], "label": "Playing bagpipes"}]}, "m6C4SOxfNGQ": {"duration": 106.51, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=m6C4SOxfNGQ", "annotations": [{"segment": [0.2, 106.262], "label": "Table soccer"}]}, "li16LYa7us8": {"duration": 211.05, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=li16LYa7us8", "annotations": [{"segment": [14.816028720748829, 196.23006927925118], "label": "Ballet"}]}, "rHMMgJBUWh8": {"duration": 26.98, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rHMMgJBUWh8", "annotations": [{"segment": [2.2328561575663026, 11.123310950078004], "label": "Hammer throw"}]}, "huFOZ2BiM_k": {"duration": 42.08, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=huFOZ2BiM_k", "annotations": []}, "3i3SUPsyZtA": {"duration": 78.53, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3i3SUPsyZtA", "annotations": []}, "gQLH5G88ClE": {"duration": 177.262, "subset": "training", "resolution": "1920x1026", "url": "https://www.youtube.com/watch?v=gQLH5G88ClE", "annotations": [{"segment": [16.0142457315221, 168.14958018098204], "label": "Decorating the Christmas tree"}]}, "W-poAYW1pb0": 
{"duration": 135.117, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=W-poAYW1pb0", "annotations": [{"segment": [16.441683884555385, 22.3438268174727], "label": "Running a marathon"}, {"segment": [44.898444453978165, 58.17826605304212], "label": "Running a marathon"}]}, "VFKUKX9ToNI": {"duration": 11.47, "subset": "training", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=VFKUKX9ToNI", "annotations": [{"segment": [0.6059178190327613, 11.209479652106083], "label": "Plataform diving"}]}, "fYQTAuBS7zw": {"duration": 157.339, "subset": "training", "resolution": "240x180", "url": "https://www.youtube.com/watch?v=fYQTAuBS7zw", "annotations": [{"segment": [23.098433616554995, 56.17574670066715], "label": "Throwing darts"}, {"segment": [67.44653486266091, 77.24722022091636], "label": "Throwing darts"}, {"segment": [90.96817972247399, 98.31869374116557], "label": "Throwing darts"}, {"segment": [106.89429342963909, 118.16508159163286], "label": "Throwing darts"}, {"segment": [134.82624670066713, 150.99737754178864], "label": "Throwing darts"}]}, "P2HtcvFg3z8": {"duration": 92.49000000000001, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=P2HtcvFg3z8", "annotations": [{"segment": [0.4325475819032762, 67.63623868954758], "label": "Javelin throw"}]}, "Wq-eDa62BcI": {"duration": 127.85, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Wq-eDa62BcI", "annotations": [{"segment": [50.64189391575663, 127.801], "label": "Removing curlers"}]}, "GSFyEkGCUVo": {"duration": 181.72, "subset": "validation", "resolution": "854x478", "url": "https://www.youtube.com/watch?v=GSFyEkGCUVo", "annotations": [{"segment": [85.02074882995319, 179.13009672386897], "label": "Baton twirling"}]}, "NiinNJg-uyg": {"duration": 117.59, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NiinNJg-uyg", "annotations": [{"segment": 
[17.793802604368175, 28.800278442121684], "label": "Playing lacrosse"}, {"segment": [63.47067733104524, 68.97391524992199], "label": "Playing lacrosse"}, {"segment": [104.01119666677067, 110.61508216942278], "label": "Playing lacrosse"}]}, "rYrO0KnZ7F0": {"duration": 143.709, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rYrO0KnZ7F0", "annotations": [{"segment": [24.425692667706706, 40.11750234009361], "label": "Carving jack-o-lanterns"}, {"segment": [60.06419344773791, 103.5434976599064], "label": "Carving jack-o-lanterns"}, {"segment": [117.21482527301092, 129.09319188767552], "label": "Carving jack-o-lanterns"}]}, "mHWQrZW6-0M": {"duration": 124.111, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mHWQrZW6-0M", "annotations": [{"segment": [26.71961115179301, 108.62102654602185], "label": "Canoeing"}]}, "e34P8tr-Mi4": {"duration": 64.064, "subset": "training", "resolution": "327x240", "url": "https://www.youtube.com/watch?v=e34P8tr-Mi4", "annotations": [{"segment": [2.0229736204713125, 3.420004682776608], "label": "Plastering"}, {"segment": [5.2161874771691314, 38.84472090551804], "label": "Plastering"}, {"segment": [52.216303930440155, 63.0931886298171], "label": "Plastering"}]}, "YvVVwrLQGq8": {"duration": 164.63, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=YvVVwrLQGq8", "annotations": []}, "t6vWMTMooDc": {"duration": 222.192, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=t6vWMTMooDc", "annotations": [{"segment": [42.50323263205673, 96.75533304713204], "label": "Sumo"}, {"segment": [107.81308599797542, 109.88641467625855], "label": "Sumo"}, {"segment": [119.5619485082465, 121.63527718652963], "label": "Sumo"}, {"segment": [154.4629812593459, 193.51067018603948], "label": "Sumo"}]}, "xv6h1JNMX8g": {"duration": 155.249, "subset": "training", "resolution": "480x360", "url": 
"https://www.youtube.com/watch?v=xv6h1JNMX8g", "annotations": [{"segment": [30.87071372854914, 146.3823627145086], "label": "Disc dog"}]}, "aa-bFJZmcko": {"duration": 181.16, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aa-bFJZmcko", "annotations": [{"segment": [56.80744181603999, 136.052662], "label": "Fixing bicycle"}]}, "-p7aui3nAnc": {"duration": 77.509, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=-p7aui3nAnc", "annotations": []}, "cUw2aQVdrbE": {"duration": 121.07, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=cUw2aQVdrbE", "annotations": []}, "SKtUq_1cOSs": {"duration": 55.96, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=SKtUq_1cOSs", "annotations": [{"segment": [19.80353666146646, 50.68658502340094], "label": "Snatch"}]}, "iKuBIzUHxYc": {"duration": 154.018, "subset": "training", "resolution": "854x456", "url": "https://www.youtube.com/watch?v=iKuBIzUHxYc", "annotations": [{"segment": [49.63518224299065, 70.25656230529594], "label": "Futsal"}, {"segment": [88.71989096573208, 119.66750623052961], "label": "Futsal"}]}, "sx5Q-JsaAAs": {"duration": 164.1, "subset": "training", "resolution": "636x360", "url": "https://www.youtube.com/watch?v=sx5Q-JsaAAs", "annotations": [{"segment": [12.036765990639624, 134.70933853354134], "label": "Ping-pong"}]}, "aWnpbk007cE": {"duration": 82.849, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aWnpbk007cE", "annotations": [{"segment": [0.2, 82.64879818594105], "label": "Kite flying"}]}, "Tb875ldI6AY": {"duration": 154.95, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Tb875ldI6AY", "annotations": [{"segment": [8.217232449297972, 43.2613120124805], "label": "Smoking a cigarette"}, {"segment": [59.454093603744155, 89.42282371294851], "label": "Smoking a cigarette"}, {"segment": 
[112.3827379095164, 149.36028393135726], "label": "Smoking a cigarette"}]}, "cQr-HSUKbsw": {"duration": 140.295, "subset": "validation", "resolution": "448x298", "url": "https://www.youtube.com/watch?v=cQr-HSUKbsw", "annotations": [{"segment": [12.893151079746538, 114.50866382690147], "label": "Playing ice hockey"}]}, "17ytsjuUCRA": {"duration": 54.75, "subset": "training", "resolution": "480x720", "url": "https://www.youtube.com/watch?v=17ytsjuUCRA", "annotations": [{"segment": [0.01, 54.496399338533536], "label": "Bathing dog"}]}, "-M-Dr6HqDhU": {"duration": 53.685, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-M-Dr6HqDhU", "annotations": [{"segment": [30.22895709828393, 38.591047581903275], "label": "Painting furniture"}]}, "iwMXYbYyJy4": {"duration": 32.601, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iwMXYbYyJy4", "annotations": [{"segment": [0.01, 32.541], "label": "Swinging at the playground"}]}, "OKTxSQMS560": {"duration": 207.703, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OKTxSQMS560", "annotations": [{"segment": [88.13584782847097, 124.10305043494259], "label": "Grooming dog"}, {"segment": [150.34938747209753, 172.05931947425898], "label": "Grooming dog"}]}, "Jd0hvO7erXM": {"duration": 211.534, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Jd0hvO7erXM", "annotations": [{"segment": [18.14751170046802, 150.4593697347894], "label": "Arm wrestling"}, {"segment": [162.00778627145087, 196.32308112324495], "label": "Arm wrestling"}]}, "jCw0kBph-v4": {"duration": 187.362, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jCw0kBph-v4", "annotations": [{"segment": [74.3036698695484, 170.46136604288554], "label": "Hanging wallpaper"}]}, "KJpMYfMXJnQ": {"duration": 140.34, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=KJpMYfMXJnQ", "annotations": []}, "fvEaC-fTR4A": {"duration": 223.144, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fvEaC-fTR4A", "annotations": []}, "c7fu7RcM2iE": {"duration": 179.12, "subset": "training", "resolution": "852x480", "url": "https://www.youtube.com/watch?v=c7fu7RcM2iE", "annotations": [{"segment": [0, 178.91875283446714], "label": "Long jump"}]}, "JW0VZ5NoC8A": {"duration": 124.088, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=JW0VZ5NoC8A", "annotations": [{"segment": [0.2, 112.44842589703589], "label": "Playing pool"}]}, "metrvLaYO1w": {"duration": 27.19, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=metrvLaYO1w", "annotations": [{"segment": [1.5665834633385336, 24.38789391575663], "label": "Doing karate"}]}, "L4ZhAK04yjg": {"duration": 128.15, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=L4ZhAK04yjg", "annotations": [{"segment": [0.01, 128.101], "label": "Cumbia"}]}, "iF9jetQ7OTE": {"duration": 137.57999999999998, "subset": "training", "resolution": "540x360", "url": "https://www.youtube.com/watch?v=iF9jetQ7OTE", "annotations": [{"segment": [15.882666177847113, 45.31045241809672], "label": "Smoking hookah"}, {"segment": [101.09102391575662, 134.1441400156006], "label": "Smoking hookah"}]}, "jUegk6fzp7o": {"duration": 30.117, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jUegk6fzp7o", "annotations": []}, "NiQaDgj8Z10": {"duration": 133.469, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NiQaDgj8Z10", "annotations": [{"segment": [0.2, 43.50590951638065], "label": "Hula hoop"}, {"segment": [90.96690171606863, 117.4035070202808], "label": "Hula hoop"}]}, "YUN8d87DNNY": {"duration": 176.1, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=YUN8d87DNNY", "annotations": [{"segment": [76.09943431357254, 176.100135], "label": "Grooming horse"}]}, "IJER0EpbxW4": {"duration": 25.659, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=IJER0EpbxW4", "annotations": [{"segment": [0.01, 0.02], "label": "Running a marathon"}]}, "2bkQz8BcKZ4": {"duration": 118.399, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2bkQz8BcKZ4", "annotations": [{"segment": [0.01, 118.261], "label": "Baton twirling"}]}, "2H-m_3f_3qM": {"duration": 12.307, "subset": "testing", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=2H-m_3f_3qM", "annotations": []}, "yyvc2OSPx1Y": {"duration": 181.51, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yyvc2OSPx1Y", "annotations": [{"segment": [103.07292870762451, 181.5102947845805], "label": "Getting a piercing"}]}, "jIb_BfiRsQ4": {"duration": 140.969, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jIb_BfiRsQ4", "annotations": [{"segment": [5.057165366614665, 135.00432761310452], "label": "Making an omelette"}]}, "xMuC8lmVX3A": {"duration": 224.03, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=xMuC8lmVX3A", "annotations": [{"segment": [33.020206659126366, 213.3210176232449], "label": "Wrapping presents"}]}, "dUvZ6-IVo34": {"duration": 80.09, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dUvZ6-IVo34", "annotations": [{"segment": [22.72614976599064, 58.07857254290172], "label": "Playing flauta"}]}, "4ZoBfU4b5Ko": {"duration": 62.857, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4ZoBfU4b5Ko", "annotations": [{"segment": [12.740608424336973, 62.801], "label": "Blow-drying hair"}]}, "DJCtIWibRPM": {"duration": 130.381, "subset": "testing", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=DJCtIWibRPM", "annotations": []}, "sra0ywgWITo": {"duration": 187.293, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sra0ywgWITo", "annotations": [{"segment": [47.89541965678627, 176.10328081123245], "label": "Arm wrestling"}]}, "VbP9fOp-Umg": {"duration": 191.31, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=VbP9fOp-Umg", "annotations": [{"segment": [9.252083302379715, 183.2509402471337], "label": "Baton twirling"}]}, "4r6fQ5RvuGE": {"duration": 96.781, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4r6fQ5RvuGE", "annotations": [{"segment": [0.01, 96.741], "label": "Surfing"}]}, "lZl-u5UYu_Y": {"duration": 172.5, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=lZl-u5UYu_Y", "annotations": []}, "9eniCub7u60": {"duration": 19.48, "subset": "testing", "resolution": "270x360", "url": "https://www.youtube.com/watch?v=9eniCub7u60", "annotations": []}, "IqRN2sOQ7Mo": {"duration": 187.711, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=IqRN2sOQ7Mo", "annotations": [{"segment": [55.917708268330735, 155.16432137285491], "label": "Kite flying"}]}, "j4iaeT5xIdw": {"duration": 192.262, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=j4iaeT5xIdw", "annotations": [{"segment": [3.899210382020723, 182.9629470202808], "label": "Futsal"}]}, "YtsAKuSraLY": {"duration": 148.1, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YtsAKuSraLY", "annotations": []}, "jmSrbVNKF6U": {"duration": 81.34, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jmSrbVNKF6U", "annotations": [{"segment": [2.0303151326053044, 79.56297425897036], "label": "Ballet"}]}, "nzqlP05Lkz8": {"duration": 159.452, "subset": "testing", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=nzqlP05Lkz8", "annotations": []}, "cTbkhIaj_Xc": {"duration": 144.43, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cTbkhIaj_Xc", "annotations": [{"segment": [0.01, 45.28869220074925], "label": "Playing kickball"}, {"segment": [59.708972304470414, 70.07354862902], "label": "Playing kickball"}, {"segment": [76.83305492763928, 84.26851185612051], "label": "Playing kickball"}]}, "5E42nKwv1xY": {"duration": 187.59, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5E42nKwv1xY", "annotations": [{"segment": [43.75230919422777, 186.56921317669034], "label": "Volleyball"}]}, "WcBB6DfMTWA": {"duration": 205.82, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WcBB6DfMTWA", "annotations": [{"segment": [18.462943025351017, 198.27595335920438], "label": "Tai chi"}]}, "X0UmqVLOAK0": {"duration": 12.1, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=X0UmqVLOAK0", "annotations": [{"segment": [4.902809672386896, 7.344822152886116], "label": "Discus throw"}]}, "ZeBrPKBGb_k": {"duration": 180.164, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=ZeBrPKBGb_k", "annotations": [{"segment": [10.678, 172.2721263650546], "label": "BMX"}]}, "JavaLreBqtI": {"duration": 91.65, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=JavaLreBqtI", "annotations": [{"segment": [23.162507775351013, 37.46035208112324], "label": "Removing curlers"}, {"segment": [46.61097243681748, 55.332657463338535], "label": "Removing curlers"}]}, "gvr1dpCpvhw": {"duration": 218.338, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gvr1dpCpvhw", "annotations": [{"segment": [38.14940742391601, 165.8817983522062], "label": "Making a cake"}]}, "QrKWO2uNNJ0": {"duration": 194.21, "subset": "training", 
"resolution": "960x720", "url": "https://www.youtube.com/watch?v=QrKWO2uNNJ0", "annotations": [{"segment": [49.992091438759594, 114.52733675061289], "label": "Removing curlers"}]}, "xUDwlT5fw_o": {"duration": 75.16, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xUDwlT5fw_o", "annotations": [{"segment": [5.978457098283932, 64.82523088923558], "label": "Mixing drinks"}]}, "MiTTWddOuaM": {"duration": 100.053, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MiTTWddOuaM", "annotations": [{"segment": [15.919160686427455, 39.641831513260534], "label": "Table soccer"}, {"segment": [48.38176287051482, 62.58415132605304], "label": "Table soccer"}, {"segment": [85.9946817472699, 87.24324336973478], "label": "Table soccer"}]}, "tz3zHV1Z5po": {"duration": 221.77, "subset": "validation", "resolution": "328x240", "url": "https://www.youtube.com/watch?v=tz3zHV1Z5po", "annotations": [{"segment": [10.379428401625862, 184.7538240093604], "label": "Playing water polo"}, {"segment": [198.93904270670825, 212.7782804602184], "label": "Playing polo"}]}, "DgPLWO7R21A": {"duration": 66.02, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DgPLWO7R21A", "annotations": []}, "9FmnQcHnacg": {"duration": 89.26, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9FmnQcHnacg", "annotations": []}, "QY-2lN9zwho": {"duration": 140.69, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QY-2lN9zwho", "annotations": [{"segment": [0.01, 140.6897052154195], "label": "Brushing teeth"}]}, "X_9NiZmR2tQ": {"duration": 45.19, "subset": "training", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=X_9NiZmR2tQ", "annotations": [{"segment": [1.0573954352574104, 41.802366207176284], "label": "Playing badminton"}]}, "50nJ8UkOGwg": {"duration": 160.4, "subset": "validation", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=50nJ8UkOGwg", "annotations": [{"segment": [0.01, 153.65375975039], "label": "Sailing"}]}, "YnjITZnk6OE": {"duration": 32.53, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=YnjITZnk6OE", "annotations": []}, "RTD_JWmhNkA": {"duration": 121.67, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RTD_JWmhNkA", "annotations": [{"segment": [0.01, 119.11523868954758], "label": "Playing guitarra"}]}, "fGgtDMSQk4Y": {"duration": 191.751, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fGgtDMSQk4Y", "annotations": []}, "MJ9dlbY4gX0": {"duration": 176.936, "subset": "testing", "resolution": "512x288", "url": "https://www.youtube.com/watch?v=MJ9dlbY4gX0", "annotations": []}, "1Q_042dyY_A": {"duration": 207.587, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1Q_042dyY_A", "annotations": []}, "-kAlrG4jS9M": {"duration": 146.611, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=-kAlrG4jS9M", "annotations": [{"segment": [0.01, 146.61077097505668], "label": "Bullfighting"}]}, "dY2iZq5T0zo": {"duration": 130.5, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dY2iZq5T0zo", "annotations": [{"segment": [0.7122763892355694, 16.585864492199686], "label": "Playing polo"}, {"segment": [86.59245817706709, 97.58186532527301], "label": "Playing polo"}]}, "6UUkWJvHJzg": {"duration": 154.018, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6UUkWJvHJzg", "annotations": []}, "X7ynQ_gIWZc": {"duration": 46.905, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=X7ynQ_gIWZc", "annotations": []}, "EWWCQH6WbtQ": {"duration": 85.032, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EWWCQH6WbtQ", "annotations": [{"segment": 
[58.582893262095716, 60.96323663690131], "label": "Having an ice cream"}]}, "hDb19ih3jAA": {"duration": 36.409, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hDb19ih3jAA", "annotations": [{"segment": [0.01, 0.02], "label": "Rollerblading"}]}, "-ML_fQxRrFg": {"duration": 209.32999999999998, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=-ML_fQxRrFg", "annotations": []}, "ERVK6vQbor4": {"duration": 164.21, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ERVK6vQbor4", "annotations": [{"segment": [13.065191081112632, 158.31937304836194], "label": "Doing kickboxing"}]}, "LihSTWbJZJo": {"duration": 115.868, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LihSTWbJZJo", "annotations": [{"segment": [6.417002911408973, 55.40313781258733], "label": "Blowing leaves"}, {"segment": [61.5489997559086, 101.67786242199688], "label": "Blowing leaves"}]}, "El_q7DhzArg": {"duration": 4.134, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=El_q7DhzArg", "annotations": [{"segment": [0.2, 3.8810000000000002], "label": "Baton twirling"}]}, "qRuPuzY6yiE": {"duration": 84.01, "subset": "training", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=qRuPuzY6yiE", "annotations": [{"segment": [6.809566302652106, 71.64846645865835], "label": "Tug of war"}]}, "uuH0ieCzqkM": {"duration": 36.525, "subset": "training", "resolution": "1272x720", "url": "https://www.youtube.com/watch?v=uuH0ieCzqkM", "annotations": [{"segment": [4.325360374414977, 34.83053354134166], "label": "Rope skipping"}]}, "XqW8wjFpGRU": {"duration": 75.02, "subset": "training", "resolution": "324x240", "url": "https://www.youtube.com/watch?v=XqW8wjFpGRU", "annotations": [{"segment": [2.7487545257410293, 32.797804909999996], "label": "Cumbia"}, {"segment": [36.85955013, 62.2915969], "label": "Cumbia"}]}, 
"-fuvIQpA-QQ": {"duration": 198.113, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=-fuvIQpA-QQ", "annotations": [{"segment": [49.75996407488299, 198.11265306122448], "label": "Making a lemonade"}]}, "KkpQ347Ceak": {"duration": 51.99, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KkpQ347Ceak", "annotations": [{"segment": [1.1028510140405616, 51.017070982839314], "label": "Preparing salad"}]}, "NDK0XQnsnmA": {"duration": 129.963, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NDK0XQnsnmA", "annotations": [{"segment": [0.2, 119.65404825548026], "label": "Riding bumper cars"}]}, "mf6UsZuW9Nw": {"duration": 63.414, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=mf6UsZuW9Nw", "annotations": [{"segment": [8.112204499064315, 44.91391190327613], "label": "Applying sunscreen"}]}, "8SMXK2uOvk0": {"duration": 42.93, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=8SMXK2uOvk0", "annotations": [{"segment": [17.32633229329173, 31.45627145085804], "label": "Starting a campfire"}]}, "aCSG6pfivy4": {"duration": 160.613, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aCSG6pfivy4", "annotations": [{"segment": [20.038814352574104, 142.54382059282372], "label": "Baking cookies"}]}, "4oi6dXfpJS4": {"duration": 94.18, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4oi6dXfpJS4", "annotations": []}, "XIMi2oydVB8": {"duration": 81.06, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XIMi2oydVB8", "annotations": [{"segment": [12.0649704, 22.004021541341658], "label": "Pole vault"}]}, "OcL7ZhBCkxQ": {"duration": 153.113, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OcL7ZhBCkxQ", "annotations": []}, "Q78FBGHniCc": {"duration": 
187.64, "subset": "validation", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=Q78FBGHniCc", "annotations": [{"segment": [0.19999999999999998, 187.581], "label": "Blowing leaves"}]}, "I9HV59QQkNw": {"duration": 6.39, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=I9HV59QQkNw", "annotations": [{"segment": [0.01, 4.054191155401318], "label": "Drinking beer"}]}, "crxqFzslD0Y": {"duration": 180.75, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=crxqFzslD0Y", "annotations": [{"segment": [15.363388455538221, 150.67396567862716], "label": "Playing harmonica"}]}, "ZqyKUd2Vjzc": {"duration": 82.71, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZqyKUd2Vjzc", "annotations": []}, "p2n_FtzA1gk": {"duration": 87.45, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=p2n_FtzA1gk", "annotations": [{"segment": [12.550801897035882, 83.62653872698908], "label": "Shaving legs"}]}, "UbVTGLlR0L8": {"duration": 181.928, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UbVTGLlR0L8", "annotations": [{"segment": [7.661134165366615, 170.5499235569423], "label": "Rafting"}]}, "4LpqPUWHH5k": {"duration": 155.64, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=4LpqPUWHH5k", "annotations": []}, "vfuRgSowX74": {"duration": 27.888, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vfuRgSowX74", "annotations": []}, "hFpSoRlpyMo": {"duration": 89.75, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=hFpSoRlpyMo", "annotations": []}, "z477-uRyDag": {"duration": 211.558, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=z477-uRyDag", "annotations": []}, "pgUgZMOpwDU": {"duration": 104.769, "subset": "testing", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=pgUgZMOpwDU", "annotations": []}, "ZJGXWbt6cbU": {"duration": 102.05, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZJGXWbt6cbU", "annotations": [{"segment": [0.01, 102.05156462585035], "label": "Zumba"}]}, "c-8GvZKndyQ": {"duration": 91.77, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=c-8GvZKndyQ", "annotations": [{"segment": [4.060397801092043, 44.23696551716069], "label": "Using the balance beam"}, {"segment": [44.43551868, 87.97528569032762], "label": "Using the balance beam"}]}, "iId8WcbiKZI": {"duration": 104.839, "subset": "validation", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=iId8WcbiKZI", "annotations": [{"segment": [10.134823712948519, 104.581], "label": "Waterskiing"}]}, "UMhZGJqeSuU": {"duration": 72.749, "subset": "validation", "resolution": "562x480", "url": "https://www.youtube.com/watch?v=UMhZGJqeSuU", "annotations": [{"segment": [24.044636505460215, 72.24732761310452], "label": "Hurling"}, {"segment": [26.99350702028081, 60.11159126365054], "label": "Hurling"}]}, "YamDoDK71Ds": {"duration": 211.81, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=YamDoDK71Ds", "annotations": [{"segment": [0.01, 162.90712990968618], "label": "Belly dance"}]}, "hGUUlFxygmw": {"duration": 229.48, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hGUUlFxygmw", "annotations": [{"segment": [54.04928393135725, 59.418418096723876], "label": "Doing nails"}, {"segment": [66.57726365054602, 81.61083931357254], "label": "Doing nails"}]}, "bNVitmKpOzA": {"duration": 138.275, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=bNVitmKpOzA", "annotations": []}, "GOE5in6FJNA": {"duration": 106.627, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GOE5in6FJNA", "annotations": []}, 
"lI6h3H4Zs98": {"duration": 91.464, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=lI6h3H4Zs98", "annotations": [{"segment": [10.273580143525741, 15.553058828393137], "label": "Rope skipping"}, {"segment": [24.39975284087364, 28.109656781591266], "label": "Rope skipping"}, {"segment": [31.819560722308893, 46.37379925897036], "label": "Rope skipping"}, {"segment": [53.936295753510144, 64.49525312324494], "label": "Rope skipping"}, {"segment": [70.20279764742591, 75.33958771918877], "label": "Rope skipping"}, {"segment": [83.04477282683308, 85.6131678627145], "label": "Rope skipping"}]}, "oxcUr-Nlldo": {"duration": 200.71, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oxcUr-Nlldo", "annotations": [{"segment": [7.984693983677714, 135.73979772252116], "label": "Skateboarding"}]}, "FUvUDCZxAO8": {"duration": 139.95, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=FUvUDCZxAO8", "annotations": [{"segment": [34.49527212653132, 41.918305368949454], "label": "Archery"}, {"segment": [47.37641804719808, 58.07431889656539], "label": "Archery"}, {"segment": [94.31618708013627, 119.20518089295001], "label": "Archery"}]}, "_-M4xNBhSEI": {"duration": 6.43, "subset": "training", "resolution": "256x144", "url": "https://www.youtube.com/watch?v=_-M4xNBhSEI", "annotations": [{"segment": [0.9744251199999999, 5.63323445], "label": "Dodgeball"}]}, "XbvlEwmvLko": {"duration": 72.354, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XbvlEwmvLko", "annotations": [{"segment": [10.271696408736352, 11.85195739469579], "label": "Calf roping"}, {"segment": [16.028361429017163, 23.929666358814355], "label": "Calf roping"}, {"segment": [27.654567254290175, 42.1026676973479], "label": "Calf roping"}, {"segment": [49.21384213416537, 60.16279325117005], "label": "Calf roping"}]}, "D7WhCBcddSA": {"duration": 31.65, "subset": "validation", 
"resolution": "720x480", "url": "https://www.youtube.com/watch?v=D7WhCBcddSA", "annotations": [{"segment": [4.196798354134165, 28.883847496099843], "label": "Playing racquetball"}]}, "NDyc4PZE954": {"duration": 186.09, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NDyc4PZE954", "annotations": [{"segment": [30.254751950078003, 39.56390639625585], "label": "Polishing shoes"}, {"segment": [61.09132605304212, 63.12770358814352], "label": "Polishing shoes"}, {"segment": [79.70963494539781, 128.5826957878315], "label": "Polishing shoes"}]}, "ot0n6mfurHo": {"duration": 85.68, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ot0n6mfurHo", "annotations": []}, "kGdxtU3NwWo": {"duration": 26.73, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kGdxtU3NwWo", "annotations": []}, "VHhyCKGmFqo": {"duration": 88.14, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VHhyCKGmFqo", "annotations": []}, "d-cxJQcmBzA": {"duration": 63.39, "subset": "training", "resolution": "432x720", "url": "https://www.youtube.com/watch?v=d-cxJQcmBzA", "annotations": [{"segment": [0.9155804210748999, 62.23253521079452], "label": "Vacuuming floor"}]}, "RVHx_Otzcl0": {"duration": 222.89, "subset": "validation", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=RVHx_Otzcl0", "annotations": [{"segment": [0.6954394477379096, 219.41114740644048], "label": "Ballet"}]}, "zSWuVb-aYAA": {"duration": 164.351, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zSWuVb-aYAA", "annotations": [{"segment": [13.83790015600624, 33.56972074882995], "label": "Disc dog"}, {"segment": [75.08342121684868, 141.72760218408737], "label": "Disc dog"}]}, "3O2acf8oRVA": {"duration": 15.929, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3O2acf8oRVA", "annotations": [{"segment": 
[0.2, 15.72888888888889], "label": "Kneeling"}]}, "MPolPBO-K-Y": {"duration": 170.51, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MPolPBO-K-Y", "annotations": [{"segment": [3.190015600624025, 158.96911076443058], "label": "Playing guitarra"}]}, "tL90IPP3hbA": {"duration": 92.602, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tL90IPP3hbA", "annotations": [{"segment": [29.556082554517133, 73.68986137071651], "label": "Spread mulch"}]}, "iiQ6t0p9lik": {"duration": 224.723, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iiQ6t0p9lik", "annotations": [{"segment": [8.063363190309927, 170.3823700213315], "label": "Slacklining"}, {"segment": [174.93992211388456, 210.69918475039], "label": "Slacklining"}]}, "e2QVdX-JdIg": {"duration": 102.56, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=e2QVdX-JdIg", "annotations": [{"segment": [4.303501519999999, 76.53978159126365], "label": "Discus throw"}]}, "8OSnT8UvJkU": {"duration": 161.89, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=8OSnT8UvJkU", "annotations": [{"segment": [21.467394329173167, 139.15922676911077], "label": "Getting a haircut"}]}, "56m7PcBuUFc": {"duration": 36.27, "subset": "training", "resolution": "293x240", "url": "https://www.youtube.com/watch?v=56m7PcBuUFc", "annotations": [{"segment": [0.010000000000000002, 36.221], "label": "Swinging at the playground"}]}, "aqQ7-J9kbUE": {"duration": 34.2, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aqQ7-J9kbUE", "annotations": [{"segment": [0.2809082125386746, 26.652374006931197], "label": "Starting a campfire"}]}, "cjS2B52jBbU": {"duration": 75.047, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=cjS2B52jBbU", "annotations": [{"segment": [0.2, 5.872434094045763], "label": "Throwing 
darts"}]}, "3j-CWo_hYBo": {"duration": 231.178, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=3j-CWo_hYBo", "annotations": []}, "35DlDj_hzvg": {"duration": 10.73, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=35DlDj_hzvg", "annotations": [{"segment": [1.106638845553822, 10.026314352574103], "label": "Clean and jerk"}]}, "5dXi-tAGqbs": {"duration": 236.94, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=5dXi-tAGqbs", "annotations": [{"segment": [10.34979664274571, 228.4347973291732], "label": "Zumba"}]}, "9u-wH-UBuRc": {"duration": 72.098, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=9u-wH-UBuRc", "annotations": [{"segment": [26.41128705148206, 55.744673946957874], "label": "Arm wrestling"}]}, "JTrwGfPJNzU": {"duration": 11.309, "subset": "validation", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=JTrwGfPJNzU", "annotations": [{"segment": [1.0761233634945397, 10.373123569422777], "label": "Fun sliding down"}]}, "8_fjIZkNHms": {"duration": 30.395, "subset": "testing", "resolution": "192x108", "url": "https://www.youtube.com/watch?v=8_fjIZkNHms", "annotations": []}, "IYB8S2pqVCw": {"duration": 103.38, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IYB8S2pqVCw", "annotations": []}, "BfTjydFBQJU": {"duration": 90.74, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BfTjydFBQJU", "annotations": [{"segment": [0.4952472698907956, 88.9518697347894], "label": "Doing step aerobics"}]}, "lUil9HfTzJ0": {"duration": 36.409, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lUil9HfTzJ0", "annotations": []}, "bdKB3QYNFbc": {"duration": 232.154, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bdKB3QYNFbc", "annotations": [{"segment": 
[10.500669266770672, 153.1649344773791], "label": "BMX"}, {"segment": [156.78585491419656, 183.958608424337], "label": "BMX"}]}, "qtfFFPKya3Y": {"duration": 210.05, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qtfFFPKya3Y", "annotations": [{"segment": [83.97469013, 197.9981120101404], "label": "Cricket"}, {"segment": [7.369656774570982, 70.21661162999999], "label": "Cricket"}]}, "2vixXUJiNhk": {"duration": 223.005, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2vixXUJiNhk", "annotations": [{"segment": [0.01, 188.5424368174727], "label": "Scuba diving"}]}, "5-SbGVTPM1c": {"duration": 179.75, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=5-SbGVTPM1c", "annotations": [{"segment": [10.79311360452418, 178.71714085413416], "label": "High jump"}]}, "5ptxyeHlcwM": {"duration": 91.069, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5ptxyeHlcwM", "annotations": [{"segment": [31.966379063325796, 40.064528426035004], "label": "Playing ten pins"}]}, "Ca5GNndwnHw": {"duration": 144.31, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ca5GNndwnHw", "annotations": []}, "cmzEDurFcwI": {"duration": 119.63, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cmzEDurFcwI", "annotations": [{"segment": [33.674781530030366, 63.44254671801328], "label": "Table soccer"}, {"segment": [65.675129107112, 93.95450497605474], "label": "Table soccer"}]}, "XKwodc1MriQ": {"duration": 233.524, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XKwodc1MriQ", "annotations": []}, "2-SPZIF5lPY": {"duration": 139.622, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2-SPZIF5lPY", "annotations": [{"segment": [53.13985023400936, 56.84221684867395], "label": "Doing fencing"}, {"segment": 
[103.2306926677067, 117.16901404056162], "label": "Doing fencing"}]}, "RHsU8F1G0iM": {"duration": 202.548, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=RHsU8F1G0iM", "annotations": []}, "unFlcSwdDFc": {"duration": 117.028, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=unFlcSwdDFc", "annotations": [{"segment": [6.207443726320482, 116.82857142857142], "label": "Futsal"}]}, "jHXqbgeq83Y": {"duration": 25.17, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jHXqbgeq83Y", "annotations": [{"segment": [0.01, 25.121], "label": "Drinking beer"}]}, "H2-rEH9TXR4": {"duration": 10.055, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=H2-rEH9TXR4", "annotations": [{"segment": [3.9369953026521065, 8.219065890795632], "label": "Throwing darts"}]}, "dtT0BzjTStw": {"duration": 88.96000000000001, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=dtT0BzjTStw", "annotations": [{"segment": [25.73379195, 57.708480499219974], "label": "Layup drill in basketball"}, {"segment": [62.56376131045243, 80.73638377535103], "label": "Layup drill in basketball"}]}, "WkJfGQ1C0oE": {"duration": 165.025, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=WkJfGQ1C0oE", "annotations": []}, "mxn6WcmdNpA": {"duration": 32.044, "subset": "testing", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=mxn6WcmdNpA", "annotations": []}, "f1kY1-9XR1k": {"duration": 152.347, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=f1kY1-9XR1k", "annotations": [{"segment": [17.112183132223247, 69.8747477899116], "label": "Braiding hair"}]}, "udpVICVTQrQ": {"duration": 29.26, "subset": "validation", "resolution": "854x478", "url": "https://www.youtube.com/watch?v=udpVICVTQrQ", "annotations": [{"segment": [13.144899869464167, 
17.480891078783152], "label": "Plataform diving"}]}, "0ys4fnsu9Jc": {"duration": 64.53, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0ys4fnsu9Jc", "annotations": [{"segment": [3.3651352000000005, 55.61912188923557], "label": "Dodgeball"}]}, "17zMW3rzlR4": {"duration": 83.267, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=17zMW3rzlR4", "annotations": [{"segment": [10.90833697347894, 76.2284976599064], "label": "Waxing skis"}]}, "3GgQdXjJfEU": {"duration": 7.2, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3GgQdXjJfEU", "annotations": [{"segment": [0.20159438377535102, 6.697413416536662], "label": "Javelin throw"}]}, "uyr3E9ZReAw": {"duration": 145.43, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=uyr3E9ZReAw", "annotations": [{"segment": [24.729262454851938, 71.01155181989593], "label": "Cricket"}, {"segment": [77.36402290921568, 108.89950438833881], "label": "Cricket"}, {"segment": [120.01632784399375, 135.67063147581902], "label": "Cricket"}]}, "8E8bytYxwAA": {"duration": 29.03, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8E8bytYxwAA", "annotations": [{"segment": [5.606923770000001, 23.52333442901716], "label": "Using uneven bars"}]}, "XnBUb3qoFQM": {"duration": 188.732, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XnBUb3qoFQM", "annotations": [{"segment": [63.87269652305367, 66.51846929945395], "label": "Swimming"}, {"segment": [118.84597532159282, 121.19777334505974], "label": "Swimming"}, {"segment": [132.66278870946095, 139.71818175586367], "label": "Swimming"}]}, "UR1e1MIRvvc": {"duration": 69.45, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=UR1e1MIRvvc", "annotations": [{"segment": [0.01, 68.85420670826832], "label": "Playing bagpipes"}]}, "pZ8QqpYu-H8": 
{"duration": 187.83, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pZ8QqpYu-H8", "annotations": [{"segment": [0.20000000000000004, 187.62573696145125], "label": "Playing piano"}]}, "2YE_8XDuDBI": {"duration": 135.675, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=2YE_8XDuDBI", "annotations": [{"segment": [6.982048361934478, 135.42100000000002], "label": "Playing drums"}]}, "0WMReYXyDXs": {"duration": 84.242, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=0WMReYXyDXs", "annotations": []}, "blcUvpHClnM": {"duration": 127.71, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=blcUvpHClnM", "annotations": []}, "ZsqjU0X3mxY": {"duration": 229.62, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZsqjU0X3mxY", "annotations": [{"segment": [58.19857644305772, 97.59453588143526], "label": "Getting a haircut"}, {"segment": [139.85565600624025, 142.36267160686427], "label": "Getting a haircut"}, {"segment": [160.98621606864273, 164.9258120124805], "label": "Getting a haircut"}]}, "ZncidS9kQ-g": {"duration": 57.8, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZncidS9kQ-g", "annotations": [{"segment": [0.01, 55.78425585023401], "label": "Playing saxophone"}]}, "HX_RdK-9fxo": {"duration": 187.037, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HX_RdK-9fxo", "annotations": []}, "sUrqd6Qn8Qw": {"duration": 44.235, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sUrqd6Qn8Qw", "annotations": [{"segment": [0, 44.141], "label": "Using the rowing machine"}]}, "S7ktlGGweeA": {"duration": 122.28, "subset": "training", "resolution": "622x480", "url": "https://www.youtube.com/watch?v=S7ktlGGweeA", "annotations": [{"segment": [0.01, 109.5906899553042], "label": 
"Volleyball"}]}, "j50CSWkWAHU": {"duration": 19.9216666666667, "subset": "testing", "resolution": "480x270", "url": "https://www.youtube.com/watch?v=j50CSWkWAHU", "annotations": []}, "fIkFGf40v8U": {"duration": 41.634, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fIkFGf40v8U", "annotations": []}, "D0aZaiBAHxg": {"duration": 176.96, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=D0aZaiBAHxg", "annotations": [{"segment": [0.01, 176.958820861678], "label": "Trimming branches or hedges"}]}, "WsXxnCQ_MIc": {"duration": 213.856, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WsXxnCQ_MIc", "annotations": [{"segment": [8.668059282371296, 186.04729797191888], "label": "Blow-drying hair"}]}, "fcJItKSMIhQ": {"duration": 26.378, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=fcJItKSMIhQ", "annotations": []}, "Uqs8NaPzHKU": {"duration": 226.743, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Uqs8NaPzHKU", "annotations": [{"segment": [13.867860907617017, 192.56195974649447], "label": "Making a cake"}]}, "VDYSVR0HbpM": {"duration": 213.53, "subset": "validation", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=VDYSVR0HbpM", "annotations": [{"segment": [21.319758172639833, 175.2217608829953], "label": "Ballet"}]}, "4Sfu2RyEaYg": {"duration": 34.13, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4Sfu2RyEaYg", "annotations": []}, "KeTYxQkYfQw": {"duration": 170.46, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=KeTYxQkYfQw", "annotations": [{"segment": [7.445888616497041, 170.457663], "label": "Playing saxophone"}]}, "n2wq_9TeNYM": {"duration": 41.169, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=n2wq_9TeNYM", "annotations": [{"segment": 
[1.862559139100258, 40.26981155382216], "label": "BMX"}]}, "epZjMuRRXLo": {"duration": 155.249, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=epZjMuRRXLo", "annotations": [{"segment": [8.961308612888734, 130.30227118200375], "label": "Rope skipping"}]}, "OXTQsO5abO4": {"duration": 41.38, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=OXTQsO5abO4", "annotations": [{"segment": [0.3873131794071763, 2.323879076443058], "label": "Ironing clothes"}, {"segment": [10.651112433697348, 28.27386209672387], "label": "Ironing clothes"}]}, "IHPb-JPIhg8": {"duration": 48.09, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IHPb-JPIhg8", "annotations": [{"segment": [5.926608099843993, 30.908386546021845], "label": "Using the pommel horse"}]}, "A8H0FIuBJUs": {"duration": 33.23, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=A8H0FIuBJUs", "annotations": []}, "yPpKYckLbdI": {"duration": 153.67000000000002, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=yPpKYckLbdI", "annotations": [{"segment": [45.24322698, 131.45317285647425], "label": "Tumbling"}, {"segment": [9.09105804, 44.456815979719195], "label": "Tumbling"}]}, "YynqCEoHKCk": {"duration": 189.41, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YynqCEoHKCk", "annotations": [{"segment": [13.295124758190326, 59.08944336973479], "label": "Belly dance"}, {"segment": [60.91075395, 174.01841072386895], "label": "Belly dance"}]}, "CE0LilaJEI8": {"duration": 154.785, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CE0LilaJEI8", "annotations": []}, "jG1kgKJkf8c": {"duration": 191.449, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jG1kgKJkf8c", "annotations": [{"segment": [0.01, 191.448526], "label": "Playing drums"}]}, 
"uqwf903l5-E": {"duration": 171.16, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=uqwf903l5-E", "annotations": []}, "DqYA22vBP_Q": {"duration": 36.67, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DqYA22vBP_Q", "annotations": []}, "_QBTD1bLSI0": {"duration": 73.33, "subset": "testing", "resolution": "400x224", "url": "https://www.youtube.com/watch?v=_QBTD1bLSI0", "annotations": []}, "wEZhiIqOCSk": {"duration": 133.028, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wEZhiIqOCSk", "annotations": []}, "MHw7mYxeSY0": {"duration": 185.389, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MHw7mYxeSY0", "annotations": []}, "MSfIKwQhLFk": {"duration": 103.701, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MSfIKwQhLFk", "annotations": [{"segment": [9.585513330346698, 103.401], "label": "Swimming"}]}, "HNR_HofJ_Fs": {"duration": 122.05, "subset": "validation", "resolution": "638x360", "url": "https://www.youtube.com/watch?v=HNR_HofJ_Fs", "annotations": [{"segment": [23.03793668481433, 93.10372639001561], "label": "Volleyball"}]}, "CQFSupyrQK4": {"duration": 83.685, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CQFSupyrQK4", "annotations": []}, "jCyrQzXK4eI": {"duration": 53.221, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=jCyrQzXK4eI", "annotations": [{"segment": [3.677938444155399, 34.824443117052596], "label": "Painting fence"}, {"segment": [42.279723490884365, 47.82976554695914], "label": "Painting fence"}]}, "9VflCeoPr9I": {"duration": 138.37, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9VflCeoPr9I", "annotations": []}, "y80Jbcb5GWA": {"duration": 56.889, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=y80Jbcb5GWA", "annotations": [{"segment": [0.2, 56.641999999999996], "label": "Swinging at the playground"}]}, "SJiNKbnFs_o": {"duration": 236.634, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SJiNKbnFs_o", "annotations": []}, "qPs7sAYUvDw": {"duration": 84.08, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=qPs7sAYUvDw", "annotations": []}, "61wzTjdnXe4": {"duration": 183.694, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=61wzTjdnXe4", "annotations": [{"segment": [10.027199687987519, 14.038079563182526], "label": "Scuba diving"}, {"segment": [23.492296411856472, 40.68178159126365], "label": "Scuba diving"}, {"segment": [48.99003276131045, 52.1414383775351], "label": "Scuba diving"}, {"segment": [91.10427145085802, 103.70989391575662], "label": "Scuba diving"}, {"segment": [107.43428237129484, 111.44516224648984], "label": "Scuba diving"}, {"segment": [138.66184711388456, 147.2726271450858], "label": "Scuba diving"}, {"segment": [157.5874414976599, 173.91923088923556], "label": "Scuba diving"}]}, "LOhacn4jdT4": {"duration": 102.94, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LOhacn4jdT4", "annotations": [{"segment": [8.021840873634947, 101.23563182527302], "label": "Clean and jerk"}]}, "afqUOlnLHX0": {"duration": 89.513, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=afqUOlnLHX0", "annotations": [{"segment": [5.0272469674297176, 87.13894629641186], "label": "Playing pool"}]}, "p8MvTi8hJdE": {"duration": 219.94, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=p8MvTi8hJdE", "annotations": [{"segment": [46.32109268044191, 67.59448339294116], "label": "Snowboarding"}, {"segment": [113.91557607338308, 117.34676812378618], "label": "Snowboarding"}, {"segment": [118.3761257389071, 
139.3063963400936], "label": "Snowboarding"}, {"segment": [152.00180684399376, 170.87336299843994], "label": "Snowboarding"}]}, "cP2LVnGxiww": {"duration": 223.934, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cP2LVnGxiww", "annotations": [{"segment": [18.511221528861153, 45.40488299531981], "label": "Carving jack-o-lanterns"}, {"segment": [89.41269266770671, 92.55610764430577], "label": "Carving jack-o-lanterns"}, {"segment": [115.95708580343215, 161.01270046801872], "label": "Carving jack-o-lanterns"}]}, "DQ7a0-cSSCQ": {"duration": 134.769, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DQ7a0-cSSCQ", "annotations": []}, "K1_g97xCqKg": {"duration": 100.449, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=K1_g97xCqKg", "annotations": [{"segment": [0.01, 0.02], "label": "Changing car wheel"}, {"segment": [75.4542703861005, 81.70307144364327], "label": "Changing car wheel"}]}, "06r6DtoTtSQ": {"duration": 125.95, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=06r6DtoTtSQ", "annotations": [{"segment": [20.153718899999998, 79.068271], "label": "Skateboarding"}]}, "ATBbNtNlBnY": {"duration": 156.734, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ATBbNtNlBnY", "annotations": [{"segment": [0.009999999999999998, 156.73469387755102], "label": "Sharpening knives"}]}, "NE6SEdnVUh4": {"duration": 76.39, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NE6SEdnVUh4", "annotations": [{"segment": [17.26902496099844, 22.038666146645866], "label": "Playing flauta"}, {"segment": [26.327271450858035, 76.12274414976599], "label": "Playing flauta"}]}, "XaBF6VNWMo8": {"duration": 227.95, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XaBF6VNWMo8", "annotations": []}, "GySHt3Z6Lt4": {"duration": 181.91, 
"subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GySHt3Z6Lt4", "annotations": [{"segment": [10.49998510688727, 179.63488034215246], "label": "Starting a campfire"}]}, "6utacRir3uM": {"duration": 20.3, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6utacRir3uM", "annotations": []}, "TrtuuE1K8pk": {"duration": 69.173, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TrtuuE1K8pk", "annotations": [{"segment": [0.753873831775701, 68.92560747663552], "label": "Plastering"}]}, "8ZjUZbykp4U": {"duration": 77.137, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8ZjUZbykp4U", "annotations": [{"segment": [0.2, 8.417581903276131], "label": "Playing drums"}, {"segment": [28.499527301092044, 32.82856942277691], "label": "Playing drums"}]}, "ItX9t75ZIJI": {"duration": 49.134, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=ItX9t75ZIJI", "annotations": []}, "k_z03d9w_Mk": {"duration": 50.06, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=k_z03d9w_Mk", "annotations": [{"segment": [35.4804992199688, 49.875], "label": "Cumbia"}, {"segment": [3.423556942277691, 34.36998044], "label": "Cumbia"}]}, "GehJuCIJ7AI": {"duration": 136.023, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GehJuCIJ7AI", "annotations": [{"segment": [0.01, 38.21432673320928], "label": "Disc dog"}, {"segment": [40.760769678607105, 128.18864413726544], "label": "Disc dog"}]}, "3PsxzgeMv5w": {"duration": 226.511, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3PsxzgeMv5w", "annotations": []}, "LSkcoamI3gw": {"duration": 134.4, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LSkcoamI3gw", "annotations": [{"segment": [10.483392941159824, 113.01097590570289], "label": 
"Shoveling snow"}]}, "_bSRAXNa5PY": {"duration": 96.67, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=_bSRAXNa5PY", "annotations": [{"segment": [72.65459568, 78.50470096], "label": "Snatch"}]}, "Q159fj3M9NM": {"duration": 138.39, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Q159fj3M9NM", "annotations": [{"segment": [5.447844667210279, 135.00072274820715], "label": "Tumbling"}]}, "Qnr73D2zIjU": {"duration": 62.694, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Qnr73D2zIjU", "annotations": [{"segment": [4.387594810835724, 18.135396546450036], "label": "Bullfighting"}]}, "3HEJb83Wm_I": {"duration": 105.024, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3HEJb83Wm_I", "annotations": [{"segment": [33.91555686739469, 68.15880027457098], "label": "Cleaning sink"}, {"segment": [88.96689555070202, 92.57144748829953], "label": "Cleaning sink"}]}, "yVQBdK8VQ_I": {"duration": 152.811, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yVQBdK8VQ_I", "annotations": [{"segment": [95.5959727301092, 125.87200399375975], "label": "Gargling mouthwash"}]}, "Qzewn-dGdOI": {"duration": 48.739, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Qzewn-dGdOI", "annotations": [{"segment": [0.01, 40.87541029641186], "label": "Bullfighting"}]}, "TEaugXCX1m4": {"duration": 150.141, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TEaugXCX1m4", "annotations": [{"segment": [0.01, 150.140225], "label": "Surfing"}]}, "OKhVHhchsyI": {"duration": 197.811, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OKhVHhchsyI", "annotations": [{"segment": [0.01, 197.761], "label": "Building sandcastles"}]}, "-DGsqL65o4k": {"duration": 42.447, "subset": "validation", "resolution": "328x240", 
"url": "https://www.youtube.com/watch?v=-DGsqL65o4k", "annotations": [{"segment": [12.515255181635837, 39.39987615444618], "label": "Waterskiing"}]}, "fxqzU9KGOsQ": {"duration": 47.95, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=fxqzU9KGOsQ", "annotations": [{"segment": [4.637758046801872, 47.948434], "label": "Dodgeball"}]}, "zOGg5-Mll4o": {"duration": 211.023, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zOGg5-Mll4o", "annotations": [{"segment": [85.92352131201248, 125.42858858190327], "label": "Skiing"}]}, "ZgytgzK17qs": {"duration": 99.266, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZgytgzK17qs", "annotations": []}, "iMB_mb11KWM": {"duration": 50.063, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iMB_mb11KWM", "annotations": [{"segment": [5.618583463338534, 43.70009360374415], "label": "Tug of war"}]}, "JgHQCKz_DRc": {"duration": 104.05, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JgHQCKz_DRc", "annotations": [{"segment": [7.72764496, 81.16116614664587], "label": "Getting a haircut"}]}, "c4B0_ZeADxM": {"duration": 85.682, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=c4B0_ZeADxM", "annotations": []}, "ivmBrbO32Qo": {"duration": 67.942, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ivmBrbO32Qo", "annotations": [{"segment": [0.2, 67.94111], "label": "Mooping floor"}]}, "o8qR72Ymru8": {"duration": 165.56, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=o8qR72Ymru8", "annotations": [{"segment": [36.67593647963605, 138.69702601404057], "label": "Starting a campfire"}]}, "zVRajE-LL4I": {"duration": 121.023, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zVRajE-LL4I", "annotations": []}, 
"LL0uQO4q_wY": {"duration": 100.009, "subset": "testing", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=LL0uQO4q_wY", "annotations": []}, "q23IYtwNOkE": {"duration": 61.324, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=q23IYtwNOkE", "annotations": [{"segment": [0.2, 60.462877017160686], "label": "Gargling mouthwash"}]}, "fKy5rh-SoTM": {"duration": 105.675, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fKy5rh-SoTM", "annotations": [{"segment": [0.2, 24.651789157634283], "label": "Playing pool"}, {"segment": [30.568219157316896, 84.6378155433052], "label": "Playing pool"}]}, "JNuVGc8mn54": {"duration": 117.89, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JNuVGc8mn54", "annotations": [{"segment": [8.24501678, 103.02065724336974], "label": "Playing guitarra"}]}, "83oa1S0x9zI": {"duration": 149.75, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=83oa1S0x9zI", "annotations": [{"segment": [12.147839737909516, 117.97421283931358], "label": "Cricket"}]}, "P2Fcv3cC8bI": {"duration": 77.3, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=P2Fcv3cC8bI", "annotations": [{"segment": [0.01, 77.29925170068027], "label": "Hanging wallpaper"}]}, "EPCb-f6yQxA": {"duration": 77.51, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=EPCb-f6yQxA", "annotations": [{"segment": [13.776215288611544, 70.57289235569422], "label": "Playing bagpipes"}]}, "OOqGdga8t4s": {"duration": 22.454, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OOqGdga8t4s", "annotations": [{"segment": [2.1336677067082683, 22.176152886115446], "label": "Hitting a pinata"}]}, "0B4T3GIkMZw": {"duration": 86.38, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0B4T3GIkMZw", 
"annotations": []}, "kQoGS3YjBXk": {"duration": 139.32, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kQoGS3YjBXk", "annotations": [{"segment": [60.6278751950078, 134.74215288611546], "label": "Assembling bicycle"}]}, "ZqvDwQWmlss": {"duration": 20.689, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZqvDwQWmlss", "annotations": []}, "I2Ctmi7C56k": {"duration": 48.855, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=I2Ctmi7C56k", "annotations": [{"segment": [0.01, 47.563131279214886], "label": "Raking leaves"}]}, "Rm2GoMtTQWQ": {"duration": 61.12, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Rm2GoMtTQWQ", "annotations": []}, "K-rdUv9LXTc": {"duration": 31.6, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=K-rdUv9LXTc", "annotations": []}, "uaIu2yDmhNU": {"duration": 43.65, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uaIu2yDmhNU", "annotations": [{"segment": [2.755179365054602, 24.976937009999993], "label": "Triple jump"}]}, "X9AnhFjdiXA": {"duration": 149.1, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=X9AnhFjdiXA", "annotations": [{"segment": [0, 144.6754155101404], "label": "Playing guitarra"}]}, "fJyxb59mA-A": {"duration": 54.92, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=fJyxb59mA-A", "annotations": [{"segment": [9.937850658346333, 16.277514009360374], "label": "Playing bagpipes"}, {"segment": [26.900733678627144, 29.299525216848675], "label": "Playing bagpipes"}, {"segment": [33.5830815351014, 49.517911039001554], "label": "Playing bagpipes"}]}, "aT-0XxCqV5E": {"duration": 47.16, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=aT-0XxCqV5E", "annotations": [{"segment": [8.671596919999999, 
42.26531057], "label": "Long jump"}]}, "BMa9v2uZBp8": {"duration": 199.53, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=BMa9v2uZBp8", "annotations": [{"segment": [28.296741029641186, 191.25540561622466], "label": "Snowboarding"}]}, "bCrCWBxoYTo": {"duration": 200.09, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bCrCWBxoYTo", "annotations": []}, "GnbyHdd-BjQ": {"duration": 213.229, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GnbyHdd-BjQ", "annotations": [{"segment": [79.50342216137625, 155.68034018096722], "label": "Snowboarding"}]}, "ggeFotn1f18": {"duration": 61.858, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ggeFotn1f18", "annotations": []}, "vUAf-jRo12A": {"duration": 117.819, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=vUAf-jRo12A", "annotations": []}, "S6t9Lq1xKJk": {"duration": 30.604, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=S6t9Lq1xKJk", "annotations": [{"segment": [0.2, 29.21930126677067], "label": "Making an omelette"}]}, "dpU5CcFqAPU": {"duration": 117.331, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dpU5CcFqAPU", "annotations": [{"segment": [21.77291575663027, 83.43234945397816], "label": "Tug of war"}]}, "1xcFq3SkTKk": {"duration": 175.8, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1xcFq3SkTKk", "annotations": [{"segment": [35.907692667706705, 165.02970670826835], "label": "Doing step aerobics"}]}, "-TddN8oBvhQ": {"duration": 107.927, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-TddN8oBvhQ", "annotations": [{"segment": [9.76556669885843, 18.18415868063294], "label": "Blow-drying hair"}, {"segment": [26.60275066240745, 45.96551222048881], "label": "Blow-drying 
hair"}, {"segment": [68.69571057127999, 76.44081504836194], "label": "Blow-drying hair"}, {"segment": [87.048240925117, 88.73195931825272], "label": "Blow-drying hair"}]}, "e1TfVkNgitY": {"duration": 84.28999999999999, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=e1TfVkNgitY", "annotations": [{"segment": [48.127094630265205, 77.58192850234009], "label": "Using parallel bars"}]}, "Zt9nALIsHPc": {"duration": 46.49, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Zt9nALIsHPc", "annotations": [{"segment": [21.248829897035883, 31.54689762870515], "label": "Playing harmonica"}]}, "3uaQ0cULMgc": {"duration": 82.199, "subset": "testing", "resolution": "180x240", "url": "https://www.youtube.com/watch?v=3uaQ0cULMgc", "annotations": []}, "rel9A7gnnSg": {"duration": 205.985, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=rel9A7gnnSg", "annotations": []}, "Mg3_BN9t9JY": {"duration": 133.42, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=Mg3_BN9t9JY", "annotations": [{"segment": [23.608177067082686, 36.08827067082683], "label": "Playing polo"}, {"segment": [55.016412636505464, 99.3207449297972], "label": "Playing polo"}, {"segment": [109.51282137285492, 116.37687285491421], "label": "Playing polo"}]}, "HqpflYNEI7o": {"duration": 68.964, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HqpflYNEI7o", "annotations": [{"segment": [8.601684867394697, 50.74994071762871], "label": "Carving jack-o-lanterns"}]}, "bJ695Pp7Vng": {"duration": 212.63, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bJ695Pp7Vng", "annotations": [{"segment": [12.27345605824233, 198.36558710348416], "label": "Playing water polo"}]}, "tyuyI30cZ00": {"duration": 37.8, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=tyuyI30cZ00", 
"annotations": [{"segment": [1.605866013651518, 37.802085], "label": "Putting on makeup"}]}, "UHNUmpx0nww": {"duration": 108.902, "subset": "validation", "resolution": "854x428", "url": "https://www.youtube.com/watch?v=UHNUmpx0nww", "annotations": [{"segment": [0.01, 108.901065], "label": "Beach soccer"}]}, "IsV1n_kLpvs": {"duration": 57.54, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=IsV1n_kLpvs", "annotations": []}, "q1e275n6R3c": {"duration": 178.608, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=q1e275n6R3c", "annotations": [{"segment": [0.01, 159.40498710525733], "label": "Carving jack-o-lanterns"}]}, "zcvbHJ1BZ9s": {"duration": 77.625, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=zcvbHJ1BZ9s", "annotations": [{"segment": [1.8159360374414977, 14.527488299531981], "label": "Putting on shoes"}]}, "o5EXkCKlkIE": {"duration": 17.648, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=o5EXkCKlkIE", "annotations": []}, "WGpz-hV-Ejw": {"duration": 78.02, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WGpz-hV-Ejw", "annotations": [{"segment": [6.937731223534656, 75.21961161622465], "label": "Rope skipping"}]}, "Z-osDpUPwn0": {"duration": 158.38, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Z-osDpUPwn0", "annotations": []}, "vw065HaGq3I": {"duration": 44.21, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=vw065HaGq3I", "annotations": [{"segment": [4.138268049921996, 11.311266003120123], "label": "Brushing hair"}, {"segment": [32.89923099687987, 37.24441244929797], "label": "Brushing hair"}]}, "F7u4kpwhs5g": {"duration": 53.62, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=F7u4kpwhs5g", "annotations": [{"segment": [1.923778676317121, 
30.11131841192015], "label": "Playing polo"}]}, "QlCA9f1WD84": {"duration": 37.013, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=QlCA9f1WD84", "annotations": [{"segment": [0, 14.175655526901348], "label": "Plastering"}, {"segment": [16.138882768916197, 18.159851640405616], "label": "Plastering"}, {"segment": [21.566628259750395, 23.472113487519504], "label": "Plastering"}, {"segment": [25.493082668486743, 30.574376609204368], "label": "Plastering"}]}, "ZKSORB5ygRM": {"duration": 168.902, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZKSORB5ygRM", "annotations": [{"segment": [19.762252301092044, 144.132693449298], "label": "Surfing"}]}, "_wHyOKf_fhc": {"duration": 173.73, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=_wHyOKf_fhc", "annotations": [{"segment": [17.617099132944908, 160.72214899687987], "label": "Cumbia"}]}, "gOnp3DZxeKU": {"duration": 88.82, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gOnp3DZxeKU", "annotations": []}, "gXVjjjvjVf4": {"duration": 227.14, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gXVjjjvjVf4", "annotations": [{"segment": [123.18320921, 219.05127925117006], "label": "Painting"}, {"segment": [26.031154446177847, 101.88002998], "label": "Painting"}]}, "25Wxe9TQzY8": {"duration": 6.04, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=25Wxe9TQzY8", "annotations": [{"segment": [0.011968374973106977, 5.983329348493356], "label": "Windsurfing"}]}, "4BsepqjsKVw": {"duration": 21.014, "subset": "testing", "resolution": "492x276", "url": "https://www.youtube.com/watch?v=4BsepqjsKVw", "annotations": []}, "-x1twrM_ABM": {"duration": 174.451, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=-x1twrM_ABM", "annotations": [{"segment": [9.510596845176284, 
156.7889822761919], "label": "Laying tile"}]}, "RzlX3Ai9AH4": {"duration": 199.204, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=RzlX3Ai9AH4", "annotations": [{"segment": [0.01, 199.181], "label": "Painting furniture"}]}, "t0XM3ivJYUo": {"duration": 49.95, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=t0XM3ivJYUo", "annotations": [{"segment": [21.564082683307333, 49.43390795631826], "label": "Ping-pong"}]}, "MysxDDxgHNo": {"duration": 13.816, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MysxDDxgHNo", "annotations": [{"segment": [0.01, 13.815872], "label": "Curling"}]}, "QOMvNgo6CQ4": {"duration": 47.09, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=QOMvNgo6CQ4", "annotations": [{"segment": [5.24742325975039, 17.5236017], "label": "Polishing shoes"}, {"segment": [21.283267300000002, 46.859856662246486], "label": "Polishing shoes"}]}, "wM7FmqEj4PY": {"duration": 20.06, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wM7FmqEj4PY", "annotations": [{"segment": [0.6065712, 19.811656531567387], "label": "Ironing clothes"}]}, "HCphw9_Jku8": {"duration": 167.765, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=HCphw9_Jku8", "annotations": [{"segment": [0.01, 0.02], "label": "Riding bumper cars"}]}, "PI_A4uJ_fsI": {"duration": 194.212, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=PI_A4uJ_fsI", "annotations": [{"segment": [23.62645553822153, 40.89194227769111], "label": "Elliptical trainer"}, {"segment": [55.12839625585024, 62.398074882995324], "label": "Elliptical trainer"}, {"segment": [79.05775507020282, 84.51001404056163], "label": "Elliptical trainer"}, {"segment": [106.92485647425897, 142.36453978159128], "label": "Elliptical trainer"}]}, "yjOriMHCSdw": {"duration": 154.6, 
"subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yjOriMHCSdw", "annotations": [{"segment": [39.46391783000001, 137.47444787587423], "label": "Wrapping presents"}]}, "U0qUFAPUg_Y": {"duration": 193.19, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=U0qUFAPUg_Y", "annotations": [{"segment": [16.274978509344457, 18.686086436654747], "label": "Pole vault"}, {"segment": [34.65967645508542, 37.372172873309495], "label": "Pole vault"}, {"segment": [43.39994269158522, 45.50966212798172], "label": "Pole vault"}, {"segment": [54.24992836448152, 57.565201764533164], "label": "Pole vault"}, {"segment": [66.30546800103296, 69.31935291017084], "label": "Pole vault"}, {"segment": [76.55267669210171, 79.56656160123957], "label": "Pole vault"}, {"segment": [88.00543934682557, 91.62210123779101], "label": "Pole vault"}, {"segment": [95.23876312875645, 97.64987105606673], "label": "Pole vault"}, {"segment": [102.77347504212167, 105.48597145085803], "label": "Pole vault"}, {"segment": [112.4179067176287, 115.13040312636505], "label": "Pole vault"}, {"segment": [127.48733121060842, 130.50121610920436], "label": "Pole vault"}, {"segment": [134.1178779875195, 137.43315137597503], "label": "Pole vault"}, {"segment": [139.8442592948518, 143.46092117316692], "label": "Pole vault"}, {"segment": [151.59841039937598, 154.9136837878315], "label": "Pole vault"}, {"segment": [162.44839603432138, 165.4622809329173], "label": "Pole vault"}, {"segment": [180.23031693603744, 183.2442018346334], "label": "Pole vault"}]}, "KZ1NEffR2Qg": {"duration": 90.256, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KZ1NEffR2Qg", "annotations": []}, "0SmA1cGocz4": {"duration": 214.274, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0SmA1cGocz4", "annotations": [{"segment": [45.191066546660096, 51.53250121955729], "label": "Blow-drying hair"}, {"segment": 
[56.87265673357598, 172.35351972423018], "label": "Blow-drying hair"}, {"segment": [179.36247383637973, 196.3842195373143], "label": "Blow-drying hair"}]}, "7iuU-zsauOY": {"duration": 143.848, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7iuU-zsauOY", "annotations": [{"segment": [2.4685195756630263, 143.39854625897033], "label": "Scuba diving"}]}, "-JhWjGDPHMY": {"duration": 149.282, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-JhWjGDPHMY", "annotations": [{"segment": [54.728635639625594, 70.09923118096725], "label": "Roof shingle removal"}]}, "dHBhJ7MRxPQ": {"duration": 33.93, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dHBhJ7MRxPQ", "annotations": [{"segment": [16.141852355694226, 33.44803504524181], "label": "Washing hands"}]}, "j_tKuqWYV-A": {"duration": 138.04, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=j_tKuqWYV-A", "annotations": [{"segment": [6.24529841057588, 130.28984615166922], "label": "Plataform diving"}]}, "MM5mreexkI8": {"duration": 77.72, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MM5mreexkI8", "annotations": [{"segment": [10.669373887880685, 38.67648034356748], "label": "Vacuuming floor"}, {"segment": [44.6173809422777, 53.83184004992201], "label": "Vacuuming floor"}]}, "WCChCrg9eZU": {"duration": 161.12, "subset": "validation", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=WCChCrg9eZU", "annotations": [{"segment": [1.7595364243369735, 29.409394521060843], "label": "Shoveling snow"}, {"segment": [51.7806433447738, 72.39235574414977], "label": "Shoveling snow"}, {"segment": [81.9441249048362, 94.00951752886117], "label": "Shoveling snow"}, {"segment": [135.2329423276131, 160.87190165366616], "label": "Shoveling snow"}]}, "0L_CxBasznE": {"duration": 25.4, "subset": "testing", "resolution": "176x144", "url": 
"https://www.youtube.com/watch?v=0L_CxBasznE", "annotations": []}, "Ppp24iA2Fw8": {"duration": 137.973, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ppp24iA2Fw8", "annotations": []}, "cYMweZa5J1I": {"duration": 236.12, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cYMweZa5J1I", "annotations": [{"segment": [16.389097717628704, 221.89733426677066], "label": "Doing motocross"}]}, "YizcSEo0bTo": {"duration": 60.86, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=YizcSEo0bTo", "annotations": [{"segment": [0.2, 54.70725269573628], "label": "Riding bumper cars"}]}, "3RTmWrwgKek": {"duration": 30.65, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=3RTmWrwgKek", "annotations": [{"segment": [0.01, 30.650339], "label": "Hopscotch"}]}, "Skz2tom_Ag0": {"duration": 7.08, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Skz2tom_Ag0", "annotations": [{"segment": [1.1915171606864274, 6.273147425897036], "label": "Playing badminton"}]}, "Z4KErmUO3gw": {"duration": 202.061, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Z4KErmUO3gw", "annotations": [{"segment": [37.08097389403207, 69.13402326930317], "label": "Bullfighting"}, {"segment": [71.01949676196618, 78.87563631472871], "label": "Bullfighting"}, {"segment": [80.4468642252812, 129.46917503451937], "label": "Bullfighting"}, {"segment": [131.35464852718238, 138.26805133361339], "label": "Bullfighting"}, {"segment": [160.26524208134845, 176.60601121743468], "label": "Bullfighting"}]}, "6GIWY4-491k": {"duration": 203.268, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6GIWY4-491k", "annotations": [{"segment": [21.404922156069915, 28.381341229159375], "label": "Ice fishing"}, {"segment": [48.042158616956925, 67.70297600475448], "label": "Ice fishing"}, 
{"segment": [90.85200292909677, 138.1013873307916], "label": "Ice fishing"}, {"segment": [157.44509485588526, 167.59261355757374], "label": "Ice fishing"}]}, "NnW_1eQV9GU": {"duration": 145.915, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NnW_1eQV9GU", "annotations": [{"segment": [5.99884520653698, 28.494513430836044], "label": "Bullfighting"}, {"segment": [90.30079380466782, 102.34392931868652], "label": "Bullfighting"}]}, "FKl0ncFqDF0": {"duration": 44.002, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=FKl0ncFqDF0", "annotations": []}, "u246t3R7hMs": {"duration": 126.99, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=u246t3R7hMs", "annotations": []}, "UG82vrvkYFk": {"duration": 33.995, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UG82vrvkYFk", "annotations": [{"segment": [0.01, 30.043929066465335], "label": "Doing a powerbomb"}]}, "gOniW-yEZ0k": {"duration": 13.58, "subset": "validation", "resolution": "600x360", "url": "https://www.youtube.com/watch?v=gOniW-yEZ0k", "annotations": [{"segment": [1.207908430577223, 4.619719962558502], "label": "Shaving legs"}]}, "A_Rh4tMM2xY": {"duration": 210.698, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=A_Rh4tMM2xY", "annotations": [{"segment": [0.2, 203.42599687987519], "label": "Swinging at the playground"}]}, "T49ZxNX1DFM": {"duration": 58.35, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=T49ZxNX1DFM", "annotations": [{"segment": [4.640204368174727, 53.9537488299532], "label": "Playing polo"}]}, "mnv-Qz4QDfU": {"duration": 92.068, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mnv-Qz4QDfU", "annotations": [{"segment": [46.512954758190325, 62.44794851794072], "label": "Table soccer"}]}, "KJQi_5e72lM": {"duration": 217.571, "subset": 
"training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KJQi_5e72lM", "annotations": [{"segment": [24.871510485889754, 208.52761952015766], "label": "Waxing skis"}]}, "pQsk5XPTLoY": {"duration": 13.468, "subset": "validation", "resolution": "400x224", "url": "https://www.youtube.com/watch?v=pQsk5XPTLoY", "annotations": [{"segment": [0.01, 12.951220027335514], "label": "Doing a powerbomb"}]}, "csxC5H5uDb4": {"duration": 47.136, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=csxC5H5uDb4", "annotations": [{"segment": [7.037486847058679, 43.25122700356711], "label": "Hanging wallpaper"}]}, "A8dBgZCuQow": {"duration": 173.06, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=A8dBgZCuQow", "annotations": [{"segment": [33.181408736349454, 144.8819297971919], "label": "Putting on makeup"}, {"segment": [154.86448361934478, 163.2282449297972], "label": "Putting on makeup"}]}, "V4cYhOQ6Pfs": {"duration": 41.08, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=V4cYhOQ6Pfs", "annotations": [{"segment": [21.961096723868952, 30.092464898595942], "label": "Hammer throw"}]}, "bPkk-z5n8MY": {"duration": 62.671, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bPkk-z5n8MY", "annotations": [{"segment": [1.857632596460321, 61.4974143900156], "label": "Baton twirling"}]}, "dSsMVoPLQOk": {"duration": 101.286, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dSsMVoPLQOk", "annotations": [{"segment": [4.424322115741773, 8.690632727349909], "label": "Grooming dog"}, {"segment": [28.284059239920616, 101.28537414965986], "label": "Grooming dog"}, {"segment": [40.450944599063966, 68.41898051326054], "label": "Grooming dog"}, {"segment": [76.16154412792511, 82.32399271918878], "label": "Grooming dog"}, {"segment": [93.06877487831514, 97.49309694383777], "label": "Grooming 
dog"}]}, "doAvtD_jgiY": {"duration": 37.895, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=doAvtD_jgiY", "annotations": [{"segment": [0.5911773322932917, 37.894467], "label": "Baton twirling"}]}, "ilX9ntIzaoQ": {"duration": 32.555, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=ilX9ntIzaoQ", "annotations": [{"segment": [0.01, 32.55437641723356], "label": "Snow tubing"}]}, "0fNBm46reNg": {"duration": 89.038, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0fNBm46reNg", "annotations": [{"segment": [0.19999999999999998, 83.20680031201248], "label": "Spread mulch"}]}, "RPLbUeV3-o0": {"duration": 41.007, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=RPLbUeV3-o0", "annotations": [{"segment": [12.011545916490569, 37.620133785852936], "label": "Skiing"}]}, "hR1HPLOmR1I": {"duration": 43.863, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hR1HPLOmR1I", "annotations": [{"segment": [15.74284758942457, 41.36756920684292], "label": "Hitting a pinata"}]}, "UZ9D6DhrmDI": {"duration": 33.019, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UZ9D6DhrmDI", "annotations": [{"segment": [6.644963878315132, 24.57091294539782], "label": "Playing rubik cube"}]}, "tLk0Y04OaCA": {"duration": 35.2, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=tLk0Y04OaCA", "annotations": []}, "zL_WLS1gPNw": {"duration": 108.414, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zL_WLS1gPNw", "annotations": [{"segment": [40.8025699067659, 51.087515862338385], "label": "Painting furniture"}, {"segment": [74.69230985873425, 88.51797351592863], "label": "Painting furniture"}]}, "sAi1aMHR89A": {"duration": 22.826, "subset": "validation", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=sAi1aMHR89A", "annotations": [{"segment": [0.2, 21.709703224859503], "label": "Doing crunches"}]}, "VWsyA_RJIzg": {"duration": 25.495, "subset": "validation", "resolution": "1920x1440", "url": "https://www.youtube.com/watch?v=VWsyA_RJIzg", "annotations": [{"segment": [0.01, 25.461], "label": "Scuba diving"}]}, "sYJgYpQknw0": {"duration": 130.29, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sYJgYpQknw0", "annotations": [{"segment": [37.399123519500776, 109.55504117940717], "label": "Ping-pong"}]}, "wAubNJ2NUUw": {"duration": 174.41, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wAubNJ2NUUw", "annotations": []}, "LZxTeIeuqT8": {"duration": 170.969, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LZxTeIeuqT8", "annotations": [{"segment": [87.39992626279314, 159.52349479861869], "label": "Rollerblading"}]}, "8im-T1bsyHs": {"duration": 107.07, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=8im-T1bsyHs", "annotations": [{"segment": [2.171409854914197, 16.703152730109206], "label": "Javelin throw"}, {"segment": [20.37784633073323, 28.562391168486744], "label": "Javelin throw"}, {"segment": [38.417251279251175, 58.962129137285494], "label": "Javelin throw"}, {"segment": [71.65652521216849, 98.38156958034322], "label": "Javelin throw"}]}, "M2wdIwZMNm8": {"duration": 171.085, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=M2wdIwZMNm8", "annotations": [{"segment": [113.9250655226209, 128.34745865834634], "label": "Ice fishing"}]}, "j5D5FR6Xtps": {"duration": 36.99, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=j5D5FR6Xtps", "annotations": [{"segment": [4.00331947, 34.463519918876756], "label": "Brushing hair"}]}, "94wjthSzsSQ": {"duration": 138.623, "subset": "validation", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=94wjthSzsSQ", "annotations": [{"segment": [0, 2.5951287847432263], "label": "Capoeira"}, {"segment": [8.21790781835355, 10.813036603096776], "label": "Capoeira"}, {"segment": [12.11060099546839, 14.056947584025808], "label": "Capoeira"}, {"segment": [16.219554904645165, 26.383809311556135], "label": "Capoeira"}, {"segment": [28.113895168051616, 30.92528468485678], "label": "Capoeira"}, {"segment": [39.79197469939614, 42.81962494826324], "label": "Capoeira"}, {"segment": [47.14483958950195, 48.44240398187356], "label": "Capoeira"}, {"segment": [73.5286487675507, 75.90751681591264], "label": "Capoeira"}, {"segment": [87.80185705772232, 91.9108109594384], "label": "Capoeira"}, {"segment": [95.58724339781593, 96.66854705616224], "label": "Capoeira"}, {"segment": [102.94010827457099, 109.42793022464899], "label": "Capoeira"}, {"segment": [111.80679827301093, 115.26696997971919], "label": "Capoeira"}, {"segment": [118.29462022308893, 122.403574124805], "label": "Capoeira"}, {"segment": [128.67513534321375, 130.62148192823713], "label": "Capoeira"}, {"segment": [135.16295729329173, 137.75808607332294], "label": "Capoeira"}]}, "Y_sVqGm8AyU": {"duration": 194.93, "subset": "training", "resolution": "568x320", "url": "https://www.youtube.com/watch?v=Y_sVqGm8AyU", "annotations": [{"segment": [83.93307226166598, 174.55646172137284], "label": "Getting a piercing"}]}, "u_vLZxgO9Rw": {"duration": 212.974, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=u_vLZxgO9Rw", "annotations": []}, "j0bhBuunr-E": {"duration": 25.566, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=j0bhBuunr-E", "annotations": []}, "I62kL8H81XA": {"duration": 92.3, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=I62kL8H81XA", "annotations": [{"segment": [14.865628330222547, 85.5995647957745], "label": "Making a lemonade"}]}, "49drGj3JUg4": 
{"duration": 23.453, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=49drGj3JUg4", "annotations": [{"segment": [12.860144296928341, 15.67730250706627], "label": "Calf roping"}]}, "vOI3hJ45lag": {"duration": 58.77, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=vOI3hJ45lag", "annotations": [{"segment": [16.77825279407176, 17.236675001560062], "label": "Playing accordion"}, {"segment": [23.47121702340094, 26.405119151326055], "label": "Playing accordion"}, {"segment": [36.123669950078, 43.27505638689548], "label": "Playing accordion"}]}, "LyIaNIuEYR8": {"duration": 27.145, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LyIaNIuEYR8", "annotations": []}, "yJSQmNSFlNI": {"duration": 237.981, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yJSQmNSFlNI", "annotations": [{"segment": [6.678842433697349, 127.64009984399375], "label": "River tubing"}, {"segment": [146.56348673946957, 228.955127925117], "label": "River tubing"}]}, "ad5QOYLt_cI": {"duration": 140.9, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ad5QOYLt_cI", "annotations": [{"segment": [53.83120124804992, 132.05123244929797], "label": "Making a sandwich"}]}, "UuLBAMSmwgc": {"duration": 189.266, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=UuLBAMSmwgc", "annotations": [{"segment": [16.529260530421215, 179.75570826833072], "label": "Table soccer"}]}, "I6O3bhwQKUU": {"duration": 63.67, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=I6O3bhwQKUU", "annotations": []}, "Fl9oEpnUDBY": {"duration": 21.664, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Fl9oEpnUDBY", "annotations": []}, "C26UfwZbHE0": {"duration": 83.685, "subset": "training", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=C26UfwZbHE0", "annotations": [{"segment": [23.891203282371293, 71.0208447301092], "label": "Gargling mouthwash"}]}, "W-pFUV2O1l0": {"duration": 150.12, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=W-pFUV2O1l0", "annotations": [{"segment": [20.602433697347895, 111.90867394695788], "label": "Getting a haircut"}]}, "QCeGGnd4QB0": {"duration": 203.9, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QCeGGnd4QB0", "annotations": [{"segment": [4.771320322200643, 200.39545179407176], "label": "Playing badminton"}]}, "lSbbzsOjwno": {"duration": 55.31, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lSbbzsOjwno", "annotations": [{"segment": [0.01, 52.807284056162246], "label": "Bathing dog"}]}, "c2-6_i1yV5E": {"duration": 170.92000000000002, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=c2-6_i1yV5E", "annotations": [{"segment": [2.6657566302652107, 151.6815522620905], "label": "Pole vault"}]}, "vPHPhlbpJVs": {"duration": 139.111, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vPHPhlbpJVs", "annotations": []}, "_B7rGFDRIww": {"duration": 230.6, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_B7rGFDRIww", "annotations": []}, "g2Dv154zmMU": {"duration": 107.671, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=g2Dv154zmMU", "annotations": [{"segment": [20.151201248049922, 91.85589235569424], "label": "Hurling"}]}, "9hE6VRD3qXQ": {"duration": 177.029, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9hE6VRD3qXQ", "annotations": [{"segment": [0.2, 93.62372653273478], "label": "Laying tile"}, {"segment": [120.68899261004454, 176.82893424036283], "label": "Laying tile"}]}, "o_-a7AMw74M": {"duration": 103.05, "subset": 
"validation", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=o_-a7AMw74M", "annotations": [{"segment": [10.288939405195256, 21.542466879627565], "label": "Cleaning shoes"}, {"segment": [37.45817002203897, 47.10405071440953], "label": "Cleaning shoes"}, {"segment": [61.090577718346836, 94.04733675061289], "label": "Cleaning shoes"}]}, "vwaQwo_5X-k": {"duration": 127.52, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vwaQwo_5X-k", "annotations": [{"segment": [0.2, 111.41240274570983], "label": "Breakdancing"}]}, "wJYsD3_CS6E": {"duration": 85.984, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=wJYsD3_CS6E", "annotations": [{"segment": [0.01, 85.983513], "label": "Swinging at the playground"}]}, "c7S423zJqPo": {"duration": 143.85, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=c7S423zJqPo", "annotations": []}, "60CCYfec2vQ": {"duration": 61.533, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=60CCYfec2vQ", "annotations": [{"segment": [4.799756616114985, 9.88749833697348], "label": "Roof shingle removal"}, {"segment": [12.76735222152886, 34.07827096723869], "label": "Roof shingle removal"}, {"segment": [36.574144333853354, 56.25314754086762], "label": "Roof shingle removal"}]}, "VVyE6rVJEuI": {"duration": 144.82, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VVyE6rVJEuI", "annotations": [{"segment": [0.01, 144.82256235827666], "label": "Zumba"}]}, "uz4Tb2RG1tY": {"duration": 150.419, "subset": "training", "resolution": "426x236", "url": "https://www.youtube.com/watch?v=uz4Tb2RG1tY", "annotations": [{"segment": [74.71135491419656, 97.46489157566303], "label": "Sharpening knives"}, {"segment": [104.98517550702029, 106.62740171606865], "label": "Sharpening knives"}, {"segment": [116.71536271450857, 120.23441887675507], "label": "Sharpening knives"}]}, 
"qiT-OtAHtvk": {"duration": 126.41, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=qiT-OtAHtvk", "annotations": [{"segment": [0.01, 126.301], "label": "Playing congas"}]}, "ynxngsa-IHU": {"duration": 206.96, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ynxngsa-IHU", "annotations": [{"segment": [35.184667073322935, 191.74029579407176], "label": "Getting a haircut"}, {"segment": [124.44030752, 157.71724513], "label": "Getting a haircut"}]}, "2RxbcK90TeA": {"duration": 21.293, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2RxbcK90TeA", "annotations": [{"segment": [0.01, 20.79597728009211], "label": "Canoeing"}]}, "vFRSso8WQfY": {"duration": 170.226, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=vFRSso8WQfY", "annotations": [{"segment": [45.67673596739788, 163.05532471138847], "label": "Making a lemonade"}]}, "PllZQ09sBuI": {"duration": 219.383, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PllZQ09sBuI", "annotations": [{"segment": [21.561738355248494, 138.26892532572052], "label": "Decorating the Christmas tree"}]}, "gYvYh9aMohY": {"duration": 201.225, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=gYvYh9aMohY", "annotations": [{"segment": [17.579642919050098, 178.30780675036524], "label": "Rope skipping"}]}, "cTioh2vzxGE": {"duration": 108.03999999999999, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=cTioh2vzxGE", "annotations": [{"segment": [0.01, 67.50518096723869], "label": "Playing harmonica"}, {"segment": [89.922631825273, 93.79933385335414], "label": "Playing harmonica"}]}, "Bh8RcPBQjxo": {"duration": 10.4, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Bh8RcPBQjxo", "annotations": [{"segment": [0.01, 10.061737223088922], 
"label": "Doing motocross"}]}, "SLfZUqfyfjs": {"duration": 158.385, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=SLfZUqfyfjs", "annotations": [{"segment": [0.19999999999999998, 8.895162533031934], "label": "Removing ice from car"}, {"segment": [49.66465747609496, 59.30108355354623], "label": "Removing ice from car"}]}, "9PvtW0Uvnl0": {"duration": 64.18, "subset": "training", "resolution": "3840x2160", "url": "https://www.youtube.com/watch?v=9PvtW0Uvnl0", "annotations": [{"segment": [12.935959526683545, 54.52296957029725], "label": "Running a marathon"}]}, "fxpbQqCviGY": {"duration": 17.624, "subset": "testing", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=fxpbQqCviGY", "annotations": []}, "yhWPzE-kmk4": {"duration": 45.21, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yhWPzE-kmk4", "annotations": [{"segment": [4.321049017041642, 39.57911911049958], "label": "Baking cookies"}]}, "0_PdI-5l62o": {"duration": 68.987, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0_PdI-5l62o", "annotations": [{"segment": [0.01, 66.18828028081124], "label": "Bullfighting"}]}, "UYW1EzZjflo": {"duration": 214.44, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UYW1EzZjflo", "annotations": []}, "4qZckue0QU4": {"duration": 23.383, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4qZckue0QU4", "annotations": [{"segment": [0, 23.361], "label": "Playing pool"}]}, "u5ri43qbi1A": {"duration": 220.845, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=u5ri43qbi1A", "annotations": [{"segment": [0.01, 220.84492063492064], "label": "Making a cake"}]}, "IrTqW6Qn8mI": {"duration": 40.171, "subset": "training", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=IrTqW6Qn8mI", "annotations": [{"segment": [0.28200833073322934, 
36.00306355694228], "label": "Blowing leaves"}]}, "MDhKBOm_wQ0": {"duration": 111.595, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MDhKBOm_wQ0", "annotations": []}, "W8eqlIPRWCk": {"duration": 99.36, "subset": "training", "resolution": "320x212", "url": "https://www.youtube.com/watch?v=W8eqlIPRWCk", "annotations": [{"segment": [7.7502502821201285, 98.89319359985284], "label": "Playing field hockey"}]}, "UpmzSgLLj-s": {"duration": 169.228, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UpmzSgLLj-s", "annotations": [{"segment": [0.01, 159.05178633274002], "label": "Assembling bicycle"}]}, "JRr3BruqS2Y": {"duration": 222.587, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JRr3BruqS2Y", "annotations": [{"segment": [94.50405455643896, 146.4293613306373], "label": "Rollerblading"}, {"segment": [200.08551221710698, 208.73973010202147], "label": "Rollerblading"}]}, "KHmaaCZIQrs": {"duration": 223.14, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KHmaaCZIQrs", "annotations": []}, "DbBqhlSvr-o": {"duration": 64.134, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DbBqhlSvr-o", "annotations": [{"segment": [5.853033843802731, 28.06454732967727], "label": "Hanging wallpaper"}, {"segment": [30.265688296710785, 50.47616444856393], "label": "Hanging wallpaper"}]}, "PnY3FT_QqWo": {"duration": 208.26, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=PnY3FT_QqWo", "annotations": [{"segment": [21.428995319812795, 195.13373010920438], "label": "Horseback riding"}]}, "uqiMw7tQ1Cc": {"duration": 55.148, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uqiMw7tQ1Cc", "annotations": [{"segment": [5.868443121568436, 54.17156305926314], "label": "Kneeling"}]}, "Gvc1z9qypFQ": {"duration": 24.61, "subset": 
"testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Gvc1z9qypFQ", "annotations": []}, "GEHPJEgMm_I": {"duration": 47.95, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=GEHPJEgMm_I", "annotations": [{"segment": [4.563028868954758, 46.82714872074883], "label": "Tennis serve with ball bouncing"}]}, "ZBGb82CLWe4": {"duration": 56.936, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZBGb82CLWe4", "annotations": []}, "dLDZtWXF1do": {"duration": 23.453, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dLDZtWXF1do", "annotations": []}, "nQgmUkyblgc": {"duration": 47.65, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nQgmUkyblgc", "annotations": [{"segment": [7.698325789999999, 28.86233775351014], "label": "Washing dishes"}]}, "BUmS918SyLA": {"duration": 133.469, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=BUmS918SyLA", "annotations": [{"segment": [7.495880798497247, 133.26832199546487], "label": "Baton twirling"}]}, "KxY2z_xDsO8": {"duration": 41.1, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KxY2z_xDsO8", "annotations": [{"segment": [0.2, 40.840999999999994], "label": "Shuffleboard"}]}, "9lNEPwPEemI": {"duration": 57.586, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=9lNEPwPEemI", "annotations": []}, "IMMoglaJ3RY": {"duration": 77.69, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=IMMoglaJ3RY", "annotations": []}, "DaDHatB3RbM": {"duration": 46.603, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DaDHatB3RbM", "annotations": [{"segment": [4.354539781591264, 22.063001560062403], "label": "Canoeing"}]}, "QjFioni0uCM": {"duration": 145.752, "subset": "training", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=QjFioni0uCM", "annotations": [{"segment": [59.57399952596743, 107.32415083931357], "label": "Blow-drying hair"}]}, "m_ST2LDe5lA": {"duration": 17.09, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=m_ST2LDe5lA", "annotations": [{"segment": [2.4458221528861155, 15.073708268330734], "label": "Scuba diving"}]}, "0jBwj0bfZ3Y": {"duration": 29.91, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0jBwj0bfZ3Y", "annotations": [{"segment": [1.2597458658346334, 29.207441185647426], "label": "Playing kickball"}]}, "5N-PfYLyCpI": {"duration": 169.503, "subset": "training", "resolution": "360x240", "url": "https://www.youtube.com/watch?v=5N-PfYLyCpI", "annotations": [{"segment": [0.19999999999999998, 6.082010920436817], "label": "Capoeira"}, {"segment": [16.394985959438376, 163.98274570982838], "label": "Capoeira"}]}, "g-KEU43sCt4": {"duration": 224.027, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=g-KEU43sCt4", "annotations": [{"segment": [0.2, 113.20337597503901], "label": "Capoeira"}, {"segment": [135.21514352574104, 183.7972012480499], "label": "Capoeira"}]}, "fJ45W32t6h0": {"duration": 121.464, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fJ45W32t6h0", "annotations": [{"segment": [7.766397815912637, 11.933733229329173], "label": "Snow tubing"}, {"segment": [24.435739469578785, 27.466528861154448], "label": "Snow tubing"}, {"segment": [41.862778471138846, 47.166659906396255], "label": "Snow tubing"}, {"segment": [58.72154446177847, 60.994636505460214], "label": "Snow tubing"}, {"segment": [91.69648049921997, 118.78862246489861], "label": "Snow tubing"}]}, "o8Qw9GY3fRc": {"duration": 174.266, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=o8Qw9GY3fRc", "annotations": []}, "y20J3BbydOk": {"duration": 141.108, "subset": "training", 
"resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=y20J3BbydOk", "annotations": [{"segment": [41.32124751873751, 123.52415359190032], "label": "Making a lemonade"}]}, "QLJAQBUisuQ": {"duration": 146.91, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=QLJAQBUisuQ", "annotations": [{"segment": [57.976445240000004, 126.31714067000001], "label": "Triple jump"}]}, "g1Z8boTOAlA": {"duration": 44.1, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=g1Z8boTOAlA", "annotations": []}, "LKeQMeUZFso": {"duration": 78.23, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=LKeQMeUZFso", "annotations": [{"segment": [3.476021840873635, 72.75252730109204], "label": "Vacuuming floor"}]}, "ShiBZnuxlmc": {"duration": 113.47999999999999, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ShiBZnuxlmc", "annotations": [{"segment": [22.659777497659906, 113.475917], "label": "Using the balance beam"}]}, "nbcRj00xCKM": {"duration": 133.864, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nbcRj00xCKM", "annotations": [{"segment": [14.607285491419656, 130.8591060842434], "label": "Waterskiing"}]}, "0RZ5_8VD4vc": {"duration": 39.939, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0RZ5_8VD4vc", "annotations": [{"segment": [0.010000000000000002, 31.028524732826046], "label": "Roof shingle removal"}]}, "p1QGn0IzfW0": {"duration": 72.59, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=p1QGn0IzfW0", "annotations": []}, "pIk_bbjCNDo": {"duration": 22.27, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pIk_bbjCNDo", "annotations": [{"segment": [0.10421810452418098, 22.16371689547582], "label": "Brushing teeth"}]}, "nZ41Hmvap-4": {"duration": 18.391, "subset": "testing", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=nZ41Hmvap-4", "annotations": []}, "5g70tfGYWDk": {"duration": 71.43, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5g70tfGYWDk", "annotations": [{"segment": [4.3429937597503905, 64.71759906396255], "label": "Tennis serve with ball bouncing"}]}, "Rh2UGc42X0M": {"duration": 166.162, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Rh2UGc42X0M", "annotations": [{"segment": [11.146592519483091, 146.4610399531981], "label": "Snow tubing"}]}, "HnCUykqco5M": {"duration": 216.619, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HnCUykqco5M", "annotations": [{"segment": [16.559015427283757, 40.214751751974845], "label": "Rafting"}, {"segment": [44.27002083620759, 54.408193546789484], "label": "Rafting"}, {"segment": [69.61545261266234, 105.43699619005169], "label": "Rafting"}, {"segment": [139.23090522532468, 146.6655652130847], "label": "Rafting"}, {"segment": [157.14167701401934, 167.2798497246012], "label": "Rafting"}]}, "vKC23-I4pBc": {"duration": 34.09, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vKC23-I4pBc", "annotations": [{"segment": [16.940840873634944, 26.29937877], "label": "Cleaning windows"}]}, "Lliy0OcwzUE": {"duration": 157.618, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Lliy0OcwzUE", "annotations": []}, "T3bTwmccIEQ": {"duration": 112.896, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=T3bTwmccIEQ", "annotations": [{"segment": [48.2578611933593, 112.895214], "label": "Canoeing"}]}, "hQl2eeYX3IY": {"duration": 76.905, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=hQl2eeYX3IY", "annotations": [{"segment": [2.039588631825273, 76.904489], "label": "Blowing leaves"}]}, "O8tsM_ilGx4": {"duration": 30.094, 
"subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=O8tsM_ilGx4", "annotations": [{"segment": [8.529581903276132, 26.10426989079563], "label": "Calf roping"}]}, "_9h6NBOPTy8": {"duration": 88.65, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=_9h6NBOPTy8", "annotations": [{"segment": [34.85658654, 78.74514072000001], "label": "Mixing drinks"}]}, "zSeLjjo3KF0": {"duration": 61.35, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=zSeLjjo3KF0", "annotations": [{"segment": [35.65023686037441, 61.3471201814059], "label": "Bungee jumping"}]}, "CTmXCO7ha-Y": {"duration": 186.6, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CTmXCO7ha-Y", "annotations": [{"segment": [0, 186.5955782312925], "label": "Playing guitarra"}]}, "ZZImcxqa1_w": {"duration": 24.47, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ZZImcxqa1_w", "annotations": [{"segment": [3.5630160234009365, 21.1569085], "label": "Hammer throw"}]}, "Ne8UzAInivM": {"duration": 120.257, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Ne8UzAInivM", "annotations": [{"segment": [8.629926171196509, 60.034269017019184], "label": "Scuba diving"}, {"segment": [84.6107971045242, 113.87750297659906], "label": "Scuba diving"}]}, "iqhdqhVttEM": {"duration": 205.5, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iqhdqhVttEM", "annotations": []}, "tMTvOaUYNeg": {"duration": 23.963, "subset": "validation", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=tMTvOaUYNeg", "annotations": [{"segment": [0.2, 20.044152336448597], "label": "Raking leaves"}]}, "AuS32BoK0Bg": {"duration": 213.578, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AuS32BoK0Bg", "annotations": [{"segment": [24.98295631825273, 
36.64166926677067], "label": "Skiing"}, {"segment": [45.96863962558502, 184.22491887675508], "label": "Skiing"}]}, "GgAXP4FTFnA": {"duration": 54.22, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=GgAXP4FTFnA", "annotations": [{"segment": [0.8872581903276131, 53.61574492979719], "label": "Playing water polo"}]}, "kHSCvFav1p4": {"duration": 180.12, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kHSCvFav1p4", "annotations": [{"segment": [0.01, 180.11718820861677], "label": "Playing badminton"}]}, "yCqe0J1xgyc": {"duration": 199.27, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=yCqe0J1xgyc", "annotations": [{"segment": [13.518478916536662, 197.80486966380656], "label": "Springboard diving"}]}, "jqEpQ-cRGFk": {"duration": 95.9, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=jqEpQ-cRGFk", "annotations": [{"segment": [6.413112284146227, 89.48531716439506], "label": "Brushing hair"}]}, "Fh1LR3xB3zQ": {"duration": 168.043, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Fh1LR3xB3zQ", "annotations": [{"segment": [91.20803120124806, 112.17539469578784], "label": "Sharpening knives"}, {"segment": [117.41723556942279, 150.1787410296412], "label": "Sharpening knives"}]}, "d_J5_sRRiAk": {"duration": 139.46, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=d_J5_sRRiAk", "annotations": []}, "FjMeS0FCvas": {"duration": 46.16, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FjMeS0FCvas", "annotations": [{"segment": [0.01, 15.308198017160688], "label": "Drinking beer"}]}, "2w7Fxoeo_Qk": {"duration": 40.148, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2w7Fxoeo_Qk", "annotations": [{"segment": [18.163365687987522, 27.495577713884558], "label": "Playing ten pins"}]}, 
"HMc2QoiQcoA": {"duration": 170.37, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HMc2QoiQcoA", "annotations": [{"segment": [46.51144664834212, 81.59436640594876], "label": "Smoking a cigarette"}, {"segment": [127.84003217316692, 159.73359529329173], "label": "Smoking a cigarette"}]}, "KNp7bwtYEA8": {"duration": 6.6, "subset": "testing", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=KNp7bwtYEA8", "annotations": []}, "vmUbGiOyUbU": {"duration": 113.801, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=vmUbGiOyUbU", "annotations": [{"segment": [21.659472267326066, 43.67401785050995], "label": "Grooming dog"}, {"segment": [50.06533753595042, 54.14868031981279], "label": "Grooming dog"}, {"segment": [60.895073277691104, 64.26826975663026], "label": "Grooming dog"}, {"segment": [69.06175948985958, 73.32263925273011], "label": "Grooming dog"}, {"segment": [78.47120229953198, 84.86252194383775], "label": "Grooming dog"}, {"segment": [100.84082105460217, 107.05460404212168], "label": "Grooming dog"}]}, "V1ntLwOfkyE": {"duration": 56.15, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=V1ntLwOfkyE", "annotations": [{"segment": [6.476558502340094, 52.00604368174727], "label": "Using the pommel horse"}]}, "qVuRcevXgMk": {"duration": 80.806, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=qVuRcevXgMk", "annotations": [{"segment": [0.01, 80.805395], "label": "Putting in contact lenses"}]}, "jx7_0MRlScw": {"duration": 91.162, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jx7_0MRlScw", "annotations": [{"segment": [0.2, 85.73794539781592], "label": "Riding bumper cars"}]}, "y1CzJvvItrM": {"duration": 90.396, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=y1CzJvvItrM", "annotations": [{"segment": [0.01, 90.395283], 
"label": "Plastering"}]}, "Eblks2o9IHw": {"duration": 109.018, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Eblks2o9IHw", "annotations": []}, "bfk3xsTt0XA": {"duration": 61.533, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bfk3xsTt0XA", "annotations": [{"segment": [8.255569060842433, 60.476843120124805], "label": "Beach soccer"}]}, "uFtP9Z_K87U": {"duration": 129.777, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uFtP9Z_K87U", "annotations": [{"segment": [6.276234200388424, 9.515580884459867], "label": "Scuba diving"}, {"segment": [13.76722340730364, 19.233620936674203], "label": "Scuba diving"}, {"segment": [38.06232353783947, 42.11150689292878], "label": "Scuba diving"}, {"segment": [54.86643446146009, 62.357423668375304], "label": "Scuba diving"}, {"segment": [69.24103537202713, 71.6705453850807], "label": "Scuba diving"}, {"segment": [79.76891209525932, 89.68941094851795], "label": "Scuba diving"}, {"segment": [94.75089012168488, 107.91073597191888], "label": "Scuba diving"}]}, "UAEZC2C2fYc": {"duration": 50.25, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UAEZC2C2fYc", "annotations": []}, "XASTWKClhPU": {"duration": 130.334, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XASTWKClhPU", "annotations": [{"segment": [5.489870026521062, 103.9008734648986], "label": "Rollerblading"}]}, "FCFSLuCZKj4": {"duration": 126.27, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=FCFSLuCZKj4", "annotations": [{"segment": [25.0368156, 106.3887885], "label": "Cleaning windows"}]}, "pmix_U52VWk": {"duration": 74.26, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=pmix_U52VWk", "annotations": [{"segment": [1.3883931294851795, 65.02307823088923], "label": "Playing flauta"}]}, "173d8EtsIpE": 
{"duration": 187.13, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=173d8EtsIpE", "annotations": [{"segment": [8.026095943837753, 175.84446567862713], "label": "Doing fencing"}]}, "XZCmkuDKltY": {"duration": 29.211, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XZCmkuDKltY", "annotations": [{"segment": [1.6379832336448599, 25.570738258566976], "label": "Playing rubik cube"}]}, "iH659QSrcDc": {"duration": 23.406, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=iH659QSrcDc", "annotations": [{"segment": [5.769271206864274, 11.684599912636505], "label": "Fixing the roof"}]}, "L0jbYqV9Zaw": {"duration": 187.69, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=L0jbYqV9Zaw", "annotations": [{"segment": [36.01480605346663, 67.9303658894655], "label": "Paintball"}, {"segment": [81.98492434935493, 136.44633838142641], "label": "Paintball"}]}, "MxYa4WTV8Zs": {"duration": 190.125, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MxYa4WTV8Zs", "annotations": [{"segment": [3.558084243369735, 184.7433120124805], "label": "Decorating the Christmas tree"}]}, "-Z98HU6T7J8": {"duration": 131.96, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=-Z98HU6T7J8", "annotations": [{"segment": [29.438596394695786, 121.45994316692668], "label": "Smoking a cigarette"}]}, "uDqnGmLJyao": {"duration": 76.092, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uDqnGmLJyao", "annotations": [{"segment": [31.123079273278776, 37.040014839625584], "label": "Hanging wallpaper"}, {"segment": [46.27043348200844, 48.63720766552321], "label": "Hanging wallpaper"}, {"segment": [63.666223730842034, 67.09804629693845], "label": "Hanging wallpaper"}]}, "JcWM1SgpU9M": {"duration": 15.07, "subset": "testing", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=JcWM1SgpU9M", "annotations": []}, "SvMIG1JlcMk": {"duration": 169.85, "subset": "training", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=SvMIG1JlcMk", "annotations": [{"segment": [13.245007800312012, 49.536329173166926], "label": "Belly dance"}, {"segment": [49.80122932917316, 134.3043790951638], "label": "Belly dance"}]}, "N8otQdjR96s": {"duration": 182.625, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=N8otQdjR96s", "annotations": [{"segment": [0.19999999999999998, 13.101101404056163], "label": "Playing drums"}, {"segment": [42.72098283931357, 51.265179407176284], "label": "Playing drums"}, {"segment": [68.06876599063963, 114.20742745709829], "label": "Playing drums"}, {"segment": [122.4668174726989, 149.25498283931356], "label": "Playing drums"}, {"segment": [158.93946645865833, 171.75716536661466], "label": "Playing drums"}]}, "y8ids5d1A88": {"duration": 229.82999999999998, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=y8ids5d1A88", "annotations": []}, "zYQ-WdosIwI": {"duration": 42.493, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zYQ-WdosIwI", "annotations": [{"segment": [8.233721613143599, 41.06270770660154], "label": "Kneeling"}]}, "2WKy0FvMtCM": {"duration": 192.285, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2WKy0FvMtCM", "annotations": [{"segment": [13.178362260792166, 145.2614918068536], "label": "Making a cake"}]}, "PIGH5p2iMoI": {"duration": 192.053, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PIGH5p2iMoI", "annotations": [{"segment": [94.50283229329173, 124.45618642745711], "label": "Hurling"}, {"segment": [119.66364976599066, 153.81047347893914], "label": "Hurling"}]}, "q1TVicD9MOk": {"duration": 191.495, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=q1TVicD9MOk", "annotations": []}, "mBnLy9ZgMkc": {"duration": 187.25, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=mBnLy9ZgMkc", "annotations": [{"segment": [6.7163229329173175, 177.25252262090484], "label": "Playing accordion"}]}, "pnxgTQofPQo": {"duration": 11.1, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pnxgTQofPQo", "annotations": [{"segment": [3.221009360374415, 8.438110764430578], "label": "Doing a powerbomb"}]}, "PQBeNLSdugw": {"duration": 96.32, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=PQBeNLSdugw", "annotations": [{"segment": [0.7907531484799964, 91.37287775907191], "label": "Doing karate"}]}, "o5F7X-UBETE": {"duration": 220.32999999999998, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=o5F7X-UBETE", "annotations": []}, "LdAT5MEBkuQ": {"duration": 143.5, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=LdAT5MEBkuQ", "annotations": [{"segment": [45.89291812480499, 98.94960883494541], "label": "Waterskiing"}, {"segment": [28.20735455475819, 31.341505060842437], "label": "Waterskiing"}, {"segment": [45.89291812480499, 98.94960883494541], "label": "Waterskiing"}]}, "rVRNX-fClE4": {"duration": 83.2, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rVRNX-fClE4", "annotations": [{"segment": [39.067591808434244, 75.66912300437596], "label": "Polishing shoes"}]}, "GBdj6erXjDM": {"duration": 29.374, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=GBdj6erXjDM", "annotations": [{"segment": [0.2, 17.95285111166394], "label": "Having an ice cream"}]}, "cDMKIQSIcsU": {"duration": 123.902, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cDMKIQSIcsU", "annotations": []}, "5GiIqXY__74": {"duration": 228.949, 
"subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5GiIqXY__74", "annotations": [{"segment": [35.96239586749682, 93.64466330013083], "label": "Playing ten pins"}, {"segment": [100.76593007701291, 126.40249317499112], "label": "Playing ten pins"}, {"segment": [134.23588745492893, 160.58457730562878], "label": "Playing ten pins"}, {"segment": [166.63765470376254, 182.30444326363812], "label": "Playing ten pins"}]}, "Xi68dag0iGo": {"duration": 143.22, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=Xi68dag0iGo", "annotations": [{"segment": [8.152615685647426, 62.5842756], "label": "Playing bagpipes"}, {"segment": [69.22109040000001, 119.60892327847115], "label": "Playing bagpipes"}]}, "XzNMQl0EdEo": {"duration": 175.15, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XzNMQl0EdEo", "annotations": [{"segment": [52.94548892, 156.68740079999998], "label": "Breakdancing"}, {"segment": [0.2, 11.33653531825273], "label": "Breakdancing"}]}, "F9FPK6mfh88": {"duration": 203.22, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=F9FPK6mfh88", "annotations": []}, "hbHkS0GAOLE": {"duration": 23.894, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hbHkS0GAOLE", "annotations": [{"segment": [0.01, 23.893333], "label": "Snow tubing"}]}, "hS4L3PMfYqA": {"duration": 111.32, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hS4L3PMfYqA", "annotations": [{"segment": [20.665614444617788, 104.02271472542903], "label": "Breakdancing"}]}, "3jb0w48ACkU": {"duration": 42.96, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3jb0w48ACkU", "annotations": [{"segment": [35.58521351794072, 42.75586859594384], "label": "Starting a campfire"}]}, "uWfQcDvel84": {"duration": 113.38, "subset": "testing", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=uWfQcDvel84", "annotations": []}, "gTUJmj5GRqo": {"duration": 212.672, "subset": "testing", "resolution": "400x226", "url": "https://www.youtube.com/watch?v=gTUJmj5GRqo", "annotations": []}, "UNbC2c1C824": {"duration": 90.721, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UNbC2c1C824", "annotations": [{"segment": [2.829984399375975, 86.45602340093603], "label": "Baton twirling"}]}, "EKfhRuD3x9s": {"duration": 238.562, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=EKfhRuD3x9s", "annotations": [{"segment": [15.631195304955055, 232.6070729329173], "label": "Playing congas"}]}, "aH7puGPu39Y": {"duration": 189.986, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aH7puGPu39Y", "annotations": [{"segment": [46.092941140044765, 186.14458368747867], "label": "Snowboarding"}]}, "HcZ3irBAcE0": {"duration": 29.467, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HcZ3irBAcE0", "annotations": [{"segment": [0.6205694227769112, 25.903026020280812], "label": "Clipping cat claws"}]}, "9btLaLqX-Zk": {"duration": 158.76, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9btLaLqX-Zk", "annotations": [{"segment": [19.070845865834634, 145.07555979344775], "label": "Cleaning windows"}]}, "HjVQNiy4uuw": {"duration": 23.546, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=HjVQNiy4uuw", "annotations": [{"segment": [0.01, 4.922050789391576], "label": "River tubing"}, {"segment": [16.3456164274571, 22.112496829953198], "label": "River tubing"}]}, "hi07Rdf1r4Y": {"duration": 53.8, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=hi07Rdf1r4Y", "annotations": [{"segment": [0, 34.99964074726989], "label": "Getting a haircut"}, {"segment": [15.545509999999998, 26.242563999999998], 
"label": "Getting a haircut"}]}, "AY6QSTuHGRc": {"duration": 168.531, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AY6QSTuHGRc", "annotations": [{"segment": [51.79484379919415, 53.635269720992916], "label": "Snowboarding"}, {"segment": [63.889071285300396, 69.14743106186832], "label": "Snowboarding"}, {"segment": [74.66870882726465, 84.65959240274371], "label": "Snowboarding"}, {"segment": [108.58512938612783, 113.31765318503895], "label": "Snowboarding"}, {"segment": [145.65656581093177, 146.70823703588144], "label": "Snowboarding"}]}, "6GYdu5G61g8": {"duration": 121.858, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=6GYdu5G61g8", "annotations": [{"segment": [15.01842033953467, 72.6207160721803], "label": "Rafting"}, {"segment": [97.52467891368715, 100.94659747206215], "label": "Rafting"}, {"segment": [111.02224656061071, 121.85832199546485], "label": "Rafting"}]}, "WdC6dVbfTvs": {"duration": 146.45, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WdC6dVbfTvs", "annotations": [{"segment": [0.2284684149270733, 70.1398033826115], "label": "Getting a piercing"}]}, "M6yAoJJQvGY": {"duration": 113.755, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=M6yAoJJQvGY", "annotations": [{"segment": [19.166134547422715, 24.312596601823255], "label": "Doing crunches"}, {"segment": [70.63075395943837, 90.32928081747269], "label": "Doing crunches"}, {"segment": [97.42784905460218, 101.8644542028081], "label": "Doing crunches"}]}, "_7GQcJezzo4": {"duration": 24.52, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=_7GQcJezzo4", "annotations": []}, "UCmycSotoy4": {"duration": 38.08, "subset": "training", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=UCmycSotoy4", "annotations": [{"segment": [1.6064291999999998, 34.94367940717629], "label": "Kayaking"}]}, 
"YfouQ4TJhmQ": {"duration": 55.71, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=YfouQ4TJhmQ", "annotations": [{"segment": [0.08690276131045242, 39.019339828393136], "label": "Grooming horse"}, {"segment": [48.14412976599064, 54.92254514820593], "label": "Grooming horse"}]}, "bcXc6mKSEEM": {"duration": 28.747, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bcXc6mKSEEM", "annotations": [{"segment": [0.2, 28.501], "label": "Arm wrestling"}]}, "OTMkyaTCpqM": {"duration": 56.726, "subset": "training", "resolution": "574x360", "url": "https://www.youtube.com/watch?v=OTMkyaTCpqM", "annotations": [{"segment": [0, 6.017723544461778], "label": "Playing pool"}, {"segment": [12.389430826833072, 15.48678853354134], "label": "Playing pool"}, {"segment": [28.849674639625587, 43.45150382839313], "label": "Playing pool"}, {"segment": [47.079837141965676, 52.389593210608425], "label": "Playing pool"}]}, "hBT_SN8IGsQ": {"duration": 121.65, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=hBT_SN8IGsQ", "annotations": [{"segment": [0.01, 112.81934239411552], "label": "Hurling"}]}, "cyXWvxVt8qE": {"duration": 86.98, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=cyXWvxVt8qE", "annotations": [{"segment": [2.8496433556942278, 22.525752240249613], "label": "Playing badminton"}, {"segment": [24.425514477379092, 70.01980816848675], "label": "Playing badminton"}, {"segment": [76.26188409048362, 82.09686810452419], "label": "Playing badminton"}]}, "goH-zdx5urM": {"duration": 22.756, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=goH-zdx5urM", "annotations": [{"segment": [1.2395241809672388, 21.673965678627145], "label": "Doing crunches"}]}, "JJyV1AIQj4M": {"duration": 231.39, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JJyV1AIQj4M", 
"annotations": [{"segment": [16.778689523400935, 197.19470590405618], "label": "Belly dance"}]}, "3QYwfLRJhI8": {"duration": 168.11, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3QYwfLRJhI8", "annotations": []}, "NVj37t3AReg": {"duration": 75.187, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NVj37t3AReg", "annotations": []}, "kpE3q-Hmd7A": {"duration": 102.4, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kpE3q-Hmd7A", "annotations": [{"segment": [11.329355137285491, 99.73023888455538], "label": "Doing step aerobics"}]}, "CR9Ghr1h8oY": {"duration": 89.745, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CR9Ghr1h8oY", "annotations": []}, "M-IRMq2DmY0": {"duration": 15.67, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=M-IRMq2DmY0", "annotations": [{"segment": [0, 15.673469], "label": "Shaving legs"}]}, "cZFThsHMC5w": {"duration": 172.04, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cZFThsHMC5w", "annotations": [{"segment": [19.860704610497347, 144.6610781764604], "label": "Tai chi"}]}, "Z0mxEFOm_Wc": {"duration": 88.585, "subset": "validation", "resolution": "718x480", "url": "https://www.youtube.com/watch?v=Z0mxEFOm_Wc", "annotations": [{"segment": [28.862541341653664, 39.35801092043682], "label": "Waxing skis"}]}, "fljxcvQe_Mo": {"duration": 84.428, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=fljxcvQe_Mo", "annotations": [{"segment": [29.3719023209908, 83.1106283525741], "label": "Fixing the roof"}]}, "p0-BGit7WAo": {"duration": 56.05, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=p0-BGit7WAo", "annotations": [{"segment": [14.95328851544462, 48.88238760312012], "label": "Ballet"}]}, "uYqNIBbz4_Q": {"duration": 23.45, "subset": 
"training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=uYqNIBbz4_Q", "annotations": [{"segment": [13.92016912, 18.3781598], "label": "Chopping wood"}, {"segment": [13.920169119999999, 19.42760256318253], "label": "Chopping wood"}]}, "nVN1rP8H688": {"duration": 198.53, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nVN1rP8H688", "annotations": [{"segment": [39.01498595943838, 46.446411856474256], "label": "Snowboarding"}, {"segment": [54.497123244929796, 59.14176443057722], "label": "Snowboarding"}, {"segment": [62.857477379095165, 117.36642589703588], "label": "Snowboarding"}, {"segment": [121.39218720748829, 150.81121216848675], "label": "Snowboarding"}, {"segment": [170.0109968798752, 175.585127925117], "label": "Snowboarding"}]}, "3BD1Y-lvaM0": {"duration": 99.428, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3BD1Y-lvaM0", "annotations": []}, "5I5xXW25OXs": {"duration": 23.847, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=5I5xXW25OXs", "annotations": [{"segment": [0.9053464740622453, 6.953357377489037], "label": "Rock-paper-scissors"}]}, "gXAMD_KxXII": {"duration": 163.03, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gXAMD_KxXII", "annotations": [{"segment": [51.629558229948245, 65.10919608112324], "label": "Polishing shoes"}, {"segment": [91.55980698907956, 100.20712209360374], "label": "Polishing shoes"}]}, "XnvaW1HQyg4": {"duration": 27.66, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XnvaW1HQyg4", "annotations": [{"segment": [3.3083572542901716, 14.522399375975038], "label": "Hammer throw"}]}, "k6pU9VaFsZ8": {"duration": 95.667, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=k6pU9VaFsZ8", "annotations": [{"segment": [2.3879241684867396, 92.0843257472699], "label": "Powerbocking"}]}, 
"-IkL9Kq9zfQ": {"duration": 41.518, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-IkL9Kq9zfQ", "annotations": [{"segment": [0.01, 40.6105043775351], "label": "Canoeing"}]}, "tr8JRPrfAN0": {"duration": 168.48, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tr8JRPrfAN0", "annotations": [{"segment": [8.933407176287051, 11.560879875195008], "label": "Vacuuming floor"}, {"segment": [18.65505616224649, 23.121759750390016], "label": "Vacuuming floor"}, {"segment": [35.470881435257404, 35.996375975039], "label": "Vacuuming floor"}, {"segment": [57.016157566302645, 58.85538845553822], "label": "Vacuuming floor"}, {"segment": [81.45165366614664, 89.33407176287051], "label": "Vacuuming floor"}]}, "izZqZFVpW4c": {"duration": 174.475, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=izZqZFVpW4c", "annotations": [{"segment": [11.976426714211424, 167.66997216848674], "label": "Ice fishing"}]}, "SKbjRXF35Wk": {"duration": 172.13, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SKbjRXF35Wk", "annotations": [{"segment": [7.787451073322933, 164.34207092667708], "label": "Tango"}]}, "VeWdsZb5tog": {"duration": 160.06, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=VeWdsZb5tog", "annotations": [{"segment": [28.96473780967239, 96.6323580374415], "label": "Hopscotch"}]}, "zBCZT-hF2Ao": {"duration": 194.56, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=zBCZT-hF2Ao", "annotations": []}, "PoamN_DEInI": {"duration": 32.93, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PoamN_DEInI", "annotations": [{"segment": [9.733942141965677, 12.40499750546022], "label": "Hopscotch"}]}, "MSPslSgkp60": {"duration": 158.221, "subset": "validation", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=MSPslSgkp60", "annotations": [{"segment": [79.72747117775869, 87.62616801270691], "label": "Rope skipping"}, {"segment": [96.75903622811579, 109.1007500327224], "label": "Rope skipping"}]}, "n96adnrNnno": {"duration": 144.85, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=n96adnrNnno", "annotations": [{"segment": [0.22589859594383774, 23.26755538221529], "label": "Using uneven bars"}]}, "BmLDHLKiIdQ": {"duration": 237.63, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BmLDHLKiIdQ", "annotations": []}, "f0hh9bwVfV0": {"duration": 170.598, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=f0hh9bwVfV0", "annotations": [{"segment": [38.32444968002802, 92.61742006006772], "label": "Scuba diving"}, {"segment": [103.52924253840901, 120.56233128508812], "label": "Scuba diving"}]}, "Zso0ZBs2y6E": {"duration": 180.443, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Zso0ZBs2y6E", "annotations": [{"segment": [55.84449221436242, 73.80453128857167], "label": "Hanging wallpaper"}, {"segment": [151.53782408502258, 156.58908503059024], "label": "Hanging wallpaper"}]}, "TyHLBe6__rc": {"duration": 238.1, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TyHLBe6__rc", "annotations": [{"segment": [15.040662163806552, 227.8381797149437], "label": "Windsurfing"}]}, "rim1Bl1zWwI": {"duration": 230.598, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=rim1Bl1zWwI", "annotations": []}, "msGQHfIRLhE": {"duration": 84.45, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=msGQHfIRLhE", "annotations": [{"segment": [0, 32.937091633325196], "label": "Doing karate"}, {"segment": [41.76423122776911, 57.70578321060842], "label": "Doing karate"}, {"segment": [67.5869104726989, 74.96481882839313], 
"label": "Doing karate"}]}, "-tvhSol-wTs": {"duration": 126.09, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-tvhSol-wTs", "annotations": [{"segment": [33.52457340561622, 99.78721996099843], "label": "Playing squash"}]}, "8-muYZwFf9c": {"duration": 198.11, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=8-muYZwFf9c", "annotations": [{"segment": [10.882955184259147, 190.14317481042735], "label": "Windsurfing"}]}, "AQSiBbhLBhs": {"duration": 225.12, "subset": "training", "resolution": "270x360", "url": "https://www.youtube.com/watch?v=AQSiBbhLBhs", "annotations": [{"segment": [8.425210595943838, 124.73732969999999], "label": "Brushing hair"}, {"segment": [116.89979701872076, 215.5449710795632], "label": "Brushing hair"}]}, "5YVN5d8DkSk": {"duration": 137.857, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=5YVN5d8DkSk", "annotations": [{"segment": [0.01, 136.78154240873633], "label": "Beer pong"}]}, "RgeVltfPDXQ": {"duration": 203.988, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RgeVltfPDXQ", "annotations": [{"segment": [0.01, 199.24445437865688], "label": "Hitting a pinata"}]}, "pzSqv6KB-Qc": {"duration": 165.884, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pzSqv6KB-Qc", "annotations": []}, "owLv-_CPNJI": {"duration": 55.263, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=owLv-_CPNJI", "annotations": [{"segment": [8.701283931357255, 50.57082839313572], "label": "Assembling bicycle"}]}, "a7oaRcmRZ5k": {"duration": 129.196, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=a7oaRcmRZ5k", "annotations": [{"segment": [20.405599527626556, 123.03731158370132], "label": "Wakeboarding"}]}, "rAL6jEy54Tk": {"duration": 32.09, "subset": "training", "resolution": "854x480", "url": 
"https://www.youtube.com/watch?v=rAL6jEy54Tk", "annotations": [{"segment": [3.1539289984399375, 12.86602781903276], "label": "Throwing darts"}]}, "nsDyItaRUXI": {"duration": 82.686, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=nsDyItaRUXI", "annotations": [{"segment": [0.2, 72.91309408756997], "label": "Knitting"}]}, "FgKnVoyAqaQ": {"duration": 218.872, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=FgKnVoyAqaQ", "annotations": []}, "wv2baWJtSoc": {"duration": 34.459, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=wv2baWJtSoc", "annotations": [{"segment": [17.603007800312014, 32.48783931357254], "label": "Using the rowing machine"}]}, "tMM166j4YEw": {"duration": 42.679, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=tMM166j4YEw", "annotations": [{"segment": [8.455758271450858, 18.77577820904836], "label": "Calf roping"}]}, "D2ggFcgEbFo": {"duration": 176.194, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=D2ggFcgEbFo", "annotations": [{"segment": [42.88004708892356, 173.99403722620906], "label": "Snow tubing"}]}, "44M2Ov9JzUE": {"duration": 55.821, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=44M2Ov9JzUE", "annotations": [{"segment": [0.2, 54.93046957878315], "label": "Playing beach volleyball"}]}, "P14X14uarj0": {"duration": 69.52, "subset": "training", "resolution": "576x360", "url": "https://www.youtube.com/watch?v=P14X14uarj0", "annotations": [{"segment": [1.6976784, 62.45331669266771], "label": "Layup drill in basketball"}]}, "OLRDf3vO8lw": {"duration": 39.962, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OLRDf3vO8lw", "annotations": [{"segment": [13.403424009360375, 31.35777803120125], "label": "Building sandcastles"}]}, "8SCg3toperM": {"duration": 88.376, "subset": 
"training", "resolution": "400x226", "url": "https://www.youtube.com/watch?v=8SCg3toperM", "annotations": [{"segment": [4.183125443610075, 88.25843852772222], "label": "Hurling"}]}, "6EWzgWd72Cs": {"duration": 237.935, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=6EWzgWd72Cs", "annotations": [{"segment": [35.20045950155763, 192.69239875389408], "label": "Grooming dog"}]}, "UgO10QT_eSA": {"duration": 57.1, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=UgO10QT_eSA", "annotations": []}, "C10_qXWxpsk": {"duration": 73.53999999999999, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=C10_qXWxpsk", "annotations": [{"segment": [23.288817571750492, 59.08246822389903], "label": "Tumbling"}]}, "waF8oGaQqvI": {"duration": 134.82, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=waF8oGaQqvI", "annotations": [{"segment": [85.59999745085804, 96.32628705772231], "label": "Getting a tattoo"}]}, "kH8o0M69KGc": {"duration": 32.56, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=kH8o0M69KGc", "annotations": [{"segment": [0.5819370655226209, 31.85472893447738], "label": "Clean and jerk"}]}, "inbkDgHuRrY": {"duration": 102.423, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=inbkDgHuRrY", "annotations": [{"segment": [33.381636505460214, 97.1291856474259], "label": "Roof shingle removal"}]}, "5Fq-ln3yC38": {"duration": 127.71, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5Fq-ln3yC38", "annotations": [{"segment": [101.01223627581903, 120.13881355881435], "label": "Polishing forniture"}]}, "LYOQ0gVySxE": {"duration": 49.6, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=LYOQ0gVySxE", "annotations": [{"segment": [27.390996861154445, 45.419534343213726], "label": 
"Drinking coffee"}]}, "e5rZPT7BJas": {"duration": 33.62, "subset": "training", "resolution": "1072x720", "url": "https://www.youtube.com/watch?v=e5rZPT7BJas", "annotations": [{"segment": [17.5609733900156, 25.62174806084243], "label": "Cleaning shoes"}]}, "WxIo1WvHOz4": {"duration": 66.53, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=WxIo1WvHOz4", "annotations": []}, "JiNcVb97CnA": {"duration": 224.42000000000002, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=JiNcVb97CnA", "annotations": []}, "i-159wZdZSE": {"duration": 215.157, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=i-159wZdZSE", "annotations": []}, "1bcm3LHTI_w": {"duration": 129.545, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1bcm3LHTI_w", "annotations": [{"segment": [58.97541337330947, 60.99323466589712], "label": "Playing congas"}, {"segment": [69.06451983624773, 73.10016242142302], "label": "Playing congas"}, {"segment": [81.17144759177361, 124.35282325314927], "label": "Playing congas"}]}, "jGLJuL49zYE": {"duration": 86.15, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=jGLJuL49zYE", "annotations": [{"segment": [0.6719627418096724, 80.36674392043682], "label": "Breakdancing"}]}, "zAr9k1-umvY": {"duration": 211.813, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=zAr9k1-umvY", "annotations": [{"segment": [0.01, 211.761], "label": "Laying tile"}]}, "UU3kcPSassU": {"duration": 205.47, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UU3kcPSassU", "annotations": []}, "IvKaW2Nsr3A": {"duration": 210.095, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IvKaW2Nsr3A", "annotations": []}, "Bg9hw5K-m6Q": {"duration": 59.24, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=Bg9hw5K-m6Q", "annotations": [{"segment": [2.33322726, 50.83035824180968], "label": "Brushing hair"}]}, "bXdq2zI1Ms0": {"duration": 73.097, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bXdq2zI1Ms0", "annotations": [{"segment": [36.719261075204905, 72.07010248301088], "label": "Capoeira"}]}, "GZvOCDorlQw": {"duration": 94.97, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=GZvOCDorlQw", "annotations": []}, "p9iE8iemCMs": {"duration": 134.58, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=p9iE8iemCMs", "annotations": [{"segment": [11.337688560603649, 90.49155128926245], "label": "Playing flauta"}]}, "j7rvKBvvvRg": {"duration": 163.84, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=j7rvKBvvvRg", "annotations": [{"segment": [27.09366598127925, 61.34414939157567], "label": "Washing dishes"}]}, "MdrK2uQ-GvA": {"duration": 70.71000000000001, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MdrK2uQ-GvA", "annotations": [{"segment": [9.26551667780997, 22.2813615347335], "label": "Plataform diving"}, {"segment": [61.54950351201248, 68.05742592636506], "label": "Plataform diving"}]}, "14dA-qLmNZo": {"duration": 65.25, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=14dA-qLmNZo", "annotations": [{"segment": [25.142261399375975, 29.9264164024961], "label": "Sailing"}]}, "VvlJjaLwGqY": {"duration": 135.7, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=VvlJjaLwGqY", "annotations": [{"segment": [32.812947456673776, 130.8283969562864], "label": "Preparing salad"}]}, "DjY-7jEpxZ0": {"duration": 24.056, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=DjY-7jEpxZ0", "annotations": [{"segment": [0.01, 24.055873015873015], "label": 
"Hitting a pinata"}]}, "YxUNzzrgJnI": {"duration": 168.97, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YxUNzzrgJnI", "annotations": [{"segment": [29.523801599683033, 150.51866611856474], "label": "Bungee jumping"}]}, "3Rzyta8op6s": {"duration": 113.43, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3Rzyta8op6s", "annotations": []}, "Ssoqzp-ZL8A": {"duration": 46.255, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ssoqzp-ZL8A", "annotations": []}, "q5thIuCSTkk": {"duration": 74.026, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=q5thIuCSTkk", "annotations": [{"segment": [7.7374250126467645, 41.34325580343214], "label": "Skiing"}]}, "ufxyjYMKe7c": {"duration": 125.643, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ufxyjYMKe7c", "annotations": [{"segment": [27.539572592427504, 51.84496050318203], "label": "Rock-paper-scissors"}]}, "z8x3wUZYOaM": {"duration": 41.66, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=z8x3wUZYOaM", "annotations": []}, "sjpWwjzCbrw": {"duration": 161.43, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=sjpWwjzCbrw", "annotations": [{"segment": [3.364097000000001, 132.71613217725988], "label": "Washing hands"}]}, "8vc8xXSowoY": {"duration": 73.52, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8vc8xXSowoY", "annotations": []}, "3X2CY79a0X8": {"duration": 82.129, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=3X2CY79a0X8", "annotations": [{"segment": [0.19999999999999998, 79.50458255451714], "label": "Cutting the grass"}]}, "aJsDVKt5Igs": {"duration": 42.61, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=aJsDVKt5Igs", "annotations": [{"segment": 
[11.167312511700468, 34.36607481279251], "label": "Playing kickball"}]}, "zPQodcasmSA": {"duration": 67.83, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zPQodcasmSA", "annotations": []}, "eW5W8dVB_I4": {"duration": 122.09, "subset": "training", "resolution": "1920x1072", "url": "https://www.youtube.com/watch?v=eW5W8dVB_I4", "annotations": [{"segment": [96.66293230967239, 102.75793493806552], "label": "Layup drill in basketball"}]}, "yc9Bc8G7Y_Q": {"duration": 7.18, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yc9Bc8G7Y_Q", "annotations": [{"segment": [2.5073200624024965, 6.111592652106085], "label": "Doing motocross"}]}, "LzPppxSWYNY": {"duration": 92.58, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=LzPppxSWYNY", "annotations": []}, "FKphYO14qhw": {"duration": 230.76, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=FKphYO14qhw", "annotations": [{"segment": [13.316594383775351, 230.501], "label": "Playing pool"}]}, "rlH9RkelqHk": {"duration": 18.019, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=rlH9RkelqHk", "annotations": [{"segment": [2.1644907815912635, 13.436709007800312], "label": "Putting on shoes"}]}, "mAMNYRLAZ_M": {"duration": 22.895, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mAMNYRLAZ_M", "annotations": []}, "rO9SwC42Goo": {"duration": 22.152, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rO9SwC42Goo", "annotations": [{"segment": [4.065188797330752, 19.843633937775774], "label": "Camel ride"}]}, "HxLGLfEIxd0": {"duration": 156.224, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HxLGLfEIxd0", "annotations": []}, "dHKeNK1uWUg": {"duration": 79.18, "subset": "testing", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=dHKeNK1uWUg", "annotations": []}, "hM3bDhEIG7w": {"duration": 181.91, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=hM3bDhEIG7w", "annotations": []}, "zKHMKAOb1iw": {"duration": 30.094, "subset": "validation", "resolution": "352x240", "url": "https://www.youtube.com/watch?v=zKHMKAOb1iw", "annotations": [{"segment": [7.7849079563182535, 21.291254290171608], "label": "Curling"}]}, "CDncYtx6Lwc": {"duration": 90.23, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=CDncYtx6Lwc", "annotations": []}, "-HHybw--2XA": {"duration": 135.257, "subset": "training", "resolution": "656x480", "url": "https://www.youtube.com/watch?v=-HHybw--2XA", "annotations": [{"segment": [13.709929797191887, 123.61857410296412], "label": "Disc dog"}]}, "zBtmE0BAjGE": {"duration": 65.411, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zBtmE0BAjGE", "annotations": [{"segment": [16.122962845553822, 45.00143427145086], "label": "Cleaning sink"}]}, "zGTqXydTuQs": {"duration": 158.895, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zGTqXydTuQs", "annotations": [{"segment": [48.83330340560562, 54.53465354940728], "label": "Scuba diving"}, {"segment": [68.41620172562004, 79.07524764664056], "label": "Scuba diving"}, {"segment": [84.77659779044224, 103.36795652574102], "label": "Scuba diving"}]}, "76hO-Xc1VzA": {"duration": 190.474, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=76hO-Xc1VzA", "annotations": []}, "ZbAfEWdGBzQ": {"duration": 12.795, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZbAfEWdGBzQ", "annotations": [{"segment": [3.075779251170047, 10.760250390015601], "label": "Doing a powerbomb"}]}, "LublNVXGH5I": {"duration": 217.13, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=LublNVXGH5I", "annotations": [{"segment": [97.53944461778471, 212.3514992199688], "label": "Cleaning windows"}]}, "5fW_2c_kKfc": {"duration": 200.18, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5fW_2c_kKfc", "annotations": [{"segment": [6.870425957174342, 197.99318338845555], "label": "Volleyball"}]}, "1d6FGbM4FRM": {"duration": 105.489, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1d6FGbM4FRM", "annotations": [{"segment": [0.01, 105.441], "label": "Laying tile"}]}, "uJ_QCxMDfag": {"duration": 29.63, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=uJ_QCxMDfag", "annotations": [{"segment": [0.2311128003120125, 29.351326760553416], "label": "Mowing the lawn"}]}, "2cnNnskrZkU": {"duration": 127.826, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=2cnNnskrZkU", "annotations": []}, "QE80ROTC_fI": {"duration": 146.054, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QE80ROTC_fI", "annotations": [{"segment": [9.569809029641187, 142.86357765678628], "label": "Disc dog"}]}, "jsqD1NB63hk": {"duration": 74.56, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jsqD1NB63hk", "annotations": [{"segment": [57.77993291731669, 69.17315912636506], "label": "Swimming"}]}, "TcGhRsPTTRE": {"duration": 122.207, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TcGhRsPTTRE", "annotations": [{"segment": [12.004497659906397, 54.49660842433697], "label": "Capoeira"}, {"segment": [70.69315288611546, 76.8032496099844], "label": "Capoeira"}, {"segment": [104.43717316692668, 115.49074258970359], "label": "Capoeira"}]}, "wH-uaN8gL_k": {"duration": 60.05, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=wH-uaN8gL_k", "annotations": [{"segment": 
[17.679944239999998, 43.851593799999996], "label": "Dodgeball"}]}, "4fQUWOuFjwQ": {"duration": 122.18, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4fQUWOuFjwQ", "annotations": [{"segment": [16.583394383775353, 27.638990639625586], "label": "Playing polo"}, {"segment": [113.22455475819032, 118.37112542901717], "label": "Playing polo"}]}, "zBmVL3I3nFU": {"duration": 129.103, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zBmVL3I3nFU", "annotations": [{"segment": [68.68002479563182, 101.10666406864276], "label": "Braiding hair"}]}, "y_bXP4NtAw0": {"duration": 197.068, "subset": "validation", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=y_bXP4NtAw0", "annotations": [{"segment": [5.224675507020281, 69.15713728549142], "label": "Welding"}, {"segment": [108.1924992199688, 148.14995631825272], "label": "Welding"}, {"segment": [160.75192355694227, 189.6442386895476], "label": "Welding"}]}, "jGoW5WVAtX4": {"duration": 97.153, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jGoW5WVAtX4", "annotations": [{"segment": [30.312585564647076, 39.254798306217964], "label": "Beer pong"}, {"segment": [47.74232226431914, 51.07670576443058], "label": "Beer pong"}, {"segment": [91.99869554602184, 96.951835], "label": "Beer pong"}]}, "WE9Md637nbc": {"duration": 82.11, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WE9Md637nbc", "annotations": [{"segment": [14.288872854914196, 69.00948907956318], "label": "Cleaning shoes"}]}, "_zkcTAj5Z8Q": {"duration": 76.696, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_zkcTAj5Z8Q", "annotations": [{"segment": [0.5982489001560063, 76.695509], "label": "Swimming"}]}, "I5QbY8vlR54": {"duration": 51.015, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=I5QbY8vlR54", "annotations": 
[{"segment": [2.7854888689547583, 51.014239], "label": "Surfing"}]}, "-79MZQX4CEA": {"duration": 65.23, "subset": "validation", "resolution": "490x360", "url": "https://www.youtube.com/watch?v=-79MZQX4CEA", "annotations": [{"segment": [24.624551490903173, 47.5192791167429], "label": "Playing field hockey"}]}, "2NMTArm9IkA": {"duration": 13.282, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2NMTArm9IkA", "annotations": [{"segment": [8.453946496099844, 11.562015060842434], "label": "Cleaning sink"}]}, "8qODQbBVsus": {"duration": 140.5, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=8qODQbBVsus", "annotations": [{"segment": [35.000702329999996, 133.68706708268328], "label": "Preparing salad"}]}, "2-xPjKzp-Kk": {"duration": 220.799, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2-xPjKzp-Kk", "annotations": [{"segment": [4.1324368174726995, 216.62818876755068], "label": "Playing blackjack"}]}, "CYG4TdmZn9c": {"duration": 22.15, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=CYG4TdmZn9c", "annotations": []}, "NeUIHk1uCgA": {"duration": 99.6, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NeUIHk1uCgA", "annotations": [{"segment": [23.63982345, 93.2086887675507], "label": "Using the balance beam"}]}, "31PcR8hZixQ": {"duration": 102.238, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=31PcR8hZixQ", "annotations": []}, "KPfKLVV-HCQ": {"duration": 135.47, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=KPfKLVV-HCQ", "annotations": [{"segment": [6.34002561622465, 134.61987725117007], "label": "Doing step aerobics"}]}, "ddepLUaQTsg": {"duration": 12.05, "subset": "testing", "resolution": "256x192", "url": "https://www.youtube.com/watch?v=ddepLUaQTsg", "annotations": []}, "9ukVV07rszg": 
{"duration": 194.212, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=9ukVV07rszg", "annotations": [{"segment": [51.79646021840875, 59.375157566302654], "label": "Doing fencing"}, {"segment": [69.97786427457099, 79.06589859594385], "label": "Doing fencing"}, {"segment": [85.12458814352574, 88.15393291731671], "label": "Doing fencing"}, {"segment": [95.12142589703589, 111.78282215288613], "label": "Doing fencing"}, {"segment": [116.02390483619347, 118.75031513260531], "label": "Doing fencing"}, {"segment": [125.11193915756631, 132.0794321372855], "label": "Doing fencing"}, {"segment": [135.41171138845553, 137.8351872074883], "label": "Doing fencing"}, {"segment": [143.8938767550702, 150.8613697347894], "label": "Doing fencing"}, {"segment": [153.58778003120125, 171.46091419656787], "label": "Doing fencing"}, {"segment": [180.8518829953198, 186.60763806552262], "label": "Doing fencing"}]}, "yEtwO77xQC4": {"duration": 203.25, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=yEtwO77xQC4", "annotations": []}, "m7vcstRI23E": {"duration": 159.08, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=m7vcstRI23E", "annotations": [{"segment": [31.952817757009342, 137.47142523364485], "label": "Baking cookies"}]}, "C0OxaqCr_wk": {"duration": 25.54, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=C0OxaqCr_wk", "annotations": [{"segment": [0.9347574102964118, 24.204250390015602], "label": "Smoking a cigarette"}]}, "747hJQNJpeg": {"duration": 183.345, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=747hJQNJpeg", "annotations": [{"segment": [0.2, 182.749015600624], "label": "Playing drums"}]}, "h4phcAYe0aE": {"duration": 29.699, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=h4phcAYe0aE", "annotations": [{"segment": [0.2, 28.26429087107325], 
"label": "Raking leaves"}]}, "F6hVDkYcjv0": {"duration": 90.49, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=F6hVDkYcjv0", "annotations": []}, "9Hw3xvJJTy0": {"duration": 38.104, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9Hw3xvJJTy0", "annotations": [{"segment": [0.01, 36.24687766055526], "label": "Doing a powerbomb"}]}, "_AuJBnTClQ8": {"duration": 143.66, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_AuJBnTClQ8", "annotations": [{"segment": [27.55526209048362, 69.04060338000001], "label": "Shaving"}, {"segment": [80.13266919, 109.98542499000001], "label": "Shaving"}]}, "uJLhYVxWW5M": {"duration": 181.163, "subset": "testing", "resolution": "1274x720", "url": "https://www.youtube.com/watch?v=uJLhYVxWW5M", "annotations": []}, "VaT3qsoHPQ8": {"duration": 159.197, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VaT3qsoHPQ8", "annotations": [{"segment": [10.675605304212167, 35.25432449297972], "label": "Baking cookies"}, {"segment": [45.681659906396256, 85.40484243369734], "label": "Baking cookies"}, {"segment": [91.36331981279251, 107.5143728549142], "label": "Baking cookies"}, {"segment": [112.48039469578782, 127.13015912636506], "label": "Baking cookies"}]}, "A9uBPwP7DQc": {"duration": 161.1, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=A9uBPwP7DQc", "annotations": [{"segment": [127.40728081123244, 150.77784711388455], "label": "Playing flauta"}]}, "Y3j1uDKRpPU": {"duration": 12.98, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Y3j1uDKRpPU", "annotations": [{"segment": [0.01, 0.02], "label": "Polishing shoes"}]}, "moxpALb09zU": {"duration": 168.48, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=moxpALb09zU", "annotations": [{"segment": [91.98338533541342, 
164.51885491419657], "label": "Shaving legs"}]}, "-FpAgvltiC4": {"duration": 147.215, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-FpAgvltiC4", "annotations": []}, "8qIl-0XOguM": {"duration": 213.369, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8qIl-0XOguM", "annotations": [{"segment": [51.25509204368175, 213.321], "label": "Snow tubing"}]}, "pXeVR0bf4Pk": {"duration": 198.461, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=pXeVR0bf4Pk", "annotations": [{"segment": [24.459305973478937, 177.7169826427457], "label": "Rafting"}]}, "zMrUSfQ_mzo": {"duration": 22.059, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=zMrUSfQ_mzo", "annotations": [{"segment": [0.2, 16.690472971918876], "label": "Playing rubik cube"}]}, "GiHCOSgpzYA": {"duration": 84.87, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GiHCOSgpzYA", "annotations": []}, "FOlMVTmAeaQ": {"duration": 129.61, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=FOlMVTmAeaQ", "annotations": [{"segment": [20.018262918271834, 27.499835928131006], "label": "Shot put"}, {"segment": [71.98486463540175, 78.25320958960808], "label": "Shot put"}, {"segment": [102.11336034321373, 116.67209686739469], "label": "Shot put"}]}, "irsm8ihOS9w": {"duration": 44.072, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=irsm8ihOS9w", "annotations": []}, "StRa193fqMM": {"duration": 102.888, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=StRa193fqMM", "annotations": []}, "iFTYM1Ez5ZM": {"duration": 156.97, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iFTYM1Ez5ZM", "annotations": [{"segment": [8.69317428479452, 14.08049355987845], "label": "Triple jump"}, {"segment": 
[64.28051407770596, 74.07564003240401], "label": "Triple jump"}, {"segment": [80.93222746645866, 86.07466854602185], "label": "Triple jump"}, {"segment": [117.66394946333854, 129.90785679563183], "label": "Triple jump"}]}, "K6QunmuTZOQ": {"duration": 39.34, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=K6QunmuTZOQ", "annotations": []}, "1GzrcmQ5Pcc": {"duration": 160.31, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1GzrcmQ5Pcc", "annotations": [{"segment": [13.247602184087363, 50.49086115444618], "label": "Dodgeball"}, {"segment": [71.48706084243369, 128.22679095163807], "label": "Dodgeball"}]}, "wibcfckhRyU": {"duration": 56.17, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wibcfckhRyU", "annotations": [{"segment": [0.01, 55.36922349787204], "label": "Tug of war"}]}, "14AnvDNV5BI": {"duration": 22.338, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=14AnvDNV5BI", "annotations": [{"segment": [4.373429286271451, 19.462631524960997], "label": "Using the rowing machine"}]}, "3mD9Q3F6Xbg": {"duration": 60.721, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3mD9Q3F6Xbg", "annotations": []}, "-pK2JheWido": {"duration": 108.6, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-pK2JheWido", "annotations": []}, "9njq_aC4AS4": {"duration": 51.688, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9njq_aC4AS4", "annotations": [{"segment": [0.01, 51.687619047619044], "label": "Disc dog"}]}, "frePM0YGtQE": {"duration": 175.172, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=frePM0YGtQE", "annotations": [{"segment": [44.25834945397816, 112.55827145085804], "label": "Clipping cat claws"}]}, "v1Vmf5s42No": {"duration": 22.41, "subset": "validation", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=v1Vmf5s42No", "annotations": [{"segment": [0.9418408736349454, 19.553670479999997], "label": "Smoking a cigarette"}]}, "lAN2pe1lW-o": {"duration": 38.78, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=lAN2pe1lW-o", "annotations": [{"segment": [1.30329497, 8.008085023400936], "label": "Shot put"}, {"segment": [14.172799531981278, 22.392418876755073], "label": "Shot put"}, {"segment": [27.59011934477379, 36.23280733229329], "label": "Shot put"}]}, "NmmFlMtvE68": {"duration": 77.973, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NmmFlMtvE68", "annotations": [{"segment": [13.137266246489862, 58.38784998439938], "label": "Grooming dog"}]}, "xmag8UI8iws": {"duration": 104.328, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=xmag8UI8iws", "annotations": [{"segment": [0.01, 104.326983], "label": "Blowing leaves"}]}, "9zm9jW7_ANc": {"duration": 210.35, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=9zm9jW7_ANc", "annotations": [{"segment": [114.36322129892, 195.41835949356343], "label": "Preparing salad"}]}, "AtHnNOkaoUk": {"duration": 229.089, "subset": "training", "resolution": "638x480", "url": "https://www.youtube.com/watch?v=AtHnNOkaoUk", "annotations": [{"segment": [100.47096884444551, 106.88400975598896], "label": "Welding"}, {"segment": [144.64969512396715, 149.63761583294541], "label": "Welding"}, {"segment": [220.1810643583102, 228.88807100000002], "label": "Welding"}]}, "7wyt_-SoudU": {"duration": 111.758, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7wyt_-SoudU", "annotations": [{"segment": [0.01, 111.721], "label": "Welding"}]}, "1RQOgX36Z2E": {"duration": 218.663, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1RQOgX36Z2E", "annotations": [{"segment": 
[19.383745723503242, 129.90513136155198], "label": "Changing car wheel"}, {"segment": [153.70973753573548, 179.89480432733734], "label": "Changing car wheel"}]}, "cQ3pZhIBPHI": {"duration": 55.938, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cQ3pZhIBPHI", "annotations": [{"segment": [4.055220748829953, 39.37488533541342], "label": "Kneeling"}]}, "LB2P_KH0W2I": {"duration": 10.1, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=LB2P_KH0W2I", "annotations": [{"segment": [0.7294076, 10.014123432917316], "label": "Washing dishes"}]}, "VQjnNPRfcTE": {"duration": 44.86, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VQjnNPRfcTE", "annotations": []}, "b2YyPJ02NNQ": {"duration": 133.7, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=b2YyPJ02NNQ", "annotations": [{"segment": [13.964535101404056, 16.882497659906395], "label": "Powerbocking"}, {"segment": [33.978507020280816, 48.77896099843994], "label": "Powerbocking"}, {"segment": [51.28044617784712, 124.44888767550702], "label": "Powerbocking"}]}, "RINsqxGLQdQ": {"duration": 144.36, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=RINsqxGLQdQ", "annotations": [{"segment": [8.105391575663027, 60.56528705148206], "label": "Windsurfing"}, {"segment": [65.06828237129484, 110.54853510140406], "label": "Windsurfing"}]}, "oIOR6556hvo": {"duration": 63.07, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oIOR6556hvo", "annotations": []}, "e0yu4WgiZUM": {"duration": 232.455, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=e0yu4WgiZUM", "annotations": []}, "DKnvOGEDUyQ": {"duration": 179.885, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DKnvOGEDUyQ", "annotations": [{"segment": [6.8754792858381, 171.88698214595252], 
"label": "Table soccer"}]}, "DDh5-FjIegY": {"duration": 26.33, "subset": "validation", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=DDh5-FjIegY", "annotations": [{"segment": [0.2875459970779784, 26.330996], "label": "Discus throw"}]}, "IJV7CwRhFvw": {"duration": 168.44, "subset": "training", "resolution": "540x360", "url": "https://www.youtube.com/watch?v=IJV7CwRhFvw", "annotations": [{"segment": [10.77369671113375, 164.4959546626763], "label": "Playing racquetball"}]}, "iFJaqDgYsp0": {"duration": 88.89, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=iFJaqDgYsp0", "annotations": [{"segment": [11.925420764430577, 81.95260083463339], "label": "Clean and jerk"}]}, "TexMXN2yegk": {"duration": 143.035, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TexMXN2yegk", "annotations": [{"segment": [0.2, 21.6367503900156], "label": "Carving jack-o-lanterns"}, {"segment": [26.097936037441496, 43.942678627145085], "label": "Carving jack-o-lanterns"}, {"segment": [55.095642745709824, 94.81345553822153], "label": "Carving jack-o-lanterns"}]}, "wd7sLvmmVrs": {"duration": 95.992, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wd7sLvmmVrs", "annotations": []}, "ghFOHoBiyD8": {"duration": 214.53, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ghFOHoBiyD8", "annotations": [{"segment": [41.49086427457098, 46.17531669266771], "label": "Playing ten pins"}, {"segment": [151.57549609984397, 166.63266458658345], "label": "Playing ten pins"}, {"segment": [179.69896567862716, 196.43071294851794], "label": "Playing ten pins"}]}, "5Z-78AmLjFo": {"duration": 95.295, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=5Z-78AmLjFo", "annotations": [{"segment": [1.6302335592090647, 2.9640628669819407], "label": "Welding"}, {"segment": [9.633209405846321, 11.263445226457613], "label": 
"Welding"}, {"segment": [17.043372226806742, 19.711030842352496], "label": "Welding"}, {"segment": [35.27237276636938, 72.47138869971756], "label": "Welding"}, {"segment": [75.88006353019969, 87.43991730313903], "label": "Welding"}]}, "yVTcDXAykr0": {"duration": 18.41, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=yVTcDXAykr0", "annotations": []}, "ooadlJjyibo": {"duration": 75.42, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ooadlJjyibo", "annotations": [{"segment": [0.8818993759750391, 72.1393689547582], "label": "Using the balance beam"}]}, "NBawYEfglow": {"duration": 85.38, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=NBawYEfglow", "annotations": [{"segment": [4.856322152886115, 10.57746879875195], "label": "Playing violin"}, {"segment": [21.88671216848674, 30.93410686427457], "label": "Playing violin"}, {"segment": [32.663755850234004, 34.39340483619345], "label": "Playing violin"}, {"segment": [46.50094773790951, 49.42804602184087], "label": "Playing violin"}]}, "xpoIzpip_TQ": {"duration": 79.46000000000001, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=xpoIzpip_TQ", "annotations": [{"segment": [5.13408736349454, 74.66076443057722], "label": "Volleyball"}]}, "x-kPXpuo16Q": {"duration": 7.45, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=x-kPXpuo16Q", "annotations": []}, "YmGXXV6ztUo": {"duration": 221.91, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YmGXXV6ztUo", "annotations": [{"segment": [104.68104524180967, 179.42850234009362], "label": "Archery"}]}, "pHiulmPx7ek": {"duration": 216.64, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pHiulmPx7ek", "annotations": [{"segment": [43.001270579999996, 119.13629851316502], "label": "Playing field hockey"}]}, "jUPbimyUndA": 
{"duration": 19.9, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=jUPbimyUndA", "annotations": [{"segment": [0.01, 9.064982839313572], "label": "Having an ice cream"}]}, "VuGLqAda2hE": {"duration": 59.05, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VuGLqAda2hE", "annotations": [{"segment": [24.87215763182527, 32.057447614352576], "label": "Spinning"}, {"segment": [38.505784778159125, 43.66445450920437], "label": "Spinning"}, {"segment": [49.92855346833073, 57.85079626957879], "label": "Spinning"}]}, "BZQb9uWULOc": {"duration": 161.983, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=BZQb9uWULOc", "annotations": [{"segment": [51.551342184299614, 118.01213999063964], "label": "Beer pong"}]}, "stdtUoog49I": {"duration": 49.087, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=stdtUoog49I", "annotations": []}, "idACyRv-Sqk": {"duration": 35.434, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=idACyRv-Sqk", "annotations": [{"segment": [0.01, 34.67959250494699], "label": "Slacklining"}]}, "h7cetqsBVn0": {"duration": 232.386, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=h7cetqsBVn0", "annotations": [{"segment": [28.01361644011643, 187.9880120787457], "label": "Camel ride"}]}, "Uv_6SJlvCl0": {"duration": 47.79, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Uv_6SJlvCl0", "annotations": [{"segment": [5.591263650546022, 8.722370990639625], "label": "Drinking coffee"}]}, "uug0bhnLgHs": {"duration": 184.855, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uug0bhnLgHs", "annotations": [{"segment": [39.38562949360772, 130.80629685747397], "label": "Making a lemonade"}]}, "kkXSGwesZ0U": {"duration": 214.99, "subset": "training", "resolution": "640x480", 
"url": "https://www.youtube.com/watch?v=kkXSGwesZ0U", "annotations": [{"segment": [116.38496875663026, 151.9377257831513], "label": "Bungee jumping"}]}, "_kmjpSZb3OI": {"duration": 28.375, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=_kmjpSZb3OI", "annotations": []}, "Bvry1S-bYXU": {"duration": 158.15, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Bvry1S-bYXU", "annotations": [{"segment": [9.51594567, 155.7601478775351], "label": "Using the balance beam"}]}, "eH8PT9fzbqU": {"duration": 26.31, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eH8PT9fzbqU", "annotations": [{"segment": [11.615012268330734, 18.674313010920436], "label": "Discus throw"}]}, "GvP6gZbHn30": {"duration": 111.154, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=GvP6gZbHn30", "annotations": [{"segment": [0, 111.101], "label": "Playing rubik cube"}]}, "ARpoJIBF2NM": {"duration": 132.54, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ARpoJIBF2NM", "annotations": []}, "P7kSJ8SRNHQ": {"duration": 40.891, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=P7kSJ8SRNHQ", "annotations": [{"segment": [0.2, 40.643], "label": "Swinging at the playground"}]}, "oIJBu3NJslI": {"duration": 141.294, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oIJBu3NJslI", "annotations": []}, "OzjjQ4SZ8PY": {"duration": 154.181, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OzjjQ4SZ8PY", "annotations": [{"segment": [0.19999999999999998, 93.04965210608425], "label": "Scuba diving"}, {"segment": [100.9841185647426, 154.141], "label": "Scuba diving"}]}, "ySOSe7s0jcM": {"duration": 73.56, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ySOSe7s0jcM", "annotations": []}, 
"mW9iDH07j40": {"duration": 106.19, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mW9iDH07j40", "annotations": [{"segment": [56.81966285959438, 59.30448776599064], "label": "Ironing clothes"}, {"segment": [70.40337234789392, 77.52653707956318], "label": "Ironing clothes"}]}, "DvtfiPvPhZ8": {"duration": 108.3, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=DvtfiPvPhZ8", "annotations": [{"segment": [5.9951248049922, 99.55284711388455], "label": "Tango"}]}, "3w5xIOwFUXU": {"duration": 177.61, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3w5xIOwFUXU", "annotations": [{"segment": [56.663727769110764, 78.27629875195007], "label": "Drinking beer"}]}, "GKhuh6bha6U": {"duration": 188.569, "subset": "training", "resolution": "384x288", "url": "https://www.youtube.com/watch?v=GKhuh6bha6U", "annotations": [{"segment": [13.490181370630161, 175.0790930057871], "label": "Capoeira"}]}, "LqBp5qgcwLY": {"duration": 130.938, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LqBp5qgcwLY", "annotations": [{"segment": [6.533840873634945, 117.83132137285493], "label": "Hanging wallpaper"}]}, "rSGboODhu04": {"duration": 238.54, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=rSGboODhu04", "annotations": []}, "bNR0MmJ9d8I": {"duration": 152.416, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bNR0MmJ9d8I", "annotations": [{"segment": [0.01, 152.361], "label": "Assembling bicycle"}]}, "BmL2XGNrxc4": {"duration": 75.19, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BmL2XGNrxc4", "annotations": []}, "iZraW641GR0": {"duration": 18.67, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=iZraW641GR0", "annotations": []}, "I_6Ok72DnJA": {"duration": 51.479, "subset": "training", 
"resolution": "720x480", "url": "https://www.youtube.com/watch?v=I_6Ok72DnJA", "annotations": [{"segment": [0.2, 32.58445549066613], "label": "Hanging wallpaper"}]}, "xWOrsBkYXEQ": {"duration": 23.27, "subset": "testing", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=xWOrsBkYXEQ", "annotations": []}, "LNLsmdVMCmY": {"duration": 134.792, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=LNLsmdVMCmY", "annotations": [{"segment": [13.455076443057722, 134.741], "label": "Removing ice from car"}]}, "JK1Wn9sORbs": {"duration": 85.68, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JK1Wn9sORbs", "annotations": [{"segment": [12.417652057722309, 22.164841307332296], "label": "Mowing the lawn"}]}, "E2NKQZNMAO0": {"duration": 197.0, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=E2NKQZNMAO0", "annotations": [{"segment": [43.34152998, 139.72477145999997], "label": "Tai chi"}]}, "aDWrPrNFdR0": {"duration": 111.99, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aDWrPrNFdR0", "annotations": [{"segment": [25.712309325362828, 78.74176627240332], "label": "Beer pong"}, {"segment": [91.65025250293291, 108.04751879576781], "label": "Beer pong"}]}, "p8C7govzEPw": {"duration": 163.608, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=p8C7govzEPw", "annotations": [{"segment": [42.862539781591266, 141.85459594383775], "label": "Grooming dog"}]}, "2St4Skz9zvE": {"duration": 182.068, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=2St4Skz9zvE", "annotations": []}, "ni6VySdH0XY": {"duration": 69.451, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ni6VySdH0XY", "annotations": [{"segment": [9.094670826833074, 69.401], "label": "Sharpening knives"}]}, "A1vDgtl1ufE": {"duration": 131.703, 
"subset": "testing", "resolution": "360x288", "url": "https://www.youtube.com/watch?v=A1vDgtl1ufE", "annotations": []}, "CM5mFH3_Qhs": {"duration": 38.452, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=CM5mFH3_Qhs", "annotations": []}, "xrl3oxTa6sQ": {"duration": 104.61, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=xrl3oxTa6sQ", "annotations": [{"segment": [30.711874478939162, 94.36718446], "label": "Starting a campfire"}]}, "mWXonCBxeaw": {"duration": 53.244, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mWXonCBxeaw", "annotations": []}, "A3160tXXLGg": {"duration": 25.124, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=A3160tXXLGg", "annotations": [{"segment": [2.0969321602796085, 19.146754398067078], "label": "Playing rubik cube"}]}, "dRAn_gsx9Wc": {"duration": 192.94, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dRAn_gsx9Wc", "annotations": [{"segment": [0.01, 186.76413214895945], "label": "Playing lacrosse"}]}, "mb5CLhyHMvU": {"duration": 136.255, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mb5CLhyHMvU", "annotations": [{"segment": [0.01, 136.25433106575963], "label": "Skiing"}]}, "ekdKb7Gynko": {"duration": 13.84, "subset": "testing", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=ekdKb7Gynko", "annotations": []}, "dU7jG8G5li0": {"duration": 220.265, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dU7jG8G5li0", "annotations": []}, "OlykXdYeQOI": {"duration": 129.08, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OlykXdYeQOI", "annotations": []}, "l8NepxsCh34": {"duration": 48.67, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=l8NepxsCh34", "annotations": [{"segment": 
[2.581508343326931, 47.60604901872075], "label": "Hopscotch"}]}, "ExcXXMZYGs0": {"duration": 187.687, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ExcXXMZYGs0", "annotations": [{"segment": [13.17614686427457, 180.0740071450858], "label": "Rafting"}]}, "nAh1HyYtA04": {"duration": 119.82, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nAh1HyYtA04", "annotations": []}, "Wg0xG-eRTho": {"duration": 159.61, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Wg0xG-eRTho", "annotations": [{"segment": [36.85301060218409, 109.3139976645866], "label": "Shaving legs"}]}, "hV9sBYo342M": {"duration": 37.31, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=hV9sBYo342M", "annotations": [{"segment": [2.33473698, 21.368686427457096], "label": "High jump"}]}, "wNwwiK6znIE": {"duration": 46.348, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wNwwiK6znIE", "annotations": [{"segment": [0.01, 33.05388143525741], "label": "Waxing skis"}]}, "m3IL58elWc0": {"duration": 74.14, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=m3IL58elWc0", "annotations": []}, "QN-4aLiDhiQ": {"duration": 217.5, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QN-4aLiDhiQ", "annotations": []}, "dcMFJ-8Eo7g": {"duration": 153.136, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=dcMFJ-8Eo7g", "annotations": [{"segment": [12.422856705148208, 98.6661503697348], "label": "Laying tile"}]}, "bH7ORYDcI7A": {"duration": 190.381, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bH7ORYDcI7A", "annotations": [{"segment": [27.607071762870515, 185.25401560062403], "label": "Disc dog"}]}, "E2KlBkQJyzU": {"duration": 197.742, "subset": "training", "resolution": "320x240", 
"url": "https://www.youtube.com/watch?v=E2KlBkQJyzU", "annotations": [{"segment": [27.169300694926132, 148.43783652047128], "label": "Playing ice hockey"}]}, "hyMqGr0mOfA": {"duration": 63.646, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hyMqGr0mOfA", "annotations": []}, "HNvolNt5RU0": {"duration": 42.89, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HNvolNt5RU0", "annotations": [{"segment": [0.01, 42.887256235827664], "label": "Playing water polo"}]}, "8-QcL1k5n6k": {"duration": 60.837, "subset": "validation", "resolution": "854x468", "url": "https://www.youtube.com/watch?v=8-QcL1k5n6k", "annotations": [{"segment": [2.70243135725429, 6.212894695787831], "label": "Using the rowing machine"}, {"segment": [14.085723088923556, 19.018097503900155], "label": "Using the rowing machine"}, {"segment": [29.451966458658347, 44.059382995319815], "label": "Using the rowing machine"}]}, "4-Xe_9Ywvd8": {"duration": 171.57, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4-Xe_9Ywvd8", "annotations": [{"segment": [18.06728007895826, 104.52256104938076], "label": "Cheerleading"}]}, "0gA_3BAxtVM": {"duration": 175.915, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0gA_3BAxtVM", "annotations": [{"segment": [9.316337691878413, 172.0782359813084], "label": "BMX"}]}, "0zXTrkgUj0U": {"duration": 56.8, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0zXTrkgUj0U", "annotations": [{"segment": [13.82528232, 48.941431069422784], "label": "Vacuuming floor"}, {"segment": [4.9724898, 11.09680248], "label": "Vacuuming floor"}]}, "55IErOrgQOA": {"duration": 70.64, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=55IErOrgQOA", "annotations": [{"segment": [12.115616224648985, 70.1604321372855], "label": "Grooming horse"}]}, "BERvPz1e_AU": {"duration": 
133.82, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=BERvPz1e_AU", "annotations": [{"segment": [77.7671684, 84.43388152], "label": "Discus throw"}, {"segment": [24.36655544, 62.098652959999995], "label": "Discus throw"}]}, "---9CpRcKoU": {"duration": 14.07, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=---9CpRcKoU", "annotations": [{"segment": [0.01, 12.644405017160688], "label": "Drinking beer"}]}, "LHjmL7Pg_80": {"duration": 230.969, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=LHjmL7Pg_80", "annotations": [{"segment": [16.214661294851794, 30.988019363494537], "label": "Futsal"}, {"segment": [34.2309516224649, 85.75754195943838], "label": "Futsal"}, {"segment": [86.4781935725429, 94.40536131669266], "label": "Futsal"}, {"segment": [99.44992260842433, 217.99711296411857], "label": "Futsal"}]}, "n_9skH6xGeM": {"duration": 197.695, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=n_9skH6xGeM", "annotations": [{"segment": [0.01, 189.47130794642797], "label": "Rope skipping"}]}, "HPv-d6qybdE": {"duration": 214.51, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HPv-d6qybdE", "annotations": [{"segment": [8.0010365, 213.16737042815046], "label": "Pole vault"}]}, "nfPuaotli6I": {"duration": 130.915, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nfPuaotli6I", "annotations": []}, "rZMiqPMOpzA": {"duration": 218.15, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rZMiqPMOpzA", "annotations": [{"segment": [2.3819765990639628, 216.41958814352574], "label": "Playing accordion"}]}, "pHU2Ie4VQBg": {"duration": 211.3, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pHU2Ie4VQBg", "annotations": [{"segment": [12.524053042121684, 204.02954602184087], "label": 
"Belly dance"}]}, "3HYQV_zu2RA": {"duration": 152.207, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3HYQV_zu2RA", "annotations": [{"segment": [98.76333229329174, 125.82828393135726], "label": "Using the rowing machine"}]}, "IiNf2F4P5sE": {"duration": 73.863, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=IiNf2F4P5sE", "annotations": [{"segment": [27.071193447737908, 31.67905616224649], "label": "Surfing"}, {"segment": [44.00508892355694, 48.843344773790946], "label": "Surfing"}, {"segment": [59.44142901716068, 67.15959906396256], "label": "Surfing"}]}, "GvRPOGpObyU": {"duration": 62.65, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=GvRPOGpObyU", "annotations": [{"segment": [13.867928237129485, 53.24250390015601], "label": "Mixing drinks"}]}, "RK1L77bNTxg": {"duration": 190.543, "subset": "training", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=RK1L77bNTxg", "annotations": [{"segment": [37.446374414976596, 46.659371294851795], "label": "Curling"}, {"segment": [51.414466458658346, 60.924656786271456], "label": "Curling"}, {"segment": [65.679751950078, 71.02923400936038], "label": "Curling"}, {"segment": [74.00116848673947, 118.58018564742589], "label": "Curling"}, {"segment": [148.89391731669267, 156.32375351014042], "label": "Curling"}]}, "Lo9xfcrfn2A": {"duration": 190.125, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Lo9xfcrfn2A", "annotations": []}, "bteBkFTUlEI": {"duration": 98.453, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bteBkFTUlEI", "annotations": []}, "izdf6Fm48Tc": {"duration": 227.79, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=izdf6Fm48Tc", "annotations": [{"segment": [19.66940745, 184.07695257], "label": "Cleaning shoes"}]}, "OoyoTLkSOuY": {"duration": 75.58, "subset": 
"training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=OoyoTLkSOuY", "annotations": [{"segment": [35.446936764430575, 74.8978464524181], "label": "Shaving legs"}]}, "ILwwD00q1ZY": {"duration": 46.231, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ILwwD00q1ZY", "annotations": [{"segment": [5.717656937294078, 34.017897909256696], "label": "Painting furniture"}]}, "JQ3mObvro1s": {"duration": 151.35, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=JQ3mObvro1s", "annotations": [{"segment": [68.56978315132605, 87.92510920436817], "label": "Cleaning shoes"}]}, "-AUKiMfqga4": {"duration": 198.16, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=-AUKiMfqga4", "annotations": [{"segment": [30.295773964291904, 180.22894103247125], "label": "Making an omelette"}]}, "IFmtu3Sd7iI": {"duration": 114.219, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IFmtu3Sd7iI", "annotations": [{"segment": [1.3161052558043291, 111.76221273243983], "label": "Scuba diving"}]}, "YrBVLdz09HA": {"duration": 186.29, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YrBVLdz09HA", "annotations": []}, "9KPRS9y8Fvo": {"duration": 86.518, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9KPRS9y8Fvo", "annotations": [{"segment": [0.2, 78.98260410495145], "label": "Swinging at the playground"}]}, "skr8x0kUVwg": {"duration": 68.94, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=skr8x0kUVwg", "annotations": []}, "V5TmcsuC1vU": {"duration": 133.515, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=V5TmcsuC1vU", "annotations": []}, "aYtnkEWM_Cg": {"duration": 95.16, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=aYtnkEWM_Cg", "annotations": 
[{"segment": [8.906899296380018, 63.0905366826918], "label": "Volleyball"}, {"segment": [77.3415746224649, 85.80312885179407], "label": "Volleyball"}]}, "gIMaGZrddX4": {"duration": 134.885, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=gIMaGZrddX4", "annotations": []}, "1VBg21aaiKM": {"duration": 97.13, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1VBg21aaiKM", "annotations": [{"segment": [17.56124180967239, 22.713182527301093], "label": "Shaving"}, {"segment": [36.94677691107644, 45.123522620904836], "label": "Shaving"}, {"segment": [57.54006240249611, 66.17107176287051], "label": "Shaving"}, {"segment": [73.2878689547582, 80.55608736349454], "label": "Shaving"}]}, "ut1AoOzqL4k": {"duration": 29.75, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ut1AoOzqL4k", "annotations": []}, "tHFF0OaIHoE": {"duration": 27.84, "subset": "training", "resolution": "176x144", "url": "https://www.youtube.com/watch?v=tHFF0OaIHoE", "annotations": [{"segment": [0.3908994009360375, 22.91886456], "label": "Shaving legs"}]}, "jalqZE5ekho": {"duration": 95.388, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jalqZE5ekho", "annotations": [{"segment": [0, 95.387572], "label": "Building sandcastles"}]}, "x1Z_3-X49HY": {"duration": 91.835, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=x1Z_3-X49HY", "annotations": [{"segment": [2.7740491278461206, 87.85441517145982], "label": "Clipping cat claws"}]}, "Wzg4d-3ym1E": {"duration": 205.288, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Wzg4d-3ym1E", "annotations": [{"segment": [81.34628524732827, 93.51620193787342], "label": "Painting furniture"}, {"segment": [107.28742345612193, 174.54222544461777], "label": "Painting furniture"}]}, "CzdYTTVN5UA": {"duration": 150.094, "subset": "testing", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CzdYTTVN5UA", "annotations": []}, "hryx3zm06U8": {"duration": 112.478, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hryx3zm06U8", "annotations": [{"segment": [7.194346051482058, 112.47746031746031], "label": "Laying tile"}]}, "Go_bbW1C2Zs": {"duration": 62.53, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Go_bbW1C2Zs", "annotations": []}, "NNOsdZr802w": {"duration": 167.63, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=NNOsdZr802w", "annotations": [{"segment": [108.5246705650539, 167.62485260770976], "label": "Shaving legs"}]}, "tJcbG_HGZE8": {"duration": 178.446, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=tJcbG_HGZE8", "annotations": [{"segment": [109.36558502340094, 159.45848049921997], "label": "Hanging wallpaper"}]}, "9FJwDxnlW7Y": {"duration": 38.15, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=9FJwDxnlW7Y", "annotations": [{"segment": [0.009999999999999998, 38.101], "label": "Shoveling snow"}]}, "cit8ShHV_bU": {"duration": 62.741, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cit8ShHV_bU", "annotations": []}, "F4bG73WqAzI": {"duration": 53.66, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=F4bG73WqAzI", "annotations": []}, "A_ZCUnCD4hk": {"duration": 57.911, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=A_ZCUnCD4hk", "annotations": []}, "lPYV3qFuXOI": {"duration": 145.5, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=lPYV3qFuXOI", "annotations": [{"segment": [28.817376429017163, 121.1691260873635], "label": "Fixing bicycle"}]}, "9D46Mp1AD9g": {"duration": 44.03, "subset": "testing", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=9D46Mp1AD9g", "annotations": []}, "KbEoaYhMZ6c": {"duration": 44.606, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KbEoaYhMZ6c", "annotations": [{"segment": [0.01, 3.4097832574102958], "label": "Installing carpet"}, {"segment": [6.123692380655226, 16.353042152886115], "label": "Installing carpet"}, {"segment": [19.5540631700468, 43.49213338533541], "label": "Installing carpet"}]}, "JspVuT6rsLA": {"duration": 181.58, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JspVuT6rsLA", "annotations": []}, "kHBTnFweJfw": {"duration": 198.25, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kHBTnFweJfw", "annotations": [{"segment": [1.9370364716140651, 193.7036259575954], "label": "Zumba"}]}, "Gz0UlnBx6JU": {"duration": 197.091, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Gz0UlnBx6JU", "annotations": [{"segment": [67.01238377535101, 92.83366926677067], "label": "Playing congas"}, {"segment": [118.34755850234009, 162.92001560062403], "label": "Playing congas"}]}, "VmwvfQDR4YQ": {"duration": 231.225, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VmwvfQDR4YQ", "annotations": [{"segment": [6.493038301123883, 207.41650093603744], "label": "Capoeira"}]}, "OBua42LRiF8": {"duration": 78.136, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OBua42LRiF8", "annotations": [{"segment": [5.972889582249956, 76.18481474258971], "label": "Hurling"}]}, "M-PQajs86Bo": {"duration": 48.832, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=M-PQajs86Bo", "annotations": [{"segment": [0.2, 47.27253510140406], "label": "Swinging at the playground"}]}, "VfKA9zNDBfM": {"duration": 54.15, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=VfKA9zNDBfM", 
"annotations": []}, "DqsaFxxfONY": {"duration": 100.36, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DqsaFxxfONY", "annotations": [{"segment": [26.772209239389987, 89.86694797315702], "label": "Using parallel bars"}]}, "kcMJk5dg-Wc": {"duration": 14.68, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=kcMJk5dg-Wc", "annotations": [{"segment": [0.4633995103315549, 10.132448155191367], "label": "Kayaking"}]}, "pHZogKGkc9o": {"duration": 41.63, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=pHZogKGkc9o", "annotations": [{"segment": [6.949721277691109, 41.113771670826836], "label": "Playing kickball"}]}, "meAHw06Mh-Y": {"duration": 117.842, "subset": "training", "resolution": "536x360", "url": "https://www.youtube.com/watch?v=meAHw06Mh-Y", "annotations": [{"segment": [17.14388568693381, 117.841268], "label": "Laying tile"}]}, "WWyGnNfcj8Q": {"duration": 185.737, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=WWyGnNfcj8Q", "annotations": [{"segment": [5.795196705827417, 83.74059239920616], "label": "Making a cake"}]}, "VcEW9F8TyqU": {"duration": 82.129, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VcEW9F8TyqU", "annotations": [{"segment": [10.121982962558503, 58.04124407644306], "label": "Beer pong"}]}, "2KYGAq5qQCI": {"duration": 77.021, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2KYGAq5qQCI", "annotations": [{"segment": [9.382252694401995, 28.126733151171287], "label": "Plastering"}, {"segment": [44.828545865856746, 67.89867565880357], "label": "Plastering"}]}, "RfaZnfoyZ-g": {"duration": 146.077, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RfaZnfoyZ-g", "annotations": []}, "-HZtgP41I_o": {"duration": 207.935, "subset": "validation", "resolution": "1440x1080", "url": 
"https://www.youtube.com/watch?v=-HZtgP41I_o", "annotations": [{"segment": [27.5661232449298, 107.02141965678628], "label": "Putting on shoes"}]}, "4x7HYKV8zkY": {"duration": 115.94, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4x7HYKV8zkY", "annotations": [{"segment": [30.638689371294852, 103.84617724960998], "label": "Tumbling"}]}, "1D7iddqIIhM": {"duration": 15.88, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1D7iddqIIhM", "annotations": [{"segment": [8.127056576140596, 15.733783310516094], "label": "Smoking hookah"}]}, "8uP35-qttBo": {"duration": 165.47, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8uP35-qttBo", "annotations": [{"segment": [50.247210259999996, 135.40551856], "label": "Archery"}]}, "UfrztSg9gf0": {"duration": 118.376, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=UfrztSg9gf0", "annotations": [{"segment": [17.359252321372853, 108.77233635413417], "label": "Disc dog"}]}, "BvACPvbUq1k": {"duration": 29.05, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BvACPvbUq1k", "annotations": [{"segment": [3.1721863160240695, 5.483350619656786], "label": "Shoveling snow"}, {"segment": [7.703881035881436, 18.851850064274572], "label": "Shoveling snow"}]}, "WmrGNp40Qzg": {"duration": 233.85, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WmrGNp40Qzg", "annotations": []}, "IOmzDJjVUoQ": {"duration": 17.717, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=IOmzDJjVUoQ", "annotations": [{"segment": [0.2, 17.563000000000002], "label": "Using the monkey bar"}]}, "hxluAHro9bQ": {"duration": 151.208, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=hxluAHro9bQ", "annotations": [{"segment": [15.798098283931356, 41.02789703588144], "label": "Hula 
hoop"}, {"segment": [58.94812792511701, 71.44513104524181], "label": "Hula hoop"}, {"segment": [86.77164430577223, 140.06909204368174], "label": "Hula hoop"}]}, "dfgwl-_IMic": {"duration": 31.974, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=dfgwl-_IMic", "annotations": [{"segment": [0.2, 31.77387755102041], "label": "Powerbocking"}, {"segment": [17.258909666146646, 23.64370861778471], "label": "Powerbocking"}]}, "2-S2fehRKVc": {"duration": 4.74, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2-S2fehRKVc", "annotations": [{"segment": [1.3781997948517941, 4.171548440717629], "label": "Chopping wood"}]}, "NNfAlym-xh8": {"duration": 138.79, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=NNfAlym-xh8", "annotations": [{"segment": [2.055690327613105, 28.88785881435258], "label": "Using parallel bars"}, {"segment": [30.402578003120126, 33.21562792511701], "label": "Using parallel bars"}, {"segment": [61.56251560062403, 73.68026911076443], "label": "Using parallel bars"}, {"segment": [78.22442667706709, 89.04384945397815], "label": "Using parallel bars"}, {"segment": [94.02078393135727, 97.26661076443058], "label": "Using parallel bars"}, {"segment": [126.91182917316694, 129.07571372854915], "label": "Using parallel bars"}, {"segment": [133.40348283931357, 136.64930967238692], "label": "Using parallel bars"}]}, "NyPzV1CzeDk": {"duration": 209.073, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NyPzV1CzeDk", "annotations": []}, "YkqO2oi9SIk": {"duration": 84.2, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YkqO2oi9SIk", "annotations": [{"segment": [56.595037049999995, 75.17980439625585], "label": "Plataform diving"}, {"segment": [8.397046215288611, 49.350057299999996], "label": "Plataform diving"}]}, "XsCND4lDiqA": {"duration": 154.413, "subset": "training", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=XsCND4lDiqA", "annotations": [{"segment": [6.003601354680785, 27.616579423046545], "label": "Beer pong"}, {"segment": [42.9858082716622, 63.15792113547024], "label": "Beer pong"}, {"segment": [93.41609043118231, 111.90719310085362], "label": "Beer pong"}]}, "q_IyXRFSV-A": {"duration": 87.772, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=q_IyXRFSV-A", "annotations": []}, "QOaPQpXemCA": {"duration": 185.67, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QOaPQpXemCA", "annotations": []}, "U2td95KJU7Y": {"duration": 104.31, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=U2td95KJU7Y", "annotations": [{"segment": [29.940511035407404, 37.588358962929945], "label": "Plataform diving"}]}, "MPrkTgXNHkw": {"duration": 24.43, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MPrkTgXNHkw", "annotations": []}, "iKnRiVXMMGg": {"duration": 87.35, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iKnRiVXMMGg", "annotations": [{"segment": [0.01, 87.352992], "label": "Removing curlers"}]}, "3MqnXsnEAkM": {"duration": 154.88, "subset": "training", "resolution": "352x262", "url": "https://www.youtube.com/watch?v=3MqnXsnEAkM", "annotations": [{"segment": [25.36988336676324, 38.90048782903697], "label": "Snatch"}, {"segment": [63.06228151166863, 93.98937742543714], "label": "Snatch"}, {"segment": [128.78236032842673, 138.20545986465308], "label": "Snatch"}, {"segment": [139.65516748561097, 154.67709750566894], "label": "Snatch"}]}, "5ydZNWU9fO8": {"duration": 60.16, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5ydZNWU9fO8", "annotations": []}, "It2fslENHXs": {"duration": 46.65, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=It2fslENHXs", 
"annotations": [{"segment": [7.350292647425897, 42.209601341653666], "label": "Pole vault"}]}, "xa-zOtV3uPU": {"duration": 42.957, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xa-zOtV3uPU", "annotations": [{"segment": [9.147611384555383, 10.487920744929797], "label": "Playing blackjack"}, {"segment": [13.838694145865835, 16.98842114274571], "label": "Playing blackjack"}]}, "aCJHnvM0Mkc": {"duration": 34.714, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=aCJHnvM0Mkc", "annotations": [{"segment": [19.171132876755067, 34.513832199546485], "label": "Mooping floor"}]}, "WmabLngcvas": {"duration": 126.689, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WmabLngcvas", "annotations": [{"segment": [17.53536006989628, 109.54675958252663], "label": "Camel ride"}]}, "h4s1rAwVm4M": {"duration": 88.12, "subset": "testing", "resolution": "853x470", "url": "https://www.youtube.com/watch?v=h4s1rAwVm4M", "annotations": []}, "f4s1ngeK5P4": {"duration": 31.394, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=f4s1ngeK5P4", "annotations": [{"segment": [0.01, 31.361], "label": "Croquet"}]}, "aDhbXceBwHw": {"duration": 72.307, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aDhbXceBwHw", "annotations": [{"segment": [0.01, 68.54085491419657], "label": "Sumo"}]}, "oyLTgy93soQ": {"duration": 42.15, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=oyLTgy93soQ", "annotations": [{"segment": [6.706858034321373, 14.005497659906398], "label": "Shot put"}]}, "j5JTGwlcFJw": {"duration": 151.975, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=j5JTGwlcFJw", "annotations": []}, "u0SpXGr_elM": {"duration": 195.93, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=u0SpXGr_elM", 
"annotations": [{"segment": [47.218889, 125.83735954000002], "label": "Making a sandwich"}]}, "Elq_Pigh6qQ": {"duration": 191.82, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=Elq_Pigh6qQ", "annotations": []}, "7XChhLM-lYg": {"duration": 3.25, "subset": "testing", "resolution": "1200x720", "url": "https://www.youtube.com/watch?v=7XChhLM-lYg", "annotations": []}, "be8r0Ztbv3s": {"duration": 76.46000000000001, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=be8r0Ztbv3s", "annotations": []}, "niHEMszTMBU": {"duration": 100.75, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=niHEMszTMBU", "annotations": [{"segment": [32.53593771294852, 84.4048239219969], "label": "Starting a campfire"}]}, "QbhMOqg9Tmg": {"duration": 226.93, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QbhMOqg9Tmg", "annotations": [{"segment": [142.2899282, 175.02069396], "label": "Shaving legs"}, {"segment": [47.46097111, 118.37687428], "label": "Shaving legs"}]}, "nK7XMj4gYJs": {"duration": 45.65, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nK7XMj4gYJs", "annotations": [{"segment": [2.4532137285491418, 45.58], "label": "Using parallel bars"}]}, "QzbZxKJ-YBY": {"duration": 160.38, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=QzbZxKJ-YBY", "annotations": [{"segment": [3.502424336973479, 92.5640717628705], "label": "Belly dance"}]}, "01vNlQLepsE": {"duration": 11.703, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=01vNlQLepsE", "annotations": [{"segment": [0, 11.661], "label": "Baton twirling"}]}, "-Hbqom0pAeo": {"duration": 81.828, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-Hbqom0pAeo", "annotations": []}, "xZdCmghMcvU": {"duration": 142.53, "subset": "training", 
"resolution": "640x360", "url": "https://www.youtube.com/watch?v=xZdCmghMcvU", "annotations": [{"segment": [86.04458928000003, 118.360100850234], "label": "Preparing salad"}, {"segment": [7.8906733900156, 75.13437708000001], "label": "Preparing salad"}]}, "DRI64Xoxy5w": {"duration": 59.815, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DRI64Xoxy5w", "annotations": [{"segment": [14.921934477379095, 33.20130421216849], "label": "Hitting a pinata"}]}, "Hs1fGDcU6Js": {"duration": 180.907, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Hs1fGDcU6Js", "annotations": [{"segment": [138.4650639625585, 162.15358814352572], "label": "Doing crunches"}]}, "lIb8_YksZyg": {"duration": 213.438, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lIb8_YksZyg", "annotations": [{"segment": [4.9937831513260535, 21.306808112324493], "label": "Snowboarding"}, {"segment": [24.968915756630267, 56.596209048361935], "label": "Snowboarding"}, {"segment": [57.92788455538222, 133.16755070202808], "label": "Snowboarding"}, {"segment": [135.16506396255852, 190.42959750390014], "label": "Snowboarding"}]}, "pmPEfkb8roQ": {"duration": 30.86, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pmPEfkb8roQ", "annotations": [{"segment": [12.16491887675507, 23.608597503900157], "label": "Painting fence"}]}, "8HhihBmSS9s": {"duration": 120.4, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=8HhihBmSS9s", "annotations": [{"segment": [4.507785666528702, 53.905603595572394], "label": "Playing kickball"}]}, "WtWw-GNpr4E": {"duration": 151.12, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WtWw-GNpr4E", "annotations": [{"segment": [0.01, 2.5932450748829954], "label": "Playing badminton"}, {"segment": [80.15484776911076, 86.7558352324493], "label": "Playing badminton"}, 
{"segment": [126.83325911700469, 131.0767510577223], "label": "Playing badminton"}]}, "jqW7r60gOzE": {"duration": 13.143, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=jqW7r60gOzE", "annotations": [{"segment": [0, 11.624171606864275], "label": "Hitting a pinata"}]}, "w40c5_KzUnY": {"duration": 156.71, "subset": "testing", "resolution": "640x358", "url": "https://www.youtube.com/watch?v=w40c5_KzUnY", "annotations": []}, "trV2ycq9rkU": {"duration": 6.316, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=trV2ycq9rkU", "annotations": [{"segment": [0.01, 6.315827], "label": "Fixing the roof"}]}, "qsYElirHVUU": {"duration": 200.09, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qsYElirHVUU", "annotations": [{"segment": [2.4971775251962463, 9.988710100784985], "label": "Skateboarding"}, {"segment": [19.040978629621375, 44.63704826288289], "label": "Skateboarding"}, {"segment": [52.752875219770694, 56.18649431691553], "label": "Skateboarding"}, {"segment": [59.30796622341085, 76.47606170913504], "label": "Skateboarding"}, {"segment": [80.2218279969294, 85.84047742862096], "label": "Skateboarding"}, {"segment": [92.08342124161157, 95.20489314810688], "label": "Skateboarding"}, {"segment": [97.70207067330313, 116.11875492162544], "label": "Skateboarding"}, {"segment": [117.99163806552262, 194.46769860218407], "label": "Skateboarding"}]}, "Qhyk2ZbrOEM": {"duration": 219.267, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Qhyk2ZbrOEM", "annotations": []}, "16T82DHJClA": {"duration": 123.298, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=16T82DHJClA", "annotations": [{"segment": [0.2, 4.800418194276672], "label": "Arm wrestling"}, {"segment": [29.378559348973234, 53.95670050366979], "label": "Arm wrestling"}, {"segment": [91.78399587456997, 100.61676434890965], "label": "Arm 
wrestling"}]}, "O_bDLIcx5k0": {"duration": 145.96, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=O_bDLIcx5k0", "annotations": [{"segment": [47.590901652392624, 72.18332930051896], "label": "Painting"}, {"segment": [91.7661883536566, 116.58632366519151], "label": "Painting"}]}, "3AsQjx1lxLU": {"duration": 234.336, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3AsQjx1lxLU", "annotations": [{"segment": [36.923422798136414, 38.75131479875195], "label": "Running a marathon"}, {"segment": [52.277717134165364, 106.7489049173167], "label": "Running a marathon"}, {"segment": [145.13464127457098, 220.4438002230889], "label": "Running a marathon"}]}, "bF2qGYgqJuM": {"duration": 8.41, "subset": "testing", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=bF2qGYgqJuM", "annotations": []}, "n3v9Znovl98": {"duration": 159.31, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=n3v9Znovl98", "annotations": [{"segment": [13.172405184642757, 150.3642460218409], "label": "Snatch"}]}, "mNTO-xnsaTc": {"duration": 31.046, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mNTO-xnsaTc", "annotations": [{"segment": [0.2, 29.546017900410423], "label": "Rafting"}]}, "Lfh5p8ReOYo": {"duration": 186.5, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Lfh5p8ReOYo", "annotations": [{"segment": [63.428227750390015, 69.24733121372856], "label": "Vacuuming floor"}, {"segment": [81.46744848673947, 84.95891056474258], "label": "Vacuuming floor"}, {"segment": [111.14487614976598, 113.4725175351014], "label": "Vacuuming floor"}, {"segment": [121.32830721060841, 130.05696240561622], "label": "Vacuuming floor"}]}, "gXKGS1N3zuM": {"duration": 151.28, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gXKGS1N3zuM", "annotations": [{"segment": 
[45.544687987519495, 147.48927457098281], "label": "Belly dance"}]}, "vWde8sMxe1w": {"duration": 128.62, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=vWde8sMxe1w", "annotations": [{"segment": [37.32036492355694, 40.73136601872075], "label": "High jump"}, {"segment": [62.40125532917316, 115.97403723556941], "label": "High jump"}]}, "5YXp6dVOn90": {"duration": 18.11, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=5YXp6dVOn90", "annotations": []}, "MsBqlgdzeag": {"duration": 56.05, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MsBqlgdzeag", "annotations": [{"segment": [0.2, 55.8], "label": "Chopping wood"}, {"segment": [27.25741029641186, 32.23712948517941], "label": "Chopping wood"}, {"segment": [0.2, 55.8], "label": "Chopping wood"}]}, "TAC-5hXVLPY": {"duration": 177.66, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TAC-5hXVLPY", "annotations": [{"segment": [26.6091659, 135.9558184], "label": "Archery"}, {"segment": [160.4082917316693, 169.82777691107646], "label": "Archery"}]}, "0SS16APU65E": {"duration": 77.624, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0SS16APU65E", "annotations": []}, "qVy_WDpLHRM": {"duration": 176.87, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qVy_WDpLHRM", "annotations": [{"segment": [118.47722742, 143.57274416], "label": "Vacuuming floor"}, {"segment": [21.397248680000004, 108.3834848], "label": "Vacuuming floor"}, {"segment": [21.108028431341335, 82.36269837285491], "label": "Vacuuming floor"}]}, "KokLo4Mwg3k": {"duration": 227.324, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KokLo4Mwg3k", "annotations": []}, "MLfEz9HPElE": {"duration": 9.27, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MLfEz9HPElE", 
"annotations": [{"segment": [3.5475234009360372, 8.21039391575663], "label": "Hopscotch"}]}, "NbIxjYWSURI": {"duration": 231.13, "subset": "training", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=NbIxjYWSURI", "annotations": [{"segment": [16.219165366614664, 36.04258970358814], "label": "High jump"}, {"segment": [46.13451482059282, 49.37834789391575], "label": "High jump"}, {"segment": [64.51623556942278, 90.46690015600625], "label": "High jump"}, {"segment": [108.48819500780031, 189.22359594383775], "label": "High jump"}, {"segment": [195.35083619344772, 214.09298283931355], "label": "High jump"}]}, "TDfWOcKi684": {"duration": 222.15, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TDfWOcKi684", "annotations": [{"segment": [25.992040497946448, 171.89402782641918], "label": "Paintball"}]}, "JNSaWuU8ky4": {"duration": 137.997, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JNSaWuU8ky4", "annotations": [{"segment": [66.26663212303802, 80.66902308877012], "label": "Canoeing"}]}, "K2dU4-Rg354": {"duration": 122.09, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=K2dU4-Rg354", "annotations": [{"segment": [15.231326053042121, 120.34719500780032], "label": "Pole vault"}]}, "8DlvRudzDjo": {"duration": 57.818, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=8DlvRudzDjo", "annotations": []}, "GHOCVoZsFHE": {"duration": 116.31, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GHOCVoZsFHE", "annotations": [{"segment": [13.788723868954758, 18.68734945397816], "label": "Tai chi"}, {"segment": [26.307433697347893, 62.95641029641185], "label": "Tai chi"}, {"segment": [75.11225897035881, 82.18805148205928], "label": "Tai chi"}]}, "dAP7xalPRHo": {"duration": 116.52000000000001, "subset": "training", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=dAP7xalPRHo", "annotations": [{"segment": [11.06330679, 73.32640514586583], "label": "Hand washing clothes"}]}, "Ao1h62LMRZg": {"duration": 208.91, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ao1h62LMRZg", "annotations": []}, "4OfhHE72V8c": {"duration": 201.55, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4OfhHE72V8c", "annotations": [{"segment": [24.679003900156008, 184.07078705148209], "label": "Putting on makeup"}]}, "CF6Q5ojrAJo": {"duration": 39.15, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=CF6Q5ojrAJo", "annotations": [{"segment": [1.357214004314577, 39.121], "label": "Chopping wood"}]}, "WC3GCmEqCBc": {"duration": 106.0, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WC3GCmEqCBc", "annotations": [{"segment": [52.52754758190327, 92.17097971918876], "label": "Preparing salad"}]}, "iMwLP3y0VcQ": {"duration": 42.26, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=iMwLP3y0VcQ", "annotations": [{"segment": [7.515875492162543, 34.81037070054231], "label": "Smoking hookah"}]}, "sbnF7VVeopM": {"duration": 72.17, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sbnF7VVeopM", "annotations": []}, "v5i_NAlJX1Y": {"duration": 45.163, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=v5i_NAlJX1Y", "annotations": [{"segment": [12.383425897035881, 34.210305772230896], "label": "Using the rowing machine"}]}, "mUAqa5nNZnM": {"duration": 80.13, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mUAqa5nNZnM", "annotations": []}, "htzAYwoAZkg": {"duration": 165.93, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=htzAYwoAZkg", "annotations": []}, "uv7COBs24D8": {"duration": 153.44, 
"subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uv7COBs24D8", "annotations": [{"segment": [61.30421898000001, 87.84942483556944], "label": "Layup drill in basketball"}, {"segment": [111.91234469, 121.54818829000001], "label": "Layup drill in basketball"}, {"segment": [127.34576025210609, 141.7080640399376], "label": "Layup drill in basketball"}]}, "h0R04AFnor4": {"duration": 53.34, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=h0R04AFnor4", "annotations": [{"segment": [0, 52.552624024961], "label": "Shaving legs"}]}, "x4iCAIPRDVQ": {"duration": 23.383, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=x4iCAIPRDVQ", "annotations": [{"segment": [0.01, 23.341], "label": "Using the rowing machine"}]}, "vbyMT7bdUX4": {"duration": 108.46000000000001, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=vbyMT7bdUX4", "annotations": [{"segment": [23.581373199999998, 96.44685177847114], "label": "Wrapping presents"}]}, "2OAHfFCggh0": {"duration": 209.82, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2OAHfFCggh0", "annotations": []}, "olFThb2_GHM": {"duration": 176.98, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=olFThb2_GHM", "annotations": [{"segment": [34.51296691323435, 43.900493913634094], "label": "Discus throw"}, {"segment": [74.5480085325862, 95.03933400000001], "label": "Discus throw"}, {"segment": [121.48564353458492, 160.96847768332503], "label": "Discus throw"}]}, "I71OenniCZI": {"duration": 56.448, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=I71OenniCZI", "annotations": [{"segment": [5.988212393135726, 56.447709750566894], "label": "Shuffleboard"}]}, "OOcsprnRCQQ": {"duration": 183.531, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=OOcsprnRCQQ", "annotations": [{"segment": [44.08604368174727, 66.41533853354134], "label": "Scuba diving"}, {"segment": [78.72507800312013, 89.60345241809672], "label": "Scuba diving"}, {"segment": [127.96403588143525, 154.01487987519502], "label": "Scuba diving"}]}, "4BofYu8Soz8": {"duration": 16.07, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=4BofYu8Soz8", "annotations": [{"segment": [0, 16.068207], "label": "Playing water polo"}]}, "blSOfdlqJtY": {"duration": 111.99000000000001, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=blSOfdlqJtY", "annotations": [{"segment": [7.246832293291733, 15.279465678627146], "label": "Dodgeball"}, {"segment": [33.44020202808113, 105.90852496099845], "label": "Dodgeball"}]}, "CRzaKuaCXr8": {"duration": 127.52, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=CRzaKuaCXr8", "annotations": [{"segment": [0, 125.3129953198128], "label": "Cumbia"}]}, "BKdKbFPerGo": {"duration": 70.937, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=BKdKbFPerGo", "annotations": [{"segment": [13.369738840499856, 52.92648573208723], "label": "River tubing"}]}, "yE-ooj_-CXo": {"duration": 25.52, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yE-ooj_-CXo", "annotations": []}, "c_3z4UfOBtI": {"duration": 95.03999999999999, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=c_3z4UfOBtI", "annotations": []}, "obt6wudzHxY": {"duration": 122.091, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=obt6wudzHxY", "annotations": [{"segment": [70.28299825273011, 85.90144230889236], "label": "Braiding hair"}]}, "ejzgfj6QKB8": {"duration": 86.147, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ejzgfj6QKB8", "annotations": 
[{"segment": [11.957517940717628, 63.28079719188767], "label": "Hurling"}]}, "6YtaXJJKUKM": {"duration": 58.747, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6YtaXJJKUKM", "annotations": [{"segment": [48.87926705771733, 55.36604729020186], "label": "Table soccer"}]}, "y6sx0u3MYFo": {"duration": 9.41, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=y6sx0u3MYFo", "annotations": [{"segment": [0.01, 7.716921792511701], "label": "Discus throw"}]}, "D4zZErqkD6A": {"duration": 135.025, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=D4zZErqkD6A", "annotations": [{"segment": [7.370257410296412, 127.41904056162247], "label": "Baton twirling"}]}, "qhYQd9nwOts": {"duration": 133.4, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=qhYQd9nwOts", "annotations": [{"segment": [0, 132.73684555382215], "label": "Kayaking"}]}, "BJNzJR3oQ3o": {"duration": 169.32, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=BJNzJR3oQ3o", "annotations": []}, "bBtzyRzk0UM": {"duration": 214.76, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bBtzyRzk0UM", "annotations": [{"segment": [16.81719529389679, 26.51825356648246], "label": "Ballet"}, {"segment": [44.916812359317355, 107.47191225495598], "label": "Ballet"}, {"segment": [121.85624003844508, 131.89181756180957], "label": "Ballet"}, {"segment": [137.91316407582826, 155.64268436710552], "label": "Ballet"}, {"segment": [158.31883837333604, 168.35441589670054], "label": "Ballet"}, {"segment": [173.03768540760396, 191.43624420043884], "label": "Ballet"}]}, "3QDXF-efrLo": {"duration": 75.349, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3QDXF-efrLo", "annotations": []}, "DLlooZTLPdI": {"duration": 173.105, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=DLlooZTLPdI", "annotations": [{"segment": [17.33008609279966, 41.86674191456875], "label": "Hula hoop"}, {"segment": [57.23585612017623, 65.05523001425723], "label": "Hula hoop"}, {"segment": [75.03167256877435, 76.91910764665597], "label": "Hula hoop"}, {"segment": [82.31177929774631, 87.97408453139118], "label": "Hula hoop"}, {"segment": [100.91649649400802, 123.56571742858743], "label": "Hula hoop"}, {"segment": [131.11545774011392, 134.62069431332264], "label": "Hula hoop"}, {"segment": [138.12593088653136, 156.19138091768403], "label": "Hula hoop"}]}, "rABAqfLcssA": {"duration": 30.7, "subset": "training", "resolution": "210x144", "url": "https://www.youtube.com/watch?v=rABAqfLcssA", "annotations": [{"segment": [0.2, 19.96966746177847], "label": "Windsurfing"}]}, "BC0cgv8YgTg": {"duration": 72.49, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=BC0cgv8YgTg", "annotations": [{"segment": [20.738935257410297, 26.61590873634945], "label": "Doing karate"}, {"segment": [34.41420046801872, 43.22966068642745], "label": "Doing karate"}]}, "BRuansCVV3U": {"duration": 123.35, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=BRuansCVV3U", "annotations": [{"segment": [8.274259571743414, 123.3441949], "label": "Tai chi"}]}, "5sUctxaAFrw": {"duration": 60.14, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=5sUctxaAFrw", "annotations": [{"segment": [13.798890173998206, 47.87484656029104], "label": "Roof shingle removal"}]}, "oMzNT4NVXwk": {"duration": 20.341, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=oMzNT4NVXwk", "annotations": []}, "nwpz-KGn46A": {"duration": 71.01, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nwpz-KGn46A", "annotations": [{"segment": [21.82262736349454, 28.690662371294852], "label": "Spinning"}, {"segment": 
[37.55264302652106, 53.17188393135726], "label": "Spinning"}, {"segment": [60.372243213728545, 70.80662], "label": "Spinning"}]}, "7edBQl3VD1o": {"duration": 205.66, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7edBQl3VD1o", "annotations": [{"segment": [43.55410591900311, 179.3578816199377], "label": "Skiing"}]}, "fZzswQaICfM": {"duration": 159.57, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fZzswQaICfM", "annotations": [{"segment": [26.387145722563595, 150.35694258970358], "label": "Hopscotch"}]}, "2b_hjshny54": {"duration": 165.559, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2b_hjshny54", "annotations": [{"segment": [12.909594383775351, 165.501], "label": "Swimming"}]}, "Z2QA7dUVwMM": {"duration": 212.51, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Z2QA7dUVwMM", "annotations": [{"segment": [24.5330231603822, 140.89911950219508], "label": "Changing car wheel"}, {"segment": [161.78534192252044, 212.30902494331067], "label": "Changing car wheel"}]}, "XDBugI_CcYs": {"duration": 57.679, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=XDBugI_CcYs", "annotations": [{"segment": [5.080680967238689, 33.94614274570983], "label": "Tug of war"}]}, "-t_Zhm1b0FI": {"duration": 139.552, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=-t_Zhm1b0FI", "annotations": [{"segment": [0.2, 139.501], "label": "Playing congas"}]}, "bEcSrzeCGyA": {"duration": 237.22, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=bEcSrzeCGyA", "annotations": [{"segment": [27.7552718435268, 32.19611533849109], "label": "Ping-pong"}, {"segment": [94.36792426799111, 116.57214174281256], "label": "Ping-pong"}, {"segment": [132.11509303432138, 158.39008352574103], "label": "Ping-pong"}]}, "iL5abexk3vQ": {"duration": 
150.419, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iL5abexk3vQ", "annotations": [{"segment": [0.01, 145.1389516419657], "label": "Baton twirling"}]}, "caPl3Aszru0": {"duration": 67.22, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=caPl3Aszru0", "annotations": [{"segment": [16.779230298463638, 21.393518630541138], "label": "Springboard diving"}, {"segment": [31.461056630265208, 37.96300833385335], "label": "Springboard diving"}]}, "ML0XZMcKk_E": {"duration": 125.202, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ML0XZMcKk_E", "annotations": [{"segment": [31.04617628705148, 98.62153666146646], "label": "Doing fencing"}]}, "PFYk8lhE7-0": {"duration": 32.834, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=PFYk8lhE7-0", "annotations": [{"segment": [15.878680717628704, 29.452391653666144], "label": "Plastering"}]}, "DEmArEKL8gc": {"duration": 194.56, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DEmArEKL8gc", "annotations": [{"segment": [42.1864448, 148.4084224], "label": "Cheerleading"}, {"segment": [6.9088256, 31.9428608], "label": "Cheerleading"}]}, "ZGsYV0KDB-4": {"duration": 31.19, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=ZGsYV0KDB-4", "annotations": [{"segment": [0.01, 31.18433], "label": "Walking the dog"}]}, "SI8HO5-e24c": {"duration": 187.69, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SI8HO5-e24c", "annotations": [{"segment": [77.88567813188718, 187.68691609977324], "label": "Bungee jumping"}]}, "_DiTdY1Mtj4": {"duration": 146.147, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=_DiTdY1Mtj4", "annotations": [{"segment": [20.063779596081805, 24.395732008872194], "label": "Shuffleboard"}, {"segment": [34.42762180691309, 
37.847584238063405], "label": "Shuffleboard"}, {"segment": [47.19548154987424, 61.78732125611555], "label": "Shuffleboard"}, {"segment": [67.9432536321861, 73.41519352202658], "label": "Shuffleboard"}, {"segment": [75.23917348530676, 82.3070958430174], "label": "Shuffleboard"}, {"segment": [88.46302821908795, 93.93496810892844], "label": "Shuffleboard"}, {"segment": [138.62247573166925, 143.41042308424335], "label": "Shuffleboard"}]}, "R8WbSI3m1lI": {"duration": 99.661, "subset": "validation", "resolution": "240x136", "url": "https://www.youtube.com/watch?v=R8WbSI3m1lI", "annotations": [{"segment": [6.974652473612027, 26.81367068120611], "label": "Rollerblading"}, {"segment": [92.53041754013695, 95.01029479052575], "label": "Rollerblading"}]}, "JXyi7hFT26w": {"duration": 203.222, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JXyi7hFT26w", "annotations": [{"segment": [95.65048520956306, 98.81486215660355], "label": "Curling"}, {"segment": [118.12279432880247, 122.55311831634141], "label": "Curling"}]}, "3G9zc_SEOHM": {"duration": 135.187, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3G9zc_SEOHM", "annotations": [{"segment": [95.74814458346336, 114.30725630889236], "label": "Rock-paper-scissors"}]}, "8JKK6JayDeA": {"duration": 220.66, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8JKK6JayDeA", "annotations": [{"segment": [0.2, 220.405], "label": "Snatch"}, {"segment": [32.350811232449296, 46.11711388455538], "label": "Snatch"}, {"segment": [0.2, 220.405], "label": "Snatch"}, {"segment": [0.2, 220.405], "label": "Snatch"}, {"segment": [0.2, 220.405], "label": "Snatch"}, {"segment": [0.2, 220.405], "label": "Snatch"}, {"segment": [0.2, 220.405], "label": "Snatch"}, {"segment": [0.2, 220.405], "label": "Snatch"}, {"segment": [0.2, 220.405], "label": "Snatch"}, {"segment": [0.2, 220.405], "label": "Snatch"}, {"segment": [0.2, 220.405], 
"label": "Snatch"}, {"segment": [0.2, 220.405], "label": "Snatch"}, {"segment": [0.2, 220.405], "label": "Snatch"}, {"segment": [0.2, 220.405], "label": "Snatch"}, {"segment": [182.4035101404056, 201.67633385335412], "label": "Snatch"}]}, "BtGrWzfvmP0": {"duration": 122.625, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BtGrWzfvmP0", "annotations": [{"segment": [60.64481295106819, 89.25085707760095], "label": "Putting in contact lenses"}]}, "8H3Wy96BwHM": {"duration": 54.66, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8H3Wy96BwHM", "annotations": [{"segment": [10.317935871176344, 26.18839313572543], "label": "BMX"}, {"segment": [27.12673946957878, 45.29653666146646], "label": "BMX"}]}, "Z98StW03bYc": {"duration": 103.26, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Z98StW03bYc", "annotations": []}, "NEKcJ0whvkg": {"duration": 144.196, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NEKcJ0whvkg", "annotations": [{"segment": [8.371612522053214, 11.740676035308983], "label": "Hurling"}, {"segment": [31.9550571148436, 34.65030792544822], "label": "Hurling"}, {"segment": [40.93922648352565, 45.43131116786668], "label": "Hurling"}, {"segment": [71.26079810282758, 81.3679886425949], "label": "Hurling"}, {"segment": [99.11172314574193, 109.66812215394334], "label": "Hurling"}]}, "Vft2cnyVuJc": {"duration": 233.76, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Vft2cnyVuJc", "annotations": []}, "joVOZZ63pe4": {"duration": 160.78, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=joVOZZ63pe4", "annotations": []}, "USYy2DpAIyk": {"duration": 35.156, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=USYy2DpAIyk", "annotations": []}, "czDoodPfLK8": {"duration": 220.15, "subset": "testing", 
"resolution": "320x240", "url": "https://www.youtube.com/watch?v=czDoodPfLK8", "annotations": []}, "r3lN8kgtgw4": {"duration": 200.551, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=r3lN8kgtgw4", "annotations": [{"segment": [23.704282600059805, 107.6049839111018], "label": "Rafting"}, {"segment": [116.33814241187939, 153.1421675222993], "label": "Rafting"}]}, "mX3gbTBdbKY": {"duration": 219.66, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mX3gbTBdbKY", "annotations": [{"segment": [100.00900139999999, 167.0162844], "label": "Paintball"}]}, "EqhnCfb5vEc": {"duration": 186.16, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EqhnCfb5vEc", "annotations": [{"segment": [8.71237331125898, 175.40911466458655], "label": "Belly dance"}]}, "Krh4m6nxZWE": {"duration": 9.172, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Krh4m6nxZWE", "annotations": [{"segment": [0.01, 7.146364294009647], "label": "Slacklining"}]}, "qCtoh8hxjOA": {"duration": 25.589, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qCtoh8hxjOA", "annotations": [{"segment": [0.01, 14.842859972966952], "label": "Fun sliding down"}]}, "JgDfOMDfNZs": {"duration": 46.255, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JgDfOMDfNZs", "annotations": [{"segment": [15.658580524180966, 34.20353533853354], "label": "Hand car wash"}]}, "Flh6nxGkf74": {"duration": 35.852, "subset": "validation", "resolution": "316x204", "url": "https://www.youtube.com/watch?v=Flh6nxGkf74", "annotations": [{"segment": [5.537144130663187, 19.072385338950973], "label": "Building sandcastles"}, {"segment": [27.909443648494243, 34.956717996611026], "label": "Building sandcastles"}]}, "yST1TsQ6mgY": {"duration": 183.554, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=yST1TsQ6mgY", "annotations": [{"segment": [42.37417784711389, 113.0932449297972], "label": "Camel ride"}]}, "-GYS5LjMTZo": {"duration": 108.21000000000001, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-GYS5LjMTZo", "annotations": []}, "OxPzQ4yqfwg": {"duration": 51.711, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=OxPzQ4yqfwg", "annotations": [{"segment": [13.552918539781592, 21.620131956318254], "label": "Fixing the roof"}]}, "gjyT6Rt985k": {"duration": 223.59, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=gjyT6Rt985k", "annotations": [{"segment": [4.185677565878145, 98.36342279813641], "label": "Tango"}]}, "X0IXZMr95TM": {"duration": 98.45, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=X0IXZMr95TM", "annotations": [{"segment": [10.51762716, 88.6702496099844], "label": "Mixing drinks"}]}, "aOJOAHIEIzo": {"duration": 30.117, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aOJOAHIEIzo", "annotations": []}, "8GeLWWLIar4": {"duration": 98.96000000000001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8GeLWWLIar4", "annotations": [{"segment": [3.9397170299999997, 73.36720968], "label": "Breakdancing"}]}, "poI4vU4k1sY": {"duration": 231.039, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=poI4vU4k1sY", "annotations": [{"segment": [40.36865779719189, 201.84328898595945], "label": "Playing pool"}]}, "t_D9MYkEPEo": {"duration": 182.56, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=t_D9MYkEPEo", "annotations": [{"segment": [35.03011312792512, 110.50149507020281], "label": "Tango"}, {"segment": [11.7894019, 14.608051099999999], "label": "Tango"}]}, "SF3pw17yBB4": {"duration": 15.14, "subset": "validation", "resolution": 
"1920x1080", "url": "https://www.youtube.com/watch?v=SF3pw17yBB4", "annotations": [{"segment": [0.01, 15.139069], "label": "Peeling potatoes"}]}, "fErOJ98E15w": {"duration": 162.12, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=fErOJ98E15w", "annotations": [{"segment": [29.337416160000004, 143.92778138], "label": "Playing squash"}]}, "sJkabaVH-C0": {"duration": 130.38, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sJkabaVH-C0", "annotations": []}, "CDWHstJThJA": {"duration": 215.78, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CDWHstJThJA", "annotations": [{"segment": [21.708727769110766, 24.06471372854914], "label": "Archery"}, {"segment": [40.22004602184087, 45.26858736349455], "label": "Archery"}, {"segment": [50.65369812792512, 53.68282293291732], "label": "Archery"}, {"segment": [78.58896021840874, 82.29122386895477], "label": "Archery"}, {"segment": [89.3591817472699, 94.40772308892356], "label": "Archery"}, {"segment": [119.65042979719189, 129.7475124804992], "label": "Archery"}, {"segment": [134.79605382215289, 137.8251786271451], "label": "Archery"}, {"segment": [148.25883073322933, 153.307372074883], "label": "Archery"}, {"segment": [189.6568697347894, 192.0128556942278], "label": "Archery"}]}, "QWXzDdMjZOw": {"duration": 151.95, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=QWXzDdMjZOw", "annotations": [{"segment": [104.25803432137285, 140.98529641185647], "label": "Playing saxophone"}]}, "2OPk79DNQdM": {"duration": 119.68, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2OPk79DNQdM", "annotations": [{"segment": [17.449686638845556, 112.8164232425897], "label": "Brushing hair"}]}, "wGEaIInAtT4": {"duration": 199.228, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wGEaIInAtT4", "annotations": [{"segment": 
[0.19999999999999998, 197.64717004680185], "label": "Rope skipping"}]}, "qnDHR-jYWf0": {"duration": 24.99, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=qnDHR-jYWf0", "annotations": [{"segment": [0, 21.476682012480502], "label": "Playing water polo"}]}, "yKdvz5dgro4": {"duration": 10.54, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yKdvz5dgro4", "annotations": [{"segment": [0, 10.541858], "label": "Washing dishes"}]}, "CMGjxw3X1dI": {"duration": 38.57, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CMGjxw3X1dI", "annotations": [{"segment": [1.5643945678627145, 36.64293430109204], "label": "Windsurfing"}]}, "aRdqcOaBvjk": {"duration": 160.29, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aRdqcOaBvjk", "annotations": [{"segment": [4.252386895475819, 151.33494539781591], "label": "Paintball"}]}, "wlx5YuEHsO8": {"duration": 197.718, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wlx5YuEHsO8", "annotations": [{"segment": [21.591659656786273, 173.0417295351014], "label": "Table soccer"}]}, "ZF4oT2P0a54": {"duration": 106.023, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=ZF4oT2P0a54", "annotations": [{"segment": [20.013499563111775, 100.23289863839452], "label": "Surfing"}]}, "xeFS0RuvVGA": {"duration": 78.6, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xeFS0RuvVGA", "annotations": [{"segment": [0, 78.39956916099773], "label": "Playing water polo"}]}, "2ptdloHcTcs": {"duration": 235.335, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2ptdloHcTcs", "annotations": [{"segment": [0.01, 225.78837423556942], "label": "Surfing"}]}, "V2MlQezL1IE": {"duration": 191.12, "subset": "training", "resolution": "480x360", "url": 
"https://www.youtube.com/watch?v=V2MlQezL1IE", "annotations": [{"segment": [119.8621303872563, 177.70604405672825], "label": "Volleyball"}]}, "yE0fDD_7FkM": {"duration": 54.59, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=yE0fDD_7FkM", "annotations": [{"segment": [8.57887935, 45.835804700000004], "label": "Springboard diving"}]}, "s9iaX2fDyeg": {"duration": 162.354, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=s9iaX2fDyeg", "annotations": []}, "woiigfrANUM": {"duration": 141.71, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=woiigfrANUM", "annotations": [{"segment": [40.23627958939158, 72.51373464461777], "label": "Skateboarding"}]}, "b380n1dci9I": {"duration": 58.91, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=b380n1dci9I", "annotations": [{"segment": [4.211117004680188, 48.44435101404056], "label": "BMX"}]}, "peiF2E05u8g": {"duration": 206.26, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=peiF2E05u8g", "annotations": []}, "Q3ZNFGE8PZE": {"duration": 73.77, "subset": "testing", "resolution": "830x480", "url": "https://www.youtube.com/watch?v=Q3ZNFGE8PZE", "annotations": []}, "s3JuGKwna6o": {"duration": 47.35, "subset": "training", "resolution": "426x196", "url": "https://www.youtube.com/watch?v=s3JuGKwna6o", "annotations": [{"segment": [14.32921105148206, 19.647268761310453], "label": "Drinking coffee"}]}, "-UQbRLLGjRE": {"duration": 189.382, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=-UQbRLLGjRE", "annotations": []}, "k74CgBa46bA": {"duration": 42.15, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=k74CgBa46bA", "annotations": [{"segment": [4.346025109204919, 5.7231606232236105], "label": "Drinking coffee"}, {"segment": [13.854817944095885, 15.625420747834204], "label": 
"Drinking coffee"}, {"segment": [20.478183987709592, 22.445520436307724], "label": "Drinking coffee"}]}, "Ug3pM64HhK4": {"duration": 174.846, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ug3pM64HhK4", "annotations": [{"segment": [24.255457900670738, 130.03318658322314], "label": "Braiding hair"}]}, "JxBFSll-mDE": {"duration": 78.391, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=JxBFSll-mDE", "annotations": [{"segment": [28.372248174726987, 66.40573602964118], "label": "Playing ice hockey"}]}, "ucsAN6pGv6w": {"duration": 7.99, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ucsAN6pGv6w", "annotations": [{"segment": [3.862988829953198, 6.791383588143526], "label": "Javelin throw"}]}, "oJcsC2yB7ok": {"duration": 83.75, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=oJcsC2yB7ok", "annotations": []}, "7I6v9c00aRA": {"duration": 31.115, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7I6v9c00aRA", "annotations": [{"segment": [4.028888756630265, 26.163506503900155], "label": "Waterskiing"}]}, "9Hxcuf80TK0": {"duration": 215.09, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9Hxcuf80TK0", "annotations": [{"segment": [0, 215.08612244897958], "label": "Playing saxophone"}]}, "QNKjKB4k4D0": {"duration": 24.614, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QNKjKB4k4D0", "annotations": [{"segment": [2.261956329780095, 18.325678660436136], "label": "Calf roping"}]}, "wP0jr86MNOw": {"duration": 128.128, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=wP0jr86MNOw", "annotations": [{"segment": [24.37735101404056, 61.94244929797191], "label": "Powerbocking"}, {"segment": [74.73056786271451, 110.29752262090483], "label": "Powerbocking"}]}, "q59X9izcjQc": 
{"duration": 224.65, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=q59X9izcjQc", "annotations": [{"segment": [57.4833743457894, 211.451017211833], "label": "Windsurfing"}]}, "gY5akj1YcMk": {"duration": 33.46, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gY5akj1YcMk", "annotations": [{"segment": [7.5689441263650545, 26.72620270826833], "label": "Kayaking"}]}, "sOMA_oI7dgk": {"duration": 198.089, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=sOMA_oI7dgk", "annotations": [{"segment": [121.72202808112324, 178.87757878315134], "label": "Using the rowing machine"}]}, "9hYyE1gofFQ": {"duration": 167.649, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=9hYyE1gofFQ", "annotations": [{"segment": [43.67742296086401, 162.41724195163806], "label": "Baton twirling"}]}, "74AJ-1e1qGA": {"duration": 10.705, "subset": "validation", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=74AJ-1e1qGA", "annotations": [{"segment": [5.232190327613104, 7.0151341653666135], "label": "Doing a powerbomb"}]}, "6qynXM937V0": {"duration": 147.32999999999998, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=6qynXM937V0", "annotations": [{"segment": [4.364160686427457, 144.4766879875195], "label": "Playing harmonica"}]}, "fNpXgb67gR8": {"duration": 47.276, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=fNpXgb67gR8", "annotations": []}, "-NM-0NZXRNw": {"duration": 201.18, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-NM-0NZXRNw", "annotations": [{"segment": [4.938895349999999, 194.69760530421217], "label": "Paintball"}]}, "Oi-atN4-Oqk": {"duration": 29.7, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=Oi-atN4-Oqk", "annotations": [{"segment": [2.223899299843994, 
28.540041014664585], "label": "Cumbia"}]}, "l2drIA62T8w": {"duration": 107.579, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=l2drIA62T8w", "annotations": [{"segment": [34.05744617784711, 98.31361310452418], "label": "Grooming dog"}]}, "_bC2DnG4yqA": {"duration": 236.635, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_bC2DnG4yqA", "annotations": []}, "mmC3M5dAfNo": {"duration": 139.85, "subset": "testing", "resolution": "810x480", "url": "https://www.youtube.com/watch?v=mmC3M5dAfNo", "annotations": []}, "C1TWr5XH8b0": {"duration": 92.22999999999999, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=C1TWr5XH8b0", "annotations": [{"segment": [54.86319123088924, 73.55838090327615], "label": "Vacuuming floor"}]}, "vX0xxYxGj-4": {"duration": 92.787, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vX0xxYxGj-4", "annotations": [{"segment": [8.536223088923556, 92.741], "label": "Tug of war"}]}, "T-CTcQ8onFY": {"duration": 86.278, "subset": "testing", "resolution": "720x720", "url": "https://www.youtube.com/watch?v=T-CTcQ8onFY", "annotations": []}, "cCISvaObBxI": {"duration": 194.259, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cCISvaObBxI", "annotations": [{"segment": [13.334393468255737, 157.89133963650548], "label": "Futsal"}]}, "WOZbWqJMkRg": {"duration": 184.738, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=WOZbWqJMkRg", "annotations": [{"segment": [0.2, 176.35052106084242], "label": "Drum corps"}]}, "Ih8bPM3p0rE": {"duration": 27.17, "subset": "validation", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=Ih8bPM3p0rE", "annotations": [{"segment": [3.814448088127606, 16.953102613900473], "label": "Javelin throw"}]}, "5jaIcPomOuM": {"duration": 59.119, "subset": "training", "resolution": "720x480", "url": 
"https://www.youtube.com/watch?v=5jaIcPomOuM", "annotations": [{"segment": [6.728413416536662, 51.154375975039], "label": "Clipping cat claws"}]}, "92kGXXfm6ok": {"duration": 14.65, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=92kGXXfm6ok", "annotations": [{"segment": [0, 13.02889282371295], "label": "Clean and jerk"}]}, "zuBJzdDI9MY": {"duration": 75.88, "subset": "validation", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=zuBJzdDI9MY", "annotations": [{"segment": [38.47412444617784, 75.05413815039002], "label": "Preparing pasta"}]}, "4R0fSNCWUo0": {"duration": 155.18, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4R0fSNCWUo0", "annotations": [{"segment": [12.588620374414978, 142.59033462558503], "label": "Playing saxophone"}]}, "FsS8cQbfKTQ": {"duration": 74.61, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FsS8cQbfKTQ", "annotations": [{"segment": [0.01, 41.376340523400934], "label": "Pole vault"}]}, "lKSWdEIuJtM": {"duration": 209.444, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lKSWdEIuJtM", "annotations": [{"segment": [58.38439096573209, 141.88385514018694], "label": "Trimming branches or hedges"}]}, "_g2D5oXLCZI": {"duration": 54.31, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_g2D5oXLCZI", "annotations": [{"segment": [11.768160686427455, 31.748634945397814], "label": "Layup drill in basketball"}]}, "AO5Kb8aOJoE": {"duration": 72.47, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AO5Kb8aOJoE", "annotations": []}, "kRVmpJILmjo": {"duration": 227.115, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kRVmpJILmjo", "annotations": [{"segment": [33.305314541833376, 215.0672968798752], "label": "Longboarding"}]}, "0JCM9uzZafU": {"duration": 228.0, 
"subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0JCM9uzZafU", "annotations": [{"segment": [6.758093540068134, 215.1919242823713], "label": "Doing nails"}]}, "NSegylr0MBg": {"duration": 145.938, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NSegylr0MBg", "annotations": [{"segment": [13.204914727201333, 63.292521953198126], "label": "Blow-drying hair"}]}, "YWfLZFXwjTE": {"duration": 112.199, "subset": "validation", "resolution": "540x360", "url": "https://www.youtube.com/watch?v=YWfLZFXwjTE", "annotations": [{"segment": [0.01, 110.13980535240493], "label": "Gargling mouthwash"}]}, "TzhdX5tmEEo": {"duration": 205.87, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TzhdX5tmEEo", "annotations": []}, "l7qSKJyq6mA": {"duration": 163.28, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=l7qSKJyq6mA", "annotations": []}, "kyMsxlpBjlY": {"duration": 30.35, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kyMsxlpBjlY", "annotations": [{"segment": [0.01, 30.348479], "label": "Mowing the lawn"}]}, "MlbM7Mew0Ys": {"duration": 233.547, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=MlbM7Mew0Ys", "annotations": [{"segment": [17.12428334412288, 217.15048564118564], "label": "Rope skipping"}]}, "9lS5qpgrTmI": {"duration": 172.664, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9lS5qpgrTmI", "annotations": [{"segment": [20.202447074882993, 172.6635827664399], "label": "Braiding hair"}]}, "nvFtFFJXxB0": {"duration": 181.44, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nvFtFFJXxB0", "annotations": [{"segment": [14.214009600000002, 169.22498148829956], "label": "Playing violin"}]}, "0jvbBtMIA8k": {"duration": 224.328, "subset": "training", "resolution": "1280x720", 
"url": "https://www.youtube.com/watch?v=0jvbBtMIA8k", "annotations": [{"segment": [81.98602441246011, 103.61638119384556], "label": "Beer pong"}, {"segment": [117.571450085062, 151.76136886854218], "label": "Beer pong"}, {"segment": [170.94958859396476, 186.64903990984521], "label": "Beer pong"}, {"segment": [198.85972511202252, 201.30186215245797], "label": "Beer pong"}]}, "DmtaWx7QcZ8": {"duration": 94.065, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DmtaWx7QcZ8", "annotations": [{"segment": [31.013252529536395, 80.89778023590148], "label": "Painting fence"}]}, "kwUdyaXVXAI": {"duration": 218.64, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kwUdyaXVXAI", "annotations": []}, "DHWmzuAWxLM": {"duration": 134.328, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=DHWmzuAWxLM", "annotations": [{"segment": [9.43016321372855, 134.32743764172335], "label": "Wakeboarding"}]}, "IhBlRs2b6nc": {"duration": 199.831, "subset": "testing", "resolution": "472x360", "url": "https://www.youtube.com/watch?v=IhBlRs2b6nc", "annotations": []}, "iIVOAvu3qtM": {"duration": 68.87, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=iIVOAvu3qtM", "annotations": [{"segment": [4.512568159869252, 68.87038548752834], "label": "Polishing forniture"}]}, "3DQTz4Ke50Q": {"duration": 126.223, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=3DQTz4Ke50Q", "annotations": [{"segment": [0.2, 126.201], "label": "Blowing leaves"}]}, "_6mQ9_DQr0Q": {"duration": 170.62, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_6mQ9_DQr0Q", "annotations": [{"segment": [23.019120904836193, 155.54538923556942], "label": "Snatch"}]}, "Y1f_NXMfD98": {"duration": 181.348, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Y1f_NXMfD98", 
"annotations": [{"segment": [0.2, 99.01984746056507], "label": "Grooming dog"}, {"segment": [105.80977985786097, 145.98354654186167], "label": "Grooming dog"}]}, "6J45AbWiGIE": {"duration": 121.069, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6J45AbWiGIE", "annotations": [{"segment": [11.51681903276131, 82.50573634945397], "label": "Disc dog"}]}, "HVhLOYGU9E4": {"duration": 85.102, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HVhLOYGU9E4", "annotations": [{"segment": [0.01, 76.73705818408736], "label": "Rollerblading"}]}, "M6i-5nzy5Bk": {"duration": 69.2, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=M6i-5nzy5Bk", "annotations": [{"segment": [0.2, 8.52798997971919], "label": "Pole vault"}, {"segment": [15.65263983619345, 17.59572616068643], "label": "Pole vault"}, {"segment": [22.777289692667708, 24.93627449765991], "label": "Pole vault"}, {"segment": [36.7027416848674, 40.91276205460218], "label": "Pole vault"}, {"segment": [49.54870127457098, 57.213097332293295], "label": "Pole vault"}]}, "Dt2KQcKR4T8": {"duration": 5.97, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=Dt2KQcKR4T8", "annotations": [{"segment": [0.2, 5.598933697347895], "label": "Pole vault"}]}, "Jy8JurvYlH4": {"duration": 141.8, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Jy8JurvYlH4", "annotations": [{"segment": [53.62869189547582, 128.07170064], "label": "Brushing teeth"}]}, "sF859t5osSg": {"duration": 102.00999999999999, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sF859t5osSg", "annotations": [{"segment": [29.439895343213724, 93.0937231123245], "label": "Drinking coffee"}]}, "4-GJMxf6kQM": {"duration": 121.35, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4-GJMxf6kQM", "annotations": []}, 
"LGS_yzsScfw": {"duration": 166.697, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LGS_yzsScfw", "annotations": [{"segment": [16.4288381689806, 47.067676832171735], "label": "Changing car wheel"}, {"segment": [89.1311672002816, 99.51721335596007], "label": "Changing car wheel"}, {"segment": [127.29988885596006, 147.03337799614698], "label": "Changing car wheel"}, {"segment": [155.08256435596007, 158.97733194941802], "label": "Changing car wheel"}]}, "2UBPORshiXQ": {"duration": 111.387, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2UBPORshiXQ", "annotations": []}, "zQVUXbyCV1o": {"duration": 183.554, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zQVUXbyCV1o", "annotations": [{"segment": [4.581685191887676, 164.0816009344774], "label": "Beach soccer"}]}, "vuXsKQKbAfE": {"duration": 214.971, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vuXsKQKbAfE", "annotations": [{"segment": [155.44267184564936, 160.4731790251202], "label": "Scuba diving"}]}, "mZoC4fb7bIk": {"duration": 171.596, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=mZoC4fb7bIk", "annotations": []}, "9amq0hE3O9Q": {"duration": 21.48, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9amq0hE3O9Q", "annotations": []}, "FoT7T-CG8BM": {"duration": 181.604, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FoT7T-CG8BM", "annotations": []}, "B0GCHDqEsRU": {"duration": 120.14, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=B0GCHDqEsRU", "annotations": []}, "SBn1i9YqN1k": {"duration": 89.049, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SBn1i9YqN1k", "annotations": [{"segment": [1.2499048361934477, 89.021], "label": "Sharpening knives"}]}, 
"Eigz6Eo9jYQ": {"duration": 65.527, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Eigz6Eo9jYQ", "annotations": []}, "JBemXLIpgK8": {"duration": 88.03, "subset": "testing", "resolution": "426x234", "url": "https://www.youtube.com/watch?v=JBemXLIpgK8", "annotations": []}, "ZreBacTw5Hw": {"duration": 218.198, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZreBacTw5Hw", "annotations": [{"segment": [27.912993657161252, 47.656330634177756], "label": "Beach soccer"}, {"segment": [50.719951889232036, 61.27242510108567], "label": "Beach soccer"}, {"segment": [65.35725344115806, 88.50461403490154], "label": "Beach soccer"}, {"segment": [94.6318565450101, 114.7155958836993], "label": "Beach soccer"}, {"segment": [123.90645964886217, 137.52255411577008], "label": "Beach soccer"}, {"segment": [142.62858954086056, 143.30939426420593], "label": "Beach soccer"}, {"segment": [147.39422260427833, 148.41542968929642], "label": "Beach soccer"}, {"segment": [152.1598556676961, 154.542672199405], "label": "Beach soccer"}, {"segment": [163.39313360289515, 166.11635249627673], "label": "Beach soccer"}, {"segment": [173.60520445307608, 177.34962975507023], "label": "Beach soccer"}, {"segment": [179.7324462776911, 181.09405571918876], "label": "Beach soccer"}, {"segment": [183.47687224180967, 199.4757831794072], "label": "Beach soccer"}]}, "XD52QWWvQ_A": {"duration": 18.228, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XD52QWWvQ_A", "annotations": []}, "IE3pd-4HHJU": {"duration": 165.3, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=IE3pd-4HHJU", "annotations": []}, "Y9EIH-A_ePo": {"duration": 54.13, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Y9EIH-A_ePo", "annotations": [{"segment": [15.895971249999999, 35.91735], "label": "Playing kickball"}]}, "x164DXmWtRM": {"duration": 135.49, 
"subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=x164DXmWtRM", "annotations": [{"segment": [11.621359797191888, 129.73663482683307], "label": "Paintball"}]}, "PP5EOYoCSb4": {"duration": 158.29, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=PP5EOYoCSb4", "annotations": []}, "b9h8ONagDv4": {"duration": 190.752, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=b9h8ONagDv4", "annotations": []}, "x7PDVqDFw6c": {"duration": 176.61, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=x7PDVqDFw6c", "annotations": [{"segment": [37.460680187207494, 102.46597815912637], "label": "Cleaning windows"}, {"segment": [112.65748673946959, 130.2860421216849], "label": "Cleaning windows"}]}, "D5hsFA2ZrIQ": {"duration": 13.004, "subset": "training", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=D5hsFA2ZrIQ", "annotations": [{"segment": [0.16958320837525936, 12.62587137036903], "label": "Trimming branches or hedges"}]}, "7GsxM2QSmhY": {"duration": 29.374, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7GsxM2QSmhY", "annotations": []}, "_UZpaTEWXuA": {"duration": 131.89, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=_UZpaTEWXuA", "annotations": []}, "nxNJNn8blDw": {"duration": 67.617, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=nxNJNn8blDw", "annotations": [{"segment": [7.489453719188768, 41.77216440561623], "label": "Installing carpet"}]}, "rOeNl9qIoVI": {"duration": 16.84, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=rOeNl9qIoVI", "annotations": [{"segment": [0.9454614290171607, 16.440523737909515], "label": "Kayaking"}]}, "uXX2lSVAXq8": {"duration": 162.61, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=uXX2lSVAXq8", "annotations": [{"segment": [8.575998660000002, 67.82022230889235], "label": "Getting a piercing"}, {"segment": [68.58034575, 139.06314742589703], "label": "Getting a piercing"}]}, "jrwW5L2Ixrg": {"duration": 9.196, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=jrwW5L2Ixrg", "annotations": []}, "uo1ERV8LZeU": {"duration": 120.535, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uo1ERV8LZeU", "annotations": [{"segment": [30.650799464898597, 32.15513318096724], "label": "Making a lemonade"}, {"segment": [34.03555032605304, 39.48876004680187], "label": "Making a lemonade"}, {"segment": [40.42896861934477, 56.788597781591264], "label": "Making a lemonade"}, {"segment": [58.66901492667707, 59.60922349921997], "label": "Making a lemonade"}, {"segment": [62.24180750234009, 63.55809950390015], "label": "Making a lemonade"}, {"segment": [68.07110065210608, 85.55898010140406], "label": "Making a lemonade"}]}, "NsQVVICY_Ss": {"duration": 33.93, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NsQVVICY_Ss", "annotations": []}, "ybkcKusf-Kg": {"duration": 62.16, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ybkcKusf-Kg", "annotations": [{"segment": [12.703488341653665, 59.735487163806546], "label": "Using parallel bars"}]}, "O67GDqPk_ug": {"duration": 204.312, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=O67GDqPk_ug", "annotations": [{"segment": [3.627420111694811, 192.95326902135213], "label": "Trimming branches or hedges"}]}, "xMLTg5Th50g": {"duration": 26.123, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=xMLTg5Th50g", "annotations": []}, "RA61jh8HwPI": {"duration": 104.21000000000001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RA61jh8HwPI", 
"annotations": [{"segment": [0, 5.527580764430578], "label": "Putting on makeup"}]}, "OdLcbH2H_zI": {"duration": 111.53, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OdLcbH2H_zI", "annotations": [{"segment": [28.70779682527301, 45.23652833073323], "label": "Playing accordion"}, {"segment": [54.45782064430577, 58.11154024024961], "label": "Playing accordion"}, {"segment": [65.24499278471139, 103.34806857098283], "label": "Playing accordion"}]}, "I4kjOE8HnU0": {"duration": 170.77, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=I4kjOE8HnU0", "annotations": [{"segment": [6.127592303692148, 18.915611024440977], "label": "Triple jump"}, {"segment": [23.444700988039525, 29.572293291731672], "label": "Triple jump"}, {"segment": [83.38853874154967, 85.25345813832554], "label": "Triple jump"}, {"segment": [96.70939157566302, 100.9720644825793], "label": "Triple jump"}, {"segment": [106.83323972958918, 109.76382735309414], "label": "Triple jump"}, {"segment": [116.4242537701508, 119.08842433697347], "label": "Triple jump"}, {"segment": [151.324887900156, 155.32114374258973], "label": "Triple jump"}]}, "X9CpU4ucYeg": {"duration": 112.873, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=X9CpU4ucYeg", "annotations": [{"segment": [67.96984214352574, 75.89378747113884], "label": "Making a lemonade"}, {"segment": [54.235003838248765, 63.496370535101406], "label": "Making a lemonade"}, {"segment": [67.96984214352574, 75.89378747113884], "label": "Making a lemonade"}, {"segment": [78.35901490639625, 89.10036301716069], "label": "Making a lemonade"}, {"segment": [91.38950277847114, 92.79820417004679], "label": "Making a lemonade"}]}, "L54gbbqtxOg": {"duration": 61.51, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=L54gbbqtxOg", "annotations": [{"segment": [7.29287698854893, 52.77739925923568], "label": "Painting 
furniture"}]}, "YNnyUVFE4uM": {"duration": 60.33, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YNnyUVFE4uM", "annotations": [{"segment": [1.5998946911076444, 50.82018430577222], "label": "Washing dishes"}]}, "SFfB6qvT5FI": {"duration": 31.39, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SFfB6qvT5FI", "annotations": [{"segment": [7.493270953198128, 26.985570556942278], "label": "Bungee jumping"}, {"segment": [26.299911776911078, 30.658742592823714], "label": "Bungee jumping"}]}, "Lyaozxv4_qU": {"duration": 41.959, "subset": "validation", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=Lyaozxv4_qU", "annotations": [{"segment": [26.444956368174726, 40.45292830577223], "label": "Blowing leaves"}]}, "SLHPvKh-_x8": {"duration": 101.84, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=SLHPvKh-_x8", "annotations": []}, "mZ6iJfRokTw": {"duration": 132.59, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mZ6iJfRokTw", "annotations": [{"segment": [45.712157160686424, 118.52066087363494], "label": "Bungee jumping"}]}, "ntYIITLp90k": {"duration": 146.26, "subset": "training", "resolution": "300x240", "url": "https://www.youtube.com/watch?v=ntYIITLp90k", "annotations": [{"segment": [41.642429982913605, 79.29174928627144], "label": "Fixing bicycle"}]}, "urnUdUVDoZg": {"duration": 88.956, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=urnUdUVDoZg", "annotations": []}, "sStBEz5KNs4": {"duration": 82.431, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sStBEz5KNs4", "annotations": []}, "FsXl6whrCWk": {"duration": 35.248, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=FsXl6whrCWk", "annotations": [{"segment": [0.009999999999999998, 29.83442909623678], "label": "Doing a powerbomb"}]}, 
"ijnNP7P5m28": {"duration": 143.36, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ijnNP7P5m28", "annotations": [{"segment": [37.28131931464174, 112.30611059190032], "label": "Snow tubing"}]}, "ERcMdKNF_pc": {"duration": 40.15, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ERcMdKNF_pc", "annotations": [{"segment": [0, 37.8299066438848], "label": "Using parallel bars"}]}, "I3Rm5Cw33Zw": {"duration": 182.347, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=I3Rm5Cw33Zw", "annotations": []}, "xWxpAaPhknA": {"duration": 163.1, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xWxpAaPhknA", "annotations": [{"segment": [8.64851794071763, 152.11216848673948], "label": "Plataform diving"}]}, "5V47VQHzWHc": {"duration": 118.65, "subset": "training", "resolution": "300x240", "url": "https://www.youtube.com/watch?v=5V47VQHzWHc", "annotations": [{"segment": [15.544717628705149, 118.621], "label": "Playing accordion"}]}, "RZNsL5O_4uc": {"duration": 196.906, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RZNsL5O_4uc", "annotations": []}, "i1llwxnI_Vo": {"duration": 191.89, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=i1llwxnI_Vo", "annotations": [{"segment": [11.372789391575663, 53.27253978159126], "label": "Cumbia"}, {"segment": [54.30842478, 180.76749453978158], "label": "Cumbia"}]}, "vdjSbzkRLIs": {"duration": 226.648333333333, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vdjSbzkRLIs", "annotations": []}, "XuYmybr9uDE": {"duration": 72.49, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XuYmybr9uDE", "annotations": [{"segment": [33.341229676730975, 69.58749867673097], "label": "Mixing drinks"}]}, "MdJ4WZvo2tg": {"duration": 13.05, "subset": "testing", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MdJ4WZvo2tg", "annotations": []}, "IWHER8iS1B4": {"duration": 87.772, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IWHER8iS1B4", "annotations": [{"segment": [13.004804709195172, 57.63181876390702], "label": "Rope skipping"}, {"segment": [62.14927724183798, 76.65990003120125], "label": "Rope skipping"}]}, "GLTxG31Kw8M": {"duration": 100.914, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GLTxG31Kw8M", "annotations": []}, "xS6elK6Khs4": {"duration": 176.43, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xS6elK6Khs4", "annotations": [{"segment": [46.006347250000005, 125.86688775], "label": "Skateboarding"}]}, "yz00070X04o": {"duration": 228.86, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=yz00070X04o", "annotations": []}, "r4oAhRg4H14": {"duration": 159.313, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=r4oAhRg4H14", "annotations": [{"segment": [9.19291263650546, 11.925940717628706], "label": "Futsal"}, {"segment": [12.919769110764431, 15.652797191887675], "label": "Futsal"}, {"segment": [18.882739469578784, 20.37348205928237], "label": "Futsal"}, {"segment": [21.118853354134163, 23.10941185647426], "label": "Futsal"}, {"segment": [24.84882995319813, 25.34580655226209], "label": "Futsal"}, {"segment": [26.588248049922, 31.558014040561627], "label": "Futsal"}, {"segment": [34.04289703588144, 35.782315132605305], "label": "Futsal"}, {"segment": [38.26719812792512, 39.509639625585024], "label": "Futsal"}, {"segment": [41.2490577223089, 43.98242901716068], "label": "Futsal"}, {"segment": [45.970335413416535, 51.18858970358815], "label": "Futsal"}, {"segment": [53.67347269890796, 149.34146801872078], "label": "Futsal"}]}, "39O9xB837Gs": {"duration": 178.074, "subset": "testing", "resolution": 
"556x360", "url": "https://www.youtube.com/watch?v=39O9xB837Gs", "annotations": []}, "zpBZ7HMNO34": {"duration": 123.531, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zpBZ7HMNO34", "annotations": [{"segment": [1.7307616822429905, 71.15353582554516], "label": "Elliptical trainer"}, {"segment": [84.4363847352025, 97.32303115264797], "label": "Elliptical trainer"}, {"segment": [106.17057943925234, 123.481], "label": "Elliptical trainer"}]}, "tCQiu-qY9XA": {"duration": 71.31, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tCQiu-qY9XA", "annotations": [{"segment": [28.367620600624026, 59.850117188767555], "label": "Tennis serve with ball bouncing"}]}, "RZnC-ZFEqv8": {"duration": 111.108, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=RZnC-ZFEqv8", "annotations": []}, "Hlc6FXX5kGs": {"duration": 109.37, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Hlc6FXX5kGs", "annotations": [{"segment": [0.6820655226209049, 21.65954290171607], "label": "Putting on makeup"}, {"segment": [95.33609828393135, 109.121], "label": "Putting on makeup"}]}, "lQUqzLT7bl0": {"duration": 96.711, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lQUqzLT7bl0", "annotations": [{"segment": [3.9199157566302656, 96.661], "label": "Skiing"}]}, "cO-4HzHsAsw": {"duration": 99.846, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cO-4HzHsAsw", "annotations": []}, "DVsLkoQijV4": {"duration": 16.42, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DVsLkoQijV4", "annotations": [{"segment": [1.2421225288611544, 13.04300448], "label": "Volleyball"}]}, "eLQ1c2tMJls": {"duration": 72.31, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=eLQ1c2tMJls", "annotations": [{"segment": 
[10.265103632716738, 57.64250439625585], "label": "Polishing shoes"}]}, "HFkc98prpGE": {"duration": 165.791, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HFkc98prpGE", "annotations": []}, "QaNQrUpmmgo": {"duration": 134.56, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QaNQrUpmmgo", "annotations": [{"segment": [0.01, 134.501], "label": "Using the rowing machine"}]}, "tA7cF5wA6ZU": {"duration": 76.185, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tA7cF5wA6ZU", "annotations": [{"segment": [0.2, 1.7827925897035881], "label": "Playing congas"}, {"segment": [12.717253806552263, 18.77874861154446], "label": "Playing congas"}, {"segment": [22.344333790951637, 27.336153042121687], "label": "Playing congas"}, {"segment": [33.63535352574103, 38.151761419656786], "label": "Playing congas"}, {"segment": [54.672306084243374, 58.237891263650546], "label": "Playing congas"}, {"segment": [63.58626903276131, 68.22152976599064], "label": "Playing congas"}]}, "geuUVSJyovM": {"duration": 84.08, "subset": "validation", "resolution": "640x352", "url": "https://www.youtube.com/watch?v=geuUVSJyovM", "annotations": [{"segment": [22.68226375547748, 82.63674662152108], "label": "Playing piano"}]}, "GNg5kjnJlOE": {"duration": 97.849, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GNg5kjnJlOE", "annotations": [{"segment": [27.281855313257182, 93.5813361024576], "label": "Powerbocking"}]}, "HHG1kCydLYU": {"duration": 189.82, "subset": "validation", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=HHG1kCydLYU", "annotations": [{"segment": [21.469857277284287, 182.86395495319815], "label": "Preparing salad"}]}, "kCD0iQFnHA4": {"duration": 135.791, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=kCD0iQFnHA4", "annotations": [{"segment": [8.897336007726024, 130.28241840093602], 
"label": "Making a cake"}]}, "Z_YXWLkRmjQ": {"duration": 15.65, "subset": "validation", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=Z_YXWLkRmjQ", "annotations": [{"segment": [0.48739469578783157, 14.134446177847114], "label": "Doing karate"}]}, "8nhuvbFSSmw": {"duration": 207.564, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8nhuvbFSSmw", "annotations": [{"segment": [12.304808954262933, 185.54356623088924], "label": "Longboarding"}, {"segment": [193.63883526365058, 202.05791505772234], "label": "Longboarding"}]}, "wMwJObSq21Y": {"duration": 132.94, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wMwJObSq21Y", "annotations": [{"segment": [0, 119.9958288], "label": "Doing step aerobics"}]}, "JbAvSsIaI8g": {"duration": 78.066, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=JbAvSsIaI8g", "annotations": [{"segment": [0.01, 70.45349331389171], "label": "Putting in contact lenses"}]}, "g1lqpONbOKY": {"duration": 19.46, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=g1lqpONbOKY", "annotations": [{"segment": [0.75888774, 7.938145400645958], "label": "Using the balance beam"}]}, "lETAKUG4pQw": {"duration": 66.711, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lETAKUG4pQw", "annotations": [{"segment": [31.354588923556943, 38.73825663026521], "label": "Clipping cat claws"}]}, "olBh9KMAHMQ": {"duration": 149.21, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=olBh9KMAHMQ", "annotations": [{"segment": [47.90576599063963, 66.74249921996879], "label": "Shoveling snow"}, {"segment": [75.57948517940717, 93.48600936037441], "label": "Shoveling snow"}]}, "6WQSZekz8vQ": {"duration": 56.146, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=6WQSZekz8vQ", "annotations": 
[{"segment": [38.556006252082874, 47.72445656818782], "label": "Doing crunches"}]}, "GLHsjvmOXRs": {"duration": 163.144, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GLHsjvmOXRs", "annotations": [{"segment": [31.432430053664728, 43.649083029987864], "label": "Canoeing"}, {"segment": [68.33690258630753, 72.1546066414085], "label": "Canoeing"}, {"segment": [126.36600422384241, 130.6927354862902], "label": "Canoeing"}]}, "VceicZDzH3U": {"duration": 195.164, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=VceicZDzH3U", "annotations": [{"segment": [7.0027542761310455, 195.163717], "label": "Carving jack-o-lanterns"}]}, "hhk7A9gJcu8": {"duration": 162.935, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=hhk7A9gJcu8", "annotations": [{"segment": [23.8936327521128, 38.38232495286206], "label": "Making a cake"}, {"segment": [42.195138689901334, 60.24245704522058], "label": "Making a cake"}, {"segment": [71.42671067386912, 84.89865254474124], "label": "Making a cake"}, {"segment": [94.30359309610479, 102.69178245865834], "label": "Making a cake"}, {"segment": [126.33122743057721, 132.94010451950078], "label": "Making a cake"}]}, "ab3xXzyq6L8": {"duration": 22.176, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ab3xXzyq6L8", "annotations": []}, "unE-vkRljRs": {"duration": 99.89, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=unE-vkRljRs", "annotations": [{"segment": [1.8700461651119107, 97.70991051014042], "label": "Horseback riding"}]}, "qeNMFrM0ZOE": {"duration": 43.561, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=qeNMFrM0ZOE", "annotations": []}, "6cgnE6x9nvI": {"duration": 13.259, "subset": "testing", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=6cgnE6x9nvI", "annotations": []}, "8v2ewQE-QK0": {"duration": 
217.07999999999998, "subset": "validation", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=8v2ewQE-QK0", "annotations": [{"segment": [24.4332478980305, 186.70689275784358], "label": "Playing bagpipes"}]}, "pK3OSevBqDw": {"duration": 138.879, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=pK3OSevBqDw", "annotations": [{"segment": [27.08240472476042, 32.06556719411634], "label": "Painting fence"}]}, "dZyb8t-4ATQ": {"duration": 26.587, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=dZyb8t-4ATQ", "annotations": [{"segment": [0.01, 26.586847], "label": "Installing carpet"}]}, "jsofzMPb0i4": {"duration": 139.99, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=jsofzMPb0i4", "annotations": [{"segment": [27.18003978159126, 128.91416458658344], "label": "Layup drill in basketball"}]}, "ye7e0mitDdU": {"duration": 43.05, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ye7e0mitDdU", "annotations": [{"segment": [1.7441903276131046, 42.981], "label": "Using the rowing machine"}]}, "Dydb923dXss": {"duration": 148.957, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Dydb923dXss", "annotations": [{"segment": [0, 148.921], "label": "Scuba diving"}]}, "HrBa_UraJvc": {"duration": 146.1, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HrBa_UraJvc", "annotations": [{"segment": [0.4558595943837754, 7.9775429017160695], "label": "Smoking hookah"}, {"segment": [120.57486271450858, 129.6920546021841], "label": "Smoking hookah"}]}, "yo73PqGytMQ": {"duration": 103.75, "subset": "testing", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=yo73PqGytMQ", "annotations": []}, "tIaNvRDp0eE": {"duration": 57.029, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tIaNvRDp0eE", "annotations": 
[{"segment": [18.934404056162247, 41.246776911076445], "label": "Snow tubing"}]}, "WPM0vuERyfc": {"duration": 31.86, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WPM0vuERyfc", "annotations": [{"segment": [23.8063567925117, 25.197960112324495], "label": "Washing face"}]}, "9riZioI46sM": {"duration": 113.221, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=9riZioI46sM", "annotations": [{"segment": [7.418503825867321, 113.22049886621315], "label": "Putting on shoes"}]}, "Q4UF7Z0ozCw": {"duration": 160.66, "subset": "training", "resolution": "568x320", "url": "https://www.youtube.com/watch?v=Q4UF7Z0ozCw", "annotations": [{"segment": [94.9917209150951, 158.65371673478938], "label": "Ironing clothes"}]}, "F22yx0YBi9Q": {"duration": 227.811, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=F22yx0YBi9Q", "annotations": []}, "lgdCr24iaTg": {"duration": 226.79, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lgdCr24iaTg", "annotations": []}, "GwIHO7HpGkY": {"duration": 117.076, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=GwIHO7HpGkY", "annotations": [{"segment": [0.2, 113.05683622174818], "label": "Ice fishing"}]}, "Y2g4ipeFSHw": {"duration": 103.306, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Y2g4ipeFSHw", "annotations": []}, "kdRcWkrZRZo": {"duration": 179.05, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kdRcWkrZRZo", "annotations": [{"segment": [26.536133619344778, 30.167394009360375], "label": "Applying sunscreen"}]}, "2NyOIsFJ3Tw": {"duration": 105.001, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2NyOIsFJ3Tw", "annotations": [{"segment": [17.19657566302652, 104.961], "label": "Fun sliding down"}]}, "SMNXIkCGh_0": {"duration": 40.125, "subset": 
"validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=SMNXIkCGh_0", "annotations": [{"segment": [0.19999999999999998, 38.97667289719626], "label": "River tubing"}]}, "Jth2Zlpr1gQ": {"duration": 49.09, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=Jth2Zlpr1gQ", "annotations": [{"segment": [4.364989127925116, 43.03726122620904], "label": "Ballet"}]}, "1cLxW-FhgpA": {"duration": 181.6, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=1cLxW-FhgpA", "annotations": [{"segment": [14.730683307332294, 177.33476443057722], "label": "Belly dance"}]}, "KZyg_UYyL0s": {"duration": 43.631, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=KZyg_UYyL0s", "annotations": [{"segment": [0.01, 42.43465648982681], "label": "Painting fence"}]}, "VqMA_qg5cfU": {"duration": 39.8, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=VqMA_qg5cfU", "annotations": []}, "zruHn4r6_CY": {"duration": 113.94, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=zruHn4r6_CY", "annotations": [{"segment": [7.998912732019484, 20.619419486983563], "label": "Long jump"}, {"segment": [29.684853916605643, 110.91825555070203], "label": "Long jump"}]}, "ICXWlSG5vVA": {"duration": 156.09, "subset": "testing", "resolution": "360x480", "url": "https://www.youtube.com/watch?v=ICXWlSG5vVA", "annotations": []}, "sHRajZw7A8g": {"duration": 90.78999999999999, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=sHRajZw7A8g", "annotations": [{"segment": [9.68225945475819, 81.66796104], "label": "Mixing drinks"}]}, "SQM-heGm8iU": {"duration": 184.41, "subset": "training", "resolution": "318x240", "url": "https://www.youtube.com/watch?v=SQM-heGm8iU", "annotations": [{"segment": [30.609469599999997, 161.46980272], "label": "Playing racquetball"}]}, "chsV9w7bzUE": 
{"duration": 238.283, "subset": "testing", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=chsV9w7bzUE", "annotations": []}, "tI87cD7sv-Y": {"duration": 32.834, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=tI87cD7sv-Y", "annotations": [{"segment": [8.041783460218408, 11.934621313572542], "label": "Putting in contact lenses"}, {"segment": [16.44211777535101, 21.256943541341652], "label": "Putting in contact lenses"}]}, "52grVhaCfuU": {"duration": 82.92, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=52grVhaCfuU", "annotations": []}, "3POAgJRFPBo": {"duration": 22.942, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3POAgJRFPBo", "annotations": []}, "pRGlbeqRfM0": {"duration": 206.496, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pRGlbeqRfM0", "annotations": [{"segment": [0.01, 0.02], "label": "Gargling mouthwash"}, {"segment": [141.09932291169198, 141.42146748454974], "label": "Gargling mouthwash"}]}, "DWVI_N4Aeo8": {"duration": 26.47, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=DWVI_N4Aeo8", "annotations": [{"segment": [1.2801765319812792, 24.777610296411854], "label": "Springboard diving"}]}, "ok3Kpgv0PzE": {"duration": 97.85, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ok3Kpgv0PzE", "annotations": [{"segment": [3.890705901716068, 59.705871120000005], "label": "Using the balance beam"}, {"segment": [65.91921912, 95.89445722464899], "label": "Using the balance beam"}]}, "-YMpwZkNc2A": {"duration": 162.75, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-YMpwZkNc2A", "annotations": [{"segment": [29.312554542121685, 129.5589531926677], "label": "Shaving"}]}, "0JgcRWHCi4c": {"duration": 22.779, "subset": "validation", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=0JgcRWHCi4c", "annotations": [{"segment": [10.021239107644305, 22.778774], "label": "Disc dog"}]}, "xaMEGQCxddY": {"duration": 46.58, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=xaMEGQCxddY", "annotations": [{"segment": [5.788247488299532, 44.49715256630265], "label": "Discus throw"}]}, "i4SNM6xSLI8": {"duration": 41.749, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=i4SNM6xSLI8", "annotations": [{"segment": [0.01, 38.75341604140356], "label": "Arm wrestling"}]}, "nQM5LT1-ZRU": {"duration": 217.78, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=nQM5LT1-ZRU", "annotations": [{"segment": [64.70214089999999, 124.49554313999998], "label": "Fixing bicycle"}, {"segment": [31.420176758970353, 183.42589675507017], "label": "Fixing bicycle"}]}, "KBxE7NIOkf8": {"duration": 67.25, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=KBxE7NIOkf8", "annotations": [{"segment": [33.72439703588144, 56.277259750390016], "label": "Bungee jumping"}]}, "Mnhg-VEP69U": {"duration": 61.324, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Mnhg-VEP69U", "annotations": [{"segment": [6.7925068893912215, 54.81839957410296], "label": "Changing car wheel"}]}, "hf1JuvacaJc": {"duration": 170.505, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hf1JuvacaJc", "annotations": [{"segment": [0.01, 55.59338190398364], "label": "Playing ten pins"}, {"segment": [60.6473257134367, 77.40513939741264], "label": "Playing ten pins"}, {"segment": [86.44903884590757, 133.7965121872075], "label": "Playing ten pins"}, {"segment": [138.85045598751952, 166.51414836817474], "label": "Playing ten pins"}]}, "pJRUkudK6iE": {"duration": 197.7, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pJRUkudK6iE", 
"annotations": [{"segment": [0.01, 192.14320481390683], "label": "Cleaning shoes"}]}, "-ixct4B3dtg": {"duration": 210.1, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=-ixct4B3dtg", "annotations": []}, "oIEDMaMo7UE": {"duration": 9.71, "subset": "validation", "resolution": "350x240", "url": "https://www.youtube.com/watch?v=oIEDMaMo7UE", "annotations": [{"segment": [0.5253276131045242, 7.159464898595944], "label": "Hammer throw"}]}, "xsdrqauYhJs": {"duration": 130.59, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=xsdrqauYhJs", "annotations": [{"segment": [15.890708068642745, 93.51070517316693], "label": "Baking cookies"}, {"segment": [104.10451055226208, 113.67968079875195], "label": "Baking cookies"}]}, "7KKYKIlGaXk": {"duration": 195.304, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7KKYKIlGaXk", "annotations": [{"segment": [9.721897869146362, 13.063801855209425], "label": "Wakeboarding"}, {"segment": [21.57046654700631, 30.684750145360113], "label": "Wakeboarding"}, {"segment": [59.242838753535366, 115.14377815677203], "label": "Wakeboarding"}, {"segment": [133.0685359002012, 151.60091255018725], "label": "Wakeboarding"}, {"segment": [160.41138549798373, 164.96852726314705], "label": "Wakeboarding"}, {"segment": [176.20947695054983, 178.33614310762604], "label": "Wakeboarding"}]}, "qmlohhdz784": {"duration": 180.7, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qmlohhdz784", "annotations": [{"segment": [0, 180.49768707482994], "label": "Playing racquetball"}]}, "7VvcQNSAd24": {"duration": 48.97, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7VvcQNSAd24", "annotations": [{"segment": [4.889448636505461, 41.48391577535101], "label": "Shaving"}]}, "tgnyGoKM2tM": {"duration": 130.4, "subset": "training", "resolution": "648x480", "url": 
"https://www.youtube.com/watch?v=tgnyGoKM2tM", "annotations": [{"segment": [34.27382049, 128.57233022382118], "label": "Fixing bicycle"}]}, "-76d-7Ju7L0": {"duration": 191.89, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-76d-7Ju7L0", "annotations": [{"segment": [38.013021840873634, 58.96508112324493], "label": "Changing car wheel"}, {"segment": [76.32535881435257, 109.25002340093603], "label": "Changing car wheel"}]}, "z0tiCqKa4cs": {"duration": 92.74000000000001, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=z0tiCqKa4cs", "annotations": [{"segment": [13.338915359999998, 53.43242830499219], "label": "Making a sandwich"}]}, "7rd23-3wEyU": {"duration": 198.86, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7rd23-3wEyU", "annotations": [{"segment": [139.77863535558538, 142.87606683533616], "label": "Smoking hookah"}]}, "OEBSls79Ths": {"duration": 65.06, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=OEBSls79Ths", "annotations": [{"segment": [4.060050608424337, 65.06231292517006], "label": "Using uneven bars"}]}, "lpZytCK-g1M": {"duration": 72.33, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=lpZytCK-g1M", "annotations": [{"segment": [0.01, 72.33018140589569], "label": "Washing face"}]}, "iBEUNOMTr8M": {"duration": 182.533, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=iBEUNOMTr8M", "annotations": [{"segment": [83.98749438212123, 161.32195055657604], "label": "Roof shingle removal"}]}, "1wiGNX_S8ZU": {"duration": 109.065, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1wiGNX_S8ZU", "annotations": [{"segment": [0, 109.06412698412699], "label": "Capoeira"}]}, "_VPf75tGIHQ": {"duration": 60.14, "subset": "validation", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=_VPf75tGIHQ", "annotations": [{"segment": [7.641546801872075, 54.99100858034321], "label": "Playing racquetball"}]}, "Sx3NHkPp3Jo": {"duration": 112.34, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Sx3NHkPp3Jo", "annotations": [{"segment": [5.343051190327613, 102.91942866614664], "label": "Triple jump"}]}, "iUGuDzgow2I": {"duration": 48.65, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iUGuDzgow2I", "annotations": [{"segment": [4.78910025, 36.94830975], "label": "Using parallel bars"}]}, "dk4maV6EqBw": {"duration": 31.092, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dk4maV6EqBw", "annotations": []}, "2DTwoKZsLMo": {"duration": 163.632, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2DTwoKZsLMo", "annotations": [{"segment": [7.785875386035849, 30.50531503709128], "label": "Putting on shoes"}, {"segment": [82.07078410062402, 86.41045231123246], "label": "Putting on shoes"}]}, "Ufi6HgfXMBY": {"duration": 122.8, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Ufi6HgfXMBY", "annotations": []}, "8Yfm6gbKRho": {"duration": 92.11, "subset": "training", "resolution": "766x480", "url": "https://www.youtube.com/watch?v=8Yfm6gbKRho", "annotations": [{"segment": [2.69706864, 63.76614538], "label": "Cricket"}]}, "pzHDfjY-pFY": {"duration": 204.08, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=pzHDfjY-pFY", "annotations": [{"segment": [0.20000000000000004, 203.841], "label": "Waterskiing"}]}, "5oPGbuL8G5Y": {"duration": 188.221, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5oPGbuL8G5Y", "annotations": [{"segment": [0.2, 185.8755199525293], "label": "Waterskiing"}]}, "8729_qnILMM": {"duration": 85.61, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=8729_qnILMM", "annotations": [{"segment": [4.672909516380655, 79.97350858034322], "label": "Playing flauta"}]}, "iPHZeElXYXU": {"duration": 176.43, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=iPHZeElXYXU", "annotations": [{"segment": [26.181481962336104, 40.47137468507754], "label": "Ironing clothes"}, {"segment": [50.36437733928315, 158.91260090626133], "label": "Ironing clothes"}]}, "1XtjXqqPvyQ": {"duration": 138.67, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1XtjXqqPvyQ", "annotations": [{"segment": [0.01802666475314469, 133.2792661405722], "label": "Disc dog"}]}, "9hTcvGUnMBs": {"duration": 157.29, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9hTcvGUnMBs", "annotations": []}, "FNX7CeuIvIQ": {"duration": 184.599, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=FNX7CeuIvIQ", "annotations": [{"segment": [9.212655226209048, 150.29772542901716], "label": "Disc dog"}]}, "Q48_MDiak-w": {"duration": 13.17, "subset": "training", "resolution": "568x320", "url": "https://www.youtube.com/watch?v=Q48_MDiak-w", "annotations": [{"segment": [2.0333487217039705, 9.632732833122848], "label": "Hammer throw"}]}, "YtPZMBePf5c": {"duration": 37.547, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=YtPZMBePf5c", "annotations": [{"segment": [0.01, 32.64125149090491], "label": "Having an ice cream"}]}, "AjiCJiOIDtI": {"duration": 37.129, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AjiCJiOIDtI", "annotations": [{"segment": [0.01, 35.82124698534225], "label": "Kneeling"}]}, "c1YUEusus3c": {"duration": 90.28, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=c1YUEusus3c", "annotations": []}, "hlvs-e3bCq0": {"duration": 175.288, "subset": "validation", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=hlvs-e3bCq0", "annotations": [{"segment": [8.750698915031432, 80.67050562294601], "label": "Skiing"}]}, "5i6-j67nzCo": {"duration": 132.517, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=5i6-j67nzCo", "annotations": []}, "QDTo_ss6INM": {"duration": 124.16, "subset": "validation", "resolution": "634x480", "url": "https://www.youtube.com/watch?v=QDTo_ss6INM", "annotations": [{"segment": [24.986373453978157, 92.39147393447737], "label": "Preparing salad"}]}, "v26OP6f29Qc": {"duration": 145.427, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=v26OP6f29Qc", "annotations": [{"segment": [99.5979201998012, 109.58039967312979], "label": "Rock-paper-scissors"}, {"segment": [118.20163194554993, 145.22657596371883], "label": "Rock-paper-scissors"}]}, "Qt2ktBj1l-Y": {"duration": 80.95, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Qt2ktBj1l-Y", "annotations": [{"segment": [17.5786219, 73.58797098000001], "label": "Shoveling snow"}]}, "IEtXUErbdRA": {"duration": 119.351, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IEtXUErbdRA", "annotations": []}, "Q-fUXywUo7o": {"duration": 144.61, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Q-fUXywUo7o", "annotations": [{"segment": [14.249274582705393, 68.97385869485491], "label": "Polishing shoes"}, {"segment": [74.15355184127236, 93.52110012787672], "label": "Polishing shoes"}, {"segment": [102.07885402195772, 110.86181196588295], "label": "Polishing shoes"}]}, "EjUMR9v_8Pg": {"duration": 136.53, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EjUMR9v_8Pg", "annotations": [{"segment": [34.718975205266716, 136.53290249433107], "label": "Breakdancing"}]}, "p6jip892RdM": {"duration": 206.71, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=p6jip892RdM", "annotations": [{"segment": [15.15267285803432, 184.4112526552262], "label": "Doing motocross"}]}, "pc5_pexVob8": {"duration": 190.5, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pc5_pexVob8", "annotations": [{"segment": [95.67067082683306, 157.47035881435255], "label": "Preparing salad"}]}, "UGKGBBAckJw": {"duration": 82.76, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UGKGBBAckJw", "annotations": [{"segment": [21.302231313742343, 82.755941], "label": "Doing step aerobics"}]}, "XlSmrSegTME": {"duration": 24.474, "subset": "testing", "resolution": "1906x1080", "url": "https://www.youtube.com/watch?v=XlSmrSegTME", "annotations": []}, "aSxSgymPOBw": {"duration": 95.64, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aSxSgymPOBw", "annotations": [{"segment": [0.01, 88.03333117004681], "label": "Walking the dog"}]}, "G4XDVF_hYZc": {"duration": 32.601, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=G4XDVF_hYZc", "annotations": [{"segment": [0.5841677067082683, 25.01761700468019], "label": "Playing rubik cube"}]}, "LIVk_vFb9Ps": {"duration": 206.17, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LIVk_vFb9Ps", "annotations": []}, "oEDBkmmVKM0": {"duration": 60.47, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oEDBkmmVKM0", "annotations": [{"segment": [5.32386895475819, 12.579407176287052], "label": "Playing harmonica"}, {"segment": [16.819656786271448, 37.83244929797192], "label": "Playing harmonica"}, {"segment": [42.9207488299532, 58.18564742589704], "label": "Playing harmonica"}]}, "un6VqJYUpDo": {"duration": 109.6, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=un6VqJYUpDo", "annotations": [{"segment": [31.374831145085807, 
35.13639128237129], "label": "Cleaning windows"}, {"segment": [42.65951155694228, 52.063411900156005], "label": "Cleaning windows"}, {"segment": [79.42021289859595, 85.74647312948518], "label": "Cleaning windows"}]}, "rrNJt73BG_8": {"duration": 57.1, "subset": "training", "resolution": "658x480", "url": "https://www.youtube.com/watch?v=rrNJt73BG_8", "annotations": [{"segment": [6.868004876145692, 38.50930705683105], "label": "Washing hands"}]}, "tilmCzSf1oc": {"duration": 22.48, "subset": "training", "resolution": "160x120", "url": "https://www.youtube.com/watch?v=tilmCzSf1oc", "annotations": [{"segment": [6.612403839999999, 18.695826053042122], "label": "Doing karate"}]}, "iddZ6YIWLWc": {"duration": 115.171, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iddZ6YIWLWc", "annotations": [{"segment": [19.22510798439938, 94.14912695163807], "label": "Slacklining"}]}, "P4Z4sQNQtJM": {"duration": 209.3, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=P4Z4sQNQtJM", "annotations": [{"segment": [0.01, 179.75385465678627], "label": "Tai chi"}]}, "2tO1ApNwXpQ": {"duration": 111.83, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=2tO1ApNwXpQ", "annotations": [{"segment": [45.35896784711389, 57.57099765210609], "label": "Polishing shoes"}, {"segment": [68.21290933931357, 111.6273015], "label": "Polishing shoes"}]}, "6tDur7vIIM4": {"duration": 222.935, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6tDur7vIIM4", "annotations": []}, "dPQ9D-BiKH4": {"duration": 122.9, "subset": "testing", "resolution": "636x360", "url": "https://www.youtube.com/watch?v=dPQ9D-BiKH4", "annotations": []}, "qlqF8K072UU": {"duration": 161.5, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qlqF8K072UU", "annotations": [{"segment": [3.651647425897036, 26.568882995319814], "label": "Triple jump"}, 
{"segment": [42.43466146645866, 60.06330421216849], "label": "Triple jump"}, {"segment": [82.98053978159126, 94.56507644305772], "label": "Triple jump"}, {"segment": [121.76355382215289, 130.5778751950078], "label": "Triple jump"}, {"segment": [137.3774945397816, 158.02819032761312], "label": "Triple jump"}]}, "LVG7DLzI-GM": {"duration": 66.502, "subset": "training", "resolution": "2048x1152", "url": "https://www.youtube.com/watch?v=LVG7DLzI-GM", "annotations": [{"segment": [3.835127925117005, 55.97213728549142], "label": "Playing blackjack"}]}, "dvHj856L8zY": {"duration": 126.64, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dvHj856L8zY", "annotations": [{"segment": [0, 126.64163265306122], "label": "Horseback riding"}]}, "6Epn-R6sg5U": {"duration": 85.102, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=6Epn-R6sg5U", "annotations": [{"segment": [0.01, 69.64508873341754], "label": "Playing pool"}]}, "v-xUwDARVb4": {"duration": 158.64, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=v-xUwDARVb4", "annotations": [{"segment": [9.528191840093603, 141.4379645873635], "label": "Playing guitarra"}]}, "pKV8lrvbX6Q": {"duration": 38.151, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=pKV8lrvbX6Q", "annotations": [{"segment": [0.2, 10.359921996879876], "label": "Using the monkey bar"}, {"segment": [22.863276131045243, 33.52089703588143], "label": "Using the monkey bar"}]}, "p-uTwZ9Ph-A": {"duration": 13.58, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=p-uTwZ9Ph-A", "annotations": [{"segment": [0.01, 13.032696224648985], "label": "Playing squash"}]}, "oTsTx2BhtgY": {"duration": 184.646, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=oTsTx2BhtgY", "annotations": [{"segment": [0.01, 176.82529485179407], "label": "Sharpening 
knives"}]}, "4wIqgWw53XE": {"duration": 163.86, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4wIqgWw53XE", "annotations": [{"segment": [47.01931981279251, 61.074007800312], "label": "Windsurfing"}, {"segment": [74.87315600624025, 140.29134009360374], "label": "Windsurfing"}]}, "a0YyuiZVtFU": {"duration": 27.539, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=a0YyuiZVtFU", "annotations": [{"segment": [0.21436037441497663, 27.286483619344775], "label": "Kneeling"}]}, "uQlRZj_jQ9c": {"duration": 59.769, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uQlRZj_jQ9c", "annotations": []}, "1epGZvRN3Fw": {"duration": 192.56, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=1epGZvRN3Fw", "annotations": [{"segment": [24.47809906396256, 44.30085413416536], "label": "Snatch"}, {"segment": [50.9084391575663, 70.73119422776911], "label": "Snatch"}, {"segment": [96.56084477379093, 123.59187441497659], "label": "Snatch"}]}, "FMVECEaQ0Jo": {"duration": 169.669, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FMVECEaQ0Jo", "annotations": [{"segment": [17.465391575663027, 21.964053042121687], "label": "Running a marathon"}, {"segment": [59.01463650546021, 65.10134789391576], "label": "Running a marathon"}, {"segment": [93.68242745709827, 99.76913884555383], "label": "Running a marathon"}]}, "wfCH5qwHoeQ": {"duration": 87.642, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wfCH5qwHoeQ", "annotations": []}, "tb7s5a1H-IU": {"duration": 151.348, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=tb7s5a1H-IU", "annotations": [{"segment": [57.13906438706528, 102.00031328600083], "label": "Blow-drying hair"}]}, "6lYTHj9vImo": {"duration": 60.117, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=6lYTHj9vImo", "annotations": [{"segment": [0.01, 37.77411286468916], "label": "Building sandcastles"}]}, "ZeugCIqbsD4": {"duration": 48.832, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZeugCIqbsD4", "annotations": [{"segment": [11.19850196723869, 44.41310644149766], "label": "Peeling potatoes"}, {"segment": [0.2, 48.631563], "label": "Peeling potatoes"}]}, "I91LmNcwN4Y": {"duration": 126.456, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=I91LmNcwN4Y", "annotations": [{"segment": [60.44409190031153, 81.31403894080997], "label": "Painting furniture"}]}, "cz2ESqP3PDk": {"duration": 48.995, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=cz2ESqP3PDk", "annotations": [{"segment": [4.428358814352574, 47.03222464898596], "label": "Playing blackjack"}]}, "qcjRTlgnuXY": {"duration": 55.264, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qcjRTlgnuXY", "annotations": []}, "yGjIBQLyAMY": {"duration": 80.829, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=yGjIBQLyAMY", "annotations": []}, "EP50UmPrcAs": {"duration": 171.108, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EP50UmPrcAs", "annotations": []}, "fbkFWMYPado": {"duration": 103.65, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fbkFWMYPado", "annotations": []}, "hQeDWu5NOj8": {"duration": 128.384, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=hQeDWu5NOj8", "annotations": [{"segment": [0.2, 90.38773826791278], "label": "Rafting"}]}, "BMzspHz04Q8": {"duration": 100.6, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BMzspHz04Q8", "annotations": [{"segment": [4.48533855, 90.33075473166927], "label": "Using the balance beam"}]}, 
"POckWq-93Uo": {"duration": 134.91, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=POckWq-93Uo", "annotations": []}, "25PDCgM4qV0": {"duration": 238.24, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=25PDCgM4qV0", "annotations": []}, "nEX4I5j6H-Y": {"duration": 201.968, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nEX4I5j6H-Y", "annotations": []}, "kdS1zN8Ej1s": {"duration": 69.266, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kdS1zN8Ej1s", "annotations": []}, "i3u27-oJzBY": {"duration": 124.04, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=i3u27-oJzBY", "annotations": [{"segment": [90.95031371294851, 113.78464779407176], "label": "Vacuuming floor"}]}, "yJi5VXV1O3I": {"duration": 54.823, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yJi5VXV1O3I", "annotations": []}, "RKzwMrL5Th4": {"duration": 67.407, "subset": "validation", "resolution": "562x480", "url": "https://www.youtube.com/watch?v=RKzwMrL5Th4", "annotations": [{"segment": [0.01, 67.40702947845806], "label": "Futsal"}]}, "qXNYHbnGvto": {"duration": 186.81, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=qXNYHbnGvto", "annotations": [{"segment": [10.199935553822153, 179.2274390171607], "label": "Grooming horse"}]}, "pyeTqpj2EPo": {"duration": 133.66, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pyeTqpj2EPo", "annotations": [{"segment": [24.916786342201984, 37.844324026022264], "label": "Preparing pasta"}, {"segment": [50.14633569288349, 86.63535334882783], "label": "Preparing pasta"}]}, "oGwn4NUeoy8": {"duration": 153.09, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=oGwn4NUeoy8", "annotations": [{"segment": [37.01843986684637, 42.0338413971933], 
"label": "Playing congas"}, {"segment": [38.929069021264255, 43.227984618704475], "label": "Playing congas"}, {"segment": [14.568547302436315, 17.43449103406313], "label": "Playing congas"}, {"segment": [19.34512018848101, 20.77809205429442], "label": "Playing congas"}, {"segment": [22.688721208712295, 26.032322228943578], "label": "Playing congas"}, {"segment": [28.65943731626816, 30.80889511498827], "label": "Playing congas"}, {"segment": [33.91366749091732, 36.5407825782419], "label": "Playing congas"}, {"segment": [39.40672630986872, 42.0338413971933], "label": "Playing congas"}, {"segment": [44.66095648451788, 47.04924292754023], "label": "Playing congas"}, {"segment": [50.39284394777152, 52.78113039079386], "label": "Playing congas"}, {"segment": [56.12473141102515, 59.46833243125643], "label": "Playing congas"}, {"segment": [62.33427616288325, 65.91670582741676], "label": "Playing congas"}, {"segment": [68.78264955904359, 73.32039380078605], "label": "Playing congas"}, {"segment": [76.90282346531956, 80.96291041845755], "label": "Playing congas"}, {"segment": [83.82885415008438, 87.65011245892013], "label": "Playing congas"}, {"segment": [90.99371347915141, 94.33731449938269], "label": "Playing congas"}, {"segment": [100.54685925124079, 104.36811756007654], "label": "Playing congas"}, {"segment": [104.60694620437879, 109.86117637902795], "label": "Playing congas"}, {"segment": [114.3989206207704, 119.1754935068151], "label": "Playing congas"}, {"segment": [121.80260859413968, 122.28026588274415], "label": "Playing congas"}, {"segment": [126.81801012448662, 131.5945830105313], "label": "Playing congas"}, {"segment": [133.74404080925143, 139.47592827250503], "label": "Playing congas"}, {"segment": [142.5807006484341, 148.55141675598995], "label": "Playing congas"}]}, "7UpgIJtQIPk": {"duration": 35.55, "subset": "testing", "resolution": "972x720", "url": "https://www.youtube.com/watch?v=7UpgIJtQIPk", "annotations": []}, "oezddremlnE": {"duration": 152.138, 
"subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=oezddremlnE", "annotations": [{"segment": [45.55312324492979, 123.6265538221529], "label": "Braiding hair"}]}, "g4Kxg9fWtNI": {"duration": 207.05, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=g4Kxg9fWtNI", "annotations": []}, "69IsHpmRyfk": {"duration": 94.72, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=69IsHpmRyfk", "annotations": [{"segment": [2.5835842433697347, 88.65384789391575], "label": "Getting a piercing"}]}, "Ub2g9Yzon0c": {"duration": 218.41, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Ub2g9Yzon0c", "annotations": []}, "2icoQWmbocU": {"duration": 68.85, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=2icoQWmbocU", "annotations": [{"segment": [6.551758196567863, 16.862721915756634], "label": "Tai chi"}, {"segment": [23.62929185647426, 38.66611394695788], "label": "Tai chi"}, {"segment": [41.458666620904836, 60.89912946645867], "label": "Tai chi"}]}, "75u6o4tXGC0": {"duration": 225.93, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=75u6o4tXGC0", "annotations": [{"segment": [26.787351113092143, 59.21414456578263], "label": "Preparing salad"}, {"segment": [65.55851698595944, 94.10819373790952], "label": "Preparing salad"}, {"segment": [116.31349787831513, 121.24800990951637], "label": "Preparing salad"}, {"segment": [131.8219642620905, 133.2318248424337], "label": "Preparing salad"}, {"segment": [140.28112774414976, 149.44522151638066], "label": "Preparing salad"}, {"segment": [165.65861819032762, 166.71601362558505], "label": "Preparing salad"}, {"segment": [183.28187544461778, 199.49527211856477], "label": "Preparing salad"}]}, "XQMG33we6eI": {"duration": 213.97, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=XQMG33we6eI", 
"annotations": []}, "1f_EQl4C700": {"duration": 204.197, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1f_EQl4C700", "annotations": [{"segment": [35.9971123634061, 59.251883756630264], "label": "Making a cake"}, {"segment": [59.570442271450865, 100.98304919812793], "label": "Making a cake"}, {"segment": [104.16863434633386, 150.996736024961], "label": "Making a cake"}]}, "xE43h7Kd9Oc": {"duration": 215.621, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xE43h7Kd9Oc", "annotations": [{"segment": [0.2, 122.92383580223365], "label": "Decorating the Christmas tree"}]}, "3XUxm78gjJE": {"duration": 143.24, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3XUxm78gjJE", "annotations": [{"segment": [10.72746827, 140.93973966146646], "label": "Cricket"}]}, "zc-RT9tpoBA": {"duration": 218.66, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=zc-RT9tpoBA", "annotations": [{"segment": [68.56649039022786, 76.41240720104994], "label": "Playing harmonica"}, {"segment": [85.96395810118119, 96.88001627275976], "label": "Playing harmonica"}, {"segment": [114.95973761943675, 126.89917624460081], "label": "Playing harmonica"}, {"segment": [150.09579985920524, 157.60058895163806], "label": "Playing harmonica"}]}, "nXD-q814KjI": {"duration": 25.264, "subset": "training", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=nXD-q814KjI", "annotations": [{"segment": [0, 24.51317316692668], "label": "Painting fence"}]}, "JVfhBvlv0IY": {"duration": 125.39, "subset": "training", "resolution": "634x360", "url": "https://www.youtube.com/watch?v=JVfhBvlv0IY", "annotations": [{"segment": [8.802548632557547, 108.17354208454053], "label": "Long jump"}]}, "avJNxcysMCk": {"duration": 95.69, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=avJNxcysMCk", "annotations": [{"segment": 
[17.316586806864276, 90.61351889453978], "label": "Running a marathon"}]}, "OZiArYAPXDo": {"duration": 184.85, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=OZiArYAPXDo", "annotations": [{"segment": [9.07946957878315, 168.18636505460216], "label": "Discus throw"}]}, "-cAcA8dO7kA": {"duration": 11.63, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-cAcA8dO7kA", "annotations": []}, "EwoxOKwqUPI": {"duration": 142.2, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EwoxOKwqUPI", "annotations": [{"segment": [0.01, 142.141], "label": "Beer pong"}]}, "FBANd818hU4": {"duration": 221.705, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=FBANd818hU4", "annotations": [{"segment": [1.0376163884555383, 221.70403628117913], "label": "Futsal"}]}, "bTE3HEy_kVk": {"duration": 56.402, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=bTE3HEy_kVk", "annotations": [{"segment": [26.83679678627145, 34.843841073322935], "label": "Fixing the roof"}]}, "ImysAb63CLk": {"duration": 30.98, "subset": "testing", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=ImysAb63CLk", "annotations": []}, "l0btLzdAeuM": {"duration": 151.58, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=l0btLzdAeuM", "annotations": [{"segment": [20.74498555876472, 89.10915322406665], "label": "Table soccer"}, {"segment": [129.18469994257364, 140.50014842779916], "label": "Table soccer"}]}, "uLXSOqRSa4A": {"duration": 226.74, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=uLXSOqRSa4A", "annotations": [{"segment": [24.053801139800694, 144.88360315999998], "label": "Bungee jumping"}]}, "wdD-UHM8rTg": {"duration": 223.1, "subset": "validation", "resolution": "400x226", "url": "https://www.youtube.com/watch?v=wdD-UHM8rTg", "annotations": 
[{"segment": [50.814679161316114, 59.51582251638066], "label": "Playing violin"}, {"segment": [112.07073011856473, 117.29141630421216], "label": "Playing violin"}, {"segment": [120.0757822698908, 125.2964684555382], "label": "Playing violin"}, {"segment": [136.0858865725429, 139.21829828393135], "label": "Playing violin"}, {"segment": [141.30657275819033, 144.09093872386893], "label": "Playing violin"}, {"segment": [154.18426534945397, 156.27253982371295], "label": "Playing violin"}, {"segment": [164.27759197503897, 168.80218666926675], "label": "Playing violin"}]}, "JXvPFMjtbcw": {"duration": 227.185, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JXvPFMjtbcw", "annotations": [{"segment": [13.465457098283931, 227.141], "label": "Making an omelette"}]}, "pem8BpCspUM": {"duration": 4.27, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=pem8BpCspUM", "annotations": [{"segment": [0.013232449297971918, 4.194686427457098], "label": "Playing kickball"}]}, "GBFRHM7i-NQ": {"duration": 129.823, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=GBFRHM7i-NQ", "annotations": [{"segment": [86.84673322932919, 120.26507644305772], "label": "Bullfighting"}]}, "1imA9vLRd3k": {"duration": 152.6, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1imA9vLRd3k", "annotations": [{"segment": [22.859232449297973, 42.62294383775352], "label": "Doing karate"}]}, "SAaqnGbci6Y": {"duration": 156.64, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=SAaqnGbci6Y", "annotations": [{"segment": [8.696708319999999, 151.02088560374415], "label": "Washing dishes"}]}, "avxSMcGBdG8": {"duration": 91.325, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=avxSMcGBdG8", "annotations": [{"segment": [1.4624716365493347, 6.729957617857746], "label": "Croquet"}, {"segment": 
[73.783630515054, 78.05456509449326], "label": "Croquet"}]}, "Cul7VFkLTMc": {"duration": 147.424, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Cul7VFkLTMc", "annotations": [{"segment": [0.01, 147.423038], "label": "Surfing"}]}, "MZkc1bGdVPg": {"duration": 151.464, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=MZkc1bGdVPg", "annotations": []}, "dWPlarw_xg8": {"duration": 214.69, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=dWPlarw_xg8", "annotations": [{"segment": [16.908571190327613, 199.05238757722307], "label": "Long jump"}]}, "pNmAqAYOALA": {"duration": 78.39, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pNmAqAYOALA", "annotations": []}, "zAPoqg3REns": {"duration": 233.767, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zAPoqg3REns", "annotations": []}, "bJj66H_WsDA": {"duration": 97.849, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bJj66H_WsDA", "annotations": [{"segment": [0.19999999999999998, 92.78497347893916], "label": "Hurling"}]}, "e9bdQGmyrKA": {"duration": 97.501, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=e9bdQGmyrKA", "annotations": [{"segment": [0.19999999999999998, 39.996104524180964], "label": "Rope skipping"}, {"segment": [53.378831513260536, 97.481], "label": "Rope skipping"}]}, "cp4n_xpbBDs": {"duration": 81.433, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cp4n_xpbBDs", "annotations": [{"segment": [18.743376576038656, 51.79757786464119], "label": "Painting fence"}]}, "OabVylOVys4": {"duration": 207.215, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OabVylOVys4", "annotations": [{"segment": [54.14011801830633, 79.92112894004154], "label": "Rafting"}, {"segment": 
[108.60250359047197, 111.82512995568885], "label": "Rafting"}, {"segment": [167.8988287104629, 173.69955509199372], "label": "Rafting"}]}, "CArYinl5tFo": {"duration": 146.124, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=CArYinl5tFo", "annotations": [{"segment": [30.30615600624025, 130.5671232449298], "label": "Assembling bicycle"}]}, "Tm7N2HU4noQ": {"duration": 216.06, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Tm7N2HU4noQ", "annotations": [{"segment": [8.08967281123245, 11.460369815912637], "label": "Layup drill in basketball"}, {"segment": [35.72938824960998, 37.07766705148206], "label": "Layup drill in basketball"}, {"segment": [41.12250345709828, 42.80785195943838], "label": "Layup drill in basketball"}, {"segment": [86.96398272074883, 89.32347062402496], "label": "Layup drill in basketball"}, {"segment": [91.6829585273011, 93.03123732917317], "label": "Layup drill in basketball"}, {"segment": [103.48039804368176, 106.51402534789392], "label": "Layup drill in basketball"}, {"segment": [109.21058295163806, 111.23300115444619], "label": "Layup drill in basketball"}, {"segment": [130.78304378159126, 133.14253168486738], "label": "Layup drill in basketball"}, {"segment": [137.52443779095162, 140.2209953946958], "label": "Layup drill in basketball"}, {"segment": [148.31066820592824, 149.99601670826834], "label": "Layup drill in basketball"}, {"segment": [155.72620161622464, 158.08568951950076], "label": "Layup drill in basketball"}]}, "pZxteNqdweM": {"duration": 80.36, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pZxteNqdweM", "annotations": [{"segment": [3.1334243369734787, 76.45555382215288], "label": "Mixing drinks"}]}, "TdZOfBHjU5g": {"duration": 232.07999999999998, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TdZOfBHjU5g", "annotations": [{"segment": [11.948882995319813, 
225.21834009360376], "label": "Polishing forniture"}]}, "c4GQ3WTOeAA": {"duration": 105.72, "subset": "training", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=c4GQ3WTOeAA", "annotations": [{"segment": [8.278562733443822, 70.854316347151], "label": "Wrapping presents"}, {"segment": [81.88740974746251, 104.94163476303885], "label": "Wrapping presents"}]}, "eFauvKa-AjY": {"duration": 43.584, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=eFauvKa-AjY", "annotations": []}, "RZdX_1VP7mk": {"duration": 164.119, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RZdX_1VP7mk", "annotations": []}, "P3oTMGfwOOs": {"duration": 135.768, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=P3oTMGfwOOs", "annotations": [{"segment": [19.244242693962317, 109.33267552503868], "label": "Roof shingle removal"}]}, "Pp4U2Ql7bhY": {"duration": 200.079, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Pp4U2Ql7bhY", "annotations": [{"segment": [0.01, 200.081], "label": "Welding"}]}, "wJuRhdHgVO4": {"duration": 54.2, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wJuRhdHgVO4", "annotations": []}, "EavcvmrlVLQ": {"duration": 159.916, "subset": "training", "resolution": "562x480", "url": "https://www.youtube.com/watch?v=EavcvmrlVLQ", "annotations": [{"segment": [0.2, 156.1396349453978], "label": "Futsal"}]}, "FzWsQ1W_NKM": {"duration": 60.35, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FzWsQ1W_NKM", "annotations": []}, "B0cb0B90Ubg": {"duration": 86.402, "subset": "validation", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=B0cb0B90Ubg", "annotations": [{"segment": [9.028903276131045, 22.774397815912636], "label": "Grooming dog"}, {"segment": [27.22146957878315, 51.34346489859595], "label": "Grooming dog"}, {"segment": 
[52.55630265210609, 80.99060998439938], "label": "Grooming dog"}]}, "rqmi-DjYp0U": {"duration": 36.9, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rqmi-DjYp0U", "annotations": [{"segment": [3.6979329173166926, 12.813767550702027], "label": "Drinking beer"}, {"segment": [18.317667706708267, 35.34535881435257], "label": "Drinking beer"}]}, "PzeM5iOLQnk": {"duration": 110.34, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=PzeM5iOLQnk", "annotations": [{"segment": [25.648740170014964, 86.586015473272], "label": "Fixing bicycle"}]}, "g2UjvJaSZuI": {"duration": 77.49, "subset": "testing", "resolution": "362x240", "url": "https://www.youtube.com/watch?v=g2UjvJaSZuI", "annotations": []}, "E2yPoqpNVdM": {"duration": 115.06, "subset": "validation", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=E2yPoqpNVdM", "annotations": [{"segment": [28.18036695475819, 115.0548752834467], "label": "Paintball"}]}, "Uj1QtIM8500": {"duration": 155.667, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Uj1QtIM8500", "annotations": [{"segment": [14.402795716157698, 155.66657596371883], "label": "Putting in contact lenses"}]}, "JJ811udnROI": {"duration": 37.06, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=JJ811udnROI", "annotations": [{"segment": [14.569235326053041, 37.059047], "label": "Hopscotch"}]}, "g0B_4IlLn3g": {"duration": 36.595, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=g0B_4IlLn3g", "annotations": [{"segment": [4.624284566302652, 18.154598667706708], "label": "Gargling mouthwash"}]}, "jNq7v-ryQ98": {"duration": 65.086, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=jNq7v-ryQ98", "annotations": []}, "j1QaZ7YWcJ8": {"duration": 155.039, "subset": "training", "resolution": "1088x1920", "url": 
"https://www.youtube.com/watch?v=j1QaZ7YWcJ8", "annotations": [{"segment": [0.2, 154.781], "label": "Playing congas"}]}, "f-r9g_-5huM": {"duration": 39.71, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=f-r9g_-5huM", "annotations": [{"segment": [5.499903703588143, 36.33644244617784], "label": "Playing harmonica"}]}, "iNqruq2PS9s": {"duration": 64.37, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=iNqruq2PS9s", "annotations": []}, "h3H6TzHC8Cc": {"duration": 124.39, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=h3H6TzHC8Cc", "annotations": [{"segment": [5.090414541633392, 122.6983626288608], "label": "Doing kickboxing"}]}, "TFkI_wKMypE": {"duration": 15.88, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=TFkI_wKMypE", "annotations": [{"segment": [5.128841034477379, 14.890983086942526], "label": "Hopscotch"}]}, "vxJlOKF1VB4": {"duration": 229.82999999999998, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=vxJlOKF1VB4", "annotations": [{"segment": [108.43886241999999, 184.79455251404056], "label": "Brushing hair"}]}, "oIKpoLWA1GA": {"duration": 14.374, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=oIKpoLWA1GA", "annotations": []}, "3GVrO1VYAOI": {"duration": 50.504, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3GVrO1VYAOI", "annotations": []}, "ZxPF3s_OLDo": {"duration": 182.9, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZxPF3s_OLDo", "annotations": []}, "IqRwR1a9ia0": {"duration": 20.341, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=IqRwR1a9ia0", "annotations": [{"segment": [0.6956728971962616, 16.443177570093457], "label": "Plastering"}]}, "aa5jHg4E3O0": {"duration": 202.66, "subset": "validation", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=aa5jHg4E3O0", "annotations": [{"segment": [0.01, 4.741521060842434], "label": "Drinking coffee"}]}, "m0u1kjhlHJ4": {"duration": 140.295, "subset": "training", "resolution": "624x352", "url": "https://www.youtube.com/watch?v=m0u1kjhlHJ4", "annotations": [{"segment": [0.2, 37.20771328812336], "label": "Hurling"}, {"segment": [37.645451091513046, 123.66092945758645], "label": "Hurling"}, {"segment": [128.25717639317816, 140.09496598639456], "label": "Hurling"}]}, "ehO168THGqU": {"duration": 218.48, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ehO168THGqU", "annotations": [{"segment": [28.66842072, 196.77043621684868], "label": "Skateboarding"}]}, "KU4twxFnX5Q": {"duration": 144.429, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=KU4twxFnX5Q", "annotations": [{"segment": [12.615375975039003, 119.84607176287052], "label": "Running a marathon"}]}, "ame0HVidIhA": {"duration": 80.69, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ame0HVidIhA", "annotations": []}, "XhFAi-1uK5Q": {"duration": 148.237, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=XhFAi-1uK5Q", "annotations": []}, "bR62246z9qA": {"duration": 199.81, "subset": "training", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=bR62246z9qA", "annotations": [{"segment": [18.388578783151324, 168.92558814352574], "label": "Cheerleading"}]}, "J_CJSmMFWlg": {"duration": 33.32, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=J_CJSmMFWlg", "annotations": [{"segment": [1.4793915608424337, 32.67638552808112], "label": "Doing kickboxing"}]}, "uvs1pb32xZM": {"duration": 101.634, "subset": "training", "resolution": "568x320", "url": "https://www.youtube.com/watch?v=uvs1pb32xZM", "annotations": [{"segment": [0.01, 101.6336279], "label": 
"Futsal"}]}, "mtC3trP8ahY": {"duration": 188.31, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mtC3trP8ahY", "annotations": [{"segment": [16.153926099843993, 175.6372328673947], "label": "Playing lacrosse"}]}, "ERmsX91_7V4": {"duration": 94.55, "subset": "training", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=ERmsX91_7V4", "annotations": [{"segment": [0, 83.01922464898597], "label": "Layup drill in basketball"}]}, "smh90DBXsBg": {"duration": 129.2, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=smh90DBXsBg", "annotations": [{"segment": [32.33745311466459, 123.00320951092044], "label": "Getting a tattoo"}]}, "bi6C3kyP4mU": {"duration": 213.647, "subset": "training", "resolution": "426x238", "url": "https://www.youtube.com/watch?v=bi6C3kyP4mU", "annotations": [{"segment": [23.331164103707007, 25.664280514077706], "label": "Rafting"}, {"segment": [39.662978976301915, 41.66279304233394], "label": "Rafting"}, {"segment": [126.98819319303385, 131.3211236694366], "label": "Rafting"}, {"segment": [169.9841956127225, 173.3172176599064], "label": "Rafting"}, {"segment": [178.98335746801874, 184.9827996177847], "label": "Rafting"}, {"segment": [190.9822417675507, 194.31526518408737], "label": "Rafting"}]}, "BzUhN9NQf34": {"duration": 120.88, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BzUhN9NQf34", "annotations": []}, "LVub7uinY-4": {"duration": 95.53, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=LVub7uinY-4", "annotations": [{"segment": [11.028065535101405, 14.008623787831512], "label": "Cricket"}, {"segment": [21.161963594383774, 24.291549759750392], "label": "Cricket"}, {"segment": [31.14683374102964, 35.617671120124804], "label": "Cricket"}, {"segment": [40.833648062402496, 43.36712257722309], "label": "Cricket"}, {"segment": [58.56796966614665, 62.4426953946958], "label": 
"Cricket"}]}, "egT7FYHlWho": {"duration": 97.72999999999999, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=egT7FYHlWho", "annotations": [{"segment": [48.7244976599064, 77.85244461778471], "label": "Cleaning windows"}]}, "Scr23rwMV7w": {"duration": 43.77, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Scr23rwMV7w", "annotations": []}, "RblRzlmSFak": {"duration": 35.156, "subset": "validation", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=RblRzlmSFak", "annotations": [{"segment": [8.33624626521061, 32.68732432449298], "label": "Raking leaves"}]}, "OyeXXxV3MSg": {"duration": 71.15, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=OyeXXxV3MSg", "annotations": [{"segment": [4.99801872074883, 69.30585959438378], "label": "Doing nails"}]}, "It-SVT1ICJM": {"duration": 19.203, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=It-SVT1ICJM", "annotations": [{"segment": [4.329867394695787, 13.70626989079563], "label": "Doing a powerbomb"}]}, "fcXkm-SnJKE": {"duration": 64.738, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fcXkm-SnJKE", "annotations": []}, "baaXPgmu8hA": {"duration": 98.987, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=baaXPgmu8hA", "annotations": []}, "p8_hwwgyoZE": {"duration": 159.15, "subset": "training", "resolution": "524x360", "url": "https://www.youtube.com/watch?v=p8_hwwgyoZE", "annotations": [{"segment": [66.88238805538221, 89.71422368096724], "label": "Playing flauta"}]}, "tAbB24pczrs": {"duration": 208.421, "subset": "validation", "resolution": "1276x720", "url": "https://www.youtube.com/watch?v=tAbB24pczrs", "annotations": [{"segment": [60.79395956880679, 162.24928305247707], "label": "Doing crunches"}]}, "caoMescJ1J4": {"duration": 152.28, "subset": "training", "resolution": "810x480", 
"url": "https://www.youtube.com/watch?v=caoMescJ1J4", "annotations": [{"segment": [14.725338533541343, 129.44047581903277], "label": "Playing harmonica"}]}, "08P5E8jltVg": {"duration": 77.927, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=08P5E8jltVg", "annotations": []}, "F_-w_z4B3bg": {"duration": 85.891, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=F_-w_z4B3bg", "annotations": [{"segment": [14.600388455538223, 59.74103900156006], "label": "Playing drums"}]}, "dWUEAavKWmI": {"duration": 124.9, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dWUEAavKWmI", "annotations": [{"segment": [0.01, 113.89099769705074], "label": "Ping-pong"}]}, "_GQaltSDMAk": {"duration": 77.509, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_GQaltSDMAk", "annotations": [{"segment": [0.2, 77.50820861678004], "label": "Making a lemonade"}]}, "BIJK3xcjiKE": {"duration": 159.522, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=BIJK3xcjiKE", "annotations": [{"segment": [12.194279771190846, 139.8609230899848], "label": "Futsal"}]}, "eyB-aYaCRWM": {"duration": 37.18, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=eyB-aYaCRWM", "annotations": []}, "hXrNCak63u0": {"duration": 82.71000000000001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=hXrNCak63u0", "annotations": [{"segment": [8.06330041, 35.04297621], "label": "Playing piano"}, {"segment": [45.3493447, 73.08663494], "label": "Playing piano"}]}, "7IHT1wZzJXw": {"duration": 126.108, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=7IHT1wZzJXw", "annotations": []}, "0XDH6N86K4k": {"duration": 117.819, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0XDH6N86K4k", "annotations": []}, 
"QkqsI11OtC8": {"duration": 14.86, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QkqsI11OtC8", "annotations": [{"segment": [0, 14.685453978159126], "label": "Hopscotch"}]}, "jbSBQL08VHA": {"duration": 169.0, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=jbSBQL08VHA", "annotations": [{"segment": [112.26102426000001, 137.38536224], "label": "Fixing bicycle"}, {"segment": [41.1584887, 104.59207654], "label": "Fixing bicycle"}]}, "MNduaJtXy1A": {"duration": 191.657, "subset": "training", "resolution": "350x240", "url": "https://www.youtube.com/watch?v=MNduaJtXy1A", "annotations": [{"segment": [0, 191.601], "label": "Playing congas"}]}, "lPJkzW5mEqg": {"duration": 92.11, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=lPJkzW5mEqg", "annotations": []}, "xH_1O0mckTU": {"duration": 191.078, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xH_1O0mckTU", "annotations": [{"segment": [15.798878860842434, 20.86644377847114], "label": "Longboarding"}, {"segment": [68.85926446895476, 73.62873733260531], "label": "Longboarding"}, {"segment": [91.81235262527301, 98.96656192074884], "label": "Longboarding"}, {"segment": [127.88149115663028, 128.47767526458657], "label": "Longboarding"}, {"segment": [149.94030315101404, 151.728855474883], "label": "Longboarding"}, {"segment": [160.67161709422777, 163.65253763400935], "label": "Longboarding"}]}, "yhWw7oJrUFo": {"duration": 133.19, "subset": "training", "resolution": "400x224", "url": "https://www.youtube.com/watch?v=yhWw7oJrUFo", "annotations": [{"segment": [29.07915756630265, 120.9043712948518], "label": "Making a cake"}]}, "rMX2KeJa8qI": {"duration": 111.085, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=rMX2KeJa8qI", "annotations": [{"segment": [30.142174726989083, 48.3401232449298], "label": "Croquet"}, {"segment": 
[78.6609890795632, 107.24923400936038], "label": "Croquet"}]}, "pQQb2FP1ewE": {"duration": 124.065, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pQQb2FP1ewE", "annotations": [{"segment": [64.63687857724739, 114.4169223770379], "label": "Scuba diving"}]}, "8KPk9pH4wWw": {"duration": 87.93, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=8KPk9pH4wWw", "annotations": [{"segment": [27.710867656474967, 35.255905879772605], "label": "Triple jump"}, {"segment": [51.58062381279251, 78.60557831045242], "label": "Triple jump"}]}, "ECzQabG77eM": {"duration": 73.747, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=ECzQabG77eM", "annotations": []}, "lVXgy-JU14Q": {"duration": 111.55, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=lVXgy-JU14Q", "annotations": [{"segment": [3.826867394695788, 102.6296255850234], "label": "Shot put"}]}, "-C9mAKWi9Ho": {"duration": 92.021, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-C9mAKWi9Ho", "annotations": [{"segment": [35.881825273010925, 53.96626521060843], "label": "Doing fencing"}, {"segment": [62.290848673946954, 85.1116895475819], "label": "Doing fencing"}]}, "8tfZXiFd6b4": {"duration": 36.224, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8tfZXiFd6b4", "annotations": []}, "o92ZcNsB-Vw": {"duration": 71.448, "subset": "testing", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=o92ZcNsB-Vw", "annotations": []}, "mNq0NksbsZ4": {"duration": 157.827, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mNq0NksbsZ4", "annotations": [{"segment": [0.20000000000000004, 30.27622932917317], "label": "Longboarding"}, {"segment": [32.98385959438377, 56.61408736349455], "label": "Longboarding"}, {"segment": [60.3063104524181, 97.47468954758192], "label": 
"Longboarding"}, {"segment": [107.56676599063962, 153.12359126365052], "label": "Longboarding"}]}, "5HqegO_ayw8": {"duration": 119.792, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=5HqegO_ayw8", "annotations": [{"segment": [15.49173634945398, 119.47432137285492], "label": "Playing beach volleyball"}]}, "19LxLS1_Yn0": {"duration": 35.74, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=19LxLS1_Yn0", "annotations": [{"segment": [0.01, 13.33403744149766], "label": "Pole vault"}]}, "ezx7z-kx2v4": {"duration": 111.18, "subset": "training", "resolution": "480x272", "url": "https://www.youtube.com/watch?v=ezx7z-kx2v4", "annotations": [{"segment": [19.33072542901716, 72.90197347893915], "label": "Discus throw"}]}, "t1U8fJVEztQ": {"duration": 13.329, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=t1U8fJVEztQ", "annotations": [{"segment": [0.2, 12.663917321861247], "label": "Running a marathon"}]}, "8m7zkNuqqGA": {"duration": 52.942, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=8m7zkNuqqGA", "annotations": []}, "KNzFJses9jc": {"duration": 151.209, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KNzFJses9jc", "annotations": [{"segment": [0.2, 148.65346333853356], "label": "Welding"}]}, "vY77rUOOgwg": {"duration": 54.521, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vY77rUOOgwg", "annotations": [{"segment": [24.49592851170047, 44.3988704274571], "label": "Installing carpet"}]}, "FPv0qnoQbq0": {"duration": 44.327, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FPv0qnoQbq0", "annotations": [{"segment": [5.463064852886116, 39.90111924196568], "label": "Calf roping"}]}, "UIZN2F8pjzE": {"duration": 30.05, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=UIZN2F8pjzE", "annotations": [{"segment": [2.48435391575663, 4.687460218408737], "label": "Smoking a cigarette"}, {"segment": [24.89041375975039, 26.53102483619345], "label": "Smoking a cigarette"}]}, "1iIG5D3jTk4": {"duration": 96.247, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1iIG5D3jTk4", "annotations": [{"segment": [14.560744149765991, 91.26734477379095], "label": "Rope skipping"}]}, "_uTNqQhQA9w": {"duration": 92.811, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_uTNqQhQA9w", "annotations": [{"segment": [0.01, 71.52609815912636], "label": "Making a cake"}]}, "X487OD6H6LE": {"duration": 83.941, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=X487OD6H6LE", "annotations": [{"segment": [11.256608424336973, 83.681], "label": "Playing congas"}]}, "JuD1OdoXe9Q": {"duration": 140.82999999999998, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=JuD1OdoXe9Q", "annotations": [{"segment": [6.63727077, 48.9803262], "label": "Long jump"}]}, "W_dRFcpPqiM": {"duration": 124.134, "subset": "testing", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=W_dRFcpPqiM", "annotations": []}, "_i6kvwg1Oyo": {"duration": 57.609, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_i6kvwg1Oyo", "annotations": [{"segment": [0.01, 57.608706], "label": "Throwing darts"}]}, "Khxa5Ey3udM": {"duration": 142.27, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Khxa5Ey3udM", "annotations": [{"segment": [65.35101404056162, 120.39571768], "label": "Mowing the lawn"}, {"segment": [35.27790795631825, 55.72779828], "label": "Mowing the lawn"}]}, "nA9MM4baSdc": {"duration": 143.96, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=nA9MM4baSdc", "annotations": [{"segment": 
[5.386558502340094, 54.09002496099844], "label": "Playing harmonica"}, {"segment": [56.334424336973484, 84.61385647425898], "label": "Playing harmonica"}, {"segment": [87.30713572542902, 141.39716068642747], "label": "Playing harmonica"}]}, "g7l-Y_bgPkI": {"duration": 27.38, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=g7l-Y_bgPkI", "annotations": [{"segment": [1.06769992199688, 26.906038034321373], "label": "Tennis serve with ball bouncing"}]}, "jRBh9ND4c8s": {"duration": 181.627, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=jRBh9ND4c8s", "annotations": []}, "QOuNt8YH3Rk": {"duration": 167.93, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=QOuNt8YH3Rk", "annotations": [{"segment": [13.098807489714556, 43.22606471605803], "label": "Hammer throw"}, {"segment": [63.922180549807024, 96.40722312429912], "label": "Hammer throw"}, {"segment": [123.3907665397816, 130.72609873322935], "label": "Hammer throw"}, {"segment": [139.89526397503903, 156.3997614102964], "label": "Hammer throw"}]}, "yp1zqqLB57M": {"duration": 180.7, "subset": "training", "resolution": "596x360", "url": "https://www.youtube.com/watch?v=yp1zqqLB57M", "annotations": [{"segment": [10.007435696067299, 111.49129064210187], "label": "Belly dance"}, {"segment": [97.9601099826306, 104.16190111822159], "label": "Belly dance"}, {"segment": [134.60705760203197, 166.46171207120395], "label": "Belly dance"}]}, "n9teiB6FW84": {"duration": 145.636, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=n9teiB6FW84", "annotations": [{"segment": [7.709897196261681, 132.447015576324], "label": "BMX"}]}, "kKUoxGrxSWw": {"duration": 162.66, "subset": "testing", "resolution": "568x360", "url": "https://www.youtube.com/watch?v=kKUoxGrxSWw", "annotations": []}, "GhXniQgRUTY": {"duration": 121.88, "subset": "validation", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=GhXniQgRUTY", "annotations": [{"segment": [47.519890795631824, 120.5104430577223], "label": "Doing step aerobics"}]}, "tldn48dG8Ag": {"duration": 129.916, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=tldn48dG8Ag", "annotations": []}, "2rjIzlwBiKs": {"duration": 148.051, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=2rjIzlwBiKs", "annotations": []}, "dDmc6n79ek0": {"duration": 110.736, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=dDmc6n79ek0", "annotations": [{"segment": [18.6575414690057, 98.64311123556942], "label": "Playing ice hockey"}]}, "JzLqc6MhJtc": {"duration": 54.89, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=JzLqc6MhJtc", "annotations": [{"segment": [0, 54.891971], "label": "Getting a tattoo"}]}, "4fgIHu_Se3Q": {"duration": 50.97, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4fgIHu_Se3Q", "annotations": [{"segment": [1.2710140405616226, 49.64898595943838], "label": "Using parallel bars"}]}, "JlCQlNjvXzA": {"duration": 85.148, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=JlCQlNjvXzA", "annotations": [{"segment": [7.423140186915887, 84.19288940809969], "label": "Elliptical trainer"}]}, "Nvf-BZNEGdI": {"duration": 101.45, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Nvf-BZNEGdI", "annotations": []}, "u6FTTLK6GUg": {"duration": 35.99, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=u6FTTLK6GUg", "annotations": [{"segment": [0.01, 35.457521110764425], "label": "Dodgeball"}]}, "0zrJtIM2XRA": {"duration": 208.96, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=0zrJtIM2XRA", "annotations": []}, "asgLNARjP-A": {"duration": 55.264, "subset": "testing", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=asgLNARjP-A", "annotations": []}, "O9HLR19is-A": {"duration": 147.285, "subset": "training", "resolution": "328x240", "url": "https://www.youtube.com/watch?v=O9HLR19is-A", "annotations": [{"segment": [0.2, 142.89600936037442], "label": "Riding bumper cars"}]}, "DP9hfhq8sro": {"duration": 18.716, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DP9hfhq8sro", "annotations": [{"segment": [0.01, 15.212920436817473], "label": "Table soccer"}]}, "K2l7ops-tno": {"duration": 109.528, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=K2l7ops-tno", "annotations": [{"segment": [0.01, 109.52852607709751], "label": "Laying tile"}]}, "EWF-801zIRM": {"duration": 212.74, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=EWF-801zIRM", "annotations": []}, "wiOdB3Vgs-4": {"duration": 137.86, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=wiOdB3Vgs-4", "annotations": []}, "T3CHT7rQQVc": {"duration": 116.61, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=T3CHT7rQQVc", "annotations": [{"segment": [9.823670910140406, 73.67753182605304], "label": "Playing guitarra"}]}, "rlirIGuekbw": {"duration": 87.215, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rlirIGuekbw", "annotations": []}, "KWeZbXoX8Ys": {"duration": 10.148, "subset": "training", "resolution": "202x360", "url": "https://www.youtube.com/watch?v=KWeZbXoX8Ys", "annotations": [{"segment": [1.8557847113884554, 9.530555382215288], "label": "Fun sliding down"}]}, "4chhQzVxWkA": {"duration": 65.364, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4chhQzVxWkA", "annotations": [{"segment": [5.4044995460218415, 65.363853], "label": "Swimming"}]}, "1JHqxhajh60": {"duration": 99.85, "subset": "training", 
"resolution": "1280x720", "url": "https://www.youtube.com/watch?v=1JHqxhajh60", "annotations": [{"segment": [0.01, 8.644492979719189], "label": "Bathing dog"}, {"segment": [13.628705148205928, 42.91095163806553], "label": "Bathing dog"}, {"segment": [43.84549141965679, 72.66046801872075], "label": "Bathing dog"}, {"segment": [74.21803432137285, 86.99007800312012], "label": "Bathing dog"}, {"segment": [90.10521060842433, 97.73728549141967], "label": "Bathing dog"}]}, "9LhMNDcFW_k": {"duration": 85.636, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=9LhMNDcFW_k", "annotations": [{"segment": [6.543602184087363, 7.87903120124805], "label": "Croquet"}, {"segment": [20.031435257410294, 20.69914976599064], "label": "Croquet"}, {"segment": [26.57503744149766, 31.11549609984399], "label": "Croquet"}, {"segment": [35.655954758190326, 39.52869890795632], "label": "Croquet"}, {"segment": [41.66538533541342, 72.11316692667707], "label": "Croquet"}, {"segment": [76.78716848673946, 82.26242745709828], "label": "Croquet"}]}, "8NfYYT8d3EQ": {"duration": 200.74, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8NfYYT8d3EQ", "annotations": [{"segment": [105.79590143999998, 172.34791488], "label": "Preparing salad"}]}, "n1dboqahvZU": {"duration": 63.83, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=n1dboqahvZU", "annotations": [{"segment": [31.865900530421218, 52.97705963182527], "label": "Clean and jerk"}]}, "VpiAoWvJhB4": {"duration": 225.954, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=VpiAoWvJhB4", "annotations": []}, "O3HFalRZVts": {"duration": 123.51, "subset": "validation", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=O3HFalRZVts", "annotations": [{"segment": [0, 123.461], "label": "Making a sandwich"}]}, "s80tsQgUczA": {"duration": 173.27, "subset": "testing", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=s80tsQgUczA", "annotations": []}, "W4tmb8RwzQM": {"duration": 188.152, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=W4tmb8RwzQM", "annotations": [{"segment": [16.43141341653666, 180.4713182527301], "label": "Slacklining"}]}, "3wEEaF-9xPU": {"duration": 70.868, "subset": "testing", "resolution": "597x480", "url": "https://www.youtube.com/watch?v=3wEEaF-9xPU", "annotations": []}, "9uitA8tmSfE": {"duration": 206.59, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=9uitA8tmSfE", "annotations": [{"segment": [94.89240249609985, 202.5123088923557], "label": "Plataform diving"}, {"segment": [6.605413416536661, 91.31558574], "label": "Plataform diving"}]}, "d950IKYTYY0": {"duration": 64.343, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=d950IKYTYY0", "annotations": [{"segment": [0, 4.115510472698908], "label": "Having an ice cream"}, {"segment": [13.651449372854916, 63.53946656630266], "label": "Having an ice cream"}]}, "GSo0lqq5zmM": {"duration": 182.184, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=GSo0lqq5zmM", "annotations": [{"segment": [86.97073131904868, 89.52869400490306], "label": "Curling"}, {"segment": [141.25638244149766, 146.08808968798752], "label": "Curling"}]}, "Jl2lDgcsvmA": {"duration": 190.27, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Jl2lDgcsvmA", "annotations": [{"segment": [0.01, 190.26430839002268], "label": "Playing kickball"}]}, "ekzUyQo2R9Y": {"duration": 20.063, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ekzUyQo2R9Y", "annotations": [{"segment": [0.2, 19.812044744548285], "label": "Waterskiing"}]}, "2X4-yp_EFVE": {"duration": 28.259, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2X4-yp_EFVE", "annotations": 
[{"segment": [0.2, 12.211630563182528], "label": "Arm wrestling"}]}, "sTev5fWX3T8": {"duration": 15.09, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=sTev5fWX3T8", "annotations": [{"segment": [0.023545195007800314, 15.09247], "label": "Using uneven bars"}]}, "fvroOk6TpKk": {"duration": 193.14, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fvroOk6TpKk", "annotations": [{"segment": [10.546051547581904, 181.0907708599064], "label": "Playing lacrosse"}]}, "b0QGh__f7lU": {"duration": 137.648, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=b0QGh__f7lU", "annotations": [{"segment": [8.804311290818978, 137.64789115646258], "label": "Playing congas"}]}, "ZRwdRQ2iSFY": {"duration": 60.49, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZRwdRQ2iSFY", "annotations": []}, "34NXXCSWyzE": {"duration": 151.836, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=34NXXCSWyzE", "annotations": [{"segment": [24.04256048337172, 90.84061029924192], "label": "Calf roping"}]}, "y3LlQBDXAHI": {"duration": 37.94, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=y3LlQBDXAHI", "annotations": [{"segment": [2.9013198127925115, 21.611872074882996], "label": "Hammer throw"}]}, "QMFtjdg6d2A": {"duration": 42.447, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=QMFtjdg6d2A", "annotations": [{"segment": [0.01, 34.23486892355694], "label": "Bullfighting"}]}, "yeUuZ9vk5gE": {"duration": 63.182, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=yeUuZ9vk5gE", "annotations": [{"segment": [5.142675655887877, 34.65716163719629], "label": "Baking cookies"}]}, "2iW1Eq9SDW4": {"duration": 26.262, "subset": "validation", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=2iW1Eq9SDW4", "annotations": [{"segment": [0.01, 26.261768707482993], "label": "Playing beach volleyball"}]}, "7bwruwic1CI": {"duration": 31.88, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7bwruwic1CI", "annotations": []}, "lEYr4d4vBWc": {"duration": 206.426, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=lEYr4d4vBWc", "annotations": [{"segment": [52.81398608325285, 180.3404386895476], "label": "Fixing the roof"}]}, "CvsFEsXakwo": {"duration": 185.76, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CvsFEsXakwo", "annotations": [{"segment": [47.23684997035882, 51.583799354134165], "label": "Playing drums"}, {"segment": [67.23281713572543, 71.8695631450858], "label": "Playing drums"}, {"segment": [72.44915639625584, 74.18793614976599], "label": "Playing drums"}, {"segment": [87.80837755226209, 91.57573368486739], "label": "Playing drums"}]}, "QeH6IOAjy-4": {"duration": 40.52, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=QeH6IOAjy-4", "annotations": [{"segment": [1.294335652106084, 38.987915374414975], "label": "Clean and jerk"}]}, "9T1C2CW_P0A": {"duration": 171.62, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=9T1C2CW_P0A", "annotations": [{"segment": [0.01, 0.02], "label": "Walking the dog"}]}, "6YmUCNIMpB4": {"duration": 27.864, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=6YmUCNIMpB4", "annotations": [{"segment": [0.2, 27.663197278911564], "label": "Trimming branches or hedges"}]}, "xM5OQYON59w": {"duration": 46.65, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=xM5OQYON59w", "annotations": [{"segment": [10.53704368174727, 23.108826833073323], "label": "Shot put"}]}, "GqrEVe0c6PQ": {"duration": 109.877, "subset": "training", "resolution": 
"640x480", "url": "https://www.youtube.com/watch?v=GqrEVe0c6PQ", "annotations": [{"segment": [10.456296823712949, 103.19165062090484], "label": "Making a lemonade"}]}, "2_tzemKY72E": {"duration": 82.36, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2_tzemKY72E", "annotations": [{"segment": [16.867490639625586, 59.85245943837754], "label": "Using uneven bars"}]}, "-WrOnvkUTXg": {"duration": 36.455, "subset": "validation", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=-WrOnvkUTXg", "annotations": [{"segment": [1.606723450812224, 30.912452422774837], "label": "Kneeling"}]}, "fzp5ooc727c": {"duration": 148.282, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=fzp5ooc727c", "annotations": [{"segment": [0.01, 148.221], "label": "Hurling"}]}, "0NaL4it1TVk": {"duration": 176.29, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0NaL4it1TVk", "annotations": []}, "1mYtNMDFyXQ": {"duration": 110.34, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=1mYtNMDFyXQ", "annotations": [{"segment": [7.918401248049922, 42.00195444617785], "label": "Drinking coffee"}]}, "xIhTY02lRSE": {"duration": 64.02, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xIhTY02lRSE", "annotations": [{"segment": [4.094692898595944, 47.53838584711389], "label": "Fixing bicycle"}]}, "5zT1GWfmVLU": {"duration": 32.183, "subset": "validation", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=5zT1GWfmVLU", "annotations": [{"segment": [8.660753248829954, 17.99930457800312], "label": "Doing a powerbomb"}]}, "kl4vLrvGAmM": {"duration": 180.141, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kl4vLrvGAmM", "annotations": [{"segment": [41.31145085803432, 146.97883536565953], "label": "Laying tile"}]}, "syAccI5soVw": {"duration": 202.99, 
"subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=syAccI5soVw", "annotations": [{"segment": [0.01, 202.921], "label": "Belly dance"}]}, "6fjc0Brqa7A": {"duration": 126.689, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6fjc0Brqa7A", "annotations": [{"segment": [29.843835758190323, 38.737694096723864], "label": "Longboarding"}, {"segment": [54.351356513260534, 113.84138673322933], "label": "Longboarding"}]}, "M96TST6CN4M": {"duration": 87.725, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=M96TST6CN4M", "annotations": [{"segment": [25.239691390228977, 38.337045840562766], "label": "Sumo"}, {"segment": [48.43292231169973, 57.43735332569662], "label": "Sumo"}, {"segment": [66.44178433969351, 69.3068305714198], "label": "Sumo"}, {"segment": [84.4506463676873, 87.524987], "label": "Sumo"}]}, "h7iCyiNUxeE": {"duration": 6.92, "subset": "validation", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=h7iCyiNUxeE", "annotations": [{"segment": [0.06210848, 0.21744632], "label": "Washing dishes"}]}, "oZTFplEHVDo": {"duration": 19.74, "subset": "validation", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=oZTFplEHVDo", "annotations": [{"segment": [5.2344511700468015, 19.305887550702028], "label": "Using uneven bars"}]}, "o4z1nEiyr4E": {"duration": 22.176, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=o4z1nEiyr4E", "annotations": [{"segment": [5.152438370380817, 19.05091656352723], "label": "Rope skipping"}]}, "4kURX9KFGj4": {"duration": 15.4, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4kURX9KFGj4", "annotations": []}, "LWAxCIy4PBw": {"duration": 69.591, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LWAxCIy4PBw", "annotations": [{"segment": [0.01, 69.59020408163265], "label": "Mooping floor"}]}, 
"nbgE4MAR0pc": {"duration": 20.39, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=nbgE4MAR0pc", "annotations": [{"segment": [1.6380954500000002, 16.622540450000002], "label": "Pole vault"}]}, "M3jMJB_t024": {"duration": 78.554, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=M3jMJB_t024", "annotations": [{"segment": [0.01, 77.01140873343073], "label": "Table soccer"}]}, "c-TD6-GESQk": {"duration": 52.15, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=c-TD6-GESQk", "annotations": [{"segment": [27.784516184867396, 43.649759687207485], "label": "Clean and jerk"}]}, "sy-xNiKnfBU": {"duration": 201.92000000000002, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sy-xNiKnfBU", "annotations": [{"segment": [0.01, 200.03067202960227], "label": "Windsurfing"}]}, "N-6-MdxRg50": {"duration": 134.49, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=N-6-MdxRg50", "annotations": [{"segment": [8.182680898595944, 127.35608475507019], "label": "Zumba"}]}, "3baH1vI0Cxk": {"duration": 94.738, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3baH1vI0Cxk", "annotations": [{"segment": [22.686663695119233, 33.62355694227769], "label": "Table soccer"}, {"segment": [35.101515489190994, 39.23979942054825], "label": "Table soccer"}, {"segment": [51.95024292400268, 74.85860040115891], "label": "Table soccer"}, {"segment": [80.62263873412078, 83.2829649976599], "label": "Table soccer"}]}, "hJI5PEg2UuA": {"duration": 92.695, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hJI5PEg2UuA", "annotations": [{"segment": [39.15802028081123, 45.80476911076443], "label": "Sharpening knives"}]}, "MaYV3jnGr6g": {"duration": 160.961, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=MaYV3jnGr6g", "annotations": [{"segment": [49.970646488444565, 55.74614778159127], "label": "Putting in contact lenses"}, {"segment": [76.08595845865834, 82.36367780343213], "label": "Putting in contact lenses"}]}, "4HxmQpkryjA": {"duration": 66.57, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=4HxmQpkryjA", "annotations": [{"segment": [10.860500780031199, 13.45870670826833], "label": "Playing water polo"}, {"segment": [18.655118564742587, 21.772965678627145], "label": "Playing water polo"}, {"segment": [27.800803432137283, 30.191152886115443], "label": "Playing water polo"}, {"segment": [35.59542121684867, 41.20754602184087], "label": "Playing water polo"}]}, "GUSlicDnqIA": {"duration": 101.169, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GUSlicDnqIA", "annotations": []}, "sn1OtmEJxng": {"duration": 155.365, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sn1OtmEJxng", "annotations": [{"segment": [36.124166775111526, 39.75418351701078], "label": "Running a marathon"}, {"segment": [46.04621253630282, 47.74022034918914], "label": "Running a marathon"}, {"segment": [53.30624602010132, 55.242254949114255], "label": "Running a marathon"}, {"segment": [58.63027057488689, 59.84027615551998], "label": "Running a marathon"}, {"segment": [73.39233865861051, 75.57034870375006], "label": "Running a marathon"}, {"segment": [81.62037660691549, 85.25039334881474], "label": "Running a marathon"}, {"segment": [89.12241120684061, 91.78442348423339], "label": "Running a marathon"}, {"segment": [120.09855287033965, 124.45457291706862], "label": "Running a marathon"}]}, "yTJCrP0HqEE": {"duration": 147.888, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yTJCrP0HqEE", "annotations": [{"segment": [74.86680293334182, 79.71180404767212], "label": "Putting in contact lenses"}]}, 
"9i7ABIB99Ow": {"duration": 171.29, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=9i7ABIB99Ow", "annotations": [{"segment": [22.67062855, 163.6327346723869], "label": "Preparing pasta"}]}, "7OTqYfUuAIw": {"duration": 31.58, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7OTqYfUuAIw", "annotations": [{"segment": [6.27506309, 20.6747713], "label": "Doing kickboxing"}]}, "HO36c47J3vU": {"duration": 87.168, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HO36c47J3vU", "annotations": []}, "hXaydigjzDE": {"duration": 66.06, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=hXaydigjzDE", "annotations": []}, "rrXdhD3WDy4": {"duration": 94.46000000000001, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=rrXdhD3WDy4", "annotations": [{"segment": [6.407031936037442, 78.44453526000001], "label": "Getting a piercing"}]}, "coYI2JYy6os": {"duration": 8.104, "subset": "testing", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=coYI2JYy6os", "annotations": []}, "cJcr6kWylI8": {"duration": 145.5, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cJcr6kWylI8", "annotations": [{"segment": [3.631722783151326, 30.64266098283931], "label": "Polishing forniture"}, {"segment": [71.04557694539781, 94.1978096879875], "label": "Polishing forniture"}]}, "6df3LoaxV4Q": {"duration": 180.91, "subset": "testing", "resolution": "426x236", "url": "https://www.youtube.com/watch?v=6df3LoaxV4Q", "annotations": []}, "gfwSnetklbE": {"duration": 118.956, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=gfwSnetklbE", "annotations": [{"segment": [0.2, 92.13063690960831], "label": "Removing ice from car"}]}, "Msj_JClcTZI": {"duration": 200.551, "subset": "testing", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=Msj_JClcTZI", "annotations": []}, "99PtaOQbBIY": {"duration": 150.652, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=99PtaOQbBIY", "annotations": [{"segment": [51.219669945981515, 59.44301145107029], "label": "Clipping cat claws"}, {"segment": [109.01801175663027, 116.7714479375975], "label": "Clipping cat claws"}]}, "v621l04N1QQ": {"duration": 69.359, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=v621l04N1QQ", "annotations": [{"segment": [0.2, 66.22012142901715], "label": "Waterskiing"}]}, "JHFjlLtpIcU": {"duration": 42.17, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JHFjlLtpIcU", "annotations": [{"segment": [10.525413451912225, 40.391272549141966], "label": "Using uneven bars"}]}, "OZpxc1X8Bsc": {"duration": 71.03, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OZpxc1X8Bsc", "annotations": [{"segment": [5.429738256196914, 68.03794346333854], "label": "Rock climbing"}]}, "Nn9qwOB-g34": {"duration": 126.642, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Nn9qwOB-g34", "annotations": [{"segment": [7.9027269890795635, 126.64120181405896], "label": "Futsal"}]}, "VkdPyMG-Gvo": {"duration": 212.347, "subset": "training", "resolution": "352x240", "url": "https://www.youtube.com/watch?v=VkdPyMG-Gvo", "annotations": [{"segment": [0.01, 189.9872597096807], "label": "Playing pool"}]}, "NdFbQ0l3R9k": {"duration": 88.91, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=NdFbQ0l3R9k", "annotations": [{"segment": [16.505765297278558, 56.036379664710395], "label": "Using the balance beam"}]}, "mW23e1rzy-k": {"duration": 126.29, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mW23e1rzy-k", "annotations": []}, "B5s4uYNIM24": {"duration": 146.31, "subset": 
"training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=B5s4uYNIM24", "annotations": [{"segment": [16.890531747269893, 123.7117325273011], "label": "Cleaning windows"}]}, "jLykQW_-IVw": {"duration": 88.82, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jLykQW_-IVw", "annotations": [{"segment": [10.82045328, 84.68305090405616], "label": "Smoking a cigarette"}]}, "IRBrTKrVCxo": {"duration": 24.92, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IRBrTKrVCxo", "annotations": []}, "7cpX1j9dwjA": {"duration": 92.16, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7cpX1j9dwjA", "annotations": [{"segment": [7.758140405616225, 85.48507020280812], "label": "Bullfighting"}]}, "DhBaapZUYjA": {"duration": 55.22, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DhBaapZUYjA", "annotations": []}, "jW1isCO6MYk": {"duration": 26.123, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jW1isCO6MYk", "annotations": [{"segment": [5.646998459667692, 23.52239122952836], "label": "Using the monkey bar"}]}, "XSNenkxgryQ": {"duration": 188.18, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=XSNenkxgryQ", "annotations": [{"segment": [9.981175310452418, 27.888578073322936], "label": "Smoking a cigarette"}, {"segment": [62.52912768018721, 85.72068207800312], "label": "Smoking a cigarette"}]}, "5FwNVOIGdVs": {"duration": 96.387, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=5FwNVOIGdVs", "annotations": [{"segment": [15.036762286818004, 86.31101529485179], "label": "Blowing leaves"}]}, "mUsjm4oBBvw": {"duration": 232.15, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mUsjm4oBBvw", "annotations": [{"segment": [38.563744039001556, 192.81872019500778], "label": 
"Getting a haircut"}]}, "RpgTxW7lYJM": {"duration": 221.98, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RpgTxW7lYJM", "annotations": [{"segment": [4.501021840873635, 213.97165366614666], "label": "Painting"}]}, "EsVxUf4Ae2c": {"duration": 85.171, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EsVxUf4Ae2c", "annotations": [{"segment": [11.42697375351014, 47.30235646801872], "label": "Ice fishing"}]}, "bWEx3UqVvXU": {"duration": 115.218, "subset": "testing", "resolution": "636x360", "url": "https://www.youtube.com/watch?v=bWEx3UqVvXU", "annotations": []}, "cqxiLraI8uY": {"duration": 24.5, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cqxiLraI8uY", "annotations": []}, "08wKlw4a_so": {"duration": 25.31, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=08wKlw4a_so", "annotations": [{"segment": [0.2, 25.109750566893425], "label": "Raking leaves"}]}, "koSLx0E23gU": {"duration": 122.49, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=koSLx0E23gU", "annotations": [{"segment": [5.732533138060217, 98.59956939781591], "label": "Tumbling"}]}, "iHm8ZXs2XdY": {"duration": 222.541, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iHm8ZXs2XdY", "annotations": [{"segment": [3.818939929797192, 208.3058143525741], "label": "Braiding hair"}]}, "Svr-YN9ujkA": {"duration": 225.12, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Svr-YN9ujkA", "annotations": [{"segment": [17.384265656340542, 43.72406210534135], "label": "Doing nails"}, {"segment": [83.40935542183591, 146.2736696134512], "label": "Doing nails"}, {"segment": [185.6077645577223, 213.35234998829955], "label": "Doing nails"}]}, "TdAfqkmTrf0": {"duration": 115.891, "subset": "training", "resolution": "176x144", "url": 
"https://www.youtube.com/watch?v=TdAfqkmTrf0", "annotations": [{"segment": [6.308174445044187, 18.203594022792384], "label": "Hurling"}, {"segment": [20.907098494021, 31.901350010350704], "label": "Hurling"}, {"segment": [35.50602263865552, 44.1572369465871], "label": "Hurling"}, {"segment": [51.366582203196735, 68.66901081905988], "label": "Hurling"}, {"segment": [79.48302870397434, 84.1691031207706], "label": "Hurling"}, {"segment": [98.76802726540514, 107.23900794192147], "label": "Hurling"}]}, "vSV7arHrH5k": {"duration": 152.05, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=vSV7arHrH5k", "annotations": [{"segment": [26.97108516, 135.7676898], "label": "Hand washing clothes"}]}, "K1TizK5Sg78": {"duration": 221.22, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=K1TizK5Sg78", "annotations": [{"segment": [21.57759786, 116.67746922], "label": "Smoking a cigarette"}]}, "Z5EajmUZLbk": {"duration": 106.534, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Z5EajmUZLbk", "annotations": []}, "Yd3G3732WbI": {"duration": 93.37, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=Yd3G3732WbI", "annotations": [{"segment": [19.14217394695788, 28.458517160686426], "label": "Wrapping presents"}, {"segment": [29.768627925117006, 32.679985179407176], "label": "Wrapping presents"}, {"segment": [34.57236739469579, 39.23053900156006], "label": "Wrapping presents"}, {"segment": [41.122921216848674, 47.382339313572544], "label": "Wrapping presents"}, {"segment": [49.420289391575665, 61.64798985959438], "label": "Wrapping presents"}, {"segment": [66.16059360374415, 78.9705655226209], "label": "Wrapping presents"}]}, "TTDruR5Vin4": {"duration": 173.25, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=TTDruR5Vin4", "annotations": [{"segment": [7.971903276131044, 167.9504368174727], "label": 
"Doing motocross"}]}, "50b9lVikSeo": {"duration": 36.13, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=50b9lVikSeo", "annotations": [{"segment": [14.654858221528862, 26.94239319188768], "label": "Vacuuming floor"}]}, "x7-2_HigN8c": {"duration": 74.235, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=x7-2_HigN8c", "annotations": [{"segment": [13.560200963800924, 18.185385761308712], "label": "Throwing darts"}, {"segment": [33.911014072835194, 47.902198085296256], "label": "Throwing darts"}, {"segment": [55.6493826211218, 62.240270957570395], "label": "Throwing darts"}]}, "rvqULxGTSZw": {"duration": 9.31, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rvqULxGTSZw", "annotations": []}, "dEG-OgH9zmU": {"duration": 94.32, "subset": "validation", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=dEG-OgH9zmU", "annotations": [{"segment": [0.01, 94.319455], "label": "Scuba diving"}]}, "4w7sVSMbjyM": {"duration": 189.985, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4w7sVSMbjyM", "annotations": [{"segment": [10.670023241745996, 24.303941828421433], "label": "Snowboarding"}, {"segment": [29.638953449294434, 32.602848794223874], "label": "Snowboarding"}, {"segment": [55.4248429501806, 62.53819177801126], "label": "Snowboarding"}, {"segment": [68.7623720023631, 82.39628989079563], "label": "Snowboarding"}, {"segment": [88.91685959438378, 95.14103976599064], "label": "Snowboarding"}, {"segment": [107.58940010920436, 119.74137092043682], "label": "Snowboarding"}, {"segment": [152.9369985023401, 170.42398088923557], "label": "Snowboarding"}, {"segment": [174.86982386895477, 177.53732965678628], "label": "Snowboarding"}]}, "AKoregkLJgc": {"duration": 84.15, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AKoregkLJgc", "annotations": [{"segment": [0.01, 84.081], 
"label": "Rope skipping"}]}, "AwoZxz8M8Jg": {"duration": 179.607, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AwoZxz8M8Jg", "annotations": [{"segment": [34.464136075647104, 47.9135550319972], "label": "Making a lemonade"}, {"segment": [62.483758901376454, 105.35378182474237], "label": "Making a lemonade"}, {"segment": [116.28143472677681, 159.43165387840003], "label": "Making a lemonade"}]}, "wMHwxGyHVaU": {"duration": 88.24, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=wMHwxGyHVaU", "annotations": []}, "7Zd7KlliqQw": {"duration": 125.97, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7Zd7KlliqQw", "annotations": [{"segment": [48.14699217628706, 101.0104244024961], "label": "Getting a haircut"}]}, "fNPEa0Sd4hM": {"duration": 105.65, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fNPEa0Sd4hM", "annotations": [{"segment": [11.612152886115444, 21.988970358814353], "label": "Triple jump"}, {"segment": [35.495304212168485, 43.401450858034316], "label": "Triple jump"}]}, "99KPQLxH4cw": {"duration": 176.867, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=99KPQLxH4cw", "annotations": [{"segment": [0.20000000000000004, 28.133700468018716], "label": "Snowboarding"}]}, "K-Jzu2AmuW4": {"duration": 170.04, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=K-Jzu2AmuW4", "annotations": [{"segment": [2.9173853354134165, 167.35183151326052], "label": "Zumba"}]}, "cvFFwMKFg7Q": {"duration": 7.617, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=cvFFwMKFg7Q", "annotations": [{"segment": [0.2, 7.416144], "label": "Raking leaves"}]}, "heLlfK--Bx0": {"duration": 80.922, "subset": "training", "resolution": "1920x1072", "url": "https://www.youtube.com/watch?v=heLlfK--Bx0", "annotations": [{"segment": 
[8.702073322932916, 76.31966458658347], "label": "Rope skipping"}]}, "aDsRltSdQak": {"duration": 147.77, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=aDsRltSdQak", "annotations": [{"segment": [33.42721371439892, 135.32258226193449], "label": "Windsurfing"}]}, "WLpjci5dN8s": {"duration": 109.552, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=WLpjci5dN8s", "annotations": [{"segment": [18.946020280811236, 49.49861154446178], "label": "Sharpening knives"}]}, "8kNk_fzmTyI": {"duration": 84.312, "subset": "training", "resolution": "853x474", "url": "https://www.youtube.com/watch?v=8kNk_fzmTyI", "annotations": [{"segment": [37.069908020272614, 39.30245964112864], "label": "Swimming"}, {"segment": [45.21215510810048, 47.707359108927626], "label": "Swimming"}]}, "ByIIq3jFOKo": {"duration": 193.027, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ByIIq3jFOKo", "annotations": [{"segment": [20.527278561395352, 138.98962793728276], "label": "Rafting"}, {"segment": [175.37019716185736, 181.98484611178], "label": "Rafting"}]}, "n7AaZhEOJSY": {"duration": 121.05, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=n7AaZhEOJSY", "annotations": [{"segment": [5.330666697465914, 113.36665180961545], "label": "Ballet"}]}, "gD3VQ0Kn7Fg": {"duration": 38.36, "subset": "testing", "resolution": "640x350", "url": "https://www.youtube.com/watch?v=gD3VQ0Kn7Fg", "annotations": []}, "6U081DbNJIY": {"duration": 121.02, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=6U081DbNJIY", "annotations": [{"segment": [23.033905095163806, 37.19409265366615], "label": "Javelin throw"}, {"segment": [39.27092016224649, 51.35428021216849], "label": "Javelin throw"}, {"segment": [66.83608527613104, 72.50016029953198], "label": "Javelin throw"}, {"segment": [80.42986533229329, 120.822403], "label": "Javelin 
throw"}]}, "S5Aj9b5T284": {"duration": 185.737, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=S5Aj9b5T284", "annotations": [{"segment": [0.2, 176.68683307332293], "label": "Table soccer"}]}, "Ii3jLIcf92s": {"duration": 31.115, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Ii3jLIcf92s", "annotations": [{"segment": [0.2, 30.093367601246104], "label": "Playing drums"}]}, "7ToBC5eGg38": {"duration": 9.08, "subset": "training", "resolution": "160x120", "url": "https://www.youtube.com/watch?v=7ToBC5eGg38", "annotations": [{"segment": [0, 3.895047659906396], "label": "Tennis serve with ball bouncing"}]}, "CQocaUwWcQI": {"duration": 37.99, "subset": "validation", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=CQocaUwWcQI", "annotations": [{"segment": [0.01, 9.422881741029641], "label": "Long jump"}, {"segment": [10.608149884555383, 18.253129410296413], "label": "Long jump"}, {"segment": [19.438397553822153, 27.379694115444618], "label": "Long jump"}, {"segment": [28.564962258970358, 37.573000149765996], "label": "Long jump"}]}, "qBEdCDpoeTc": {"duration": 145.36, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=qBEdCDpoeTc", "annotations": [{"segment": [18.814326053042123, 132.3803182527301], "label": "Shot put"}]}, "6dJrQV3Jzbw": {"duration": 162.75, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=6dJrQV3Jzbw", "annotations": [{"segment": [2.7928760334086835, 159.19393390429497], "label": "Playing lacrosse"}]}, "R_AsoAmxd4o": {"duration": 220.079, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=R_AsoAmxd4o", "annotations": [{"segment": [6.864929797191888, 213.1560702028081], "label": "Waterskiing"}]}, "0DMrpd7yoAc": {"duration": 19.018, "subset": "testing", "resolution": "427x240", "url": "https://www.youtube.com/watch?v=0DMrpd7yoAc", "annotations": 
[]}, "ODUY_0YRmrA": {"duration": 191.495, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ODUY_0YRmrA", "annotations": []}, "RfchfRzuV8I": {"duration": 52.15, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=RfchfRzuV8I", "annotations": [{"segment": [10.887447737909516, 45.74353042121685], "label": "Playing bagpipes"}]}, "GPX1sFm4kwg": {"duration": 158.988, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=GPX1sFm4kwg", "annotations": []}, "zH-fOiPLLxA": {"duration": 39.567, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zH-fOiPLLxA", "annotations": [{"segment": [0.2, 35.93922897196262], "label": "Cleaning sink"}]}, "1qU2CdUQbw0": {"duration": 215.39, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=1qU2CdUQbw0", "annotations": [{"segment": [0.01, 153.2247486552262], "label": "Doing kickboxing"}]}, "ta9cB1q4AzU": {"duration": 179.026, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ta9cB1q4AzU", "annotations": [{"segment": [120.61561310452419, 166.97260842433698], "label": "Having an ice cream"}]}, "h_r8gioFi4k": {"duration": 171.32, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=h_r8gioFi4k", "annotations": []}, "t0YXK4W_L7o": {"duration": 93.03999999999999, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=t0YXK4W_L7o", "annotations": [{"segment": [5.515772973776094, 51.09347596761013], "label": "Horseback riding"}, {"segment": [53.12560230889235, 93.04238], "label": "Horseback riding"}]}, "aOxRwTlkY80": {"duration": 156.67000000000002, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aOxRwTlkY80", "annotations": []}, "mHe2twk5BDE": {"duration": 237.82, "subset": "training", "resolution": "640x480", "url": 
"https://www.youtube.com/watch?v=mHe2twk5BDE", "annotations": [{"segment": [48.23157693654685, 111.30363908433888], "label": "Playing water polo"}]}, "Xl2kHRcZADw": {"duration": 73.59, "subset": "testing", "resolution": "634x360", "url": "https://www.youtube.com/watch?v=Xl2kHRcZADw", "annotations": []}, "Sd7oYdE2uyw": {"duration": 161.565, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Sd7oYdE2uyw", "annotations": []}, "Y-dpHbPGQWE": {"duration": 233.92000000000002, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=Y-dpHbPGQWE", "annotations": [{"segment": [8.755569397815913, 215.24108102964118], "label": "Playing guitarra"}]}, "ZqwgoryWdg4": {"duration": 70.26, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ZqwgoryWdg4", "annotations": []}, "JAO5R6VA8r4": {"duration": 40.798, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JAO5R6VA8r4", "annotations": [{"segment": [5.0871762870514825, 34.592798751950085], "label": "Rope skipping"}]}, "ZBoa0UN86Qw": {"duration": 130.73, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ZBoa0UN86Qw", "annotations": [{"segment": [12.48713856, 37.81798049921997], "label": "Layup drill in basketball"}, {"segment": [45.157319032761315, 82.87336427457099], "label": "Layup drill in basketball"}, {"segment": [87.56238611544462, 99.99848751950078], "label": "Layup drill in basketball"}]}, "j-FGgYaQVio": {"duration": 152.532, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=j-FGgYaQVio", "annotations": [{"segment": [15.94324634481978, 145.63084594071762], "label": "Grooming dog"}]}, "Fb4X1PWfOvY": {"duration": 28.35, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Fb4X1PWfOvY", "annotations": []}, "4DmsWE6byuw": {"duration": 114.89, "subset": "training", "resolution": 
"320x240", "url": "https://www.youtube.com/watch?v=4DmsWE6byuw", "annotations": [{"segment": [10.575113425897037, 87.28949556630266], "label": "Breakdancing"}]}, "LWSHCLmbmc8": {"duration": 88.26, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=LWSHCLmbmc8", "annotations": []}, "_W8m7v1Ir5I": {"duration": 119.049, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_W8m7v1Ir5I", "annotations": [{"segment": [65.73078627145085, 113.07923400936038], "label": "Running a marathon"}]}, "XuSWH1lfcvI": {"duration": 57.63, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XuSWH1lfcvI", "annotations": [{"segment": [8.001867617561846, 46.84239358145755], "label": "Using parallel bars"}]}, "2VZumBKJuPs": {"duration": 147.1, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2VZumBKJuPs", "annotations": []}, "qX-cRt1Ma3w": {"duration": 52.663, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qX-cRt1Ma3w", "annotations": []}, "lcOepzWnOO4": {"duration": 180.49, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=lcOepzWnOO4", "annotations": [{"segment": [20.88150702028081, 176.36407956318251], "label": "Cumbia"}]}, "7LhKz7863kg": {"duration": 20.573, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7LhKz7863kg", "annotations": [{"segment": [1.3519371504739066, 19.58128294486643], "label": "Doing crunches"}]}, "c81l0bAPiFo": {"duration": 57.47, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=c81l0bAPiFo", "annotations": [{"segment": [0.01, 56.50550858034322], "label": "Canoeing"}]}, "P00O62PPzNU": {"duration": 136.069, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=P00O62PPzNU", "annotations": [{"segment": [13.89713806552262, 
28.11253120124805], "label": "Tug of war"}, {"segment": [84.54976365054603, 120.83084165366616], "label": "Tug of war"}]}, "3QC7SNC7ivY": {"duration": 24.056, "subset": "testing", "resolution": "543x360", "url": "https://www.youtube.com/watch?v=3QC7SNC7ivY", "annotations": []}, "7EEpIeXhO54": {"duration": 140.27, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7EEpIeXhO54", "annotations": [{"segment": [0, 30.182111069999998], "label": "Plataform diving"}, {"segment": [38.88031578, 135.19352574102965], "label": "Plataform diving"}]}, "hniQpwn3Ob4": {"duration": 157.76, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hniQpwn3Ob4", "annotations": [{"segment": [5.657820592823714, 155.46707020280812], "label": "Shot put"}]}, "rDxEl9bPodU": {"duration": 73.52, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rDxEl9bPodU", "annotations": [{"segment": [25.891309765004667, 72.00467892388318], "label": "Horseback riding"}]}, "kxj4wRdROQg": {"duration": 85.009, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=kxj4wRdROQg", "annotations": []}, "UQObeXnSDzc": {"duration": 112.966, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UQObeXnSDzc", "annotations": []}, "JowsnupmNd0": {"duration": 19.064, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=JowsnupmNd0", "annotations": [{"segment": [0.2527932192117617, 16.22337659764894], "label": "Using the monkey bar"}]}, "Wgm-mYzdnxc": {"duration": 32.694, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Wgm-mYzdnxc", "annotations": [{"segment": [0.2, 11.107797191887675], "label": "Baking cookies"}, {"segment": [14.878333853354134, 31.540029641185647], "label": "Baking cookies"}]}, "njPRhj-YgE0": {"duration": 110.969, "subset": "training", "resolution": "1920x1080", 
"url": "https://www.youtube.com/watch?v=njPRhj-YgE0", "annotations": [{"segment": [28.391213098907954, 36.35460213884555], "label": "Grooming dog"}, {"segment": [50.203974382215286, 59.20606634040562], "label": "Grooming dog"}, {"segment": [67.68880683946958, 72.88232143073323], "label": "Grooming dog"}, {"segment": [89.15533381669267, 90.88650534711388], "label": "Grooming dog"}]}, "jwd8ELi0au8": {"duration": 123.925, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=jwd8ELi0au8", "annotations": [{"segment": [7.721177442939792, 93.23321762349799], "label": "Throwing darts"}]}, "HhAXDtayrAY": {"duration": 154.79, "subset": "testing", "resolution": "540x360", "url": "https://www.youtube.com/watch?v=HhAXDtayrAY", "annotations": []}, "tS2d90ZGmeA": {"duration": 137.579, "subset": "validation", "resolution": "855x470", "url": "https://www.youtube.com/watch?v=tS2d90ZGmeA", "annotations": [{"segment": [6.437176287051482, 14.16178783151326], "label": "Running a marathon"}, {"segment": [120.160624024961, 127.02694539781591], "label": "Running a marathon"}]}, "0k0xNs8Zjb4": {"duration": 116.53999999999999, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0k0xNs8Zjb4", "annotations": []}, "IiCN1md2MV4": {"duration": 12.19, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=IiCN1md2MV4", "annotations": [{"segment": [0.3518311817472699, 11.610428997659906], "label": "Playing racquetball"}]}, "W_LjBXWG7wI": {"duration": 99.684, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=W_LjBXWG7wI", "annotations": [{"segment": [12.904622464898598, 32.028340093603745], "label": "Playing beach volleyball"}]}, "x2uByejBIiY": {"duration": 228.57999999999998, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=x2uByejBIiY", "annotations": [{"segment": [14.620384673890355, 196.48370622716064], "label": 
"Preparing pasta"}]}, "iOa_svsqGxQ": {"duration": 169.32, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=iOa_svsqGxQ", "annotations": [{"segment": [26.364641744548283, 152.91492211838005], "label": "Surfing"}]}, "HMIv7qpDmH0": {"duration": 37.593, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=HMIv7qpDmH0", "annotations": [{"segment": [0.01, 36.004734357186194], "label": "Playing rubik cube"}]}, "TN73eY7B208": {"duration": 193.52, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TN73eY7B208", "annotations": [{"segment": [0.01, 192.60941409984397], "label": "Zumba"}]}, "SLv4rEkiYaU": {"duration": 73.631, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=SLv4rEkiYaU", "annotations": [{"segment": [35.20708416908105, 40.95049104821335], "label": "River tubing"}, {"segment": [49.68046950449446, 68.8634473673947], "label": "River tubing"}]}, "S4frK_Og5Yc": {"duration": 27.748, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=S4frK_Og5Yc", "annotations": []}, "B53aHHzgTzc": {"duration": 151.35, "subset": "validation", "resolution": "1920x1440", "url": "https://www.youtube.com/watch?v=B53aHHzgTzc", "annotations": [{"segment": [5.902795865834634, 12.277815400936039], "label": "Triple jump"}, {"segment": [21.722288786271452, 29.986202998439943], "label": "Triple jump"}, {"segment": [34.94455152574103, 43.91680124180967], "label": "Triple jump"}, {"segment": [48.87514976911076, 57.13906398127926], "label": "Triple jump"}, {"segment": [65.40297819344774, 81.22247111388455], "label": "Triple jump"}, {"segment": [87.59749064898597, 96.09751669578783], "label": "Triple jump"}, {"segment": [98.45863504212168, 109.08366760062403], "label": "Triple jump"}, {"segment": [112.15312145085804, 126.31983152886116], "label": "Triple jump"}, {"segment": [131.51429189079565, 149.22267948829955], 
"label": "Triple jump"}]}, "WkiBckpLXfU": {"duration": 65.016, "subset": "training", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=WkiBckpLXfU", "annotations": [{"segment": [12.678602382190524, 17.64861451600921], "label": "Playing ice hockey"}, {"segment": [34.28294030577223, 37.1229471950078], "label": "Playing ice hockey"}, {"segment": [38.64437945709829, 43.81724914820593], "label": "Playing ice hockey"}, {"segment": [48.07725948205928, 50.41012228393136], "label": "Playing ice hockey"}]}, "oL1gGyMz79s": {"duration": 137.393, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oL1gGyMz79s", "annotations": [{"segment": [19.283447737909515, 51.422527301092046], "label": "Playing ten pins"}, {"segment": [92.13202808112325, 98.78848829953198], "label": "Playing ten pins"}, {"segment": [104.3600733229329, 119.57478627145085], "label": "Playing ten pins"}]}, "kAmDOTsp5cs": {"duration": 196.72, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kAmDOTsp5cs", "annotations": [{"segment": [6.751666224472109, 8.899923659531416], "label": "Rope skipping"}, {"segment": [40.50999734683265, 196.719001], "label": "Rope skipping"}]}, "evEfF66Q2IM": {"duration": 22.385, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=evEfF66Q2IM", "annotations": []}, "Po6rrhjsS8g": {"duration": 168.25, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Po6rrhjsS8g", "annotations": [{"segment": [38.3225609078785, 109.2457834], "label": "Playing squash"}, {"segment": [136.7537949921997, 149.87795957878313], "label": "Playing squash"}]}, "TuMig8MQ0hk": {"duration": 4.13, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TuMig8MQ0hk", "annotations": []}, "fzwNgtDwgKQ": {"duration": 83.987, "subset": "training", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=fzwNgtDwgKQ", 
"annotations": [{"segment": [4.847900389484967, 80.31791726362931], "label": "Cutting the grass"}]}, "QMCHIR3nDLs": {"duration": 87.26, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=QMCHIR3nDLs", "annotations": [{"segment": [1.1571216817472698, 7.827587847113885], "label": "Chopping wood"}, {"segment": [10.686359060842433, 16.676165413416538], "label": "Chopping wood"}]}, "_zR5wVbz3xU": {"duration": 59.1, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_zR5wVbz3xU", "annotations": [{"segment": [5.338684287051482, 56.33232385647426], "label": "Using parallel bars"}]}, "I-vi5EpjrFI": {"duration": 63.693, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=I-vi5EpjrFI", "annotations": [{"segment": [16.11152379125614, 55.69587066508792], "label": "Gargling mouthwash"}]}, "x9geCRHcUf0": {"duration": 69.06, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=x9geCRHcUf0", "annotations": []}, "_jxYsp9_lXk": {"duration": 201.944, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_jxYsp9_lXk", "annotations": []}, "_nvQDglPcmc": {"duration": 237.07999999999998, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_nvQDglPcmc", "annotations": [{"segment": [28.103203288611546, 221.12783640249611], "label": "Playing water polo"}]}, "fppEk17hs8M": {"duration": 36.851, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fppEk17hs8M", "annotations": [{"segment": [0, 36.821], "label": "Playing ice hockey"}]}, "v3t4Z5cEgZM": {"duration": 51.11, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=v3t4Z5cEgZM", "annotations": [{"segment": [4.76144643, 41.97336861], "label": "Smoking a cigarette"}]}, "t6z_3QbsUaE": {"duration": 237.29, "subset": "testing", "resolution": "1920x1080", 
"url": "https://www.youtube.com/watch?v=t6z_3QbsUaE", "annotations": []}, "xBtOxEIETtk": {"duration": 38.22, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xBtOxEIETtk", "annotations": [{"segment": [2.7892956, 37.309817666926676], "label": "Breakdancing"}]}, "yACg55C3IlM": {"duration": 176.332, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=yACg55C3IlM", "annotations": [{"segment": [0.01, 176.281], "label": "Hand car wash"}]}, "dNUZyM1gF1E": {"duration": 33.066, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dNUZyM1gF1E", "annotations": [{"segment": [0.01, 31.077180962700172], "label": "Rock-paper-scissors"}]}, "jzVxdBzCuoM": {"duration": 221.12, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=jzVxdBzCuoM", "annotations": [{"segment": [0, 28.632232658013805], "label": "Cheerleading"}, {"segment": [38.291299096861835, 74.51279824254195], "label": "Cheerleading"}, {"segment": [94.86583109582887, 107.28463080291918], "label": "Cheerleading"}, {"segment": [119.35846385147923, 218.36389374570985], "label": "Cheerleading"}]}, "s789WxU8PFY": {"duration": 167.416, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=s789WxU8PFY", "annotations": [{"segment": [29.565429369786816, 73.10534681526968], "label": "Baking cookies"}, {"segment": [89.00914899595504, 109.08444027321362], "label": "Baking cookies"}, {"segment": [115.60239198660925, 117.68813653489585], "label": "Baking cookies"}, {"segment": [131.76691223583043, 134.6348109897245], "label": "Baking cookies"}, {"segment": [140.89204463458432, 160.1851817062354], "label": "Baking cookies"}]}, "m6YWqN_nBTQ": {"duration": 60.72, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=m6YWqN_nBTQ", "annotations": []}, "2NHvn4FFobE": {"duration": 38.964, "subset": "testing", "resolution": 
"854x480", "url": "https://www.youtube.com/watch?v=2NHvn4FFobE", "annotations": []}, "O7ergU8WZ1o": {"duration": 184.447, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=O7ergU8WZ1o", "annotations": []}, "RF0ChBe9HHI": {"duration": 82.36, "subset": "validation", "resolution": "320x256", "url": "https://www.youtube.com/watch?v=RF0ChBe9HHI", "annotations": [{"segment": [6.1674416037866004, 76.32208884243369], "label": "Playing violin"}]}, "kZB7yxzHOrA": {"duration": 150.28, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=kZB7yxzHOrA", "annotations": [{"segment": [3.8659594212168487, 5.974664560062402], "label": "Springboard diving"}, {"segment": [10.66067597971919, 19.329797106084243], "label": "Springboard diving"}, {"segment": [24.484409667706707, 126.17085747425897], "label": "Springboard diving"}, {"segment": [138.3544871653666, 143.5090997269891], "label": "Springboard diving"}]}, "b6VAlwv45q4": {"duration": 234.103, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=b6VAlwv45q4", "annotations": [{"segment": [0.01, 234.121], "label": "Sumo"}]}, "4Smm8f31YLA": {"duration": 11.12, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4Smm8f31YLA", "annotations": []}, "eFq3gsEi9yI": {"duration": 147.842, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=eFq3gsEi9yI", "annotations": [{"segment": [0.01, 147.821], "label": "Croquet"}]}, "Z0oEaM7h6jM": {"duration": 196.023, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Z0oEaM7h6jM", "annotations": [{"segment": [18.95410608424337, 74.89929017160686], "label": "Rafting"}, {"segment": [102.10760374414977, 180.6938705148206], "label": "Rafting"}]}, "Th0SMArnWiY": {"duration": 207.773, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Th0SMArnWiY", 
"annotations": [{"segment": [21.717214245032384, 86.54471945408429], "label": "BMX"}, {"segment": [97.56539533962311, 152.66877476731722], "label": "BMX"}, {"segment": [174.06185048517943, 194.15837698439938], "label": "BMX"}]}, "IrO7Fgr102M": {"duration": 51.62, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=IrO7Fgr102M", "annotations": [{"segment": [0, 21.1634992199688], "label": "Playing polo"}, {"segment": [26.876839313572543, 51.381], "label": "Playing polo"}]}, "oHR5-JjR-Tg": {"duration": 28.306, "subset": "testing", "resolution": "400x226", "url": "https://www.youtube.com/watch?v=oHR5-JjR-Tg", "annotations": []}, "cudw2faobPA": {"duration": 152.509, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cudw2faobPA", "annotations": []}, "DZBu_U_Jt4c": {"duration": 26.146, "subset": "validation", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=DZBu_U_Jt4c", "annotations": [{"segment": [0.01, 26.101], "label": "Playing blackjack"}]}, "-uICMT1yOcM": {"duration": 105.906, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=-uICMT1yOcM", "annotations": [{"segment": [2.9722184087363495, 10.237641185647426], "label": "Having an ice cream"}, {"segment": [25.428979719188767, 32.69440249609985], "label": "Having an ice cream"}, {"segment": [62.74683307332293, 66.21629797191888], "label": "Having an ice cream"}, {"segment": [94.12291731669266, 99.90239469578783], "label": "Having an ice cream"}]}, "mD0aPovD2qw": {"duration": 172.46, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mD0aPovD2qw", "annotations": []}, "R6INcHHxlNs": {"duration": 42.052, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=R6INcHHxlNs", "annotations": [{"segment": [0.2, 41.98573329173167], "label": "Mooping floor"}]}, "d-sWAcDYsbc": {"duration": 212.834, "subset": "training", "resolution": 
"720x480", "url": "https://www.youtube.com/watch?v=d-sWAcDYsbc", "annotations": [{"segment": [0.2, 212.58100000000002], "label": "Playing drums"}]}, "OK84hJx9daQ": {"duration": 121.86, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=OK84hJx9daQ", "annotations": [{"segment": [8.935009569090246, 115.01448487871488], "label": "Preparing pasta"}]}, "l_cjSGt5j40": {"duration": 221.1, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=l_cjSGt5j40", "annotations": [{"segment": [0, 217.26744149765992], "label": "Playing piano"}]}, "gXk9TiqGUHs": {"duration": 215.83, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gXk9TiqGUHs", "annotations": [{"segment": [28.11507246684425, 34.51251410600642], "label": "Surfing"}, {"segment": [60.438988117347826, 67.50984466589549], "label": "Surfing"}, {"segment": [110.94510632125966, 119.36279268857831], "label": "Surfing"}, {"segment": [131.48426105751713, 134.51462814975184], "label": "Surfing"}]}, "hYVFzI_kfaQ": {"duration": 140.02, "subset": "training", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=hYVFzI_kfaQ", "annotations": [{"segment": [17.037790670826833, 138.04979107644306], "label": "Preparing salad"}]}, "9Ze3-UiuRT0": {"duration": 48.44, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9Ze3-UiuRT0", "annotations": []}, "-9KxEw6L9T8": {"duration": 125.806, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=-9KxEw6L9T8", "annotations": []}, "UOkpGU7NAag": {"duration": 53.824, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=UOkpGU7NAag", "annotations": [{"segment": [3.432001910730596, 26.618949351031386], "label": "Using the monkey bar"}]}, "WvxwmaDFGqk": {"duration": 11.494, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=WvxwmaDFGqk", 
"annotations": [{"segment": [2.787546021840874, 11.181000000000001], "label": "Kneeling"}]}, "sp5pM_rJrcI": {"duration": 11.053, "subset": "testing", "resolution": "626x360", "url": "https://www.youtube.com/watch?v=sp5pM_rJrcI", "annotations": []}, "oQuAwR_t5Ig": {"duration": 216.95, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=oQuAwR_t5Ig", "annotations": [{"segment": [0.01, 216.94360544217687], "label": "Skateboarding"}]}, "iR-0hVp5ISY": {"duration": 74.0, "subset": "testing", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=iR-0hVp5ISY", "annotations": []}, "AISkvED80lU": {"duration": 175.45, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=AISkvED80lU", "annotations": [{"segment": [1.2317055514820594, 4.51625368876755], "label": "Cheerleading"}, {"segment": [7.25337713650546, 12.180199342433697], "label": "Cheerleading"}, {"segment": [12.453911687207489, 21.212706719968796], "label": "Cheerleading"}, {"segment": [22.307556099063962, 33.529762234789395], "label": "Cheerleading"}, {"segment": [35.99317333775351, 44.478256025741025], "label": "Cheerleading"}, {"segment": [49.95250292121685, 70.20721643447737], "label": "Cheerleading"}, {"segment": [71.57577815834634, 82.25055960452418], "label": "Cheerleading"}, {"segment": [94.29390277457097, 99.76814967004681], "label": "Cheerleading"}, {"segment": [102.23156077301093, 106.06353359984399], "label": "Cheerleading"}, {"segment": [107.97952001326053, 118.38058911466459], "label": "Cheerleading"}, {"segment": [120.02286318330734, 128.78165821606865], "label": "Cheerleading"}, {"segment": [134.25590511154445, 140.82500138611545], "label": "Cheerleading"}]}, "aW8LjbEpY1c": {"duration": 35.991, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aW8LjbEpY1c", "annotations": [{"segment": [6.681623138845554, 28.97241629953198], "label": "Playing rubik cube"}]}, "_cU5aISTDBo": 
{"duration": 44.49, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_cU5aISTDBo", "annotations": [{"segment": [3.5168554499999995, 42.950389235569425], "label": "Pole vault"}]}, "wBv1tu74S2E": {"duration": 54.1, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wBv1tu74S2E", "annotations": [{"segment": [6.515503859999999, 46.92800016973479], "label": "Using parallel bars"}]}, "gEYutYXODs4": {"duration": 190.59, "subset": "validation", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=gEYutYXODs4", "annotations": [{"segment": [24.077723868954756, 171.5166255850234], "label": "Knitting"}]}, "A5JcwIaP30Y": {"duration": 125.11, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=A5JcwIaP30Y", "annotations": [{"segment": [0.2, 124.861], "label": "Slacklining"}]}, "outMi06JZss": {"duration": 68.917, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=outMi06JZss", "annotations": [{"segment": [3.5479520124804993, 35.69454751950078], "label": "Running a marathon"}]}, "9R8QRLKPJs8": {"duration": 13.56, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=9R8QRLKPJs8", "annotations": []}, "4E-RWxR6Xr0": {"duration": 118.56, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4E-RWxR6Xr0", "annotations": [{"segment": [14.06252021, 108.31492355694228], "label": "Spinning"}]}, "UWusSRe8Whg": {"duration": 98.267, "subset": "training", "resolution": "352x240", "url": "https://www.youtube.com/watch?v=UWusSRe8Whg", "annotations": [{"segment": [0, 95.35412346333852], "label": "Playing congas"}]}, "2fLmeXCHTqQ": {"duration": 140.39, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=2fLmeXCHTqQ", "annotations": []}, "7g3YO0-R4qk": {"duration": 209.862, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=7g3YO0-R4qk", "annotations": [{"segment": [71.70010426053042, 113.60701451326052], "label": "Swimming"}, {"segment": [117.53578734945398, 125.06593528549142], "label": "Swimming"}, {"segment": [160.097493074883, 167.9550387472699], "label": "Swimming"}, {"segment": [174.50299347425897, 187.9263006645866], "label": "Swimming"}]}, "5xI8T41HejM": {"duration": 87.702, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5xI8T41HejM", "annotations": [{"segment": [34.73547311922653, 35.828330332724605], "label": "River tubing"}, {"segment": [62.73993822558929, 65.33547406671079], "label": "River tubing"}, {"segment": [76.1274388797949, 78.58636757138369], "label": "River tubing"}]}, "nj5dMUGvOWo": {"duration": 66.85, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=nj5dMUGvOWo", "annotations": [{"segment": [4.064477358814353, 50.545423564742585], "label": "Using uneven bars"}]}, "fJCkM6secVM": {"duration": 201.39, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=fJCkM6secVM", "annotations": [{"segment": [31.7317524700988, 201.38666666666666], "label": "Preparing salad"}]}, "Y76yy0chbhU": {"duration": 98.59, "subset": "training", "resolution": "852x480", "url": "https://www.youtube.com/watch?v=Y76yy0chbhU", "annotations": [{"segment": [2.1533343238491445, 98.28432981435257], "label": "Playing field hockey"}]}, "1_4E-zSZrf4": {"duration": 72.56, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=1_4E-zSZrf4", "annotations": []}, "UaitYHDxhJI": {"duration": 140.342, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UaitYHDxhJI", "annotations": []}, "rpaqx40HtWY": {"duration": 212.974, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=rpaqx40HtWY", "annotations": []}, "wcmO0R3Kqzo": {"duration": 93.25, "subset": "validation", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=wcmO0R3Kqzo", "annotations": [{"segment": [15.978601029641185, 79.74774513884554], "label": "Doing nails"}]}, "rE2IiY5E8LQ": {"duration": 40.589, "subset": "training", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=rE2IiY5E8LQ", "annotations": [{"segment": [15.893460575663028, 37.35913043681747], "label": "Curling"}]}, "M1dh_TEW4K8": {"duration": 115.31, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=M1dh_TEW4K8", "annotations": []}, "7-RbxFePrmU": {"duration": 123.3, "subset": "training", "resolution": "180x240", "url": "https://www.youtube.com/watch?v=7-RbxFePrmU", "annotations": [{"segment": [3.845273010920437, 37.10688455538221], "label": "Brushing teeth"}, {"segment": [42.298003120124804, 45.7587488299532], "label": "Brushing teeth"}, {"segment": [55.56419500780031, 74.21376911076442], "label": "Brushing teeth"}, {"segment": [79.78941497659906, 96.70861622464899], "label": "Brushing teeth"}]}, "LGt_KpgXymU": {"duration": 225.14, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=LGt_KpgXymU", "annotations": [{"segment": [22.12771116558948, 44.95788935230879], "label": "Playing violin"}, {"segment": [54.08996062699651, 118.01445954981057], "label": "Playing violin"}, {"segment": [128.20023135619303, 144.356972842179], "label": "Playing violin"}]}, "RTS4mOH3cFY": {"duration": 136.37, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RTS4mOH3cFY", "annotations": [{"segment": [21.70018843057722, 101.48029295475818], "label": "Walking the dog"}]}, "1Cg6rkv4lyk": {"duration": 52.687, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1Cg6rkv4lyk", "annotations": []}, "IKTYMYu8FFs": {"duration": 107.091, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IKTYMYu8FFs", "annotations": [{"segment": 
[31.67288161993769, 89.6842647975078], "label": "Applying sunscreen"}]}, "28wbI73IVvQ": {"duration": 83.848, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=28wbI73IVvQ", "annotations": []}, "goLVOzKw4U8": {"duration": 131.379, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=goLVOzKw4U8", "annotations": [{"segment": [35.387123052959495, 96.97139252336449], "label": "Welding"}]}, "16Jm2QATbtE": {"duration": 69.823, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=16Jm2QATbtE", "annotations": []}, "WBRQ4vHBFMY": {"duration": 29.98, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WBRQ4vHBFMY", "annotations": [{"segment": [0.561178855317478, 5.892377616224649], "label": "Getting a piercing"}]}, "iAwNYgA6mkc": {"duration": 203.848, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=iAwNYgA6mkc", "annotations": [{"segment": [85.15835881435258, 96.59754134165368], "label": "Doing fencing"}, {"segment": [114.39182527301092, 124.8777425897036], "label": "Doing fencing"}, {"segment": [130.91508892355697, 150.31290639625587], "label": "Doing fencing"}, {"segment": [186.2227550702028, 191.30733541341652], "label": "Doing fencing"}, {"segment": [197.98084711388455, 203.501], "label": "Doing fencing"}]}, "_sgg_QEjcEE": {"duration": 108.41, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_sgg_QEjcEE", "annotations": [{"segment": [5.578595943837754, 22.314383775351015], "label": "Discus throw"}, {"segment": [22.314383775351015, 51.39070202808112], "label": "Discus throw"}, {"segment": [64.57647425897035, 75.90271450858035], "label": "Discus throw"}]}, "Jd4HJLTLze8": {"duration": 74.46000000000001, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Jd4HJLTLze8", "annotations": []}, "rSwrt0eKH3I": {"duration": 55.68, 
"subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=rSwrt0eKH3I", "annotations": []}, "qblFXnyqf1o": {"duration": 186.78, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=qblFXnyqf1o", "annotations": [{"segment": [3.7868533541341654, 180.04046489859596], "label": "Doing step aerobics"}]}, "Rvqu079gCh0": {"duration": 210.535, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Rvqu079gCh0", "annotations": [{"segment": [15.43706612754306, 18.064651851380177], "label": "Longboarding"}, {"segment": [19.049996497819098, 22.334478652615495], "label": "Longboarding"}, {"segment": [27.918098315769367, 55.179300200579455], "label": "Longboarding"}, {"segment": [72.58705562100036, 138.27669871692825], "label": "Longboarding"}, {"segment": [142.54652551816358, 182.617207700468], "label": "Longboarding"}, {"segment": [190.49996486739468, 210.535306], "label": "Longboarding"}]}, "W4LIBSfe6bc": {"duration": 60.07, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=W4LIBSfe6bc", "annotations": [{"segment": [3.842230889235569, 57.25861154446178], "label": "Getting a tattoo"}]}, "pcrNMrBmy1I": {"duration": 121.58, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=pcrNMrBmy1I", "annotations": [{"segment": [0.2, 113.00848049921997], "label": "Hurling"}]}, "-PcbohemKh0": {"duration": 201.13, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-PcbohemKh0", "annotations": []}, "WMxQ5hc4s78": {"duration": 184.74, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WMxQ5hc4s78", "annotations": [{"segment": [12.248616560062402, 178.25339743385655], "label": "Polishing forniture"}]}, "QwK2hghLFmI": {"duration": 63.461, "subset": "testing", "resolution": "478x360", "url": "https://www.youtube.com/watch?v=QwK2hghLFmI", "annotations": []}, 
"4XX5rcqpOXA": {"duration": 171.27, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4XX5rcqpOXA", "annotations": [{"segment": [5.475941226989079, 166.8158681099844], "label": "Cumbia"}]}, "S-mMmPO6a9s": {"duration": 122.486, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=S-mMmPO6a9s", "annotations": [{"segment": [0.2, 114.24598751950079], "label": "Cutting the grass"}]}, "u4nL6NXfsak": {"duration": 60.628, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=u4nL6NXfsak", "annotations": [{"segment": [7.47200178627145, 52.587759407176286], "label": "Grooming dog"}]}, "IbcBQFvcS9o": {"duration": 122.74, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IbcBQFvcS9o", "annotations": [{"segment": [6.127459794071763, 115.46432049453978], "label": "Shoveling snow"}]}, "lL-YnWr815o": {"duration": 168.09, "subset": "validation", "resolution": "853x480", "url": "https://www.youtube.com/watch?v=lL-YnWr815o", "annotations": [{"segment": [18.090809672386897, 136.86090795631827], "label": "Rafting"}, {"segment": [146.82396255850236, 168.061], "label": "Rafting"}]}, "bzJTmN6wAC4": {"duration": 209.61, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bzJTmN6wAC4", "annotations": [{"segment": [10.790975834951766, 185.0815855328091], "label": "Painting"}]}, "cSwDKlxiqXQ": {"duration": 127.059, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=cSwDKlxiqXQ", "annotations": [{"segment": [68.96391230520202, 83.19142492144603], "label": "Spread mulch"}, {"segment": [108.48477911543472, 109.07759213254202], "label": "Spread mulch"}]}, "JGpu6r-1ghM": {"duration": 162.772, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=JGpu6r-1ghM", "annotations": []}, "1ZwVoNUvaAE": {"duration": 70.705, "subset": "testing", "resolution": 
"640x360", "url": "https://www.youtube.com/watch?v=1ZwVoNUvaAE", "annotations": []}, "83f0iwb8VW8": {"duration": 173.38, "subset": "testing", "resolution": "204x360", "url": "https://www.youtube.com/watch?v=83f0iwb8VW8", "annotations": []}, "PmWF-HawRw0": {"duration": 92.25, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=PmWF-HawRw0", "annotations": [{"segment": [4.814645085803432, 9.988592043681747], "label": "Javelin throw"}, {"segment": [18.755557722308893, 25.797874414976597], "label": "Javelin throw"}, {"segment": [32.40902886115445, 35.71460608424337], "label": "Javelin throw"}, {"segment": [44.19413026521061, 48.93691497659906], "label": "Javelin throw"}]}, "-kuXhOsHAc4": {"duration": 232.595, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-kuXhOsHAc4", "annotations": [{"segment": [54.85163735250357, 167.52573936506354], "label": "Beer pong"}, {"segment": [172.2355892884181, 198.68320808879392], "label": "Beer pong"}, {"segment": [206.6537233437017, 211.3635732670563], "label": "Beer pong"}, {"segment": [223.31934614941795, 225.85541918507045], "label": "Beer pong"}]}, "sQNdiyPVKvQ": {"duration": 10.71, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sQNdiyPVKvQ", "annotations": []}, "6ZQXEewM7Fc": {"duration": 42.4, "subset": "testing", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=6ZQXEewM7Fc", "annotations": []}, "UusDcf16TVQ": {"duration": 116.66, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UusDcf16TVQ", "annotations": [{"segment": [0.2312658742597942, 35.482773497312756], "label": "Playing accordion"}, {"segment": [43.29625199108222, 107.25775035868347], "label": "Playing accordion"}]}, "3FUVKpQA6IY": {"duration": 129.591, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=3FUVKpQA6IY", "annotations": [{"segment": [9.270861741372288, 
34.86651025686973], "label": "Beer pong"}, {"segment": [43.532753455030274, 47.765104784364496], "label": "Beer pong"}, {"segment": [68.12069927306719, 83.84086012846797], "label": "Beer pong"}]}, "DRHW4FvSKdw": {"duration": 22.036, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DRHW4FvSKdw", "annotations": [{"segment": [0.2, 21.835736961451246], "label": "Playing blackjack"}]}, "WbZpWiPCHVU": {"duration": 35.13, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WbZpWiPCHVU", "annotations": [{"segment": [3.0616666500000003, 33.90820202808112], "label": "Vacuuming floor"}]}, "Go5vzj3VkzM": {"duration": 148.1, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Go5vzj3VkzM", "annotations": []}, "DeHIqrMlsU8": {"duration": 139.065, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DeHIqrMlsU8", "annotations": [{"segment": [2.6033773759113634, 120.18925552124126], "label": "Longboarding"}, {"segment": [124.74516592908614, 127.34854330499752], "label": "Longboarding"}]}, "0KqeKi2CBqg": {"duration": 88.49, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=0KqeKi2CBqg", "annotations": [{"segment": [53.56269500780031, 83.48059672386896], "label": "Smoking a cigarette"}]}, "Wh-Q13ZAffE": {"duration": 123.066, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Wh-Q13ZAffE", "annotations": [{"segment": [0.01, 123.065781], "label": "Snow tubing"}]}, "bFPfj889BhU": {"duration": 222.61, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bFPfj889BhU", "annotations": [{"segment": [14.759613826053041, 204.0299558307332], "label": "Sailing"}]}, "Bh35Q9vNsSA": {"duration": 27.31, "subset": "validation", "resolution": "800x480", "url": "https://www.youtube.com/watch?v=Bh35Q9vNsSA", "annotations": [{"segment": 
[3.330393135725429, 5.209076443057722], "label": "Playing bagpipes"}, {"segment": [0.2, 27.169], "label": "Playing bagpipes"}, {"segment": [24.63636973478939, 26.77123712948518], "label": "Playing bagpipes"}]}, "NEiu47xcJxc": {"duration": 202.13, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=NEiu47xcJxc", "annotations": []}, "UN0bAa_ko4I": {"duration": 217.386, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=UN0bAa_ko4I", "annotations": [{"segment": [21.026339088937707, 117.67967199776425], "label": "Capoeira"}, {"segment": [132.9407245623158, 168.54984721293613], "label": "Capoeira"}]}, "PEp22TMpoHA": {"duration": 175.148, "subset": "training", "resolution": "384x288", "url": "https://www.youtube.com/watch?v=PEp22TMpoHA", "annotations": [{"segment": [0.20000000000000004, 174.94780045351476], "label": "Rafting"}]}, "NKnOCGiw048": {"duration": 82.896, "subset": "testing", "resolution": "426x232", "url": "https://www.youtube.com/watch?v=NKnOCGiw048", "annotations": []}, "FExyWFc1nU0": {"duration": 33.902, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FExyWFc1nU0", "annotations": [{"segment": [12.058437602810233, 26.232389965678628], "label": "Calf roping"}]}, "DouY-lXxXG4": {"duration": 82.06, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DouY-lXxXG4", "annotations": []}, "OszjSKHCvKI": {"duration": 218.988, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=OszjSKHCvKI", "annotations": [{"segment": [21.45593348360053, 48.70156990302055], "label": "Wakeboarding"}, {"segment": [60.96210629175957, 197.53085729164323], "label": "Wakeboarding"}]}, "trl-RCWyhb0": {"duration": 58.793, "subset": "validation", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=trl-RCWyhb0", "annotations": [{"segment": [0.9171971216848674, 58.42545665132605], "label": 
"Playing congas"}]}, "_py-3xo0_pQ": {"duration": 79.9, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=_py-3xo0_pQ", "annotations": []}, "0gvD2pktxxw": {"duration": 176.774, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=0gvD2pktxxw", "annotations": [{"segment": [15.144109606459406, 162.4549939602009], "label": "Playing rubik cube"}]}, "Lf_7RurLgp0": {"duration": 143.04, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=Lf_7RurLgp0", "annotations": []}, "3PiN6rDgg_E": {"duration": 208.4, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3PiN6rDgg_E", "annotations": [{"segment": [44.70339357791999, 152.96688493390076], "label": "Putting on makeup"}]}, "_9e948mdwrs": {"duration": 99.73, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_9e948mdwrs", "annotations": [{"segment": [5.442800312012481, 62.060372854914185], "label": "Blowing leaves"}, {"segment": [72.79261778471138, 90.05753354134166], "label": "Blowing leaves"}]}, "j3QSVh_AhDc": {"duration": 231.69, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=j3QSVh_AhDc", "annotations": [{"segment": [0, 214.31353042121685], "label": "Kayaking"}]}, "YSnCGTXJtig": {"duration": 31.069, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YSnCGTXJtig", "annotations": [{"segment": [0.2, 30.868299319727893], "label": "Calf roping"}]}, "C4xYnBd7PM0": {"duration": 62.949, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=C4xYnBd7PM0", "annotations": [{"segment": [0.01, 62.949114], "label": "Bullfighting"}]}, "-ArIbKfZc2s": {"duration": 202.13, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=-ArIbKfZc2s", "annotations": [{"segment": [0, 148.522663024961], "label": "Rafting"}]}, 
"bXApJtAf6Qo": {"duration": 127.617, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bXApJtAf6Qo", "annotations": [{"segment": [3.782715357593896, 127.61687074829932], "label": "Scuba diving"}]}, "ABMVo0NDhKo": {"duration": 32.53, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ABMVo0NDhKo", "annotations": [{"segment": [0.01, 31.465391731669264], "label": "Drinking coffee"}]}, "tm_CL7A0W4M": {"duration": 117.33, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tm_CL7A0W4M", "annotations": [{"segment": [0, 109.59644149765991], "label": "Tango"}]}, "KIOvUY8S2pQ": {"duration": 15.093, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=KIOvUY8S2pQ", "annotations": [{"segment": [8.384425892494178, 11.92681062769667], "label": "Rock-paper-scissors"}]}, "QjaEDlh805g": {"duration": 211.23, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=QjaEDlh805g", "annotations": [{"segment": [21.419774087363493, 198.05052656162246], "label": "Brushing hair"}]}, "aDrjDISgmLU": {"duration": 123.14, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=aDrjDISgmLU", "annotations": [{"segment": [91.91934089391576, 114.01071853822152], "label": "Cricket"}]}, "E6LJROCxQPA": {"duration": 38.522, "subset": "training", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=E6LJROCxQPA", "annotations": [{"segment": [5.048112093603745, 34.73581892979719], "label": "Slacklining"}]}, "xLRUuV1z3QA": {"duration": 72.934, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xLRUuV1z3QA", "annotations": [{"segment": [17.240982459695367, 60.79715202351014], "label": "Camel ride"}]}, "eBITMlyTZhY": {"duration": 219.1, "subset": "training", "resolution": "384x288", "url": "https://www.youtube.com/watch?v=eBITMlyTZhY", "annotations": 
[{"segment": [11.963529149765991, 199.9618443603744], "label": "Belly dance"}]}, "0jeq5CeP2P0": {"duration": 97.385, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0jeq5CeP2P0", "annotations": [{"segment": [8.124404836193447, 92.55747191887674], "label": "Kite flying"}]}, "n8AFyBdT_sQ": {"duration": 160.009, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=n8AFyBdT_sQ", "annotations": []}, "wDw3i5ODGWA": {"duration": 148.957, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wDw3i5ODGWA", "annotations": [{"segment": [14.131126408086564, 130.42337516502596], "label": "Longboarding"}]}, "sgJ_NrROLYw": {"duration": 118.585, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=sgJ_NrROLYw", "annotations": []}, "yaK-giAk4e8": {"duration": 25.496, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yaK-giAk4e8", "annotations": [{"segment": [1.312561305772231, 12.489219697347894], "label": "Beer pong"}]}, "vTbeVoT1Gsg": {"duration": 49.27, "subset": "validation", "resolution": "2560x810", "url": "https://www.youtube.com/watch?v=vTbeVoT1Gsg", "annotations": [{"segment": [0.01, 41.35528111700468], "label": "Javelin throw"}]}, "sC7xUkNTpP4": {"duration": 219.27, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sC7xUkNTpP4", "annotations": [{"segment": [0.01, 211.65454446177847], "label": "Playing kickball"}]}, "3v_wDKeVvcc": {"duration": 168.37, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=3v_wDKeVvcc", "annotations": []}, "lnieKUxnxuY": {"duration": 25.542, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lnieKUxnxuY", "annotations": [{"segment": [7.451395418096724, 19.883670126365054], "label": "Laying tile"}]}, "A8NAj6NQ5vM": {"duration": 8.499, "subset": "validation", 
"resolution": "640x480", "url": "https://www.youtube.com/watch?v=A8NAj6NQ5vM", "annotations": [{"segment": [0.01, 8.490559494965241], "label": "Playing beach volleyball"}]}, "dBNZf90PLJ0": {"duration": 167.741, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=dBNZf90PLJ0", "annotations": [{"segment": [72.2054375072962, 113.54043334477379], "label": "Rafting"}, {"segment": [134.4695454820593, 152.25929079875198], "label": "Rafting"}]}, "D59UvMNXsEs": {"duration": 13.05, "subset": "testing", "resolution": "312x240", "url": "https://www.youtube.com/watch?v=D59UvMNXsEs", "annotations": []}, "FMYu8k1b_DM": {"duration": 213.647, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FMYu8k1b_DM", "annotations": [{"segment": [13.661187207488299, 195.2550171606864], "label": "Futsal"}]}, "wZJeEV6sZXE": {"duration": 38.64, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wZJeEV6sZXE", "annotations": [{"segment": [13.549492979719188, 14.994772230889234], "label": "Drinking coffee"}, {"segment": [34.68670202808112, 36.55352106084243], "label": "Drinking coffee"}]}, "fpWOf1DR1oA": {"duration": 9.126, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=fpWOf1DR1oA", "annotations": [{"segment": [0.01, 9.125441], "label": "Throwing darts"}]}, "Uc1_7BXtXZs": {"duration": 96.015, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Uc1_7BXtXZs", "annotations": [{"segment": [20.071676554136992, 82.53353568156331], "label": "Playing beach volleyball"}]}, "KoMKIJ6QbpY": {"duration": 23.847, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KoMKIJ6QbpY", "annotations": [{"segment": [0.01, 23.821], "label": "Laying tile"}]}, "kVzojOj5_Zg": {"duration": 75.78999999999999, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=kVzojOj5_Zg", "annotations": [{"segment": [7.567169494943064, 14.5431538730937], "label": "Volleyball"}]}, "RVOBQBamjDA": {"duration": 209.65, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RVOBQBamjDA", "annotations": [{"segment": [12.101653524644387, 69.33920397904352], "label": "Playing saxophone"}, {"segment": [83.40328780498159, 203.11153470982842], "label": "Playing saxophone"}]}, "YPFk9ftkzl4": {"duration": 20.99, "subset": "training", "resolution": "720x1280", "url": "https://www.youtube.com/watch?v=YPFk9ftkzl4", "annotations": [{"segment": [1.110878817472699, 19.636416744149766], "label": "Smoking hookah"}]}, "iTev1WJ819g": {"duration": 975.099, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=iTev1WJ819g", "annotations": []}, "mzewLmZSCMU": {"duration": 43.1, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mzewLmZSCMU", "annotations": [{"segment": [7.189698907956318, 31.043372854914196], "label": "Ballet"}]}, "r3IFPcLG1KA": {"duration": 121.53, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=r3IFPcLG1KA", "annotations": []}, "BFXSk5F0E_w": {"duration": 199.901, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=BFXSk5F0E_w", "annotations": [{"segment": [1.6412638805781568, 4.754005313599964], "label": "Hula hoop"}]}, "21Pz1cjdd2I": {"duration": 146.472, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=21Pz1cjdd2I", "annotations": [{"segment": [14.594492211838007, 141.1753878504673], "label": "Snow tubing"}]}, "byPNX32dCSA": {"duration": 43.329, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=byPNX32dCSA", "annotations": []}, "Cc_DmDsXm6M": {"duration": 22.617, "subset": "validation", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=Cc_DmDsXm6M", "annotations": [{"segment": [0.01, 18.064762368798753], "label": "Waterskiing"}]}, "tOCFOu8eOkU": {"duration": 17.508, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=tOCFOu8eOkU", "annotations": [{"segment": [8.44399375975039, 16.751794071762873], "label": "Having an ice cream"}]}, "7HKWppcWgeY": {"duration": 238.4, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=7HKWppcWgeY", "annotations": [{"segment": [34.9498159126365, 223.4557379095164], "label": "Assembling bicycle"}]}, "brSp5DB7OLs": {"duration": 67.52, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=brSp5DB7OLs", "annotations": [{"segment": [0.6300788984000294, 67.20858514723362], "label": "Hand washing clothes"}]}, "k_gjsTv-jy8": {"duration": 152.231, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=k_gjsTv-jy8", "annotations": [{"segment": [54.36479356123238, 95.14916672011088], "label": "Baking cookies"}]}, "qD52CX0CSBs": {"duration": 42.26, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=qD52CX0CSBs", "annotations": [{"segment": [1.3179719188767551, 42.241], "label": "Grooming horse"}]}, "QFWxdR_C71M": {"duration": 168.51, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=QFWxdR_C71M", "annotations": [{"segment": [1.9717160686427457, 159.7090015600624], "label": "Painting"}]}, "BrkFwA10o5s": {"duration": 174.034, "subset": "testing", "resolution": "327x240", "url": "https://www.youtube.com/watch?v=BrkFwA10o5s", "annotations": []}, "uyzQkTArIwU": {"duration": 235.219, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uyzQkTArIwU", "annotations": [{"segment": [4.402390015600624, 235.181], "label": "Capoeira"}]}, "ZHN4qO6izow": {"duration": 212.35, "subset": "testing", "resolution": 
"600x480", "url": "https://www.youtube.com/watch?v=ZHN4qO6izow", "annotations": []}, "Ktxr4ZiqO3o": {"duration": 121.53, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Ktxr4ZiqO3o", "annotations": [{"segment": [29.701749699999997, 46.03710414], "label": "Shaving"}, {"segment": [54.67529209, 91.52953435999999], "label": "Shaving"}]}, "2FRMVhgxh40": {"duration": 129.82, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=2FRMVhgxh40", "annotations": []}, "IMelyusE2pk": {"duration": 19.064, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=IMelyusE2pk", "annotations": []}, "NgG4AWP1F6Q": {"duration": 192.54, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=NgG4AWP1F6Q", "annotations": [{"segment": [47.30893692890573, 55.11866619970922], "label": "Putting on shoes"}, {"segment": [85.45646067475352, 92.66544154011058], "label": "Putting on shoes"}]}, "H9ekrZnisUI": {"duration": 22.292, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=H9ekrZnisUI", "annotations": [{"segment": [0.2, 20.280444284603423], "label": "Croquet"}]}, "gMaakjqhNmM": {"duration": 19.993, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gMaakjqhNmM", "annotations": [{"segment": [0.010000000000000002, 3.9469063962558506], "label": "Throwing darts"}, {"segment": [12.524435257410296, 18.242787831513258], "label": "Throwing darts"}]}, "gmxFEr2ZltI": {"duration": 35.6, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=gmxFEr2ZltI", "annotations": []}, "qqLiKDMtps8": {"duration": 168.392, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qqLiKDMtps8", "annotations": [{"segment": [0.01, 168.321], "label": "BMX"}]}, "P8M00PRbI3c": {"duration": 205.8, "subset": "training", "resolution": "480x360", "url": 
"https://www.youtube.com/watch?v=P8M00PRbI3c", "annotations": [{"segment": [16.695038718555544, 51.04829146635253], "label": "Archery"}]}, "HSEnmPWF5GY": {"duration": 33.577, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=HSEnmPWF5GY", "annotations": [{"segment": [0.01, 25.98643839747586], "label": "Kneeling"}]}, "_BHoMDR2ZVg": {"duration": 31.51, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_BHoMDR2ZVg", "annotations": [{"segment": [23.10360724633067, 25.413966555382213], "label": "Getting a tattoo"}]}, "-jNouTszLJ0": {"duration": 103.26, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=-jNouTszLJ0", "annotations": [{"segment": [0.2, 64.5603744149766], "label": "Beer pong"}]}, "o8-v0rPP06U": {"duration": 106.232, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=o8-v0rPP06U", "annotations": [{"segment": [13.072021919879061, 103.08695604361372], "label": "Futsal"}]}, "3iLo6lxAarc": {"duration": 123.902, "subset": "validation", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=3iLo6lxAarc", "annotations": [{"segment": [55.73810998439937, 70.8191209048362], "label": "Putting on shoes"}]}, "7iTrw4kr8e0": {"duration": 81.74, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7iTrw4kr8e0", "annotations": [{"segment": [8.601950007800312, 30.520992990639623], "label": "Putting on makeup"}]}, "Y3maPWNO96w": {"duration": 25.101, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=Y3maPWNO96w", "annotations": []}, "PEmSc-lmDBQ": {"duration": 229.39, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PEmSc-lmDBQ", "annotations": []}, "smfBAiFujmE": {"duration": 28.236, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=smfBAiFujmE", "annotations": [{"segment": 
[13.170433163806553, 27.08968694227769], "label": "Waterskiing"}]}, "vgdcVhRSa9E": {"duration": 122.53, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=vgdcVhRSa9E", "annotations": [{"segment": [0.01, 122.481], "label": "Bathing dog"}]}, "mgkIGWnC41U": {"duration": 56.1, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=mgkIGWnC41U", "annotations": []}, "E9HbfcT1ZWM": {"duration": 153.415, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=E9HbfcT1ZWM", "annotations": [{"segment": [0.01, 153.361], "label": "Slacklining"}]}, "Y39LnzOipck": {"duration": 20.132, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=Y39LnzOipck", "annotations": [{"segment": [12.091582082683308, 17.90182282371295], "label": "Playing ten pins"}]}, "mzXRehGBEOg": {"duration": 94.11, "subset": "training", "resolution": "640x350", "url": "https://www.youtube.com/watch?v=mzXRehGBEOg", "annotations": [{"segment": [10.932477860374416, 38.37373101326053], "label": "Brushing teeth"}, {"segment": [39.1554066, 82.69062113182528], "label": "Brushing teeth"}]}, "RmwaWeIPX70": {"duration": 37.29, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=RmwaWeIPX70", "annotations": [{"segment": [7.324315132605305, 21.101003120124805], "label": "Hammer throw"}]}, "r3H_75-lRjE": {"duration": 184.46, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=r3H_75-lRjE", "annotations": [{"segment": [18.12938694851794, 168.34430737909517], "label": "Playing harmonica"}]}, "HEINsRxsVlQ": {"duration": 103.376, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HEINsRxsVlQ", "annotations": []}, "doNSg6PuKDM": {"duration": 48.0, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=doNSg6PuKDM", "annotations": [{"segment": 
[2.038897460517943, 39.642962001016386], "label": "Shoveling snow"}]}, "GUbKGqYKj7U": {"duration": 41.77, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GUbKGqYKj7U", "annotations": [{"segment": [0.01, 20.59309243681747], "label": "Playing kickball"}]}, "k9LTYVTi6tc": {"duration": 27.96, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=k9LTYVTi6tc", "annotations": []}, "dFSfS3JQBck": {"duration": 58.61, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=dFSfS3JQBck", "annotations": []}, "fmdG9uPnWHQ": {"duration": 109.738, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=fmdG9uPnWHQ", "annotations": [{"segment": [6.505499457098284, 29.27474755694228], "label": "Grooming dog"}, {"segment": [35.43785230577223, 109.737504], "label": "Grooming dog"}]}, "TqO5Ddh5Lp4": {"duration": 60.14, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=TqO5Ddh5Lp4", "annotations": [{"segment": [0.01, 0.02], "label": "Canoeing"}]}, "ZPEGRXxk-1Y": {"duration": 57.89, "subset": "testing", "resolution": "320x180", "url": "https://www.youtube.com/watch?v=ZPEGRXxk-1Y", "annotations": []}, "bJahexpTlr8": {"duration": 208.59, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=bJahexpTlr8", "annotations": [{"segment": [42.457289391575664, 59.37513650546022], "label": "Fixing bicycle"}, {"segment": [87.02930967238689, 99.7176950078003], "label": "Fixing bicycle"}, {"segment": [142.98834243369734, 191.46448127925117], "label": "Fixing bicycle"}]}, "3FZM5nuRLXI": {"duration": 155.667, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3FZM5nuRLXI", "annotations": []}, "GGPCuw5aotM": {"duration": 171.108, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GGPCuw5aotM", "annotations": []}, 
"0p34rFNYj_M": {"duration": 67.06, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=0p34rFNYj_M", "annotations": [{"segment": [5.649295338533542, 67.059228], "label": "Building sandcastles"}]}, "w_PJ9N8CMo8": {"duration": 45.09, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=w_PJ9N8CMo8", "annotations": [{"segment": [1.33661448770876, 17.164943594383775], "label": "Washing face"}, {"segment": [19.97886877379095, 23.355578989079564], "label": "Washing face"}, {"segment": [24.340452801872075, 26.521244815912638], "label": "Washing face"}, {"segment": [28.56134057098284, 30.108999419656787], "label": "Washing face"}, {"segment": [36.158938555382214, 37.00311610920436], "label": "Washing face"}]}, "raKtQF0Gnuw": {"duration": 175.13, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=raKtQF0Gnuw", "annotations": []}, "EfJIu_moZaE": {"duration": 191.8, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=EfJIu_moZaE", "annotations": [{"segment": [9.874089645865835, 67.6225533322933], "label": "Sailing"}, {"segment": [73.00842041185646, 82.58329521996879], "label": "Sailing"}, {"segment": [93.35502937909516, 99.63854097191887], "label": "Sailing"}, {"segment": [106.2212674024961, 119.98514993915755], "label": "Sailing"}, {"segment": [130.1584544227769, 139.1348995553822], "label": "Sailing"}, {"segment": [161.57601238689546, 171.7493168705148], "label": "Sailing"}]}, "HGDatODUecQ": {"duration": 85.311, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HGDatODUecQ", "annotations": [{"segment": [5.852549141965678, 64.91009048361934], "label": "Rafting"}]}, "n-hsQkfdUk0": {"duration": 189.568, "subset": "training", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=n-hsQkfdUk0", "annotations": [{"segment": [7.98491228628737, 189.27199450858035], "label": "Playing congas"}]}, 
"TT2e7-NbUxo": {"duration": 159.43, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TT2e7-NbUxo", "annotations": [{"segment": [18.15634092846707, 138.5353672620905], "label": "Getting a haircut"}]}, "PNQpC_FlE2M": {"duration": 65.69, "subset": "training", "resolution": "712x480", "url": "https://www.youtube.com/watch?v=PNQpC_FlE2M", "annotations": [{"segment": [18.24132087051482, 61.999995093603744], "label": "Waterskiing"}]}, "V-6nF6U6rfI": {"duration": 93.21000000000001, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=V-6nF6U6rfI", "annotations": [{"segment": [12.64228081123245, 87.62408424336974], "label": "Using the balance beam"}]}, "3m9Vkg7xCa4": {"duration": 23.89, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3m9Vkg7xCa4", "annotations": []}, "KujAY-AST9M": {"duration": 231.55, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=KujAY-AST9M", "annotations": [{"segment": [0.2, 231.30100000000002], "label": "Futsal"}]}, "j73Wh1olDsA": {"duration": 3.066, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=j73Wh1olDsA", "annotations": [{"segment": [0.19999999999999998, 2.865034013605442], "label": "Throwing darts"}]}, "sVk-Br0zfkA": {"duration": 10.055, "subset": "training", "resolution": "384x216", "url": "https://www.youtube.com/watch?v=sVk-Br0zfkA", "annotations": [{"segment": [0.2, 6.711884735202492], "label": "Throwing darts"}]}, "pnEYhDVXVJ0": {"duration": 102.122, "subset": "validation", "resolution": "322x240", "url": "https://www.youtube.com/watch?v=pnEYhDVXVJ0", "annotations": [{"segment": [35.65863338533541, 53.65765522620905], "label": "Putting in contact lenses"}, {"segment": [71.3312449297972, 85.34273946957879], "label": "Putting in contact lenses"}]}, "LGj7_gw2GbQ": {"duration": 163.097, "subset": "training", "resolution": "1920x1080", "url": 
"https://www.youtube.com/watch?v=LGj7_gw2GbQ", "annotations": [{"segment": [14.499964118564742, 138.64000780031202], "label": "Wakeboarding"}]}, "7MAADmeogHo": {"duration": 214.0, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=7MAADmeogHo", "annotations": [{"segment": [62.60364664586583, 189.14648439937596], "label": "Fixing bicycle"}]}, "JEkhAGnQD2g": {"duration": 54.75, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JEkhAGnQD2g", "annotations": []}, "s7ajQRC7w9w": {"duration": 95.78, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=s7ajQRC7w9w", "annotations": [{"segment": [81.28795192511701, 92.64435697347893], "label": "Windsurfing"}]}, "Bo2VarUFp_0": {"duration": 144.196, "subset": "testing", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=Bo2VarUFp_0", "annotations": []}, "EWNqkVaOfbE": {"duration": 233.06, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=EWNqkVaOfbE", "annotations": []}, "gmPoDbH-odg": {"duration": 15.697, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=gmPoDbH-odg", "annotations": [{"segment": [0.2, 11.229113282259311], "label": "Using the monkey bar"}]}, "UtV69uNmzlE": {"duration": 25.73, "subset": "testing", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=UtV69uNmzlE", "annotations": []}, "5vm6bjMeEN8": {"duration": 219.38, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=5vm6bjMeEN8", "annotations": [{"segment": [1.0267494454880235, 217.67088244346098], "label": "Doing step aerobics"}]}, "Xz3F4x70qjQ": {"duration": 111.062, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=Xz3F4x70qjQ", "annotations": [{"segment": [0.01, 104.38354036178458], "label": "Having an ice cream"}]}, "mb2fWzNSob0": {"duration": 200.946, "subset": "training", 
"resolution": "640x356", "url": "https://www.youtube.com/watch?v=mb2fWzNSob0", "annotations": [{"segment": [15.313103923334495, 39.68907286145407], "label": "Bullfighting"}, {"segment": [69.69026540067816, 74.06543931264834], "label": "Bullfighting"}, {"segment": [91.25362253824547, 150.0059579275593], "label": "Bullfighting"}, {"segment": [168.7567019810501, 191.57010863423827], "label": "Bullfighting"}]}, "9D2yOzDsW8k": {"duration": 143.5, "subset": "testing", "resolution": "320x256", "url": "https://www.youtube.com/watch?v=9D2yOzDsW8k", "annotations": []}, "_WPcOFgi5vU": {"duration": 138.82999999999998, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_WPcOFgi5vU", "annotations": [{"segment": [7.469815132605304, 125.47124258970359], "label": "Playing racquetball"}]}, "9IQdMHlxKfQ": {"duration": 126.689, "subset": "testing", "resolution": "432x720", "url": "https://www.youtube.com/watch?v=9IQdMHlxKfQ", "annotations": []}, "TL7hwQauhFo": {"duration": 160.381, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=TL7hwQauhFo", "annotations": [{"segment": [6.002658346333853, 151.0857472698908], "label": "Futsal"}]}, "krNVpENNPCM": {"duration": 237.146, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=krNVpENNPCM", "annotations": [{"segment": [17.788950125561982, 209.05872179482878], "label": "Making a cake"}]}, "hz3VDXxErJM": {"duration": 81.712, "subset": "testing", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=hz3VDXxErJM", "annotations": []}, "oDZ6refg3Ew": {"duration": 205.52, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=oDZ6refg3Ew", "annotations": [{"segment": [10.66849129, 194.90241497659906], "label": "Zumba"}]}, "ha40KZvmcEo": {"duration": 106.58, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=ha40KZvmcEo", "annotations": [{"segment": 
[23.431296411856476, 62.661399375975044], "label": "Kayaking"}, {"segment": [84.93362090483619, 92.74551950078003], "label": "Kayaking"}]}, "HaGLPOqibaM": {"duration": 200.76, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=HaGLPOqibaM", "annotations": [{"segment": [35.704542222505225, 99.91007867525585], "label": "Plastering"}]}, "_Rrs63vi29o": {"duration": 290.11, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=_Rrs63vi29o", "annotations": []}, "8ulb1O_5gRs": {"duration": 146.47, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8ulb1O_5gRs", "annotations": [{"segment": [11.33392598, 58.47862714508581], "label": "Paintball"}, {"segment": [83.83459438377537, 128.37886115444618], "label": "Paintball"}]}, "CXWX6FvV1qs": {"duration": 52.245, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=CXWX6FvV1qs", "annotations": []}, "rcpsp52nguY": {"duration": 162.38, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=rcpsp52nguY", "annotations": [{"segment": [18.238930034321374, 154.27094987363495], "label": "Shoveling snow"}]}, "9InwA-ajB2M": {"duration": 61.35, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=9InwA-ajB2M", "annotations": []}, "AmhfmeKk6Bg": {"duration": 22.5, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AmhfmeKk6Bg", "annotations": [{"segment": [1.0512168486739468, 22.040513260530417], "label": "Brushing hair"}]}, "mVC8DBsg1i0": {"duration": 165.001, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=mVC8DBsg1i0", "annotations": [{"segment": [14.113611605419294, 27.200782361251328], "label": "Grooming dog"}, {"segment": [45.42017694289985, 58.507347698731884], "label": "Grooming dog"}, {"segment": [86.47796754943174, 92.12341375782987], "label": 
"Grooming dog"}]}, "ZXWFGIQVY0s": {"duration": 121.28, "subset": "testing", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=ZXWFGIQVY0s", "annotations": []}, "O1AokzlsyK4": {"duration": 226.79, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=O1AokzlsyK4", "annotations": []}, "_z1FN-KUHXw": {"duration": 55.82, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=_z1FN-KUHXw", "annotations": []}, "3KsOJiA_uak": {"duration": 24.776, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3KsOJiA_uak", "annotations": [{"segment": [17.685911869289228, 20.72989244471691], "label": "Throwing darts"}]}, "3VYPdypGB_Q": {"duration": 44.676, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3VYPdypGB_Q", "annotations": [{"segment": [0.01, 40.46515444617784], "label": "Fun sliding down"}]}, "PEpfA3L4m20": {"duration": 23.22, "subset": "validation", "resolution": "1080x1920", "url": "https://www.youtube.com/watch?v=PEpfA3L4m20", "annotations": [{"segment": [0.01, 21.372385413398625], "label": "Welding"}]}, "T7Mg-Owb14Y": {"duration": 121.255, "subset": "validation", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=T7Mg-Owb14Y", "annotations": [{"segment": [40.48252739091677, 119.40834982082332], "label": "Kneeling"}]}, "YrMrSXH8k6k": {"duration": 58.561, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YrMrSXH8k6k", "annotations": [{"segment": [2.920486739469579, 55.59951482059282], "label": "Welding"}]}, "U8m-9nmAfGU": {"duration": 93.3, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=U8m-9nmAfGU", "annotations": [{"segment": [49.92366177223089, 84.12791983775351], "label": "Bungee jumping"}]}, "WlFBWlCklm8": {"duration": 161.658, "subset": "training", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=WlFBWlCklm8", "annotations": [{"segment": [0.2, 156.36112923556942], "label": "Powerbocking"}]}, "7ByEF57KBVs": {"duration": 151.65, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=7ByEF57KBVs", "annotations": []}, "0PLaN0hmMDY": {"duration": 234.73, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=0PLaN0hmMDY", "annotations": [{"segment": [25.633598296312805, 234.7305215419501], "label": "Wrapping presents"}]}, "3TbZ7uSYUe8": {"duration": 32.6, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=3TbZ7uSYUe8", "annotations": [{"segment": [0.01, 32.60081632653061], "label": "Playing water polo"}]}, "7Eh6c1eYMFk": {"duration": 70.47, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=7Eh6c1eYMFk", "annotations": [{"segment": [7.366086719188767, 61.23746720280811], "label": "Vacuuming floor"}]}, "zk7lA7a0u0U": {"duration": 140.02, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=zk7lA7a0u0U", "annotations": []}, "v9bcQsDl-yk": {"duration": 162.308, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=v9bcQsDl-yk", "annotations": [{"segment": [8.352500780031201, 162.241], "label": "Snow tubing"}]}, "1SouLWwpbvU": {"duration": 130.82, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=1SouLWwpbvU", "annotations": [{"segment": [0.01, 107.50553354134165], "label": "Doing step aerobics"}]}, "Kt1JpqwDvl8": {"duration": 46.51, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Kt1JpqwDvl8", "annotations": [{"segment": [1.015809622861105, 15.30970217312094], "label": "Hand car wash"}, {"segment": [21.549675570696298, 32.505907819032764], "label": "Hand car wash"}]}, "wqM6TdjBCfs": {"duration": 212.39, "subset": "training", "resolution": "640x480", 
"url": "https://www.youtube.com/watch?v=wqM6TdjBCfs", "annotations": [{"segment": [6.626924342279814, 107.35617434493298], "label": "Chopping wood"}, {"segment": [131.21310197714033, 159.7088766489435], "label": "Chopping wood"}]}, "WidcZbpPEC0": {"duration": 92.23, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=WidcZbpPEC0", "annotations": [{"segment": [34.81994234633385, 58.70469618720748], "label": "Arm wrestling"}]}, "wAAu-2U5Pso": {"duration": 189.359, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=wAAu-2U5Pso", "annotations": [{"segment": [0.01, 188.96574102964118], "label": "Surfing"}]}, "6jgWCFWtCfU": {"duration": 56.611, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=6jgWCFWtCfU", "annotations": [{"segment": [0.01, 53.27732293291731], "label": "Drum corps"}]}, "K8W0PtyPlD4": {"duration": 113.43, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=K8W0PtyPlD4", "annotations": [{"segment": [7.34062268174727, 108.69428043213729], "label": "Volleyball"}]}, "p4rdUt1iDkY": {"duration": 37.478, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=p4rdUt1iDkY", "annotations": [{"segment": [0.010000000000000002, 32.77476083627148], "label": "Kneeling"}]}, "msELZwMnoFo": {"duration": 147.446, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=msELZwMnoFo", "annotations": [{"segment": [4.8305475076145905, 76.8287079782511], "label": "Surfing"}, {"segment": [86.25977692168912, 106.96212264430577], "label": "Surfing"}, {"segment": [112.25272225897035, 119.84358257566304], "label": "Surfing"}]}, "hP-xRyx88aA": {"duration": 174.92000000000002, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=hP-xRyx88aA", "annotations": []}, "-Q1l3kk_Uew": {"duration": 40.24, "subset": "testing", "resolution": "640x480", 
"url": "https://www.youtube.com/watch?v=-Q1l3kk_Uew", "annotations": []}, "bXLa6zLsJ50": {"duration": 119.19, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=bXLa6zLsJ50", "annotations": [{"segment": [24.42877248, 94.62335320000001], "label": "Getting a piercing"}]}, "ipYVfTYZze8": {"duration": 180.094, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ipYVfTYZze8", "annotations": [{"segment": [20.223014040561623, 167.401616224649], "label": "Skiing"}]}, "Ul8qLMmszx4": {"duration": 93.182, "subset": "validation", "resolution": "294x240", "url": "https://www.youtube.com/watch?v=Ul8qLMmszx4", "annotations": [{"segment": [0.01, 64.83469228393135], "label": "Removing ice from car"}, {"segment": [69.92261656630265, 89.838205900156], "label": "Removing ice from car"}]}, "p8UOE62POAE": {"duration": 18.948, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=p8UOE62POAE", "annotations": [{"segment": [0.009999999999999998, 18.901], "label": "Calf roping"}]}, "TQco5PWc2JU": {"duration": 229.51, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TQco5PWc2JU", "annotations": [{"segment": [42.24904730733229, 206.23263770358815], "label": "Shaving"}]}, "JQavlg895jU": {"duration": 165.51, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=JQavlg895jU", "annotations": [{"segment": [11.840656118957734, 163.45258584679598], "label": "Cheerleading"}]}, "9IIcG8AiUnA": {"duration": 132.45, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=9IIcG8AiUnA", "annotations": [{"segment": [79.7418190327613, 123.3312589703588], "label": "Polishing forniture"}]}, "o9LZZeQZ-Zc": {"duration": 66.6, "subset": "training", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=o9LZZeQZ-Zc", "annotations": [{"segment": [53.50442579563183, 63.78974259906397], "label": 
"Kayaking"}]}, "toOQD4oLneA": {"duration": 157.94, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=toOQD4oLneA", "annotations": []}, "ODW0I_ITug0": {"duration": 191.73, "subset": "training", "resolution": "640x356", "url": "https://www.youtube.com/watch?v=ODW0I_ITug0", "annotations": [{"segment": [25.719634945397814, 161.7944477379095], "label": "Playing bagpipes"}]}, "GPSAx3o5Y6s": {"duration": 230.32, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=GPSAx3o5Y6s", "annotations": []}, "u0A5NIUKEQo": {"duration": 165.68, "subset": "testing", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=u0A5NIUKEQo", "annotations": []}, "awqVlr-YDM4": {"duration": 132.076, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=awqVlr-YDM4", "annotations": [{"segment": [4.813947672292892, 123.10538917101074], "label": "Skiing"}]}, "LZleSe6Kovg": {"duration": 27.75, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=LZleSe6Kovg", "annotations": [{"segment": [3.2899159812792513, 25.15054190951638], "label": "Doing karate"}]}, "_XRJk2oFwZw": {"duration": 227.79, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_XRJk2oFwZw", "annotations": [{"segment": [12.96780655226209, 199.52026875351012], "label": "Cricket"}]}, "sEpgbMTaKvY": {"duration": 73.63, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sEpgbMTaKvY", "annotations": []}, "aeMXJq-ouaA": {"duration": 94.436, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=aeMXJq-ouaA", "annotations": []}, "YFTQImGp_nY": {"duration": 160.38, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=YFTQImGp_nY", "annotations": [{"segment": [54.73557507214464, 85.15038378132039], "label": "Hula hoop"}, {"segment": [89.3885128637465, 
101.35499497883204], "label": "Hula hoop"}]}, "FxGXQIH3-RQ": {"duration": 159.8, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=FxGXQIH3-RQ", "annotations": [{"segment": [0.01, 159.799749], "label": "Disc dog"}]}, "DsqNpc9JChY": {"duration": 222.89, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DsqNpc9JChY", "annotations": [{"segment": [2.4340360904836196, 222.68816326530614], "label": "Washing dishes"}]}, "dtEs78TCOZQ": {"duration": 61.19, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=dtEs78TCOZQ", "annotations": [{"segment": [0, 40.418916378734274], "label": "Grooming horse"}]}, "_gDw8q2sRLk": {"duration": 75.047, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=_gDw8q2sRLk", "annotations": [{"segment": [7.839418096723869, 75.001], "label": "Rope skipping"}]}, "WQXoBfyUpaY": {"duration": 159.34, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=WQXoBfyUpaY", "annotations": [{"segment": [13.671520017263276, 150.63529283307332], "label": "Cumbia"}]}, "_gJFrtUQDLw": {"duration": 226.49, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=_gJFrtUQDLw", "annotations": [{"segment": [18.18541856318253, 27.013291652106084], "label": "Playing harmonica"}]}, "yYFMmKk0Ets": {"duration": 74.908, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=yYFMmKk0Ets", "annotations": []}, "MjmDj36sVxM": {"duration": 32.76, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=MjmDj36sVxM", "annotations": [{"segment": [3.271224312012481, 4.395707669266771], "label": "Walking the dog"}, {"segment": [7.360254702028082, 10.01812445553822], "label": "Walking the dog"}, {"segment": [14.464945004680189, 16.45834731981279], "label": "Walking the dog"}]}, "TVeIcQOvMZI": {"duration": 93.809, 
"subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=TVeIcQOvMZI", "annotations": [{"segment": [7.463712099504388, 17.268980608424336], "label": "Croquet"}, {"segment": [7.756406544461778, 16.829938728549145], "label": "Croquet"}, {"segment": [44.48957716068643, 46.977481146645864], "label": "Croquet"}, {"segment": [71.27113183307333, 76.83232897815913], "label": "Croquet"}, {"segment": [81.80813695007801, 92.19879477379096], "label": "Croquet"}]}, "w2zTEQMg_v0": {"duration": 142.037, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=w2zTEQMg_v0", "annotations": [{"segment": [7.9770868222484, 142.036461], "label": "Elliptical trainer"}]}, "dXdhOe0DPC4": {"duration": 198.16, "subset": "training", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=dXdhOe0DPC4", "annotations": [{"segment": [54.537857330733225, 166.11272652], "label": "Polishing forniture"}]}, "rE8IwfGCw4k": {"duration": 43.491, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=rE8IwfGCw4k", "annotations": []}, "hvy_V1EWKEI": {"duration": 222.77, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hvy_V1EWKEI", "annotations": [{"segment": [12.163851021840875, 58.734023505460215], "label": "Getting a haircut"}]}, "4efHnJ-T6co": {"duration": 203.895, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4efHnJ-T6co", "annotations": []}, "DQVkDzj4cPE": {"duration": 178.539, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=DQVkDzj4cPE", "annotations": [{"segment": [0.01, 170.8622345028456], "label": "Swimming"}]}, "u9tarG7QQPA": {"duration": 226.627, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=u9tarG7QQPA", "annotations": []}, "UXAw9BjQpbg": {"duration": 193.52, "subset": "testing", "resolution": "320x240", "url": 
"https://www.youtube.com/watch?v=UXAw9BjQpbg", "annotations": []}, "pnN9AK7WKHU": {"duration": 69.06, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pnN9AK7WKHU", "annotations": [{"segment": [1.238059016380655, 68.41621781825272], "label": "Windsurfing"}]}, "5rlLaok_U5M": {"duration": 68.28999999999999, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=5rlLaok_U5M", "annotations": [{"segment": [0.01, 68.289885], "label": "Playing badminton"}]}, "ulopyhvgyQg": {"duration": 40.566, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ulopyhvgyQg", "annotations": [{"segment": [18.016357254290174, 29.662603744149763], "label": "Hand car wash"}]}, "2B_bp8h6gxc": {"duration": 88.72, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=2B_bp8h6gxc", "annotations": []}, "ttsZ4M-AKgY": {"duration": 128.337, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ttsZ4M-AKgY", "annotations": [{"segment": [34.03625137285491, 101.30790114508581], "label": "Longboarding"}]}, "XyQSmMYbP6o": {"duration": 124.55, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=XyQSmMYbP6o", "annotations": [{"segment": [60.80354602184087, 62.940411856474256], "label": "Washing face"}]}, "wt0XC2EEh7Y": {"duration": 97.66, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=wt0XC2EEh7Y", "annotations": [{"segment": [21.17801159282371, 95.52959186115444], "label": "Grooming horse"}]}, "2sbF8W0_bbg": {"duration": 14.26, "subset": "validation", "resolution": "272x480", "url": "https://www.youtube.com/watch?v=2sbF8W0_bbg", "annotations": [{"segment": [0.01, 12.778932917316693], "label": "Smoking hookah"}]}, "pRxS8ScXsvw": {"duration": 97.92, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=pRxS8ScXsvw", 
"annotations": []}, "kxUkqmzS2wc": {"duration": 92.18, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=kxUkqmzS2wc", "annotations": [{"segment": [14.589503274570985, 73.52247216692669], "label": "Shaving"}]}, "lQP65cm11FA": {"duration": 115.589, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lQP65cm11FA", "annotations": [{"segment": [24.52432926160584, 49.5896358424337], "label": "Blowing leaves"}]}, "bay60VGnTls": {"duration": 101.22, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=bay60VGnTls", "annotations": [{"segment": [2.94029575, 49.44512854212169], "label": "Playing badminton"}]}, "HGtfDm5EOtc": {"duration": 150.141, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=HGtfDm5EOtc", "annotations": [{"segment": [6.790840873634946, 138.41109360374415], "label": "Capoeira"}]}, "46l7dmH8pCg": {"duration": 105.094, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=46l7dmH8pCg", "annotations": [{"segment": [53.1204189881881, 64.43309982683307], "label": "Croquet"}, {"segment": [78.04110818720748, 97.55138523400936], "label": "Croquet"}]}, "R3HC-IAZVZg": {"duration": 119.93, "subset": "validation", "resolution": "426x234", "url": "https://www.youtube.com/watch?v=R3HC-IAZVZg", "annotations": [{"segment": [60.620382612168484, 79.33037724555382], "label": "Spinning"}, {"segment": [90.93057391825273, 95.60807257659906], "label": "Spinning"}, {"segment": [106.2727695176287, 109.0792687126365], "label": "Spinning"}]}, "AjohJX4RedM": {"duration": 61.348, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=AjohJX4RedM", "annotations": [{"segment": [17.82980733229329, 42.399568642745706], "label": "Using the rowing machine"}]}, "LZC9MLWo9bE": {"duration": 89.91, "subset": "validation", "resolution": "1280x720", "url": 
"https://www.youtube.com/watch?v=LZC9MLWo9bE", "annotations": [{"segment": [6.7325551076443055, 83.45563102184087], "label": "Chopping wood"}]}, "-E0tJqqOh_4": {"duration": 143.616, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=-E0tJqqOh_4", "annotations": []}, "EyCFMW7OjyM": {"duration": 105.419, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EyCFMW7OjyM", "annotations": []}, "4Rto4Aa6fxw": {"duration": 52.153, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=4Rto4Aa6fxw", "annotations": [{"segment": [10.46284509677041, 32.52404046846559], "label": "Hurling"}, {"segment": [42.01359877173153, 50.205439700191874], "label": "Hurling"}]}, "HVD8N0bbPVo": {"duration": 148.79, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=HVD8N0bbPVo", "annotations": [{"segment": [40.622086856474255, 43.63972759438377], "label": "Doing karate"}]}, "YGVzktgIEwA": {"duration": 4.064, "subset": "testing", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=YGVzktgIEwA", "annotations": []}, "tnWmUCOEdFY": {"duration": 15.14, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=tnWmUCOEdFY", "annotations": [{"segment": [1.1970280811232448, 14.498317519999999], "label": "Brushing hair"}]}, "xNX6EO_Av5U": {"duration": 104.676, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=xNX6EO_Av5U", "annotations": []}, "QBlveXzAZsY": {"duration": 213.694, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=QBlveXzAZsY", "annotations": [{"segment": [50.993873634945395, 153.32926677067084], "label": "Assembling bicycle"}]}, "yJezvcXU4YE": {"duration": 27.795, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=yJezvcXU4YE", "annotations": [{"segment": [5.385151545206706, 14.095151545206706], 
"label": "Throwing darts"}]}, "zwX7e_5Koh0": {"duration": 192.59, "subset": "training", "resolution": "854x470", "url": "https://www.youtube.com/watch?v=zwX7e_5Koh0", "annotations": [{"segment": [11.416972661466458, 179.36664944461776], "label": "Doing step aerobics"}]}, "wWtOjL3U7iM": {"duration": 16.626, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=wWtOjL3U7iM", "annotations": []}, "8E5JYKost84": {"duration": 205.75, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=8E5JYKost84", "annotations": []}, "-vDMeHr1ZfI": {"duration": 229.09, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-vDMeHr1ZfI", "annotations": [{"segment": [55.03818933709729, 97.56769927939975], "label": "Kayaking"}, {"segment": [109.36159699449202, 125.44418478779968], "label": "Kayaking"}, {"segment": [130.80504738556888, 160.8258779330765], "label": "Kayaking"}, {"segment": [180.1249832850457, 189.41714512117898], "label": "Kayaking"}]}, "0ufZs4e24Xs": {"duration": 174.06, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0ufZs4e24Xs", "annotations": [{"segment": [6.51694652275887, 162.10904475362688], "label": "Doing kickboxing"}]}, "uLeJBFypCHE": {"duration": 233.652, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=uLeJBFypCHE", "annotations": [{"segment": [0.01, 233.641], "label": "Wakeboarding"}]}, "eaI8My4pGq4": {"duration": 182.881, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=eaI8My4pGq4", "annotations": [{"segment": [21.673753510140404, 51.617755070202804], "label": "Snowboarding"}, {"segment": [62.16945085803432, 90.68754758190326], "label": "Snowboarding"}, {"segment": [126.04998751950077, 140.59421684867394], "label": "Snowboarding"}, {"segment": [153.42736037441497, 163.1235132605304], "label": "Snowboarding"}]}, "hEabp9fbqtw": 
{"duration": 200.09, "subset": "training", "resolution": "654x480", "url": "https://www.youtube.com/watch?v=hEabp9fbqtw", "annotations": [{"segment": [0.19999999999999998, 199.841], "label": "Tai chi"}]}, "j5WshG7CEYY": {"duration": 57.261, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=j5WshG7CEYY", "annotations": [{"segment": [0.2, 57.020999999999994], "label": "Table soccer"}]}, "ZUl43hNzGjA": {"duration": 137.97, "subset": "testing", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=ZUl43hNzGjA", "annotations": []}, "9wRQsxVFwkE": {"duration": 56.15, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=9wRQsxVFwkE", "annotations": [{"segment": [0.01, 55.488916289391575], "label": "Mowing the lawn"}]}, "ByTL6_iHoIA": {"duration": 161.217, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=ByTL6_iHoIA", "annotations": [{"segment": [5.533159427057354, 138.07747832293293], "label": "Scuba diving"}]}, "4QgjQuoEyEA": {"duration": 70.403, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=4QgjQuoEyEA", "annotations": []}, "FelGHxSz9Ho": {"duration": 118.074, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=FelGHxSz9Ho", "annotations": []}, "az-UGTdlbaA": {"duration": 117.38, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=az-UGTdlbaA", "annotations": [{"segment": [11.719375550532225, 109.86914414976599], "label": "Doing kickboxing"}]}, "Io1OF2OkTWY": {"duration": 78.948, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Io1OF2OkTWY", "annotations": [{"segment": [26.110481001552987, 32.88442654440872], "label": "Scuba diving"}]}, "Et4vNJN26Sg": {"duration": 206.1, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Et4vNJN26Sg", "annotations": []}, "-Kmx-BCAsQc": 
{"duration": 65.16, "subset": "training", "resolution": "426x238", "url": "https://www.youtube.com/watch?v=-Kmx-BCAsQc", "annotations": [{"segment": [3.9642018741974168, 44.31774402948907], "label": "Using the pommel horse"}]}, "M6sXLVNtTSA": {"duration": 51.688, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=M6sXLVNtTSA", "annotations": [{"segment": [47.63128081123245, 51.440999999999995], "label": "Camel ride"}]}, "A5-lBFZJ5R0": {"duration": 121.046, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=A5-lBFZJ5R0", "annotations": []}, "9I42aiA-UcY": {"duration": 56.66, "subset": "validation", "resolution": "568x320", "url": "https://www.youtube.com/watch?v=9I42aiA-UcY", "annotations": [{"segment": [3.97101904, 24.91900848], "label": "Bathing dog"}, {"segment": [29.890572480000003, 50.73704368174727], "label": "Bathing dog"}]}, "_Ga4HoMl6yM": {"duration": 35.248, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=_Ga4HoMl6yM", "annotations": [{"segment": [0.2, 29.64775794392523], "label": "Hitting a pinata"}]}, "Qwj7f4vqveo": {"duration": 83.36, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=Qwj7f4vqveo", "annotations": [{"segment": [0.01, 59.089214216677945], "label": "Gargling mouthwash"}]}, "qYD6UnDI63E": {"duration": 131.94, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=qYD6UnDI63E", "annotations": []}, "vMYPNyBR3d0": {"duration": 12.191, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=vMYPNyBR3d0", "annotations": [{"segment": [9.949858099688473, 11.99047619047619], "label": "Applying sunscreen"}]}, "sgPkVKPp1dU": {"duration": 63.74, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=sgPkVKPp1dU", "annotations": [{"segment": [10.142519659970072, 34.007270995319814], "label": "Snatch"}, {"segment": 
[42.26049758190328, 55.68442034321373], "label": "Snatch"}]}, "wBwBm7Ju9a8": {"duration": 131.63, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=wBwBm7Ju9a8", "annotations": [{"segment": [86.66070930109204, 110.07142224024959], "label": "Springboard diving"}]}, "lhH3qdh8n3g": {"duration": 89.0, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=lhH3qdh8n3g", "annotations": [{"segment": [0.01, 89.002108], "label": "Doing step aerobics"}]}, "qt-0mo_8ufU": {"duration": 161.727, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=qt-0mo_8ufU", "annotations": [{"segment": [31.942947845804987, 130.28698412698412], "label": "Skiing"}]}, "r79KH1TuGMA": {"duration": 195.72, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=r79KH1TuGMA", "annotations": []}, "me4rMjlnTlI": {"duration": 33.902, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=me4rMjlnTlI", "annotations": [{"segment": [0.01, 33.861], "label": "Tug of war"}]}, "cWdWucjUhks": {"duration": 69.64, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=cWdWucjUhks", "annotations": [{"segment": [43.395762479999995, 55.94138424], "label": "Using parallel bars"}, {"segment": [43.395762479999995, 55.94138424], "label": "Using parallel bars"}]}, "zC8yZvN6zTs": {"duration": 20.85, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=zC8yZvN6zTs", "annotations": []}, "AF8sbdPa4rE": {"duration": 23.035, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=AF8sbdPa4rE", "annotations": [{"segment": [11.678803510140405, 22.83419501133787], "label": "Throwing darts"}]}, "8iTz6Jy3lJg": {"duration": 189.081, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=8iTz6Jy3lJg", "annotations": [{"segment": 
[157.90980545546938, 160.8503978193885], "label": "Wakeboarding"}]}, "UCzKdpP9sLE": {"duration": 87.82, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=UCzKdpP9sLE", "annotations": [{"segment": [2.9092940625698764, 24.24411649247642], "label": "Putting on makeup"}, {"segment": [48.72445761397175, 54.19492490369138], "label": "Putting on makeup"}]}, "WKXIl7wvlk0": {"duration": 68.2, "subset": "validation", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=WKXIl7wvlk0", "annotations": [{"segment": [0, 56.81310707122163], "label": "Playing field hockey"}]}, "dGHryLMDBIU": {"duration": 58.21, "subset": "validation", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=dGHryLMDBIU", "annotations": [{"segment": [0.7275983, 26.6197789], "label": "Smoking hookah"}]}, "9tAyfaBGwyg": {"duration": 134.19, "subset": "training", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=9tAyfaBGwyg", "annotations": [{"segment": [118.69682023712949, 130.83864664586585], "label": "Hopscotch"}]}, "4vOxhqUbHL8": {"duration": 158.453, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=4vOxhqUbHL8", "annotations": [{"segment": [6.921502546021841, 138.6772474399376], "label": "Playing congas"}]}, "rQlHlit_sFE": {"duration": 36.665, "subset": "testing", "resolution": "480x360", "url": "https://www.youtube.com/watch?v=rQlHlit_sFE", "annotations": []}, "M4IUb6kp2yo": {"duration": 70.427, "subset": "training", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=M4IUb6kp2yo", "annotations": [{"segment": [16.80995007800312, 42.299547581903276], "label": "Trimming branches or hedges"}]}, "K_0DLTcFy-8": {"duration": 180.466, "subset": "training", "resolution": "270x480", "url": "https://www.youtube.com/watch?v=K_0DLTcFy-8", "annotations": [{"segment": [54.05519182399949, 180.46551], "label": "Ice fishing"}]}, "p1-APric9UM": {"duration": 114.82, "subset": "training", 
"resolution": "272x480", "url": "https://www.youtube.com/watch?v=p1-APric9UM", "annotations": [{"segment": [15.57904896, 47.73609828], "label": "Drinking beer"}, {"segment": [56.16173664, 113.21052184711388], "label": "Drinking beer"}]}, "Xv2-EY_zKNM": {"duration": 49.807, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=Xv2-EY_zKNM", "annotations": [{"segment": [0.01, 49.022889235569416], "label": "Hurling"}]}, "0bXzFAKRoBU": {"duration": 31.185, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0bXzFAKRoBU", "annotations": [{"segment": [0.2, 21.3724846105919], "label": "Playing drums"}]}, "D9eo9NfFhkg": {"duration": 120.605, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=D9eo9NfFhkg", "annotations": [{"segment": [0.01, 106.23145865834633], "label": "Having an ice cream"}]}, "t3Zg4mSksXk": {"duration": 187.943, "subset": "training", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=t3Zg4mSksXk", "annotations": [{"segment": [0.19999999999999998, 180.29130218068536], "label": "Longboarding"}]}, "6e2y5J4Yv8g": {"duration": 133.24, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=6e2y5J4Yv8g", "annotations": []}, "EZdnNs-5tlo": {"duration": 156.944, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=EZdnNs-5tlo", "annotations": [{"segment": [12.971439937597504, 156.881], "label": "Riding bumper cars"}]}, "de6ieO3-H40": {"duration": 124.85, "subset": "testing", "resolution": "600x480", "url": "https://www.youtube.com/watch?v=de6ieO3-H40", "annotations": []}, "xWSO0Inej7s": {"duration": 82.106, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=xWSO0Inej7s", "annotations": []}, "WVJEFQpO5jg": {"duration": 205.17000000000002, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=WVJEFQpO5jg", 
"annotations": [{"segment": [20.165063516826386, 30.087555088598098], "label": "Shot put"}, {"segment": [38.72972516723798, 42.57068964663348], "label": "Shot put"}, {"segment": [50.25261860542449, 68.81728025583608], "label": "Shot put"}, {"segment": [77.45945033447596, 105.6265231833763], "label": "Shot put"}, {"segment": [112.98837176888435, 147.23697171016093], "label": "Shot put"}, {"segment": [154.91890066895192, 172.52332119951464], "label": "Shot put"}, {"segment": [180.84541090487156, 195.8891884491706], "label": "Shot put"}]}, "3Y_4Azzta6Q": {"duration": 171.805, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=3Y_4Azzta6Q", "annotations": [{"segment": [44.49225862367828, 162.21770462408222], "label": "Making a cake"}]}, "539ByIcbUPs": {"duration": 116.24, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=539ByIcbUPs", "annotations": [{"segment": [20.672787205365765, 38.98815130836526], "label": "Making a lemonade"}, {"segment": [42.977636558523564, 52.40732896798865], "label": "Making a lemonade"}, {"segment": [56.578154456790514, 70.17867235505747], "label": "Making a lemonade"}, {"segment": [73.44279665064153, 96.47300695837357], "label": "Making a lemonade"}, {"segment": [97.74238862887849, 103.54527626547238], "label": "Making a lemonade"}]}, "OYAyb_Ire24": {"duration": 180.09, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=OYAyb_Ire24", "annotations": [{"segment": [8.492111637639185, 172.59642736193825], "label": "Putting on makeup"}]}, "CN0F45eLq1s": {"duration": 136.42000000000002, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=CN0F45eLq1s", "annotations": [{"segment": [0, 126.62753984399377], "label": "Sailing"}]}, "KzK4sf7RmRM": {"duration": 147.238, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=KzK4sf7RmRM", "annotations": [{"segment": 
[15.160158977787683, 134.14443701557585], "label": "Making a cake"}]}, "0QNcOwi5bu8": {"duration": 33.669, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=0QNcOwi5bu8", "annotations": [{"segment": [10.610107354134165, 33.668707], "label": "Plastering"}]}, "DBGraxQspow": {"duration": 77.927, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DBGraxQspow", "annotations": []}, "Rl6US0JizDs": {"duration": 71.52, "subset": "training", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=Rl6US0JizDs", "annotations": [{"segment": [31.103929797191885, 64.4555663026521], "label": "Mixing drinks"}]}, "RkBHHWFh2rk": {"duration": 158.802, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=RkBHHWFh2rk", "annotations": [{"segment": [54.24127769110765, 113.18841965678628], "label": "Longboarding"}]}, "akwJwcvfjLA": {"duration": 125.57, "subset": "validation", "resolution": "848x480", "url": "https://www.youtube.com/watch?v=akwJwcvfjLA", "annotations": [{"segment": [17.265913633680242, 21.96025056825968], "label": "Playing squash"}, {"segment": [38.97722195611015, 40.34640356202916], "label": "Playing squash"}, {"segment": [58.92815392807277, 63.23129611810392], "label": "Playing squash"}]}, "mShwD_I43ao": {"duration": 183.717, "subset": "validation", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=mShwD_I43ao", "annotations": [{"segment": [65.07333699739688, 158.9346140067427], "label": "Rock-paper-scissors"}]}, "3SbuecocgOA": {"duration": 121.464, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=3SbuecocgOA", "annotations": []}, "XC6tvSBS0PA": {"duration": 179.677, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=XC6tvSBS0PA", "annotations": [{"segment": [16.252758190327615, 29.703316692667705], "label": "Swimming"}, {"segment": [36.7088159126365, 
71.73631201248051], "label": "Swimming"}, {"segment": [77.62093135725429, 85.46709048361934], "label": "Swimming"}, {"segment": [93.87368954758192, 170.39271138845552], "label": "Swimming"}]}, "iAWXaWo82u4": {"duration": 43.89, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=iAWXaWo82u4", "annotations": [{"segment": [0, 43.885463], "label": "Bathing dog"}]}, "CPnLc0MtBYc": {"duration": 15.0, "subset": "validation", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=CPnLc0MtBYc", "annotations": [{"segment": [3.100642732059282, 13.420517787441499], "label": "High jump"}]}, "-XqCigUiMFI": {"duration": 114.38, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=-XqCigUiMFI", "annotations": []}, "YEZrwxz0Ysk": {"duration": 132.08, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=YEZrwxz0Ysk", "annotations": [{"segment": [31.00983260608424, 123.31817152652106], "label": "Mixing drinks"}]}, "i49blayQ93Q": {"duration": 201.503, "subset": "validation", "resolution": "426x240", "url": "https://www.youtube.com/watch?v=i49blayQ93Q", "annotations": [{"segment": [44.310734789391574, 144.24558346333853], "label": "Camel ride"}]}, "FKQIdqjY9nI": {"duration": 212.09, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=FKQIdqjY9nI", "annotations": [{"segment": [0.6617505951938758, 208.4514374860709], "label": "Playing bagpipes"}]}, "qV3e2L59_ic": {"duration": 178.59, "subset": "testing", "resolution": "720x480", "url": "https://www.youtube.com/watch?v=qV3e2L59_ic", "annotations": []}, "pi6sBUrSNGk": {"duration": 181.4, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=pi6sBUrSNGk", "annotations": [{"segment": [7.64063007418256, 11.602438260795738], "label": "Washing hands"}, {"segment": [146.3039164547582, 174.88553262870514], "label": "Washing hands"}]}, "PUJYZEq8H64": 
{"duration": 104.58, "subset": "validation", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=PUJYZEq8H64", "annotations": [{"segment": [27.736434602184087, 100.99325305148206], "label": "Playing polo"}]}, "PUI2Qrn0qbE": {"duration": 37.756, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=PUI2Qrn0qbE", "annotations": [{"segment": [0.2, 35.172054088483016], "label": "Riding bumper cars"}]}, "zWA4Fb4fVmE": {"duration": 60.77, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zWA4Fb4fVmE", "annotations": [{"segment": [2.1787566302652106, 59.96317160686427], "label": "Playing saxophone"}]}, "gKpfkH32eRM": {"duration": 132.19, "subset": "testing", "resolution": "640x360", "url": "https://www.youtube.com/watch?v=gKpfkH32eRM", "annotations": []}, "YiBenqCKGcA": {"duration": 105.4, "subset": "validation", "resolution": "1440x1080", "url": "https://www.youtube.com/watch?v=YiBenqCKGcA", "annotations": [{"segment": [5.9716807, 56.61856586349453], "label": "Tumbling"}, {"segment": [57.07560829999999, 101.32175864976598], "label": "Tumbling"}]}, "2UbwK1Qtveg": {"duration": 6.48, "subset": "validation", "resolution": "480x480", "url": "https://www.youtube.com/watch?v=2UbwK1Qtveg", "annotations": [{"segment": [0.8388523837753511, 5.457593822152886], "label": "Shaving"}]}, "4E8rMLHIpag": {"duration": 125.33, "subset": "training", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=4E8rMLHIpag", "annotations": [{"segment": [75.00646689999999, 85.9459812], "label": "Playing violin"}]}, "PqcdYoa--8g": {"duration": 91.56, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=PqcdYoa--8g", "annotations": [{"segment": [62.98951536505461, 72.13085092823714], "label": "Preparing pasta"}, {"segment": [6.998835040561622, 56.27634706084244], "label": "Preparing pasta"}]}, "mGgidUE8drE": {"duration": 153.88, "subset": "training", "resolution": 
"480x270", "url": "https://www.youtube.com/watch?v=mGgidUE8drE", "annotations": [{"segment": [6.15204244, 135.95263461622466], "label": "Fixing bicycle"}]}, "RarzzsTjl94": {"duration": 170.13, "subset": "testing", "resolution": "648x480", "url": "https://www.youtube.com/watch?v=RarzzsTjl94", "annotations": []}, "zFUUBWJ58UA": {"duration": 132.285, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=zFUUBWJ58UA", "annotations": [{"segment": [62.53054062870514, 78.62751148361934], "label": "Painting furniture"}]}, "o2EUvWyuDAU": {"duration": 91.116, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=o2EUvWyuDAU", "annotations": []}, "b-3l2qIHL5w": {"duration": 117.842, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=b-3l2qIHL5w", "annotations": [{"segment": [4.844975980966633, 112.388754797166], "label": "Kneeling"}]}, "hQb3VtvSqaE": {"duration": 31.6, "subset": "testing", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=hQb3VtvSqaE", "annotations": []}, "7rT8tqLyFa0": {"duration": 139.32, "subset": "training", "resolution": "960x720", "url": "https://www.youtube.com/watch?v=7rT8tqLyFa0", "annotations": [{"segment": [20.702567245744305, 135.28281180462707], "label": "Using the rowing machine"}]}, "kxXoosv0iDg": {"duration": 116.867, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=kxXoosv0iDg", "annotations": [{"segment": [0.01, 109.79718314369892], "label": "Baton twirling"}]}, "sMVf7HDvsEc": {"duration": 168.879, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=sMVf7HDvsEc", "annotations": [{"segment": [0.01, 5.005765792536463], "label": "Decorating the Christmas tree"}, {"segment": [9.484608870069088, 21.86729267265929], "label": "Decorating the Christmas tree"}, {"segment": [46.632660277839676, 87.99609340564098], "label": "Decorating the Christmas 
tree"}]}, "DpI9JtaMtec": {"duration": 173.82999999999998, "subset": "training", "resolution": "1280x720", "url": "https://www.youtube.com/watch?v=DpI9JtaMtec", "annotations": [{"segment": [41.21893328522257, 164.604555475819], "label": "Making a sandwich"}]}, "DkiJwIJQKaM": {"duration": 106.905, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=DkiJwIJQKaM", "annotations": [{"segment": [17.845241326053042, 106.90467], "label": "Disc dog"}]}, "t0ZuC58UIOM": {"duration": 99.614, "subset": "validation", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=t0ZuC58UIOM", "annotations": [{"segment": [4.957439897615345, 15.956764915336274], "label": "Bullfighting"}, {"segment": [17.815805763401784, 29.434811063811217], "label": "Bullfighting"}, {"segment": [32.84305261859799, 44.15221777766317], "label": "Bullfighting"}, {"segment": [51.27854028618512, 59.64422398136396], "label": "Bullfighting"}]}, "uWBPngv3j0M": {"duration": 77.811, "subset": "testing", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=uWBPngv3j0M", "annotations": []}, "w28ML-KpYdI": {"duration": 7.013, "subset": "training", "resolution": "192x144", "url": "https://www.youtube.com/watch?v=w28ML-KpYdI", "annotations": [{"segment": [0.2, 6.781], "label": "Painting fence"}]}, "GeKKB4pE8qA": {"duration": 207.07999999999998, "subset": "testing", "resolution": "320x240", "url": "https://www.youtube.com/watch?v=GeKKB4pE8qA", "annotations": []}, "f0JySdrM_l8": {"duration": 78.9, "subset": "training", "resolution": "640x480", "url": "https://www.youtube.com/watch?v=f0JySdrM_l8", "annotations": [{"segment": [6.454890809999999, 72.85109516380656], "label": "Discus throw"}]}, "ukXZ8nIRAJQ": {"duration": 174.475, "subset": "testing", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=ukXZ8nIRAJQ", "annotations": []}, "pG0uYjcjBR8": {"duration": 175.171, "subset": "training", "resolution": "384x288", "url": 
"https://www.youtube.com/watch?v=pG0uYjcjBR8", "annotations": [{"segment": [10.111274340093605, 160.41400101716067], "label": "Capoeira"}]}, "IWHN3TD8mMQ": {"duration": 139.95, "subset": "training", "resolution": "1920x1080", "url": "https://www.youtube.com/watch?v=IWHN3TD8mMQ", "annotations": [{"segment": [37.4005685, 119.18387831513259], "label": "Preparing pasta"}]}, "O_VBQsTRjzM": {"duration": 79.9, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=O_VBQsTRjzM", "annotations": []}, "aVu2j7JbYgk": {"duration": 25.682, "subset": "testing", "resolution": "854x480", "url": "https://www.youtube.com/watch?v=aVu2j7JbYgk", "annotations": []}}} \ No newline at end of file +{"taxonomy":[{"parentName":"Health-related self care","nodeName":"Applying sunscreen","nodeId":389,"parentId":269},{"parentName":"Health-related Self Care","nodeName":"Health-related self care","nodeId":269,"parentId":270},{"parentName":"Personal Care","nodeName":"Health-related Self Care","nodeId":270,"parentId":2},{"parentName":"Wrestling","nodeName":"Arm wrestling","nodeId":271,"parentId":272},{"parentName":"Participating in Sports, Exercise, or Recreation","nodeName":"Wrestling","nodeId":272,"parentId":17},{"parentName":"Vehicle repair and maintenance (by self)","nodeName":"Assembling bicycle","nodeId":273,"parentId":56},{"parentName":"Biking","nodeName":"BMX","nodeId":274,"parentId":275},{"parentName":"Participating in Sports, Exercise, or Recreation","nodeName":"Biking","nodeId":275,"parentId":17},{"parentName":"Food and drink preparation ","nodeName":"Baking cookies","nodeId":276,"parentId":31},{"parentName":"Playing sports","nodeName":"Baton twirling","nodeId":277,"parentId":64},{"parentName":"Playing soccer","nodeName":"Beach soccer","nodeId":278,"parentId":279},{"parentName":"Participating in Sports, Exercise, or Recreation","nodeName":"Playing soccer","nodeId":279,"parentId":17},{"parentName":"Playing games","nodeName":"Beer 
pong","nodeId":280,"parentId":48},{"parentName":"Washing, dressing and grooming oneself","nodeName":"Blow-drying hair","nodeId":281,"parentId":62},{"parentName":"Lawn, garden, and houseplant care","nodeName":"Blowing leaves","nodeId":282,"parentId":66},{"parentName":"Bowling","nodeName":"Playing ten pins","nodeId":283,"parentId":284},{"parentName":"Participating in Sports, Exercise, or Recreation","nodeName":"Bowling","nodeId":284,"parentId":17},{"parentName":"Washing, dressing and grooming oneself","nodeName":"Braiding hair","nodeId":285,"parentId":62},{"parentName":"Beach activities","nodeName":"Building sandcastles","nodeId":286,"parentId":287},{"parentName":"Arts and Entertainment","nodeName":"Beach activities","nodeId":287,"parentId":21},{"parentName":"Participating in rodeo competitions","nodeName":"Bullfighting","nodeId":288,"parentId":289},{"parentName":"Participating in Sports, Exercise, or Recreation","nodeName":"Participating in rodeo competitions","nodeId":289,"parentId":17},{"parentName":"Participating in rodeo competitions","nodeName":"Calf roping","nodeId":290,"parentId":289},{"parentName":"Playing sports","nodeName":"Camel ride","nodeId":291,"parentId":64},{"parentName":"Boating","nodeName":"Canoeing","nodeId":292,"parentId":293},{"parentName":"Participating in Sports, Exercise, or Recreation","nodeName":"Boating","nodeId":293,"parentId":17},{"parentName":"Participating in martial arts","nodeName":"Capoeira","nodeId":294,"parentId":27},{"parentName":"Interior arrangement, decoration, & repairs","nodeName":"Carving jack-o-lanterns","nodeId":295,"parentId":33},{"parentName":"Vehicle repair and maintenance (by self)","nodeName":"Changing car wheel","nodeId":296,"parentId":56},{"parentName":"Interior arrangement, decoration, & repairs","nodeName":"Cleaning sink","nodeId":297,"parentId":33},{"parentName":"Care for animals and pets (not veterinary care)","nodeName":"Clipping cat claws","nodeId":298,"parentId":65},{"parentName":"Playing 
sports","nodeName":"Croquet","nodeId":299,"parentId":64},{"parentName":"Playing sports","nodeName":"Curling","nodeId":300,"parentId":64},{"parentName":"Lawn, garden, and houseplant care","nodeName":"Cutting the grass","nodeId":301,"parentId":66},{"parentName":"Interior arrangement, decoration, & repairs","nodeName":"Decorating the Christmas tree","nodeId":302,"parentId":33},{"parentName":"Walking / exercising / playing with animals","nodeName":"Disc dog","nodeId":303,"parentId":26},{"parentName":"Playing sports","nodeName":"Doing a powerbomb","nodeId":304,"parentId":64},{"parentName":"Working out","nodeName":"Doing crunches","nodeId":305,"parentId":306},{"parentName":"Participating in Sports, Exercise, or Recreation","nodeName":"Working out","nodeId":306,"parentId":17},{"parentName":"Playing musical instruments","nodeName":"Drum corps","nodeId":307,"parentId":61},{"parentName":"Using cardiovascular equipment","nodeName":"Elliptical trainer","nodeId":308,"parentId":58},{"parentName":"Fencing","nodeName":"Doing fencing","nodeId":309,"parentId":310},{"parentName":"Participating in Sports, Exercise, or Recreation","nodeName":"Fencing","nodeId":310,"parentId":17},{"parentName":"Exterior repair, improvements, & decoration","nodeName":"Fixing the roof","nodeId":311,"parentId":312},{"parentName":"Exterior Maintenance, Repair, & Decoration","nodeName":"Exterior repair, improvements, & decoration","nodeId":312,"parentId":12},{"parentName":"Park activities","nodeName":"Fun sliding down","nodeId":313,"parentId":314},{"parentName":"Arts and Entertainment","nodeName":"Park activities","nodeId":314,"parentId":21},{"parentName":"Playing soccer","nodeName":"Futsal","nodeId":315,"parentId":279},{"parentName":"Washing, dressing and grooming oneself","nodeName":"Gargling mouthwash","nodeId":316,"parentId":62},{"parentName":"Care for animals and pets (not veterinary care)","nodeName":"Grooming dog","nodeId":317,"parentId":65},{"parentName":"Vehicle repair and maintenance (by 
self)","nodeName":"Hand car wash","nodeId":318,"parentId":56},{"parentName":"Interior arrangement, decoration, & repairs","nodeName":"Hanging wallpaper","nodeId":319,"parentId":33},{"parentName":"Eating and Drinking","nodeName":"Having an ice cream","nodeId":320,"parentId":46},{"parentName":"Playing games","nodeName":"Hitting a pinata","nodeId":321,"parentId":48},{"parentName":"Playing games","nodeName":"Hula hoop","nodeId":322,"parentId":48},{"parentName":"Playing sports","nodeName":"Hurling","nodeId":323,"parentId":64},{"parentName":"Fishing","nodeName":"Ice fishing","nodeId":324,"parentId":325},{"parentName":"Participating in Sports, Exercise, or Recreation","nodeName":"Fishing","nodeId":325,"parentId":17},{"parentName":"Interior arrangement, decoration, & repairs","nodeName":"Installing carpet","nodeId":326,"parentId":33},{"parentName":"Playing games","nodeName":"Kite flying","nodeId":327,"parentId":48},{"parentName":"Working out","nodeName":"Kneeling","nodeId":328,"parentId":306},{"parentName":"Sewing, repairing, & maintaining textiles","nodeName":"Knitting","nodeId":329,"parentId":45},{"parentName":"Interior arrangement, decoration, & repairs","nodeName":"Laying tile","nodeId":330,"parentId":33},{"parentName":"Playing sports","nodeName":"Longboarding","nodeId":331,"parentId":64},{"parentName":"Food and drink preparation ","nodeName":"Making a cake","nodeId":332,"parentId":31},{"parentName":"Food and drink preparation ","nodeName":"Making a lemonade","nodeId":333,"parentId":31},{"parentName":"Food and drink preparation ","nodeName":"Making an omelette","nodeId":334,"parentId":31},{"parentName":"Interior cleaning","nodeName":"Mooping floor","nodeId":335,"parentId":37},{"parentName":"Exterior Maintenance, Repair, & Decoration","nodeName":"Painting fence","nodeId":336,"parentId":12},{"parentName":"Building and repairing furniture","nodeName":"Painting furniture","nodeId":337,"parentId":338},{"parentName":"Interior Maintenance, Repair, & 
Decoration","nodeName":"Building and repairing furniture","nodeId":338,"parentId":15},{"parentName":"Food and drink preparation ","nodeName":"Peeling potatoes","nodeId":339,"parentId":31},{"parentName":"Interior arrangement, decoration, & repairs","nodeName":"Plastering","nodeId":340,"parentId":33},{"parentName":"Playing volleyball","nodeName":"Playing beach volleyball","nodeId":341,"parentId":342},{"parentName":"Participating in Sports, Exercise, or Recreation","nodeName":"Playing volleyball","nodeId":342,"parentId":17},{"parentName":"Attending gambling establishments","nodeName":"Playing blackjack","nodeId":343,"parentId":344},{"parentName":"Arts and Entertainment","nodeName":"Attending gambling establishments","nodeId":344,"parentId":21},{"parentName":"Playing musical instruments","nodeName":"Playing congas","nodeId":345,"parentId":61},{"parentName":"Playing musical instruments","nodeName":"Playing drums","nodeId":346,"parentId":61},{"parentName":"Playing hockey","nodeName":"Playing ice hockey","nodeId":347,"parentId":60},{"parentName":"Playing games","nodeName":"Playing pool","nodeId":348,"parentId":48},{"parentName":"Playing games","nodeName":"Playing rubik cube","nodeId":349,"parentId":48},{"parentName":"Playing sports","nodeName":"Powerbocking","nodeId":350,"parentId":64},{"parentName":"Washing, dressing and grooming oneself","nodeName":"Putting in contact lenses","nodeId":351,"parentId":62},{"parentName":"Washing, dressing and grooming oneself","nodeName":"Putting on shoes","nodeId":352,"parentId":62},{"parentName":"Boating","nodeName":"Rafting","nodeId":353,"parentId":293},{"parentName":"Lawn, garden, and houseplant care","nodeName":"Raking leaves","nodeId":354,"parentId":66},{"parentName":"Vehicle repair and maintenance (by self)","nodeName":"Removing ice from car","nodeId":355,"parentId":56},{"parentName":"Playing games","nodeName":"Riding bumper cars","nodeId":356,"parentId":48},{"parentName":"Boating","nodeName":"River 
tubing","nodeId":357,"parentId":293},{"parentName":"Playing games","nodeName":"Rock-paper-scissors","nodeId":358,"parentId":48},{"parentName":"Playing sports","nodeName":"Rollerblading","nodeId":359,"parentId":64},{"parentName":"Exterior repair, improvements, & decoration","nodeName":"Roof shingle removal","nodeId":360,"parentId":312},{"parentName":"Working out","nodeName":"Rope skipping","nodeId":361,"parentId":306},{"parentName":"Running","nodeName":"Running a marathon","nodeId":362,"parentId":363},{"parentName":"Participating in Sports, Exercise, or Recreation","nodeName":"Running","nodeId":363,"parentId":17},{"parentName":"Participating in water sports","nodeName":"Scuba diving","nodeId":364,"parentId":42},{"parentName":"Appliance, tool, and toy set-up, repair, & maintenance (by self)","nodeName":"Sharpening knives","nodeId":365,"parentId":366},{"parentName":"Appliances, Tools, and Toys","nodeName":"Appliance, tool, and toy set-up, repair, & maintenance (by self)","nodeId":366,"parentId":367},{"parentName":"Household Activities","nodeName":"Appliances, Tools, and Toys","nodeId":367,"parentId":1},{"parentName":"Playing games","nodeName":"Shuffleboard","nodeId":368,"parentId":48},{"parentName":"Skiing, ice skating, snowboarding","nodeName":"Skiing","nodeId":369,"parentId":370},{"parentName":"Participating in Sports, Exercise, or Recreation","nodeName":"Skiing, ice skating, snowboarding","nodeId":370,"parentId":17},{"parentName":"Playing games","nodeName":"Slacklining","nodeId":371,"parentId":48},{"parentName":"Skiing, ice skating, snowboarding","nodeName":"Snow tubing","nodeId":372,"parentId":370},{"parentName":"Skiing, ice skating, snowboarding","nodeName":"Snowboarding","nodeId":373,"parentId":370},{"parentName":"Lawn, garden, and houseplant care","nodeName":"Spread mulch","nodeId":374,"parentId":66},{"parentName":"Wrestling","nodeName":"Sumo","nodeId":375,"parentId":272},{"parentName":"Participating in water 
sports","nodeName":"Surfing","nodeId":376,"parentId":42},{"parentName":"Participating in water sports","nodeName":"Swimming","nodeId":377,"parentId":42},{"parentName":"Park activities","nodeName":"Swinging at the playground","nodeId":378,"parentId":314},{"parentName":"Playing games","nodeName":"Table soccer","nodeId":379,"parentId":48},{"parentName":"Playing games","nodeName":"Throwing darts","nodeId":380,"parentId":48},{"parentName":"Lawn, garden, and houseplant care","nodeName":"Trimming branches or hedges","nodeId":381,"parentId":66},{"parentName":"Playing games","nodeName":"Tug of war","nodeId":382,"parentId":48},{"parentName":"Park activities","nodeName":"Using the monkey bar","nodeId":383,"parentId":314},{"parentName":"Using cardiovascular equipment","nodeName":"Using the rowing machine","nodeId":384,"parentId":58},{"parentName":"Participating in water sports","nodeName":"Wakeboarding","nodeId":385,"parentId":42},{"parentName":"Participating in water sports","nodeName":"Waterskiing","nodeId":386,"parentId":42},{"parentName":"Appliance, tool, and toy set-up, repair, & maintenance (by self)","nodeName":"Waxing skis","nodeId":387,"parentId":366},{"parentName":"Appliance, tool, and toy set-up, repair, & maintenance (by self)","nodeName":"Welding","nodeId":388,"parentId":366},{"nodeId":68,"parentName":"Eating and Drinking","nodeName":"Drinking coffee","parentId":46},{"nodeId":46,"parentName":"Food & Drink Prep., Presentation, & Clean-up","nodeName":"Eating and Drinking","parentId":18},{"nodeId":18,"parentName":"Eating and drinking Activities","nodeName":"Food & Drink Prep., Presentation, & Clean-up","parentId":4},{"nodeId":4,"parentName":"Root","nodeName":"Eating and drinking Activities","parentId":0},{"nodeId":0,"parentName":null,"nodeName":"Root","parentId":null},{"nodeId":69,"parentName":"Doing aerobics","nodeName":"Zumba","parentId":55},{"nodeId":55,"parentName":"Participating in Sports, Exercise, or Recreation","nodeName":"Doing 
aerobics","parentId":17},{"nodeId":17,"parentName":"Sports, Exercise, and Recreation","nodeName":"Participating in Sports, Exercise, or Recreation","parentId":5},{"nodeId":5,"parentName":"Root","nodeName":"Sports, Exercise, and Recreation","parentId":0},{"nodeId":70,"parentName":"Participating in martial arts","nodeName":"Doing kickboxing","parentId":27},{"nodeId":27,"parentName":"Participating in Sports, Exercise, or Recreation","nodeName":"Participating in martial arts","parentId":17},{"nodeId":99,"parentName":"Participating in martial arts","nodeName":"Doing karate","parentId":27},{"nodeId":73,"parentName":"Dancing","nodeName":"Tango","parentId":23},{"nodeId":23,"parentName":"Arts and Entertainment","nodeName":"Dancing","parentId":21},{"nodeId":21,"parentName":"Socializing, Relaxing, and Leisure","nodeName":"Arts and Entertainment","parentId":7},{"nodeId":7,"parentName":"Root","nodeName":"Socializing, Relaxing, and Leisure","parentId":0},{"nodeId":76,"parentName":"Washing, dressing and grooming oneself","nodeName":"Putting on makeup","parentId":62},{"nodeId":62,"parentName":"Grooming","nodeName":"Washing, dressing and grooming oneself","parentId":20},{"nodeId":20,"parentName":"Personal Care","nodeName":"Grooming","parentId":2},{"nodeId":2,"parentName":"Root","nodeName":"Personal Care","parentId":0},{"nodeId":195,"parentName":"Playing sports","nodeName":"High jump","parentId":64},{"nodeId":64,"parentName":"Participating in Sports, Exercise, or Recreation","nodeName":"Playing sports","parentId":17},{"nodeId":136,"parentName":"Playing musical instruments","nodeName":"Playing bagpipes","parentId":61},{"nodeId":61,"parentName":"Arts and Entertainment","nodeName":"Playing musical instruments","parentId":21},{"nodeId":140,"parentName":"Dancing","nodeName":"Cheerleading","parentId":23},{"nodeId":179,"parentName":"Household & personal organization and planning","nodeName":"Wrapping presents","parentId":41},{"nodeId":41,"parentName":"Household 
Management","nodeName":"Household & personal organization and planning","parentId":19},{"nodeId":19,"parentName":"Household Activities","nodeName":"Household Management","parentId":1},{"nodeId":1,"parentName":"Root","nodeName":"Household Activities","parentId":0},{"nodeId":82,"parentName":"Playing sports","nodeName":"Cricket","parentId":64},{"nodeId":84,"parentName":"Weightlifting","nodeName":"Clean and jerk","parentId":35},{"nodeId":35,"parentName":"Participating in Sports, Exercise, or Recreation","nodeName":"Weightlifting","parentId":17},{"nodeId":186,"parentName":"Food and drink preparation ","nodeName":"Preparing pasta","parentId":31},{"nodeId":31,"parentName":"Food & Drink Prep., Presentation, & Clean-up","nodeName":"Food and drink preparation ","parentId":18},{"nodeId":90,"parentName":"Care for animals and pets (not veterinary care)","nodeName":"Bathing dog","parentId":65},{"nodeId":65,"parentName":"Animals and Pets","nodeName":"Care for animals and pets (not veterinary care)","parentId":22},{"nodeId":22,"parentName":"Household Activities","nodeName":"Animals and Pets","parentId":1},{"nodeId":92,"parentName":"Playing sports","nodeName":"Discus throw","parentId":64},{"nodeId":94,"parentName":"Playing hockey","nodeName":"Playing field hockey","parentId":60},{"nodeId":60,"parentName":"Participating in Sports, Exercise, or Recreation","nodeName":"Playing hockey","parentId":17},{"nodeId":95,"parentName":"Walking / exercising / playing with animals","nodeName":"Grooming horse","parentId":26},{"nodeId":26,"parentName":"Animals and Pets","nodeName":"Walking / exercising / playing with animals","parentId":22},{"nodeId":96,"parentName":"Food and drink preparation ","nodeName":"Preparing salad","parentId":31},{"nodeId":100,"parentName":"Playing musical instruments","nodeName":"Playing harmonica","parentId":61},{"nodeId":109,"parentName":"Playing musical instruments","nodeName":"Playing saxophone","parentId":61},{"nodeId":110,"parentName":"Heating and 
cooling","nodeName":"Chopping wood","parentId":34},{"nodeId":34,"parentName":"Interior Maintenance, Repair, & Decoration","nodeName":"Heating and cooling","parentId":15},{"nodeId":15,"parentName":"Household Activities","nodeName":"Interior Maintenance, Repair, & Decoration","parentId":1},{"nodeId":113,"parentName":"Washing, dressing and grooming oneself","nodeName":"Washing face","parentId":62},{"nodeId":114,"parentName":"Doing gymnastics","nodeName":"Using the pommel horse","parentId":24},{"nodeId":24,"parentName":"Participating in Sports, Exercise, or Recreation","nodeName":"Doing gymnastics","parentId":17},{"nodeId":116,"parentName":"Playing sports","nodeName":"Javelin throw","parentId":64},{"nodeId":117,"parentName":"Using cardiovascular equipment","nodeName":"Spinning","parentId":58},{"nodeId":58,"parentName":"Participating in Sports, Exercise, or Recreation","nodeName":"Using cardiovascular equipment","parentId":17},{"nodeId":119,"parentName":"Playing racquet sports ","nodeName":"Ping-pong","parentId":54},{"nodeId":54,"parentName":"Participating in Sports, Exercise, or Recreation","nodeName":"Playing racquet sports ","parentId":17},{"nodeId":129,"parentName":"Food and drink preparation ","nodeName":"Making a sandwich","parentId":31},{"nodeId":125,"parentName":"Washing, dressing and grooming oneself","nodeName":"Brushing hair","parentId":62},{"nodeId":251,"parentName":"Playing musical instruments","nodeName":"Playing guitarra","parentId":61},{"nodeId":230,"parentName":"Doing aerobics","nodeName":"Doing step aerobics","parentId":55},{"nodeId":135,"parentName":"Eating and Drinking","nodeName":"Drinking beer","parentId":46},{"nodeId":75,"parentName":"Participating in equestrian sports","nodeName":"Playing polo","parentId":49},{"nodeId":49,"parentName":"Participating in Sports, Exercise, or Recreation","nodeName":"Participating in equestrian 
sports","parentId":17},{"nodeId":79,"parentName":"Weightlifting","nodeName":"Snatch","parentId":35},{"nodeId":141,"parentName":"Playing sports","nodeName":"Paintball","parentId":64},{"nodeId":80,"parentName":"Playing sports","nodeName":"Long jump","parentId":64},{"nodeId":143,"parentName":"Interior cleaning","nodeName":"Cleaning windows","parentId":37},{"nodeId":37,"parentName":"Housework","nodeName":"Interior cleaning","parentId":16},{"nodeId":16,"parentName":"Household Activities","nodeName":"Housework","parentId":1},{"nodeId":148,"parentName":"Washing, dressing and grooming oneself","nodeName":"Brushing teeth","parentId":62},{"nodeId":149,"parentName":"Playing musical instruments","nodeName":"Playing flauta","parentId":61},{"nodeId":152,"parentName":"Playing racquet sports ","nodeName":"Tennis serve with ball bouncing","parentId":54},{"nodeId":154,"parentName":"Playing sports","nodeName":"Bungee jumping","parentId":64},{"nodeId":155,"parentName":"Playing sports","nodeName":"Triple jump","parentId":64},{"nodeId":175,"parentName":"Participating in equestrian sports","nodeName":"Horseback riding","parentId":49},{"nodeId":159,"parentName":"Playing basketball","nodeName":"Layup drill in basketball","parentId":52},{"nodeId":52,"parentName":"Participating in Sports, Exercise, or Recreation","nodeName":"Playing basketball","parentId":17},{"nodeId":160,"parentName":"Interior cleaning","nodeName":"Vacuuming floor","parentId":37},{"nodeId":134,"parentName":"Sewing, repairing, & maintaining textiles","nodeName":"Cleaning shoes","parentId":45},{"nodeId":45,"parentName":"Housework","nodeName":"Sewing, repairing, & maintaining textiles","parentId":16},{"nodeId":167,"parentName":"Washing, dressing and grooming oneself","nodeName":"Doing nails","parentId":62},{"nodeId":169,"parentName":"Playing sports","nodeName":"Shot put","parentId":64},{"nodeId":171,"parentName":"Vehicle repair and maintenance (by self)","nodeName":"Fixing 
bicycle","parentId":56},{"nodeId":56,"parentName":"Vehicles","nodeName":"Vehicle repair and maintenance (by self)","parentId":13},{"nodeId":13,"parentName":"Household Activities","nodeName":"Vehicles","parentId":1},{"nodeId":173,"parentName":"Washing, dressing and grooming oneself","nodeName":"Washing hands","parentId":62},{"nodeId":83,"parentName":"Laundry","nodeName":"Ironing clothes","parentId":30},{"nodeId":30,"parentName":"Housework","nodeName":"Laundry","parentId":16},{"nodeId":180,"parentName":"Doing gymnastics","nodeName":"Using the balance beam","parentId":24},{"nodeId":183,"parentName":"Exterior cleaning","nodeName":"Shoveling snow","parentId":51},{"nodeId":51,"parentName":"Exterior Maintenance, Repair, & Decoration","nodeName":"Exterior cleaning","parentId":12},{"nodeId":12,"parentName":"Household Activities","nodeName":"Exterior Maintenance, Repair, & Decoration","parentId":1},{"nodeId":258,"parentName":"Doing gymnastics","nodeName":"Tumbling","parentId":24},{"nodeId":88,"parentName":"Doing gymnastics","nodeName":"Using parallel bars","parentId":24},{"nodeId":187,"parentName":"Washing, dressing and grooming","nodeName":"Getting a tattoo","parentId":38},{"nodeId":38,"parentName":"Grooming","nodeName":"Washing, dressing and grooming","parentId":20},{"nodeId":188,"parentName":"Climbing, spelunking, caving","nodeName":"Rock climbing","parentId":50},{"nodeId":50,"parentName":"Participating in Sports, Exercise, or Recreation","nodeName":"Climbing, spelunking, caving","parentId":17},{"nodeId":189,"parentName":"Tobacco and drug use","nodeName":"Smoking hookah","parentId":28},{"nodeId":28,"parentName":"Relaxing and Leisure","nodeName":"Tobacco and drug use","parentId":11},{"nodeId":11,"parentName":"Socializing, Relaxing, and Leisure","nodeName":"Relaxing and Leisure","parentId":7},{"nodeId":190,"parentName":"Washing, dressing and grooming oneself","nodeName":"Shaving","parentId":62},{"nodeId":214,"parentName":"Washing, dressing and grooming","nodeName":"Getting 
a piercing","parentId":38},{"nodeId":193,"parentName":"Participating in water sports","nodeName":"Springboard diving","parentId":42},{"nodeId":42,"parentName":"Participating in Sports, Exercise, or Recreation","nodeName":"Participating in water sports","parentId":17},{"nodeId":194,"parentName":"Playing racquet sports ","nodeName":"Playing squash","parentId":54},{"nodeId":197,"parentName":"Playing musical instruments","nodeName":"Playing piano","parentId":61},{"nodeId":161,"parentName":"Playing sports","nodeName":"Dodgeball","parentId":64},{"nodeId":201,"parentName":"Tobacco and drug use","nodeName":"Smoking a cigarette","parentId":28},{"nodeId":240,"parentName":"Participating in water sports","nodeName":"Sailing","parentId":42},{"nodeId":208,"parentName":"Washing, dressing and grooming","nodeName":"Getting a haircut","parentId":38},{"nodeId":209,"parentName":"Playing racquet sports ","nodeName":"Playing lacrosse","parentId":54},{"nodeId":211,"parentName":"Dancing","nodeName":"Cumbia","parentId":23},{"nodeId":263,"parentName":"Participating in martial arts","nodeName":"Tai chi","parentId":27},{"nodeId":217,"parentName":"Interior arrangement, decoration, & repairs","nodeName":"Painting","parentId":33},{"nodeId":33,"parentName":"Interior Maintenance, Repair, & Decoration","nodeName":"Interior arrangement, decoration, & repairs","parentId":15},{"nodeId":218,"parentName":"Lawn, garden, and houseplant care","nodeName":"Mowing the lawn","parentId":66},{"nodeId":66,"parentName":"Lawn, Garden, and Houseplants","nodeName":"Lawn, garden, and houseplant care","parentId":10},{"nodeId":10,"parentName":"Household Activities","nodeName":"Lawn, Garden, and Houseplants","parentId":1},{"nodeId":200,"parentName":"Washing, dressing and grooming oneself","nodeName":"Shaving legs","parentId":62},{"nodeId":221,"parentName":"Walking / exercising / playing with animals","nodeName":"Walking the dog","parentId":26},{"nodeId":224,"parentName":"Playing sports","nodeName":"Hammer 
throw","parentId":64},{"nodeId":176,"parentName":"Playing sports","nodeName":"Skateboarding","parentId":64},{"nodeId":227,"parentName":"Sewing, repairing, & maintaining textiles","nodeName":"Polishing shoes","parentId":45},{"nodeId":229,"parentName":"Attending arts and entertainment","nodeName":"Ballet","parentId":39},{"nodeId":39,"parentName":"Arts and Entertainment","nodeName":"Attending arts and entertainment","parentId":21},{"nodeId":232,"parentName":"Laundry","nodeName":"Hand washing clothes","parentId":30},{"nodeId":233,"parentName":"Participating in water sports","nodeName":"Plataform diving","parentId":42},{"nodeId":234,"parentName":"Playing musical instruments","nodeName":"Playing violin","parentId":61},{"nodeId":236,"parentName":"Dancing","nodeName":"Breakdancing","parentId":23},{"nodeId":237,"parentName":"Participating in water sports","nodeName":"Windsurfing","parentId":42},{"nodeId":241,"parentName":"Playing games","nodeName":"Hopscotch","parentId":48},{"nodeId":48,"parentName":"Relaxing and Leisure","nodeName":"Playing games","parentId":11},{"nodeId":243,"parentName":"Playing sports","nodeName":"Doing motocross","parentId":64},{"nodeId":244,"parentName":"Food and drink preparation ","nodeName":"Mixing drinks","parentId":31},{"nodeId":153,"parentName":"Playing sports","nodeName":"Starting a campfire","parentId":64},{"nodeId":247,"parentName":"Dancing","nodeName":"Belly dance","parentId":23},{"nodeId":248,"parentName":"Washing, dressing and grooming oneself","nodeName":"Removing curlers","parentId":62},{"nodeId":250,"parentName":"Playing sports","nodeName":"Archery","parentId":64},{"nodeId":123,"parentName":"Playing volleyball","nodeName":"Volleyball","parentId":342},{"nodeId":130,"parentName":"Participating in water sports","nodeName":"Playing water polo","parentId":42},{"nodeId":253,"parentName":"Playing racquet sports ","nodeName":"Playing racquetball","parentId":54},{"nodeId":254,"parentName":"Participating in water 
sports","nodeName":"Kayaking","parentId":42},{"nodeId":156,"parentName":"Interior cleaning","nodeName":"Polishing forniture","parentId":37},{"nodeId":257,"parentName":"Playing sports","nodeName":"Playing kickball","parentId":64},{"nodeId":192,"parentName":"Doing gymnastics","nodeName":"Using uneven bars","parentId":24},{"nodeId":213,"parentName":"Kitchen and food clean-up","nodeName":"Washing dishes","parentId":43},{"nodeId":43,"parentName":"Food & Drink Prep., Presentation, & Clean-up","nodeName":"Kitchen and food clean-up","parentId":18},{"nodeId":106,"parentName":"Playing sports","nodeName":"Pole vault","parentId":64},{"nodeId":267,"parentName":"Playing musical instruments","nodeName":"Playing accordion","parentId":61},{"nodeId":268,"parentName":"Playing racquet sports ","nodeName":"Playing badminton","parentId":54}],"version":"VERSION 1.3","database":{"sJFgo9H6zNo":{"duration":139.042,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=sJFgo9H6zNo","annotations":[{"segment":[0.01,123.42336739937599],"label":"Fun sliding down"}]},"V1zhqaGFY2A":{"duration":136.98,"subset":"testing","resolution":"658x480","url":"https://www.youtube.com/watch?v=V1zhqaGFY2A","annotations":[]},"JDg--pjY5gg":{"duration":126.178,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JDg--pjY5gg","annotations":[{"segment":[61.420442338881465,64.56014561469672],"label":"Beer pong"}]},"KsFid_YVsn0":{"duration":235.13,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KsFid_YVsn0","annotations":[]},"-TmWR_keSfI":{"duration":104.955,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-TmWR_keSfI","annotations":[]},"u2uoYvo8J5s":{"duration":208.864,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=u2uoYvo8J5s","annotations":[{"segment":[127.33201508846798,201.39247572684465],"label":"Swinging at the 
playground"}]},"0Gr4aKQzGYk":{"duration":67.43,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0Gr4aKQzGYk","annotations":[{"segment":[30.096653368174724,56.67415180499219],"label":"Getting a piercing"}]},"a-6rpItrRSk":{"duration":8.38,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=a-6rpItrRSk","annotations":[]},"--0edUL8zmA":{"duration":92.18,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=--0edUL8zmA","annotations":[{"segment":[5.464839815912637,86.71837918408735],"label":"Dodgeball"}]},"c_NlYvL96y0":{"duration":125.27,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=c_NlYvL96y0","annotations":[{"segment":[12.82082075,108.5218213728549],"label":"Ballet"}]},"hHiPEAiYKv0":{"duration":83.06,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=hHiPEAiYKv0","annotations":[{"segment":[0.01,83.05780045351474],"label":"Shoveling snow"}]},"GCtrfXIBbwA":{"duration":108.03999999999999,"subset":"training","resolution":"424x240","url":"https://www.youtube.com/watch?v=GCtrfXIBbwA","annotations":[{"segment":[2.863770670826833,20.046394695787832],"label":"Playing 
harmonica"}]},"rs7er4e67ec":{"duration":107.671,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=rs7er4e67ec","annotations":[{"segment":[28.704790951638067,44.9876255850234],"label":"Hurling"}]},"PLek2e8NlKc":{"duration":184.599,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=PLek2e8NlKc","annotations":[{"segment":[8.205021060842434,80.18757956318254],"label":"Kneeling"},{"segment":[109.55610062402496,144.39522854914196],"label":"Kneeling"}]},"_uOfIm1tFcI":{"duration":133.56,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=_uOfIm1tFcI","annotations":[{"segment":[6.459276987519502,124.18480917940718],"label":"Tumbling"}]},"0n3VRoYYYGU":{"duration":232.432,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=0n3VRoYYYGU","annotations":[{"segment":[95.18451452343808,177.49645660656358],"label":"Sharpening knives"}]},"x90zpDHTpjQ":{"duration":91.58,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=x90zpDHTpjQ","annotations":[]},"T_q3f10pkOg":{"duration":142.71,"subset":"validation","resolution":"424x240","url":"https://www.youtube.com/watch?v=T_q3f10pkOg","annotations":[{"segment":[2.1143354134165366,135.4287472698908],"label":"Playing water polo"}]},"6ZabZdqdLyA":{"duration":71.82,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=6ZabZdqdLyA","annotations":[{"segment":[4.366113884555382,69.65332605304212],"label":"Swinging at the playground"}]},"UMVYt6Bgrrs":{"duration":136.51,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UMVYt6Bgrrs","annotations":[]},"yUSo_xjyGhM":{"duration":120.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=yUSo_xjyGhM","annotations":[{"segment":[1.4982488387970894,84.65105939203553],"label":"Washing dishes"},{"segment":[106.93750979251169,117.79981376443057],"label":"Washing 
dishes"}]},"Ou24uqaFRPg":{"duration":50.48,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Ou24uqaFRPg","annotations":[{"segment":[26.214528861154445,40.54198907956318],"label":"Polishing forniture"}]},"Uw_1iNGntZU":{"duration":64.02,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Uw_1iNGntZU","annotations":[]},"sjyZWmvTGA4":{"duration":35.968,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sjyZWmvTGA4","annotations":[{"segment":[5.274515916380655,34.78936029953198],"label":"Blowing leaves"}]},"c5dvRUBZw2Q":{"duration":234.289,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=c5dvRUBZw2Q","annotations":[{"segment":[66.15323674076616,152.14434841612578],"label":"Scuba diving"}]},"BxzOUJbq28s":{"duration":225.42,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=BxzOUJbq28s","annotations":[{"segment":[5.273658346333853,218.7004399375975],"label":"Playing congas"}]},"Z8yNbpjUSLc":{"duration":166.511,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=Z8yNbpjUSLc","annotations":[{"segment":[52.46359126365054,143.36585335413415],"label":"Making a lemonade"}]},"A1EflBqBv14":{"duration":97.013,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=A1EflBqBv14","annotations":[{"segment":[49.77651950078003,96.981],"label":"Arm 
wrestling"}]},"6uhLrPgbpUA":{"duration":46.162,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6uhLrPgbpUA","annotations":[{"segment":[6.337222664586584,12.098334177847114],"label":"BMX"},{"segment":[21.17208481123245,24.412710037441496],"label":"BMX"},{"segment":[29.23764092979719,38.095349881435254],"label":"BMX"}]},"jIQFVSymHQs":{"duration":11.73,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jIQFVSymHQs","annotations":[{"segment":[0.28354788135725434,9.759535142199688],"label":"Playing kickball"}]},"2U0vMYnC49c":{"duration":50.62,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=2U0vMYnC49c","annotations":[{"segment":[0.01,49.15295385862808],"label":"Removing ice from car"}]},"t2wawfMaTzM":{"duration":66.2,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=t2wawfMaTzM","annotations":[{"segment":[1.3425832542901717,9.294807145085802],"label":"Playing racquetball"},{"segment":[18.383063020280808,34.49406207176287],"label":"Playing racquetball"},{"segment":[39.554568184087366,55.76884287051482],"label":"Playing racquetball"}]},"gKkk82LG8H0":{"duration":121.534,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gKkk82LG8H0","annotations":[{"segment":[0.01,110.91567327613106],"label":"Playing 
congas"}]},"R-hsYQNtnn0":{"duration":41.727,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=R-hsYQNtnn0","annotations":[]},"UvuXGKesWS0":{"duration":102.795,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=UvuXGKesWS0","annotations":[{"segment":[9.937507020280812,33.97986271450858],"label":"Swimming"},{"segment":[38.94861622464899,64.92699687987519],"label":"Swimming"},{"segment":[70.85859906396256,101.79911856474259],"label":"Swimming"}]},"thpmgolDsbQ":{"duration":123.74,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=thpmgolDsbQ","annotations":[]},"8pR6NBu7Ay0":{"duration":196.49,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8pR6NBu7Ay0","annotations":[{"segment":[31.094469578783155,169.25807332293292],"label":"Paintball"}]},"mh_0QLZhrSY":{"duration":100.1,"subset":"validation","resolution":"800x480","url":"https://www.youtube.com/watch?v=mh_0QLZhrSY","annotations":[{"segment":[0,100.041],"label":"Playing 
bagpipes"}]},"jhbqzV3tbDk":{"duration":57.701,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jhbqzV3tbDk","annotations":[]},"NTKUu3gpIr8":{"duration":119.21000000000001,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=NTKUu3gpIr8","annotations":[]},"pRwx9my8-is":{"duration":11.332,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=pRwx9my8-is","annotations":[]},"S2qF3yvM_PA":{"duration":89.14,"subset":"testing","resolution":"300x240","url":"https://www.youtube.com/watch?v=S2qF3yvM_PA","annotations":[]},"-YjGbsbDoxs":{"duration":162.35,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-YjGbsbDoxs","annotations":[{"segment":[30.768311232449296,157.386875975039],"label":"Painting"}]},"p800u2wCKbE":{"duration":40.473,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=p800u2wCKbE","annotations":[{"segment":[8.523823970358814,15.532301457098283],"label":"Making a lemonade"},{"segment":[33.337622639625586,36.49459448049922],"label":"Making a lemonade"}]},"BfnM0eyjB5Q":{"duration":185.621,"subset":"training","resolution":"852x480","url":"https://www.youtube.com/watch?v=BfnM0eyjB5Q","annotations":[{"segment":[15.492493287486601,168.67995027964383],"label":"Assembling bicycle"}]},"RTnNxbG2V5o":{"duration":235.776,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=RTnNxbG2V5o","annotations":[{"segment":[60.13928406932196,190.71693449506688],"label":"Riding bumper cars"}]},"-Xl95IW5H_s":{"duration":178.21,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=-Xl95IW5H_s","annotations":[{"segment":[63.38938945312915,148.095003],"label":"Starting a 
campfire"}]},"8bSCVzFXjzk":{"duration":175.06,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=8bSCVzFXjzk","annotations":[]},"maXU1lGguxs":{"duration":85.22,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=maXU1lGguxs","annotations":[{"segment":[10.39562183,59.04174628],"label":"Playing violin"}]},"R4ES1QLRvtg":{"duration":187.39,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=R4ES1QLRvtg","annotations":[{"segment":[3.3613190327613105,92.80163416536661],"label":"Shoveling snow"},{"segment":[101.57029251170046,105.077755850234],"label":"Shoveling snow"},{"segment":[113.26183697347894,119.10760920436817],"label":"Shoveling snow"},{"segment":[129.92228783151327,174.35015678627144],"label":"Shoveling snow"}]},"oD8sGLt2FR0":{"duration":28.05,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=oD8sGLt2FR0","annotations":[]},"9pk3tgrILeY":{"duration":53.64,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9pk3tgrILeY","annotations":[]},"boWMu522Pfc":{"duration":12.15,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=boWMu522Pfc","annotations":[]},"UkA6pgt29VI":{"duration":236.38,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UkA6pgt29VI","annotations":[{"segment":[37.239377535101404,58.255659906396254],"label":"Surfing"},{"segment":[67.84203432137286,150.80104368174727],"label":"Surfing"},{"segment":[161.12483151326052,185.09076755070205],"label":"Surfing"},{"segment":[191.72748829953198,206.84446333853356],"label":"Surfing"}]},"AwcM25G1h-g":{"duration":127.01,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=AwcM25G1h-g","annotations":[]},"jafEh82XETQ":{"duration":111.6,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=jafEh82XETQ","annotations":[{"segment":[10.184159699999999,1
11.5951019],"label":"Making a sandwich"}]},"0YQPGAsZPgY":{"duration":166.093,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=0YQPGAsZPgY","annotations":[{"segment":[0.19999999999999998,166.09233560090703],"label":"Playing congas"}]},"akrKZpGXeo4":{"duration":164.467,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=akrKZpGXeo4","annotations":[]},"5Bo0gFXxDQk":{"duration":16.998,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5Bo0gFXxDQk","annotations":[{"segment":[0.2,15.382024774143304],"label":"Welding"}]},"hmT9H0IjiGs":{"duration":215.551,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hmT9H0IjiGs","annotations":[{"segment":[62.882906845525525,181.5870021528861],"label":"Making a lemonade"}]},"97McCuWAynA":{"duration":127.316,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=97McCuWAynA","annotations":[{"segment":[3.423820584947818,5.604253606754671],"label":"Surfing"}]},"WEohpavjsIY":{"duration":136.37,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WEohpavjsIY","annotations":[{"segment":[12.764816878389421,134.88156501498156],"label":"Cumbia"}]},"3joaQzU05MY":{"duration":40.26,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=3joaQzU05MY","annotations":[{"segment":[0,29.208238689547578],"label":"Hopscotch"}]},"kcV3HTdtgxk":{"duration":11.123,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=kcV3HTdtgxk","annotations":[]},"3X9dtfeV1mQ":{"duration":144.87,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3X9dtfeV1mQ","annotations":[]},"br-L1PS2ADg":{"duration":50.086,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=br-L1PS2ADg","annotations":[]},"w2HnFjJei7k":{"duration":35.202,"subset":"validation","resolution":"1280x720","url":"https
://www.youtube.com/watch?v=w2HnFjJei7k","annotations":[{"segment":[10.983291731669267,34.432619578783154],"label":"Gargling mouthwash"}]},"dUbwRd6ZT-Y":{"duration":128.639,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dUbwRd6ZT-Y","annotations":[]},"IGcsVPa34Hc":{"duration":214.437,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=IGcsVPa34Hc","annotations":[{"segment":[7.35785023400936,30.769191887675508],"label":"Baking cookies"},{"segment":[100.66876911076443,113.71223088923557],"label":"Baking cookies"},{"segment":[139.79915444617782,148.4947956318253],"label":"Baking cookies"},{"segment":[191.6385538221529,213.71210452418097],"label":"Baking cookies"}]},"CTyvQHkAKuY":{"duration":217.27,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CTyvQHkAKuY","annotations":[]},"qogdv5DWzkQ":{"duration":221.403,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=qogdv5DWzkQ","annotations":[{"segment":[0.01,185.65658993426882],"label":"Braiding hair"}]},"gSeE_Ms-2TI":{"duration":158.313,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=gSeE_Ms-2TI","annotations":[{"segment":[0.01,152.00033074885772],"label":"Roof shingle 
removal"}]},"1PTNnaEu8xo":{"duration":163.423,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=1PTNnaEu8xo","annotations":[{"segment":[30.84877837562482,117.53129481903277],"label":"Capoeira"}]},"rhOtqArO-3Y":{"duration":231.248,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rhOtqArO-3Y","annotations":[{"segment":[33.55073344158256,165.94986433470945],"label":"Slacklining"}]},"-1K773_2MIY":{"duration":72.24,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=-1K773_2MIY","annotations":[]},"SrSmr4qKfJA":{"duration":74.21000000000001,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SrSmr4qKfJA","annotations":[]},"lW4OZ8eP3ns":{"duration":70.03,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=lW4OZ8eP3ns","annotations":[{"segment":[0.01,17.699038820592822],"label":"Playing violin"},{"segment":[35.50733096723868,60.63559595943837],"label":"Playing violin"}]},"rWQz-EwA4EA":{"duration":9.149,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=rWQz-EwA4EA","annotations":[{"segment":[0.01,8.413348081557242],"label":"Plastering"}]},"IZnRIj69JKo":{"duration":15.117,"subset":"testing","resolution":"608x1080","url":"https://www.youtube.com/watch?v=IZnRIj69JKo","annotations":[]},"Yk3pQ18So90":{"duration":203.825,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Yk3pQ18So90","annotations":[{"segment":[0.01,203.8247619047619],"label":"Changing car wheel"}]},"xr64cSwpNOM":{"duration":82.66,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xr64cSwpNOM","annotations":[{"segment":[1.6752152886115446,80.66805928237129],"label":"Chopping 
wood"}]},"xi4puZ1ulP4":{"duration":226.21,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xi4puZ1ulP4","annotations":[{"segment":[109.72865990639626,182.41066614664587],"label":"Removing curlers"}]},"M1-G6KEhY-M":{"duration":130.03,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=M1-G6KEhY-M","annotations":[{"segment":[15.51303042121685,123.80006630265211],"label":"Horseback riding"}]},"89f62by1ef0":{"duration":56.193,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=89f62by1ef0","annotations":[]},"KE9T_srKrnI":{"duration":204.174,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KE9T_srKrnI","annotations":[]},"l_pp-1qu9Ig":{"duration":113.18,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=l_pp-1qu9Ig","annotations":[]},"i0AsepC37Sk":{"duration":211.604,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=i0AsepC37Sk","annotations":[{"segment":[19.472853354134166,189.46566926677065],"label":"Playing congas"}]},"x99PS_O6JW8":{"duration":117.08,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=x99PS_O6JW8","annotations":[{"segment":[3.3768127925117004,14.511168486739468],"label":"Smoking hookah"},{"segment":[47.731705148205926,88.0709282371295],"label":"Smoking hookah"}]},"D_XMD02v690":{"duration":194.514,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=D_XMD02v690","annotations":[{"segment":[10.013958631814662,192.08593267862713],"label":"Doing a 
powerbomb"}]},"sl4qoHiJ8FY":{"duration":121.975,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=sl4qoHiJ8FY","annotations":[]},"lVMMPkvnid8":{"duration":158.175,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=lVMMPkvnid8","annotations":[{"segment":[25.40789859594384,49.33572542901717],"label":"Playing ten pins"},{"segment":[68.32997971918877,79.43051794071762],"label":"Playing ten pins"},{"segment":[83.13069734789391,88.31094851794073],"label":"Playing ten pins"},{"segment":[98.43722152886114,110.77271294851793],"label":"Playing ten pins"},{"segment":[142.3515709828393,157.89429017160685],"label":"Playing ten pins"}]},"whJ6ESGNoyY":{"duration":29.56,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=whJ6ESGNoyY","annotations":[{"segment":[0.01,19.275604396255847],"label":"Walking the dog"}]},"qJYp25ni6tg":{"duration":193.19,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=qJYp25ni6tg","annotations":[{"segment":[21.39858285487882,181.43587089547583],"label":"Preparing salad"}]},"jzNdWjZm92s":{"duration":194.096,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jzNdWjZm92s","annotations":[{"segment":[16.348352574102964,154.40110764430577],"label":"Rafting"}]},"j4Ru2L4u0Qk":{"duration":91.371,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=j4Ru2L4u0Qk","annotations":[{"segment":[1.9956105291830721,21.38154138410434],"label":"Hurling"},{"segment":[58.01524895553645,82.53274927769111],"label":"Hurling"},{"segment":[84.8134470124805,91.170453],"label":"Hurling"}]},"obVMUmZQW_M":{"duration":142.62,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=obVMUmZQW_M","annotations":[{"segment":[22.694118439937597,102.79100705148205],"label":"Removing 
curlers"}]},"mQHY2uMOMP4":{"duration":115.47,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mQHY2uMOMP4","annotations":[{"segment":[29.183462065013213,94.03559879251169],"label":"Getting a haircut"}]},"kQ4rE7o6rrg":{"duration":128.36,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kQ4rE7o6rrg","annotations":[{"segment":[29.923556162246488,60.14734867394696],"label":"Playing beach volleyball"},{"segment":[69.75491185647425,78.16152964118564],"label":"Playing beach volleyball"},{"segment":[97.77697113884554,112.40599297971919],"label":"Playing beach volleyball"}]},"a2vaM_aRHv4":{"duration":177.262,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=a2vaM_aRHv4","annotations":[{"segment":[53.911263650546026,98.69908268330732],"label":"Making a cake"}]},"XnctHnlJB4g":{"duration":17.04,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XnctHnlJB4g","annotations":[{"segment":[2.174854914196568,16.311411856474262],"label":"Clean and jerk"}]},"5DhbyYavYp4":{"duration":234.522,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5DhbyYavYp4","annotations":[{"segment":[27.354733064175083,42.308656848307706],"label":"Trimming branches or hedges"},{"segment":[84.25258941355776,101.03016243965777],"label":"Trimming branches or hedges"},{"segment":[145.52720394366216,148.44504273081],"label":"Trimming branches or hedges"}]},"YUFPWt0VQ1Y":{"duration":88.91,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=YUFPWt0VQ1Y","annotations":[{"segment":[53.73328981852345,82.67725137850468],"label":"Drum 
corps"}]},"aoIGBV31OT4":{"duration":197.79,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aoIGBV31OT4","annotations":[{"segment":[0,197.787074],"label":"Windsurfing"}]},"yVsOoFr61x4":{"duration":24.08,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yVsOoFr61x4","annotations":[{"segment":[6.636234789391575,20.0215015600624],"label":"Plataform diving"}]},"WMdeqUyHWSo":{"duration":130.172,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=WMdeqUyHWSo","annotations":[{"segment":[20.307498558445744,26.196673140395006],"label":"Kite flying"},{"segment":[38.17809706708268,45.48879650546022],"label":"Kite flying"}]},"M-n0vW3p2sE":{"duration":40.15,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=M-n0vW3p2sE","annotations":[{"segment":[4.13373135725429,34.00933525741029],"label":"Using parallel bars"}]},"5iQsKA84vWo":{"duration":155.64,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5iQsKA84vWo","annotations":[{"segment":[23.24372562,136.58451465000002],"label":"Doing kickboxing"}]},"k89EbTl0yfY":{"duration":25.15,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=k89EbTl0yfY","annotations":[]},"2CxYCq5kOZ4":{"duration":201.619,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=2CxYCq5kOZ4","annotations":[]},"G9ZEi6eRhi0":{"duration":132.89,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=G9ZEi6eRhi0","annotations":[{"segment":[0,132.8878],"label":"Dodgeball"}]},"lpeyMIH1YqA":{"duration":24.78,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=lpeyMIH1YqA","annotations":[{"segment":[8.928524789391576,18.398172293291733],"label":"Cleaning 
shoes"}]},"btrx-2wYVYw":{"duration":206.71,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=btrx-2wYVYw","annotations":[]},"zp86ztwZEKk":{"duration":88.42,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zp86ztwZEKk","annotations":[{"segment":[14.484035361414458,19.72587673030731],"label":"Playing field hockey"},{"segment":[25.51949087487309,32.27870737686651],"label":"Playing field hockey"},{"segment":[52.41841368892852,58.07408463957606],"label":"Playing field hockey"}]},"5yEBtodt_9U":{"duration":169.46,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=5yEBtodt_9U","annotations":[]},"-5xEgcRn1-g":{"duration":111.572,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-5xEgcRn1-g","annotations":[]},"br8Ao_UBEZA":{"duration":82.48,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=br8Ao_UBEZA","annotations":[{"segment":[8.84874608,73.72775803432137],"label":"Playing squash"}]},"KyrDumISv4A":{"duration":72.84,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=KyrDumISv4A","annotations":[{"segment":[18.06820377285491,66.47735350390016],"label":"Tumbling"}]},"rAUwt3trNMM":{"duration":160.91,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=rAUwt3trNMM","annotations":[]},"buhaBimF4M0":{"duration":209.63,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=buhaBimF4M0","annotations":[{"segment":[45.08595631825273,200.92654446177846],"label":"Rollerblading"}]},"LcmTBkWvV74":{"duration":180.4,"subset":"training","resolution":"384x288","url":"https://www.youtube.com/watch?v=LcmTBkWvV74","annotations":[{"segment":[53.32115410000001,177.30421216848674],"label":"Getting a 
piercing"}]},"Sd08rsPTroE":{"duration":234.197,"subset":"validation","resolution":"853x470","url":"https://www.youtube.com/watch?v=Sd08rsPTroE","annotations":[{"segment":[125.85323923962109,164.88598300379343],"label":"Playing drums"},{"segment":[184.58474963551402,222.88790914485983],"label":"Playing drums"}]},"KaGGtAhiH64":{"duration":213.113,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KaGGtAhiH64","annotations":[{"segment":[20.613089100434767,126.33828803492277],"label":"Playing rubik cube"},{"segment":[93.42383931003498,128.66557228819767],"label":"Playing rubik cube"},{"segment":[131.6577948995511,162.24495937116396],"label":"Playing rubik cube"},{"segment":[166.2345895196352,212.91274376417235],"label":"Playing rubik cube"}]},"waDC7lUystA":{"duration":69.195,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=waDC7lUystA","annotations":[]},"0bosp4-pyTM":{"duration":115.636,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=0bosp4-pyTM","annotations":[{"segment":[70.17497744807751,75.76732708268331],"label":"Sharpening knives"},{"segment":[78.11250625429017,82.8028645975039],"label":"Sharpening 
knives"}]},"IgEc5mY9L44":{"duration":91.673,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=IgEc5mY9L44","annotations":[{"segment":[19.306975296411856,91.67238095238095],"label":"Welding"}]},"amCD-2TIKw0":{"duration":124.18,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=amCD-2TIKw0","annotations":[{"segment":[0.01,124.18031746031745],"label":"Zumba"}]},"_N1MWv9bW6Q":{"duration":86.36,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=_N1MWv9bW6Q","annotations":[]},"8L1UjGLOCeI":{"duration":12.45,"subset":"testing","resolution":"270x480","url":"https://www.youtube.com/watch?v=8L1UjGLOCeI","annotations":[]},"R8rWDSjHG60":{"duration":38.17,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=R8rWDSjHG60","annotations":[]},"071ZYjqfq3o":{"duration":155.55,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=071ZYjqfq3o","annotations":[]},"LDqNQpdbsXo":{"duration":119.47,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=LDqNQpdbsXo","annotations":[]},"0MtST08F2Xc":{"duration":36.851,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0MtST08F2Xc","annotations":[{"segment":[0,36.821],"label":"Raking leaves"}]},"WUOiJOqrHlI":{"duration":175.52,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WUOiJOqrHlI","annotations":[{"segment":[6.571718934063485,24.096302758232778],"label":"Ping-pong"},{"segment":[39.15649156630265,48.740248243369734],"label":"Ping-pong"},{"segment":[115.82654498283931,121.57679898907956],"label":"Ping-pong"},{"segment":[145.67310149141966,157.7212527425897],"label":"Ping-pong"}]},"V90aT-d_FKo":{"duration":77.83,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=V90aT-d_FKo","annotations":[{"segment":[0.01,77.83331065759637],"label":"Bathing 
dog"}]},"I4wkBjpwAAU":{"duration":93.136,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=I4wkBjpwAAU","annotations":[{"segment":[3.683435423770798,88.08340738638762],"label":"Tug of war"}]},"M9uQCTr2-r0":{"duration":184.58,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=M9uQCTr2-r0","annotations":[{"segment":[7.196606864274571,181.0862542901716],"label":"Ping-pong"}]},"Jifw8dC5yTM":{"duration":150.09,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Jifw8dC5yTM","annotations":[{"segment":[14.283496096306438,139.0884693978159],"label":"Breakdancing"}]},"8LV3M7Pw6Fc":{"duration":121.02,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=8LV3M7Pw6Fc","annotations":[]},"5o9iv0wC59g":{"duration":45.396,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=5o9iv0wC59g","annotations":[{"segment":[6.515352641316538,41.92487663026521],"label":"Playing congas"}]},"4S5ethZhgqs":{"duration":17.0,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=4S5ethZhgqs","annotations":[{"segment":[5.038114102964118,5.780573023400936],"label":"Drinking coffee"},{"segment":[6.6821302839313566,7.6367203244929796],"label":"Drinking coffee"},{"segment":[8.564793975039002,9.38680206552262],"label":"Drinking coffee"},{"segment":[15.35298981903276,15.697702889235568],"label":"Drinking 
coffee"}]},"Ci9WKE6wkOQ":{"duration":155.156,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ci9WKE6wkOQ","annotations":[{"segment":[69.92853198127925,83.96263182527301],"label":"Kneeling"},{"segment":[90.0118127925117,155.121],"label":"Kneeling"}]},"VT871SvzqTY":{"duration":154.72,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VT871SvzqTY","annotations":[]},"IlD4gfHmWIg":{"duration":93.69,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IlD4gfHmWIg","annotations":[{"segment":[3.800311304969205,88.13798911140118],"label":"Using the balance beam"}]},"Z4OyG8ZzUpg":{"duration":176.36,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Z4OyG8ZzUpg","annotations":[{"segment":[130.558155,147.2557275],"label":"Playing lacrosse"},{"segment":[12.50989727223089,114.34487750000001],"label":"Playing lacrosse"}]},"raxeBxPQqlk":{"duration":161.519,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=raxeBxPQqlk","annotations":[]},"BenCx65cYvc":{"duration":142.269,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BenCx65cYvc","annotations":[]},"Bmb315fcUpQ":{"duration":183.97,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=Bmb315fcUpQ","annotations":[]},"F_EHrfbYsRE":{"duration":226.883,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=F_EHrfbYsRE","annotations":[{"segment":[0.01,0.02],"label":"Running a marathon"}]},"1FMjfLRpXwM":{"duration":142.037,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1FMjfLRpXwM","annotations":[{"segment":[54.953264775488975,113.89507296210215],"label":"Braiding 
hair"}]},"yxSBQXuOwuc":{"duration":144.011,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=yxSBQXuOwuc","annotations":[{"segment":[13.255202259791073,130.5300423229329],"label":"Scuba diving"}]},"C0F-xroK_Eg":{"duration":205.36,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=C0F-xroK_Eg","annotations":[{"segment":[6.91026305,152.83489368],"label":"Preparing pasta"}]},"adz3kDJLCF4":{"duration":68.85,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=adz3kDJLCF4","annotations":[{"segment":[1.7287481699999998,66.23273498829953],"label":"Brushing teeth"}]},"1YU4MSK80cQ":{"duration":16.28,"subset":"testing","resolution":"404x720","url":"https://www.youtube.com/watch?v=1YU4MSK80cQ","annotations":[]},"TOP1Fwili-k":{"duration":139.09,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=TOP1Fwili-k","annotations":[{"segment":[83.53895592043682,130.6245492574103],"label":"Playing badminton"}]},"ZEgmWaMa9PU":{"duration":229.47,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZEgmWaMa9PU","annotations":[{"segment":[21.840667607656684,91.65919520590347],"label":"Bungee jumping"}]},"1BfYAuxv6Wk":{"duration":63.58,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=1BfYAuxv6Wk","annotations":[{"segment":[18.43779407176287,56.50291731669267],"label":"Mixing drinks"}]},"kcB7HQPxzDM":{"duration":77.18,"subset":"training","resolution":"634x360","url":"https://www.youtube.com/watch?v=kcB7HQPxzDM","annotations":[{"segment":[47.34713952,54.41846734733226],"label":"Discus throw"},{"segment":[5.20505613237898,14.207509403407018],"label":"Discus throw"},{"segment":[28.85150005761263,35.93342996415468],"label":"Discus throw"},{"segment":[58.259514076304214,65.94160753424813],"label":"Discus 
throw"}]},"Iuk7PNmG3tQ":{"duration":115.032,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=Iuk7PNmG3tQ","annotations":[{"segment":[5.74008112324493,106.0305631825273],"label":"Playing ice hockey"}]},"3C4Nry679BA":{"duration":14.49,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3C4Nry679BA","annotations":[{"segment":[1.3845655226209048,13.991991419656786],"label":"Preparing salad"}]},"JnYteBN2LDI":{"duration":61.09,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=JnYteBN2LDI","annotations":[{"segment":[6.0965741029641185,60.20366926677067],"label":"Mixing drinks"}]},"8i_pspVcWPY":{"duration":94.46000000000001,"subset":"testing","resolution":"1920x818","url":"https://www.youtube.com/watch?v=8i_pspVcWPY","annotations":[]},"VCldTcZi0aE":{"duration":92.439,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VCldTcZi0aE","annotations":[{"segment":[0.01,92.361],"label":"Drum corps"}]},"uJZFC7gHZGI":{"duration":121.23,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uJZFC7gHZGI","annotations":[{"segment":[45.46738157020281,94.43225403042122],"label":"Getting a piercing"}]},"K8G5CBJtluY":{"duration":69.684,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=K8G5CBJtluY","annotations":[{"segment":[35.522386821246066,39.10422748479747],"label":"Doing crunches"},{"segment":[50.28391198012457,59.61840583059186],"label":"Doing crunches"}]},"SokK_O2s9tQ":{"duration":110.574,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=SokK_O2s9tQ","annotations":[{"segment":[0.2,54.16893761152725],"label":"Camel ride"}]},"srXhWAGzd-s":{"duration":218.268,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=srXhWAGzd-s","annotations":[{"segment":[29.275017160686424,49.359040561622464],"label":"Hand car 
wash"},{"segment":[54.46514820592824,90.54830889235569],"label":"Hand car wash"},{"segment":[105.52622464898595,152.8428221528861],"label":"Hand car wash"}]},"b5GrYqt5tRQ":{"duration":164.096,"subset":"training","resolution":"400x230","url":"https://www.youtube.com/watch?v=b5GrYqt5tRQ","annotations":[{"segment":[0.20000000000000004,163.841],"label":"Capoeira"}]},"jCVi9xt3GUY":{"duration":40.59,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jCVi9xt3GUY","annotations":[{"segment":[0.34706057956318254,33.47557044695788],"label":"Washing dishes"}]},"k_ZXmr8pmrs":{"duration":182.51,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=k_ZXmr8pmrs","annotations":[{"segment":[39.007320939157566,169.69608233385335],"label":"Sailing"}]},"CHkT4As5jZI":{"duration":66.6,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CHkT4As5jZI","annotations":[{"segment":[7.30402992,56.361093471138844],"label":"Mixing drinks"}]},"jgNeMnPVfkQ":{"duration":70.612,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jgNeMnPVfkQ","annotations":[{"segment":[0.01,70.29058452472604],"label":"Playing ice hockey"}]},"SLisp6hn700":{"duration":177.89,"subset":"validation","resolution":"848x480","url":"https://www.youtube.com/watch?v=SLisp6hn700","annotations":[{"segment":[20.36461824,131.58197472],"label":"Wrapping presents"}]},"V1SEaTS9hos":{"duration":11.15,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=V1SEaTS9hos","annotations":[{"segment":[0.01,11.145577],"label":"Hand washing 
clothes"}]},"vYHtmvftHoU":{"duration":166.417,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vYHtmvftHoU","annotations":[{"segment":[40.424168224299066,146.94396728971964],"label":"Surfing"}]},"8qAdvUvNuts":{"duration":120.19,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=8qAdvUvNuts","annotations":[{"segment":[45.93712007800312,113.436561825273],"label":"Rock climbing"}]},"MbCAiWBhAjo":{"duration":135.35,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MbCAiWBhAjo","annotations":[{"segment":[11.402234957878315,121.834992049922],"label":"Making a lemonade"}]},"r2oMItb5EVM":{"duration":109.158,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=r2oMItb5EVM","annotations":[{"segment":[0.01,109.101],"label":"Making an omelette"}]},"zDBpa2miW8A":{"duration":52.385,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=zDBpa2miW8A","annotations":[{"segment":[5.311743061486869,20.464875817318905],"label":"Cutting the grass"}]},"TlDh_RZ3HDk":{"duration":145.334,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TlDh_RZ3HDk","annotations":[{"segment":[6.801889055154043,140.57237366614663],"label":"Braiding hair"}]},"_xszCju7ER0":{"duration":128.36,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_xszCju7ER0","annotations":[]},"gF14lRI0HUM":{"duration":19.13,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=gF14lRI0HUM","annotations":[{"segment":[1.2238110468018721,19.103391950078006],"label":"Doing 
kickboxing"}]},"T9SW-T8XE2g":{"duration":20.57,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=T9SW-T8XE2g","annotations":[]},"7tdlcmbuLOA":{"duration":204.22,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7tdlcmbuLOA","annotations":[{"segment":[19.11353645982574,23.57336163378508],"label":"Wakeboarding"},{"segment":[157.36811594383775,158.32379276131044],"label":"Wakeboarding"},{"segment":[174.25173971918878,179.6672416848674],"label":"Wakeboarding"}]},"JG7T6qtO4jg":{"duration":136.37,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JG7T6qtO4jg","annotations":[]},"E7NTSqMgx44":{"duration":18.855,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=E7NTSqMgx44","annotations":[{"segment":[0.01,14.971438546021842],"label":"Drum corps"}]},"0J55EGFLglQ":{"duration":19.854,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=0J55EGFLglQ","annotations":[{"segment":[5.20587920245259,19.853061],"label":"Futsal"}]},"PaoElimUG3s":{"duration":132.052,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=PaoElimUG3s","annotations":[]},"NiaE7amNW7s":{"duration":189.89,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NiaE7amNW7s","annotations":[{"segment":[3.258679692667707,92.72424943681747],"label":"Using the balance beam"},{"segment":[119.97866141185646,156.71286885647427],"label":"Using the balance beam"}]},"KvscV2XxyTE":{"duration":103.19,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KvscV2XxyTE","annotations":[{"segment":[7.72713712324493,101.4186747425897],"label":"Using the balance 
beam"}]},"zlXvqnBqVvI":{"duration":22.64,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=zlXvqnBqVvI","annotations":[{"segment":[1.9602024897322425,3.8321074375975037],"label":"Shuffleboard"}]},"YOIvlE8q9pY":{"duration":48.368,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=YOIvlE8q9pY","annotations":[]},"R0sADQPdso8":{"duration":114.336,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=R0sADQPdso8","annotations":[{"segment":[19.611404056162247,63.82620592823714],"label":"Hula hoop"}]},"3l7quTy4c2s":{"duration":174.29,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3l7quTy4c2s","annotations":[{"segment":[21.75210077790867,26.918224712661978],"label":"Cheerleading"},{"segment":[35.34716376410159,69.60672248930774],"label":"Cheerleading"},{"segment":[87.00840237129485,95.70924260842433],"label":"Cheerleading"},{"segment":[99.78776146957878,102.77867530109204],"label":"Cheerleading"},{"segment":[104.41008284555382,121.81176331981278],"label":"Cheerleading"},{"segment":[122.08366457722308,127.2497884680187],"label":"Cheerleading"},{"segment":[129.96880104212167,154.71181546645866],"label":"Cheerleading"}]},"ooFuSspUuVY":{"duration":210.84,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=ooFuSspUuVY","annotations":[]},"hjsu3SGAdLs":{"duration":131.38,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hjsu3SGAdLs","annotations":[{"segment":[4.304128965158606,51.03467201545204],"label":"Playing piano"},{"segment":[60.46276403437089,119.90073390795632],"label":"Playing 
piano"}]},"wmfI4aiJdH4":{"duration":67.71,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wmfI4aiJdH4","annotations":[]},"dAa10hlgxCY":{"duration":228.392,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dAa10hlgxCY","annotations":[{"segment":[6.767705148205929,220.1285148205928],"label":"Baton twirling"}]},"P24G3V7FUfg":{"duration":21.76,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=P24G3V7FUfg","annotations":[]},"UxlDSfC7S-0":{"duration":156.34,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UxlDSfC7S-0","annotations":[{"segment":[21.701698907956317,156.281],"label":"Mooping floor"}]},"pOyP_kDaskY":{"duration":197.07,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=pOyP_kDaskY","annotations":[{"segment":[23.672665577223086,28.284223806552262],"label":"Playing harmonica"},{"segment":[36.585028619344776,51.95688938377534],"label":"Playing harmonica"},{"segment":[56.875884828393126,59.33538255070202],"label":"Playing harmonica"},{"segment":[78.70392711388456,86.38985749609984],"label":"Playing harmonica"},{"segment":[150.64423549141966,157.7152914430577],"label":"Playing harmonica"}]},"mvQxhjjVWOk":{"duration":176.5,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=mvQxhjjVWOk","annotations":[{"segment":[15.143866053961885,173.74144382839316],"label":"Triple 
jump"}]},"nzjivjnk2Ac":{"duration":233.013,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nzjivjnk2Ac","annotations":[{"segment":[17.983326833073324,56.493077223088925],"label":"Longboarding"},{"segment":[76.111251950078,203.6661131045242],"label":"Longboarding"}]},"WBkqGBFsdgI":{"duration":121.232,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WBkqGBFsdgI","annotations":[{"segment":[51.22407332293292,58.59580343213728],"label":"Shuffleboard"},{"segment":[71.82711388455539,78.07761778471139],"label":"Shuffleboard"},{"segment":[82.23671606864275,97.73880967238689],"label":"Shuffleboard"},{"segment":[100.19645865834633,106.62415600624024],"label":"Shuffleboard"}]},"fN2DiOswmOA":{"duration":185.48,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=fN2DiOswmOA","annotations":[{"segment":[98.383067800312,109.37882243681747],"label":"Preparing salad"},{"segment":[125.00436849921998,151.33630723400935],"label":"Preparing salad"}]},"rHctuDlE-Mw":{"duration":92.21000000000001,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rHctuDlE-Mw","annotations":[]},"oOu5vC3EB5E":{"duration":65.13,"subset":"training","resolution":"626x360","url":"https://www.youtube.com/watch?v=oOu5vC3EB5E","annotations":[{"segment":[0.01,65.13165532879819],"label":"Playing squash"}]},"jmmOBfS3JIg":{"duration":105.07,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jmmOBfS3JIg","annotations":[{"segment":[103.75867394695787,104.86999999999999],"label":"Polishing shoes"}]},"UA_RDwuO47s":{"duration":72.53999999999999,"subset":"training","resolution":"400x226","url":"https://www.youtube.com/watch?v=UA_RDwuO47s","annotations":[{"segment":[0.01,72.32798751950078],"label":"Mixing 
drinks"}]},"fxlTN3mCWDA":{"duration":169.901,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fxlTN3mCWDA","annotations":[{"segment":[17.460289719626168,166.13730218068537],"label":"Playing congas"}]},"FOF8YgUhRNA":{"duration":68.27,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=FOF8YgUhRNA","annotations":[{"segment":[33.65408216328653,62.196150327613104],"label":"Mixing drinks"}]},"231pbDe3uQc":{"duration":10.89,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=231pbDe3uQc","annotations":[{"segment":[0.01,10.412480499219969],"label":"Doing motocross"}]},"TxxUmibEy9Q":{"duration":189.243,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TxxUmibEy9Q","annotations":[{"segment":[68.49321093387954,123.110642066499],"label":"Arm wrestling"}]},"0UcBldDI0RA":{"duration":156.573,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=0UcBldDI0RA","annotations":[{"segment":[0.01,141.15553510140404],"label":"Welding"}]},"IGPZFun1Ok4":{"duration":127.315,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=IGPZFun1Ok4","annotations":[]},"LE4FHZR2avg":{"duration":152.347,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LE4FHZR2avg","annotations":[]},"uw9x69DT8_g":{"duration":27.911,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=uw9x69DT8_g","annotations":[]},"SID0y9y6Uk4":{"duration":140.179,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SID0y9y6Uk4","annotations":[{"segment":[0.01,6.991126365054602],"label":"Doing fencing"}]},"J65l9OEP3pI":{"duration":120.164,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=J65l9OEP3pI","annotations":[{"segment":[0.2,114.55688802247916],"label":"Removing ice from 
car"}]},"h-MWdTHW_Eg":{"duration":62.07,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=h-MWdTHW_Eg","annotations":[{"segment":[14.040103155146614,49.091946894202295],"label":"Playing badminton"}]},"rDT4ngAfeHs":{"duration":11.7,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=rDT4ngAfeHs","annotations":[{"segment":[1.8987289734789392,11.684486753619806],"label":"Vacuuming floor"}]},"dWBnXy2nauU":{"duration":30.3,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=dWBnXy2nauU","annotations":[{"segment":[4.129972698907956,29.665003900156005],"label":"Hammer throw"}]},"sFKOnFMJF2Q":{"duration":119.281,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sFKOnFMJF2Q","annotations":[{"segment":[16.42160838314306,83.2969354859468],"label":"Longboarding"},{"segment":[91.0990569812739,110.79012551709944],"label":"Longboarding"}]},"uPqh5uA1mhA":{"duration":118.14,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uPqh5uA1mhA","annotations":[{"segment":[31.332672418096724,103.5821288174727],"label":"Shot put"}]},"h5fX5z-unJc":{"duration":21.85,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=h5fX5z-unJc","annotations":[{"segment":[0.01,20.94466551984489],"label":"Playing pool"}]},"8l7SuE4_lCk":{"duration":13.7,"subset":"training","resolution":"480x480","url":"https://www.youtube.com/watch?v=8l7SuE4_lCk","annotations":[{"segment":[0,13.63],"label":"Brushing teeth"}]},"BbQ39yhaC54":{"duration":99.87,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=BbQ39yhaC54","annotations":[{"segment":[74.78491727424199,88.02807958190328],"label":"Playing 
racquetball"}]},"q6sLCLnTuik":{"duration":120.419,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=q6sLCLnTuik","annotations":[{"segment":[38.48019500780031,79.78927457098284],"label":"Blow-drying hair"}]},"zLZTqSaGxJo":{"duration":136.6,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zLZTqSaGxJo","annotations":[{"segment":[44.52467962119845,93.31009413437764],"label":"Cricket"}]},"5TV-V6Cxero":{"duration":142.41,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=5TV-V6Cxero","annotations":[{"segment":[32.75834633385336,94.27741029641186],"label":"Starting a campfire"}]},"1XNHcfxTqW0":{"duration":46.84,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=1XNHcfxTqW0","annotations":[]},"Yojuicji624":{"duration":146.983,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Yojuicji624","annotations":[{"segment":[8.71345993447738,135.74653371606865],"label":"Slacklining"}]},"TzuHnPm0_-E":{"duration":191.31,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=TzuHnPm0_-E","annotations":[]},"RLMvrl_vaqc":{"duration":20.39,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=RLMvrl_vaqc","annotations":[{"segment":[1.2085967581903276,10.33668280031201],"label":"Smoking hookah"}]},"hZJDP-YVPXc":{"duration":108.856,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=hZJDP-YVPXc","annotations":[]},"lGwUq5vZGEk":{"duration":236.449,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=lGwUq5vZGEk","annotations":[{"segment":[63.80245397815913,178.1456209048362],"label":"Braiding 
hair"}]},"C6VTRxJn56A":{"duration":107.14,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=C6VTRxJn56A","annotations":[]},"xizex7lawY4":{"duration":100.82,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=xizex7lawY4","annotations":[]},"Yp-NJgTAOCc":{"duration":156.67000000000002,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Yp-NJgTAOCc","annotations":[{"segment":[71.20224748673947,137.15287123088922],"label":"Breakdancing"}]},"TRWDARS8lRE":{"duration":70.055,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=TRWDARS8lRE","annotations":[{"segment":[0.01,59.62279538893277],"label":"Doing fencing"}]},"R4ouBBtg-B4":{"duration":121.302,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=R4ouBBtg-B4","annotations":[{"segment":[7.758725064648844,13.814315359008917],"label":"Slacklining"},{"segment":[24.79007276753655,81.18275665834634],"label":"Slacklining"},{"segment":[89.31995604368174,97.45715542901716],"label":"Slacklining"},{"segment":[102.75579688923555,113.16384261466459],"label":"Slacklining"}]},"G16xScfD5WM":{"duration":103.747,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=G16xScfD5WM","annotations":[{"segment":[9.549210558898546,90.96027515132604],"label":"Capoeira"}]},"bMCb1DT26aE":{"duration":182.9,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bMCb1DT26aE","annotations":[]},"yrPVjR4jbv0":{"duration":170.83,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=yrPVjR4jbv0","annotations":[]},"U6M5hbiHn5Q":{"duration":70.78,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=U6M5hbiHn5Q","annotations":[{"segment":[5.274078479999999,58.534315620904835],"label":"Tumbling"}]},"FLZPaPf027E":{"duration":102.35,"subset":"validation","resolution":"640x480","url":"https://www.y
outube.com/watch?v=FLZPaPf027E","annotations":[{"segment":[15.1694042199688,102.353559],"label":"Playing harmonica"}]},"SpwOzrXw9JU":{"duration":169.692,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SpwOzrXw9JU","annotations":[{"segment":[88.1182418096724,163.27013572542904],"label":"Sharpening knives"}]},"Oent5pguFk4":{"duration":21.18,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Oent5pguFk4","annotations":[{"segment":[0.01,10.15878731279251],"label":"Spinning"}]},"Vox9KOxC1ZA":{"duration":64.528,"subset":"testing","resolution":"512x288","url":"https://www.youtube.com/watch?v=Vox9KOxC1ZA","annotations":[]},"WltsIClflnA":{"duration":68.871,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=WltsIClflnA","annotations":[{"segment":[0.01,68.841],"label":"Swinging at the playground"}]},"yxZ4ouqcld4":{"duration":137.3,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yxZ4ouqcld4","annotations":[{"segment":[49.45814508580344,81.58571138845554],"label":"Running a marathon"},{"segment":[98.5024336973479,119.27359906396256],"label":"Running a marathon"}]},"jL5kxxC791g":{"duration":141.827,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=jL5kxxC791g","annotations":[]},"VcbMGRcN9Cc":{"duration":58.352,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=VcbMGRcN9Cc","annotations":[{"segment":[0.01,57.84226330753417],"label":"Table soccer"}]},"WdCoVsU8Bbs":{"duration":23.128,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=WdCoVsU8Bbs","annotations":[{"segment":[7.757130904836194,10.318788087363494],"label":"Playing blackjack"}]},"C8Toxe4fE30":{"duration":7.52,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=C8Toxe4fE30","annotations":[{"segment":[0.01,7.317870677067083],"label":"Discus 
throw"}]},"zsw9WEsSowI":{"duration":89.33,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=zsw9WEsSowI","annotations":[{"segment":[12.187207488299531,84.75332293291731],"label":"Playing flauta"}]},"hMUoeOFmquI":{"duration":107.904,"subset":"training","resolution":"640x356","url":"https://www.youtube.com/watch?v=hMUoeOFmquI","annotations":[{"segment":[13.963811232449299,16.995258970358815],"label":"Running a marathon"},{"segment":[74.03875195007801,75.55318096723869],"label":"Running a marathon"}]},"Wj7VGbIgjXE":{"duration":43.074,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=Wj7VGbIgjXE","annotations":[]},"bLqd98mmu3w":{"duration":94.645,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=bLqd98mmu3w","annotations":[{"segment":[0.2,75.9220171495327],"label":"Swimming"}]},"zRBspE-uJUo":{"duration":172.989,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zRBspE-uJUo","annotations":[{"segment":[14.258781420486807,20.446556271225443],"label":"Swimming"},{"segment":[24.751095297826232,41.70021771506685],"label":"Swimming"},{"segment":[45.735723052505094,55.42093586235686],"label":"Swimming"},{"segment":[65.10614867220865,86.35981011605006],"label":"Swimming"},{"segment":[86.89787749437514,89.05014642519832],"label":"Swimming"},{"segment":[116.22254885288105,117.83675097729785],"label":"Swimming"},{"segment":[148.23755765381418,156.30856827589815],"label":"Swimming"}]},"2GCNxOKMtBo":{"duration":193.91,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=2GCNxOKMtBo","annotations":[{"segment":[22.680304212168487,63.50485179407176],"label":"Ice fishing"},{"segment":[73.4841856474259,87.39477223088923],"label":"Ice fishing"},{"segment":[113.40152106084244,171.765503900156],"label":"Ice 
fishing"}]},"uO9qoK8zGQc":{"duration":189.5,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uO9qoK8zGQc","annotations":[{"segment":[5.321300901015633,165.8472114149872],"label":"Tai chi"}]},"UCOn2HkJJt8":{"duration":185.16,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=UCOn2HkJJt8","annotations":[{"segment":[7.4247155,178.29951248049923],"label":"Archery"}]},"-b2lkhdJXf4":{"duration":107.671,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=-b2lkhdJXf4","annotations":[{"segment":[3.862313572542902,93.36723244929797],"label":"Futsal"}]},"Ptw9hFDV4ow":{"duration":103.7,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ptw9hFDV4ow","annotations":[{"segment":[7.7612480499219965,92.67728609999999],"label":"Doing kickboxing"}]},"KwcoiO_6dGg":{"duration":52.617,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KwcoiO_6dGg","annotations":[]},"7XGimCWcafc":{"duration":223.353,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7XGimCWcafc","annotations":[{"segment":[53.29006708268331,179.04279875195007],"label":"Changing car wheel"}]},"xYuqZ_RrCC0":{"duration":102.4,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xYuqZ_RrCC0","annotations":[{"segment":[11.788288000000001,73.06035200000001],"label":"Shaving"}]},"EY4YIa-kNgs":{"duration":201.78,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EY4YIa-kNgs","annotations":[{"segment":[43.69971117,77.55049173],"label":"Washing dishes"},{"segment":[102.34130539,178.26779842277693],"label":"Washing dishes"}]},"C5KcdwGSvbw":{"duration":109.158,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=C5KcdwGSvbw","annotations":[{"segment":[10.715012480499219,68.03182527301092],"label":"Baking 
cookies"},{"segment":[75.52917940717629,82.8439422776911],"label":"Baking cookies"}]},"CtA4VynDEkI":{"duration":24.033,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CtA4VynDEkI","annotations":[{"segment":[0,24.032652],"label":"Using the monkey bar"}]},"YcDlkZkPb6g":{"duration":208.14,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=YcDlkZkPb6g","annotations":[{"segment":[55.09337067,197.3612893354134],"label":"Layup drill in basketball"}]},"e0Qj2F3V6fM":{"duration":214.623,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=e0Qj2F3V6fM","annotations":[{"segment":[162.7243540624025,171.09494840717628],"label":"Making an omelette"}]},"s9Q8SZbixV4":{"duration":234.85,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=s9Q8SZbixV4","annotations":[{"segment":[92.4588702,228.34431064820595],"label":"Cumbia"},{"segment":[15.564768568642746,90.64188754680187],"label":"Cumbia"}]},"Obj__zWaZqk":{"duration":9.149,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Obj__zWaZqk","annotations":[{"segment":[0,9.148661],"label":"Spread mulch"}]},"EcQ7DcVefdw":{"duration":151.57999999999998,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=EcQ7DcVefdw","annotations":[{"segment":[8.00033962,151.57986394557824],"label":"Polishing 
shoes"}]},"IaXie5azIcQ":{"duration":63.53,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IaXie5azIcQ","annotations":[]},"t13MQSYjlas":{"duration":7.34,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=t13MQSYjlas","annotations":[]},"zwx1Kig4nXg":{"duration":185.16,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=zwx1Kig4nXg","annotations":[{"segment":[5.7770957368907006,136.62831316848673],"label":"Cumbia"}]},"1G3rv9ssDY4":{"duration":105.767,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1G3rv9ssDY4","annotations":[{"segment":[11.220200862456268,97.02173648049921],"label":"BMX"}]},"RztM-NVwV5o":{"duration":235.43,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RztM-NVwV5o","annotations":[{"segment":[110.18411920150275,188.0475634372314],"label":"Paintball"}]},"KRfdZgxLZPY":{"duration":143.92000000000002,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KRfdZgxLZPY","annotations":[{"segment":[4.991623633209419,134.52934637464244],"label":"Skateboarding"}]},"okvQJRTfGHk":{"duration":57.888,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=okvQJRTfGHk","annotations":[{"segment":[2.2576782605304215,46.146943645241805],"label":"Canoeing"}]},"hhE8W4Jo88I":{"duration":201.155,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hhE8W4Jo88I","annotations":[]},"AIQ_SRvssmE":{"duration":135.4,"subset":"testing","resolution":"848x480","url":"https://www.youtube.com/watch?v=AIQ_SRvssmE","annotations":[]},"UFJ7-LQC2N4":{"duration":188.92,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UFJ7-LQC2N4","annotations":[]},"puK4NxGKNdQ":{"duration":181.05,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=puK4NxGKNdQ","annotations":[{"segment":[8.4732921561
76043,16.09925509673448],"label":"Mowing the lawn"},{"segment":[63.832134243192854,147.43528264586584],"label":"Mowing the lawn"}]},"MYRsm8m9mk4":{"duration":141.247,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=MYRsm8m9mk4","annotations":[{"segment":[7.710975039001561,47.147104524180975],"label":"Tug of war"},{"segment":[72.48316536661467,105.53020124804992],"label":"Tug of war"}]},"dwAvPDFGKcs":{"duration":57.261,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=dwAvPDFGKcs","annotations":[]},"yAT27EJRTXQ":{"duration":189.75,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yAT27EJRTXQ","annotations":[]},"LAkaYds-8X0":{"duration":118.678,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LAkaYds-8X0","annotations":[{"segment":[8.146328037441497,10.738341503900157],"label":"Playing ten pins"},{"segment":[11.664060599063962,13.330354970358815],"label":"Playing ten pins"},{"segment":[14.441217884555384,17.77380662714509],"label":"Playing ten pins"},{"segment":[19.81038863650546,22.402402102964118],"label":"Playing ten pins"},{"segment":[30.363586321372853,33.32588742589704],"label":"Playing ten pins"},{"segment":[40.36135254914196,45.175091843993755],"label":"Playing ten pins"},{"segment":[48.13739294851794,51.65512551014041],"label":"Playing ten pins"},{"segment":[55.913433347893914,59.06087827145086],"label":"Playing ten pins"},{"segment":[76.64954107956318,79.05641072698907],"label":"Playing ten pins"},{"segment":[82.57414328861154,86.6473073073323],"label":"Playing ten pins"},{"segment":[88.86903313572543,91.64619042121684],"label":"Playing ten pins"},{"segment":[95.90449825897036,102.56967574414976],"label":"Playing ten pins"},{"segment":[103.68053865834634,107.38341503900156],"label":"Playing ten 
pins"}]},"8FO4W-SBpxs":{"duration":177.981,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=8FO4W-SBpxs","annotations":[{"segment":[0.2,177.961],"label":"Capoeira"}]},"GTxmHim5JnY":{"duration":15.14,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GTxmHim5JnY","annotations":[{"segment":[0,15.139409],"label":"Swinging at the playground"}]},"-LtQMRfj0eM":{"duration":227.695,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-LtQMRfj0eM","annotations":[{"segment":[16.336171606864273,213.45438533541343],"label":"Trimming branches or hedges"}]},"QeL3ScQVelo":{"duration":170.67000000000002,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=QeL3ScQVelo","annotations":[{"segment":[72.42017680707228,124.87155486219447],"label":"Preparing salad"}]},"IoGpS8NQklE":{"duration":231.968,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IoGpS8NQklE","annotations":[{"segment":[0.01,231.881],"label":"Beach soccer"}]},"aYrhuTGO440":{"duration":139.85,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=aYrhuTGO440","annotations":[{"segment":[0.7129336345227558,138.82399234168787],"label":"Walking the dog"}]},"hz3n1wrXYAc":{"duration":97.78,"subset":"validation","resolution":"850x480","url":"https://www.youtube.com/watch?v=hz3n1wrXYAc","annotations":[{"segment":[40.17902262090483,61.52651092043682],"label":"BMX"}]},"E-6XZrDYRuM":{"duration":207.91,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=E-6XZrDYRuM","annotations":[{"segment":[3.567895826037123,52.221020726543344],"label":"Horseback riding"},{"segment":[58.38374988060747,171.58335265366617],"label":"Horseback 
riding"}]},"4gYF0U37kr0":{"duration":215.853,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=4gYF0U37kr0","annotations":[]},"-XCESzrIWXA":{"duration":171.154,"subset":"training","resolution":"488x276","url":"https://www.youtube.com/watch?v=-XCESzrIWXA","annotations":[{"segment":[26.392940110623687,29.3254890118041],"label":"Futsal"},{"segment":[46.387591709581024,146.89403896573208],"label":"Futsal"}]},"nHwqBo0xvog":{"duration":48.09,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nHwqBo0xvog","annotations":[{"segment":[1.72780184,32.37813128],"label":"Playing kickball"},{"segment":[38.653615280000004,41.988518080000006],"label":"Playing kickball"}]},"Po6tK3rYE8E":{"duration":106.952,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Po6tK3rYE8E","annotations":[{"segment":[0.2,86.73291731669266],"label":"Hanging wallpaper"}]},"wvewX55VADs":{"duration":216.9,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wvewX55VADs","annotations":[{"segment":[108.61778007332293,212.15996294695788],"label":"Tango"}]},"zNrIr2VuyCA":{"duration":116.91,"subset":"testing","resolution":"620x480","url":"https://www.youtube.com/watch?v=zNrIr2VuyCA","annotations":[]},"ZJbGChNlQ7U":{"duration":73.78999999999999,"subset":"testing","resolution":"638x360","url":"https://www.youtube.com/watch?v=ZJbGChNlQ7U","annotations":[]},"FgRIl4bNl_M":{"duration":121.16,"subset":"validation","resolution":"640x358","url":"https://www.youtube.com/watch?v=FgRIl4bNl_M","annotations":[{"segment":[6.42382527301092,115.25098283931358],"label":"Shoveling snow"}]},"352jrqIWSFY":{"duration":7.755,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=352jrqIWSFY","annotations":[{"segment":[0.7407823712948518,6.727267550702028],"label":"Doing a 
powerbomb"}]},"e_k31tUVWHI":{"duration":238.05,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=e_k31tUVWHI","annotations":[]},"RjZ7jJBE1Qw":{"duration":123.415,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RjZ7jJBE1Qw","annotations":[{"segment":[59.300250105242306,90.87570707020281],"label":"Swimming"}]},"v_Vg4a8igc0":{"duration":36.73,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=v_Vg4a8igc0","annotations":[{"segment":[0,36.733968],"label":"Bathing dog"}]},"zDdD_qA86rg":{"duration":172.9,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zDdD_qA86rg","annotations":[]},"1NAlbF88oUI":{"duration":64.645,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1NAlbF88oUI","annotations":[{"segment":[17.99508346333853,60.94133307332293],"label":"Arm wrestling"}]},"ATk8OkvNHHQ":{"duration":179.375,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ATk8OkvNHHQ","annotations":[{"segment":[14.785112278290011,173.7948561447324],"label":"BMX"}]},"RhEunVjB_Ns":{"duration":154.82999999999998,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=RhEunVjB_Ns","annotations":[{"segment":[8.212546496099845,152.4152027196734],"label":"Cheerleading"}]},"UBQfURrVB_Y":{"duration":229.58,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UBQfURrVB_Y","annotations":[{"segment":[0,180.51298595943837],"label":"Removing ice from car"}]},"qY3Tn9-of-Q":{"duration":20.9,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=qY3Tn9-of-Q","annotations":[]},"FGAlwzRvAHg":{"duration":183.485,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FGAlwzRvAHg","annotations":[{"segment":[19.748531981279253,169.43668018720751],"label":"Waxing 
skis"}]},"LG7p5JrgMfE":{"duration":161.32999999999998,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=LG7p5JrgMfE","annotations":[{"segment":[0.01,155.61884555382215],"label":"Tango"}]},"dKiy-7TZqeI":{"duration":17.532,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dKiy-7TZqeI","annotations":[{"segment":[2.5668536074766353,13.407714056074767],"label":"Calf roping"}]},"plZs-3GtoOo":{"duration":65.2,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=plZs-3GtoOo","annotations":[{"segment":[4.666435569999999,58.87015407644306],"label":"Getting a haircut"}]},"DU36SNYN-rw":{"duration":110.388,"subset":"training","resolution":"300x240","url":"https://www.youtube.com/watch?v=DU36SNYN-rw","annotations":[{"segment":[34.786745129669136,61.13512043681748],"label":"Disc dog"}]},"hq48S65dlyo":{"duration":105.65,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=hq48S65dlyo","annotations":[]},"55bimE5eU9E":{"duration":113.85,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=55bimE5eU9E","annotations":[{"segment":[15.4262685,76.4027217],"label":"Snatch"}]},"fbr7v8RMkf8":{"duration":202.804,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=fbr7v8RMkf8","annotations":[]},"TZOhIMjwEE8":{"duration":82.41,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=TZOhIMjwEE8","annotations":[]},"8d8juWOKb0c":{"duration":157.083,"subset":"training","resolution":"582x480","url":"https://www.youtube.com/watch?v=8d8juWOKb0c","annotations":[{"segment":[9.799750390015602,37.23905148205928],"label":"Doing fencing"},{"segment":[46.548814352574105,101.68535881435257],"label":"Doing fencing"},{"segment":[111.97640717628707,130.10825429017163],"label":"Doing fencing"},{"segment":[135.4988034321373,139.41920280811235],"label":"Doing 
fencing"}]},"dl1RH5wfGGM":{"duration":148.655,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=dl1RH5wfGGM","annotations":[]},"16725zS5kVM":{"duration":236.216,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=16725zS5kVM","annotations":[]},"QN6YFgrx_Ig":{"duration":158.69,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QN6YFgrx_Ig","annotations":[{"segment":[134.9195274648986,156.20958134009362],"label":"Breakdancing"}]},"oVPFTkC4-Lc":{"duration":70.334,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=oVPFTkC4-Lc","annotations":[{"segment":[10.53352751326053,70.133241],"label":"Mooping floor"}]},"Jz9ZxenBLa8":{"duration":117.145,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Jz9ZxenBLa8","annotations":[{"segment":[0.01,107.36738585189666],"label":"Riding bumper cars"}]},"El4QfhJ6RvE":{"duration":11.332,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=El4QfhJ6RvE","annotations":[{"segment":[0.2,10.659587531981279],"label":"Powerbocking"}]},"o2mqzjWJJNw":{"duration":72.702,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=o2mqzjWJJNw","annotations":[{"segment":[11.33244929797192,61.43878627145086],"label":"Welding"}]},"o-aSCtwxsTw":{"duration":41.1,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=o-aSCtwxsTw","annotations":[{"segment":[0.01,41.041],"label":"Kite flying"}]},"unz00-r84NY":{"duration":49.668,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=unz00-r84NY","annotations":[{"segment":[0.01,49.621],"label":"Hurling"}]},"IOoj4yWCoPg":{"duration":79.11,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=IOoj4yWCoPg","annotations":[{"segment":[6.091469999999999,75.28361154446178],"label":"Hand washing 
clothes"}]},"BhCNHWQhhEw":{"duration":191.61,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=BhCNHWQhhEw","annotations":[{"segment":[2.9892521684867397,188.32288661466458],"label":"Getting a tattoo"}]},"QASZ8CTxBSg":{"duration":41.61,"subset":"validation","resolution":"270x480","url":"https://www.youtube.com/watch?v=QASZ8CTxBSg","annotations":[{"segment":[0.8428907956318253,34.62336037441498],"label":"Wrapping presents"}]},"6gZuc4umTPk":{"duration":233.13,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=6gZuc4umTPk","annotations":[{"segment":[21.45799116318394,222.94489123782637],"label":"Doing nails"}]},"uyGxlvak-Bg":{"duration":156.29,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=uyGxlvak-Bg","annotations":[{"segment":[71.19768007046812,93.62982584609507],"label":"Cleaning shoes"}]},"_vUE7PhDBcA":{"duration":136.417,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_vUE7PhDBcA","annotations":[{"segment":[0.009999999999999998,120.02980073634944],"label":"Running a marathon"}]},"gnVupEzu59g":{"duration":193.376,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=gnVupEzu59g","annotations":[{"segment":[12.393933578546754,56.13723397722937],"label":"Snowboarding"},{"segment":[56.438911911013385,183.14364357414104],"label":"Snowboarding"}]},"usXxdwUHwA0":{"duration":211.4,"subset":"testing","resolution":"900x720","url":"https://www.youtube.com/watch?v=usXxdwUHwA0","annotations":[]},"8P0N5psQrEI":{"duration":110.179,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8P0N5psQrEI","annotations":[{"segment":[15.680510230625904,95.62709278514303],"label":"Putting on 
shoes"}]},"qI_Bo0D6p7I":{"duration":221.286,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=qI_Bo0D6p7I","annotations":[{"segment":[2.415928237129485,211.91141965678628],"label":"Hula hoop"}]},"Qv97mL6UyBk":{"duration":23.1,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Qv97mL6UyBk","annotations":[]},"qsTCTQo-wI8":{"duration":196.232,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qsTCTQo-wI8","annotations":[{"segment":[29.64873545679954,193.78657848909657],"label":"Playing congas"}]},"UBtVJ-w9O28":{"duration":236.171,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=UBtVJ-w9O28","annotations":[{"segment":[13.996627145085803,225.05103120124807],"label":"Capoeira"}]},"NsYBZUTzgDw":{"duration":33.066,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=NsYBZUTzgDw","annotations":[{"segment":[8.711277691107643,33.021],"label":"Clipping cat claws"}]},"-MFzpFMdWZs":{"duration":46.97,"subset":"validation","resolution":"128x96","url":"https://www.youtube.com/watch?v=-MFzpFMdWZs","annotations":[{"segment":[0.6954810093603744,35.908782641185645],"label":"Shaving legs"}]},"914SKlbnHEs":{"duration":226.209,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=914SKlbnHEs","annotations":[]},"W0RYvQUUNe0":{"duration":11.54,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=W0RYvQUUNe0","annotations":[{"segment":[0.20000000000000004,11.301],"label":"Windsurfing"}]},"w6ofRgXA0xc":{"duration":124.51,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=w6ofRgXA0xc","annotations":[{"segment":[39.30498345,95.36709485],"label":"Starting a 
campfire"}]},"w8lqIgJn09Y":{"duration":61.7,"subset":"training","resolution":"810x480","url":"https://www.youtube.com/watch?v=w8lqIgJn09Y","annotations":[{"segment":[4.48497648,52.55179258598915],"label":"Hammer throw"}]},"priCnteGpSE":{"duration":227.79,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=priCnteGpSE","annotations":[]},"LMxfRVcBrD8":{"duration":187.13,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LMxfRVcBrD8","annotations":[{"segment":[27.865837134165364,147.4992740452418],"label":"Preparing pasta"}]},"EwzkySgI6CQ":{"duration":130.57,"subset":"training","resolution":"362x272","url":"https://www.youtube.com/watch?v=EwzkySgI6CQ","annotations":[{"segment":[3.1543668666146645,123.63083041731669],"label":"Getting a tattoo"}]},"mTtBz5d83C4":{"duration":21.34,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=mTtBz5d83C4","annotations":[{"segment":[0.19999999999999998,10.952536775351016],"label":"Snow tubing"}]},"QQBmydn6--I":{"duration":226.67000000000002,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QQBmydn6--I","annotations":[{"segment":[12.376851575663027,115.9887804804992],"label":"Starting a campfire"}]},"wz_kM0oBW5g":{"duration":72.934,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=wz_kM0oBW5g","annotations":[{"segment":[32.4277091491823,67.81373541341654],"label":"Curling"}]},"FX4inHmWQtE":{"duration":156.782,"subset":"validation","resolution":"1920x960","url":"https://www.youtube.com/watch?v=FX4inHmWQtE","annotations":[{"segment":[58.426737909516376,156.701],"label":"River tubing"}]},"VBY8feOQgy4":{"duration":159.289,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VBY8feOQgy4","annotations":[{"segment":[6.459073322932918,87.44591575663027],"label":"Doing 
fencing"}]},"QMm6gYzsMw8":{"duration":185.78,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QMm6gYzsMw8","annotations":[{"segment":[0.5796644929797192,185.20280550702026],"label":"Zumba"}]},"g7_YL5C71u8":{"duration":23.22,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=g7_YL5C71u8","annotations":[]},"v2zVnmbPmeQ":{"duration":238.12,"subset":"validation","resolution":"540x360","url":"https://www.youtube.com/watch?v=v2zVnmbPmeQ","annotations":[{"segment":[25.94118744,223.75675507020281],"label":"Putting on makeup"}]},"bDkSq9T55RI":{"duration":40.403,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=bDkSq9T55RI","annotations":[{"segment":[0.01,29.96496645111607],"label":"Decorating the Christmas tree"}]},"ObkyDlB5wvs":{"duration":167.834,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ObkyDlB5wvs","annotations":[{"segment":[44.904065695961165,48.30787242510108],"label":"Doing fencing"},{"segment":[60.875774194233074,65.58873735765756],"label":"Doing fencing"},{"segment":[121.35880145818078,131.57022164560053],"label":"Doing fencing"}]},"4zFiORtQrlA":{"duration":106.37,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=4zFiORtQrlA","annotations":[{"segment":[42.44288299531981,100.13867706708267],"label":"Fixing 
bicycle"}]},"tuhHQ-lHIs4":{"duration":95.67,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=tuhHQ-lHIs4","annotations":[{"segment":[23.730002334787265,54.026797768509375],"label":"Dodgeball"},{"segment":[59.99660853042121,74.62264742589704],"label":"Dodgeball"}]},"ql7I6qtyhBU":{"duration":187.78,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ql7I6qtyhBU","annotations":[{"segment":[35.153682065649974,80.56052140044785],"label":"Curling"},{"segment":[87.59125781357785,132.41220244728157],"label":"Curling"}]},"PpbBWqlsbxk":{"duration":123.925,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PpbBWqlsbxk","annotations":[{"segment":[20.29971027412525,110.19842720239421],"label":"Rafting"}]},"jlhW3eBciyM":{"duration":89.07,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=jlhW3eBciyM","annotations":[]},"fOuFF7dGPtI":{"duration":145.636,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fOuFF7dGPtI","annotations":[{"segment":[119.9615934102964,122.6879932605304],"label":"Playing ten pins"}]},"P2H4fTMMHNE":{"duration":90.26,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=P2H4fTMMHNE","annotations":[{"segment":[15.792235229999998,88.05534028783151],"label":"Ironing clothes"}]},"drfE5oASHaU":{"duration":201.36,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=drfE5oASHaU","annotations":[{"segment":[14.136274516380656,20.419063190327613],"label":"Paintball"}]},"WJ4tCIdAgVo":{"duration":44.652,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WJ4tCIdAgVo","annotations":[{"segment":[0.01,44.65167800453515],"label":"Hitting a 
pinata"}]},"oNpkjfX2rTc":{"duration":58.05,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oNpkjfX2rTc","annotations":[{"segment":[30.322987519500785,36.568617784711385],"label":"Sharpening knives"}]},"JUhfg2uTcvM":{"duration":184.86,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JUhfg2uTcvM","annotations":[]},"_j5JUQzOCtc":{"duration":129.89,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_j5JUQzOCtc","annotations":[{"segment":[14.88042752,49.027085803432136],"label":"Snatch"},{"segment":[58.1436926677067,125.80917472698907],"label":"Snatch"}]},"2AjyB3mCW_U":{"duration":227.02,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=2AjyB3mCW_U","annotations":[]},"3Y46yorcd5Q":{"duration":234.034,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3Y46yorcd5Q","annotations":[{"segment":[10.264006083414445,195.88966735021017],"label":"Surfing"}]},"MYzG-TsW8w8":{"duration":48.437,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MYzG-TsW8w8","annotations":[{"segment":[11.271747008580574,21.60795399923478],"label":"Fun sliding down"}]},"enASD1KDX24":{"duration":63.809,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=enASD1KDX24","annotations":[{"segment":[46.082716510903424,60.80057943925234],"label":"Raking leaves"}]},"7ET-TtUVU7s":{"duration":130.497,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7ET-TtUVU7s","annotations":[{"segment":[25.8037492211838,104.65329439252338],"label":"Canoeing"}]},"-JqLjPz-07E":{"duration":222.59,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-JqLjPz-07E","annotations":[{"segment":[32.42855434,58.733767820000004],"label":"Getting a haircut"},{"segment":[69.43125098,180.86893188],"label":"Getting a 
haircut"}]},"PziLzNjG2bI":{"duration":21.11,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PziLzNjG2bI","annotations":[{"segment":[0.01,19.675804212168487],"label":"High jump"}]},"ZWlwKbuK2fM":{"duration":174.5,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZWlwKbuK2fM","annotations":[{"segment":[0,173.1368206564997],"label":"Doing karate"}]},"j_ImkPKoA-U":{"duration":23.31,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=j_ImkPKoA-U","annotations":[{"segment":[0.2,22.621813959438377],"label":"Kayaking"}]},"xBtydVn3MVI":{"duration":174.13,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=xBtydVn3MVI","annotations":[{"segment":[18.246044385227883,168.23345904877928],"label":"Shaving"}]},"afAw8FA9Wbk":{"duration":84.57,"subset":"testing","resolution":"426x180","url":"https://www.youtube.com/watch?v=afAw8FA9Wbk","annotations":[]},"p4RUPQAV0ro":{"duration":34.23,"subset":"training","resolution":"480x272","url":"https://www.youtube.com/watch?v=p4RUPQAV0ro","annotations":[{"segment":[5.392897678627144,27.071278446177843],"label":"Clean and jerk"}]},"HQdnIIRVqEk":{"duration":143.904,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HQdnIIRVqEk","annotations":[{"segment":[27.833453978159127,125.71694227769112],"label":"Rollerblading"}]},"PyM_VG5KFa4":{"duration":93.414,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PyM_VG5KFa4","annotations":[{"segment":[12.970069968798752,81.75516014040562],"label":"Playing drums"}]},"CrCtYGvG9Uw":{"duration":99.5,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CrCtYGvG9Uw","annotations":[{"segment":[9.616824141172605,86.87383348696699],"label":"Getting a 
haircut"}]},"J_QIJTz0UYM":{"duration":112.64,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=J_QIJTz0UYM","annotations":[]},"Z1img-5JbDo":{"duration":11.31,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Z1img-5JbDo","annotations":[{"segment":[1.2699336973478939,5.443841653666146],"label":"High jump"}]},"IN8-B39kq_k":{"duration":66.76,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IN8-B39kq_k","annotations":[{"segment":[2.916078521060842,5.832157042121684],"label":"Getting a tattoo"},{"segment":[7.602633287051481,9.581400854914197],"label":"Getting a tattoo"},{"segment":[25.932269705148205,32.07686373166926],"label":"Getting a tattoo"},{"segment":[44.78263443057722,49.156752212168485],"label":"Getting a tattoo"},{"segment":[54.57232660842433,57.90498777535101],"label":"Getting a tattoo"}]},"OMGTFZ9csg0":{"duration":31.371,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OMGTFZ9csg0","annotations":[{"segment":[11.76839469578783,20.60689859594384],"label":"Waxing skis"}]},"9g1omTdpoYs":{"duration":125.017,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9g1omTdpoYs","annotations":[{"segment":[30.035101899314068,87.1798074851794],"label":"Making a cake"}]},"UomiLRs--C8":{"duration":53.731,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UomiLRs--C8","annotations":[{"segment":[6.194873634945398,50.749900156006234],"label":"Welding"}]},"cwhkG3LhZO8":{"duration":209.26,"subset":"training","resolution":"352x288","url":"https://www.youtube.com/watch?v=cwhkG3LhZO8","annotations":[{"segment":[25.463560126078512,209.258231292517],"label":"Playing 
guitarra"}]},"fSUwyioi_ws":{"duration":103.469,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=fSUwyioi_ws","annotations":[{"segment":[66.15071762870515,95.19249609984399],"label":"Sharpening knives"}]},"STAvUAslEYM":{"duration":157.362,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=STAvUAslEYM","annotations":[{"segment":[42.63298130841121,133.79636448598131],"label":"Futsal"}]},"MDsJa6Yitwc":{"duration":41.053,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=MDsJa6Yitwc","annotations":[]},"dojDT4CtenU":{"duration":76.98,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dojDT4CtenU","annotations":[{"segment":[64.84563547581902,69.40884686115444],"label":"Cricket"},{"segment":[1.2008451222402639,49.47481817784712],"label":"Cricket"}]},"_n9eNF1WaFU":{"duration":201.55,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=_n9eNF1WaFU","annotations":[{"segment":[22.010053697347896,102.81839370046802],"label":"Grooming dog"},{"segment":[135.83347424648986,183.62673370358814],"label":"Grooming dog"}]},"5QS_VBDwKzw":{"duration":142.5,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5QS_VBDwKzw","annotations":[{"segment":[7.33315600624025,90.88669110764431],"label":"Removing curlers"}]},"b8S0HFHh_p0":{"duration":187.43,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=b8S0HFHh_p0","annotations":[{"segment":[20.154455430000002,139.70003615288613],"label":"Bungee jumping"}]},"HdNOiIdGqvE":{"duration":71.5,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HdNOiIdGqvE","annotations":[{"segment":[49.4128143525741,58.3361216848674],"label":"Washing 
hands"}]},"IYLJU87AkOg":{"duration":160.334,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IYLJU87AkOg","annotations":[]},"v-LmMLpvsbk":{"duration":224.28,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=v-LmMLpvsbk","annotations":[{"segment":[18.008261776911077,28.848186341653665],"label":"Breakdancing"}]},"hkBbDre6unA":{"duration":73.03,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=hkBbDre6unA","annotations":[]},"f7qFnCkFIuQ":{"duration":34.552,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=f7qFnCkFIuQ","annotations":[{"segment":[2.6951086206713573,34.5512925170068],"label":"Painting fence"}]},"cXfflEWa83E":{"duration":181.07,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=cXfflEWa83E","annotations":[{"segment":[12.802588280758012,169.02799949571127],"label":"Wakeboarding"}]},"dUa0E0ASviM":{"duration":92.23,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=dUa0E0ASviM","annotations":[{"segment":[11.779472698907956,79.58326677067083],"label":"Tug of war"}]},"CLbEvGHmK8k":{"duration":175.03,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CLbEvGHmK8k","annotations":[{"segment":[0.2,166.2937889204368],"label":"Doing step aerobics"}]},"70rKlFJdkKw":{"duration":147.656,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=70rKlFJdkKw","annotations":[{"segment":[0.01,147.6556916099773],"label":"Swinging at the playground"}]},"07GFy_Z7Nuc":{"duration":60.16,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=07GFy_Z7Nuc","annotations":[{"segment":[2.203109712168487,27.234186016380654],"label":"Using parallel bars"},{"segment":[27.195028859999997,54.98399345475819],"label":"Using parallel 
bars"}]},"RrEJ2-TfWCI":{"duration":67.39,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RrEJ2-TfWCI","annotations":[{"segment":[0,67.384307],"label":"Playing lacrosse"}]},"nyEcPLZQerc":{"duration":86.379,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nyEcPLZQerc","annotations":[{"segment":[1.6170651724028144,5.929238965476986],"label":"Running a marathon"},{"segment":[16.170651724028144,19.674292930900908],"label":"Running a marathon"},{"segment":[21.830379827437994,24.525488448109353],"label":"Running a marathon"},{"segment":[49.859508736349454,52.150351029641186],"label":"Running a marathon"},{"segment":[62.930785351014045,64.27833964118565],"label":"Running a marathon"}]},"sWv11vroZKA":{"duration":63.53,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=sWv11vroZKA","annotations":[]},"ZmkJColoU0M":{"duration":156.108,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZmkJColoU0M","annotations":[]},"5O9myPtSriI":{"duration":128.24,"subset":"validation","resolution":"648x480","url":"https://www.youtube.com/watch?v=5O9myPtSriI","annotations":[{"segment":[18.40628757566303,43.81496716380656],"label":"Archery"}]},"yfeKtTU0aOE":{"duration":41.1,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yfeKtTU0aOE","annotations":[{"segment":[6.658758190327613,37.537825273010924],"label":"Using the rowing machine"}]},"NKNgAE07TIQ":{"duration":169.25,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NKNgAE07TIQ","annotations":[{"segment":[40.662306416066166,145.48656263338535],"label":"Mixing 
drinks"}]},"l5xW6UZWwBs":{"duration":87.818,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=l5xW6UZWwBs","annotations":[{"segment":[8.488583463338534,32.31138221528861],"label":"Shuffleboard"},{"segment":[41.484528861154445,80.79686427457099],"label":"Shuffleboard"}]},"7xnwE0aWFSI":{"duration":203.59,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7xnwE0aWFSI","annotations":[]},"XoCvj2IbVGE":{"duration":221.73,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=XoCvj2IbVGE","annotations":[{"segment":[57.305343150000006,176.54858034321373],"label":"Ironing clothes"}]},"xIU6DO35R_c":{"duration":32.46,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=xIU6DO35R_c","annotations":[{"segment":[9.030204,21.696984],"label":"Polishing shoes"}]},"8oI4xGqkzSE":{"duration":205.009,"subset":"training","resolution":"855x472","url":"https://www.youtube.com/watch?v=8oI4xGqkzSE","annotations":[{"segment":[0.2,25.58614409882518],"label":"Disc dog"},{"segment":[30.063719316119585,71.96103027794581],"label":"Disc dog"},{"segment":[73.5601642841224,142.3229254446178],"label":"Disc dog"},{"segment":[146.16084702964122,198.29261522620905],"label":"Disc dog"}]},"qi_6u0mMJQM":{"duration":172.13,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qi_6u0mMJQM","annotations":[{"segment":[19.91359703563084,47.26127789170831],"label":"Installing carpet"}]},"9eqfQyr3J7k":{"duration":26.425,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=9eqfQyr3J7k","annotations":[{"segment":[0,26.401],"label":"Snow 
tubing"}]},"Uj5OYHrC8B8":{"duration":129.196,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=Uj5OYHrC8B8","annotations":[]},"23-lat1nVJg":{"duration":10.147,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=23-lat1nVJg","annotations":[{"segment":[0,5.799014040561623],"label":"Knitting"}]},"Ffi7vDa3C2I":{"duration":200.273,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ffi7vDa3C2I","annotations":[{"segment":[10.933595943837755,200.221],"label":"Rollerblading"}]},"dmiLuqRB09U":{"duration":68.15,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=dmiLuqRB09U","annotations":[{"segment":[3.6148217067082684,66.12997357566302],"label":"Getting a piercing"}]},"KGIDKn3t2Qk":{"duration":188.64,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=KGIDKn3t2Qk","annotations":[{"segment":[52.661464898595945,173.30107488299532],"label":"Mixing drinks"}]},"AP9XodmOtsA":{"duration":100.64,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=AP9XodmOtsA","annotations":[]},"WhEdTfs7U5E":{"duration":142.362,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WhEdTfs7U5E","annotations":[{"segment":[0.2,7.773251120521011],"label":"Playing beach volleyball"},{"segment":[54.190664954489336,130.59061703588142],"label":"Playing beach 
volleyball"}]},"M6FdaEstXbI":{"duration":114.057,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=M6FdaEstXbI","annotations":[{"segment":[12.62725585023401,29.16718252730109],"label":"Powerbocking"},{"segment":[34.14694539781591,47.48559594383776],"label":"Powerbocking"},{"segment":[50.15332605304212,67.06071294851795],"label":"Powerbocking"},{"segment":[75.77682683307333,101.03576911076443],"label":"Powerbocking"},{"segment":[101.92516848673948,106.19428549141965],"label":"Powerbocking"}]},"0-F3q8Aj9Zk":{"duration":207.4,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=0-F3q8Aj9Zk","annotations":[{"segment":[1.6177896639675111,8.73606418542456],"label":"High jump"},{"segment":[23.943287026719165,34.94425674169824],"label":"High jump"},{"segment":[47.88657405343833,85.4192942574846],"label":"High jump"},{"segment":[99.3322853676052,151.10155461456554],"label":"High jump"},{"segment":[161.1318505311641,195.42899140727533],"label":"High jump"}]},"Fn71KcANJ7o":{"duration":80.968,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=Fn71KcANJ7o","annotations":[]},"96c3BWVDoq4":{"duration":123.69,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=96c3BWVDoq4","annotations":[{"segment":[40.716317262214304,50.17176534680435],"label":"Getting a tattoo"},{"segment":[54.8030052249709,60.5920550726791],"label":"Getting a tattoo"},{"segment":[72.74905968658346,78.92404618517942],"label":"Getting a tattoo"},{"segment":[83.55528605912636,98.22087899329173],"label":"Getting a tattoo"}]},"lUmjnFf7IPQ":{"duration":195.12,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lUmjnFf7IPQ","annotations":[]},"kBh_98QimD4":{"duration":55.5,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kBh_98QimD4","annotations":[{"segment":[0,55.49569160997732],"label":"Playing 
saxophone"}]},"aM2ZdUJshvU":{"duration":40.89,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=aM2ZdUJshvU","annotations":[]},"9rW35YTKYq8":{"duration":236.98,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9rW35YTKYq8","annotations":[{"segment":[18.85511031869846,84.29343436594607],"label":"Playing violin"},{"segment":[89.46934700245153,214.06095729173168],"label":"Playing violin"}]},"eLJYFoCx-gc":{"duration":89.21000000000001,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eLJYFoCx-gc","annotations":[{"segment":[8.765059282371293,34.36459750390016],"label":"Long jump"}]},"kWMNCeb3tmg":{"duration":190.752,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kWMNCeb3tmg","annotations":[]},"4nL_8C15KgQ":{"duration":92.787,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4nL_8C15KgQ","annotations":[]},"PlcyBfZi76Q":{"duration":220.52,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PlcyBfZi76Q","annotations":[]},"NwfosNrFnwk":{"duration":127.62,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=NwfosNrFnwk","annotations":[{"segment":[2.389074044594437,41.01243776553784],"label":"Javelin throw"},{"segment":[63.70864118918499,116.2682701702626],"label":"Javelin throw"}]},"OJ43U4saH74":{"duration":23.058,"subset":"testing","resolution":"853x480","url":"https://www.youtube.com/watch?v=OJ43U4saH74","annotations":[]},"jwqotL-yNvE":{"duration":168.18,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jwqotL-yNvE","annotations":[{"segment":[84.74700231002438,131.1873100774371],"label":"Triple jump"},{"segment":[167.65738076443057,167.9821315192744],"label":"Triple 
jump"}]},"cCYLiE1fiEo":{"duration":203.454,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cCYLiE1fiEo","annotations":[{"segment":[42.90898620225079,191.22069459477416],"label":"Putting on shoes"}]},"j6D8Ctt94Nc":{"duration":43.608,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=j6D8Ctt94Nc","annotations":[]},"tXuNa_h804c":{"duration":234.89,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=tXuNa_h804c","annotations":[{"segment":[73.98578242121685,230.38147100468018],"label":"Breakdancing"}]},"FnPS6zn5S7M":{"duration":46.07,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=FnPS6zn5S7M","annotations":[]},"ns8v94HCtlM":{"duration":71.588,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ns8v94HCtlM","annotations":[{"segment":[22.622836969761117,71.58702947845805],"label":"Grooming dog"}]},"yWONID7ysSI":{"duration":50.62,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yWONID7ysSI","annotations":[]},"x2J5RWf_LJk":{"duration":108.53,"subset":"testing","resolution":"1920x816","url":"https://www.youtube.com/watch?v=x2J5RWf_LJk","annotations":[]},"r9AepFEEfsQ":{"duration":59.58,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=r9AepFEEfsQ","annotations":[{"segment":[6.130586583463338,58.33346021840873],"label":"Playing accordion"}]},"zW_8T8w7304":{"duration":199.553,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=zW_8T8w7304","annotations":[{"segment":[0.01,23.037237875909593],"label":"Mooping floor"},{"segment":[31.442716560363092,46.69710380251944],"label":"Mooping floor"},{"segment":[95.26209175713967,118.6106436583994],"label":"Mooping floor"},{"segment":[150.05336003120124,157.5248966302652],"label":"Mooping floor"},{"segment":[169.04351555382215,171.84534177847112],"label":"Mooping 
floor"}]},"bodOObk5K00":{"duration":144.1,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=bodOObk5K00","annotations":[{"segment":[38.66720706379275,46.76034342598194],"label":"Playing harmonica"},{"segment":[53.50462372780626,72.38860857291434],"label":"Playing harmonica"}]},"Ed7kAmkawTo":{"duration":233.802,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ed7kAmkawTo","annotations":[{"segment":[122.34025819032763,224.0969336973479],"label":"Braiding hair"}]},"BkjWeWUwG0A":{"duration":216.828,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=BkjWeWUwG0A","annotations":[{"segment":[0.01,46.84971795062278],"label":"Swinging at the playground"},{"segment":[53.61502018175965,56.659406185771246],"label":"Swinging at the playground"},{"segment":[105.03131713839981,213.61441893332588],"label":"Swinging at the playground"}]},"2PFU_Ee0x14":{"duration":150.0,"subset":"validation","resolution":"294x240","url":"https://www.youtube.com/watch?v=2PFU_Ee0x14","annotations":[{"segment":[41.182500000000005,116.97000000000001],"label":"Playing badminton"}]},"zPu3JBSPa94":{"duration":218.76,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zPu3JBSPa94","annotations":[{"segment":[0.8531793788758354,59.21064889398297],"label":"Getting a tattoo"},{"segment":[65.69481217343932,87.53620427266071],"label":"Getting a tattoo"},{"segment":[97.09181331607007,109.03632462033175],"label":"Getting a tattoo"},{"segment":[188.55264273155962,213.80675234628433],"label":"Getting a 
tattoo"}]},"yeEe8-aYA2E":{"duration":70.636,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yeEe8-aYA2E","annotations":[{"segment":[8.595220099843994,41.212978427457095],"label":"BMX"},{"segment":[47.27371054914196,69.64332183463338],"label":"BMX"}]},"poFH53rF9uY":{"duration":131.054,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=poFH53rF9uY","annotations":[{"segment":[53.60349692352316,57.67980871314311],"label":"Hurling"},{"segment":[90.29030303010266,93.55135172205827],"label":"Hurling"},{"segment":[107.00318052143619,109.44896757586854],"label":"Hurling"}]},"qu-3Dn-bTSU":{"duration":183.44,"subset":"testing","resolution":"810x480","url":"https://www.youtube.com/watch?v=qu-3Dn-bTSU","annotations":[]},"Ch_qHjUtOpE":{"duration":22.06,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ch_qHjUtOpE","annotations":[{"segment":[0,16.62467344],"label":"Cheerleading"}]},"0acEl97ZBME":{"duration":102.75,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0acEl97ZBME","annotations":[{"segment":[7.405676078434906,23.73017202236014],"label":"Smoking a cigarette"}]},"6VXQoPNphgI":{"duration":134.79,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6VXQoPNphgI","annotations":[]},"qNHfEf72V3g":{"duration":159.52,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=qNHfEf72V3g","annotations":[{"segment":[22.548293349999998,157.645639625585],"label":"Paintball"}]},"Uo9q4riquHM":{"duration":102.865,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Uo9q4riquHM","annotations":[{"segment":[0.01,10.558396221695002],"label":"Cutting the grass"}]},"-5h2E52tl4Q":{"duration":114.521,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-5h2E52tl4Q","annotations":[{"segment":[19.080166666666663,106.4566308411215],"label":"Cleaning 
sink"}]},"-HaFSqzE4Nc":{"duration":53.52,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=-HaFSqzE4Nc","annotations":[{"segment":[1.8369483566302653,52.019037553666145],"label":"Smoking hookah"}]},"Hxb8D4W-G0I":{"duration":186.132,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Hxb8D4W-G0I","annotations":[{"segment":[0.01,169.22552730109206],"label":"Longboarding"}]},"a0hFDXB1RgY":{"duration":114.17,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=a0hFDXB1RgY","annotations":[{"segment":[7.115195007800312,114.021],"label":"Kayaking"}]},"8cKjwHtCGU0":{"duration":81.71000000000001,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=8cKjwHtCGU0","annotations":[{"segment":[18.69956235,68.84081201248051],"label":"Playing racquetball"}]},"5z5xDI4IMVE":{"duration":66.897,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5z5xDI4IMVE","annotations":[]},"iSpQDXpCxiw":{"duration":60.0,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iSpQDXpCxiw","annotations":[]},"PQ-qpoPeqM0":{"duration":191.12,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PQ-qpoPeqM0","annotations":[{"segment":[6.8577811384564225,184.8619263409992],"label":"Fixing bicycle"}]},"0rDb-zfhH0E":{"duration":201.09,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0rDb-zfhH0E","annotations":[{"segment":[76.23027794581171,197.0066430078003],"label":"Doing karate"}]},"SvM0twHYsus":{"duration":49.25,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=SvM0twHYsus","annotations":[{"segment":[0.01,49.24952380952381],"label":"Ironing 
clothes"}]},"qS7TStZg_5c":{"duration":11.66,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qS7TStZg_5c","annotations":[{"segment":[0,11.474568636505461],"label":"Playing guitarra"}]},"slUvIngXl4g":{"duration":85.496,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=slUvIngXl4g","annotations":[{"segment":[47.04537285491419,52.117067082683306],"label":"Sumo"}]},"YptHsVTHquc":{"duration":170.481,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YptHsVTHquc","annotations":[{"segment":[6.64902133500306,168.08725833385336],"label":"Hula hoop"}]},"bV_X5E1-T50":{"duration":175.06,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=bV_X5E1-T50","annotations":[{"segment":[28.621771450858038,34.047699687987524],"label":"Smoking a cigarette"},{"segment":[12.88657956318253,16.68472932917317],"label":"Smoking a cigarette"},{"segment":[49.511595163806554,53.85233775351014],"label":"Smoking a cigarette"},{"segment":[153.96071372854914,156.9630241809672],"label":"Smoking a cigarette"}]},"3K62qZ2hGyw":{"duration":128.62,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3K62qZ2hGyw","annotations":[{"segment":[6.420705647425897,125.40440717628704],"label":"Belly dance"}]},"jIKAVLlyXIQ":{"duration":13.86,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=jIKAVLlyXIQ","annotations":[{"segment":[0.625015600624025,13.060670826833073],"label":"Chopping wood"}]},"6iuD3pSgBcw":{"duration":208.214,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6iuD3pSgBcw","annotations":[{"segment":[17.536184087363495,198.11296411856475],"label":"Playing 
drums"}]},"ZyZt6fhcg0M":{"duration":31.19,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZyZt6fhcg0M","annotations":[]},"l8TsSm4Yh4M":{"duration":163.82,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=l8TsSm4Yh4M","annotations":[{"segment":[91.49205441809673,149.76073712012482],"label":"Paintball"}]},"1dFLeGRT0NQ":{"duration":151.765,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1dFLeGRT0NQ","annotations":[{"segment":[18.030482047451635,128.42688798384015],"label":"Hula hoop"}]},"XobM2YRX0X8":{"duration":105.14,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=XobM2YRX0X8","annotations":[]},"BngR6rNiO_g":{"duration":115.96000000000001,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BngR6rNiO_g","annotations":[{"segment":[11.21610642384879,108.36206042433699],"label":"Tango"}]},"wkJWeSw8FSE":{"duration":20.95,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=wkJWeSw8FSE","annotations":[{"segment":[4.93385974726989,20.627058658990805],"label":"Starting a campfire"}]},"_4S7eaL-cR8":{"duration":92.72,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=_4S7eaL-cR8","annotations":[]},"mbmMY04yMUA":{"duration":135.0,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=mbmMY04yMUA","annotations":[{"segment":[16.84877566302652,135.000815],"label":"Washing 
dishes"}]},"K2qRHJ7P6Cg":{"duration":160.85,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=K2qRHJ7P6Cg","annotations":[]},"JsoDi1QN4yk":{"duration":17.624,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=JsoDi1QN4yk","annotations":[]},"J4vq7MLMAtY":{"duration":149.56,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=J4vq7MLMAtY","annotations":[{"segment":[15.816577316482581,146.30336760793108],"label":"Table soccer"}]},"YAMhMB1jdio":{"duration":12.47,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=YAMhMB1jdio","annotations":[]},"JGStyHrlN2U":{"duration":40.33,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JGStyHrlN2U","annotations":[{"segment":[1.7618185870291956,38.130787990703304],"label":"Playing polo"}]},"WaWkDf6b_j4":{"duration":85.2,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WaWkDf6b_j4","annotations":[{"segment":[16.57384399375975,74.48245553822153],"label":"High 
jump"}]},"UWgzslG97MQ":{"duration":220.357,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=UWgzslG97MQ","annotations":[{"segment":[17.18856237242687,27.501699795882992],"label":"Surfing"},{"segment":[33.689582249956665,44.002719673412784],"label":"Surfing"},{"segment":[49.84683088003792,58.09734081880281],"label":"Surfing"},{"segment":[65.31653701522211,73.91081820143553],"label":"Surfing"},{"segment":[81.81755689275191,89.38052433661971],"label":"Surfing"},{"segment":[94.53709304834778,127.19536155595883],"label":"Surfing"},{"segment":[133.03947276258398,146.1027801656284],"label":"Surfing"},{"segment":[151.94689137225353,172.57316621916578],"label":"Surfing"},{"segment":[178.76104867323943,190.10549965397817],"label":"Surfing"},{"segment":[199.73109457316693,213.48194445772234],"label":"Surfing"}]},"-lEsnrNNZFU":{"duration":102.075,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=-lEsnrNNZFU","annotations":[{"segment":[0.01,102.074943],"label":"Rope skipping"}]},"1PQiq8zajCE":{"duration":87.122,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1PQiq8zajCE","annotations":[{"segment":[25.64771028037383,43.560396],"label":"Throwing darts"}]},"j7vUMNMB4Yo":{"duration":93.345,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=j7vUMNMB4Yo","annotations":[{"segment":[45.265046016194,89.01834975451175],"label":"Roof shingle removal"}]},"pb0k7YrMwZY":{"duration":6.85,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=pb0k7YrMwZY","annotations":[{"segment":[0.01,6.849885],"label":"Ping-pong"}]},"tVC_5_SgseY":{"duration":137.81,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tVC_5_SgseY","annotations":[{"segment":[39.236050618895504,47.62074088813892],"label":"Making a sandwich"},{"segment":[72.55981963768346,89.9741763507275],"label":"Making a 
sandwich"}]},"uNlryAGT0Tk":{"duration":79.06,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=uNlryAGT0Tk","annotations":[]},"ABBQqwPOxw4":{"duration":6.99,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ABBQqwPOxw4","annotations":[{"segment":[1.6459157566302651,6.941],"label":"Tennis serve with ball bouncing"}]},"XuwqWdp-u2M":{"duration":41.448,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=XuwqWdp-u2M","annotations":[{"segment":[0.2,41.247641723356004],"label":"Hitting a pinata"}]},"PDt5sm0EBIY":{"duration":189.82,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=PDt5sm0EBIY","annotations":[]},"PemokF80u70":{"duration":185.737,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PemokF80u70","annotations":[]},"WXQol1TisqE":{"duration":60.581,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=WXQol1TisqE","annotations":[{"segment":[19.185500334299086,58.31257951950078],"label":"Calf roping"}]},"cMeKc9mp9wI":{"duration":54.2,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=cMeKc9mp9wI","annotations":[{"segment":[4.183854,51.87029849531981],"label":"Playing polo"}]},"7hRls13HJow":{"duration":132.54,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=7hRls13HJow","annotations":[{"segment":[12.3676040531552,130.89050458414835],"label":"Mooping floor"}]},"R52e9UAE60k":{"duration":77.58,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=R52e9UAE60k","annotations":[{"segment":[2.2984071762870513,75.96840561622464],"label":"Horseback riding"}]},"JEvEoAESqJ0":{"duration":9.64,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JEvEoAESqJ0","annotations":[{"segment":[0.3963975421216849,9.147060264430579],"label":"Triple 
jump"}]},"0FTaJwbFdZI":{"duration":203.66,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0FTaJwbFdZI","annotations":[{"segment":[6.354508438840955,188.09344936037442],"label":"Belly dance"}]},"STgbw-zqZJM":{"duration":27.516,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=STgbw-zqZJM","annotations":[{"segment":[0.2,26.452070202808116],"label":"Blowing leaves"}]},"2gc7pCOg-NE":{"duration":155.57,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=2gc7pCOg-NE","annotations":[{"segment":[5.095726989079563,135.64339937597504],"label":"Playing squash"}]},"FFyJjF4MjHM":{"duration":35.202,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=FFyJjF4MjHM","annotations":[{"segment":[0,35.181],"label":"Skiing"}]},"UAgb3FEjBN8":{"duration":234.1,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=UAgb3FEjBN8","annotations":[{"segment":[13.147783119488045,187.35590945270462],"label":"Doing motocross"}]},"M37LbI4NxzM":{"duration":224.65,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=M37LbI4NxzM","annotations":[]},"57buK1yvKPk":{"duration":74.862,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=57buK1yvKPk","annotations":[{"segment":[2.8613069856127575,8.583920956838273],"label":"Peeling potatoes"},{"segment":[26.102126991202095,53.54731516692667],"label":"Peeling potatoes"},{"segment":[60.437809375975036,73.28449349453979],"label":"Peeling potatoes"}]},"uOmCwWVJnLQ":{"duration":107.09,"subset":"training","resolution":"568x320","url":"https://www.youtube.com/watch?v=uOmCwWVJnLQ","annotations":[{"segment":[89.54774643369734,107.089749],"label":"Using uneven 
bars"}]},"4XnXyKYxnQQ":{"duration":22.62,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=4XnXyKYxnQQ","annotations":[]},"puPMvwv2kmg":{"duration":97.71000000000001,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=puPMvwv2kmg","annotations":[{"segment":[0,97.709569],"label":"Doing step aerobics"}]},"2QydTDAYJsA":{"duration":19.064,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2QydTDAYJsA","annotations":[{"segment":[0.19999999999999998,18.801000000000002],"label":"Calf roping"}]},"ki2kGFwczMg":{"duration":70.19,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=ki2kGFwczMg","annotations":[{"segment":[2.737672425897036,68.98934513260531],"label":"Javelin throw"}]},"MEbEcvzdytY":{"duration":174.034,"subset":"training","resolution":"712x480","url":"https://www.youtube.com/watch?v=MEbEcvzdytY","annotations":[{"segment":[100.37364649917924,106.06633304420222],"label":"Elliptical trainer"},{"segment":[148.8970213545413,157.84267158351327],"label":"Elliptical trainer"}]},"hQ4xkr6VpnA":{"duration":99.544,"subset":"training","resolution":"1280x640","url":"https://www.youtube.com/watch?v=hQ4xkr6VpnA","annotations":[{"segment":[14.675268404836194,36.57170062792512],"label":"Braiding hair"},{"segment":[38.279932929017164,66.07753128315133],"label":"Braiding hair"}]},"30y8Uy0B_uk":{"duration":172.66,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=30y8Uy0B_uk","annotations":[{"segment":[7.677259750390015,39.19443135725429],"label":"Rock climbing"},{"segment":[54.81832839313572,163.64685257410298],"label":"Rock climbing"}]},"1Cdqwg7Giv4":{"duration":186.36,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=1Cdqwg7Giv4","annotations":[{"segment":[16.57206023751154,32.85338257611937],"label":"Discus throw"},{"segment":[37.79592685748246,59.601269275260805],"label":"Discus 
throw"},{"segment":[67.1604546467573,86.34915597440224],"label":"Discus throw"},{"segment":[91.58243815466905,110.48040158341028],"label":"Discus throw"},{"segment":[119.49327644942532,141.5893567661074],"label":"Discus throw"},{"segment":[147.11337684527788,176.76864253345644],"label":"Discus throw"}]},"VViW4wfqxDA":{"duration":46.649,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=VViW4wfqxDA","annotations":[]},"7Lkcan0X5VA":{"duration":146.59,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=7Lkcan0X5VA","annotations":[{"segment":[10.06217354544522,142.69991408424337],"label":"Playing saxophone"}]},"hfBTv5b_Cok":{"duration":192.378,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=hfBTv5b_Cok","annotations":[{"segment":[24.271905397673088,50.94103601980771],"label":"Playing drums"},{"segment":[68.02126574409618,76.71120667912771],"label":"Playing drums"}]},"r8RJSDKy9iA":{"duration":98.918,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=r8RJSDKy9iA","annotations":[{"segment":[6.789936500861395,98.91702947845805],"label":"Putting in contact lenses"}]},"VRwI8Iydb_o":{"duration":141.155,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VRwI8Iydb_o","annotations":[{"segment":[48.95390840891178,60.588694611625115],"label":"Building sandcastles"}]},"kMRQmijCc5Y":{"duration":108.716,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kMRQmijCc5Y","annotations":[{"segment":[18.646973478939156,32.54744461778471],"label":"Elliptical trainer"},{"segment":[56.44947425897036,108.681],"label":"Elliptical 
trainer"}]},"zDPsJ3ECaTI":{"duration":221.4,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zDPsJ3ECaTI","annotations":[{"segment":[22.27363104524181,202.53464508580342],"label":"Zumba"}]},"AWwPqCohlrE":{"duration":93.786,"subset":"testing","resolution":"853x480","url":"https://www.youtube.com/watch?v=AWwPqCohlrE","annotations":[]},"YJ4ynq5Adg0":{"duration":120.814,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=YJ4ynq5Adg0","annotations":[{"segment":[49.642280373831774,120.721],"label":"Rafting"}]},"qm1dudWLlUg":{"duration":18.3,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qm1dudWLlUg","annotations":[{"segment":[0.8912641738308821,9.498181382553623],"label":"Drinking beer"}]},"F2Igw6lvqwk":{"duration":49.389,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=F2Igw6lvqwk","annotations":[{"segment":[2.9250514820592826,49.141],"label":"Putting on shoes"}]},"wxoe1M2L4Qo":{"duration":216.387,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wxoe1M2L4Qo","annotations":[{"segment":[81.00879875195008,84.72170202808113],"label":"Scuba diving"},{"segment":[159.31730421216847,166.06803744149767],"label":"Scuba diving"}]},"eAOBdYKju70":{"duration":170.92000000000002,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=eAOBdYKju70","annotations":[{"segment":[0,64.79571837285492],"label":"Horseback riding"},{"segment":[91.46062305304213,105.05972443993761],"label":"Horseback riding"},{"segment":[137.32425910296413,170.72203900000002],"label":"Horseback riding"}]},"lUds16WLsHI":{"duration":221.66,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=lUds16WLsHI","annotations":[{"segment":[99.91960842433697,155.58416536661466],"label":"Shaving 
legs"}]},"Bhz-WgJH8R0":{"duration":42.354,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Bhz-WgJH8R0","annotations":[{"segment":[0.2,40.08531669266771],"label":"Skiing"}]},"bkRQgwbP0WM":{"duration":34.621,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=bkRQgwbP0WM","annotations":[{"segment":[0.7561518159126365,33.270679900156004],"label":"Playing rubik cube"}]},"zfBQjkv0dfI":{"duration":203.18,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=zfBQjkv0dfI","annotations":[]},"bx5ZHOUG-nI":{"duration":119.82,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=bx5ZHOUG-nI","annotations":[{"segment":[14.484755999999999,98.14700468018721],"label":"Hand washing clothes"}]},"e5e16U5hnzY":{"duration":151.28,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=e5e16U5hnzY","annotations":[{"segment":[55.46073432917317,63.720843697347895],"label":"Discus throw"},{"segment":[99.35731554290172,129.56571551794073],"label":"Discus throw"}]},"4rKTw99bM8g":{"duration":9.57,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=4rKTw99bM8g","annotations":[{"segment":[0,9.441666],"label":"Kayaking"}]},"kqVIHq94kgs":{"duration":76.95,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=kqVIHq94kgs","annotations":[{"segment":[41.09109446567863,62.92636218174727],"label":"Polishing forniture"}]},"KptXzRFIvTw":{"duration":144.475,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KptXzRFIvTw","annotations":[{"segment":[25.49069670771346,29.766423496871493],"label":"Decorating the Christmas tree"},{"segment":[40.79329784785799,45.519101141137924],"label":"Decorating the Christmas tree"},{"segment":[63.522161306013835,65.99758207868427],"label":"Decorating the Christmas tree"},{"segment":[106.05438960722931,116.18111082218257],"label":"Decorating 
the Christmas tree"}]},"etPTFvpHxoY":{"duration":75.627,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=etPTFvpHxoY","annotations":[]},"axoyB4pypWY":{"duration":138.902,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=axoyB4pypWY","annotations":[{"segment":[46.1561259511605,118.09900626365055],"label":"Disc dog"}]},"32z1yiC0Co0":{"duration":192.517,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=32z1yiC0Co0","annotations":[{"segment":[34.53886748667226,41.74697896215168],"label":"Running a marathon"}]},"v7OluHKECRA":{"duration":120.651,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=v7OluHKECRA","annotations":[{"segment":[0.01,97.87587563182528],"label":"Baking cookies"}]},"a50gTER-FsQ":{"duration":211.488,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=a50gTER-FsQ","annotations":[{"segment":[22.435475167550702,25.734809751014044],"label":"Curling"},{"segment":[32.003545459594385,35.302880043057726],"label":"Curling"},{"segment":[56.74855483556942,59.38802250234009],"label":"Curling"},{"segment":[71.59556046115445,82.81329804492981],"label":"Curling"},{"segment":[106.89844050421218,109.2079747126365],"label":"Curling"},{"segment":[121.08557921310454,128.34411529672386],"label":"Curling"},{"segment":[147.1503224224649,164.96672917316693],"label":"Curling"},{"segment":[186.08247050733232,193.34100659095165],"label":"Curling"}]},"fGDVlbI90pw":{"duration":10.89,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fGDVlbI90pw","annotations":[{"segment":[0.22091887675507021,8.78138069422777],"label":"Brushing hair"}]},"qcYRPEEitZU":{"duration":38.04,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qcYRPEEitZU","annotations":[{"segment":[0.17785179407176285,4.683430577223088],"label":"Shaving 
legs"},{"segment":[14.228143525741029,31.24263182527301],"label":"Shaving legs"}]},"zpJ91CkCiyw":{"duration":135.651,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zpJ91CkCiyw","annotations":[{"segment":[23.115602614001205,33.680320297543986],"label":"Rollerblading"},{"segment":[38.54009043197367,92.42014989811052],"label":"Rollerblading"}]},"ztMUqZAqoNI":{"duration":82.338,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ztMUqZAqoNI","annotations":[]},"8Kv7piYg9yc":{"duration":95.086,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8Kv7piYg9yc","annotations":[{"segment":[4.649401160516472,90.53005536612396],"label":"Rock-paper-scissors"}]},"3lkZPJES45Q":{"duration":122.695,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3lkZPJES45Q","annotations":[{"segment":[19.332452800152822,23.3520716991945],"label":"Playing drums"},{"segment":[54.360560348944574,113.88920062402497],"label":"Playing drums"}]},"ILgkiTHnGVI":{"duration":60.26,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=ILgkiTHnGVI","annotations":[{"segment":[3.8399068135310475,55.538183640902744],"label":"Tai chi"}]},"wjD2BdbSb1o":{"duration":91.487,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=wjD2BdbSb1o","annotations":[{"segment":[5.703026521060843,63.16101872074883],"label":"Painting fence"}]},"Adjpq4A5WtI":{"duration":103.17,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Adjpq4A5WtI","annotations":[{"segment":[0.9656747925117005,99.46450507816232],"label":"Playing 
piano"}]},"KKrHX-pywBQ":{"duration":193.446,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KKrHX-pywBQ","annotations":[{"segment":[0.2,16.59446957878315],"label":"Longboarding"},{"segment":[19.309928237129487,21.120234009360377],"label":"Longboarding"},{"segment":[23.83569266770671,27.45630421216849],"label":"Longboarding"},{"segment":[29.266609984399377,36.20611544461779],"label":"Longboarding"},{"segment":[38.318138845553825,63.66241965678628],"label":"Longboarding"},{"segment":[65.77444305772231,177.7300608424337],"label":"Longboarding"},{"segment":[184.06678627145084,193.221],"label":"Longboarding"}]},"y-HkIwa-jWM":{"duration":41.75,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=y-HkIwa-jWM","annotations":[{"segment":[1.172372209048362,36.40867026989079],"label":"Playing kickball"}]},"GuzLtwhxsxA":{"duration":112.362,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GuzLtwhxsxA","annotations":[{"segment":[12.791627145085803,91.30992355694228],"label":"Canoeing"}]},"72F30o9V8v0":{"duration":203.36,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=72F30o9V8v0","annotations":[{"segment":[41.5200112,178.08438560000002],"label":"Horseback riding"}]},"M1_4oDuh2as":{"duration":59.86,"subset":"training","resolution":"352x240","url":"https://www.youtube.com/watch?v=M1_4oDuh2as","annotations":[{"segment":[18.21045772443143,51.64299036723374],"label":"Cleaning 
windows"}]},"QIUxLD27NuI":{"duration":83.639,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QIUxLD27NuI","annotations":[]},"Zv3fY847yN0":{"duration":156.92000000000002,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Zv3fY847yN0","annotations":[]},"yPsEMk0KMp4":{"duration":24.66,"subset":"testing","resolution":"854x474","url":"https://www.youtube.com/watch?v=yPsEMk0KMp4","annotations":[]},"gtd2Ye-raxE":{"duration":68.06,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gtd2Ye-raxE","annotations":[{"segment":[12.85188388,60.37880926],"label":"Using the pommel horse"}]},"b5E7-GxvCdk":{"duration":19.087,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=b5E7-GxvCdk","annotations":[{"segment":[0.01,17.47886603151326],"label":"Cutting the grass"}]},"xFqGY-y4OUg":{"duration":186.711,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xFqGY-y4OUg","annotations":[{"segment":[21.549007800312012,186.661],"label":"Playing pool"}]},"f4Nl8x0F5og":{"duration":21.177,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=f4Nl8x0F5og","annotations":[]},"0d7bz-Se1mU":{"duration":109.738,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0d7bz-Se1mU","annotations":[]},"PplBDkLlpEw":{"duration":16.53,"subset":"testing","resolution":"512x288","url":"https://www.youtube.com/watch?v=PplBDkLlpEw","annotations":[]},"uDpLB-JDjEA":{"duration":99.15,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=uDpLB-JDjEA","annotations":[{"segment":[55.8387685765934,78.73111549921998],"label":"Calf 
roping"}]},"lhnhGI-rrzM":{"duration":180.768,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=lhnhGI-rrzM","annotations":[{"segment":[0.20000000000000004,80.93639402718966],"label":"Longboarding"},{"segment":[98.4209112037951,100.11296125314401],"label":"Longboarding"}]},"HNIs8lFvKkA":{"duration":221.07999999999998,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HNIs8lFvKkA","annotations":[{"segment":[0.01,221.07718820861678],"label":"Ping-pong"}]},"A6DBt7UgWEg":{"duration":208.56,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=A6DBt7UgWEg","annotations":[{"segment":[23.255124024961,65.21192121684868],"label":"Playing badminton"},{"segment":[119.20283853354134,128.63498673946958],"label":"Playing badminton"}]},"9lX18LyTuAQ":{"duration":205.31,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=9lX18LyTuAQ","annotations":[{"segment":[104.6156331868751,143.6140163644452],"label":"Playing violin"},{"segment":[162.79354907472558,177.17819860743586],"label":"Playing 
violin"}]},"1oMFr9eRKQE":{"duration":77.857,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=1oMFr9eRKQE","annotations":[]},"NjlskpV3WuI":{"duration":77.39,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NjlskpV3WuI","annotations":[]},"ihMn28mft2s":{"duration":126.11,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=ihMn28mft2s","annotations":[{"segment":[52.303749553968345,53.678754233096065],"label":"Volleyball"},{"segment":[70.76809810225495,73.91096594026118],"label":"Volleyball"},{"segment":[86.28600805241071,125.96471450723938],"label":"Volleyball"}]},"H-S8LvNLvdk":{"duration":36.92,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=H-S8LvNLvdk","annotations":[]},"HPrkxpOoep0":{"duration":233.338,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HPrkxpOoep0","annotations":[{"segment":[12.732191887675508,22.55416848673947],"label":"Playing ten pins"},{"segment":[30.55726053042122,63.66095943837754],"label":"Playing ten pins"},{"segment":[68.75383619344774,94.58199687987519],"label":"Playing ten pins"},{"segment":[99.6748736349454,116.04483463338534],"label":"Playing ten pins"},{"segment":[121.50148829953199,126.23058814352574],"label":"Playing ten pins"},{"segment":[130.97092043681747,184.08690483619344],"label":"Playing ten pins"},{"segment":[189.54402652106086,196.45638065522618],"label":"Playing ten pins"},{"segment":[202.64111856474258,207.0068159126365],"label":"Playing ten pins"},{"segment":[212.1001294851794,224.46960530421217],"label":"Playing ten pins"}]},"QvM8SFUUvsw":{"duration":28.282,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=QvM8SFUUvsw","annotations":[{"segment":[1.897187973478939,9.662422469578784],"label":"Playing rubik cube"},{"segment":[15.486348341653668,22.898617633385335],"label":"Playing rubik 
cube"}]},"qRFZMO_mTGU":{"duration":114.78,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qRFZMO_mTGU","annotations":[{"segment":[38.88151776,43.42158687708052],"label":"Playing field hockey"},{"segment":[27.6644749381812,48.25615531287918],"label":"Playing field hockey"},{"segment":[66.87819669521475,74.2195783940201],"label":"Playing field hockey"}]},"exzuzQLrv7g":{"duration":196.395,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=exzuzQLrv7g","annotations":[{"segment":[127.11624804992199,149.79801716068644],"label":"Doing fencing"}]},"3HBAcaU552I":{"duration":123.925,"subset":"training","resolution":"800x480","url":"https://www.youtube.com/watch?v=3HBAcaU552I","annotations":[{"segment":[0,21.0730325702824],"label":"Gargling mouthwash"},{"segment":[24.3596523289503,28.612924957814638],"label":"Gargling mouthwash"},{"segment":[66.89237861759368,76.55890731955809],"label":"Gargling mouthwash"},{"segment":[78.87887420802954,93.18533596567862],"label":"Gargling mouthwash"}]},"ELlRh3gjpeE":{"duration":160.543,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=ELlRh3gjpeE","annotations":[{"segment":[35.063942717628706,160.5427663],"label":"Futsal"}]},"wOwWidUOaxc":{"duration":73.282,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=wOwWidUOaxc","annotations":[{"segment":[16.75346637916442,64.1646398239545],"label":"Sumo"}]},"YrIEvBXoaIA":{"duration":33.135,"subset":"testing","resolution":"376x240","url":"https://www.youtube.com/watch?v=YrIEvBXoaIA","annotations":[]},"29UfCoftDkA":{"duration":47.18,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=29UfCoftDkA","annotations":[{"segment":[1.8389625585023404,46.26829797191888],"label":"Using parallel 
bars"}]},"BurbdCh--Lo":{"duration":101.704,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BurbdCh--Lo","annotations":[]},"EGrXaq213Oc":{"duration":126.387,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EGrXaq213Oc","annotations":[{"segment":[14.386010920436817,49.07009204368175],"label":"Snowboarding"},{"segment":[54.39094539781591,75.48924024960999],"label":"Snowboarding"},{"segment":[81.20513572542902,83.7674336973479],"label":"Snowboarding"},{"segment":[92.24272698907956,99.14122152886115],"label":"Snowboarding"}]},"kMYZWwZXx3E":{"duration":60.309,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kMYZWwZXx3E","annotations":[{"segment":[15.527246489859595,21.173517940717627],"label":"Painting fence"},{"segment":[29.92523868954758,32.183747269890794],"label":"Painting fence"}]},"rFx-j3NhSgI":{"duration":69.916,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=rFx-j3NhSgI","annotations":[{"segment":[11.119909516380655,63.77595163806553],"label":"Baton twirling"}]},"BC1ZIeZvFXw":{"duration":117.447,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BC1ZIeZvFXw","annotations":[{"segment":[22.74872987404939,106.68495884601201],"label":"Longboarding"}]},"uqaiE1bcAXY":{"duration":96.85,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=uqaiE1bcAXY","annotations":[]},"XHSNzRWXROM":{"duration":35.968,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XHSNzRWXROM","annotations":[]},"GxSh-XQhIjU":{"duration":233.82999999999998,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=GxSh-XQhIjU","annotations":[{"segment":[35.38380577399967,211.93805314117327],"label":"Playing 
lacrosse"}]},"dfex2oZYqmU":{"duration":80.643,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=dfex2oZYqmU","annotations":[{"segment":[0.19999999999999998,80.44290249433107],"label":"Futsal"}]},"my9Z4bun_Dk":{"duration":124.181,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=my9Z4bun_Dk","annotations":[{"segment":[80.7594336973479,116.8004414976599],"label":"BMX"}]},"xdoV6-DzsCE":{"duration":57.632,"subset":"testing","resolution":"322x240","url":"https://www.youtube.com/watch?v=xdoV6-DzsCE","annotations":[]},"xaCOYdzox0g":{"duration":229.74,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xaCOYdzox0g","annotations":[{"segment":[31.18123075127087,219.34383011238816],"label":"Preparing pasta"}]},"ikHGZpkhfNc":{"duration":213.02,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ikHGZpkhfNc","annotations":[{"segment":[21.268753966485193,73.44366604051918],"label":"Longboarding"},{"segment":[84.4103673044881,199.39456843579865],"label":"Longboarding"}]},"3F8ZTg8dvh8":{"duration":60.05,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3F8ZTg8dvh8","annotations":[]},"gSH5ya0pfko":{"duration":96.967,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=gSH5ya0pfko","annotations":[{"segment":[14.371014677321787,82.14169305304212],"label":"Croquet"}]},"v1FHFQ1UZHw":{"duration":164.351,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=v1FHFQ1UZHw","annotations":[{"segment":[90.73785488256395,134.95658506408978],"label":"Hanging 
wallpaper"}]},"5nu47s7JrfY":{"duration":126.55,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5nu47s7JrfY","annotations":[]},"6iA4RXGAR_k":{"duration":64.06,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=6iA4RXGAR_k","annotations":[{"segment":[1.2246627964833579,63.59174612981669],"label":"Tango"}]},"dJVEAuAXON0":{"duration":129.22,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dJVEAuAXON0","annotations":[]},"JDg7jk6v3OQ":{"duration":54.5,"subset":"testing","resolution":"810x1440","url":"https://www.youtube.com/watch?v=JDg7jk6v3OQ","annotations":[]},"wvFJbY3SmXI":{"duration":61.19,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=wvFJbY3SmXI","annotations":[{"segment":[0,59.13787636973479],"label":"Discus throw"}]},"Y5uVICaJU-0":{"duration":6.04,"subset":"validation","resolution":"322x240","url":"https://www.youtube.com/watch?v=Y5uVICaJU-0","annotations":[{"segment":[0.01,5.706099063962559],"label":"Discus throw"}]},"x_FAc0KqMVw":{"duration":119.37,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=x_FAc0KqMVw","annotations":[{"segment":[71.6790717628705,114.31415600624025],"label":"Playing guitarra"}]},"YU0DjX7wvRA":{"duration":114.893,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YU0DjX7wvRA","annotations":[{"segment":[0.01,114.892334],"label":"Clipping cat claws"}]},"xfyoEDb6kVg":{"duration":130.89,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=xfyoEDb6kVg","annotations":[{"segment":[3.4713651076655307,125.58173771848833],"label":"Playing racquetball"}]},"UO9bRJmfMzI":{"duration":215.644,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=UO9bRJmfMzI","annotations":[{"segment":[15.475212271075877,80.06740261991432],"label":"Removing ice from 
car"},{"segment":[92.17843831032152,203.53268313045447],"label":"Removing ice from car"}]},"clUgEizjVkM":{"duration":23.174,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=clUgEizjVkM","annotations":[{"segment":[0.2,7.653497659906395],"label":"Playing ice hockey"},{"segment":[10.505508580343214,14.260054602184086],"label":"Playing ice hockey"}]},"TiQcZKml5Bs":{"duration":78.0,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=TiQcZKml5Bs","annotations":[]},"cz20Wqx9d4Q":{"duration":149.792,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cz20Wqx9d4Q","annotations":[{"segment":[9.113666691107644,31.07994025429017],"label":"Arm wrestling"},{"segment":[66.83355573478939,85.76194040093604],"label":"Arm wrestling"},{"segment":[124.31976101716069,139.976573024961],"label":"Arm wrestling"}]},"95L-8JtlwRU":{"duration":126.39,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=95L-8JtlwRU","annotations":[{"segment":[20.301283931357254,105.44841653666147],"label":"Cleaning shoes"}]},"hJn5L1nrkL4":{"duration":70.73,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=hJn5L1nrkL4","annotations":[{"segment":[0,15.70760515600624],"label":"Ping-pong"}]},"aFpWNTroghs":{"duration":93.159,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aFpWNTroghs","annotations":[{"segment":[21.243564677726784,80.59204734597581],"label":"Spread mulch"}]},"xTk7LrjyKPs":{"duration":49.087,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=xTk7LrjyKPs","annotations":[]},"xAI1y9izXOM":{"duration":40.96,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xAI1y9izXOM","annotations":[{"segment":[8.558648985959438,38.577790951638065],"label":"Arm 
wrestling"}]},"hhN1647pP88":{"duration":195.118,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=hhN1647pP88","annotations":[{"segment":[11.869202808112325,195.081],"label":"Plastering"}]},"tjuYS-6UC_M":{"duration":167.463,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=tjuYS-6UC_M","annotations":[{"segment":[132.80833238138217,136.19847724119523],"label":"Hitting a pinata"}]},"WY9zvUkK_4Y":{"duration":203.593,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=WY9zvUkK_4Y","annotations":[{"segment":[20.962728729557345,188.66455823400935],"label":"Braiding hair"}]},"CUtkQIV2eto":{"duration":206.843,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CUtkQIV2eto","annotations":[{"segment":[15.46026168224299,200.6807211838006],"label":"Waterskiing"}]},"teIE_kXbMiw":{"duration":107.927,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=teIE_kXbMiw","annotations":[{"segment":[35.06475585023401,73.08615366614664],"label":"Sharpening knives"}]},"ZoKZTceDTLA":{"duration":165.001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZoKZTceDTLA","annotations":[{"segment":[29.141504130437735,150.6929262488178],"label":"Laying tile"}]},"Sn8IRyUFNac":{"duration":37.2,"subset":"training","resolution":"854x478","url":"https://www.youtube.com/watch?v=Sn8IRyUFNac","annotations":[{"segment":[0.01,4.874606489859595],"label":"Smoking a cigarette"},{"segment":[7.253878705148206,12.360609313572544],"label":"Smoking a cigarette"}]},"QJmoA3byOzo":{"duration":189.499,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QJmoA3byOzo","annotations":[{"segment":[10.346956630265211,26.90208723868955],"label":"Hand car wash"},{"segment":[52.91729248049922,60.6036031201248],"label":"Hand car wash"},{"segment":[74.49808773790951,188.01898333853353],"label":"Hand car 
wash"}]},"35SpLMcN-m8":{"duration":49.37,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=35SpLMcN-m8","annotations":[{"segment":[5.198408099589289,41.00966258190328],"label":"Playing kickball"}]},"0PS48XWOsKA":{"duration":65.09,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=0PS48XWOsKA","annotations":[{"segment":[31.612633385335414,40.74629953198128],"label":"Cleaning windows"}]},"vska7f8Zm9s":{"duration":57.66,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=vska7f8Zm9s","annotations":[]},"UD4RnQp5tPY":{"duration":220.38,"subset":"training","resolution":"322x240","url":"https://www.youtube.com/watch?v=UD4RnQp5tPY","annotations":[{"segment":[57.05980230577224,136.1185645366615],"label":"Shaving legs"}]},"HpjomKhpIdk":{"duration":63.484,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=HpjomKhpIdk","annotations":[{"segment":[0.01,62.03965268515252],"label":"Roof shingle 
removal"}]},"mOVPZhGyUrs":{"duration":67.87,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mOVPZhGyUrs","annotations":[{"segment":[3.806583444617785,47.15933934165367],"label":"Ping-pong"},{"segment":[47.851769839999996,63.654534268330735],"label":"Ping-pong"}]},"2F5aJzMxPSM":{"duration":83.616,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2F5aJzMxPSM","annotations":[]},"x7QFsngAsJ0":{"duration":99.126,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=x7QFsngAsJ0","annotations":[]},"Ecp2ZEu882E":{"duration":25.75,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ecp2ZEu882E","annotations":[]},"0AbJgWxIYVI":{"duration":202.07999999999998,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0AbJgWxIYVI","annotations":[{"segment":[81.37074078,144.70961547],"label":"Skateboarding"}]},"Osw2GBrUR1Q":{"duration":54.06,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Osw2GBrUR1Q","annotations":[]},"evkiciK1nRc":{"duration":58.82,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=evkiciK1nRc","annotations":[{"segment":[0.01,58.781],"label":"Playing squash"}]},"vOpFUVofoM4":{"duration":70.01,"subset":"training","resolution":"606x480","url":"https://www.youtube.com/watch?v=vOpFUVofoM4","annotations":[{"segment":[19.323926677067078,69.981],"label":"Drinking 
coffee"}]},"2Ci0ek9BNvg":{"duration":187.69,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2Ci0ek9BNvg","annotations":[]},"zSZc7XDBUGE":{"duration":194.56,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zSZc7XDBUGE","annotations":[]},"-wFNUGYf298":{"duration":224.1,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-wFNUGYf298","annotations":[{"segment":[16.776861154446177,224.061],"label":"Doing kickboxing"}]},"EQrcCmgraa8":{"duration":35.2,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EQrcCmgraa8","annotations":[]},"Jz7bt59z6Qg":{"duration":22.52,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Jz7bt59z6Qg","annotations":[{"segment":[4.3598295309880815,18.04210354967967],"label":"Horseback riding"}]},"xeOHoiH-dmo":{"duration":186.364,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xeOHoiH-dmo","annotations":[{"segment":[104.66584736858862,115.71390903527296],"label":"Fixing the roof"}]},"5T_P4x0q0VM":{"duration":22.9,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=5T_P4x0q0VM","annotations":[{"segment":[1.2655046092043682,21.549226373634948],"label":"Polishing shoes"}]},"HtZ0bvriAvQ":{"duration":64.44,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=HtZ0bvriAvQ","annotations":[{"segment":[11.067701492580413,21.505830452081973],"label":"Using the balance beam"},{"segment":[33.34909215613182,41.579155374200354],"label":"Using the balance beam"},{"segment":[50.61215158915362,59.64514780410689],"label":"Using the balance 
beam"}]},"9H5QZzyI3iI":{"duration":94.62,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=9H5QZzyI3iI","annotations":[]},"3xmgPNrmUYM":{"duration":56.43,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3xmgPNrmUYM","annotations":[{"segment":[10.298931198064249,42.3400504809308],"label":"Snatch"}]},"BnO8AfQS8yE":{"duration":39.15,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=BnO8AfQS8yE","annotations":[]},"PJ7HOHdOdy4":{"duration":56.61,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PJ7HOHdOdy4","annotations":[]},"eaFvnf8IF-I":{"duration":66.78,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=eaFvnf8IF-I","annotations":[{"segment":[0.01,66.780589569161],"label":"Playing accordion"}]},"Vtsv9iPHDqg":{"duration":184.41,"subset":"validation","resolution":"634x360","url":"https://www.youtube.com/watch?v=Vtsv9iPHDqg","annotations":[{"segment":[75.00404864000001,88.55095416],"label":"Long jump"},{"segment":[105.00603691999999,148.0194033767551],"label":"Long jump"}]},"hGziyfXmotc":{"duration":227.3,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hGziyfXmotc","annotations":[{"segment":[26.24057732921562,208.50620808112325],"label":"Playing 
violin"}]},"_ekWWP0dQZM":{"duration":145.87,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=_ekWWP0dQZM","annotations":[{"segment":[32.76895666847082,52.79443018809188],"label":"Ballet"},{"segment":[58.02836076708375,84.65313806021629],"label":"Ballet"},{"segment":[109.68497987363493,112.18816406162246],"label":"Ballet"},{"segment":[116.51184584087363,130.62070217316693],"label":"Ballet"}]},"YAbfhT3fhac":{"duration":75.07,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YAbfhT3fhac","annotations":[{"segment":[4.503278691107644,30.236299783151324],"label":"Cheerleading"},{"segment":[33.74534811388455,74.77666599999999],"label":"Cheerleading"}]},"NojaHuKBagQ":{"duration":225.91,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NojaHuKBagQ","annotations":[]},"9nh_6Hzg_aw":{"duration":192.82,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9nh_6Hzg_aw","annotations":[{"segment":[94.79704152000001,184.3510628049922],"label":"Cleaning windows"},{"segment":[20.88026122,69.73455788],"label":"Cleaning windows"}]},"Lb7Q8qv3MHU":{"duration":117.72999999999999,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=Lb7Q8qv3MHU","annotations":[]},"325CAs4hhq8":{"duration":199.0,"subset":"testing","resolution":"640x358","url":"https://www.youtube.com/watch?v=325CAs4hhq8","annotations":[]},"d8MgSp3oCoA":{"duration":69.939,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=d8MgSp3oCoA","annotations":[{"segment":[39.3473104524181,55.914599063962555],"label":"Sumo"}]},"mnS-nMZne3I":{"duration":9.22,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=mnS-nMZne3I","annotations":[{"segment":[0.06422386661466459,9.148333],"label":"Springboard 
diving"}]},"mUxpNrgAhKk":{"duration":42.238,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mUxpNrgAhKk","annotations":[]},"x4qwV0Ti1Og":{"duration":57.749,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=x4qwV0Ti1Og","annotations":[{"segment":[0,11.981940951638064],"label":"Snow tubing"}]},"AXDa8WmY2og":{"duration":228.879,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=AXDa8WmY2og","annotations":[{"segment":[6.070116350232241,214.59646626409275],"label":"Grooming dog"}]},"5LveCNjz_zg":{"duration":161.519,"subset":"training","resolution":"328x240","url":"https://www.youtube.com/watch?v=5LveCNjz_zg","annotations":[{"segment":[60.957195007800316,158.96179563182528],"label":"Clipping cat claws"}]},"F-jZv4hqYds":{"duration":37.64,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=F-jZv4hqYds","annotations":[{"segment":[14.43033697347894,34.78532449297972],"label":"Calf roping"}]},"e4AIrRnWakA":{"duration":203.13,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=e4AIrRnWakA","annotations":[{"segment":[15.349531201248048,18.197897815912636],"label":"Smoking a cigarette"},{"segment":[97.3191926677067,106.49726287051482],"label":"Smoking a cigarette"},{"segment":[127.38528471138846,132.44904758190327],"label":"Smoking a cigarette"},{"segment":[180.2383096723869,185.61855772230888],"label":"Smoking a 
cigarette"}]},"8PfDevDmhv4":{"duration":109.924,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8PfDevDmhv4","annotations":[]},"loedAFEIVWE":{"duration":32.625,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=loedAFEIVWE","annotations":[]},"ObyxzCOV7BU":{"duration":61.882,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=ObyxzCOV7BU","annotations":[]},"79LyHiS908U":{"duration":87.934,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=79LyHiS908U","annotations":[{"segment":[0.01,79.3988751950078],"label":"Playing congas"}]},"Ls2QBv49HJo":{"duration":237.378,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=Ls2QBv49HJo","annotations":[]},"p4YTDxTASBI":{"duration":33.762,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=p4YTDxTASBI","annotations":[{"segment":[0.01,33.761814058956915],"label":"Canoeing"}]},"2uMG44uNDJ8":{"duration":7.082,"subset":"training","resolution":"360x360","url":"https://www.youtube.com/watch?v=2uMG44uNDJ8","annotations":[{"segment":[2.1260109204368174,3.1504617784711386],"label":"Hand car 
wash"}]},"TQepyWudKYQ":{"duration":31.7,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=TQepyWudKYQ","annotations":[{"segment":[9.0487182074883,28.38889455],"label":"Cricket"}]},"Ttx3kt2fW1o":{"duration":39.24,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ttx3kt2fW1o","annotations":[{"segment":[0.57605788,39.241722],"label":"Spinning"}]},"SMhW9V-cJsQ":{"duration":27.493,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SMhW9V-cJsQ","annotations":[{"segment":[0.01,25.61884243369735],"label":"Welding"}]},"1oM26-0yFcM":{"duration":176.19,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=1oM26-0yFcM","annotations":[{"segment":[10.981799251170047,31.791509600000005],"label":"Discus throw"},{"segment":[41.035949120000005,171.31606831825272],"label":"Discus throw"}]},"gHSa0An1RO8":{"duration":195.025,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gHSa0An1RO8","annotations":[]},"-Tus9Ryuc-k":{"duration":185.946,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-Tus9Ryuc-k","annotations":[]},"Hy54nZSDxTk":{"duration":216.27,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=Hy54nZSDxTk","annotations":[]},"nr34x_-K5c0":{"duration":154.55,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nr34x_-K5c0","annotations":[{"segment":[4.581105061889551,152.62313179874135],"label":"Kayaking"}]},"8M_RipfsJI8":{"duration":63.0,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=8M_RipfsJI8","annotations":[{"segment":[0,59.09753014022003],"label":"Hand washing 
clothes"}]},"aGqPaW5U7g8":{"duration":109.668,"subset":"testing","resolution":"425x236","url":"https://www.youtube.com/watch?v=aGqPaW5U7g8","annotations":[]},"en4Boe2rAuo":{"duration":45.047,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=en4Boe2rAuo","annotations":[]},"LLLuBjEVHI8":{"duration":45.07,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=LLLuBjEVHI8","annotations":[{"segment":[3.2343476224648984,4.7812095288611545],"label":"Layup drill in basketball"},{"segment":[8.296804770670827,9.703042867394695],"label":"Layup drill in basketball"},{"segment":[12.585830965678626,13.499885728549142],"label":"Layup drill in basketball"},{"segment":[17.78891192355694,19.687333354134164],"label":"Layup drill in basketball"},{"segment":[22.921680976599063,24.749790502340094],"label":"Layup drill in basketball"},{"segment":[27.843514315132605,29.531000031201245],"label":"Layup drill in basketball"},{"segment":[33.2575309875195,34.804392893915754],"label":"Layup drill in basketball"},{"segment":[37.61686908736349,38.81217146957878],"label":"Layup drill in basketball"},{"segment":[42.398078616224645,44.08556433229329],"label":"Layup drill in basketball"}]},"HowkVy5fOyk":{"duration":105.164,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=HowkVy5fOyk","annotations":[]},"60tIsGR0Zzg":{"duration":102.66,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=60tIsGR0Zzg","annotations":[]},"1GXQLnTalcg":{"duration":110.03999999999999,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1GXQLnTalcg","annotations":[{"segment":[8.583413812742986,13.561793824133918],"label":"Brushing teeth"},{"segment":[35.36366490850111,61.97224772800436],"label":"Brushing teeth"},{"segment":[69.35398360696333,86.00580640368472],"label":"Brushing 
teeth"}]},"yNmV_c3rlLA":{"duration":77.323,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=yNmV_c3rlLA","annotations":[{"segment":[3.6188353656595247,77.08119328854787],"label":"Playing congas"}]},"9vdSDvLUcjc":{"duration":29.47,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=9vdSDvLUcjc","annotations":[]},"qXi05KUFOSk":{"duration":82.524,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=qXi05KUFOSk","annotations":[{"segment":[0.01,82.481],"label":"Installing carpet"}]},"j89l589KFrg":{"duration":182.393,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=j89l589KFrg","annotations":[{"segment":[31.364737592382042,51.53590007680572],"label":"Playing blackjack"},{"segment":[67.16144848023251,96.13973824658765],"label":"Playing blackjack"},{"segment":[110.62888312976521,137.05026497555963],"label":"Playing blackjack"},{"segment":[148.69840105811414,153.244015139111],"label":"Playing blackjack"},{"segment":[164.89215122166553,174.5515811437839],"label":"Playing 
blackjack"}]},"iL__89eDRzg":{"duration":89.397,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=iL__89eDRzg","annotations":[{"segment":[8.538712673258559,87.66782326515887],"label":"Waterskiing"}]},"pZzTMUJG9Rk":{"duration":214.205,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=pZzTMUJG9Rk","annotations":[{"segment":[5.012576013244612,36.090547295361205],"label":"Futsal"},{"segment":[38.09557770065905,63.15845776688211],"label":"Futsal"},{"segment":[68.50520551434303,103.5932376070553],"label":"Futsal"},{"segment":[106.93495494921838,136.00789582603713],"label":"Futsal"},{"segment":[140.6863001050654,175.44015912636507],"label":"Futsal"},{"segment":[183.1261089547582,214.20408],"label":"Futsal"}]},"NH2TVi42xDE":{"duration":129.196,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=NH2TVi42xDE","annotations":[{"segment":[36.76957152599247,49.227023027069826],"label":"Grooming dog"},{"segment":[82.5808447880189,115.7337384208836],"label":"Grooming dog"}]},"nKn2uQTVo-U":{"duration":223.01,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nKn2uQTVo-U","annotations":[{"segment":[0,222.961],"label":"Breakdancing"}]},"h4m7bYDVLAc":{"duration":24.916,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=h4m7bYDVLAc","annotations":[{"segment":[0.01,23.049300347893915],"label":"Hitting a 
pinata"}]},"KEUJ7ulD-_A":{"duration":230.18,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KEUJ7ulD-_A","annotations":[{"segment":[143.6376976167482,230.179410430839],"label":"BMX"}]},"xfThpRE6bLc":{"duration":207.564,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xfThpRE6bLc","annotations":[{"segment":[15.842049151955694,189.78128269791824],"label":"Surfing"}]},"Mk9n7isIyBs":{"duration":136.325,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Mk9n7isIyBs","annotations":[{"segment":[7.441240249609985,135.23780967238687],"label":"Baton twirling"}]},"3xEonDbWxTM":{"duration":108.46,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=3xEonDbWxTM","annotations":[{"segment":[31.2883775351014,96.2346770670827],"label":"Hanging wallpaper"}]},"loYCYGQpGRw":{"duration":168.82999999999998,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=loYCYGQpGRw","annotations":[{"segment":[0.01,129.5868890920437],"label":"Preparing pasta"}]},"WBA-ftw3wkQ":{"duration":208.32999999999998,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WBA-ftw3wkQ","annotations":[]},"dgts0oITT28":{"duration":77.69,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dgts0oITT28","annotations":[]},"wZgSzWl5Hgg":{"duration":103.793,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=wZgSzWl5Hgg","annotations":[{"segment":[52.59879095163806,93.22124180967239],"label":"Tug of 
war"}]},"O_FO09g9WDM":{"duration":111.503,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=O_FO09g9WDM","annotations":[]},"FL9yhHtoc6c":{"duration":220.64,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FL9yhHtoc6c","annotations":[{"segment":[15.48926581553058,211.34242690523948],"label":"Zumba"}]},"V1NtrnPz3sA":{"duration":79.13,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=V1NtrnPz3sA","annotations":[{"segment":[0.01,79.13360544217687],"label":"Shot put"}]},"HzyTD2uZ8jI":{"duration":149.23,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=HzyTD2uZ8jI","annotations":[{"segment":[24.2729101,145.2767858471139],"label":"Triple jump"}]},"r5oPTDuHhoE":{"duration":41.31,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=r5oPTDuHhoE","annotations":[{"segment":[22.580192039999996,36.433269890795636],"label":"Playing field hockey"},{"segment":[5.535797191887675,21.62823088923557],"label":"Playing field hockey"}]},"AmWcQz_KJG4":{"duration":95.18,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AmWcQz_KJG4","annotations":[{"segment":[1.1869994758190328,93.77295858970359],"label":"Using the balance beam"}]},"KwY8nf4MqR4":{"duration":33.93,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=KwY8nf4MqR4","annotations":[{"segment":[6.67149384,32.665033479719185],"label":"Tennis serve with ball bouncing"}]},"Q43BuVI2r-s":{"duration":83.917,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Q43BuVI2r-s","annotations":[{"segment":[6.195735045244628,82.00852429758108],"label":"Installing carpet"}]},"EI_6eT-0-X4":{"duration":103.05,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=EI_6eT-0-X4","annotations":[{"segment":[64.9071726478684,103.050157],"label":"Rope 
skipping"}]},"S2MHi_oC3Lg":{"duration":217.29,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=S2MHi_oC3Lg","annotations":[{"segment":[32.54293037129485,202.37634824648984],"label":"Doing nails"}]},"CYj2ltxPNDo":{"duration":190.1,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=CYj2ltxPNDo","annotations":[{"segment":[40.78807056,139.9813323307332],"label":"Using parallel bars"}]},"NO5J7RjTTTo":{"duration":43.561,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=NO5J7RjTTTo","annotations":[{"segment":[0.2,41.871294851794076],"label":"Table soccer"}]},"modITAJlP_0":{"duration":69.962,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=modITAJlP_0","annotations":[{"segment":[4.147457876546355,27.176762770670823],"label":"Elliptical trainer"},{"segment":[32.41565679875195,37.76369445241809],"label":"Elliptical trainer"},{"segment":[48.35062613416536,61.447861204368174],"label":"Elliptical trainer"}]},"H8K1E49607E":{"duration":97.385,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=H8K1E49607E","annotations":[{"segment":[0.01,94.95369734789392],"label":"Playing drums"}]},"AZv1pI1YESM":{"duration":61.7,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AZv1pI1YESM","annotations":[]},"3DCGDf3lBy4":{"duration":162.354,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=3DCGDf3lBy4","annotations":[]},"LYShFLEK1xs":{"duration":106.836,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=LYShFLEK1xs","annotations":[]},"mE02JHvCEUM":{"duration":198.55,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mE02JHvCEUM","annotations":[{"segment":[0.01,190.96425594574802],"label":"Playing 
piano"}]},"gM7JdDs5f4o":{"duration":218.152,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gM7JdDs5f4o","annotations":[{"segment":[23.817581903276132,211.31535257410297],"label":"Drum corps"}]},"FeXBmKYV8HY":{"duration":160.8,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=FeXBmKYV8HY","annotations":[{"segment":[31.60044617784711,47.65146645865835],"label":"Playing violin"},{"segment":[69.47082215288611,72.22959126365055],"label":"Playing violin"},{"segment":[88.28061154446178,98.56329641185648],"label":"Playing violin"},{"segment":[111.6047503900156,129.41135101404055],"label":"Playing violin"},{"segment":[140.44642745709828,146.7163572542902],"label":"Playing violin"}]},"s-Rg2hr7DRA":{"duration":72.888,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=s-Rg2hr7DRA","annotations":[]},"tRatWgaZ-a0":{"duration":22.15,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=tRatWgaZ-a0","annotations":[{"segment":[0.10367473478939157,22.1172767550702],"label":"Playing water polo"}]},"gCDpUPvD3s4":{"duration":51.57,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gCDpUPvD3s4","annotations":[{"segment":[16.97595990327613,39.34239048673947],"label":"Playing polo"}]},"75xhANnCOEg":{"duration":137.97,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=75xhANnCOEg","annotations":[{"segment":[38.95960627815912,132.59180942475794],"label":"Doing karate"}]},"BTUgRe4aSsg":{"duration":27.72,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=BTUgRe4aSsg","annotations":[{"segment":[8.736933307332293,15.311259360374413],"label":"Hammer throw"}]},"mucFmOzqWW8":{"duration":104.787,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mucFmOzqWW8","annotations":[{"segment":[42.5008736349454,48.38560998439938],"label":"Building 
sandcastles"}]},"qjacthwabek":{"duration":11.378,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qjacthwabek","annotations":[{"segment":[0.08830733229329173,10.474059282371295],"label":"Kneeling"}]},"p3vw2UJvLZE":{"duration":38.058,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=p3vw2UJvLZE","annotations":[{"segment":[0.2,26.45454914196568],"label":"Playing blackjack"}]},"vNyXwHA_k5A":{"duration":148.352,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vNyXwHA_k5A","annotations":[{"segment":[60.56696099843994,81.14123400936039],"label":"Playing drums"}]},"UeH0TCDAbSU":{"duration":139.18,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UeH0TCDAbSU","annotations":[{"segment":[50.794062402496095,122.20964586583463],"label":"Ironing clothes"}]},"XxfatT0sWXw":{"duration":150.303,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=XxfatT0sWXw","annotations":[{"segment":[0.01,148.73605549736857],"label":"Baton twirling"}]},"4XavNhCs-Do":{"duration":134.142,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4XavNhCs-Do","annotations":[{"segment":[25.13019555698383,29.72695087801151],"label":"Slacklining"},{"segment":[53.33755775419917,85.51484500139296],"label":"Slacklining"},{"segment":[105.99129972860659,110.37911159309256],"label":"Slacklining"},{"segment":[121.45311296536671,123.33360376443213],"label":"Slacklining"}]},"a2JBhm22-M4":{"duration":74.93,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=a2JBhm22-M4","annotations":[{"segment":[16.29676279797192,48.07252968720748],"label":"Using uneven bars"}]},"0vJfctL116Y":{"duration":148.12,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0vJfctL116Y","annotations":[{"segment":[11.784905812559032,45.059933957878314],"label":"Preparing 
salad"},{"segment":[53.37869099625585,125.47458532886114],"label":"Preparing salad"}]},"1fbU_MkV7NE":{"duration":102.958,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1fbU_MkV7NE","annotations":[{"segment":[38.53243966989054,46.06980970058081],"label":"Doing crunches"},{"segment":[54.569397988742814,79.10594531117272],"label":"Doing crunches"},{"segment":[84.71888097316648,93.37883885167115],"label":"Doing crunches"}]},"iNs17kcwlDk":{"duration":100.032,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=iNs17kcwlDk","annotations":[{"segment":[0.01,100.031586],"label":"Playing congas"}]},"EsYsshh2CyA":{"duration":68.89,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=EsYsshh2CyA","annotations":[{"segment":[8.490787388455537,68.89360544217688],"label":"Mixing drinks"}]},"zSCZphJS2vA":{"duration":96.94,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=zSCZphJS2vA","annotations":[{"segment":[17.241088010867372,50.6645998673947],"label":"Hammer throw"},{"segment":[75.31633054914197,83.63439918408737],"label":"Hammer throw"}]},"K1LiXUpoSmI":{"duration":38.499,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=K1LiXUpoSmI","annotations":[]},"8yFimnZ8XhA":{"duration":60.117,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8yFimnZ8XhA","annotations":[{"segment":[0.2,59.881],"label":"River tubing"}]},"u3nOmvgcs84":{"duration":173.06,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=u3nOmvgcs84","annotations":[{"segment":[36.447540513865455,164.9588685479392],"label":"Making a 
sandwich"}]},"VYOKYSmoyk0":{"duration":79.691,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=VYOKYSmoyk0","annotations":[{"segment":[0.009999999999999998,8.702592527301093],"label":"Croquet"},{"segment":[22.378095070202807,79.690883],"label":"Croquet"},{"segment":[71.36125872386896,79.19359199843994],"label":"Croquet"}]},"9DIDpTlfBWs":{"duration":163.028,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=9DIDpTlfBWs","annotations":[{"segment":[14.487514820592823,155.82345241809674],"label":"Making an omelette"}]},"1B3XsffrM4M":{"duration":115.5,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=1B3XsffrM4M","annotations":[{"segment":[9.006318252730109,115.2808736349454],"label":"Tai chi"}]},"HWuRcUpcsHY":{"duration":53.94,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=HWuRcUpcsHY","annotations":[{"segment":[0,53.781],"label":"Kayaking"}]},"R2dSuxrGNwM":{"duration":87.493,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=R2dSuxrGNwM","annotations":[]},"CsCbJSnAhRs":{"duration":86.936,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=CsCbJSnAhRs","annotations":[{"segment":[15.312469578783151,56.50707800312013],"label":"Baking cookies"}]},"0gm_g4rYCic":{"duration":143.569,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0gm_g4rYCic","annotations":[{"segment":[0.2,135.70334789391575],"label":"Rafting"}]},"0A-p1Q0rseE":{"duration":102.52000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0A-p1Q0rseE","annotations":[{"segment":[25.81690449219969,38.12589301170047],"label":"Mixing drinks"},{"segment":[49.955310549922,58.427731219188765],"label":"Mixing 
drinks"}]},"HFDsuGHojDU":{"duration":10.94,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=HFDsuGHojDU","annotations":[{"segment":[0.01,10.901],"label":"Discus throw"}]},"5g7bqiT7Y3c":{"duration":5.85,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=5g7bqiT7Y3c","annotations":[{"segment":[0.01,5.705369539781591],"label":"Discus throw"}]},"aYSm25veKTs":{"duration":184.971,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aYSm25veKTs","annotations":[{"segment":[12.696859569620882,172.27329916053785],"label":"BMX"}]},"nLAm7USuYbA":{"duration":80.13,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nLAm7USuYbA","annotations":[{"segment":[4.998502340093604,78.47648673946958],"label":"High jump"}]},"O09jaT1gJ3w":{"duration":95.551,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=O09jaT1gJ3w","annotations":[]},"UWRq_HzBuTw":{"duration":23.62,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=UWRq_HzBuTw","annotations":[]},"SeVftOMjNMM":{"duration":16.138,"subset":"validation","resolution":"270x480","url":"https://www.youtube.com/watch?v=SeVftOMjNMM","annotations":[{"segment":[1.7906720008850097,14.531027141071926],"label":"Doing a powerbomb"}]},"ZdwkA72VMKE":{"duration":16.115,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZdwkA72VMKE","annotations":[{"segment":[0.2,9.854823126365055],"label":"Curling"}]},"g1YGP5x_Zug":{"duration":63.25,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=g1YGP5x_Zug","annotations":[]},"1qi8ZXUH_wY":{"duration":82.57,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=1qi8ZXUH_wY","annotations":[{"segment":[0,61.18693381435257],"label":"Wrapping 
presents"}]},"YAWoigyJ9sA":{"duration":9.03,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=YAWoigyJ9sA","annotations":[{"segment":[0,3.7632886115444615],"label":"Drinking coffee"}]},"FmLxc-aNh88":{"duration":80.57,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=FmLxc-aNh88","annotations":[{"segment":[10.433040566302653,69.51170401404056],"label":"Grooming horse"}]},"u_sEVnBQ8is":{"duration":60.047,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=u_sEVnBQ8is","annotations":[]},"mbGpp_nDwI4":{"duration":98.2,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mbGpp_nDwI4","annotations":[{"segment":[11.79585543421737,53.61752470098804],"label":"Using parallel bars"},{"segment":[76.59646255850234,92.52852677067082],"label":"Using parallel bars"}]},"kkji9hespu0":{"duration":228.86,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=kkji9hespu0","annotations":[]},"gzs6VcYc0OI":{"duration":202.13,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=gzs6VcYc0OI","annotations":[{"segment":[18.91179407176287,53.268219968798746],"label":"Using the pommel horse"},{"segment":[64.30009984399375,123.87225117004681],"label":"Using the pommel horse"},{"segment":[167.70117316692668,190.08234477379096],"label":"Using the pommel horse"}]},"QGnn2h2hreQ":{"duration":215.06,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QGnn2h2hreQ","annotations":[]},"dcclQadR-L0":{"duration":46.39,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dcclQadR-L0","annotations":[{"segment":[7.961437804450954,42.55750354446178],"label":"Playing 
polo"}]},"4q7LmVSgIPk":{"duration":203.31,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=4q7LmVSgIPk","annotations":[{"segment":[155.7363872776911,191.5779590951638],"label":"Fixing bicycle"}]},"TssNxg5XJ-o":{"duration":93.461,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TssNxg5XJ-o","annotations":[{"segment":[0.19999999999999998,93.241],"label":"Ice fishing"}]},"gh5di42-RJo":{"duration":62.323,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gh5di42-RJo","annotations":[{"segment":[0.19445303900156005,62.32219954648526],"label":"Playing drums"}]},"fsSCNCY0iQY":{"duration":63.51,"subset":"training","resolution":"400x224","url":"https://www.youtube.com/watch?v=fsSCNCY0iQY","annotations":[{"segment":[17.931480169381487,56.80700096960189],"label":"Putting on makeup"}]},"4oZtb0kglx4":{"duration":217.62,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4oZtb0kglx4","annotations":[{"segment":[24.498137433906585,202.45630387805434],"label":"Wrapping presents"}]},"cf6y94fjhHw":{"duration":95.109,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=cf6y94fjhHw","annotations":[]},"2WzsyEvhPPI":{"duration":223.05,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2WzsyEvhPPI","annotations":[{"segment":[54.266826833073324,118.96958190327614],"label":"Wrapping presents"},{"segment":[152.7261138845554,176.73090171606867],"label":"Wrapping 
presents"}]},"yRup8RDsCzk":{"duration":179.491,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=yRup8RDsCzk","annotations":[{"segment":[13.440721572542902,175.28941050858032],"label":"Capoeira"}]},"QV4E2B0AdGw":{"duration":177.07999999999998,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QV4E2B0AdGw","annotations":[{"segment":[8.009021840873634,50.5396895475819],"label":"Getting a tattoo"},{"segment":[85.88985491419656,127.31582995319812],"label":"Getting a tattoo"}]},"h-_dzZ3cCfc":{"duration":170.64,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=h-_dzZ3cCfc","annotations":[{"segment":[49.51588313328452,50.846955260523345],"label":"Playing lacrosse"},{"segment":[77.46839780529997,91.84397677947935],"label":"Playing lacrosse"},{"segment":[101.96012494649447,116.86813277156936],"label":"Playing lacrosse"}]},"1KBpN4TKj8A":{"duration":162.331,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=1KBpN4TKj8A","annotations":[]},"8381XS5ZDNs":{"duration":68.82,"subset":"training","resolution":"310x240","url":"https://www.youtube.com/watch?v=8381XS5ZDNs","annotations":[{"segment":[31.010466458658346,51.719878315132604],"label":"Spinning"}]},"mzxz0EQy1pY":{"duration":234.94,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mzxz0EQy1pY","annotations":[{"segment":[20.704179608424337,63.94476710920436],"label":"Kayaking"},{"segment":[64.46021343,198.43032314976597],"label":"Kayaking"}]},"v7o9uSu9AVI":{"duration":202.78,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=v7o9uSu9AVI","annotations":[{"segment":[0,177.5484601544462],"label":"Using parallel bars"}]},"p3PEMCN4h_g":{"duration":77.834,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=p3PEMCN4h_g","annotations":[{"segment":[19.30701482656366,21.487229779834685],"label":"Running a 
marathon"},{"segment":[24.636429156781727,40.1505039231844],"label":"Running a marathon"},{"segment":[49.11591513813768,72.13521420355825],"label":"Running a marathon"}]},"BYlpp0-ztfs":{"duration":106.348,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BYlpp0-ztfs","annotations":[]},"Zc8zn0sKfwo":{"duration":205.29,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Zc8zn0sKfwo","annotations":[{"segment":[70.91340330577223,192.2505583978159],"label":"Preparing pasta"}]},"5Qav3tLfiZA":{"duration":6.688,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=5Qav3tLfiZA","annotations":[{"segment":[0,5.808890795631824],"label":"Kneeling"}]},"Zt9qvbUnNhc":{"duration":208.539,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=Zt9qvbUnNhc","annotations":[]},"bV1UAhMjq5M":{"duration":106.046,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bV1UAhMjq5M","annotations":[]},"_nup1BwPnXc":{"duration":106.688,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_nup1BwPnXc","annotations":[{"segment":[19.975226209048362,82.06488767550702],"label":"Tug of war"}]},"6UjZaj86bKs":{"duration":227.86,"subset":"validation","resolution":"492x360","url":"https://www.youtube.com/watch?v=6UjZaj86bKs","annotations":[{"segment":[17.842700681762192,221.92060349397744],"label":"Belly dance"}]},"64oq7grVNVs":{"duration":95.32,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=64oq7grVNVs","annotations":[{"segment":[0.01,2.5264227769110765],"label":"Getting a haircut"},{"segment":[5.201458658346334,12.334887675507021],"label":"Getting a haircut"},{"segment":[48.89422464898596,68.67366926677067],"label":"Getting a haircut"},{"segment":[74.32215288611545,78.48419344773792],"label":"Getting a haircut"},{"segment":[82.49758970358815,86.65963026521061],"label":"Getting a 
haircut"}]},"rhfoyYzp93Q":{"duration":67.06,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rhfoyYzp93Q","annotations":[{"segment":[0.01,67.059228],"label":"Canoeing"}]},"jyYdVAyz6JA":{"duration":23.824,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=jyYdVAyz6JA","annotations":[]},"bzZMUbtgZho":{"duration":182.974,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bzZMUbtgZho","annotations":[{"segment":[11.189730019198029,16.60327986343479],"label":"Surfing"},{"segment":[24.86606646779616,57.06244185720426],"label":"Surfing"},{"segment":[65.89507581359055,68.74431257371516],"label":"Surfing"},{"segment":[77.57694653010145,86.6945041625002],"label":"Surfing"},{"segment":[109.20347456748463,120.60042160798307],"label":"Surfing"},{"segment":[128.57828453633198,162.78692231601943],"label":"Surfing"},{"segment":[165.35151577396334,167.3461995745864],"label":"Surfing"}]},"7I_6nIgHK34":{"duration":14.118,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=7I_6nIgHK34","annotations":[]},"iAes5SVj_Fg":{"duration":173.291,"subset":"testing","resolution":"352x240","url":"https://www.youtube.com/watch?v=iAes5SVj_Fg","annotations":[]},"NQOH_mu1-gQ":{"duration":25.45,"subset":"testing","resolution":"1280x704","url":"https://www.youtube.com/watch?v=NQOH_mu1-gQ","annotations":[]},"YuCMWTdK_DY":{"duration":30.19,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YuCMWTdK_DY","annotations":[{"segment":[15.345533541341654,28.626224648985957],"label":"Discus throw"}]},"XDOtHC4E6L8":{"duration":89.56,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=XDOtHC4E6L8","annotations":[{"segment":[0.01,89.55936507936508],"label":"Using parallel 
bars"}]},"EML3BvJxknA":{"duration":116.449,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=EML3BvJxknA","annotations":[{"segment":[7.808491419656786,99.16671762870514],"label":"Waterskiing"},{"segment":[105.5235585023401,113.69663962558502],"label":"Waterskiing"}]},"xOmfJGR5fBw":{"duration":200.923,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xOmfJGR5fBw","annotations":[{"segment":[36.66626677067082,150.42570982839314],"label":"Clipping cat claws"}]},"4LF0hL-mgks":{"duration":225.68,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=4LF0hL-mgks","annotations":[]},"4OsfdT79z8c":{"duration":8.31,"subset":"testing","resolution":"176x144","url":"https://www.youtube.com/watch?v=4OsfdT79z8c","annotations":[]},"wJNYaRkVQfA":{"duration":17.183,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=wJNYaRkVQfA","annotations":[]},"SIX51vtOOek":{"duration":91.092,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SIX51vtOOek","annotations":[{"segment":[34.674575086404815,91.09181405895691],"label":"Cutting the grass"}]},"G6ayznrS0tY":{"duration":30.116,"subset":"validation","resolution":"400x224","url":"https://www.youtube.com/watch?v=G6ayznrS0tY","annotations":[{"segment":[0.28189965678627144,2.8659798439937596],"label":"Gargling mouthwash"},{"segment":[13.296267145085803,14.658782152886117],"label":"Gargling 
mouthwash"}]},"1jgsRbGzCls":{"duration":194.7,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=1jgsRbGzCls","annotations":[]},"yCAsVc5Tb_0":{"duration":163.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=yCAsVc5Tb_0","annotations":[{"segment":[9.920391925881118,155.4194723447738],"label":"Cheerleading"}]},"8RntjHIwMNo":{"duration":35.295,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8RntjHIwMNo","annotations":[{"segment":[7.045209752056087,28.080977664828673],"label":"Playing drums"}]},"QdmUojrWdmE":{"duration":228.647,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QdmUojrWdmE","annotations":[{"segment":[8.547547417720981,22.793459780589284],"label":"Surfing"},{"segment":[30.984859389238558,35.25863309809905],"label":"Surfing"},{"segment":[60.18897973311858,127.85706345674303],"label":"Surfing"},{"segment":[129.63780250210155,162.40340093669866],"label":"Surfing"}]},"H3PWbSF9ax4":{"duration":66.828,"subset":"validation","resolution":"688x720","url":"https://www.youtube.com/watch?v=H3PWbSF9ax4","annotations":[{"segment":[11.040040561622463,55.21673946957878],"label":"Swinging at the playground"}]},"sgwg1C3i-Vw":{"duration":118.329,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sgwg1C3i-Vw","annotations":[{"segment":[49.62063806552262,96.12165522620906],"label":"Canoeing"}]},"gA7GpvB10UY":{"duration":187.756,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=gA7GpvB10UY","annotations":[{"segment":[60.73617452404757,83.8042425018867],"label":"Putting in contact lenses"},{"segment":[106.58030961924686,142.20441459768202],"label":"Putting in contact 
lenses"}]},"n_sfeihU3f8":{"duration":78.159,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=n_sfeihU3f8","annotations":[{"segment":[0.009999999999999998,78.158367],"label":"Playing ice hockey"}]},"SFvaB2idy-4":{"duration":217.55,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SFvaB2idy-4","annotations":[]},"TNFoUBRsngY":{"duration":156.364,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TNFoUBRsngY","annotations":[{"segment":[0,57.30957098283932],"label":"Tug of war"},{"segment":[63.89407488299531,113.64365990639625],"label":"Tug of war"},{"segment":[126.81266770670827,147.78553198127923],"label":"Tug of war"}]},"AdXXy6WTVi0":{"duration":114.92,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AdXXy6WTVi0","annotations":[{"segment":[17.769387505626383,82.56581658350801],"label":"Windsurfing"},{"segment":[86.50372111310303,95.632499795346],"label":"Windsurfing"}]},"QePg4GKh3rw":{"duration":65.9,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=QePg4GKh3rw","annotations":[{"segment":[12.94615600624025,58.154954758190335],"label":"Using uneven bars"}]},"9XjHgUP5QW0":{"duration":42.93,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9XjHgUP5QW0","annotations":[{"segment":[17.428980488299533,27.130332391575664],"label":"Shaving legs"}]},"AYTIuVLRMAQ":{"duration":128.128,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=AYTIuVLRMAQ","annotations":[]},"AV8q7nnOtq4":{"duration":83.29,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AV8q7nnOtq4","annotations":[{"segment":[4.981818336277423,75.16837075372291],"label":"Playing 
drums"}]},"c6ie4KwFnAs":{"duration":215.691,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=c6ie4KwFnAs","annotations":[{"segment":[16.15149797828648,127.52953612022031],"label":"Futsal"},{"segment":[127.86602566143462,192.13552721528862],"label":"Futsal"}]},"2M4NxbRxDek":{"duration":26.889,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2M4NxbRxDek","annotations":[]},"2giX_iWq5mg":{"duration":70.078,"subset":"testing","resolution":"400x224","url":"https://www.youtube.com/watch?v=2giX_iWq5mg","annotations":[]},"Zzcj8Eea3Mc":{"duration":84.289,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Zzcj8Eea3Mc","annotations":[{"segment":[37.621741316083195,76.29217309776138],"label":"Elliptical trainer"}]},"aCknCFmU0sA":{"duration":95.47999999999999,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=aCknCFmU0sA","annotations":[{"segment":[6.107174093766471,61.369651869067965],"label":"Brushing hair"}]},"UElXV5zgUsE":{"duration":72.725,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=UElXV5zgUsE","annotations":[]},"TrWAMPr_h6Q":{"duration":47.973,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=TrWAMPr_h6Q","annotations":[]},"Dl0JNkGbZT4":{"duration":153.48,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=Dl0JNkGbZT4","annotations":[{"segment":[61.05833784371782,140.79334260842433],"label":"Getting a piercing"}]},"mNM01g9wLy4":{"duration":75.396,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mNM01g9wLy4","annotations":[{"segment":[12.213496372588494,70.22760414238385],"label":"Elliptical 
trainer"}]},"mFDC1CLt6B4":{"duration":74.931,"subset":"validation","resolution":"853x480","url":"https://www.youtube.com/watch?v=mFDC1CLt6B4","annotations":[{"segment":[34.36360031104199,71.5228833592535],"label":"BMX"}]},"_ENXbB6aaa4":{"duration":136.371,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_ENXbB6aaa4","annotations":[{"segment":[8.578455775715481,110.71303677260022],"label":"Longboarding"}]},"PDEUAMa8aOU":{"duration":125.41,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PDEUAMa8aOU","annotations":[{"segment":[50.7308532,119.73680800624025],"label":"Getting a haircut"}]},"hpU50i4WBK8":{"duration":231.085,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=hpU50i4WBK8","annotations":[{"segment":[7.928957878315132,231.021],"label":"Scuba diving"}]},"oNN6NE8fq0U":{"duration":196.09,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=oNN6NE8fq0U","annotations":[{"segment":[36.69266770670827,176.73634945397816],"label":"Playing accordion"}]},"D4wcmmQsPng":{"duration":74.444,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=D4wcmmQsPng","annotations":[{"segment":[0.2,69.29391107644305],"label":"Playing ice hockey"}]},"GBTpVjwLxe0":{"duration":23.22,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=GBTpVjwLxe0","annotations":[{"segment":[2.189797971918877,17.319311232449298],"label":"Kayaking"}]},"Mvw0hvMeM-w":{"duration":216.132,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Mvw0hvMeM-w","annotations":[{"segment":[49.139915887850464,198.59749221183802],"label":"BMX"}]},"Pyf_5Bffu5A":{"duration":198.369,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Pyf_5Bffu5A","annotations":[{"segment":[6.497255850234009,192.77136193447737],"label":"Beach 
soccer"}]},"ZkJmA1TNdPQ":{"duration":193.423,"subset":"testing","resolution":"400x224","url":"https://www.youtube.com/watch?v=ZkJmA1TNdPQ","annotations":[]},"uqzy1FAUpPM":{"duration":56.61,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=uqzy1FAUpPM","annotations":[{"segment":[10.153569599999999,15.896793912572829],"label":"Triple jump"}]},"6J0IdWi4O0Q":{"duration":58.56,"subset":"validation","resolution":"720x1280","url":"https://www.youtube.com/watch?v=6J0IdWi4O0Q","annotations":[{"segment":[3.8308891918876755,47.79490325117004],"label":"Tennis serve with ball bouncing"},{"segment":[15.338911900000001,41.500193149999994],"label":"Tennis serve with ball bouncing"}]},"ZGK-w7-bkNw":{"duration":134.212,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=ZGK-w7-bkNw","annotations":[{"segment":[24.51603987311656,96.22070490738446],"label":"Braiding hair"}]},"OCdmlTxq1Co":{"duration":115.264,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=OCdmlTxq1Co","annotations":[{"segment":[9.33092211838006,17.947196261682244],"label":"Welding"},{"segment":[52.405813084112154,68.0198738317757],"label":"Welding"},{"segment":[90.09492523364486,103.55532242990655],"label":"Welding"}]},"TiiSh9R89vM":{"duration":209.793,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=TiiSh9R89vM","annotations":[{"segment":[29.12160530421217,41.882758190327614],"label":"Snow tubing"},{"segment":[52.02623868954758,71.65878159126366],"label":"Snow tubing"},{"segment":[82.78388923556943,86.05597971918877],"label":"Snow tubing"},{"segment":[142.3359360374415,161.31406084243372],"label":"Snow tubing"},{"segment":[175.72801404056162,202.56171450858034],"label":"Snow 
tubing"}]},"88qqQjRPFUQ":{"duration":30.953,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=88qqQjRPFUQ","annotations":[{"segment":[21.608548053627587,25.616390485920526],"label":"Fun sliding down"}]},"uMGfCaGMnEE":{"duration":73.38,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=uMGfCaGMnEE","annotations":[{"segment":[12.343057666146645,65.02962788923557],"label":"Mixing drinks"}]},"arfBwR8qgPw":{"duration":5.46,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=arfBwR8qgPw","annotations":[{"segment":[3.1559531981279254,5.209],"label":"High jump"}]},"rFTVKkMqpIQ":{"duration":230.48,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=rFTVKkMqpIQ","annotations":[{"segment":[29.84390833697348,110.74606949141966],"label":"Washing hands"}]},"L6Uf3epn3zA":{"duration":187.571,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=L6Uf3epn3zA","annotations":[{"segment":[11.683551401869158,16.35697196261682],"label":"Canoeing"},{"segment":[32.129766355140184,70.39339719626167],"label":"Canoeing"},{"segment":[87.33454672897196,114.20671495327102],"label":"Canoeing"},{"segment":[125.30608878504673,137.0042507788162],"label":"Canoeing"},{"segment":[146.64420872274144,157.74476635514017],"label":"Canoeing"},{"segment":[169.13744392523364,172.3507632398754],"label":"Canoeing"}]},"0xtS7g7vZLU":{"duration":145.195,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0xtS7g7vZLU","annotations":[{"segment":[53.43531318787103,58.1846619028243],"label":"Shuffleboard"},{"segment":[65.87408363179625,67.90951879534767],"label":"Shuffleboard"},{"segment":[80.57444870188972,123.54474659908598],"label":"Shuffleboard"}]},"qKSmh8Q0zBc":{"duration":78.414,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=qKSmh8Q0zBc","annotations":[]},"mRbqt5ugQSE":{"duration":165.164,"subset
":"validation","resolution":"1908x1080","url":"https://www.youtube.com/watch?v=mRbqt5ugQSE","annotations":[{"segment":[0.2,164.80100000000002],"label":"Surfing"}]},"nuEK3POl9jA":{"duration":40.01,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=nuEK3POl9jA","annotations":[{"segment":[10.610540717628705,31.769207207488304],"label":"Clean and jerk"}]},"66BGqvwHbUw":{"duration":30.26,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=66BGqvwHbUw","annotations":[{"segment":[17.629432490639626,21.40548228783151],"label":"Playing guitarra"},{"segment":[26.975155738689548,29.00478250468019],"label":"Playing guitarra"}]},"s0-xTG38cPw":{"duration":46.12,"subset":"validation","resolution":"176x144","url":"https://www.youtube.com/watch?v=s0-xTG38cPw","annotations":[{"segment":[0,46.114829],"label":"Smoking hookah"}]},"jaaWdcA_COY":{"duration":169.019,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jaaWdcA_COY","annotations":[{"segment":[30.861724906398496,47.1808401712079],"label":"Hula hoop"},{"segment":[63.497946090211016,86.92088845781227],"label":"Hula hoop"}]},"3ZR5ameytF4":{"duration":100.102,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3ZR5ameytF4","annotations":[]},"5JkN8t5k0jU":{"duration":41.75,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=5JkN8t5k0jU","annotations":[{"segment":[7.522721937448926,34.03136101677067],"label":"Clean and jerk"}]},"TlDkQ8SNG64":{"duration":25.844,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TlDkQ8SNG64","annotations":[{"segment":[5.393656786271451,25.601],"label":"Camel 
ride"}]},"Hz7-mBFGsSs":{"duration":204.13,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Hz7-mBFGsSs","annotations":[]},"si2M-Sm9fnI":{"duration":239.03,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=si2M-Sm9fnI","annotations":[]},"akUXL2VzFEs":{"duration":56.982,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=akUXL2VzFEs","annotations":[{"segment":[0.2,55.82793059657321],"label":"River tubing"}]},"cW2R4AuUnK8":{"duration":193.144,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=cW2R4AuUnK8","annotations":[{"segment":[9.02341121495327,187.98773364485982],"label":"Rafting"}]},"exPOJAoyG7Q":{"duration":88.59,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=exPOJAoyG7Q","annotations":[{"segment":[14.50288600624025,77.62497081435257],"label":"Tumbling"}]},"h4Cf5u1j0TU":{"duration":101.727,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=h4Cf5u1j0TU","annotations":[{"segment":[0.20000000000000004,71.03460470882577],"label":"Having an ice cream"},{"segment":[75.62258601520212,96.82222377569978],"label":"Having an ice cream"}]},"JZz2O0y0ufY":{"duration":14.28,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JZz2O0y0ufY","annotations":[{"segment":[0.01,14.280272108843537],"label":"Chopping wood"}]},"8UXuHMmOYGI":{"duration":60.256,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=8UXuHMmOYGI","annotations":[{"segment":[5.167168486739469,60.201],"label":"Sharpening 
knives"}]},"JvPVYSb_5o8":{"duration":24.59,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=JvPVYSb_5o8","annotations":[]},"iEGYd3DJ3Wo":{"duration":17.508,"subset":"training","resolution":"384x240","url":"https://www.youtube.com/watch?v=iEGYd3DJ3Wo","annotations":[{"segment":[0.01,17.507346],"label":"Playing ice hockey"}]},"WvdGPaEAhF8":{"duration":99.28999999999999,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=WvdGPaEAhF8","annotations":[]},"faF9-lWmK3Y":{"duration":71.59,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=faF9-lWmK3Y","annotations":[{"segment":[13.629219137066993,70.47674250155298],"label":"Hand washing clothes"}]},"kdfJW8YV378":{"duration":231.55,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kdfJW8YV378","annotations":[{"segment":[7.788577299999999,168.73783605],"label":"Sailing"}]},"qlK-c7hRqcA":{"duration":174.85,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qlK-c7hRqcA","annotations":[]},"RNqJj9tt3F0":{"duration":214.762,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=RNqJj9tt3F0","annotations":[{"segment":[45.49455118288229,94.66880871143888],"label":"Playing ice hockey"},{"segment":[96.67592126362487,107.71504030064777],"label":"Playing ice hockey"},{"segment":[172.94619824669226,179.30205375700933],"label":"Playing ice hockey"}]},"eFi84LnF1TI":{"duration":47.18,"subset":"testing","resolution":"800x480","url":"https://www.youtube.com/watch?v=eFi84LnF1TI","annotations":[]},"PVAHm5MjHdw":{"duration":172.873,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PVAHm5MjHdw","annotations":[{"segment":[9.424506749740395,169.10257634267913],"label":"Baton 
twirling"}]},"FGtoyNOi6gg":{"duration":105.16,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FGtoyNOi6gg","annotations":[{"segment":[13.617072530520268,22.968556075576355],"label":"Bungee jumping"},{"segment":[50.85894559591907,56.108901270336524],"label":"Bungee jumping"},{"segment":[62.671345503900156,85.3117792199688],"label":"Bungee jumping"},{"segment":[99.58509612792511,101.38976838065523],"label":"Bungee jumping"}]},"cRpzDPS6zeY":{"duration":165.47,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cRpzDPS6zeY","annotations":[{"segment":[0.01,165.46539682539682],"label":"Cumbia"}]},"LXHc1X1pKEM":{"duration":201.36,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=LXHc1X1pKEM","annotations":[{"segment":[135.53610218408738,195.21596177847115],"label":"Shaving legs"}]},"VOGF4tBFEuw":{"duration":30.046,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=VOGF4tBFEuw","annotations":[{"segment":[0.2,29.893061224489795],"label":"Cutting the grass"}]},"KOFXi1RYx_g":{"duration":79.16,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=KOFXi1RYx_g","annotations":[{"segment":[5.51796476,65.16792511700469],"label":"Playing polo"}]},"JI5_cMwgnxI":{"duration":144.452,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JI5_cMwgnxI","annotations":[{"segment":[45.07062023977558,52.056566376940786],"label":"Trimming branches or hedges"}]},"M_WEOecjwLY":{"duration":100.97999999999999,"subset":"validation","resolution":"384x288","url":"https://www.youtube.com/watch?v=M_WEOecjwLY","annotations":[{"segment":[35.15925111,75.86549841],"label":"Plataform diving"},{"segment":[82.23623912948518,98.93555205616225],"label":"Plataform 
diving"}]},"qObVIn_GI10":{"duration":172.223,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=qObVIn_GI10","annotations":[]},"F_DHgp5hERs":{"duration":133.306,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=F_DHgp5hERs","annotations":[{"segment":[0.01,133.201],"label":"Using the rowing machine"}]},"uA3dbKjT9rM":{"duration":166.63,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uA3dbKjT9rM","annotations":[{"segment":[82.14343319855243,91.24159826801235],"label":"Getting a piercing"},{"segment":[96.18060216286202,101.37955363112484],"label":"Getting a piercing"}]},"GG_Bi89pNlg":{"duration":34.76,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GG_Bi89pNlg","annotations":[{"segment":[0.01,34.67892871216849],"label":"Discus throw"}]},"FCzT4Knfhqs":{"duration":142.664,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=FCzT4Knfhqs","annotations":[{"segment":[38.503545020712394,78.1199092616766],"label":"Hitting a pinata"},{"segment":[90.13835684039606,104.15987825897035],"label":"Hitting a pinata"}]},"WmPkqkqAxyc":{"duration":39.358,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=WmPkqkqAxyc","annotations":[{"segment":[0.01,38.866617720748835],"label":"Cutting the grass"}]},"tl3wQEusmj8":{"duration":107.207,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tl3wQEusmj8","annotations":[{"segment":[12.538338533541342,100.80824180967238],"label":"Canoeing"}]},"DzxPreFrmFE":{"duration":195.443,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DzxPreFrmFE","annotations":[{"segment":[11.99441576251715,195.441994],"label":"Playing ten 
pins"}]},"VeU_EBE-YPE":{"duration":144.41,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VeU_EBE-YPE","annotations":[{"segment":[0,144.404647],"label":"Playing guitarra"}]},"kdQfR6oqJ_M":{"duration":80.06,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=kdQfR6oqJ_M","annotations":[{"segment":[0,24.73066426521061],"label":"Playing polo"},{"segment":[48.96171915132605,80.062403],"label":"Playing polo"}]},"jcRdr9jC9vQ":{"duration":134.63,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jcRdr9jC9vQ","annotations":[]},"7Ozl-ud1zTo":{"duration":155.174,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7Ozl-ud1zTo","annotations":[{"segment":[8.313923110213235,26.68189195756526],"label":"Surfing"},{"segment":[37.07429538435654,154.961],"label":"Surfing"}]},"k3WPc9HmtLM":{"duration":219.87,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=k3WPc9HmtLM","annotations":[]},"nSR-JSqGoWQ":{"duration":143.082,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=nSR-JSqGoWQ","annotations":[{"segment":[0.01,143.041],"label":"Beach soccer"}]},"ZlHaPkhGr_g":{"duration":231.09,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=ZlHaPkhGr_g","annotations":[{"segment":[2.523549,198.27885000000003],"label":"Playing harmonica"}]},"ZlwkO1oFBHw":{"duration":140.97,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZlwkO1oFBHw","annotations":[{"segment":[11.761792716068642,129.15987328393135],"label":"Layup drill in basketball"}]},"Cdmd9VB8tJI":{"duration":59.1,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Cdmd9VB8tJI","annotations":[{"segment":[1.5672563354134166,58.08067595943838],"label":"Bathing 
dog"}]},"YmhbwAv0Xbc":{"duration":125.551,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=YmhbwAv0Xbc","annotations":[{"segment":[6.265260530421218,86.53891107644306],"label":"Snow tubing"},{"segment":[96.71995943837754,125.301],"label":"Snow tubing"}]},"rdkrg8Bj9_I":{"duration":82.62,"subset":"validation","resolution":"854x478","url":"https://www.youtube.com/watch?v=rdkrg8Bj9_I","annotations":[{"segment":[5.542143113104523,75.52781079719189],"label":"Long jump"}]},"YIYZHQUOtnA":{"duration":123.089,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YIYZHQUOtnA","annotations":[{"segment":[18.818564511700465,92.55661321060842],"label":"Playing ten pins"}]},"q2TF-3bWZuU":{"duration":134.26,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=q2TF-3bWZuU","annotations":[{"segment":[63.463375525741036,126.29840079875196],"label":"Horseback riding"}]},"3cmHM1Yu92s":{"duration":75.442,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3cmHM1Yu92s","annotations":[{"segment":[4.354665185647426,24.24489265522621],"label":"Elliptical trainer"},{"segment":[48.13670434945398,66.8499952823713],"label":"Elliptical trainer"}]},"4Lrq6RXeRHI":{"duration":9.78,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=4Lrq6RXeRHI","annotations":[]},"NJRrhNrEMsk":{"duration":226.046,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=NJRrhNrEMsk","annotations":[]},"ZLmoqxkCJL0":{"duration":158.01,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZLmoqxkCJL0","annotations":[{"segment":[111.40001872074883,134.8137394695788],"label":"Removing 
curlers"}]},"O9crPOB_9tE":{"duration":227.649,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=O9crPOB_9tE","annotations":[{"segment":[0.01,85.59012885179408],"label":"Surfing"},{"segment":[64.99167460530421,227.44843400000002],"label":"Surfing"}]},"H_JLBvfP88U":{"duration":214.042,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=H_JLBvfP88U","annotations":[]},"ucR3VztMyeE":{"duration":30.581,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ucR3VztMyeE","annotations":[{"segment":[0.2,28.848773790951636],"label":"Using the monkey bar"}]},"z6GHmDSskgY":{"duration":189.73,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=z6GHmDSskgY","annotations":[{"segment":[0.8877425897035882,188.51721840873634],"label":"Doing step aerobics"}]},"j9QPrMZuegY":{"duration":121.46,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=j9QPrMZuegY","annotations":[{"segment":[6.821667575663027,40.93000545397816],"label":"Clean and jerk"},{"segment":[40.93000545397816,66.32176809672387],"label":"Clean and jerk"},{"segment":[71.24852801248049,96.45079988923558],"label":"Clean and jerk"},{"segment":[100.61959674102965,115.96834878627146],"label":"Clean and jerk"}]},"rScqKu8Mwhk":{"duration":55.26,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=rScqKu8Mwhk","annotations":[{"segment":[16.55309458970359,17.760091070202808],"label":"Vacuuming floor"},{"segment":[26.55663465,39.83088385647426],"label":"Vacuuming floor"},{"segment":[43.1932311950078,45.089939950078005],"label":"Vacuuming 
floor"}]},"X5UoLcloHIM":{"duration":162.099,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=X5UoLcloHIM","annotations":[{"segment":[12.391305252210088,153.50045353198126],"label":"Longboarding"}]},"ymJTN8aKZEw":{"duration":86.286,"subset":"validation","resolution":"176x144","url":"https://www.youtube.com/watch?v=ymJTN8aKZEw","annotations":[{"segment":[0.01,86.2853514739229],"label":"Hitting a pinata"}]},"b1RAYvxWawA":{"duration":181.859,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=b1RAYvxWawA","annotations":[{"segment":[0.2,13.618114553153555],"label":"Canoeing"},{"segment":[27.80365054602184,172.49611631825272],"label":"Canoeing"}]},"HfN967uah8o":{"duration":154.645,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=HfN967uah8o","annotations":[{"segment":[12.541734789391576,146.90201092043682],"label":"Snow tubing"}]},"eQnwUWfah9w":{"duration":168.252,"subset":"training","resolution":"640x356","url":"https://www.youtube.com/watch?v=eQnwUWfah9w","annotations":[{"segment":[17.323897396712198,151.7153438681765],"label":"Futsal"}]},"-YAhgJdZ4TU":{"duration":152.23,"subset":"testing","resolution":"1152x720","url":"https://www.youtube.com/watch?v=-YAhgJdZ4TU","annotations":[]},"lM3uGxHWZBA":{"duration":144.2,"subset":"training","resolution":"360x240","url":"https://www.youtube.com/watch?v=lM3uGxHWZBA","annotations":[{"segment":[2.6994555700595373,10.122958387723264],"label":"Cricket"},{"segment":[22.945372345506062,35.31787704161228],"label":"Cricket"}]},"KEU4Sj6x1p4":{"duration":183.3,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KEU4Sj6x1p4","annotations":[{"segment":[12.69888544,172.0583619344774],"label":"Triple 
jump"}]},"p2iL_CeHNwM":{"duration":126.967,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=p2iL_CeHNwM","annotations":[]},"FWZ6wgaJ_L4":{"duration":29.165,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=FWZ6wgaJ_L4","annotations":[]},"v0azz4XzW_s":{"duration":32.392,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=v0azz4XzW_s","annotations":[]},"HgFb-8lxeMI":{"duration":214.99,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=HgFb-8lxeMI","annotations":[]},"6d9z33GFEGo":{"duration":165.79,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=6d9z33GFEGo","annotations":[{"segment":[47.19461388455539,152.44506786271452],"label":"Preparing pasta"}]},"9JMbahMzBjk":{"duration":52.617,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=9JMbahMzBjk","annotations":[{"segment":[0.9343103157107153,51.91158563035246],"label":"Decorating the Christmas tree"}]},"O0nOzufJ_OM":{"duration":181.21,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=O0nOzufJ_OM","annotations":[{"segment":[21.900500479719188,36.029855627925116],"label":"Discus throw"},{"segment":[57.78906255616225,67.96219826287052],"label":"Discus throw"},{"segment":[77.57015976365054,87.46070836739469],"label":"Discus throw"},{"segment":[115.43683156084242,122.21892203198128],"label":"Discus throw"},{"segment":[146.23882578393133,160.08559382917315],"label":"Discus throw"},{"segment":[164.32440037363492,167.71544560920435],"label":"Discus throw"}]},"NOXPH0_y4Ww":{"duration":208.747,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=NOXPH0_y4Ww","annotations":[{"segment":[0.2,30.60513884555382],"label":"Making a lemonade"},{"segment":[126.00200780031201,185.25876599063963],"label":"Making a 
lemonade"}]},"RoAtTRjWKEg":{"duration":6.97,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RoAtTRjWKEg","annotations":[{"segment":[0,6.729598721911814],"label":"Using the balance beam"}]},"ox2AGCcE9a0":{"duration":40.06,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=ox2AGCcE9a0","annotations":[{"segment":[14.247125990639626,33.618218346333855],"label":"Clean and jerk"}]},"KpOSIph2hHw":{"duration":62.694,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KpOSIph2hHw","annotations":[{"segment":[5.4071954760328635,57.91750388724782],"label":"River tubing"}]},"xftFhOCEqFs":{"duration":166.79,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=xftFhOCEqFs","annotations":[{"segment":[12.745198127925116,25.750502340093608],"label":"Pole vault"},{"segment":[28.351563182527304,33.03347269890796],"label":"Pole vault"},{"segment":[33.81379095163807,36.414851794071765],"label":"Pole vault"},{"segment":[36.9350639625585,81.67331045241811],"label":"Pole vault"},{"segment":[86.61532605304212,141.23760374414977],"label":"Pole vault"}]},"wohWWAB47KM":{"duration":222.215,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=wohWWAB47KM","annotations":[{"segment":[0,221.961],"label":"Wakeboarding"}]},"USaP2UCPBEc":{"duration":146.17,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=USaP2UCPBEc","annotations":[{"segment":[24.37480031201248,27.56402652106084],"label":"Drum corps"}]},"K3Z3z8t-RIQ":{"duration":203.756,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=K3Z3z8t-RIQ","annotations":[{"segment":[9.853990894329652,196.4440755351014],"label":"Rope 
skipping"}]},"c8VbTNlH47w":{"duration":63.019,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=c8VbTNlH47w","annotations":[]},"Ir_Ul8FaXs4":{"duration":37.733,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Ir_Ul8FaXs4","annotations":[{"segment":[12.072025159774316,16.891431468185534],"label":"Building sandcastles"}]},"9IwS7pfJXu4":{"duration":211.09,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=9IwS7pfJXu4","annotations":[{"segment":[0.01,211.09256235827664],"label":"Preparing salad"}]},"QXAs-KJj7K8":{"duration":126.062,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=QXAs-KJj7K8","annotations":[{"segment":[41.08300936037441,51.30461934477379],"label":"Croquet"},{"segment":[59.363965678627146,68.02381591263651],"label":"Croquet"},{"segment":[75.69124024960999,81.1960577223089],"label":"Croquet"},{"segment":[104.98473322932918,122.67878939157566],"label":"Croquet"}]},"5i2Vf0seCWI":{"duration":141.6,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=5i2Vf0seCWI","annotations":[]},"8w9pf_Ecm_U":{"duration":100.079,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=8w9pf_Ecm_U","annotations":[{"segment":[44.722378504672896,88.82144859813084],"label":"Baton twirling"}]},"hZRPw0IZ7Os":{"duration":27.702,"subset":"testing","resolution":"853x480","url":"https://www.youtube.com/watch?v=hZRPw0IZ7Os","annotations":[]},"DWsO49YhWUI":{"duration":64.06,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DWsO49YhWUI","annotations":[{"segment":[25.752102964118567,45.31571606864275],"label":"Washing 
face"}]},"G72bQBMZmzs":{"duration":197.626,"subset":"training","resolution":"810x480","url":"https://www.youtube.com/watch?v=G72bQBMZmzs","annotations":[{"segment":[19.3628946659464,175.80282755563934],"label":"Sumo"}]},"PLvg8riZZVw":{"duration":159.034,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=PLvg8riZZVw","annotations":[{"segment":[0.2,152.29488317757009],"label":"Ice fishing"}]},"bIxkfJuPGIU":{"duration":100.57,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=bIxkfJuPGIU","annotations":[]},"NwfS7eZiMLg":{"duration":134.72,"subset":"training","resolution":"810x1440","url":"https://www.youtube.com/watch?v=NwfS7eZiMLg","annotations":[{"segment":[69.79507365,128.6871099843994],"label":"Playing bagpipes"},{"segment":[16.3045261,58.672836149999995],"label":"Playing bagpipes"}]},"tP0viuKibJU":{"duration":6.22,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=tP0viuKibJU","annotations":[{"segment":[1.3785623619344773,5.028840165366614],"label":"Tumbling"}]},"ykPa3jGYdAM":{"duration":107.42,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ykPa3jGYdAM","annotations":[]},"D0pVkTEYQg8":{"duration":21.41,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=D0pVkTEYQg8","annotations":[{"segment":[0.01,20.82845748829953],"label":"Spinning"}]},"ZY8UyWtoMWg":{"duration":37.57,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZY8UyWtoMWg","annotations":[{"segment":[19.258048361934478,24.760347893915757],"label":"Roof shingle removal"},{"segment":[31.784560062402495,33.189402496099845],"label":"Roof shingle 
removal"}]},"aYbZ5DFWN0c":{"duration":234.15,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aYbZ5DFWN0c","annotations":[]},"-fWbEt6saQ0":{"duration":176.82,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=-fWbEt6saQ0","annotations":[]},"cU95GxjPb3A":{"duration":46.58,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=cU95GxjPb3A","annotations":[]},"BgXRQDT0byI":{"duration":70.427,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=BgXRQDT0byI","annotations":[]},"CqscMsSNiNY":{"duration":235.27,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=CqscMsSNiNY","annotations":[{"segment":[28.995127581974028,227.55669647425896],"label":"Windsurfing"}]},"Hcoc9euI0tk":{"duration":69.242,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Hcoc9euI0tk","annotations":[{"segment":[0.2,69.00099999999999],"label":"Skiing"}]},"phEu4AcX3go":{"duration":218.27,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=phEu4AcX3go","annotations":[]},"8vHbM54afHA":{"duration":157.989,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=8vHbM54afHA","annotations":[]},"oT42qP2zIXo":{"duration":155.156,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oT42qP2zIXo","annotations":[{"segment":[0.2,154.95573696145127],"label":"Sharpening knives"}]},"SXRSA62VbZg":{"duration":197.7,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SXRSA62VbZg","annotations":[{"segment":[85.69931848361934,176.02270091419655],"label":"Getting a tattoo"}]},"KoP6ZgL6fMY":{"duration":172.64,"subset":"training","resolution":"640x356","url":"https://www.youtube.com/watch?v=KoP6ZgL6fMY","annotations":[{"segment":[10.0614592,86.56592286973478],"label":"Long 
jump"}]},"0F8F-ON083s":{"duration":66.78,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0F8F-ON083s","annotations":[{"segment":[4.579912636505461,32.37165522620905],"label":"Plataform diving"},{"segment":[32.996188767550706,62.869709828393134],"label":"Plataform diving"}]},"diY-XBT5cD4":{"duration":115.868,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=diY-XBT5cD4","annotations":[]},"-DpnaHTk8PA":{"duration":209.607,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=-DpnaHTk8PA","annotations":[{"segment":[17.715649759108405,130.33020583387477],"label":"Arm wrestling"},{"segment":[134.5736528743732,206.71225256284674],"label":"Arm wrestling"}]},"-OH1BDqao9w":{"duration":208.562,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=-OH1BDqao9w","annotations":[{"segment":[58.24107994523863,95.33316437963641],"label":"Hand car wash"}]},"tAgVokWkdnQ":{"duration":207.03,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tAgVokWkdnQ","annotations":[{"segment":[60.05420592823713,85.88397191887677],"label":"Drum corps"}]},"jt-Vnap5KP4":{"duration":50.67,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=jt-Vnap5KP4","annotations":[]},"wsmMniNThlI":{"duration":140.9,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wsmMniNThlI","annotations":[{"segment":[1.098603744149766,140.18183775351014],"label":"Horseback riding"}]},"JyjONoyBr4Q":{"duration":172.339,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JyjONoyBr4Q","annotations":[{"segment":[6.448960998439938,169.84235881435256],"label":"Disc dog"}]},"VIROYxBPp70":{"duration":37.85,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=VIROYxBPp70","annotations":[{"segment":[0.01,37.801],"label":"Doing 
kickboxing"}]},"dEUFGAwRhEM":{"duration":213.252,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dEUFGAwRhEM","annotations":[{"segment":[15.965129485179407,58.538808112324496],"label":"Canoeing"},{"segment":[83.15171606864274,109.09505148205928],"label":"Canoeing"},{"segment":[139.02966926677067,210.89253354134166],"label":"Canoeing"}]},"J2TQsCCFPF8":{"duration":61.88,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=J2TQsCCFPF8","annotations":[]},"Kofjs0rkwxc":{"duration":131.43,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Kofjs0rkwxc","annotations":[]},"dSOVaLk3ZmU":{"duration":8.36,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=dSOVaLk3ZmU","annotations":[{"segment":[0,8.359182],"label":"Kayaking"}]},"60Y0DfZhlHM":{"duration":169.901,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=60Y0DfZhlHM","annotations":[{"segment":[5.020353137486762,169.10676200239513],"label":"Fixing the roof"}]},"E9EU08OD-M0":{"duration":38.197,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=E9EU08OD-M0","annotations":[]},"LChqFAESyCg":{"duration":140.74,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=LChqFAESyCg","annotations":[{"segment":[23.07737975442248,115.36696951143182],"label":"Putting on makeup"}]},"ZT63nK4lexg":{"duration":93.18,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZT63nK4lexg","annotations":[{"segment":[41.94828237129485,79.97752106084243],"label":"Using parallel 
bars"}]},"r_jT3a-_K4I":{"duration":222.123,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=r_jT3a-_K4I","annotations":[]},"JKFxKhCQPUs":{"duration":45.91,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JKFxKhCQPUs","annotations":[{"segment":[9.1525389,40.600006205148205],"label":"Using the pommel horse"}]},"I7Sole68Gag":{"duration":96.43,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=I7Sole68Gag","annotations":[{"segment":[0.01,95.37938530421216],"label":"Playing water polo"}]},"zE8dA1_QKZg":{"duration":203.291,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=zE8dA1_QKZg","annotations":[]},"D4lIN0Fn3MM":{"duration":182.695,"subset":"testing","resolution":"322x240","url":"https://www.youtube.com/watch?v=D4lIN0Fn3MM","annotations":[]},"sf6q25u_nac":{"duration":155.46,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=sf6q25u_nac","annotations":[]},"6y_gnZgf0N8":{"duration":11.564,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=6y_gnZgf0N8","annotations":[{"segment":[0.01,11.521],"label":"Doing a powerbomb"}]},"TSO5Phe2ZM4":{"duration":125.23,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TSO5Phe2ZM4","annotations":[{"segment":[2.5387722308892355,121.47048673946958],"label":"Playing squash"}]},"JLipYEVwKTg":{"duration":28.54,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=JLipYEVwKTg","annotations":[{"segment":[16.071721689547584,22.21548233541342],"label":"Smoking hookah"}]},"4mfnfTpcfck":{"duration":112.014,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=4mfnfTpcfck","annotations":[{"segment":[24.63232605304212,97.48112012480499],"label":"Drum 
corps"}]},"v0mUPr68x2U":{"duration":204.15,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=v0mUPr68x2U","annotations":[{"segment":[71.31691731669267,75.13746645865834],"label":"Swimming"},{"segment":[88.19100936037441,154.09548205928238],"label":"Swimming"},{"segment":[173.21520124804994,201.23530733229327],"label":"Swimming"}]},"z9l32VOM6wY":{"duration":203.361,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=z9l32VOM6wY","annotations":[{"segment":[10.462235569422777,162.95724492979718],"label":"Sharpening knives"}]},"_MWyhJS4KbM":{"duration":66.57,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=_MWyhJS4KbM","annotations":[{"segment":[27.812146645865834,41.195435257410296],"label":"Cleaning windows"}]},"Gl8HTsP7Jfk":{"duration":150.09,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Gl8HTsP7Jfk","annotations":[{"segment":[41.306438629999995,127.82954070999999],"label":"Playing lacrosse"}]},"owLQzBg4olo":{"duration":30.326,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=owLQzBg4olo","annotations":[{"segment":[5.629806273010921,30.325259],"label":"Knitting"}]},"Cx6oTdEq40w":{"duration":87.05,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Cx6oTdEq40w","annotations":[{"segment":[0.01,67.49555406978183],"label":"Rock climbing"},{"segment":[77.95261174256494,86.7799980897195],"label":"Rock climbing"}]},"9GP266ETAuE":{"duration":145.32999999999998,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9GP266ETAuE","annotations":[{"segment":[14.84598537129485,131.80061821996878],"label":"Hammer throw"}]},"LSCQ1yqocHg":{"duration":24.59,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LSCQ1yqocHg","annotations":[{"segment":[7.403832871682214,14.193876489753467],"label":"Washing 
hands"}]},"rdkPwRWW91s":{"duration":142.85,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=rdkPwRWW91s","annotations":[{"segment":[12.034092775955937,136.1635309173167],"label":"Windsurfing"}]},"-ZBsdK10Trs":{"duration":49.25,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-ZBsdK10Trs","annotations":[{"segment":[8.605220692667707,41.71995389391575],"label":"Dodgeball"}]},"kanXjG-zLyU":{"duration":146.031,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kanXjG-zLyU","annotations":[]},"P5Y-b-lcBs0":{"duration":105.744,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=P5Y-b-lcBs0","annotations":[{"segment":[0.2,87.48933862470544],"label":"Playing pool"}]},"w8rTULZCkzk":{"duration":185.2,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=w8rTULZCkzk","annotations":[{"segment":[22.82525148673947,164.1106689173167],"label":"Cleaning shoes"}]},"BD_ON66t6rM":{"duration":48.739,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=BD_ON66t6rM","annotations":[{"segment":[1.748794436131187,48.205897232449296],"label":"Canoeing"}]},"QwQqY5VDzIk":{"duration":181.163,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=QwQqY5VDzIk","annotations":[]},"BOOX9aGlSEs":{"duration":222.819,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BOOX9aGlSEs","annotations":[{"segment":[45.54079944763897,56.64215770309068],"label":"Skiing"}]},"1RKExOpIGas":{"duration":215.25,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1RKExOpIGas","annotations":[{"segment":[5.707358814352574,201.45491419656787],"label":"Doing 
motocross"}]},"a68fUj833qg":{"duration":103.93,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=a68fUj833qg","annotations":[{"segment":[21.402639800312013,85.44841799063963],"label":"Mixing drinks"}]},"a8IdfA5fXJs":{"duration":155.551,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=a8IdfA5fXJs","annotations":[]},"xH7qxBIz-tU":{"duration":190.66,"subset":"training","resolution":"1280x544","url":"https://www.youtube.com/watch?v=xH7qxBIz-tU","annotations":[{"segment":[4.164159175890846,178.4639644305772],"label":"Tai chi"}]},"MOOeHWuuxlo":{"duration":49.738,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=MOOeHWuuxlo","annotations":[{"segment":[7.230644043360916,42.91936834242633],"label":"Elliptical trainer"}]},"pIk9qMEyEd4":{"duration":108.832,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pIk9qMEyEd4","annotations":[{"segment":[10.014444617784712,100.99312792511701],"label":"Assembling bicycle"}]},"XHQLkc06XIc":{"duration":189.43,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=XHQLkc06XIc","annotations":[{"segment":[20.981927756021804,137.12133068724108],"label":"Playing violin"}]},"PgS2F7NxNBg":{"duration":175.171,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PgS2F7NxNBg","annotations":[{"segment":[8.566086709102738,167.8843795439937],"label":"Painting fence"}]},"DwaoxjXwC1M":{"duration":31.998,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=DwaoxjXwC1M","annotations":[]},"GjcRJzPjbRI":{"duration":206.45,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=GjcRJzPjbRI","annotations":[]},"1_YFTTzzLrI":{"duration":28.909,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=1_YFTTzzLrI","annotations":[{"segment":[4.2393621653666145,7.486533185647427],"label":"Playing 
rubik cube"},{"segment":[8.74932191575663,28.708842],"label":"Playing rubik cube"}]},"TNqc2yWZztE":{"duration":111.03999999999999,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=TNqc2yWZztE","annotations":[]},"kzeKQBi6Rhg":{"duration":187.08,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=kzeKQBi6Rhg","annotations":[{"segment":[23.911078230000005,70.76414475],"label":"Playing violin"},{"segment":[79.28016291,123.45739851634882],"label":"Playing violin"}]},"LTmYxVYBa90":{"duration":35.55,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LTmYxVYBa90","annotations":[{"segment":[0.01,35.549749],"label":"Camel ride"}]},"6LOfAoP4Mg8":{"duration":106.51,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=6LOfAoP4Mg8","annotations":[{"segment":[8.557350228703026,63.889342969637156],"label":"Hanging wallpaper"}]},"e0E2EVuhYu0":{"duration":168.809,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=e0E2EVuhYu0","annotations":[{"segment":[0,168.761],"label":"Scuba diving"}]},"pULb9WvZwU4":{"duration":118.21000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pULb9WvZwU4","annotations":[{"segment":[79.2655694227769,113.36819812792513],"label":"Bungee jumping"}]},"xf9iLflgRro":{"duration":205.752,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=xf9iLflgRro","annotations":[{"segment":[60.325280029653385,148.7596351698403],"label":"Fixing the 
roof"}]},"MnaDFXhhEgs":{"duration":26.146,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=MnaDFXhhEgs","annotations":[]},"YJTz7mpI-mY":{"duration":47.51,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=YJTz7mpI-mY","annotations":[{"segment":[14.37022464898596,46.518046801872075],"label":"Shaving"}]},"1H2bRd91sZw":{"duration":120.09,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1H2bRd91sZw","annotations":[{"segment":[44.88836154,108.93275751],"label":"Making a sandwich"}]},"mz2-lgoNz1M":{"duration":143.64,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mz2-lgoNz1M","annotations":[{"segment":[54.11649171681153,75.85272234426793],"label":"Getting a tattoo"}]},"ZGSi-QAkf5A":{"duration":123.35,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=ZGSi-QAkf5A","annotations":[{"segment":[51.46516380655226,53.004308892355695],"label":"Cricket"},{"segment":[62.04678627145086,63.008751950078],"label":"Cricket"},{"segment":[72.62840873634946,81.09370670826834],"label":"Cricket"},{"segment":[86.48071450858033,94.17643993759751],"label":"Cricket"}]},"1pzRdAVCrDU":{"duration":84.71000000000001,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1pzRdAVCrDU","annotations":[]},"We0DJIKCBOw":{"duration":24.96,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=We0DJIKCBOw","annotations":[{"segment":[10.2163698,24.216965031201248],"label":"Clean and jerk"},{"segment":[0,9.900751165366614],"label":"Clean and 
jerk"}]},"NvRH4SoF09c":{"duration":65.78,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=NvRH4SoF09c","annotations":[]},"GGv0sCOf_tM":{"duration":17.42,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=GGv0sCOf_tM","annotations":[{"segment":[1.2089952985874537,17.41496598639456],"label":"Hand washing clothes"}]},"RUG5nCMr11w":{"duration":24.38,"subset":"testing","resolution":"800x480","url":"https://www.youtube.com/watch?v=RUG5nCMr11w","annotations":[]},"6aHGUTy1iho":{"duration":81.94,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6aHGUTy1iho","annotations":[{"segment":[8.87453198127925,38.243486739469574],"label":"Javelin throw"},{"segment":[39.09582473,74.50776131045241],"label":"Javelin throw"}]},"vNzTWjEFr-Y":{"duration":69.52,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=vNzTWjEFr-Y","annotations":[{"segment":[25.08217628705148,68.20401716068643],"label":"Playing guitarra"}]},"1oaJBEdY6ao":{"duration":39.29,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=1oaJBEdY6ao","annotations":[{"segment":[16.83571450858034,37.382032761310455],"label":"Mowing the 
lawn"}]},"7JoYkshshVI":{"duration":75.001,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=7JoYkshshVI","annotations":[{"segment":[10.296474117468101,12.16856032064412],"label":"Shuffleboard"},{"segment":[15.093695013106649,21.29498056112721],"label":"Shuffleboard"},{"segment":[40.71787409672387,42.47295487675507],"label":"Shuffleboard"},{"segment":[49.25926722620904,56.39659573166926],"label":"Shuffleboard"},{"segment":[66.81007502652106,68.33114503588143],"label":"Shuffleboard"}]},"vWB4t0Vl64A":{"duration":9.1,"subset":"testing","resolution":"810x480","url":"https://www.youtube.com/watch?v=vWB4t0Vl64A","annotations":[]},"nt9tMdFVQ1c":{"duration":14.931,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nt9tMdFVQ1c","annotations":[{"segment":[0.2,14.729954648526078],"label":"Playing beach volleyball"}]},"my4UPLGI6w4":{"duration":76.533,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=my4UPLGI6w4","annotations":[{"segment":[19.470815446621906,72.0205677830705],"label":"Blowing leaves"}]},"vbLxVuLLPek":{"duration":60.19,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=vbLxVuLLPek","annotations":[{"segment":[6.051361120904836,47.83342536],"label":"Playing bagpipes"}]},"pYvqbfVY-s8":{"duration":212.6,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=pYvqbfVY-s8","annotations":[{"segment":[15.413915756630264,200.71238689547582],"label":"Playing water polo"}]},"zEdAxKm9SLA":{"duration":122.555,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zEdAxKm9SLA","annotations":[{"segment":[74.34148049921997,96.52590483619345],"label":"Trimming branches or 
hedges"}]},"-SYV9IB-36s":{"duration":220.17000000000002,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-SYV9IB-36s","annotations":[{"segment":[13.39577512324493,197.15833130109203],"label":"Putting on makeup"}]},"VuEy38XdUgA":{"duration":99.126,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=VuEy38XdUgA","annotations":[{"segment":[0,98.92600907029478],"label":"Shuffleboard"}]},"MP31A6fHsh4":{"duration":150.768,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MP31A6fHsh4","annotations":[{"segment":[2.8420735569643876,16.248826186253233],"label":"Curling"},{"segment":[18.836094237519504,20.717743729349515],"label":"Curling"},{"segment":[32.007640680329594,150.76716553287983],"label":"Curling"},{"segment":[38.35820771525589,45.17918712313969],"label":"Curling"},{"segment":[48.707279920320964,60.702795430737304],"label":"Curling"},{"segment":[81.1657336543887,84.22341407861246],"label":"Curling"},{"segment":[98.33578526733757,122.79722866112775],"label":"Curling"},{"segment":[127.26614620422403,133.14630039559316],"label":"Curling"},{"segment":[139.9672797793685,145.84743442055415],"label":"Curling"}]},"3ZUy7h-dN38":{"duration":190.92,"subset":"training","resolution":"322x240","url":"https://www.youtube.com/watch?v=3ZUy7h-dN38","annotations":[{"segment":[0,184.32970202808113],"label":"Playing guitarra"}]},"SK7VENBeVqE":{"duration":89.26,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SK7VENBeVqE","annotations":[]},"Kic33dtLja8":{"duration":210.65,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Kic33dtLja8","annotations":[]},"nh1GBPeyyMo":{"duration":136.84,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=nh1GBPeyyMo","annotations":[{"segment":[1.4943000767649754,132.35229251346922],"label":"Wrapping 
presents"}]},"xfVx1-emCV8":{"duration":90.558,"subset":"training","resolution":"642x480","url":"https://www.youtube.com/watch?v=xfVx1-emCV8","annotations":[{"segment":[54.56639719626168,76.56215420560748],"label":"Curling"}]},"xE5KichXWrA":{"duration":110.81,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=xE5KichXWrA","annotations":[{"segment":[13.304823152886115,105.05626593447738],"label":"Hammer throw"}]},"6X1N1i7lfLs":{"duration":179.96,"subset":"testing","resolution":"640x352","url":"https://www.youtube.com/watch?v=6X1N1i7lfLs","annotations":[]},"xVPTVGpOkGE":{"duration":29.75,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xVPTVGpOkGE","annotations":[{"segment":[14.941985522620904,27.192557503900154],"label":"Using parallel bars"}]},"5yGj9JooT_Q":{"duration":52.52,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5yGj9JooT_Q","annotations":[{"segment":[0.6964878385335415,48.87705831591264],"label":"Archery"}]},"K4cNyYu2LLg":{"duration":190.85,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=K4cNyYu2LLg","annotations":[{"segment":[36.62076644698441,53.293635723660245],"label":"Brushing teeth"},{"segment":[81.87569734081882,88.72348293659638],"label":"Brushing teeth"},{"segment":[128.9170062371295,135.7647917878315],"label":"Brushing teeth"}]},"SBJWn8AjK-4":{"duration":60.303,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=SBJWn8AjK-4","annotations":[{"segment":[9.595669572542901,60.302198],"label":"Swinging at the 
playground"}]},"eAFFf5cSX5c":{"duration":85.71000000000001,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=eAFFf5cSX5c","annotations":[{"segment":[0,85.704874],"label":"Cumbia"}]},"ZpwXqrCxcGg":{"duration":50.666,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZpwXqrCxcGg","annotations":[{"segment":[17.993519825375632,30.383793411214953],"label":"Plastering"}]},"R2izNBHhctM":{"duration":71.216,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=R2izNBHhctM","annotations":[{"segment":[0.01,68.59770487580433],"label":"Clipping cat claws"}]},"0Lw6MFCE4xw":{"duration":215.575,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0Lw6MFCE4xw","annotations":[]},"P1Ve5WrfTb8":{"duration":78.95,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=P1Ve5WrfTb8","annotations":[{"segment":[41.00744021,64.88969718],"label":"Dodgeball"},{"segment":[0,36.43088262],"label":"Dodgeball"}]},"eLh51DwDp-E":{"duration":203.988,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eLh51DwDp-E","annotations":[]},"SKdouCRLoKE":{"duration":93.089,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=SKdouCRLoKE","annotations":[{"segment":[1.742688580343214,64.04380532761311],"label":"Bullfighting"},{"segment":[78.56621016380656,83.50382780811232],"label":"Bullfighting"}]},"_i_ERtwvcJE":{"duration":31.719,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_i_ERtwvcJE","annotations":[{"segment":[3.463794056162247,27.80931799375975],"label":"Beer 
pong"}]},"vfI78g82VfE":{"duration":157.618,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=vfI78g82VfE","annotations":[]},"uevUOX7Wpz4":{"duration":38.313,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uevUOX7Wpz4","annotations":[{"segment":[0.009999999999999998,38.312403],"label":"Canoeing"}]},"QJm_B5Hx4DI":{"duration":200.83,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=QJm_B5Hx4DI","annotations":[{"segment":[114.98344041516764,131.58867725429016],"label":"Applying sunscreen"}]},"RmuhtC-TCck":{"duration":176.32999999999998,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=RmuhtC-TCck","annotations":[{"segment":[53.078346333853354,57.49005304212169],"label":"Breakdancing"},{"segment":[70.72517316692668,74.86114820592823],"label":"Breakdancing"},{"segment":[99.95273010920437,118.426751950078],"label":"Breakdancing"},{"segment":[137.72796879875196,165.02540405616224],"label":"Breakdancing"}]},"0VVNybUx7DE":{"duration":224.98,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0VVNybUx7DE","annotations":[{"segment":[55.42782986,165.921275],"label":"Cricket"}]},"5Foo5NSjEXQ":{"duration":120.02,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=5Foo5NSjEXQ","annotations":[{"segment":[39.20885647425897,99.47258814352574],"label":"Preparing 
pasta"}]},"xr0tV2KNqgI":{"duration":98.291,"subset":"training","resolution":"640x352","url":"https://www.youtube.com/watch?v=xr0tV2KNqgI","annotations":[{"segment":[28.350023400936035,88.57647113884555],"label":"Bullfighting"}]},"sSasqxIi5UU":{"duration":72.26,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=sSasqxIi5UU","annotations":[]},"8ErMpNsK3_c":{"duration":32.996,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8ErMpNsK3_c","annotations":[{"segment":[0.2,13.413201248049923],"label":"Using the monkey bar"}]},"YDSSJ6Tp47g":{"duration":106.836,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=YDSSJ6Tp47g","annotations":[{"segment":[0.01,106.83501133786848],"label":"Rollerblading"}]},"zOj_cfNTppg":{"duration":49.97,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zOj_cfNTppg","annotations":[]},"tY9dmG6IyPo":{"duration":8.66,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=tY9dmG6IyPo","annotations":[]},"SibfKtVX3CQ":{"duration":234.104,"subset":"validation","resolution":"450x360","url":"https://www.youtube.com/watch?v=SibfKtVX3CQ","annotations":[{"segment":[11.32170213067026,14.973864108305827],"label":"Beach soccer"},{"segment":[24.469485250158304,43.0955113360997],"label":"Beach soccer"},{"segment":[51.13026768689795,91.66926563865276],"label":"Beach soccer"},{"segment":[100.43445438497812,110.66050792235771],"label":"Beach soccer"},{"segment":[118.69526427315594,144.6256143143685],"label":"Beach soccer"},{"segment":[154.12123545622097,173.84291013545305],"label":"Beach soccer"},{"segment":[182.24288268401483,191.00807143034018],"label":"Beach soccer"},{"segment":[199.408043978902,210.36452991180872],"label":"Beach 
soccer"}]},"z6l9utYlIrw":{"duration":26.053,"subset":"testing","resolution":"544x360","url":"https://www.youtube.com/watch?v=z6l9utYlIrw","annotations":[]},"b39hqdpOn7k":{"duration":84.52,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=b39hqdpOn7k","annotations":[{"segment":[13.443154446177845,45.73308424336973],"label":"Playing squash"},{"segment":[51.2685007800312,74.85991887675506],"label":"Playing squash"}]},"xootaiXcXZw":{"duration":171.898,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xootaiXcXZw","annotations":[{"segment":[6.959292834890966,166.77477102803738],"label":"Capoeira"}]},"SY5WpDrtmEE":{"duration":186.85,"subset":"training","resolution":"400x300","url":"https://www.youtube.com/watch?v=SY5WpDrtmEE","annotations":[{"segment":[10.202447745709827,181.02057285959438],"label":"Playing flauta"}]},"LHe5cTzHB3o":{"duration":62.3,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=LHe5cTzHB3o","annotations":[{"segment":[0.9968568332348762,48.54604239398254],"label":"Doing kickboxing"}]},"DW7Zm9DzEDk":{"duration":23.92,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DW7Zm9DzEDk","annotations":[{"segment":[0.2,23.014397035881434],"label":"Long 
jump"}]},"_uNQ1KITuGE":{"duration":166.395,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=_uNQ1KITuGE","annotations":[]},"IVz9QyL_O1I":{"duration":5.109,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=IVz9QyL_O1I","annotations":[]},"0q9yZPTBbus":{"duration":119.885,"subset":"testing","resolution":"722x480","url":"https://www.youtube.com/watch?v=0q9yZPTBbus","annotations":[]},"pf9u7M6vafU":{"duration":19.528,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pf9u7M6vafU","annotations":[{"segment":[0.2,15.367274622125462],"label":"Knitting"}]},"0ynXG7NB3fM":{"duration":186.39,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0ynXG7NB3fM","annotations":[]},"_B2kFN9MfwU":{"duration":141.155,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=_B2kFN9MfwU","annotations":[{"segment":[66.7233909601282,73.77008571499324],"label":"Applying sunscreen"},{"segment":[96.89205287939409,102.17707394554287],"label":"Applying sunscreen"}]},"bD7VxDypbPQ":{"duration":80.3,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bD7VxDypbPQ","annotations":[]},"V4tqzi5uw9Y":{"duration":17.253,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=V4tqzi5uw9Y","annotations":[{"segment":[0.2,12.488494851794071],"label":"Cutting the grass"}]},"JHuLY-ygFkc":{"duration":128.569,"subset":"validation","resolution":"293x240","url":"https://www.youtube.com/watch?v=JHuLY-ygFkc","annotations":[{"segment":[8.424170566822674,120.74644301716067],"label":"Playing blackjack"}]},"61W153jXL3U":{"duration":208.63,"subset":"training","resolution":"328x240","url":"https://www.youtube.com/watch?v=61W153jXL3U","annotations":[{"segment":[23.486469195231702,45.2594599257613],"label":"Washing face"},{"segment":[74.18179089616629,131.70148282607283],"label":"Washing 
face"},{"segment":[195.7205749740479,197.02045501766162],"label":"Washing face"}]},"f0NDjR9C28o":{"duration":28.282,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=f0NDjR9C28o","annotations":[]},"t8_XO-nWaYg":{"duration":74.931,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=t8_XO-nWaYg","annotations":[{"segment":[0.01,54.06472901716068],"label":"Playing rubik cube"}]},"xQljKBB3498":{"duration":90.23,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=xQljKBB3498","annotations":[{"segment":[34.76988035984024,52.084436166562305],"label":"Washing face"}]},"1MQkI4UafqE":{"duration":79.74,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1MQkI4UafqE","annotations":[{"segment":[1.9242971918876755,69.58506942277691],"label":"Tumbling"}]},"Kq115s7yL3I":{"duration":141.9,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Kq115s7yL3I","annotations":[{"segment":[55.67415199465121,60.10151994651214],"label":"Cleaning windows"},{"segment":[62.53657232003565,66.0784666815244],"label":"Cleaning windows"},{"segment":[84.00930688656116,90.87172721194561],"label":"Cleaning windows"},{"segment":[115.88635613995987,120.97782928459996],"label":"Cleaning windows"}]},"1J3lfO_RmoM":{"duration":186.619,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1J3lfO_RmoM","annotations":[]},"klqHoK_8ar8":{"duration":125.179,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=klqHoK_8ar8","annotations":[{"segment":[8.006722737909516,123.03012987519502],"label":"Painting fence"}]},"7xLL5okHSIU":{"duration":127.478,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7xLL5okHSIU","annotations":[{"segment":[25.853481482059284,33.21178005772231],"label":"Raking leaves"},{"segment":[40.768951567862715,48.32612307800312],"label":"Raking 
leaves"},{"segment":[59.06526153978159,60.05962621216849],"label":"Raking leaves"},{"segment":[62.8438472948518,63.440466098283935],"label":"Raking leaves"},{"segment":[67.41792478783152,68.4122894602184],"label":"Raking leaves"}]},"zlXs92vjuSU":{"duration":219.85,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zlXs92vjuSU","annotations":[{"segment":[140.27607121372856,201.32531492043682],"label":"Getting a piercing"}]},"pwaSQyDNyWs":{"duration":115.33,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=pwaSQyDNyWs","annotations":[{"segment":[0.01,115.33351473922903],"label":"Doing step aerobics"}]},"Fy3Gt8X6XUY":{"duration":233.22,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Fy3Gt8X6XUY","annotations":[]},"JFYuFs_akM4":{"duration":74.58,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=JFYuFs_akM4","annotations":[]},"2eep4G5Tgho":{"duration":26.843,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2eep4G5Tgho","annotations":[{"segment":[0,26.64183673469388],"label":"Cutting the grass"}]},"xMNmGr96O9U":{"duration":179.978,"subset":"testing","resolution":"658x480","url":"https://www.youtube.com/watch?v=xMNmGr96O9U","annotations":[]},"qyssgQ2xq2M":{"duration":195.54,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=qyssgQ2xq2M","annotations":[{"segment":[27.4494383775351,195.481],"label":"Doing nails"}]},"28P5f3p_32g":{"duration":139.529,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=28P5f3p_32g","annotations":[{"segment":[0.01,139.501],"label":"Sharpening knives"}]},"DIF8qouivIU":{"duration":222.703,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DIF8qouivIU","annotations":[{"segment":[0,222.7024036281179],"label":"Doing 
fencing"}]},"nhW0O7NC0GM":{"duration":69.24,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=nhW0O7NC0GM","annotations":[{"segment":[55.63118859594385,68.70181737285492],"label":"Kayaking"}]},"FRjVP5r9rqc":{"duration":80.74,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=FRjVP5r9rqc","annotations":[{"segment":[12.123167599999997,67.13287349859594],"label":"Hand washing clothes"}]},"FWKz_eRNswI":{"duration":64.854,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FWKz_eRNswI","annotations":[]},"QLACTCzs0R0":{"duration":47.183,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=QLACTCzs0R0","annotations":[{"segment":[0.01,47.18224489795919],"label":"Tug of war"}]},"AAfFlwaXW3c":{"duration":60.93,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=AAfFlwaXW3c","annotations":[{"segment":[0,54.214712948517935],"label":"Snatch"}]},"OeiFr5qqFJ4":{"duration":206.19,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OeiFr5qqFJ4","annotations":[]},"kJrHvEECvEA":{"duration":70.61,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kJrHvEECvEA","annotations":[{"segment":[32.638590483619346,37.70225039001561],"label":"Smoking a cigarette"},{"segment":[44.63726287051483,46.72877457098284],"label":"Smoking a cigarette"},{"segment":[7.1021443057722315,11.50657488299532],"label":"Smoking a cigarette"},{"segment":[18.113220748829956,21.85698673946958],"label":"Smoking a cigarette"}]},"7lNAmkaMyyg":{"duration":13.654,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7lNAmkaMyyg","annotations":[{"segment":[0,13.653105],"label":"Removing ice from 
car"}]},"vIQPDP8jW8A":{"duration":98.59,"subset":"testing","resolution":"1920x1440","url":"https://www.youtube.com/watch?v=vIQPDP8jW8A","annotations":[]},"dIj05gBdBbI":{"duration":127.22,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=dIj05gBdBbI","annotations":[]},"1p_HYu0EZAk":{"duration":45.54,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1p_HYu0EZAk","annotations":[{"segment":[10.501073322932918,29.884432137285486],"label":"Washing dishes"}]},"P0N68OQDhNs":{"duration":228.37,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=P0N68OQDhNs","annotations":[{"segment":[89.06718173488845,91.20479409652577],"label":"Long jump"},{"segment":[98.33016863531684,100.82404972389372],"label":"Long jump"},{"segment":[146.78271549909618,148.92032786073347],"label":"Long jump"},{"segment":[155.33316494564545,157.11450858034323],"label":"Long jump"},{"segment":[168.51510784240895,170.2964514771067],"label":"Long jump"},{"segment":[174.9279449273209,177.06555728895825],"label":"Long jump"}]},"yH018Jl5GMQ":{"duration":23.755,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=yH018Jl5GMQ","annotations":[{"segment":[0,23.754012],"label":"Hand car wash"}]},"mo8CBVntUjE":{"duration":166.232,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mo8CBVntUjE","annotations":[{"segment":[0,150.3846801872075],"label":"Playing drums"}]},"ZSdnDVGXU3k":{"duration":88.68,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=ZSdnDVGXU3k","annotations":[]},"TbLBu2TDey8":{"duration":118.074,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=TbLBu2TDey8","annotations":[{"segment":[15.472966347225318,109.41597502652107],"label":"Disc 
dog"}]},"QZi1yBFRZzc":{"duration":122.137,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=QZi1yBFRZzc","annotations":[{"segment":[15.23631825273011,122.081],"label":"Trimming branches or hedges"}]},"VNROQWtYhlw":{"duration":212.347,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VNROQWtYhlw","annotations":[{"segment":[13.913487753510141,212.34632653061223],"label":"Having an ice cream"}]},"pAAGm-KEFW0":{"duration":229.065,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=pAAGm-KEFW0","annotations":[{"segment":[0.2,21.798683321482518],"label":"Applying sunscreen"},{"segment":[127.21854528602913,189.0410406076107],"label":"Applying sunscreen"}]},"en6lOb4wRUU":{"duration":181.16,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=en6lOb4wRUU","annotations":[{"segment":[3.9567382314340196,18.08794620084123],"label":"Smoking a cigarette"},{"segment":[24.3056777073804,29.110288416978857],"label":"Smoking a cigarette"},{"segment":[39.56738231434019,70.09079152825977],"label":"Smoking a cigarette"},{"segment":[80.26526126623295,98.63583162646233],"label":"Smoking a cigarette"},{"segment":[114.4627845521984,136.2248448250855],"label":"Smoking a cigarette"},{"segment":[158.83477742901715,169.0092471575663],"label":"Smoking a cigarette"}]},"iBpyYbLv1WU":{"duration":95.09,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iBpyYbLv1WU","annotations":[{"segment":[3.6325103876755067,91.25755688221528],"label":"Playing guitarra"}]},"r-BJYixThME":{"duration":20.898,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=r-BJYixThME","annotations":[{"segment":[0.2,16.243116887850466],"label":"Snow 
tubing"}]},"t1urvYx1X_w":{"duration":41.1,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=t1urvYx1X_w","annotations":[{"segment":[0.01,30.904635375975033],"label":"Removing ice from car"}]},"tjEMbP2SODQ":{"duration":180.44,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=tjEMbP2SODQ","annotations":[{"segment":[6.049717628705149,170.93969578783154],"label":"Tai chi"}]},"Lr5GuPjfU7Q":{"duration":90.19,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Lr5GuPjfU7Q","annotations":[{"segment":[35.85952418096724,41.48454758190328],"label":"Wrapping presents"},{"segment":[43.73455694227769,54.42210140405617],"label":"Wrapping presents"}]},"0-ReTddxkhg":{"duration":74.955,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0-ReTddxkhg","annotations":[{"segment":[8.416297838038112,74.86075726261043],"label":"Gargling mouthwash"}]},"_I8sqYLhRKM":{"duration":48.205,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=_I8sqYLhRKM","annotations":[{"segment":[0.01,46.582786297748314],"label":"Raking leaves"}]},"mfK2jcMGe1Y":{"duration":95.92,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mfK2jcMGe1Y","annotations":[{"segment":[41.486791710000006,90.23618761778472],"label":"Brushing hair"},{"segment":[9.067412130000001,31.297103880000005],"label":"Brushing hair"}]},"BgsxbT9RCeg":{"duration":195.58,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=BgsxbT9RCeg","annotations":[{"segment":[27.460765993759747,175.7489023600624],"label":"Ballet"}]},"3I6LMFdA2lc":{"duration":86.4,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=3I6LMFdA2lc","annotations":[{"segment":[0.9433182527301093,84.89864274570984],"label":"Brushing 
hair"}]},"HWcWElJfEjw":{"duration":172.73,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HWcWElJfEjw","annotations":[{"segment":[98.83782260000001,121.14456222],"label":"Wrapping presents"}]},"40Sf-iICgzw":{"duration":37.269,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=40Sf-iICgzw","annotations":[{"segment":[0.2,35.58457934267913],"label":"Playing congas"}]},"bON69f83fSY":{"duration":129.41,"subset":"validation","resolution":"648x480","url":"https://www.youtube.com/watch?v=bON69f83fSY","annotations":[{"segment":[64.73182839313573,113.12945709828394],"label":"Fixing bicycle"}]},"7rf06_5zNJk":{"duration":237.7,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7rf06_5zNJk","annotations":[{"segment":[159.69058062,166.8430338],"label":"Belly dance"},{"segment":[53.007546,122.50210272000001],"label":"Belly dance"}]},"ORKAMBnsX64":{"duration":72.33,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=ORKAMBnsX64","annotations":[{"segment":[35.75572074882996,40.831453978159125],"label":"Plataform 
diving"}]},"jQHGyqk21GI":{"duration":178.214,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jQHGyqk21GI","annotations":[{"segment":[7.784639080801328,8.89673037805866],"label":"Rafting"},{"segment":[12.233004269830658,15.569278161602655],"label":"Rafting"},{"segment":[22.797871593775312,30.026465025947978],"label":"Rafting"},{"segment":[37.25505845812064,42.53749212009297],"label":"Rafting"},{"segment":[48.09794860637963,51.156199563182525],"label":"Rafting"},{"segment":[54.77049627145085,61.72106686427457],"label":"Rafting"},{"segment":[66.16943204368175,89.24532641185647],"label":"Rafting"},{"segment":[98.97612524180967,114.26738054602183],"label":"Rafting"},{"segment":[115.65749466458658,134.00700102964117],"label":"Rafting"},{"segment":[143.73779985959436,153.19057586583463],"label":"Rafting"}]},"5Z5DAI4Grb0":{"duration":139.32,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=5Z5DAI4Grb0","annotations":[{"segment":[21.734746645865837,118.67171668642746],"label":"Painting"}]},"rtbMQGNS5ko":{"duration":193.33,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rtbMQGNS5ko","annotations":[]},"Mv2uecqTSdY":{"duration":6.061,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Mv2uecqTSdY","annotations":[{"segment":[1.8436498900156006,6.013135025897036],"label":"Canoeing"}]},"B2HIj_QWaG0":{"duration":108.78999999999999,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=B2HIj_QWaG0","annotations":[{"segment":[0,105.90038048049922],"label":"Playing violin"}]},"juIOpLYnW64":{"duration":74.63,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=juIOpLYnW64","annotations":[{"segment":[21.874428524180967,57.594904890795625],"label":"Getting a 
piercing"}]},"D_xht_r3n54":{"duration":21.758,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=D_xht_r3n54","annotations":[{"segment":[0.01,21.757096],"label":"Removing ice from car"}]},"RjFoJggnfj4":{"duration":136.557,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RjFoJggnfj4","annotations":[{"segment":[10.866721329173165,93.96517855226209],"label":"Futsal"},{"segment":[110.87813203725982,127.65853948360508],"label":"Futsal"}]},"1v5HE_Nm99g":{"duration":226.178,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1v5HE_Nm99g","annotations":[{"segment":[0.01,226.201],"label":"Sumo"}]},"oYmE833AO2w":{"duration":178.33,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=oYmE833AO2w","annotations":[{"segment":[0,178.301],"label":"Plastering"}]},"pXSlIEoZhj4":{"duration":39.99,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pXSlIEoZhj4","annotations":[]},"2LzWCwYtUzs":{"duration":55.31,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=2LzWCwYtUzs","annotations":[{"segment":[12.931591263650546,20.690546021840877],"label":"Peeling potatoes"}]},"0ERgbWePjWk":{"duration":4.16,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0ERgbWePjWk","annotations":[{"segment":[0.2,2.5527285491419653],"label":"Springboard 
diving"}]},"c689s-L_nDM":{"duration":21.154,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=c689s-L_nDM","annotations":[]},"Y_nyVFn70Lk":{"duration":119.7,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Y_nyVFn70Lk","annotations":[]},"vAHR3iJhBXU":{"duration":93.53,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=vAHR3iJhBXU","annotations":[]},"Vq0-j_C-kZc":{"duration":166.557,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Vq0-j_C-kZc","annotations":[{"segment":[9.96013497521944,134.98057422755588],"label":"Bullfighting"}]},"5j6j4NJkUXA":{"duration":67.92,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=5j6j4NJkUXA","annotations":[{"segment":[0.24043609008439681,29.756361501299352],"label":"Using the pommel horse"},{"segment":[44.14405274117475,60.22441353868253],"label":"Using the pommel horse"}]},"dcmfRooP6dg":{"duration":237.87,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dcmfRooP6dg","annotations":[{"segment":[11.8789781,196.22653871138846],"label":"Painting"}]},"pPsqB3tY7hk":{"duration":98.825,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=pPsqB3tY7hk","annotations":[{"segment":[19.579819230864477,26.517550454399128],"label":"Shuffleboard"},{"segment":[46.714056905133354,60.435347547235224],"label":"Shuffleboard"},{"segment":[79.86099497313226,88.57227853186966],"label":"Shuffleboard"}]},"XxyTLG8B-Ns":{"duration":144.173,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XxyTLG8B-Ns","annotations":[{"segment":[28.700004971888962,111.6609656393456],"label":"Riding bumper cars"}]},"RbbjYXAxaPI":{"duration":133.86,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=RbbjYXAxaPI","annotations":[{"segment":[5.42970203161868,133.86303854875283],"label":"Belly 
dance"}]},"insSTTMSC38":{"duration":177.587,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=insSTTMSC38","annotations":[{"segment":[8.031371294851795,95.26868018720748],"label":"Elliptical trainer"},{"segment":[115.49859126365054,119.37624180967238],"label":"Elliptical trainer"},{"segment":[143.47307020280812,166.18502340093602],"label":"Elliptical trainer"}]},"lS_bztQjpGM":{"duration":161.03,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=lS_bztQjpGM","annotations":[{"segment":[14.570611968798753,129.3769855850234],"label":"Removing curlers"}]},"N7ppHQNikv8":{"duration":66.224,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=N7ppHQNikv8","annotations":[{"segment":[0.01,66.22331065759637],"label":"Building sandcastles"}]},"uQDTcusxDCg":{"duration":179.119,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uQDTcusxDCg","annotations":[{"segment":[0.2,7.265321475444052],"label":"Futsal"},{"segment":[14.251207509524871,27.38467325359681],"label":"Futsal"},{"segment":[105.34716139393876,110.93587022120342],"label":"Futsal"}]},"x-rGfBaFQek":{"duration":170.27,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=x-rGfBaFQek","annotations":[{"segment":[0,170.07192743764173],"label":"Shoveling 
snow"}]},"WGUzmra3h_w":{"duration":10.12,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=WGUzmra3h_w","annotations":[]},"gPVq0g5UZjg":{"duration":156.81,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=gPVq0g5UZjg","annotations":[]},"HYYQ56oZSDM":{"duration":214.25,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=HYYQ56oZSDM","annotations":[{"segment":[1.671217733063064,209.5707023775351],"label":"Tango"}]},"gRqvh21b8Hw":{"duration":156.34,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gRqvh21b8Hw","annotations":[{"segment":[19.02420664989865,27.80460971908264],"label":"Plataform diving"},{"segment":[50.73121773306307,91.7064320559217],"label":"Plataform diving"}]},"JAZu9CWEVbQ":{"duration":200.249,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=JAZu9CWEVbQ","annotations":[{"segment":[0,4.9984122031547935],"label":"Surfing"},{"segment":[12.808431270584157,200.248887],"label":"Surfing"}]},"e8gJpLlqzA8":{"duration":195.61,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=e8gJpLlqzA8","annotations":[{"segment":[0.01,193.77360232449297],"label":"Cheerleading"}]},"mZxvYV53uf0":{"duration":225.49,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mZxvYV53uf0","annotations":[{"segment":[11.256860135629916,217.74988824859116],"label":"Cumbia"}]},"33SI8z8PovA":{"duration":227.51,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=33SI8z8PovA","annotations":[{"segment":[40.46183930733229,103.28416875819033],"label":"Washing face"}]},"Y53B2WdEbAg":{"duration":12.7,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Y53B2WdEbAg","annotations":[{"segment":[0.6364471100000001,12.327623180187208],"label":"Preparing 
salad"}]},"CtYhHrsnW3c":{"duration":216.57,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CtYhHrsnW3c","annotations":[]},"3knQ7DMA9WI":{"duration":117.192,"subset":"testing","resolution":"360x240","url":"https://www.youtube.com/watch?v=3knQ7DMA9WI","annotations":[]},"bXrdB5AASAM":{"duration":179.096,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bXrdB5AASAM","annotations":[{"segment":[11.176006877009774,100.58406189308795],"label":"Capoeira"},{"segment":[101.42226240886369,171.83110573402527],"label":"Capoeira"}]},"N1-TZnH0jy4":{"duration":15.163,"subset":"training","resolution":"480x480","url":"https://www.youtube.com/watch?v=N1-TZnH0jy4","annotations":[{"segment":[0,14.736845346333853],"label":"Having an ice cream"}]},"OYPyiUC4h68":{"duration":135.953,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=OYPyiUC4h68","annotations":[{"segment":[2.1209490327613105,135.52864319344772],"label":"River tubing"}]},"_SzFi60-OGA":{"duration":199.51,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=_SzFi60-OGA","annotations":[{"segment":[128.54277096798157,194.83722669723113],"label":"Doing karate"}]},"eH5qh83OcNI":{"duration":121.51,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eH5qh83OcNI","annotations":[]},"T60xwc6nKJI":{"duration":132.26,"subset":"validation","resolution":"360x240","url":"https://www.youtube.com/watch?v=T60xwc6nKJI","annotations":[{"segment":[97.80288243369735,122.7694410296412],"label":"Polishing 
forniture"}]},"VWGV8I5byKI":{"duration":126.74,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=VWGV8I5byKI","annotations":[]},"Th--fToJeyI":{"duration":33.85,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=Th--fToJeyI","annotations":[]},"zTHkqpNFGno":{"duration":105.91,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=zTHkqpNFGno","annotations":[]},"G1aYtrFJqUc":{"duration":69.753,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=G1aYtrFJqUc","annotations":[{"segment":[23.778795317355716,41.02137942950525],"label":"Mooping floor"},{"segment":[48.50401027062674,53.92620653230899],"label":"Mooping floor"}]},"p8uTRV3u3JM":{"duration":79.83,"subset":"training","resolution":"464x348","url":"https://www.youtube.com/watch?v=p8uTRV3u3JM","annotations":[{"segment":[0.01,78.95842231201247],"label":"Volleyball"}]},"8nj8tlvLyUo":{"duration":190.799,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8nj8tlvLyUo","annotations":[{"segment":[0.01,190.741],"label":"Building sandcastles"}]},"uUzmPV8Vgqg":{"duration":116.03,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=uUzmPV8Vgqg","annotations":[{"segment":[13.480122166146646,94.8127542],"label":"Bathing dog"}]},"mqwC7rqeXsk":{"duration":91.208,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mqwC7rqeXsk","annotations":[{"segment":[0.2,83.83192594288697],"label":"Drum corps"}]},"5yrLDF_ZmN0":{"duration":124.529,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=5yrLDF_ZmN0","annotations":[{"segment":[0.2,120.5316065318818],"label":"Scuba diving"}]},"pHhcYS_wPys":{"duration":5.02,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=pHhcYS_wPys","annotations":[{"segment":[1.3497238689547584,3.2299750390015602],"label":"Javelin 
throw"}]},"G5gvb71BB5I":{"duration":211.441,"subset":"training","resolution":"400x200","url":"https://www.youtube.com/watch?v=G5gvb71BB5I","annotations":[{"segment":[25.977960105237663,196.64332591239162],"label":"Beach soccer"}]},"NURIoQoKk1E":{"duration":192.169,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NURIoQoKk1E","annotations":[{"segment":[0.01,191.22183775351016],"label":"Blow-drying hair"}]},"SHfFrzy_fKM":{"duration":78.321,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=SHfFrzy_fKM","annotations":[{"segment":[18.68485647425897,68.40648985959439],"label":"Camel ride"}]},"IuY073Pr4E4":{"duration":42.539,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=IuY073Pr4E4","annotations":[{"segment":[10.585058026299935,35.12916366549123],"label":"Using the monkey bar"}]},"y4Y8gD1mpTw":{"duration":156.94,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=y4Y8gD1mpTw","annotations":[{"segment":[96.58987397455083,114.70812544227769],"label":"Doing nails"}]},"9fnmcyUz5jg":{"duration":75.23,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=9fnmcyUz5jg","annotations":[{"segment":[3.9877597503900155,74.0278174726989],"label":"Kayaking"}]},"Cgquef_qgcs":{"duration":6.78,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Cgquef_qgcs","annotations":[{"segment":[0.3222464898595944,4.88838],"label":"Pole vault"},{"segment":[5.3438604000000005,5.377488299531982],"label":"Pole vault"}]},"uINj6L2EEUs":{"duration":200.53,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uINj6L2EEUs","annotations":[{"segment":[47.238154103034866,98.85600461297363],"label":"Chopping wood"},{"segment":[64.44410427301446,81.96288990063003],"label":"Chopping wood"},{"segment":[88.53243451098588,99.48167552824562],"label":"Chopping 
wood"},{"segment":[133.2679049529328,173.93651327925116],"label":"Chopping wood"}]},"0UR4hXxt68E":{"duration":207.84,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=0UR4hXxt68E","annotations":[{"segment":[12.321355591638632,200.38415044305773],"label":"Cheerleading"}]},"QhDPo3amF6o":{"duration":113.987,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QhDPo3amF6o","annotations":[{"segment":[2.13391745465737,112.20849147581903],"label":"Riding bumper cars"}]},"8kGiDHAnTqY":{"duration":174.057,"subset":"training","resolution":"543x360","url":"https://www.youtube.com/watch?v=8kGiDHAnTqY","annotations":[{"segment":[33.121001560062396,98.54855382215287],"label":"Fun sliding down"},{"segment":[114.83757098283931,118.63834165366613],"label":"Fun sliding down"},{"segment":[164.24758970358815,174.021],"label":"Fun sliding down"}]},"KRGiJIHSd9E":{"duration":236.84,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KRGiJIHSd9E","annotations":[{"segment":[11.636653633385334,225.16001236661464],"label":"Getting a tattoo"}]},"JhoGvKrhs8s":{"duration":96.74000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JhoGvKrhs8s","annotations":[{"segment":[2.8181986,89.50115512792512],"label":"Playing bagpipes"}]},"aj6uPa3mnn4":{"duration":219.174,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=aj6uPa3mnn4","annotations":[]},"DYahQgHG1Gk":{"duration":112.803,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DYahQgHG1Gk","annotations":[{"segment":[20.574659906396256,53.63479719188768],"label":"Playing drums"},{"segment":[62.07568330733229,82.84089079563182],"label":"Playing drums"},{"segment":[92.51445553822153,100.07742433697348],"label":"Playing 
drums"}]},"al_NNsjwU-Q":{"duration":55.66,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=al_NNsjwU-Q","annotations":[{"segment":[0.01,55.65823129251701],"label":"Using the balance beam"}]},"MBouLt-hXDU":{"duration":234.174,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MBouLt-hXDU","annotations":[{"segment":[61.00085335413416,104.83380187207487],"label":"Making a cake"}]},"WGEKoGRIJGk":{"duration":134.35,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=WGEKoGRIJGk","annotations":[{"segment":[63.48503884399376,91.35140902964119],"label":"Playing flauta"}]},"qfCqVXhNPE8":{"duration":218.38,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qfCqVXhNPE8","annotations":[{"segment":[124.93203576127657,142.2802713874448],"label":"Removing curlers"}]},"PeMJjLrDjhQ":{"duration":189.87,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=PeMJjLrDjhQ","annotations":[{"segment":[28.58410830582883,107.6717447582257],"label":"Washing hands"}]},"IkjulgI7gzg":{"duration":143.18,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IkjulgI7gzg","annotations":[{"segment":[24.686320592823712,133.26145007800312],"label":"Snatch"}]},"8fLHaW3YvJY":{"duration":213.81,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8fLHaW3YvJY","annotations":[{"segment":[78.85116614664587,185.54196177847115],"label":"Shoveling snow"}]},"i5kuZ8zlhMg":{"duration":203.06,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=i5kuZ8zlhMg","annotations":[{"segment":[85.67000858034321,93.5877542901716],"label":"Playing water polo"},{"segment":[100.87208034321372,104.67259828393135],"label":"Playing water 
polo"}]},"7SxEQiFHGm8":{"duration":185.481,"subset":"validation","resolution":"562x480","url":"https://www.youtube.com/watch?v=7SxEQiFHGm8","annotations":[{"segment":[0.01,178.04812081791388],"label":"Futsal"}]},"AOteP9srRpw":{"duration":11.1,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=AOteP9srRpw","annotations":[{"segment":[0.021927794447569086,10.703890411269999],"label":"Windsurfing"}]},"RRFC2NrZSf4":{"duration":52.849,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RRFC2NrZSf4","annotations":[{"segment":[0.2,51.09321028037383],"label":"Playing congas"}]},"WyMTj1vWm10":{"duration":39.61,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WyMTj1vWm10","annotations":[]},"6mes_Rv4QqU":{"duration":2.624,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=6mes_Rv4QqU","annotations":[]},"djQnCU5MNJg":{"duration":8.36,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=djQnCU5MNJg","annotations":[{"segment":[0.2,8.15918367346939],"label":"Swinging at the playground"}]},"wr_1p7obP-g":{"duration":218.663,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wr_1p7obP-g","annotations":[{"segment":[34.53629482344446,199.72468473777468],"label":"Laying tile"}]},"6Ke30NtYOC0":{"duration":79.99,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6Ke30NtYOC0","annotations":[{"segment":[13.033970358814353,21.88958658346334],"label":"Playing guitarra"},{"segment":[31.618291731669267,69.53529641185648],"label":"Playing guitarra"}]},"4Lu8ECLHvK4":{"duration":124.23,"subset":"validation","resolution":"408x720","url":"https://www.youtube.com/watch?v=4Lu8ECLHvK4","annotations":[{"segment":[0.01,124.22675736961452],"label":"Playing 
kickball"}]},"P49Ci0Ph8eU":{"duration":54.64,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=P49Ci0Ph8eU","annotations":[{"segment":[7.756515182527302,45.94243608112325],"label":"Using parallel bars"}]},"EhxFw5Eb1VE":{"duration":67.11,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EhxFw5Eb1VE","annotations":[{"segment":[2.458456826053042,57.17219810374416],"label":"Using parallel bars"}]},"UpnFBPGFgnw":{"duration":122.253,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UpnFBPGFgnw","annotations":[{"segment":[30.888926677067083,36.41842589703588],"label":"Changing car wheel"},{"segment":[52.0535616224649,54.34163026521061],"label":"Changing car wheel"},{"segment":[66.73533541341654,76.84097191887676],"label":"Changing car wheel"}]},"X6CpfuJLx0U":{"duration":100.89,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=X6CpfuJLx0U","annotations":[{"segment":[51.5527722,76.7994858],"label":"Playing accordion"},{"segment":[22.942386,40.4609256],"label":"Playing accordion"}]},"UtqKByc2YH8":{"duration":12.307,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=UtqKByc2YH8","annotations":[]},"9G172wz5Yqo":{"duration":207.308,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=9G172wz5Yqo","annotations":[]},"GE2q5qDJ-xU":{"duration":122.14,"subset":"validation","resolution":"480x272","url":"https://www.youtube.com/watch?v=GE2q5qDJ-xU","annotations":[{"segment":[0.01,122.1368253968254],"label":"Walking the 
dog"}]},"jbrcRk6BtXw":{"duration":50.434,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jbrcRk6BtXw","annotations":[]},"YgDcGyka5_o":{"duration":173.27,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YgDcGyka5_o","annotations":[]},"BB2vrBWgXkU":{"duration":196.72,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BB2vrBWgXkU","annotations":[{"segment":[81.15390898127924,173.81357065366615],"label":"Doing nails"}]},"Zev1KxIua7o":{"duration":168.577,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Zev1KxIua7o","annotations":[{"segment":[0.2,163.33341820203975],"label":"Drum corps"}]},"FiFzHgBjryA":{"duration":157.76,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FiFzHgBjryA","annotations":[{"segment":[12.83865423435437,153.0814128013326],"label":"Vacuuming floor"}]},"EETygPQ-j08":{"duration":41.146,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=EETygPQ-j08","annotations":[]},"A0XGYLim9IU":{"duration":64.761,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=A0XGYLim9IU","annotations":[{"segment":[7.678306500967522,56.47596360062402],"label":"Croquet"}]},"S7CW-si2JjA":{"duration":190.891,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=S7CW-si2JjA","annotations":[]},"-CEi03j4-Bw":{"duration":10.264,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-CEi03j4-Bw","annotations":[{"segment":[0.01,10.263219954648527],"label":"Building 
sandcastles"}]},"4j7_nh61bvo":{"duration":111.224,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4j7_nh61bvo","annotations":[{"segment":[11.260947753971786,15.072345455316082],"label":"Sumo"},{"segment":[40.19292121417622,46.776243785046724],"label":"Sumo"},{"segment":[78.48014235046729,86.62267367601245],"label":"Sumo"},{"segment":[101.86826424299065,103.77396306386294],"label":"Sumo"}]},"TOBHIXCu4Ic":{"duration":123.62,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TOBHIXCu4Ic","annotations":[{"segment":[7.036641965678627,107.86304602184089],"label":"Cheerleading"}]},"dB4SbhaQQV8":{"duration":22.57,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=dB4SbhaQQV8","annotations":[{"segment":[5.0702813354134175,20.914910508580345],"label":"Playing kickball"}]},"_CYdiU1sMwc":{"duration":24.056,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=_CYdiU1sMwc","annotations":[{"segment":[1.7262908274570983,24.0554874],"label":"Blowing leaves"}]},"jmPV_y8YntQ":{"duration":160.984,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jmPV_y8YntQ","annotations":[{"segment":[38.41016068642745,157.17482995319813],"label":"Rope skipping"}]},"IfKGdI5egKc":{"duration":112.55,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IfKGdI5egKc","annotations":[{"segment":[19.752809704224905,98.58846791911077],"label":"Bathing dog"}]},"vSAHWgFjQAY":{"duration":157.99,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=vSAHWgFjQAY","annotations":[{"segment":[17.51770944,110.19663],"label":"Cumbia"}]},"64UBH371Jj8":{"duration":54.06,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=64UBH371Jj8","annotations":[{"segment":[7.589773572542902,26.14255341653666],"label":"Washing 
dishes"},{"segment":[36.76823641809673,40.22579993447738],"label":"Washing dishes"},{"segment":[46.04462634009361,48.15289677691108],"label":"Washing dishes"}]},"A-SbpY2MpCM":{"duration":233.802,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=A-SbpY2MpCM","annotations":[]},"HOTCR1uIaBM":{"duration":173.988,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HOTCR1uIaBM","annotations":[{"segment":[38.257193447737905,83.30715600624025],"label":"Baking cookies"}]},"vr0Wi359kWM":{"duration":172.69,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vr0Wi359kWM","annotations":[{"segment":[64.65652455538222,163.2577245023401],"label":"Cleaning shoes"}]},"YnvefOl6a7k":{"duration":145.404,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=YnvefOl6a7k","annotations":[{"segment":[36.953702028081125,134.4574305772231],"label":"Decorating the Christmas tree"}]},"K98WGaMR4eM":{"duration":105.395,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=K98WGaMR4eM","annotations":[{"segment":[0.01,104.27784978312793],"label":"Doing a powerbomb"}]},"fM5egP_1DvQ":{"duration":142.32,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=fM5egP_1DvQ","annotations":[]},"krUhklOpyQU":{"duration":74.07,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=krUhklOpyQU","annotations":[{"segment":[36.40027853120125,64.13382407878315],"label":"Doing karate"}]},"aljYWkDQzN8":{"duration":60.326,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aljYWkDQzN8","annotations":[{"segment":[6.39957632449298,59.19608100156006],"label":"Skiing"}]},"YRMbCxetWtg":{"duration":82.13,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YRMbCxetWtg","annotations":[{"segment":[7.972164960000001,52.10857344],"label":"Playing 
racquetball"}]},"gr53ih2aFkE":{"duration":134.49,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=gr53ih2aFkE","annotations":[]},"M_OPqcaCDYI":{"duration":190.451,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=M_OPqcaCDYI","annotations":[{"segment":[58.52119074937218,181.33478601925583],"label":"Rafting"}]},"YaC68rE4lwA":{"duration":51.595,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=YaC68rE4lwA","annotations":[]},"TsPh4-BsCYk":{"duration":161.32999999999998,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TsPh4-BsCYk","annotations":[{"segment":[43.0018067,114.1542973],"label":"Hammer throw"}]},"c-aDlEaZmFk":{"duration":58.91,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=c-aDlEaZmFk","annotations":[{"segment":[3.627200946957878,51.522989579999994],"label":"Using uneven bars"}]},"L_8Gyi8FMk4":{"duration":19.691,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=L_8Gyi8FMk4","annotations":[{"segment":[0.21502907956318254,2.518912074882995],"label":"Peeling potatoes"}]},"xVQkf_TrX7U":{"duration":139.62,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xVQkf_TrX7U","annotations":[{"segment":[46.58623088923557,65.55485192],"label":"Making a sandwich"},{"segment":[75.55018252730109,121.9253666146646],"label":"Making a sandwich"}]},"qK2iRSxxRQU":{"duration":236.57,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qK2iRSxxRQU","annotations":[{"segment":[1.8452768314814227,232.13582363806552],"label":"Belly 
dance"}]},"8YTuNZ-mIaY":{"duration":225.35,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=8YTuNZ-mIaY","annotations":[{"segment":[44.12071679737938,211.8145954602184],"label":"Paintball"}]},"5-ydqbn30kA":{"duration":30.488,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=5-ydqbn30kA","annotations":[{"segment":[0.2,30.207],"label":"Knitting"}]},"MzmOJ324rcM":{"duration":52.454,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=MzmOJ324rcM","annotations":[]},"XkOO6lc5vxw":{"duration":70.427,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XkOO6lc5vxw","annotations":[{"segment":[15.637429797191887,29.903155226209044],"label":"Trimming branches or hedges"},{"segment":[40.328108424336975,47.570707488299526],"label":"Trimming branches or hedges"}]},"QvBG_Da97h8":{"duration":58.143,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=QvBG_Da97h8","annotations":[{"segment":[5.801035881435258,53.04334633385336],"label":"Hand car wash"}]},"Tv3v-UpBJ2Y":{"duration":238.26,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Tv3v-UpBJ2Y","annotations":[{"segment":[0.01,238.201],"label":"Tug of 
war"}]},"b4b6YkxsHk4":{"duration":7.942,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=b4b6YkxsHk4","annotations":[{"segment":[0.2,6.150700468018721],"label":"Shuffleboard"}]},"Pv4FwH2TC6Y":{"duration":35.39,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Pv4FwH2TC6Y","annotations":[]},"5Ra6milBrOM":{"duration":234.963,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5Ra6milBrOM","annotations":[{"segment":[128.58448560507944,222.2601491564813],"label":"Rock-paper-scissors"}]},"o7pWqZoV_LM":{"duration":126.851,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=o7pWqZoV_LM","annotations":[]},"RWjCwIOuzrc":{"duration":207.424,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RWjCwIOuzrc","annotations":[{"segment":[29.032883007369612,44.51708990319171],"label":"Hanging wallpaper"},{"segment":[74.19515312018405,83.87278243007285],"label":"Hanging wallpaper"},{"segment":[164.5196933458129,183.8749503032087],"label":"Hanging wallpaper"}]},"mUmSJps9WJI":{"duration":109.25,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=mUmSJps9WJI","annotations":[]},"reJ45Z3HU9s":{"duration":35.201,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=reJ45Z3HU9s","annotations":[]},"b_mWxzi_aps":{"duration":99.173,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=b_mWxzi_aps","annotations":[]},"6uNhwOSZXco":{"duration":161.495,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=6uNhwOSZXco","annotations":[{"segment":[27.204037441497665,51.13351482059283],"label":"Snow tubing"},{"segment":[98.74058034321374,124.93706084243371],"label":"Snow tubing"},{"segment":[128.9672886115445,146.85142433697348],"label":"Snow 
tubing"}]},"fO2fWjYEhMc":{"duration":26.216,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=fO2fWjYEhMc","annotations":[{"segment":[0.2,26.2153287],"label":"Croquet"}]},"XxbuqeqOGaM":{"duration":86.495,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XxbuqeqOGaM","annotations":[{"segment":[17.33926535564824,45.81074387737414],"label":"Sharpening knives"}]},"M1hPI8WvySo":{"duration":223.32999999999998,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=M1hPI8WvySo","annotations":[{"segment":[46.154166528861154,215.09583269110766],"label":"Wrapping presents"}]},"DCCD1fufXfY":{"duration":52.13,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=DCCD1fufXfY","annotations":[{"segment":[11.873283273010921,29.276588892355694],"label":"Drinking beer"}]},"Z9gstJONME4":{"duration":35.295,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Z9gstJONME4","annotations":[{"segment":[7.601625063618575,11.120062945238512],"label":"Doing crunches"},{"segment":[15.463134705363123,20.575864752092098],"label":"Doing crunches"},{"segment":[25.908497166422315,31.790885499755646],"label":"Doing crunches"}]},"5ICSEb7X5-k":{"duration":59.049,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=5ICSEb7X5-k","annotations":[]},"rgfxN6n4ZLo":{"duration":103.78999999999999,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rgfxN6n4ZLo","annotations":[]},"AG2g8djPKV0":{"duration":44.44,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=AG2g8djPKV0","annotations":[{"segment":[5.4080395881435255,37.85627711700467],"label":"Hand washing 
clothes"}]},"DyVqsDJYXdM":{"duration":212.18,"subset":"training","resolution":"462x360","url":"https://www.youtube.com/watch?v=DyVqsDJYXdM","annotations":[{"segment":[3.805884023400936,83.89492173322932],"label":"Cricket"},{"segment":[85.21870748049922,207.99983553978157],"label":"Cricket"}]},"IROb83YwQ8Y":{"duration":144.73,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=IROb83YwQ8Y","annotations":[{"segment":[20.09511385625493,107.47499096154324],"label":"Shaving"}]},"61sN9tqZHwk":{"duration":81.757,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=61sN9tqZHwk","annotations":[{"segment":[18.608964118564742,57.35639625585023],"label":"Grooming dog"}]},"Po8gmt7hVTY":{"duration":92.09,"subset":"validation","resolution":"480x270","url":"https://www.youtube.com/watch?v=Po8gmt7hVTY","annotations":[{"segment":[11.636983667101786,84.61954690639627],"label":"Preparing pasta"}]},"3JgFuceFs3c":{"duration":61.719,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3JgFuceFs3c","annotations":[]},"erXZ0pZEtGE":{"duration":61.069,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=erXZ0pZEtGE","annotations":[{"segment":[0.01,61.06848],"label":"Powerbocking"}]},"pMDFkrK0KRc":{"duration":185.087,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pMDFkrK0KRc","annotations":[{"segment":[97.8681448374202,163.20952590328565],"label":"Putting in contact lenses"}]},"E4ItlgO16LI":{"duration":85.53999999999999,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=E4ItlgO16LI","annotations":[{"segment":[34.02994851014041,78.3356069625585],"label":"Using parallel 
bars"}]},"uRwr0x--Ai4":{"duration":96.18,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=uRwr0x--Ai4","annotations":[]},"J2gGPC98yec":{"duration":21.9,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=J2gGPC98yec","annotations":[{"segment":[7.093741029641186,21.861],"label":"Shot put"}]},"iYegrB5DT7Y":{"duration":105.72,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iYegrB5DT7Y","annotations":[]},"DozJpP2cClI":{"duration":118.68,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=DozJpP2cClI","annotations":[{"segment":[1.4811400686427458,45.91534212792512],"label":"Playing badminton"}]},"E33xUgVqEH0":{"duration":32.485,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=E33xUgVqEH0","annotations":[{"segment":[0.2,27.27878159126365],"label":"Cleaning sink"}]},"Ca0dN5O5ogM":{"duration":71.56,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=Ca0dN5O5ogM","annotations":[]},"IKWlTZnjebw":{"duration":153.39,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=IKWlTZnjebw","annotations":[]},"DbF6oI705UA":{"duration":90.28,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DbF6oI705UA","annotations":[{"segment":[10.844766609712009,14.084112480145466],"label":"Cheerleading"},{"segment":[16.760093851373107,20.562804221012378],"label":"Cheerleading"},{"segment":[38.02710369639276,41.68897294123058],"label":"Cheerleading"},{"segment":[44.22411318765676,59.85747804061823],"label":"Cheerleading"},{"segment":[63.37850616065459,82.11037575924806],"label":"Cheerleading"}]},"ICzctKJ6nqc":{"duration":209.352,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ICzctKJ6nqc","annotations":[{"segment":[30.404294193802592,41.813312885391376],"label":"Hula 
hoop"},{"segment":[95.9246586797839,124.28421942744745],"label":"Hula hoop"}]},"DAd-joUpMIk":{"duration":100.45,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DAd-joUpMIk","annotations":[{"segment":[10.1855286,79.33454575507021],"label":"Bungee jumping"}]},"SqEHpHNuy-w":{"duration":117.052,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=SqEHpHNuy-w","annotations":[{"segment":[71.03427410920436,74.86902926677068],"label":"Arm wrestling"}]},"RIaNmXzBQnE":{"duration":168.624,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=RIaNmXzBQnE","annotations":[{"segment":[0.01,156.25937473946956],"label":"Having an ice cream"}]},"44FeihJUKvM":{"duration":59.1,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=44FeihJUKvM","annotations":[{"segment":[0.01,59.09478458049887],"label":"Drinking coffee"}]},"aQHGZzqZLxs":{"duration":120.37,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=aQHGZzqZLxs","annotations":[{"segment":[6.289272230889235,118.74521450858033],"label":"Starting a campfire"}]},"0e7d39SwHCA":{"duration":49.436,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0e7d39SwHCA","annotations":[]},"SIj-ti_70HQ":{"duration":18.32,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=SIj-ti_70HQ","annotations":[{"segment":[2.5797441497659905,17.773154446177845],"label":"Brushing hair"}]},"5co1E0umtJQ":{"duration":188.361,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5co1E0umtJQ","annotations":[{"segment":[16.74439469578783,175.10033697347893],"label":"Scuba diving"}]},"aPEqCGdCsp0":{"duration":213.25,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aPEqCGdCsp0","annotations":[{"segment":[0,209.92519675819034],"label":"Getting a 
tattoo"}]},"ZkIGGQ9iOSA":{"duration":48.67,"subset":"validation","resolution":"854x478","url":"https://www.youtube.com/watch?v=ZkIGGQ9iOSA","annotations":[{"segment":[0.20000000000000004,45.30500024906282],"label":"Doing crunches"}]},"LvaU_4xbxb0":{"duration":72.26,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LvaU_4xbxb0","annotations":[]},"wPCQfs0Rgx0":{"duration":96.247,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wPCQfs0Rgx0","annotations":[{"segment":[6.906910907956319,80.9309778127925],"label":"Baking cookies"}]},"b7_ok8iTlT8":{"duration":167.18,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=b7_ok8iTlT8","annotations":[{"segment":[65.9777425897036,167.141],"label":"Preparing salad"}]},"ykcLgz3DlYg":{"duration":133.167,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ykcLgz3DlYg","annotations":[{"segment":[68.14132864898595,109.06767542901716],"label":"Making a cake"}]},"gsQ-OEvrxjo":{"duration":57.632,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gsQ-OEvrxjo","annotations":[]},"2Ua6E2a6kT4":{"duration":44.676,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=2Ua6E2a6kT4","annotations":[{"segment":[10.095257410296412,38.98858034321373],"label":"Painting fence"}]},"apPeChgjzs4":{"duration":25.891,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=apPeChgjzs4","annotations":[{"segment":[0.2,25.690248],"label":"Playing congas"}]},"awag66wjX4k":{"duration":161.217,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=awag66wjX4k","annotations":[{"segment":[26.399227769110766,143.57933073322934],"label":"Assembling 
bicycle"}]},"Lbdp-HCjWw8":{"duration":73.17,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Lbdp-HCjWw8","annotations":[{"segment":[5.935469592933377,10.729502711388456],"label":"Playing squash"},{"segment":[17.121546879875194,19.860994380655228],"label":"Playing squash"},{"segment":[22.02972365210608,40.977568865834634],"label":"Playing squash"},{"segment":[43.71701636661467,52.962651681747275],"label":"Playing squash"},{"segment":[58.44154668330734,65.51845272698908],"label":"Playing squash"}]},"yEFS8M4pgMU":{"duration":161.727,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yEFS8M4pgMU","annotations":[{"segment":[0.01,161.661],"label":"Tug of war"}]},"FmaW2KK4wWU":{"duration":215.342,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FmaW2KK4wWU","annotations":[{"segment":[161.4226639968455,204.62499525994403],"label":"Hanging wallpaper"}]},"1JEQbXJC85w":{"duration":223.86,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1JEQbXJC85w","annotations":[{"segment":[51.70339848,193.61363696879874],"label":"Preparing salad"}]},"vth3IYGHu5k":{"duration":161.05,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vth3IYGHu5k","annotations":[{"segment":[15.83481903276131,106.06815288611546],"label":"High jump"}]},"9ddD5Ob93J0":{"duration":112.455,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=9ddD5Ob93J0","annotations":[{"segment":[25.08725965678627,98.7701201872075],"label":"Changing car wheel"}]},"r96PJ548pn8":{"duration":143.035,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=r96PJ548pn8","annotations":[{"segment":[0.01,0.02],"label":"Doing 
crunches"}]},"OwSdSL_4sxU":{"duration":36.22,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=OwSdSL_4sxU","annotations":[{"segment":[1.5257791825273013,35.997086639625586],"label":"Playing water polo"}]},"zdWZ4wn1_fQ":{"duration":143.059,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zdWZ4wn1_fQ","annotations":[]},"s69uPXLvzIg":{"duration":92.207,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=s69uPXLvzIg","annotations":[{"segment":[9.637802411856475,92.20643990929705],"label":"Welding"}]},"D9EQ6jCH0_M":{"duration":210.442,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=D9EQ6jCH0_M","annotations":[{"segment":[102.1023426406444,170.3894399694355],"label":"Painting furniture"}]},"_mupI51ie9A":{"duration":69.15,"subset":"training","resolution":"240x176","url":"https://www.youtube.com/watch?v=_mupI51ie9A","annotations":[{"segment":[0.2,68.94902494331066],"label":"Playing racquetball"},{"segment":[24.48803154602184,28.695226393135727],"label":"Playing racquetball"},{"segment":[34.3048195226209,47.03428085491419],"label":"Playing racquetball"},{"segment":[59.11648144149766,68.949023],"label":"Playing racquetball"}]},"hCFCYe_Jp-k":{"duration":99.057,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=hCFCYe_Jp-k","annotations":[{"segment":[13.135394313731732,98.747257900156],"label":"Baton twirling"}]},"F1-PnXa9SwQ":{"duration":33.066,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=F1-PnXa9SwQ","annotations":[{"segment":[11.967441310452418,32.86521541950113],"label":"Cutting the grass"}]},"uRBE-ANigvs":{"duration":98.5,"subset":"training","resolution":"512x288","url":"https://www.youtube.com/watch?v=uRBE-ANigvs","annotations":[{"segment":[8.758841152886115,22.742254221528864],"label":"Discus throw"},{"segment":[34.26704521216848,49.3261054399376],"label":"Discus 
throw"},{"segment":[56.240980034321375,76.37094829797192],"label":"Discus throw"}]},"nGOEmEDesPI":{"duration":208.632,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nGOEmEDesPI","annotations":[{"segment":[21.156059303596635,31.896827873114923],"label":"Snowboarding"},{"segment":[43.61402994895306,64.7700892525497],"label":"Snowboarding"},{"segment":[73.5579908094283,99.59621764462416],"label":"Snowboarding"},{"segment":[111.3134197204623,139.6299914037378],"label":"Snowboarding"},{"segment":[145.81407027709682,164.0408290617339],"label":"Snowboarding"},{"segment":[179.66376385647425,195.93765551014042],"label":"Snowboarding"}]},"TM4-Miytfv4":{"duration":24.08,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=TM4-Miytfv4","annotations":[{"segment":[3.4101575663026527,23.12161778471139],"label":"High jump"}]},"UfjR8ewF8xo":{"duration":213.76,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UfjR8ewF8xo","annotations":[{"segment":[22.66394383775351,71.99135101404056],"label":"Layup drill in basketball"},{"segment":[71.65805772230888,197.97621528861154],"label":"Layup drill in basketball"}]},"q53Ajkll_kw":{"duration":208.1,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=q53Ajkll_kw","annotations":[{"segment":[93.36896196000001,202.48361934477379],"label":"Playing bagpipes"},{"segment":[4.542901716068643,91.83151326053043],"label":"Playing bagpipes"}]},"WnLYDJ8Jwv8":{"duration":134.33,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=WnLYDJ8Jwv8","annotations":[{"segment":[91.75519188767552,134.261],"label":"Washing face"}]},"mDqiAEB4Ads":{"duration":186.317,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mDqiAEB4Ads","annotations":[{"segment":[58.242141636126746,80.55380952881725],"label":"Laying 
tile"},{"segment":[101.41666781808631,141.11405095183437],"label":"Laying tile"},{"segment":[167.48238573410495,172.40833838573795],"label":"Laying tile"}]},"yfjnahzAPSc":{"duration":207.73,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yfjnahzAPSc","annotations":[{"segment":[14.906957736812874,200.2717351825273],"label":"Cricket"}]},"UwGJfYcU5nk":{"duration":186.83,"subset":"testing","resolution":"270x480","url":"https://www.youtube.com/watch?v=UwGJfYcU5nk","annotations":[]},"yWCEDAQvhzA":{"duration":171.13,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=yWCEDAQvhzA","annotations":[{"segment":[6.138631825273011,93.69181123244931],"label":"Doing karate"},{"segment":[102.50044305772231,154.28452106084242],"label":"Doing karate"}]},"j_r7zJWxI8w":{"duration":34.62,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=j_r7zJWxI8w","annotations":[]},"5Q_FrGFVGNY":{"duration":143.2,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5Q_FrGFVGNY","annotations":[{"segment":[96.72764568,97.93956716000001],"label":"Cumbia"},{"segment":[22.7575102,138.28412604290173],"label":"Cumbia"}]},"lZmfH8ksz8A":{"duration":160.358,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lZmfH8ksz8A","annotations":[]},"FtbrPGaINt0":{"duration":85.17,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FtbrPGaINt0","annotations":[{"segment":[13.6680816,45.6110901],"label":"Plataform diving"}]},"Jdu5AqaWfz8":{"duration":110.13,"subset":"testing","resolution":"1200x720","url":"https://www.youtube.com/watch?v=Jdu5AqaWfz8","annotations":[]},"61iCOJ3Meus":{"duration":7.152,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=61iCOJ3Meus","annotations":[{"segment":[1.1045598395364387,4.317824827278805],"label":"Doing a 
powerbomb"}]},"P1gGM89_T2g":{"duration":63.955,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=P1gGM89_T2g","annotations":[{"segment":[0.01,63.941],"label":"Scuba diving"}]},"QylENMzsW9w":{"duration":64.506,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=QylENMzsW9w","annotations":[{"segment":[2.6164288367453064,61.687341420187416],"label":"Mooping floor"}]},"kUQ4bTeoG-Y":{"duration":137.161,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kUQ4bTeoG-Y","annotations":[{"segment":[60.34175109045178,63.55141860046801],"label":"Running a marathon"}]},"029IFU5pBqI":{"duration":222.076,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=029IFU5pBqI","annotations":[]},"5ayMRPi7Lg4":{"duration":21.46,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=5ayMRPi7Lg4","annotations":[{"segment":[1.6386955,21.365],"label":"Springboard diving"}]},"SIf4H2dqbpg":{"duration":100.566,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SIf4H2dqbpg","annotations":[{"segment":[0.01,100.565645],"label":"Decorating the Christmas tree"}]},"OJJMEEsOdPQ":{"duration":12.38,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=OJJMEEsOdPQ","annotations":[{"segment":[0,12.376234],"label":"Using uneven 
bars"}]},"j8i-9T0UeRQ":{"duration":133.701,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=j8i-9T0UeRQ","annotations":[{"segment":[46.1079293660286,56.52074080420719],"label":"Hurling"},{"segment":[83.59405054347152,86.92615020368868],"label":"Hurling"},{"segment":[92.96558083783228,105.25269686574887],"label":"Hurling"}]},"tFjGMdff3WM":{"duration":123.089,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tFjGMdff3WM","annotations":[{"segment":[17.907232115292476,27.493544654233283],"label":"Welding"},{"segment":[36.88813094239528,43.79027597043266],"label":"Welding"},{"segment":[55.67730351871926,60.853912289747306],"label":"Welding"},{"segment":[72.93266608881272,76.76719110438906],"label":"Welding"},{"segment":[88.46249240189684,93.44737492214605],"label":"Welding"},{"segment":[104.5674974673174,109.55237998756661],"label":"Welding"}]},"mi_zRluISrE":{"duration":164.932,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mi_zRluISrE","annotations":[{"segment":[18.783132081745855,143.57517236817472],"label":"Laying tile"}]},"xZUa0OQ8l0Q":{"duration":194.096,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xZUa0OQ8l0Q","annotations":[]},"-2dxp-mv2zo":{"duration":126.456,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-2dxp-mv2zo","annotations":[{"segment":[41.82315891419657,50.10887907644306],"label":"Baking cookies"}]},"_QyQSAtMdj8":{"duration":183.972,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_QyQSAtMdj8","annotations":[{"segment":[24.675828393135724,99.56409828393134],"label":"Snowboarding"}]},"x18x9BKMAlk":{"duration":205.89,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=x18x9BKMAlk","annotations":[{"segment":[14.611344230109205,27.456482014820594],"label":"Tai 
chi"},{"segment":[46.40306024726989,55.07352825195008],"label":"Tai chi"},{"segment":[173.56992431591266,188.34183276833073],"label":"Tai chi"}]},"9ntzCNcFTKM":{"duration":92.56,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=9ntzCNcFTKM","annotations":[{"segment":[2.021476859074363,88.22302396255851],"label":"Doing karate"}]},"MmOVjM5-D-U":{"duration":93.159,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=MmOVjM5-D-U","annotations":[{"segment":[0.2,67.28918188923556],"label":"Grooming dog"}]},"81LATpIw6zE":{"duration":40.31,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=81LATpIw6zE","annotations":[]},"qX7UMKN6ygM":{"duration":87.633,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=qX7UMKN6ygM","annotations":[{"segment":[0.19999999999999998,77.75883177570094],"label":"Beach soccer"}]},"kgzplQb0__4":{"duration":196.674,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=kgzplQb0__4","annotations":[]},"F4eF_nO2UFs":{"duration":215.017,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=F4eF_nO2UFs","annotations":[{"segment":[26.830702028081124,203.2425678627145],"label":"Assembling bicycle"}]},"dLRZeBxrGIU":{"duration":67.11,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dLRZeBxrGIU","annotations":[]},"28_jzEjO7lM":{"duration":122.323,"subset":"testing","resolution":"810x480","url":"https://www.youtube.com/watch?v=28_jzEjO7lM","annotations":[]},"Qo3riKtRg2c":{"duration":10.821,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Qo3riKtRg2c","annotations":[{"segment":[5.806943787831512,9.301238450858033],"label":"Playing ten 
pins"}]},"sWEbq5Ry63Q":{"duration":234.2,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=sWEbq5Ry63Q","annotations":[{"segment":[71.61077483099324,227.6199613151326],"label":"Fixing bicycle"}]},"YODfHuzK2As":{"duration":165.837,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=YODfHuzK2As","annotations":[{"segment":[24.569726989079566,62.070889235569425],"label":"Throwing darts"},{"segment":[69.8297503900156,72.67466614664586],"label":"Throwing darts"},{"segment":[112.77571918876754,116.65561778471138],"label":"Throwing darts"},{"segment":[123.12211544461779,126.22603432137284],"label":"Throwing darts"},{"segment":[136.5724305772231,140.96964898595942],"label":"Throwing darts"},{"segment":[147.69480655226207,151.83336505460218],"label":"Throwing darts"}]},"ZOczr9WCVig":{"duration":226.186,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZOczr9WCVig","annotations":[{"segment":[0.01,217.71645752418098],"label":"Laying tile"}]},"Gg8Ha8OPtZ0":{"duration":21.433,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Gg8Ha8OPtZ0","annotations":[{"segment":[1.4690234009360374,12.987502340093602],"label":"Playing ten pins"}]},"6EKVtCJ2nrs":{"duration":62.09,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=6EKVtCJ2nrs","annotations":[{"segment":[21.6700309,51.3416001],"label":"Bungee jumping"},{"segment":[4.667305300000001,16.9418774],"label":"Bungee jumping"}]},"ZJCbT3300OA":{"duration":124.877,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZJCbT3300OA","annotations":[{"segment":[27.46895037098284,123.90249954570982],"label":"Snow 
tubing"}]},"Uw_0h2UrfyY":{"duration":139.04,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=Uw_0h2UrfyY","annotations":[{"segment":[19.07183775351014,117.91560686427458],"label":"Ballet"}]},"NbdAUGbrWrE":{"duration":130.82,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=NbdAUGbrWrE","annotations":[{"segment":[0,127.75990736661466],"label":"Brushing hair"}]},"rhDxMg72ofo":{"duration":214.785,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rhDxMg72ofo","annotations":[{"segment":[34.177842638377534,193.00428784024962],"label":"Blow-drying hair"}]},"orwTrxIwCpo":{"duration":132.192,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=orwTrxIwCpo","annotations":[{"segment":[0,40.111058896777635],"label":"Hand car wash"},{"segment":[43.20445675514095,58.05276647528486],"label":"Hand car wash"},{"segment":[83.41862891386404,91.25523625975039],"label":"Hand car wash"},{"segment":[105.07241327535102,129.20091642199688],"label":"Hand car wash"}]},"3dHFCjBbGKA":{"duration":60.233,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=3dHFCjBbGKA","annotations":[]},"sN1MPx9hb1I":{"duration":36.827,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sN1MPx9hb1I","annotations":[{"segment":[0.2,8.894968798751949],"label":"Knitting"},{"segment":[9.583611544461776,34.432137285491415],"label":"Knitting"}]},"79gvzh0wn0Q":{"duration":90.77,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=79gvzh0wn0Q","annotations":[{"segment":[0,60.32239816848674],"label":"Doing karate"}]},"lq20hEghHtU":{"duration":187.548,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lq20hEghHtU","annotations":[{"segment":[0.2,152.54646546539837],"label":"Camel 
ride"}]},"18QVPZgjy8w":{"duration":132.38,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=18QVPZgjy8w","annotations":[{"segment":[62.731662639999996,74.5806384],"label":"Playing badminton"},{"segment":[86.5209536,112.09864432],"label":"Playing badminton"}]},"kTStFRAp99E":{"duration":10.75,"subset":"training","resolution":"176x144","url":"https://www.youtube.com/watch?v=kTStFRAp99E","annotations":[{"segment":[2.730698907956318,9.731659906396255],"label":"Shaving legs"}]},"r3dM-5cZ7e8":{"duration":67.269,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=r3dM-5cZ7e8","annotations":[{"segment":[16.47598829797192,67.268207],"label":"Swimming"}]},"v1Iv-ySc2LM":{"duration":39.405,"subset":"training","resolution":"432x720","url":"https://www.youtube.com/watch?v=v1Iv-ySc2LM","annotations":[{"segment":[0.01,39.404263],"label":"Mooping floor"}]},"vlwOSDuiKQA":{"duration":201.97,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=vlwOSDuiKQA","annotations":[{"segment":[9.294900753853282,190.46669510862068],"label":"Playing racquetball"}]},"PMDpnLURLp8":{"duration":91.3,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PMDpnLURLp8","annotations":[{"segment":[1.242593,82.05007020280812],"label":"Pole vault"}]},"045Tkq12H_c":{"duration":231.92000000000002,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=045Tkq12H_c","annotations":[{"segment":[6.14919968798752,113.21761778471138],"label":"Rock climbing"},{"segment":[138.53785179407177,145.77220436817473],"label":"Rock climbing"},{"segment":[183.7525553822153,202.20015444617783],"label":"Rock climbing"},{"segment":[210.1579422776911,217.0305772230889],"label":"Rock 
climbing"}]},"95UgspVYJSM":{"duration":46.557,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=95UgspVYJSM","annotations":[{"segment":[0,31.44891091575663],"label":"Decorating the Christmas tree"}]},"72_TVgyRzC0":{"duration":171.062,"subset":"testing","resolution":"800x480","url":"https://www.youtube.com/watch?v=72_TVgyRzC0","annotations":[]},"yzN9jN3qncA":{"duration":150.35,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yzN9jN3qncA","annotations":[{"segment":[0.01,146.10467706708266],"label":"Playing piano"}]},"BSwChZlOb0k":{"duration":30.98,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=BSwChZlOb0k","annotations":[]},"ODx-nocNBos":{"duration":174.429,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ODx-nocNBos","annotations":[{"segment":[36.72610764430577,46.79178159126365],"label":"Sumo"},{"segment":[82.42970826833073,91.40720124804993],"label":"Sumo"},{"segment":[103.10514664586583,118.88377067082682],"label":"Sumo"},{"segment":[134.93443993759752,145.54420436817472],"label":"Sumo"}]},"MXN34b2OUjQ":{"duration":24.85,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=MXN34b2OUjQ","annotations":[{"segment":[8.39785845,22.334707488299532],"label":"Playing field hockey"},{"segment":[5.45720425,6.7911323],"label":"Playing field hockey"}]},"n9EdHGz7ArI":{"duration":110.551,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=n9EdHGz7ArI","annotations":[]},"RX8YG2zlyUo":{"duration":85.47,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RX8YG2zlyUo","annotations":[{"segment":[28.602671606864273,75.22435647425897],"label":"Horseback 
riding"}]},"H5Z__A99EG4":{"duration":118.49000000000001,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=H5Z__A99EG4","annotations":[{"segment":[30.870621796300423,32.53430756942278],"label":"Smoking hookah"},{"segment":[64.88376111856475,68.39598750390014],"label":"Smoking hookah"}]},"apdhN0kHZwo":{"duration":154.901,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=apdhN0kHZwo","annotations":[]},"n0Th8ZqMeGk":{"duration":214.901,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=n0Th8ZqMeGk","annotations":[{"segment":[0.2,2.3468093009434665],"label":"Snowboarding"},{"segment":[11.063529561590627,214.900679],"label":"Snowboarding"}]},"b4xvXZq5uD8":{"duration":215.71,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=b4xvXZq5uD8","annotations":[]},"03c6QhTMDSs":{"duration":161.91,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=03c6QhTMDSs","annotations":[]},"EwEV5_sHGJk":{"duration":204.07999999999998,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=EwEV5_sHGJk","annotations":[{"segment":[70.04311950078004,189.11642265210608],"label":"Ironing clothes"}]},"E2oIRY0ZjJQ":{"duration":39.87,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=E2oIRY0ZjJQ","annotations":[]},"lIXHKxbq59c":{"duration":71.634,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lIXHKxbq59c","annotations":[{"segment":[23.004190327613106,69.92546957878315],"label":"Snow 
tubing"}]},"HX3ltWwf2xk":{"duration":42.098,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=HX3ltWwf2xk","annotations":[]},"_rX3VLTtzNg":{"duration":217.409,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_rX3VLTtzNg","annotations":[]},"UhNgPK81rKM":{"duration":32.9,"subset":"validation","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=UhNgPK81rKM","annotations":[{"segment":[0.6968643600000001,32.363106096107266],"label":"Playing bagpipes"}]},"Zkz4ef53YjA":{"duration":43.68,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Zkz4ef53YjA","annotations":[{"segment":[19.69190445273011,36.67430044],"label":"Preparing pasta"},{"segment":[8.926088177535101,11.924163595943838],"label":"Preparing pasta"}]},"xwSeXFkTNlE":{"duration":162.15,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xwSeXFkTNlE","annotations":[{"segment":[19.219463338533544,94.32710296411855],"label":"Using the pommel horse"}]},"UojTppgtyCE":{"duration":171.526,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UojTppgtyCE","annotations":[{"segment":[70.6255600624025,159.17503120124806],"label":"Trimming branches or hedges"}]},"Ck0IhZZI5CA":{"duration":80.667,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ck0IhZZI5CA","annotations":[{"segment":[0.01,80.66612244897959],"label":"Painting 
fence"}]},"0NrS_wVpo-o":{"duration":122.068,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0NrS_wVpo-o","annotations":[]},"cYLIJC6QjQA":{"duration":236.728,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cYLIJC6QjQA","annotations":[{"segment":[11.445383775351015,21.784990639625587],"label":"Slacklining"},{"segment":[62.031837753510146,67.20115756630265],"label":"Slacklining"},{"segment":[94.52470514820594,96.74012792511701],"label":"Slacklining"},{"segment":[120.74054134165367,223.75770046801873],"label":"Slacklining"}]},"ACki-MP9qdQ":{"duration":81.32,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=ACki-MP9qdQ","annotations":[{"segment":[27.52410402,48.9461248049922],"label":"Mixing drinks"},{"segment":[56.42752730109205,59.34400624024961],"label":"Mixing drinks"}]},"F6FyJl5OQ0A":{"duration":182.945,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=F6FyJl5OQ0A","annotations":[{"segment":[19.405035881435257,169.81262870514823],"label":"Wakeboarding"}]},"Paus1tL8KjE":{"duration":199.135,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Paus1tL8KjE","annotations":[{"segment":[59.646937714243265,193.23122351014038],"label":"Waterskiing"}]},"LoA7h0bZmDI":{"duration":188.5,"subset":"training","resolution":"378x218","url":"https://www.youtube.com/watch?v=LoA7h0bZmDI","annotations":[{"segment":[42.09559668,186.14702101404058],"label":"Preparing pasta"}]},"OtcRXishIxw":{"duration":84.103,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OtcRXishIxw","annotations":[{"segment":[0.01,84.102675],"label":"Hitting a pinata"}]},"Mkljhl3D9-Q":{"duration":148.1,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Mkljhl3D9-Q","annotations":[{"segment":[66.40801482059283,74.26148439937597],"label":"Preparing 
pasta"}]},"KHOTebb8UC8":{"duration":148.144,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=KHOTebb8UC8","annotations":[{"segment":[85.9739649994163,128.72983324960998],"label":"Elliptical trainer"}]},"yrCBN0tYjPE":{"duration":52.15,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yrCBN0tYjPE","annotations":[{"segment":[16.460214240000003,49.87392577379095],"label":"Using parallel bars"}]},"fVzpG-QQ1n8":{"duration":179.932,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=fVzpG-QQ1n8","annotations":[{"segment":[13.468530421216848,19.080418096723868],"label":"Clipping cat claws"},{"segment":[62.57254758190327,72.11275663026521],"label":"Clipping cat claws"}]},"4o4Vh49v-q4":{"duration":89.351,"subset":"training","resolution":"482x360","url":"https://www.youtube.com/watch?v=4o4Vh49v-q4","annotations":[{"segment":[12.534066025387657,85.34837666302374],"label":"Gargling mouthwash"}]},"jBlG0L_M96g":{"duration":20.922,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=jBlG0L_M96g","annotations":[]},"xZOzVaTezm8":{"duration":80.968,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=xZOzVaTezm8","annotations":[{"segment":[45.825776911076446,67.41312636505461],"label":"Snow 
tubing"}]},"dZMMhVx7yY4":{"duration":164.026,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dZMMhVx7yY4","annotations":[{"segment":[31.721753510140406,40.675474258970354],"label":"Rock-paper-scissors"},{"segment":[45.280244929797185,52.69904212168487],"label":"Rock-paper-scissors"},{"segment":[69.83902184087363,77.76946021840874],"label":"Rock-paper-scissors"},{"segment":[98.74674882995319,104.11898127925117],"label":"Rock-paper-scissors"}]},"5QbiJmDyoM0":{"duration":97.69,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5QbiJmDyoM0","annotations":[{"segment":[3.962316816482183,84.73262115246514],"label":"Using the balance beam"}]},"P1oGwMXGYVI":{"duration":37.27,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=P1oGwMXGYVI","annotations":[]},"rZmNsUX-7SU":{"duration":16.417,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rZmNsUX-7SU","annotations":[{"segment":[0.01,16.416],"label":"Swinging at the 
playground"}]},"c1T9LbXOVUs":{"duration":105.07,"subset":"training","resolution":"400x224","url":"https://www.youtube.com/watch?v=c1T9LbXOVUs","annotations":[{"segment":[1.802232449297972,4.587500780031202],"label":"Rock-paper-scissors"},{"segment":[6.553572542901716,8.519644305772232],"label":"Rock-paper-scissors"},{"segment":[12.124109204368175,13.926341653666146],"label":"Rock-paper-scissors"},{"segment":[22.282146645865836,24.739736349453977],"label":"Rock-paper-scissors"},{"segment":[28.835719188767552,29.32723712948518],"label":"Rock-paper-scissors"},{"segment":[46.202686427457095,48.16875819032761],"label":"Rock-paper-scissors"},{"segment":[57.99911700468018,59.47367082683307],"label":"Rock-paper-scissors"},{"segment":[81.42813884555382,83.72188923556942],"label":"Rock-paper-scissors"}]},"tC5eMgePakk":{"duration":231.23,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tC5eMgePakk","annotations":[]},"dJgea9sOlBY":{"duration":20.713,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=dJgea9sOlBY","annotations":[{"segment":[0.2,20.301590868595518],"label":"Doing 
crunches"}]},"KPv8ZGrb78Y":{"duration":200.82999999999998,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=KPv8ZGrb78Y","annotations":[{"segment":[1.5665318857652266,194.56325851170047],"label":"Zumba"}]},"XgcPVh41wxA":{"duration":231.82999999999998,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=XgcPVh41wxA","annotations":[]},"wTVZdpVurlc":{"duration":172.78,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=wTVZdpVurlc","annotations":[]},"SXZ5bETIdGk":{"duration":47.346,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=SXZ5bETIdGk","annotations":[]},"bz4QwqHiVlo":{"duration":184.37,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bz4QwqHiVlo","annotations":[]},"QBy64z68s0w":{"duration":97.1,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QBy64z68s0w","annotations":[{"segment":[8.941215288611545,89.10906084243369],"label":"Using the balance beam"}]},"pD-zyfLtC6w":{"duration":93.07,"subset":"training","resolution":"440x330","url":"https://www.youtube.com/watch?v=pD-zyfLtC6w","annotations":[{"segment":[3.7748797659906397,10.889076248049921],"label":"Doing motocross"},{"segment":[12.486140764430576,22.213715546021838],"label":"Doing motocross"},{"segment":[24.10115542901716,34.264293260530415],"label":"Doing motocross"},{"segment":[38.47473607644305,72.30346628705148],"label":"Doing motocross"},{"segment":[73.90053080343213,90.01636365054601],"label":"Doing 
motocross"}]},"vr7iD3pjmSU":{"duration":79.37,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vr7iD3pjmSU","annotations":[{"segment":[14.096811299999999,52.6269315],"label":"Archery"}]},"AFnEr0_wStU":{"duration":180.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=AFnEr0_wStU","annotations":[{"segment":[15.167812410455602,46.626978891400555],"label":"Getting a piercing"},{"segment":[57.30062466172116,72.46843707217676],"label":"Getting a piercing"}]},"VbhCWRXsEHQ":{"duration":184.251,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VbhCWRXsEHQ","annotations":[{"segment":[57.200879153533485,160.10497241653667],"label":"Baking cookies"}]},"4mzM3JjBJ74":{"duration":166.139,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4mzM3JjBJ74","annotations":[{"segment":[46.64302652106084,117.38495007800311],"label":"Clipping cat claws"}]},"90LkAH4tZC4":{"duration":161.449,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=90LkAH4tZC4","annotations":[{"segment":[0.2,152.409267464373],"label":"Playing drums"}]},"6e8GkCHe0Cw":{"duration":6.53,"subset":"testing","resolution":"480x480","url":"https://www.youtube.com/watch?v=6e8GkCHe0Cw","annotations":[]},"iVgPP9Aq1Gs":{"duration":209.189,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iVgPP9Aq1Gs","annotations":[]},"mNiFmkm5OZA":{"duration":61.77,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=mNiFmkm5OZA","annotations":[]},"bRVazNYfpGE":{"duration":94.67,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bRVazNYfpGE","annotations":[{"segment":[50.939366029999995,90.07335381669267],"label":"Using the balance beam"},{"segment":[4.057358280031202,39.212018070000006],"label":"Using the balance 
beam"}]},"cC1nvRqyXWs":{"duration":6.43,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cC1nvRqyXWs","annotations":[{"segment":[0,6.431926],"label":"Ironing clothes"}]},"ILARi15rMWI":{"duration":55.543,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ILARi15rMWI","annotations":[{"segment":[0.2,55.300999999999995],"label":"Powerbocking"}]},"dmdllfN9Yv0":{"duration":126.224,"subset":"testing","resolution":"1152x720","url":"https://www.youtube.com/watch?v=dmdllfN9Yv0","annotations":[]},"n9xULD7oYXc":{"duration":52.361,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=n9xULD7oYXc","annotations":[{"segment":[0,52.360996],"label":"Knitting"}]},"Tm5O7cRr9wU":{"duration":87.84,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Tm5O7cRr9wU","annotations":[]},"pleP5-tuJ_Q":{"duration":142.548,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=pleP5-tuJ_Q","annotations":[{"segment":[0.2,115.50099708382709],"label":"Knitting"},{"segment":[124.81201823468247,140.3303868194414],"label":"Knitting"}]},"KApqxr02rcQ":{"duration":45.23,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=KApqxr02rcQ","annotations":[]},"JErW7ISPd0U":{"duration":60.233,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=JErW7ISPd0U","annotations":[]},"JdTP5XEst5k":{"duration":48.51,"subset":"testing","resolution":"398x226","url":"https://www.youtube.com/watch?v=JdTP5XEst5k","annotations":[]},"SHZ9fsusMGc":{"duration":53.615,"subset":"training","resolution":"352x264","url":"https://www.youtube.com/watch?v=SHZ9fsusMGc","annotations":[{"segment":[0.01,36.509973574453184],"label":"Playing rubik 
cube"}]},"yaWwad6WXVY":{"duration":192.47,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yaWwad6WXVY","annotations":[{"segment":[17.261482059282372,31.671067082683308],"label":"Layup drill in basketball"},{"segment":[103.71899219968799,105.82039001560062],"label":"Layup drill in basketball"},{"segment":[123.23197191887675,133.43876131045243],"label":"Layup drill in basketball"}]},"R_HDifqMHSY":{"duration":161.751,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=R_HDifqMHSY","annotations":[{"segment":[15.390555324492977,139.52421466302653],"label":"Playing drums"}]},"BmlBdeOzwI8":{"duration":121.255,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BmlBdeOzwI8","annotations":[]},"YZacstLO0XA":{"duration":130.172,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=YZacstLO0XA","annotations":[{"segment":[15.828889235569422,108.36701092043681],"label":"Making an omelette"}]},"euF5okzyaaA":{"duration":91.44,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=euF5okzyaaA","annotations":[{"segment":[6.276706676430323,10.413626985895762],"label":"Horseback riding"},{"segment":[27.103960327613105,52.21078673634945],"label":"Horseback riding"},{"segment":[63.05237086739469,84.02227701560062],"label":"Horseback 
riding"}]},"ZLG1Exv8HrY":{"duration":22.2,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZLG1Exv8HrY","annotations":[]},"xYW64moSLKg":{"duration":151.813,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xYW64moSLKg","annotations":[{"segment":[5.684071001560063,22.26261142277691],"label":"Swimming"},{"segment":[31.262390508580342,47.13042205460218],"label":"Swimming"},{"segment":[55.41969226521061,59.919581808112326],"label":"Swimming"},{"segment":[69.39303347737909,76.02444964586583],"label":"Swimming"},{"segment":[81.47168435569422,82.4190295226209],"label":"Swimming"},{"segment":[88.33993681591262,93.55033523400935],"label":"Swimming"},{"segment":[98.28706106864273,135.70719516224648],"label":"Swimming"},{"segment":[138.07555807956317,142.10177503900155],"label":"Swimming"},{"segment":[142.10177503900155,143.9964653728549],"label":"Swimming"}]},"gf3JIm2C59g":{"duration":103.49000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gf3JIm2C59g","annotations":[{"segment":[52.79511307799251,80.72647113884555],"label":"Cumbia"}]},"xXGc5_0_2QI":{"duration":42.191,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=xXGc5_0_2QI","annotations":[{"segment":[0.2,39.62767222429906],"label":"Calf roping"}]},"UajYunTsr70":{"duration":21.57,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UajYunTsr70","annotations":[{"segment":[0.01,21.57124716553288],"label":"Washing face"}]},"ux_qqONPSrc":{"duration":123.51,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=ux_qqONPSrc","annotations":[{"segment":[34.778474959406545,116.66684922542902],"label":"Preparing 
salad"}]},"I2Y-5EEXAE0":{"duration":201.46,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=I2Y-5EEXAE0","annotations":[{"segment":[135.73359126365057,163.68096879875196],"label":"Making a sandwich"}]},"SwIxaPdYIJE":{"duration":25.12,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SwIxaPdYIJE","annotations":[{"segment":[0.01,25.123989],"label":"Playing lacrosse"}]},"on2DKKcvWhE":{"duration":213.717,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=on2DKKcvWhE","annotations":[{"segment":[55.33186583463339,96.99742745709828],"label":"Mooping floor"},{"segment":[95.99745397815913,108.99710920436817],"label":"Mooping floor"}]},"U_qCCwgS0OM":{"duration":160.427,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=U_qCCwgS0OM","annotations":[{"segment":[12.84207421301013,146.75942000740267],"label":"Mooping floor"}]},"Ofnuo7FTHfM":{"duration":10.473,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ofnuo7FTHfM","annotations":[{"segment":[0.01,10.441],"label":"Doing a powerbomb"}]},"UEtVbSLGhfY":{"duration":8.43,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=UEtVbSLGhfY","annotations":[{"segment":[0.12174199211370164,7.290196403328655],"label":"Walking the dog"}]},"ho8cKYrtufU":{"duration":176.31,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ho8cKYrtufU","annotations":[{"segment":[70.3784336973479,176.221],"label":"Archery"}]},"a_sm5Hv7HNw":{"duration":149.12,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=a_sm5Hv7HNw","annotations":[{"segment":[15.27564792,99.68337753978159],"label":"Rock 
climbing"}]},"Ox16PeB954Q":{"duration":44.65,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ox16PeB954Q","annotations":[{"segment":[2.087597503900156,40.22104524180967],"label":"Smoking hookah"}]},"BR4dNQG5QBc":{"duration":23.36,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BR4dNQG5QBc","annotations":[]},"j3OHA8wXvqo":{"duration":23.778,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=j3OHA8wXvqo","annotations":[]},"tnavaGow7BI":{"duration":5.9,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tnavaGow7BI","annotations":[{"segment":[0.42459774,1.54671972],"label":"Preparing salad"},{"segment":[1.8803030399999998,5.836666],"label":"Preparing salad"}]},"bAsbqk-CqKU":{"duration":160.8,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bAsbqk-CqKU","annotations":[]},"w4pO-1-FG5w":{"duration":9.99,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=w4pO-1-FG5w","annotations":[]},"OQWqRMr0PSA":{"duration":53.128,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OQWqRMr0PSA","annotations":[]},"qfb0Qe7s9oo":{"duration":155.11,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qfb0Qe7s9oo","annotations":[{"segment":[38.94667862714508,111.53251326053042],"label":"Making a lemonade"}]},"U5wliityRuU":{"duration":197.208,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=U5wliityRuU","annotations":[{"segment":[0.01,6.4863985823463555],"label":"Scuba diving"},{"segment":[36.94427753423912,40.32848630667166],"label":"Scuba diving"},{"segment":[54.4806320822986,58.48015154062794],"label":"Scuba diving"},{"segment":[72.63229731625489,112.93514724249684],"label":"Scuba diving"},{"segment":[115.98606429155126,161.2113997049678],"label":"Scuba 
diving"}]},"lU-PEm5L5EU":{"duration":103.89,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lU-PEm5L5EU","annotations":[{"segment":[12.755610288984853,23.573661610913312],"label":"Spinning"},{"segment":[41.657568298316114,54.41318105103773],"label":"Spinning"},{"segment":[78.55411076443058,96.04657254290171],"label":"Spinning"}]},"1HjQpe8PiqA":{"duration":146.448,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=1HjQpe8PiqA","annotations":[]},"2Mj26IwwEiY":{"duration":211.07,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=2Mj26IwwEiY","annotations":[{"segment":[0.2,210.86938775510205],"label":"Hanging wallpaper"},{"segment":[181.1979748854382,188.09137629445843],"label":"Hanging wallpaper"}]},"FMlWHXByLL0":{"duration":69.428,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=FMlWHXByLL0","annotations":[{"segment":[0.010000000000000002,69.401],"label":"Rope skipping"}]},"Tz5eN8gV6F4":{"duration":163.19,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Tz5eN8gV6F4","annotations":[{"segment":[32.58705098680138,163.18984126984128],"label":"Grooming dog"}]},"TqcoukXhXeA":{"duration":176.519,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TqcoukXhXeA","annotations":[{"segment":[0.01,174.70342242514835],"label":"Kite flying"}]},"lwo7fssfLiw":{"duration":67.152,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lwo7fssfLiw","annotations":[{"segment":[30.680893915756627,50.47164118564742],"label":"Using the rowing machine"},{"segment":[54.86958502340093,63.246620904836185],"label":"Using the rowing 
machine"}]},"J8WxSDLZwu0":{"duration":41.006,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=J8WxSDLZwu0","annotations":[{"segment":[0.2,40.741],"label":"Hurling"}]},"ZjbBmS8Q8kM":{"duration":135.257,"subset":"training","resolution":"640x356","url":"https://www.youtube.com/watch?v=ZjbBmS8Q8kM","annotations":[{"segment":[23.55940334191608,125.58004608733073],"label":"Playing pool"}]},"c5Io6wg8D60":{"duration":34.48,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=c5Io6wg8D60","annotations":[{"segment":[17.03757566302652,22.472209048361933],"label":"Triple jump"}]},"aQQr1ad0v1I":{"duration":188.99,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=aQQr1ad0v1I","annotations":[{"segment":[81.64024024960999,111.40798127925117],"label":"Doing motocross"}]},"X74SOIQRD4M":{"duration":16.21,"subset":"training","resolution":"160x120","url":"https://www.youtube.com/watch?v=X74SOIQRD4M","annotations":[{"segment":[9.67087897,15.406614028081124],"label":"Playing lacrosse"},{"segment":[1.510452355694228,8.8522634],"label":"Playing lacrosse"}]},"vZzl0RbIQPM":{"duration":178.516,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vZzl0RbIQPM","annotations":[{"segment":[42.052544635118736,138.1328607301092],"label":"Making a lemonade"}]},"iXLXRHHFp60":{"duration":172.46,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iXLXRHHFp60","annotations":[{"segment":[80.15696098,166.06350721918878],"label":"Playing polo"},{"segment":[10.89161464352574,65.09454457000001],"label":"Playing polo"}]},"GqkvSUNfZFk":{"duration":145.032,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GqkvSUNfZFk","annotations":[{"segment":[14.933072898426142,136.20772552806875],"label":"Playing ice 
hockey"}]},"0z7qHOca3D4":{"duration":115.5,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0z7qHOca3D4","annotations":[{"segment":[16.3914992199688,30.981734789391574],"label":"Horseback riding"},{"segment":[34.58426209048362,39.44767394695788],"label":"Horseback riding"},{"segment":[42.32969578783152,48.093739469578786],"label":"Horseback riding"},{"segment":[49.895003120124805,82.1376224648986],"label":"Horseback riding"},{"segment":[86.28052886115444,94.38621528861154],"label":"Horseback riding"}]},"PNuJUYo3Hf8":{"duration":220.73,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PNuJUYo3Hf8","annotations":[]},"7hEUl8lziZs":{"duration":23.058,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=7hEUl8lziZs","annotations":[{"segment":[0.2,22.857414965986397],"label":"Laying tile"}]},"mg0n3DNtUZU":{"duration":115.497,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mg0n3DNtUZU","annotations":[{"segment":[1.4402620904836194,6.481179407176287],"label":"Tug of war"},{"segment":[50.22914040561622,68.77251482059282],"label":"Tug of war"}]},"5Owjrx-YjjM":{"duration":72.888,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5Owjrx-YjjM","annotations":[{"segment":[44.80120032449298,72.54610610920436],"label":"Kite flying"}]},"D9hS68pULz0":{"duration":168.9,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=D9hS68pULz0","annotations":[{"segment":[50.06454059522925,57.70597047555372],"label":"Drinking 
beer"}]},"iuXjatyFcwo":{"duration":53.34,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=iuXjatyFcwo","annotations":[]},"dUzqM38vwPI":{"duration":193.794,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dUzqM38vwPI","annotations":[{"segment":[0.01,129.87152444796698],"label":"Snowboarding"}]},"aj-klsonETc":{"duration":35.62,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aj-klsonETc","annotations":[{"segment":[1.3975337004202826,34.29375240631138],"label":"Playing lacrosse"}]},"am4Z43QlUrg":{"duration":216.32,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=am4Z43QlUrg","annotations":[{"segment":[44.526489790951636,170.68487753198127],"label":"Fixing bicycle"}]},"CQweu0ZYNHU":{"duration":96.46000000000001,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=CQweu0ZYNHU","annotations":[{"segment":[35.71762870514821,45.492979719188774],"label":"Skateboarding"},{"segment":[51.358190327613116,76.47332293291733],"label":"Skateboarding"}]},"EzmOoUkzJf0":{"duration":131.26,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EzmOoUkzJf0","annotations":[]},"LTPrtyWIcA4":{"duration":130.66,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LTPrtyWIcA4","annotations":[{"segment":[63.58732885999999,121.48603470826833],"label":"Cleaning windows"}]},"MbEtgOmOY-4":{"duration":171.78,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=MbEtgOmOY-4","annotations":[{"segment":[11.244784260000001,127.69696329077655],"label":"Getting a 
piercing"}]},"8aPE_PF1pgA":{"duration":124.413,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8aPE_PF1pgA","annotations":[]},"Sd4C8_FMdjA":{"duration":119.0,"subset":"validation","resolution":"352x264","url":"https://www.youtube.com/watch?v=Sd4C8_FMdjA","annotations":[{"segment":[12.28443115575124,19.32817898752694],"label":"Breakdancing"},{"segment":[28.596268239863388,52.69330029593815],"label":"Breakdancing"}]},"kC8DS6b76yI":{"duration":65.852,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=kC8DS6b76yI","annotations":[{"segment":[3.7951980065130932,63.184917146417455],"label":"Painting furniture"}]},"cJYtpnf5aOM":{"duration":90.349,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cJYtpnf5aOM","annotations":[{"segment":[0.01,90.23643397201415],"label":"Cutting the grass"}]},"4naz02RpAH4":{"duration":39.916,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4naz02RpAH4","annotations":[]},"MZQmJZ_31O0":{"duration":223.778,"subset":"training","resolution":"3840x2160","url":"https://www.youtube.com/watch?v=MZQmJZ_31O0","annotations":[{"segment":[27.882990654205607,162.41842056074765],"label":"Hula hoop"}]},"Nj_rPQwzllA":{"duration":164.79,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Nj_rPQwzllA","annotations":[{"segment":[80.46771031657592,105.9191586275696],"label":"Playing harmonica"},{"segment":[125.9718148725949,140.1115083787025],"label":"Playing harmonica"}]},"XB139ATiXuk":{"duration":207.32999999999998,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XB139ATiXuk","annotations":[{"segment":[76.78141575663027,104.26108034321375],"label":"Playing saxophone"},{"segment":[110.40359360374417,125.92152184087365],"label":"Playing saxophone"},{"segment":[130.12429407176288,134.97364664586584],"label":"Playing 
saxophone"},{"segment":[139.8229992199688,151.4614453978159],"label":"Playing saxophone"},{"segment":[160.19028003120127,164.71634243369735],"label":"Playing saxophone"},{"segment":[166.9793736349454,170.85885569422777],"label":"Playing saxophone"},{"segment":[175.38491809672388,184.1137527301092],"label":"Playing saxophone"}]},"liI1E_ZZV5w":{"duration":12.655,"subset":"training","resolution":"480x264","url":"https://www.youtube.com/watch?v=liI1E_ZZV5w","annotations":[{"segment":[1.9544723634945398,9.515713931357253],"label":"Putting on shoes"}]},"u2329Chp6IY":{"duration":165.187,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=u2329Chp6IY","annotations":[{"segment":[15.59094979853616,153.97673561364223],"label":"Riding bumper cars"}]},"EjaS2rW81S4":{"duration":172.618,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=EjaS2rW81S4","annotations":[{"segment":[42.41599279327154,44.83209388191851],"label":"Building sandcastles"},{"segment":[71.40920585703515,76.50986371084541],"label":"Building sandcastles"},{"segment":[147.65031262220694,150.87177407788346],"label":"Building sandcastles"}]},"E2sbaDQabkA":{"duration":165.536,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=E2sbaDQabkA","annotations":[{"segment":[14.954849782072746,157.28376347352025],"label":"Rope skipping"}]},"9dSOQrpovQI":{"duration":60.98,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9dSOQrpovQI","annotations":[{"segment":[12.223657904139294,18.597082066302654],"label":"Washing dishes"},{"segment":[33.817200381435256,43.329774328393135],"label":"Washing dishes"}]},"U0p4tW1LoPg":{"duration":60.07,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=U0p4tW1LoPg","annotations":[{"segment":[4.5425054602184085,33.38975663026521],"label":"Playing water polo"},{"segment":[42.474767550702026,53.43297659906396],"label":"Playing water 
polo"}]},"J9DxW6O6lrg":{"duration":56.4,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=J9DxW6O6lrg","annotations":[]},"wNvLv7PtvQg":{"duration":18.95,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=wNvLv7PtvQg","annotations":[{"segment":[0,18.947482],"label":"Kayaking"}]},"d3Rx-nw5Zlw":{"duration":200.64,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=d3Rx-nw5Zlw","annotations":[{"segment":[7.8237519500780035,184.03299219968798],"label":"Smoking a cigarette"}]},"2r7qhNGm44I":{"duration":141.294,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2r7qhNGm44I","annotations":[{"segment":[25.7766879875195,55.51902028081123],"label":"Snowboarding"},{"segment":[93.63326833073324,112.5961794071763],"label":"Snowboarding"},{"segment":[117.22341965678628,137.05444929797193],"label":"Snowboarding"}]},"pVkwQOp1xZo":{"duration":45.628,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pVkwQOp1xZo","annotations":[{"segment":[0.2,45.400999999999996],"label":"Painting fence"}]},"dbMPw8PfXHo":{"duration":116.57,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=dbMPw8PfXHo","annotations":[{"segment":[7.816541341653665,94.36002964118565],"label":"Making a 
sandwich"}]},"ssXkWpxq3Yc":{"duration":136.0,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ssXkWpxq3Yc","annotations":[]},"jwOZsKS25oc":{"duration":56.19,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jwOZsKS25oc","annotations":[{"segment":[33.1228593,54.719052262090486],"label":"Spinning"},{"segment":[9.316673166926678,28.39511457],"label":"Spinning"}]},"mNkOMZXvJrk":{"duration":179.56,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=mNkOMZXvJrk","annotations":[]},"mk3srKjFB3A":{"duration":49.6,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mk3srKjFB3A","annotations":[{"segment":[3.5779275800000003,47.88802652106084],"label":"Using the pommel horse"}]},"5olJd75WWFw":{"duration":71.36,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5olJd75WWFw","annotations":[{"segment":[12.201306552262091,50.86663416536661],"label":"Polishing forniture"},{"segment":[53.429465678627146,62.12080733229329],"label":"Polishing forniture"}]},"oOnKQgQZOZ0":{"duration":31.23,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=oOnKQgQZOZ0","annotations":[{"segment":[3.7515983244929796,31.230838],"label":"Playing water polo"}]},"D_GSISuQw3Y":{"duration":201.9,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=D_GSISuQw3Y","annotations":[{"segment":[62.013510140405614,145.11790951638065],"label":"Doing nails"}]},"TDWbRqIEcts":{"duration":127.478,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TDWbRqIEcts","annotations":[{"segment":[83.70298855858381,99.166828337744],"label":"Baking 
cookies"}]},"CoHVA7nr82A":{"duration":205.776,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CoHVA7nr82A","annotations":[{"segment":[125.12926120584315,133.44987736521514],"label":"Welding"},{"segment":[150.09110968395913,167.05236481200475],"label":"Welding"}]},"hQqoh0YxJYs":{"duration":188.268,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hQqoh0YxJYs","annotations":[]},"iaKlx11RAiY":{"duration":135.307,"subset":"validation","resolution":"362x240","url":"https://www.youtube.com/watch?v=iaKlx11RAiY","annotations":[{"segment":[8.72630779826084,118.96444331227953],"label":"Rock-paper-scissors"}]},"48xSiJdaH2g":{"duration":173.291,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=48xSiJdaH2g","annotations":[{"segment":[14.82266880342728,22.099253689325653],"label":"Hanging wallpaper"},{"segment":[53.36161838429644,54.43963095998508],"label":"Hanging wallpaper"},{"segment":[97.29063084360882,101.87218429028557],"label":"Hanging wallpaper"},{"segment":[112.92181319109423,132.59554269741204],"label":"Hanging wallpaper"}]},"Iqaw9bsIa18":{"duration":238.93,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Iqaw9bsIa18","annotations":[]},"HJWwqjLf1I4":{"duration":193.027,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=HJWwqjLf1I4","annotations":[]},"w--HmpjK-s8":{"duration":90.187,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=w--HmpjK-s8","annotations":[{"segment":[49.607789391575665,69.28226677067083],"label":"Playing congas"}]},"w-st-23ZQSs":{"duration":234.62,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=w-st-23ZQSs","annotations":[{"segment":[33.66554134165367,57.450978159126365],"label":"Pole vault"},{"segment":[73.91781903276132,118.5612542901716],"label":"Pole 
vault"},{"segment":[131.0028673946958,192.49548517940715],"label":"Pole vault"},{"segment":[193.59336817472698,203.47431513260528],"label":"Pole vault"},{"segment":[208.96373010920436,214.0871840873635],"label":"Pole vault"},{"segment":[219.9425600624025,229.0915850234009],"label":"Pole vault"}]},"i_16EfqIrFg":{"duration":4.714,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=i_16EfqIrFg","annotations":[{"segment":[0.2,4.713649],"label":"Roof shingle removal"}]},"mFSdzT5gXQs":{"duration":221.75,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mFSdzT5gXQs","annotations":[{"segment":[25.945840717628705,51.19979234945398],"label":"Discus throw"},{"segment":[92.3671929547582,133.53459356006238],"label":"Discus throw"},{"segment":[161.21015699219967,205.49105848361933],"label":"Discus throw"}]},"le0tJsyuPks":{"duration":125.133,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=le0tJsyuPks","annotations":[{"segment":[16.39799696723869,20.107067709828396],"label":"Playing ten pins"},{"segment":[43.33756341341654,55.636061138845555],"label":"Playing ten pins"},{"segment":[59.93077463026521,79.64741384087363],"label":"Playing ten pins"},{"segment":[108.14869428393136,124.932334],"label":"Playing ten pins"}]},"t1s7ST4FRJc":{"duration":192.1,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=t1s7ST4FRJc","annotations":[{"segment":[8.990571704500834,189.4013772414842],"label":"Zumba"}]},"38iF2VtR_2Y":{"duration":150.16,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=38iF2VtR_2Y","annotations":[]},"7RDn5qTQquE":{"duration":226.65,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7RDn5qTQquE","annotations":[{"segment":[3.1822929733515872,213.5672173227065],"label":"Playing 
squash"}]},"TBFAAb62ou4":{"duration":54.38,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TBFAAb62ou4","annotations":[{"segment":[4.238767550702028,48.40672542901716],"label":"Playing guitarra"}]},"dN9ZPc7NMVQ":{"duration":31.49,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=dN9ZPc7NMVQ","annotations":[{"segment":[5.599739934477379,16.94658138065523],"label":"Washing face"}]},"D-y_N4u0uRQ":{"duration":73.03,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=D-y_N4u0uRQ","annotations":[{"segment":[23.959563182527297,41.7158190327613],"label":"Smoking a cigarette"},{"segment":[44.675195007800305,54.00861154446177],"label":"Smoking a cigarette"},{"segment":[55.488299531981276,67.43962558502339],"label":"Smoking a cigarette"}]},"_dLbtK8_SHo":{"duration":213.23,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_dLbtK8_SHo","annotations":[{"segment":[48.82494744,97.95267864],"label":"Tennis serve with ball bouncing"},{"segment":[113.07480840000001,173.38634820000001],"label":"Tennis serve with ball bouncing"}]},"1R25VGmqS9o":{"duration":72.19,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=1R25VGmqS9o","annotations":[{"segment":[0,72.190838],"label":"Playing field hockey"}]},"2WyRPSKFUi8":{"duration":216.062,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=2WyRPSKFUi8","annotations":[{"segment":[6.234038221528862,185.50475897035884],"label":"Blowing 
leaves"}]},"KpmdpL5btYo":{"duration":97.06,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=KpmdpL5btYo","annotations":[{"segment":[15.310631371429654,83.6449732187059],"label":"Swimming"}]},"Xf8y5_b2dq4":{"duration":165.164,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Xf8y5_b2dq4","annotations":[{"segment":[15.435812829804817,117.31217659813083],"label":"Painting furniture"}]},"gCxLG4hinNw":{"duration":88.1,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=gCxLG4hinNw","annotations":[{"segment":[3.298465195750687,43.15491918564743],"label":"Playing bagpipes"}]},"j1oB2NAlYsQ":{"duration":202.29,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=j1oB2NAlYsQ","annotations":[{"segment":[14.038789605304212,25.39601265678627],"label":"Plataform diving"},{"segment":[32.65201627301092,60.414117065522625],"label":"Plataform diving"}]},"d0FP6xp9O4c":{"duration":131.588,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=d0FP6xp9O4c","annotations":[{"segment":[20.528468485678204,131.3874829931973],"label":"Disc dog"}]},"lG5d8bCHLM4":{"duration":33.484,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=lG5d8bCHLM4","annotations":[{"segment":[0.009999999999999998,33.483173],"label":"Using the monkey bar"}]},"ewTlNriXY3c":{"duration":218.616,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ewTlNriXY3c","annotations":[{"segment":[2.3867815912636505,115.58842277691109],"label":"Tug of war"},{"segment":[117.97520436817473,195.71609048361935],"label":"Tug of war"}]},"6Bm-_hI5A9A":{"duration":171.94,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=6Bm-_hI5A9A","annotations":[{"segment":[4.8283740329205,162.82350211015242],"label":"Playing 
violin"}]},"Usowsx0PDbA":{"duration":58.93,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Usowsx0PDbA","annotations":[{"segment":[13.331006606864275,21.237672594383774],"label":"Javelin throw"},{"segment":[31.075036090483618,41.8317793525741],"label":"Javelin throw"}]},"l_T3zfRQhic":{"duration":24.01,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=l_T3zfRQhic","annotations":[{"segment":[0.01,23.981],"label":"Trimming branches or hedges"}]},"LNlGgtg70uQ":{"duration":78.83,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=LNlGgtg70uQ","annotations":[{"segment":[5.532051482059282,78.801],"label":"Mixing drinks"}]},"jqKK2KH6l4Q":{"duration":122.695,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jqKK2KH6l4Q","annotations":[{"segment":[5.933730813178106,17.609781768141474],"label":"Capoeira"},{"segment":[26.988904666390738,62.78270021685221],"label":"Capoeira"},{"segment":[72.73605512928,78.09555392827959],"label":"Capoeira"},{"segment":[88.62314085488589,112.93229486739469],"label":"Capoeira"}]},"aAZSPRL18Ss":{"duration":78.23,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=aAZSPRL18Ss","annotations":[{"segment":[2.36483244,33.52695624],"label":"Spinning"},{"segment":[35.552995319812794,69.45945397815913],"label":"Spinning"}]},"3zjhYw-TVQw":{"duration":63.577,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=3zjhYw-TVQw","annotations":[{"segment":[44.03720547189235,48.40125146957878],"label":"Spread mulch"}]},"fjoaurNKg60":{"duration":225.117,"subset":"training","resolution":"362x268","url":"https://www.youtube.com/watch?v=fjoaurNKg60","annotations":[{"segment":[23.17902087512072,225.1174603174603],"label":"Putting in contact 
lenses"}]},"HJHRPfkWx4A":{"duration":33.623,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HJHRPfkWx4A","annotations":[]},"NDvp3JeVWcM":{"duration":125.81,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NDvp3JeVWcM","annotations":[{"segment":[7.850567954690977,29.832158186583463],"label":"Using uneven bars"},{"segment":[47.888464457410294,69.870054700156],"label":"Using uneven bars"}]},"IZNrdIkMCoc":{"duration":89.653,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IZNrdIkMCoc","annotations":[{"segment":[0.01,88.01460942685188],"label":"Capoeira"}]},"RfpUYx-RCnE":{"duration":205.71,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=RfpUYx-RCnE","annotations":[]},"ZsicrMkZEN8":{"duration":86.309,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=ZsicrMkZEN8","annotations":[{"segment":[0,86.308594],"label":"Baton twirling"}]},"Mgvj2B6MyjI":{"duration":17.67,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Mgvj2B6MyjI","annotations":[{"segment":[13.535348742589703,17.670384],"label":"Springboard diving"}]},"bxRZ-Lvjgzk":{"duration":236.84,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=bxRZ-Lvjgzk","annotations":[{"segment":[25.125367313572543,224.28085234321372],"label":"Painting"}]},"IVFGb72s3oY":{"duration":157.48,"subset":"validation","resolution":"448x336","url":"https://www.youtube.com/watch?v=IVFGb72s3oY","annotations":[{"segment":[70.50854765619196,135.12091014252817],"label":"Playing 
piano"}]},"jXARa4T2G8o":{"duration":166.65,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=jXARa4T2G8o","annotations":[{"segment":[41.32536505460218,50.682051482059286],"label":"Snowboarding"},{"segment":[54.84057878315133,71.994503900156],"label":"Snowboarding"},{"segment":[74.07376755070203,155.9634945397816],"label":"Snowboarding"}]},"L2jO-evVIRE":{"duration":62.19,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=L2jO-evVIRE","annotations":[{"segment":[0.2,40.35459594383775],"label":"Raking leaves"},{"segment":[48.30910764430577,55.390563182527295],"label":"Raking leaves"}]},"BV3tS9BTLmI":{"duration":163.678,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=BV3tS9BTLmI","annotations":[]},"520b6oA9s-U":{"duration":234.29,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=520b6oA9s-U","annotations":[{"segment":[59.942919262348724,204.68313894460542],"label":"Making a cake"}]},"LAicExwwM54":{"duration":212.858,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LAicExwwM54","annotations":[{"segment":[44.48444617784711,192.54914196567864],"label":"Fixing the roof"}]},"bAD_-DEgubk":{"duration":58.03,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bAD_-DEgubk","annotations":[{"segment":[2.4438159126365058,53.877776310452425],"label":"Playing lacrosse"}]},"ikUNgSw4Gys":{"duration":196.98,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=ikUNgSw4Gys","annotations":[]},"Y7VWbYGI0Oc":{"duration":24.289,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Y7VWbYGI0Oc","annotations":[{"segment":[0.01,24.261],"label":"Kite 
flying"}]},"DSBeb2sl0Tg":{"duration":123.043,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=DSBeb2sl0Tg","annotations":[]},"jLPePkIH8CI":{"duration":49.64,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jLPePkIH8CI","annotations":[]},"iQqPRgiXrmA":{"duration":44.977,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=iQqPRgiXrmA","annotations":[]},"c0OPXHtIcqg":{"duration":153.6,"subset":"testing","resolution":"854x476","url":"https://www.youtube.com/watch?v=c0OPXHtIcqg","annotations":[]},"DthkauVLag0":{"duration":52.62,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DthkauVLag0","annotations":[{"segment":[15.99578003120125,45.69051014040562],"label":"Brushing teeth"}]},"T-vwsIOVNTw":{"duration":6.55,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=T-vwsIOVNTw","annotations":[{"segment":[0.6794071762870515,6.327613104524181],"label":"Playing kickball"}]},"EWt03pUigc0":{"duration":120.98,"subset":"training","resolution":"524x360","url":"https://www.youtube.com/watch?v=EWt03pUigc0","annotations":[{"segment":[17.079651326053042,19.910532761310453],"label":"Discus throw"},{"segment":[39.72670280811232,42.935035101404054],"label":"Discus throw"}]},"So3EfVyUP64":{"duration":63.88,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=So3EfVyUP64","annotations":[{"segment":[3.7274765990639627,59.291727769110764],"label":"Ironing 
clothes"}]},"zYN_cbLO3MU":{"duration":202.966,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=zYN_cbLO3MU","annotations":[{"segment":[12.97678783151326,174.395368174727],"label":"Surfing"},{"segment":[180.72550858034322,186.106127925117],"label":"Surfing"}]},"Mcutz37Bowg":{"duration":72.05,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Mcutz37Bowg","annotations":[{"segment":[2.4996593783218053,66.24607288610997],"label":"Wrapping presents"}]},"uICwWvS_AOo":{"duration":190.869,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=uICwWvS_AOo","annotations":[{"segment":[75.632571925117,146.20312131981277],"label":"Playing blackjack"}]},"fNYGe8C3gMM":{"duration":226.14,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=fNYGe8C3gMM","annotations":[]},"HBJzxWj7CaY":{"duration":173.546,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HBJzxWj7CaY","annotations":[{"segment":[75.72711388455538,90.33162870514819],"label":"Swimming"}]},"kWtY5wkkAMY":{"duration":56.773,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kWtY5wkkAMY","annotations":[{"segment":[2.9227800889235573,50.48438335413417],"label":"Trimming branches or hedges"}]},"7LvEI5zhENk":{"duration":232.13,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=7LvEI5zhENk","annotations":[{"segment":[15.209749647128742,169.48006749657742],"label":"High jump"}]},"RSJYnStAgio":{"duration":136.09,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RSJYnStAgio","annotations":[]},"99Mk5a8Tvyk":{"duration":143.477,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=99Mk5a8Tvyk","annotations":[{"segment":[22.598316692667705,126.8638174726989],"label":"Disc 
dog"}]},"8Zgys8Vn-4Y":{"duration":178.167,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8Zgys8Vn-4Y","annotations":[{"segment":[0.2,169.17183931357255],"label":"Waterskiing"}]},"lX006hvkTGo":{"duration":63.02,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=lX006hvkTGo","annotations":[]},"V3uCGRAWG2M":{"duration":104.21000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=V3uCGRAWG2M","annotations":[{"segment":[4.848937830000001,94.41325502808111],"label":"Fixing bicycle"}]},"pON-g7IJKEY":{"duration":26.378,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=pON-g7IJKEY","annotations":[{"segment":[0.2,26.177868480725625],"label":"Hitting a pinata"}]},"XqDVvCRa_Gc":{"duration":43.14,"subset":"testing","resolution":"2560x810","url":"https://www.youtube.com/watch?v=XqDVvCRa_Gc","annotations":[]},"KjkD7CZcXK8":{"duration":82.547,"subset":"validation","resolution":"176x144","url":"https://www.youtube.com/watch?v=KjkD7CZcXK8","annotations":[{"segment":[0.01,82.546937],"label":"Removing ice from car"}]},"uokQq9Xrjf8":{"duration":26.053,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=uokQq9Xrjf8","annotations":[{"segment":[3.5723057722308895,21.717995319812793],"label":"Kneeling"}]},"bDD56LnztiQ":{"duration":144.8,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bDD56LnztiQ","annotations":[{"segment":[55.89096601,113.53979188000001],"label":"Skateboarding"}]},"HWFosaUWoSI":{"duration":95.365,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=HWFosaUWoSI","annotations":[]},"p4qZGZenAoU":{"duration":213.42000000000002,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=p4qZGZenAoU","annotations":[{"segment":[200.39838065522622,206.7232464898596],"label":"Washing 
face"}]},"K5X_ytiMuqc":{"duration":111.271,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=K5X_ytiMuqc","annotations":[{"segment":[40.78836693105965,108.01813017093504],"label":"Applying sunscreen"}]},"o0d1PXKkJqM":{"duration":96.02000000000001,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=o0d1PXKkJqM","annotations":[{"segment":[11.68351583775351,89.72341008736349],"label":"Playing bagpipes"}]},"XPN_PSadJ_A":{"duration":82.896,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=XPN_PSadJ_A","annotations":[{"segment":[29.336828393135725,74.84636349453979],"label":"BMX"}]},"_8m1tlowwKM":{"duration":130.15,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=_8m1tlowwKM","annotations":[{"segment":[0.01,130.14784580498866],"label":"Playing piano"}]},"hAjmf3OLu-c":{"duration":78.204,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hAjmf3OLu-c","annotations":[]},"EfQtRtIFGVo":{"duration":163.52,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EfQtRtIFGVo","annotations":[{"segment":[21.682948627145084,51.78398319188768],"label":"Doing nails"},{"segment":[57.906227510140404,91.32347774726989],"label":"Doing nails"},{"segment":[96.68044152574102,116.57773556006241],"label":"Doing nails"},{"segment":[121.679605825273,142.34218039937596],"label":"Doing nails"}]},"bCtiTOEf9KI":{"duration":14.304,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bCtiTOEf9KI","annotations":[{"segment":[0.612007556179634,13.515990448870147],"label":"Doing crunches"}]},"kTBEGydNpgg":{"duration":86.448,"subset":"validation","resolution":"512x290","url":"https://www.youtube.com/watch?v=kTBEGydNpgg","annotations":[{"segment":[71.09699535889122,77.69501197363681],"label":"Throwing 
darts"}]},"rBsRSdPFOww":{"duration":77.184,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rBsRSdPFOww","annotations":[{"segment":[50.451716135725434,64.29884586271451],"label":"Sharpening knives"},{"segment":[71.28261563806554,73.08876299375976],"label":"Sharpening knives"}]},"ptPJ7W5g4Dg":{"duration":88.24,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ptPJ7W5g4Dg","annotations":[]},"JeUjZe3sOwE":{"duration":123.04,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=JeUjZe3sOwE","annotations":[]},"PTwe2IIo3To":{"duration":188.8,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=PTwe2IIo3To","annotations":[]},"5ry-UTd0y_o":{"duration":156.457,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5ry-UTd0y_o","annotations":[{"segment":[3.752155200435365,154.0819947642983],"label":"Snow tubing"}]},"B69Fkd_L9gA":{"duration":181.09,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=B69Fkd_L9gA","annotations":[{"segment":[49.29230265210609,54.09441809672388],"label":"Shoveling snow"},{"segment":[90.25152262090484,173.01739469578783],"label":"Shoveling snow"}]},"bKec2fA5RZ8":{"duration":181.51,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bKec2fA5RZ8","annotations":[{"segment":[5.2728655,48.579336399999995],"label":"Vacuuming floor"},{"segment":[58.972599,106.84054822932917],"label":"Vacuuming floor"}]},"p8h3G0Tblqc":{"duration":101.91,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=p8h3G0Tblqc","annotations":[{"segment":[27.337803432137285,101.861],"label":"Layup drill in basketball"}]},"VLQQVNIZQqo":{"duration":52.8,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=VLQQVNIZQqo","annotations":[{"segment":[21.911667004680186,35.585865210608425],"label":"Hammer 
throw"},{"segment":[43.82333400936037,50.24855967238689],"label":"Hammer throw"}]},"WSDytipClpI":{"duration":48.902,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WSDytipClpI","annotations":[]},"7KYj4FQUESo":{"duration":29.49,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=7KYj4FQUESo","annotations":[{"segment":[0.6884165288611545,28.638127600624028],"label":"Vacuuming floor"}]},"ZZVrH3Hv1fM":{"duration":58.98,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZZVrH3Hv1fM","annotations":[{"segment":[0,0],"label":"Washing hands"}]},"I6riV09yqm8":{"duration":60.07,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=I6riV09yqm8","annotations":[{"segment":[6.926792511700468,45.30496723868954],"label":"Using uneven bars"}]},"n2L9F6cMNaE":{"duration":153.99,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=n2L9F6cMNaE","annotations":[{"segment":[65.57153354134165,79.02210452418097],"label":"Fun sliding down"},{"segment":[112.42294539781591,135.0036224648986],"label":"Fun sliding down"}]},"_wITx73-BXw":{"duration":187.71,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_wITx73-BXw","annotations":[{"segment":[6.0017134578783145,172.2930912176287],"label":"Playing field 
hockey"}]},"sYPd_0tC318":{"duration":213.972,"subset":"training","resolution":"632x480","url":"https://www.youtube.com/watch?v=sYPd_0tC318","annotations":[{"segment":[0.01,213.881],"label":"Waterskiing"}]},"UUjXw1xjprU":{"duration":191.1,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UUjXw1xjprU","annotations":[{"segment":[37.58305160830251,105.44359366437727],"label":"Skateboarding"},{"segment":[119.43238961453304,164.67275098524954],"label":"Skateboarding"}]},"-pLiSHAz4fo":{"duration":111.433,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-pLiSHAz4fo","annotations":[{"segment":[0.01,111.43256235827664],"label":"Using the rowing machine"}]},"MU2DUVy_wqQ":{"duration":52.59,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=MU2DUVy_wqQ","annotations":[{"segment":[0.01,52.541],"label":"Getting a haircut"}]},"_8KsVaJLOYI":{"duration":101.17,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_8KsVaJLOYI","annotations":[{"segment":[11.995116992199689,44.98168872074883],"label":"Playing field hockey"},{"segment":[49.40094234945398,94.85612253042122],"label":"Playing field hockey"}]},"IhWxuvzIHkc":{"duration":29.21,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=IhWxuvzIHkc","annotations":[{"segment":[7.200007744149766,25.701293466458658],"label":"Vacuuming 
floor"}]},"4DFKpHF2aq4":{"duration":60.094,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4DFKpHF2aq4","annotations":[{"segment":[10.968657391193608,60.09324263038549],"label":"Slacklining"}]},"Zezsxn3vcwQ":{"duration":210.188,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Zezsxn3vcwQ","annotations":[]},"YT-iFs0NE1s":{"duration":42.89,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YT-iFs0NE1s","annotations":[]},"SbX8KDWMhio":{"duration":68.45,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=SbX8KDWMhio","annotations":[{"segment":[9.105664779670954,18.915046742217136],"label":"Smoking hookah"},{"segment":[24.13960887444282,26.91182551603196],"label":"Smoking hookah"},{"segment":[45.57097598826655,48.449816131774995],"label":"Smoking hookah"}]},"ItpDcSURusI":{"duration":54.43,"subset":"training","resolution":"626x360","url":"https://www.youtube.com/watch?v=ItpDcSURusI","annotations":[{"segment":[14.434697745709828,47.294862613884554],"label":"Plataform diving"}]},"IcR28Yqt4mY":{"duration":118.28,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=IcR28Yqt4mY","annotations":[]},"IWhEUNOUIyc":{"duration":227.301,"subset":"validation","resolution":"656x480","url":"https://www.youtube.com/watch?v=IWhEUNOUIyc","annotations":[{"segment":[12.765659524340158,119.50075721396202],"label":"Playing drums"},{"segment":[166.66277712332985,211.69718711197427],"label":"Playing 
drums"}]},"Rn_frSrkkbM":{"duration":61.39,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=Rn_frSrkkbM","annotations":[{"segment":[0.09577778485289072,15.707556715874077],"label":"Cricket"},{"segment":[28.35022431645565,48.65511463338534],"label":"Cricket"}]},"7Iy7Cjv2SAE":{"duration":195.466,"subset":"training","resolution":"352x240","url":"https://www.youtube.com/watch?v=7Iy7Cjv2SAE","annotations":[{"segment":[38.1173122074883,131.42849249141963],"label":"Grooming dog"}]},"WtNvqSFTgxI":{"duration":237.66,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=WtNvqSFTgxI","annotations":[{"segment":[81.12862872000001,132.1754260102377],"label":"Playing lacrosse"}]},"bYNQ0GrCt64":{"duration":35.3,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bYNQ0GrCt64","annotations":[]},"EmlmKZ6Gi3Y":{"duration":205.242,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=EmlmKZ6Gi3Y","annotations":[{"segment":[41.86307009345794,196.8714890965732],"label":"River tubing"}]},"rliYO1sEYts":{"duration":196.232,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rliYO1sEYts","annotations":[{"segment":[54.17166458658346,56.31404680187208],"label":"Rock-paper-scissors"},{"segment":[64.57752106084244,69.47439469578784],"label":"Rock-paper-scissors"},{"segment":[82.94079719188768,90.2953120124805],"label":"Rock-paper-scissors"},{"segment":[93.662255850234,96.11094227769111],"label":"Rock-paper-scissors"},{"segment":[111.72131825273011,115.39434789391576],"label":"Rock-paper-scissors"},{"segment":[122.74040717628705,124.27083619344774],"label":"Rock-paper-scissors"},{"segment":[141.41164118564743,147.22727145085804],"label":"Rock-paper-scissors"}]},"yJgC3-t_ciw":{"duration":61.255,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=yJgC3-t_ciw","annotations":[{"segment":[0.2,61.001],"label":"Table 
soccer"}]},"kF-J9mKwA8A":{"duration":140.388,"subset":"training","resolution":"1264x720","url":"https://www.youtube.com/watch?v=kF-J9mKwA8A","annotations":[{"segment":[0.009999999999999998,140.387845],"label":"Playing drums"}]},"nc4twXSueZo":{"duration":63.69,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=nc4twXSueZo","annotations":[{"segment":[1.1923682171776668,61.506327202747975],"label":"Doing kickboxing"}]},"dth80jcqPxU":{"duration":107.28,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=dth80jcqPxU","annotations":[{"segment":[18.74404573104524,99.5777431097244],"label":"Shot put"}]},"kr4RUFi1UGs":{"duration":36.32,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=kr4RUFi1UGs","annotations":[{"segment":[0.8498286037441498,35.46618039625585],"label":"Rock climbing"}]},"AXw2bkQyRPo":{"duration":112.00999999999999,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=AXw2bkQyRPo","annotations":[{"segment":[1.922221019453023,107.81912445477411],"label":"Cumbia"}]},"dLOwmXV_yTw":{"duration":115.38,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=dLOwmXV_yTw","annotations":[{"segment":[0.01,109.4396111076443],"label":"Wrapping presents"}]},"FfVfjZOHsl4":{"duration":78.39,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=FfVfjZOHsl4","annotations":[{"segment":[7.020292511700468,28.38640015600624],"label":"Cutting the grass"}]},"VWpRBfhoFVg":{"duration":18.251,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=VWpRBfhoFVg","annotations":[{"segment":[0.2,17.879889235569422],"label":"Hitting a pinata"}]},"Po3-9ktM5Do":{"duration":148.678,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Po3-9ktM5Do","annotations":[{"segment":[10.669474142230994,101.59194944124295],"label":"Blow-drying 
hair"}]},"Z5h4QgKfn9k":{"duration":137.068,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=Z5h4QgKfn9k","annotations":[]},"mC_8ckG6WpU":{"duration":216.573,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mC_8ckG6WpU","annotations":[{"segment":[1.1040211796490242,5.826777447265106],"label":"Hula hoop"}]},"UlGX-5x0Mak":{"duration":25.148,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=UlGX-5x0Mak","annotations":[{"segment":[0.2,23.504622434345322],"label":"Having an ice cream"}]},"jkaevzzYdP8":{"duration":223.45,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jkaevzzYdP8","annotations":[{"segment":[60.654444196567866,88.89013373634945],"label":"Walking the dog"},{"segment":[106.31957172386895,193.81535042121683],"label":"Walking the dog"},{"segment":[206.01595701248047,221.35386244149765],"label":"Walking the dog"}]},"-oJb3Acw-_s":{"duration":191.8,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-oJb3Acw-_s","annotations":[{"segment":[20.34660896723869,165.46580527769112],"label":"Getting a piercing"}]},"zMWhT5Rv6WE":{"duration":192.703,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zMWhT5Rv6WE","annotations":[{"segment":[70.92915569422777,81.44831014040561],"label":"Playing congas"},{"segment":[93.79578803120125,97.4033183400936],"label":"Playing congas"},{"segment":[113.6372047301092,132.5767388517941],"label":"Playing congas"},{"segment":[162.63949142589703,174.96521998127926],"label":"Playing congas"}]},"TUhitcbJ5C4":{"duration":69.92,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=TUhitcbJ5C4","annotations":[{"segment":[1.1990743572542903,23.436453346333852],"label":"Washing hands"},{"segment":[23.82074534,63.55094093447738],"label":"Washing 
hands"}]},"83a5nOzZU1g":{"duration":92.22999999999999,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=83a5nOzZU1g","annotations":[{"segment":[37.96539457797458,82.78728444713344],"label":"Mixing drinks"}]},"MyPQ_e4dGDw":{"duration":217.571,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MyPQ_e4dGDw","annotations":[{"segment":[24.777919527665457,206.0300962230889],"label":"Making a lemonade"}]},"APAxAnwS9oM":{"duration":102.49000000000001,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=APAxAnwS9oM","annotations":[{"segment":[35.17696280811232,93.21895144149767],"label":"Walking the dog"}]},"K-r_Tj7uFGw":{"duration":60.12,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=K-r_Tj7uFGw","annotations":[{"segment":[2.3446357644305773,59.647533847113884],"label":"Getting a tattoo"}]},"hjDGz9OhwvM":{"duration":129.45,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=hjDGz9OhwvM","annotations":[{"segment":[9.18541599297972,119.81416245787831],"label":"Ironing clothes"}]},"M0cKfbLMlSA":{"duration":209.77,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=M0cKfbLMlSA","annotations":[{"segment":[15.03018251401869,205.8481518224299],"label":"Doing kickboxing"}]},"p07Y--q4JQw":{"duration":136.26,"subset":"testing","resolution":"294x240","url":"https://www.youtube.com/watch?v=p07Y--q4JQw","annotations":[]},"X5wlZXJ6SCY":{"duration":70.054,"subset":"testing","resolution":"640x354","url":"https://www.youtube.com/watch?v=X5wlZXJ6SCY","annotations":[]},"0pegrKSh4iw":{"duration":177.169,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=0pegrKSh4iw","annotations":[{"segment":[0.01,177.16757369614513],"label":"Playing 
drums"}]},"qhnWJ4G5JMA":{"duration":167.0,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qhnWJ4G5JMA","annotations":[{"segment":[126.50857735,139.87334726],"label":"Spinning"},{"segment":[8.63875481,116.35515975000001],"label":"Spinning"}]},"VEzhpSPnBrY":{"duration":77.764,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VEzhpSPnBrY","annotations":[{"segment":[0.01,73.13232917316692],"label":"Kneeling"}]},"piKE8obU4JY":{"duration":69.498,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=piKE8obU4JY","annotations":[{"segment":[2.9249719188767553,66.42682215288612],"label":"Drum corps"}]},"j62IuPoWMSE":{"duration":158.871,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=j62IuPoWMSE","annotations":[]},"0gwhdJGq2eg":{"duration":124.44,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0gwhdJGq2eg","annotations":[{"segment":[3.7850382215288616,120.05364820592824],"label":"Playing water polo"}]},"r-3RwNYrXBU":{"duration":63.088,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=r-3RwNYrXBU","annotations":[]},"AR-VPPtV7ag":{"duration":18.02,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=AR-VPPtV7ag","annotations":[{"segment":[9.220168524180966,14.392458184087364],"label":"Drinking beer"}]},"X4P9YA6Oabg":{"duration":231.062,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=X4P9YA6Oabg","annotations":[{"segment":[49.30756401833838,186.7928885074279],"label":"Braiding hair"}]},"wDlbcTFI90o":{"duration":94.575,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wDlbcTFI90o","annotations":[{"segment":[20.213287555382216,69.04977062714508],"label":"Laying 
tile"}]},"9BR5LChbBLQ":{"duration":116.867,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9BR5LChbBLQ","annotations":[{"segment":[44.93745638629284,86.6001183800623],"label":"Hand car wash"}]},"z6pmp8TrAVo":{"duration":11.49,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=z6pmp8TrAVo","annotations":[]},"BjjN_aq_wq4":{"duration":124.2,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=BjjN_aq_wq4","annotations":[]},"XFZ5_uNU7jE":{"duration":96.131,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XFZ5_uNU7jE","annotations":[{"segment":[7.048548499219969,88.18184079875195],"label":"Peeling potatoes"}]},"wBgU5jXb_V4":{"duration":76.81,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=wBgU5jXb_V4","annotations":[{"segment":[14.527101905504793,61.78277495000001],"label":"Playing polo"}]},"bfZZdTPYmos":{"duration":228.206,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=bfZZdTPYmos","annotations":[{"segment":[8.54436366926677,213.25307657878315],"label":"Playing congas"}]},"qz82piU5PZE":{"duration":46.46,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=qz82piU5PZE","annotations":[]},"QvP8tPp8Wog":{"duration":64.81,"subset":"training","resolution":"176x144","url":"https://www.youtube.com/watch?v=QvP8tPp8Wog","annotations":[{"segment":[0,56.33335473],"label":"Playing 
guitarra"}]},"5ASU6NFr4Ck":{"duration":227.951,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5ASU6NFr4Ck","annotations":[]},"KuAVv3uS4zc":{"duration":172.548,"subset":"training","resolution":"810x480","url":"https://www.youtube.com/watch?v=KuAVv3uS4zc","annotations":[{"segment":[14.262953198127924,158.23804680187206],"label":"BMX"}]},"0D9EmyHxhcg":{"duration":194.439,"subset":"training","resolution":"544x360","url":"https://www.youtube.com/watch?v=0D9EmyHxhcg","annotations":[{"segment":[25.710226973370997,183.29882294675258],"label":"Rollerblading"}]},"ksPQdczmNKs":{"duration":225.373,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ksPQdczmNKs","annotations":[{"segment":[0.2,217.6069875195008],"label":"Hula hoop"}]},"iKJB-RU_PKY":{"duration":175.125,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iKJB-RU_PKY","annotations":[{"segment":[12.567465376166068,131.13876914260243],"label":"Shuffleboard"},{"segment":[135.23685567830876,160.64499219968798],"label":"Shuffleboard"}]},"SV-8M3HNk1s":{"duration":208.49,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=SV-8M3HNk1s","annotations":[{"segment":[0.2,3.414465678627145],"label":"Windsurfing"},{"segment":[9.593022620904836,13.82045631825273],"label":"Windsurfing"},{"segment":[28.453880655226207,128.93672776911077],"label":"Windsurfing"},{"segment":[146.49683697347893,155.92726599063963],"label":"Windsurfing"},{"segment":[160.4798868954758,171.21106474258968],"label":"Windsurfing"}]},"TmnVK-cYrzg":{"duration":147.61,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=TmnVK-cYrzg","annotations":[{"segment":[31.288133054602184,125.38259202028081],"label":"Vacuuming 
floor"}]},"ZW0Wi1_IJgc":{"duration":179.51,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=ZW0Wi1_IJgc","annotations":[]},"igwT-3gprOI":{"duration":114.17,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=igwT-3gprOI","annotations":[{"segment":[104.3107425897036,114.081],"label":"Drinking coffee"}]},"sa-BrKIzFKw":{"duration":84.521,"subset":"testing","resolution":"847x480","url":"https://www.youtube.com/watch?v=sa-BrKIzFKw","annotations":[]},"N3WVKg7Q2io":{"duration":102.87,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=N3WVKg7Q2io","annotations":[{"segment":[0.01,64.67137658970358],"label":"Playing water polo"}]},"kh-LnD1XeEg":{"duration":203.15,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kh-LnD1XeEg","annotations":[]},"PwRqqR1YmPM":{"duration":200.25,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PwRqqR1YmPM","annotations":[]},"G_Nzm0WkGEI":{"duration":127.06,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=G_Nzm0WkGEI","annotations":[{"segment":[7.5210556224649,115.388827049922],"label":"Ironing clothes"}]},"BioBrxuKOsw":{"duration":68.45,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=BioBrxuKOsw","annotations":[{"segment":[14.065547839999999,42.682676009999994],"label":"Playing polo"}]},"qwJhmfZKdNQ":{"duration":45.187,"subset":"training","resolution":"350x196","url":"https://www.youtube.com/watch?v=qwJhmfZKdNQ","annotations":[{"segment":[5.4929765990639625,7.816928237129486],"label":"Trimming branches or hedges"},{"segment":[28.802915756630266,35.36789703588144],"label":"Trimming branches or hedges"},{"segment":[37.69287831513261,38.96104992199688],"label":"Trimming branches or 
hedges"}]},"iEdRtsNQh04":{"duration":213.484,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iEdRtsNQh04","annotations":[]},"Fhk2t6beztU":{"duration":14.42,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Fhk2t6beztU","annotations":[{"segment":[0.2,14.161000000000001],"label":"Rollerblading"}]},"n50dwUENM1E":{"duration":65.411,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=n50dwUENM1E","annotations":[{"segment":[11.318363494539783,49.148209048361934],"label":"Cleaning sink"}]},"COeXmBZpLSE":{"duration":110.85,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=COeXmBZpLSE","annotations":[{"segment":[10.203232053091646,108.77682985414654],"label":"Tango"}]},"TFp9J_gQqlk":{"duration":91.95,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TFp9J_gQqlk","annotations":[]},"bqBJ1tcHCHk":{"duration":80.88,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bqBJ1tcHCHk","annotations":[{"segment":[38.56603364723419,60.72805233882297],"label":"Grooming horse"}]},"0je8CeAJABY":{"duration":65.066,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0je8CeAJABY","annotations":[{"segment":[24.46883151326053,32.18514352574103],"label":"Doing a powerbomb"},{"segment":[35.84023868954758,39.29227301092044],"label":"Doing a powerbomb"}]},"4iFlM_Zu7xs":{"duration":220.24,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4iFlM_Zu7xs","annotations":[]},"rnPfQuWS4Rk":{"duration":31.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rnPfQuWS4Rk","annotations":[{"segment":[23.084269890795632,30.05310608424337],"label":"Hammer 
throw"}]},"GEmuTigZlYA":{"duration":91.743,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=GEmuTigZlYA","annotations":[{"segment":[0.01,91.601],"label":"Grooming dog"}]},"D5xp0LuEcKw":{"duration":190.66,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=D5xp0LuEcKw","annotations":[{"segment":[29.000394260668386,107.52453872032433],"label":"Tango"}]},"VQPfRCsSmQk":{"duration":82.34,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=VQPfRCsSmQk","annotations":[{"segment":[32.86464898595944,51.09340198000001],"label":"Polishing forniture"}]},"xUHXFXela-0":{"duration":132.03,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xUHXFXela-0","annotations":[{"segment":[6.797107410827046,23.89286241381628],"label":"Rock climbing"},{"segment":[35.83929362072442,102.98647592162189],"label":"Rock climbing"}]},"nDggAkf1ulc":{"duration":61.42,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=nDggAkf1ulc","annotations":[{"segment":[7.09293384,34.281797039999994],"label":"Hopscotch"}]},"l12QfUhRLLc":{"duration":45.84,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=l12QfUhRLLc","annotations":[{"segment":[18.277840873634947,36.555681747269894],"label":"Cricket"}]},"UaiKJ_7mKIA":{"duration":30.12,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=UaiKJ_7mKIA","annotations":[{"segment":[0.01,25.793818595943836],"label":"Shoveling snow"}]},"RnAFO7-y0kI":{"duration":61.86,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RnAFO7-y0kI","annotations":[]},"Xm23RMCpDd8":{"duration":49.53,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Xm23RMCpDd8","annotations":[{"segment":[6.876765082683307,14.83526849297972],"label":"Discus throw"},{"segment":[24.957248558502343,44.73760654914197],"label":"Discus 
throw"}]},"nJMS8jN3uU4":{"duration":51.39,"subset":"training","resolution":"800x480","url":"https://www.youtube.com/watch?v=nJMS8jN3uU4","annotations":[{"segment":[32.4659368174727,35.188111544461776],"label":"Playing kickball"},{"segment":[20.53640639625585,21.49717394695788],"label":"Playing kickball"},{"segment":[40.15207722308892,48.63885725429017],"label":"Playing kickball"}]},"-UwqKYkkKlU":{"duration":64.0,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-UwqKYkkKlU","annotations":[{"segment":[9.776557360374415,55.46699890171607],"label":"Spinning"}]},"YS3Q47uz3HI":{"duration":159.777,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YS3Q47uz3HI","annotations":[{"segment":[7.727069205217189,142.07836925721926],"label":"Making a cake"}]},"oLEagb_RGq8":{"duration":18.3,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=oLEagb_RGq8","annotations":[{"segment":[0,18.297323],"label":"Mowing the lawn"}]},"8P1vKpL3Zcs":{"duration":189.011,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8P1vKpL3Zcs","annotations":[{"segment":[19.756098853477948,172.79214818116535],"label":"Waterskiing"}]},"IhmtmKxEU40":{"duration":222.22,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IhmtmKxEU40","annotations":[]},"RNAUncQEASo":{"duration":122.95,"subset":"validation","resolution":"1280x714","url":"https://www.youtube.com/watch?v=RNAUncQEASo","annotations":[{"segment":[0.2,4.98702940770692],"label":"Grooming dog"},{"segment":[26.469617625521348,122.74945578231292],"label":"Grooming dog"}]},"jLwUrFMRGzY":{"duration":78.62,"subset":"training","resolution":"854x428","url":"https://www.youtube.com/watch?v=jLwUrFMRGzY","annotations":[{"segment":[11.652363276131046,78.622788],"label":"Tai 
chi"}]},"cQAIzEOeOKI":{"duration":30.09,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cQAIzEOeOKI","annotations":[]},"l7nsRRyzD4k":{"duration":32.555,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=l7nsRRyzD4k","annotations":[{"segment":[0.01,18.32578064590971],"label":"Building sandcastles"}]},"KV0L45xelkE":{"duration":68.85,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KV0L45xelkE","annotations":[{"segment":[28.17800312012481,37.73168798751951],"label":"Layup drill in basketball"},{"segment":[48.78819968798753,60.48878003120126],"label":"Layup drill in basketball"}]},"-FWGLSfI13Q":{"duration":223.051,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-FWGLSfI13Q","annotations":[{"segment":[0.2,170.15894194539783],"label":"Carving jack-o-lanterns"}]},"4R5KDUqBPcU":{"duration":10.24,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4R5KDUqBPcU","annotations":[{"segment":[1.9905094984179346,9.776319467265287],"label":"Shot 
put"}]},"KoHzXi7Usl8":{"duration":159.057,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KoHzXi7Usl8","annotations":[{"segment":[0.2,16.34589719626168],"label":"Snowboarding"},{"segment":[32.19646417445483,74.54719781931465],"label":"Snowboarding"},{"segment":[77.76684423676012,89.15943925233644],"label":"Snowboarding"},{"segment":[93.61741121495326,113.92595015576323],"label":"Snowboarding"},{"segment":[121.85123364485982,148.84673052959502],"label":"Snowboarding"}]},"-r-4Ncgbj8E":{"duration":57.516,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-r-4Ncgbj8E","annotations":[]},"Vhn4SuPhu-0":{"duration":166.859,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Vhn4SuPhu-0","annotations":[]},"MT8qyL2n5wg":{"duration":18.3,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=MT8qyL2n5wg","annotations":[]},"ygKWviTozbc":{"duration":187.55,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ygKWviTozbc","annotations":[]},"BfSqo9ONd6g":{"duration":115.08,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BfSqo9ONd6g","annotations":[{"segment":[21.2612908900156,92.2868021],"label":"Doing motocross"}]},"IVHmmp3Nsd4":{"duration":202.46,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=IVHmmp3Nsd4","annotations":[{"segment":[14.367636505460219,191.51585803432135],"label":"Painting"}]},"ZJk05q3y5iM":{"duration":56.36,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=ZJk05q3y5iM","annotations":[{"segment":[8.176285497659906,48.4422936474259],"label":"Doing 
nails"}]},"QeIsJBPG8EM":{"duration":12.539,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=QeIsJBPG8EM","annotations":[{"segment":[0.2,12.501],"label":"Hurling"}]},"y8RpTBtGG1g":{"duration":37.57,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=y8RpTBtGG1g","annotations":[{"segment":[2.3956110031201248,34.356567556942274],"label":"Vacuuming floor"}]},"ZjHIN-M-dQY":{"duration":66.29,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZjHIN-M-dQY","annotations":[{"segment":[10.818929017160686,12.889537441497659],"label":"Spinning"},{"segment":[14.44249375975039,55.54407098283931],"label":"Spinning"}]},"_-JNaelSKO8":{"duration":27.35,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=_-JNaelSKO8","annotations":[]},"gma-VLqBCcQ":{"duration":20.875,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=gma-VLqBCcQ","annotations":[]},"VwmYoF9Rh_8":{"duration":132.56,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VwmYoF9Rh_8","annotations":[{"segment":[22.111274570982843,128.3602814595651],"label":"Kayaking"}]},"r5mwKEhEsHA":{"duration":126.363,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=r5mwKEhEsHA","annotations":[{"segment":[89.84907332293291,120.80307800312012],"label":"Putting in contact 
lenses"}]},"F4ERtlGMFt4":{"duration":61.951,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=F4ERtlGMFt4","annotations":[{"segment":[0.2,3.9501958797947245],"label":"Bullfighting"}]},"m-B1tlnywNY":{"duration":198.07,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=m-B1tlnywNY","annotations":[{"segment":[1.5449722386895475,193.73951873166928],"label":"Cheerleading"}]},"d_A-ylxNbFU":{"duration":10.22,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=d_A-ylxNbFU","annotations":[]},"BV4zqyN8hLs":{"duration":161.519,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BV4zqyN8hLs","annotations":[{"segment":[0.01,161.51791383219955],"label":"Wakeboarding"}]},"HtHsvOu96rI":{"duration":35.899,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=HtHsvOu96rI","annotations":[]},"g7glOdM6BYo":{"duration":80.458,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=g7glOdM6BYo","annotations":[{"segment":[0.6764385088982984,77.21418149955251],"label":"Clipping cat claws"}]},"CjPN7fw0B48":{"duration":34.761,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CjPN7fw0B48","annotations":[{"segment":[1.897970608424337,32.8077776599064],"label":"Cutting the grass"}]},"OjQHQptSXk0":{"duration":134.537,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OjQHQptSXk0","annotations":[{"segment":[0.01,133.36259752287512],"label":"Drum 
corps"}]},"1BUnQWRBpYg":{"duration":218.175,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=1BUnQWRBpYg","annotations":[{"segment":[0.2,195.4983665422945],"label":"Longboarding"}]},"cvEOK7xAPSs":{"duration":67.129,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cvEOK7xAPSs","annotations":[{"segment":[6.911869048361935,7.644946068642746],"label":"Slacklining"},{"segment":[12.357584056162247,65.03440422776912],"label":"Slacklining"}]},"0vIFg1eE7Tw":{"duration":203.73,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0vIFg1eE7Tw","annotations":[{"segment":[8.737434968798752,195.24195593915758],"label":"Putting on makeup"}]},"iM8ZD2PwKDg":{"duration":127.52,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iM8ZD2PwKDg","annotations":[{"segment":[69.58802587,92.89540457999999],"label":"Doing step aerobics"},{"segment":[99.98823383999999,114.99003955999999],"label":"Doing step aerobics"}]},"3vYwSm6B_u0":{"duration":150.81,"subset":"testing","resolution":"384x288","url":"https://www.youtube.com/watch?v=3vYwSm6B_u0","annotations":[]},"eCXiGAChev4":{"duration":83.96000000000001,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eCXiGAChev4","annotations":[{"segment":[0.01,69.48887247425897],"label":"Washing 
dishes"}]},"HTYzbAFUghU":{"duration":147.888,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=HTYzbAFUghU","annotations":[]},"M2Y2Cw2kwDo":{"duration":11.03,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=M2Y2Cw2kwDo","annotations":[]},"St1RRi-aeNM":{"duration":31.21,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=St1RRi-aeNM","annotations":[]},"siGEHA6fs80":{"duration":61.07,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=siGEHA6fs80","annotations":[{"segment":[7.431104293291733,15.433831993759751],"label":"Chopping wood"},{"segment":[27.05684127301092,39.72782679875195],"label":"Chopping wood"}]},"OAot8XBeLrs":{"duration":223.887,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=OAot8XBeLrs","annotations":[{"segment":[0.2,9.779755837852562],"label":"Futsal"},{"segment":[38.42046936299221,50.99444115451693],"label":"Futsal"},{"segment":[61.47275098078753,69.85539884180402],"label":"Futsal"},{"segment":[89.41491051750914,107.57731421637817],"label":"Futsal"},{"segment":[125.73971791524721,135.17019675889077],"label":"Futsal"},{"segment":[141.10790547893916,145.99778339157564],"label":"Futsal"},{"segment":[160.6674171294852,173.93994289235567],"label":"Futsal"},{"segment":[183.7196987176287,192.10234656786272],"label":"Futsal"},{"segment":[205.3748723307332,220.3937830624025],"label":"Futsal"}]},"01m9yM04RwY":{"duration":73.17,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=01m9yM04RwY","annotations":[]},"TaDgnM-XKiw":{"duration":236.22,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=TaDgnM-XKiw","annotations":[]},"EMz_XwbJ8l8":{"duration":31.022,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=EMz_XwbJ8l8","annotations":[]},"zahB34blHyk":{"duration":40.171,"subset":"training","resolution":"1920x1080","url
":"https://www.youtube.com/watch?v=zahB34blHyk","annotations":[{"segment":[6.1370015600624015,34.88071294851794],"label":"Painting fence"}]},"iXF01UxOtLI":{"duration":215.295,"subset":"validation","resolution":"368x208","url":"https://www.youtube.com/watch?v=iXF01UxOtLI","annotations":[{"segment":[22.127579439252337,197.8071495327103],"label":"Futsal"}]},"htBt3oP9zuo":{"duration":165.396,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=htBt3oP9zuo","annotations":[{"segment":[59.676226612890595,151.76282223368742],"label":"Hanging wallpaper"}]},"UE2mDvY9rew":{"duration":15.837,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=UE2mDvY9rew","annotations":[{"segment":[0.01,15.56472340204114],"label":"Roof shingle removal"}]},"lSgkR94_h8Q":{"duration":230.11,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=lSgkR94_h8Q","annotations":[{"segment":[16.872320003120123,221.4940732324493],"label":"Belly dance"}]},"Qhs0AjFvcOA":{"duration":74.26,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Qhs0AjFvcOA","annotations":[{"segment":[26.91519222,41.736889420000004],"label":"Tennis serve with ball bouncing"},{"segment":[48.46531619,64.37859129],"label":"Tennis serve with ball bouncing"}]},"1uiEkwykOxo":{"duration":200.51,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=1uiEkwykOxo","annotations":[{"segment":[74.5473872,172.33719807999998],"label":"Playing polo"}]},"GAqzjkkb98Q":{"duration":71.518,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GAqzjkkb98Q","annotations":[{"segment":[5.687255850234009,58.99134009360374],"label":"Calf 
roping"}]},"JFGJYIZ7ONE":{"duration":156.224,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=JFGJYIZ7ONE","annotations":[{"segment":[0.01,143.08269297794934],"label":"Futsal"}]},"82EtiBkAsPg":{"duration":78.3,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=82EtiBkAsPg","annotations":[]},"ztEGh6HkkV8":{"duration":72.098,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ztEGh6HkkV8","annotations":[]},"YBK6SfHd-0Y":{"duration":201.039,"subset":"validation","resolution":"712x480","url":"https://www.youtube.com/watch?v=YBK6SfHd-0Y","annotations":[{"segment":[5.643772230889236,194.39659906396255],"label":"Roof shingle removal"}]},"FCiKVtVqTAA":{"duration":126.433,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FCiKVtVqTAA","annotations":[{"segment":[7.283640186915888,112.02830062305296],"label":"Playing congas"}]},"Ouy1exEw_dI":{"duration":129.266,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ouy1exEw_dI","annotations":[{"segment":[21.352302652106086,94.6753042121685],"label":"Waterskiing"}]},"Ff8QLpH5T1c":{"duration":217.826,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ff8QLpH5T1c","annotations":[{"segment":[7.3061727884081344,49.10427757790584],"label":"Canoeing"},{"segment":[65.75555509567322,67.79448703662432],"label":"Canoeing"},{"segment":[74.9307488299532,110.95187978675611],"label":"Canoeing"},{"segment":[186.3923610101404,200.66488455148206],"label":"Canoeing"}]},"JQn65jxJgbE":{"duration":62.439,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=JQn65jxJgbE","annotations":[]},"7pSlF1snFmQ":{"duration":31.39,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=7pSlF1snFmQ","annotations":[]},"vOJtbsRKkzc":{"duration":236.31,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/w
atch?v=vOJtbsRKkzc","annotations":[]},"5gBRDguUe8U":{"duration":143.054,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=5gBRDguUe8U","annotations":[{"segment":[0.20000000000000004,124.53672074882996],"label":"Assembling bicycle"}]},"37TrAhYUirU":{"duration":304.879,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=37TrAhYUirU","annotations":[]},"kXCtmH4V0TI":{"duration":12.59,"subset":"training","resolution":"426x236","url":"https://www.youtube.com/watch?v=kXCtmH4V0TI","annotations":[{"segment":[3.84820896099844,12.585215419501134],"label":"Hammer throw"}]},"y-7ZKlq2UFw":{"duration":147.66,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=y-7ZKlq2UFw","annotations":[{"segment":[12.43440561622465,24.8688112324493],"label":"Playing guitarra"},{"segment":[65.39576287051482,133.11222776911077],"label":"Playing guitarra"}]},"p1_6T9d4ZuY":{"duration":38.55,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=p1_6T9d4ZuY","annotations":[{"segment":[0,36.04405226209048],"label":"Using uneven bars"}]},"5ohc1_X2W80":{"duration":60.49,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=5ohc1_X2W80","annotations":[{"segment":[5.75957214,31.093276166420807],"label":"Mowing the lawn"},{"segment":[40.4354125273011,54.109250720000006],"label":"Mowing the lawn"}]},"TtrDWeSW3mg":{"duration":204.22,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TtrDWeSW3mg","annotations":[{"segment":[22.455698054602184,173.75295445085803],"label":"Preparing 
salad"}]},"b6QSzGwI9m4":{"duration":126.735,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=b6QSzGwI9m4","annotations":[]},"kkEjwEXkz3w":{"duration":78.23,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kkEjwEXkz3w","annotations":[{"segment":[33.31686207800312,36.856015925117],"label":"Doing nails"},{"segment":[40.151090196567864,43.32412468018721],"label":"Doing nails"},{"segment":[46.37511937597504,50.28039258658347],"label":"Doing nails"},{"segment":[54.42974537285491,64.5590477628705],"label":"Doing nails"}]},"kWN4zFblj6o":{"duration":84.15,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=kWN4zFblj6o","annotations":[{"segment":[2.82041358,21.410989800000003],"label":"Doing step aerobics"},{"segment":[28.416316780000002,62.23108798],"label":"Doing step aerobics"}]},"KmJFUfqGKZA":{"duration":219.544,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KmJFUfqGKZA","annotations":[{"segment":[8.902505460218409,14.380970358814352],"label":"Scuba diving"},{"segment":[33.55559750390015,43.48531513260531],"label":"Scuba diving"},{"segment":[109.22689391575663,115.04776287051482],"label":"Scuba diving"},{"segment":[139.35845085803433,144.83691575663028],"label":"Scuba diving"}]},"rhHvxiCxEis":{"duration":28.282,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=rhHvxiCxEis","annotations":[]},"7hDCO9fJb90":{"duration":113.45,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=7hDCO9fJb90","annotations":[{"segment":[20.70821484287943,98.58526088767552],"label":"Using uneven bars"}]},"96kej9yK8To":{"duration":129.24,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=96kej9yK8To","annotations":[{"segment":[0,119.56369345865835],"label":"Playing 
violin"}]},"-nlAKyoyIuU":{"duration":213.671,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-nlAKyoyIuU","annotations":[{"segment":[54.32172074882996,62.98653510140406],"label":"Sumo"},{"segment":[79.98290171606865,90.98055070202808],"label":"Sumo"},{"segment":[139.97007800312014,147.968368174727],"label":"Sumo"}]},"3vs3ofTw1vY":{"duration":220.915,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3vs3ofTw1vY","annotations":[{"segment":[16.883290171606863,210.17973478939155],"label":"Snow tubing"}]},"j7hSNqcWIO4":{"duration":116.84,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=j7hSNqcWIO4","annotations":[{"segment":[6.379872718718273,26.795465418616743],"label":"Pole vault"},{"segment":[32.44620982662436,45.93508357477156],"label":"Pole vault"},{"segment":[52.86180252652283,55.04918745865481],"label":"Pole vault"},{"segment":[57.23657239078679,59.423957322918774],"label":"Pole vault"},{"segment":[61.611342255050744,73.8242414594543],"label":"Pole vault"},{"segment":[76.37619054694161,80.75096041120557],"label":"Pole vault"},{"segment":[83.12062742101521,87.31311461622464],"label":"Pole vault"},{"segment":[90.0473457628705,97.70319297347893],"label":"Pole vault"},{"segment":[102.07796280811233,116.842811],"label":"Pole vault"}]},"-od9zDziq9U":{"duration":180.071,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=-od9zDziq9U","annotations":[{"segment":[29.488619344773788,171.64312178745655],"label":"Baton twirling"}]},"ysiKsmRwfx0":{"duration":130.66,"subset":"testing","resolution":"638x360","url":"https://www.youtube.com/watch?v=ysiKsmRwfx0","annotations":[]},"AWXdK-ix3gQ":{"duration":47.16,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AWXdK-ix3gQ","annotations":[{"segment":[29.046171279999996,44.84936817472699],"label":"Using the pommel 
horse"},{"segment":[1.9467784711388456,21.163072839999998],"label":"Using the pommel horse"}]},"iqAyzgsha_M":{"duration":134.91,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iqAyzgsha_M","annotations":[]},"tDG8VoRQTuM":{"duration":174.197,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=tDG8VoRQTuM","annotations":[{"segment":[43.68469168768233,164.9707611559681],"label":"Powerbocking"}]},"7coVNZXiU6U":{"duration":189.73,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=7coVNZXiU6U","annotations":[{"segment":[75.4777123329831,92.6451920008773],"label":"Getting a haircut"},{"segment":[103.00487800736519,111.88460887006909],"label":"Getting a haircut"},{"segment":[122.54028590531375,158.05920935612934],"label":"Getting a haircut"}]},"mtU66vCjVVs":{"duration":69.36,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=mtU66vCjVVs","annotations":[{"segment":[6.432683307332294,63.29976599063963],"label":"Playing flauta"}]},"fVL37oteUQY":{"duration":208.121,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=fVL37oteUQY","annotations":[]},"5aMigcn2cU8":{"duration":196.02,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5aMigcn2cU8","annotations":[{"segment":[14.975704577223087,183.07034779095164],"label":"Zumba"}]},"5w841zW9R5A":{"duration":102.469,"subset":"testing","resolution":"400x300","url":"https://www.youtube.com/watch?v=5w841zW9R5A","annotations":[]},"5OhIRnEFMyw":{"duration":217.5,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=5OhIRnEFMyw","annotations":[{"segment":[68.70973478939158,89.06817472698908],"label":"Playing violin"},{"segment":[101.62254602184088,124.69544461778473],"label":"Playing violin"},{"segment":[140.6428892355694,202.05751638065524],"label":"Playing 
violin"}]},"V35ubrbe8gA":{"duration":223.562,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=V35ubrbe8gA","annotations":[{"segment":[27.894040561622468,223.501],"label":"Trimming branches or hedges"}]},"5rw6n16ILgY":{"duration":155.57,"subset":"validation","resolution":"1920x1214","url":"https://www.youtube.com/watch?v=5rw6n16ILgY","annotations":[{"segment":[8.25195892189429,38.832747867737844],"label":"High jump"},{"segment":[50.725276902232544,86.64556859906396],"label":"High jump"},{"segment":[97.32457425273012,135.91461741029642],"label":"High jump"}]},"ciW2Eb82vrY":{"duration":128.38,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ciW2Eb82vrY","annotations":[]},"fUivipdikFU":{"duration":170.13,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=fUivipdikFU","annotations":[{"segment":[48.64584276000001,152.69517132000001],"label":"Doing step aerobics"}]},"BY5mz30u5_8":{"duration":32.69,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BY5mz30u5_8","annotations":[{"segment":[7.140588829953199,32.28566235257411],"label":"Windsurfing"}]},"T7fzZX0qKKQ":{"duration":50.09,"subset":"validation","resolution":"480x720","url":"https://www.youtube.com/watch?v=T7fzZX0qKKQ","annotations":[{"segment":[6.1522602399999995,45.57738138923557],"label":"Playing bagpipes"}]},"e5VrEH2X8LI":{"duration":93.948,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=e5VrEH2X8LI","annotations":[{"segment":[0.20000000000000004,93.747686],"label":"Installing carpet"}]},"Xpb6vHnzZJ0":{"duration":79.41,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=Xpb6vHnzZJ0","annotations":[]},"iiY3TE2O1QE":{"duration":38.801,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iiY3TE2O1QE","annotations":[{"segment":[6.779402982839314,25.362230801872077],"label":"Beach 
soccer"}]},"jbCQY7V7O1s":{"duration":165.745,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=jbCQY7V7O1s","annotations":[]},"cnDeHeGJo_A":{"duration":157.57,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cnDeHeGJo_A","annotations":[]},"altXks0a0qY":{"duration":35.69,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=altXks0a0qY","annotations":[{"segment":[3.3302051390561935,32.136485582322805],"label":"Camel ride"}]},"xuoWaq6XPZo":{"duration":79.715,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=xuoWaq6XPZo","annotations":[{"segment":[0.01,77.60002281435257],"label":"Playing drums"}]},"HGduo1zU6Ok":{"duration":216.225,"subset":"validation","resolution":"655x480","url":"https://www.youtube.com/watch?v=HGduo1zU6Ok","annotations":[{"segment":[15.849558502340093,192.57309048361935],"label":"Hurling"}]},"SndKvA_2DcE":{"duration":162.47,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SndKvA_2DcE","annotations":[{"segment":[37.639279505874114,41.694690092365605],"label":"Rafting"},{"segment":[79.71416434072329,86.55766884165368],"label":"Rafting"},{"segment":[123.30982691497661,129.64640589313575],"label":"Rafting"}]},"e1tC-0BSwV4":{"duration":87.4,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=e1tC-0BSwV4","annotations":[{"segment":[5.999338612792513,70.9012745148206],"label":"Making a lemonade"}]},"HZWdMK6zhec":{"duration":79.6,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=HZWdMK6zhec","annotations":[{"segment":[0,79.5980045351474],"label":"Playing lacrosse"}]},"iUMDlxU14bM":{"duration":134.189,"subset":"validation","resolution":"1280x684","url":"https://www.youtube.com/watch?v=iUMDlxU14bM","annotations":[{"segment":[0.2,122.50143571986209],"label":"Camel 
ride"}]},"E-bv464MTsQ":{"duration":27.38,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=E-bv464MTsQ","annotations":[{"segment":[12.940758931357255,23.703370319812795],"label":"Playing polo"}]},"cXw6os9Xk5c":{"duration":34.087,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cXw6os9Xk5c","annotations":[{"segment":[0,34.061],"label":"Playing drums"}]},"Vk9Sh8kQF80":{"duration":7.8,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Vk9Sh8kQF80","annotations":[]},"oKJ1BnvHbbA":{"duration":34.06,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=oKJ1BnvHbbA","annotations":[]},"YTBmMSIczEc":{"duration":13.654,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YTBmMSIczEc","annotations":[{"segment":[1.1753947364236708,5.524779471626164],"label":"Playing ice hockey"},{"segment":[8.176843334554512,10.086329315862923],"label":"Playing ice hockey"}]},"cGtK7bs-TE8":{"duration":60.442,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cGtK7bs-TE8","annotations":[{"segment":[5.940434429017161,39.32001836349454],"label":"Raking leaves"}]},"EZZMYzY6Pug":{"duration":33.089,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=EZZMYzY6Pug","annotations":[{"segment":[3.8715016380655225,33.08843537414966],"label":"Waxing skis"}]},"RGVDeXqN1ao":{"duration":164.42000000000002,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RGVDeXqN1ao","annotations":[{"segment":[14.155690268822797,161.9289879806608],"label":"Wrapping 
presents"}]},"Aa33vHLEXJA":{"duration":73.491,"subset":"validation","resolution":"512x288","url":"https://www.youtube.com/watch?v=Aa33vHLEXJA","annotations":[{"segment":[3.554174493510353,10.54787248049922],"label":"Bullfighting"},{"segment":[24.076665444617788,35.08314107644306],"label":"Bullfighting"},{"segment":[37.94941077223089,68.90512348673948],"label":"Bullfighting"}]},"iiqaJGokpEw":{"duration":60.697,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=iiqaJGokpEw","annotations":[{"segment":[26.173048286604363,28.06279906542056],"label":"Hitting a pinata"}]},"WiJMIl5Tp3A":{"duration":168.0,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=WiJMIl5Tp3A","annotations":[{"segment":[6.290035764695895,129.99407178783153],"label":"Tumbling"}]},"uID_HFDKFKw":{"duration":69.34,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=uID_HFDKFKw","annotations":[{"segment":[0,67.06737974882995],"label":"Playing kickball"}]},"ILeNuidJc00":{"duration":76.91,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ILeNuidJc00","annotations":[{"segment":[6.06080424,76.904488],"label":"Plataform diving"}]},"iiyYRmc1YiA":{"duration":89.049,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=iiyYRmc1YiA","annotations":[{"segment":[19.726697819032758,80.4349157550702],"label":"Arm 
wrestling"}]},"b16KVkNtcwc":{"duration":142.014,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=b16KVkNtcwc","annotations":[]},"ae-aDWHvAGc":{"duration":127.501,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ae-aDWHvAGc","annotations":[{"segment":[2.3857909516380658,31.810546021840874],"label":"Shuffleboard"},{"segment":[41.751341653666145,125.25402496099845],"label":"Shuffleboard"}]},"OO3NO29L50U":{"duration":204.59,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=OO3NO29L50U","annotations":[{"segment":[0,195.65386569578783],"label":"Doing nails"}]},"WuoK1WKaF1U":{"duration":78.37,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=WuoK1WKaF1U","annotations":[]},"WwDGutCfElc":{"duration":106.16,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=WwDGutCfElc","annotations":[{"segment":[0,97.38383623712949],"label":"Playing flauta"}]},"M7GusD6qyGU":{"duration":96.9,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=M7GusD6qyGU","annotations":[]},"FZix27qFZTc":{"duration":20.736,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=FZix27qFZTc","annotations":[{"segment":[8.287717122243594,16.123963738106735],"label":"Painting 
fence"}]},"YozbZM_nA0c":{"duration":35.83,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YozbZM_nA0c","annotations":[{"segment":[10.06838456,33.201346436817474],"label":"Hopscotch"}]},"9Oga_D8AnNI":{"duration":224.027,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9Oga_D8AnNI","annotations":[]},"xCedPpnP6Wg":{"duration":15.117,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xCedPpnP6Wg","annotations":[{"segment":[0.7287739165556153,15.116189],"label":"Powerbocking"}]},"xUKQWr2aLTU":{"duration":194.328,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=xUKQWr2aLTU","annotations":[{"segment":[77.55283779131216,100.24921629598505],"label":"BMX"}]},"2AE847UXu3Q":{"duration":121.37,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=2AE847UXu3Q","annotations":[{"segment":[14.200940282509258,30.67403101022],"label":"Brushing hair"}]},"o8gBXh-SuTI":{"duration":16.58,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=o8gBXh-SuTI","annotations":[]},"t7WI6H6UVG4":{"duration":117.539,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=t7WI6H6UVG4","annotations":[{"segment":[28.15096299587746,105.10911380248834],"label":"Making a cake"}]},"6jxqb8FyWnQ":{"duration":40.125,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6jxqb8FyWnQ","annotations":[{"segment":[4.950305566825331,21.976436661989137],"label":"Arm wrestling"}]},"Y6UKk3t8Hj8":{"duration":37.9,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Y6UKk3t8Hj8","annotations":[{"segment":[11.884695220000001,26.498137379999996],"label":"Doing 
kickboxing"}]},"rBnygEUFOvE":{"duration":54.15,"subset":"training","resolution":"612x360","url":"https://www.youtube.com/watch?v=rBnygEUFOvE","annotations":[{"segment":[5.989365054602184,39.394837753510146],"label":"Using uneven bars"}]},"fmRio4-6Xqc":{"duration":10.06,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=fmRio4-6Xqc","annotations":[{"segment":[0,10.054239],"label":"Doing motocross"}]},"6d8bmara2Wc":{"duration":150.93,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=6d8bmara2Wc","annotations":[]},"UCZGbQFg6io":{"duration":117.82,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=UCZGbQFg6io","annotations":[{"segment":[0,117.701666],"label":"Long jump"}]},"jdYq4TB888Y":{"duration":67.08,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=jdYq4TB888Y","annotations":[]},"yBjsQN4fzVY":{"duration":164.79,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=yBjsQN4fzVY","annotations":[{"segment":[20.823952352574103,34.70658725429017],"label":"Brushing teeth"},{"segment":[82.78163774726988,83.55289524180967],"label":"Brushing teeth"},{"segment":[82.26746608424338,83.80998107332294],"label":"Brushing teeth"},{"segment":[88.95169770358815,94.60758599687989],"label":"Brushing teeth"},{"segment":[116.7169675070203,118.00239666458658],"label":"Brushing teeth"}]},"rpDBtiySZ3o":{"duration":33.948,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rpDBtiySZ3o","annotations":[{"segment":[8.837452418096722,33.815162246489855],"label":"Playing beach 
volleyball"}]},"a74RMGL_c8E":{"duration":31.998,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=a74RMGL_c8E","annotations":[{"segment":[5.840343575663026,18.269792723868953],"label":"Hurling"}]},"VOTBtV0CQ8c":{"duration":222.331,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VOTBtV0CQ8c","annotations":[{"segment":[22.99497518952707,217.27480462063875],"label":"Waterskiing"}]},"p_88hssiQ9c":{"duration":116.495,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=p_88hssiQ9c","annotations":[]},"6skP3w9WDIM":{"duration":95.78,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=6skP3w9WDIM","annotations":[{"segment":[6.3068218976161345,89.55686958024059],"label":"Wrapping presents"}]},"WGewgs24UZg":{"duration":193.03,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WGewgs24UZg","annotations":[]},"soeRPsbkfas":{"duration":63.32,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=soeRPsbkfas","annotations":[{"segment":[7.369814799999999,62.73042608658346],"label":"Long jump"}]},"hG4VsZQQ2_c":{"duration":49.06,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hG4VsZQQ2_c","annotations":[]},"bJkemB0CcKo":{"duration":110.133,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=bJkemB0CcKo","annotations":[{"segment":[8.645895973907269,105.56913163590104],"label":"Making an omelette"}]},"rRybNmeLtpA":{"duration":191.89,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rRybNmeLtpA","annotations":[]},"kOIj7AgonHM":{"duration":177.47,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kOIj7AgonHM","annotations":[{"segment":[93.66926901872075,168.66002796723868],"label":"Brushing 
hair"}]},"5qublDzEgsw":{"duration":130.845,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5qublDzEgsw","annotations":[{"segment":[2.8577569769457445,82.46670042433698],"label":"Futsal"},{"segment":[88.18221431513261,118.80103873010923],"label":"Futsal"},{"segment":[122.67942315600627,129.21143903120128],"label":"Futsal"}]},"2fzIi1lPd8U":{"duration":23.78,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=2fzIi1lPd8U","annotations":[]},"SSLcbqaBiRM":{"duration":208.93,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SSLcbqaBiRM","annotations":[{"segment":[22.034074179999998,116.62431126999999],"label":"Doing motocross"},{"segment":[137.11228125,193.73342111856473],"label":"Doing motocross"}]},"BVzW_OoUmnU":{"duration":84.27,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=BVzW_OoUmnU","annotations":[{"segment":[12.9443751950078,22.800498439937595],"label":"Starting a campfire"},{"segment":[25.823042901716068,69.32140015600623],"label":"Starting a campfire"}]},"W5BD9287ZCY":{"duration":43.7,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=W5BD9287ZCY","annotations":[]},"wbrnTczxvVw":{"duration":234.847,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wbrnTczxvVw","annotations":[{"segment":[38.13052262689465,88.22877652097566],"label":"Hula hoop"}]},"U-N92yQynQA":{"duration":39.59,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=U-N92yQynQA","annotations":[{"segment":[22.350138600000005,36.83436037441498],"label":"Playing badminton"},{"segment":[5.446684867394696,20.5004938],"label":"Playing badminton"}]},"9svdYGBSMvM":{"duration":62.369,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=9svdYGBSMvM","annotations":[{"segment":[5.055681747269891,57.08918408736349],"label":"Carving 
jack-o-lanterns"}]},"52PO939EtGw":{"duration":64.25,"subset":"validation","resolution":"192x144","url":"https://www.youtube.com/watch?v=52PO939EtGw","annotations":[{"segment":[0.01,64.201],"label":"Painting fence"}]},"U0Q10ZdqnJo":{"duration":13.82,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=U0Q10ZdqnJo","annotations":[{"segment":[1.638075425897036,3.5563479641185647],"label":"Tennis serve with ball bouncing"},{"segment":[10.79836563650546,12.587316430577223],"label":"Tennis serve with ball bouncing"}]},"fYuJdh9gqr0":{"duration":63.786,"subset":"training","resolution":"990x720","url":"https://www.youtube.com/watch?v=fYuJdh9gqr0","annotations":[{"segment":[16.208709828393136,26.658265210608423],"label":"Plastering"}]},"UMKGRC2QMFo":{"duration":164.1,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UMKGRC2QMFo","annotations":[]},"9SEc3QVBCo4":{"duration":24.032,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=9SEc3QVBCo4","annotations":[{"segment":[0,24.032652],"label":"Roof shingle removal"}]},"cFOaL4Ir4hg":{"duration":126.55,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=cFOaL4Ir4hg","annotations":[]},"5z6q_Ix3kWI":{"duration":146.704,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=5z6q_Ix3kWI","annotations":[{"segment":[4.57600624024961,76.20089391575664],"label":"Skiing"},{"segment":[86.72714352574103,137.5277394695788],"label":"Skiing"}]},"ISJodiRZ_uo":{"duration":130.85,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=ISJodiRZ_uo","annotations":[{"segment":[10.4544356,118.53245939625586],"label":"Grooming 
horse"}]},"Jv7AjsdOk2I":{"duration":190.4,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Jv7AjsdOk2I","annotations":[{"segment":[2.9699375975039,138.69608580343214],"label":"Skateboarding"}]},"PBzlHfEMU5s":{"duration":19.575,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=PBzlHfEMU5s","annotations":[{"segment":[0,17.803255631825273],"label":"Calf roping"}]},"3gpgeAMy6a8":{"duration":36.827,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=3gpgeAMy6a8","annotations":[{"segment":[0.01,36.826848],"label":"Kite flying"}]},"QlTddnlIJpA":{"duration":74.65,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=QlTddnlIJpA","annotations":[{"segment":[8.963833073322933,13.736783151326051],"label":"Horseback riding"},{"segment":[36.67022620904836,44.120684867394694],"label":"Horseback riding"},{"segment":[53.31734477379095,74.621],"label":"Horseback riding"}]},"9nE6C-efQdc":{"duration":142.014,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=9nE6C-efQdc","annotations":[{"segment":[16.610101404056163,30.12389391575663],"label":"Rock-paper-scissors"}]},"hlwSgE7Epwo":{"duration":117.168,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=hlwSgE7Epwo","annotations":[{"segment":[23.04534127781366,31.988061854137648],"label":"Baton twirling"},{"segment":[44.033358956941385,51.33353901924668],"label":"Baton twirling"},{"segment":[59.72874609089777,71.40903419058624],"label":"Baton twirling"},{"segment":[84.36685380117814,86.55690781986972],"label":"Baton 
twirling"}]},"ch_jiSTNiEE":{"duration":133.376,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ch_jiSTNiEE","annotations":[]},"wSGrM1nrWq4":{"duration":144.939,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=wSGrM1nrWq4","annotations":[{"segment":[15.601853190327612,125.49316696567861],"label":"Blow-drying hair"}]},"cROJALtLB1k":{"duration":33.414,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cROJALtLB1k","annotations":[{"segment":[6.661790951638065,10.51313884555382],"label":"Using the rowing machine"},{"segment":[12.907219968798751,16.550386895475818],"label":"Using the rowing machine"},{"segment":[19.100603744149765,26.7512542901716],"label":"Using the rowing machine"}]},"EN63ldqfGsI":{"duration":229.88,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=EN63ldqfGsI","annotations":[{"segment":[28.689789329173166,225.573468600624],"label":"Zumba"}]},"8ZyBER6U9Wc":{"duration":85.589,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=8ZyBER6U9Wc","annotations":[{"segment":[0.01,79.457805128845],"label":"Rope skipping"}]},"vS0ppdYTwTc":{"duration":110.295,"subset":"validation","resolution":"632x480","url":"https://www.youtube.com/watch?v=vS0ppdYTwTc","annotations":[{"segment":[52.65242370982839,87.58197277223088],"label":"Braiding hair"}]},"assDWZW6zTA":{"duration":99.15,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=assDWZW6zTA","annotations":[{"segment":[48.85478315132605,78.09064742589703],"label":"Layup drill in basketball"}]},"L5nqeFWufrE":{"duration":52.39,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=L5nqeFWufrE","annotations":[{"segment":[35.71279624336974,43.6398928923557],"label":"Plataform 
diving"}]},"ush7vIeB2PA":{"duration":173.62,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ush7vIeB2PA","annotations":[]},"4efxBizoKyQ":{"duration":26.866,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=4efxBizoKyQ","annotations":[{"segment":[4.652213644305773,25.230924449297973],"label":"Painting furniture"}]},"98SSjNuSxvM":{"duration":193.446,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=98SSjNuSxvM","annotations":[{"segment":[27.14054602184087,161.33546801872075],"label":"Using the rowing machine"}]},"ubNDaGOws0E":{"duration":122.39,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ubNDaGOws0E","annotations":[{"segment":[1.909397518757893,122.392379],"label":"Cumbia"}]},"dL9mlqbG5CU":{"duration":219.127,"subset":"training","resolution":"384x216","url":"https://www.youtube.com/watch?v=dL9mlqbG5CU","annotations":[{"segment":[27.689958926677065,207.1619149329173],"label":"Curling"}]},"j56eH9M0ObY":{"duration":141.94,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=j56eH9M0ObY","annotations":[{"segment":[3.3184399375975038,137.825872074883],"label":"Breakdancing"}]},"9E5oElxdPXA":{"duration":20.62,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=9E5oElxdPXA","annotations":[]},"ERGoTBC8NkA":{"duration":60.19,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=ERGoTBC8NkA","annotations":[{"segment":[43.472970394695786,59.71665191505619],"label":"Brushing 
teeth"}]},"xmvwhxB1q30":{"duration":192.262,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xmvwhxB1q30","annotations":[{"segment":[0.2,192.001],"label":"Canoeing"}]},"STFtlC29FLo":{"duration":12.214,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=STFtlC29FLo","annotations":[]},"7UZWLp3MvLE":{"duration":11.75,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7UZWLp3MvLE","annotations":[]},"uhiQp0GCeKg":{"duration":187.27,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=uhiQp0GCeKg","annotations":[{"segment":[65.41813416536661,181.08739469578782],"label":"Washing face"}]},"ko89yQozE-4":{"duration":218.41,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ko89yQozE-4","annotations":[{"segment":[55.53214196567863,198.96178471138845],"label":"Windsurfing"}]},"sbr3HKm2Y9I":{"duration":196.581,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=sbr3HKm2Y9I","annotations":[{"segment":[0.2,176.70811046722426],"label":"Drum corps"}]},"lgXtDr9pNAk":{"duration":63.995,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=lgXtDr9pNAk","annotations":[{"segment":[0.01,56.10723493973772],"label":"Fixing the roof"}]},"ZwK52lXCbhA":{"duration":148.32999999999998,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=ZwK52lXCbhA","annotations":[]},"w3N0Pyz2-m0":{"duration":144.2,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=w3N0Pyz2-m0","annotations":[{"segment":[5.397603744149766,13.943809672386896],"label":"Playing violin"},{"segment":[31.036221528861155,48.353533541341655],"label":"Playing violin"},{"segment":[58.923840873634944,69.269248049922],"label":"Playing violin"},{"segment":[76.24115288611544,80.06445553822152],"label":"Playing 
violin"},{"segment":[91.30946333853355,107.72717472698908],"label":"Playing violin"}]},"WpKQV53ENHE":{"duration":206.518,"subset":"validation","resolution":"400x226","url":"https://www.youtube.com/watch?v=WpKQV53ENHE","annotations":[{"segment":[38.045918930818694,58.311731124855186],"label":"Rafting"},{"segment":[196.9556193288883,200.8157740017855],"label":"Rafting"}]},"UhgVO1QaP2s":{"duration":166.673,"subset":"validation","resolution":"1280x712","url":"https://www.youtube.com/watch?v=UhgVO1QaP2s","annotations":[{"segment":[40.823143184087364,102.96792803120125],"label":"Sumo"}]},"7OcxT66BxX0":{"duration":130.17,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7OcxT66BxX0","annotations":[]},"_kj3B0T_TE0":{"duration":65.2,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_kj3B0T_TE0","annotations":[{"segment":[0.5348369437600964,64.64149114936757],"label":"Cleaning shoes"}]},"uBkJa679ffE":{"duration":208.376,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uBkJa679ffE","annotations":[{"segment":[93.9797822628922,120.55337667875041],"label":"Putting in contact lenses"}]},"kL-MVsMzDUU":{"duration":186.503,"subset":"testing","resolution":"854x470","url":"https://www.youtube.com/watch?v=kL-MVsMzDUU","annotations":[]},"_QTQEw1b_-U":{"duration":157.501,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_QTQEw1b_-U","annotations":[{"segment":[58.70265054602184,144.17947113884557],"label":"Using the monkey bar"}]},"c7Rxe5IzUQw":{"duration":171.39,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=c7Rxe5IzUQw","annotations":[{"segment":[3.340893135725429,171.321],"label":"Playing 
guitarra"}]},"zvoDDqvr2DU":{"duration":18.762,"subset":"testing","resolution":"360x240","url":"https://www.youtube.com/watch?v=zvoDDqvr2DU","annotations":[]},"Dxgmm4BqjX0":{"duration":211.232,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Dxgmm4BqjX0","annotations":[]},"8_jbsmj5Z9w":{"duration":105.0,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=8_jbsmj5Z9w","annotations":[{"segment":[24.243516331129438,98.44833320951886],"label":"Shaving"}]},"iXaMOysrAuA":{"duration":13.259,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=iXaMOysrAuA","annotations":[]},"mOISOUKHpNM":{"duration":50.43,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=mOISOUKHpNM","annotations":[{"segment":[3.068511573115986,47.36522992348266],"label":"Using uneven bars"}]},"Ek2OvKMdvys":{"duration":215.969,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ek2OvKMdvys","annotations":[{"segment":[126.55130255555959,143.70406423780258],"label":"Doing crunches"}]},"WqGRUZ1E3CY":{"duration":54.59,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=WqGRUZ1E3CY","annotations":[]},"d0mKlsMsmZM":{"duration":208.167,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=d0mKlsMsmZM","annotations":[]},"1t0yXLlX1XE":{"duration":84.869,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1t0yXLlX1XE","annotations":[{"segment":[0.2,83.0182301246106],"label":"Playing ten 
pins"}]},"h67ctuwV-Nw":{"duration":190.195,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=h67ctuwV-Nw","annotations":[{"segment":[18.391173166926677,142.67990795631823],"label":"Canoeing"}]},"DeoqC3oVV38":{"duration":201.758,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DeoqC3oVV38","annotations":[{"segment":[7.868884007060963,193.88930006864274],"label":"River tubing"}]},"huI9TvxZxmM":{"duration":107.32,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=huI9TvxZxmM","annotations":[{"segment":[7.368455538221529,103.32584243369735],"label":"Tango"}]},"fgEMvRrOCRI":{"duration":97.524,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=fgEMvRrOCRI","annotations":[{"segment":[0.3042849709828393,96.00190847633905],"label":"Hurling"}]},"xBH0D39dyf8":{"duration":48.252,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=xBH0D39dyf8","annotations":[]},"o0cVs7THLi8":{"duration":75.047,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=o0cVs7THLi8","annotations":[{"segment":[33.015949806552264,40.04061997815913],"label":"Curling"},{"segment":[41.91386535725429,43.787110736349454],"label":"Curling"},{"segment":[45.42620044305772,48.82145769266771],"label":"Curling"},{"segment":[58.773073769110766,65.91482177691108],"label":"Curling"}]},"vX8L1RcZ4cc":{"duration":52.57,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vX8L1RcZ4cc","annotations":[{"segment":[4.590165366614665,36.55738845553822],"label":"Doing karate"}]},"PV6OCwbp6nQ":{"duration":119.746,"subset":"training","resolution":"836x480","url":"https://www.youtube.com/watch?v=PV6OCwbp6nQ","annotations":[{"segment":[69.4359996438704,77.08328482633591],"label":"Rope skipping"},{"segment":[97.60039315966924,113.08148399300258],"label":"Rope 
skipping"}]},"Gms3Yt6RrV4":{"duration":149.026,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Gms3Yt6RrV4","annotations":[{"segment":[32.3019953198128,141.77599063962558],"label":"Longboarding"}]},"FHExUjhAej4":{"duration":43.54,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FHExUjhAej4","annotations":[]},"Rd18n3PeZvk":{"duration":3.37,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=Rd18n3PeZvk","annotations":[{"segment":[0.12715132605304214,2.7708393135725427],"label":"Smoking hookah"}]},"2-AMrzL7V-U":{"duration":114.38,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2-AMrzL7V-U","annotations":[]},"ysoLva-xTGo":{"duration":210.211,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ysoLva-xTGo","annotations":[]},"KsKUIIHiMg0":{"duration":164.91,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=KsKUIIHiMg0","annotations":[{"segment":[27.141663712807016,64.95990604250021],"label":"Hand washing clothes"}]},"86Unahb4sJM":{"duration":187.55,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=86Unahb4sJM","annotations":[]},"RjFwq94RgpM":{"duration":43.1,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=RjFwq94RgpM","annotations":[]},"_4CLYKFzmoY":{"duration":231.78,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_4CLYKFzmoY","annotations":[{"segment":[0,231.7815873015873],"label":"Playing violin"}]},"ZIBWRRBft8g":{"duration":170.365,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZIBWRRBft8g","annotations":[{"segment":[31.09622824024961,164.51765197191887],"label":"Laying 
tile"}]},"oDD_TyfKjNc":{"duration":76.139,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=oDD_TyfKjNc","annotations":[]},"lVu-4SKcb4c":{"duration":17.88,"subset":"validation","resolution":"192x144","url":"https://www.youtube.com/watch?v=lVu-4SKcb4c","annotations":[{"segment":[0,17.87936507936508],"label":"Throwing darts"}]},"V0e5tItt1RM":{"duration":224.793,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=V0e5tItt1RM","annotations":[{"segment":[52.311498077267835,62.81581485296877],"label":"Rock-paper-scissors"},{"segment":[113.23653537633325,120.93970101184728],"label":"Rock-paper-scissors"}]},"OkXrN2_hI68":{"duration":154.947,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OkXrN2_hI68","annotations":[]},"w8j8UXYSOx8":{"duration":185.783,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=w8j8UXYSOx8","annotations":[]},"PBxI7l0AqAY":{"duration":154.0,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PBxI7l0AqAY","annotations":[{"segment":[88.64907765290202,112.43297653538794],"label":"Washing hands"}]},"4-_ZQGwppfI":{"duration":224.05,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4-_ZQGwppfI","annotations":[{"segment":[108.35459617024136,189.0962468648406],"label":"Braiding 
hair"}]},"HTSxgkgecS0":{"duration":190.427,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HTSxgkgecS0","annotations":[]},"BJuL-yK-I8g":{"duration":105.836,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BJuL-yK-I8g","annotations":[{"segment":[11.062479018720747,33.187437056162246],"label":"Swimming"},{"segment":[38.305897497659906,61.75174855226209],"label":"Swimming"}]},"7hvq4VqEGCE":{"duration":180.164,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=7hvq4VqEGCE","annotations":[{"segment":[0.01,180.101],"label":"Putting in contact lenses"}]},"A4L4ObzZ5VE":{"duration":9.36,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=A4L4ObzZ5VE","annotations":[{"segment":[0,9.35764],"label":"Doing motocross"}]},"x2HPIysha58":{"duration":216.39,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=x2HPIysha58","annotations":[]},"xf_exEkpJe8":{"duration":167.76,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=xf_exEkpJe8","annotations":[{"segment":[0.01,163.0531646380655],"label":"Cumbia"}]},"ND9mMyNjm5M":{"duration":78.09,"subset":"validation","resolution":"854x468","url":"https://www.youtube.com/watch?v=ND9mMyNjm5M","annotations":[{"segment":[4.045642395292859,76.9998480027695],"label":"Grooming horse"}]},"JksdZ0YMkoQ":{"duration":127.223,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=JksdZ0YMkoQ","annotations":[{"segment":[0.2,118.45094695787832],"label":"Changing car 
wheel"}]},"u00zk79cJDY":{"duration":232.13,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=u00zk79cJDY","annotations":[]},"JgfVp6Estgw":{"duration":186.29,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JgfVp6Estgw","annotations":[{"segment":[43.57277691107644,137.98046021840872],"label":"Archery"},{"segment":[157.75039469578783,161.817624024961],"label":"Archery"}]},"pfSWDHsAhME":{"duration":83.801,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=pfSWDHsAhME","annotations":[]},"HzSCfBOefA4":{"duration":30.02,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=HzSCfBOefA4","annotations":[{"segment":[4.7306763454211636,10.023413246733952],"label":"Washing dishes"},{"segment":[15.784533229329172,27.587804368174726],"label":"Washing dishes"}]},"aQ-F9wr0HQ4":{"duration":30.98,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=aQ-F9wr0HQ4","annotations":[]},"rojFvhZw_z8":{"duration":161.75,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rojFvhZw_z8","annotations":[{"segment":[0.7566973478939159,129.89971138845556],"label":"Bathing dog"}]},"bGql7ldp84A":{"duration":189.661,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bGql7ldp84A","annotations":[{"segment":[39.64823524492979,180.4882350702028],"label":"Baking cookies"}]},"ti3EHJLR2mU":{"duration":207.42000000000002,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ti3EHJLR2mU","annotations":[{"segment":[51.45134727838093,186.7133797460742],"label":"Preparing pasta"}]},"P7UbKv72LAs":{"duration":218.059,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=P7UbKv72LAs","annotations":[{"segment":[38.0413746724027,68.61033646272631],"label":"Applying sunscreen"},{"segment":[88.31033406093486,130.7672254363843],"label":"Applying 
sunscreen"},{"segment":[137.89998248909657,187.1499762352025],"label":"Applying sunscreen"}]},"bpg2PC2neQM":{"duration":218.66,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=bpg2PC2neQM","annotations":[{"segment":[5.627375195007801,96.8891322],"label":"Playing field hockey"},{"segment":[107.40458778,210.60025351014042],"label":"Playing field hockey"}]},"wRKFbuczjsw":{"duration":143.778,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=wRKFbuczjsw","annotations":[{"segment":[14.90191341653666,16.91871372854914],"label":"Disc dog"},{"segment":[22.520936817472695,24.313648205928235],"label":"Disc dog"},{"segment":[46.49845163806552,51.652496879875194],"label":"Disc dog"},{"segment":[54.341563962558496,55.9101864274571],"label":"Disc dog"},{"segment":[63.97738767550702,67.11463260530421],"label":"Disc dog"},{"segment":[0.2,143.441],"label":"Disc dog"},{"segment":[87.07066848673946,90.65659048361934],"label":"Disc dog"},{"segment":[108.8103205928237,113.74096333853353],"label":"Disc dog"},{"segment":[126.5158104524181,128.30877145085805],"label":"Disc dog"}]},"fWq7Ar4tsfo":{"duration":72.679,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fWq7Ar4tsfo","annotations":[]},"qGtUXGG78tM":{"duration":66.247,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qGtUXGG78tM","annotations":[]},"gEOVtyRAaFY":{"duration":135.396,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gEOVtyRAaFY","annotations":[{"segment":[75.61844276755072,76.88579097035883],"label":"Clipping cat claws"},{"segment":[94.6286658096724,98.00826101716069],"label":"Clipping cat claws"},{"segment":[114.27256295319815,120.6093039672387],"label":"Clipping cat 
claws"}]},"pCd9P32wX3I":{"duration":192.889,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pCd9P32wX3I","annotations":[]},"yWCrbhU_X_U":{"duration":117.192,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yWCrbhU_X_U","annotations":[{"segment":[1.6418831775700933,116.92099999999999],"label":"Having an ice cream"}]},"iLHVaeiPpuw":{"duration":199.228,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=iLHVaeiPpuw","annotations":[{"segment":[3.7296825716068644,9.324206429017162],"label":"Futsal"},{"segment":[40.40489452574103,103.49869136209048],"label":"Futsal"},{"segment":[117.48500100561623,169.07894324617786],"label":"Futsal"}]},"oKdzcTnENks":{"duration":32.16,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=oKdzcTnENks","annotations":[{"segment":[6.973774421216848,27.192703138845552],"label":"Hopscotch"}]},"hwP4uLVR-uc":{"duration":110.295,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=hwP4uLVR-uc","annotations":[{"segment":[0.2,110.09478299999999],"label":"Playing congas"}]},"FaYFlKz-E6U":{"duration":18.251,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FaYFlKz-E6U","annotations":[]},"sc_L4zUEb7E":{"duration":128.221,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sc_L4zUEb7E","annotations":[{"segment":[0.01,127.98102964118566],"label":"Snow tubing"}]},"f5nGfktddZI":{"duration":216.039,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=f5nGfktddZI","annotations":[]},"SrcZRhXkr2k":{"duration":94.13,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SrcZRhXkr2k","annotations":[{"segment":[0.01,71.74034321372856],"label":"Playing 
racquetball"}]},"MFvdMVCzfB0":{"duration":190.64,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=MFvdMVCzfB0","annotations":[{"segment":[51.74825899158416,166.5461203120125],"label":"Brushing teeth"}]},"e9p0xmsU3h0":{"duration":228.508,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=e9p0xmsU3h0","annotations":[{"segment":[11.016689421200052,105.19166147326608],"label":"Fun sliding down"}]},"CMTiL1ctmDs":{"duration":46.44,"subset":"validation","resolution":"608x1080","url":"https://www.youtube.com/watch?v=CMTiL1ctmDs","annotations":[{"segment":[8.05374414976599,17.154836193447736],"label":"Smoking hookah"},{"segment":[33.767940717628704,42.79680187207488],"label":"Smoking hookah"}]},"ak8MVjE2p3Y":{"duration":81.92,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ak8MVjE2p3Y","annotations":[{"segment":[0.01,81.92],"label":"Smoking a cigarette"}]},"wd7W8NTi_58":{"duration":97.013,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wd7W8NTi_58","annotations":[{"segment":[9.212805295950155,72.81128037383176],"label":"Canoeing"}]},"QBI5ZH_cdik":{"duration":123.09,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QBI5ZH_cdik","annotations":[{"segment":[23.235166575663026,66.82510717628706],"label":"Doing motocross"},{"segment":[72.39386610764431,123.088775],"label":"Doing 
motocross"}]},"AS43SCztbKY":{"duration":226.19,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=AS43SCztbKY","annotations":[]},"Mv2L6HxB2dk":{"duration":33.251,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Mv2L6HxB2dk","annotations":[{"segment":[0.2,33],"label":"Rollerblading"}]},"lu1bVIcDn4c":{"duration":65.038,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lu1bVIcDn4c","annotations":[{"segment":[0.01,64.18946650977448],"label":"Making an omelette"}]},"Ktun1-2Y540":{"duration":179.096,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=Ktun1-2Y540","annotations":[{"segment":[5.6546069620792,143.7233686443222],"label":"Hula hoop"}]},"qCnHgcP3tM8":{"duration":138.345,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qCnHgcP3tM8","annotations":[{"segment":[0.01,127.29733229329172],"label":"Beer pong"}]},"q3NOVbRyFfw":{"duration":208.608,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=q3NOVbRyFfw","annotations":[{"segment":[22.448251170046802,208.341],"label":"River tubing"}]},"pH7Wwl7oyAc":{"duration":112.106,"subset":"testing","resolution":"608x342","url":"https://www.youtube.com/watch?v=pH7Wwl7oyAc","annotations":[]},"qKPXW7jD9N4":{"duration":87.099,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qKPXW7jD9N4","annotations":[]},"EQK_o1qHx7M":{"duration":178.237,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EQK_o1qHx7M","annotations":[{"segment":[21.966729139913895,87.58885669712502],"label":"Making a lemonade"},{"segment":[116.22902253777225,163.49919744149764],"label":"Making a lemonade"}]},"s4Ryxk3TxKA":{"duration":52.199,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=s4Ryxk3TxKA","annotations":[{"segment":[0.2,4.870772353833212],"label":"Painting 
fence"},{"segment":[9.092109467363539,29.22464031650817],"label":"Painting fence"}]},"2oizmWFx4PA":{"duration":161.89,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2oizmWFx4PA","annotations":[{"segment":[1.60593888,47.006090040000004],"label":"Fixing bicycle"},{"segment":[28.28646905876235,142.19001857217145],"label":"Fixing bicycle"}]},"o4zmTkyTu8g":{"duration":78.67,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=o4zmTkyTu8g","annotations":[{"segment":[8.24293782,77.56464517940718],"label":"Volleyball"}]},"7X3wPRKuAsU":{"duration":91.39,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=7X3wPRKuAsU","annotations":[{"segment":[3.4896255850234006,87.6679407176287],"label":"Cricket"}]},"aKhD_bFB7MA":{"duration":159.777,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aKhD_bFB7MA","annotations":[{"segment":[4.237442488175717,31.905449322734814],"label":"Powerbocking"},{"segment":[66.80203451947601,71.28873833048559],"label":"Powerbocking"},{"segment":[75.52618081866132,77.27100962558502],"label":"Powerbocking"},{"segment":[80.26214548205928,82.25623605304213],"label":"Powerbocking"},{"segment":[88.23850776599063,141.0819078970359],"label":"Powerbocking"}]},"VTwQgMELGVE":{"duration":52.41,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VTwQgMELGVE","annotations":[{"segment":[13.03204869,48.27575663026521],"label":"Drinking beer"}]},"8wKBvYGGldQ":{"duration":82.199,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8wKBvYGGldQ","annotations":[{"segment":[7.695735329626222,67.1395411383354],"label":"Carving jack-o-lanterns"}]},"62h9hnNTKWM":{"duration":135.56,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=62h9hnNTKWM","annotations":[{"segment":[55.31969266770671,128.51515600624026],"label":"Using the balance 
beam"}]},"J3DxJ8gI95U":{"duration":134.77,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=J3DxJ8gI95U","annotations":[{"segment":[9.671361640860193,13.035313515941999],"label":"Preparing pasta"},{"segment":[17.870994336372092,117.31782164347797],"label":"Preparing pasta"}]},"zk7ZAIPkKHo":{"duration":191.356,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zk7ZAIPkKHo","annotations":[]},"duGxOdcqg3I":{"duration":198.16,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=duGxOdcqg3I","annotations":[{"segment":[91.04164213371256,95.67873765834987],"label":"Drinking beer"},{"segment":[109.89916393390429,110.82658303883176],"label":"Drinking beer"},{"segment":[116.39109766839653,118.24593587825144],"label":"Drinking beer"},{"segment":[171.41796304446177,172.34538214118564],"label":"Drinking beer"}]},"qknJO39WUGk":{"duration":33.228,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=qknJO39WUGk","annotations":[{"segment":[0.01,33.227754],"label":"Cleaning sink"}]},"1BrN3RGFXDs":{"duration":114.78,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=1BrN3RGFXDs","annotations":[]},"-ApIBtH_pVc":{"duration":114.312,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-ApIBtH_pVc","annotations":[{"segment":[0.01,114.281],"label":"Changing car wheel"}]},"FAaC_ZqY1MA":{"duration":35.016,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=FAaC_ZqY1MA","annotations":[]},"DbzOayCsDM4":{"duration":158.24,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DbzOayCsDM4","annotations":[]},"sR0hZeYm06I":{"duration":72.1,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=sR0hZeYm06I","annotations":[{"segment":[11.352652106084243,66.99188767550702],"label":"Getting a 
tattoo"}]},"FCe1NVTbaZ4":{"duration":10.937,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=FCe1NVTbaZ4","annotations":[{"segment":[0.2,4.4300780031201255],"label":"Doing crunches"}]},"5Lv0g7ISQVU":{"duration":59.07,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=5Lv0g7ISQVU","annotations":[{"segment":[9.302872074882995,59.041],"label":"Playing kickball"}]},"kSdWy3subNE":{"duration":26.8,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kSdWy3subNE","annotations":[{"segment":[3.4696623369734794,14.254877794071763],"label":"Springboard diving"}]},"avBlEff5U5U":{"duration":58.608,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=avBlEff5U5U","annotations":[{"segment":[0.2,57.23570300090915],"label":"Cutting the grass"}]},"GKBYgS99oiI":{"duration":165.257,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GKBYgS99oiI","annotations":[{"segment":[21.398256833674708,69.86659761356441],"label":"Changing car wheel"},{"segment":[78.37433828237484,144.11596964742589],"label":"Changing car wheel"}]},"k5lyc7zsox8":{"duration":128.57,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=k5lyc7zsox8","annotations":[{"segment":[17.650642833073324,111.11881965366614],"label":"Archery"}]},"J332Dy1gQqU":{"duration":160.241,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=J332Dy1gQqU","annotations":[]},"s6fEIuHPJ4Y":{"duration":49.51,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=s6fEIuHPJ4Y","annotations":[]},"na4vSFfVi2s":{"duration":112.849,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=na4vSFfVi2s","annotations":[{"segment":[0.01,112.849001],"label":"Using the monkey 
bar"}]},"GQdkuWJGYFg":{"duration":165.094,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GQdkuWJGYFg","annotations":[{"segment":[39.406107166735644,54.34437001425635],"label":"Making a lemonade"},{"segment":[58.980382622107605,70.57041414173574],"label":"Making a lemonade"},{"segment":[73.66108921363656,81.64533314938039],"label":"Making a lemonade"}]},"7B1FZR0IA6M":{"duration":78.25,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7B1FZR0IA6M","annotations":[{"segment":[2.9298235113077995,59.084772673946965],"label":"Skateboarding"}]},"4-rJZdNgMX8":{"duration":137.9,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4-rJZdNgMX8","annotations":[{"segment":[31.185405616224646,85.61097971918876],"label":"Washing hands"}]},"IRz9PYgpb8U":{"duration":10.171,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=IRz9PYgpb8U","annotations":[{"segment":[0.01,10.170339],"label":"Trimming branches or hedges"}]},"7AtINKDyH2Q":{"duration":90.14,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7AtINKDyH2Q","annotations":[{"segment":[2.0218288946145075,25.75023493511295],"label":"Hurling"},{"segment":[33.89371156439644,40.21192619056466],"label":"Hurling"},{"segment":[46.67054558620329,60.71102253324379],"label":"Hurling"},{"segment":[69.83733254882011,80.9293093369821],"label":"Hurling"}]},"LpTVTiu8oro":{"duration":89.165,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=LpTVTiu8oro","annotations":[{"segment":[7.511528542774363,81.83148829953198],"label":"Cutting the grass"}]},"0RIc6mwDRaQ":{"duration":158.52,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0RIc6mwDRaQ","annotations":[{"segment":[0,88.0406488673947],"label":"Playing 
flauta"}]},"27OICfYnvnk":{"duration":8.685,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=27OICfYnvnk","annotations":[]},"Zzj03Cew2vk":{"duration":27.377,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Zzj03Cew2vk","annotations":[{"segment":[6.49325185271379,9.989916416576719],"label":"Applying sunscreen"},{"segment":[10.885403682931859,13.44393872966083],"label":"Applying sunscreen"}]},"RBkieLmOUlQ":{"duration":36.781,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RBkieLmOUlQ","annotations":[{"segment":[3.7830046801872075,36.721],"label":"Capoeira"}]},"VhzPqd0Su5I":{"duration":95.295,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VhzPqd0Su5I","annotations":[{"segment":[18.721319812792512,95.261],"label":"Cutting the grass"}]},"bHxWHW2PGy4":{"duration":185.06,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=bHxWHW2PGy4","annotations":[{"segment":[49.702369909999994,144.59342316],"label":"Playing water polo"}]},"trUjrNzz4dg":{"duration":66.13,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=trUjrNzz4dg","annotations":[]},"unM25XJGBJg":{"duration":22.199,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=unM25XJGBJg","annotations":[]},"7pfHxav4b0U":{"duration":57.01,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=7pfHxav4b0U","annotations":[]},"91XkPU8A5hs":{"duration":77.32,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=91XkPU8A5hs","annotations":[{"segment":[10.39671612,54.25343213728549],"label":"Playing 
violin"}]},"dGxJGvw_sUg":{"duration":102.31,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dGxJGvw_sUg","annotations":[{"segment":[15.660435630068164,23.15020913941396],"label":"Cheerleading"},{"segment":[81.63418526558218,91.51431202259153],"label":"Cheerleading"}]},"W6Sz8ajVsjc":{"duration":149.12,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=W6Sz8ajVsjc","annotations":[{"segment":[10.701175105507623,20.704447486743007],"label":"Paintball"},{"segment":[35.825673179308126,42.10679769775825],"label":"Paintball"}]},"NaTBiY3G3zc":{"duration":212.138,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NaTBiY3G3zc","annotations":[{"segment":[20.511360374414977,70.46644773790952],"label":"Curling"},{"segment":[84.03041185647427,131.338872074883],"label":"Curling"},{"segment":[211.7301716068643,211.86100000000002],"label":"Curling"}]},"ElXkXNbsZac":{"duration":192.169,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=ElXkXNbsZac","annotations":[{"segment":[0.01,192.16834467120182],"label":"Hanging wallpaper"}]},"1ekMEuW7HLE":{"duration":139.598,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1ekMEuW7HLE","annotations":[]},"QWFedtlPRYM":{"duration":86.01,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QWFedtlPRYM","annotations":[{"segment":[10.460152886115445,52.703078003120126],"label":"Hand washing 
clothes"}]},"3S7EwcX-Rd0":{"duration":75.117,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3S7EwcX-Rd0","annotations":[{"segment":[10.663927395191047,43.66697208892356],"label":"BMX"},{"segment":[38.86711724804993,43.66697208892356],"label":"BMX"},{"segment":[52.21305509828393,66.144341099844],"label":"BMX"}]},"dW6OZV43ulU":{"duration":190.89,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dW6OZV43ulU","annotations":[]},"5vv5e_E93gM":{"duration":101.402,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=5vv5e_E93gM","annotations":[{"segment":[6.96048025866613,42.87023028237129],"label":"Snowboarding"},{"segment":[46.1922776474259,98.23768636661467],"label":"Snowboarding"}]},"EMOfWVKMqDM":{"duration":96.433,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EMOfWVKMqDM","annotations":[{"segment":[11.884762895277715,27.229646633484386],"label":"Curling"},{"segment":[34.60120842928956,58.37073331669267],"label":"Curling"},{"segment":[69.05197575351013,81.38805856786271],"label":"Curling"}]},"ugDN2gDN99E":{"duration":7.524,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ugDN2gDN99E","annotations":[{"segment":[0.01,7.52281],"label":"Using the rowing machine"}]},"TXq9MO-dYhI":{"duration":121.44,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TXq9MO-dYhI","annotations":[{"segment":[10.419992051482058,118.59845498595944],"label":"Playing congas"}]},"alcSAjjsH9w":{"duration":162.934,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=alcSAjjsH9w","annotations":[{"segment":[63.78023556942278,140.51980187207488],"label":"Making an 
omelette"}]},"9bUNAiuRXyo":{"duration":17.25,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9bUNAiuRXyo","annotations":[{"segment":[9.985413488299532,14.130302106084244],"label":"Discus throw"}]},"zb6WUBWwXfk":{"duration":199.86,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=zb6WUBWwXfk","annotations":[{"segment":[24.624460218408736,184.52760062402496],"label":"Spinning"}]},"ywB6YQE7ynA":{"duration":37.57,"subset":"testing","resolution":"293x240","url":"https://www.youtube.com/watch?v=ywB6YQE7ynA","annotations":[]},"U81x8t2iMhw":{"duration":182.63,"subset":"testing","resolution":"512x288","url":"https://www.youtube.com/watch?v=U81x8t2iMhw","annotations":[]},"jfFnL_o8QS0":{"duration":236.194,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jfFnL_o8QS0","annotations":[{"segment":[0.2,16.581409787003725],"label":"Hurling"},{"segment":[20.634643290493525,35.005198213728555],"label":"Hurling"},{"segment":[65.58868717940717,74.43210567550702],"label":"Hurling"}]},"PSUGaj7_5LI":{"duration":181.859,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PSUGaj7_5LI","annotations":[{"segment":[26.38509694673501,33.76157566302652],"label":"Playing ten pins"},{"segment":[66.38830844662358,73.19736572320035],"label":"Playing ten pins"},{"segment":[105.54038778694004,111.49831290394474],"label":"Playing ten pins"}]},"CbfgZlo0Ut4":{"duration":16.115,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CbfgZlo0Ut4","annotations":[{"segment":[7.565349891571652,13.062402372880063],"label":"Hand car wash"}]},"mhYFpct97UE":{"duration":155.32,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=mhYFpct97UE","annotations":[{"segment":[15.507597044017817,139.08376098853478],"label":"Playing field 
hockey"}]},"VIQMJQWkDcY":{"duration":118.306,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=VIQMJQWkDcY","annotations":[]},"rzsp_r9fq_A":{"duration":201.085,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=rzsp_r9fq_A","annotations":[{"segment":[8.456837688346365,171.64248345236328],"label":"Painting fence"}]},"mwUP1yZQsh0":{"duration":160.149,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mwUP1yZQsh0","annotations":[{"segment":[22.415738732993198,30.63484432857513],"label":"Fun sliding down"},{"segment":[95.88956148137714,129.76223908741179],"label":"Fun sliding down"}]},"iGax3fokst8":{"duration":22.965,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iGax3fokst8","annotations":[{"segment":[0,21.24488090795632],"label":"Doing crunches"}]},"wh6P2C2avqc":{"duration":102.49000000000001,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=wh6P2C2avqc","annotations":[]},"yxDsp8EBZtY":{"duration":8.34,"subset":"validation","resolution":"800x480","url":"https://www.youtube.com/watch?v=yxDsp8EBZtY","annotations":[{"segment":[2.00616,4.55941655],"label":"Discus throw"}]},"sgLSdMLbA0Y":{"duration":64.552,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=sgLSdMLbA0Y","annotations":[]},"P5lLZ_jsA2o":{"duration":62.86,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=P5lLZ_jsA2o","annotations":[{"segment":[0,43.93086462402496],"label":"Using uneven bars"}]},"wy_oDiDK6lk":{"duration":121.023,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=wy_oDiDK6lk","annotations":[{"segment":[0,110.03420124804992],"label":"Clipping cat 
claws"}]},"4CsTbXdERSU":{"duration":188.756,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=4CsTbXdERSU","annotations":[{"segment":[6.1838614820592825,179.92092216848675],"label":"Fixing the roof"}]},"4M4MXPP9oRY":{"duration":181.3,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4M4MXPP9oRY","annotations":[{"segment":[0.01,179.60435685454627],"label":"Playing water polo"}]},"Cnf7cFBF1eQ":{"duration":145.07999999999998,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Cnf7cFBF1eQ","annotations":[]},"5qY9yPt-5Go":{"duration":31.35,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=5qY9yPt-5Go","annotations":[]},"zAvcY-MoWeQ":{"duration":149.049,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=zAvcY-MoWeQ","annotations":[]},"zXThQYb_axY":{"duration":135.489,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=zXThQYb_axY","annotations":[]},"CCL8kqQMCRw":{"duration":37.06,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=CCL8kqQMCRw","annotations":[{"segment":[0.010000000000000002,36.447167758485385],"label":"Hitting a pinata"}]},"v6go4RA0ZB4":{"duration":13.514,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=v6go4RA0ZB4","annotations":[{"segment":[0.01,13.481],"label":"Swimming"}]},"PpiV66Wi4Vc":{"duration":29.722,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PpiV66Wi4Vc","annotations":[]},"xaicDAewb6o":{"duration":95.2,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=xaicDAewb6o","annotations":[{"segment":[5.08228323808159,94.64909796082303],"label":"Washing 
dishes"}]},"WVM8itzRVjs":{"duration":105.698,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WVM8itzRVjs","annotations":[{"segment":[29.42426580900192,41.91725123979507],"label":"Hurling"},{"segment":[44.87611621024609,55.06776221957734],"label":"Hurling"}]},"Taw5ILRt9hI":{"duration":48.739,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Taw5ILRt9hI","annotations":[{"segment":[0.01,45.2785479215087],"label":"Rollerblading"}]},"3DQKqC14UcI":{"duration":61.65,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=3DQKqC14UcI","annotations":[]},"WR_L7CdwPKo":{"duration":19.18,"subset":"testing","resolution":"1152x720","url":"https://www.youtube.com/watch?v=WR_L7CdwPKo","annotations":[]},"RgMAHuMVRcU":{"duration":222.517,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RgMAHuMVRcU","annotations":[{"segment":[3.8175834633385337,16.658546021840873],"label":"Trimming branches or hedges"},{"segment":[153.05039157566304,156.86797503900158],"label":"Trimming branches or hedges"},{"segment":[182.21922776911077,200.26760842433697],"label":"Trimming branches or hedges"}]},"jJ5t3ZUxP7Y":{"duration":36.433,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jJ5t3ZUxP7Y","annotations":[{"segment":[8.555595457953787,14.844807897496368],"label":"Beer pong"}]},"EbQezcKi1pc":{"duration":58.93,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=EbQezcKi1pc","annotations":[]},"69DNcmkoapw":{"duration":146.07999999999998,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=69DNcmkoapw","annotations":[{"segment":[16.35695574968265,34.53135487876508],"label":"Hand washing clothes"},{"segment":[41.24787624492979,86.82564005148205],"label":"Hand washing clothes"},{"segment":[92.75074934633385,139.92373488611545],"label":"Hand washing 
clothes"}]},"Dutm5TiFBqQ":{"duration":184.181,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Dutm5TiFBqQ","annotations":[{"segment":[23.273970199624312,62.351253497758954],"label":"Arm wrestling"},{"segment":[110.9105246549998,125.85183885722776],"label":"Arm wrestling"}]},"bYxVbXzQToI":{"duration":89.72,"subset":"testing","resolution":"480x320","url":"https://www.youtube.com/watch?v=bYxVbXzQToI","annotations":[]},"0kykNR9FUWU":{"duration":97.66,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0kykNR9FUWU","annotations":[{"segment":[0.4568845553822153,97.401],"label":"Using the balance beam"}]},"CjuTFlxFvH0":{"duration":152.672,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=CjuTFlxFvH0","annotations":[{"segment":[17.744159476583146,132.06894407722308],"label":"Powerbocking"}]},"ehO8oXanK1Y":{"duration":54.06,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ehO8oXanK1Y","annotations":[{"segment":[1.8552779946957878,52.62243051354707],"label":"Doing karate"}]},"dWJIJM3qmyQ":{"duration":178.538,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=dWJIJM3qmyQ","annotations":[{"segment":[60.1626565634054,80.21687541787387],"label":"Painting furniture"},{"segment":[105.8417106208058,112.8049810563851],"label":"Painting furniture"},{"segment":[126.73152192754377,134.25185399796945],"label":"Painting 
furniture"}]},"mMebSMn5D0E":{"duration":166.67000000000002,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=mMebSMn5D0E","annotations":[]},"CKSlgK_2FP4":{"duration":224.12,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=CKSlgK_2FP4","annotations":[]},"8M8Rp_CC5k8":{"duration":235.033,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=8M8Rp_CC5k8","annotations":[]},"Ax8GCogz6Yg":{"duration":196.929,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ax8GCogz6Yg","annotations":[{"segment":[14.743039001560064,185.84259750390018],"label":"Rope skipping"}]},"CaHsFgD2lIg":{"duration":19.2,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CaHsFgD2lIg","annotations":[]},"DJE9nX2qKYs":{"duration":13.54,"subset":"validation","resolution":"720x1280","url":"https://www.youtube.com/watch?v=DJE9nX2qKYs","annotations":[{"segment":[0.38014072399630683,13.135973363494541],"label":"Clean and jerk"}]},"cdcn6XP1N6A":{"duration":144.03,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=cdcn6XP1N6A","annotations":[{"segment":[4.267935497659907,139.26947413416536],"label":"Playing flauta"}]},"K_ia4VP3w2c":{"duration":146.73,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=K_ia4VP3w2c","annotations":[]},"CkC1wxAaDjM":{"duration":121.88,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CkC1wxAaDjM","annotations":[{"segment":[5.323998712329446,5.894427145793315],"label":"Archery"},{"segment":[17.49313862622532,89.55726405382745],"label":"Archery"},{"segment":[94.31083282371296,117.12796979719188],"label":"Archery"}]},"4W4mrswC2tA":{"duration":16.46,"subset":"validation","resolution":"568x320","url":"https://www.youtube.com/watch?v=4W4mrswC2tA","annotations":[{"segment":[0.6017979719188767,16.415],"label":"Clean and 
jerk"}]},"fnZIksNLUXA":{"duration":167.602,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fnZIksNLUXA","annotations":[{"segment":[18.818957878315132,56.71824804992199],"label":"Scuba diving"},{"segment":[66.38910140405616,84.94668486739468],"label":"Scuba diving"},{"segment":[92.78791731669267,105.85663806552262],"label":"Scuba diving"},{"segment":[115.00474258970358,145.34152262090484],"label":"Scuba diving"}]},"HddRC-twMLc":{"duration":14.118,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=HddRC-twMLc","annotations":[{"segment":[0.01,14.117459],"label":"Throwing darts"}]},"8r4bF9RF0Kc":{"duration":27.33,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8r4bF9RF0Kc","annotations":[]},"CktICaG32Ps":{"duration":151.0,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=CktICaG32Ps","annotations":[]},"9Ufy_JGbnys":{"duration":223.45,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9Ufy_JGbnys","annotations":[]},"mi3LGXT_Cqw":{"duration":95.18,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mi3LGXT_Cqw","annotations":[]},"Z9piz8SUX_s":{"duration":39.57,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Z9piz8SUX_s","annotations":[]},"1ILXipBgbTs":{"duration":60.349,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=1ILXipBgbTs","annotations":[{"segment":[0.01,54.276194631596184],"label":"River tubing"}]},"4YJ_L7jqgoA":{"duration":169.11,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4YJ_L7jqgoA","annotations":[{"segment":[15.04221996879875,153.85288143525742],"label":"Clean and 
jerk"}]},"YnFDzvuDJSQ":{"duration":204.22,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=YnFDzvuDJSQ","annotations":[{"segment":[93.0297883479965,161.20915288611545],"label":"Decorating the Christmas tree"}]},"6lIi6-Y6q10":{"duration":138.74,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=6lIi6-Y6q10","annotations":[{"segment":[11.250252730109207,126.79997815912637],"label":"Making a lemonade"}]},"PLCfV5XnJTg":{"duration":30.047,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=PLCfV5XnJTg","annotations":[]},"NTZ_mWvnHVY":{"duration":162.59,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NTZ_mWvnHVY","annotations":[{"segment":[23.588938202426053,138.99718424655353],"label":"Windsurfing"}]},"MuOkEmd4Gp0":{"duration":223.376,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=MuOkEmd4Gp0","annotations":[{"segment":[0.2,128.5892833264351],"label":"BMX"},{"segment":[151.5889925392934,205.60345956318253],"label":"BMX"}]},"PJcpWZrN03U":{"duration":26.12,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=PJcpWZrN03U","annotations":[]},"5mWZY2XkWSQ":{"duration":24.59,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=5mWZY2XkWSQ","annotations":[]},"a7DEFV0_nCE":{"duration":62.787,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=a7DEFV0_nCE","annotations":[{"segment":[0,0.19590195007800312],"label":"Kite 
flying"}]},"WdtytUagoJs":{"duration":195.1,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WdtytUagoJs","annotations":[]},"UcttD5hL6Bs":{"duration":87.96000000000001,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UcttD5hL6Bs","annotations":[]},"wHGDq_8dCuc":{"duration":232.43,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=wHGDq_8dCuc","annotations":[{"segment":[18.85561746279375,228.8056647347894],"label":"Tai chi"}]},"x0HMqVvqHJE":{"duration":183.74,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=x0HMqVvqHJE","annotations":[{"segment":[6.306191926588629,69.0814661049027],"label":"Playing lacrosse"}]},"eVTMUEYhwDE":{"duration":13.1,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eVTMUEYhwDE","annotations":[{"segment":[0.3677468209048362,10.848531216692669],"label":"Clean and jerk"}]},"fgQ2HYMl3pA":{"duration":239.073,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=fgQ2HYMl3pA","annotations":[{"segment":[28.7185558279474,223.40798618876755],"label":"Beach soccer"}]},"SkVe9zHoPXc":{"duration":120.419,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SkVe9zHoPXc","annotations":[]},"U0IJmOHoa1M":{"duration":230.55,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=U0IJmOHoa1M","annotations":[{"segment":[16.54499651550688,197.10126223400937],"label":"Cricket"}]},"WiGOn6O-S1g":{"duration":15.67,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=WiGOn6O-S1g","annotations":[{"segment":[0.4631929079563183,15.090337369734788],"label":"Plataform diving"}]},"6hsOVkC7hxA":{"duration":6.43,"subset":"training","resolution":"480x480","url":"https://www.youtube.com/watch?v=6hsOVkC7hxA","annotations":[{"segment":[0.6438599613518879,6.393975593750642],"label":"Chopping 
wood"}]},"dVLIfsEMD8Y":{"duration":17.137,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dVLIfsEMD8Y","annotations":[{"segment":[4.695438377535101,13.019170046801872],"label":"Throwing darts"}]},"jVp2BsFOIVA":{"duration":115.311,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jVp2BsFOIVA","annotations":[{"segment":[13.22200728029121,15.200811090947036],"label":"Swinging at the playground"},{"segment":[18.79863620123036,24.01548261114118],"label":"Swinging at the playground"},{"segment":[92.5540517646906,100.1094845618082],"label":"Swinging at the playground"},{"segment":[106.04589604525773,107.30513484477733],"label":"Swinging at the playground"}]},"BLamvR0GIE8":{"duration":49.9,"subset":"validation","resolution":"482x360","url":"https://www.youtube.com/watch?v=BLamvR0GIE8","annotations":[{"segment":[14.946550315132606,40.63593366926677],"label":"Washing hands"}]},"MrWfTznlveQ":{"duration":18.37,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MrWfTznlveQ","annotations":[]},"DLJqhYP-C0k":{"duration":186.968,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DLJqhYP-C0k","annotations":[{"segment":[11.083851549980366,16.62577732497055],"label":"Playing ten pins"},{"segment":[22.459383403907584,27.709628874950916],"label":"Playing ten pins"},{"segment":[28.876350090738324,32.959874345994244],"label":"Playing ten pins"},{"segment":[35.29331677756906,42.877004680187206],"label":"Playing ten pins"},{"segment":[45.793807719655724,49.29397136701794],"label":"Playing ten pins"},{"segment":[51.04405319069905,54.83589714200812],"label":"Playing ten pins"},{"segment":[58.62774109331719,62.12790474067942],"label":"Playing ten pins"},{"segment":[63.58630626041368,68.25319112356331],"label":"Playing ten pins"},{"segment":[69.12823203540385,72.33671537881924],"label":"Playing ten 
pins"},{"segment":[75.25351841828774,79.33704267354368],"label":"Playing ten pins"},{"segment":[80.50376388933107,84.29560784064014],"label":"Playing ten pins"},{"segment":[85.46232905642756,87.21241088010868],"label":"Playing ten pins"},{"segment":[89.25417300773663,113.46363773166925],"label":"Playing ten pins"},{"segment":[114.6303589422777,120.46396499531981],"label":"Playing ten pins"},{"segment":[122.21404681123245,131.25613619344773],"label":"Playing ten pins"},{"segment":[133.8812589173167,143.5067089048362],"label":"Playing ten pins"},{"segment":[144.3817498127925,154.007199800312],"label":"Playing ten pins"},{"segment":[171.21633765678627,178.2166649204368],"label":"Playing ten pins"}]},"ne7uJQ0MUtE":{"duration":223.423,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ne7uJQ0MUtE","annotations":[{"segment":[109.6951872074883,223.241],"label":"Making a lemonade"}]},"BZK8b0xCkB0":{"duration":218.872,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BZK8b0xCkB0","annotations":[{"segment":[46.77904379848663,204.87172466490495],"label":"Decorating the Christmas tree"}]},"-b_3CyA0NeA":{"duration":41.66,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-b_3CyA0NeA","annotations":[]},"hS7VEMlJ4N0":{"duration":70.19,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hS7VEMlJ4N0","annotations":[{"segment":[56.22994253042122,70.123333],"label":"High jump"}]},"TRXLUcm2CuQ":{"duration":168.57999999999998,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TRXLUcm2CuQ","annotations":[{"segment":[16.83138801622465,63.11770506084244],"label":"Shaving legs"},{"segment":[98.88440459531981,115.97878304929799],"label":"Shaving 
legs"}]},"Nq6cd5Xue9Q":{"duration":121.05,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Nq6cd5Xue9Q","annotations":[{"segment":[30.113898595943837,97.51616692667707],"label":"Pole vault"}]},"WzDnorAzWVU":{"duration":154.2,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WzDnorAzWVU","annotations":[{"segment":[20.44823080343214,31.75489960062403],"label":"Painting"}]},"h2HRRkwHoKw":{"duration":111.47999999999999,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=h2HRRkwHoKw","annotations":[{"segment":[98.08752767550702,100.17449634945397],"label":"Drinking coffee"}]},"-ucwgrxTBEg":{"duration":193.03,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=-ucwgrxTBEg","annotations":[{"segment":[64.27648985959438,183.79764430577222],"label":"Tai chi"}]},"_Zq8ugolzlA":{"duration":140.9,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=_Zq8ugolzlA","annotations":[{"segment":[66.80016609048361,89.65285448985959],"label":"Snatch"}]},"MT852hP9wVk":{"duration":91.98,"subset":"validation","resolution":"800x480","url":"https://www.youtube.com/watch?v=MT852hP9wVk","annotations":[{"segment":[1.2913703078735395,35.15396949211302],"label":"Chopping wood"},{"segment":[42.32824898029935,91.77426303854875],"label":"Chopping wood"}]},"VNnYIsTljo8":{"duration":209.4,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=VNnYIsTljo8","annotations":[]},"GU_8hjy79VQ":{"duration":24.17,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GU_8hjy79VQ","annotations":[{"segment":[3.6201391825273013,13.801780633385334],"label":"Javelin throw"},{"segment":[21.60770574570983,23.342355770670824],"label":"Javelin 
throw"}]},"eMkBTRYwBAU":{"duration":195.93,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eMkBTRYwBAU","annotations":[{"segment":[23.767550920121806,191.05457666043347],"label":"Wakeboarding"}]},"pcFn1KBwn6Y":{"duration":163.585,"subset":"training","resolution":"320x180","url":"https://www.youtube.com/watch?v=pcFn1KBwn6Y","annotations":[{"segment":[0.19999999999999998,163.37900000000002],"label":"Playing pool"}]},"ct3uprBbPHw":{"duration":158.62,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ct3uprBbPHw","annotations":[{"segment":[36.36434469734789,142.2554489],"label":"Brushing hair"}]},"xPhJuUiRAbc":{"duration":6.641,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xPhJuUiRAbc","annotations":[]},"kUTDgJmoRE8":{"duration":77.76,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=kUTDgJmoRE8","annotations":[{"segment":[3.274581903276131,77.61971918876755],"label":"Cumbia"}]},"cdLH2Ue3uAE":{"duration":208.731,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cdLH2Ue3uAE","annotations":[]},"hI11n4_r178":{"duration":134.88,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=hI11n4_r178","annotations":[]},"uG2sRzp5Bk4":{"duration":192.053,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uG2sRzp5Bk4","annotations":[{"segment":[42.83329641185647,163.24578003120126],"label":"Playing blackjack"}]},"Z2GWtB7MGVs":{"duration":223.678,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Z2GWtB7MGVs","annotations":[{"segment":[0.01,212.7301248049922],"label":"Surfing"}]},"CL6TbOgnLzA":{"duration":207.49,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=CL6TbOgnLzA","annotations":[{"segment":[17.31555460218409,197.591515600624],"label":"Playing water 
polo"}]},"yPEf9ajKwI8":{"duration":216.945,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=yPEf9ajKwI8","annotations":[{"segment":[14.553187995319814,184.4531966848674],"label":"Installing carpet"}]},"3TLhUYQ8geM":{"duration":200.342,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=3TLhUYQ8geM","annotations":[{"segment":[4.6785280373831775,171.85792990654207],"label":"Changing car wheel"}]},"Pu85El6jhao":{"duration":119.885,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Pu85El6jhao","annotations":[{"segment":[16.800140186915886,40.50700467289719],"label":"River tubing"}]},"XH-YlSbgxkY":{"duration":151.418,"subset":"validation","resolution":"562x480","url":"https://www.youtube.com/watch?v=XH-YlSbgxkY","annotations":[{"segment":[11.334365054602184,44.86519500780031],"label":"Baton twirling"},{"segment":[53.12983619344773,147.11061310452416],"label":"Baton twirling"}]},"2Tuht3F2uc8":{"duration":46.35,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2Tuht3F2uc8","annotations":[{"segment":[2.1647425741029642,44.305064683307336],"label":"Shoveling snow"}]},"T97WL2cKD6M":{"duration":58.65,"subset":"validation","resolution":"176x144","url":"https://www.youtube.com/watch?v=T97WL2cKD6M","annotations":[{"segment":[0.01,58.47059743525741],"label":"Playing accordion"}]},"3jliBZqVjMU":{"duration":82.28999999999999,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=3jliBZqVjMU","annotations":[]},"7-5sFmNf4i4":{"duration":207.192,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=7-5sFmNf4i4","annotations":[{"segment":[92.44411374102964,123.79753693291734],"label":"Playing ten pins"},{"segment":[133.81770310764432,138.3429394446178],"label":"Playing ten 
pins"}]},"ubgCjUUmP9I":{"duration":107.16,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ubgCjUUmP9I","annotations":[{"segment":[24.582178951620236,75.15773577871244],"label":"Using the pommel horse"}]},"2zVpWu1i5qM":{"duration":96.18,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2zVpWu1i5qM","annotations":[{"segment":[12.85020897,93.02618605304211],"label":"Making a sandwich"}]},"d4prmED8v-w":{"duration":105.42,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=d4prmED8v-w","annotations":[{"segment":[5.18234888,7.3184279275932935],"label":"Pole vault"},{"segment":[41.854829383651534,50.24224116583711],"label":"Pole vault"},{"segment":[44.32171520194141,50.406700220389766],"label":"Pole vault"}]},"9Bo7Hr77DgA":{"duration":51.8,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=9Bo7Hr77DgA","annotations":[{"segment":[12.526640276495412,51.561267718735955],"label":"Brushing teeth"}]},"ETZOazHvdZg":{"duration":102.562,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=ETZOazHvdZg","annotations":[{"segment":[0.01,15.373689414740547],"label":"Playing ten pins"},{"segment":[45.454387728387374,56.6546472127001],"label":"Playing ten pins"},{"segment":[30.894049427988705,61.93476974000119],"label":"Playing ten pins"},{"segment":[67.85490711909635,82.57524871036],"label":"Playing ten 
pins"}]},"UBCZIKByREw":{"duration":108.205,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=UBCZIKByREw","annotations":[]},"XCXcwMyrqYM":{"duration":75.63,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=XCXcwMyrqYM","annotations":[{"segment":[39.377218533558654,69.0550137],"label":"Archery"}]},"cAiig_569fI":{"duration":131.193,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cAiig_569fI","annotations":[]},"cAhu8H9qsAI":{"duration":59.351,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=cAhu8H9qsAI","annotations":[{"segment":[24.0808127925117,43.64070670826833],"label":"Putting on shoes"}]},"wZ9PIXtmrq0":{"duration":221.681,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=wZ9PIXtmrq0","annotations":[]},"HvDf9BN0blE":{"duration":141.41,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HvDf9BN0blE","annotations":[]},"5deGCvJOGg0":{"duration":198.926,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=5deGCvJOGg0","annotations":[{"segment":[25.447548801872074,52.136441447737916],"label":"Curling"},{"segment":[59.58450451170047,95.89381194851795],"label":"Curling"},{"segment":[107.68657846645866,143.06487802028082],"label":"Curling"},{"segment":[175.33981796411857,188.0635923650546],"label":"Curling"}]},"BaJwohtZMtA":{"duration":197.161,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BaJwohtZMtA","annotations":[{"segment":[5.227858034321374,156.2365491419657],"label":"Hula hoop"}]},"_0mR06FcftE":{"duration":124.506,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=_0mR06FcftE","annotations":[{"segment":[0,118.8492542901716],"label":"Cleaning 
sink"}]},"KhAtzEJxz9M":{"duration":203.59,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=KhAtzEJxz9M","annotations":[{"segment":[0.2,199.1456995600624],"label":"Zumba"}]},"aVDyPmUgHU0":{"duration":76.092,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aVDyPmUgHU0","annotations":[{"segment":[33.640083463338534,64.25433931357254],"label":"Trimming branches or hedges"}]},"aAtM2Gaf1Xg":{"duration":105.094,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=aAtM2Gaf1Xg","annotations":[]},"gjfa7WGhM6U":{"duration":115.8,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=gjfa7WGhM6U","annotations":[{"segment":[10.658468276131044,115.797935],"label":"Shaving legs"}]},"IMto8gJvRek":{"duration":195.489,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IMto8gJvRek","annotations":[{"segment":[15.543542901716068,195.361],"label":"Making a lemonade"}]},"1jX8p54Dfjs":{"duration":201.596,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=1jX8p54Dfjs","annotations":[{"segment":[55.34280187207489,172.6318081123245],"label":"Clipping cat claws"}]},"JnpcJP82WLI":{"duration":64.65,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=JnpcJP82WLI","annotations":[{"segment":[9.6311008049922,53.19796517940718],"label":"Ping-pong"}]},"C8IHSB9mfeE":{"duration":34.203,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=C8IHSB9mfeE","annotations":[{"segment":[0.01,11.644584243369735],"label":"Throwing darts"},{"segment":[19.26552496099844,29.870890015600626],"label":"Throwing 
darts"}]},"j5V7kD_HeMw":{"duration":179.468,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=j5V7kD_HeMw","annotations":[{"segment":[0.2,169.9477160624025],"label":"Snowboarding"}]},"lQj3qYaI6xo":{"duration":225.65,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lQj3qYaI6xo","annotations":[]},"D99jYbtGO38":{"duration":8.8,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=D99jYbtGO38","annotations":[{"segment":[2.727296,7.939360000000001],"label":"Playing lacrosse"}]},"9cxGx2BsKkM":{"duration":83.31,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=9cxGx2BsKkM","annotations":[{"segment":[0.12997346832648818,59.657821104524174],"label":"Archery"},{"segment":[64.72678629641185,82.14323080187206],"label":"Archery"}]},"W840XGSymlQ":{"duration":181.534,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=W840XGSymlQ","annotations":[]},"Lnwcg2H0Gt4":{"duration":226.91,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Lnwcg2H0Gt4","annotations":[]},"HD_vpQCUSCQ":{"duration":161.03,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HD_vpQCUSCQ","annotations":[{"segment":[66.78816203929976,71.05220651482621],"label":"Volleyball"}]},"P4TNzMk6OWo":{"duration":121.069,"subset":"testing","resolution":"544x360","url":"https://www.youtube.com/watch?v=P4TNzMk6OWo","annotations":[]},"n637bl2-DDo":{"duration":61.09,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=n637bl2-DDo","annotations":[{"segment":[40.600723516380654,53.848377433697344],"label":"Spinning"},{"segment":[59.37617547113884,61.091699],"label":"Spinning"}]},"dTSVM5fF1L8":{"duration":127.548,"subset":"testing","resolution":"600x360","url":"https://www.youtube.com/watch?v=dTSVM5fF1L8","annotations":[]},"rVLkm3MLzns":{"duration":218.082,"subset":"training","res
olution":"1920x1080","url":"https://www.youtube.com/watch?v=rVLkm3MLzns","annotations":[{"segment":[24.829224648985956,71.42653666146646],"label":"Snowboarding"},{"segment":[88.09272854914197,203.75438221528862],"label":"Snowboarding"}]},"eZ_qerXno64":{"duration":67.11,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=eZ_qerXno64","annotations":[{"segment":[2.35220356474259,62.986784344773795],"label":"Ping-pong"}]},"3kJuuyL4tGQ":{"duration":177.703,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3kJuuyL4tGQ","annotations":[]},"rjjmrZvHMnA":{"duration":78.81,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rjjmrZvHMnA","annotations":[]},"tS-MHCJKMgM":{"duration":40.566,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tS-MHCJKMgM","annotations":[{"segment":[3.982563182527301,26.676851794071766],"label":"Welding"},{"segment":[30.027262090483617,34.2626864274571],"label":"Welding"}]},"A9TcWJnbcWs":{"duration":7.06,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=A9TcWJnbcWs","annotations":[{"segment":[0.24355629,4.56718308],"label":"Playing racquetball"}]},"iZtie23nW_A":{"duration":52.245,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=iZtie23nW_A","annotations":[{"segment":[5.619140405616225,43.66685803432137],"label":"Hand car wash"}]},"Nc9yr4urskA":{"duration":160.728,"subset":"testing","resolution":"300x240","url":"https://www.youtube.com/watch?v=Nc9yr4urskA","annotations":[]},"iPXyExgGD5Q":{"duration":39.08,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=iPXyExgGD5Q","annotations":[{"segment":[21.73403965665395,31.585506946373577],"label":"Gargling 
mouthwash"}]},"ndUSRZq0QHk":{"duration":119.119,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=ndUSRZq0QHk","annotations":[]},"rpbVkkhUcuw":{"duration":126.608,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rpbVkkhUcuw","annotations":[]},"T4ZeB_TvS68":{"duration":64.412,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=T4ZeB_TvS68","annotations":[{"segment":[9.667178346164503,63.61885747389036],"label":"Playing blackjack"}]},"wjcCl0jPxO4":{"duration":84.41,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wjcCl0jPxO4","annotations":[{"segment":[5.267038003120125,13.694298808112325],"label":"Washing face"},{"segment":[37.9226736224649,42.79468377535101],"label":"Washing face"},{"segment":[58.595797784711394,64.91624338845554],"label":"Washing face"}]},"k4WHaby5jd8":{"duration":133.051,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=k4WHaby5jd8","annotations":[{"segment":[0.01,97.97154447425898],"label":"Assembling bicycle"}]},"3bi2XM3scQA":{"duration":52.06,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3bi2XM3scQA","annotations":[{"segment":[5.075955635725429,14.821790456318254],"label":"Clean and jerk"},{"segment":[28.62838978549142,49.500719359594385],"label":"Clean and 
jerk"}]},"rF1n8L571vM":{"duration":166.56,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rF1n8L571vM","annotations":[{"segment":[7.535328090674648,159.02140498595944],"label":"Spinning"}]},"moGDCWEoaK8":{"duration":95.156,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=moGDCWEoaK8","annotations":[{"segment":[0.01,95.101],"label":"Hurling"}]},"w3OuSKoSsC0":{"duration":67.13,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=w3OuSKoSsC0","annotations":[]},"1oyWMusaDTI":{"duration":61.72,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=1oyWMusaDTI","annotations":[{"segment":[0.01,61.71852607709751],"label":"Windsurfing"}]},"tPiuksisb4E":{"duration":165.23,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=tPiuksisb4E","annotations":[{"segment":[32.479536438600405,153.11781345397816],"label":"Smoking hookah"}]},"FMtUqoxfR50":{"duration":64.78,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FMtUqoxfR50","annotations":[{"segment":[9.34187457878315,56.50571704680188],"label":"Volleyball"}]},"bJ1vEQKX-hE":{"duration":215.32,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bJ1vEQKX-hE","annotations":[{"segment":[92.28314162000001,178.32421442],"label":"Dodgeball"}]},"RcDEaYPwI6I":{"duration":120.98,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RcDEaYPwI6I","annotations":[{"segment":[32.63978315132605,39.4318767550702],"label":"Cheerleading"},{"segment":[61.12884243369734,117.16361466458659],"label":"Cheerleading"}]},"c0Cn3HFXpLs":{"duration":139.57999999999998,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=c0Cn3HFXpLs","annotations":[{"segment":[0.2,5.985990639625585],"label":"Playing racquetball"},{"segment":[17.304954758190327,133.10666458658346],"label":"Playing 
racquetball"}]},"7x5oY2Myd7I":{"duration":209.096,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7x5oY2Myd7I","annotations":[{"segment":[6.837789719626167,20.18776012461059],"label":"Playing ice hockey"},{"segment":[45.25965576323988,60.56328037383177],"label":"Playing ice hockey"},{"segment":[168.66547975077881,206.78151869158881],"label":"Playing ice hockey"}]},"ur873jaQO3A":{"duration":130.38,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ur873jaQO3A","annotations":[{"segment":[49.40483392675172,89.81857875385451],"label":"Archery"}]},"FADShG5lyB8":{"duration":12.585,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=FADShG5lyB8","annotations":[]},"vWGkTOLx57s":{"duration":66.15,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vWGkTOLx57s","annotations":[{"segment":[12.693996306791048,58.92903949297972],"label":"Using parallel bars"}]},"Nn4sVR3__DQ":{"duration":39.126,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Nn4sVR3__DQ","annotations":[{"segment":[19.897477133733105,37.4827100388653],"label":"Fun sliding down"}]},"f_BzYUCp4J0":{"duration":214.065,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=f_BzYUCp4J0","annotations":[{"segment":[9.000042056074767,201.3342741433022],"label":"Blowing leaves"}]},"VRRLOIP6EmA":{"duration":85.287,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VRRLOIP6EmA","annotations":[{"segment":[0.01,84.68919968798753],"label":"Surfing"}]},"drarkco9LfQ":{"duration":71.564,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=drarkco9LfQ","annotations":[{"segment":[0.2,60.36327769110765],"label":"Arm 
wrestling"}]},"wP3gExRMMy4":{"duration":108.6,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=wP3gExRMMy4","annotations":[]},"lSX_yj2ohls":{"duration":237.541,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lSX_yj2ohls","annotations":[{"segment":[0.01,32.337954005695885],"label":"Beer pong"},{"segment":[41.217941220649145,52.31792523934074],"label":"Beer pong"},{"segment":[80.43788475335943,212.89769404307907],"label":"Beer pong"}]},"w_TcHZn4Qy8":{"duration":44.3,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=w_TcHZn4Qy8","annotations":[]},"sLtQvJXnrOE":{"duration":21.316,"subset":"training","resolution":"1444x1080","url":"https://www.youtube.com/watch?v=sLtQvJXnrOE","annotations":[{"segment":[0.2,21.115849],"label":"Changing car wheel"}]},"3SjuIcAfeWk":{"duration":94.53,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=3SjuIcAfeWk","annotations":[{"segment":[7.547233746953472,90.12521038246749],"label":"Wrapping presents"}]},"MQ7k1wig4jk":{"duration":16.162,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=MQ7k1wig4jk","annotations":[{"segment":[0.2,15.961088435374151],"label":"Table soccer"}]},"HXn5ZpSEMqg":{"duration":111.27,"subset":"training","resolution":"2276x720","url":"https://www.youtube.com/watch?v=HXn5ZpSEMqg","annotations":[{"segment":[62.984747269890796,66.81401716068642],"label":"Waxing skis"}]},"vWEbHvnkogA":{"duration":190.92,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vWEbHvnkogA","annotations":[{"segment":[79.37395505304212,88.30910946801872],"label":"Playing saxophone"},{"segment":[129.7086582574103,140.13300507488302],"label":"Playing saxophone"},{"segment":[174.68226881279253,179.74552298127924],"label":"Playing 
saxophone"}]},"vbWdGj1sfO8":{"duration":11.076,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vbWdGj1sfO8","annotations":[{"segment":[1.3232486668048988,10.938],"label":"Doing crunches"}]},"pKOwZrmQan4":{"duration":118.074,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pKOwZrmQan4","annotations":[{"segment":[15.472966165366616,113.1000145897036],"label":"Disc dog"}]},"XTErpg4IdiE":{"duration":201.55,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=XTErpg4IdiE","annotations":[{"segment":[108.16369191887675,188.65760218408735],"label":"Installing carpet"}]},"u3uYs6SZFKo":{"duration":40.078,"subset":"validation","resolution":"630x480","url":"https://www.youtube.com/watch?v=u3uYs6SZFKo","annotations":[{"segment":[6.66921781160624,36.649537669979864],"label":"Swinging at the playground"}]},"7n1L0evoi34":{"duration":83.616,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7n1L0evoi34","annotations":[{"segment":[8.721602184087363,11.58541185647426],"label":"Grooming dog"},{"segment":[20.176840873634944,33.71485023400936],"label":"Grooming dog"},{"segment":[38.0105647425897,63.13398595943838],"label":"Grooming dog"}]},"NQOPahBcpSE":{"duration":171.22,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NQOPahBcpSE","annotations":[{"segment":[46.47886805616225,118.06700965990639],"label":"Walking the dog"},{"segment":[120.47108904212169,145.84748252106084],"label":"Walking the dog"},{"segment":[150.6556412854914,155.73091998127924],"label":"Walking the 
dog"}]},"NzxZdC-63LE":{"duration":187.85,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NzxZdC-63LE","annotations":[{"segment":[12.076415117697174,173.29921423296196],"label":"Tumbling"}]},"R5_Xq9OJuoQ":{"duration":150.57999999999998,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=R5_Xq9OJuoQ","annotations":[]},"uZC3WPkta-I":{"duration":95.411,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uZC3WPkta-I","annotations":[{"segment":[3.2644416865696515,15.135146961940556],"label":"Croquet"},{"segment":[22.2575701271631,52.082716230399456],"label":"Croquet"},{"segment":[57.57291732526727,73.59836916974626],"label":"Croquet"},{"segment":[84.28200373273228,90.81089152122372],"label":"Croquet"}]},"KjUxjcpIG_Y":{"duration":68.337,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KjUxjcpIG_Y","annotations":[{"segment":[0.2,53.753669376947045],"label":"Gargling mouthwash"}]},"SXcxyuXxY8M":{"duration":237.29,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SXcxyuXxY8M","annotations":[]},"nE0_PaRBXeA":{"duration":36.224,"subset":"training","resolution":"300x240","url":"https://www.youtube.com/watch?v=nE0_PaRBXeA","annotations":[{"segment":[0.010000000000000002,36.201],"label":"Using the rowing machine"}]},"CXbVcrVgNzQ":{"duration":46.39,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=CXbVcrVgNzQ","annotations":[{"segment":[1.36461831,4.234037360686427],"label":"Ironing clothes"},{"segment":[27.466960314196566,43.89647280780031],"label":"Ironing 
clothes"}]},"IN23CO8Kotk":{"duration":60.56,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IN23CO8Kotk","annotations":[]},"pLcrLi5oAvU":{"duration":174.29,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=pLcrLi5oAvU","annotations":[{"segment":[9.788461905823171,112.02350766302654],"label":"Getting a haircut"}]},"Uw_4gKpmAhI":{"duration":48.159,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Uw_4gKpmAhI","annotations":[{"segment":[0.19999999999999998,12.23161154446178],"label":"Plastering"},{"segment":[34.593698907956316,40.8220655226209],"label":"Plastering"}]},"FDvZUUc5tw4":{"duration":104.513,"subset":"validation","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=FDvZUUc5tw4","annotations":[{"segment":[18.415121684867394,91.4412496099844],"label":"Clipping cat claws"}]},"txsupdxCToQ":{"duration":88.817,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=txsupdxCToQ","annotations":[{"segment":[10.497728555360712,14.917825676635353],"label":"Rollerblading"},{"segment":[20.02856297310915,53.86993062950301],"label":"Rollerblading"},{"segment":[69.75465438533504,81.35740912872541],"label":"Rollerblading"}]},"1-Ud-q4y1oc":{"duration":80.02,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=1-Ud-q4y1oc","annotations":[{"segment":[10.98023088923557,76.61206552262091],"label":"Cleaning windows"}]},"8Kjuph57CRE":{"duration":112.176,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=8Kjuph57CRE","annotations":[]},"fmtW5lcdT_0":{"duration":13.4,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=fmtW5lcdT_0","annotations":[{"segment":[0.6270474492449085,10.8485],"label":"Playing 
kickball"}]},"_tPDUYSu1IQ":{"duration":181.232,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=_tPDUYSu1IQ","annotations":[{"segment":[8.762263650546023,10.740839313572543],"label":"Arm wrestling"},{"segment":[67.27157254290172,101.75531981279251],"label":"Arm wrestling"},{"segment":[113.9220015600624,117.87958970358814],"label":"Arm wrestling"},{"segment":[124.66402652106085,127.7735600624025],"label":"Arm wrestling"},{"segment":[132.01383307332293,137.66753042121684],"label":"Arm wrestling"},{"segment":[152.64982839313572,171.02434477379094],"label":"Arm wrestling"}]},"U4UHEGNFyz8":{"duration":187.873,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=U4UHEGNFyz8","annotations":[{"segment":[11.430597613104524,187.87213151927438],"label":"BMX"}]},"OXbfnzs-qUU":{"duration":58.074,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=OXbfnzs-qUU","annotations":[{"segment":[0,58.073105],"label":"Knitting"}]},"WMVJqLMtaws":{"duration":109.47999999999999,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=WMVJqLMtaws","annotations":[{"segment":[5.807162074882996,63.19558728549141],"label":"Hopscotch"},{"segment":[85.74104004680187,108.28649280811233],"label":"Hopscotch"}]},"yB4-L2LA5So":{"duration":156.294,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yB4-L2LA5So","annotations":[]},"0x4TP4MPelY":{"duration":147.8,"subset":"validation","resolution":"176x144","url":"https://www.youtube.com/watch?v=0x4TP4MPelY","annotations":[{"segment":[2.9974027260410145,139.95564909516378],"label":"Doing nails"}]},"aMQVH3dF0QM":{"duration":147.17000000000002,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aMQVH3dF0QM","annotations":[{"segment":[37.186960998439936,111.56088299531982],"label":"Horseback 
riding"}]},"OkvWTZ4XLyM":{"duration":12.59,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=OkvWTZ4XLyM","annotations":[]},"yauw3CrvbtY":{"duration":186.41,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yauw3CrvbtY","annotations":[{"segment":[90.76328237129486,108.21775975039002],"label":"Plataform diving"}]},"arTumJHoLB0":{"duration":12.214,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=arTumJHoLB0","annotations":[{"segment":[0.01,12.213696145124716],"label":"Applying sunscreen"}]},"do_AQlxV9SI":{"duration":73.7,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=do_AQlxV9SI","annotations":[{"segment":[42.99016224648986,57.24358502340093],"label":"Shot put"}]},"Fsn1rBifmjc":{"duration":121.255,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Fsn1rBifmjc","annotations":[]},"NkJA_0tErGY":{"duration":194.72,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NkJA_0tErGY","annotations":[{"segment":[52.3588143525741,145.27301092043683],"label":"Making a sandwich"}]},"F1RpG1Vwi28":{"duration":130.33,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=F1RpG1Vwi28","annotations":[{"segment":[57.135314754086764,63.4384989440394],"label":"Shaving"}]},"gA0m7YUH408":{"duration":11.053,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=gA0m7YUH408","annotations":[{"segment":[0.2,10.852697000000001],"label":"Croquet"}]},"Jz2xJx1ICCM":{"duration":29.188,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=Jz2xJx1ICCM","annotations":[{"segment":[2.178847182438294,25.73763983169084],"label":"Arm 
wrestling"}]},"TI3WqIauOYs":{"duration":89.75,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=TI3WqIauOYs","annotations":[{"segment":[4.97234012,76.19554212168487],"label":"Doing karate"}]},"OrBcDTsll-A":{"duration":81.78,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OrBcDTsll-A","annotations":[]},"wEgt41AJaU4":{"duration":28.61,"subset":"training","resolution":"352x264","url":"https://www.youtube.com/watch?v=wEgt41AJaU4","annotations":[{"segment":[0,28.606983],"label":"Paintball"}]},"gKLbdLKEG6U":{"duration":231.364,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gKLbdLKEG6U","annotations":[{"segment":[14.786928237129485,149.67256630265211],"label":"Rafting"}]},"6PaTEwG8Ym4":{"duration":20.81,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6PaTEwG8Ym4","annotations":[{"segment":[1.653402496099844,17.798391575663025],"label":"Getting a haircut"}]},"BiRD6i3hVlw":{"duration":175.358,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BiRD6i3hVlw","annotations":[]},"b3CAq_EiavU":{"duration":65.69,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=b3CAq_EiavU","annotations":[]},"BkDbDnvoL28":{"duration":211.07,"subset":"testing","resolution":"854x468","url":"https://www.youtube.com/watch?v=BkDbDnvoL28","annotations":[]},"vLEz1mHahdM":{"duration":49.32,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vLEz1mHahdM","annotations":[{"segment":[8.641181990000002,45.23615513416537],"label":"Playing bagpipes"}]},"xD19KSWY72A":{"duration":62.532,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xD19KSWY72A","annotations":[]},"hiVs1hNyPpw":{"duration":64.55,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=hiVs1hNyPpw","annotations":[{"segment":[0.01,64.551473],"label":"Brushing 
hair"}]},"vwZ-8NnLahA":{"duration":20.875,"subset":"testing","resolution":"270x480","url":"https://www.youtube.com/watch?v=vwZ-8NnLahA","annotations":[]},"iH8eHkIHrg4":{"duration":188.221,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iH8eHkIHrg4","annotations":[{"segment":[10.277246613674071,179.41136151326054],"label":"Surfing"}]},"TfIGKODkpPY":{"duration":122.88,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TfIGKODkpPY","annotations":[{"segment":[77.44698844929798,122.879999],"label":"Kite flying"}]},"3lsq0a5FQfc":{"duration":193.167,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3lsq0a5FQfc","annotations":[]},"kpAwQpA1nPs":{"duration":181.05,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kpAwQpA1nPs","annotations":[{"segment":[0.01,123.28640014352574],"label":"Pole vault"}]},"5LGh56euaZs":{"duration":122.904,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5LGh56euaZs","annotations":[]},"lARaqx1e7wM":{"duration":98.825,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lARaqx1e7wM","annotations":[{"segment":[4.617949859071355,91.89720219551997],"label":"Decorating the Christmas tree"}]},"9WXl-2sINno":{"duration":119.35,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=9WXl-2sINno","annotations":[{"segment":[21.57848,77.37460499999999],"label":"Putting on 
makeup"}]},"L9MTwigRhmk":{"duration":174.568,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=L9MTwigRhmk","annotations":[{"segment":[27.778310675284153,166.94219942901717],"label":"BMX"}]},"_1vYKA7mNLI":{"duration":61.858,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_1vYKA7mNLI","annotations":[{"segment":[11.4975506142233,52.7115506142233],"label":"Kneeling"}]},"gUQFX_IydG8":{"duration":113.8,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=gUQFX_IydG8","annotations":[{"segment":[17.22102150480577,111.49279902080437],"label":"Doing karate"}]},"QEfXMtnI6rw":{"duration":78.3,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QEfXMtnI6rw","annotations":[{"segment":[49.68447737909516,70.43720748829953],"label":"Shaving"}]},"Iy53PoamjTk":{"duration":127.01,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Iy53PoamjTk","annotations":[]},"FIaXCUPjFY0":{"duration":29.03,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=FIaXCUPjFY0","annotations":[{"segment":[0.05754027321477396,22.02969262835496],"label":"Bathing dog"}]},"R37pbIySnjg":{"duration":120.093,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=R37pbIySnjg","annotations":[{"segment":[0.01,119.23309768325275],"label":"Painting 
fence"}]},"sAJ-hixMlO8":{"duration":106.464,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=sAJ-hixMlO8","annotations":[]},"nqt4s3j9L6o":{"duration":85.845,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nqt4s3j9L6o","annotations":[]},"eUCVKv4R-7A":{"duration":140.88,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eUCVKv4R-7A","annotations":[{"segment":[19.335425897035883,140.18183775351014],"label":"Kayaking"}]},"uWnlmM2uOs4":{"duration":30.395,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=uWnlmM2uOs4","annotations":[{"segment":[0.2,30.194919000000002],"label":"Applying sunscreen"}]},"uY0ngQlbIZc":{"duration":59.56,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uY0ngQlbIZc","annotations":[{"segment":[0.01,45.017822916998306],"label":"Playing squash"}]},"86hyAYM5d3E":{"duration":168.902,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=86hyAYM5d3E","annotations":[{"segment":[13.414191588785044,141.24354672897195],"label":"Cutting the grass"}]},"GZiG-aSzIJI":{"duration":137.09,"subset":"training","resolution":"938x720","url":"https://www.youtube.com/watch?v=GZiG-aSzIJI","annotations":[{"segment":[42.11067082683308,119.91921996879876],"label":"Paintball"}]},"qdMjXJTsX94":{"duration":172.803,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qdMjXJTsX94","annotations":[{"segment":[9.797539053701026,160.7980659415515],"label":"Playing drums"}]},"bqv3vCT0T5s":{"duration":132.31,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=bqv3vCT0T5s","annotations":[{"segment":[8.505728329853236,131.33293976910556],"label":"Belly 
dance"}]},"gdYr4E3qobI":{"duration":100.496,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gdYr4E3qobI","annotations":[{"segment":[48.58488299531981,67.3919344773791],"label":"Ice fishing"}]},"vgC8jB2FhAg":{"duration":183.09,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vgC8jB2FhAg","annotations":[{"segment":[2.2850463775351013,173.09226309828392],"label":"Ballet"}]},"GYsCbanlHSc":{"duration":146.333,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=GYsCbanlHSc","annotations":[]},"dDYQNtOtb-Q":{"duration":33.48,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dDYQNtOtb-Q","annotations":[{"segment":[2.8729711622464897,5.171348092043682],"label":"Drinking coffee"}]},"ED7SKNfAKyI":{"duration":13.77,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=ED7SKNfAKyI","annotations":[{"segment":[0,13.769433],"label":"Baton twirling"}]},"iAIl5eawd6I":{"duration":215.273,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=iAIl5eawd6I","annotations":[{"segment":[5.363763239875389,211.19817757009346],"label":"Skiing"}]},"sx_npA4wRrw":{"duration":203.66,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=sx_npA4wRrw","annotations":[{"segment":[32.2462663,196.34945397815912],"label":"Preparing pasta"}]},"62s1ZSNLJ6g":{"duration":135.49,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=62s1ZSNLJ6g","annotations":[{"segment":[75.03866025999999,115.13402939],"label":"Cleaning 
shoes"}]},"hbOzx9P3l40":{"duration":127.547,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=hbOzx9P3l40","annotations":[]},"vygPNS-eAYM":{"duration":49.37,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=vygPNS-eAYM","annotations":[{"segment":[0,49.36562358276644],"label":"Hopscotch"}]},"LOINRpWGrlw":{"duration":146.472,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=LOINRpWGrlw","annotations":[{"segment":[6.169599477856665,143.95731950078002],"label":"Playing ten pins"}]},"WrFNI5GQFPM":{"duration":214.019,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WrFNI5GQFPM","annotations":[{"segment":[47.07735320025046,148.9113434726989],"label":"Braiding hair"}]},"5FSdOfvJnek":{"duration":96.316,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5FSdOfvJnek","annotations":[{"segment":[38.76697253354134,68.96914880967239],"label":"Rafting"}]},"DHiFKO68gT8":{"duration":181.302,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DHiFKO68gT8","annotations":[{"segment":[15.264982839313573,45.79494851794072],"label":"BMX"},{"segment":[46.07763338533542,78.58639313572542],"label":"BMX"},{"segment":[101.76655226209047,118.16227457098283],"label":"BMX"},{"segment":[131.44846333853354,137.95021528861153],"label":"BMX"},{"segment":[151.80177379095164,163.97531981279252],"label":"BMX"}]},"Vh4TxFOCNM8":{"duration":57.656,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=Vh4TxFOCNM8","annotations":[{"segment":[0.01,57.621],"label":"Futsal"}]},"ULZpTYSwlTs":{"duration":27.89,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ULZpTYSwlTs","annotations":[{"segment":[2.3653753400000004,26.341591263650546],"label":"Snatch"}]},"KXXnSU0PccI":{"duration":235.613,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.
com/watch?v=KXXnSU0PccI","annotations":[]},"m1pNOYN-DoI":{"duration":68.987,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=m1pNOYN-DoI","annotations":[{"segment":[3.3259408871441267,8.368498907760356],"label":"Beer pong"},{"segment":[16.415134047041573,17.70259566932657],"label":"Beer pong"},{"segment":[26.82211500365442,28.109576602410254],"label":"Beer pong"},{"segment":[34.97603846244136,41.62792338934649],"label":"Beer pong"}]},"IC-Rd7ouQmE":{"duration":46.115,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=IC-Rd7ouQmE","annotations":[{"segment":[0.01,46.081],"label":"BMX"}]},"rVxxAI6wlXk":{"duration":224.17000000000002,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rVxxAI6wlXk","annotations":[{"segment":[103.84389547581904,123.0742464898596],"label":"Shaving"},{"segment":[127.9692449297972,143.00388299531983],"label":"Shaving"},{"segment":[164.33209048361937,168.17816068642747],"label":"Shaving"},{"segment":[173.7724446177847,189.8560109204368],"label":"Shaving"}]},"aAz3YL2RMr4":{"duration":207.66,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=aAz3YL2RMr4","annotations":[]},"WcQV3OzK_sc":{"duration":218.5,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WcQV3OzK_sc","annotations":[{"segment":[39.20042711388456,105.67071656786271],"label":"Getting a haircut"},{"segment":[106.07689452,176.91323193135727],"label":"Getting a haircut"}]},"bi_xkH87Rnw":{"duration":87.7,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bi_xkH87Rnw","annotations":[{"segment":[5.746417056682268,82.22848996567863],"label":"Using the balance beam"}]},"-fBxVUu0KcQ":{"duration":145.172,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=-fBxVUu0KcQ","annotations":[{"segment":[8.815761682242991,131.3322445482866],"label":"Beach 
soccer"}]},"LoBjzA2z2Ls":{"duration":229.53,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LoBjzA2z2Ls","annotations":[{"segment":[23.633214046929226,221.29282103588145],"label":"Preparing salad"}]},"F03y7m3Nwuw":{"duration":49.99,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=F03y7m3Nwuw","annotations":[{"segment":[9.507435257410297,23.612728549141966],"label":"Discus throw"},{"segment":[39.43247737909517,45.90065054602184],"label":"Discus throw"}]},"g0XO-rWHmzs":{"duration":73.98,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=g0XO-rWHmzs","annotations":[{"segment":[0,73.978774],"label":"Walking the dog"}]},"ouKsBHHEnYc":{"duration":102.22,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=ouKsBHHEnYc","annotations":[]},"jDlF26-Uu2I":{"duration":75.3,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jDlF26-Uu2I","annotations":[{"segment":[19.48544773790952,68.9582792152886],"label":"Cheerleading"}]},"3JcvtncHhLw":{"duration":104.746,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3JcvtncHhLw","annotations":[{"segment":[18.62858171606864,84.48225216848672],"label":"Running a marathon"}]},"QRn9v8F8Nn0":{"duration":375.537,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=QRn9v8F8Nn0","annotations":[{"segment":[100.75845865834631,330.4116131045242],"label":"Removing ice from car"}]},"2rHsoF35eQw":{"duration":136.534,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=2rHsoF35eQw","annotations":[{"segment":[24.49505956318253,127.80031076443058],"label":"Rafting"}]},"dth916VxJUw":{"duration":141.13,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dth916VxJUw","annotations":[{"segment":[23.55850198633796,127.70029023400937],"label":"Using parallel 
bars"}]},"Te9e32TDiZ8":{"duration":147.63,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Te9e32TDiZ8","annotations":[{"segment":[1.8425269266770672,123.21898822152887],"label":"Getting a piercing"}]},"xIG7FQWBWZU":{"duration":33.53,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xIG7FQWBWZU","annotations":[{"segment":[21.394090042121686,25.787986285491417],"label":"Curling"}]},"aXm3sPITCKM":{"duration":180.84,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aXm3sPITCKM","annotations":[{"segment":[31.091305409999997,131.63848578],"label":"Getting a haircut"}]},"wCexKIuAj3A":{"duration":133.63,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=wCexKIuAj3A","annotations":[]},"qwbxSLVwkfA":{"duration":129.22,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qwbxSLVwkfA","annotations":[]},"doZb3RlLSts":{"duration":223.655,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=doZb3RlLSts","annotations":[{"segment":[35.758428237129486,188.56029719188768],"label":"Installing carpet"}]},"YgrWck47C-g":{"duration":86.31,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=YgrWck47C-g","annotations":[{"segment":[28.865810452418096,49.32083697347893],"label":"Washing hands"}]},"EWlt9TTOw30":{"duration":82.501,"subset":"validation","resolution":"540x360","url":"https://www.youtube.com/watch?v=EWlt9TTOw30","annotations":[{"segment":[12.475471138845554,74.099120124805],"label":"Decorating the Christmas 
tree"}]},"J_dUIu1pXYU":{"duration":93.832,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=J_dUIu1pXYU","annotations":[]},"6mhc4G2odkc":{"duration":38.64,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6mhc4G2odkc","annotations":[]},"C8hdbmjUMRc":{"duration":154.738,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=C8hdbmjUMRc","annotations":[{"segment":[0.2,154.7374829931973],"label":"Laying tile"}]},"iFwmJi-I5sg":{"duration":78.51,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iFwmJi-I5sg","annotations":[{"segment":[3.306809194816772,78.26114917628705],"label":"Discus throw"}]},"ivBgqENFmt0":{"duration":166.72,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ivBgqENFmt0","annotations":[{"segment":[16.120096723868954,42.125307332293296],"label":"Fixing the roof"},{"segment":[108.43366146645866,119.09500468018722],"label":"Fixing the roof"},{"segment":[123.77559438377537,129.4963151326053],"label":"Fixing the roof"}]},"7kO_qcJEiu0":{"duration":186.573,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=7kO_qcJEiu0","annotations":[{"segment":[45.04472824436109,178.14463370872275],"label":"Braiding hair"}]},"gYBIX_tGaXo":{"duration":33.3,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gYBIX_tGaXo","annotations":[{"segment":[1.24297701,20.95013943],"label":"Hammer throw"}]},"ygHElEUxVwA":{"duration":222.68,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ygHElEUxVwA","annotations":[{"segment":[16.674885966867247,183.0763521778966],"label":"Zumba"}]},"VCg6_fuipp8":{"duration":98.59,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=VCg6_fuipp8","annotations":[{"segment":[11.997145441497661,20.918099744149767],"label":"High 
jump"},{"segment":[42.605247273010924,67.36858594071764],"label":"High jump"}]},"3jZq0UaDIks":{"duration":130.636,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3jZq0UaDIks","annotations":[{"segment":[45.75289467143411,114.4228707305321],"label":"River tubing"}]},"gY0qgtM0Gt8":{"duration":101.1,"subset":"validation","resolution":"270x480","url":"https://www.youtube.com/watch?v=gY0qgtM0Gt8","annotations":[{"segment":[20.018889942002815,32.60219212268817],"label":"Blowing leaves"},{"segment":[52.106310502750475,66.89169056505577],"label":"Blowing leaves"},{"segment":[75.54271081427694,99.45098495757914],"label":"Blowing leaves"}]},"UPZm0tx77Vw":{"duration":120.21,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=UPZm0tx77Vw","annotations":[{"segment":[27.004988353198126,112.33325027150747],"label":"Ping-pong"}]},"P9qhbSYblG4":{"duration":115.706,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=P9qhbSYblG4","annotations":[{"segment":[16.239453978159126,105.93564274570983],"label":"Playing ten pins"}]},"DcoHV4WiHQk":{"duration":17.137,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DcoHV4WiHQk","annotations":[{"segment":[0.010000000000000002,17.136325],"label":"Building sandcastles"}]},"-zlGTPxjils":{"duration":101.03,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=-zlGTPxjils","annotations":[]},"KxXlMXEkTv8":{"duration":147.19,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KxXlMXEkTv8","annotations":[]},"ksk7okDk_wY":{"duration":50.875,"subset":"training","resolution":"636x360","url":"https://www.youtube.com/watch?v=ksk7okDk_wY","annotations":[{"segment":[0.19999999999999998,50.620999999999995],"label":"Beach 
soccer"}]},"vI-O6QWIIGU":{"duration":85.68,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vI-O6QWIIGU","annotations":[]},"s7mP374Twwo":{"duration":37.83,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=s7mP374Twwo","annotations":[]},"HoViId2LHl8":{"duration":135.19,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HoViId2LHl8","annotations":[{"segment":[0.2,126.53922964118566],"label":"Cumbia"}]},"-lV2zo5XsCg":{"duration":30.67,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-lV2zo5XsCg","annotations":[{"segment":[10.240470889235569,17.226960374414976],"label":"Grooming horse"}]},"uPyciwjOz3E":{"duration":183.58,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uPyciwjOz3E","annotations":[{"segment":[5.726708268330733,158.1748127159879],"label":"Walking the dog"}]},"LQ7X62seYYI":{"duration":120.628,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LQ7X62seYYI","annotations":[{"segment":[13.173062922516902,104.06719704680188],"label":"Snowboarding"}]},"KtUshVJ28us":{"duration":104.17,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KtUshVJ28us","annotations":[{"segment":[17.87533757132598,71.1763433822153],"label":"Tumbling"}]},"gDdkyHysoJs":{"duration":9.71,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=gDdkyHysoJs","annotations":[]},"Cu8kOOnAjaw":{"duration":121.63,"subset":"training","resolution":"240x180","url":"https://www.youtube.com/watch?v=Cu8kOOnAjaw","annotations":[{"segment":[0.01,120.86714508580343],"label":"Washing 
face"}]},"vALXRFRlVRk":{"duration":40.47,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=vALXRFRlVRk","annotations":[]},"po9uigsunAU":{"duration":139.669,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=po9uigsunAU","annotations":[{"segment":[57.50381279251171,128.3128689547582],"label":"Running a marathon"}]},"L1XpfS1RCzE":{"duration":25.08,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=L1XpfS1RCzE","annotations":[{"segment":[8.21573434365946,23.942997115132606],"label":"Kayaking"}]},"nOm1JLcBD30":{"duration":178.655,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nOm1JLcBD30","annotations":[{"segment":[14.447935442256217,169.20759703421285],"label":"Riding bumper cars"}]},"0al9HOh_jjA":{"duration":50.78,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=0al9HOh_jjA","annotations":[]},"Marb8CMxPV0":{"duration":175.938,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Marb8CMxPV0","annotations":[{"segment":[0.01,38.97516996190051],"label":"Disc dog"},{"segment":[77.1269208400989,175.73721088435374],"label":"Disc dog"}]},"8CwhsT1vwKE":{"duration":202.734,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=8CwhsT1vwKE","annotations":[]},"s3heOnMEtHo":{"duration":86.495,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=s3heOnMEtHo","annotations":[]},"W_5Mx-mfNmU":{"duration":87.15,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=W_5Mx-mfNmU","annotations":[{"segment":[5.33408424,72.89759954602184],"label":"Starting a campfire"}]},"tjUuEqmLGeI":{"duration":161.031,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tjUuEqmLGeI","annotations":[{"segment":[14.384064868356582,150.95749944779584],"label":"Baton 
twirling"}]},"0Z_mhRoqG2E":{"duration":108.35,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=0Z_mhRoqG2E","annotations":[{"segment":[9.296313512404442,58.14421542303869],"label":"Rock climbing"}]},"hThdSxhMhDo":{"duration":42.26,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=hThdSxhMhDo","annotations":[{"segment":[2.24157687675507,6.724730630265211],"label":"Washing face"},{"segment":[9.361879897035882,13.185746333853356],"label":"Washing face"},{"segment":[15.229537015600624,20.174191890795633],"label":"Washing face"},{"segment":[25.44849042433697,27.228566179407178],"label":"Washing face"}]},"T3mpSZst-84":{"duration":78.367,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=T3mpSZst-84","annotations":[]},"eS1r2Qi0qUM":{"duration":61.49,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eS1r2Qi0qUM","annotations":[{"segment":[0.01,60.23944502814126],"label":"Playing badminton"}]},"5wchVLM0f5Y":{"duration":40.914,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=5wchVLM0f5Y","annotations":[{"segment":[2.489280500780031,35.61585947269891],"label":"Playing rubik cube"}]},"kfO5PGlsMJw":{"duration":194.05,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=kfO5PGlsMJw","annotations":[]},"7YOV0kZQIAo":{"duration":84.38,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7YOV0kZQIAo","annotations":[{"segment":[1.974477379095164,82.40152262090484],"label":"Playing saxophone"}]},"22n4w0uferc":{"duration":189.475,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=22n4w0uferc","annotations":[{"segment":[7.092954758190328,177.61940873634947],"label":"Waxing 
skis"}]},"Xk3KngL2ZH0":{"duration":62.601,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Xk3KngL2ZH0","annotations":[{"segment":[12.403005447737911,40.52950599063962],"label":"Playing congas"}]},"a_EqAwUOxpM":{"duration":54.29,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=a_EqAwUOxpM","annotations":[]},"rWdXyKZnL2U":{"duration":151.49,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rWdXyKZnL2U","annotations":[{"segment":[24.57823143401927,31.90443503454424],"label":"Playing violin"},{"segment":[54.35570413292723,58.13697050739173],"label":"Playing violin"},{"segment":[72.78937770844168,85.55115172225938],"label":"Playing violin"}]},"4z0meF-_b1I":{"duration":105.09,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=4z0meF-_b1I","annotations":[]},"UnZtNrtvSPA":{"duration":93.321,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UnZtNrtvSPA","annotations":[{"segment":[9.608714096723869,82.25641613104523],"label":"Braiding hair"}]},"k25ECDpOD0E":{"duration":183.09,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=k25ECDpOD0E","annotations":[{"segment":[12.277539781591262,54.820642745709826],"label":"Brushing teeth"}]},"cfwJd80WCZU":{"duration":17.88,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cfwJd80WCZU","annotations":[{"segment":[0.01,17.861],"label":"Powerbocking"}]},"eepvgF5Fi_Q":{"duration":139.065,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eepvgF5Fi_Q","annotations":[{"segment":[0.01,137.8083560936592],"label":"Playing blackjack"}]},"5HCYb6qfkdk":{"duration":30.7,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=5HCYb6qfkdk","annotations":[{"segment":[5.156996919999999,25.868408736349455],"label":"Shaving 
legs"}]},"STaS53CNlZI":{"duration":184.55,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=STaS53CNlZI","annotations":[{"segment":[39.73199167966718,71.11450684694054],"label":"Bungee jumping"},{"segment":[76.29694054428843,178.21813659213035],"label":"Bungee jumping"}]},"lKSlIMfWZXI":{"duration":187.71,"subset":"validation","resolution":"176x144","url":"https://www.youtube.com/watch?v=lKSlIMfWZXI","annotations":[{"segment":[10.09988845553822,95.5830023400936],"label":"Washing dishes"}]},"KWpATsiXF_M":{"duration":170.76,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=KWpATsiXF_M","annotations":[]},"TGVWaA_xTGE":{"duration":122.973,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=TGVWaA_xTGE","annotations":[{"segment":[4.407711388455538,17.63084555382215],"label":"Playing congas"},{"segment":[21.08035881435257,29.517307332293292],"label":"Playing congas"},{"segment":[37.56748205928237,119.02758346333854],"label":"Playing congas"}]},"qcsGJTJstZ4":{"duration":30.93,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qcsGJTJstZ4","annotations":[{"segment":[0.5790136287051483,27.88915644929797],"label":"Dodgeball"}]},"ss6XN-JP_x8":{"duration":214.367,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ss6XN-JP_x8","annotations":[{"segment":[16.721265210608426,168.21592801872075],"label":"Making a cake"}]},"rLCRgksVNE8":{"duration":113.55,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rLCRgksVNE8","annotations":[{"segment":[0,113.54523809523809],"label":"Getting a 
piercing"}]},"UKnr1rv3jJ8":{"duration":142.41,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=UKnr1rv3jJ8","annotations":[]},"NjD6iWI3NvM":{"duration":51.711,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NjD6iWI3NvM","annotations":[]},"3gc3nddr8TE":{"duration":58.14,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=3gc3nddr8TE","annotations":[{"segment":[3.918996967373845,56.53729364182867],"label":"Playing field hockey"}]},"n1NqFiDdlEU":{"duration":65.6,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=n1NqFiDdlEU","annotations":[{"segment":[9.721720773592848,15.657087140628482],"label":"Doing motocross"},{"segment":[23.843799371022456,27.52781987469975],"label":"Doing motocross"},{"segment":[32.43984634945398,45.33391776911076],"label":"Doing motocross"},{"segment":[57.61398578783152,63.65168589703588],"label":"Doing motocross"}]},"--1DO2V4K74":{"duration":211.53,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=--1DO2V4K74","annotations":[{"segment":[30.025882995319815,205.2318595943838],"label":"Rock climbing"}]},"C7yd6yEkxXE":{"duration":8.52,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=C7yd6yEkxXE","annotations":[]},"m978SIFnHS8":{"duration":217.2,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=m978SIFnHS8","annotations":[{"segment":[10.843030553875217,214.48869814384412],"label":"Sailing"}]},"8ZqRn0ovdAI":{"duration":157.571,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8ZqRn0ovdAI","annotations":[{"segment":[2.210560062402496,142.96770982839317],"label":"Playing congas"}]},"vt46wY2Q9JQ":{"duration":54.66,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=vt46wY2Q9JQ","annotations":[{"segment":[0.2,53.46781557067271],"label":"Decorating the Christmas 
tree"}]},"TUfYisuVrs0":{"duration":104.56,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=TUfYisuVrs0","annotations":[{"segment":[7.5000571544461785,100.92468214352574],"label":"Plataform diving"}]},"HKgnushHynQ":{"duration":234.7,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HKgnushHynQ","annotations":[{"segment":[133.3432506021841,191.5893408377535],"label":"Playing water polo"}]},"yUYTlwiP16E":{"duration":121.12,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yUYTlwiP16E","annotations":[{"segment":[17.47085928627145,42.402247673166926],"label":"Shoveling snow"}]},"IxaoK4TbALQ":{"duration":102.28,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=IxaoK4TbALQ","annotations":[{"segment":[59.67890577223089,88.08223525741029],"label":"Polishing forniture"}]},"f-4PjtdFbP4":{"duration":12.702,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=f-4PjtdFbP4","annotations":[{"segment":[0,12.661],"label":"Blowing leaves"}]},"lbbe4PnEGRY":{"duration":159.289,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=lbbe4PnEGRY","annotations":[]},"7_5VT6QCqNI":{"duration":184.088,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7_5VT6QCqNI","annotations":[{"segment":[61.458329151326055,159.10240350390015],"label":"Blow-drying hair"}]},"3AgR6BJTQiI":{"duration":7.08,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=3AgR6BJTQiI","annotations":[]},"NGvote9Y6gI":{"duration":79.761,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=NGvote9Y6gI","annotations":[{"segment":[0.01,79.721],"label":"Playing 
blackjack"}]},"-wWtZjqIH9o":{"duration":19.32,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-wWtZjqIH9o","annotations":[{"segment":[0.2,12.987631825273011],"label":"Fun sliding down"}]},"IsHM24qWmpI":{"duration":208.028,"subset":"validation","resolution":"853x480","url":"https://www.youtube.com/watch?v=IsHM24qWmpI","annotations":[{"segment":[0.01,0.02],"label":"Assembling bicycle"}]},"ccKJg_f1UDo":{"duration":33.02,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ccKJg_f1UDo","annotations":[{"segment":[0.01,33.01877551020408],"label":"Using the balance beam"}]},"XXIDYn4H_1A":{"duration":78.554,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=XXIDYn4H_1A","annotations":[{"segment":[34.25993176086634,76.22834695327103],"label":"BMX"}]},"ypf6WHYpeRU":{"duration":156.712,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ypf6WHYpeRU","annotations":[{"segment":[0.2,106.59284604056161],"label":"Kite flying"}]},"fc3yCFrmkCU":{"duration":202.827,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fc3yCFrmkCU","annotations":[{"segment":[12.656856287051482,198.07980089235568],"label":"Snowboarding"}]},"pzZ4TGcMK1k":{"duration":184.37,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pzZ4TGcMK1k","annotations":[{"segment":[0,184.36643990929704],"label":"Plataform diving"}]},"xe_TqEhok74":{"duration":23.66,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=xe_TqEhok74","annotations":[]},"wfc_R5VnoHU":{"duration":38.174,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wfc_R5VnoHU","annotations":[{"segment":[0.01,30.252689683740478],"label":"Scuba 
diving"}]},"TtQPtLPTqBA":{"duration":169.07,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=TtQPtLPTqBA","annotations":[{"segment":[10.151807332293291,29.928055382215287],"label":"Washing hands"}]},"Scv939uhNCc":{"duration":43.584,"subset":"training","resolution":"423x240","url":"https://www.youtube.com/watch?v=Scv939uhNCc","annotations":[{"segment":[0.01,40.92582293291731],"label":"Playing rubik cube"}]},"O5dU3oaxWz8":{"duration":35.39,"subset":"testing","resolution":"160x112","url":"https://www.youtube.com/watch?v=O5dU3oaxWz8","annotations":[]},"fhOmyyI0eQE":{"duration":55.752,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fhOmyyI0eQE","annotations":[]},"ghHZ9M-jx0w":{"duration":26.03,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ghHZ9M-jx0w","annotations":[]},"HgKZ4KAuhdI":{"duration":55.29,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=HgKZ4KAuhdI","annotations":[{"segment":[0.2,55.286371],"label":"Grooming horse"}]},"HKkzII7ap7E":{"duration":209.3,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=HKkzII7ap7E","annotations":[{"segment":[40.16286934742696,86.52975916315563],"label":"Playing flauta"},{"segment":[124.08040936603452,160.65147738970785],"label":"Playing flauta"},{"segment":[177.30437443620193,209.1040589569161],"label":"Playing flauta"}]},"vpu4Bwq-xHI":{"duration":132.33,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=vpu4Bwq-xHI","annotations":[{"segment":[21.883050364191437,126.75653701522211],"label":"Playing 
flauta"}]},"qMcb8794F6U":{"duration":114.428,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qMcb8794F6U","annotations":[]},"4mSPGxeKK2k":{"duration":60.442,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4mSPGxeKK2k","annotations":[{"segment":[0.01,60.441541950113375],"label":"Tug of war"}]},"ktfxYSZjJyM":{"duration":18.74,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ktfxYSZjJyM","annotations":[{"segment":[1.2277958546913303,18.738502],"label":"Windsurfing"}]},"NSavdMErlYA":{"duration":104.50999999999999,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NSavdMErlYA","annotations":[{"segment":[43.04436223163212,83.96911571700964],"label":"Preparing pasta"}]},"C03QJbrKzaw":{"duration":126.015,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=C03QJbrKzaw","annotations":[{"segment":[72.12952730109205,95.51757566302652],"label":"Trimming branches or hedges"}]},"HsklqPvsMEQ":{"duration":54.8,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=HsklqPvsMEQ","annotations":[{"segment":[10.58223489,47.61793173790952],"label":"Paintball"}]},"Pt5jMqQXTZ8":{"duration":10.26,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Pt5jMqQXTZ8","annotations":[{"segment":[0.7845519154099497,9.975017210212219],"label":"Chopping 
wood"}]},"pZN1qyF9nXE":{"duration":31.719,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=pZN1qyF9nXE","annotations":[]},"Jxhz1ln7UXk":{"duration":204.89,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Jxhz1ln7UXk","annotations":[{"segment":[7.032204923571093,145.11913796823984],"label":"Cricket"}]},"r4bI22hGTg4":{"duration":23.48,"subset":"training","resolution":"224x128","url":"https://www.youtube.com/watch?v=r4bI22hGTg4","annotations":[{"segment":[6.73916746723869,13.643600621684868],"label":"Dodgeball"}]},"Ji3qvOdmOZA":{"duration":173.06,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ji3qvOdmOZA","annotations":[{"segment":[11.609216608120107,166.57875923744436],"label":"Getting a tattoo"}]},"Yd98o-oEGvI":{"duration":164.305,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Yd98o-oEGvI","annotations":[{"segment":[0.19999999999999998,103.29883625004864],"label":"Disc dog"},{"segment":[113.03917226677068,157.38333735101403],"label":"Disc dog"}]},"Bg526A61c1w":{"duration":238.93,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Bg526A61c1w","annotations":[{"segment":[70.82690919000001,193.63369253],"label":"Snatch"}]},"G4kjV4ulJzo":{"duration":54.1,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=G4kjV4ulJzo","annotations":[{"segment":[0.01,0.02],"label":"Walking the dog"}]},"R6MnhM2omiE":{"duration":51.15,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=R6MnhM2omiE","annotations":[{"segment":[9.3089399,36.02327846],"label":"Starting a campfire"}]},"DgcoDX3HbKY":{"duration":58.84,"subset":"validation","resolution":"192x144","url":"https://www.youtube.com/watch?v=DgcoDX3HbKY","annotations":[{"segment":[0.2,49.35247737909517],"label":"Making an 
omelette"}]},"AMMECm7Huhk":{"duration":166.789,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AMMECm7Huhk","annotations":[{"segment":[112.83514220000193,119.8387019891282],"label":"Having an ice cream"},{"segment":[156.4128525054579,162.37884868897268],"label":"Having an ice cream"}]},"I4B52tt3mNM":{"duration":62.276,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=I4B52tt3mNM","annotations":[{"segment":[9.423948290171607,23.70560188455538],"label":"Gargling mouthwash"},{"segment":[30.79785162870515,53.240450134165364],"label":"Gargling mouthwash"}]},"XZ1IuZjsk8A":{"duration":62.72,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=XZ1IuZjsk8A","annotations":[{"segment":[9.882101167039878,55.18321839812368],"label":"Mixing drinks"}]},"ndGz8-hHSt4":{"duration":37.431,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ndGz8-hHSt4","annotations":[{"segment":[13.934432223564398,33.057835443925235],"label":"Welding"}]},"jqYzz6YoMEY":{"duration":54.586,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jqYzz6YoMEY","annotations":[{"segment":[30.143017160686426,52.38629485179407],"label":"Fun sliding down"}]},"XSu5dteR2Ko":{"duration":98.06,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XSu5dteR2Ko","annotations":[{"segment":[19.733849851953263,32.88974975325544],"label":"Playing harmonica"},{"segment":[59.96642627145086,78.47647111544461],"label":"Playing harmonica"}]},"_EZnGbfMqsc":{"duration":94.319,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=_EZnGbfMqsc","annotations":[{"segment":[0.2,94.11918367346938],"label":"Rope skipping"},{"segment":[68.35573808994826,69.08916753032659],"label":"Rope 
skipping"}]},"iRwRwpVLE_Y":{"duration":196.65,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iRwRwpVLE_Y","annotations":[{"segment":[27.910594383775354,174.21118252730108],"label":"Baking cookies"}]},"8EDq3cjHhz0":{"duration":190.194,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8EDq3cjHhz0","annotations":[{"segment":[64.3758767550702,90.77888611544462],"label":"Playing ten pins"}]},"YPXeQVi39nU":{"duration":137.741,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YPXeQVi39nU","annotations":[{"segment":[24.066943001560062,33.09204662714509],"label":"Surfing"},{"segment":[42.3320336723869,74.13477978159128],"label":"Surfing"},{"segment":[89.1766191575663,92.61475387207489],"label":"Surfing"}]},"XRiqpcV0Qmc":{"duration":18.83,"subset":"testing","resolution":"800x480","url":"https://www.youtube.com/watch?v=XRiqpcV0Qmc","annotations":[]},"2Of52x_q5gI":{"duration":178.492,"subset":"testing","resolution":"1916x1080","url":"https://www.youtube.com/watch?v=2Of52x_q5gI","annotations":[]},"Lg-mJS-BZZo":{"duration":15.047,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Lg-mJS-BZZo","annotations":[{"segment":[0.5249879634878212,13.837181845107757],"label":"Cutting the grass"}]},"o1bO6f-Uo8A":{"duration":141.97,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=o1bO6f-Uo8A","annotations":[{"segment":[16.826708268330734,115.96634676],"label":"Mowing the lawn"}]},"gmBGjj6NoUg":{"duration":171.433,"subset":"testing","resolution":"512x288","url":"https://www.youtube.com/watch?v=gmBGjj6NoUg","annotations":[]},"idTzZaMtGy8":{"duration":52.11,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=idTzZaMtGy8","annotations":[{"segment":[0.8941674680187207,52.105577],"label":"Playing 
lacrosse"}]},"Q-HdDcyC9d8":{"duration":65.67,"subset":"training","resolution":"562x480","url":"https://www.youtube.com/watch?v=Q-HdDcyC9d8","annotations":[{"segment":[3.2269578783151327,61.41464279523562],"label":"Cleaning shoes"}]},"MpeN-IVIBc4":{"duration":113.24000000000001,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MpeN-IVIBc4","annotations":[]},"Z5lRHCRBA_E":{"duration":55.84,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=Z5lRHCRBA_E","annotations":[{"segment":[4.6569760234009365,27.00653813],"label":"Using uneven bars"},{"segment":[31.037306299999997,49.57285692199688],"label":"Using uneven bars"}]},"JguB9cLEXGY":{"duration":83.17,"subset":"training","resolution":"426x234","url":"https://www.youtube.com/watch?v=JguB9cLEXGY","annotations":[{"segment":[3.89269317711484,81.3572858845554],"label":"Javelin throw"}]},"yFJVEplkVHA":{"duration":102.7,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=yFJVEplkVHA","annotations":[{"segment":[4.3259677162596715,4.4861887427878075],"label":"Playing guitarra"},{"segment":[14.2596713610041,23.232048846579715],"label":"Playing guitarra"},{"segment":[35.08840480966178,40.05525663203399],"label":"Playing guitarra"}]},"W4XaqnwD6gU":{"duration":186.97,"subset":"training","resolution":"450x360","url":"https://www.youtube.com/watch?v=W4XaqnwD6gU","annotations":[{"segment":[31.273970090000002,155.58084971000002],"label":"Playing violin"}]},"10uSu442cOE":{"duration":141.2,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=10uSu442cOE","annotations":[{"segment":[7.269294788118056,126.88223630169698],"label":"Playing 
flauta"}]},"2oNsMva04MM":{"duration":199.82999999999998,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2oNsMva04MM","annotations":[{"segment":[69.34675897035882,76.82685881435258],"label":"Painting"},{"segment":[78.69688377535103,83.99528783151327],"label":"Painting"},{"segment":[111.11064976599064,117.96740795631825],"label":"Painting"}]},"AyWXuUCDSjw":{"duration":74.35,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AyWXuUCDSjw","annotations":[{"segment":[51.57111904122016,63.24978092768983],"label":"Drinking beer"}]},"X2LI_I_qw-A":{"duration":32.648,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=X2LI_I_qw-A","annotations":[{"segment":[0.2,31.17914953271028],"label":"Peeling potatoes"}]},"No7CCXJOmT0":{"duration":200.04,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=No7CCXJOmT0","annotations":[]},"WJwLjPDvLrY":{"duration":177.006,"subset":"training","resolution":"488x360","url":"https://www.youtube.com/watch?v=WJwLjPDvLrY","annotations":[{"segment":[0.01,144.87133385335414],"label":"Playing beach volleyball"}]},"n9oQEa-d5rU":{"duration":219.66,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=n9oQEa-d5rU","annotations":[]},"9bnt6lDcF6Y":{"duration":83.616,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9bnt6lDcF6Y","annotations":[{"segment":[48.18666190843724,74.97468788667175],"label":"Doing 
crunches"}]},"D4Y6DyRD0kY":{"duration":61.12,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=D4Y6DyRD0kY","annotations":[{"segment":[5.243869804992199,59.01737107800312],"label":"Kayaking"}]},"y4hcu0hOAEw":{"duration":27.423,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=y4hcu0hOAEw","annotations":[]},"OMa1i3ITBbo":{"duration":141.107,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=OMa1i3ITBbo","annotations":[{"segment":[44.70604417463095,126.68910391917923],"label":"Rock-paper-scissors"}]},"I8nK8c7k9ko":{"duration":146.843,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=I8nK8c7k9ko","annotations":[{"segment":[0.01,95.45935989946153],"label":"River tubing"}]},"cjFHlsWZY4c":{"duration":105.35,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cjFHlsWZY4c","annotations":[{"segment":[3.75776316,69.85346939625586],"label":"Doing nails"}]},"ok6LTNirAPo":{"duration":60.79,"subset":"training","resolution":"293x240","url":"https://www.youtube.com/watch?v=ok6LTNirAPo","annotations":[{"segment":[0.01,60.07020963980986],"label":"Having an ice cream"}]},"0fw8it7Gj7k":{"duration":163.19,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0fw8it7Gj7k","annotations":[{"segment":[3.42533711,85.15912867394695],"label":"Polishing forniture"}]},"NxvmOPe6Fx4":{"duration":88.98,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=NxvmOPe6Fx4","annotations":[]},"8OCTuqJVToU":{"duration":119.723,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8OCTuqJVToU","annotations":[{"segment":[0.01,119.661],"label":"Swimming"}]},"ksKlcjeIBi0":{"duration":5.06,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ksKlcjeIBi0","annotations":[{"segment":[0.17372212480499222,4.903701795631825],"label":"Using 
the balance beam"}]},"lZ6zN5Q447M":{"duration":222.54,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lZ6zN5Q447M","annotations":[{"segment":[91.00740218492095,151.56366190498318],"label":"Mixing drinks"}]},"YrXzg15ZECw":{"duration":194.398,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YrXzg15ZECw","annotations":[]},"DjT4-5H3xDQ":{"duration":105.535,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DjT4-5H3xDQ","annotations":[{"segment":[22.743518007759736,94.22784043766629],"label":"Sharpening knives"}]},"Gi1N3FtCZGo":{"duration":14.86,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=Gi1N3FtCZGo","annotations":[{"segment":[3.8021314820592824,14.86077],"label":"Clean and jerk"}]},"3TxZTZEEg44":{"duration":156.81,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3TxZTZEEg44","annotations":[{"segment":[76.4219315522621,114.08265942121686],"label":"Tennis serve with ball 
bouncing"}]},"lUw8OA9R4mQ":{"duration":219.22,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lUw8OA9R4mQ","annotations":[{"segment":[30.77958228533223,219.21902494331067],"label":"Zumba"}]},"aFSaGCvYXXY":{"duration":220.961,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aFSaGCvYXXY","annotations":[{"segment":[0,13.786021840873635],"label":"Waterskiing"},{"segment":[149.9229875195008,155.78204680187207],"label":"Waterskiing"}]},"2gV2apTrRHs":{"duration":70.26,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2gV2apTrRHs","annotations":[{"segment":[4.265054602184088,63.32944716000001],"label":"Breakdancing"}]},"iHzyX1djHU4":{"duration":177.238,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iHzyX1djHU4","annotations":[{"segment":[47.76554290171607,170.07846489859594],"label":"Wakeboarding"}]},"LElk0AlBpbI":{"duration":175.194,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LElk0AlBpbI","annotations":[{"segment":[24.59358814352574,122.14815444617784],"label":"Longboarding"},{"segment":[131.4390655226209,148.65457722308892],"label":"Longboarding"},{"segment":[151.38719812792513,156.30591575663027],"label":"Longboarding"}]},"0EbXjHsMBmM":{"duration":208.144,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0EbXjHsMBmM","annotations":[{"segment":[0.01,164.89077691107647],"label":"Waxing 
skis"}]},"j8NwT9JBQJk":{"duration":157.01,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=j8NwT9JBQJk","annotations":[{"segment":[1.7143010920436816,155.2666989079563],"label":"Sailing"}]},"wXsQbzBQ6ck":{"duration":122.49,"subset":"testing","resolution":"400x228","url":"https://www.youtube.com/watch?v=wXsQbzBQ6ck","annotations":[]},"-UfNSW7yeSo":{"duration":153.484,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-UfNSW7yeSo","annotations":[{"segment":[149.1730171856474,153.483],"label":"Welding"}]},"mDopaDI7U3c":{"duration":207.7,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=mDopaDI7U3c","annotations":[]},"MzNI-qdQfQc":{"duration":107.021,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=MzNI-qdQfQc","annotations":[{"segment":[0.01,101.51114219032762],"label":"Playing rubik cube"}]},"SjfrdNchlqw":{"duration":121.464,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=SjfrdNchlqw","annotations":[{"segment":[0,16.864678418429254],"label":"Playing blackjack"},{"segment":[58.5526475426364,69.35362136118098],"label":"Playing blackjack"}]},"n9P4ltD0g2k":{"duration":213.37,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=n9P4ltD0g2k","annotations":[{"segment":[43.75750944,193.02116424336972],"label":"Mixing drinks"}]},"WglqrQ9uR-A":{"duration":212.51,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WglqrQ9uR-A","annotations":[{"segment":[11.271929560175606,62.65866726097616],"label":"Longboarding"},{"segment":[128.9641352620091,158.80159441029642],"label":"Longboarding"}]},"SgkdciuEuKs":{"duration":68.499,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=SgkdciuEuKs","annotations":[{"segment":[30.915716510903426,49.692875389408094],"label":"Cleaning 
sink"}]},"2XOTxAZZhsQ":{"duration":58.84,"subset":"validation","resolution":"640x352","url":"https://www.youtube.com/watch?v=2XOTxAZZhsQ","annotations":[{"segment":[37.03385499,50.489745899999996],"label":"Washing dishes"},{"segment":[11.51596908,32.366745509999994],"label":"Washing dishes"}]},"gtAl_FkXdR8":{"duration":144.24,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gtAl_FkXdR8","annotations":[{"segment":[10.576272759753927,128.7154886177847],"label":"Paintball"}]},"B5Zi054Fa5k":{"duration":120.675,"subset":"training","resolution":"1916x1080","url":"https://www.youtube.com/watch?v=B5Zi054Fa5k","annotations":[{"segment":[0,103.16596615973482],"label":"Playing rubik cube"}]},"iFA1XhZ6VM8":{"duration":144.521,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=iFA1XhZ6VM8","annotations":[{"segment":[13.73174589046418,98.59843652336448],"label":"Making a cake"}]},"IPVMq_-SzfQ":{"duration":63.693,"subset":"testing","resolution":"654x480","url":"https://www.youtube.com/watch?v=IPVMq_-SzfQ","annotations":[]},"3A26j50_awY":{"duration":26.61,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=3A26j50_awY","annotations":[{"segment":[12.824695007800312,22.31372620904836],"label":"Long jump"}]},"47SHPAe0s0k":{"duration":170.04,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=47SHPAe0s0k","annotations":[{"segment":[20.23526025398764,163.78906704214964],"label":"Bullfighting"}]},"LgUCZHYBBEc":{"duration":65.11,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LgUCZHYBBEc","annotations":[{"segment":[1.2188845741029641,62.670981851794075],"label":"Playing polo"}]},"qeyFjCAA_dg":{"duration":144.661,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qeyFjCAA_dg","annotations":[{"segment":[41.52173103876156,52.78811090327227],"label":"Doing 
crunches"},{"segment":[74.1942326458426,127.82220080091356],"label":"Doing crunches"}]},"wu0G4yQIwKo":{"duration":143.035,"subset":"validation","resolution":"192x144","url":"https://www.youtube.com/watch?v=wu0G4yQIwKo","annotations":[{"segment":[3.7816358534076997,17.351047271681594],"label":"Painting fence"},{"segment":[23.35718019452414,25.804123237163694],"label":"Painting fence"},{"segment":[29.363313117366683,32.922502997569666],"label":"Painting fence"},{"segment":[74.07563598741673,80.30421827777195],"label":"Painting fence"},{"segment":[97.43281957624885,106.33079330242612],"label":"Painting fence"}]},"TpB_zMG3XBA":{"duration":163.051,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TpB_zMG3XBA","annotations":[{"segment":[10.143109301327563,145.30009121469075],"label":"Waterskiing"}]},"sV6JEbmqDUw":{"duration":57.563,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sV6JEbmqDUw","annotations":[{"segment":[0.01,9.967880695787832],"label":"Elliptical trainer"},{"segment":[25.683007918876754,35.56108788767551],"label":"Elliptical trainer"},{"segment":[37.89590678939158,40.230725691107644],"label":"Elliptical trainer"},{"segment":[44.09215695163807,47.59438530421217],"label":"Elliptical trainer"}]},"l8aaaDY4ehA":{"duration":186.457,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=l8aaaDY4ehA","annotations":[{"segment":[0.2,23.852435784506206],"label":"Cutting the grass"},{"segment":[29.088336322568548,123.62542937091634],"label":"Cutting the grass"},{"segment":[128.86132864586583,134.3881124929797],"label":"Cutting the grass"},{"segment":[139.9148963400936,171.91206598127923],"label":"Cutting the grass"}]},"Mno1JV_6y_M":{"duration":183.35,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=Mno1JV_6y_M","annotations":[{"segment":[52.61108268330734,88.3523073322933],"label":"Ironing 
clothes"},{"segment":[120.38958034321371,162.1398861154446],"label":"Ironing clothes"}]},"rNXlLVuSlMA":{"duration":15.42,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=rNXlLVuSlMA","annotations":[]},"ds3mAmUPxYA":{"duration":141.43,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ds3mAmUPxYA","annotations":[]},"F1xZKduLnWg":{"duration":130.636,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=F1xZKduLnWg","annotations":[{"segment":[21.189553822152888,23.634502340093604],"label":"Doing fencing"},{"segment":[33.00680499219969,42.786599063962555],"label":"Doing fencing"},{"segment":[46.45402184087363,53.992613104524175],"label":"Doing fencing"},{"segment":[65.60611856474259,68.2548127925117],"label":"Doing fencing"},{"segment":[80.88704680187207,92.5005522620905],"label":"Doing fencing"},{"segment":[99.22416068642745,103.9103120124805],"label":"Doing fencing"},{"segment":[115.52381747269891,117.76502028081123],"label":"Doing fencing"}]},"Ggw7rtn3J-8":{"duration":61.417,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ggw7rtn3J-8","annotations":[{"segment":[22.116721952693855,53.466267124033415],"label":"Welding"}]},"I0w8zmcO_Cw":{"duration":94.53,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=I0w8zmcO_Cw","annotations":[{"segment":[34.21300091107644,58.692992942277684],"label":"Fixing 
bicycle"}]},"qxlVkDpdyOA":{"duration":118.747,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qxlVkDpdyOA","annotations":[{"segment":[0.2,118.50099999999999],"label":"Powerbocking"}]},"YuppPIPojmw":{"duration":114.382,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YuppPIPojmw","annotations":[]},"ROvaMfVEMSw":{"duration":14.072,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ROvaMfVEMSw","annotations":[{"segment":[0.01,14.041],"label":"Calf roping"}]},"CJWSed5v4jE":{"duration":77.02,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=CJWSed5v4jE","annotations":[{"segment":[1.3523865051855641,68.65542368275565],"label":"Mowing the lawn"}]},"mi8tpPUeAWA":{"duration":42.82,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=mi8tpPUeAWA","annotations":[]},"Jd5tpIdMGh8":{"duration":21.34,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Jd5tpIdMGh8","annotations":[{"segment":[0.01,20.62733944773791],"label":"Smoking a cigarette"}]},"s7HB851cCrg":{"duration":123.11,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=s7HB851cCrg","annotations":[{"segment":[32.30581992,123.041666],"label":"Playing badminton"},{"segment":[0.3839053541341654,32.05609707020281],"label":"Playing badminton"}]},"eRN5gqZFXHI":{"duration":186.02,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=eRN5gqZFXHI","annotations":[{"segment":[77.91738334023157,132.4740614332056],"label":"Cleaning shoes"}]},"6asjaqo0OHc":{"duration":158.128,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6asjaqo0OHc","annotations":[{"segment":[43.52806535387501,151.24159106091366],"label":"Assembling 
bicycle"}]},"JvYDila36v4":{"duration":122.3,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JvYDila36v4","annotations":[]},"qMJi2nXWOkg":{"duration":5.25,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=qMJi2nXWOkg","annotations":[{"segment":[0.01,0.02],"label":"Playing kickball"}]},"GUpr-QSMZFs":{"duration":43.7,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GUpr-QSMZFs","annotations":[]},"EoYvQ_pn8KE":{"duration":45.535,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=EoYvQ_pn8KE","annotations":[{"segment":[0.01,32.53467019968799],"label":"Curling"}]},"7ghaFHKMUZ0":{"duration":70.264,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=7ghaFHKMUZ0","annotations":[{"segment":[0.01,70.221],"label":"Having an ice cream"}]},"hxQhQsl-N1k":{"duration":9.335,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=hxQhQsl-N1k","annotations":[{"segment":[0,8.977645158346334],"label":"Kneeling"}]},"HX6ajS7kbrc":{"duration":148.515,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HX6ajS7kbrc","annotations":[]},"SCGQIhvDcIo":{"duration":213.833,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SCGQIhvDcIo","annotations":[{"segment":[9.67274414976599,196.4567691107644],"label":"Playing pool"}]},"Jp8L9h4aaV4":{"duration":183.83,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Jp8L9h4aaV4","annotations":[{"segment":[4.01402496099844,175.75695007800311],"label":"Doing step aerobics"}]},"5rtrGkZNfLo":{"duration":70.4,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=5rtrGkZNfLo","annotations":[{"segment":[4.279717628705148,12.290471138845554],"label":"Smoking 
hookah"}]},"RpVkFIpEhIE":{"duration":14.88,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=RpVkFIpEhIE","annotations":[{"segment":[4.564563182527301,14.181505460218409],"label":"Hammer throw"}]},"vbHLA5l_BRc":{"duration":154.691,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vbHLA5l_BRc","annotations":[{"segment":[59.58865366614664,131.49804212168488],"label":"Waterskiing"}]},"wVHD_Y5J3qE":{"duration":141.108,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=wVHD_Y5J3qE","annotations":[{"segment":[54.79592667706708,138.6599531981279],"label":"Blow-drying hair"}]},"ssBiSN8XofA":{"duration":176.12,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ssBiSN8XofA","annotations":[{"segment":[40.11546015819033,52.47981431653666],"label":"Layup drill in basketball"},{"segment":[62.92082449469579,120.62114390031202],"label":"Layup drill in basketball"}]},"y1IjkACdnfs":{"duration":28.84,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=y1IjkACdnfs","annotations":[{"segment":[0.1919345474405972,26.290945048998232],"label":"Doing motocross"}]},"B3fiiuk0EqE":{"duration":93.55,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=B3fiiuk0EqE","annotations":[{"segment":[0.01,93.501],"label":"Shoveling snow"}]},"-9B2XtqICFw":{"duration":184.3,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=-9B2XtqICFw","annotations":[{"segment":[13.513181976599062,149.79505978315132],"label":"Playing 
badminton"}]},"9TiMh2Qw0KM":{"duration":71.61,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=9TiMh2Qw0KM","annotations":[]},"2DvMNOas7jY":{"duration":170.179,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2DvMNOas7jY","annotations":[{"segment":[0.19999999999999998,155.86490134994807],"label":"Longboarding"}]},"WYyqptHdgkk":{"duration":139.97,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WYyqptHdgkk","annotations":[{"segment":[0.2,130.53472386895476],"label":"Slacklining"}]},"uAKyRSEaY1s":{"duration":41.82,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=uAKyRSEaY1s","annotations":[{"segment":[7.816661368597283,30.550117216510902],"label":"Playing rubik cube"}]},"FgO-dLvr3rs":{"duration":60.256,"subset":"testing","resolution":"654x480","url":"https://www.youtube.com/watch?v=FgO-dLvr3rs","annotations":[]},"jdz4PzF1pO0":{"duration":191.217,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jdz4PzF1pO0","annotations":[{"segment":[10.885142745709828,12.67448127925117],"label":"Rock-paper-scissors"},{"segment":[19.23538923556942,20.726504680187205],"label":"Rock-paper-scissors"},{"segment":[79.47645319812793,82.4586840873635],"label":"Rock-paper-scissors"},{"segment":[89.91426131045242,96.77339235569423],"label":"Rock-paper-scissors"},{"segment":[103.33430031201247,105.42186193447738],"label":"Rock-paper-scissors"},{"segment":[107.21120046801872,108.70231591263651],"label":"Rock-paper-scissors"},{"segment":[112.28099297971917,115.26322386895477],"label":"Rock-paper-scissors"},{"segment":[116.15789313572544,118.84190093603743],"label":"Rock-paper-scissors"},{"segment":[127.50370904836194,137.04784633385336],"label":"Rock-paper-scissors"},{"segment":[140.32864352574103,147.188492199688],"label":"Rock-paper-scissors"},{"segment":[159.7151723868955,162.99596957878316],"label":"Rock-paper-scissors"}]},"CDe
cXoRSpIc":{"duration":55.937,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=CDecXoRSpIc","annotations":[]},"Vl4gId1_zxo":{"duration":223.446,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=Vl4gId1_zxo","annotations":[{"segment":[32.7673767051482,177.78044808112324],"label":"Kite flying"}]},"Ix8WFQ6-yx8":{"duration":69.24,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ix8WFQ6-yx8","annotations":[{"segment":[0.972195205928237,67.08146920904835],"label":"Playing squash"}]},"od1jHUzgrAU":{"duration":169.37,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=od1jHUzgrAU","annotations":[{"segment":[91.52699279953198,135.90371658112326],"label":"Polishing shoes"}]},"kyafh7Ownao":{"duration":39.962,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kyafh7Ownao","annotations":[{"segment":[21.68403744149766,31.65370982839314],"label":"Hand car wash"}]},"nreNuE_HuU4":{"duration":238.887,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=nreNuE_HuU4","annotations":[]},"BsxCumewgFQ":{"duration":88.352,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BsxCumewgFQ","annotations":[]},"uJuGXnGqozs":{"duration":53.82,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uJuGXnGqozs","annotations":[{"segment":[4.7942269059772435,37.18950260691182],"label":"Using uneven bars"}]},"v0NEZPgjcXI":{"duration":27.075,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=v0NEZPgjcXI","annotations":[{"segment":[0.01,26.24745344254153],"label":"Doing a powerbomb"}]},"U0d68z5HTwE":{"duration":127.385,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=U0d68z5HTwE","annotations":[{"segment":[100.65191213422037,109.97754627440727],"label":"Baking 
cookies"}]},"pExl_cwmT8M":{"duration":52.315,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=pExl_cwmT8M","annotations":[{"segment":[16.027873128617895,42.38843721889062],"label":"Rollerblading"}]},"nypZiLC1_Z8":{"duration":153.95,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nypZiLC1_Z8","annotations":[{"segment":[7.12317396,138.71248509828393],"label":"Fixing bicycle"}]},"3TVKXCyNObQ":{"duration":216.248,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3TVKXCyNObQ","annotations":[{"segment":[0.01,216.201],"label":"Longboarding"}]},"vHnZ3NrZER0":{"duration":90.465,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=vHnZ3NrZER0","annotations":[]},"RoTbDaCV3Ek":{"duration":237.43,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=RoTbDaCV3Ek","annotations":[]},"RatbnDSuY7w":{"duration":52.29,"subset":"training","resolution":"626x360","url":"https://www.youtube.com/watch?v=RatbnDSuY7w","annotations":[{"segment":[22.048914976599065,36.39497425897036],"label":"High jump"}]},"DHjoAOus3xM":{"duration":65.225,"subset":"testing","resolution":"328x240","url":"https://www.youtube.com/watch?v=DHjoAOus3xM","annotations":[]},"SO5KnbKienU":{"duration":202.27,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=SO5KnbKienU","annotations":[{"segment":[41.65290373247583,192.80245591320252],"label":"Baton twirling"}]},"oXh-7Uff1jQ":{"duration":18.413,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=oXh-7Uff1jQ","annotations":[]},"p1yiPvIkRec":{"duration":6.758,"subset":"training","resolution":"540x360","url":"https://www.youtube.com/watch?v=p1yiPvIkRec","annotations":[{"segment":[0,4.770756630265211],"label":"Playing rubik 
cube"}]},"5tFfEA1JWjA":{"duration":56.96,"subset":"testing","resolution":"294x240","url":"https://www.youtube.com/watch?v=5tFfEA1JWjA","annotations":[]},"gqJE_SZyoIc":{"duration":105.907,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=gqJE_SZyoIc","annotations":[{"segment":[0.01,105.881],"label":"Elliptical trainer"}]},"FmfAB-ppHBs":{"duration":25.87,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=FmfAB-ppHBs","annotations":[]},"gVixuVE0-ek":{"duration":135.68,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gVixuVE0-ek","annotations":[{"segment":[9.099162246489858,131.40883151326054],"label":"Doing kickboxing"}]},"z6huZTFj0FU":{"duration":157.827,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=z6huZTFj0FU","annotations":[]},"fVg3StD8LL0":{"duration":60.256,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=fVg3StD8LL0","annotations":[{"segment":[21.620639048361937,48.78744202652106],"label":"Laying tile"}]},"h91i2x9Gouk":{"duration":142.06,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=h91i2x9Gouk","annotations":[{"segment":[36.33098907956318,45.63526677067083],"label":"Mooping floor"},{"segment":[70.67815756630264,79.54062246489859],"label":"Mooping floor"},{"segment":[86.85215600624024,90.17558034321372],"label":"Mooping floor"}]},"HHDMFrN7vFs":{"duration":4.44,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HHDMFrN7vFs","annotations":[{"segment":[0.01,4.267195202808113],"label":"Spinning"}]},"nvsK9ggeics":{"duration":73.259,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=nvsK9ggeics","annotations":[{"segment":[7.767594383775352,73.201],"label":"Playing 
drums"}]},"TwQQPCjT_A4":{"duration":53.499,"subset":"training","resolution":"800x480","url":"https://www.youtube.com/watch?v=TwQQPCjT_A4","annotations":[{"segment":[3.3642125881230722,51.32922193391746],"label":"Beach soccer"}]},"zX9DZ_x9rJ4":{"duration":173.988,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=zX9DZ_x9rJ4","annotations":[{"segment":[9.771507529688943,157.70127322776912],"label":"Making a cake"}]},"06Eq9tgprBw":{"duration":172.59,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=06Eq9tgprBw","annotations":[{"segment":[0.01,172.593672],"label":"Zumba"}]},"QIC1jlFIjtg":{"duration":140.39,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QIC1jlFIjtg","annotations":[]},"isUCIXYjOXE":{"duration":45.047,"subset":"validation","resolution":"853x480","url":"https://www.youtube.com/watch?v=isUCIXYjOXE","annotations":[{"segment":[17.4804992199688,25.765003120124806],"label":"Playing pool"}]},"IjBMVPd2Rcs":{"duration":84.242,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=IjBMVPd2Rcs","annotations":[{"segment":[41.531432779047336,42.97258592001428],"label":"Using the rowing machine"},{"segment":[66.9481456431869,73.62985900866126],"label":"Using the rowing machine"}]},"50oy8wP2Wo8":{"duration":95.9,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=50oy8wP2Wo8","annotations":[{"segment":[9.26566302652106,18.232433697347894],"label":"Getting a haircut"},{"segment":[20.922464898595944,34.07372854914197],"label":"Getting a haircut"},{"segment":[82.7931825273011,87.42601404056163],"label":"Getting a haircut"}]},"39FIjDeot-s":{"duration":105.84,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=39FIjDeot-s","annotations":[{"segment":[13.367021840873635,49.681904836193446],"label":"Doing karate"},{"segment":[70.47898127925117,105.601],"label":"Doing 
karate"}]},"VpCf0THwEJA":{"duration":138.113,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VpCf0THwEJA","annotations":[]},"EoQgUlLb3U4":{"duration":52.11,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=EoQgUlLb3U4","annotations":[{"segment":[0.24365522620904836,26.39598283931357],"label":"Discus throw"}]},"dq1mR2tK7Vk":{"duration":20.78,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=dq1mR2tK7Vk","annotations":[]},"zEvCa6PJDrA":{"duration":142.733,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zEvCa6PJDrA","annotations":[]},"bCD6_kGsF9A":{"duration":134.258,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bCD6_kGsF9A","annotations":[{"segment":[13.989505988313635,127.36716989368917],"label":"Rollerblading"}]},"1FlnQzPzS2Q":{"duration":81.015,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1FlnQzPzS2Q","annotations":[{"segment":[0.20000000000000004,80.81442176870748],"label":"Running a marathon"}]},"OtD2B81S2og":{"duration":15.54,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=OtD2B81S2og","annotations":[]},"Yjkz62cLVdQ":{"duration":142.943,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Yjkz62cLVdQ","annotations":[{"segment":[0.01,132.20014508580343],"label":"Welding"}]},"Ap7GCrt9C4w":{"duration":143.5,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ap7GCrt9C4w","annotations":[{"segment":[12.814008030421217,135.82848512246488],"label":"Clean and jerk"}]},"3nvVkcxxaJ4":{"duration":10.96,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=3nvVkcxxaJ4","annotations":[{"segment":[0,10.39185647425897],"label":"Playing 
piano"}]},"feKK7p9XR-o":{"duration":147.94,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=feKK7p9XR-o","annotations":[]},"4uwtqRBE4Kk":{"duration":223.028,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=4uwtqRBE4Kk","annotations":[{"segment":[25.74732252963588,58.45338087809227],"label":"Surfing"},{"segment":[65.7600534878538,77.24196758890764],"label":"Surfing"},{"segment":[86.2883241533743,90.4635656446666],"label":"Surfing"},{"segment":[116.55882496524352,155.8756823415794],"label":"Surfing"},{"segment":[164.57410211510503,172.5766483067486],"label":"Surfing"},{"segment":[175.01220584333578,180.23125770745116],"label":"Surfing"},{"segment":[185.4503087457098,213.98112547581903],"label":"Surfing"}]},"EzX0FZI6pCg":{"duration":4.44,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EzX0FZI6pCg","annotations":[{"segment":[0.1190600624024961,3.1154987999999997],"label":"Discus throw"}]},"CSruNOwxCRY":{"duration":99.057,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=CSruNOwxCRY","annotations":[{"segment":[12.953401869158878,88.36070560747665],"label":"Playing beach 
volleyball"}]},"QqR_8CdDFKI":{"duration":31.649,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QqR_8CdDFKI","annotations":[]},"NzMZlzy5Q9A":{"duration":100.102,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=NzMZlzy5Q9A","annotations":[{"segment":[12.629590883082203,19.646030262572317],"label":"Sumo"},{"segment":[52.2334931584264,62.212428037383184],"label":"Sumo"}]},"fZBfB3PWVds":{"duration":234.94,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fZBfB3PWVds","annotations":[]},"svZ7DPi7rLI":{"duration":53.92,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=svZ7DPi7rLI","annotations":[{"segment":[10.548665399999999,45.28620504],"label":"Javelin throw"}]},"rBJBnf4F9sA":{"duration":209.49,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=rBJBnf4F9sA","annotations":[{"segment":[81.05089991898996,188.90088605616225],"label":"Polishing forniture"}]},"rwQDoaheIFs":{"duration":33.46,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=rwQDoaheIFs","annotations":[]},"M2ntILX6VP0":{"duration":213.276,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=M2ntILX6VP0","annotations":[{"segment":[20.296087816301768,56.2301449336885],"label":"Calf roping"},{"segment":[60.555540697818394,98.1532115706397],"label":"Calf 
roping"}]},"-ap649M020k":{"duration":212.927,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-ap649M020k","annotations":[{"segment":[9.965381472401754,10.961919619641929],"label":"Longboarding"},{"segment":[15.280251591016023,32.88575885892579],"label":"Longboarding"},{"segment":[37.86844959512666,44.84421662580789],"label":"Longboarding"},{"segment":[54.80959809820965,60.456647599237314],"label":"Longboarding"},{"segment":[72.7472847485328,74.07600227818637],"label":"Longboarding"},{"segment":[83.04484560334795,84.37356313300153],"label":"Longboarding"},{"segment":[89.35625386920239,91.34933016368275],"label":"Longboarding"},{"segment":[96.66420028229702,104.9686848426318],"label":"Longboarding"},{"segment":[112.9409900205532,119.58457766882104],"label":"Longboarding"},{"segment":[123.90290964019513,129.8821385236362],"label":"Longboarding"},{"segment":[139.18316123121116,142.50495505534508],"label":"Longboarding"},{"segment":[146.82328702671919,148.81636332119953],"label":"Longboarding"},{"segment":[162.4357180001486,165.09315305945572],"label":"Longboarding"},{"segment":[167.08622925429017,174.7263550452418],"label":"Longboarding"},{"segment":[180.04122516068642,190.33878600936038],"label":"Longboarding"}]},"H0JTf557Fwc":{"duration":199.739,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=H0JTf557Fwc","annotations":[]},"rnhtmtW_a8o":{"duration":198.93,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rnhtmtW_a8o","annotations":[{"segment":[9.930750702028082,190.8566150546022],"label":"Volleyball"}]},"AyicWbHhUWc":{"duration":54.27,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AyicWbHhUWc","annotations":[{"segment":[9.33737855,53.54429485179407],"label":"Clean and 
jerk"}]},"smGijLg8Cho":{"duration":124.274,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=smGijLg8Cho","annotations":[{"segment":[27.135319812792513,113.58069578783152],"label":"Throwing darts"}]},"SHzmpIt0Euw":{"duration":179.211,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=SHzmpIt0Euw","annotations":[{"segment":[18.73194675270004,165.51212653131975],"label":"Installing carpet"}]},"F_uouNToDWo":{"duration":106.418,"subset":"training","resolution":"640x356","url":"https://www.youtube.com/watch?v=F_uouNToDWo","annotations":[{"segment":[10.127015080603224,100.44006602964119],"label":"Using the rowing machine"}]},"KYjWkaXVnbw":{"duration":169.76,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=KYjWkaXVnbw","annotations":[{"segment":[58.737306,99.18178951872075],"label":"Mixing drinks"}]},"f7ndXtwTep0":{"duration":125.09,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=f7ndXtwTep0","annotations":[{"segment":[46.931602975039,78.9482516],"label":"Hand washing clothes"}]},"FTmGHtBdWi0":{"duration":123.182,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FTmGHtBdWi0","annotations":[{"segment":[19.40927798472483,28.441318235042328],"label":"Arm wrestling"},{"segment":[31.900397479844774,42.277634758190324],"label":"Arm wrestling"},{"segment":[43.23849009360374,82.05704564430576],"label":"Arm 
wrestling"}]},"jpvgI6bNO1g":{"duration":199.413,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jpvgI6bNO1g","annotations":[{"segment":[37.04642771175224,190.72818618289108],"label":"Slacklining"}]},"uzdhP_UT3B4":{"duration":88.72,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uzdhP_UT3B4","annotations":[]},"G43ExfqGB9o":{"duration":143.9,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=G43ExfqGB9o","annotations":[]},"8zznGr9DEsg":{"duration":11.796,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=8zznGr9DEsg","annotations":[]},"LAU_pVRs9RQ":{"duration":25.868,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=LAU_pVRs9RQ","annotations":[{"segment":[5.6395319812792515,13.4543120124805],"label":"Arm wrestling"}]},"EpWZ_-hNKKs":{"duration":32.113,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EpWZ_-hNKKs","annotations":[{"segment":[3.9577885865834634,21.993280879875197],"label":"Surfing"}]},"-gHcnPAhXWY":{"duration":14.91,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=-gHcnPAhXWY","annotations":[]},"PUzd4mfmzqo":{"duration":194.4,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=PUzd4mfmzqo","annotations":[{"segment":[72.17866676571826,194.39716553287982],"label":"Layup drill in basketball"}]},"K7oJNZsI1Cc":{"duration":163.352,"subset":"training","resolution":"640x272","url":"https://www.youtube.com/watch?v=K7oJNZsI1Cc","annotations":[{"segment":[10.445149765990639,23.69265678627145],"label":"Doing fencing"},{"segment":[51.461469578783145,109.5466926677067],"label":"Doing fencing"},{"segment":[158.98955382215289,161.7922542901716],"label":"Doing 
fencing"}]},"BKRKHkkZq6g":{"duration":38.987,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BKRKHkkZq6g","annotations":[{"segment":[2.155800224018542,32.108213267044974],"label":"Doing crunches"}]},"n7dHbqoEiek":{"duration":224.607,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=n7dHbqoEiek","annotations":[{"segment":[8.760008580343214,104.4193022776911],"label":"Trimming branches or hedges"},{"segment":[128.59692595943838,160.13295684867396],"label":"Trimming branches or hedges"},{"segment":[195.52339151326052,220.05141553822153],"label":"Trimming branches or hedges"}]},"mZYWfmsYQPA":{"duration":104.31,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mZYWfmsYQPA","annotations":[]},"plMBtIbzX6w":{"duration":20.318,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=plMBtIbzX6w","annotations":[{"segment":[6.783052274471931,13.40762114976599],"label":"Playing ten pins"}]},"SJ7jk5XAjng":{"duration":123.02,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SJ7jk5XAjng","annotations":[]},"xddZ9YjTGgo":{"duration":153.019,"subset":"training","resolution":"384x288","url":"https://www.youtube.com/watch?v=xddZ9YjTGgo","annotations":[{"segment":[7.150443978214339,139.19530944257247],"label":"Beach soccer"}]},"2TOuajpiof8":{"duration":192.84,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=2TOuajpiof8","annotations":[]},"i9t0zZzzbhE":{"duration":156.18,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=i9t0zZzzbhE","annotations":[{"segment":[65.29726553960117,152.76636283619345],"label":"Playing polo"}]},"39dTxOhrW68":{"duration":41.634,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=39dTxOhrW68","annotations":[{"segment":[11.171514979719188,40.9838718151326],"label":"Trimming branches or 
hedges"}]},"FNHoSA0hLgM":{"duration":75.37,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=FNHoSA0hLgM","annotations":[{"segment":[4.170834633385335,71.49163026521062],"label":"Playing racquetball"}]},"F3FjEM9ls0o":{"duration":56.053,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=F3FjEM9ls0o","annotations":[{"segment":[5.154538221528861,56.021],"label":"Playing drums"}]},"8C6iIFY47Kc":{"duration":220.938,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8C6iIFY47Kc","annotations":[{"segment":[42.91226930709881,101.5073358308482],"label":"Arm wrestling"}]},"Sx5MlpX6NIY":{"duration":142.664,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Sx5MlpX6NIY","annotations":[{"segment":[44.27703432137286,117.27270982839312],"label":"Drum corps"}]},"qnozppP4rm0":{"duration":163.12,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qnozppP4rm0","annotations":[{"segment":[22.775750757921475,91.23024186273574],"label":"Hopscotch"}]},"ox6cIfguQ00":{"duration":103.82,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ox6cIfguQ00","annotations":[{"segment":[1.6195970015671375,102.19656902652106],"label":"Horseback 
riding"}]},"GH0Umpw6tME":{"duration":133.31,"subset":"testing","resolution":"606x480","url":"https://www.youtube.com/watch?v=GH0Umpw6tME","annotations":[]},"W6JJ1L_EEBY":{"duration":99.568,"subset":"validation","resolution":"636x480","url":"https://www.youtube.com/watch?v=W6JJ1L_EEBY","annotations":[{"segment":[16.143687987519503,52.32227301092043],"label":"BMX"},{"segment":[64.43247269890796,99.321],"label":"BMX"}]},"wdvQC41PGPk":{"duration":120.95,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wdvQC41PGPk","annotations":[]},"CevHfmrd_x4":{"duration":201.78,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=CevHfmrd_x4","annotations":[]},"DlE6Rtuo__o":{"duration":82.71000000000001,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=DlE6Rtuo__o","annotations":[{"segment":[0,82.32238116380655],"label":"Getting a tattoo"}]},"diBZlwUO8rc":{"duration":119.978,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=diBZlwUO8rc","annotations":[{"segment":[33.12951404586796,38.93185831378833],"label":"Sharpening knives"},{"segment":[58.3977874706825,87.78385238065522],"label":"Sharpening knives"},{"segment":[94.14771374726989,96.01943767862714],"label":"Sharpening knives"},{"segment":[98.45267878939157,99.57571314820592],"label":"Sharpening knives"}]},"Y-lL9JiLhz0":{"duration":35.086,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Y-lL9JiLhz0","annotations":[{"segment":[11.086266355140186,31.12892523364486],"label":"Waterskiing"}]},"6ITE3hona-4":{"duration":208.42000000000002,"subset":"training","resolution":"854x468","url":"https://www.youtube.com/watch?v=6ITE3hona-4","annotations":[{"segment":[36.7338143525741,181.0684477379095],"label":"Vacuuming 
floor"}]},"yH0xeA_OvWg":{"duration":70.055,"subset":"validation","resolution":"270x480","url":"https://www.youtube.com/watch?v=yH0xeA_OvWg","annotations":[{"segment":[7.868847650546022,70.054602],"label":"Cleaning sink"}]},"rRoRvt4p2LU":{"duration":168.322,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rRoRvt4p2LU","annotations":[{"segment":[0,168.301],"label":"Rafting"}]},"q6bl7g5JGng":{"duration":81.32,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=q6bl7g5JGng","annotations":[{"segment":[14.842441123244932,74.46592255850234],"label":"Playing squash"}]},"AJtJcV2_l1Q":{"duration":182.904,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=AJtJcV2_l1Q","annotations":[{"segment":[0.01,182.9035827664399],"label":"Roof shingle removal"}]},"kTqt7ry3eVM":{"duration":97.39,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=kTqt7ry3eVM","annotations":[]},"Jp7KeCimrMI":{"duration":89.0,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Jp7KeCimrMI","annotations":[{"segment":[4.889917615268743,9.326131447044444],"label":"Ironing clothes"},{"segment":[13.346450232091174,37.88425798910052],"label":"Ironing clothes"},{"segment":[50.22247770872669,52.99511135358651],"label":"Ironing clothes"}]},"-5KAycAQlC4":{"duration":139.436,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-5KAycAQlC4","annotations":[{"segment":[5.438182527301094,126.60089831513261],"label":"Wakeboarding"}]},"sZf6ijFwPDs":{"duration":82.76,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=sZf6ijFwPDs","annotations":[{"segment":[18.338507999999997,72.29846102964119],"label":"Mixing 
drinks"}]},"7FY8MUnu7L0":{"duration":136.162,"subset":"testing","resolution":"492x360","url":"https://www.youtube.com/watch?v=7FY8MUnu7L0","annotations":[]},"Ggtcmy29TxE":{"duration":153.136,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=Ggtcmy29TxE","annotations":[{"segment":[9.556043738348173,143.340655975039],"label":"Swimming"}]},"yxaYVPa4zXY":{"duration":125.46,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=yxaYVPa4zXY","annotations":[]},"bNh4SQ2nR80":{"duration":166.255,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bNh4SQ2nR80","annotations":[{"segment":[36.80502652106084,151.98965710465154],"label":"Blow-drying hair"}]},"OMRu1rPRBHo":{"duration":238.4,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OMRu1rPRBHo","annotations":[{"segment":[91.79942251747346,133.00791317167906],"label":"Snow tubing"}]},"kistIwzeQOU":{"duration":95.365,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=kistIwzeQOU","annotations":[]},"cIRMaWUTHuk":{"duration":143.87,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cIRMaWUTHuk","annotations":[{"segment":[21.32251124058568,131.7506747181452],"label":"Preparing pasta"}]},"WV_MFi9EZeQ":{"duration":56.471,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=WV_MFi9EZeQ","annotations":[{"segment":[12.261761145744288,37.24268027970067],"label":"Removing ice from car"}]},"qYN_YrwUCBs":{"duration":164.514,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=qYN_YrwUCBs","annotations":[{"segment":[15.137595943837754,26.939789391575665],"label":"Playing beach volleyball"},{"segment":[33.09745553822153,52.85330109204369],"label":"Playing beach volleyball"},{"segment":[55.4189953198128,58.49782839313573],"label":"Playing beach 
volleyball"},{"segment":[77.99710452418097,105.9760577223089],"label":"Playing beach volleyball"},{"segment":[114.95707956318253,129.58331513260532],"label":"Playing beach volleyball"},{"segment":[146.26235569422778,155.49997815912636],"label":"Playing beach volleyball"}]},"lHnSteuHdZ4":{"duration":226.581,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lHnSteuHdZ4","annotations":[{"segment":[40.64308112324493,185.20743213728548],"label":"Ice fishing"}]},"xKPF_SpZ1gQ":{"duration":14.397,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xKPF_SpZ1gQ","annotations":[{"segment":[0.01,14.396098],"label":"Calf roping"}]},"QX-uieRfKgg":{"duration":195.652,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QX-uieRfKgg","annotations":[{"segment":[0.01,183.41376131045243],"label":"Waxing skis"}]},"hE-PN3XfQmg":{"duration":69.15,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=hE-PN3XfQmg","annotations":[]},"ilE77hVgjjo":{"duration":109.924,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ilE77hVgjjo","annotations":[{"segment":[0.01,109.881],"label":"Fun sliding down"}]},"jCB1EC3RzWI":{"duration":63.76,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jCB1EC3RzWI","annotations":[{"segment":[7.66981069,16.037166720000002],"label":"Playing water polo"},{"segment":[15.11985426521061,17.20878149921997],"label":"Playing water polo"},{"segment":[24.86818135725429,49.13952636193448],"label":"Playing water polo"}]},"HuUIIKA3o_A":{"duration":193.585,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=HuUIIKA3o_A","annotations":[{"segment":[25.963815912636502,62.49430109204368],"label":"Removing ice from 
car"}]},"4P0zJEfqCoM":{"duration":112.00999999999999,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=4P0zJEfqCoM","annotations":[{"segment":[0.34949473080964055,57.14238786271451],"label":"Getting a piercing"}]},"K_IqYFJKIgk":{"duration":233.2,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=K_IqYFJKIgk","annotations":[{"segment":[0.01,28.37666825856708],"label":"Smoking a cigarette"}]},"zT6ITDwPi0w":{"duration":41.43,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=zT6ITDwPi0w","annotations":[{"segment":[11.244688380655226,40.45502831201247],"label":"Washing hands"}]},"ujl7QIOkrZw":{"duration":121.851,"subset":"testing","resolution":"854x474","url":"https://www.youtube.com/watch?v=ujl7QIOkrZw","annotations":[]},"CYPfbnL0bCI":{"duration":182.35,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CYPfbnL0bCI","annotations":[{"segment":[121.16809048361935,136.2429937597504],"label":"Starting a campfire"}]},"zHXpo62bCaQ":{"duration":130.828,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zHXpo62bCaQ","annotations":[{"segment":[43.05356457067948,108.06000070775112],"label":"Hula hoop"}]},"Bdpqtvdx2T8":{"duration":107.393,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Bdpqtvdx2T8","annotations":[{"segment":[0,23.28787546177847],"label":"Painting fence"},{"segment":[29.319267667706708,105.04674758658348],"label":"Painting fence"}]},"NjXCZYO3zwU":{"duration":43.236,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=NjXCZYO3zwU","annotations":[]},"gYARGADLcmQ":{"duration":112.687,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=gYARGADLcmQ","annotations":[{"segment":[16.173404517940718,52.036171057722306],"label":"Building sandcastles"},{"segment":[66.97899044929797,112.48643799999999],"label":"Building 
sandcastles"}]},"p-lwebxSYyo":{"duration":57.307,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=p-lwebxSYyo","annotations":[{"segment":[5.72174458134788,56.23401991107644],"label":"Painting fence"}]},"dFlmzpAb6AQ":{"duration":202.85,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=dFlmzpAb6AQ","annotations":[{"segment":[6.721431991449706,187.13729934970516],"label":"Sailing"}]},"-L-LiCO1v-s":{"duration":162.49,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=-L-LiCO1v-s","annotations":[{"segment":[75.28938075074022,127.25679844064511],"label":"Bathing dog"}]},"A20uQSBtYMs":{"duration":136.84,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=A20uQSBtYMs","annotations":[]},"pKxZtNYsSNQ":{"duration":38.662,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=pKxZtNYsSNQ","annotations":[]},"p1gH8y8X0kA":{"duration":97.06,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=p1gH8y8X0kA","annotations":[{"segment":[0.3018932146049013,84.83263822238095],"label":"Playing bagpipes"}]},"pp_J6AzUu4s":{"duration":216.62,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pp_J6AzUu4s","annotations":[{"segment":[113.20959520748832,203.77727137347895],"label":"Preparing pasta"}]},"0xBP-TSsqb0":{"duration":94.16,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=0xBP-TSsqb0","annotations":[]},"q0P0EvJOfRQ":{"duration":68.22,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=q0P0EvJOfRQ","annotations":[{"segment":[0.2,65.45234115444617],"label":"Playing accordion"}]},"17nr8u_ze0o":{"duration":57.284,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=17nr8u_ze0o","annotations":[{"segment":[44.89904593048809,51.49922350058155],"label":"Blowing 
leaves"}]},"C9yUg3e1dEg":{"duration":174.59,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=C9yUg3e1dEg","annotations":[{"segment":[49.15457098283932,67.12798751950078],"label":"Snatch"},{"segment":[72.57447737909516,84.55675507020281],"label":"Snatch"},{"segment":[90.2755694227769,112.06152886115444],"label":"Snatch"},{"segment":[118.324992199688,158.62901716068643],"label":"Snatch"}]},"nOvrsb0XyGk":{"duration":135.419,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nOvrsb0XyGk","annotations":[{"segment":[0.2,13.943274857524914],"label":"Throwing darts"},{"segment":[20.069865325225255,28.097811455315355],"label":"Throwing darts"},{"segment":[34.435663663281225,40.77351587124709],"label":"Throwing darts"},{"segment":[46.05505937788532,53.66048202744437],"label":"Throwing darts"},{"segment":[58.519502053551534,70.98394472921774],"label":"Throwing darts"},{"segment":[72.88530039160752,81.54703174249418],"label":"Throwing darts"},{"segment":[86.19479002833583,92.95516571683275],"label":"Throwing darts"},{"segment":[97.18040052214334,105.84213069266771],"label":"Throwing darts"},{"segment":[109.4335802371295,121.89802277379097],"label":"Throwing darts"},{"segment":[126.33451926989082,135.21877400000002],"label":"Throwing darts"}]},"CcKeoeSZVnE":{"duration":222.87,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=CcKeoeSZVnE","annotations":[{"segment":[0.01,218.3452391918806],"label":"Smoking hookah"}]},"tEnhhhWqnWQ":{"duration":188.338,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tEnhhhWqnWQ","annotations":[{"segment":[20.65982917035164,181.72819792784574],"label":"Hula 
hoop"}]},"tCN1GlR4cf4":{"duration":231.85,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=tCN1GlR4cf4","annotations":[]},"SYAatoDZalo":{"duration":209.375,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=SYAatoDZalo","annotations":[]},"CuZpm0Il6YM":{"duration":18.18,"subset":"validation","resolution":"270x480","url":"https://www.youtube.com/watch?v=CuZpm0Il6YM","annotations":[{"segment":[5.670825385988037,15.855741404650555],"label":"Walking the dog"}]},"-OLPVREPy6Y":{"duration":171.085,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-OLPVREPy6Y","annotations":[{"segment":[9.875400840873635,170.884647],"label":"Painting furniture"}]},"CTIVIXvCI3c":{"duration":187.29,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CTIVIXvCI3c","annotations":[{"segment":[70.41699594242273,173.55890285803432],"label":"Ballet"}]},"4j_LRkB4T6c":{"duration":223.45,"subset":"testing","resolution":"568x320","url":"https://www.youtube.com/watch?v=4j_LRkB4T6c","annotations":[]},"oAJlaJ8xcwY":{"duration":23.569,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=oAJlaJ8xcwY","annotations":[{"segment":[12.243726320481391,19.303172126885077],"label":"Applying sunscreen"}]},"PHgD2MRUxtk":{"duration":53.17,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=PHgD2MRUxtk","annotations":[]},"gee7iCW34_E":{"duration":116.25999999999999,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gee7iCW34_E","annotations":[{"segment":[91.79698734,97.25200038],"label":"Starting a 
campfire"}]},"nXMZ43OChxE":{"duration":20.225,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nXMZ43OChxE","annotations":[]},"fHEUdQY1cdc":{"duration":176.147,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fHEUdQY1cdc","annotations":[{"segment":[0.2,175.881],"label":"Bullfighting"}]},"R_UR3iHl1TQ":{"duration":140.23,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=R_UR3iHl1TQ","annotations":[]},"nlXavqvgf6A":{"duration":150.35,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nlXavqvgf6A","annotations":[{"segment":[121.02993805928237,142.13981097659908],"label":"BMX"}]},"4yZ1agUX004":{"duration":114.01,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=4yZ1agUX004","annotations":[{"segment":[49.53475613854486,62.34086903612198],"label":"Playing beach volleyball"},{"segment":[66.43170988455539,80.12713601872075],"label":"Playing beach volleyball"}]},"fwU91k-Vmb8":{"duration":117.922,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fwU91k-Vmb8","annotations":[{"segment":[0.2,117.721],"label":"Riding bumper cars"}]},"vZuy4OyWBsQ":{"duration":164.14,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=vZuy4OyWBsQ","annotations":[{"segment":[105.16791107644305,124.61501872074882],"label":"Washing dishes"}]},"68kccxCgbys":{"duration":216.74,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=68kccxCgbys","annotations":[{"segment":[111.4103765351014,165.50949715912637],"label":"Playing saxophone"}]},"ujS0VNOXeVg":{"duration":166.023,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ujS0VNOXeVg","annotations":[{"segment":[30.002852898870795,139.73790233800779],"label":"Braiding 
hair"}]},"V6BpJThrTok":{"duration":113.71000000000001,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=V6BpJThrTok","annotations":[]},"AEQ9zIG9LqA":{"duration":80.18,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AEQ9zIG9LqA","annotations":[{"segment":[18.762520296730237,77.67683393026522],"label":"Tumbling"}]},"c9bvuUO9Q0Q":{"duration":43.166,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=c9bvuUO9Q0Q","annotations":[{"segment":[13.380809672386894,14.99457566302652],"label":"Hitting a pinata"},{"segment":[23.803048361934476,24.811652106084242],"label":"Hitting a pinata"},{"segment":[39.06658502340093,41.01655226209048],"label":"Hitting a pinata"}]},"5BVcwR8hEo0":{"duration":11.94,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=5BVcwR8hEo0","annotations":[{"segment":[0.4914833455538221,11.888333],"label":"Tennis serve with ball bouncing"}]},"fynO9wmckLA":{"duration":106.25999999999999,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fynO9wmckLA","annotations":[{"segment":[52.364798751950076,59.98752262090484],"label":"Triple jump"}]},"tT-HAk-DEDk":{"duration":174.8,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tT-HAk-DEDk","annotations":[]},"mWsH6LOjzcM":{"duration":73.61,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=mWsH6LOjzcM","annotations":[]},"Cj3W6CW3Zbc":{"duration":61.649,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Cj3W6CW3Zbc","annotations":[{"segment":[0.2,59.602215288611546],"label":"Roof shingle removal"}]},"aGKySEwCMnI":{"duration":200.017,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aGKySEwCMnI","annotations":[{"segment":[0.2,50.08193457560578],"label":"Building 
sandcastles"},{"segment":[60.96931268911141,68.74601134161544],"label":"Building sandcastles"},{"segment":[128.15998904674623,132.51494029214848],"label":"Building sandcastles"},{"segment":[176.37551950853427,185.08542194088264],"label":"Building sandcastles"}]},"xTEXsZO-PVI":{"duration":62.578,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=xTEXsZO-PVI","annotations":[{"segment":[0,62.5778],"label":"Waterskiing"}]},"UMOU6hHyPuQ":{"duration":81.5,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=UMOU6hHyPuQ","annotations":[]},"6fgk5PKVS2o":{"duration":224.955,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6fgk5PKVS2o","annotations":[{"segment":[71.57535725429017,210.53447737909516],"label":"Snow tubing"}]},"EGLJPCJnG64":{"duration":89.68,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=EGLJPCJnG64","annotations":[{"segment":[15.729114664586582,76.56544071762872],"label":"Playing racquetball"}]},"9uHffl5yny4":{"duration":45.67,"subset":"training","resolution":"638x360","url":"https://www.youtube.com/watch?v=9uHffl5yny4","annotations":[{"segment":[4.667323870000001,35.03530157],"label":"Cleaning shoes"}]},"t3eRbi1Uk5E":{"duration":186.201,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=t3eRbi1Uk5E","annotations":[{"segment":[0.01,173.41948073634944],"label":"Elliptical trainer"}]},"ZNBXTNwkEhU":{"duration":150.98,"subset":"testing","resolution":"424x240","url":"https://www.youtube.com/watch?v=ZNBXTNwkEhU","annotations":[]},"ZuuY0xffLYE":{"duration":194.932,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZuuY0xffLYE","annotations":[{"segment":[66.59906820762627,106.4368663026521],"label":"Blow-drying 
hair"}]},"0e5zwJywyqk":{"duration":130.61,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0e5zwJywyqk","annotations":[]},"DFAodsf1dWk":{"duration":231.712,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=DFAodsf1dWk","annotations":[{"segment":[57.157480924434005,59.68337967832809],"label":"Beer pong"},{"segment":[69.06528933564896,74.11708684343712],"label":"Beer pong"},{"segment":[181.2873625443717,220.27738746653762],"label":"Beer pong"}]},"DZ_FojmVFss":{"duration":58.05,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=DZ_FojmVFss","annotations":[]},"xWY7A8Sf-HE":{"duration":60.07,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=xWY7A8Sf-HE","annotations":[{"segment":[0.6061063,42.73311946333853],"label":"Playing kickball"}]},"1926p23ooUM":{"duration":61.26,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1926p23ooUM","annotations":[{"segment":[0.01,51.55479871303695],"label":"Layup drill in basketball"}]},"cbB57ODSydw":{"duration":5.74,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=cbB57ODSydw","annotations":[]},"2HmhRdKRVb4":{"duration":215.78,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2HmhRdKRVb4","annotations":[{"segment":[48.128088923556945,181.7424336973479],"label":"Putting on 
makeup"}]},"t6f_O8a4sSg":{"duration":218.52,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=t6f_O8a4sSg","annotations":[{"segment":[14.999980897195073,30.681779107899008],"label":"Skateboarding"},{"segment":[34.431774332197776,34.77268298895221],"label":"Skateboarding"},{"segment":[36.47722627272438,37.159043586233246],"label":"Skateboarding"},{"segment":[38.86358687000541,40.227221497023145],"label":"Skateboarding"},{"segment":[41.59085612404088,43.63630806456748],"label":"Skateboarding"},{"segment":[45.681760005094084,46.36357731860295],"label":"Skateboarding"},{"segment":[48.06812060237512,49.43175522939285],"label":"Skateboarding"},{"segment":[51.136298513165016,53.18175045369162],"label":"Skateboarding"},{"segment":[54.88629373746378,55.90901970772708],"label":"Skateboarding"},{"segment":[57.95447164825369,59.659014932025855],"label":"Skateboarding"},{"segment":[61.363558215798015,63.40901015632462],"label":"Skateboarding"},{"segment":[66.47718806711453,92.7271546372059],"label":"Skateboarding"},{"segment":[96.8180585182591,102.27259702633003],"label":"Skateboarding"},{"segment":[105.68168359387437,110.45440478843643],"label":"Skateboarding"},{"segment":[118.2953038937884,127.49983762615813],"label":"Skateboarding"},{"segment":[131.5907415072113,143.86345315037093],"label":"Skateboarding"},{"segment":[148.63617434493298,157.84070807730268],"label":"Skateboarding"},{"segment":[162.61342927186476,177.95431882581425],"label":"Skateboarding"}]},"FbvTQ1-FCag":{"duration":234.69,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=FbvTQ1-FCag","annotations":[{"segment":[16.42745709828393,212.46177847113884],"label":"Archery"}]},"ueKSGag6w4Q":{"duration":102.19,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ueKSGag6w4Q","annotations":[{"segment":[41.370558767550705,63.05223889235569],"label":"Playing 
saxophone"}]},"EPql2nTJb9I":{"duration":73.31,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=EPql2nTJb9I","annotations":[{"segment":[32.06453196,59.34086068],"label":"Preparing salad"},{"segment":[23.76051,28.033718],"label":"Preparing salad"}]},"4OIkfJ_IkpA":{"duration":88.7,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=4OIkfJ_IkpA","annotations":[{"segment":[0.2,19.649659828393137],"label":"Cutting the grass"},{"segment":[26.845309906396256,34.04095998439938],"label":"Cutting the grass"}]},"MldT9eNxLrQ":{"duration":60.047,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=MldT9eNxLrQ","annotations":[]},"JhqQGe0TkBU":{"duration":133.93,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=JhqQGe0TkBU","annotations":[{"segment":[14.834941258970359,17.969083778471138],"label":"Plataform diving"}]},"zoSNO_k9RoM":{"duration":45.001,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=zoSNO_k9RoM","annotations":[{"segment":[0.01,35.98619813094231],"label":"Putting in contact lenses"}]},"dXTfrVz9A9o":{"duration":110.75999999999999,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dXTfrVz9A9o","annotations":[{"segment":[9.184136279999999,87.02002353],"label":"Shoveling snow"}]},"gwKy0W1xof4":{"duration":75.88,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=gwKy0W1xof4","annotations":[{"segment":[0.01,65.32803744149766],"label":"Playing 
piano"}]},"a6Wwa7qvlrA":{"duration":153.67,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=a6Wwa7qvlrA","annotations":[{"segment":[17.50057505304212,143.3608750920437],"label":"Snowboarding"}]},"fh68-PXZ9Oo":{"duration":93.345,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fh68-PXZ9Oo","annotations":[{"segment":[0.01,79.76434945397816],"label":"Disc dog"}]},"Jk3oA2mhPD8":{"duration":170.2,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Jk3oA2mhPD8","annotations":[{"segment":[50.36447382,105.2811218299072],"label":"Tumbling"}]},"e6Dhfn1d3ic":{"duration":99.34,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=e6Dhfn1d3ic","annotations":[{"segment":[17.35650318876755,92.05145441185647],"label":"Shaving legs"}]},"ZpqkapUghHo":{"duration":169.65,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZpqkapUghHo","annotations":[]},"nVk5nIE-6bM":{"duration":197.626,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nVk5nIE-6bM","annotations":[{"segment":[62.488912624239724,193.93110714953272],"label":"Drum corps"}]},"5v9p5jBN_Hg":{"duration":112.72999999999999,"subset":"training","resolution":"640x354","url":"https://www.youtube.com/watch?v=5v9p5jBN_Hg","annotations":[{"segment":[26.02877179576979,27.96334267248241],"label":"Long jump"},{"segment":[33.06357498381568,37.108586816942065],"label":"Long jump"},{"segment":[38.16380729514894,42.736429367378776],"label":"Long jump"},{"segment":[47.6607915990109,51.35406327273499],"label":"Long jump"},{"segment":[54.87146486675793,66.12714996763135],"label":"Long jump"},{"segment":[68.9410712428497,79.84501618432084],"label":"Long jump"},{"segment":[82.13132715600624,104.46682726053041],"label":"Long 
jump"}]},"3gsF785TAmg":{"duration":108.18,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=3gsF785TAmg","annotations":[{"segment":[5.482663806552262,20.834122464898595],"label":"Volleyball"}]},"6WlnhNQOWFA":{"duration":127.15,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=6WlnhNQOWFA","annotations":[{"segment":[3.767424336973479,101.72045709828393],"label":"Hopscotch"},{"segment":[104.89302496099843,126.901],"label":"Hopscotch"}]},"aK0238Rz6UA":{"duration":215.67000000000002,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aK0238Rz6UA","annotations":[{"segment":[74.01225787999999,143.96136832],"label":"Archery"}]},"-zHX3Gdx6I4":{"duration":50.34,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=-zHX3Gdx6I4","annotations":[{"segment":[1.6084451271450857,45.38953688065522],"label":"High jump"}]},"q8-iXvYyCGg":{"duration":15.77,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=q8-iXvYyCGg","annotations":[{"segment":[2.270795631825273,15.257291731669268],"label":"Hopscotch"}]},"gvHcfC9snJA":{"duration":6.73,"subset":"training","resolution":"360x360","url":"https://www.youtube.com/watch?v=gvHcfC9snJA","annotations":[{"segment":[3.2881040639625585,4.737811287051483],"label":"Removing curlers"}]},"fY2IeYSxY4U":{"duration":185.95,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fY2IeYSxY4U","annotations":[{"segment":[19.870919746489857,166.60114165],"label":"Rock 
climbing"}]},"u20epr7tSEU":{"duration":146.008,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=u20epr7tSEU","annotations":[]},"IWDWVvf_R8w":{"duration":84.27,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IWDWVvf_R8w","annotations":[]},"1NbXNvyT-rY":{"duration":141.433,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=1NbXNvyT-rY","annotations":[]},"JKCwcCMAnxg":{"duration":207.98,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=JKCwcCMAnxg","annotations":[{"segment":[92.46419297999999,154.89384975],"label":"Tango"}]},"INmaUkmVK24":{"duration":22.01,"subset":"training","resolution":"176x144","url":"https://www.youtube.com/watch?v=INmaUkmVK24","annotations":[{"segment":[2.040358034321373,11.53916770670827],"label":"Shaving legs"}]},"qQT-Fx20yiE":{"duration":63.76,"subset":"testing","resolution":"1920x1072","url":"https://www.youtube.com/watch?v=qQT-Fx20yiE","annotations":[]},"BjLF4jvF1is":{"duration":139.529,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=BjLF4jvF1is","annotations":[]},"3nvKiZeVfGY":{"duration":15.117,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3nvKiZeVfGY","annotations":[{"segment":[0.01,11.472835549862586],"label":"Curling"}]},"7WrW0JyhHXU":{"duration":212.05,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7WrW0JyhHXU","annotations":[]},"rBmFxH3BUOw":{"duration":127.223,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rBmFxH3BUOw","annotations":[{"segment":[27.380620904836192,110.51453510140405],"label":"Waxing 
skis"}]},"WMLB3GIdrp4":{"duration":38.592,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WMLB3GIdrp4","annotations":[]},"RQwgREUcreI":{"duration":217.46,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=RQwgREUcreI","annotations":[{"segment":[24.086184879875194,217.45414965986396],"label":"Removing curlers"}]},"4pg401NKSEo":{"duration":74.931,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4pg401NKSEo","annotations":[]},"GSbq_ankn7w":{"duration":45.558,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=GSbq_ankn7w","annotations":[]},"7uk4sqUBhks":{"duration":185.783,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7uk4sqUBhks","annotations":[{"segment":[11.557250832314548,16.75801569079044],"label":"Skiing"},{"segment":[23.11450607337209,30.626721980059493],"label":"Skiing"},{"segment":[38.42786926777334,41.02825169701128],"label":"Skiing"},{"segment":[45.651153793434304,51.14085003293663],"label":"Skiing"},{"segment":[58.653065939624035,82.345439183792],"label":"Skiing"},{"segment":[86.96834128021503,114.99468523977956],"label":"Skiing"},{"segment":[126.26300909981067,145.33248024755562],"label":"Skiing"},{"segment":[154.8672158214281,179.42638320867536],"label":"Skiing"}]},"RPr1ZbIGLwU":{"duration":223.423,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=RPr1ZbIGLwU","annotations":[{"segment":[63.4248705148206,138.34985491419658],"label":"Installing 
carpet"}]},"E0QSccoRKWg":{"duration":139.09,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=E0QSccoRKWg","annotations":[]},"HCsfxpQ8C1M":{"duration":8.963,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=HCsfxpQ8C1M","annotations":[{"segment":[0.01,8.097036469848728],"label":"Kneeling"}]},"EHA-wy50jB0":{"duration":88.422,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EHA-wy50jB0","annotations":[]},"LiJ4bdfa8R4":{"duration":125.388,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=LiJ4bdfa8R4","annotations":[]},"q92zSoMudWU":{"duration":37.292,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=q92zSoMudWU","annotations":[{"segment":[4.770487007800312,17.62753126053042],"label":"Beach soccer"},{"segment":[24.492378418096724,35.08053250858034],"label":"Beach soccer"}]},"qOy73DJ8J6E":{"duration":223.446,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=qOy73DJ8J6E","annotations":[]},"Q684UQhx7yI":{"duration":29.095,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Q684UQhx7yI","annotations":[{"segment":[14.275111259457054,27.77981846573209],"label":"Calf roping"}]},"GsPE54bMFwE":{"duration":134.653,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GsPE54bMFwE","annotations":[]},"9FYVaOGQV6o":{"duration":90.907,"subset":"validation","resolution":"1920x1072","url":"https://www.youtube.com/watch?v=9FYVaOGQV6o","annotations":[{"segment":[0.2,9.360068770097742],"label":"Playing ice hockey"},{"segment":[34.60389060460377,53.891305039956706],"label":"Playing ice hockey"},{"segment":[87.6442789048362,90.906121],"label":"Playing ice 
hockey"}]},"MjHlAvy2qVY":{"duration":120.141,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=MjHlAvy2qVY","annotations":[{"segment":[7.309612661466459,8.62159442121685],"label":"Applying sunscreen"},{"segment":[12.744965666146646,20.99170815600624],"label":"Applying sunscreen"},{"segment":[23.80309764118565,25.864783263650548],"label":"Applying sunscreen"}]},"oQBrINtoodw":{"duration":207.49,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=oQBrINtoodw","annotations":[]},"60Fyun_Szw4":{"duration":16.58,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=60Fyun_Szw4","annotations":[{"segment":[0,14.855288012480502],"label":"Discus throw"}]},"nlYl30Dqfc0":{"duration":158.36,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=nlYl30Dqfc0","annotations":[{"segment":[134.88970204680186,150.45389843681747],"label":"Cumbia"}]},"TxgvL4ZJZbo":{"duration":10.4,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TxgvL4ZJZbo","annotations":[{"segment":[0.7599186000000001,8.389584000000001],"label":"Windsurfing"}]},"Eaq95CTpfZI":{"duration":66.386,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Eaq95CTpfZI","annotations":[{"segment":[8.160995327102803,10.9534984423676],"label":"Shuffleboard"},{"segment":[43.09064953271028,59.210892523364485],"label":"Shuffleboard"}]},"PqCIr0WeMNM":{"duration":85.264,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=PqCIr0WeMNM","annotations":[]},"q4QPF-qNBTY":{"duration":129.1,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=q4QPF-qNBTY","annotations":[{"segment":[6.2420817,115.2689898595944],"label":"Using uneven 
bars"}]},"Gp5Vos--_0k":{"duration":38.453,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Gp5Vos--_0k","annotations":[{"segment":[0.01,38.421],"label":"Welding"}]},"19YCgLDhfoE":{"duration":106.209,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=19YCgLDhfoE","annotations":[{"segment":[9.637782754190987,12.780211103100642],"label":"Baton twirling"},{"segment":[15.426466554814038,23.03445097849005],"label":"Baton twirling"},{"segment":[39.2427656202346,47.84309583830313],"label":"Baton twirling"},{"segment":[50.98552418721279,54.12795253612245],"label":"Baton twirling"},{"segment":[66.532274966029,84.06371733363024],"label":"Baton twirling"},{"segment":[88.69466437412868,90.34857403144954],"label":"Baton twirling"}]},"FBtrv1Vg8hU":{"duration":80.71000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FBtrv1Vg8hU","annotations":[{"segment":[31.101408584234523,50.996235889235564],"label":"Horseback riding"}]},"q_DajTs5gIA":{"duration":135.489,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=q_DajTs5gIA","annotations":[{"segment":[23.24260530421217,135.441],"label":"Clipping cat claws"}]},"IpCYVZ-sMTc":{"duration":110.133,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IpCYVZ-sMTc","annotations":[{"segment":[0.01,110.081],"label":"Waxing skis"}]},"NttiEaiT1Nc":{"duration":234.313,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NttiEaiT1Nc","annotations":[]},"2SKZB0bfqF8":{"duration":164.82,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2SKZB0bfqF8","annotations":[{"segment":[7.968160686427457,13.108909516380654],"label":"Shoveling snow"},{"segment":[20.562995319812792,26.988931357254287],"label":"Shoveling snow"},{"segment":[30.07338065522621,43.95340249609985],"label":"Shoveling 
snow"},{"segment":[96.90311544461778,115.15277379095164],"label":"Shoveling snow"}]},"fqFqQjH8M20":{"duration":183.299,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=fqFqQjH8M20","annotations":[{"segment":[0.01,171.66757130142753],"label":"Longboarding"}]},"-vnSFKJNB94":{"duration":106.37,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-vnSFKJNB94","annotations":[]},"HCntSYltlmA":{"duration":215.04,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HCntSYltlmA","annotations":[{"segment":[120.08973166926677,131.83034789391576],"label":"Archery"},{"segment":[170.07121216848674,197.57779875195007],"label":"Archery"}]},"F2x2fynkbAQ":{"duration":102.35,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=F2x2fynkbAQ","annotations":[{"segment":[28.567673946957882,59.86017940717628],"label":"Polishing shoes"},{"segment":[62.25458658346334,95.29740561622464],"label":"Cleaning shoes"}]},"N9iuF02-CwQ":{"duration":228.3,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=N9iuF02-CwQ","annotations":[]},"vkk-sOo8Hqw":{"duration":200.06,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=vkk-sOo8Hqw","annotations":[{"segment":[153.246484224805,191.32402205663027],"label":"Mowing the lawn"}]},"FYv-2ehoGWo":{"duration":128.94,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FYv-2ehoGWo","annotations":[{"segment":[13.678545639784776,118.68149878315131],"label":"Playing 
violin"}]},"kNkYgYnJTXk":{"duration":202.525,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=kNkYgYnJTXk","annotations":[{"segment":[5.371158903498998,19.904882995319813],"label":"Canoeing"},{"segment":[44.233073322932924,187.99056003120126],"label":"Canoeing"}]},"iJqLgrShN-w":{"duration":211.047,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iJqLgrShN-w","annotations":[{"segment":[1.6462259438377536,206.10748816848673],"label":"Skiing"}]},"8-WiWyjqAv4":{"duration":31.02,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8-WiWyjqAv4","annotations":[{"segment":[1.6671411856474259,29.25953588143526],"label":"Kayaking"}]},"e9AsyRGUzTc":{"duration":53.151,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=e9AsyRGUzTc","annotations":[{"segment":[34.82558489859594,45.02450619032761],"label":"Applying sunscreen"}]},"Hev5Vj0Jjuk":{"duration":165.51,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=Hev5Vj0Jjuk","annotations":[{"segment":[92.43854549828252,133.4936536944471],"label":"Bungee jumping"}]},"4cd0sNdLmT4":{"duration":5.457,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4cd0sNdLmT4","annotations":[{"segment":[0.31291263650546025,5.201],"label":"Roof shingle removal"}]},"UGCn1zgYboQ":{"duration":23.94,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UGCn1zgYboQ","annotations":[{"segment":[0.2,22.74510140405616],"label":"Table soccer"}]},"FEGf7ejk8Fc":{"duration":58.89,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=FEGf7ejk8Fc","annotations":[{"segment":[5.328202664586584,47.770092854914196],"label":"Putting on 
makeup"}]},"jVw5KawqUIg":{"duration":215.598,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=jVw5KawqUIg","annotations":[]},"9JrRZ9i1sXo":{"duration":201.85,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9JrRZ9i1sXo","annotations":[{"segment":[20.783417067294938,185.1613504399376],"label":"Shoveling snow"}]},"u024Wa07GNs":{"duration":17.79,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=u024Wa07GNs","annotations":[{"segment":[12.606193447737908,12.619700580000002],"label":"Preparing salad"}]},"XztphLXwZjU":{"duration":126.967,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=XztphLXwZjU","annotations":[{"segment":[0.01,68.33621785687754],"label":"Laying tile"},{"segment":[77.05156158355177,120.82635619344774],"label":"Laying tile"}]},"RJ5U2y6ccvA":{"duration":52.222,"subset":"training","resolution":"638x480","url":"https://www.youtube.com/watch?v=RJ5U2y6ccvA","annotations":[{"segment":[4.969613567862714,16.86409850078003],"label":"Waterskiing"},{"segment":[23.544562641185646,42.20098078939158],"label":"Waterskiing"}]},"AK-9sj8btp8":{"duration":210.63,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AK-9sj8btp8","annotations":[{"segment":[25.30167248594706,88.72015027539877],"label":"Cricket"},{"segment":[95.29201325876164,119.60790629720428],"label":"Cricket"},{"segment":[122.23665149054942,126.17976928056716],"label":"Cricket"},{"segment":[135.70897060644333,146.55254452899206],"label":"Cricket"},{"segment":[150.8242554681779,172.83999646244354],"label":"Cricket"}]},"sbT8DfccM7I":{"duration":182.28,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=sbT8DfccM7I","annotations":[{"segment":[53.175869619818805,179.71737753863894],"label":"Fixing 
bicycle"}]},"tCfu0LplM64":{"duration":197.86,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=tCfu0LplM64","annotations":[{"segment":[107.41678542243731,125.9369208400989],"label":"Polishing shoes"}]},"kpu2zZSmphc":{"duration":57.66,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=kpu2zZSmphc","annotations":[]},"RWU7pmO_3Jg":{"duration":234.17000000000002,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RWU7pmO_3Jg","annotations":[]},"B-lmXhRr6bE":{"duration":188.8,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=B-lmXhRr6bE","annotations":[{"segment":[5.007214775665857,180.84881482059282],"label":"Painting"}]},"q5tYHwZLRYU":{"duration":24.08,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=q5tYHwZLRYU","annotations":[{"segment":[0.57596968,7.51890854],"label":"Javelin throw"}]},"96ADEnJy4GU":{"duration":63.62,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=96ADEnJy4GU","annotations":[]},"pPt_fygNjtQ":{"duration":205.68,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=pPt_fygNjtQ","annotations":[]},"IQGg87yZZjs":{"duration":198.949,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IQGg87yZZjs","annotations":[{"segment":[79.14490751058614,149.28902161800758],"label":"Croquet"}]},"FCKkV9mYuI4":{"duration":140.81,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=FCKkV9mYuI4","annotations":[{"segment":[58.6987884,87.88203270000001],"label":"Tai chi"},{"segment":[95.94311895000001,117.7017156],"label":"Tai chi"}]},"0yi-nkwLEnI":{"duration":115.17,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0yi-nkwLEnI","annotations":[{"segment":[0.01,90.01662793042334],"label":"Getting a 
tattoo"}]},"15IRaGI4Ml0":{"duration":183.995,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=15IRaGI4Ml0","annotations":[{"segment":[17.47726012461059,163.90294704049845],"label":"Elliptical trainer"}]},"t9j3GNVm8jw":{"duration":215.64,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=t9j3GNVm8jw","annotations":[{"segment":[40.46400019734789,201.3125984921997],"label":"Playing saxophone"}]},"e9l3PzP4uGQ":{"duration":145.079,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=e9l3PzP4uGQ","annotations":[{"segment":[37.10877379095164,94.12957254290171],"label":"Baking cookies"},{"segment":[101.3703088923557,126.48661310452417],"label":"Baking cookies"}]},"Df7AnfIEhYw":{"duration":5.55,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=Df7AnfIEhYw","annotations":[]},"VshQp9mHeh4":{"duration":106.86,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=VshQp9mHeh4","annotations":[{"segment":[20.12579994,87.47697870000002],"label":"Playing kickball"}]},"uTVjevTM6V0":{"duration":188.06,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uTVjevTM6V0","annotations":[{"segment":[24.031931819999997,115.31340443999999],"label":"Snatch"}]},"g_Cz69Q5bKM":{"duration":44.24,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=g_Cz69Q5bKM","annotations":[{"segment":[3.8153868954758194,26.638962558502342],"label":"Playing squash"},{"segment":[30.901196567862716,40.80057878315133],"label":"Playing squash"}]},"uEaxyzKonCY":{"duration":163.47,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=uEaxyzKonCY","annotations":[{"segment":[0.010000000000000002,163.34097021023697],"label":"Playing 
badminton"}]},"yTv7XBpDKBQ":{"duration":30.26,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=yTv7XBpDKBQ","annotations":[]},"_BWSmg81iMM":{"duration":34.3,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=_BWSmg81iMM","annotations":[{"segment":[0.01,27.714916842433702],"label":"Playing bagpipes"}]},"nKQsVIJCvUQ":{"duration":194.026,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nKQsVIJCvUQ","annotations":[]},"ZJKwP17_gww":{"duration":30.21,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZJKwP17_gww","annotations":[]},"GOxmnVFdMfY":{"duration":161.727,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GOxmnVFdMfY","annotations":[{"segment":[0.2,132.2991145042672],"label":"Riding bumper cars"}]},"8iHklV25LaE":{"duration":29.91,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=8iHklV25LaE","annotations":[{"segment":[0.6998588143525741,1.7263184087363495],"label":"Washing dishes"},{"segment":[13.11068845553822,24.495058502340093],"label":"Washing dishes"}]},"qisMX7KNi7s":{"duration":149.44,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=qisMX7KNi7s","annotations":[{"segment":[65.94321818,130.09909808],"label":"Hand washing clothes"},{"segment":[65.94321818,130.09909808],"label":"Hand washing clothes"}]},"AFs_aK0DY7E":{"duration":56.66,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AFs_aK0DY7E","annotations":[{"segment":[14.76342048,51.23273010920437],"label":"Using the pommel horse"}]},"DDZFQKi1v2U":{"duration":112.34,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DDZFQKi1v2U","annotations":[{"segment":[16.298669152886117,77.98825562402496],"label":"Getting a 
piercing"}]},"VpZzLvzU7_k":{"duration":130.868,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VpZzLvzU7_k","annotations":[]},"rLlm6h0Nfvo":{"duration":216.898,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rLlm6h0Nfvo","annotations":[{"segment":[56.153837753510146,173.19897659906397],"label":"Hula hoop"}]},"UUohrz_ldJ4":{"duration":15.117,"subset":"testing","resolution":"480x480","url":"https://www.youtube.com/watch?v=UUohrz_ldJ4","annotations":[]},"xizEiQZfJa4":{"duration":157.15,"subset":"testing","resolution":"192x144","url":"https://www.youtube.com/watch?v=xizEiQZfJa4","annotations":[]},"-wcSkAAeOK8":{"duration":224.82,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=-wcSkAAeOK8","annotations":[{"segment":[69.04558034321373,221.52635257410296],"label":"Spinning"}]},"2Vf8yqNHaGM":{"duration":29.002,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=2Vf8yqNHaGM","annotations":[{"segment":[0.01,28.941],"label":"Hitting a pinata"}]},"J80e9M4AQ5w":{"duration":189.59,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=J80e9M4AQ5w","annotations":[{"segment":[16.705107845553822,89.7345173650546],"label":"Shoveling snow"}]},"5P_Scs7Vo2c":{"duration":193.79,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=5P_Scs7Vo2c","annotations":[{"segment":[6.953597588801512,53.81479873072474],"label":"Hand washing clothes"},{"segment":[102.48998185233532,191.67742770670827],"label":"Hand washing clothes"}]},"ZbS9R9faBQk":{"duration":44.351,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZbS9R9faBQk","annotations":[{"segment":[0.01,43.796327831197004],"label":"Making an 
omelette"}]},"4Y94njisJd0":{"duration":48.76,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=4Y94njisJd0","annotations":[{"segment":[6.385965678627145,45.46199375975039],"label":"Playing racquetball"}]},"-nl4G-00PtA":{"duration":6.22,"subset":"testing","resolution":"480x480","url":"https://www.youtube.com/watch?v=-nl4G-00PtA","annotations":[]},"4WKwnpn38ws":{"duration":16.463,"subset":"testing","resolution":"476x480","url":"https://www.youtube.com/watch?v=4WKwnpn38ws","annotations":[]},"-sd2XAFkeC0":{"duration":230.62,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-sd2XAFkeC0","annotations":[{"segment":[19.81801815252882,190.3549283799583],"label":"Rafting"},{"segment":[195.03210102332662,210.50274847803385],"label":"Rafting"}]},"Q3FkUH9kImU":{"duration":59.931,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Q3FkUH9kImU","annotations":[{"segment":[43.10148699063963,51.04861582839313],"label":"Shuffleboard"}]},"ejkGxGrEdO4":{"duration":133.887,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ejkGxGrEdO4","annotations":[{"segment":[0.2,124.2547503900156],"label":"Disc dog"}]},"i1c4-zdkOto":{"duration":229.832,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=i1c4-zdkOto","annotations":[]},"uVcyJg_3Fj8":{"duration":81.131,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uVcyJg_3Fj8","annotations":[{"segment":[8.986376021027235,81.130521],"label":"Decorating the Christmas 
tree"}]},"X8o3FbH0gyo":{"duration":170.226,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=X8o3FbH0gyo","annotations":[{"segment":[0.01,170.22548752834467],"label":"Wakeboarding"}]},"8lw8uAimUXI":{"duration":9.66,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8lw8uAimUXI","annotations":[{"segment":[4.008466873634946,6.871657497659906],"label":"Springboard diving"}]},"J5wZTdD4XeU":{"duration":180.187,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=J5wZTdD4XeU","annotations":[{"segment":[5.72474248427394,157.2620322039001],"label":"Doing fencing"}]},"Ym_hy49DaS4":{"duration":220.01,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ym_hy49DaS4","annotations":[{"segment":[81.83898673946958,212.91862168486742],"label":"Shaving legs"}]},"goSGYjLTGnQ":{"duration":65.13,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=goSGYjLTGnQ","annotations":[{"segment":[0,65.1314739229025],"label":"Breakdancing"}]},"Fky1ioAUt38":{"duration":54.103,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Fky1ioAUt38","annotations":[{"segment":[0.01,21.590664586583465],"label":"Mooping floor"},{"segment":[34.57879875195008,54.061],"label":"Mooping floor"}]},"tqVzDmYHY8U":{"duration":134.096,"subset":"testing","resolution":"640x356","url":"https://www.youtube.com/watch?v=tqVzDmYHY8U","annotations":[]},"SzQHOcBrSso":{"duration":121.185,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SzQHOcBrSso","annotations":[]},"JGRpVcxltKE":{"duration":172.409,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JGRpVcxltKE","annotations":[{"segment":[26.089822758190326,58.63485939469579],"label":"Making a lemonade"},{"segment":[68.3176802121685,79.07637000936037],"label":"Making a 
lemonade"},{"segment":[92.52473225585022,100.59374960374416],"label":"Making a lemonade"},{"segment":[121.30422746333855,153.5802968549142],"label":"Making a lemonade"}]},"UISTzYOTSKU":{"duration":101.216,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UISTzYOTSKU","annotations":[]},"wyyO9ynxqmE":{"duration":102.447,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=wyyO9ynxqmE","annotations":[]},"XOUyGnr6iTM":{"duration":124.04,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XOUyGnr6iTM","annotations":[{"segment":[0.01,115.3329697597504],"label":"Playing racquetball"}]},"QpJ5npI8qO0":{"duration":145.94,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=QpJ5npI8qO0","annotations":[{"segment":[3.6413104524180966,140.41803432137286],"label":"Tai chi"}]},"sq0cKsoX7mg":{"duration":11.564,"subset":"validation","resolution":"480x480","url":"https://www.youtube.com/watch?v=sq0cKsoX7mg","annotations":[{"segment":[1.028270840873635,2.1106611996879874],"label":"Baton twirling"},{"segment":[5.0150753291731665,10.589385677067083],"label":"Baton twirling"}]},"sa5ZuxFDZNw":{"duration":124.9,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=sa5ZuxFDZNw","annotations":[{"segment":[7.44356867071847,124.861],"label":"Dodgeball"}]},"GavbA_SHlVM":{"duration":127.32,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=GavbA_SHlVM","annotations":[{"segment":[25.209995319812794,30.966608424336975],"label":"Bungee jumping"},{"segment":[73.84345085803432,79.99707176287052],"label":"Bungee jumping"}]},"w6xH78z3l08":{"duration":70.519,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=w6xH78z3l08","annotations":[{"segment":[18.446805476194534,63.96062480745425],"label":"Changing car 
wheel"}]},"zxzBoK5t0XY":{"duration":232.34,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zxzBoK5t0XY","annotations":[{"segment":[40.45469256,205.91419926],"label":"Horseback riding"}]},"qgJzdtHCebM":{"duration":122.625,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=qgJzdtHCebM","annotations":[{"segment":[0.01,119.79333269009932],"label":"Bullfighting"}]},"Y39-bbpK_TY":{"duration":50.132,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=Y39-bbpK_TY","annotations":[]},"Son8UiVlzu0":{"duration":126.317,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Son8UiVlzu0","annotations":[]},"HpJ2pr0ykqo":{"duration":45.93,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HpJ2pr0ykqo","annotations":[{"segment":[0.01,38.19062991731669],"label":"Painting fence"}]},"C1djGYqkhYI":{"duration":107.184,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=C1djGYqkhYI","annotations":[{"segment":[0.01,99.99173503501393],"label":"Baton twirling"}]},"aYT4LSpcT88":{"duration":184.204,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aYT4LSpcT88","annotations":[]},"r7E7cVoJ0JE":{"duration":56.17,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=r7E7cVoJ0JE","annotations":[{"segment":[0.01,56.101],"label":"Roof shingle removal"}]},"uBls-XJdcBs":{"duration":14.17,"subset":"validation","resolution":"240x176","url":"https://www.youtube.com/watch?v=uBls-XJdcBs","annotations":[{"segment":[0.07220479525121053,14.059875222042487],"label":"Playing water polo"}]},"gGs1qNxsFTg":{"duration":19.9,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gGs1qNxsFTg","annotations":[{"segment":[6.568692667706708,18.063904836193448],"label":"Playing rubik 
cube"}]},"4l8r_wBuJ6Y":{"duration":39.94,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4l8r_wBuJ6Y","annotations":[{"segment":[0.010000000000000002,39.938321995464854],"label":"Cleaning windows"}]},"7EeO9V2B574":{"duration":173.99,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7EeO9V2B574","annotations":[]},"jAlYwQevBxA":{"duration":219.865,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jAlYwQevBxA","annotations":[{"segment":[18.833730529595016,143.47878348909657],"label":"Snowboarding"}]},"cMndipw7_As":{"duration":109.97,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=cMndipw7_As","annotations":[{"segment":[10.976948517940718,101.19374414976599],"label":"Welding"}]},"c05oRliUOxk":{"duration":221.32999999999998,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=c05oRliUOxk","annotations":[{"segment":[13.530188854074925,207.8387309101497],"label":"Horseback riding"}]},"eZbdiuUu0S8":{"duration":193.678,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eZbdiuUu0S8","annotations":[{"segment":[91.24895369692338,176.75707918112644],"label":"Swimming"}]},"YmVdnkDo0xQ":{"duration":64.0,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YmVdnkDo0xQ","annotations":[{"segment":[11.79281432,31.21794851794072],"label":"Washing hands"}]},"hWZ3B28_fAs":{"duration":103.47,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=hWZ3B28_fAs","annotations":[{"segment":[0.6453728549141966,102.7756271450858],"label":"Playing 
guitarra"}]},"ym2trHhFDUs":{"duration":3.69,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ym2trHhFDUs","annotations":[]},"Iq9cAZxki9Y":{"duration":39.66,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Iq9cAZxki9Y","annotations":[{"segment":[6.310799831513259,31.987093263650543],"label":"Rock-paper-scissors"}]},"F67zl57FSXE":{"duration":101.63,"subset":"validation","resolution":"658x480","url":"https://www.youtube.com/watch?v=F67zl57FSXE","annotations":[{"segment":[1.36391486,97.04976599063963],"label":"Snatch"}]},"-VcxQ6i6Ejk":{"duration":102.75,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=-VcxQ6i6Ejk","annotations":[{"segment":[0.01,37.34845401707224],"label":"Cricket"},{"segment":[83.51306670770232,102.54832199546485],"label":"Cricket"}]},"m3yLm_dJU94":{"duration":140.13,"subset":"validation","resolution":"656x480","url":"https://www.youtube.com/watch?v=m3yLm_dJU94","annotations":[{"segment":[10.274920397815913,14.647226950078004],"label":"Cheerleading"},{"segment":[20.112610140405618,23.610455382215292],"label":"Cheerleading"},{"segment":[46.56506478159127,50.937371333853356],"label":"Cheerleading"},{"segment":[69.95690483619346,80.88767121684867],"label":"Cheerleading"},{"segment":[91.16259161466459,94.00459087363495],"label":"Cheerleading"},{"segment":[118.70812289391576,120.89427617004682],"label":"Cheerleading"},{"segment":[132.91811918876758,133.5739651716069],"label":"Cheerleading"}]},"rYXmAD8u7N8":{"duration":195.88,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rYXmAD8u7N8","annotations":[{"segment":[126.51448346957878,173.26983605616223],"label":"Playing violin"},{"segment":[86.48212275819033,103.28960244617784],"label":"Playing 
violin"}]},"tl2hDYGBfqU":{"duration":227.51,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tl2hDYGBfqU","annotations":[{"segment":[8.873210438621626,134.87279866704873],"label":"Beach soccer"}]},"Ws5jA8cMKas":{"duration":182.463,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Ws5jA8cMKas","annotations":[{"segment":[0.01,3.1214368665423464],"label":"Blow-drying hair"}]},"d3VVwwVkJBs":{"duration":15.186,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=d3VVwwVkJBs","annotations":[{"segment":[3.624703427457098,11.750672549141965],"label":"Blowing leaves"}]},"fJ4xMCc5SKk":{"duration":28.33,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fJ4xMCc5SKk","annotations":[{"segment":[1.2574531684867394,20.22955360530421],"label":"Tumbling"}]},"GrqBqKMW4ps":{"duration":61.49,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GrqBqKMW4ps","annotations":[{"segment":[9.549205928237129,44.19506864274571],"label":"Using parallel bars"}]},"h1t5QZjERms":{"duration":175.1,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=h1t5QZjERms","annotations":[{"segment":[2.8018257325007,79.4429120549306],"label":"Playing badminton"},{"segment":[93.35286010633249,150.62911678857546],"label":"Playing badminton"}]},"IJAR9ERJt4s":{"duration":31.19,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IJAR9ERJt4s","annotations":[{"segment":[3.602690064795663,12.188292868533981],"label":"Playing bagpipes"}]},"wAqQ-8OxhXY":{"duration":86.08,"subset":"training","resolution":"1262x720","url":"https://www.youtube.com/watch?v=wAqQ-8OxhXY","annotations":[{"segment":[7.922786006240249,84.73352491419656],"label":"Walking the 
dog"}]},"vWiNs8nEF4g":{"duration":174.27,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=vWiNs8nEF4g","annotations":[]},"yA3AD9jU7QU":{"duration":207.89,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yA3AD9jU7QU","annotations":[{"segment":[14.65991957834211,40.56499795444852],"label":"Washing hands"},{"segment":[57.4032988989177,84.2798177141281],"label":"Washing hands"},{"segment":[99.17523778038928,177.21428638840985],"label":"Washing hands"}]},"SEebwJMNMQs":{"duration":237.56,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=SEebwJMNMQs","annotations":[{"segment":[27.05479705928237,232.7453784301032],"label":"Playing violin"}]},"w6Avae5on_0":{"duration":37.85,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=w6Avae5on_0","annotations":[{"segment":[0,37.730432878315135],"label":"Playing badminton"}]},"EmM2yXiiz5c":{"duration":32.323,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EmM2yXiiz5c","annotations":[{"segment":[0.2,31.467492303297675],"label":"Running a marathon"}]},"L4rKeN_4CLk":{"duration":113.732,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=L4rKeN_4CLk","annotations":[{"segment":[10.468230185647425,59.97053902964118],"label":"Surfing"},{"segment":[66.53536134945398,97.05291375507021],"label":"Surfing"}]},"ZYcZZJ0XItM":{"duration":140.39,"subset":"validation","resolution":"270x480","url":"https://www.youtube.com/watch?v=ZYcZZJ0XItM","annotations":[{"segment":[7.008441510140406,17.521103775351015],"label":"Springboard diving"},{"segment":[23.65349009672387,32.63305578159127],"label":"Springboard diving"},{"segment":[38.54642830577223,51.46824234009361],"label":"Springboard diving"},{"segment":[55.62950448673947,71.83652547893915],"label":"Springboard diving"},{"segment":[77.53088420592823,84.97735331045241],"label":"Springboard 
diving"},{"segment":[84.75833951326054,101.403388099844],"label":"Springboard diving"},{"segment":[105.34563644929797,116.95336770046802],"label":"Springboard diving"},{"segment":[120.895616049922,136.66460944773792],"label":"Springboard diving"}]},"AtbTioB1_Kw":{"duration":9.43,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=AtbTioB1_Kw","annotations":[]},"OW5Tyr8MICg":{"duration":15.12,"subset":"training","resolution":"480x480","url":"https://www.youtube.com/watch?v=OW5Tyr8MICg","annotations":[{"segment":[3.85584822,14.82247074102964],"label":"Brushing hair"}]},"Vvi0HQ6Pu7c":{"duration":8.104,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Vvi0HQ6Pu7c","annotations":[{"segment":[0.01,6.7225224490655915],"label":"Doing a powerbomb"}]},"sahQxLbmM0U":{"duration":37.32,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sahQxLbmM0U","annotations":[{"segment":[11.060450472440667,27.30184870982839],"label":"Clean and jerk"}]},"k-I4zLbgdpM":{"duration":95.78,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=k-I4zLbgdpM","annotations":[]},"BII4aKnegaU":{"duration":106.33,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BII4aKnegaU","annotations":[{"segment":[1.6582215288611546,6.467063962558503],"label":"Spinning"},{"segment":[34.49100780031201,62.349129485179404],"label":"Spinning"},{"segment":[79.76045553822154,86.72498595943837],"label":"Spinning"}]},"4Oug7S32B-4":{"duration":113.175,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4Oug7S32B-4","annotations":[{"segment":[14.83092190202808,59.676804796255844],"label":"Running a marathon"},{"segment":[84.74812515444619,110.87879707706708],"label":"Running a 
marathon"}]},"vQ82ItCG1x4":{"duration":172.27,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=vQ82ItCG1x4","annotations":[]},"mZYqH82delk":{"duration":53.569,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=mZYqH82delk","annotations":[{"segment":[5.181346190327614,40.94934892355695],"label":"Kneeling"}]},"XptFota2__4":{"duration":194.89,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XptFota2__4","annotations":[{"segment":[2.63679405,31.76381559828393],"label":"Smoking hookah"},{"segment":[32.06442905,132.67852161388456],"label":"Smoking hookah"}]},"YPSV5JEqhBw":{"duration":99.13,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YPSV5JEqhBw","annotations":[{"segment":[0,4.950315132605304],"label":"Shot put"},{"segment":[6.033196567862714,13.14927457098284],"label":"Shot put"},{"segment":[15.624432137285492,29.392496099843996],"label":"Shot put"},{"segment":[32.02235101404056,44.398138845553824],"label":"Shot put"},{"segment":[48.884361934477376,52.13300624024961],"label":"Shot put"},{"segment":[57.08332137285491,62.652425897035876],"label":"Shot put"},{"segment":[63.58060998439937,77.19397659906397],"label":"Shot put"},{"segment":[80.59731825273012,82.91777847113885],"label":"Shot put"},{"segment":[83.38187051482059,96.2217503900156],"label":"Shot put"}]},"qJdn48DsMXc":{"duration":61.88,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qJdn48DsMXc","annotations":[]},"rBCf1qjOwCc":{"duration":147.052,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rBCf1qjOwCc","annotations":[{"segment":[16.97630381949972,103.23427998344424],"label":"Making a lemonade"},{"segment":[113.0988877301092,117.68707790171605],"label":"Making a 
lemonade"}]},"ao6SHL-wo5k":{"duration":75.698,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ao6SHL-wo5k","annotations":[]},"I-1UKECfQko":{"duration":88.143,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=I-1UKECfQko","annotations":[{"segment":[37.521954758190326,88.121],"label":"Snow tubing"}]},"6RxF2UHMYQM":{"duration":210.57999999999998,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6RxF2UHMYQM","annotations":[{"segment":[145.97883859126364,193.59848222620903],"label":"Skateboarding"}]},"HAFNv8zvte4":{"duration":152.79,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HAFNv8zvte4","annotations":[]},"4ivePL3RW0Q":{"duration":163.84,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=4ivePL3RW0Q","annotations":[{"segment":[57.33125548211511,139.37865778071543],"label":"Baking cookies"}]},"zuNQFkkyBNo":{"duration":41.01,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zuNQFkkyBNo","annotations":[{"segment":[24.306716559999998,36.55221528861155],"label":"Smoking a cigarette"},{"segment":[5.485782680000001,20.336105579999998],"label":"Smoking a cigarette"}]},"BSg989GP5ro":{"duration":235.358,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=BSg989GP5ro","annotations":[{"segment":[56.90289391575663,235.301],"label":"Making a lemonade"}]},"8EfkFxoXI_4":{"duration":170.74,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8EfkFxoXI_4","annotations":[{"segment":[27.03953666146646,84.31540249609985],"label":"Fixing bicycle"},{"segment":[101.63136193447738,158.37442901716068],"label":"Fixing 
bicycle"}]},"ORL5yMppl8E":{"duration":165.98,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ORL5yMppl8E","annotations":[{"segment":[49.96411700468019,160.7653712948518],"label":"Preparing pasta"}]},"ejIEsnkvLWY":{"duration":227.81,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ejIEsnkvLWY","annotations":[{"segment":[14.038460218408735,27.5438143525741],"label":"Playing accordion"},{"segment":[36.07351170046802,50.289673946957876],"label":"Playing accordion"},{"segment":[64.15043213728549,75.52336193447738],"label":"Playing accordion"},{"segment":[84.7638673946958,94.0043728549142],"label":"Playing accordion"},{"segment":[109.99755538221528,113.19619188767551],"label":"Playing accordion"},{"segment":[122.08129329173165,129.9001825273011],"label":"Playing accordion"},{"segment":[137.7190717628705,146.2487691107644],"label":"Playing accordion"},{"segment":[170.4162449297972,174.3256895475819],"label":"Playing accordion"},{"segment":[182.14457878315133,203.11341809672385],"label":"Playing accordion"}]},"SB0dU5oDvaE":{"duration":157.66,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=SB0dU5oDvaE","annotations":[]},"hFzMCWZ3Jps":{"duration":103.91,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hFzMCWZ3Jps","annotations":[{"segment":[17.34518792667707,93.04801747581904],"label":"Braiding hair"}]},"QRcwvzq7QUM":{"duration":127.27,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QRcwvzq7QUM","annotations":[{"segment":[28.789302106084243,60.35826096723869],"label":"Playing polo"},{"segment":[69.49141887675506,88.5519223400936],"label":"Playing polo"},{"segment":[107.81097271450857,118.33395900156006],"label":"Playing 
polo"}]},"f1YrGAkQd0U":{"duration":166.719,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=f1YrGAkQd0U","annotations":[{"segment":[46.54027925117005,63.18037909516381],"label":"Playing ten pins"},{"segment":[88.66053198127925,93.87182683307334],"label":"Playing ten pins"},{"segment":[102.97297347893917,105.83333385335415],"label":"Playing ten pins"},{"segment":[109.21375975039003,112.07412012480499],"label":"Playing ten pins"},{"segment":[127.41605304212169,132.3566755070203],"label":"Playing ten pins"},{"segment":[139.8976255850234,148.73873946957877],"label":"Playing ten pins"}]},"G3cINUdHdGo":{"duration":52.292,"subset":"training","resolution":"855x470","url":"https://www.youtube.com/watch?v=G3cINUdHdGo","annotations":[{"segment":[24.840424043681747,26.96144547269891],"label":"Throwing darts"},{"segment":[20.02733695475819,21.006269921996882],"label":"Throwing darts"},{"segment":[25.08515728549142,26.798289978159126],"label":"Throwing darts"}]},"Qm6HmQv5uOo":{"duration":32.76,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Qm6HmQv5uOo","annotations":[{"segment":[0,18.80938306396256],"label":"Playing accordion"}]},"UmRGPupwDA0":{"duration":176.217,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=UmRGPupwDA0","annotations":[]},"iE3I2ICIvHE":{"duration":100.68,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iE3I2ICIvHE","annotations":[{"segment":[0.3561350562906711,97.1963500095617],"label":"Using the balance beam"}]},"h8ywxC9bqCE":{"duration":111.874,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=h8ywxC9bqCE","annotations":[]},"eHxRr7Zhsek":{"duration":196.51,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eHxRr7Zhsek","annotations":[{"segment":[8.276374287624567,189.43701028081125],"label":"Doing 
nails"}]},"N-LaOcSqZaM":{"duration":17.14,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=N-LaOcSqZaM","annotations":[{"segment":[0.01,16.221603744149768],"label":"Volleyball"},{"segment":[0.010000000000000002,17.136326530612244],"label":"Playing beach volleyball"}]},"-zIN0_GSaWM":{"duration":76.72,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=-zIN0_GSaWM","annotations":[]},"znIFzXChO1M":{"duration":7.756,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=znIFzXChO1M","annotations":[{"segment":[0,7.755463],"label":"Hitting a pinata"}]},"FqYhC6_CMfI":{"duration":90.53999999999999,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=FqYhC6_CMfI","annotations":[]},"HWV_ccmZVPA":{"duration":50.318,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HWV_ccmZVPA","annotations":[{"segment":[13.89426252730109,34.539409672386896],"label":"Running a marathon"}]},"bCdIcqRYJvo":{"duration":124.02,"subset":"training","resolution":"640x264","url":"https://www.youtube.com/watch?v=bCdIcqRYJvo","annotations":[{"segment":[10.540946177847115,23.70001021450858],"label":"Playing piano"},{"segment":[28.92368593525741,53.88124771216849],"label":"Playing piano"},{"segment":[56.58982030811233,68.39145804758189],"label":"Playing piano"},{"segment":[91.99473352652106,106.69841333307332],"label":"Playing piano"}]},"6O8QXbjvYKs":{"duration":75.837,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=6O8QXbjvYKs","annotations":[{"segment":[37.9771584524181,45.54892836193448],"label":"Playing ten pins"}]},"AonniE_CsPY":{"duration":28.097,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AonniE_CsPY","annotations":[{"segment":[0.01,19.37362815600624],"label":"Playing 
blackjack"}]},"MR0vMF_5hp8":{"duration":118.608,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=MR0vMF_5hp8","annotations":[{"segment":[4.345886895475819,104.02388845553823],"label":"Removing ice from car"}]},"MeHEDK_Y8sE":{"duration":149.31,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MeHEDK_Y8sE","annotations":[{"segment":[84.55110393135725,112.26895893915756],"label":"Removing curlers"}]},"D_C28uJcrPU":{"duration":67.965,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=D_C28uJcrPU","annotations":[]},"mt3G8TrI-uY":{"duration":153.57999999999998,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mt3G8TrI-uY","annotations":[{"segment":[28.750723057722308,147.34745567082683],"label":"Doing nails"}]},"iUe1t0sN4Jo":{"duration":54.103,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=iUe1t0sN4Jo","annotations":[{"segment":[23.44611232449298,28.08473166926677],"label":"Playing ten pins"}]},"R0YS8JS_0rk":{"duration":62.67,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=R0YS8JS_0rk","annotations":[{"segment":[2.0012818299531983,59.89201964274571],"label":"Using the pommel horse"}]},"V8UMJTrfXdw":{"duration":7.895,"subset":"training","resolution":"480x480","url":"https://www.youtube.com/watch?v=V8UMJTrfXdw","annotations":[{"segment":[0.2,7.641],"label":"Table soccer"}]},"C-M4L_WDzK8":{"duration":209.56,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=C-M4L_WDzK8","annotations":[{"segment":[0.01,206.9446761543931],"label":"Playing field 
hockey"}]},"Nq3b9OReeEI":{"duration":91.417,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Nq3b9OReeEI","annotations":[{"segment":[0.7130808112324494,18.112252605304214],"label":"Hurling"},{"segment":[26.526606177847114,88.7072529173167],"label":"Hurling"}]},"Vc9S_2EL9mQ":{"duration":156.735,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Vc9S_2EL9mQ","annotations":[]},"ZLXhlWhrkno":{"duration":235.22,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZLXhlWhrkno","annotations":[{"segment":[142.35604992199688,212.06648673946958],"label":"Archery"}]},"zCI36lTmBqY":{"duration":165.56,"subset":"training","resolution":"328x240","url":"https://www.youtube.com/watch?v=zCI36lTmBqY","annotations":[{"segment":[1.807968699700369,160.1343689547582],"label":"Smoking hookah"}]},"vYk5m-xNk-Y":{"duration":99.52000000000001,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=vYk5m-xNk-Y","annotations":[]},"kbK-9Me0BnA":{"duration":90.883,"subset":"training","resolution":"658x480","url":"https://www.youtube.com/watch?v=kbK-9Me0BnA","annotations":[{"segment":[0.01,90.841],"label":"Camel ride"}]},"sfPGQnxbJ3U":{"duration":30.05,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=sfPGQnxbJ3U","annotations":[{"segment":[2.437479410920437,29.99974659594384],"label":"Doing kickboxing"}]},"Tssba1kYoo4":{"duration":140.41,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=Tssba1kYoo4","annotations":[{"segment":[23.8764526798759,129.89666295163806],"label":"Removing curlers"}]},"bM5VpCdPOrw":{"duration":177.029,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bM5VpCdPOrw","annotations":[{"segment":[163.26307201133434,175.10170991543325],"label":"Camel 
ride"}]},"j4-w606GnYw":{"duration":195.05,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=j4-w606GnYw","annotations":[{"segment":[2.4337098283931358,193.19553042121683],"label":"Zumba"}]},"0_ZLjaAEx5s":{"duration":163.98,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0_ZLjaAEx5s","annotations":[{"segment":[0.01,156.0489828549142],"label":"Braiding hair"}]},"y0Kio7VOk5o":{"duration":28.58,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=y0Kio7VOk5o","annotations":[{"segment":[14.626035384711388,27.022492204680187],"label":"Plataform diving"}]},"IOTpJ2QgoxU":{"duration":116.22,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IOTpJ2QgoxU","annotations":[]},"wuHLveuf6PQ":{"duration":46.812,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wuHLveuf6PQ","annotations":[{"segment":[0.01,37.06175016646856],"label":"Rollerblading"}]},"wtG2efnMFh0":{"duration":226.3,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=wtG2efnMFh0","annotations":[]},"fvUckFYBQks":{"duration":106.464,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fvUckFYBQks","annotations":[{"segment":[19.590761310452418,90.98082371294852],"label":"Braiding hair"}]},"_rkRndMV8J8":{"duration":96.11,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=_rkRndMV8J8","annotations":[]},"Igm1Mx4Ng1k":{"duration":139.087,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Igm1Mx4Ng1k","annotations":[{"segment":[0.2,122.35921400312012],"label":"Cutting the 
grass"}]},"MYi6p113py8":{"duration":220.78,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MYi6p113py8","annotations":[{"segment":[9.299426915852145,163.256605856071],"label":"Tango"}]},"Bu0YfgUJmhc":{"duration":105.721,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Bu0YfgUJmhc","annotations":[]},"5Yq5GMPBguI":{"duration":118.05,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=5Yq5GMPBguI","annotations":[{"segment":[0.60178558887736,41.97453605616708],"label":"Mowing the lawn"},{"segment":[54.84605842376832,117.18100246115151],"label":"Mowing the lawn"}]},"YMOYyt3kSOs":{"duration":66.43,"subset":"testing","resolution":"562x480","url":"https://www.youtube.com/watch?v=YMOYyt3kSOs","annotations":[]},"iHQ-Kg4Fms8":{"duration":50.62,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iHQ-Kg4Fms8","annotations":[{"segment":[26.61272386583463,43.74910688923557],"label":"Drinking coffee"}]},"aeEMrTpNUss":{"duration":56.15,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aeEMrTpNUss","annotations":[{"segment":[38.88658845,50.0409156],"label":"Using parallel bars"}]},"OeWhSE4s6Fw":{"duration":233.384,"subset":"testing","resolution":"810x480","url":"https://www.youtube.com/watch?v=OeWhSE4s6Fw","annotations":[]},"fZ729TTJq8g":{"duration":165.489,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=fZ729TTJq8g","annotations":[{"segment":[70.97702808112325,128.27484711388456],"label":"BMX"}]},"F79Tzy2i7bI":{"duration":57.075,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=F79Tzy2i7bI","annotations":[{"segment":[9.784165366614664,47.94409204368175],"label":"Putting in contact 
lenses"}]},"eu3CLlQ9c2s":{"duration":22.73,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eu3CLlQ9c2s","annotations":[{"segment":[0.6738080026602424,15.213874628705149],"label":"Fixing bicycle"}]},"rMwNMDgXpYs":{"duration":143.895,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rMwNMDgXpYs","annotations":[]},"WwwfCw1dYlg":{"duration":142.849,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WwwfCw1dYlg","annotations":[{"segment":[18.05112197140947,28.07952306663695],"label":"Swimming"},{"segment":[38.776484234879604,54.821925987243574],"label":"Swimming"},{"segment":[57.27331292163251,65.96459387082966],"label":"Swimming"},{"segment":[80.45006211949159,98.94688992823713],"label":"Swimming"},{"segment":[108.30673086739469,117.44371845085801],"label":"Swimming"}]},"GJDl-whUpq4":{"duration":109.645,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=GJDl-whUpq4","annotations":[{"segment":[0.01,94.74264274570983],"label":"Knitting"}]},"Iiwz1JtC7rk":{"duration":106.116,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Iiwz1JtC7rk","annotations":[{"segment":[6.9493946957878325,12.90601872074883],"label":"Braiding hair"},{"segment":[24.81926677067083,26.639346333853354],"label":"Braiding hair"},{"segment":[33.75483931357254,45.33738221528861],"label":"Braiding hair"},{"segment":[49.8049344773791,52.4523728549142],"label":"Braiding hair"},{"segment":[54.768881435257406,60.394687987519504],"label":"Braiding hair"},{"segment":[62.88733229329174,70.50000936037442],"label":"Braiding hair"},{"segment":[72.1549391575663,82.08451794071763],"label":"Braiding hair"},{"segment":[83.07747581903277,86.05634945397816],"label":"Braiding 
hair"}]},"1U8y7e22SQg":{"duration":141.062,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1U8y7e22SQg","annotations":[{"segment":[14.150066941498979,113.46419902249586],"label":"Mooping floor"}]},"6qojVSLbyUU":{"duration":139.134,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=6qojVSLbyUU","annotations":[{"segment":[26.904904836193445,118.26840093603744],"label":"Trimming branches or hedges"}]},"gUFRtx51OJc":{"duration":65.13,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gUFRtx51OJc","annotations":[{"segment":[36.68249828,48.14215736],"label":"Getting a piercing"},{"segment":[12.02778068798752,33.13541444],"label":"Getting a piercing"}]},"jmerKGN0VPs":{"duration":47.183,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=jmerKGN0VPs","annotations":[{"segment":[1.030516801624446,46.52047093915757],"label":"Hand car wash"}]},"OrVgspx6Axs":{"duration":186.921,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=OrVgspx6Axs","annotations":[{"segment":[13.797756518937346,164.3098515345612],"label":"BMX"}]},"KNyM0KvDHMM":{"duration":189.87,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KNyM0KvDHMM","annotations":[{"segment":[43.82777935725429,170.27684547581902],"label":"Fixing bicycle"},{"segment":[134.23168693,154.11476861],"label":"Fixing bicycle"}]},"B7Q1egUKmZg":{"duration":83.2,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=B7Q1egUKmZg","annotations":[{"segment":[3.49609984399376,77.82059282371296],"label":"Layup drill in 
basketball"}]},"fjoMc-eo-jA":{"duration":128.45,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=fjoMc-eo-jA","annotations":[]},"WgL0EbRWjF8":{"duration":29.304,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=WgL0EbRWjF8","annotations":[]},"IsVpeZoK5FI":{"duration":70.66,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IsVpeZoK5FI","annotations":[{"segment":[7.271429017160687,63.790263650546024],"label":"Doing step aerobics"}]},"59LX-HXwcVk":{"duration":20.95,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=59LX-HXwcVk","annotations":[]},"gV6W0rAHyZg":{"duration":118.42,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=gV6W0rAHyZg","annotations":[{"segment":[22.3638408,104.1821928],"label":"Playing lacrosse"}]},"QLVgy-1ydr0":{"duration":215.389,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QLVgy-1ydr0","annotations":[{"segment":[0.01,103.80222411870945],"label":"Bullfighting"},{"segment":[107.15717640076453,190.35999166301542],"label":"Bullfighting"}]},"sEENChh3zno":{"duration":69.89,"subset":"training","resolution":"300x240","url":"https://www.youtube.com/watch?v=sEENChh3zno","annotations":[{"segment":[0.8722878427457098,13.629497542901715],"label":"Washing dishes"}]},"R58R070r2GE":{"duration":84.66,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=R58R070r2GE","annotations":[{"segment":[4.490529183072085,72.37676316380656],"label":"Using the balance beam"},{"segment":[75.67862279719189,77.52766419188768],"label":"Using the balance beam"}]},"dtvIQ9Pd-Bg":{"duration":169.901,"subset":"training","resolution":"854x468","url":"https://www.youtube.com/watch?v=dtvIQ9Pd-Bg","annotations":[{"segment":[14.131903415961757,158.89128445801782],"label":"Sharpening 
knives"}]},"UPfQNZl0_dg":{"duration":127.59,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=UPfQNZl0_dg","annotations":[{"segment":[29.646815912636505,48.35017628705148],"label":"Putting on makeup"},{"segment":[69.043255850234,97.51152886115446],"label":"Putting on makeup"},{"segment":[104.07863182527302,108.05869422776911],"label":"Putting on makeup"}]},"FI_1dsErIXE":{"duration":151.57999999999998,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FI_1dsErIXE","annotations":[{"segment":[27.76017806,121.76795807],"label":"Polishing forniture"}]},"mFWRIp164r4":{"duration":161.449,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=mFWRIp164r4","annotations":[{"segment":[77.30125897035882,158.3990780031201],"label":"Tug of war"}]},"PaAJ-6HT6bw":{"duration":33.76,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=PaAJ-6HT6bw","annotations":[{"segment":[16.912283931357255,32.879160686427454],"label":"Hammer throw"}]},"Ho0eNR4jvEg":{"duration":141.061,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Ho0eNR4jvEg","annotations":[{"segment":[10.78315132605304,16.28475914546786],"label":"Scuba diving"},{"segment":[25.52746028208475,29.928746537616604],"label":"Scuba diving"},{"segment":[87.3655321723073,141.061223],"label":"Scuba diving"}]},"wO9DGYTAX70":{"duration":150.28,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=wO9DGYTAX70","annotations":[{"segment":[14.54828838,56.19340433],"label":"Grooming horse"},{"segment":[64.5284554,135.42573002808112],"label":"Grooming horse"}]},"ps0a-GGomX4":{"duration":32.6,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=ps0a-GGomX4","annotations":[{"segment":[11.757516,16.454524],"label":"Javelin 
throw"}]},"CNdCnkKhitI":{"duration":195.91,"subset":"validation","resolution":"642x480","url":"https://www.youtube.com/watch?v=CNdCnkKhitI","annotations":[{"segment":[33.00762966028845,76.1009238776911],"label":"Cricket"},{"segment":[83.1303264848674,136.61491153946957],"label":"Cricket"},{"segment":[176.0406913797192,186.4319821903276],"label":"Cricket"}]},"uR-o9wILN6U":{"duration":191.472,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uR-o9wILN6U","annotations":[{"segment":[7.766400070202808,152.04221675897037],"label":"Slacklining"},{"segment":[164.58793994929798,172.65304771450857],"label":"Slacklining"}]},"KhpFBHrVsCE":{"duration":16.72,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=KhpFBHrVsCE","annotations":[]},"xSWpGhhM1H8":{"duration":66.386,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xSWpGhhM1H8","annotations":[{"segment":[50.521322932917315,60.770525741029644],"label":"Tug of war"}]},"PvrozrgcGf0":{"duration":53.36,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PvrozrgcGf0","annotations":[{"segment":[0.3551231727694574,52.13509568211525],"label":"Putting on makeup"}]},"6TUA9ipKk9I":{"duration":65.57,"subset":"validation","resolution":"418x240","url":"https://www.youtube.com/watch?v=6TUA9ipKk9I","annotations":[{"segment":[9.244836193447739,43.568205928237134],"label":"Plataform diving"}]},"NBXH7A2EO7Q":{"duration":100.195,"subset":"validation","resolution":"853x480","url":"https://www.youtube.com/watch?v=NBXH7A2EO7Q","annotations":[{"segment":[0.01,100.141],"label":"Assembling 
bicycle"}]},"Yf49q7bGpkk":{"duration":188.036,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Yf49q7bGpkk","annotations":[{"segment":[0.01,182.2614060543096],"label":"Bullfighting"}]},"u0WgmhffTyI":{"duration":224.282,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=u0WgmhffTyI","annotations":[{"segment":[3.6738745264096275,97.79504096490389],"label":"Table soccer"}]},"WXEq3OeD68o":{"duration":78.07,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=WXEq3OeD68o","annotations":[{"segment":[1.2178702964118566,62.23317214664587],"label":"Snatch"}]},"0UTrTg3DF5g":{"duration":86.52,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0UTrTg3DF5g","annotations":[]},"98ukqD62X5s":{"duration":173.66,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=98ukqD62X5s","annotations":[]},"bqsWU8HqpxE":{"duration":232.8,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bqsWU8HqpxE","annotations":[{"segment":[0,45.398452418096724],"label":"Pole vault"}]},"4KgCkxpsN2g":{"duration":190.845,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4KgCkxpsN2g","annotations":[{"segment":[8.779172386895475,14.433554602184087],"label":"Rafting"},{"segment":[45.68145631825273,48.35984789391575],"label":"Rafting"},{"segment":[92.99970748829953,119.48602418096723],"label":"Rafting"},{"segment":[150.74972932917316,160.57152808112323],"label":"Rafting"},{"segment":[168.60754524180967,179.91749531981282],"label":"Rafting"}]},"zU6SnkNIdrw":{"duration":177.935,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zU6SnkNIdrw","annotations":[{"segment":[13.04664208064921,17.76564028003297],"label":"Baking cookies"},{"segment":[28.313989196302543,76.05914744889115],"label":"Baking 
cookies"}]},"Cb3IonOw0bs":{"duration":72.33,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Cb3IonOw0bs","annotations":[{"segment":[13.084112324492981,69.70673634945398],"label":"Shaving legs"}]},"Fp_uU-qMDSc":{"duration":120.19,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Fp_uU-qMDSc","annotations":[{"segment":[31.78098133853354,51.46831491107644],"label":"Tai chi"}]},"BI_3bkY3mO4":{"duration":227.347,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BI_3bkY3mO4","annotations":[]},"2imjxY43yYM":{"duration":188.36,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2imjxY43yYM","annotations":[{"segment":[0.01,181.89551087621737],"label":"Cumbia"}]},"T-Ngg5bptUc":{"duration":126.48,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=T-Ngg5bptUc","annotations":[{"segment":[35.30879719188768,117.76174258970359],"label":"Decorating the Christmas tree"}]},"L1oWKMy5ofM":{"duration":136.44,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=L1oWKMy5ofM","annotations":[{"segment":[39.2152277,121.22088],"label":"Cumbia"}]},"W2Mh01F3K5c":{"duration":41.03,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=W2Mh01F3K5c","annotations":[]},"vu65aIIJHtU":{"duration":132.029,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vu65aIIJHtU","annotations":[{"segment":[27.178614664586583,75.56478471138846],"label":"Snowboarding"}]},"k9VKFjPYT3o":{"duration":160.31,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=k9VKFjPYT3o","annotations":[{"segment":[20.126674425117002,92.85475819999998],"label":"Rock climbing"},{"segment":[103.63362172932916,150.88755298829952],"label":"Rock 
climbing"}]},"tN8mcdcwCwc":{"duration":46.905,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tN8mcdcwCwc","annotations":[{"segment":[0,42.511422776911076],"label":"Clipping cat claws"}]},"gJKw6jGjiDE":{"duration":105.49000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gJKw6jGjiDE","annotations":[{"segment":[17.855656092910383,99.81064903548523],"label":"Using the balance beam"}]},"WQAVlmS5IWs":{"duration":92.16,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=WQAVlmS5IWs","annotations":[{"segment":[13.51488299531981,27.317316692667706],"label":"Washing face"},{"segment":[52.478003120124804,56.07238689547582],"label":"Washing face"}]},"c4AVX65S8Qc":{"duration":67.571,"subset":"testing","resolution":"656x480","url":"https://www.youtube.com/watch?v=c4AVX65S8Qc","annotations":[]},"cuyD4bT4Bvc":{"duration":89.003,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cuyD4bT4Bvc","annotations":[{"segment":[15.963112324492982,82.45294539781591],"label":"Rollerblading"}]},"xDD9rWISPpk":{"duration":12.841,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=xDD9rWISPpk","annotations":[{"segment":[0.01,12.726566708751928],"label":"Powerbocking"}]},"cT4EquMmRiw":{"duration":176.82,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cT4EquMmRiw","annotations":[{"segment":[23.520849797204278,78.05396661963418],"label":"Canoeing"},{"segment":[132.31166366013264,155.17150556044416],"label":"Canoeing"}]},"pgmfi9ADZuU":{"duration":11.076,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pgmfi9ADZuU","annotations":[]},"29k1TypoU4w":{"duration":86.704,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=29k1TypoU4w","annotations":[{"segment":[3.5827090483619344,82.55655304212168],"label":"Kneeling"}]},"T84iinm8-V0":
{"duration":165.536,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=T84iinm8-V0","annotations":[{"segment":[9.125756468122251,22.273032169056833],"label":"Doing fencing"},{"segment":[27.94440600083253,67.38623310363627],"label":"Doing fencing"},{"segment":[75.11992469242132,90.32951815036525],"label":"Doing fencing"},{"segment":[97.80542001952412,115.33512095410356],"label":"Doing fencing"}]},"vxwoaEGTzc0":{"duration":143.407,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=vxwoaEGTzc0","annotations":[]},"pEmtlUeJaI8":{"duration":207.866,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=pEmtlUeJaI8","annotations":[]},"VR19Scunfhg":{"duration":137.672,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VR19Scunfhg","annotations":[{"segment":[0.01,113.28583272589329],"label":"Slacklining"}]},"S24MDjduWIY":{"duration":206.81,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=S24MDjduWIY","annotations":[{"segment":[17.429055366296286,40.02227528556925],"label":"Layup drill in basketball"},{"segment":[45.509200123106964,53.25544695257198],"label":"Layup drill in basketball"},{"segment":[59.06513207467074,64.8748171967695],"label":"Layup drill in basketball"},{"segment":[71.33002288799035,76.49418692199689],"label":"Layup drill in basketball"},{"segment":[82.94939256942277,92.95496132293292],"label":"Layup drill in basketball"},{"segment":[101.02396838221529,107.1564137472699],"label":"Layup drill in basketball"},{"segment":[113.6116193946958,140.07796254914194],"label":"Layup drill in basketball"},{"segment":[145.56488734945398,152.66561356162248],"label":"Layup drill in basketball"},{"segment":[157.8297780795632,174.9360730452418],"label":"Layup drill in 
basketball"}]},"DzfQl845nUw":{"duration":197.9,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=DzfQl845nUw","annotations":[]},"INMUrGdA_RA":{"duration":201.503,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=INMUrGdA_RA","annotations":[]},"BLTOTjVYiuE":{"duration":18.81,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BLTOTjVYiuE","annotations":[{"segment":[0.11694851794071763,10.028335413416537],"label":"Triple jump"}]},"OsrRpGbIpKA":{"duration":183.345,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OsrRpGbIpKA","annotations":[{"segment":[34.46649290189294,160.8913050399567],"label":"Hanging wallpaper"}]},"3hZjxdMcG6o":{"duration":11.24,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=3hZjxdMcG6o","annotations":[{"segment":[9.362459550702027,10.677411734789391],"label":"Layup drill in basketball"}]},"8RMrbKCQheM":{"duration":36.363,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=8RMrbKCQheM","annotations":[{"segment":[1.0179215035547657,3.95858611776748],"label":"Painting fence"},{"segment":[12.724028717824611,13.45919487137779],"label":"Painting fence"},{"segment":[19.62328031270829,33.195578532151586],"label":"Painting fence"}]},"p7VTBhv3OLY":{"duration":138.368,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=p7VTBhv3OLY","annotations":[{"segment":[19.744729329173165,27.513147425897035],"label":"Hurling"},{"segment":[29.23946255850234,33.33946099843994],"label":"Hurling"},{"segment":[62.039450078003114,65.27629095163806],"label":"Hurling"},{"segment":[76.49733931357255,79.73418018720749],"label":"Hurling"}]},"wU-8acM-IUM":{"duration":210.03,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wU-8acM-IUM","annotations":[{"segment":[8.846585369578783,26.86740741872075],"label":"Playing 
saxophone"},{"segment":[46.52648601778471,65.20261068689547],"label":"Playing saxophone"},{"segment":[107.469629674883,118.60977421435258],"label":"Playing saxophone"},{"segment":[157.9279314124805,202.16085826037443],"label":"Playing saxophone"}]},"s9PkkDEl6aY":{"duration":204.87,"subset":"testing","resolution":"640x356","url":"https://www.youtube.com/watch?v=s9PkkDEl6aY","annotations":[]},"gSkE0KCvves":{"duration":14.722,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=gSkE0KCvves","annotations":[{"segment":[0.2,12.26404810296412],"label":"Calf roping"}]},"S9QG5MAga68":{"duration":119.7,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=S9QG5MAga68","annotations":[{"segment":[82.27344912081915,94.95176388094376],"label":"Removing curlers"}]},"0N8iIUS660o":{"duration":128.987,"subset":"validation","resolution":"620x348","url":"https://www.youtube.com/watch?v=0N8iIUS660o","annotations":[{"segment":[12.251403426791278,98.42817757009347],"label":"Bullfighting"}]},"ac1DjqY4xHs":{"duration":30.604,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=ac1DjqY4xHs","annotations":[{"segment":[0.01,30.6039],"label":"Hitting a pinata"}]},"RPKcM8q74FE":{"duration":75.256,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RPKcM8q74FE","annotations":[{"segment":[0.2,67.64835685048385],"label":"Riding bumper cars"}]},"wHxB-5jKjbQ":{"duration":196.719,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=wHxB-5jKjbQ","annotations":[{"segment":[5.522461778471139,191.15797659906397],"label":"Canoeing"}]},"wLKePf07V14":{"duration":197.23,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=wLKePf07V14","annotations":[{"segment":[22.6696162,98.5597756],"label":"Wrapping presents"},{"segment":[22.6696162,98.5597756],"label":"Wrapping 
presents"}]},"0vCynjlkM5A":{"duration":13.143,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0vCynjlkM5A","annotations":[{"segment":[0.01,13.142494],"label":"Changing car wheel"}]},"D32TzYSHM08":{"duration":63.09,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=D32TzYSHM08","annotations":[{"segment":[4.183365279999999,47.80430112],"label":"Playing kickball"}]},"p9lhwYsBWdc":{"duration":55.54,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=p9lhwYsBWdc","annotations":[]},"GI8tylrKKlA":{"duration":199.367,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=GI8tylrKKlA","annotations":[{"segment":[15.96174438443798,182.41105831590218],"label":"Clipping cat claws"}]},"y0ca-LbQErY":{"duration":77.65,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=y0ca-LbQErY","annotations":[]},"KPRymTsPCC0":{"duration":105.257,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KPRymTsPCC0","annotations":[]},"8ao9LZLn0Hs":{"duration":138.37,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8ao9LZLn0Hs","annotations":[{"segment":[0.01,138.36721088435374],"label":"Playing violin"}]},"L61yQxW8HVE":{"duration":105.74000000000001,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=L61yQxW8HVE","annotations":[{"segment":[16.991573115985865,25.40487630933809],"label":"Playing flauta"},{"segment":[46.68558438664078,56.2536546849629],"label":"Playing flauta"},{"segment":[87.7622999777134,94.5259345647426],"label":"Playing 
flauta"}]},"V3ZDyR7mVpc":{"duration":213.531,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=V3ZDyR7mVpc","annotations":[{"segment":[37.963422776911074,68.93358346333854],"label":"Waterskiing"},{"segment":[98.90470670826834,116.55436817472699],"label":"Waterskiing"},{"segment":[157.84939469578782,172.50208112324492],"label":"Waterskiing"},{"segment":[186.50446177847115,196.8288159126365],"label":"Waterskiing"}]},"UGjF8G0HLZA":{"duration":72.73,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UGjF8G0HLZA","annotations":[{"segment":[23.598725990639625,72.72492],"label":"Paintball"}]},"YULJjXegG1w":{"duration":116.36,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YULJjXegG1w","annotations":[{"segment":[6.353224024960999,116.3547619047619],"label":"Layup drill in basketball"}]},"N1JcXEim40g":{"duration":47.53,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=N1JcXEim40g","annotations":[{"segment":[13.866369737909517,29.66068393135726],"label":"Using uneven bars"}]},"ziL8fjQI4O4":{"duration":19.83,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ziL8fjQI4O4","annotations":[]},"iwKW9WiQlLA":{"duration":13.05,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=iwKW9WiQlLA","annotations":[]},"iiQQ8xZvZok":{"duration":26.1,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=iiQQ8xZvZok","annotations":[{"segment":[2.117254148669348,22.516182658346334],"label":"Throwing 
darts"}]},"cFcrXdvfxoo":{"duration":15.047,"subset":"training","resolution":"480x480","url":"https://www.youtube.com/watch?v=cFcrXdvfxoo","annotations":[{"segment":[0,14.997566302652107],"label":"Kneeling"}]},"pcaPtKtS03E":{"duration":231.225,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=pcaPtKtS03E","annotations":[{"segment":[1.6232595752809706,10.28064397677948],"label":"Decorating the Christmas tree"},{"segment":[21.463098828715054,40.220765031961825],"label":"Decorating the Christmas tree"},{"segment":[48.87814943346033,53.92829033433447],"label":"Decorating the Christmas tree"},{"segment":[100.10100714232651,106.95476979351284],"label":"Decorating the Christmas tree"}]},"Kkkrap77n5M":{"duration":37.57,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Kkkrap77n5M","annotations":[{"segment":[10.667268439937597,12.132552566302651],"label":"Chopping wood"}]},"YeZz5PZiiwU":{"duration":174.87,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=YeZz5PZiiwU","annotations":[{"segment":[57.56234002285262,67.92901737293981],"label":"Table soccer"},{"segment":[99.84747131926093,116.21590924045125],"label":"Table soccer"}]},"w4QM4nztkkE":{"duration":131.983,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=w4QM4nztkkE","annotations":[{"segment":[36.021333853354136,84.6114992199688],"label":"Tug of war"}]},"BP9MfTepAv4":{"duration":132.38,"subset":"validation","resolution":"240x180","url":"https://www.youtube.com/watch?v=BP9MfTepAv4","annotations":[{"segment":[19.402722308892358,127.195624024961],"label":"Playing accordion"}]},"BSlVLi81VGM":{"duration":76.63,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=BSlVLi81VGM","annotations":[{"segment":[0.01,46.85245949453978],"label":"Shaving 
legs"}]},"yVx2NK2OOts":{"duration":36.13,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yVx2NK2OOts","annotations":[]},"bFm6E4cz5tM":{"duration":179.769,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bFm6E4cz5tM","annotations":[{"segment":[10.192504300787466,12.15260119954136],"label":"Wakeboarding"},{"segment":[22.51311337866909,138.7188580905071],"label":"Wakeboarding"}]},"7ezC4N3ArmQ":{"duration":111.712,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=7ezC4N3ArmQ","annotations":[{"segment":[0.01,111.71120181405895],"label":"Beer pong"}]},"FkRzwWhf8zQ":{"duration":165.512,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FkRzwWhf8zQ","annotations":[]},"bivzXu38CTE":{"duration":219.61,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=bivzXu38CTE","annotations":[]},"mdXSm8Yr6aU":{"duration":120.071,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mdXSm8Yr6aU","annotations":[{"segment":[0.2,114.84181077429446],"label":"Swinging at the playground"}]},"UgTqwLydrmU":{"duration":25.287,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UgTqwLydrmU","annotations":[]},"wOHj64rrjAo":{"duration":52.269,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=wOHj64rrjAo","annotations":[]},"kW_2ihjROgw":{"duration":117.38,"subset":"training","resolution":"640x320","url":"https://www.youtube.com/watch?v=kW_2ihjROgw","annotations":[{"segment":[20.325705583325373,55.84991173796611],"label":"Smoking a cigarette"},{"segment":[72.8795557878315,108.22064691107643],"label":"Smoking a 
cigarette"}]},"DEEgoxzTvdk":{"duration":140.597,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DEEgoxzTvdk","annotations":[{"segment":[33.984173166926674,76.95770826833073],"label":"Wakeboarding"},{"segment":[101.29476131045242,124.99184087363496],"label":"Wakeboarding"}]},"VFC-V06hVj0":{"duration":108.02,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VFC-V06hVj0","annotations":[]},"Gi55CA6ktE8":{"duration":109.134,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Gi55CA6ktE8","annotations":[{"segment":[0.01,96.84628549141966],"label":"Sumo"}]},"dfjl7sS1IGo":{"duration":206.17,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=dfjl7sS1IGo","annotations":[{"segment":[19.93488611544462,50.158745709828395],"label":"Blow-drying hair"},{"segment":[59.483127925116996,104.49738689547581],"label":"Blow-drying hair"},{"segment":[107.0696302652106,118.64472542901716],"label":"Blow-drying hair"},{"segment":[125.40903276131047,164.6395507020281],"label":"Blow-drying hair"}]},"pV1b0Fs6qW8":{"duration":31.324,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pV1b0Fs6qW8","annotations":[{"segment":[0.2,14.318017287051482],"label":"Kite flying"}]},"R8Boo7g8o7I":{"duration":64.67,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=R8Boo7g8o7I","annotations":[]},"fUa3pwpNZ6I":{"duration":154.02,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=fUa3pwpNZ6I","annotations":[{"segment":[18.880016100000002,60.640743900000004],"label":"Using the balance beam"},{"segment":[120.37909041809671,147.29018448361933],"label":"Using the balance 
beam"}]},"fV3gpTBX2Q8":{"duration":181.79,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fV3gpTBX2Q8","annotations":[]},"o4Y6P16eQoY":{"duration":53.15,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=o4Y6P16eQoY","annotations":[]},"NJQROeaBiVE":{"duration":88.654,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NJQROeaBiVE","annotations":[{"segment":[7.88336368556783,71.08857674882995],"label":"Kneeling"}]},"_hzMRy2_q4g":{"duration":159.08,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=_hzMRy2_q4g","annotations":[{"segment":[10.91563806552262,121.32768954758191],"label":"Making a lemonade"}]},"wjZhqbB0i_Y":{"duration":44.4,"subset":"testing","resolution":"294x240","url":"https://www.youtube.com/watch?v=wjZhqbB0i_Y","annotations":[]},"1wjnveHAhGE":{"duration":131.076,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1wjnveHAhGE","annotations":[{"segment":[66.7349953271028,128.57200934579438],"label":"Elliptical trainer"}]},"jdAJ7cJBxnQ":{"duration":9.59,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jdAJ7cJBxnQ","annotations":[{"segment":[5.999260906396255,8.138648212168487],"label":"Playing water 
polo"}]},"z2GmaQXZU90":{"duration":82.62,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=z2GmaQXZU90","annotations":[]},"IToczVhbOc0":{"duration":15.12,"subset":"testing","resolution":"480x480","url":"https://www.youtube.com/watch?v=IToczVhbOc0","annotations":[]},"kYrztBFfaZ8":{"duration":33.76,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=kYrztBFfaZ8","annotations":[{"segment":[0,7.742485413416537],"label":"Cricket"},{"segment":[17.697109516380657,33.603440093603744],"label":"Cricket"}]},"RVKYgN2S9KA":{"duration":79.715,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RVKYgN2S9KA","annotations":[{"segment":[0.2,76.94623868954758],"label":"Doing crunches"}]},"jTpyOsXB2kk":{"duration":59.37,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jTpyOsXB2kk","annotations":[]},"-tQr3fi7nWE":{"duration":39.13,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-tQr3fi7nWE","annotations":[]},"aKLKf4Qd_4U":{"duration":21.94,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aKLKf4Qd_4U","annotations":[{"segment":[2.1020335413416538,21.276680967238686],"label":"Mowing the lawn"}]},"PlUAKvaRd8s":{"duration":29.118,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PlUAKvaRd8s","annotations":[{"segment":[0.2,28.573563644859814],"label":"Snow 
tubing"}]},"ZGL-PmMopeM":{"duration":133.144,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZGL-PmMopeM","annotations":[{"segment":[42.80592496378426,67.93904448720464],"label":"Skiing"},{"segment":[71.88556738757644,82.89428916229778],"label":"Skiing"},{"segment":[89.33335284185176,91.20275777230664],"label":"Skiing"},{"segment":[93.07216334328949,121.94409382846891],"label":"Skiing"}]},"bnItfkwBogY":{"duration":73.05,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bnItfkwBogY","annotations":[{"segment":[12.86913104524181,54.09590483619345],"label":"Hula hoop"}]},"QQtzcCmoHOA":{"duration":107.97,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=QQtzcCmoHOA","annotations":[]},"G-fsWVYrAgc":{"duration":169.645,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=G-fsWVYrAgc","annotations":[{"segment":[34.39240249609984,151.34441809672387],"label":"Making a cake"}]},"slFZFCU1de0":{"duration":183.786,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=slFZFCU1de0","annotations":[]},"xzbIb0CCjTc":{"duration":144.07999999999998,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=xzbIb0CCjTc","annotations":[]},"iyGv338SnrQ":{"duration":70.06,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iyGv338SnrQ","annotations":[{"segment":[1.8847882914860095,60.59098124475704],"label":"Playing 
harmonica"}]},"eKiRykHu734":{"duration":151.302,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eKiRykHu734","annotations":[{"segment":[43.18942745709828,118.23990795631825],"label":"Wakeboarding"}]},"0D7IzGA1yNE":{"duration":90.349,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=0D7IzGA1yNE","annotations":[]},"12qgT1JASV4":{"duration":159.31,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=12qgT1JASV4","annotations":[{"segment":[2.9824419752300293,27.3390514396086],"label":"Doing kickboxing"},{"segment":[32.309788064991984,54.42956575975039],"label":"Doing kickboxing"},{"segment":[74.31251215600624,137.93794062402495],"label":"Doing kickboxing"}]},"YvMAOuGCGFA":{"duration":114.381,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=YvMAOuGCGFA","annotations":[]},"3zpODTqnYLs":{"duration":31.42,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=3zpODTqnYLs","annotations":[]},"r9OvUc28Qi8":{"duration":69.567,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=r9OvUc28Qi8","annotations":[{"segment":[9.659066282371294,38.31067862558502],"label":"Arm wrestling"},{"segment":[44.49682219968798,61.4273204024961],"label":"Arm wrestling"}]},"9Zn0zErRckc":{"duration":23.04,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=9Zn0zErRckc","annotations":[{"segment":[2.239274570982839,14.38509984399376],"label":"Long jump"}]},"dvzwVrZkDiI":{"duration":10.98,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=dvzwVrZkDiI","annotations":[{"segment":[0.01,9.560864274570983],"label":"Drinking coffee"}]},"Alv7N6Ynm1Y":{"duration":197.77,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Alv7N6Ynm1Y","annotations":[{"segment":[86.07840045846731,111.37742855020323],"label":"Playing 
saxophone"},{"segment":[117.54792320672419,133.28268458085262],"label":"Playing saxophone"}]},"z8lxaUC1Shk":{"duration":75.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=z8lxaUC1Shk","annotations":[{"segment":[10.120904836193448,69.32527301092044],"label":"Playing bagpipes"}]},"o4_Vi7asvxg":{"duration":88.561,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=o4_Vi7asvxg","annotations":[{"segment":[0.01,73.29986419554477],"label":"BMX"}]},"9I4H8O6B7yM":{"duration":139.71,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9I4H8O6B7yM","annotations":[{"segment":[13.07653666146646,135.34215444617786],"label":"Doing motocross"}]},"IcDadC2tw5c":{"duration":30.256,"subset":"training","resolution":"176x144","url":"https://www.youtube.com/watch?v=IcDadC2tw5c","annotations":[{"segment":[0.2,30.05560090702948],"label":"Blowing leaves"}]},"IIAaKRUze8A":{"duration":188.268,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=IIAaKRUze8A","annotations":[{"segment":[52.27859302011016,54.624598205154065],"label":"Fixing the roof"},{"segment":[98.02569412846628,105.06370968359799],"label":"Fixing the roof"},{"segment":[130.57651607095045,138.78753340786784],"label":"Fixing the roof"}]},"4HC2-Unzuu4":{"duration":71.38,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=4HC2-Unzuu4","annotations":[{"segment":[13.682358814352575,61.181279251170054],"label":"Mixing drinks"}]},"PmyAt4zuLRY":{"duration":15.535,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=PmyAt4zuLRY","annotations":[{"segment":[0.01,15.180879250628083],"label":"Beer 
pong"}]},"K_AFdv3PKp0":{"duration":78.53,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=K_AFdv3PKp0","annotations":[{"segment":[2.327718900156006,77.30476978939157],"label":"Plastering"}]},"KRz3aBw_TlQ":{"duration":188.338,"subset":"training","resolution":"568x480","url":"https://www.youtube.com/watch?v=KRz3aBw_TlQ","annotations":[{"segment":[9.989769174440447,18.21663790633258],"label":"Throwing darts"},{"segment":[30.263124263746064,36.72709255308988],"label":"Throwing darts"},{"segment":[45.24777802540673,52.005563055175266],"label":"Throwing darts"},{"segment":[61.99533222961572,71.39746792320673],"label":"Throwing darts"},{"segment":[77.56761947212583,88.43883886784043],"label":"Throwing darts"},{"segment":[99.89769174440447,108.41837686427458],"label":"Throwing darts"},{"segment":[114.58852839313573,122.22776361934478],"label":"Throwing darts"},{"segment":[131.33608254290172,139.85676798751953],"label":"Throwing darts"},{"segment":[146.32073625585022,153.07852126365057],"label":"Throwing darts"},{"segment":[167.7693582371295,173.64569302652106],"label":"Throwing darts"}]},"HQUjviIH9PU":{"duration":193.585,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HQUjviIH9PU","annotations":[]},"C_fqFJyf5SU":{"duration":172.85,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=C_fqFJyf5SU","annotations":[{"segment":[89.19032796705103,167.53781403312857],"label":"Hula hoop"}]},"jwyLw2n_YVw":{"duration":171.875,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jwyLw2n_YVw","annotations":[{"segment":[34.589310070362,103.76792924180967],"label":"Baking cookies"}]},"e0a1lp4ZWu8":{"duration":89.86,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=e0a1lp4ZWu8","annotations":[{"segment":[5.06187013,75.59172098829953],"label":"Horseback 
riding"}]},"FNlEHAIh6LQ":{"duration":173.825,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FNlEHAIh6LQ","annotations":[{"segment":[71.31960167114168,112.53853495636424],"label":"Braiding hair"}]},"763huDov1lM":{"duration":184.065,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=763huDov1lM","annotations":[]},"FrvK75jGdjE":{"duration":140.34,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=FrvK75jGdjE","annotations":[{"segment":[0.2,136.18149801248052],"label":"Playing violin"}]},"-ROljbRVlos":{"duration":7.222,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=-ROljbRVlos","annotations":[{"segment":[0.01,6.880207805277031],"label":"Powerbocking"}]},"uZw4m7-9A7o":{"duration":61.88,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=uZw4m7-9A7o","annotations":[]},"CHaTWk6uqd8":{"duration":230.76,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CHaTWk6uqd8","annotations":[{"segment":[9.89599581123245,188.02392041341653],"label":"Fixing bicycle"}]},"_LAABOgXU1s":{"duration":129.684,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=_LAABOgXU1s","annotations":[{"segment":[27.51474545512433,129.48346938775512],"label":"Changing car wheel"}]},"RZqM8Z0ocjE":{"duration":62.67,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RZqM8Z0ocjE","annotations":[{"segment":[14.701807332293292,24.958882215288607],"label":"Dodgeball"},{"segment":[34.141406396255846,42.933184867394694],"label":"Dodgeball"}]},"chMp_uvII5g":{"duration":45.419,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=chMp_uvII5g","annotations":[{"segment":[3.3301979121684866,45.4182311],"label":"Snow 
tubing"}]},"s1YjWVUu6pM":{"duration":197.091,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=s1YjWVUu6pM","annotations":[{"segment":[59.640926677067085,181.9970546021841],"label":"Kneeling"}]},"rX2tRvv2ZfU":{"duration":217.64,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=rX2tRvv2ZfU","annotations":[{"segment":[0,217.640633],"label":"Preparing salad"}]},"K-ihmOEk7bM":{"duration":45.999,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=K-ihmOEk7bM","annotations":[]},"tRFK1BMKaRo":{"duration":36.08,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=tRFK1BMKaRo","annotations":[]},"ZrSK-fisXGg":{"duration":100.45,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZrSK-fisXGg","annotations":[]},"y4PXBhxpZHk":{"duration":139.32,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=y4PXBhxpZHk","annotations":[{"segment":[10.845872274143302,44.69141121495327],"label":"Hurling"},{"segment":[55.32189252336449,67.68796261682243],"label":"Hurling"},{"segment":[73.11167757009346,90.90146261682244],"label":"Hurling"},{"segment":[100.01330373831776,107.38955607476636],"label":"Hurling"}]},"wgAFLxYOmro":{"duration":120.582,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wgAFLxYOmro","annotations":[]},"DfiPwDSjjHE":{"duration":203.06,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DfiPwDSjjHE","annotations":[{"segment":[40.03491528,182.143026],"label":"Wrapping presents"}]},"RYv3eIxMouY":{"duration":9.706,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=RYv3eIxMouY","annotations":[{"segment":[6.253593166926677,9.191116349453978],"label":"Hitting a 
pinata"}]},"Cl96RZAFcZo":{"duration":17.137,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Cl96RZAFcZo","annotations":[{"segment":[2.4194032808112325,16.45461568330733],"label":"Hand car wash"}]},"N9LTq3gYmsI":{"duration":35.55,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=N9LTq3gYmsI","annotations":[{"segment":[18.645758566978195,33.75048286604361],"label":"Snow tubing"}]},"yipNcBb7q3Y":{"duration":131.03,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yipNcBb7q3Y","annotations":[]},"I5o7mRBAXdE":{"duration":56.38,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=I5o7mRBAXdE","annotations":[]},"VOyKKN3NdXM":{"duration":227.185,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=VOyKKN3NdXM","annotations":[{"segment":[74.78288869786084,78.6815227058062],"label":"Hand car wash"},{"segment":[80.0992077996045,81.87131416685239],"label":"Hand car wash"},{"segment":[87.18763326859606,90.73184600309183],"label":"Hand car wash"},{"segment":[116.25017769146139,119.4399691525076],"label":"Hand car wash"},{"segment":[132.90797754359156,149.2113561222721],"label":"Hand car wash"},{"segment":[189.26095978783152,207.69086598439938],"label":"Hand car wash"}]},"KRSBbX-itrY":{"duration":43.329,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KRSBbX-itrY","annotations":[{"segment":[1.4847688275725657,42.5183800623053],"label":"River 
tubing"}]},"5Y1AJsAE9UE":{"duration":200.32,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=5Y1AJsAE9UE","annotations":[{"segment":[104.90504397168264,135.47753306825584],"label":"Spinning"},{"segment":[171.35341313056114,192.8789411679443],"label":"Spinning"}]},"r9X01daYa5o":{"duration":35.619,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=r9X01daYa5o","annotations":[{"segment":[0.2,35.361],"label":"Welding"}]},"pCcB9E2CoD0":{"duration":147.052,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pCcB9E2CoD0","annotations":[]},"Pu5p7SC3sqg":{"duration":28.89,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Pu5p7SC3sqg","annotations":[{"segment":[0.2703749297971919,8.42668531201248],"label":"Shoveling snow"}]},"Zts8FynhoJs":{"duration":20.09,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Zts8FynhoJs","annotations":[{"segment":[0.01,20.069591871294854],"label":"Brushing teeth"}]},"5aUWT0CmJlc":{"duration":79.39,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5aUWT0CmJlc","annotations":[{"segment":[26.008684118564744,45.20557001560063],"label":"Curling"}]},"nR7tqWpNpe8":{"duration":168.067,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nR7tqWpNpe8","annotations":[{"segment":[19.73853539849746,71.04825451565048],"label":"Rafting"},{"segment":[80.99605722904613,106.65091685832961],"label":"Rafting"},{"segment":[113.71909247047915,135.1854035888592],"label":"Rafting"}]},"mHbsKzezfFQ":{"duration":179.63,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=mHbsKzezfFQ","annotations":[{"segment":[28.443590372186318,147.5423677926709],"label":"Getting a 
haircut"}]},"z6g5QbIPatk":{"duration":204.986,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=z6g5QbIPatk","annotations":[{"segment":[6.586366826928051,170.41115912636505],"label":"Carving jack-o-lanterns"}]},"WoB4lSNBDww":{"duration":155.783,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WoB4lSNBDww","annotations":[{"segment":[17.49352886115445,139.23721216848674],"label":"Hanging wallpaper"}]},"A1U24_iC6ww":{"duration":91.046,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=A1U24_iC6ww","annotations":[{"segment":[10.08458889703588,90.90333653666147],"label":"Plastering"}]},"Z5apXICOLTc":{"duration":64.18,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Z5apXICOLTc","annotations":[]},"teZ99ahe1zY":{"duration":40.82,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=teZ99ahe1zY","annotations":[{"segment":[0.4546524,36.40900468018721],"label":"Playing polo"}]},"7OcfzMIZsxA":{"duration":176.263,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=7OcfzMIZsxA","annotations":[{"segment":[0.01,11.821377535101403],"label":"Having an ice cream"},{"segment":[53.60857254290172,56.082814352574104],"label":"Having an ice cream"},{"segment":[91.27203120124805,94.57102028081123],"label":"Having an ice cream"},{"segment":[140.7568673946958,146.27178159126368],"label":"Having an ice cream"},{"segment":[151.49577379095166,157.26965990639627],"label":"Having an ice cream"},{"segment":[164.41828081123248,166.8928034321373],"label":"Having an ice cream"}]},"HV_yqsiFoKA":{"duration":30.79,"subset":"training","resolution":"640x358","url":"https://www.youtube.com/watch?v=HV_yqsiFoKA","annotations":[{"segment":[0.01,29.917707861273456],"label":"Doing a 
powerbomb"}]},"k5zENHXMSmI":{"duration":206.774,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=k5zENHXMSmI","annotations":[]},"5czN-C_DwmU":{"duration":126.967,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=5czN-C_DwmU","annotations":[]},"g0RK-2ydod0":{"duration":186.55,"subset":"training","resolution":"562x480","url":"https://www.youtube.com/watch?v=g0RK-2ydod0","annotations":[{"segment":[38.68411856474259,181.49541341653668],"label":"Preparing salad"}]},"FaiSWZFSHSE":{"duration":180.512,"subset":"training","resolution":"638x360","url":"https://www.youtube.com/watch?v=FaiSWZFSHSE","annotations":[{"segment":[26.752934290171606,82.79329138221527],"label":"Making a cake"},{"segment":[91.80480609048362,150.66126152886113],"label":"Making a cake"}]},"B7Ddfw2PXOI":{"duration":80.69,"subset":"validation","resolution":"192x144","url":"https://www.youtube.com/watch?v=B7Ddfw2PXOI","annotations":[{"segment":[7.426946140405616,74.14358096099843],"label":"Playing 
bagpipes"}]},"jlqC3lFqxhQ":{"duration":201.688,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jlqC3lFqxhQ","annotations":[{"segment":[7.86616716369335,15.7323343273867],"label":"Snowboarding"},{"segment":[20.45203462560271,40.274775878109956],"label":"Snowboarding"},{"segment":[44.67982948977823,49.71417647454197],"label":"Snowboarding"},{"segment":[60.726810503712656,68.27833098085827],"label":"Snowboarding"},{"segment":[76.7737915176471,191.9344787207488],"label":"Snowboarding"}]},"TIEzvhv6xaI":{"duration":19.25,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=TIEzvhv6xaI","annotations":[{"segment":[0.5778618576693313,18.987667736174007],"label":"Volleyball"}]},"GsR4fagoV-Q":{"duration":118.0,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=GsR4fagoV-Q","annotations":[{"segment":[13.991091120124807,17.857050508580343],"label":"Ping-pong"},{"segment":[29.45492867394696,43.2619264898596],"label":"Ping-pong"},{"segment":[56.884831001560066,62.22353682371295],"label":"Ping-pong"},{"segment":[66.64177612480499,78.42374759438378],"label":"Ping-pong"},{"segment":[86.5238529797192,111.37644904836195],"label":"Ping-pong"}]},"Nb6AQxxrihA":{"duration":149.769,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Nb6AQxxrihA","annotations":[]},"RAluocUocdw":{"duration":87.261,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RAluocUocdw","annotations":[{"segment":[0.01,87.201],"label":"Surfing"}]},"wLcpMJv83XY":{"duration":168.995,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=wLcpMJv83XY","annotations":[{"segment":[28.200759750390016,166.041856474259],"label":"Playing ten 
pins"}]},"iwHZqcYwZYU":{"duration":11.59,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=iwHZqcYwZYU","annotations":[{"segment":[3.2536912324492975,6.67006702652106],"label":"Springboard diving"}]},"TxHDsb8V0Gg":{"duration":208.724,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TxHDsb8V0Gg","annotations":[]},"MsalIjwP3no":{"duration":63.414,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=MsalIjwP3no","annotations":[{"segment":[3.0572684492731774,30.474075874624887],"label":"Plastering"},{"segment":[41.02658810635117,60.0605587673154],"label":"Plastering"}]},"7mDiIJ9r4EU":{"duration":152.184,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7mDiIJ9r4EU","annotations":[{"segment":[45.814466458658345,140.52934789391577],"label":"Wakeboarding"}]},"6-QyMZa-hFM":{"duration":95.62,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=6-QyMZa-hFM","annotations":[]},"9Rd8PpfRNmY":{"duration":73.63,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9Rd8PpfRNmY","annotations":[{"segment":[3.9055166677067086,36.41319951950078],"label":"Doing nails"},{"segment":[48.244617659906396,73.430476],"label":"Doing nails"}]},"-5c9WHk408g":{"duration":142.5,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-5c9WHk408g","annotations":[{"segment":[0.01,142.5007029478458],"label":"Paintball"}]},"TMAXjnsN0Mk":{"duration":207.98,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TMAXjnsN0Mk","annotations":[]},"tnXjdv_GM9g":{"duration":56.17,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=tnXjdv_GM9g","annotations":[{"segment":[13.032331379999999,49.401758879999996],"label":"Long 
jump"}]},"0AjYz-s4Rek":{"duration":81.851,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0AjYz-s4Rek","annotations":[{"segment":[29.734216848673945,37.01730109204368],"label":"Ice fishing"},{"segment":[45.186636505460214,51.696575663026515],"label":"Ice fishing"},{"segment":[57.56828549141966,63.05705772230889],"label":"Ice fishing"}]},"9_wuEvN2OnQ":{"duration":184.6,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=9_wuEvN2OnQ","annotations":[{"segment":[1.7279123818013946,182.2947548424337],"label":"Playing badminton"}]},"7D5ItfgJFVw":{"duration":47.69,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=7D5ItfgJFVw","annotations":[{"segment":[0.01,11.011981560062402],"label":"Washing face"}]},"R9qRR8CcSJA":{"duration":70.06,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=R9qRR8CcSJA","annotations":[{"segment":[14.53551048708615,70.05460317460317],"label":"Cleaning windows"}]},"GMwV9roiBYo":{"duration":166.488,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GMwV9roiBYo","annotations":[{"segment":[0,166.487073],"label":"Playing ice hockey"}]},"4bUxtqX_oxM":{"duration":19.784,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4bUxtqX_oxM","annotations":[{"segment":[0.2,19.58278911564626],"label":"Camel ride"}]},"T3ngvDyxYiE":{"duration":122.486,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=T3ngvDyxYiE","annotations":[{"segment":[0.2,119.76678159126365],"label":"Tug of 
war"}]},"svWiQtzgtOc":{"duration":82.62,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=svWiQtzgtOc","annotations":[{"segment":[9.349408400936037,68.7987500951638],"label":"Breakdancing"}]},"wsQCNAdwFSY":{"duration":170.62,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=wsQCNAdwFSY","annotations":[]},"LbGr5TjfTBY":{"duration":225.582,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LbGr5TjfTBY","annotations":[{"segment":[134.7496770670827,166.76591887675505],"label":"Sharpening knives"}]},"0r-_a6m5k-0":{"duration":46.65,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=0r-_a6m5k-0","annotations":[{"segment":[0.6913641599063963,29.874209225429016],"label":"Preparing salad"}]},"GcjzvBatqb4":{"duration":57.423,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GcjzvBatqb4","annotations":[]},"6URUyopL8qM":{"duration":79.691,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=6URUyopL8qM","annotations":[{"segment":[7.832333274570982,77.45307349297971],"label":"River tubing"}]},"8YkCDiVc7RI":{"duration":107.75999999999999,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8YkCDiVc7RI","annotations":[{"segment":[19.33360049921997,76.66192893603744],"label":"Tumbling"},{"segment":[82.54606821840873,91.12009974414977],"label":"Tumbling"}]},"kDurnkUZCXs":{"duration":29.95,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kDurnkUZCXs","annotations":[{"segment":[0.6074861466458659,29.01914592823713],"label":"Cleaning 
shoes"}]},"ry-AEtNb4c0":{"duration":188.64,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ry-AEtNb4c0","annotations":[{"segment":[45.61471302652106,168.33300549141967],"label":"Painting"}]},"5SzzJMuyC_M":{"duration":91.557,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=5SzzJMuyC_M","annotations":[{"segment":[3.7828026521060845,81.45511778471139],"label":"Plastering"}]},"kZMDKbfIis0":{"duration":76.394,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kZMDKbfIis0","annotations":[{"segment":[4.2904403196536025,56.13325976911076],"label":"Curling"}]},"73LZVxxlJV8":{"duration":62.97,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=73LZVxxlJV8","annotations":[{"segment":[16.16065912792512,52.7063442074883],"label":"Cleaning windows"}]},"9SIPJd4Hls4":{"duration":96.87,"subset":"testing","resolution":"2276x720","url":"https://www.youtube.com/watch?v=9SIPJd4Hls4","annotations":[]},"7ZbH4vHTmVs":{"duration":105.582,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7ZbH4vHTmVs","annotations":[{"segment":[0.01,18.694943346033163],"label":"Raking leaves"},{"segment":[24.624617006448965,28.083593308358182],"label":"Raking leaves"},{"segment":[39.613514314722245,96.43955447379024],"label":"Raking leaves"}]},"k3oPZS_Id3M":{"duration":131.5,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=k3oPZS_Id3M","annotations":[{"segment":[52.92606492831142,100.10821583339525],"label":"Hand washing clothes"}]},"YIb8sArOPww":{"duration":79.67,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YIb8sArOPww","annotations":[{"segment":[3.1071401326053043,76.18707605148207],"label":"Using the balance 
beam"}]},"EHXpUlacN88":{"duration":6.665,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=EHXpUlacN88","annotations":[{"segment":[0.01,6.52295546260755],"label":"Doing a powerbomb"}]},"RW6jDO5QmPI":{"duration":31.46,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RW6jDO5QmPI","annotations":[]},"IV5llWEEfAw":{"duration":73.86,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=IV5llWEEfAw","annotations":[{"segment":[3.0516341653666146,68.92086973478939],"label":"Sailing"}]},"7pcnWE7jpQY":{"duration":85.87,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7pcnWE7jpQY","annotations":[{"segment":[0,85.781],"label":"High jump"}]},"g8Yx4qWzNH0":{"duration":14.98,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=g8Yx4qWzNH0","annotations":[{"segment":[5.140267394695788,12.944127893915757],"label":"Clean and jerk"}]},"nN1u9BVysQs":{"duration":140.899,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nN1u9BVysQs","annotations":[{"segment":[37.14801497035881,76.93375881435257],"label":"Capoeira"}]},"jIYRVlrXJgc":{"duration":10.03,"subset":"training","resolution":"326x240","url":"https://www.youtube.com/watch?v=jIYRVlrXJgc","annotations":[{"segment":[0.9389409360374416,3.53667752574103],"label":"Playing racquetball"}]},"i5-OVkjT0nM":{"duration":201.643,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=i5-OVkjT0nM","annotations":[{"segment":[25.165924841075277,165.1513817695565],"label":"Making a lemonade"}]},"Vwfi0Nya4UQ":{"duration":188.55,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Vwfi0Nya4UQ","annotations":[]},"66nA52ux2Sk":{"duration":216.341,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=66nA52ux2Sk","annotations":[{"segment":[0.2,30.70731825273011],"label":"Playing 
drums"},{"segment":[52.64111700468018,75.5872449297972],"label":"Playing drums"},{"segment":[137.00188143525742,145.77540093603744],"label":"Playing drums"}]},"k6pN0WRr2Ws":{"duration":210.65,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=k6pN0WRr2Ws","annotations":[]},"roTIeCtx94Q":{"duration":104.328,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=roTIeCtx94Q","annotations":[{"segment":[16.387327041852252,72.20159701880625],"label":"Baking cookies"}]},"w87EDMJo5NM":{"duration":88.35,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=w87EDMJo5NM","annotations":[{"segment":[3.4458638889773283,12.40511000031838],"label":"Shaving legs"},{"segment":[34.59647344533238,47.13941800120985],"label":"Shaving legs"},{"segment":[68.36593869578783,78.29002657098285],"label":"Shaving legs"}]},"n9TuUTNpKwg":{"duration":88.47,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=n9TuUTNpKwg","annotations":[{"segment":[13.104516380655229,19.44986115444618],"label":"Triple jump"},{"segment":[44.14152886115445,62.34990951638066],"label":"Triple jump"}]},"DhYJl8KAZN4":{"duration":230.76,"subset":"training","resolution":"322x240","url":"https://www.youtube.com/watch?v=DhYJl8KAZN4","annotations":[{"segment":[11.15999342014497,217.79987069422776],"label":"Playing congas"}]},"_AdJtwjeITM":{"duration":60.12,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_AdJtwjeITM","annotations":[{"segment":[6.283623848673947,31.32433381279251],"label":"Removing curlers"}]},"m73Cod-PmMQ":{"duration":105.396,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=m73Cod-PmMQ","annotations":[]},"N5XBi-uPkAU":{"duration":12.377,"subset":"validation","resolution":"642x480","url":"https://www.youtube.com/watch?v=N5XBi-uPkAU","annotations":[{"segment":[0.6353400936037441,10.45423244929797],"label":"Doing a 
powerbomb"}]},"bscUMcCmyYw":{"duration":14.4,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=bscUMcCmyYw","annotations":[{"segment":[0.30361164,12.838502340093605],"label":"Hopscotch"}]},"e-44ig51evc":{"duration":197.022,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=e-44ig51evc","annotations":[{"segment":[5.839931265277822,196.82083900226758],"label":"Futsal"}]},"-ypKYhjrlXw":{"duration":212.811,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-ypKYhjrlXw","annotations":[{"segment":[45.4837612652106,212.810883],"label":"Applying sunscreen"}]},"uLT7PmD_AJA":{"duration":120.884,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uLT7PmD_AJA","annotations":[{"segment":[8.10633853354134,92.955496099844],"label":"Braiding hair"}]},"evj6y2xZCnM":{"duration":214.69,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=evj6y2xZCnM","annotations":[{"segment":[14.397397251170046,205.91626301092043],"label":"Skateboarding"}]},"aSPPD0fDI3g":{"duration":197.79,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aSPPD0fDI3g","annotations":[{"segment":[12.031383210000001,180.75189746645867],"label":"Putting on makeup"}]},"GfSGZZSGH6g":{"duration":7.756,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=GfSGZZSGH6g","annotations":[{"segment":[0.2,6.558894462616823],"label":"Roof shingle removal"}]},"Zf3RcqHO82M":{"duration":211.57999999999998,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Zf3RcqHO82M","annotations":[{"segment":[2.697645,196.39662234108414],"label":"Tango"}]},"loUYsxA2RHg":{"duration":86.7,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=loUYsxA2RHg","annotations":[{"segment":[8.584552716068643,76.31464579875195],"label":"Mowing the 
lawn"}]},"kxwTPAhlhpw":{"duration":116.22,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=kxwTPAhlhpw","annotations":[{"segment":[41.33731484555382,63.3116077],"label":"Mixing drinks"}]},"oqVNFPUANfs":{"duration":124.9,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=oqVNFPUANfs","annotations":[{"segment":[6.428101404056163,86.68197347893916],"label":"Using the balance beam"},{"segment":[109.0829329173167,117.84852574102965],"label":"Using the balance beam"}]},"xxXspvK4tEY":{"duration":95.76,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=xxXspvK4tEY","annotations":[{"segment":[1.9427191887675508,93.39996099843994],"label":"Knitting"}]},"Io8EwPbNUbE":{"duration":84.15,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Io8EwPbNUbE","annotations":[{"segment":[4.726003338533541,63.01337784711388],"label":"Washing dishes"}]},"FrDKwP9XaOo":{"duration":99.243,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=FrDKwP9XaOo","annotations":[{"segment":[0.01,98.04642141962498],"label":"Tug of war"}]},"CQXhtaNkhrw":{"duration":136.98,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=CQXhtaNkhrw","annotations":[{"segment":[0.010000000000000002,117.50007800312014],"label":"Playing accordion"}]},"Ve0Ufrv-T0s":{"duration":76.3,"subset":"training","resolution":"400x224","url":"https://www.youtube.com/watch?v=Ve0Ufrv-T0s","annotations":[{"segment":[0,70.46775026521061],"label":"Walking the dog"}]},"3j4OdbgaoAM":{"duration":127.66,"subset":"training","resolution":"426x238","url":"https://www.youtube.com/watch?v=3j4OdbgaoAM","annotations":[{"segment":[33.144409516380655,38.91730967238689],"label":"Playing polo"},{"segment":[49.46778237129485,77.33695553822153],"label":"Playing 
polo"}]},"6IoiW8zpps0":{"duration":197.18,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=6IoiW8zpps0","annotations":[]},"Z-gDuGMpClo":{"duration":187.571,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Z-gDuGMpClo","annotations":[{"segment":[24.872882153381376,158.601200099844],"label":"Hand car wash"}]},"OiNsqyHkg7E":{"duration":149.37,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=OiNsqyHkg7E","annotations":[]},"Ygt3z-K-ZMQ":{"duration":99.06,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=Ygt3z-K-ZMQ","annotations":[{"segment":[32.334249609984404,89.13140405616225],"label":"Mixing drinks"}]},"p771liKjycc":{"duration":46.186,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=p771liKjycc","annotations":[{"segment":[4.2913266411237405,14.027950112580966],"label":"Using the monkey bar"},{"segment":[34.00605901327468,42.01172720091729],"label":"Using the monkey bar"}]},"BdAPzcdrk9g":{"duration":55.29,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BdAPzcdrk9g","annotations":[{"segment":[1.7250143837753509,53.64794733541341],"label":"Playing badminton"}]},"ZpyCrs-q-so":{"duration":45.28,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZpyCrs-q-so","annotations":[{"segment":[8.885063962558503,11.000555382215289],"label":"Chopping wood"},{"segment":[20.167684867394694,36.5980015600624],"label":"Chopping 
wood"}]},"BGeB6KEQM94":{"duration":212.184,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BGeB6KEQM94","annotations":[{"segment":[1.489589926454201,9.10304955055345],"label":"Longboarding"},{"segment":[41.54300794888938,53.79074734417947],"label":"Longboarding"},{"segment":[105.42986479459178,114.03638436966048],"label":"Longboarding"},{"segment":[171.3028415422331,190.8330192074883],"label":"Longboarding"}]},"VopRFLr6Zlg":{"duration":206.635,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VopRFLr6Zlg","annotations":[]},"9UpVdljXQ4E":{"duration":46.464,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=9UpVdljXQ4E","annotations":[{"segment":[5.865992199687987,42.963358814352574],"label":"Rollerblading"}]},"ShT4biEuQDo":{"duration":22.29,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=ShT4biEuQDo","annotations":[{"segment":[0,3.0602521684867394],"label":"Shoveling snow"}]},"iubDO1DSMZk":{"duration":77.927,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iubDO1DSMZk","annotations":[{"segment":[7.271444707625315,33.44865148203528],"label":"Decorating the Christmas tree"},{"segment":[40.11414399369936,45.082965684212574],"label":"Decorating the Christmas tree"},{"segment":[53.32393824408816,66.04896940271958],"label":"Decorating the Christmas tree"}]},"pYUqRy9Ujaw":{"duration":71.17,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pYUqRy9Ujaw","annotations":[{"segment":[24.4252008,56.9195236474259],"label":"Windsurfing"},{"segment":[1.775267794071763,21.03115119],"label":"Windsurfing"}]},"j_q_MOjw80Q":{"duration":139.761,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=j_q_MOjw80Q","annotations":[{"segment":[22.239645112335104,122.53608295163806],"label":"Baking 
cookies"}]},"v6P6bBPBXYw":{"duration":223.005,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=v6P6bBPBXYw","annotations":[]},"rSTXxDLYxTQ":{"duration":214.74,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=rSTXxDLYxTQ","annotations":[{"segment":[8.375122487892712,211.05308650858035],"label":"Polishing shoes"}]},"eGLD-0b1LV0":{"duration":112.014,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=eGLD-0b1LV0","annotations":[{"segment":[32.30169266770671,79.98280499219969],"label":"Elliptical trainer"}]},"kpGgixgrvn8":{"duration":226.743,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=kpGgixgrvn8","annotations":[]},"G122-qoH3dA":{"duration":117.283,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=G122-qoH3dA","annotations":[]},"qYVMKC4LA1U":{"duration":105.094,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qYVMKC4LA1U","annotations":[]},"R7BkxDWH74A":{"duration":215.34,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=R7BkxDWH74A","annotations":[]},"rBgMeunuviE":{"duration":92.37,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rBgMeunuviE","annotations":[{"segment":[12.762486559999997,30.34473536],"label":"Playing accordion"},{"segment":[12.762486559999997,30.344735359999998],"label":"Playing accordion"}]},"qRPq2PEiyM0":{"duration":56.59,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=qRPq2PEiyM0","annotations":[{"segment":[0.6179550639625585,53.32069409048361],"label":"Playing 
saxophone"}]},"wQeY9FcRTLY":{"duration":54.13,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=wQeY9FcRTLY","annotations":[]},"Cy2wqpjppy8":{"duration":64.18,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=Cy2wqpjppy8","annotations":[{"segment":[17.709112324492978,61.23150702028081],"label":"Building sandcastles"}]},"Y-CZasxVlx4":{"duration":238.05,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Y-CZasxVlx4","annotations":[{"segment":[7.798862640219895,223.19602058814354],"label":"Playing guitarra"}]},"5hXH-TorJ6M":{"duration":70.33,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=5hXH-TorJ6M","annotations":[{"segment":[4.715979719188767,68.87523868954759],"label":"Mixing drinks"}]},"Eb_9_Bcij0Q":{"duration":56.12,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Eb_9_Bcij0Q","annotations":[{"segment":[3.430613484546032,53.92098420105694],"label":"Shaving"}]},"H2l7GraYFhI":{"duration":189.1,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=H2l7GraYFhI","annotations":[{"segment":[16.815738865834632,179.0728682730109],"label":"Playing saxophone"}]},"Gk_sOXqbwZE":{"duration":17.16,"subset":"testing","resolution":"608x1080","url":"https://www.youtube.com/watch?v=Gk_sOXqbwZE","annotations":[]},"NCKTeRW3OhQ":{"duration":66.78,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=NCKTeRW3OhQ","annotations":[]},"U1nvAxorOPQ":{"duration":115.28999999999999,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=U1nvAxorOPQ","annotations":[{"segment":[59.47310469000001,85.99718478000001],"label":"Grooming horse"},{"segment":[8.63960778,50.107188810000004],"label":"Grooming 
horse"}]},"RCyZgp1F06s":{"duration":36.642,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RCyZgp1F06s","annotations":[{"segment":[0.2,34.361704148416756],"label":"Swinging at the playground"}]},"rfH9VLQAuwY":{"duration":39.242,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=rfH9VLQAuwY","annotations":[{"segment":[2.0202446583463343,12.243907020280812],"label":"Shuffleboard"},{"segment":[18.182201925117006,25.95708288299532],"label":"Shuffleboard"}]},"bMJlN9iPpCI":{"duration":189.522,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=bMJlN9iPpCI","annotations":[{"segment":[0,7.687291116134442],"label":"Applying sunscreen"},{"segment":[96.97813408046525,99.63911946681948],"label":"Applying sunscreen"},{"segment":[115.9006968278731,117.37902204251435],"label":"Applying sunscreen"}]},"-D1gdv_gQyw":{"duration":18.07,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-D1gdv_gQyw","annotations":[]},"7LmSZAoD6-c":{"duration":109.58,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7LmSZAoD6-c","annotations":[{"segment":[0.01,109.57430839002268],"label":"High jump"}]},"svNcNLEPKMc":{"duration":79.51,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=svNcNLEPKMc","annotations":[]},"ZxCdhz4ep0s":{"duration":217.76,"subset":"testing","resolution":"480x270","url":"https://www.youtube.com/watch?v=ZxCdhz4ep0s","annotations":[]},"tJLm2D4J3X0":{"duration":210.1,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=tJLm2D4J3X0","annotations":[{"segment":[14.7491991575663,210.09414965986394],"label":"Triple jump"}]},"Gkv5ibx0tYw":{"duration":97.2,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Gkv5ibx0tYw","annotations":[{"segment":[33.80172074882995,65.17820592823713],"label":"Removing 
curlers"}]},"kGxikShxkpQ":{"duration":77.184,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=kGxikShxkpQ","annotations":[{"segment":[23.02809906396256,72.71069344773791],"label":"Tug of war"}]},"EVSuDXeNUYQ":{"duration":169.9,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EVSuDXeNUYQ","annotations":[{"segment":[14.31292941513579,165.12879556318254],"label":"Cheerleading"}]},"vlIAZsrj6yk":{"duration":154.02,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=vlIAZsrj6yk","annotations":[{"segment":[0.7205663026521062,146.53418096723868],"label":"Shaving"}]},"UAAk2SxqSvs":{"duration":217.965,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UAAk2SxqSvs","annotations":[{"segment":[21.08243666770671,73.44848903588144],"label":"Longboarding"},{"segment":[78.88911785335414,199.94310904212168],"label":"Longboarding"}]},"_4u2dthCcMs":{"duration":19.78,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_4u2dthCcMs","annotations":[]},"I0j_h1bjj7w":{"duration":54.149,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=I0j_h1bjj7w","annotations":[]},"k5wY4N61bzU":{"duration":160.15,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=k5wY4N61bzU","annotations":[{"segment":[10.24347748205928,144.90773023400934],"label":"Walking the 
dog"}]},"sVeC5VlDsjk":{"duration":174.243,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sVeC5VlDsjk","annotations":[{"segment":[15.766095482059283,150.04973631201247],"label":"Surfing"}]},"teDceMxV9PA":{"duration":112.849,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=teDceMxV9PA","annotations":[{"segment":[12.90204853176616,42.08106776541102],"label":"Powerbocking"},{"segment":[50.166820083168034,55.26435958784093],"label":"Powerbocking"},{"segment":[65.81099304578485,70.20542365326149],"label":"Powerbocking"},{"segment":[103.95465071868205,112.648978],"label":"Powerbocking"}]},"7c18sUik4zM":{"duration":103.19,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7c18sUik4zM","annotations":[]},"ersl2OEJ-eI":{"duration":138.46,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=ersl2OEJ-eI","annotations":[{"segment":[2.590970358814353,8.420653666146647],"label":"Throwing darts"},{"segment":[15.113993759750391,24.182390015600628],"label":"Throwing darts"},{"segment":[27.852931357254292,34.546271450858036],"label":"Throwing darts"},{"segment":[39.29638377535102,48.364780031201256],"label":"Throwing darts"},{"segment":[96.29773166926678,103.00595475819033],"label":"Throwing darts"},{"segment":[108.40458970358814,115.96267862714508],"label":"Throwing darts"},{"segment":[119.20185959438376,125.68022152886115],"label":"Throwing darts"}]},"4ImpZRtbzYw":{"duration":192.494,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4ImpZRtbzYw","annotations":[{"segment":[26.685225450512498,155.01417397352026],"label":"Painting furniture"}]},"3UrypnvwAOY":{"duration":46.09,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=3UrypnvwAOY","annotations":[{"segment":[3.807979719188767,43.61214508580343],"label":"Playing 
violin"}]},"JkZZvDHTty4":{"duration":119.189,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=JkZZvDHTty4","annotations":[{"segment":[0.01,118.28734202771767],"label":"Roof shingle removal"}]},"oO3bpAF-_R4":{"duration":177.61,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=oO3bpAF-_R4","annotations":[]},"g_AwwSsBj0s":{"duration":232.154,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=g_AwwSsBj0s","annotations":[{"segment":[0.2,198.2146019683149],"label":"Rollerblading"}]},"M2OoQFcDflU":{"duration":30.186,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=M2OoQFcDflU","annotations":[{"segment":[0.01,30.141],"label":"Playing blackjack"}]},"9AfQ2Q-96oE":{"duration":98.8,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=9AfQ2Q-96oE","annotations":[]},"aCiN9mqkXoA":{"duration":163.28,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aCiN9mqkXoA","annotations":[{"segment":[0.3236555148092014,120.09924897275312],"label":"Washing 
dishes"}]},"Jt-DIHv8YK8":{"duration":72.622,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Jt-DIHv8YK8","annotations":[{"segment":[0.2,70.46842745709827],"label":"Longboarding"}]},"yGCklVOLgk8":{"duration":62.393,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yGCklVOLgk8","annotations":[{"segment":[19.360123244929795,33.66131981279251],"label":"Waterskiing"}]},"tuhEnqKmUKg":{"duration":37.43,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tuhEnqKmUKg","annotations":[{"segment":[9.465223439937597,29.036393137285494],"label":"Skateboarding"}]},"Mil3AyFfjX4":{"duration":6.966,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=Mil3AyFfjX4","annotations":[{"segment":[0.2,6.765986394557823],"label":"Shuffleboard"}]},"k0w1lMRoqYk":{"duration":181.65,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=k0w1lMRoqYk","annotations":[{"segment":[53.73892523364485,91.63900934579438],"label":"Raking leaves"}]},"jl7aBkPfcS8":{"duration":145.45,"subset":"validation","resolution":"406x720","url":"https://www.youtube.com/watch?v=jl7aBkPfcS8","annotations":[{"segment":[44.70132132449298,76.01493727769112],"label":"Cleaning shoes"}]},"O337_rGYZPc":{"duration":155.25,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=O337_rGYZPc","annotations":[{"segment":[78.95639480545209,149.4358743447738],"label":"Starting a campfire"}]},"p35FXT4drA0":{"duration":82.94,"subset":"training","resolution":"318x240","url":"https://www.youtube.com/watch?v=p35FXT4drA0","annotations":[{"segment":[5.3012808112324485,37.50591263650546],"label":"Hammer throw"}]},"r4F2X_MlL-U":{"duration":179.235,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=r4F2X_MlL-U","annotations":[{"segment":[26.481035938772866,163.34619980263383],"label":"Hanging 
wallpaper"}]},"PwvSxX9E2ms":{"duration":112.966,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PwvSxX9E2ms","annotations":[]},"4MKbQ816r9A":{"duration":86.82,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=4MKbQ816r9A","annotations":[{"segment":[3.9278682684722352,82.48523258658346],"label":"Mixing drinks"}]},"_fKxAnsi55M":{"duration":138.44,"subset":"testing","resolution":"424x240","url":"https://www.youtube.com/watch?v=_fKxAnsi55M","annotations":[]},"_0CqozZun3U":{"duration":294.29,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=_0CqozZun3U","annotations":[{"segment":[120.74129047141814,286.68034475179195],"label":"Removing ice from car"}]},"Fi2Al65EH0g":{"duration":187.13,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Fi2Al65EH0g","annotations":[{"segment":[18.68091107644306,97.4910046801872],"label":"Disc dog"},{"segment":[109.75035257410296,179.511879875195],"label":"Disc dog"}]},"SmBEf-g82Ew":{"duration":90.047,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=SmBEf-g82Ew","annotations":[{"segment":[17.559864299333878,90.04698412698413],"label":"Plastering"}]},"T0nCDJuikuA":{"duration":219.267,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=T0nCDJuikuA","annotations":[]},"lGvI5pyjpFg":{"duration":25.496,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lGvI5pyjpFg","annotations":[{"segment":[13.416121684867393,18.496783151326053],"label":"Making a lemonade"}]},"5laHmakSAAg":{"duration":182.765,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=5laHmakSAAg","annotations":[{"segment":[17.67765114820593,166.5120688798752],"label":"Beer 
pong"}]},"haUWdHvwpUo":{"duration":205.172,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=haUWdHvwpUo","annotations":[{"segment":[122.34583021806853,185.293738317757],"label":"Playing congas"}]},"y3Ll1puxM8c":{"duration":231.62,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=y3Ll1puxM8c","annotations":[{"segment":[3.9747402902918836,203.07309315132605],"label":"Zumba"}]},"OrF46J3HmSw":{"duration":35.02,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=OrF46J3HmSw","annotations":[{"segment":[5.6388156,33.4315168174727],"label":"Hopscotch"}]},"O-zE1-Yw3w0":{"duration":136.19,"subset":"training","resolution":"176x144","url":"https://www.youtube.com/watch?v=O-zE1-Yw3w0","annotations":[{"segment":[115.9905023400936,133.62322822308892],"label":"Using the balance beam"}]},"sqEsAtdFfrg":{"duration":76.63,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=sqEsAtdFfrg","annotations":[{"segment":[3.8838260530421214,14.161027301092044],"label":"Javelin throw"},{"segment":[33.75941107644306,42.84158892355694],"label":"Javelin throw"},{"segment":[52.16277145085803,63.51549375975039],"label":"Javelin throw"}]},"hFzbUNtTKPY":{"duration":9.01,"subset":"testing","resolution":"654x480","url":"https://www.youtube.com/watch?v=hFzbUNtTKPY","annotations":[]},"BGHQbw5HZ9Y":{"duration":204.219,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BGHQbw5HZ9Y","annotations":[{"segment":[82.51675351014042,84.7469360374415],"label":"Rock-paper-scissors"},{"segment":[141.77588923556942,143.68747425897035],"label":"Rock-paper-scissors"},{"segment":[144.96186427457098,147.52509048361935],"label":"Rock-paper-scissors"}]},"z93quI8Gxx4":{"duration":59.42,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=z93quI8Gxx4","annotations":[{"segment":[9.980215049778604,56.92419454639844],"label":"Camel 
ride"}]},"lZ2X-e33E0A":{"duration":157.153,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=lZ2X-e33E0A","annotations":[{"segment":[41.17467706708268,147.07113884555383],"label":"Plastering"}]},"hYAE418i-ZY":{"duration":219.36,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hYAE418i-ZY","annotations":[{"segment":[31.396710540000004,138.5965040826833],"label":"Getting a haircut"}]},"ThWgMXhkS2E":{"duration":85.87,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=ThWgMXhkS2E","annotations":[{"segment":[9.17615644851794,78.83457036427457],"label":"Cleaning windows"}]},"IIKwRyf4Oyg":{"duration":141.99,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=IIKwRyf4Oyg","annotations":[{"segment":[4.483683909133092,111.30795204464712],"label":"Playing guitarra"}]},"59oIJsSLsh0":{"duration":45.09,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=59oIJsSLsh0","annotations":[]},"oJU8LkbcNEg":{"duration":208.26,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=oJU8LkbcNEg","annotations":[]},"94lnTGe5xU4":{"duration":88.56,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=94lnTGe5xU4","annotations":[]},"ThYidZUtnuo":{"duration":53.685,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ThYidZUtnuo","annotations":[{"segment":[11.966936037441497,20.837531981279252],"label":"Roof shingle removal"},{"segment":[32.21867394695788,41.92611856474259],"label":"Roof shingle 
removal"}]},"cWpT8nb2a9s":{"duration":126.11,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=cWpT8nb2a9s","annotations":[{"segment":[32.67306263,72.39849752901681],"label":"Cheerleading"}]},"g4tKl5yXmGg":{"duration":120.58,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=g4tKl5yXmGg","annotations":[]},"Cu-p0FZOqi8":{"duration":137.07,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Cu-p0FZOqi8","annotations":[{"segment":[6.414990327613104,115.25599288611544],"label":"Plataform diving"}]},"g0EkYCOp9OU":{"duration":154.599,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=g0EkYCOp9OU","annotations":[]},"A4PdcfWqrN8":{"duration":188.406,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=A4PdcfWqrN8","annotations":[{"segment":[20.28085113608626,179.8823318157216],"label":"Disc dog"}]},"OJiLPJkzel4":{"duration":96.781,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OJiLPJkzel4","annotations":[{"segment":[47.25800517190755,51.03260622397684],"label":"Curling"}]},"02WlEYafQ9Q":{"duration":80.086,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=02WlEYafQ9Q","annotations":[{"segment":[9.745173166926678,69.59053146125845],"label":"Rafting"}]},"YJOoLvwqWCw":{"duration":41.309,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YJOoLvwqWCw","annotations":[{"segment":[7.3143398174727,34.57394988611544],"label":"Arm 
wrestling"}]},"08uNrWxxHGY":{"duration":192.31,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=08uNrWxxHGY","annotations":[{"segment":[5.473497837750076,183.40301780971268],"label":"Painting"}]},"ajYA9O6R-AY":{"duration":52.73,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=ajYA9O6R-AY","annotations":[{"segment":[31.343351943837753,46.973894907956314],"label":"Hopscotch"}]},"6KXVjADefBY":{"duration":133.42,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6KXVjADefBY","annotations":[{"segment":[0,0],"label":"Vacuuming floor"}]},"aotVhoXjqS0":{"duration":72.49,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=aotVhoXjqS0","annotations":[{"segment":[31.92514430577223,35.654453198127925],"label":"Playing racquetball"},{"segment":[45.59927691107644,49.55460452418097],"label":"Playing racquetball"}]},"0df62FHrMT0":{"duration":23.57,"subset":"testing","resolution":"586x480","url":"https://www.youtube.com/watch?v=0df62FHrMT0","annotations":[]},"QrBIWtD4JyI":{"duration":83.151,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QrBIWtD4JyI","annotations":[]},"gBOWkZBlc8g":{"duration":126.224,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gBOWkZBlc8g","annotations":[{"segment":[0.01,124.66438941259753],"label":"Beach soccer"}]},"aDe5hUB3AdY":{"duration":133.93,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=aDe5hUB3AdY","annotations":[]},"Cqbs_wM3oc4":{"duration":15.05,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Cqbs_wM3oc4","annotations":[{"segment":[0.01,13.45553822152886],"label":"Playing 
saxophone"}]},"Sfdj_kMDUh8":{"duration":151.348,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Sfdj_kMDUh8","annotations":[{"segment":[10.620351014040562,136.43054290171605],"label":"Installing carpet"}]},"9_uq9j0r0aY":{"duration":187.99,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=9_uq9j0r0aY","annotations":[]},"74cccfEVUSU":{"duration":178.562,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=74cccfEVUSU","annotations":[]},"8kkKxLpiMus":{"duration":34.06,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8kkKxLpiMus","annotations":[{"segment":[0.01,33.239980897195075],"label":"Doing step aerobics"}]},"Ye3u6ujLEl0":{"duration":199.692,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ye3u6ujLEl0","annotations":[{"segment":[56.684340093603744,71.32260374414976],"label":"Swimming"},{"segment":[100.5991310452418,105.27091731669265],"label":"Swimming"},{"segment":[119.90918096723868,152.3154898595944],"label":"Swimming"},{"segment":[177.2341794071763,190.62797503900157],"label":"Swimming"}]},"kTJDylL6pCU":{"duration":181.21,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=kTJDylL6pCU","annotations":[{"segment":[41.516564880000004,165.6601494570983],"label":"Bathing dog"}]},"r8ZIHLqFPfg":{"duration":99.916,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=r8ZIHLqFPfg","annotations":[{"segment":[30.611734138996447,39.31354756317379],"label":"Putting in contact lenses"},{"segment":[64.95281925941062,67.74982946098137],"label":"Putting in contact lenses"}]},"bEniqIC5Ric":{"duration":61.905,"subset":"training","resolution":"1920x1440","url":"https://www.youtube.com/watch?v=bEniqIC5Ric","annotations":[{"segment":[0.01,60.9556767825902],"label":"Camel 
ride"}]},"ebmi7XJA8Oo":{"duration":212.648,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=ebmi7XJA8Oo","annotations":[{"segment":[15.035815770146261,206.46095284179736],"label":"Cleaning sink"}]},"_nTjmHQ0-gQ":{"duration":43.1,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_nTjmHQ0-gQ","annotations":[{"segment":[2.518428217628705,40.395588610764435],"label":"Triple jump"}]},"m0ZPQ8q4Qq4":{"duration":11.75,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=m0ZPQ8q4Qq4","annotations":[{"segment":[0.2829576021840874,11.701666],"label":"Hopscotch"}]},"cu8TBL7msJg":{"duration":209.0,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=cu8TBL7msJg","annotations":[{"segment":[0.01,4.401763348084944],"label":"Tennis serve with ball bouncing"},{"segment":[5.05387643669012,7.988385335413415],"label":"Tennis serve with ball bouncing"},{"segment":[16.13979894297813,19.074307841701422],"label":"Tennis serve with ball bouncing"},{"segment":[34.072908879620485,39.61587013276449],"label":"Tennis serve with ball bouncing"},{"segment":[64.39616691497659,66.67856270436818],"label":"Tennis serve with ball bouncing"},{"segment":[70.91729774180968,82.32927668876755],"label":"Tennis serve with ball bouncing"},{"segment":[109.39196962012481,112.65253503354134],"label":"Tennis serve with ball bouncing"},{"segment":[127.32507939391576,168.73426014430575],"label":"Tennis serve with ball bouncing"},{"segment":[171.66876901638065,181.45046525663028],"label":"Tennis serve with ball bouncing"},{"segment":[195.796953075663,199.05751848907954],"label":"Tennis serve with ball 
bouncing"}]},"IQp6JF5v1qc":{"duration":62.07,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=IQp6JF5v1qc","annotations":[{"segment":[1.7583297800000002,60.03652106084244],"label":"Breakdancing"}]},"WZSILJ9rBuc":{"duration":120.094,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=WZSILJ9rBuc","annotations":[]},"ilXO32we9uo":{"duration":104.24000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ilXO32we9uo","annotations":[{"segment":[13.496806747269892,85.85920436817473],"label":"Sailing"}]},"ctWolbJDJyc":{"duration":17.07,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ctWolbJDJyc","annotations":[{"segment":[1.065002496099844,14.643784321372856],"label":"Springboard diving"}]},"dPKc7ZCEC30":{"duration":150.21,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dPKc7ZCEC30","annotations":[]},"JowEwlQllT8":{"duration":7.593,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=JowEwlQllT8","annotations":[]},"4Q2uDqvjFy0":{"duration":145.868,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4Q2uDqvjFy0","annotations":[]},"U3-MtKZeUXM":{"duration":23.29,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=U3-MtKZeUXM","annotations":[]},"VHUC47iq1Wg":{"duration":212.02,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=VHUC47iq1Wg","annotations":[{"segment":[9.258783151326053,206.9999375975039],"label":"Cricket"}]},"vg-FrXO1coA":{"duration":231.53,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=vg-FrXO1coA","annotations":[{"segment":[39.4288778,215.9501452948518],"label":"Brushing 
teeth"}]},"Lvia-mF_Hnk":{"duration":76.116,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=Lvia-mF_Hnk","annotations":[{"segment":[11.83748404344836,47.11319124817537],"label":"Welding"}]},"dXLhYjRSNcs":{"duration":49.55,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dXLhYjRSNcs","annotations":[{"segment":[0.20000000000000004,45.79421684867395],"label":"Using parallel bars"}]},"2q3Z7UQZnBY":{"duration":110.086,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=2q3Z7UQZnBY","annotations":[]},"wItTq-gpK90":{"duration":224.77,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=wItTq-gpK90","annotations":[]},"Fdu_dKh2PUE":{"duration":110.527,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Fdu_dKh2PUE","annotations":[]},"2FIQwmB362w":{"duration":174.27,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2FIQwmB362w","annotations":[{"segment":[23.383547581903272,135.95085803432136],"label":"Skateboarding"},{"segment":[147.64263182527299,163.68483307332292],"label":"Skateboarding"}]},"V3Sz4WWJ2_M":{"duration":208.77,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=V3Sz4WWJ2_M","annotations":[{"segment":[132.47030577034468,182.47135333812332],"label":"Hanging wallpaper"}]},"F2BFV1930tQ":{"duration":39.428,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=F2BFV1930tQ","annotations":[]},"1aRRlW0k-_4":{"duration":93.368,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1aRRlW0k-_4","annotations":[{"segment":[31.658295631825276,80.56490405616225],"label":"Hurling"}]},"rzZutQSmyJo":{"duration":139.46,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rzZutQSmyJo","annotations":[{"segment":[4.132159126365055,20.878277691107645],"label":"Playing 
bagpipes"},{"segment":[25.227918876755073,51.97821216848674],"label":"Playing bagpipes"},{"segment":[94.60469578783152,130.92419968798754],"label":"Playing bagpipes"}]},"mvWKOkRzfos":{"duration":80.272,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mvWKOkRzfos","annotations":[{"segment":[5.001330875029139,35.63448248458262],"label":"Playing beach volleyball"},{"segment":[69.51849782554517,74.76989514330218],"label":"Playing beach volleyball"}]},"V7C34-4tBok":{"duration":109.32,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=V7C34-4tBok","annotations":[{"segment":[31.28861154446178,101.34789391575663],"label":"Washing dishes"}]},"rS8T1dAdiCs":{"duration":155.92000000000002,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rS8T1dAdiCs","annotations":[{"segment":[30.610410719999997,152.39452207612115],"label":"Wrapping presents"}]},"JNvwkiz8Bzw":{"duration":82.38,"subset":"training","resolution":"400x224","url":"https://www.youtube.com/watch?v=JNvwkiz8Bzw","annotations":[{"segment":[6.552903276131045,72.4849984399376],"label":"Tug of 
war"}]},"H2fG7JRRMAs":{"duration":60.094,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=H2fG7JRRMAs","annotations":[]},"nwhCE8TfhDA":{"duration":125.69,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=nwhCE8TfhDA","annotations":[]},"3M290KZV5lo":{"duration":87.19,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=3M290KZV5lo","annotations":[]},"PzJsoZT2hXk":{"duration":105.95,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=PzJsoZT2hXk","annotations":[]},"jDeBuorU4hY":{"duration":123.507,"subset":"validation","resolution":"426x238","url":"https://www.youtube.com/watch?v=jDeBuorU4hY","annotations":[{"segment":[69.30477379095163,79.52086271450858],"label":"Curling"},{"segment":[90.11080811232449,118.41484399375976],"label":"Curling"}]},"T8XtEP8LaNM":{"duration":102.122,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=T8XtEP8LaNM","annotations":[]},"bj-MDk8ADdg":{"duration":78.35,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bj-MDk8ADdg","annotations":[]},"g4uvBcIE1Os":{"duration":28.979,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=g4uvBcIE1Os","annotations":[{"segment":[7.088513260530421,26.45776287051482],"label":"Snow tubing"}]},"di8Vr1fzUh8":{"duration":121.487,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=di8Vr1fzUh8","annotations":[{"segment":[0.01,121.48680272108844],"label":"Throwing darts"}]},"pw0A0Uv2TZM":{"duration":58.07,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=pw0A0Uv2TZM","annotations":[{"segment":[2.717930031201248,16.30758018720749],"label":"Washing 
face"}]},"w4CiGbHLTBg":{"duration":139.72,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=w4CiGbHLTBg","annotations":[{"segment":[71.69982766000001,129.6437851326053],"label":"Painting"},{"segment":[19.827873020280812,56.97257492],"label":"Painting"}]},"MbCIH5bay4Q":{"duration":31.63,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=MbCIH5bay4Q","annotations":[{"segment":[8.288762770670827,31.625577],"label":"High jump"}]},"4gQpW3zR-Aw":{"duration":116.031,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4gQpW3zR-Aw","annotations":[{"segment":[59.52846957878315,93.01796255850233],"label":"Camel ride"}]},"SNuH7bpgHuY":{"duration":66.061,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=SNuH7bpgHuY","annotations":[{"segment":[0.2,12.225864289306962],"label":"Bullfighting"},{"segment":[26.30102078010442,28.766741625207622],"label":"Bullfighting"},{"segment":[35.958427423425306,43.15011322164298],"label":"Bullfighting"}]},"T8mTuW85NNo":{"duration":32.09,"subset":"training","resolution":"638x360","url":"https://www.youtube.com/watch?v=T8mTuW85NNo","annotations":[{"segment":[15.855439937597504,19.356641185647426],"label":"Washing hands"},{"segment":[23.20796255850234,28.959936037441498],"label":"Washing hands"}]},"uuhcDXyGrEI":{"duration":170.69,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uuhcDXyGrEI","annotations":[{"segment":[52.57936817472699,129.2520670826833],"label":"Making a sandwich"}]},"6QbIJ2pnXXo":{"duration":162.981,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6QbIJ2pnXXo","annotations":[{"segment":[0.010000000000000002,150.38902568269282],"label":"Making a 
cake"}]},"BadeHG8y4PQ":{"duration":126.48,"subset":"training","resolution":"656x480","url":"https://www.youtube.com/watch?v=BadeHG8y4PQ","annotations":[{"segment":[42.42278469370068,51.49928746537617],"label":"Volleyball"},{"segment":[63.732834679373575,88.00261382972326],"label":"Volleyball"},{"segment":[99.24958465549507,113.25896936829855],"label":"Volleyball"}]},"fgkiHZ0nENg":{"duration":44.652,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=fgkiHZ0nENg","annotations":[{"segment":[0.2,43.746392804992205],"label":"Croquet"}]},"V1CX5B7ih6c":{"duration":51.851,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=V1CX5B7ih6c","annotations":[{"segment":[2.6693529455463936,5.662263634945399],"label":"Doing a powerbomb"},{"segment":[28.06864973322933,31.223339472698907],"label":"Doing a powerbomb"},{"segment":[36.642934666146644,41.738971937597505],"label":"Doing a powerbomb"}]},"e2fXKrG2BkY":{"duration":148.24,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=e2fXKrG2BkY","annotations":[{"segment":[41.699807400000005,100.27814827457097],"label":"Doing karate"},{"segment":[101.33845576000002,142.7923862527301],"label":"Doing karate"}]},"fSA-eTz3GkU":{"duration":126.69,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=fSA-eTz3GkU","annotations":[{"segment":[11.66083648829953,17.39243408424337],"label":"Shot put"},{"segment":[73.12727967238689,82.02113801092042],"label":"Shot put"}]},"TEkIJQljABs":{"duration":56.82,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=TEkIJQljABs","annotations":[{"segment":[1.2390491025070463,54.87217289719626],"label":"Having an ice 
cream"}]},"34ZxyyFjbHQ":{"duration":176.59,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=34ZxyyFjbHQ","annotations":[]},"wipGHu-nmNY":{"duration":128.41,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=wipGHu-nmNY","annotations":[{"segment":[11.418349198127926,20.63315732293292],"label":"Mixing drinks"},{"segment":[24.038629890795633,35.45697908892355],"label":"Mixing drinks"},{"segment":[41.86728039313572,50.68144468642746],"label":"Mixing drinks"},{"segment":[56.49078024336974,67.70880752574102],"label":"Mixing drinks"},{"segment":[73.51814308268332,78.7265128923557],"label":"Mixing drinks"},{"segment":[85.53745802808112,94.15130040561624],"label":"Mixing drinks"}]},"Q-879RNVOdg":{"duration":202.246,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Q-879RNVOdg","annotations":[{"segment":[35.227876412164676,36.48601502686406],"label":"Wakeboarding"},{"segment":[40.57496552463704,42.46217344668611],"label":"Wakeboarding"},{"segment":[46.236589290784245,56.616232862054126],"label":"Wakeboarding"},{"segment":[65.10866851127493,68.88308435537306],"label":"Wakeboarding"},{"segment":[80.20633188766747,82.09353980971655],"label":"Wakeboarding"},{"segment":[93.10225268833611,95.93306457140972],"label":"Wakeboarding"},{"segment":[117.32142102129914,120.15223290437277],"label":"Wakeboarding"},{"segment":[121.724906172747,138.3952419190891],"label":"Wakeboarding"},{"segment":[148.46035077134414,155.69464775890248],"label":"Wakeboarding"},{"segment":[157.89639032033327,195.01147921302376],"label":"Wakeboarding"}]},"gCuRcP4txc0":{"duration":55.543,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gCuRcP4txc0","annotations":[{"segment":[0.01,45.630307332293285],"label":"Drum 
corps"}]},"f5xSylPY_bQ":{"duration":202.41,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=f5xSylPY_bQ","annotations":[{"segment":[22.25662629173167,193.68000326209048],"label":"Playing flauta"}]},"Eaqf6cekoZ4":{"duration":52.36,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Eaqf6cekoZ4","annotations":[]},"zPl4spxrvg4":{"duration":208.17000000000002,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=zPl4spxrvg4","annotations":[{"segment":[51.67372608,160.78148015],"label":"Archery"}]},"2oc0OBWkYfg":{"duration":43.03,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=2oc0OBWkYfg","annotations":[{"segment":[0.01,15.012892355694229],"label":"Removing curlers"}]},"XXJ-UtODmIs":{"duration":152.254,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=XXJ-UtODmIs","annotations":[]},"Jgtpkfgjytg":{"duration":88.747,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Jgtpkfgjytg","annotations":[]},"yyUmEgvq3B0":{"duration":83.267,"subset":"training","resolution":"480x352","url":"https://www.youtube.com/watch?v=yyUmEgvq3B0","annotations":[{"segment":[31.1113703432491,42.41278671010786],"label":"Playing ten pins"},{"segment":[57.35143958502341,62.287690330733234],"label":"Playing ten pins"},{"segment":[65.40532238065522,68.65285576599064],"label":"Playing ten pins"}]},"_7XW-BFK_ZY":{"duration":169.065,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_7XW-BFK_ZY","annotations":[{"segment":[31.641996879875194,169.021],"label":"Disc dog"}]},"MbEoZXWVc-I":{"duration":114.707,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=MbEoZXWVc-I","annotations":[{"segment":[13.73624354420358,17.12570690860585],"label":"Beer pong"},{"segment":[23.54784802010488,99.0080060802185],"label":"Beer 
pong"}]},"UrPn5-8yNgA":{"duration":191.15,"subset":"training","resolution":"176x144","url":"https://www.youtube.com/watch?v=UrPn5-8yNgA","annotations":[{"segment":[1.2721459520438008,173.36369169970735],"label":"Shaving legs"}]},"FBUtGL5_tto":{"duration":152.138,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FBUtGL5_tto","annotations":[{"segment":[0.2,146.92352171339562],"label":"Rafting"}]},"zqqiiZCmPLw":{"duration":181.77,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=zqqiiZCmPLw","annotations":[{"segment":[2.9084589847751143,95.49010732012171],"label":"Ballet"}]},"KTNJc_NVOmg":{"duration":114.637,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KTNJc_NVOmg","annotations":[{"segment":[0.01,106.40996708268331],"label":"Clipping cat claws"}]},"vMcXLYBcHsM":{"duration":135.77,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vMcXLYBcHsM","annotations":[]},"oYhVH1vIXFM":{"duration":134.42,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oYhVH1vIXFM","annotations":[{"segment":[68.15382710546515,94.36683753064406],"label":"Preparing pasta"},{"segment":[104.85204170071565,126.45156229106306],"label":"Preparing pasta"}]},"7GOPv-XegSc":{"duration":180.838,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=7GOPv-XegSc","annotations":[{"segment":[91.82909999257114,103.67801612064483],"label":"Cutting the grass"}]},"1ioKX0iuico":{"duration":177.145,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1ioKX0iuico","annotations":[{"segment":[8.938817243403737,170.3341209817215],"label":"Wakeboarding"}]},"BLmAF2wbTz8":{"duration":169.99,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BLmAF2wbTz8","annotations":[{"segment":[21.481136438377533,169.99269841269842],"label":"Mixing 
drinks"}]},"EiPofuIoUic":{"duration":61.278,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=EiPofuIoUic","annotations":[{"segment":[0.2,60.41842920093458],"label":"River tubing"}]},"x08PpU3uQ6Y":{"duration":118.817,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=x08PpU3uQ6Y","annotations":[{"segment":[0.01,110.07162870514821],"label":"Spread mulch"}]},"MrUvJIAiyvk":{"duration":55.496,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=MrUvJIAiyvk","annotations":[]},"A0b_pqaKCX8":{"duration":155.11,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=A0b_pqaKCX8","annotations":[]},"UAFIstY4a8Q":{"duration":168.949,"subset":"training","resolution":"853x468","url":"https://www.youtube.com/watch?v=UAFIstY4a8Q","annotations":[{"segment":[16.604911867394694,152.87061719188767],"label":"Playing blackjack"}]},"08afjR_3SwI":{"duration":61.184,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=08afjR_3SwI","annotations":[{"segment":[6.872526926677067,59.84825531981279],"label":"Canoeing"}]},"YfWKbMPFmcU":{"duration":166.093,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YfWKbMPFmcU","annotations":[{"segment":[64.83541788718469,69.48497005019607],"label":"Painting fence"},{"segment":[127.08775395588196,130.96238072104526],"label":"Painting fence"},{"segment":[142.32795256552427,147.75243003675288],"label":"Painting fence"}]},"Oz4q98BhqKo":{"duration":203.9,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=Oz4q98BhqKo","annotations":[{"segment":[6.997923242099752,195.94185077879305],"label":"Doing step 
aerobics"}]},"no9vPN7D-1s":{"duration":149.862,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=no9vPN7D-1s","annotations":[{"segment":[0.01,149.721],"label":"Welding"}]},"LxwKUIJdt_4":{"duration":86.564,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=LxwKUIJdt_4","annotations":[{"segment":[0.01,0.02],"label":"Blowing leaves"}]},"H7k8ATbTjzs":{"duration":109.6,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=H7k8ATbTjzs","annotations":[{"segment":[0,109.598184],"label":"Walking the dog"}]},"bW5HfDWUP3U":{"duration":137.463,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=bW5HfDWUP3U","annotations":[{"segment":[6.431560062402496,133.99083463338533],"label":"Putting on shoes"}]},"1JKgr3KfoHo":{"duration":206.862,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1JKgr3KfoHo","annotations":[{"segment":[0.01,196.56144234945398],"label":"Playing ice hockey"}]},"EOradtej9GU":{"duration":121.139,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EOradtej9GU","annotations":[{"segment":[18.142427906396257,120.5715525274072],"label":"Skiing"}]},"Pt4BlfBqduE":{"duration":109.831,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Pt4BlfBqduE","annotations":[{"segment":[35.98187356065671,109.830384],"label":"Fixing the roof"}]},"x44fn0snUvw":{"duration":100.8,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=x44fn0snUvw","annotations":[{"segment":[1.7269656786271452,57.76760087000002],"label":"Washing hands"},{"segment":[62.344718799999995,97.02407176287052],"label":"Washing 
hands"}]},"mISzl7h3qIA":{"duration":189.452,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=mISzl7h3qIA","annotations":[]},"91RXe65HfKE":{"duration":114.1,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=91RXe65HfKE","annotations":[]},"x3z3dVUxmJ0":{"duration":131.054,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=x3z3dVUxmJ0","annotations":[{"segment":[0.01,115.71955915444619],"label":"Braiding hair"}]},"L-rPS8tlAy4":{"duration":108.322,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=L-rPS8tlAy4","annotations":[{"segment":[18.69928330245395,46.83244312937072],"label":"Bullfighting"},{"segment":[94.67565978406125,99.56105873118412],"label":"Bullfighting"}]},"7p99ez6MEeo":{"duration":18.25,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=7p99ez6MEeo","annotations":[{"segment":[0.1993076146645866,5.55214069422777],"label":"Volleyball"}]},"fhEvf47Jqi8":{"duration":196.256,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fhEvf47Jqi8","annotations":[{"segment":[78.6477285036105,190.47783130734882],"label":"Trimming branches or hedges"}]},"rKTpKq7bh_U":{"duration":150.14,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=rKTpKq7bh_U","annotations":[{"segment":[0,150.14022675736962],"label":"Playing flauta"}]},"EOSvdsejPQQ":{"duration":143.8,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EOSvdsejPQQ","annotations":[{"segment":[20.19045790951638,129.89194588455538],"label":"Playing 
harmonica"}]},"paRAKx8svJ8":{"duration":103.074,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=paRAKx8svJ8","annotations":[]},"8UJ5PGzu1PE":{"duration":59.19,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=8UJ5PGzu1PE","annotations":[{"segment":[31.199106084243372,53.65509984399376],"label":"Archery"}]},"lcUtnIptSjM":{"duration":228.88,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=lcUtnIptSjM","annotations":[{"segment":[36.77767632065827,128.54333471298034],"label":"Washing hands"},{"segment":[183.1742505631825,206.02639878159127],"label":"Washing hands"}]},"YwUW6Qh_5TA":{"duration":87.91,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YwUW6Qh_5TA","annotations":[{"segment":[15.70400546021841,49.03215678627146],"label":"Using the pommel horse"}]},"4LoAbHdHWqM":{"duration":54.869,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=4LoAbHdHWqM","annotations":[]},"DRK5oUhjwTg":{"duration":102.424,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DRK5oUhjwTg","annotations":[{"segment":[68.3352407429081,92.70655124533614],"label":"Rollerblading"}]},"i1CVl-0-gJE":{"duration":72.749,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=i1CVl-0-gJE","annotations":[{"segment":[20.088013838920904,28.372900902430658],"label":"Raking leaves"},{"segment":[39.495077560062406,43.01331722776911],"label":"Raking leaves"}]},"FB5RmmpV25g":{"duration":153.345,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=FB5RmmpV25g","annotations":[]},"Fia1JynWFBA":{"duration":7.64,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Fia1JynWFBA","annotations":[{"segment":[0.01,7.639364],"label":"Baking 
cookies"}]},"4Ex-sB0vtwk":{"duration":9.196,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=4Ex-sB0vtwk","annotations":[{"segment":[4.590400125495946,7.7364688968800825],"label":"Shuffleboard"}]},"MiTIi_QYt2k":{"duration":113.45,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MiTIi_QYt2k","annotations":[{"segment":[66.37509259999999,106.19606703042122],"label":"Polishing forniture"},{"segment":[36.27854604,57.38655079329173],"label":"Polishing forniture"}]},"6PnPu_cLCvE":{"duration":128.92,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=6PnPu_cLCvE","annotations":[{"segment":[4.020624024960998,18.293839313572544],"label":"Smoking a cigarette"},{"segment":[82.63664742589704,96.10782839313572],"label":"Smoking a cigarette"}]},"00KMCm2oGhk":{"duration":31.556,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=00KMCm2oGhk","annotations":[{"segment":[3.1619197010860263,30.607774841272946],"label":"Drum corps"}]},"ApIHaCeRF5s":{"duration":236.031,"subset":"testing","resolution":"480x272","url":"https://www.youtube.com/watch?v=ApIHaCeRF5s","annotations":[]},"DPw74KWZzGY":{"duration":128.66,"subset":"training","resolution":"240x180","url":"https://www.youtube.com/watch?v=DPw74KWZzGY","annotations":[{"segment":[38.47221222,95.86402449],"label":"Playing accordion"},{"segment":[2.4086446559903214,11.441061964118564],"label":"Playing accordion"},{"segment":[42.15128092043682,47.57073132449298],"label":"Playing accordion"},{"segment":[52.3880205725429,68.24493101404056],"label":"Playing accordion"},{"segment":[74.26654257410296,87.31336762090484],"label":"Playing accordion"},{"segment":[91.52849571294851,97.95154804368174],"label":"Playing accordion"},{"segment":[104.97676153042121,125.65096121996879],"label":"Playing 
accordion"}]},"2DMOP9Fy91U":{"duration":132.619,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=2DMOP9Fy91U","annotations":[{"segment":[9.763880063957144,105.60011682408175],"label":"Hula hoop"}]},"XkzEXA4b20k":{"duration":232.99,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=XkzEXA4b20k","annotations":[{"segment":[20.12325993,213.13617333697348],"label":"Getting a haircut"}]},"uo6WV2gLQgk":{"duration":85.68,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uo6WV2gLQgk","annotations":[]},"Z6xXj6tbMCo":{"duration":162.842,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Z6xXj6tbMCo","annotations":[{"segment":[0.01,162.841541],"label":"Capoeira"}]},"k_ZCg9tqGQo":{"duration":23.52,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=k_ZCg9tqGQo","annotations":[{"segment":[3.4617074882995325,5.879407956318253],"label":"Getting a tattoo"}]},"penDFcEdu7U":{"duration":53.546,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=penDFcEdu7U","annotations":[]},"vSiu8qzHV6c":{"duration":212.28,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vSiu8qzHV6c","annotations":[]},"U7iNqQs1kRk":{"duration":69.753,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=U7iNqQs1kRk","annotations":[{"segment":[0.01,69.752743],"label":"Playing congas"}]},"NtlJJ5V9_iY":{"duration":198.229,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NtlJJ5V9_iY","annotations":[]},"5eM2Hcvj6R8":{"duration":189.406,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5eM2Hcvj6R8","annotations":[{"segment":[28.147208677061094,147.74011889454363],"label":"Hula 
hoop"}]},"tRgK4fFA8sE":{"duration":161.1,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tRgK4fFA8sE","annotations":[{"segment":[10.577381760000002,154.8825165803432],"label":"Fixing bicycle"}]},"bjzG4cFArvI":{"duration":167.25,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=bjzG4cFArvI","annotations":[]},"inEuUt5xEG0":{"duration":225.327,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=inEuUt5xEG0","annotations":[]},"owdkZsM5IFg":{"duration":130.06,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=owdkZsM5IFg","annotations":[]},"_TeU8A-CGbU":{"duration":64.32,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_TeU8A-CGbU","annotations":[{"segment":[3.01026244773791,60.60661728112324],"label":"Installing carpet"}]},"0Q9RHSEFeMI":{"duration":166.57999999999998,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0Q9RHSEFeMI","annotations":[{"segment":[11.667193160000002,158.71702028081123],"label":"Doing step aerobics"}]},"EDEFxLnf_Wk":{"duration":32.63,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EDEFxLnf_Wk","annotations":[{"segment":[1.651703034321373,31.58564417940718],"label":"Spinning"}]},"73ZdOEJcsUw":{"duration":103.747,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=73ZdOEJcsUw","annotations":[{"segment":[8.736037441497661,71.18252730109204],"label":"Knitting"}]},"5vk8OuKkttU":{"duration":144.73,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=5vk8OuKkttU","annotations":[{"segment":[15.124753510140405,20.542575663026522],"label":"Installing carpet"},{"segment":[25.734655226209046,33.63564586583463],"label":"Installing carpet"},{"segment":[39.50495319812792,50.79208268330733],"label":"Installing 
carpet"},{"segment":[54.62970670826833,69.30297503900157],"label":"Installing carpet"},{"segment":[71.78614352574102,102.48713572542901],"label":"Installing carpet"},{"segment":[109.03367082683307,123.7069391575663],"label":"Installing carpet"}]},"RIr3Y2XS5NA":{"duration":153.368,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RIr3Y2XS5NA","annotations":[{"segment":[28.233036461594516,129.20203112012481],"label":"Rafting"}]},"yUHN5TIprwk":{"duration":56.31,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=yUHN5TIprwk","annotations":[{"segment":[4.123755070202808,50.888892355694225],"label":"Spinning"}]},"GAS3NRYiLiI":{"duration":186.689,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=GAS3NRYiLiI","annotations":[]},"FWmOjpmojyM":{"duration":77.78999999999999,"subset":"testing","resolution":"656x480","url":"https://www.youtube.com/watch?v=FWmOjpmojyM","annotations":[]},"-YreL-4QCLg":{"duration":41.75,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=-YreL-4QCLg","annotations":[{"segment":[20.081376755070206,38.43777301092044],"label":"Playing lacrosse"}]},"aSc-UUhmYf8":{"duration":194.56,"subset":"testing","resolution":"294x240","url":"https://www.youtube.com/watch?v=aSc-UUhmYf8","annotations":[]},"ir1jpnsK5p8":{"duration":147.006,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=ir1jpnsK5p8","annotations":[{"segment":[16.479364485981307,116.9736308411215],"label":"Fixing the 
roof"}]},"1G5cu1NvDFw":{"duration":43.19,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1G5cu1NvDFw","annotations":[{"segment":[18.394115634945397,32.07179136349454],"label":"Powerbocking"},{"segment":[38.20316324180967,42.043692879875195],"label":"Powerbocking"}]},"LdPxd-qdtis":{"duration":160.27,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=LdPxd-qdtis","annotations":[]},"EM4FVYEFQRc":{"duration":50.806,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=EM4FVYEFQRc","annotations":[]},"iSH43hQoxio":{"duration":184.924,"subset":"validation","resolution":"426x222","url":"https://www.youtube.com/watch?v=iSH43hQoxio","annotations":[{"segment":[34.90759746852459,37.79252287914645],"label":"Hand car wash"},{"segment":[44.13935878251456,45.58182148782549],"label":"Hand car wash"},{"segment":[51.35167230906923,54.52509026075328],"label":"Hand car wash"},{"segment":[75.5850457582929,77.60449354572822],"label":"Hand car wash"},{"segment":[96.64500125583255,99.52992666645441],"label":"Hand car wash"},{"segment":[126.07124044417557,127.51370314948652],"label":"Hand car wash"},{"segment":[113.0890749828393,116.83947797971919],"label":"Hand car wash"},{"segment":[128.37917950858036,130.97561235257413],"label":"Hand car wash"},{"segment":[133.8605377347894,143.09229895787834],"label":"Hand car wash"},{"segment":[173.38401547113887,183.19276177067084],"label":"Hand car wash"}]},"JU_o9ZtH-VM":{"duration":220.57,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JU_o9ZtH-VM","annotations":[{"segment":[41.454694425117005,176.99950441263653],"label":"Getting a haircut"}]},"33eH3ozXLmU":{"duration":178.864,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=33eH3ozXLmU","annotations":[{"segment":[0.20000000000000004,18.13317472698908],"label":"Scuba 
diving"},{"segment":[31.523826833073322,48.26214196567863],"label":"Scuba diving"},{"segment":[53.56260842433697,86.48129485179406],"label":"Scuba diving"},{"segment":[94.57148049921997,163.47754446177848],"label":"Scuba diving"}]},"uRz8FNXP3eU":{"duration":69.591,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uRz8FNXP3eU","annotations":[]},"NQyAC1aweec":{"duration":77.416,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=NQyAC1aweec","annotations":[{"segment":[0.01,77.381],"label":"Clipping cat claws"}]},"LNKdVrX_0Fg":{"duration":226.72,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=LNKdVrX_0Fg","annotations":[{"segment":[60.482034407176286,170.83521999219968],"label":"Wrapping presents"}]},"UaCSf-kW2Ho":{"duration":95.133,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UaCSf-kW2Ho","annotations":[{"segment":[0.01,95.13215419501134],"label":"Playing congas"}]},"TX8FGTL1flw":{"duration":116.681,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=TX8FGTL1flw","annotations":[{"segment":[4.173630603582802,9.436037899316025],"label":"Ice fishing"},{"segment":[12.520897348538949,14.879907515591773],"label":"Ice fishing"},{"segment":[19.416465529154898,21.231088734580144],"label":"Ice fishing"},{"segment":[27.58226995356852,29.941280120621343],"label":"Ice fishing"},{"segment":[49.902135380299086,55.890391958202414],"label":"Ice fishing"},{"segment":[84.01705084385748,109.2403131595651],"label":"Ice fishing"}]},"RAaRFD5M8h4":{"duration":106.65,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RAaRFD5M8h4","annotations":[{"segment":[0.2,106.28832917316691],"label":"Grooming 
dog"}]},"UBgNp8hNG2s":{"duration":197.509,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=UBgNp8hNG2s","annotations":[]},"W4aPcuQSxFI":{"duration":111.526,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=W4aPcuQSxFI","annotations":[{"segment":[72.30636848926247,106.86200556091356],"label":"Mooping floor"}]},"IaT8-cA_AVU":{"duration":182.602,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IaT8-cA_AVU","annotations":[{"segment":[29.93853134122613,67.47803671346848],"label":"Arm wrestling"},{"segment":[77.43169344095698,116.6775399664831],"label":"Arm wrestling"},{"segment":[119.80583208083661,174.12435879370244],"label":"Arm wrestling"}]},"5QDwobEnN3g":{"duration":84.359,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5QDwobEnN3g","annotations":[{"segment":[0.2,10.101978298803969],"label":"Building sandcastles"},{"segment":[22.040682290509515,28.8627988571984],"label":"Building sandcastles"}]},"E15z95ZcEYU":{"duration":56.5,"subset":"validation","resolution":"608x1080","url":"https://www.youtube.com/watch?v=E15z95ZcEYU","annotations":[{"segment":[0.01,56.49414965986394],"label":"Getting a tattoo"}]},"jikOPvJPU-c":{"duration":210.19,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jikOPvJPU-c","annotations":[{"segment":[20.658007577445954,67.22050084724478],"label":"Making a sandwich"}]},"kTSuw4T-_Nk":{"duration":118.47,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=kTSuw4T-_Nk","annotations":[{"segment":[71.1264976599064,97.54491107644307],"label":"Shaving legs"}]},"ZoCRdAYWtKg":{"duration":148.445,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=ZoCRdAYWtKg","annotations":[{"segment":[14.313527650270839,120.04896760541308],"label":"Riding bumper 
cars"}]},"RJpWgi0EaUE":{"duration":210.03,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RJpWgi0EaUE","annotations":[{"segment":[51.97463928,160.95819312],"label":"Breakdancing"}]},"QsxIJ-L-JJU":{"duration":147.099,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QsxIJ-L-JJU","annotations":[]},"9fw8ODTEso4":{"duration":153.09,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=9fw8ODTEso4","annotations":[{"segment":[0,152.61130014851793],"label":"Beach soccer"}]},"CFbkVO5b2g0":{"duration":110.18,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CFbkVO5b2g0","annotations":[]},"XN7mRI1qGlQ":{"duration":62.067,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=XN7mRI1qGlQ","annotations":[{"segment":[2.224684867394696,62.021],"label":"Blow-drying hair"}]},"KanNWuKvkxs":{"duration":126.665,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KanNWuKvkxs","annotations":[{"segment":[30.017035881435255,71.29947113884556],"label":"Putting in contact lenses"}]},"Dty6AUdBdrE":{"duration":206.263,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Dty6AUdBdrE","annotations":[{"segment":[0.01,206.26285714285714],"label":"Raking leaves"}]},"FwV1XbjLJHY":{"duration":113.36,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=FwV1XbjLJHY","annotations":[{"segment":[7.250784028640057,103.27945885803433],"label":"Mixing 
drinks"}]},"GPWXB0wy5dY":{"duration":23.41,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=GPWXB0wy5dY","annotations":[{"segment":[0,23.369198627145085],"label":"Windsurfing"}]},"0AX436vpIBY":{"duration":83.499,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=0AX436vpIBY","annotations":[]},"OqjwVXLOSSw":{"duration":197.39,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OqjwVXLOSSw","annotations":[]},"A1k7CSI7aRs":{"duration":15.419,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=A1k7CSI7aRs","annotations":[{"segment":[0.01,14.539075206738435],"label":"Doing a powerbomb"}]},"RK0vNzp_CwI":{"duration":194.188,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RK0vNzp_CwI","annotations":[]},"CU15ndFYics":{"duration":142.269,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=CU15ndFYics","annotations":[]},"ZwIu2pPxB3Y":{"duration":39.103,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZwIu2pPxB3Y","annotations":[{"segment":[4.401861013196533,16.278579795854313],"label":"Elliptical trainer"},{"segment":[27.85076744382939,31.32242373822191],"label":"Elliptical trainer"}]},"l88zD-xVQn0":{"duration":7.849,"subset":"testing","resolution":"426x224","url":"https://www.youtube.com/watch?v=l88zD-xVQn0","annotations":[]},"4EloxAiCydc":{"duration":195.466,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4EloxAiCydc","annotations":[{"segment":[0.01,181.09215912636503],"label":"Waxing 
skis"}]},"_7a80bvsbk8":{"duration":53.151,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=_7a80bvsbk8","annotations":[{"segment":[26.664714508580342,51.444182527301095],"label":"Bullfighting"}]},"hr8zkCXbTTk":{"duration":180.094,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=hr8zkCXbTTk","annotations":[{"segment":[22.995890965732084,139.93840965732088],"label":"Curling"}]},"L9cxYlPkUYs":{"duration":182.649,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=L9cxYlPkUYs","annotations":[{"segment":[72.09046069597886,81.20862173262441],"label":"Running a marathon"},{"segment":[86.05264478334234,93.46115062561685],"label":"Running a marathon"},{"segment":[120.53069120315833,148.4550591622465],"label":"Running a marathon"}]},"37pwbUp8t1I":{"duration":200.04,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=37pwbUp8t1I","annotations":[{"segment":[17.784118535101403,190.63327061310454],"label":"Breakdancing"}]},"IytPbGtfo14":{"duration":81.94,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=IytPbGtfo14","annotations":[{"segment":[10.284994716068642,79.66079758346334],"label":"Mixing drinks"}]},"g6tF83rCXh0":{"duration":41.15,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=g6tF83rCXh0","annotations":[{"segment":[15.02044831825273,33.12201424024961],"label":"Clean and 
jerk"}]},"16Mq9vM7Nck":{"duration":77.462,"subset":"training","resolution":"320x180","url":"https://www.youtube.com/watch?v=16Mq9vM7Nck","annotations":[{"segment":[11.926463709122304,20.359318136533496],"label":"Croquet"},{"segment":[38.42972048098605,50.47665447593606],"label":"Croquet"},{"segment":[67.58330172321443,69.99268865945083],"label":"Croquet"}]},"immCYvN8pwQ":{"duration":198.717,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=immCYvN8pwQ","annotations":[{"segment":[28.600222158280822,174.69659267541476],"label":"Longboarding"}]},"Xa1yH3vxlAk":{"duration":137.57999999999998,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Xa1yH3vxlAk","annotations":[{"segment":[18.666558311312045,126.8038600577223],"label":"Playing bagpipes"}]},"LQLDFUFlXAc":{"duration":32.56,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=LQLDFUFlXAc","annotations":[]},"G_rVqf_hwXw":{"duration":17.787,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=G_rVqf_hwXw","annotations":[]},"zUnv3MIOxGM":{"duration":198.183,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zUnv3MIOxGM","annotations":[]},"dygVFom6V5c":{"duration":201.94,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dygVFom6V5c","annotations":[{"segment":[91.42767382,174.2963728549142],"label":"Paintball"},{"segment":[28.092290730000006,89.90354914196567],"label":"Paintball"}]},"8PkoDfbK5Bc":{"duration":157.641,"subset":"training","resolution":"322x240","url":"https://www.youtube.com/watch?v=8PkoDfbK5Bc","annotations":[{"segment":[13.238842796838798,148.8143979020218],"label":"Capoeira"}]},"CvhSPJ0sBCk":{"duration":60.09,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=CvhSPJ0sBCk","annotations":[{"segment":[0,60.09324263038549],"label":"Windsurfing"}]},"atw5LkvnAyo":{"duration":146.588,"subset":"
validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=atw5LkvnAyo","annotations":[{"segment":[0.2,141.28289859594383],"label":"Hanging wallpaper"}]},"hEoASC7qC28":{"duration":118.19,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hEoASC7qC28","annotations":[{"segment":[12.348591263650546,44.04945241809673],"label":"Wakeboarding"},{"segment":[56.02942901716069,105.44164118564743],"label":"Wakeboarding"}]},"YC7ye0ZqmRI":{"duration":142.385,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YC7ye0ZqmRI","annotations":[]},"YIEv6_HQtAc":{"duration":169.92000000000002,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YIEv6_HQtAc","annotations":[{"segment":[71.83978661459383,82.17835369197081],"label":"Layup drill in basketball"},{"segment":[85.35945125424064,89.86600613412293],"label":"Layup drill in basketball"},{"segment":[89.60091460842433,117.43551825897036],"label":"Layup drill in basketball"}]},"s6pFNNnub80":{"duration":210.47,"subset":"training","resolution":"1916x1080","url":"https://www.youtube.com/watch?v=s6pFNNnub80","annotations":[{"segment":[38.566946177847115,167.5610725429017],"label":"Doing nails"}]},"2bEr09bbqAA":{"duration":102.05,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=2bEr09bbqAA","annotations":[{"segment":[71.00633151856687,99.34518131745678],"label":"Kayaking"}]},"HW9c7WFW6q8":{"duration":225.16,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HW9c7WFW6q8","annotations":[{"segment":[39.69342379563182,218.4894654945398],"label":"Paintball"}]},"RrScnC2xQpw":{"duration":173.732,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RrScnC2xQpw","annotations":[{"segment":[24.100882069196263,72.1945721486495],"label":"Baking 
cookies"}]},"vJkL_4niC4w":{"duration":169.09,"subset":"training","resolution":"384x288","url":"https://www.youtube.com/watch?v=vJkL_4niC4w","annotations":[{"segment":[0.01,165.13032329728563],"label":"Playing piano"}]},"aMcqZWGHCbM":{"duration":101.17,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aMcqZWGHCbM","annotations":[]},"IRDoo2NEeF4":{"duration":48.786,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IRDoo2NEeF4","annotations":[]},"0EDEA8dZeGo":{"duration":134.212,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0EDEA8dZeGo","annotations":[{"segment":[18.244491983077488,25.979396335264884],"label":"Waterskiing"},{"segment":[69.67115335167476,82.21424149035703],"label":"Waterskiing"},{"segment":[103.1193883881608,121.72496814414457],"label":"Waterskiing"}]},"Fb-t6zr7K5c":{"duration":80.085,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Fb-t6zr7K5c","annotations":[{"segment":[11.743547581903275,67.35481903276131],"label":"Baton twirling"}]},"Av0VsTxJd78":{"duration":202.664,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=Av0VsTxJd78","annotations":[]},"zh2Thxc6NVI":{"duration":102.935,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zh2Thxc6NVI","annotations":[{"segment":[58.59417316692667,78.01854290171607],"label":"Playing ice hockey"}]},"zacXKdNZHrI":{"duration":157.34,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=zacXKdNZHrI","annotations":[{"segment":[21.35482664204528,130.3380798497246],"label":"Playing accordion"}]},"0_xap_BBDrw":{"duration":48.02,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0_xap_BBDrw","annotations":[{"segment":[1.1228003120124805,47.382173166926684],"label":"Layup drill in 
basketball"}]},"kqzIDPXbATw":{"duration":156.34,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kqzIDPXbATw","annotations":[{"segment":[36.79617940717628,156.221],"label":"Cutting the grass"}]},"dFGqzBVhCBI":{"duration":88.26,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=dFGqzBVhCBI","annotations":[]},"7IIi_92vRLo":{"duration":178.54,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=7IIi_92vRLo","annotations":[{"segment":[94.97870659282371,140.65761533541342],"label":"Getting a haircut"}]},"MD6p1s-N9rM":{"duration":69.99,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MD6p1s-N9rM","annotations":[{"segment":[1.2009896667975564,36.79395546645866],"label":"Ironing clothes"}]},"DTWZhe352y8":{"duration":238.96,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DTWZhe352y8","annotations":[{"segment":[77.16693644929796,197.5771802808112],"label":"Wrapping presents"}]},"lJeJ2LFna5o":{"duration":35.481,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=lJeJ2LFna5o","annotations":[{"segment":[2.7122064914196566,34.15166133073323],"label":"Playing beach volleyball"}]},"x2VS9ipZN1A":{"duration":34.18,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=x2VS9ipZN1A","annotations":[]},"V6B8zFv1DdA":{"duration":126.271,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=V6B8zFv1DdA","annotations":[{"segment":[0.2,74.14927218654856],"label":"Beach soccer"}]},"eBEnQDNyzFk":{"duration":196.49,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eBEnQDNyzFk","annotations":[{"segment":[58.54723139,177.12713589],"label":"Preparing 
pasta"}]},"5QZpCDyXNx4":{"duration":88.469,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=5QZpCDyXNx4","annotations":[{"segment":[9.793901716068643,14.345996879875196],"label":"BMX"},{"segment":[16.966900156006243,20.691341653666147],"label":"BMX"},{"segment":[22.898418096723873,27.58845553822153],"label":"BMX"},{"segment":[28.554051482059283,31.17495475819033],"label":"BMX"},{"segment":[33.79585803432138,52.69395007800312],"label":"BMX"},{"segment":[57.52192979719189,60.970486739469585],"label":"BMX"},{"segment":[62.625794071762876,63.59139001560063],"label":"BMX"},{"segment":[65.24669734789393,68.14348517940718],"label":"BMX"},{"segment":[71.31615756630265,71.72998439937598],"label":"BMX"},{"segment":[76.42002184087364,81.2480015600624],"label":"BMX"},{"segment":[86.21392355694229,86.90363494539783],"label":"BMX"}]},"31KEa5VhvPs":{"duration":212.672,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=31KEa5VhvPs","annotations":[{"segment":[0.01,0.02],"label":"Curling"}]},"a1WhnMcTbrY":{"duration":57.54,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=a1WhnMcTbrY","annotations":[{"segment":[40.680073,49.68858496723869],"label":"Shoveling snow"},{"segment":[8.789677485179407,38.07298091],"label":"Shoveling snow"}]},"8Kj5Whf2JyA":{"duration":60.19,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=8Kj5Whf2JyA","annotations":[{"segment":[0.01,60.186122],"label":"Dodgeball"}]},"VCdZ9gfzRWo":{"duration":39.01,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=VCdZ9gfzRWo","annotations":[{"segment":[0.8520020624024962,33.77579604524181],"label":"Decorating the Christmas 
tree"}]},"NgBfcoVZ254":{"duration":111.572,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NgBfcoVZ254","annotations":[{"segment":[10.791664101938228,100.4320988096724],"label":"Longboarding"}]},"86iCOCtA4Ww":{"duration":129.057,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=86iCOCtA4Ww","annotations":[{"segment":[31.811120521011315,47.515344575687784],"label":"Rope skipping"},{"segment":[67.44762895277717,93.82267191640045],"label":"Rope skipping"}]},"-c71seS8bWk":{"duration":213.37,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=-c71seS8bWk","annotations":[{"segment":[196.5089956493727,201.16188391728548],"label":"Drinking coffee"},{"segment":[207.80886715716088,210.13531129111723],"label":"Drinking coffee"}]},"dG8fhIBgx9k":{"duration":81.55,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=dG8fhIBgx9k","annotations":[{"segment":[20.03389716,74.2861506],"label":"Getting a haircut"}]},"YApgx2YL9wM":{"duration":48.995,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=YApgx2YL9wM","annotations":[{"segment":[0.01,48.961],"label":"Slacklining"}]},"bz9R-Xa5xqM":{"duration":124.76,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=bz9R-Xa5xqM","annotations":[{"segment":[28.41666019293833,35.61814257060078],"label":"Playing accordion"},{"segment":[72.20945843548029,75.5182476360279],"label":"Playing accordion"},{"segment":[79.99484458034321,81.74655650546022],"label":"Playing accordion"},{"segment":[97.31732917316693,105.68661948205929],"label":"Playing accordion"}]},"uyBGDfYo0qE":{"duration":133.66,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=uyBGDfYo0qE","annotations":[{"segment":[0.01,128.54559648508388],"label":"Vacuuming 
floor"}]},"PI1ZaFQ28Ao":{"duration":68.17,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=PI1ZaFQ28Ao","annotations":[{"segment":[15.88408482,55.53296968000001],"label":"Washing dishes"}]},"OqLUp37WKMA":{"duration":69.126,"subset":"validation","resolution":"540x360","url":"https://www.youtube.com/watch?v=OqLUp37WKMA","annotations":[{"segment":[0.01,69.12580498866213],"label":"Futsal"}]},"yE6IDGXoopw":{"duration":228.53,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=yE6IDGXoopw","annotations":[]},"jLcYOkRvdic":{"duration":111.34,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=jLcYOkRvdic","annotations":[{"segment":[0.914429861435079,29.703200677634456],"label":"Shoveling snow"},{"segment":[56.93113452187122,111.339704],"label":"Shoveling snow"}]},"D5jLypnn6Ps":{"duration":236.844,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=D5jLypnn6Ps","annotations":[{"segment":[0.01,2.5864348859668675],"label":"Hurling"},{"segment":[8.498286053891135,16.627081409787003],"label":"Hurling"},{"segment":[33.62365351756928,48.77277213537521],"label":"Hurling"},{"segment":[73.52864890105808,113.4336442845469],"label":"Hurling"},{"segment":[126.7353094123765,161.0979443259363],"label":"Hurling"},{"segment":[169.59623037982743,188.070764950078],"label":"Hurling"},{"segment":[210.97918818564742,222.43339980343214],"label":"Hurling"}]},"Fe-DUewB8ok":{"duration":20.48,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=Fe-DUewB8ok","annotations":[{"segment":[3.067207488299532,19.201996879875196],"label":"Doing 
karate"}]},"_CIoHeD7KQc":{"duration":122.787,"subset":"testing","resolution":"2276x720","url":"https://www.youtube.com/watch?v=_CIoHeD7KQc","annotations":[]},"s0ekpZ8H3k8":{"duration":183.37,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=s0ekpZ8H3k8","annotations":[]},"7qBA7XPDsC4":{"duration":32.23,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=7qBA7XPDsC4","annotations":[{"segment":[5.0204368174726985,25.403410296411856],"label":"Powerbocking"}]},"E5uAH7_WTFs":{"duration":9.43,"subset":"training","resolution":"384x226","url":"https://www.youtube.com/watch?v=E5uAH7_WTFs","annotations":[{"segment":[3.7482676771339944,9.283903722305334],"label":"Playing water polo"}]},"LFOlEafI35c":{"duration":16.069,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=LFOlEafI35c","annotations":[{"segment":[2.230917444773791,9.049002219812794],"label":"Arm wrestling"}]},"-fjUWhSM6Hc":{"duration":198.14,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-fjUWhSM6Hc","annotations":[{"segment":[157.4063694,169.5282687],"label":"Javelin throw"},{"segment":[31.275609749999997,148.40509634999998],"label":"Javelin throw"}]},"Lm4oeMdqOgw":{"duration":81.36,"subset":"validation","resolution":"318x240","url":"https://www.youtube.com/watch?v=Lm4oeMdqOgw","annotations":[{"segment":[23.122550702028082,31.611377535101404],"label":"Javelin throw"},{"segment":[45.54825741029641,51.12300936037442],"label":"Javelin throw"}]},"cukeWRPJzf8":{"duration":74.142,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=cukeWRPJzf8","annotations":[{"segment":[4.857933210608424,47.538346418096715],"label":"Swimming"}]},"7O9kkDxEvaY":{"duration":82.39,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=7O9kkDxEvaY","annotations":[{"segment":[3.853510109204368,71.77870768],"label":"Brushing 
hair"}]},"sBwUwgTjhAU":{"duration":65.46,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=sBwUwgTjhAU","annotations":[]},"YufXlj-WpEw":{"duration":144.73,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=YufXlj-WpEw","annotations":[{"segment":[15.80514572963871,139.08528230140405],"label":"Triple jump"}]},"U7QjLGMeGOo":{"duration":73.259,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=U7QjLGMeGOo","annotations":[{"segment":[0.01,5.600114369734789],"label":"Playing ten pins"},{"segment":[10.057348255850235,12.00024507800312],"label":"Playing ten pins"},{"segment":[22.74332162402496,25.029082591263652],"label":"Playing ten pins"},{"segment":[53.258230536661465,56.6868719875195],"label":"Playing ten pins"},{"segment":[68.45854096879876,72.11575851638065],"label":"Playing ten pins"}]},"cvatsuUhqhk":{"duration":212.301,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cvatsuUhqhk","annotations":[{"segment":[8.609650546021841,195.72241965678626],"label":"Table soccer"}]},"7J6cZ_Gz8q4":{"duration":184.2,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=7J6cZ_Gz8q4","annotations":[{"segment":[1.436845773150654,101.15394242980604],"label":"Playing saxophone"},{"segment":[117.82135268330734,179.60572055382215],"label":"Playing saxophone"}]},"IZCzbslH8jo":{"duration":163.17000000000002,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IZCzbslH8jo","annotations":[{"segment":[65.49809572,149.97149735335412],"label":"Playing field hockey"},{"segment":[28.15755662,52.404024220000004],"label":"Playing field 
hockey"}]},"AE2gHOwcV14":{"duration":66.9,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AE2gHOwcV14","annotations":[]},"jnnHUCs-t3o":{"duration":26.52,"subset":"training","resolution":"270x360","url":"https://www.youtube.com/watch?v=jnnHUCs-t3o","annotations":[{"segment":[6.304204260000001,24.76342109516381],"label":"Playing water polo"},{"segment":[1.7881197,4.97052672],"label":"Playing water polo"}]},"zdDWwJTRiv0":{"duration":166.162,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=zdDWwJTRiv0","annotations":[]},"yw9W5dZ6T18":{"duration":100.64,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=yw9W5dZ6T18","annotations":[{"segment":[7.3788740312012475,75.5157108299532],"label":"Preparing salad"}]},"RI-l0tK8Ok0":{"duration":74.68,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=RI-l0tK8Ok0","annotations":[{"segment":[8.504371652106084,67.45248200780031],"label":"Playing piano"}]},"5Eb_NVjFah0":{"duration":110.898,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=5Eb_NVjFah0","annotations":[{"segment":[11.297137383205115,103.19433841747302],"label":"Playing ten pins"}]},"Y-2nhi8JdO8":{"duration":187.08,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Y-2nhi8JdO8","annotations":[{"segment":[8.609912091721764,154.24876408389667],"label":"Playing guitarra"}]},"RTbvrcdPcrs":{"duration":129.82,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=RTbvrcdPcrs","annotations":[{"segment":[17.417719054340402,129.8227664399093],"label":"Doing step 
aerobics"}]},"rRyjcL-OpIM":{"duration":31.046,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=rRyjcL-OpIM","annotations":[]},"MpGK4SjpuGY":{"duration":26.332,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=MpGK4SjpuGY","annotations":[]},"DgIDfZB16Do":{"duration":96.085,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DgIDfZB16Do","annotations":[]},"ctHNLEOtgPY":{"duration":237.01,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ctHNLEOtgPY","annotations":[{"segment":[20.45598786,132.48872406],"label":"Breakdancing"},{"segment":[157.45967622,208.84433118564743],"label":"Breakdancing"}]},"u1ZFiwJJXSQ":{"duration":108.53,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=u1ZFiwJJXSQ","annotations":[{"segment":[9.7904913,94.43634916380655],"label":"Doing step aerobics"}]},"TEgPIueNqTI":{"duration":147.45,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=TEgPIueNqTI","annotations":[]},"ez9pf35BMtc":{"duration":127.85,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ez9pf35BMtc","annotations":[{"segment":[6.3820780031201245,44.27566614664587],"label":"Using parallel bars"},{"segment":[86.57047425897035,98.93768486739471],"label":"Using parallel bars"}]},"JxEnKQ7iXUI":{"duration":129.75,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JxEnKQ7iXUI","annotations":[{"segment":[3.6369765899999997,127.58198582371297],"label":"Ballet"}]},"CG-7jcSB5_c":{"duration":36.92,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=CG-7jcSB5_c","annotations":[{"segment":[4.03229318,17.09977323],"label":"Playing lacrosse"},{"segment":[18.91877236,28.287337799999996],"label":"Playing 
lacrosse"}]},"w-Rccu4-xyA":{"duration":223.21,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=w-Rccu4-xyA","annotations":[]},"IoOkg-_S4cY":{"duration":78.0,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IoOkg-_S4cY","annotations":[{"segment":[4.254656786271451,74.6579937597504],"label":"Using the balance beam"}]},"i_tMiGS11fs":{"duration":156.248,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=i_tMiGS11fs","annotations":[{"segment":[0.01,132.79039781591266],"label":"Doing fencing"}]},"EJxRNuLtEP4":{"duration":16.371,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=EJxRNuLtEP4","annotations":[]},"x2CZHFxoZaM":{"duration":167.904,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=x2CZHFxoZaM","annotations":[{"segment":[39.03947229460357,42.43937760044346],"label":"Running a marathon"},{"segment":[47.40846997051714,50.80837527635702],"label":"Running a marathon"},{"segment":[68.59249533767334,70.42321357927943],"label":"Running a marathon"},{"segment":[90.82264541431874,95.26867542964783],"label":"Running a marathon"},{"segment":[152.28247039804566,160.38993680614536],"label":"Running a marathon"}]},"fQ1OuD7x3to":{"duration":83.17,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=fQ1OuD7x3to","annotations":[]},"aEWVDbV76_Q":{"duration":157.687,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aEWVDbV76_Q","annotations":[{"segment":[29.507831513260534,125.91605616224649],"label":"Wakeboarding"}]},"5P9Tbut04xE":{"duration":141.74,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5P9Tbut04xE","annotations":[]},"Gba1MksaNBQ":{"duration":192.958,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Gba1MksaNBQ","annotations":[{"segment":[0,192.957822],"label":"Playing 
pool"}]},"2Dlfwnvz76Q":{"duration":77.53,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=2Dlfwnvz76Q","annotations":[{"segment":[5.862623479719189,68.59873865444618],"label":"Playing harmonica"}]},"QHmZWkRK528":{"duration":202.06,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QHmZWkRK528","annotations":[{"segment":[2.679421574141877,188.66280142634275],"label":"Cheerleading"}]},"vdq_xoRyxCU":{"duration":70.752,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vdq_xoRyxCU","annotations":[{"segment":[22.678106846563903,57.732709030651264],"label":"Snow tubing"}]},"lxlXHOHSOlk":{"duration":103.56,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lxlXHOHSOlk","annotations":[{"segment":[16.31772323868955,94.19042225897036],"label":"Sailing"}]},"bFRqNqyVznA":{"duration":151.98,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bFRqNqyVznA","annotations":[{"segment":[41.253642084243374,75.15749736037442],"label":"Polishing forniture"},{"segment":[90.80543056474261,104.31955469578783],"label":"Polishing forniture"},{"segment":[114.75151016536662,124.70928584087363],"label":"Polishing forniture"}]},"b1wnLw3H1vo":{"duration":53.57,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=b1wnLw3H1vo","annotations":[{"segment":[0.08357009984399377,53.15058350078003],"label":"Doing kickboxing"}]},"Lp3c3nwHrqM":{"duration":215.62,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Lp3c3nwHrqM","annotations":[{"segment":[38.8827546,194.44180360000001],"label":"Washing dishes"}]},"m22TDwfa8go":{"duration":183.86,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=m22TDwfa8go","annotations":[{"segment":[89.565620124805,147.17422698907956],"label":"Mixing 
drinks"}]},"a39_RoOBkX0":{"duration":11.43,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=a39_RoOBkX0","annotations":[{"segment":[0.08911244929797192,10.746961385335412],"label":"Washing dishes"}]},"UYzgXZIRUR8":{"duration":193.748,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UYzgXZIRUR8","annotations":[{"segment":[14.508378159126366,62.26512293291732],"label":"Longboarding"},{"segment":[93.39768439937598,169.86892761310452],"label":"Longboarding"}]},"FDBimzPAudY":{"duration":86.5,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=FDBimzPAudY","annotations":[]},"8lMncYsbrWw":{"duration":123.32,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=8lMncYsbrWw","annotations":[]},"POvVSjY_8HU":{"duration":82.199,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=POvVSjY_8HU","annotations":[{"segment":[1.1515093457943926,80.24135046728972],"label":"Hand car wash"}]},"v-mNvC8zVhE":{"duration":141.456,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=v-mNvC8zVhE","annotations":[{"segment":[0.2,136.78786271450858],"label":"Hanging wallpaper"}]},"NSMAftE6fb4":{"duration":38.546,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NSMAftE6fb4","annotations":[{"segment":[1.1718556942277691,31.399723088923558],"label":"Surfing"}]},"s7xcZopLHO8":{"duration":69.94,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=s7xcZopLHO8","annotations":[]},"w5J3Gt5WLwU":{"duration":142.99,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=w5J3Gt5WLwU","annotations":[{"segment":[5.353702866482006,139.1962738221529],"label":"Playing water 
polo"}]},"Q_Rufuciohg":{"duration":63.484,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Q_Rufuciohg","annotations":[{"segment":[10.993217655236823,47.14208577223089],"label":"Peeling potatoes"}]},"OBDq689jDDY":{"duration":61.25,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=OBDq689jDDY","annotations":[{"segment":[18.67266936,58.440717628705144],"label":"Doing step aerobics"}]},"Riww8oJ_gdE":{"duration":53.76,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Riww8oJ_gdE","annotations":[]},"P7Z4i1gmzjs":{"duration":12.33,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=P7Z4i1gmzjs","annotations":[]},"XI6sjZg9Yeg":{"duration":236.983,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XI6sjZg9Yeg","annotations":[{"segment":[0,168.21685486467078],"label":"Fixing the roof"},{"segment":[192.98724887771021,236.982426],"label":"Fixing the roof"}]},"4hbMYlgO8_o":{"duration":3.576,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4hbMYlgO8_o","annotations":[{"segment":[0,3.575872],"label":"Cutting the grass"}]},"D2JvqkKa-qM":{"duration":14.12,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=D2JvqkKa-qM","annotations":[{"segment":[0.022024541341653665,11.71705599375975],"label":"Springboard diving"}]},"Y_ITwC4_pEY":{"duration":124.065,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Y_ITwC4_pEY","annotations":[{"segment":[15.67735009048362,64.06423308580342],"label":"Playing drums"},{"segment":[72.19322942901715,96.77376599063963],"label":"Playing 
drums"}]},"Nx4rK_jvvR4":{"duration":28.19,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=Nx4rK_jvvR4","annotations":[{"segment":[4.301297971918877,23.701029641185645],"label":"Ballet"}]},"cNR7JAcbjH8":{"duration":153.066,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cNR7JAcbjH8","annotations":[]},"Oe1tNvkhrI8":{"duration":61.91,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Oe1tNvkhrI8","annotations":[{"segment":[1.1588966864274572,60.938650761310456],"label":"Mowing the lawn"}]},"eoSwoeLmeyQ":{"duration":30.14,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=eoSwoeLmeyQ","annotations":[]},"-6dz6tBH77I":{"duration":8.06,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=-6dz6tBH77I","annotations":[]},"V9mLJJn8Lvw":{"duration":134.12,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=V9mLJJn8Lvw","annotations":[]},"ZdUUBA1czgY":{"duration":18.48,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZdUUBA1czgY","annotations":[{"segment":[7.900725411329378,16.118633229682928],"label":"Discus throw"}]},"6h-WE-0eBlA":{"duration":194.723,"subset":"training","resolution":"566x360","url":"https://www.youtube.com/watch?v=6h-WE-0eBlA","annotations":[{"segment":[2.725500822355654,39.06557339882014],"label":"Playing pool"},{"segment":[60.56678300656163,81.46232473802871],"label":"Playing pool"},{"segment":[110.83721673733751,115.07689187125835],"label":"Playing pool"}]},"JfF80Uho8U8":{"duration":143.27,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JfF80Uho8U8","annotations":[{"segment":[46.70071359714527,134.62464585571226],"label":"Making a 
sandwich"}]},"H4spfNy_LG4":{"duration":149.258,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=H4spfNy_LG4","annotations":[{"segment":[10.711173089100434,139.47810036349455],"label":"Rafting"}]},"YKLZAgdkFrk":{"duration":13.561,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=YKLZAgdkFrk","annotations":[{"segment":[0.5906209048361934,12.909285491419658],"label":"Swinging at the playground"}]},"eFcpy2RClJQ":{"duration":670.895,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eFcpy2RClJQ","annotations":[]},"NG2WCtJ8DMA":{"duration":22.733,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NG2WCtJ8DMA","annotations":[]},"kzdRUWpBXd4":{"duration":129.08,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kzdRUWpBXd4","annotations":[{"segment":[15.69698144,68.94167983999999],"label":"Dodgeball"},{"segment":[75.51782636,109.70190443999999],"label":"Dodgeball"}]},"3uWV7MOoNk0":{"duration":106.023,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3uWV7MOoNk0","annotations":[]},"4KMG6lEDSIM":{"duration":84.24,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=4KMG6lEDSIM","annotations":[]},"fbIEeQknsuo":{"duration":223.585,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fbIEeQknsuo","annotations":[{"segment":[0.01,219.54486709838503],"label":"Camel ride"}]},"42XFIWVIWpw":{"duration":172.687,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=42XFIWVIWpw","annotations":[{"segment":[22.62116068642746,158.3481248049922],"label":"Baton twirling"}]},"8uV6u0QcTSs":{"duration":64.13,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8uV6u0QcTSs","annotations":[{"segment":[0,63.13274506084243],"label":"Putting on 
makeup"}]},"jEOtcfk220s":{"duration":41.22,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=jEOtcfk220s","annotations":[]},"4aBJ_L0u7Lo":{"duration":141.828,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=4aBJ_L0u7Lo","annotations":[{"segment":[31.408583463338534,76.53077379095164],"label":"Shuffleboard"},{"segment":[87.59013416536662,132.95226365054603],"label":"Shuffleboard"}]},"iA2Q4t-o58w":{"duration":219.27,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iA2Q4t-o58w","annotations":[{"segment":[44.18867698,169.84344359999997],"label":"Cheerleading"},{"segment":[69.40426697999999,161.84242726],"label":"Tumbling"}]},"LnqAgjTpi0k":{"duration":129.73,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LnqAgjTpi0k","annotations":[{"segment":[37.643751893915756,71.23978853042122],"label":"Running a marathon"},{"segment":[77.7161329422777,90.26405024024962],"label":"Running a marathon"},{"segment":[99.5737953322933,103.01435330109204],"label":"Running a marathon"}]},"KNyWPCoHEng":{"duration":164.723,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=KNyWPCoHEng","annotations":[{"segment":[0.01,164.681],"label":"Hand car wash"}]},"py26bxAfOEg":{"duration":227.185,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=py26bxAfOEg","annotations":[{"segment":[8.15168928934028,36.50539095943838],"label":"River tubing"},{"segment":[44.65708020280811,77.61825844773792],"label":"River tubing"},{"segment":[90.02300294851794,131.8447129797192],"label":"River tubing"},{"segment":[137.86987459438376,166.93241885335414],"label":"River tubing"},{"segment":[192.09632912636505,195.64054184087362],"label":"River tubing"},{"segment":[205.2099161700468,220.80445211388454],"label":"River 
tubing"}]},"EkwX8Swnut8":{"duration":231.294,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EkwX8Swnut8","annotations":[{"segment":[114.89899921996881,121.75325663026521],"label":"Skiing"},{"segment":[125.72151092043683,138.34777457098286],"label":"Skiing"},{"segment":[149.89178705148208,163.9610522620905],"label":"Skiing"},{"segment":[169.3723081123245,184.52382449297974],"label":"Skiing"}]},"OVRYoPA6h-Y":{"duration":197.463,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=OVRYoPA6h-Y","annotations":[{"segment":[7.197231571107155,11.503267273030708],"label":"Croquet"},{"segment":[51.48788450517798,66.25143548320159],"label":"Croquet"},{"segment":[73.63321097221339,79.47711656768107],"label":"Croquet"},{"segment":[92.70279765216056,119.76930777853717],"label":"Croquet"},{"segment":[132.9949879665169,141.6070593123113],"label":"Croquet"},{"segment":[154.8327403076384,162.21451574689073],"label":"Croquet"}]},"udIYvmhOlOM":{"duration":107.09,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=udIYvmhOlOM","annotations":[]},"V9xuy-rVj9w":{"duration":193.678,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=V9xuy-rVj9w","annotations":[{"segment":[22.082043825278053,23.290639238482335],"label":"Snowboarding"},{"segment":[32.05295598421339,186.14887023932252],"label":"Snowboarding"}]},"pq2xsK79FcQ":{"duration":42.87,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=pq2xsK79FcQ","annotations":[{"segment":[22.757192667706708,26.7004360374415],"label":"Long jump"}]},"nrvB8pcrY7o":{"duration":175.404,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=nrvB8pcrY7o","annotations":[{"segment":[58.27792979719189,65.9388783151326],"label":"Waxing skis"},{"segment":[79.34553822152886,128.320887675507],"label":"Waxing 
skis"},{"segment":[141.18033697347894,154.31339157566302],"label":"Waxing skis"},{"segment":[163.88957722308893,170.72970982839314],"label":"Waxing skis"}]},"_-_SwG4EnRU":{"duration":23.847,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=_-_SwG4EnRU","annotations":[{"segment":[0.2,22.933132643594192],"label":"Mooping floor"}]},"5nvximBXhFI":{"duration":190.938,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5nvximBXhFI","annotations":[{"segment":[8.905800933125972,155.2740637636081],"label":"Longboarding"}]},"cNvTm72aGcM":{"duration":195.23,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cNvTm72aGcM","annotations":[{"segment":[0.6091525075969025,192.18761614682276],"label":"Belly dance"}]},"x9VlxLDK-VM":{"duration":121.39,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=x9VlxLDK-VM","annotations":[{"segment":[1.3256746443057723,120.06824635569424],"label":"Washing dishes"}]},"wYHU4F2dLAc":{"duration":97.22,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=wYHU4F2dLAc","annotations":[]},"TP8lUusp66Y":{"duration":191.89,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=TP8lUusp66Y","annotations":[{"segment":[47.59822610920436,168.24027090171606],"label":"Playing field hockey"}]},"L4mSCZwXelk":{"duration":148.655,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=L4mSCZwXelk","annotations":[{"segment":[10.426591263650545,141.12553666146647],"label":"Futsal"}]},"5koLOwu786I":{"duration":60.628,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=5koLOwu786I","annotations":[{"segment":[10.309478641185647,44.6428799875195],"label":"Blow-drying hair"},{"segment":[47.574933546021846,58.92481828861155],"label":"Blow-drying 
hair"}]},"z4_OuffZPAo":{"duration":28.352,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=z4_OuffZPAo","annotations":[{"segment":[0.01,22.955478496099843],"label":"Peeling potatoes"}]},"pFh-8ICMTFY":{"duration":72.865,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pFh-8ICMTFY","annotations":[{"segment":[16.141532186457525,62.40634510296412],"label":"Elliptical trainer"}]},"tY6UFSLtIoE":{"duration":231.97,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tY6UFSLtIoE","annotations":[{"segment":[11.36406333,99.63678551000001],"label":"Cumbia"}]},"aMexiSP5Vjc":{"duration":91.185,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=aMexiSP5Vjc","annotations":[{"segment":[0.01,88.07457404839585],"label":"Elliptical trainer"}]},"saMBbZcDAjM":{"duration":22.08,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=saMBbZcDAjM","annotations":[{"segment":[0.01,22.082175],"label":"Ping-pong"}]},"GfqUICvzbzk":{"duration":196.93,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=GfqUICvzbzk","annotations":[]},"bESksIhdEvo":{"duration":23.592,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=bESksIhdEvo","annotations":[{"segment":[0.01,23.521],"label":"Cleaning sink"}]},"-doxoUNGLJE":{"duration":206.565,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-doxoUNGLJE","annotations":[{"segment":[0.01,194.96316976599064],"label":"Changing car wheel"}]},"JZqQjiUpOSw":{"duration":91.56,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=JZqQjiUpOSw","annotations":[]},"7j8cTyXi5a4":{"duration":83.92,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7j8cTyXi5a4","annotations":[{"segment":[35.134963338533545,71.25135023400937],"label":"Cleaning 
shoes"}]},"-9dlY4JGGR0":{"duration":145.357,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=-9dlY4JGGR0","annotations":[]},"lSVj34-DR0o":{"duration":141.22,"subset":"testing","resolution":"322x240","url":"https://www.youtube.com/watch?v=lSVj34-DR0o","annotations":[]},"xb8iMASjw1A":{"duration":131.96,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xb8iMASjw1A","annotations":[{"segment":[6.5876568994732585,11.528399574078202],"label":"Snowboarding"},{"segment":[13.998770911380674,15.233956580031911],"label":"Snowboarding"},{"segment":[17.292599361117304,24.29198481680764],"label":"Snowboarding"},{"segment":[95.72688912636505,111.99016706396256],"label":"Snowboarding"}]},"fBov3cwPEho":{"duration":103.399,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fBov3cwPEho","annotations":[{"segment":[0.01,95.24898384357246],"label":"Decorating the Christmas tree"}]},"agIcVWDyeH0":{"duration":86.936,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=agIcVWDyeH0","annotations":[{"segment":[37.112252730109205,75.44352106084243],"label":"Putting on shoes"}]},"ozgrP6Znupc":{"duration":94.32,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ozgrP6Znupc","annotations":[{"segment":[20.45304852730109,94.31945578231293],"label":"Playing lacrosse"}]},"K-E0UXNscjU":{"duration":44.397,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=K-E0UXNscjU","annotations":[{"segment":[0.01,44.396552],"label":"Throwing darts"}]},"kMkHZ_nGsPQ":{"duration":19.6,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=kMkHZ_nGsPQ","annotations":[]},"3bTGq7QDjUg":{"duration":63.53,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3bTGq7QDjUg","annotations":[{"segment":[0.01,63.144919777094756],"label":"River 
tubing"}]},"IY1bVy1yZv4":{"duration":72.75,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IY1bVy1yZv4","annotations":[{"segment":[49.1279657,55.6442359],"label":"Fixing bicycle"},{"segment":[49.1279657,55.6442359],"label":"Fixing bicycle"}]},"iEaiLh3GZA8":{"duration":225.12,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iEaiLh3GZA8","annotations":[{"segment":[32.661347405616226,45.65564691107644],"label":"Polishing shoes"}]},"gWz4P3Jnis8":{"duration":174.06,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=gWz4P3Jnis8","annotations":[{"segment":[44.80401318093541,48.334026340645465],"label":"Smoking hookah"},{"segment":[81.46184214715528,86.07801320216075],"label":"Smoking hookah"},{"segment":[124.36507849609985,127.08047322308893],"label":"Smoking hookah"}]},"UYUvjnlU9Ho":{"duration":202.827,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UYUvjnlU9Ho","annotations":[{"segment":[29.110769524658537,37.33772586271451],"label":"Croquet"},{"segment":[73.09334469734789,158.84354561934478],"label":"Croquet"}]},"pSWcVR96xlc":{"duration":4.06,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=pSWcVR96xlc","annotations":[{"segment":[0.10776809204368175,3.638757931357254],"label":"Playing water 
polo"}]},"ePaIRTk2geU":{"duration":185.249,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ePaIRTk2geU","annotations":[]},"ypfWA4aDL6k":{"duration":111.57,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ypfWA4aDL6k","annotations":[]},"oN_5o1t_Lvk":{"duration":217.5,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=oN_5o1t_Lvk","annotations":[{"segment":[0.01,217.481],"label":"Tumbling"}]},"rV9RzL8o7tk":{"duration":47.926,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=rV9RzL8o7tk","annotations":[{"segment":[11.30217165440693,43.25282832107359],"label":"Playing pool"}]},"7gSltoRSFCU":{"duration":198.461,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7gSltoRSFCU","annotations":[]},"3q_MOQNfSmA":{"duration":210.745,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3q_MOQNfSmA","annotations":[{"segment":[20.15529715166712,194.13424218281978],"label":"Croquet"}]},"uHLEUps_ahs":{"duration":234.62,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uHLEUps_ahs","annotations":[{"segment":[35.86509516380655,47.21017628705148],"label":"Playing saxophone"},{"segment":[125.52783307332292,135.40903276131044],"label":"Playing saxophone"},{"segment":[147.48605460218408,211.53086739469578],"label":"Playing saxophone"}]},"g4G1gg-9y7w":{"duration":114.82,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=g4G1gg-9y7w","annotations":[{"segment":[26.332189287571502,94.58092335725429],"label":"Getting a piercing"}]},"lCIJJgxTs2U":{"duration":62.58,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lCIJJgxTs2U","annotations":[{"segment":[3.93984792,60.02497367706708],"label":"High 
jump"}]},"GHBeLaysVaA":{"duration":136.14,"subset":"validation","resolution":"852x480","url":"https://www.youtube.com/watch?v=GHBeLaysVaA","annotations":[{"segment":[0,133.16520719344774],"label":"Ballet"}]},"DFaolyuIgZ4":{"duration":109.22999999999999,"subset":"testing","resolution":"240x180","url":"https://www.youtube.com/watch?v=DFaolyuIgZ4","annotations":[]},"1FKoT6uFsjY":{"duration":184.947,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1FKoT6uFsjY","annotations":[{"segment":[47.02359282371295,160.97647113884554],"label":"Putting in contact lenses"}]},"0jdSl5-lMpY":{"duration":173.779,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0jdSl5-lMpY","annotations":[]},"L21zcZlFfIY":{"duration":201.76,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=L21zcZlFfIY","annotations":[{"segment":[0,201.721],"label":"Dodgeball"}]},"qpqFF-Hs1vU":{"duration":127.32,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=qpqFF-Hs1vU","annotations":[]},"TxYZLJQOHvY":{"duration":24.498,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TxYZLJQOHvY","annotations":[{"segment":[0.2,3.3199797191887677],"label":"Fun sliding down"},{"segment":[18.50793291731669,23.621464898595942],"label":"Fun sliding down"}]},"Pzrh78MZPJg":{"duration":226.024,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=Pzrh78MZPJg","annotations":[]},"SiBSwSu_REQ":{"duration":222.006,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SiBSwSu_REQ","annotations":[{"segment":[20.43424835768941,219.2352394773791],"label":"Baton twirling"}]},"qYpb_3oAM0k":{"duration":95.32,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=qYpb_3oAM0k","annotations":[{"segment":[25.322752032761308,92.45403601404057],"label":"Playing 
accordion"}]},"mL7Ipu-HJ5M":{"duration":123.74,"subset":"training","resolution":"810x480","url":"https://www.youtube.com/watch?v=mL7Ipu-HJ5M","annotations":[{"segment":[17.7520576349454,121.17708907332293],"label":"Preparing pasta"}]},"KMB-MNnnINk":{"duration":119.142,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=KMB-MNnnINk","annotations":[]},"mQCFphhDFw8":{"duration":140.27,"subset":"training","resolution":"1920x1072","url":"https://www.youtube.com/watch?v=mQCFphhDFw8","annotations":[{"segment":[16.631283460862246,124.73462595646683],"label":"Fixing bicycle"}]},"iNhoeDq_EBQ":{"duration":144.73,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=iNhoeDq_EBQ","annotations":[{"segment":[4.514227769110765,137.92872230889236],"label":"Tumbling"}]},"V26HGC2a0ME":{"duration":88.561,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=V26HGC2a0ME","annotations":[]},"t1MXansbY5s":{"duration":45.001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=t1MXansbY5s","annotations":[{"segment":[13.51402013223386,14.637263364003948],"label":"Putting on shoes"},{"segment":[23.974222728092798,26.08030378766171],"label":"Putting on shoes"}]},"2tpwfPdSEVo":{"duration":204.754,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=2tpwfPdSEVo","annotations":[{"segment":[0.01,189.3908783151326],"label":"Clipping cat claws"}]},"asWvAGhlbdQ":{"duration":180.93,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=asWvAGhlbdQ","annotations":[{"segment":[37.54082483619345,132.94532705148208],"label":"Sharpening knives"}]},"QESBvmN1hcM":{"duration":114.57,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=QESBvmN1hcM","annotations":[{"segment":[1.429856552085213,6.970550691415412],"label":"Playing water polo"},{"segment":[9.115335519543232,16.264618279969298],"label":"Playing water 
polo"},{"segment":[20.19672379820363,21.805312419299494],"label":"Playing water polo"},{"segment":[53.97708484121678,60.232707256589585],"label":"Playing water polo"},{"segment":[67.73945342433697,82.93167912636505],"label":"Playing water polo"}]},"QxPi0RyYnqY":{"duration":215.853,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=QxPi0RyYnqY","annotations":[]},"AJ9GUwEPkdI":{"duration":83.336,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=AJ9GUwEPkdI","annotations":[]},"dcARAWmvWSo":{"duration":215.643,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dcARAWmvWSo","annotations":[{"segment":[0.01,215.601],"label":"Roof shingle removal"}]},"MnzVdvbvnMc":{"duration":52.18,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=MnzVdvbvnMc","annotations":[{"segment":[20.837536587177773,51.11708184711389],"label":"Polishing shoes"}]},"PNdG3SUdJzc":{"duration":52.25,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=PNdG3SUdJzc","annotations":[{"segment":[14.868103034321372,51.52917900936037],"label":"Starting a campfire"}]},"rjnulRGsve8":{"duration":60.86,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rjnulRGsve8","annotations":[]},"V08Q-DbT0GQ":{"duration":186.85,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=V08Q-DbT0GQ","annotations":[]},"7uhJ_0oKV6s":{"duration":61.998,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=7uhJ_0oKV6s","annotations":[{"segment":[0.01,52.43009485257234],"label":"Rope 
skipping"}]},"bsF9yyz1jH0":{"duration":185.156,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=bsF9yyz1jH0","annotations":[]},"YCrSiqY-TVw":{"duration":203.779,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YCrSiqY-TVw","annotations":[{"segment":[30.51906211544462,34.65185177691108],"label":"Curling"},{"segment":[66.12463458346335,79.79463115600625],"label":"Curling"},{"segment":[129.70601399063963,138.9253140046802],"label":"Curling"}]},"s1WQ4zNikKA":{"duration":93.3,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=s1WQ4zNikKA","annotations":[{"segment":[6.911441759999999,35.10206328],"label":"Shaving legs"},{"segment":[41.04321624,86.48537441497659],"label":"Shaving legs"}]},"sBumUzHPg2s":{"duration":134.86,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=sBumUzHPg2s","annotations":[]},"eDm8bUVWxAM":{"duration":50.81,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eDm8bUVWxAM","annotations":[{"segment":[9.667683449999998,46.20512840171607],"label":"Archery"}]},"wfyHIL89Hmg":{"duration":35.6,"subset":"testing","resolution":"400x300","url":"https://www.youtube.com/watch?v=wfyHIL89Hmg","annotations":[]},"84OwFujqHyw":{"duration":60.84,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=84OwFujqHyw","annotations":[{"segment":[0.01,10.718429017160688],"label":"Playing polo"}]},"sOyox_6AG1w":{"duration":117.981,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sOyox_6AG1w","annotations":[{"segment":[7.546306995163806,21.35052710826833],"label":"Running a marathon"},{"segment":[27.9765527625585,36.811253634945395],"label":"Running a marathon"},{"segment":[42.14888541201248,53.00820523432137],"label":"Running a marathon"},{"segment":[58.89800581591263,73.62250726989079],"label":"Running a 
marathon"},{"segment":[78.96013904695788,88.89917752839314],"label":"Running a marathon"},{"segment":[93.86869676911076,110.98592970936036],"label":"Running a marathon"}]},"iuc48VIWLJY":{"duration":47.903,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=iuc48VIWLJY","annotations":[]},"cduejHfXPDc":{"duration":166.46,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cduejHfXPDc","annotations":[{"segment":[37.006309497277854,108.16230109204369],"label":"Painting"}]},"6cPXFUqRB1s":{"duration":56.19,"subset":"validation","resolution":"854x478","url":"https://www.youtube.com/watch?v=6cPXFUqRB1s","annotations":[{"segment":[3.7277685000000003,54.75916536661467],"label":"Hammer throw"}]},"J2gJYNO2qh8":{"duration":139.274,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=J2gJYNO2qh8","annotations":[{"segment":[14.33064898595944,17.153352574102964],"label":"Making a cake"},{"segment":[24.752939157566303,39.08920436817472],"label":"Making a cake"},{"segment":[51.68461466458658,62.54272698907956],"label":"Making a cake"},{"segment":[70.79489235569423,92.51111700468019],"label":"Making a cake"},{"segment":[96.20287519500779,103.1520670826833],"label":"Making a cake"}]},"AguxVxEWEs0":{"duration":118.98,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AguxVxEWEs0","annotations":[]},"ytvIoI5uwFA":{"duration":98.407,"subset":"training","resolution":"320x180","url":"https://www.youtube.com/watch?v=ytvIoI5uwFA","annotations":[{"segment":[11.05342290426311,58.79806798439938],"label":"Arm wrestling"}]},"e0yLhjXQIXI":{"duration":157.78,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=e0yLhjXQIXI","annotations":[]},"QVdsLRKpCT0":{"duration":60.65,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QVdsLRKpCT0","annotations":[{"segment":[3.0254394009360377,60.03606311232449],"label":"Using 
parallel bars"}]},"bzHSF5GJc8o":{"duration":72.73,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bzHSF5GJc8o","annotations":[{"segment":[1.1345541185647425,71.02308782215289],"label":"Volleyball"}]},"9ZGy24Geu5k":{"duration":114.893,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9ZGy24Geu5k","annotations":[{"segment":[3.2262975070202806,113.27889024648985],"label":"Surfing"}]},"BJ9r8_JnG0k":{"duration":224.027,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BJ9r8_JnG0k","annotations":[{"segment":[80.74710253795179,114.94423303293274],"label":"Using the monkey bar"},{"segment":[125.41274236813099,149.8392629642316],"label":"Using the monkey bar"},{"segment":[179.8489894875961,192.76015089881105],"label":"Using the monkey bar"}]},"XPiMnbCr-sc":{"duration":83.43,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=XPiMnbCr-sc","annotations":[]},"VOnP9N7FAT8":{"duration":190.1,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=VOnP9N7FAT8","annotations":[{"segment":[7.710836102964119,167.85897054914196],"label":"Playing accordion"}]},"JW5P5FdOrcw":{"duration":207.889,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JW5P5FdOrcw","annotations":[{"segment":[159.9638589724571,181.98317559214755],"label":"Mooping floor"}]},"HZ0tf9Cp340":{"duration":155.39,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=HZ0tf9Cp340","annotations":[{"segment":[37.2773413,120.01625719],"label":"Hammer throw"}]},"ZCZoL2b6euQ":{"duration":22.849,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=ZCZoL2b6euQ","annotations":[{"segment":[0.19999999999999998,22.648434],"label":"Throwing 
darts"}]},"SwbvD590YtQ":{"duration":60.26,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=SwbvD590YtQ","annotations":[{"segment":[9.974371280589729,47.61077113648224],"label":"Sailing"}]},"UXc2y7lR5t0":{"duration":18.46,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=UXc2y7lR5t0","annotations":[{"segment":[0.2,18.259863945578232],"label":"Using the monkey bar"}]},"WhhbKOSW7m0":{"duration":69.28999999999999,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WhhbKOSW7m0","annotations":[{"segment":[14.325089703588143,54.54345475819033],"label":"Playing squash"}]},"Ht2gV7oaqbo":{"duration":5.48,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ht2gV7oaqbo","annotations":[{"segment":[0.01,4.914725429017161],"label":"Doing a powerbomb"}]},"MdOAr_4FJvc":{"duration":233.13,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=MdOAr_4FJvc","annotations":[{"segment":[0.01,230.21858344305775],"label":"Playing piano"}]},"oUdEoaKDHpA":{"duration":213.53,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oUdEoaKDHpA","annotations":[{"segment":[19.654152065402343,144.90773000780032],"label":"Breakdancing"}]},"1ebIpLiTCvw":{"duration":184.79,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1ebIpLiTCvw","annotations":[{"segment":[57.65495381720031,89.65345318574647],"label":"Making a sandwich"}]},"HktZZPJMU8s":{"duration":211.63,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=HktZZPJMU8s","annotations":[{"segment":[15.845316692667707,200.70734477379096],"label":"Tai chi"}]},"zyi4NVylqB0":{"duration":100.195,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zyi4NVylqB0","annotations":[{"segment":[10.14446177847114,89.89553822152887],"label":"Beach 
soccer"}]},"fdYyIOWkycU":{"duration":24.567,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=fdYyIOWkycU","annotations":[{"segment":[0.01,11.13414955922302],"label":"Throwing darts"}]},"Gfu8e6R6jUU":{"duration":199.14,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Gfu8e6R6jUU","annotations":[]},"gZB_XrTnDCI":{"duration":174.707,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gZB_XrTnDCI","annotations":[]},"MlxMV7WH9-g":{"duration":101.936,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=MlxMV7WH9-g","annotations":[{"segment":[22.882970358814354,85.81113884555381],"label":"Baking cookies"}]},"CwImmV7q1MY":{"duration":197.3,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=CwImmV7q1MY","annotations":[]},"IxrncvpPWV8":{"duration":23.847,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IxrncvpPWV8","annotations":[{"segment":[0.01,23.821],"label":"Tug of war"}]},"KylUHdDXuCg":{"duration":104.397,"subset":"training","resolution":"322x240","url":"https://www.youtube.com/watch?v=KylUHdDXuCg","annotations":[{"segment":[19.860496099843992,91.81585647425896],"label":"Bullfighting"}]},"ArzhjEk4j_Y":{"duration":19.32,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ArzhjEk4j_Y","annotations":[{"segment":[3.7914638411214954,15.045491433021807],"label":"Building sandcastles"}]},"CKjHXMoXye4":{"duration":55.68,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=CKjHXMoXye4","annotations":[{"segment":[26.851029641185647,44.347507020280815],"label":"High 
jump"}]},"knXbIp4x5Vo":{"duration":188.85,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=knXbIp4x5Vo","annotations":[]},"VdeYnCIbRJ4":{"duration":7.97,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VdeYnCIbRJ4","annotations":[{"segment":[0,7.916666],"label":"Triple jump"}]},"0pGXfomqurw":{"duration":156.666,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=0pGXfomqurw","annotations":[{"segment":[47.41500935683686,126.35855450078002],"label":"Elliptical trainer"},{"segment":[142.73384106084242,156.465055],"label":"Elliptical trainer"}]},"KlgrI3Ngwv0":{"duration":54.64,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KlgrI3Ngwv0","annotations":[]},"el-ogdlS5nc":{"duration":8.01,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=el-ogdlS5nc","annotations":[{"segment":[2.924409706708268,8.010883],"label":"Starting a campfire"}]},"yw1IZdbEzck":{"duration":175.52,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=yw1IZdbEzck","annotations":[{"segment":[0.2,27.108337100830973],"label":"Getting a piercing"},{"segment":[36.14444946777463,128.42232424535075],"label":"Getting a piercing"}]},"E_Z4BceegCQ":{"duration":89.3,"subset":"training","resolution":"484x268","url":"https://www.youtube.com/watch?v=E_Z4BceegCQ","annotations":[{"segment":[1.1145501653666148,87.49218798127924],"label":"Breakdancing"}]},"qenGkKGoq6o":{"duration":66.131,"subset":"validation","resolution":"853x480","url":"https://www.youtube.com/watch?v=qenGkKGoq6o","annotations":[{"segment":[10.296105919003114,34.6978769470405],"label":"Drum corps"}]},"p3-tF0riSpQ":{"duration":110.179,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=p3-tF0riSpQ","annotations":[{"segment":[57.40257811701015,61.17229983669855],"label":"Having an ice 
cream"},{"segment":[83.1052248426232,84.47603271820641],"label":"Having an ice cream"}]},"1q8BSnedfqA":{"duration":132.66,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1q8BSnedfqA","annotations":[]},"0RUMAGGab1k":{"duration":124.48,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0RUMAGGab1k","annotations":[{"segment":[22.43004252028081,94.08965888377534],"label":"Playing harmonica"}]},"Gd6YooP1_CU":{"duration":31.63,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Gd6YooP1_CU","annotations":[{"segment":[0.01,16.751232449297973],"label":"Drinking coffee"}]},"Jy93pLzV2uw":{"duration":70.53999999999999,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Jy93pLzV2uw","annotations":[]},"aAlbRFeu32E":{"duration":165.12,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aAlbRFeu32E","annotations":[{"segment":[146.18401244929797,161.12440491107645],"label":"Volleyball"}]},"ItukN-TWrJM":{"duration":150.37,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=ItukN-TWrJM","annotations":[{"segment":[25.45305499839041,137.58725001950077],"label":"Wrapping presents"}]},"0W11dYp51EM":{"duration":152.486,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=0W11dYp51EM","annotations":[]},"4llkVfMzsN4":{"duration":200.37,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=4llkVfMzsN4","annotations":[{"segment":[8.750371294851794,200.101],"label":"Playing racquetball"}]},"1T66cuSjizE":{"duration":12.143,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=1T66cuSjizE","annotations":[{"segment":[1.2521466458658346,12.161],"label":"Doing a 
powerbomb"}]},"079MEwdDNjg":{"duration":162.795,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=079MEwdDNjg","annotations":[{"segment":[96.1861739070928,145.87644805039497],"label":"Rock-paper-scissors"}]},"tj0sI8M3tro":{"duration":211.51,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=tj0sI8M3tro","annotations":[{"segment":[72.90701496567863,202.55614112636508],"label":"Painting"}]},"rvWlRHPYrZI":{"duration":154.9,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rvWlRHPYrZI","annotations":[]},"QjKmQO_9TaA":{"duration":164.0,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QjKmQO_9TaA","annotations":[{"segment":[14.99962292,153.98521372854916],"label":"Doing step aerobics"}]},"jnOqi_9KJiE":{"duration":35.225,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=jnOqi_9KJiE","annotations":[{"segment":[0.01,12.915712417053866],"label":"Peeling potatoes"}]},"IPehEfMBLM0":{"duration":173.99,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=IPehEfMBLM0","annotations":[{"segment":[0,173.941],"label":"Chopping wood"}]},"ZHVzukkU9sk":{"duration":111.758,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZHVzukkU9sk","annotations":[{"segment":[8.746436682549593,59.549590180144506],"label":"Baking cookies"}]},"8BsIeOSzK_U":{"duration":54.22,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8BsIeOSzK_U","annotations":[{"segment":[11.165139482059283,48.12851791887676],"label":"Using parallel bars"}]},"1VAugQRO05g":{"duration":110.086,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1VAugQRO05g","annotations":[{"segment":[10.61480284528923,43.657664995851896],"label":"Drum corps"},{"segment":[56.6693620085087,60.94952549951422],"label":"Drum 
corps"},{"segment":[67.96899362476329,85.2608541284256],"label":"Drum corps"}]},"Lshue6AmrIw":{"duration":38.78,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=Lshue6AmrIw","annotations":[]},"9TjPn_ejmFU":{"duration":37.454,"subset":"testing","resolution":"432x720","url":"https://www.youtube.com/watch?v=9TjPn_ejmFU","annotations":[]},"I116ZNTNUdE":{"duration":184.924,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=I116ZNTNUdE","annotations":[]},"NFLa94V-knM":{"duration":85.148,"subset":"testing","resolution":"853x480","url":"https://www.youtube.com/watch?v=NFLa94V-knM","annotations":[]},"hsJJbTqSQR4":{"duration":13.52,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=hsJJbTqSQR4","annotations":[{"segment":[0.5530373224445949,10.614871802195372],"label":"Tennis serve with ball bouncing"}]},"e4XYZAs7tcs":{"duration":10.84,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=e4XYZAs7tcs","annotations":[{"segment":[0.084584375975039,10.742215748829953],"label":"Playing racquetball"}]},"iesGbWk_nqQ":{"duration":139.46,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=iesGbWk_nqQ","annotations":[]},"3UbQ0UDmbbw":{"duration":425.645,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=3UbQ0UDmbbw","annotations":[{"segment":[73.85802523564284,365.57733096773006],"label":"Removing ice from car"}]},"FS_xWK5tBtY":{"duration":99.17,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=FS_xWK5tBtY","annotations":[]},"2YeImloBCA8":{"duration":167.79,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=2YeImloBCA8","annotations":[{"segment":[3.1411056278985856,164.64628539625585],"label":"Doing step 
aerobics"}]},"XfF30DaqAGU":{"duration":51.67,"subset":"training","resolution":"160x120","url":"https://www.youtube.com/watch?v=XfF30DaqAGU","annotations":[{"segment":[4.829953198127925,26.001248049921998],"label":"Snatch"},{"segment":[30.267706708268328,50.71450858034322],"label":"Snatch"}]},"xLWlqef2_yM":{"duration":182.51,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=xLWlqef2_yM","annotations":[]},"MWWDqMI-rxU":{"duration":137.7,"subset":"validation","resolution":"848x480","url":"https://www.youtube.com/watch?v=MWWDqMI-rxU","annotations":[{"segment":[0.01,0.02],"label":"Breakdancing"}]},"Y_QRRCIujIc":{"duration":169.46,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Y_QRRCIujIc","annotations":[{"segment":[0.01,169.4592290249433],"label":"Ping-pong"}]},"Xu54UPG1cME":{"duration":190.056,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Xu54UPG1cME","annotations":[{"segment":[22.244929797191887,169.65466458658346],"label":"Hanging wallpaper"}]},"oSoi5owiybU":{"duration":61.324,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oSoi5owiybU","annotations":[{"segment":[36.870756211103334,56.26133412928349],"label":"Running a marathon"}]},"yKLX0iXyLsQ":{"duration":235.36,"subset":"validation","resolution":"810x480","url":"https://www.youtube.com/watch?v=yKLX0iXyLsQ","annotations":[{"segment":[0,235.301],"label":"Discus 
throw"}]},"E3UCEbGZmz0":{"duration":237.19,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=E3UCEbGZmz0","annotations":[{"segment":[14.79195007800312,28.10470514820593],"label":"Painting"},{"segment":[29.214101404056162,100.58526053042122],"label":"Painting"},{"segment":[107.2506864274571,117.97575507020281],"label":"Painting"},{"segment":[123.52320436817473,188.61327613104524],"label":"Painting"}]},"MzYA5oo-mzk":{"duration":7.06,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=MzYA5oo-mzk","annotations":[{"segment":[1.2124119,6.27699383775351],"label":"Hammer throw"}]},"HIIUd1osd_I":{"duration":235.52,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HIIUd1osd_I","annotations":[{"segment":[20.943276042121685,40.78427439781592],"label":"Walking the dog"},{"segment":[55.11388432137286,143.29609923556944],"label":"Walking the dog"},{"segment":[155.78857968174728,198.40998355694228],"label":"Walking the dog"}]},"PZjWkB_q2lE":{"duration":225.164,"subset":"training","resolution":"640x352","url":"https://www.youtube.com/watch?v=PZjWkB_q2lE","annotations":[{"segment":[18.683948916842827,25.347677913178163],"label":"Hand car wash"},{"segment":[0.2,224.963899],"label":"Hand car wash"},{"segment":[106.3645948430847,118.99060787112208],"label":"Hand car wash"},{"segment":[139.33251774962676,141.08613067018752],"label":"Hand car 
wash"}]},"LTFtillWvs4":{"duration":75.91,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LTFtillWvs4","annotations":[]},"z1YFE1yRVo0":{"duration":94.599,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=z1YFE1yRVo0","annotations":[]},"-pkfcMUIEMo":{"duration":104.49000000000001,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=-pkfcMUIEMo","annotations":[]},"s0YJOzALDNg":{"duration":64.482,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=s0YJOzALDNg","annotations":[{"segment":[0.01,64.48151927437642],"label":"Riding bumper cars"}]},"81dGQTVec_s":{"duration":223.21,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=81dGQTVec_s","annotations":[{"segment":[85.64369110764432,145.52464586583463],"label":"Cleaning shoes"}]},"Gr9gtmq4XVY":{"duration":193.469,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Gr9gtmq4XVY","annotations":[]},"ow9bWn5gOvg":{"duration":223.934,"subset":"validation","resolution":"854x468","url":"https://www.youtube.com/watch?v=ow9bWn5gOvg","annotations":[{"segment":[27.947986599736097,223.93324263038548],"label":"Sharpening knives"}]},"nB0JECwGK0c":{"duration":231.991,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nB0JECwGK0c","annotations":[{"segment":[50.51115991474999,79.01388897319555],"label":"Sumo"},{"segment":[143.2352278643767,170.65557480667877],"label":"Sumo"}]},"ngE2y5ExVqw":{"duration":60.814,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ngE2y5ExVqw","annotations":[{"segment":[20.492395806552263,23.907795107644304],"label":"Hitting a pinata"},{"segment":[27.038577800312012,30.738593709828393],"label":"Hitting a pinata"},{"segment":[35.387331647425896,36.90528689235569],"label":"Hitting a pinata"},{"segment":[42.02838584399376,43.26172448049922],"label":"Hitting a 
pinata"}]},"lvlVfgZ53NQ":{"duration":67.29,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=lvlVfgZ53NQ","annotations":[{"segment":[3.5676661466458657,66.94620592823712],"label":"Mixing drinks"}]},"NNiJfr_ChH4":{"duration":100.08,"subset":"testing","resolution":"2560x810","url":"https://www.youtube.com/watch?v=NNiJfr_ChH4","annotations":[]},"b3e7PrPNb30":{"duration":144.568,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=b3e7PrPNb30","annotations":[{"segment":[8.79583473951203,34.281202061687914],"label":"Applying sunscreen"},{"segment":[39.017420767579004,115.69905564430576],"label":"Applying sunscreen"}]},"-deoJLN27hE":{"duration":227.742,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=-deoJLN27hE","annotations":[{"segment":[11.724591889798042,217.79317643525738],"label":"Futsal"}]},"BpiExjaNwvI":{"duration":138.57,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BpiExjaNwvI","annotations":[{"segment":[11.850438089999999,63.735765529999995],"label":"Doing motocross"},{"segment":[76.70848292,129.1072276677067],"label":"Doing motocross"}]},"p4KNKI2UZLc":{"duration":187.2,"subset":"training","resolution":"470x360","url":"https://www.youtube.com/watch?v=p4KNKI2UZLc","annotations":[{"segment":[15.170046801872076,161.61934477379094],"label":"Using the balance beam"}]},"C1YNGahOoiA":{"duration":230.16,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=C1YNGahOoiA","annotations":[{"segment":[9.688872074882996,219.97328081123243],"label":"Sailing"}]},"pdXrJJyTogA":{"duration":74.049,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pdXrJJyTogA","annotations":[]},"9PGFsuE3Ye0":{"duration":143.778,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9PGFsuE3Ye0","annotations":[{"segment":[18.1613120124805,30.268853354134166],"label":"Playing ten 
pins"},{"segment":[46.41224180967239,47.30909672386896],"label":"Playing ten pins"},{"segment":[60.98613416536662,71.97260686427457],"label":"Playing ten pins"},{"segment":[100.01323868954759,106.74058658346335],"label":"Playing ten pins"}]},"6azNzt8rXAs":{"duration":202.92000000000002,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=6azNzt8rXAs","annotations":[{"segment":[31.340064312776594,183.60845759000426],"label":"Ping-pong"},{"segment":[183.92502274726988,190.2563488315133],"label":"Ping-pong"}]},"Tbo7I63oIms":{"duration":191.008,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Tbo7I63oIms","annotations":[{"segment":[10.131434939030214,190.8073469387755],"label":"Skiing"}]},"XO2YGnweswY":{"duration":172.27,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XO2YGnweswY","annotations":[]},"ykdRdg1XvFM":{"duration":94.13,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ykdRdg1XvFM","annotations":[{"segment":[8.517559416536661,94.13371882086167],"label":"Playing accordion"}]},"x03YF3JbWjs":{"duration":171.735,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=x03YF3JbWjs","annotations":[{"segment":[0.01,157.66430118729983],"label":"Braiding hair"}]},"vvdmMyyAtN0":{"duration":222.1,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vvdmMyyAtN0","annotations":[{"segment":[88.3544085725429,156.9590081700468],"label":"Getting a haircut"}]},"BLLeQHgNmYc":{"duration":92.21000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BLLeQHgNmYc","annotations":[{"segment":[0.01,88.75411435382216],"label":"Tai chi"}]},"T5GVJRbvNb8":{"duration":162.19,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=T5GVJRbvNb8","annotations":[{"segment":[6.828809672386896,154.28051482059283],"label":"High 
jump"}]},"vZVnMzH4pkc":{"duration":70.102,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=vZVnMzH4pkc","annotations":[{"segment":[2.4059640372009437,48.666090000000004],"label":"Beer pong"}]},"UqSjGwxBuqA":{"duration":204.243,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=UqSjGwxBuqA","annotations":[{"segment":[36.9536911076443,47.1478127925117],"label":"BMX"},{"segment":[57.6605007800312,182.5563697347894],"label":"BMX"},{"segment":[190.5213073322933,204.221],"label":"BMX"}]},"Pu92wJ-7UTQ":{"duration":62.044,"subset":"training","resolution":"320x180","url":"https://www.youtube.com/watch?v=Pu92wJ-7UTQ","annotations":[{"segment":[0,14.76312247945258],"label":"Playing congas"},{"segment":[22.19292995845725,60.01740439702645],"label":"Playing congas"}]},"Upd7zpT6tuc":{"duration":201.48,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Upd7zpT6tuc","annotations":[{"segment":[7.229375249609984,193.9358490873635],"label":"Playing kickball"}]},"pMtu7fOHdII":{"duration":52.269,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pMtu7fOHdII","annotations":[]},"5BCWB7Pf2Tk":{"duration":97.153,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5BCWB7Pf2Tk","annotations":[{"segment":[8.942212741570886,55.775156443057725],"label":"Longboarding"},{"segment":[58.200163244929804,66.38456120124806],"label":"Longboarding"},{"segment":[77.14552888455539,86.8455560920437],"label":"Longboarding"}]},"DU2hdNIS2WI":{"duration":23.99,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=DU2hdNIS2WI","annotations":[{"segment":[3.741998751950078,23.986212],"label":"Clean and jerk"}]},"rosZj4ZcPBA":{"duration":74.513,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=rosZj4ZcPBA","annotations":[{"segment":[0.01,72.4701137677532],"label":"Blowing 
leaves"}]},"xlCOq0ryx-Y":{"duration":180.79,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xlCOq0ryx-Y","annotations":[{"segment":[15.347715751950076,151.92830547113883],"label":"Playing guitarra"}]},"MmOQhq95Z_g":{"duration":182.86,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MmOQhq95Z_g","annotations":[{"segment":[15.119235215667128,181.1455538673947],"label":"Pole vault"}]},"Z7BPvGPizYE":{"duration":171.341,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Z7BPvGPizYE","annotations":[{"segment":[29.927413416536663,81.50827613104524],"label":"Waxing skis"}]},"34SBoGK8mT8":{"duration":57.4,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=34SBoGK8mT8","annotations":[]},"dRqbDamDLT0":{"duration":219.48,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=dRqbDamDLT0","annotations":[{"segment":[7.528770670826833,211.49001248049922],"label":"Fixing bicycle"}]},"zlAwW2Pxc1A":{"duration":149.049,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=zlAwW2Pxc1A","annotations":[{"segment":[14.603542341670986,139.31318809940385],"label":"Hanging wallpaper"}]},"JhihdPxI_Xc":{"duration":134.003,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JhihdPxI_Xc","annotations":[{"segment":[16.933215159126362,133.16614884399374],"label":"Capoeira"}]},"FeWZkO6kZl0":{"duration":214.37,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=FeWZkO6kZl0","annotations":[{"segment":[5.1796437776911075,206.6844952581903],"label":"Playing field hockey"}]},"pmn8_4zKo7I":{"duration":74.91,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=pmn8_4zKo7I","annotations":[{"segment":[28.131323849999998,72.40748829953198],"label":"Smoking a 
cigarette"},{"segment":[1.6973926199999998,27.678346333853355],"label":"Smoking a cigarette"}]},"4QqoWbK2ELc":{"duration":90.26,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4QqoWbK2ELc","annotations":[{"segment":[22.37658346333853,69.2407488299532],"label":"Bungee jumping"}]},"RfXFS9fy-mI":{"duration":50.016,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=RfXFS9fy-mI","annotations":[{"segment":[4.447528684867395,44.319233210608424],"label":"Futsal"}]},"VyQ4ZbgqMXk":{"duration":77.555,"subset":"training","resolution":"360x240","url":"https://www.youtube.com/watch?v=VyQ4ZbgqMXk","annotations":[{"segment":[13.49039912480853,17.120102925205444],"label":"Shuffleboard"},{"segment":[19.90287578705148,21.95970793525741],"label":"Shuffleboard"},{"segment":[32.24386867628705,34.663671203588144],"label":"Shuffleboard"},{"segment":[66.96803494305773,70.35575848127925],"label":"Shuffleboard"}]},"N2zoVF76Pgg":{"duration":186.062,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=N2zoVF76Pgg","annotations":[{"segment":[20.894714508580343,160.48301560062401],"label":"Futsal"}]},"9dqz5ojw-ys":{"duration":8.87,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=9dqz5ojw-ys","annotations":[{"segment":[0.01,8.524076343213729],"label":"Playing water 
polo"}]},"VTFIAQVjHv8":{"duration":192.19,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VTFIAQVjHv8","annotations":[{"segment":[70.46663806552263,80.06209516380656],"label":"Skateboarding"},{"segment":[95.65471294851794,100.45244149765992],"label":"Skateboarding"},{"segment":[110.64761466458658,117.84420748829955],"label":"Skateboarding"},{"segment":[129.83852886115446,137.93469578783152],"label":"Skateboarding"},{"segment":[143.93185647425898,152.3278814352574],"label":"Skateboarding"},{"segment":[159.52447425897037,168.22035725429018],"label":"Skateboarding"},{"segment":[170.91907956318252,176.01666614664586],"label":"Skateboarding"}]},"d0woFZ26Cx0":{"duration":30.094,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=d0woFZ26Cx0","annotations":[{"segment":[1.2642476635514017,7.491838006230529],"label":"Playing pool"},{"segment":[9.458445482866043,11.237757009345794],"label":"Playing pool"},{"segment":[12.455180685358256,13.157540498442367],"label":"Playing pool"},{"segment":[15.451915887850465,17.605819314641742],"label":"Playing pool"},{"segment":[23.177873831775702,24.488945482866043],"label":"Playing pool"}]},"ndJqptBTxAY":{"duration":108.809,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ndJqptBTxAY","annotations":[{"segment":[5.974826512274341,10.209561714766554],"label":"Hand car wash"}]},"hRsn4nesCQo":{"duration":234.75,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hRsn4nesCQo","annotations":[]},"89H7D34qzPs":{"duration":234.847,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=89H7D34qzPs","annotations":[]},"E2nAOID5DLM":{"duration":28.47,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=E2nAOID5DLM","annotations":[{"segment":[4.675202808112324,17.498616224648988],"label":"Long 
jump"}]},"cwQQj3VZLC8":{"duration":73.93,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cwQQj3VZLC8","annotations":[{"segment":[6.366284520000001,59.723008920000005],"label":"Triple jump"}]},"_qzzQdKJ7F0":{"duration":97.11,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=_qzzQdKJ7F0","annotations":[{"segment":[21.81473585522621,24.99605150078003],"label":"Doing nails"},{"segment":[45.750348807488294,65.29271634446178],"label":"Doing nails"}]},"4L0mci9CTPg":{"duration":43.72,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4L0mci9CTPg","annotations":[{"segment":[0.01,43.723174603174606],"label":"Tango"}]},"_3I4nm2zF5Y":{"duration":66.08,"subset":"validation","resolution":"810x480","url":"https://www.youtube.com/watch?v=_3I4nm2zF5Y","annotations":[{"segment":[4.840291187207488,58.984364140000004],"label":"Getting a haircut"}]},"6UPfqdssD6g":{"duration":77.74,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6UPfqdssD6g","annotations":[{"segment":[5.821434533541342,60.76122294383775],"label":"Washing dishes"}]},"M6xFXdXOzzc":{"duration":209.12,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=M6xFXdXOzzc","annotations":[{"segment":[30.547957439999998,145.67996352],"label":"Playing badminton"}]},"zSOK9jmWE1E":{"duration":186.038,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zSOK9jmWE1E","annotations":[{"segment":[56.57768330733229,167.42991731669267],"label":"Scuba diving"},{"segment":[173.81372698907958,185.80100000000002],"label":"Scuba diving"}]},"lTFCUuLtUJs":{"duration":216.039,"subset":"training","resolution":"540x360","url":"https://www.youtube.com/watch?v=lTFCUuLtUJs","annotations":[{"segment":[175.21079563182525,196.7934227769111],"label":"Running a 
marathon"}]},"Sma-ydx49eQ":{"duration":78.623,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Sma-ydx49eQ","annotations":[{"segment":[5.7632558502340085,75.4128159126365],"label":"Rope skipping"}]},"f2JqhNe3F5E":{"duration":38.662,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=f2JqhNe3F5E","annotations":[]},"UBqFQKI1YW8":{"duration":78.16,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UBqFQKI1YW8","annotations":[{"segment":[4.380655226209049,71.15598127925117],"label":"Using parallel bars"}]},"n4iI127TShc":{"duration":14.05,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=n4iI127TShc","annotations":[]},"S2jbh7QMrKc":{"duration":227.37,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=S2jbh7QMrKc","annotations":[{"segment":[30.859862836193454,213.53606238377537],"label":"Surfing"}]},"WMVw1i62o4E":{"duration":15.976,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=WMVw1i62o4E","annotations":[]},"6NQl2Vcf0P0":{"duration":15.326,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6NQl2Vcf0P0","annotations":[{"segment":[0.2,7.1313084112149525],"label":"Calf roping"}]},"AhWkUdag6NA":{"duration":136.32999999999998,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=AhWkUdag6NA","annotations":[{"segment":[3.0824375975039,136.265],"label":"Windsurfing"}]},"VgQmPHpRFXQ":{"duration":64.459,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=VgQmPHpRFXQ","annotations":[{"segment":[19.508529260898328,32.68181392355694],"label":"Putting on shoes"}]},"dla6qDuAXTw":{"duration":49.97,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=dla6qDuAXTw","annotations":[{"segment":[0.20000000000000004,49.736],"label":"Swinging at the 
playground"}]},"8nQGd6hiduA":{"duration":167.77,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=8nQGd6hiduA","annotations":[{"segment":[118.2986209822153,165.6704141063602],"label":"Tennis serve with ball bouncing"}]},"b_v5uGSheEw":{"duration":34.505,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=b_v5uGSheEw","annotations":[{"segment":[5.6449375975039,21.020672386895473],"label":"River tubing"},{"segment":[24.461396255850232,33.49329641185647],"label":"River tubing"}]},"Qnm0p06p9ok":{"duration":172.07999999999998,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Qnm0p06p9ok","annotations":[]},"wdb4-V33wIo":{"duration":112.85,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wdb4-V33wIo","annotations":[{"segment":[11.086525741029641,50.659217199200775],"label":"Shoveling snow"}]},"Ck5i8CVEkJI":{"duration":131.658,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Ck5i8CVEkJI","annotations":[{"segment":[12.939781408736348,113.99331241029641],"label":"Throwing darts"}]},"CneLYCPg7Es":{"duration":40.1,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=CneLYCPg7Es","annotations":[{"segment":[3.691030887675507,35.47143242901716],"label":"Brushing 
teeth"}]},"28LdZLfdmMQ":{"duration":204.011,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=28LdZLfdmMQ","annotations":[]},"HPJuOvOmEW8":{"duration":211.02,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HPJuOvOmEW8","annotations":[{"segment":[0.01,191.59955537597503],"label":"Skateboarding"}]},"NCD5yDOzc5k":{"duration":149.54,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=NCD5yDOzc5k","annotations":[]},"6k5m4u5AchQ":{"duration":157.664,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6k5m4u5AchQ","annotations":[{"segment":[17.674878504672897,84.44664174454827],"label":"Ice fishing"}]},"yWfTd8pRLgM":{"duration":110.47999999999999,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yWfTd8pRLgM","annotations":[{"segment":[0.34456578159126366,109.22735276443058],"label":"Tumbling"}]},"tnk1skdLN0Q":{"duration":113.24000000000001,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=tnk1skdLN0Q","annotations":[{"segment":[5.733385335413416,60.18299234999999],"label":"Shot put"},{"segment":[68.63771473,93.05725429017161],"label":"Shot put"}]},"vBdm15wRqig":{"duration":147.029,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vBdm15wRqig","annotations":[{"segment":[24.887082188049426,26.72207442311298],"label":"Surfing"},{"segment":[44.38387468559967,54.01758391968332],"label":"Surfing"},{"segment":[82.68933759255131,126.95852526345953],"label":"Surfing"}]},"AIZWoAbhUTY":{"duration":155.226,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AIZWoAbhUTY","annotations":[{"segment":[0.01,155.161],"label":"Building 
sandcastles"}]},"Oya9LWABkf4":{"duration":100.404,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Oya9LWABkf4","annotations":[{"segment":[6.916751736792449,71.77674437347602],"label":"Hula hoop"}]},"4Mo-IYfNKBo":{"duration":37.57,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4Mo-IYfNKBo","annotations":[{"segment":[0.2,37.369410430839],"label":"Mooping floor"}]},"HDHS_7pOiDk":{"duration":23.85,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HDHS_7pOiDk","annotations":[{"segment":[8.762558502340093,21.275195007800313],"label":"Hopscotch"}]},"EhjiQFHfDmY":{"duration":166.0,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=EhjiQFHfDmY","annotations":[{"segment":[18.170147350071257,151.59026906152297],"label":"Hula hoop"}]},"R-VaPP7ToKQ":{"duration":6.6,"subset":"training","resolution":"480x480","url":"https://www.youtube.com/watch?v=R-VaPP7ToKQ","annotations":[{"segment":[0.76074978,5.0208034800000005],"label":"Drinking beer"}]},"V-46E7jMm-Y":{"duration":118.53999999999999,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=V-46E7jMm-Y","annotations":[]},"EbGq9gXcXLQ":{"duration":127.594,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EbGq9gXcXLQ","annotations":[{"segment":[6.568783801872075,127.39365079365079],"label":"Playing congas"}]},"ND41BOzqVoA":{"duration":191.89,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ND41BOzqVoA","annotations":[]},"L23hItTmPNY":{"duration":150.56,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=L23hItTmPNY","annotations":[]},"-zZJmRT9udU":{"duration":161.751,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=-zZJmRT9udU","annotations":[{"segment":[26.80235926287642,147.46335458997922],"label":"Playing 
congas"}]},"hLnVjkmioH8":{"duration":98.314,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=hLnVjkmioH8","annotations":[{"segment":[30.73245860708202,91.27998899715406],"label":"Arm wrestling"}]},"zCijq4G1B3Q":{"duration":172.78,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zCijq4G1B3Q","annotations":[{"segment":[53.08650339157566,54.97282584711388],"label":"Painting furniture"},{"segment":[66.02128594383775,80.03396704212167],"label":"Painting furniture"},{"segment":[85.15398513572542,126.6530791575663],"label":"Painting furniture"},{"segment":[141.74365880187207,166.80479999687986],"label":"Painting furniture"}]},"pW6U4hxCago":{"duration":44.95,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=pW6U4hxCago","annotations":[]},"o00AfbAIOlA":{"duration":101.797,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=o00AfbAIOlA","annotations":[{"segment":[29.679904836193447,80.4706287051482],"label":"Assembling bicycle"}]},"3ekMufBfUdw":{"duration":127.199,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3ekMufBfUdw","annotations":[{"segment":[25.00321818587029,25.796971144151886],"label":"Blow-drying hair"},{"segment":[34.131377206108645,98.62380462870514],"label":"Blow-drying hair"}]},"tnB7LNIcXC0":{"duration":115.682,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=tnB7LNIcXC0","annotations":[{"segment":[5.77506716050955,96.55190408976904],"label":"Fixing the 
roof"}]},"Pw0ANcAM-zc":{"duration":156.97,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Pw0ANcAM-zc","annotations":[{"segment":[24.819463920000004,141.53956876131045],"label":"Snatch"}]},"IkbEC202hYg":{"duration":153.79,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=IkbEC202hYg","annotations":[{"segment":[6.115955524960999,135.390466425117],"label":"Cheerleading"}]},"G-EDozcMT_4":{"duration":206.17000000000002,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=G-EDozcMT_4","annotations":[{"segment":[12.21949207764229,13.184188820614049],"label":"Skateboarding"},{"segment":[20.58019718339754,154.9946100374627],"label":"Skateboarding"},{"segment":[157.24556802496102,166.5709698096724],"label":"Skateboarding"},{"segment":[175.25324043681746,184.90020780031202],"label":"Skateboarding"}]},"RAF_yKyoam8":{"duration":155.51,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=RAF_yKyoam8","annotations":[]},"KFS_lGlO-Ew":{"duration":58.654,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=KFS_lGlO-Ew","annotations":[{"segment":[38.84314336661466,43.601314049922],"label":"Gargling mouthwash"}]},"TVmuh_sR1KI":{"duration":134.7,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=TVmuh_sR1KI","annotations":[{"segment":[0,129.82017096411857],"label":"Playing bagpipes"}]},"U89hsv1dzuQ":{"duration":27.17,"subset":"training","resolution":"800x480","url":"https://www.youtube.com/watch?v=U89hsv1dzuQ","annotations":[{"segment":[7.372852130000002,22.7251955],"label":"Smoking a 
cigarette"}]},"Jtmz74DAzqc":{"duration":102.66,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Jtmz74DAzqc","annotations":[]},"bWQ3vk499Uw":{"duration":160.07999999999998,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bWQ3vk499Uw","annotations":[]},"8lH1h46S7Jg":{"duration":194.44,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8lH1h46S7Jg","annotations":[]},"L2ZlwZl_5h8":{"duration":196.84,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=L2ZlwZl_5h8","annotations":[]},"rklYNAAaI3Y":{"duration":153.23,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=rklYNAAaI3Y","annotations":[]},"UW1wC80VNd8":{"duration":123.35,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=UW1wC80VNd8","annotations":[{"segment":[15.393996766673387,113.91557506396256],"label":"Preparing pasta"}]},"TQ5Z-aKYLug":{"duration":35.67,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=TQ5Z-aKYLug","annotations":[]},"9UGNhbRWM14":{"duration":116.63,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=9UGNhbRWM14","annotations":[{"segment":[0,116.586666],"label":"Doing kickboxing"}]},"DAPX3S1Nmqg":{"duration":81.6,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=DAPX3S1Nmqg","annotations":[{"segment":[0,81.594806],"label":"Walking the 
dog"}]},"q8TBL7DBgXY":{"duration":144.568,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=q8TBL7DBgXY","annotations":[{"segment":[0.01,120.4132511700468],"label":"Rollerblading"}]},"e51NGAPMp1s":{"duration":105.907,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=e51NGAPMp1s","annotations":[{"segment":[10.078401538221529,37.504871297971924],"label":"Swimming"},{"segment":[44.939757678627146,71.20968955694228],"label":"Swimming"},{"segment":[75.8358410826833,100.6187956848674],"label":"Swimming"}]},"kI0AyrGe-NU":{"duration":37.43,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=kI0AyrGe-NU","annotations":[{"segment":[0.01,37.430566893424036],"label":"Washing face"}]},"i-G50GikqTg":{"duration":129.754,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=i-G50GikqTg","annotations":[{"segment":[43.63771962616822,75.3674968847352],"label":"Playing drums"}]},"ZTwmb1d44bc":{"duration":50.69,"subset":"training","resolution":"640x320","url":"https://www.youtube.com/watch?v=ZTwmb1d44bc","annotations":[{"segment":[22.796338006230528,37.54690965732087],"label":"Curling"}]},"uDs0wOnwaAM":{"duration":213.09,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=uDs0wOnwaAM","annotations":[{"segment":[1.329731806552262,213.089522],"label":"Ping-pong"}]},"JTQsElq5UN4":{"duration":50.95,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=JTQsElq5UN4","annotations":[{"segment":[2.3048249469578783,36.797722429017156],"label":"Ping-pong"}]},"G3H3Gflf1SM":{"duration":156.39,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=G3H3Gflf1SM","annotations":[{"segment":[6.343286814465776,148.0913486926677],"label":"Shaving"}]},"4cktlXntujs":{"duration":161.98,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4cktlXntujs","annotations":[]},"mT
mcz5NWNQA":{"duration":184.599,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mTmcz5NWNQA","annotations":[{"segment":[0.20000000000000004,51.82118564742589],"label":"BMX"},{"segment":[101.91499843993759,146.53879719188768],"label":"BMX"}]},"k5Yl5DV5wys":{"duration":132.89,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=k5Yl5DV5wys","annotations":[]},"aEopPs2eY-Q":{"duration":217.97,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aEopPs2eY-Q","annotations":[{"segment":[71.40842433697348,151.9979318029864],"label":"Doing motocross"},{"segment":[174.4405784227769,208.1045496973479],"label":"Doing motocross"}]},"jYyN-nJcm0M":{"duration":220.753,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jYyN-nJcm0M","annotations":[{"segment":[17.047159731287213,198.8835301983508],"label":"Playing congas"}]},"8iQ_ZjJGQkE":{"duration":60.047,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8iQ_ZjJGQkE","annotations":[{"segment":[7.706941579476865,50.73112921187562],"label":"Rollerblading"}]},"5ya20wcGE-8":{"duration":95.388,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=5ya20wcGE-8","annotations":[{"segment":[26.711503196182267,32.81273790951638],"label":"Doing fencing"},{"segment":[63.91415455729072,70.16419993891171],"label":"Doing fencing"}]},"KVmuW9kwZV4":{"duration":12.47,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=KVmuW9kwZV4","annotations":[{"segment":[0.43601791731669265,11.656212322932918],"label":"Washing dishes"}]},"mS7SAG1nW1o":{"duration":61.26,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mS7SAG1nW1o","annotations":[{"segment":[45.36657566302652,56.92311388455538],"label":"Grooming 
horse"}]},"cyfPRx3neNg":{"duration":29.23,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=cyfPRx3neNg","annotations":[]},"Pzt4s_Jtu4o":{"duration":101.332,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=Pzt4s_Jtu4o","annotations":[]},"6TSzQRNOlTY":{"duration":109.55,"subset":"testing","resolution":"810x480","url":"https://www.youtube.com/watch?v=6TSzQRNOlTY","annotations":[]},"mHVmDOxtVt0":{"duration":126.6,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mHVmDOxtVt0","annotations":[{"segment":[5.922355694227769,117.08373322932918],"label":"Doing kickboxing"}]},"RPkLocpR8VQ":{"duration":77.671,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RPkLocpR8VQ","annotations":[{"segment":[3.4201193135817105,71.7675180674758],"label":"Playing rubik cube"}]},"WAvz9zHwWo4":{"duration":32.63,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WAvz9zHwWo4","annotations":[]},"__dXUJsj3yo":{"duration":29.98,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=__dXUJsj3yo","annotations":[{"segment":[0.14012948517940718,9.528804992199687],"label":"Shoveling snow"}]},"vLL-voBPWM4":{"duration":82.18,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=vLL-voBPWM4","annotations":[{"segment":[7.884224971918877,73.26560278783151],"label":"Kayaking"}]},"d7-tFV0RSCk":{"duration":116.03,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=d7-tFV0RSCk","annotations":[{"segment":[28.58296411856474,84.66346333853353],"label":"Cleaning windows"}]},"NpfdyZntPbg":{"duration":222.61,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=NpfdyZntPbg","annotations":[{"segment":[106.41957332293292,202.96192589703585],"label":"Waxing 
skis"}]},"0RUs80IYF38":{"duration":29.629,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=0RUs80IYF38","annotations":[{"segment":[0.2,2.1724603229329174],"label":"Calf roping"},{"segment":[13.173429617784713,29.25888051950078],"label":"Calf roping"}]},"15vwVfNA99o":{"duration":175.752,"subset":"training","resolution":"400x224","url":"https://www.youtube.com/watch?v=15vwVfNA99o","annotations":[{"segment":[0.01,168.44034839272425],"label":"Having an ice cream"}]},"EpV0Zmg50nQ":{"duration":69.03,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=EpV0Zmg50nQ","annotations":[{"segment":[32.041276131045244,68.38339469578783],"label":"Springboard diving"}]},"rQc7UhpiDU8":{"duration":126.74,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rQc7UhpiDU8","annotations":[{"segment":[10.176753510140406,43.17709984399376],"label":"Archery"}]},"-nskwoky7vk":{"duration":160.43,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=-nskwoky7vk","annotations":[{"segment":[119.5223712948518,147.02751950078002],"label":"Playing accordion"}]},"rBaR5xF8orE":{"duration":128.175,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rBaR5xF8orE","annotations":[{"segment":[0,52.17597659906396],"label":"Kite flying"}]},"Rewtuc-f6wU":{"duration":46.84,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Rewtuc-f6wU","annotations":[{"segment":[1.5150799,37.66290117940717],"label":"Pole vault"}]},"NLpWaffN3XM":{"duration":216.67000000000002,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=NLpWaffN3XM","annotations":[{"segment":[9.464323106257583,213.28528130686428],"label":"Playing 
guitarra"}]},"3wU13sUaXao":{"duration":113.34,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=3wU13sUaXao","annotations":[]},"jBFn08ZRKSE":{"duration":24.707,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=jBFn08ZRKSE","annotations":[{"segment":[0.2,24.35914346333853],"label":"Playing congas"}]},"JcAAwYpJhm8":{"duration":100.06,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=JcAAwYpJhm8","annotations":[]},"f_mbX87xabo":{"duration":19.11,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=f_mbX87xabo","annotations":[{"segment":[0.01,19.081],"label":"Playing flauta"}]},"vo2M4sY0E38":{"duration":132.68,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=vo2M4sY0E38","annotations":[]},"daDd48y4x0g":{"duration":19.39,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=daDd48y4x0g","annotations":[{"segment":[0,19.388662131519276],"label":"Preparing salad"}]},"Vi2gzCrS-fE":{"duration":75.651,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Vi2gzCrS-fE","annotations":[{"segment":[17.015414215939437,74.6368647953787],"label":"Sharpening knives"}]},"ADUmfTuiDH8":{"duration":117.447,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=ADUmfTuiDH8","annotations":[{"segment":[13.558512245251716,105.53652617784711],"label":"Making a lemonade"}]},"RlC3n0VSGOU":{"duration":217.897,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RlC3n0VSGOU","annotations":[{"segment":[12.917367916485368,57.44829415489544],"label":"Surfing"},{"segment":[65.26670105171554,197.83968583151326],"label":"Surfing"}]},"nIIFlJnOXXs":{"duration":122.81,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=nIIFlJnOXXs","annotations":[{"segment":[53.02036387051482,65.6091266],"label":"Ironing 
clothes"},{"segment":[72.8511165,118.48232937129485],"label":"Ironing clothes"}]},"cqVvHj1oC-8":{"duration":224.77,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=cqVvHj1oC-8","annotations":[{"segment":[23.839357254290174,25.942829953198128],"label":"Removing ice from car"}]},"701qhmCLPxU":{"duration":36.015,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=701qhmCLPxU","annotations":[{"segment":[6.405012280811232,32.867826177847114],"label":"Slacklining"}]},"G1tvQndOiB8":{"duration":113.871,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=G1tvQndOiB8","annotations":[]},"27Xuhwv1mQ8":{"duration":30.929,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=27Xuhwv1mQ8","annotations":[]},"Cu8lmHiXkUU":{"duration":178.77,"subset":"training","resolution":"1080x720","url":"https://www.youtube.com/watch?v=Cu8lmHiXkUU","annotations":[{"segment":[12.825699404056163,169.522287775351],"label":"Ping-pong"}]},"d44RiNHK6KA":{"duration":95.44,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=d44RiNHK6KA","annotations":[{"segment":[0,92.60525829468553],"label":"Spinning"}]},"brY1jVHquE0":{"duration":67.04,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=brY1jVHquE0","annotations":[{"segment":[1.4641250015600626,20.706910736349457],"label":"Playing field hockey"},{"segment":[44.23749111856475,54.590946486739476],"label":"Playing field hockey"}]},"K-eZfCs8yOc":{"duration":182.46,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=K-eZfCs8yOc","annotations":[{"segment":[6.686745945397815,131.6008510530421],"label":"Playing 
violin"}]},"NZskwwovSZI":{"duration":63.65,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=NZskwwovSZI","annotations":[]},"PuylbPlzbH0":{"duration":45.09,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=PuylbPlzbH0","annotations":[{"segment":[0.20000000000000004,39.324603510140406],"label":"Playing piano"}]},"W_scepAxfPA":{"duration":12.4,"subset":"training","resolution":"480x480","url":"https://www.youtube.com/watch?v=W_scepAxfPA","annotations":[{"segment":[4.139599307332293,7.234626826833074],"label":"Plataform diving"}]},"A8RQcVFtovg":{"duration":69.8,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=A8RQcVFtovg","annotations":[]},"P0j0bBKsNAo":{"duration":204.38,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=P0j0bBKsNAo","annotations":[{"segment":[26.145596485083892,123.07561272246808],"label":"Rock climbing"},{"segment":[127.53949504918972,146.35157056894522],"label":"Rock climbing"},{"segment":[153.68509153427362,171.54062084116018],"label":"Rock climbing"}]},"A0LLegTPpWk":{"duration":119.72,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=A0LLegTPpWk","annotations":[{"segment":[11.21308341,114.26485245709829],"label":"Grooming horse"}]},"UsqhZb0co6o":{"duration":38.569,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UsqhZb0co6o","annotations":[{"segment":[3.4855195007800313,29.822677067082683],"label":"Putting on shoes"}]},"IDVWoE02zjM":{"duration":25.89,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=IDVWoE02zjM","annotations":[{"segment":[13.53078483619345,23.668775862714508],"label":"Polishing 
shoes"}]},"P-YyUw0iuBQ":{"duration":126.642,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=P-YyUw0iuBQ","annotations":[{"segment":[38.32822644606465,39.90877186652092],"label":"Rock-paper-scissors"},{"segment":[58.87531691199621,62.233975930465796],"label":"Rock-paper-scissors"},{"segment":[78.82970284525666,81.59565733105515],"label":"Rock-paper-scissors"},{"segment":[111.23088237129485,112.81142776911076],"label":"Rock-paper-scissors"}]},"mnNgZ-D5cKc":{"duration":168.461,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=mnNgZ-D5cKc","annotations":[]},"KfX3APC8lp4":{"duration":15.813,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KfX3APC8lp4","annotations":[{"segment":[0.01,15.600965643813707],"label":"Kneeling"}]},"InVpvGiubi0":{"duration":201.53,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=InVpvGiubi0","annotations":[{"segment":[115.38227302153312,190.52222606864274],"label":"Starting a 
campfire"}]},"Of2Umm79Kxg":{"duration":170.88,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Of2Umm79Kxg","annotations":[]},"ZREM_AcXHX8":{"duration":227.63,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZREM_AcXHX8","annotations":[{"segment":[28.934534321372855,32.129758970358814],"label":"Archery"},{"segment":[38.16518330733229,53.43125663026521],"label":"Archery"},{"segment":[71.89255460218408,73.31265444617785],"label":"Archery"},{"segment":[104.55485101404057,109.88022542901716],"label":"Archery"}]},"9FQ-VLkCZo8":{"duration":166.859,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=9FQ-VLkCZo8","annotations":[]},"k3vao2RnhiA":{"duration":123.646,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=k3vao2RnhiA","annotations":[]},"bjYah4IvnII":{"duration":62.81,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bjYah4IvnII","annotations":[{"segment":[18.617621591263653,32.53184404368175],"label":"River tubing"},{"segment":[54.383052542901716,59.478401609984395],"label":"River tubing"}]},"NoVa1C5KBno":{"duration":151.84,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=NoVa1C5KBno","annotations":[{"segment":[29.60906463469423,125.06868782527302],"label":"Tennis serve with ball bouncing"}]},"bzndhMU3izM":{"duration":192.98,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bzndhMU3izM","annotations":[{"segment":[9.028549141965678,100.81879875195008],"label":"Shoveling snow"}]},"_bz66SOrklQ":{"duration":227.626,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=_bz66SOrklQ","annotations":[{"segment":[20.70609477403195,214.64845000456154],"label":"Drum 
corps"}]},"jwlYaPt2DIs":{"duration":79.157,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=jwlYaPt2DIs","annotations":[{"segment":[0.2,79.156847],"label":"Making a lemonade"}]},"9v3xU3UkO0M":{"duration":155.319,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9v3xU3UkO0M","annotations":[]},"r1x3hjFWGYM":{"duration":174.336,"subset":"training","resolution":"582x360","url":"https://www.youtube.com/watch?v=r1x3hjFWGYM","annotations":[{"segment":[0.01,161.5095989578783],"label":"Putting in contact lenses"}]},"JMpwIWxoB5s":{"duration":166.673,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JMpwIWxoB5s","annotations":[{"segment":[48.62361805616225,127.66949981591263],"label":"Making a lemonade"}]},"YIq3Tar9x6s":{"duration":50.09,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=YIq3Tar9x6s","annotations":[]},"_K3Cpdks6NQ":{"duration":60.744,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_K3Cpdks6NQ","annotations":[{"segment":[0.2,55.247117316692666],"label":"Powerbocking"}]},"gvFZb_DoqrY":{"duration":232.46,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gvFZb_DoqrY","annotations":[]},"Wyr2o0lsSTU":{"duration":48.95,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Wyr2o0lsSTU","annotations":[{"segment":[0.07636140873634946,34.82080238377535],"label":"Walking the dog"}]},"T7kOKW76EsA":{"duration":103.7,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=T7kOKW76EsA","annotations":[{"segment":[3.1546898447366467,103.70031746031746],"label":"Vacuuming floor"}]},"8d7FyPzxzjg":{"duration":164.4,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8d7FyPzxzjg","annotations":[{"segment":[4.18225968,155.76072542901716],"label":"Doing 
motocross"}]},"-IdOL3LhhCM":{"duration":26.541,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-IdOL3LhhCM","annotations":[]},"M1dEZrQWeRY":{"duration":90.814,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=M1dEZrQWeRY","annotations":[{"segment":[10.058871943837755,22.526206184087364],"label":"Snowboarding"},{"segment":[39.10209375351014,78.20418750702028],"label":"Snowboarding"}]},"N2hi_TNBk94":{"duration":222.447,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=N2hi_TNBk94","annotations":[{"segment":[22.209997559086037,52.054681779107895],"label":"River tubing"},{"segment":[63.506711770511636,146.44717140522357],"label":"River tubing"},{"segment":[155.4699823525741,192.94935309828392],"label":"River tubing"},{"segment":[198.84888367862715,207.87169515444617],"label":"River tubing"}]},"slHv7r8A4OI":{"duration":198.09,"subset":"validation","resolution":"1920x1072","url":"https://www.youtube.com/watch?v=slHv7r8A4OI","annotations":[{"segment":[0.2,198.041],"label":"Skiing"}]},"WXH-Tj0-g4w":{"duration":55.171,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=WXH-Tj0-g4w","annotations":[]},"mlYYpBaT68I":{"duration":132.214,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=mlYYpBaT68I","annotations":[]},"pdHvH20ZH1s":{"duration":47.207,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pdHvH20ZH1s","annotations":[]},"LBjl6fb279E":{"duration":159.68,"subset":"testing","resolution":"854x470","url":"https://www.youtube.com/watch?v=LBjl6fb279E","annotations":[]},"Cq7Myyfbtn0":{"duration":117.401,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Cq7Myyfbtn0","annotations":[{"segment":[7.687179407176287,105.99132449297971],"label":"Powerbocking"}]},"VLjfzOpn-AQ":{"duration":167.14,"subset":"validation","resolution":"1920x1080","url":"https://www.y
outube.com/watch?v=VLjfzOpn-AQ","annotations":[{"segment":[20.59881925897036,35.20051392355694],"label":"Kayaking"},{"segment":[41.45838306552262,151.2318375975039],"label":"Kayaking"}]},"crbkEVcbF2M":{"duration":100.15,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=crbkEVcbF2M","annotations":[{"segment":[0,81.86798036193447],"label":"Windsurfing"}]},"K1OsjA-f17E":{"duration":31.092,"subset":"testing","resolution":"322x240","url":"https://www.youtube.com/watch?v=K1OsjA-f17E","annotations":[]},"1OLKg-G9TDQ":{"duration":126.34,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=1OLKg-G9TDQ","annotations":[]},"HUvBb3Hmir8":{"duration":236.29,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HUvBb3Hmir8","annotations":[{"segment":[40.69973622,212.32587212480502],"label":"Washing dishes"}]},"J-uW8raljqE":{"duration":119.374,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=J-uW8raljqE","annotations":[{"segment":[26.07207488299532,94.97684330733229],"label":"Disc dog"}]},"rIqITS6qMB0":{"duration":27.93,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=rIqITS6qMB0","annotations":[{"segment":[5.207590761310452,7.996593344773792],"label":"Starting a campfire"}]},"_HQTWGmXXsc":{"duration":104.25999999999999,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_HQTWGmXXsc","annotations":[{"segment":[0.01,104.25734693877551],"label":"Preparing salad"}]},"CAa74sedG9A":{"duration":80.55,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=CAa74sedG9A","annotations":[{"segment":[0.01,80.55004535147393],"label":"Playing 
kickball"}]},"1ZuUq5rVwPs":{"duration":126.48,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=1ZuUq5rVwPs","annotations":[{"segment":[3.8208031042580295,91.85682646874402],"label":"Plastering"}]},"sVma83g_wmg":{"duration":222.123,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=sVma83g_wmg","annotations":[{"segment":[35.69197897983947,184.35080405120965],"label":"Braiding hair"}]},"_nBOWe_VhXg":{"duration":185.11,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_nBOWe_VhXg","annotations":[{"segment":[23.67105039937597,180.1309201123245],"label":"Painting"}]},"lzcVvTHZlQo":{"duration":61.231,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=lzcVvTHZlQo","annotations":[{"segment":[0.01,4.970853899125593],"label":"Arm wrestling"},{"segment":[8.018120835361985,9.25607302820802],"label":"Arm wrestling"},{"segment":[11.065387771598377,11.731977413900088],"label":"Arm wrestling"},{"segment":[18.778782203946744,25.920814085750788],"label":"Arm wrestling"}]},"Cy3tUZIN8nk":{"duration":100.87,"subset":"validation","resolution":"720x1280","url":"https://www.youtube.com/watch?v=Cy3tUZIN8nk","annotations":[{"segment":[0,63.07210764430577],"label":"Playing harmonica"},{"segment":[66.0605616224649,69.67816380655226],"label":"Playing harmonica"},{"segment":[72.66661778471139,75.96964586583464],"label":"Playing harmonica"},{"segment":[89.18175819032761,100.50642589703588],"label":"Playing harmonica"}]},"82s4QeuHioo":{"duration":182.022,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=82s4QeuHioo","annotations":[{"segment":[42.026715201941414,118.41310900156006],"label":"Rafting"}]},"sEvP3GFbLdw":{"duration":151.046,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=sEvP3GFbLdw","annotations":[{"segment":[31.802082683307333,95.87739001560062],"label":"Putting on 
shoes"}]},"i3DJXbrg0vk":{"duration":65.13,"subset":"validation","resolution":"652x480","url":"https://www.youtube.com/watch?v=i3DJXbrg0vk","annotations":[{"segment":[0.01,53.24360811856474],"label":"Hammer throw"}]},"_T4bZFH4E_A":{"duration":210.19,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=_T4bZFH4E_A","annotations":[]},"SidTquTAtyk":{"duration":200.574,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=SidTquTAtyk","annotations":[]},"r8MwPAJWPDk":{"duration":132.45,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=r8MwPAJWPDk","annotations":[{"segment":[3.7192498965264735,82.85662269483977],"label":"Rock climbing"},{"segment":[82.85662187207488,122.52862037441497],"label":"Rock climbing"}]},"zLeCGU8SVVc":{"duration":104.1,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zLeCGU8SVVc","annotations":[{"segment":[7.632554734789391,103.12068631045241],"label":"Cheerleading"}]},"DOgmd5jNhXY":{"duration":150.419,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=DOgmd5jNhXY","annotations":[{"segment":[12.43234477379095,145.9041216848674],"label":"Scuba diving"}]},"fqOp0uhdEyM":{"duration":90.187,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=fqOp0uhdEyM","annotations":[{"segment":[0.20000000000000004,14.366977134636304],"label":"Hula hoop"},{"segment":[19.845584378156765,25.043237404060793],"label":"Hula hoop"},{"segment":[32.62900127970451,33.75281815017024],"label":"Hula 
hoop"}]},"Lh9NHhMANKA":{"duration":226.859,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Lh9NHhMANKA","annotations":[]},"bQovpBODTuM":{"duration":98.801,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bQovpBODTuM","annotations":[]},"L9gznB55hhE":{"duration":238.61,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=L9gznB55hhE","annotations":[{"segment":[50.99157098283931,210.2937051482059],"label":"Starting a campfire"}]},"CKczltkk_7g":{"duration":111.712,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=CKczltkk_7g","annotations":[]},"fSVDG4h0lzM":{"duration":32.35,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=fSVDG4h0lzM","annotations":[{"segment":[0.01,32.321],"label":"Cricket"}]},"HP2pyzpWnxI":{"duration":157.85,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HP2pyzpWnxI","annotations":[]},"1Ao43T5yRjI":{"duration":182.09,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1Ao43T5yRjI","annotations":[{"segment":[14.456125100000001,145.09841649999998],"label":"Shot put"}]},"89S-sQUWIcg":{"duration":66.084,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=89S-sQUWIcg","annotations":[{"segment":[0,66.083604],"label":"Hurling"}]},"Q47ZEz1iuNU":{"duration":156.039,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Q47ZEz1iuNU","annotations":[]},"F53s94562zc":{"duration":10.496,"subset":"training","resolution":"400x230","url":"https://www.youtube.com/watch?v=F53s94562zc","annotations":[{"segment":[0.01,8.267774791218411],"label":"Rock-paper-scissors"}]},"6f1HnAlpphA":{"duration":31.603,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=6f1HnAlpphA","annotations":[{"segment":[10.323691588785046,15.731339563862928],"label":"Calf 
roping"}]},"NAl-SP-92dI":{"duration":229.6,"subset":"training","resolution":"512x288","url":"https://www.youtube.com/watch?v=NAl-SP-92dI","annotations":[{"segment":[3.7585192600000004,206.0951385070203],"label":"Cheerleading"}]},"BLSycXOF3Ro":{"duration":184.228,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BLSycXOF3Ro","annotations":[{"segment":[10.697060444845775,161.3690665704784],"label":"Hula hoop"}]},"wsrSku74OpM":{"duration":169.808,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=wsrSku74OpM","annotations":[{"segment":[0.20000000000000004,157.41602184087364],"label":"Beer pong"}]},"T1fJmCDf4z0":{"duration":21.061,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=T1fJmCDf4z0","annotations":[]},"bBXbFpJMveI":{"duration":175.73,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bBXbFpJMveI","annotations":[{"segment":[0.01,5.756195007800312],"label":"Skateboarding"},{"segment":[12.608808112324493,27.958661466458654],"label":"Skateboarding"},{"segment":[40.293365054602184,43.58261934477379],"label":"Skateboarding"},{"segment":[46.323664586583455,54.54680031201248],"label":"Skateboarding"},{"segment":[61.12530889235569,68.80023556942278],"label":"Skateboarding"},{"segment":[88.53576131045241,96.75889703588143],"label":"Skateboarding"},{"segment":[104.98203276131046,112.1087503900156],"label":"Skateboarding"},{"segment":[106.90076443057723,113.20516848673947],"label":"Skateboarding"}]},"7univ6G4RXA":{"duration":83.62,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=7univ6G4RXA","annotations":[{"segment":[1.108780303593096,54.59112435926009],"label":"Brushing 
teeth"}]},"3_hJrb_aDWU":{"duration":43.19,"subset":"training","resolution":"160x128","url":"https://www.youtube.com/watch?v=3_hJrb_aDWU","annotations":[{"segment":[13.45965344,29.82921184],"label":"Kayaking"}]},"So4BX0I1VRI":{"duration":70.659,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=So4BX0I1VRI","annotations":[{"segment":[0.01,7.504917572849672],"label":"Playing ten pins"},{"segment":[17.756433652412856,21.724762457405056],"label":"Playing ten pins"},{"segment":[40.24363021403532,44.98357850888711],"label":"Playing ten pins"},{"segment":[65.37637931231924,70.658299],"label":"Playing ten pins"}]},"vnpatTiKhBE":{"duration":206.19,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=vnpatTiKhBE","annotations":[]},"SlFGykD5Nsg":{"duration":142.39,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SlFGykD5Nsg","annotations":[]},"mAfp5ABlKyM":{"duration":42.89,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=mAfp5ABlKyM","annotations":[{"segment":[0.01,0.02],"label":"Ping-pong"}]},"ZMTi498qnPc":{"duration":100.5,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZMTi498qnPc","annotations":[{"segment":[5.12186676,43.0049238],"label":"Doing nails"}]},"5YEX9xzF8sQ":{"duration":14.61,"subset":"testing","resolution":"404x720","url":"https://www.youtube.com/watch?v=5YEX9xzF8sQ","annotations":[]},"xXXQyLS1uuY":{"duration":60.674,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xXXQyLS1uuY","annotations":[{"segment":[1.2275280373831776,56.65514018691589],"label":"Snow 
tubing"}]},"Jhcis4S2Vsg":{"duration":72.238,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=Jhcis4S2Vsg","annotations":[]},"eJc8bebOF2A":{"duration":6.64,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=eJc8bebOF2A","annotations":[]},"FRJLhGFpCGE":{"duration":201.875,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=FRJLhGFpCGE","annotations":[{"segment":[41.54416848673947,200.16735725429018],"label":"Running a marathon"}]},"Scjxbdh6b54":{"duration":47.439,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Scjxbdh6b54","annotations":[{"segment":[3.104527301092044,47.181],"label":"Hand car wash"}]},"otWcr0Rxy5g":{"duration":202.618,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=otWcr0Rxy5g","annotations":[{"segment":[26.54727613104524,190.25547893915757],"label":"Drum corps"}]},"k8C1Jb_xvh0":{"duration":129.684,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=k8C1Jb_xvh0","annotations":[{"segment":[26.494494539781588,103.95549765990638],"label":"Throwing darts"}]},"04ketA7YN-0":{"duration":112.85,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=04ketA7YN-0","annotations":[]},"eM2miz5uf8Q":{"duration":76.255,"subset":"validation","resolution":"847x480","url":"https://www.youtube.com/watch?v=eM2miz5uf8Q","annotations":[{"segment":[22.781130107789348,63.82285186427456],"label":"Removing ice from car"}]},"Qre7RVxEn78":{"duration":119.77000000000001,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=Qre7RVxEn78","annotations":[{"segment":[25.61934477379095,107.62351076443058],"label":"Getting a 
piercing"}]},"JsPIzRsUtlE":{"duration":186.921,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JsPIzRsUtlE","annotations":[]},"a42c_maArv4":{"duration":10.89,"subset":"training","resolution":"800x480","url":"https://www.youtube.com/watch?v=a42c_maArv4","annotations":[{"segment":[1.1438003120124804,9.621379095163807],"label":"Volleyball"}]},"XKuD0F-p6YA":{"duration":133.167,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XKuD0F-p6YA","annotations":[{"segment":[49.212070202808114,130.21352106084245],"label":"Assembling bicycle"}]},"QazS2ixUp5A":{"duration":142.408,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QazS2ixUp5A","annotations":[]},"p42wxuN8MZE":{"duration":118.0,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=p42wxuN8MZE","annotations":[{"segment":[58.978333,117.956666],"label":"Playing badminton"}]},"maE7PmL7Zjk":{"duration":140.342,"subset":"validation","resolution":"1920x608","url":"https://www.youtube.com/watch?v=maE7PmL7Zjk","annotations":[{"segment":[22.988841839387863,96.55313572542903],"label":"Playing pool"}]},"JKa3jnnowNo":{"duration":140.342,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=JKa3jnnowNo","annotations":[{"segment":[20.321079439252337,29.279834890965734],"label":"Fixing the roof"},{"segment":[58.12265732087228,77.35120560747663],"label":"Fixing the roof"},{"segment":[89.58755451713395,120.63263551401867],"label":"Fixing the roof"}]},"b0bM4vKxoXQ":{"duration":129.846,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=b0bM4vKxoXQ","annotations":[{"segment":[55.07936349453978,129.801],"label":"Bullfighting"}]},"lviFcaF4HUo":{"duration":36.97,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=lviFcaF4HUo","annotations":[{"segment":[0,19.363769110764434],"label":"Using uneven 
bars"},{"segment":[20.32094952,35.32735257410297],"label":"Using uneven bars"}]},"L61Le9sOGK0":{"duration":16.603,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=L61Le9sOGK0","annotations":[{"segment":[0.01,0.02],"label":"Table soccer"}]},"KYN1NR7l2zQ":{"duration":110.133,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KYN1NR7l2zQ","annotations":[{"segment":[25.256536661466455,48.451314393135725],"label":"Swimming"},{"segment":[62.36818129329173,74.73872964898597],"label":"Swimming"},{"segment":[86.42202531825274,92.7791126677067],"label":"Swimming"}]},"DVcAKbTkIXQ":{"duration":166.813,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=DVcAKbTkIXQ","annotations":[]},"1jl5qtS4mNQ":{"duration":118.631,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1jl5qtS4mNQ","annotations":[{"segment":[29.796348215833394,64.77466926677067],"label":"Installing carpet"}]},"9-U2WOLIqB8":{"duration":87.075,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9-U2WOLIqB8","annotations":[{"segment":[24.95602602411681,77.03816729183885],"label":"Playing ten pins"}]},"EFEI0-awheU":{"duration":19.78,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EFEI0-awheU","annotations":[{"segment":[6.682990639625585,9.885898595943837],"label":"Ironing clothes"},{"segment":[11.949310452418096,19.58701404056162],"label":"Ironing clothes"}]},"9J72KkOrAq4":{"duration":7.059,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9J72KkOrAq4","annotations":[{"segment":[0.01,3.160521458658346],"label":"Shuffleboard"}]},"_rzxmgPv6Co":{"duration":192.54,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_rzxmgPv6Co","annotations":[{"segment":[76.29504749169558,132.76539760365924],"label":"Doing 
motocross"}]},"qeYKXF8tsp4":{"duration":118.93,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qeYKXF8tsp4","annotations":[{"segment":[1.8546489859594384,116.47195631825272],"label":"Playing field hockey"}]},"5iIlhXLDsf8":{"duration":122.439,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=5iIlhXLDsf8","annotations":[{"segment":[6.491347893915757,111.49844617784711],"label":"Doing crunches"}]},"E-XerA_sOjw":{"duration":54.567,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=E-XerA_sOjw","annotations":[{"segment":[12.513780224648986,36.009041054602186],"label":"Disc dog"}]},"I41kmlYZmt4":{"duration":168.229,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=I41kmlYZmt4","annotations":[{"segment":[0.01,57.213461109872966],"label":"Trimming branches or hedges"},{"segment":[72.96028526855359,145.65812223088923],"label":"Trimming branches or hedges"}]},"QV2QXt6eDdI":{"duration":30.79,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QV2QXt6eDdI","annotations":[{"segment":[0,30.185131045241807],"label":"Washing dishes"}]},"TvlCdfubl9Y":{"duration":25.1,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TvlCdfubl9Y","annotations":[{"segment":[0,23.416943904967084],"label":"Fixing bicycle"}]},"DZx2G-OZAPk":{"duration":32.07,"subset":"validation","resolution":"1270x720","url":"https://www.youtube.com/watch?v=DZx2G-OZAPk","annotations":[{"segment":[9.85580576,29.840940260000004],"label":"Playing piano"}]},"o9MKXpyIvEM":{"duration":152.068,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=o9MKXpyIvEM","annotations":[]},"WjX-xi2Ocbo":{"duration":23.36,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=WjX-xi2Ocbo","annotations":[{"segment":[2.5175294748131902,23.359274],"label":"Calf 
roping"}]},"OmlzSz6AKfI":{"duration":67.803,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=OmlzSz6AKfI","annotations":[{"segment":[2.643564742589704,67.781],"label":"BMX"}]},"aDJQFTO0dPY":{"duration":46.81,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=aDJQFTO0dPY","annotations":[]},"GejFc4J2mfc":{"duration":181.63,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=GejFc4J2mfc","annotations":[{"segment":[42.77493135725428,114.4441872074883],"label":"Shaving legs"}]},"9c6p0S1appo":{"duration":148.933,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9c6p0S1appo","annotations":[]},"IkXuooaGnCc":{"duration":188.686,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IkXuooaGnCc","annotations":[{"segment":[74.2416656979666,173.71963346950238],"label":"Mooping floor"}]},"IQvSj-3BGPo":{"duration":204.034,"subset":"training","resolution":"853x474","url":"https://www.youtube.com/watch?v=IQvSj-3BGPo","annotations":[{"segment":[6.0478019656786275,198.30424340093603],"label":"Disc dog"}]},"Qtj7QFDjH1A":{"duration":59.44,"subset":"training","resolution":"322x240","url":"https://www.youtube.com/watch?v=Qtj7QFDjH1A","annotations":[{"segment":[3.9140517677024533,53.912905786394035],"label":"Playing bagpipes"}]},"Doy6s1y58uc":{"duration":8.545,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=Doy6s1y58uc","annotations":[{"segment":[0.2,8.344420000000001],"label":"Throwing 
darts"}]},"3ve9a8YKP90":{"duration":184.25,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=3ve9a8YKP90","annotations":[{"segment":[0.01,12.6440624024961],"label":"Kayaking"},{"segment":[17.529268330733228,32.4722511700468],"label":"Kayaking"},{"segment":[36.78272698907956,90.80735725429017],"label":"Kayaking"},{"segment":[95.6925631825273,108.04926053042121],"label":"Kayaking"},{"segment":[112.35973634945397,117.5323073322933],"label":"Kayaking"},{"segment":[122.70487831513259,129.60163962558502],"label":"Kayaking"},{"segment":[134.7742106084243,142.82043213728548],"label":"Kayaking"},{"segment":[146.55617784711387,157.47604992199686],"label":"Kayaking"},{"segment":[161.21179563182525,173.56849297971917],"label":"Kayaking"}]},"fOgfpA9MTOQ":{"duration":32.416,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fOgfpA9MTOQ","annotations":[{"segment":[14.626931464174454,23.907467289719627],"label":"Calf roping"}]},"p31a9wkiSHA":{"duration":206.217,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=p31a9wkiSHA","annotations":[{"segment":[21.32830283328609,89.10347992688673],"label":"Longboarding"},{"segment":[102.5942734969802,171.33307883031353],"label":"Longboarding"},{"segment":[187.07233799542254,189.96322233187115],"label":"Longboarding"}]},"u35hesPTsNE":{"duration":131.82,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=u35hesPTsNE","annotations":[{"segment":[0.2,66.83525245771735],"label":"Playing congas"}]},"Zl6o69-R2DE":{"duration":81.78,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Zl6o69-R2DE","annotations":[{"segment":[19.39261061054687,22.709767640873633],"label":"Shoveling snow"},{"segment":[30.747494390171603,35.34048110405616],"label":"Shoveling snow"},{"segment":[44.65403749609984,62.0053206374415],"label":"Shoveling 
snow"}]},"59R_1aBnFn4":{"duration":6.177,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=59R_1aBnFn4","annotations":[{"segment":[0.19999999999999998,5.976506],"label":"Doing a powerbomb"}]},"C0gGikr-Dw8":{"duration":65.225,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=C0gGikr-Dw8","annotations":[{"segment":[14.5378127925117,64.95768954758191],"label":"Hitting a pinata"}]},"ITyNMYw_pxc":{"duration":65.62,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=ITyNMYw_pxc","annotations":[{"segment":[16.369672386895477,59.85161466458659],"label":"Throwing darts"}]},"EVQlh2Et5tc":{"duration":24.24,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EVQlh2Et5tc","annotations":[{"segment":[5.256677062402496,21.59397555850234],"label":"Using uneven bars"}]},"8KH35bzOO_0":{"duration":31.09,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=8KH35bzOO_0","annotations":[]},"C0t3fbC2RCg":{"duration":133.21,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=C0t3fbC2RCg","annotations":[{"segment":[10.90664976599064,75.93105694227769],"label":"Playing flauta"},{"segment":[95.45915366614663,130.98367004680188],"label":"Playing flauta"}]},"eEBiomp_9tQ":{"duration":30.05,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eEBiomp_9tQ","annotations":[{"segment":[5.812450670826833,25.031037566302654],"label":"Doing step aerobics"}]},"uE9MHR27_gc":{"duration":238.934,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=uE9MHR27_gc","annotations":[{"segment":[0.01,213.21351935101404],"label":"Sharpening knives"}]},"gzuLIo5ZR9A":{"duration":50.46,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gzuLIo5ZR9A","annotations":[{"segment":[0.01,50.45696145124717],"label":"Walking the 
dog"}]},"S0Kl5D5mrvQ":{"duration":50.574,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=S0Kl5D5mrvQ","annotations":[{"segment":[0.19999999999999998,49.23391939252337],"label":"Cleaning sink"}]},"1BWF1U1dJ_w":{"duration":105.91,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1BWF1U1dJ_w","annotations":[{"segment":[23.290516380655227,26.924497659906397],"label":"Making a sandwich"}]},"YZhNtSJMzq8":{"duration":49.34,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YZhNtSJMzq8","annotations":[{"segment":[0.7691263650546022,43.07107644305773],"label":"Mowing the lawn"}]},"uOk4EFDsDP4":{"duration":54.15,"subset":"validation","resolution":"350x240","url":"https://www.youtube.com/watch?v=uOk4EFDsDP4","annotations":[{"segment":[1.0309779200000002,38.57170259282372],"label":"Preparing pasta"}]},"chpGQpd9Gls":{"duration":33.72,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=chpGQpd9Gls","annotations":[]},"SlogVhfmTVs":{"duration":169.53,"subset":"training","resolution":"1480x1080","url":"https://www.youtube.com/watch?v=SlogVhfmTVs","annotations":[{"segment":[142.79335413416536,167.64997503900156],"label":"Shaving"}]},"XThYcZoFMMs":{"duration":217.989,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XThYcZoFMMs","annotations":[{"segment":[61.89389396528243,217.78893424036283],"label":"Making a cake"}]},"pajUG7Y6Gtk":{"duration":61.092,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pajUG7Y6Gtk","annotations":[{"segment":[0,61.061],"label":"Beach 
soccer"}]},"e-k2J91a954":{"duration":61.16,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=e-k2J91a954","annotations":[{"segment":[0.01,58.67952574102964],"label":"Volleyball"}]},"uJAL31wAD2A":{"duration":188.268,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uJAL31wAD2A","annotations":[]},"JN1wuOpCZvk":{"duration":220.06,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JN1wuOpCZvk","annotations":[]},"HCraAphAW1A":{"duration":28.561,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=HCraAphAW1A","annotations":[{"segment":[1.0854786491899222,24.796957494984316],"label":"Clipping cat claws"}]},"xbWSMxwcP_8":{"duration":12.12,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=xbWSMxwcP_8","annotations":[{"segment":[0,11.39817042],"label":"Ironing clothes"}]},"JpUgQNMC5tw":{"duration":124.6,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=JpUgQNMC5tw","annotations":[]},"TKgU9QJXw2w":{"duration":40.87,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=TKgU9QJXw2w","annotations":[]},"Z-sWZUYL0ZU":{"duration":126.39,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=Z-sWZUYL0ZU","annotations":[]},"lmv-due_RPs":{"duration":131.61,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lmv-due_RPs","annotations":[]},"UrQ7Jq1s95o":{"duration":21.27,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UrQ7Jq1s95o","annotations":[{"segment":[4.03068819,15.180632960218409],"label":"Clean and jerk"}]},"5zYETEiYiCQ":{"duration":65.18,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5zYETEiYiCQ","annotations":[{"segment":[0.01,65.177913],"label":"Playing 
piano"}]},"vWULU8NttX8":{"duration":116.704,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vWULU8NttX8","annotations":[{"segment":[85.38804307176287,116.703061],"label":"BMX"}]},"DCjklOgbzGs":{"duration":158.407,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DCjklOgbzGs","annotations":[{"segment":[7.413722152886116,155.68816521060845],"label":"Skiing"}]},"0_9_HIvxNMw":{"duration":198.415,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0_9_HIvxNMw","annotations":[{"segment":[21.600331019363423,183.9114238389344],"label":"Snowboarding"}]},"oZnbAhzN3jg":{"duration":172.66,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=oZnbAhzN3jg","annotations":[{"segment":[13.25172397129974,158.75173918002247],"label":"Cleaning windows"}]},"smZ0pWbxFWo":{"duration":30.44,"subset":"testing","resolution":"328x240","url":"https://www.youtube.com/watch?v=smZ0pWbxFWo","annotations":[]},"pYYA4rARL1Y":{"duration":192.38,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pYYA4rARL1Y","annotations":[{"segment":[9.571180506087758,14.0591354045245],"label":"Using the pommel horse"},{"segment":[17.34957241852139,20.340878794882197],"label":"Using the pommel horse"},{"segment":[23.93044644651517,27.81914473578422],"label":"Using the pommel horse"},{"segment":[31.408712387417193,33.90722776911077],"label":"Using the pommel horse"},{"segment":[37.808059282371296,40.80869890795632],"label":"Using the pommel horse"},{"segment":[44.709530421216854,50.4107457098284],"label":"Using the pommel horse"},{"segment":[59.41266458658347,63.01343213728549],"label":"Using the pommel horse"},{"segment":[72.31541497659906,76.81637441497661],"label":"Using the pommel horse"},{"segment":[80.71720592823714,170.73639469578785],"label":"Using the pommel 
horse"}]},"zcdJNPYkIE0":{"duration":66.084,"subset":"training","resolution":"624x360","url":"https://www.youtube.com/watch?v=zcdJNPYkIE0","annotations":[{"segment":[0.01,66.08399092970521],"label":"Scuba diving"}]},"nZ40a3LSFeU":{"duration":112.11,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=nZ40a3LSFeU","annotations":[{"segment":[12.242458444617784,104.93535809672386],"label":"Sailing"}]},"jqRJM8b4pgE":{"duration":78.65,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=jqRJM8b4pgE","annotations":[{"segment":[62.327863307332294,78.645985],"label":"Smoking hookah"}]},"uCLUexLKpnU":{"duration":14.98,"subset":"training","resolution":"562x480","url":"https://www.youtube.com/watch?v=uCLUexLKpnU","annotations":[{"segment":[0.023218928237129486,14.883333],"label":"Windsurfing"}]},"xs4MvE_uAtU":{"duration":216.11,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xs4MvE_uAtU","annotations":[{"segment":[65.74259478159127,115.63953851326053],"label":"Cleaning shoes"},{"segment":[117.66238758346334,167.5593313151326],"label":"Cleaning 
shoes"}]},"iVrhxY1zxMo":{"duration":234.685,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=iVrhxY1zxMo","annotations":[]},"O8vPTn6Ho7w":{"duration":182.23,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=O8vPTn6Ho7w","annotations":[{"segment":[14.214513886677915,30.13476943975718],"label":"Kayaking"},{"segment":[58.27950693537945,65.10247360098485],"label":"Kayaking"},{"segment":[91.54146943020577,96.37440415167627],"label":"Kayaking"},{"segment":[100.63875831767963,108.59888609421927],"label":"Kayaking"},{"segment":[129.63636664650258,133.04784997930528],"label":"Kayaking"},{"segment":[139.87081662402497,147.26236384399377],"label":"Kayaking"},{"segment":[153.51674995319812,158.34968467394694],"label":"Kayaking"}]},"ymEh5T11kSQ":{"duration":35.248,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ymEh5T11kSQ","annotations":[{"segment":[0.2,20.34589594383775],"label":"Beach soccer"}]},"a1XSqgYWcDM":{"duration":70.06,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=a1XSqgYWcDM","annotations":[]},"20i6YGSnt-c":{"duration":54.61,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=20i6YGSnt-c","annotations":[{"segment":[5.39521827,49.842095074883],"label":"Shaving"}]},"-2DajzzRwn0":{"duration":191.147,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-2DajzzRwn0","annotations":[{"segment":[0.01,186.17924251032323],"label":"Tug of war"}]},"H0gSWEElh6A":{"duration":145.752,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=H0gSWEElh6A","annotations":[{"segment":[15.21882527301092,126.53806084243371],"label":"Fixing the 
roof"}]},"hD97MAqtyjk":{"duration":146.8,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=hD97MAqtyjk","annotations":[]},"BzxK6r4UG5k":{"duration":56.96,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=BzxK6r4UG5k","annotations":[{"segment":[4.520647399375975,45.56103457410296],"label":"Clean and jerk"}]},"aEG-_-m49mY":{"duration":220.637,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=aEG-_-m49mY","annotations":[{"segment":[0.01,19.2755327736919],"label":"Painting fence"}]},"R2qPyJVTUlI":{"duration":91.93,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=R2qPyJVTUlI","annotations":[{"segment":[44.28244773790952,91.861],"label":"Putting on makeup"}]},"IGZvj1J9tNc":{"duration":179.28,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=IGZvj1J9tNc","annotations":[]},"huFuRGoZt9I":{"duration":23.36,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=huFuRGoZt9I","annotations":[{"segment":[0,23.359273],"label":"Playing blackjack"}]},"CfGFk5dvKGg":{"duration":110.34,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=CfGFk5dvKGg","annotations":[]},"G9oSp_hnm_U":{"duration":83.639,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=G9oSp_hnm_U","annotations":[]},"hKE7vI--XGU":{"duration":103.42,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=hKE7vI--XGU","annotations":[{"segment":[30.49400713171384,98.90384302355695],"label":"Playing flauta"}]},"dID-dQpaLbc":{"duration":235.13,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=dID-dQpaLbc","annotations":[{"segment":[9.898595943837753,221.801872074883],"label":"Tai 
chi"}]},"fFWHlFZEm3M":{"duration":169.46,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=fFWHlFZEm3M","annotations":[{"segment":[7.00572471450858,144.68239961],"label":"Sailing"}]},"vWNDj8KxmBg":{"duration":88.03,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vWNDj8KxmBg","annotations":[{"segment":[5.699053809771439,78.2074963900156],"label":"Breakdancing"}]},"AdLAUGIklAo":{"duration":119.398,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=AdLAUGIklAo","annotations":[]},"BqvU8S76g1E":{"duration":11.77,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=BqvU8S76g1E","annotations":[]},"CRKA3xPEKm8":{"duration":50.95,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CRKA3xPEKm8","annotations":[{"segment":[9.21627904,41.12915008437072],"label":"Using parallel bars"}]},"qmP7dbzrhnw":{"duration":229.716,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=qmP7dbzrhnw","annotations":[]},"TCYrINZQn6g":{"duration":158.57,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=TCYrINZQn6g","annotations":[]},"93jSVyXW5gc":{"duration":111.55,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=93jSVyXW5gc","annotations":[{"segment":[46.72968816,100.52036472],"label":"Archery"},{"segment":[21.57672964,34.03217932],"label":"Archery"}]},"WjEh7acrr5o":{"duration":12.446,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=WjEh7acrr5o","annotations":[{"segment":[0.01,12.445895691609977],"label":"Knitting"}]},"CpTIytatsno":{"duration":97.22,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CpTIytatsno","annotations":[{"segment":[6.5936092043681755,10.989348673946958],"label":"Shot put"},{"segment":[22.054485959438377,26.75337987519501],"label":"Shot 
put"},{"segment":[34.64252496099844,48.89371684867395],"label":"Shot put"}]},"vREBYOCUCj0":{"duration":111.526,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vREBYOCUCj0","annotations":[{"segment":[62.582169112315434,94.35946350483879],"label":"Skiing"}]},"4wiLmu1yTZI":{"duration":125.97,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4wiLmu1yTZI","annotations":[{"segment":[55.2009375975039,84.4711856474259],"label":"Brushing hair"}]},"pesi_zocmrM":{"duration":112.733,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pesi_zocmrM","annotations":[]},"B_U7vUtoCKY":{"duration":145.07999999999998,"subset":"testing","resolution":"854x470","url":"https://www.youtube.com/watch?v=B_U7vUtoCKY","annotations":[]},"czmYE1FzBXM":{"duration":192.01,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=czmYE1FzBXM","annotations":[{"segment":[84.77006282371295,175.231401950078],"label":"Playing lacrosse"}]},"1izVZnE0l7k":{"duration":208.32999999999998,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=1izVZnE0l7k","annotations":[{"segment":[7.475159066226595,162.8284648773706],"label":"Washing dishes"}]},"PT4RBe2UA4k":{"duration":34.529,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PT4RBe2UA4k","annotations":[]},"-F7QWQA8Eh8":{"duration":104.31,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=-F7QWQA8Eh8","annotations":[{"segment":[37.26296723161444,84.12643693774962],"label":"Tango"}]},"_z7M8BENWVE":{"duration":103.306,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_z7M8BENWVE","annotations":[]},"8OA30kYcAUQ":{"duration":168.368,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8OA30kYcAUQ","annotations":[{"segment":[8.928102964118565,33.61168174726989],"label":"Raking 
leaves"},{"segment":[88.23066458658346,121.84234633385336],"label":"Raking leaves"}]},"bOk4SVceq0A":{"duration":146.91,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bOk4SVceq0A","annotations":[]},"qbUQVhjQJ48":{"duration":16.905,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=qbUQVhjQJ48","annotations":[{"segment":[0.26117004680187206,5.223400936037441],"label":"Shuffleboard"}]},"QNsonWAaFk4":{"duration":186.41,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=QNsonWAaFk4","annotations":[{"segment":[0,150.93086284867394],"label":"Volleyball"}]},"OaG9uH7BgjI":{"duration":84.73,"subset":"validation","resolution":"854x470","url":"https://www.youtube.com/watch?v=OaG9uH7BgjI","annotations":[{"segment":[53.755112324492984,62.604234009360376],"label":"Clipping cat claws"}]},"0czF2CCgq6I":{"duration":47.091,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0czF2CCgq6I","annotations":[{"segment":[0.01,17.796089521714773],"label":"Cutting the grass"}]},"lMbDEY-CVwE":{"duration":176.263,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=lMbDEY-CVwE","annotations":[{"segment":[0.01,175.54883804447897],"label":"Disc 
dog"}]},"FXb8rBMvol4":{"duration":139.947,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FXb8rBMvol4","annotations":[{"segment":[17.237159126365054,26.401218408736348],"label":"Snowboarding"},{"segment":[32.29239937597504,40.801882995319815],"label":"Snowboarding"},{"segment":[47.129447737909516,53.89339625585023],"label":"Snowboarding"},{"segment":[58.257234009360374,74.8398174726989],"label":"Snowboarding"},{"segment":[77.23992823712948,82.27010452418098],"label":"Snowboarding"},{"segment":[86.19812012480499,94.70882059282371],"label":"Snowboarding"},{"segment":[98.85505928237129,105.40175195007801],"label":"Snowboarding"},{"segment":[109.32976755070203,121.55026053042123],"label":"Snowboarding"}]},"hhHiBeL08-M":{"duration":39.73,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hhHiBeL08-M","annotations":[{"segment":[0.01,38.94898382713582],"label":"Painting fence"}]},"VFsRRXYbuHs":{"duration":19.389,"subset":"validation","resolution":"424x240","url":"https://www.youtube.com/watch?v=VFsRRXYbuHs","annotations":[{"segment":[0.2,19.141000000000002],"label":"Waterskiing"}]},"45P3UDcb4Gc":{"duration":165.19,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=45P3UDcb4Gc","annotations":[{"segment":[15.977473760174897,165.186462585034],"label":"Smoking a 
cigarette"}]},"1WSFrupQzQI":{"duration":62.462,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1WSFrupQzQI","annotations":[{"segment":[34.66751326053042,62.401],"label":"Bullfighting"}]},"Nn-KZMYbOv4":{"duration":141.46,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=Nn-KZMYbOv4","annotations":[{"segment":[6.620403837753511,131.74603637129485],"label":"Spinning"}]},"0E0t_6jIkvw":{"duration":231.921,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0E0t_6jIkvw","annotations":[]},"3-FGPutRFAs":{"duration":220.01,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3-FGPutRFAs","annotations":[]},"TtAEG3yXDnI":{"duration":105.16,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=TtAEG3yXDnI","annotations":[{"segment":[4.263702521060843,83.96214195319813],"label":"Using uneven bars"}]},"uMQEaWGQvrc":{"duration":17.09,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uMQEaWGQvrc","annotations":[{"segment":[5.678703613104524,9.624469503900157],"label":"Springboard diving"}]},"d6gGZFQOxOg":{"duration":127.36,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=d6gGZFQOxOg","annotations":[]},"jsfPXYQuikQ":{"duration":41.12,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jsfPXYQuikQ","annotations":[{"segment":[0.01,41.12253968253968],"label":"Springboard diving"}]},"YoJxVl_hUWg":{"duration":56.379,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YoJxVl_hUWg","annotations":[{"segment":[15.557499219968797,20.831227769110765],"label":"Changing car wheel"}]},"BodF651KcIg":{"duration":236.87,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BodF651KcIg","annotations":[{"segment":[0.01,226.15008681622467],"label":"Belly 
dance"}]},"MHhMO3yhcfU":{"duration":139.971,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MHhMO3yhcfU","annotations":[{"segment":[10.335046127093545,130.47450251338637],"label":"Bullfighting"}]},"kkjBLmM6KEg":{"duration":125.64,"subset":"training","resolution":"624x352","url":"https://www.youtube.com/watch?v=kkjBLmM6KEg","annotations":[{"segment":[10.97662679840527,119.76283881831463],"label":"Tango"}]},"llcbN-s26vs":{"duration":30.651,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=llcbN-s26vs","annotations":[{"segment":[6.816477339251428,25.26386782630372],"label":"Hanging wallpaper"}]},"XnUi1SG9jxA":{"duration":69.17,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=XnUi1SG9jxA","annotations":[{"segment":[1.2393148666146647,63.42059121762871],"label":"High jump"}]},"LB9-RIKxk6E":{"duration":158.895,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LB9-RIKxk6E","annotations":[{"segment":[0,0.743651705148206],"label":"Running a marathon"}]},"qtl0pZQk-E8":{"duration":160.659,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qtl0pZQk-E8","annotations":[]},"sNnNDCx6RRA":{"duration":178.01,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sNnNDCx6RRA","annotations":[{"segment":[0.01,170.7426131045242],"label":"Ballet"}]},"ral5Oaib_vk":{"duration":207.15,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ral5Oaib_vk","annotations":[{"segment":[36.8323151326053,132.7901887675507],"label":"Playing harmonica"},{"segment":[150.23707488299533,189.0261856474259],"label":"Playing harmonica"}]},"uSuHnQPWfNY":{"duration":169.645,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=uSuHnQPWfNY","annotations":[{"segment":[39.169201366614665,131.26975593135725],"label":"Grooming 
dog"}]},"P-ENnGQ_FuU":{"duration":74.165,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=P-ENnGQ_FuU","annotations":[{"segment":[0.2,54.94067862714509],"label":"Hand car wash"}]},"C3K-OgdK8FY":{"duration":192.75,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=C3K-OgdK8FY","annotations":[{"segment":[18.48876131045242,182.93351638065525],"label":"Zumba"}]},"SyOdA4ZKEtQ":{"duration":87.93,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SyOdA4ZKEtQ","annotations":[{"segment":[10.356574102964117,25.85714196567863],"label":"Tumbling"},{"segment":[32.990146645865835,35.87078315132605],"label":"Tumbling"},{"segment":[39.84880499219968,41.632056162246485],"label":"Tumbling"},{"segment":[44.10117316692667,49.45092667706707],"label":"Tumbling"},{"segment":[61.796511700468024,65.63736037441497],"label":"Tumbling"}]},"k_7hLIwul48":{"duration":33.414,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=k_7hLIwul48","annotations":[{"segment":[6.249173166926678,22.809482059282374],"label":"Throwing darts"}]},"p74gAY-kWaY":{"duration":59.63,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=p74gAY-kWaY","annotations":[{"segment":[6.883828989079563,35.349392106084245],"label":"Brushing hair"},{"segment":[40.4657514898596,59.628843],"label":"Brushing hair"}]},"1kkc9hDshP4":{"duration":220.07999999999998,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=1kkc9hDshP4","annotations":[]},"DJTvj9lfSWU":{"duration":152.14,"subset":"testing","resolution":"424x240","url":"https://www.youtube.com/watch?v=DJTvj9lfSWU","annotations":[]},"lgu-DBDWlEs":{"duration":18.785,"subset":"validation","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=lgu-DBDWlEs","annotations":[{"segment":[0.5861136349453978,17.905771547581907],"label":"Peeling 
potatoes"}]},"8hkaaBJ4994":{"duration":98.62,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=8hkaaBJ4994","annotations":[]},"dO01vfB9z4g":{"duration":128.15,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=dO01vfB9z4g","annotations":[]},"B2p4QMI3sfk":{"duration":200.528,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=B2p4QMI3sfk","annotations":[]},"oB5eeXXprts":{"duration":108.3,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=oB5eeXXprts","annotations":[]},"gMLA3a0FMS0":{"duration":107.03999999999999,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gMLA3a0FMS0","annotations":[{"segment":[47.521585800000004,79.7974518],"label":"Playing lacrosse"}]},"PVAV7PdHhgE":{"duration":143.824,"subset":"training","resolution":"202x360","url":"https://www.youtube.com/watch?v=PVAV7PdHhgE","annotations":[{"segment":[0.2,132.32015600624027],"label":"Swinging at the playground"}]},"v5peBgLKWL8":{"duration":191.704,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=v5peBgLKWL8","annotations":[{"segment":[42.46793664943876,53.8325957528097],"label":"Skiing"},{"segment":[57.12236549325918,61.30934516292216],"label":"Skiing"},{"segment":[67.29074469101212,83.73959339325954],"label":"Skiing"},{"segment":[114.84287093932737,124.4131089672387],"label":"Skiing"},{"segment":[136.97404785335414,183.32989374258972],"label":"Skiing"}]},"J4FBxa5FWSo":{"duration":12.98,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=J4FBxa5FWSo","annotations":[{"segment":[0,1.2959704461778472],"label":"Powerbocking"},{"segment":[0.2,12.27122016224649],"label":"Powerbocking"}]},"2PAVJbmj2lQ":{"duration":57.82,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=2PAVJbmj2lQ","annotations":[{"segment":[0.8184733700000001,55.14683307332293],"label":"Layup 
drill in basketball"}]},"7LimgSQsHm0":{"duration":133.585,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=7LimgSQsHm0","annotations":[{"segment":[32.30843311667541,65.39241980792512],"label":"Playing ice hockey"},{"segment":[72.88313377575525,77.25271692365615],"label":"Playing ice hockey"}]},"s04x6lhUmtY":{"duration":63.762,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=s04x6lhUmtY","annotations":[{"segment":[6.564609494847706,41.96590402198983],"label":"Slacklining"}]},"abjQ1EyqJuw":{"duration":37.779,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=abjQ1EyqJuw","annotations":[{"segment":[0.009999999999999998,37.741],"label":"Trimming branches or hedges"}]},"DmAOCYuMgtE":{"duration":205.01,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DmAOCYuMgtE","annotations":[{"segment":[3.677159374414977,203.52277754914198],"label":"Zumba"}]},"-bzI8_hCbWk":{"duration":219.41,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-bzI8_hCbWk","annotations":[{"segment":[16.94699297971919,43.993709048361936],"label":"Dodgeball"},{"segment":[69.32860764430578,80.28423946957878],"label":"Dodgeball"},{"segment":[99.11423166926679,108.70040951638066],"label":"Dodgeball"},{"segment":[132.3234906396256,141.5673049921997],"label":"Dodgeball"}]},"G3TPqsLNlrw":{"duration":138.925,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=G3TPqsLNlrw","annotations":[{"segment":[0.2,3.4677064252638132],"label":"Waterskiing"},{"segment":[22.323360112635797,73.03856658211906],"label":"Waterskiing"},{"segment":[78.67358952317275,126.35455135881435],"label":"Waterskiing"}]},"lCX7y_KAihU":{"duration":58.77,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lCX7y_KAihU","annotations":[{"segment":[17.50316848673947,32.07386895475819],"label":"Shaving"}]},"m11-00dRe
40":{"duration":204.615,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=m11-00dRe40","annotations":[]},"uDmEOkAXTfo":{"duration":127.85,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uDmEOkAXTfo","annotations":[{"segment":[102.6794305772231,111.25266458658346],"label":"Getting a haircut"}]},"3osNjmYjTGg":{"duration":28.38,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=3osNjmYjTGg","annotations":[{"segment":[0.01,28.01172230889236],"label":"Pole vault"}]},"nEj34gf508E":{"duration":72.26,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=nEj34gf508E","annotations":[{"segment":[25.138987255850235,54.674479009360375],"label":"Using parallel bars"}]},"y-OZ45FQZ0g":{"duration":60.98,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=y-OZ45FQZ0g","annotations":[{"segment":[4.27376045,10.64582417472699],"label":"Playing guitarra"},{"segment":[11.032662850000001,55.32026490795632],"label":"Playing 
guitarra"}]},"W6fPv5eoXC4":{"duration":45.02,"subset":"testing","resolution":"644x480","url":"https://www.youtube.com/watch?v=W6fPv5eoXC4","annotations":[]},"kKWHrXgGPDk":{"duration":55.19,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=kKWHrXgGPDk","annotations":[]},"rMdojBVP-aM":{"duration":29.351,"subset":"training","resolution":"202x360","url":"https://www.youtube.com/watch?v=rMdojBVP-aM","annotations":[{"segment":[0.01,23.901265965678625],"label":"Slacklining"}]},"kZXVFSjRt5g":{"duration":141.782,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kZXVFSjRt5g","annotations":[{"segment":[20.349229117004683,128.06743107332295],"label":"Rafting"}]},"ILIpCfCWyT0":{"duration":139.16,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ILIpCfCWyT0","annotations":[{"segment":[12.58636193447738,15.62444929797192],"label":"Skateboarding"},{"segment":[23.219667706708268,128.05209048361934],"label":"Skateboarding"}]},"J_jcark5INI":{"duration":157.223,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=J_jcark5INI","annotations":[]},"eGHua_oQFpA":{"duration":126.69,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eGHua_oQFpA","annotations":[]},"wyOf_L4cNHc":{"duration":235.38,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=wyOf_L4cNHc","annotations":[{"segment":[16.70795198474606,35.43554649750389],"label":"Playing piano"},{"segment":[60.772880262558495,70.68748912714508],"label":"Playing piano"},{"segment":[83.90696761326053,91.25112232776911],"label":"Playing piano"},{"segment":[99.69690024945396,106.30663949251169],"label":"Playing piano"},{"segment":[114.75241741419656,136.41767382199689],"label":"Playing piano"},{"segment":[141.55858212215287,163.95825400140404],"label":"Playing piano"},{"segment":[169.09916230156006,189.29558776645865],"label":"Playing 
piano"},{"segment":[197.3741579524181,199.21019663104522],"label":"Playing piano"},{"segment":[202.14785851684866,204.71831266692666],"label":"Playing piano"},{"segment":[212.42967511716066,230.79006190343213],"label":"Playing piano"}]},"tBNOJJx4Z9k":{"duration":127.85,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=tBNOJJx4Z9k","annotations":[{"segment":[52.43341965678627,100.28140717628705],"label":"Hand washing clothes"}]},"Yzb_4XMgcM4":{"duration":143.268,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Yzb_4XMgcM4","annotations":[{"segment":[0.01,136.31358814352575],"label":"Baton twirling"}]},"TcxOTZ4xnQ4":{"duration":39.61,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TcxOTZ4xnQ4","annotations":[{"segment":[6.79860567855835,33.00098106796645],"label":"Tumbling"}]},"qF3EbR8y8go":{"duration":204.1,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qF3EbR8y8go","annotations":[{"segment":[2.865726384157407,9.55242128052469],"label":"Painting"},{"segment":[14.647045963471191,17.831186390312755],"label":"Painting"},{"segment":[26.109951500100816,28.02043575620576],"label":"Painting"},{"segment":[30.249334054994854,32.478232353783945],"label":"Painting"},{"segment":[35.025544695257196,37.57285703673045],"label":"Painting"},{"segment":[40.75699739157566,44.57796589703588],"label":"Painting"},{"segment":[52.538316950078,55.722457371294844],"label":"Painting"},{"segment":[60.817082045241804,64.00122246645866],"label":"Painting"},{"segment":[68.45901905616225,72.27998756162246],"label":"Painting"},{"segment":[75.14571394071763,87.56386158346334],"label":"Painting"},{"segment":[95.20579859438378,102.84773560530421],"label":"Painting"},{"segment":[106.98711815288611,112.40015686895475],"label":"Painting"},{"segment":[121.31575004836193,130.5497572698908],"label":"Painting"},{"segment":[142.01266278627145,148.06252958658345],"label":
"Painting"},{"segment":[150.60984192355696,153.47556830265208],"label":"Painting"},{"segment":[157.2965368081123,159.8438491450858],"label":"Painting"},{"segment":[166.84895807176287,170.66992657722307],"label":"Painting"},{"segment":[172.5804108299532,178.6302776302652],"label":"Painting"},{"segment":[183.40648826209048,197.0982920733229],"label":"Painting"}]},"StTr5O_wGXI":{"duration":44.049,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=StTr5O_wGXI","annotations":[{"segment":[11.888218309684769,39.65030037391972],"label":"Beach soccer"}]},"DPctp-VRHxk":{"duration":147.447,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DPctp-VRHxk","annotations":[]},"aaZtnSeatP0":{"duration":6.57,"subset":"testing","resolution":"656x480","url":"https://www.youtube.com/watch?v=aaZtnSeatP0","annotations":[]},"uxMOn-NmmZo":{"duration":91.67,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uxMOn-NmmZo","annotations":[{"segment":[32.16025741029641,91.601],"label":"Starting a campfire"}]},"StvwTXdXG2U":{"duration":183.35,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=StvwTXdXG2U","annotations":[{"segment":[0,130.71523077249623],"label":"Skateboarding"}]},"boQbuyDjWnc":{"duration":207.261,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=boQbuyDjWnc","annotations":[{"segment":[12.286942171606865,162.9636540655226],"label":"Playing blackjack"}]},"4WikrzXQ3Bo":{"duration":75.047,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=4WikrzXQ3Bo","annotations":[{"segment":[7.492983822754271,73.29074694227769],"label":"Rope skipping"}]},"V9ktV85lA9E":{"duration":88.96000000000001,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=V9ktV85lA9E","annotations":[{"segment":[18.734808229329172,76.18822013260531],"label":"Wrapping 
presents"}]},"crTQ2qXSW5c":{"duration":227.417,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=crTQ2qXSW5c","annotations":[]},"CCMtNNSzHw8":{"duration":122.53,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=CCMtNNSzHw8","annotations":[{"segment":[4.00273576,68.30952387],"label":"Putting on makeup"}]},"bJ6SpcLM7GE":{"duration":85.1,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=bJ6SpcLM7GE","annotations":[{"segment":[9.293415460218409,23.100775572542904],"label":"Playing saxophone"},{"segment":[30.535507940717633,59.07956971138846],"label":"Playing saxophone"}]},"U6-j4rUn3dk":{"duration":169.065,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=U6-j4rUn3dk","annotations":[{"segment":[0.01,169.06448979591838],"label":"Trimming branches or hedges"}]},"CIJp-SG3sWE":{"duration":28.58,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CIJp-SG3sWE","annotations":[{"segment":[16.4299984399376,21.684035881435257],"label":"Plataform diving"}]},"auO4r3De6vc":{"duration":180.652,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=auO4r3De6vc","annotations":[]},"bF3iEQf3TUc":{"duration":192.587,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bF3iEQf3TUc","annotations":[]},"Tz3OXEWBSVA":{"duration":122.904,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Tz3OXEWBSVA","annotations":[{"segment":[71.26716038685399,74.90448309579219],"label":"Spread mulch"},{"segment":[79.11611991666798,98.06848561060912],"label":"Spread mulch"},{"segment":[103.62018869267268,106.10888317773565],"label":"Spread mulch"}]},"Ib3XqmBqy10":{"duration":201.875,"subset":"training","resolution":"854x478","url":"https://www.youtube.com/watch?v=Ib3XqmBqy10","annotations":[{"segment":[10.705014040561622,29.91106864274571],"label":"Rope 
skipping"},{"segment":[38.097255850234006,51.321096723868955],"label":"Rope skipping"},{"segment":[56.3587503900156,61.711257410296405],"label":"Rope skipping"},{"segment":[71.47171138845555,96.98444305772232],"label":"Rope skipping"},{"segment":[103.28213416536661,141.38316536661466],"label":"Rope skipping"},{"segment":[152.08924024961,158.0720468018721],"label":"Rope skipping"},{"segment":[163.42508424336975,175.39069734789393],"label":"Rope skipping"},{"segment":[188.93073322932918,191.13492511700468],"label":"Rope skipping"}]},"PKQ5iJjiDDg":{"duration":6.781,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=PKQ5iJjiDDg","annotations":[]},"WUYSGCwm6OM":{"duration":156.294,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=WUYSGCwm6OM","annotations":[{"segment":[17.555592346142827,149.4663626136882],"label":"Putting in contact lenses"}]},"UgXPt2LydrY":{"duration":26.43,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UgXPt2LydrY","annotations":[{"segment":[2.0611785015618316,25.06393057899187],"label":"Volleyball"}]},"H_Spgfds1iY":{"duration":119.049,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=H_Spgfds1iY","annotations":[{"segment":[55.28620592823713,94.07657878315132],"label":"Playing pool"}]},"PmeBYO3ARvk":{"duration":93.879,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=PmeBYO3ARvk","annotations":[{"segment":[0.2,91.97706908722742],"label":"Baton twirling"}]},"VlfnvORj6KY":{"duration":145.102,"subset":"training","resolution":"322x240","url":"https://www.youtube.com/watch?v=VlfnvORj6KY","annotations":[{"segment":[5.740776003749705,135.47326530281512],"label":"Playing congas"}]},"ogQozSI5V8U":{"duration":36.55,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=ogQozSI5V8U","annotations":[{"segment":[0.01,36.548207],"label":"Walking the 
dog"}]},"SEyQMOMPT7A":{"duration":120.628,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=SEyQMOMPT7A","annotations":[]},"FiJi18WpPe8":{"duration":30.0,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=FiJi18WpPe8","annotations":[{"segment":[0,5.66306068798752],"label":"Brushing teeth"},{"segment":[9.36043088923557,30.000181],"label":"Brushing teeth"}]},"_vK_sDOdgbM":{"duration":62.253,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=_vK_sDOdgbM","annotations":[{"segment":[3.8185474999092075,9.8274291198469],"label":"Putting in contact lenses"},{"segment":[13.80104438464441,19.325338777167772],"label":"Putting in contact lenses"},{"segment":[22.232862141653754,41.5194337927441],"label":"Putting in contact lenses"},{"segment":[44.233122266264346,52.0834353503765],"label":"Putting in contact lenses"}]},"p1uwQVtHHOQ":{"duration":92.718,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=p1uwQVtHHOQ","annotations":[]},"vGcH8N8sJlM":{"duration":181.163,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=vGcH8N8sJlM","annotations":[{"segment":[23.457805229215975,61.32944258722729],"label":"Croquet"},{"segment":[87.04824109154842,94.96171755441647],"label":"Croquet"},{"segment":[100.04895242340305,133.39860237129483],"label":"Croquet"}]},"a6kF1_4rs2E":{"duration":101.425,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=a6kF1_4rs2E","annotations":[{"segment":[7.800968153730731,88.81101955559988],"label":"Waxing skis"}]},"56sQMnSUWaI":{"duration":95.59,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=56sQMnSUWaI","annotations":[{"segment":[2.1624453978159126,91.19554212168487],"label":"Using the balance 
beam"}]},"zRqJZ0u6f88":{"duration":91.78999999999999,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=zRqJZ0u6f88","annotations":[]},"NRdxsuSCJyM":{"duration":62.114,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=NRdxsuSCJyM","annotations":[{"segment":[0.29070234009360374,49.12869547581903],"label":"Scuba diving"}]},"bY0vSkxH0PE":{"duration":20.2,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=bY0vSkxH0PE","annotations":[{"segment":[0.3463198127925117,8.689478939157567],"label":"Shoveling snow"}]},"6fuOwhx91zM":{"duration":92.67,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6fuOwhx91zM","annotations":[{"segment":[74.32558502340093,84.43790951638064],"label":"Skateboarding"}]},"zBg0FEtpC-o":{"duration":223.07999999999998,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zBg0FEtpC-o","annotations":[{"segment":[63.09368325,190.82743074],"label":"Starting a campfire"}]},"Oskk7eLgerM":{"duration":78.67,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Oskk7eLgerM","annotations":[]},"THOVGzbo8w8":{"duration":122.32,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=THOVGzbo8w8","annotations":[{"segment":[0,63.14044461778471],"label":"Playing lacrosse"},{"segment":[103.39009360374415,122.075],"label":"Playing lacrosse"}]},"qm8sJxsZ5VY":{"duration":223.7,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qm8sJxsZ5VY","annotations":[{"segment":[28.616953073634942,150.4134972528861],"label":"Getting a haircut"}]},"HCub1SN4X20":{"duration":232.107,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=HCub1SN4X20","annotations":[{"segment":[203.46591887675507,231.86700000000002],"label":"Swinging at the 
playground"}]},"NPt1niJMbvE":{"duration":16.86,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NPt1niJMbvE","annotations":[{"segment":[4.080054602184087,16.451833073322934],"label":"Long jump"}]},"G77y1JRjZDU":{"duration":73.52,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=G77y1JRjZDU","annotations":[{"segment":[15.935558502340092,33.132204368174726],"label":"Hand washing clothes"},{"segment":[35.42509048361934,39.89621840873635],"label":"Hand washing clothes"},{"segment":[41.730527301092046,62.59579095163806],"label":"Hand washing clothes"}]},"FOZdNPQ7d5Q":{"duration":86.28999999999999,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=FOZdNPQ7d5Q","annotations":[]},"JMemapZ0KE4":{"duration":20.2,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JMemapZ0KE4","annotations":[]},"HCZpS7mhvrE":{"duration":220.219,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HCZpS7mhvrE","annotations":[{"segment":[40.25997155130689,82.44035005597979],"label":"Waxing skis"},{"segment":[92.38531734569942,107.13130332700783],"label":"Waxing skis"},{"segment":[132.85104631766202,146.22531267280223],"label":"Waxing skis"}]},"K1EW6wZDnOs":{"duration":56.19,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=K1EW6wZDnOs","annotations":[{"segment":[0,56.141],"label":"Grooming horse"}]},"IajP-SB2D5c":{"duration":182.07,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IajP-SB2D5c","annotations":[{"segment":[30.6054627,105.51510918000001],"label":"Long 
jump"}]},"am1_JLFDFMw":{"duration":172.76,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=am1_JLFDFMw","annotations":[]},"1w9MbbXJZVs":{"duration":72.35,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=1w9MbbXJZVs","annotations":[]},"3YiGMRp-7B4":{"duration":23.96,"subset":"training","resolution":"800x480","url":"https://www.youtube.com/watch?v=3YiGMRp-7B4","annotations":[{"segment":[3.275769110764431,23.173138845553822],"label":"Doing karate"}]},"phDLuo6dYrk":{"duration":206.07999999999998,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=phDLuo6dYrk","annotations":[{"segment":[9.325401848727012,150.49269087051482],"label":"Playing field hockey"},{"segment":[152.10051876443057,180.71985527613103],"label":"Playing field hockey"},{"segment":[183.93551106396254,197.76283095163808],"label":"Playing field hockey"}]},"K90dEpCDiM0":{"duration":174.498,"subset":"testing","resolution":"654x480","url":"https://www.youtube.com/watch?v=K90dEpCDiM0","annotations":[]},"BHQasKzBDQU":{"duration":94.715,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=BHQasKzBDQU","annotations":[{"segment":[0.01,94.71419501133786],"label":"Arm wrestling"}]},"ZA_XYgP4fMA":{"duration":70.473,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=ZA_XYgP4fMA","annotations":[{"segment":[4.617546898595944,69.81291144305771],"label":"Arm 
wrestling"}]},"89rUyq2rCJ8":{"duration":41.448,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=89rUyq2rCJ8","annotations":[{"segment":[0.01,36.468731850234015],"label":"Surfing"}]},"RjztFj0Zl40":{"duration":109.2,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RjztFj0Zl40","annotations":[]},"h2e8XAsjPEA":{"duration":125.76,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=h2e8XAsjPEA","annotations":[]},"iC1C5wfb0iU":{"duration":25.101,"subset":"training","resolution":"800x480","url":"https://www.youtube.com/watch?v=iC1C5wfb0iU","annotations":[{"segment":[3.9550356762569825,18.482939001913817],"label":"Table soccer"}]},"0lzqfDIWXtw":{"duration":84.312,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0lzqfDIWXtw","annotations":[{"segment":[5.129726299256052,61.81977750390016],"label":"Curling"},{"segment":[69.5801325522621,84.311654],"label":"Curling"}]},"KfzVxgHEyzI":{"duration":20.04,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=KfzVxgHEyzI","annotations":[{"segment":[10.663357254290172,17.647544461778473],"label":"Playing kickball"}]},"v0cihSAXQbI":{"duration":115.15,"subset":"validation","resolution":"634x360","url":"https://www.youtube.com/watch?v=v0cihSAXQbI","annotations":[{"segment":[9.874344773790952,26.570964118564746],"label":"Javelin throw"},{"segment":[36.4453088923557,53.51029329173167],"label":"Javelin throw"},{"segment":[70.3893790951638,85.4728174726989],"label":"Javelin throw"}]},"K757XUwinrc":{"duration":28.56,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=K757XUwinrc","annotations":[{"segment":[7.21942875,27.15465678627145],"label":"Brushing hair"},{"segment":[0.3986817472698908,6.910483619344774],"label":"Brushing 
hair"}]},"vrMWYB0UuGM":{"duration":33.07,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=vrMWYB0UuGM","annotations":[{"segment":[4.64254174726989,31.105029706708265],"label":"Playing squash"}]},"RXDVJlb97OI":{"duration":96.92,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RXDVJlb97OI","annotations":[]},"W3a7BIqWov4":{"duration":34.064,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=W3a7BIqWov4","annotations":[{"segment":[3.3479117450412303,15.41102231844376],"label":"Playing ten pins"}]},"fJ7gcHxxJMM":{"duration":5.3,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fJ7gcHxxJMM","annotations":[{"segment":[0.1476170046801872,5.207],"label":"Springboard diving"}]},"H8ddGMpqBSw":{"duration":125.04,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=H8ddGMpqBSw","annotations":[]},"tik7rHU_DM8":{"duration":44.397,"subset":"validation","resolution":"712x480","url":"https://www.youtube.com/watch?v=tik7rHU_DM8","annotations":[{"segment":[0.2,44.141],"label":"Bullfighting"}]},"fgP3D3Gqn6U":{"duration":105.535,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=fgP3D3Gqn6U","annotations":[{"segment":[5.350823012512337,15.064624789073195],"label":"Calf roping"},{"segment":[31.03477347257156,39.9253717087459],"label":"Calf roping"},{"segment":[47.169562864147224,52.6027062306982],"label":"Calf roping"}]},"ll4HKY9ZBOA":{"duration":125.458,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ll4HKY9ZBOA","annotations":[{"segment":[1.5657711696222951,37.96995086334066],"label":"Drum corps"},{"segment":[43.2544285608159,123.50020100395852],"label":"Drum corps"}]},"l5QQ1vVctOo":{"duration":25.473,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=l5QQ1vVctOo","annotations":[{"segment":[0.2,22.8973291653557],"label":"Putting in 
contact lenses"}]},"A9VCuZ53ObQ":{"duration":24.823,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=A9VCuZ53ObQ","annotations":[]},"KO7wfRb38J0":{"duration":219.034,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KO7wfRb38J0","annotations":[]},"LIaNG_UliKk":{"duration":206.728,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LIaNG_UliKk","annotations":[{"segment":[58.83527917666007,195.1530958485108],"label":"Skiing"}]},"z08g5S7J-CY":{"duration":57.214,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=z08g5S7J-CY","annotations":[{"segment":[6.509755070202808,45.40581747269891],"label":"Hand car wash"}]},"fAJDaOuzgQI":{"duration":169.738,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fAJDaOuzgQI","annotations":[]},"GHcNgllmcpM":{"duration":168.438,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=GHcNgllmcpM","annotations":[{"segment":[21.280001560062402,122.4256879875195],"label":"Playing drums"},{"segment":[126.10371294851795,149.48544305772234],"label":"Playing drums"}]},"oOURAJ7nraU":{"duration":93.11,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=oOURAJ7nraU","annotations":[{"segment":[13.363971402393512,25.85637945245701],"label":"Long jump"},{"segment":[24.54903442396199,26.727942804787023],"label":"Long jump"},{"segment":[38.494048061242175,55.344272872955734],"label":"Long jump"},{"segment":[68.70824343369735,85.70372859594383],"label":"Long jump"}]},"r34VM4v0ShI":{"duration":136.4,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=r34VM4v0ShI","annotations":[{"segment":[11.379947942277692,107.31184554914196],"label":"Polishing 
shoes"}]},"wqc2KnHfPHk":{"duration":130.148,"subset":"validation","resolution":"853x480","url":"https://www.youtube.com/watch?v=wqc2KnHfPHk","annotations":[{"segment":[0.01,130.101],"label":"Drum corps"}]},"pieT6K771WQ":{"duration":65.13,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=pieT6K771WQ","annotations":[{"segment":[4.60801825,52.189843993759744],"label":"Hopscotch"}]},"WpQHQeY43zo":{"duration":198.25,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=WpQHQeY43zo","annotations":[{"segment":[9.739036628705149,184.1141686474259],"label":"Playing accordion"}]},"VkRjs03YEjE":{"duration":234.499,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=VkRjs03YEjE","annotations":[{"segment":[0.4648788119555858,13.614300898547231],"label":"Futsal"}]},"Cdiv-IzHcjQ":{"duration":151.905,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=Cdiv-IzHcjQ","annotations":[{"segment":[35.413774570982845,71.65663416536663],"label":"Waxing skis"},{"segment":[83.97446879875196,94.87101482059283],"label":"Waxing skis"}]},"-ew9R4QppTk":{"duration":184.506,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-ew9R4QppTk","annotations":[{"segment":[0.01,46.097706798328154],"label":"Playing congas"},{"segment":[78.86041580716534,176.57375720861913],"label":"Playing congas"}]},"Gr5jGxLEp70":{"duration":96.363,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Gr5jGxLEp70","annotations":[{"segment":[9.771579978845413,16.23585596485084],"label":"Croquet"},{"segment":[24.20345194760171,34.87702327613105],"label":"Croquet"}]},"LnDz1rvDaPY":{"duration":73.955,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LnDz1rvDaPY","annotations":[{"segment":[26.77627457098284,73.981],"label":"Removing ice from 
car"}]},"r-GVVgxtoMk":{"duration":34.761,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=r-GVVgxtoMk","annotations":[{"segment":[10.19476721372855,32.4822636224649],"label":"Hitting a pinata"}]},"mlUg8iMtew0":{"duration":203.62,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mlUg8iMtew0","annotations":[{"segment":[40.97723805278742,70.5189678117737],"label":"Painting"},{"segment":[77.82498699947998,86.71927122799197],"label":"Painting"},{"segment":[90.84876033408683,99.42539155443768],"label":"Painting"},{"segment":[105.14314570133826,114.03742992985025],"label":"Painting"},{"segment":[120.70814310123426,129.60242732974623],"label":"Painting"},{"segment":[135.3201814766468,145.48507773780338],"label":"Painting"},{"segment":[150.88517887654282,158.50885107241024],"label":"Painting"},{"segment":[164.54425822747197,171.21497139885594],"label":"Painting"},{"segment":[176.61507146957877,187.09762067550702],"label":"Painting"},{"segment":[194.40363981903278,196.30955785647427],"label":"Painting"}]},"rP2MviNn52g":{"duration":49.064,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rP2MviNn52g","annotations":[{"segment":[9.94591263650546,35.422750390015594],"label":"Tug of war"}]},"LV0nevBELso":{"duration":162.01,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LV0nevBELso","annotations":[{"segment":[0.01,161.961],"label":"Cleaning windows"}]},"yuYqFaAnTxI":{"duration":106.0,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=yuYqFaAnTxI","annotations":[{"segment":[14.57450015336807,105.99909297052154],"label":"Pole vault"}]},"fxEcsM0EaA4":{"duration":20.295,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=fxEcsM0EaA4","annotations":[{"segment":[0,20.2942403],"label":"Rope 
skipping"}]},"N3rlaQHKrXY":{"duration":126.828,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=N3rlaQHKrXY","annotations":[]},"xAMZGWqRmqE":{"duration":63.577,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=xAMZGWqRmqE","annotations":[{"segment":[0.2,62.28685639937598],"label":"Disc dog"}]},"CRNycmwvGXs":{"duration":152.86,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=CRNycmwvGXs","annotations":[{"segment":[89.54413179407175,131.75268393135724],"label":"Smoking a cigarette"}]},"Qs0hIRhHPVM":{"duration":60.77,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=Qs0hIRhHPVM","annotations":[{"segment":[26.89804717394696,39.56707466536662],"label":"Doing kickboxing"},{"segment":[41.93069919734789,49.7146234],"label":"Doing kickboxing"}]},"j7Tk8I_DCtw":{"duration":27.93,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=j7Tk8I_DCtw","annotations":[{"segment":[0,27.086033541341653],"label":"Playing bagpipes"}]},"UudvO6y1jUw":{"duration":160.52,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UudvO6y1jUw","annotations":[]},"Flh-37A4HWY":{"duration":18.04,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Flh-37A4HWY","annotations":[{"segment":[0.01,12.01855316848674],"label":"Drinking coffee"}]},"5sdUW55eS4M":{"duration":95.481,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=5sdUW55eS4M","annotations":[{"segment":[0.01,59.40015866151346],"label":"Mooping floor"}]},"xlGJt4qHLhM":{"duration":41.1,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xlGJt4qHLhM","annotations":[]},"ZmUk9OQ1zfk":{"duration":124.227,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZmUk9OQ1zfk","annotations":[{"segment":[0.01,95.10616380655226],"label":"Beach 
soccer"}]},"S_1_ZSMxRfg":{"duration":212.74,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=S_1_ZSMxRfg","annotations":[{"segment":[14.51531843,157.82403826],"label":"Making a sandwich"}]},"N8BlpYSpgg4":{"duration":52.153,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=N8BlpYSpgg4","annotations":[{"segment":[0.01,50.52976710207217],"label":"Bullfighting"}]},"ig8RaXkocJ0":{"duration":215.9,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=ig8RaXkocJ0","annotations":[{"segment":[6.522903005261287,214.31433291180335],"label":"Doing a powerbomb"}]},"BH-kBRn84i8":{"duration":22.106,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=BH-kBRn84i8","annotations":[{"segment":[0.19999999999999998,19.832877757009346],"label":"Cutting the grass"}]},"jpclX7wgcZU":{"duration":32.207,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=jpclX7wgcZU","annotations":[{"segment":[0.2,31.7044237258567],"label":"River tubing"}]},"x8cuLOUppmU":{"duration":45.23,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=x8cuLOUppmU","annotations":[{"segment":[33.94199386895476,35.35330338533541],"label":"Hammer throw"}]},"WtBr459dWY4":{"duration":96.97,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=WtBr459dWY4","annotations":[{"segment":[23.75647581903276,82.6180624024961],"label":"Dodgeball"}]},"AGDsfpZQBIs":{"duration":20.69,"subset":"training","resolution":"400x226","url":"https://www.youtube.com/watch?v=AGDsfpZQBIs","annotations":[{"segment":[2.130222383775351,20.65670190327613],"label":"Shot put"}]},"V2PhVadSbpQ":{"duration":72.749,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=V2PhVadSbpQ","annotations":[{"segment":[5.257796514337343,66.10772400031865],"label":"Raking 
leaves"}]},"coK0QVLhuj0":{"duration":92.648,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=coK0QVLhuj0","annotations":[{"segment":[3.6134016789243,7.949483680967238],"label":"Grooming dog"},{"segment":[15.031750960374415,47.841438152730106],"label":"Grooming dog"},{"segment":[65.33030225085803,66.1975186524181],"label":"Grooming dog"},{"segment":[78.33854827425897,84.9872073528861],"label":"Grooming dog"}]},"FARJEomZRrc":{"duration":139.576,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FARJEomZRrc","annotations":[{"segment":[70.30465366614665,93.39015444617785],"label":"Painting fence"}]},"ZH8hnmjRDsI":{"duration":59.049,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZH8hnmjRDsI","annotations":[{"segment":[6.540456088923557,59.04834467120181],"label":"Building sandcastles"}]},"v5patZyuYys":{"duration":213.23,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=v5patZyuYys","annotations":[{"segment":[28.607925966018232,39.58538592972291],"label":"Cricket"},{"segment":[48.234293779914466,49.897545289566686],"label":"Cricket"},{"segment":[82.1646245768198,97.13388816368982],"label":"Cricket"},{"segment":[102.78894329650737,135.72132318762138],"label":"Cricket"}]},"1QIUV7WYKXg":{"duration":10.08,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1QIUV7WYKXg","annotations":[]},"mn1ozn0j7D8":{"duration":131.43,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mn1ozn0j7D8","annotations":[{"segment":[22.542449297971917,31.969291731669266],"label":"Shaving"},{"segment":[59.22516224648985,100.22669110764431],"label":"Shaving"}]},"zc7_D9RiOY0":{"duration":66.526,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=zc7_D9RiOY0","annotations":[{"segment":[0.01,58.845196291731675],"label":"Scuba 
diving"}]},"4WUFEnFE5sY":{"duration":219.75,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=4WUFEnFE5sY","annotations":[{"segment":[10.625439385335415,29.819781500780035],"label":"Belly dance"}]},"6Dt510HMEWA":{"duration":31.09,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=6Dt510HMEWA","annotations":[{"segment":[0,12.816902496099845],"label":"Kayaking"},{"segment":[10.297133385335414,12.962273790951638],"label":"Kayaking"}]},"in5nO2Rh6Fc":{"duration":69.57,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=in5nO2Rh6Fc","annotations":[]},"VhADj7cF6h8":{"duration":134.235,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=VhADj7cF6h8","annotations":[]},"TdqEtrrPX_Q":{"duration":27.214,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=TdqEtrrPX_Q","annotations":[{"segment":[6.186436817472699,26.18642433697348],"label":"Disc dog"}]},"-sXrwv7C5CI":{"duration":54.15,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-sXrwv7C5CI","annotations":[{"segment":[5.23749439625585,49.16486675195008],"label":"Using parallel bars"}]},"Zi2Pah5-BXI":{"duration":156.34,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Zi2Pah5-BXI","annotations":[{"segment":[29.02403855070203,134.14471599063964],"label":"Getting a tattoo"}]},"Irmh66trRY4":{"duration":238.469,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Irmh66trRY4","annotations":[{"segment":[19.34124180967239,39.798324492979724],"label":"Baking cookies"}]},"ZqkJAD0CGBU":{"duration":71.425,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=ZqkJAD0CGBU","annotations":[{"segment":[2.4492074882995323,70.49012948517941],"label":"Hitting a 
pinata"}]},"QZxtQ49Ug7U":{"duration":193.86,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=QZxtQ49Ug7U","annotations":[]},"Y-SehqCvzME":{"duration":238.47,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Y-SehqCvzME","annotations":[{"segment":[13.392923047534147,207.96233287698854],"label":"Playing guitarra"}]},"Yr2unlRQZa0":{"duration":118.608,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Yr2unlRQZa0","annotations":[{"segment":[0.2,48.93047507788162],"label":"Elliptical trainer"},{"segment":[54.28513084112149,62.23529127725857],"label":"Elliptical trainer"},{"segment":[68.69889719626168,72.7617352024922],"label":"Elliptical trainer"},{"segment":[79.41001557632399,83.28817912772587],"label":"Elliptical trainer"},{"segment":[102.86367133956388,115.23685981308412],"label":"Elliptical trainer"}]},"-cApkmTI3F8":{"duration":19.04,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=-cApkmTI3F8","annotations":[{"segment":[0.01,18.06012400624025],"label":"Drinking beer"}]},"1UgjxeAPq_A":{"duration":179.03,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=1UgjxeAPq_A","annotations":[{"segment":[10.672597615272744,36.048535903360566],"label":"Painting"},{"segment":[60.866761262039866,158.74537918491345],"label":"Painting"}]},"G71xFbDSSno":{"duration":30.33,"subset":"validation","resolution":"176x144","url":"https://www.youtube.com/watch?v=G71xFbDSSno","annotations":[{"segment":[3.973980898595944,26.587824583463338],"label":"Washing face"}]},"NA5siWQEegU":{"duration":225.745,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NA5siWQEegU","annotations":[]},"c1eUdyyT4zg":{"duration":32.253,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=c1eUdyyT4zg","annotations":[{"segment":[0.2,26.734977505234312],"label":"Riding bumper 
cars"}]},"Y7BBrdCwIJw":{"duration":130.13,"subset":"validation","resolution":"294x240","url":"https://www.youtube.com/watch?v=Y7BBrdCwIJw","annotations":[{"segment":[3.6364440200000003,128.749540860546],"label":"Playing accordion"}]},"_Z7DaKYQ9J8":{"duration":191.08,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_Z7DaKYQ9J8","annotations":[]},"hXSee4C6pyE":{"duration":75.024,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=hXSee4C6pyE","annotations":[{"segment":[10.7601248049922,65.73208112324492],"label":"Assembling bicycle"}]},"4-KiAur4dI8":{"duration":114.68,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4-KiAur4dI8","annotations":[]},"V-kMGMX-l2g":{"duration":90.02,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=V-kMGMX-l2g","annotations":[{"segment":[8.145677893915757,80.75456532761311],"label":"Zumba"}]},"kHTcsP6nQdY":{"duration":52.129,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kHTcsP6nQdY","annotations":[{"segment":[0.01,0.02],"label":"Fun sliding 
down"}]},"3f6G-qzwzfg":{"duration":205.36,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=3f6G-qzwzfg","annotations":[{"segment":[24.988873020280813,140.32213311388458],"label":"Cricket"},{"segment":[108.9258567550702,128.14806677067085],"label":"Cricket"},{"segment":[136.47769111076443,140.64250328081124],"label":"Cricket"},{"segment":[151.53508895631828,199.5906139953198],"label":"Cricket"}]},"tOW6_7jmht8":{"duration":137.741,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=tOW6_7jmht8","annotations":[]},"Wdp8yduPUX4":{"duration":161.96,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Wdp8yduPUX4","annotations":[{"segment":[17.433983889967845,30.572638415740713],"label":"Longboarding"},{"segment":[32.84663631443217,37.394632111815085],"label":"Longboarding"},{"segment":[40.93196217644624,50.280620204400016],"label":"Longboarding"},{"segment":[54.828616001782926,65.18793976248847],"label":"Longboarding"},{"segment":[68.72526982711962,84.64325511795984],"label":"Longboarding"},{"segment":[96.77124391098094,145.7885304430577],"label":"Longboarding"}]},"20ooSJixdyg":{"duration":75.61,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=20ooSJixdyg","annotations":[{"segment":[0,75.561],"label":"Mixing drinks"}]},"dD2R0yyk87g":{"duration":104.119,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=dD2R0yyk87g","annotations":[]},"j-qAgWm_Ylw":{"duration":185.9,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=j-qAgWm_Ylw","annotations":[{"segment":[4.783738689547582,8.262821372854914],"label":"Long jump"},{"segment":[14.351216068642747,18.410145865834636],"label":"Long jump"},{"segment":[23.048922776911077,28.84739391575663],"label":"Long jump"},{"segment":[32.326476599063966,42.18387753510141],"label":"Long 
jump"},{"segment":[48.272272230889236,53.49089625585024],"label":"Long jump"},{"segment":[71.75608034321372,90.3111879875195],"label":"Long jump"},{"segment":[103.93759516380656,111.47560764430578],"label":"Long jump"},{"segment":[122.49270280811233,140.46796333853354],"label":"Long jump"},{"segment":[148.8757464898596,163.66184789391576],"label":"Long jump"},{"segment":[176.70840795631827,185.64100000000002],"label":"Long jump"}]},"jrXoqceGS-k":{"duration":202.966,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jrXoqceGS-k","annotations":[{"segment":[0.01,184.2615631825273],"label":"Rollerblading"}]},"fBbjlXgtd50":{"duration":29.12,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=fBbjlXgtd50","annotations":[{"segment":[0,29.117822],"label":"Cumbia"}]},"kW4ajodPtWU":{"duration":194.58,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kW4ajodPtWU","annotations":[{"segment":[24.09326706,101.97900446999999],"label":"Tumbling"},{"segment":[112.94959401000001,123.04066839],"label":"Tumbling"}]},"Zz2Cv6Epkes":{"duration":113.128,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Zz2Cv6Epkes","annotations":[]},"I5RVQUYA1w4":{"duration":33.72,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=I5RVQUYA1w4","annotations":[{"segment":[15.643422850000002,26.466275000000003],"label":"Hopscotch"}]},"SNQ3dLJKgro":{"duration":236.89,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=SNQ3dLJKgro","annotations":[]},"s_VFaQTlskE":{"duration":121.998,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=s_VFaQTlskE","annotations":[{"segment":[9.8922496099844,119.1069984399376],"label":"Playing 
congas"}]},"f4OyAeQAOi4":{"duration":34.854,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=f4OyAeQAOi4","annotations":[]},"lBhNeACY8y4":{"duration":206.61,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=lBhNeACY8y4","annotations":[{"segment":[38.15691948,197.35845620124803],"label":"Doing kickboxing"}]},"eQc-8npRq18":{"duration":4.09,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=eQc-8npRq18","annotations":[{"segment":[0.01,3.7169305975039],"label":"Pole vault"}]},"Lu1dzpAN-lE":{"duration":130.566,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Lu1dzpAN-lE","annotations":[]},"J959vSGFedM":{"duration":28.259,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=J959vSGFedM","annotations":[{"segment":[0.2,26.643892129036495],"label":"Doing crunches"}]},"KVIC0mnlhVk":{"duration":231.921,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=KVIC0mnlhVk","annotations":[]},"kyvxaxRFLG8":{"duration":103.59,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kyvxaxRFLG8","annotations":[{"segment":[7.918294331773271,63.83114776131045],"label":"Chopping wood"},{"segment":[72.3958334102964,77.72856221060842],"label":"Chopping wood"},{"segment":[0.2,103.38421699999999],"label":"Chopping wood"},{"segment":[82.41489964118564,83.70768238065523],"label":"Chopping wood"}]},"_8-4M5XVuwI":{"duration":25.635,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_8-4M5XVuwI","annotations":[{"segment":[0.2,16.091644367601244],"label":"Raking leaves"}]},"lkCpF06uEmE":{"duration":46.56,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=lkCpF06uEmE","annotations":[{"segment":[0,22.442558644305773],"label":"Clean and jerk"},{"segment":[30.577078282371293,43.650413414976605],"label":"Clean and 
jerk"}]},"yN3e-giBxns":{"duration":35.202,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yN3e-giBxns","annotations":[{"segment":[8.731717174726988,14.992193639625583],"label":"Calf roping"}]},"wBij_rEXdCk":{"duration":183.58,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wBij_rEXdCk","annotations":[{"segment":[46.39699824,77.973906],"label":"Breakdancing"},{"segment":[5.3328828,31.031687039999998],"label":"Breakdancing"}]},"y76trNI6U0Y":{"duration":171.36,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=y76trNI6U0Y","annotations":[{"segment":[0.01,144.36218808112324],"label":"Bathing dog"}]},"fOGXvBAmTsY":{"duration":199.924,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fOGXvBAmTsY","annotations":[{"segment":[40.85792964507696,46.47199609828393],"label":"Running a marathon"},{"segment":[64.87365898283932,68.61637007800311],"label":"Running a marathon"},{"segment":[115.08836617628704,118.83107727145085],"label":"Running a marathon"}]},"5qh_gx81Bd8":{"duration":179.77,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=5qh_gx81Bd8","annotations":[{"segment":[94.79705944,127.64966144],"label":"Breakdancing"},{"segment":[52.36821608,81.24974295999999],"label":"Breakdancing"},{"segment":[134.02021271138844,179.521666],"label":"Breakdancing"},{"segment":[12.05621160374415,32.80411064274571],"label":"Breakdancing"}]},"rgAALWYnRrg":{"duration":28.491,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=rgAALWYnRrg","annotations":[{"segment":[12.000839953198128,28.290884353741497],"label":"Hitting a pinata"}]},"NCvNIKw4EZ4":{"duration":23.267,"subset":"validation","resolution":"544x360","url":"https://www.youtube.com/watch?v=NCvNIKw4EZ4","annotations":[{"segment":[6.636917316692667,10.722117004680186],"label":"Playing 
pool"}]},"XbN3TvLEm2A":{"duration":20.086,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XbN3TvLEm2A","annotations":[{"segment":[0.01,19.222654876245517],"label":"Kneeling"}]},"QjMNQxu3Zf8":{"duration":70.59,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=QjMNQxu3Zf8","annotations":[{"segment":[4.071789391575663,67.91909048361936],"label":"Brushing teeth"}]},"uatIP3FGQXk":{"duration":65.09,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uatIP3FGQXk","annotations":[{"segment":[5.483024452418097,63.054781202808115],"label":"Playing kickball"}]},"RFhzBUJw6w0":{"duration":208.353,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RFhzBUJw6w0","annotations":[]},"4rvACiBclFI":{"duration":227.927,"subset":"training","resolution":"416x320","url":"https://www.youtube.com/watch?v=4rvACiBclFI","annotations":[{"segment":[0.2,39.46943073790952],"label":"Changing car wheel"},{"segment":[72.8939937051482,92.45091884555382],"label":"Changing car wheel"},{"segment":[170.323038950078,188.8132227191888],"label":"Changing car wheel"},{"segment":[204.10318237441498,227.927073],"label":"Changing car wheel"}]},"DZVJtWJrWTU":{"duration":209.607,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=DZVJtWJrWTU","annotations":[{"segment":[0.01,51.3479608550488],"label":"Blow-drying hair"},{"segment":[75.83470508545125,166.27241377640428],"label":"Blow-drying hair"}]},"dL-ybVv7Sgs":{"duration":33.83,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dL-ybVv7Sgs","annotations":[{"segment":[11.816895475819033,32.92413416536662],"label":"Starting a 
campfire"}]},"C4qzKKhLR8g":{"duration":116.17,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=C4qzKKhLR8g","annotations":[]},"o8n1YnT8ZZ8":{"duration":203.848,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=o8n1YnT8ZZ8","annotations":[{"segment":[38.79789982347593,136.4286792823713],"label":"Making a lemonade"}]},"GtfngouN8xs":{"duration":84.17,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=GtfngouN8xs","annotations":[]},"umn2TO6ACKs":{"duration":19.761,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=umn2TO6ACKs","annotations":[]},"w5xTzAe_O9s":{"duration":114.428,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=w5xTzAe_O9s","annotations":[]},"EbBlHnunlSI":{"duration":123.32,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EbBlHnunlSI","annotations":[{"segment":[10.388974497755422,43.86455899052289],"label":"Shaving"},{"segment":[39.63201382477068,79.07163923291625],"label":"Shaving"},{"segment":[90.42255581379719,109.85378589293232],"label":"Shaving"}]},"lsgB3WU-1r0":{"duration":204.592,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lsgB3WU-1r0","annotations":[{"segment":[0.01,181.5837893915757],"label":"Doing fencing"}]},"nnWON1EzK0o":{"duration":20.155,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=nnWON1EzK0o","annotations":[{"segment":[3.6788229687987517,14.998278257410297],"label":"Calf roping"}]},"p4pj27t5B6g":{"duration":154.16,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=p4pj27t5B6g","annotations":[{"segment":[110.86818342937799,113.27313317838835],"label":"Playing 
saxophone"}]},"kuMevlNUDCs":{"duration":144.17000000000002,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=kuMevlNUDCs","annotations":[{"segment":[11.470840523416854,96.93984828549141],"label":"Washing dishes"}]},"0GWJ-VHFlTk":{"duration":66.25,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0GWJ-VHFlTk","annotations":[{"segment":[6.2008850546021845,7.441062065522621],"label":"Layup drill in basketball"},{"segment":[22.323186196567864,24.28679979719189],"label":"Layup drill in basketball"},{"segment":[28.007330829953197,29.350855925117003],"label":"Layup drill in basketball"},{"segment":[31.72786186271451,33.48477929485179],"label":"Layup drill in basketball"},{"segment":[35.75843714820593,37.618702664586586],"label":"Layup drill in basketball"},{"segment":[40.71914519188768,41.95932220280812],"label":"Layup drill in basketball"},{"segment":[45.36980898283932,46.50663790951638],"label":"Layup drill in basketball"},{"segment":[49.40038426833073,50.5372131950078],"label":"Layup drill in basketball"}]},"mMm1LfVb8Pg":{"duration":82.99,"subset":"testing","resolution":"800x480","url":"https://www.youtube.com/watch?v=mMm1LfVb8Pg","annotations":[]},"lDriXPZt_Yg":{"duration":83.38,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lDriXPZt_Yg","annotations":[{"segment":[3.642308842433698,70.11444521684868],"label":"Doing nails"}]},"eUecHAdv1uU":{"duration":14.188,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=eUecHAdv1uU","annotations":[{"segment":[0.2,9.610039001560063],"label":"Calf 
roping"}]},"BWAedc31WdY":{"duration":155.39,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BWAedc31WdY","annotations":[]},"coEvniePQLA":{"duration":143.5,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=coEvniePQLA","annotations":[{"segment":[9.846184087363495,135.18009984399376],"label":"Making a lemonade"}]},"LvkqCFRC5Vk":{"duration":117.05,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=LvkqCFRC5Vk","annotations":[{"segment":[13.147782964118566,53.13895614664587],"label":"Playing lacrosse"},{"segment":[62.99979336973479,73.77367107644305],"label":"Playing lacrosse"},{"segment":[81.99103542901716,96.41707507020281],"label":"Playing lacrosse"}]},"MkKUQ4MMHd8":{"duration":37.547,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MkKUQ4MMHd8","annotations":[{"segment":[0.2,36.90434169064286],"label":"Camel ride"}]},"9R2wP-iceaw":{"duration":151.7,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9R2wP-iceaw","annotations":[{"segment":[9.229544716482536,150.2759203837541],"label":"Dodgeball"}]},"SBj7yuFEwQI":{"duration":131.82,"subset":"validation","resolution":"1280x564","url":"https://www.youtube.com/watch?v=SBj7yuFEwQI","annotations":[{"segment":[58.99355078050132,124.06298535994057],"label":"Painting fence"}]},"Oh83skxSJkQ":{"duration":152.79,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Oh83skxSJkQ","annotations":[{"segment":[1.6679875195007803,152.74],"label":"Playing 
piano"}]},"ey6s7PdS4JA":{"duration":26.96,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ey6s7PdS4JA","annotations":[]},"ldAHwbF8uWI":{"duration":187.269,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=ldAHwbF8uWI","annotations":[]},"FEqLmpNzxdg":{"duration":173.941,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FEqLmpNzxdg","annotations":[{"segment":[46.40221146805056,61.32689883307332],"label":"Ice fishing"},{"segment":[86.29183110140404,136.76441155694226],"label":"Ice fishing"},{"segment":[148.4328038127925,164.44292341965678],"label":"Ice fishing"}]},"1cccnJAK7TI":{"duration":66.08,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1cccnJAK7TI","annotations":[{"segment":[0.6185653666146645,15.051757254290171],"label":"Getting a tattoo"},{"segment":[24.12404929797192,35.67060280811232],"label":"Getting a tattoo"}]},"Y9wrTRY3mUI":{"duration":228.717,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=Y9wrTRY3mUI","annotations":[{"segment":[48.510414976599066,60.281324492979714],"label":"Ice fishing"},{"segment":[87.74678003120125,98.80430109204367],"label":"Ice fishing"},{"segment":[134.11702964118564,186.21067394695788],"label":"Ice fishing"}]},"6pY2zz2_CCU":{"duration":76.417,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=6pY2zz2_CCU","annotations":[{"segment":[5.305072944414374,34.63199304162642],"label":"Clipping cat 
claws"}]},"COwvUuj-KDE":{"duration":224.31,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=COwvUuj-KDE","annotations":[]},"IclzwpxVvIk":{"duration":169.79,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=IclzwpxVvIk","annotations":[]},"D7ZeRbotot0":{"duration":17.067,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=D7ZeRbotot0","annotations":[{"segment":[0,15.176285569422777],"label":"Welding"}]},"wLXty2wzuJg":{"duration":109.529,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=wLXty2wzuJg","annotations":[]},"bX8QXK9mjKA":{"duration":64.088,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bX8QXK9mjKA","annotations":[{"segment":[0.01,64.08668934240363],"label":"Riding bumper cars"}]},"9bFevj6bomQ":{"duration":58.213,"subset":"training","resolution":"322x240","url":"https://www.youtube.com/watch?v=9bFevj6bomQ","annotations":[{"segment":[10.171281921996881,16.709963157566303],"label":"Futsal"}]},"JD2Mb8pM6E8":{"duration":63.0,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=JD2Mb8pM6E8","annotations":[]},"3s4tAYvAr5A":{"duration":67.45,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3s4tAYvAr5A","annotations":[]},"dmoZPkjzhQ4":{"duration":31.858,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dmoZPkjzhQ4","annotations":[]},"7UrJuhFvlQ4":{"duration":90.952,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=7UrJuhFvlQ4","annotations":[{"segment":[0.20000000000000004,90.721],"label":"Blowing 
leaves"}]},"iKH8FgONbVw":{"duration":25.588,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=iKH8FgONbVw","annotations":[{"segment":[0.01,25.587844],"label":"Kneeling"}]},"I5F59PkcDWM":{"duration":167.0,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=I5F59PkcDWM","annotations":[{"segment":[2.6053198127925117,163.13706685179406],"label":"Playing guitarra"}]},"VnBZR-yBLXA":{"duration":117.586,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VnBZR-yBLXA","annotations":[{"segment":[16.533949158044297,74.59425154589476],"label":"Peeling potatoes"},{"segment":[87.2319829804742,108.11171317673588],"label":"Peeling potatoes"}]},"pUIicfDCZC0":{"duration":157.87,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pUIicfDCZC0","annotations":[{"segment":[24.006142745709827,145.63726599063963],"label":"Preparing pasta"}]},"U_ia-tINzpw":{"duration":129.08,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=U_ia-tINzpw","annotations":[{"segment":[87.99933356942277,107.93510936661467],"label":"Playing saxophone"}]},"Ocd1VWIIeVs":{"duration":64.668,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ocd1VWIIeVs","annotations":[{"segment":[6.2548716043879855,7.36460670826833],"label":"Hand car wash"}]},"FEOdYNR4gls":{"duration":177.587,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=FEOdYNR4gls","annotations":[]},"ZJ6BFrKcRe0":{"duration":154.878,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZJ6BFrKcRe0","annotations":[{"segment":[0.2,154.621],"label":"Kite flying"}]},"mc5XVTo5PVk":{"duration":16.09,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mc5XVTo5PVk","annotations":[{"segment":[1.7070469641185648,10.99539073946958],"label":"Using uneven 
bars"}]},"Fi0PQwhsGs4":{"duration":45.07,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Fi0PQwhsGs4","annotations":[{"segment":[37.12468655480913,42.25745571863691],"label":"Rock-paper-scissors"}]},"m2X4HZxeaGk":{"duration":153.066,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=m2X4HZxeaGk","annotations":[]},"cfcYZBrKVP8":{"duration":55.87,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cfcYZBrKVP8","annotations":[]},"8QEG_1GhoEc":{"duration":148.028,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8QEG_1GhoEc","annotations":[{"segment":[40.87490846572638,114.77304660374415],"label":"Spread mulch"}]},"Mh-qSztuLtE":{"duration":23.34,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Mh-qSztuLtE","annotations":[{"segment":[7.0311368,15.24667792979719],"label":"Zumba"},{"segment":[1.0909579999999999,5.30357272],"label":"Zumba"}]},"_uPVFvEQocU":{"duration":114.8,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_uPVFvEQocU","annotations":[{"segment":[51.88226006000001,81.76329177000001],"label":"Ping-pong"},{"segment":[22.66821054,41.36667166],"label":"Ping-pong"}]},"4o7Abew2ExY":{"duration":107.695,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4o7Abew2ExY","annotations":[{"segment":[3.360191856474259,85.01285396879875],"label":"Ice fishing"}]},"vK0Jp1KbThQ":{"duration":208.38,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=vK0Jp1KbThQ","annotations":[]},"70GQ4Nnrk4E":{"duration":148.794,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=70GQ4Nnrk4E","annotations":[{"segment":[52.69285109363558,146.24006995319812],"label":"Rope 
skipping"}]},"YO8EY7miuk8":{"duration":58.027,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=YO8EY7miuk8","annotations":[{"segment":[19.010296193447736,56.75931292043682],"label":"Capoeira"}]},"l3k_7ydV74A":{"duration":8.963,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=l3k_7ydV74A","annotations":[{"segment":[0.47537882371294854,7.913659241809673],"label":"Doing a powerbomb"}]},"P62B_iDlQXM":{"duration":30.67,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=P62B_iDlQXM","annotations":[{"segment":[0,30.533333],"label":"Mowing the lawn"}]},"OHNH7IV0768":{"duration":18.07,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OHNH7IV0768","annotations":[{"segment":[0.7497308892355694,17.3286856474259],"label":"Playing water polo"}]},"1nXLVjfupto":{"duration":229.37,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1nXLVjfupto","annotations":[{"segment":[125.04684711388457,171.9170530421217],"label":"Wrapping presents"}]},"gjkK_eSIs2g":{"duration":202.8,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=gjkK_eSIs2g","annotations":[{"segment":[21.5142117086044,89.22070149744766],"label":"Snatch"}]},"ZW46Rcuhqac":{"duration":33.58,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZW46Rcuhqac","annotations":[{"segment":[7.82052192,29.97799584],"label":"Drinking 
beer"}]},"wUsmKmxxHYQ":{"duration":176.728,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wUsmKmxxHYQ","annotations":[{"segment":[52.38400033960543,58.72522143334713],"label":"Swimming"},{"segment":[104.49229541426556,111.10922177295257],"label":"Swimming"},{"segment":[137.3012212979719,144.4695581528861],"label":"Swimming"}]},"Y1j_e1DXW6I":{"duration":39.61,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Y1j_e1DXW6I","annotations":[{"segment":[3.120855960218409,16.160471952418096],"label":"Dodgeball"}]},"Lv6ypQZWIEg":{"duration":118.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Lv6ypQZWIEg","annotations":[{"segment":[5.890845553822153,7.731734789391576],"label":"Javelin throw"},{"segment":[53.38578783151326,61.680241809672395],"label":"Javelin throw"},{"segment":[73.27980967238689,83.95877691107644],"label":"Javelin throw"},{"segment":[96.29482527301091,110.47207488299532],"label":"Javelin throw"}]},"qhubAySHZic":{"duration":193.07,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=qhubAySHZic","annotations":[]},"W30cufYc_ZI":{"duration":220.729,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=W30cufYc_ZI","annotations":[{"segment":[0.01,213.8418702449298],"label":"Scuba diving"}]},"uBhuULfs3Hc":{"duration":144.27,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uBhuULfs3Hc","annotations":[{"segment":[9.002532182919971,18.23012767041294],"label":"Discus throw"},{"segment":[45.46278752374585,81.02278964627973],"label":"Discus throw"},{"segment":[104.65443573322933,115.68253756318252],"label":"Discus 
throw"}]},"owNzsQ9mxXE":{"duration":224.189,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=owNzsQ9mxXE","annotations":[]},"0K1SrDmREzs":{"duration":52.62,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=0K1SrDmREzs","annotations":[{"segment":[1.0251170046801872,51.296854914196565],"label":"Discus throw"}]},"0w4OkBenR_k":{"duration":140.83,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0w4OkBenR_k","annotations":[]},"BFICeyu5oMM":{"duration":103.19,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=BFICeyu5oMM","annotations":[{"segment":[20.403281931464175,100.2686355140187],"label":"Making a lemonade"}]},"krFle3KU4Ts":{"duration":65.09,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=krFle3KU4Ts","annotations":[{"segment":[0.01,63.96862045910409],"label":"Playing accordion"}]},"R7iFa9OpoTY":{"duration":103.72,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=R7iFa9OpoTY","annotations":[{"segment":[22.239534438377536,26.283086154446174],"label":"Spinning"},{"segment":[39.22245164586583,50.54439645085803],"label":"Spinning"},{"segment":[67.04208745241809,73.83525433541341],"label":"Spinning"}]},"WebWWFKJ4b0":{"duration":140.922,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WebWWFKJ4b0","annotations":[{"segment":[81.12352918408736,88.59832591107644],"label":"Welding"},{"segment":[95.853275675507,106.62577684087361],"label":"Welding"}]},"uhxQoWHBbaw":{"duration":43.375,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=uhxQoWHBbaw","annotations":[]},"4VVIFV6XmKo":{"duration":114.85,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=4VVIFV6XmKo","annotations":[{"segment":[6.629170266130374,111.0833935600624],"label":"Doing 
kickboxing"}]},"yslzj3NGuLU":{"duration":232.99,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=yslzj3NGuLU","annotations":[{"segment":[13.99388059331897,222.9933959480828],"label":"Removing ice from car"}]},"_asXSQ6Ql24":{"duration":190.61,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_asXSQ6Ql24","annotations":[]},"UArl5DMLpSI":{"duration":224.63,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=UArl5DMLpSI","annotations":[]},"h1Jpycw4mT8":{"duration":51.618,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=h1Jpycw4mT8","annotations":[]},"RhsCYNiAaV8":{"duration":206.518,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RhsCYNiAaV8","annotations":[{"segment":[44.362271028037384,175.85879595015578],"label":"Rafting"}]},"skSPijG9sgU":{"duration":220.033,"subset":"training","resolution":"810x480","url":"https://www.youtube.com/watch?v=skSPijG9sgU","annotations":[{"segment":[0.2,89.57104680187207],"label":"Curling"}]},"PeZRmM_-_mY":{"duration":28.47,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PeZRmM_-_mY","annotations":[]},"PjFWE2F8tBE":{"duration":91.65,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PjFWE2F8tBE","annotations":[]},"JGurKGNFYpU":{"duration":124.483,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JGurKGNFYpU","annotations":[]},"hokqvyeqhmg":{"duration":102.89,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hokqvyeqhmg","annotations":[{"segment":[26.163258407887337,100.80077472486654],"label":"Rock climbing"}]},"IAtxK0w_ybY":{"duration":177.703,"subset":"validation","resolution":"402x226","url":"https://www.youtube.com/watch?v=IAtxK0w_ybY","annotations":[{"segment":[18.374907587366312,173.08675649707118],"label":"Making a 
lemonade"}]},"0FuZcOAxfJ8":{"duration":60.023,"subset":"testing","resolution":"240x180","url":"https://www.youtube.com/watch?v=0FuZcOAxfJ8","annotations":[]},"vvk6f13VO5c":{"duration":62.276,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vvk6f13VO5c","annotations":[{"segment":[17.29346837129485,48.18854107956319],"label":"Beach soccer"}]},"dRwQj921TS4":{"duration":200.969,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dRwQj921TS4","annotations":[]},"_nGlzZystmo":{"duration":139.018,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_nGlzZystmo","annotations":[{"segment":[5.421890753181148,8.675025205089836],"label":"Futsal"},{"segment":[10.410030246107803,11.494408396744033],"label":"Futsal"},{"segment":[16.48254788967069,42.5076235049402],"label":"Futsal"},{"segment":[44.676379806212665,51.61639997028453],"label":"Futsal"},{"segment":[54.86953442219322,62.89393273690131],"label":"Futsal"},{"segment":[65.06268903817377,74.82209239389985],"label":"Futsal"},{"segment":[77.20772432529954,86.75025205089837],"label":"Futsal"},{"segment":[88.48525709191632,97.59403355726066],"label":"Futsal"},{"segment":[102.14842178993283,113.64282944149765],"label":"Futsal"},{"segment":[114.07658069890796,138.81727800000002],"label":"Futsal"}]},"ZuHwSBCjfds":{"duration":228.25,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=ZuHwSBCjfds","annotations":[{"segment":[19.22873051794072,228.25215419501134],"label":"Playing water polo"}]},"mbB7UFoTwpo":{"duration":60.12,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=mbB7UFoTwpo","annotations":[{"segment":[18.004292760358567,21.46894861238349],"label":"Washing face"},{"segment":[31.11380138964206,37.10671962017165],"label":"Washing 
face"}]},"tFAcBYp3o0w":{"duration":49.111,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tFAcBYp3o0w","annotations":[]},"AwIts-6Vtms":{"duration":69.06,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=AwIts-6Vtms","annotations":[]},"cRDXBF2RcYI":{"duration":30.65,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cRDXBF2RcYI","annotations":[{"segment":[1.1218408736349454,25.94555382215289],"label":"Shaving legs"}]},"IoJoUIxzdac":{"duration":53.383,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=IoJoUIxzdac","annotations":[{"segment":[1.2492045631825273,9.077553159126365],"label":"Playing rubik cube"},{"segment":[10.743159243369735,52.71643256630266],"label":"Playing rubik cube"}]},"6Y8wppTQFPo":{"duration":100.218,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6Y8wppTQFPo","annotations":[{"segment":[17.030527301092043,91.72859438377535],"label":"Assembling bicycle"}]},"MWdPh6J-YXM":{"duration":171.503,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MWdPh6J-YXM","annotations":[{"segment":[9.079429906542057,142.3845440373832],"label":"Canoeing"}]},"jwYgWG2fqOA":{"duration":169.297,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=jwYgWG2fqOA","annotations":[{"segment":[2.7419231437255287,169.061],"label":"Baton 
twirling"}]},"araMJB23T1s":{"duration":75.81,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=araMJB23T1s","annotations":[]},"YaIuavIbk6A":{"duration":171.317,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YaIuavIbk6A","annotations":[{"segment":[23.78657950127529,26.459228883441057],"label":"Slacklining"},{"segment":[36.61529653567095,40.89153554713617],"label":"Slacklining"},{"segment":[51.04760319936607,53.987517519748415],"label":"Slacklining"},{"segment":[68.95435405987668,72.69606319490876],"label":"Slacklining"},{"segment":[79.11042171210659,83.92119060000496],"label":"Slacklining"},{"segment":[86.59383998217072,94.611788128668],"label":"Slacklining"},{"segment":[105.30238565733106,112.51853898917861],"label":"Slacklining"},{"segment":[118.13110269172672,127.75264017160686],"label":"Slacklining"},{"segment":[133.36520386115444,140.04682730109204],"label":"Slacklining"},{"segment":[153.94460405616223,159.55716774570982],"label":"Slacklining"}]},"e4V0JFm4hhI":{"duration":75.814,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=e4V0JFm4hhI","annotations":[]},"f_EIRRM0EZE":{"duration":91.19,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=f_EIRRM0EZE","annotations":[]},"SXwCedd4MKg":{"duration":173.825,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SXwCedd4MKg","annotations":[{"segment":[0.01,173.36785150342564],"label":"Disc 
dog"}]},"u2TTS37kXXY":{"duration":214.86,"subset":"testing","resolution":"640x352","url":"https://www.youtube.com/watch?v=u2TTS37kXXY","annotations":[]},"JbW8efAOOt0":{"duration":210.77,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=JbW8efAOOt0","annotations":[]},"gF0ymMmpS_M":{"duration":137.741,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gF0ymMmpS_M","annotations":[{"segment":[36.60057800312012,51.8418744149766],"label":"Elliptical trainer"}]},"vcl8rFSCH4w":{"duration":70.473,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vcl8rFSCH4w","annotations":[{"segment":[6.912434579439252,46.74122429906542],"label":"Snow tubing"}]},"MWjdJIAkMyI":{"duration":36.22,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=MWjdJIAkMyI","annotations":[]},"mU3ujj4Z7lE":{"duration":56.43,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mU3ujj4Z7lE","annotations":[{"segment":[3.8715195007800314,12.758416536661468],"label":"Shoveling snow"},{"segment":[15.222110764430578,30.356232449297977],"label":"Shoveling snow"},{"segment":[35.107642745709825,41.882801872074886],"label":"Shoveling snow"}]},"woRQ2JxcVHA":{"duration":29.49,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=woRQ2JxcVHA","annotations":[{"segment":[0.01,25.69391099609984],"label":"Playing 
saxophone"}]},"YAhMxt-3ciU":{"duration":102.424,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=YAhMxt-3ciU","annotations":[{"segment":[13.097101404056161,58.936956318252726],"label":"Croquet"},{"segment":[86.72836661466458,93.75607956318252],"label":"Croquet"}]},"YRGiRmcHdUY":{"duration":35.666,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=YRGiRmcHdUY","annotations":[]},"CvVqbdk6owc":{"duration":70.52,"subset":"training","resolution":"642x480","url":"https://www.youtube.com/watch?v=CvVqbdk6owc","annotations":[{"segment":[4.398190327613104,70.461],"label":"Breakdancing"}]},"lxNV7FI-LQQ":{"duration":146.263,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=lxNV7FI-LQQ","annotations":[{"segment":[17.060163052941498,131.70448209315478],"label":"Camel ride"}]},"Qos-Lzt5U6M":{"duration":150.54,"subset":"testing","resolution":"854x476","url":"https://www.youtube.com/watch?v=Qos-Lzt5U6M","annotations":[]},"sJUUm0n4cXA":{"duration":201.06,"subset":"training","resolution":"852x480","url":"https://www.youtube.com/watch?v=sJUUm0n4cXA","annotations":[{"segment":[6.273370336173992,194.7881486723869],"label":"Playing lacrosse"}]},"yNyv7Ykn5oo":{"duration":211.361,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yNyv7Ykn5oo","annotations":[{"segment":[3.9451483356432258,119.01212110712612],"label":"Building sandcastles"}]},"dkG9YRNJllQ":{"duration":34.018,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=dkG9YRNJllQ","annotations":[{"segment":[0.2,30.620815700468018],"label":"Blowing leaves"}]},"Xg_MSiPhJns":{"duration":93.39,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=Xg_MSiPhJns","annotations":[{"segment":[16.125651299999998,72.99332083619345],"label":"Making a 
sandwich"}]},"O2Y6rn4gFd4":{"duration":24.66,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=O2Y6rn4gFd4","annotations":[{"segment":[0.19999999999999998,24.10109204368175],"label":"Shaving legs"}]},"iCkDtX_mTII":{"duration":33.995,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iCkDtX_mTII","annotations":[{"segment":[2.7550265210608424,33.941],"label":"Using the rowing machine"}]},"QYU2srH753Q":{"duration":178.93,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=QYU2srH753Q","annotations":[{"segment":[42.83751560062402,163.95466068642745],"label":"Doing motocross"}]},"i1PpX1IOcIs":{"duration":184.02,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=i1PpX1IOcIs","annotations":[{"segment":[6.36334244,119.3448739],"label":"Snatch"},{"segment":[126.8767550702028,178.8330733229329],"label":"Snatch"}]},"W1Cp1UyY8S8":{"duration":186.991,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=W1Cp1UyY8S8","annotations":[{"segment":[16.59578971962617,170.6346199376947],"label":"Painting fence"}]},"lYP05IRcOPE":{"duration":26.077,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lYP05IRcOPE","annotations":[]},"2WwWzzO6rZQ":{"duration":90.19,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2WwWzzO6rZQ","annotations":[{"segment":[4.2208878559224,87.23168152886115],"label":"Using the balance 
beam"}]},"OYIAhO9nJmk":{"duration":139.738,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=OYIAhO9nJmk","annotations":[{"segment":[18.31195899264542,130.36370706396255],"label":"Capoeira"}]},"iDz8nKDpumY":{"duration":112.176,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iDz8nKDpumY","annotations":[{"segment":[0.01,112.1756],"label":"Waterskiing"}]},"dPdeb0SgIw8":{"duration":192.122,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dPdeb0SgIw8","annotations":[{"segment":[16.184996508580344,30.871382229329175],"label":"Playing ice hockey"}]},"kgz7c2kb4Rg":{"duration":87.61,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=kgz7c2kb4Rg","annotations":[]},"I6VBFfeBqF0":{"duration":168.48,"subset":"training","resolution":"608x1080","url":"https://www.youtube.com/watch?v=I6VBFfeBqF0","annotations":[{"segment":[0.5256910793438541,19.450569935722598],"label":"Shaving legs"},{"segment":[27.598781665552337,44.15805066488374],"label":"Shaving legs"},{"segment":[48.626424839306495,64.13431167995019],"label":"Shaving legs"},{"segment":[69.65406801306065,94.62439428189371],"label":"Shaving legs"},{"segment":[119.33187364430579,126.9543942074883],"label":"Shaving legs"}]},"sEGceBU8icE":{"duration":110.44,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sEGceBU8icE","annotations":[{"segment":[2.9288289980578814,98.71876418096724],"label":"Painting"}]},"xGLwosapbIw":{"duration":162.215,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=xGLwosapbIw","annotations":[]},"5xgBlI9Xx0I":{"duration":65.18,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5xgBlI9Xx0I","annotations":[{"segment":[6.812720131045241,55.51858494851793],"label":"Using parallel 
bars"}]},"Z3DhUZhoRDA":{"duration":110.829,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Z3DhUZhoRDA","annotations":[{"segment":[0.01,110.82884353741497],"label":"Hanging wallpaper"}]},"7OZHg_OOfxw":{"duration":175.01,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7OZHg_OOfxw","annotations":[{"segment":[13.373024960998439,29.748157566302652],"label":"Cheerleading"},{"segment":[27.83772542901716,29.475238689547584],"label":"Cheerleading"},{"segment":[147.3761934477379,174.941],"label":"Cheerleading"}]},"v1hQ0vf66QY":{"duration":212.045,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=v1hQ0vf66QY","annotations":[{"segment":[16.818453723249913,36.275102087105566],"label":"Sumo"},{"segment":[39.57283909792856,60.0188085650311],"label":"Sumo"},{"segment":[66.28450888559479,77.82658842347526],"label":"Sumo"},{"segment":[83.10296764079204,89.69844166243803],"label":"Sumo"},{"segment":[95.63436828191942,107.17644781979989],"label":"Sumo"},{"segment":[110.80395853170516,120.3673948436359],"label":"Sumo"},{"segment":[125.64377401626422,138.17517455125645],"label":"Sumo"},{"segment":[140.48359043928133,147.40883810335598],"label":"Sumo"},{"segment":[151.3661224828272,168.5143547938692],"label":"Sumo"},{"segment":[175.7693761562331,194.23670326043217],"label":"Sumo"}]},"vFO70WwrQgE":{"duration":183.65,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vFO70WwrQgE","annotations":[]},"hyW_cjYi8P4":{"duration":189.45,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=hyW_cjYi8P4","annotations":[{"segment":[85.71133680721378,189.45160997732427],"label":"Polishing 
forniture"}]},"HPTb6DO0P2k":{"duration":177.96,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=HPTb6DO0P2k","annotations":[]},"oKauZV0DHHk":{"duration":160.59,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oKauZV0DHHk","annotations":[{"segment":[0.01,102.66054602184087],"label":"Blow-drying hair"}]},"vXOT_VXWOqo":{"duration":207.401,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=vXOT_VXWOqo","annotations":[{"segment":[13.91033229329173,195.73401716068642],"label":"Hurling"}]},"uWWo9qZh3cU":{"duration":107.32,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=uWWo9qZh3cU","annotations":[{"segment":[77.0177992823713,88.06817917940718],"label":"Playing saxophone"}]},"4CRacVTadPQ":{"duration":101.866,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4CRacVTadPQ","annotations":[{"segment":[83.21953822152885,101.821],"label":"Mooping floor"}]},"Gr2I0xdGN5A":{"duration":53.52,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Gr2I0xdGN5A","annotations":[]},"K9kVsnTQh-g":{"duration":102.12,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=K9kVsnTQh-g","annotations":[{"segment":[2.867682496099844,50.34375937597503],"label":"Washing dishes"},{"segment":[82.84416099843993,88.89815737909515],"label":"Washing dishes"}]},"NnLjEbG31ZE":{"duration":46.51,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NnLjEbG31ZE","annotations":[{"segment":[0.01,42.373771781591266],"label":"River tubing"}]},"tIAdhrfT70I":{"duration":153.647,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=tIAdhrfT70I","annotations":[{"segment":[25.40047737909516,29.713765990639626],"label":"Beer pong"},{"segment":[74.28441497659907,86.50539937597505],"label":"Beer 
pong"},{"segment":[120.54814820592824,123.4240483619345],"label":"Beer pong"},{"segment":[127.01892355694228,128.9361903276131],"label":"Beer pong"},{"segment":[141.63808268330735,145.472616224649],"label":"Beer pong"}]},"uxRBnIWE79s":{"duration":63.786,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uxRBnIWE79s","annotations":[{"segment":[3.3312379095163807,53.16732371294852],"label":"Doing fencing"}]},"7bUu05RIksU":{"duration":199.228,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=7bUu05RIksU","annotations":[{"segment":[25.47769422776911,60.276496099843996],"label":"Hand car wash"},{"segment":[66.49056786271451,108.43555226209048],"label":"Hand car wash"}]},"CUJJViubZ9c":{"duration":147.238,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CUJJViubZ9c","annotations":[{"segment":[15.389903240249609,135.75272858190326],"label":"Doing a powerbomb"}]},"w2fsq9BOoZo":{"duration":149.861,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=w2fsq9BOoZo","annotations":[{"segment":[39.27729555070203,139.10708840873633],"label":"Rock-paper-scissors"}]},"O_e0pqEMZMw":{"duration":111.71000000000001,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=O_e0pqEMZMw","annotations":[{"segment":[0,111.581],"label":"Drinking coffee"}]},"QLsdBUBpz_8":{"duration":29.188,"subset":"training","resolution":"1906x1080","url":"https://www.youtube.com/watch?v=QLsdBUBpz_8","annotations":[{"segment":[18.922483619344774,26.291335413416537],"label":"Fun sliding down"}]},"6_NquDQUFm0":{"duration":97.32,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6_NquDQUFm0","annotations":[{"segment":[17.610709566302653,46.91128668954758],"label":"Pole 
vault"}]},"mtSJG4q2vP4":{"duration":111.13,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mtSJG4q2vP4","annotations":[{"segment":[10.922353465567193,19.24414658218982],"label":"Triple jump"},{"segment":[29.6463879779681,63.97378458403642],"label":"Triple jump"}]},"qq1wRySmDTc":{"duration":97.06,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qq1wRySmDTc","annotations":[{"segment":[42.82571450858035,87.63675351014041],"label":"Starting a campfire"}]},"L9dtiopWGe4":{"duration":77.137,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=L9dtiopWGe4","annotations":[{"segment":[5.655888199687988,10.108395931357256],"label":"Making a lemonade"},{"segment":[37.184456461778474,51.62502207800313],"label":"Making a lemonade"}]},"TIue1jR51j8":{"duration":238.03,"subset":"training","resolution":"480x272","url":"https://www.youtube.com/watch?v=TIue1jR51j8","annotations":[{"segment":[162.8317786723869,220.3891929064918],"label":"High jump"}]},"rze0k4LklN4":{"duration":142.594,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rze0k4LklN4","annotations":[{"segment":[97.35404407651475,117.97802100185412],"label":"Assembling bicycle"},{"segment":[126.84854915115427,132.39262924446686],"label":"Assembling 
bicycle"}]},"K0XdsHfUpco":{"duration":148.283,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=K0XdsHfUpco","annotations":[{"segment":[8.09655548126687,26.14030769666161],"label":"Croquet"},{"segment":[37.2441552138276,102.94191969039305],"label":"Croquet"},{"segment":[110.57581457098284,138.5667634477379],"label":"Croquet"}]},"3KmMvfdidvQ":{"duration":96.34,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3KmMvfdidvQ","annotations":[{"segment":[26.36966540670503,34.460358903388304],"label":"Sumo"},{"segment":[41.35243114130369,61.878819122831636],"label":"Sumo"},{"segment":[80.30762065626864,86.30072684600427],"label":"Sumo"}]},"__mIAEE03bE":{"duration":207.331,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=__mIAEE03bE","annotations":[{"segment":[4.197131671858775,185.31950612668743],"label":"Snowboarding"}]},"OeIQ0P0FCCg":{"duration":227.486,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=OeIQ0P0FCCg","annotations":[{"segment":[7.452720402496101,223.58161207488303],"label":"BMX"}]},"5DBzvN5Qe_0":{"duration":210.28,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=5DBzvN5Qe_0","annotations":[{"segment":[12.281770404636712,190.5127965497199],"label":"Hula hoop"}]},"M9og58TMAm0":{"duration":142.48,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=M9og58TMAm0","annotations":[{"segment":[18.58042557,37.86642745709828],"label":"Doing step aerobics"},{"segment":[38.494435859999996,134.4757878315133],"label":"Doing step 
aerobics"}]},"e_y8_3siD0c":{"duration":15.79,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=e_y8_3siD0c","annotations":[{"segment":[0.5911850733229329,14.336238028081123],"label":"Hopscotch"}]},"_yWADgOFxP0":{"duration":238.377,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_yWADgOFxP0","annotations":[{"segment":[31.97710764430577,190.003511700468],"label":"Snowboarding"}]},"cRP9tyF1N4I":{"duration":26.889,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cRP9tyF1N4I","annotations":[{"segment":[0.01,26.841],"label":"Snow tubing"}]},"ysbUc6WS12k":{"duration":59.68,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=ysbUc6WS12k","annotations":[{"segment":[10.936848673946958,51.24029953198128],"label":"Starting a campfire"}]},"H8MY7XGrN6Q":{"duration":53.267,"subset":"training","resolution":"240x136","url":"https://www.youtube.com/watch?v=H8MY7XGrN6Q","annotations":[{"segment":[10.79365054602184,24.410255850234005],"label":"Rollerblading"}]},"DgVo1IAVrDM":{"duration":149.165,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=DgVo1IAVrDM","annotations":[{"segment":[0.009999999999999998,149.164987],"label":"Longboarding"}]},"q3-S2tg4ULw":{"duration":170.923,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=q3-S2tg4ULw","annotations":[{"segment":[35.99763125218886,61.86259592968753],"label":"Playing drums"},{"segment":[66.92892921703263,163.98920887675507],"label":"Playing drums"}]},"O-YKLVm0ciI":{"duration":202.39,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=O-YKLVm0ciI","annotations":[{"segment":[156.53190721996882,165.68397437597505],"label":"Triple jump"},{"segment":[3.7870622714508584,137.28100734009362],"label":"Triple jump"},{"segment":[146.74866301872075,152.74484494851794],"label":"Triple 
jump"}]},"WINtiw-_3vg":{"duration":39.87,"subset":"testing","resolution":"270x480","url":"https://www.youtube.com/watch?v=WINtiw-_3vg","annotations":[]},"QZWyv6SShks":{"duration":159.82,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=QZWyv6SShks","annotations":[{"segment":[7.480013159710062,89.5108241445304],"label":"Playing field hockey"}]},"yxcikJ3Hp8w":{"duration":172.11,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yxcikJ3Hp8w","annotations":[{"segment":[32.35379878732564,163.91696398060003],"label":"Cheerleading"}]},"x0zCL2FQV4E":{"duration":88.422,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=x0zCL2FQV4E","annotations":[]},"Aen-RfnlK3A":{"duration":188.082,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Aen-RfnlK3A","annotations":[]},"-0i3BpAQAgE":{"duration":64.831,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=-0i3BpAQAgE","annotations":[{"segment":[1.6182170007888752,3.9444039394228834],"label":"Rock-paper-scissors"}]},"ogOrhXUgna0":{"duration":189.963,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ogOrhXUgna0","annotations":[{"segment":[26.079088134789394,189.9624488],"label":"Surfing"}]},"dzR4voNDZ7g":{"duration":129.15,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=dzR4voNDZ7g","annotations":[{"segment":[6.0444330421216845,111.62053017784712],"label":"Canoeing"}]},"uDNLf3ty9ao":{"duration":174.482,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uDNLf3ty9ao","annotations":[{"segment":[80.29220508764392,141.72090602222337],"label":"Rock-paper-scissors"}]},"9iJ8snVY2s0":{"duration":125.81,"subset":"validation","resolution":"1920x812","url":"https://www.youtube.com/watch?v=9iJ8snVY2s0","annotations":[{"segment":[17.467564073991532,112.85223980387786],"label":"Wrapping 
presents"}]},"Q_AmwG-xRK4":{"duration":19.482,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=Q_AmwG-xRK4","annotations":[]},"VtIMPJjcdn4":{"duration":82.27,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VtIMPJjcdn4","annotations":[{"segment":[21.176707313190487,72.00080370982839],"label":"Cleaning shoes"}]},"8xS6cDOq5do":{"duration":125.74,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8xS6cDOq5do","annotations":[{"segment":[13.73092637991234,111.61281585957317],"label":"Removing curlers"}]},"3S_aifr5rG4":{"duration":148.98,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=3S_aifr5rG4","annotations":[{"segment":[31.862714508580343,71.89659252],"label":"Horseback riding"},{"segment":[82.98934583999997,131.14508580343212],"label":"Horseback riding"}]},"ajSnAVhwWww":{"duration":134.792,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=ajSnAVhwWww","annotations":[]},"GTOP7XXi_vI":{"duration":218.059,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GTOP7XXi_vI","annotations":[{"segment":[77.5209917393622,144.42314835222192],"label":"Capoeira"}]},"j5M9l0qxwnU":{"duration":79.506,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=j5M9l0qxwnU","annotations":[{"segment":[0.4958564742589704,20.95520904836193],"label":"Doing fencing"},{"segment":[27.77495163806552,34.7186895475819],"label":"Doing fencing"},{"segment":[39.43051170046802,46.49824492979719],"label":"Doing fencing"},{"segment":[52.82200624024961,58.27780031201248],"label":"Doing fencing"},{"segment":[73.15723868954758,76.62910764430576],"label":"Doing 
fencing"}]},"AK4p_3Pojzs":{"duration":23.96,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=AK4p_3Pojzs","annotations":[]},"6d7oOG3dS-Y":{"duration":60.303,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6d7oOG3dS-Y","annotations":[{"segment":[0.2,60.041],"label":"Installing carpet"}]},"QUJXOFPJ_YI":{"duration":27.12,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QUJXOFPJ_YI","annotations":[{"segment":[1.6982544,25.890722510140407],"label":"Playing squash"}]},"tPZYyaX63yE":{"duration":31.417,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=tPZYyaX63yE","annotations":[{"segment":[8.920157492013967,28.96600441029641],"label":"Blowing leaves"}]},"ZH5b3e8ivvM":{"duration":66.29,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZH5b3e8ivvM","annotations":[]},"JSbVBXPP28Y":{"duration":174.777,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=JSbVBXPP28Y","annotations":[]},"_oalsZ5Gtdk":{"duration":100.71000000000001,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=_oalsZ5Gtdk","annotations":[]},"AHXcJCpvkAQ":{"duration":81.34,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AHXcJCpvkAQ","annotations":[{"segment":[8.816199687987519,66.66061778471139],"label":"Doing step aerobics"}]},"V9_mEvC24nk":{"duration":152.741,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=V9_mEvC24nk","annotations":[{"segment":[10.246266806552262,133.91632430889234],"label":"Hurling"}]},"0dkIbKXXFzI":{"duration":15.49,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=0dkIbKXXFzI","annotations":[{"segment":[0.01,15.415222627145086],"label":"Using the balance 
beam"}]},"_9v34KLET0g":{"duration":192.98,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_9v34KLET0g","annotations":[{"segment":[34.02005888611544,186.05660523556944],"label":"Mixing drinks"}]},"5gNXNT-s_ww":{"duration":194.862,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=5gNXNT-s_ww","annotations":[]},"BfsCKU69hHs":{"duration":27.21,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=BfsCKU69hHs","annotations":[{"segment":[0.69719706,26.151991700468017],"label":"Ironing clothes"}]},"o8wdvkauJQQ":{"duration":45.813,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=o8wdvkauJQQ","annotations":[{"segment":[2.8499292634125006,23.654420107239126],"label":"Using the rowing machine"}]},"_Y4czVoIxvE":{"duration":189.127,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_Y4czVoIxvE","annotations":[{"segment":[74.02385491419658,84.64082215288612],"label":"Installing carpet"},{"segment":[109.41374570982839,146.5731310452418],"label":"Installing carpet"}]},"Xd_xJnlUHm0":{"duration":185.02,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Xd_xJnlUHm0","annotations":[{"segment":[25.977372727562162,168.85292272915407],"label":"Putting on makeup"}]},"CUU2_Msh7YU":{"duration":33.762,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=CUU2_Msh7YU","annotations":[]},"RNrxxPOyHo4":{"duration":20.25,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=RNrxxPOyHo4","annotations":[{"segment":[0,1.4530402496099846],"label":"Ironing 
clothes"}]},"EVDFLo3vufo":{"duration":111.67,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EVDFLo3vufo","annotations":[]},"bF55ewgaJLA":{"duration":201.29,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bF55ewgaJLA","annotations":[{"segment":[86.67251976609676,121.21591532504836],"label":"Playing saxophone"}]},"aPzHheM0Egw":{"duration":64.297,"subset":"validation","resolution":"656x480","url":"https://www.youtube.com/watch?v=aPzHheM0Egw","annotations":[{"segment":[4.814659067995374,60.082931422776916],"label":"Fixing the roof"}]},"NQ51aXtD6Q0":{"duration":96.99,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NQ51aXtD6Q0","annotations":[{"segment":[23.465158174330572,93.07544010579474],"label":"Doing a powerbomb"}]},"ivivR6ZdgRM":{"duration":49.78,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ivivR6ZdgRM","annotations":[{"segment":[0.2,49.513332999999996],"label":"Playing harmonica"}]},"QJ5xv6oj1sk":{"duration":75.14,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QJ5xv6oj1sk","annotations":[]},"HgOHqD0lWTE":{"duration":137.56,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=HgOHqD0lWTE","annotations":[{"segment":[83.4772213572543,100.85936770670828],"label":"Starting a campfire"}]},"yqFdxxWgvh8":{"duration":92.14,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=yqFdxxWgvh8","annotations":[{"segment":[16.44143779797192,79.1917287823713],"label":"Mixing 
drinks"}]},"uc1RbyLfuSs":{"duration":7.76,"subset":"training","resolution":"176x144","url":"https://www.youtube.com/watch?v=uc1RbyLfuSs","annotations":[{"segment":[0.04839602496099844,7.694967968798752],"label":"Windsurfing"}]},"3VkqckKyshc":{"duration":66.711,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3VkqckKyshc","annotations":[{"segment":[0.01,66.681],"label":"Kite flying"}]},"V2UitlmX7Uo":{"duration":78.53,"subset":"testing","resolution":"270x360","url":"https://www.youtube.com/watch?v=V2UitlmX7Uo","annotations":[]},"WTfeKnRJ17g":{"duration":91.719,"subset":"validation","resolution":"294x240","url":"https://www.youtube.com/watch?v=WTfeKnRJ17g","annotations":[{"segment":[29.332852225653653,86.99694532293292],"label":"Rope skipping"}]},"3SL_Au0H0cw":{"duration":120.09,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=3SL_Au0H0cw","annotations":[{"segment":[7.205085803432136,117.43354134165367],"label":"Tai chi"}]},"hoYF0DhYVOI":{"duration":72.03,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=hoYF0DhYVOI","annotations":[{"segment":[4.270008305772231,67.42118377535103],"label":"Mixing drinks"}]},"rA3fOmbAfi0":{"duration":194.259,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=rA3fOmbAfi0","annotations":[]},"uxsZFuXKs3A":{"duration":12.934,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uxsZFuXKs3A","annotations":[{"segment":[0.2,12.733514739229026],"label":"Trimming branches or 
hedges"}]},"Al5u0b_QmiQ":{"duration":230.71,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Al5u0b_QmiQ","annotations":[]},"t2zLJ5mKTT8":{"duration":85.311,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=t2zLJ5mKTT8","annotations":[{"segment":[0.01,85.309794],"label":"Wakeboarding"}]},"ERYwCKVUTSQ":{"duration":60.26,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=ERYwCKVUTSQ","annotations":[]},"-ux1hQSR1AM":{"duration":53.64,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-ux1hQSR1AM","annotations":[{"segment":[13.625121684867397,50.256163806552266],"label":"Using the pommel horse"}]},"XlEmG7nM0jw":{"duration":66.22,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=XlEmG7nM0jw","annotations":[{"segment":[35.15579314,54.46102443],"label":"Mixing drinks"},{"segment":[0.7876966200000001,34.54586583463338],"label":"Mixing drinks"}]},"GyOLWizKXaE":{"duration":123.25,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GyOLWizKXaE","annotations":[{"segment":[0.2,4.034751950078004],"label":"Mowing the lawn"},{"segment":[16.139007800312015,18.636711388455538],"label":"Mowing the lawn"},{"segment":[43.99800936037442,51.87538221528861],"label":"Mowing the lawn"},{"segment":[69.3593073322933,76.85241809672388],"label":"Mowing the lawn"},{"segment":[97.79470202808113,105.67207488299533],"label":"Mowing the lawn"}]},"uqd8A4iJ6Bo":{"duration":114.08,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uqd8A4iJ6Bo","annotations":[{"segment":[14.059736730109202,93.4349592823713],"label":"Baking 
cookies"}]},"9B-CgqaLGeI":{"duration":138.86,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9B-CgqaLGeI","annotations":[]},"zKgFb1QojnQ":{"duration":29.629,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zKgFb1QojnQ","annotations":[]},"aUbSDHBZNEQ":{"duration":35.85,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=aUbSDHBZNEQ","annotations":[{"segment":[0.8926302652106085,31.018901716068644],"label":"Walking the dog"}]},"x-WGuz3_2Fc":{"duration":87.122,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=x-WGuz3_2Fc","annotations":[{"segment":[2.6326366544182322,86.49703482264253],"label":"Peeling potatoes"}]},"Mngqx9_VIEA":{"duration":99.68,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Mngqx9_VIEA","annotations":[]},"cdufbM2OCwM":{"duration":100.1,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=cdufbM2OCwM","annotations":[{"segment":[21.550653634308638,93.85465682215289],"label":"Playing saxophone"}]},"4zYY4abpCgI":{"duration":120.373,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4zYY4abpCgI","annotations":[{"segment":[40.54498595943838,79.60153510140405],"label":"Painting furniture"},{"segment":[100.44061622464898,114.89655538221528],"label":"Painting furniture"}]},"lXTcGMN9I8g":{"duration":29.745,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lXTcGMN9I8g","annotations":[{"segment":[0,22.691401137285492],"label":"Slacklining"}]},"J8pZtBhpqMI":{"duration":81.549,"subset":"validation","resolution":"482x360","url":"https://www.youtube.com/watch?v=J8pZtBhpqMI","annotations":[{"segment":[0.2,78.20765990639626],"label":"Using the monkey 
bar"}]},"2DiyTT1vZG4":{"duration":127.061,"subset":"testing","resolution":"270x480","url":"https://www.youtube.com/watch?v=2DiyTT1vZG4","annotations":[]},"X9Z9uqrb9EY":{"duration":70.52,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=X9Z9uqrb9EY","annotations":[{"segment":[0,70.519023],"label":"Playing congas"}]},"FpxVS1Xpl1U":{"duration":237.05,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=FpxVS1Xpl1U","annotations":[{"segment":[17.51577228,178.31288492000002],"label":"Plataform diving"}]},"ZdaS-WZHUZY":{"duration":154.622,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZdaS-WZHUZY","annotations":[{"segment":[0.01,154.581],"label":"Hurling"}]},"Vq7Mm98I5Qs":{"duration":229.18,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Vq7Mm98I5Qs","annotations":[{"segment":[5.363034480562896,46.837167796915956],"label":"Doing motocross"},{"segment":[65.78655629490486,100.82504823458244],"label":"Doing motocross"},{"segment":[103.32779765884511,149.0923585596485],"label":"Doing motocross"},{"segment":[154.81292867224892,226.32005395787834],"label":"Doing motocross"}]},"fhpTXBJgFbo":{"duration":38.06,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fhpTXBJgFbo","annotations":[]},"X7f-hwiYZ1Q":{"duration":167.77,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=X7f-hwiYZ1Q","annotations":[{"segment":[104.66209048361935,145.21865054602185],"label":"Brushing teeth"}]},"G0g7vEwI0z8":{"duration":106.046,"subset":"training","resolution":"852x480","url":"https://www.youtube.com/watch?v=G0g7vEwI0z8","annotations":[{"segment":[25.82816909933833,86.43518934855952],"label":"Making an 
omelette"}]},"__c8enCfzqw":{"duration":172.803,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=__c8enCfzqw","annotations":[{"segment":[14.825202808112325,153.37346177847115],"label":"Blow-drying hair"}]},"nibek2g971I":{"duration":168.624,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=nibek2g971I","annotations":[{"segment":[26.36375803307506,160.28324401438346],"label":"Slacklining"}]},"EQMDnhIKU4w":{"duration":113.22,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=EQMDnhIKU4w","annotations":[{"segment":[46.45387996723869,93.08439065678627],"label":"Throwing darts"}]},"NwlIZ1T93xE":{"duration":84.01,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=NwlIZ1T93xE","annotations":[]},"1dM62Xpm9Ns":{"duration":187.153,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=1dM62Xpm9Ns","annotations":[{"segment":[73.57846625353982,143.2506268251909],"label":"Sharpening knives"}]},"1opIvQF0m_M":{"duration":116.15,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1opIvQF0m_M","annotations":[{"segment":[58.0245,112.89240795631827],"label":"Kayaking"}]},"ybhc3-s7aYo":{"duration":19.62,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=ybhc3-s7aYo","annotations":[{"segment":[0.01,19.62086],"label":"Chopping wood"}]},"kPM3RAn0Mk4":{"duration":232.29,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kPM3RAn0Mk4","annotations":[{"segment":[31.89035810684128,196.41561421840873],"label":"Doing 
nails"}]},"RM5ywQYztnM":{"duration":5.016,"subset":"testing","resolution":"406x720","url":"https://www.youtube.com/watch?v=RM5ywQYztnM","annotations":[]},"xtfTn-bdIPU":{"duration":57.098,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xtfTn-bdIPU","annotations":[]},"g5CYoFJFkPk":{"duration":105.675,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=g5CYoFJFkPk","annotations":[{"segment":[0.01,105.621],"label":"Baton twirling"}]},"dgbGv3G4k5M":{"duration":119.769,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dgbGv3G4k5M","annotations":[{"segment":[10.276550599438943,115.09736663962558],"label":"Decorating the Christmas tree"}]},"dwCeFVAaP9c":{"duration":144.266,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dwCeFVAaP9c","annotations":[{"segment":[4.2725101404056165,138.06953822152886],"label":"Installing carpet"}]},"lztbD1NRU4M":{"duration":23.011,"subset":"validation","resolution":"192x144","url":"https://www.youtube.com/watch?v=lztbD1NRU4M","annotations":[{"segment":[7.41486271450858,15.438675507020278],"label":"Doing a powerbomb"}]},"2HUGDgn1Dp0":{"duration":156.271,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=2HUGDgn1Dp0","annotations":[]},"MubE2kOK6z0":{"duration":93.69,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MubE2kOK6z0","annotations":[{"segment":[13.466146528577928,17.4064869117555],"label":"Ballet"},{"segment":[29.227508061288212,54.91268981829756],"label":"Ballet"},{"segment":[82.34913396783026,85.41384315474616],"label":"Ballet"}]},"vrWcBuRPDBw":{"duration":132.77,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vrWcBuRPDBw","annotations":[{"segment":[28.637376989999996,51.60808769999999],"label":"Horseback riding"},{"segment":[59.30482257,97.82168967],"label":"Horseback 
riding"}]},"RK8VF2pL72I":{"duration":34.157,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=RK8VF2pL72I","annotations":[{"segment":[0.20000000000000004,33.11118847352025],"label":"Knitting"}]},"lKi-hl_KGJ4":{"duration":207.17000000000002,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=lKi-hl_KGJ4","annotations":[{"segment":[44.12264064,195.6508093026521],"label":"Zumba"},{"segment":[2.99979264,19.93992],"label":"Zumba"}]},"6I1aP4O04R8":{"duration":235.427,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6I1aP4O04R8","annotations":[{"segment":[13.773037452110328,100.8186341494476],"label":"Blowing leaves"},{"segment":[129.09927105111416,139.3831390153565],"label":"Blowing leaves"}]},"ivjnwAR91Sk":{"duration":119.72,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ivjnwAR91Sk","annotations":[{"segment":[8.589027665106604,54.521653874154964],"label":"High jump"}]},"TADJh_DZOfE":{"duration":64.13,"subset":"training","resolution":"448x338","url":"https://www.youtube.com/watch?v=TADJh_DZOfE","annotations":[{"segment":[21.911449839313573,24.913018310452415],"label":"High jump"},{"segment":[39.22049468954758,43.12253370202807],"label":"High jump"},{"segment":[46.324206737909506,49.42582749141965],"label":"High jump"}]},"ZoKode_Gexg":{"duration":47.81,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZoKode_Gexg","annotations":[{"segment":[4.582597471918877,29.470200002340093],"label":"Washing dishes"}]},"MERZe5qdfWc":{"duration":128.082,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=MERZe5qdfWc","annotations":[{"segment":[1.7971216848673948,11.980811232449298],"label":"Snow tubing"},{"segment":[16.773135725429018,21.565460218408738],"label":"Snow tubing"},{"segment":[55.5485066801872,73.13220663650546],"label":"Snow 
tubing"},{"segment":[77.52813162558502,94.91201680967238],"label":"Snow tubing"},{"segment":[97.70942362090483,115.69275312168486],"label":"Snow tubing"},{"segment":[117.89071561622464,124.88423264430577],"label":"Snow tubing"}]},"K-kJ-0nkKZk":{"duration":130.15,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=K-kJ-0nkKZk","annotations":[{"segment":[63.14505467293522,94.41302386789349],"label":"Starting a campfire"}]},"htKhaMfxxrM":{"duration":188.245,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=htKhaMfxxrM","annotations":[{"segment":[124.81054793212137,175.90945324180967],"label":"Arm wrestling"}]},"Va3NsrY1DJ8":{"duration":85.612,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Va3NsrY1DJ8","annotations":[{"segment":[6.410880823712948,74.7936096099844],"label":"Slacklining"}]},"Ej_a4iCd45I":{"duration":174.94,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ej_a4iCd45I","annotations":[{"segment":[5.72933073322933,12.82278783151326],"label":"Playing beach volleyball"},{"segment":[22.91732293291732,32.19338221528861],"label":"Playing beach volleyball"},{"segment":[36.285761310452415,44.19769422776911],"label":"Playing beach volleyball"},{"segment":[47.19877223088923,54.83787987519501],"label":"Playing beach volleyball"},{"segment":[58.384608424336975,153.87345397815912],"label":"Playing beach volleyball"}]},"ouqYXdtEuCI":{"duration":100.15,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=ouqYXdtEuCI","annotations":[{"segment":[29.422860016380657,36.91515328315133],"label":"Springboard diving"},{"segment":[50.182755943057714,67.5086841224649],"label":"Springboard diving"},{"segment":[74.5327090600624,99.0387516201248],"label":"Springboard 
diving"}]},"GXEuuWhEMCY":{"duration":181.84,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=GXEuuWhEMCY","annotations":[{"segment":[98.40959680343212,150.025004925117],"label":"Snatch"},{"segment":[9.7572661,95.2124427],"label":"Snatch"}]},"6cha8sVi65k":{"duration":161.008,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=6cha8sVi65k","annotations":[{"segment":[44.69743837753511,129.08616848673947],"label":"Playing ice hockey"}]},"I3WRp3QB8eg":{"duration":123.93,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=I3WRp3QB8eg","annotations":[{"segment":[44.64606552262091,114.80416848673947],"label":"Getting a haircut"}]},"WlYhEPyLcBc":{"duration":90.0,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WlYhEPyLcBc","annotations":[{"segment":[4.139802652106084,37.39855616224649],"label":"Playing badminton"},{"segment":[50.87045631825273,75.00761076443058],"label":"Playing badminton"}]},"TscC5kgurqY":{"duration":170.133,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TscC5kgurqY","annotations":[{"segment":[59.156229329173165,101.33488611544462],"label":"Making an omelette"}]},"2ZJLCKVgPRA":{"duration":36.69,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=2ZJLCKVgPRA","annotations":[]},"DACWnbWrd18":{"duration":74.142,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=DACWnbWrd18","annotations":[{"segment":[0,10.294195117464563],"label":"Cutting the grass"},{"segment":[25.33065989578359,73.79434166302651],"label":"Cutting the grass"}]},"ID44l9VqqGQ":{"duration":110.85,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ID44l9VqqGQ","annotations":[{"segment":[0,110.85201814058956],"label":"Playing 
accordion"}]},"ULwRERVQD6A":{"duration":41.52,"subset":"testing","resolution":"294x240","url":"https://www.youtube.com/watch?v=ULwRERVQD6A","annotations":[]},"FXQxGgdXTXE":{"duration":27.888,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FXQxGgdXTXE","annotations":[]},"P17kaxPrbdc":{"duration":136.77,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=P17kaxPrbdc","annotations":[{"segment":[80.65085803432137,136.76507936507937],"label":"Doing karate"}]},"rOtmhurweqo":{"duration":84.2,"subset":"testing","resolution":"634x480","url":"https://www.youtube.com/watch?v=rOtmhurweqo","annotations":[]},"EuIBTFqVOt8":{"duration":230.435,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EuIBTFqVOt8","annotations":[{"segment":[14.716241584899793,202.79698769435083],"label":"Arm wrestling"}]},"clSku91LoQQ":{"duration":126.27,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=clSku91LoQQ","annotations":[{"segment":[0,20.7231768934908],"label":"Playing accordion"},{"segment":[61.82980213959672,80.5146317969176],"label":"Playing accordion"},{"segment":[98.60941420190201,121.03120979068706],"label":"Playing accordion"}]},"zfU85oBVpfA":{"duration":44.931,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zfU85oBVpfA","annotations":[{"segment":[6.448650599085188,36.30870573790951],"label":"Kneeling"}]},"fHa6VNVy7m8":{"duration":32.462,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=fHa6VNVy7m8","annotations":[]},"VvfY0GWxd7Y":{"duration":124.51,"subset":"testing","resolution":"320x178","url":"https://www.youtube.com/watch?v=VvfY0GWxd7Y","annotations":[]},"6_uxd3nj2d0":{"duration":189.94,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=6_uxd3nj2d0","annotations":[{"segment":[4.739619344773791,45.9150624024961],"label":"Putting on 
makeup"}]},"1aCwFDS0j2A":{"duration":75.744,"subset":"training","resolution":"424x240","url":"https://www.youtube.com/watch?v=1aCwFDS0j2A","annotations":[{"segment":[0.010000000000000002,75.681],"label":"Kneeling"}]},"224E-VtB4k4":{"duration":225.49,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=224E-VtB4k4","annotations":[{"segment":[32.363472889936006,103.77417937533828],"label":"Playing violin"},{"segment":[108.69905568467637,207.54835820592822],"label":"Playing violin"}]},"_Af_9cK5x4E":{"duration":232.29,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_Af_9cK5x4E","annotations":[{"segment":[21.199840332388806,202.75744728156474],"label":"Doing motocross"}]},"5Jo_AWt9fB0":{"duration":65.04,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5Jo_AWt9fB0","annotations":[{"segment":[3.887910434865506,64.13027024794962],"label":"Kneeling"}]},"ru7UAr2488M":{"duration":203.756,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ru7UAr2488M","annotations":[{"segment":[21.581510868106328,31.737515982509304],"label":"Playing ice hockey"},{"segment":[100.6079256645545,109.17705497983201],"label":"Playing ice hockey"},{"segment":[136.78869388461513,159.32233023219672],"label":"Playing ice hockey"},{"segment":[175.19108822345137,177.41271294236762],"label":"Playing ice hockey"}]},"vMdSEzQkRTg":{"duration":8.08,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=vMdSEzQkRTg","annotations":[{"segment":[6.21483260374415,8.080543],"label":"Starting a campfire"}]},"3am00ii0n-o":{"duration":213.09,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=3am00ii0n-o","annotations":[{"segment":[88.42717529653567,100.06232994081668],"label":"Playing 
flauta"}]},"03KTIy44yTo":{"duration":96.433,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=03KTIy44yTo","annotations":[{"segment":[0,96.401],"label":"Blowing leaves"}]},"n0Az4cLruf4":{"duration":148.515,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=n0Az4cLruf4","annotations":[{"segment":[84.9152784410696,96.03651545381543],"label":"Using the rowing machine"}]},"6C-hZvDrIK0":{"duration":105.44,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=6C-hZvDrIK0","annotations":[]},"HQVSLlKhupk":{"duration":79.495,"subset":"testing","resolution":"656x480","url":"https://www.youtube.com/watch?v=HQVSLlKhupk","annotations":[]},"4FdWr8WykqE":{"duration":135.977,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=4FdWr8WykqE","annotations":[]},"CAh5HrKU9jE":{"duration":132.447,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CAh5HrKU9jE","annotations":[]},"XKvzx2cD9KY":{"duration":146.426,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=XKvzx2cD9KY","annotations":[{"segment":[90.17777797063616,137.77160611311245],"label":"Sumo"}]},"StGxFfLiYNU":{"duration":69.985,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=StGxFfLiYNU","annotations":[]},"1Y1pKGFm-pQ":{"duration":32.81,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1Y1pKGFm-pQ","annotations":[{"segment":[0.25592523020648716,5.42561463026521],"label":"Putting on makeup"},{"segment":[13.717591706708268,20.187377699999995],"label":"Putting on 
makeup"}]},"j_Xr8nLEjLQ":{"duration":141.991,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=j_Xr8nLEjLQ","annotations":[{"segment":[0.19999999999999998,19.265003120124806],"label":"Slacklining"},{"segment":[25.908107644305773,120.70007020280812],"label":"Slacklining"}]},"1uYv7kTOfCE":{"duration":110.226,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=1uYv7kTOfCE","annotations":[]},"NNqghz7Fd0M":{"duration":132.703,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NNqghz7Fd0M","annotations":[{"segment":[0.01,132.701881],"label":"Playing drums"}]},"gyCXP8w8GRA":{"duration":122.21,"subset":"training","resolution":"1906x1080","url":"https://www.youtube.com/watch?v=gyCXP8w8GRA","annotations":[{"segment":[42.7598794,115.6803744149766],"label":"Doing karate"},{"segment":[16.97074722,36.18397454],"label":"Doing karate"}]},"yNHc-xADBm8":{"duration":83.13,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=yNHc-xADBm8","annotations":[{"segment":[2.657004680187207,19.765522620904836],"label":"Starting a campfire"}]},"cJiwi-wB1FY":{"duration":79.529,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cJiwi-wB1FY","annotations":[{"segment":[49.2259567715111,59.244303754276835],"label":"Throwing darts"}]},"nuaTROuaZPY":{"duration":165.93,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nuaTROuaZPY","annotations":[{"segment":[92.85890639625585,139.67634945397813],"label":"Starting a campfire"}]},"wqZzLAPmr9k":{"duration":184.53,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wqZzLAPmr9k","annotations":[{"segment":[20.727093508230123,165.87406448],"label":"Preparing 
pasta"}]},"E8H-67ILaqc":{"duration":193.59,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=E8H-67ILaqc","annotations":[]},"_jT57LdAHUs":{"duration":208.167,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_jT57LdAHUs","annotations":[{"segment":[47.7939564062876,60.43957567731564],"label":"Playing drums"},{"segment":[85.73081421937171,101.29465332217545],"label":"Playing drums"},{"segment":[92.86424047482343,102.59163991407577],"label":"Playing drums"},{"segment":[154.14685694211315,173.60165582061782],"label":"Playing drums"}]},"uIcMgjypbqk":{"duration":177.05,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=uIcMgjypbqk","annotations":[]},"bPZRYmr7p1k":{"duration":143.73,"subset":"validation","resolution":"576x320","url":"https://www.youtube.com/watch?v=bPZRYmr7p1k","annotations":[{"segment":[37.51527543525741,40.8450336099844],"label":"Archery"},{"segment":[78.13832516692668,78.3603090452418],"label":"Archery"},{"segment":[55.71795345709828,58.60374387519501],"label":"Archery"},{"segment":[63.04342144149766,67.92706676443058],"label":"Archery"}]},"ULPw4uZcx5I":{"duration":81.131,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ULPw4uZcx5I","annotations":[{"segment":[3.5876456756235284,78.49869396222789],"label":"Snow tubing"}]},"gGSs4U8CTeM":{"duration":114.00999999999999,"subset":"testing","resolution":"854x468","url":"https://www.youtube.com/watch?v=gGSs4U8CTeM","annotations":[]},"iWj81FBROQQ":{"duration":222.22,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iWj81FBROQQ","annotations":[{"segment":[11.957577985179407,202.58563282137283],"label":"Using the balance beam"}]},"-mX18jJkPDk":{"duration":189.62,"subset":"validation","resolution":"208x160","url":"https://www.youtube.com/watch?v=-mX18jJkPDk","annotations":[{"segment":[14.490622464898596,109.71471294851796],"label":"Brushing 
hair"},{"segment":[127.16260530421216,187.2149344773791],"label":"Brushing hair"}]},"UNPQdg_tWh4":{"duration":132.099,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=UNPQdg_tWh4","annotations":[{"segment":[0.01,132.041],"label":"Croquet"}]},"ghxiG0Dnrt0":{"duration":62.79,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=ghxiG0Dnrt0","annotations":[]},"_JE5T2RKZvo":{"duration":118.213,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_JE5T2RKZvo","annotations":[{"segment":[98.4799201123245,109.36066034945398],"label":"Painting furniture"}]},"OFn-c_XvIhU":{"duration":89.722,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OFn-c_XvIhU","annotations":[{"segment":[31.744940717628708,45.30995943837754],"label":"Scuba diving"}]},"p9yq0kAqGU4":{"duration":141.433,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=p9yq0kAqGU4","annotations":[{"segment":[0.01,141.381],"label":"Tug of war"}]},"jCxQQ9DMpT0":{"duration":188.99,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jCxQQ9DMpT0","annotations":[]},"Rw4swFpJOCc":{"duration":136.952,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Rw4swFpJOCc","annotations":[]},"Kb8SPjD121k":{"duration":58.05,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Kb8SPjD121k","annotations":[{"segment":[6.339246630265211,23.998576528861154],"label":"Arm wrestling"},{"segment":[33.77912847269891,56.60041634165367],"label":"Arm wrestling"}]},"rmMCA60HqZ0":{"duration":128.52,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rmMCA60HqZ0","annotations":[{"segment":[66.52482371294853,67.52670358814352],"label":"Washing 
face"}]},"KMK3_VeODlk":{"duration":217.43,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KMK3_VeODlk","annotations":[]},"utgRhyBvtRU":{"duration":32.183,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=utgRhyBvtRU","annotations":[]},"rEJb170mQuA":{"duration":161.077,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rEJb170mQuA","annotations":[]},"UIH5arBo3-Y":{"duration":122.0,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=UIH5arBo3-Y","annotations":[{"segment":[15.184168799999997,108.83259282371294],"label":"Playing guitarra"}]},"aEol4D2AncU":{"duration":85.543,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aEol4D2AncU","annotations":[]},"_kdhjXHUfes":{"duration":113.99000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_kdhjXHUfes","annotations":[{"segment":[10.669587273286849,99.2271616415677],"label":"Tango"}]},"gHk4mjlJ5cg":{"duration":210.883,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=gHk4mjlJ5cg","annotations":[{"segment":[55.2705920808261,89.15672841965679],"label":"Rope skipping"},{"segment":[103.30336798439937,203.97480302652104],"label":"Rope skipping"}]},"mBsIWxXJPY0":{"duration":209.375,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mBsIWxXJPY0","annotations":[{"segment":[2.2793423369542216,108.10618097231922],"label":"Snowboarding"},{"segment":[125.36409619782465,177.78908395832232],"label":"Snowboarding"}]},"WQmJrfjOF7o":{"duration":187.084,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WQmJrfjOF7o","annotations":[{"segment":[30.131388092313923,135.0375733259588],"label":"Laying tile"},{"segment":[144.071161498856,177.58285955960363],"label":"Laying 
tile"}]},"oXPgInpuHDA":{"duration":82.55,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oXPgInpuHDA","annotations":[{"segment":[0,82.54696145124717],"label":"Cheerleading"}]},"uCvjzcVCtEs":{"duration":19.41,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uCvjzcVCtEs","annotations":[]},"Ds-LrRrLj9c":{"duration":95.46000000000001,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=Ds-LrRrLj9c","annotations":[{"segment":[11.535530421216848,78.8379363],"label":"Brushing teeth"}]},"zlOErVVHKEk":{"duration":185.51,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zlOErVVHKEk","annotations":[{"segment":[2.8939815551805745,58.747825570165666],"label":"Cricket"},{"segment":[67.14037208018934,176.8222730215331],"label":"Cricket"}]},"l9LHfuoU8js":{"duration":107.09,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=l9LHfuoU8js","annotations":[{"segment":[3.5084079934625954,97.23302123868956],"label":"Fixing the roof"}]},"JLDZdxTf5TA":{"duration":125.69,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JLDZdxTf5TA","annotations":[{"segment":[37.42659090320195,43.88374510880943],"label":"Playing bagpipes"}]},"nd5l829R8mw":{"duration":13.073,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=nd5l829R8mw","annotations":[{"segment":[0.2,11.193638065522622],"label":"Drum corps"}]},"cfhc7tf8vU4":{"duration":151.74,"subset":"training","resolution":"634x360","url":"https://www.youtube.com/watch?v=cfhc7tf8vU4","annotations":[{"segment":[2.6069275600000004,133.82764820592823],"label":"Triple jump"}]},"LbXhdPZakpo":{"duration":50.67,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=LbXhdPZakpo","annotations":[{"segment":[0.01,49.915040733229326],"label":"Playing field 
hockey"}]},"0zjA3KPnLK8":{"duration":180.234,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=0zjA3KPnLK8","annotations":[{"segment":[0.01,180.2332879818594],"label":"Sumo"}]},"IlKOWIBAEFE":{"duration":226.627,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IlKOWIBAEFE","annotations":[{"segment":[73.99348367029549,177.23201088646968],"label":"Scuba diving"}]},"k1aFJ-F8xTs":{"duration":27.05,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=k1aFJ-F8xTs","annotations":[{"segment":[2.5494001560062403,17.930078783151327],"label":"Shaving"},{"segment":[21.427602964118567,24.714432917316692],"label":"Shaving"}]},"EAp9xQx2o6k":{"duration":68.15,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EAp9xQx2o6k","annotations":[{"segment":[0.13492700640954705,68.061],"label":"Doing step aerobics"}]},"maHLwXvNN3w":{"duration":43.886,"subset":"validation","resolution":"854x470","url":"https://www.youtube.com/watch?v=maHLwXvNN3w","annotations":[{"segment":[0.01,43.680320742277694],"label":"Playing congas"}]},"Gq8-XVrlAt4":{"duration":173.29,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Gq8-XVrlAt4","annotations":[{"segment":[8.379188767550701,92.17107644305771],"label":"Tennis serve with ball bouncing"},{"segment":[117.30864274570982,169.74614664586582],"label":"Tennis serve with ball bouncing"}]},"ShozMaLUS20":{"duration":14.072,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ShozMaLUS20","annotations":[{"segment":[0.01,14.071292517006803],"label":"Raking leaves"}]},"Tc8L-74Ilck":{"duration":233.34,"subset":"training","resolution":"624x352","url":"https://www.youtube.com/watch?v=Tc8L-74Ilck","annotations":[{"segment":[9.464531256080175,169.63352064898595],"label":"Rock 
climbing"}]},"5JVHUcOW0GE":{"duration":129.06,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5JVHUcOW0GE","annotations":[{"segment":[7.6507758215090504,26.57637916945249],"label":"Making a sandwich"}]},"63lmoqLxB8c":{"duration":237.03,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=63lmoqLxB8c","annotations":[{"segment":[18.8190135,203.69364274570984],"label":"Volleyball"}]},"aGlfi9PqRdY":{"duration":122.532,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aGlfi9PqRdY","annotations":[{"segment":[0.01,122.531699],"label":"Peeling potatoes"}]},"GbykXyc8LA8":{"duration":59.86,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GbykXyc8LA8","annotations":[{"segment":[12.42032451326053,38.661762018720744],"label":"Getting a piercing"}]},"eA5ANAdLvFE":{"duration":121.836,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=eA5ANAdLvFE","annotations":[{"segment":[0.01,114.3200596278904],"label":"Making an omelette"}]},"tBC1HnUZzco":{"duration":31.533,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=tBC1HnUZzco","annotations":[]},"axzmwzPQ134":{"duration":98.5,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=axzmwzPQ134","annotations":[{"segment":[0.20000000000000004,11.057335413416537],"label":"Croquet"},{"segment":[34.554173166926674,40.859010920436816],"label":"Croquet"},{"segment":[77.26346801872074,82.48604836193448],"label":"Croquet"}]},"xuq9oRm8QZo":{"duration":92.834,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=xuq9oRm8QZo","annotations":[{"segment":[25.69881802390474,31.906959988829644],"label":"Painting fence"},{"segment":[41.724486059836835,48.07700330400479],"label":"Painting fence"},{"segment":[69.15581052328939,80.27271570058333],"label":"Painting 
fence"}]},"sFrtQ8j8_jY":{"duration":20.922,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sFrtQ8j8_jY","annotations":[]},"jhuRtkqUSSI":{"duration":38.75,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=jhuRtkqUSSI","annotations":[{"segment":[1.2982735085803432,37.106468419656785],"label":"Plataform diving"}]},"-_Ud3LXSjdE":{"duration":66.6,"subset":"training","resolution":"424x240","url":"https://www.youtube.com/watch?v=-_Ud3LXSjdE","annotations":[{"segment":[0.5194604368174728,65.97147547581903],"label":"Smoking a cigarette"}]},"h49mHiWjXBA":{"duration":218.732,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=h49mHiWjXBA","annotations":[{"segment":[18.796388473954746,28.350983194703577],"label":"Curling"},{"segment":[18.11391742247269,29.37468977192667],"label":"Curling"},{"segment":[47.11893711046021,70.66418838659126],"label":"Curling"},{"segment":[98.64550149735568,119.4608685675585],"label":"Curling"},{"segment":[129.35669881404837,197.26256843651328],"label":"Curling"}]},"l866oFamRY4":{"duration":18.11,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=l866oFamRY4","annotations":[{"segment":[1.4127585023400937,15.964171076443058],"label":"Walking the dog"}]},"weB3srg6o4c":{"duration":128.13,"subset":"training","resolution":"624x480","url":"https://www.youtube.com/watch?v=weB3srg6o4c","annotations":[{"segment":[48.65441497659907,110.39656786271452],"label":"Brushing teeth"}]},"GZsplhHXQy0":{"duration":11.03,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=GZsplhHXQy0","annotations":[{"segment":[0.22270358814352575,10.929606864274572],"label":"Hammer throw"}]},"XCwPsQwzfiA":{"duration":176.263,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XCwPsQwzfiA","annotations":[{"segment":[27.223096564678915,160.58877007176287],"label":"Making a 
cake"}]},"JFA1iR09BUA":{"duration":232.339,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=JFA1iR09BUA","annotations":[{"segment":[19.573009010156326,217.11541362714507],"label":"Capoeira"}]},"Cw5NDI8NnZU":{"duration":67.013,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Cw5NDI8NnZU","annotations":[{"segment":[0.2,21.849723388455537],"label":"Surfing"},{"segment":[33.87229845865835,64.60827298595945],"label":"Surfing"}]},"jmL68eFmLjk":{"duration":126.781,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jmL68eFmLjk","annotations":[]},"lE3Hs4bsPhg":{"duration":236.45,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=lE3Hs4bsPhg","annotations":[{"segment":[33.81915744,236.44836734693877],"label":"Tango"}]},"V1yT7gh-59E":{"duration":130.682,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=V1yT7gh-59E","annotations":[]},"CEQqdt0vV0o":{"duration":219.45,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CEQqdt0vV0o","annotations":[{"segment":[64.01454107537302,190.83169697879978],"label":"Breakdancing"}]},"dAjhBWalWRM":{"duration":32.09,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dAjhBWalWRM","annotations":[{"segment":[1.101372031201248,31.43916525429017],"label":"Playing blackjack"}]},"Pd9qzQ2MOE8":{"duration":123.856,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Pd9qzQ2MOE8","annotations":[{"segment":[9.270589703588143,104.50415132605305],"label":"Plastering"}]},"YHqFNnhaFAY":{"duration":63.925,"subset":"testing","resolution":"360x240","url":"https://www.youtube.com/watch?v=YHqFNnhaFAY","annotations":[]},"Qp_zbvHLwLk":{"duration":70.4,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Qp_zbvHLwLk","annotations":[{"segment":[0.01,70.341],"label":"Mowing the 
lawn"}]},"tokwDYW6rS0":{"duration":128.76,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tokwDYW6rS0","annotations":[{"segment":[54.21945397815912,111.25028705148206],"label":"Painting"}]},"oQDz6oh5vPI":{"duration":68.73,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=oQDz6oh5vPI","annotations":[]},"cyJAifECfH0":{"duration":17.6,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=cyJAifECfH0","annotations":[{"segment":[2.5278042121684865,16.08230031201248],"label":"Clean and jerk"}]},"vJyJG6Lvy9Q":{"duration":179.77,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=vJyJG6Lvy9Q","annotations":[{"segment":[17.667599040000002,95.02312792511701],"label":"Tumbling"},{"segment":[96.30351528,158.37187987519502],"label":"Tumbling"}]},"tzsgg_o6xHo":{"duration":98.2,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=tzsgg_o6xHo","annotations":[{"segment":[69.33043658190327,95.96067050078004],"label":"Tango"},{"segment":[5.03161428,68.08194404000001],"label":"Tango"}]},"GiHxeCLGNy8":{"duration":37.06,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=GiHxeCLGNy8","annotations":[{"segment":[0.2,36.859047619047615],"label":"Starting a campfire"}]},"e-VWzL3rU_c":{"duration":171.41,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=e-VWzL3rU_c","annotations":[]},"0bzSBV3jHIY":{"duration":152.184,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0bzSBV3jHIY","annotations":[{"segment":[0.01,152.183445],"label":"Playing 
pool"}]},"SPhcOskjsDs":{"duration":170.95,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=SPhcOskjsDs","annotations":[]},"1MBVaveQDd8":{"duration":131.45,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=1MBVaveQDd8","annotations":[{"segment":[0.01,97.18183151326052],"label":"Cumbia"}]},"ym_OhvcJ--w":{"duration":76.533,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ym_OhvcJ--w","annotations":[{"segment":[20.297247427312058,28.893728690644227],"label":"Skiing"},{"segment":[33.43076046851398,46.44487711232449],"label":"Skiing"}]},"Lomlff9wClo":{"duration":55.43,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=Lomlff9wClo","annotations":[{"segment":[5.630636989878718,54.54189399922451],"label":"Chopping wood"}]},"YrdpvaBDDlE":{"duration":77.69,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YrdpvaBDDlE","annotations":[{"segment":[0,77.69399],"label":"Preparing salad"}]},"TjLoGNBzNRA":{"duration":110.319,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TjLoGNBzNRA","annotations":[{"segment":[30.612945397815913,96.99832137285492],"label":"Hanging wallpaper"}]},"XjV0D7nJx0Q":{"duration":101.03,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=XjV0D7nJx0Q","annotations":[{"segment":[17.7310437,72.42323778393312],"label":"Fixing bicycle"}]},"3ImTO0bzXPA":{"duration":190.31,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=3ImTO0bzXPA","annotations":[{"segment":[56.57916300000001,164.73658317472697],"label":"Getting a piercing"}]},"KgGQ6dHKl8U":{"duration":24.242,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KgGQ6dHKl8U","annotations":[{"segment":[0.2,24.001],"label":"Calf 
roping"}]},"FSfxYKauc9w":{"duration":117.981,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FSfxYKauc9w","annotations":[{"segment":[1.4724566560893728,117.780589569161],"label":"Playing ice hockey"}]},"1krxpAYQVj0":{"duration":88.445,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1krxpAYQVj0","annotations":[]},"pwOTvCDsALk":{"duration":107.63,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pwOTvCDsALk","annotations":[]},"Ju64Ld8S7kA":{"duration":20.202,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ju64Ld8S7kA","annotations":[]},"W9H2qVnIWXs":{"duration":35.39,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=W9H2qVnIWXs","annotations":[]},"0fvL6IHKYF0":{"duration":166.488,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=0fvL6IHKYF0","annotations":[{"segment":[86.4766193447738,142.309872074883],"label":"Elliptical trainer"}]},"lC12SCZ_-1o":{"duration":102.77000000000001,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=lC12SCZ_-1o","annotations":[{"segment":[6.252858196567863,21.484179444617784],"label":"Shoveling snow"},{"segment":[35.11220371918877,92.34990567238691],"label":"Shoveling snow"}]},"KePjkCySBCs":{"duration":40.751,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=KePjkCySBCs","annotations":[{"segment":[1.7165006115444619,40.24240322620905],"label":"Rollerblading"}]},"SaG9e90z1j8":{"duration":99.99000000000001,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SaG9e90z1j8","annotations":[{"segment":[0.01,90.89797659906397],"label":"Polishing 
forniture"}]},"0Bd8Js6Pf6E":{"duration":68.151,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0Bd8Js6Pf6E","annotations":[{"segment":[5.42227592199688,67.19369377847113],"label":"Playing congas"}]},"A_SU2jjfMJk":{"duration":62.97,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=A_SU2jjfMJk","annotations":[{"segment":[12.13391496,55.573987360000004],"label":"Cleaning shoes"}]},"t_PZmjzVSoE":{"duration":125.04,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=t_PZmjzVSoE","annotations":[]},"6DXH6kwMe-Q":{"duration":180.68,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=6DXH6kwMe-Q","annotations":[{"segment":[16.770874873090165,164.18545569033648],"label":"Painting"}]},"U7OZcgwLRcc":{"duration":199.599,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=U7OZcgwLRcc","annotations":[{"segment":[7.161868572702092,175.9328582706708],"label":"Playing drums"}]},"HlYwtqJALns":{"duration":14.884,"subset":"validation","resolution":"480x480","url":"https://www.youtube.com/watch?v=HlYwtqJALns","annotations":[{"segment":[0.7894531076443059,13.420702829953198],"label":"Baton twirling"}]},"8C1EFngZC3Q":{"duration":57.24,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8C1EFngZC3Q","annotations":[{"segment":[4.330738889419522,26.029080129191563],"label":"Doing step 
aerobics"}]},"9MN6GNhvwzA":{"duration":12.515,"subset":"testing","resolution":"480x480","url":"https://www.youtube.com/watch?v=9MN6GNhvwzA","annotations":[]},"gMV5Mso7758":{"duration":177.146,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gMV5Mso7758","annotations":[{"segment":[81.22885179407176,84.82060374414976],"label":"Rock-paper-scissors"},{"segment":[89.79379875195006,94.21441653666146],"label":"Rock-paper-scissors"},{"segment":[136.4865741029641,139.54150546021842],"label":"Rock-paper-scissors"},{"segment":[145.89686115444619,150.87061778471138],"label":"Rock-paper-scissors"}]},"_n0cR3Oshxg":{"duration":218.454,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_n0cR3Oshxg","annotations":[{"segment":[65.7491542999529,84.11933187004637],"label":"Baking cookies"},{"segment":[98.06705928437658,102.48950944013981],"label":"Baking cookies"},{"segment":[113.3755405927878,160.33623133197676],"label":"Baking cookies"}]},"nXVNITiaaTQ":{"duration":116.49000000000001,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=nXVNITiaaTQ","annotations":[{"segment":[33.439798315132606,116.49408163265306],"label":"Doing motocross"}]},"EF74-5YIhAk":{"duration":44.676,"subset":"validation","resolution":"853x480","url":"https://www.youtube.com/watch?v=EF74-5YIhAk","annotations":[{"segment":[10.45441287051482,37.91467067706708],"label":"Canoeing"}]},"LaWlIUKH3PU":{"duration":176.728,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=LaWlIUKH3PU","annotations":[{"segment":[10.198435257410297,24.531371294851795],"label":"Baton twirling"},{"segment":[29.49277223088924,165.67441653666148],"label":"Baton twirling"}]},"spJaetMCD20":{"duration":114.61,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=spJaetMCD20","annotations":[{"segment":[12.337511753531366,82.78649191137713],"label":"Springboard 
diving"}]},"-8awLlFLcQc":{"duration":161.12,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-8awLlFLcQc","annotations":[{"segment":[95.26632926053043,161.12326530612245],"label":"Vacuuming floor"}]},"NHznDFD3V3k":{"duration":61.21,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NHznDFD3V3k","annotations":[{"segment":[12.213117004680187,21.754614664586583],"label":"Vacuuming floor"}]},"B42CY1Z6eV8":{"duration":127.826,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=B42CY1Z6eV8","annotations":[{"segment":[0.01,127.761],"label":"Putting on shoes"}]},"4sVEFT3jdlU":{"duration":43.98,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=4sVEFT3jdlU","annotations":[]},"B5hzlU0OepQ":{"duration":154.6,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=B5hzlU0OepQ","annotations":[{"segment":[49.68360936886456,116.97354541341653],"label":"Preparing salad"}]},"Rj_SwlpOhNk":{"duration":109.552,"subset":"validation","resolution":"294x240","url":"https://www.youtube.com/watch?v=Rj_SwlpOhNk","annotations":[{"segment":[64.20768587669362,102.77262977355048],"label":"Canoeing"}]},"s84BStnbfK8":{"duration":169.09,"subset":"training","resolution":"480x720","url":"https://www.youtube.com/watch?v=s84BStnbfK8","annotations":[{"segment":[133.98622966999997,142.53188665],"label":"Playing squash"},{"segment":[71.70641496,115.92266545999999],"label":"Playing 
squash"}]},"AjaR1IOAwcw":{"duration":65.852,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AjaR1IOAwcw","annotations":[]},"bdC2YPVHGZM":{"duration":141.039,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bdC2YPVHGZM","annotations":[{"segment":[50.58600624024961,115.48443837753511],"label":"Hurling"}]},"5n8wY8hwy3Y":{"duration":175.334,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5n8wY8hwy3Y","annotations":[{"segment":[139.06701237110872,162.79020930890033],"label":"Changing car wheel"}]},"vlqrUu4gi0Q":{"duration":220.056,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vlqrUu4gi0Q","annotations":[{"segment":[26.87282295292533,77.25936500900009],"label":"Slacklining"},{"segment":[79.31595856230928,200.65497710962316],"label":"Slacklining"}]},"ImXRDdhcCgs":{"duration":213.82999999999998,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=ImXRDdhcCgs","annotations":[{"segment":[13.60826848,210.4498205928237],"label":"Chopping wood"}]},"MtFX7uTHwFQ":{"duration":234.894,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MtFX7uTHwFQ","annotations":[{"segment":[0.01,186.86991950078004],"label":"Playing ten pins"}]},"DbZ0ns9s2X0":{"duration":181.02,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DbZ0ns9s2X0","annotations":[{"segment":[18.634549141965678,162.36439157566303],"label":"Painting"}]},"WaXfGbfUYJg":{"duration":223.77,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=WaXfGbfUYJg","annotations":[{"segment":[58.64810043830325,80.99023393860925],"label":"Doing nails"},{"segment":[100.88869658731927,141.3838135566239],"label":"Doing nails"},{"segment":[164.0750428928722,166.51871374446816],"label":"Doing nails"},{"segment":[184.3226013775245,186.0680805572359],"label":"Doing 
nails"},{"segment":[196.89004985335413,201.77739151638065],"label":"Doing nails"}]},"CIPSw35iF3k":{"duration":205.45,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=CIPSw35iF3k","annotations":[{"segment":[114.5838460137045,138.62241510748865],"label":"Preparing salad"}]},"T84P-XmTJUE":{"duration":149.63,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=T84P-XmTJUE","annotations":[]},"pYb8jzB5vs0":{"duration":194.653,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pYb8jzB5vs0","annotations":[{"segment":[24.900992426091598,78.65069558972836],"label":"Scuba diving"},{"segment":[83.81309645855221,99.90764034370899],"label":"Scuba diving"},{"segment":[102.9443467371348,126.63065660585606],"label":"Scuba diving"},{"segment":[138.4738115402167,152.746331589318],"label":"Scuba diving"},{"segment":[167.9298619875195,188.5794652698908],"label":"Scuba diving"}]},"xGpaEqbVgBI":{"duration":80.39,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=xGpaEqbVgBI","annotations":[{"segment":[19.52090592,55.441201920000005],"label":"Playing field hockey"},{"segment":[63.04884384,66.26261728],"label":"Playing field hockey"}]},"JpEhNDnRZZY":{"duration":20.02,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JpEhNDnRZZY","annotations":[]},"MCnvxOLnbsg":{"duration":53.613,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=MCnvxOLnbsg","annotations":[{"segment":[2.4555412489394906,52.485103554235444],"label":"Hand car 
wash"}]},"ekgfptN7tQk":{"duration":159.591,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ekgfptN7tQk","annotations":[{"segment":[10.207832568174727,19.170807506084245],"label":"Surfing"},{"segment":[23.901266501092046,26.390981761622466],"label":"Surfing"},{"segment":[29.627611600312015,32.36629838689548],"label":"Surfing"},{"segment":[38.34161501216849,59.25522320062403],"label":"Surfing"},{"segment":[62.242881513260535,64.23465372168486],"label":"Surfing"},{"segment":[67.72025508642746,72.20174255538222],"label":"Surfing"},{"segment":[74.94042934196568,80.91574596723869],"label":"Surfing"},{"segment":[89.62974937909516,92.61740769173167],"label":"Surfing"},{"segment":[98.34375279095164,106.06187009859595],"label":"Surfing"},{"segment":[110.54335756755071,112.03718672386896],"label":"Surfing"},{"segment":[116.51867419282372,128.71827896942278],"label":"Surfing"},{"segment":[131.7059372820593,135.68948169890797],"label":"Surfing"}]},"xYeqvN8cihg":{"duration":36.061,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=xYeqvN8cihg","annotations":[{"segment":[11.417654353316406,24.72974057761547],"label":"Playing rubik cube"}]},"p9hJmlWGvFI":{"duration":50.759,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=p9hJmlWGvFI","annotations":[{"segment":[0,45.61166976599064],"label":"Beach soccer"}]},"F51cKkjt6tk":{"duration":31.42,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=F51cKkjt6tk","annotations":[{"segment":[3.945454069422776,22.91304071372855],"label":"Putting on makeup"}]},"ksyr3tHc0Vs":{"duration":38.69,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=ksyr3tHc0Vs","annotations":[{"segment":[11.821460218408737,27.38236193447738],"label":"Shot 
put"}]},"Z9pWpZK4k2M":{"duration":228.438,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Z9pWpZK4k2M","annotations":[{"segment":[0.01,211.81130692705227],"label":"Powerbocking"}]},"GVmGXTEm46s":{"duration":77.462,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GVmGXTEm46s","annotations":[{"segment":[0.2,76.01388350467289],"label":"Cleaning sink"}]},"UF2v4emX7rc":{"duration":69.24,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=UF2v4emX7rc","annotations":[{"segment":[18.03946870202808,69.241314],"label":"Paintball"}]},"yUvSujxnFE0":{"duration":79.53,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=yUvSujxnFE0","annotations":[]},"nQUfEEVHMTs":{"duration":19.67,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=nQUfEEVHMTs","annotations":[]},"Tix8XrlBZxg":{"duration":200.249,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Tix8XrlBZxg","annotations":[{"segment":[37.47538221528861,149.91650546021842],"label":"Removing ice from car"}]},"cByxAZfw_hM":{"duration":51.06,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=cByxAZfw_hM","annotations":[{"segment":[10.992782960298005,38.30832621],"label":"Long jump"}]},"FRag7CCTP4k":{"duration":62.35,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=FRag7CCTP4k","annotations":[{"segment":[21.00052418096724,54.3109423],"label":"High 
jump"}]},"bh6VHVHMoo8":{"duration":26.587,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=bh6VHVHMoo8","annotations":[{"segment":[4.109307539717059,22.630129969623603],"label":"Kneeling"}]},"M0AAcy8nYOU":{"duration":84.85,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=M0AAcy8nYOU","annotations":[]},"C_FKTQZgANw":{"duration":208.19,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=C_FKTQZgANw","annotations":[]},"SnFWB9NLFjY":{"duration":216.782,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SnFWB9NLFjY","annotations":[{"segment":[45.676889516107565,204.0418571272434],"label":"Skiing"}]},"cnMvMvVBXiI":{"duration":144.893,"subset":"testing","resolution":"1200x720","url":"https://www.youtube.com/watch?v=cnMvMvVBXiI","annotations":[]},"DNlnKpM-QSw":{"duration":128.012,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DNlnKpM-QSw","annotations":[]},"09G6PvM5q9Y":{"duration":203.22,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=09G6PvM5q9Y","annotations":[{"segment":[88.79335153,188.75151295085806],"label":"Cleaning shoes"},{"segment":[46.73066895,67.88394284],"label":"Cleaning shoes"}]},"hVTYRjSDyC4":{"duration":136.07,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hVTYRjSDyC4","annotations":[]},"hzU9--vcDMY":{"duration":124.55,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hzU9--vcDMY","annotations":[{"segment":[11.952203588143528,108.5415561622465],"label":"Paintball"}]},"7Xl0K-revJc":{"duration":136.44,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=7Xl0K-revJc","annotations":[{"segment":[0,136.44045351473923],"label":"Playing 
saxophone"}]},"0-kEI13BtcY":{"duration":158.244,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0-kEI13BtcY","annotations":[{"segment":[57.52082595475819,62.45823590795632],"label":"Spread mulch"},{"segment":[105.90744349609984,109.61050096099845],"label":"Spread mulch"},{"segment":[115.7822634024961,117.01661589079563],"label":"Spread mulch"},{"segment":[139.2349606801872,149.85039207956316],"label":"Spread mulch"}]},"14X-i7vqXWU":{"duration":211.14,"subset":"training","resolution":"612x360","url":"https://www.youtube.com/watch?v=14X-i7vqXWU","annotations":[{"segment":[84.60762008,201.05641263650546],"label":"Cheerleading"}]},"hPJw9_nPo_s":{"duration":204.289,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=hPJw9_nPo_s","annotations":[{"segment":[6.491422364049953,196.46097439676024],"label":"Waterskiing"}]},"n5RVQbgz1Rs":{"duration":214.414,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=n5RVQbgz1Rs","annotations":[{"segment":[5.852289391575663,57.686852574102964],"label":"Hurling"}]},"-EIsT868Trw":{"duration":157.9,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=-EIsT868Trw","annotations":[]},"lQq_JPRFEMw":{"duration":171.8,"subset":"training","resolution":"346x240","url":"https://www.youtube.com/watch?v=lQq_JPRFEMw","annotations":[{"segment":[13.93345808424337,147.10516323556942],"label":"Playing field hockey"}]},"E4yf1mz4ij0":{"duration":89.49,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=E4yf1mz4ij0","annotations":[{"segment":[17.15495007800312,39.74927457098284],"label":"Shot put"},{"segment":[44.07287987519501,57.32263806552262],"label":"Shot put"},{"segment":[58.71734945397815,71.13028081123244],"label":"Shot put"},{"segment":[73.64076131045242,85.7747503900156],"label":"Shot 
put"}]},"ztogHoQpE4A":{"duration":52.942,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ztogHoQpE4A","annotations":[{"segment":[0.01,52.901],"label":"Welding"}]},"CBoitanoH4s":{"duration":226.49,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=CBoitanoH4s","annotations":[{"segment":[69.6069036121989,226.487437],"label":"Starting a campfire"}]},"V1IHwwpyFUE":{"duration":134.19,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=V1IHwwpyFUE","annotations":[{"segment":[21.980892950003714,99.19713712],"label":"Playing kickball"}]},"nypn_gYS8Ik":{"duration":61.12,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=nypn_gYS8Ik","annotations":[{"segment":[1.00043618,59.84222776911077],"label":"Brushing hair"}]},"nfIM66dU_J0":{"duration":94.22999999999999,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=nfIM66dU_J0","annotations":[{"segment":[37.474656786271446,86.7060686427457],"label":"Doing kickboxing"}]},"Keuj_3QyLq0":{"duration":8.987,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=Keuj_3QyLq0","annotations":[{"segment":[3.6168786552262095,6.140282368174728],"label":"Doing a powerbomb"}]},"9wH6BIpe9X4":{"duration":92.3,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=9wH6BIpe9X4","annotations":[{"segment":[11.087440507144095,75.74017800984149],"label":"Horseback riding"}]},"jelxK3R-heg":{"duration":18.019,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=jelxK3R-heg","annotations":[{"segment":[1.2367989516380655,17.5119487925117],"label":"Blowing 
leaves"}]},"wzhUvh_-eok":{"duration":153.647,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wzhUvh_-eok","annotations":[]},"0yy1Df51_QE":{"duration":111.456,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=0yy1Df51_QE","annotations":[]},"IjwOh2YmT9U":{"duration":176.26,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IjwOh2YmT9U","annotations":[{"segment":[6.09161472,61.433443605304205],"label":"Skateboarding"},{"segment":[62.252213160000004,163.13534129641184],"label":"Skateboarding"}]},"m4NBHYqtBP8":{"duration":146.635,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=m4NBHYqtBP8","annotations":[]},"aAY_M6M26TI":{"duration":72.73,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=aAY_M6M26TI","annotations":[{"segment":[2.097580593603744,50.10029084000001],"label":"Ping-pong"}]},"FPbPkjLAEzs":{"duration":78.9,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=FPbPkjLAEzs","annotations":[{"segment":[0.49206448049922,78.48428463962559],"label":"Windsurfing"},{"segment":[34.88844418,43.102038279999995],"label":"Windsurfing"}]},"XxM72xEvsjE":{"duration":83.92,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XxM72xEvsjE","annotations":[{"segment":[2.667066,63.18430320000001],"label":"Mowing the 
lawn"}]},"XilAaJ_r4tA":{"duration":105.187,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=XilAaJ_r4tA","annotations":[]},"XQ0_P0LpCFM":{"duration":165.233,"subset":"training","resolution":"488x272","url":"https://www.youtube.com/watch?v=XQ0_P0LpCFM","annotations":[{"segment":[0.2,18.559735107771658],"label":"Hurling"},{"segment":[40.728307597610026,56.710301718191175],"label":"Hurling"},{"segment":[60.31913910025789,76.30113322083905],"label":"Hurling"},{"segment":[93.3142237362964,165.0331972789116],"label":"Hurling"}]},"V50vtrHboIE":{"duration":219.569,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=V50vtrHboIE","annotations":[{"segment":[119.70190959257064,147.70191270299054],"label":"Hula hoop"}]},"LLFhSU-XuTI":{"duration":59.98,"subset":"validation","resolution":"720x1280","url":"https://www.youtube.com/watch?v=LLFhSU-XuTI","annotations":[{"segment":[0.01,5.801221290795632],"label":"Spinning"},{"segment":[15.62587025101404,33.21667029407177],"label":"Spinning"}]},"onBAyGhqubg":{"duration":77.37,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=onBAyGhqubg","annotations":[{"segment":[0,62.16065024180968],"label":"Springboard diving"}]},"s-kFpQRPcyE":{"duration":163.56,"subset":"training","resolution":"1920x800","url":"https://www.youtube.com/watch?v=s-kFpQRPcyE","annotations":[{"segment":[1.48513388,57.06806851],"label":"Getting a haircut"},{"segment":[64.4921023,147.05996329875194],"label":"Getting a haircut"}]},"0pcrpO0Gd8M":{"duration":167.95,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0pcrpO0Gd8M","annotations":[{"segment":[86.9881082916786,167.7499319727891],"label":"Putting on makeup"}]},"674t-jyyKeg":{"duration":17.532,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=674t-jyyKeg","annotations":[{"segment":[0.7636006240249611,12.572138845553823],"label":"Having an ice 
cream"}]},"UaV-pj2Lu2Y":{"duration":13.329,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UaV-pj2Lu2Y","annotations":[]},"E5YBb4rzV30":{"duration":78.81,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=E5YBb4rzV30","annotations":[]},"gHCTTjqJ36c":{"duration":91.045,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=gHCTTjqJ36c","annotations":[]},"aEAmMKa8iuM":{"duration":40.612,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aEAmMKa8iuM","annotations":[{"segment":[0.01,40.611678],"label":"BMX"}]},"U_ZW0tTvf0k":{"duration":85.8,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=U_ZW0tTvf0k","annotations":[{"segment":[20.31844554,68.56638849000001],"label":"Getting a piercing"},{"segment":[4.76087553,13.009399109999999],"label":"Getting a piercing"}]},"im-nQ-KEzaQ":{"duration":129.38,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=im-nQ-KEzaQ","annotations":[{"segment":[55.85292846,62.186965889999996],"label":"Cleaning windows"},{"segment":[5.18192353,39.45795931],"label":"Cleaning windows"}]},"0-igBOtXYeE":{"duration":154.344,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0-igBOtXYeE","annotations":[{"segment":[0.01,131.20896255850235],"label":"Doing a powerbomb"}]},"sJK-NjgOB6g":{"duration":97.55,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=sJK-NjgOB6g","annotations":[{"segment":[20.834258970358814,71.93141965678628],"label":"Brushing 
teeth"}]},"GasbPKADdTo":{"duration":235.96,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=GasbPKADdTo","annotations":[]},"rq-Xyoo5Rs0":{"duration":130.64,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=rq-Xyoo5Rs0","annotations":[{"segment":[0.01,130.581],"label":"Tumbling"}]},"7vQCa7Vg1cQ":{"duration":26.332,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=7vQCa7Vg1cQ","annotations":[]},"TrRsujPlt5w":{"duration":63.414,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=TrRsujPlt5w","annotations":[{"segment":[0.01,63.413696],"label":"Elliptical trainer"}]},"ktYgm8bRK-g":{"duration":151.418,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ktYgm8bRK-g","annotations":[{"segment":[19.205157515993093,118.34459037571033],"label":"Hula hoop"}]},"0_BvBpwDMpU":{"duration":57.17,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0_BvBpwDMpU","annotations":[{"segment":[0,57.167528344671204],"label":"Using the pommel horse"}]},"aEFEaypwmFM":{"duration":75.21000000000001,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aEFEaypwmFM","annotations":[]},"ZzarNWOJnro":{"duration":79.25,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=ZzarNWOJnro","annotations":[{"segment":[0.01,43.02480029953198],"label":"Chopping wood"}]},"E1-eVfnNfXI":{"duration":49.691,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=E1-eVfnNfXI","annotations":[{"segment":[0.01,49.28531694041457],"label":"Having an ice cream"}]},"abWGVKYPffM":{"duration":22.036,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=abWGVKYPffM","annotations":[{"segment":[0.2,7.494212867394696],"label":"Cutting the grass"},{"segment":[12.547649984399376,21.107553672386896],"label":"Cutting the 
grass"}]},"x2DTmwZa8yE":{"duration":16.069,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=x2DTmwZa8yE","annotations":[{"segment":[0.01,16.068208616780044],"label":"Peeling potatoes"}]},"ol4ScGsyhbI":{"duration":222.308,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ol4ScGsyhbI","annotations":[{"segment":[17.340705601013156,37.45592409818842],"label":"Arm wrestling"},{"segment":[40.57725110637079,50.28804624293815],"label":"Arm wrestling"},{"segment":[48.9007897948571,57.22432848334341],"label":"Arm wrestling"},{"segment":[64.85423894778921,90.17166912526841],"label":"Arm wrestling"},{"segment":[97.8015795897142,111.32732995850446],"label":"Arm wrestling"},{"segment":[116.52954163880841,128.66803555951762],"label":"Arm wrestling"},{"segment":[134.21706135184183,180.34333825053682],"label":"Arm wrestling"},{"segment":[182.4242229226584,201.84581155694227],"label":"Arm wrestling"},{"segment":[208.08846552262088,212.2502348330733],"label":"Arm wrestling"}]},"otWTm1_aAqI":{"duration":40.59,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=otWTm1_aAqI","annotations":[{"segment":[3.2293450624024955,36.599244040561615],"label":"Using the pommel horse"}]},"tNFK-TZ7jv8":{"duration":84.707,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tNFK-TZ7jv8","annotations":[]},"BD4i-g3U5Kw":{"duration":181.604,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BD4i-g3U5Kw","annotations":[{"segment":[0.01,181.603174],"label":"Playing ice 
hockey"}]},"eQ0sW4hJ9zU":{"duration":179.49,"subset":"testing","resolution":"852x480","url":"https://www.youtube.com/watch?v=eQ0sW4hJ9zU","annotations":[]},"00Dk03Jr70M":{"duration":152.347,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=00Dk03Jr70M","annotations":[{"segment":[13.091624758230125,152.3461224489796],"label":"Plastering"}]},"aoY0XhAXm7M":{"duration":138.252,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=aoY0XhAXm7M","annotations":[{"segment":[0.2,117.73582839313572],"label":"Making a cake"}]},"f0kwKCAseXU":{"duration":23.99,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=f0kwKCAseXU","annotations":[]},"r1dvt537JQU":{"duration":178.66,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=r1dvt537JQU","annotations":[{"segment":[20.206613108061738,100.75435363536991],"label":"Using parallel bars"}]},"34KalqGygZ0":{"duration":104.97999999999999,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=34KalqGygZ0","annotations":[{"segment":[33.245476897035886,37.99483073946958],"label":"Horseback riding"},{"segment":[41.597788826833074,43.235497048361935],"label":"Horseback riding"},{"segment":[46.019601024961,54.04437131045242],"label":"Horseback riding"},{"segment":[57.31978775351014,99.24511822464899],"label":"Horseback riding"}]},"iYH0Ol3xIZU":{"duration":141.25,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=iYH0Ol3xIZU","annotations":[{"segment":[9.911302652106084,133.91271138845556],"label":"Tai chi"}]},"ssktVpcv9WI":{"duration":76.86,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ssktVpcv9WI","annotations":[{"segment":[6.528805223868955,21.503129131825272],"label":"Washing 
hands"}]},"KiZQNharaCE":{"duration":13.909,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=KiZQNharaCE","annotations":[]},"nSuPseBeQI0":{"duration":30.04,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nSuPseBeQI0","annotations":[{"segment":[0.01,1.2206233385335414],"label":"Skateboarding"},{"segment":[25.914772418096724,28.30907204368175],"label":"Skateboarding"}]},"shLUZZS9oYc":{"duration":52.408,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=shLUZZS9oYc","annotations":[{"segment":[0,52.361],"label":"Trimming branches or hedges"}]},"lmqLciDIUgk":{"duration":89.65,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=lmqLciDIUgk","annotations":[]},"yu1XjQUctiM":{"duration":94.32,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=yu1XjQUctiM","annotations":[{"segment":[24.616315565522623,88.83918065288611],"label":"Brushing teeth"}]},"ul9kg5QdvJo":{"duration":72.981,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ul9kg5QdvJo","annotations":[{"segment":[5.797053042121685,55.924511700468024],"label":"Having an ice cream"}]},"pwKZRo19Vf0":{"duration":129.196,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pwKZRo19Vf0","annotations":[{"segment":[18.333589703588142,120.27640717628704],"label":"Making an omelette"}]},"9fh_ray_B80":{"duration":56.17,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9fh_ray_B80","annotations":[{"segment":[2.278308623501403,56.16907],"label":"Rock climbing"}]},"op58Lalekrk":{"duration":205.59,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=op58Lalekrk","annotations":[{"segment":[3.5279625585023404,71.84214664586584],"label":"Playing flauta"},{"segment":[83.06748205928238,99.42439937597504],"label":"Playing 
flauta"},{"segment":[102.63163806552262,172.22871762870517],"label":"Playing flauta"},{"segment":[188.26491107644307,200.13169422776912],"label":"Playing flauta"}]},"zy7rd78yBnY":{"duration":209.63,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zy7rd78yBnY","annotations":[{"segment":[8.173985959438378,209.581],"label":"Painting"}]},"Qm-XTJ-uG5s":{"duration":70.36,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Qm-XTJ-uG5s","annotations":[{"segment":[0.6565125263545153,70.35643633039807],"label":"Playing racquetball"}]},"1RQ27XZKU1E":{"duration":227.161,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1RQ27XZKU1E","annotations":[{"segment":[71.12044249475491,148.96371288702397],"label":"Braiding hair"}]},"E5FiPYZARLE":{"duration":189.684,"subset":"validation","resolution":"848x480","url":"https://www.youtube.com/watch?v=E5FiPYZARLE","annotations":[{"segment":[0,2.6632672164029416],"label":"Playing beach volleyball"},{"segment":[17.755114776019614,20.418381992422553],"label":"Playing beach volleyball"},{"segment":[33.73471807443726,39.06125250724315],"label":"Playing beach volleyball"},{"segment":[65.98984325087288,74.57148205928236],"label":"Playing beach volleyball"},{"segment":[100.02047990491047,102.38782854171309],"label":"Playing beach volleyball"},{"segment":[126.35723348933956,129.61233786494316],"label":"Playing beach volleyball"},{"segment":[154.469498124805,160.387869700468],"label":"Playing beach volleyball"}]},"eS0v1PJ-6Y8":{"duration":52.106,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eS0v1PJ-6Y8","annotations":[]},"gNFrILoB8LY":{"duration":16.463,"subset":"training","resolution":"328x240","url":"https://www.youtube.com/watch?v=gNFrILoB8LY","annotations":[{"segment":[0.01,9.541422776911075],"label":"Mooping floor"},{"segment":[13.388770670826833,16.441],"label":"Mooping 
floor"}]},"3abD7z6vRPM":{"duration":78.03999999999999,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3abD7z6vRPM","annotations":[{"segment":[5.478786322932917,18.50612269079563],"label":"Snatch"},{"segment":[58.92739067332293,73.17223511294853],"label":"Snatch"}]},"Kwhp0DNop9I":{"duration":195.466,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Kwhp0DNop9I","annotations":[{"segment":[14.941958745016466,71.965351800312],"label":"Scuba diving"},{"segment":[84.46780698595944,111.91222080811234],"label":"Scuba diving"},{"segment":[117.4011035725429,140.271448424337],"label":"Scuba diving"}]},"qHU7T2LBToI":{"duration":235.78,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qHU7T2LBToI","annotations":[{"segment":[21.328889235569424,235.00545865834633],"label":"Cheerleading"}]},"iXaW8uyzBNs":{"duration":191.844,"subset":"training","resolution":"810x480","url":"https://www.youtube.com/watch?v=iXaW8uyzBNs","annotations":[{"segment":[85.24790171606864,133.1063728549142],"label":"Painting fence"},{"segment":[160.02676287051483,168.10287987519501],"label":"Painting fence"}]},"qCe2NUqGiPg":{"duration":53.94,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qCe2NUqGiPg","annotations":[]},"c975-fpWTS4":{"duration":18.576,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=c975-fpWTS4","annotations":[]},"m731tx8N5-Y":{"duration":99.568,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=m731tx8N5-Y","annotations":[]},"DDwBAbeGejE":{"duration":143.43,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=DDwBAbeGejE","annotations":[{"segment":[23.606597684315535,27.186745200420262],"label":"Raking leaves"},{"segment":[47.548834198265894,53.59033313169261],"label":"Raking leaves"},{"segment":[56.05168454901461,58.73679518609316],"label":"Raking 
leaves"},{"segment":[70.81979305294661,76.63753276661679],"label":"Raking leaves"}]},"15yU0R2MBQM":{"duration":58.26,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=15yU0R2MBQM","annotations":[]},"HPKHDze6XfA":{"duration":57.331,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=HPKHDze6XfA","annotations":[]},"2n6MvpNewpI":{"duration":77.091,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=2n6MvpNewpI","annotations":[{"segment":[0.2,75.52923192122125],"label":"Mooping floor"}]},"hvVu0YlsiIA":{"duration":166.72,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=hvVu0YlsiIA","annotations":[]},"MNSDK-vCwTM":{"duration":84.15,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=MNSDK-vCwTM","annotations":[{"segment":[37.93931509369218,64.72000708424336],"label":"Gargling mouthwash"}]},"eWKFbwBJK2w":{"duration":49.85,"subset":"testing","resolution":"360x240","url":"https://www.youtube.com/watch?v=eWKFbwBJK2w","annotations":[]},"MJgqA2vwCi4":{"duration":90.883,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MJgqA2vwCi4","annotations":[]},"g5-CU3qfSWY":{"duration":70.71000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=g5-CU3qfSWY","annotations":[{"segment":[2.867899858851497,4.52245746972736],"label":"Pole vault"},{"segment":[10.589168709605529,13.67767624990714],"label":"Pole vault"},{"segment":[17.648614516009214,20.185602594383777],"label":"Pole vault"},{"segment":[26.142009917316695,29.782036614664587],"label":"Pole vault"},{"segment":[32.42932875819033,34.52510170514821],"label":"Pole vault"},{"segment":[39.488774474258975,44.34214340405616],"label":"Pole vault"},{"segment":[50.29855072698908,51.29128528081124],"label":"Pole vault"},{"segment":[57.247692603744156,59.12285787207489],"label":"Pole 
vault"},{"segment":[62.873188408736354,70.59445716068643],"label":"Pole vault"}]},"1HsdK9wc-ZU":{"duration":48.089,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=1HsdK9wc-ZU","annotations":[]},"kgvbU_3jEy8":{"duration":163.886,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=kgvbU_3jEy8","annotations":[{"segment":[12.783653659071533,52.41298000219329],"label":"Futsal"},{"segment":[59.57182605127334,136.52942107888398],"label":"Futsal"},{"segment":[143.17691931357254,156.98326510452418],"label":"Futsal"}]},"GqCS5MNl6a4":{"duration":194.793,"subset":"training","resolution":"426x236","url":"https://www.youtube.com/watch?v=GqCS5MNl6a4","annotations":[{"segment":[31.604350628446905,160.75674502354244],"label":"Removing ice from car"}]},"wdsZn1Yu_5M":{"duration":119.212,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wdsZn1Yu_5M","annotations":[{"segment":[1.668583834324295,3.893366052032529],"label":"Wakeboarding"},{"segment":[19.09604453970546,24.287203047691342],"label":"Wakeboarding"},{"segment":[31.703143773385456,32.815534882239575],"label":"Wakeboarding"},{"segment":[44.68104004335015,48.01821336991251],"label":"Wakeboarding"},{"segment":[53.023973359756035,54.87795854117956],"label":"Wakeboarding"},{"segment":[60.6253126035925,66.92886222043249],"label":"Wakeboarding"},{"segment":[69.89523851071014,77.86737402951904],"label":"Wakeboarding"},{"segment":[81.57534435611313,83.05853248675076],"label":"Wakeboarding"},{"segment":[99.55900044009448,103.08157225035886],"label":"Wakeboarding"}]},"c3SJUucMqJQ":{"duration":183.113,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=c3SJUucMqJQ","annotations":[{"segment":[50.27734018204266,79.12967744560123],"label":"Clipping cat claws"},{"segment":[87.41401190741507,125.97901716068644],"label":"Clipping cat 
claws"}]},"jVxsUaM12rs":{"duration":191.91,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=jVxsUaM12rs","annotations":[]},"dsVvnFkGAn0":{"duration":82.8,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dsVvnFkGAn0","annotations":[{"segment":[26.61037926142896,65.23418217000788],"label":"Playing violin"}]},"1wLF2M60OrI":{"duration":87.87,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=1wLF2M60OrI","annotations":[]},"0DFz3sgfda0":{"duration":92.03999999999999,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=0DFz3sgfda0","annotations":[]},"mfq5Y7S3BWs":{"duration":101.36,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mfq5Y7S3BWs","annotations":[{"segment":[20.778436037441498,91.25130655226208],"label":"Spinning"}]},"FFCmoQp_xVA":{"duration":68.11,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=FFCmoQp_xVA","annotations":[{"segment":[10.35905207636877,56.895101404056156],"label":"Mixing drinks"}]},"xE9anOqq2XQ":{"duration":153.51,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xE9anOqq2XQ","annotations":[{"segment":[7.42390128802431,12.692476395654465],"label":"Javelin throw"},{"segment":[26.103394851440317,28.977163091965856],"label":"Javelin throw"},{"segment":[65.37822739469578,68.01251494539781],"label":"Javelin throw"},{"segment":[83.81824024960997,87.88941191887676],"label":"Javelin throw"},{"segment":[105.85046340093604,108.72423163806552],"label":"Javelin throw"},{"segment":[125.24839900156005,127.88268655226209],"label":"Javelin 
throw"}]},"kdGhUyT0fiM":{"duration":236.473,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=kdGhUyT0fiM","annotations":[]},"nxs4o4S43oY":{"duration":172.43,"subset":"training","resolution":"424x240","url":"https://www.youtube.com/watch?v=nxs4o4S43oY","annotations":[{"segment":[18.554864274570985,167.2626895475819],"label":"Shaving"}]},"qY7LG7r_IA8":{"duration":228.65,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=qY7LG7r_IA8","annotations":[{"segment":[12.206156900090251,214.14196338607158],"label":"Cumbia"}]},"BNa85xIhNqA":{"duration":109.041,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BNa85xIhNqA","annotations":[{"segment":[0,94.20679251170047],"label":"Kite flying"}]},"NbO4k5EtU4Y":{"duration":117.12,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=NbO4k5EtU4Y","annotations":[{"segment":[7.674104368174727,117.12145],"label":"Hammer throw"}]},"fevlUD03yCM":{"duration":87.354,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=fevlUD03yCM","annotations":[{"segment":[0,6.81384305772231],"label":"Applying sunscreen"},{"segment":[41.97327323556942,62.278525547581914],"label":"Applying sunscreen"}]},"nc_NMp3vAH0":{"duration":66.432,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=nc_NMp3vAH0","annotations":[]},"zdaD55zVNdQ":{"duration":51.85,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zdaD55zVNdQ","annotations":[{"segment":[8.677512388140242,16.50414322926174],"label":"Discus 
throw"}]},"9n_cwQLpo_c":{"duration":61.626,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=9n_cwQLpo_c","annotations":[{"segment":[0.01,61.625758],"label":"Snowboarding"}]},"pcBmq0G-q30":{"duration":13.166,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pcBmq0G-q30","annotations":[]},"JHHHuKeA-WQ":{"duration":107.602,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JHHHuKeA-WQ","annotations":[{"segment":[18.102383775351015,86.3376209048362],"label":"Hitting a pinata"}]},"EEJsmkVet3I":{"duration":13.422,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=EEJsmkVet3I","annotations":[]},"tOVv0cAyjcg":{"duration":185.64,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tOVv0cAyjcg","annotations":[{"segment":[18.53535580655226,175.79626522776908],"label":"Getting a haircut"}]},"eQ9NP0__xvM":{"duration":14.54,"subset":"testing","resolution":"480x480","url":"https://www.youtube.com/watch?v=eQ9NP0__xvM","annotations":[]},"_4oBeoCuU7M":{"duration":124.761,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=_4oBeoCuU7M","annotations":[{"segment":[58.36790746158826,119.5726037232705],"label":"Welding"}]},"fq3x7MPEtY0":{"duration":30.651,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fq3x7MPEtY0","annotations":[]},"E22gU_8tafI":{"duration":82.55,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=E22gU_8tafI","annotations":[{"segment":[0.01,82.546937],"label":"Triple jump"}]},"t1-GV2bAL4I":{"duration":5.32,"subset":"validation","resolution":"480x480","url":"https://www.youtube.com/watch?v=t1-GV2bAL4I","annotations":[{"segment":[0.11959672386895476,3.212615444617785],"label":"Playing harmonica"},{"segment":[4.029172386895476,5.1426591263650545],"label":"Playing 
harmonica"}]},"3NFWgG4_6NI":{"duration":190.149,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3NFWgG4_6NI","annotations":[{"segment":[9.32832216597419,170.1433517149633],"label":"Hula hoop"}]},"D_y9uXMbImA":{"duration":49.598,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=D_y9uXMbImA","annotations":[{"segment":[0.20000000000000004,47.125656417445484],"label":"Cutting the grass"}]},"P_sjOVZiv6A":{"duration":57.28,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=P_sjOVZiv6A","annotations":[{"segment":[12.243867394695787,32.26303744149766],"label":"Doing nails"}]},"aTXsp_UF3VY":{"duration":231.11,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=aTXsp_UF3VY","annotations":[]},"5WJL9eJ8YDw":{"duration":83.2,"subset":"training","resolution":"634x360","url":"https://www.youtube.com/watch?v=5WJL9eJ8YDw","annotations":[{"segment":[3.5632735023400937,68.09091729017162],"label":"Shot put"}]},"gdr6iVHHYcU":{"duration":134.98,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gdr6iVHHYcU","annotations":[{"segment":[52.85394179304588,87.17741793753383],"label":"Putting on makeup"},{"segment":[100.2329722620905,114.13082135725429],"label":"Putting on makeup"}]},"hkpYSfc5Ewc":{"duration":47.65,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hkpYSfc5Ewc","annotations":[{"segment":[1.9678627145085803,46.597503900156006],"label":"Using parallel bars"}]},"6t1HVCqCbU0":{"duration":140.853,"subset":"testing","resolution":"300x170","url":"https://www.youtube.com/watch?v=6t1HVCqCbU0","annotations":[]},"HVU5EmblAP8":{"duration":174.359,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=HVU5EmblAP8","annotations":[{"segment":[16.014640186915887,174.281],"label":"Playing 
congas"}]},"c-KBzCiSPcU":{"duration":167.09,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=c-KBzCiSPcU","annotations":[]},"3Hbm8FdirRc":{"duration":73.4,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=3Hbm8FdirRc","annotations":[{"segment":[1.2023118639031276,60.51636246099844],"label":"Mowing the lawn"}]},"Ls8ha6c0ye8":{"duration":214.57999999999998,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ls8ha6c0ye8","annotations":[{"segment":[117.52312324492982,184.82268954758192],"label":"Horseback riding"}]},"7Sfl-Fmr_3E":{"duration":71.216,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=7Sfl-Fmr_3E","annotations":[{"segment":[27.611808112324493,43.3705007800312],"label":"Painting fence"}]},"0Qed9-9yp9Q":{"duration":209.63,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0Qed9-9yp9Q","annotations":[{"segment":[47.09311089178261,67.69634690693749],"label":"Hammer throw"},{"segment":[129.17901946009812,202.7620037129485],"label":"Hammer throw"},{"segment":[88.95365324180968,107.26764067394696],"label":"Hammer throw"}]},"AKShRE_4eTA":{"duration":202.99,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=AKShRE_4eTA","annotations":[{"segment":[24.308121350916316,177.73343716429113],"label":"Mowing the lawn"}]},"szdKUpvx9Sk":{"duration":167.3,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=szdKUpvx9Sk","annotations":[{"segment":[95.7858949921997,132.06447647425898],"label":"Playing 
blackjack"}]},"2CTWueZkvUg":{"duration":151.58,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2CTWueZkvUg","annotations":[]},"ZYrQ-TSPQfU":{"duration":147.006,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZYrQ-TSPQfU","annotations":[{"segment":[0.2,136.88890654205608],"label":"Rafting"}]},"HVKveVRZ-JY":{"duration":27.1,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=HVKveVRZ-JY","annotations":[{"segment":[0.01,27.097686],"label":"Playing water polo"}]},"L0K-WJuI1-I":{"duration":136.49,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=L0K-WJuI1-I","annotations":[{"segment":[18.305502320000002,35.558987834343306],"label":"Vacuuming floor"},{"segment":[73.0343283064656,81.55145114103885],"label":"Vacuuming floor"},{"segment":[107.10281964475858,136.2868934240363],"label":"Vacuuming floor"}]},"AY2usHrPYL0":{"duration":226.488,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=AY2usHrPYL0","annotations":[{"segment":[29.66877379095164,203.46099219968798],"label":"River tubing"}]},"NLdyQ1oMmAo":{"duration":151.12,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NLdyQ1oMmAo","annotations":[{"segment":[38.883400650000006,141.7591299524181],"label":"Skateboarding"},{"segment":[4.12090605,37.590326230109206],"label":"Skateboarding"}]},"SIKxtWHWybU":{"duration":9.126,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SIKxtWHWybU","annotations":[{"segment":[1.2052496099843995,8.011365054602184],"label":"Doing 
crunches"}]},"UYFDYX4i2EY":{"duration":121.37,"subset":"training","resolution":"426x214","url":"https://www.youtube.com/watch?v=UYFDYX4i2EY","annotations":[{"segment":[7.664040561622465,111.55436817472699],"label":"Breakdancing"}]},"36lphp9tnMk":{"duration":151.35,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=36lphp9tnMk","annotations":[{"segment":[0.01,143.08375004368176],"label":"Drinking beer"}]},"7xYnGM84888":{"duration":221.171,"subset":"testing","resolution":"300x240","url":"https://www.youtube.com/watch?v=7xYnGM84888","annotations":[]},"XNzQVEmoJrY":{"duration":81.78,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=XNzQVEmoJrY","annotations":[{"segment":[9.053995319812792,10.839290171606864],"label":"Playing squash"},{"segment":[20.148327613104524,78.93553666146646],"label":"Playing squash"}]},"l8boztVhl7w":{"duration":230.37,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=l8boztVhl7w","annotations":[{"segment":[51.219354100000004,121.1074878],"label":"Volleyball"},{"segment":[14.375361560062403,213.47411916692667],"label":"Volleyball"}]},"wEehiYq9ttE":{"duration":18.72,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=wEehiYq9ttE","annotations":[]},"Ivq9_wPFNs4":{"duration":20.69,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ivq9_wPFNs4","annotations":[]},"gY-TqRhcWY8":{"duration":34.087,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=gY-TqRhcWY8","annotations":[{"segment":[0.2,32.253191887675506],"label":"Table soccer"}]},"okC18bPTils":{"duration":89.977,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=okC18bPTils","annotations":[{"segment":[0.01,32.82607488299532],"label":"Scuba diving"},{"segment":[34.790028081123246,67.61610296411857],"label":"Scuba 
diving"},{"segment":[73.22739781591264,84.87083463338534],"label":"Scuba diving"}]},"UYcpD3r6Ol0":{"duration":233.43,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UYcpD3r6Ol0","annotations":[{"segment":[19.47895213026521,208.80708498517942],"label":"Doing nails"}]},"deuSw3RnNLU":{"duration":211.79,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=deuSw3RnNLU","annotations":[{"segment":[11.564153232795979,191.63453806552263],"label":"Making a lemonade"}]},"ADy-2Y0jmXQ":{"duration":233.62,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=ADy-2Y0jmXQ","annotations":[{"segment":[25.86582059282371,33.51627457098284],"label":"Drinking coffee"},{"segment":[95.81282839313572,97.63436505460217],"label":"Drinking coffee"}]},"E3h0UHRsSQk":{"duration":112.849,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=E3h0UHRsSQk","annotations":[{"segment":[4.4012756428624495,45.24511360862597],"label":"Sharpening knives"},{"segment":[58.096838485784325,89.43391988767551],"label":"Sharpening 
knives"}]},"WRXSn7DyaoI":{"duration":141.43,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WRXSn7DyaoI","annotations":[{"segment":[6.1709204368174735,140.82921996879875],"label":"Cumbia"}]},"P7lUYSLtErY":{"duration":150.93,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=P7lUYSLtErY","annotations":[{"segment":[9.301048361934477,140.2221341653666],"label":"Tango"}]},"9wukG78Z0u8":{"duration":85.845,"subset":"training","resolution":"1273x720","url":"https://www.youtube.com/watch?v=9wukG78Z0u8","annotations":[{"segment":[12.983926677067084,22.353770670826833],"label":"Kneeling"},{"segment":[32.928308892355695,40.8257488299532],"label":"Kneeling"},{"segment":[50.32944773790951,59.43158190327614],"label":"Kneeling"}]},"memk6ryZCjE":{"duration":182.81,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=memk6ryZCjE","annotations":[{"segment":[12.263420218408735,178.53258271450858],"label":"Doing karate"}]},"qn22hNGngQE":{"duration":137.88,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=qn22hNGngQE","annotations":[]},"6Ni6csyQbzw":{"duration":73.955,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6Ni6csyQbzw","annotations":[{"segment":[19.882172386895476,59.4159992199688],"label":"BMX"}]},"xMImWgn3owc":{"duration":95.09,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=xMImWgn3owc","annotations":[{"segment":[80.8336602,84.50108864999999],"label":"Using the balance beam"},{"segment":[3.780776117784711,71.95367205],"label":"Using the balance beam"}]},"vUuC72xikqw":{"duration":214.599,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=vUuC72xikqw","annotations":[{"segment":[0.2,106.79722910276955],"label":"Carving jack-o-lanterns"},{"segment":[155.00663659743668,192.50284075663023],"label":"Carving 
jack-o-lanterns"}]},"KIanyNsP7wY":{"duration":81.03999999999999,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=KIanyNsP7wY","annotations":[]},"HLmiBuyTTmM":{"duration":217.27,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HLmiBuyTTmM","annotations":[]},"lQH1mZe3ZmA":{"duration":114.173,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=lQH1mZe3ZmA","annotations":[]},"zh0haUMeZV0":{"duration":144.498,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zh0haUMeZV0","annotations":[{"segment":[24.7904992199688,119.8958689547582],"label":"Kneeling"}]},"9PP5_HGpu4c":{"duration":187.15,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=9PP5_HGpu4c","annotations":[{"segment":[6.817947359999999,178.79266770670824],"label":"Preparing salad"}]},"avGe67wRvxA":{"duration":42.82,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=avGe67wRvxA","annotations":[{"segment":[0,27.658681747269885],"label":"Putting on makeup"}]},"mzxp9dHn0Rs":{"duration":220.102,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mzxp9dHn0Rs","annotations":[{"segment":[18.195371294851792,209.4184243369735],"label":"Futsal"}]},"rCLGy2NA0aE":{"duration":30.558,"subset":"testing","resolution":"423x240","url":"https://www.youtube.com/watch?v=rCLGy2NA0aE","annotations":[]},"XKpx9fyNINg":{"duration":194.89,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XKpx9fyNINg","annotations":[{"segment":[13.070457098283931,176.31728549141965],"label":"Snatch"}]},"CUG8vpMIFEQ":{"duration":197.97,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=CUG8vpMIFEQ","annotations":[{"segment":[8.45540472,133.04717176],"label":"Plataform 
diving"}]},"qoVYcplxgFE":{"duration":68.76,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=qoVYcplxgFE","annotations":[{"segment":[45.96591107644306,67.93097347893915],"label":"Using the balance beam"}]},"SSJjjggYBxc":{"duration":53.499,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=SSJjjggYBxc","annotations":[{"segment":[13.604212805246904,40.72917583775351],"label":"Removing ice from car"}]},"ahVeDMNS9ws":{"duration":208.12,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ahVeDMNS9ws","annotations":[{"segment":[90.58596896723868,208.12045351473924],"label":"Cleaning shoes"}]},"7DY1vm9RiIk":{"duration":197.277,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7DY1vm9RiIk","annotations":[{"segment":[9.2303120124805,17.229915756630266],"label":"Beach soccer"},{"segment":[51.997424336973474,62.458444617784714],"label":"Beach soccer"},{"segment":[77.84229797191888,153.22317940717627],"label":"Beach 
soccer"}]},"7OYvyg32iqw":{"duration":192.912,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7OYvyg32iqw","annotations":[{"segment":[11.731698907956318,169.05678939157565],"label":"Waterskiing"}]},"NHYHcPe_Te8":{"duration":105.07,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NHYHcPe_Te8","annotations":[]},"JcMOzfurtK4":{"duration":235.566,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JcMOzfurtK4","annotations":[{"segment":[121.05706630265212,125.09842511700468],"label":"Canoeing"},{"segment":[194.16892121684867,197.12483073322932],"label":"Canoeing"}]},"uRQjuRNAABI":{"duration":146.495,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=uRQjuRNAABI","annotations":[]},"3sAcnQxBchE":{"duration":67.52,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3sAcnQxBchE","annotations":[{"segment":[0.26307852184087366,59.19266741419657],"label":"Smoking a cigarette"}]},"685wnEW1Uq4":{"duration":7.34,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=685wnEW1Uq4","annotations":[{"segment":[1.4423177934477378,6.994096601560063],"label":"Javelin throw"}]},"f2W1Mt04CIM":{"duration":58.747,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=f2W1Mt04CIM","annotations":[{"segment":[4.765705512574245,58.746485260770974],"label":"Riding bumper cars"}]},"Bi3vlu17qp8":{"duration":28.445,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Bi3vlu17qp8","annotations":[{"segment":[0.2,25.38810140405616],"label":"Beach soccer"}]},"blCvgADz8LY":{"duration":55.54,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=blCvgADz8LY","annotations":[{"segment":[17.87035729953198,31.024324900156007],"label":"Clean and jerk"},{"segment":[32.149335287051485,55.42839636973479],"label":"Clean and 
jerk"}]},"oVt9uNNAGEo":{"duration":34.78,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=oVt9uNNAGEo","annotations":[]},"i0Z8I2WCLNk":{"duration":190.985,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=i0Z8I2WCLNk","annotations":[{"segment":[3.5753645982574,184.13127681025608],"label":"Trimming branches or hedges"}]},"iXaA7PVRhIY":{"duration":71.7,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iXaA7PVRhIY","annotations":[{"segment":[11.121852330000001,38.46396177847114],"label":"Ping-pong"}]},"yE5whKJ-DE4":{"duration":150.7,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=yE5whKJ-DE4","annotations":[{"segment":[3.8754836021840875,125.07242534321374],"label":"Mowing the lawn"}]},"ByDsWCKyWlo":{"duration":226.39,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ByDsWCKyWlo","annotations":[]},"Zu-tYiGypek":{"duration":35.039,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Zu-tYiGypek","annotations":[]},"SfQku6CicrU":{"duration":5.57,"subset":"validation","resolution":"176x144","url":"https://www.youtube.com/watch?v=SfQku6CicrU","annotations":[{"segment":[0.01,5.016378589703589],"label":"Washing face"}]},"AXyRIyXqnzI":{"duration":130.22,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=AXyRIyXqnzI","annotations":[{"segment":[2.8419344773790955,13.194695787831513],"label":"Discus throw"},{"segment":[21.314508580343215,25.983400936037444],"label":"Discus throw"},{"segment":[32.073260530421216,36.74215288611545],"label":"Discus throw"},{"segment":[42.02003120124805,62.5225585023401],"label":"Discus throw"},{"segment":[69.62739469578783,73.0783151326053],"label":"Discus throw"},{"segment":[98.85872074882995,116.9253042121685],"label":"Discus throw"},{"segment":[122.2031825273011,124.43613104524181],"label":"Discus 
throw"}]},"rXgxlwrRFTg":{"duration":36.69,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rXgxlwrRFTg","annotations":[{"segment":[13.507420237129486,36.6875283446712],"label":"Horseback riding"}]},"Sf27WmcLb2w":{"duration":124.3,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Sf27WmcLb2w","annotations":[]},"cXkwVOPNisA":{"duration":168.0,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cXkwVOPNisA","annotations":[]},"dyLGepr7VR0":{"duration":178.7,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dyLGepr7VR0","annotations":[{"segment":[12.824080288381603,176.1917117881994],"label":"Zumba"}]},"VFvHqc5Bg-0":{"duration":174.289,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VFvHqc5Bg-0","annotations":[{"segment":[19.57689053354134,152.53660540717627],"label":"Snowboarding"}]},"yqBRp0txs-c":{"duration":59.815,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yqBRp0txs-c","annotations":[{"segment":[9.395449468019454,14.697832642128912],"label":"Beer pong"},{"segment":[20.000215423486168,24.279331585228004],"label":"Beer pong"},{"segment":[31.907321264854755,33.02361243748305],"label":"Beer pong"},{"segment":[36.74458301291074,39.256238151324425],"label":"Beer pong"},{"segment":[47.34934915287963,49.58193149813624],"label":"Beer pong"}]},"4j7sZBThR7s":{"duration":130.311,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4j7sZBThR7s","annotations":[{"segment":[14.834716068642745,114.63102028081124],"label":"Making an omelette"}]},"Y5zJT3BjIxM":{"duration":185.74,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Y5zJT3BjIxM","annotations":[{"segment":[41.857885335413414,146.7198541341654],"label":"Getting a 
haircut"}]},"GxOjqC_IDX4":{"duration":83.69,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GxOjqC_IDX4","annotations":[{"segment":[10.770624430577223,71.47778031201248],"label":"Triple jump"}]},"I2XqSiNerwg":{"duration":170.203,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=I2XqSiNerwg","annotations":[]},"DUb48prwNZk":{"duration":89.56,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DUb48prwNZk","annotations":[{"segment":[8.790678561622464,75.62774254602184],"label":"Smoking hookah"}]},"8OEts-YLeW0":{"duration":57.31,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=8OEts-YLeW0","annotations":[{"segment":[7.248843732209964,56.750175507910896],"label":"Doing karate"}]},"Y-1QkIGm81w":{"duration":56.87,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Y-1QkIGm81w","annotations":[{"segment":[7.822445397815912,16.40478549141966],"label":"Playing guitarra"},{"segment":[24.808326833073323,42.95640015600624],"label":"Playing guitarra"}]},"V9MTU7xLukc":{"duration":114.521,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=V9MTU7xLukc","annotations":[{"segment":[0,16.436685026584737],"label":"Fixing the roof"},{"segment":[33.5880085325862,37.697179789232386],"label":"Fixing the roof"},{"segment":[48.95273584004585,107.37443028861155],"label":"Fixing the roof"}]},"Ms6qkgx5fyk":{"duration":179.4,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=Ms6qkgx5fyk","annotations":[]},"0xxl3iG3VKY":{"duration":46.86,"subset":"training","resolution":"634x360","url":"https://www.youtube.com/watch?v=0xxl3iG3VKY","annotations":[{"segment":[0.657904422299341,6.140441274793849],"label":"Javelin throw"},{"segment":[19.15232873804748,34.06482897683254],"label":"Javelin 
throw"}]},"8_RfK2rp2To":{"duration":94.18,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8_RfK2rp2To","annotations":[{"segment":[18.79263650546022,32.00620904836193],"label":"Pole vault"},{"segment":[57.69926677067083,63.71878315132605],"label":"Pole vault"}]},"RpyIg_j4I3E":{"duration":184.9,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RpyIg_j4I3E","annotations":[{"segment":[0,183.11081903276133],"label":"Kayaking"}]},"zGwcX0S4DVA":{"duration":51.688,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zGwcX0S4DVA","annotations":[{"segment":[0.2,50.32098151370285],"label":"Camel ride"}]},"Y76wuHBZgdU":{"duration":47.485,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Y76wuHBZgdU","annotations":[{"segment":[8.43013104524181,15.233394695787833],"label":"Installing carpet"},{"segment":[28.76597347893916,40.74563338533542],"label":"Installing carpet"}]},"enESbo2pA9U":{"duration":124.32,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=enESbo2pA9U","annotations":[{"segment":[46.33132449297972,51.961478939157566],"label":"Snowboarding"}]},"q4jeW0iSA9Y":{"duration":82.27,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=q4jeW0iSA9Y","annotations":[{"segment":[47.58851495475819,76.19293229953198],"label":"Triple jump"}]},"SipyRTPgdfY":{"duration":22.2,"subset":"validation","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=SipyRTPgdfY","annotations":[{"segment":[14.960368923556942,19.39307082683307],"label":"Snatch"}]},"oQ_PwsBgozM":{"duration":162.262,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=oQ_PwsBgozM","annotations":[{"segment":[1.3122191739578155,161.30211235466192],"label":"Decorating the Christmas 
tree"}]},"a8_pA-cEAao":{"duration":205.938,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=a8_pA-cEAao","annotations":[]},"21krmbuT21s":{"duration":58.06,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=21krmbuT21s","annotations":[{"segment":[0,51.592588143525745],"label":"Using uneven bars"}]},"_AaSNARQPfE":{"duration":200.74,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_AaSNARQPfE","annotations":[{"segment":[24.1134284228512,169.73348184399376],"label":"Snatch"}]},"x_BIktHkEc4":{"duration":69.48,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=x_BIktHkEc4","annotations":[{"segment":[18.426088979229025,52.08100096063633],"label":"Playing harmonica"}]},"7orO93DICYw":{"duration":76.86,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=7orO93DICYw","annotations":[]},"PJysE5c1WDs":{"duration":40.47,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PJysE5c1WDs","annotations":[{"segment":[16.037417347893918,34.347854477379094],"label":"Clean and jerk"}]},"6rOmYOU7748":{"duration":59.49,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6rOmYOU7748","annotations":[{"segment":[7.888548057722309,49.28022374882996],"label":"Roof shingle removal"}]},"ta4QlTBHTTw":{"duration":91.02,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ta4QlTBHTTw","annotations":[{"segment":[3.763006325273011,19.80903329719189],"label":"Tai chi"}]},"DfOqhNeHDgM":{"duration":57.772,"subset":"validation","resolution":"360x240","url":"https://www.youtube.com/watch?v=DfOqhNeHDgM","annotations":[{"segment":[6.573530421216848,51.97590795631825],"label":"Playing 
drums"}]},"fVBXRUWEyoA":{"duration":219.824,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fVBXRUWEyoA","annotations":[{"segment":[0.2,219.6233106575964],"label":"Clipping cat claws"},{"segment":[138.54698518825109,210.90691893135724],"label":"Clipping cat claws"}]},"Zr1xfVeUGeo":{"duration":213.856,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Zr1xfVeUGeo","annotations":[{"segment":[4.754364748486354,206.6180184896047],"label":"Making a cake"}]},"LW8UDBQ1yJc":{"duration":184.95,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LW8UDBQ1yJc","annotations":[{"segment":[4.327920341303448,156.38218699219968],"label":"Skateboarding"}]},"QFz-keqof9g":{"duration":43.909,"subset":"training","resolution":"718x480","url":"https://www.youtube.com/watch?v=QFz-keqof9g","annotations":[{"segment":[3.1510311450858035,43.908934],"label":"Slacklining"}]},"hJKX5ZulTgI":{"duration":76.324,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hJKX5ZulTgI","annotations":[{"segment":[21.74580829716589,61.15415224581927],"label":"Blow-drying hair"}]},"WZ4u6Epo9lg":{"duration":221.914,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=WZ4u6Epo9lg","annotations":[]},"Q8EoKJNm4mc":{"duration":65.09,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Q8EoKJNm4mc","annotations":[{"segment":[4.366089372826613,16.956672643057722],"label":"Playing violin"},{"segment":[50.159259195631826,63.46060120904836],"label":"Playing violin"}]},"Bq4Dg1t_zUA":{"duration":21.827,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=Bq4Dg1t_zUA","annotations":[]},"-h3NbdE8N9M":{"duration":179.4,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-h3NbdE8N9M","annotations":[{"segment":[8.393681747269891,154.34337414],"label":"Playing 
lacrosse"}]},"XP5Oqr1giQ4":{"duration":146.542,"subset":"training","resolution":"854x468","url":"https://www.youtube.com/watch?v=XP5Oqr1giQ4","annotations":[{"segment":[16.460158553280905,139.22550606552264],"label":"Bullfighting"}]},"KlJqjBqpnjw":{"duration":112.896,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KlJqjBqpnjw","annotations":[{"segment":[8.630071069509446,72.3869226442527],"label":"Rope skipping"}]},"Q_kqplOxytU":{"duration":85.984,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Q_kqplOxytU","annotations":[{"segment":[17.410918614590177,65.62576869158877],"label":"Swimming"}]},"pibXLs9IOX8":{"duration":214.692,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pibXLs9IOX8","annotations":[{"segment":[17.416487418680422,37.177501989875516],"label":"Croquet"},{"segment":[62.63236821717767,89.09203179555753],"label":"Croquet"}]},"vQ8NJRCSyb4":{"duration":185.969,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vQ8NJRCSyb4","annotations":[{"segment":[3.5919106432576093,178.55269786572708],"label":"Kneeling"}]},"ZLJ2BmBkLJg":{"duration":75.81,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZLJ2BmBkLJg","annotations":[]},"IGmzP4DnNbQ":{"duration":159.2,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IGmzP4DnNbQ","annotations":[]},"xyJCmMW4bEo":{"duration":143.76,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xyJCmMW4bEo","annotations":[]},"JBnwqVLJRBc":{"duration":182.86,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JBnwqVLJRBc","annotations":[]},"w64fHbGOl4E":{"duration":200.435,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=w64fHbGOl4E","annotations":[]},"Jmip8PuILhM":{"duration":36.154,"subset":"training","resolution":"480x360","url":"https://www.y
outube.com/watch?v=Jmip8PuILhM","annotations":[{"segment":[0.2,32.29509968847352],"label":"Painting furniture"}]},"ywFa_D5QZ-k":{"duration":219.777,"subset":"validation","resolution":"853x480","url":"https://www.youtube.com/watch?v=ywFa_D5QZ-k","annotations":[{"segment":[24.337271450858033,219.721],"label":"Waxing skis"}]},"er6fi7nYsuw":{"duration":16.09,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=er6fi7nYsuw","annotations":[{"segment":[0.3012325366614665,16.065735288611545],"label":"Playing field hockey"}]},"iGuOr5KwLQ0":{"duration":116.332,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iGuOr5KwLQ0","annotations":[{"segment":[0.2,112.35173780495815],"label":"Rollerblading"}]},"8VYre-lXqIo":{"duration":177.38,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8VYre-lXqIo","annotations":[{"segment":[7.748147487804275,174.33331847559617],"label":"Tai chi"}]},"deLpSUF1wio":{"duration":14.65,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=deLpSUF1wio","annotations":[{"segment":[2.8343558252730108,7.154462687987519],"label":"Starting a campfire"}]},"n5m7OWnwMC0":{"duration":206.263,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=n5m7OWnwMC0","annotations":[]},"evOhhiSN84I":{"duration":50.178,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=evOhhiSN84I","annotations":[]},"75cjK13ylJM":{"duration":222.87,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=75cjK13ylJM","annotations":[{"segment":[166.54039740909363,217.6498721880848],"label":"Preparing salad"}]},"ObU5as8-u-s":{"duration":32.79,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ObU5as8-u-s","annotations":[{"segment":[0.01,32.741],"label":"Playing field 
hockey"}]},"EtrCRyT4tkI":{"duration":140.16,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=EtrCRyT4tkI","annotations":[]},"L2MfC4jcRo0":{"duration":38.453,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=L2MfC4jcRo0","annotations":[{"segment":[27.9866334514359,34.68428953852765],"label":"Table soccer"}]},"pVuuVHA3RVw":{"duration":30.163,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pVuuVHA3RVw","annotations":[]},"O_L0CSZ7nnA":{"duration":83.151,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=O_L0CSZ7nnA","annotations":[{"segment":[4.784717048658499,81.21090310920945],"label":"Carving jack-o-lanterns"}]},"_wl0vFvb8Bc":{"duration":160.241,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=_wl0vFvb8Bc","annotations":[{"segment":[57.756621469950545,115.66300043568263],"label":"Hand car wash"}]},"9lAT3oA9Ixs":{"duration":54.22,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=9lAT3oA9Ixs","annotations":[]},"6GGQUpJR5qs":{"duration":133.12,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=6GGQUpJR5qs","annotations":[{"segment":[64.0400384,125.49583431903275],"label":"Bathing dog"},{"segment":[1.8184192000000001,56.9780224],"label":"Bathing dog"}]},"UzCeMCr0lM8":{"duration":58.584,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UzCeMCr0lM8","annotations":[{"segment":[1.0053317697333743,55.38464015288612],"label":"Peeling 
potatoes"}]},"PoAB1ktiNQM":{"duration":47.903,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PoAB1ktiNQM","annotations":[]},"FJ7yrh2UiQ8":{"duration":207.7,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=FJ7yrh2UiQ8","annotations":[{"segment":[32.23132527301092,187.41324258970357],"label":"Paintball"}]},"uIOIcv5MhuA":{"duration":27.14,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=uIOIcv5MhuA","annotations":[{"segment":[1.754308329173167,26.821894815912636],"label":"Smoking a cigarette"}]},"ZN9kbYULUtw":{"duration":97.72999999999999,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=ZN9kbYULUtw","annotations":[{"segment":[12.19748031201248,97.73231292517006],"label":"Doing karate"}]},"wvQS6xeOhOI":{"duration":205.683,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wvQS6xeOhOI","annotations":[]},"qlbuekEsXL4":{"duration":27.029,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qlbuekEsXL4","annotations":[{"segment":[8.406850369494638,13.78723501646198],"label":"Calf roping"}]},"Uk_VxavVjP0":{"duration":63.066,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Uk_VxavVjP0","annotations":[{"segment":[0,23.00610608424337],"label":"River tubing"},{"segment":[28.315207488299535,34.607475819032764],"label":"River tubing"},{"segment":[45.422312012480496,59.67823244929797],"label":"River 
tubing"}]},"C4QrTmNDADY":{"duration":235.68,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=C4QrTmNDADY","annotations":[{"segment":[0.01,97.43505930713418],"label":"Breakdancing"},{"segment":[106.62704603422232,235.48253968253968],"label":"Breakdancing"}]},"B6W_0AvgGTY":{"duration":23.197,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=B6W_0AvgGTY","annotations":[{"segment":[4.624894144573718,19.077686953271026],"label":"Disc dog"}]},"IEPoIqIrprg":{"duration":233.57,"subset":"validation","resolution":"424x240","url":"https://www.youtube.com/watch?v=IEPoIqIrprg","annotations":[{"segment":[43.57696833,227.85211745943838],"label":"Volleyball"}]},"aM-fUKjbCd0":{"duration":60.21,"subset":"testing","resolution":"800x480","url":"https://www.youtube.com/watch?v=aM-fUKjbCd0","annotations":[]},"iZg4cUU6Hkc":{"duration":102.08,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=iZg4cUU6Hkc","annotations":[{"segment":[41.88097300624025,93.63502710140405],"label":"Playing kickball"}]},"8miqQQJEsO0":{"duration":135.77,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8miqQQJEsO0","annotations":[{"segment":[9.633790951638066,128.83872074882996],"label":"Clean and jerk"}]},"dQyZheN0Fw0":{"duration":149.142,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dQyZheN0Fw0","annotations":[{"segment":[0,6.494505754707989],"label":"Curling"}]},"Ba3uRADSg50":{"duration":145.961,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ba3uRADSg50","annotations":[{"segment":[49.03187389685008,145.0526318193118],"label":"Using the monkey 
bar"}]},"RiUvp2idW3U":{"duration":33.553,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=RiUvp2idW3U","annotations":[]},"BeusKDfs3cs":{"duration":230.25,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BeusKDfs3cs","annotations":[]},"bV4xdc71azY":{"duration":211.883,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=bV4xdc71azY","annotations":[{"segment":[17.84966092521252,208.90714152886113],"label":"Making an omelette"}]},"7WfF6FrZEuU":{"duration":36.2,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=7WfF6FrZEuU","annotations":[{"segment":[0.8729570982839314,34.60852496099844],"label":"Polishing shoes"}]},"wJV-kQRmaGc":{"duration":24.1,"subset":"training","resolution":"384x240","url":"https://www.youtube.com/watch?v=wJV-kQRmaGc","annotations":[{"segment":[0,24.061],"label":"Springboard diving"}]},"43gst-Mw43s":{"duration":75.372,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=43gst-Mw43s","annotations":[{"segment":[22.684575663026518,28.914018720748828],"label":"Roof shingle removal"}]},"NLdhDlsMnxQ":{"duration":41.31,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NLdhDlsMnxQ","annotations":[{"segment":[0.7080670826833074,39.587386895475824],"label":"Walking the dog"}]},"8CAvjawFn3w":{"duration":89.88,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=8CAvjawFn3w","annotations":[{"segment":[29.70306664,37.15894444],"label":"Horseback riding"}]},"oqX-n2KHWAM":{"duration":213.09,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=oqX-n2KHWAM","annotations":[{"segment":[42.218984858034325,190.15164833697347],"label":"Having an ice 
cream"}]},"G_kJ-Af89ew":{"duration":165.884,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=G_kJ-Af89ew","annotations":[{"segment":[0.2,165.561],"label":"Playing pool"}]},"eIs6m8Xt4rw":{"duration":55.241,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=eIs6m8Xt4rw","annotations":[]},"uGVkH6PjXLs":{"duration":32.3,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=uGVkH6PjXLs","annotations":[{"segment":[0.01,2.5698077953198126],"label":"Chopping wood"},{"segment":[29.326041899531983,32.098956799999996],"label":"Chopping wood"}]},"RxXBMgsu6uU":{"duration":177.447,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=RxXBMgsu6uU","annotations":[{"segment":[95.76858970358813,136.7331887675507],"label":"BMX"}]},"VcyO9tZ3NkY":{"duration":124.018,"subset":"training","resolution":"448x360","url":"https://www.youtube.com/watch?v=VcyO9tZ3NkY","annotations":[{"segment":[52.99655850234009,120.69289235569423],"label":"Fixing the roof"}]},"Nkcc9kLDQz8":{"duration":161.495,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Nkcc9kLDQz8","annotations":[{"segment":[9.555697819314641,150.3951370716511],"label":"Skiing"}]},"DHaH6wgdotw":{"duration":66.161,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DHaH6wgdotw","annotations":[]},"jZh3iGJszMw":{"duration":41.796,"subset":"testing","resolution":"400x224","url":"https://www.youtube.com/watch?v=jZh3iGJszMw","annotations":[]},"zV08tITKFMM":{"duration":121.997,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=zV08tITKFMM","annotations":[]},"D0RDF1ez-8Y":{"duration":178.887,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=D0RDF1ez-8Y","annotations":[{"segment":[39.070380748829955,178.8865306122449],"label":"Putting in contact 
lenses"}]},"hrcSCXaNhmY":{"duration":40.96,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hrcSCXaNhmY","annotations":[{"segment":[0.01,40.96],"label":"Disc dog"}]},"uXPm6SMQH1M":{"duration":35.83,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=uXPm6SMQH1M","annotations":[{"segment":[7.28214430577223,34.45765600624025],"label":"Grooming horse"}]},"pIv2jZdHP0k":{"duration":448.401,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=pIv2jZdHP0k","annotations":[{"segment":[0.01,448.40054421768707],"label":"Putting on shoes"}]},"tJ2xOG_EWOg":{"duration":216.666,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tJ2xOG_EWOg","annotations":[{"segment":[0.01,140.612799900156],"label":"Carving jack-o-lanterns"}]},"IucYlH_gAic":{"duration":64.064,"subset":"training","resolution":"636x360","url":"https://www.youtube.com/watch?v=IucYlH_gAic","annotations":[{"segment":[0.01,0.02],"label":"Swimming"}]},"Gg32cIypcdc":{"duration":237.354,"subset":"validation","resolution":"290x240","url":"https://www.youtube.com/watch?v=Gg32cIypcdc","annotations":[{"segment":[6.664240249609985,212.14498127925117],"label":"Bullfighting"}]},"36JC5C6I4sg":{"duration":187.687,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=36JC5C6I4sg","annotations":[{"segment":[0.01,136.73912485335413],"label":"Hand car wash"}]},"J__1J4MmH4w":{"duration":112.524,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=J__1J4MmH4w","annotations":[{"segment":[19.074810898504037,22.399778401316464],"label":"Beer pong"},{"segment":[48.64952131418594,53.72447224360196],"label":"Beer pong"},{"segment":[59.84941172787178,68.59932611356385],"label":"Beer pong"},{"segment":[89.42412235151097,90.82410865322171],"label":"Beer 
pong"}]},"gIzsM5cv7XM":{"duration":181.79,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=gIzsM5cv7XM","annotations":[{"segment":[24.241584243369736,26.793329953198132],"label":"Rock-paper-scissors"},{"segment":[30.195657566302653,40.68616770670827],"label":"Rock-paper-scissors"},{"segment":[56.56369656786271,61.38366068642746],"label":"Rock-paper-scissors"},{"segment":[77.2611895475819,82.93173556942278],"label":"Rock-paper-scissors"},{"segment":[95.12340951638068,98.24220982839314],"label":"Rock-paper-scissors"}]},"dHzX9NqyBZw":{"duration":12.8,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dHzX9NqyBZw","annotations":[{"segment":[0.43911430265210605,11.676448502340094],"label":"Springboard diving"}]},"8LiO75NtXNw":{"duration":224.14,"subset":"training","resolution":"596x480","url":"https://www.youtube.com/watch?v=8LiO75NtXNw","annotations":[{"segment":[7.691248049921997,207.31409516380654],"label":"Long jump"}]},"kGLMybXl5Kk":{"duration":115.68,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kGLMybXl5Kk","annotations":[{"segment":[19.6079295,104.63073322932918],"label":"Snatch"}]},"H0r_ZPXJLtg":{"duration":98.128,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=H0r_ZPXJLtg","annotations":[]},"lQWij22wbNU":{"duration":32.3,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=lQWij22wbNU","annotations":[{"segment":[0.01,32.298955],"label":"Playing kickball"}]},"Tvzd_WIvBU4":{"duration":68.03999999999999,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Tvzd_WIvBU4","annotations":[{"segment":[5.428389235569423,65.08796801872074],"label":"Chopping 
wood"}]},"Czd1PFeumIo":{"duration":164.12,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Czd1PFeumIo","annotations":[{"segment":[21.763001918876757,149.78066026521063],"label":"Tango"}]},"_6fbXk6y8X0":{"duration":95.27000000000001,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=_6fbXk6y8X0","annotations":[{"segment":[2.601657566302652,65.9334360374415],"label":"Windsurfing"}]},"SHT08nPhIb4":{"duration":44.93,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=SHT08nPhIb4","annotations":[{"segment":[8.03294472,43.07974258970359],"label":"Layup drill in basketball"}]},"L6BxxvCbwpQ":{"duration":41.425,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=L6BxxvCbwpQ","annotations":[{"segment":[0.01,29.642318542692543],"label":"Installing carpet"}]},"zvFff-07npY":{"duration":181.836,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=zvFff-07npY","annotations":[]},"l5qCvYE1-tA":{"duration":27.4,"subset":"training","resolution":"810x480","url":"https://www.youtube.com/watch?v=l5qCvYE1-tA","annotations":[{"segment":[3.559757410296412,8.718208268330734],"label":"High jump"},{"segment":[10.039794851794072,15.581932137285492],"label":"High jump"},{"segment":[16.94615054602184,20.228801092043682],"label":"High jump"},{"segment":[21.294596723868956,26.879365834633386],"label":"High jump"}]},"P79nFNUaysw":{"duration":73.82,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=P79nFNUaysw","annotations":[{"segment":[28.20100935898772,48.89707991973538],"label":"Preparing 
pasta"}]},"kTf-Id-lWX8":{"duration":66.061,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=kTf-Id-lWX8","annotations":[{"segment":[14.325190585854724,65.03018172427576],"label":"Shuffleboard"}]},"CnrvRF_N7fU":{"duration":153.577,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=CnrvRF_N7fU","annotations":[{"segment":[0.01,153.521],"label":"Using the rowing machine"}]},"JRg3W4xKvJI":{"duration":181.7,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=JRg3W4xKvJI","annotations":[{"segment":[12.18630733229329,34.575104524180965],"label":"Polishing forniture"},{"segment":[87.85477379095164,89.5551887675507],"label":"Polishing forniture"}]},"KuyaO3brwBc":{"duration":181.07,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=KuyaO3brwBc","annotations":[{"segment":[55.37271089,159.74196212338288],"label":"Cleaning shoes"}]},"9SY9ufDznFQ":{"duration":221.15,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=9SY9ufDznFQ","annotations":[{"segment":[12.5899375975039,218.51302652106085],"label":"Playing field 
hockey"}]},"j6OcN4OQZVM":{"duration":157.15,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=j6OcN4OQZVM","annotations":[]},"QgeqX3cWWRA":{"duration":116.611,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QgeqX3cWWRA","annotations":[]},"dIC0nm4nrI8":{"duration":113.08,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=dIC0nm4nrI8","annotations":[{"segment":[3.2587995319812793,109.30189781591264],"label":"Ballet"}]},"QkX8GFbwN7c":{"duration":32.694,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QkX8GFbwN7c","annotations":[{"segment":[0.2,19.82079407176287],"label":"Welding"}]},"7MWFfqhKgIY":{"duration":33.16,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=7MWFfqhKgIY","annotations":[{"segment":[14.615483399999999,29.123255728549143],"label":"Using parallel bars"}]},"0vssmM38x-0":{"duration":135.42,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0vssmM38x-0","annotations":[]},"Mn_wH5lwlkc":{"duration":66.735,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=Mn_wH5lwlkc","annotations":[]},"eSZh5MONfwQ":{"duration":110.017,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eSZh5MONfwQ","annotations":[]},"Swz9TXxW-kE":{"duration":15.14,"subset":"testing","resolution":"864x720","url":"https://www.youtube.com/watch?v=Swz9TXxW-kE","annotations":[]},"-4WdzYpCJPU":{"duration":203.36,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-4WdzYpCJPU","annotations":[{"segment":[99.4414976599064,160.21370094383778],"label":"Archery"}]},"4uitkT3dDIM":{"duration":131.1,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=4uitkT3dDIM","annotations":[{"segment":[7.567386834009361,127.4184325087289],"label":"Javelin 
throw"}]},"A8KtrGjBodw":{"duration":110.41,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=A8KtrGjBodw","annotations":[{"segment":[0,19.627385335413415],"label":"Drinking beer"}]},"DXhVbxfmrYM":{"duration":80.04,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DXhVbxfmrYM","annotations":[{"segment":[34.853763201709185,42.07347168693469],"label":"Sumo"}]},"6kUeZVukTEI":{"duration":125.18,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=6kUeZVukTEI","annotations":[{"segment":[108.79077613104523,124.20686817472699],"label":"High jump"}]},"0IIVlMtO520":{"duration":206.24,"subset":"testing","resolution":"294x240","url":"https://www.youtube.com/watch?v=0IIVlMtO520","annotations":[]},"0PbmXOVIwGc":{"duration":120.84,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=0PbmXOVIwGc","annotations":[{"segment":[4.999354924314431,117.91840157673253],"label":"Grooming horse"}]},"eMgC7I-GcgA":{"duration":118.445,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eMgC7I-GcgA","annotations":[]},"Cg_jN5G1ZpY":{"duration":70.775,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Cg_jN5G1ZpY","annotations":[{"segment":[0.2,63.22308580343214],"label":"Bullfighting"}]},"Ai6RYjJ97Zg":{"duration":208.32999999999998,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ai6RYjJ97Zg","annotations":[{"segment":[26.975574021600323,179.0788106735154],"label":"Making a sandwich"}]},"7VWp-Dfvqkg":{"duration":186.39,"subset":"training","resolution":"352x288","url":"https://www.youtube.com/watch?v=7VWp-Dfvqkg","annotations":[{"segment":[63.970439470640045,147.10142278],"label":"Playing 
harmonica"}]},"Q5HdVXF390c":{"duration":25.57,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=Q5HdVXF390c","annotations":[]},"p4Bm_NB_QoI":{"duration":185.342,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=p4Bm_NB_QoI","annotations":[{"segment":[22.229412797309994,176.39183401501828],"label":"Baton twirling"}]},"mhPpeG3LrUk":{"duration":90.396,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mhPpeG3LrUk","annotations":[{"segment":[45.543,78.114],"label":"Applying sunscreen"}]},"j5mhELw7XaM":{"duration":126.2,"subset":"validation","resolution":"192x144","url":"https://www.youtube.com/watch?v=j5mhELw7XaM","annotations":[{"segment":[15.5535688049922,124.03478920436818],"label":"Skateboarding"}]},"et029cxyEOs":{"duration":108.507,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=et029cxyEOs","annotations":[{"segment":[0.01,108.506847],"label":"Playing rubik cube"}]},"7t666Y8cLGk":{"duration":34.99,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=7t666Y8cLGk","annotations":[{"segment":[0,34.25196255850234],"label":"Smoking hookah"}]},"FyxltEKCfZ0":{"duration":68.43,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FyxltEKCfZ0","annotations":[{"segment":[49.21310782839314,67.25435560062404],"label":"Drinking coffee"}]},"WhOsURGm0Kg":{"duration":16.301,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=WhOsURGm0Kg","annotations":[{"segment":[2.45894207042261,8.492223042428854],"label":"Beer 
pong"}]},"X1E7i3ocxKY":{"duration":213.72,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=X1E7i3ocxKY","annotations":[{"segment":[35.00815689770448,37.342034024218115],"label":"Painting"},{"segment":[41.00955522302525,61.347627325501186],"label":"Painting"},{"segment":[69.01608074118883,191.37792385023403],"label":"Painting"}]},"uG7GIj313aA":{"duration":107.555,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=uG7GIj313aA","annotations":[]},"_RfQQ3F37jY":{"duration":24.1,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_RfQQ3F37jY","annotations":[]},"aVHmi3Kjr_Q":{"duration":56.169,"subset":"training","resolution":"1920x1440","url":"https://www.youtube.com/watch?v=aVHmi3Kjr_Q","annotations":[{"segment":[3.1092129485179405,17.91081825273011],"label":"Decorating the Christmas tree"},{"segment":[26.31883073322933,46.112693447737904],"label":"Decorating the Christmas tree"}]},"tJSF2GAp9TU":{"duration":80.85,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tJSF2GAp9TU","annotations":[{"segment":[5.802158187207489,50.45354945397816],"label":"Using parallel bars"}]},"8ZA8UGBEx74":{"duration":104.65,"subset":"validation","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=8ZA8UGBEx74","annotations":[{"segment":[0.4882665709675126,104.1640624621028],"label":"Cumbia"}]},"mEjU4uJZccw":{"duration":69.451,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=mEjU4uJZccw","annotations":[{"segment":[5.200690248018084,69.4508843537415],"label":"Playing 
congas"}]},"zGT6EplIOHY":{"duration":164.537,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zGT6EplIOHY","annotations":[{"segment":[11.805063962558503,123.69653978159126],"label":"Futsal"},{"segment":[129.3424399375975,160.39489079563182],"label":"Futsal"}]},"QHd9bJOuvIU":{"duration":86.239,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=QHd9bJOuvIU","annotations":[{"segment":[0,81.35975819032761],"label":"Fixing the roof"}]},"kmWf36zfL7o":{"duration":24.06,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=kmWf36zfL7o","annotations":[{"segment":[12.799424949999999,21.655754299999998],"label":"Getting a tattoo"}]},"gmFioIDX4X0":{"duration":111.53,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=gmFioIDX4X0","annotations":[{"segment":[6.694955758190328,92.59906340873636],"label":"Kayaking"}]},"goXkohySkU8":{"duration":26.75,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=goXkohySkU8","annotations":[{"segment":[11.297021840873635,18.925638065522623],"label":"Disc dog"}]},"0e-qdFlRmPU":{"duration":167.65,"subset":"validation","resolution":"480x352","url":"https://www.youtube.com/watch?v=0e-qdFlRmPU","annotations":[{"segment":[20.138692023871428,161.63261894414975],"label":"Tango"}]},"NOacQAV81Hw":{"duration":46.53,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=NOacQAV81Hw","annotations":[]},"N5Dt3ycaun4":{"duration":48.204,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=N5Dt3ycaun4","annotations":[]},"tXLvsYsWCoY":{"duration":70.496,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=tXLvsYsWCoY","annotations":[{"segment":[10.227937024960998,64.11706758658346],"label":"Drum 
corps"}]},"TQ22icNBoSI":{"duration":66.479,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TQ22icNBoSI","annotations":[{"segment":[5.0089478214656085,8.113667447633832],"label":"Roof shingle removal"},{"segment":[16.599901092493646,19.497639410250656],"label":"Roof shingle removal"},{"segment":[25.18962539155907,40.29926090557776],"label":"Roof shingle removal"},{"segment":[46.71568146632542,53.23559268127869],"label":"Roof shingle removal"}]},"SV3v-nA6hXM":{"duration":173.45,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SV3v-nA6hXM","annotations":[]},"l9jSJaV0DA0":{"duration":197.602,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=l9jSJaV0DA0","annotations":[{"segment":[111.90202044000127,148.8944228096724],"label":"Longboarding"}]},"8--Dvot-OcQ":{"duration":65.295,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8--Dvot-OcQ","annotations":[]},"KKEyKFmatq0":{"duration":135.884,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=KKEyKFmatq0","annotations":[{"segment":[0.01,135.883174],"label":"Futsal"}]},"BrnUW2LSJDI":{"duration":42.772,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BrnUW2LSJDI","annotations":[{"segment":[0.01,0.02],"label":"Shuffleboard"},{"segment":[0.2,20.646668553658362],"label":"Shuffleboard"},{"segment":[30.761297837147456,33.02378070319107],"label":"Shuffleboard"}]},"ds3ZFTS5dC0":{"duration":81.688,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=ds3ZFTS5dC0","annotations":[]},"FNB1BMEtIvw":{"duration":183.949,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FNB1BMEtIvw","annotations":[{"segment":[16.062739469578784,24.95461310452418],"label":"Snowboarding"},{"segment":[42.743009360374415,170.1114399375975],"label":"Snowboarding"}]},"JuC7EY32Spg":{"duration":26.634,"subset":"t
raining","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JuC7EY32Spg","annotations":[{"segment":[2.8011973478939156,21.93233775351014],"label":"Putting on shoes"}]},"l5EP4OdpeJs":{"duration":46.95,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=l5EP4OdpeJs","annotations":[]},"Wy0u0amd4Ko":{"duration":60.67,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Wy0u0amd4Ko","annotations":[{"segment":[0.6625837604932768,57.73944189391575],"label":"Playing squash"}]},"Gl6EMAgTNKo":{"duration":62.95,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=Gl6EMAgTNKo","annotations":[{"segment":[1.887676646805998,35.98414620219436],"label":"Rafting"}]},"Snj5CuEUbPI":{"duration":178.144,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Snj5CuEUbPI","annotations":[{"segment":[61.50502534020896,106.66412362636758],"label":"Rafting"},{"segment":[122.73300522512343,143.23468174767396],"label":"Rafting"}]},"brCkKw_gNWQ":{"duration":167.788,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=brCkKw_gNWQ","annotations":[]},"SQiIQDKhnh0":{"duration":155.27,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=SQiIQDKhnh0","annotations":[{"segment":[84.05511192667707,132.74409606864276],"label":"Polishing forniture"}]},"3dR4MEUDHa0":{"duration":153.76,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3dR4MEUDHa0","annotations":[{"segment":[3.8380614190554017,79.39989560670861],"label":"Ping-pong"}]},"WXST-TXQjoU":{"duration":14.86,"subset":"validation","resolution":"496x480","url":"https://www.youtube.com/watch?v=WXST-TXQjoU","annotations":[{"segment":[3.1544834836193445,7.476010307332293],"label":"Getting a haircut"},{"segment":[11.936195745709828,14.270282425897037],"label":"Getting a 
haircut"}]},"3B39HkjJnbc":{"duration":131.031,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3B39HkjJnbc","annotations":[]},"qyRFJiORdH0":{"duration":52.04,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qyRFJiORdH0","annotations":[]},"huUb8mM5fv4":{"duration":29.65,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=huUb8mM5fv4","annotations":[{"segment":[15.462480499219968,28.887987519500783],"label":"Triple jump"}]},"54chD8QFKyk":{"duration":104.374,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=54chD8QFKyk","annotations":[{"segment":[38.571102964118566,48.17319188767551],"label":"Clipping cat claws"},{"segment":[55.98506084243369,62.82044617784711],"label":"Clipping cat claws"},{"segment":[79.25792043681747,83.65209672386895],"label":"Clipping cat claws"}]},"GKpeX_lWMgg":{"duration":38.569,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=GKpeX_lWMgg","annotations":[{"segment":[0.19999999999999998,37.8],"label":"Waterskiing"}]},"jDfTrTtPs5s":{"duration":40.47,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=jDfTrTtPs5s","annotations":[{"segment":[0.2525577472698908,39.71470575819032],"label":"Playing 
racquetball"}]},"fHXgxSFDmJc":{"duration":38.917,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=fHXgxSFDmJc","annotations":[{"segment":[0.7909516380655226,38.21377535101404],"label":"Bullfighting"}]},"aYxni8ohTfU":{"duration":199.44,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aYxni8ohTfU","annotations":[]},"jhy6npIdn5o":{"duration":187.896,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=jhy6npIdn5o","annotations":[]},"HxenZkvSkPc":{"duration":228.51,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HxenZkvSkPc","annotations":[{"segment":[9.98158532055568,72.01000838400883],"label":"Walking the dog"},{"segment":[89.12129750496142,221.02081781230433],"label":"Walking the dog"}]},"ZPLbrQ391jg":{"duration":187.13,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZPLbrQ391jg","annotations":[{"segment":[27.441717837420978,167.86157187784107],"label":"BMX"}]},"2aw1pVJsnKs":{"duration":93.78999999999999,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2aw1pVJsnKs","annotations":[{"segment":[1.8270605694227768,82.6583476],"label":"Doing motocross"}]},"TjRibiXhHrg":{"duration":209.63,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TjRibiXhHrg","annotations":[]},"GvJxJf4m6_M":{"duration":92.114,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=GvJxJf4m6_M","annotations":[{"segment":[51.28561913986046,62.74609317913921],"label":"Putting on shoes"}]},"WCS8dzRT7Xk":{"duration":113.55,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=WCS8dzRT7Xk","annotations":[]},"CqAJxW84FkU":{"duration":220.31,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CqAJxW84FkU","annotations":[{"segment":[71.83304884304215,91.42388034569002],"label":"Chopping 
wood"},{"segment":[92.11127794227414,133.01143390951637],"label":"Chopping wood"}]},"SSqVGeD2XaQ":{"duration":118.144,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SSqVGeD2XaQ","annotations":[{"segment":[18.78980031201248,20.263510140405614],"label":"Rock-paper-scissors"},{"segment":[23.579357254290173,27.632059282371294],"label":"Rock-paper-scissors"},{"segment":[40.711234009360375,42.73758502340094],"label":"Rock-paper-scissors"},{"segment":[63.01176599063963,64.30148049921996],"label":"Rock-paper-scissors"},{"segment":[69.09184867394696,70.93429797191888],"label":"Rock-paper-scissors"},{"segment":[85.48964742589703,87.14785179407177],"label":"Rock-paper-scissors"},{"segment":[92.12246489859594,93.2279344773791],"label":"Rock-paper-scissors"},{"segment":[102.62442589703588,105.20385491419657],"label":"Rock-paper-scissors"},{"segment":[109.99422308892355,111.65242745709828],"label":"Rock-paper-scissors"}]},"K7iw2_1BrIk":{"duration":205.729,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=K7iw2_1BrIk","annotations":[]},"eGA5tbX8oH0":{"duration":78.97,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eGA5tbX8oH0","annotations":[{"segment":[20.943819656786268,61.47627063962558],"label":"Shoveling snow"}]},"VQ1t1jMI0LE":{"duration":131.913,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VQ1t1jMI0LE","annotations":[]},"hoyQ36EH1a8":{"duration":98.24000000000001,"subset":"validation","resolution":"854x470","url":"https://www.youtube.com/watch?v=hoyQ36EH1a8","annotations":[{"segment":[7.5067847113884545,16.851965678627142],"label":"Horseback riding"},{"segment":[20.52875819032761,30.48673790951638],"label":"Horseback riding"},{"segment":[40.75111700468018,43.81511076443057],"label":"Horseback riding"},{"segment":[50.55589703588143,69.70585803432137],"label":"Horseback 
riding"},{"segment":[73.99544929797192,90.3878159126365],"label":"Horseback riding"}]},"bM7Tgb1mJfc":{"duration":65.34,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bM7Tgb1mJfc","annotations":[{"segment":[1.7800312012480501,55.38439937597504],"label":"Archery"}]},"bfBTnUiGVUo":{"duration":69.59,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=bfBTnUiGVUo","annotations":[{"segment":[0.4631464276311405,63.98317595722616],"label":"Brushing teeth"}]},"v9vc_d-qFnk":{"duration":48.65,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=v9vc_d-qFnk","annotations":[{"segment":[0.7585179407176288,44.752558502340094],"label":"Using parallel bars"}]},"8klHUzPHtQw":{"duration":179.653,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=8klHUzPHtQw","annotations":[{"segment":[43.98963650546022,91.34153822152886],"label":"BMX"},{"segment":[99.74720124804992,175.97814040561624],"label":"BMX"}]},"FCYdwCXRNVI":{"duration":83.244,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FCYdwCXRNVI","annotations":[{"segment":[0.01,76.74850546021841],"label":"Welding"}]},"6eqIYm3Y-jY":{"duration":158.848,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=6eqIYm3Y-jY","annotations":[]},"pxt1-L_-H74":{"duration":224.26,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pxt1-L_-H74","annotations":[{"segment":[0.01,168.63087398127925],"label":"Zumba"}]},"_tRAypMWUdc":{"duration":215.584,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_tRAypMWUdc","annotations":[{"segment":[21.48894704049844,120.87532710280374],"label":"Canoeing"}]},"2nDJpvD-eZU":{"duration":70.705,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=2nDJpvD-eZU","annotations":[]},"IiG80Vp4WyY":{"duration":202.27,"subset":"training","resoluti
on":"1920x1080","url":"https://www.youtube.com/watch?v=IiG80Vp4WyY","annotations":[{"segment":[46.21758825117005,125.20046562],"label":"Playing saxophone"},{"segment":[125.40267119344773,191.0221821575663],"label":"Playing saxophone"}]},"jzl0Ar3H5f8":{"duration":170.481,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jzl0Ar3H5f8","annotations":[{"segment":[18.34915600624025,123.92328549141966],"label":"Doing fencing"}]},"OPp3DqFq0O0":{"duration":238.63,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OPp3DqFq0O0","annotations":[{"segment":[0.01,230.44131227457098],"label":"Playing violin"}]},"I637SOXSbHY":{"duration":57.61,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=I637SOXSbHY","annotations":[{"segment":[19.457102,48.73406368],"label":"Clean and jerk"}]},"4XTJzFjjFp0":{"duration":165.048,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=4XTJzFjjFp0","annotations":[{"segment":[0.01,153.46064252106083],"label":"Assembling bicycle"}]},"WlsVDIIo2dk":{"duration":236.31,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WlsVDIIo2dk","annotations":[{"segment":[5.529862990437985,228.5676702714367],"label":"Cumbia"}]},"m8SFyH4vhik":{"duration":105.77000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=m8SFyH4vhik","annotations":[{"segment":[8.25241809672387,91.76688923556942],"label":"Using the balance 
beam"}]},"wPYr19iFxhw":{"duration":56.611,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wPYr19iFxhw","annotations":[{"segment":[0.01,56.541],"label":"Welding"}]},"kCOMyIb7f0Q":{"duration":182.44,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kCOMyIb7f0Q","annotations":[{"segment":[49.807889458435476,96.20038078257824],"label":"Skiing"},{"segment":[103.6004100735458,135.7620745928237],"label":"Skiing"}]},"fvslbZDJ3C8":{"duration":34.44,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=fvslbZDJ3C8","annotations":[{"segment":[2.3077113884555382,33.32764586583464],"label":"Dodgeball"}]},"sOUNe30PXY4":{"duration":4.598,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=sOUNe30PXY4","annotations":[{"segment":[0.01,4.561],"label":"Doing a powerbomb"}]},"sRol1BJ9EUk":{"duration":13.54,"subset":"validation","resolution":"480x480","url":"https://www.youtube.com/watch?v=sRol1BJ9EUk","annotations":[{"segment":[0,13.537232],"label":"Layup drill in basketball"}]},"8A7nbBMC4eA":{"duration":204.27,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8A7nbBMC4eA","annotations":[{"segment":[49.40331759558379,174.12644670600264],"label":"Tango"}]},"i7rJN34TruM":{"duration":45.98,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=i7rJN34TruM","annotations":[{"segment":[0.01,45.97551020408163],"label":"Playing 
racquetball"}]},"yfPa5IKWTvU":{"duration":225.35,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yfPa5IKWTvU","annotations":[{"segment":[23.20289626521061,209.17762542121687],"label":"Cheerleading"}]},"MonWdYi3a0I":{"duration":78.14,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=MonWdYi3a0I","annotations":[]},"BnkUgUQBED0":{"duration":124.088,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=BnkUgUQBED0","annotations":[{"segment":[7.356198817472699,124.08746031746031],"label":"Clipping cat claws"}]},"S7MZle1gPzI":{"duration":31.092,"subset":"training","resolution":"1252x720","url":"https://www.youtube.com/watch?v=S7MZle1gPzI","annotations":[{"segment":[0.01,31.091451247165534],"label":"Waterskiing"}]},"WdNtrGxSm1Y":{"duration":109.835,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=WdNtrGxSm1Y","annotations":[{"segment":[0.01,109.821],"label":"Disc dog"}]},"6tzQ5-VBgVU":{"duration":159.591,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6tzQ5-VBgVU","annotations":[{"segment":[0.2,154.5243697347894],"label":"Rollerblading"}]},"WttP_X-aCEA":{"duration":23.639,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WttP_X-aCEA","annotations":[{"segment":[0.2,16.891697347893913],"label":"Using the monkey bar"}]},"u1VIetb75rs":{"duration":148.03,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=u1VIetb75rs","annotations":[{"segment":[28.581053159999996,79.40908414999998],"label":"Snatch"}]},"LbVnSPZZWj4":{"duration":68.035,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LbVnSPZZWj4","annotations":[]},"jUmfhYsA5r0":{"duration":66.76,"subset":"validation","resolution":"482x360","url":"https://www.youtube.com/watch?v=jUmfhYsA5r0","annotations":[{"segment":[0.01,56.01984243369734],"label":"Playing 
guitarra"}]},"aYHErLYOPrs":{"duration":59.33,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=aYHErLYOPrs","annotations":[]},"xhyCKLKaG0c":{"duration":47.485,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xhyCKLKaG0c","annotations":[{"segment":[6.830835626822023,43.57245089161953],"label":"Rock-paper-scissors"}]},"-nzBW4S8UqA":{"duration":164.56,"subset":"testing","resolution":"848x480","url":"https://www.youtube.com/watch?v=-nzBW4S8UqA","annotations":[]},"mc-1msYF8Xo":{"duration":114.34,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mc-1msYF8Xo","annotations":[{"segment":[24.030930299999998,98.10340132605305],"label":"Making a sandwich"}]},"XQaaA2UZYh8":{"duration":64.157,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=XQaaA2UZYh8","annotations":[{"segment":[6.230377722446049,53.55728426450212],"label":"Welding"}]},"dI1RqUHyWYo":{"duration":90.42,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=dI1RqUHyWYo","annotations":[{"segment":[10.579387909516381,79.19803038],"label":"Springboard diving"}]},"Ntyl5alZXeM":{"duration":70.68,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=Ntyl5alZXeM","annotations":[]},"dJVWRkiRXHM":{"duration":72.56,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=dJVWRkiRXHM","annotations":[{"segment":[0,0],"label":"Playing racquetball"}]},"ApH2nZIq2tU":{"duration":42.098,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ApH2nZIq2tU","annotations":[{"segment":[4.5211684867394695,26.799390015600626],"label":"Drum 
corps"}]},"8qzR38WgsS8":{"duration":126.735,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8qzR38WgsS8","annotations":[{"segment":[69.48936221554719,120.97425473891167],"label":"BMX"}]},"6YNLwicEaG4":{"duration":147.725,"subset":"testing","resolution":"384x288","url":"https://www.youtube.com/watch?v=6YNLwicEaG4","annotations":[]},"f025sQGYfJM":{"duration":200.064,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=f025sQGYfJM","annotations":[{"segment":[13.399866912497084,189.77951045838896],"label":"Decorating the Christmas tree"}]},"DnHpfQKDNnc":{"duration":72.888,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=DnHpfQKDNnc","annotations":[]},"1hiyhNqakMI":{"duration":78.948,"subset":"validation","resolution":"320x192","url":"https://www.youtube.com/watch?v=1hiyhNqakMI","annotations":[{"segment":[0,48.649607457098284],"label":"Cutting the grass"}]},"pniQHSjY7dc":{"duration":85.38,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=pniQHSjY7dc","annotations":[{"segment":[24.22368618395321,84.2516799534236],"label":"Kneeling"}]},"IdEcXDZ4Cos":{"duration":214.97,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IdEcXDZ4Cos","annotations":[{"segment":[68.87074150519462,72.88262965703389],"label":"Wakeboarding"},{"segment":[129.71771180809029,139.74743218768847],"label":"Wakeboarding"},{"segment":[163.4844362221041,182.2065808317464],"label":"Wakeboarding"},{"segment":[198.25413335429695,201.59737346316163],"label":"Wakeboarding"}]},"tElXd9QahbU":{"duration":16.951,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tElXd9QahbU","annotations":[]},"P3BkKdkf43I":{"duration":72.75,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=P3BkKdkf43I","annotations":[{"segment":[11.235672259543444,62.07992652495216],"label":"Playing 
squash"}]},"J8mSgO4r-kQ":{"duration":190.799,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=J8mSgO4r-kQ","annotations":[{"segment":[10.117307332293294,129.15799375975038],"label":"Ice fishing"}]},"Vh8RINfopdk":{"duration":56.98,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Vh8RINfopdk","annotations":[{"segment":[14.93437887051482,49.07010200312012],"label":"Doing nails"}]},"pt6XC-p_CFE":{"duration":121.58,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pt6XC-p_CFE","annotations":[{"segment":[28.634985352574102,102.02398754758191],"label":"Making a cake"}]},"fh21bbDSVmA":{"duration":52.32,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=fh21bbDSVmA","annotations":[]},"74EoyLUbtgI":{"duration":171.22,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=74EoyLUbtgI","annotations":[{"segment":[2.7390901028047843,99.52834711215058],"label":"Polishing shoes"}]},"IAj0JIDDaOQ":{"duration":65.13,"subset":"validation","resolution":"648x480","url":"https://www.youtube.com/watch?v=IAj0JIDDaOQ","annotations":[{"segment":[29.051368174726992,42.43629173166927],"label":"Cleaning windows"}]},"0WVkoTBmhA0":{"duration":153.902,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0WVkoTBmhA0","annotations":[{"segment":[28.615270459148366,125.43745737503622],"label":"Sumo"}]},"9fC5mvyEkzE":{"duration":200.667,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=9fC5mvyEkzE","annotations":[]},"cfaBPxE-A5k":{"duration":95.2,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cfaBPxE-A5k","annotations":[{"segment":[9.950858448517941,86.88436108034323],"label":"Chopping 
wood"}]},"TET92DxNsJY":{"duration":39.474,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TET92DxNsJY","annotations":[]},"prtLoXM3Oik":{"duration":148.933,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=prtLoXM3Oik","annotations":[]},"VSONGdnvKiM":{"duration":190.776,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VSONGdnvKiM","annotations":[{"segment":[10.51639204208279,15.863709799092137],"label":"Scuba diving"},{"segment":[69.93103378663108,75.57542475236316],"label":"Scuba diving"},{"segment":[82.70518176170896,89.53786556233202],"label":"Scuba diving"},{"segment":[110.03591696420118,115.08616151248778],"label":"Scuba diving"},{"segment":[127.26616307012019,143.62024873994966],"label":"Scuba diving"},{"segment":[154.01890138792473,161.44651042219263],"label":"Scuba diving"}]},"UL_3QfD3ERM":{"duration":90.953,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UL_3QfD3ERM","annotations":[{"segment":[18.293648985959436,31.34028237129485],"label":"Disc dog"},{"segment":[36.30367550702028,46.66616068642747],"label":"Disc dog"},{"segment":[53.190912636505466,77.871496099844],"label":"Disc 
dog"}]},"r_j3snNLxks":{"duration":51.432,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=r_j3snNLxks","annotations":[]},"Z-kXPcLsxBg":{"duration":130.334,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Z-kXPcLsxBg","annotations":[{"segment":[17.486255897035882,123.01377695007801],"label":"Slacklining"}]},"SEZvtGus0HU":{"duration":206.054,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SEZvtGus0HU","annotations":[{"segment":[0,13.501190003120126],"label":"Surfing"}]},"Ck-9AHZNkq4":{"duration":61.26,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ck-9AHZNkq4","annotations":[{"segment":[9.74716439625585,52.65379982683307],"label":"Spinning"}]},"hNvTMSL2jOA":{"duration":138.23,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=hNvTMSL2jOA","annotations":[]},"OmE9IhdWFa0":{"duration":147.749,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=OmE9IhdWFa0","annotations":[{"segment":[61.446835186915884,70.42221560747663],"label":"Surfing"},{"segment":[74.10442295950155,81.00856174454827],"label":"Surfing"},{"segment":[83.77021725856697,89.98394216510904],"label":"Surfing"},{"segment":[94.58670135514019,109.31553076323988],"label":"Surfing"},{"segment":[112.07718627725858,116.2196695482866],"label":"Surfing"},{"segment":[119.21146302180685,127.26629160436137],"label":"Surfing"},{"segment":[119.21146302180685,127.26629160436137],"label":"Surfing"},{"segment":[131.17863691588784,138.5430516199377],"label":"Surfing"}]},"puGw7gu96AM":{"duration":54.103,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=puGw7gu96AM","annotations":[{"segment":[0,51.54990795631826],"label":"Calf 
roping"}]},"D-ZEf_qoDIY":{"duration":93.46,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=D-ZEf_qoDIY","annotations":[]},"LYTWYSqsTCE":{"duration":7.617,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=LYTWYSqsTCE","annotations":[]},"xF7etZzLFbY":{"duration":47.601,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xF7etZzLFbY","annotations":[{"segment":[0,47.600906],"label":"Cutting the grass"}]},"y0VZOcxbjRM":{"duration":76.139,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=y0VZOcxbjRM","annotations":[{"segment":[0.2,76.101],"label":"Shuffleboard"}]},"kNAgK0nC9Ig":{"duration":233.663,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=kNAgK0nC9Ig","annotations":[{"segment":[19.319980331186034,233.66240362811791],"label":"Surfing"}]},"Ed9xVend710":{"duration":237.471,"subset":"training","resolution":"634x480","url":"https://www.youtube.com/watch?v=Ed9xVend710","annotations":[{"segment":[28.89656340539336,221.54031833073324],"label":"Waterskiing"}]},"OCZqTCabREw":{"duration":38.43,"subset":"testing","resolution":"328x240","url":"https://www.youtube.com/watch?v=OCZqTCabREw","annotations":[]},"N3UOyPU2t0E":{"duration":196.975,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=N3UOyPU2t0E","annotations":[{"segment":[25.119594991357484,38.90474114935412],"label":"Beer pong"},{"segment":[58.81661893312704,84.54889176138742],"label":"Beer pong"},{"segment":[119.77759860960104,157.763334689414],"label":"Beer pong"}]},"p6ea3QpXryw":{"duration":138.484,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=p6ea3QpXryw","annotations":[{"segment":[13.174572542901716,31.10063026521061],"label":"Throwing darts"},{"segment":[56.36989235569423,85.52673322932918],"label":"Throwing darts"},{"segment":[102.58888455538221,137.7930702028081],"label":"Throwing 
darts"}]},"Relgb880pRI":{"duration":127.46,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Relgb880pRI","annotations":[{"segment":[6.957464898595944,111.71700780031202],"label":"Preparing salad"}]},"v8XB6emr_MU":{"duration":180.09,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=v8XB6emr_MU","annotations":[]},"K7f0co9akMI":{"duration":191.078,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=K7f0co9akMI","annotations":[{"segment":[0.20000000000000004,175.54889408099686],"label":"Playing beach volleyball"}]},"UadYaZOC6B8":{"duration":75.26,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UadYaZOC6B8","annotations":[{"segment":[35.439535101404054,56.79713572542902],"label":"Washing hands"}]},"CZGDWYYkO-U":{"duration":232.455,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=CZGDWYYkO-U","annotations":[{"segment":[158.4387472698908,221.52419812792513],"label":"Putting in contact lenses"}]},"dpUDMvzlwmI":{"duration":111.735,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dpUDMvzlwmI","annotations":[{"segment":[0.01,110.8773828228383],"label":"Beach soccer"}]},"09Kr5TQ9DHQ":{"duration":122.254,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=09Kr5TQ9DHQ","annotations":[{"segment":[30.507581903276133,94.76417628705148],"label":"Skiing"}]},"YQiTt8b6N60":{"duration":198.531,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YQiTt8b6N60","annotations":[{"segment":[22.915872074882998,26.322285491419656],"label":"Scuba diving"},{"segment":[46.76076599063963,62.244463338533535],"label":"Scuba diving"},{"segment":[145.23708112324493,147.4047987519501],"label":"Scuba diving"},{"segment":[168.4626271450858,174.3464321372855],"label":"Scuba 
diving"}]},"Y-UOZRZ01hI":{"duration":42.191,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Y-UOZRZ01hI","annotations":[{"segment":[2.497032761310453,42.121],"label":"Welding"}]},"7qjiHcYuq2Y":{"duration":23.081,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7qjiHcYuq2Y","annotations":[{"segment":[0.01,20.11091887675507],"label":"Playing beach volleyball"}]},"GSXP6BYeS-o":{"duration":182.765,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=GSXP6BYeS-o","annotations":[{"segment":[27.080335413416538,182.721],"label":"Clipping cat claws"}]},"xlr_sSnttZo":{"duration":188.38,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xlr_sSnttZo","annotations":[{"segment":[0.01,166.63563088455538],"label":"Brushing teeth"}]},"liK3LME0gNY":{"duration":236.565,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=liK3LME0gNY","annotations":[{"segment":[0.2,234.71964044276055],"label":"Plastering"}]},"A_Sr0mCX2rI":{"duration":32.137,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=A_Sr0mCX2rI","annotations":[]},"OHOi0ZSuDUg":{"duration":224.166,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=OHOi0ZSuDUg","annotations":[{"segment":[9.791938191813388,217.87062459594384],"label":"Playing congas"}]},"w1qa9NOiFbE":{"duration":142.76,"subset":"training","resolution":"322x240","url":"https://www.youtube.com/watch?v=w1qa9NOiFbE","annotations":[{"segment":[20.93461513154404,25.834205907011793],"label":"Hand washing clothes"},{"segment":[37.41505683084467,40.53297823341506],"label":"Hand washing clothes"},{"segment":[44.09631697920979,57.013419932715685],"label":"Hand washing clothes"},{"segment":[60.57675867851041,93.3149334054995],"label":"Hand washing clothes"},{"segment":[105.56390997191887,117.8128868673947],"label":"Hand washing 
clothes"},{"segment":[132.73436781279253,134.96145452106083],"label":"Hand washing clothes"}]},"zKYiTA6WBS4":{"duration":87.167,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=zKYiTA6WBS4","annotations":[]},"iJzt1rvGiOc":{"duration":189.01,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iJzt1rvGiOc","annotations":[{"segment":[27.127787107644306,42.46088416848674],"label":"Cleaning shoes"},{"segment":[65.4605297597504,173.3819436879875],"label":"Cleaning shoes"}]},"h1bdAd1cXSw":{"duration":27.08,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=h1bdAd1cXSw","annotations":[{"segment":[0,15.790150416536662],"label":"Springboard diving"}]},"sNTOUuE7ZfM":{"duration":146.52,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=sNTOUuE7ZfM","annotations":[]},"otq24Pdm3sc":{"duration":211.14,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=otq24Pdm3sc","annotations":[{"segment":[0.01,211.101],"label":"Tug of war"}]},"tHhdS8t9ytI":{"duration":106.21000000000001,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=tHhdS8t9ytI","annotations":[{"segment":[28.48625897035881,58.95993135725429],"label":"Drinking coffee"}]},"5bZ2FdbAKiQ":{"duration":110.14,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5bZ2FdbAKiQ","annotations":[{"segment":[0,110.2018820861678],"label":"Using the balance beam"}]},"gik__XL3xL4":{"duration":139.436,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gik__XL3xL4","annotations":[]},"Ni7Lqloy5Qc":{"duration":217.94,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Ni7Lqloy5Qc","annotations":[{"segment":[122.4014008723614,205.36235035251747],"label":"Getting a 
tattoo"}]},"1o-Fx2dGfpc":{"duration":77.95,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1o-Fx2dGfpc","annotations":[{"segment":[28.21257028393136,54.35784015912636],"label":"Playing violin"},{"segment":[59.34368232137285,71.13945524180969],"label":"Playing violin"}]},"ynpvos7UFZo":{"duration":200.621,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=ynpvos7UFZo","annotations":[{"segment":[2.8080566223384316,9.672206670123227],"label":"Knitting"},{"segment":[31.512684094893032,38.06482732232398],"label":"Knitting"},{"segment":[48.36105239400117,63.96139341169389],"label":"Knitting"},{"segment":[77.37768668690963,81.43377535150974],"label":"Knitting"},{"segment":[87.98591857894068,98.90615729132557],"label":"Knitting"},{"segment":[107.6423482612335,108.57836872229505],"label":"Knitting"},{"segment":[133.5389143506034,149.13925536829612],"label":"Knitting"},{"segment":[159.74748607585676,166.92364289078674],"label":"Knitting"}]},"DvYEsBlxfhM":{"duration":223.66,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DvYEsBlxfhM","annotations":[]},"06xJ8-Dg_j8":{"duration":124.181,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=06xJ8-Dg_j8","annotations":[{"segment":[0,116.30053198127925],"label":"Playing drums"}]},"WXkuikumRDg":{"duration":65.295,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=WXkuikumRDg","annotations":[]},"rlhs3UzfcVE":{"duration":184.32,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=rlhs3UzfcVE","annotations":[]},"cip9fHzktRE":{"duration":14.56,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=cip9fHzktRE","annotations":[]},"-Cefl3S_L-w":{"duration":82.919,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-Cefl3S_L-w","annotations":[{"segment":[0.01,82.861],"label":"Tug of 
war"}]},"MtmQjudesdM":{"duration":26.12,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MtmQjudesdM","annotations":[{"segment":[6.505085803432138,10.245510140405615],"label":"High jump"}]},"qVqlImNflY8":{"duration":97.59,"subset":"validation","resolution":"270x480","url":"https://www.youtube.com/watch?v=qVqlImNflY8","annotations":[{"segment":[13.185790230000002,91.23124222620905],"label":"Bathing dog"}]},"S_CnkpTkjYQ":{"duration":160.265,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=S_CnkpTkjYQ","annotations":[{"segment":[12.2493120124805,62.74647581903277],"label":"Playing congas"},{"segment":[66.49626521060843,83.99528237129486],"label":"Playing congas"},{"segment":[120.49323244929798,160.241],"label":"Playing congas"}]},"le7JdKQB65I":{"duration":143.82999999999998,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=le7JdKQB65I","annotations":[]},"lGPUCwHjiK0":{"duration":179.49,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lGPUCwHjiK0","annotations":[{"segment":[18.20103933409037,147.00839344773792],"label":"High jump"}]},"G8OyFOhVGCI":{"duration":143.5,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=G8OyFOhVGCI","annotations":[{"segment":[55.519158627569595,67.16027253335031],"label":"Getting a piercing"},{"segment":[113.94859442433697,123.35103247113884],"label":"Getting a piercing"}]},"aO_1pB_MXUw":{"duration":69.962,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aO_1pB_MXUw","annotations":[]},"Z86tpjRaiK8":{"duration":231.526,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Z86tpjRaiK8","annotations":[{"segment":[0.01,204.79771625897035],"label":"Doing 
fencing"}]},"0HhNhRExwSQ":{"duration":133.86,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0HhNhRExwSQ","annotations":[{"segment":[55.793611000640986,78.69397502863475],"label":"Rock climbing"}]},"G4-qjGC_3VA":{"duration":84.8,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=G4-qjGC_3VA","annotations":[]},"f6wAW0Jv2Eo":{"duration":223.56,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=f6wAW0Jv2Eo","annotations":[{"segment":[57.03573790951638,187.15396567862715],"label":"Kayaking"}]},"i4SvqrGYH-Q":{"duration":172.084,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=i4SvqrGYH-Q","annotations":[{"segment":[4.2947644305772235,23.621204368174727],"label":"Beer pong"},{"segment":[51.2687503900156,66.30042589703588],"label":"Beer pong"},{"segment":[71.40045865834634,82.13736973478939],"label":"Beer pong"},{"segment":[106.83226521060843,119.98498127925116],"label":"Beer pong"},{"segment":[134.74823400936037,145.21672230889234],"label":"Beer pong"}]},"GG9FU9MRkeA":{"duration":120.28,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=GG9FU9MRkeA","annotations":[]},"eJvOgEahkys":{"duration":120.37,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=eJvOgEahkys","annotations":[{"segment":[6.197009880395216,46.00810365747963],"label":"Ballet"},{"segment":[57.650970705494885,116.42866929797191],"label":"Ballet"}]},"LCt-h6opmyE":{"duration":57.563,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LCt-h6opmyE","annotations":[]},"jjuitTS2cuM":{"duration":36.06,"subset":"training","resolution":"482x360","url":"https://www.youtube.com/watch?v=jjuitTS2cuM","annotations":[{"segment":[0.688591797623679,35.69965721818443],"label":"Walking the 
dog"}]},"QbXjUvFrvTA":{"duration":119.003,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=QbXjUvFrvTA","annotations":[]},"mi6wetIKq2w":{"duration":139.994,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mi6wetIKq2w","annotations":[{"segment":[22.268302652106083,36.240570982839316],"label":"Baking cookies"},{"segment":[38.64205460218408,72.04450858034322],"label":"Baking cookies"},{"segment":[74.6643088923557,88.21254914196568],"label":"Baking cookies"},{"segment":[92.79785491419658,103.06020592823714],"label":"Baking cookies"},{"segment":[103.71524960998441,117.68951482059283],"label":"Baking cookies"}]},"AjqnGUbsduw":{"duration":148.144,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=AjqnGUbsduw","annotations":[]},"81F42Yyw_iY":{"duration":66.67,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=81F42Yyw_iY","annotations":[{"segment":[4.472005476137412,5.824007131713841],"label":"Pole vault"},{"segment":[11.23201375401955,54.704066987169284],"label":"Pole vault"}]},"CD8FFj8AJE0":{"duration":119.05,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=CD8FFj8AJE0","annotations":[{"segment":[79.11778187644306,84.88955736],"label":"Getting a tattoo"}]},"9IvKkq9k81o":{"duration":49.436,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9IvKkq9k81o","annotations":[{"segment":[5.398548894336726,35.16768891107644],"label":"Carving jack-o-lanterns"}]},"krs42Q-HhQY":{"duration":167.649,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=krs42Q-HhQY","annotations":[]},"aZQWCEejbk8":{"duration":21.85,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aZQWCEejbk8","annotations":[{"segment":[10.228382063134461,21.65],"label":"Fun sliding 
down"}]},"cVuHOF56B64":{"duration":40.334,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cVuHOF56B64","annotations":[{"segment":[14.286187967824501,34.766826693681196],"label":"Beer pong"}]},"8WFB_LUOx4k":{"duration":153.716,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8WFB_LUOx4k","annotations":[{"segment":[15.701709048361934,16.660592043681746],"label":"Croquet"},{"segment":[72.27580577223088,74.19357176287052],"label":"Croquet"},{"segment":[78.74826599063962,81.86463572542903],"label":"Croquet"},{"segment":[88.81653744149767,92.66412870514822],"label":"Croquet"}]},"lgB0Ynn38-k":{"duration":107.091,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lgB0Ynn38-k","annotations":[{"segment":[5.403078759131031,39.255700254458134],"label":"Beer pong"},{"segment":[58.099770347916085,68.60575632922449],"label":"Beer pong"},{"segment":[73.27508343202823,97.2887656750189],"label":"Beer pong"}]},"OnfvTk_DECY":{"duration":218.85,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OnfvTk_DECY","annotations":[{"segment":[38.60916416,198.44934867394693],"label":"Getting a haircut"}]},"CQ4dPckD_Xc":{"duration":105.164,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CQ4dPckD_Xc","annotations":[{"segment":[4.92183336973479,10.828033413416538],"label":"Playing ten pins"},{"segment":[33.46846691419657,49.218333697347894],"label":"Playing ten pins"},{"segment":[65.78850604212168,73.66343943369735],"label":"Playing ten pins"},{"segment":[92.03828401404056,103.35850076443059],"label":"Playing ten pins"}]},"UeuZeIASSPw":{"duration":129.03,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=UeuZeIASSPw","annotations":[{"segment":[10.064999062547537,15.902698518825106],"label":"Triple jump"},{"segment":[27.779397412631198,33.41579688765783],"label":"Triple 
jump"},{"segment":[42.272996062699654,53.34449503150194],"label":"Triple jump"},{"segment":[61.59779426279092,67.4354937190685],"label":"Triple jump"},{"segment":[78.30569270661982,89.37719161872076],"label":"Triple jump"},{"segment":[101.05259052386896,108.50068982542902],"label":"Triple jump"}]},"jsxrJJkUl2E":{"duration":223.864,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jsxrJJkUl2E","annotations":[{"segment":[76.46926521060843,94.27717628705147],"label":"Rock-paper-scissors"}]},"9Tip00S7cFY":{"duration":60.81,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=9Tip00S7cFY","annotations":[{"segment":[25.498346333853355,41.51775351014041],"label":"Playing racquetball"}]},"GcE-0A4Titg":{"duration":119.096,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=GcE-0A4Titg","annotations":[{"segment":[8.70517658721237,10.742559003112891],"label":"Welding"},{"segment":[17.22513941734183,31.857249495172855],"label":"Welding"},{"segment":[51.30499073785967,63.158852066735435],"label":"Welding"},{"segment":[67.23361689853648,84.273541823897],"label":"Welding"},{"segment":[92.79350457972905,96.31261963105098],"label":"Welding"}]},"BwwjQZDn3NE":{"duration":65.968,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=BwwjQZDn3NE","annotations":[{"segment":[0.8233122141212179,55.676487503900155],"label":"Hand car wash"}]},"T3dcZ5GT1qQ":{"duration":95.062,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=T3dcZ5GT1qQ","annotations":[]},"C6YSkEP3t9U":{"duration":159.452,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=C6YSkEP3t9U","annotations":[]},"nXr1r26HZis":{"duration":47.276,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=nXr1r26HZis","annotations":[{"segment":[0,41.492485179407176],"label":"Table 
soccer"}]},"tETVZtWqdDs":{"duration":93.159,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tETVZtWqdDs","annotations":[{"segment":[35.38851850672666,39.893832977808906],"label":"Applying sunscreen"}]},"cA2ZLOZcCyc":{"duration":20.23,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=cA2ZLOZcCyc","annotations":[{"segment":[0.010000000000000002,18.48924070826833],"label":"Doing motocross"}]},"85DumZo8-ik":{"duration":18.97,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=85DumZo8-ik","annotations":[{"segment":[5.821180967238689,12.144697347893915],"label":"Triple jump"}]},"TbFqsEFrCB4":{"duration":197.858,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TbFqsEFrCB4","annotations":[{"segment":[120.68950764289076,129.94957216792073],"label":"Swimming"},{"segment":[162.35979800552568,167.2984990702028],"label":"Swimming"}]},"zxm4KYn7r8c":{"duration":195.05,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=zxm4KYn7r8c","annotations":[{"segment":[20.086446177847115,25.564567862714508],"label":"Archery"}]},"mdv8UW_871g":{"duration":123.314,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mdv8UW_871g","annotations":[{"segment":[0.01,113.79339655478645],"label":"Wakeboarding"}]},"oeSyeZqlC1g":{"duration":81.46000000000001,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=oeSyeZqlC1g","annotations":[{"segment":[12.707567862714509,79.04107210608424],"label":"Kayaking"}]},"GhvNhbL2ivg":{"duration":150.652,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=GhvNhbL2ivg","annotations":[{"segment":[0.01,149.49552466826566],"label":"Playing 
congas"}]},"SMLSQG3tsro":{"duration":103.654,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=SMLSQG3tsro","annotations":[]},"K1G84xfLr9s":{"duration":126.97,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=K1G84xfLr9s","annotations":[{"segment":[7.328811711388456,124.05413577862917],"label":"Doing karate"}]},"RHtpBRwZ9hM":{"duration":138.531,"subset":"validation","resolution":"852x476","url":"https://www.youtube.com/watch?v=RHtpBRwZ9hM","annotations":[{"segment":[20.955783151326052,46.664424336973475],"label":"Curling"},{"segment":[74.96553354134164,81.67453666146646],"label":"Curling"}]},"Y03qa1Qlnbc":{"duration":52.5,"subset":"testing","resolution":"424x240","url":"https://www.youtube.com/watch?v=Y03qa1Qlnbc","annotations":[]},"B8WIh6PUjE0":{"duration":107.718,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=B8WIh6PUjE0","annotations":[{"segment":[28.720797191887677,94.07388455538222],"label":"Spread mulch"}]},"PJgy8J1f3jg":{"duration":186.02,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PJgy8J1f3jg","annotations":[{"segment":[0.01,160.76810277191888],"label":"Paintball"}]},"bTC_MdhURLg":{"duration":68.337,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bTC_MdhURLg","annotations":[{"segment":[13.954463338533541,56.047310452418095],"label":"Playing blackjack"}]},"8tCRp3NKtkI":{"duration":89.05,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=8tCRp3NKtkI","annotations":[]},"y1jyeWW-G4E":{"duration":104.56,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=y1jyeWW-G4E","annotations":[{"segment":[13.538899890689507,18.4324781644327],"label":"Building sandcastles"},{"segment":[49.42514056480626,55.623672096723865],"label":"Building 
sandcastles"}]},"hUzsmIWojH0":{"duration":117.38,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=hUzsmIWojH0","annotations":[{"segment":[9.885285491419657,112.94853978159126],"label":"High jump"}]},"RW-nnJiVPsU":{"duration":161.054,"subset":"training","resolution":"160x120","url":"https://www.youtube.com/watch?v=RW-nnJiVPsU","annotations":[{"segment":[24.62286341029641,49.99948794539782],"label":"Welding"},{"segment":[90.20008126833073,121.8580485101404],"label":"Welding"}]},"k1WnO7UeBJ8":{"duration":82.083,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=k1WnO7UeBJ8","annotations":[{"segment":[15.878681623455416,67.3563416349454],"label":"Installing carpet"}]},"GZ30mkTroI8":{"duration":119.33,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GZ30mkTroI8","annotations":[]},"4mRkWNLE66E":{"duration":195.19,"subset":"testing","resolution":"654x480","url":"https://www.youtube.com/watch?v=4mRkWNLE66E","annotations":[]},"Bkhr72sazvQ":{"duration":212.74,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=Bkhr72sazvQ","annotations":[]},"MkL-tApJgXc":{"duration":67.06,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MkL-tApJgXc","annotations":[{"segment":[13.17417472698908,20.493160686427455],"label":"Drinking coffee"}]},"crSMHcUkPcA":{"duration":90.326,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=crSMHcUkPcA","annotations":[{"segment":[14.514105202808112,77.5025035101404],"label":"Hanging wallpaper"}]},"FhGOGbL-A84":{"duration":47.95,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=FhGOGbL-A84","annotations":[{"segment":[0.2,47.74920634920635],"label":"Blowing 
leaves"}]},"sVw0kD_9vxA":{"duration":138.72,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=sVw0kD_9vxA","annotations":[]},"sPSfixKrDc0":{"duration":130.775,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sPSfixKrDc0","annotations":[{"segment":[28.721564837773116,95.33115022429907],"label":"Making a cake"}]},"KqmxWapIzSY":{"duration":237.82,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=KqmxWapIzSY","annotations":[]},"ySvPn8Yb7Xw":{"duration":24.92,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=ySvPn8Yb7Xw","annotations":[]},"d28kk1Lzusk":{"duration":28.7,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=d28kk1Lzusk","annotations":[]},"NpBZn7OHUKo":{"duration":119.839,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NpBZn7OHUKo","annotations":[{"segment":[30.65111388455538,62.42360998439938],"label":"Kneeling"}]},"Koxtbgzexmw":{"duration":12.73,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Koxtbgzexmw","annotations":[{"segment":[0.01,12.724534],"label":"Ping-pong"}]},"xC5RVs9mXyM":{"duration":57.331,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xC5RVs9mXyM","annotations":[{"segment":[8.31777883151326,57.330068027210885],"label":"Drum corps"}]},"2RfasAe0-tA":{"duration":85.75,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=2RfasAe0-tA","annotations":[{"segment":[7.625311959417152,82.00554791443358],"label":"Cumbia"}]},"gLfIPN_WM48":{"duration":192.192,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gLfIPN_WM48","annotations":[{"segment":[14.48919664620064,158.48306786738445],"label":"Baking 
cookies"}]},"OUpTPRtEITY":{"duration":175.82,"subset":"validation","resolution":"848x480","url":"https://www.youtube.com/watch?v=OUpTPRtEITY","annotations":[{"segment":[13.988887067082683,18.37755751950078],"label":"Playing badminton"}]},"H33jSILKmfI":{"duration":15.05,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=H33jSILKmfI","annotations":[]},"rMes9poepAI":{"duration":219.36,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=rMes9poepAI","annotations":[{"segment":[5.817631039935475,14.715184395130906],"label":"Vacuuming floor"},{"segment":[104.3751451282541,110.87720334935847],"label":"Vacuuming floor"}]},"K8XNOs0AwaE":{"duration":139.3,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=K8XNOs0AwaE","annotations":[{"segment":[0,35.421588741029645],"label":"Doing motocross"}]},"-5xWaBSwnjY":{"duration":143.337,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=-5xWaBSwnjY","annotations":[{"segment":[16.99468842971406,143.336779],"label":"Disc dog"}]},"2VoWT4gnQDg":{"duration":130.751,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=2VoWT4gnQDg","annotations":[{"segment":[53.22649141965679,87.89508736349454],"label":"Roof shingle removal"}]},"35sykahTJV0":{"duration":56.262,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=35sykahTJV0","annotations":[]},"TJ98vfFChfI":{"duration":126.76,"subset":"training","resolution":"160x120","url":"https://www.youtube.com/watch?v=TJ98vfFChfI","annotations":[{"segment":[80.05666249999999,104.64086175],"label":"Chopping wood"},{"segment":[2.9662495553822157,78.11123829173167],"label":"Chopping wood"}]},"sYKtWOfH6VQ":{"duration":115.72999999999999,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=sYKtWOfH6VQ","annotations":[{"segment":[36.2749416,90.49119504],"label":"Washing 
hands"},{"segment":[16.332355694227772,18.678439937597503],"label":"Washing hands"},{"segment":[20.844056162246492,23.009672386895478],"label":"Washing hands"},{"segment":[28.965117004680188,33.83775351014041],"label":"Washing hands"},{"segment":[35.46196567862715,38.71039001560063],"label":"Washing hands"},{"segment":[43.76349453978159,46.83145085803432],"label":"Washing hands"},{"segment":[52.245491419656794,54.591575663026525],"label":"Washing hands"},{"segment":[61.44936037441498,64.15638065522622],"label":"Washing hands"},{"segment":[67.76574102964119,70.29229329173168],"label":"Washing hands"},{"segment":[72.63837753510141,77.15007800312013],"label":"Washing hands"},{"segment":[78.41335413416537,81.30084243369735],"label":"Washing hands"}]},"qRT0Z6Sz_Ck":{"duration":218.709,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=qRT0Z6Sz_Ck","annotations":[{"segment":[22.17795465560119,218.70875283446713],"label":"Rollerblading"}]},"isGfZVCL4gE":{"duration":139.23,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=isGfZVCL4gE","annotations":[{"segment":[12.577223088923557,33.88360618408736],"label":"Ironing 
clothes"}]},"ej_fSoVO0R8":{"duration":229.716,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=ej_fSoVO0R8","annotations":[]},"_Boy9qJLibI":{"duration":171.11,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_Boy9qJLibI","annotations":[{"segment":[32.55382527301092,53.10009204368175],"label":"Paintball"},{"segment":[79.7835553822153,133.68415132605304],"label":"Paintball"}]},"nPlMYHjlonk":{"duration":55.984,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nPlMYHjlonk","annotations":[]},"s2VpBgSWIPg":{"duration":203.29,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=s2VpBgSWIPg","annotations":[{"segment":[10.465824020715932,15.22301675740499],"label":"Hopscotch"},{"segment":[29.177448785026233,49.474804461566215],"label":"Hopscotch"},{"segment":[89.75236963220026,140.81290500599616],"label":"Hopscotch"}]},"IbYlWVA8Dh8":{"duration":86.193,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IbYlWVA8Dh8","annotations":[{"segment":[31.098845124373582,44.23542635274262],"label":"Assembling bicycle"}]},"amgpZtvd9h0":{"duration":222.68,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=amgpZtvd9h0","annotations":[]},"PLnfT1PoVHw":{"duration":112.69,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=PLnfT1PoVHw","annotations":[{"segment":[37.26914954134165,47.4654263026521],"label":"Shaving legs"},{"segment":[73.30771395631825,91.59069297659907],"label":"Shaving legs"}]},"D84XQFQ4tsU":{"duration":76.07,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=D84XQFQ4tsU","annotations":[{"segment":[10.2729834,28.24328772],"label":"Hand washing clothes"},{"segment":[54.47031767550701,65.12257548],"label":"Hand washing 
clothes"}]},"tQmsDeu1d6M":{"duration":131.913,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=tQmsDeu1d6M","annotations":[{"segment":[86.46959657320872,128.38882398753896],"label":"Building sandcastles"}]},"kBTqTp0SKq8":{"duration":39.521,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kBTqTp0SKq8","annotations":[]},"TGvY7GtyTK4":{"duration":106.301,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TGvY7GtyTK4","annotations":[{"segment":[0,10.28183925429017],"label":"Kneeling"},{"segment":[14.261906062402495,62.68605222776911],"label":"Kneeling"},{"segment":[72.8020553650546,73.96290818408735],"label":"Kneeling"},{"segment":[78.44048334321373,86.2347808424337],"label":"Kneeling"}]},"wmCcT0gcUxw":{"duration":170.06,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wmCcT0gcUxw","annotations":[]},"4WrU5OdkvY0":{"duration":81.131,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4WrU5OdkvY0","annotations":[{"segment":[0.01,75.00941185647426],"label":"Doing fencing"}]},"VOxaxr5LPiQ":{"duration":226.604,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VOxaxr5LPiQ","annotations":[{"segment":[173.54360530421218,189.44882371294852],"label":"Hula hoop"}]},"zSRhiHJP0QY":{"duration":110.017,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zSRhiHJP0QY","annotations":[]},"KMBeFIRSzMY":{"duration":180.211,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KMBeFIRSzMY","annotations":[]},"6vylz7u_tHw":{"duration":123.252,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6vylz7u_tHw","annotations":[{"segment":[98.59924024960998,120.70238377535101],"label":"Camel 
ride"}]},"W74AfHdwWhM":{"duration":107.764,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=W74AfHdwWhM","annotations":[{"segment":[32.345899834758946,35.195020770847215],"label":"Having an ice cream"},{"segment":[65.52977897272828,71.22801983772129],"label":"Having an ice cream"}]},"UCndppz9rWc":{"duration":11.52,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=UCndppz9rWc","annotations":[{"segment":[0.5390152886115445,11.445091294851794],"label":"Mowing the lawn"}]},"dAdCETrsDQw":{"duration":207.68,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dAdCETrsDQw","annotations":[{"segment":[82.61792161482377,99.78948963672832],"label":"Playing polo"},{"segment":[133.16065013212773,143.20439746569454],"label":"Playing polo"}]},"Gl3okOVEwgg":{"duration":129.66,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Gl3okOVEwgg","annotations":[{"segment":[30.53003276131045,128.18570046801872],"label":"Rock climbing"}]},"VvD2fdPNWEg":{"duration":188.29,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=VvD2fdPNWEg","annotations":[{"segment":[10.638385,173.4129300101404],"label":"Triple 
jump"}]},"Ct9HYiWaXt0":{"duration":38.69,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=Ct9HYiWaXt0","annotations":[]},"lFlQ_xWVt8M":{"duration":126.015,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=lFlQ_xWVt8M","annotations":[{"segment":[58.205850079577594,82.70326298782021],"label":"Welding"}]},"AIJpVd5JLdY":{"duration":215.41,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=AIJpVd5JLdY","annotations":[]},"FNLPN4yftWc":{"duration":191.936,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=FNLPN4yftWc","annotations":[]},"oA_uJ9gLvUQ":{"duration":70.705,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=oA_uJ9gLvUQ","annotations":[{"segment":[24.258689547581902,40.02683775351014],"label":"Sharpening knives"}]},"VEDRmPt_-Ms":{"duration":232.07999999999998,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=VEDRmPt_-Ms","annotations":[{"segment":[15.568780241809671,21.723879407176288],"label":"Tumbling"},{"segment":[21.723879407176288,27.878978572542902],"label":"Tumbling"},{"segment":[29.32723719968799,34.75820705148206],"label":"Tumbling"},{"segment":[34.75820705148206,39.82711224648986],"label":"Tumbling"},{"segment":[40.189176903276135,49.24079332293292],"label":"Tumbling"},{"segment":[49.60285797971919,55.757957145085804],"label":"Tumbling"},{"segment":[57.56828042901716,64.80957356474259],"label":"Tumbling"},{"segment":[65.17163822152887,77.4818365522621],"label":"Tumbling"},{"segment":[77.4818365522621,85.80932365834634],"label":"Tumbling"},{"segment":[85.44725900156007,93.41268145085805],"label":"Tumbling"},{"segment":[93.05061679407176,98.8436513026521],"label":"Tumbling"},{"segment":[93.41268145085805,108.98146169266771],"label":"Tumbling"},{"segment":[108.98146169266771,118.75720742589704],"label":"Tumbling"},{"segment":[130.70534109984402,136.86044026521063],"lab
el":"Tumbling"},{"segment":[137.94663423556943,149.17063859594387],"label":"Tumbling"},{"segment":[149.53270325273013,158.94638432917318],"label":"Tumbling"},{"segment":[159.30844898595944,166.5497421216849],"label":"Tumbling"},{"segment":[166.5497421216849,177.7737464820593],"label":"Tumbling"},{"segment":[177.41168182527304,190.80807412636506],"label":"Tumbling"},{"segment":[190.4460094695788,200.94588451638066],"label":"Tumbling"}]},"4-8qvijRPCM":{"duration":129.96,"subset":"testing","resolution":"1920x1440","url":"https://www.youtube.com/watch?v=4-8qvijRPCM","annotations":[]},"Dys_KC7uIzs":{"duration":234.267,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Dys_KC7uIzs","annotations":[{"segment":[13.522381483014422,74.55583304148492],"label":"Playing ten pins"},{"segment":[89.17462383393294,223.30202797347894],"label":"Playing ten pins"}]},"MMVfzKCnpnI":{"duration":83.151,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=MMVfzKCnpnI","annotations":[{"segment":[10.50734009204368,82.95068027210884],"label":"Baton 
twirling"}]},"ayDqRzRN8_M":{"duration":124.924,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ayDqRzRN8_M","annotations":[{"segment":[28.20975770419614,34.62106640808205],"label":"Curling"},{"segment":[42.97519593132733,50.35791504489292],"label":"Curling"},{"segment":[67.84330204722256,81.05448354644496],"label":"Curling"}]},"MWnYL4JiMP0":{"duration":208.4,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MWnYL4JiMP0","annotations":[{"segment":[12.839143224648986,204.93872919344773],"label":"Zumba"}]},"of4bEaqQzOk":{"duration":199.14,"subset":"training","resolution":"658x480","url":"https://www.youtube.com/watch?v=of4bEaqQzOk","annotations":[{"segment":[8.54581752,142.01175849453978],"label":"Snatch"}]},"n2sTs3k9G-U":{"duration":133.585,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=n2sTs3k9G-U","annotations":[{"segment":[11.319301331828061,131.3787992227938],"label":"Assembling bicycle"}]},"gjz9pSK0Y9I":{"duration":124.39,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gjz9pSK0Y9I","annotations":[{"segment":[0.01,124.321],"label":"Putting on shoes"}]},"O2Vd29Slt7g":{"duration":70.589,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=O2Vd29Slt7g","annotations":[{"segment":[5.497557590014199,57.284548906542064],"label":"Capoeira"}]},"--veKG73Di4":{"duration":115.45,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=--veKG73Di4","annotations":[{"segment":[33.50020015494497,109.86624771294852],"label":"Playing congas"}]},"kK9k01CtXnE":{"duration":80.411,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kK9k01CtXnE","annotations":[{"segment":[6.127717020265024,73.157458768321],"label":"Fixing the 
roof"}]},"q0L4clHNIbc":{"duration":126.69,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=q0L4clHNIbc","annotations":[{"segment":[9.581965678627146,105.32080192],"label":"Shoveling snow"}]},"Nogr8GCIL9k":{"duration":118.376,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Nogr8GCIL9k","annotations":[{"segment":[30.45704368174727,72.92464118564742],"label":"Wakeboarding"}]},"Bm6HrokioXU":{"duration":105.97999999999999,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Bm6HrokioXU","annotations":[{"segment":[15.872199687987521,59.19007800312013],"label":"Dodgeball"}]},"kmtuO9Xdp-Y":{"duration":207.12,"subset":"training","resolution":"1906x1080","url":"https://www.youtube.com/watch?v=kmtuO9Xdp-Y","annotations":[{"segment":[13.245430577223088,206.4548190327613],"label":"Playing guitarra"}]},"FAPMunnTNsE":{"duration":92.973,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=FAPMunnTNsE","annotations":[{"segment":[0.01,92.9726984126984],"label":"Canoeing"}]},"vt81bZ6_GcQ":{"duration":215.92000000000002,"subset":"training","resolution":"852x480","url":"https://www.youtube.com/watch?v=vt81bZ6_GcQ","annotations":[{"segment":[11.61928783151326,196.51752028081123],"label":"Playing water polo"}]},"gUR1wXosHMo":{"duration":50.41,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=gUR1wXosHMo","annotations":[{"segment":[2.908396255850234,9.039609984399377],"label":"Drinking beer"},{"segment":[0.2,50.186],"label":"Drinking beer"},{"segment":[0.2,50.186],"label":"Drinking beer"},{"segment":[0.2,50.186],"label":"Drinking beer"},{"segment":[39.302652106084246,43.94036505460218],"label":"Drinking beer"}]},"HXCLDIk4pgI":{"duration":119.82,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HXCLDIk4pgI","annotations":[{"segment":[6.915996476593758,118.5065342205525],"label":"Shoveling 
snow"}]},"GA8XzZTETPE":{"duration":158.29,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GA8XzZTETPE","annotations":[{"segment":[5.926630795844078,71.61345461778471],"label":"Ballet"},{"segment":[76.05842766302652,156.31488542433698],"label":"Ballet"}]},"arhhcCWkAoM":{"duration":76.28,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=arhhcCWkAoM","annotations":[{"segment":[0,3.331937696555481],"label":"Hammer throw"},{"segment":[6.187884293603037,7.020868717741907],"label":"Hammer throw"},{"segment":[9.162828665527574,11.423786388190223],"label":"Hammer throw"},{"segment":[23.79955444617785,26.298507663026523],"label":"Hammer throw"},{"segment":[64.49679254914197,74.01661432761311],"label":"Hammer throw"}]},"DEVD2oszH48":{"duration":86.98,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DEVD2oszH48","annotations":[{"segment":[20.965227957098286,72.8015853913068],"label":"Tumbling"}]},"MAT310OaVLI":{"duration":128.082,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MAT310OaVLI","annotations":[]},"Woelo4XJkHE":{"duration":75.93,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=Woelo4XJkHE","annotations":[]},"vrwJEvpeHyM":{"duration":134.75,"subset":"validation","resolution":"636x480","url":"https://www.youtube.com/watch?v=vrwJEvpeHyM","annotations":[{"segment":[6.306336824901567,29.009149394547208],"label":"Discus throw"},{"segment":[33.21337394448159,58.018298789094416],"label":"Discus throw"},{"segment":[62.64294579402224,72.52287348636803],"label":"Discus throw"},{"segment":[74.41477407800312,85.34575784087365],"label":"Discus throw"},{"segment":[87.44787010296412,103.0035008424337],"label":"Discus throw"},{"segment":[117.71828667706708,133.69433986895476],"label":"Discus 
throw"}]},"8rypSctbfjU":{"duration":226.57999999999998,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=8rypSctbfjU","annotations":[{"segment":[18.91588065522621,209.13539079563185],"label":"Painting"}]},"-DTxZliHCTE":{"duration":46.91,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-DTxZliHCTE","annotations":[{"segment":[0,2.1952093759750393],"label":"Mowing the lawn"},{"segment":[8.634490212168487,20.781315425897038],"label":"Mowing the lawn"},{"segment":[24.879039594383777,28.24502730421217],"label":"Mowing the lawn"},{"segment":[38.269816787831516,46.704307],"label":"Mowing the lawn"}]},"TW1qn3wVAtw":{"duration":221.31,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TW1qn3wVAtw","annotations":[]},"6Lh3yNFvskc":{"duration":54.43,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=6Lh3yNFvskc","annotations":[{"segment":[5.7739077940717625,49.163126658346336],"label":"Layup drill in basketball"}]},"i9DEbIo8HGs":{"duration":69.5,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=i9DEbIo8HGs","annotations":[]},"3Wjig_NRlnU":{"duration":110.97,"subset":"training","resolution":"1280x544","url":"https://www.youtube.com/watch?v=3Wjig_NRlnU","annotations":[{"segment":[27.35252698271196,29.42993375975039],"label":"Tai chi"},{"segment":[57.821163975039,59.03298477691108],"label":"Tai chi"}]},"rG-WlnAoc3M":{"duration":129.823,"subset":"training","resolution":"1444x1080","url":"https://www.youtube.com/watch?v=rG-WlnAoc3M","annotations":[{"segment":[0.2,104.10781152647976],"label":"Hand car wash"}]},"AZaSkc1gnhY":{"duration":189.87,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=AZaSkc1gnhY","annotations":[{"segment":[16.29146059282371,20.438377834633382],"label":"Drinking coffee"},{"segment":[75.53313547581902,90.04734582215288],"label":"Drinking 
coffee"},{"segment":[99.52601380343214,101.007055675507],"label":"Drinking coffee"}]},"c7HroaL0WDc":{"duration":143.453,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=c7HroaL0WDc","annotations":[{"segment":[12.716659347966766,13.609056734085549],"label":"Wakeboarding"},{"segment":[82.32365546523172,83.88535089093959],"label":"Wakeboarding"},{"segment":[89.68593390071166,93.70172213824618],"label":"Wakeboarding"},{"segment":[105.97218485393903,114.0037612271894],"label":"Wakeboarding"}]},"9gU5be5YCVw":{"duration":58.65,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9gU5be5YCVw","annotations":[{"segment":[6.171528790951637,52.617997321372854],"label":"Using parallel bars"}]},"hoisfXCLJDQ":{"duration":15.186,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=hoisfXCLJDQ","annotations":[{"segment":[0,9.626407176287051],"label":"River tubing"}]},"Yu18MvEn-To":{"duration":82.501,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Yu18MvEn-To","annotations":[{"segment":[0.12830363630220643,79.42116078404715],"label":"Knitting"}]},"sOL5ksIFX50":{"duration":53.824,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sOL5ksIFX50","annotations":[{"segment":[12.763222552262091,46.1827131825273],"label":"Curling"}]},"MOQQCJBDEyA":{"duration":52.663,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=MOQQCJBDEyA","annotations":[]},"T3mcQnlsmq4":{"duration":159.847,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=T3mcQnlsmq4","annotations":[]},"yQ2AirKmnTM":{"duration":39.52,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=yQ2AirKmnTM","annotations":[{"segment":[0.01,38.84880289762265],"label":"Riding bumper 
cars"}]},"VcQHv5PHb-M":{"duration":160.868,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VcQHv5PHb-M","annotations":[{"segment":[0.01,12.866092905311941],"label":"Throwing darts"},{"segment":[17.87235458755493,22.628303185685773],"label":"Throwing darts"},{"segment":[45.65710692400353,52.41556019503157],"label":"Throwing darts"},{"segment":[59.92495271839605,60.93378760940932],"label":"Throwing darts"},{"segment":[84.96683433838128,89.47303060006352],"label":"Throwing darts"},{"segment":[98.485423123428,102.99161938511025],"label":"Throwing darts"}]},"VlLq4bAHCXI":{"duration":44.56,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VlLq4bAHCXI","annotations":[{"segment":[10.557241809672387,38.82564586583463],"label":"Running a marathon"}]},"8zq6C0SRyDQ":{"duration":144.405,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=8zq6C0SRyDQ","annotations":[{"segment":[15.799323293858228,52.89788460793896],"label":"Making a cake"}]},"J9OSz5iCwNU":{"duration":48.084,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=J9OSz5iCwNU","annotations":[{"segment":[0.5985897886889333,43.32301651768745],"label":"Rollerblading"}]},"ORVjNqVSLe0":{"duration":187.11,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ORVjNqVSLe0","annotations":[{"segment":[3.9101066399999995,172.67770510062402],"label":"Shaving legs"}]},"-bqaXU4s8Qs":{"duration":55.357,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-bqaXU4s8Qs","annotations":[{"segment":[0.01,55.321],"label":"Hand car wash"}]},"_jxpaVW4_cE":{"duration":126.201,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=_jxpaVW4_cE","annotations":[{"segment":[23.724110216109324,85.15085269520237],"label":"Camel 
ride"}]},"h9WOFfVGeAQ":{"duration":33.92,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=h9WOFfVGeAQ","annotations":[{"segment":[0,11.590379232449298],"label":"Playing bagpipes"},{"segment":[22.069352237129483,23.23368257098284],"label":"Playing bagpipes"}]},"kkIClKG5xY8":{"duration":165.721,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=kkIClKG5xY8","annotations":[{"segment":[45.76066223057085,104.44806520424082],"label":"Surfing"},{"segment":[107.80901779744659,157.1891661778471],"label":"Surfing"}]},"NOxiMgQMeuk":{"duration":130.8,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=NOxiMgQMeuk","annotations":[{"segment":[6.325644525100731,124.88046464274571],"label":"Painting"}]},"tES39s-LJsQ":{"duration":100.055,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tES39s-LJsQ","annotations":[{"segment":[19.606378429903582,20.228802897799646],"label":"Snowboarding"},{"segment":[22.874108202620796,25.052594924238214],"label":"Snowboarding"},{"segment":[27.23108164585563,28.32032500666434],"label":"Snowboarding"},{"segment":[35.9450285323253,36.87866569873277],"label":"Snowboarding"},{"segment":[54.30655947167211,64.57656830215423],"label":"Snowboarding"},{"segment":[68.77793555098782,71.26763466140773],"label":"Snowboarding"},{"segment":[76.24703288224754,80.91521871428488],"label":"Snowboarding"},{"segment":[83.24931163030352,84.80537357431598],"label":"Snowboarding"}]},"P1P0Q9kW0GU":{"duration":17.81,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=P1P0Q9kW0GU","annotations":[]},"pr9GgpH34m4":{"duration":35.945,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pr9GgpH34m4","annotations":[]},"LrputIUn4oY":{"duration":84.17,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LrputIUn4oY","annotations":[{"segment":[33.55011748,70.74519890795
632],"label":"Plataform diving"}]},"jEppv00aBBc":{"duration":235.85,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=jEppv00aBBc","annotations":[{"segment":[18.764585097689622,235.84507936507936],"label":"Walking the dog"}]},"ZYv-6yZ6te4":{"duration":38.41,"subset":"testing","resolution":"1920x1440","url":"https://www.youtube.com/watch?v=ZYv-6yZ6te4","annotations":[]},"e4kS-n7_LRU":{"duration":41.1,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=e4kS-n7_LRU","annotations":[]},"idSdnubrlds":{"duration":24.544,"subset":"testing","resolution":"712x480","url":"https://www.youtube.com/watch?v=idSdnubrlds","annotations":[]},"ivkkCEynzPs":{"duration":76.394,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=ivkkCEynzPs","annotations":[{"segment":[4.448444729778255,73.67290267370349],"label":"Welding"}]},"ig867kFeLic":{"duration":17.415,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=ig867kFeLic","annotations":[{"segment":[0.01,17.41496598639456],"label":"Powerbocking"}]},"iWSKl7vOd2s":{"duration":67.45,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=iWSKl7vOd2s","annotations":[{"segment":[0.01,40.83017035257411],"label":"Ballet"}]},"qTAG23IVSeM":{"duration":107.53,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qTAG23IVSeM","annotations":[{"segment":[47.889173166926675,99.38471138845554],"label":"Playing polo"}]},"JxN0fchWCNI":{"duration":105.37,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=JxN0fchWCNI","annotations":[{"segment":[10.685163841927826,46.3571723603638],"label":"Preparing salad"},{"segment":[60.823240330973775,105.37215419501133],"label":"Preparing 
salad"}]},"BsceC-igcTY":{"duration":124.738,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BsceC-igcTY","annotations":[{"segment":[47.676503120124806,124.7373],"label":"Playing ice hockey"}]},"WMx0-3GZGUI":{"duration":182.509,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WMx0-3GZGUI","annotations":[{"segment":[6.833398495123478,70.89650868486738],"label":"Making a cake"},{"segment":[89.97307929485179,140.65411763182527],"label":"Making a cake"}]},"Vp63nbOfxgs":{"duration":162.01,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Vp63nbOfxgs","annotations":[{"segment":[9.60127403120125,138.20781302808115],"label":"Playing violin"}]},"ZtfdFFYa8vY":{"duration":74.0,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=ZtfdFFYa8vY","annotations":[{"segment":[0.01,71.8084871575663],"label":"Walking the dog"}]},"Y05GAiavfOg":{"duration":107.86,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Y05GAiavfOg","annotations":[]},"jWuq6clN5q4":{"duration":78.972,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=jWuq6clN5q4","annotations":[{"segment":[0.01,78.921],"label":"Drum corps"}]},"kLflIBGCNrc":{"duration":122.32,"subset":"training","resolution":"562x480","url":"https://www.youtube.com/watch?v=kLflIBGCNrc","annotations":[{"segment":[11.731532761310454,56.94085413416537],"label":"Playing water polo"},{"segment":[65.33414586583464,72.20138455538222],"label":"Playing water polo"},{"segment":[77.92408346333853,108.44514430577223],"label":"Playing water polo"}]},"7_xkhnbnI7M":{"duration":235.962,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7_xkhnbnI7M","annotations":[{"segment":[7.129077507767722,209.60956193144372],"label":"Clipping cat 
claws"}]},"SL7iKDqir6g":{"duration":44.14,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SL7iKDqir6g","annotations":[{"segment":[14.94325373478939,23.61998170982839],"label":"Preparing pasta"},{"segment":[28.440386140405618,33.26079057098284],"label":"Preparing pasta"}]},"yt0K2HWC0WI":{"duration":235.869,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yt0K2HWC0WI","annotations":[{"segment":[26.677771763931784,35.14106487524807],"label":"Slacklining"},{"segment":[113.8864877370605,121.98181158266738],"label":"Slacklining"},{"segment":[136.70058221104355,145.89981385377865],"label":"Slacklining"},{"segment":[184.16861748755665,194.10378657566304],"label":"Slacklining"}]},"3G3Zear4vog":{"duration":15.26,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=3G3Zear4vog","annotations":[{"segment":[0.30938764898595944,15.255191],"label":"Putting on makeup"}]},"Dx9wjBdbZ1M":{"duration":43.213,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Dx9wjBdbZ1M","annotations":[{"segment":[10.853598491419655,36.80785575351014],"label":"Fixing the 
roof"}]},"fLvPz8W00l4":{"duration":67.153,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fLvPz8W00l4","annotations":[{"segment":[5.021728549141966,66.13914508580343],"label":"Kneeling"}]},"cxVbaN4GxxU":{"duration":179.31,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cxVbaN4GxxU","annotations":[{"segment":[31.329333630488076,178.46530942901714],"label":"Kayaking"}]},"HjRevGoqqz0":{"duration":176.84,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=HjRevGoqqz0","annotations":[{"segment":[9.656024812421068,91.04251965997007],"label":"Dodgeball"}]},"1sp7zRy1Rxo":{"duration":132.54,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1sp7zRy1Rxo","annotations":[{"segment":[16.955110858658344,18.609268015600623],"label":"Skiing"},{"segment":[25.84620557722309,28.534210957254288],"label":"Skiing"},{"segment":[37.63207532043682,62.2376630299532],"label":"Skiing"},{"segment":[70.71521845928237,130.47164575382214],"label":"Skiing"}]},"sMO2IlNgDuw":{"duration":175.52,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=sMO2IlNgDuw","annotations":[{"segment":[4.928779012384986,20.810400274514382],"label":"Putting on makeup"},{"segment":[34.77527414293851,47.37104273014458],"label":"Putting on makeup"},{"segment":[64.07412716100481,72.83640096080033],"label":"Putting on makeup"},{"segment":[76.12225363572365,88.71802222292973],"label":"Putting on makeup"}]},"6EN1q4TmPtI":{"duration":99.289,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=6EN1q4TmPtI","annotations":[{"segment":[32.528222271748014,99.288525],"label":"River tubing"}]},"VO49rhXzhk4":{"duration":155.69,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VO49rhXzhk4","annotations":[{"segment":[0.01,153.62526664331878],"label":"Wrapping 
presents"}]},"PjcTk1hcf4k":{"duration":183.252,"subset":"validation","resolution":"854x470","url":"https://www.youtube.com/watch?v=PjcTk1hcf4k","annotations":[{"segment":[72.76675233644859,157.5186168224299],"label":"Decorating the Christmas tree"}]},"mXBWTtd4pgs":{"duration":70.798,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mXBWTtd4pgs","annotations":[]},"sQtT9sUyUwE":{"duration":9.706,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=sQtT9sUyUwE","annotations":[{"segment":[0,9.705941],"label":"Canoeing"}]},"9_zC7CdvYu4":{"duration":91.812,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9_zC7CdvYu4","annotations":[{"segment":[10.885630657879377,81.6422284399376],"label":"Snowboarding"}]},"mIC02-VKqUE":{"duration":61.858,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mIC02-VKqUE","annotations":[{"segment":[0.01,45.489471174970966],"label":"Rock-paper-scissors"}]},"djE5A2S1Ezg":{"duration":204.545,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=djE5A2S1Ezg","annotations":[{"segment":[91.09278193146417,175.19556074766356],"label":"Painting fence"}]},"TJFl99tLNWk":{"duration":216.2,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TJFl99tLNWk","annotations":[{"segment":[6.30223,195.5833590327613],"label":"Doing nails"}]},"Likt_9dbMqE":{"duration":230.877,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=Likt_9dbMqE","annotations":[{"segment":[15.463654890824449,216.13154859036035],"label":"Futsal"}]},"fI12XNNqldA":{"duration":220.985,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fI12XNNqldA","annotations":[{"segment":[5.860736483881124,94.11653294703216],"label":"Spread 
mulch"}]},"nnWJGghixr0":{"duration":45.535,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=nnWJGghixr0","annotations":[{"segment":[14.917643508580344,21.310919297971918],"label":"Throwing darts"},{"segment":[26.780721917784714,28.41455906396256],"label":"Throwing darts"}]},"gbuRv8phs1Y":{"duration":165.048,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gbuRv8phs1Y","annotations":[{"segment":[6.437107552329304,22.14364975975039],"label":"Canoeing"},{"segment":[42.484909422776916,67.71837077691107],"label":"Canoeing"},{"segment":[98.35900242121684,106.34101570670828],"label":"Canoeing"},{"segment":[133.11938285803433,163.50253020280812],"label":"Canoeing"}]},"KHxBqZmSaJU":{"duration":158.9,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KHxBqZmSaJU","annotations":[{"segment":[8.428051549272855,12.889961193005544],"label":"Cheerleading"},{"segment":[15.864567622160669,26.52357399329987],"label":"Cheerleading"},{"segment":[37.67834784399376,56.021754031201255],"label":"Cheerleading"},{"segment":[67.42441193135726,73.62150861622465],"label":"Cheerleading"},{"segment":[90.72549546645867,97.17047601872075],"label":"Cheerleading"},{"segment":[119.48002408424337,128.15595944305772],"label":"Cheerleading"},{"segment":[130.8826819843994,137.82343027145086],"label":"Cheerleading"},{"segment":[152.20069458034322,158.69355900000002],"label":"Cheerleading"}]},"z4qaGiWt-bo":{"duration":79.67,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=z4qaGiWt-bo","annotations":[{"segment":[1.21173507,75.78708236583464],"label":"Cricket"}]},"meMZuh8iNDs":{"duration":178.492,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=meMZuh8iNDs","annotations":[{"segment":[88.54947987307247,137.00108117316694],"label":"Braiding 
hair"}]},"IJUokB_p3Yo":{"duration":111.317,"subset":"training","resolution":"854x478","url":"https://www.youtube.com/watch?v=IJUokB_p3Yo","annotations":[{"segment":[56.07451326053042,108.15610452418098],"label":"Having an ice cream"}]},"cxFt88Cs-fs":{"duration":183.0,"subset":"training","resolution":"1906x1080","url":"https://www.youtube.com/watch?v=cxFt88Cs-fs","annotations":[{"segment":[150.5231200750182,179.26193967066362],"label":"Bungee jumping"}]},"xYt__nSK8NA":{"duration":46.95,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xYt__nSK8NA","annotations":[{"segment":[0.01,5.929174726989079],"label":"Smoking a cigarette"},{"segment":[13.32234321372855,34.403853354134164],"label":"Smoking a cigarette"}]},"xMQcYD044z4":{"duration":95.62,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xMQcYD044z4","annotations":[{"segment":[40.43918748,84.76355664664587],"label":"Clean and jerk"},{"segment":[22.524011639999998,39.287945780811235],"label":"Clean and jerk"}]},"kUxz_SPi72U":{"duration":72.167,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=kUxz_SPi72U","annotations":[{"segment":[18.238930242924,69.35296831201248],"label":"Changing car 
wheel"}]},"qZk7okgCU2M":{"duration":124.622,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qZk7okgCU2M","annotations":[{"segment":[0.2,23.913242276629845],"label":"Surfing"},{"segment":[36.16148832075732,42.382819644758584],"label":"Surfing"},{"segment":[67.46256154463866,93.70880225273011],"label":"Surfing"},{"segment":[111.59512965366615,124.421042],"label":"Surfing"}]},"nOJUyDGq_l0":{"duration":196.952,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nOJUyDGq_l0","annotations":[{"segment":[34.66586573985773,150.62778691588787],"label":"Capoeira"}]},"PRkEpM8ffak":{"duration":50.179,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PRkEpM8ffak","annotations":[{"segment":[3.1312525426187117,49.317227546244716],"label":"Elliptical trainer"}]},"iM-HdlplAhw":{"duration":22.733,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iM-HdlplAhw","annotations":[{"segment":[0.20000000000000004,19.444453115290997],"label":"Using the monkey bar"}]},"H-hgVm5G54Y":{"duration":146.87,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=H-hgVm5G54Y","annotations":[{"segment":[10.31042058008851,133.11898557410294],"label":"Brushing hair"}]},"p7j6yY99vEg":{"duration":130.31,"subset":"validation","resolution":"328x240","url":"https://www.youtube.com/watch?v=p7j6yY99vEg","annotations":[{"segment":[53.66916658707164,55.498797266176354],"label":"Layup drill in basketball"},{"segment":[59.56464321974239,60.987689303490505],"label":"Layup drill in basketball"},{"segment":[66.27328904312635,67.69633512687446],"label":"Layup drill in basketball"},{"segment":[72.1687656757971,73.59181175954521],"label":"Layup drill in basketball"},{"segment":[78.6741181076443,80.70704105616225],"label":"Layup drill in basketball"},{"segment":[85.5860561326053,87.21239449141966],"label":"Layup drill in 
basketball"},{"segment":[91.88811727301092,93.71774792667706],"label":"Layup drill in basketball"},{"segment":[97.78359382371295,98.80005529797192],"label":"Layup drill in basketball"},{"segment":[103.88236266926677,104.89882414352574],"label":"Layup drill in basketball"},{"segment":[109.98113151482059,110.59100839937598],"label":"Layup drill in basketball"},{"segment":[116.6897772449298,118.51940789859594],"label":"Layup drill in basketball"}]},"fgBFlwM466w":{"duration":191.495,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=fgBFlwM466w","annotations":[{"segment":[88.72699672068515,136.52605219310814],"label":"Swimming"},{"segment":[155.64567438207735,187.0138045358549],"label":"Swimming"}]},"xzoquwJYEZw":{"duration":107.95,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=xzoquwJYEZw","annotations":[{"segment":[21.5562322193568,22.9034967330666],"label":"Playing squash"},{"segment":[73.59432406139783,75.61522001872075],"label":"Playing squash"}]},"iOnlcEk50CM":{"duration":83.24,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iOnlcEk50CM","annotations":[{"segment":[10.778804376664864,12.207320619355386],"label":"Skateboarding"},{"segment":[47.920226686618484,53.504426544408716],"label":"Skateboarding"}]},"legp5cXwuHc":{"duration":169.53,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=legp5cXwuHc","annotations":[]},"3YDQ9wZtIgw":{"duration":235.75,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3YDQ9wZtIgw","annotations":[{"segment":[1.1033644284546893,234.28104697521235],"label":"Playing lacrosse"}]},"AElVyfjSHIo":{"duration":150.814,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AElVyfjSHIo","annotations":[{"segment":[82.34752223088923,123.05072607644306],"label":"Playing 
drums"}]},"nezTU6Bq5hM":{"duration":45.187,"subset":"validation","resolution":"320x480","url":"https://www.youtube.com/watch?v=nezTU6Bq5hM","annotations":[{"segment":[1.2688744773790952,38.55968550702028],"label":"Cutting the grass"}]},"nwBb-WkaCYU":{"duration":55.078,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=nwBb-WkaCYU","annotations":[{"segment":[2.663665666146646,50.867421753510136],"label":"Playing rubik cube"}]},"UJe1UjxgcdQ":{"duration":90.373,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UJe1UjxgcdQ","annotations":[]},"tp0L6sR1qts":{"duration":62.323,"subset":"training","resolution":"322x240","url":"https://www.youtube.com/watch?v=tp0L6sR1qts","annotations":[{"segment":[7.479210608424337,56.43404368174727],"label":"Assembling bicycle"}]},"Y16uL6dYDzQ":{"duration":76.6,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Y16uL6dYDzQ","annotations":[]},"TUPCQpyoSbI":{"duration":164.816,"subset":"validation","resolution":"854x470","url":"https://www.youtube.com/watch?v=TUPCQpyoSbI","annotations":[{"segment":[17.478546021840874,154.2411856474259],"label":"Knitting"}]},"Fr7rhb2Vw_k":{"duration":153.229,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Fr7rhb2Vw_k","annotations":[{"segment":[67.76953866726097,129.20435855257338],"label":"Hanging wallpaper"}]},"sGwra7t-ARo":{"duration":99.5,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sGwra7t-ARo","annotations":[{"segment":[11.020716730577222,79.3181161876755],"label":"Long jump"}]},"nqbYEJlRwoQ":{"duration":186.94,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=nqbYEJlRwoQ","annotations":[{"segment":[76.70240154134164,97.11748940405616],"label":"Vacuuming floor"},{"segment":[135.90615634321372,154.2797354196568],"label":"Vacuuming 
floor"}]},"ocvKEzb7OaE":{"duration":53.267,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ocvKEzb7OaE","annotations":[{"segment":[0.2,53.06657596371882],"label":"Playing rubik cube"}]},"SqIVJrXxO3g":{"duration":146.077,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SqIVJrXxO3g","annotations":[{"segment":[0.01,146.07673469387754],"label":"Rafting"}]},"3lla2AaJgNA":{"duration":21.64,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=3lla2AaJgNA","annotations":[{"segment":[0,7.697545703588142],"label":"Ironing clothes"}]},"y3Zq6RZZNtc":{"duration":25.124,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=y3Zq6RZZNtc","annotations":[{"segment":[7.747329173166927,10.408028081123245],"label":"Putting on shoes"}]},"E5SWUUNrHDU":{"duration":146.89,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=E5SWUUNrHDU","annotations":[{"segment":[30.00288299531981,133.2952511700468],"label":"Shaving"}]},"19SHLOheCmo":{"duration":231.13,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=19SHLOheCmo","annotations":[{"segment":[0.01,227.70592377980833],"label":"Tai chi"}]},"Vq2mQvMp_c4":{"duration":109.37,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Vq2mQvMp_c4","annotations":[{"segment":[0.01,97.76397723088924],"label":"Horseback riding"}]},"ntJk6Rj-lRA":{"duration":179.7,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ntJk6Rj-lRA","annotations":[{"segment":[100.33271138845554,110.70229329173165],"label":"Installing carpet"},{"segment":[139.0084492979719,159.46912948517942],"label":"Installing carpet"}]},"v-qFJJx0bTs":{"duration":122.579,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=v-qFJJx0bTs","annotations":[{"segment":[9.943916287263736,120.66559842433698],"label":"Tug of 
war"}]},"RhOV_K2XzZA":{"duration":63.62,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=RhOV_K2XzZA","annotations":[{"segment":[2.42527064,59.01267550702028],"label":"Volleyball"}]},"dgRYwmcRpuo":{"duration":161.379,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dgRYwmcRpuo","annotations":[{"segment":[0.2,145.96907956318253],"label":"Cleaning sink"}]},"J6-rLsT975U":{"duration":215.06,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=J6-rLsT975U","annotations":[{"segment":[45.9561263650546,120.77185647425897],"label":"Shoveling snow"}]},"ANaMmXJFE54":{"duration":197.35,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=ANaMmXJFE54","annotations":[]},"7o7hL0VccJQ":{"duration":93.0,"subset":"validation","resolution":"720x1280","url":"https://www.youtube.com/watch?v=7o7hL0VccJQ","annotations":[{"segment":[0.5803177433219778,78.05273647680602],"label":"Playing bagpipes"}]},"MdFHY1UBH5w":{"duration":265.799,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=MdFHY1UBH5w","annotations":[]},"xs5imfBbWmw":{"duration":232.363,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xs5imfBbWmw","annotations":[{"segment":[0.01,232.36208616780044],"label":"Rafting"}]},"z8VqGGu5vPc":{"duration":215.04,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=z8VqGGu5vPc","annotations":[{"segment":[0,29.51651794071763],"label":"Hand car wash"},{"segment":[67.75382527301092,168.3783182527301],"label":"Hand car wash"},{"segment":[194.89422932917316,196.57146021840873],"label":"Hand car wash"},{"segment":[204.62216848673944,207.30573790951638],"label":"Hand car 
wash"}]},"hTx_y1ils70":{"duration":218.175,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=hTx_y1ils70","annotations":[{"segment":[92.57958928969404,195.02979487675506],"label":"Elliptical trainer"}]},"f59PJGzAFA8":{"duration":13.45,"subset":"testing","resolution":"270x480","url":"https://www.youtube.com/watch?v=f59PJGzAFA8","annotations":[]},"DM4WamWf9N4":{"duration":84.568,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=DM4WamWf9N4","annotations":[]},"KPeXy0Mr5p4":{"duration":85.171,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KPeXy0Mr5p4","annotations":[]},"4JmFOPoKluM":{"duration":200.18,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4JmFOPoKluM","annotations":[]},"stDgqxV-VGg":{"duration":40.473,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=stDgqxV-VGg","annotations":[]},"nGABbRHJ2Ug":{"duration":113.34,"subset":"validation","resolution":"406x720","url":"https://www.youtube.com/watch?v=nGABbRHJ2Ug","annotations":[{"segment":[28.801159126365054,64.84678159126365],"label":"Spinning"},{"segment":[83.39967550702028,111.5137457098284],"label":"Spinning"}]},"3LrLgh2rlAM":{"duration":15.21,"subset":"training","resolution":"568x320","url":"https://www.youtube.com/watch?v=3LrLgh2rlAM","annotations":[{"segment":[1.115173626677067,14.876890721840875],"label":"Clean and jerk"}]},"yISeNkFiVAg":{"duration":141.062,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=yISeNkFiVAg","annotations":[{"segment":[0.01,136.01548527410574],"label":"Playing congas"}]},"saB1t3Znhk0":{"duration":90.19,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=saB1t3Znhk0","annotations":[{"segment":[8.462685314035863,77.54270400562464],"label":"Doing 
karate"}]},"4lmYUyqQWzo":{"duration":75.42,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=4lmYUyqQWzo","annotations":[]},"EqSXihtiv5g":{"duration":93.02000000000001,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EqSXihtiv5g","annotations":[{"segment":[14.87432618393171,80.75670586193449],"label":"Wrapping presents"}]},"AI0q-ros2CA":{"duration":67.08,"subset":"testing","resolution":"240x176","url":"https://www.youtube.com/watch?v=AI0q-ros2CA","annotations":[]},"X5Dsemc20pg":{"duration":190.61,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=X5Dsemc20pg","annotations":[{"segment":[30.319460218408736,48.451686427457105],"label":"Mixing drinks"},{"segment":[62.422418096723874,77.58214820592823],"label":"Mixing drinks"},{"segment":[90.6611310452418,115.92734789391577],"label":"Mixing drinks"},{"segment":[130.78982839313574,161.10928861154446],"label":"Mixing drinks"}]},"ptY-0n53uq0":{"duration":103.538,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ptY-0n53uq0","annotations":[{"segment":[0.2,103.33755102040816],"label":"Playing drums"}]},"H-5nHSHwFOk":{"duration":745.5,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=H-5nHSHwFOk","annotations":[{"segment":[113.96757878315132,138.38920280811234],"label":"Waxing skis"},{"segment":[251.19384711388457,391.9089188767551],"label":"Waxing skis"},{"segment":[483.7807425897036,695.4534758190329],"label":"Waxing skis"}]},"7_HWPDDW7Cw":{"duration":162.145,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=7_HWPDDW7Cw","annotations":[{"segment":[3.3834168961783435,152.35474088371728],"label":"Mooping floor"}]},"Zq9jr2S5wGM":{"duration":145.334,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Zq9jr2S5wGM","annotations":[{"segment":[17.23145227305691,25.39371893915757],"label":"Making a 
lemonade"},{"segment":[32.87579684087363,42.39844144305773],"label":"Making a lemonade"},{"segment":[50.78743787831514,51.46762677847114],"label":"Making a lemonade"},{"segment":[58.49624541341654,75.95442718408738],"label":"Making a lemonade"},{"segment":[78.90191241809673,94.7729867550702],"label":"Making a lemonade"},{"segment":[105.88273879095165,112.91135742589704],"label":"Making a lemonade"},{"segment":[118.3528686271451,125.60821689547582],"label":"Making a lemonade"},{"segment":[128.32897249609985,130.5962688299532],"label":"Making a lemonade"}]},"RWTLd_0BeAg":{"duration":220.219,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RWTLd_0BeAg","annotations":[{"segment":[64.58813319607614,211.28564848716397],"label":"Baton twirling"}]},"dH_gpSfaA6E":{"duration":194.33,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=dH_gpSfaA6E","annotations":[{"segment":[94.03289203,110.64007745],"label":"Wrapping presents"},{"segment":[7.60595878,83.27494931],"label":"Wrapping presents"}]},"2KhMB_ZWJXM":{"duration":177.053,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=2KhMB_ZWJXM","annotations":[]},"KB9sqtTJTZw":{"duration":189.06,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=KB9sqtTJTZw","annotations":[{"segment":[6.728503039999999,171.4653900156006],"label":"Breakdancing"}]},"4_CcDjbCXBQ":{"duration":172.548,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4_CcDjbCXBQ","annotations":[{"segment":[35.208254639342755,160.18412034388],"label":"Futsal"}]},"QosVN26lb1g":{"duration":33.46,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=QosVN26lb1g","annotations":[{"segment":[0.01,32.073606117220926],"label":"Hitting a 
pinata"}]},"84g4J1rxkQc":{"duration":174.591,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=84g4J1rxkQc","annotations":[{"segment":[0.01,164.63727152243624],"label":"Slacklining"}]},"FzqZsQx20jY":{"duration":67.55,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FzqZsQx20jY","annotations":[]},"EwjDShmfFHM":{"duration":89.82,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=EwjDShmfFHM","annotations":[{"segment":[3.3628000466957455,86.87233446177846],"label":"Washing hands"}]},"gDexNGeZln0":{"duration":59.95,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gDexNGeZln0","annotations":[{"segment":[13.655651706340363,42.08933611544462],"label":"Starting a campfire"}]},"8KxL1itwI3Q":{"duration":188.87,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8KxL1itwI3Q","annotations":[{"segment":[13.848545569033643,171.33810507],"label":"Wrapping presents"}]},"A32TgJfp2z8":{"duration":90.56,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=A32TgJfp2z8","annotations":[{"segment":[0.01,90.43040639625586],"label":"Ping-pong"}]},"xNiYjTyCTtg":{"duration":104.72,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xNiYjTyCTtg","annotations":[{"segment":[41.823322179095165,80.7059420174727],"label":"Getting a piercing"}]},"uwGcEaW3z3Q":{"duration":11.36,"subset":"training","resolution":"812x480","url":"https://www.youtube.com/watch?v=uwGcEaW3z3Q","annotations":[{"segment":[0.25576701560062404,10.821590625585022],"label":"Clean and jerk"}]},"BNQPVf2Ia5Y":{"duration":145.822,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BNQPVf2Ia5Y","annotations":[{"segment":[0.01,145.781],"label":"Trimming branches or 
hedges"}]},"l6_QM53dI5s":{"duration":210.91,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=l6_QM53dI5s","annotations":[]},"iBefG1qFbsE":{"duration":183.229,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=iBefG1qFbsE","annotations":[{"segment":[0,183.228683],"label":"Knitting"}]},"Y9B22Ii7-eE":{"duration":206.797,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Y9B22Ii7-eE","annotations":[{"segment":[50.69187183098481,196.90603694001908],"label":"Making an omelette"}]},"EA58Fo2e0CI":{"duration":78.809,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=EA58Fo2e0CI","annotations":[{"segment":[6.675524174782347,78.781],"label":"Running a marathon"}]},"Rai5nKbB6wU":{"duration":199.39,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Rai5nKbB6wU","annotations":[{"segment":[33.28346850336599,34.838770769878415],"label":"Longboarding"},{"segment":[45.414826182162926,50.391793435002704],"label":"Longboarding"},{"segment":[69.05542063315185,76.83193196571399],"label":"Longboarding"},{"segment":[95.80661961716564,99.53934505679547],"label":"Longboarding"},{"segment":[103.2720704964253,124.11312086769183],"label":"Longboarding"},{"segment":[125.35736268090179,160.1961334507802],"label":"Longboarding"}]},"XU8FzYcRat4":{"duration":106.255,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=XU8FzYcRat4","annotations":[{"segment":[1.491873030023242,16.576366770670827],"label":"Playing beach 
volleyball"}]},"E6_Vin5HBiw":{"duration":91.16,"subset":"testing","resolution":"256x144","url":"https://www.youtube.com/watch?v=E6_Vin5HBiw","annotations":[]},"v7ND9FPFEhk":{"duration":55.334,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=v7ND9FPFEhk","annotations":[]},"DrQZ28PEXpM":{"duration":42.4,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=DrQZ28PEXpM","annotations":[]},"HVmZOfkh8Pk":{"duration":56.29,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=HVmZOfkh8Pk","annotations":[{"segment":[21.59158502340094,33.00177223088924],"label":"Playing field hockey"}]},"TrFoFWXLMFg":{"duration":54.61,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=TrFoFWXLMFg","annotations":[{"segment":[0.01,22.871714508580343],"label":"Shaving legs"},{"segment":[26.69783775351014,47.443928237129484],"label":"Shaving legs"}]},"sLzAJGfkswM":{"duration":157.641,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=sLzAJGfkswM","annotations":[]},"S5bjFaZUnOM":{"duration":119.723,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=S5bjFaZUnOM","annotations":[{"segment":[11.9434383775351,62.340240249609984],"label":"Doing fencing"},{"segment":[99.29643057722309,105.45579563182527],"label":"Doing fencing"}]},"AntmYynwbhQ":{"duration":122.81,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AntmYynwbhQ","annotations":[{"segment":[36.860296411856474,91.24120124804992],"label":"Getting a 
haircut"}]},"vA6Ykf9pV64":{"duration":58.42,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=vA6Ykf9pV64","annotations":[]},"P90sZu5CbcE":{"duration":99.359,"subset":"testing","resolution":"638x360","url":"https://www.youtube.com/watch?v=P90sZu5CbcE","annotations":[]},"hc4DBHpRuGM":{"duration":63.832,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=hc4DBHpRuGM","annotations":[{"segment":[4.38157999375975,6.8711140811232445],"label":"Elliptical trainer"},{"segment":[11.352275438377534,27.982363141965678],"label":"Elliptical trainer"},{"segment":[32.46352449921997,45.3095203900156],"label":"Elliptical trainer"},{"segment":[49.79068174726989,54.27184310452418],"label":"Elliptical trainer"}]},"RjPoQP1HxAE":{"duration":85.36,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=RjPoQP1HxAE","annotations":[]},"UkZiyYfO57g":{"duration":169.785,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UkZiyYfO57g","annotations":[{"segment":[17.5557673816265,23.108043082561082],"label":"Skiing"},{"segment":[38.97168794237416,42.673205076330554],"label":"Skiing"},{"segment":[53.77775647819971,61.180790746112486],"label":"Skiing"},{"segment":[77.04443560592557,80.48155865888508],"label":"Skiing"},{"segment":[87.62019884580096,96.09213031002294],"label":"Skiing"}]},"IeqBzYDyHGQ":{"duration":168.81,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IeqBzYDyHGQ","annotations":[{"segment":[34.2358488897379,114.82177012250558],"label":"Beach soccer"},{"segment":[125.35587747319418,158.2749617301092],"label":"Beach soccer"}]},"fRLDhkPqrTE":{"duration":36.11,"subset":"training","resolution":"270x360","url":"https://www.youtube.com/watch?v=fRLDhkPqrTE","annotations":[{"segment":[1.4082304212168486,36.050698783151326],"label":"Playing 
piano"}]},"FP0tI2Tjigs":{"duration":48.37,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=FP0tI2Tjigs","annotations":[{"segment":[2.808062533136814,47.70966649575364],"label":"Vacuuming floor"}]},"ouv2veXexVw":{"duration":42.052,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ouv2veXexVw","annotations":[{"segment":[0.01,42.001],"label":"Beach soccer"}]},"gXvRxyT5rWw":{"duration":47.81,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gXvRxyT5rWw","annotations":[{"segment":[0,47.809387],"label":"Using parallel bars"}]},"dP2DgvNt12Y":{"duration":72.794,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=dP2DgvNt12Y","annotations":[{"segment":[17.547675068807504,64.41695373754779],"label":"Blowing leaves"}]},"Pi5fonaOYTI":{"duration":210.838,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Pi5fonaOYTI","annotations":[{"segment":[68.70534488927538,72.3167031447271],"label":"Hula hoop"}]},"MvKhjWRNTvI":{"duration":166.19,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MvKhjWRNTvI","annotations":[{"segment":[33.24530925,117.44419201872076],"label":"Paintball"}]},"Zest0gXEWEc":{"duration":56.82,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Zest0gXEWEc","annotations":[{"segment":[18.969289847113888,47.137042400000006],"label":"Washing dishes"}]},"ojIhxWrBM0s":{"duration":83.31,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ojIhxWrBM0s","annotations":[]},"5P-4_nS8euM":{"duration":111.22,"subset":"training","resolution":"4096x2160","url":"https://www.youtube.com/watch?v=5P-4_nS8euM","annotations":[{"segment":[14.57531238392393,20.127812339704473],"label":"Getting a haircut"},{"segment":[27.762499778902722,33.31499973468327],"label":"Getting a 
haircut"},{"segment":[38.86749969046381,42.858359033681076],"label":"Getting a haircut"},{"segment":[46.155155882425774,52.74874957991518],"label":"Getting a haircut"},{"segment":[59.342343277404574,63.50671782215288],"label":"Getting a haircut"},{"segment":[69.05921774102964,71.83546770046802],"label":"Getting a haircut"},{"segment":[78.42906135413416,82.59343629329173],"label":"Getting a haircut"}]},"uqiooW1OAXU":{"duration":158.268,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=uqiooW1OAXU","annotations":[{"segment":[32.7364535746901,133.40721333530766],"label":"Camel ride"}]},"FUrLg7FETWo":{"duration":29.93,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FUrLg7FETWo","annotations":[{"segment":[0.01,3.7354785075756776],"label":"Chopping wood"},{"segment":[17.0431206099844,18.210457638065524],"label":"Chopping wood"}]},"thgW2RT30qQ":{"duration":203.62,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=thgW2RT30qQ","annotations":[{"segment":[56.72766,123.152095],"label":"Hammer throw"}]},"qHRCGBIiNFg":{"duration":189.011,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qHRCGBIiNFg","annotations":[{"segment":[0,186.0617493447738],"label":"Ice fishing"}]},"a6lCnOqNaMY":{"duration":37.94,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=a6lCnOqNaMY","annotations":[{"segment":[0.7091419656786272,32.443244929797196],"label":"Javelin throw"}]},"lOCw2uO3UK8":{"duration":172.2,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=lOCw2uO3UK8","annotations":[{"segment":[30.08784488522398,172.19918367346938],"label":"Playing 
racquetball"}]},"VJh9W3Gcpmo":{"duration":220.659,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=VJh9W3Gcpmo","annotations":[]},"Vxw3yFCV2iA":{"duration":195.118,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=Vxw3yFCV2iA","annotations":[]},"2wUKu5UIHmI":{"duration":100.87,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=2wUKu5UIHmI","annotations":[{"segment":[3.656482075306758,7.584343562580797],"label":"Putting on makeup"},{"segment":[33.66534383808041,60.217687492052896],"label":"Putting on makeup"}]},"J7GJKAoU0rE":{"duration":211.627,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=J7GJKAoU0rE","annotations":[{"segment":[29.71357251950078,211.6266665],"label":"Kite flying"}]},"pWotXONgXtc":{"duration":206.57,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=pWotXONgXtc","annotations":[{"segment":[5.557532912201035,193.4606381458459],"label":"Playing squash"}]},"uVHuVnv5XAk":{"duration":56.52,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=uVHuVnv5XAk","annotations":[{"segment":[11.852180070000001,55.47613104524181],"label":"Triple jump"}]},"_CMIO5R_OGA":{"duration":35.06,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=_CMIO5R_OGA","annotations":[{"segment":[9.900539048361935,30.303307363494543],"label":"Playing field hockey"}]},"1L_4N307nBk":{"duration":93.0,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1L_4N307nBk","annotations":[{"segment":[64.7170804,83.88550801950078],"label":"Using the balance beam"},{"segment":[12.246511550000001,57.38070485],"label":"Using the balance 
beam"}]},"UySzNwvkQKQ":{"duration":199.6,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=UySzNwvkQKQ","annotations":[{"segment":[10.121223315132605,178.91208598595944],"label":"Dodgeball"}]},"dkP1In7K63Y":{"duration":174.034,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=dkP1In7K63Y","annotations":[{"segment":[13.930921597358967,146.1933639649602],"label":"Cleaning sink"}]},"J9XPELRRXT0":{"duration":230.157,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=J9XPELRRXT0","annotations":[]},"WiVzjWuu_Dk":{"duration":231.573,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WiVzjWuu_Dk","annotations":[]},"-vY2uUmv6Dk":{"duration":142.85,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-vY2uUmv6Dk","annotations":[]},"fpVaXCwccNk":{"duration":39.1,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=fpVaXCwccNk","annotations":[{"segment":[21.419565522620907,23.991222308892358],"label":"Hammer throw"}]},"nxyENrsY4mo":{"duration":138.55,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nxyENrsY4mo","annotations":[{"segment":[13.123740159999999,27.73138295],"label":"Playing polo"},{"segment":[34.03554445,129.21526718876754],"label":"Playing polo"}]},"DgJ-GG1Agyw":{"duration":222.77,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=DgJ-GG1Agyw","annotations":[{"segment":[0.34753863650546024,45.87510001872075],"label":"Getting a piercing"},{"segment":[86.88465912636505,221.03457281747268],"label":"Getting a piercing"}]},"L67RSiR2X78":{"duration":300.327,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=L67RSiR2X78","annotations":[{"segment":[0.01,300.3268707482993],"label":"Waxing 
skis"}]},"_E_9te0nq3A":{"duration":167.67000000000002,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_E_9te0nq3A","annotations":[{"segment":[55.43916850881153,120.58698333541342],"label":"Starting a campfire"}]},"t6FuJ4L8sHY":{"duration":86.332,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=t6FuJ4L8sHY","annotations":[{"segment":[1.2121468315132604,86.331791],"label":"Futsal"}]},"6czh95dpwAA":{"duration":42.772,"subset":"validation","resolution":"626x360","url":"https://www.youtube.com/watch?v=6czh95dpwAA","annotations":[{"segment":[4.60407142698908,9.741948236817473],"label":"Hand car wash"},{"segment":[12.811329188143526,16.547966868018722],"label":"Hand car wash"},{"segment":[21.6191180049922,23.68761386349454],"label":"Hand car wash"},{"segment":[26.957171833385335,30.760535186115447],"label":"Hand car wash"}]},"qC4ynfF8JrE":{"duration":152.184,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qC4ynfF8JrE","annotations":[]},"aHx8nMBV6rc":{"duration":148.86,"subset":"testing","resolution":"854x470","url":"https://www.youtube.com/watch?v=aHx8nMBV6rc","annotations":[]},"vqyQF-1Zc_E":{"duration":78.136,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=vqyQF-1Zc_E","annotations":[]},"HqxTRzf11tc":{"duration":23.59,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HqxTRzf11tc","annotations":[{"segment":[2.76038291,23.545],"label":"Long jump"}]},"EnMS9O1U6E0":{"duration":106.22999999999999,"subset":"testing","resolution":"562x480","url":"https://www.youtube.com/watch?v=EnMS9O1U6E0","annotations":[]},"wkSm7bUCgGQ":{"duration":134.35,"subset":"validation","resolution":"288x208","url":"https://www.youtube.com/watch?v=wkSm7bUCgGQ","annotations":[{"segment":[0.01,134.35068027210883],"label":"Shaving 
legs"}]},"joehK-w2bCc":{"duration":74.05,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=joehK-w2bCc","annotations":[{"segment":[7.21301568,70.52509048127925],"label":"Ballet"}]},"f4CSejhkTd8":{"duration":120.814,"subset":"validation","resolution":"848x480","url":"https://www.youtube.com/watch?v=f4CSejhkTd8","annotations":[{"segment":[99.15433415732367,108.20120522128623],"label":"Elliptical trainer"}]},"PjNF7HoQ6yY":{"duration":97.594,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=PjNF7HoQ6yY","annotations":[{"segment":[7.003553248049922,86.1741551825273],"label":"Playing ten pins"}]},"rSGgal9LtDk":{"duration":212.39,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rSGgal9LtDk","annotations":[{"segment":[0.9933276131045243,192.04333853354134],"label":"Making a sandwich"}]},"TV8putYbiCU":{"duration":123.554,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TV8putYbiCU","annotations":[{"segment":[44.629644859813084,117.55659034267913],"label":"Cleaning sink"}]},"vgXU0u-rN9c":{"duration":59.26,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vgXU0u-rN9c","annotations":[{"segment":[0.01,37.25538403900156],"label":"Mixing drinks"}]},"7ftCKS5SCCk":{"duration":184.367,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7ftCKS5SCCk","annotations":[{"segment":[15.527822152886117,22.141524180967238],"label":"Putting on shoes"},{"segment":[35.36892823712949,41.69507800312012],"label":"Putting on shoes"},{"segment":[48.596332293291724,59.52331825273011],"label":"Putting on shoes"},{"segment":[69.58764742589703,76.20134945397815],"label":"Putting on shoes"},{"segment":[168.21807332293292,184.121],"label":"Putting on 
shoes"}]},"WKoHUS5B2u4":{"duration":180.28,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WKoHUS5B2u4","annotations":[{"segment":[34.72482176443057,114.29668054446178],"label":"Horseback riding"},{"segment":[132.2917298798752,177.56052586427455],"label":"Horseback riding"}]},"8slE1UskRS4":{"duration":216.41,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8slE1UskRS4","annotations":[{"segment":[0,0],"label":"Capoeira"}]},"moMisK3nGuY":{"duration":85.45,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=moMisK3nGuY","annotations":[{"segment":[1.0664518662379148,61.18767582540036],"label":"Grooming horse"}]},"SpEdr2o2TP0":{"duration":198.6,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SpEdr2o2TP0","annotations":[{"segment":[111.5163182527301,182.47146489859594],"label":"Horseback riding"}]},"nSgBX7D4XLA":{"duration":72.006,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=nSgBX7D4XLA","annotations":[{"segment":[0.01,70.3675795125717],"label":"Shuffleboard"}]},"dU4sCY0kHtk":{"duration":149.91,"subset":"training","resolution":"576x324","url":"https://www.youtube.com/watch?v=dU4sCY0kHtk","annotations":[{"segment":[12.94155764,143.3597814945398],"label":"Cumbia"}]},"Auy0KGsXAIg":{"duration":133.75,"subset":"validation","resolution":"648x480","url":"https://www.youtube.com/watch?v=Auy0KGsXAIg","annotations":[{"segment":[31.923936735725427,129.99093193697345],"label":"Fixing bicycle"}]},"y3xcwZpcLvI":{"duration":59.54,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=y3xcwZpcLvI","annotations":[{"segment":[0,52.01269778471139],"label":"Using the balance 
beam"}]},"Z0noyvIqi_U":{"duration":219.313,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Z0noyvIqi_U","annotations":[]},"dJO_4TrLr7E":{"duration":98.00999999999999,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dJO_4TrLr7E","annotations":[{"segment":[6.788043398595944,94.95633742979719],"label":"Tango"}]},"unqP4N-QYgQ":{"duration":15.56,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=unqP4N-QYgQ","annotations":[]},"yzmPtZyuo4s":{"duration":68.52,"subset":"training","resolution":"448x338","url":"https://www.youtube.com/watch?v=yzmPtZyuo4s","annotations":[{"segment":[20.096961289859593,48.4251248101404],"label":"High jump"}]},"jKPgBtesr74":{"duration":110.736,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jKPgBtesr74","annotations":[{"segment":[42.238076323987535,82.4222398753894],"label":"Making a lemonade"}]},"PG-AK5fawJI":{"duration":232.13,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PG-AK5fawJI","annotations":[{"segment":[128.89365990639624,142.28991107644305],"label":"Rock-paper-scissors"}]},"qI1ZayfiGHI":{"duration":95.03999999999999,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=qI1ZayfiGHI","annotations":[{"segment":[16.309337380297933,91.77708944004019],"label":"Playing racquetball"}]},"0Jhynsxxv_M":{"duration":114.47999999999999,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0Jhynsxxv_M","annotations":[]},"i6wR9_tkg6A":{"duration":217.362,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=i6wR9_tkg6A","annotations":[]},"PgZ2e0H1ZVE":{"duration":186.16,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=PgZ2e0H1ZVE","annotations":[{"segment":[61.2770256225215,121.39240054602182],"label":"Playing 
harmonica"},{"segment":[140.26920924336974,152.17611934477378],"label":"Playing harmonica"}]},"TIfAkOBMf5A":{"duration":221.68,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=TIfAkOBMf5A","annotations":[{"segment":[11.066753549053526,171.18884396192172],"label":"Doing step aerobics"}]},"UZPSbNS1LU0":{"duration":123.089,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UZPSbNS1LU0","annotations":[{"segment":[6.335433697347894,105.97452730109205],"label":"Rollerblading"}]},"cNCkHqOnJV0":{"duration":234.219,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cNCkHqOnJV0","annotations":[{"segment":[10.596509988290688,234.219409],"label":"River tubing"}]},"K4F8I74OM-Y":{"duration":137.695,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=K4F8I74OM-Y","annotations":[{"segment":[19.32556942277691,24.26432605304212],"label":"Running a marathon"},{"segment":[108.65264586583463,114.89646645865835],"label":"Running a marathon"},{"segment":[125.63445397815913,135.29864274570983],"label":"Running a marathon"}]},"sGTDKZuG1vE":{"duration":214.46,"subset":"training","resolution":"854x392","url":"https://www.youtube.com/watch?v=sGTDKZuG1vE","annotations":[{"segment":[0.01,214.45950113378686],"label":"Cleaning sink"}]},"D_zM6YmlUnw":{"duration":224.073,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=D_zM6YmlUnw","annotations":[{"segment":[0.01,205.13040093603743],"label":"Sharpening knives"}]},"O8JVA-scYlM":{"duration":135.26,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=O8JVA-scYlM","annotations":[{"segment":[7.220544001829618,86.22516841771747],"label":"Making a 
sandwich"}]},"knY08LrNyHg":{"duration":224.75,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=knY08LrNyHg","annotations":[{"segment":[15.37597503900156,56.61154446177847],"label":"Sailing"},{"segment":[57.006569250000005,213.86583463338533],"label":"Sailing"}]},"xxvBR4zdV48":{"duration":142.107,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=xxvBR4zdV48","annotations":[{"segment":[34.356560062402494,61.39849765990639],"label":"Playing blackjack"}]},"OtMNVvCSxVo":{"duration":71.19,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OtMNVvCSxVo","annotations":[{"segment":[3.331936614664587,59.19740718720749],"label":"Washing dishes"}]},"qJrJeICcevI":{"duration":32.439,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=qJrJeICcevI","annotations":[{"segment":[1.5134491514010997,29.00778791606719],"label":"Camel ride"}]},"ASKSqeyhpgc":{"duration":82.68,"subset":"training","resolution":"424x240","url":"https://www.youtube.com/watch?v=ASKSqeyhpgc","annotations":[{"segment":[11.164257580343214,71.56740841965679],"label":"Putting on makeup"}]},"nLddpveoSHE":{"duration":72.89,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nLddpveoSHE","annotations":[{"segment":[7.554303102964118,9.599076875195006],"label":"Getting a tattoo"},{"segment":[25.162077252730107,29.365223340093603],"label":"Getting a tattoo"},{"segment":[34.13636214196568,42.20185868798752],"label":"Getting a tattoo"},{"segment":[47.31379311856474,52.19853046333853],"label":"Getting a tattoo"}]},"GLsIq3P-J3A":{"duration":92.927,"subset":"training","resolution":"284x240","url":"https://www.youtube.com/watch?v=GLsIq3P-J3A","annotations":[{"segment":[12.896113884555382,76.08896255850235],"label":"Playing 
blackjack"}]},"1OP64qG2v7I":{"duration":85.26,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=1OP64qG2v7I","annotations":[]},"AbVuOBhG634":{"duration":155.04,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AbVuOBhG634","annotations":[{"segment":[17.89849165667307,24.18715088739604],"label":"Walking the dog"}]},"IZXFAoiSj0c":{"duration":72.42,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=IZXFAoiSj0c","annotations":[{"segment":[3.97095309,59.466318252730105],"label":"Hammer throw"}]},"BCC6fxrmA9M":{"duration":102.82,"subset":"training","resolution":"640x356","url":"https://www.youtube.com/watch?v=BCC6fxrmA9M","annotations":[{"segment":[54.37631607800312,97.8452885179407],"label":"Playing piano"}]},"c6wINALtTvs":{"duration":217.014,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=c6wINALtTvs","annotations":[{"segment":[36.225299471842824,205.16384406552262],"label":"Baking cookies"}]},"MGF7I9aCPPM":{"duration":65.16,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MGF7I9aCPPM","annotations":[{"segment":[9.300624237214386,15.704332728411178],"label":"Spinning"},{"segment":[15.80597889493811,19.97347118798752],"label":"Spinning"},{"segment":[21.90474830031201,29.01997976677067],"label":"Spinning"},{"segment":[39.89611929407176,41.624104078783155],"label":"Spinning"},{"segment":[49.24756636427457,53.92328989937598],"label":"Spinning"}]},"M_E1i4S8Vp0":{"duration":101.308,"subset":"validation","resolution":"270x480","url":"https://www.youtube.com/watch?v=M_E1i4S8Vp0","annotations":[{"segment":[10.10830577223089,96.3638221528861],"label":"Swinging at the playground"}]},"DR0hc7lT0hE":{"duration":131.565,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DR0hc7lT0hE","annotations":[{"segment":[35.89978603840766,121.10591687952916],"label":"Hula 
hoop"}]},"MY6o5ZObFLE":{"duration":91.952,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MY6o5ZObFLE","annotations":[{"segment":[0.01,91.95102040816326],"label":"Applying sunscreen"}]},"5HEunlL4nF0":{"duration":62.32,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5HEunlL4nF0","annotations":[{"segment":[0.6800702028081124,60.040483619344776],"label":"Playing racquetball"}]},"GO_36Qd9bb0":{"duration":191.47,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=GO_36Qd9bb0","annotations":[{"segment":[43.312610681298,190.27677933784017],"label":"Tango"}]},"oRtMsdNQ1LE":{"duration":71.796,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=oRtMsdNQ1LE","annotations":[{"segment":[54.883132636505465,71.796098],"label":"Playing congas"}]},"bQ0L24YbGGI":{"duration":80.88,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=bQ0L24YbGGI","annotations":[{"segment":[9.210419680187208,62.07570524180968],"label":"Ballet"}]},"paY7wXmzcTQ":{"duration":53.964,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=paY7wXmzcTQ","annotations":[{"segment":[39.97187987519501,50.15419032761311],"label":"Snow tubing"}]},"PRTcRz--03E":{"duration":179.398,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=PRTcRz--03E","annotations":[{"segment":[17.045544464930313,172.13205558027988],"label":"Playing congas"}]},"jkWqhnD8hyQ":{"duration":170.505,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jkWqhnD8hyQ","annotations":[{"segment":[93.89695603878577,102.94085548728073],"label":"Painting furniture"},{"segment":[109.32478367082682,119.43267121216847],"label":"Painting furniture"},{"segment":[124.4866149828393,133.53051436193448],"label":"Painting furniture"},{"segment":[139.11645221372854,148.1603515928237],"label":"Painting 
furniture"}]},"6aWTCiY3eww":{"duration":35.829,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=6aWTCiY3eww","annotations":[{"segment":[4.244736349453978,34.963223088923556],"label":"Camel ride"}]},"7knqgoHxuGE":{"duration":38.128,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7knqgoHxuGE","annotations":[{"segment":[0.01,38.12716553287982],"label":"Beer pong"}]},"DpoIgaZ1m_U":{"duration":216.57,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=DpoIgaZ1m_U","annotations":[{"segment":[12.030574600000001,190.6838728549142],"label":"Doing karate"}]},"P3vu94B0KGY":{"duration":235.497,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=P3vu94B0KGY","annotations":[]},"In5hkMlOG58":{"duration":180.745,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=In5hkMlOG58","annotations":[]},"N49yT-kvXuw":{"duration":146.565,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=N49yT-kvXuw","annotations":[{"segment":[28.123810113884556,130.32985174726988],"label":"Snowboarding"}]},"nmMs1pWkiAU":{"duration":55.06,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=nmMs1pWkiAU","annotations":[{"segment":[6.3065522620904835,49.808892355694226],"label":"Doing karate"}]},"Hk-wwGuHuC0":{"duration":98.43,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Hk-wwGuHuC0","annotations":[{"segment":[17.036335413416538,89.32565054602185],"label":"Mooping floor"}]},"jlKyDZ6nWcM":{"duration":26.378,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jlKyDZ6nWcM","annotations":[]},"SQncxXAlBFE":{"duration":125.6,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SQncxXAlBFE","annotations":[{"segment":[4.57671824,117.61743369734789],"label":"Clean and 
jerk"}]},"yFPevKUgKUI":{"duration":70.496,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yFPevKUgKUI","annotations":[]},"QZCBVJHF81w":{"duration":120.21,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=QZCBVJHF81w","annotations":[{"segment":[13.871940717628705,105.55679095163806],"label":"Hand car wash"}]},"7BFbmEY3BNw":{"duration":97.083,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=7BFbmEY3BNw","annotations":[]},"6eQuZddoEcI":{"duration":99.03,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=6eQuZddoEcI","annotations":[{"segment":[3.0918102600000004,13.577672386895475],"label":"Discus throw"},{"segment":[22.218009360374413,28.241096723868957],"label":"Discus throw"},{"segment":[39.198024960998445,46.75984867394696],"label":"Discus throw"},{"segment":[50.30927613104525,56.327870514820596],"label":"Discus throw"}]},"e6Ti5g_zQ_4":{"duration":90.837,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=e6Ti5g_zQ_4","annotations":[{"segment":[0.01,90.781],"label":"Playing pool"}]},"xAoQ6JisbhI":{"duration":60.163,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=xAoQ6JisbhI","annotations":[{"segment":[8.428177570093457,59.921],"label":"Playing rubik cube"}]},"8ycO15nH8YM":{"duration":186.642,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=8ycO15nH8YM","annotations":[{"segment":[0.01,186.64201814058956],"label":"Making a cake"}]},"xATeffo_kP4":{"duration":231.805,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=xATeffo_kP4","annotations":[{"segment":[0.01,227.97749978521196],"label":"Table soccer"}]},"uC27rJLCn70":{"duration":67.362,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uC27rJLCn70","annotations":[{"segment":[14.598469578783149,67.321],"label":"Snow 
tubing"}]},"gJxR-KzawO4":{"duration":194.44,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=gJxR-KzawO4","annotations":[{"segment":[7.429245957098283,141.45728250000002],"label":"Washing dishes"},{"segment":[164.82155338,193.00877762012482],"label":"Washing dishes"}]},"QR2BmWR5FGM":{"duration":51.224,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=QR2BmWR5FGM","annotations":[{"segment":[3.514577223088924,51.181],"label":"Fun sliding down"}]},"B3xMfP_UOt4":{"duration":50.74,"subset":"testing","resolution":"622x360","url":"https://www.youtube.com/watch?v=B3xMfP_UOt4","annotations":[]},"OQPBLjX1LHk":{"duration":48.809,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=OQPBLjX1LHk","annotations":[{"segment":[0.01,38.111171606864275],"label":"Using the monkey bar"}]},"oW0G_C86fz0":{"duration":205.799,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=oW0G_C86fz0","annotations":[{"segment":[22.15303203276131,42.05865501872075],"label":"Plastering"},{"segment":[54.900992429017165,61.96427800468019],"label":"Plastering"},{"segment":[73.20132323868955,86.36471908424338],"label":"Plastering"},{"segment":[109.1598679875195,127.4601987971919],"label":"Plastering"},{"segment":[135.48665967862715,177.54531469734792],"label":"Plastering"}]},"EGJII8_NOXo":{"duration":224.514,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EGJII8_NOXo","annotations":[]},"NtX9Q0QX3sw":{"duration":183.79,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=NtX9Q0QX3sw","annotations":[{"segment":[13.546792349999999,165.14930021840874],"label":"Cleaning windows"}]},"ddLFSNa3ci0":{"duration":158.198,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ddLFSNa3ci0","annotations":[{"segment":[0.01,149.5596182527301],"label":"Spread 
mulch"}]},"Sh8JODtAMhc":{"duration":182.974,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Sh8JODtAMhc","annotations":[{"segment":[0.01,172.69705273790953],"label":"Riding bumper cars"}]},"TJsdyRfYItg":{"duration":188.454,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=TJsdyRfYItg","annotations":[]},"REZTMjILEQ4":{"duration":199.14,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=REZTMjILEQ4","annotations":[]},"4xgUlwGdjJ4":{"duration":19.18,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4xgUlwGdjJ4","annotations":[]},"xQcJ8gTPRSU":{"duration":165.7,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=xQcJ8gTPRSU","annotations":[{"segment":[29.20036349453978,160.49217004680187],"label":"Preparing salad"}]},"YZvdzvM-124":{"duration":6.618,"subset":"validation","resolution":"360x360","url":"https://www.youtube.com/watch?v=YZvdzvM-124","annotations":[{"segment":[4.496845553822153,6.361],"label":"Rock-paper-scissors"}]},"xAPz4YOcIX0":{"duration":169.09,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xAPz4YOcIX0","annotations":[{"segment":[43.26097926638154,154.57886493963161],"label":"Painting"}]},"ykov_joUUTk":{"duration":204.27,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=ykov_joUUTk","annotations":[{"segment":[2.5485304212168485,204.201],"label":"Doing karate"}]},"2c9-_NElJLw":{"duration":226.05,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=2c9-_NElJLw","annotations":[]},"eOsASILOUZM":{"duration":12.68,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eOsASILOUZM","annotations":[{"segment":[3.88225716,8.21952774],"label":"Springboard 
diving"}]},"GP2S0V5NiPs":{"duration":216.318,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GP2S0V5NiPs","annotations":[{"segment":[85.5481813068441,91.28514021105062],"label":"Ice fishing"},{"segment":[143.59270569110765,149.66713272386895],"label":"Ice fishing"}]},"88Mt7VfUQBU":{"duration":172.222,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=88Mt7VfUQBU","annotations":[{"segment":[4.566953198127925,172.181],"label":"Kneeling"}]},"O8jThut7tAQ":{"duration":155.296,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=O8jThut7tAQ","annotations":[{"segment":[53.76613066981726,56.9107346214706],"label":"Playing ten pins"},{"segment":[103.59600867293959,108.67575351791807],"label":"Playing ten pins"}]},"Lwy92HbuZII":{"duration":180.16,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Lwy92HbuZII","annotations":[{"segment":[10.961594868954759,52.278375528861154],"label":"Sailing"},{"segment":[73.92049873166927,89.09809162714508],"label":"Sailing"},{"segment":[103.71355145241809,105.11888412792511],"label":"Sailing"},{"segment":[137.722602199688,144.18713250702027],"label":"Sailing"}]},"e5XATG2IK6Q":{"duration":156.503,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=e5XATG2IK6Q","annotations":[{"segment":[9.27419344773791,100.55178159126366],"label":"Hand car wash"},{"segment":[110.80220592823714,115.45406084243372],"label":"Hand car wash"},{"segment":[117.89495007800313,151.8233104524181],"label":"Hand car 
wash"}]},"d-NsZZkxTa0":{"duration":52.106,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=d-NsZZkxTa0","annotations":[{"segment":[0.2,49.508414283489095],"label":"Shuffleboard"}]},"YTuQrhSKkNE":{"duration":100.287,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=YTuQrhSKkNE","annotations":[{"segment":[14.233620904836194,65.85004836193447],"label":"Curling"},{"segment":[76.48616068642745,99.32251950078003],"label":"Curling"}]},"ukXHzfVXkDo":{"duration":164.63,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ukXHzfVXkDo","annotations":[]},"He7CDkVm_aE":{"duration":197.556,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=He7CDkVm_aE","annotations":[{"segment":[60.21905181785,190.48884641608745],"label":"Blow-drying hair"}]},"aVJRaXUjsEE":{"duration":6.595,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=aVJRaXUjsEE","annotations":[{"segment":[0,6.594466],"label":"Beer pong"}]},"agZNSscDJww":{"duration":111.062,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=agZNSscDJww","annotations":[{"segment":[8.83477535101404,107.21060686427457],"label":"Hurling"}]},"2ehPAKS6Gpo":{"duration":30.233,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=2ehPAKS6Gpo","annotations":[{"segment":[0.2,17.49799158970359],"label":"Playing ice 
hockey"}]},"4zLTW7lT3fs":{"duration":106.35,"subset":"validation","resolution":"854x470","url":"https://www.youtube.com/watch?v=4zLTW7lT3fs","annotations":[{"segment":[13.438545321404693,26.877090187207486],"label":"Snatch"},{"segment":[41.31108306552262,58.73141929797191],"label":"Snatch"},{"segment":[84.77896966458657,85.27669355694226],"label":"Snatch"},{"segment":[0.2,106.147005],"label":"Snatch"}]},"TyLHQAu66KA":{"duration":171.11,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TyLHQAu66KA","annotations":[]},"ZU4Mgdd3omA":{"duration":34.78,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZU4Mgdd3omA","annotations":[{"segment":[0.01,32.50438649926949],"label":"Playing flauta"}]},"KBG7wrKsZAI":{"duration":181.26,"subset":"validation","resolution":"270x480","url":"https://www.youtube.com/watch?v=KBG7wrKsZAI","annotations":[{"segment":[6.926429017160686,160.43871294851792],"label":"Doing karate"}]},"DrQgYA5_8VA":{"duration":29.676,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=DrQgYA5_8VA","annotations":[{"segment":[10.40440717628705,29.641],"label":"Waterskiing"}]},"UmU8dx36O9w":{"duration":85.961,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=UmU8dx36O9w","annotations":[{"segment":[0,85.721],"label":"Putting in contact lenses"}]},"U0jlI70N_DQ":{"duration":103.78999999999999,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=U0jlI70N_DQ","annotations":[{"segment":[8.901820015600624,92.57892816224648],"label":"Playing water polo"}]},"99A2KqvfBLI":{"duration":126.39,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=99A2KqvfBLI","annotations":[{"segment":[29.09532106,49.29259483304502],"label":"Playing harmonica"},{"segment":[52.84166166102426,53.82751355768516],"label":"Playing harmonica"},{"segment":[76.69927756021805,116.33052380598625],"label":"Playing 
harmonica"}]},"rBEQh8V2TI8":{"duration":137.973,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rBEQh8V2TI8","annotations":[{"segment":[27.919828660436135,128.21644392523365],"label":"Baton twirling"}]},"nhQTxZlzG1Q":{"duration":125.16,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=nhQTxZlzG1Q","annotations":[]},"Mo9-1cuMVtQ":{"duration":95.342,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Mo9-1cuMVtQ","annotations":[{"segment":[28.649677570093456,82.23793457943925],"label":"Futsal"}]},"KwBuRjh_v9M":{"duration":108.3,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KwBuRjh_v9M","annotations":[{"segment":[27.549673830000003,91.98422289],"label":"Getting a piercing"}]},"-mWhX_yXOS8":{"duration":76.7,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-mWhX_yXOS8","annotations":[]},"IsHMvAfUOGs":{"duration":11.87,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=IsHMvAfUOGs","annotations":[{"segment":[0,9.956784711388455],"label":"Hopscotch"}]},"NulnzF8avMI":{"duration":92.56,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=NulnzF8avMI","annotations":[{"segment":[15.8822664,77.65373154000001],"label":"Using the pommel horse"}]},"jhakEVLCqNM":{"duration":145.891,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jhakEVLCqNM","annotations":[{"segment":[47.55842277691107,145.841],"label":"Swimming"}]},"cr2lbZ6or1Q":{"duration":125.76,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cr2lbZ6or1Q","annotations":[{"segment":[27.27065785803432,116.734110975039],"label":"Playing 
violin"}]},"eKdtOKqLMRE":{"duration":149.235,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=eKdtOKqLMRE","annotations":[]},"_ajGnJjNUX0":{"duration":121.023,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_ajGnJjNUX0","annotations":[{"segment":[24.741064773286244,89.70048066113672],"label":"Installing carpet"}]},"extd66Y1tJ8":{"duration":88.352,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=extd66Y1tJ8","annotations":[{"segment":[17.80177186100802,60.44898369901425],"label":"Putting in contact lenses"}]},"ZnKB0U96ZI4":{"duration":72.49,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZnKB0U96ZI4","annotations":[]},"YveUW4bLL5A":{"duration":69.75,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YveUW4bLL5A","annotations":[{"segment":[2.1518491999999996,63.07160686427457],"label":"Vacuuming floor"}]},"iuqJ_h0tOwk":{"duration":71.77,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=iuqJ_h0tOwk","annotations":[]},"0EewuppFjEw":{"duration":55.148,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=0EewuppFjEw","annotations":[{"segment":[18.83229173166927,49.53150702028081],"label":"Snow tubing"}]},"7CeAzJF2z4o":{"duration":185.53,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=7CeAzJF2z4o","annotations":[{"segment":[0,185.481],"label":"Playing guitarra"}]},"TUTcdyeBEQs":{"duration":141.94,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TUTcdyeBEQs","annotations":[]},"7Ql7Hw6c1-o":{"duration":175.241,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7Ql7Hw6c1-o","annotations":[{"segment":[63.67733697347894,149.78182215288612],"label":"Painting 
furniture"}]},"YPNLMBhuDS8":{"duration":175.07999999999998,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YPNLMBhuDS8","annotations":[{"segment":[51.34906409547581,148.58452589329173],"label":"Cleaning shoes"}]},"7c9ku5U1Xj4":{"duration":127.432,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=7c9ku5U1Xj4","annotations":[]},"oWsEp7hsmZg":{"duration":98.058,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=oWsEp7hsmZg","annotations":[{"segment":[82.82506066101901,86.33800958313739],"label":"Doing crunches"}]},"JmtcnoHa66U":{"duration":94.25,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=JmtcnoHa66U","annotations":[{"segment":[4.03102973,90.06176521060843],"label":"Using the balance beam"}]},"DlgFhgRivlE":{"duration":132.8,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DlgFhgRivlE","annotations":[]},"lM4FQ_FqEhQ":{"duration":161.87,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lM4FQ_FqEhQ","annotations":[{"segment":[34.34290468761608,151.51281435257408],"label":"Skateboarding"}]},"pMVo7PaXD1c":{"duration":237.32999999999998,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pMVo7PaXD1c","annotations":[{"segment":[28.509304410271646,176.97983776766034],"label":"Paintball"}]},"iDofsMwceTo":{"duration":126.062,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iDofsMwceTo","annotations":[{"segment":[30.458588143525738,31.4411232449298],"label":"Plastering"},{"segment":[52.07436037441497,53.64641653666146],"label":"Plastering"},{"segment":[86.65959594383776,88.24566146645866],"label":"Plastering"}]},"5vlGKyxl22M":{"duration":90.907,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5vlGKyxl22M","annotations":[{"segment":[9.826918712011937,29.509075353756486],"label":"R
unning a marathon"},{"segment":[35.59780007026739,40.97853354067237],"label":"Running a marathon"},{"segment":[72.41334486777518,81.19243631948858],"label":"Running a marathon"}]},"Z_1Zoc6lINU":{"duration":210.61,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=Z_1Zoc6lINU","annotations":[{"segment":[105.46677196099843,196.80559627613104],"label":"Polishing shoes"}]},"aBdrTqSnWbw":{"duration":121.21,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=aBdrTqSnWbw","annotations":[{"segment":[10.40007046801872,17.396481510140408],"label":"Paintball"},{"segment":[18.90921903276131,20.421956555382216],"label":"Paintball"},{"segment":[38.76389901716069,42.16755844305772],"label":"Paintball"},{"segment":[54.08036643369734,57.29493366926677],"label":"Paintball"},{"segment":[77.52779803432138,81.49873403120124],"label":"Paintball"},{"segment":[102.4879671575663,110.42983915132605],"label":"Paintball"}]},"LLD4SapVrgQ":{"duration":206.496,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LLD4SapVrgQ","annotations":[{"segment":[12.885806969693753,34.147388469688444],"label":"BMX"},{"segment":[36.08025951514251,111.14008511360862],"label":"BMX"},{"segment":[113.0729561590627,153.34110293935566],"label":"BMX"},{"segment":[174.28053868330736,185.23347457098282],"label":"BMX"}]},"mbVZlDl_kMU":{"duration":64.74,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=mbVZlDl_kMU","annotations":[]},"HEw5wIWVpWE":{"duration":236.03,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HEw5wIWVpWE","annotations":[{"segment":[100.15661186991699,101.99772605870221],"label":"Vacuuming 
floor"}]},"EPsSfOEd3BU":{"duration":46.67,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EPsSfOEd3BU","annotations":[]},"o2hmponBzIE":{"duration":137.625,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=o2hmponBzIE","annotations":[{"segment":[15.565972815912637,113.04116810452419],"label":"Baton twirling"}]},"EQNJfWiAS28":{"duration":157.82999999999998,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=EQNJfWiAS28","annotations":[{"segment":[3.017065754618695,155.68058431069346],"label":"Tango"}]},"qpeovDXc1cg":{"duration":69.73,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qpeovDXc1cg","annotations":[{"segment":[0.01,8.256967238689548],"label":"Drinking coffee"}]},"DWvFgDSAUzE":{"duration":19.83,"subset":"validation","resolution":"720x1280","url":"https://www.youtube.com/watch?v=DWvFgDSAUzE","annotations":[{"segment":[0.2,18.842973464106773],"label":"Knitting"}]},"GPl7nFwqSgk":{"duration":202.061,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=GPl7nFwqSgk","annotations":[{"segment":[0.01,185.0203853354134],"label":"Drum 
corps"}]},"-K_cgWfJxiU":{"duration":101.03,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=-K_cgWfJxiU","annotations":[{"segment":[28.756134945397818,83.57319890795632],"label":"Kayaking"}]},"ciIaZrpHqgs":{"duration":173.779,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ciIaZrpHqgs","annotations":[{"segment":[0.01,162.12040283931358],"label":"Curling"}]},"PwbZimjxzps":{"duration":190.218,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=PwbZimjxzps","annotations":[{"segment":[61.82819846030688,63.60317079449717],"label":"Bullfighting"},{"segment":[90.5235845297165,102.9483908690485],"label":"Bullfighting"},{"segment":[113.89405359655525,130.75629077136298],"label":"Bullfighting"},{"segment":[139.03949391524992,177.49722752333702],"label":"Bullfighting"}]},"jCCdvbdbwB8":{"duration":225.095,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jCCdvbdbwB8","annotations":[]},"cUEGoRjraxM":{"duration":166.65,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cUEGoRjraxM","annotations":[{"segment":[11.439287007800312,25.738395767550703],"label":"Walking the dog"},{"segment":[34.57784481903276,83.71478219344772],"label":"Walking the dog"},{"segment":[90.47436087987519,93.85415022308892],"label":"Walking the dog"},{"segment":[109.97314555226208,113.87290248673946],"label":"Walking the dog"},{"segment":[119.85252978627145,123.49230292511699],"label":"Walking the dog"},{"segment":[129.47193022464896,133.37168715912637],"label":"Walking the dog"},{"segment":[149.4906824882995,153.65042321840872],"label":"Walking the dog"}]},"_yk7gjnHkjs":{"duration":230.13,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=_yk7gjnHkjs","annotations":[{"segment":[17.213873600000003,189.35851014040563],"label":"Hammer 
throw"}]},"e1hN5VLndNI":{"duration":12.45,"subset":"testing","resolution":"360x240","url":"https://www.youtube.com/watch?v=e1hN5VLndNI","annotations":[]},"VoMTywbceKI":{"duration":168.299,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VoMTywbceKI","annotations":[]},"8onOVVuN_Is":{"duration":64.41,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=8onOVVuN_Is","annotations":[{"segment":[0.01,0.02],"label":"Long jump"}]},"10fX73-AXcg":{"duration":184.135,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=10fX73-AXcg","annotations":[]},"IKeV3kdX-_I":{"duration":35.782,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=IKeV3kdX-_I","annotations":[{"segment":[0.01,35.781950113378684],"label":"Calf roping"}]},"cg0sa6wYA1U":{"duration":192.73,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=cg0sa6wYA1U","annotations":[{"segment":[3.9834929864116924,189.16133878080421],"label":"Kayaking"}]},"T98RJsOiQWc":{"duration":88.654,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=T98RJsOiQWc","annotations":[{"segment":[7.080253362758916,9.013514006580309],"label":"Hitting a pinata"},{"segment":[49.33580658685569,52.37378754635724],"label":"Hitting a pinata"},{"segment":[55.4117685058588,57.345029116450704],"label":"Hitting a pinata"}]},"QsfIM28uvHM":{"duration":26.216,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=QsfIM28uvHM","annotations":[{"segment":[0.2,25.961000000000002],"label":"Kite flying"}]},"25dJIdZj6Xs":{"duration":6.897,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=25dJIdZj6Xs","annotations":[{"segment":[0.01,6.234746745278902],"label":"Scuba 
diving"}]},"w6kBE7BHHeU":{"duration":186.108,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=w6kBE7BHHeU","annotations":[{"segment":[24.35057113187954,162.33714087919694],"label":"Arm wrestling"}]},"ghWwS5ev6xk":{"duration":95.47999999999999,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=ghWwS5ev6xk","annotations":[{"segment":[42.93519110764431,91.89767940717628],"label":"Kayaking"}]},"TdFvHyfq97o":{"duration":59.26,"subset":"training","resolution":"656x480","url":"https://www.youtube.com/watch?v=TdFvHyfq97o","annotations":[{"segment":[3.8487120000000004,15.623225564742588],"label":"Mowing the lawn"}]},"ZwDfAhQVKLQ":{"duration":55.821,"subset":"training","resolution":"1920x960","url":"https://www.youtube.com/watch?v=ZwDfAhQVKLQ","annotations":[{"segment":[20.537154446177844,52.49298439937598],"label":"Using the rowing machine"}]},"zVMDHCnT-d4":{"duration":61.6,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zVMDHCnT-d4","annotations":[{"segment":[20.936502340093604,27.371115444617786],"label":"Drinking beer"}]},"8UKLht_ZKr4":{"duration":44.91,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8UKLht_ZKr4","annotations":[]},"lnx8WSmcLF8":{"duration":97.03999999999999,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lnx8WSmcLF8","annotations":[{"segment":[12.408864274570982,76.87442745709829],"label":"Using parallel bars"}]},"EOvotFy4YX0":{"duration":44.676,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EOvotFy4YX0","annotations":[{"segment":[0,44.641],"label":"Camel 
ride"}]},"A-FI48u86dQ":{"duration":85.636,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=A-FI48u86dQ","annotations":[]},"w-6Oo3bgms4":{"duration":114.428,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=w-6Oo3bgms4","annotations":[{"segment":[0.01,111.8256800523414],"label":"Futsal"}]},"uvnrLngXHh8":{"duration":199.0,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=uvnrLngXHh8","annotations":[{"segment":[16.44984399375975,182.81053042121684],"label":"Shaving"}]},"2D22fVcAcyo":{"duration":215.784,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2D22fVcAcyo","annotations":[{"segment":[10.433652106084244,25.242706708268333],"label":"Slacklining"},{"segment":[38.368914196567864,66.30417628705149],"label":"Slacklining"},{"segment":[74.71841185647428,91.2103135725429],"label":"Slacklining"},{"segment":[103.66338221528862,126.8866723868955],"label":"Slacklining"},{"segment":[132.27178315132608,180.0855070202808],"label":"Slacklining"}]},"-Pa52Pf_Raw":{"duration":221.1,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=-Pa52Pf_Raw","annotations":[{"segment":[1.3797218773104665,85.88768686257653],"label":"Smoking a cigarette"},{"segment":[113.13719393945824,221.10043083900226],"label":"Smoking a cigarette"}]},"kIjYdddY3Xk":{"duration":59.35,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kIjYdddY3Xk","annotations":[{"segment":[0.6481301528861154,45.92465083307332],"label":"Cleaning shoes"}]},"wBcP3SQ3Qg4":{"duration":229.86,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=wBcP3SQ3Qg4","annotations":[{"segment":[36.39659047581904,206.72546216068645],"label":"Washing 
hands"}]},"sPEed6XrTZA":{"duration":124.854,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=sPEed6XrTZA","annotations":[{"segment":[49.45312636505461,118.97914352574104],"label":"Hand car wash"}]},"lHy_OZ8GygM":{"duration":204.62,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lHy_OZ8GygM","annotations":[{"segment":[5.584893377535102,175.36565205460218],"label":"Getting a haircut"}]},"q2JRY6-riYA":{"duration":42.052,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=q2JRY6-riYA","annotations":[{"segment":[0.01,42.051336],"label":"Riding bumper cars"}]},"IiiQJFM7FAc":{"duration":71.448,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=IiiQJFM7FAc","annotations":[]},"Bg-0ibLZrgg":{"duration":212.115,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=Bg-0ibLZrgg","annotations":[{"segment":[44.011232093603745,198.21600018096723],"label":"Surfing"}]},"cwVZNpqc-mQ":{"duration":236.31,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cwVZNpqc-mQ","annotations":[{"segment":[7.717709714759485,208.74581835375682],"label":"Playing drums"}]},"7a7UMZtTraw":{"duration":60.14,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7a7UMZtTraw","annotations":[]},"AlLg4ZaxDoQ":{"duration":179.909,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=AlLg4ZaxDoQ","annotations":[{"segment":[71.27097347893915,134.98090639625585],"label":"River tubing"}]},"rwxSphRRIL8":{"duration":88.52,"subset":"validation","resolution":"640x348","url":"https://www.youtube.com/watch?v=rwxSphRRIL8","annotations":[{"segment":[37.974225363572366,40.59789911596464],"label":"Preparing pasta"},{"segment":[42.39304431496988,47.08803945082973],"label":"Preparing pasta"},{"segment":[68.21551756219908,70.97727940682253],"label":"Preparing 
pasta"},{"segment":[75.67227454268239,83.81947198432155],"label":"Preparing pasta"}]},"n33Uv--Lbl4":{"duration":37.18,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=n33Uv--Lbl4","annotations":[{"segment":[0.01,35.2578159126365],"label":"Brushing teeth"}]},"PRT0Z9HPF4U":{"duration":141.247,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PRT0Z9HPF4U","annotations":[{"segment":[0.01,141.246983],"label":"Playing pool"}]},"rXgC48CLncg":{"duration":201.97,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=rXgC48CLncg","annotations":[{"segment":[0.9452436131045242,167.3081195195008],"label":"Getting a haircut"}]},"VTtRk9vvZoY":{"duration":203.8,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VTtRk9vvZoY","annotations":[{"segment":[16.848228669999997,142.63624387999997],"label":"Starting a campfire"}]},"6dc-fQCzOiw":{"duration":174.289,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=6dc-fQCzOiw","annotations":[{"segment":[0.01,140.30126778159126],"label":"Longboarding"}]},"feWO_gqAcGk":{"duration":68.221,"subset":"validation","resolution":"240x184","url":"https://www.youtube.com/watch?v=feWO_gqAcGk","annotations":[{"segment":[5.623127491730234,7.108482351144191],"label":"Bullfighting"},{"segment":[17.187676040024616,19.627901880490402],"label":"Bullfighting"},{"segment":[45.30332042619281,46.682578474404316],"label":"Bullfighting"}]},"oS7Twj3Pou0":{"duration":37.92,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=oS7Twj3Pou0","annotations":[{"segment":[26.50132048673947,28.867509815912634],"label":"Hopscotch"}]},"l8gg9TYh_B0":{"duration":139.76,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=l8gg9TYh_B0","annotations":[]},"j_1kNiGcepU":{"duration":91.49,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=j
_1kNiGcepU","annotations":[]},"U2u9JboK97A":{"duration":88.05,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=U2u9JboK97A","annotations":[{"segment":[4.389807575663027,64.0637543073323],"label":"Layup drill in basketball"}]},"8j3UI3vaC2g":{"duration":79.807,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8j3UI3vaC2g","annotations":[]},"k-071peyck4":{"duration":40.38,"subset":"training","resolution":"656x480","url":"https://www.youtube.com/watch?v=k-071peyck4","annotations":[{"segment":[2.11295631825273,38.56933697347894],"label":"Hand washing clothes"}]},"W04glwJ_IYI":{"duration":96.387,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=W04glwJ_IYI","annotations":[{"segment":[15.951494045253307,83.01543766147796],"label":"Grooming dog"}]},"BUqr7or97JA":{"duration":52.918,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=BUqr7or97JA","annotations":[{"segment":[0.2,52.71809523809524],"label":"Arm wrestling"}]},"tAleUKVZCD8":{"duration":136.464,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=tAleUKVZCD8","annotations":[{"segment":[0.2,6.38675538858289],"label":"Playing ice hockey"},{"segment":[14.050861854882358,23.630994937756693],"label":"Playing ice hockey"},{"segment":[26.398588939475946,56.41633926581552],"label":"Playing ice hockey"},{"segment":[59.396825113820874,70.25430927441178],"label":"Playing ice hockey"},{"segment":[73.87347066127542,97.50446559903212],"label":"Playing ice hockey"},{"segment":[103.03965360247062,126.24486348829953],"label":"Playing ice 
hockey"}]},"j_e8c6tOQG8":{"duration":116.89,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=j_e8c6tOQG8","annotations":[{"segment":[14.855631825273012,86.12620904836193],"label":"Kayaking"},{"segment":[99.61475819032762,113.46786271450858],"label":"Kayaking"}]},"PqP3JIJEU5I":{"duration":54.103,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=PqP3JIJEU5I","annotations":[{"segment":[0.01,54.061],"label":"Fixing the roof"}]},"8itO1pQI9ww":{"duration":12.8,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=8itO1pQI9ww","annotations":[{"segment":[0.16486430920588413,9.332043810763516],"label":"Drinking beer"}]},"9Zy5ylJYiA4":{"duration":197.02,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=9Zy5ylJYiA4","annotations":[{"segment":[10.906419656786271,34.56259750390016],"label":"Shot put"},{"segment":[60.06211388455539,85.56163026521061],"label":"Shot put"},{"segment":[95.39276911076443,119.66339313572544],"label":"Shot put"},{"segment":[161.4457332293292,184.48746489859593],"label":"Shot put"}]},"qZTAv1s_eBQ":{"duration":84.266,"subset":"training","resolution":"660x480","url":"https://www.youtube.com/watch?v=qZTAv1s_eBQ","annotations":[{"segment":[8.939211620165487,84.265214],"label":"Powerbocking"}]},"jFZRNe7xFY8":{"duration":21.36,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=jFZRNe7xFY8","annotations":[{"segment":[0.01,21.195723950078],"label":"Using the balance beam"}]},"7PSicg5Q2ZI":{"duration":229.367,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=7PSicg5Q2ZI","annotations":[{"segment":[31.4887216349454,229.3667120181406],"label":"Making a lemonade"}]},"8BAn48zfc68":{"duration":8.82,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8BAn48zfc68","annotations":[{"segment":[0.82036254,8.753333],"label":"Starting a 
campfire"}]},"sb0r71oGs3M":{"duration":62.95,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=sb0r71oGs3M","annotations":[{"segment":[17.27060881901679,38.54379021241137],"label":"Waterskiing"},{"segment":[50.20671700368863,57.0672621750282],"label":"Waterskiing"}]},"OTwG3OCsPoY":{"duration":102.192,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OTwG3OCsPoY","annotations":[{"segment":[0.2,101.901],"label":"Using the rowing machine"}]},"6uRQHl4dKlk":{"duration":151.05,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=6uRQHl4dKlk","annotations":[]},"C-6kvesNmU4":{"duration":39.96,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=C-6kvesNmU4","annotations":[{"segment":[8.100358814352575,17.010753510140407],"label":"Kayaking"}]},"sPK-sbHIb3w":{"duration":214.07,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sPK-sbHIb3w","annotations":[]},"9af8-f0Xir8":{"duration":43.422,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=9af8-f0Xir8","annotations":[]},"hFi6S_guB7I":{"duration":201.02,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=hFi6S_guB7I","annotations":[]},"uIl5Tj74sLw":{"duration":38.57,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=uIl5Tj74sLw","annotations":[{"segment":[0.6771977896129573,26.50901511048523],"label":"Washing face"}]},"jtyWcZGp4VA":{"duration":217.2,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jtyWcZGp4VA","annotations":[{"segment":[74.86452574102964,217.161],"label":"Grooming dog"}]},"sByOncVAMqg":{"duration":99.71000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sByOncVAMqg","annotations":[{"segment":[6.63643136,95.97273580467028],"label":"Using the balance 
beam"}]},"Z6l2Yu9Q0mU":{"duration":210.002,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Z6l2Yu9Q0mU","annotations":[{"segment":[30.174331860907724,111.27448224323784],"label":"Hula hoop"},{"segment":[144.95708660460855,172.4263950158235],"label":"Hula hoop"}]},"TKHKTbbSynM":{"duration":121.84,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TKHKTbbSynM","annotations":[]},"TNwMZhQ7SFc":{"duration":163.1,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=TNwMZhQ7SFc","annotations":[{"segment":[83.45679150703442,85.74676444472745],"label":"Washing face"},{"segment":[86.76453019481322,95.16109763302097],"label":"Washing face"},{"segment":[98.46883604836194,99.99548466926677],"label":"Washing face"}]},"aYSJn94g_Io":{"duration":81.572,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=aYSJn94g_Io","annotations":[{"segment":[29.71449626964671,33.02317652106085],"label":"Playing ten pins"},{"segment":[42.56744719188767,46.63966934477379],"label":"Playing ten pins"},{"segment":[52.36623174726989,55.54765530421217],"label":"Playing ten pins"},{"segment":[62.80130101404057,64.32838432137287],"label":"Playing ten pins"}]},"b02GzZM5iGI":{"duration":84.94,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=b02GzZM5iGI","annotations":[{"segment":[0.01,10.335705689452068],"label":"Volleyball"},{"segment":[23.719119098283933,30.609589450858035],"label":"Volleyball"},{"segment":[37.10253266770671,60.55663367550703],"label":"Volleyball"},{"segment":[71.42237538533541,84.738298],"label":"Volleyball"}]},"FSU0YVLCoSc":{"duration":163.72,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FSU0YVLCoSc","annotations":[{"segment":[73.64751709000001,136.74636129],"label":"Fixing bicycle"},{"segment":[73.64751709000001,136.74636129],"label":"Fixing 
bicycle"}]},"poERMbR_nY4":{"duration":186.619,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=poERMbR_nY4","annotations":[{"segment":[35.11791982733996,170.9459072587044],"label":"Snow tubing"}]},"64vbnwtL7I8":{"duration":127.432,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=64vbnwtL7I8","annotations":[{"segment":[0.5964014560582424,4.7712116068642745],"label":"Playing ten pins"},{"segment":[44.33250784711389,50.49532283931358],"label":"Playing ten pins"},{"segment":[56.459337347893914,69.77896975039002],"label":"Playing ten pins"},{"segment":[72.76097700468019,78.52619102964118],"label":"Playing ten pins"},{"segment":[88.46621521060842,101.38824664586583],"label":"Playing ten pins"},{"segment":[110.53306889235569,115.70188146645867],"label":"Playing ten pins"}]},"8AUTKSqcmiM":{"duration":98.8,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=8AUTKSqcmiM","annotations":[]},"ennVaOEePHk":{"duration":237.587,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ennVaOEePHk","annotations":[{"segment":[38.16961466458659,93.01527457098284],"label":"BMX"},{"segment":[97.83279875195008,217.17745085803432],"label":"BMX"}]},"mSonugqhYuE":{"duration":194.328,"subset":"validation","resolution":"384x288","url":"https://www.youtube.com/watch?v=mSonugqhYuE","annotations":[{"segment":[21.457625438951585,82.50609403323222],"label":"Assembling bicycle"}]},"OZftVCeblMk":{"duration":6.67,"subset":"training","resolution":"480x480","url":"https://www.youtube.com/watch?v=OZftVCeblMk","annotations":[{"segment":[0.22872198408736352,2.1520659411856475],"label":"Washing dishes"}]},"0h4UT-2XTAw":{"duration":170.16,"subset":"validation","resolution":"424x240","url":"https://www.youtube.com/watch?v=0h4UT-2XTAw","annotations":[{"segment":[25.99690795631825,124.41377379095162],"label":"Removing 
curlers"}]},"bgYVKA4VCbw":{"duration":64.32,"subset":"testing","resolution":"640x356","url":"https://www.youtube.com/watch?v=bgYVKA4VCbw","annotations":[]},"jETKNHeojN8":{"duration":111.03999999999999,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jETKNHeojN8","annotations":[{"segment":[24.078404703535078,105.66781920256402],"label":"Ballet"}]},"le1aEgEms9Y":{"duration":144.92000000000002,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=le1aEgEms9Y","annotations":[{"segment":[33.46892779520622,138.65698608809763],"label":"Brushing teeth"}]},"gaNiULmtQg8":{"duration":109.72,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gaNiULmtQg8","annotations":[{"segment":[23.79139737018052,105.26409627813683],"label":"Using the balance beam"}]},"a9qztQPPsJg":{"duration":113.45,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=a9qztQPPsJg","annotations":[{"segment":[30.442784905954056,40.8853681004383],"label":"Cleaning shoes"},{"segment":[43.18627626193483,53.45186652091933],"label":"Cleaning shoes"},{"segment":[77.69989868438275,98.58506450390016],"label":"Cleaning shoes"}]},"wC-k0Zfx-cM":{"duration":87.702,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wC-k0Zfx-cM","annotations":[{"segment":[7.251472154446178,30.100450452418098],"label":"Sumo"},{"segment":[71.00969902184087,87.501767],"label":"Sumo"}]},"O_StqLLpAVI":{"duration":191.66,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=O_StqLLpAVI","annotations":[{"segment":[18.329848468594122,188.19442172432414],"label":"Fixing 
bicycle"}]},"z3MP0yJBtGg":{"duration":17.83,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=z3MP0yJBtGg","annotations":[]},"GLL1vOrV5Qo":{"duration":228.137,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=GLL1vOrV5Qo","annotations":[{"segment":[12.934817254373819,221.17115223568223],"label":"Hanging wallpaper"}]},"elgmPvU19K8":{"duration":213.111,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=elgmPvU19K8","annotations":[{"segment":[22.871505823331148,26.18621754524127],"label":"Wakeboarding"},{"segment":[36.46182388316265,81.87337447333132],"label":"Wakeboarding"}]},"OrDUCd94ONQ":{"duration":180.42,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OrDUCd94ONQ","annotations":[]},"0T7yANM5I5Y":{"duration":22.22,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=0T7yANM5I5Y","annotations":[{"segment":[12.44542387675507,21.66682429797192],"label":"Clean and jerk"}]},"X-xQLFHZIOU":{"duration":138.995,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=X-xQLFHZIOU","annotations":[{"segment":[0.009999999999999998,138.994647],"label":"Bullfighting"}]},"Y5puaQ9oNVw":{"duration":48.74,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Y5puaQ9oNVw","annotations":[{"segment":[1.4432886115444619,46.0333104524181],"label":"Tumbling"}]},"xynscQyItDQ":{"duration":67.571,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=xynscQyItDQ","annotations":[{"segment":[36.156837723868954,64.30224784711388],"label":"Putting on 
shoes"}]},"LWt2FuU0uC4":{"duration":153.23,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=LWt2FuU0uC4","annotations":[]},"V4U5SaPDL0E":{"duration":194.1,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=V4U5SaPDL0E","annotations":[{"segment":[0.01,190.7647871629151],"label":"Cumbia"}]},"qY8t0S0WWoA":{"duration":45.651,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=qY8t0S0WWoA","annotations":[{"segment":[0.01,45.650429],"label":"Painting fence"}]},"dUFgilgnJmk":{"duration":225.884,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dUFgilgnJmk","annotations":[]},"Z2wZr0Beq9c":{"duration":132.725,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=Z2wZr0Beq9c","annotations":[{"segment":[0.2,6.211790675708662],"label":"Playing blackjack"},{"segment":[34.99308747315879,131.6899605678627],"label":"Playing blackjack"}]},"_Z71mu4aQy4":{"duration":47.07,"subset":"training","resolution":"400x224","url":"https://www.youtube.com/watch?v=_Z71mu4aQy4","annotations":[{"segment":[25.148810015600624,40.0578726],"label":"Cricket"}]},"Lfj9JC7uWmE":{"duration":81.061,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Lfj9JC7uWmE","annotations":[{"segment":[4.4228315132605305,55.222210608424334],"label":"Tug of war"}]},"4ehqHRSGpI4":{"duration":146.379,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=4ehqHRSGpI4","annotations":[]},"qcaaF0SqVss":{"duration":48.925,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=qcaaF0SqVss","annotations":[{"segment":[5.037406263597482,6.563892845553822],"label":"Doing a powerbomb"},{"segment":[17.783570151326053,26.713517394695785],"label":"Doing a 
powerbomb"}]},"JDfFAljt4JM":{"duration":233.64,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=JDfFAljt4JM","annotations":[{"segment":[10.383826053042121,206.7938789],"label":"Ballet"}]},"i7NKJhCdKx0":{"duration":195.118,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=i7NKJhCdKx0","annotations":[{"segment":[8.216297971918877,189.9072449297972],"label":"Snowboarding"}]},"s9MNW35YCMw":{"duration":201.689,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=s9MNW35YCMw","annotations":[{"segment":[42.162622815116684,101.63079977076634],"label":"Gargling mouthwash"}]},"qakxXmFvFy0":{"duration":58.422,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=qakxXmFvFy0","annotations":[{"segment":[0.2,55.4237503900156],"label":"Building sandcastles"}]},"J8VMY1SPJaY":{"duration":225.37,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=J8VMY1SPJaY","annotations":[{"segment":[25.66649021335003,174.74309090458857],"label":"Kayaking"},{"segment":[190.56489840249608,213.06702662714508],"label":"Kayaking"}]},"8-QvFzI1c0U":{"duration":113.801,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8-QvFzI1c0U","annotations":[]},"IDIALY6Ayk8":{"duration":166.69,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=IDIALY6Ayk8","annotations":[]},"39FOmbBNcI0":{"duration":54.149,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=39FOmbBNcI0","annotations":[]},"9pavfCL1HvU":{"duration":28.86,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9pavfCL1HvU","annotations":[]},"3G1T_V102GA":{"duration":57.31,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3G1T_V102GA","annotations":[{"segment":[40.549929797191886,41.53567394695788],"label":"Shaving"},{"segment":[42.342191887675504
,43.14870982839314],"label":"Shaving"}]},"dgXPoqHrKRM":{"duration":152.997,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dgXPoqHrKRM","annotations":[{"segment":[29.73192068429238,143.20765474339035],"label":"Snow tubing"}]},"rBsE1WGXbz0":{"duration":65.62,"subset":"training","resolution":"810x480","url":"https://www.youtube.com/watch?v=rBsE1WGXbz0","annotations":[{"segment":[5.732756780031202,7.677799258970358],"label":"Shot put"}]},"hrN0hOmQrD4":{"duration":99.521,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hrN0hOmQrD4","annotations":[{"segment":[15.985246489859595,69.69726833073324],"label":"River tubing"}]},"lkC_md7KKq0":{"duration":32.95,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lkC_md7KKq0","annotations":[{"segment":[12.859736349453978,22.300366614664586],"label":"Sharpening knives"}]},"IRj0d3cLi6g":{"duration":30.91,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IRj0d3cLi6g","annotations":[{"segment":[0.01,30.905758],"label":"Playing bagpipes"}]},"gUGfWzgDyKA":{"duration":114.243,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gUGfWzgDyKA","annotations":[]},"w-KZEq6JhnQ":{"duration":116.332,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=w-KZEq6JhnQ","annotations":[{"segment":[0.01,110.52444670670826],"label":"Knitting"}]},"7c5vvEn3uLk":{"duration":145.311,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7c5vvEn3uLk","annotations":[{"segment":[35.13747859594383,37.404412698907954],"label":"Calf roping"},{"segment":[42.391667725429016,55.31319211232449],"label":"Calf roping"},{"segment":[58.94028667706708,62.56738124180967],"label":"Calf roping"},{"segment":[72.31519788455537,112.89331832761309],"label":"Calf 
roping"}]},"qL7kMgxpFJY":{"duration":54.08,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=qL7kMgxpFJY","annotations":[{"segment":[10.817454758190326,53.961],"label":"Plastering"}]},"sLq6p6NKApk":{"duration":74.885,"subset":"training","resolution":"647x480","url":"https://www.youtube.com/watch?v=sLq6p6NKApk","annotations":[{"segment":[42.05673525117004,45.32781465959438],"label":"Croquet"}]},"JjGhHZgdWVI":{"duration":112.919,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JjGhHZgdWVI","annotations":[{"segment":[15.453845389410716,103.61102318351647],"label":"Welding"}]},"cK0-WcGXacQ":{"duration":13.38,"subset":"testing","resolution":"426x236","url":"https://www.youtube.com/watch?v=cK0-WcGXacQ","annotations":[]},"LFiGhD1OEws":{"duration":129.568,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LFiGhD1OEws","annotations":[{"segment":[6.266127543060906,8.893858448215479],"label":"Hurling"},{"segment":[17.383450603330253,20.819714094686237],"label":"Hurling"},{"segment":[28.298640517049254,29.915705689452068],"label":"Hurling"},{"segment":[51.139686077239006,53.565283835843225],"label":"Hurling"},{"segment":[62.054875990958,82.26819064599319],"label":"Hurling"},{"segment":[107.33269921216848,110.97109579563181],"label":"Hurling"}]},"43dxxkLDw6M":{"duration":131.634,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=43dxxkLDw6M","annotations":[]},"S-DOW63629o":{"duration":84.428,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=S-DOW63629o","annotations":[{"segment":[0.2,82.6696848673947],"label":"Playing congas"}]},"3AWvyAJv20g":{"duration":135.91,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3AWvyAJv20g","annotations":[{"segment":[72.92209672386895,105.56745397815912],"label":"Ironing 
clothes"}]},"ku65ME0vW8s":{"duration":32.764,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ku65ME0vW8s","annotations":[{"segment":[1.2738396036594009,27.413042883473803],"label":"Using the monkey bar"}]},"ICl9CT-9fKY":{"duration":126.944,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ICl9CT-9fKY","annotations":[{"segment":[69.40344936105284,120.6156800623053],"label":"Rope skipping"}]},"r-1TiwT38ck":{"duration":39.27,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=r-1TiwT38ck","annotations":[]},"2ErgL7-MXHw":{"duration":11.052,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=2ErgL7-MXHw","annotations":[{"segment":[0.20000000000000004,10.488003120124805],"label":"Trimming branches or hedges"}]},"mdJHGKABLhU":{"duration":60.35,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mdJHGKABLhU","annotations":[{"segment":[25.231577453978158,27.585269380655227],"label":"Shaving"},{"segment":[42.931340742589704,47.45042924180967],"label":"Shaving"}]},"aXnllTmgeqg":{"duration":88.051,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aXnllTmgeqg","annotations":[{"segment":[7.950246105919003,85.12246261682245],"label":"Playing drums"}]},"OjV4UScwkU0":{"duration":161.077,"subset":"validation","resolution":"160x120","url":"https://www.youtube.com/watch?v=OjV4UScwkU0","annotations":[{"segment":[23.32547196261682,51.16555140186915],"label":"Welding"},{"segment":[90.29214953271027,121.89440186915887],"label":"Welding"}]},"ErEr4Sxdprw":{"duration":142.15,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ErEr4Sxdprw","annotations":[{"segment":[7.983600878729027,133.947081409787],"label":"Clean and 
jerk"}]},"aRurjtzwmS8":{"duration":65.62,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aRurjtzwmS8","annotations":[{"segment":[12.756528737661473,29.288991999793698],"label":"Sumo"},{"segment":[38.47369273518588,44.39272262632118],"label":"Sumo"}]},"ueDoskgg2yw":{"duration":15.09,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=ueDoskgg2yw","annotations":[{"segment":[0.02349609984399376,9.492424336973478],"label":"Shaving"}]},"qrvPTE0kb5U":{"duration":218.384,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qrvPTE0kb5U","annotations":[{"segment":[117.02775637556114,123.50090738323411],"label":"Croquet"}]},"DIATqGGmG2g":{"duration":187.46,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DIATqGGmG2g","annotations":[{"segment":[42.98882995319813,47.66788627463466],"label":"Tumbling"},{"segment":[69.01608074118883,73.11025468018721],"label":"Tumbling"},{"segment":[86.8549825600624,92.99624395319812],"label":"Tumbling"},{"segment":[106.74097183307332,113.1746742449298],"label":"Tumbling"},{"segment":[125.16475601248051,133.93798657410295],"label":"Tumbling"},{"segment":[153.53153482839312,158.50303214664586],"label":"Tumbling"}]},"X4J9UBXb0FM":{"duration":203.73,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=X4J9UBXb0FM","annotations":[]},"pxW19lG4iaI":{"duration":47.81,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=pxW19lG4iaI","annotations":[]},"Wir90-9HmAI":{"duration":122.35,"subset":"training","resolution":"480x272","url":"https://www.youtube.com/watch?v=Wir90-9HmAI","annotations":[{"segment":[41.131857995408254,106.59927002522278],"label":"Breakdancing"}]},"DfVEiJ4ioNU":{"duration":55.125,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DfVEiJ4ioNU","annotations":[]},"WreRcthWXv8":{"duration":155.667,"subset":"training","res
olution":"1920x1080","url":"https://www.youtube.com/watch?v=WreRcthWXv8","annotations":[{"segment":[0.01,145.2947470988455],"label":"Rollerblading"}]},"GoVqtUSnshE":{"duration":224.26,"subset":"training","resolution":"352x288","url":"https://www.youtube.com/watch?v=GoVqtUSnshE","annotations":[{"segment":[112.12916099773243,114.92801681046832],"label":"Drinking beer"},{"segment":[148.51428656329927,152.01285632921918],"label":"Drinking beer"}]},"HlECGLpmqLc":{"duration":44.165,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=HlECGLpmqLc","annotations":[]},"T8nteSe3U9Q":{"duration":145.636,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=T8nteSe3U9Q","annotations":[{"segment":[4.089609940717629,20.448049703588143],"label":"Applying sunscreen"},{"segment":[24.310459092043686,79.97459439625587],"label":"Applying sunscreen"},{"segment":[104.28505348829954,140.40994129797193],"label":"Applying sunscreen"}]},"p9JNte3VMKQ":{"duration":218.082,"subset":"training","resolution":"3414x1080","url":"https://www.youtube.com/watch?v=p9JNte3VMKQ","annotations":[{"segment":[0.01,218.041],"label":"Sharpening knives"}]},"MiOJxYa5Nt4":{"duration":17.28,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=MiOJxYa5Nt4","annotations":[{"segment":[0.01,12.653020019344773],"label":"Hammer throw"}]},"Dzj5X11anrk":{"duration":88.492,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Dzj5X11anrk","annotations":[{"segment":[28.290959438377534,77.00661154446178],"label":"Spread mulch"}]},"_CFadTE9L4I":{"duration":125.37,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_CFadTE9L4I","annotations":[{"segment":[0,125.364036],"label":"Hand washing 
clothes"}]},"d3crFny-e3E":{"duration":81.28999999999999,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=d3crFny-e3E","annotations":[{"segment":[14.608352100000001,17.79097305],"label":"Rock climbing"},{"segment":[48.92754740212496,59.547480590305355],"label":"Rock climbing"},{"segment":[54.616797324364455,59.67390836635512],"label":"Rock climbing"},{"segment":[67.89171380958996,72.3166859713318],"label":"Rock climbing"}]},"GChGC-VJZHs":{"duration":103.35,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=GChGC-VJZHs","annotations":[]},"894d9_pty3o":{"duration":60.349,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=894d9_pty3o","annotations":[{"segment":[6.773279251170047,17.12702340093604],"label":"Skiing"},{"segment":[23.14971294851794,24.655385335413413],"label":"Skiing"},{"segment":[29.64292511700468,30.301656786271447],"label":"Skiing"},{"segment":[34.34815132605304,35.94792823712948],"label":"Skiing"},{"segment":[39.712109204368176,41.02957254290171],"label":"Skiing"},{"segment":[50.8164430577223,52.51032449297972],"label":"Skiing"}]},"_3lgJ6yp7o8":{"duration":22.9,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=_3lgJ6yp7o8","annotations":[{"segment":[0.21245631999999998,11.17955625897036],"label":"Kayaking"}]},"XFySa1Gc-yM":{"duration":79.13,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=XFySa1Gc-yM","annotations":[{"segment":[4.920744623661783,18.206760175761314],"label":"Removing curlers"}]},"I9wra8bj2sg":{"duration":23.081,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=I9wra8bj2sg","annotations":[{"segment":[2.0524119828393133,15.483107940717627],"label":"Calf 
roping"}]},"xI_XIVYtERk":{"duration":123.81,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=xI_XIVYtERk","annotations":[{"segment":[11.778900156006241,116.24422776911078],"label":"Smoking a cigarette"}]},"FRX4LTw9650":{"duration":162.87,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FRX4LTw9650","annotations":[{"segment":[41.528234789391576,47.116131825273015],"label":"Brushing hair"},{"segment":[61.0858744149766,93.34328003120126],"label":"Brushing hair"},{"segment":[105.5350553822153,118.74281201248051],"label":"Brushing hair"},{"segment":[138.04645631825275,150.74622230889236],"label":"Brushing hair"}]},"Q6KyDc24uSk":{"duration":224.863,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Q6KyDc24uSk","annotations":[{"segment":[80.6837269422777,88.40130082371294],"label":"Playing pool"},{"segment":[104.5380462121685,112.25562009360375],"label":"Playing pool"},{"segment":[143.8275132449298,146.9847025600624],"label":"Playing pool"},{"segment":[165.226240825273,176.45180283463338],"label":"Playing pool"}]},"rtJTJ10ppRc":{"duration":20.48,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=rtJTJ10ppRc","annotations":[{"segment":[0.010000000000000002,20.479999],"label":"Playing beach volleyball"}]},"rtePSWZ-SWA":{"duration":83.732,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rtePSWZ-SWA","annotations":[]},"Mzojo2EeWu8":{"duration":161.797,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Mzojo2EeWu8","annotations":[{"segment":[10.086801872074883,20.677943837753507],"label":"Beach soccer"},{"segment":[25.973514820592825,31.016915756630265],"label":"Beach soccer"},{"segment":[37.82550702028081,57.494770670826824],"label":"Beach soccer"},{"segment":[60.77298127925116,62.286001560062395],"label":"Beach 
soccer"},{"segment":[63.04251170046801,79.68573478939157],"label":"Beach soccer"},{"segment":[82.96394539781592,87.25083619344774],"label":"Beach soccer"},{"segment":[92.79857722308893,117.52578159126364],"label":"Beach soccer"},{"segment":[140.98049765990638,151.82515132605306],"label":"Beach soccer"}]},"nHafujMomWg":{"duration":120.048,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=nHafujMomWg","annotations":[{"segment":[8.240366951638066,102.81730582839313],"label":"Peeling potatoes"}]},"87hsTxVtn-A":{"duration":235.89,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=87hsTxVtn-A","annotations":[{"segment":[3.680048861154446,231.84307825273012],"label":"Belly dance"}]},"ZVKUKjVYwF8":{"duration":116.379,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=ZVKUKjVYwF8","annotations":[{"segment":[0,116.378389],"label":"Installing carpet"}]},"LKf1eTzmK14":{"duration":196.325,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=LKf1eTzmK14","annotations":[{"segment":[0.01,7.682491900653917],"label":"Running a marathon"},{"segment":[12.276674000687764,24.22154751706842],"label":"Running a marathon"},{"segment":[28.203172022528637,46.57990050926811],"label":"Running a marathon"},{"segment":[97.11590384780165,108.14194093984534],"label":"Running a marathon"},{"segment":[115.1863535264288,155.61515619725563],"label":"Running a marathon"}]},"e8MK2naV6E8":{"duration":49.32,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=e8MK2naV6E8","annotations":[{"segment":[0.6924690140405616,2.1543480436817473],"label":"Hitting a pinata"},{"segment":[8.232687166926677,45.54907292355694],"label":"Hitting a 
pinata"}]},"vpvhtd7CqUA":{"duration":215.064,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=vpvhtd7CqUA","annotations":[{"segment":[142.14909051185276,149.8418650980999],"label":"Bullfighting"}]},"JH31FxBZ5Xw":{"duration":138.299,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=JH31FxBZ5Xw","annotations":[{"segment":[0.01,131.49085468882566],"label":"Doing a powerbomb"}]},"6YIZ00dNpMU":{"duration":179.536,"subset":"validation","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=6YIZ00dNpMU","annotations":[{"segment":[28.75937188878312,41.04493063217348],"label":"Playing congas"},{"segment":[66.17448260729012,78.46004135068048],"label":"Playing congas"},{"segment":[92.14168631491064,99.68055190744565],"label":"Playing congas"},{"segment":[0.2,179.336688],"label":"Playing congas"},{"segment":[130.6736660100895,155.2447834968702],"label":"Playing congas"},{"segment":[172.27703539111593,179.336688],"label":"Playing congas"}]},"9DVsv84awMg":{"duration":213.51,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9DVsv84awMg","annotations":[{"segment":[29.977649718233625,39.970199624311505],"label":"Paintball"},{"segment":[46.964984558566016,164.87707345028494],"label":"Paintball"},{"segment":[171.87185838453945,201.8495081027731],"label":"Paintball"}]},"jGo19SQpsng":{"duration":231.132,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jGo19SQpsng","annotations":[]},"GldxuTsExZM":{"duration":228.346,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GldxuTsExZM","annotations":[{"segment":[53.078080393889245,91.48850633557043],"label":"Carving jack-o-lanterns"},{"segment":[100.37873219226825,153.36447829818724],"label":"Carving 
jack-o-lanterns"}]},"efkbBgZ2Mzo":{"duration":60.256,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=efkbBgZ2Mzo","annotations":[{"segment":[7.54605422590576,53.34797452881405],"label":"Elliptical trainer"}]},"2fndjkCHsEY":{"duration":193.144,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2fndjkCHsEY","annotations":[{"segment":[0.19999999999999998,192.901],"label":"Table soccer"}]},"l-PDSOCk7z0":{"duration":140.09,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=l-PDSOCk7z0","annotations":[{"segment":[56.4010227,128.0661266926677],"label":"Painting"}]},"QixK0AeqcsI":{"duration":13.4,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=QixK0AeqcsI","annotations":[{"segment":[0.18811420904836193,13.23069936973479],"label":"Using uneven bars"}]},"TSPOjp7vDeo":{"duration":225.05,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TSPOjp7vDeo","annotations":[]},"4w_46rTInXI":{"duration":221.89,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=4w_46rTInXI","annotations":[{"segment":[0.01,221.88988662131518],"label":"Beer pong"}]},"sNeT1WZ9UwA":{"duration":99.82,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sNeT1WZ9UwA","annotations":[{"segment":[14.168316692667709,60.098574102964115],"label":"Horseback riding"}]},"b8eqn-GTdcc":{"duration":178.56,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=b8eqn-GTdcc","annotations":[{"segment":[100.5626886895476,155.99752262090485],"label":"Polishing 
forniture"}]},"EjraNJWCZVs":{"duration":183.229,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=EjraNJWCZVs","annotations":[]},"sEcXR5b8HyY":{"duration":193.59,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sEcXR5b8HyY","annotations":[]},"CBvXpTgK3cA":{"duration":66.13,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=CBvXpTgK3cA","annotations":[]},"WwcbpTANbeU":{"duration":77.83,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WwcbpTANbeU","annotations":[{"segment":[9.913589210000001,64.0876922],"label":"Bungee jumping"}]},"X7-1DRvX-LU":{"duration":228.35,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=X7-1DRvX-LU","annotations":[{"segment":[12.64366742823713,218.8601023845554],"label":"Wrapping presents"}]},"w9M6GUCtoQQ":{"duration":232.71,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=w9M6GUCtoQQ","annotations":[{"segment":[101.6663448,203.365269],"label":"Getting a tattoo"}]},"FK73jqIGUzE":{"duration":17.299,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FK73jqIGUzE","annotations":[{"segment":[0.01,17.298366],"label":"Welding"}]},"ckzLOLHjP44":{"duration":32.462,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ckzLOLHjP44","annotations":[{"segment":[1.468616850796481,31.094162137285494],"label":"Hurling"}]},"kkcTQHFNXAg":{"duration":33.74,"subset":"validation","resolution":"408x720","url":"https://www.youtube.com/watch?v=kkcTQHFNXAg","annotations":[{"segment":[0.01,20.23789236895476],"label":"Using parallel bars"}]},"LwJFjFdJy5c":{"duration":33.112,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LwJFjFdJy5c","annotations":[{"segment":[0.826499977005883,33.111654],"label":"Disc 
dog"}]},"1jjsTfZS5DY":{"duration":172.9,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1jjsTfZS5DY","annotations":[{"segment":[83.07619047619048,108.1608843537415],"label":"Wrapping presents"}]},"pOGc06VJF4k":{"duration":92.253,"subset":"testing","resolution":"853x480","url":"https://www.youtube.com/watch?v=pOGc06VJF4k","annotations":[]},"shpZ47Mvxfg":{"duration":179.05,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=shpZ47Mvxfg","annotations":[{"segment":[30.38643213728549,46.06975195007801],"label":"Putting on makeup"},{"segment":[48.310226209048366,69.31467238689547],"label":"Putting on makeup"},{"segment":[71.27508736349453,91.15929641185649],"label":"Putting on makeup"},{"segment":[93.67982995319812,123.64617316692669],"label":"Putting on makeup"},{"segment":[126.72682527301092,146.33097503900157],"label":"Putting on makeup"},{"segment":[148.01133073322933,160.8940577223089],"label":"Putting on makeup"}]},"mV07bEBkIcM":{"duration":42.052,"subset":"testing","resolution":"656x480","url":"https://www.youtube.com/watch?v=mV07bEBkIcM","annotations":[]},"obUkL-Ya8dE":{"duration":211.81,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=obUkL-Ya8dE","annotations":[{"segment":[130.3296294851794,183.51865444617783],"label":"Fixing bicycle"},{"segment":[29.850665160000002,126.58308744000001],"label":"Fixing bicycle"}]},"OCDRmpS8N5s":{"duration":166.627,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OCDRmpS8N5s","annotations":[]},"e1_oskOyQoU":{"duration":162.099,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=e1_oskOyQoU","annotations":[{"segment":[0.009999999999999998,161.962],"label":"Sharpening 
knives"}]},"Q3tPDohXUYc":{"duration":58.35,"subset":"validation","resolution":"632x360","url":"https://www.youtube.com/watch?v=Q3tPDohXUYc","annotations":[{"segment":[21.574460878315133,24.760562695787833],"label":"Triple jump"},{"segment":[41.78344954914196,48.88390502808113],"label":"Triple jump"}]},"EVfTWz5GRGw":{"duration":126.53,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EVfTWz5GRGw","annotations":[{"segment":[102.64128542901715,121.98521999063962],"label":"Washing face"}]},"eksIn1NfWJg":{"duration":45.442,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eksIn1NfWJg","annotations":[{"segment":[4.03721840873635,43.20531981279251],"label":"Putting in contact lenses"}]},"7pYrfYFoxdc":{"duration":189.22,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7pYrfYFoxdc","annotations":[{"segment":[33.652126036061844,58.743623074883],"label":"Slacklining"},{"segment":[66.12347521996881,130.475785925117],"label":"Slacklining"},{"segment":[148.48262515912637,155.56728321840873],"label":"Slacklining"}]},"4dyn1oKJO34":{"duration":79.74,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=4dyn1oKJO34","annotations":[{"segment":[0,3.3562979719188766],"label":"Playing lacrosse"}]},"29kX1NaPGP0":{"duration":99.61,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=29kX1NaPGP0","annotations":[{"segment":[37.28461778471139,99.581],"label":"Tennis serve with ball 
bouncing"}]},"CBPGCziyRP8":{"duration":7.501,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=CBPGCziyRP8","annotations":[]},"a4gI4LC1rJk":{"duration":82.175,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=a4gI4LC1rJk","annotations":[]},"yL7tvoBkkkI":{"duration":26.05,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yL7tvoBkkkI","annotations":[{"segment":[0.01,4.745892355694228],"label":"Hand washing clothes"}]},"_CJaovqcgtM":{"duration":8.174,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_CJaovqcgtM","annotations":[{"segment":[1.9050702028081123,7.315469578783151],"label":"Doing a powerbomb"}]},"4E89GQ2UnoQ":{"duration":174.08,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=4E89GQ2UnoQ","annotations":[{"segment":[0.2,14.933315132605303],"label":"Rafting"},{"segment":[23.350274570982837,37.469045241809674],"label":"Rafting"},{"segment":[47.24357878315133,100.46048361934476],"label":"Rafting"},{"segment":[108.06289859594384,128.96953978159127],"label":"Rafting"},{"segment":[133.04226209048363,159.1076848673947],"label":"Rafting"}]},"Y8WY_c3onSY":{"duration":5.016,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Y8WY_c3onSY","annotations":[{"segment":[0.2,4.70320551782897],"label":"Fun sliding down"}]},"3HHAEmr0Q34":{"duration":12.7,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3HHAEmr0Q34","annotations":[{"segment":[0.9283416536661465,12.28571294851794],"label":"Tennis serve with ball bouncing"}]},"EvJqfGXb5Fo":{"duration":158.175,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=EvJqfGXb5Fo","annotations":[{"segment":[0.2,11.844567692911799],"label":"Raking leaves"},{"segment":[105.61406188455538,128.56291178003121],"label":"Raking 
leaves"}]},"XQ4owd3yQ_4":{"duration":30.349,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XQ4owd3yQ_4","annotations":[{"segment":[4.261093806552262,28.1705646099844],"label":"Surfing"}]},"TPqgLJpxyvY":{"duration":221.415,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TPqgLJpxyvY","annotations":[{"segment":[13.817223088923557,203.45860998439937],"label":"Rollerblading"}]},"9rHQ4INoBdQ":{"duration":44.47,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=9rHQ4INoBdQ","annotations":[]},"aIXUWoP-L-E":{"duration":58.375,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=aIXUWoP-L-E","annotations":[{"segment":[0.01,54.74355147599475],"label":"Cutting the grass"}]},"mLijl36SjJU":{"duration":144.173,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mLijl36SjJU","annotations":[{"segment":[10.346246687962758,56.00468315875492],"label":"Installing carpet"}]},"LPV3n9LeQ80":{"duration":125.95,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LPV3n9LeQ80","annotations":[{"segment":[57.33691334691083,60.27955430485476],"label":"Shoveling snow"}]},"0hdwFR5qWz4":{"duration":228.74,"subset":"validation","resolution":"242x144","url":"https://www.youtube.com/watch?v=0hdwFR5qWz4","annotations":[{"segment":[133.40188409905824,145.1412503774514],"label":"Table soccer"},{"segment":[155.45766316755447,164.35112246936748],"label":"Table 
soccer"}]},"hSYfK-W2UJQ":{"duration":96.22,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hSYfK-W2UJQ","annotations":[{"segment":[29.117757472926865,41.25810460243287],"label":"Spinning"},{"segment":[52.798928416901546,68.68629626539088],"label":"Spinning"},{"segment":[78.12878761372514,89.51973047353823],"label":"Spinning"}]},"JkcoGLKl_0A":{"duration":30.047,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=JkcoGLKl_0A","annotations":[{"segment":[0.2,29.846621315192746],"label":"Swinging at the playground"}]},"x6E92fGgdH4":{"duration":134.93,"subset":"validation","resolution":"656x480","url":"https://www.youtube.com/watch?v=x6E92fGgdH4","annotations":[{"segment":[12.000763140000002,126.3610631302652],"label":"Washing hands"}]},"OCT2h1kmZoA":{"duration":173.43,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=OCT2h1kmZoA","annotations":[{"segment":[14.610314241141074,160.44289410296412],"label":"Snowboarding"}]},"5mxY_p7N_Yw":{"duration":189.011,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=5mxY_p7N_Yw","annotations":[{"segment":[0,36.56364028710808],"label":"Playing ice hockey"},{"segment":[45.11481422522207,163.94664515832332],"label":"Playing ice hockey"},{"segment":[170.4337426286167,189.01043083900225],"label":"Playing ice hockey"}]},"0VoNAs7Ia0A":{"duration":148.283,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0VoNAs7Ia0A","annotations":[{"segment":[0.01,148.221],"label":"Doing a powerbomb"}]},"K8ivAsscK98":{"duration":205.636,"subset":"training","resolution":"424x320","url":"https://www.youtube.com/watch?v=K8ivAsscK98","annotations":[{"segment":[16.36102104492343,87.57958324047247],"label":"Playing 
drums"}]},"M_LSDRzVN38":{"duration":43.584,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=M_LSDRzVN38","annotations":[{"segment":[0.2,43.383852999999995],"label":"Hitting a pinata"}]},"88s9AYMyhTk":{"duration":161.89,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=88s9AYMyhTk","annotations":[]},"gWyBBQtsDhc":{"duration":217.5,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=gWyBBQtsDhc","annotations":[{"segment":[66.54660596000001,183.69887647347892],"label":"Brushing hair"}]},"znhZPUF6xQw":{"duration":75.47,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=znhZPUF6xQw","annotations":[{"segment":[3.588547826053042,63.00548724102964],"label":"Tumbling"}]},"SHWgBc4Ahc8":{"duration":181.697,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SHWgBc4Ahc8","annotations":[]},"-PbWrGLuHT8":{"duration":53.13,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=-PbWrGLuHT8","annotations":[{"segment":[7.459364976599063,53.127256235827666],"label":"Windsurfing"}]},"TPAG_7k1MrY":{"duration":153.461,"subset":"testing","resolution":"854x470","url":"https://www.youtube.com/watch?v=TPAG_7k1MrY","annotations":[]},"Cb_R59_-0zU":{"duration":99.99000000000001,"subset":"training","resolution":"638x360","url":"https://www.youtube.com/watch?v=Cb_R59_-0zU","annotations":[{"segment":[62.45925585023401,64.79855382215288],"label":"Washing hands"},{"segment":[68.22952418096725,70.56882215288611],"label":"Washing hands"},{"segment":[73.37597971918876,74.93551170046801],"label":"Washing hands"},{"segment":[76.96290327613104,78.36648205928238],"label":"Washing hands"}]},"lBfyQsXSvUk":{"duration":126.2,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lBfyQsXSvUk","annotations":[{"segment":[20.06318247425897,93.2347891450858],"label":"Getting a 
tattoo"},{"segment":[14.87898,17.242706000000002],"label":"Getting a tattoo"}]},"qkHBMmMlEr8":{"duration":71.31,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qkHBMmMlEr8","annotations":[{"segment":[27.143919319812795,71.308411],"label":"Layup drill in basketball"}]},"ybcStSlBGPc":{"duration":235.335,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ybcStSlBGPc","annotations":[{"segment":[99.4713744149766,165.5409219968799],"label":"Carving jack-o-lanterns"}]},"kbdBKIWKOWk":{"duration":52.94,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=kbdBKIWKOWk","annotations":[{"segment":[4.955496692667707,32.82289059000001],"label":"Doing step aerobics"},{"segment":[37.308581520000004,40.46988965678627],"label":"Doing step aerobics"}]},"792896eLpfM":{"duration":137.72,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=792896eLpfM","annotations":[]},"nXBuKxYvgbk":{"duration":77.044,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=nXBuKxYvgbk","annotations":[]},"UnpBbcIa3zA":{"duration":72.05,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=UnpBbcIa3zA","annotations":[{"segment":[9.491551482059283,36.67445631825273],"label":"Discus throw"}]},"XkVicWlqTV4":{"duration":64.784,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=XkVicWlqTV4","annotations":[{"segment":[0.01,16.574917999375973],"label":"Playing beach volleyball"},{"segment":[33.85730201092044,47.29915624212168],"label":"Playing beach volleyball"}]},"Vx4ZW7Xseyg":{"duration":164.4,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Vx4ZW7Xseyg","annotations":[{"segment":[17.54937975,126.93256766999997],"label":"Springboard 
diving"}]},"MNSzt39_UbE":{"duration":82.198,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=MNSzt39_UbE","annotations":[{"segment":[0.01,82.141],"label":"Tug of war"}]},"hDPLy21Yyuk":{"duration":76.23,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hDPLy21Yyuk","annotations":[{"segment":[21.392480499219968,76.161],"label":"Cleaning shoes"}]},"_uKKSGTNJAY":{"duration":37.036,"subset":"validation","resolution":"800x480","url":"https://www.youtube.com/watch?v=_uKKSGTNJAY","annotations":[{"segment":[11.544773790951638,30.24730733229329],"label":"Fun sliding down"}]},"Vkf3eEsHWOY":{"duration":120.93,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Vkf3eEsHWOY","annotations":[]},"fny1HWXezlE":{"duration":144.8,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fny1HWXezlE","annotations":[{"segment":[0.19999999999999998,136.69237733987504],"label":"Camel ride"}]},"8jUdeuAOEJg":{"duration":73.91,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8jUdeuAOEJg","annotations":[{"segment":[2.1907544900435476,58.57385689169064],"label":"Breakdancing"}]},"KDPa4AvVt0s":{"duration":191.57,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KDPa4AvVt0s","annotations":[{"segment":[34.51749495842434,159.73680567340094],"label":"Dodgeball"}]},"eWK9ZfcDvyM":{"duration":91.74,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eWK9ZfcDvyM","annotations":[]},"PCTqA_ov8RA":{"duration":182.022,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=PCTqA_ov8RA","annotations":[{"segment":[7.099111719570824,23.285086249609982],"label":"Surfing"},{"segment":[48.27395929797191,49.97774609672387],"label":"Surfing"},{"segment":[56.792893291731666,66.73164961778471],"label":"Surfing"},{"segment":[96.83188306240248,104.21495919032762],"label"
:"Surfing"},{"segment":[110.46217745241809,121.82075611076444],"label":"Surfing"},{"segment":[124.66040077535101,128.63590330577222],"label":"Surfing"},{"segment":[138.57465963182526,150.78513168954757],"label":"Surfing"},{"segment":[160.72388801560064,177.19382707020281],"label":"Surfing"}]},"XFijgUPprk4":{"duration":55.87,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XFijgUPprk4","annotations":[{"segment":[2.8761590171606866,48.72039062402496],"label":"Using parallel bars"}]},"gRg50bxJxcI":{"duration":23.615,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=gRg50bxJxcI","annotations":[]},"89dNrk4VM1c":{"duration":80.9,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=89dNrk4VM1c","annotations":[]},"xFx7UXyjW4Y":{"duration":57.145,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=xFx7UXyjW4Y","annotations":[]},"Fr9F2xRLd0A":{"duration":70.543,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Fr9F2xRLd0A","annotations":[{"segment":[10.244309337009893,65.70935606598185],"label":"Canoeing"}]},"xQxT2_meU50":{"duration":2.56,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=xQxT2_meU50","annotations":[{"segment":[0,2.436666],"label":"Chopping wood"}]},"K5wPwCFVkhU":{"duration":15.79,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=K5wPwCFVkhU","annotations":[{"segment":[0.01,15.789568],"label":"Washing 
face"}]},"-lGMxgEoYVE":{"duration":24.03,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=-lGMxgEoYVE","annotations":[]},"vwWlNsCCQQw":{"duration":50.829,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=vwWlNsCCQQw","annotations":[]},"Xj0LbNWxJpE":{"duration":52.872,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Xj0LbNWxJpE","annotations":[]},"cgWhl3ZDuTQ":{"duration":138.97,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=cgWhl3ZDuTQ","annotations":[{"segment":[0,86.96142644999999],"label":"Discus throw"}]},"NgFjrooP-FA":{"duration":202.386,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=NgFjrooP-FA","annotations":[]},"zOI9SjQpc_Q":{"duration":84.683,"subset":"testing","resolution":"484x352","url":"https://www.youtube.com/watch?v=zOI9SjQpc_Q","annotations":[]},"8874vqLm958":{"duration":178.91,"subset":"training","resolution":"852x480","url":"https://www.youtube.com/watch?v=8874vqLm958","annotations":[{"segment":[45.48264476981713,82.2536914987891],"label":"Elliptical trainer"}]},"xi1oCOBPAzQ":{"duration":230.644,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xi1oCOBPAzQ","annotations":[{"segment":[77.34666926677068,179.53179251170047],"label":"Ice 
fishing"}]},"w--X02F3MHM":{"duration":35.83,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=w--X02F3MHM","annotations":[{"segment":[0.22357809048361937,5.086401558502341],"label":"Hopscotch"},{"segment":[15.92993894695788,22.469598093603743],"label":"Hopscotch"},{"segment":[26.829370858034324,29.512307943837754],"label":"Hopscotch"},{"segment":[30.909671009360377,32.86597930109204],"label":"Hopscotch"}]},"dQs2-z3TIes":{"duration":58.75,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=dQs2-z3TIes","annotations":[{"segment":[13.27542108,49.43475900000001],"label":"Ballet"},{"segment":[6.63771054,10.60835268],"label":"Ballet"}]},"8oG9huHwmLw":{"duration":225.326,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=8oG9huHwmLw","annotations":[]},"zFb7PKk_-vA":{"duration":94.25,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zFb7PKk_-vA","annotations":[{"segment":[0.19999999999999998,94.00099999999999],"label":"River tubing"}]},"o-RbNz6gD5k":{"duration":41.33,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=o-RbNz6gD5k","annotations":[{"segment":[8.509339466879302,32.95541423323444],"label":"Ping-pong"}]},"9_7Z6Nhkzx8":{"duration":196.51,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9_7Z6Nhkzx8","annotations":[{"segment":[63.76614432449298,184.55393693915755],"label":"Doing motocross"}]},"2FcdD7WapJM":{"duration":56.448,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2FcdD7WapJM","annotations":[{"segment":[5.9871440633342505,24.861572673554907],"label":"Gargling 
mouthwash"}]},"r0XjfYHrAkk":{"duration":71.727,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=r0XjfYHrAkk","annotations":[]},"OLUOk4xfYWE":{"duration":161.73,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=OLUOk4xfYWE","annotations":[{"segment":[0.01,161.661],"label":"Playing piano"}]},"D3d3CkKm2b4":{"duration":71.216,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=D3d3CkKm2b4","annotations":[]},"T4NXGL9efME":{"duration":212.74,"subset":"testing","resolution":"3414x1080","url":"https://www.youtube.com/watch?v=T4NXGL9efME","annotations":[]},"t7J7SugZPlE":{"duration":74.47,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=t7J7SugZPlE","annotations":[{"segment":[0.01,74.466393],"label":"Removing curlers"}]},"JdxO6sLceBc":{"duration":145.172,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JdxO6sLceBc","annotations":[]},"Et4GHTvGbg0":{"duration":59.77,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Et4GHTvGbg0","annotations":[{"segment":[20.932843009360372,38.46235074102964],"label":"Shoveling snow"}]},"8b3ZBE0n3V8":{"duration":37.803,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=8b3ZBE0n3V8","annotations":[{"segment":[16.92542651326053,31.020119672386894],"label":"Arm 
wrestling"}]},"f1d7_TKXNYQ":{"duration":55.54,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=f1d7_TKXNYQ","annotations":[]},"OD4MrhX85-M":{"duration":32.926,"subset":"validation","resolution":"294x240","url":"https://www.youtube.com/watch?v=OD4MrhX85-M","annotations":[{"segment":[10.117686427457098,18.078302652106085],"label":"Shuffleboard"}]},"LoS0yfHXQjU":{"duration":91.116,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=LoS0yfHXQjU","annotations":[]},"iEWe6IYVIuk":{"duration":50.62,"subset":"training","resolution":"320x180","url":"https://www.youtube.com/watch?v=iEWe6IYVIuk","annotations":[{"segment":[19.41174102964119,27.855059282371297],"label":"Spinning"},{"segment":[40.16494383775351,48.371533541341655],"label":"Spinning"}]},"vAzTt06gpgE":{"duration":146.124,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=vAzTt06gpgE","annotations":[{"segment":[0.2,142.6625678627145],"label":"Running a marathon"}]},"UGCqUiaia7k":{"duration":8.57,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UGCqUiaia7k","annotations":[]},"HT2xgemwHgQ":{"duration":130.543,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=HT2xgemwHgQ","annotations":[]},"RX-9yj3PkYI":{"duration":106.7,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RX-9yj3PkYI","annotations":[{"segment":[55.64952829410874,59.135168481024635],"label":"Drinking coffee"},{"segment":[83.36866692339223,85.19447844987198],"label":"Drinking coffee"},{"segment":[104.2336146645866,104.89858190327614],"label":"Drinking coffee"}]},"FRsHew2Pcko":{"duration":61.673,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=FRsHew2Pcko","annotations":[{"segment":[0.2,50.5033738317757],"label":"Playing 
drums"}]},"lfuNoeRYWz4":{"duration":104.97999999999999,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=lfuNoeRYWz4","annotations":[{"segment":[46.249348673946955,78.66482137285492],"label":"Washing hands"}]},"fo0gFCcKdtg":{"duration":51.27,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fo0gFCcKdtg","annotations":[{"segment":[1.6363148399999998,42.027184087363494],"label":"Kayaking"}]},"E9R1H8xRIW8":{"duration":24.96,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=E9R1H8xRIW8","annotations":[{"segment":[0.01,20.113243252730108],"label":"Mowing the lawn"}]},"3Lt8fMFoS30":{"duration":133.56,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=3Lt8fMFoS30","annotations":[]},"Fu46pdVz4qY":{"duration":106.39,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=Fu46pdVz4qY","annotations":[{"segment":[57.24578003120126,76.99142589703588],"label":"Ironing clothes"}]},"4lxS8OJRsa8":{"duration":170.458,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=4lxS8OJRsa8","annotations":[{"segment":[27.676288346085595,142.86948772303265],"label":"Capoeira"}]},"GYkKMTl-0Os":{"duration":62.69,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=GYkKMTl-0Os","annotations":[{"segment":[11.489746109999999,51.87030741],"label":"Preparing salad"}]},"5RLeo8LymY4":{"duration":12.63,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=5RLeo8LymY4","annotations":[{"segment":[0,12.631654],"label":"Kayaking"}]},"XdqHO4x2FL0":{"duration":68.128,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XdqHO4x2FL0","annotations":[{"segment":[30.396679663026518,49.102328686427455],"label":"Arm 
wrestling"}]},"s_6odqEAcyE":{"duration":141.155,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=s_6odqEAcyE","annotations":[]},"4DCxc6mO864":{"duration":166.21,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4DCxc6mO864","annotations":[{"segment":[0.01,164.13407112611037],"label":"Zumba"}]},"uhAIe9rcjSo":{"duration":76.811,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=uhAIe9rcjSo","annotations":[{"segment":[32.11450187831513,76.81117913832199],"label":"Cleaning sink"}]},"m16Cn9VA3Lk":{"duration":29.652,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=m16Cn9VA3Lk","annotations":[{"segment":[0.2,29.501],"label":"Using the monkey bar"}]},"ncXZIWMNKZQ":{"duration":30.14,"subset":"validation","resolution":"852x480","url":"https://www.youtube.com/watch?v=ncXZIWMNKZQ","annotations":[{"segment":[0.2,6.9372403423137285],"label":"Knitting"},{"segment":[11.296452628472359,12.42141063780362],"label":"Knitting"},{"segment":[18.42118668757034,24.702202239669873],"label":"Knitting"}]},"WJn0HtVCrJU":{"duration":81.78,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=WJn0HtVCrJU","annotations":[]},"uMCyo89mK_E":{"duration":104.746,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=uMCyo89mK_E","annotations":[{"segment":[0.2,94.12362443681748],"label":"Decorating the Christmas tree"}]},"Z-6dR4H2dns":{"duration":159.684,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Z-6dR4H2dns","annotations":[{"segment":[12.70488893841468,25.90800881559072],"label":"Hand car wash"},{"segment":[48.577516529232604,62.52798281455068],"label":"Hand car wash"},{"segment":[81.46075848748235,91.17626179332888],"label":"Hand car wash"},{"segment":[97.65326399722655,99.14795565054601],"label":"Hand car 
wash"},{"segment":[113.59665270514819,134.27323642121684],"label":"Hand car wash"},{"segment":[139.50466121684866,148.72193347581901],"label":"Hand car wash"}]},"SgnBsgrqfj0":{"duration":237.355,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=SgnBsgrqfj0","annotations":[{"segment":[90.14476365054601,224.17763728549141],"label":"Making a lemonade"}]},"h-N9TvaBax8":{"duration":63.39,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=h-N9TvaBax8","annotations":[{"segment":[17.9490392925117,25.86046157020281],"label":"Dodgeball"}]},"c1RR1cmS9LU":{"duration":150.744,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=c1RR1cmS9LU","annotations":[{"segment":[80.66329608736349,111.47056077379095],"label":"Rollerblading"},{"segment":[138.0447661903276,141.80748554134166],"label":"Rollerblading"}]},"Ig_AvatvYrk":{"duration":232.36,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ig_AvatvYrk","annotations":[{"segment":[19.21245453355549,138.83717144059915],"label":"Getting a 
haircut"}]},"c-3fceLb7n4":{"duration":76.069,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=c-3fceLb7n4","annotations":[{"segment":[5.755580053134098,9.79041967801161],"label":"Shuffleboard"},{"segment":[16.31736613001935,19.75884698653252],"label":"Shuffleboard"},{"segment":[25.21774765548445,27.235167467923205],"label":"Shuffleboard"},{"segment":[34.47414444196815,37.79695354480846],"label":"Shuffleboard"},{"segment":[44.917258765180534,47.40936559231077],"label":"Shuffleboard"},{"segment":[51.08818995616967,53.10560976860843],"label":"Shuffleboard"},{"segment":[61.412632525709185,64.7354416285495],"label":"Shuffleboard"}]},"3V3bOyZzzSk":{"duration":117.61,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=3V3bOyZzzSk","annotations":[]},"i4yQ54eWfy4":{"duration":137.37,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=i4yQ54eWfy4","annotations":[{"segment":[40.712806754183546,118.17023432427699],"label":"Preparing salad"}]},"uM7QSskmloo":{"duration":147.215,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uM7QSskmloo","annotations":[]},"EznRcs1DffA":{"duration":162.17000000000002,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EznRcs1DffA","annotations":[]},"Pl4zAGveukQ":{"duration":129.103,"subset":"training","resolution":"360x240","url":"https://www.youtube.com/watch?v=Pl4zAGveukQ","annotations":[{"segment":[52.75016603883405,97.378736132292],"label":"Removing ice from car"}]},"u9ec3Exc5mI":{"duration":60.581,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=u9ec3Exc5mI","annotations":[{"segment":[0.01,56.10949453631962],"label":"Hitting a 
pinata"}]},"vKnQegYrYrg":{"duration":182.21,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vKnQegYrYrg","annotations":[]},"-lER_VO9LUo":{"duration":25.45,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=-lER_VO9LUo","annotations":[{"segment":[0.3279333720137338,12.774985811266072],"label":"Walking the dog"},{"segment":[16.104770540238036,25.22203825051841],"label":"Walking the dog"}]},"utEbOY3UDDI":{"duration":8.499,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=utEbOY3UDDI","annotations":[]},"ZblmMtkVXIc":{"duration":45.488,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZblmMtkVXIc","annotations":[{"segment":[25.69327570093458,40.132471962616826],"label":"Mooping floor"}]},"LURZ8QDfowU":{"duration":172.06,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=LURZ8QDfowU","annotations":[{"segment":[0.01,172.001],"label":"Hand car wash"}]},"q2VG0zzPJMw":{"duration":31.138,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=q2VG0zzPJMw","annotations":[{"segment":[0.8732948517940718,28.624664586583464],"label":"Welding"}]},"aOrDnKEJAwQ":{"duration":144.986,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aOrDnKEJAwQ","annotations":[{"segment":[57.787813084112145,118.300753894081],"label":"Scuba diving"}]},"qkk2tK19sx8":{"duration":51.11,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qkk2tK19sx8","annotations":[{"segment":[8.690590745709828,42.73538201560063],"label":"Using the pommel 
horse"}]},"Y7aacWy3WDo":{"duration":173.41,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Y7aacWy3WDo","annotations":[]},"ay_YB-S4qR0":{"duration":34.621,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=ay_YB-S4qR0","annotations":[{"segment":[0,34.42095238095238],"label":"Throwing darts"}]},"q2KR3lzTrq4":{"duration":195.581,"subset":"validation","resolution":"424x240","url":"https://www.youtube.com/watch?v=q2KR3lzTrq4","annotations":[{"segment":[3.8139952808996713,28.528684701129542],"label":"Fun sliding down"},{"segment":[87.72189146069243,93.51916428765993],"label":"Fun sliding down"},{"segment":[159.11988393270684,167.66323340560382],"label":"Fun sliding down"}]},"WRv7Kpf1KT0":{"duration":30.186,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=WRv7Kpf1KT0","annotations":[{"segment":[0.2,30.18594],"label":"Applying sunscreen"}]},"PwyvQ3BKziA":{"duration":91.02,"subset":"validation","resolution":"656x480","url":"https://www.youtube.com/watch?v=PwyvQ3BKziA","annotations":[{"segment":[20.860010920436817,42.28764118564742],"label":"Washing face"}]},"Mm-bwu8Q2IU":{"duration":82.501,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Mm-bwu8Q2IU","annotations":[{"segment":[6.845837758851854,15.841221727600308],"label":"Disc dog"},{"segment":[27.27820991643763,30.747857443985982],"label":"Disc dog"},{"segment":[37.68715351875233,41.28530703900156],"label":"Disc dog"},{"segment":[54.392866291338,72.12662292685202],"label":"Disc dog"}]},"4YF9R3Sz2bA":{"duration":64.993,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4YF9R3Sz2bA","annotations":[]},"ODblEia5mcI":{"duration":119.235,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ODblEia5mcI","annotations":[{"segment":[0.2,114.36601404056162],"label":"Assembling 
bicycle"}]},"KpoH4pBp9fg":{"duration":222.982,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KpoH4pBp9fg","annotations":[{"segment":[52.86114196567863,197.55146333853355],"label":"Making a cake"}]},"tTBbFGv9emI":{"duration":231.9,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=tTBbFGv9emI","annotations":[{"segment":[4.341298333853354,87.54951639937597],"label":"Chopping wood"},{"segment":[108.53245834633385,225.0239636380655],"label":"Chopping wood"}]},"TkhUtx0Eyfw":{"duration":188.45,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=TkhUtx0Eyfw","annotations":[]},"zpaS2iYrJjk":{"duration":60.02,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=zpaS2iYrJjk","annotations":[]},"aq-pRXpvrD8":{"duration":209.468,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=aq-pRXpvrD8","annotations":[]},"eLeGJBfqURA":{"duration":151.464,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eLeGJBfqURA","annotations":[{"segment":[9.451711263650546,29.300304917316694],"label":"Disc dog"},{"segment":[35.443917238689544,45.36821406552262],"label":"Disc dog"},{"segment":[47.731141881435256,73.72334785647426],"label":"Disc dog"},{"segment":[77.97661792511701,122.8722464274571],"label":"Disc dog"},{"segment":[123.34483199063963,125.70775980655226],"label":"Disc dog"},{"segment":[130.90620100156008,148.8644524024961],"label":"Disc dog"}]},"cxDPCkefl1A":{"duration":73.816,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=cxDPCkefl1A","annotations":[{"segment":[15.776636986567897,34.66253733853354],"label":"Making a 
cake"}]},"tNIfKjCEPq4":{"duration":50.968,"subset":"testing","resolution":"524x352","url":"https://www.youtube.com/watch?v=tNIfKjCEPq4","annotations":[]},"gyQ3NBwXhDU":{"duration":208.237,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gyQ3NBwXhDU","annotations":[{"segment":[0.01,155.90714508580342],"label":"Hurling"}]},"X1vyuNazaIc":{"duration":13.445,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=X1vyuNazaIc","annotations":[{"segment":[0.2,7.265346333853353],"label":"Decorating the Christmas tree"}]},"M7tUnCF9lU4":{"duration":40.426,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=M7tUnCF9lU4","annotations":[{"segment":[0.01,39.35636364742529],"label":"Fun sliding down"}]},"Db6sq4DjW7E":{"duration":43.07,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Db6sq4DjW7E","annotations":[{"segment":[24.172792511700465,26.25433853354134],"label":"Layup drill in basketball"},{"segment":[29.813110764430576,36.594921996879876],"label":"Layup drill in basketball"}]},"sfT9Siql3P8":{"duration":34.529,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sfT9Siql3P8","annotations":[{"segment":[0.2,34.297],"label":"Riding bumper cars"}]},"BnswTxq-Hbg":{"duration":10.148,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BnswTxq-Hbg","annotations":[]},"5wOmHw6Boj8":{"duration":39.59,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=5wOmHw6Boj8","annotations":[{"segment":[1.6355171606864274,6.140904056162246],"label":"Washing dishes"},{"segment":[18.546147425897036,25.396804212168487],"label":"Washing 
dishes"}]},"wvnP2VwQb6E":{"duration":221.425,"subset":"testing","resolution":"432x344","url":"https://www.youtube.com/watch?v=wvnP2VwQb6E","annotations":[]},"t6iq95QPZIg":{"duration":108.763,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=t6iq95QPZIg","annotations":[{"segment":[0,93.49143302028081],"label":"Scuba diving"}]},"ZOKC86lF6E8":{"duration":83.128,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=ZOKC86lF6E8","annotations":[{"segment":[8.429459355244958,32.93973348049569],"label":"Disc dog"},{"segment":[33.458468780031204,72.36366503588144],"label":"Disc dog"},{"segment":[72.62303301092044,78.45881244929798],"label":"Disc dog"}]},"cFCN9QE1M0c":{"duration":42.98,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=cFCN9QE1M0c","annotations":[{"segment":[1.18186773,40.16871754853673],"label":"Playing flauta"}]},"F1zAvnkqa0c":{"duration":59.838,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=F1zAvnkqa0c","annotations":[]},"q_eRxmeGJBY":{"duration":98.03999999999999,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=q_eRxmeGJBY","annotations":[{"segment":[16.976358793127236,95.43466513572544],"label":"Using the pommel horse"}]},"sW5Fp_XgoiY":{"duration":109.111,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sW5Fp_XgoiY","annotations":[]},"yHXZZEucV1I":{"duration":201.341,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=yHXZZEucV1I","annotations":[{"segment":[9.108996218352136,148.8849721123245],"label":"Making a 
lemonade"}]},"X3476L-FelA":{"duration":123.577,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=X3476L-FelA","annotations":[]},"Z_lah8-xYx8":{"duration":27.423,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Z_lah8-xYx8","annotations":[{"segment":[5.948883587824385,15.644942494366441],"label":"Shuffleboard"},{"segment":[20.727898485020646,25.938996223338403],"label":"Shuffleboard"}]},"lAa8tS962Qg":{"duration":220.94,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=lAa8tS962Qg","annotations":[{"segment":[111.25107005630447,220.937867],"label":"Skateboarding"}]},"RpH774VD6Hw":{"duration":100.4,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RpH774VD6Hw","annotations":[{"segment":[20.51919035945111,29.44738769142602],"label":"Archery"},{"segment":[41.19501575981406,73.61846922856506],"label":"Archery"},{"segment":[79.57059981279251,92.10140293291731],"label":"Archery"}]},"R6kXT4Spiwo":{"duration":157.52,"subset":"validation","resolution":"1440x1440","url":"https://www.youtube.com/watch?v=R6kXT4Spiwo","annotations":[{"segment":[40.05095631825274,130.96417004680188],"label":"Plataform diving"}]},"ZpuZEui-Bu0":{"duration":151.16,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=ZpuZEui-Bu0","annotations":[{"segment":[98.96964399,120.90950919],"label":"Playing squash"},{"segment":[31.09054176,85.03057674],"label":"Playing squash"}]},"8lRi0udDXqk":{"duration":224.886,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=8lRi0udDXqk","annotations":[]},"efaYmJsTDJc":{"duration":74.026,"subset":"training","resolution":"810x480","url":"https://www.youtube.com/watch?v=efaYmJsTDJc","annotations":[{"segment":[26.51993647975078,37.24321514330218],"label":"Gargling 
mouthwash"}]},"lk2niPrG3y8":{"duration":28.724,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=lk2niPrG3y8","annotations":[{"segment":[0.2,26.703714508580347],"label":"Rope skipping"}]},"w0d32MVTY9Q":{"duration":43.027,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=w0d32MVTY9Q","annotations":[{"segment":[20.125273010920434,29.181645865834632],"label":"Calf roping"}]},"DWal5PJry6k":{"duration":109.83,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=DWal5PJry6k","annotations":[{"segment":[2.7382464898595944,25.15763962558502],"label":"Washing hands"},{"segment":[70.33870670826832,79.58028861154446],"label":"Washing hands"},{"segment":[96.52318876755069,97.89231201248049],"label":"Washing hands"}]},"yOFIXQCbSqc":{"duration":37.57,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yOFIXQCbSqc","annotations":[{"segment":[5.985007209237535,37.46880491951791],"label":"Playing water polo"}]},"EbRPwTlRLr0":{"duration":13.306,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=EbRPwTlRLr0","annotations":[]},"PSh-caJvSHU":{"duration":194.746,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=PSh-caJvSHU","annotations":[{"segment":[16.65787990675793,125.08553457256411],"label":"Carving jack-o-lanterns"}]},"bUCMSDMB7MU":{"duration":210.954,"subset":"training","resolution":"854x474","url":"https://www.youtube.com/watch?v=bUCMSDMB7MU","annotations":[{"segment":[5.265604692214899,52.32694662888556],"label":"Disc dog"},{"segment":[61.2126545469982,193.51097243889754],"label":"Disc dog"}]},"y47RXYfefvQ":{"duration":12.562,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=y47RXYfefvQ","annotations":[{"segment":[0.01,12.561994],"label":"Removing ice from 
car"}]},"iyGve9RCjV4":{"duration":233.5,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=iyGve9RCjV4","annotations":[{"segment":[32.733622607921674,178.21638878504672],"label":"Capoeira"}]},"cyznGwlE9hM":{"duration":233.78,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=cyznGwlE9hM","annotations":[{"segment":[50.694558459889414,106.4950436711346],"label":"Discus throw"},{"segment":[8.023599180701922,45.22392265486538],"label":"Shot put"},{"segment":[117.07160622751441,159.01314739936538],"label":"Discus throw"}]},"Kzpxiedbf2Y":{"duration":136.63,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Kzpxiedbf2Y","annotations":[]},"CJCONq_i1cQ":{"duration":238.632,"subset":"training","resolution":"640x352","url":"https://www.youtube.com/watch?v=CJCONq_i1cQ","annotations":[{"segment":[0.01,238.541],"label":"Removing ice from car"}]},"mB90PudOrnU":{"duration":27.38,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mB90PudOrnU","annotations":[{"segment":[0.01,27.119377414196567],"label":"Bathing dog"}]},"RdfM7bCMDeM":{"duration":96.387,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RdfM7bCMDeM","annotations":[]},"9ZboVy59qrw":{"duration":115.03,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=9ZboVy59qrw","annotations":[{"segment":[46.830270410000004,94.84420981],"label":"Playing polo"}]},"Qmo9D73hWXY":{"duration":203.709,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Qmo9D73hWXY","annotations":[{"segment":[25.7416562131873,195.445907199688],"label":"Playing ice hockey"}]},"3fAVDJyrSQs":{"duration":75.93,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3fAVDJyrSQs","annotations":[{"segment":[6.041174606864274,74.86318336349453],"label":"Chopping 
wood"}]},"DuLweixeP8I":{"duration":91.069,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=DuLweixeP8I","annotations":[{"segment":[0.01,25.145759406318547],"label":"Doing fencing"},{"segment":[38.32814725678584,59.461382459325584],"label":"Doing fencing"},{"segment":[66.69202264624147,70.80356314468384],"label":"Doing fencing"},{"segment":[73.6391083160234,83.84707093284584],"label":"Doing fencing"},{"segment":[88.95105224125705,91.021],"label":"Doing fencing"}]},"K_hvDDWcjDQ":{"duration":182.393,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=K_hvDDWcjDQ","annotations":[]},"KSbIw1m2KoU":{"duration":222.59,"subset":"training","resolution":"470x360","url":"https://www.youtube.com/watch?v=KSbIw1m2KoU","annotations":[{"segment":[14.235570982839313,101.38504212168486],"label":"Using the balance beam"},{"segment":[124.6592496099844,156.95259282371293],"label":"Using the balance beam"}]},"USwMjvs45MU":{"duration":122.37,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=USwMjvs45MU","annotations":[{"segment":[26.645865834633387,105.3352352],"label":"Long jump"}]},"ReOsTqcQBmY":{"duration":187.734,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ReOsTqcQBmY","annotations":[{"segment":[0.01,146.27435146254484],"label":"Rope skipping"},{"segment":[148.02614015616848,172.84314664917002],"label":"Rope skipping"}]},"IRua6QGP-Ow":{"duration":175.54,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IRua6QGP-Ow","annotations":[]},"5OWJ7WqKWMU":{"duration":150.95,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=5OWJ7WqKWMU","annotations":[{"segment":[0.01,26.846542073927854],"label":"Brushing teeth"},{"segment":[83.60107400214376,103.38273658293271],"label":"Brushing 
teeth"}]},"pmIJcJiydpk":{"duration":12.54,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=pmIJcJiydpk","annotations":[{"segment":[4.6096446,10.42615685179407],"label":"Hopscotch"}]},"UZm05CvtxkA":{"duration":51.18,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=UZm05CvtxkA","annotations":[]},"N-92_rikI8U":{"duration":123.391,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=N-92_rikI8U","annotations":[{"segment":[21.55967378471139,91.05112232293291],"label":"Snowboarding"}]},"H50kgN8JkrQ":{"duration":95.39,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=H50kgN8JkrQ","annotations":[]},"jB4jEtHC5RI":{"duration":219.127,"subset":"testing","resolution":"1920x1440","url":"https://www.youtube.com/watch?v=jB4jEtHC5RI","annotations":[]},"b7Q-Jj9ZEH0":{"duration":90.907,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=b7Q-Jj9ZEH0","annotations":[{"segment":[0.01,90.841],"label":"Fun sliding down"}]},"_I7igpv-A0A":{"duration":30.09,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=_I7igpv-A0A","annotations":[]},"S8oIiWRiIfE":{"duration":28.75,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=S8oIiWRiIfE","annotations":[{"segment":[21.48124595631825,23.813239254290174],"label":"Drinking coffee"}]},"qp9OcGPufEc":{"duration":185.737,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qp9OcGPufEc","annotations":[{"segment":[0,173.85624056162246],"label":"Disc dog"}]},"Ie7DREiXp08":{"duration":161.008,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ie7DREiXp08","annotations":[{"segment":[13.061423600624025,143.9268408299532],"label":"Putting in contact 
lenses"}]},"w_X7cLnFCYw":{"duration":104.96000000000001,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=w_X7cLnFCYw","annotations":[{"segment":[33.39139469578783,83.80585335413417],"label":"Triple jump"},{"segment":[9.982778471138845,13.094664586583464],"label":"Triple jump"}]},"VqZr2oZk4z4":{"duration":30.117,"subset":"training","resolution":"711x480","url":"https://www.youtube.com/watch?v=VqZr2oZk4z4","annotations":[{"segment":[0,3.8924539781591267],"label":"Scuba diving"},{"segment":[5.158673946957878,7.222143525741029],"label":"Scuba diving"},{"segment":[8.1131872074883,9.613892355694228],"label":"Scuba diving"},{"segment":[10.317347893915755,10.88011232449298],"label":"Scuba diving"},{"segment":[12.005641185647427,14.022213728549143],"label":"Scuba diving"},{"segment":[15.52291887675507,16.13258034321373],"label":"Scuba diving"},{"segment":[17.680182527301092,17.91466770670827],"label":"Scuba diving"},{"segment":[22.979547581903276,29.861],"label":"Scuba diving"}]},"ywWHBghVyJ4":{"duration":30.279,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=ywWHBghVyJ4","annotations":[{"segment":[8.833290953406843,15.210264770670825],"label":"Waxing skis"},{"segment":[25.03552897035881,27.208423937597505],"label":"Waxing skis"}]},"gzVpwbiB9fE":{"duration":34.34,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gzVpwbiB9fE","annotations":[{"segment":[4.9290059112568585,16.769335328515183],"label":"Tennis serve with ball bouncing"}]},"8NRv-75RlPI":{"duration":81.131,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=8NRv-75RlPI","annotations":[{"segment":[31.377609984399378,69.46091887675507],"label":"Elliptical 
trainer"}]},"qokr0bO828E":{"duration":188.85,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qokr0bO828E","annotations":[{"segment":[38.899101733281974,173.00074659309504],"label":"Shaving"}]},"RLdlDewtKbE":{"duration":100.94,"subset":"training","resolution":"640x356","url":"https://www.youtube.com/watch?v=RLdlDewtKbE","annotations":[{"segment":[4.094174274882995,74.32501002897259],"label":"Cricket"}]},"IwViXKNNBEU":{"duration":9.85,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IwViXKNNBEU","annotations":[{"segment":[2.00237455,6.79590505],"label":"Discus throw"}]},"5X7zeOps9uA":{"duration":109.575,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=5X7zeOps9uA","annotations":[{"segment":[0.01,109.57496598639456],"label":"Hitting a pinata"}]},"ZlVydB9uUe4":{"duration":71.332,"subset":"testing","resolution":"626x360","url":"https://www.youtube.com/watch?v=ZlVydB9uUe4","annotations":[]},"Rvs3tRNxEjE":{"duration":113.5,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Rvs3tRNxEjE","annotations":[]},"o8EOFXW5GJo":{"duration":185.34,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=o8EOFXW5GJo","annotations":[{"segment":[55.35724258970359,177.92367004680185],"label":"Preparing salad"}]},"tLkNfGM7GOQ":{"duration":178.32999999999998,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tLkNfGM7GOQ","annotations":[]},"dTZZq23pYFY":{"duration":91.93,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=dTZZq23pYFY","annotations":[{"segment":[41.30297365366614,52.48919568486739],"label":"Drinking 
coffee"}]},"xvWw66PvUNg":{"duration":72.261,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xvWw66PvUNg","annotations":[]},"SYh-j9bK_ls":{"duration":185.2,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SYh-j9bK_ls","annotations":[]},"8GJWokVCzUM":{"duration":42.68,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=8GJWokVCzUM","annotations":[{"segment":[22.50430154134165,29.095798146645862],"label":"Long jump"}]},"zJX_Oh4yVnc":{"duration":216.2,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zJX_Oh4yVnc","annotations":[{"segment":[27.03272758978317,70.13814108643832],"label":"Doing motocross"},{"segment":[131.76541194493748,168.4723644029283],"label":"Doing motocross"}]},"qzt9U8tEdJQ":{"duration":183.646,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qzt9U8tEdJQ","annotations":[{"segment":[0.01,183.64662131519273],"label":"Skiing"}]},"ClW3USojCoE":{"duration":176.1,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ClW3USojCoE","annotations":[{"segment":[23.89866926677067,92.29830889235569],"label":"Rock climbing"},{"segment":[106.3078736349454,142.01852886115444],"label":"Rock climbing"},{"segment":[153.5558174726989,157.4015803432137],"label":"Rock climbing"}]},"MidLOhRnpnU":{"duration":102.75,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=MidLOhRnpnU","annotations":[]},"qkN9uA8izVE":{"duration":53.848,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qkN9uA8izVE","annotations":[{"segment":[0.01,53.847073],"label":"Arm 
wrestling"}]},"wRNmTUPzVq0":{"duration":230.18,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=wRNmTUPzVq0","annotations":[]},"PdNb0g36a6U":{"duration":69.06,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PdNb0g36a6U","annotations":[{"segment":[41.47382565210608,61.054205078003115],"label":"Using the pommel horse"}]},"y5j9TqTy9Xw":{"duration":44.84,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=y5j9TqTy9Xw","annotations":[{"segment":[0,39.6611767327836],"label":"High jump"}]},"awrIHVi-1OM":{"duration":135.187,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=awrIHVi-1OM","annotations":[]},"evr-Bzs4UJE":{"duration":79.064,"subset":"testing","resolution":"224x128","url":"https://www.youtube.com/watch?v=evr-Bzs4UJE","annotations":[]},"KlmlCbJup5A":{"duration":48.879,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=KlmlCbJup5A","annotations":[{"segment":[20.67623028021907,25.769272583485012],"label":"Hanging wallpaper"},{"segment":[41.12441505004799,46.59753513713976],"label":"Hanging wallpaper"}]},"3H7ZS0E90pY":{"duration":107.93,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3H7ZS0E90pY","annotations":[{"segment":[3.33412038,95.78439157566304],"label":"Using the balance beam"}]},"mouMMOraPyw":{"duration":176.12,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mouMMOraPyw","annotations":[{"segment":[6.040561622464899,162.82059282371296],"label":"Kayaking"}]},"HMs703DhjQU":{"duration":74.53999999999999,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HMs703DhjQU","annotations":[]},"rWHm1-o_zL0":{"duration":162.378,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rWHm1-o_zL0","annotations":[{"segment":[93.28515420560747,117.06327570093458],"label":"Mooping 
floor"}]},"wJlRDfqjNOM":{"duration":129.289,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=wJlRDfqjNOM","annotations":[]},"lTDkfbr7znU":{"duration":184.32,"subset":"validation","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=lTDkfbr7znU","annotations":[{"segment":[0.8590360661531867,179.82579345340048],"label":"Cumbia"}]},"P0A0rMgjXGQ":{"duration":158.593,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=P0A0rMgjXGQ","annotations":[{"segment":[22.493963251033705,137.1835823490119],"label":"Installing carpet"}]},"9qUbxwjh-4A":{"duration":14.95,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=9qUbxwjh-4A","annotations":[{"segment":[0.27304177999999996,5.70441997],"label":"Smoking a cigarette"},{"segment":[7.34296971,12.387501511076444],"label":"Smoking a cigarette"}]},"-aGpz-Ndctw":{"duration":51.433,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=-aGpz-Ndctw","annotations":[]},"shBiO7aGy6k":{"duration":159.382,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=shBiO7aGy6k","annotations":[{"segment":[5.967492917316693,145.20899432137284],"label":"Slacklining"}]},"R7DhZaY3A08":{"duration":83.06,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=R7DhZaY3A08","annotations":[{"segment":[0.01,83.05780045351474],"label":"Playing squash"}]},"Ry1jXcxnsDs":{"duration":140.899,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ry1jXcxnsDs","annotations":[]},"cudeag10U7Q":{"duration":168.484,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cudeag10U7Q","annotations":[{"segment":[3.1492334753215925,29.130409646724733],"label":"Elliptical trainer"},{"segment":[35.166440474424455,42.51465142990654],"label":"Elliptical trainer"},{"segment":[45.139012629283485,46.97606546884735],"label":"Elliptical 
trainer"},{"segment":[49.07555442834891,54.84914906697819],"label":"Elliptical trainer"},{"segment":[59.5729992258567,75.84403866199378],"label":"Elliptical trainer"},{"segment":[81.61763330062305,91.85264197819315],"label":"Elliptical trainer"},{"segment":[95.00187541744548,97.10136437694703],"label":"Elliptical trainer"},{"segment":[109.69829813395638,123.8698486105919],"label":"Elliptical trainer"},{"segment":[134.3672934080997,138.8287074470405],"label":"Elliptical trainer"},{"segment":[147.22666328504675,155.62461912305298],"label":"Elliptical trainer"}]},"DV1ITGBfo5w":{"duration":155.783,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=DV1ITGBfo5w","annotations":[{"segment":[61.924952706417194,149.93888261295925],"label":"Kneeling"}]},"Eeq-noKrGwY":{"duration":45.953,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Eeq-noKrGwY","annotations":[{"segment":[0.01,44.46580823913646],"label":"Gargling mouthwash"}]},"QElXIVBV-gk":{"duration":213.86,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QElXIVBV-gk","annotations":[{"segment":[89.69708651282784,196.95808570597427],"label":"Preparing pasta"}]},"-5K3KZ6fHuI":{"duration":39.591,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-5K3KZ6fHuI","annotations":[{"segment":[16.181880658346333,33.90783389859594],"label":"Snow tubing"}]},"wUg9S2xx850":{"duration":30.91,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=wUg9S2xx850","annotations":[{"segment":[0.01,22.85386785023401],"label":"Playing 
kickball"}]},"HE2alYPoG60":{"duration":156.155,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HE2alYPoG60","annotations":[]},"wUXpeZHrTWw":{"duration":215.85,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=wUXpeZHrTWw","annotations":[{"segment":[12.456531981279252,201.34314820592826],"label":"Tai chi"}]},"sqjfhVDIN24":{"duration":163.585,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=sqjfhVDIN24","annotations":[]},"zQ7SL6hxS-c":{"duration":221.194,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zQ7SL6hxS-c","annotations":[]},"MyMYuXh8E0Y":{"duration":226.349,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MyMYuXh8E0Y","annotations":[{"segment":[85.06484021394776,103.39833247563],"label":"Gargling mouthwash"}]},"J4UFrrwKUQA":{"duration":172.97,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=J4UFrrwKUQA","annotations":[{"segment":[0.01,21.58107120124805],"label":"Sailing"},{"segment":[68.78966445397815,134.3421682277691],"label":"Sailing"}]},"gLfvk2SSj1c":{"duration":138.02,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=gLfvk2SSj1c","annotations":[{"segment":[5.813588143525741,8.182087016814005],"label":"Rock climbing"},{"segment":[40.91043508407003,47.80061362454498],"label":"Rock climbing"},{"segment":[56.84397259282372,62.87287877691108],"label":"Rock climbing"},{"segment":[100.98417858346333,103.99863167550703],"label":"Rock 
climbing"}]},"okh34JcrQQc":{"duration":55.57,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=okh34JcrQQc","annotations":[]},"1pOmOTz-bzk":{"duration":63.53,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=1pOmOTz-bzk","annotations":[{"segment":[4.24310191,59.56537807634755],"label":"Kayaking"}]},"lGldNWWnjzQ":{"duration":89.722,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=lGldNWWnjzQ","annotations":[]},"kkLRLb8TGwI":{"duration":126.87,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kkLRLb8TGwI","annotations":[{"segment":[15.834487638008921,19.001385165610706],"label":"Playing water polo"},{"segment":[21.178627215836933,103.91382496099844],"label":"Playing water polo"}]},"gLFCNQe9akU":{"duration":29.211,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gLFCNQe9akU","annotations":[]},"dSdZz_Royyc":{"duration":202.386,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dSdZz_Royyc","annotations":[{"segment":[32.828741372784165,172.6665531818552],"label":"Wakeboarding"}]},"PzrtfuUx7GY":{"duration":130.404,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=PzrtfuUx7GY","annotations":[]},"AXA-G5DdNrU":{"duration":90.047,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AXA-G5DdNrU","annotations":[{"segment":[0,14.469328182651116],"label":"Rafting"},{"segment":[64.479821707154,69.81802035569422],"label":"Rafting"},{"segment":[79.79202326365053,83.72543286115445],"label":"Rafting"}]},"0wwx4wnHv-U":{"duration":8.78,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0wwx4wnHv-U","annotations":[{"segment":[0.6385951638065523,6.255485959438378],"label":"Tennis serve with ball 
bouncing"}]},"zO5JMTzLDHw":{"duration":33.251,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=zO5JMTzLDHw","annotations":[]},"3-KLYPzd1zU":{"duration":117.702,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=3-KLYPzd1zU","annotations":[{"segment":[8.247266355140185,49.48359813084112],"label":"River tubing"},{"segment":[72.39267133956386,106.13286448598132],"label":"River tubing"}]},"waTjpd7yDkc":{"duration":71.541,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=waTjpd7yDkc","annotations":[{"segment":[0.01,71.501],"label":"Rollerblading"}]},"G0WjPZqVwPo":{"duration":212.39,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=G0WjPZqVwPo","annotations":[]},"KWhXvv1WtFM":{"duration":14.188,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=KWhXvv1WtFM","annotations":[{"segment":[0.5523010920436817,3.512634945397816],"label":"Doing fencing"}]},"LCyLWiw7n5Q":{"duration":116.263,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=LCyLWiw7n5Q","annotations":[{"segment":[0.01,109.27208510093801],"label":"Having an ice cream"}]},"dm5ZnhOeBBQ":{"duration":106.67,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=dm5ZnhOeBBQ","annotations":[{"segment":[49.75799124180968,57.579481503900155],"label":"Vacuuming floor"},{"segment":[86.53563694227769,95.35561532293292],"label":"Vacuuming floor"}]},"M-bUoaIqtDk":{"duration":29.7,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=M-bUoaIqtDk","annotations":[{"segment":[0.23165617781173833,29.37400334652842],"label":"Windsurfing"}]},"dukaFaotZGc":{"duration":117.401,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dukaFaotZGc","annotations":[{"segment":[0.2,115.56810577691108],"label":"Fixing the 
roof"}]},"9AqHhUuE9bE":{"duration":33.67,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=9AqHhUuE9bE","annotations":[{"segment":[0.5777820204368175,33.248728994227776],"label":"Polishing shoes"}]},"Tfqln6lL7q0":{"duration":122.346,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Tfqln6lL7q0","annotations":[{"segment":[12.976330733229329,117.55029017160686],"label":"Skiing"}]},"G0DPDo44wt8":{"duration":21.53,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=G0DPDo44wt8","annotations":[{"segment":[0.01,21.524897959183672],"label":"Clean and jerk"}]},"z-iuSgXKUcw":{"duration":169.18,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=z-iuSgXKUcw","annotations":[{"segment":[0.01,169.18061224489796],"label":"Playing violin"}]},"YGBldj7DUq4":{"duration":215.97,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=YGBldj7DUq4","annotations":[{"segment":[135.2203143525741,201.56751716068644],"label":"Kayaking"}]},"APQWGGPPjes":{"duration":149.583,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=APQWGGPPjes","annotations":[]},"CiCNA-Nr1QQ":{"duration":82.013,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=CiCNA-Nr1QQ","annotations":[{"segment":[0.2,6.781084897817681],"label":"Playing ten pins"},{"segment":[8.188479876610032,18.296134724300536],"label":"Playing ten pins"},{"segment":[27.89200957970292,47.21170428857971],"label":"Playing ten pins"},{"segment":[65.63578401095228,82.01274376417234],"label":"Playing ten 
pins"}]},"94w7SEcPDho":{"duration":230.993,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=94w7SEcPDho","annotations":[{"segment":[21.256659906396255,64.49054446177847],"label":"BMX"},{"segment":[71.69619188767551,112.04781747269891],"label":"BMX"},{"segment":[123.93713572542902,161.76678471138845],"label":"BMX"},{"segment":[180.14118564742589,220.51190639625585],"label":"BMX"}]},"PbzmcZ_IORE":{"duration":77.37,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PbzmcZ_IORE","annotations":[{"segment":[0,75.53114820592823],"label":"Walking the dog"}]},"Af_0zV79zsk":{"duration":83.17,"subset":"testing","resolution":"294x240","url":"https://www.youtube.com/watch?v=Af_0zV79zsk","annotations":[]},"WOkHZ33CgAs":{"duration":20.922,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=WOkHZ33CgAs","annotations":[{"segment":[5.342408736349453,11.053726989079564],"label":"Doing a powerbomb"}]},"cYvFlrAnlzA":{"duration":111.503,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cYvFlrAnlzA","annotations":[]},"jmS3NFo4XCc":{"duration":39.173,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=jmS3NFo4XCc","annotations":[{"segment":[3.422208224648986,34.16097138533541],"label":"Throwing darts"}]},"1uC0xzyx4Bc":{"duration":211.975,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1uC0xzyx4Bc","annotations":[{"segment":[118.64734536354564,182.03156966261108],"label":"Laying tile"}]},"NB4IBjUmrcI":{"duration":26.91,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=NB4IBjUmrcI","annotations":[{"segment":[0.24273722000000003,7.58459624],"label":"Doing kickboxing"},{"segment":[9.58677464,21.660394939157566],"label":"Doing 
kickboxing"}]},"rt1kYX3slRI":{"duration":69.68,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rt1kYX3slRI","annotations":[{"segment":[11.730215288611545,63.66556318252731],"label":"Using parallel bars"}]},"Ur_ToogEkCA":{"duration":14.44,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ur_ToogEkCA","annotations":[]},"df5rlVZD0Zc":{"duration":194.212,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=df5rlVZD0Zc","annotations":[]},"sgUMHHuAhZg":{"duration":203.663,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=sgUMHHuAhZg","annotations":[{"segment":[8.26086097049324,203.6619954648526],"label":"Capoeira"}]},"4OCbTYrThtc":{"duration":125.968,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4OCbTYrThtc","annotations":[{"segment":[2.15752492211838,18.437031152647975],"label":"Bullfighting"},{"segment":[25.498021806853583,30.20534890965732],"label":"Bullfighting"},{"segment":[77.2786199376947,122.60611370716512],"label":"Bullfighting"}]},"wE9sQbGdeAk":{"duration":109.459,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wE9sQbGdeAk","annotations":[]},"m1ECYUCexX8":{"duration":189.591,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=m1ECYUCexX8","annotations":[]},"KA6YYIl2z4E":{"duration":105.001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KA6YYIl2z4E","annotations":[{"segment":[34.78237327662507,77.72962598589956],"label":"Putting in contact lenses"}]},"1KVxpHAC08g":{"duration":137.207,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1KVxpHAC08g","annotations":[{"segment":[1.580983815095135,136.17822680574935],"label":"Having an ice 
cream"}]},"wKThOOUV6lY":{"duration":167.347,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=wKThOOUV6lY","annotations":[{"segment":[58.54546723868955,167.181],"label":"Tug of war"}]},"03BMaaMEcNo":{"duration":186.781,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=03BMaaMEcNo","annotations":[{"segment":[12.817042121684867,159.33913728549143],"label":"Surfing"}]},"IlCsGkFnRkc":{"duration":174.986,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=IlCsGkFnRkc","annotations":[{"segment":[0,174.78557823129253],"label":"Decorating the Christmas tree"}]},"wnnoaLzYqVQ":{"duration":180.512,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=wnnoaLzYqVQ","annotations":[{"segment":[33.51157046281851,154.32219003045836],"label":"Rafting"}]},"YnB0VW34wlY":{"duration":26.08,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=YnB0VW34wlY","annotations":[{"segment":[0.01,26.005],"label":"Chopping wood"}]},"T-rV3KKFOgg":{"duration":51.526,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=T-rV3KKFOgg","annotations":[{"segment":[10.4565770693984,11.258461804600891],"label":"Hand car wash"},{"segment":[21.76315183575354,24.569748408962262],"label":"Hand car wash"},{"segment":[30.503695449460707,36.99896180460089],"label":"Hand car wash"}]},"G6FhQuR3_88":{"duration":134.073,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=G6FhQuR3_88","annotations":[{"segment":[1.045803736349454,132.60791376911075],"label":"Playing congas"}]},"UZBHdQZc0ZM":{"duration":133.26,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UZBHdQZc0ZM","annotations":[{"segment":[8.93939274305666,24.947142538762776],"label":"Snow tubing"},{"segment":[29.10499962855657,37.42071380814416],"label":"Snow 
tubing"},{"segment":[55.50739214874718,61.53628492894818],"label":"Snow tubing"},{"segment":[78.16771328812335,98.12542731913358],"label":"Snow tubing"},{"segment":[101.24381958346335,107.6884980374415],"label":"Snow tubing"},{"segment":[112.26214081123244,127.43831910608425],"label":"Snow tubing"}]},"Fyi7pbkKk7w":{"duration":145.57,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Fyi7pbkKk7w","annotations":[{"segment":[27.02393383354382,137.84477013728548],"label":"Chopping wood"}]},"PMvs8Dbbxts":{"duration":117.981,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PMvs8Dbbxts","annotations":[]},"s2PFIKG5PYI":{"duration":17.53,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=s2PFIKG5PYI","annotations":[{"segment":[7.244513200000001,16.095723200000002],"label":"Polishing forniture"}]},"aYmIsBUmHTo":{"duration":142.2,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aYmIsBUmHTo","annotations":[]},"iKAs4UYyzSI":{"duration":32.555,"subset":"testing","resolution":"800x480","url":"https://www.youtube.com/watch?v=iKAs4UYyzSI","annotations":[]},"WzSZJ-yJX48":{"duration":78.25,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=WzSZJ-yJX48","annotations":[{"segment":[59.93972286782627,68.1188693728549],"label":"Cricket"}]},"Wm1CL_g91G4":{"duration":157.223,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Wm1CL_g91G4","annotations":[{"segment":[17.65991658441848,29.43319430736413],"label":"Snow tubing"},{"segment":[35.07455654960892,78.73379477219905],"label":"Snow tubing"},{"segment":[81.92239082216349,146.6754174352574],"label":"Snow 
tubing"}]},"UMUGmKRkaxo":{"duration":126.18,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UMUGmKRkaxo","annotations":[{"segment":[0.01,64.5649486677067],"label":"Ping-pong"}]},"OT98MiVje0g":{"duration":90.605,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=OT98MiVje0g","annotations":[{"segment":[7.770444617784712,87.75475975039002],"label":"Playing drums"}]},"eZdao9O8jrw":{"duration":227.28,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=eZdao9O8jrw","annotations":[{"segment":[118.42486423400936,165.58207065054603],"label":"Washing face"}]},"oW2v7KlfmJk":{"duration":7.593,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=oW2v7KlfmJk","annotations":[{"segment":[2.0374148642745706,6.112244592823713],"label":"Shuffleboard"}]},"48zOi9j1E0A":{"duration":164.77,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=48zOi9j1E0A","annotations":[{"segment":[65.03355061005162,118.75691765054603],"label":"Paintball"}]},"AjPT7Eki3AQ":{"duration":232.64,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=AjPT7Eki3AQ","annotations":[]},"VFOpGMVouVg":{"duration":203.523,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VFOpGMVouVg","annotations":[{"segment":[32.38582359620915,175.8046833073323],"label":"Braiding 
hair"}]},"xSiT1pgUEm8":{"duration":233.756,"subset":"training","resolution":"640x356","url":"https://www.youtube.com/watch?v=xSiT1pgUEm8","annotations":[{"segment":[0,222.0531809672387],"label":"Rafting"}]},"jw2Y3L3iSOw":{"duration":11.66,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jw2Y3L3iSOw","annotations":[]},"0T14tLwBafQ":{"duration":38.429,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=0T14tLwBafQ","annotations":[{"segment":[2.5869946605527883,28.592812417562133],"label":"Playing beach volleyball"}]},"F559bkkKSp8":{"duration":21.502,"subset":"validation","resolution":"294x240","url":"https://www.youtube.com/watch?v=F559bkkKSp8","annotations":[{"segment":[0.2,21.299],"label":"Camel ride"}]},"J7B7DAIg-MA":{"duration":59.676,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=J7B7DAIg-MA","annotations":[]},"VM-ldOw7e4Y":{"duration":222.82,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VM-ldOw7e4Y","annotations":[{"segment":[16.670527301092044,213.26298595943837],"label":"Long jump"}]},"CCRPXH8ui-s":{"duration":229.692,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CCRPXH8ui-s","annotations":[{"segment":[37.50798166324645,63.227744255686225],"label":"Fun sliding down"},{"segment":[206.47253313857996,211.8308154372967],"label":"Fun sliding down"}]},"U37UAWdI-vY":{"duration":88.03,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=U37UAWdI-vY","annotations":[{"segment":[2.8817176287051485,82.90253354134164],"label":"Playing harmonica"}]},"7m--xUKvqkc":{"duration":99.521,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=7m--xUKvqkc","annotations":[{"segment":[14.86071028037383,79.56671962616822],"label":"Mooping 
floor"}]},"yrvV4Ncqyf0":{"duration":120.72,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yrvV4Ncqyf0","annotations":[{"segment":[36.7302672,67.2181032],"label":"Washing dishes"},{"segment":[29.944721274570984,94.16579017160687],"label":"Washing dishes"}]},"5SyvdU5LWPk":{"duration":73.863,"subset":"training","resolution":"640x358","url":"https://www.youtube.com/watch?v=5SyvdU5LWPk","annotations":[{"segment":[8.757509135739578,11.63826871986444],"label":"Hurling"},{"segment":[17.39978788811416,20.050086234009363],"label":"Hurling"},{"segment":[22.930845750390013,27.07913945397816],"label":"Hurling"},{"segment":[36.87372180967239,60.61118022464898],"label":"Hurling"}]},"J3jICOa0WCk":{"duration":213.07,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=J3jICOa0WCk","annotations":[{"segment":[21.93818650705212,213.06632653061226],"label":"Dodgeball"}]},"zfi8f5NnYaY":{"duration":45.95,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=zfi8f5NnYaY","annotations":[{"segment":[0,14.758360419656787],"label":"Playing polo"}]},"H2KVO7LWU5w":{"duration":51.571,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=H2KVO7LWU5w","annotations":[{"segment":[0.2,51.541],"label":"Blowing leaves"}]},"JXucFXh58VE":{"duration":144.893,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JXucFXh58VE","annotations":[{"segment":[18.309350823012515,32.549957018688914],"label":"Ice fishing"},{"segment":[37.52286711876639,50.407225105330745],"label":"Ice fishing"},{"segment":[68.49053456015793,76.40198244305772],"label":"Ice fishing"},{"segment":[78.88843749297972,85.21759580187208],"label":"Ice fishing"},{"segment":[124.32275249609985,131.55607627769112],"label":"Ice 
fishing"}]},"eGl_I8h832w":{"duration":73.19,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eGl_I8h832w","annotations":[{"segment":[29.686683576186585,60.40098312231809],"label":"Getting a haircut"}]},"G7tMapFIcHg":{"duration":140.574,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=G7tMapFIcHg","annotations":[]},"MfFM_a26bR8":{"duration":107.88,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MfFM_a26bR8","annotations":[{"segment":[42.247573980000006,107.33628393135724],"label":"Horseback riding"},{"segment":[1.4542089200000001,42.22007956318253],"label":"Horseback riding"}]},"x9BN93AXNTQ":{"duration":196.26,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=x9BN93AXNTQ","annotations":[{"segment":[0.01,196.201],"label":"Drinking coffee"}]},"r-_JFgDJRrQ":{"duration":184.924,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=r-_JFgDJRrQ","annotations":[{"segment":[54.52509694673501,79.04696594394387],"label":"Changing car wheel"},{"segment":[104.434312670466,114.24306026934956],"label":"Changing car wheel"},{"segment":[121.16688210385558,157.5169463120125],"label":"Changing car wheel"}]},"lZcTesK6CfA":{"duration":225.72,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=lZcTesK6CfA","annotations":[{"segment":[61.78804680187208,68.12528237129486],"label":"Washing hands"},{"segment":[73.4063120124805,77.2790670826833],"label":"Washing hands"},{"segment":[83.26423400936038,93.12215600624025],"label":"Washing hands"},{"segment":[103.68421528861154,104.74042121684867],"label":"Washing hands"},{"segment":[112.838,118.47109828393135],"label":"Washing hands"},{"segment":[126.92074570982838,133.96211856474258],"label":"Washing hands"},{"segment":[223.7396224648986,225.476],"label":"Washing 
hands"}]},"7x_1tRem1gA":{"duration":58.724,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7x_1tRem1gA","annotations":[{"segment":[0.01,57.02195921728315],"label":"Using the monkey bar"}]},"qA_KTu8oTW4":{"duration":130.961,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qA_KTu8oTW4","annotations":[{"segment":[38.818230998439944,128.1001622948518],"label":"Baton twirling"}]},"a9ItMklOTyE":{"duration":100.15,"subset":"training","resolution":"176x144","url":"https://www.youtube.com/watch?v=a9ItMklOTyE","annotations":[{"segment":[20.51653009282371,98.5261502176287],"label":"Cricket"}]},"yduDG8EcM0A":{"duration":67.942,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=yduDG8EcM0A","annotations":[{"segment":[0,67.941586],"label":"Making a lemonade"}]},"sL7KD7QBm3U":{"duration":14.954,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=sL7KD7QBm3U","annotations":[]},"s0N0PzdwVik":{"duration":42.7,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=s0N0PzdwVik","annotations":[{"segment":[0.01,40.89732371294852],"label":"Playing bagpipes"}]},"uty4eGb_vvU":{"duration":210.63,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=uty4eGb_vvU","annotations":[]},"Z6b4oFjIznU":{"duration":68.87,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Z6b4oFjIznU","annotations":[]},"dc9m-G9CKmQ":{"duration":137.463,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dc9m-G9CKmQ","annotations":[{"segment":[42.89590126119596,124.87758817708381],"label":"Sharpening knives"}]},"AWryNQMbcd8":{"duration":3.251,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=AWryNQMbcd8","annotations":[{"segment":[0.2,3.001],"label":"Swinging at the 
playground"}]},"r1y_ASZDdEo":{"duration":188.31,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=r1y_ASZDdEo","annotations":[{"segment":[56.99357785959438,60.51895380967239],"label":"Cricket"},{"segment":[60.51895380967239,62.869204443057725],"label":"Cricket"},{"segment":[67.86348703900155,72.27020697659907],"label":"Cricket"},{"segment":[73.44533229329173,78.43961488923557],"label":"Cricket"},{"segment":[86.37171077691107,112.81203040249609],"label":"Cricket"}]},"zTzF5R_VhGk":{"duration":203.8,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zTzF5R_VhGk","annotations":[{"segment":[30.19769890795632,197.39758970358815],"label":"Playing harmonica"}]},"q_QR5HoROhI":{"duration":166.14,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=q_QR5HoROhI","annotations":[]},"m4EcgRjCpi8":{"duration":186.039,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=m4EcgRjCpi8","annotations":[{"segment":[26.991515934477377,163.9807150858034],"label":"Using the rowing machine"}]},"IWoCFfIwv2g":{"duration":76.53,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=IWoCFfIwv2g","annotations":[]},"aKvCtSitxJY":{"duration":96.061,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=aKvCtSitxJY","annotations":[{"segment":[18.88190324729848,71.06881913514894],"label":"Hula hoop"}]},"Hxr8DZcDuLI":{"duration":122.602,"subset":"testing","resolution":"240x180","url":"https://www.youtube.com/watch?v=Hxr8DZcDuLI","annotations":[]},"wfUKi83iJCU":{"duration":116.31,"subset":"training","resolution":"630x360","url":"https://www.youtube.com/watch?v=wfUKi83iJCU","annotations":[{"segment":[26.491540789391575,95.98647313416537],"label":"Playing water 
polo"}]},"y_Ak7a3oXRY":{"duration":34.041,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=y_Ak7a3oXRY","annotations":[{"segment":[5.514341379334703,21.686206959501558],"label":"Building sandcastles"}]},"rgrzN8ELIxI":{"duration":208.562,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rgrzN8ELIxI","annotations":[{"segment":[25.053425252730108,208.56163265306122],"label":"Surfing"}]},"lh8ths6sKAE":{"duration":32.369,"subset":"validation","resolution":"176x144","url":"https://www.youtube.com/watch?v=lh8ths6sKAE","annotations":[{"segment":[4.746723572542901,18.633414875195005],"label":"Mooping floor"}]},"GVa7TvByL5g":{"duration":47.11,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GVa7TvByL5g","annotations":[]},"9r5orsK1Tpw":{"duration":73.38,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9r5orsK1Tpw","annotations":[]},"LANB732DHbo":{"duration":99.68,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LANB732DHbo","annotations":[{"segment":[1.6317316638065522,94.56273499297971],"label":"Using the balance beam"}]},"dsYLNeumBig":{"duration":79.02,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dsYLNeumBig","annotations":[{"segment":[32.05079152825977,62.005954266146645],"label":"Vacuuming floor"}]},"R4oYA0Zu-m0":{"duration":215.737,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=R4oYA0Zu-m0","annotations":[{"segment":[149.97526302191642,210.3679866658263],"label":"Wakeboarding"}]},"-ceM4HHukhE":{"duration":210.977,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=-ceM4HHukhE","annotations":[{"segment":[122.10964699843994,207.0268678221529],"label":"Using the rowing 
machine"}]},"0kfJ7Lu4tvo":{"duration":221.589,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0kfJ7Lu4tvo","annotations":[{"segment":[55.483155520664724,61.686241481858566],"label":"Changing car wheel"},{"segment":[102.35091810394255,124.06172002929246],"label":"Changing car wheel"},{"segment":[145.0832901474884,149.90791279756618],"label":"Changing car wheel"},{"segment":[171.9633306264931,174.37564195153197],"label":"Changing car wheel"}]},"OzRxQDkrhTg":{"duration":65.434,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OzRxQDkrhTg","annotations":[{"segment":[0,53.0391887675507],"label":"Hurling"}]},"aa0MLYA8F7s":{"duration":90.33,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aa0MLYA8F7s","annotations":[{"segment":[23.098414976599063,51.70119656786272],"label":"Using uneven bars"}]},"pCEg_GoXoU4":{"duration":220.079,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pCEg_GoXoU4","annotations":[{"segment":[28.143653666146644,57.66016848673947],"label":"Scuba diving"},{"segment":[73.44807176287053,112.57461466458658],"label":"Scuba diving"},{"segment":[149.98508112324492,204.5563120124805],"label":"Scuba diving"}]},"jzCnWUUUviE":{"duration":15.74,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=jzCnWUUUviE","annotations":[{"segment":[0.6254063962558501,13.378791731669267],"label":"Clean and jerk"}]},"u2hzK9YyIXY":{"duration":43.82,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=u2hzK9YyIXY","annotations":[{"segment":[2.1825612,39.180219112324494],"label":"Cleaning shoes"}]},"B_9S_qzlD38":{"duration":15.419,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=B_9S_qzlD38","annotations":[{"segment":[0.01,9.046235569422779],"label":"Building 
sandcastles"}]},"hRuHqoXEvsI":{"duration":28.004,"subset":"validation","resolution":"400x224","url":"https://www.youtube.com/watch?v=hRuHqoXEvsI","annotations":[{"segment":[0,26.69265977379095],"label":"Baton twirling"}]},"anfZnOQGPk0":{"duration":122.28,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=anfZnOQGPk0","annotations":[{"segment":[7.530573322932917,37.73804188],"label":"Breakdancing"},{"segment":[43.285703999999996,110.67083073322932],"label":"Breakdancing"}]},"hyv8OkQcgEA":{"duration":137.532,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=hyv8OkQcgEA","annotations":[{"segment":[0.01,87.24261621460958],"label":"Blowing leaves"},{"segment":[99.66301808376846,137.281],"label":"Blowing leaves"}]},"4IMiN1lM7JA":{"duration":79.529,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=4IMiN1lM7JA","annotations":[]},"Kcy0A0DuckE":{"duration":118.56,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Kcy0A0DuckE","annotations":[{"segment":[10.81886919,46.853323849999995],"label":"Dodgeball"},{"segment":[10.81886919,46.853323849999995],"label":"Dodgeball"}]},"yB35e1u3pJs":{"duration":176.727,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=yB35e1u3pJs","annotations":[{"segment":[62.115585287153465,122.30724058068611],"label":"Putting in contact lenses"}]},"hgLDMHCcw4k":{"duration":93.229,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hgLDMHCcw4k","annotations":[{"segment":[3.9777376046938318,92.09768620282466],"label":"Kneeling"}]},"aXI9v77XV7A":{"duration":141.27,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aXI9v77XV7A","annotations":[{"segment":[6.608424336973479,132.38876755070203],"label":"Rock 
climbing"}]},"eTflVlhe0YA":{"duration":13.84,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eTflVlhe0YA","annotations":[]},"aWKZSqbOi2Q":{"duration":191.22,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aWKZSqbOi2Q","annotations":[{"segment":[5.814914176287052,104.28155775999998],"label":"Cheerleading"},{"segment":[110.78157007644306,188.31375909360375],"label":"Cheerleading"}]},"1YWyQUYAyvs":{"duration":37.97,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=1YWyQUYAyvs","annotations":[]},"7o-2My6U3GU":{"duration":107.88,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=7o-2My6U3GU","annotations":[{"segment":[10.43456208424337,95.59405264274571],"label":"Shoveling snow"}]},"HnM44lX65cQ":{"duration":58.956,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=HnM44lX65cQ","annotations":[{"segment":[0,56.62298907956318],"label":"Sharpening knives"}]},"u1upxlAgsqM":{"duration":47.07,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=u1upxlAgsqM","annotations":[{"segment":[6.300405616224649,41.68524180967239],"label":"Layup drill in 
basketball"}]},"E3dV8LdAPx8":{"duration":168.16,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=E3dV8LdAPx8","annotations":[{"segment":[8.2733736,162.38746686193977],"label":"Volleyball"}]},"YMbZ7RFRuYI":{"duration":179.491,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YMbZ7RFRuYI","annotations":[{"segment":[27.861590980646028,37.66215062207931],"label":"BMX"},{"segment":[58.383333863966804,60.90347777176394],"label":"BMX"},{"segment":[84.98485289071427,87.50499609984399],"label":"BMX"},{"segment":[111.30635503900156,122.2269785522621],"label":"BMX"}]},"Ia3Aa4AO-_E":{"duration":99.99000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ia3Aa4AO-_E","annotations":[{"segment":[35.40814808268331,89.22229384711387],"label":"Brushing teeth"}]},"OhydQlKoSUw":{"duration":74.629,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OhydQlKoSUw","annotations":[{"segment":[18.919122615244746,61.065106533513045],"label":"Surfing"}]},"Ieb7EkMxpJk":{"duration":29.931,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ieb7EkMxpJk","annotations":[{"segment":[11.234510140405616,24.33366926677067],"label":"Grooming dog"}]},"ean7ZR6XmG4":{"duration":121.603,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=ean7ZR6XmG4","annotations":[{"segment":[38.70045523244929,42.68432562402496],"label":"Rock-paper-scissors"}]},"xgb-qIgWHw0":{"duration":68.801,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xgb-qIgWHw0","annotations":[{"segment":[0.2,59.75019812792512],"label":"Playing drums"}]},"RZkwDTyxMZI":{"duration":109.37,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=RZkwDTyxMZI","annotations":[{"segment":[34.3609368174727,78.52710374414977],"label":"Ironing 
clothes"}]},"3xUAyMuqBrQ":{"duration":29.443,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3xUAyMuqBrQ","annotations":[{"segment":[4.543960998439938,8.812530421216849],"label":"Making an omelette"},{"segment":[9.868198127925117,13.03520124804992],"label":"Making an omelette"}]},"mfENhAPlES8":{"duration":31.464,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=mfENhAPlES8","annotations":[{"segment":[0.2,28.03870826833073],"label":"Putting in contact lenses"}]},"pbsFHiYd57w":{"duration":142.85,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=pbsFHiYd57w","annotations":[]},"i5jZVSzLbYQ":{"duration":150.0,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=i5jZVSzLbYQ","annotations":[{"segment":[63.06,121.78747502808113],"label":"Breakdancing"},{"segment":[8.882771691107646,62.41315898751951],"label":"Breakdancing"}]},"tqqWTxQ5-kY":{"duration":50.02,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tqqWTxQ5-kY","annotations":[{"segment":[20.744296648341848,27.44422992871568],"label":"Getting a piercing"}]},"NVR52Aed_7s":{"duration":21.572,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NVR52Aed_7s","annotations":[{"segment":[0.01,21.47378939157566],"label":"Kneeling"}]},"vwiaVgMqw5o":{"duration":79.529,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=vwiaVgMqw5o","annotations":[]},"H80bs53Arrw":{"duration":164.189,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=H80bs53Arrw","annotations":[{"segment":[28.372602803738317,122.69233644859813],"label":"Hula 
hoop"}]},"Rx9SIyFMfcY":{"duration":160.31,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Rx9SIyFMfcY","annotations":[{"segment":[1.2493759750390014,160.17],"label":"Spinning"}]},"5jfQNanBKAM":{"duration":69.126,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5jfQNanBKAM","annotations":[{"segment":[4.205782206798475,68.26308043342142],"label":"Canoeing"}]},"M4npKXFKxPA":{"duration":28.75,"subset":"validation","resolution":"270x480","url":"https://www.youtube.com/watch?v=M4npKXFKxPA","annotations":[{"segment":[14.171344380655226,25.472543063962558],"label":"Using uneven bars"}]},"5_M10vevgJg":{"duration":206.89,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=5_M10vevgJg","annotations":[{"segment":[0.01,204.30770798178867],"label":"Waxing skis"}]},"cp52LdlmlUk":{"duration":129.96,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=cp52LdlmlUk","annotations":[{"segment":[0,129.962107],"label":"Doing step aerobics"}]},"EKyV_WFsJH0":{"duration":129.08,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EKyV_WFsJH0","annotations":[{"segment":[12.283718254852642,89.61073153130208],"label":"Layup drill in basketball"}]},"ELLVAzDqshU":{"duration":44.79,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ELLVAzDqshU","annotations":[]},"f4IL30BPe2w":{"duration":132.656,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=f4IL30BPe2w","annotations":[{"segment":[0.01,132.621],"label":"Disc dog"}]},"PZj0ODkGrUg":{"duration":211.581,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PZj0ODkGrUg","annotations":[]},"rBQO0Nf05vg":{"duration":76.719,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rBQO0Nf05vg","annotations":[{"segment":[7.767474975452279,67.87577970093457],"label":"Installing 
carpet"}]},"WXMb7XF6k8o":{"duration":236.473,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WXMb7XF6k8o","annotations":[{"segment":[55.33657019750177,205.8520411347066],"label":"Blow-drying hair"}]},"KgM8_YBJbM4":{"duration":58.31,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=KgM8_YBJbM4","annotations":[{"segment":[0,58.305306],"label":"Spinning"}]},"1Z3mjpu1I2A":{"duration":152.6,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1Z3mjpu1I2A","annotations":[{"segment":[7.258857254290172,140.77423166926678],"label":"Washing dishes"}]},"Gfsk28SzgXk":{"duration":139.065,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Gfsk28SzgXk","annotations":[{"segment":[99.79653956226278,138.4134613928775],"label":"Waterskiing"}]},"oEdRUyN6Kfo":{"duration":41.471,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oEdRUyN6Kfo","annotations":[{"segment":[22.56304056162247,29.868552262090486],"label":"Hula hoop"}]},"sD4B_NfJ-II":{"duration":176.031,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sD4B_NfJ-II","annotations":[]},"wD-9KvI1-AI":{"duration":115.589,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=wD-9KvI1-AI","annotations":[{"segment":[40.93399066770671,90.52362693915757],"label":"Gargling mouthwash"}]},"5M-WpzuvSRw":{"duration":28.91,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=5M-WpzuvSRw","annotations":[]},"_0sDAdyMtnk":{"duration":126.25,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=_0sDAdyMtnk","annotations":[{"segment":[7.2872385460218405,17.528762989079564],"label":"Playing violin"},{"segment":[21.07390606552262,24.816001535101403],"label":"Playing violin"},{"segment":[28.164192218408733,59.873527513260534],"label":"Playing 
violin"},{"segment":[79.1748620405616,82.52305272386896],"label":"Playing violin"},{"segment":[90.5981008424337,122.50438853042121],"label":"Playing violin"}]},"lGAK_3Jp2I8":{"duration":40.404,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lGAK_3Jp2I8","annotations":[{"segment":[0.2,40.201],"label":"Doing a powerbomb"},{"segment":[13.11633073322933,15.134227769110765],"label":"Doing a powerbomb"}]},"38wV6msixSE":{"duration":218.13,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=38wV6msixSE","annotations":[]},"hvrKRg166eQ":{"duration":186.06,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=hvrKRg166eQ","annotations":[]},"_akwfjIbsUA":{"duration":26.216,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=_akwfjIbsUA","annotations":[{"segment":[8.84716321802827,26.01532879818594],"label":"Fun sliding down"}]},"AUPs7Ukfc1I":{"duration":136.32999999999998,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=AUPs7Ukfc1I","annotations":[{"segment":[9.570353939157567,112.29215288611545],"label":"Shaving"}]},"3CA0DKDv060":{"duration":192.633,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3CA0DKDv060","annotations":[]},"nD4VObLUOuE":{"duration":92.02000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nD4VObLUOuE","annotations":[{"segment":[5.02704104524181,90.48673881435258],"label":"Hammer throw"}]},"jEhjwT1aiLM":{"duration":192.006,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jEhjwT1aiLM","annotations":[]},"ycYPyXN0CPg":{"duration":47.091,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ycYPyXN0CPg","annotations":[{"segment":[0.01,46.45899624836966],"label":"Painting 
fence"}]},"NK0WynwKc34":{"duration":19.714,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=NK0WynwKc34","annotations":[{"segment":[0,19.51374149659864],"label":"Playing congas"}]},"-EKpX0t44pQ":{"duration":132.82,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-EKpX0t44pQ","annotations":[{"segment":[20.71154446177847,97.56607020280812],"label":"Getting a tattoo"}]},"k8YJwiYJI9g":{"duration":101.75,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=k8YJwiYJI9g","annotations":[]},"RogHmanNFRU":{"duration":49.78,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=RogHmanNFRU","annotations":[]},"v_UmAH95xls":{"duration":117.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=v_UmAH95xls","annotations":[{"segment":[0.01,117.05179138321995],"label":"Shaving legs"}]},"H1_5M9mQ79w":{"duration":166.63,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=H1_5M9mQ79w","annotations":[{"segment":[6.758620904836194,159.08753822152886],"label":"Playing harmonica"}]},"DHfiz3MNbcc":{"duration":70.87,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=DHfiz3MNbcc","annotations":[{"segment":[0,63.4599535101404],"label":"Mixing drinks"}]},"s3EHHOAkQYE":{"duration":238.32999999999998,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=s3EHHOAkQYE","annotations":[{"segment":[31.04598948991973,215.09107688525228],"label":"Fixing bicycle"}]},"8LB2tdMOZ6g":{"duration":40.96,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=8LB2tdMOZ6g","annotations":[{"segment":[11.363397815912636,25.025010920436817],"label":"Playing beach volleyball"},{"segment":[27.642422776911076,39.00582059282372],"label":"Playing beach 
volleyball"}]},"FKtYeu2bJpA":{"duration":151.51,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=FKtYeu2bJpA","annotations":[]},"e4YsOfQR3sI":{"duration":160.5,"subset":"training","resolution":"320x176","url":"https://www.youtube.com/watch?v=e4YsOfQR3sI","annotations":[{"segment":[5.5084542647012,13.771135661753],"label":"Playing piano"}]},"yowQI5JxGqo":{"duration":121.766,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=yowQI5JxGqo","annotations":[]},"Ye6_xb7h7jc":{"duration":103.747,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ye6_xb7h7jc","annotations":[{"segment":[0.01,96.6770195611615],"label":"Decorating the Christmas tree"}]},"rzm4V_McRhQ":{"duration":69.2,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=rzm4V_McRhQ","annotations":[{"segment":[7.281610729329172,62.5139765577223],"label":"Mixing drinks"}]},"rA7qVOp47Qg":{"duration":154.82999999999998,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=rA7qVOp47Qg","annotations":[{"segment":[5.190075380655227,152.44337687831515],"label":"Horseback riding"}]},"zL7Rz4I8UyA":{"duration":27.56,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zL7Rz4I8UyA","annotations":[{"segment":[4.127863139015357,14.146530879875197],"label":"Playing polo"}]},"bLpeUxij7YY":{"duration":66.64,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bLpeUxij7YY","annotations":[{"segment":[2.2862496099843996,64.95027301092044],"label":"Snatch"}]},"BB8YBkGNgVU":{"duration":237.98,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=BB8YBkGNgVU","annotations":[]},"xFDQTAemadA":{"duration":114.498,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xFDQTAemadA","annotations":[{"segment":[0.01,114.49759637188208],"label":"Calf 
roping"}]},"ZNsX4KYJlVQ":{"duration":10.566,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZNsX4KYJlVQ","annotations":[{"segment":[0.8570734914196568,2.307505553822153],"label":"Throwing darts"},{"segment":[5.735799519500781,7.730143605304212],"label":"Throwing darts"}]},"8r167TmBebg":{"duration":78.623,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=8r167TmBebg","annotations":[{"segment":[0.01,78.561],"label":"Tug of war"}]},"eLcqu3-3lw4":{"duration":86.472,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=eLcqu3-3lw4","annotations":[]},"Rvx2EoMScKg":{"duration":130.66,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Rvx2EoMScKg","annotations":[{"segment":[12.736056942277692,87.72596021840874],"label":"Playing piano"}]},"UGWOtDmIcns":{"duration":89.05,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=UGWOtDmIcns","annotations":[]},"I8jhEprzTN8":{"duration":202.246,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=I8jhEprzTN8","annotations":[{"segment":[9.149966216335729,171.95626165182662],"label":"Beach soccer"}]},"E0DbrJVJUho":{"duration":184.62,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=E0DbrJVJUho","annotations":[{"segment":[46.05329173166927,152.55152886115445],"label":"Cleaning windows"}]},"lMFp8FPeDHI":{"duration":124.483,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=lMFp8FPeDHI","annotations":[{"segment":[0.01,124.441],"label":"Using the monkey 
bar"}]},"IWXIEfaltJ4":{"duration":228.88,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IWXIEfaltJ4","annotations":[{"segment":[44.99016714954312,76.05480637184671],"label":"Plastering"},{"segment":[90.33739911773341,228.878548],"label":"Plastering"}]},"TkNCytAQ5cw":{"duration":100.53999999999999,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=TkNCytAQ5cw","annotations":[{"segment":[12.184113199999999,29.88478236],"label":"Polishing shoes"},{"segment":[63.45105304212168,98.54496879875194],"label":"Polishing shoes"}]},"ukPz_13Agis":{"duration":38.71,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=ukPz_13Agis","annotations":[{"segment":[0.01,34.1853143525741],"label":"Layup drill in basketball"}]},"f14r3tC-Y4E":{"duration":203.11,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=f14r3tC-Y4E","annotations":[{"segment":[0.950569157460176,195.18353366515612],"label":"Sailing"}]},"DOI6tsATsE4":{"duration":6.46,"subset":"testing","resolution":"480x480","url":"https://www.youtube.com/watch?v=DOI6tsATsE4","annotations":[]},"qxQWNu_MN94":{"duration":6.97,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=qxQWNu_MN94","annotations":[{"segment":[1.7240561622464898,4.525647425897036],"label":"Plataform diving"}]},"wJaFA5a6YAg":{"duration":52.87,"subset":"testing","resolution":"810x480","url":"https://www.youtube.com/watch?v=wJaFA5a6YAg","annotations":[]},"TVbPV7X49tw":{"duration":94.0,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TVbPV7X49tw","annotations":[{"segment":[3.114512102395946,93.15585263977913],"label":"Doing motocross"}]},"Q-dFr9ZS29s":{"duration":96.92,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=Q-dFr9ZS29s","annotations":[{"segment":[0.01,96.920112],"label":"Removing 
curlers"}]},"W01QffZYsxQ":{"duration":18.391,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=W01QffZYsxQ","annotations":[{"segment":[1.174160686427457,17.09692511700468],"label":"Doing crunches"}]},"eQ9cmxPKvT8":{"duration":225.13,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eQ9cmxPKvT8","annotations":[{"segment":[25.565561955147533,202.77346573081996],"label":"Rollerblading"}]},"cpy_BFGQoQ8":{"duration":93.6,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cpy_BFGQoQ8","annotations":[{"segment":[8.514359634301728,54.731002332121044],"label":"Slacklining"}]},"yAa24V7sXJM":{"duration":203.06,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yAa24V7sXJM","annotations":[{"segment":[22.17487556645123,24.075579186432766],"label":"Washing hands"},{"segment":[28.827338236386602,32.311961539686074],"label":"Washing hands"},{"segment":[34.212665159667615,71.27638574930752],"label":"Washing hands"},{"segment":[161.5598076984304,165.04443067550704],"label":"Washing hands"},{"segment":[176.76543630889236,186.58573832605305],"label":"Washing 
hands"}]},"pNcQf-aDNP0":{"duration":144.196,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pNcQf-aDNP0","annotations":[]},"vBpYwyXfE0o":{"duration":200.458,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vBpYwyXfE0o","annotations":[{"segment":[84.0912464898596,198.5251716068643],"label":"Skiing"}]},"EZyLBq2jJtE":{"duration":62.63,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=EZyLBq2jJtE","annotations":[]},"p8tlnMtsTf8":{"duration":123.136,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=p8tlnMtsTf8","annotations":[{"segment":[29.19903863365419,106.23071292375505],"label":"Surfing"}]},"vZmDiMGYCo0":{"duration":134.003,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vZmDiMGYCo0","annotations":[{"segment":[54.135002499125825,116.74294642435947],"label":"Fun sliding down"}]},"P06Slwn3JG0":{"duration":65.667,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=P06Slwn3JG0","annotations":[{"segment":[10.654083153802343,26.942537206250154],"label":"Raking leaves"},{"segment":[29.093842458460244,36.572189287571504],"label":"Raking leaves"}]},"kLpYpfT5bok":{"duration":215.04,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=kLpYpfT5bok","annotations":[{"segment":[30.48069945112001,192.93277894335304],"label":"Capoeira"}]},"0Q9lJXWwjFg":{"duration":93.75999999999999,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0Q9lJXWwjFg","annotations":[]},"Jj7Xcisw62E":{"duration":99.683,"subset":"training","resolution":"320x218","url":"https://www.youtube.com/watch?v=Jj7Xcisw62E","annotations":[{"segment":[0,62.51547166989079],"label":"Arm 
wrestling"}]},"ExNd4o46kao":{"duration":170.203,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ExNd4o46kao","annotations":[{"segment":[0,47.529182204368176],"label":"Shuffleboard"},{"segment":[54.69838845865835,58.68128082215289],"label":"Shuffleboard"},{"segment":[89.48231509984399,93.73073362090484],"label":"Shuffleboard"},{"segment":[142.32202045553822,147.63254360686426],"label":"Shuffleboard"},{"segment":[153.47411907332292,158.2535899095164],"label":"Shuffleboard"},{"segment":[161.43990380031204,166.48490079407176],"label":"Shuffleboard"}]},"lVOBMs6op7o":{"duration":11.01,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=lVOBMs6op7o","annotations":[{"segment":[0.01,10.961],"label":"Drinking coffee"}]},"cUdIbmXb2yI":{"duration":102.49000000000001,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=cUdIbmXb2yI","annotations":[{"segment":[33.73790556786272,71.95287917316692],"label":"Cumbia"},{"segment":[73.23204146957879,73.71172733073324],"label":"Cumbia"}]},"b7KUDaXZTfU":{"duration":116.449,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=b7KUDaXZTfU","annotations":[]},"6KG4p_pfMQc":{"duration":12.26,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6KG4p_pfMQc","annotations":[{"segment":[0.01,12.260135],"label":"Playing kickball"}]},"7QA5qyCXwyg":{"duration":238.353,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=7QA5qyCXwyg","annotations":[{"segment":[0.01,238.35283446712018],"label":"Playing 
congas"}]},"zg6BRB4a3Fo":{"duration":84.68,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zg6BRB4a3Fo","annotations":[{"segment":[30.081154297005725,41.15044401663189],"label":"Spinning"},{"segment":[53.142174546226904,61.83947361164746],"label":"Spinning"},{"segment":[70.27321815993407,82.00139417239512],"label":"Spinning"}]},"02fTz-0Kra8":{"duration":170.226,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=02fTz-0Kra8","annotations":[]},"jr7JA5eKkwY":{"duration":186.016,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jr7JA5eKkwY","annotations":[{"segment":[0.01,173.50489547581904],"label":"Blow-drying hair"}]},"I3DxLmTo9Y4":{"duration":196.12,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=I3DxLmTo9Y4","annotations":[]},"rrKGM5hck1A":{"duration":99.15,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=rrKGM5hck1A","annotations":[{"segment":[6.032480330733229,99.14922902494331],"label":"Tango"}]},"U-mwyUsNOMU":{"duration":204.847,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=U-mwyUsNOMU","annotations":[{"segment":[0.2,162.66277214040562],"label":"Ice fishing"}]},"PPJ6gSuk33o":{"duration":69.96000000000001,"subset":"training","resolution":"800x480","url":"https://www.youtube.com/watch?v=PPJ6gSuk33o","annotations":[{"segment":[0,69.961722],"label":"Shaving legs"}]},"BC1VpNRFtXo":{"duration":155.806,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BC1VpNRFtXo","annotations":[{"segment":[24.811650550592727,51.25535615806937],"label":"Arm wrestling"}]},"ZKo1lM6TJTY":{"duration":12.03,"subset":"training","resolution":"252x240","url":"https://www.youtube.com/watch?v=ZKo1lM6TJTY","annotations":[{"segment":[0.2908471021840874,10.423584855694227],"label":"Hammer 
throw"}]},"OfwGarnPTcU":{"duration":107.207,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=OfwGarnPTcU","annotations":[]},"qPZwXF1Xcpw":{"duration":109.41,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=qPZwXF1Xcpw","annotations":[{"segment":[20.99489609672387,25.944912249609985],"label":"Shot put"},{"segment":[61.61916659282372,64.52090019968799],"label":"Shot put"},{"segment":[92.85547541965678,96.95204051170046],"label":"Shot put"}]},"Y_ZFCDVhS-4":{"duration":210.791,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Y_ZFCDVhS-4","annotations":[]},"3AZgr5HkSDY":{"duration":226.79,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3AZgr5HkSDY","annotations":[{"segment":[13.090801857924657,206.97619057722312],"label":"Doing step aerobics"}]},"LjPKKqe-VO0":{"duration":43.1,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LjPKKqe-VO0","annotations":[]},"u7OvguFW-Hs":{"duration":17.462,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=u7OvguFW-Hs","annotations":[{"segment":[5.448020592823713,14.682415497659907],"label":"Doing a powerbomb"}]},"B9ZdRhm2yQs":{"duration":168.972,"subset":"testing","resolution":"810x480","url":"https://www.youtube.com/watch?v=B9ZdRhm2yQs","annotations":[]},"rggwT0-G5sk":{"duration":78.55,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=rggwT0-G5sk","annotations":[{"segment":[0.612738728549142,78.35310657596372],"label":"Playing 
squash"}]},"L2J_QI2_qms":{"duration":142.57,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=L2J_QI2_qms","annotations":[{"segment":[12.2125462,127.69554542433697],"label":"Tumbling"}]},"tmBTO0cA_qw":{"duration":203.663,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tmBTO0cA_qw","annotations":[{"segment":[31.44848517940718,48.919865834633384],"label":"Sharpening knives"},{"segment":[60.03801716068643,77.50939781591264],"label":"Sharpening knives"},{"segment":[93.71013260530421,121.02901872074884],"label":"Sharpening knives"},{"segment":[122.9349875195008,160.73670202808114],"label":"Sharpening knives"},{"segment":[162.00734789391575,169.94888455538222],"label":"Sharpening knives"},{"segment":[177.57275975039002,200.44438533541341],"label":"Sharpening knives"}]},"wB9LBEHR5-c":{"duration":176.923,"subset":"validation","resolution":"294x240","url":"https://www.youtube.com/watch?v=wB9LBEHR5-c","annotations":[{"segment":[4.139648985959439,169.74479719188767],"label":"Riding bumper cars"}]},"_1Zoq6-OrRQ":{"duration":116.1,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=_1Zoq6-OrRQ","annotations":[]},"zB8knKX0W8Q":{"duration":147.96,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zB8knKX0W8Q","annotations":[]},"iJWmjVjBNzE":{"duration":94.831,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iJWmjVjBNzE","annotations":[{"segment":[18.16966100787127,72.29519473406262],"label":"Decorating the Christmas tree"}]},"pn41XETdQB4":{"duration":229.72,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=pn41XETdQB4","annotations":[{"segment":[19.706131045241808,36.187622464898595],"label":"Doing karate"},{"segment":[76.67476443057723,112.14580031201247],"label":"Doing karate"},{"segment":[160.51539469578785,173.41395319812793],"label":"Doing 
karate"},{"segment":[187.7456848673947,218.91720124804993],"label":"Doing karate"}]},"pi2AaadXnnI":{"duration":144.96,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=pi2AaadXnnI","annotations":[{"segment":[1.3565148205928237,141.9818845553822],"label":"Hand washing clothes"}]},"hR6VFvMXCN4":{"duration":147.63,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=hR6VFvMXCN4","annotations":[{"segment":[16.574918017128848,76.19858051482059],"label":"Long jump"},{"segment":[118.09628943837754,123.85146923556944],"label":"Long jump"}]},"jBnNbI48KjQ":{"duration":174.336,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=jBnNbI48KjQ","annotations":[{"segment":[43.508553430991014,57.081840035352386],"label":"Surfing"},{"segment":[61.42529174874802,67.66900358675426],"label":"Surfing"},{"segment":[80.15642726276671,85.31427617242403],"label":"Surfing"},{"segment":[111.38776753466597,114.9172270362236],"label":"Surfing"},{"segment":[141.79541862500864,147.2253563197127],"label":"Surfing"},{"segment":[154.01277843809277,159.714213017532],"label":"Surfing"}]},"ihdkXBpzKbE":{"duration":193.52,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=ihdkXBpzKbE","annotations":[{"segment":[9.056869050272216,119.55067146359325],"label":"Washing dishes"},{"segment":[128.60754051386544,166.34449485491422],"label":"Washing dishes"}]},"Z2KHO87wHzg":{"duration":110.226,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=Z2KHO87wHzg","annotations":[{"segment":[87.0107835226209,89.93407071606863],"label":"Applying sunscreen"}]},"aIwFZCRFHx8":{"duration":148.33,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aIwFZCRFHx8","annotations":[{"segment":[25.90816848673947,135.09259282371295],"label":"Riding bumper 
cars"}]},"CteuM2BUmHo":{"duration":106.0,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CteuM2BUmHo","annotations":[{"segment":[4.523949079648085,102.26766097769911],"label":"Kneeling"}]},"W1krUTxgsMc":{"duration":143.78,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=W1krUTxgsMc","annotations":[{"segment":[34.98220904836194,136.78940717628706],"label":"Playing guitarra"}]},"pqVWGi0d4RU":{"duration":203.268,"subset":"validation","resolution":"1920x1072","url":"https://www.youtube.com/watch?v=pqVWGi0d4RU","annotations":[{"segment":[12.045644305772232,194.96171606864274],"label":"Playing ice hockey"}]},"fYIr-1aIHmA":{"duration":117.029,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fYIr-1aIHmA","annotations":[{"segment":[4.199137649859736,50.20708059614902],"label":"Waterskiing"},{"segment":[55.136503054680006,112.82900237129485],"label":"Waterskiing"}]},"djgM0IWIBKc":{"duration":235.985,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=djgM0IWIBKc","annotations":[{"segment":[0.01,78.04788238332254],"label":"Canoeing"},{"segment":[92.4057475387451,94.24649948174799],"label":"Canoeing"},{"segment":[158.30466709824856,205.79606627457096],"label":"Canoeing"},{"segment":[217.9450290421217,235.984398],"label":"Canoeing"}]},"5YLDQMvskH8":{"duration":185.203,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=5YLDQMvskH8","annotations":[]},"CE93IieQkAg":{"duration":148.004,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=CE93IieQkAg","annotations":[{"segment":[15.008205008472448,144.77145355694228],"label":"Cutting the grass"}]},"f4mDKdD0z3U":{"duration":81.64,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=f4mDKdD0z3U","annotations":[{"segment":[58.97019796879874,73.99931969734789],"label":"Bungee 
jumping"}]},"bSZnvk2Cx28":{"duration":101.17,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bSZnvk2Cx28","annotations":[{"segment":[14.274010140405617,38.24803822152886],"label":"Capoeira"},{"segment":[37.62458424336973,70.91090405616224],"label":"Capoeira"}]},"cqkcRy3k4v0":{"duration":29.652,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cqkcRy3k4v0","annotations":[{"segment":[12.107179407176286,29.401],"label":"Camel ride"}]},"pmlK-IV4vko":{"duration":19.97,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=pmlK-IV4vko","annotations":[{"segment":[0.3033835,18.733229329173167],"label":"Pole vault"}]},"NL6UDkBKrgM":{"duration":41.66,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NL6UDkBKrgM","annotations":[]},"QoRlOa6R1LY":{"duration":62.69,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QoRlOa6R1LY","annotations":[]},"dSww-S8qyCM":{"duration":94.25,"subset":"validation","resolution":"328x240","url":"https://www.youtube.com/watch?v=dSww-S8qyCM","annotations":[{"segment":[9.995363494539783,82.4617488299532],"label":"Long 
jump"}]},"UKboc2GEA6U":{"duration":61.394,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UKboc2GEA6U","annotations":[]},"mmgoptOJM0s":{"duration":13.79,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=mmgoptOJM0s","annotations":[{"segment":[0.5259664586583462,8.082709048361933],"label":"Ping-pong"}]},"Jvxk68D3F2w":{"duration":183.41,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Jvxk68D3F2w","annotations":[{"segment":[4.578201410296412,173.3993784149766],"label":"Paintball"}]},"FnbVnRX6WxQ":{"duration":94.227,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FnbVnRX6WxQ","annotations":[{"segment":[11.650125143255824,86.04170834698833],"label":"Doing crunches"}]},"1ftLLKrC81s":{"duration":220.32999999999998,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=1ftLLKrC81s","annotations":[{"segment":[35.06096856786271,197.64761692667705],"label":"Tango"}]},"L0QdLXym4F4":{"duration":30.697,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=L0QdLXym4F4","annotations":[{"segment":[0.2,18.128734789391576],"label":"Using the monkey bar"}]},"cam-KNnddec":{"duration":166.093,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cam-KNnddec","annotations":[]},"fXdw7jwiP8g":{"duration":57.56,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=fXdw7jwiP8g","annotations":[{"segment":[4.796498932137286,49.44428338455538],"label":"Cumbia"}]},"hlFJj2dT3sU":{"duration":41.68,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=hlFJj2dT3sU","annotations":[{"segment":[0,38.717684867394695],"label":"Tennis serve with ball 
bouncing"}]},"P9jIpcRGeOk":{"duration":65.62,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=P9jIpcRGeOk","annotations":[{"segment":[13.410560844202475,29.380388347893916],"label":"Sumo"},{"segment":[38.798491929797194,46.066810998439934],"label":"Sumo"}]},"BZAzrFF5emE":{"duration":183.206,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BZAzrFF5emE","annotations":[{"segment":[85.43871332808997,99.7070358180413],"label":"Longboarding"},{"segment":[110.83632689377544,116.8290223143362],"label":"Longboarding"},{"segment":[168.19498306199975,175.32914427695303],"label":"Longboarding"}]},"jHbf_E8aY1k":{"duration":141.04,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jHbf_E8aY1k","annotations":[{"segment":[43.78558907488299,73.9294368299532],"label":"Bungee jumping"},{"segment":[76.12971768798752,78.77005471762871],"label":"Bungee jumping"}]},"TEh6gfRUFZQ":{"duration":109.088,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TEh6gfRUFZQ","annotations":[{"segment":[48.98291419656786,57.15721684867395],"label":"Removing ice from car"},{"segment":[63.79153666146646,87.60704368174727],"label":"Removing ice from car"}]},"psgIH8U1adg":{"duration":121.069,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=psgIH8U1adg","annotations":[{"segment":[4.5995523125597995,44.37436695430435],"label":"Swinging at the playground"},{"segment":[49.65254140913301,63.413496237793446],"label":"Swinging at the playground"}]},"jVC3DZdphYM":{"duration":167.3,"subset":"validation","resolution":"160x120","url":"https://www.youtube.com/watch?v=jVC3DZdphYM","annotations":[{"segment":[6.785950240730718,80.12641245785886],"label":"Playing harmonica"},{"segment":[104.39923447278028,136.7629969609984],"label":"Playing 
harmonica"}]},"sMITf5WBIxM":{"duration":158.45,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=sMITf5WBIxM","annotations":[{"segment":[21.506097196567865,132.99172749141965],"label":"Brushing hair"}]},"oJ9BFy1KNlY":{"duration":42.08,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=oJ9BFy1KNlY","annotations":[{"segment":[20.34806968798752,40.23666683463338],"label":"Clean and jerk"}]},"ORI5ZNZARw8":{"duration":23.94,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ORI5ZNZARw8","annotations":[{"segment":[3.355834633385335,22.85696255850234],"label":"Using the rowing machine"}]},"BYLxSOPFOuc":{"duration":489.384,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BYLxSOPFOuc","annotations":[{"segment":[345.04382527301095,413.7472418096724],"label":"Removing ice from car"}]},"1jPEEuKPfKs":{"duration":82.57,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1jPEEuKPfKs","annotations":[]},"n25mDmcBC6E":{"duration":185.64,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=n25mDmcBC6E","annotations":[{"segment":[0,185.652],"label":"Playing flauta"}]},"55rDKtgbGqI":{"duration":187.11,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=55rDKtgbGqI","annotations":[]},"xunKd050v7U":{"duration":63.066,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xunKd050v7U","annotations":[{"segment":[5.9031381663429805,14.167531207488299],"label":"Making a 
lemonade"}]},"N-1b20gDnCM":{"duration":184.32,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=N-1b20gDnCM","annotations":[{"segment":[27.024046801872075,164.15670982839313],"label":"Canoeing"}]},"nEcOF04KK0g":{"duration":37.48,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nEcOF04KK0g","annotations":[{"segment":[9.112006240249611,12.733444617784713],"label":"Polishing shoes"},{"segment":[15.770780031201248,18.57447425897036],"label":"Polishing shoes"}]},"L35c6QI1l_0":{"duration":105.373,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=L35c6QI1l_0","annotations":[{"segment":[19.059650546021842,100.90385959438377],"label":"Camel ride"}]},"2fMpsSrmeIA":{"duration":58.701,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=2fMpsSrmeIA","annotations":[{"segment":[4.578742355694228,55.402782503900156],"label":"Playing pool"}]},"EE_cxG4E-Q0":{"duration":93.856,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=EE_cxG4E-Q0","annotations":[]},"c1tbdVxIhH4":{"duration":68.639,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=c1tbdVxIhH4","annotations":[{"segment":[0.2,68.43818594104309],"label":"Carving jack-o-lanterns"}]},"gVMG_FHDrvo":{"duration":7.849,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gVMG_FHDrvo","annotations":[{"segment":[0.01,6.81474674756059],"label":"Doing a powerbomb"}]},"9FK7tjzBKio":{"duration":50.6,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9FK7tjzBKio","annotations":[{"segment":[7.104001872074883,9.550935850234008],"label":"Playing squash"},{"segment":[11.445336349453978,32.046941778471144],"label":"Playing 
squash"}]},"DxtISmKol2I":{"duration":128.382,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DxtISmKol2I","annotations":[{"segment":[38.1493709616179,46.3385036023644],"label":"Sumo"},{"segment":[117.24440817468167,124.434866103142],"label":"Sumo"}]},"ZyDAF0AU3Ck":{"duration":15.65,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=ZyDAF0AU3Ck","annotations":[]},"_Dkkr7L00zA":{"duration":102.563,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=_Dkkr7L00zA","annotations":[]},"FYhB2rQwfCc":{"duration":103.61,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=FYhB2rQwfCc","annotations":[{"segment":[12.924929797191888,96.93697347893917],"label":"Playing racquetball"}]},"sWtwatYMbX0":{"duration":112.47999999999999,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=sWtwatYMbX0","annotations":[{"segment":[28.338704568642747,83.08592330187207],"label":"Mixing drinks"}]},"ABB755sPZfY":{"duration":12.818,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ABB755sPZfY","annotations":[{"segment":[0.2,12.617414965986395],"label":"Rope 
skipping"}]},"3hSPAtKORgk":{"duration":81.131,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=3hSPAtKORgk","annotations":[{"segment":[42.28448436520879,81.130521],"label":"Futsal"}]},"zmmiX3_TJ84":{"duration":210.61,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=zmmiX3_TJ84","annotations":[{"segment":[6.898254290171607,199.06390951638065],"label":"Zumba"}]},"Xf0c2abFH3Y":{"duration":93.11,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=Xf0c2abFH3Y","annotations":[{"segment":[27.44189219911522,31.21152771313391],"label":"Painting"},{"segment":[49.62474733930214,57.30900434864793],"label":"Painting"}]},"VxBRMCxxcG0":{"duration":157.618,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=VxBRMCxxcG0","annotations":[]},"UqU_mAjgknQ":{"duration":77.787,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=UqU_mAjgknQ","annotations":[{"segment":[8.980072820592824,68.44271717316693],"label":"Sumo"}]},"ER0UPx6-VcM":{"duration":78.07,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ER0UPx6-VcM","annotations":[{"segment":[22.378820592823715,73.23644617784711],"label":"Playing racquetball"}]},"CRDBKk44RWg":{"duration":143.5,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CRDBKk44RWg","annotations":[{"segment":[63.55352886115445,139.41495943837754],"label":"Tennis serve with ball bouncing"}]},"nkrA8sJydF0":{"duration":216.13,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nkrA8sJydF0","annotations":[{"segment":[38.09228237129485,56.295673946957876],"label":"Vacuuming floor"},{"segment":[102.81545241809671,108.54614976599062],"label":"Vacuuming 
floor"}]},"-kky8-yaEPg":{"duration":37.803,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=-kky8-yaEPg","annotations":[{"segment":[0.01,36.17694872536044],"label":"Making an omelette"}]},"4kNx9rVwmAg":{"duration":191.91,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=4kNx9rVwmAg","annotations":[{"segment":[7.15256024,99.52172496000001],"label":"Doing step aerobics"},{"segment":[111.73500464,178.6948595943838],"label":"Doing step aerobics"}]},"udSHsodv3gY":{"duration":165.65,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=udSHsodv3gY","annotations":[]},"FhbsTTHT2Zs":{"duration":71.01,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FhbsTTHT2Zs","annotations":[{"segment":[0.01,70.96],"label":"Grooming horse"}]},"Jd3D2d-RwL0":{"duration":114.497,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Jd3D2d-RwL0","annotations":[{"segment":[22.483628705148206,40.86310296411857],"label":"Blowing leaves"}]},"xQqladBKjFU":{"duration":220.38,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xQqladBKjFU","annotations":[]},"0gw1Qq3WRbU":{"duration":114.8,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=0gw1Qq3WRbU","annotations":[{"segment":[0.01,114.79945578231292],"label":"Springboard 
diving"}]},"wnNiWoJxfSQ":{"duration":228.07,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=wnNiWoJxfSQ","annotations":[]},"zXe-F6ebHGM":{"duration":192.656,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=zXe-F6ebHGM","annotations":[]},"VTuf9nQkbCk":{"duration":128.732,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=VTuf9nQkbCk","annotations":[]},"8Q-P5KEvXN0":{"duration":46.139,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8Q-P5KEvXN0","annotations":[{"segment":[0.01,7.479725429017161],"label":"Futsal"},{"segment":[9.277736349453978,46.101],"label":"Futsal"}]},"GbvZWitR2_g":{"duration":160.775,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=GbvZWitR2_g","annotations":[{"segment":[0,2.508190150735281],"label":"Kite flying"},{"segment":[7.524570452205843,159.52089292979718],"label":"Kite flying"}]},"cZWgq6ATrRI":{"duration":60.16,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cZWgq6ATrRI","annotations":[{"segment":[37.78232995319813,50.048712168486745],"label":"Putting on makeup"}]},"I1P5GuVHFQA":{"duration":205.567,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=I1P5GuVHFQA","annotations":[{"segment":[132.56141903490368,154.9751855383898],"label":"Welding"}]},"Of_UWPP-SSA":{"duration":46.12,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Of_UWPP-SSA","annotations":[]},"WzAGE-xKDpw":{"duration":56.123,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WzAGE-xKDpw","annotations":[{"segment":[21.452481685726383,54.758901699745074],"label":"Using the rowing 
machine"}]},"IRvFx8K0gAI":{"duration":166.53,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IRvFx8K0gAI","annotations":[{"segment":[0.01,166.53310657596373],"label":"Tango"}]},"6Pgz2EgPES0":{"duration":140.295,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6Pgz2EgPES0","annotations":[]},"Gpocjp7hSzU":{"duration":197.86,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Gpocjp7hSzU","annotations":[{"segment":[87.48296957878314,174.81164820592824],"label":"Preparing pasta"}]},"JB3h1trZ1cE":{"duration":108.531,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=JB3h1trZ1cE","annotations":[{"segment":[15.739053042121684,24.70862090483619],"label":"Waterskiing"},{"segment":[29.108786271450853,105.26549453978159],"label":"Waterskiing"}]},"ZgVmS-AhYgA":{"duration":152.741,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZgVmS-AhYgA","annotations":[{"segment":[26.6150031152648,75.80523208722742],"label":"Rafting"},{"segment":[79.13210747663551,112.16322741433022],"label":"Rafting"}]},"3k5dJSs2zkg":{"duration":59.049,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=3k5dJSs2zkg","annotations":[{"segment":[27.846266355140184,36.22161059190031],"label":"Running a marathon"},{"segment":[38.060271028037384,41.27792679127726],"label":"Running a marathon"}]},"aG5kKRuw0Fk":{"duration":212.881,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=aG5kKRuw0Fk","annotations":[{"segment":[11.955849597249196,38.19229732454605],"label":"Playing drums"},{"segment":[52.805002387850614,205.9062986192917],"label":"Playing drums"}]},"P6ijX0vabJs":{"duration":218.99,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=P6ijX0vabJs","annotations":[{"segment":[40.648803432137285,50.21322776911076],"label":"Playing 
flauta"},{"segment":[70.70842277691108,90.5204446177847],"label":"Playing flauta"},{"segment":[130.48607488299533,153.71396255850232],"label":"Playing flauta"},{"segment":[177.62502340093602,202.56084399375973],"label":"Playing flauta"}]},"pizl41xmw7k":{"duration":172.084,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=pizl41xmw7k","annotations":[{"segment":[49.64789653250436,98.95473765399969],"label":"Mooping floor"}]},"TXSOSSGJYO0":{"duration":127.942,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TXSOSSGJYO0","annotations":[]},"y_vaJ59chHo":{"duration":49.51,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=y_vaJ59chHo","annotations":[]},"6ypZ2Qeip0M":{"duration":44.03,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=6ypZ2Qeip0M","annotations":[]},"r1wtXxEQihE":{"duration":170.37,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=r1wtXxEQihE","annotations":[]},"yZErFOSkogc":{"duration":92.207,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yZErFOSkogc","annotations":[{"segment":[0.01,92.06245541341654],"label":"Scuba diving"}]},"QriYk3MKSnM":{"duration":37.97,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=QriYk3MKSnM","annotations":[]},"rdZ0TZcP0NU":{"duration":39.13,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rdZ0TZcP0NU","annotations":[{"segment":[12.110231305772231,37.79122432605304],"label":"Playing polo"}]},"C53reDr47oU":{"duration":18.93,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=C53reDr47oU","annotations":[{"segment":[0.8801392499999999,16.16385872854914],"label":"Putting on 
makeup"}]},"c6BcS5NdI6E":{"duration":178.933,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=c6BcS5NdI6E","annotations":[{"segment":[0.2,178.73297052154197],"label":"Playing beach volleyball"}]},"kyx4MMmBr3A":{"duration":44.258,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=kyx4MMmBr3A","annotations":[]},"f98FHTGx64Q":{"duration":173.477,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=f98FHTGx64Q","annotations":[{"segment":[76.56496567862715,121.4898736349454],"label":"Kite flying"}]},"yATxB9giT34":{"duration":168.879,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yATxB9giT34","annotations":[{"segment":[28.136833333333332,31.555327102803737],"label":"Beer pong"},{"segment":[62.32177102803738,76.7846292834891],"label":"Beer pong"},{"segment":[80.2031230529595,84.4105],"label":"Beer pong"}]},"qXQBHaML9Xg":{"duration":8.267,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=qXQBHaML9Xg","annotations":[{"segment":[0.6383494516380657,4.945596256630266],"label":"Doing a powerbomb"}]},"2vAaAy_WC7Y":{"duration":179.862,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2vAaAy_WC7Y","annotations":[{"segment":[15.238863978840483,171.82939201622366],"label":"Making a 
cake"}]},"wEwp63lUsrE":{"duration":158.059,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=wEwp63lUsrE","annotations":[{"segment":[126.71262714508579,141.7505070202808],"label":"Welding"}]},"adS4bgldY64":{"duration":93.25,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=adS4bgldY64","annotations":[]},"1ErQKcUju8o":{"duration":104.1,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1ErQKcUju8o","annotations":[{"segment":[97.96126203666147,102.01930934555382],"label":"Ballet"}]},"26U3SkeZU6M":{"duration":30.325,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=26U3SkeZU6M","annotations":[]},"YS8swiRbbIE":{"duration":79.273,"subset":"validation","resolution":"562x480","url":"https://www.youtube.com/watch?v=YS8swiRbbIE","annotations":[{"segment":[8.038598455502846,72.96573812792512],"label":"Baton twirling"}]},"h0PcCIFuBF8":{"duration":217.037,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=h0PcCIFuBF8","annotations":[]},"k42CKebiSLA":{"duration":72.841,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=k42CKebiSLA","annotations":[{"segment":[53.66622488161995,63.42372031464175],"label":"Sumo"}]},"AdnLY0a6yn0":{"duration":84.38,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=AdnLY0a6yn0","annotations":[{"segment":[8.161687157566304,15.401893507020281],"label":"Triple jump"},{"segment":[22.642099856474257,29.487385859594387],"label":"Triple jump"},{"segment":[32.646748630265215,42.25647705772231],"label":"Triple jump"},{"segment":[43.30959798127925,51.33964502340094],"label":"Triple jump"},{"segment":[53.709167101404056,64.10873622152886],"label":"Triple jump"},{"segment":[65.95169783775351,73.58682453354135],"label":"Triple 
jump"}]},"vcCwvRYqU2I":{"duration":156.177,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vcCwvRYqU2I","annotations":[{"segment":[11.203691107644307,143.71792511700468],"label":"Raking leaves"}]},"VFrQxkhhW80":{"duration":121.51,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VFrQxkhhW80","annotations":[]},"WAm4oWlM6to":{"duration":183.531,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WAm4oWlM6to","annotations":[]},"r0PXIqB_urk":{"duration":196.836,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=r0PXIqB_urk","annotations":[]},"_6TamBiyYWA":{"duration":162.42000000000002,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_6TamBiyYWA","annotations":[{"segment":[21.538228369734792,26.859437731669267],"label":"Archery"},{"segment":[47.13071149141966,51.184966243369736],"label":"Archery"},{"segment":[88.68682269890796,90.713950074883],"label":"Archery"}]},"wmI-4NI4QNc":{"duration":111.18,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wmI-4NI4QNc","annotations":[]},"yy6PwbO6chc":{"duration":148.469,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yy6PwbO6chc","annotations":[]},"vxjoR-VO5iI":{"duration":196.093,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=vxjoR-VO5iI","annotations":[]},"0x6ZV9Y8g_c":{"duration":44.03,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=0x6ZV9Y8g_c","annotations":[]},"F8K9WQfHth4":{"duration":112.083,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=F8K9WQfHth4","annotations":[{"segment":[0.2,103.8644862402496],"label":"Croquet"}]},"No5ZwqHdEQU":{"duration":180.16,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=No5ZwqHdEQU","annotations":[{"segment":[0.01,180.163628117
91385],"label":"Volleyball"}]},"0WRxp0X0edY":{"duration":159.243,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0WRxp0X0edY","annotations":[{"segment":[24.517888180641897,29.223342193083578],"label":"Curling"},{"segment":[61.17089838281919,84.20285749634952],"label":"Curling"},{"segment":[138.68706185093737,143.39251586337906],"label":"Curling"},{"segment":[152.80342388826242,159.04247],"label":"Curling"}]},"sS-KyhAzeUY":{"duration":153.79,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=sS-KyhAzeUY","annotations":[{"segment":[18.947798751950078,24.70409204368175],"label":"Washing hands"},{"segment":[30.940076443057727,35.25729641185647],"label":"Washing hands"},{"segment":[44.371427457098285,48.688647425897045],"label":"Washing hands"},{"segment":[51.32694851794073,53.725404056162255],"label":"Washing hands"},{"segment":[58.762160686427464,68.83567394695788],"label":"Washing hands"},{"segment":[80.10841497659906,82.74671606864275],"label":"Washing hands"},{"segment":[92.34053822152887,110.08910920436819],"label":"Washing hands"},{"segment":[117.76416692667708,124.47984243369736],"label":"Washing hands"}]},"1QjVfj0S8qQ":{"duration":62.857,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1QjVfj0S8qQ","annotations":[{"segment":[8.531189574113577,60.30668316692668],"label":"Fixing the roof"}]},"gIf0VWXI_DY":{"duration":203.87,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=gIf0VWXI_DY","annotations":[{"segment":[126.42558814352573,156.6405085803432],"label":"Getting a tattoo"}]},"Zrv8_-Y-jSM":{"duration":27.168,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Zrv8_-Y-jSM","annotations":[{"segment":[3.8992134664586584,23.903873859594384],"label":"Rope 
skipping"}]},"9x7FyIJdVsI":{"duration":99.266,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9x7FyIJdVsI","annotations":[{"segment":[15.021337762354031,22.76429501560062],"label":"Swimming"}]},"tyeLIzY0MJ4":{"duration":20.25,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=tyeLIzY0MJ4","annotations":[{"segment":[2.401415677067083,20.121666],"label":"Drinking beer"}]},"PhJVHb-J7zM":{"duration":96.92,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PhJVHb-J7zM","annotations":[{"segment":[3.6288333492523375,78.3223192199688],"label":"Polishing shoes"}]},"sg5z7jus30w":{"duration":208.01,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sg5z7jus30w","annotations":[{"segment":[1.6224988455538223,198.59385869578784],"label":"Getting a tattoo"}]},"gzyu1S2LNIw":{"duration":90.535,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gzyu1S2LNIw","annotations":[{"segment":[5.5083455909664965,85.16749721417428],"label":"Baton twirling"}]},"yjd9UOJdes8":{"duration":123.04,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=yjd9UOJdes8","annotations":[{"segment":[22.26666864769829,109.4138028378278],"label":"Playing violin"}]},"nI_XzNfxjlY":{"duration":215.504,"subset":"validation","resolution":"352x288","url":"https://www.youtube.com/watch?v=nI_XzNfxjlY","annotations":[{"segment":[63.20556528383584,203.40088768330736],"label":"Swimming"}]},"vjMuhHo6wMY":{"duration":162.842,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vjMuhHo6wMY","annotations":[{"segment":[16.512792865456113,158.77685257410297],"label":"Disc 
dog"}]},"c51KKeTaJl4":{"duration":95.06,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=c51KKeTaJl4","annotations":[]},"tb8KKdC7r-A":{"duration":55.636,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=tb8KKdC7r-A","annotations":[{"segment":[11.022849360232915,14.147436580456418],"label":"Shuffleboard"},{"segment":[18.313552874087755,21.52493418376191],"label":"Shuffleboard"},{"segment":[27.60052044530761,30.898695844432417],"label":"Shuffleboard"},{"segment":[36.36672347982355,44.09139744093166],"label":"Shuffleboard"},{"segment":[47.1291905717045,53.11798274379955],"label":"Shuffleboard"}]},"9jivQgF6J1g":{"duration":152.045,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9jivQgF6J1g","annotations":[{"segment":[0.01,17.261618359503046],"label":"Playing ice hockey"}]},"kYkwA_lvqYc":{"duration":13.98,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kYkwA_lvqYc","annotations":[{"segment":[2.342134165366615,10.908273010920437],"label":"Polishing shoes"}]},"7hxCoi0XU6I":{"duration":116.31,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7hxCoi0XU6I","annotations":[{"segment":[47.90251286786165,96.71226877223089],"label":"Brushing hair"}]},"HGqaDftZe5s":{"duration":38.43,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HGqaDftZe5s","annotations":[{"segment":[0,35.071730171606866],"label":"Doing 
motocross"}]},"RVYINuYPY6o":{"duration":233.2,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RVYINuYPY6o","annotations":[{"segment":[7.276068784247968,230.6513789422777],"label":"Cricket"}]},"2VA80MQNvs4":{"duration":41.634,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2VA80MQNvs4","annotations":[]},"g0vAi9iuVPA":{"duration":13.26,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=g0vAi9iuVPA","annotations":[{"segment":[1.1550327613104525,13.221],"label":"Tumbling"}]},"Rq1MoqtH8fM":{"duration":237.52,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Rq1MoqtH8fM","annotations":[{"segment":[19.642523975788965,60.41002656704908],"label":"Mowing the lawn"},{"segment":[100.06568817854755,209.39671785510876],"label":"Mowing the lawn"}]},"zB8kwWJqQ8Q":{"duration":159.243,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zB8kwWJqQ8Q","annotations":[{"segment":[12.664946957878316,147.27955226209048],"label":"Table soccer"}]},"OH5UXwa3F7E":{"duration":186.97,"subset":"training","resolution":"820x480","url":"https://www.youtube.com/watch?v=OH5UXwa3F7E","annotations":[{"segment":[16.475875975039,33.09755616224649],"label":"Pole vault"},{"segment":[60.80035647425897,72.75630187207489],"label":"Pole vault"},{"segment":[83.25420514820593,96.08497581903276],"label":"Pole vault"},{"segment":[115.331131825273,124.37099297971918],"label":"Pole vault"},{"segment":[150.32414274570982,162.57169656786272],"label":"Pole vault"}]},"GjUaqfAtEqQ":{"duration":128.57,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GjUaqfAtEqQ","annotations":[]},"ko9efCldvgs":{"duration":14.88,"subset":"training","resolution":"420x240","url":"https://www.youtube.com/watch?v=ko9efCldvgs","annotations":[{"segment":[5.015509553822153,14.883989],"label":"Walking the 
dog"}]},"S-4ftlylgig":{"duration":158.36,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=S-4ftlylgig","annotations":[{"segment":[30.881355304212168,52.86888028081123],"label":"Triple jump"},{"segment":[66.70372745709828,70.16243925117004],"label":"Triple jump"},{"segment":[82.76203221528861,102.03199792511701],"label":"Triple jump"},{"segment":[109.93762488299532,114.13748920436817],"label":"Triple jump"},{"segment":[126.24298048361933,142.05423439937596],"label":"Triple jump"}]},"7vXt9RxeXWY":{"duration":79.622,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=7vXt9RxeXWY","annotations":[{"segment":[4.8419017160686435,74.01278627145086],"label":"Kneeling"}]},"O_IrzZbXiCc":{"duration":98.08,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=O_IrzZbXiCc","annotations":[{"segment":[5.352834095163806,94.66869442589703],"label":"Playing field hockey"}]},"Q711Ki5aFHw":{"duration":126.2,"subset":"training","resolution":"400x224","url":"https://www.youtube.com/watch?v=Q711Ki5aFHw","annotations":[{"segment":[20.151616,95.60659600000001],"label":"Breakdancing"}]},"EHianByJXXM":{"duration":28.236,"subset":"validation","resolution":"848x480","url":"https://www.youtube.com/watch?v=EHianByJXXM","annotations":[{"segment":[0,28.235463],"label":"Calf roping"}]},"1PpVSeE2BJ8":{"duration":48.252,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=1PpVSeE2BJ8","annotations":[{"segment":[0.2,3.9021066745679507],"label":"Having an ice cream"},{"segment":[22.28703774766282,30.766618201784123],"label":"Having an ice cream"},{"segment":[39.771482400851,47.95090071500341],"label":"Having an ice cream"}]},"_gO4UzCTqhk":{"duration":32.508,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_gO4UzCTqhk","annotations":[{"segment":[12.041242990654206,28.48411682242991],"label":"Spread 
mulch"}]},"j1IYrdjnY6M":{"duration":26.22,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=j1IYrdjnY6M","annotations":[{"segment":[0,26.21532879818594],"label":"Washing face"}]},"KOiRoi5_SnQ":{"duration":134.885,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KOiRoi5_SnQ","annotations":[{"segment":[0.01,134.0194336973479],"label":"Skiing"}]},"Q6uc1kl008o":{"duration":120.3,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Q6uc1kl008o","annotations":[{"segment":[0.01,105.8248159126365],"label":"Layup drill in basketball"}]},"XeC4nqBB5BM":{"duration":139.57999999999998,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XeC4nqBB5BM","annotations":[{"segment":[42.46027834633386,107.13054844305773],"label":"Playing flauta"}]},"toeGFZLoeHU":{"duration":40.54,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=toeGFZLoeHU","annotations":[{"segment":[6.876015355694227,38.56272206396256],"label":"Playing bagpipes"}]},"peaOnHl5YS8":{"duration":46.301,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=peaOnHl5YS8","annotations":[{"segment":[0.01,46.300589569161],"label":"Mooping floor"}]},"W2wyahjyTFc":{"duration":25.45,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=W2wyahjyTFc","annotations":[{"segment":[8.377150638065524,17.191024769110765],"label":"Hopscotch"}]},"gOe72cTd0IA":{"duration":187.153,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gOe72cTd0IA","annotations":[{"segment":[20.432246489859594,77.93442589703588],"label":"Scuba diving"},{"segment":[88.4424383775351,93.6964446177847],"label":"Scuba diving"},{"segment":[99.8261185647426,119.09080811232448],"label":"Scuba diving"},{"segment":[136.60416224648986,146.5440592823713],"label":"Scuba 
diving"}]},"IAZV246DKis":{"duration":81.967,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IAZV246DKis","annotations":[{"segment":[5.882138877391831,11.636405170492536],"label":"Peeling potatoes"},{"segment":[15.600455283517464,77.23503946957878],"label":"Peeling potatoes"}]},"M6O-QNDUdwU":{"duration":80.57,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=M6O-QNDUdwU","annotations":[{"segment":[27.276745165044698,70.391599875195],"label":"Playing water polo"}]},"JM7YQFgxu2I":{"duration":38.149,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JM7YQFgxu2I","annotations":[{"segment":[2.0791419240477755,35.34542721176162],"label":"Fun sliding down"}]},"CQtu83639js":{"duration":139.23,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=CQtu83639js","annotations":[{"segment":[15.29992979719189,122.07390795631827],"label":"Spinning"}]},"7NG6UrY2Foo":{"duration":65.09,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=7NG6UrY2Foo","annotations":[{"segment":[0,65.041],"label":"Mixing drinks"}]},"77w2Am_ttbg":{"duration":21.061,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=77w2Am_ttbg","annotations":[{"segment":[0,18.47576443057722],"label":"Using the rowing 
machine"}]},"UU8Xtm8Gl3I":{"duration":109.204,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UU8Xtm8Gl3I","annotations":[{"segment":[0.2,39.161357254290174],"label":"Snowboarding"},{"segment":[56.35830109204368,94.68567238689548],"label":"Snowboarding"}]},"A1SuSeqDTEI":{"duration":222.89,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=A1SuSeqDTEI","annotations":[{"segment":[33.38488256151431,67.96432181385076],"label":"Kayaking"},{"segment":[70.38488256151432,77.64656480450498],"label":"Kayaking"},{"segment":[81.10450872973861,97.05198795837458],"label":"Kayaking"},{"segment":[106.42578916398206,138.3661488275335],"label":"Kayaking"},{"segment":[148.78148350043065,156.0722177714587],"label":"Kayaking"}]},"EIl8Yiu-txE":{"duration":175.06,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=EIl8Yiu-txE","annotations":[]},"jIVP2bEWmJE":{"duration":175.45,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jIVP2bEWmJE","annotations":[{"segment":[74.7236252349454,165.04888650795633],"label":"Making a sandwich"}]},"ABCRXwB5-JQ":{"duration":49.691,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ABCRXwB5-JQ","annotations":[{"segment":[0.2,49.490702947845804],"label":"Canoeing"}]},"3fg_CB3rBSk":{"duration":156.062,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=3fg_CB3rBSk","annotations":[{"segment":[0,142.1470577223089],"label":"Hitting a pinata"}]},"TZ5Vd9eL5WA":{"duration":67.41,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=TZ5Vd9eL5WA","annotations":[{"segment":[23.398088545241812,62.307269496879876],"label":"Preparing salad"}]},"BCzsVKHrN5Y":{"duration":27.35,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BCzsVKHrN5Y","annotations":[{"segment":[1.536211825273011,18.173333200000002],"label":"Shoveling 
snow"},{"segment":[20.995068680000003,24.835424508580346],"label":"Shoveling snow"}]},"E4oP6Ex9IPo":{"duration":161.611,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=E4oP6Ex9IPo","annotations":[{"segment":[37.05065054602184,151.4791903276131],"label":"Waxing skis"}]},"Bgi2Y85Mbwk":{"duration":199.46,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=Bgi2Y85Mbwk","annotations":[]},"EQWdo0FcKAo":{"duration":185.34,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=EQWdo0FcKAo","annotations":[]},"nfe_V3ei6ag":{"duration":32.286,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=nfe_V3ei6ag","annotations":[{"segment":[0,32.229296],"label":"Playing beach volleyball"}]},"IeTk_XgEsk4":{"duration":219.476,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IeTk_XgEsk4","annotations":[]},"wt-PGxOkL_s":{"duration":172.36,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=wt-PGxOkL_s","annotations":[{"segment":[20.167112398781665,45.17433177327093],"label":"Hand washing clothes"},{"segment":[49.47664908501102,126.3805710323651],"label":"Hand washing clothes"}]},"lV86EXbNQkI":{"duration":73.4,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=lV86EXbNQkI","annotations":[]},"yj7YkvTZ4zw":{"duration":31.16,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yj7YkvTZ4zw","annotations":[{"segment":[0.01,3.743222382215288],"label":"High 
jump"}]},"bWCIcijlgCo":{"duration":146.007,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bWCIcijlgCo","annotations":[{"segment":[120.95092029875195,122.31759736427458],"label":"Longboarding"},{"segment":[11.844534567862715,43.733666096723866],"label":"Longboarding"},{"segment":[0.2,145.8066665],"label":"Longboarding"},{"segment":[0.2,145.8066665],"label":"Longboarding"}]},"51wFW1g42VQ":{"duration":108.809,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=51wFW1g42VQ","annotations":[{"segment":[13.232134165366615,22.05355694227769],"label":"Hurling"},{"segment":[38.169617784711384,106.89458658346334],"label":"Hurling"}]},"eRco7yXc-40":{"duration":8.43,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=eRco7yXc-40","annotations":[]},"6HmKyms-U2s":{"duration":82.06,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6HmKyms-U2s","annotations":[{"segment":[0.01,82.05934240362812],"label":"Playing squash"}]},"MCsGSMze_6Q":{"duration":172.25,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=MCsGSMze_6Q","annotations":[{"segment":[77.92704687987519,138.1189727457098],"label":"Playing piano"}]},"_jV5sAOOHLk":{"duration":209.51,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_jV5sAOOHLk","annotations":[]},"pYbOylWZx-s":{"duration":140.85,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=pYbOylWZx-s","annotations":[{"segment":[93.18580689079563,128.9674005850234],"label":"Spinning"}]},"fSBX-gjPblw":{"duration":34.02,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fSBX-gjPblw","annotations":[]},"2x-Xqt98Ek4":{"duration":139.436,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2x-Xqt98Ek4","annotations":[{"segment":[80.48521165366614,133.34441822620906],"label":"Playing 
congas"}]},"3ArMGKnxlcU":{"duration":152.82999999999998,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=3ArMGKnxlcU","annotations":[{"segment":[0,119.93037749265072],"label":"Kayaking"},{"segment":[124.69897975039002,147.58827622464898],"label":"Kayaking"}]},"Jo2-IPbuapw":{"duration":110.46000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Jo2-IPbuapw","annotations":[{"segment":[0.6892812667706709,92.70833038065523],"label":"Painting"}]},"rzD3EEqJy-U":{"duration":137.63,"subset":"testing","resolution":"852x480","url":"https://www.youtube.com/watch?v=rzD3EEqJy-U","annotations":[]},"pZ8jUGWdKGw":{"duration":155.992,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pZ8jUGWdKGw","annotations":[]},"Lf3oTCD4d08":{"duration":39.41,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Lf3oTCD4d08","annotations":[{"segment":[0.24589243057722313,3.1351284898595946],"label":"Ironing clothes"},{"segment":[30.429188283931357,39.404262],"label":"Ironing clothes"}]},"2SYTRqm4Ym4":{"duration":15.21,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=2SYTRqm4Ym4","annotations":[{"segment":[0.2,14.921000000000001],"label":"Fun sliding down"}]},"Y97KgwAmdrU":{"duration":143.36,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Y97KgwAmdrU","annotations":[{"segment":[20.15261315398647,119.06518084021596],"label":"Baking 
cookies"}]},"H9E7PaqXpps":{"duration":236.333,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=H9E7PaqXpps","annotations":[]},"yCEuQR_jKeo":{"duration":204.1,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yCEuQR_jKeo","annotations":[]},"ahY7nqwwJEg":{"duration":140.81,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ahY7nqwwJEg","annotations":[{"segment":[7.666832250000001,137.29110729055012],"label":"Skateboarding"}]},"oMtB-y49Yo8":{"duration":222.378,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=oMtB-y49Yo8","annotations":[]},"KNpeiKm3xyM":{"duration":78.159,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=KNpeiKm3xyM","annotations":[{"segment":[22.6791871825273,78.157844],"label":"Beach soccer"}]},"v8jzbjQqNhE":{"duration":211.28,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=v8jzbjQqNhE","annotations":[{"segment":[110.25365229711228,149.14731738249122],"label":"Kayaking"},{"segment":[160.02435931314804,178.81197719337345],"label":"Kayaking"}]},"QhaxzjLAVd4":{"duration":237.12,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=QhaxzjLAVd4","annotations":[]},"HtCQ-OmHJl4":{"duration":7.41,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=HtCQ-OmHJl4","annotations":[{"segment":[0.2642667706708268,7.24930497],"label":"Hammer 
throw"}]},"hIYN8cC3pEA":{"duration":12.586,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=hIYN8cC3pEA","annotations":[]},"s2cXljpR-xk":{"duration":173.988,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=s2cXljpR-xk","annotations":[{"segment":[138.3929953198128,173.921],"label":"Rock-paper-scissors"}]},"NzL_uQyQjIw":{"duration":182.996,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NzL_uQyQjIw","annotations":[{"segment":[22.62788735074195,182.22003688345225],"label":"Wakeboarding"}]},"jqrLD8zs7R4":{"duration":173.8,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jqrLD8zs7R4","annotations":[{"segment":[25.30379502,151.70114231999997],"label":"Tango"}]},"6O4S2CpRB0A":{"duration":136.72,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6O4S2CpRB0A","annotations":[]},"3tfGg2rVv6o":{"duration":187.2,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=3tfGg2rVv6o","annotations":[{"segment":[23.982243900845724,113.7632843992881],"label":"Baton twirling"}]},"nv22QK8brp4":{"duration":94.041,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=nv22QK8brp4","annotations":[{"segment":[13.49728096099844,89.34613157878314],"label":"River tubing"}]},"MMB5Cn3JCGI":{"duration":20.23,"subset":"validation","resolution":"632x360","url":"https://www.youtube.com/watch?v=MMB5Cn3JCGI","annotations":[{"segment":[0.26724388533541343,18.785673116224647],"label":"High jump"}]},"lmYmYYXs1mo":{"duration":193.423,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lmYmYYXs1mo","annotations":[{"segment":[0.01,193.42222222222222],"label":"Sharpening 
knives"}]},"YTWXDIMEjpg":{"duration":226.163,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YTWXDIMEjpg","annotations":[{"segment":[46.07661355016399,55.573321265909705],"label":"Wakeboarding"},{"segment":[68.5873281356353,106.22242908322013],"label":"Wakeboarding"},{"segment":[123.10546502232363,189.58241903254358],"label":"Wakeboarding"}]},"SYifUxnoov8":{"duration":10.1,"subset":"testing","resolution":"380x294","url":"https://www.youtube.com/watch?v=SYifUxnoov8","annotations":[]},"p066OEa7IeY":{"duration":88.074,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=p066OEa7IeY","annotations":[]},"lzb6HHN59K4":{"duration":150.814,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lzb6HHN59K4","annotations":[{"segment":[0.01,94.79679095163807],"label":"Grooming dog"}]},"f6vZaMgqDvQ":{"duration":161.24,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=f6vZaMgqDvQ","annotations":[]},"yQe9_PLHcIs":{"duration":138.35,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yQe9_PLHcIs","annotations":[]},"q7Pp1TAgv3I":{"duration":226.395,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=q7Pp1TAgv3I","annotations":[{"segment":[3.531896377895932,63.92732443991637],"label":"Arm wrestling"},{"segment":[68.87197936897068,113.37387373045942],"label":"Arm wrestling"},{"segment":[123.61637322635762,162.82042302100248],"label":"Arm wrestling"},{"segment":[176.24162925700702,190.7224028705148],"label":"Arm wrestling"},{"segment":[197.78619556942277,219.33076330109205],"label":"Arm wrestling"}]},"rzIaKwWJDZI":{"duration":201.875,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rzIaKwWJDZI","annotations":[{"segment":[37.156377535101406,175.39069734789393],"label":"Making a 
cake"}]},"HwTSF0VgmMU":{"duration":56.15,"subset":"validation","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=HwTSF0VgmMU","annotations":[{"segment":[1.489047512920925,56.145849],"label":"Doing kickboxing"}]},"Sx7YgFLnwfM":{"duration":226.673,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Sx7YgFLnwfM","annotations":[{"segment":[0.01,0.02],"label":"Rock-paper-scissors"},{"segment":[24.748003120124807,44.192862714508585],"label":"Rock-paper-scissors"},{"segment":[80.25423868954759,82.72903900156007],"label":"Rock-paper-scissors"},{"segment":[183.1513853354134,186.33355226209048],"label":"Rock-paper-scissors"}]},"H_dERoTis5Y":{"duration":124.9,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=H_dERoTis5Y","annotations":[{"segment":[24.55135278627145,31.371173004680184],"label":"Paintball"},{"segment":[45.205665447737914,54.55856174726989],"label":"Paintball"},{"segment":[70.34157425273011,97.81570713260531],"label":"Paintball"}]},"xBR7YEKPgDA":{"duration":68.917,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xBR7YEKPgDA","annotations":[{"segment":[0.2,66.3940171339564],"label":"Hurling"}]},"5kmGgH4xFW0":{"duration":179.375,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=5kmGgH4xFW0","annotations":[{"segment":[14.547101404056162,91.75863962558502],"label":"Snow tubing"},{"segment":[142.409624024961,165.91140873634947],"label":"Snow tubing"}]},"ChCIKwDB4aw":{"duration":130.5,"subset":"testing","resolution":"640x356","url":"https://www.youtube.com/watch?v=ChCIKwDB4aw","annotations":[]},"0YHCiC7IIg8":{"duration":128.08,"subset":"validation","resolution":"322x240","url":"https://www.youtube.com/watch?v=0YHCiC7IIg8","annotations":[{"segment":[7.1933317441497655,121.28756579719187],"label":"Playing 
harmonica"}]},"MDWaKr7Gu5Q":{"duration":155.504,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MDWaKr7Gu5Q","annotations":[]},"5n7NCViB5TU":{"duration":121.44,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=5n7NCViB5TU","annotations":[{"segment":[24.250181653524645,38.08036312324492],"label":"Discus throw"},{"segment":[62.14109007176286,71.61381721684867],"label":"Discus throw"},{"segment":[97.00072596567863,106.28399856786271],"label":"Discus throw"}]},"N8nv95D4-pw":{"duration":66.94,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=N8nv95D4-pw","annotations":[{"segment":[18.79838262918272,64.95885528549142],"label":"Doing step aerobics"}]},"SthTqCtPg7s":{"duration":216.81,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SthTqCtPg7s","annotations":[{"segment":[76.43957655449076,158.29080454646757],"label":"Bungee jumping"},{"segment":[173.17284463026522,190.7607116630265],"label":"Bungee jumping"}]},"LfMnWmwQBpI":{"duration":223.399,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LfMnWmwQBpI","annotations":[]},"Vshvpb9eecg":{"duration":108.972,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Vshvpb9eecg","annotations":[{"segment":[9.350107011083164,27.37031325062526],"label":"Baking cookies"},{"segment":[85.680980610653,92.14105454558317],"label":"Baking cookies"}]},"02yDi9BaDO8":{"duration":77.81,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=02yDi9BaDO8","annotations":[{"segment":[7.757212655226209,73.32990088143525],"label":"Doing step aerobics"}]},"-0NfmccuL9Q":{"duration":215.691,"subset":"training","resolution":"1906x1080","url":"https://www.youtube.com/watch?v=-0NfmccuL9Q","annotations":[{"segment":[20.86238664784687,212.6617477651487],"label":"Changing car 
wheel"}]},"WuO75Sb0Kgg":{"duration":135.906,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=WuO75Sb0Kgg","annotations":[{"segment":[0.01,126.32317628705147],"label":"Putting in contact lenses"}]},"uLhOFVNmRu8":{"duration":87.96000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uLhOFVNmRu8","annotations":[{"segment":[22.229297971918875,25.522527301092044],"label":"Triple jump"},{"segment":[45.69355694227769,81.91907801092043],"label":"Triple jump"}]},"Wr7YbcQ_Q9g":{"duration":40.194,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Wr7YbcQ_Q9g","annotations":[{"segment":[1.2530733229329174,26.25188611544462],"label":"Cutting the grass"}]},"xhp9YNqXV20":{"duration":104.188,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xhp9YNqXV20","annotations":[]},"A9-AGBIG_x4":{"duration":167.56,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=A9-AGBIG_x4","annotations":[]},"LdzaFk5VrD0":{"duration":161.17,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LdzaFk5VrD0","annotations":[{"segment":[48.90406811918735,55.18993805738624],"label":"Putting in contact lenses"}]},"ULBhK8jXNws":{"duration":184.622,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ULBhK8jXNws","annotations":[{"segment":[34.56259458541607,38.882918908593076],"label":"Surfing"},{"segment":[43.779286474860356,46.371481068766556],"label":"Surfing"},{"segment":[81.5101188972729,167.34056174570983],"label":"Surfing"}]},"U01xasUtlvw":{"duration":188.9,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=U01xasUtlvw","annotations":[{"segment":[90.46863114252461,110.21259950261955],"label":"Fixing 
bicycle"}]},"T3XGYHEFVIQ":{"duration":26.1,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=T3XGYHEFVIQ","annotations":[{"segment":[2.7601043635418123,21.79671078966934],"label":"Welding"}]},"QtiqvB4uw2Y":{"duration":36.99,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=QtiqvB4uw2Y","annotations":[{"segment":[6.544576443057722,29.09020982839313],"label":"Blowing leaves"}]},"JGKjMJm0HRI":{"duration":41.239,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=JGKjMJm0HRI","annotations":[{"segment":[0.2,37.0057454692608],"label":"Riding bumper cars"}]},"PDMdNcUsPZ0":{"duration":150.814,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=PDMdNcUsPZ0","annotations":[]},"NA8fCmbHwU8":{"duration":83.894,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NA8fCmbHwU8","annotations":[{"segment":[12.989148615282895,36.38006980687168],"label":"Beach soccer"},{"segment":[40.43101146575019,64.47531034425485],"label":"Beach soccer"}]},"1SQAnbh_lcM":{"duration":224.12,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=1SQAnbh_lcM","annotations":[{"segment":[5.942865834633385,214.99191107644305],"label":"Sharpening knives"}]},"7H5oYHs7EJw":{"duration":146.635,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=7H5oYHs7EJw","annotations":[{"segment":[21.46971461066254,142.979163258242],"label":"Skiing"}]},"o7O6XJYVDRE":{"duration":151.57999999999998,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=o7O6XJYVDRE","annotations":[{"segment":[17.262605188767548,129.114827850234],"label":"Wrapping presents"}]},"m7XKITsRtVw":{"duration":79.74,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=m7XKITsRtVw","annotations":[{"segment":[51.499612670826835,55.97783985959438],"label":"Drinking 
coffee"}]},"S2VKbwm0uE8":{"duration":137.65,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=S2VKbwm0uE8","annotations":[{"segment":[94.98976989079563,118.60304602184087],"label":"Bungee jumping"}]},"bJx4unQR6nU":{"duration":71.26,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=bJx4unQR6nU","annotations":[{"segment":[40.68941534804249,50.472661697347895],"label":"Removing curlers"}]},"cQnoiDzv1vk":{"duration":22.27,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cQnoiDzv1vk","annotations":[]},"orR865dd2l4":{"duration":59.653,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=orR865dd2l4","annotations":[{"segment":[0.01,59.651132],"label":"Shuffleboard"}]},"qNxA4UTadGo":{"duration":180.09,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=qNxA4UTadGo","annotations":[{"segment":[32.59110813956368,94.9637461307976],"label":"Springboard diving"},{"segment":[113.22600500210486,159.30308781435258],"label":"Springboard diving"}]},"bH6KL0ai3Ww":{"duration":55.66,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bH6KL0ai3Ww","annotations":[{"segment":[0.8672126833073323,52.37964607176286],"label":"Using the pommel horse"}]},"MVUqd8iVUEk":{"duration":42.77,"subset":"validation","resolution":"292x240","url":"https://www.youtube.com/watch?v=MVUqd8iVUEk","annotations":[{"segment":[6.5345054602184085,40.873998439937594],"label":"Using the pommel horse"}]},"mgoEYrTMKtE":{"duration":82.153,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=mgoEYrTMKtE","annotations":[]},"2YSsqivrvR4":{"duration":74.61,"subset":"training","resolution":"176x144","url":"https://www.youtube.com/watch?v=2YSsqivrvR4","annotations":[{"segment":[5.470309770202808,68.32067734274571],"label":"Playing 
guitarra"}]},"cn7MtTHk9ZA":{"duration":41.43,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cn7MtTHk9ZA","annotations":[]},"T_CvR3zm6K8":{"duration":193.144,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=T_CvR3zm6K8","annotations":[{"segment":[10.244745419748764,19.28422667246826],"label":"Beach soccer"},{"segment":[23.80396729882801,54.5382035580743],"label":"Beach soccer"},{"segment":[59.961892309705995,85.5737558590779],"label":"Beach soccer"},{"segment":[90.39481252719497,118.11588836886808],"label":"Beach soccer"},{"segment":[131.67511024794734,136.4961669160644],"label":"Beach soccer"},{"segment":[140.4132754589095,169.94224599687988],"label":"Beach soccer"},{"segment":[179.5843592449298,184.40541586895478],"label":"Beach soccer"}]},"2VYQ4q24D-4":{"duration":92.95,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2VYQ4q24D-4","annotations":[{"segment":[0.2,44.880358814352576],"label":"Longboarding"},{"segment":[45.169909516380656,90.79370826833073],"label":"Longboarding"}]},"_1JGrGGCY9k":{"duration":184.51,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_1JGrGGCY9k","annotations":[{"segment":[16.98256227347894,181.6270644648915],"label":"Zumba"}]},"CbQPrRwG2BM":{"duration":96.71000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CbQPrRwG2BM","annotations":[{"segment":[16.052534990639625,33.235530191887676],"label":"Getting a haircut"}]},"9x-Ffd1kP1g":{"duration":152.393,"subset":"testing","resolution":"638x360","url":"https://www.youtube.com/watch?v=9x-Ffd1kP1g","annotations":[]},"0GpNcvAVWVg":{"duration":194.096,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0GpNcvAVWVg","annotations":[{"segment":[44.97704754818594,53.73097045068027],"label":"Fun sliding 
down"}]},"UeMcMSTO-Yg":{"duration":128.57,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UeMcMSTO-Yg","annotations":[{"segment":[0,2.5071935842433697],"label":"Grooming horse"}]},"Xq2LIzE5eDs":{"duration":119.0,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Xq2LIzE5eDs","annotations":[{"segment":[3.7130192199687992,44.92753256162247],"label":"Playing water polo"},{"segment":[57.366146948517944,118.44531311700469],"label":"Playing water polo"}]},"AUHORHUgC-s":{"duration":144.057,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AUHORHUgC-s","annotations":[{"segment":[59.33053010920437,68.54474122464897],"label":"Sharpening knives"},{"segment":[104.5026382605304,108.77263853354133],"label":"Sharpening knives"},{"segment":[117.98684964898595,127.42579762090483],"label":"Sharpening knives"}]},"4XEowfJ-1W0":{"duration":193.144,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4XEowfJ-1W0","annotations":[{"segment":[21.9934992199688,170.5249391575663],"label":"Having an ice cream"}]},"cWrOETjOOTc":{"duration":4.55,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cWrOETjOOTc","annotations":[{"segment":[0.01,4.416210673946957],"label":"Fixing bicycle"}]},"YeikEC85CGk":{"duration":206.937,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=YeikEC85CGk","annotations":[{"segment":[15.49600517898267,43.90534800711757],"label":"Beach soccer"},{"segment":[50.68485027292248,69.08635642296441],"label":"Beach soccer"},{"segment":[74.89735836508291,161.093887173174],"label":"Beach soccer"},{"segment":[167.22772255652131,184.33789331357255],"label":"Beach soccer"}]},"tSk1GWyofaU":{"duration":37.13,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=tSk1GWyofaU","annotations":[{"segment":[0.8388208377535101,36.58994343993759],"label":"Springboard 
diving"},{"segment":[33.88598304,34.40131968],"label":"Springboard diving"}]},"egowZOcbEkI":{"duration":163.63,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=egowZOcbEkI","annotations":[]},"CIgdBoHjGXU":{"duration":223.98,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CIgdBoHjGXU","annotations":[{"segment":[34.59280238148302,104.82667388328186],"label":"Getting a haircut"}]},"S3OJBVHL9y8":{"duration":176.217,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=S3OJBVHL9y8","annotations":[{"segment":[0,164.34364742589705],"label":"Tug of war"}]},"DfFqlrv7F2g":{"duration":78.461,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DfFqlrv7F2g","annotations":[{"segment":[22.132544461778473,27.757390015600624],"label":"Putting on shoes"},{"segment":[36.072379095163804,41.08582839313573],"label":"Putting on shoes"}]},"8r54fZNICBQ":{"duration":40.566,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8r54fZNICBQ","annotations":[]},"eGTZNTfgg24":{"duration":94.34,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=eGTZNTfgg24","annotations":[{"segment":[32.52688196235332,39.591543379095164],"label":"Long jump"},{"segment":[56.07575474882996,71.38252244929797],"label":"Long jump"}]},"bFgXdHQnTtQ":{"duration":165.12,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bFgXdHQnTtQ","annotations":[{"segment":[4.894267212865386,164.6019339781591],"label":"Cheerleading"}]},"guwcWEMMlBA":{"duration":193.423,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=guwcWEMMlBA","annotations":[]},"k3MFZmhXynI":{"duration":29.0,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=k3MFZmhXynI","annotations":[{"segment":[3.7576595700000004,17.78863338],"label":"Mowing the 
lawn"},{"segment":[3.75765957,17.78863338],"label":"Mowing the lawn"}]},"0-4oritGq84":{"duration":199.46,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0-4oritGq84","annotations":[]},"8kyg5u6o21k":{"duration":165.257,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8kyg5u6o21k","annotations":[{"segment":[14.69518842794528,153.1393308954758],"label":"Longboarding"}]},"oBZ_oAUGO0E":{"duration":119.19,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=oBZ_oAUGO0E","annotations":[{"segment":[17.292405149267193,119.18743764172335],"label":"Getting a tattoo"}]},"EE1UoJSNyms":{"duration":35.434,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EE1UoJSNyms","annotations":[{"segment":[0.01,35.433649],"label":"Raking leaves"}]},"MYWnPFGvxwQ":{"duration":46.14,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=MYWnPFGvxwQ","annotations":[{"segment":[0,46.137686],"label":"Mowing the lawn"}]},"MEuGzNPI1pw":{"duration":79.6,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MEuGzNPI1pw","annotations":[{"segment":[19.247572882507136,56.5009388923557],"label":"Using parallel bars"}]},"qhjDVfcIxqk":{"duration":180.442,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qhjDVfcIxqk","annotations":[]},"LQAFcEvifrU":{"duration":205.265,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=LQAFcEvifrU","annotations":[{"segment":[117.1571249378445,182.59911745554254],"label":"Camel 
ride"}]},"k1GP0cJOXbQ":{"duration":192.33,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=k1GP0cJOXbQ","annotations":[]},"8teo_0gxzgk":{"duration":70.752,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8teo_0gxzgk","annotations":[]},"HsGz6S2MBU4":{"duration":13.468,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=HsGz6S2MBU4","annotations":[{"segment":[0.2,10.933229329173168],"label":"Putting in contact lenses"}]},"LbS-C68GTX8":{"duration":164.07,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=LbS-C68GTX8","annotations":[{"segment":[6.652957878315132,160.20228705148205],"label":"Dodgeball"}]},"yHtapvYRcMw":{"duration":146.541,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=yHtapvYRcMw","annotations":[{"segment":[14.63125186895476,142.6547057223089],"label":"Capoeira"}]},"AP_zXlTlZVk":{"duration":15.84,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=AP_zXlTlZVk","annotations":[]},"3bC-rAjOXJE":{"duration":7.2,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=3bC-rAjOXJE","annotations":[]},"RMkaNGdydws":{"duration":173.105,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RMkaNGdydws","annotations":[{"segment":[52.76593333812053,141.87575504142106],"label":"Hanging wallpaper"}]},"YKZnKmOizms":{"duration":235.404,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YKZnKmOizms","annotations":[]},"XD3yFrJHiv8":{"duration":157.316,"subset":"training","resolution":"636x360","url":"https://www.youtube.com/watch?v=XD3yFrJHiv8","annotations":[{"segment":[74.85357363954422,112.89391434160768],"label":"Beach 
soccer"}]},"YcjLd_XBK5Y":{"duration":62.741,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YcjLd_XBK5Y","annotations":[{"segment":[0.2,62.501],"label":"Carving jack-o-lanterns"}]},"d4B9z_WJAKs":{"duration":238.49,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=d4B9z_WJAKs","annotations":[{"segment":[8.273923288585532,227.81993808079525],"label":"Playing badminton"}]},"65mQtjchFdQ":{"duration":142.525,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=65mQtjchFdQ","annotations":[]},"A904-cbaYDw":{"duration":156.81,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=A904-cbaYDw","annotations":[]},"YZJghuPmff8":{"duration":150.071,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=YZJghuPmff8","annotations":[{"segment":[10.76949466005851,150.070566],"label":"Braiding hair"}]},"n-1KBnDiWQU":{"duration":30.3,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=n-1KBnDiWQU","annotations":[{"segment":[2.690915756630265,30.024954758190326],"label":"Kayaking"}]},"mhHoL-9mY1E":{"duration":123.37,"subset":"training","resolution":"568x320","url":"https://www.youtube.com/watch?v=mhHoL-9mY1E","annotations":[{"segment":[16.3337908,104.39438907],"label":"Cleaning shoes"}]},"4eHP5IvDl6o":{"duration":71.24,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4eHP5IvDl6o","annotations":[{"segment":[9.219474258970358,71.201],"label":"Grooming horse"}]},"kuJO1VapxuQ":{"duration":198.74,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=kuJO1VapxuQ","annotations":[{"segment":[6.304001079999999,190.1674525600624],"label":"Belly 
dance"}]},"ReKUs0km4X8":{"duration":132.82,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ReKUs0km4X8","annotations":[{"segment":[65.47655963806552,84.53935548205928],"label":"Shot put"},{"segment":[104.22376423400937,118.52086111700467],"label":"Shot put"}]},"MSr04jhuTes":{"duration":132.98,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MSr04jhuTes","annotations":[]},"bUHE8XcPZgs":{"duration":74.68,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bUHE8XcPZgs","annotations":[{"segment":[1.5457725,70.72810030811233],"label":"Using parallel bars"}]},"OEQM6wYtYlk":{"duration":52.687,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=OEQM6wYtYlk","annotations":[{"segment":[0.01,50.47909576379176],"label":"Fun sliding down"}]},"xc0Wm-TH5K8":{"duration":25.01,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xc0Wm-TH5K8","annotations":[{"segment":[6.398199032761309,17.517020522620903],"label":"Clean and jerk"}]},"rqnzzNYt2cE":{"duration":177.494,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rqnzzNYt2cE","annotations":[{"segment":[45.43816303855682,158.77507581119002],"label":"Longboarding"}]},"CGIrDfEP5lE":{"duration":95.09,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=CGIrDfEP5lE","annotations":[{"segment":[0,22.547631853573463],"label":"Mowing the lawn"},{"segment":[44.20522560766376,92.56396234624896],"label":"Mowing the lawn"}]},"56kqqlco5H0":{"duration":105.21000000000001,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=56kqqlco5H0","annotations":[{"segment":[15.161630265210608,91.70737441497661],"label":"Using the balance 
beam"}]},"VXCV3KUtCdk":{"duration":236.12,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VXCV3KUtCdk","annotations":[{"segment":[43.63856468096724,53.58152878549142],"label":"Rock climbing"},{"segment":[68.31184597737911,92.24861141419657],"label":"Rock climbing"},{"segment":[126.49659888533542,133.86175748127926],"label":"Rock climbing"},{"segment":[157.06200705850233,189.10044695085804],"label":"Rock climbing"}]},"uzXbaoWOm5o":{"duration":58.7,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=uzXbaoWOm5o","annotations":[{"segment":[50.56698907956318,51.849485179407175],"label":"Drinking coffee"}]},"34_5muoYtiQ":{"duration":73.93,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=34_5muoYtiQ","annotations":[]},"xIAq1OdWQM0":{"duration":151.44,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xIAq1OdWQM0","annotations":[{"segment":[16.537968947329322,21.735616330775677],"label":"High jump"},{"segment":[42.99871926305624,46.30631305252209],"label":"High jump"}]},"JAEMwQqh51c":{"duration":76.44,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JAEMwQqh51c","annotations":[{"segment":[13.637660400000001,58.46295866302653],"label":"Chopping wood"}]},"lfH_S2LTEXA":{"duration":218.29,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lfH_S2LTEXA","annotations":[{"segment":[19.3055676,35.757462298491944],"label":"Rock climbing"},{"segment":[42.56840749820469,48.698258177946165],"label":"Rock climbing"},{"segment":[69.47164103707006,196.83631627169848],"label":"Rock 
climbing"}]},"1FFbrlvDQ7c":{"duration":196.279,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1FFbrlvDQ7c","annotations":[{"segment":[47.14225897035882,99.18241497659906],"label":"Capoeira"},{"segment":[116.3250546021841,147.86129485179407],"label":"Capoeira"}]},"Zfo0XXcWXiU":{"duration":65.597,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=Zfo0XXcWXiU","annotations":[{"segment":[0.01,65.59637],"label":"Blow-drying hair"}]},"lNvX6h3o4EA":{"duration":32.79,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=lNvX6h3o4EA","annotations":[{"segment":[3.0177824399375974,28.745656461778474],"label":"Playing guitarra"}]},"3RRZMS4Qcvc":{"duration":141.503,"subset":"testing","resolution":"360x240","url":"https://www.youtube.com/watch?v=3RRZMS4Qcvc","annotations":[]},"AsTfp_4Um1A":{"duration":140.27,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=AsTfp_4Um1A","annotations":[{"segment":[18.047679407176286,43.64257020280811],"label":"Ballet"}]},"An9hDoF1BPU":{"duration":79.552,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=An9hDoF1BPU","annotations":[]},"3JHIcli-Wlg":{"duration":48.65,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3JHIcli-Wlg","annotations":[{"segment":[13.414722308892355,48.561],"label":"Using the pommel horse"}]},"yrf93aLQXBE":{"duration":130.845,"subset":"training","resolution":"510x360","url":"https://www.youtube.com/watch?v=yrf93aLQXBE","annotations":[{"segment":[15.513537703588145,125.33305460530423],"label":"Surfing"}]},"bJ5YjjFLGyA":{"duration":218.78,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bJ5YjjFLGyA","annotations":[{"segment":[129.6970309288562,183.28238318104152],"label":"Preparing 
pasta"}]},"JouywOCeGHI":{"duration":206.17000000000002,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JouywOCeGHI","annotations":[{"segment":[127.86973981486527,174.10248747841666],"label":"Rock climbing"}]},"1lu7Sbu3a6o":{"duration":61.719,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1lu7Sbu3a6o","annotations":[{"segment":[32.23578003120125,61.681],"label":"Snow tubing"}]},"0FgX7ryQ8hI":{"duration":182.579,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0FgX7ryQ8hI","annotations":[]},"y3E6Qx4ZF1k":{"duration":201.735,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=y3E6Qx4ZF1k","annotations":[]},"fQDj7pnGdVY":{"duration":164.24,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fQDj7pnGdVY","annotations":[{"segment":[4.611895953389156,162.18500648049923],"label":"Doing kickboxing"}]},"k8ZXFB6xqZs":{"duration":216.06,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=k8ZXFB6xqZs","annotations":[{"segment":[45.487628705148204,59.97628081123245],"label":"Tennis serve with ball bouncing"}]},"okxCZZIE-qU":{"duration":53.85,"subset":"testing","resolution":"240x176","url":"https://www.youtube.com/watch?v=okxCZZIE-qU","annotations":[]},"ySQBZ8hqBo4":{"duration":85.87,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ySQBZ8hqBo4","annotations":[{"segment":[0,85.821],"label":"Using the balance beam"}]},"XQWebGbAjxU":{"duration":33.135,"subset":"training","resolution":"293x240","url":"https://www.youtube.com/watch?v=XQWebGbAjxU","annotations":[{"segment":[10.40599375975039,14.063546021840873],"label":"Curling"}]},"xMChLTVPzNI":{"duration":183.44,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xMChLTVPzNI","annotations":[{"segment":[45.16035503,147.75666913],"label":"Preparing 
pasta"}]},"YonmpJvwmKM":{"duration":232.39,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YonmpJvwmKM","annotations":[{"segment":[11.681780907385745,214.7473257812175],"label":"Tai chi"}]},"UZNw4utsywI":{"duration":170.02,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=UZNw4utsywI","annotations":[{"segment":[19.355932917316693,149.04836193447738],"label":"Skateboarding"}]},"aw4ehW-wTKA":{"duration":143.662,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aw4ehW-wTKA","annotations":[{"segment":[17.929717067082684,137.6105784898596],"label":"Slacklining"}]},"kbRciA51ouY":{"duration":122.88,"subset":"training","resolution":"424x236","url":"https://www.youtube.com/watch?v=kbRciA51ouY","annotations":[{"segment":[0.01,122.88],"label":"Knitting"}]},"4R37E4Kevs4":{"duration":26.4,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4R37E4Kevs4","annotations":[{"segment":[0.28787363494539786,23.52338845553822],"label":"Smoking hookah"}]},"2DvHUEFc5VA":{"duration":191.426,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=2DvHUEFc5VA","annotations":[{"segment":[17.918116887940826,20.90446970259763],"label":"Making a cake"},{"segment":[27.17581061337692,182.16752034321374],"label":"Making a cake"}]},"5RKzXZ-0xQ8":{"duration":14.4,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=5RKzXZ-0xQ8","annotations":[{"segment":[0,9.073531800312013],"label":"Doing nails"}]},"4Q5YJKHa5W0":{"duration":131.36,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=4Q5YJKHa5W0","annotations":[{"segment":[13.524873828393135,113.11712656474258],"label":"Making a 
sandwich"}]},"hKa9wV1orM8":{"duration":100.008,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=hKa9wV1orM8","annotations":[{"segment":[0.20000000000000004,97.32939781591263],"label":"Riding bumper cars"}]},"tkQK_2celDY":{"duration":203.5,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tkQK_2celDY","annotations":[{"segment":[94.48908312,116.85366432000002],"label":"Paintball"},{"segment":[61.11299391965679,127.14677436271451],"label":"Paintball"}]},"-lJS58hyo1c":{"duration":35.3,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=-lJS58hyo1c","annotations":[{"segment":[1.1838191809672387,32.67891553042122],"label":"Clean and jerk"}]},"rteN6_HkMkI":{"duration":121.3,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rteN6_HkMkI","annotations":[{"segment":[0,6.097683167722007],"label":"Walking the dog"},{"segment":[30.471238742488364,36.13950748080612],"label":"Walking the dog"},{"segment":[76.95104239669396,81.48565738734817],"label":"Walking the dog"}]},"xJNaedisXfQ":{"duration":184.646,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=xJNaedisXfQ","annotations":[]},"H1cKUnazzFM":{"duration":30.4,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=H1cKUnazzFM","annotations":[{"segment":[11.644127925117006,23.47759126365055],"label":"Hammer 
throw"}]},"iMiKGoQ7RjE":{"duration":127.896,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iMiKGoQ7RjE","annotations":[{"segment":[10.973830926026157,31.724347586148344],"label":"Croquet"},{"segment":[55.86677562340589,94.57446917316693],"label":"Croquet"}]},"VAG6ECk5WYo":{"duration":214.437,"subset":"validation","resolution":"852x480","url":"https://www.youtube.com/watch?v=VAG6ECk5WYo","annotations":[{"segment":[55.178728549141965,177.2407644305772],"label":"Curling"}]},"45WfFPM5yvw":{"duration":170.16,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=45WfFPM5yvw","annotations":[{"segment":[23.890833837435128,169.9558276643991],"label":"Windsurfing"}]},"oy1XjDer7o4":{"duration":77.346,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=oy1XjDer7o4","annotations":[{"segment":[11.342422452418097,20.392227600624025],"label":"Cutting the grass"},{"segment":[27.39074358190328,33.90660328861154],"label":"Cutting the grass"},{"segment":[36.56121279875195,38.25050975975039],"label":"Cutting the grass"},{"segment":[39.215822308892356,41.02578333853354],"label":"Cutting the grass"},{"segment":[44.16304912324493,45.61101794695788],"label":"Cutting the grass"},{"segment":[55.74679971294852,59.366721772230896],"label":"Cutting the grass"},{"segment":[65.03793299843994,70.22648795007801],"label":"Cutting the grass"}]},"8eGaK2mVEhY":{"duration":78.46000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8eGaK2mVEhY","annotations":[{"segment":[23.99095803432137,77.48100222308892],"label":"Pole vault"}]},"nHuu6aInr1w":{"duration":190.13,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=nHuu6aInr1w","annotations":[{"segment":[131.39683303794737,174.40482564430576],"label":"Starting a 
campfire"}]},"NHDjJ8auZQ0":{"duration":111.74000000000001,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NHDjJ8auZQ0","annotations":[{"segment":[0.6972506708268331,110.68854399375975],"label":"Ping-pong"}]},"M5UrBI13R3s":{"duration":40.543,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=M5UrBI13R3s","annotations":[{"segment":[28.71462670514821,35.98815549609985],"label":"Peeling potatoes"}]},"rFM3OUUL5fI":{"duration":227.788,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rFM3OUUL5fI","annotations":[{"segment":[39.8006685981725,179.10300869177624],"label":"Slacklining"}]},"jimvzigX1ak":{"duration":32.183,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jimvzigX1ak","annotations":[{"segment":[0.01,32.141],"label":"Playing blackjack"}]},"Tywf5EzWntY":{"duration":157.14,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Tywf5EzWntY","annotations":[{"segment":[0.01,157.121],"label":"Wakeboarding"}]},"NhM6jEV08s4":{"duration":13.42,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=NhM6jEV08s4","annotations":[{"segment":[5.488478159126365,10.42998138],"label":"Chopping wood"}]},"o_e0KLnKpj8":{"duration":132.31,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=o_e0KLnKpj8","annotations":[{"segment":[3.970533069999999,81.51963497999999],"label":"Mowing the lawn"},{"segment":[122.5406858096724,132.03666600000003],"label":"Mowing the lawn"}]},"tTIsHfF0UgA":{"duration":76.859,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=tTIsHfF0UgA","annotations":[{"segment":[0.01,76.85804988662132],"label":"Having an ice 
cream"}]},"0uh3HP895OA":{"duration":131.634,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0uh3HP895OA","annotations":[{"segment":[58.690680187207484,91.31941497659906],"label":"Knitting"}]},"1o9RGnujlkI":{"duration":130.5,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1o9RGnujlkI","annotations":[{"segment":[23.402051482059285,96.47194071762871],"label":"Archery"},{"segment":[5.49523712948518,15.264547581903278],"label":"Archery"},{"segment":[106.44477847113885,130.461],"label":"Archery"}]},"pouxwDABDrg":{"duration":222.59,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pouxwDABDrg","annotations":[{"segment":[2.7779881916365095,217.7248227051482],"label":"Doing step aerobics"}]},"yJ5zO0T9jgk":{"duration":152.21,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=yJ5zO0T9jgk","annotations":[{"segment":[12.822415750390016,39.179603681747274],"label":"Playing badminton"},{"segment":[119.91333248049924,123.47511463338535],"label":"Playing badminton"}]},"XGwP_Y-x-dI":{"duration":93.09,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=XGwP_Y-x-dI","annotations":[{"segment":[19.97241554,83.77065080999999],"label":"Shoveling snow"}]},"LvFnL9lr-Yc":{"duration":90.721,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LvFnL9lr-Yc","annotations":[]},"eVwRbDXyBbc":{"duration":7.547,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=eVwRbDXyBbc","annotations":[{"segment":[0.2,6.876469065420562],"label":"Playing pool"}]},"2QO41PbOUJ0":{"duration":85.52,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=2QO41PbOUJ0","annotations":[{"segment":[3.6668811466458657,84.07158410764431],"label":"Smoking a 
cigarette"}]},"aKacWW7Mn2c":{"duration":22.78,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aKacWW7Mn2c","annotations":[{"segment":[0.7462565569422778,17.66140518096724],"label":"Plataform diving"}]},"7yndv2xOmn0":{"duration":106.069,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=7yndv2xOmn0","annotations":[{"segment":[0.2,106.021],"label":"Making a cake"}]},"XHuKkrKjnhQ":{"duration":228.3,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=XHuKkrKjnhQ","annotations":[{"segment":[110.40961649350328,131.42305963258937],"label":"Wrapping presents"}]},"NVnjzpoY1O8":{"duration":59.56,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=NVnjzpoY1O8","annotations":[{"segment":[0,9.477436296411856],"label":"Breakdancing"},{"segment":[14.587818613104524,24.158170951638063],"label":"Breakdancing"},{"segment":[27.503148468018722,32.42769870046802],"label":"Breakdancing"},{"segment":[37.445164975039,41.16180665990639],"label":"Breakdancing"}]},"h_Kt_rqmODs":{"duration":72.49,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=h_Kt_rqmODs","annotations":[{"segment":[0.01,72.461],"label":"Hopscotch"}]},"7wBrvMGZROQ":{"duration":5.04,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=7wBrvMGZROQ","annotations":[{"segment":[0.01,5.038730158730159],"label":"Using parallel bars"}]},"rMZtiiLAqoY":{"duration":118.144,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=rMZtiiLAqoY","annotations":[{"segment":[65.9612692119612,98.85003715286322],"label":"Table 
soccer"}]},"7A7XJLhRVVE":{"duration":184.69,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7A7XJLhRVVE","annotations":[{"segment":[18.293151326053042,159.74098283931357],"label":"Sailing"}]},"w24Pub_hNSw":{"duration":48.135,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=w24Pub_hNSw","annotations":[{"segment":[0.01,48.081],"label":"Using the monkey bar"}]},"qHNbc4EYQR0":{"duration":102.49000000000001,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qHNbc4EYQR0","annotations":[]},"y_HCdBEJbNE":{"duration":121.603,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=y_HCdBEJbNE","annotations":[{"segment":[0.01,121.561],"label":"Plastering"}]},"PcSnU_nGi5E":{"duration":82.28999999999999,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PcSnU_nGi5E","annotations":[{"segment":[3.976366614664587,10.646400936037443],"label":"Tennis serve with ball bouncing"},{"segment":[55.68267394695788,57.09398595943838],"label":"Tennis serve with ball bouncing"},{"segment":[74.67123556942278,77.87876287051482],"label":"Tennis serve with ball bouncing"}]},"993xtlhuVII":{"duration":180.47,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=993xtlhuVII","annotations":[{"segment":[22.21343685,95.3360502],"label":"Playing violin"},{"segment":[106.15492695000002,161.4195225826833],"label":"Playing violin"}]},"itlF0DEBetk":{"duration":54.103,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=itlF0DEBetk","annotations":[{"segment":[7.0054710124805,41.442003218408736],"label":"Spread mulch"},{"segment":[44.56492403120125,53.902493],"label":"Spread 
mulch"}]},"6QhZNQlsGys":{"duration":85.472,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=6QhZNQlsGys","annotations":[{"segment":[3.598076443057723,5.996794071762871],"label":"Slacklining"},{"segment":[11.860326053042122,19.056478939157568],"label":"Slacklining"},{"segment":[38.64600624024961,52.23873946957878],"label":"Slacklining"},{"segment":[57.835747269890796,63.832541341653666],"label":"Slacklining"},{"segment":[77.8250608424337,82.48923400936037],"label":"Slacklining"}]},"047dUA_39x8":{"duration":81.131,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=047dUA_39x8","annotations":[{"segment":[27.069163806552265,75.00941185647426],"label":"Calf roping"}]},"Z6WJ0A9VvxQ":{"duration":218.663,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Z6WJ0A9VvxQ","annotations":[{"segment":[53.882929797191885,218.601],"label":"Running a marathon"}]},"-7wfTI8Qv1Q":{"duration":8.081,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=-7wfTI8Qv1Q","annotations":[{"segment":[0.2,7.880544217687076],"label":"Swinging at the playground"}]},"wb8TkqxxEuo":{"duration":234.197,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wb8TkqxxEuo","annotations":[{"segment":[0.01,0.02],"label":"Tug of 
war"}]},"gdyEfPbUEjw":{"duration":156.62,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gdyEfPbUEjw","annotations":[{"segment":[10.866400156006241,152.00750780031203],"label":"Windsurfing"}]},"9PFlQcBl_jU":{"duration":72.865,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=9PFlQcBl_jU","annotations":[{"segment":[0.01,72.86421768707483],"label":"Bullfighting"}]},"lAsPxkZD6Xc":{"duration":132.17,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=lAsPxkZD6Xc","annotations":[{"segment":[11.132003120124805,83.2838751950078],"label":"Getting a tattoo"}]},"U2eN4DbVLfE":{"duration":191.89,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=U2eN4DbVLfE","annotations":[]},"LM8C4FSpN0w":{"duration":61.02,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=LM8C4FSpN0w","annotations":[{"segment":[0.47599095800566715,58.64208428081123],"label":"Playing water polo"}]},"j6Kf_y7k-yU":{"duration":156.782,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=j6Kf_y7k-yU","annotations":[{"segment":[72.87796879875195,86.09502652106084],"label":"Sumo"},{"segment":[143.0840639625585,151.15547269890797],"label":"Sumo"}]},"_DlDtsPxdyY":{"duration":189.615,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_DlDtsPxdyY","annotations":[{"segment":[53.74414641744548,129.0450109034268],"label":"Playing pool"}]},"6BKkSQ7uGco":{"duration":61.927,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=6BKkSQ7uGco","annotations":[{"segment":[6.762766510660427,16.327250575737313],"label":"Elliptical trainer"},{"segment":[18.162858614664586,31.88161352574103],"label":"Elliptical trainer"},{"segment":[33.81383252730109,44.92409178627145],"label":"Elliptical trainer"},{"segment":[45.69697938689548,61.727619],"label":"Elliptical 
trainer"}]},"HzAlvJ1fNWU":{"duration":48.507,"subset":"validation","resolution":"853x480","url":"https://www.youtube.com/watch?v=HzAlvJ1fNWU","annotations":[{"segment":[1.5134628393135725,48.27946457410297],"label":"Snow tubing"}]},"L5l0tTAhVJM":{"duration":112.896,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=L5l0tTAhVJM","annotations":[]},"QGzrtgTrwiQ":{"duration":170.25,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=QGzrtgTrwiQ","annotations":[{"segment":[16.467113528861155,153.78159247113885],"label":"Triple jump"}]},"c-8tNd5MDT8":{"duration":211.023,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=c-8tNd5MDT8","annotations":[]},"wJk3KOUAwXA":{"duration":133.19,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wJk3KOUAwXA","annotations":[]},"f07eWOCKLI8":{"duration":129.823,"subset":"validation","resolution":"854x470","url":"https://www.youtube.com/watch?v=f07eWOCKLI8","annotations":[{"segment":[19.433784711388455,122.89714040561623],"label":"Decorating the Christmas tree"}]},"oNqtuh6qa2E":{"duration":59.33,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=oNqtuh6qa2E","annotations":[{"segment":[7.867072778149222,26.92095228],"label":"Playing bagpipes"}]},"len7R78v5NY":{"duration":98.291,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=len7R78v5NY","annotations":[{"segment":[0.01,98.221],"label":"Using the rowing 
machine"}]},"9VtxNvLx6zA":{"duration":26.84,"subset":"testing","resolution":"800x480","url":"https://www.youtube.com/watch?v=9VtxNvLx6zA","annotations":[]},"C4td7rprQGM":{"duration":54.64,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=C4td7rprQGM","annotations":[{"segment":[14.252743022620905,44.82009624],"label":"Hopscotch"}]},"4KE6dUAGZ94":{"duration":208.59,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4KE6dUAGZ94","annotations":[{"segment":[6.833494539781592,205.98104800624026],"label":"Using the pommel horse"}]},"RPkH81M6-NE":{"duration":143.45,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=RPkH81M6-NE","annotations":[{"segment":[11.861158399375974,135.39624210608426],"label":"Sailing"}]},"SlT6OBmQqpc":{"duration":52.315,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SlT6OBmQqpc","annotations":[{"segment":[28.564890717628703,30.360398134165365],"label":"Throwing darts"},{"segment":[31.99267760374415,33.298501179407175],"label":"Throwing darts"},{"segment":[36.31821819812793,43.25540594383775],"label":"Throwing darts"},{"segment":[47.58094653822153,49.13161203432137],"label":"Throwing darts"}]},"bPvxr2-L_HU":{"duration":10.798,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bPvxr2-L_HU","annotations":[{"segment":[1.3405304212168487,6.8326474258970356],"label":"Doing a powerbomb"}]},"TnqjltyMoVA":{"duration":54.753,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=TnqjltyMoVA","annotations":[]},"iUVz4A5oblQ":{"duration":203.222,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=iUVz4A5oblQ","annotations":[{"segment":[35.50120436817473,177.8405007800312],"label":"Installing 
carpet"}]},"bGDwE1v16HU":{"duration":185.53,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bGDwE1v16HU","annotations":[{"segment":[91.72773322932917,155.09799687987518],"label":"Spinning"}]},"bW4AfWvteIo":{"duration":56.982,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bW4AfWvteIo","annotations":[{"segment":[27.560168204475026,41.73903357125419],"label":"Table soccer"}]},"CIcVR4m7nOc":{"duration":87.563,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=CIcVR4m7nOc","annotations":[{"segment":[9.410917413694449,82.78879521902219],"label":"River tubing"}]},"saJTy2rnf2Y":{"duration":121.998,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=saJTy2rnf2Y","annotations":[]},"UhB739uudDo":{"duration":81.549,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UhB739uudDo","annotations":[{"segment":[8.014896367283262,72.64294961458322],"label":"Disc dog"}]},"QYb2ektVZek":{"duration":150.527,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QYb2ektVZek","annotations":[{"segment":[15.22206700704517,55.03363621741464],"label":"Wakeboarding"},{"segment":[76.57872073126165,120.60563104558338],"label":"Wakeboarding"},{"segment":[122.24493093360826,133.01747305491466],"label":"Wakeboarding"}]},"Chj1_JbBc4A":{"duration":169.88,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Chj1_JbBc4A","annotations":[{"segment":[9.008726989079564,125.85721528861154],"label":"Paintball"}]},"AeOUzM7nl5w":{"duration":79.715,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=AeOUzM7nl5w","annotations":[{"segment":[36.68589823865063,68.14871832137285],"label":"Putting in contact 
lenses"}]},"p2C4MeV02GU":{"duration":232.85,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=p2C4MeV02GU","annotations":[{"segment":[166.51990405616223,184.67910842433696],"label":"Clipping cat claws"}]},"WSpfyZuoi3A":{"duration":89.676,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WSpfyZuoi3A","annotations":[{"segment":[0.20000000000000004,20.005607982142415],"label":"Snow tubing"},{"segment":[53.30165483354028,85.75830554582727],"label":"Snow tubing"}]},"9JiZDwpdjb8":{"duration":210.86,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9JiZDwpdjb8","annotations":[]},"r2K5IcpZEU4":{"duration":214.04,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=r2K5IcpZEU4","annotations":[]},"SGAUpoTMSnA":{"duration":198.82999999999998,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=SGAUpoTMSnA","annotations":[]},"rRSTE1EsAUM":{"duration":81.67,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rRSTE1EsAUM","annotations":[{"segment":[31.314877440000004,70.3290368],"label":"High jump"}]},"1HOnhMOjDEg":{"duration":54.103,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=1HOnhMOjDEg","annotations":[]},"_KOVk8iGbrA":{"duration":183.02,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_KOVk8iGbrA","annotations":[{"segment":[81.37380580937524,112.49571750489068],"label":"Getting a piercing"},{"segment":[144.18867334165367,165.03178849797192],"label":"Getting a 
piercing"}]},"DCYz8p4zH6o":{"duration":34.482,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DCYz8p4zH6o","annotations":[{"segment":[6.724187012480499,25.390530159126364],"label":"Kneeling"}]},"swId_MFHywI":{"duration":223.93,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=swId_MFHywI","annotations":[{"segment":[9.432434970868222,211.35641323612128],"label":"Breakdancing"}]},"JQpx7CcTstU":{"duration":49.877,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JQpx7CcTstU","annotations":[{"segment":[6.528804992199688,44.164879875195005],"label":"Snow tubing"}]},"gCx-ucvPhDY":{"duration":5.6,"subset":"training","resolution":"640x354","url":"https://www.youtube.com/watch?v=gCx-ucvPhDY","annotations":[{"segment":[2.12743132,5.488109120000001],"label":"Plataform diving"}]},"unI7FhokvbM":{"duration":124.993,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=unI7FhokvbM","annotations":[{"segment":[41.63183914023227,49.626712230393984],"label":"Installing carpet"},{"segment":[85.5061426838026,92.72102676599063],"label":"Installing carpet"}]},"3gQsAKZ71tU":{"duration":106.46000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3gQsAKZ71tU","annotations":[{"segment":[20.42894934572893,22.255928555509566],"label":"Playing squash"},{"segment":[32.38735871883855,39.52918653889013],"label":"Playing squash"},{"segment":[44.51185659906396,48.99626006240249],"label":"Playing squash"},{"segment":[52.31804040561622,92.34549354134165],"label":"Playing squash"}]},"5rVXCKLihyg":{"duration":92.0,"subset":"validation","resolution":"648x480","url":"https://www.youtube.com/watch?v=5rVXCKLihyg","annotations":[{"segment":[0.01,91.941],"label":"Ironing 
clothes"}]},"yX_DJiboktI":{"duration":158.059,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=yX_DJiboktI","annotations":[{"segment":[10.35638956986851,158.058231292517],"label":"Baton twirling"}]},"jSEsIZ9ucgM":{"duration":83.55,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=jSEsIZ9ucgM","annotations":[]},"Xmq-gWQw02A":{"duration":149.583,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Xmq-gWQw02A","annotations":[]},"YVbMt3qsB3o":{"duration":61.49,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YVbMt3qsB3o","annotations":[{"segment":[6.71459029789763,10.5514987675507],"label":"Shot put"},{"segment":[15.539480003120124,24.652138029641183],"label":"Shot put"}]},"zto8JvkVLVw":{"duration":47.39,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zto8JvkVLVw","annotations":[{"segment":[4.657864900156007,42.43832464586583],"label":"Using the pommel horse"}]},"uM6ErLnAGW4":{"duration":193.26,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uM6ErLnAGW4","annotations":[{"segment":[8.441911631825274,45.224526599063964],"label":"Carving jack-o-lanterns"}]},"sdMr24Qyrw8":{"duration":119.47,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=sdMr24Qyrw8","annotations":[{"segment":[50.50775789671042,78.09133047498771],"label":"Getting a piercing"},{"segment":[82.9370929875195,93.56049590951639],"label":"Getting a piercing"}]},"LjOgqXGS5ug":{"duration":71.75,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LjOgqXGS5ug","annotations":[]},"86S2O0fpX4g":{"duration":18.042,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=86S2O0fpX4g","annotations":[{"segment":[0.2,17.411610449436388],"label":"Doing 
crunches"}]},"Qfi8dqC4E4s":{"duration":181.93,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Qfi8dqC4E4s","annotations":[{"segment":[27.816711388455534,42.29275507020281],"label":"Triple jump"},{"segment":[45.98272698907956,60.74261466458658],"label":"Triple jump"},{"segment":[65.85180655226209,82.8824461778471],"label":"Triple jump"},{"segment":[85.72088611544461,107.0091856474259],"label":"Triple jump"},{"segment":[122.90444929797191,134.25820904836195],"label":"Triple jump"},{"segment":[138.79971294851794,161.7910764430577],"label":"Triple jump"}]},"0MyaFtXcTKI":{"duration":217.892,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0MyaFtXcTKI","annotations":[{"segment":[47.587113884555386,49.6265616224649],"label":"Fun sliding down"},{"segment":[54.385273010920436,62.88297191887675],"label":"Fun sliding down"},{"segment":[72.06048673946958,77.83892199687988],"label":"Fun sliding down"},{"segment":[79.8783697347894,90.41551638065522],"label":"Fun sliding down"},{"segment":[102.65220280811232,104.35174258970359],"label":"Fun sliding down"},{"segment":[113.52925741029641,118.6278767550702],"label":"Fun sliding down"},{"segment":[164.17554290171608,169.27416224648985],"label":"Fun sliding down"},{"segment":[179.1479360374415,185.60678003120125],"label":"Fun sliding down"},{"segment":[197.16471138845554,200.90404212168488],"label":"Fun sliding down"}]},"0_GpcrCLdag":{"duration":84.103,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0_GpcrCLdag","annotations":[{"segment":[16.480464290349435,19.488803374330775],"label":"Having an ice cream"},{"segment":[21.712358349447413,24.459102730473855],"label":"Having an ice cream"},{"segment":[30.21418619548163,37.5388378782188],"label":"Having an ice cream"},{"segment":[44.34030015504617,55.719669733584276],"label":"Having an ice cream"},{"segment":[72.98492012860761,76.25485391554385],"label":"Having an ice 
cream"}]},"CTJej0rZMiI":{"duration":96.5,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=CTJej0rZMiI","annotations":[]},"ofZURf7w9wk":{"duration":164.7,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=ofZURf7w9wk","annotations":[{"segment":[115.26377874306151,138.31653416312693],"label":"Doing fencing"}]},"xmSN6La-2vQ":{"duration":116.913,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=xmSN6La-2vQ","annotations":[{"segment":[54.5470291021087,60.18355572934851],"label":"Putting in contact lenses"}]},"Pr6zL1ToSC4":{"duration":145.29,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Pr6zL1ToSC4","annotations":[{"segment":[43.461153180000004,136.85647425897037],"label":"Rock climbing"},{"segment":[7.81934634,28.48932783],"label":"Rock climbing"}]},"H-fl49tiqGE":{"duration":59.049,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=H-fl49tiqGE","annotations":[{"segment":[0.2,40.51363494539781],"label":"Snow tubing"}]},"ZbFyGkfGFcE":{"duration":15.117,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=ZbFyGkfGFcE","annotations":[{"segment":[0.01,15.041],"label":"Building sandcastles"}]},"YPlsA_vTltk":{"duration":215.41,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YPlsA_vTltk","annotations":[{"segment":[12.097994770670827,215.411518],"label":"Zumba"}]},"WXaFTEWJThM":{"duration":76.56,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=WXaFTEWJThM","annotations":[{"segment":[37.50178418096724,46.100919407176285],"label":"Washing face"},{"segment":[56.13324383775351,65.21010879875195],"label":"Washing face"}]},"kCb2Km85Yn4":{"duration":24.707,"subset":"validation","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=kCb2Km85Yn4","annotations":[{"segment":[21.061633385335416,24.481],"label":"Fun 
sliding down"}]},"oMegp6a547Q":{"duration":117.702,"subset":"training","resolution":"357x240","url":"https://www.youtube.com/watch?v=oMegp6a547Q","annotations":[{"segment":[5.600482805706786,8.538440998864445],"label":"Capoeira"},{"segment":[31.674861769981007,33.51108564070454],"label":"Capoeira"},{"segment":[36.99991099507926,38.46889009165809],"label":"Capoeira"},{"segment":[57.565617802652106,60.687198353354134],"label":"Capoeira"},{"segment":[82.17101743759751,86.94519945631825],"label":"Capoeira"},{"segment":[89.88315762168487,92.45387101638066],"label":"Capoeira"}]},"2PBdNJcQcp4":{"duration":217.85,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=2PBdNJcQcp4","annotations":[{"segment":[21.789256979999998,199.6232118174727],"label":"Belly dance"}]},"aPXqTfbtgYI":{"duration":41.19,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=aPXqTfbtgYI","annotations":[{"segment":[4.57638606,18.18508068],"label":"Playing saxophone"}]},"xpEbejnuGEY":{"duration":147.59,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xpEbejnuGEY","annotations":[{"segment":[46.50917069063715,75.51984151747023],"label":"Polishing forniture"},{"segment":[43.28576282098903,120.41730766458657],"label":"Painting furniture"}]},"dRa5x7LsBJI":{"duration":222.959,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=dRa5x7LsBJI","annotations":[]},"9Ds9DvdPQkM":{"duration":59.14,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=9Ds9DvdPQkM","annotations":[{"segment":[25.187810925117002,29.616437021840873],"label":"Playing saxophone"},{"segment":[36.720691385335414,57.01856099531982],"label":"Playing 
saxophone"}]},"T8ae3_Pm5eE":{"duration":211.79,"subset":"validation","resolution":"848x480","url":"https://www.youtube.com/watch?v=T8ae3_Pm5eE","annotations":[{"segment":[39.318120741029645,192.95615556942278],"label":"Ballet"}]},"RxuL7k7-hYQ":{"duration":235.43,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=RxuL7k7-hYQ","annotations":[{"segment":[11.018429906396257,196.12805233385336],"label":"Using uneven bars"}]},"hHmn5tf5d5I":{"duration":181.65,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hHmn5tf5d5I","annotations":[{"segment":[0.2,181.35666600000002],"label":"Belly dance"}]},"jpWevi1HBYo":{"duration":16.95,"subset":"validation","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=jpWevi1HBYo","annotations":[{"segment":[6.8223539719188775,15.91882593447738],"label":"Playing lacrosse"}]},"w50et2-vEOQ":{"duration":92.206,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=w50et2-vEOQ","annotations":[{"segment":[19.15412077983963,61.36784975180225],"label":"Wakeboarding"},{"segment":[74.29041984526019,87.21298993871814],"label":"Wakeboarding"}]},"FJnCSFPoWVU":{"duration":213.462,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FJnCSFPoWVU","annotations":[{"segment":[153.85179704331028,199.14150345397815],"label":"Slacklining"}]},"fFn51O3yHCQ":{"duration":134.79,"subset":"testing","resolution":"180x102","url":"https://www.youtube.com/watch?v=fFn51O3yHCQ","annotations":[]},"ako4JDxckE0":{"duration":220.219,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=ako4JDxckE0","annotations":[]},"BtrGC6PUPJk":{"duration":26.59,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=BtrGC6PUPJk","annotations":[{"segment":[0,26.38684807256236],"label":"Doing 
karate"}]},"DbuQn8EOJx0":{"duration":104.35,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=DbuQn8EOJx0","annotations":[{"segment":[46.23328430280068,54.861326795928974],"label":"Smoking a cigarette"},{"segment":[58.60557165143748,65.11730183493054],"label":"Smoking a cigarette"}]},"9mpPAwSmUmk":{"duration":127.455,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=9mpPAwSmUmk","annotations":[{"segment":[5.4382174368053215,120.35528753026327],"label":"Fixing the roof"}]},"vzxT-k8dsVs":{"duration":51.967,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=vzxT-k8dsVs","annotations":[{"segment":[1.7807751619937693,49.78076021028038],"label":"Powerbocking"}]},"LA5UXJ_hVU4":{"duration":22.501,"subset":"validation","resolution":"848x480","url":"https://www.youtube.com/watch?v=LA5UXJ_hVU4","annotations":[{"segment":[6.17788447048086,22.500135],"label":"Calf roping"}]},"W7JxYa-PiB8":{"duration":45.86,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=W7JxYa-PiB8","annotations":[{"segment":[0.07154350663822472,10.731525585023402],"label":"Tennis serve with ball bouncing"},{"segment":[26.971900970358817,40.064362184087365],"label":"Tennis serve with ball bouncing"}]},"A72gcVxGdwU":{"duration":184.112,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=A72gcVxGdwU","annotations":[]},"Z7gECmnHeKk":{"duration":69.66,"subset":"training","resolution":"656x480","url":"https://www.youtube.com/watch?v=Z7gECmnHeKk","annotations":[{"segment":[0,61.5836302652106],"label":"Using the rowing machine"}]},"vfLKyUMzFHg":{"duration":114.776,"subset":"training","resolution":"352x288","url":"https://www.youtube.com/watch?v=vfLKyUMzFHg","annotations":[{"segment":[15.040879578040265,109.04637520436818],"label":"Playing 
blackjack"}]},"tVbrnWNOmFY":{"duration":200.34,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tVbrnWNOmFY","annotations":[{"segment":[24.222288728283825,56.10194615131544],"label":"Fixing bicycle"},{"segment":[100.79597567576174,189.24639774162395],"label":"Fixing bicycle"}]},"YVSD611Zpvo":{"duration":123.42,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=YVSD611Zpvo","annotations":[{"segment":[34.46352032012481,109.55163721872076],"label":"Cleaning windows"}]},"VVezUub0ocQ":{"duration":193.144,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VVezUub0ocQ","annotations":[{"segment":[70.34178393135726,88.41676053042121],"label":"Clipping cat claws"},{"segment":[111.31173088923556,123.37449219968799],"label":"Clipping cat claws"}]},"HJws_Cwu3qE":{"duration":22.83,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=HJws_Cwu3qE","annotations":[{"segment":[2.8590686427457093,20.794840873634943],"label":"Playing racquetball"}]},"7IW2BELXDHA":{"duration":50.272,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=7IW2BELXDHA","annotations":[{"segment":[3.5270592823712947,49.53558814352574],"label":"Beach soccer"}]},"KgEHEyz3oKw":{"duration":188.78,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=KgEHEyz3oKw","annotations":[{"segment":[31.21761703121186,176.70349262950108],"label":"Shoveling snow"}]},"-KWToNMY1Lc":{"duration":86.379,"subset":"training","resolution":"424x240","url":"https://www.youtube.com/watch?v=-KWToNMY1Lc","annotations":[{"segment":[16.952736982643522,82.47640964174455],"label":"Calf roping"}]},"ecWUNT_HcN8":{"duration":233.918,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=ecWUNT_HcN8","annotations":[{"segment":[0.2,209.7817082683307],"label":"Roof shingle 
removal"}]},"mkF3Rlj3JSs":{"duration":178.32999999999998,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=mkF3Rlj3JSs","annotations":[{"segment":[32.21216848673947,163.5600624024961],"label":"Brushing hair"}]},"LGku-onF4VI":{"duration":64.227,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=LGku-onF4VI","annotations":[{"segment":[0,64.226393],"label":"Playing congas"}]},"eWG5T3TC_cg":{"duration":108.02,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eWG5T3TC_cg","annotations":[{"segment":[0.01,107.961],"label":"Drum corps"}]},"YF_HHxbHA18":{"duration":23.917,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=YF_HHxbHA18","annotations":[]},"ke9gaIRnaEo":{"duration":126.874,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=ke9gaIRnaEo","annotations":[{"segment":[0.2,3.5627597185520075],"label":"Running a marathon"},{"segment":[6.135863959728457,14.053107778732917],"label":"Running a marathon"},{"segment":[35.627597185520074,44.93035824804992],"label":"Running a marathon"},{"segment":[66.50484745085804,81.1517483775351],"label":"Running a marathon"},{"segment":[87.68347446645865,91.24623415132605],"label":"Running a marathon"},{"segment":[107.47658382683306,125.48831334477379],"label":"Running a marathon"}]},"JSxnQ-tgE3g":{"duration":36.39,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=JSxnQ-tgE3g","annotations":[]},"XsqslHC79FQ":{"duration":238.05,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XsqslHC79FQ","annotations":[{"segment":[0,233.9653428393136],"label":"Playing violin"}]},"W3KCFRADoX8":{"duration":177.842,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=W3KCFRADoX8","annotations":[{"segment":[84.32241861022554,164.37885374885482],"label":"Braiding 
hair"}]},"-Jp86pFKlsw":{"duration":27.609,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=-Jp86pFKlsw","annotations":[{"segment":[4.953167511700468,27.408526077097505],"label":"Disc dog"}]},"c2ejr7Lci8s":{"duration":27.73,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=c2ejr7Lci8s","annotations":[{"segment":[6.278099800000001,15.801571039999999],"label":"Playing accordion"}]},"z48kSSKMoXo":{"duration":85.938,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=z48kSSKMoXo","annotations":[{"segment":[4.941830482115086,77.93884914463452],"label":"Rope skipping"}]},"TQR1P0ZjByU":{"duration":221.96,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=TQR1P0ZjByU","annotations":[]},"YtgiDWEY_1A":{"duration":755.113,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YtgiDWEY_1A","annotations":[{"segment":[190.83170358814354,267.39997971918876],"label":"Removing ice from car"},{"segment":[355.7479906396256,371.06164586583463],"label":"Removing ice from car"},{"segment":[414.6466645865835,581.9188985959438],"label":"Removing ice from car"}]},"6pBqSAsP0DU":{"duration":17.62,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6pBqSAsP0DU","annotations":[]},"2zohqWPmeQU":{"duration":185.18,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=2zohqWPmeQU","annotations":[{"segment":[48.24466893777792,152.8229321404056],"label":"Painting 
furniture"}]},"m_sBF4vLRRc":{"duration":91.74,"subset":"training","resolution":"852x480","url":"https://www.youtube.com/watch?v=m_sBF4vLRRc","annotations":[{"segment":[15.886687255245315,28.624661721162727],"label":"Zumba"},{"segment":[29.340277683307335,38.07079933541342],"label":"Zumba"},{"segment":[42.65074512012481,51.953759995319814],"label":"Zumba"},{"segment":[56.67682908580344,60.11178842433698],"label":"Zumba"},{"segment":[63.97611768018721,71.56165288611545],"label":"Zumba"}]},"asLRIsN6wLQ":{"duration":155.02,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=asLRIsN6wLQ","annotations":[{"segment":[15.71367394695788,25.141878315132608],"label":"Shot put"},{"segment":[32.87784087363495,39.168365054602184],"label":"Shot put"},{"segment":[49.56490639625585,58.51076755070203],"label":"Shot put"},{"segment":[64.07170826833072,82.68877067082684],"label":"Shot put"},{"segment":[87.52437129485179,95.98667238689548],"label":"Shot put"},{"segment":[102.03117316692668,110.00991419656786],"label":"Shot put"},{"segment":[122.82425585023401,133.4625772230889],"label":"Shot put"},{"segment":[148.4529391575663,154.781],"label":"Shot put"}]},"_fwExctFJpg":{"duration":55.171,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=_fwExctFJpg","annotations":[{"segment":[26.337296358814353,41.657684436817476],"label":"Calf roping"}]},"8gTDyNpuHos":{"duration":72.17,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=8gTDyNpuHos","annotations":[]},"GD9SfOn3irM":{"duration":97.269,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=GD9SfOn3irM","annotations":[{"segment":[4.8524680187207485,97.201],"label":"Calf 
roping"}]},"aorWXCqqyJs":{"duration":141.897,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=aorWXCqqyJs","annotations":[{"segment":[123.1216838122573,136.6180490062749],"label":"Surfing"}]},"MIAhrxkYadk":{"duration":155.6,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MIAhrxkYadk","annotations":[{"segment":[10.436187207488299,13.834015600624024],"label":"Getting a haircut"},{"segment":[106.30348829953199,120.137503900156],"label":"Getting a haircut"}]},"sP5Y_UMakiQ":{"duration":23.267,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=sP5Y_UMakiQ","annotations":[]},"i0jy3fjw8xQ":{"duration":206.84,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=i0jy3fjw8xQ","annotations":[{"segment":[126.1331840873635,168.0863042121685],"label":"Bungee jumping"}]},"JWN0cMm-8ug":{"duration":217.618,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JWN0cMm-8ug","annotations":[{"segment":[95.77873268241979,194.60343745702014],"label":"Bullfighting"}]},"Nb87GFizCB8":{"duration":483.37,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Nb87GFizCB8","annotations":[{"segment":[165.88240249609984,183.22465366614668],"label":"Removing ice from 
car"}]},"OyvOOO8DxKg":{"duration":223.21,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=OyvOOO8DxKg","annotations":[]},"KlqEMsj4nKM":{"duration":150.05,"subset":"testing","resolution":"632x360","url":"https://www.youtube.com/watch?v=KlqEMsj4nKM","annotations":[]},"oEC5UG-rBFc":{"duration":198.601,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=oEC5UG-rBFc","annotations":[{"segment":[85.99824867018458,198.40027210884355],"label":"Futsal"}]},"Oa26_SgrY8w":{"duration":129.08,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Oa26_SgrY8w","annotations":[{"segment":[10.265321372854913,18.517834633385334],"label":"Curling"},{"segment":[31.601087363494536,34.01645709828392],"label":"Curling"},{"segment":[38.84719656786271,42.872812792511695],"label":"Curling"},{"segment":[46.294586583463335,49.11251794071762],"label":"Curling"},{"segment":[69.441879875195,75.08938065522621],"label":"Curling"},{"segment":[82.33661310452419,87.57072542901716],"label":"Curling"},{"segment":[96.02582995319813,99.04551014040563],"label":"Curling"},{"segment":[108.70848673946956,112.93603900156006],"label":"Curling"}]},"vF-XFROSmv4":{"duration":117.19,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=vF-XFROSmv4","annotations":[{"segment":[21.55025299,42.73604197],"label":"Kayaking"},{"segment":[65.99998613277864,112.62047388455538],"label":"Kayaking"}]},"I9kOPQ3J5HA":{"duration":92.63,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=I9kOPQ3J5HA","annotations":[{"segment":[2.600997166417269,3.9014957496259033],"label":"Rock 
climbing"}]},"KR1-rdV18pI":{"duration":221.937,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KR1-rdV18pI","annotations":[{"segment":[4.845638065522621,217.3810733229329],"label":"Waterskiing"}]},"UYGiq0CsYEs":{"duration":83.546,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=UYGiq0CsYEs","annotations":[{"segment":[10.035874282371294,77.28926557722309],"label":"Playing drums"}]},"Lou-YFz3kb0":{"duration":148.61,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Lou-YFz3kb0","annotations":[{"segment":[40.8033649237126,135.1611447176287],"label":"Playing violin"}]},"xND_kp-aYXw":{"duration":39.567,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=xND_kp-aYXw","annotations":[]},"OCQ9OyMKQKw":{"duration":55.798,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OCQ9OyMKQKw","annotations":[{"segment":[39.25809638689548,49.61666283931358],"label":"Trimming branches or hedges"}]},"uYLYJABDubw":{"duration":21.363,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=uYLYJABDubw","annotations":[{"segment":[0.01,19.059177847113887],"label":"Arm wrestling"}]},"9P-LVflVPTo":{"duration":120.117,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9P-LVflVPTo","annotations":[]},"InA8Xbg-hvo":{"duration":165.51,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=InA8Xbg-hvo","annotations":[{"segment":[54.22373471138845,138.14141938377534],"label":"Putting on 
makeup"}]},"Cf7xy_HQ9H0":{"duration":180.489,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Cf7xy_HQ9H0","annotations":[{"segment":[45.321374414976596,154.56020124804994],"label":"Rollerblading"}]},"hHpcr5tYWso":{"duration":41.425,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=hHpcr5tYWso","annotations":[{"segment":[12.207159126365056,40.10923712948519],"label":"Kite flying"}]},"zzz_3yWpTXo":{"duration":21.53,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=zzz_3yWpTXo","annotations":[{"segment":[0.23458190327613107,21.481],"label":"Washing dishes"}]},"07G2P4soRZw":{"duration":197.927,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=07G2P4soRZw","annotations":[]},"54K2F3zAZ0o":{"duration":37.76,"subset":"training","resolution":"384x288","url":"https://www.youtube.com/watch?v=54K2F3zAZ0o","annotations":[{"segment":[1.4136083494539782,24.738146115444614],"label":"Playing saxophone"},{"segment":[30.51038020904836,37.755123],"label":"Playing saxophone"}]},"91Wdp_uaWrY":{"duration":72.261,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=91Wdp_uaWrY","annotations":[{"segment":[0.010000000000000002,72.221],"label":"Doing fencing"}]},"jzBR9ihZBbI":{"duration":223.79,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=jzBR9ihZBbI","annotations":[{"segment":[119.0541894573742,176.66105532384563],"label":"Playing 
squash"}]},"rApbE1VkEjE":{"duration":19.69,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=rApbE1VkEjE","annotations":[]},"F9HKxxoNNSQ":{"duration":44.722,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=F9HKxxoNNSQ","annotations":[{"segment":[3.2791212709732878,33.209822708268334],"label":"Shuffleboard"}]},"JY-H1u1hJZw":{"duration":81.873,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JY-H1u1hJZw","annotations":[{"segment":[5.105834633385335,42.38878627145086],"label":"Canoeing"},{"segment":[45.453035881435255,81.841],"label":"Canoeing"}]},"jJNDLjhemaE":{"duration":78.205,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=jJNDLjhemaE","annotations":[{"segment":[0.2,78.181],"label":"Playing ice hockey"}]},"bRaUAmvOhT4":{"duration":147.49,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=bRaUAmvOhT4","annotations":[]},"Qf6gZtm9BIg":{"duration":54.149,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Qf6gZtm9BIg","annotations":[{"segment":[12.322536661466458,42.80709360374415],"label":"Slacklining"}]},"BWKKwqX62Y4":{"duration":43.329,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BWKKwqX62Y4","annotations":[{"segment":[0.2,37.94683619344774],"label":"Swinging at the playground"}]},"pxb95-aSDYo":{"duration":168.72,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=pxb95-aSDYo","annotations":[{"segment":[5.376384984667055,13.75021676036799],"label":"Playing harmonica"},{"segment":[66.34834760148948,83.09601115289135],"label":"Playing harmonica"},{"segment":[116.32965601270443,164.74087096597546],"label":"Playing 
harmonica"}]},"7gaC3b5nJqs":{"duration":213.51,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7gaC3b5nJqs","annotations":[]},"vuizEjJ1XCM":{"duration":201.71,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=vuizEjJ1XCM","annotations":[]},"lR4vyq1WbyU":{"duration":75.14,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=lR4vyq1WbyU","annotations":[{"segment":[18.16944170283818,68.22332203432137],"label":"Tai chi"}]},"BD7txKlwoj8":{"duration":20.88,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=BD7txKlwoj8","annotations":[{"segment":[8.59739599375975,16.76154825],"label":"Hammer throw"}]},"PUGP8PSlJEA":{"duration":176.87,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PUGP8PSlJEA","annotations":[{"segment":[91.10721392,149.27413200156005],"label":"Rock climbing"},{"segment":[25.93672556698186,90.50261687202182],"label":"Rock climbing"}]},"aZfNeGBMU1Y":{"duration":19.97,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aZfNeGBMU1Y","annotations":[{"segment":[0.19999999999999998,19.721],"label":"Hitting a pinata"}]},"qEU4vKowVo4":{"duration":90.558,"subset":"validation","resolution":"698x480","url":"https://www.youtube.com/watch?v=qEU4vKowVo4","annotations":[{"segment":[0.01,89.28636279056605],"label":"Changing car wheel"}]},"ccfffP3pXrc":{"duration":70.125,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ccfffP3pXrc","annotations":[{"segment":[0.010000000000000002,70.12428571428572],"label":"Using the monkey bar"}]},"Xfu6WQuFEks":{"duration":7.01,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=Xfu6WQuFEks","annotations":[{"segment":[0.01,7.012425],"label":"Chopping 
wood"}]},"DJ5tenTs0A0":{"duration":80.689,"subset":"training","resolution":"568x320","url":"https://www.youtube.com/watch?v=DJ5tenTs0A0","annotations":[{"segment":[0.7552826684495951,18.63030582175668],"label":"Skiing"},{"segment":[31.72187165054602,55.890916717628706],"label":"Skiing"},{"segment":[60.548493110764426,76.28354849297972],"label":"Skiing"}]},"tBGeBbO8gh0":{"duration":166.766,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tBGeBbO8gh0","annotations":[{"segment":[19.244733229329174,152.13741809672388],"label":"Ice fishing"}]},"CdjU2OZri4c":{"duration":104.885,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=CdjU2OZri4c","annotations":[{"segment":[0.01,93.26705831513262],"label":"Installing carpet"}]},"b0LY869JBrE":{"duration":169.46,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=b0LY869JBrE","annotations":[{"segment":[42.57407644305771,50.50713416536661],"label":"Doing nails"},{"segment":[57.38245085803431,145.17495631825273],"label":"Doing nails"},{"segment":[152.57914352574102,163.1565538221529],"label":"Doing nails"}]},"4TgVsc3GVdw":{"duration":190.99,"subset":"testing","resolution":"810x480","url":"https://www.youtube.com/watch?v=4TgVsc3GVdw","annotations":[]},"v5KAaaZD6uk":{"duration":226.256,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=v5KAaaZD6uk","annotations":[{"segment":[41.3644824690893,123.4592379208027],"label":"Running a marathon"},{"segment":[154.4649824690893,170.33525193954182],"label":"Running a marathon"}]},"E1Xsc4kfFRE":{"duration":138.669,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=E1Xsc4kfFRE","annotations":[{"segment":[23.136204368174727,28.32563338533541],"label":"Playing ten pins"},{"segment":[52.97542121684867,59.89465990639626],"label":"Playing ten pins"},{"segment":[82.59841185647426,90.61185491419658],"label":"Playing ten 
pins"},{"segment":[113.31888299531981,118.07654602184087],"label":"Playing ten pins"},{"segment":[128.02438689547583,134.07959438377534],"label":"Playing ten pins"}]},"B0sXYJeZ8Xk":{"duration":50.783,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=B0sXYJeZ8Xk","annotations":[{"segment":[0.20000000000000004,23.133160187207487],"label":"Cutting the grass"},{"segment":[38.26478209048362,50.58204],"label":"Cutting the grass"}]},"nEmuDmbOp1E":{"duration":152.068,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nEmuDmbOp1E","annotations":[{"segment":[0.2,139.75036378504672],"label":"Plastering"}]},"1y9s_l_DIEk":{"duration":20.78,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=1y9s_l_DIEk","annotations":[{"segment":[0.5835779157566303,20.74943700468019],"label":"Doing karate"}]},"JFVZdzzJVr8":{"duration":62.764,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JFVZdzzJVr8","annotations":[{"segment":[8.980162495305276,36.70153744681873],"label":"Welding"}]},"oFc4uYTxEqs":{"duration":214.62,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=oFc4uYTxEqs","annotations":[{"segment":[11.384008308892357,193.19331747737908],"label":"Tango"}]},"NkPkbeEnIXU":{"duration":168.14,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NkPkbeEnIXU","annotations":[]},"HGSZ9_CVuM4":{"duration":148.469,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HGSZ9_CVuM4","annotations":[{"segment":[40.070098910078855,136.42363051170048],"label":"Assembling bicycle"}]},"MqQ9mgUb664":{"duration":114.45,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=MqQ9mgUb664","annotations":[{"segment":[10.177403928810213,82.13343509828394],"label":"Preparing 
salad"}]},"34eCEU2Y4T0":{"duration":154.62,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=34eCEU2Y4T0","annotations":[{"segment":[31.358535101404055,35.459266614664585],"label":"Washing face"},{"segment":[44.38438814352575,47.279022152886114],"label":"Washing face"},{"segment":[65.61170421216849,70.19487472698908],"label":"Washing face"},{"segment":[78.1551182527301,85.39170327613104],"label":"Washing face"}]},"cyhWzLsM29E":{"duration":171.526,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cyhWzLsM29E","annotations":[{"segment":[33.46961704819479,42.553533062112336],"label":"Baking cookies"},{"segment":[62.59158364622449,149.6903101851653],"label":"Baking cookies"}]},"JY9oOSfjj_E":{"duration":55.821,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JY9oOSfjj_E","annotations":[]},"4NSWcmO_u4I":{"duration":73.422,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=4NSWcmO_u4I","annotations":[{"segment":[6.394405426439627,7.993007218633953],"label":"Painting fence"},{"segment":[42.02038730898428,63.14476767445861],"label":"Painting fence"}]},"aNQRlsp5yv8":{"duration":213.438,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=aNQRlsp5yv8","annotations":[{"segment":[16.98179247986246,186.13376463221795],"label":"Building sandcastles"}]},"-E9YQ_Uhu50":{"duration":62.09,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-E9YQ_Uhu50","annotations":[{"segment":[3.342355694227769,57.788845553822156],"label":"Playing violin"}]},"d1doBRc_hAM":{"duration":191.286,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=d1doBRc_hAM","annotations":[{"segment":[20.28765678627145,35.8017472698908],"label":"Elliptical trainer"},{"segment":[97.26141341653667,107.10689391575664],"label":"Elliptical 
trainer"},{"segment":[148.87559906396257,177.5355616224649],"label":"Elliptical trainer"}]},"mKm75VWThAI":{"duration":212.51,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mKm75VWThAI","annotations":[{"segment":[3.3142121684867396,81.86104056162246],"label":"Sumo"},{"segment":[145.82533541341652,167.7149235569423],"label":"Sumo"}]},"Erje-PwY9hE":{"duration":58.44,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Erje-PwY9hE","annotations":[]},"ZoMYVoG76O0":{"duration":175.845,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZoMYVoG76O0","annotations":[]},"MXDeLfF5rok":{"duration":167.74,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MXDeLfF5rok","annotations":[{"segment":[21.719970112324493,151.77810439937596],"label":"Windsurfing"}]},"uy7WAwrrNiA":{"duration":164.096,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uy7WAwrrNiA","annotations":[{"segment":[10.495946915427638,154.36721920436815],"label":"Kite flying"}]},"NVeTGvwk8y0":{"duration":87.61,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=NVeTGvwk8y0","annotations":[{"segment":[9.493815580343213,85.71754354914196],"label":"Mixing 
drinks"}]},"wMDyshGO2og":{"duration":173.731,"subset":"training","resolution":"352x240","url":"https://www.youtube.com/watch?v=wMDyshGO2og","annotations":[{"segment":[0.01,116.80772386895475],"label":"Futsal"}]},"FuVi170R3cs":{"duration":129.103,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=FuVi170R3cs","annotations":[]},"oPuZJt4J6jY":{"duration":68.59,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=oPuZJt4J6jY","annotations":[]},"j05b3qqgRxw":{"duration":56.75,"subset":"validation","resolution":"424x240","url":"https://www.youtube.com/watch?v=j05b3qqgRxw","annotations":[{"segment":[0.01,24.85644461778471],"label":"Getting a tattoo"},{"segment":[30.51769890795632,44.31700624024961],"label":"Getting a tattoo"}]},"Znb6Y9m_PsQ":{"duration":50.574,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=Znb6Y9m_PsQ","annotations":[]},"9uZoTZsTwv0":{"duration":11.401,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9uZoTZsTwv0","annotations":[{"segment":[1.5118325429017159,11.400996],"label":"Calf roping"}]},"OGJsBzZX04o":{"duration":199.762,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OGJsBzZX04o","annotations":[]},"HkzMA1jrm00":{"duration":72.957,"subset":"validation","resolution":"480x320","url":"https://www.youtube.com/watch?v=HkzMA1jrm00","annotations":[{"segment":[13.999533042546192,68.85949179463778],"label":"Playing blackjack"}]},"oIz8QNVb4P8":{"duration":153.972,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=oIz8QNVb4P8","annotations":[]},"yGO1YnMQNRI":{"duration":102.7,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=yGO1YnMQNRI","annotations":[{"segment":[15.293110764430576,21.378327613104524],"label":"Discus throw"},{"segment":[49.7226271450858,58.85045241809672],"label":"Discus 
throw"},{"segment":[88.63598751950077,93.27996879875195],"label":"Discus throw"}]},"RfbEuBH1Pow":{"duration":85.4,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RfbEuBH1Pow","annotations":[{"segment":[7.423995860000001,68.37721138845554],"label":"Playing water polo"}]},"lwXIgNoLGhM":{"duration":204.99,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=lwXIgNoLGhM","annotations":[{"segment":[0,204.9857596371882],"label":"Paintball"},{"segment":[15.455869,122.37399515000001],"label":"Paintball"}]},"Re-SsHmajds":{"duration":204.99,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Re-SsHmajds","annotations":[{"segment":[4.477068073199119,190.59518368761962],"label":"Playing racquetball"}]},"vBCnsp-NEAg":{"duration":121.42,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=vBCnsp-NEAg","annotations":[{"segment":[10.319290171606864,121.17],"label":"Preparing pasta"}]},"_aEHpGmhHe8":{"duration":162.22,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=_aEHpGmhHe8","annotations":[{"segment":[91.86255998811382,153.10426664685636],"label":"Painting"}]},"Mpph0kFsyZ0":{"duration":120.44,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Mpph0kFsyZ0","annotations":[{"segment":[20.377518720748828,41.7879992199688],"label":"Shot put"},{"segment":[68.8328166926677,97.9435577223089],"label":"Shot put"}]},"FFUh8Fx4FsA":{"duration":59.258,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FFUh8Fx4FsA","annotations":[{"segment":[24.577272659175517,32.69977176197926],"label":"Hand car wash"},{"segment":[53.0060195189886,56.97496794422225],"label":"Hand car 
wash"}]},"JgAlMwG3fWw":{"duration":122.9,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JgAlMwG3fWw","annotations":[{"segment":[3.115219103590943,121.80680223736366],"label":"Sailing"}]},"1WR4jgFCqRw":{"duration":81.294,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1WR4jgFCqRw","annotations":[{"segment":[0.01,81.293082],"label":"Painting fence"}]},"pA8QJ2ZoeBM":{"duration":44.188,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=pA8QJ2ZoeBM","annotations":[{"segment":[0.2,10.20243511237048],"label":"Elliptical trainer"},{"segment":[12.753043400936038,20.818481659906396],"label":"Elliptical trainer"}]},"1fqn2LtVdKA":{"duration":95.992,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1fqn2LtVdKA","annotations":[]},"Fok7z0mLNbU":{"duration":112.66,"subset":"validation","resolution":"480x272","url":"https://www.youtube.com/watch?v=Fok7z0mLNbU","annotations":[{"segment":[0.01,112.48745830105314],"label":"Ballet"}]},"OZX6nSa3HQE":{"duration":36.525,"subset":"testing","resolution":"270x480","url":"https://www.youtube.com/watch?v=OZX6nSa3HQE","annotations":[]},"wy3CaE40RXY":{"duration":218.77,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wy3CaE40RXY","annotations":[]},"mrT7FqQ0lCs":{"duration":136.63,"subset":"training","resolution":"352x288","url":"https://www.youtube.com/watch?v=mrT7FqQ0lCs","annotations":[{"segment":[29.369124960000004,110.14241616],"label":"Breakdancing"}]},"h-NNy3iaytg":{"duration":219.684,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=h-NNy3iaytg","annotations":[]},"w83HV_L4AiU":{"duration":62.555,"subset":"training","resolution":"1916x1080","url":"https://www.youtube.com/watch?v=w83HV_L4AiU","annotations":[{"segment":[8.418557217835946,19.331501566745604],"label":"Rock-paper-scissors"},{"segment":[24.30078872562411,36.18810310568641],"label
":"Rock-paper-scissors"},{"segment":[39.01377619602909,55.480629722508844],"label":"Rock-paper-scissors"}]},"fTQYGNJpSUk":{"duration":182.207,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fTQYGNJpSUk","annotations":[{"segment":[24.15286271450858,155.14662402496097],"label":"Curling"}]},"rse-MW1helQ":{"duration":129.41,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rse-MW1helQ","annotations":[{"segment":[11.910894780031201,62.17890834321373],"label":"Playing water polo"},{"segment":[67.42777722932918,122.13714138845555],"label":"Playing water polo"}]},"d1JA5V3hO3Q":{"duration":120.001,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=d1JA5V3hO3Q","annotations":[{"segment":[31.102991677113042,86.61734530328127],"label":"Hand car wash"}]},"JzadrhtPWzs":{"duration":92.86,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JzadrhtPWzs","annotations":[]},"rA_RAZx2_aA":{"duration":128.22,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rA_RAZx2_aA","annotations":[{"segment":[0,128.26702947845806],"label":"Triple jump"}]},"ss9mNDN7jYc":{"duration":140.644,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ss9mNDN7jYc","annotations":[]},"K6UXN8usLJM":{"duration":20.457,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=K6UXN8usLJM","annotations":[]},"8vDSV9ruYlc":{"duration":30.419,"subset":"testing","resolution":"424x240","url":"https://www.youtube.com/watch?v=8vDSV9ruYlc","annotations":[]},"PpC4kPd5KfA":{"duration":175.45,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=PpC4kPd5KfA","annotations":[{"segment":[1.475747889823115,11.860637513377627],"label":"Playing ten pins"},{"segment":[20.605807722686688,24.97839282734122],"label":"Playing ten 
pins"},{"segment":[37.54957500322299,48.75432433390023],"label":"Playing ten pins"},{"segment":[60.50564680265928,72.53025584045925],"label":"Playing ten pins"},{"segment":[90.02059625907735,96.85275975407386],"label":"Playing ten pins"},{"segment":[115.436246308591,121.17526421513304],"label":"Playing ten pins"},{"segment":[129.92043435843522,135.6594522649773],"label":"Playing ten pins"},{"segment":[147.4107746450396,154.5162253864726],"label":"Playing ten pins"},{"segment":[164.90111493164395,169.27370000329503],"label":"Playing ten pins"}]},"dea_92hDJnU":{"duration":214.251,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dea_92hDJnU","annotations":[{"segment":[56.18081110095681,180.2856896056297],"label":"Clipping cat claws"}]},"-_gDSRlC1kg":{"duration":65.225,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=-_gDSRlC1kg","annotations":[{"segment":[0.30526452028081125,51.99672328783151],"label":"Curling"},{"segment":[63.08800085803431,65.0248525],"label":"Curling"}]},"aCzKnKvZesg":{"duration":199.07,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aCzKnKvZesg","annotations":[]},"ouEWLFNLLos":{"duration":69.24,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ouEWLFNLLos","annotations":[{"segment":[10.910194268330734,19.011823675507024],"label":"Cleaning 
windows"}]},"cKHdMUfKsQ8":{"duration":237.66,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cKHdMUfKsQ8","annotations":[{"segment":[23.1239288,217.6030062402496],"label":"Painting"}]},"eDuV8t5R-aI":{"duration":222.169,"subset":"testing","resolution":"853x480","url":"https://www.youtube.com/watch?v=eDuV8t5R-aI","annotations":[]},"79FMLEeVp7Q":{"duration":135.094,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=79FMLEeVp7Q","annotations":[{"segment":[66.5787192027042,100.87812241921512],"label":"Canoeing"}]},"qRI4UJ2HR2g":{"duration":162.84,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=qRI4UJ2HR2g","annotations":[{"segment":[25.912386238689546,145.82068334321374],"label":"Getting a tattoo"}]},"jwD-G9U01mk":{"duration":46.3,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=jwD-G9U01mk","annotations":[]},"uHmoFLB-PLc":{"duration":118.53999999999999,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=uHmoFLB-PLc","annotations":[{"segment":[0,118.15166599999999],"label":"Playing guitarra"}]},"jdfe-CV_cpE":{"duration":226.32999999999998,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jdfe-CV_cpE","annotations":[{"segment":[0,5.472746169710734],"label":"Getting a haircut"},{"segment":[87.74047891439467,133.64093066035565],"label":"Getting a 
haircut"}]},"hLLYYzwnEjA":{"duration":83.43,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=hLLYYzwnEjA","annotations":[]},"hfk93bEIjwc":{"duration":86.054,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hfk93bEIjwc","annotations":[{"segment":[13.819193447737911,85.801],"label":"Rafting"}]},"5pqVrMgiMcs":{"duration":190.776,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5pqVrMgiMcs","annotations":[{"segment":[7.210777181405516,100.78883792906906],"label":"Longboarding"},{"segment":[104.94786285118745,155.4666096651428],"label":"Longboarding"}]},"73iABxDLM7I":{"duration":14.072,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=73iABxDLM7I","annotations":[]},"9VWoQpg9wqE":{"duration":128.012,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9VWoQpg9wqE","annotations":[{"segment":[12.357604361370717,56.40648442367601],"label":"Grooming dog"}]},"LcXB-fSLTKY":{"duration":151.23,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LcXB-fSLTKY","annotations":[{"segment":[7.8491758800000015,141.043104524181],"label":"Starting a 
campfire"}]},"FQVs9_IbgOY":{"duration":117.819,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FQVs9_IbgOY","annotations":[{"segment":[14.84177331734923,20.338727433365776],"label":"Bullfighting"},{"segment":[30.416476646062776,36.46312617368098],"label":"Bullfighting"},{"segment":[41.96008028969753,51.30490228692565],"label":"Bullfighting"},{"segment":[0.2,117.61804988662131],"label":"Bullfighting"},{"segment":[75.49150039739847,80.80522141561315],"label":"Bullfighting"}]},"B-60jGEds7M":{"duration":107.16,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=B-60jGEds7M","annotations":[{"segment":[0,106.908849475039],"label":"Sailing"}]},"cmM0li5O1t0":{"duration":26.75,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cmM0li5O1t0","annotations":[{"segment":[0.01,25.198502340093604],"label":"Waterskiing"}]},"QilmFv8ZIx0":{"duration":137.973,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=QilmFv8ZIx0","annotations":[]},"UTL1NSue01M":{"duration":155.57,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=UTL1NSue01M","annotations":[]},"mYHezmI0U6U":{"duration":136.023,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mYHezmI0U6U","annotations":[{"segment":[12.518032761310453,123.69513728549143],"label":"Skiing"}]},"AFb77tjPuwQ":{"duration":238.841,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=AFb77tjPuwQ","annotations":[{"segment":[32.40865366614665,40.60394539781591],"label":"Croquet"},{"segment":[57.36704212168487,71.15003276131046],"label":"Croquet"},{"segment":[119.94926989079562,131.49718096723868],"label":"Croquet"}]},"7Nj1Y-fyq_c":{"duration":98.62,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7Nj1Y-fyq_c","annotations":[{"segment":[44.33193302730109,73.37434322542902],"label":"Washing 
dishes"}]},"NcEWimHnC54":{"duration":148.376,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=NcEWimHnC54","annotations":[{"segment":[0.01,126.81882683307333],"label":"Snowboarding"}]},"mpLYUgMhacA":{"duration":61.324,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=mpLYUgMhacA","annotations":[{"segment":[0.2,61.037782930437885],"label":"Rollerblading"}]},"p28XWij4U1c":{"duration":220.637,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=p28XWij4U1c","annotations":[{"segment":[75.7009360374415,158.97196567862713],"label":"Using the monkey bar"}]},"Pv6oIFroaCQ":{"duration":173.709,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Pv6oIFroaCQ","annotations":[{"segment":[33.332516582295945,73.98192631357254],"label":"Baking cookies"}]},"leIVsOMWwGk":{"duration":76.069,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=leIVsOMWwGk","annotations":[]},"QIkDYInr17A":{"duration":145.566,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=QIkDYInr17A","annotations":[{"segment":[0.2,101.96425454841322],"label":"Making a lemonade"}]},"yEBiJguscMY":{"duration":74.05,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yEBiJguscMY","annotations":[{"segment":[3.7561583463338533,24.328348673946955],"label":"Playing squash"},{"segment":[40.04642667706708,52.87515210608424],"label":"Playing squash"}]},"g9lNpSAtHgI":{"duration":159.87,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=g9lNpSAtHgI","annotations":[]},"m3AbqJNPn-o":{"duration":135.72,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=m3AbqJNPn-o","annotations":[{"segment":[0.01,135.7206575963719],"label":"Getting a 
piercing"}]},"uE6_kwnfDCQ":{"duration":190.57,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uE6_kwnfDCQ","annotations":[{"segment":[10.40532767550702,185.80942277691108],"label":"Tango"}]},"87JvCGMC514":{"duration":113.662,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=87JvCGMC514","annotations":[{"segment":[7.092751193040919,105.85931149235569],"label":"Kite flying"}]},"LtdO87DWEHM":{"duration":15.059,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LtdO87DWEHM","annotations":[{"segment":[2.188039001560062,11.31663182527301],"label":"Rollerblading"}]},"VXLyTLY1PAw":{"duration":6.32,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=VXLyTLY1PAw","annotations":[{"segment":[1.67063325,6.287656050000001],"label":"High jump"}]},"QPKJDlQSO6c":{"duration":136.627,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QPKJDlQSO6c","annotations":[{"segment":[33.14725842449473,39.521731821940804],"label":"Beer pong"},{"segment":[49.93337170443605,50.995783937343724],"label":"Beer pong"},{"segment":[58.64515201427901,59.70756424718669],"label":"Beer pong"},{"segment":[62.68231849932819,64.16969562539894],"label":"Beer pong"},{"segment":[70.11920412968193,72.03154614891577],"label":"Beer pong"},{"segment":[84.35552805064482,85.630422008164],"label":"Beer pong"},{"segment":[92.4298602414455,97.74192136119666],"label":"Beer pong"},{"segment":[105.60377181842838,112.82817494128999],"label":"Beer pong"}]},"OxZDfLtnTvc":{"duration":35.295,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=OxZDfLtnTvc","annotations":[]},"4MBGT228QiQ":{"duration":150.094,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4MBGT228QiQ","annotations":[{"segment":[78.39635456226623,136.36384365883944],"label":"Making a 
lemonade"}]},"C8fyaj592GA":{"duration":110.92,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=C8fyaj592GA","annotations":[{"segment":[0,110.92165532879818],"label":"Doing step aerobics"}]},"9qVcdqGeAzE":{"duration":155.179,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9qVcdqGeAzE","annotations":[{"segment":[35.829072205065074,75.28946929577863],"label":"Blow-drying hair"},{"segment":[82.06794241565582,130.96978420905546],"label":"Blow-drying hair"}]},"injaml6fr7s":{"duration":224.31,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=injaml6fr7s","annotations":[{"segment":[18.88834627769111,187.48432601560063],"label":"Playing polo"}]},"yhNC43Jco4M":{"duration":158.43,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=yhNC43Jco4M","annotations":[]},"OD7lx6blG9M":{"duration":28.422,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=OD7lx6blG9M","annotations":[{"segment":[0.01,28.421224489795918],"label":"Making an omelette"}]},"8wxFQrFEo3M":{"duration":145.98,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8wxFQrFEo3M","annotations":[{"segment":[23.255711659412125,139.67897137280778],"label":"Smoking hookah"}]},"_86X1xtj67w":{"duration":209.31,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_86X1xtj67w","annotations":[{"segment":[4.733575127925117,202.56437012948518],"label":"Dodgeball"}]},"SaBcTNvRA6g":{"duration":142.57,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=SaBcTNvRA6g","annotations":[{"segment":[0.01,103.86963001560062],"label":"Washing 
face"}]},"lUZCcRbMSNE":{"duration":16.35,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lUZCcRbMSNE","annotations":[]},"-01K1HxqPB8":{"duration":215.714,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-01K1HxqPB8","annotations":[{"segment":[88.46852262090484,215.601],"label":"Sumo"}]},"w30TFlJiRKA":{"duration":233.69,"subset":"training","resolution":"480x272","url":"https://www.youtube.com/watch?v=w30TFlJiRKA","annotations":[{"segment":[16.406115444617782,91.14508580343214],"label":"Playing piano"},{"segment":[98.80127301092044,145.46755694227767],"label":"Playing piano"},{"segment":[155.31122620904836,228.22729485179406],"label":"Playing piano"}]},"hW6aZXhKl9M":{"duration":196.93,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=hW6aZXhKl9M","annotations":[{"segment":[0.30722064586583464,195.39233077067084],"label":"Horseback riding"}]},"MXbwIfqSq_Q":{"duration":227.02,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=MXbwIfqSq_Q","annotations":[{"segment":[34.708434737389496,41.791788765428166],"label":"Tai chi"},{"segment":[58.791838432720986,70.4793725789848],"label":"Tai chi"},{"segment":[78.27106145865834,108.37531586583462],"label":"Tai chi"},{"segment":[119.35451453198127,176.37551405616222],"label":"Tai chi"},{"segment":[179.20885564742588,189.47971891575662],"label":"Tai chi"}]},"Npj77L31bhw":{"duration":61.98,"subset":"testing","resolution":"800x480","url":"https://www.youtube.com/watch?v=Npj77L31bhw","annotations":[]},"GyBIC-DBoss":{"duration":78.09,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GyBIC-DBoss","annotations":[{"segment":[9.623975788963532,72.24072965639715],"label":"Clean and 
jerk"}]},"WhBnR7yIvJc":{"duration":35.086,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=WhBnR7yIvJc","annotations":[{"segment":[0.2,34.8853514739229],"label":"Swinging at the playground"}]},"3boxQwSpv-8":{"duration":30.09,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=3boxQwSpv-8","annotations":[{"segment":[3.0985055110764432,21.642591524336975],"label":"Washing hands"},{"segment":[27.13539674851794,29.8930611],"label":"Washing hands"}]},"UJiQEkByWcM":{"duration":15.302,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=UJiQEkByWcM","annotations":[{"segment":[3.1033595943837753,10.193342667706707],"label":"Throwing darts"}]},"8aYk3so14fE":{"duration":198.763,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8aYk3so14fE","annotations":[{"segment":[60.747020747092655,176.51895136167104],"label":"Playing congas"}]},"u-YaDbGZoPk":{"duration":68.73,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=u-YaDbGZoPk","annotations":[{"segment":[15.669293379999997,48.39079797191887],"label":"Cleaning windows"}]},"cWU5059m_1Q":{"duration":95.109,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=cWU5059m_1Q","annotations":[{"segment":[5.48990858954086,88.87716699687986],"label":"Rope skipping"}]},"dMryzJswHY8":{"duration":143.593,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dMryzJswHY8","annotations":[{"segment":[0.2,129.8991887675507],"label":"Assembling bicycle"}]},"KEMMmoIdT3g":{"duration":160.752,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KEMMmoIdT3g","annotations":[{"segment":[46.39479612324492,144.20004200468017],"label":"Arm 
wrestling"}]},"_Xydl5mYkQ8":{"duration":56.98,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_Xydl5mYkQ8","annotations":[{"segment":[31.61291107644306,48.39617004680187],"label":"Drinking beer"}]},"ZUM89wyBcYY":{"duration":199.51,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZUM89wyBcYY","annotations":[{"segment":[49.45529445,173.82272135],"label":"Tango"}]},"EwYgRPVDQWQ":{"duration":122.23,"subset":"validation","resolution":"854x468","url":"https://www.youtube.com/watch?v=EwYgRPVDQWQ","annotations":[{"segment":[5.91099444,117.01441497659907],"label":"Making a sandwich"}]},"5UlxCwq-LOs":{"duration":123.46,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=5UlxCwq-LOs","annotations":[{"segment":[1.3642394695787832,118.45452396],"label":"Long jump"}]},"4At1Vd-0lWE":{"duration":162.89,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=4At1Vd-0lWE","annotations":[{"segment":[0.01,143.82932399375974],"label":"Playing racquetball"}]},"FkbpD1zWdPw":{"duration":183.21,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FkbpD1zWdPw","annotations":[]},"mBHsAuDJmj4":{"duration":176.426,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=mBHsAuDJmj4","annotations":[{"segment":[20.359928237129484,80.6143104524181],"label":"Welding"},{"segment":[94.64615288611543,151.06578627145086],"label":"Welding"}]},"y-X0DjEHD_k":{"duration":71.332,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=y-X0DjEHD_k","annotations":[{"segment":[60.23806305531212,62.789583293259234],"label":"Having an ice cream"}]},"zm8gZpwZkRE":{"duration":60.883,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zm8gZpwZkRE","annotations":[{"segment":[21.27570908550628,34.19310327613104],"label":"Playing 
drums"},{"segment":[46.3506511076443,60.31283494539782],"label":"Playing drums"}]},"dSF2i1OQtMc":{"duration":227.928,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dSF2i1OQtMc","annotations":[{"segment":[16.712281617795323,22.401568977044796],"label":"Rope skipping"},{"segment":[37.33594829507466,60.804258651978735],"label":"Rope skipping"},{"segment":[68.27144831099366,107.74087936578687],"label":"Rope skipping"},{"segment":[122.31967822386363,156.81098283931357],"label":"Rope skipping"},{"segment":[161.78910927865687,169.96745985757798],"label":"Rope skipping"},{"segment":[176.7234885966867,191.65786637597503],"label":"Rope skipping"}]},"GdEKSyad_rk":{"duration":72.261,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GdEKSyad_rk","annotations":[{"segment":[4.171034936037442,42.95038677379095],"label":"Waterskiing"},{"segment":[45.43046160062403,53.65980079875195],"label":"Waterskiing"},{"segment":[57.60537438689547,61.438217301092045],"label":"Waterskiing"}]},"eUxFTEeNIGg":{"duration":84.8,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eUxFTEeNIGg","annotations":[{"segment":[7.672920527301093,76.86149700624026],"label":"Rope skipping"}]},"jDL2tRtoxN4":{"duration":48.072,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jDL2tRtoxN4","annotations":[{"segment":[0.2,47.861],"label":"Riding bumper cars"}]},"hrwcr7BxS5I":{"duration":15.09,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=hrwcr7BxS5I","annotations":[{"segment":[0.38768564742589706,7.060578003120125],"label":"Tennis serve with ball 
bouncing"}]},"F4wq7O-V76I":{"duration":144.29,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=F4wq7O-V76I","annotations":[]},"aIvWwX--xKw":{"duration":83.62,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aIvWwX--xKw","annotations":[{"segment":[4.9399742,24.671441899999998],"label":"Preparing salad"},{"segment":[29.8237982,67.16374875],"label":"Preparing salad"}]},"OGzYaA3k7vo":{"duration":152.765,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=OGzYaA3k7vo","annotations":[]},"iUOCQ9sE2_A":{"duration":65.9,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=iUOCQ9sE2_A","annotations":[{"segment":[1.644885616224649,62.19723736349454],"label":"Doing kickboxing"}]},"W1JPhjmZgc8":{"duration":80.18,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=W1JPhjmZgc8","annotations":[]},"y9kk0ptXevk":{"duration":94.85,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=y9kk0ptXevk","annotations":[{"segment":[51.15619043999999,73.91653666146645],"label":"Cleaning shoes"}]},"Tv8r1w-rLME":{"duration":5.32,"subset":"validation","resolution":"360x360","url":"https://www.youtube.com/watch?v=Tv8r1w-rLME","annotations":[{"segment":[0.10784053666146647,4.877710427457098],"label":"Smoking hookah"}]},"AjizhJc_reI":{"duration":98.616,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=AjizhJc_reI","annotations":[]},"xCVZBvwhoWE":{"duration":16.44,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xCVZBvwhoWE","annotations":[{"segment":[0.76941,15.849846000000001],"label":"Playing racquetball"}]},"xsBFnpdLWkU":{"duration":38.453,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=xsBFnpdLWkU","annotations":[{"segment":[0.2,38.25224489795918],"label":"Playing 
blackjack"}]},"ALjodjboELk":{"duration":200.203,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=ALjodjboELk","annotations":[{"segment":[8.745192779139737,167.09564692667706],"label":"Scuba diving"}]},"MF3VcmCCgzY":{"duration":206.773,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=MF3VcmCCgzY","annotations":[{"segment":[9.032216526756308,206.5732426303855],"label":"Capoeira"}]},"8xvoAyY70I8":{"duration":34.58,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8xvoAyY70I8","annotations":[{"segment":[8.36552504,32.228190830733226],"label":"Washing dishes"}]},"uFMlVrTUoR8":{"duration":96.781,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=uFMlVrTUoR8","annotations":[{"segment":[20.533829516380656,75.19005219968798],"label":"Spread mulch"}]},"Nl_DJpK-WzY":{"duration":150.419,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Nl_DJpK-WzY","annotations":[]},"-eJT11AuOa0":{"duration":69.03,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-eJT11AuOa0","annotations":[]},"bvL1xS5OOE8":{"duration":6.6,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=bvL1xS5OOE8","annotations":[]},"L3X8OmIbj2c":{"duration":222.633,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=L3X8OmIbj2c","annotations":[{"segment":[11.458928237129484,73.26769266770671],"label":"Arm wrestling"},{"segment":[81.60145865834633,213.91921372854912],"label":"Arm wrestling"}]},"5kIl4TspdR0":{"duration":141.74,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=5kIl4TspdR0","annotations":[{"segment":[3.757530421216849,137.05868954758188],"label":"Brushing 
teeth"}]},"d4sX1jnL4BY":{"duration":115.961,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=d4sX1jnL4BY","annotations":[]},"vjCKf3el7pM":{"duration":218.268,"subset":"training","resolution":"730x480","url":"https://www.youtube.com/watch?v=vjCKf3el7pM","annotations":[{"segment":[17.47500550443907,127.6287334296727],"label":"Wakeboarding"},{"segment":[132.04848177235186,149.3874945013238],"label":"Wakeboarding"},{"segment":[155.84712669447023,165.70656530506213],"label":"Wakeboarding"},{"segment":[183.38555867577864,198.34470691253875],"label":"Wakeboarding"}]},"8kC5L_Iu3KY":{"duration":6.316,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=8kC5L_Iu3KY","annotations":[{"segment":[0.01,6.187737492667707],"label":"Playing congas"}]},"dFGS7YCDk3Y":{"duration":86.63,"subset":"testing","resolution":"640x352","url":"https://www.youtube.com/watch?v=dFGS7YCDk3Y","annotations":[]},"Vbkq1ldn3A8":{"duration":72.35,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=Vbkq1ldn3A8","annotations":[{"segment":[0.7895585023400936,70.40297035881436],"label":"Smoking hookah"}]},"2KzuD0SpFOc":{"duration":163.283,"subset":"testing","resolution":"350x240","url":"https://www.youtube.com/watch?v=2KzuD0SpFOc","annotations":[]},"j5Fq7Y8EJk8":{"duration":197.881,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=j5Fq7Y8EJk8","annotations":[{"segment":[53.62045794392523,61.01638317757009],"label":"Waterskiing"},{"segment":[80.43068691588785,177.21195482866042],"label":"Waterskiing"}]},"vrY1ZMqjMog":{"duration":51.851,"subset":"validation","resolution":"176x144","url":"https://www.youtube.com/watch?v=vrY1ZMqjMog","annotations":[{"segment":[0.2,51.65015873015873],"label":"Hanging 
wallpaper"}]},"PtPIxxX-5lY":{"duration":47.6,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PtPIxxX-5lY","annotations":[]},"f0CViWbj3V4":{"duration":27.632,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=f0CViWbj3V4","annotations":[{"segment":[0.01,26.213630882951936],"label":"Playing congas"}]},"X52XHYKeLiE":{"duration":192.87,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=X52XHYKeLiE","annotations":[]},"M_6oQOAMe0s":{"duration":161.12,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=M_6oQOAMe0s","annotations":[]},"x1wy8QmHZL8":{"duration":120.117,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=x1wy8QmHZL8","annotations":[{"segment":[0.01,119.367288958225],"label":"Baton twirling"}]},"18k_K9cFFJg":{"duration":192.318,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=18k_K9cFFJg","annotations":[{"segment":[0.20000000000000004,184.2201154446178],"label":"Rollerblading"}]},"Ip1EWQCXvRM":{"duration":130.287,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ip1EWQCXvRM","annotations":[{"segment":[11.179085920436815,21.341891302652105],"label":"Rafting"},{"segment":[23.78096459438377,88.61966293291731],"label":"Rafting"},{"segment":[94.31083394695787,118.70156686427455],"label":"Rafting"}]},"_WMRdq7yFpA":{"duration":10.542,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=_WMRdq7yFpA","annotations":[{"segment":[0.20000000000000004,10.461],"label":"Scuba 
diving"}]},"_D0od3oWcD8":{"duration":18.182,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=_D0od3oWcD8","annotations":[]},"NXFnVeZen1U":{"duration":52.08,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NXFnVeZen1U","annotations":[{"segment":[2.3948385335413414,47.20673244929797],"label":"Playing badminton"}]},"V4S9ppnrXzc":{"duration":145.752,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=V4S9ppnrXzc","annotations":[{"segment":[24.3297786848674,41.61074298439938],"label":"Swimming"},{"segment":[61.62028059438378,63.89409168642746],"label":"Swimming"},{"segment":[92.31673033697349,95.50006586583464],"label":"Swimming"}]},"qNE6ju5dRc0":{"duration":120.0,"subset":"validation","resolution":"400x224","url":"https://www.youtube.com/watch?v=qNE6ju5dRc0","annotations":[{"segment":[35.56963737909517,39.12660111700468],"label":"Kayaking"},{"segment":[49.048657859594385,59.53234045553822],"label":"Kayaking"}]},"duZnMXDWkGw":{"duration":231.945,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=duZnMXDWkGw","annotations":[{"segment":[63.30604524180967,70.54102184087364],"label":"Running a marathon"},{"segment":[85.74186739469579,91.53034789391576],"label":"Running a marathon"},{"segment":[121.91987051482059,139.2853120124805],"label":"Running a marathon"},{"segment":[146.15913260530422,152.30939313572543],"label":"Running a marathon"},{"segment":[157.73609360374417,175.46331513260532],"label":"Running a marathon"},{"segment":[180.89001560062403,189.57273634945398],"label":"Running a marathon"}]},"aNE5ZWD5E34":{"duration":140.737,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=aNE5ZWD5E34","annotations":[{"segment":[32.72284255663493,36.230284860659346],"label":"Applying sunscreen"},{"segment":[49.60240864475242,52.013775228769205],"label":"Applying 
sunscreen"},{"segment":[61.44002642083481,75.03136534892943],"label":"Applying sunscreen"},{"segment":[89.71877907331229,96.95287875088238],"label":"Applying sunscreen"},{"segment":[103.09090271972973,110.32500239729983],"label":"Applying sunscreen"},{"segment":[116.90145664963627,135.09631341443378],"label":"Applying sunscreen"}]},"3V9tzjyr51I":{"duration":8.92,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=3V9tzjyr51I","annotations":[]},"l_0HvMt2tGg":{"duration":209.31,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=l_0HvMt2tGg","annotations":[]},"EeGunBjmpFw":{"duration":70.798,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=EeGunBjmpFw","annotations":[{"segment":[47.45529266369099,61.76895305087835],"label":"Roof shingle removal"}]},"_LmtW0w6ad4":{"duration":143.71,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_LmtW0w6ad4","annotations":[{"segment":[30.266178205928238,36.09521993447738],"label":"Tai chi"},{"segment":[46.63233382839314,50.21943643057723],"label":"Tai chi"},{"segment":[55.37589642121685,56.27267207176288],"label":"Tai chi"},{"segment":[70.17269465522621,73.3114094321373],"label":"Tai chi"},{"segment":[74.88076682059283,76.89851203432139],"label":"Tai chi"},{"segment":[83.62432941341653,86.3146563650546],"label":"Tai chi"},{"segment":[90.57434070514822,91.91950418096725],"label":"Tai chi"},{"segment":[93.26466765678629,95.05821895787832],"label":"Tai chi"},{"segment":[96.85177025897038,109.85501719188768],"label":"Tai chi"}]},"E88Sr9H3Wi8":{"duration":169.854,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=E88Sr9H3Wi8","annotations":[{"segment":[31.532834113364537,139.38042532293292],"label":"Making a 
lemonade"}]},"JKE9D1anR7I":{"duration":50.16,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JKE9D1anR7I","annotations":[{"segment":[4.147312012480499,46.559446177847114],"label":"Using parallel bars"}]},"nIymjHWIz7Y":{"duration":15.23,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=nIymjHWIz7Y","annotations":[{"segment":[5.132877416536662,13.521329861154447],"label":"Plataform diving"}]},"SGiMk9KdOQw":{"duration":136.63,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=SGiMk9KdOQw","annotations":[{"segment":[0.01,0.02],"label":"Playing water polo"}]},"r2vO_RnVJYQ":{"duration":208.7,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=r2vO_RnVJYQ","annotations":[]},"vWz90CichbM":{"duration":78.089,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vWz90CichbM","annotations":[{"segment":[11.073464898595946,52.21686271450858],"label":"Fixing the roof"}]},"y9FLYBhbdM0":{"duration":120.117,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=y9FLYBhbdM0","annotations":[]},"qp3OfC0dCOs":{"duration":138.42000000000002,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=qp3OfC0dCOs","annotations":[{"segment":[13.810421216848674,116.52542901716068],"label":"Playing lacrosse"}]},"mGA8SbtKhc8":{"duration":72.1,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=mGA8SbtKhc8","annotations":[]},"qr5vqi5tTL8":{"duration":41.73,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=qr5vqi5tTL8","annotations":[{"segment":[1.3670068595943836,41.01020578783151],"label":"Bathing dog"}]},"-QuFk_ThRNg":{"duration":135.373,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-QuFk_ThRNg","annotations":[{"segment":[0.01,127.76922041341653],"label":"Arm 
wrestling"}]},"t0y6dkIwEvc":{"duration":34.06,"subset":"validation","resolution":"360x240","url":"https://www.youtube.com/watch?v=t0y6dkIwEvc","annotations":[{"segment":[23.966795744149763,34.063672],"label":"Triple jump"}]},"vzUeFzhVYLg":{"duration":8.85,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=vzUeFzhVYLg","annotations":[]},"Kd_qpMbvRtI":{"duration":152.811,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Kd_qpMbvRtI","annotations":[{"segment":[117.63781654066283,120.96494590870803],"label":"Wakeboarding"},{"segment":[129.9957279957998,134.27346687915906],"label":"Wakeboarding"},{"segment":[143.77955328662406,146.1560748884903],"label":"Wakeboarding"}]},"YWu9gYCBAas":{"duration":216.109,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=YWu9gYCBAas","annotations":[{"segment":[17.868533930472864,192.84530788143525],"label":"Cleaning sink"}]},"GWJw2jR2mTY":{"duration":4.598,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=GWJw2jR2mTY","annotations":[{"segment":[0.19999999999999998,4.39755],"label":"Mooping floor"}]},"eCd8x8KqxKQ":{"duration":56.1,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eCd8x8KqxKQ","annotations":[{"segment":[28.18529251170047,30.02061388455538],"label":"Playing blackjack"},{"segment":[31.418953978159127,35.87616302652106],"label":"Playing blackjack"}]},"2Xfigl8xrXc":{"duration":57.73,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2Xfigl8xrXc","annotations":[{"segment":[3.644262849453978,7.9633891895475815],"label":"Shaving legs"},{"segment":[30.402076319999995,41.16667292901716],"label":"Shaving 
legs"}]},"ACnOpQNrhqs":{"duration":189.29,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ACnOpQNrhqs","annotations":[{"segment":[1.7718165585023402,189.289069],"label":"Volleyball"}]},"6g80a1NnftU":{"duration":128.43,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=6g80a1NnftU","annotations":[{"segment":[3.0552371243041683,73.07108470997395],"label":"Washing hands"}]},"GBNj2k2OVGo":{"duration":226.81,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GBNj2k2OVGo","annotations":[{"segment":[4.599470923054609,225.0202687301092],"label":"Playing saxophone"}]},"fK8JkYW4cM8":{"duration":148.38,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=fK8JkYW4cM8","annotations":[]},"x39KxoQHEp4":{"duration":238.26,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=x39KxoQHEp4","annotations":[{"segment":[0.01,238.241],"label":"River tubing"}]},"FqlXey9KFZA":{"duration":49.552,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=FqlXey9KFZA","annotations":[{"segment":[0,49.501],"label":"Fixing the roof"}]},"qlvW6jW2mfQ":{"duration":123.206,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qlvW6jW2mfQ","annotations":[{"segment":[9.41817299358641,40.36359854394176],"label":"Playing drums"},{"segment":[41.1324289923978,44.39995839833593],"label":"Playing drums"},{"segment":[45.937619295248,48.43631825273011],"label":"Playing drums"},{"segment":[51.70384765866825,65.73500334299086],"label":"Playing drums"},{"segment":[69.77136299063962,79.18953595631825],"label":"Playing drums"},{"segment":[80.34278162558502,81.49602729485179],"label":"Playing drums"},{"segment":[81.88044251794072,83.61031102184087],"label":"Playing drums"},{"segment":[83.9947262449298,92.06744592979719],"label":"Playing 
drums"},{"segment":[113.40249081123244,118.39988871138846],"label":"Playing drums"}]},"yE5euaQmcXw":{"duration":100.705,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=yE5euaQmcXw","annotations":[{"segment":[11.149658346333853,36.589723868954756],"label":"Curling"}]},"FkKgmF1qeIA":{"duration":182.068,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FkKgmF1qeIA","annotations":[]},"bvnXdr-Hre4":{"duration":89.14,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bvnXdr-Hre4","annotations":[{"segment":[4.865273010920437,86.04582839313572],"label":"Springboard diving"}]},"gEqT50dX-iY":{"duration":76.86,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=gEqT50dX-iY","annotations":[{"segment":[54.25626633385335,57.49365681123245],"label":"Cleaning windows"}]},"JAkb4JGhs94":{"duration":187.04,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=JAkb4JGhs94","annotations":[]},"9wpV8Gknh1E":{"duration":126.619,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=9wpV8Gknh1E","annotations":[{"segment":[5.827183220662159,106.37078607150285],"label":"Camel ride"}]},"kWdIYqh6kEo":{"duration":193.167,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=kWdIYqh6kEo","annotations":[{"segment":[7.522071757051729,61.380105537542114],"label":"Playing ice hockey"},{"segment":[68.60129442431177,75.52160044079938],"label":"Playing ice hockey"},{"segment":[78.22954627333799,93.27368978744146],"label":"Playing ice hockey"},{"segment":[103.80459024731388,124.26462542649459],"label":"Playing ice hockey"},{"segment":[127.57433699959734,137.80435458918768],"label":"Playing ice hockey"},{"segment":[155.85732680611184,170.60058592990654],"label":"Playing ice hockey"},{"segment":[156.45909115264797,192.966801],"label":"Playing ice 
hockey"}]},"bp1blshSKJU":{"duration":118.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=bp1blshSKJU","annotations":[{"segment":[40.591263650546026,98.39469578783152],"label":"Playing piano"}]},"u9IwWaXtQlw":{"duration":144.03,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=u9IwWaXtQlw","annotations":[]},"UyThFVEvNsI":{"duration":61.12,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=UyThFVEvNsI","annotations":[{"segment":[13.054753510140406,24.108413416536663],"label":"Playing badminton"}]},"ffyPN2-EDg4":{"duration":108.16,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=ffyPN2-EDg4","annotations":[{"segment":[6.997918595163807,91.20315192],"label":"Shoveling snow"}]},"Q_v6MdHJ7Go":{"duration":37.872,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Q_v6MdHJ7Go","annotations":[{"segment":[10.804316912321287,32.30691037026521],"label":"Kneeling"}]},"HwM3ionUBsg":{"duration":27.56,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=HwM3ionUBsg","annotations":[{"segment":[6.09526159,22.144264859594383],"label":"Using uneven bars"}]},"FT_34R0dZnA":{"duration":152.95,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=FT_34R0dZnA","annotations":[{"segment":[68.44820503630595,111.79397139144614],"label":"Canoeing"}]},"SaiY5Fnk2kw":{"duration":63.577,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SaiY5Fnk2kw","annotations":[]},"hGKKovntCUQ":{"duration":66.71,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hGKKovntCUQ","annotations":[]},"X5_KlfVvi6E":{"duration":146.47,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=X5_KlfVvi6E","annotations":[{"segment":[0.010000000000000002,136.7054297971919],"label":"Playing 
flauta"}]},"DAxBgyhuhyU":{"duration":31.626,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=DAxBgyhuhyU","annotations":[]},"9a6C5rvS56A":{"duration":78.74,"subset":"training","resolution":"360x240","url":"https://www.youtube.com/watch?v=9a6C5rvS56A","annotations":[{"segment":[19.2435672,61.00384026],"label":"Hopscotch"}]},"swmNnPkPBek":{"duration":169.135,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=swmNnPkPBek","annotations":[{"segment":[5.404836193447737,9.623244929797192],"label":"Kite flying"},{"segment":[24.12402496099844,30.18798751950078],"label":"Kite flying"},{"segment":[143.55772230889235,155.4219968798752],"label":"Kite flying"}]},"gkwLhml3mnA":{"duration":236.17000000000002,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=gkwLhml3mnA","annotations":[]},"SWbUJh4XuMQ":{"duration":73.724,"subset":"training","resolution":"854x468","url":"https://www.youtube.com/watch?v=SWbUJh4XuMQ","annotations":[{"segment":[0.2,72.60605304212169],"label":"Welding"}]},"ZTtPztql5jM":{"duration":125.53,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=ZTtPztql5jM","annotations":[{"segment":[18.408026452418095,21.149647413416538],"label":"Discus throw"},{"segment":[41.51597455226209,45.23674585647426],"label":"Discus throw"},{"segment":[58.55319052418097,76.96121697659908],"label":"Discus throw"},{"segment":[83.81526937909517,87.73187075195008],"label":"Discus throw"}]},"NnMMEFglHBQ":{"duration":158.9,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NnMMEFglHBQ","annotations":[{"segment":[20.21216848673947,129.53674456],"label":"Vacuuming floor"}]},"inw7wXFL9dw":{"duration":204.87,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=inw7wXFL9dw","annotations":[{"segment":[46.02369002515203,178.02218866926677],"label":"Beer 
pong"}]},"WPYQ27GjFKc":{"duration":194.514,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WPYQ27GjFKc","annotations":[]},"NxnEKnqWLCI":{"duration":183.09,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NxnEKnqWLCI","annotations":[{"segment":[28.70586689590032,167.52229785517952],"label":"Windsurfing"}]},"arRoR5Hmegg":{"duration":199.46,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=arRoR5Hmegg","annotations":[]},"fnPX_0Rs4eE":{"duration":102.05,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=fnPX_0Rs4eE","annotations":[{"segment":[3.5404513516368383,95.89564929801652],"label":"Using the balance beam"}]},"oumu91uHB9A":{"duration":152.05,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=oumu91uHB9A","annotations":[]},"dBzWXTH5j00":{"duration":142.571,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=dBzWXTH5j00","annotations":[{"segment":[0.01,142.570497],"label":"Rafting"}]},"2G8vCitCz_0":{"duration":141.18,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=2G8vCitCz_0","annotations":[]},"HytB88Fhqw8":{"duration":96.5,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HytB88Fhqw8","annotations":[{"segment":[0.3010986942879076,6.774720514820592],"label":"Wrapping presents"},{"segment":[56.30545494539781,85.81312652106084],"label":"Wrapping presents"}]},"4fEY6zIq8bQ":{"duration":219.661,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=4fEY6zIq8bQ","annotations":[{"segment":[15.760633385335415,219.621],"label":"Plastering"}]},"9-yA1F4il_A":{"duration":16.835,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=9-yA1F4il_A","annotations":[{"segment":[0.01,14.756572542901715],"label":"Blowing 
leaves"}]},"mjbzWcSeiwQ":{"duration":88.747,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mjbzWcSeiwQ","annotations":[{"segment":[20.75686427457098,28.09095631825273],"label":"Playing ice hockey"},{"segment":[31.96557098283931,53.137572542901715],"label":"Playing ice hockey"},{"segment":[57.1505663026521,67.11386115444617],"label":"Playing ice hockey"}]},"hg8WbkmC2nU":{"duration":214.901,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hg8WbkmC2nU","annotations":[{"segment":[33.22659137723755,37.24299250466672],"label":"Making a cake"}]},"JkxO6IhuhOs":{"duration":166.73,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=JkxO6IhuhOs","annotations":[{"segment":[98.87643527509809,136.6056011700468],"label":"Bungee jumping"}]},"6hjRnngC73o":{"duration":119.165,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=6hjRnngC73o","annotations":[{"segment":[29.92465054602184,100.18252574102965],"label":"Waxing skis"}]},"Ny8NDMWfGJk":{"duration":196.418,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ny8NDMWfGJk","annotations":[{"segment":[43.833691001456515,55.45963183261764],"label":"Changing car wheel"},{"segment":[86.97205040129121,93.70285825091081],"label":"Changing car wheel"},{"segment":[101.04555772322311,104.71690745937926],"label":"Changing car wheel"}]},"pBaeRTgaNBM":{"duration":123.647,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pBaeRTgaNBM","annotations":[{"segment":[0.01,123.541],"label":"Using the rowing machine"}]},"zCND0HJq6Iw":{"duration":75.63,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zCND0HJq6Iw","annotations":[{"segment":[7.314973962870514,66.54266637191887],"label":"Playing 
bagpipes"}]},"M4db4WfqDoE":{"duration":163.074,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=M4db4WfqDoE","annotations":[{"segment":[36.88875576443058,125.42176959906398],"label":"Snowboarding"},{"segment":[128.2202269329173,143.48453966302654],"label":"Snowboarding"}]},"xpKAvKrrBDs":{"duration":153.856,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xpKAvKrrBDs","annotations":[{"segment":[82.79000435419113,106.47847485281048],"label":"Fun sliding down"}]},"EZsumzBA68I":{"duration":127.733,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=EZsumzBA68I","annotations":[{"segment":[0.2,102.30558154113409],"label":"Painting fence"}]},"fFoYCI-Si6s":{"duration":81.34,"subset":"training","resolution":"424x240","url":"https://www.youtube.com/watch?v=fFoYCI-Si6s","annotations":[{"segment":[30.074051704925342,71.44173305148206],"label":"Doing kickboxing"}]},"PiDMuIRtgZo":{"duration":49.947,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PiDMuIRtgZo","annotations":[{"segment":[0.5454230624024962,48.85432287519501],"label":"Playing ice hockey"}]},"COuOkc3U-sQ":{"duration":180.98,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=COuOkc3U-sQ","annotations":[]},"Ogx1hUoOSrE":{"duration":176.588,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=Ogx1hUoOSrE","annotations":[{"segment":[19.829878315132607,147.63915132605305],"label":"Gargling 
mouthwash"}]},"Prt07JFztMI":{"duration":9.683,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Prt07JFztMI","annotations":[{"segment":[0.2,9.441],"label":"Powerbocking"}]},"ZY0cdXr_1MA":{"duration":60.12,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZY0cdXr_1MA","annotations":[]},"FGKdmKX1z64":{"duration":108.09,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=FGKdmKX1z64","annotations":[{"segment":[26.338883839999998,63.436847199999995],"label":"Cleaning windows"}]},"5R3h6lxne90":{"duration":197.649,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=5R3h6lxne90","annotations":[{"segment":[36.75009299301433,62.60442944161246],"label":"Putting in contact lenses"},{"segment":[75.53159766591152,85.68865841357508],"label":"Putting in contact lenses"},{"segment":[99.53919579675264,117.69878925469658],"label":"Putting in contact lenses"},{"segment":[136.16617243226668,141.10508832016467],"label":"Putting in contact lenses"},{"segment":[144.49111791518024,149.41625187156654],"label":"Putting in contact lenses"},{"segment":[161.113445017984,168.50114595256343],"label":"Putting in contact lenses"}]},"jEGTpBJrD4s":{"duration":188.105,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jEGTpBJrD4s","annotations":[{"segment":[7.042928247742155,16.726954588387617],"label":"Playing congas"},{"segment":[29.05207902193639,33.74736452043116],"label":"Playing congas"},{"segment":[52.23505117075431,179.30121497376902],"label":"Playing 
congas"}]},"GhVERbQMZWQ":{"duration":142.78,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GhVERbQMZWQ","annotations":[{"segment":[32.641655502370725,37.970905933491856],"label":"Wakeboarding"},{"segment":[45.964781580173536,46.18683331470876],"label":"Wakeboarding"},{"segment":[52.40429210164499,55.06891729604624],"label":"Wakeboarding"},{"segment":[58.177646689514354,63.72894917785028],"label":"Wakeboarding"},{"segment":[66.1715222727181,68.16999116851902],"label":"Wakeboarding"},{"segment":[71.0566684624537,77.27412724938993],"label":"Wakeboarding"},{"segment":[79.71670034425775,82.60337763819243],"label":"Wakeboarding"},{"segment":[105.03063969106957,117.2435051654086],"label":"Wakeboarding"}]},"FofBEedm80o":{"duration":205.99,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=FofBEedm80o","annotations":[{"segment":[9.31069466996367,201.3036389063963],"label":"Tai chi"}]},"3j2d27w3x5Q":{"duration":194.44,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3j2d27w3x5Q","annotations":[{"segment":[49.44509742901716,188.68006503588143],"label":"Preparing pasta"}]},"HrKO4BfXVbk":{"duration":180.42,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=HrKO4BfXVbk","annotations":[{"segment":[49.917926900000005,150.0846264],"label":"Playing saxophone"}]},"4KzG9JBE9V8":{"duration":25.542,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=4KzG9JBE9V8","annotations":[{"segment":[0.2383271028037383,23.17561214953271],"label":"Snow 
tubing"}]},"jfhKC2WFDTo":{"duration":179.31,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jfhKC2WFDTo","annotations":[{"segment":[26.294178742589704,176.78639324804993],"label":"Cheerleading"}]},"Snw8Jf1DQwg":{"duration":101.286,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Snw8Jf1DQwg","annotations":[{"segment":[9.32268489703588,101.08544217687074],"label":"Baton twirling"}]},"3TsNntqwbSQ":{"duration":43.422,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3TsNntqwbSQ","annotations":[{"segment":[5.073439937597503,25.10819188767551],"label":"Camel ride"}]},"IdhpB7doBOE":{"duration":166.91,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=IdhpB7doBOE","annotations":[{"segment":[26.3976948,84.4372395],"label":"Ballet"},{"segment":[107.80393950000001,140.77936035000002],"label":"Ballet"}]},"DrEsyzI2u4c":{"duration":37.501,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DrEsyzI2u4c","annotations":[{"segment":[0.009999999999999998,37.461],"label":"Kneeling"}]},"LgoMRWkBDkQ":{"duration":20.016,"subset":"training","resolution":"240x176","url":"https://www.youtube.com/watch?v=LgoMRWkBDkQ","annotations":[{"segment":[3.091332762870515,7.775170282371295],"label":"Removing ice from car"},{"segment":[10.179540209048364,19.234959413416536],"label":"Removing ice from car"}]},"pJnMBfgK9Gw":{"duration":154.227,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=pJnMBfgK9Gw","annotations":[]},"xbA294b3Z5c":{"duration":159.869,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=xbA294b3Z5c","annotations":[{"segment":[52.60417628705148,79.52953666146645],"label":"Putting in contact 
lenses"}]},"FmDGejzydo8":{"duration":203.965,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FmDGejzydo8","annotations":[{"segment":[21.921373259584207,192.5268418691589],"label":"Longboarding"}]},"4oi936sw1Mo":{"duration":159.498,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4oi936sw1Mo","annotations":[]},"fkmYC91-biI":{"duration":43.98,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=fkmYC91-biI","annotations":[{"segment":[3.324002809672387,19.30810112],"label":"Smoking hookah"},{"segment":[22.00571164,42.93789196411856],"label":"Smoking hookah"}]},"2qcdjyT7nDY":{"duration":225.75,"subset":"training","resolution":"658x480","url":"https://www.youtube.com/watch?v=2qcdjyT7nDY","annotations":[{"segment":[7.788168,201.20485149687988],"label":"Cheerleading"}]},"bu7zk-YxEoY":{"duration":128.85,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=bu7zk-YxEoY","annotations":[{"segment":[5.426157566302652,94.85727301092044],"label":"Using the balance beam"}]},"-4VuHlphgL4":{"duration":157.525,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-4VuHlphgL4","annotations":[{"segment":[56.90902180685358,157.281],"label":"Painting furniture"}]},"l4YcjFl6SDc":{"duration":172.85,"subset":"training","resolution":"1280x716","url":"https://www.youtube.com/watch?v=l4YcjFl6SDc","annotations":[{"segment":[0.01,172.84918367346938],"label":"Playing pool"}]},"lKDTjsH9XtU":{"duration":232.641,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lKDTjsH9XtU","annotations":[{"segment":[147.2546995007503,159.91788052225166],"label":"Fun sliding 
down"}]},"RotRDzNBDRM":{"duration":222.099,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=RotRDzNBDRM","annotations":[]},"Wzpyu5NEqZ4":{"duration":202.32,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Wzpyu5NEqZ4","annotations":[{"segment":[36.44484477379095,39.91578237129485],"label":"Playing squash"},{"segment":[53.799532761310445,56.639390795631826],"label":"Playing squash"}]},"_pYDg0B6_tw":{"duration":19.85,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_pYDg0B6_tw","annotations":[{"segment":[3.784886895475819,19.29519890795632],"label":"Springboard diving"}]},"PtoVL69LEh4":{"duration":148.4,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PtoVL69LEh4","annotations":[{"segment":[35.7683615099706,57.0673211146133],"label":"Getting a tattoo"}]},"u_HDCcby_B0":{"duration":117.517,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=u_HDCcby_B0","annotations":[{"segment":[7.0431941869032695,13.984901807432042],"label":"Disc dog"},{"segment":[35.17537770167777,66.96109154304635],"label":"Disc dog"}]},"3MJQEQ98168":{"duration":24.103,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3MJQEQ98168","annotations":[{"segment":[0.2,15.008424336973478],"label":"Using the monkey 
bar"}]},"OaFYMXKxTbk":{"duration":144.823,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OaFYMXKxTbk","annotations":[{"segment":[7.455778916870961,124.94077904524183],"label":"Skiing"}]},"86lVBoc5Ihs":{"duration":119.745,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=86lVBoc5Ihs","annotations":[]},"fMyTTjKV-1U":{"duration":186.665,"subset":"testing","resolution":"320x180","url":"https://www.youtube.com/watch?v=fMyTTjKV-1U","annotations":[]},"Ocid5pcnSVY":{"duration":234.03,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ocid5pcnSVY","annotations":[]},"4o8MaHTb7E4":{"duration":188.45,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=4o8MaHTb7E4","annotations":[{"segment":[6.900421216848674,169.63150828],"label":"Triple jump"}]},"zxpQHLwZCug":{"duration":34.81,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zxpQHLwZCug","annotations":[]},"4YGuIycW_IA":{"duration":100.357,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4YGuIycW_IA","annotations":[{"segment":[56.5190956236889,100.35662131519274],"label":"Applying sunscreen"}]},"gHYcZYkrxxQ":{"duration":137.764,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=gHYcZYkrxxQ","annotations":[{"segment":[3.0088859173414555,137.54907050703798],"label":"Removing ice from 
car"}]},"-s1MQAiOMeY":{"duration":125.9,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=-s1MQAiOMeY","annotations":[{"segment":[31.823237459999994,71.64603384],"label":"Sailing"},{"segment":[31.823237459999998,71.64603384],"label":"Sailing"}]},"tTkavaWq0QM":{"duration":181.98,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tTkavaWq0QM","annotations":[{"segment":[24.48642144,169.19964369731252],"label":"Tango"}]},"yvzlX5St_Ok":{"duration":30.303,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=yvzlX5St_Ok","annotations":[{"segment":[0.01,30.302039],"label":"Riding bumper cars"}]},"5TjIJOFGupI":{"duration":67.62,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5TjIJOFGupI","annotations":[{"segment":[0,67.581],"label":"Making a sandwich"}]},"ybAEMliC7p4":{"duration":18.414,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=ybAEMliC7p4","annotations":[{"segment":[1.0772283346333853,16.503138086583466],"label":"Playing rubik cube"}]},"osLHgm_yuMc":{"duration":132.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=osLHgm_yuMc","annotations":[{"segment":[8.077940186841674,128.81696533793203],"label":"Paintball"}]},"aTl4KzTuJoU":{"duration":41.402,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=aTl4KzTuJoU","annotations":[{"segment":[15.824163198127925,41.401178],"label":"Waterskiing"}]},"3zPoB-_JxEc":{"duration":42.12,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=3zPoB-_JxEc","annotations":[{"segment":[3.1470826333853354,37.96168426521061],"label":"Smoking a cigarette"}]},"iOpoR0UeJNg":{"duration":77.78999999999999,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=iOpoR0UeJNg","annotations":[{"segment":[69.25134321372855,77.741],"label":"Using uneven 
bars"}]},"WCCkmuFrSQ0":{"duration":185.85,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=WCCkmuFrSQ0","annotations":[{"segment":[3.479298714805735,122.35533813733502],"label":"Hopscotch"}]},"62BPME-ikJU":{"duration":168.902,"subset":"validation","resolution":"854x270","url":"https://www.youtube.com/watch?v=62BPME-ikJU","annotations":[{"segment":[72.08587902035165,156.53685304517134],"label":"Cutting the grass"}]},"j1XZ3FA8EYY":{"duration":199.32,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=j1XZ3FA8EYY","annotations":[{"segment":[0.01,199.40266666666668],"label":"Futsal"}]},"c7pbA5pQPYE":{"duration":157.339,"subset":"testing","resolution":"704x480","url":"https://www.youtube.com/watch?v=c7pbA5pQPYE","annotations":[]},"ufP69gKRisM":{"duration":90.58,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=ufP69gKRisM","annotations":[]},"Geh_BZchxFY":{"duration":58.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Geh_BZchxFY","annotations":[{"segment":[0,58.049885],"label":"Peeling potatoes"}]},"NpsOCOnQS6c":{"duration":49.111,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=NpsOCOnQS6c","annotations":[{"segment":[6.9917019145424035,45.239523565632744],"label":"Painting fence"}]},"8FSKFy1tPQc":{"duration":123.02,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=8FSKFy1tPQc","annotations":[{"segment":[15.7372347628599,31.4744695257198],"label":"Playing polo"},{"segment":[37.23199443896123,58.34291912084647],"label":"Playing polo"},{"segment":[63.716609039871805,71.20139142708565],"label":"Playing polo"},{"segment":[76.57508094383775,87.13054322932918],"label":"Playing polo"},{"segment":[92.31231562402496,106.32229283931358],"label":"Playing 
polo"}]},"iPiP-7UwAa4":{"duration":227.09,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iPiP-7UwAa4","annotations":[{"segment":[11.161860374414976,221.2883112324493],"label":"Cheerleading"}]},"NcTZ3wgdNOQ":{"duration":212.23,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=NcTZ3wgdNOQ","annotations":[{"segment":[29.791965678627143,206.5576287051482],"label":"Grooming horse"}]},"dsCJ4xdRq-Q":{"duration":28.863,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=dsCJ4xdRq-Q","annotations":[{"segment":[4.097470717876335,19.406700876755067],"label":"Playing rubik cube"}]},"4ACqWG_p1bI":{"duration":192.285,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4ACqWG_p1bI","annotations":[{"segment":[12.898956491592996,15.298762350494018],"label":"Playing drums"},{"segment":[35.39713641879008,43.49648119258104],"label":"Playing drums"},{"segment":[53.69565609291038,59.695170740162936],"label":"Playing drums"},{"segment":[98.69201594730455,128.38961345120472],"label":"Playing drums"},{"segment":[161.38694363806553,167.9864097347894],"label":"Playing drums"},{"segment":[183.58514778159127,192.08444400000002],"label":"Playing drums"}]},"sYhUC2uQR7I":{"duration":230.41,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sYhUC2uQR7I","annotations":[{"segment":[28.037603288611546,217.1116972605304],"label":"Doing kickboxing"}]},"0ysVELHeEyc":{"duration":165.443,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0ysVELHeEyc","annotations":[{"segment":[13.417865834633387,155.83482683307332],"label":"Painting 
fence"}]},"URYKdg-MG6g":{"duration":200.11,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=URYKdg-MG6g","annotations":[]},"h1d-jcsWap8":{"duration":87.052,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=h1d-jcsWap8","annotations":[{"segment":[30.98359282371295,50.872624024961],"label":"Peeling potatoes"}]},"dcEdjqyHj8M":{"duration":62.53,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dcEdjqyHj8M","annotations":[{"segment":[0,52.19053283151326],"label":"Chopping wood"}]},"peoMXSP3VgY":{"duration":84.986,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=peoMXSP3VgY","annotations":[{"segment":[10.208810516380655,15.379506751950078],"label":"Longboarding"},{"segment":[27.17930072542902,53.297945812792506],"label":"Longboarding"},{"segment":[69.8706901575663,79.6817548096724],"label":"Longboarding"}]},"ZFJkIiqOErk":{"duration":124.18,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZFJkIiqOErk","annotations":[{"segment":[0,123.98031746031745],"label":"Dodgeball"}]},"R3YoG3Hi4iI":{"duration":169.054,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=R3YoG3Hi4iI","annotations":[{"segment":[31.956560175213365,39.07132653035355],"label":"Longboarding"},{"segment":[59.888605865763736,74.1181385760441],"label":"Longboarding"},{"segment":[102.31369413159968,111.009519676771],"label":"Longboarding"},{"segment":[125.50256225205656,129.19170036212924],"label":"Longboarding"},{"segment":[134.98891739224348,142.10368374738368],"label":"Longboarding"}]},"HUZ9PuMm8yM":{"duration":207.84,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=HUZ9PuMm8yM","annotations":[{"segment":[0.01,187.04791731669266],"label":"Springboard 
diving"}]},"1UQYk6ohuRE":{"duration":177.076,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1UQYk6ohuRE","annotations":[{"segment":[0.01,165.1272979719189],"label":"Trimming branches or hedges"}]},"2uBPhFis_4Y":{"duration":48.739,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=2uBPhFis_4Y","annotations":[{"segment":[0.2,48.480999999999995],"label":"Playing congas"}]},"-uZnWuW8fB8":{"duration":52.52,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=-uZnWuW8fB8","annotations":[]},"X5_bftRM7BQ":{"duration":114.31,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=X5_bftRM7BQ","annotations":[]},"ih3LAZtQM50":{"duration":52.06,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ih3LAZtQM50","annotations":[{"segment":[0.01,46.25222858268331],"label":"Doing karate"}]},"DrigU09Wf7k":{"duration":150.977,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DrigU09Wf7k","annotations":[{"segment":[41.68918197544228,54.87897966258787],"label":"Surfing"},{"segment":[134.25329788701754,150.7760770975057],"label":"Surfing"}]},"3zT7x5jZEfI":{"duration":230.57999999999998,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3zT7x5jZEfI","annotations":[{"segment":[19.017160686427456,225.33541341653665],"label":"Volleyball"}]},"22IqnOqbTaE":{"duration":15.883,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=22IqnOqbTaE","annotations":[]},"YAiCO8en_ls":{"duration":216.41,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YAiCO8en_ls","annotations":[{"segment":[10.972424747330027,158.1717228961267],"label":"Disc 
dog"}]},"CcBPELJaRlk":{"duration":220.055,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CcBPELJaRlk","annotations":[{"segment":[18.538217691107644,191.56158280811232],"label":"Rafting"}]},"PUWg7fXnCf0":{"duration":133.468,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PUWg7fXnCf0","annotations":[{"segment":[10.378540456359033,108.351992263513],"label":"Rope skipping"}]},"_I1DKeGg_w8":{"duration":135.489,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=_I1DKeGg_w8","annotations":[{"segment":[0.2,131.6896941954352],"label":"Throwing darts"}]},"pv6aKuS_TkU":{"duration":122.439,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pv6aKuS_TkU","annotations":[]},"KTZcBhW4RHY":{"duration":18.553,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=KTZcBhW4RHY","annotations":[]},"C1v3-D1kfwU":{"duration":53.453,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=C1v3-D1kfwU","annotations":[]},"QJKFphS_3UQ":{"duration":118.31,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QJKFphS_3UQ","annotations":[{"segment":[96.52708867550702,100.95663002964119],"label":"Playing flauta"}]},"tD30qafrkhM":{"duration":162.07999999999998,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=tD30qafrkhM","annotations":[{"segment":[63.45366146645866,76.34663650546021],"label":"Smoking a cigarette"}]},"rVcgTALWCRA":{"duration":25.751,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=rVcgTALWCRA","annotations":[{"segment":[0.01,25.75092970521542],"label":"Building sandcastles"}]},"Rte9HqaQ1tw":{"duration":80.9,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Rte9HqaQ1tw","annotations":[{"segment":[23.85301537775797,79.13143196748278],"label":"Washing 
dishes"}]},"P_zz379qSuo":{"duration":123.925,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=P_zz379qSuo","annotations":[{"segment":[19.33301849080766,90.2851952402496],"label":"Slacklining"}]},"VTmGlaJABNA":{"duration":236.89,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VTmGlaJABNA","annotations":[]},"ClE77VhGQsk":{"duration":60.001,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=ClE77VhGQsk","annotations":[{"segment":[0.2,56.85992056074767],"label":"Elliptical trainer"}]},"7eL_xnoyYco":{"duration":183.654,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7eL_xnoyYco","annotations":[]},"dRRht9vdYDE":{"duration":101.797,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dRRht9vdYDE","annotations":[{"segment":[6.669959063962558,20.48630283931357],"label":"Curling"},{"segment":[26.203410608424335,29.538390140405614],"label":"Curling"},{"segment":[26.203410608424335,32.238135475819035],"label":"Curling"},{"segment":[90.99729865834634,101.59628],"label":"Curling"}]},"R246xMs2aig":{"duration":191.01,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=R246xMs2aig","annotations":[{"segment":[0.01,186.88708268330734],"label":"Belly 
dance"}]},"QP2IYVyoq5I":{"duration":84.94,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QP2IYVyoq5I","annotations":[]},"dhsSq0K1xEg":{"duration":20.32,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=dhsSq0K1xEg","annotations":[]},"tgPD2SZo_DY":{"duration":36.943,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=tgPD2SZo_DY","annotations":[]},"IAozAC1YFr4":{"duration":23.755,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=IAozAC1YFr4","annotations":[]},"QRdQ8KIVf40":{"duration":195.582,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=QRdQ8KIVf40","annotations":[{"segment":[0.01,195.581677],"label":"Playing pool"}]},"H8aW-6HhMBA":{"duration":137.09,"subset":"training","resolution":"490x360","url":"https://www.youtube.com/watch?v=H8aW-6HhMBA","annotations":[{"segment":[13.3964348,88.985119],"label":"Drinking beer"},{"segment":[0.2,136.89013500000001],"label":"Drinking beer"},{"segment":[89.39731112324493,92.17761027301091],"label":"Drinking beer"}]},"EmlhHfW5xCM":{"duration":192.215,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=EmlhHfW5xCM","annotations":[]},"FIw076A69Oc":{"duration":61.07,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FIw076A69Oc","annotations":[{"segment":[3.04865823650546,52.3988134399376],"label":"Using parallel 
bars"}]},"NVTXHst6uh4":{"duration":88.329,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NVTXHst6uh4","annotations":[]},"Hub83eTB8mM":{"duration":21.224,"subset":"training","resolution":"204x360","url":"https://www.youtube.com/watch?v=Hub83eTB8mM","annotations":[{"segment":[13.588287051482059,21.141],"label":"Plastering"}]},"w3DvA9405_o":{"duration":68.01,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=w3DvA9405_o","annotations":[{"segment":[3.0227784711388455,64.64503432137286],"label":"Playing harmonica"}]},"NyNuB328oi0":{"duration":33.46,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=NyNuB328oi0","annotations":[{"segment":[3.0330333400000002,12.539392355694227],"label":"Playing kickball"}]},"c8HFew22GnI":{"duration":165.011,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=c8HFew22GnI","annotations":[{"segment":[38.101572542901714,135.67249141965678],"label":"Wakeboarding"}]},"JMrfe-UVA0Y":{"duration":72.934,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=JMrfe-UVA0Y","annotations":[{"segment":[0.01,63.37623858346334],"label":"Clipping cat claws"}]},"D-0MV6LRvbs":{"duration":186.619,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=D-0MV6LRvbs","annotations":[{"segment":[143.90758558810464,152.04333013639123],"label":"Changing car wheel"}]},"N5LZMH9mZVU":{"duration":191.29,"subset":"testing","resolution":"468x270","url":"https://www.youtube.com/watch?v=N5LZMH9mZVU","annotations":[]},"frWLB7ZxMNI":{"duration":76.116,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=frWLB7ZxMNI","annotations":[{"segment":[4.747644305772231,69.7903712948518],"label":"Playing 
pool"}]},"4R0tKohd7OI":{"duration":62.091,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4R0tKohd7OI","annotations":[]},"NzrOOXRyDPM":{"duration":221.263,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=NzrOOXRyDPM","annotations":[{"segment":[7.937726989079564,210.54135725429018],"label":"Bullfighting"}]},"an1R4BP97JY":{"duration":141.224,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=an1R4BP97JY","annotations":[]},"h4N22DApP_Y":{"duration":213.067,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=h4N22DApP_Y","annotations":[{"segment":[97.05974132917316,113.679560049922],"label":"Playing rubik cube"},{"segment":[121.32467666146647,124.9810367800312],"label":"Playing rubik cube"},{"segment":[129.96698239625584,139.2740808798752],"label":"Playing rubik cube"},{"segment":[145.92200836817474,157.8882778471139],"label":"Playing rubik cube"},{"segment":[171.18413282371296,182.15321317940717],"label":"Playing rubik cube"},{"segment":[186.4743660468019,194.45187903276133],"label":"Playing rubik cube"}]},"NYRlfaKwTag":{"duration":170.92000000000002,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NYRlfaKwTag","annotations":[{"segment":[0.01,168.7888932047078],"label":"Getting a tattoo"}]},"f0CVl853HUg":{"duration":42.4,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=f0CVl853HUg","annotations":[{"segment":[3.9727863,12.19098447],"label":"Hand washing clothes"},{"segment":[15.79998735,34.75361232],"label":"Hand washing 
clothes"}]},"QZ9y5S-lnes":{"duration":19.85,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=QZ9y5S-lnes","annotations":[]},"I4T6Bf528Zk":{"duration":92.973,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=I4T6Bf528Zk","annotations":[]},"JDM9Akcs96g":{"duration":14.07,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=JDM9Akcs96g","annotations":[{"segment":[0.01,13.65420272321097],"label":"Bathing dog"}]},"n0meVEbpXak":{"duration":25.08,"subset":"training","resolution":"544x360","url":"https://www.youtube.com/watch?v=n0meVEbpXak","annotations":[{"segment":[0,13.966482711388455],"label":"Doing motocross"}]},"sRNqK_QZbMs":{"duration":54.312,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=sRNqK_QZbMs","annotations":[]},"pcLGX5qvUcI":{"duration":31.022,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=pcLGX5qvUcI","annotations":[]},"FuaxI8PkeHQ":{"duration":24.94,"subset":"training","resolution":"638x360","url":"https://www.youtube.com/watch?v=FuaxI8PkeHQ","annotations":[{"segment":[10.582213042121685,16.884854633385334],"label":"Drinking coffee"}]},"an5XI45pIl8":{"duration":190.334,"subset":"validation","resolution":"636x480","url":"https://www.youtube.com/watch?v=an5XI45pIl8","annotations":[{"segment":[86.39145104677033,96.47144185050864],"label":"Playing drums"},{"segment":[102.69731852340585,125.52553299069557],"label":"Playing drums"},{"segment":[158.7302085794806,176.51842764490118],"label":"Playing 
drums"}]},"rfWlK9njyzg":{"duration":45.49,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rfWlK9njyzg","annotations":[{"segment":[28.882326411856475,41.726800811232444],"label":"Hopscotch"}]},"2fs09c8gFsk":{"duration":101.286,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2fs09c8gFsk","annotations":[{"segment":[0.01,96.30794708523035],"label":"Riding bumper cars"}]},"u0PEB9bXWpY":{"duration":167.56,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=u0PEB9bXWpY","annotations":[{"segment":[36.715945064742584,142.3815368],"label":"Doing nails"}]},"cMeMbnjX-pg":{"duration":28.143,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cMeMbnjX-pg","annotations":[{"segment":[0,12.150724988955204],"label":"Curling"}]},"43hQNmW-6_k":{"duration":170.37,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=43hQNmW-6_k","annotations":[{"segment":[24.701076360000002,164.91606311778472],"label":"Breakdancing"}]},"6LLDsbc8XMM":{"duration":101.216,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=6LLDsbc8XMM","annotations":[{"segment":[0.2,69.73342389521905],"label":"Capoeira"}]},"BEA4YVQv7Wk":{"duration":157.15,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BEA4YVQv7Wk","annotations":[{"segment":[10.051907956318251,14.464940717628703],"label":"Springboard diving"},{"segment":[23.04583775351014,24.762017160686426],"label":"Springboard diving"},{"segment":[30.15572386895476,36.039767550702024],"label":"Springboard diving"},{"segment":[45.356170046801864,75.51189391575663],"label":"Springboard diving"},{"segment":[89.48649765990639,141.46221684867393],"label":"Springboard 
diving"}]},"civPiYFf2vE":{"duration":103.31,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=civPiYFf2vE","annotations":[{"segment":[19.323744149765993,42.99533073322933],"label":"Vacuuming floor"}]},"U40FhqwfBvs":{"duration":101.03,"subset":"validation","resolution":"648x480","url":"https://www.youtube.com/watch?v=U40FhqwfBvs","annotations":[{"segment":[15.368683599999999,97.24730570514822],"label":"Preparing salad"}]},"HxJnPUqF9i8":{"duration":121.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=HxJnPUqF9i8","annotations":[{"segment":[10.477378315132606,14.253010140405616],"label":"Javelin throw"},{"segment":[39.17218018720749,55.029833853354134],"label":"Javelin throw"},{"segment":[82.02560140405618,87.68904914196568],"label":"Javelin throw"},{"segment":[114.87424380655227,115.62872464898595],"label":"Javelin throw"}]},"2R_TVXvocQs":{"duration":169.23,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=2R_TVXvocQs","annotations":[{"segment":[5.808084519299139,97.94542530272639],"label":"Zumba"},{"segment":[106.95654080999999,160.7783396478716],"label":"Zumba"}]},"wZeV6W1VEoM":{"duration":88.19,"subset":"training","resolution":"270x360","url":"https://www.youtube.com/watch?v=wZeV6W1VEoM","annotations":[{"segment":[0.2,5.639003120124805],"label":"Snatch"}]},"LmWcpkqUurg":{"duration":187.316,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LmWcpkqUurg","annotations":[{"segment":[0.01,187.31489795918367],"label":"Changing car wheel"}]},"66jfSBGjD2g":{"duration":133.79,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=66jfSBGjD2g","annotations":[]},"pbeQcGC3nXY":{"duration":121.232,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pbeQcGC3nXY","annotations":[{"segment":[29.288229329173163,49.128642745709826],"label":"Doing 
fencing"},{"segment":[82.00704212168486,97.32857254290172],"label":"Doing fencing"},{"segment":[104.32111076443059,107.72288611544462],"label":"Doing fencing"},{"segment":[113.77048673946959,115.47137441497661],"label":"Doing fencing"}]},"EL47wkhiOq8":{"duration":43.52,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EL47wkhiOq8","annotations":[{"segment":[1.8335569422776912,41.017347893915755],"label":"Playing badminton"}]},"uBPWqgUiQWA":{"duration":39.567,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=uBPWqgUiQWA","annotations":[{"segment":[9.814541901716067,18.82663698127925],"label":"Throwing darts"},{"segment":[25.616571630265206,26.9745585600624],"label":"Throwing darts"}]},"P-6ITEpg0mw":{"duration":27.15,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=P-6ITEpg0mw","annotations":[{"segment":[3.8980197121094315,26.16089665914993],"label":"Using uneven bars"}]},"ep2Kyk8CHT8":{"duration":152.393,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=ep2Kyk8CHT8","annotations":[{"segment":[41.47192589703589,70.22896333853355],"label":"Plastering"}]},"DXOKFXlx84M":{"duration":5.27,"subset":"training","resolution":"342x240","url":"https://www.youtube.com/watch?v=DXOKFXlx84M","annotations":[{"segment":[0.002217932573360075,5.198820736311678],"label":"Bathing dog"}]},"zfqach0dDYo":{"duration":7.106,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=zfqach0dDYo","annotations":[{"segment":[0.01,6.9004227582153135],"label":"Doing a powerbomb"}]},"j6HDZh7W6Z4":{"duration":85.06,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=j6HDZh7W6Z4","annotations":[{"segment":[32.37651373810054,42.062928804992204],"label":"Discus throw"},{"segment":[48.03400702652106,54.80122901092044],"label":"Discus throw"},{"segment":[61.83383224960998,75.766348099844],"label":"Discus 
throw"},{"segment":[81.87011694851795,83.59509510140407],"label":"Discus throw"}]},"fuSTjQ46_Aw":{"duration":176.31,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=fuSTjQ46_Aw","annotations":[]},"AmW-O5MmJHk":{"duration":70.102,"subset":"training","resolution":"800x480","url":"https://www.youtube.com/watch?v=AmW-O5MmJHk","annotations":[{"segment":[13.779616324492977,70.101064],"label":"Hitting a pinata"}]},"_wo35butdOc":{"duration":19.85,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_wo35butdOc","annotations":[{"segment":[6.411206583463338,8.73410751950078],"label":"Discus throw"}]},"12IrPzohRSw":{"duration":113.593,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=12IrPzohRSw","annotations":[{"segment":[0.2,105.64212988163662],"label":"Riding bumper cars"}]},"NJuf9o45A8U":{"duration":67.5,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NJuf9o45A8U","annotations":[{"segment":[3.8962505975039003,59.07558338377535],"label":"Snowboarding"}]},"xJNqBSJ7rqU":{"duration":177.87,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=xJNqBSJ7rqU","annotations":[{"segment":[20.533539926631082,176.19996939937596],"label":"Playing field hockey"}]},"oR8o_PuKS28":{"duration":29.91,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=oR8o_PuKS28","annotations":[]},"kXP1tgr476c":{"duration":65.2,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kXP1tgr476c","annotations":[{"segment":[5.214775979999999,15.704964869999998],"label":"Polishing shoes"},{"segment":[20.55657128,60.63175897035882],"label":"Polishing shoes"}]},"2GSSuYAWFc8":{"duration":125.78,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2GSSuYAWFc8","annotations":[{"segment":[5.24259376,96.70414196567863],"label":"Getting a 
piercing"}]},"stqHuZqLLuI":{"duration":67.292,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=stqHuZqLLuI","annotations":[]},"wz8d9DKMrTc":{"duration":137.811,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=wz8d9DKMrTc","annotations":[{"segment":[0.01,137.03768343782252],"label":"Drum corps"}]},"u9oARzRT3fI":{"duration":23.38,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=u9oARzRT3fI","annotations":[{"segment":[0,23.335],"label":"Using uneven bars"}]},"slQuWp_rMTE":{"duration":7.872,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=slQuWp_rMTE","annotations":[{"segment":[0.2,7.641],"label":"Canoeing"}]},"WX1TLuHXCd8":{"duration":89.72,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WX1TLuHXCd8","annotations":[{"segment":[41.006085840000004,53.189300429999996],"label":"Playing lacrosse"},{"segment":[14.97174327,28.67034555],"label":"Playing lacrosse"}]},"nMTyJxJIZUs":{"duration":68.71000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nMTyJxJIZUs","annotations":[{"segment":[0.01,68.70784580498866],"label":"Doing nails"}]},"kyObhFkHrak":{"duration":7.361,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=kyObhFkHrak","annotations":[{"segment":[0.01,6.903634596839501],"label":"Doing a powerbomb"}]},"DFOoFr2H-24":{"duration":66.69,"subset":"training","resolution":"854x478","url":"https://www.youtube.com/watch?v=DFOoFr2H-24","annotations":[{"segment":[0.3118923556942278,3.0149594383775353],"label":"Playing saxophone"},{"segment":[15.911283931357255,16.847241809672386],"label":"Playing saxophone"},{"segment":[35.46240405616224,46.58990327613105],"label":"Playing 
saxophone"}]},"N-KiVTbKKyI":{"duration":169.924,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=N-KiVTbKKyI","annotations":[]},"e07y3QI4Kbg":{"duration":30.929,"subset":"training","resolution":"360x240","url":"https://www.youtube.com/watch?v=e07y3QI4Kbg","annotations":[{"segment":[0.2,30.728979591836737],"label":"Arm wrestling"}]},"Nsl_tnIRNEo":{"duration":156.851,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=Nsl_tnIRNEo","annotations":[{"segment":[7.427183665007309,69.971885073107],"label":"Playing ten pins"},{"segment":[79.98880990799796,143.75508751547457],"label":"Playing ten pins"}]},"RhokmoZJrco":{"duration":26.29,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=RhokmoZJrco","annotations":[{"segment":[0.2870435397815913,26.12096212012481],"label":"Windsurfing"}]},"NhcOmldkGIo":{"duration":175.195,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=NhcOmldkGIo","annotations":[{"segment":[5.991033039284922,160.39640073535247],"label":"Hurling"}]},"5c5PFa8GR-A":{"duration":57.49,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=5c5PFa8GR-A","annotations":[]},"53dqgEMV5e4":{"duration":60.47,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=53dqgEMV5e4","annotations":[{"segment":[8.432407956318253,40.18348595943838],"label":"Mixing drinks"}]},"qnl1q5Q7_qQ":{"duration":194.328,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=qnl1q5Q7_qQ","annotations":[]},"sVDRluetSyg":{"duration":125.46,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sVDRluetSyg","annotations":[{"segment":[0.01,5.480184235940866],"label":"Cleaning windows"},{"segment":[19.18064482579303,24.465108196164582],"label":"Cleaning windows"},{"segment":[31.119617625521347,39.14417311386333],"label":"Cleaning 
windows"},{"segment":[48.93021567862714,58.12909622620904],"label":"Cleaning windows"},{"segment":[63.21783865678627,72.41671920436816],"label":"Cleaning windows"},{"segment":[73.98248610608424,91.59736375039002],"label":"Cleaning windows"},{"segment":[92.96740978939157,104.71066155226208],"label":"Cleaning windows"}]},"B8imoIn6NUE":{"duration":124.51,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=B8imoIn6NUE","annotations":[{"segment":[11.45446,106.2471951068519],"label":"Skateboarding"}]},"Tu1oKxyfHTs":{"duration":69.43,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Tu1oKxyfHTs","annotations":[{"segment":[4.6988391599999995,63.048127925117],"label":"Springboard diving"}]},"-3baQGb2zIY":{"duration":104.537,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-3baQGb2zIY","annotations":[{"segment":[0.01,0.02],"label":"Baton twirling"}]},"e-OJAoH_eeM":{"duration":88.538,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=e-OJAoH_eeM","annotations":[]},"KeP8tUI5GZg":{"duration":114.27000000000001,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=KeP8tUI5GZg","annotations":[]},"r7gYDRQvA8w":{"duration":102.12,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=r7gYDRQvA8w","annotations":[]},"8gKV8f3qSOg":{"duration":86.05,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8gKV8f3qSOg","annotations":[{"segment":[48.06065777847114,68.60054783463337],"label":"Washing face"}]},"aTQaYDmcMDY":{"duration":201.74,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aTQaYDmcMDY","annotations":[{"segment":[120.90927290000002,149.1217728],"label":"Doing motocross"},{"segment":[48.78143822932917,197.0140666552262],"label":"Doing 
motocross"}]},"0HrPpZa_xv8":{"duration":20.991,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=0HrPpZa_xv8","annotations":[{"segment":[2.521520441770052,19.680957313572545],"label":"Calf roping"}]},"jbh_K0yyJFo":{"duration":23.662,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=jbh_K0yyJFo","annotations":[{"segment":[11.728296411856473,18.219429017160685],"label":"Sumo"}]},"P3wH4l2KKrE":{"duration":144.336,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=P3wH4l2KKrE","annotations":[]},"bNuRrXSjJl0":{"duration":228.76,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bNuRrXSjJl0","annotations":[{"segment":[38.5434391416473,41.75539240345124],"label":"Plataform diving"},{"segment":[75.30245980451463,78.15752937056257],"label":"Plataform diving"},{"segment":[120.9835728612818,124.19552612308574],"label":"Plataform diving"},{"segment":[161.31143048170907,164.88026743926898],"label":"Plataform diving"},{"segment":[214.84398392511702,219.12658825585024],"label":"Plataform diving"}]},"1a8PCm9e1YU":{"duration":135.326,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1a8PCm9e1YU","annotations":[{"segment":[2.111168419525897,134.90366032137285],"label":"Playing congas"}]},"LZ-Cy0GqCiU":{"duration":96.084,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LZ-Cy0GqCiU","annotations":[{"segment":[8.36813714566957,44.53045395004814],"label":"Bullfighting"},{"segment":[59.02526606356439,74.56609620975412],"label":"Bullfighting"}]},"aBr9bdWrl0I":{"duration":166.302,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aBr9bdWrl0I","annotations":[{"segment":[0.2,30.873411088824508],"label":"Raking leaves"},{"segment":[77.57268827613105,113.11602705148205],"label":"Raking 
leaves"}]},"s__e_v6aaq0":{"duration":30.02,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=s__e_v6aaq0","annotations":[{"segment":[0.01,29.57976911076443],"label":"Vacuuming floor"}]},"Ac_8KQGAe0c":{"duration":306.62,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ac_8KQGAe0c","annotations":[{"segment":[0.009999999999999998,306.61950113378686],"label":"Clipping cat claws"}]},"x7lP6GKepco":{"duration":76.092,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=x7lP6GKepco","annotations":[{"segment":[16.449079068738694,25.206143706931968],"label":"Welding"},{"segment":[53.96244957843146,60.47107858309709],"label":"Welding"}]},"4KMbeat6yoE":{"duration":162.42000000000002,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4KMbeat6yoE","annotations":[{"segment":[3.8008638299531987,159.63628085803435],"label":"Playing accordion"}]},"onFddYAkyyc":{"duration":62.996,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=onFddYAkyyc","annotations":[{"segment":[0.01,62.99573696145125],"label":"Mooping floor"}]},"zemqddZ_YO4":{"duration":161.07999999999998,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=zemqddZ_YO4","annotations":[{"segment":[5.5271482059282375,157.021255850234],"label":"Rock climbing"}]},"BbJeZoks_c4":{"duration":216.062,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=BbJeZoks_c4","annotations":[{"segment":[92.69400159906397,191.1181778424337],"label":"Slacklining"}]},"3uPBTEy9oOw":{"duration":182.42,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=3uPBTEy9oOw","annotations":[]},"mA4wEXlASpc":{"duration":42.82,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mA4wEXlASpc","annotations":[{"segment":[3.12307198,32.3525252],"label":"Long 
jump"}]},"cMoy7UJtlyA":{"duration":98.66,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=cMoy7UJtlyA","annotations":[{"segment":[12.621295099423024,81.57666344749028],"label":"Sailing"}]},"LnEed8TnWwk":{"duration":178.771,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LnEed8TnWwk","annotations":[{"segment":[0.01,162.2590577223089],"label":"Tug of war"}]},"8XB_0x_erho":{"duration":59.119,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8XB_0x_erho","annotations":[{"segment":[29.300152886115445,49.58748517940718],"label":"Fun sliding down"}]},"KNLGluuewIU":{"duration":226.0,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=KNLGluuewIU","annotations":[{"segment":[45.4701856474259,99.75242277691109],"label":"Brushing hair"},{"segment":[185.05308112324494,221.37832761310455],"label":"Brushing hair"}]},"qiw2I1oQIVQ":{"duration":135.07,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qiw2I1oQIVQ","annotations":[{"segment":[9.972493335048581,126.10785501729156],"label":"Drum corps"}]},"Vrac4j7C79Y":{"duration":228.76,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=Vrac4j7C79Y","annotations":[]},"Mx-rOsiQTos":{"duration":180.025,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=Mx-rOsiQTos","annotations":[{"segment":[28.49616458658347,82.68098985959439],"label":"Blowing leaves"}]},"YYddEHI8KRY":{"duration":166.906,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YYddEHI8KRY","annotations":[]},"f0lxilPpXeM":{"duration":101.1,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=f0lxilPpXeM","annotations":[{"segment":[33.108907956318255,35.158507020280815],"label":"Gargling mouthwash"},{"segment":[86.87146801872076,89.86703588143527],"label":"Gargling 
mouthwash"}]},"EbeVfU-rWFE":{"duration":183.833,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EbeVfU-rWFE","annotations":[]},"gT_8511vwVE":{"duration":232.55,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gT_8511vwVE","annotations":[{"segment":[10.883674530654696,226.01764108659583],"label":"Cumbia"}]},"c0wxdAaqCOg":{"duration":68.105,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=c0wxdAaqCOg","annotations":[]},"CeXtKbFnPIQ":{"duration":9.99,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=CeXtKbFnPIQ","annotations":[]},"Oyycph_Ukfg":{"duration":66.572,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Oyycph_Ukfg","annotations":[]},"TH269r3O-Zo":{"duration":77.16,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=TH269r3O-Zo","annotations":[]},"U_4goX5w_1c":{"duration":111.55,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=U_4goX5w_1c","annotations":[{"segment":[8.70114360374415,15.836081358814353],"label":"Triple jump"}]},"nDHupsLdar0":{"duration":122.93,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=nDHupsLdar0","annotations":[]},"qJ4ObH27qjc":{"duration":102.238,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qJ4ObH27qjc","annotations":[{"segment":[3.019463452566096,95.15156920684294],"label":"Playing 
congas"}]},"QrQN-Hm5xew":{"duration":90.12,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QrQN-Hm5xew","annotations":[{"segment":[0.01,0.02],"label":"Paintball"},{"segment":[31.98415756630265,63.19507176287051],"label":"Paintball"},{"segment":[68.39689079563182,81.75291263650546],"label":"Paintball"}]},"zhPqZtWuhow":{"duration":33.298,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=zhPqZtWuhow","annotations":[{"segment":[5.592722096809174,21.853046045984488],"label":"Table soccer"}]},"laeOL4ipHck":{"duration":184.86,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=laeOL4ipHck","annotations":[{"segment":[12.400486508580343,155.727039875195],"label":"Volleyball"}]},"-YPp9kc3XDs":{"duration":167.42000000000002,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=-YPp9kc3XDs","annotations":[{"segment":[29.77430462714508,106.8218473026521],"label":"Playing flauta"}]},"Rzf7AoRxwTw":{"duration":147.447,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Rzf7AoRxwTw","annotations":[]},"1RJgvoFfbkI":{"duration":96.735,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1RJgvoFfbkI","annotations":[{"segment":[7.696491239241405,93.11245283552839],"label":"Capoeira"}]},"6VT2jBflMAM":{"duration":194.12,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=6VT2jBflMAM","annotations":[{"segment":[134.91409468623644,179.73403912090484],"label":"Playing 
kickball"}]},"iA0CccCcqts":{"duration":85.496,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iA0CccCcqts","annotations":[]},"xoSA8_kTiBY":{"duration":82.55,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xoSA8_kTiBY","annotations":[{"segment":[46.621270710000005,76.27898045085803],"label":"Spinning"},{"segment":[13.579941060000001,41.13461772],"label":"Spinning"}]},"44htu_ovvLs":{"duration":56.96,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=44htu_ovvLs","annotations":[{"segment":[24.03174632,40.63929168],"label":"Horseback riding"}]},"QdTUPBtxAV0":{"duration":7.059,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QdTUPBtxAV0","annotations":[{"segment":[0.2,6.858412698412698],"label":"Fun sliding down"}]},"LYqfB7HsQwQ":{"duration":78.9,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=LYqfB7HsQwQ","annotations":[{"segment":[12.924567268405022,74.71630773775351],"label":"Playing harmonica"}]},"aoEszrK9wTM":{"duration":75.0,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aoEszrK9wTM","annotations":[]},"AQ3eFWxZ67U":{"duration":72.888,"subset":"testing","resolution":"854x470","url":"https://www.youtube.com/watch?v=AQ3eFWxZ67U","annotations":[]},"W70Urhy8Yl4":{"duration":185.249,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=W70Urhy8Yl4","annotations":[{"segment":[0.19999999999999998,147.4484126984127],"label":"Ice 
fishing"}]},"eic6dpU0ytM":{"duration":128.314,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eic6dpU0ytM","annotations":[{"segment":[32.92633078563199,52.881684622345524],"label":"Knitting"},{"segment":[100.1758732153566,111.74997742161767],"label":"Knitting"}]},"E6ePEI4UOtU":{"duration":105.489,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=E6ePEI4UOtU","annotations":[{"segment":[51.523400942346605,95.4629748846243],"label":"Mooping floor"}]},"NbTlXQLScRo":{"duration":180.16,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=NbTlXQLScRo","annotations":[{"segment":[21.923187060842434,180.16362811791385],"label":"Preparing pasta"}]},"lnvsb4-71BQ":{"duration":128.04,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lnvsb4-71BQ","annotations":[{"segment":[32.85747269890796,106.96155863026522],"label":"Ping-pong"}]},"6l0tenUYS-w":{"duration":112.15,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=6l0tenUYS-w","annotations":[{"segment":[10.354469455062832,85.47201538964227],"label":"Breakdancing"}]},"98OypfeTKEc":{"duration":41.193,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=98OypfeTKEc","annotations":[{"segment":[0.01,38.718097077624606],"label":"Swinging at the playground"}]},"Px08sPeSsG0":{"duration":86.26,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Px08sPeSsG0","annotations":[{"segment":[3.0952090872750557,9.420201569967562],"label":"Getting a tattoo"},{"segment":[22.743058076064536,28.79890194247226],"label":"Getting a 
tattoo"}]},"a4hE5hlE1tQ":{"duration":9.66,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=a4hE5hlE1tQ","annotations":[]},"Ue2EbbpegkE":{"duration":160.149,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ue2EbbpegkE","annotations":[{"segment":[34.7278873359016,46.22056947584025],"label":"Snowboarding"},{"segment":[82.19766139216998,92.94082008112323],"label":"Snowboarding"},{"segment":[120.67316155694228,133.91473000936037],"label":"Snowboarding"}]},"0cscG-qOaQY":{"duration":160.73,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0cscG-qOaQY","annotations":[{"segment":[20.310463243019516,49.64779903849215],"label":"Wrapping presents"},{"segment":[54.411981774509066,76.97916315564187],"label":"Wrapping presents"},{"segment":[82.74633173082024,121.110540078746],"label":"Wrapping presents"},{"segment":[123.11651120124804,146.1851853978159],"label":"Wrapping presents"}]},"tgdLeBX7aXk":{"duration":64.691,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tgdLeBX7aXk","annotations":[{"segment":[13.01884099843994,17.257533416536663],"label":"Croquet"},{"segment":[32.19387812792512,33.10216936037442],"label":"Croquet"},{"segment":[35.726121809672385,39.66205048361935],"label":"Croquet"},{"segment":[42.891530421216856,44.40534914196568],"label":"Croquet"}]},"IfSQS2WQrdY":{"duration":34.761,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=IfSQS2WQrdY","annotations":[]},"4QvpJ71d8Nk":{"duration":144.986,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=4QvpJ71d8Nk","annotations":[{"segment":[9.246491419656786,139.6757566302652],"label":"Rope skipping"}]},"Mmdcsw_SEzc":{"duration":41.425,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Mmdcsw_SEzc","annotations":[{"segment":[1.417319048148031,39.813806114951554],"label":"Building 
sandcastles"}]},"UVJ0mQndDhE":{"duration":138.159,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=UVJ0mQndDhE","annotations":[{"segment":[0.01,138.15873015873015],"label":"Shuffleboard"}]},"0mNZfG6CL74":{"duration":89.68,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0mNZfG6CL74","annotations":[]},"iDhzxzLmwoI":{"duration":185.172,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iDhzxzLmwoI","annotations":[{"segment":[15.889165366614664,57.194817472698915],"label":"Roof shingle removal"}]},"MvZFYjs80Y4":{"duration":209.979,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=MvZFYjs80Y4","annotations":[{"segment":[0,82.52744461778471],"label":"Rock-paper-scissors"},{"segment":[126.08359594383775,168.67334633385337],"label":"Rock-paper-scissors"}]},"jWODw4vy45M":{"duration":3.07,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=jWODw4vy45M","annotations":[]},"Tu9QF2ALd2s":{"duration":19.23,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Tu9QF2ALd2s","annotations":[{"segment":[0.4854565,19.016163360374414],"label":"Preparing salad"}]},"gU81ZXdYh7o":{"duration":5.09,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=gU81ZXdYh7o","annotations":[{"segment":[0.007933182527301092,1.5073046801872076],"label":"Doing kickboxing"},{"segment":[2.633816599063963,4.88517],"label":"Doing 
kickboxing"}]},"Launtf-qjDM":{"duration":20.179,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Launtf-qjDM","annotations":[{"segment":[1.2253057722308893,19.939],"label":"Welding"}]},"jibbgUBuuSA":{"duration":96.02000000000001,"subset":"testing","resolution":"654x480","url":"https://www.youtube.com/watch?v=jibbgUBuuSA","annotations":[]},"eU1ORbzgRjE":{"duration":89.82,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=eU1ORbzgRjE","annotations":[]},"Vg043D46E7Q":{"duration":35.69,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Vg043D46E7Q","annotations":[{"segment":[0.2,35.661],"label":"Snow tubing"}]},"St9FULDo1zY":{"duration":65.55,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=St9FULDo1zY","annotations":[]},"bY-4XBIGiwI":{"duration":182.927,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=bY-4XBIGiwI","annotations":[{"segment":[10.556313572542901,40.79872542901716],"label":"Rope skipping"},{"segment":[48.50198127925117,125.24923400936038],"label":"Rope skipping"},{"segment":[133.2377956318253,162.64207488299533],"label":"Rope skipping"}]},"PwOMgya8qYI":{"duration":152.114,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PwOMgya8qYI","annotations":[{"segment":[56.17874807321006,124.53980578871351],"label":"Hula 
hoop"}]},"K_I5LyzD8m0":{"duration":54.13,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=K_I5LyzD8m0","annotations":[{"segment":[15.0305125,53.449795160686435],"label":"Shaving"}]},"eL0LrJio1XE":{"duration":135.326,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eL0LrJio1XE","annotations":[{"segment":[29.968645865834635,113.96527301092044],"label":"BMX"}]},"8HvJ1B9w4GI":{"duration":180.09,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8HvJ1B9w4GI","annotations":[]},"zmaDLAZu4kA":{"duration":30.14,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=zmaDLAZu4kA","annotations":[{"segment":[2.4412056074766353,17.93347196261682],"label":"Calf roping"}]},"IXUh06YCtjw":{"duration":83.13,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=IXUh06YCtjw","annotations":[{"segment":[11.408561622464898,71.69243837753511],"label":"Drinking coffee"}]},"PMBnZ-2oKxs":{"duration":213.206,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=PMBnZ-2oKxs","annotations":[]},"XZZRyOhxQBE":{"duration":66.18,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XZZRyOhxQBE","annotations":[{"segment":[11.046684840873635,17.550807691107643],"label":"Pole vault"},{"segment":[32.933574432137284,40.263617644305775],"label":"Pole vault"}]},"af05Mqm03Y4":{"duration":225.44,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=af05Mqm03Y4","annotations":[]},"DFQi5aWn87I":{"duration":39.869,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DFQi5aWn87I","annotations":[]},"TKGjHAmCaRo":{"duration":136.882,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TKGjHAmCaRo","annotations":[{"segment":[0.20000000000000004,129.83468432614856],"label":"Disc 
dog"}]},"TSVdiNxLu50":{"duration":219.777,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TSVdiNxLu50","annotations":[{"segment":[29.257787118221266,195.2043072732606],"label":"Surfing"},{"segment":[198.52066274450286,201.25930797814772],"label":"Surfing"}]},"jYphKtLFIUk":{"duration":138.995,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jYphKtLFIUk","annotations":[{"segment":[0.2,138.741],"label":"Building sandcastles"}]},"L1B_cE8waag":{"duration":199.831,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=L1B_cE8waag","annotations":[{"segment":[15.58738647425897,188.60737633853356],"label":"Longboarding"}]},"TcoSgl3qiHU":{"duration":211.93,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=TcoSgl3qiHU","annotations":[]},"xzuQIbnXt2U":{"duration":26.843,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=xzuQIbnXt2U","annotations":[{"segment":[0.01,26.16927483010218],"label":"Doing a powerbomb"}]},"VfhDSTKBwgc":{"duration":136.952,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=VfhDSTKBwgc","annotations":[]},"KFo88zRw5CM":{"duration":71.843,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KFo88zRw5CM","annotations":[{"segment":[0.010000000000000002,71.05798709204369],"label":"Waterskiing"}]},"ZluOmpTPdWw":{"duration":29.606,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZluOmpTPdWw","annotations":[{"segment":[1.1971744548286605,28.70554984423676],"label":"Beach soccer"}]},"LUDZ7e0RdEE":{"duration":94.97,"subset":"validation","resolution":"640x358","url":"https://www.youtube.com/watch?v=LUDZ7e0RdEE","annotations":[{"segment":[19.40877116380655,74.52375492667706],"label":"Tai chi"},{"segment":[89.63592789391575,91.26567203744149],"label":"Tai 
chi"}]},"fzHg9PhhlAg":{"duration":235.17000000000002,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=fzHg9PhhlAg","annotations":[{"segment":[11.00647585087077,203.61980178627144],"label":"Cricket"}]},"nwNJccohnow":{"duration":131.77,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nwNJccohnow","annotations":[]},"RMrL2txm79c":{"duration":102.772,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RMrL2txm79c","annotations":[{"segment":[17.944636505460217,92.94567862714509],"label":"Carving jack-o-lanterns"}]},"C84V-jw-Sjg":{"duration":29.35,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=C84V-jw-Sjg","annotations":[]},"jX26w60MxmU":{"duration":67.919,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jX26w60MxmU","annotations":[{"segment":[6.981070202808112,45.90582527301092],"label":"Canoeing"}]},"nIpT_lGpjck":{"duration":24.73,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=nIpT_lGpjck","annotations":[{"segment":[6.58360167,20.14473798],"label":"Preparing salad"}]},"yLQe0SqFIMk":{"duration":68.198,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yLQe0SqFIMk","annotations":[]},"DMUFb7gvUx4":{"duration":181.557,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DMUFb7gvUx4","annotations":[{"segment":[26.8576246105919,170.19252647975077],"label":"Skiing"}]},"mOrhfrmmxAQ":{"duration":34.018,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mOrhfrmmxAQ","annotations":[{"segment":[0.2,8.789712948517941],"label":"Waterskiing"}]},"LygR7ds26JY":{"duration":20.69,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=LygR7ds26JY","annotations":[{"segment":[0.8694617503900156,9.55537344],"label":"Pole 
vault"}]},"kzbQWKUMyS0":{"duration":236.264,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kzbQWKUMyS0","annotations":[{"segment":[17.29197352024922,218.19167133956387],"label":"Slacklining"}]},"nuVlFxONEdI":{"duration":96.712,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=nuVlFxONEdI","annotations":[]},"OS-h1xzAZno":{"duration":79.92,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=OS-h1xzAZno","annotations":[{"segment":[1.8087901716068644,23.763760530421216],"label":"Using the balance beam"},{"segment":[29.127758970358816,78.02746567862714],"label":"Using the balance beam"}]},"z9PXpyWbXk8":{"duration":7.06,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=z9PXpyWbXk8","annotations":[{"segment":[5.2198159282371295,6.937730031201248],"label":"High jump"}]},"aDDq5ofaFHI":{"duration":157.153,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aDDq5ofaFHI","annotations":[{"segment":[21.507668706128822,40.326882087155255],"label":"Longboarding"},{"segment":[47.65904314469803,119.02541077144764],"label":"Longboarding"},{"segment":[127.09078793474468,141.2662993126607],"label":"Longboarding"}]},"sbIh_M0oGs8":{"duration":33.55,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sbIh_M0oGs8","annotations":[{"segment":[11.620482451951139,32.34891015912636],"label":"Doing 
motocross"}]},"E3IP4Y8e_ho":{"duration":89.119,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=E3IP4Y8e_ho","annotations":[{"segment":[18.351950927016674,30.169494427457096],"label":"Slacklining"},{"segment":[45.60181646177847,70.34914368798752],"label":"Slacklining"}]},"ZlwU7HKcoYs":{"duration":29.768,"subset":"validation","resolution":"360x240","url":"https://www.youtube.com/watch?v=ZlwU7HKcoYs","annotations":[{"segment":[0.01,27.745893915756632],"label":"Waterskiing"}]},"InavXU3iWBE":{"duration":25.96,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=InavXU3iWBE","annotations":[{"segment":[10.263422776911078,14.476939157566303],"label":"Doing a powerbomb"}]},"2QeU2xYPdWE":{"duration":76.23,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=2QeU2xYPdWE","annotations":[{"segment":[41.25094217628705,70.57175575195008],"label":"Discus throw"},{"segment":[11.692713106084245,43.38153748],"label":"Discus throw"}]},"w9NZm9-cxbg":{"duration":49.81,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=w9NZm9-cxbg","annotations":[]},"fxxeCpqgRfk":{"duration":64.67,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fxxeCpqgRfk","annotations":[{"segment":[3.4251065865834636,58.02533511388456],"label":"Using parallel bars"}]},"9L-aeZsgwZs":{"duration":177.772,"subset":"validation","resolution":"640x270","url":"https://www.youtube.com/watch?v=9L-aeZsgwZs","annotations":[{"segment":[0,173.30440717628707],"label":"Tug of war"}]},"6mrjcnKVJyY":{"duration":166.98,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6mrjcnKVJyY","annotations":[{"segment":[2.3444187971600496,163.06735135101403],"label":"Doing 
kickboxing"}]},"Z1siDLDwbiY":{"duration":239.677,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Z1siDLDwbiY","annotations":[{"segment":[0.01,222.4436739469579],"label":"Removing ice from car"}]},"CpEJfaG-WhE":{"duration":223.864,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=CpEJfaG-WhE","annotations":[]},"1rf7t4sYtIA":{"duration":38.174,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=1rf7t4sYtIA","annotations":[{"segment":[0.2,37.940999999999995],"label":"Blowing leaves"}]},"VQ44ZEIaQXA":{"duration":78.252,"subset":"testing","resolution":"852x480","url":"https://www.youtube.com/watch?v=VQ44ZEIaQXA","annotations":[]},"ZbfYwmb39H0":{"duration":192.22,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZbfYwmb39H0","annotations":[{"segment":[64.36862432,133.88858384],"label":"Playing lacrosse"}]},"aj2cioE_yG0":{"duration":226.093,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aj2cioE_yG0","annotations":[{"segment":[0.01,196.28796758684348],"label":"Table soccer"}]},"TEvkTLNFpR8":{"duration":205.5,"subset":"testing","resolution":"352x262","url":"https://www.youtube.com/watch?v=TEvkTLNFpR8","annotations":[]},"7S7THbo8uds":{"duration":78.228,"subset":"training","resolution":"352x288","url":"https://www.youtube.com/watch?v=7S7THbo8uds","annotations":[{"segment":[0.2,78.02802721088435],"label":"Arm wrestling"}]},"A7ER02-zr54":{"duration":237.96,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=A7ER02-zr54","annotations":[{"segment":[51.91529686,140.22626982],"label":"Fixing 
bicycle"}]},"bsVJlftOY8Q":{"duration":98.87,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bsVJlftOY8Q","annotations":[]},"UJebyFqxDXI":{"duration":141.29,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UJebyFqxDXI","annotations":[{"segment":[41.8810461261988,51.35938814423325],"label":"Mowing the lawn"},{"segment":[57.751758342442535,77.59014861274723],"label":"Mowing the lawn"},{"segment":[90.15446245060687,99.63280446864133],"label":"Mowing the lawn"}]},"9FaSaHgQSO8":{"duration":228.485,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=9FaSaHgQSO8","annotations":[{"segment":[0,197.11676545397816],"label":"Riding bumper cars"}]},"hpQkElxsSYQ":{"duration":105.49000000000001,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hpQkElxsSYQ","annotations":[]},"NrKBxbZXXg4":{"duration":92.787,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NrKBxbZXXg4","annotations":[{"segment":[0,6.800042121684867],"label":"Rafting"},{"segment":[10.851131045241809,79.71964274570983],"label":"Rafting"}]},"ZQSa_8wofFw":{"duration":91.208,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=ZQSa_8wofFw","annotations":[{"segment":[39.5566597700468,47.24032749516381],"label":"Shuffleboard"}]},"ZhPL4xb8JD0":{"duration":22.106,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=ZhPL4xb8JD0","annotations":[{"segment":[7.66641173825574,22.105396825396827],"label":"Calf roping"}]},"AoyoL-921Ws":{"duration":72.89,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=AoyoL-921Ws","annotations":[{"segment":[0,72.68743764172335],"label":"Mowing the 
lawn"}]},"P6lwZ6gKvUw":{"duration":136.557,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=P6lwZ6gKvUw","annotations":[{"segment":[6.39110232449298,131.23063439625585],"label":"Peeling potatoes"}]},"smYAy6TYS3M":{"duration":67.11,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=smYAy6TYS3M","annotations":[{"segment":[7.658904935142421,48.42404372018915],"label":"Hopscotch"},{"segment":[54.48655153950379,62.744105293397865],"label":"Hopscotch"}]},"4EoFt8F3_nw":{"duration":5.06,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4EoFt8F3_nw","annotations":[{"segment":[0.01,5.041],"label":"Walking the dog"}]},"KZ-6u3MUp44":{"duration":68.43,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=KZ-6u3MUp44","annotations":[{"segment":[14.828329173166926,20.162260530421218],"label":"Discus throw"},{"segment":[30.83914040561622,39.696056162246485],"label":"Discus throw"}]},"8Ztw2YrUHRE":{"duration":161.797,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8Ztw2YrUHRE","annotations":[{"segment":[0.010000000000000002,156.35301897017894],"label":"Baton twirling"}]},"uWguPHhQON4":{"duration":38.27,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=uWguPHhQON4","annotations":[{"segment":[33.78912627769111,38.26648526077098],"label":"Starting a campfire"}]},"IV47aK7qJdY":{"duration":54.567,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IV47aK7qJdY","annotations":[{"segment":[16.174193525564153,44.09595893603744],"label":"Blowing 
leaves"}]},"oXKrreio9hg":{"duration":130.45,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=oXKrreio9hg","annotations":[{"segment":[48.19899687987519,122.8362620904836],"label":"Tumbling"}]},"Tab-dSCaMC8":{"duration":74.3,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Tab-dSCaMC8","annotations":[{"segment":[10.441967369947106,73.71419011138013],"label":"Grooming horse"}]},"hru57dsi0N0":{"duration":160.683,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hru57dsi0N0","annotations":[{"segment":[1.8518561450922602,160.661],"label":"Putting on shoes"}]},"69X7tP6p7E0":{"duration":70.06,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=69X7tP6p7E0","annotations":[{"segment":[4.1226779,67.7677632652106],"label":"Dodgeball"}]},"HW5QhCSKTsw":{"duration":230.67000000000002,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HW5QhCSKTsw","annotations":[{"segment":[67.65272533541341,73.05054916536662],"label":"Layup drill in basketball"},{"segment":[186.15288234,191.97030408],"label":"Layup drill in basketball"}]},"Xjw9vUwILOE":{"duration":102.89,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Xjw9vUwILOE","annotations":[{"segment":[3.208861154446178,19.253166926677068],"label":"Long jump"},{"segment":[23.424686427457097,38.66677691107644],"label":"Long jump"},{"segment":[85.83703588143526,96.10539157566302],"label":"Long jump"}]},"QnQ2D-tJ9pM":{"duration":123.79,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=QnQ2D-tJ9pM","annotations":[{"segment":[1.8336185647425898,13.414367394695788],"label":"Chopping wood"}]},"xV7uPiqNuwQ":{"duration":194.096,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=xV7uPiqNuwQ","annotations":[{"segment":[20.281976599063963,175.2906692667707],"label":"Assembling 
bicycle"}]},"z3kkSRbahfE":{"duration":32.95,"subset":"testing","resolution":"848x480","url":"https://www.youtube.com/watch?v=z3kkSRbahfE","annotations":[]},"TVPiI9551As":{"duration":137.834,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TVPiI9551As","annotations":[{"segment":[0.01,112.03015776755072],"label":"Arm wrestling"}]},"2EQMXueAmCA":{"duration":27.28,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2EQMXueAmCA","annotations":[{"segment":[0.38307491326053045,26.432169123499634],"label":"Playing squash"}]},"5iIJl-wjE9U":{"duration":111.707,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=5iIJl-wjE9U","annotations":[{"segment":[46.963212948517935,53.41085257410296],"label":"Rock-paper-scissors"},{"segment":[59.16145007800312,60.72979485179407],"label":"Rock-paper-scissors"}]},"t_Creyg6ANs":{"duration":173.686,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=t_Creyg6ANs","annotations":[{"segment":[14.360871795416033,86.97811030808579],"label":"Shuffleboard"}]},"ZPVrC5185NM":{"duration":51.758,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZPVrC5185NM","annotations":[{"segment":[0.2,50.3082415608518],"label":"Swinging at the playground"}]},"PUJqlmTdlak":{"duration":223.562,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PUJqlmTdlak","annotations":[{"segment":[25.111457225635966,220.77156109048363],"label":"Playing blackjack"}]},"hh6Yt_LJPwQ":{"duration":120.535,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hh6Yt_LJPwQ","annotations":[{"segment":[20.49655436349454,24.257390026521062],"label":"Gargling mouthwash"}]},"CTWo9EfQ4Hc":{"duration":67.01,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=CTWo9EfQ4Hc","annotations":[{"segment":[19.7562496099844,51.11537597503901],"label":"Using 
parallel bars"}]},"S633fHu_NrU":{"duration":159.24,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=S633fHu_NrU","annotations":[{"segment":[85.26945397815912,136.65453978159127],"label":"Preparing salad"}]},"bTlfN4vJkiY":{"duration":22.87,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=bTlfN4vJkiY","annotations":[{"segment":[2.013286271450858,13.202169266770673],"label":"Mowing the lawn"}]},"5WHnYEinw4A":{"duration":72.261,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=5WHnYEinw4A","annotations":[{"segment":[0.01,46.86256131353526],"label":"Croquet"},{"segment":[63.270075751776545,70.35003113435819],"label":"Croquet"}]},"lrxLP-R_ILw":{"duration":344.421,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=lrxLP-R_ILw","annotations":[{"segment":[152.57490171606864,344.368],"label":"Putting on shoes"}]},"NYhBaOJrxCM":{"duration":66.851,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=NYhBaOJrxCM","annotations":[]},"mpyN1mrMl3U":{"duration":65.388,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mpyN1mrMl3U","annotations":[{"segment":[2.979070022440402,5.828615193779966],"label":"Baking cookies"}]},"u-yFENQQxAo":{"duration":30.163,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=u-yFENQQxAo","annotations":[{"segment":[3.500802652106084,11.583192667706708],"label":"Raking leaves"}]},"s1E6EKs1MZw":{"duration":152.74,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=s1E6EKs1MZw","annotations":[{"segment":[10.484541231989416,28.117633303971616],"label":"Playing saxophone"},{"segment":[62.43067733593698,76.72777901592254],"label":"Playing saxophone"},{"segment":[127.48248918876754,141.06473570046802],"label":"Playing 
saxophone"}]},"RVbejE3s3m4":{"duration":38.78,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=RVbejE3s3m4","annotations":[{"segment":[1.8148513572542901,37.083462733229325],"label":"Doing nails"}]},"pwPid8YHHpU":{"duration":188.848,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pwPid8YHHpU","annotations":[{"segment":[13.548901716068642,28.27596879875195],"label":"Bullfighting"},{"segment":[79.52616224648985,83.06065834633385],"label":"Bullfighting"},{"segment":[98.96589079563182,105.44580031201248],"label":"Bullfighting"},{"segment":[134.31085179407174,138.72897191887674],"label":"Bullfighting"},{"segment":[148.44883619344773,154.0614399375975],"label":"Bullfighting"},{"segment":[161.4257535101404,168.79006708268332],"label":"Bullfighting"}]},"Vckc3ZOollk":{"duration":159.521,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Vckc3ZOollk","annotations":[{"segment":[4.571944964812923,145.9543617716665],"label":"Playing blackjack"}]},"vy91mJTl7rQ":{"duration":137.602,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=vy91mJTl7rQ","annotations":[{"segment":[0.2,85.6520875120719],"label":"Rafting"}]},"Zcq_xLi2NGo":{"duration":131.29,"subset":"validation","resolution":"540x360","url":"https://www.youtube.com/watch?v=Zcq_xLi2NGo","annotations":[{"segment":[11.059937584399377,128.8277914923557],"label":"Playing saxophone"}]},"mDvWGOr_sws":{"duration":194.723,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mDvWGOr_sws","annotations":[{"segment":[0.01,0.02],"label":"Rope skipping"}]},"i0rC2mfp7mI":{"duration":24.03,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=i0rC2mfp7mI","annotations":[{"segment":[0,21.965275301872076],"label":"Playing 
accordion"}]},"U3uwdU8QH8k":{"duration":206.565,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=U3uwdU8QH8k","annotations":[]},"U7vWTmVzWSc":{"duration":197.091,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=U7vWTmVzWSc","annotations":[{"segment":[21.215721070202807,178.94999511388454],"label":"Surfing"}]},"93JbDUNFeWs":{"duration":71.1,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=93JbDUNFeWs","annotations":[]},"7GSCDxBSFsw":{"duration":49.48,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7GSCDxBSFsw","annotations":[{"segment":[1.9684363783151326,35.47045160140406],"label":"Javelin throw"}]},"N_zbcVs419E":{"duration":237.63,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=N_zbcVs419E","annotations":[{"segment":[2.3644483500000004,221.84087363494538],"label":"Painting"}]},"yO0mTE_231M":{"duration":27.33,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=yO0mTE_231M","annotations":[]},"51XRujaBAbE":{"duration":159.41,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=51XRujaBAbE","annotations":[{"segment":[142.7433112917317,159.15630527301093],"label":"Doing motocross"}]},"PKdQIdYuXo8":{"duration":116.4,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PKdQIdYuXo8","annotations":[{"segment":[67.8252873299532,97.60667186037442],"label":"Playing harmonica"}]},"OOKHi7EBNKk":{"duration":173.523,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=OOKHi7EBNKk","annotations":[{"segment":[0.01,172.7106230826833],"label":"Snow 
tubing"}]},"j6ElczD-Pw4":{"duration":27.05,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=j6ElczD-Pw4","annotations":[]},"y56qXoJh6U0":{"duration":131.658,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=y56qXoJh6U0","annotations":[{"segment":[5.132644305772231,13.757577223088925],"label":"Playing ten pins"},{"segment":[24.84577379095164,33.67526365054602],"label":"Playing ten pins"},{"segment":[54.20896099843994,57.90502652106085],"label":"Playing ten pins"},{"segment":[110.26595475819035,116.42606396255852],"label":"Playing ten pins"}]},"ISa7BQJasm4":{"duration":68.221,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=ISa7BQJasm4","annotations":[]},"TGffgRXc1eQ":{"duration":207.889,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=TGffgRXc1eQ","annotations":[]},"bc7r5_gSAVg":{"duration":167.254,"subset":"validation","resolution":"632x360","url":"https://www.youtube.com/watch?v=bc7r5_gSAVg","annotations":[{"segment":[0.01,127.73392313887696],"label":"Playing blackjack"}]},"p1fpQ4yR1co":{"duration":112.501,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=p1fpQ4yR1co","annotations":[{"segment":[30.352685762957993,32.98027922090192],"label":"Blow-drying 
hair"}]},"QuEHZ2Y3H40":{"duration":107.114,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QuEHZ2Y3H40","annotations":[{"segment":[4.344703464329049,9.023614887452641],"label":"Surfing"},{"segment":[10.026238763836268,15.206462125151674],"label":"Surfing"},{"segment":[16.209086001535297,18.882749671891638],"label":"Surfing"},{"segment":[19.718269568877993,29.74450833271426],"label":"Surfing"},{"segment":[35.76025159101602,42.277306787509595],"label":"Surfing"},{"segment":[45.619386375455015,48.085720960998444],"label":"Surfing"},{"segment":[53.473273010920444,75.86520608424338],"label":"Surfing"},{"segment":[76.53362199687989,92.74270787831513],"label":"Surfing"},{"segment":[95.75057948517941,98.59134711388457],"label":"Surfing"}]},"5BbHu0WQZqw":{"duration":118.25999999999999,"subset":"validation","resolution":"192x144","url":"https://www.youtube.com/watch?v=5BbHu0WQZqw","annotations":[{"segment":[1.18259,116.4143111324493],"label":"Shaving legs"}]},"5lFEpP89xQ4":{"duration":45.46,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=5lFEpP89xQ4","annotations":[]},"ggVQ4_B_sw0":{"duration":42.493,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ggVQ4_B_sw0","annotations":[]},"jIOlMUtxhH0":{"duration":125.02,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jIOlMUtxhH0","annotations":[]},"Bc0_vvpfMAE":{"duration":226.79,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Bc0_vvpfMAE","annotations":[{"segment":[55.59039524295843,211.36103231465705],"label":"Baking 
cookies"}]},"32vYs9wKXE8":{"duration":91.301,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=32vYs9wKXE8","annotations":[]},"b82y7f7TFbw":{"duration":167.718,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=b82y7f7TFbw","annotations":[{"segment":[42.38732628393136,55.208184234009366],"label":"Playing pool"},{"segment":[60.44118747893916,150.7104934539782],"label":"Playing pool"}]},"PlSk7Q9v15A":{"duration":140.16,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=PlSk7Q9v15A","annotations":[{"segment":[23.758885173241953,113.99906717946278],"label":"Mixing drinks"}]},"KCOzA0Suy5U":{"duration":66.67,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KCOzA0Suy5U","annotations":[{"segment":[8.262129402496099,63.239065929797185],"label":"Doing motocross"}]},"gN8F0o1baAo":{"duration":23.66,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=gN8F0o1baAo","annotations":[{"segment":[0.6974938,23.070528178627146],"label":"Mowing the lawn"}]},"q-ID2mgEIow":{"duration":89.58,"subset":"training","resolution":"160x120","url":"https://www.youtube.com/watch?v=q-ID2mgEIow","annotations":[{"segment":[2.236071331288626,83.71291980187208],"label":"Playing accordion"}]},"-8FS4LbXAgE":{"duration":169.23,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-8FS4LbXAgE","annotations":[{"segment":[33.52860022428108,49.63288852098301],"label":"Getting a tattoo"},{"segment":[84.74551636193446,120.38615408424336],"label":"Getting a 
tattoo"}]},"WUdefl_j2-8":{"duration":124.181,"subset":"training","resolution":"2560x1080","url":"https://www.youtube.com/watch?v=WUdefl_j2-8","annotations":[{"segment":[26.5367503900156,85.61491731669267],"label":"Snowboarding"},{"segment":[101.11082995319813,123.961],"label":"Snowboarding"}]},"26qGsfI9tZ8":{"duration":49.227,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=26qGsfI9tZ8","annotations":[{"segment":[33.608624925141896,45.704668029340965],"label":"Fun sliding down"}]},"Lr0Vm9zBXKE":{"duration":193.26,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Lr0Vm9zBXKE","annotations":[{"segment":[81.69887560149644,88.62250908812058],"label":"Skiing"},{"segment":[101.86772097557545,106.68416166192269],"label":"Skiing"},{"segment":[115.41396040592703,139.49616272630976],"label":"Skiing"},{"segment":[167.19069645216646,177.4256328291135],"label":"Skiing"}]},"PJdE8Ui2eno":{"duration":184.11,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PJdE8Ui2eno","annotations":[{"segment":[42.907588248049926,158.1145268],"label":"Rock climbing"}]},"V-Yz865cbn4":{"duration":27.26,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=V-Yz865cbn4","annotations":[{"segment":[4.841176287051482,26.456603744149763],"label":"Cumbia"}]},"OauFXDlQfWM":{"duration":7.97,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=OauFXDlQfWM","annotations":[]},"pzkwJYJol7o":{"duration":159.498,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=pzkwJYJol7o","annotations":[{"segment":[0.01,127.6480601101595],"label":"Canoeing"}]},"MlGh0geYhSg":{"duration":61.93,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MlGh0geYhSg","annotations":[{"segment":[0.19322147579780743,45.50365755038365],"label":"Horseback 
riding"}]},"Ra21PsRb6dU":{"duration":124.505,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ra21PsRb6dU","annotations":[]},"IUnqrqZ_x_A":{"duration":59.026,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IUnqrqZ_x_A","annotations":[{"segment":[0.01,59.024580498866214],"label":"Doing fencing"}]},"13vEnzRCKzo":{"duration":131.31,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=13vEnzRCKzo","annotations":[{"segment":[5.030409479999999,124.97154056162246],"label":"Ironing clothes"}]},"2AcaLeBzcnY":{"duration":136.232,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2AcaLeBzcnY","annotations":[]},"fVtiM88rOJI":{"duration":175.54,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fVtiM88rOJI","annotations":[{"segment":[23.00405614352574,175.542857],"label":"Paintball"}]},"APCbvR0SBl0":{"duration":33.41,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=APCbvR0SBl0","annotations":[{"segment":[0,31.67876723400936],"label":"Javelin throw"}]},"te5xo60oVZM":{"duration":123.716,"subset":"validation","resolution":"270x480","url":"https://www.youtube.com/watch?v=te5xo60oVZM","annotations":[{"segment":[0.01,123.715578],"label":"Grooming dog"}]},"mwDQENGsvd8":{"duration":43.097,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=mwDQENGsvd8","annotations":[{"segment":[0.2,38.142943925233645],"label":"Playing 
drums"}]},"x7yOoVf5-Do":{"duration":96.897,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=x7yOoVf5-Do","annotations":[{"segment":[14.80564430577223,87.79444773790952],"label":"BMX"}]},"wfy5UXaAVnQ":{"duration":76.092,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wfy5UXaAVnQ","annotations":[{"segment":[7.359894651568376,13.414001542374619],"label":"Rafting"},{"segment":[19.3494004399376,22.791931806552263],"label":"Rafting"},{"segment":[24.572551478939157,46.65223541653666],"label":"Rafting"},{"segment":[48.67027104524181,54.486961975039],"label":"Rafting"},{"segment":[56.861121538221525,65.52680394383775],"label":"Rafting"}]},"K5v9-h2S5pw":{"duration":147.91,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=K5v9-h2S5pw","annotations":[{"segment":[115.14453024961,139.83484034321373],"label":"Playing saxophone"}]},"K-Kkuz4oI38":{"duration":120.56,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=K-Kkuz4oI38","annotations":[]},"ahpoDWYqtfw":{"duration":80.6,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ahpoDWYqtfw","annotations":[{"segment":[16.901677067082684,70.4341263650546],"label":"Chopping 
wood"}]},"g4lQoscuTFY":{"duration":197.05,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=g4lQoscuTFY","annotations":[]},"DW-pIg-YKtk":{"duration":48.089,"subset":"testing","resolution":"800x480","url":"https://www.youtube.com/watch?v=DW-pIg-YKtk","annotations":[]},"ZoCeeRbpPsU":{"duration":150.373,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=ZoCeeRbpPsU","annotations":[]},"VkHUry3ZBok":{"duration":93.58,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=VkHUry3ZBok","annotations":[]},"ofrkSTRhCE4":{"duration":114.243,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=ofrkSTRhCE4","annotations":[{"segment":[12.70085914993286,69.62347597236275],"label":"Running a marathon"},{"segment":[76.02727036488612,108.22412550507303],"label":"Running a marathon"}]},"VrynprPwU2M":{"duration":110.504,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VrynprPwU2M","annotations":[]},"pbTzltaBByk":{"duration":57.981,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=pbTzltaBByk","annotations":[{"segment":[6.241241257813577,35.63839164430578],"label":"Trimming branches or hedges"},{"segment":[37.26654151638066,42.69370775663027],"label":"Trimming branches or hedges"},{"segment":[51.377173741029644,56.080717815912635],"label":"Trimming branches or 
hedges"}]},"Ka210U3btNE":{"duration":180.61,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ka210U3btNE","annotations":[]},"5Chl3x_mx40":{"duration":208.098,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5Chl3x_mx40","annotations":[{"segment":[67.98160608424338,72.20003510140405],"label":"Curling"},{"segment":[91.34521294851794,95.56364196567863],"label":"Curling"},{"segment":[124.76815054602184,128.33759048361935],"label":"Curling"},{"segment":[168.89940795631824,174.0913205928237],"label":"Curling"}]},"3mlxwa9Ve5I":{"duration":110.27000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3mlxwa9Ve5I","annotations":[{"segment":[38.001312012480504,79.09775351014041],"label":"Shaving"}]},"yDCZNNI3tDo":{"duration":111.36,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=yDCZNNI3tDo","annotations":[{"segment":[23.10648007800312,84.95540419656787],"label":"Getting a haircut"}]},"I6B4g85H2iI":{"duration":119.096,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=I6B4g85H2iI","annotations":[{"segment":[16.57952271288578,60.717401217558674],"label":"BMX"},{"segment":[66.46645261942784,76.29547598391382],"label":"BMX"},{"segment":[85.38268626428766,92.24445729232504],"label":"BMX"}]},"0YPfoB84Fes":{"duration":86.192,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=0YPfoB84Fes","annotations":[{"segment":[5.378601248049923,82.02366903276132],"label":"Making a cake"}]},"PSAfQ1qjtOA":{"duration":111.5,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PSAfQ1qjtOA","annotations":[{"segment":[20.178249263303865,105.06605629953198],"label":"Using the balance 
beam"}]},"vmFfa1yv72k":{"duration":219.986,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=vmFfa1yv72k","annotations":[{"segment":[30.279203637916353,100.83933759430265],"label":"Beer pong"},{"segment":[105.97721142607836,159.75362419866403],"label":"Beer pong"},{"segment":[191.60844195567338,198.45894039804097],"label":"Beer pong"}]},"j6BCgvUusWs":{"duration":43.12,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=j6BCgvUusWs","annotations":[]},"-SCRtjT7dto":{"duration":126.57,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=-SCRtjT7dto","annotations":[{"segment":[77.97279884,111.09516383],"label":"Preparing salad"}]},"aeKIYkn99_Q":{"duration":219.104,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=aeKIYkn99_Q","annotations":[{"segment":[48.87454446177846,219.061],"label":"Snow tubing"}]},"S47vfJ9g3Mw":{"duration":187.34,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=S47vfJ9g3Mw","annotations":[{"segment":[28.933729822662297,179.44757582215288],"label":"Painting"}]},"ZO8ValsY3rE":{"duration":107.39,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=ZO8ValsY3rE","annotations":[{"segment":[53.40721039186323,56.58548685759533],"label":"Washing hands"},{"segment":[73.31325772986946,90.87741714575732],"label":"Washing hands"}]},"7H4-gDM3r0w":{"duration":117.00999999999999,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7H4-gDM3r0w","annotations":[{"segment":[40.52417784711389,47.09566614664587],"label":"Mixing drinks"},{"segment":[60.056101404056164,80.68327301092044],"label":"Mixing drinks"},{"segment":[84.69918252730109,96.19928705148206],"label":"Mixing 
drinks"}]},"bb-DPA34qvw":{"duration":124.785,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bb-DPA34qvw","annotations":[{"segment":[6.424139153321235,11.096240355736677],"label":"Playing congas"},{"segment":[17.909721275925868,69.88684715279767],"label":"Playing congas"},{"segment":[81.95644108424337,110.1837188923557],"label":"Playing congas"}]},"8ofNg0y8w60":{"duration":81.69,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8ofNg0y8w60","annotations":[{"segment":[67.18424336973479,80.81213728549142],"label":"Tennis serve with ball bouncing"},{"segment":[0.2,5.540312012480499],"label":"Tennis serve with ball bouncing"}]},"8TNzCiy8iaY":{"duration":159.07999999999998,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=8TNzCiy8iaY","annotations":[{"segment":[53.60551179598204,140.71446846445286],"label":"Tango"}]},"TfpCjzGqA7w":{"duration":26.285,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=TfpCjzGqA7w","annotations":[{"segment":[0.01,26.284988662131518],"label":"Hitting a pinata"}]},"QSoT1qBELhs":{"duration":108.972,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=QSoT1qBELhs","annotations":[]},"W0M9tXQV3Xg":{"duration":191.75,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=W0M9tXQV3Xg","annotations":[{"segment":[77.75703588143526,188.73003276131047],"label":"Spinning"}]},"0lYwrXV1R3A":{"duration":146.193,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=0lYwrXV1R3A","annotations":[{"segment":[91.08587817266054,145.9651183068536],"label":"Playing ice 
hockey"}]},"wSgONQANu6M":{"duration":19.064,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=wSgONQANu6M","annotations":[]},"MleQSQt6Utw":{"duration":180.977,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=MleQSQt6Utw","annotations":[{"segment":[0.01,180.97632653061225],"label":"Building sandcastles"}]},"oM-f4_Z2gf4":{"duration":151.604,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=oM-f4_Z2gf4","annotations":[]},"H5kerJ_FoBU":{"duration":60.047,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=H5kerJ_FoBU","annotations":[{"segment":[8.099557722308893,60.021],"label":"Plastering"}]},"HdZjxdQhtZo":{"duration":157.943,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HdZjxdQhtZo","annotations":[{"segment":[28.089552251477812,32.77114429339078],"label":"Shuffleboard"},{"segment":[36.95993717299712,47.062320000283],"label":"Shuffleboard"},{"segment":[56.425504084108944,61.10709612602191],"label":"Shuffleboard"},{"segment":[72.934276021381,82.29746010520692],"label":"Shuffleboard"},{"segment":[116.30060230436428,122.21419225204382],"label":"Shuffleboard"},{"segment":[140.94055906396255,142.91175569422776],"label":"Shuffleboard"}]},"k-5moAuYOcE":{"duration":103.236,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=k-5moAuYOcE","annotations":[]},"NOEpoA4Mjrw":{"duration":174.266,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=NOEpoA4Mjrw","annotations":[]},"osaJxzwVt6Q":{"duration":98.848,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=osaJxzwVt6Q","annotations":[{"segment":[7.093570870514821,94.68375031513261],"label":"Playing 
congas"}]},"jgHYzuDU2Iw":{"duration":38.755,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=jgHYzuDU2Iw","annotations":[{"segment":[0.01,34.16025819032761],"label":"Mooping floor"}]},"Nf-7lKa387k":{"duration":63.72,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Nf-7lKa387k","annotations":[{"segment":[1.2730257,38.0697125],"label":"Smoking hookah"}]},"9yktj7NCdFI":{"duration":233.55,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=9yktj7NCdFI","annotations":[{"segment":[82.70672539010404,96.91625089765495],"label":"Hopscotch"},{"segment":[112.21881682886362,170.51430609061097],"label":"Hopscotch"}]},"qW926_opnTE":{"duration":109.0,"subset":"validation","resolution":"384x240","url":"https://www.youtube.com/watch?v=qW926_opnTE","annotations":[{"segment":[52.37175643216205,57.13282519872224],"label":"Playing violin"},{"segment":[65.63473371043685,71.92614600910568],"label":"Playing violin"}]},"8O1cLqQrn4o":{"duration":129.8,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8O1cLqQrn4o","annotations":[{"segment":[83.83309242573785,122.50971236128358],"label":"Playing badminton"}]},"NfTfLk1D-tI":{"duration":228.856,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NfTfLk1D-tI","annotations":[{"segment":[13.240134629698264,228.85503401360543],"label":"Playing congas"}]},"eBajcta4sD4":{"duration":122.346,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eBajcta4sD4","annotations":[{"segment":[40.82392199687988,57.22979719188768],"label":"Doing fencing"},{"segment":[69.05728861154446,73.25414040561623],"label":"Doing fencing"},{"segment":[76.11563026521061,79.56242901716068],"label":"Doing fencing"},{"segment":[79.75322620904836,106.27403588143527],"label":"Doing 
fencing"}]},"UoBFbIxneZ0":{"duration":36.27,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UoBFbIxneZ0","annotations":[]},"xH8l5rCWrMU":{"duration":94.07,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xH8l5rCWrMU","annotations":[{"segment":[36.53969534321373,89.22142477379096],"label":"Preparing pasta"}]},"DkouAjobzac":{"duration":177.94,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DkouAjobzac","annotations":[{"segment":[33.86585536346624,36.641745147356914],"label":"Sailing"},{"segment":[56.07297363459164,142.40314591359166],"label":"Sailing"}]},"FjZKKAEEHbY":{"duration":179.07,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=FjZKKAEEHbY","annotations":[{"segment":[27.40630449815554,112.4647235012708],"label":"Grooming horse"}]},"1TWdrO8cCxA":{"duration":82.965,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1TWdrO8cCxA","annotations":[{"segment":[0.01,82.96489795918367],"label":"Rope skipping"}]},"xOTW_-tiEjY":{"duration":171.967,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xOTW_-tiEjY","annotations":[{"segment":[0,171.966983],"label":"Having an ice cream"}]},"zWiu-wdKeWs":{"duration":53.43,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=zWiu-wdKeWs","annotations":[{"segment":[1.2809586938994697,52.02022660667206],"label":"Spread mulch"}]},"hiz0rH1bO7k":{"duration":192.26,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=hiz0rH1bO7k","annotations":[{"segment":[91.57584444,174.94065666],"label":"Playing field hockey"}]},"IfYiYObrUlY":{"duration":208.42000000000002,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=IfYiYObrUlY","annotations":[{"segment":[94.29402612839208,181.43471153822153],"label":"Preparing 
salad"}]},"ANwaFSIHdW0":{"duration":38.755,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ANwaFSIHdW0","annotations":[{"segment":[0.01,38.081681400441674],"label":"Fun sliding down"}]},"5rO2DwFhdwo":{"duration":199.07,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=5rO2DwFhdwo","annotations":[{"segment":[0,135.20610452418097],"label":"Playing racquetball"}]},"PFrFwE3CfjE":{"duration":180.35,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=PFrFwE3CfjE","annotations":[{"segment":[82.43739565092808,151.9324000624025],"label":"Hanging wallpaper"}]},"vGKdr_au240":{"duration":39.103,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=vGKdr_au240","annotations":[{"segment":[17.324621166926676,27.023968932917313],"label":"Curling"}]},"LrmtSSUVRaQ":{"duration":107.904,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=LrmtSSUVRaQ","annotations":[{"segment":[5.05004503900156,99.31755243369736],"label":"Playing blackjack"}]},"iqyc2JM0uIU":{"duration":109.69,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iqyc2JM0uIU","annotations":[]},"ELiXlJUBzzw":{"duration":136.627,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ELiXlJUBzzw","annotations":[{"segment":[7.673235060014646,11.509852590021968],"label":"Hand car wash"},{"segment":[19.18308765003661,60.959589643449675],"label":"Hand car wash"},{"segment":[72.04315139680416,74.60089641680905],"label":"Hand car wash"},{"segment":[78.01122311014889,85.25816723868955],"label":"Hand car wash"},{"segment":[92.9314022901716,101.883509850234],"label":"Hand car wash"},{"segment":[103.16238235881436,121.49288831513262],"label":"Hand car wash"},{"segment":[123.83748791419657,128.95297794851794],"label":"Hand car 
wash"}]},"Q0FbJovQ0Lw":{"duration":148.701,"subset":"validation","resolution":"400x224","url":"https://www.youtube.com/watch?v=Q0FbJovQ0Lw","annotations":[{"segment":[0.2,141.25110591900312],"label":"Playing drums"}]},"e7r6SP4_CUQ":{"duration":121.069,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=e7r6SP4_CUQ","annotations":[]},"TqG43Oee7gc":{"duration":178.423,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=TqG43Oee7gc","annotations":[]},"QHF28_yGOV8":{"duration":113.407,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QHF28_yGOV8","annotations":[{"segment":[26.36120482527301,32.022671633385336],"label":"Surfing"},{"segment":[38.21490095475819,112.87549448673947],"label":"Surfing"}]},"opAcl-kukdU":{"duration":179.24,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=opAcl-kukdU","annotations":[{"segment":[0.01,7.270055504260987],"label":"Cheerleading"},{"segment":[13.701258450338013,168.6093623775351],"label":"Cheerleading"}]},"P3HENCwV5iY":{"duration":23.546,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=P3HENCwV5iY","annotations":[]},"Ufki4AjZHFI":{"duration":12.144,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=Ufki4AjZHFI","annotations":[{"segment":[0.01,10.882467941581274],"label":"Powerbocking"}]},"im6KJzbZzKE":{"duration":135.35,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=im6KJzbZzKE","annotations":[{"segment":[4.85650751200116,107.68777380655226],"label":"Long 
jump"}]},"62M1FRXsUiM":{"duration":91.49,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=62M1FRXsUiM","annotations":[]},"zufK6CufVhA":{"duration":169.228,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=zufK6CufVhA","annotations":[{"segment":[50.80708658346334,77.728244149766],"label":"Shuffleboard"},{"segment":[95.41174960998441,101.49420358814352],"label":"Shuffleboard"}]},"dL--vW-AJJo":{"duration":213.229,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dL--vW-AJJo","annotations":[{"segment":[26.612024154435566,37.92213442007068],"label":"Changing car wheel"},{"segment":[45.573091364470905,50.89549619535802],"label":"Changing car wheel"},{"segment":[56.55055092043681,74.84631739469577],"label":"Changing car wheel"},{"segment":[96.13593656474258,104.7848443525741],"label":"Changing car wheel"},{"segment":[117.42555573478938,153.68443838377533],"label":"Changing car wheel"}]},"EoguY2ethBo":{"duration":54.405,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EoguY2ethBo","annotations":[]},"hhoewBggtuw":{"duration":198.322,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=hhoewBggtuw","annotations":[{"segment":[16.18531573611108,157.96126433424192],"label":"Changing car wheel"}]},"nt8MurhX7HY":{"duration":91.069,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=nt8MurhX7HY","annotations":[{"segment":[16.2552488299532,82.99808814352573],"label":"Tug of war"}]},"VgQ6a7oVx7g":{"duration":62.67,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VgQ6a7oVx7g","annotations":[{"segment":[8.6365527,59.62137285491419],"label":"Using parallel 
bars"}]},"hsJct3UsbAs":{"duration":211.023,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=hsJct3UsbAs","annotations":[{"segment":[20.410955953884415,202.7927228580343],"label":"Waterskiing"}]},"TrAU4xsihas":{"duration":80.968,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=TrAU4xsihas","annotations":[]},"8NcRuVMSQLs":{"duration":221.64,"subset":"training","resolution":"582x480","url":"https://www.youtube.com/watch?v=8NcRuVMSQLs","annotations":[{"segment":[32.49687051482059,38.719675507020284],"label":"High jump"},{"segment":[48.05388299531981,64.99374102964119],"label":"High jump"},{"segment":[100.60201404056163,105.7876848673947],"label":"High jump"},{"segment":[120.65327457098284,140.01311232449297],"label":"High jump"},{"segment":[136.90170982839314,144.8530717628705],"label":"High jump"},{"segment":[202.24116224648986,220.5638658346334],"label":"High jump"}]},"I9_eKiGZPxc":{"duration":170.946,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=I9_eKiGZPxc","annotations":[{"segment":[34.12687675507021,94.64875975039003],"label":"Rafting"}]},"6ECElQXTBIY":{"duration":77.578,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6ECElQXTBIY","annotations":[{"segment":[13.252296093603746,70.25532312636506],"label":"Baton twirling"}]},"lu7viu9nf6g":{"duration":84.17,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=lu7viu9nf6g","annotations":[{"segment":[14.0301872074883,21.373088923556942],"label":"Archery"},{"segment":[26.486895475819033,44.05741029641186],"label":"Archery"},{"segment":[48.25335413416536,68.05296411856474],"label":"Archery"}]},"M_CU9guT3C0":{"duration":98.27000000000001,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=M_CU9guT3C0","annotations":[{"segment":[30.459215049999997,88.6805249],"label":"Washing 
dishes"}]},"feqYwvoENzk":{"duration":84.75,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=feqYwvoENzk","annotations":[{"segment":[46.673394561622466,75.10053289235569],"label":"Playing piano"}]},"3L0MnbQkLWM":{"duration":151.42000000000002,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=3L0MnbQkLWM","annotations":[{"segment":[22.204724433697347,151.417323],"label":"Hopscotch"}]},"p-l6as8o1f4":{"duration":123.53,"subset":"validation","resolution":"638x360","url":"https://www.youtube.com/watch?v=p-l6as8o1f4","annotations":[{"segment":[5.8503808,116.42990639625584],"label":"Horseback riding"}]},"16ZtFro1khg":{"duration":90.489,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=16ZtFro1khg","annotations":[]},"nxDAt9SkPoA":{"duration":182.811,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nxDAt9SkPoA","annotations":[{"segment":[7.697358814352575,79.5393744149766],"label":"Rope skipping"},{"segment":[92.65339313572542,142.27416380655225],"label":"Rope skipping"}]},"Ls-0SqAeXW0":{"duration":96.6,"subset":"validation","resolution":"160x120","url":"https://www.youtube.com/watch?v=Ls-0SqAeXW0","annotations":[{"segment":[4.141474258970359,38.02626365054603],"label":"Playing squash"},{"segment":[48.417599063962555,65.13409516380656],"label":"Playing squash"},{"segment":[75.52543057722309,92.84432293291732],"label":"Playing squash"}]},"Kj2SF5bCxJ8":{"duration":114.28999999999999,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Kj2SF5bCxJ8","annotations":[{"segment":[13.57801198654076,97.95912504261553],"label":"Making a 
sandwich"}]},"WYjtYDyHUxY":{"duration":140.16,"subset":"training","resolution":"592x360","url":"https://www.youtube.com/watch?v=WYjtYDyHUxY","annotations":[{"segment":[0,22.52110988299532],"label":"Paintball"}]},"dnZ5wcF69lI":{"duration":78.67,"subset":"testing","resolution":"416x240","url":"https://www.youtube.com/watch?v=dnZ5wcF69lI","annotations":[]},"ibDWgL2D2l4":{"duration":24.566,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=ibDWgL2D2l4","annotations":[{"segment":[0,4.785686427457098],"label":"Hitting a pinata"}]},"qKA-mFjzpok":{"duration":200.876,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=qKA-mFjzpok","annotations":[]},"SYp68jyl0-Q":{"duration":97.28999999999999,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SYp68jyl0-Q","annotations":[]},"QTPz2j16KFk":{"duration":95.272,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QTPz2j16KFk","annotations":[{"segment":[0.20000000000000004,19.7676502488671],"label":"Playing rubik cube"},{"segment":[25.26692137073238,40.724332091651014],"label":"Playing rubik cube"}]},"dZSjgAYlpS0":{"duration":62.067,"subset":"training","resolution":"638x360","url":"https://www.youtube.com/watch?v=dZSjgAYlpS0","annotations":[{"segment":[0.2,50.350714134165365],"label":"Peeling potatoes"}]},"jE0XAclET5w":{"duration":167.161,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jE0XAclET5w","annotations":[{"segment":[0.01,158.81482333385335],"label":"Playing rubik 
cube"}]},"r1BcU3OOz34":{"duration":191.1,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=r1BcU3OOz34","annotations":[]},"qn7LRqyyjVE":{"duration":223.957,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qn7LRqyyjVE","annotations":[]},"lIo8HfzzSNs":{"duration":163.05,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lIo8HfzzSNs","annotations":[]},"Y6eNXuhsFzM":{"duration":63.32,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Y6eNXuhsFzM","annotations":[]},"nTNkGOtp7aQ":{"duration":34.67,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=nTNkGOtp7aQ","annotations":[{"segment":[5.8409956755070205,25.58139772698908],"label":"Springboard diving"}]},"qKG1mU0Feug":{"duration":80.92,"subset":"validation","resolution":"294x240","url":"https://www.youtube.com/watch?v=qKG1mU0Feug","annotations":[{"segment":[0.01,80.921563],"label":"Playing water polo"}]},"at_ahUtnF9Q":{"duration":236.19,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=at_ahUtnF9Q","annotations":[{"segment":[163.60327464527154,224.03331125117006],"label":"Ping-pong"}]},"iHzwizJqnUc":{"duration":64.92,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=iHzwizJqnUc","annotations":[]},"I-ji2Cw-rD8":{"duration":186.921,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=I-ji2Cw-rD8","annotations":[]},"spZ_RrpyNJw":{"duration":112.083,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=spZ_RrpyNJw","annotations":[{"segment":[8.715605511936502,29.63306483631947],"label":"Playing pool"},{"segment":[36.256926982509206,106.50472816446876],"label":"Playing 
pool"}]},"Da9uVKDLwVY":{"duration":238.4,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Da9uVKDLwVY","annotations":[{"segment":[17.077392523364484,222.00610280373832],"label":"Grooming dog"}]},"cxF2YQeHDZE":{"duration":113.105,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=cxF2YQeHDZE","annotations":[]},"arbdFQpe0fk":{"duration":128.732,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=arbdFQpe0fk","annotations":[{"segment":[0.01,47.20153197263022],"label":"Hitting a pinata"},{"segment":[98.53369895082336,111.36674069537166],"label":"Hitting a pinata"},{"segment":[113.97345229973301,128.53145],"label":"Hitting a pinata"}]},"HjNVTXM5JKk":{"duration":174.382,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HjNVTXM5JKk","annotations":[]},"980U7SiE0Nk":{"duration":125.179,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=980U7SiE0Nk","annotations":[{"segment":[0.01,125.141],"label":"Baton twirling"}]},"ArGF2CfOxkY":{"duration":57.934,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ArGF2CfOxkY","annotations":[{"segment":[18.02067004680187,30.39576677067083],"label":"Calf roping"}]},"iyOyZJm7fVU":{"duration":103.654,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=iyOyZJm7fVU","annotations":[{"segment":[83.198001875195,89.82796898283931],"label":"Baking cookies"}]},"U9b8U-EymNw":{"duration":160.938,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=U9b8U-EymNw","annotations":[{"segment":[0.009999999999999998,160.93750566893425],"label":"Wakeboarding"}]},"sTMbpIfI-6o":{"duration":150.234,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=sTMbpIfI-6o","annotations":[{"segment":[40.766761310452424,127.9403213728549],"label":"Making a 
lemonade"}]},"dZZE8HI0OBE":{"duration":13.17,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dZZE8HI0OBE","annotations":[{"segment":[1.9306973822152886,11.527676855532135],"label":"Putting on makeup"}]},"CSCB9NjpUCk":{"duration":181.882,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=CSCB9NjpUCk","annotations":[]},"diZvvLb46O8":{"duration":201.131,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=diZvvLb46O8","annotations":[{"segment":[11.606859594383774,192.31655694227769],"label":"Rope skipping"}]},"stkWKPWbQpE":{"duration":158.244,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=stkWKPWbQpE","annotations":[{"segment":[11.812924165550855,154.30636564446033],"label":"Baton twirling"}]},"O4LdjwlsGKI":{"duration":94.83,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=O4LdjwlsGKI","annotations":[{"segment":[41.541093603744145,56.02873478939157],"label":"Skateboarding"},{"segment":[80.12552574102963,86.18668174726989],"label":"Skateboarding"}]},"e8w3gmiv-mM":{"duration":148.515,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=e8w3gmiv-mM","annotations":[{"segment":[16.676368174726992,28.025563182527303],"label":"Fun sliding down"},{"segment":[51.41880187207489,58.13567238689548],"label":"Fun sliding down"},{"segment":[88.70901404056164,90.79356006240249],"label":"Fun sliding down"},{"segment":[125.99922620904837,133.19278471138844],"label":"Fun sliding 
down"}]},"nxNir9CE3zI":{"duration":62.32,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nxNir9CE3zI","annotations":[{"segment":[10.007711388455538,58.97748361934477],"label":"Kayaking"}]},"FklvvNrpsUk":{"duration":172.153,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=FklvvNrpsUk","annotations":[{"segment":[0.19999999999999998,172.121],"label":"Blow-drying hair"}]},"y-rgla4aNUo":{"duration":226.12,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=y-rgla4aNUo","annotations":[{"segment":[0,226.11532879818594],"label":"Playing guitarra"}]},"9Om-YIcAE7k":{"duration":182.56,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9Om-YIcAE7k","annotations":[]},"0CZuZZ17mck":{"duration":198.74,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0CZuZZ17mck","annotations":[]},"makIgB4X3q8":{"duration":151.16,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=makIgB4X3q8","annotations":[{"segment":[58.918096723868956,72.82276755070203],"label":"Brushing teeth"}]},"FsQWaEejutU":{"duration":19.95,"subset":"training","resolution":"136x240","url":"https://www.youtube.com/watch?v=FsQWaEejutU","annotations":[{"segment":[0.010000000000000002,17.59014976599064],"label":"Smoking hookah"}]},"WqnnGmL-lmU":{"duration":224.236,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WqnnGmL-lmU","annotations":[{"segment":[139.22850442725192,152.171857853906],"label":"Braiding hair"}]},"1kMPW5huHWQ":{"duration":56.309,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1kMPW5huHWQ","annotations":[{"segment":[0.2,6.924023400936037],"label":"Playing rubik cube"},{"segment":[10.429858034321374,21.297945397815912],"label":"Playing rubik cube"},{"segment":[28.747843993759748,48.46816380655226],"label":"Playing rubik 
cube"}]},"aEUbl9oT_0s":{"duration":209.3,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aEUbl9oT_0s","annotations":[{"segment":[0,3.264914196567863],"label":"Javelin throw"},{"segment":[55.830032761310456,209.301],"label":"Javelin throw"}]},"lEGetBydfl4":{"duration":16.86,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=lEGetBydfl4","annotations":[{"segment":[0.33465328,15.964637946957879],"label":"Chopping wood"}]},"53tCYiyAyf0":{"duration":28.213,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=53tCYiyAyf0","annotations":[{"segment":[10.563086302652106,20.377953992199686],"label":"Throwing darts"}]},"0y4mO86t4Z0":{"duration":169.88,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=0y4mO86t4Z0","annotations":[{"segment":[0,164.8418257628705],"label":"Horseback riding"}]},"PN99KIY7jRY":{"duration":5.13,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=PN99KIY7jRY","annotations":[{"segment":[0.816574287051482,3.378375971918877],"label":"Tennis serve with ball bouncing"}]},"S-VSs8o98Ho":{"duration":49.784,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=S-VSs8o98Ho","annotations":[{"segment":[7.5101191249810855,46.06723162575869],"label":"Playing blackjack"}]},"nd50oUKdygI":{"duration":128.059,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nd50oUKdygI","annotations":[{"segment":[30.96567508958862,41.953494664586586],"label":"Sharpening knives"},{"segment":[49.3453008673947,62.13112781279252],"label":"Sharpening knives"},{"segment":[73.71828348205929,80.91031113884556],"label":"Sharpening 
knives"}]},"72_9wLYUDGM":{"duration":48.37,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=72_9wLYUDGM","annotations":[]},"AKQqaMyZtf8":{"duration":153.855,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AKQqaMyZtf8","annotations":[{"segment":[18.032742732003467,132.32265309736067],"label":"Blowing leaves"}]},"X3c6Mshh7Lw":{"duration":197.602,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=X3c6Mshh7Lw","annotations":[]},"R1Q-KP8GHFE":{"duration":175.567,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=R1Q-KP8GHFE","annotations":[{"segment":[17.74773302085868,137.8862608103799],"label":"Wakeboarding"}]},"gPtpPiea0iQ":{"duration":47.83,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=gPtpPiea0iQ","annotations":[{"segment":[16.33138689547582,27.29355070202808],"label":"Hammer throw"}]},"njsO-Nlwkmg":{"duration":36.9,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=njsO-Nlwkmg","annotations":[{"segment":[17.71118688,29.62970176],"label":"Vacuuming floor"},{"segment":[2.1835052800000003,15.1937728],"label":"Vacuuming floor"}]},"CsvEXvHlO3M":{"duration":88.817,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CsvEXvHlO3M","annotations":[{"segment":[79.18853666146646,88.541],"label":"Snow tubing"},{"segment":[0.2,82.2342496099844],"label":"Snow tubing"}]},"UcQ63-sQ9bI":{"duration":225.77,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=UcQ63-sQ9bI","annotations":[{"segment":[3.6098718098554285,216.91175187216072],"label":"Playing 
guitarra"}]},"KLN5w_G9TwI":{"duration":233.663,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KLN5w_G9TwI","annotations":[]},"mpRRbDm7FUM":{"duration":193.864,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mpRRbDm7FUM","annotations":[]},"zz69LCrUQHU":{"duration":95.16,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zz69LCrUQHU","annotations":[]},"p54LLMnyg54":{"duration":232.25,"subset":"testing","resolution":"300x240","url":"https://www.youtube.com/watch?v=p54LLMnyg54","annotations":[]},"8Tyu4-JnfDc":{"duration":178.91,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8Tyu4-JnfDc","annotations":[{"segment":[112.0624463971756,129.08812318479133],"label":"Baking cookies"},{"segment":[138.57784467297057,144.71825269473362],"label":"Baking cookies"}]},"iVVatZsgnGo":{"duration":160.416,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iVVatZsgnGo","annotations":[{"segment":[18.267196567862715,94.10030577223088],"label":"Scuba diving"},{"segment":[113.12058034321372,141.9012589703588],"label":"Scuba 
diving"}]},"c_RXwivt4JM":{"duration":80.202,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=c_RXwivt4JM","annotations":[]},"ajA_cE6b9AY":{"duration":124.552,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ajA_cE6b9AY","annotations":[{"segment":[22.382747024928296,105.20278908100306],"label":"Rafting"}]},"ujiWstJ3tt0":{"duration":58.375,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=ujiWstJ3tt0","annotations":[{"segment":[0.2,58.174965986394554],"label":"Plastering"}]},"CGpjeyRKB6o":{"duration":219.801,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CGpjeyRKB6o","annotations":[]},"A92F-HvSZx0":{"duration":61.696,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=A92F-HvSZx0","annotations":[{"segment":[0.01,61.69544217687075],"label":"Playing congas"}]},"ozVKcVAgRMY":{"duration":129.73,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=ozVKcVAgRMY","annotations":[]},"_OY-1VtINQQ":{"duration":57.772,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_OY-1VtINQQ","annotations":[{"segment":[0.8104352574102964,46.75129329173167],"label":"Using the rowing machine"}]},"ZIfB1zKleJU":{"duration":204.27,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZIfB1zKleJU","annotations":[]},"s0swzu1jIpc":{"duration":91.0,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=s0swzu1jIpc","annotations":[{"segment":[32.08389114820593,42.16334367706708],"label":"Shaving"}]},"c-X500da7JU":{"duration":57.26,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=c-X500da7JU","annotations":[{"segment":[8.932980992709096,44.3075855975039],"label":"Wrapping 
presents"}]},"hRoHfDR4-e0":{"duration":237.042,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=hRoHfDR4-e0","annotations":[{"segment":[4.578748850041089,227.23884698088222],"label":"Bullfighting"}]},"lGESoAdgps8":{"duration":231.36,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=lGESoAdgps8","annotations":[{"segment":[12.813617004680186,29.056230109204364],"label":"Discus throw"},{"segment":[61.180509360374415,71.28702418096724],"label":"Discus throw"},{"segment":[79.9497511700468,87.1686903276131],"label":"Discus throw"},{"segment":[118.5710756630265,127.59474960998439],"label":"Discus throw"},{"segment":[0.2,231.167],"label":"Discus throw"},{"segment":[0.2,231.167],"label":"Discus throw"},{"segment":[186.42910374414976,191.12141419656786],"label":"Discus throw"},{"segment":[201.58887597503897,207.72497425897035],"label":"Discus throw"}]},"9pJBfTZOcxI":{"duration":63.623,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=9pJBfTZOcxI","annotations":[{"segment":[9.81984243369735,54.95144149765991],"label":"Wakeboarding"}]},"UjiJbwQqgW0":{"duration":15.372,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=UjiJbwQqgW0","annotations":[]},"yslrb52Di5w":{"duration":51.29,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=yslrb52Di5w","annotations":[{"segment":[5.294882496070726,20.954370208081805],"label":"Playing piano"}]},"9xOUoTQLea4":{"duration":226.74,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9xOUoTQLea4","annotations":[{"segment":[102.58257135725428,223.9129919625585],"label":"Drinking coffee"}]},"RzFqIN5hWJQ":{"duration":175.358,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=RzFqIN5hWJQ","annotations":[{"segment":[25.441825379137615,149.91527212653133],"label":"Baton 
twirling"}]},"E0U3v4WLuEA":{"duration":119.978,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=E0U3v4WLuEA","annotations":[{"segment":[55.60388002897505,76.68857280449234],"label":"Assembling bicycle"}]},"ETHVjrG7S4k":{"duration":67.176,"subset":"validation","resolution":"854x470","url":"https://www.youtube.com/watch?v=ETHVjrG7S4k","annotations":[{"segment":[26.744763837202843,57.98181253497022],"label":"Using the monkey bar"}]},"vlX9sU9bM9s":{"duration":4.51,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vlX9sU9bM9s","annotations":[{"segment":[0,4.411666],"label":"Javelin throw"}]},"yb2CXXx7_QU":{"duration":40.055,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yb2CXXx7_QU","annotations":[]},"ik8uSCSBv1U":{"duration":9.73,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ik8uSCSBv1U","annotations":[{"segment":[0,9.751666],"label":"Tennis serve with ball bouncing"}]},"27Pg4wHah2I":{"duration":93.55,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=27Pg4wHah2I","annotations":[{"segment":[0,1.7513816634298025],"label":"Playing piano"},{"segment":[8.173114429339078,11.675877756198684],"label":"Playing piano"},{"segment":[25.103137175827168,27.730209670971874],"label":"Playing 
piano"}]},"rua32XjJh54":{"duration":185.876,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=rua32XjJh54","annotations":[{"segment":[0.01,13.918931940951108],"label":"Curling"},{"segment":[64.08508247812907,80.90379190677831],"label":"Curling"},{"segment":[122.37060998086183,127.8801872074883],"label":"Curling"},{"segment":[155.71805108939054,165.5772945475642],"label":"Curling"}]},"jRXF5_vNUWE":{"duration":131.12,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=jRXF5_vNUWE","annotations":[{"segment":[0.01,129.69104890171607],"label":"Cheerleading"}]},"96vBhCFBbQk":{"duration":138.345,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=96vBhCFBbQk","annotations":[]},"8DIfyvX7H8Y":{"duration":227.486,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8DIfyvX7H8Y","annotations":[{"segment":[177.8006164899657,209.3859555470654],"label":"Kite flying"}]},"S6VgTNGiIkg":{"duration":235.358,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=S6VgTNGiIkg","annotations":[{"segment":[13.582117004680187,211.458496099844],"label":"Playing pool"}]},"8sTJGmY-SGQ":{"duration":46.65,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8sTJGmY-SGQ","annotations":[{"segment":[9.824648588143525,46.648887],"label":"Using parallel bars"}]},"GaxyzK2mHqw":{"duration":156.99,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GaxyzK2mHqw","annotations":[{"segment":[43.10480520445308,51.18695618028802],"label":"Making a sandwich"},{"segment":[88.65874706825008,95.76124338034745],"label":"Making a sandwich"},{"segment":[105.80270368365755,111.43571800014858],"label":"Making a sandwich"},{"segment":[113.63993964430577,149.39733444617784],"label":"Making a 
sandwich"}]},"GaIvG8u1tzo":{"duration":122.93,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=GaIvG8u1tzo","annotations":[{"segment":[32.85566128,104.35556918],"label":"Getting a piercing"}]},"OJYajhN1RHs":{"duration":237.26,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OJYajhN1RHs","annotations":[{"segment":[17.948404836193447,186.70041731669266],"label":"Brushing hair"}]},"-zp0Sban5Wk":{"duration":186.55,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-zp0Sban5Wk","annotations":[{"segment":[80.88578091,167.05022064898597],"label":"Preparing pasta"}]},"zymyLkiT-W0":{"duration":84.73,"subset":"testing","resolution":"408x720","url":"https://www.youtube.com/watch?v=zymyLkiT-W0","annotations":[]},"DfYDYCWkB-o":{"duration":73.63,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DfYDYCWkB-o","annotations":[{"segment":[7.020690252226833,34.863872495217485],"label":"Bathing dog"}]},"kdjLJTGj4H0":{"duration":89.165,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=kdjLJTGj4H0","annotations":[{"segment":[29.072396904836193,84.99155267394697],"label":"Curling"}]},"z5bc9KKiAGI":{"duration":40.589,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=z5bc9KKiAGI","annotations":[{"segment":[0.2,34.5729883151326],"label":"Throwing 
darts"}]},"WZrKIuJaqTM":{"duration":50.713,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=WZrKIuJaqTM","annotations":[]},"NKZzGiS5hv0":{"duration":131.774,"subset":"training","resolution":"640x356","url":"https://www.youtube.com/watch?v=NKZzGiS5hv0","annotations":[{"segment":[51.23376086982879,56.76700738149287],"label":"Sumo"},{"segment":[69.26804579673393,85.45791521975103],"label":"Sumo"}]},"oEd7WT4mVSE":{"duration":65.97,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=oEd7WT4mVSE","annotations":[{"segment":[2.518072542901716,24.718222308892354],"label":"Smoking hookah"}]},"_p0eoCHZ8BA":{"duration":190.99,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_p0eoCHZ8BA","annotations":[{"segment":[70.91808872,177.63788611544462],"label":"Grooming horse"},{"segment":[3.27346576,69.23857254290171],"label":"Grooming horse"}]},"rWoSJ9YBy5E":{"duration":28.515,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rWoSJ9YBy5E","annotations":[{"segment":[7.432498281284088,23.624594780504058],"label":"Curling"}]},"7toItxBIVtk":{"duration":227.35,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=7toItxBIVtk","annotations":[{"segment":[0,224.86382961713028],"label":"Belly dance"}]},"Hv3We5wjaJE":{"duration":126.06,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Hv3We5wjaJE","annotations":[{"segment":[0.5897082683307333,83.94846645865834],"label":"Hand washing clothes"}]},"cht49ZCSDmY":{"duration":19.39,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cht49ZCSDmY","annotations":[{"segment":[1.9963363900156006,12.219998508580343],"label":"Washing 
hands"}]},"yOcWUk9cOws":{"duration":30.3,"subset":"training","resolution":"176x144","url":"https://www.youtube.com/watch?v=yOcWUk9cOws","annotations":[{"segment":[19.269578783151328,29.06926677067083],"label":"Playing field hockey"}]},"AnPLZtJs7zE":{"duration":95.713,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=AnPLZtJs7zE","annotations":[{"segment":[25.04630173564753,82.74224588785047],"label":"Painting fence"}]},"3Vj0GcB_qh4":{"duration":66.08,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3Vj0GcB_qh4","annotations":[{"segment":[6.185708979379583,26.279226610000002],"label":"Playing violin"}]},"5U8RCiiZJM4":{"duration":97.13,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5U8RCiiZJM4","annotations":[]},"ZksAXEVmFrg":{"duration":88.33,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=ZksAXEVmFrg","annotations":[{"segment":[25.553290399999998,80.75905065678627],"label":"Using the balance beam"}]},"vxlbVkMj2Js":{"duration":177.63,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=vxlbVkMj2Js","annotations":[]},"WHYEBsWp5qY":{"duration":21.11,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=WHYEBsWp5qY","annotations":[{"segment":[3.62209527301092,19.55931447425897],"label":"Long jump"}]},"KgccYb6ufPY":{"duration":84.03,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=KgccYb6ufPY","annotations":[{"segment":[3.341653666146646,14.61154446177847],"label":"Long jump"},{"segment":[17.88767550702028,24.833073322932915],"label":"Long jump"},{"segment":[36.234009360374415,44.35881435257411],"label":"Long jump"},{"segment":[55.10452418096724,77.6443057722309],"label":"Long 
jump"}]},"sZ95YHZtVCc":{"duration":85.15,"subset":"validation","resolution":"324x240","url":"https://www.youtube.com/watch?v=sZ95YHZtVCc","annotations":[{"segment":[10.626845079563182,12.951467440717629],"label":"Polishing shoes"},{"segment":[22.78129913931357,25.43801040920437],"label":"Polishing shoes"},{"segment":[36.33052661575663,37.26037556021841],"label":"Polishing shoes"},{"segment":[54.528998814508576,57.58421677488299],"label":"Polishing shoes"}]},"JA-B3KK9A-I":{"duration":238.03,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JA-B3KK9A-I","annotations":[{"segment":[12.254158999013022,13.368173453468753],"label":"Plataform diving"},{"segment":[18.566907574262157,23.394303543570313],"label":"Plataform diving"},{"segment":[25.622332452481775,28.593037664363717],"label":"Plataform diving"},{"segment":[33.42043363367188,40.84719666337674],"label":"Plataform diving"},{"segment":[51.24466490496355,62.0134712980356],"label":"Plataform diving"},{"segment":[62.384809449520844,85.40777484160591],"label":"Plataform diving"},{"segment":[106.57404947626478,121.05623738418926],"label":"Plataform diving"},{"segment":[131.0823674742908,195.32386768123786],"label":"Plataform diving"},{"segment":[202.75063062402495,225.03091970358815],"label":"Plataform diving"}]},"x0PE_98UO3s":{"duration":123.44,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=x0PE_98UO3s","annotations":[{"segment":[12.489633385335413,119.5161840873635],"label":"Archery"}]},"OVpvpvmV2pg":{"duration":106.53,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=OVpvpvmV2pg","annotations":[{"segment":[15.539547122020936,85.34285885335413],"label":"Fixing 
bicycle"}]},"eC90hOqQ0yk":{"duration":38.058,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eC90hOqQ0yk","annotations":[{"segment":[9.594084112149531,29.72981619937695],"label":"Sumo"}]},"KKAPxOMogDE":{"duration":6.99,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KKAPxOMogDE","annotations":[{"segment":[0.01,6.989205],"label":"Drinking coffee"}]},"8hkVzhEKqpY":{"duration":68.11,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8hkVzhEKqpY","annotations":[{"segment":[16.29750114040562,58.87273213260531],"label":"Volleyball"}]},"bUKp023w6QM":{"duration":198.57999999999998,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bUKp023w6QM","annotations":[]},"fRmHJKlQmmw":{"duration":23.89,"subset":"training","resolution":"414x240","url":"https://www.youtube.com/watch?v=fRmHJKlQmmw","annotations":[{"segment":[2.45715612,12.162257410296412],"label":"Using the pommel horse"}]},"JBnL0T2XzGs":{"duration":114.428,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=JBnL0T2XzGs","annotations":[{"segment":[0.2,13.735190327613104],"label":"Curling"},{"segment":[18.55142589703588,21.227112324492982],"label":"Curling"},{"segment":[26.043347893915755,27.827138845553822],"label":"Curling"},{"segment":[31.03796255850234,40.49205460218409],"label":"Curling"},{"segment":[50.490113884555385,58.69698751950079],"label":"Curling"},{"segment":[67.08227145085803,72.43458034321374],"label":"Curling"},{"segment":[107.58140873634946,114.161],"label":"Curling"}]},"J9gOPvgsFdg":{"duration":207.262,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=J9gOPvgsFdg","annotations":[]},"OWyqpSBJH8M":{"duration":60.37,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=OWyqpSBJH8M","annotations":[{"segment":[11.678804022909215,50.10583654290171],"label":"Wrapping 
presents"}]},"2qN85UKyL2s":{"duration":77.16,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=2qN85UKyL2s","annotations":[{"segment":[0.01,77.081],"label":"Playing pool"}]},"xmb7j1e0wts":{"duration":199.692,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xmb7j1e0wts","annotations":[{"segment":[5.296016471138846,191.27965372230892],"label":"Baking cookies"}]},"0IFJNGqjsUQ":{"duration":118.1,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0IFJNGqjsUQ","annotations":[{"segment":[12.712384755070202,20.634595544461778],"label":"Walking the dog"},{"segment":[89.53940566614665,94.32957963182527],"label":"Walking the dog"}]},"gwpQuO5DPOA":{"duration":22.083,"subset":"validation","resolution":"274x240","url":"https://www.youtube.com/watch?v=gwpQuO5DPOA","annotations":[{"segment":[0.01,22.022],"label":"Welding"}]},"vynLNpomc30":{"duration":64.85,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vynLNpomc30","annotations":[{"segment":[0,64.57751273616387],"label":"Playing bagpipes"}]},"Gxjfc5O6AAc":{"duration":133.98,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Gxjfc5O6AAc","annotations":[{"segment":[12.549453978159127,80.52566302652106],"label":"Getting a haircut"}]},"uwxuvGci98g":{"duration":151.63,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uwxuvGci98g","annotations":[{"segment":[70.49085490795632,87.52220240249609],"label":"Cricket"}]},"nYxjWwJrHwk":{"duration":71.982,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nYxjWwJrHwk","annotations":[{"segment":[14.257483619344775,61.857271450858036],"label":"Snow tubing"}]},"sRYDJ379UYc":{"duration":69.753,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=sRYDJ379UYc","annotations":[{"segment":[7.853595804149117,30.199461847762826],"label":"Using the 
rowing machine"},{"segment":[34.00585281354245,57.22618614687578],"label":"Using the rowing machine"}]},"Z5eMyGnyuKg":{"duration":182.649,"subset":"testing","resolution":"1920x816","url":"https://www.youtube.com/watch?v=Z5eMyGnyuKg","annotations":[]},"aYfWH960W0Q":{"duration":99.428,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aYfWH960W0Q","annotations":[{"segment":[0,62.82066706708268],"label":"Running a marathon"}]},"KEXm-3H6eTg":{"duration":69.06,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=KEXm-3H6eTg","annotations":[{"segment":[16.67035881435257,61.5190015600624],"label":"Mixing drinks"}]},"cHHVpBYfwdU":{"duration":228.49,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cHHVpBYfwdU","annotations":[{"segment":[3.2080486240249613,227.41500245865834],"label":"Walking the dog"}]},"H-RuFL---Kc":{"duration":118.771,"subset":"testing","resolution":"1916x1080","url":"https://www.youtube.com/watch?v=H-RuFL---Kc","annotations":[]},"CphkHGSqvYY":{"duration":77.23,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CphkHGSqvYY","annotations":[]},"CrnNXizH0IQ":{"duration":102.49000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CrnNXizH0IQ","annotations":[{"segment":[40.34290104,82.35334691999999],"label":"Polishing 
forniture"}]},"mIi5fkvHDAo":{"duration":75.743,"subset":"training","resolution":"320x208","url":"https://www.youtube.com/watch?v=mIi5fkvHDAo","annotations":[{"segment":[12.849266355140186,36.55344236760124],"label":"Waterskiing"}]},"tGX22SWP82Y":{"duration":190.892,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tGX22SWP82Y","annotations":[]},"SOyOank50IY":{"duration":61.77,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=SOyOank50IY","annotations":[{"segment":[2.601649151326053,61.37964849609985],"label":"Washing dishes"}]},"asgrHR8b--E":{"duration":221.844,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=asgrHR8b--E","annotations":[]},"2Is_nJdG2to":{"duration":18.72,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=2Is_nJdG2to","annotations":[{"segment":[2.91149255,14.8604586],"label":"Brushing hair"}]},"Tw1vg9qWLx0":{"duration":150.094,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Tw1vg9qWLx0","annotations":[{"segment":[0.2,3.8044164529778683],"label":"Croquet"},{"segment":[50.79639644746788,54.53705157140738],"label":"Croquet"},{"segment":[116.72544300690163,150.093785],"label":"Croquet"}]},"nNoy9buW2Xg":{"duration":109.621,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=nNoy9buW2Xg","annotations":[]},"HCFF0svChQY":{"duration":31.23,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HCFF0svChQY","annotations":[{"segment":[3.453302652106084,29.961048361934477],"label":"Triple jump"}]},"h3qKte2gv14":{"duration":90.05,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=h3qKte2gv14","annotations":[{"segment":[28.938663652668556,89.3446102652106],"label":"Smoking a 
cigarette"}]},"ZSUgpFz2HQc":{"duration":236.66,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZSUgpFz2HQc","annotations":[]},"mcDVNexy6Ps":{"duration":41.727,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mcDVNexy6Ps","annotations":[{"segment":[4.23121170826833,21.741918624024958],"label":"Decorating the Christmas tree"}]},"3VoIt9XkPEM":{"duration":168.345,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3VoIt9XkPEM","annotations":[{"segment":[17.84347581903276,19.680304212168487],"label":"River tubing"},{"segment":[22.566748829953195,36.474163806552255],"label":"River tubing"},{"segment":[40.14782059282371,83.96929797191888],"label":"River tubing"},{"segment":[86.85574258970358,126.21635101404055],"label":"River tubing"},{"segment":[131.98924024961,136.18770514820594],"label":"River tubing"},{"segment":[139.07414976599063,160.3288783151326],"label":"River tubing"}]},"yweAN9o4QYI":{"duration":35.18,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=yweAN9o4QYI","annotations":[{"segment":[1.4524976599063961,33.90074726989079],"label":"Putting on 
makeup"}]},"h7Tq3Gsj6lY":{"duration":64.13,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=h7Tq3Gsj6lY","annotations":[{"segment":[0.20010457016920133,5.803032534906839],"label":"Skateboarding"}]},"BAf6PXfx05c":{"duration":233.802,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=BAf6PXfx05c","annotations":[]},"B2RXamsVZ9A":{"duration":48.948,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=B2RXamsVZ9A","annotations":[]},"msiX-xky6Ac":{"duration":191.94,"subset":"validation","resolution":"848x480","url":"https://www.youtube.com/watch?v=msiX-xky6Ac","annotations":[{"segment":[26.18390912,167.19131666224646],"label":"Hopscotch"}]},"XvM1rCVQWWY":{"duration":107.973,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=XvM1rCVQWWY","annotations":[{"segment":[18.154934579439253,91.46402492211838],"label":"Fixing the roof"}]},"MAXfslyf7Dw":{"duration":153.808,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MAXfslyf7Dw","annotations":[{"segment":[102.69395943837753,120.94506084243369],"label":"Sumo"}]},"ynispYtdDq8":{"duration":67.64,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=ynispYtdDq8","annotations":[{"segment":[13.516301369999999,65.83067862714508],"label":"Smoking a cigarette"}]},"8ltNqT5N-xs":{"duration":180.14,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=8ltNqT5N-xs","annotations":[{"segment":[0.01,180.021],"label":"Belly dance"}]},"8ohisLftwZ4":{"duration":83.28999999999999,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8ohisLftwZ4","annotations":[{"segment":[0,83.289772],"label":"Playing 
squash"}]},"wrf4MfW4MGg":{"duration":16.7,"subset":"training","resolution":"176x144","url":"https://www.youtube.com/watch?v=wrf4MfW4MGg","annotations":[{"segment":[0.2344092457575854,16.695146],"label":"Putting on makeup"}]},"-kAzTQi_pQA":{"duration":91.67,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=-kAzTQi_pQA","annotations":[]},"BR4dhPz42mQ":{"duration":100.868,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=BR4dhPz42mQ","annotations":[{"segment":[4.392360784904942,7.686633168819627],"label":"Drum corps"},{"segment":[18.510671001682166,58.041939608658396],"label":"Drum corps"},{"segment":[66.82666596576422,93.49458526412121],"label":"Drum corps"}]},"wlcU-u-xsH4":{"duration":34.761,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wlcU-u-xsH4","annotations":[{"segment":[0.01,17.840297191887675],"label":"Tug of war"}]},"pI-rY6fLM1Q":{"duration":126.549,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pI-rY6fLM1Q","annotations":[{"segment":[15.17223831775701,115.26960280373832],"label":"Ice fishing"}]},"VcthLhKIntA":{"duration":234.902,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VcthLhKIntA","annotations":[{"segment":[69.99752679832974,220.54905361952999],"label":"Hula hoop"}]},"5oD3-y66g_8":{"duration":146.703,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=5oD3-y66g_8","annotations":[{"segment":[36.85249474001049,138.3111100365964],"label":"Hula 
hoop"}]},"b3bawTEPLtA":{"duration":187.43,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=b3bawTEPLtA","annotations":[{"segment":[7.602524683300257,177.7821138471139],"label":"Ballet"}]},"eMQ5sazq-q0":{"duration":140.853,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=eMQ5sazq-q0","annotations":[{"segment":[5.9215373831775695,120.62390965732087],"label":"Cutting the grass"}]},"CvMXJpJZ6YA":{"duration":222.053,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=CvMXJpJZ6YA","annotations":[]},"jto8_gMKUjE":{"duration":109.576,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jto8_gMKUjE","annotations":[{"segment":[11.467731860294819,55.74523712948518],"label":"Rafting"},{"segment":[58.53678029017161,94.30925713416536],"label":"Rafting"}]},"-Vu1wUnFZvU":{"duration":190.776,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=-Vu1wUnFZvU","annotations":[]},"LPeyqO2pRd4":{"duration":84.82,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=LPeyqO2pRd4","annotations":[{"segment":[1.36395594,75.95917646957878],"label":"Mixing drinks"}]},"VyD5Z0S9tXU":{"duration":139.738,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VyD5Z0S9tXU","annotations":[{"segment":[13.733969244484063,33.57192481984993],"label":"Peeling potatoes"},{"segment":[44.90789943434472,90.68779691595827],"label":"Peeling potatoes"}]},"js9SDSngI7A":{"duration":113.34,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=js9SDSngI7A","annotations":[{"segment":[55.519020238360554,57.640766234731025],"label":"Layup drill in basketball"},{"segment":[64.71325252418096,66.4813741778471],"label":"Layup drill in basketball"},{"segment":[71.07849047737909,72.49298780031201],"label":"Layup drill in 
basketball"},{"segment":[86.81477319500779,87.69883402184087],"label":"Layup drill in basketball"}]},"sd2MhdKdQTo":{"duration":222.94,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sd2MhdKdQTo","annotations":[{"segment":[131.11765021349154,147.02742352],"label":"Playing harmonica"}]},"auxBRPzLiIo":{"duration":18.09,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=auxBRPzLiIo","annotations":[{"segment":[0.5285517223088924,17.831666],"label":"Spinning"}]},"EMDTvPUEr7E":{"duration":59.24,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EMDTvPUEr7E","annotations":[{"segment":[0.01,17.280292131413148],"label":"Hopscotch"},{"segment":[26.059050166088277,58.77147484266717],"label":"Hopscotch"}]},"DzCVpvVJCo8":{"duration":50.83,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=DzCVpvVJCo8","annotations":[{"segment":[2.2995723728549144,21.96488094071763],"label":"Getting a piercing"},{"segment":[32.51119561622465,45.7535606599064],"label":"Getting a piercing"}]},"0X34KkUlCAI":{"duration":101.611,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0X34KkUlCAI","annotations":[{"segment":[0.01,95.87598283931358],"label":"Tug of 
war"}]},"Ta70WnVT6Lc":{"duration":172.687,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ta70WnVT6Lc","annotations":[{"segment":[4.57769422776911,17.772224648985958],"label":"Longboarding"},{"segment":[23.69629953198128,31.77458346333853],"label":"Longboarding"},{"segment":[35.81372542901716,79.97501092043682],"label":"Longboarding"},{"segment":[92.90026521060842,158.36555382215286],"label":"Longboarding"}]},"ydMOC0TQO7k":{"duration":52.45,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ydMOC0TQO7k","annotations":[]},"1GQARL4coRo":{"duration":117.819,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1GQARL4coRo","annotations":[{"segment":[0,106.60558436817472],"label":"Surfing"}]},"CIAZbDkD0TE":{"duration":118.53999999999999,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=CIAZbDkD0TE","annotations":[{"segment":[17.189911076443057,41.41064586583464],"label":"Ironing clothes"},{"segment":[0.2,86.70353978159126],"label":"Ironing clothes"}]},"f0Z-qud2N74":{"duration":17.253,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=f0Z-qud2N74","annotations":[{"segment":[5.1985390015600625,12.479180187207488],"label":"Doing a 
powerbomb"}]},"wfB4vYKRvv0":{"duration":77.49,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=wfB4vYKRvv0","annotations":[]},"0QyHaXliXKs":{"duration":158.57,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=0QyHaXliXKs","annotations":[{"segment":[46.49289859594383,54.653886115444614],"label":"Sumo"},{"segment":[100.65217940717628,116.24691107644306],"label":"Sumo"}]},"3nqzJlFsXXA":{"duration":79.67,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3nqzJlFsXXA","annotations":[]},"g_qHPA0RkAY":{"duration":101.727,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=g_qHPA0RkAY","annotations":[{"segment":[6.506694967118412,71.41494476105575],"label":"Carving jack-o-lanterns"}]},"6w4OWjY6k_k":{"duration":53.731,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=6w4OWjY6k_k","annotations":[{"segment":[0.01,52.67644099312611],"label":"Shuffleboard"}]},"U36rsW_WhUA":{"duration":133.515,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=U36rsW_WhUA","annotations":[{"segment":[21.498342061800376,126.07925016148884],"label":"Clipping cat claws"}]},"HguqDEvSN68":{"duration":32.114,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=HguqDEvSN68","annotations":[{"segment":[1.4514929797191887,30.731609984399377],"label":"Riding bumper 
cars"}]},"POafWgyeV0I":{"duration":169.81,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=POafWgyeV0I","annotations":[{"segment":[92.28272714664588,112.93711084087364],"label":"Dodgeball"},{"segment":[142.85948721840873,160.07147363026522],"label":"Dodgeball"},{"segment":[14.431588606864276,37.733970210608426],"label":"Dodgeball"}]},"1HWajOhmPVg":{"duration":26.7,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1HWajOhmPVg","annotations":[{"segment":[4.69741584,13.758739540000002],"label":"Hammer throw"}]},"USOc5S2-3zA":{"duration":153.3,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=USOc5S2-3zA","annotations":[{"segment":[4.065629271450858,153.29814058956916],"label":"Belly dance"}]},"6qghJRLpHpo":{"duration":35.09,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6qghJRLpHpo","annotations":[{"segment":[0,35.08535],"label":"Kayaking"}]},"09MaNbzc2TA":{"duration":143.616,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=09MaNbzc2TA","annotations":[{"segment":[8.286672386895477,19.260914196567864],"label":"Beach soccer"},{"segment":[28.891371294851794,38.969756630265216],"label":"Beach soccer"},{"segment":[43.225074882995315,52.40760374414977],"label":"Beach soccer"},{"segment":[62.0380608424337,88.91375507020281],"label":"Beach soccer"},{"segment":[96.30457098283931,125.88536973478938],"label":"Beach soccer"}]},"G9m5zxl2f08":{"duration":98.15,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=G9m5zxl2f08","annotations":[{"segment":[2.7561838963354455,94.78210176842448],"label":"Washing face"}]},"Vhf-vNRYQEg":{"duration":86.94,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Vhf-vNRYQEg","annotations":[{"segment":[5.696212762796226,7.052453896795328],"label":"Playing 
accordion"},{"segment":[11.528049638992362,15.054276587390028],"label":"Playing accordion"},{"segment":[38.24599967550702,40.95848192199688],"label":"Playing accordion"},{"segment":[44.21346061778472,46.79031875195008],"label":"Playing accordion"},{"segment":[50.1809215600624,66.18456681435258],"label":"Playing accordion"},{"segment":[68.49017672386896,81.91696384399376],"label":"Playing accordion"}]},"FPf7mAgdy0I":{"duration":133.167,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=FPf7mAgdy0I","annotations":[]},"wEI_iPEfLNk":{"duration":214.76,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wEI_iPEfLNk","annotations":[{"segment":[125.64042063182528,131.6711608221529],"label":"Washing face"}]},"JyfelXz6GaA":{"duration":123.902,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=JyfelXz6GaA","annotations":[{"segment":[13.890953271028037,118.07310280373832],"label":"Futsal"}]},"W40iaV960sw":{"duration":16.974,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=W40iaV960sw","annotations":[{"segment":[0.2,16.721],"label":"Camel ride"}]},"XFlKGUFgBnc":{"duration":167.0,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XFlKGUFgBnc","annotations":[{"segment":[10.421086666595915,157.8794627794072],"label":"Zumba"}]},"RNA9HtKreug":{"duration":91.116,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=RNA9HtKreug","annotations":[{"segment":[43.36115059748627,90.12317705395628],"label":"Table soccer"}]},"R_YZNqP1gSE":{"duration":119.4,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=R_YZNqP1gSE","annotations":[{"segment":[42.74801521856792,52.06130829450865],"label":"Getting a haircut"},{"segment":[57.463017684867395,83.7265038876755],"label":"Getting a 
haircut"}]},"Amnp8434e90":{"duration":193.49,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Amnp8434e90","annotations":[{"segment":[6.337347893915757,105.62246489859594],"label":"Windsurfing"},{"segment":[117.99538221528861,170.8066146645866],"label":"Windsurfing"}]},"ADN1EPKzwlE":{"duration":19.598,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ADN1EPKzwlE","annotations":[]},"CV3pIbE8BnE":{"duration":217.11,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=CV3pIbE8BnE","annotations":[{"segment":[93.6576,203.10202808112325],"label":"Shaving legs"}]},"sVT71OQjHE0":{"duration":55.449,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=sVT71OQjHE0","annotations":[{"segment":[13.40803899375975,55.44873015873016],"label":"Plastering"}]},"8tddzer_NfY":{"duration":189.081,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=8tddzer_NfY","annotations":[{"segment":[20.64837183963549,173.15134606864277],"label":"Baton twirling"}]},"R547-iOgb6g":{"duration":141.851,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=R547-iOgb6g","annotations":[{"segment":[35.62092538683882,49.535976788707984],"label":"Mooping floor"},{"segment":[73.16947678870797,89.73501417188555],"label":"Mooping floor"}]},"opCMfcAl7yw":{"duration":211.162,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=opCMfcAl7yw","annotations":[{"segment":[0.2,187.07226209048363],"label":"Swimming"}]},"yMK2zxDDs2A":{"duration":65.85,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=yMK2zxDDs2A","annotations":[]},"2l50doS2wB4":{"duration":203.66,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2l50doS2wB4","annotations":[{"segment":[11.30527762,151.5200546021841],"label":"Cleaning 
shoes"}]},"Zp9mSiw8Vkw":{"duration":49.807,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Zp9mSiw8Vkw","annotations":[{"segment":[0.2,48.30543213728549],"label":"Table soccer"}]},"nfjIQXyL7_Y":{"duration":11.75,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=nfjIQXyL7_Y","annotations":[{"segment":[0.7148557628705148,11.749296],"label":"Doing a powerbomb"}]},"D74hnUdBs04":{"duration":128.36,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=D74hnUdBs04","annotations":[{"segment":[5.807234585982079,128.35990929705216],"label":"Volleyball"}]},"mAWuiYWXmb8":{"duration":23.592,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=mAWuiYWXmb8","annotations":[{"segment":[0.01,20.768230109204367],"label":"Using the rowing machine"}]},"2TEJnQzCPUM":{"duration":72.818,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2TEJnQzCPUM","annotations":[{"segment":[3.4053510140405616,67.3309407176287],"label":"Rollerblading"}]},"eSQnn2RFxeY":{"duration":124.6,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eSQnn2RFxeY","annotations":[{"segment":[14.861805732449298,17.970157258190326],"label":"Sailing"}]},"3IdaN49b9xg":{"duration":36.99,"subset":"training","resolution":"484x480","url":"https://www.youtube.com/watch?v=3IdaN49b9xg","annotations":[{"segment":[0.01,27.98728893915757],"label":"Carving 
jack-o-lanterns"}]},"bsl3oM4svdQ":{"duration":49.273,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=bsl3oM4svdQ","annotations":[{"segment":[0.01,43.249115444617786],"label":"Kneeling"}]},"ERubh80hIuE":{"duration":194.05,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ERubh80hIuE","annotations":[]},"qemvXlRMQgM":{"duration":98.52000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qemvXlRMQgM","annotations":[{"segment":[79.77075827457098,87.14840065834633],"label":"Smoking hookah"},{"segment":[90.68352096723869,95.60194922308892],"label":"Smoking hookah"}]},"JUfowIpmwaE":{"duration":187.78,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=JUfowIpmwaE","annotations":[{"segment":[0.01,187.779794],"label":"Windsurfing"}]},"EVcn8N-YxtI":{"duration":175.2,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EVcn8N-YxtI","annotations":[{"segment":[9.29267421581217,170.27458930738183],"label":"Tango"}]},"y7tActqdm9A":{"duration":146.936,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=y7tActqdm9A","annotations":[{"segment":[5.583153620176227,144.01789661083043],"label":"Bullfighting"}]},"IjKWgD0y4rc":{"duration":147.378,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IjKWgD0y4rc","annotations":[{"segment":[12.645417979984504,147.37659863945578],"label":"Rafting"}]},"RrVsNvO6Yd4":{"duration":116.5,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=RrVsNvO6Yd4","annotations":[{"segment":[7.1224431599999996,111.67862714508581],"label":"Breakdancing"}]},"Ol2rfUv3zHI":{"duration":157.339,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ol2rfUv3zHI","annotations":[{"segment":[10.55083151326053,126.62607800312011],"label":"Playing 
drums"}]},"KZ9RARLLAys":{"duration":154.437,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KZ9RARLLAys","annotations":[]},"2xQVZorob1Q":{"duration":136.02,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=2xQVZorob1Q","annotations":[{"segment":[37.336046451069485,127.99004645106947],"label":"Shoveling snow"}]},"5VHqMj5iikc":{"duration":148.655,"subset":"training","resolution":"1280x544","url":"https://www.youtube.com/watch?v=5VHqMj5iikc","annotations":[{"segment":[39.64238845553822,66.99795475819032],"label":"BMX"},{"segment":[109.43699219968799,136.79624024960998],"label":"BMX"}]},"lp_Rwbp8S1k":{"duration":170.3,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=lp_Rwbp8S1k","annotations":[{"segment":[43.55415288611544,167.04611076443058],"label":"Shoveling snow"}]},"SLrhe0T_ep0":{"duration":23.081,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=SLrhe0T_ep0","annotations":[]},"-xiKk3ZKDt8":{"duration":38.96,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-xiKk3ZKDt8","annotations":[]},"T1T3UvwZcKU":{"duration":6.223,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=T1T3UvwZcKU","annotations":[{"segment":[0.23785058068989423,3.150306670770232],"label":"Fun sliding 
down"}]},"i7L1KNGeEqU":{"duration":158.267,"subset":"training","resolution":"2560x1080","url":"https://www.youtube.com/watch?v=i7L1KNGeEqU","annotations":[{"segment":[0.2,13.082235569422776],"label":"Skiing"},{"segment":[21.96828237129485,140.96039157566304],"label":"Skiing"},{"segment":[142.19472074882998,152.06935413416537],"label":"Skiing"}]},"5imysrEC7ok":{"duration":65.64,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=5imysrEC7ok","annotations":[]},"3FAvxuTw4NI":{"duration":148.422,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3FAvxuTw4NI","annotations":[{"segment":[5.325592553822154,139.85469141341653],"label":"Kite flying"}]},"PfyOv5VFk74":{"duration":79.37,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=PfyOv5VFk74","annotations":[]},"ECjumI_otNw":{"duration":160.59,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ECjumI_otNw","annotations":[{"segment":[11.519447737909516,158.03646021840873],"label":"Cheerleading"}]},"LlZPcEaTNcM":{"duration":70.84,"subset":"testing","resolution":"3840x2160","url":"https://www.youtube.com/watch?v=LlZPcEaTNcM","annotations":[]},"eS-VU_XV1K8":{"duration":123.9,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=eS-VU_XV1K8","annotations":[]},"6YmZNDXap64":{"duration":142.339,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=6YmZNDXap64","annotations":[{"segment":[4.663189956169676,142.13832199546488],"label":"Playing rubik cube"}]},"j81ndpMC9gI":{"duration":91.766,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=j81ndpMC9gI","annotations":[]},"S5MD51gg-vA":{"duration":193.03,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=S5MD51gg-vA","annotations":[{"segment":[6.3255865834633385,30.121840873634945],"label":"Tennis serve with ball 
bouncing"},{"segment":[40.06204836193447,50.303474258970354],"label":"Tennis serve with ball bouncing"},{"segment":[106.028879875195,180.12860842433696],"label":"Tennis serve with ball bouncing"}]},"FC5FsugGt8o":{"duration":137.788,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=FC5FsugGt8o","annotations":[{"segment":[0,137.441],"label":"Playing beach volleyball"}]},"bb9AIdvKkZU":{"duration":31.16,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=bb9AIdvKkZU","annotations":[{"segment":[0.01,12.785319522620904],"label":"Shoveling snow"},{"segment":[21.827408614664584,31.161178],"label":"Shoveling snow"}]},"ZJnXQw84xvo":{"duration":79.413,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZJnXQw84xvo","annotations":[]},"aS6Qv9N5mG0":{"duration":26.7,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=aS6Qv9N5mG0","annotations":[{"segment":[0,26.702946],"label":"Playing badminton"}]},"J8Ziy8QR8WQ":{"duration":51.74,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=J8Ziy8QR8WQ","annotations":[{"segment":[5.242691107644306,48.232758190327615],"label":"Cricket"}]},"U6KrVIJSeDo":{"duration":66.224,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=U6KrVIJSeDo","annotations":[{"segment":[29.950530421216847,63.515780031201245],"label":"Using the rowing machine"}]},"cK1ssoaX768":{"duration":208.887,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=cK1ssoaX768","annotations":[{"segment":[0.01,208.841],"label":"Beach soccer"}]},"5LHIoaSvKCU":{"duration":70.148,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=5LHIoaSvKCU","annotations":[{"segment":[16.404290171606863,52.83688455538221],"label":"Making a 
lemonade"}]},"98YZQ0gNjpQ":{"duration":142.73,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=98YZQ0gNjpQ","annotations":[{"segment":[13.137659764893998,138.05676363108947],"label":"Doing motocross"}]},"ZXm8ed-ZHRs":{"duration":169.23,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZXm8ed-ZHRs","annotations":[{"segment":[6.940193000000001,161.921887675507],"label":"Using uneven bars"}]},"0y_5NIIvUzI":{"duration":202.896,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0y_5NIIvUzI","annotations":[{"segment":[28.39912400418955,194.06068069529525],"label":"Skiing"}]},"59nQKYriHQc":{"duration":50.55,"subset":"training","resolution":"576x360","url":"https://www.youtube.com/watch?v=59nQKYriHQc","annotations":[{"segment":[13.201142745709829,25.180687207488297],"label":"Bathing dog"}]},"NEthbdTgx-M":{"duration":237.517,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NEthbdTgx-M","annotations":[{"segment":[0.01,237.51691609977325],"label":"Snowboarding"}]},"28WDlVOgslg":{"duration":141.07999999999998,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=28WDlVOgslg","annotations":[]},"rCSFBiXxbVs":{"duration":56.24,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=rCSFBiXxbVs","annotations":[{"segment":[24.390588049921995,44.39437968798752],"label":"Getting a haircut"}]},"uMw7YplUbAM":{"duration":126.78,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=uMw7YplUbAM","annotations":[{"segment":[18.888581828987444,96.0251987222346],"label":"Playing 
accordion"}]},"4WGjeXTgpis":{"duration":93.46,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4WGjeXTgpis","annotations":[]},"pe0MhPhhVIk":{"duration":52.29,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=pe0MhPhhVIk","annotations":[{"segment":[0,52.291336],"label":"Playing badminton"}]},"pXcFBfv5Sf4":{"duration":84.382,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pXcFBfv5Sf4","annotations":[{"segment":[4.212483694227769,9.083167965678626],"label":"BMX"},{"segment":[17.37649523868955,24.221781241809673],"label":"BMX"},{"segment":[42.9146776349454,47.12716132917317],"label":"BMX"},{"segment":[50.023243868954765,63.58217575975039],"label":"BMX"},{"segment":[74.7715855725429,77.27274776599064],"label":"BMX"}]},"EBhAGOXvPcA":{"duration":79.715,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EBhAGOXvPcA","annotations":[{"segment":[56.406659461390134,64.09284188751766],"label":"Beer pong"},{"segment":[68.30784515345857,73.01873115656899],"label":"Beer pong"}]},"m--b-Ltjm_Y":{"duration":143.152,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=m--b-Ltjm_Y","annotations":[{"segment":[53.76480655226209,144],"label":"Powerbocking"}]},"4k-F7EXpcrM":{"duration":11.03,"subset":"training","resolution":"426x226","url":"https://www.youtube.com/watch?v=4k-F7EXpcrM","annotations":[{"segment":[0.2,10.781],"label":"Using the rowing 
machine"}]},"2NAs35b7fck":{"duration":158.616,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=2NAs35b7fck","annotations":[{"segment":[0.2,158.4149433106576],"label":"Futsal"}]},"4_MR2Tpc7SM":{"duration":11.982,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=4_MR2Tpc7SM","annotations":[{"segment":[7.046838712948519,9.046869859594384],"label":"Croquet"}]},"6kgJx6ahgq0":{"duration":72.66,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=6kgJx6ahgq0","annotations":[{"segment":[0,0],"label":"Playing kickball"}]},"5K7V1uJEYFM":{"duration":17.95,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5K7V1uJEYFM","annotations":[]},"-fMxoShIXiM":{"duration":117.168,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=-fMxoShIXiM","annotations":[{"segment":[44.4176003693209,95.41558519188767],"label":"Fixing the roof"}]},"cy-RLevyo_8":{"duration":142.292,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=cy-RLevyo_8","annotations":[{"segment":[22.856198127925122,124.71227145085803],"label":"Roof shingle removal"}]},"Z0eBz6QsI-c":{"duration":41.19,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Z0eBz6QsI-c","annotations":[{"segment":[0.01,40.632236134561246],"label":"Playing racquetball"}]},"8fVB8gAjTO4":{"duration":75.698,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8fVB8gAjTO4","annotations":[{"segment":[6.652490558412063,70.06765651844003],"label":"Playing 
drums"}]},"SXZjg8ap1Uk":{"duration":186.18,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=SXZjg8ap1Uk","annotations":[{"segment":[17.18549043049322,177.81183622488575],"label":"Cheerleading"}]},"rsJUOsr1u3o":{"duration":98.36,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=rsJUOsr1u3o","annotations":[{"segment":[3.5292890219010125,35.292890219010125],"label":"Making an omelette"},{"segment":[41.27733682136401,78.2581466302652],"label":"Making an omelette"}]},"ChH3zlLeWug":{"duration":159.38,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ChH3zlLeWug","annotations":[{"segment":[10.494511324141003,109.05263978702659],"label":"Shaving"}]},"iksziMLKsBQ":{"duration":43.05,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=iksziMLKsBQ","annotations":[{"segment":[4.753533395801711,7.833288088762359],"label":"Playing pool"},{"segment":[15.934381395202232,26.177912948856974],"label":"Playing pool"}]},"ROrLMf0zXko":{"duration":49.71,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ROrLMf0zXko","annotations":[]},"UqE_T7oG1Gc":{"duration":196.047,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=UqE_T7oG1Gc","annotations":[{"segment":[23.94082968464575,180.59446985287005],"label":"Braiding hair"}]},"1dvrNvxw43Q":{"duration":47.323,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=1dvrNvxw43Q","annotations":[{"segment":[0.2,41.77518224299066],"label":"Ice fishing"}]},"AKk_3aRSVec":{"duration":145.8,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AKk_3aRSVec","annotations":[]},"03JdaRepHkA":{"duration":123.48,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=03JdaRepHkA","annotations":[{"segment":[21.191903276131043,86.30884243369735],"label":"Mixing 
drinks"}]},"efRG5AMu1dU":{"duration":30.14,"subset":"testing","resolution":"308x232","url":"https://www.youtube.com/watch?v=efRG5AMu1dU","annotations":[]},"V4ZKb2ne34U":{"duration":68.36,"subset":"testing","resolution":"800x480","url":"https://www.youtube.com/watch?v=V4ZKb2ne34U","annotations":[]},"uZNgbdDLclM":{"duration":24.521,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=uZNgbdDLclM","annotations":[]},"tmOndbOAHvM":{"duration":193.91,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=tmOndbOAHvM","annotations":[]},"9OVSvoKwa0A":{"duration":111.316,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=9OVSvoKwa0A","annotations":[{"segment":[0.2,107.78951794071763],"label":"Riding bumper cars"}]},"87fIi0-1JVM":{"duration":208.73,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=87fIi0-1JVM","annotations":[{"segment":[45.739581237129485,119.63911818252731],"label":"Getting a piercing"}]},"sk6hmmlS8kY":{"duration":82.431,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sk6hmmlS8kY","annotations":[]},"SSldR9yOJq8":{"duration":236.217,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=SSldR9yOJq8","annotations":[{"segment":[9.949841128339012,233.6370101987753],"label":"Canoeing"}]},"soVddfhLIy0":{"duration":224.79,"subset":"testing","resolution":"844x480","url":"https://www.youtube.com/watch?v=soVddfhLIy0","annotations":[]},"nEAypudaSFs":{"duration":126.78,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=nEAypudaSFs","annotations":[{"segment":[0.01,122.62744224054676],"label":"Playing harmonica"}]},"Eo3PUIfZSJU":{"duration":101.19,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Eo3PUIfZSJU","annotations":[{"segment":[7.103995553822153,99.14020461778472],"label":"Using the balance 
beam"}]},"FXN6qiDsClw":{"duration":87.377,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=FXN6qiDsClw","annotations":[{"segment":[20.415114332337296,29.80606692521245],"label":"Welding"},{"segment":[44.913251531142045,53.35149796884735],"label":"Welding"},{"segment":[68.73088386292835,72.67780590654206],"label":"Welding"},{"segment":[80.02724695327103,81.38825455451713],"label":"Welding"}]},"wZZ1W6D1nwA":{"duration":58.561,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wZZ1W6D1nwA","annotations":[{"segment":[4.012923556942278,45.160834633385335],"label":"Rope skipping"}]},"PXBcPu2_KOo":{"duration":164.47,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PXBcPu2_KOo","annotations":[{"segment":[18.88149966,91.7252418096724],"label":"Using the balance beam"}]},"ReumF9EcX7c":{"duration":196.882,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=ReumF9EcX7c","annotations":[]},"gvHQDDF7EGU":{"duration":172.94,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gvHQDDF7EGU","annotations":[{"segment":[1.8886026298194787,168.3554336349454],"label":"Playing saxophone"}]},"g_KbloQigw8":{"duration":17.903,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=g_KbloQigw8","annotations":[{"segment":[6.66701872074883,16.988344773790953],"label":"Welding"}]},"d7gY7YJ3Fdc":{"duration":12.354,"subset":"validation","resolution":"144x176","url":"https://www.youtube.com/watch?v=d7gY7YJ3Fdc","annotations":[{"segment":[0.7130444898595945,5.5694556099843995],"label":"Putting on shoes"}]},"KcgpoXEh44M":{"duration":98.85,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KcgpoXEh44M","annotations":[{"segment":[28.1357488299532,85.79476287051483],"label":"Wrapping 
presents"}]},"l1gyH11zqNk":{"duration":131.193,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=l1gyH11zqNk","annotations":[]},"7dXqXFoju2M":{"duration":79.32,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=7dXqXFoju2M","annotations":[]},"yai74Fw-j24":{"duration":115.473,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=yai74Fw-j24","annotations":[]},"UlWLcqIvLKk":{"duration":131.17,"subset":"validation","resolution":"640x352","url":"https://www.youtube.com/watch?v=UlWLcqIvLKk","annotations":[{"segment":[3.6833875132605307,130.14635880187208],"label":"Ironing clothes"}]},"ytXLI-2wgaQ":{"duration":185.876,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=ytXLI-2wgaQ","annotations":[{"segment":[17.6886426749587,169.3470052815718],"label":"Playing ice hockey"}]},"FhWxgGQtCJY":{"duration":117.586,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FhWxgGQtCJY","annotations":[{"segment":[0.8058846028376427,108.31817956078157],"label":"Spread mulch"}]},"QuU6I9w1btI":{"duration":82.94,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QuU6I9w1btI","annotations":[{"segment":[10.092724820592824,41.7942322698908],"label":"Vacuuming floor"}]},"fJEXzQ0L1uU":{"duration":185.02,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fJEXzQ0L1uU","annotations":[{"segment":[96.40491650670826,119.78455194695788],"label":"Hopscotch"}]},"exCENNu1qBU":{"duration":123.554,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=exCENNu1qBU","annotations":[{"segment":[0.2,119.51191302689301],"label":"Powerbocking"}]},"i-8vfNV-klk":{"duration":100.264,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=i-8vfNV-klk","annotations":[{"segment":[0,5.318202779811873],"label":"Trimming branches or 
hedges"}]},"CbGBQPE9t6g":{"duration":56.402,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CbGBQPE9t6g","annotations":[]},"Fn0-GpW-Ufw":{"duration":65.2,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=Fn0-GpW-Ufw","annotations":[]},"CvkyjR4yk60":{"duration":197.93,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=CvkyjR4yk60","annotations":[{"segment":[13.895023619344773,149.75747678627144],"label":"Breakdancing"}]},"xiICsWY0xOk":{"duration":111.967,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xiICsWY0xOk","annotations":[{"segment":[8.266684691912529,19.079647442952716],"label":"Making a lemonade"},{"segment":[21.695686818204372,93.54956727172475],"label":"Making a lemonade"}]},"Kv-A_H9TXug":{"duration":91.626,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Kv-A_H9TXug","annotations":[{"segment":[33.85328705148206,67.42089235569424],"label":"Doing crunches"}]},"kS_wMOFfMDk":{"duration":23.69,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=kS_wMOFfMDk","annotations":[{"segment":[1.551860817472699,22.834523457098285],"label":"Washing hands"}]},"a2IvS0xbNGQ":{"duration":176.867,"subset":"testing","resolution":"360x240","url":"https://www.youtube.com/watch?v=a2IvS0xbNGQ","annotations":[]},"JGxf-jke0e8":{"duration":27.12,"subset":"testing","resolution":"654x480","url":"https://www.youtube.com/watch?v=JGxf-jke0e8","annotations":[]},"Pho9Hgdvc0g":{"duration":120.93,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Pho9Hgdvc0g","annotations":[]},"TmA7xnrULME":{"duration":64.134,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TmA7xnrULME","annotations":[{"segment":[0.2,32.04209672386895],"label":"Building sandcastles"},{"segment":[52.69816536661466,64.38799999999999],"label":"Building 
sandcastles"}]},"zoWqv-sIdBY":{"duration":100.5,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=zoWqv-sIdBY","annotations":[]},"L0arB1mOuSE":{"duration":91.14,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=L0arB1mOuSE","annotations":[{"segment":[0.01,11.090146348711091],"label":"Playing kickball"},{"segment":[18.62575861129683,27.867547235222737],"label":"Playing kickball"},{"segment":[46.066761756184526,47.34639402718966],"label":"Playing kickball"},{"segment":[55.45073174355546,60.42707946413095],"label":"Playing kickball"},{"segment":[70.2375935418369,85.45099943045341],"label":"Playing kickball"}]},"vKYHz3PyXGM":{"duration":191.519,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vKYHz3PyXGM","annotations":[]},"AjtrZW0HR1g":{"duration":131.17,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AjtrZW0HR1g","annotations":[]},"mZe-MuljtiA":{"duration":171.248,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mZe-MuljtiA","annotations":[]},"l-YtPYZ_534":{"duration":104.328,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=l-YtPYZ_534","annotations":[{"segment":[45.66335788161994,54.60102579439252],"label":"Croquet"},{"segment":[61.10114791277258,74.91390741433023],"label":"Croquet"},{"segment":[82.87655700934579,84.01407838006232],"label":"Croquet"}]},"BSKolF3MMe4":{"duration":178.051,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=BSKolF3MMe4","annotations":[{"segment":[0.01,175.94284718081568],"label":"Blowing leaves"}]},"x86YIU9TIPw":{"duration":37.99,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=x86YIU9TIPw","annotations":[{"segment":[2.0123120124804994,34.56441809672387],"label":"Playing 
lacrosse"}]},"gCf7wYa4AFA":{"duration":130.2,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=gCf7wYa4AFA","annotations":[{"segment":[23.580737279999997,54.70882074],"label":"Playing field hockey"},{"segment":[63.25605684,101.74921488],"label":"Playing field hockey"}]},"KBnNTxNygkI":{"duration":223.21,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KBnNTxNygkI","annotations":[]},"DzjOS3pQTXs":{"duration":172.9,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=DzjOS3pQTXs","annotations":[]},"Bg_CMSihJl0":{"duration":38.36,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Bg_CMSihJl0","annotations":[{"segment":[3.3512080873634944,27.22856570982839],"label":"Knitting"}]},"iixZLnyLuN8":{"duration":61.673,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=iixZLnyLuN8","annotations":[]},"FZk40J_drws":{"duration":151.302,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FZk40J_drws","annotations":[{"segment":[23.095825545171337,113.59375309345793],"label":"Running a marathon"}]},"5dN-MpXG9OE":{"duration":9.5,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=5dN-MpXG9OE","annotations":[]},"dufQjqa3v6w":{"duration":55.59,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=dufQjqa3v6w","annotations":[{"segment":[29.398635304212167,55.58857],"label":"Playing 
harmonica"}]},"TY_xri4VWPg":{"duration":10.26,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TY_xri4VWPg","annotations":[]},"eyBSKNXo6Vo":{"duration":124.714,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eyBSKNXo6Vo","annotations":[{"segment":[18.53227634702235,74.86728740232714],"label":"Waterskiing"},{"segment":[79.52949521380063,81.47208180191458],"label":"Waterskiing"},{"segment":[86.13428961338809,92.15630803654136],"label":"Waterskiing"},{"segment":[96.42999853039207,104.78312085928209],"label":"Waterskiing"}]},"crz2vi_dguk":{"duration":172.478,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=crz2vi_dguk","annotations":[{"segment":[14.261036857800843,23.40962654016365],"label":"Cleaning sink"},{"segment":[25.83131204431851,163.0601571575663],"label":"Cleaning sink"}]},"Q9UovyMsTLc":{"duration":154.32,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Q9UovyMsTLc","annotations":[{"segment":[34.3199556396057,115.67985915904583],"label":"Making a cake"}]},"uaLMHEtFlNA":{"duration":175.61,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uaLMHEtFlNA","annotations":[{"segment":[4.6361568,173.19086680577223],"label":"Mowing the lawn"}]},"J7Q3b1uFHyc":{"duration":115.056,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=J7Q3b1uFHyc","annotations":[{"segment":[24.78000233478727,106.66174918017128],"label":"Snow 
tubing"}]},"NlbnZy3zyVk":{"duration":181.26,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NlbnZy3zyVk","annotations":[]},"4w_ScRrkkmU":{"duration":183.531,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=4w_ScRrkkmU","annotations":[]},"a5Xc9ZgN2yo":{"duration":19.48,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=a5Xc9ZgN2yo","annotations":[{"segment":[0.5457566302652106,18.88924336973479],"label":"Hopscotch"}]},"yggwFa68Cp4":{"duration":110.2,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=yggwFa68Cp4","annotations":[]},"pYaZLHpl_Ww":{"duration":197.6,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pYaZLHpl_Ww","annotations":[]},"Acogo5mgX4Q":{"duration":86.286,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Acogo5mgX4Q","annotations":[{"segment":[26.030146417445483,74.08235514018692],"label":"Raking leaves"}]},"G0VWk5WqDp0":{"duration":6.29,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=G0VWk5WqDp0","annotations":[]},"OhVBwILhXvs":{"duration":37.872,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=OhVBwILhXvs","annotations":[]},"6QrVxwNUbBk":{"duration":184.878,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6QrVxwNUbBk","annotations":[{"segment":[40.95558774731941,81.33433623059209],"label":"Skiing"},{"segment":[108.73420127281283,177.0896529828393],"label":"Skiing"}]},"eQMT-WmxKM8":{"duration":67.129,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=eQMT-WmxKM8","annotations":[{"segment":[2.5639383775351012,53.738055382215286],"label":"Baton 
twirling"}]},"At7fzSxdiCg":{"duration":43.61,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=At7fzSxdiCg","annotations":[{"segment":[2.480405616224649,42.234851794071766],"label":"Triple jump"}]},"ygmYkKbS-4U":{"duration":113.035,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ygmYkKbS-4U","annotations":[{"segment":[0.01,38.089648190009235],"label":"Mooping floor"}]},"VOLBBe1br3Q":{"duration":12.28,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=VOLBBe1br3Q","annotations":[{"segment":[0.01,4.733211677067083],"label":"Cricket"}]},"Yx5dmoGLD-4":{"duration":160.497,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=Yx5dmoGLD-4","annotations":[]},"hnO1Fse52oA":{"duration":217.386,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hnO1Fse52oA","annotations":[]},"_-4ngMPCA9A":{"duration":154.25,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=_-4ngMPCA9A","annotations":[{"segment":[30.0031675,88.09988750000001],"label":"Washing dishes"}]},"XOtnVABMQhM":{"duration":168.322,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XOtnVABMQhM","annotations":[]},"GlJ4DvArV6Q":{"duration":133.61,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=GlJ4DvArV6Q","annotations":[{"segment":[13.76033073322933,118.63073010920436],"label":"Playing polo"}]},"kN2ZNcn241g":{"duration":40.19,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=kN2ZNcn241g","annotations":[{"segment":[4.138513010920437,29.91016221528861],"label":"Using uneven 
bars"}]},"ZBP_mLWROZU":{"duration":160.381,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZBP_mLWROZU","annotations":[]},"oHUsS3B4P4g":{"duration":66.34,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=oHUsS3B4P4g","annotations":[]},"B8KJJecq2F0":{"duration":166.84,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=B8KJJecq2F0","annotations":[{"segment":[5.59586859392743,84.71884778248274],"label":"Cricket"},{"segment":[85.00076415000002,159.1569137296104],"label":"Cricket"}]},"AB5cWhlQ0Kk":{"duration":90.976,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AB5cWhlQ0Kk","annotations":[]},"I-slndrzKIc":{"duration":126.48,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=I-slndrzKIc","annotations":[]},"1X4hgrBjw-U":{"duration":78.136,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1X4hgrBjw-U","annotations":[{"segment":[68.74917682371296,78.135146],"label":"Having an ice cream"}]},"KRES3eBM2l4":{"duration":40.705,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KRES3eBM2l4","annotations":[{"segment":[0.2,20.384040341653666],"label":"Disc dog"}]},"_qv7KzCGLMc":{"duration":92.0,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=_qv7KzCGLMc","annotations":[]},"Y7yGeNl7POk":{"duration":21.32,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Y7yGeNl7POk","annotations":[{"segment":[0,21.315917],"label":"Hopscotch"}]},"m5NK0eErs90":{"duration":75.81,"subset":"validation","resolution":"632x360","url":"https://www.youtube.com/watch?v=m5NK0eErs90","annotations":[{"segment":[6.918423556942278,68.88857644305773],"label":"Playing water 
polo"}]},"BTEv-FJHs8c":{"duration":71.054,"subset":"testing","resolution":"192x144","url":"https://www.youtube.com/watch?v=BTEv-FJHs8c","annotations":[]},"jPaeFy4Phz4":{"duration":204.429,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jPaeFy4Phz4","annotations":[{"segment":[52.667399904257174,193.41099448058117],"label":"Braiding hair"}]},"-npRRmY2wBs":{"duration":114.661,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-npRRmY2wBs","annotations":[{"segment":[12.159482059282372,106.59287987519501],"label":"Bullfighting"}]},"JNr0oI927ng":{"duration":12.68,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=JNr0oI927ng","annotations":[{"segment":[7.5867177,11.891611544461778],"label":"Springboard diving"}]},"RG0wtfLnNrE":{"duration":52.222,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=RG0wtfLnNrE","annotations":[{"segment":[2.6963601092256395,26.73619943737494],"label":"Rafting"},{"segment":[29.57874800658178,52.221678],"label":"Rafting"}]},"ursegSrjG1M":{"duration":115.264,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ursegSrjG1M","annotations":[{"segment":[79.83931442155647,95.48350296879876],"label":"Spread mulch"}]},"5BYayNkmlBo":{"duration":29.977,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=5BYayNkmlBo","annotations":[{"segment":[3.1878642294967507,5.755983855664975],"label":"Calf roping"}]},"ltlJ2sOx4as":{"duration":183.25,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ltlJ2sOx4as","annotations":[]},"byO53jHoAwI":{"duration":135.63,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=byO53jHoAwI","annotations":[{"segment":[4.866518474258971,135.627754],"label":"Playing 
bagpipes"}]},"cAmGHiqmnaw":{"duration":25.078,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cAmGHiqmnaw","annotations":[{"segment":[5.655123432887954,19.539427942996817],"label":"Calf roping"}]},"bWBcYdYEup4":{"duration":146.449,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bWBcYdYEup4","annotations":[{"segment":[0.2,131.55534477379098],"label":"Doing fencing"}]},"2dFLbFsFyuY":{"duration":97.71000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2dFLbFsFyuY","annotations":[{"segment":[4.420535102964119,22.712404494539783],"label":"Shot put"},{"segment":[30.638881230889233,40.394544906396256],"label":"Shot put"},{"segment":[49.235615112324496,63.10694940093604],"label":"Shot put"}]},"ecUypvzBAOQ":{"duration":157.73,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=ecUypvzBAOQ","annotations":[{"segment":[4.673851794071763,16.481477379095164],"label":"Preparing pasta"},{"segment":[40.09672854914197,149.33620436817472],"label":"Preparing 
pasta"}]},"exxji9vqjSw":{"duration":185.621,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=exxji9vqjSw","annotations":[{"segment":[23.992559190031148,143.39267289719626],"label":"Snowboarding"},{"segment":[153.51110747663552,173.74797663551402],"label":"Snowboarding"}]},"g2GZd1160m4":{"duration":52.919,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=g2GZd1160m4","annotations":[{"segment":[12.79615074102964,49.86370998439937],"label":"Capoeira"}]},"iJ6rHJf_Hgo":{"duration":134.165,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iJ6rHJf_Hgo","annotations":[{"segment":[7.30289236828974,118.51555891883407],"label":"Longboarding"}]},"p1pkFRHG7UY":{"duration":35.13,"subset":"training","resolution":"624x352","url":"https://www.youtube.com/watch?v=p1pkFRHG7UY","annotations":[{"segment":[8.19451248049922,18.77338143525741],"label":"Bungee jumping"}]},"GQ1eaxmDlzQ":{"duration":106.67,"subset":"training","resolution":"810x480","url":"https://www.youtube.com/watch?v=GQ1eaxmDlzQ","annotations":[{"segment":[15.367415,92.4443716],"label":"Tango"}]},"-ibmoYKRxQk":{"duration":121.905,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-ibmoYKRxQk","annotations":[{"segment":[17.490190327613107,113.51475351014041],"label":"Snow tubing"}]},"TFIlTCvL4oQ":{"duration":221.96,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TFIlTCvL4oQ","annotations":[{"segment":[72.71685610281554,136.43067287861584],"label":"Snow 
tubing"}]},"mixxaL3Bdv8":{"duration":99.614,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mixxaL3Bdv8","annotations":[]},"MW76ZGSmOR0":{"duration":33.182,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=MW76ZGSmOR0","annotations":[]},"CQvmcZSfyhI":{"duration":199.82999999999998,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CQvmcZSfyhI","annotations":[{"segment":[0.01,7.947698894695788],"label":"Cheerleading"},{"segment":[32.258307278471136,196.19907663572542],"label":"Cheerleading"}]},"5XiBb9XGsg8":{"duration":125.829,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=5XiBb9XGsg8","annotations":[{"segment":[10.583742132366964,70.55828088244643],"label":"Playing rubik cube"}]},"f9eW4rFQG-A":{"duration":195.35,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=f9eW4rFQG-A","annotations":[{"segment":[27.269016380655227,31.534560842433695],"label":"Sailing"}]},"dZ4y7J3ASEE":{"duration":29.03,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dZ4y7J3ASEE","annotations":[{"segment":[0.75810688,29.024399],"label":"Playing kickball"}]},"1lagsBNqNe4":{"duration":163.68,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=1lagsBNqNe4","annotations":[{"segment":[31.27094383775351,140.46269109],"label":"Ironing clothes"}]},"G5frRzhSNJ8":{"duration":232.18,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=G5frRzhSNJ8","annotations":[{"segment":[0.2,11.952908326053043],"label":"Getting a haircut"},{"segment":[23.543607308892355,153.21455217940718],"label":"Getting a haircut"},{"segment":[164.08083247581902,192.33316124648985],"label":"Getting a 
haircut"}]},"adLA8Fu4Wlc":{"duration":57.656,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=adLA8Fu4Wlc","annotations":[]},"_O6ONcIPR_s":{"duration":40.148,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_O6ONcIPR_s","annotations":[{"segment":[0.1876801872074883,38.849798751950075],"label":"Kite flying"}]},"ZUZ7EVzHS5E":{"duration":51.41,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZUZ7EVzHS5E","annotations":[{"segment":[0,50.68716707644305],"label":"Playing polo"}]},"_UlKLq9w_nw":{"duration":167.161,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_UlKLq9w_nw","annotations":[{"segment":[19.069284519219323,43.012356170309666],"label":"Shuffleboard"}]},"K68iNoSnZMg":{"duration":77.903,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=K68iNoSnZMg","annotations":[{"segment":[0.01,28.18493735830534],"label":"Peeling potatoes"},{"segment":[41.89682660752652,76.35856595955144],"label":"Peeling potatoes"}]},"adUapXmi-xA":{"duration":121.116,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=adUapXmi-xA","annotations":[{"segment":[8.691580299531982,103.54317400312011],"label":"Playing rubik cube"}]},"ot-Y1sa-ujc":{"duration":136.09,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ot-Y1sa-ujc","annotations":[{"segment":[74.73391241185648,82.37715345397815],"label":"Bungee jumping"},{"segment":[87.04802297971919,107.64231134321372],"label":"Bungee jumping"}]},"PHIZza_xVog":{"duration":69.15,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PHIZza_xVog","annotations":[{"segment":[14.347613599782088,40.88530491967978],"label":"Applying 
sunscreen"}]},"wfFnIYdSjCI":{"duration":26.12,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=wfFnIYdSjCI","annotations":[{"segment":[0.996855694227769,24.73690717628705],"label":"Playing lacrosse"}]},"hJf7uOUiEFo":{"duration":185.04,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hJf7uOUiEFo","annotations":[{"segment":[100.13792043681747,131.01618408736348],"label":"Blow-drying hair"},{"segment":[154.11939781591263,172.8792496099844],"label":"Blow-drying hair"}]},"ibIRzsC9NjY":{"duration":37.872,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ibIRzsC9NjY","annotations":[{"segment":[0.19999999999999998,36.59547581903276],"label":"Camel ride"}]},"35WvCw9Qcqk":{"duration":182.486,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=35WvCw9Qcqk","annotations":[{"segment":[8.540662442824244,101.91857181770264],"label":"Longboarding"},{"segment":[107.32765803149132,113.02143299337415],"label":"Longboarding"},{"segment":[126.11711540570465,130.95682412330507],"label":"Longboarding"},{"segment":[142.3443736349454,170.81324836193448],"label":"Longboarding"}]},"yBL1hCKmX7s":{"duration":15.47,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yBL1hCKmX7s","annotations":[{"segment":[0.2171250278582572,9.915376272193745],"label":"Javelin throw"}]},"pG7TQ7HfWfg":{"duration":115.33,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=pG7TQ7HfWfg","annotations":[]},"wVCDyGGog4I":{"duration":233.27,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=wVCDyGGog4I","annotations":[{"segment":[28.90244538906081,225.47206426600783],"label":"Tai 
chi"}]},"g0L0LnF3JiY":{"duration":83.337,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=g0L0LnF3JiY","annotations":[]},"JYRVSy54UUY":{"duration":134.72,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JYRVSy54UUY","annotations":[{"segment":[36.47289391575663,62.96041341653666],"label":"Playing accordion"},{"segment":[69.68740249609984,122.87265990639625],"label":"Playing accordion"}]},"zrwpgILg7VI":{"duration":159.75,"subset":"training","resolution":"400x224","url":"https://www.youtube.com/watch?v=zrwpgILg7VI","annotations":[{"segment":[30.40546198718697,152.52576013244612],"label":"Breakdancing"}]},"1abFB1HE4T8":{"duration":69.915,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=1abFB1HE4T8","annotations":[]},"z9uVyf0D8eM":{"duration":176.7,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=z9uVyf0D8eM","annotations":[{"segment":[94.70927394,96.10346061],"label":"Playing bagpipes"}]},"TfFVoogTDJY":{"duration":60.163,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TfFVoogTDJY","annotations":[{"segment":[0.2,57.819220839813376],"label":"BMX"}]},"Yl85vnsndx0":{"duration":219.99,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Yl85vnsndx0","annotations":[{"segment":[0.01,199.39434987363495],"label":"Belly dance"}]},"e3DB_y78IHc":{"duration":60.02,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=e3DB_y78IHc","annotations":[]},"OApnN8Jw8Z0":{"duration":28.44,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=OApnN8Jw8Z0","annotations":[]},"0IxId05Eo6k":{"duration":85.171,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0IxId05Eo6k","annotations":[{"segment":[25.701690327613107,43.89875273010921],"label":"Tug of 
war"},{"segment":[49.34458892355695,67.27600078003121],"label":"Tug of war"}]},"-DaX_1bBNAo":{"duration":22.872,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-DaX_1bBNAo","annotations":[{"segment":[8.017511700468019,17.10402496099844],"label":"Throwing darts"}]},"dnQcp43wbRY":{"duration":143.337,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dnQcp43wbRY","annotations":[{"segment":[0.2,134.9916443057722],"label":"Assembling bicycle"}]},"OyKEEws65l8":{"duration":104.978,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=OyKEEws65l8","annotations":[{"segment":[4.897855127762614,96.81431644323749],"label":"Camel ride"}]},"M7Lc8nh9auA":{"duration":48.089,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=M7Lc8nh9auA","annotations":[{"segment":[0.01,43.13713241029641],"label":"Rollerblading"}]},"uVL5BdceGR4":{"duration":203.175,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uVL5BdceGR4","annotations":[]},"K2kgYzeLl7U":{"duration":211.14,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=K2kgYzeLl7U","annotations":[]},"BhAQhPasmhU":{"duration":154.51,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BhAQhPasmhU","annotations":[{"segment":[0,154.505578],"label":"Discus throw"}]},"Vf02ZDnbduk":{"duration":72.029,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Vf02ZDnbduk","annotations":[{"segment":[27.020904984423673,64.4868769470405],"label":"Elliptical 
trainer"}]},"bpXFhOs6Yhg":{"duration":63.136,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bpXFhOs6Yhg","annotations":[]},"BshSmo_rRII":{"duration":43.47,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=BshSmo_rRII","annotations":[{"segment":[27.853745709828395,34.76635413416537],"label":"Polishing shoes"}]},"ScWdvpS6epQ":{"duration":149.12,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ScWdvpS6epQ","annotations":[{"segment":[7.444251576865796,149.11766439909297],"label":"Cheerleading"}]},"z1tV0-C3IBw":{"duration":15.28,"subset":"validation","resolution":"810x480","url":"https://www.youtube.com/watch?v=z1tV0-C3IBw","annotations":[{"segment":[0.01,15.278729],"label":"Mowing the lawn"}]},"yAd6iTYWX_g":{"duration":198.717,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=yAd6iTYWX_g","annotations":[]},"68m6uCPwwNo":{"duration":9.823,"subset":"training","resolution":"632x480","url":"https://www.youtube.com/watch?v=68m6uCPwwNo","annotations":[{"segment":[0.19999999999999998,9.622040816326532],"label":"Croquet"}]},"QudbWIbFWGs":{"duration":102.17,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=QudbWIbFWGs","annotations":[]},"5vwd9hV4PRY":{"duration":212.57999999999998,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=5vwd9hV4PRY","annotations":[]},"0bbL30qENMg":{"duration":201.99,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0bbL30qENMg","annotations":[{"segment":[16.386117260530423,24.894293530421216],"label":"Washing face"},{"segment":[79.72476282527302,132.03429100312013],"label":"Washing face"},{"segment":[144.9541142277691,159.44952565054604],"label":"Washing 
face"}]},"ruNII4WvE3k":{"duration":162.192,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=ruNII4WvE3k","annotations":[{"segment":[19.446817757009345,159.11032710280372],"label":"Grooming dog"}]},"aqpeiIMe47Q":{"duration":173.5,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=aqpeiIMe47Q","annotations":[{"segment":[75.51694305772232,173.49950113378685],"label":"Preparing salad"}]},"PxGggNnMGtQ":{"duration":63.46,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=PxGggNnMGtQ","annotations":[{"segment":[1.5816952511700468,56.34789332293292],"label":"Mixing drinks"}]},"YAm1JXAtqh4":{"duration":27.08,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=YAm1JXAtqh4","annotations":[{"segment":[0.3579414976599064,19.307785491419654],"label":"Javelin throw"}]},"j9ww01sQkvY":{"duration":9.637,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=j9ww01sQkvY","annotations":[{"segment":[0.7054597507788161,9.200996323987539],"label":"Applying sunscreen"}]},"ZBAQx9DxYTo":{"duration":86.08,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZBAQx9DxYTo","annotations":[{"segment":[58.85135179407176,76.96978393135726],"label":"Archery"}]},"2vy0dMXhlWI":{"duration":110.9,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=2vy0dMXhlWI","annotations":[{"segment":[3.4587103276131046,108.25763325429016],"label":"Chopping wood"}]},"AItKK-a8eEI":{"duration":237.378,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AItKK-a8eEI","annotations":[{"segment":[74.76812897345285,110.62322710429396],"label":"Surfing"}]},"ri5WJu1K0ak":{"duration":176.01,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=ri5WJu1K0ak","annotations":[{"segment":[75.5101333305033,155.3085768],"label":"Putting on 
makeup"}]},"EYkkKSVHbmM":{"duration":183.183,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=EYkkKSVHbmM","annotations":[]},"LJdI1neOr2c":{"duration":89.68,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LJdI1neOr2c","annotations":[{"segment":[2.307125576443058,88.71946534867395],"label":"Using the balance beam"}]},"sX8Rr1o4XWM":{"duration":74.792,"subset":"training","resolution":"293x240","url":"https://www.youtube.com/watch?v=sX8Rr1o4XWM","annotations":[{"segment":[20.87151170046802,50.03505304212168],"label":"Plastering"},{"segment":[55.28348517940717,68.57951326053042],"label":"Plastering"}]},"hiYPv3MrrUw":{"duration":56.8,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hiYPv3MrrUw","annotations":[{"segment":[10.24773442,44.50767321000001],"label":"Windsurfing"}]},"Gp2bIEiDWTI":{"duration":186.155,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Gp2bIEiDWTI","annotations":[{"segment":[32.38098747351254,46.61119721523554],"label":"Braiding hair"},{"segment":[50.38655898344777,93.65801309603405],"label":"Braiding hair"},{"segment":[99.75667441391532,140.99524142054116],"label":"Braiding hair"}]},"O-hUa9e9_DE":{"duration":207.66,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=O-hUa9e9_DE","annotations":[{"segment":[29.793216848673946,189.46425117004682],"label":"Preparing salad"}]},"EBdkGs_jFDw":{"duration":219.66,"subset":"training","resolution":"800x480","url":"https://www.youtube.com/watch?v=EBdkGs_jFDw","annotations":[{"segment":[47.633146904107456,200.47043840937312],"label":"Cleaning windows"}]},"8inrvRctXQ0":{"duration":197.39,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8inrvRctXQ0","annotations":[{"segment":[11.391209048361935,183.79869734789392],"label":"Playing 
squash"}]},"cIKAwgMLKw4":{"duration":98.94,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cIKAwgMLKw4","annotations":[{"segment":[57.57364106864274,75.01552160686428],"label":"Shaving legs"}]},"YaHTgeeDs7s":{"duration":39.66,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YaHTgeeDs7s","annotations":[{"segment":[0.2,37.56201120431481],"label":"Riding bumper cars"}]},"WNgI2qBvPrY":{"duration":60.16,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=WNgI2qBvPrY","annotations":[]},"ySu0ymIa8yo":{"duration":211.766,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ySu0ymIa8yo","annotations":[]},"bWZkD_s1940":{"duration":159.893,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=bWZkD_s1940","annotations":[]},"OIA7lPraPSM":{"duration":218.383,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OIA7lPraPSM","annotations":[{"segment":[0.2,212.89099063962558],"label":"Longboarding"}]},"KnHUAc20WEU":{"duration":159.73,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=KnHUAc20WEU","annotations":[{"segment":[84.640927,131.06804879999999],"label":"Hammer throw"}]},"lyjz4sNglQg":{"duration":111.874,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lyjz4sNglQg","annotations":[{"segment":[0.7667361786694393,106.01866602290619],"label":"Decorating the Christmas tree"}]},"YZQ_qh9wC4w":{"duration":184.575,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=YZQ_qh9wC4w","annotations":[{"segment":[46.647765547581905,157.22024684555382],"label":"Swimming"}]},"8yeUJm0Pl24":{"duration":60.651,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8yeUJm0Pl24","annotations":[{"segment":[17.50444024960998,22.51922583463339],"label":"Beer 
pong"}]},"uHPrH22rPgU":{"duration":12.121,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=uHPrH22rPgU","annotations":[{"segment":[0.01,12.120816],"label":"Throwing darts"}]},"nQjxgOz1WYo":{"duration":33.019,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=nQjxgOz1WYo","annotations":[{"segment":[0.01,27.816128705148202],"label":"Playing blackjack"}]},"D2Trvi7Im_s":{"duration":42.261,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=D2Trvi7Im_s","annotations":[{"segment":[0.01,39.458198127925115],"label":"Rollerblading"}]},"uOUjBTlwoxg":{"duration":55.543,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uOUjBTlwoxg","annotations":[{"segment":[10.657850487051483,47.05051068673947],"label":"Snowboarding"}]},"3nLA8R3ULSE":{"duration":128.06,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3nLA8R3ULSE","annotations":[{"segment":[0,116.4371965678627],"label":"Doing motocross"}]},"3JBUWRKezUI":{"duration":123.88,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3JBUWRKezUI","annotations":[]},"1Xt1gZfZ2Rw":{"duration":160.218,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1Xt1gZfZ2Rw","annotations":[{"segment":[15.36837987519501,17.867303432137287],"label":"Waxing skis"},{"segment":[52.602340873634944,59.59932683307333],"label":"Waxing skis"},{"segment":[63.347712168486744,82.58942355694228],"label":"Waxing skis"}]},"Htp7EK8IB18":{"duration":38.36,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Htp7EK8IB18","annotations":[{"segment":[0,38.359364],"label":"Snow tubing"}]},"2bS6JfPF1mk":{"duration":23.453,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=2bS6JfPF1mk","annotations":[{"segment":[0.20000000000000004,22.11600624024961],"label":"Using the monkey 
bar"}]},"VtS4vy8Z0RQ":{"duration":14.652,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VtS4vy8Z0RQ","annotations":[{"segment":[0,13.480516380655226],"label":"Beer pong"}]},"b9AaHQu2bA8":{"duration":83.848,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=b9AaHQu2bA8","annotations":[]},"DAv8CEings8":{"duration":47.137,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=DAv8CEings8","annotations":[{"segment":[7.206517450858034,8.677235297971919],"label":"Shuffleboard"},{"segment":[16.766183457098283,47.136507],"label":"Shuffleboard"}]},"gLPJ7_VhWVU":{"duration":216.76,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gLPJ7_VhWVU","annotations":[{"segment":[57.63706829407176,205.36374773400937],"label":"Putting on makeup"}]},"fwwo0GsYB7c":{"duration":81.874,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=fwwo0GsYB7c","annotations":[{"segment":[8.467919692412986,67.76886195876813],"label":"Roof shingle removal"}]},"F3jJVS3NHf8":{"duration":207.355,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=F3jJVS3NHf8","annotations":[{"segment":[46.246556942277685,175.9479313572543],"label":"Making a 
cake"}]},"Sl-9a4w9UMo":{"duration":130.845,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=Sl-9a4w9UMo","annotations":[]},"vifcEi_nBio":{"duration":139.2,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vifcEi_nBio","annotations":[]},"MRzsZN5p9QY":{"duration":65.85,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MRzsZN5p9QY","annotations":[{"segment":[21.984719778471142,54.448137769110765],"label":"Snatch"},{"segment":[60.50934555850234,64.618638975039],"label":"Snatch"}]},"oxp7V1AKJWk":{"duration":136.58,"subset":"training","resolution":"568x320","url":"https://www.youtube.com/watch?v=oxp7V1AKJWk","annotations":[{"segment":[27.273340054690625,69.03564201343563],"label":"Playing ten pins"},{"segment":[73.72324733533559,110.15872406240248],"label":"Playing ten pins"},{"segment":[131.46602078627146,136.379772],"label":"Playing ten pins"}]},"DPkrjs9-pwQ":{"duration":173.41,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=DPkrjs9-pwQ","annotations":[]},"je6wJ_Ky5wg":{"duration":77.49,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=je6wJ_Ky5wg","annotations":[{"segment":[0,77.441],"label":"Doing step aerobics"}]},"uF9othvTXn8":{"duration":98.105,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uF9othvTXn8","annotations":[{"segment":[0.20000000000000004,97.877],"label":"Riding bumper cars"}]},"oGU7m7rCZ-Y":{"duration":120.65,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=oGU7m7rCZ-Y","annotations":[]},"C3Mdjku7ZmM":{"duration":127.153,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=C3Mdjku7ZmM","annotations":[{"segment":[46.9566879875195,125.01970514820593],"label":"Cleaning 
sink"}]},"xWIKFgygROg":{"duration":94.09,"subset":"training","resolution":"624x360","url":"https://www.youtube.com/watch?v=xWIKFgygROg","annotations":[{"segment":[13.21451915,22.73236007],"label":"Archery"},{"segment":[27.18737952,34.053429429017164],"label":"Archery"}]},"FcfoTk3UK5g":{"duration":166.464,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FcfoTk3UK5g","annotations":[{"segment":[15.054755070202809,166.381],"label":"Doing fencing"}]},"ZQs-OWTZA6o":{"duration":31.649,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZQs-OWTZA6o","annotations":[{"segment":[0.01,31.058149959685352],"label":"Snow tubing"}]},"Kd3YFwF-2UM":{"duration":213.462,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Kd3YFwF-2UM","annotations":[{"segment":[5.994225858830272,22.644853244469914],"label":"Playing ten pins"},{"segment":[56.27912056346199,72.59673540138884],"label":"Playing ten pins"},{"segment":[147.85757118448004,179.82677576490812],"label":"Playing ten pins"},{"segment":[199.80752761310453,203.13765307332292],"label":"Playing ten pins"}]},"Fi_ja3Urdtg":{"duration":55.66,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=Fi_ja3Urdtg","annotations":[{"segment":[24.354063962558502,33.45433697347894],"label":"Mixing drinks"}]},"PgoRelvwBUI":{"duration":45.883,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PgoRelvwBUI","annotations":[{"segment":[9.448528904836195,45.682629],"label":"Changing car wheel"}]},"PL1JmxPH7y4":{"duration":47.764,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=PL1JmxPH7y4","annotations":[{"segment":[15.71824561358487,34.46651318102607],"label":"Doing 
crunches"}]},"a0Zlu4AvdnI":{"duration":52.18,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=a0Zlu4AvdnI","annotations":[{"segment":[29.75047919578783,42.69254812090484],"label":"Mowing the lawn"}]},"aVH9QsSATKM":{"duration":136.42000000000002,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aVH9QsSATKM","annotations":[{"segment":[33.944693842175454,43.73438611013828],"label":"Getting a tattoo"},{"segment":[64.59068702884169,73.52910170828602],"label":"Getting a tattoo"},{"segment":[82.46751638773034,109.06994102893367],"label":"Getting a tattoo"}]},"Qxquum8p8S8":{"duration":40.64,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Qxquum8p8S8","annotations":[{"segment":[1.0142881747269892,40.43492063492063],"label":"Washing hands"}]},"kB0h-fHb_WU":{"duration":33.298,"subset":"training","resolution":"427x240","url":"https://www.youtube.com/watch?v=kB0h-fHb_WU","annotations":[{"segment":[0.01,28.17585491419657],"label":"Powerbocking"}]},"x0cSfzhQRAs":{"duration":28.909,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=x0cSfzhQRAs","annotations":[{"segment":[0.2,17.724141819032763],"label":"Slacklining"}]},"wOZdXVdiB04":{"duration":134.49,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=wOZdXVdiB04","annotations":[{"segment":[7.131566302652106,126.48066389859595],"label":"Dodgeball"}]},"7xpkFhlxo2Q":{"duration":186.06,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=7xpkFhlxo2Q","annotations":[{"segment":[2.757282371294852,7.691366614664586],"label":"Playing squash"},{"segment":[16.39857410296412,36.71539157566303],"label":"Playing squash"},{"segment":[45.13235881435258,67.77109828393137],"label":"Playing squash"},{"segment":[72.12470202808113,156.58461466458658],"label":"Playing 
squash"}]},"Ha5KU-pp4gk":{"duration":80.458,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=Ha5KU-pp4gk","annotations":[{"segment":[4.769691560451534,80.25716553287981],"label":"Trimming branches or hedges"}]},"cbd1LfrWYms":{"duration":157.827,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=cbd1LfrWYms","annotations":[]},"pmbHoeudjKc":{"duration":15.12,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pmbHoeudjKc","annotations":[{"segment":[2.79086708,14.07556572],"label":"Long jump"}]},"agu58IVL7IQ":{"duration":85.36,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=agu58IVL7IQ","annotations":[]},"5c8HvpeRWrc":{"duration":40.73,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=5c8HvpeRWrc","annotations":[{"segment":[0,37.99566989079563],"label":"Ballet"}]},"Cp4a_gN4pDQ":{"duration":65.179,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Cp4a_gN4pDQ","annotations":[{"segment":[0,64.9779365079365],"label":"Playing congas"}]},"FZtIVsmpYcI":{"duration":192.819,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FZtIVsmpYcI","annotations":[{"segment":[30.38169866386492,185.89989740405616],"label":"Cutting the grass"}]},"QPEAp7nf2Tk":{"duration":82.849,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QPEAp7nf2Tk","annotations":[]},"yYdqlon7sR4":{"duration":179.351,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=yYdqlon7sR4","annotations":[]},"rFXOInUQjKo":{"duration":73.53999999999999,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rFXOInUQjKo","annotations":[{"segment":[0.01,25.525920261310453],"label":"Cleaning 
windows"}]},"O-6zE9_S2KM":{"duration":16.603,"subset":"training","resolution":"632x480","url":"https://www.youtube.com/watch?v=O-6zE9_S2KM","annotations":[{"segment":[1.5799348299531981,11.991964365054601],"label":"Playing blackjack"}]},"LITdMW0xh7o":{"duration":107.86,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=LITdMW0xh7o","annotations":[{"segment":[24.70745859,50.14297531],"label":"Chopping wood"},{"segment":[57.510101410000004,89.67555813999999],"label":"Chopping wood"}]},"NGiDXRIx1gk":{"duration":175.45,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NGiDXRIx1gk","annotations":[{"segment":[0.2,152.80214712729605],"label":"Doing fencing"}]},"0qQvcJJekN8":{"duration":25.101,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0qQvcJJekN8","annotations":[{"segment":[0.2,24.861],"label":"Swinging at the playground"}]},"qy-LbstiMYg":{"duration":192.77,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=qy-LbstiMYg","annotations":[{"segment":[10.826512146200134,103.15260103900157],"label":"Doing motocross"},{"segment":[125.1063616099844,147.06012218096726],"label":"Doing motocross"}]},"MEU83dUao4g":{"duration":63.25,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=MEU83dUao4g","annotations":[{"segment":[21.70866518876755,54.27166297191888],"label":"Getting a piercing"}]},"fJMVz_TBWU0":{"duration":76.78999999999999,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=fJMVz_TBWU0","annotations":[{"segment":[25.484527200000002,61.78774800000001],"label":"Playing bagpipes"},{"segment":[10.2728208,21.2423376],"label":"Playing bagpipes"}]},"UKo5IFacUyE":{"duration":38.57,"subset":"validation","resolution":"300x240","url":"https://www.youtube.com/watch?v=UKo5IFacUyE","annotations":[{"segment":[0.6969237599999999,15.53198127925117],"label":"Polishing 
shoes"},{"segment":[15.69298127925117,16.655003120124803],"label":"Polishing shoes"}]},"xyZ87TEcDSI":{"duration":63.182,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xyZ87TEcDSI","annotations":[{"segment":[0.01,59.63308030421217],"label":"Surfing"}]},"8rqI8xwXVac":{"duration":106.812,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=8rqI8xwXVac","annotations":[]},"uC7y97DXEH8":{"duration":23.36,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=uC7y97DXEH8","annotations":[]},"eufCY8KHuMA":{"duration":145.59,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eufCY8KHuMA","annotations":[{"segment":[0.22712757489891433,8.630847846158744],"label":"Horseback riding"},{"segment":[15.217547518227258,22.2585023400936],"label":"Horseback riding"},{"segment":[29.072329587061034,34.75051895953389],"label":"Horseback riding"},{"segment":[42.47285650609698,137.1850551351014],"label":"Horseback riding"}]},"yToUeIIlkOg":{"duration":190.474,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=yToUeIIlkOg","annotations":[{"segment":[118.26578567360382,173.53572571202167],"label":"Making an omelette"}]},"J4rzLO4u_pI":{"duration":147.796,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=J4rzLO4u_pI","annotations":[{"segment":[68.22361310452419,90.81116068642747],"label":"Trimming branches or hedges"},{"segment":[109.26476443057722,135.543631825273],"label":"Trimming branches or 
hedges"}]},"frbNKAZALzI":{"duration":201.9,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=frbNKAZALzI","annotations":[{"segment":[22.993007665884864,115.59498374492803],"label":"Kayaking"},{"segment":[122.83935602322052,130.71367328393134],"label":"Kayaking"},{"segment":[154.33662628705147,160.9510531279251],"label":"Kayaking"},{"segment":[163.78580748829953,187.72373319812792],"label":"Kayaking"}]},"7kQ3JRkEQr0":{"duration":66.55,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7kQ3JRkEQr0","annotations":[{"segment":[1.6574352574102964,62.77536037441497],"label":"Doing karate"}]},"VDj-dpVbgaI":{"duration":174.29,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=VDj-dpVbgaI","annotations":[]},"5Vd6PEUWo64":{"duration":168.02,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5Vd6PEUWo64","annotations":[{"segment":[92.65936532699402,121.49254831417747],"label":"Surfing"}]},"IC61TQ-sd8o":{"duration":159.45,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IC61TQ-sd8o","annotations":[{"segment":[0.7462596645865835,156.21702312012482],"label":"Cheerleading"}]},"bjKd--KFl0E":{"duration":230.923,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bjKd--KFl0E","annotations":[{"segment":[0,230.881],"label":"Surfing"}]},"sIYRsGZm2XY":{"duration":107.81,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=sIYRsGZm2XY","annotations":[{"segment":[2.8579360374414975,9.414377535101403],"label":"Smoking hookah"},{"segment":[26.56199375975039,29.58804368174727],"label":"Smoking hookah"},{"segment":[44.88640717628705,53.12398751950078],"label":"Smoking hookah"},{"segment":[64.05139001560062,65.73252886115445],"label":"Smoking hookah"},{"segment":[87.75544773790952,96.16114196567862],"label":"Smoking 
hookah"}]},"zN9COeDCm9Q":{"duration":209.352,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zN9COeDCm9Q","annotations":[{"segment":[0,205.38272854914194],"label":"Snowboarding"}]},"U20a95sHi8E":{"duration":86.123,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=U20a95sHi8E","annotations":[{"segment":[13.435696067298474,73.22454204368175],"label":"Hand car wash"}]},"Ux9b6Yq6kTc":{"duration":162.56,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ux9b6Yq6kTc","annotations":[]},"DhgdEfKAvO0":{"duration":156.689,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=DhgdEfKAvO0","annotations":[{"segment":[83.09594806092474,106.73321187884174],"label":"Hitting a pinata"}]},"JPZoLIUBIoc":{"duration":5.39,"subset":"testing","resolution":"322x240","url":"https://www.youtube.com/watch?v=JPZoLIUBIoc","annotations":[]},"JQcN61A1MEU":{"duration":65.434,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JQcN61A1MEU","annotations":[{"segment":[4.396176271713518,53.64963612863419],"label":"Futsal"}]},"zcl7tr-pLFU":{"duration":118.53999999999999,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zcl7tr-pLFU","annotations":[]},"7-uMJ_5WsZM":{"duration":133.701,"subset":"training","resolution":"627x360","url":"https://www.youtube.com/watch?v=7-uMJ_5WsZM","annotations":[{"segment":[3.3363120124804992,84.46303432137286],"label":"Cutting the grass"}]},"hhQ1Xbytds4":{"duration":60.233,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hhQ1Xbytds4","annotations":[{"segment":[0.2,60.001999999999995],"label":"Roof shingle removal"}]},"TFwELfVs19g":{"duration":73.53999999999999,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TFwELfVs19g","annotations":[{"segment":[11.142059927930802,56.8453315868093],"label":"Getting a 
piercing"}]},"6DzBNkTen1g":{"duration":80.76,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=6DzBNkTen1g","annotations":[{"segment":[20.630025501373964,36.47992257009346],"label":"Hitting a pinata"}]},"3oS_28utt2Y":{"duration":89.257,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=3oS_28utt2Y","annotations":[{"segment":[0.20000000000000004,64.4085418171872],"label":"Surfing"}]},"cqxrPBGP4nU":{"duration":62.253,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cqxrPBGP4nU","annotations":[{"segment":[8.934864611544462,47.68498395943838],"label":"Cleaning sink"}]},"es0HLQRbv1E":{"duration":141.158,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=es0HLQRbv1E","annotations":[]},"Q6SooTA1os4":{"duration":32.14,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Q6SooTA1os4","annotations":[{"segment":[6.367094710999325,28.676993501508765],"label":"Bathing dog"}]},"s4pnHlWlt5k":{"duration":53.453,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=s4pnHlWlt5k","annotations":[{"segment":[3.7525039469578783,51.45099856162247],"label":"Hitting a pinata"}]},"X_p-lKYPPhM":{"duration":84.1,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=X_p-lKYPPhM","annotations":[]},"q8lLTxHen0I":{"duration":75.419,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=q8lLTxHen0I","annotations":[{"segment":[14.236548722308893,26.23760632293292],"label":"Beach soccer"},{"segment":[30.473273711388458,44.82747986115445],"label":"Beach soccer"}]},"GgiaxJ1JeSM":{"duration":99.986,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GgiaxJ1JeSM","annotations":[{"segment":[15.394139246636227,95.78576770309915],"label":"Riding bumper 
cars"}]},"TwJvPxAq9_o":{"duration":149.31,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TwJvPxAq9_o","annotations":[{"segment":[1.5140062473247229,6.405411046373827],"label":"Smoking a cigarette"}]},"VXiaPftjWqQ":{"duration":220.195,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VXiaPftjWqQ","annotations":[{"segment":[12.141584497181688,194.2653441046583],"label":"Laying tile"}]},"_l251ZuOCYU":{"duration":128.477,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=_l251ZuOCYU","annotations":[]},"c9A384zijnk":{"duration":175.43,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=c9A384zijnk","annotations":[{"segment":[87.57638090483618,106.18636184711387],"label":"Preparing pasta"},{"segment":[107.82841898907955,121.2385523151326],"label":"Preparing pasta"},{"segment":[135.19603802184088,139.84853325741028],"label":"Preparing pasta"},{"segment":[143.67999992199688,149.15352372854915],"label":"Preparing pasta"},{"segment":[163.11100943525742,165.3004189578783],"label":"Preparing pasta"}]},"zhH7wxXrGSY":{"duration":176.4,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zhH7wxXrGSY","annotations":[{"segment":[0.01,168.9323775351014],"label":"Painting"}]},"9pNfaRJ0K4o":{"duration":149.47,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9pNfaRJ0K4o","annotations":[{"segment":[54.79673837293628,132.67805919344775],"label":"Ballet"}]},"S4GiCywMi38":{"duration":199.62,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=S4GiCywMi38","annotations":[{"segment":[40.02424180967238,188.59671918876754],"label":"Putting on 
makeup"}]},"06dIgOEgYp0":{"duration":237.703,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=06dIgOEgYp0","annotations":[{"segment":[52.27800468018721,99.36528549141966],"label":"Snowboarding"},{"segment":[106.40983931357255,137.18341653666147],"label":"Snowboarding"},{"segment":[147.19409828393137,179.4507394695788],"label":"Snowboarding"},{"segment":[196.50597503900156,199.47210296411856],"label":"Snowboarding"},{"segment":[210.2243166926677,218.01040249609983],"label":"Snowboarding"}]},"jherly5DNjg":{"duration":35.899,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jherly5DNjg","annotations":[{"segment":[6.318304212168487,35.861],"label":"Calf roping"}]},"n1z0OF9tt4M":{"duration":225.559,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=n1z0OF9tt4M","annotations":[]},"AEZroPZf0_4":{"duration":33.344,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=AEZroPZf0_4","annotations":[]},"otMw5GJWYWs":{"duration":51.851,"subset":"testing","resolution":"352x240","url":"https://www.youtube.com/watch?v=otMw5GJWYWs","annotations":[]},"B6kbmiYicYg":{"duration":76.7,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=B6kbmiYicYg","annotations":[]},"VoQ2VhCqir0":{"duration":150.884,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=VoQ2VhCqir0","annotations":[{"segment":[29.1880263618708,41.663553758476866],"label":"Fixing the roof"},{"segment":[48.96056034894457,62.377636983030335],"label":"Fixing the roof"}]},"V4wwal5FQZE":{"duration":155.34,"subset":"validation","resolution":"344x194","url":"https://www.youtube.com/watch?v=V4wwal5FQZE","annotations":[{"segment":[9.451354707249514,14.782888131851804],"label":"Vacuuming floor"},{"segment":[26.172982266229425,37.32073397221603],"label":"Vacuuming floor"},{"segment":[65.43245566557356,70.0369618050028],"label":"Vacuuming 
floor"},{"segment":[83.36579536650854,94.513546099844],"label":"Vacuuming floor"},{"segment":[103.9649007098284,107.3576946723869],"label":"Vacuuming floor"},{"segment":[134.2577039469579,141.770319149766],"label":"Vacuuming floor"}]},"bdDzTqaiB3E":{"duration":67.06,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bdDzTqaiB3E","annotations":[{"segment":[0,7.005315132605304],"label":"Hand washing clothes"}]},"Tm0jUAQf6zY":{"duration":131.054,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Tm0jUAQf6zY","annotations":[{"segment":[0.01,131.001],"label":"Baton twirling"}]},"a_EkYuPdaJ0":{"duration":232.827,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=a_EkYuPdaJ0","annotations":[{"segment":[154.8549987637838,168.63600476635514],"label":"Painting fence"}]},"OFe8toY6Ch4":{"duration":190.225,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=OFe8toY6Ch4","annotations":[{"segment":[10.683238689547583,25.82054134165367],"label":"Curling"},{"segment":[35.911327613104525,61.7318689547582],"label":"Curling"},{"segment":[68.85477691107646,97.0496209048362],"label":"Curling"},{"segment":[99.72071138845556,136.2256146645866],"label":"Curling"},{"segment":[138.59991731669268,170.0594274570983],"label":"Curling"}]},"N1_pNhVqG20":{"duration":200.67000000000002,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=N1_pNhVqG20","annotations":[]},"1VwNfMlb4JU":{"duration":29.35,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=1VwNfMlb4JU","annotations":[{"segment":[0.01,24.74674102964119],"label":"Getting a 
haircut"}]},"9ctFBfPUcI8":{"duration":74.861,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=9ctFBfPUcI8","annotations":[]},"_a527xEzhkw":{"duration":161.32999999999998,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=_a527xEzhkw","annotations":[]},"GNubfPDmTnE":{"duration":96.108,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GNubfPDmTnE","annotations":[]},"eEfvYiuGULM":{"duration":163.54,"subset":"training","resolution":"634x360","url":"https://www.youtube.com/watch?v=eEfvYiuGULM","annotations":[{"segment":[5.863062402496101,161.87150546021843],"label":"Javelin throw"}]},"JSqJmZPqDy8":{"duration":34.78,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=JSqJmZPqDy8","annotations":[{"segment":[0.8939087363494539,16.82173712948518],"label":"Mowing the lawn"},{"segment":[27.44028939157566,30.961748049921994],"label":"Mowing the lawn"}]},"_5UtmlGMIUc":{"duration":182.65,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=_5UtmlGMIUc","annotations":[{"segment":[28.209310707122164,133.06816250811232],"label":"Shaving legs"}]},"GlcDmblxOcQ":{"duration":69.636,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GlcDmblxOcQ","annotations":[]},"C8m_dPhMGbg":{"duration":128.36,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=C8m_dPhMGbg","annotations":[{"segment":[11.196502991643179,116.96346757009346],"label":"Snowboarding"}]},"_s20ZN8WZbo":{"duration":67.5,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=_s20ZN8WZbo","annotations":[{"segment":[6.682194454758191,58.66695],"label":"Mixing 
drinks"}]},"reit7blyxIo":{"duration":72.08,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=reit7blyxIo","annotations":[{"segment":[10.55581373478939,57.72008786895476],"label":"Shaving"}]},"furUOKw0Qzs":{"duration":137.602,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=furUOKw0Qzs","annotations":[{"segment":[10.37215048276004,129.95189814631144],"label":"Changing car wheel"}]},"aYC7F72n924":{"duration":84.034,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=aYC7F72n924","annotations":[{"segment":[21.641893285105358,84.033038],"label":"Mooping floor"}]},"MAZlsi4Bon8":{"duration":10.612,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=MAZlsi4Bon8","annotations":[{"segment":[0.01,10.5949646421231],"label":"Hitting a pinata"}]},"CsyHOM8ngZA":{"duration":238.377,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=CsyHOM8ngZA","annotations":[{"segment":[25.841964898595943,40.715038221528864],"label":"Grooming dog"},{"segment":[52.24167004680187,66.74291653666147],"label":"Grooming dog"},{"segment":[75.66676053042121,118.42684633385336],"label":"Grooming dog"},{"segment":[139.24914898595944,158.95597113884554],"label":"Grooming dog"}]},"wFmczV8lv-I":{"duration":154.07,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=wFmczV8lv-I","annotations":[{"segment":[63.09999248,76.83017616],"label":"Using parallel bars"},{"segment":[18.578577759999998,51.007509119999995],"label":"Using parallel bars"}]},"crfrKqFp0Zg":{"duration":129.43,"subset":"training","resolution":"320x214","url":"https://www.youtube.com/watch?v=crfrKqFp0Zg","annotations":[{"segment":[4.541419656786271,128.26987519500778],"label":"Playing 
flauta"}]},"MQ9Ojge7CyA":{"duration":63.0,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=MQ9Ojge7CyA","annotations":[]},"_B3Q8bTJWG4":{"duration":56.68,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_B3Q8bTJWG4","annotations":[{"segment":[9.539887675507021,21.02308580343214],"label":"Calf roping"}]},"D9rHZpZoeRA":{"duration":202.502,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=D9rHZpZoeRA","annotations":[{"segment":[95.38431348410398,175.60820491107643],"label":"Changing car wheel"}]},"ikZwamRM5mo":{"duration":193.074,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=ikZwamRM5mo","annotations":[{"segment":[0.6013115264797507,190.01444236760122],"label":"Drum corps"}]},"W9goHnSz8H8":{"duration":143.709,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=W9goHnSz8H8","annotations":[]},"qgQVbGtIn0M":{"duration":168.995,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qgQVbGtIn0M","annotations":[{"segment":[78.0379327176287,83.31076600936036],"label":"Slacklining"},{"segment":[105.98394916380654,138.4118739079563],"label":"Slacklining"}]},"PcvK0XvNHgQ":{"duration":45.3,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PcvK0XvNHgQ","annotations":[]},"hiifjzLG8Io":{"duration":19.6,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=hiifjzLG8Io","annotations":[{"segment":[7.9491213978159125,19.291906161622464],"label":"Grooming horse"}]},"y7WO-8cQfO0":{"duration":191.728,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=y7WO-8cQfO0","annotations":[{"segment":[8.671090483619345,155.48162246489858],"label":"Rope skipping"},{"segment":[164.16984243369734,184.20514196567862],"label":"Rope 
skipping"}]},"y9xNMLhgTjY":{"duration":166.023,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=y9xNMLhgTjY","annotations":[{"segment":[6.276983246731974,9.380210830600413],"label":"Longboarding"},{"segment":[15.328063699681586,26.447962541876823],"label":"Longboarding"},{"segment":[56.44582918593839,74.80659239049332],"label":"Longboarding"},{"segment":[138.681360158452,150.57706432124786],"label":"Longboarding"}]},"4qstg-tiRvQ":{"duration":23.66,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=4qstg-tiRvQ","annotations":[{"segment":[0.01,23.661132],"label":"Cheerleading"}]},"wyNM_7YDgfo":{"duration":139.06,"subset":"validation","resolution":"352x262","url":"https://www.youtube.com/watch?v=wyNM_7YDgfo","annotations":[{"segment":[67.85806416043219,127.85908593613311],"label":"Paintball"}]},"-IMXSEIabMM":{"duration":126.39,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=-IMXSEIabMM","annotations":[]},"q1yuDuO01tg":{"duration":95.39,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=q1yuDuO01tg","annotations":[{"segment":[43.89897127857903,73.21455453978159],"label":"Bungee jumping"}]},"JJzBlV3p1Wc":{"duration":131.89,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JJzBlV3p1Wc","annotations":[{"segment":[0.2,29.628807029832217],"label":"Hand washing clothes"},{"segment":[82.50799735390777,131.68934240362813],"label":"Hand washing clothes"}]},"JfifgnVgJEU":{"duration":35.597,"subset":"validation","resolution":"192x144","url":"https://www.youtube.com/watch?v=JfifgnVgJEU","annotations":[{"segment":[15.200801872074884,19.805424336973477],"label":"Having an ice 
cream"}]},"ETbbYC0OhL0":{"duration":27.121,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ETbbYC0OhL0","annotations":[]},"f38Jt5D0z4A":{"duration":20.76,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=f38Jt5D0z4A","annotations":[{"segment":[0.7772344960998441,20.208096898595947],"label":"Grooming horse"}]},"2SBTnunPQrQ":{"duration":229.9,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=2SBTnunPQrQ","annotations":[{"segment":[15.422319540400663,212.68454621994402],"label":"Polishing shoes"}]},"SvYeqLg4dQU":{"duration":113.825,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=SvYeqLg4dQU","annotations":[{"segment":[11.506333456983105,28.323286303440508],"label":"Playing pool"},{"segment":[41.59982802432793,50.45085583825288],"label":"Playing pool"},{"segment":[71.69332259167275,86.20900692874075],"label":"Playing pool"},{"segment":[104.26510340152457,109.92976111847636],"label":"Playing pool"}]},"kYfewEkkBf4":{"duration":91.65,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kYfewEkkBf4","annotations":[]},"OVf4tUiUqA0":{"duration":111.712,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=OVf4tUiUqA0","annotations":[{"segment":[0.01,98.94453666146646],"label":"Hand car wash"}]},"ygkcjkVqxAA":{"duration":45.047,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ygkcjkVqxAA","annotations":[{"segment":[6.822549000318541,39.289685171479675],"label":"Disc 
dog"}]},"eCh_SqpkjtA":{"duration":84.45,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=eCh_SqpkjtA","annotations":[{"segment":[53.7684705,74.287287],"label":"Cheerleading"},{"segment":[4.545943500000001,41.280848999999996],"label":"Cheerleading"}]},"EXPYu9bfNcE":{"duration":234.127,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EXPYu9bfNcE","annotations":[]},"Y75Fvr71aRQ":{"duration":144.71,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Y75Fvr71aRQ","annotations":[{"segment":[76.84357254290171,120.17386895475819],"label":"Playing saxophone"}]},"g16MLNJnyF4":{"duration":102.17,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=g16MLNJnyF4","annotations":[]},"m3SlyuAh1Wk":{"duration":184.738,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=m3SlyuAh1Wk","annotations":[{"segment":[48.89953357587203,76.76824584918272],"label":"Raking leaves"},{"segment":[152.3298060622465,160.3743828149681],"label":"Raking leaves"}]},"3j52keiQuiw":{"duration":99.71000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3j52keiQuiw","annotations":[{"segment":[0,99.70587301587301],"label":"Using the balance beam"}]},"fpIcr1RaEDc":{"duration":88.56,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=fpIcr1RaEDc","annotations":[{"segment":[13.954132268330733,75.29705035881435],"label":"Using parallel bars"}]},"I5g6I-FOguQ":{"duration":73.259,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=I5g6I-FOguQ","annotations":[{"segment":[5.600122931357254,73.25875283446712],"label":"Surfing"}]},"1AiQt87brik":{"duration":75.303,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1AiQt87brik","annotations":[{"segment":[0.01,57.00525199294758],"label":"Playing 
congas"},{"segment":[59.818747320050385,68.37646227332142],"label":"Playing congas"}]},"0iqx7o5WYW8":{"duration":182.72,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=0iqx7o5WYW8","annotations":[]},"kG7inHibWLU":{"duration":171.06,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=kG7inHibWLU","annotations":[{"segment":[70.43610608424336,92.04718408736349],"label":"Making a sandwich"}]},"OCy35F-1WgQ":{"duration":16.184,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=OCy35F-1WgQ","annotations":[]},"mb1LoOTM_NI":{"duration":60.233,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=mb1LoOTM_NI","annotations":[{"segment":[0.01,56.63214196567863],"label":"Arm wrestling"}]},"Wi9bm9omdoM":{"duration":146.657,"subset":"training","resolution":"400x224","url":"https://www.youtube.com/watch?v=Wi9bm9omdoM","annotations":[{"segment":[0.01,145.14954117847535],"label":"Rock-paper-scissors"}]},"2mOQx0l1158":{"duration":182.393,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=2mOQx0l1158","annotations":[{"segment":[30.437577223088926,172.1191965678627],"label":"Assembling bicycle"}]},"RSyk6rS8ay0":{"duration":211.47,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=RSyk6rS8ay0","annotations":[{"segment":[78.97521840873634,173.94333073322935],"label":"Clean and jerk"},{"segment":[23.88062952,48.967932917316695],"label":"Clean and jerk"}]},"qtOP38458F4":{"duration":54.36,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qtOP38458F4","annotations":[{"segment":[6.572101560062403,47.44633319812793],"label":"Using parallel 
bars"}]},"nw32dno_RcM":{"duration":34.97,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=nw32dno_RcM","annotations":[{"segment":[1.6888793229329175,33.94102639313573],"label":"Spinning"}]},"tT7ljH8GCl8":{"duration":80.11,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tT7ljH8GCl8","annotations":[{"segment":[0.2,79.90884199999999],"label":"Mowing the lawn"}]},"gM7x-gjE8dg":{"duration":121.534,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gM7x-gjE8dg","annotations":[]},"38nAdby_fTw":{"duration":148.0,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=38nAdby_fTw","annotations":[{"segment":[14.546414085134835,132.99578418720748],"label":"Doing motocross"}]},"b7B0NRizzYo":{"duration":26.216,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=b7B0NRizzYo","annotations":[{"segment":[0.3476248049921997,24.190596723868957],"label":"Raking leaves"}]},"hPhmxNbqqcs":{"duration":17.067,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=hPhmxNbqqcs","annotations":[]},"HF_a81qOmVQ":{"duration":65.835,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HF_a81qOmVQ","annotations":[{"segment":[4.724484611544462,28.552320043681746],"label":"Hurling"}]},"f3spxjpfuxM":{"duration":183.485,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=f3spxjpfuxM","annotations":[{"segment":[9.4461383679837,183.48408],"label":"Installing carpet"}]},"4usf67inE3w":{"duration":159.289,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4usf67inE3w","annotations":[{"segment":[0.01,155.66642982376254],"label":"Playing ten 
pins"}]},"h3jponlpQNs":{"duration":21.874,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=h3jponlpQNs","annotations":[]},"V8gr2EMHNwQ":{"duration":120.26,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=V8gr2EMHNwQ","annotations":[{"segment":[7.500842433697348,15.75176911076443],"label":"Sailing"},{"segment":[42.379759750390015,65.26818720748831],"label":"Sailing"},{"segment":[101.27822152886115,118.1579251170047],"label":"Sailing"}]},"d-uGSELb0N0":{"duration":67.66,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=d-uGSELb0N0","annotations":[{"segment":[37.9516158,61.41409092000001],"label":"Volleyball"}]},"hwY9jvSjC9I":{"duration":92.857,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hwY9jvSjC9I","annotations":[{"segment":[0.01,85.88339336688793],"label":"Putting on shoes"}]},"s_QH-5G33Fw":{"duration":158.895,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=s_QH-5G33Fw","annotations":[{"segment":[44.60433697347894,158.861],"label":"Playing beach volleyball"}]},"LSaUJwsU4GQ":{"duration":122.83,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=LSaUJwsU4GQ","annotations":[{"segment":[8.814875849455747,117.08454660907525],"label":"Breakdancing"}]},"9cJi1iD7Iyo":{"duration":180.65,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9cJi1iD7Iyo","annotations":[{"segment":[16.345980734467474,179.24213357105714],"label":"Tai chi"}]},"ozkDsQ1UY1k":{"duration":224.839,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=ozkDsQ1UY1k","annotations":[{"segment":[0,202.74077594695788],"label":"Playing 
pool"}]},"LPRLV0eVgfY":{"duration":146.913,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LPRLV0eVgfY","annotations":[]},"JwdW7Tlc4gs":{"duration":179.61,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=JwdW7Tlc4gs","annotations":[{"segment":[5.87925717,135.20315571999998],"label":"Playing guitarra"}]},"DG3VlOODYaI":{"duration":179.63,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DG3VlOODYaI","annotations":[]},"g1na24Bd5Rs":{"duration":180.84,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=g1na24Bd5Rs","annotations":[]},"5JG8Dc2wsdc":{"duration":111.64,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5JG8Dc2wsdc","annotations":[{"segment":[18.290148205928237,91.27654914196567],"label":"Doing nails"}]},"pElJ7U46XnQ":{"duration":43.58,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pElJ7U46XnQ","annotations":[{"segment":[11.218874344773791,25.70142122620905],"label":"Clean and jerk"}]},"O36pn7cHNVc":{"duration":227.25,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=O36pn7cHNVc","annotations":[{"segment":[46.96549648205928,206.46616808999997],"label":"Tai 
chi"}]},"kF6tSW-ZOFg":{"duration":15.42,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=kF6tSW-ZOFg","annotations":[{"segment":[1.2267089672386895,13.421639288611544],"label":"Hopscotch"}]},"LqCg09IRp-o":{"duration":45.953,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LqCg09IRp-o","annotations":[{"segment":[15.722267432083369,35.44656794219223],"label":"Rollerblading"}]},"PQunZdghRls":{"duration":62.462,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=PQunZdghRls","annotations":[]},"ABQYqpWF1LA":{"duration":132.447,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ABQYqpWF1LA","annotations":[{"segment":[34.2879344773791,132.401],"label":"Applying sunscreen"}]},"0nPeqy-DA2E":{"duration":202.9,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=0nPeqy-DA2E","annotations":[{"segment":[0.01,202.89596371882087],"label":"Removing curlers"}]},"63d_t0U1pXw":{"duration":210.327,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=63d_t0U1pXw","annotations":[{"segment":[22.968556075576355,188.6702820493772],"label":"Powerbocking"}]},"pF-r_m8LVPs":{"duration":216.016,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=pF-r_m8LVPs","annotations":[{"segment":[0.01,199.115368174727],"label":"Snow 
tubing"}]},"LryRyt_SHhw":{"duration":25.59,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LryRyt_SHhw","annotations":[]},"lo8CcE7B9cI":{"duration":112.408,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lo8CcE7B9cI","annotations":[]},"MoSuxL57xRY":{"duration":225.745,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MoSuxL57xRY","annotations":[{"segment":[28.878378527301088,172.2137451201248],"label":"Snowboarding"}]},"uowNtTu74nA":{"duration":226.024,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=uowNtTu74nA","annotations":[{"segment":[13.399181692437764,200.63511512121687],"label":"Surfing"}]},"C3BBG_9Vo48":{"duration":210.141,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=C3BBG_9Vo48","annotations":[{"segment":[7.660365920081689,184.76539551509725],"label":"Making a cake"}]},"Ku76zinwWkg":{"duration":53.92,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ku76zinwWkg","annotations":[]},"NdcZrdvh24E":{"duration":56.797,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NdcZrdvh24E","annotations":[]},"LMMimz1-fa0":{"duration":113.871,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LMMimz1-fa0","annotations":[{"segment":[30.393064756553862,104.15919559767535],"label":"Wakeboarding"}]},"1IhbkbuDPpc":{"duration":103.027,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1IhbkbuDPpc","annotations":[{"segment":[0.01,90.44977067082684],"label":"Rollerblading"}]},"mUyMYnGXKgk":{"duration":55.75,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mUyMYnGXKgk","annotations":[{"segment":[20.699973884555384,22.87434088923557],"label":"Layup drill in basketball"},{"segment":[25.483581294851795,26.7012268174727],"label":"Layup drill in 
basketball"},{"segment":[28.527695101404056,30.180214024961],"label":"Layup drill in basketball"},{"segment":[38.355833962558506,40.18230224648986],"label":"Layup drill in basketball"},{"segment":[44.18313753510141,45.052884336973484],"label":"Layup drill in basketball"}]},"H5Sgn5ndjp0":{"duration":180.37,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=H5Sgn5ndjp0","annotations":[]},"THOLslLjRqs":{"duration":150.698,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=THOLslLjRqs","annotations":[{"segment":[0,138.43889079563183],"label":"Painting furniture"}]},"CGz-ghbUXvk":{"duration":79.09,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CGz-ghbUXvk","annotations":[{"segment":[0.37014284087363497,76.37280616692668],"label":"Playing bagpipes"}]},"-y5p8UMdM20":{"duration":141.11,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=-y5p8UMdM20","annotations":[{"segment":[15.40229329173167,17.38258814352574],"label":"Springboard diving"},{"segment":[27.287931357254294,30.368826833073324],"label":"Springboard diving"},{"segment":[32.78953042121685,34.109914196567864],"label":"Springboard diving"},{"segment":[40.49176911076444,47.09368798751951],"label":"Springboard diving"},{"segment":[52.815351014040566,55.23605460218409],"label":"Springboard diving"},{"segment":[62.71822932917317,65.35899687987519],"label":"Springboard diving"},{"segment":[71.96091575663027,74.16155538221528],"label":"Springboard diving"},{"segment":[87.58545709828394,89.12590483619344],"label":"Springboard diving"},{"segment":[100.34916692667707,105.63070202808113],"label":"Springboard diving"},{"segment":[121.69537129485181,124.33613884555382],"label":"Springboard 
diving"}]},"R4kvr-667Ag":{"duration":86.68,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=R4kvr-667Ag","annotations":[]},"Y5hT9Sk5EY4":{"duration":121.302,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Y5hT9Sk5EY4","annotations":[{"segment":[9.651097031636366,27.628630718017835],"label":"Grooming dog"},{"segment":[43.33531804401428,62.82674874258971],"label":"Grooming dog"},{"segment":[74.18098044305772,83.83207738845553],"label":"Grooming dog"}]},"cgPt46YiXNo":{"duration":15.395,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=cgPt46YiXNo","annotations":[{"segment":[3.770652573041697,14.9865408299532],"label":"Powerbocking"}]},"knZfggvbS50":{"duration":95.527,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=knZfggvbS50","annotations":[]},"XBO6AIdaCzU":{"duration":107.16,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=XBO6AIdaCzU","annotations":[{"segment":[0,3.0091756661466458],"label":"Playing harmonica"},{"segment":[8.191644868954757,96.62797416848673],"label":"Playing harmonica"},{"segment":[106.6585597223089,106.960089],"label":"Playing harmonica"}]},"1gM0xfKN-Kc":{"duration":220.985,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1gM0xfKN-Kc","annotations":[{"segment":[0.2,189.02234252342072],"label":"Using the monkey bar"}]},"_hdjRMpleWs":{"duration":58.677,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_hdjRMpleWs","annotations":[{"segment":[0.2,58.47625850340136],"label":"Calf 
roping"}]},"VIYMKDJQ80g":{"duration":206.518,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=VIYMKDJQ80g","annotations":[]},"xMbfjxCCCFo":{"duration":48.55,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=xMbfjxCCCFo","annotations":[]},"3kEAg-JtDBY":{"duration":29.17,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=3kEAg-JtDBY","annotations":[{"segment":[0.3639845621035726,26.02489619040544],"label":"Doing karate"}]},"7lUaR1veDJU":{"duration":13.52,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=7lUaR1veDJU","annotations":[{"segment":[0.021016805276114315,13.514012],"label":"Playing water polo"}]},"iDMzTPfELoc":{"duration":222.84,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=iDMzTPfELoc","annotations":[{"segment":[4.86706184399376,189.12011736661466],"label":"Hopscotch"}]},"qJfhu80Ldgo":{"duration":15.12,"subset":"training","resolution":"480x480","url":"https://www.youtube.com/watch?v=qJfhu80Ldgo","annotations":[{"segment":[0.9099831999999999,11.70930708],"label":"Clean and jerk"}]},"NIJTz15ikgA":{"duration":136.46,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NIJTz15ikgA","annotations":[{"segment":[16.160492979719187,115.67510764430577],"label":"Putting on makeup"}]},"IjdTjpaPPNg":{"duration":150.26,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IjdTjpaPPNg","annotations":[{"segment":[18.049511923334077,58.60231143939635],"label":"Horseback riding"},{"segment":[63.52490560030565,89.30992172386895],"label":"Horseback riding"},{"segment":[98.45188221528862,126.81540066302652],"label":"Horseback riding"}]},"05BGDQvQ2YM":{"duration":173.755,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=05BGDQvQ2YM","annotations":[{"segment":[20.58532917316693,165.76607176287052],"label":"Riding bumper 
cars"}]},"eT9oCTQUMhA":{"duration":178.86,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eT9oCTQUMhA","annotations":[{"segment":[29.856967252839777,167.4222462776062],"label":"Plataform diving"}]},"98iRGAxo05k":{"duration":64.97,"subset":"testing","resolution":"800x480","url":"https://www.youtube.com/watch?v=98iRGAxo05k","annotations":[]},"aHzNMe-VKfE":{"duration":18.99,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=aHzNMe-VKfE","annotations":[{"segment":[2.326088817394696,12.037763399999998],"label":"Playing field hockey"}]},"Trzd5ijRN1A":{"duration":162.819,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Trzd5ijRN1A","annotations":[{"segment":[0,153.13095631825274],"label":"Doing fencing"}]},"fM4pJeHb8hQ":{"duration":185.412,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fM4pJeHb8hQ","annotations":[{"segment":[41.36313989974565,169.79135049755732],"label":"Braiding hair"}]},"nGsRDNIsc2o":{"duration":134.28,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=nGsRDNIsc2o","annotations":[{"segment":[11.731257212193249,24.928921575910657],"label":"Spinning"},{"segment":[102.4390137697348,120.66435977784712],"label":"Spinning"}]},"_ye90Ou8SnE":{"duration":90.465,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=_ye90Ou8SnE","annotations":[{"segment":[10.302557078003119,44.45623944617785],"label":"Installing carpet"},{"segment":[60.12177144149766,82.70271846177847],"label":"Installing 
carpet"}]},"suyh4tGuScw":{"duration":113.11,"subset":"validation","resolution":"638x360","url":"https://www.youtube.com/watch?v=suyh4tGuScw","annotations":[{"segment":[4.940589778471138,103.39948607800312],"label":"Tango"}]},"IgyBIt3GTAU":{"duration":194.03,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=IgyBIt3GTAU","annotations":[{"segment":[0.01,194.0259410430839],"label":"Polishing forniture"}]},"m-C1TSa1PGE":{"duration":177.703,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=m-C1TSa1PGE","annotations":[]},"QacSWR8c-8Y":{"duration":16.93,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QacSWR8c-8Y","annotations":[{"segment":[4.146011177847114,8.027945210608424],"label":"Pole vault"}]},"biAQgXDFskw":{"duration":93.6,"subset":"training","resolution":"448x336","url":"https://www.youtube.com/watch?v=biAQgXDFskw","annotations":[{"segment":[23.782742589703588,91.4833104524181],"label":"Cumbia"}]},"OWkly270P00":{"duration":202.734,"subset":"testing","resolution":"655x480","url":"https://www.youtube.com/watch?v=OWkly270P00","annotations":[]},"DF8nlagyN6k":{"duration":218.5,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DF8nlagyN6k","annotations":[{"segment":[27.941531981279248,212.9689937597504],"label":"Doing fencing"}]},"fT7dpRY0DTo":{"duration":138.484,"subset":"training","resolution":"360x240","url":"https://www.youtube.com/watch?v=fT7dpRY0DTo","annotations":[{"segment":[18.205662846899624,120.23502743101179],"label":"Laying tile"}]},"_S6D21MV8Ks":{"duration":238.26,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=_S6D21MV8Ks","annotations":[{"segment":[3.902090478159127,234.68287018642746],"label":"Playing 
squash"}]},"TokZDNwr664":{"duration":185.714,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=TokZDNwr664","annotations":[{"segment":[0.2,173.00498030530252],"label":"Riding bumper cars"}]},"sCTmSj_tsDQ":{"duration":132.447,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=sCTmSj_tsDQ","annotations":[{"segment":[46.69724870083239,129.34724511700466],"label":"Bullfighting"}]},"xS4twW2gXqc":{"duration":57.21,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=xS4twW2gXqc","annotations":[]},"M8aDrPK7D94":{"duration":82.39,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=M8aDrPK7D94","annotations":[{"segment":[14.535678354084265,44.17866268118707],"label":"Bungee jumping"}]},"-Sqc9fQ7vpk":{"duration":27.12,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=-Sqc9fQ7vpk","annotations":[]},"fh9d3gpT-8I":{"duration":183.6,"subset":"testing","resolution":"512x288","url":"https://www.youtube.com/watch?v=fh9d3gpT-8I","annotations":[]},"2bnO6YWXCiU":{"duration":55.287,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=2bnO6YWXCiU","annotations":[{"segment":[16.885048361934476,21.709347893915755],"label":"Spread mulch"}]},"p0menuS7Mlk":{"duration":38.01,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=p0menuS7Mlk","annotations":[{"segment":[8.271631825273012,31.515213728549146],"label":"Plataform diving"}]},"veNIfCkd1Pw":{"duration":91.65,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=veNIfCkd1Pw","annotations":[{"segment":[15.004953088923555,66.02179359126364],"label":"Using uneven bars"}]},"zyh1cftKtQM":{"duration":233.59,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zyh1cftKtQM","annotations":[{"segment":[62.30939192607322,115.06070812545016],"label":"Brushing 
hair"},{"segment":[142.7096738575374,144.89248694164954],"label":"Brushing hair"}]},"NNuIaUFYGCE":{"duration":52.083,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=NNuIaUFYGCE","annotations":[{"segment":[14.462807758568847,44.363442936037444],"label":"Plastering"}]},"N3xWCui6fNY":{"duration":50.16,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=N3xWCui6fNY","annotations":[{"segment":[0.3909204368174727,49.25597503900156],"label":"Playing accordion"}]},"T-oY05uN3hY":{"duration":55.06,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=T-oY05uN3hY","annotations":[]},"hG2kfPRWjO0":{"duration":213.531,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hG2kfPRWjO0","annotations":[{"segment":[75.59144137620002,81.57828351492466],"label":"Snowboarding"},{"segment":[93.88457013341421,108.85167548022584],"label":"Snowboarding"},{"segment":[145.7705353356945,149.0965587460971],"label":"Snowboarding"},{"segment":[169.38530154955285,179.36337014459218],"label":"Snowboarding"}]},"py9diWBvxiM":{"duration":69.01,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=py9diWBvxiM","annotations":[]},"vvvjTjsXbzE":{"duration":156.29,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=vvvjTjsXbzE","annotations":[{"segment":[67.90599665347159,136.90921766938706],"label":"Mixing drinks"}]},"TYRDXDR5l9U":{"duration":50.2,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=TYRDXDR5l9U","annotations":[{"segment":[3.720082917316693,47.65622010920437],"label":"Washing 
hands"}]},"gBihqIq4vEc":{"duration":191.59,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=gBihqIq4vEc","annotations":[]},"DCyLmohIwkQ":{"duration":155.92000000000002,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DCyLmohIwkQ","annotations":[{"segment":[96.56923426052688,126.73191700185015],"label":"Archery"}]},"rEDbnmvTblI":{"duration":58.584,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rEDbnmvTblI","annotations":[{"segment":[0.2,45.100427457098284],"label":"Snow tubing"}]},"0rr7iGHamw0":{"duration":60.09,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=0rr7iGHamw0","annotations":[]},"5iWfvSjZBnE":{"duration":38.615,"subset":"testing","resolution":"426x234","url":"https://www.youtube.com/watch?v=5iWfvSjZBnE","annotations":[]},"5OGNGIF22SY":{"duration":207.262,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5OGNGIF22SY","annotations":[{"segment":[97.64864309946547,144.85626526013422],"label":"Using the rowing machine"}]},"iA8ylJWzzVc":{"duration":196.023,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iA8ylJWzzVc","annotations":[{"segment":[16.81943392021395,180.7324625382215],"label":"Decorating the Christmas 
tree"}]},"zo8XBVxxkmo":{"duration":190.59,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zo8XBVxxkmo","annotations":[{"segment":[39.247736635996056,43.410375370116846],"label":"Longboarding"},{"segment":[61.844918335508936,68.08887643669013],"label":"Longboarding"},{"segment":[83.55010602056736,88.30740743099112],"label":"Longboarding"},{"segment":[91.57805215065746,93.95670285586934],"label":"Longboarding"},{"segment":[95.14602820847529,104.36329969117133],"label":"Longboarding"},{"segment":[106.14728772008023,113.28323983571589],"label":"Longboarding"},{"segment":[113.87790206084243,118.33787211544461],"label":"Longboarding"},{"segment":[119.52719746333851,123.68983618096723],"label":"Longboarding"},{"segment":[124.28449885491418,129.6364629204368],"label":"Longboarding"},{"segment":[137.36707768174725,142.12437907332293],"label":"Longboarding"},{"segment":[154.31496388923557,155.50428923712948],"label":"Longboarding"}]},"ZZ71FIfxX-c":{"duration":151.813,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=ZZ71FIfxX-c","annotations":[{"segment":[0.01,118.30406951962527],"label":"Beer pong"}]},"4_yy5vswBn4":{"duration":116.36,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4_yy5vswBn4","annotations":[{"segment":[9.617639625585023,108.51600936037443],"label":"Tango"}]},"U2aNQHCnVfI":{"duration":171.62,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=U2aNQHCnVfI","annotations":[{"segment":[80.5231656,156.08992155850234],"label":"Playing violin"}]},"9PY28-zQhm4":{"duration":38.755,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=9PY28-zQhm4","annotations":[{"segment":[1.5114705148205927,29.262069166926675],"label":"Peeling 
potatoes"}]},"5MLEO5JWRYI":{"duration":122.88,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=5MLEO5JWRYI","annotations":[{"segment":[6.517807425897036,122.87984126984126],"label":"Cleaning sink"}]},"8lVgQlJoglo":{"duration":28.42,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=8lVgQlJoglo","annotations":[]},"lWFGSRpuopM":{"duration":154.018,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=lWFGSRpuopM","annotations":[{"segment":[0,150.65409458010976],"label":"Riding bumper cars"}]},"3b1AhVN3_MI":{"duration":162.31,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=3b1AhVN3_MI","annotations":[]},"zNMLz9GIQLk":{"duration":138.23,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=zNMLz9GIQLk","annotations":[{"segment":[66.98390652,123.35881404],"label":"Cleaning windows"}]},"LCe0toF3058":{"duration":152.72,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LCe0toF3058","annotations":[{"segment":[7.909213430000001,141.35670046801872],"label":"Snatch"}]},"GqjFT65KAA0":{"duration":130.08,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=GqjFT65KAA0","annotations":[]},"RpyRgnHdUXc":{"duration":171.851,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=RpyRgnHdUXc","annotations":[{"segment":[83.04759750390015,120.55296411856473],"label":"Using the rowing 
machine"}]},"KI6FNa3BwMM":{"duration":214.53,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=KI6FNa3BwMM","annotations":[{"segment":[28.20289955693791,214.52916099773242],"label":"Futsal"}]},"28Dgwq9xNx8":{"duration":95.829,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=28Dgwq9xNx8","annotations":[]},"qN0a8-A-5Pg":{"duration":164.56,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=qN0a8-A-5Pg","annotations":[{"segment":[13.088223088923556,164.501],"label":"Hammer throw"}]},"1fF1OQtpctg":{"duration":213.81,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1fF1OQtpctg","annotations":[{"segment":[7.6667862714508574,191.3363182527301],"label":"Cheerleading"}]},"MRpg1K7YWSE":{"duration":32.09,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MRpg1K7YWSE","annotations":[{"segment":[6.737593457943924,18.577401869158876],"label":"Throwing darts"}]},"XulEpl5Bi9A":{"duration":50.643,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XulEpl5Bi9A","annotations":[]},"BqjNfptu17Q":{"duration":93.25,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BqjNfptu17Q","annotations":[{"segment":[12.870191887675507,20.868616224648985],"label":"Playing field hockey"},{"segment":[23.631708268330733,26.2493744149766],"label":"Playing field hockey"},{"segment":[29.012466458658345,33.956946957878316],"label":"Playing field hockey"},{"segment":[36.865464898595945,39.04685335413417],"label":"Playing field hockey"},{"segment":[57.5159422776911,58.243071762870514],"label":"Playing field hockey"},{"segment":[61.29701560062403,68.13203276131046],"label":"Playing field hockey"},{"segment":[70.60427301092044,73.51279095163807],"label":"Playing field hockey"},{"segment":[76.56673478939157,80.92951170046803],"label":"Playing field 
hockey"}]},"IfpcrV_cwHI":{"duration":97.06,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=IfpcrV_cwHI","annotations":[{"segment":[0.01,94.88241341653666],"label":"Playing pool"}]},"mlNP3uaTB3Q":{"duration":65.06,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mlNP3uaTB3Q","annotations":[{"segment":[37.98925663026521,50.8721287051482],"label":"Cleaning windows"}]},"qTvZtsi-3KU":{"duration":133.933,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qTvZtsi-3KU","annotations":[{"segment":[14.99709841817049,111.43679340658576],"label":"Swimming"}]},"XeRiPVEZ6pY":{"duration":118.97999999999999,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=XeRiPVEZ6pY","annotations":[{"segment":[0,112.29608233308925],"label":"Playing accordion"}]},"2C_YKYbhm3s":{"duration":126.062,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=2C_YKYbhm3s","annotations":[]},"iKclcQEl4zI":{"duration":26.91,"subset":"testing","resolution":"424x240","url":"https://www.youtube.com/watch?v=iKclcQEl4zI","annotations":[]},"71vVRQ4l8OI":{"duration":49.13,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=71vVRQ4l8OI","annotations":[{"segment":[1.09173526,47.861413806552264],"label":"Using parallel 
bars"}]},"3CeZS6-0NfU":{"duration":18.019,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=3CeZS6-0NfU","annotations":[{"segment":[0.5043890731695884,11.713044645299945],"label":"Welding"}]},"BdKxwCdax_w":{"duration":146.24,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=BdKxwCdax_w","annotations":[{"segment":[73.68978688767551,109.96432594383775],"label":"Breakdancing"},{"segment":[127.75938283931357,140.30717936817473],"label":"Breakdancing"}]},"Ujm7CiWkOBY":{"duration":137.091,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Ujm7CiWkOBY","annotations":[{"segment":[104.77361934477379,118.67216068642746],"label":"Playing pool"}]},"xfV4V_AOVSM":{"duration":156.34,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=xfV4V_AOVSM","annotations":[]},"H8f7pTaBFKk":{"duration":169.297,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=H8f7pTaBFKk","annotations":[{"segment":[16.90325746021841,169.29668934240362],"label":"Removing ice from car"}]},"QnATCZ_P9DE":{"duration":94.75999999999999,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=QnATCZ_P9DE","annotations":[{"segment":[5.318620904836194,92.947751950078],"label":"Kayaking"}]},"J6T9pokWX1E":{"duration":101.263,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=J6T9pokWX1E","annotations":[{"segment":[10.573770670826834,27.933692667706705],"label":"Grooming dog"},{"segment":[79.87142901716068,88.07956006240249],"label":"Grooming dog"}]},"bX-PSFlwV5s":{"duration":142.734,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bX-PSFlwV5s","annotations":[{"segment":[57.42854602184088,126.0043151326053],"label":"Roof shingle 
removal"}]},"Jdtr90RIkHk":{"duration":142.15,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Jdtr90RIkHk","annotations":[{"segment":[98.63785127999999,120.581008274571],"label":"Skateboarding"},{"segment":[45.88284689859594,54.52744124180968],"label":"Skateboarding"},{"segment":[72.48159872386896,80.46122427145086],"label":"Skateboarding"}]},"2AQg1DDVYHI":{"duration":192.773,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2AQg1DDVYHI","annotations":[{"segment":[16.239768219300203,87.51430651511775],"label":"Powerbocking"},{"segment":[109.76880370452913,185.25365180655226],"label":"Powerbocking"}]},"D88jUp91KPk":{"duration":49.85,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=D88jUp91KPk","annotations":[]},"dFkdab5i1no":{"duration":154.23,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=dFkdab5i1no","annotations":[]},"XNigP28gH58":{"duration":82.59,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=XNigP28gH58","annotations":[]},"jWPr92KwXeY":{"duration":150.373,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jWPr92KwXeY","annotations":[{"segment":[7.976072517940717,150.372426],"label":"Snowboarding"}]},"PzNpPDd-VWE":{"duration":99.103,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=PzNpPDd-VWE","annotations":[{"segment":[15.151436770670827,71.27359542121685],"label":"Slacklining"}]},"80g-pLQce54":{"duration":71.007,"subset":"testing","resolution":"424x240","url":"https://www.youtube.com/watch?v=80g-pLQce54","annotations":[]},"15HUSdsIHxg":{"duration":116.867,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=15HUSdsIHxg","annotations":[{"segment":[0.01,116.8656],"label":"Baton 
twirling"}]},"qhei_KanLyo":{"duration":46.44,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qhei_KanLyo","annotations":[]},"RZ2P53zcZk0":{"duration":120.326,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=RZ2P53zcZk0","annotations":[]},"_ekSmyvWdRQ":{"duration":15.07,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_ekSmyvWdRQ","annotations":[{"segment":[0,15.069749],"label":"Preparing salad"}]},"yDH9iAn82Q8":{"duration":178.54,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yDH9iAn82Q8","annotations":[{"segment":[8.939397660000001,39.09268048],"label":"Preparing pasta"},{"segment":[8.12169362,128.57949684000002],"label":"Preparing salad"},{"segment":[51.243976759999995,163.96896255850234],"label":"Preparing pasta"}]},"cdP8_KujTCE":{"duration":110.67,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cdP8_KujTCE","annotations":[{"segment":[27.27812124180967,30.73104798127925],"label":"Drinking 
beer"}]},"BFvMwb0Ld2g":{"duration":76.278,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=BFvMwb0Ld2g","annotations":[]},"7-_Nur_xiV4":{"duration":185.9,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=7-_Nur_xiV4","annotations":[{"segment":[4.350209600220744,179.51864765210607],"label":"Breakdancing"}]},"CnPYJnV1gCU":{"duration":148.469,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=CnPYJnV1gCU","annotations":[]},"6LX02yo9iGo":{"duration":155.621,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=6LX02yo9iGo","annotations":[{"segment":[115.20561773998931,148.89907297964987],"label":"BMX"}]},"EbQJuDQdW8U":{"duration":124.13,"subset":"validation","resolution":"648x480","url":"https://www.youtube.com/watch?v=EbQJuDQdW8U","annotations":[{"segment":[21.490188767550702,124.081],"label":"Ironing clothes"}]},"65rZgGe7Zr4":{"duration":26.355,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=65rZgGe7Zr4","annotations":[{"segment":[0.2,24.791875581903277],"label":"Raking leaves"}]},"U9Ofeof4rlA":{"duration":22.04,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=U9Ofeof4rlA","annotations":[{"segment":[0.8485678499999999,20.981817472698907],"label":"Hammer throw"}]},"WPVb8fYLFUM":{"duration":177.656,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WPVb8fYLFUM","annotations":[{"segment":[33.812727632914836,105.87263898174974],"label":"Baking 
cookies"}]},"l7EktV5FYGk":{"duration":197.02,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=l7EktV5FYGk","annotations":[{"segment":[7.991484889327546,189.3367189163757],"label":"Sailing"}]},"CGR-z8vaawI":{"duration":42.73,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=CGR-z8vaawI","annotations":[]},"_cA6yS9SeEc":{"duration":134.165,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=_cA6yS9SeEc","annotations":[{"segment":[24.69806233882008,37.46570473431182],"label":"Changing car wheel"},{"segment":[45.00070680378235,60.070710942723416],"label":"Changing car wheel"},{"segment":[89.37349676844215,105.69933458562832],"label":"Changing car wheel"}]},"cdb7VSVuUl8":{"duration":207.712,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cdb7VSVuUl8","annotations":[{"segment":[75.82220592823712,90.72742589703589],"label":"Sumo"}]},"jkZIHh5TYIg":{"duration":34.134,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jkZIHh5TYIg","annotations":[]},"jcaOoRM6J80":{"duration":215.273,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jcaOoRM6J80","annotations":[{"segment":[27.87455644348223,138.36526812909253],"label":"Making a cake"},{"segment":[169.26236683753064,207.88373938221528],"label":"Making a cake"}]},"OLWpT2NYgaQ":{"duration":211.93,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=OLWpT2NYgaQ","annotations":[]},"k7MXH55q28U":{"duration":189.38,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=k7MXH55q28U","annotations":[{"segment":[15.206158346333854,181.73573712948516],"label":"Rock climbing"}]},"ub0pmYL8i5s":{"duration":59.629,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ub0pmYL8i5s","annotations":[{"segment":[0.01,59.6288434],"label":"Cleaning 
sink"}]},"z-EPAHsmTMA":{"duration":30.93,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=z-EPAHsmTMA","annotations":[]},"SzCK4QuEG3c":{"duration":105.21,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SzCK4QuEG3c","annotations":[{"segment":[6.893609588143526,75.33730478471139],"label":"Sharpening knives"}]},"ZX8sFpq471A":{"duration":111.83,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZX8sFpq471A","annotations":[{"segment":[13.258764249609984,19.713688950078],"label":"Archery"},{"segment":[36.81051653510141,41.52086699219969],"label":"Archery"},{"segment":[60.18781139625585,65.94490639937597],"label":"Archery"}]},"jiM0HUFn0e8":{"duration":99.66,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jiM0HUFn0e8","annotations":[]},"rfxXEgc9RTQ":{"duration":66.64,"subset":"training","resolution":"640x352","url":"https://www.youtube.com/watch?v=rfxXEgc9RTQ","annotations":[{"segment":[0,66.641291],"label":"Playing squash"}]},"DVXOr56dlKg":{"duration":131.15,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DVXOr56dlKg","annotations":[{"segment":[39.21971898907957,110.55316827],"label":"Pole 
vault"}]},"CoP3xaSZt7A":{"duration":119.35,"subset":"validation","resolution":"360x242","url":"https://www.youtube.com/watch?v=CoP3xaSZt7A","annotations":[{"segment":[5.772024295937824,10.24068826698646],"label":"Capoeira"},{"segment":[24.172090483619343,25.845542901716065],"label":"Capoeira"},{"segment":[33.469048361934476,39.97691887675507],"label":"Capoeira"},{"segment":[0.2,118.987],"label":"Capoeira"},{"segment":[50.76139001560062,51.13326833073323],"label":"Capoeira"},{"segment":[54.66611232449298,56.3395647425897],"label":"Capoeira"},{"segment":[77.90850702028081,81.18072819968799],"label":"Capoeira"},{"segment":[83.60125449921996,93.65574835881435],"label":"Capoeira"},{"segment":[98.12441229641185,107.06174017160686],"label":"Capoeira"}]},"hRIXXCe0Hi0":{"duration":133.004,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=hRIXXCe0Hi0","annotations":[{"segment":[25.106820479621906,28.011741857429396],"label":"Rock-paper-scissors"},{"segment":[41.29138244169222,46.271247660790785],"label":"Rock-paper-scissors"},{"segment":[57.683438787891646,62.66330400699021],"label":"Rock-paper-scissors"},{"segment":[77.6028996642859,84.65770872467552],"label":"Rock-paper-scissors"},{"segment":[92.12750655332336,99.38980999784208],"label":"Rock-paper-scissors"},{"segment":[108.72705628081121,112.876943925117],"label":"Rock-paper-scissors"}]},"w5lQ5vNAksM":{"duration":101.66,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=w5lQ5vNAksM","annotations":[{"segment":[0.01,101.65696145124717],"label":"Playing flauta"}]},"4SLvbRa2NI0":{"duration":166.789,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=4SLvbRa2NI0","annotations":[{"segment":[32.52514294071763,154.03907696723869],"label":"Blow-drying 
hair"}]},"0ixNLmIRd30":{"duration":87.702,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=0ixNLmIRd30","annotations":[]},"zPGc84TC74A":{"duration":60.0,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=zPGc84TC74A","annotations":[{"segment":[4.680215507940046,14.695876694931743],"label":"Playing racquetball"},{"segment":[17.129588759060567,52.98003954988132],"label":"Playing racquetball"}]},"nARkYpOZWyg":{"duration":59.862,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nARkYpOZWyg","annotations":[{"segment":[0.01,59.821],"label":"Blowing leaves"}]},"JBnHqQjeVMk":{"duration":34.529,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=JBnHqQjeVMk","annotations":[{"segment":[11.457804992199689,25.712820592823714],"label":"Calf roping"}]},"wKYPRyIgKuA":{"duration":21.572,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wKYPRyIgKuA","annotations":[]},"5L3ZcYo6SkY":{"duration":53.99,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5L3ZcYo6SkY","annotations":[]},"R-RQx5pbMvo":{"duration":142.07999999999998,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=R-RQx5pbMvo","annotations":[{"segment":[99.5245281575663,107.06090668174727],"label":"Washing hands"}]},"kp-mNHSU5Rc":{"duration":172.01,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kp-mNHSU5Rc","annotations":[{"segment":[38.78100468018721,98.62989079563184],"label":"Grooming horse"},{"segment":[107.21807176287052,126.80985959438378],"label":"Grooming 
horse"}]},"jXORdfzz4oE":{"duration":75.61,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=jXORdfzz4oE","annotations":[{"segment":[11.603023400936037,26.563266770670825],"label":"Volleyball"},{"segment":[37.16501404056162,48.00235569422777],"label":"Volleyball"}]},"Y4svcMuQgBw":{"duration":164.79,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=Y4svcMuQgBw","annotations":[{"segment":[51.91526053042122,143.42689235569424],"label":"Ironing clothes"},{"segment":[150.10986583463338,158.84913884555382],"label":"Ironing clothes"}]},"GjHanmMGvuw":{"duration":129.196,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=GjHanmMGvuw","annotations":[{"segment":[3.8278923556942277,75.76370670826833],"label":"Baton twirling"}]},"LA0tW9OQW80":{"duration":165.77,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LA0tW9OQW80","annotations":[]},"RTwa2d6Oqvo":{"duration":45.07,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=RTwa2d6Oqvo","annotations":[{"segment":[0,29.10912923047534],"label":"Polishing shoes"}]},"lBXRkMZfIXk":{"duration":191.75,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lBXRkMZfIXk","annotations":[{"segment":[101.55884146794443,135.66107394908042],"label":"Sharpening knives"},{"segment":[159.59246424102963,164.0796000678627],"label":"Sharpening knives"}]},"r3lWJy0FYIw":{"duration":23.546,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=r3lWJy0FYIw","annotations":[]},"KdiaNSVVe7Q":{"duration":143.59,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=KdiaNSVVe7Q","annotations":[]},"PG0ao4HkF8M":{"duration":78.948,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=PG0ao4HkF8M","annotations":[{"segment":[7.0143790951638065,62.63717472698908],"label":"Hanging 
wallpaper"}]},"pznmOdbp7E0":{"duration":210.397,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=pznmOdbp7E0","annotations":[{"segment":[111.43437968947329,162.63839509906927],"label":"Making a cake"}]},"SGHjq0KLpvk":{"duration":20.62,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=SGHjq0KLpvk","annotations":[]},"ZeUNeRiU6YY":{"duration":121.255,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZeUNeRiU6YY","annotations":[{"segment":[0.2,115.16922395023327],"label":"Snowboarding"}]},"kS_5cRXUTLA":{"duration":166.93,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kS_5cRXUTLA","annotations":[]},"_MYAaRrTeMQ":{"duration":5.02,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=_MYAaRrTeMQ","annotations":[{"segment":[0.01,5.015509],"label":"Chopping wood"}]},"mek9Zodu8LM":{"duration":126.781,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=mek9Zodu8LM","annotations":[]},"CP9NQpJD0-Y":{"duration":97.32,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=CP9NQpJD0-Y","annotations":[{"segment":[45.81872045347681,48.698755270751214],"label":"Drinking coffee"},{"segment":[60.370470434785226,62.94734282730859],"label":"Drinking coffee"},{"segment":[74.31589750020578,76.74118916375718],"label":"Drinking coffee"},{"segment":[85.98761363104691,88.26132456562634],"label":"Drinking coffee"}]},"ZYAY3TEUjGI":{"duration":178.03,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZYAY3TEUjGI","annotations":[{"segment":[8.614227769110766,170.61728549141966],"label":"Getting a 
haircut"}]},"HhtSc4kz3tk":{"duration":74.281,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HhtSc4kz3tk","annotations":[]},"w_wIOJrztdU":{"duration":140.063,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=w_wIOJrztdU","annotations":[{"segment":[0.01,131.08361934477378],"label":"Building sandcastles"}]},"RmTDHszp1v0":{"duration":127.36,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RmTDHszp1v0","annotations":[{"segment":[43.51350631825274,66.56175624024962],"label":"Using parallel bars"},{"segment":[77.68849758190328,119.01639399375976],"label":"Using parallel bars"}]},"mYrTrVCYSY0":{"duration":79.761,"subset":"training","resolution":"426x236","url":"https://www.youtube.com/watch?v=mYrTrVCYSY0","annotations":[{"segment":[0.01,79.721],"label":"Running a marathon"}]},"uN8lcRwsBno":{"duration":165.559,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uN8lcRwsBno","annotations":[{"segment":[17.040664586583464,156.22496879875195],"label":"Using the rowing machine"}]},"N0DA6RpIf5Y":{"duration":37.872,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=N0DA6RpIf5Y","annotations":[{"segment":[0.2,23.98740790951638],"label":"Raking leaves"}]},"bhAVJ2uUw0U":{"duration":63.93,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bhAVJ2uUw0U","annotations":[{"segment":[0.8975363588143527,24.931565522620904],"label":"Washing 
face"}]},"Jv-bPV8eswU":{"duration":105.05,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Jv-bPV8eswU","annotations":[{"segment":[34.168978318316405,75.63061627771233],"label":"Shaving"}]},"ZK4Y6nOton8":{"duration":117.49000000000001,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZK4Y6nOton8","annotations":[]},"f2DS_AVorrY":{"duration":18.34,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=f2DS_AVorrY","annotations":[]},"MBTSe-NHK-I":{"duration":133.747,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MBTSe-NHK-I","annotations":[{"segment":[11.051533541341653,95.93332293291733],"label":"River tubing"},{"segment":[105.73520592823714,121.37650858034323],"label":"River tubing"},{"segment":[129.3014352574103,132.6382464898596],"label":"River tubing"}]},"WW0Sm6oDc9M":{"duration":22.919,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WW0Sm6oDc9M","annotations":[{"segment":[4.736001557632399,8.910046728971963],"label":"Playing ten pins"}]},"yRjaK8wT4Rc":{"duration":103.097,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=yRjaK8wT4Rc","annotations":[{"segment":[7.395953198127926,79.10454290171607],"label":"Cleaning sink"}]},"fnf7FbZkL6k":{"duration":76.58,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fnf7FbZkL6k","annotations":[{"segment":[0.01,76.26866498232083],"label":"Painting fence"}]},"C1IuvUSmcvA":{"duration":238.469,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=C1IuvUSmcvA","annotations":[{"segment":[34.216680187207494,142.44552730109206],"label":"Carving jack-o-lanterns"},{"segment":[162.52923088923558,192.28609204368175],"label":"Carving 
jack-o-lanterns"}]},"ybT1q6cKpNY":{"duration":88.49,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=ybT1q6cKpNY","annotations":[]},"9hPFweZeIWs":{"duration":82.176,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9hPFweZeIWs","annotations":[{"segment":[23.9731723037629,77.81666090044962],"label":"Calf roping"}]},"WjueR8K-nnI":{"duration":122.602,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WjueR8K-nnI","annotations":[{"segment":[17.160374149659862,100.67420790241326],"label":"Riding bumper cars"}]},"jTMdMnbW9OI":{"duration":123.9,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jTMdMnbW9OI","annotations":[{"segment":[92.39469829329174,120.03579004212169],"label":"Washing face"}]},"YSrnHPcdGL4":{"duration":187.76,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YSrnHPcdGL4","annotations":[{"segment":[14.645557956318253,129.75964349297973],"label":"Archery"}]},"pGKTRM1vcfw":{"duration":43.98,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=pGKTRM1vcfw","annotations":[{"segment":[2.5385459817472698,21.063070713416536],"label":"Playing lacrosse"}]},"ZeEpbvUaTrQ":{"duration":234.336,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZeEpbvUaTrQ","annotations":[]},"OYMGPkucZYI":{"duration":72.56,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OYMGPkucZYI","annotations":[{"segment":[24.564791683307334,49.80879419656787],"label":"Cleaning 
shoes"}]},"YjvAy19whWU":{"duration":202.386,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YjvAy19whWU","annotations":[]},"6VD_qwIgTzU":{"duration":109.37,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6VD_qwIgTzU","annotations":[{"segment":[0.6824670826833074,7.677754680187207],"label":"Getting a tattoo"}]},"iWes42ows7w":{"duration":61.16,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=iWes42ows7w","annotations":[]},"uS5QIHQztyA":{"duration":166.05,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uS5QIHQztyA","annotations":[]},"xrbKvttgcMA":{"duration":38.82,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=xrbKvttgcMA","annotations":[{"segment":[0.01,38.823763],"label":"Playing squash"}]},"hi4aLY1ajTY":{"duration":140.203,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=hi4aLY1ajTY","annotations":[{"segment":[15.7167476635514,32.74789719626168],"label":"Swimming"},{"segment":[38.64251869158878,107.8497414330218],"label":"Swimming"},{"segment":[123.56873208722742,140.161],"label":"Swimming"}]},"CikGafc6H_8":{"duration":190.66,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CikGafc6H_8","annotations":[{"segment":[4.75904014976599,23.200320730109205],"label":"Playing water polo"},{"segment":[60.97520191887675,76.44208240561622],"label":"Playing water polo"},{"segment":[103.21168324804991,106.18608334165366],"label":"Playing water polo"},{"segment":[113.62208357566301,121.9504038377535],"label":"Playing water 
polo"}]},"k2vkwy2vdP4":{"duration":89.629,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=k2vkwy2vdP4","annotations":[{"segment":[20.908580715396578,74.9921094992224],"label":"Surfing"}]},"l9mvKfKACio":{"duration":154.645,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=l9mvKfKACio","annotations":[{"segment":[28.701277691107645,47.996254290171606],"label":"Snow tubing"}]},"yVKmkR78Jn0":{"duration":37.106,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yVKmkR78Jn0","annotations":[{"segment":[7.92099375975039,36.840999999999994],"label":"Hand car wash"}]},"8ma-p7ap2MQ":{"duration":224.189,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=8ma-p7ap2MQ","annotations":[{"segment":[69.25016392511701,162.28321243057724],"label":"Trimming branches or hedges"}]},"IJ76Wtgg2g4":{"duration":109.88,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IJ76Wtgg2g4","annotations":[{"segment":[4.091782240000001,106.74585023400935],"label":"Using the balance beam"}]},"RN2QwhcAsUY":{"duration":121.35,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=RN2QwhcAsUY","annotations":[{"segment":[11.354134165366615,113.1628705148206],"label":"Bathing dog"}]},"MoVZoCmkdjY":{"duration":160.17000000000002,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MoVZoCmkdjY","annotations":[{"segment":[0.01,152.1464414976599],"label":"Mixing drinks"}]},"lUrHCNqVl-A":{"duration":176.54,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=lUrHCNqVl-A","annotations":[{"segment":[15.285558491727425,18.315128643240968],"label":"Discus throw"},{"segment":[27.954670034420424,35.390887679044575],"label":"Discus throw"},{"segment":[43.10252079198814,48.6108301583764],"label":"Discus throw"},{"segment":[0.2,176.34131519274376],"label":"Discus 
throw"},{"segment":[69.5424057506518,73.9490532437624],"label":"Discus throw"},{"segment":[81.93610182502537,85.79191838149715],"label":"Discus throw"},{"segment":[94.88062883603779,98.46102992419017],"label":"Discus throw"},{"segment":[106.44807850545314,110.85472599856375],"label":"Discus throw"},{"segment":[119.94343645310438,124.07466847789559],"label":"Discus throw"},{"segment":[143.07833579193507,146.93415234840685],"label":"Discus throw"},{"segment":[157.12452467622512,167.039481535724],"label":"Discus throw"}]},"3TNDCTlLlGk":{"duration":214.691,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3TNDCTlLlGk","annotations":[{"segment":[46.74462449372621,50.083526607189526],"label":"Wakeboarding"},{"segment":[59.0985623135405,72.12028055604745],"label":"Wakeboarding"},{"segment":[85.14199879855441,89.48257154605673],"label":"Wakeboarding"},{"segment":[141.2355543047382,149.91669979974287],"label":"Wakeboarding"},{"segment":[154.25727254724518,160.93507677417182],"label":"Wakeboarding"},{"segment":[166.27732015571314,187.31240181659277],"label":"Wakeboarding"}]},"3ZJQHfrfR2M":{"duration":40.055,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3ZJQHfrfR2M","annotations":[{"segment":[0.01,0.02],"label":"Raking 
leaves"}]},"HxrtuZSCwvY":{"duration":165.72,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=HxrtuZSCwvY","annotations":[]},"nY8a1E4QH5Q":{"duration":236.01,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nY8a1E4QH5Q","annotations":[{"segment":[8.468592823712948,232.7022028081123],"label":"Zumba"}]},"i8hNrK3r_MY":{"duration":219.267,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=i8hNrK3r_MY","annotations":[]},"SjbKlrUx7Do":{"duration":112.66,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SjbKlrUx7Do","annotations":[]},"9xHLzVojpBc":{"duration":96.69,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=9xHLzVojpBc","annotations":[{"segment":[14.613809051482058,76.83549088923556],"label":"Tennis serve with ball bouncing"}]},"mSPUMHD4F-E":{"duration":169.553,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mSPUMHD4F-E","annotations":[{"segment":[5.554729217740139,33.59288622157131],"label":"Installing carpet"},{"segment":[49.19902975039001,160.0291021450858],"label":"Installing 
carpet"}]},"ujWG6rjlN3s":{"duration":46.81,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=ujWG6rjlN3s","annotations":[{"segment":[4.673839825273011,18.549301806552265],"label":"Cheerleading"},{"segment":[20.594106730109207,26.071262775351013],"label":"Cheerleading"},{"segment":[27.458808973478938,35.418942425897036],"label":"Cheerleading"}]},"dc8pLGl9Ccc":{"duration":208.376,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dc8pLGl9Ccc","annotations":[{"segment":[114.68490610265286,178.29056965405474],"label":"Rock-paper-scissors"}]},"RQajzyqAcQM":{"duration":177.454,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RQajzyqAcQM","annotations":[{"segment":[57.69223010627268,58.79638770088128],"label":"Building sandcastles"},{"segment":[123.94168578278902,136.915536490928],"label":"Building sandcastles"},{"segment":[142.98840321565584,149.06126994038365],"label":"Building sandcastles"}]},"r5NqGSZsN0E":{"duration":56.26,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=r5NqGSZsN0E","annotations":[{"segment":[11.147063439000146,40.55073471510289],"label":"Hand washing clothes"}]},"KZLEUd2ALVI":{"duration":42.03,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KZLEUd2ALVI","annotations":[{"segment":[3.143663026521061,11.788736349453979],"label":"Hammer throw"},{"segment":[22.59507800312013,29.733812792511703],"label":"Hammer throw"}]},"8WJWtfmEipI":{"duration":15.95,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8WJWtfmEipI","annotations":[{"segment":[1.5429495070202808,15.752108843537416],"label":"Shoveling 
snow"}]},"Yg03CCu9GMo":{"duration":150.74,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=Yg03CCu9GMo","annotations":[]},"dN14VPSHimI":{"duration":110.964,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=dN14VPSHimI","annotations":[{"segment":[14.54489262402496,31.167627051482057],"label":"Calf roping"},{"segment":[36.362231560062405,48.48297541341654],"label":"Calf roping"},{"segment":[55.58226824180967,62.68156107020281],"label":"Calf roping"},{"segment":[65.27886332449299,76.70699324336974],"label":"Calf roping"},{"segment":[80.86267685023401,86.74989529329173],"label":"Calf roping"}]},"m7cHlmcFk9Y":{"duration":181.627,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=m7cHlmcFk9Y","annotations":[{"segment":[19.91168641288258,174.340387347462],"label":"Clipping cat claws"}]},"cinmiQ4tHYk":{"duration":70.26,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cinmiQ4tHYk","annotations":[{"segment":[14.578870940717628,62.042413176287056],"label":"Tai chi"}]},"MhO48WnApoM":{"duration":143.686,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MhO48WnApoM","annotations":[{"segment":[10.759569125622168,137.40866273634944],"label":"Ice fishing"}]},"-hiRu8Sh7Tk":{"duration":68.8,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-hiRu8Sh7Tk","annotations":[]},"BJ-1HVacaF4":{"duration":226.372,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BJ-1HVacaF4","annotations":[{"segment":[59.58996277223246,183.35373009345795],"label":"Scuba diving"},{"segment":[188.64278000000002,191.11100328971963],"label":"Scuba diving"},{"segment":[195.34224321495327,196.7526565233645],"label":"Scuba diving"},{"segment":[199.22087981308414,201.33649977570096],"label":"Scuba diving"},{"segment":[204.50992971962617,206.625549682243],"label":"Scuba 
diving"}]},"L7dH-I2qLU8":{"duration":125.063,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=L7dH-I2qLU8","annotations":[]},"XoFikALe8Q0":{"duration":184.715,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=XoFikALe8Q0","annotations":[{"segment":[42.648650174578414,117.86012109055791],"label":"Hand car wash"}]},"msd9vrplD-I":{"duration":99.567,"subset":"validation","resolution":"3840x2160","url":"https://www.youtube.com/watch?v=msd9vrplD-I","annotations":[{"segment":[70.13194335350272,75.25786612352617],"label":"Swinging at the playground"}]},"ybFvomdhW2Y":{"duration":183.09,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=ybFvomdhW2Y","annotations":[{"segment":[4.8557235894877975,57.98305227447193],"label":"Making an omelette"},{"segment":[67.40886865406588,79.97662382685783],"label":"Making an omelette"},{"segment":[87.68865541016199,103.68398017553355],"label":"Making an omelette"},{"segment":[114.53795055203568,114.8235813514173],"label":"Making an omelette"},{"segment":[123.10687453348473,167.37964715444616],"label":"Making an omelette"}]},"t8hx8ihRPGc":{"duration":172.78,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=t8hx8ihRPGc","annotations":[{"segment":[8.179636547635278,19.21138265355428],"label":"Swinging at the playground"},{"segment":[28.897793868507552,34.27913343237048],"label":"Swinging at the playground"},{"segment":[39.929539974426554,45.310879538289484],"label":"Swinging at the playground"},{"segment":[58.49516146975366,67.64343872832065],"label":"Swinging at the playground"},{"segment":[92.3976007220901,95.89547143860102],"label":"Swinging at the playground"},{"segment":[101.00774402427079,105.043748697168],"label":"Swinging at the playground"},{"segment":[112.57762408657611,119.03523156321162],"label":"Swinging at the playground"},{"segment":[157.2427424666384,162.3550150523082],"label":"Swinging at the 
playground"}]},"mO1T8zhIliY":{"duration":233.849,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=mO1T8zhIliY","annotations":[{"segment":[5.609436451127097,231.4438686006598],"label":"Doing fencing"}]},"jRj8deMhPrQ":{"duration":123.67,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=jRj8deMhPrQ","annotations":[{"segment":[7.331419939790789,19.87200667890661],"label":"Playing ice hockey"},{"segment":[35.306574973203006,45.53197646817437],"label":"Playing ice hockey"},{"segment":[49.39061854174847,61.159476866149475],"label":"Playing ice hockey"},{"segment":[67.14037208018934,80.64561933769866],"label":"Playing ice hockey"},{"segment":[84.31132930759406,98.78123591887676],"label":"Playing ice hockey"},{"segment":[107.07731627925116,116.91685345085803],"label":"Playing ice hockey"}]},"mRgS35iyhYE":{"duration":55.682,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=mRgS35iyhYE","annotations":[{"segment":[0.2,53.20285335413416],"label":"Riding bumper cars"}]},"PgfUSPHXluA":{"duration":50.783,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=PgfUSPHXluA","annotations":[{"segment":[0,31.66365054602184],"label":"Arm wrestling"}]},"Lb1PFRh8cYs":{"duration":22.06,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Lb1PFRh8cYs","annotations":[]},"CZveW9zX0s8":{"duration":141.41,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=CZveW9zX0s8","annotations":[{"segment":[11.247131045241808,12.349790951638065],"label":"Grooming dog"},{"segment":[44.76799219968799,50.06075975039001],"label":"Grooming dog"},{"segment":[63.07214664586583,66.8211903276131],"label":"Grooming dog"},{"segment":[75.64246957878315,81.15576911076442],"label":"Grooming dog"},{"segment":[104.98807488299532,121.7508767550702],"label":"Grooming dog"},{"segment":[129.25002496099845,135.20523088923557],"label":"Grooming 
dog"}]},"ITVfOVR34Jo":{"duration":137.69,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ITVfOVR34Jo","annotations":[{"segment":[7.303599463706439,122.65750769110765],"label":"Paintball"}]},"uElCsF1fOgE":{"duration":221.01,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=uElCsF1fOgE","annotations":[{"segment":[120.73170396,184.46128248000002],"label":"Cumbia"}]},"pMmlJGSucss":{"duration":74.24,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pMmlJGSucss","annotations":[{"segment":[31.44054068018721,71.89711876131045],"label":"Plataform diving"}]},"WJfMz7joX4s":{"duration":87.099,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=WJfMz7joX4s","annotations":[{"segment":[0.01,87.061],"label":"Playing ice hockey"}]},"OtcJMIJTTx4":{"duration":8.08,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=OtcJMIJTTx4","annotations":[{"segment":[0.5463696,5.382383775351014],"label":"Springboard diving"}]},"aEpRYY_wi0M":{"duration":39.34,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=aEpRYY_wi0M","annotations":[{"segment":[10.371145457957972,15.02757809347199],"label":"Fixing bicycle"},{"segment":[19.806548429920586,24.03409911216358],"label":"Fixing bicycle"}]},"YFIkhFEtD8w":{"duration":87.656,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YFIkhFEtD8w","annotations":[{"segment":[0.01,4.9229201821134065],"label":"Playing rubik cube"},{"segment":[26.255574304604835,81.6384256099844],"label":"Playing rubik 
cube"}]},"huo2ldsQHUI":{"duration":15.09,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=huo2ldsQHUI","annotations":[{"segment":[0.07041784867740233,14.341841102176623],"label":"Spinning"}]},"3CO7fRiJOy4":{"duration":24.196,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=3CO7fRiJOy4","annotations":[{"segment":[0.01,23.833393808907992],"label":"Riding bumper cars"}]},"1V0TqgcXSVk":{"duration":104.84,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=1V0TqgcXSVk","annotations":[{"segment":[27.75900564,87.73158353999999],"label":"Doing step aerobics"}]},"5AoshxUQv0U":{"duration":16.069,"subset":"training","resolution":"640x352","url":"https://www.youtube.com/watch?v=5AoshxUQv0U","annotations":[{"segment":[4.229218408736349,13.688653666146648],"label":"Doing a powerbomb"}]},"hZ0jI9U5Nws":{"duration":26.36,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hZ0jI9U5Nws","annotations":[{"segment":[2.795812786271451,24.298902304212167],"label":"Using uneven bars"}]},"Nosx28FNB5E":{"duration":73.027,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Nosx28FNB5E","annotations":[{"segment":[15.607902608424338,32.013289291731674],"label":"Playing pool"},{"segment":[27.228384842433698,32.69684707020281],"label":"Playing pool"},{"segment":[30.987952624024963,63.570873397815916],"label":"Playing pool"}]},"TXSNkjjTzdE":{"duration":225.327,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=TXSNkjjTzdE","annotations":[{"segment":[48.51021639232916,191.931724099844],"label":"Blow-drying hair"}]},"Fdjw9ld-hbA":{"duration":6.084,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Fdjw9ld-hbA","annotations":[{"segment":[0.01,5.717851310097941],"label":"Doing a 
powerbomb"}]},"ho-aTcEykAc":{"duration":12.7,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ho-aTcEykAc","annotations":[]},"9VGxxRNOdbM":{"duration":68.57,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=9VGxxRNOdbM","annotations":[{"segment":[23.838039001560063,62.641663026521066],"label":"Using the pommel horse"}]},"ZgIKmwnCqr4":{"duration":17.206,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=ZgIKmwnCqr4","annotations":[{"segment":[0.2,17.005986394557823],"label":"Hitting a pinata"}]},"HAEyXtEdERI":{"duration":194.978,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HAEyXtEdERI","annotations":[{"segment":[30.417778343786814,35.2846228787927],"label":"BMX"},{"segment":[40.455645197236464,51.40604540099971],"label":"BMX"},{"segment":[76.65280142634276,90.34080159594383],"label":"BMX"},{"segment":[93.99093499375975,97.64106839157566],"label":"BMX"},{"segment":[110.11235750078004,121.67111326053042],"label":"BMX"},{"segment":[125.92960222464897,170.03538078159124],"label":"BMX"}]},"tIbSsad0z9U":{"duration":158.918,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tIbSsad0z9U","annotations":[{"segment":[19.21381274298591,23.676375702647153],"label":"Swimming"},{"segment":[53.426795433722106,58.88103905108585],"label":"Swimming"}]},"CB0yYGVNcOk":{"duration":143.27,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CB0yYGVNcOk","annotations":[{"segment":[29.84959893,135.22091574882995],"label":"Playing saxophone"},{"segment":[4.7327193,7.64882995],"label":"Playing 
saxophone"}]},"QKXh-onHFIM":{"duration":17.903,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QKXh-onHFIM","annotations":[]},"CvbL36Y8hN0":{"duration":175.27,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=CvbL36Y8hN0","annotations":[]},"gGBDLkTjbpE":{"duration":89.49,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=gGBDLkTjbpE","annotations":[]},"Rvsc5eymWIg":{"duration":68.592,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Rvsc5eymWIg","annotations":[{"segment":[5.655066978193147,55.91047352024923],"label":"Changing car wheel"}]},"BlfFKVGQbi8":{"duration":66.293,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=BlfFKVGQbi8","annotations":[{"segment":[7.988475858420968,59.07756464346769],"label":"Shuffleboard"}]},"ks018ZYzrx4":{"duration":226.49,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ks018ZYzrx4","annotations":[]},"AZx_lm2XLHk":{"duration":48.44,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AZx_lm2XLHk","annotations":[{"segment":[15.566105906396258,17.07737832449298],"label":"High jump"},{"segment":[21.762322820592825,25.61606748673947],"label":"High jump"}]},"RodH5r1FjzU":{"duration":61.39,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=RodH5r1FjzU","annotations":[]},"7NMds32-lMc":{"duration":237.49,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7NMds32-lMc","annotations":[{"segment":[4.51474193,225.71151002652107],"label":"Clean and jerk"},{"segment":[90.76744966999999,199.53686874000002],"label":"Clean and jerk"}]},"5I0K3y27EUM":{"duration":15.024,"subset":"validation","resolution":"270x360","url":"https://www.youtube.com/watch?v=5I0K3y27EUM","annotations":[{"segment":[5.887829014738347,15.023310657596372],"label":"Fun sliding 
down"}]},"cY0E0B1b3KA":{"duration":60.024,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=cY0E0B1b3KA","annotations":[]},"8VlAzVqm2HQ":{"duration":119.305,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8VlAzVqm2HQ","annotations":[]},"G98dfNXqu4Q":{"duration":26.912,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=G98dfNXqu4Q","annotations":[{"segment":[0.01,24.900179773326354],"label":"Calf roping"}]},"IL8__uSFpqg":{"duration":115.4,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=IL8__uSFpqg","annotations":[]},"gS4dNLjHouo":{"duration":186.782,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gS4dNLjHouo","annotations":[{"segment":[0.01,186.78131519274376],"label":"Drum corps"}]},"EVtM8DKW4bc":{"duration":126.294,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EVtM8DKW4bc","annotations":[{"segment":[0.01,126.29333333333334],"label":"Riding bumper cars"}]},"AZrI6X2XAUU":{"duration":107.602,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=AZrI6X2XAUU","annotations":[{"segment":[16.90159659736108,32.63179778722012],"label":"Putting in contact lenses"},{"segment":[69.78184621171843,75.63883591622854],"label":"Putting in contact lenses"}]},"RclRzty0YVE":{"duration":100.473,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RclRzty0YVE","annotations":[{"segment":[4.536151090342679,80.41538006230529],"label":"Beach soccer"}]},"1Uur858vdY8":{"duration":30.81,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=1Uur858vdY8","annotations":[{"segment":[3.8801551599999993,23.91843124],"label":"Long 
jump"}]},"H6O2o-WLyK4":{"duration":46.12,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=H6O2o-WLyK4","annotations":[]},"Sh8r9g_lp7U":{"duration":69.428,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Sh8r9g_lp7U","annotations":[{"segment":[34.226150783151326,56.863066965678634],"label":"Laying tile"}]},"-G-sh-NhYtk":{"duration":20.83,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-G-sh-NhYtk","annotations":[{"segment":[0,20.828253],"label":"Shoveling snow"}]},"_15t4WTR19s":{"duration":238.28,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_15t4WTR19s","annotations":[{"segment":[27.872971918876754,223.74583775351016],"label":"Preparing salad"}]},"hIjOvSJf9c0":{"duration":170.528,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=hIjOvSJf9c0","annotations":[{"segment":[138.4374193829948,154.0845599703881],"label":"Hanging wallpaper"}]},"87hjft6OBiU":{"duration":71.28999999999999,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=87hjft6OBiU","annotations":[{"segment":[22.797938312090306,65.50236910156679],"label":"Snatch"}]},"1XQUDJhMcj8":{"duration":121.0,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=1XQUDJhMcj8","annotations":[{"segment":[7.3595616224648985,20.56903120124805],"label":"Running a marathon"},{"segment":[34.91074102964118,45.478316692667704],"label":"Running a marathon"},{"segment":[56.04589235569423,69.4440686427457],"label":"Running a marathon"},{"segment":[80.20035101404056,101.52420904836194],"label":"Running a marathon"}]},"dxgeEdlt7do":{"duration":130.26,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dxgeEdlt7do","annotations":[{"segment":[1.4225389396528243,106.08076092839632],"label":"Doing 
motocross"}]},"qaB0igbuKuQ":{"duration":160.868,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=qaB0igbuKuQ","annotations":[{"segment":[73.45615132605305,76.46459438377535],"label":"Clipping cat claws"},{"segment":[89.75188455538222,96.01947425897036],"label":"Clipping cat claws"},{"segment":[97.02228861154447,127.85882995319812],"label":"Clipping cat claws"}]},"KXMZHfCwTO0":{"duration":54.823,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=KXMZHfCwTO0","annotations":[]},"yjQ-0AGqFS4":{"duration":202.339,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yjQ-0AGqFS4","annotations":[{"segment":[0.2,201.9654290171607],"label":"Playing drums"}]},"F54PZypvzCc":{"duration":80.062,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=F54PZypvzCc","annotations":[{"segment":[0,6.744728555382216],"label":"Throwing darts"},{"segment":[18.36064995631825,26.479304698907956],"label":"Throwing darts"},{"segment":[34.597959441497665,42.34190704212169],"label":"Throwing darts"},{"segment":[52.583902255850234,59.078826049922],"label":"Throwing darts"},{"segment":[67.44728555382216,73.31769744461779],"label":"Throwing darts"}]},"ZT_gv2W65Zc":{"duration":28.79,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=ZT_gv2W65Zc","annotations":[{"segment":[4.668072390155257,28.43983407239825],"label":"Brushing hair"}]},"sjHCZWdopsQ":{"duration":193.07,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=sjHCZWdopsQ","annotations":[{"segment":[26.45486246,142.88367365],"label":"Playing accordion"}]},"YbEX3klOS3Q":{"duration":68.99,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=YbEX3klOS3Q","annotations":[{"segment":[6.49957636657905,18.09161381891839],"label":"Javelin throw"},{"segment":[23.03457061642618,30.663916977796898],"label":"Javelin 
throw"},{"segment":[32.59811746377821,40.01255266003989],"label":"Javelin throw"},{"segment":[53.01467814913646,61.933491501161384],"label":"Javelin throw"}]},"5T7vTuJmYeQ":{"duration":11.355,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=5T7vTuJmYeQ","annotations":[{"segment":[0.20000000000000004,7.994283931357254],"label":"Using the monkey bar"}]},"ayLeSjJz53I":{"duration":146.01,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ayLeSjJz53I","annotations":[{"segment":[5.238930384426261,140.76778163371432],"label":"Ballet"}]},"n04zQqQI3wQ":{"duration":68.128,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=n04zQqQI3wQ","annotations":[{"segment":[12.827696261682243,36.055358255451715],"label":"Snow tubing"}]},"LBC8Fa1oCGI":{"duration":16.6,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=LBC8Fa1oCGI","annotations":[{"segment":[0,16.555],"label":"Windsurfing"}]},"J8B2dX3FLTo":{"duration":204.544,"subset":"validation","resolution":"480x272","url":"https://www.youtube.com/watch?v=J8B2dX3FLTo","annotations":[{"segment":[60.75896514618973,125.01713770917574],"label":"Playing drums"}]},"0VklWRtiNyw":{"duration":179.513,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=0VklWRtiNyw","annotations":[]},"KlxUgvLHP7M":{"duration":168.995,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KlxUgvLHP7M","annotations":[{"segment":[27.678656860560135,158.6909650826833],"label":"Rope skipping"}]},"PxEu0AAH9LI":{"duration":233.11,"subset":"training","resolution":"492x360","url":"https://www.youtube.com/watch?v=PxEu0AAH9LI","annotations":[{"segment":[3.6365854089945913,227.65024492667706],"label":"Belly 
dance"}]},"ljxNDSS_AcA":{"duration":179.28,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ljxNDSS_AcA","annotations":[{"segment":[93.36802374726989,153.4701947223089],"label":"Painting"},{"segment":[24.0408683900156,85.62101998],"label":"Painting"}]},"c0c9G-qotQE":{"duration":53.66,"subset":"testing","resolution":"640x356","url":"https://www.youtube.com/watch?v=c0c9G-qotQE","annotations":[]},"l0aksT_7wKo":{"duration":30.953,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=l0aksT_7wKo","annotations":[{"segment":[10.817347893915757,19.84790171606864],"label":"Doing crunches"}]},"Gda1xYAMb10":{"duration":140.16,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=Gda1xYAMb10","annotations":[]},"TlwJfzlZZsE":{"duration":120.51,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=TlwJfzlZZsE","annotations":[{"segment":[22.551223429999997,94.75177375],"label":"Discus throw"}]},"9fQ2wWFJJGo":{"duration":213.415,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9fQ2wWFJJGo","annotations":[{"segment":[55.832785046728965,106.3581308411215],"label":"Building sandcastles"}]},"UYhKDweME3A":{"duration":91.86,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UYhKDweME3A","annotations":[{"segment":[16.479989281203903,70.64899752724803],"label":"Preparing pasta"}]},"weqZ0sgHXNc":{"duration":193.608,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=weqZ0sgHXNc","annotations":[]},"rt3t2n6K_ww":{"duration":210.838,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rt3t2n6K_ww","annotations":[{"segment":[0.2,210.58100000000002],"label":"Playing 
drums"}]},"XLsuG0cNl4o":{"duration":54.17,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=XLsuG0cNl4o","annotations":[{"segment":[0.01,13.810521060842433],"label":"Shaving"},{"segment":[24.369023400936037,45.232624024961],"label":"Shaving"}]},"F8EK6W0XAKY":{"duration":160.427,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=F8EK6W0XAKY","annotations":[]},"afORRx0QZgI":{"duration":131.96,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=afORRx0QZgI","annotations":[{"segment":[20.99815636707101,123.3127014024961],"label":"Belly dance"}]},"y21yGd1uMU4":{"duration":217.873,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=y21yGd1uMU4","annotations":[]},"TdNiQsZNJTU":{"duration":198.23,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=TdNiQsZNJTU","annotations":[{"segment":[0.01,198.22875283446712],"label":"Playing water polo"}]},"-GRvxWH4axc":{"duration":21.967,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=-GRvxWH4axc","annotations":[{"segment":[0.6151682242990654,20.71066355140187],"label":"Hitting a pinata"}]},"3Ay2SFDtYVo":{"duration":67.45,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=3Ay2SFDtYVo","annotations":[]},"Yc9pZ8Vy-3s":{"duration":218.71,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Yc9pZ8Vy-3s","annotations":[{"segment":[8.871181828393135,58.00388118564742],"label":"Kayaking"},{"segment":[98.60659801560062,209.83757017160687],"label":"Kayaking"}]},"M30w7UiD8Ac":{"duration":80.899,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=M30w7UiD8Ac","annotations":[]},"gR9qUDlytF8":{"duration":16.045,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=gR9qUDlytF8","annotations":[{"segment":[0.01,16.001],"label":"Camel 
ride"}]},"Us795clHJmw":{"duration":30.953,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Us795clHJmw","annotations":[{"segment":[0.01,30.95219954648526],"label":"Hitting a pinata"}]},"KezLNkSQF1Y":{"duration":210.025,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=KezLNkSQF1Y","annotations":[]},"HwYZEZYgeBI":{"duration":72.53999999999999,"subset":"training","resolution":"854x478","url":"https://www.youtube.com/watch?v=HwYZEZYgeBI","annotations":[{"segment":[25.108165366614667,64.01451170046802],"label":"Polishing forniture"}]},"tZswexUR6Q0":{"duration":192.818,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tZswexUR6Q0","annotations":[{"segment":[18.945308892355694,192.761],"label":"Wakeboarding"}]},"7gRbYNWSUjk":{"duration":100.613,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=7gRbYNWSUjk","annotations":[{"segment":[0.01,90.09562934898348],"label":"Capoeira"}]},"wriCGl_odCY":{"duration":69.684,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=wriCGl_odCY","annotations":[]},"VRiUhsafjRM":{"duration":198.067,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=VRiUhsafjRM","annotations":[{"segment":[35.21390639625585,80.62945553822152],"label":"Baking cookies"}]},"t7JiXZlKhpA":{"duration":161.89,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=t7JiXZlKhpA","annotations":[]},"-KGo1Xq3wQ8":{"duration":76.51,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-KGo1Xq3wQ8","annotations":[]},"YDwHdB6MBrE":{"duration":94.16,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=YDwHdB6MBrE","annotations":[{"segment":[18.54967356,57.529316],"label":"Playing field hockey"},{"segment":[62.681532319999995,83.1514762698908],"label":"Playing field 
hockey"}]},"laVTxf8Vwvg":{"duration":201.76,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=laVTxf8Vwvg","annotations":[]},"mua8hNPuQHw":{"duration":109.343,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mua8hNPuQHw","annotations":[{"segment":[0.01,107.66659278314656],"label":"Doing fencing"}]},"ZjvmWr5LoFw":{"duration":226.233,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZjvmWr5LoFw","annotations":[{"segment":[10.411613939387507,197.82066484836264],"label":"Riding bumper cars"}]},"1494UwmvAJM":{"duration":42.075,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=1494UwmvAJM","annotations":[]},"p0O-EsMFcL4":{"duration":24.614,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=p0O-EsMFcL4","annotations":[{"segment":[0.01,23.729995815912634],"label":"River tubing"}]},"_032TQam_mY":{"duration":69.08,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_032TQam_mY","annotations":[{"segment":[3.0912852499999994,65.50947056240248],"label":"Grooming horse"}]},"Wms7wsENt0A":{"duration":106.464,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Wms7wsENt0A","annotations":[]},"onW5hJXnI5s":{"duration":204.57,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=onW5hJXnI5s","annotations":[{"segment":[11.488987151326054,27.765052282371297],"label":"Horseback riding"},{"segment":[33.19040732605305,64.14684492823713],"label":"Horseback riding"},{"segment":[86.48654216692668,93.50758987051482],"label":"Horseback riding"},{"segment":[103.72002289391575,187.97259533697348],"label":"Horseback 
riding"}]},"A5wdaSwVeTo":{"duration":53.15,"subset":"testing","resolution":"318x240","url":"https://www.youtube.com/watch?v=A5wdaSwVeTo","annotations":[]},"C8IEqXTBod0":{"duration":75.09,"subset":"training","resolution":"316x240","url":"https://www.youtube.com/watch?v=C8IEqXTBod0","annotations":[{"segment":[22.834469578783153,29.50916068642746],"label":"Shoveling snow"},{"segment":[61.243218408736354,72.13350390015601],"label":"Shoveling snow"}]},"JHKAcpYlxkQ":{"duration":17.35,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JHKAcpYlxkQ","annotations":[]},"kzvFQv5UCx4":{"duration":217.73,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kzvFQv5UCx4","annotations":[{"segment":[8.489209555382216,59.4244668876755],"label":"Getting a tattoo"},{"segment":[59.64577802000001,205.0993028580343],"label":"Getting a tattoo"}]},"x5MPJnwJrfE":{"duration":131.1,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=x5MPJnwJrfE","annotations":[{"segment":[0,109.21572269578783],"label":"Getting a tattoo"}]},"QJVtvcYcwac":{"duration":206.426,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=QJVtvcYcwac","annotations":[{"segment":[41.864662287171754,168.42475538377533],"label":"Braiding hair"}]},"u1TCzaAQkQ0":{"duration":67.97,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=u1TCzaAQkQ0","annotations":[]},"pne20gW35tA":{"duration":52.55,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=pne20gW35tA","annotations":[]},"vpWkDgOwyvM":{"duration":151.07,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=vpWkDgOwyvM","annotations":[]},"0WJX2A6PSnA":{"duration":22.036,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0WJX2A6PSnA","annotations":[{"segment":[6.732194227769111,17.2844375975039],"label":"Cleaning 
sink"}]},"MnZ9L54twws":{"duration":136.626,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=MnZ9L54twws","annotations":[{"segment":[0.01,124.24084711388456],"label":"Using the rowing machine"}]},"jzDXtd0__DM":{"duration":180.58,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jzDXtd0__DM","annotations":[{"segment":[144.23991311761316,152.40975194654044],"label":"Bungee jumping"}]},"FRoL-CpfbJ4":{"duration":209.54,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FRoL-CpfbJ4","annotations":[{"segment":[28.81958144,123.40413184],"label":"Sailing"},{"segment":[148.19224064,171.83209216],"label":"Sailing"},{"segment":[36.61174652700394,193.84612223672622],"label":"Sailing"}]},"QTD0vCdofTE":{"duration":32.18,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=QTD0vCdofTE","annotations":[{"segment":[1.5541107644305772,14.006571860000001],"label":"Hopscotch"},{"segment":[15.795247420000003,31.232613104524177],"label":"Hopscotch"}]},"lc-piYwzqsA":{"duration":48.6,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lc-piYwzqsA","annotations":[{"segment":[9.552966315444618,42.154359436962515],"label":"Using the pommel horse"}]},"moUL_qLnNDM":{"duration":4.83,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=moUL_qLnNDM","annotations":[{"segment":[0,4.829749],"label":"Walking the 
dog"}]},"_D9oML1HvVw":{"duration":133.237,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_D9oML1HvVw","annotations":[{"segment":[11.016384495319812,20.16206218954758],"label":"Slacklining"},{"segment":[23.279906858034323,35.751285531981274],"label":"Slacklining"},{"segment":[37.41413602184087,132.19661394383775],"label":"Slacklining"}]},"r43Wv6V-HLo":{"duration":120.535,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=r43Wv6V-HLo","annotations":[]},"mj0lRelI0xw":{"duration":131.193,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=mj0lRelI0xw","annotations":[{"segment":[0.2,119.70231669266771],"label":"Drum corps"}]},"WUcjqCvYfQ0":{"duration":150.54,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WUcjqCvYfQ0","annotations":[{"segment":[92.24842008,107.75730889235571],"label":"Shaving"}]},"3D4Cdc3pDrs":{"duration":206.658,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3D4Cdc3pDrs","annotations":[]},"rInf05YA7ww":{"duration":66.177,"subset":"training","resolution":"300x240","url":"https://www.youtube.com/watch?v=rInf05YA7ww","annotations":[{"segment":[0.01,66.121],"label":"Futsal"}]},"anol2rdzkdM":{"duration":129.73,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=anol2rdzkdM","annotations":[]},"vwpaEsh0-1U":{"duration":205.683,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=vwpaEsh0-1U","annotations":[{"segment":[91.7707600440072,133.80561866556295],"label":"Futsal"}]},"ElmBEYKRpZA":{"duration":121.72,"subset":"testing","resolution":"192x144","url":"https://www.youtube.com/watch?v=ElmBEYKRpZA","annotations":[]},"5ytocb1ypRU":{"duration":78.763,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5ytocb1ypRU","annotations":[{"segment":[5.899357254290172,74.23357878315133],"label":"Trimming branches 
or hedges"}]},"VjMMwpgOCPo":{"duration":46.88,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=VjMMwpgOCPo","annotations":[]},"z4B1JZQnHl4":{"duration":231.225,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=z4B1JZQnHl4","annotations":[{"segment":[44.398210696824165,54.842922446529236],"label":"Curling"},{"segment":[63.84698429972327,75.37218347181162],"label":"Curling"},{"segment":[87.25754511802775,103.46485645377699],"label":"Curling"},{"segment":[136.23964159940328,147.0445158232361],"label":"Curling"},{"segment":[154.24776530579135,156.76890262468567],"label":"Curling"},{"segment":[167.5737768485185,176.57783764020127],"label":"Curling"},{"segment":[196.0266111261826,206.11116034113587],"label":"Curling"},{"segment":[224.11928393926672,231.02430700000002],"label":"Curling"}]},"EU-IdUKbLSg":{"duration":79.3,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=EU-IdUKbLSg","annotations":[{"segment":[5.690518914196568,11.752158627145086],"label":"Playing water polo"},{"segment":[14.96853888299532,68.78105470202807],"label":"Playing water polo"}]},"tbW6ErTogio":{"duration":235.265,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=tbW6ErTogio","annotations":[]},"Jm1y_W7RLSI":{"duration":163.75,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Jm1y_W7RLSI","annotations":[{"segment":[59.52118366146645,72.54942557878314],"label":"Washing face"}]},"zLVNSgR4IoA":{"duration":60.56,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zLVNSgR4IoA","annotations":[{"segment":[22.616013289999998,53.9811232449298],"label":"Using parallel bars"},{"segment":[0.69732758,22.58249609984399],"label":"Using parallel 
bars"}]},"vB00ah6E5E4":{"duration":37.76,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=vB00ah6E5E4","annotations":[{"segment":[0.3828575546021841,37.755645],"label":"Playing violin"}]},"rczR9C00KOM":{"duration":212.788,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rczR9C00KOM","annotations":[{"segment":[0,83.820342364715],"label":"Plastering"},{"segment":[109.04943551211434,137.59814617890837],"label":"Plastering"},{"segment":[143.2414959618793,145.2332664735161],"label":"Plastering"},{"segment":[175.44178508112324,183.74082884087366],"label":"Plastering"}]},"ObrXN_FjVD8":{"duration":62.46,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ObrXN_FjVD8","annotations":[]},"NK5FWZ2BOQs":{"duration":92.787,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NK5FWZ2BOQs","annotations":[{"segment":[13.290627145085804,83.95132761310451],"label":"Disc dog"}]},"H5F01WIyJWI":{"duration":51.735,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=H5F01WIyJWI","annotations":[{"segment":[0.2,51.7340589569161],"label":"Playing congas"}]},"PdxtuqiUsZ4":{"duration":125.41,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PdxtuqiUsZ4","annotations":[]},"z_-uG3dJKsk":{"duration":75.44,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=z_-uG3dJKsk","annotations":[]},"3mymOHc5-Gs":{"duration":83.31,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3mymOHc5-Gs","annotations":[{"segment":[30.218905409277593,55.563793817058794],"label":"Cleaning shoes"}]},"hyk4wNZwwxc":{"duration":224.31,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hyk4wNZwwxc","annotations":[{"segment":[0,224.30448979591836],"label":"Doing step 
aerobics"}]},"9voJIuym_f8":{"duration":59.61,"subset":"training","resolution":"640x356","url":"https://www.youtube.com/watch?v=9voJIuym_f8","annotations":[{"segment":[25.924878477379096,58.81881030889236],"label":"Bathing dog"}]},"09ocXmGF-T4":{"duration":47.531,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=09ocXmGF-T4","annotations":[]},"jkay2K3RA1M":{"duration":10.68,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=jkay2K3RA1M","annotations":[{"segment":[2.8827519359277773,10.281259794609472],"label":"Doing nails"}]},"dx5VK79QWlg":{"duration":24.103,"subset":"training","resolution":"240x176","url":"https://www.youtube.com/watch?v=dx5VK79QWlg","annotations":[{"segment":[0.01,23.85453028999565],"label":"Knitting"}]},"IlN_XipVf44":{"duration":65.713,"subset":"validation","resolution":"293x240","url":"https://www.youtube.com/watch?v=IlN_XipVf44","annotations":[{"segment":[0.2,56.90986498442367],"label":"Painting furniture"}]},"yj2WJBqmEHk":{"duration":18.878,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yj2WJBqmEHk","annotations":[{"segment":[0.2,18.877277],"label":"Snow tubing"}]},"R8RrOgz9hj8":{"duration":186.851,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=R8RrOgz9hj8","annotations":[{"segment":[15.443670826833072,160.28166926677068],"label":"Rafting"}]},"YnG1Uhh2h5E":{"duration":176.797,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=YnG1Uhh2h5E","annotations":[{"segment":[6.341720748829954,169.57209828393138],"label":"Playing pool"}]},"SrKGO2Xu670":{"duration":209.584,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=SrKGO2Xu670","annotations":[{"segment":[0.01,206.77580722803546],"label":"Making an 
omelette"}]},"yGPzIGJ_2B8":{"duration":100.125,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yGPzIGJ_2B8","annotations":[{"segment":[29.347063962558504,35.122815912636504],"label":"Cutting the grass"},{"segment":[50.10855070202808,62.596663026521064],"label":"Cutting the grass"},{"segment":[68.84071918876757,72.7432542901716],"label":"Cutting the grass"}]},"ICBrXUuwvgg":{"duration":193.93,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=ICBrXUuwvgg","annotations":[{"segment":[97.72276269009946,167.61117811243062],"label":"Rock climbing"}]},"7KT-z20qxI4":{"duration":228.462,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=7KT-z20qxI4","annotations":[{"segment":[49.897907535349034,63.44162529494377],"label":"Playing ice hockey"},{"segment":[85.18285643534585,188.54280710920435],"label":"Playing ice hockey"}]},"2IRQ5fPwHV8":{"duration":164.84,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=2IRQ5fPwHV8","annotations":[{"segment":[0,0],"label":"Getting a haircut"}]},"btMVEIvr0oc":{"duration":34.53,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=btMVEIvr0oc","annotations":[{"segment":[2.2861154446177845,31.252005959999998],"label":"Dodgeball"}]},"LB1A7BobPwg":{"duration":218.48,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LB1A7BobPwg","annotations":[{"segment":[118.95213275741914,124.74636271981491],"label":"Drinking 
beer"}]},"O_bTFogdpc4":{"duration":209.79,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=O_bTFogdpc4","annotations":[]},"lV7tpgelpAY":{"duration":101.57,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=lV7tpgelpAY","annotations":[{"segment":[7.28853003916075,29.31256646184214],"label":"Paintball"},{"segment":[79.69849023400937,87.14546645865835],"label":"Paintball"}]},"MmipoQF8EJs":{"duration":76.208,"subset":"validation","resolution":"656x480","url":"https://www.youtube.com/watch?v=MmipoQF8EJs","annotations":[{"segment":[0.01,76.161],"label":"Camel ride"}]},"bWquUXC2Te8":{"duration":62.76,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=bWquUXC2Te8","annotations":[{"segment":[43.52260782,46.705295549999995],"label":"Shaving"},{"segment":[43.52260782,46.705295549999995],"label":"Shaving"}]},"9cD-Z6Aw7bE":{"duration":149.89,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9cD-Z6Aw7bE","annotations":[{"segment":[71.31774190695519,79.50174507660579],"label":"Hammer throw"},{"segment":[87.45191958426636,95.86975120124805],"label":"Hammer throw"},{"segment":[109.43181356630265,113.64072947269892],"label":"Hammer throw"},{"segment":[124.39684790015602,130.94405042121687],"label":"Hammer throw"}]},"i5K-DXt9djA":{"duration":164.86,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=i5K-DXt9djA","annotations":[{"segment":[29.06292315366084,150.7156894851794],"label":"Fixing bicycle"}]},"W80KSM1I47Q":{"duration":214.205,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=W80KSM1I47Q","annotations":[{"segment":[12.030182431787068,108.27164188608361],"label":"Tug of war"}]},"oD0RWEO8D1g":{"duration":179.7,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oD0RWEO8D1g","annotations":[{"segment":[83.41179166578365,162.34509787300175],"label":"Playing 
ice hockey"}]},"uptOE6bfBgA":{"duration":84.684,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=uptOE6bfBgA","annotations":[{"segment":[0.2,83.36208529797192],"label":"Blowing leaves"}]},"Tg1G0EtWw9s":{"duration":231.387,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Tg1G0EtWw9s","annotations":[{"segment":[11.31705194527774,201.97693054808147],"label":"Playing blackjack"}]},"zTAJDgpAQ4Q":{"duration":173.268,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zTAJDgpAQ4Q","annotations":[]},"J27dBmSpRW4":{"duration":151.42000000000002,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=J27dBmSpRW4","annotations":[{"segment":[98.71647113884555,124.93065366614664],"label":"Smoking hookah"},{"segment":[139.10048205928237,151.181],"label":"Smoking hookah"}]},"NtUIIxVvDsA":{"duration":167.16,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NtUIIxVvDsA","annotations":[{"segment":[29.46814740405616,39.899350024960995],"label":"Rafting"},{"segment":[63.369555921996884,82.4065007051482],"label":"Rafting"},{"segment":[116.0471291575663,119.17648994383777],"label":"Rafting"},{"segment":[0.2,166.960022],"label":"Rafting"},{"segment":[135.08407394071764,160.64052036193448],"label":"Rafting"}]},"4mRdgV8t4KY":{"duration":189.31,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=4mRdgV8t4KY","annotations":[{"segment":[12.99166926677067,172.73014820592823],"label":"Horseback riding"}]},"MZz0OTfvjrY":{"duration":102.424,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=MZz0OTfvjrY","annotations":[{"segment":[22.6929433278473,27.478036785791225],"label":"Arm wrestling"},{"segment":[52.839032112894024,87.45120812535508],"label":"Arm 
wrestling"}]},"U7vH9pEfGVw":{"duration":164.096,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=U7vH9pEfGVw","annotations":[{"segment":[28.662390015600625,95.2116879875195],"label":"Playing blackjack"},{"segment":[101.61032293291733,110.05652106084244],"label":"Playing blackjack"},{"segment":[133.6034976599064,146.40076755070203],"label":"Playing blackjack"}]},"BbFCkDdq3S0":{"duration":87.96000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BbFCkDdq3S0","annotations":[{"segment":[2.880407176287052,87.64667550702028],"label":"Triple jump"}]},"2FKWKDEg5KE":{"duration":204.475,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=2FKWKDEg5KE","annotations":[{"segment":[12.759728457165497,129.5112438402298],"label":"Fixing the roof"},{"segment":[140.35701302882046,196.4998177347894],"label":"Fixing the roof"}]},"_QdPfYK9s6o":{"duration":173.616,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_QdPfYK9s6o","annotations":[{"segment":[0.20000000000000004,169.51904212168486],"label":"Surfing"}]},"XkkOVpXegS8":{"duration":172.084,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=XkkOVpXegS8","annotations":[{"segment":[4.663930721149123,165.75715587068183],"label":"Playing ice hockey"}]},"vMy5o7pvy4c":{"duration":6.94,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=vMy5o7pvy4c","annotations":[{"segment":[2.97339744,6.848333],"label":"Hammer throw"},{"segment":[0,2.735059669266771],"label":"Hammer 
throw"}]},"r0P0egQt-jU":{"duration":185.273,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=r0P0egQt-jU","annotations":[{"segment":[10.389066056329073,99.27329787158892],"label":"Surfing"}]},"H9PaZiWLgXU":{"duration":186.57,"subset":"testing","resolution":"294x240","url":"https://www.youtube.com/watch?v=H9PaZiWLgXU","annotations":[]},"Ydep68S6ViE":{"duration":115.28999999999999,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Ydep68S6ViE","annotations":[{"segment":[4.586289757712757,9.981924766786591],"label":"Hammer throw"},{"segment":[13.938723773440733,17.895522780094876],"label":"Hammer throw"},{"segment":[20.413485784329332,30.66519230156961],"label":"Hammer throw"},{"segment":[36.60039081155083,42.53558932153204],"label":"Hammer throw"},{"segment":[48.650642331815725,60.16133035117323],"label":"Hammer throw"},{"segment":[75.98852526131046,81.02445119578783],"label":"Hammer throw"}]},"z3-tII3XcUs":{"duration":6.06,"subset":"validation","resolution":"480x480","url":"https://www.youtube.com/watch?v=z3-tII3XcUs","annotations":[{"segment":[0.01,4.655622464898596],"label":"Playing guitarra"}]},"NAfDr0se-9I":{"duration":125.69,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=NAfDr0se-9I","annotations":[]},"8kaQq1mR2ys":{"duration":207.122,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8kaQq1mR2ys","annotations":[]},"mWNTl9Bh7kI":{"duration":33.37,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mWNTl9Bh7kI","annotations":[{"segment":[0,27.745164496099846],"label":"Playing 
polo"}]},"mdwbRGlPn3A":{"duration":29.211,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mdwbRGlPn3A","annotations":[{"segment":[2.00510272074883,27.251168795631827],"label":"Powerbocking"}]},"vYxBAbbvSxc":{"duration":87.958,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vYxBAbbvSxc","annotations":[{"segment":[4.847980582798754,80.99140114354643],"label":"Wakeboarding"}]},"cgfJnog1WLE":{"duration":184.25,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cgfJnog1WLE","annotations":[{"segment":[15.97079,117.15720499999999],"label":"Snatch"},{"segment":[126.36970500000001,130.70326500000002],"label":"Snatch"}]},"eHC1rys4tT4":{"duration":84.939,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=eHC1rys4tT4","annotations":[]},"KovhYgNTBI0":{"duration":16.742,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KovhYgNTBI0","annotations":[]},"55FFh89yqVw":{"duration":211.53,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=55FFh89yqVw","annotations":[{"segment":[20.13036037087742,39.60070892631624],"label":"Washing face"},{"segment":[53.13095114280761,65.01116382070249],"label":"Washing face"},{"segment":[77.55138831403598,86.4615478224571],"label":"Washing face"},{"segment":[92.07164825368525,108.90194954736965],"label":"Washing face"}]},"OD5rJOPicoM":{"duration":30.58,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OD5rJOPicoM","annotations":[{"segment":[1.4052001926677067,23.55496594149766],"label":"Windsurfing"}]},"f-JfdEfNQlE":{"duration":112.548,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=f-JfdEfNQlE","annotations":[{"segment":[51.00614416957631,52.93753000732274],"label":"Fun sliding 
down"}]},"N9xp9VbpklQ":{"duration":23.778,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=N9xp9VbpklQ","annotations":[{"segment":[0.01,21.98076391935497],"label":"Croquet"}]},"ma0HmFqdFqg":{"duration":107.37,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ma0HmFqdFqg","annotations":[{"segment":[18.33325675,89.78632995319813],"label":"Cleaning windows"}]},"DLfOiuxMstg":{"duration":56.05,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DLfOiuxMstg","annotations":[{"segment":[4.819661346233523,46.15142770137371],"label":"Wrapping presents"}]},"n18TxTWnbFI":{"duration":54.71,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=n18TxTWnbFI","annotations":[{"segment":[7.336296411856473,53.913248049922],"label":"Springboard diving"}]},"nhFbFMZrHhc":{"duration":142.385,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nhFbFMZrHhc","annotations":[]},"tLxv2E7gj5c":{"duration":30.72,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=tLxv2E7gj5c","annotations":[{"segment":[0,14.312447999999998],"label":"Playing racquetball"},{"segment":[16.0103424,29.186395305772233],"label":"Playing racquetball"}]},"fIG7aXzSyMU":{"duration":91.487,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fIG7aXzSyMU","annotations":[{"segment":[2.6404094695787834,3.924932995319813],"label":"Raking leaves"},{"segment":[32.755349906396255,34.75349761310453],"label":"Raking leaves"},{"segment":[50.16777992199688,57.304021731669266],"label":"Raking 
leaves"}]},"LMePtW22Wjg":{"duration":100.357,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LMePtW22Wjg","annotations":[]},"44BfVJnhBgc":{"duration":169.924,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=44BfVJnhBgc","annotations":[{"segment":[83.71342781804879,160.17733591773725],"label":"Running a marathon"}]},"cDnJjAQtf-g":{"duration":87.45,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=cDnJjAQtf-g","annotations":[{"segment":[11.323006215486716,36.15176683257806],"label":"Playing water polo"},{"segment":[60.025574290171605,72.84921970670827],"label":"Playing water polo"},{"segment":[59.34346549141966,72.84921970670827],"label":"Playing water polo"}]},"gHHlX92NYBY":{"duration":202.9,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gHHlX92NYBY","annotations":[]},"93bg_XU3OTg":{"duration":174.311,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=93bg_XU3OTg","annotations":[{"segment":[34.6658775351014,52.338677847113885],"label":"Waxing skis"},{"segment":[84.96538611544462,136.0805624024961],"label":"Waxing 
skis"}]},"Rzkne_2st5k":{"duration":25.22,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Rzkne_2st5k","annotations":[]},"YEwdGCEgsNo":{"duration":79.924,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YEwdGCEgsNo","annotations":[]},"tPHfkxAwyp8":{"duration":230.064,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=tPHfkxAwyp8","annotations":[{"segment":[13.273995319812792,218.87390015600624],"label":"Capoeira"}]},"AfCRHmcLo9Y":{"duration":219.615,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=AfCRHmcLo9Y","annotations":[]},"WOUkPgHtt4E":{"duration":47.3,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WOUkPgHtt4E","annotations":[]},"3TwqeiVbpS8":{"duration":151.395,"subset":"training","resolution":"392x240","url":"https://www.youtube.com/watch?v=3TwqeiVbpS8","annotations":[{"segment":[11.615239755683971,58.76973423319071],"label":"Waterskiing"},{"segment":[66.54997255094771,97.67092582197576],"label":"Waterskiing"}]},"-0r0HEwAYiQ":{"duration":43.68,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=-0r0HEwAYiQ","annotations":[{"segment":[27.52112112,39.94781664],"label":"Vacuuming floor"}]},"Hv6LHoTM9mU":{"duration":7.895,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Hv6LHoTM9mU","annotations":[]},"Ynvd8OuXKGw":{"duration":158.34,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ynvd8OuXKGw","annotations":[]},"ZYwfvPJv4Rk":{"duration":61.58,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZYwfvPJv4Rk","annotations":[{"segment":[0.2,61.37922902494331],"label":"Cutting the 
grass"}]},"qZaWIb28kfU":{"duration":88.63,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=qZaWIb28kfU","annotations":[{"segment":[6.78985374,62.17068414],"label":"Starting a campfire"}]},"hchuVbHYK_k":{"duration":46.557,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=hchuVbHYK_k","annotations":[{"segment":[2.103789391575663,46.300999999999995],"label":"Canoeing"}]},"Zk4EvGw7cAw":{"duration":23.777,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Zk4EvGw7cAw","annotations":[{"segment":[9.000871395990485,20.79741189443285],"label":"Doing a powerbomb"}]},"CocYQOgnegg":{"duration":157.223,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CocYQOgnegg","annotations":[{"segment":[0.01,154.97409048361936],"label":"Blowing leaves"}]},"zHzKPtrgv1k":{"duration":101.936,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zHzKPtrgv1k","annotations":[{"segment":[0.01,101.93560090702948],"label":"Riding bumper cars"}]},"tN1_lOJlUlc":{"duration":182.137,"subset":"validation","resolution":"432x720","url":"https://www.youtube.com/watch?v=tN1_lOJlUlc","annotations":[{"segment":[13.946811346093298,176.7622351200234],"label":"Playing congas"}]},"4I5h_JoKdx0":{"duration":150.907,"subset":"testing","resolution":"316x240","url":"https://www.youtube.com/watch?v=4I5h_JoKdx0","annotations":[]},"xS5JDBeMxOs":{"duration":96.15,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xS5JDBeMxOs","annotations":[]},"PWw-DAcSszM":{"duration":159.73,"subset":"training","resolution":"384x288","url":"https://www.youtube.com/watch?v=PWw-DAcSszM","annotations":[{"segment":[4.485381896908529,159.729432],"label":"Tai 
chi"}]},"TgDzZIop_uw":{"duration":50.248,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=TgDzZIop_uw","annotations":[{"segment":[0.19999999999999998,42.33057304212168],"label":"Changing car wheel"}]},"KkBMOQOGTdk":{"duration":24.056,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KkBMOQOGTdk","annotations":[{"segment":[3.7153376396255853,15.6869811450858],"label":"Raking leaves"}]},"ESlUzrtqC98":{"duration":107.718,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ESlUzrtqC98","annotations":[{"segment":[27.1387437541437,31.494345013828497],"label":"Beer pong"},{"segment":[41.88077878692301,47.7440874593521],"label":"Beer pong"},{"segment":[54.109966128092374,58.29804419963204],"label":"Beer pong"},{"segment":[65.66906160554183,70.69475529138943],"label":"Beer pong"},{"segment":[94.98560810631943,100.01130179216703],"label":"Beer pong"}]},"yJ5OnuQuvRI":{"duration":181.581,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yJ5OnuQuvRI","annotations":[]},"p2RMXdo5avw":{"duration":34.854,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=p2RMXdo5avw","annotations":[{"segment":[6.400121495327102,33.89894859813084],"label":"Shuffleboard"}]},"_IqOAGFrYfk":{"duration":163.82,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_IqOAGFrYfk","annotations":[]},"boRX-UKXzy8":{"duration":118.747,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=boRX-UKXzy8","annotations":[{"segment":[0.01,118.701],"label":"Riding bumper cars"}]},"O5vpeIfQxLQ":{"duration":158.732,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=O5vpeIfQxLQ","annotations":[{"segment":[14.855007800312013,31.690683307332293],"label":"Having an ice cream"},{"segment":[34.166517940717625,106.70847269890794],"label":"Having an ice 
cream"}]},"xwu60huH3ds":{"duration":176.495,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xwu60huH3ds","annotations":[{"segment":[0.01,167.33848361934477],"label":"Grooming dog"}]},"00ZRoqhhb8g":{"duration":72.56,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=00ZRoqhhb8g","annotations":[{"segment":[0.01,7.8109246818852345],"label":"Javelin throw"}]},"eXUcFgDxslw":{"duration":24.87,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eXUcFgDxslw","annotations":[{"segment":[0,19.979734215852258],"label":"Ballet"}]},"reEjJfSUGa8":{"duration":218.036,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=reEjJfSUGa8","annotations":[{"segment":[0.01,164.809934162948],"label":"BMX"},{"segment":[177.71223634363335,214.4016070601918],"label":"BMX"}]},"EsOajUNpEzU":{"duration":72.91,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=EsOajUNpEzU","annotations":[{"segment":[21.952825729355705,70.29454041835143],"label":"Playing bagpipes"}]},"JXazqQitVdQ":{"duration":12.493,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JXazqQitVdQ","annotations":[{"segment":[0.2,12.181261117240396],"label":"Using the monkey bar"}]},"ctFlPn4EfkU":{"duration":76.51,"subset":"training","resolution":"854x476","url":"https://www.youtube.com/watch?v=ctFlPn4EfkU","annotations":[{"segment":[9.071358695787831,76.509749],"label":"Croquet"}]},"U0HiAZCgmd8":{"duration":67.06,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=U0HiAZCgmd8","annotations":[{"segment":[6.695461244300112,67.0592290249433],"label":"Mixing drinks"}]},"wQbVxdVTN-I":{"duration":50.34,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=wQbVxdVTN-I","annotations":[{"segment":[8.638837417442275,16.02111644929797],"label":"Chopping 
wood"},{"segment":[17.35620948673947,30.86420963026521],"label":"Chopping wood"},{"segment":[32.82758174414976,35.34069804992199],"label":"Chopping wood"},{"segment":[37.0684655101404,39.42451204680187],"label":"Chopping wood"},{"segment":[40.91667485335413,49.476977269890796],"label":"Chopping wood"}]},"RHfjDkr6mcc":{"duration":135.19,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=RHfjDkr6mcc","annotations":[{"segment":[11.000196480000001,118.94731474446179],"label":"Ironing clothes"}]},"mgNfayAiTQc":{"duration":11.146,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=mgNfayAiTQc","annotations":[{"segment":[4.2339282363494535,10.893453963338532],"label":"Laying tile"}]},"xm7pETlJrwA":{"duration":217.53,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xm7pETlJrwA","annotations":[{"segment":[55.31435137133377,151.77519576],"label":"Hand washing clothes"}]},"Q2PC60uwjdY":{"duration":169.55,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Q2PC60uwjdY","annotations":[{"segment":[31.906380030855622,146.611149152162],"label":"Preparing salad"}]},"Ce0t7gfJl5w":{"duration":176.96,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ce0t7gfJl5w","annotations":[{"segment":[16.564050643658398,45.27507175933295],"label":"Longboarding"},{"segment":[57.69810974207675,60.45878484935316],"label":"Longboarding"},{"segment":[89.16980596502772,93.3108186259423],"label":"Longboarding"},{"segment":[124.23037982743799,126.71498742398673],"label":"Longboarding"},{"segment":[138.30982179875195,142.17476691887677],"label":"Longboarding"},{"segment":[145.48757702184085,152.94139975351013],"label":"Longboarding"}]},"Zhfm2dWtnpc":{"duration":89.862,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Zhfm2dWtnpc","annotations":[{"segment":[5.6050546021840875,84.21594539781591],"label":"Rope 
skipping"}]},"D8dHEAYIawg":{"duration":59.56,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=D8dHEAYIawg","annotations":[{"segment":[0,59.559182],"label":"Playing beach volleyball"}]},"-eCMRApm6n4":{"duration":196.975,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-eCMRApm6n4","annotations":[{"segment":[84.50560172066747,147.5006866397105],"label":"Making a lemonade"}]},"jFp6ld_IGuY":{"duration":30.117,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jFp6ld_IGuY","annotations":[{"segment":[3.98890015600624,9.854929797191888],"label":"Doing a powerbomb"},{"segment":[12.717552262090484,24.21497035881435],"label":"Doing a powerbomb"}]},"Kc1onvwa3rE":{"duration":206.01,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Kc1onvwa3rE","annotations":[]},"6Ik0MQ53FBU":{"duration":80.388,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6Ik0MQ53FBU","annotations":[{"segment":[0.01,80.301],"label":"Playing beach volleyball"}]},"LNTalSYEOpQ":{"duration":99.475,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LNTalSYEOpQ","annotations":[]},"7-u3OI6HDns":{"duration":196.12,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=7-u3OI6HDns","annotations":[{"segment":[53.847690647761965,59.354840827646704],"label":"Belly dance"},{"segment":[67.9215188852452,95.7632225724403],"label":"Belly dance"},{"segment":[108.00133408329529,159.09544886115444],"label":"Belly dance"},{"segment":[166.1323629453978,182.65381340405617],"label":"Belly dance"}]},"jNPOEMYJlgc":{"duration":79.668,"subset":"validation","resolution":"424x240","url":"https://www.youtube.com/watch?v=jNPOEMYJlgc","annotations":[{"segment":[0.009999999999999998,70.34617462901717],"label":"Playing rubik 
cube"}]},"iABuDhagUm8":{"duration":98.06,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=iABuDhagUm8","annotations":[]},"Z8u205eKUTo":{"duration":206.542,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Z8u205eKUTo","annotations":[]},"MYLqqR528eU":{"duration":167.09,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=MYLqqR528eU","annotations":[]},"4WNso9fVJG8":{"duration":110.899,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=4WNso9fVJG8","annotations":[{"segment":[60.53712752986109,79.33640626411065],"label":"Putting in contact lenses"}]},"vAh8YJnIytk":{"duration":88.77,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vAh8YJnIytk","annotations":[]},"cj6QcTKVGpA":{"duration":203.454,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=cj6QcTKVGpA","annotations":[]},"esuEWVNHfsM":{"duration":220.82,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=esuEWVNHfsM","annotations":[{"segment":[2.7561809672386897,214.9821154446178],"label":"Ping-pong"}]},"RtbfdVDciWQ":{"duration":126.619,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=RtbfdVDciWQ","annotations":[{"segment":[0.01,126.261],"label":"Putting on shoes"}]},"BPpq8m93LSI":{"duration":102.82,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BPpq8m93LSI","annotations":[{"segment":[9.944930964118564,15.398602783151325],"label":"Ping-pong"},{"segment":[26.305946421216845,99.12850541653665],"label":"Ping-pong"}]},"VNR1j2U7gE4":{"duration":40.68,"subset":"training","resolution":"640x352","url":"https://www.youtube.com/watch?v=VNR1j2U7gE4","annotations":[{"segment":[1.0789127971918877,40.42749716536662],"label":"Playing 
squash"}]},"NouHsmaE-xA":{"duration":50.272,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NouHsmaE-xA","annotations":[{"segment":[18.116454943912043,27.762878555382212],"label":"Calf roping"}]},"w8gNl6HWctA":{"duration":150.98,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=w8gNl6HWctA","annotations":[{"segment":[34.62323452938118,97.51033398070618],"label":"Playing racquetball"}]},"L_fvvmvV-Tc":{"duration":58.17,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=L_fvvmvV-Tc","annotations":[{"segment":[1.0889107956318254,45.462025717628705],"label":"Mowing the lawn"}]},"oHOR69nFt-4":{"duration":214.181,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=oHOR69nFt-4","annotations":[{"segment":[15.70436895298941,190.12310349453978],"label":"Making a cake"}]},"0rDLcTmgzGQ":{"duration":134.1,"subset":"training","resolution":"854x468","url":"https://www.youtube.com/watch?v=0rDLcTmgzGQ","annotations":[{"segment":[3.768683307332293,130.2289453978159],"label":"Playing polo"}]},"FUCYJSSqQpo":{"duration":105.418,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FUCYJSSqQpo","annotations":[]},"_kG3DxvGnnQ":{"duration":208.68,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_kG3DxvGnnQ","annotations":[{"segment":[0,198.58567224648985],"label":"Getting a piercing"}]},"4dw_Hk9Wpc4":{"duration":98.151,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=4dw_Hk9Wpc4","annotations":[{"segment":[11.024733038301125,25.418134504972038],"label":"Scuba diving"},{"segment":[31.84922877731436,65.53591306101224],"label":"Scuba diving"},{"segment":[86.20728636661467,88.35098442901716],"label":"Scuba 
diving"}]},"YZp-m0_kQxc":{"duration":54.61,"subset":"testing","resolution":"3414x1080","url":"https://www.youtube.com/watch?v=YZp-m0_kQxc","annotations":[]},"5rftu62ML_c":{"duration":105.535,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5rftu62ML_c","annotations":[{"segment":[0.2,9.8784424024961],"label":"Canoeing"},{"segment":[14.81766360374415,21.074010458658346],"label":"Canoeing"},{"segment":[35.891674062402494,39.18448819656787],"label":"Canoeing"},{"segment":[66.0209233900156,78.69825780655226],"label":"Canoeing"},{"segment":[84.29604183463339,85.94244890171608],"label":"Canoeing"}]},"ADbpTpH4U7s":{"duration":102.7,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ADbpTpH4U7s","annotations":[{"segment":[25.955806297558027,92.12709025367818],"label":"Preparing salad"}]},"aT5S8PcnlXo":{"duration":95.46000000000001,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=aT5S8PcnlXo","annotations":[{"segment":[1.9339984399375976,14.87691107644306],"label":"Hammer throw"},{"segment":[26.183363494539783,37.34104680187208],"label":"Hammer throw"}]},"wkMQ_e674IM":{"duration":167.07,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wkMQ_e674IM","annotations":[]},"qaml4eEk9hY":{"duration":57.84,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=qaml4eEk9hY","annotations":[]},"OqA83jGQtfg":{"duration":224.584,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OqA83jGQtfg","annotations":[{"segment":[20.31728237129485,224.541],"label":"Ice 
fishing"}]},"gl2eHq4nRus":{"duration":141.828,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gl2eHq4nRus","annotations":[]},"rBVbsbJJcyM":{"duration":132.73,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rBVbsbJJcyM","annotations":[{"segment":[41.524457098283925,90.60822932917317],"label":"Ironing clothes"},{"segment":[96.20005148205928,102.62029173166927],"label":"Ironing clothes"},{"segment":[110.49026365054601,119.80996723868954],"label":"Ironing clothes"}]},"ovTKJgSWMKQ":{"duration":208.03,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=ovTKJgSWMKQ","annotations":[{"segment":[39.917927557918645,43.48782351838291],"label":"Plataform diving"},{"segment":[89.8964710044184,114.56120673126243],"label":"Plataform diving"}]},"XnrQRuchl7E":{"duration":65.06,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=XnrQRuchl7E","annotations":[{"segment":[0.7105091076443059,62.22029756942278],"label":"Playing piano"}]},"8jJdHFW-lys":{"duration":102.8,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=8jJdHFW-lys","annotations":[{"segment":[9.63888449,90.81763884555382],"label":"Using the balance beam"}]},"s0Pjp1qD18A":{"duration":150.512,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=s0Pjp1qD18A","annotations":[]},"1LdbczjQPII":{"duration":163.33,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1LdbczjQPII","annotations":[{"segment":[16.307435631825275,163.32916099773243],"label":"Playing ten 
pins"}]},"cLTDcBhgRw8":{"duration":30.07,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=cLTDcBhgRw8","annotations":[{"segment":[5.101574102964118,28.99063806552262],"label":"Powerbocking"}]},"-02DygXbn6w":{"duration":109.668,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-02DygXbn6w","annotations":[{"segment":[1.3678751950078003,101.24123556942277],"label":"Snowboarding"}]},"_bj64aqyIyk":{"duration":8.127,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_bj64aqyIyk","annotations":[{"segment":[0.2,7.8709999999999996],"label":"Using the monkey bar"}]},"DOfpOkDHe_8":{"duration":111.94,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=DOfpOkDHe_8","annotations":[{"segment":[11.870932917316694,106.83839625585023],"label":"Doing karate"}]},"wlP0gzQtU20":{"duration":112.176,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=wlP0gzQtU20","annotations":[]},"rcqXzX-7ULg":{"duration":210.61,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rcqXzX-7ULg","annotations":[{"segment":[36.14125392226573,184.32039385647425],"label":"Ironing clothes"}]},"54Hp_Z-cu-s":{"duration":61.603,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=54Hp_Z-cu-s","annotations":[{"segment":[19.065170228375177,58.72839013066854],"label":"Drum 
corps"}]},"cQgP0-Id-p4":{"duration":133.677,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=cQgP0-Id-p4","annotations":[]},"ResZdSbk9kE":{"duration":96.99000000000001,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=ResZdSbk9kE","annotations":[{"segment":[4.387991835319672,96.98975056689342],"label":"Kayaking"}]},"ivkF2jbavhc":{"duration":66.06,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ivkF2jbavhc","annotations":[{"segment":[26.737785000000002,59.054337000000004],"label":"Playing piano"}]},"dS9TsX_zyPg":{"duration":11.45,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dS9TsX_zyPg","annotations":[{"segment":[0,11.447346],"label":"Washing dishes"}]},"EAMThMTRRy4":{"duration":64.41,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EAMThMTRRy4","annotations":[]},"omLZQlz1PAo":{"duration":172.36,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=omLZQlz1PAo","annotations":[{"segment":[3.629014820592824,159.542244149766],"label":"Tango"}]},"s9ATMSQLb80":{"duration":182.556,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=s9ATMSQLb80","annotations":[]},"ZBVaJaLg0G8":{"duration":215.691,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=ZBVaJaLg0G8","annotations":[]},"2cal3nR0PnY":{"duration":101.53999999999999,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2cal3nR0PnY","annotations":[]},"5YsdlM3Stjo":{"duration":150.32999999999998,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=5YsdlM3Stjo","annotations":[{"segment":[14.301058210608424,107.60960194539781],"label":"Grooming horse"},{"segment":[107.68531375,144.1827999921997],"label":"Grooming 
horse"}]},"PKLLNgcEuRI":{"duration":8.73,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PKLLNgcEuRI","annotations":[{"segment":[0,6.2233525741029645],"label":"Using the pommel horse"}]},"ZGzXbvRHNAE":{"duration":58.979,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZGzXbvRHNAE","annotations":[{"segment":[23.539619344773794,45.516060842433696],"label":"Cleaning sink"}]},"NdZn0wf7MCc":{"duration":195.79,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NdZn0wf7MCc","annotations":[]},"0pxONuRdIAY":{"duration":147.076,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0pxONuRdIAY","annotations":[{"segment":[78.21241965678625,106.1945756630265],"label":"Applying sunscreen"}]},"j0cDmSTkVaA":{"duration":101.425,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=j0cDmSTkVaA","annotations":[]},"iqe_HmjojQ8":{"duration":8.59,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=iqe_HmjojQ8","annotations":[{"segment":[0.01,8.591382],"label":"Playing kickball"}]},"deU1mGcNh1Q":{"duration":33.53,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=deU1mGcNh1Q","annotations":[{"segment":[0.7834022082353189,33.32961451247165],"label":"Having an ice cream"}]},"shZRGB58YBU":{"duration":221.89,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=shZRGB58YBU","annotations":[{"segment":[0.2,214.28618177570092],"label":"Playing congas"}]},"Hxgjh9Yb408":{"duration":29.247,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Hxgjh9Yb408","annotations":[{"segment":[8.365921226209048,20.89194535725429],"label":"Calf 
roping"}]},"xekPSA9h_jg":{"duration":60.558,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=xekPSA9h_jg","annotations":[{"segment":[3.2101622464898596,60.501],"label":"Table soccer"}]},"Uc0Z2tuIJVA":{"duration":90.349,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Uc0Z2tuIJVA","annotations":[{"segment":[0.2,4.074829057497805],"label":"Beer pong"},{"segment":[11.521934356849272,13.77011708872896],"label":"Beer pong"},{"segment":[24.167962223672514,24.87051932738492],"label":"Beer pong"},{"segment":[29.788419053371737,30.490975638236517],"label":"Beer pong"},{"segment":[57.46916796172019,58.874282145234965],"label":"Beer pong"}]},"zrR9hGDeQhg":{"duration":236.89,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=zrR9hGDeQhg","annotations":[{"segment":[0.2,92.73334009360374],"label":"Snow tubing"},{"segment":[101.96972854914196,236.621],"label":"Snow tubing"}]},"pA5W1NYnmyY":{"duration":91.86,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pA5W1NYnmyY","annotations":[{"segment":[51.998475819032755,57.155349453978154],"label":"Cleaning windows"}]},"IZdVPuT8Hfs":{"duration":93.65,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IZdVPuT8Hfs","annotations":[{"segment":[52.155458864274564,56.246083088923555],"label":"Bungee jumping"},{"segment":[78.4523288798752,91.16248272074883],"label":"Bungee jumping"}]},"q2-2jIQGfvg":{"duration":76.487,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=q2-2jIQGfvg","annotations":[{"segment":[2.8637710951638065,54.65029839937598],"label":"Playing rubik 
cube"}]},"YAzDGbY1dAM":{"duration":144.532,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YAzDGbY1dAM","annotations":[]},"6Zr1lLVoC4w":{"duration":55.08,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6Zr1lLVoC4w","annotations":[]},"QWRGRAod0no":{"duration":227.44,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QWRGRAod0no","annotations":[]},"_mfa4AcWV6k":{"duration":72.702,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_mfa4AcWV6k","annotations":[]},"YPKvzHQfPYU":{"duration":192.749,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YPKvzHQfPYU","annotations":[]},"xcz61rIR5lQ":{"duration":63.14,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xcz61rIR5lQ","annotations":[]},"NiPqyUecGdc":{"duration":56.309,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=NiPqyUecGdc","annotations":[{"segment":[0.21039639544858368,53.77377801538628],"label":"Waterskiing"}]},"QXN6odBnVmI":{"duration":48.67,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=QXN6odBnVmI","annotations":[]},"Pr3iVnKpv0c":{"duration":84.2,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=Pr3iVnKpv0c","annotations":[{"segment":[0.9537871654433728,81.60840665142469],"label":"Playing violin"}]},"98wL6zakU9o":{"duration":139.204,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=98wL6zakU9o","annotations":[{"segment":[0.01,133.08220436817473],"label":"Waxing skis"}]},"6gyD-Mte2ZM":{"duration":188.245,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6gyD-Mte2ZM","annotations":[{"segment":[21.43810386973302,32.59766478822418],"label":"Playing ten pins"},{"segment":[36.12173665722139,39.93948118196837],"label":"Playing ten 
pins"},{"segment":[36.41540931297116,40.5268264934679],"label":"Playing ten pins"},{"segment":[0.2,188.0441723356009],"label":"Playing ten pins"},{"segment":[0.2,188.0441723356009],"label":"Playing ten pins"},{"segment":[0.2,188.0441723356009],"label":"Playing ten pins"},{"segment":[0.2,188.0441723356009],"label":"Playing ten pins"},{"segment":[0.2,188.0441723356009],"label":"Playing ten pins"},{"segment":[0.2,188.0441723356009],"label":"Playing ten pins"},{"segment":[0.2,188.0441723356009],"label":"Playing ten pins"},{"segment":[0.2,188.0441723356009],"label":"Playing ten pins"},{"segment":[0.2,188.0441723356009],"label":"Playing ten pins"},{"segment":[0.2,188.0441723356009],"label":"Playing ten pins"},{"segment":[0.2,188.0441723356009],"label":"Playing ten pins"},{"segment":[0.2,188.0441723356009],"label":"Playing ten pins"},{"segment":[0.2,188.044172],"label":"Playing ten pins"},{"segment":[0.2,188.044172],"label":"Playing ten pins"},{"segment":[176.79093844617785,180.02133765366614],"label":"Playing ten pins"}]},"bpB0GiH6uDw":{"duration":95.922,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bpB0GiH6uDw","annotations":[{"segment":[17.33163865753986,83.07233586292836],"label":"Cutting the grass"}]},"5K-soRkdGDs":{"duration":225.19,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5K-soRkdGDs","annotations":[{"segment":[50.234494539781586,66.74513260530422],"label":"Bathing dog"},{"segment":[73.41964586583462,90.63286427457098],"label":"Bathing dog"},{"segment":[98.00995787831513,117.33091731669266],"label":"Bathing dog"},{"segment":[124.00543057722308,143.32639001560062],"label":"Bathing dog"},{"segment":[151.75735413416535,164.40380031201246],"label":"Bathing dog"},{"segment":[173.8886349453978,191.10185335413416],"label":"Bathing 
dog"}]},"wtA-5-6DRcY":{"duration":87.656,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wtA-5-6DRcY","annotations":[{"segment":[0,87.655327],"label":"River tubing"}]},"sYAGVE3luRo":{"duration":159.1,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sYAGVE3luRo","annotations":[{"segment":[22.39692931,64.52422165],"label":"Playing bagpipes"},{"segment":[71.76818124,131.23133646],"label":"Playing bagpipes"}]},"al_769KF6Qc":{"duration":124.645,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=al_769KF6Qc","annotations":[{"segment":[9.330496099843995,104.19053978159126],"label":"Spread mulch"}]},"_L8VrJYsOAQ":{"duration":70.52,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_L8VrJYsOAQ","annotations":[{"segment":[14.411839517940717,34.214367099844],"label":"Washing hands"},{"segment":[44.775715143525744,63.148060177847114],"label":"Washing hands"}]},"rTiSrV2wdWo":{"duration":115.52,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=rTiSrV2wdWo","annotations":[]},"q4FZTmaqEx8":{"duration":203.523,"subset":"training","resolution":"548x360","url":"https://www.youtube.com/watch?v=q4FZTmaqEx8","annotations":[{"segment":[0.2,203.501],"label":"Roof shingle removal"}]},"6UqWORrn3KI":{"duration":59.049,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=6UqWORrn3KI","annotations":[{"segment":[10.3125616224649,50.734120124804996],"label":"Making a lemonade"}]},"54VcTlhF2H8":{"duration":70.64,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=54VcTlhF2H8","annotations":[{"segment":[0,70.581],"label":"Using parallel bars"}]},"dgas2Fku3No":{"duration":87.05,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=dgas2Fku3No","annotations":[{"segment":[3.57605508,81.64353744149766],"label":"Playing 
badminton"}]},"rNQbSptv1cM":{"duration":72.52,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=rNQbSptv1cM","annotations":[{"segment":[20.466553822152886,27.024897035881434],"label":"Cheerleading"}]},"Pn2_AuY2rxg":{"duration":73.352,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Pn2_AuY2rxg","annotations":[]},"5_cLvl6mV3s":{"duration":128.011,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5_cLvl6mV3s","annotations":[]},"KYl67H9-4TA":{"duration":235.43,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=KYl67H9-4TA","annotations":[{"segment":[6.232449297971919,206.4040561622465],"label":"Playing violin"}]},"I8m878YGmU8":{"duration":192.7,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=I8m878YGmU8","annotations":[{"segment":[0.01,192.7022448979592],"label":"Triple jump"}]},"nX-GvQmf5Tc":{"duration":81.758,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nX-GvQmf5Tc","annotations":[{"segment":[0.01,81.757345],"label":"Riding bumper cars"}]},"oppGT2l_wZ4":{"duration":193.817,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=oppGT2l_wZ4","annotations":[]},"RS65b_z37_4":{"duration":62.439,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RS65b_z37_4","annotations":[]},"bQa8S4Z-1cE":{"duration":43.86,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bQa8S4Z-1cE","annotations":[{"segment":[12.168702028081123,17.090873634945396],"label":"Polishing 
shoes"}]},"pi5k3Vt09K0":{"duration":22.11,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pi5k3Vt09K0","annotations":[]},"7s7YqryNMAE":{"duration":84.823,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=7s7YqryNMAE","annotations":[{"segment":[9.498006683589713,13.05975994476977],"label":"Putting in contact lenses"},{"segment":[23.613102940858823,28.362107289098894],"label":"Putting in contact lenses"}]},"LnMvFpR0xCY":{"duration":144.313,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=LnMvFpR0xCY","annotations":[{"segment":[37.82280632761311,41.65011411076443],"label":"Mooping floor"},{"segment":[62.13746753822153,66.86531832917316],"label":"Mooping floor"},{"segment":[68.66640434477378,75.87074840717628],"label":"Mooping floor"},{"segment":[77.22156291887676,87.57780750858034],"label":"Mooping floor"},{"segment":[96.1329660826833,119.99735578939156],"label":"Mooping floor"}]},"Vg5Vfb16Kb8":{"duration":98.36,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Vg5Vfb16Kb8","annotations":[{"segment":[23.630755515934922,34.37200802317807],"label":"Putting on makeup"}]},"KzZlSbM16aY":{"duration":99.87,"subset":"validation","resolution":"648x480","url":"https://www.youtube.com/watch?v=KzZlSbM16aY","annotations":[{"segment":[24.90483619344774,95.10534321372856],"label":"Playing flauta"}]},"XftM9eALPy0":{"duration":68.66,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XftM9eALPy0","annotations":[{"segment":[5.51759796,13.914838260000002],"label":"Using parallel bars"}]},"Qlh-VSBxcJs":{"duration":222.47,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Qlh-VSBxcJs","annotations":[{"segment":[0.01,213.27309043369735],"label":"Belly 
dance"}]},"c9BlG8bO6pQ":{"duration":134.05,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=c9BlG8bO6pQ","annotations":[]},"EO_BnsrWMnI":{"duration":212.86,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=EO_BnsrWMnI","annotations":[{"segment":[27.229797950078,195.92171695787832],"label":"Using the balance beam"}]},"iZlG0UOtImQ":{"duration":158.941,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iZlG0UOtImQ","annotations":[{"segment":[2.4787363494539782,100.88837909516381],"label":"Playing pool"}]},"lmcBk-gqMzA":{"duration":68.03,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=lmcBk-gqMzA","annotations":[{"segment":[7.532092043681748,67.981],"label":"Grooming horse"}]},"cPIsE2ZCWzM":{"duration":85.06,"subset":"testing","resolution":"1920x792","url":"https://www.youtube.com/watch?v=cPIsE2ZCWzM","annotations":[]},"SymvoBsqt3Y":{"duration":72.493,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=SymvoBsqt3Y","annotations":[{"segment":[0.2,66.92185959438378],"label":"Drum corps"}]},"DVZCBD8-y2I":{"duration":110.11,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DVZCBD8-y2I","annotations":[{"segment":[49.96529017160687,95.29462558502341],"label":"Removing ice from car"}]},"a2HjLtnVDaY":{"duration":36.04,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=a2HjLtnVDaY","annotations":[{"segment":[0.5760408205406689,33.49192867100796],"label":"Walking the 
dog"}]},"7_7KsHDtvJk":{"duration":191.194,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=7_7KsHDtvJk","annotations":[]},"afI4V541WAA":{"duration":65.23,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=afI4V541WAA","annotations":[]},"O4P07fipvIA":{"duration":219.78,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=O4P07fipvIA","annotations":[{"segment":[0,200.20084867394698],"label":"Cheerleading"}]},"tqanHyXTFFw":{"duration":81.67,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tqanHyXTFFw","annotations":[{"segment":[20.766471327613107,78.73422871450859],"label":"Fixing bicycle"}]},"bNVM1vgxEKA":{"duration":37.69,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bNVM1vgxEKA","annotations":[]},"9hODmjEMkYE":{"duration":73.468,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=9hODmjEMkYE","annotations":[]},"Jd6dM5p91M4":{"duration":194.77,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Jd6dM5p91M4","annotations":[{"segment":[6.683778471138846,186.5381809672387],"label":"Kayaking"}]},"Ki1bs5X_S5k":{"duration":23.987,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=Ki1bs5X_S5k","annotations":[]},"Cgi8ib7_7BY":{"duration":105.605,"subset":"testing","resolution":"853x480","url":"https://www.youtube.com/watch?v=Cgi8ib7_7BY","annotations":[]},"Ie09KEAPP2I":{"duration":16.49,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=Ie09KEAPP2I","annotations":[]},"I4_jFA46Uio":{"duration":35.782,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=I4_jFA46Uio","annotations":[{"segment":[0.01,35.67030596325894],"label":"Peeling 
potatoes"}]},"eYgXvnnlPQA":{"duration":140.99,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eYgXvnnlPQA","annotations":[{"segment":[22.10553105797701,114.9267646450858],"label":"Rock climbing"}]},"1iP5k9jHBRU":{"duration":50.829,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=1iP5k9jHBRU","annotations":[{"segment":[2.375162650730074,37.21088026479751],"label":"Plastering"}]},"EbCiL9JYTMQ":{"duration":231.434,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EbCiL9JYTMQ","annotations":[]},"aDBSNU2trmo":{"duration":59.536,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aDBSNU2trmo","annotations":[{"segment":[0.2,50.03282215288611],"label":"Snow tubing"}]},"-AjZCBMb4qU":{"duration":116.7,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=-AjZCBMb4qU","annotations":[{"segment":[33.0969708,107.83006958658346],"label":"Playing accordion"},{"segment":[15.39662679,25.03512756],"label":"Playing accordion"}]},"7rvrBulkd5c":{"duration":98.08,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=7rvrBulkd5c","annotations":[{"segment":[14.842148262090484,50.34089462090484],"label":"Doing step aerobics"},{"segment":[71.76255190639625,80.17820298283931],"label":"Doing step aerobics"},{"segment":[86.91072384399376,95.63239859594384],"label":"Doing step aerobics"}]},"mQoYAZ2BLDM":{"duration":68.081,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mQoYAZ2BLDM","annotations":[{"segment":[50.71597870667189,61.93913766718507],"label":"Arm 
wrestling"}]},"ZMopjyYvcqw":{"duration":172.73,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZMopjyYvcqw","annotations":[{"segment":[10.244171606864274,17.52292511700468],"label":"Archery"},{"segment":[86.53629173166927,126.43464430577222],"label":"Archery"}]},"Eilil6FZhK8":{"duration":218.848,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=Eilil6FZhK8","annotations":[{"segment":[17.35808454327169,19.74056744830514],"label":"Powerbocking"},{"segment":[26.88801616340549,30.291563170596127],"label":"Powerbocking"},{"segment":[38.46007598785367,41.18291359360619],"label":"Powerbocking"},{"segment":[45.267170002234955,47.99000760798747],"label":"Powerbocking"},{"segment":[64.66738794322161,70.79377255616477],"label":"Powerbocking"},{"segment":[75.55873836623167,85.08866998636546],"label":"Powerbocking"},{"segment":[89.5132810957133,94.95895630721833],"label":"Powerbocking"},{"segment":[116.40130245251936,124.56981526977691],"label":"Powerbocking"},{"segment":[129.6751357805629,139.20506740069666],"label":"Powerbocking"},{"segment":[166.09308757176277,168.47557045978763],"label":"Powerbocking"},{"segment":[179.02656610675498,201.48997619384673],"label":"Powerbocking"}]},"OpNcCJErjP8":{"duration":82.5,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=OpNcCJErjP8","annotations":[]},"pbZVsdQNWNU":{"duration":211.743,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=pbZVsdQNWNU","annotations":[{"segment":[0.2,211.54276643990931],"label":"Playing ice hockey"}]},"5AG9Q5bF4pM":{"duration":121.6,"subset":"validation","resolution":"648x480","url":"https://www.youtube.com/watch?v=5AG9Q5bF4pM","annotations":[{"segment":[50.613369734789394,118.66655226209049],"label":"Preparing 
salad"}]},"vc820BteGzY":{"duration":156.689,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=vc820BteGzY","annotations":[{"segment":[8.05404581770403,156.687799],"label":"Making a cake"}]},"E3QtX6r9QX0":{"duration":103.329,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=E3QtX6r9QX0","annotations":[{"segment":[0.01,103.32879818594104],"label":"Blowing leaves"}]},"tOEomEC1rY8":{"duration":86.611,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tOEomEC1rY8","annotations":[{"segment":[38.75664118564743,58.60760374414976],"label":"Blow-drying hair"}]},"LeSq1XE93mE":{"duration":228.044,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=LeSq1XE93mE","annotations":[{"segment":[33.79735013260531,228.0431746031746],"label":"Playing drums"}]},"QuaoqO4JgKY":{"duration":17.83,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=QuaoqO4JgKY","annotations":[{"segment":[7.984099680000001,17.66014430577223],"label":"Springboard diving"},{"segment":[0,6.283996799999999],"label":"Springboard diving"}]},"uoy7NjeESZc":{"duration":101.518,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=uoy7NjeESZc","annotations":[{"segment":[16.148302652106082,87.54913104524181],"label":"BMX"}]},"c8iet_4ndU0":{"duration":35.09,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=c8iet_4ndU0","annotations":[{"segment":[1.6967954690976756,28.626581623809173],"label":"Using uneven bars"}]},"Qg3Lih9PTBM":{"duration":62.927,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Qg3Lih9PTBM","annotations":[{"segment":[0.2,62.681],"label":"Sumo"}]},"uDlyfvy0NOs":{"duration":80.179,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=uDlyfvy0NOs","annotations":[{"segment":[20.638771965678625,69.42132388455538],"label":"Baking 
cookies"}]},"Ue90f5r-2Qw":{"duration":156.53,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Ue90f5r-2Qw","annotations":[{"segment":[36.38429210140406,142.36269996723868],"label":"Playing violin"}]},"zuqNxHmtBD8":{"duration":54.73,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=zuqNxHmtBD8","annotations":[{"segment":[6.0918849900000005,31.91412177],"label":"Long jump"},{"segment":[35.15736231,54.681666],"label":"Long jump"}]},"CSsilC4QbB4":{"duration":139.018,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CSsilC4QbB4","annotations":[{"segment":[0.01,132.35667229294108],"label":"Scuba diving"}]},"KHsl04qla5A":{"duration":195.47,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KHsl04qla5A","annotations":[]},"Mdt2E8KYpCg":{"duration":51.688,"subset":"validation","resolution":"1252x720","url":"https://www.youtube.com/watch?v=Mdt2E8KYpCg","annotations":[{"segment":[0.01,51.3171803561821],"label":"Doing a powerbomb"}]},"ifmHO5lQq18":{"duration":150.56,"subset":"training","resolution":"2560x810","url":"https://www.youtube.com/watch?v=ifmHO5lQq18","annotations":[{"segment":[2.2307659906396258,9.979742589703587],"label":"High jump"},{"segment":[14.910909516380656,21.955433697347896],"label":"High jump"},{"segment":[27.591053042121686,32.52221996879875],"label":"High jump"},{"segment":[37.21856942277691,43.55864118564743],"label":"High jump"},{"segment":[50.36834789391576,57.647689547581905],"label":"High jump"},{"segment":[61.40476911076443,68.68411076443059],"label":"High jump"},{"segment":[73.38046021840874,79.25089703588144],"label":"High jump"},{"segment":[84.88651638065522,93.33994539781591],"label":"High jump"},{"segment":[113.06461310452418,119.4046848673947],"label":"High jump"},{"segment":[123.39658190327613,131.38037597503902],"label":"High jump"},{"segment":[136.07672542901716,144.99978939157566],"label":"High 
jump"}]},"UdBFm97tOJs":{"duration":15.14,"subset":"training","resolution":"480x480","url":"https://www.youtube.com/watch?v=UdBFm97tOJs","annotations":[{"segment":[1.9603290904836197,12.966514104524181],"label":"Drinking coffee"}]},"nxFbmoV3Idk":{"duration":229.506,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=nxFbmoV3Idk","annotations":[{"segment":[13.939219626168224,229.26100000000002],"label":"Changing car wheel"}]},"5UZ2ft8Y3sI":{"duration":151.487,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5UZ2ft8Y3sI","annotations":[]},"uy9Z2i8AmyY":{"duration":184.74,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uy9Z2i8AmyY","annotations":[{"segment":[34.89669594016411,62.23331291835726],"label":"Paintball"},{"segment":[109.42494643860648,164.67368812084945],"label":"Paintball"}]},"aiTAx3oo97U":{"duration":24.196,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=aiTAx3oo97U","annotations":[{"segment":[0.2,23.961000000000002],"label":"Elliptical trainer"}]},"JKZ-3N1fYL8":{"duration":11.378,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JKZ-3N1fYL8","annotations":[{"segment":[0.2,11.177777777777779],"label":"Raking 
leaves"}]},"2Sr1ODuJ0kE":{"duration":122.834,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=2Sr1ODuJ0kE","annotations":[]},"K65lcuHQn-E":{"duration":178.77,"subset":"testing","resolution":"634x480","url":"https://www.youtube.com/watch?v=K65lcuHQn-E","annotations":[]},"ZrrPDWi3lFE":{"duration":176.217,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZrrPDWi3lFE","annotations":[]},"0t49I3VyI0g":{"duration":234.99,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0t49I3VyI0g","annotations":[]},"Y5VEl3e9Hbo":{"duration":107.49000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Y5VEl3e9Hbo","annotations":[{"segment":[16.76396255850234,35.874879875195006],"label":"Polishing shoes"},{"segment":[56.82983307332293,69.23516536661467],"label":"Polishing shoes"},{"segment":[82.98161466458657,89.68719968798752],"label":"Polishing shoes"}]},"Hy8bl_lnrX0":{"duration":10.06,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=Hy8bl_lnrX0","annotations":[]},"UUdqzU0g9fM":{"duration":159.057,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=UUdqzU0g9fM","annotations":[]},"zUuT2FBf-Oc":{"duration":35.99,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zUuT2FBf-Oc","annotations":[{"segment":[0,35.941],"label":"Wrapping presents"}]},"QsRW8dj-F7M":{"duration":159.36,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QsRW8dj-F7M","annotations":[{"segment":[33.87961917,77.39700741],"label":"Playing lacrosse"},{"segment":[84.63928005000001,95.00382648000001],"label":"Playing lacrosse"}]},"twJ2uE1GS2I":{"duration":198.903,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=twJ2uE1GS2I","annotations":[{"segment":[15.514986857977721,45.92436109961405],"label":"Making a 
cake"},{"segment":[53.99215426576247,91.8487221992281],"label":"Making a cake"},{"segment":[94.33112009650455,99.91651536537651],"label":"Making a cake"},{"segment":[103.95041194845074,116.67270117199246],"label":"Making a cake"},{"segment":[122.56839617802399,166.0103581123245],"label":"Making a cake"}]},"DIhyfNJAm-M":{"duration":213.81,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DIhyfNJAm-M","annotations":[]},"73zt29iKyL4":{"duration":201.921,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=73zt29iKyL4","annotations":[{"segment":[0.01,201.881],"label":"Ice fishing"}]},"h-A3CAx_eyU":{"duration":157.083,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=h-A3CAx_eyU","annotations":[{"segment":[144.33947258034323,157.08251700680273],"label":"Rock-paper-scissors"}]},"R_TRpIHkgMs":{"duration":96.781,"subset":"validation","resolution":"852x480","url":"https://www.youtube.com/watch?v=R_TRpIHkgMs","annotations":[{"segment":[0.01,96.780361],"label":"Playing pool"}]},"j55LAXY-T0E":{"duration":147.87,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=j55LAXY-T0E","annotations":[{"segment":[31.824177847113884,33.66905772230889],"label":"Drinking coffee"},{"segment":[43.81589703588144,45.66077691107645],"label":"Drinking coffee"}]},"_pMQwrt-cf4":{"duration":70.52,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=_pMQwrt-cf4","annotations":[{"segment":[32.43665366614664,65.86290015600623],"label":"Installing carpet"}]},"5MfhJjbNNJE":{"duration":48.902,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=5MfhJjbNNJE","annotations":[{"segment":[13.960879862714508,15.562948043681747],"label":"Rope skipping"},{"segment":[21.437198040561622,46.30739932605304],"label":"Rope 
skipping"}]},"ugK_mLTN18U":{"duration":230.41,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=ugK_mLTN18U","annotations":[]},"MC0L0ljTUiw":{"duration":235.06,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=MC0L0ljTUiw","annotations":[{"segment":[0,235.05528344671202],"label":"Belly dance"}]},"juKQ_gU42EM":{"duration":45.14,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=juKQ_gU42EM","annotations":[{"segment":[0.01,45.101],"label":"Bullfighting"}]},"bmIWsU8sNlw":{"duration":208.0,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=bmIWsU8sNlw","annotations":[{"segment":[30.219721848334945,197.4001701287088],"label":"Ballet"}]},"7TgvbmIn03A":{"duration":122.21,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7TgvbmIn03A","annotations":[{"segment":[14.870679953728763,119.72803834633386],"label":"Mowing the lawn"}]},"UcI4miTi0Cg":{"duration":74.304,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=UcI4miTi0Cg","annotations":[{"segment":[8.705255815131641,68.49790931272655],"label":"Futsal"}]},"GN0TdqHETvc":{"duration":165.32999999999998,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GN0TdqHETvc","annotations":[{"segment":[21.340687210602233,160.69797041574537],"label":"Playing squash"}]},"eZ_IOp638kQ":{"duration":222.15,"subset":"training","resolution":"640x356","url":"https://www.youtube.com/watch?v=eZ_IOp638kQ","annotations":[{"segment":[37.08197758970359,222.14530612244897],"label":"Cricket"}]},"e_0bMJEFiN8":{"duration":92.857,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=e_0bMJEFiN8","annotations":[{"segment":[7.532830327613105,48.09422439937598],"label":"Cleaning sink"},{"segment":[54.323295631825275,81.55737450858035],"label":"Cleaning 
sink"}]},"GCbDiIoN_RU":{"duration":212.39,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=GCbDiIoN_RU","annotations":[]},"sjE-hHKFghc":{"duration":213.462,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sjE-hHKFghc","annotations":[{"segment":[30.637151135024997,194.47930592199688],"label":"Rollerblading"}]},"iBz_YrU-T80":{"duration":168.97,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=iBz_YrU-T80","annotations":[{"segment":[7.117368907001178,15.816375348891507],"label":"Drinking coffee"},{"segment":[20.2976816977441,22.670138000077824],"label":"Drinking coffee"},{"segment":[35.05963202337617,35.58684453500589],"label":"Drinking coffee"},{"segment":[38.48651334896933,47.976338558304235],"label":"Drinking coffee"},{"segment":[54.566494953675694,59.83862006997286],"label":"Drinking coffee"},{"segment":[65.11074518627002,100.96119597709078],"label":"Drinking coffee"},{"segment":[105.70610858175823,128.1126403260212],"label":"Drinking coffee"},{"segment":[145.77425946561672,166.33554741917567],"label":"Drinking coffee"}]},"h8J08sJ2no0":{"duration":45.51,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=h8J08sJ2no0","annotations":[{"segment":[1.436174362360874,30.623358162983926],"label":"Rock climbing"}]},"eAIJhHmdjmk":{"duration":124.44,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eAIJhHmdjmk","annotations":[{"segment":[14.004360797097748,60.50940752606971],"label":"Shoveling snow"},{"segment":[70.58550098401363,112.24627201205102],"label":"Shoveling snow"}]},"DlJPKOVd0bA":{"duration":126.29,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=DlJPKOVd0bA","annotations":[{"segment":[5.799365559282371,46.264387],"label":"Tai chi"},{"segment":[52.08588398,115.49583952808112],"label":"Tai 
chi"}]},"UWTpfygMUQw":{"duration":173.941,"subset":"validation","resolution":"450x360","url":"https://www.youtube.com/watch?v=UWTpfygMUQw","annotations":[{"segment":[33.31364953271028,145.45924766355142],"label":"Fixing the roof"}]},"dnJLvsqqSgQ":{"duration":176.216,"subset":"validation","resolution":"294x240","url":"https://www.youtube.com/watch?v=dnJLvsqqSgQ","annotations":[{"segment":[10.688047573983546,169.09045404846015],"label":"Capoeira"}]},"gefe0G9DHEk":{"duration":66.46,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=gefe0G9DHEk","annotations":[]},"mo_PhyaQHh0":{"duration":59.56,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mo_PhyaQHh0","annotations":[{"segment":[7.426021840873635,59.300999999999995],"label":"Installing carpet"}]},"JO_CW4PR3I8":{"duration":40.5,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=JO_CW4PR3I8","annotations":[{"segment":[24.856483619344775,36.620620904836194],"label":"Polishing shoes"}]},"27uzQVdrvrk":{"duration":89.28,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=27uzQVdrvrk","annotations":[]},"lJ8gPeDG6kk":{"duration":219.29,"subset":"testing","resolution":"320x180","url":"https://www.youtube.com/watch?v=lJ8gPeDG6kk","annotations":[]},"X1pGJqP89Nk":{"duration":212.49,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=X1pGJqP89Nk","annotations":[{"segment":[3.977429017160687,210.47228549141965],"label":"Hand washing clothes"}]},"lHObolD8eIg":{"duration":26.22,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=lHObolD8eIg","annotations":[]},"p5H5ZmAwdH8":{"duration":84.591,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=p5H5ZmAwdH8","annotations":[{"segment":[0.2,80.90675575015463],"label":"Hanging 
wallpaper"}]},"d_6BVfxlAsI":{"duration":181.488,"subset":"training","resolution":"656x480","url":"https://www.youtube.com/watch?v=d_6BVfxlAsI","annotations":[{"segment":[12.54963968362541,179.31286398269083],"label":"Playing blackjack"}]},"qHu0fz_anTQ":{"duration":123.62,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=qHu0fz_anTQ","annotations":[{"segment":[6.942947562800471,121.69444185709828],"label":"Wrapping presents"}]},"SdbxPbXeQJ0":{"duration":171.39,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SdbxPbXeQJ0","annotations":[{"segment":[21.122506808112323,25.40048287051482],"label":"Drinking beer"},{"segment":[123.79393230577222,143.04482458658347],"label":"Drinking beer"}]},"6fI4gF_2VCk":{"duration":163.21,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6fI4gF_2VCk","annotations":[{"segment":[2.8008481645388263,156.33825092043685],"label":"Smoking a cigarette"}]},"HHxPjC6T2e0":{"duration":19.087,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=HHxPjC6T2e0","annotations":[{"segment":[0.2,18.886802721088436],"label":"Hitting a pinata"}]},"HpQrpwNgCjc":{"duration":11.15,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=HpQrpwNgCjc","annotations":[{"segment":[2.3054547,7.917408],"label":"Plataform diving"}]},"RP_PlV5Jzuc":{"duration":25.66,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RP_PlV5Jzuc","annotations":[{"segment":[11.147842267432194,15.230713360374416],"label":"Grooming horse"},{"segment":[15.030572580343215,23.356429029641188],"label":"Grooming horse"}]},"UBDcmeET6ys":{"duration":30.535,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=UBDcmeET6ys","annotations":[{"segment":[0.23791731669266772,30.301000000000002],"label":"Using the rowing 
machine"}]},"17ODXa4CEmI":{"duration":112.339,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=17ODXa4CEmI","annotations":[{"segment":[11.742051970358814,101.82286857878314],"label":"Making a lemonade"}]},"uOxKf-kNEyg":{"duration":72.7,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=uOxKf-kNEyg","annotations":[{"segment":[12.242414976599065,34.57348673946958],"label":"Polishing shoes"}]},"RUv9YPd_0Zc":{"duration":210.466,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=RUv9YPd_0Zc","annotations":[{"segment":[54.50436980200297,177.3033716450699],"label":"Changing car wheel"}]},"flrXQtlMzTk":{"duration":46.35,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=flrXQtlMzTk","annotations":[]},"ULH_AqrP3to":{"duration":50.852,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=ULH_AqrP3to","annotations":[{"segment":[4.704964110581668,45.576368465721856],"label":"Mooping floor"}]},"UjG_uHlFf3s":{"duration":136.069,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UjG_uHlFf3s","annotations":[{"segment":[35.22074570982839,56.86157254290172],"label":"Sumo"}]},"AGgJVF3nT6I":{"duration":65.003,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AGgJVF3nT6I","annotations":[{"segment":[0.2,61.6220006317697],"label":"Fun sliding down"}]},"QQfH0FDsYPc":{"duration":11.56,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QQfH0FDsYPc","annotations":[{"segment":[0.8298325366614664,9.777592062402496],"label":"Hammer throw"}]},"2Voht8wf3dQ":{"duration":117.888,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2Voht8wf3dQ","annotations":[{"segment":[0.01,117.8877097505669],"label":"Sharpening 
knives"}]},"l1aHbJlvI3U":{"duration":120.141,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=l1aHbJlvI3U","annotations":[]},"kWmf0_XSfBU":{"duration":39.43,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kWmf0_XSfBU","annotations":[{"segment":[13.409034439937596,18.51430902028081],"label":"Ironing clothes"}]},"ibWb6iRQiD4":{"duration":182.277,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ibWb6iRQiD4","annotations":[{"segment":[108.91100569192837,145.59382484142904],"label":"Blow-drying hair"},{"segment":[156.39961652887885,166.3523193988984],"label":"Blow-drying hair"}]},"cTZw5NUK1-c":{"duration":155.783,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cTZw5NUK1-c","annotations":[{"segment":[42.75317316692668,58.78561310452418],"label":"Surfing"},{"segment":[78.95471918876756,88.18634789391577],"label":"Surfing"}]},"q-FOdt9a7Js":{"duration":148.75,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=q-FOdt9a7Js","annotations":[{"segment":[1.48449506,144.25451170046802],"label":"Shoveling snow"}]},"Imui46JTe7s":{"duration":122.37,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Imui46JTe7s","annotations":[{"segment":[23.08966926677067,108.96212168486738],"label":"Doing 
crunches"}]},"dyvB6XCWPZs":{"duration":220.637,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dyvB6XCWPZs","annotations":[{"segment":[24.094413490825346,74.34847591454678],"label":"Rafting"},{"segment":[100.50812484744287,113.24374340687913],"label":"Rafting"},{"segment":[148.35274565678625,172.10295319812792],"label":"Rafting"}]},"530VjKtBVKc":{"duration":187.69,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=530VjKtBVKc","annotations":[]},"DCKwTvFrA6Q":{"duration":185.481,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=DCKwTvFrA6Q","annotations":[{"segment":[59.60855721684868,111.69370429953197],"label":"Playing pool"}]},"p5Ynl_rGoEU":{"duration":180.838,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=p5Ynl_rGoEU","annotations":[{"segment":[7.61715744149766,161.3708909828393],"label":"Wakeboarding"}]},"ZWEUIJcB6_Q":{"duration":126.201,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZWEUIJcB6_Q","annotations":[]},"FG1-LeVFscs":{"duration":90.905,"subset":"testing","resolution":"480x270","url":"https://www.youtube.com/watch?v=FG1-LeVFscs","annotations":[]},"z2HRvo3APwU":{"duration":85.984,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=z2HRvo3APwU","annotations":[]},"D0fHzHqABqk":{"duration":8.55,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=D0fHzHqABqk","annotations":[]},"rvcSqYeUZ9s":{"duration":51.688,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=rvcSqYeUZ9s","annotations":[{"segment":[12.244461804838567,44.22146803536816],"label":"Applying sunscreen"}]},"2U_BUV8PLtk":{"duration":89.397,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=2U_BUV8PLtk","annotations":[{"segment":[8.341847837826162,80.91594121914886],"label":"Assembling 
bicycle"}]},"0w7cO4tscBc":{"duration":98.8,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=0w7cO4tscBc","annotations":[{"segment":[25.790819032761313,32.25777067082683],"label":"Grooming horse"},{"segment":[72.90718096723869,94.00176131045241],"label":"Grooming horse"}]},"5MpJCKOxRVs":{"duration":60.65,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5MpJCKOxRVs","annotations":[{"segment":[9.083385210608425,55.635734414976596],"label":"Fixing bicycle"}]},"AUSqTMtjD0Y":{"duration":44.68,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=AUSqTMtjD0Y","annotations":[{"segment":[0.22774103891881306,43.99829421493127],"label":"Shoveling snow"}]},"KFk4ZpyYeuE":{"duration":96.53,"subset":"training","resolution":"400x300","url":"https://www.youtube.com/watch?v=KFk4ZpyYeuE","annotations":[{"segment":[2.55914976599064,93.93585023400937],"label":"Doing kickboxing"}]},"7jS5kDQ8cQU":{"duration":211.98,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7jS5kDQ8cQU","annotations":[]},"4w3QwJdzwsE":{"duration":69.66,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=4w3QwJdzwsE","annotations":[{"segment":[0,69.659229],"label":"Sailing"}]},"hDpYoAAFwOA":{"duration":47.79,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hDpYoAAFwOA","annotations":[{"segment":[2.6838064586583465,29.44732086583463],"label":"Doing motocross"}]},"9029wVyGkvg":{"duration":112.15,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=9029wVyGkvg","annotations":[{"segment":[54.281568,105.59006728861155],"label":"Layup drill in basketball"},{"segment":[15.908437290171609,49.1898672],"label":"Layup drill in 
basketball"}]},"bXT72HACH4I":{"duration":201.851,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=bXT72HACH4I","annotations":[{"segment":[11.000521806853582,181.99809813084113],"label":"Rope skipping"}]},"vCAGiXqYXBk":{"duration":93.67,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vCAGiXqYXBk","annotations":[{"segment":[3.8685452418096724,88.39260920436818],"label":"Playing accordion"}]},"6aj1njn541k":{"duration":196.4,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6aj1njn541k","annotations":[{"segment":[79.96695755993505,85.4819201502754],"label":"Getting a piercing"},{"segment":[96.20545852038164,107.5417705116368],"label":"Getting a piercing"}]},"AcLZk6JyXUM":{"duration":225.745,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AcLZk6JyXUM","annotations":[{"segment":[0.01,225.743037],"label":"Kneeling"}]},"ej9TkJVjK9M":{"duration":42.42,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=ej9TkJVjK9M","annotations":[]},"YH536Kl_lTo":{"duration":180.05,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=YH536Kl_lTo","annotations":[]},"YGVIW3fNihQ":{"duration":25.659,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YGVIW3fNihQ","annotations":[]},"oUTfQ4c5iuQ":{"duration":187.641,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=oUTfQ4c5iuQ","annotations":[]},"RrKCACSu9xU":{"duration":49.018,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RrKCACSu9xU","annotations":[{"segment":[0.840204368174727,40.57552418096724],"label":"Swinging at the playground"}]},"bmf7otW5TQo":{"duration":84.17,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bmf7otW5TQo","annotations":[{"segment":[16.66275663026521,28.208603744149766],"label":"Using the 
pommel horse"},{"segment":[49.988269890795635,78.08423556942277],"label":"Using the pommel horse"}]},"ke3R1rOeQzE":{"duration":80.25,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=ke3R1rOeQzE","annotations":[{"segment":[0.010000000000000002,80.248162],"label":"Volleyball"}]},"McFsP60TqH4":{"duration":62.88,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=McFsP60TqH4","annotations":[{"segment":[0.2942884735797595,30.802192985959437],"label":"Brushing hair"}]},"ot7hBY4lQ2c":{"duration":164.304,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=ot7hBY4lQ2c","annotations":[{"segment":[0.01,164.30439909297053],"label":"Capoeira"}]},"lz2FMoj9tpo":{"duration":69.13,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=lz2FMoj9tpo","annotations":[{"segment":[3.1273765127475848,50.361545912176624],"label":"Discus throw"}]},"eudqfH7Vz_c":{"duration":175.334,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=eudqfH7Vz_c","annotations":[{"segment":[50.056317173166924,55.800484717628706],"label":"Croquet"},{"segment":[72.75945556318253,77.95655953198128],"label":"Croquet"}]},"0lbv0AE5gJQ":{"duration":92.3,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=0lbv0AE5gJQ","annotations":[]},"xww6n1FoJIM":{"duration":154.97,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=xww6n1FoJIM","annotations":[{"segment":[15.714549792876069,33.363198021798425],"label":"Hurling"},{"segment":[42.308403288512494,49.803034728191854],"label":"Hurling"},{"segment":[58.74823999490592,62.6164368669985],"label":"Hurling"},{"segment":[74.46278978778199,88.96852737597504],"label":"Hurling"},{"segment":[99.12254408736351,104.44131474570983],"label":"Hurling"},{"segment":[111.45242152262091,114.35356915444619],"label":"Hurling"},{"segment":[121.36467593135727,132.24397955070205],"label":"
Hurling"}]},"OBfVj8mCVUw":{"duration":75.047,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=OBfVj8mCVUw","annotations":[{"segment":[0.2,71.3062369470405],"label":"Knitting"}]},"fCLnOf-YjEI":{"duration":116.5,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=fCLnOf-YjEI","annotations":[{"segment":[1.1382217855178156,92.0474897010454],"label":"Shoveling snow"}]},"HzmT-lvHrpM":{"duration":210.51,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HzmT-lvHrpM","annotations":[{"segment":[53.30584864,110.24934463999999],"label":"Fixing bicycle"},{"segment":[135.9170728,193.84091362948516],"label":"Fixing bicycle"}]},"o_crN4ZgdmM":{"duration":45.303,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=o_crN4ZgdmM","annotations":[]},"cIN4HNSRoAg":{"duration":66.595,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=cIN4HNSRoAg","annotations":[{"segment":[0.2,66.261],"label":"Table soccer"}]},"LCLDhKiMAPA":{"duration":215.226,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=LCLDhKiMAPA","annotations":[{"segment":[0.2,208.80278003120125],"label":"Table soccer"}]},"EwMAJPldfKs":{"duration":31.86,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EwMAJPldfKs","annotations":[{"segment":[1.6898040312012481,21.023150152886117],"label":"Washing hands"}]},"fLCWIE8a4AU":{"duration":70.565,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=fLCWIE8a4AU","annotations":[{"segment":[0.19999999999999998,55.38571450858035],"label":"Bullfighting"}]},"fBxpQxIqGN8":{"duration":164.51,"subset":"training","resolution":"322x240","url":"https://www.youtube.com/watch?v=fBxpQxIqGN8","annotations":[{"segment":[1.2832556839688554,145.77784569886197],"label":"Tai 
chi"}]},"7BUr2cN8CLs":{"duration":32.834,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=7BUr2cN8CLs","annotations":[]},"sxQ9H3c5bRM":{"duration":129.71,"subset":"validation","resolution":"810x480","url":"https://www.youtube.com/watch?v=sxQ9H3c5bRM","annotations":[{"segment":[30.548878315132605,117.13775195007801],"label":"Doing step aerobics"}]},"D7tSpFiMXYM":{"duration":53.06,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=D7tSpFiMXYM","annotations":[]},"_8Zk9dfBgPg":{"duration":122.35,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_8Zk9dfBgPg","annotations":[{"segment":[0.01,101.44585858264263],"label":"Painting"}]},"0rRk3C1yor8":{"duration":180.63,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=0rRk3C1yor8","annotations":[]},"RUAWJc2OIJY":{"duration":173.64,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RUAWJc2OIJY","annotations":[{"segment":[32.2355580124805,44.42547490795632],"label":"Breakdancing"},{"segment":[118.91941149141967,143.8410193666147],"label":"Breakdancing"}]},"o4Q7F1KqmII":{"duration":65.62,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=o4Q7F1KqmII","annotations":[]},"UvYRPf1xV3o":{"duration":195.768,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=UvYRPf1xV3o","annotations":[]},"e4ZTZDDFtYY":{"duration":204.383,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=e4ZTZDDFtYY","annotations":[{"segment":[131.36550390639627,135.5105319422777],"label":"Rock-paper-scissors"}]},"J3qC2hqRhPc":{"duration":84.68,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=J3qC2hqRhPc","annotations":[{"segment":[14.664274040349369,17.43859615609114],"label":"Brushing teeth"},{"segment":[20.609250002653166,22.590908656754433],"label":"Brushing 
teeth"},{"segment":[26.157894234136712,31.045985580919837],"label":"Brushing teeth"},{"segment":[34.216639096723874,36.46251888299532],"label":"Brushing teeth"},{"segment":[44.12493227145086,49.40935529797192],"label":"Brushing teeth"},{"segment":[52.58000911388455,61.695638834633385],"label":"Brushing teeth"},{"segment":[71.07548970670827,73.18925891731669],"label":"Brushing teeth"},{"segment":[79.53056654914197,83.6259943946958],"label":"Brushing teeth"}]},"KEWlDtEt2cQ":{"duration":97.18,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=KEWlDtEt2cQ","annotations":[{"segment":[8.641191907956319,87.77631780187208],"label":"Tango"}]},"zvglI3yW_Hs":{"duration":199.39,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=zvglI3yW_Hs","annotations":[]},"EXr5QXCpkYY":{"duration":234.754,"subset":"validation","resolution":"852x480","url":"https://www.youtube.com/watch?v=EXr5QXCpkYY","annotations":[{"segment":[0,234.721],"label":"Installing carpet"}]},"85HxidMLf5Y":{"duration":111.18,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=85HxidMLf5Y","annotations":[{"segment":[10.666761578003122,103.97924497581904],"label":"Polishing forniture"}]},"QjoEYtjPa3w":{"duration":131.124,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=QjoEYtjPa3w","annotations":[{"segment":[23.85908375795399,39.96906549040851],"label":"Putting in contact lenses"},{"segment":[62.80853326300225,65.86739055397463],"label":"Putting in contact lenses"}]},"cCqjsuJa2vk":{"duration":49.923,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=cCqjsuJa2vk","annotations":[{"segment":[0.01,48.85544735087532],"label":"Baton twirling"}]},"suL3ZeuQ3DA":{"duration":24.13,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=suL3ZeuQ3DA","annotations":[{"segment":[4.497661395475819,20.98281362324493],"label":"Playing 
kickball"}]},"5cNNxBAPxQI":{"duration":22.94,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=5cNNxBAPxQI","annotations":[]},"s3HhNlysFDs":{"duration":187.687,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=s3HhNlysFDs","annotations":[{"segment":[16.82319218580511,169.39885521232617],"label":"Swimming"}]},"g9b-kCC79kQ":{"duration":133.655,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=g9b-kCC79kQ","annotations":[{"segment":[19.904673166926678,23.0310616224649],"label":"Playing blackjack"},{"segment":[42.206244149765986,48.87587285491419],"label":"Playing blackjack"},{"segment":[58.463464118564744,80.34818330733229],"label":"Playing blackjack"},{"segment":[81.80716458658345,85.78009594383775],"label":"Playing blackjack"},{"segment":[96.82832215288612,99.74672152886116],"label":"Playing blackjack"},{"segment":[115.17254680187207,121.42625975039003],"label":"Playing blackjack"}]},"ZVNRQ_MPZAs":{"duration":206.66,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZVNRQ_MPZAs","annotations":[{"segment":[39.65495876977936,147.33590372186316],"label":"Doing motocross"}]},"6SOluodeJ7s":{"duration":17.067,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6SOluodeJ7s","annotations":[{"segment":[9.953065580345301,11.465930618336895],"label":"Sumo"}]},"JTGuJPi88Wk":{"duration":93.484,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JTGuJPi88Wk","annotations":[]},"wUvC0TXK1PM":{"duration":81.27,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=wUvC0TXK1PM","annotations":[{"segment":[8.238166926677067,25.85517004680187],"label":"Cleaning windows"},{"segment":[31.17829329173167,76.04461778471139],"label":"Cleaning 
windows"}]},"l9SPN6pgNow":{"duration":204.99,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=l9SPN6pgNow","annotations":[]},"IaSMwUYheOs":{"duration":170.156,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=IaSMwUYheOs","annotations":[]},"zqXJkZgKmSk":{"duration":10.45,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zqXJkZgKmSk","annotations":[{"segment":[3.12562368,10.383773769110766],"label":"Playing kickball"}]},"6iTymXuGFhg":{"duration":121.603,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6iTymXuGFhg","annotations":[{"segment":[3.4135694227769116,107.54512792511701],"label":"Swinging at the playground"}]},"CgWVpLVd16o":{"duration":22.036,"subset":"validation","resolution":"800x480","url":"https://www.youtube.com/watch?v=CgWVpLVd16o","annotations":[{"segment":[1.6818237129485178,11.94438065522621],"label":"Fun sliding down"}]},"mCdKLZGnKxk":{"duration":75.047,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mCdKLZGnKxk","annotations":[{"segment":[46.47990034945398,74.846892],"label":"Fixing the roof"}]},"tKQHLe1UpwE":{"duration":18.34,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=tKQHLe1UpwE","annotations":[]},"-yOwB5rvMAo":{"duration":72.075,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-yOwB5rvMAo","annotations":[{"segment":[0.01,57.04578471138846],"label":"Knitting"}]},"8HdfGmQaabM":{"duration":231.9,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8HdfGmQaabM","annotations":[{"segment":[22.132404822667,25.38330697792162],"label":"Preparing pasta"},{"segment":[129.41217594606965,225.85560655195687],"label":"Preparing 
pasta"}]},"ycA2gqWhPGk":{"duration":18.53,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=ycA2gqWhPGk","annotations":[{"segment":[1.5898965834633385,2.6016489547581902],"label":"Washing face"}]},"wONwHYy59Tc":{"duration":191.124,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wONwHYy59Tc","annotations":[{"segment":[94.52138901727746,130.48707650118757],"label":"Blow-drying hair"}]},"kl_JsmJ84PI":{"duration":91.58,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=kl_JsmJ84PI","annotations":[{"segment":[1.1411764539668412,4.99264698610493],"label":"Futsal"}]},"vT6bb2y34UE":{"duration":57.75,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vT6bb2y34UE","annotations":[{"segment":[15.225298586583463,55.67594394383776],"label":"Wrapping presents"}]},"3eF0whKP6EY":{"duration":234.986,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3eF0whKP6EY","annotations":[]},"KrwFx7kw2pY":{"duration":223.66,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KrwFx7kw2pY","annotations":[]},"3jReEwvx48A":{"duration":81.57,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=3jReEwvx48A","annotations":[]},"a2Y7AK4VD0o":{"duration":30.19,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=a2Y7AK4VD0o","annotations":[{"segment":[0,30.18594],"label":"Playing accordion"}]},"FzdLe1mjk7c":{"duration":48.228,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=FzdLe1mjk7c","annotations":[{"segment":[13.95082153670529,36.90217497994012],"label":"Table soccer"}]},"5cTnHXcD7DE":{"duration":215.946,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5cTnHXcD7DE","annotations":[{"segment":[6.3977831513260535,14.815918876755072],"label":"Running a 
marathon"},{"segment":[35.019444617784714,69.36543837753511],"label":"Running a marathon"},{"segment":[84.18135725429018,91.92604212168487],"label":"Running a marathon"},{"segment":[107.0786864274571,113.8131950078003],"label":"Running a marathon"},{"segment":[131.32291731669267,136.71052418096724],"label":"Running a marathon"},{"segment":[156.24059906396258,169.38858502340094],"label":"Running a marathon"}]},"rWDMssiL7hE":{"duration":138.95,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rWDMssiL7hE","annotations":[{"segment":[0.9751287004680188,3.1420813681747273],"label":"Brushing teeth"},{"segment":[8.99285357098284,11.15980623868955],"label":"Brushing teeth"},{"segment":[28.495427580343215,33.47941871606864],"label":"Brushing teeth"},{"segment":[39.54688618564743,51.89851639157566],"label":"Brushing teeth"},{"segment":[56.882507527301094,71.83448093447737],"label":"Brushing teeth"},{"segment":[79.41881527145085,83.10263480655227],"label":"Brushing teeth"},{"segment":[95.45426501248049,99.35477981435258],"label":"Brushing teeth"}]},"Z-zG845frso":{"duration":184.135,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Z-zG845frso","annotations":[]},"c4ctwOucndQ":{"duration":117.865,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=c4ctwOucndQ","annotations":[{"segment":[3.861395141519947,112.16433335413416],"label":"Using the rowing machine"}]},"4BGapLta7Pk":{"duration":6.83,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=4BGapLta7Pk","annotations":[]},"MrVj3D-DuJI":{"duration":81.967,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=MrVj3D-DuJI","annotations":[{"segment":[14.334261386765245,79.3201129874475],"label":"Hula 
hoop"}]},"ymLLzMVY6V4":{"duration":101.57,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=ymLLzMVY6V4","annotations":[]},"wbXUEc4sBAo":{"duration":88.35,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=wbXUEc4sBAo","annotations":[]},"9PuaRdU6ViM":{"duration":218.849,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9PuaRdU6ViM","annotations":[{"segment":[17.385127259626593,213.39391499071073],"label":"Making a lemonade"}]},"Ty0BvWyYPVA":{"duration":17.09,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ty0BvWyYPVA","annotations":[{"segment":[0.009999999999999998,14.385317570213543],"label":"Shuffleboard"}]},"UZ-Twd5hujM":{"duration":112.014,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UZ-Twd5hujM","annotations":[]},"BAgUUUOGaxI":{"duration":165.19,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BAgUUUOGaxI","annotations":[{"segment":[0,56.58446430000001],"label":"Playing accordion"},{"segment":[67.58585190000001,160.37386895475817],"label":"Playing accordion"}]},"6YvF2YHee5I":{"duration":85.64,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6YvF2YHee5I","annotations":[{"segment":[3.338078510140406,49.21785990000001],"label":"Shoveling snow"},{"segment":[53.43024555,81.4491156474259],"label":"Shoveling snow"}]},"cd_-gvb82Bs":{"duration":183.51,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cd_-gvb82Bs","annotations":[{"segment":[20.93876627013542,80.09207935424757],"label":"Cleaning windows"},{"segment":[134.97363302178545,173.5560862928135],"label":"Cleaning windows"}]},"gFv3PrFkeL0":{"duration":67.71,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=gFv3PrFkeL0","annotations":[{"segment":[12.253180907332293,13.309489606240248],"label":"Ironing 
clothes"},{"segment":[51.23097189703588,58.62513278939157],"label":"Ironing clothes"}]},"nMK_jVy5ZuI":{"duration":183.53,"subset":"training","resolution":"2276x720","url":"https://www.youtube.com/watch?v=nMK_jVy5ZuI","annotations":[{"segment":[38.4605468,114.62549680000001],"label":"Long jump"},{"segment":[22.5796959,160.972113299532],"label":"Triple jump"}]},"N5x5VUK7Kx8":{"duration":71.657,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=N5x5VUK7Kx8","annotations":[{"segment":[0,71.621],"label":"Kite flying"}]},"EWlpD9yoALg":{"duration":25.635,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=EWlpD9yoALg","annotations":[]},"fcSJAHXHDvM":{"duration":73.33,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=fcSJAHXHDvM","annotations":[{"segment":[10.982132934296963,55.82584182215288],"label":"Tennis serve with ball bouncing"}]},"3ohvA6Raf4w":{"duration":231.32,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=3ohvA6Raf4w","annotations":[{"segment":[11.15241804,62.82312966],"label":"Cumbia"},{"segment":[74.61157320000001,139.00973328],"label":"Cumbia"}]},"UQFMy9Tz8dY":{"duration":210.81,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=UQFMy9Tz8dY","annotations":[]},"IRmk1AM-yJE":{"duration":158.639,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IRmk1AM-yJE","annotations":[{"segment":[0.2,153.4496116962617],"label":"Playing 
pool"}]},"xslLJNiCmKg":{"duration":148.051,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=xslLJNiCmKg","annotations":[]},"-PpuDhbMdoY":{"duration":56.17,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=-PpuDhbMdoY","annotations":[]},"xywGFiZ0hqs":{"duration":89.722,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=xywGFiZ0hqs","annotations":[{"segment":[31.843577743109726,49.20007726023327],"label":"Putting in contact lenses"}]},"LIJBolW8k5o":{"duration":131.26,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LIJBolW8k5o","annotations":[{"segment":[8.086839313572543,36.74905460218409],"label":"Windsurfing"},{"segment":[40.43419656786271,108.81405304212169],"label":"Windsurfing"}]},"OqajYZVt1Wc":{"duration":75.698,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=OqajYZVt1Wc","annotations":[{"segment":[0.2,74.2445694227769],"label":"Knitting"}]},"UGd9cVzHQOU":{"duration":34.134,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=UGd9cVzHQOU","annotations":[{"segment":[3.4313798751950078,29.392827613104522],"label":"Blowing leaves"}]},"-qGmUrF_7v4":{"duration":204.963,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-qGmUrF_7v4","annotations":[{"segment":[11.899552935936185,17.64616619806347],"label":"Hitting a pinata"},{"segment":[33.289724522743285,37.75931261550895],"label":"Hitting a pinata"},{"segment":[46.059976216359466,50.52956430912513],"label":"Hitting a pinata"},{"segment":[55.956921278912006,63.619072295081715],"label":"Hitting a pinata"},{"segment":[70.0041981418898,76.7085802810383],"label":"Hitting a pinata"},{"segment":[109.91123468444037,118.8504108699717],"label":"Hitting a pinata"},{"segment":[125.23553671677978,131.30140627124746],"label":"Hitting a 
pinata"}]},"nqmw4BCi6io":{"duration":153.624,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=nqmw4BCi6io","annotations":[{"segment":[38.82521315546499,44.81675839550589],"label":"Mooping floor"},{"segment":[57.51883430439259,60.874099638815494],"label":"Mooping floor"},{"segment":[102.57525450950013,107.84781432073609],"label":"Mooping floor"}]},"ksvK_P-Eas4":{"duration":238.32999999999998,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ksvK_P-Eas4","annotations":[{"segment":[20.447043681747267,228.6351248049922],"label":"Belly dance"}]},"gaILpaBa7M8":{"duration":79.41,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gaILpaBa7M8","annotations":[{"segment":[14.608631520000005,79.412266],"label":"Ping-pong"}]},"zxr6UZKPDh4":{"duration":238.07,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zxr6UZKPDh4","annotations":[]},"5HLW2AI1Ink":{"duration":146.866,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5HLW2AI1Ink","annotations":[{"segment":[3.0644885109518176,139.59429692216676],"label":"Swimming"}]},"VuR8vLoumyE":{"duration":216.735,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VuR8vLoumyE","annotations":[]},"twM3ZBS1irA":{"duration":55.61,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=twM3ZBS1irA","annotations":[]},"m0_F34CgAco":{"duration":122.439,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=m0_F34CgAco","annotations":[{"segment":[0.2,111.27114352574102],"label":"Baking 
cookies"}]},"UNKf15ALDh8":{"duration":218.547,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UNKf15ALDh8","annotations":[]},"0xJPQ1I8-e0":{"duration":27.981,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0xJPQ1I8-e0","annotations":[{"segment":[0.01,27.718141302652107],"label":"Canoeing"}]},"_gF1wR7uHIA":{"duration":34.65,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_gF1wR7uHIA","annotations":[]},"3OXh6OV2Zrc":{"duration":176.73,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=3OXh6OV2Zrc","annotations":[{"segment":[111.66063114664585,159.63334675039002],"label":"Bungee jumping"}]},"VYjsuhjRu6A":{"duration":53.13,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=VYjsuhjRu6A","annotations":[]},"S_ONutgLuV8":{"duration":97.616,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=S_ONutgLuV8","annotations":[{"segment":[16.7516122413604,81.62603770358814],"label":"Surfing"}]},"Sw7iRtHxMSs":{"duration":90.791,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Sw7iRtHxMSs","annotations":[{"segment":[0.01,90.79004535147392],"label":"Beer pong"}]},"6S9NCdsUxfE":{"duration":49.97,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=6S9NCdsUxfE","annotations":[{"segment":[6.230390015600624,16.822053042121684],"label":"Waxing skis"},{"segment":[25.77823868954758,44.39152886115445],"label":"Waxing skis"}]},"HcPhIfhQLmA":{"duration":31.858,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=HcPhIfhQLmA","annotations":[{"segment":[15.407037614837925,27.136264736349453],"label":"Doing a 
powerbomb"}]},"BcMHGhxdMl4":{"duration":42.029,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BcMHGhxdMl4","annotations":[{"segment":[0.01,40.22503953219353],"label":"Riding bumper cars"}]},"C27eI-S1Vkw":{"duration":181.975,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=C27eI-S1Vkw","annotations":[]},"BCRFFkvfB_Q":{"duration":24.59,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=BCRFFkvfB_Q","annotations":[{"segment":[15.721252362961339,24.541],"label":"Long jump"}]},"Ynmg-Br3m_4":{"duration":219.684,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ynmg-Br3m_4","annotations":[]},"aKJKZHvP53E":{"duration":142.548,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=aKJKZHvP53E","annotations":[{"segment":[0.01,133.75466314253882],"label":"Playing blackjack"}]},"KrLVIvCoW90":{"duration":213.693,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KrLVIvCoW90","annotations":[{"segment":[37.671307658346336,210.35924915363964],"label":"Longboarding"}]},"MVVhOW7t3u0":{"duration":34.85,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MVVhOW7t3u0","annotations":[{"segment":[24.35914509578783,33.656943904967086],"label":"Javelin throw"}]},"dSob6koudZY":{"duration":40.71,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=dSob6koudZY","annotations":[]},"2Ot4ZPYpNwI":{"duration":207.52,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2Ot4ZPYpNwI","annotations":[{"segment":[2.2661733712948515,198.77577856786272],"label":"Playing 
harmonica"}]},"whcxrkF2hws":{"duration":205.95,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=whcxrkF2hws","annotations":[{"segment":[21.207004680187207,202.1091809672387],"label":"Volleyball"}]},"0wXKWpGzMJQ":{"duration":72.772,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=0wXKWpGzMJQ","annotations":[]},"weKPXw4nxKA":{"duration":141.967,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=weKPXw4nxKA","annotations":[{"segment":[12.070500387362435,128.78891541575663],"label":"Playing congas"}]},"t-8wEopB3AQ":{"duration":194.723,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=t-8wEopB3AQ","annotations":[{"segment":[25.821199514647255,30.985439417576703],"label":"Scuba diving"},{"segment":[49.21216848673947,57.7179753856821],"label":"Scuba diving"},{"segment":[74.72958909828394,106.32258611544461],"label":"Scuba diving"},{"segment":[112.7019412823713,131.53622796567862],"label":"Scuba diving"}]},"9YnYdsmjkIY":{"duration":115.13,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=9YnYdsmjkIY","annotations":[{"segment":[106.32406260218409,110.27529465834634],"label":"Playing harmonica"}]},"r2H-cnti8GI":{"duration":121.371,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=r2H-cnti8GI","annotations":[]},"lCuMD23jOxo":{"duration":157.8,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=lCuMD23jOxo","annotations":[{"segment":[10.958385335413416,15.144734789391576],"label":"Smoking hookah"},{"segment":[62.67211388455538,70.05978939157566],"label":"Smoking hookah"}]},"TxLBvNWVk-4":{"duration":60.813,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=TxLBvNWVk-4","annotations":[{"segment":[0.01,59.240635228650106],"label":"Playing ten 
pins"}]},"0tJ7eW6TKKM":{"duration":159.243,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0tJ7eW6TKKM","annotations":[{"segment":[0.2,123.22038317628706],"label":"Running a marathon"}]},"ie9PgZt9svA":{"duration":207.053,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ie9PgZt9svA","annotations":[{"segment":[0.01,200.9150796474259],"label":"Rope skipping"}]},"PB5RsoruLmE":{"duration":89.7,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=PB5RsoruLmE","annotations":[]},"J9Sl_H-1BOA":{"duration":15.14,"subset":"training","resolution":"480x480","url":"https://www.youtube.com/watch?v=J9Sl_H-1BOA","annotations":[{"segment":[0.6849188783151325,14.52500379875195],"label":"Playing piano"}]},"YAjqqbSsFTg":{"duration":148.097,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=YAjqqbSsFTg","annotations":[{"segment":[88.85043427281349,134.2944896270424],"label":"Spread mulch"}]},"AHd87Cn-NkU":{"duration":83.616,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=AHd87Cn-NkU","annotations":[{"segment":[42.86803276131046,75.05163182527302],"label":"Cleaning sink"}]},"4IC1_C_dtvk":{"duration":62.555,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4IC1_C_dtvk","annotations":[{"segment":[0.2,62.554307],"label":"Playing congas"}]},"rIh_pfcVySw":{"duration":51.108,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=rIh_pfcVySw","annotations":[]},"rI-Iq496Sgk":{"duration":62.764,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rI-Iq496Sgk","annotations":[{"segment":[0.01,48.27211192043682],"label":"Trimming branches or hedges"},{"segment":[52.580373430577225,61.588556588143526],"label":"Trimming branches or 
hedges"}]},"n6A8YtBAJX0":{"duration":179.816,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=n6A8YtBAJX0","annotations":[{"segment":[14.58441809672387,64.50800312012481],"label":"Shuffleboard"},{"segment":[82.45805616224649,179.781],"label":"Shuffleboard"}]},"FA9n39rLSpU":{"duration":85.98,"subset":"testing","resolution":"256x144","url":"https://www.youtube.com/watch?v=FA9n39rLSpU","annotations":[]},"V1AqyQ3qaQY":{"duration":197.02,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=V1AqyQ3qaQY","annotations":[{"segment":[11.438481278834855,14.814228003844502],"label":"Archery"},{"segment":[32.6136203402931,39.978886134685624],"label":"Archery"},{"segment":[58.69893669543329,60.84713921879778],"label":"Archery"},{"segment":[66.98486071412488,72.20192398515292],"label":"Archery"},{"segment":[91.84263277019964,93.99083529356412],"label":"Archery"},{"segment":[101.96987323748937,103.50430361132113],"label":"Archery"},{"segment":[115.16597445244263,118.54172127487254],"label":"Archery"},{"segment":[129.28273389169496,132.04470856459218],"label":"Archery"},{"segment":[147.6958983776763,151.6854173496389],"label":"Archery"},{"segment":[150.15098697580711,151.6854173496389],"label":"Archery"},{"segment":[159.35756921879778,167.64349323748937],"label":"Archery"},{"segment":[173.47432865805013,177.4638476300127],"label":"Archery"}]},"0-zXxER0-g0":{"duration":105.024,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=0-zXxER0-g0","annotations":[]},"rb-PUa4uGLQ":{"duration":52.64,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rb-PUa4uGLQ","annotations":[{"segment":[0,52.63963718820862],"label":"Playing accordion"}]},"0H_9zlnmW4U":{"duration":230.16,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0H_9zlnmW4U","annotations":[{"segment":[144.25717768,220.954257350234],"label":"Tai 
chi"},{"segment":[14.538826031981278,131.49732903999998],"label":"Tai chi"}]},"eFbZ0_TJLE0":{"duration":113.383,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=eFbZ0_TJLE0","annotations":[{"segment":[0.01,113.321],"label":"Sumo"}]},"Pmt3R5olRP0":{"duration":32.69,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Pmt3R5olRP0","annotations":[{"segment":[9.84381144305772,14.434189836193447],"label":"Drinking coffee"},{"segment":[27.848295585023397,29.225409102964118],"label":"Drinking coffee"}]},"W06Ojnv-Wxg":{"duration":44.14,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=W06Ojnv-Wxg","annotations":[{"segment":[0.9640754773048066,20.943138859999998],"label":"Using parallel bars"},{"segment":[23.36780399,41.248656656786274],"label":"Using parallel bars"}]},"RNxHKDlWAGQ":{"duration":192.679,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=RNxHKDlWAGQ","annotations":[{"segment":[10.520704256741698,183.96202712012482],"label":"BMX"}]},"r8qq4rU0tPE":{"duration":180.814,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=r8qq4rU0tPE","annotations":[{"segment":[149.91004142657883,157.23271807766918],"label":"Skiing"}]},"_rDRANqiA-0":{"duration":129.986,"subset":"testing","resolution":"423x240","url":"https://www.youtube.com/watch?v=_rDRANqiA-0","annotations":[]},"mXqmRAUsn9U":{"duration":192.842,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mXqmRAUsn9U","annotations":[]},"jYA_3BMF2d4":{"duration":186.55,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jYA_3BMF2d4","annotations":[{"segment":[18.334742589703588,175.4896790728772],"label":"Tai 
chi"}]},"LxLvhGHwca8":{"duration":154.901,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=LxLvhGHwca8","annotations":[{"segment":[0.01,135.35327884185648],"label":"Powerbocking"}]},"9RAW6QibWRs":{"duration":47.02,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=9RAW6QibWRs","annotations":[{"segment":[0,47.02040816326531],"label":"Hand washing clothes"}]},"Zfak-LkgQfQ":{"duration":132.59,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Zfak-LkgQfQ","annotations":[{"segment":[15.856474258970358,114.4155516],"label":"Kayaking"}]},"BMy7CBVxH5M":{"duration":16.49,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=BMy7CBVxH5M","annotations":[]},"gitnWhi2idI":{"duration":62.9,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gitnWhi2idI","annotations":[]},"byiKkWjH-nw":{"duration":105.84,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=byiKkWjH-nw","annotations":[]},"BOckSaGr-uI":{"duration":135.51,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BOckSaGr-uI","annotations":[{"segment":[92.72958894596617,123.288204544722],"label":"Painting"}]},"bmc11KqVbEE":{"duration":97.99000000000001,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=bmc11KqVbEE","annotations":[{"segment":[41.00895788,96.33731733697348],"label":"Cleaning windows"},{"segment":[8.938513979719188,40.56710190795632],"label":"Cleaning windows"}]},"6iBXtHrJ4gc":{"duration":145.473,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=6iBXtHrJ4gc","annotations":[{"segment":[95.9982292698908,111.88446106396258],"label":"Trimming branches or hedges"},{"segment":[134.80602408112324,138.89105511388456],"label":"Trimming branches or 
hedges"}]},"kD_EJjQCLoY":{"duration":192.169,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kD_EJjQCLoY","annotations":[]},"pu-2w-UxdYg":{"duration":135.721,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=pu-2w-UxdYg","annotations":[{"segment":[3.4669984615151592,132.84522169516],"label":"Painting furniture"}]},"m6H1tLAkyjQ":{"duration":24.056,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=m6H1tLAkyjQ","annotations":[{"segment":[0.2,23.855873015873016],"label":"Surfing"}]},"BiZF6o-AU64":{"duration":140.899,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=BiZF6o-AU64","annotations":[{"segment":[7.473562779245864,129.24867261154446],"label":"Playing pool"}]},"EpLRCz5dwb0":{"duration":32.02,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EpLRCz5dwb0","annotations":[{"segment":[3.2576287051482056,26.881653666146647],"label":"Javelin 
throw"}]},"DR2sW3Wd8fo":{"duration":179.4,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DR2sW3Wd8fo","annotations":[{"segment":[17.352009862707433,24.068916906336113],"label":"Ballet"},{"segment":[27.707241554968324,36.38324648632204],"label":"Ballet"},{"segment":[39.18195775450066,43.10015352995072],"label":"Ballet"},{"segment":[55.974225363572366,74.72559086036911],"label":"Ballet"},{"segment":[78.92365776263703,90.95811621580508],"label":"Ballet"},{"segment":[98.51463663988737,108.86986833214824],"label":"Ballet"},{"segment":[139.9355634089309,151.97002186209897],"label":"Ballet"}]},"lgwDuuJy2zY":{"duration":151.91,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lgwDuuJy2zY","annotations":[{"segment":[0.2,9.71622882330259],"label":"Kayaking"},{"segment":[91.23775846271946,138.16003396099845],"label":"Kayaking"}]},"2belnHaa36g":{"duration":22.71,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2belnHaa36g","annotations":[{"segment":[0.2,18.77221684867395],"label":"Playing drums"}]},"PFn7a6eEhb4":{"duration":111.99,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PFn7a6eEhb4","annotations":[{"segment":[13.841943256663384,19.246228303392357],"label":"Bullfighting"},{"segment":[25.34784045292507,28.48581241554189],"label":"Bullfighting"},{"segment":[41.21203204171012,46.441985312738154],"label":"Bullfighting"},{"segment":[76.9500460604017,79.21635914451386],"label":"Bullfighting"},{"segment":[82.17999933142974,86.18963017255125],"label":"Bullfighting"},{"segment":[91.24525166787835,93.51156475199049],"label":"Bullfighting"}]},"etfSrbmOvUE":{"duration":188.66,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=etfSrbmOvUE","annotations":[{"segment":[7.546480000000002,153.97815912636506],"label":"Rock 
climbing"}]},"67NwPB79MTo":{"duration":34.44,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=67NwPB79MTo","annotations":[{"segment":[3.0909097200000004,28.33368577],"label":"Dodgeball"}]},"xckyFTyrkmw":{"duration":172.525,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xckyFTyrkmw","annotations":[]},"4T8uFygBeNo":{"duration":221.959,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4T8uFygBeNo","annotations":[{"segment":[26.26865420560748,210.8595171339564],"label":"Longboarding"}]},"-voGnJbk3CI":{"duration":77.07,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=-voGnJbk3CI","annotations":[{"segment":[13.758041341653666,25.9464976],"label":"Plataform diving"},{"segment":[29.553344199999998,38.6406887675507],"label":"Plataform diving"},{"segment":[41.64540795631825,48.37597893915757],"label":"Plataform diving"},{"segment":[67.36580421216848,70.61090093603744],"label":"Plataform diving"}]},"4BRCZaaITn0":{"duration":25.66,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4BRCZaaITn0","annotations":[{"segment":[0.24016893603744152,25.337822751950082],"label":"Mowing the lawn"}]},"3LyyqeVeYkI":{"duration":16.626,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3LyyqeVeYkI","annotations":[{"segment":[0.9855982340093603,13.175892180967239],"label":"Calf 
roping"}]},"12qA1Z4EsUI":{"duration":209.26,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=12qA1Z4EsUI","annotations":[]},"f1HMhfSlPSw":{"duration":202.06,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=f1HMhfSlPSw","annotations":[{"segment":[10.40246139641504,193.5488271939041],"label":"Tumbling"}]},"t9UsqWspqZM":{"duration":82.76,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=t9UsqWspqZM","annotations":[{"segment":[1.5813276169797714,79.66376686931622],"label":"Playing violin"}]},"WxefBZ7Iaow":{"duration":151.58,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=WxefBZ7Iaow","annotations":[{"segment":[67.13717098413524,128.58274951767658],"label":"Rafting"},{"segment":[24.1252660106563,28.851848974774867],"label":"Rafting"},{"segment":[33.3421027906875,58.86565079692775],"label":"Rafting"}]},"2KsL5MBiZcc":{"duration":32.14,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=2KsL5MBiZcc","annotations":[{"segment":[0.8748179836193447,31.868369403276127],"label":"Playing field hockey"}]},"6DLfwAf8new":{"duration":212.93,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6DLfwAf8new","annotations":[]},"E7rhlhVA0SY":{"duration":234.011,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=E7rhlhVA0SY","annotations":[{"segment":[60.413304891379326,106.77275940017194],"label":"Making a cake"}]},"hC8ZZZhabp4":{"duration":226.743,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=hC8ZZZhabp4","annotations":[]},"c4f6d1GRDEw":{"duration":179.05,"subset":"training","resolution":"423x240","url":"https://www.youtube.com/watch?v=c4f6d1GRDEw","annotations":[{"segment":[66.88102886115445,134.73944227769113],"label":"Waxing 
skis"}]},"0k6GFx2ZCg8":{"duration":48.95,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0k6GFx2ZCg8","annotations":[]},"A0F6uT95xhA":{"duration":76.65,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=A0F6uT95xhA","annotations":[{"segment":[16.80498361934477,75.41311154446178],"label":"Breakdancing"}]},"h61C_KfvG0Y":{"duration":125.992,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=h61C_KfvG0Y","annotations":[{"segment":[17.689910405616224,113.41198115600623],"label":"Fixing the roof"}]},"GqnJBjH_b24":{"duration":220.22,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=GqnJBjH_b24","annotations":[]},"UgSLUt8X1Lc":{"duration":65.992,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UgSLUt8X1Lc","annotations":[{"segment":[34.78130733229329,53.71551014040561],"label":"Mooping floor"}]},"RXT17X7lRoQ":{"duration":123.6,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=RXT17X7lRoQ","annotations":[]},"xld0M1MXNiE":{"duration":6.59,"subset":"training","resolution":"480x480","url":"https://www.youtube.com/watch?v=xld0M1MXNiE","annotations":[{"segment":[1.0319610000000001,2.8530919200000002],"label":"Smoking hookah"}]},"rYumFn7S1YA":{"duration":70.87,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rYumFn7S1YA","annotations":[{"segment":[12.975108899999999,61.062311205148205],"label":"Using the balance beam"}]},"jVM8v6uJx8c":{"duration":85.96000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jVM8v6uJx8c","annotations":[{"segment":[25.211437061564094,50.42287347269891],"label":"Rock climbing"},{"segment":[54.98238862714509,71.07479505460219],"label":"Rock 
climbing"}]},"b3oobKr2RQ4":{"duration":212.277,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=b3oobKr2RQ4","annotations":[]},"O0hvsRrzHNI":{"duration":170.435,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=O0hvsRrzHNI","annotations":[]},"VWAIjYs9Lws":{"duration":64.018,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=VWAIjYs9Lws","annotations":[]},"PCSlAOYPMOs":{"duration":88.51,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=PCSlAOYPMOs","annotations":[{"segment":[1.2427928143525742,64.76331443681747],"label":"Playing badminton"}]},"MhWIIdVyWVs":{"duration":238.56,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=MhWIIdVyWVs","annotations":[{"segment":[17.119839890194246,191.66777268369646],"label":"Tai chi"}]},"OkEqcSlWMJg":{"duration":213.368,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OkEqcSlWMJg","annotations":[{"segment":[31.951816782875397,47.262062324669856],"label":"Swimming"},{"segment":[95.18978749898294,116.49099868756655],"label":"Swimming"},{"segment":[140.4548612747231,144.78166958658346],"label":"Swimming"},{"segment":[153.76811804368174,155.43227516536663],"label":"Swimming"},{"segment":[182.39162053666146,189.04824902340093],"label":"Swimming"},{"segment":[197.3690346318253,201.36301172386896],"label":"Swimming"}]},"4Sf9C_vtYIs":{"duration":14.14,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=4Sf9C_vtYIs","annotations":[{"segment":[0.01,13.41294686053042],"label":"Washing 
hands"}]},"rlHxg7nDKgI":{"duration":106.092,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rlHxg7nDKgI","annotations":[]},"-FbWTFJ3kV0":{"duration":49.0,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-FbWTFJ3kV0","annotations":[{"segment":[0.01,47.413979719188774],"label":"Sailing"}]},"rYcac4QmSms":{"duration":235.96,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=rYcac4QmSms","annotations":[{"segment":[146.48448985959436,235.921],"label":"Bungee jumping"}]},"ngMxE3CnGVg":{"duration":216.364,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ngMxE3CnGVg","annotations":[]},"AR6_PW1um-I":{"duration":206.705,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AR6_PW1um-I","annotations":[{"segment":[11.92895007800312,196.34407020280813],"label":"Rollerblading"}]},"G1hRHCymRGE":{"duration":93.21000000000001,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=G1hRHCymRGE","annotations":[{"segment":[6.107015823490083,87.97010805772231],"label":"Using the balance beam"}]},"zEyFuCsJh3Y":{"duration":61.07,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zEyFuCsJh3Y","annotations":[{"segment":[0.1210646476798607,55.19680185328734],"label":"Spinning"}]},"U7MAQwyFeDQ":{"duration":236.19,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=U7MAQwyFeDQ","annotations":[{"segment":[65.40456274033274,73.87952016584065],"label":"Bungee jumping"},{"segment":[77.19580785408287,95.61962834431745],"label":"Bungee jumping"},{"segment":[107.04239704826287,121.41297703064585],"label":"Bungee jumping"},{"segment":[134.67812778361474,142.78460879931797],"label":"Bungee jumping"},{"segment":[192.89739915678626,205.79407340795632],"label":"Bungee 
jumping"}]},"470dhR3Yrjs":{"duration":180.28,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=470dhR3Yrjs","annotations":[{"segment":[12.093647886739468,175.77976579563182],"label":"Braiding hair"}]},"z1vuU-3I52Y":{"duration":167.718,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=z1vuU-3I52Y","annotations":[]},"8XxsgEw49p0":{"duration":190.03,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8XxsgEw49p0","annotations":[{"segment":[35.2789715023401,109.9873817425897],"label":"Painting"}]},"GFE2wAi9fjo":{"duration":17.113,"subset":"training","resolution":"352x240","url":"https://www.youtube.com/watch?v=GFE2wAi9fjo","annotations":[{"segment":[0.01,17.061],"label":"Putting on shoes"}]},"7DO3wdJxP8Y":{"duration":80.527,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=7DO3wdJxP8Y","annotations":[]},"Ocvdvqvu7I8":{"duration":114.61,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Ocvdvqvu7I8","annotations":[{"segment":[0.01,65.4424665276131],"label":"Grooming horse"}]},"EEaLID3z9tM":{"duration":46.115,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EEaLID3z9tM","annotations":[{"segment":[1.870476627145086,38.344770856474256],"label":"Gargling mouthwash"}]},"riYmZxbHN_0":{"duration":213.0,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=riYmZxbHN_0","annotations":[]},"K-t4tUTq_Ik":{"duration":12.7,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=K-t4tUTq_Ik","annotations":[{"segment":[0.4619344773790951,10.42792511700468],"label":"Playing badminton"}]},"2zl0O1VRDC4":{"duration":132.935,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2zl0O1VRDC4","annotations":[{"segment":[3.1107814108482708,62.008242789575526],"label":"Fixing the 
roof"}]},"q4M7SsGjFro":{"duration":168.972,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=q4M7SsGjFro","annotations":[{"segment":[20.034075420904838,158.16375332293293],"label":"Baton twirling"}]},"uQ20ZKxr6iM":{"duration":205.544,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=uQ20ZKxr6iM","annotations":[]},"-5Q7iNtaWCU":{"duration":76.77,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-5Q7iNtaWCU","annotations":[{"segment":[0.01,58.81207722308891],"label":"Dodgeball"}]},"9IKaAzeBkG8":{"duration":58.422,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=9IKaAzeBkG8","annotations":[]},"C7UEAqu1KtY":{"duration":192.099,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=C7UEAqu1KtY","annotations":[{"segment":[61.103954752925034,88.62384883392191],"label":"Raking leaves"}]},"xwSHzGCP6iA":{"duration":79.13,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xwSHzGCP6iA","annotations":[{"segment":[14.252808112324491,37.08198127925117],"label":"Bungee jumping"}]},"9Cs3CYCMH2w":{"duration":67.129,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9Cs3CYCMH2w","annotations":[{"segment":[0,38.224718322066224],"label":"Rope skipping"},{"segment":[44.50823307332294,54.142956468018724],"label":"Rope skipping"}]},"RYl-eG9hasI":{"duration":201.549,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=RYl-eG9hasI","annotations":[{"segment":[40.24695533229329,177.65257627145084],"label":"Baton 
twirling"}]},"Db2Dnt2OOo0":{"duration":128.22,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Db2Dnt2OOo0","annotations":[]},"WFL8DhccHr0":{"duration":237.332,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WFL8DhccHr0","annotations":[{"segment":[33.32256041262058,206.59987345085804],"label":"Making a cake"}]},"haoaTAxA6n0":{"duration":34.296,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=haoaTAxA6n0","annotations":[]},"VmjCJxgPSiE":{"duration":44.72,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=VmjCJxgPSiE","annotations":[{"segment":[0.13947269890795633,43.79442745709829],"label":"Starting a campfire"}]},"BWqsgYhgUbI":{"duration":70.13,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BWqsgYhgUbI","annotations":[{"segment":[0.9396616,57.34042327145087],"label":"Starting a campfire"}]},"BgT03Dy-jD4":{"duration":151.627,"subset":"testing","resolution":"540x360","url":"https://www.youtube.com/watch?v=BgT03Dy-jD4","annotations":[]},"sODu6d-3zAQ":{"duration":89.885,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sODu6d-3zAQ","annotations":[{"segment":[35.16474520611931,49.14366268145677],"label":"Hula hoop"}]},"a68k87VXX1c":{"duration":126.71,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=a68k87VXX1c","annotations":[{"segment":[0,126.711292],"label":"Cumbia"}]},"DuDF7hkrjYo":{"duration":148.77,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DuDF7hkrjYo","annotations":[{"segment":[43.96045241809672,144.64032761310452],"label":"Polishing forniture"}]},"VufybWyvP98":{"duration":95.43,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VufybWyvP98","annotations":[{"segment":[12.437461778471137,16.608107644305772],"label":"Playing water 
polo"}]},"iAQY-FHckIM":{"duration":75.837,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iAQY-FHckIM","annotations":[{"segment":[9.199386907636377,36.32578883800109],"label":"Putting in contact lenses"},{"segment":[57.555147002853346,58.852496668483205],"label":"Putting in contact lenses"}]},"dWyE0o2NetQ":{"duration":187.15,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=dWyE0o2NetQ","annotations":[{"segment":[22.621899375975037,100.55799141965677],"label":"Tumbling"}]},"xuvp0jKMqRU":{"duration":80.736,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xuvp0jKMqRU","annotations":[]},"j2ESEJmy7aA":{"duration":75.071,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=j2ESEJmy7aA","annotations":[{"segment":[10.419109204368175,13.93116848673947],"label":"Grooming dog"},{"segment":[20.13580655226209,22.594248049921994],"label":"Grooming dog"},{"segment":[25.75510140405616,31.14025897035881],"label":"Grooming dog"},{"segment":[34.65231825273011,37.34489703588143],"label":"Grooming dog"},{"segment":[43.783672386895475,47.41280031201248],"label":"Grooming dog"}]},"hmPeCPjaxAM":{"duration":150.257,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hmPeCPjaxAM","annotations":[{"segment":[0.01,81.80882676939731],"label":"Shuffleboard"},{"segment":[111.34439173485308,150.2563264],"label":"Shuffleboard"}]},"eChLCFAGyx0":{"duration":196.93,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eChLCFAGyx0","annotations":[{"segment":[13.206903276131044,28.56376755070203],"label":"Playing flauta"},{"segment":[47.29914196567863,55.591848673946956],"label":"Playing flauta"},{"segment":[114.56220748829952,120.3978159126365],"label":"Playing flauta"},{"segment":[170.4611934477379,178.1396255850234],"label":"Playing 
flauta"}]},"AO-0r8H2DOo":{"duration":51.46,"subset":"validation","resolution":"294x240","url":"https://www.youtube.com/watch?v=AO-0r8H2DOo","annotations":[{"segment":[14.404172509999999,42.81902226],"label":"Shaving legs"}]},"7y31RbZFHGk":{"duration":28.793,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=7y31RbZFHGk","annotations":[]},"iRp-pWnPoYw":{"duration":187.06,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iRp-pWnPoYw","annotations":[{"segment":[0.01,167.19889703588143],"label":"Longboarding"}]},"2tf414bkudE":{"duration":192.656,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=2tf414bkudE","annotations":[{"segment":[15.178043943526449,34.413584782847096],"label":"Sharpening knives"},{"segment":[46.4357978074225,134.7990635380517],"label":"Sharpening knives"}]},"4XGQR2VmWpw":{"duration":188.94,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4XGQR2VmWpw","annotations":[{"segment":[30.212743516543384,41.11880702983221],"label":"Doing motocross"},{"segment":[43.77163328982139,96.23864154294063],"label":"Doing motocross"},{"segment":[101.54429406291896,155.779853156031],"label":"Doing motocross"},{"segment":[161.6750214196568,174.0548772074883],"label":"Doing motocross"}]},"vL8s-b4eJiU":{"duration":192.285,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=vL8s-b4eJiU","annotations":[{"segment":[9.704075150471613,178.92636532959935],"label":"Hanging 
wallpaper"}]},"Np4nVolWFxI":{"duration":62.04,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Np4nVolWFxI","annotations":[]},"cgIv0CwYHrA":{"duration":197.277,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=cgIv0CwYHrA","annotations":[]},"6hm-Y7CyhMU":{"duration":24.614,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=6hm-Y7CyhMU","annotations":[]},"wfLV0MnRVZM":{"duration":158.17000000000002,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=wfLV0MnRVZM","annotations":[]},"icjyAZ3KVBw":{"duration":62.996,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=icjyAZ3KVBw","annotations":[{"segment":[43.34013911232449,55.5264820826833],"label":"Elliptical trainer"}]},"jmxzDxfSbZM":{"duration":194.491,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jmxzDxfSbZM","annotations":[{"segment":[91.3285367223089,152.92220102340093],"label":"Decorating the Christmas tree"}]},"36K1e9WgEpU":{"duration":110.667,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=36K1e9WgEpU","annotations":[{"segment":[0.2,110.42099999999999],"label":"River tubing"}]},"NSdBL6HqlWk":{"duration":190.613,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NSdBL6HqlWk","annotations":[{"segment":[0.2,179.34778183745325],"label":"Drum corps"}]},"78UikTQkGgg":{"duration":180.35,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=78UikTQkGgg","annotations":[{"segment":[30.105124009041997,135.33237857410296],"label":"Making a 
sandwich"}]},"eg2s5lOfKGI":{"duration":86.47,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=eg2s5lOfKGI","annotations":[]},"CZZE6SDPBmU":{"duration":14.768,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=CZZE6SDPBmU","annotations":[]},"k6AzbT12a9c":{"duration":8.151,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=k6AzbT12a9c","annotations":[{"segment":[0.01,7.38088479226826],"label":"Kneeling"}]},"zW8qDxX6Nmw":{"duration":76.835,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=zW8qDxX6Nmw","annotations":[]},"hcRf6HmOAzo":{"duration":172.64,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=hcRf6HmOAzo","annotations":[{"segment":[0,129.5180670826833],"label":"Hand washing clothes"}]},"hIJ6VTEKji8":{"duration":17.65,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hIJ6VTEKji8","annotations":[{"segment":[0.8809816661466459,16.38302512],"label":"Hopscotch"}]},"RAw8sshR51c":{"duration":86.43,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=RAw8sshR51c","annotations":[{"segment":[7.280705530262027,11.595197696343227],"label":"Snatch"},{"segment":[13.213132258623679,18.60624746622518],"label":"Snatch"},{"segment":[23.99936267382668,30.20144516256841],"label":"Snatch"},{"segment":[37.75180645321051,46.38079078537291],"label":"Snatch"},{"segment":[51.639077992199695,62.155652622464906],"label":"Snatch"}]},"j-rX65T0aBo":{"duration":27.45,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=j-rX65T0aBo","annotations":[]},"RgLFbpTUT7U":{"duration":8.17,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=RgLFbpTUT7U","annotations":[]},"xfNYfCAlkM4":{"duration":38.615,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xfNYfCAlkM4","annotations":[{"segment":[6.199
44305772231,12.45907488299532],"label":"Gargling mouthwash"},{"segment":[18.177007800312012,26.001547581903278],"label":"Gargling mouthwash"}]},"r7kQ1sko74M":{"duration":148.864,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=r7kQ1sko74M","annotations":[{"segment":[0.010000000000000002,64.06022771771933],"label":"Playing drums"},{"segment":[87.71128902697825,130.83969375003272],"label":"Playing drums"}]},"9Dr3VuYqkx8":{"duration":222.31,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9Dr3VuYqkx8","annotations":[]},"FtAQcl1h7O8":{"duration":115.5,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FtAQcl1h7O8","annotations":[{"segment":[36.025273010920436,39.26754758190327],"label":"Spinning"},{"segment":[44.31108580343214,47.91361310452418],"label":"Spinning"},{"segment":[66.6467550702028,68.62814508580344],"label":"Spinning"}]},"lRRBeGRTpdc":{"duration":200.627,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lRRBeGRTpdc","annotations":[{"segment":[16.963763281346807,19.776849618685482],"label":"Hula hoop"},{"segment":[26.028152590549197,31.029194968040176],"label":"Hula hoop"},{"segment":[43.844366060360805,62.910840124545146],"label":"Hula hoop"},{"segment":[83.85270508028862,87.60348686340683],"label":"Hula hoop"}]},"rqRDdzQ4RjM":{"duration":156.06,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=rqRDdzQ4RjM","annotations":[]},"AtZEyAZuSVM":{"duration":72.68,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AtZEyAZuSVM","annotations":[]},"FrRKm_V0lZU":{"duration":40.01,"subset":"validation","resolution":"644x480","url":"https://www.youtube.com/watch?v=FrRKm_V0lZU","annotations":[{"segment":[8.80050791700468,11.546765706708268],"label":"Drinking 
beer"}]},"hj7rkE0fPsE":{"duration":47.0,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=hj7rkE0fPsE","annotations":[{"segment":[30.427195951638062,42.8913485101404],"label":"Brushing hair"}]},"mplyiAfIOCc":{"duration":85.798,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=mplyiAfIOCc","annotations":[]},"r4QPhlrqqP4":{"duration":141.385,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=r4QPhlrqqP4","annotations":[{"segment":[20.95646645865835,115.37086271450859],"label":"Spread mulch"}]},"eMI2x3HFozQ":{"duration":40.984,"subset":"validation","resolution":"800x480","url":"https://www.youtube.com/watch?v=eMI2x3HFozQ","annotations":[{"segment":[0.01,27.812324906396256],"label":"Arm wrestling"}]},"0Mn0gxECBLA":{"duration":63.07,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0Mn0gxECBLA","annotations":[{"segment":[10.9142822,43.01993293],"label":"Mowing the lawn"}]},"yAkVtmP7654":{"duration":216.619,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=yAkVtmP7654","annotations":[{"segment":[0.01,216.61895691609976],"label":"Removing ice from car"}]},"O1z0Q-3OUg0":{"duration":151.12,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=O1z0Q-3OUg0","annotations":[{"segment":[11.315973416536663,137.44192712168487],"label":"Preparing salad"}]},"hQXWnoipdFE":{"duration":153.82999999999998,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hQXWnoipdFE","annotations":[{"segment":[41.039279678510994,134.4885944915951],"label":"Rock climbing"}]},"8AP2he781Cw":{"duration":51.223,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=8AP2he781Cw","annotations":[{"segment":[0.2,51.201],"label":"Playing ice 
hockey"}]},"VsFHNZHhNwI":{"duration":23.62,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=VsFHNZHhNwI","annotations":[{"segment":[0.01,23.61469387755102],"label":"Bathing dog"}]},"dNgXkPmvU-c":{"duration":172.966,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dNgXkPmvU-c","annotations":[{"segment":[28.32231669266771,40.19071606864275],"label":"Putting on shoes"},{"segment":[61.49988767550702,67.16435101404056],"label":"Putting on shoes"}]},"b1QkoG9hxk8":{"duration":77.53,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=b1QkoG9hxk8","annotations":[{"segment":[25.09568055,70.79069500780032],"label":"Mixing drinks"}]},"PlAVnu-ueM4":{"duration":120.02,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=PlAVnu-ueM4","annotations":[{"segment":[4.306631375975039,8.42601790951638],"label":"Snatch"},{"segment":[73.77446791887675,80.51528224648986],"label":"Snatch"}]},"EP9Ul7UdzYI":{"duration":211.65,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EP9Ul7UdzYI","annotations":[{"segment":[37.01325567715729,48.22212817735155],"label":"Skiing"},{"segment":[107.8928906048563,109.87092692841999],"label":"Skiing"},{"segment":[126.35456295811744,131.95899920821458],"label":"Skiing"}]},"C4V6fqELvPY":{"duration":1.579,"subset":"training","resolution":"480x480","url":"https://www.youtube.com/watch?v=C4V6fqELvPY","annotations":[{"segment":[0,1.578955],"label":"Using the monkey bar"}]},"8YKUwWUU-O0":{"duration":195.837,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=8YKUwWUU-O0","annotations":[{"segment":[16.340535881435258,89.9493049921997],"label":"Tug of war"},{"segment":[101.86109750390015,172.43317394695785],"label":"Tug of 
war"}]},"QFxe74w7aNc":{"duration":118.771,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QFxe74w7aNc","annotations":[{"segment":[6.666514820592823,104.08923868954759],"label":"Drum corps"}]},"a1ltYmbbBYI":{"duration":31.662,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=a1ltYmbbBYI","annotations":[{"segment":[0.01,31.059223439784066],"label":"Sumo"}]},"AA1y6Zg5fIc":{"duration":230.55,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AA1y6Zg5fIc","annotations":[]},"YLT7YEwUCwI":{"duration":230.086,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YLT7YEwUCwI","annotations":[{"segment":[17.89163754145908,39.719442003356434],"label":"Swimming"},{"segment":[45.802600623885205,85.52204808733778],"label":"Swimming"},{"segment":[135.97648135172346,174.26459737505164],"label":"Swimming"},{"segment":[179.27425741548709,191.7984075165757],"label":"Swimming"},{"segment":[193.94540467676234,196.80806755701119],"label":"Swimming"}]},"gIhVeU8xbrs":{"duration":170.16,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=gIhVeU8xbrs","annotations":[{"segment":[9.819776911076444,166.93620748829954],"label":"Snatch"}]},"fcJg9MY4OEo":{"duration":34.25,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fcJg9MY4OEo","annotations":[]},"Z2b84JjYdtw":{"duration":30.349,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Z2b84JjYdtw","annotations":[{"segment":[0.01,0.02],"label":"Drum 
corps"}]},"p-HvSweux4c":{"duration":152.718,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=p-HvSweux4c","annotations":[]},"LTBrHLqhRMs":{"duration":66.22,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=LTBrHLqhRMs","annotations":[{"segment":[54.65230961154446,61.98749672386895],"label":"Volleyball"}]},"ijHyZJCmZJM":{"duration":225.722,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ijHyZJCmZJM","annotations":[]},"4KgIfmPLuAY":{"duration":72.633,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=4KgIfmPLuAY","annotations":[{"segment":[0.01,71.43255108197395],"label":"Kneeling"}]},"We9NR-CaIgM":{"duration":180.512,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=We9NR-CaIgM","annotations":[{"segment":[0.01,180.511903],"label":"Capoeira"}]},"KUorCsuIe2A":{"duration":196.46,"subset":"training","resolution":"448x336","url":"https://www.youtube.com/watch?v=KUorCsuIe2A","annotations":[{"segment":[75.24480640722227,99.7644982722309],"label":"Breakdancing"}]},"kd_XmN-fncQ":{"duration":202.896,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=kd_XmN-fncQ","annotations":[]},"rduaWxRvcw0":{"duration":236.15,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rduaWxRvcw0","annotations":[{"segment":[63.36548123149415,95.78502976853767],"label":"Removing curlers"},{"segment":[97.25864561113056,113.46841987965232],"label":"Removing curlers"},{"segment":[120.46809513196854,137.04627336113853],"label":"Removing curlers"},{"segment":[141.46712088891718,147.72998821993696],"label":"Removing curlers"},{"segment":[154.36125951160494,183.46517240281446],"label":"Removing curlers"},{"segment":[186.04400012735204,187.88601993059314],"label":"Removing 
curlers"}]},"RAmQyeaBu-k":{"duration":172.9,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=RAmQyeaBu-k","annotations":[{"segment":[46.38391887675506,147.51164898595943],"label":"Rock climbing"}]},"N_um3L3w1uQ":{"duration":39.591,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=N_um3L3w1uQ","annotations":[{"segment":[0.20000000000000004,38.678735202492206],"label":"Powerbocking"}]},"SsjGQDAAuT8":{"duration":98.11,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=SsjGQDAAuT8","annotations":[]},"O9qI26TB3R0":{"duration":157.9,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=O9qI26TB3R0","annotations":[{"segment":[16.25759070046802,154.4471117620215],"label":"Bathing dog"}]},"9XyrLUWZl40":{"duration":20.132,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=9XyrLUWZl40","annotations":[{"segment":[0.4201943883466275,1.235496840060023],"label":"Playing rubik cube"},{"segment":[5.50015581825317,8.134209893019525],"label":"Playing rubik cube"},{"segment":[13.841327055013295,17.133894648471237],"label":"Playing rubik cube"}]},"Lml6HGhM4yg":{"duration":44.4,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Lml6HGhM4yg","annotations":[{"segment":[7.853633385335414,9.375923556942277],"label":"Tennis serve with ball bouncing"}]},"aJbl1eVBzk0":{"duration":187.827,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aJbl1eVBzk0","annotations":[{"segment":[11.574314226990849,165.4101362312996],"label":"Assembling 
bicycle"}]},"wzp6sPsgTDY":{"duration":106.627,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wzp6sPsgTDY","annotations":[]},"q-RNYziOHjE":{"duration":142.989,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=q-RNYziOHjE","annotations":[{"segment":[33.68367113460049,37.698943190380675],"label":"Blow-drying hair"},{"segment":[43.05263926475426,55.09845543209484],"label":"Blow-drying hair"},{"segment":[74.50560370169909,80.75158245513495],"label":"Blow-drying hair"},{"segment":[83.6515011620873,85.88220785974296],"label":"Blow-drying hair"},{"segment":[89.67440924575759,97.4818817878315],"label":"Blow-drying hair"},{"segment":[110.41998051482058,120.01201922620903],"label":"Blow-drying hair"}]},"bG7hnpAeja0":{"duration":200.265,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bG7hnpAeja0","annotations":[{"segment":[24.681152886115445,153.085631825273],"label":"Sumo"}]},"d6lajCxMhPE":{"duration":38.081,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=d6lajCxMhPE","annotations":[{"segment":[7.125304212168487,36.101541341653665],"label":"Mooping floor"}]},"NH0YKVu9F6U":{"duration":177.796,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=NH0YKVu9F6U","annotations":[]},"6gk4PrePHmQ":{"duration":66.34,"subset":"testing","resolution":"192x144","url":"https://www.youtube.com/watch?v=6gk4PrePHmQ","annotations":[]},"c9mrRiwmhZw":{"duration":183.72,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=c9mrRiwmhZw","annotations":[{"segment":[38.978809784711395,169.6724661216849],"label":"Wrapping presents"}]},"L149Uf5V7K0":{"duration":137.253,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=L149Uf5V7K0","annotations":[{"segment":[0.01,126.22962558502338],"label":"Rope 
skipping"}]},"6tveUnZe7WA":{"duration":50.62,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6tveUnZe7WA","annotations":[{"segment":[0.3158780392032008,48.17139928237129],"label":"Playing bagpipes"}]},"3W92sMbvoaE":{"duration":29.699,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3W92sMbvoaE","annotations":[{"segment":[0.01,29.561],"label":"Snow tubing"}]},"TmI9MxCDBMw":{"duration":191.937,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=TmI9MxCDBMw","annotations":[{"segment":[12.27672691125332,20.960265458237377],"label":"River tubing"},{"segment":[27.547777459397697,172.77247373634947],"label":"River tubing"}]},"X5kj4fZ38rc":{"duration":217.32,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=X5kj4fZ38rc","annotations":[]},"hJiaSHwOkcs":{"duration":82.664,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=hJiaSHwOkcs","annotations":[{"segment":[0.2,80.08384707800313],"label":"Bullfighting"}]},"KlheP4IiS8w":{"duration":211.395,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KlheP4IiS8w","annotations":[{"segment":[0.01,202.2703142040439],"label":"Disc dog"}]},"26KzUnEbTUs":{"duration":143.709,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=26KzUnEbTUs","annotations":[]},"13hdhdieZuc":{"duration":53.69,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=13hdhdieZuc","annotations":[{"segment":[14.740215263650546,44.220645790951636],"label":"Putting on 
makeup"}]},"KvtqF0LaWU0":{"duration":184.14,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=KvtqF0LaWU0","annotations":[]},"8K_gkiWH5EI":{"duration":224.746,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8K_gkiWH5EI","annotations":[{"segment":[2.804941541879362,7.713589240168246],"label":"Surfing"},{"segment":[10.869148474782529,43.476593899130116],"label":"Surfing"},{"segment":[45.229682362804716,62.409949306815804],"label":"Surfing"},{"segment":[66.96797931236976,90.1087470328745],"label":"Surfing"},{"segment":[91.51121780381419,104.48407243500624],"label":"Surfing"},{"segment":[107.63963166962053,120.26186860807765],"label":"Surfing"},{"segment":[123.41742784269194,137.44213555208873],"label":"Surfing"},{"segment":[145.155724792257,157.7779617307141],"label":"Surfing"},{"segment":[160.58290324180967,196.69652558658345],"label":"Surfing"},{"segment":[199.8520848205928,224.545941],"label":"Surfing"}]},"W2Wjbhsuacw":{"duration":20.62,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=W2Wjbhsuacw","annotations":[{"segment":[4.429767146645866,17.89068918],"label":"Starting a campfire"}]},"NRhoHN8x_00":{"duration":84.15,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NRhoHN8x_00","annotations":[{"segment":[5.644948365054602,76.40371973166927],"label":"Playing drums"}]},"xor90CAOc94":{"duration":73.672,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=xor90CAOc94","annotations":[{"segment":[0.01,68.62333385335413],"label":"Roof shingle removal"}]},"bp1wsvCaiB0":{"duration":109.274,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=bp1wsvCaiB0","annotations":[{"segment":[6.645271450858035,11.75701872074883],"label":"Doing fencing"},{"segment":[23.17325429017161,31.522441497659905],"label":"Doing fencing"},{"segment":[43.44985179407176,65.77114820592823],"label":"Doing 
fencing"},{"segment":[71.5775663026521,74.81559906396257],"label":"Doing fencing"},{"segment":[82.99589235569422,88.2789984399376],"label":"Doing fencing"},{"segment":[94.92548673946958,102.7649344773791],"label":"Doing fencing"}]},"sWQ65uwxXbA":{"duration":126.87,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=sWQ65uwxXbA","annotations":[{"segment":[12.667590110407138,44.33656538642498],"label":"Snatch"},{"segment":[47.50346291402677,60.96277740633435],"label":"Snatch"},{"segment":[64.92139931583657,81.34968024027083],"label":"Snatch"},{"segment":[84.51657763494539,94.61106348829952],"label":"Snatch"},{"segment":[98.56968539157565,113.21658643369733],"label":"Snatch"}]},"Xq9ueKle4fY":{"duration":129.289,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Xq9ueKle4fY","annotations":[{"segment":[0.01,129.221],"label":"Grooming dog"}]},"etWdTMJ9_To":{"duration":40.71,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=etWdTMJ9_To","annotations":[{"segment":[0.01,40.704579],"label":"Cricket"}]},"7qTVbrGZXVY":{"duration":40.17,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7qTVbrGZXVY","annotations":[]},"tZzse87ICr8":{"duration":21.27,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tZzse87ICr8","annotations":[{"segment":[1.8249942379095163,20.307208761819858],"label":"Hopscotch"}]},"7QvvqWJRwNo":{"duration":22.431,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7QvvqWJRwNo","annotations":[{"segment":[0.01,14.524986945781642],"label":"Snow 
tubing"}]},"4x0LdQRN248":{"duration":22.547,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4x0LdQRN248","annotations":[{"segment":[0.20000000000000004,22.301000000000002],"label":"Welding"}]},"h9YbUJuYzLk":{"duration":121.67,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=h9YbUJuYzLk","annotations":[{"segment":[10.42485696,85.42834463999999],"label":"Getting a haircut"}]},"nO2tXrQDD8I":{"duration":55.334,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=nO2tXrQDD8I","annotations":[{"segment":[0.01,54.24717242123316],"label":"Table soccer"}]},"PJsx3P4ubt8":{"duration":22.57,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=PJsx3P4ubt8","annotations":[]},"k--dW53UQWs":{"duration":12.562,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=k--dW53UQWs","annotations":[]},"wvmuUuLOoEQ":{"duration":237.308,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wvmuUuLOoEQ","annotations":[{"segment":[62.740817395517055,181.57931632809994],"label":"Skiing"},{"segment":[191.9130988786443,194.12748085376091],"label":"Skiing"}]},"5KEq4f4dGto":{"duration":189.08,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5KEq4f4dGto","annotations":[{"segment":[102.04085179407176,165.44773946957878],"label":"Doing nails"}]},"xcSWHI3K0KA":{"duration":44.47,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=xcSWHI3K0KA","annotations":[{"segment":[0.01,43.009820592823715],"label":"Javelin 
throw"}]},"8btD4-N4stM":{"duration":70.566,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8btD4-N4stM","annotations":[{"segment":[5.15796833563054,16.68109247162857],"label":"Bullfighting"}]},"JgJia7VviHs":{"duration":74.21000000000001,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=JgJia7VviHs","annotations":[]},"g1UBXl7GHCc":{"duration":101.263,"subset":"testing","resolution":"640x352","url":"https://www.youtube.com/watch?v=g1UBXl7GHCc","annotations":[]},"qb_5_9xDYqc":{"duration":139.78,"subset":"training","resolution":"352x288","url":"https://www.youtube.com/watch?v=qb_5_9xDYqc","annotations":[{"segment":[58.930138719999995,83.19445311145779],"label":"Rock climbing"}]},"g-21fg072ZM":{"duration":14.91,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=g-21fg072ZM","annotations":[]},"J1fcLhB-Slg":{"duration":31.67,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=J1fcLhB-Slg","annotations":[{"segment":[6.176290366614665,22.679338226209047],"label":"Drinking 
beer"}]},"rnI9vGr5V1k":{"duration":84.057,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=rnI9vGr5V1k","annotations":[]},"s07U8_fkUWU":{"duration":141.69,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=s07U8_fkUWU","annotations":[{"segment":[10.610034066668788,19.89381387500398],"label":"Volleyball"},{"segment":[22.546322391671175,32.935314081951034],"label":"Volleyball"},{"segment":[35.366780222229295,44.42951765417555],"label":"Volleyball"},{"segment":[46.639941418064886,53.934339838899675],"label":"Volleyball"},{"segment":[56.36580597917794,66.97584004584672],"label":"Volleyball"},{"segment":[69.407306186125,76.03857747779298],"label":"Volleyball"},{"segment":[78.24900124168231,85.32235728612817],"label":"Volleyball"},{"segment":[87.53278105001752,96.37447610557483],"label":"Volleyball"},{"segment":[99.02698462224203,116.48933235696774],"label":"Volleyball"},{"segment":[118.47871352262091,135.27793409672387],"label":"Volleyball"}]},"1VSqWp5DZiU":{"duration":209.49,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1VSqWp5DZiU","annotations":[{"segment":[9.477697475245948,202.626635677672],"label":"Zumba"}]},"8UyB9XigBXM":{"duration":238.98,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=8UyB9XigBXM","annotations":[{"segment":[39.51918607900779,143.90948831201248],"label":"Braiding 
hair"}]},"ijSmiDjlmlU":{"duration":212.881,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ijSmiDjlmlU","annotations":[{"segment":[0.01,212.88034013605443],"label":"Swimming"}]},"Chzp7HI7-uA":{"duration":42.098,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=Chzp7HI7-uA","annotations":[]},"HQitzJ_OtqY":{"duration":38.29,"subset":"testing","resolution":"320x180","url":"https://www.youtube.com/watch?v=HQitzJ_OtqY","annotations":[]},"OhPSVjVQZ8o":{"duration":9.962,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OhPSVjVQZ8o","annotations":[{"segment":[0.01,9.941],"label":"Using the monkey bar"}]},"78je-JBAqxQ":{"duration":118.515,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=78je-JBAqxQ","annotations":[{"segment":[10.690227436244017,15.11377098741254],"label":"Beer pong"},{"segment":[20.827514741005217,25.066743977541716],"label":"Beer pong"},{"segment":[29.305973214078218,31.33343067503046],"label":"Beer pong"},{"segment":[32.99225950671866,34.466774023774825],"label":"Beer pong"},{"segment":[87.54929567125708,92.7100964239787],"label":"Beer pong"}]},"YuuWL4EK7Q4":{"duration":174.52,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YuuWL4EK7Q4","annotations":[{"segment":[126.83124180967239,155.69911388455537],"label":"Polishing shoes"}]},"c1KmEWYsYjQ":{"duration":164.235,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=c1KmEWYsYjQ","annotations":[]},"hSZKCOmb7Bk":{"duration":24.24,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=hSZKCOmb7Bk","annotations":[]},"2GEZgHcA7zU":{"duration":99.66,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=2GEZgHcA7zU","annotations":[{"segment":[24.82143395,75.91943776000001],"label":"Springboard 
diving"}]},"Ww-zqYBIiJ0":{"duration":125.992,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ww-zqYBIiJ0","annotations":[]},"qxAR5ZVYe00":{"duration":141.99,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=qxAR5ZVYe00","annotations":[]},"ppL_HTF9U4M":{"duration":65.92,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=ppL_HTF9U4M","annotations":[]},"ZSoPIyVJWWk":{"duration":131.87,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=ZSoPIyVJWWk","annotations":[{"segment":[0.01,131.821],"label":"Removing curlers"}]},"n--IF_1qqYA":{"duration":10.26,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=n--IF_1qqYA","annotations":[{"segment":[0,9.28651120124805],"label":"Mowing the lawn"}]},"SFkpNuLyXdw":{"duration":56.33,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SFkpNuLyXdw","annotations":[]},"jTanjLR8QAU":{"duration":123.02,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=jTanjLR8QAU","annotations":[]},"Zd22n1caVgM":{"duration":32.49,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Zd22n1caVgM","annotations":[{"segment":[0.8615291029641187,32.02861135725429],"label":"Doing karate"}]},"BIL1t2eQrDU":{"duration":151.35,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=BIL1t2eQrDU","annotations":[{"segment":[8.970336690000002,131.35483619344777],"label":"Volleyball"}]},"wZZUcxhISyE":{"duration":61.696,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wZZUcxhISyE","annotations":[{"segment":[0.01,44.30086837080483],"label":"Having an ice 
cream"}]},"buyWwRYGhig":{"duration":80.109,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=buyWwRYGhig","annotations":[]},"j0Iv6dmSw0k":{"duration":79.34,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=j0Iv6dmSw0k","annotations":[{"segment":[3.2182391382215294,74.63839232106085],"label":"Doing karate"}]},"ICMcCoyuBAw":{"duration":197.417,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ICMcCoyuBAw","annotations":[{"segment":[27.632032640245225,143.37956563869002],"label":"Hanging wallpaper"}]},"b2uAgV5n3FU":{"duration":143.48,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=b2uAgV5n3FU","annotations":[{"segment":[57.52473819968799,135.41815801872076],"label":"Chopping wood"}]},"UljyGBNeimQ":{"duration":139.367,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UljyGBNeimQ","annotations":[]},"jkmbAc9BWaE":{"duration":205.78,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jkmbAc9BWaE","annotations":[{"segment":[163.06060269578785,191.30732127301093],"label":"Skateboarding"},{"segment":[31.135587522620906,88.27099555382216],"label":"Skateboarding"}]},"zq621OgpFFk":{"duration":5.11,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zq621OgpFFk","annotations":[{"segment":[0.5339502703588143,4.9569711666146645],"label":"Tennis serve with ball bouncing"}]},"Z-syOvXCc20":{"duration":29.861,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Z-syOvXCc20","annotations":[{"segment":[0.01,29.86086],"label":"Swinging at the 
playground"}]},"-4Q_zG9EChY":{"duration":166.116,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=-4Q_zG9EChY","annotations":[{"segment":[17.489028861154445,125.79145943837752],"label":"Slacklining"}]},"ijgLl3PHHE0":{"duration":151.418,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ijgLl3PHHE0","annotations":[{"segment":[34.7517315123965,50.7837377429261],"label":"Scuba diving"},{"segment":[55.734798490589654,96.75787325694479],"label":"Scuba diving"},{"segment":[98.87975643451487,125.30196827256866],"label":"Scuba diving"}]},"Kyaxs5nDQZ4":{"duration":166.139,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Kyaxs5nDQZ4","annotations":[]},"zPDbMflNURc":{"duration":76.069,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zPDbMflNURc","annotations":[{"segment":[41.09403510140405,67.659875975039],"label":"Waxing skis"}]},"WVZ4CxvFYcM":{"duration":50.32,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WVZ4CxvFYcM","annotations":[]},"cdO-xmdjM7Q":{"duration":180.094,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cdO-xmdjM7Q","annotations":[{"segment":[53.36628705148206,178.65646801872074],"label":"Rafting"}]},"fqWjjWqsJPU":{"duration":179.77,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fqWjjWqsJPU","annotations":[{"segment":[42.628503625585026,45.99391180655226],"label":"Skateboarding"},{"segment":[70.95402248205929,73.4780786177847],"label":"Skateboarding"},{"segment":[86.37880997815913,89.18331679563182],"label":"Skateboarding"},{"segment":[104.3276536099844,114.42387815288612],"label":"Skateboarding"},{"segment":[124.52010269578784,129.28776428549142],"label":"Skateboarding"}]},"bZ4r3Y_qceE":{"duration":36.85,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=bZ4r3Y_qceE","annotations":[{"seg
ment":[0.009999999999999998,33.74779641185647],"label":"Triple jump"}]},"vSpl69BWMKI":{"duration":82.777,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vSpl69BWMKI","annotations":[{"segment":[9.042379188767551,13.82192247425897],"label":"Trimming branches or hedges"},{"segment":[48.18296339157566,60.58394056474258],"label":"Trimming branches or hedges"}]},"76tyabobb_0":{"duration":60.512,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=76tyabobb_0","annotations":[{"segment":[0.01,57.33269734005319],"label":"Having an ice cream"}]},"flfLCoLHm2k":{"duration":212.93,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=flfLCoLHm2k","annotations":[{"segment":[45.308523539999996,165.03863182839314],"label":"Making a sandwich"}]},"2glmVDTPY44":{"duration":226.186,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=2glmVDTPY44","annotations":[{"segment":[25.711577881619938,226.141],"label":"Snowboarding"}]},"tfPm9xAZ5z0":{"duration":88.886,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tfPm9xAZ5z0","annotations":[{"segment":[19.126455538221528,79.7115054602184],"label":"Clipping cat claws"}]},"701UoJ5pgeo":{"duration":142.757,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=701UoJ5pgeo","annotations":[{"segment":[12.026270177337706,15.812318196129205],"label":"Cleaning sink"},{"segment":[57.458846402835704,95.09661788376296],"label":"Cleaning sink"},{"segment":[104.45038357724783,142.75628],"label":"Cleaning sink"}]},"gNUCdXGxp0w":{"duration":143.71,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=gNUCdXGxp0w","annotations":[]},"-X7fBYN5fBc":{"duration":25.542,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=-X7fBYN5fBc","annotations":[{"segment":[5.658158361934477,19.44493859594384],"label":"Calf 
roping"}]},"OW_iCiS_U4Y":{"duration":166.16,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OW_iCiS_U4Y","annotations":[{"segment":[5.702907800665768,15.294161829058195],"label":"Plataform diving"},{"segment":[31.625215985510167,35.254339131388384],"label":"Plataform diving"},{"segment":[46.14170856902303,54.955293351870125],"label":"Plataform diving"},{"segment":[90.98730172880384,93.32030946543983],"label":"Plataform diving"},{"segment":[105.76301739416515,114.3173790951638],"label":"Plataform diving"},{"segment":[141.79502537441496,146.72026391575662],"label":"Plataform diving"}]},"wh4JNeQh9FQ":{"duration":158.06,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wh4JNeQh9FQ","annotations":[]},"zD_wAe6Eoxc":{"duration":226.581,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zD_wAe6Eoxc","annotations":[{"segment":[0.01,226.579885],"label":"Longboarding"}]},"K6oVOQG0lOo":{"duration":41.75,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=K6oVOQG0lOo","annotations":[{"segment":[0.2,39.92851946417446],"label":"Changing car wheel"}]},"swkSdgwCxHs":{"duration":96.2,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=swkSdgwCxHs","annotations":[{"segment":[11.326295631825273,16.12684477379095],"label":"High jump"},{"segment":[50.48077457098284,53.63113494539782],"label":"High jump"}]},"9Me0tmjDgRQ":{"duration":222.051,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=9Me0tmjDgRQ","annotations":[]},"XPctbL-V1ww":{"duration":120.72,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XPctbL-V1ww","annotations":[{"segment":[2.0708372277691107,106.7422461950078],"label":"Brushing 
teeth"}]},"NYwrSPaSQPI":{"duration":59.35,"subset":"testing","resolution":"176x144","url":"https://www.youtube.com/watch?v=NYwrSPaSQPI","annotations":[]},"ISEbX4WvBW4":{"duration":75.14,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ISEbX4WvBW4","annotations":[{"segment":[0.2,73.03601666782033],"label":"Swinging at the playground"}]},"ruHObln9mwI":{"duration":131.87,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ruHObln9mwI","annotations":[{"segment":[1.6457550383647999,8.0230558120284],"label":"Playing field hockey"}]},"dXSlCw8AXv0":{"duration":206.17000000000002,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=dXSlCw8AXv0","annotations":[{"segment":[60.95040671286715,80.24868852169054],"label":"Windsurfing"},{"segment":[110.48266335551381,118.20197607904318],"label":"Windsurfing"}]},"9g-5J05BIiQ":{"duration":126.34,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9g-5J05BIiQ","annotations":[{"segment":[8.078044700468018,114.42902247],"label":"Skateboarding"}]},"ZBG-KhSyVgQ":{"duration":111.11,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=ZBG-KhSyVgQ","annotations":[{"segment":[6.933384274146476,68.12525705],"label":"Horseback riding"}]},"73gFk8M4qbk":{"duration":82.176,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=73gFk8M4qbk","annotations":[]},"Rho3u46ZIEE":{"duration":40.59,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Rho3u46ZIEE","annotations":[{"segment":[0.822204368174727,38.725946957878314],"label":"Playing accordion"}]},"fpIS8jWoK9w":{"duration":221.61,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fpIS8jWoK9w","annotations":[{"segment":[45.184266789999995,191.94836765],"label":"Getting a 
tattoo"}]},"mTlReluteEQ":{"duration":106.97999999999999,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=mTlReluteEQ","annotations":[{"segment":[10.00099926,28.670101740000003],"label":"Hammer throw"},{"segment":[42.248311560000005,79.98018084],"label":"Hammer throw"}]},"GtCXZRGSaqk":{"duration":149.398,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GtCXZRGSaqk","annotations":[{"segment":[22.141548954475187,135.87918970982838],"label":"Waterskiing"}]},"jWzLhOqcGtE":{"duration":22.99,"subset":"training","resolution":"480x480","url":"https://www.youtube.com/watch?v=jWzLhOqcGtE","annotations":[{"segment":[0.07157566302652106,22.367394695787834],"label":"Layup drill in basketball"}]},"Kud8SsSxj4w":{"duration":143.035,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Kud8SsSxj4w","annotations":[]},"2ISOAmuzs24":{"duration":210.304,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2ISOAmuzs24","annotations":[{"segment":[39.33996879875195,210.161],"label":"Beach soccer"}]},"cSCskKqO7CE":{"duration":172.27,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cSCskKqO7CE","annotations":[{"segment":[55.36253006038609,58.58753181147654],"label":"Putting on makeup"},{"segment":[62.88753414626381,67.72503677289949],"label":"Putting on makeup"},{"segment":[72.56253939953515,77.40004202617085],"label":"Putting on makeup"},{"segment":[82.77504494465492,87.0750472794422],"label":"Putting on makeup"},{"segment":[91.37504961422947,92.98755048977469],"label":"Putting on makeup"},{"segment":[98.63130355418299,103.20005603489446],"label":"Putting on makeup"},{"segment":[118.51881398283932,119.32506441809673],"label":"Putting on makeup"},{"segment":[123.62506673946959,124.7000673198128],"label":"Putting on 
makeup"}]},"airowj9k5ro":{"duration":60.581,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=airowj9k5ro","annotations":[{"segment":[0.01,35.317816966368774],"label":"Fun sliding down"}]},"pKFBr0pMn7I":{"duration":60.58,"subset":"validation","resolution":"1280x686","url":"https://www.youtube.com/watch?v=pKFBr0pMn7I","annotations":[{"segment":[9.734521965678628,16.4447264274571],"label":"Pole vault"},{"segment":[24.19454003120125,39.59965731669267],"label":"Pole vault"}]},"SjwFCog0amE":{"duration":54.567,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SjwFCog0amE","annotations":[{"segment":[9.662000377535103,22.68654714196568],"label":"Putting on shoes"}]},"wZVuXEYbZxo":{"duration":162.68,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=wZVuXEYbZxo","annotations":[]},"RKDjetk5Kko":{"duration":212.77,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RKDjetk5Kko","annotations":[{"segment":[28.848670759999997,85.33538512],"label":"Preparing pasta"},{"segment":[99.4565318,200.2079079024961],"label":"Preparing pasta"}]},"mhw52v2-f4U":{"duration":226.581,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=mhw52v2-f4U","annotations":[]},"m5YvKrjGtPM":{"duration":25.38,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=m5YvKrjGtPM","annotations":[{"segment":[0.01,23.597704550358884],"label":"Snatch"}]},"hToIzZ3o6Ic":{"duration":129.196,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hToIzZ3o6Ic","annotations":[{"segment":[0.01,129.149386],"label":"Painting furniture"}]},"fKbrGn6c3us":{"duration":78.03999999999999,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=fKbrGn6c3us","annotations":[{"segment":[53.157527879999996,67.29259438377535],"label":"Ironing 
clothes"}]},"FLbTQGyoEMk":{"duration":41.076,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=FLbTQGyoEMk","annotations":[{"segment":[13.8623841742179,28.874643342501532],"label":"Putting in contact lenses"}]},"U8SmnfsEzOc":{"duration":189.22,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=U8SmnfsEzOc","annotations":[{"segment":[23.603369734789393,177.6153572542902],"label":"Assembling bicycle"}]},"9o_SlEkq-L0":{"duration":85.53999999999999,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9o_SlEkq-L0","annotations":[{"segment":[68.8894968798752,79.95983229329174],"label":"Mowing the lawn"}]},"faqijFZWQlo":{"duration":158.52,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=faqijFZWQlo","annotations":[]},"e1bHYT-G__8":{"duration":168.624,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=e1bHYT-G__8","annotations":[{"segment":[12.36395569564279,15.257647454197487],"label":"Playing ten pins"},{"segment":[18.15133921275218,19.466653648458863],"label":"Playing ten pins"},{"segment":[30.515294908394974,39.19637018405906],"label":"Playing ten pins"},{"segment":[47.08825679829914,49.98194855685384],"label":"Playing ten 
pins"}]},"ywSeEtroEXo":{"duration":207.355,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=ywSeEtroEXo","annotations":[{"segment":[0.01,6.954921926128745],"label":"Longboarding"},{"segment":[14.071586222632579,18.276887852384842],"label":"Longboarding"},{"segment":[22.158704741386934,27.33446035335413],"label":"Longboarding"},{"segment":[34.12763984945398,37.36248722854914],"label":"Longboarding"},{"segment":[49.00793779329173,55.47763255148205],"label":"Longboarding"},{"segment":[66.1526289024961,70.3579304953198],"label":"Longboarding"},{"segment":[96.23670952808112,104.0003432379095],"label":"Longboarding"},{"segment":[107.88216009282371,112.08746168564741],"label":"Longboarding"},{"segment":[167.0798671302652,178.40183295709826],"label":"Longboarding"},{"segment":[181.63668033619342,192.95864616302651],"label":"Longboarding"}]},"0-CYG7bH54A":{"duration":125.83,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0-CYG7bH54A","annotations":[{"segment":[45.73800686739469,113.65796556318253],"label":"Preparing pasta"}]},"J_CqwWJZTh4":{"duration":202.547,"subset":"training","resolution":"424x240","url":"https://www.youtube.com/watch?v=J_CqwWJZTh4","annotations":[{"segment":[40.46405162066166,51.50491610664296],"label":"Throwing darts"},{"segment":[99.1383600318766,107.6555983496336],"label":"Throwing darts"},{"segment":[140.14728526552145,145.19453760197004],"label":"Throwing darts"},{"segment":[176.1089581627177,185.2571030225308],"label":"Throwing darts"}]},"ux4iT6vQQb0":{"duration":73.14,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ux4iT6vQQb0","annotations":[{"segment":[0.4564298159126365,70.6325140124805],"label":"Doing 
motocross"}]},"EnPDoWJj3Ug":{"duration":90.65,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=EnPDoWJj3Ug","annotations":[]},"KlIAmoVrptA":{"duration":113.03999999999999,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=KlIAmoVrptA","annotations":[{"segment":[31.60656708,100.13104524180967],"label":"Shaving legs"}]},"EJxWhTE_ZeE":{"duration":205.85,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EJxWhTE_ZeE","annotations":[{"segment":[19.912220488819553,19.912220488819553],"label":"Cleaning windows"},{"segment":[46.89006760270411,72.90442017680708],"label":"Cleaning windows"},{"segment":[82.86053042121685,98.59760790431618],"label":"Cleaning windows"},{"segment":[101.48809152366094,134.8892355694228],"label":"Cleaning windows"},{"segment":[147.73582943317734,160.90358814352575],"label":"Cleaning windows"},{"segment":[168.29037907020282,178.88881897347895],"label":"Cleaning windows"},{"segment":[188.20259949453978,194.62589640561623],"label":"Cleaning windows"}]},"kGvs0Nv5zJo":{"duration":123.322,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=kGvs0Nv5zJo","annotations":[{"segment":[46.10129060970182,65.31016169707758],"label":"Surfing"}]},"iB20nDf5yJs":{"duration":178.45,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iB20nDf5yJs","annotations":[{"segment":[11.970594444617783,167.3099363073323],"label":"Springboard diving"}]},"BFrsZmuj4Ns":{"duration":205.218,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=BFrsZmuj4Ns","annotations":[{"segment":[31.695129238689553,200.41566569110765],"label":"Capoeira"}]},"AeefhelpxGA":{"duration":69.893,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=AeefhelpxGA","annotations":[{"segment":[0.2,69.892062],"label":"Playing 
congas"}]},"BWanQWn1OYQ":{"duration":138.925,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BWanQWn1OYQ","annotations":[{"segment":[32.72647899687987,45.296914638065516],"label":"River tubing"},{"segment":[60.03466676911076,87.99304948829953],"label":"River tubing"}]},"9khzc3a4zz0":{"duration":43.933,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=9khzc3a4zz0","annotations":[{"segment":[0.44436271450858034,37.15555928237129],"label":"Rope skipping"}]},"z4_gDZk7z3s":{"duration":20.78,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=z4_gDZk7z3s","annotations":[]},"vE0kyySqMmc":{"duration":57.26,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vE0kyySqMmc","annotations":[]},"6x3qs7jQQko":{"duration":205.22,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=6x3qs7jQQko","annotations":[{"segment":[131.26265720016556,171.60191267394697],"label":"Removing curlers"}]},"gK71Ut9XQ-Y":{"duration":69.59,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=gK71Ut9XQ-Y","annotations":[{"segment":[12.6403276,53.5028797],"label":"Playing guitarra"}]},"rKwiUoQDEJ8":{"duration":202.734,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=rKwiUoQDEJ8","annotations":[]},"BgAiDS4fF_I":{"duration":132.562,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=BgAiDS4fF_I","annotations":[{"segment":[65.48271891970055,86.74114882624261],"label":"Doing crunches"}]},"6z6iFou8nW0":{"duration":118.47,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=6z6iFou8nW0","annotations":[{"segment":[49.31835725429017,63.55198751950078],"label":"Washing hands"},{"segment":[71.1264976599064,104.7499329173167],"label":"Washing 
hands"}]},"LlI1IJEkW5E":{"duration":74.98,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LlI1IJEkW5E","annotations":[{"segment":[15.030473820313357,31.873961992493307],"label":"Making a sandwich"}]},"_dFzOHyZSNk":{"duration":201.178,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_dFzOHyZSNk","annotations":[{"segment":[18.203279102592678,67.47767253547285],"label":"Longboarding"},{"segment":[83.4840041601664,86.62250055716515],"label":"Longboarding"},{"segment":[94.468741549662,95.72414010846148],"label":"Longboarding"},{"segment":[131.1891493945472,133.38609687244633],"label":"Longboarding"},{"segment":[0.2,200.97761904761907],"label":"Longboarding"},{"segment":[147.50932989079564,169.1649549173167],"label":"Longboarding"}]},"eH2ORakkiy4":{"duration":193.841,"subset":"testing","resolution":"800x480","url":"https://www.youtube.com/watch?v=eH2ORakkiy4","annotations":[]},"YqB7UEdhKug":{"duration":75.98,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=YqB7UEdhKug","annotations":[{"segment":[7.3486626833073325,70.04934912636506],"label":"Playing saxophone"}]},"vaV6_bxiTLI":{"duration":33.09,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vaV6_bxiTLI","annotations":[]},"YMsyF-g2TqE":{"duration":193.98,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YMsyF-g2TqE","annotations":[{"segment":[8.622072542901716,171.70283541341655],"label":"Carving jack-o-lanterns"}]},"g3InHLh678M":{"duration":112.594,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=g3InHLh678M","annotations":[{"segment":[0.01,29.702170568686036],"label":"Doing fencing"},{"segment":[44.60585696536309,108.09178905262456],"label":"Doing 
fencing"}]},"N9VOdVOiPNM":{"duration":135.19,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=N9VOdVOiPNM","annotations":[]},"ejMpWeFZLec":{"duration":119.583,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ejMpWeFZLec","annotations":[{"segment":[3.5880596023630873,112.12873490681159],"label":"Hula hoop"}]},"soGmSXFO7wI":{"duration":119.21000000000001,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=soGmSXFO7wI","annotations":[{"segment":[24.54792912,68.9754846],"label":"Shot put"}]},"VpZ3PaLi2RQ":{"duration":79.157,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=VpZ3PaLi2RQ","annotations":[{"segment":[0.19999999999999998,27.66166704524181],"label":"Cutting the grass"},{"segment":[35.318021316692665,63.35015711700468],"label":"Cutting the grass"},{"segment":[70.14208429329173,78.956824],"label":"Cutting the grass"}]},"zgnBeiEB5pE":{"duration":176.38,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zgnBeiEB5pE","annotations":[{"segment":[0,176.3787755102041],"label":"Mixing drinks"}]},"UvPUywSVy1k":{"duration":177.42000000000002,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=UvPUywSVy1k","annotations":[{"segment":[42.62585939628061,71.68894534829012],"label":"Preparing pasta"},{"segment":[81.09984937084558,152.23521212957363],"label":"Preparing 
pasta"}]},"PfRGaUpEhu0":{"duration":20.85,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=PfRGaUpEhu0","annotations":[]},"F7K2oT7ADkk":{"duration":22.04,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=F7K2oT7ADkk","annotations":[]},"Z3jo5hbPo1Q":{"duration":233.57,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Z3jo5hbPo1Q","annotations":[{"segment":[12.543701230109205,54.35603866380656],"label":"Ballet"}]},"qoDCBrRmiF0":{"duration":112.71,"subset":"testing","resolution":"642x480","url":"https://www.youtube.com/watch?v=qoDCBrRmiF0","annotations":[]},"WDbG2_sDHow":{"duration":22.36,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=WDbG2_sDHow","annotations":[{"segment":[0.39443039999999996,6.795874799999999],"label":"Springboard diving"},{"segment":[1.2035072035881433,8.389666158346333],"label":"Springboard diving"}]},"8nyOw9vBh2E":{"duration":158.96,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=8nyOw9vBh2E","annotations":[{"segment":[28.51924819849937,151.02801727301093],"label":"Doing kickboxing"}]},"fWD0rL_72nw":{"duration":60.19,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=fWD0rL_72nw","annotations":[{"segment":[0.01,60.061],"label":"Doing kickboxing"}]},"YW3mCNKVaa4":{"duration":43.143,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YW3mCNKVaa4","annotations":[{"segment":[15.480211262872285,40.517768717628705],"label":"Snow tubing"}]},"wfSh2F8ymIg":{"duration":23.731,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=wfSh2F8ymIg","annotations":[{"segment":[1.2217100405616226,19.1031024524181],"label":"Snow 
tubing"}]},"XEqijKYIJNg":{"duration":125.5,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XEqijKYIJNg","annotations":[{"segment":[42.87885149297973,103.5749426474259],"label":"Preparing salad"}]},"gE2Fn5yDXeU":{"duration":203.62,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=gE2Fn5yDXeU","annotations":[]},"Ule69iMpA3Y":{"duration":60.93,"subset":"validation","resolution":"256x192","url":"https://www.youtube.com/watch?v=Ule69iMpA3Y","annotations":[{"segment":[0,60.929183],"label":"Doing step aerobics"}]},"RFgusQogDyQ":{"duration":132.029,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=RFgusQogDyQ","annotations":[{"segment":[0.2,120.94071433033051],"label":"Swinging at the playground"}]},"NDWJOqVUGeA":{"duration":149.7,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NDWJOqVUGeA","annotations":[{"segment":[35.03083478939157,87.34354807488299],"label":"Fixing the roof"}]},"liNIRD7YwFQ":{"duration":195.187,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=liNIRD7YwFQ","annotations":[{"segment":[51.461142984494884,59.37824190518642],"label":"Surfing"},{"segment":[64.85931038874209,73.08091311407559],"label":"Surfing"},{"segment":[92.8736604158044,99.57274411792798],"label":"Surfing"},{"segment":[102.6177821643478,141.59426915852146],"label":"Surfing"},{"segment":[147.07533764207713,180.87525995733708],"label":"Surfing"}]},"osjru9UsWsI":{"duration":221.914,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=osjru9UsWsI","annotations":[{"segment":[65.91819590961708,86.62548423707872],"label":"Blow-drying hair"},{"segment":[96.2888854565608,112.16447317428138],"label":"Blow-drying hair"},{"segment":[147.36686333096614,159.10099338319438],"label":"Blow-drying hair"},{"segment":[180.49852337305558,185.33022394848328],"label":"Blow-drying 
hair"}]},"2UfljrwzsLs":{"duration":67.27,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=2UfljrwzsLs","annotations":[{"segment":[0.7325538221528862,39.66255694227769],"label":"Doing nails"}]},"xoXTlNWkC7w":{"duration":181.116,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=xoXTlNWkC7w","annotations":[{"segment":[18.077853354134167,172.6060702028081],"label":"Playing congas"}]},"y3Wfx-RvTpk":{"duration":82.08,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=y3Wfx-RvTpk","annotations":[{"segment":[11.188897464898597,75.63694686271451],"label":"Layup drill in basketball"}]},"Qq2vKc9hWUk":{"duration":46.81,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Qq2vKc9hWUk","annotations":[{"segment":[5.477156045241809,13.437289497659908],"label":"Playing racquetball"}]},"K9qWnvl8A1Q":{"duration":75.419,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=K9qWnvl8A1Q","annotations":[{"segment":[0.01,75.418411],"label":"Painting fence"}]},"yRQpolWhJ6A":{"duration":180.094,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yRQpolWhJ6A","annotations":[]},"fUJ88Ir7Dgc":{"duration":79.9,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=fUJ88Ir7Dgc","annotations":[{"segment":[51.123580343213725,78.89595163806553],"label":"High jump"}]},"LNd1TrbshXM":{"duration":151.441,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LNd1TrbshXM","annotations":[{"segment":[15.120428751843951,141.28150501404056],"label":"Braiding 
hair"}]},"tSNg5rvDvVc":{"duration":54.1,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=tSNg5rvDvVc","annotations":[]},"fRUoSevi63M":{"duration":132.17,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fRUoSevi63M","annotations":[{"segment":[22.680909576519117,128.04404328393136],"label":"Painting"}]},"DgdEQgh8TKU":{"duration":78.37,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DgdEQgh8TKU","annotations":[{"segment":[0.8485144499999999,47.087932516380654],"label":"Chopping wood"}]},"U_FxyViYYBA":{"duration":93.888,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=U_FxyViYYBA","annotations":[{"segment":[5.271444617784712,86.10026209048362],"label":"Fun sliding down"}]},"KyMDXMXZLyM":{"duration":110.969,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=KyMDXMXZLyM","annotations":[{"segment":[23.370829750390016,106.98646507956319],"label":"Removing ice from car"}]},"dcsQy55tjw4":{"duration":60.12,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=dcsQy55tjw4","annotations":[{"segment":[0,52.70741198439937],"label":"Shot put"}]},"7EiY0caSyIM":{"duration":119.61,"subset":"testing","resolution":"854x474","url":"https://www.youtube.com/watch?v=7EiY0caSyIM","annotations":[]},"nTsXn3oHf_8":{"duration":44.792,"subset":"training","resolution":"180x240","url":"https://www.youtube.com/watch?v=nTsXn3oHf_8","annotations":[{"segment":[0.01,44.7912924],"label":"Elliptical trainer"}]},"myHHyzx6TPc":{"duration":99.68,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=myHHyzx6TPc","annotations":[{"segment":[3.2634633276131044,96.50527268798751],"label":"Using the balance 
beam"}]},"aEf-rEjboOc":{"duration":145.38,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aEf-rEjboOc","annotations":[]},"NE-xkW0byCM":{"duration":148.91,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=NE-xkW0byCM","annotations":[]},"qpdREcmjhUw":{"duration":44.467,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=qpdREcmjhUw","annotations":[{"segment":[0.01,44.009082823442455],"label":"Shuffleboard"}]},"CZISTSKNsSk":{"duration":103.492,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=CZISTSKNsSk","annotations":[{"segment":[10.194695099007676,30.34094191296301],"label":"Skiing"},{"segment":[56.77023163258918,61.92716621202844],"label":"Skiing"},{"segment":[67.7287176138976,85.77798864193497],"label":"Skiing"},{"segment":[88.67876434286954,91.09607742698171],"label":"Skiing"}]},"CjFqAb8UWEc":{"duration":34.13,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CjFqAb8UWEc","annotations":[]},"RgaFRpTYYR4":{"duration":70.13,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=RgaFRpTYYR4","annotations":[{"segment":[34.0416958,47.410135159999996],"label":"Playing accordion"},{"segment":[2.4248879199999998,29.797791319999995],"label":"Playing accordion"}]},"yJN4jnk_S1U":{"duration":177.42000000000002,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yJN4jnk_S1U","annotations":[{"segment":[4.9799344773790954,176.53091731669267],"label":"Belly 
dance"}]},"xZPExy2EO0E":{"duration":162.192,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=xZPExy2EO0E","annotations":[]},"GONk0qrh2Cw":{"duration":75.651,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=GONk0qrh2Cw","annotations":[]},"U2cCOj3LiAA":{"duration":25.217,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=U2cCOj3LiAA","annotations":[]},"npB7FaauqBw":{"duration":33.669,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=npB7FaauqBw","annotations":[]},"QxsELAtsSWE":{"duration":170.57,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=QxsELAtsSWE","annotations":[{"segment":[48.165134657086966,57.212729012561866],"label":"Spinning"},{"segment":[69.98580339676172,96.59637503051142],"label":"Spinning"}]},"bMRx3vSVZUQ":{"duration":175.61,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bMRx3vSVZUQ","annotations":[{"segment":[19.72558693368143,31.50614561622465],"label":"Windsurfing"},{"segment":[88.217207725429,89.58704014352574],"label":"Windsurfing"},{"segment":[98.08000113572544,163.28402423712947],"label":"Windsurfing"}]},"MM2ZYfEWCQo":{"duration":73.19,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=MM2ZYfEWCQo","annotations":[{"segment":[0,63.82656593135725],"label":"Starting a campfire"}]},"Io0_nTofT7w":{"duration":196.72,"subset":"training","resolution":"328x240","url":"https://www.youtube.com/watch?v=Io0_nTofT7w","annotations":[{"segment":[3.3748377535101404,192.99898439937598],"label":"Drum corps"}]},"A-Oz6mBBBZQ":{"duration":25.357,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=A-Oz6mBBBZQ","annotations":[{"segment":[19.165019388950636,23.463346917722024],"label":"Building 
sandcastles"}]},"8yM9Ay7cKwM":{"duration":50.202,"subset":"testing","resolution":"800x480","url":"https://www.youtube.com/watch?v=8yM9Ay7cKwM","annotations":[]},"kEKIUQBGSmQ":{"duration":156.025,"subset":"testing","resolution":"426x236","url":"https://www.youtube.com/watch?v=kEKIUQBGSmQ","annotations":[]},"2DCeEAz8iO8":{"duration":164.12,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=2DCeEAz8iO8","annotations":[{"segment":[59.673304800000004,98.52216068642744],"label":"Playing polo"},{"segment":[10.48549902,51.2786691],"label":"Playing polo"}]},"ioHDdIBu2go":{"duration":157.18,"subset":"training","resolution":"328x240","url":"https://www.youtube.com/watch?v=ioHDdIBu2go","annotations":[{"segment":[25.73744929797192,35.54219188767551],"label":"Vacuuming floor"},{"segment":[74.2803790951638,78.69307488299532],"label":"Vacuuming floor"}]},"-A6e83tl4Y8":{"duration":215.82999999999998,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-A6e83tl4Y8","annotations":[{"segment":[10.94299227751423,102.86412740863376],"label":"Volleyball"}]},"rVmNL8rzHnU":{"duration":199.878,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=rVmNL8rzHnU","annotations":[{"segment":[0.01,173.02925897035882],"label":"Hanging wallpaper"}]},"wiflxvRl04M":{"duration":63.251,"subset":"training","resolution":"560x340","url":"https://www.youtube.com/watch?v=wiflxvRl04M","annotations":[{"segment":[0.01,63.25115646258504],"label":"Gargling 
mouthwash"}]},"3nzXMKByUnk":{"duration":209.44,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=3nzXMKByUnk","annotations":[{"segment":[13.233161991785792,81.19606802367332],"label":"Cumbia"}]},"n7lk6-nQ8fM":{"duration":108.507,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=n7lk6-nQ8fM","annotations":[]},"D1x5KVBdiuI":{"duration":197.53,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=D1x5KVBdiuI","annotations":[{"segment":[13.24787162915088,172.53042009360377],"label":"Snatch"}]},"lPw_BRbPw5s":{"duration":177.87,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=lPw_BRbPw5s","annotations":[]},"RO5WKh6dViA":{"duration":75.93,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RO5WKh6dViA","annotations":[{"segment":[9.772485722006431,33.22645145482186],"label":"Using the monkey bar"}]},"S1bEYnNyLZE":{"duration":57.38,"subset":"training","resolution":"1906x1080","url":"https://www.youtube.com/watch?v=S1bEYnNyLZE","annotations":[{"segment":[2.2377733204566277,53.348515959686004],"label":"Brushing teeth"}]},"I5YCAQXHoFw":{"duration":114.8,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=I5YCAQXHoFw","annotations":[{"segment":[19.69034321372855,94.5301216848674],"label":"Painting 
furniture"}]},"bNGldPjMYHc":{"duration":204.499,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bNGldPjMYHc","annotations":[{"segment":[11.276049927625465,17.646699329650033],"label":"Rafting"},{"segment":[27.839738372889347,185.8318419028644],"label":"Rafting"}]},"8i2lkdpB2w0":{"duration":183.322,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8i2lkdpB2w0","annotations":[{"segment":[2.647320503876588,160.24074106462427],"label":"Canoeing"}]},"Il2jGm3L0-0":{"duration":74.61,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Il2jGm3L0-0","annotations":[{"segment":[9.773572455538222,71.55651262090484],"label":"Using parallel bars"}]},"AZn294ubbps":{"duration":104.47,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=AZn294ubbps","annotations":[{"segment":[51.55049297971919,96.83023088923557],"label":"Kayaking"}]},"x4QVVFhamJ4":{"duration":124.85,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=x4QVVFhamJ4","annotations":[{"segment":[80.054397996328,85.89778471138845],"label":"Washing 
hands"}]},"mxZRW_Xy7ks":{"duration":169.46,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mxZRW_Xy7ks","annotations":[{"segment":[6.6091742989447475,16.126385289425183],"label":"Skiing"},{"segment":[29.873467831230258,44.149284316950904],"label":"Skiing"},{"segment":[48.64352284023334,103.10311906353806],"label":"Skiing"},{"segment":[107.86172455877826,169.19486202808113],"label":"Skiing"}]},"MduMjuxuIac":{"duration":180.81,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=MduMjuxuIac","annotations":[{"segment":[1.2033771398129605,177.5109013873105],"label":"Cheerleading"}]},"TfDNYSd4q04":{"duration":200.2,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=TfDNYSd4q04","annotations":[]},"FDTkn9SFPfs":{"duration":37.176,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FDTkn9SFPfs","annotations":[{"segment":[0.01,37.081],"label":"Clipping cat claws"}]},"OhgsnOwo4b4":{"duration":190.29,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OhgsnOwo4b4","annotations":[{"segment":[21.366477379095166,176.27529173166928],"label":"Preparing salad"}]},"Q1XC-TJe2kM":{"duration":177.84,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Q1XC-TJe2kM","annotations":[]},"SZqwvjwqwK4":{"duration":222.563,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=SZqwvjwqwK4","annotations":[{"segment":[13.541290712853003,124.6493170747238],"label":"Playing drums"},{"segment":[131.59356872234073,212.4940991700468],"label":"Playing 
drums"}]},"XgaEYidtX6E":{"duration":192.293,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XgaEYidtX6E","annotations":[{"segment":[15.599351014040563,192.326],"label":"Longboarding"}]},"p4s_tEv1JsE":{"duration":115.288,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=p4s_tEv1JsE","annotations":[]},"X1bmeq4u6YE":{"duration":171.46,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=X1bmeq4u6YE","annotations":[{"segment":[1.84941804,111.01127507999999],"label":"Making a sandwich"},{"segment":[15.508982839313575,70.59261154446179],"label":"Making a sandwich"},{"segment":[110.70204992199689,145.4635631825273],"label":"Making a sandwich"}]},"G4mX4StOvQE":{"duration":122.973,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=G4mX4StOvQE","annotations":[{"segment":[16.76300702028081,106.61272464898596],"label":"Kneeling"}]},"f11zga3X2L4":{"duration":132.73,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=f11zga3X2L4","annotations":[{"segment":[7.968358814352574,99.24228705148207],"label":"Archery"}]},"apjGHMrnMV0":{"duration":223.86,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=apjGHMrnMV0","annotations":[{"segment":[22.69635725429017,40.50426833073323],"label":"Snatch"},{"segment":[47.48776287051482,65.29567394695788],"label":"Snatch"},{"segment":[76.12009048361935,94.27717628705147],"label":"Snatch"},{"segment":[101.26067082683308,131.2896973478939],"label":"Snatch"},{"segment":[142.1141138845554,162.366248049922],"label":"Snatch"},{"segment":[171.09561622464898,196.93454602184087],"label":"Snatch"}]},"G5HZX5i26us":{"duration":22.036,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=G5HZX5i26us","annotations":[{"segment":[0.2,21.04378315132605],"label":"Knitting"}]},"8MSrdVG-U_s":{"duration":137.7,"subset":"testing","resolution":"1280
x720","url":"https://www.youtube.com/watch?v=8MSrdVG-U_s","annotations":[]},"3I4EzlMo124":{"duration":98.66,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=3I4EzlMo124","annotations":[{"segment":[7.311116063690167,85.6554954898596],"label":"Using the balance beam"}]},"cBCi-pOE5NQ":{"duration":165.95,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cBCi-pOE5NQ","annotations":[{"segment":[56.7393307,159.36877731045243],"label":"Playing saxophone"}]},"V2ltLccVh_Q":{"duration":52.22,"subset":"training","resolution":"854x468","url":"https://www.youtube.com/watch?v=V2ltLccVh_Q","annotations":[{"segment":[13.19799038492152,41.54922819032761],"label":"Paintball"}]},"0hfKWxuVT5k":{"duration":236.936,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0hfKWxuVT5k","annotations":[{"segment":[5.544533944623091,236.9364172335601],"label":"Canoeing"}]},"3q-MxrLyr0g":{"duration":136.49,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3q-MxrLyr0g","annotations":[]},"W0EN9JeU66A":{"duration":30.372,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=W0EN9JeU66A","annotations":[{"segment":[0.01,30.241],"label":"Swinging at the playground"}]},"W8ayZca_fAY":{"duration":154.74,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=W8ayZca_fAY","annotations":[{"segment":[37.76438823,73.52942145000002],"label":"Ironing clothes"},{"segment":[96.89659230000001,146.03633483853355],"label":"Ironing clothes"}]},"oFh_AGspaEQ":{"duration":113.55,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oFh_AGspaEQ","annotations":[{"segment":[23.457254290171605,95.51085803432137],"label":"Polishing 
forniture"}]},"LSvjzZHbetU":{"duration":195.861,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LSvjzZHbetU","annotations":[{"segment":[77.29761778471139,92.57477223088922],"label":"Painting fence"},{"segment":[121.59986583463338,163.45710608424335],"label":"Painting fence"}]},"e0G-AY68U64":{"duration":82.92,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=e0G-AY68U64","annotations":[]},"PfNtxlM1QHw":{"duration":92.555,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PfNtxlM1QHw","annotations":[{"segment":[13.962380237151345,47.788770668907254],"label":"Bullfighting"}]},"iazhs1cz_1Q":{"duration":216.88,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=iazhs1cz_1Q","annotations":[{"segment":[39.24707878939157,94.73432811232449],"label":"Playing piano"},{"segment":[104.20776092355693,150.22157743525742],"label":"Playing piano"},{"segment":[157.32665204368175,177.28852832449297],"label":"Playing piano"}]},"APlxSpTZVPI":{"duration":94.204,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=APlxSpTZVPI","annotations":[{"segment":[0.2,88.19633414796712],"label":"Capoeira"}]},"-kmomJph-6E":{"duration":18.344,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-kmomJph-6E","annotations":[{"segment":[0.01,16.97765054602184],"label":"Doing a powerbomb"}]},"ma9R2AjCRZE":{"duration":175.079,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=ma9R2AjCRZE","annotations":[{"segment":[8.713076183687575,151.9343333351707],"label":"Sumo"}]},"9SiYS0SEKTw":{"duration":200.3,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9SiYS0SEKTw","annotations":[{"segment":[43.67031885,166.31417651326052],"label":"Fixing 
bicycle"}]},"21biKVGaY1Y":{"duration":158.152,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=21biKVGaY1Y","annotations":[{"segment":[138.22849363422327,153.72385899969763],"label":"Building sandcastles"}]},"RcYsKjVGqNE":{"duration":88.051,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=RcYsKjVGqNE","annotations":[]},"k3nRbFeancA":{"duration":84.777,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=k3nRbFeancA","annotations":[{"segment":[0.01,84.7760544217687],"label":"Sumo"}]},"Mfk4bUp_ZC0":{"duration":80.736,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Mfk4bUp_ZC0","annotations":[{"segment":[0.2,75.33665343052073],"label":"Riding bumper cars"}]},"1lwG-y_QCkc":{"duration":101.843,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1lwG-y_QCkc","annotations":[]},"SmQSId7boic":{"duration":163.051,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=SmQSId7boic","annotations":[]},"ookecPKcp6w":{"duration":12.725,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ookecPKcp6w","annotations":[]},"ojTFTIwsa_c":{"duration":202.246,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ojTFTIwsa_c","annotations":[]},"9ZnJyY_-Hzw":{"duration":126.921,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9ZnJyY_-Hzw","annotations":[{"segment":[35.118597200622084,117.3908164852255],"label":"Slacklining"}]},"IjmeHBgFb3g":{"duration":66.53,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=IjmeHBgFb3g","annotations":[{"segment":[12.671682000000002,45.774522000000005],"label":"Using the pommel 
horse"}]},"d-RYiiPzVKg":{"duration":148.91,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=d-RYiiPzVKg","annotations":[]},"fid8KlncwTA":{"duration":176.01,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=fid8KlncwTA","annotations":[{"segment":[3.704882988299532,97.01304713806552],"label":"Horseback riding"},{"segment":[113.20475797581904,163.70094092745708],"label":"Horseback riding"}]},"aBmKEmXVCqM":{"duration":21.131,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=aBmKEmXVCqM","annotations":[{"segment":[0.01,21.130158],"label":"Elliptical trainer"}]},"egmJbYMfCeU":{"duration":13.538,"subset":"testing","resolution":"480x480","url":"https://www.youtube.com/watch?v=egmJbYMfCeU","annotations":[]},"j_vP89LZlp8":{"duration":130.54,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=j_vP89LZlp8","annotations":[{"segment":[10.793692592823712,123.82198295163808],"label":"Brushing hair"}]},"MO37MVz4uuA":{"duration":91.78999999999999,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MO37MVz4uuA","annotations":[{"segment":[63.87819344773791,72.61489703588144],"label":"Shaving legs"}]},"Jh07fhoPWEI":{"duration":144.104,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Jh07fhoPWEI","annotations":[{"segment":[11.240486583463339,32.14779162870514],"label":"Playing rubik cube"},{"segment":[39.11689331045242,58.675339965678624],"label":"Playing rubik cube"},{"segment":[64.74520272074884,88.35022454602185],"label":"Playing rubik cube"},{"segment":[94.86970676443057,117.80029939469578],"label":"Playing rubik 
cube"}]},"HEfOp_pz_j4":{"duration":41.123,"subset":"training","resolution":"852x480","url":"https://www.youtube.com/watch?v=HEfOp_pz_j4","annotations":[{"segment":[5.954466458658346,10.116190327613104],"label":"Rock-paper-scissors"},{"segment":[23.305653666146647,26.05879407176287],"label":"Rock-paper-scissors"}]},"Mvl-TR0rDdA":{"duration":63.09,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=Mvl-TR0rDdA","annotations":[{"segment":[3.8384648751950077,48.03002202808112],"label":"Using uneven bars"}]},"YfxK4HAp8jI":{"duration":78.69,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=YfxK4HAp8jI","annotations":[{"segment":[6.929990840873636,26.281554159999995],"label":"Layup drill in basketball"},{"segment":[30.1311668,43.347488199999994],"label":"Layup drill in basketball"}]},"jxk6KOLu5kU":{"duration":120.326,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jxk6KOLu5kU","annotations":[{"segment":[13.66062508265359,31.812418370212264],"label":"Putting in contact lenses"},{"segment":[77.09833564432772,97.12144782751099],"label":"Putting in contact lenses"}]},"v1ukgczEA5A":{"duration":96.015,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=v1ukgczEA5A","annotations":[{"segment":[26.27325351014041,32.41116458658346],"label":"Waxing skis"},{"segment":[40.94478471138846,82.5632496099844],"label":"Waxing skis"}]},"AA6nXQMyfhU":{"duration":117.82,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=AA6nXQMyfhU","annotations":[{"segment":[49.25934067022544,53.85442841931364],"label":"Polishing forniture"},{"segment":[74.99183150078002,102.92996480499221],"label":"Polishing forniture"}]},"GvvmZ1Bi1xk":{"duration":22.66,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GvvmZ1Bi1xk","annotations":[{"segment":[1.6064634399999995,22.09699258970359],"label":"Mowing the 
lawn"}]},"6hu3V1PS4vM":{"duration":170.71,"subset":"validation","resolution":"854x470","url":"https://www.youtube.com/watch?v=6hu3V1PS4vM","annotations":[{"segment":[12.65034721116736,99.20535444547032],"label":"Playing field hockey"},{"segment":[104.53181642911973,167.11774473700035],"label":"Playing field hockey"}]},"EEMGyhO3OVI":{"duration":145.333,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EEMGyhO3OVI","annotations":[{"segment":[23.80661169303915,145.13369614512473],"label":"BMX"},{"segment":[53.05473421216849,69.1525381825273],"label":"BMX"},{"segment":[84.11669398595944,88.87801628705148],"label":"BMX"},{"segment":[107.46984622464899,129.00916139625588],"label":"BMX"}]},"sEaPxagod8o":{"duration":20.782,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sEaPxagod8o","annotations":[]},"aq9R-_cscTw":{"duration":101.797,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=aq9R-_cscTw","annotations":[]},"HDVk1O78gwc":{"duration":223.61,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=HDVk1O78gwc","annotations":[{"segment":[0,210.35213991575662],"label":"Using uneven bars"}]},"Nr49JFixNE4":{"duration":172.82999999999998,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Nr49JFixNE4","annotations":[{"segment":[17.12360008,140.20215744149766],"label":"Dodgeball"}]},"iwhejKH3DSw":{"duration":203.87,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=iwhejKH3DSw","annotations":[{"segment":[9.999872550000001,175.48318195319814],"label":"Preparing 
salad"}]},"0kTxTWwkY6k":{"duration":52.13,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=0kTxTWwkY6k","annotations":[]},"v3tNh1and0U":{"duration":140.319,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=v3tNh1and0U","annotations":[{"segment":[0.2,140.1181859410431],"label":"Swimming"},{"segment":[0,128.5340752348367],"label":"Swimming"}]},"MewROhYmQP0":{"duration":62.04,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MewROhYmQP0","annotations":[{"segment":[0.01,60.20882371294852],"label":"Playing polo"}]},"3fyR5F18WKg":{"duration":15.12,"subset":"validation","resolution":"480x480","url":"https://www.youtube.com/watch?v=3fyR5F18WKg","annotations":[{"segment":[0,3.8222317599999993],"label":"Playing piano"},{"segment":[4.768968847113885,15.021666],"label":"Playing piano"}]},"Vjsd6Ec6eKA":{"duration":152.86,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Vjsd6Ec6eKA","annotations":[{"segment":[6.198221528861155,123.01085803432137],"label":"Cheerleading"}]},"WSaGZOdrjtM":{"duration":211.07,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=WSaGZOdrjtM","annotations":[{"segment":[8.56131682001974,11.195568149256584],"label":"High jump"},{"segment":[23.049699130822376,28.31820178929606],"label":"High jump"},{"segment":[46.75796109395396,48.40436817472699],"label":"High jump"},{"segment":[59.27065490782896,63.22203190168423],"label":"High jump"},{"segment":[86.27173103250661,88.24741952943424],"label":"High jump"},{"segment":[110.63855582794741,115.57777707026649],"label":"High jump"},{"segment":[122.1634053933586,126.44406380336846],"label":"High jump"},{"segment":[166.28711515807572,167.60424082269415],"label":"High jump"},{"segment":[184.7268744627336,188.34897004043427],"label":"High jump"},{"segment":[196.25172332605305,200.20310030577224],"label":"High 
jump"}]},"tgj3VBjomI8":{"duration":88.89,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=tgj3VBjomI8","annotations":[]},"bY2dgTJFWko":{"duration":73.863,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bY2dgTJFWko","annotations":[{"segment":[4.954889731669266,68.67707628081124],"label":"Slacklining"}]},"HnEvJUr8DEM":{"duration":8.22,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=HnEvJUr8DEM","annotations":[{"segment":[0.09446177457098284,7.840327289391575],"label":"Playing kickball"}]},"c0qbyRWSptg":{"duration":134.91,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=c0qbyRWSptg","annotations":[{"segment":[13.254570982839315,128.12751950078004],"label":"Sailing"}]},"98MoyGZKHXc":{"duration":187.595,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=98MoyGZKHXc","annotations":[{"segment":[46.56888424641024,132.46130480715792],"label":"Changing car wheel"}]},"PfleCcLgZ7E":{"duration":180.466,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PfleCcLgZ7E","annotations":[{"segment":[23.89340566964065,157.97757630985936],"label":"Baton twirling"}]},"8HY9CPl9CqE":{"duration":32.09,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8HY9CPl9CqE","annotations":[{"segment":[15.469270801872074,20.17513311700468],"label":"Springboard diving"}]},"6FeSQ_hXOsg":{"duration":177.819,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=6FeSQ_hXOsg","annotations":[]},"K8ZPkLVlAiA":{"duration":91.98,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=K8ZPkLVlAiA","annotations":[{"segment":[39.602022774788544,91.11334847113883],"label":"Vacuuming 
floor"}]},"FkSf3pxra3M":{"duration":16.09,"subset":"training","resolution":"400x226","url":"https://www.youtube.com/watch?v=FkSf3pxra3M","annotations":[{"segment":[8.443926677067083,16.061],"label":"Discus throw"}]},"2duxXAsCW0k":{"duration":36.665,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=2duxXAsCW0k","annotations":[{"segment":[2.3165436658282657,19.762119210608425],"label":"Hand car wash"},{"segment":[28.055917432137285,36.664308],"label":"Hand car wash"}]},"f6Nhpyq1XdU":{"duration":33.07,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=f6Nhpyq1XdU","annotations":[{"segment":[2.321270873634945,29.59219296],"label":"Washing hands"}]},"y6WVmOiCZlI":{"duration":63.55,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=y6WVmOiCZlI","annotations":[]},"qs_VoH8fOhs":{"duration":192.494,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=qs_VoH8fOhs","annotations":[{"segment":[45.045263035011196,179.28014687934456],"label":"Decorating the Christmas tree"}]},"PKYg6_rs3LQ":{"duration":24.1,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=PKYg6_rs3LQ","annotations":[{"segment":[2.6696788642745712,21.282228692667708],"label":"Chopping wood"}]},"fTGtoTlJyh0":{"duration":192.819,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=fTGtoTlJyh0","annotations":[{"segment":[0.01,177.47708264793178],"label":"Assembling bicycle"}]},"Yd5Fka2DvGA":{"duration":12.86,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=Yd5Fka2DvGA","annotations":[]},"hSSHf_c1q5I":{"duration":135.977,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hSSHf_c1q5I","annotations":[{"segment":[15.479302652106083,120.45941965678627],"label":"Running a 
marathon"}]},"Z57GknpXOBg":{"duration":106.51,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Z57GknpXOBg","annotations":[{"segment":[29.07820380624795,77.35586354369907],"label":"Waterskiing"}]},"JZN0L8pp5hY":{"duration":27.33,"subset":"validation","resolution":"320x176","url":"https://www.youtube.com/watch?v=JZN0L8pp5hY","annotations":[{"segment":[0.01,27.281],"label":"Camel ride"}]},"vy5IRxiudmI":{"duration":54.99,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vy5IRxiudmI","annotations":[{"segment":[16.812841046975212,19.729354289817852],"label":"Spinning"},{"segment":[42.54678018096724,43.74769736349454],"label":"Spinning"}]},"3SLaaTD8t3Q":{"duration":21.293,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=3SLaaTD8t3Q","annotations":[{"segment":[1.2072431309811602,19.017395168364338],"label":"Applying sunscreen"}]},"EZZzVXqxG6U":{"duration":192.12,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=EZZzVXqxG6U","annotations":[{"segment":[108.62615054602183,114.31965912636505],"label":"Discus throw"}]},"VTbMcI6nw54":{"duration":64.53,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VTbMcI6nw54","annotations":[{"segment":[22.498196567862717,27.92186895475819],"label":"Using uneven bars"},{"segment":[40.37622776911076,45.900338533541344],"label":"Using uneven bars"},{"segment":[58.957327613104525,62.27179407176287],"label":"Using uneven 
bars"}]},"4zYIek_ET8w":{"duration":176.47,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4zYIek_ET8w","annotations":[]},"Y1I-gDeNV9I":{"duration":178.52,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Y1I-gDeNV9I","annotations":[]},"spAlIMm8jSg":{"duration":101.36,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=spAlIMm8jSg","annotations":[{"segment":[12.333382151612595,39.84631156674838],"label":"Using uneven bars"},{"segment":[49.96600974243051,82.06442739342226],"label":"Using uneven bars"}]},"9pNeqZ8y3yA":{"duration":175.776,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=9pNeqZ8y3yA","annotations":[]},"IjJo9dfIh9o":{"duration":55.59,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=IjJo9dfIh9o","annotations":[]},"uqlErIm56Jg":{"duration":230.69,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uqlErIm56Jg","annotations":[{"segment":[10.796734481624163,218.0940365288081],"label":"Cleaning shoes"}]},"-1EC1ZP6aC4":{"duration":170.18,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=-1EC1ZP6aC4","annotations":[]},"P6t2HLPZ3Dk":{"duration":79.413,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=P6t2HLPZ3Dk","annotations":[{"segment":[0.01,79.412243],"label":"Playing ten pins"}]},"X5Bxa0OvdwI":{"duration":213.46,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=X5Bxa0OvdwI","annotations":[{"segment":[130.70742431357252,145.35997633853353],"label":"Polishing shoes"},{"segment":[149.68913943681747,209.63139772074882],"label":"Polishing 
shoes"}]},"D644NPLLTCQ":{"duration":84.15,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=D644NPLLTCQ","annotations":[]},"LoigxYt5Hgk":{"duration":121.42,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LoigxYt5Hgk","annotations":[]},"sWaU9O4xzFE":{"duration":172.734,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=sWaU9O4xzFE","annotations":[{"segment":[0.01,155.9605600624025],"label":"Throwing darts"}]},"2tlLq9qvG-c":{"duration":180.164,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2tlLq9qvG-c","annotations":[{"segment":[15.709744548286604,101.27174610591901],"label":"Baking cookies"},{"segment":[119.50627102803738,131.86428348909658],"label":"Baking cookies"}]},"_4LZrf1GL1s":{"duration":37.013,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_4LZrf1GL1s","annotations":[{"segment":[17.65395631825273,30.57711388455538],"label":"Running a marathon"}]},"YoDlwg4eAEA":{"duration":125.6,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YoDlwg4eAEA","annotations":[{"segment":[23.92064667845186,119.19418090119329],"label":"Preparing salad"}]},"pXyT_AybrQ8":{"duration":161.96,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pXyT_AybrQ8","annotations":[{"segment":[78.77793704413062,120.90750619366334],"label":"Installing carpet"}]},"jFqY3M37F-g":{"duration":129.429,"subset":"training","resolution":"608x1080","url":"https://www.youtube.com/watch?v=jFqY3M37F-g","annotations":[{"segment":[3.8332277691107643,128.5140046801872],"label":"Playing congas"}]},"9Rvz-oIAn50":{"duration":79.41,"subset":"validation","resolution":"648x480","url":"https://www.youtube.com/watch?v=9Rvz-oIAn50","annotations":[{"segment":[13.672349453978159,22.457297971918877],"label":"Doing step 
aerobics"},{"segment":[28.148954758190328,36.56270826833073],"label":"Doing step aerobics"},{"segment":[40.64585335413416,51.41050858034321],"label":"Doing step aerobics"},{"segment":[54.87499531981279,69.72279563182528],"label":"Doing step aerobics"}]},"cQxTLj4XwuU":{"duration":15.09,"subset":"training","resolution":"480x480","url":"https://www.youtube.com/watch?v=cQxTLj4XwuU","annotations":[{"segment":[7.7949525792843835,12.697988404829553],"label":"Cleaning shoes"}]},"i-LI4TpJNQ8":{"duration":161.379,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=i-LI4TpJNQ8","annotations":[{"segment":[0,134.44020072074883],"label":"Canoeing"}]},"VIjOP5ZBvHg":{"duration":71.216,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=VIjOP5ZBvHg","annotations":[{"segment":[16.84808722741433,46.45613551401869],"label":"Grooming dog"}]},"uE_8WuNO7cA":{"duration":167.904,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uE_8WuNO7cA","annotations":[{"segment":[20.685552262090486,156.07668642745708],"label":"Carving jack-o-lanterns"}]},"yzmrLFHRaTY":{"duration":148.306,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=yzmrLFHRaTY","annotations":[]},"DgZ5-oatDg8":{"duration":174.173,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DgZ5-oatDg8","annotations":[]},"yVrru7t3478":{"duration":50.365,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=yVrru7t3478","annotations":[{"segment":[39.71769699033717,42.86054146039351],"label":"Fun sliding down"}]},"vdYFwqfqgJA":{"duration":118.283,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=vdYFwqfqgJA","annotations":[{"segment":[0.01,118.221],"label":"Playing ice 
hockey"}]},"eXK0iVPl1j8":{"duration":97.8,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eXK0iVPl1j8","annotations":[{"segment":[0.9555892348022698,82.4576475681356],"label":"Breakdancing"}]},"xlPpP0eVL9A":{"duration":194.17,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xlPpP0eVL9A","annotations":[{"segment":[91.13631981279251,103.85301560062402],"label":"Washing face"},{"segment":[139.5808751950078,149.26978627145084],"label":"Washing face"}]},"rkDuiu5H8JY":{"duration":107.044,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rkDuiu5H8JY","annotations":[{"segment":[16.67344466343131,70.02846644859812],"label":"Braiding hair"}]},"G72MBCYwT8E":{"duration":50.18,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=G72MBCYwT8E","annotations":[{"segment":[18.265091263650547,34.144378315132606],"label":"Hammer throw"}]},"9snScUyE8_4":{"duration":96.78,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=9snScUyE8_4","annotations":[]},"knCj92zA0tU":{"duration":80.62,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=knCj92zA0tU","annotations":[]},"_dp7ZH3Upuw":{"duration":55.427,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_dp7ZH3Upuw","annotations":[{"segment":[0.2,31.44766926677067],"label":"Using the monkey bar"}]},"rJ6BLT5atMo":{"duration":177.19,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rJ6BLT5atMo","annotations":[]},"ZKo3_ifK2tQ":{"duration":33.79,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=ZKo3_ifK2tQ","annotations":[{"segment":[2.5299245429017163,3.9003003369734794],"label":"Drinking coffee"},{"segment":[6.113984312012481,7.589773628705149],"label":"Drinking coffee"},{"segment":[9.856164365054603,11.384660443057724],"label":"Drinking 
coffee"},{"segment":[14.230825553822154,16.12826896099844],"label":"Drinking coffee"},{"segment":[18.02571236817473,19.659621968798753],"label":"Drinking coffee"},{"segment":[21.820599182527303,23.349095260530422],"label":"Drinking coffee"},{"segment":[25.351952190327616,27.091275313572545],"label":"Drinking coffee"},{"segment":[28.93601195943838,30.4645080374415],"label":"Drinking coffee"}]},"hKA9Ui60n-Q":{"duration":161.07999999999998,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=hKA9Ui60n-Q","annotations":[]},"8-1h1YXYvhk":{"duration":127.62,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8-1h1YXYvhk","annotations":[{"segment":[4.379986203529774,5.773618177380157],"label":"Breakdancing"},{"segment":[7.167250151230539,12.74177804663207],"label":"Breakdancing"},{"segment":[66.89433474481837,91.581529710168],"label":"Breakdancing"},{"segment":[94.16970282371295,102.33240433697348],"label":"Breakdancing"},{"segment":[103.72603630265212,124.03324494539781],"label":"Breakdancing"}]},"kM7jFLYDUy0":{"duration":11.541,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kM7jFLYDUy0","annotations":[{"segment":[0.01,11.521],"label":"Riding bumper cars"}]},"ZAztpVmBVIE":{"duration":42.87,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZAztpVmBVIE","annotations":[]},"Z1N185E4gsk":{"duration":50.62,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Z1N185E4gsk","annotations":[{"segment":[24.40159984399376,49.11907800312013],"label":"Shoveling snow"}]},"4NHBmXhDPzA":{"duration":13.42,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=4NHBmXhDPzA","annotations":[]},"B8d9FYuZglQ":{"duration":120.05,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=B8d9FYuZglQ","annotations":[{"segment":[16.676878315132605,99.87388923556942],"label":"Putting on 
makeup"}]},"JOYduGqZSRc":{"duration":183.67,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=JOYduGqZSRc","annotations":[{"segment":[62.17833271426095,137.82386102340092],"label":"Beer pong"}]},"I3SLLGYL1zY":{"duration":118.724,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=I3SLLGYL1zY","annotations":[]},"QWhn9Ncvvso":{"duration":238.52,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=QWhn9Ncvvso","annotations":[{"segment":[0.01,203.72413002642554],"label":"Making a sandwich"}]},"9MGnF59gZfM":{"duration":107.74000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9MGnF59gZfM","annotations":[{"segment":[7.056349432137285,94.92470069422777],"label":"Archery"}]},"QX5e5Mla660":{"duration":184.74,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=QX5e5Mla660","annotations":[]},"bMWWSk_YFY8":{"duration":121.952,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=bMWWSk_YFY8","annotations":[{"segment":[24.72251170046802,97.57484087363495],"label":"Bullfighting"}]},"HDt5z5Y0opo":{"duration":40.543,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HDt5z5Y0opo","annotations":[]},"WxFhomp9xsI":{"duration":136.81,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=WxFhomp9xsI","annotations":[]},"bBRzBh0gAZQ":{"duration":86.98,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bBRzBh0gAZQ","annotations":[]},"ygDZHsUfnv0":{"duration":154.32,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=ygDZHsUfnv0","annotations":[]},"O2vrVwjqktg":{"duration":146.15,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=O2vrVwjqktg","annotations":[{"segment":[58.139207976482325,142.9540515070203],"label":"Ping-pong"}]},"KjmdARv3lg4":{"duration":59.
98,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=KjmdARv3lg4","annotations":[]},"586OlQMU420":{"duration":183.995,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=586OlQMU420","annotations":[{"segment":[16.935569918034815,148.40151945125425],"label":"Snowboarding"},{"segment":[155.5776070171607,176.53178656006241],"label":"Snowboarding"}]},"-MbZ-W0AbN0":{"duration":77.23,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=-MbZ-W0AbN0","annotations":[]},"wiaZCl-poRw":{"duration":60.42,"subset":"testing","resolution":"160x120","url":"https://www.youtube.com/watch?v=wiaZCl-poRw","annotations":[]},"ViCGpj478Ik":{"duration":46.81,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ViCGpj478Ik","annotations":[{"segment":[0.01,46.8109977324263],"label":"Doing kickboxing"}]},"Luzz1xyiFaw":{"duration":138.18,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=Luzz1xyiFaw","annotations":[]},"E8r9-dSKLwc":{"duration":49.343,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=E8r9-dSKLwc","annotations":[{"segment":[0.2,49.301],"label":"BMX"}]},"Mf3IyeMF8ug":{"duration":214.95,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Mf3IyeMF8ug","annotations":[]},"hV65qrcSBJU":{"duration":136.95,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hV65qrcSBJU","annotations":[]},"UPSd8xmShoA":{"duration":217.014,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UPSd8xmShoA","annotations":[]},"bCOS_e4HmT4":{"duration":46.05,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bCOS_e4HmT4","annotations":[]},"bqnRA6rZcqs":{"duration":112.664,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=bqnRA6rZcqs","annotations":[{"segment":[107.91765530757
286,112.66321995464853],"label":"Hitting a pinata"}]},"sA8fmJulHMs":{"duration":103.28,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=sA8fmJulHMs","annotations":[{"segment":[13.373493584641345,71.8623872138559],"label":"Smoking a cigarette"}]},"L5Jl7Tql2V8":{"duration":50.32,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=L5Jl7Tql2V8","annotations":[]},"wyARf3xHOgs":{"duration":60.16,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wyARf3xHOgs","annotations":[{"segment":[4.09180364,58.38001170046802],"label":"Using uneven bars"}]},"wSTU_jI-FRU":{"duration":50.063,"subset":"training","resolution":"320x180","url":"https://www.youtube.com/watch?v=wSTU_jI-FRU","annotations":[{"segment":[6.32090639625585,45.729026521060845],"label":"Sumo"}]},"xm01x3C3RJo":{"duration":229.51,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xm01x3C3RJo","annotations":[{"segment":[32.223856219554904,46.90361294179658],"label":"Bathing dog"},{"segment":[90.58484026163768,114.57371100286188],"label":"Bathing dog"},{"segment":[133.19193904082692,154.67450985386355],"label":"Bathing dog"}]},"juLxWt_3omw":{"duration":28.05,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=juLxWt_3omw","annotations":[{"segment":[4.196592823712948,26.053847113884558],"label":"Clean and jerk"}]},"qRmaZNDyDhA":{"duration":199.901,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=qRmaZNDyDhA","annotations":[{"segment":[85.12020748829953,124.7182527301092],"label":"Changing car wheel"}]},"YpA8T0ZjRIo":{"duration":80.249,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YpA8T0ZjRIo","annotations":[{"segment":[12.417413586257029,56.89024224669316],"label":"Hand car 
wash"}]},"KTo5SIY3INk":{"duration":94.297,"subset":"testing","resolution":"1280x714","url":"https://www.youtube.com/watch?v=KTo5SIY3INk","annotations":[]},"f8SWWURHV1M":{"duration":38.13,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=f8SWWURHV1M","annotations":[]},"uFMDSiHu7g4":{"duration":181.23,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=uFMDSiHu7g4","annotations":[{"segment":[0,181.201],"label":"Cumbia"}]},"6s9jdXxtoW4":{"duration":193.47,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=6s9jdXxtoW4","annotations":[]},"SH0t2-GgwiQ":{"duration":118.747,"subset":"training","resolution":"852x480","url":"https://www.youtube.com/watch?v=SH0t2-GgwiQ","annotations":[{"segment":[51.100491673532076,55.90955405653305],"label":"Doing crunches"},{"segment":[62.38329131633624,77.55033407957612],"label":"Doing crunches"},{"segment":[97.89636705465401,105.29492450013687],"label":"Doing crunches"}]},"KRw3ZeLhvy0":{"duration":168.74,"subset":"testing","resolution":"1920x810","url":"https://www.youtube.com/watch?v=KRw3ZeLhvy0","annotations":[]},"oRAYA7hFGCc":{"duration":36.22,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oRAYA7hFGCc","annotations":[]},"WNzVlf5i1ps":{"duration":115.78,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=WNzVlf5i1ps","annotations":[]},"qdE6dbQOnt0":{"duration":42.03,"subset":"validation","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=qdE6dbQOnt0","annotations":[{"segment":[4.392942277691107,38.94638377535101],"label":"Washing 
face"}]},"MRsur5x9yz0":{"duration":161.495,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=MRsur5x9yz0","annotations":[]},"j82E082KJSw":{"duration":231.11,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=j82E082KJSw","annotations":[{"segment":[16.94553155850234,68.50321268330734],"label":"Volleyball"},{"segment":[73.19027460374416,141.69348728705148],"label":"Volleyball"},{"segment":[144.93837630889234,177.3872665273011],"label":"Volleyball"},{"segment":[183.15595812168488,220.29191026053041],"label":"Volleyball"}]},"Rr5lVqc01vw":{"duration":132.656,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Rr5lVqc01vw","annotations":[]},"8-F3l2Y1F54":{"duration":229.692,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=8-F3l2Y1F54","annotations":[]},"taGuG7DT6zE":{"duration":102.284,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=taGuG7DT6zE","annotations":[]},"D-yu30OVvKw":{"duration":209.14,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=D-yu30OVvKw","annotations":[]},"Ed08LA1pjIg":{"duration":185.481,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ed08LA1pjIg","annotations":[{"segment":[13.889372555382216,23.72767811544462],"label":"Skiing"},{"segment":[66.84260542277691,80.15325412168487],"label":"Skiing"},{"segment":[87.3873023276131,88.2553881123245],"label":"Skiing"},{"segment":[92.30645510764431,94.04262667706708],"label":"Skiing"},{"segment":[117.77030479251171,125.29371492667707],"label":"Skiing"},{"segment":[131.3703154196568,154.51926967862715],"label":"Skiing"}]},"SQs7o8LeU6A":{"duration":131.29,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=SQs7o8LeU6A","annotations":[{"segment":[3.276405616224649,128.18936973478938],"label":"Playing 
bagpipes"}]},"dzsSiqXxfpw":{"duration":156.224,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dzsSiqXxfpw","annotations":[{"segment":[33.81055919003115,142.78272118380062],"label":"Disc dog"}]},"XLOhMBLYy2g":{"duration":237.889,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XLOhMBLYy2g","annotations":[{"segment":[57.52360947499124,59.750329841765094],"label":"Slacklining"},{"segment":[65.31713075869973,67.91497118660257],"label":"Slacklining"},{"segment":[73.11065204240823,138.42778280110795],"label":"Slacklining"},{"segment":[156.24154573529879,161.43722659110446],"label":"Slacklining"},{"segment":[188.52899105351972,190.75571142029355],"label":"Slacklining"}]},"O5CYGnqXJbQ":{"duration":106.371,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=O5CYGnqXJbQ","annotations":[]},"rWfQj0X8WU4":{"duration":128.059,"subset":"testing","resolution":"586x480","url":"https://www.youtube.com/watch?v=rWfQj0X8WU4","annotations":[]},"0ZHZ1ZqmT7s":{"duration":10.5,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=0ZHZ1ZqmT7s","annotations":[{"segment":[0.01,10.495418],"label":"Getting a haircut"}]},"IBscTNN6qfY":{"duration":237.03,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=IBscTNN6qfY","annotations":[{"segment":[124.61581209915063,168.98939369578784],"label":"Braiding hair"}]},"1xLtBuMxBDY":{"duration":30.883,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=1xLtBuMxBDY","annotations":[]},"wlAnWPxnQ8k":{"duration":93.21000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wlAnWPxnQ8k","annotations":[{"segment":[4.685311012616166,86.13053252663485],"label":"Using the balance 
beam"}]},"YDNEa5ZOf6c":{"duration":122.741,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=YDNEa5ZOf6c","annotations":[{"segment":[51.50867394695789,63.572043681747296],"label":"Removing ice from car"},{"segment":[67.78464898595945,103.59175440405616],"label":"Removing ice from car"}]},"WqDep-4l0yc":{"duration":93.182,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WqDep-4l0yc","annotations":[{"segment":[68.21698232087228,86.06951173676013],"label":"Canoeing"}]},"C7so_y5I_9s":{"duration":191.389,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=C7so_y5I_9s","annotations":[{"segment":[32.83886115444618,179.13978159126364],"label":"Wakeboarding"}]},"ntKbTDQUhDA":{"duration":126.921,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ntKbTDQUhDA","annotations":[{"segment":[5.611035192436147,120.59773612701558],"label":"Making a lemonade"}]},"JNFUZz1bqmg":{"duration":213.37,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JNFUZz1bqmg","annotations":[{"segment":[126.65601945302302,169.26303782355376],"label":"Bungee jumping"}]},"aNsj5OrEGLY":{"duration":137.32,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aNsj5OrEGLY","annotations":[]},"puwMWETdvL0":{"duration":103.585,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=puwMWETdvL0","annotations":[{"segment":[5.0426543174458445,83.90896966722671],"label":"Carving jack-o-lanterns"}]},"SOkS5d8GjZ4":{"duration":20.921,"subset":"training","resolution":"800x480","url":"https://www.youtube.com/watch?v=SOkS5d8GjZ4","annotations":[{"segment":[0.01,18.669133210608422],"label":"Arm wrestling"}]},"9WmsYbZl1pw":{"duration":237.19,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9WmsYbZl1pw","annotations":[{"segment":[103.40341645000001,190.23192582],"label":"Making 
a sandwich"}]},"RkhSR7pz9qc":{"duration":100.566,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=RkhSR7pz9qc","annotations":[{"segment":[0.01,43.458155509828394],"label":"Futsal"},{"segment":[49.57681278377535,55.85235870577223],"label":"Futsal"},{"segment":[65.26567758876756,88.01453155600625],"label":"Futsal"}]},"Zguc8yykcgk":{"duration":14.792,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Zguc8yykcgk","annotations":[{"segment":[3.8304590639625586,11.560602355694227],"label":"Putting on shoes"}]},"bKEvJveN6k4":{"duration":95.13,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=bKEvJveN6k4","annotations":[{"segment":[41.953212,89.58516692667706],"label":"Dodgeball"},{"segment":[6.880897560000002,36.67909392000001],"label":"Dodgeball"}]},"BoKEPRjFydk":{"duration":116.77000000000001,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=BoKEPRjFydk","annotations":[{"segment":[8.013422776911076,88.32977379095163],"label":"Discus 
throw"}]},"by8QQWbyimw":{"duration":191.38,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=by8QQWbyimw","annotations":[{"segment":[6.267511689547582,189.517615374415],"label":"Tumbling"},{"segment":[100.21891725999998,151.19053377999998],"label":"Tumbling"}]},"mK3keyPMe3o":{"duration":208.61,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mK3keyPMe3o","annotations":[{"segment":[12.16426480967679,123.61597820531541],"label":"Skateboarding"},{"segment":[129.7717414451908,154.8457682380605],"label":"Skateboarding"},{"segment":[155.49563599506985,165.5685862287147],"label":"Skateboarding"},{"segment":[166.21845398572407,184.08981730348106],"label":"Skateboarding"},{"segment":[185.0646189389951,195.13756917263993],"label":"Skateboarding"},{"segment":[199.036775714696,204.56065164927546],"label":"Skateboarding"}]},"_m8WprCWHF4":{"duration":106.84,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_m8WprCWHF4","annotations":[{"segment":[19.333636755070202,21.833675990639623],"label":"Walking the dog"},{"segment":[48.16742260530421,50.667461840873635],"label":"Walking the dog"},{"segment":[73.50115352574103,83.33464118564743],"label":"Walking the dog"}]},"ucEqZtmQS-0":{"duration":78.67,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ucEqZtmQS-0","annotations":[{"segment":[31.126557198127927,70.12318653],"label":"Shaving"}]},"pO86ZQSyYdA":{"duration":162.68,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pO86ZQSyYdA","annotations":[]},"wRyUFEWHPFM":{"duration":15.465,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wRyUFEWHPFM","annotations":[]},"NfVL1o5HMEo":{"duration":66.67,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NfVL1o5HMEo","annotations":[{"segment":[15.268965678627145,51.223546021840875],"label":"Using parallel 
bars"}]},"tt-uDaZQWWc":{"duration":43.12,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=tt-uDaZQWWc","annotations":[]},"fWVUEOVUzS4":{"duration":149.4,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=fWVUEOVUzS4","annotations":[{"segment":[10.138457183185286,121.54495220761211],"label":"Playing badminton"}]},"b40ECuGuck8":{"duration":105.791,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=b40ECuGuck8","annotations":[{"segment":[8.956595889855025,17.682690905431347],"label":"Throwing darts"},{"segment":[24.59770959702013,32.17130149733166],"label":"Throwing darts"},{"segment":[38.59239028237839,48.30634511103882],"label":"Throwing darts"},{"segment":[69.39381551607079,77.79221271233247],"label":"Throwing darts"},{"segment":[84.87321426996486,96.72977501762843],"label":"Throwing darts"}]},"aFlumCYsZgM":{"duration":54.846,"subset":"training","resolution":"160x120","url":"https://www.youtube.com/watch?v=aFlumCYsZgM","annotations":[{"segment":[0.01,54.84553287981859],"label":"Having an ice cream"}]},"A6rMOyqUm44":{"duration":9.57,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=A6rMOyqUm44","annotations":[]},"5yqHTGQm2B0":{"duration":84.869,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=5yqHTGQm2B0","annotations":[{"segment":[7.811649173449932,72.02605319188767],"label":"Powerbocking"}]},"PntiXZ692xM":{"duration":187.339,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=PntiXZ692xM","annotations":[{"segment":[3.9077284823622658,165.17441539825012],"label":"Running a marathon"}]},"tcGO-GHcQIQ":{"duration":124.134,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=tcGO-GHcQIQ","annotations":[{"segment":[11.519515600624025,68.8266856474259],"label":"Sharpening 
knives"}]},"gOKkjA2yvGo":{"duration":54.15,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gOKkjA2yvGo","annotations":[{"segment":[4.561688687955682,48.826963887675504],"label":"Spinning"}]},"toS-_4Ik5us":{"duration":113.732,"subset":"training","resolution":"656x480","url":"https://www.youtube.com/watch?v=toS-_4Ik5us","annotations":[{"segment":[12.94878783151326,106.25101248049921],"label":"Playing blackjack"}]},"MJpfymVwtGc":{"duration":133.96,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MJpfymVwtGc","annotations":[{"segment":[28.212168486739465,96.3393309065696],"label":"Getting a piercing"}]},"FtHP0PNqo2s":{"duration":190.85,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FtHP0PNqo2s","annotations":[{"segment":[19.42601076,161.62791703432137],"label":"Shaving legs"}]},"tUCGJk6aSeg":{"duration":43.817,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=tUCGJk6aSeg","annotations":[{"segment":[0.2,43.23459126365055],"label":"Playing drums"}]},"MSSb3wPd5hM":{"duration":159.127,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MSSb3wPd5hM","annotations":[{"segment":[0.2,155.36415420560746],"label":"Drum 
corps"}]},"wIthr5w6RV4":{"duration":76.046,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wIthr5w6RV4","annotations":[{"segment":[19.218949982489097,36.77700282371295],"label":"Curling"},{"segment":[40.92924507800312,44.84421634633385],"label":"Curling"},{"segment":[51.487803953198124,53.03006536193447],"label":"Curling"},{"segment":[65.72406311076443,70.4694828299532],"label":"Curling"}]},"PvB98KAatK8":{"duration":204.963,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=PvB98KAatK8","annotations":[{"segment":[24.2257385511553,28.05085593247797],"label":"Wakeboarding"},{"segment":[108.69708072203089,163.20500340587893],"label":"Wakeboarding"},{"segment":[174.99911389505422,179.46175080329684],"label":"Wakeboarding"}]},"sHxzQu_4edY":{"duration":165.26,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=sHxzQu_4edY","annotations":[{"segment":[38.30799336,93.43739495999999],"label":"Horseback riding"}]},"tseeGOVN_eY":{"duration":105.19,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=tseeGOVN_eY","annotations":[{"segment":[3.444469567862715,100.21766218876756],"label":"Washing dishes"}]},"Oc8ACBiwIyE":{"duration":202.2,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Oc8ACBiwIyE","annotations":[{"segment":[0,202.161],"label":"Fixing the roof"}]},"asM7fbK_F-U":{"duration":126.596,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=asM7fbK_F-U","annotations":[{"segment":[7.304238689547582,120.24282215288612],"label":"Playing congas"}]},"6HZ55yit9es":{"duration":80.179,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=6HZ55yit9es","annotations":[]},"3nX5ZwzHftM":{"duration":33.112,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=3nX5ZwzHftM","annotations":[{"segment":[0.01,33.111655],"label":"River 
tubing"}]},"KxJpfKZbNiI":{"duration":15.395,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=KxJpfKZbNiI","annotations":[{"segment":[0.2,13.003198127925115],"label":"Using the monkey bar"}]},"o7ChwZHkdUc":{"duration":8.43,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=o7ChwZHkdUc","annotations":[]},"J6ScF5n_Cug":{"duration":136.19,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=J6ScF5n_Cug","annotations":[{"segment":[4.512503364628907,130.30335160294666],"label":"Playing flauta"}]},"HJZj6z1dyT4":{"duration":55.961,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HJZj6z1dyT4","annotations":[{"segment":[0.01,55.960089],"label":"Table soccer"}]},"EXUKhI7WTqo":{"duration":104.72,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=EXUKhI7WTqo","annotations":[{"segment":[0.2,100.2908174726989],"label":"Cumbia"}]},"jfIcmcE320Q":{"duration":25.659,"subset":"training","resolution":"180x240","url":"https://www.youtube.com/watch?v=jfIcmcE320Q","annotations":[{"segment":[0.2,25.458049886621318],"label":"Hurling"}]},"FBqtQ3HCk48":{"duration":181.023,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FBqtQ3HCk48","annotations":[{"segment":[0.19999999999999998,16.37959098772114],"label":"Carving jack-o-lanterns"},{"segment":[28.523080857928193,148.26353911299307],"label":"Carving 
jack-o-lanterns"}]},"KyDcuYjDi_Y":{"duration":220.03,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=KyDcuYjDi_Y","annotations":[{"segment":[41.70658855741985,63.332227068674584],"label":"Tumbling"}]},"I9ZeyASdgTk":{"duration":48.948,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=I9ZeyASdgTk","annotations":[{"segment":[35.16904992199688,40.4495007800312],"label":"Welding"}]},"tYV23sjRgt8":{"duration":84.48,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=tYV23sjRgt8","annotations":[{"segment":[41.561685880000006,69.60452184087364],"label":"Mixing drinks"}]},"rnyG80WzNPk":{"duration":231.13,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rnyG80WzNPk","annotations":[]},"B1DNoole3Wo":{"duration":93.41,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=B1DNoole3Wo","annotations":[{"segment":[14.718846914196568,17.196276592823715],"label":"Smoking a cigarette"},{"segment":[23.462716368174725,31.477930034321375],"label":"Smoking a cigarette"}]},"Anvhv9FFv1Q":{"duration":227.719,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Anvhv9FFv1Q","annotations":[{"segment":[49.73551742069683,94.85273635881435],"label":"Sharpening knives"},{"segment":[121.49676342589704,213.15221653666146],"label":"Sharpening knives"}]},"pfDGqEJR2-E":{"duration":139.343,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pfDGqEJR2-E","annotations":[]},"RiQdIJNDTo4":{"duration":209.03,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=RiQdIJNDTo4","annotations":[{"segment":[7.497605304212169,188.26553768000002],"label":"Playing 
piano"}]},"NLTd7qCLT7o":{"duration":155.551,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NLTd7qCLT7o","annotations":[{"segment":[0.19999999999999998,77.4112354208454],"label":"Powerbocking"},{"segment":[84.44862045910409,146.32907398595944],"label":"Powerbocking"}]},"ohNtDBYmhX0":{"duration":154.92000000000002,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ohNtDBYmhX0","annotations":[{"segment":[9.273690779999999,117.06356423868955],"label":"Skateboarding"}]},"l_R7AbA-d04":{"duration":61.998,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=l_R7AbA-d04","annotations":[{"segment":[24.506252336448597,42.368970404984424],"label":"Curling"},{"segment":[49.124842679127724,60.22377570093458],"label":"Curling"}]},"szW1c2SGdew":{"duration":136.371,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=szW1c2SGdew","annotations":[{"segment":[10.068498173104263,116.06386441882447],"label":"Capoeira"}]},"ItFq_6cIByw":{"duration":27.539,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=ItFq_6cIByw","annotations":[{"segment":[7.174712098283932,27.338866213151928],"label":"Playing drums"}]},"dKf46wUnn-s":{"duration":65.876,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dKf46wUnn-s","annotations":[{"segment":[0.01,64.09960358318436],"label":"Croquet"}]},"Paj9y9YMSg0":{"duration":91.53,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Paj9y9YMSg0","annotations":[{"segment":[0.01,80.39487173166927],"label":"Playing polo"}]},"pTmlOZY0e4c":{"duration":124.668,"subset":"validation","resolution":"656x480","url":"https://www.youtube.com/watch?v=pTmlOZY0e4c","annotations":[{"segment":[10.69693678627145,83.43610693291731],"label":"Rope skipping"},{"segment":[85.769984049922,119.61120224648985],"label":"Rope 
skipping"}]},"4a0Q-IAqO8U":{"duration":231.596,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4a0Q-IAqO8U","annotations":[{"segment":[62.14427825004156,76.23513203929518],"label":"Swimming"},{"segment":[126.45638015996829,129.70811564979604],"label":"Swimming"}]},"Z2gpcqXWNek":{"duration":153.79,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Z2gpcqXWNek","annotations":[]},"JZK-HJ-Tdw4":{"duration":221.75,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JZK-HJ-Tdw4","annotations":[{"segment":[11.75843057722309,22.825188767550703],"label":"Pole vault"},{"segment":[49.108739469578786,69.16723868954759],"label":"Pole vault"},{"segment":[88.87990171606864,98.56331513260531],"label":"Pole vault"},{"segment":[136.95113260530422,186.40570826833073],"label":"Pole vault"},{"segment":[201.6406911076443,215.4753868954758],"label":"Pole vault"}]},"oSDHYvvYo5M":{"duration":52.083,"subset":"validation","resolution":"408x720","url":"https://www.youtube.com/watch?v=oSDHYvvYo5M","annotations":[{"segment":[0.01,52.082357],"label":"Plastering"}]},"lkSkFmHYdtI":{"duration":21.64,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=lkSkFmHYdtI","annotations":[{"segment":[19.664653666146645,19.6727076],"label":"Discus throw"}]},"O62LVI0XNHo":{"duration":171.5,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=O62LVI0XNHo","annotations":[{"segment":[11.772409889592863,163.20840983299198],"label":"Playing bagpipes"}]},"JuS9DLgPQfk":{"duration":86.63,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=JuS9DLgPQfk","annotations":[]},"8xm7fWZ0RmE":{"duration":33.74,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8xm7fWZ0RmE","annotations":[{"segment":[8.31615942901716,28.053879592823712],"label":"Playing 
piano"}]},"B67jaG6qKWE":{"duration":155.876,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=B67jaG6qKWE","annotations":[{"segment":[12.160608424336974,149.0890592823713],"label":"Table soccer"}]},"DsnVkCHbEVM":{"duration":65.9,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=DsnVkCHbEVM","annotations":[{"segment":[0.01,65.841],"label":"Playing water polo"}]},"vh55SaEpuws":{"duration":71.495,"subset":"validation","resolution":"422x240","url":"https://www.youtube.com/watch?v=vh55SaEpuws","annotations":[{"segment":[0.01,71.441],"label":"Peeling potatoes"}]},"_HIJu_hNweY":{"duration":150.05,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_HIJu_hNweY","annotations":[{"segment":[4.9095,87.31304275844504],"label":"Playing badminton"}]},"rmoa-Ffel2k":{"duration":94.552,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=rmoa-Ffel2k","annotations":[{"segment":[28.32124465386779,86.4387975725429],"label":"Elliptical trainer"}]},"bRgvAHL3dJk":{"duration":58.42,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=bRgvAHL3dJk","annotations":[]},"aELu8QS8T54":{"duration":88.306,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=aELu8QS8T54","annotations":[{"segment":[0,11.603159711332708],"label":"Building sandcastles"},{"segment":[18.064634789214324,87.07868774871588],"label":"Building sandcastles"}]},"8J_bB1vI0uE":{"duration":161.82,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8J_bB1vI0uE","annotations":[{"segment":[12.874903216848674,141.1190372199688],"label":"Rock climbing"}]},"0L959X4oxaY":{"duration":59.095,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0L959X4oxaY","annotations":[{"segment":[31.160456758190325,52.364317865834636],"label":"Snow 
tubing"}]},"Lm7KLQgzQDk":{"duration":107.6,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Lm7KLQgzQDk","annotations":[{"segment":[76.37843516380656,104.07610945397816],"label":"Breakdancing"}]},"sicaCRRCkiY":{"duration":16.045,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sicaCRRCkiY","annotations":[{"segment":[10.22216770670827,15.41437987519501],"label":"Baking cookies"}]},"w7IeqGuuA7Q":{"duration":233.013,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=w7IeqGuuA7Q","annotations":[{"segment":[0.01,24.537170874590085],"label":"Hitting a pinata"},{"segment":[71.79394441083765,77.24664904963547],"label":"Hitting a pinata"},{"segment":[124.13990894329652,143.04261835779556],"label":"Hitting a pinata"},{"segment":[176.48587347575534,179.75749625903404],"label":"Hitting a pinata"}]},"CecqF_uHeOI":{"duration":80.13,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=CecqF_uHeOI","annotations":[]},"tTEAlDsmZrA":{"duration":193.319,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tTEAlDsmZrA","annotations":[{"segment":[23.825833073322933,174.92383775351013],"label":"Trimming branches or hedges"}]},"Q7eK0xT6VSQ":{"duration":28.027,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Q7eK0xT6VSQ","annotations":[{"segment":[4.968588785046729,13.171809968847352],"label":"Changing car 
wheel"}]},"jv70BvlA-jI":{"duration":78.646,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jv70BvlA-jI","annotations":[]},"dTwH5Fzu4eE":{"duration":36.02,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dTwH5Fzu4eE","annotations":[{"segment":[2.0788198711621937,34.83427828393136],"label":"Ballet"}]},"4dVVaDvCzn0":{"duration":11.007,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4dVVaDvCzn0","annotations":[{"segment":[0.01,10.944],"label":"Playing blackjack"}]},"4VWl4OsFyK0":{"duration":20.57,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4VWl4OsFyK0","annotations":[]},"LFbW1KVHT8s":{"duration":79.064,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LFbW1KVHT8s","annotations":[{"segment":[17.391600786271454,64.75596037441498],"label":"Wakeboarding"}]},"Exq_WdPgCu0":{"duration":223.26,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Exq_WdPgCu0","annotations":[]},"n_hNYA5kWME":{"duration":192.447,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=n_hNYA5kWME","annotations":[{"segment":[15.036482802169232,81.98761763156114],"label":"Playing rubik cube"},{"segment":[85.29013997789498,192.446984],"label":"Playing rubik cube"}]},"jRnJRqvmZvk":{"duration":115.357,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jRnJRqvmZvk","annotations":[{"segment":[0.01,9.358112642873063],"label":"Arm wrestling"},{"segment":[41.211688369575604,110.13778564118566],"label":"Arm wrestling"}]},"4fEMDQnD4Xg":{"duration":55.705,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4fEMDQnD4Xg","annotations":[{"segment":[0.01,55.70467],"label":"Playing 
congas"}]},"iwGYaSil0Z8":{"duration":23.29,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=iwGYaSil0Z8","annotations":[]},"xcBJP14YBvg":{"duration":61.0,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=xcBJP14YBvg","annotations":[{"segment":[2.85804992199688,39.53635725429017],"label":"Using the pommel horse"}]},"rpjxjU-mLiw":{"duration":21.46,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rpjxjU-mLiw","annotations":[]},"Zsaa3XXIAhk":{"duration":52.85,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Zsaa3XXIAhk","annotations":[{"segment":[20.67558658346334,52.801],"label":"Playing guitarra"}]},"n1sWb2K6hBM":{"duration":167.35,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=n1sWb2K6hBM","annotations":[{"segment":[32.73703055070203,145.73828448],"label":"Grooming horse"}]},"6VygM9-XgAk":{"duration":172.827,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6VygM9-XgAk","annotations":[{"segment":[14.821684867394696,26.948517940717633],"label":"Snowboarding"},{"segment":[33.955132605304215,48.23784711388456],"label":"Snowboarding"},{"segment":[92.1639313572543,98.09260530421219],"label":"Snowboarding"},{"segment":[99.70951638065522,134.27052848049922],"label":"Snowboarding"}]},"A49K-WtIzsY":{"duration":52.849,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=A49K-WtIzsY","annotations":[{"segment":[0.01,49.17720043869251],"label":"Removing ice from car"}]},"HcQVvWPkogM":{"duration":126.224,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=HcQVvWPkogM","annotations":[]},"rF1AqlLjECk":{"duration":182.138,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rF1AqlLjECk","annotations":[{"segment":[14.771001560062402,164.46942121684867],"label":"Playing ice 
hockey"}]},"w_56T4AZgw0":{"duration":49.668,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=w_56T4AZgw0","annotations":[]},"hgpekgU915k":{"duration":13.654,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=hgpekgU915k","annotations":[{"segment":[1.1289026458658347,3.6849086365054604],"label":"Doing a powerbomb"}]},"yVK-GMdc9UA":{"duration":53.38,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=yVK-GMdc9UA","annotations":[{"segment":[4.497136343213729,53.382674],"label":"Ironing clothes"}]},"UmH4VPH0KG4":{"duration":57.632,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=UmH4VPH0KG4","annotations":[{"segment":[0.2,34.32696099843994],"label":"Welding"}]},"x6Z0xTgWoVI":{"duration":152.207,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=x6Z0xTgWoVI","annotations":[{"segment":[0.01,152.206802],"label":"Skiing"}]},"Zc44Ddk2NG8":{"duration":7.059,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Zc44Ddk2NG8","annotations":[{"segment":[0.2,6.858866213151927],"label":"Fun sliding down"}]},"CgaWju3yGc4":{"duration":164.86,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=CgaWju3yGc4","annotations":[{"segment":[32.66370219434628,124.08098303999999],"label":"Fixing bicycle"}]},"fs3_MvZJe-8":{"duration":219.03,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fs3_MvZJe-8","annotations":[]},"6l0JqBhldeA":{"duration":126.085,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6l0JqBhldeA","annotations":[{"segment":[3.9339778761218476,125.8839909297052],"label":"Croquet"}]},"4b2_OpAGwW0":{"duration":238.4,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=4b2_OpAGwW0","annotations":[{"segment":[17.108178830554586,168.10645285675372],"label":"Braiding 
hair"}]},"mTkzorlDa9U":{"duration":184.088,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mTkzorlDa9U","annotations":[]},"agr3ZKq1Iso":{"duration":90.303,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=agr3ZKq1Iso","annotations":[{"segment":[0.2,76.67981863763465],"label":"Riding bumper cars"}]},"8pt2jxlkNpw":{"duration":119.978,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8pt2jxlkNpw","annotations":[]},"OSw73cXwjR4":{"duration":159.71,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=OSw73cXwjR4","annotations":[{"segment":[55.6340663026521,106.91199765990639],"label":"Playing accordion"}]},"TMGG5x-UQ2s":{"duration":90.907,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TMGG5x-UQ2s","annotations":[{"segment":[11.770989560062404,28.080191962558505],"label":"Snow tubing"},{"segment":[44.67303266770671,54.31673495787832],"label":"Snow tubing"},{"segment":[59.56404355694227,79.1350864399376],"label":"Snow tubing"}]},"XSDIekFXkv0":{"duration":78.6,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=XSDIekFXkv0","annotations":[{"segment":[8.319126365054602,22.38823712948518],"label":"Plataform diving"},{"segment":[23.489297971918877,34.010546021840874],"label":"Plataform diving"},{"segment":[36.579687987519506,47.1009360374415],"label":"Plataform diving"},{"segment":[48.201996879875196,59.45728549141966],"label":"Plataform diving"},{"segment":[60.55834633385335,64.22854914196569],"label":"Plataform diving"},{"segment":[66.30833073322933,72.42533541341655],"label":"Plataform diving"}]},"ORcQpMrCLTI":{"duration":208.334,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ORcQpMrCLTI","annotations":[{"segment":[10.39980031201248,192.41477691107644],"label":"Rope 
skipping"}]},"pBLKQ9yCSC0":{"duration":24.06,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=pBLKQ9yCSC0","annotations":[]},"4mlA78hn4mY":{"duration":214.14,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=4mlA78hn4mY","annotations":[{"segment":[4.675672386895476,55.10613884555382],"label":"Playing racquetball"},{"segment":[63.789530421216845,200.71993603744153],"label":"Playing racquetball"}]},"oEfj2MWh5O0":{"duration":75.3,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=oEfj2MWh5O0","annotations":[]},"Lg6QZST92xo":{"duration":52.32,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=Lg6QZST92xo","annotations":[{"segment":[14.397859080000002,50.437436284867395],"label":"Chopping wood"}]},"Tcf9sOzU-7M":{"duration":224.28,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Tcf9sOzU-7M","annotations":[{"segment":[80.81071918876755,207.1178034321373],"label":"Paintball"}]},"N3rFdrfE3q8":{"duration":34.505,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=N3rFdrfE3q8","annotations":[{"segment":[0.01,27.955881435257407],"label":"Fun sliding down"}]},"R3CGHNqunGA":{"duration":182.0,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=R3CGHNqunGA","annotations":[{"segment":[69.56242849713988,73.253496131682],"label":"Skateboarding"},{"segment":[87.73383839027031,91.4249060248124],"label":"Skateboarding"},{"segment":[105.05346344466022,108.74453107920236],"label":"Skateboarding"},{"segment":[126.91594097233275,128.33558237023357],"label":"Skateboarding"}]},"Z4yZr5dIMec":{"duration":160.2,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Z4yZr5dIMec","annotations":[{"segment":[53.46550159750391,73.20276620592824],"label":"Playing polo"},{"segment":[110.01803531999998,124.16987987831514],"label":"Playing 
polo"}]},"4uKoAk5NCkI":{"duration":62.07,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=4uKoAk5NCkI","annotations":[]},"kBUDMFgWO9I":{"duration":12.61,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=kBUDMFgWO9I","annotations":[{"segment":[6.776295652106084,8.52061248],"label":"Plataform diving"}]},"5MJmwr1cmnU":{"duration":124.297,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5MJmwr1cmnU","annotations":[{"segment":[23.848375975039005,109.3605678627145],"label":"Surfing"}]},"XxYoSn6NE_4":{"duration":220.962,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=XxYoSn6NE_4","annotations":[{"segment":[17.925080497097433,174.42482141653664],"label":"Braiding hair"}]},"eUCKEnKxG8w":{"duration":167.555,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=eUCKEnKxG8w","annotations":[{"segment":[48.60403432137286,138.2340546021841],"label":"Gargling mouthwash"}]},"JXL7LxJSjzw":{"duration":142.76,"subset":"training","resolution":"432x720","url":"https://www.youtube.com/watch?v=JXL7LxJSjzw","annotations":[{"segment":[8.124808892355695,134.33759360374415],"label":"Chopping wood"}]},"UciHec7xncc":{"duration":18.228,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=UciHec7xncc","annotations":[{"segment":[5.981394424366929,18.02766439909297],"label":"Using the monkey bar"}]},"DG9-yffcXS4":{"duration":191.89,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=DG9-yffcXS4","annotations":[{"segment":[14.66491263650546,184.07880655226208],"label":"Fixing the 
roof"}]},"W5jswt09V3s":{"duration":192.982,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=W5jswt09V3s","annotations":[{"segment":[90.6198033826115,187.86298004368174],"label":"Capoeira"}]},"LnIxI3Av3W0":{"duration":103.887,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LnIxI3Av3W0","annotations":[]},"d4Z5KlP8vlE":{"duration":128.616,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=d4Z5KlP8vlE","annotations":[]},"BtYKJOmw-aU":{"duration":112.66,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=BtYKJOmw-aU","annotations":[{"segment":[0.01,0.02],"label":"Brushing teeth"}]},"pCQNYPvJu1I":{"duration":83.08,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=pCQNYPvJu1I","annotations":[]},"I5setZ8js9I":{"duration":113.5,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=I5setZ8js9I","annotations":[{"segment":[82.25583366429817,113.32241563630438],"label":"Surfing"}]},"s0Pw7vKtqpo":{"duration":204.475,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=s0Pw7vKtqpo","annotations":[{"segment":[25.20046649757147,146.09890703655358],"label":"Surfing"},{"segment":[170.9803792823713,183.74010907956318],"label":"Surfing"}]},"Y1kCMRoDjWk":{"duration":164.026,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Y1kCMRoDjWk","annotations":[{"segment":[29.41578003120125,36.83367238689548],"label":"Trimming branches or hedges"},{"segment":[45.0189329173167,67.01682059282372],"label":"Trimming branches or hedges"},{"segment":[104.11898127925117,152.7248939157566],"label":"Trimming branches or 
hedges"}]},"cRBnLFujD3g":{"duration":111.34,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cRBnLFujD3g","annotations":[{"segment":[39.602881853354134,52.630145620904834],"label":"Breakdancing"},{"segment":[60.96759443213729,64.09413773634945],"label":"Breakdancing"},{"segment":[68.43655899219969,77.29509835413417],"label":"Breakdancing"}]},"85RJm2qymRY":{"duration":71.472,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=85RJm2qymRY","annotations":[{"segment":[33.1152775975039,59.986597129485176],"label":"Changing car wheel"}]},"fxyqt18d-jo":{"duration":105.02000000000001,"subset":"training","resolution":"800x480","url":"https://www.youtube.com/watch?v=fxyqt18d-jo","annotations":[{"segment":[42.615117784711394,66.33562480499221],"label":"Making a sandwich"}]},"1Se1ZqCSQvk":{"duration":124.134,"subset":"validation","resolution":"628x360","url":"https://www.youtube.com/watch?v=1Se1ZqCSQvk","annotations":[{"segment":[42.973450858034326,60.007971918876756],"label":"Sharpening knives"},{"segment":[75.68747425897035,123.881],"label":"Sharpening knives"}]},"MwkRK7A46P4":{"duration":187.547,"subset":"training","resolution":"448x252","url":"https://www.youtube.com/watch?v=MwkRK7A46P4","annotations":[{"segment":[46.15657356192737,95.81870967286187],"label":"Futsal"},{"segment":[113.9307828427321,134.6720279243577],"label":"Futsal"},{"segment":[146.9414968458827,166.5142198598131],"label":"Futsal"}]},"mB-hMTdtq38":{"duration":92.811,"subset":"testing","resolution":"400x224","url":"https://www.youtube.com/watch?v=mB-hMTdtq38","annotations":[]},"blOgPoTkhks":{"duration":17.39,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=blOgPoTkhks","annotations":[{"segment":[0.9224565460218409,17.309625775351012],"label":"Using uneven 
bars"}]},"ZJWLTokAJbo":{"duration":63.855,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=ZJWLTokAJbo","annotations":[{"segment":[0.2,9.742146417445483],"label":"Elliptical trainer"},{"segment":[13.718532710280373,50.10246728971962],"label":"Elliptical trainer"},{"segment":[54.675311526479746,56.166456386292836],"label":"Elliptical trainer"}]},"dEpUIpmBvow":{"duration":200.69,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=dEpUIpmBvow","annotations":[]},"jHyWtrhNTTk":{"duration":43.93,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jHyWtrhNTTk","annotations":[{"segment":[9.314408736349455,18.765794071762873],"label":"High jump"},{"segment":[24.58729953198128,35.066009360374416],"label":"High jump"}]},"nCzB1iXKYk4":{"duration":208.68,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=nCzB1iXKYk4","annotations":[{"segment":[10.70826833073323,177.82215288611545],"label":"Cumbia"}]},"CuLC5G7yWxA":{"duration":138.508,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=CuLC5G7yWxA","annotations":[]},"4x3dgSgXQ38":{"duration":70.171,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=4x3dgSgXQ38","annotations":[{"segment":[0.01,70.17070294784581],"label":"Fun sliding down"}]},"24vWSTx6N5M":{"duration":20.225,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=24vWSTx6N5M","annotations":[{"segment":[0.2,19.994],"label":"Swinging at the playground"}]},"QHTFsw_1Pi8":{"duration":26.425,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=QHTFsw_1Pi8","annotations":[]},"ZW3SKczQzJU":{"duration":136.19,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZW3SKczQzJU","annotations":[{"segment":[49.9274301950078,83.70811700780031],"label":"Brushing 
teeth"},{"segment":[92.41886014820594,94.54343164586584],"label":"Brushing teeth"}]},"Pcro3S-4EnQ":{"duration":150.28,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Pcro3S-4EnQ","annotations":[{"segment":[4.923354877052225,20.865646859888],"label":"Putting on makeup"},{"segment":[26.023447207276046,28.602347380970066],"label":"Putting on makeup"},{"segment":[64.9413952830222,144.6528537675507],"label":"Putting on makeup"}]},"AQmWv31K2GI":{"duration":213.717,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=AQmWv31K2GI","annotations":[]},"yr_ZOq9ZB9E":{"duration":196.186,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yr_ZOq9ZB9E","annotations":[{"segment":[24.178855458980262,102.53058960453657],"label":"Longboarding"}]},"6r3qgd1y5KE":{"duration":152.184,"subset":"validation","resolution":"656x480","url":"https://www.youtube.com/watch?v=6r3qgd1y5KE","annotations":[{"segment":[55.14578656573113,125.91227267287486],"label":"Hanging wallpaper"}]},"HbF6UM4jUDk":{"duration":231.596,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HbF6UM4jUDk","annotations":[{"segment":[174.55738940809968,191.14755451713395],"label":"Hula hoop"}]},"QooDDNWaQjU":{"duration":62.439,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QooDDNWaQjU","annotations":[{"segment":[0.01,62.261],"label":"Disc dog"}]},"6yD5jZn6SEs":{"duration":13.21,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=6yD5jZn6SEs","annotations":[{"segment":[0,13.212154],"label":"Using uneven bars"}]},"8cbHNUbu3Tk":{"duration":76.65,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=8cbHNUbu3Tk","annotations":[{"segment":[5.73971367017946,76.44909297052153],"label":"Baton 
twirling"}]},"Q8iXOTXdy2Y":{"duration":137.95,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Q8iXOTXdy2Y","annotations":[{"segment":[11.617244929797193,137.881],"label":"Shot put"}]},"45AIj4-_RBw":{"duration":196.39,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=45AIj4-_RBw","annotations":[{"segment":[11.94908691575663,188.734295900156],"label":"Playing piano"}]},"fykq7xuc3zk":{"duration":6.06,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=fykq7xuc3zk","annotations":[{"segment":[0.08509153198127925,3.5360258845553822],"label":"Springboard diving"}]},"6G8m-vsGTA8":{"duration":153.86,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=6G8m-vsGTA8","annotations":[]},"8j75QYV-iCg":{"duration":154.018,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8j75QYV-iCg","annotations":[{"segment":[0.2,153.76100000000002],"label":"Baton twirling"}]},"g5BPyMdlurg":{"duration":154.95,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=g5BPyMdlurg","annotations":[{"segment":[18.612948848673945,32.149638920436814],"label":"Tumbling"},{"segment":[37.467624305772226,42.302156474258965],"label":"Tumbling"},{"segment":[46.16978220904836,49.553954726989076],"label":"Tumbling"},{"segment":[68.6503567925117,75.1769752199688],"label":"Tumbling"},{"segment":[84.84603955694229,92.33956441809671],"label":"Tumbling"},{"segment":[97.65754980343213,102.97553518876755],"label":"Tumbling"},{"segment":[108.05179396567861,114.57841239313572],"label":"Tumbling"},{"segment":[134.6417208923557,142.13524575351013],"label":"Tumbling"},{"segment":[148.90359078939156,153.7381229578783],"label":"Tumbling"}]},"kt3hzGla8r4":{"duration":226.79,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=kt3hzGla8r4","annotations":[{"segment":[0.01,4.599470923054609],"label":"Playing 
guitarra"},{"segment":[6.01469274553295,47.056125597404844],"label":"Playing guitarra"},{"segment":[61.91595473342743,154.61298410575878],"label":"Playing guitarra"}]},"QSV7f5XHohE":{"duration":223.747,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=QSV7f5XHohE","annotations":[{"segment":[33.50976344359897,208.7379014507519],"label":"Making a cake"}]},"rWfi4xN3XQI":{"duration":225.21,"subset":"training","resolution":"424x240","url":"https://www.youtube.com/watch?v=rWfi4xN3XQI","annotations":[{"segment":[35.8296879875195,88.87167706708269],"label":"Grooming horse"},{"segment":[106.4352496099844,130.32170826833072],"label":"Grooming horse"},{"segment":[161.5848673946958,212.16795631825272],"label":"Grooming horse"}]},"Ihmu18WVMpk":{"duration":182.37,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ihmu18WVMpk","annotations":[{"segment":[0.01,178.10177337705753],"label":"Futsal"}]},"TLQFjTXZ58E":{"duration":127.94,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=TLQFjTXZ58E","annotations":[{"segment":[93.73446645865835,112.70097191887676],"label":"Mowing the lawn"}]},"d6PdD2O3Yrs":{"duration":12.75,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=d6PdD2O3Yrs","annotations":[]},"ZKP5J7OKEvI":{"duration":173.78,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZKP5J7OKEvI","annotations":[{"segment":[7.862026276969446,161.57819506396257],"label":"Tango"}]},"EXxckPa76vc":{"duration":70.148,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EXxckPa76vc","annotations":[{"segment":[0.2,64.94986895475819],"label":"Table soccer"}]},"AffS41W1Jgg":{"duration":58.213,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=AffS41W1Jgg","annotations":[{"segment":[22.81419990718553,32.139051301320194],"label":"Hanging 
wallpaper"},{"segment":[40.55857777876965,48.52544154237774],"label":"Hanging wallpaper"}]},"38ZxXyECPPU":{"duration":112.176,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=38ZxXyECPPU","annotations":[{"segment":[5.075027186121458,13.825074058744661],"label":"Capoeira"},{"segment":[22.575120931367866,28.52515280475165],"label":"Capoeira"},{"segment":[35.70019124030267,36.75019686501746],"label":"Capoeira"},{"segment":[41.300221238781525,43.75023436311602],"label":"Capoeira"},{"segment":[47.77525592452269,52.150278474258975],"label":"Capoeira"},{"segment":[56.00029903276131,63.700340149766],"label":"Capoeira"},{"segment":[71.40038126677067,74.02539528393136],"label":"Capoeira"},{"segment":[82.0754382698908,98.17552424180968],"label":"Capoeira"}]},"MO8Dfz2Lp3w":{"duration":129.196,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MO8Dfz2Lp3w","annotations":[{"segment":[0.01,129.19521541950112],"label":"Playing beach volleyball"}]},"4L0AI0f-cMs":{"duration":232.717,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=4L0AI0f-cMs","annotations":[{"segment":[45.382410296411855,202.58707956318253],"label":"Making a cake"}]},"pVehZjbud_M":{"duration":97.06,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=pVehZjbud_M","annotations":[{"segment":[10.900081123244929,97.041],"label":"Brushing 
hair"}]},"VgubvtFkE8A":{"duration":16.09,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VgubvtFkE8A","annotations":[]},"jl10JmELMqY":{"duration":208.005,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=jl10JmELMqY","annotations":[{"segment":[12.652853354134166,187.19734321372857],"label":"Knitting"}]},"jcQy1x8lDaQ":{"duration":128.825,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=jcQy1x8lDaQ","annotations":[{"segment":[10.512729901144438,125.47063177030331],"label":"Playing ten pins"}]},"Hujj6Q1Et3k":{"duration":136.255,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Hujj6Q1Et3k","annotations":[{"segment":[0.2,102.4013135725429],"label":"Playing congas"}]},"dlrp1gteCz4":{"duration":59.79,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dlrp1gteCz4","annotations":[]},"qmOYKPkURiw":{"duration":34.16,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=qmOYKPkURiw","annotations":[{"segment":[0,34.156552],"label":"Playing bagpipes"}]},"S3i3cTtxpi8":{"duration":139.738,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=S3i3cTtxpi8","annotations":[]},"g4vyzwiJYWw":{"duration":151.14,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=g4vyzwiJYWw","annotations":[{"segment":[88.65545211856472,115.29924490951637],"label":"Washing face"}]},"4IRb308UjTA":{"duration":60.814,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4IRb308UjTA","annotations":[{"segment":[13.376975976599063,51.041227485179405],"label":"Changing car 
wheel"}]},"riuJrZqkYYU":{"duration":74.33,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=riuJrZqkYYU","annotations":[{"segment":[0.01,73.51541313353214],"label":"Dodgeball"}]},"-KbDXeEoQ1E":{"duration":207.1,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-KbDXeEoQ1E","annotations":[{"segment":[14.21584399375975,16.800542901716067],"label":"Horseback riding"},{"segment":[49.43236661466459,102.41869422776911],"label":"Horseback riding"},{"segment":[105.6495678627145,115.34218876755068],"label":"Horseback riding"},{"segment":[160.89750702028078,164.77455538221528],"label":"Horseback riding"}]},"Amgt0yzQido":{"duration":76.742,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Amgt0yzQido","annotations":[{"segment":[3.140531040002092,76.741971],"label":"Rope skipping"}]},"vgUSEkvJRlI":{"duration":207.657,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=vgUSEkvJRlI","annotations":[{"segment":[120.80370202808112,144.1223790951638],"label":"Doing fencing"},{"segment":[149.3043073322933,175.55473010920437],"label":"Doing fencing"}]},"CaDoaqs_5jE":{"duration":222.982,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=CaDoaqs_5jE","annotations":[{"segment":[18.436825113820877,40.00443185074342],"label":"Ice fishing"},{"segment":[50.092505969626544,81.40032220064313],"label":"Ice fishing"},{"segment":[102.62006431277659,165.23569641185648],"label":"Ice fishing"},{"segment":[190.62981396567864,217.41539001560062],"label":"Ice fishing"}]},"p-vfyM7ew04":{"duration":226.906,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=p-vfyM7ew04","annotations":[{"segment":[0.01,226.841],"label":"Waxing 
skis"}]},"xfOs5nRsSuQ":{"duration":135.72,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=xfOs5nRsSuQ","annotations":[{"segment":[49.01611073966768,88.18665279944531],"label":"Playing field hockey"}]},"kU2FVf0ldx4":{"duration":27.725,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kU2FVf0ldx4","annotations":[{"segment":[0.01,27.724625850340136],"label":"Swinging at the playground"}]},"hQ0Tjmm44lQ":{"duration":198.694,"subset":"testing","resolution":"270x480","url":"https://www.youtube.com/watch?v=hQ0Tjmm44lQ","annotations":[]},"cGoj8xGxrG0":{"duration":44.443,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=cGoj8xGxrG0","annotations":[{"segment":[8.741879233256308,32.602229700546026],"label":"Kneeling"}]},"0uOMJSUza68":{"duration":108.46000000000001,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=0uOMJSUza68","annotations":[{"segment":[72.73171606864275,108.421],"label":"Removing curlers"}]},"5JCqKshcfHE":{"duration":18.3,"subset":"validation","resolution":"1216x720","url":"https://www.youtube.com/watch?v=5JCqKshcfHE","annotations":[{"segment":[0.8114274570982839,16.641380655226207],"label":"Shaving legs"}]},"V9nOM1VWdnc":{"duration":126.247,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=V9nOM1VWdnc","annotations":[{"segment":[12.801947071080122,121.32306625897036],"label":"Making a lemonade"}]},"kShrO0yutUQ":{"duration":223.7,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kShrO0yutUQ","annotations":[{"segment":[35.5967336723869,205.90267516380655],"label":"Fixing bicycle"}]},"hYBctolxeqQ":{"duration":130.636,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hYBctolxeqQ","annotations":[{"segment":[0.2,125.70705909797822],"label":"Playing 
drums"}]},"iJVJ1G4TlqQ":{"duration":179.82,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=iJVJ1G4TlqQ","annotations":[]},"6TgVNz6PATk":{"duration":35.016,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=6TgVNz6PATk","annotations":[{"segment":[0.2,32.91047352024922],"label":"Snow tubing"}]},"zkLld95_bhg":{"duration":112.34,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zkLld95_bhg","annotations":[{"segment":[4.729464898595944,13.487733229329173],"label":"Sailing"},{"segment":[16.115213728549143,31.00426989079563],"label":"Sailing"},{"segment":[32.75592355694228,76.37209984399377],"label":"Sailing"},{"segment":[77.59825741029641,105.44955070202809],"label":"Sailing"}]},"KX-vl2o8U1w":{"duration":180.582,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=KX-vl2o8U1w","annotations":[{"segment":[7.4655414407052465,18.170846148131638],"label":"Playing drums"},{"segment":[24.086935591709384,36.76427011366169],"label":"Playing drums"},{"segment":[0.2,180.3815873015873],"label":"Playing drums"},{"segment":[86.34673402174181,87.75532674640318],"label":"Playing drums"},{"segment":[115.36374414976599,117.89921020436817],"label":"Playing drums"},{"segment":[142.69044197971917,150.86027972386896],"label":"Playing drums"}]},"PbZIYJxoWdY":{"duration":158.361,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PbZIYJxoWdY","annotations":[{"segment":[13.660242432318,57.05740131082267],"label":"Wakeboarding"},{"segment":[91.58943869404777,138.9377751426459],"label":"Wakeboarding"}]},"1qKXZ9fThTg":{"duration":196.19,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=1qKXZ9fThTg","annotations":[{"segment":[68.3868159126365,184.96568330733228],"label":"Doing 
nails"}]},"OM58jhy61Mc":{"duration":27.7,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=OM58jhy61Mc","annotations":[{"segment":[6.115052521060843,18.993396346333853],"label":"Smoking hookah"}]},"ck05xSh9-ig":{"duration":22.454,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=ck05xSh9-ig","annotations":[{"segment":[2.019684867394696,22.321],"label":"Hand car wash"}]},"EYgfoKt1zrw":{"duration":161.77,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EYgfoKt1zrw","annotations":[]},"pF7z3Z_JSGk":{"duration":119.351,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pF7z3Z_JSGk","annotations":[{"segment":[0.01,118.12317127899354],"label":"Clipping cat claws"}]},"n2fFj1mBjhQ":{"duration":16.65,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=n2fFj1mBjhQ","annotations":[{"segment":[1.1168109594383775,13.405802],"label":"Playing lacrosse"}]},"yCPeF59MOEk":{"duration":119.955,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=yCPeF59MOEk","annotations":[{"segment":[0.01,117.00703206072419],"label":"Hitting a pinata"}]},"3K_8CdJS9lE":{"duration":223.562,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=3K_8CdJS9lE","annotations":[{"segment":[115.23182398753895,223.30100000000002],"label":"Hitting a pinata"}]},"NKblxYCeetg":{"duration":198.183,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NKblxYCeetg","annotations":[{"segment":[23.49253666146646,180.21248517940717],"label":"Snowboarding"}]},"nTkMD63Wj14":{"duration":65.5,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nTkMD63Wj14","annotations":[{"segment":[32.45608147,56.36598653],"label":"Shoveling 
snow"}]},"YtKUxxMo6Cs":{"duration":66.93,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YtKUxxMo6Cs","annotations":[{"segment":[0.2,63.68961214953272],"label":"Wakeboarding"}]},"Su6aFxSeeps":{"duration":107.811,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Su6aFxSeeps","annotations":[]},"uaBTWbu0jps":{"duration":40.06,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=uaBTWbu0jps","annotations":[{"segment":[25.317496879875197,33.9959976599064],"label":"Hammer throw"}]},"7DDKcs7C-4A":{"duration":34.16,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=7DDKcs7C-4A","annotations":[]},"umi5d_a6bfc":{"duration":61.46,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=umi5d_a6bfc","annotations":[{"segment":[11.794034406274212,28.382391741928185],"label":"Fixing bicycle"},{"segment":[31.450758416731226,54.65528139492927],"label":"Fixing bicycle"}]},"tF8Nv-QxbIU":{"duration":90.74,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tF8Nv-QxbIU","annotations":[]},"3_Gha8keu-A":{"duration":212.7,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3_Gha8keu-A","annotations":[{"segment":[11.945408004075267,210.37190762732553],"label":"Zumba"}]},"qGLRri_c6n8":{"duration":105.117,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=qGLRri_c6n8","annotations":[]},"dsgQVrJi71g":{"duration":123.14,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dsgQVrJi71g","annotations":[{"segment":[32.27262125429017,45.71954677691108],"label":"Mowing the lawn"},{"segment":[63.96894570046802,67.23462761310452],"label":"Mowing the lawn"},{"segment":[72.61339782215289,81.83414675195009],"label":"Mowing the lawn"},{"segment":[92.20748929797192,120.44603289547582],"label":"Mowing the 
lawn"}]},"vRNcq6nOk0E":{"duration":167.51,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vRNcq6nOk0E","annotations":[{"segment":[7.93245896513514,166.05975803055568],"label":"Rock-paper-scissors"}]},"SX075xQ0j6c":{"duration":44.374,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=SX075xQ0j6c","annotations":[{"segment":[5.19188751950078,44.373333333333335],"label":"Knitting"}]},"6JZVpl2S2Mg":{"duration":97.246,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=6JZVpl2S2Mg","annotations":[]},"EM0N4dzewIg":{"duration":206.542,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EM0N4dzewIg","annotations":[{"segment":[53.97134526197375,127.11460122894711],"label":"Assembling bicycle"}]},"8Yi308KEGw0":{"duration":66.11,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=8Yi308KEGw0","annotations":[]},"EQPiYEvFmSo":{"duration":177.24,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=EQPiYEvFmSo","annotations":[{"segment":[14.79286785023401,166.59257719188767],"label":"Cheerleading"}]},"rQeJGZ0SsIc":{"duration":70.775,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rQeJGZ0SsIc","annotations":[]},"JeSxkw4ed-I":{"duration":45.05,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=JeSxkw4ed-I","annotations":[{"segment":[1.3352379235569425,45.04671201814059],"label":"Bathing dog"}]},"RJEqnUPQylM":{"duration":196.835,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=RJEqnUPQylM","annotations":[]},"sCzauf2u4dc":{"duration":116.53999999999999,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=sCzauf2u4dc","annotations":[{"segment":[0,0],"label":"Grooming 
horse"}]},"4UdxT6Zmero":{"duration":98.221,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4UdxT6Zmero","annotations":[]},"4_wfCFTnExI":{"duration":177.703,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4_wfCFTnExI","annotations":[{"segment":[4.705500349672579,163.58533568567614],"label":"Playing beach volleyball"}]},"xXRKrdnuZBQ":{"duration":96.178,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=xXRKrdnuZBQ","annotations":[{"segment":[35.709901196047845,44.412314092563705],"label":"Hurling"},{"segment":[49.21364534581382,62.56734789391575],"label":"Hurling"}]},"nYlAXMmPWw0":{"duration":177.89,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nYlAXMmPWw0","annotations":[{"segment":[2.497648444713299,177.61055450858035],"label":"Zumba"}]},"0DEF7Mp7ZP4":{"duration":142.339,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0DEF7Mp7ZP4","annotations":[{"segment":[0.01,134.3090561622465],"label":"Playing blackjack"}]},"t19uSLVQGWA":{"duration":63.182,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=t19uSLVQGWA","annotations":[{"segment":[1.182804926677067,52.33911800546022],"label":"Playing drums"}]},"exhsUZg_xQA":{"duration":14.05,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=exhsUZg_xQA","annotations":[{"segment":[3.9743260530421214,13.508365054602184],"label":"Tumbling"}]},"CI6cPLeVCTk":{"duration":8.987,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=CI6cPLeVCTk","annotations":[{"segment":[0.2,8.62276286140385],"label":"Swinging at the playground"}]},"SfYJTxMRKDE":{"duration":230.064,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SfYJTxMRKDE","annotations":[{"segment":[69.64516316347924,227.29801051555168],"label":"River 
tubing"}]},"lKlAe8dKOyk":{"duration":73.78999999999999,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=lKlAe8dKOyk","annotations":[]},"cjUz6gVQPEs":{"duration":89.07,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cjUz6gVQPEs","annotations":[{"segment":[6.250928237129485,63.8983775351014],"label":"Using parallel bars"}]},"qVHazdU4_vY":{"duration":164.93,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=qVHazdU4_vY","annotations":[{"segment":[53.69988429,149.06379325039],"label":"Breakdancing"}]},"dWZEniTrr_Q":{"duration":46.86,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=dWZEniTrr_Q","annotations":[]},"9WOvWFdA7lY":{"duration":172.46,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=9WOvWFdA7lY","annotations":[{"segment":[27.831327613104527,75.15802964118565],"label":"Getting a piercing"},{"segment":[84.83849141965679,114.68658190327614],"label":"Getting a piercing"}]},"VFOjLDa5VtA":{"duration":48.76,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=VFOjLDa5VtA","annotations":[{"segment":[0.6401300007800312,46.804799468798755],"label":"Playing badminton"}]},"j0MefCTx3p0":{"duration":158.709,"subset":"testing","resolution":"853x480","url":"https://www.youtube.com/watch?v=j0MefCTx3p0","annotations":[]},"Ta_Kf0dCd3U":{"duration":157.06,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ta_Kf0dCd3U","annotations":[{"segment":[14.701384953357318,86.24812505969626],"label":"Laying tile"},{"segment":[103.64476392116909,124.4717259384253],"label":"Laying tile"}]},"K-rEA43wOS0":{"duration":152.022,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=K-rEA43wOS0","annotations":[{"segment":[19.44729500156006,110.28039238689547],"label":"Beer 
pong"}]},"G0PzY40uZoE":{"duration":101.216,"subset":"testing","resolution":"854x468","url":"https://www.youtube.com/watch?v=G0PzY40uZoE","annotations":[]},"o_Davs3OrOw":{"duration":29.792,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=o_Davs3OrOw","annotations":[{"segment":[0.01,29.337151449898087],"label":"Swimming"}]},"XWG_-4VMTcA":{"duration":55.546,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=XWG_-4VMTcA","annotations":[{"segment":[6.845146645865834,9.968042121684869],"label":"Doing a powerbomb"},{"segment":[20.369477379095166,26.090266770670826],"label":"Doing a powerbomb"},{"segment":[36.49170202808112,40.73895475819033],"label":"Doing a powerbomb"}]},"CTxuJBTp_CQ":{"duration":179.932,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=CTxuJBTp_CQ","annotations":[{"segment":[0.19999999999999998,179.73142857142858],"label":"Decorating the Christmas tree"}]},"lxtG6PbeaqY":{"duration":133.79,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lxtG6PbeaqY","annotations":[{"segment":[11.162943262090485,103.17898024492979],"label":"Dodgeball"}]},"pxfc6AIPrig":{"duration":94.44,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pxfc6AIPrig","annotations":[{"segment":[8.424584286390013,86.97380253872646],"label":"Windsurfing"}]},"RuBLAghI1vE":{"duration":132.261,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RuBLAghI1vE","annotations":[{"segment":[0,0],"label":"Curling"}]},"o_JAjYZDs9Y":{"duration":16.045,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=o_JAjYZDs9Y","annotations":[{"segment":[0.2,15.84498866213152],"label":"Hurling"}]},"LmioUbGNv04":{"duration":41.54,"subset":"validation","resolution":"904x720","url":"https://www.youtube.com/watch?v=LmioUbGNv04","annotations":[{"segment":[0.01,41.54049886621315],"label":"Doing 
karate"}]},"iODCh_DsGak":{"duration":171.62,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iODCh_DsGak","annotations":[{"segment":[13.782744382995322,147.86342274960998],"label":"Tango"}]},"CBN0dqyWB7w":{"duration":143.128,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CBN0dqyWB7w","annotations":[{"segment":[0.2,0.4465041497659907],"label":"Changing car wheel"}]},"a_tqFOElJxU":{"duration":95.75999999999999,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=a_tqFOElJxU","annotations":[]},"zQd3B8WBXgE":{"duration":115.102,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=zQd3B8WBXgE","annotations":[]},"1xBdquA6dx8":{"duration":19.51,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1xBdquA6dx8","annotations":[]},"orcF8QNkg9g":{"duration":121.394,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=orcF8QNkg9g","annotations":[]},"IZx-EMbylmM":{"duration":134.1,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IZx-EMbylmM","annotations":[{"segment":[72.98020124804992,110.60391419656786],"label":"Drinking coffee"}]},"0T1FKHx3yOk":{"duration":237.494,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0T1FKHx3yOk","annotations":[{"segment":[22.899853090406715,46.16906437678752],"label":"Sumo"}]},"unLrTQt07kI":{"duration":43.98,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=unLrTQt07kI","annotations":[{"segment":[4.013646943057722,34.33897940171607],"label":"Washing 
dishes"}]},"e_E9Kv2odGM":{"duration":76.65,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=e_E9Kv2odGM","annotations":[]},"weeD4A7gl2A":{"duration":27.515,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=weeD4A7gl2A","annotations":[{"segment":[0.01,27.515646258503402],"label":"Hand car wash"}]},"ufBz1xfqQoM":{"duration":180.09,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ufBz1xfqQoM","annotations":[{"segment":[20.22896353198128,61.52976407644306],"label":"Tango"},{"segment":[96.08757677691108,179.893967],"label":"Tango"}]},"-erT3ckPkAg":{"duration":50.109,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=-erT3ckPkAg","annotations":[{"segment":[0,2.1888339095163807],"label":"Hitting a pinata"},{"segment":[4.143149900156006,5.6284300530421225],"label":"Hitting a pinata"},{"segment":[7.9736092418096725,9.380716755070203],"label":"Hitting a pinata"},{"segment":[14.696456249609984,16.572599600624024],"label":"Hitting a pinata"},{"segment":[20.246713663026522,21.966511734789393],"label":"Hitting a pinata"},{"segment":[22.51372021216849,30.409156814352574],"label":"Hitting a pinata"},{"segment":[32.3634728049922,34.00509823712949],"label":"Hitting a pinata"}]},"37Q3so6ERxs":{"duration":8.22,"subset":"training","resolution":"632x480","url":"https://www.youtube.com/watch?v=37Q3so6ERxs","annotations":[{"segment":[0.09344773010920436,7.706322809672386],"label":"Drinking beer"}]},"B0jASjqZl7s":{"duration":157.618,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=B0jASjqZl7s","annotations":[{"segment":[14.999438645865833,107.94677976287052],"label":"Snow tubing"},{"segment":[123.68389571918877,156.87937468954757],"label":"Snow 
tubing"}]},"A7oh6l1AIvs":{"duration":89.165,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=A7oh6l1AIvs","annotations":[{"segment":[0,82.0302496099844],"label":"Using the rowing machine"}]},"a5WhYgyH_C0":{"duration":142.22,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=a5WhYgyH_C0","annotations":[{"segment":[90.36328393135724,114.53407644305771],"label":"Vacuuming floor"}]},"Mx6Gt14tnmY":{"duration":141.06,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Mx6Gt14tnmY","annotations":[{"segment":[64.21861130291869,79.59911848360406],"label":"Doing nails"},{"segment":[104.64737303500591,141.061223],"label":"Doing nails"}]},"FlLDPameKGM":{"duration":10.08,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FlLDPameKGM","annotations":[{"segment":[0.01,7.144457098283932],"label":"Brushing hair"}]},"mWaGZtvQd_E":{"duration":24.22,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=mWaGZtvQd_E","annotations":[]},"mgEX5A1o2jg":{"duration":121.441,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mgEX5A1o2jg","annotations":[{"segment":[3.3154545229428227,112.06236287546741],"label":"Assembling bicycle"}]},"It9tfX8Ux2o":{"duration":165.187,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=It9tfX8Ux2o","annotations":[]},"prqwtY9cn6s":{"duration":100.87,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=prqwtY9cn6s","annotations":[{"segment":[0.01,86.46181045241809],"label":"Playing piano"}]},"boqfpo3YAps":{"duration":222.239,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=boqfpo3YAps","annotations":[{"segment":[56.04417757009345,137.70118068535825],"label":"Running a 
marathon"}]},"2UhbDOd43zU":{"duration":137.068,"subset":"testing","resolution":"293x240","url":"https://www.youtube.com/watch?v=2UhbDOd43zU","annotations":[]},"xOGOd2UJaMY":{"duration":191.379,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xOGOd2UJaMY","annotations":[{"segment":[16.71952653344229,138.53321984852184],"label":"Capoeira"},{"segment":[144.2059161903276,188.9903622121685],"label":"Capoeira"}]},"CEnA3D5WX1I":{"duration":167.625,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CEnA3D5WX1I","annotations":[]},"kpOAGEYHts8":{"duration":142.594,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=kpOAGEYHts8","annotations":[]},"T5CMoNMZ-lo":{"duration":132.63,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=T5CMoNMZ-lo","annotations":[]},"Ws12Dt0Ny5Q":{"duration":219.22,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=Ws12Dt0Ny5Q","annotations":[]},"T9gKHEOvRKk":{"duration":79.134,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=T9gKHEOvRKk","annotations":[{"segment":[14.567500942277691,36.295298957878316],"label":"Applying sunscreen"},{"segment":[45.06049020280811,52.961507663026524],"label":"Applying sunscreen"}]},"lJRwUE12drE":{"duration":87.91,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lJRwUE12drE","annotations":[{"segment":[15.759625700000003,46.580871699999996],"label":"Using parallel bars"}]},"ggyGuKFjdxk":{"duration":171.132,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ggyGuKFjdxk","annotations":[{"segment":[3.571894888980188,161.37499440299888],"label":"Making an 
omelette"}]},"qZp8DmGpH6Y":{"duration":63.786,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=qZp8DmGpH6Y","annotations":[]},"OmIcycXGkG8":{"duration":162.26,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=OmIcycXGkG8","annotations":[{"segment":[8.605606864274572,80.74084087363495],"label":"Ping-pong"},{"segment":[98.20516068642746,135.41175507020282],"label":"Ping-pong"},{"segment":[145.28289235569423,159.20372698907957],"label":"Ping-pong"}]},"mEsW__Np9Xo":{"duration":54.1,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mEsW__Np9Xo","annotations":[{"segment":[0.8440326521060844,29.794352619344775],"label":"Playing polo"}]},"kMsWDe0V1Xg":{"duration":190.85,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kMsWDe0V1Xg","annotations":[{"segment":[16.220339313572545,181.9951833073323],"label":"Playing saxophone"}]},"8wB0BOjuyes":{"duration":16.23,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=8wB0BOjuyes","annotations":[{"segment":[6.861232500000001,16.183333],"label":"Playing polo"},{"segment":[0.23984658892355698,5.6467416],"label":"Playing polo"}]},"Lb43_7s9t7E":{"duration":128.523,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Lb43_7s9t7E","annotations":[{"segment":[0,128.52244897959184],"label":"Capoeira"}]},"Zxddl4l5HUY":{"duration":155.203,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=Zxddl4l5HUY","annotations":[]},"6Kp_fvkZWTE":{"duration":64.13,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=6Kp_fvkZWTE","annotations":[{"segment":[5.950928237129485,61.45958658346334],"label":"High 
jump"}]},"sqcJOpPrexQ":{"duration":44.397,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sqcJOpPrexQ","annotations":[{"segment":[10.352808112324492,34.509360374414975],"label":"Roof shingle removal"}]},"_MR8G1jwM4o":{"duration":201.55,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=_MR8G1jwM4o","annotations":[{"segment":[45.59225416635713,48.10768855694228],"label":"Elliptical trainer"},{"segment":[83.6382036349454,93.69994241809673],"label":"Elliptical trainer"},{"segment":[122.62744141965679,133.31803887675505],"label":"Elliptical trainer"},{"segment":[150.61165241029641,154.38480445397815],"label":"Elliptical trainer"},{"segment":[167.2764072698908,178.59586340093603],"label":"Elliptical trainer"}]},"MLU2wreEGIg":{"duration":19.877,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=MLU2wreEGIg","annotations":[{"segment":[0.20000000000000004,17.121432137285492],"label":"Using the monkey bar"}]},"fkUnZB6NTT8":{"duration":159.359,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=fkUnZB6NTT8","annotations":[{"segment":[24.845709828393137,59.885680187207484],"label":"Snow tubing"}]},"rdwOiqU_2JA":{"duration":114.57,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rdwOiqU_2JA","annotations":[]},"iuEuMQUXLVw":{"duration":221.844,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iuEuMQUXLVw","annotations":[{"segment":[10.035583463338535,12.457965678627145],"label":"Making a cake"},{"segment":[33.221241809672385,36.33573322932917],"label":"Making a cake"},{"segment":[55.71479095163806,67.48064742589703],"label":"Making a cake"},{"segment":[88.93603276131044,157.80089859594383],"label":"Making a cake"},{"segment":[184.44710296411856,192.40635881435256],"label":"Making a 
cake"}]},"yeWCfvmeUvM":{"duration":56.146,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=yeWCfvmeUvM","annotations":[{"segment":[0.01,53.080163017160686],"label":"Applying sunscreen"}]},"JiZCjH6ePq0":{"duration":230.16,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JiZCjH6ePq0","annotations":[{"segment":[4.306333853354134,16.148751950078],"label":"Dodgeball"},{"segment":[175.12424336973478,211.01035881435257],"label":"Dodgeball"}]},"AEH6JEd_03c":{"duration":106.023,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AEH6JEd_03c","annotations":[{"segment":[51.37128081123245,83.10172074882995],"label":"Baking cookies"}]},"UaZcqOngDzo":{"duration":142.22,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UaZcqOngDzo","annotations":[{"segment":[37.71321372854914,127.78124180967238],"label":"Doing motocross"}]},"mS5QXB8lv9A":{"duration":53.08,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=mS5QXB8lv9A","annotations":[]},"1SXVKikAHdM":{"duration":179.587,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1SXVKikAHdM","annotations":[{"segment":[64.1489375975039,156.32792199687987],"label":"Rollerblading"}]},"66-sCRx4QRI":{"duration":213.856,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=66-sCRx4QRI","annotations":[{"segment":[37.250102968621405,54.87738623922794],"label":"Surfing"}]},"bQ3yPbCICRg":{"duration":10.496,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=bQ3yPbCICRg","annotations":[{"segment":[0.19999999999999998,10.295419501133788],"label":"Drum corps"}]},"gSwjTXkXK3Q":{"duration":208.771,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gSwjTXkXK3Q","annotations":[{"segment":[54.54659357940299,65.58578613978639],"label":"Beer 
pong"},{"segment":[73.70283949300949,78.24838937081442],"label":"Beer pong"},{"segment":[103.24891369874155,104.22296010112832],"label":"Beer pong"},{"segment":[142.2107697942124,146.10695540375949],"label":"Beer pong"},{"segment":[164.2891549149792,172.08152510795222],"label":"Beer pong"}]},"SAEf6byveZU":{"duration":119.978,"subset":"testing","resolution":"282x192","url":"https://www.youtube.com/watch?v=SAEf6byveZU","annotations":[]},"9xC9rVAJHm8":{"duration":115.1,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=9xC9rVAJHm8","annotations":[{"segment":[3.4105444617784713,107.3610577223089],"label":"Smoking a cigarette"}]},"l2xRpn3K6Rs":{"duration":18.576,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=l2xRpn3K6Rs","annotations":[]},"_AKzq9X1Aik":{"duration":26.49,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=_AKzq9X1Aik","annotations":[{"segment":[0.01,24.530098803952157],"label":"Using parallel bars"}]},"Srpn1NaBueI":{"duration":36.131,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Srpn1NaBueI","annotations":[{"segment":[14.993056801872076,34.6643982449298],"label":"Calf roping"}]},"PTkNBVyhUuo":{"duration":140.5,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PTkNBVyhUuo","annotations":[{"segment":[35.48861466458658,133.41090327613105],"label":"Archery"}]},"zrnxRV3yLR8":{"duration":69.173,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=zrnxRV3yLR8","annotations":[{"segment":[30.87471117653963,40.771831661765134],"label":"Hanging 
wallpaper"}]},"TPu-vPWJtIA":{"duration":101.727,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=TPu-vPWJtIA","annotations":[{"segment":[31.8986753266049,38.88146992546368],"label":"Swimming"},{"segment":[46.49906403330963,49.990461332739024],"label":"Swimming"},{"segment":[59.35375332293292,74.74764121684868],"label":"Swimming"}]},"lyJpgvmTOpo":{"duration":8.22,"subset":"validation","resolution":"632x480","url":"https://www.youtube.com/watch?v=lyJpgvmTOpo","annotations":[{"segment":[1.2182322776911076,5.924455918876754],"label":"Drinking beer"}]},"4PgIW76SBI0":{"duration":40.19,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=4PgIW76SBI0","annotations":[]},"on8ok9e-zyM":{"duration":199.53,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=on8ok9e-zyM","annotations":[]},"oq54_GlzK6A":{"duration":166.626,"subset":"training","resolution":"512x288","url":"https://www.youtube.com/watch?v=oq54_GlzK6A","annotations":[{"segment":[33.79309716606351,44.7108670197148],"label":"Playing ten pins"},{"segment":[57.70821208358539,68.62598193723667],"label":"Playing ten pins"},{"segment":[71.74534475256561,73.5649730615075],"label":"Playing ten pins"},{"segment":[89.16178713815219,97.48008797902936],"label":"Playing ten pins"},{"segment":[121.13525599527381,134.91244037129485],"label":"Playing ten pins"},{"segment":[142.71084732917316,149.9893604898596],"label":"Playing ten pins"}]},"Hj3kEemIPic":{"duration":196.256,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Hj3kEemIPic","annotations":[{"segment":[0.01,196.2550566893424],"label":"Removing ice from car"}]},"NNKEE-015ZY":{"duration":91.14,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NNKEE-015ZY","annotations":[{"segment":[48.76807176287052,51.611691107644305],"label":"Walking the dog"},{"segment":[57.58329173166926,73.36537909516382],"label":"Walking the 
dog"}]},"MKLoBzvca30":{"duration":169.599,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=MKLoBzvca30","annotations":[{"segment":[35.18959668174727,133.6146340171607],"label":"Futsal"}]},"wj0D-wiqEb0":{"duration":86.63,"subset":"training","resolution":"658x480","url":"https://www.youtube.com/watch?v=wj0D-wiqEb0","annotations":[{"segment":[1.8245776688210384,24.12497139885595],"label":"Playing flauta"}]},"EsGpVb1-W0E":{"duration":86.63,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=EsGpVb1-W0E","annotations":[{"segment":[27.52590309,33.77015054602184],"label":"Playing racquetball"},{"segment":[68.21570410296412,72.42172268],"label":"Playing racquetball"}]},"LZ4dINjqX5U":{"duration":49.13,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LZ4dINjqX5U","annotations":[{"segment":[1.00083921,46.5757225124805],"label":"Using the pommel horse"}]},"mOTaXxGKUsw":{"duration":192.7,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=mOTaXxGKUsw","annotations":[]},"a_c-FIC_W4A":{"duration":59.98,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=a_c-FIC_W4A","annotations":[{"segment":[0.01,58.3980569422777],"label":"Polishing shoes"}]},"CBDFNn15G90":{"duration":217.43,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CBDFNn15G90","annotations":[{"segment":[30.69248907956318,71.72885569422776],"label":"Kayaking"},{"segment":[78.51172620904835,125.65267628705149],"label":"Kayaking"},{"segment":[134.47040795631824,209.08198361934475],"label":"Kayaking"}]},"aM9WZ1_8yC0":{"duration":172.084,"subset":"training","resolution":"352x240","url":"https://www.youtube.com/watch?v=aM9WZ1_8yC0","annotations":[{"segment":[0.01,168.14808670178232],"label":"Throwing 
darts"}]},"ZVjLscGMOs8":{"duration":105.19,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=ZVjLscGMOs8","annotations":[]},"HfowLX1u38Y":{"duration":62.25,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=HfowLX1u38Y","annotations":[{"segment":[7.866565455538221,45.15991280031201],"label":"Starting a campfire"}]},"00SfeRtiM2o":{"duration":175.868,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=00SfeRtiM2o","annotations":[{"segment":[101.2406685981725,122.9154990327613],"label":"Snowboarding"},{"segment":[145.96215510140405,150.07762939937598],"label":"Snowboarding"}]},"ff4S4Am-Zoo":{"duration":234.383,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ff4S4Am-Zoo","annotations":[]},"c8ntlBKuVmc":{"duration":142.85,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=c8ntlBKuVmc","annotations":[{"segment":[20.272840873634944,142.601],"label":"Trimming branches or hedges"}]},"B3YfGd4Wt24":{"duration":101.24000000000001,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=B3YfGd4Wt24","annotations":[{"segment":[6.001688121946647,10.423984632854703],"label":"Cheerleading"},{"segment":[48.32938329778089,99.65961065653511],"label":"Cheerleading"}]},"0drl-yrfBAA":{"duration":235.683,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0drl-yrfBAA","annotations":[{"segment":[0.2,235.441],"label":"Drum 
corps"}]},"G2uCtAeVgLM":{"duration":146.333,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=G2uCtAeVgLM","annotations":[]},"f2fyhNjY3EA":{"duration":88.52,"subset":"training","resolution":"320x180","url":"https://www.youtube.com/watch?v=f2fyhNjY3EA","annotations":[{"segment":[14.284453198127924,82.87743135725428],"label":"Sailing"}]},"Evygqtr18Us":{"duration":232.5,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=Evygqtr18Us","annotations":[]},"DUi-hLMpQQo":{"duration":96.15,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DUi-hLMpQQo","annotations":[]},"n8gFxENOyR0":{"duration":195.86,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=n8gFxENOyR0","annotations":[{"segment":[97.16727301092044,103.88953718148726],"label":"Skateboarding"},{"segment":[141.7786622652106,147.2786965772231],"label":"Skateboarding"}]},"IXnGjiNnJ3A":{"duration":53.894,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IXnGjiNnJ3A","annotations":[]},"Qn61vl2khuo":{"duration":155.04,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Qn61vl2khuo","annotations":[{"segment":[35.30795122620905,44.01402139157567],"label":"Elliptical trainer"},{"segment":[45.706868368174725,58.28230305148205],"label":"Elliptical trainer"},{"segment":[74.9689375351014,81.74032544149765],"label":"Elliptical trainer"},{"segment":[85.12601939469579,86.81886637129485],"label":"Elliptical trainer"},{"segment":[103.5055008549142,110.76055932605304],"label":"Elliptical trainer"},{"segment":[113.90441799687987,131.5583936099844],"label":"Elliptical trainer"}]},"jpGyplACWUg":{"duration":45.21,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=jpGyplACWUg","annotations":[{"segment":[4.714145085803432,16.042165366614665],"label":"Shot 
put"},{"segment":[18.926940717628707,21.037751950078004],"label":"Shot put"},{"segment":[25.89261778471139,29.34027613104524],"label":"Shot put"}]},"1Vu0bzAKL8Q":{"duration":56.007,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1Vu0bzAKL8Q","annotations":[{"segment":[0.01,18.52913562470186],"label":"Painting fence"}]},"CvbBSwz-9XI":{"duration":32.76,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CvbBSwz-9XI","annotations":[]},"L7Gt1NO1nx8":{"duration":73.027,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=L7Gt1NO1nx8","annotations":[{"segment":[0.2,63.68480056630265],"label":"Cutting the grass"}]},"8kOMPCgHQ68":{"duration":226.488,"subset":"testing","resolution":"540x360","url":"https://www.youtube.com/watch?v=8kOMPCgHQ68","annotations":[]},"J1LIG037WzE":{"duration":167.44,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=J1LIG037WzE","annotations":[{"segment":[7.052797605781783,160.90827130228067],"label":"Doing kickboxing"}]},"xIB7VSLKvaM":{"duration":198.37,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xIB7VSLKvaM","annotations":[{"segment":[1.85679943116092,41.15905405740039],"label":"Mowing the lawn"},{"segment":[51.37145092878545,65.60691323435249],"label":"Mowing the lawn"},{"segment":[147.30608820543299,183.82314223712947],"label":"Mowing the lawn"}]},"12sz7NMvyfI":{"duration":68.615,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=12sz7NMvyfI","annotations":[{"segment":[0,68.614443],"label":"Blowing leaves"}]},"BG4QxmqBaS4":{"duration":182.18,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BG4QxmqBaS4","annotations":[{"segment":[13.213036661466457,28.55720826833073],"label":"Pole vault"},{"segment":[38.218353354134166,50.15270904836193],"label":"Pole 
vault"},{"segment":[53.278373634945396,62.087064742589696],"label":"Pole vault"},{"segment":[124.61403822152887,133.7078790951638],"label":"Pole vault"},{"segment":[146.78027535101404,155.30575117004682],"label":"Pole vault"},{"segment":[172.64088533541343,181.961],"label":"Pole vault"}]},"R586XlFT7Go":{"duration":69.103,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=R586XlFT7Go","annotations":[{"segment":[5.28241289651586,65.11382330109204],"label":"Running a marathon"}]},"Qyruw9480BU":{"duration":118.21000000000001,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=Qyruw9480BU","annotations":[{"segment":[0.2,118.012788],"label":"Ironing clothes"}]},"kI6maggAugg":{"duration":119.95,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=kI6maggAugg","annotations":[{"segment":[2.244742577223089,118.41017094851794],"label":"Clean and jerk"}]},"cTY0q4wS3eI":{"duration":202.07999999999998,"subset":"testing","resolution":"640x354","url":"https://www.youtube.com/watch?v=cTY0q4wS3eI","annotations":[]},"SMpmoqIk3Ts":{"duration":25.1,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SMpmoqIk3Ts","annotations":[{"segment":[0,25.10077097505669],"label":"Playing water polo"}]},"NMBu3DIn1eg":{"duration":202.29,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=NMBu3DIn1eg","annotations":[{"segment":[35.34591484804992,180.2010480199688],"label":"Polishing forniture"}]},"hGPCJb2g1tQ":{"duration":110.504,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=hGPCJb2g1tQ","annotations":[]},"mwGs9TeYpM0":{"duration":52.66,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=mwGs9TeYpM0","annotations":[{"segment":[14.874528519104574,41.7068649677027],"label":"Playing 
lacrosse"}]},"43OU5XCzLzo":{"duration":36.084,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=43OU5XCzLzo","annotations":[{"segment":[0.2,32.89233229329173],"label":"River tubing"}]},"28rFeoIxM4M":{"duration":113.546,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=28rFeoIxM4M","annotations":[]},"kElViDpjunQ":{"duration":120.77,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kElViDpjunQ","annotations":[{"segment":[34.666341777480625,84.59340930889235],"label":"Cleaning shoes"}]},"ubVPP8BVcfs":{"duration":67.385,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ubVPP8BVcfs","annotations":[{"segment":[0.01,7.564043681747269],"label":"Canoeing"},{"segment":[11.346065522620904,46.22471138845553],"label":"Canoeing"}]},"bbDWLOPnWOY":{"duration":50.945,"subset":"training","resolution":"286x240","url":"https://www.youtube.com/watch?v=bbDWLOPnWOY","annotations":[{"segment":[0.20000000000000004,39.06909828393136],"label":"Playing pool"}]},"OYEKKzQjaJI":{"duration":181.697,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OYEKKzQjaJI","annotations":[]},"WhYyvKiwLc8":{"duration":64.343,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=WhYyvKiwLc8","annotations":[{"segment":[29.912619312935785,32.221310457098284],"label":"Arm wrestling"},{"segment":[35.43340371138845,36.5375607675507],"label":"Arm wrestling"},{"segment":[43.26288101872074,48.884044213728544],"label":"Arm wrestling"}]},"jd609r5yKkI":{"duration":143.709,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jd609r5yKkI","annotations":[{"segment":[0.01,128.91131677067082],"label":"Playing 
drums"}]},"ynda_PQe_CM":{"duration":211.232,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ynda_PQe_CM","annotations":[{"segment":[15.134997916092708,145.75661036584935],"label":"Futsal"}]},"8UvKQnfXnCk":{"duration":78.02,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8UvKQnfXnCk","annotations":[{"segment":[0.01,76.71465783221541],"label":"Waterskiing"}]},"VIQG4W0vYxw":{"duration":148.121,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VIQG4W0vYxw","annotations":[{"segment":[12.474842433697347,148.081],"label":"Running a marathon"}]},"LXH96dFBVYk":{"duration":222.819,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LXH96dFBVYk","annotations":[{"segment":[5.544472094825489,214.50196170615354],"label":"Bullfighting"}]},"ctMcsGnutho":{"duration":216.736,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ctMcsGnutho","annotations":[{"segment":[78.05562640231123,185.71961705651682],"label":"Changing car wheel"}]},"Me3ykrZobJE":{"duration":134.61,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Me3ykrZobJE","annotations":[{"segment":[9.866307163806553,124.27348597815914],"label":"Wrapping presents"}]},"u_G3ZPXU35A":{"duration":69.94,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=u_G3ZPXU35A","annotations":[{"segment":[5.887095163806553,68.57375663026521],"label":"Cleaning 
windows"}]},"wDFpFJ1CP9g":{"duration":229.459,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wDFpFJ1CP9g","annotations":[{"segment":[0.01,129.92172074882996],"label":"Wakeboarding"},{"segment":[130.63754290171607,219.39948985959438],"label":"Wakeboarding"}]},"mpC5EaOOe1w":{"duration":48.832,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=mpC5EaOOe1w","annotations":[]},"rDlPcW_2nG0":{"duration":143.34,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rDlPcW_2nG0","annotations":[{"segment":[20.79613189425536,116.27944715067514],"label":"Making a sandwich"}]},"nLjD4aESipo":{"duration":72.238,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=nLjD4aESipo","annotations":[]},"Lh3dhueuKmQ":{"duration":145.404,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=Lh3dhueuKmQ","annotations":[{"segment":[8.846673238689549,61.24619934477379],"label":"Hanging wallpaper"},{"segment":[78.48587027145086,135.64898965990642],"label":"Hanging wallpaper"}]},"fJNauQt9Di0":{"duration":182.46,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fJNauQt9Di0","annotations":[{"segment":[8.539582073078842,75.4329749788631],"label":"Playing violin"},{"segment":[92.2274863892515,172.2149045475819],"label":"Playing violin"}]},"W4mJsJGa0CY":{"duration":72.17,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=W4mJsJGa0CY","annotations":[]},"h15m87WsCHQ":{"duration":137.509,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=h15m87WsCHQ","annotations":[{"segment":[7.722790283039894,117.34350791174505],"label":"Playing ice 
hockey"}]},"swbCsf51XVg":{"duration":98.476,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=swbCsf51XVg","annotations":[{"segment":[6.757260530421218,81.24070046801872],"label":"Elliptical trainer"}]},"RZurdKhOxk0":{"duration":152.21,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RZurdKhOxk0","annotations":[{"segment":[5.936302758232778,127.51178180499222],"label":"Tango"}]},"9oCLvMVaZmw":{"duration":143.848,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9oCLvMVaZmw","annotations":[]},"PUqHiigzb7A":{"duration":25.22,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PUqHiigzb7A","annotations":[{"segment":[3.815969407176287,25.21687],"label":"Clean and jerk"}]},"bWYIL4KG8kM":{"duration":182.14,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=bWYIL4KG8kM","annotations":[{"segment":[40.06452814820593,153.72276402964118],"label":"Dodgeball"}]},"f-Cf16fQTB4":{"duration":84.66,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=f-Cf16fQTB4","annotations":[{"segment":[10.970862159999998,74.29065883385336],"label":"Getting a haircut"}]},"W2eumNilwuI":{"duration":33.484,"subset":"testing","resolution":"640x356","url":"https://www.youtube.com/watch?v=W2eumNilwuI","annotations":[]},"a2jpe1QfZdM":{"duration":40.38,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=a2jpe1QfZdM","annotations":[{"segment":[12.47291177223089,33.57607057878315],"label":"Hand washing clothes"}]},"Q9n6B1AVO4E":{"duration":83.732,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Q9n6B1AVO4E","annotations":[{"segment":[9.006168486739469,59.12886739469579],"label":"Putting in contact 
lenses"}]},"U697zTxxDHM":{"duration":95.76,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=U697zTxxDHM","annotations":[{"segment":[13.43695787831513,86.16383307332293],"label":"Rollerblading"}]},"Y1Vop80R9b8":{"duration":56.286,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Y1Vop80R9b8","annotations":[{"segment":[14.939241251533259,19.936522611346344],"label":"Painting fence"},{"segment":[25.196818779570645,28.352996480505222],"label":"Painting fence"},{"segment":[36.769470349664104,40.01331965340242],"label":"Painting fence"},{"segment":[46.85170467209401,50.09555397583233],"label":"Painting fence"}]},"vbfPAKV3i3w":{"duration":58.38,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=vbfPAKV3i3w","annotations":[]},"bnQVFmXUx_U":{"duration":131.01,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bnQVFmXUx_U","annotations":[{"segment":[30.55840639625585,39.75658892355694],"label":"Cleaning shoes"},{"segment":[48.137155226209046,66.32911622464898],"label":"Cleaning shoes"},{"segment":[73.4832581903276,82.88584477379095],"label":"Cleaning shoes"},{"segment":[88.60915834633384,96.78532059282371],"label":"Cleaning shoes"},{"segment":[103.73505850234008,109.8671801872075],"label":"Cleaning shoes"},{"segment":[116.40810998439937,125.60629251170047],"label":"Cleaning shoes"}]},"inFPa4wxOwQ":{"duration":29.07,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=inFPa4wxOwQ","annotations":[{"segment":[13.818260530421217,17.35211076443058],"label":"Springboard diving"}]},"ROMy00dG8Ds":{"duration":222.07999999999998,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ROMy00dG8Ds","annotations":[{"segment":[6.581558502340093,196.75395943837754],"label":"Walking the 
dog"}]},"JMIvoKPtxRA":{"duration":107.788,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JMIvoKPtxRA","annotations":[{"segment":[2.921316773939495,95.42967316957812],"label":"Doing a powerbomb"}]},"xeOwFqcsITY":{"duration":83.73,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xeOwFqcsITY","annotations":[]},"2_hcULoN4Ls":{"duration":220.89,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2_hcULoN4Ls","annotations":[{"segment":[31.524105304212167,212.03638923556943],"label":"Playing polo"}]},"a7YSE6dZ1yk":{"duration":125.946,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=a7YSE6dZ1yk","annotations":[{"segment":[47.70760081571062,86.14466623627136],"label":"Baking cookies"}]},"pc_QYeZG9GA":{"duration":21.78,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=pc_QYeZG9GA","annotations":[{"segment":[0.01,0.02],"label":"Brushing teeth"}]},"z7uCdnvk55Y":{"duration":52.11,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=z7uCdnvk55Y","annotations":[]},"yttmx8mcjGw":{"duration":188.38,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=yttmx8mcjGw","annotations":[]},"4H9MMsj4mcM":{"duration":31.696,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=4H9MMsj4mcM","annotations":[]},"54x_-2w7-7E":{"duration":109.738,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=54x_-2w7-7E","annotations":[]},"gl_0jjJBUkc":{"duration":17.07,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=gl_0jjJBUkc","annotations":[{"segment":[4.762291920000001,9.949270199999999],"label":"Hand washing clothes"},{"segment":[4.762291920000001,9.9492702],"label":"Hand washing 
clothes"}]},"1rdecGieY-M":{"duration":134.537,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=1rdecGieY-M","annotations":[{"segment":[0.01,133.54577822623727],"label":"Baking cookies"}]},"lfllVwgOWBk":{"duration":68.55,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=lfllVwgOWBk","annotations":[{"segment":[35.18160017829285,60.418248556942274],"label":"Brushing hair"}]},"CfWROyOu8Xs":{"duration":151.186,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=CfWROyOu8Xs","annotations":[]},"QCBaT1NJ32E":{"duration":171.29,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=QCBaT1NJ32E","annotations":[{"segment":[2.404319812792512,171.241],"label":"Smoking a cigarette"}]},"uBT9n8wOKP4":{"duration":43.213,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uBT9n8wOKP4","annotations":[{"segment":[0.2,41.79867251573527],"label":"Blowing leaves"}]},"F4FpAZJSV1g":{"duration":94.16,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=F4FpAZJSV1g","annotations":[{"segment":[4.625323680187208,83.62291542433698],"label":"Triple 
jump"}]},"0RYLNnfWeBg":{"duration":168.531,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0RYLNnfWeBg","annotations":[]},"MJvwDHbFMU0":{"duration":38.94,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=MJvwDHbFMU0","annotations":[]},"oWaqaK58Tds":{"duration":199.297,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=oWaqaK58Tds","annotations":[{"segment":[79.63442768273717,190.5878304821151],"label":"Curling"}]},"nDGhaYCgRiI":{"duration":211.743,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nDGhaYCgRiI","annotations":[{"segment":[42.970676740632555,114.65270531701371],"label":"Slacklining"},{"segment":[117.62569267732444,190.62904896939924],"label":"Slacklining"}]},"2k-vgnzg7fk":{"duration":182.15,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2k-vgnzg7fk","annotations":[]},"IGdB41eBvHg":{"duration":188.036,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IGdB41eBvHg","annotations":[]},"gY-ijXvMjuA":{"duration":133.61,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=gY-ijXvMjuA","annotations":[]},"9tks__Y0mqI":{"duration":204.754,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=9tks__Y0mqI","annotations":[{"segment":[0.19999999999999998,4.1396472113339895],"label":"Having an ice cream"},{"segment":[21.335125041657413,25.474777111920456],"label":"Having an ice cream"},{"segment":[47.446776561778165,49.357385209591875],"label":"Having an ice cream"},{"segment":[53.49703727985492,58.27355889938921],"label":"Having an ice cream"}]},"UVxXr_mh5BM":{"duration":196.511,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UVxXr_mh5BM","annotations":[{"segment":[9.807725429017161,176.55702964118564],"label":"Braiding 
hair"}]},"-r_bvqjYjYg":{"duration":66.06,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=-r_bvqjYjYg","annotations":[{"segment":[5.5781064,38.9549214],"label":"Shot put"}]},"1LnkDmSaKg4":{"duration":70.055,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1LnkDmSaKg4","annotations":[]},"FAZvfHh5B6E":{"duration":21.92,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FAZvfHh5B6E","annotations":[]},"CXSoih6nFME":{"duration":154.81,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CXSoih6nFME","annotations":[{"segment":[78.33698620999999,123.73259089],"label":"Putting on makeup"}]},"lly76lvHbB8":{"duration":17.95,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=lly76lvHbB8","annotations":[{"segment":[0.2,16.441540498442368],"label":"Curling"}]},"UdkzkCJMBaQ":{"duration":37.988,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UdkzkCJMBaQ","annotations":[]},"bvBNoeCBDdg":{"duration":151.232,"subset":"training","resolution":"712x480","url":"https://www.youtube.com/watch?v=bvBNoeCBDdg","annotations":[{"segment":[11.77815923877339,139.2178422023015],"label":"Playing ice hockey"}]},"7fV1wG7yZPI":{"duration":13.468,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=7fV1wG7yZPI","annotations":[]},"MKLwNTbEK4E":{"duration":134.537,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MKLwNTbEK4E","annotations":[{"segment":[12.799627145085804,59.801536661466464],"label":"Playing blackjack"}]},"12v5k4Z8lAE":{"duration":115.8,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=12v5k4Z8lAE","annotations":[{"segment":[41.53442537234041,73.27964735364883],"label":"Wrapping 
presents"}]},"NctohVMw1Kc":{"duration":84.126,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=NctohVMw1Kc","annotations":[]},"wq4nRbBHHwo":{"duration":30.56,"subset":"testing","resolution":"202x360","url":"https://www.youtube.com/watch?v=wq4nRbBHHwo","annotations":[]},"HqGWA16ECWE":{"duration":10.426,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=HqGWA16ECWE","annotations":[{"segment":[0.2,10.201],"label":"Using the monkey bar"}]},"uThAYmmxBFk":{"duration":91.63,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=uThAYmmxBFk","annotations":[{"segment":[5.6679224999999995,19.215734459438377],"label":"Doing step aerobics"},{"segment":[20.49009875,79.36312633619345],"label":"Doing step aerobics"}]},"gi0wjdtxaC4":{"duration":64.46,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gi0wjdtxaC4","annotations":[{"segment":[0,63.9792464898596],"label":"Tumbling"}]},"8jvjOg-6zhA":{"duration":85.496,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8jvjOg-6zhA","annotations":[]},"4CTczKzaPa4":{"duration":52.617,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=4CTczKzaPa4","annotations":[{"segment":[0.2,52.361],"label":"Hitting a pinata"}]},"VSdjLyGA8Oc":{"duration":238.72,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=VSdjLyGA8Oc","annotations":[{"segment":[30.16935018625235,228.69112363406103],"label":"Hurling"}]},"9jFyJhik9VM":{"duration":15.51,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=9jFyJhik9VM","annotations":[{"segment":[0,15.43834566926677],"label":"Windsurfing"}]},"hsUfo_jHQ60":{"duration":83.777,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=hsUfo_jHQ60","annotations":[{"segment":[1.6990729859594385,80.7713173506532],"label":"Running a 
marathon"}]},"SP84bAfiRAo":{"duration":10.728,"subset":"testing","resolution":"432x720","url":"https://www.youtube.com/watch?v=SP84bAfiRAo","annotations":[]},"SGdhHAZHwmI":{"duration":137.973,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=SGdhHAZHwmI","annotations":[{"segment":[40.03583811856475,42.403548975039],"label":"Putting in contact lenses"}]},"re4vD9S8ThA":{"duration":2.415,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=re4vD9S8ThA","annotations":[{"segment":[0.01,1.8860090767676096],"label":"Kneeling"}]},"vctal2m9Zvg":{"duration":20.434,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vctal2m9Zvg","annotations":[{"segment":[3.634049494539781,16.895142386895476],"label":"Calf roping"}]},"WOonpO98TdQ":{"duration":231.67000000000002,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WOonpO98TdQ","annotations":[{"segment":[27.467347925117004,228.77409521840875],"label":"Snatch"}]},"ZRnCnUeqKv4":{"duration":164.31,"subset":"training","resolution":"424x240","url":"https://www.youtube.com/watch?v=ZRnCnUeqKv4","annotations":[{"segment":[8.24313168,112.09311792],"label":"Playing field hockey"},{"segment":[120.33624960000002,156.41879843447737],"label":"Playing field hockey"}]},"E5bRptEXtq8":{"duration":156.758,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=E5bRptEXtq8","annotations":[{"segment":[0.2,145.54350088967976],"label":"Riding bumper cars"}]},"u9aFICSj7zw":{"duration":58.306,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=u9aFICSj7zw","annotations":[{"segment":[9.001277691107644,58.281],"label":"Ice fishing"}]},"CBckvP5FR4A":{"duration":132.12,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=CBckvP5FR4A","annotations":[{"segment":[7.310729329173166,131.49015990639626],"label":"Playing 
guitarra"}]},"c3a9H6a60k0":{"duration":14.58,"subset":"testing","resolution":"176x144","url":"https://www.youtube.com/watch?v=c3a9H6a60k0","annotations":[]},"vBOFzuS-Djo":{"duration":185.783,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=vBOFzuS-Djo","annotations":[]},"Ar3eaYtLlKI":{"duration":56.45,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ar3eaYtLlKI","annotations":[{"segment":[0.01,56.447709750566894],"label":"Walking the dog"}]},"Jsx38_s3Mnc":{"duration":194.26,"subset":"validation","resolution":"480x272","url":"https://www.youtube.com/watch?v=Jsx38_s3Mnc","annotations":[{"segment":[15.758830483831598,173.65018961466458],"label":"Polishing shoes"}]},"Vyj5eIh3jh0":{"duration":10.87,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Vyj5eIh3jh0","annotations":[{"segment":[0.2,10.666938775510205],"label":"Starting a campfire"}]},"cTxVTEzQDr8":{"duration":185.71,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cTxVTEzQDr8","annotations":[]},"Lk_Xe9yQ8Zc":{"duration":157.455,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Lk_Xe9yQ8Zc","annotations":[{"segment":[13.259990639625586,16.20665522620905],"label":"Rope skipping"},{"segment":[18.416653666146647,27.01109204368175],"label":"Rope skipping"},{"segment":[37.324418096723875,42.481081123244934],"label":"Rope skipping"},{"segment":[48.37441029641186,51.32107488299532],"label":"Rope skipping"},{"segment":[56.968848673946965,103.63753822152886],"label":"Rope skipping"},{"segment":[106.09340405616224,116.89921372854914],"label":"Rope skipping"},{"segment":[126.72267706708267,154.71954758190327],"label":"Rope skipping"}]},"_-zOtZZ_fwI":{"duration":182.927,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=_-zOtZZ_fwI","annotations":[{"segment":[60.4999792324493,182.926824],"label":"Removing ice from 
car"}]},"8eqO6e2wWmY":{"duration":53.66,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=8eqO6e2wWmY","annotations":[{"segment":[6.643810411076443,34.97402084321373],"label":"Playing badminton"},{"segment":[40.32249714898597,48.679491376755074],"label":"Playing badminton"}]},"eU27exUJZSM":{"duration":151.28,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=eU27exUJZSM","annotations":[{"segment":[8.966102964118566,137.79484555382214],"label":"Getting a tattoo"}]},"F3tKnLz9YyE":{"duration":22.964,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=F3tKnLz9YyE","annotations":[{"segment":[1.5166429031786341,22.620871220935644],"label":"Swimming"}]},"VxoBV76IkLM":{"duration":112.47999999999999,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=VxoBV76IkLM","annotations":[{"segment":[9.115007800312013,69.45421040000001],"label":"Discus throw"},{"segment":[74.57557919999999,108.67893915756629],"label":"Discus throw"}]},"vdTisVMhW7I":{"duration":187.873,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vdTisVMhW7I","annotations":[{"segment":[68.07808563100478,170.04912936127286],"label":"Assembling bicycle"}]},"lhdro5SnxU0":{"duration":227.37,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=lhdro5SnxU0","annotations":[{"segment":[48.59541660033748,207.50597424336976],"label":"Cleaning sink"}]},"8TDYCXqSHCw":{"duration":22.71,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8TDYCXqSHCw","annotations":[{"segment":[0.46055925429017164,18.13894909204368],"label":"Clean and jerk"}]},"0gLAhptj34w":{"duration":91.65,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0gLAhptj34w","annotations":[{"segment":[0.2,88.22810869603207],"label":"Table 
soccer"}]},"jBKlpn2mE8I":{"duration":209.47,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=jBKlpn2mE8I","annotations":[{"segment":[7.514326053042121,81.02403744149765],"label":"Brushing hair"},{"segment":[120.89418096723868,183.30171762870515],"label":"Brushing hair"},{"segment":[200.618992199688,209.241],"label":"Brushing hair"}]},"1PNjVTM0Zto":{"duration":232.13,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=1PNjVTM0Zto","annotations":[]},"ZazjI26IQUo":{"duration":67.08,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZazjI26IQUo","annotations":[{"segment":[3.5811503507653057,66.79747688137755],"label":"Getting a tattoo"}]},"Db9VWkgMpbE":{"duration":115.43,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=Db9VWkgMpbE","annotations":[]},"NogC2IkaaAI":{"duration":14.536,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NogC2IkaaAI","annotations":[{"segment":[0.01,8.798335819032761],"label":"Sumo"}]},"2cc7BoZHEJI":{"duration":88.38,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=2cc7BoZHEJI","annotations":[{"segment":[48.761790000000005,84.67669364820593],"label":"Playing water polo"},{"segment":[0.4822919898595944,15.915635665366615],"label":"Playing water 
polo"}]},"tx5o8t-JJ_M":{"duration":29.77,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=tx5o8t-JJ_M","annotations":[]},"UB2GzjNzo3M":{"duration":237.332,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UB2GzjNzo3M","annotations":[{"segment":[0.01,196.60310643446147],"label":"BMX"},{"segment":[220.66940006579856,237.33068],"label":"BMX"}]},"xbcP38aF5Ok":{"duration":176.89,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=xbcP38aF5Ok","annotations":[{"segment":[0,42.80454880109204],"label":"Tumbling"}]},"MDucYea4ie8":{"duration":80.016,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MDucYea4ie8","annotations":[{"segment":[2.6838222748663685,80.0153524739229],"label":"Riding bumper cars"}]},"nXpIrzQnprs":{"duration":31.12,"subset":"testing","resolution":"160x120","url":"https://www.youtube.com/watch?v=nXpIrzQnprs","annotations":[]},"V8AkmiL0o70":{"duration":168.554,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=V8AkmiL0o70","annotations":[]},"IlzwpnsIRZM":{"duration":33.62,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IlzwpnsIRZM","annotations":[{"segment":[0.01,22.81713643525741],"label":"Ping-pong"}]},"6XINT0LDdz8":{"duration":39.59,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6XINT0LDdz8","annotations":[{"segment":[1.2954914087363494,37.75432105460218],"label":"Using parallel bars"}]},"DepG0r3JiV4":{"duration":76.07,"subset":"validation","resolution":"848x480","url":"https://www.youtube.com/watch?v=DepG0r3JiV4","annotations":[{"segment":[11.088963761310453,28.04855539625585],"label":"Playing 
squash"}]},"4miOTutCPkE":{"duration":61.742,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=4miOTutCPkE","annotations":[]},"3vtYXzE4i1M":{"duration":41.169,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=3vtYXzE4i1M","annotations":[]},"k85EQoiLckw":{"duration":80.18,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=k85EQoiLckw","annotations":[{"segment":[47.03138338845553,76.5510814726989],"label":"Polishing shoes"}]},"j8lH0saRXl4":{"duration":91.301,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=j8lH0saRXl4","annotations":[{"segment":[0.01,81.54406132707894],"label":"Playing rubik cube"}]},"XHQqBdlu4wE":{"duration":159.986,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XHQqBdlu4wE","annotations":[]},"2PCZkpF1_wU":{"duration":169.553,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=2PCZkpF1_wU","annotations":[{"segment":[57.66358771901896,152.35883561934477],"label":"Making a cake"}]},"F9Wv_Lxe_QM":{"duration":43.05,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=F9Wv_Lxe_QM","annotations":[{"segment":[14.036516296411856,43.049795918367344],"label":"Bungee jumping"}]},"Nk4p-ah_PkA":{"duration":63.18,"subset":"training","resolution":"240x176","url":"https://www.youtube.com/watch?v=Nk4p-ah_PkA","annotations":[{"segment":[0.2,62.98151927437642],"label":"Playing kickball"}]},"KDzAXuav8Ag":{"duration":206.449,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KDzAXuav8Ag","annotations":[]},"n3wYcW4mnrs":{"duration":60.442,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=n3wYcW4mnrs","annotations":[{"segment":[5.2803843201347105,37.90561601239559],"label":"River tubing"},{"segment":[55.34974191419657,60.241541],"label":"River 
tubing"}]},"A0R3KFXD_BU":{"duration":46.673,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=A0R3KFXD_BU","annotations":[{"segment":[1.8918346333853353,46.05889703588144],"label":"Playing blackjack"}]},"AU3_oU1_rfA":{"duration":103.59,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AU3_oU1_rfA","annotations":[{"segment":[47.245670522620905,66.79003337129485],"label":"Shaving"},{"segment":[15.587033180967238,24.955405455538223],"label":"Shaving"},{"segment":[74.54316904680186,81.00411544305771],"label":"Shaving"}]},"v4KygkYPdp0":{"duration":193.237,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=v4KygkYPdp0","annotations":[{"segment":[11.154054691107644,163.0903672402496],"label":"Playing congas"}]},"9A9_sNvJ8zQ":{"duration":230.644,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9A9_sNvJ8zQ","annotations":[{"segment":[44.97733287831513,131.3338120046802],"label":"Using the rowing machine"},{"segment":[205.81627525117005,212.65282984867395],"label":"Using the rowing machine"}]},"xFIfGrhYpAg":{"duration":33.693,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xFIfGrhYpAg","annotations":[{"segment":[0.2,33.361],"label":"Playing ice hockey"}]},"fs2per3zoZQ":{"duration":105.907,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=fs2per3zoZQ","annotations":[{"segment":[13.036971918876755,102.3348205928237],"label":"Carving jack-o-lanterns"}]},"Yfg0bZbxjVg":{"duration":229.25,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Yfg0bZbxjVg","annotations":[{"segment":[109.08180457830558,185.26024414664587],"label":"Bathing 
dog"}]},"zXHyFBrj5Ag":{"duration":27.15,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zXHyFBrj5Ag","annotations":[]},"M7MDkkGdkas":{"duration":51.53,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=M7MDkkGdkas","annotations":[{"segment":[16.22558892355694,37.93983385335413],"label":"Washing hands"}]},"ZkkTMR21XLc":{"duration":116.077,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZkkTMR21XLc","annotations":[{"segment":[24.801274570982837,106.08428393135725],"label":"Rafting"}]},"yCpdDJAW-jU":{"duration":245.807,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=yCpdDJAW-jU","annotations":[]},"fsyMiHRW3z4":{"duration":54.08,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=fsyMiHRW3z4","annotations":[{"segment":[25.858497932137286,42.64753900390016],"label":"Hopscotch"}]},"8DZj0ggPr74":{"duration":36.108,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=8DZj0ggPr74","annotations":[{"segment":[7.322798190327613,34.98044366302652],"label":"Playing ice hockey"}]},"AQnp2QAnwI0":{"duration":198.02,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=AQnp2QAnwI0","annotations":[]},"TbxVdELEiO8":{"duration":137.532,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TbxVdELEiO8","annotations":[{"segment":[19.464060524579196,131.9706257098467],"label":"Mooping 
floor"}]},"-wOaPhSf6OE":{"duration":203.31,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=-wOaPhSf6OE","annotations":[]},"qKp2QCheLwU":{"duration":131.867,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=qKp2QCheLwU","annotations":[]},"azfkn6EsuJA":{"duration":16.533,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=azfkn6EsuJA","annotations":[{"segment":[0.3610776474258971,15.681086402496101],"label":"Waxing skis"}]},"OfvVyonfqvg":{"duration":34.62,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=OfvVyonfqvg","annotations":[]},"UNJTM62HyqQ":{"duration":139.07,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=UNJTM62HyqQ","annotations":[]},"P6Z-7k-erfc":{"duration":67.106,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=P6Z-7k-erfc","annotations":[{"segment":[12.143927607444432,22.298763624014345],"label":"Carving jack-o-lanterns"},{"segment":[25.753501650270092,31.197331267400354],"label":"Carving jack-o-lanterns"},{"segment":[32.55828867168292,41.24747825287161],"label":"Carving jack-o-lanterns"},{"segment":[47.00537496329785,54.64767265834633],"label":"Carving jack-o-lanterns"}]},"F9mk2Dn2tFw":{"duration":40.52,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=F9mk2Dn2tFw","annotations":[]},"QOsG3BeQeX0":{"duration":12.33,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=QOsG3BeQeX0","annotations":[]},"l5ZUUChppHU":{"duration":24.36,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=l5ZUUChppHU","annotations":[]},"PwMDJapSo70":{"duration":85.496,"subset":"training","resolution":"360x240","url":"https://www.youtube.com/watch?v=PwMDJapSo70","annotations":[{"segment":[0.01,85.441],"label":"Clipping cat 
claws"}]},"XXfoSppjX1o":{"duration":223.539,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XXfoSppjX1o","annotations":[]},"Z_F01eV0Iqw":{"duration":228.391,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Z_F01eV0Iqw","annotations":[{"segment":[57.25770567745651,187.97008835863846],"label":"Skiing"}]},"RgzbNJPchqc":{"duration":69.24,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=RgzbNJPchqc","annotations":[{"segment":[6.45741566,37.06781165522621],"label":"Playing bagpipes"}]},"HrWr7FmBYn4":{"duration":182.277,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=HrWr7FmBYn4","annotations":[{"segment":[79.89036037441498,164.04533073322935],"label":"Blow-drying hair"}]},"LACH47i14lY":{"duration":101.82,"subset":"validation","resolution":"854x478","url":"https://www.youtube.com/watch?v=LACH47i14lY","annotations":[{"segment":[12.434032779210463,23.377250190785684],"label":"Futsal"},{"segment":[36.065038494061305,56.68269448688418],"label":"Futsal"},{"segment":[65.56414629917711,96.01483715788821],"label":"Futsal"}]},"f-aGlEjgZoE":{"duration":235.73,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=f-aGlEjgZoE","annotations":[{"segment":[126.50664770536399,136.4359485427036],"label":"Putting on makeup"},{"segment":[180.56617448643524,192.334234738097],"label":"Putting on makeup"}]},"81k4vwur1Gk":{"duration":171.48,"subset":"validation","resolution":"478x360","url":"https://www.youtube.com/watch?v=81k4vwur1Gk","annotations":[{"segment":[2.6751850858034323,8.560592274570983],"label":"Drinking coffee"},{"segment":[138.0395504274571,141.2497725304212],"label":"Drinking coffee"},{"segment":[152.75306839937596,171.27936400000002],"label":"Drinking 
coffee"}]},"X4l1wbSYQFo":{"duration":101.797,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=X4l1wbSYQFo","annotations":[{"segment":[61.30173790951638,87.02941029641187],"label":"Applying sunscreen"}]},"f6j6lb0AaxM":{"duration":134.56,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=f6j6lb0AaxM","annotations":[{"segment":[1.8884695787831514,18.884695787831514],"label":"Playing badminton"},{"segment":[26.018914196567863,44.4905647425897],"label":"Playing badminton"},{"segment":[50.99626053042122,57.08223400936037],"label":"Playing badminton"},{"segment":[64.00765210608424,131.79280499219968],"label":"Playing badminton"}]},"J76bFZWXHFY":{"duration":195.4,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=J76bFZWXHFY","annotations":[{"segment":[0.01,16.761528861154446],"label":"Rock climbing"},{"segment":[24.07565054602184,185.59583775351015],"label":"Rock climbing"}]},"9-XGzUTSsuI":{"duration":91.02,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9-XGzUTSsuI","annotations":[{"segment":[5.8499839399999995,80.96916926677066],"label":"Doing kickboxing"}]},"0LebHO3TvzE":{"duration":18.55,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0LebHO3TvzE","annotations":[{"segment":[12.590394336973478,18.321194517940718],"label":"Kayaking"}]},"bLFudA2DtFs":{"duration":40.64,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bLFudA2DtFs","annotations":[]},"PaAeG07keJE":{"duration":142.014,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=PaAeG07keJE","annotations":[]},"6NqS3vYvf6Q":{"duration":154.9,"subset":"validation","resolution":"848x480","url":"https://www.youtube.com/watch?v=6NqS3vYvf6Q","annotations":[{"segment":[110.6776048798752,146.68407458970358],"label":"Horseback 
riding"}]},"N4wKdmmX0lk":{"duration":38.824,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=N4wKdmmX0lk","annotations":[{"segment":[0.20000000000000004,38.6237641723356],"label":"Powerbocking"}]},"Uae2MWoldTU":{"duration":202.989,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=Uae2MWoldTU","annotations":[]},"BydyVDyq_mE":{"duration":32.44,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=BydyVDyq_mE","annotations":[]},"UKhH_iEifrU":{"duration":187.85,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UKhH_iEifrU","annotations":[{"segment":[5.275002069470534,179.64312430421217],"label":"Making a lemonade"}]},"NpKBAtQ5M6I":{"duration":173.76,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NpKBAtQ5M6I","annotations":[{"segment":[27.670483619344772,155.44301092043682],"label":"Fixing bicycle"}]},"Xpb1Lup1GAU":{"duration":143.732,"subset":"training","resolution":"810x1440","url":"https://www.youtube.com/watch?v=Xpb1Lup1GAU","annotations":[{"segment":[0.2,72.8716533285813],"label":"Knitting"},{"segment":[102.15443563797992,106.40155579289437],"label":"Knitting"}]},"2ZoraV9ts0c":{"duration":21.67,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2ZoraV9ts0c","annotations":[]},"Aqx7rM9gZgg":{"duration":102.35,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Aqx7rM9gZgg","annotations":[{"segment":[17.076932917316697,98.63125741029641],"label":"Grooming 
horse"}]},"wdecG9VSAl0":{"duration":139.761,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=wdecG9VSAl0","annotations":[{"segment":[0.20000000000000004,16.025626625065],"label":"Canoeing"},{"segment":[111.0892076700468,126.35170921450859],"label":"Canoeing"}]},"GxHHcl962mU":{"duration":131.031,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GxHHcl962mU","annotations":[]},"GkPmeXaNY0s":{"duration":108.229,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GkPmeXaNY0s","annotations":[{"segment":[68.04363193847483,94.21425819968799],"label":"Applying sunscreen"}]},"Z8xxgFpK-NM":{"duration":146.378,"subset":"training","resolution":"352x288","url":"https://www.youtube.com/watch?v=Z8xxgFpK-NM","annotations":[{"segment":[9.132043681747271,138.57876287051482],"label":"Capoeira"}]},"AH4v5vqsUlc":{"duration":56.332,"subset":"validation","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=AH4v5vqsUlc","annotations":[{"segment":[25.032425897035882,34.34273166926677],"label":"Fun sliding down"}]},"D6HdoIsLMFg":{"duration":80.852,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=D6HdoIsLMFg","annotations":[]},"du3da-_eL-Q":{"duration":162.122,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=du3da-_eL-Q","annotations":[]},"VwclmKWo_-M":{"duration":120.91,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VwclmKWo_-M","annotations":[{"segment":[58.79725798955661,113.40082496774976],"label":"Making a 
sandwich"}]},"NcjQI0avKHE":{"duration":220.544,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NcjQI0avKHE","annotations":[{"segment":[15.307791731669267,23.563679407176288],"label":"Snowboarding"},{"segment":[46.26737051482059,96.1466918876755],"label":"Snowboarding"},{"segment":[119.53837363494539,151.5299383775351],"label":"Snowboarding"},{"segment":[173.20164352574102,220.521],"label":"Snowboarding"}]},"Qpo9dRh1-tI":{"duration":131.04,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Qpo9dRh1-tI","annotations":[]},"Gs79qz286QE":{"duration":111.851,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Gs79qz286QE","annotations":[]},"NbmseYFNIlA":{"duration":49.13,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=NbmseYFNIlA","annotations":[{"segment":[8.086702225429018,42.579744893135725],"label":"Triple jump"}]},"kuPWb9E4aUQ":{"duration":173.221,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kuPWb9E4aUQ","annotations":[{"segment":[15.379406192383493,130.32023141967068],"label":"Scuba diving"}]},"Nj_fvFbf56I":{"duration":32.86,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=Nj_fvFbf56I","annotations":[{"segment":[0.42338113065323874,32.51194611196165],"label":"Hopscotch"}]},"suwVnxHZtI8":{"duration":103.654,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=suwVnxHZtI8","annotations":[{"segment":[3.3958368620459107,101.06657176287051],"label":"Snow 
tubing"}]},"ULwdDmQ8Z_8":{"duration":122.834,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ULwdDmQ8Z_8","annotations":[{"segment":[22.270757718113916,122.7187615732541],"label":"Waterskiing"}]},"-1CEVKeAyA8":{"duration":202.386,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-1CEVKeAyA8","annotations":[{"segment":[20.522658588143525,179.33646273946957],"label":"Knitting"}]},"iJPPWotXvV0":{"duration":35.46,"subset":"training","resolution":"322x240","url":"https://www.youtube.com/watch?v=iJPPWotXvV0","annotations":[{"segment":[4.25924947425897,25.83207148673947],"label":"Clean and jerk"}]},"tlgEi4bU9Fc":{"duration":86.75,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=tlgEi4bU9Fc","annotations":[{"segment":[1.082680174726989,21.112263407176286],"label":"Removing curlers"}]},"dn1qrAHh7k0":{"duration":105.7,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=dn1qrAHh7k0","annotations":[{"segment":[29.021392873634948,105.20254916692667],"label":"Making a sandwich"}]},"63HZk1SInLk":{"duration":110.388,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=63HZk1SInLk","annotations":[{"segment":[40.98637149295496,104.3602568265996],"label":"Making a cake"}]},"rDYeRbucGt4":{"duration":82.803,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=rDYeRbucGt4","annotations":[{"segment":[0.01,82.741],"label":"Kneeling"}]},"G8dCenteoT0":{"duration":27.052,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=G8dCenteoT0","annotations":[{"segment":[4.459458455330032,11.443139683134786],"label":"Putting in contact 
lenses"}]},"bOp6pObPeZ4":{"duration":154.37,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=bOp6pObPeZ4","annotations":[{"segment":[10.952297191887675,154.295],"label":"Tango"}]},"-2VzSMAdzl4":{"duration":223.445,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-2VzSMAdzl4","annotations":[{"segment":[36.23953822152886,42.511765990639624],"label":"Waterskiing"},{"segment":[45.29942277691107,99.31027301092044],"label":"Waterskiing"},{"segment":[111.85472854914195,210.8354212168487],"label":"Waterskiing"}]},"2jpnkf-xgf0":{"duration":161.681,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2jpnkf-xgf0","annotations":[{"segment":[9.832198127925118,141.9541700468019],"label":"Baking cookies"}]},"zPV8s8ZuLBY":{"duration":185.78,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=zPV8s8ZuLBY","annotations":[{"segment":[7.535655031643443,53.61908387900141],"label":"Playing harmonica"},{"segment":[58.5462429381529,60.575073138979974],"label":"Playing harmonica"},{"segment":[64.34290065480171,69.84972548561805],"label":"Playing harmonica"},{"segment":[93.3261892380457,100.28217849802427],"label":"Playing harmonica"},{"segment":[169.26240366926677,182.30488340405617],"label":"Playing harmonica"}]},"ZGnnFIDJjaY":{"duration":142.269,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZGnnFIDJjaY","annotations":[{"segment":[11.759302652106085,27.956078003120126],"label":"Applying sunscreen"},{"segment":[40.38100156006241,57.68714508580343],"label":"Applying sunscreen"},{"segment":[78.76514040561624,126.02422464898596],"label":"Applying sunscreen"}]},"KE2tDW9x9d8":{"duration":203.06,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KE2tDW9x9d8","annotations":[{"segment":[69.0588287450858,190.07017085803432],"label":"Making a 
sandwich"}]},"AhFc_MuiZIc":{"duration":83.871,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=AhFc_MuiZIc","annotations":[]},"0z_dyY_89ys":{"duration":96.43,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=0z_dyY_89ys","annotations":[{"segment":[66.19359931357255,82.89243914040561],"label":"Cleaning windows"}]},"3ROld-bSgPQ":{"duration":84.568,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3ROld-bSgPQ","annotations":[{"segment":[0.2,27.092888603027895],"label":"Having an ice cream"},{"segment":[40.90237168086245,52.344514802496796],"label":"Having an ice cream"},{"segment":[70.7571589062762,80.48955650398818],"label":"Having an ice cream"}]},"lU6DMCif3eE":{"duration":129.82,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lU6DMCif3eE","annotations":[]},"MjRsR_7ECi8":{"duration":30.14,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MjRsR_7ECi8","annotations":[{"segment":[16.572340258268554,19.798896333034907],"label":"Gargling mouthwash"}]},"huC1ZNVVmM0":{"duration":148.515,"subset":"testing","resolution":"320x208","url":"https://www.youtube.com/watch?v=huC1ZNVVmM0","annotations":[]},"kWPvAxLzGw8":{"duration":206.89,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kWPvAxLzGw8","annotations":[{"segment":[51.48026538748625,67.61827961553837],"label":"Getting a tattoo"}]},"Z7ZODw0C_hY":{"duration":226.697,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=Z7ZODw0C_hY","annotations":[{"segment":[65.7807512948518,120.2443840873635],"label":"Trimming branches or hedges"},{"segment":[137.22006184087363,202.29349322932916],"label":"Trimming branches or 
hedges"}]},"YRp5nE0Hlsc":{"duration":161.01,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=YRp5nE0Hlsc","annotations":[{"segment":[12.810242499495898,134.13077440648647],"label":"Ballet"}]},"EIibo7aTpys":{"duration":170.81,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EIibo7aTpys","annotations":[{"segment":[23.715651777091495,163.34488246468635],"label":"Doing kickboxing"}]},"VEihQG2UWKE":{"duration":125.202,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=VEihQG2UWKE","annotations":[{"segment":[0.2,125.0019954648526],"label":"Kite flying"}]},"ASXqlsSfZ5E":{"duration":21.734,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ASXqlsSfZ5E","annotations":[{"segment":[0.2,21.53387755102041],"label":"Powerbocking"}]},"KTq7Gblfl-g":{"duration":68.894,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KTq7Gblfl-g","annotations":[]},"mhU_KzxJyr4":{"duration":56.193,"subset":"training","resolution":"562x480","url":"https://www.youtube.com/watch?v=mhU_KzxJyr4","annotations":[{"segment":[30.28773264692533,51.940612426348075],"label":"Camel ride"}]},"bXp_reTHyHY":{"duration":101.94,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=bXp_reTHyHY","annotations":[{"segment":[7.3151913287415855,93.6662524352574],"label":"Grooming horse"}]},"H0l29-F7Edg":{"duration":40.868,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=H0l29-F7Edg","annotations":[{"segment":[0.2,9.818309400936037],"label":"Laying tile"}]},"5l2_BVOqQX8":{"duration":219.336,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5l2_BVOqQX8","annotations":[{"segment":[40.02476911076443,148.81003900156009],"label":"Using the monkey 
bar"}]},"jprf0pE-4uI":{"duration":88.979,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jprf0pE-4uI","annotations":[{"segment":[13.603546541861675,88.77829931972789],"label":"Blowing leaves"}]},"opxLlaEPxqw":{"duration":117.03,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=opxLlaEPxqw","annotations":[{"segment":[0,88.89155538221529],"label":"Javelin throw"}]},"67FrPviS7ws":{"duration":135.628,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=67FrPviS7ws","annotations":[]},"g9XLOpSmr3o":{"duration":135.42,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=g9XLOpSmr3o","annotations":[]},"zwm-v9MLOFY":{"duration":224.7,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zwm-v9MLOFY","annotations":[{"segment":[22.61013609828393,190.87161403900157],"label":"Breakdancing"}]},"NRaKsY2xtqU":{"duration":13.909,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=NRaKsY2xtqU","annotations":[]},"FIzlf7jdsUY":{"duration":149.61,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=FIzlf7jdsUY","annotations":[{"segment":[82.62181398439937,104.32754477691107],"label":"Removing curlers"}]},"Isx1Q5wyJZg":{"duration":215.83,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Isx1Q5wyJZg","annotations":[{"segment":[0.2,17.508787644022767],"label":"Rope skipping"},{"segment":[31.313793286425334,52.52636293206831],"label":"Rope skipping"},{"segment":[63.30100148223616,133.67285951301997],"label":"Rope skipping"},{"segment":[145.45762042726608,157.5790887962049],"label":"Rope skipping"},{"segment":[184.51568517162457,194.95361585335414],"label":"Rope 
skipping"}]},"BUk-fR6TAnk":{"duration":184.92,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=BUk-fR6TAnk","annotations":[{"segment":[45.870314028887684,166.7486870920437],"label":"Wrapping presents"}]},"J3coWVRd5do":{"duration":44.14,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=J3coWVRd5do","annotations":[{"segment":[7.506012074882996,34.63783553822153],"label":"High jump"}]},"1sTTv-XC-RA":{"duration":93.46000000000001,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1sTTv-XC-RA","annotations":[{"segment":[2.3654726,19.6181886],"label":"Kayaking"},{"segment":[24.106137799999996,77.13029536817473],"label":"Kayaking"}]},"NS8qkB39x9U":{"duration":64.04,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=NS8qkB39x9U","annotations":[{"segment":[0.01,64.040633],"label":"Removing curlers"}]},"ANuV_fDgI54":{"duration":223.35,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ANuV_fDgI54","annotations":[{"segment":[7.665772796898271,220.21674580180488],"label":"Pole vault"}]},"DvIng_zQPyY":{"duration":199.95,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DvIng_zQPyY","annotations":[{"segment":[4.5222808112324495,165.45310140405618],"label":"Dodgeball"}]},"i-VlZZWf7pY":{"duration":125.365,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=i-VlZZWf7pY","annotations":[]},"EYIYohKR0Qo":{"duration":108.623,"subset":"validation","resolution":"853x480","url":"https://www.youtube.com/watch?v=EYIYohKR0Qo","annotations":[{"segment":[11.184266080847316,71.51151827145087],"label":"Swimming"},{"segment":[81.1706569953198,106.7589016848674],"label":"Swimming"}]},"Oj_PLXsWxL4":{"duration":171.36,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Oj_PLXsWxL4","annotations":[{"segment":[0,171.3631746031746],"label":"Clean 
and jerk"}]},"9mF5s6_dTlk":{"duration":130.357,"subset":"training","resolution":"544x360","url":"https://www.youtube.com/watch?v=9mF5s6_dTlk","annotations":[{"segment":[0.01,130.301],"label":"Waxing skis"}]},"5-EESwowW1Y":{"duration":67.06,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=5-EESwowW1Y","annotations":[]},"I3BWhaDRxGk":{"duration":34.947,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=I3BWhaDRxGk","annotations":[{"segment":[0.01,32.82138855651903],"label":"Tug of war"}]},"KYtV2vpwuVw":{"duration":98.709,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KYtV2vpwuVw","annotations":[{"segment":[41.243987519500784,72.64013728549142],"label":"BMX"}]},"yqkjHCPsdF0":{"duration":40.57,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=yqkjHCPsdF0","annotations":[{"segment":[0,40.565237],"label":"Playing water polo"}]},"800KXmqsK-w":{"duration":184.32,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=800KXmqsK-w","annotations":[{"segment":[15.524452418096725,184.261],"label":"Riding bumper cars"}]},"Jb8YAhQ6EMk":{"duration":54.15,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Jb8YAhQ6EMk","annotations":[{"segment":[1.0981843109204368,49.8406725725429],"label":"Washing 
dishes"}]},"KmhzUY2ooCw":{"duration":97.385,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=KmhzUY2ooCw","annotations":[{"segment":[13.990528992162686,26.423491608985117],"label":"Croquet"},{"segment":[40.52429067440568,43.10185609496642],"label":"Croquet"},{"segment":[52.66482876561958,56.75945026094669],"label":"Croquet"},{"segment":[80.41726334505883,87.5449377998875],"label":"Croquet"}]},"YIT4kdS5IVQ":{"duration":201.386,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YIT4kdS5IVQ","annotations":[]},"Hi0L9rcsXUI":{"duration":30.0,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Hi0L9rcsXUI","annotations":[{"segment":[10.106262878629739,26.087667177695156],"label":"Using uneven bars"}]},"-Sp-gOEGy2o":{"duration":229.34,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-Sp-gOEGy2o","annotations":[{"segment":[40.78052106084243,210.69935881435256],"label":"Starting a campfire"}]},"hjuvoK5En4s":{"duration":12.028,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=hjuvoK5En4s","annotations":[{"segment":[0.19999999999999998,11.781],"label":"Swinging at the playground"}]},"o8PA6XW32oI":{"duration":31.58,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=o8PA6XW32oI","annotations":[{"segment":[0.01,29.583887314352573],"label":"Playing rubik cube"}]},"UFmKIOTYWNs":{"duration":226.07,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UFmKIOTYWNs","annotations":[]},"or7N8oeI-SY":{"duration":218.9,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=or7N8oeI-SY","annotations":[{"segment":[49.84840873634945,116.08533541341653],"label":"Cleaning 
shoes"}]},"ntS2PA5YWuA":{"duration":139.088,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ntS2PA5YWuA","annotations":[{"segment":[18.842004672897193,132.5437570093458],"label":"Changing car wheel"}]},"bHgNCJohwSg":{"duration":41.75,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bHgNCJohwSg","annotations":[]},"6RePzOd3GvQ":{"duration":13.52,"subset":"validation","resolution":"480x480","url":"https://www.youtube.com/watch?v=6RePzOd3GvQ","annotations":[{"segment":[5.045960998439937,6.2603416536661465],"label":"Brushing teeth"}]},"f119wB0sj0w":{"duration":114.64,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=f119wB0sj0w","annotations":[{"segment":[34.2131142,82.30520892],"label":"Cricket"},{"segment":[34.2131142,82.30520892],"label":"Cricket"}]},"XEriJg8cW4g":{"duration":74.78999999999999,"subset":"validation","resolution":"648x480","url":"https://www.youtube.com/watch?v=XEriJg8cW4g","annotations":[{"segment":[60.14454404681586,72.37679414027382],"label":"Playing bagpipes"}]},"YzPsXYD1OgQ":{"duration":85.821,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=YzPsXYD1OgQ","annotations":[]},"8Qg395HjqFg":{"duration":165.86,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8Qg395HjqFg","annotations":[{"segment":[23.934575015653685,73.61498971528862],"label":"Making a sandwich"}]},"tRAntV9Om2o":{"duration":127.757,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=tRAntV9Om2o","annotations":[{"segment":[42.05391727777955,122.17559829594383],"label":"Cutting the 
grass"}]},"g_T8qK7qTFI":{"duration":64.34,"subset":"training","resolution":"400x320","url":"https://www.youtube.com/watch?v=g_T8qK7qTFI","annotations":[{"segment":[51.19293514820593,63.53946788075606],"label":"Cricket"}]},"r1ZhBGLfRWY":{"duration":14.258,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=r1ZhBGLfRWY","annotations":[{"segment":[7.94246177847114,8.408360374414976],"label":"Hitting a pinata"}]},"IfiAeg5elK4":{"duration":128.57,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IfiAeg5elK4","annotations":[]},"LlhgIOYsxsE":{"duration":74.978,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=LlhgIOYsxsE","annotations":[{"segment":[15.140186427457099,26.0130616224649],"label":"Using the rowing machine"},{"segment":[28.818964898595947,32.20943135725429],"label":"Using the rowing machine"},{"segment":[35.95063572542902,38.28888845553822],"label":"Using the rowing machine"},{"segment":[40.39331591263651,42.497743369734785],"label":"Using the rowing machine"}]},"e4mXFJzcWLs":{"duration":104.165,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=e4mXFJzcWLs","annotations":[]},"Vm07Uu4_iCE":{"duration":68.314,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Vm07Uu4_iCE","annotations":[{"segment":[0.01,67.78024303012936],"label":"Changing car wheel"}]},"S6Sg1l78IW0":{"duration":95.34,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=S6Sg1l78IW0","annotations":[]},"Sk1RSHPavBU":{"duration":9.869,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=Sk1RSHPavBU","annotations":[{"segment":[0.009999999999999998,9.8684806],"label":"Beach 
soccer"}]},"1Y3BV0Awjuo":{"duration":111.294,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=1Y3BV0Awjuo","annotations":[{"segment":[10.591077822704744,109.55688575351014],"label":"Peeling potatoes"}]},"tgPjcWP7Vig":{"duration":174.36,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=tgPjcWP7Vig","annotations":[{"segment":[0,146.70076575],"label":"Spinning"}]},"or8ypNphr1A":{"duration":17.74,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=or8ypNphr1A","annotations":[]},"r9eXOf4hvCE":{"duration":68.15,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=r9eXOf4hvCE","annotations":[{"segment":[0.6905954204368175,65.92530128939158],"label":"Using parallel bars"}]},"2wC0d2XUses":{"duration":30.906,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2wC0d2XUses","annotations":[{"segment":[0.01,30.905668934240364],"label":"Shuffleboard"}]},"GEiTdrt_boA":{"duration":167.07,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GEiTdrt_boA","annotations":[]},"SCM5-6hQCGY":{"duration":120.024,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=SCM5-6hQCGY","annotations":[{"segment":[2.24693813875004,117.9642522843771],"label":"Building 
sandcastles"}]},"dhISoJW6Ii0":{"duration":60.93,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=dhISoJW6Ii0","annotations":[]},"e51ld7ANyQg":{"duration":167.858,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=e51ld7ANyQg","annotations":[{"segment":[41.869828393135734,57.04764118564743],"label":"Curling"},{"segment":[85.57146177847115,121.68418876755071],"label":"Curling"},{"segment":[133.4759906396256,137.4017550702028],"label":"Curling"}]},"gRzHp84ptgE":{"duration":99.335,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gRzHp84ptgE","annotations":[{"segment":[31.923386715060442,76.24420380655226],"label":"Surfing"}]},"G2soQTiGL10":{"duration":204.96,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=G2soQTiGL10","annotations":[{"segment":[61.38427774479469,194.8334068475984],"label":"Preparing pasta"}]},"nyg5Q5E2-AU":{"duration":129.614,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nyg5Q5E2-AU","annotations":[]},"rob5gIT_pAw":{"duration":60.976,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=rob5gIT_pAw","annotations":[{"segment":[0.2,4.267343904837187],"label":"Welding"},{"segment":[26.078219470584518,27.974817355498985],"label":"Welding"},{"segment":[44.75970863699198,51.87195070542122],"label":"Welding"}]},"t97xM9sY2yg":{"duration":130.73,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=t97xM9sY2yg","annotations":[{"segment":[15.527915604243768,118.33789223975778],"label":"Preparing pasta"}]},"QHJTOHgjDMw":{"duration":96.13,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=QHJTOHgjDMw","annotations":[{"segment":[19.04608202028081,43.491053432137285],"label":"Playing piano"},{"segment":[49.03991197347894,87.88192176287052],"label":"Playing 
piano"}]},"SWEWGVoDiz4":{"duration":176.17000000000002,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SWEWGVoDiz4","annotations":[]},"fgP2pf2rh4Q":{"duration":235.52,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=fgP2pf2rh4Q","annotations":[{"segment":[42.25397815912637,235.52],"label":"Smoking a cigarette"}]},"fd7VuzALBCM":{"duration":81.201,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fd7VuzALBCM","annotations":[{"segment":[16.460109204368173,60.030879875195005],"label":"Removing ice from car"}]},"YAPkcWlWmYg":{"duration":222.56,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YAPkcWlWmYg","annotations":[{"segment":[15.277342620904838,219.7854063416537],"label":"Zumba"}]},"U6S8a3WI19w":{"duration":117.772,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=U6S8a3WI19w","annotations":[{"segment":[8.543494327528203,110.51423307544547],"label":"Beach soccer"}]},"mLFjww64sX8":{"duration":47.56,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=mLFjww64sX8","annotations":[]},"c_KU8M2YfzA":{"duration":171.201,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=c_KU8M2YfzA","annotations":[{"segment":[29.699899783755388,163.80267704232236],"label":"Tug of 
war"}]},"nStBfiFl6RU":{"duration":171.92000000000002,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nStBfiFl6RU","annotations":[{"segment":[1.0728259656786272,168.16547012012484],"label":"Zumba"}]},"oeFUzyNgxQU":{"duration":145.706,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=oeFUzyNgxQU","annotations":[{"segment":[0.01,145.661],"label":"Slacklining"}]},"k3qEvYiUthQ":{"duration":191.054,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=k3qEvYiUthQ","annotations":[]},"Zv78Or7fW5U":{"duration":50.85,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=Zv78Or7fW5U","annotations":[{"segment":[7.166301092043682,17.302063962558503],"label":"Long jump"},{"segment":[29.655024960998443,41.928800312012484],"label":"Long jump"}]},"FBbN6FHC7L8":{"duration":65.9,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=FBbN6FHC7L8","annotations":[{"segment":[3.39572394,46.29466296],"label":"Ballet"}]},"9XmzbuByY_E":{"duration":49.668,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=9XmzbuByY_E","annotations":[{"segment":[6.27288767550702,28.886260530421215],"label":"Shuffleboard"},{"segment":[42.43879563182527,46.465834633385334],"label":"Shuffleboard"}]},"Ft3SzVtUsKA":{"duration":75.09,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Ft3SzVtUsKA","annotations":[{"segment":[50.94725429017161,73.31719812792511],"label":"Springboard diving"}]},"xx5TLuZmjcE":{"duration":6.85,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xx5TLuZmjcE","annotations":[{"segment":[0.7266649734789391,5.51410479875195],"label":"Playing 
squash"}]},"cS19K2310Lc":{"duration":205.474,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cS19K2310Lc","annotations":[{"segment":[38.14553648812619,179.5084070029468],"label":"Braiding hair"}]},"gg_F8EtNMW0":{"duration":10.12,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=gg_F8EtNMW0","annotations":[{"segment":[0.576477868174727,10.123899],"label":"Doing kickboxing"}]},"r_jey4tT7zo":{"duration":52.41,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=r_jey4tT7zo","annotations":[{"segment":[14.716597113884555,49.627635822932916],"label":"Ballet"}]},"R_ffZ9kGeTI":{"duration":168.902,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=R_ffZ9kGeTI","annotations":[{"segment":[8.4157507788162,168.861],"label":"Longboarding"}]},"1DvNkSGk-JA":{"duration":67.431,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1DvNkSGk-JA","annotations":[{"segment":[0.01,55.45813162432281],"label":"Waterskiing"}]},"DmaPpBMsuXg":{"duration":25.71,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DmaPpBMsuXg","annotations":[{"segment":[10.185401188767552,21.052502457098285],"label":"Playing badminton"}]},"sXdKLXuww-U":{"duration":184.67,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=sXdKLXuww-U","annotations":[]},"IKhLjVIYisg":{"duration":194.375,"subset":"training","resolution":"656x480","url":"https://www.youtube.com/watch?v=IKhLjVIYisg","annotations":[{"segment":[26.98060686427457,178.55704992199688],"label":"Rollerblading"}]},"hXGE1GGTHzM":{"duration":73.376,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=hXGE1GGTHzM","annotations":[{"segment":[1.1438533541341653,65.10300936037441],"label":"Playing 
pool"}]},"46ac33Z6V4E":{"duration":79.9,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=46ac33Z6V4E","annotations":[{"segment":[42.35996879875195,77.61841341653667],"label":"Removing ice from car"}]},"yrGietojx78":{"duration":140.853,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=yrGietojx78","annotations":[{"segment":[34.915755070202806,126.94448985959438],"label":"Cutting the grass"}]},"4G2jW3hbiO4":{"duration":165.153,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4G2jW3hbiO4","annotations":[{"segment":[25.465668224299062,89.00122429906541],"label":"Wakeboarding"}]},"igrjxhf0XyY":{"duration":57.679,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=igrjxhf0XyY","annotations":[{"segment":[6.553031201248049,44.16563494539782],"label":"Applying sunscreen"}]},"jA-KApGlXyA":{"duration":66.99,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jA-KApGlXyA","annotations":[{"segment":[28.518491419656783,34.159511700468016],"label":"Playing racquetball"},{"segment":[50.66471918876755,59.75302964118564],"label":"Playing racquetball"}]},"o1WPnnvs00I":{"duration":229.86,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=o1WPnnvs00I","annotations":[{"segment":[4.303033313169262,13.626272158369328],"label":"Playing flauta"},{"segment":[17.92930547153859,24.025269212168485],"label":"Playing flauta"},{"segment":[28.68688861154446,35.50002465678627],"label":"Playing flauta"},{"segment":[39.085885733229325,40.52023016380655],"label":"Playing flauta"},{"segment":[45.89902177847114,48.767710639625584],"label":"Playing flauta"},{"segment":[59.16670776131045,63.469741053042114],"label":"Playing flauta"},{"segment":[68.1313604524181,95.02531852574101],"label":"Playing flauta"},{"segment":[98.96976570982838,99.686937925117],"label":"Playing 
flauta"},{"segment":[100.7626962480499,108.65159061622464],"label":"Playing flauta"},{"segment":[111.16169336973478,111.52027947737909],"label":"Playing flauta"},{"segment":[114.38896833853353,120.84351827613104],"label":"Playing flauta"},{"segment":[125.50513767550702,128.73241264430575],"label":"Playing flauta"},{"segment":[131.95968761310453,140.56575419656784],"label":"Playing flauta"},{"segment":[143.075856950078,157.06071514820593],"label":"Playing flauta"},{"segment":[158.49505957878316,162.0809206552262],"label":"Playing flauta"},{"segment":[164.59102340873633,166.3839539469579],"label":"Playing flauta"},{"segment":[168.894056700468,171.04557334633384],"label":"Playing flauta"},{"segment":[173.1970899921997,190.40922315912636],"label":"Playing flauta"},{"segment":[193.9950842355694,194.712256450858],"label":"Playing flauta"},{"segment":[204.7526674648986,212.6415618330733],"label":"Playing flauta"},{"segment":[214.43449237129485,217.66176734009358],"label":"Playing flauta"},{"segment":[219.81328398595943,221.24762841653666],"label":"Playing flauta"},{"segment":[222.68197284711388,224.8334894929797],"label":"Playing flauta"}]},"YJcjkeVD0qk":{"duration":115.89,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=YJcjkeVD0qk","annotations":[{"segment":[16.445446177847113,115.841],"label":"Smoking hookah"}]},"2SMmL6kIx-w":{"duration":231.97,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2SMmL6kIx-w","annotations":[{"segment":[9.407132605304213,221.067616224649],"label":"Ballet"}]},"3dAJEnMn6QA":{"duration":184.251,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=3dAJEnMn6QA","annotations":[{"segment":[0.19999999999999998,61.48944461778472],"label":"Building sandcastles"},{"segment":[75.28146957878315,138.79732137285492],"label":"Building 
sandcastles"}]},"uz04njTFKP8":{"duration":45.77,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uz04njTFKP8","annotations":[{"segment":[7.925249171606865,8.782032865834633],"label":"Tennis serve with ball bouncing"}]},"ncTkaZcDcTo":{"duration":89.908,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ncTkaZcDcTo","annotations":[{"segment":[0.19999999999999998,2.5246910121302815],"label":"Longboarding"},{"segment":[11.781891389941313,18.935182590977107],"label":"Longboarding"},{"segment":[21.880655438462437,50.213298374414975],"label":"Longboarding"}]},"58he7Xcwfws":{"duration":36.22,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=58he7Xcwfws","annotations":[{"segment":[6.131306563182527,34.83825342121684],"label":"Clean and jerk"}]},"oR0NlCzEvfw":{"duration":7.71,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=oR0NlCzEvfw","annotations":[]},"9aJfBzzywJE":{"duration":44.327,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=9aJfBzzywJE","annotations":[]},"rKSaO4xIUDY":{"duration":53.105,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rKSaO4xIUDY","annotations":[]},"Hm3DAsKWs8E":{"duration":62.56,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Hm3DAsKWs8E","annotations":[]},"SqjBRSIWMoA":{"duration":215.993,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SqjBRSIWMoA","annotations":[]},"vV_d5bfNmBQ":{"duration":106.092,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=vV_d5bfNmBQ","annotations":[{"segment":[2.151631273414202,96.98891722870515],"label":"Playing beach 
volleyball"}]},"1P_y_DLlg-E":{"duration":26.285,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1P_y_DLlg-E","annotations":[{"segment":[5.781877235569422,26.284988],"label":"Blowing leaves"}]},"E8Ae4lFrJ18":{"duration":111.921,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=E8Ae4lFrJ18","annotations":[]},"cMf_8UIl9uw":{"duration":174.731,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cMf_8UIl9uw","annotations":[]},"3QAGRGjqQZY":{"duration":201.201,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=3QAGRGjqQZY","annotations":[]},"ECDCTq_bJwg":{"duration":16.42,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ECDCTq_bJwg","annotations":[]},"8W-x068F4tE":{"duration":58.31,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8W-x068F4tE","annotations":[{"segment":[7.00099375975039,54.098588143525745],"label":"Using the pommel horse"}]},"H6E-PA807Vo":{"duration":84.173,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=H6E-PA807Vo","annotations":[]},"afrMWrFkgDg":{"duration":80.736,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=afrMWrFkgDg","annotations":[{"segment":[13.593678627145087,14.856034321372853],"label":"Having an ice cream"},{"segment":[25.683313572542897,61.438514820592815],"label":"Having an ice cream"},{"segment":[77.42763650546021,80.50099999999999],"label":"Having an ice cream"}]},"0gf3AgK1YLY":{"duration":133.05,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=0gf3AgK1YLY","annotations":[{"segment":[10.3698127925117,122.5711872074883],"label":"Putting on 
makeup"}]},"XbkGlZTlixw":{"duration":124.39,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XbkGlZTlixw","annotations":[{"segment":[24.64499312324493,118.37358901716068],"label":"Longboarding"}]},"b8pCuIPzb3o":{"duration":46.456,"subset":"validation","resolution":"3840x2160","url":"https://www.youtube.com/watch?v=b8pCuIPzb3o","annotations":[{"segment":[4.67998004818152,8.59810287921721],"label":"Fun sliding down"},{"segment":[19.19154650169095,20.279913978157357],"label":"Fun sliding down"},{"segment":[26.737561005191363,32.687303209874386],"label":"Fun sliding down"},{"segment":[37.984024928677556,40.37843337690365],"label":"Fun sliding down"}]},"jN2CFk0xKp8":{"duration":32.25,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=jN2CFk0xKp8","annotations":[]},"Lr-FutvMXoA":{"duration":43.26,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Lr-FutvMXoA","annotations":[{"segment":[6.97362218,42.92114094664586],"label":"Pole vault"}]},"GWRqx0obKzA":{"duration":64.738,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GWRqx0obKzA","annotations":[{"segment":[8.281480240249609,10.402347131045243],"label":"River tubing"},{"segment":[12.624207683307333,16.057992173166927],"label":"River tubing"},{"segment":[21.309662569422777,26.864313950078003],"label":"River tubing"},{"segment":[34.84281320592824,44.33621738377535],"label":"River tubing"}]},"T-2mwXMMZIk":{"duration":232.154,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=T-2mwXMMZIk","annotations":[]},"sAAARH12tdc":{"duration":211.6,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=sAAARH12tdc","annotations":[{"segment":[33.01144399517477,144.26001025891375],"label":"Putting on makeup"},{"segment":[160.10550337659765,176.94133981413677],"label":"Putting on 
makeup"},{"segment":[181.2328275335095,204.67095179407175],"label":"Putting on makeup"}]},"ruHRz56XGE4":{"duration":43.63,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ruHRz56XGE4","annotations":[{"segment":[4.419290171606864,37.597960998439945],"label":"Mowing the lawn"}]},"x_luDzL03vw":{"duration":229.69,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=x_luDzL03vw","annotations":[{"segment":[12.18330867656475,225.03287790831362],"label":"Tai chi"}]},"kUwCHpiuCuA":{"duration":153.415,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kUwCHpiuCuA","annotations":[{"segment":[61.26988025210609,86.40010457425898],"label":"Elliptical trainer"}]},"G-xh30e4s8E":{"duration":160.01,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=G-xh30e4s8E","annotations":[{"segment":[6.4902127840215655,154.01774183620407],"label":"Playing bagpipes"}]},"6AwS8vWMwSA":{"duration":53.964,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=6AwS8vWMwSA","annotations":[{"segment":[1.4300421216848673,4.712474258970359],"label":"Rock-paper-scissors"},{"segment":[11.10797503900156,14.389876755070205],"label":"Rock-paper-scissors"},{"segment":[21.374436817472702,28.274845553822153],"label":"Rock-paper-scissors"},{"segment":[39.21451794071763,41.654906396255846],"label":"Rock-paper-scissors"}]},"qF9gCNZO3hk":{"duration":153.65,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=qF9gCNZO3hk","annotations":[]},"Ib1xMq2aUl0":{"duration":193.121,"subset":"training","resolution":"360x240","url":"https://www.youtube.com/watch?v=Ib1xMq2aUl0","annotations":[{"segment":[20.482851794071763,175.92977223088923],"label":"Rollerblading"}]},"4KqjBUpLQk8":{"duration":226.349,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=4KqjBUpLQk8","annotations":[{"segment":[0.2,198.45148220592824],"label
":"Calf roping"}]},"kDJVAE81rww":{"duration":95.852,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kDJVAE81rww","annotations":[]},"rHUCx_JqNk4":{"duration":156.875,"subset":"training","resolution":"2276x720","url":"https://www.youtube.com/watch?v=rHUCx_JqNk4","annotations":[{"segment":[12.726129028834622,115.51409426172964],"label":"Braiding hair"}]},"cDV1vDdAT1w":{"duration":60.16,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cDV1vDdAT1w","annotations":[{"segment":[4.129746817083568,22.244317530421217],"label":"Windsurfing"},{"segment":[24.121475127925116,41.109751385335414],"label":"Windsurfing"},{"segment":[43.36234050234009,47.39822933697348],"label":"Windsurfing"},{"segment":[49.932392093603745,53.87442304836193],"label":"Windsurfing"}]},"WDvsx_q2h-4":{"duration":146.89,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WDvsx_q2h-4","annotations":[{"segment":[27.1550668067539,98.4227948818633],"label":"Playing violin"},{"segment":[103.00592852013399,128.4423192449298],"label":"Playing violin"}]},"2vOBrR0jKgY":{"duration":193.21,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=2vOBrR0jKgY","annotations":[{"segment":[37.37319059,169.70211482200784],"label":"Smoking a cigarette"}]},"A_g93uJD-_8":{"duration":106.301,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=A_g93uJD-_8","annotations":[{"segment":[0.01,106.30095238095238],"label":"Making an omelette"}]},"xZEl3yh0Cos":{"duration":159.15,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=xZEl3yh0Cos","annotations":[{"segment":[21.82091939,145.13962528081123],"label":"Fixing 
bicycle"}]},"VvbWwNDHJf4":{"duration":123.786,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VvbWwNDHJf4","annotations":[{"segment":[14.475936037441498,115.61447581903276],"label":"Rollerblading"}]},"kC1DyxcRHBg":{"duration":117.586,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=kC1DyxcRHBg","annotations":[{"segment":[4.95291314976599,9.172061388455537],"label":"Cleaning sink"},{"segment":[11.740238577223089,19.07788768798752],"label":"Cleaning sink"},{"segment":[21.09574119344774,114.46732612792512],"label":"Cleaning sink"}]},"CdwgIN9FkdY":{"duration":35.806,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=CdwgIN9FkdY","annotations":[{"segment":[0.7814882995319813,34.83204992199688],"label":"Kneeling"}]},"4CSyAAoO18s":{"duration":154.72,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=4CSyAAoO18s","annotations":[{"segment":[43.92818562266301,68.06455134940092],"label":"Washing face"}]},"_0IkiqDouX8":{"duration":149.931,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_0IkiqDouX8","annotations":[{"segment":[14.501868770670827,133.55753335569423],"label":"Surfing"}]},"QnwlpzsfFws":{"duration":221.57,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QnwlpzsfFws","annotations":[]},"i9g2MGNUVQI":{"duration":97.199,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=i9g2MGNUVQI","annotations":[{"segment":[47.16330066312424,52.75638466914759],"label":"Bullfighting"}]},"gIwTydKpIe4":{"duration":106.58,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gIwTydKpIe4","annotations":[{"segment":[8.9736879875195,9.804585023400936],"label":"Ironing clothes"},{"segment":[60.833082683307325,63.8248736349454],"label":"Ironing clothes"},{"segment":[69.47603432137285,75.45961622464898],"label":"Ironing 
clothes"}]},"zb2YYXs4gtw":{"duration":206.263,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zb2YYXs4gtw","annotations":[{"segment":[0.01,206.2619999999999],"label":"Camel ride"}]},"Cm8hWFFA16I":{"duration":209.0,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=Cm8hWFFA16I","annotations":[{"segment":[27.714881438794965,128.14056947584027],"label":"Cleaning shoes"}]},"8EGM3zcvjs8":{"duration":186.74,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8EGM3zcvjs8","annotations":[{"segment":[22.570031201248046,180.4146365054602],"label":"Zumba"}]},"2xgecBn6YwM":{"duration":206.1,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2xgecBn6YwM","annotations":[]},"485DjSbjOV8":{"duration":148.724,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=485DjSbjOV8","annotations":[{"segment":[0,145.47555010296412],"label":"Playing congas"}]},"O1WvjCFqLz0":{"duration":63.07,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=O1WvjCFqLz0","annotations":[{"segment":[0.09838595163806553,34.828626879875195],"label":"Doing kickboxing"},{"segment":[41.32209968798752,52.439712223088925],"label":"Doing kickboxing"}]},"nwznKOuZM7w":{"duration":31.649,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nwznKOuZM7w","annotations":[{"segment":[1.152541196650805,30.01532312811498],"label":"Waxing skis"}]},"Opqg11Nkb7c":{"duration":192.89,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Opqg11Nkb7c","annotations":[{"segment":[31.722582683307337,121.32760296411857],"label":"Starting a 
campfire"}]},"0dc-NU85piM":{"duration":114.52000000000001,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0dc-NU85piM","annotations":[]},"hW25ecQ1GUc":{"duration":43.074,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=hW25ecQ1GUc","annotations":[{"segment":[9.777103801872075,17.840694566302652],"label":"Putting on shoes"}]},"-vqefJDOxkw":{"duration":6.04,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-vqefJDOxkw","annotations":[{"segment":[0.1412625585023401,6.03662],"label":"Triple jump"}]},"3X6eP273RoI":{"duration":151.57999999999998,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3X6eP273RoI","annotations":[{"segment":[12.060176227769109,151.57986394557824],"label":"Making a sandwich"}]},"f9pbouXN2XA":{"duration":180.164,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=f9pbouXN2XA","annotations":[]},"C_2EFIuyDSA":{"duration":81.967,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=C_2EFIuyDSA","annotations":[{"segment":[0,81.766462585034],"label":"Playing drums"}]},"fBlvOzfFq-k":{"duration":88.65,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fBlvOzfFq-k","annotations":[{"segment":[9.690577223088924,12.874624024960998],"label":"Plataform diving"},{"segment":[28.656421216848674,36.68575663026521],"label":"Plataform diving"},{"segment":[45.68414976599064,50.667875195007795],"label":"Plataform diving"},{"segment":[56.759095163806556,60.08157878315132],"label":"Plataform diving"},{"segment":[66.17279875195008,69.49528237129485],"label":"Plataform diving"},{"segment":[76.97087051482059,84.58489547581904],"label":"Plataform 
diving"}]},"ave_VDl3LwE":{"duration":69.17,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ave_VDl3LwE","annotations":[{"segment":[17.579842433697348,22.433173166926675],"label":"Chopping wood"},{"segment":[28.472873634945397,32.2476864274571],"label":"Chopping wood"}]},"cV9xeDlKJok":{"duration":74.769,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cV9xeDlKJok","annotations":[{"segment":[20.81418410326752,71.9775444543183],"label":"Rollerblading"}]},"Uru0CastJqU":{"duration":54.61,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Uru0CastJqU","annotations":[{"segment":[1.5326957878315135,54.49585023400937],"label":"Springboard diving"}]},"ARNHE8qiBgw":{"duration":77.834,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ARNHE8qiBgw","annotations":[]},"zLjAfrfqRcc":{"duration":234.59,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zLjAfrfqRcc","annotations":[{"segment":[66.6077210707476,96.25181671212427],"label":"Sailing"},{"segment":[129.18970075809835,223.9776114121685],"label":"Sailing"}]},"QBJelFFVJu0":{"duration":77.14,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QBJelFFVJu0","annotations":[{"segment":[4.332143007800312,75.09047880187208],"label":"Playing harmonica"}]},"s_V2crzeMfI":{"duration":11.1,"subset":"testing","resolution":"654x480","url":"https://www.youtube.com/watch?v=s_V2crzeMfI","annotations":[]},"HGy8UgabFMw":{"duration":63.112,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HGy8UgabFMw","annotations":[{"segment":[0.01,63.11163265306122],"label":"Trimming branches or 
hedges"}]},"zA2zZPyYLlU":{"duration":76.232,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zA2zZPyYLlU","annotations":[]},"IhuwmiSPx0w":{"duration":183.65,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IhuwmiSPx0w","annotations":[{"segment":[10.025015600624025,183.02814196567863],"label":"Cheerleading"}]},"45WdXofnTkI":{"duration":67.18,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=45WdXofnTkI","annotations":[{"segment":[0.2,66.861],"label":"Playing bagpipes"}]},"5u-PL0wr3TU":{"duration":12.052,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=5u-PL0wr3TU","annotations":[{"segment":[0.2,6.7581903276131055],"label":"Fun sliding down"}]},"Dv2T0R7HVBQ":{"duration":17.183,"subset":"training","resolution":"1276x720","url":"https://www.youtube.com/watch?v=Dv2T0R7HVBQ","annotations":[{"segment":[0,17.161],"label":"Swimming"}]},"AimG8xzchfI":{"duration":227.208,"subset":"validation","resolution":"656x480","url":"https://www.youtube.com/watch?v=AimG8xzchfI","annotations":[{"segment":[85.09933576031847,206.67825745766635],"label":"Curling"}]},"VthI1KPjEq8":{"duration":16.95,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=VthI1KPjEq8","annotations":[{"segment":[1.971624,13.589153999999999],"label":"Polishing 
forniture"}]},"eC4l8AuAmKw":{"duration":165.0,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=eC4l8AuAmKw","annotations":[{"segment":[9.9396,159.33793529485177],"label":"Cumbia"}]},"EDNd6qqEu1M":{"duration":106.78999999999999,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EDNd6qqEu1M","annotations":[]},"P_b6_k2ocYw":{"duration":87.888,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=P_b6_k2ocYw","annotations":[{"segment":[0.2,85.0127013504673],"label":"Knitting"}]},"lO2Y1vkpyNc":{"duration":138.39,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=lO2Y1vkpyNc","annotations":[{"segment":[12.306213566302652,132.77756742589705],"label":"Cricket"}]},"smk2WJV1Zmo":{"duration":126.06,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=smk2WJV1Zmo","annotations":[{"segment":[18.474592823712946,96.89334321372854],"label":"Walking the dog"}]},"bDf_xpUpdmU":{"duration":143.569,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bDf_xpUpdmU","annotations":[{"segment":[0.2,57.11393443492842],"label":"Longboarding"},{"segment":[62.265387344745484,143.36875283446713],"label":"Longboarding"}]},"awv0jtZpeoo":{"duration":28.282,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=awv0jtZpeoo","annotations":[{"segment":[0.2,26.610499041139047],"label":"Using the rowing 
machine"}]},"iq0h4m3I8hY":{"duration":217.292,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=iq0h4m3I8hY","annotations":[{"segment":[11.214904312413994,43.75790621208182],"label":"Rafting"},{"segment":[46.469823037054134,136.98004707050526],"label":"Rafting"},{"segment":[142.74287032357145,217.09233560090703],"label":"Rafting"}]},"H9ejAfleKUg":{"duration":164.65,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=H9ejAfleKUg","annotations":[{"segment":[0.01,146.9287717379095],"label":"Rock climbing"}]},"wQHMoyzJx_w":{"duration":88.445,"subset":"training","resolution":"640x320","url":"https://www.youtube.com/watch?v=wQHMoyzJx_w","annotations":[{"segment":[35.59868946645866,79.33816450858033],"label":"Peeling potatoes"}]},"vdeBWB2GdBY":{"duration":238.191,"subset":"testing","resolution":"576x360","url":"https://www.youtube.com/watch?v=vdeBWB2GdBY","annotations":[]},"oyljZJZJ7IA":{"duration":177.49,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=oyljZJZJ7IA","annotations":[{"segment":[155.8950794321373,164.47900032449297],"label":"Walking the dog"}]},"pZqX9rz3W40":{"duration":73.72,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=pZqX9rz3W40","annotations":[]},"XdX5ONI3yaQ":{"duration":128.89,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=XdX5ONI3yaQ","annotations":[]},"OkMH4AlF8hQ":{"duration":50.248,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=OkMH4AlF8hQ","annotations":[{"segment":[0.19999999999999998,46.46030109204368],"label":"Having an ice 
cream"}]},"2UJ4wqJt_Y8":{"duration":3.066,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2UJ4wqJt_Y8","annotations":[{"segment":[0.19999999999999998,2.865034013605442],"label":"Rollerblading"}]},"6Kbv1OpIpaA":{"duration":195.47,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6Kbv1OpIpaA","annotations":[{"segment":[78.0100815,89.19451112738388],"label":"Playing guitarra"},{"segment":[109.32045209971665,130.6661470703726],"label":"Playing guitarra"},{"segment":[145.6081335498318,147.7427030468974],"label":"Playing guitarra"}]},"PhioVMXx0yY":{"duration":63.879,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PhioVMXx0yY","annotations":[{"segment":[0.6471708268330734,2.3397714508580343],"label":"Elliptical trainer"},{"segment":[7.5194937597503895,9.51141263650546],"label":"Elliptical trainer"},{"segment":[15.287977379095166,17.877471918876754],"label":"Elliptical trainer"},{"segment":[27.93666224648986,30.72534867394696],"label":"Elliptical trainer"},{"segment":[56.321506240249605,57.11827379095164],"label":"Elliptical trainer"}]},"nM_6V_Z6IwI":{"duration":97.71000000000001,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=nM_6V_Z6IwI","annotations":[{"segment":[0.01,35.82176856474259],"label":"Washing face"}]},"orrSdYqqr7M":{"duration":66.084,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=orrSdYqqr7M","annotations":[]},"-DzTAnE1t3w":{"duration":149.839,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-DzTAnE1t3w","annotations":[{"segment":[47.817779972506116,68.32443055809027],"label":"Decorating the Christmas tree"},{"segment":[76.71351488855652,85.80168957989495],"label":"Decorating the Christmas 
tree"}]},"JKmWaMLSm6Q":{"duration":178.632,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JKmWaMLSm6Q","annotations":[]},"VrUFvuLt9ME":{"duration":222.494,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VrUFvuLt9ME","annotations":[]},"bM7JNvFMfTw":{"duration":68.87,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=bM7JNvFMfTw","annotations":[]},"LkUnT9fMIXc":{"duration":21.734,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=LkUnT9fMIXc","annotations":[{"segment":[0.01,19.665598533541342],"label":"Curling"}]},"5GFpN0YZEog":{"duration":212.02,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5GFpN0YZEog","annotations":[{"segment":[0,67.5769856387742],"label":"Playing saxophone"},{"segment":[74.16986277718789,211.30170725619257],"label":"Playing saxophone"}]},"yqzV0O139Yk":{"duration":90.976,"subset":"training","resolution":"600x360","url":"https://www.youtube.com/watch?v=yqzV0O139Yk","annotations":[{"segment":[28.568676287051485,80.4778744149766],"label":"Kneeling"}]},"iHO42zwYsu0":{"duration":142.71,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iHO42zwYsu0","annotations":[{"segment":[5.788542692667708,13.358175444617785],"label":"Mowing the lawn"},{"segment":[20.037263166926678,32.504893581903275],"label":"Mowing the lawn"},{"segment":[41.63298013572543,56.549609382215294],"label":"Mowing the lawn"},{"segment":[60.334425758190335,91.72613805304213],"label":"Mowing the lawn"},{"segment":[94.84304565678627,105.0843134976599],"label":"Mowing the lawn"}]},"x0TiekqVMj0":{"duration":220.99,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=x0TiekqVMj0","annotations":[{"segment":[10.687225352959697,202.02303284555381],"label":"Belly 
dance"}]},"Vpue9WHrcAk":{"duration":90.26,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=Vpue9WHrcAk","annotations":[{"segment":[5.3505875101404055,90.255963],"label":"Smoking a cigarette"}]},"ssies4ErWus":{"duration":44.26,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=ssies4ErWus","annotations":[{"segment":[18.99703588143526,32.92589703588144],"label":"Hand washing clothes"}]},"G-giwt-TN8A":{"duration":93.02,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=G-giwt-TN8A","annotations":[{"segment":[24.436705928237128,41.83970124804992],"label":"Blowing leaves"},{"segment":[61.43128471138845,73.32589001560062],"label":"Blowing leaves"}]},"RnwidjJiDEE":{"duration":149.212,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RnwidjJiDEE","annotations":[{"segment":[7.678085803432137,90.9736833073323],"label":"Playing drums"},{"segment":[98.66499843993759,106.80951482059282],"label":"Playing drums"},{"segment":[108.9038190327613,110.53272230889236],"label":"Playing drums"},{"segment":[116.11753354134166,122.40044617784712],"label":"Playing drums"},{"segment":[127.98525741029641,135.89707332293293],"label":"Playing drums"}]},"wsc5GIgVwN8":{"duration":172.734,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wsc5GIgVwN8","annotations":[{"segment":[23.444293461534382,148.7499984898596],"label":"Longboarding"}]},"B0enQXEBfKk":{"duration":99.59,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=B0enQXEBfKk","annotations":[{"segment":[5.505176788611545,29.541864175507023],"label":"Tango"},{"segment":[31.0372235,93.58800540639625],"label":"Tango"}]},"rmGvgGOkqyk":{"duration":49.3,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rmGvgGOkqyk","annotations":[{"segment":[3.6888112324492983,8.760926677067083],"label":"Javelin 
throw"},{"segment":[25.283726989079568,33.19930109204368],"label":"Javelin throw"}]},"YnsusYE6i_0":{"duration":224.19,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=YnsusYE6i_0","annotations":[{"segment":[36.70921216848674,195.10065210608425],"label":"Ballet"}]},"mhsotF4wdG8":{"duration":194.56,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mhsotF4wdG8","annotations":[{"segment":[29.88821918876755,32.315688767550704],"label":"Rock-paper-scissors"},{"segment":[75.09984009360375,80.25821294851794],"label":"Rock-paper-scissors"},{"segment":[131.2485686427457,133.9797527301092],"label":"Rock-paper-scissors"}]},"C2KzNljiTIU":{"duration":52.34,"subset":"training","resolution":"424x240","url":"https://www.youtube.com/watch?v=C2KzNljiTIU","annotations":[{"segment":[1.3943923599999999,52.301],"label":"Brushing hair"}]},"Ad9jrt2bP1o":{"duration":40.08,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ad9jrt2bP1o","annotations":[{"segment":[33.63770865392439,37.63921884555383],"label":"Triple jump"}]},"Nt6cha3hK_s":{"duration":187.06,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Nt6cha3hK_s","annotations":[{"segment":[35.01902396255851,169.25861581903277],"label":"Preparing salad"}]},"IdG3UPXRtio":{"duration":109.993,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=IdG3UPXRtio","annotations":[]},"QeVVKZhoFCg":{"duration":206.71,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=QeVVKZhoFCg","annotations":[{"segment":[16.364755680000002,200.85803432137286],"label":"Putting on makeup"}]},"pOVICBn8QMw":{"duration":50.06,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pOVICBn8QMw","annotations":[{"segment":[6.250358814352575,45.862007800312014],"label":"Using the pommel 
horse"}]},"rjgu0ucCW7U":{"duration":123.86,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rjgu0ucCW7U","annotations":[{"segment":[13.139089104524182,118.83146763650547],"label":"Mowing the lawn"}]},"2ENwSfYEmNo":{"duration":7.85,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2ENwSfYEmNo","annotations":[]},"tAEGMVLn0wk":{"duration":210.907,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tAEGMVLn0wk","annotations":[{"segment":[116.43507332293291,172.35022152886114],"label":"Using the monkey bar"}]},"k3rWocPJ428":{"duration":70.207,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=k3rWocPJ428","annotations":[{"segment":[23.486278277997307,48.28341911386803],"label":"Table soccer"}]},"voVkip40EyE":{"duration":235.78,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=voVkip40EyE","annotations":[]},"-6prmNqkAN8":{"duration":47.137,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-6prmNqkAN8","annotations":[{"segment":[6.397622762053341,31.69396893291731],"label":"Curling"}]},"WWhL75t7p8A":{"duration":100.659,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WWhL75t7p8A","annotations":[{"segment":[74.74796716380655,85.89734882059282],"label":"Tug of 
war"}]},"T5pq-K73yao":{"duration":26.96,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=T5pq-K73yao","annotations":[]},"7Cjzxlb94JM":{"duration":147.4,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7Cjzxlb94JM","annotations":[]},"nIfYhQHFWZI":{"duration":23.64,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nIfYhQHFWZI","annotations":[{"segment":[10.380668656786272,19.378477368174725],"label":"Tumbling"}]},"1scjpxusQx0":{"duration":210.861,"subset":"validation","resolution":"320x180","url":"https://www.youtube.com/watch?v=1scjpxusQx0","annotations":[{"segment":[35.268186482378994,184.3534419340924],"label":"Rope skipping"}]},"nnUYbsoLNqo":{"duration":195.025,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=nnUYbsoLNqo","annotations":[{"segment":[11.662415416693761,177.20184376565095],"label":"Running a marathon"}]},"RiF_iAc0keQ":{"duration":11.889,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RiF_iAc0keQ","annotations":[{"segment":[6.751102196468812,9.774259134165366],"label":"Doing a powerbomb"}]},"KgfKmcsEMK0":{"duration":21.2,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=KgfKmcsEMK0","annotations":[{"segment":[0.01,21.076736805259383],"label":"Shaving legs"}]},"GVpKrpEHK80":{"duration":192.82,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=GVpKrpEHK80","annotations":[]},"WGHXYbCWvyU":{"duration":200.3,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=WGHXYbCWvyU","annotations":[{"segment":[50.45073556942278,179.48509282371293],"label":"Tai 
chi"}]},"gWTndmDHZQc":{"duration":176.15,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gWTndmDHZQc","annotations":[]},"evGKkyIAfw8":{"duration":133.75,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=evGKkyIAfw8","annotations":[]},"11p9goB2rm8":{"duration":33.9,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=11p9goB2rm8","annotations":[]},"SzKFCQ7IArg":{"duration":135.628,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=SzKFCQ7IArg","annotations":[]},"Q8Y9pmhvJqE":{"duration":53.522,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Q8Y9pmhvJqE","annotations":[{"segment":[5.427347386895475,12.77514077223089],"label":"Playing ice hockey"},{"segment":[19.204459984399378,47.00917813572543],"label":"Playing ice hockey"}]},"srARxP_ocyg":{"duration":30.86,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=srARxP_ocyg","annotations":[{"segment":[0.01,30.859318],"label":"Doing kickboxing"}]},"K8f4LNNiQy8":{"duration":13.61,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=K8f4LNNiQy8","annotations":[{"segment":[0,13.42],"label":"Pole vault"}]},"I2w4N_GnyT8":{"duration":53.5,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=I2w4N_GnyT8","annotations":[{"segment":[1.919604650546022,34.71980585335414],"label":"Cricket"}]},"gN_Kq57w72s":{"duration":237.61,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gN_Kq57w72s","annotations":[{"segment":[48.17929797191888,70.04528705148206],"label":"Making a cake"},{"segment":[94.50554602184087,180.13161622464898],"label":"Making a cake"},{"segment":[184.57931045241807,214.6012464898596],"label":"Making a 
cake"}]},"zogfsf4O3Vg":{"duration":100.8,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=zogfsf4O3Vg","annotations":[{"segment":[13.038091949999997,90.29777691107644],"label":"Drinking beer"}]},"YXl4cEB7E3Y":{"duration":19.2,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=YXl4cEB7E3Y","annotations":[{"segment":[0.010000000000000002,19.202902494331067],"label":"Playing bagpipes"}]},"cxIfpBvuk0E":{"duration":191.8,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cxIfpBvuk0E","annotations":[{"segment":[8.827955538221529,12.418988299531982],"label":"Smoking a cigarette"},{"segment":[24.089844773790947,42.94276677067083],"label":"Smoking a cigarette"},{"segment":[45.03753588143526,64.48896333853354],"label":"Smoking a cigarette"},{"segment":[72.26953432137286,74.06505070202807],"label":"Smoking a cigarette"},{"segment":[80.34935803432138,83.64113806552261],"label":"Smoking a cigarette"},{"segment":[135.11260764430577,144.0901895475819],"label":"Smoking a cigarette"},{"segment":[158.75357332293294,164.43937519500778],"label":"Smoking a cigarette"}]},"d40Tv56RrnA":{"duration":19.11,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=d40Tv56RrnA","annotations":[{"segment":[0.01,8.347146645865834],"label":"Hand washing clothes"},{"segment":[9.683878315132606,17.793383775351014],"label":"Hand washing clothes"}]},"W97fdJ2Rn9g":{"duration":123.97,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=W97fdJ2Rn9g","annotations":[]},"DF9CGVGiKXY":{"duration":90.187,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DF9CGVGiKXY","annotations":[{"segment":[10.530490654205607,77.9256308411215],"label":"Rope 
skipping"}]},"485Wn1DXt5U":{"duration":119.212,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=485Wn1DXt5U","annotations":[{"segment":[24.93274043797815,32.54591977409474],"label":"Hula hoop"},{"segment":[44.42990703047185,47.029529242804344],"label":"Hula hoop"},{"segment":[74.69693634374252,79.33911882037803],"label":"Hula hoop"},{"segment":[85.83817428766774,89.73760756804158],"label":"Hula hoop"},{"segment":[94.56547734374251,112.94851995121915],"label":"Hula hoop"}]},"dZsk5Dp_5FE":{"duration":38.22,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=dZsk5Dp_5FE","annotations":[]},"rIr091-LMGY":{"duration":103.45,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rIr091-LMGY","annotations":[{"segment":[14.846973089807946,92.14806124217758],"label":"Putting on makeup"}]},"g7IZWqeyM20":{"duration":117.89,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=g7IZWqeyM20","annotations":[{"segment":[25.829204368174725,108.92386895475819],"label":"Playing racquetball"}]},"iDgmPtf4rMI":{"duration":215.9,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iDgmPtf4rMI","annotations":[]},"iSIzuN9cEAs":{"duration":101.007,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=iSIzuN9cEAs","annotations":[{"segment":[0.20000000000000004,100.721],"label":"Disc dog"}]},"FMXPFrZYhes":{"duration":14.4,"subset":"training","resolution":"322x240","url":"https://www.youtube.com/watch?v=FMXPFrZYhes","annotations":[{"segment":[4.21196255850234,13.263201248049922],"label":"Shot 
put"}]},"pud1-V-4LL0":{"duration":88.84,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pud1-V-4LL0","annotations":[]},"XOEQfCu6KsE":{"duration":143.128,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=XOEQfCu6KsE","annotations":[{"segment":[23.66082059282371,56.69668330733229],"label":"Disc dog"},{"segment":[63.16992667706708,133.48274258970358],"label":"Disc dog"}]},"DinaQYSgbtg":{"duration":56.77,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=DinaQYSgbtg","annotations":[{"segment":[4.428454602184088,54.91283706708268],"label":"Washing dishes"}]},"Y4pyUctR49I":{"duration":42.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Y4pyUctR49I","annotations":[{"segment":[1.4432596460320997,17.84393664898596],"label":"Doing nails"},{"segment":[37.45914642121685,41.851335999999996],"label":"Doing nails"}]},"a370DDsJtBA":{"duration":96.479,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=a370DDsJtBA","annotations":[{"segment":[1.0535921029641186,95.57585505460217],"label":"Playing rubik cube"}]},"fULvroAfMAo":{"duration":118.25999999999999,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fULvroAfMAo","annotations":[{"segment":[7.472155226209049,18.357517160686427],"label":"Shot put"},{"segment":[31.08785569422777,35.70029719188768],"label":"Shot put"},{"segment":[47.692645085803434,64.66642979719188],"label":"Shot put"},{"segment":[81.27121918876755,86.25265600624026],"label":"Shot put"}]},"kXvFkU7gQSM":{"duration":218.361,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=kXvFkU7gQSM","annotations":[{"segment":[94.69064223861487,173.9396347697675],"label":"Doing 
fencing"}]},"pJ3sECWr5Xg":{"duration":109.03999999999999,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pJ3sECWr5Xg","annotations":[]},"6MsWbxRblnY":{"duration":71.727,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=6MsWbxRblnY","annotations":[]},"6RdkwoTi-98":{"duration":13.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=6RdkwoTi-98","annotations":[{"segment":[0.2646507550702028,11.88892622776911],"label":"Hammer throw"}]},"jw8yJ6tJXrA":{"duration":14.93,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jw8yJ6tJXrA","annotations":[{"segment":[0.9400435200000002,5.76111744],"label":"Long jump"},{"segment":[10.295006767550703,14.730089000000001],"label":"Long jump"}]},"Zn84iOuIkDs":{"duration":81.363,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Zn84iOuIkDs","annotations":[{"segment":[0.01,81.321],"label":"Clipping cat claws"}]},"UzmnkYD5YTA":{"duration":101.22,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=UzmnkYD5YTA","annotations":[{"segment":[28.580437860842434,92.68904450599793],"label":"Playing flauta"}]},"nDRaXY5ddPs":{"duration":87.54,"subset":"training","resolution":"208x160","url":"https://www.youtube.com/watch?v=nDRaXY5ddPs","annotations":[{"segment":[0,4.779833098085828],"label":"Mooping floor"},{"segment":[7.511166296992015,70.60496317160687],"label":"Mooping floor"},{"segment":[76.20419622776912,82.21312926365056],"label":"Mooping floor"}]},"fm6UhMKF8DU":{"duration":62.28,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=fm6UhMKF8DU","annotations":[{"segment":[24.6771964399376,52.851948280811236],"label":"Bungee 
jumping"}]},"pxFR0rLMvS0":{"duration":82.478,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=pxFR0rLMvS0","annotations":[]},"30Yk_1Yc7Vk":{"duration":57.47,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=30Yk_1Yc7Vk","annotations":[{"segment":[0.01,57.469386],"label":"Getting a haircut"}]},"_HeqBIO9TgQ":{"duration":65.32,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_HeqBIO9TgQ","annotations":[{"segment":[7.382332293291731,57.317627009999995],"label":"Washing dishes"}]},"Lt6waJFJWI0":{"duration":63.693,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Lt6waJFJWI0","annotations":[{"segment":[0.01,53.17703759355634],"label":"Hurling"}]},"KTk1Rh8Lgdw":{"duration":176.17000000000002,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=KTk1Rh8Lgdw","annotations":[{"segment":[0,176.141],"label":"Removing curlers"}]},"G6qOkn-K6aw":{"duration":208.12,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=G6qOkn-K6aw","annotations":[{"segment":[8.92872453978159,190.75002425897034],"label":"Cumbia"}]},"Nns49SsSe_g":{"duration":107.695,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Nns49SsSe_g","annotations":[]},"StM48dIvK-M":{"duration":79.157,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=StM48dIvK-M","annotations":[{"segment":[0,74.55395319812791],"label":"Tug of 
war"}]},"Ez7s36AwgLk":{"duration":233.94,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ez7s36AwgLk","annotations":[{"segment":[0.01,202.18909751950076],"label":"Skateboarding"}]},"-o3M0VV74gw":{"duration":14.47,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=-o3M0VV74gw","annotations":[]},"oYH-XLQzMxU":{"duration":33.3,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oYH-XLQzMxU","annotations":[{"segment":[0.16974046910430837,30.92576395014781],"label":"Tennis serve with ball bouncing"}]},"BK2ECcEE0Rs":{"duration":135.187,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BK2ECcEE0Rs","annotations":[]},"pT2fO9vlVn0":{"duration":82.5,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=pT2fO9vlVn0","annotations":[{"segment":[1.4154305772230888,80.55086739469579],"label":"Doing step aerobics"}]},"xXTfM9xXFQM":{"duration":152.184,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xXTfM9xXFQM","annotations":[{"segment":[48.19543198870813,144.34888004499774],"label":"Powerbocking"}]},"mlxcljsNEBg":{"duration":190.45,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mlxcljsNEBg","annotations":[]},"8bppcsg07Rc":{"duration":177.099,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8bppcsg07Rc","annotations":[{"segment":[34.70360832788922,121.73806439143067],"label":"Using the monkey bar"},{"segment":[126.9711487880092,134.1322116464851],"label":"Using the monkey bar"},{"segment":[151.75944329811807,161.67476110216163],"label":"Using the monkey bar"}]},"Yh9eGll8fYQ":{"duration":56.01,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Yh9eGll8fYQ","annotations":[{"segment":[6.635001560062403,53.254617784711385],"label":"Bungee 
jumping"}]},"roavmdw1ORo":{"duration":151.84,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=roavmdw1ORo","annotations":[{"segment":[15.637778471138846,26.773772230889236],"label":"Grooming horse"},{"segment":[29.143132605304213,47.150271450858035],"label":"Grooming horse"},{"segment":[48.57188767550702,98.80232761310452],"label":"Grooming horse"},{"segment":[99.03926365054602,115.86172230889237],"label":"Grooming horse"},{"segment":[117.04640249609984,145.95259906396257],"label":"Grooming horse"}]},"MTJ1EtiizVQ":{"duration":103.35,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=MTJ1EtiizVQ","annotations":[{"segment":[7.739293329173168,96.90240189235571],"label":"Tango"}]},"RnN4BXyOtxU":{"duration":121.534,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RnN4BXyOtxU","annotations":[{"segment":[20.287140345477766,79.06296710452418],"label":"Making a lemonade"},{"segment":[83.2341548174727,91.19733136037442],"label":"Making a lemonade"}]},"qgUHRwC3194":{"duration":122.788,"subset":"testing","resolution":"848x480","url":"https://www.youtube.com/watch?v=qgUHRwC3194","annotations":[]},"n_CMnVD7kCg":{"duration":145.85,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=n_CMnVD7kCg","annotations":[{"segment":[8.873511626179333,125.36679246217467],"label":"Tumbling"}]},"dPZfExDmX9Y":{"duration":180.07,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=dPZfExDmX9Y","annotations":[{"segment":[7.303961656786272,178.94706059126366],"label":"Making a sandwich"}]},"SHS8pNOsZDU":{"duration":26.36,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=SHS8pNOsZDU","annotations":[{"segment":[0.20557110764430578,26.31310177847114],"label":"Playing 
polo"}]},"wvlyV067oOU":{"duration":143.27,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=wvlyV067oOU","annotations":[{"segment":[26.261913605795932,128.69674609999998],"label":"Dodgeball"}]},"J4hnBPgwDlw":{"duration":191.054,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=J4hnBPgwDlw","annotations":[{"segment":[20.054230049026344,178.3457752203659],"label":"Gargling mouthwash"}]},"9XanCE5nX2Q":{"duration":168.82999999999998,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=9XanCE5nX2Q","annotations":[{"segment":[5.133225418876756,66.73193044539782],"label":"Cumbia"}]},"YYpeT1sH_m0":{"duration":142.757,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=YYpeT1sH_m0","annotations":[{"segment":[8.23703120124805,22.039624024961],"label":"Beach soccer"},{"segment":[30.276655226209048,34.06123712948518],"label":"Beach soccer"},{"segment":[37.845819032761305,42.74351326053042],"label":"Beach soccer"},{"segment":[51.64841185647426,59.885443057722306],"label":"Beach soccer"},{"segment":[73.68803588143525,106.42845241809673],"label":"Beach soccer"},{"segment":[137.3773120124805,142.721],"label":"Beach soccer"}]},"t6trrsdhMEc":{"duration":26.123,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=t6trrsdhMEc","annotations":[{"segment":[10.915795403295665,20.721886063858957],"label":"Doing crunches"}]},"o86qcfpzO0g":{"duration":139.34,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=o86qcfpzO0g","annotations":[{"segment":[9.890958076418295,11.195260240341588],"label":"Smoking a cigarette"},{"segment":[15.97770150806032,19.23845691786855],"label":"Smoking a cigarette"},{"segment":[24.2382818795745,26.62950251343387],"label":"Smoking a cigarette"},{"segment":[42.49851217450058,43.80281433842388],"label":"Smoking a 
cigarette"},{"segment":[51.193859933989195,52.932929485886916],"label":"Smoking a cigarette"},{"segment":[62.7151957153116,64.45426526720932],"label":"Smoking a cigarette"},{"segment":[103.14856279693365,107.27885298269074],"label":"Smoking a cigarette"}]},"HI_lWd3JhII":{"duration":71.588,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=HI_lWd3JhII","annotations":[{"segment":[24.330636505460216,69.53204836193447],"label":"Waxing skis"}]},"Q0U51Hqn21w":{"duration":6.34,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Q0U51Hqn21w","annotations":[{"segment":[2.195426574102964,5.814913628705148],"label":"Discus throw"}]},"HQk5hngL4Us":{"duration":153.531,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=HQk5hngL4Us","annotations":[{"segment":[0.010000000000000002,151.32604056162245],"label":"Decorating the Christmas tree"}]},"oNo8ZpqE_6k":{"duration":90.698,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oNo8ZpqE_6k","annotations":[{"segment":[16.13022509471807,48.10768789391576],"label":"Hurling"}]},"F7V-FqgG3T8":{"duration":51.18,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=F7V-FqgG3T8","annotations":[{"segment":[3.3341163999999996,49.50016152574103],"label":"Playing badminton"}]},"qGf6earGAOc":{"duration":87.4,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qGf6earGAOc","annotations":[{"segment":[34.08851896286763,67.44190193015736],"label":"Wakeboarding"}]},"_4wEUsTft44":{"duration":113.894,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=_4wEUsTft44","annotations":[{"segment":[0.01,113.841],"label":"Installing 
carpet"}]},"Exu0qsz42fQ":{"duration":197.393,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Exu0qsz42fQ","annotations":[{"segment":[3.0786427457098284,184.1214945397816],"label":"Playing drums"}]},"fAjpgUcycuA":{"duration":41.146,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=fAjpgUcycuA","annotations":[]},"PAGuZzrzSO4":{"duration":26.053,"subset":"validation","resolution":"640x352","url":"https://www.youtube.com/watch?v=PAGuZzrzSO4","annotations":[{"segment":[9.278559768995768,14.667417985169953],"label":"Calf roping"}]},"7X_wgaRaJYQ":{"duration":144.265,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7X_wgaRaJYQ","annotations":[{"segment":[3.685282277463218,135.36669774475294],"label":"Swimming"}]},"O_kbj_PTIUQ":{"duration":35.643,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=O_kbj_PTIUQ","annotations":[]},"PAiJNr97C6g":{"duration":122.37,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=PAiJNr97C6g","annotations":[{"segment":[11.245764016925564,82.91368385360374],"label":"Snowboarding"}]},"H5TETCI731k":{"duration":121.21,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=H5TETCI731k","annotations":[{"segment":[46.13849491122502,56.9167498699948],"label":"Playing field hockey"}]},"9o2iojC7gSw":{"duration":131.24,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9o2iojC7gSw","annotations":[{"segment":[15.355598713744468,18.631459748049924],"label":"Pole vault"},{"segment":[90.90514426521061,102.98488190405617],"label":"Pole vault"},{"segment":[108.92238006552262,122.8447895475819],"label":"Pole vault"}]},"q8mReXud6fE":{"duration":29.049,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=q8mReXud6fE","annotations":[{"segment":[0.01,3.715876436817472],"label":"Raking 
leaves"},{"segment":[10.286633550702028,18.3528043525741],"label":"Raking leaves"}]},"XG6ar8yc2pM":{"duration":144.03,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XG6ar8yc2pM","annotations":[{"segment":[78.42054605721646,133.6969177067083],"label":"Horseback riding"},{"segment":[5.168116215805095,77.29098846],"label":"Horseback riding"}]},"HPNZi_WsUeY":{"duration":118.1,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HPNZi_WsUeY","annotations":[{"segment":[13.904485179407175,99.17304992199688],"label":"Preparing pasta"}]},"lkJUUh-xJOY":{"duration":204.754,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=lkJUUh-xJOY","annotations":[]},"uY5UbRJIrxI":{"duration":153.345,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=uY5UbRJIrxI","annotations":[]},"o8RIoxL9FDE":{"duration":23.406,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=o8RIoxL9FDE","annotations":[{"segment":[4.295247708959529,9.500506198850665],"label":"Gargling mouthwash"}]},"Wu3wa5Rc_Qw":{"duration":17.624,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Wu3wa5Rc_Qw","annotations":[{"segment":[0.01,17.348999475819035],"label":"Canoeing"}]},"JGxSaJ7XoPQ":{"duration":79.25,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JGxSaJ7XoPQ","annotations":[{"segment":[0.9887176287051482,71.18766926677068],"label":"Grooming horse"}]},"IwSUfrzxFnU":{"duration":180.3,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IwSUfrzxFnU","annotations":[{"segment":[26.159397781591263,165.676185950078],"label":"Plataform diving"}]},"B5VIJnAFlK8":{"duration":30.4,"subset":"validation","resolution":"810x1440","url":"https://www.youtube.com/watch?v=B5VIJnAFlK8","annotations":[{"segment":[0.9009414852784587,30.252665088923557],"label":"Cleaning 
windows"}]},"D3NZ45e9llI":{"duration":224.42000000000002,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=D3NZ45e9llI","annotations":[{"segment":[0.01,224.42056689342402],"label":"Kayaking"}]},"0jrThYemNLs":{"duration":63.995,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0jrThYemNLs","annotations":[{"segment":[4.190892355694228,58.57270982839313],"label":"Doing fencing"}]},"HNBiW2Nq318":{"duration":40.03,"subset":"training","resolution":"638x360","url":"https://www.youtube.com/watch?v=HNBiW2Nq318","annotations":[{"segment":[0.9589768510140406,9.002008504680187],"label":"Using the balance beam"},{"segment":[10.676512800000001,27.47999736],"label":"Using the balance beam"}]},"K5jY2chByPw":{"duration":71.91,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=K5jY2chByPw","annotations":[]},"0PnAEoMx-v0":{"duration":95.75999999999999,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0PnAEoMx-v0","annotations":[{"segment":[5.897988778471138,66.52035445085804],"label":"Playing guitarra"}]},"4Dj7wOTfyH4":{"duration":192.169,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4Dj7wOTfyH4","annotations":[{"segment":[68.93577223088924,95.01147737909517],"label":"Installing carpet"},{"segment":[107.00030733229329,125.88271450858035],"label":"Installing carpet"},{"segment":[148.37721528861152,155.2715101404056],"label":"Installing carpet"}]},"yRD0WDE5IG0":{"duration":218.27,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yRD0WDE5IG0","annotations":[{"segment":[0.01,215.00653900156007],"label":"Doing step 
aerobics"}]},"ZMG8WE3Y22k":{"duration":135.582,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZMG8WE3Y22k","annotations":[{"segment":[70.40326521060842,79.29465678627145],"label":"Rafting"},{"segment":[84.1580624024961,120.31642589703588],"label":"Rafting"}]},"9xKOEE8Ni-Q":{"duration":79.575,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=9xKOEE8Ni-Q","annotations":[{"segment":[0.01,79.57480725623583],"label":"Camel ride"}]},"47f7KnSW2HA":{"duration":15.581,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=47f7KnSW2HA","annotations":[]},"5j1ytnL-KEI":{"duration":30.54,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5j1ytnL-KEI","annotations":[]},"aH39BSM0-Ag":{"duration":138.37,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=aH39BSM0-Ag","annotations":[]},"j_YzK7aHTIA":{"duration":113.871,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=j_YzK7aHTIA","annotations":[{"segment":[10.83634138377535,84.91428166302651],"label":"Making a cake"}]},"URzytLugoVw":{"duration":54.08,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=URzytLugoVw","annotations":[{"segment":[0.01,54.079023],"label":"Playing 
kickball"}]},"8cH0mAgsuAc":{"duration":176.24,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8cH0mAgsuAc","annotations":[{"segment":[42.05136973478939,56.618184087363495],"label":"Kneeling"},{"segment":[75.03283619344774,95.64625273010921],"label":"Kneeling"}]},"ouc67LyENuw":{"duration":131.727,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ouc67LyENuw","annotations":[{"segment":[0.01,48.05615288611544],"label":"Bullfighting"},{"segment":[56.27087987519501,64.48560686427457],"label":"Bullfighting"}]},"jBvGvVw3R-Q":{"duration":218.62,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jBvGvVw3R-Q","annotations":[{"segment":[19.776733229329174,23.868471138845557],"label":"Snatch"},{"segment":[26.93727457098284,29.66509984399376],"label":"Snatch"},{"segment":[30.006078003120127,36.82564118564743],"label":"Snatch"},{"segment":[38.18955382215289,42.963248049921994],"label":"Snatch"},{"segment":[42.963248049921994,50.46476755070203],"label":"Snatch"},{"segment":[54.55650546021841,57.62530889235569],"label":"Snatch"},{"segment":[58.648243369734786,63.08095943837753],"label":"Snatch"},{"segment":[64.10389391575663,66.14976287051482],"label":"Snatch"},{"segment":[67.85465366614665,71.60541341653666],"label":"Snatch"},{"segment":[74.67421684867395,79.78888923556943],"label":"Snatch"},{"segment":[81.83475819032762,87.63138689547581],"label":"Snatch"},{"segment":[91.72312480499221,95.13290639625585],"label":"Snatch"},{"segment":[100.58855694227769,110.13594539781592],"label":"Snatch"},{"segment":[117.97844305772232,124.7980062402496],"label":"Snatch"},{"segment":[127.86680967238689,144.23376131045242],"label":"Snatch"},{"segment":[146.96158658346334,159.2368003120125],"label":"Snatch"},{"segment":[164.69245085803433,178.67255538221528],"label":"Snatch"},{"segment":[180.3774461778471,196.0624414976599],"label":"Snatch"}]},"lzQwtmUrSK4":{"duration":209.119,"subset":"training","
resolution":"1280x720","url":"https://www.youtube.com/watch?v=lzQwtmUrSK4","annotations":[{"segment":[41.75841234465705,180.40939083277618],"label":"Futsal"}]},"6Yn2U58qxPs":{"duration":214.95,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6Yn2U58qxPs","annotations":[{"segment":[43.593019693576856,95.56931240514926],"label":"Rock climbing"},{"segment":[155.25821629327757,198.51590506613462],"label":"Rock climbing"}]},"Gk5iK0bEWak":{"duration":208.423,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Gk5iK0bEWak","annotations":[]},"G7kqlq8WhRo":{"duration":171.108,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=G7kqlq8WhRo","annotations":[{"segment":[11.607306552262092,154.38188533541341],"label":"Tug of war"}]},"5E2OdhrgG8s":{"duration":181.21,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=5E2OdhrgG8s","annotations":[{"segment":[6.076405333853353,164.3455675179407],"label":"Playing harmonica"}]},"ubR7XPBJQH8":{"duration":209.14,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ubR7XPBJQH8","annotations":[{"segment":[2.9364729854500307,8.156869404027862],"label":"Tai chi"},{"segment":[44.37336923556943,182.38759854914196],"label":"Tai chi"}]},"GeR07RwzkBw":{"duration":220.52,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=GeR07RwzkBw","annotations":[]},"ldjIg23Rofw":{"duration":206.124,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ldjIg23Rofw","annotations":[{"segment":[25.72519251170047,174.28817926677067],"label":"Beer pong"}]},"IKrnz_OzqT8":{"duration":89.351,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IKrnz_OzqT8","annotations":[{"segment":[0.13939217704762613,87.53828670826833],"label":"Playing 
congas"}]},"hPOtGLv1GzM":{"duration":191.123,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hPOtGLv1GzM","annotations":[{"segment":[17.293535044803154,55.458577902299766],"label":"BMX"},{"segment":[57.8438930808933,83.78419564809802],"label":"BMX"},{"segment":[86.16951082669156,179.79313000156006],"label":"BMX"}]},"scwBQj4GE7Y":{"duration":36.711,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=scwBQj4GE7Y","annotations":[{"segment":[4.409819706864274,23.480858179407175],"label":"Swimming"}]},"8TDqnzrHE8c":{"duration":87.89,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8TDqnzrHE8c","annotations":[]},"n-i9nC6vxu0":{"duration":94.808,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=n-i9nC6vxu0","annotations":[{"segment":[22.333647697347892,76.91057485179407],"label":"Slacklining"}]},"UeN7XgwLkOU":{"duration":192.493,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UeN7XgwLkOU","annotations":[{"segment":[48.39320680050102,174.92315151389667],"label":"Rollerblading"}]},"GKM-K8jbfyE":{"duration":119.77000000000001,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=GKM-K8jbfyE","annotations":[{"segment":[0.01,119.7685260770975],"label":"Cleaning windows"}]},"dGw6pVNy8Ns":{"duration":201.898,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dGw6pVNy8Ns","annotations":[]},"nfTFGY2n_io":{"duration":228.183,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nfTFGY2n_io","annotations":[{"segment":[14.94184399375975,187.1288081123245],"label":"Making a 
lemonade"}]},"qiTQ02NDSlQ":{"duration":138.81,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=qiTQ02NDSlQ","annotations":[]},"wqThk38AdEA":{"duration":23.06,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wqThk38AdEA","annotations":[{"segment":[0.36383945999999995,22.87737839625585],"label":"Playing flauta"}]},"p7jJNlAY_lw":{"duration":180.7,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=p7jJNlAY_lw","annotations":[]},"FrkXeG1YoKg":{"duration":80.11,"subset":"validation","resolution":"426x238","url":"https://www.youtube.com/watch?v=FrkXeG1YoKg","annotations":[{"segment":[3.3743194567862713,72.48538092355695],"label":"Pole vault"}]},"lRoy13g3Iow":{"duration":170.76,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lRoy13g3Iow","annotations":[]},"hIQ4Lmazo4g":{"duration":67.083,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=hIQ4Lmazo4g","annotations":[{"segment":[0.01,66.70628587372448],"label":"Building sandcastles"}]},"zTrI4pPFIlw":{"duration":150.09,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=zTrI4pPFIlw","annotations":[{"segment":[103.44822649921997,144.87432625117006],"label":"Using the pommel horse"},{"segment":[8.425647407176287,98.80472097],"label":"Using the pommel horse"}]},"EorPZy_QoHU":{"duration":134.189,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=EorPZy_QoHU","annotations":[]},"s60we-9PBhw":{"duration":79.668,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=s60we-9PBhw","annotations":[{"segment":[29.30707644305772,69.06283307332292],"label":"Clipping cat 
claws"}]},"JKJ_EvP7JwQ":{"duration":27.33,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=JKJ_EvP7JwQ","annotations":[]},"1kwIYXB4b8s":{"duration":61.23,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=1kwIYXB4b8s","annotations":[]},"QCcueK6xiZ4":{"duration":73.12,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QCcueK6xiZ4","annotations":[{"segment":[34.87779407176287,56.43540561622465],"label":"Hand car wash"},{"segment":[63.2760608424337,69.31863962558504],"label":"Hand car wash"}]},"k5wjc4OO1XM":{"duration":65.411,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=k5wjc4OO1XM","annotations":[{"segment":[5.204247614802551,61.022353048361936],"label":"Making an omelette"}]},"UxhKb-zZoWE":{"duration":126.828,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=UxhKb-zZoWE","annotations":[{"segment":[0.01,126.82696145124717],"label":"Sumo"}]},"5kCv4zwc7-I":{"duration":166.46,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5kCv4zwc7-I","annotations":[{"segment":[6.36554512,90.47770374570983],"label":"Doing nails"}]},"JJwNO3BLQvY":{"duration":195.88,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=JJwNO3BLQvY","annotations":[]},"REOP3JWB0yc":{"duration":149.96,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=REOP3JWB0yc","annotations":[{"segment":[33.94358744,121.92609786],"label":"Playing lacrosse"}]},"NGF0rQiDxNk":{"duration":38.57,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=NGF0rQiDxNk","annotations":[{"segment":[0,38.568343],"label":"Using uneven bars"}]},"Tg3FU9Ro6KQ":{"duration":201.55,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=Tg3FU9Ro6KQ","annotations":[{"segment":[18.236901666542852,29.87078721244088],"label":"Doing 
nails"},{"segment":[32.38622192506748,72.94760666617141],"label":"Doing nails"},{"segment":[84.26706272698908,140.86434366302652],"label":"Doing nails"},{"segment":[177.3381469329173,193.68847253666146],"label":"Doing nails"}]},"tTEZnwfHX8E":{"duration":29.142,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tTEZnwfHX8E","annotations":[]},"W3ozAI2ozCs":{"duration":36.67,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=W3ozAI2ozCs","annotations":[{"segment":[0.01,36.664307],"label":"Using parallel bars"}]},"82cpSdoHdg4":{"duration":65.04,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=82cpSdoHdg4","annotations":[{"segment":[7.102553054503132,58.240935046925685],"label":"Playing congas"}]},"0U7_h4uasNY":{"duration":115.311,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0U7_h4uasNY","annotations":[]},"NURr5XJcwFs":{"duration":45.396,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=NURr5XJcwFs","annotations":[{"segment":[32.11643843213728,41.18127662246489],"label":"Hitting a 
pinata"}]},"6LrY7uMj8Kg":{"duration":155.95,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6LrY7uMj8Kg","annotations":[{"segment":[4.0919967999999995,144.58901555148205],"label":"Cheerleading"}]},"hiEVVSgAhls":{"duration":85.45,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hiEVVSgAhls","annotations":[{"segment":[1.5313299531981281,72.05743272],"label":"Ping-pong"}]},"r7Z5Pm3pKMA":{"duration":45.628,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=r7Z5Pm3pKMA","annotations":[{"segment":[22.366557469295188,29.746386147366724],"label":"Hurling"}]},"ExMpLopi9ug":{"duration":41.96,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ExMpLopi9ug","annotations":[{"segment":[5.172096723868955,39.47815600624025],"label":"Using the pommel horse"}]},"ioue7a-6jn0":{"duration":111.154,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ioue7a-6jn0","annotations":[]},"IsM_xfhJzps":{"duration":134.607,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IsM_xfhJzps","annotations":[{"segment":[58.37829864759216,74.96781516974964],"label":"Using the rowing machine"},{"segment":[101.84703098283931,122.63642493603744],"label":"Using the rowing 
machine"}]},"kFmOtf_Ew1w":{"duration":228.717,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kFmOtf_Ew1w","annotations":[{"segment":[9.260414330218069,209.08994859813086],"label":"Swimming"}]},"KsAnLnOjeSQ":{"duration":14.58,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KsAnLnOjeSQ","annotations":[]},"gWeunYH-qoA":{"duration":142.339,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=gWeunYH-qoA","annotations":[]},"B3ZVUyLeT84":{"duration":59.49,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=B3ZVUyLeT84","annotations":[{"segment":[6.125286382883887,55.31319050234009],"label":"Surfing"}]},"0IN40z63WAU":{"duration":232.014,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0IN40z63WAU","annotations":[{"segment":[0.01,108.92396411856475],"label":"Elliptical trainer"},{"segment":[114.71394227769112,223.29528393135726],"label":"Elliptical trainer"}]},"5pl_qttD8Fc":{"duration":75.78999999999999,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=5pl_qttD8Fc","annotations":[{"segment":[5.320666051131841,7.685406518301549],"label":"Smoking a cigarette"},{"segment":[44.575357806148986,52.14252730109205],"label":"Smoking a cigarette"},{"segment":[60.77383000626148,61.60148916977088],"label":"Smoking a cigarette"}]},"a7FNzxXWvgU":{"duration":168.57999999999998,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=a7FNzxXWvgU","annotations":[{"segment":[48.37420280811232,153.2908003120125],"label":"Making a 
sandwich"}]},"X4IE65LtDzQ":{"duration":155.57,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=X4IE65LtDzQ","annotations":[{"segment":[20.86821528861154,94.39227613104524],"label":"Hopscotch"}]},"ZAX6OojMg38":{"duration":82.06,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=ZAX6OojMg38","annotations":[]},"8EyYHnijfnI":{"duration":130.66,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=8EyYHnijfnI","annotations":[{"segment":[10.290340093603746,67.54946021840874],"label":"Getting a piercing"},{"segment":[75.70022464898597,83.85098907956319],"label":"Getting a piercing"}]},"LNiep92AJLY":{"duration":200.81,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LNiep92AJLY","annotations":[]},"NyL7m4JV8vQ":{"duration":98.47999999999999,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=NyL7m4JV8vQ","annotations":[{"segment":[5.607408163026522,61.854116999999995],"label":"High jump"}]},"bKVrokdj7HM":{"duration":123.577,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bKVrokdj7HM","annotations":[{"segment":[13.30231715522621,57.45058713416537],"label":"Playing drums"},{"segment":[70.75290428939158,117.21462073010922],"label":"Playing drums"}]},"O8TIRkB99Lo":{"duration":125.992,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=O8TIRkB99Lo","annotations":[{"segment":[5.89663790633258,125.99149659863946],"label":"Hanging wallpaper"}]},"UnOzWl0EGCA":{"duration":66.6,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=UnOzWl0EGCA","annotations":[{"segment":[14.329825273010922,60.95367706708269],"label":"Mixing drinks"}]},"fCE3Eo7_Nmw":{"duration":60.396,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=fCE3Eo7_Nmw","annotations":[{"segment":[7.160729722308893,53.045932021840876],"label":"Peeling 
potatoes"}]},"UgtzVS_oeq0":{"duration":41.1,"subset":"training","resolution":"320x176","url":"https://www.youtube.com/watch?v=UgtzVS_oeq0","annotations":[{"segment":[8.711850234009361,11.081985959438377],"label":"Smoking a cigarette"},{"segment":[28.5056864274571,34.91145865834633],"label":"Smoking a cigarette"}]},"hz0W27EwjQ8":{"duration":238.725,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=hz0W27EwjQ8","annotations":[{"segment":[7.43689575518681,238.724352],"label":"Playing beach volleyball"}]},"z-_snl6eaPE":{"duration":20.573,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=z-_snl6eaPE","annotations":[{"segment":[2.1799949204368176,19.780248028081125],"label":"Playing ice hockey"}]},"hzpFVURhKwo":{"duration":139.018,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hzpFVURhKwo","annotations":[{"segment":[0.01,139.01732426303855],"label":"Hanging wallpaper"}]},"Z-2Gwqt_GCQ":{"duration":164.793,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Z-2Gwqt_GCQ","annotations":[{"segment":[20.82003276131045,148.05356630265211],"label":"Mooping floor"}]},"SxIJ6MjcgnY":{"duration":152.834,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=SxIJ6MjcgnY","annotations":[{"segment":[6.6760448049922,143.77339347893917],"label":"Disc dog"}]},"JDn95TW9WoM":{"duration":148.585,"subset":"validation","resolution":"424x240","url":"https://www.youtube.com/watch?v=JDn95TW9WoM","annotations":[{"segment":[17.15056786271451,38.47289547581904],"label":"Futsal"},{"segment":[0.2,148.36100000000002],"label":"Futsal"},{"segment":[50.29288143525741,110.3198689547582],"label":"Futsal"}]},"TeLWp5sSxg0":{"duration":186.25,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=TeLWp5sSxg0","annotations":[{"segment":[41.54384243369735,186.201],"label":"Removing 
curlers"}]},"Hfd5sxW8Clg":{"duration":187.246,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Hfd5sxW8Clg","annotations":[{"segment":[35.78408736349454,102.38630710942722],"label":"Playing rubik cube"}]},"5UAdJuvBEOI":{"duration":120.79,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5UAdJuvBEOI","annotations":[]},"fjN9Qe237bw":{"duration":99.382,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=fjN9Qe237bw","annotations":[{"segment":[0.01,98.824125335271],"label":"Playing pool"}]},"wideKBXF-60":{"duration":176.73,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wideKBXF-60","annotations":[{"segment":[33.63596032276665,55.4166231547221],"label":"Playing polo"},{"segment":[81.88426402230888,105.59485899173168],"label":"Playing polo"}]},"6dNh6AxVLx8":{"duration":117.517,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=6dNh6AxVLx8","annotations":[{"segment":[3.482276131045242,117.461],"label":"BMX"}]},"I1iUbyjWy7k":{"duration":31.86,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=I1iUbyjWy7k","annotations":[]},"FQEGKGn9vnU":{"duration":128.546,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FQEGKGn9vnU","annotations":[{"segment":[0.01,124.99768317150696],"label":"Fun sliding down"}]},"iIhEO_NA8gk":{"duration":221.101,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iIhEO_NA8gk","annotations":[{"segment":[86.85058435093057,116.8022993042016],"label":"Skiing"}]},"jhiJUgDpwFw":{"duration":59.792,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jhiJUgDpwFw","annotations":[{"segment":[4.370441833582837,59.59138321995464],"label":"Using the monkey 
bar"}]},"2Jr1K1wBKfQ":{"duration":133.49,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=2Jr1K1wBKfQ","annotations":[{"segment":[21.241984144671907,47.27382745922082],"label":"Archery"},{"segment":[68.09930211085994,97.46322121123245],"label":"Archery"}]},"zUPadKgtYHw":{"duration":137.834,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=zUPadKgtYHw","annotations":[{"segment":[11.464630649091536,124.39338625033763],"label":"Running a marathon"}]},"WRX7aUqgZJ0":{"duration":46.603,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WRX7aUqgZJ0","annotations":[{"segment":[7.524732401540959,42.78555573243338],"label":"Waxing skis"}]},"_zAfwnDt4VE":{"duration":5.83,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=_zAfwnDt4VE","annotations":[{"segment":[0.01,5.755468067082684],"label":"Hand washing clothes"}]},"cKFARm4dsws":{"duration":11.123,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cKFARm4dsws","annotations":[{"segment":[0.19999999999999998,10.861],"label":"Swinging at the playground"}]},"9GwsrWUq7mY":{"duration":73.77,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9GwsrWUq7mY","annotations":[{"segment":[1.8401497659906398,70.38572854914197],"label":"River tubing"}]},"RVZprJDJz1U":{"duration":84.24,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RVZprJDJz1U","annotations":[{"segment":[41.00390418882061,48.889269528861156],"label":"Polishing forniture"},{"segment":[69.39122126677067,78.065123925117],"label":"Polishing forniture"}]},"oEZToisRw4w":{"duration":214.14,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=oEZToisRw4w","annotations":[{"segment":[29.390215288611543,184.69078471138846],"label":"Shot 
put"}]},"eaTRlHkPBfI":{"duration":12.283,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=eaTRlHkPBfI","annotations":[]},"OhpaFQeQtKs":{"duration":110.179,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OhpaFQeQtKs","annotations":[{"segment":[0.01,56.87468174726989],"label":"Doing fencing"}]},"gpmrwsV9skQ":{"duration":31.069,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=gpmrwsV9skQ","annotations":[{"segment":[5.223257410296412,7.544705148205929],"label":"Powerbocking"},{"segment":[8.850519500780031,10.833422776911076],"label":"Powerbocking"},{"segment":[16.44358814352574,19.200307332293292],"label":"Powerbocking"},{"segment":[22.489024960998442,25.584288611544462],"label":"Powerbocking"},{"segment":[27.03519344773791,29.550095163806553],"label":"Powerbocking"}]},"5CtecK635eI":{"duration":214.07,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=5CtecK635eI","annotations":[{"segment":[51.09502071762871,214.0647619047619],"label":"Clean and jerk"}]},"bOULU-0Nloc":{"duration":176.31,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=bOULU-0Nloc","annotations":[{"segment":[8.237129485179407,125.20436817472698],"label":"Shoveling snow"}]},"D707ZMWH7jQ":{"duration":200.34,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=D707ZMWH7jQ","annotations":[{"segment":[21.253104767550703,47.50694006864275],"label":"Fixing bicycle"},{"segment":[66.25967956942277,96.26406277067083],"label":"Fixing bicycle"},{"segment":[110.95370871294853,174.4004773572543],"label":"Fixing bicycle"}]},"ciXArKhjAv8":{"duration":7.547,"subset":"training","resolution":"608x1080","url":"https://www.youtube.com/watch?v=ciXArKhjAv8","annotations":[{"segment":[0.2,3.8485741029641183],"label":"Having an ice 
cream"}]},"o2qCnRFKD7o":{"duration":64.81,"subset":"testing","resolution":"200x160","url":"https://www.youtube.com/watch?v=o2qCnRFKD7o","annotations":[]},"uub0z8wJfhU":{"duration":80.11,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uub0z8wJfhU","annotations":[]},"i2dFL7sGf9c":{"duration":8.453,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=i2dFL7sGf9c","annotations":[{"segment":[0,6.428051033541342],"label":"Doing a powerbomb"}]},"c8GFfLcwFrE":{"duration":122.508,"subset":"training","resolution":"854x270","url":"https://www.youtube.com/watch?v=c8GFfLcwFrE","annotations":[{"segment":[0,122.30832199546485],"label":"Surfing"}]},"gsfIHiBB6xE":{"duration":85.867,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gsfIHiBB6xE","annotations":[{"segment":[0.2,85.621],"label":"Cutting the grass"}]},"ONAP4ATzU8E":{"duration":199.321,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ONAP4ATzU8E","annotations":[]},"wuZkli4TSZQ":{"duration":59.931,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=wuZkli4TSZQ","annotations":[{"segment":[0.2,53.91155451713396],"label":"Hitting a pinata"}]},"Ay6oxBYCSnU":{"duration":39.451,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Ay6oxBYCSnU","annotations":[{"segment":[0,39.450702],"label":"Playing ice hockey"}]},"2u1gdf_53C0":{"duration":73.561,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=2u1gdf_53C0","annotations":[{"segment":[7.80361948627605,51.18256200936037],"label":"Painting furniture"}]},"sG3JpMuXFnU":{"duration":126.038,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=sG3JpMuXFnU","annotations":[{"segment":[36.163032761310454,123.24902808112326],"label":"River 
tubing"}]},"f00gW6PbWDE":{"duration":48.019,"subset":"training","resolution":"1906x1080","url":"https://www.youtube.com/watch?v=f00gW6PbWDE","annotations":[{"segment":[0.2,3.745621294851794],"label":"Shuffleboard"},{"segment":[8.764753829953198,12.7351124024961],"label":"Shuffleboard"},{"segment":[14.083536068642744,15.881434290171606],"label":"Shuffleboard"},{"segment":[29.06602124804992,34.08515378315133],"label":"Shuffleboard"}]},"UYHgieL5IKM":{"duration":185.18,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UYHgieL5IKM","annotations":[{"segment":[69.03073166926677,120.44274102964118],"label":"Sharpening knives"},{"segment":[142.682767550702,153.65836505460217],"label":"Sharpening knives"}]},"jafelg2mzIk":{"duration":27.17,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=jafelg2mzIk","annotations":[]},"SiqB6_9kNQU":{"duration":162.169,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=SiqB6_9kNQU","annotations":[{"segment":[3.540427457098284,157.80190951638065],"label":"Riding bumper cars"}]},"8ikOQRbeQL8":{"duration":132.215,"subset":"training","resolution":"854x478","url":"https://www.youtube.com/watch?v=8ikOQRbeQL8","annotations":[{"segment":[0.01,126.02653762870514],"label":"Baton twirling"}]},"V6Sy8zpJSuc":{"duration":59.676,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=V6Sy8zpJSuc","annotations":[]},"gwyB0U6g0JM":{"duration":208.051,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=gwyB0U6g0JM","annotations":[]},"saMB0Mr1qdE":{"duration":157.71,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=saMB0Mr1qdE","annotations":[]},"UvIS1-hwgaQ":{"duration":156.2,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UvIS1-hwgaQ","annotations":[{"segment":[32.1660419775351,144.25982462652107],"label":"Rock 
climbing"}]},"H_xL4tssqEU":{"duration":116.82,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=H_xL4tssqEU","annotations":[{"segment":[16.35104620105285,106.46349387582],"label":"Knitting"}]},"Spi0HgoqMs4":{"duration":32.56,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Spi0HgoqMs4","annotations":[]},"feio1hW6Dy0":{"duration":112.22,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=feio1hW6Dy0","annotations":[{"segment":[12.075645865834632,47.07751794071763],"label":"Ballet"},{"segment":[60.37822932917317,92.05492355694227],"label":"Ballet"},{"segment":[92.77149765990639,108.87522932917317],"label":"Ballet"}]},"G02h5bFOpCE":{"duration":102.47,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=G02h5bFOpCE","annotations":[{"segment":[23.33942288299532,75.29361765678627],"label":"Surfing"}]},"asyXgaH1Sro":{"duration":61.63,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=asyXgaH1Sro","annotations":[{"segment":[24.99640730109204,61.33733791575663],"label":"Brushing hair"}]},"PxiYXUk05WA":{"duration":198.3,"subset":"testing","resolution":"624x360","url":"https://www.youtube.com/watch?v=PxiYXUk05WA","annotations":[]},"8_XQPqLdblg":{"duration":34.62,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8_XQPqLdblg","annotations":[{"segment":[1.2962492106084242,33.16237563806552],"label":"Playing guitarra"}]},"bVq8bimzJR4":{"duration":89.003,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=bVq8bimzJR4","annotations":[]},"MOBJ_kpzHc0":{"duration":189.545,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MOBJ_kpzHc0","annotations":[{"segment":[0.01,189.54448979591837],"label":"Making a 
lemonade"}]},"-fLJHBqizNM":{"duration":72.7,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-fLJHBqizNM","annotations":[{"segment":[8.761310452418098,57.73478939157566],"label":"Using parallel bars"}]},"XFlWVBU4z9E":{"duration":87.89,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=XFlWVBU4z9E","annotations":[{"segment":[86.10509665522622,87.887527],"label":"Drinking coffee"}]},"74HRnmwIhfM":{"duration":213.79,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=74HRnmwIhfM","annotations":[{"segment":[18.66993138,191.4400797365228],"label":"Preparing pasta"}]},"31TT2oiYRO4":{"duration":217.53,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=31TT2oiYRO4","annotations":[{"segment":[4.748861154446178,213.35954758190329],"label":"Zumba"}]},"wZ-teiWX4mg":{"duration":115.497,"subset":"training","resolution":"300x240","url":"https://www.youtube.com/watch?v=wZ-teiWX4mg","annotations":[{"segment":[10.807581903276132,22.15554290171607],"label":"Playing ice hockey"},{"segment":[26.47857566302652,63.76473322932917],"label":"Playing ice hockey"}]},"yqOW5LBA81w":{"duration":67.156,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yqOW5LBA81w","annotations":[{"segment":[0.01,66.05211137457874],"label":"Trimming branches or hedges"}]},"GqWH2IYPw6U":{"duration":97.52000000000001,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=GqWH2IYPw6U","annotations":[{"segment":[4.27345786,92.24000976911076],"label":"Playing violin"}]},"UFfi0gxL6_Y":{"duration":158.99,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=UFfi0gxL6_Y","annotations":[{"segment":[16.81349200320292,154.02586271138844],"label":"Playing 
violin"}]},"jDlvGWOdcqU":{"duration":180.118,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=jDlvGWOdcqU","annotations":[]},"4Lbvc9rA4LU":{"duration":231.178,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4Lbvc9rA4LU","annotations":[]},"WNd6SHMi30M":{"duration":51.2,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WNd6SHMi30M","annotations":[{"segment":[6.864034321372855,11.09853198127925],"label":"Hand car wash"},{"segment":[14.531890795631826,16.767566302652106],"label":"Hand car wash"},{"segment":[20.20092511700468,29.223472698907955],"label":"Hand car wash"}]},"UvOEuhS0V3E":{"duration":8.17,"subset":"testing","resolution":"480x480","url":"https://www.youtube.com/watch?v=UvOEuhS0V3E","annotations":[]},"Y8-7fr5bv24":{"duration":195.025,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Y8-7fr5bv24","annotations":[{"segment":[14.299760931933875,16.125262327499904],"label":"Braiding hair"},{"segment":[23.123017677169674,27.3825209334904],"label":"Braiding hair"},{"segment":[29.512272561650764,118.96184094438608],"label":"Braiding hair"},{"segment":[135.08710327188598,157.60162048386698],"label":"Braiding hair"}]},"wG0OD99mjdY":{"duration":119.24000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wG0OD99mjdY","annotations":[{"segment":[2.9762113198127924,11.90484527925117],"label":"Rock climbing"},{"segment":[15.997135843993759,21.205505653666144],"label":"Rock climbing"},{"segment":[26.599888670826832,61.75638488611545],"label":"Rock climbing"}]},"R74yolowFPs":{"duration":185.11,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=R74yolowFPs","annotations":[]},"fruimvo90vA":{"duration":36.99,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fruimvo90vA","annotations":[{"segment":[0.01,8.586909516380656],"label":"Triple 
jump"}]},"b993qWuMRBA":{"duration":490.22,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=b993qWuMRBA","annotations":[{"segment":[149.1129407176287,329.5778330733229],"label":"Removing ice from car"}]},"UH9qJ4Y6ENA":{"duration":198.252,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UH9qJ4Y6ENA","annotations":[{"segment":[4.329319812792512,68.03216848673947],"label":"Doing fencing"},{"segment":[81.94783931357254,107.30528393135725],"label":"Doing fencing"},{"segment":[130.18883151326054,170.0804212168487],"label":"Doing fencing"}]},"bphckj0BV5U":{"duration":70.28999999999999,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=bphckj0BV5U","annotations":[]},"BAtlo_AGFWs":{"duration":81.97,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=BAtlo_AGFWs","annotations":[]},"M679ctbf49M":{"duration":198.09,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=M679ctbf49M","annotations":[]},"n4apOkL24BE":{"duration":108.07,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=n4apOkL24BE","annotations":[{"segment":[42.99025173251828,56.47738953095539],"label":"Hand washing clothes"},{"segment":[79.06834416536661,92.89266020280812],"label":"Hand washing clothes"}]},"aB5xErksFkI":{"duration":15.07,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aB5xErksFkI","annotations":[{"segment":[0.8916661466458659,14.102404056162248],"label":"Laying 
tile"}]},"2BQsHMTk_VQ":{"duration":119.165,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2BQsHMTk_VQ","annotations":[]},"wnkihU1fO5w":{"duration":85.22,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wnkihU1fO5w","annotations":[]},"Co_cpZWbzFk":{"duration":57.958,"subset":"testing","resolution":"322x240","url":"https://www.youtube.com/watch?v=Co_cpZWbzFk","annotations":[]},"KMz8f9vDK38":{"duration":131.634,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=KMz8f9vDK38","annotations":[{"segment":[0,131.4338321995465],"label":"Playing ice hockey"}]},"Nqh3RtLRleU":{"duration":62.578,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Nqh3RtLRleU","annotations":[{"segment":[0.01,4.226497656385476],"label":"Running a marathon"},{"segment":[25.45636993052878,31.88376245389326],"label":"Running a marathon"},{"segment":[38.70069391806772,47.07578114548205],"label":"Running a marathon"}]},"bEt8fGREAAA":{"duration":121.46,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bEt8fGREAAA","annotations":[{"segment":[61.53072654,115.61720717082683],"label":"Playing saxophone"}]},"pev7rvOE8eM":{"duration":619.393,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=pev7rvOE8eM","annotations":[{"segment":[1.4494359366211385,148.32561084756315],"label":"Removing ice from car"},{"segment":[187.94352644854092,238.19063891807374],"label":"Removing ice from car"}]},"2_H3lxMWvPo":{"duration":137.857,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2_H3lxMWvPo","annotations":[]},"J98U-PGh1bQ":{"duration":27.795,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=J98U-PGh1bQ","annotations":[{"segment":[1.0406316131045241,27.706816698907954],"label":"Mooping 
floor"}]},"fU4EgYmISro":{"duration":32.16,"subset":"validation","resolution":"192x144","url":"https://www.youtube.com/watch?v=fU4EgYmISro","annotations":[{"segment":[0.01,32.05929491887675],"label":"Playing badminton"}]},"iYlglcAh34E":{"duration":144.498,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iYlglcAh34E","annotations":[]},"N_FbnDY_2j8":{"duration":30.186,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=N_FbnDY_2j8","annotations":[{"segment":[0.01,30.132812221218124],"label":"Snow tubing"}]},"-AaOr1DI2no":{"duration":113.847,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=-AaOr1DI2no","annotations":[{"segment":[2.131309254290172,110.1176448049922],"label":"Hanging wallpaper"}]},"1AxGiLSmGZo":{"duration":60.16,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=1AxGiLSmGZo","annotations":[{"segment":[0.01,56.11544461778471],"label":"Windsurfing"}]},"nobV1wL__iY":{"duration":157.73,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nobV1wL__iY","annotations":[{"segment":[0.49214556337355536,157.73265306122448],"label":"Paintball"}]},"pv73L2pQX1s":{"duration":66.015,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pv73L2pQX1s","annotations":[{"segment":[0.2,65.761],"label":"Welding"}]},"msz1VkByvuU":{"duration":130.89,"subset":"testing","resolution":"322x240","url":"https://www.youtube.com/watch?v=msz1VkByvuU","annotations":[]},"CB41iJu4ZVY":{"duration":95.18,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CB41iJu4ZVY","annotations":[{"segment":[11.062061794036387,79.36472384945398],"label":"Using the balance beam"}]},"SnBTp7iV2kE":{"duration":146.751,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=SnBTp7iV2kE","annotations":[{"segment":[0.010000000000000002,138.5082960374415],"label":"Playing 
pool"}]},"Czw85LWCGes":{"duration":56.053,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Czw85LWCGes","annotations":[{"segment":[0.2,50.3213728549142],"label":"Welding"}]},"H7EmlU0wQ6g":{"duration":22.62,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=H7EmlU0wQ6g","annotations":[{"segment":[0.4323409462685035,21.850830466517724],"label":"Vacuuming floor"}]},"myGBEXiYGOU":{"duration":203.76,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=myGBEXiYGOU","annotations":[]},"sARnRvNdl-Y":{"duration":126.22,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sARnRvNdl-Y","annotations":[{"segment":[2.4121263045432784,111.88691913943424],"label":"Playing guitarra"}]},"f0cxC1B5cbw":{"duration":228.74,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=f0cxC1B5cbw","annotations":[{"segment":[14.983778471138846,59.578357254290175],"label":"Raking leaves"}]},"8qbx7yfwTNI":{"duration":212.533,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=8qbx7yfwTNI","annotations":[]},"bnkO0slLSyw":{"duration":221.52,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bnkO0slLSyw","annotations":[{"segment":[30.750356121684867,60.46418338533541],"label":"Tango"},{"segment":[61.579788820000005,213.17943513572544],"label":"Tango"}]},"7eR0DyDg7wQ":{"duration":53.29,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=7eR0DyDg7wQ","annotations":[{"segment":[41.52964118564743,53.241],"label":"Drinking coffee"}]},"FfYNkePtHjo":{"duration":154.79,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=FfYNkePtHjo","annotations":[{"segment":[4.103780533541342,151.11568317628706],"label":"Playing 
badminton"}]},"QBqfrJzcrns":{"duration":69.43,"subset":"validation","resolution":"328x240","url":"https://www.youtube.com/watch?v=QBqfrJzcrns","annotations":[{"segment":[0,3.249921996879875],"label":"Ironing clothes"}]},"SsW-IeF0bzE":{"duration":235.404,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SsW-IeF0bzE","annotations":[{"segment":[8.997496966545327,198.12855565106958],"label":"Riding bumper cars"}]},"GGSY1Qvo990":{"duration":18.16,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=GGSY1Qvo990","annotations":[{"segment":[5.823597503900156,16.198647425897036],"label":"Clean and jerk"}]},"gXp3KSWhf1g":{"duration":234.452,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gXp3KSWhf1g","annotations":[{"segment":[0.2,16.089928237129488],"label":"Running a marathon"},{"segment":[87.39756474258971,212.84402808112324],"label":"Running a marathon"}]},"9dhczBPiZoI":{"duration":93.86,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9dhczBPiZoI","annotations":[]},"CGO3djF-9cw":{"duration":192.053,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=CGO3djF-9cw","annotations":[]},"p5ic2-xLyZg":{"duration":151.255,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=p5ic2-xLyZg","annotations":[]},"nywOe9C3ajM":{"duration":53.2,"subset":"testing","resolution":"640x272","url":"https://www.youtube.com/watch?v=nywOe9C3ajM","annotations":[]},"5oyenoaVB-w":{"duration":47.07,"subset":"testing","resolution":"450x360","url":"https://www.youtube.com/watch?v=5oyenoaVB-w","annotations":[]},"Z90-HZ_geSI":{"duration":108.252,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Z90-HZ_geSI","annotations":[{"segment":[9.28834410519278,63.66737686650324],"label":"Grooming 
dog"}]},"ByF8Pg3xXNA":{"duration":20.25,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=ByF8Pg3xXNA","annotations":[{"segment":[0.10321734897740682,20.098707327170555],"label":"Hand washing clothes"}]},"hV_Ud3zrAnA":{"duration":92.323,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hV_Ud3zrAnA","annotations":[{"segment":[0.2,84.80652647975079],"label":"Rope skipping"}]},"Cx1614E1Mgk":{"duration":122.254,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Cx1614E1Mgk","annotations":[{"segment":[0.2,110.78034579439252],"label":"Playing congas"}]},"qyu0tJDF8Ic":{"duration":24.01,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=qyu0tJDF8Ic","annotations":[]},"dIlpPraDc-I":{"duration":96.596,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dIlpPraDc-I","annotations":[{"segment":[5.877076952465854,96.595034],"label":"Skiing"}]},"qU4GmCN2e1Y":{"duration":48.51,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=qU4GmCN2e1Y","annotations":[]},"S9alQwrQ-oo":{"duration":148.7,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=S9alQwrQ-oo","annotations":[{"segment":[9.333899,105.51638065522621],"label":"Doing nails"}]},"B3eCRzEMtrI":{"duration":162.63,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=B3eCRzEMtrI","annotations":[]},"LK1LJ63X81E":{"duration":89.44,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LK1LJ63X81E","annotations":[]},"xXDNNCl1m6c":{"duration":54.707,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xXDNNCl1m6c","annotations":[{"segment":[11.878507331891853,47.3265632571255],"label":"Removing ice from 
car"}]},"4rv1uLmbyHA":{"duration":116.05,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=4rv1uLmbyHA","annotations":[{"segment":[41.09831382370942,106.27625476131045],"label":"Preparing pasta"}]},"XhqFJNRt-5g":{"duration":30.953,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XhqFJNRt-5g","annotations":[{"segment":[6.03590266059622,24.674768606864273],"label":"Calf roping"}]},"Vx6vP1oxiAg":{"duration":19.97,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Vx6vP1oxiAg","annotations":[{"segment":[0.19999999999999998,17.016734789391574],"label":"Camel ride"}]},"-vKXPND_mD8":{"duration":8.081,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-vKXPND_mD8","annotations":[{"segment":[0.2,7.880544217687076],"label":"Camel ride"}]},"xpxxH2V7Fe8":{"duration":151.511,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xpxxH2V7Fe8","annotations":[{"segment":[0.01,151.461],"label":"Drum corps"}]},"CxUFit4ek3U":{"duration":152.857,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=CxUFit4ek3U","annotations":[]},"nOh81Wzig8U":{"duration":225.09,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=nOh81Wzig8U","annotations":[]},"uJ4OZfCjViM":{"duration":58.677,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uJ4OZfCjViM","annotations":[]},"tdJS3WJjeVo":{"duration":175.125,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tdJS3WJjeVo","annotations":[]},"o1DCCTev0CQ":{"duration":215.087,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=o1DCCTev0CQ","annotations":[{"segment":[0.01,215.041],"label":"Hanging 
wallpaper"}]},"YqairWJU2Vw":{"duration":102.89,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YqairWJU2Vw","annotations":[{"segment":[12.910260760000002,45.30629045],"label":"Discus throw"}]},"fKmsf8B7pDo":{"duration":156.34,"subset":"testing","resolution":"568x320","url":"https://www.youtube.com/watch?v=fKmsf8B7pDo","annotations":[]},"1cCRZztswFA":{"duration":175.752,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1cCRZztswFA","annotations":[{"segment":[0.01,155.30667601687085],"label":"Baton twirling"}]},"aOTtBZynDOQ":{"duration":79.67,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=aOTtBZynDOQ","annotations":[{"segment":[9.321489430577223,78.17622469110765],"label":"Javelin throw"}]},"cr9VTwfM_2w":{"duration":74.328,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cr9VTwfM_2w","annotations":[{"segment":[0.2,53.436444617784716],"label":"Longboarding"},{"segment":[58.304840873634944,69.54851794071763],"label":"Longboarding"}]},"BFChXuwZ4wo":{"duration":209.14,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=BFChXuwZ4wo","annotations":[]},"VejaBBey5DU":{"duration":63.321,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=VejaBBey5DU","annotations":[]},"hocTbE5f0uY":{"duration":183.856,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hocTbE5f0uY","annotations":[]},"KaGXQh-UHVU":{"duration":121.789,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=KaGXQh-UHVU","annotations":[{"segment":[0.01,19.75978293553511],"label":"Mooping floor"},{"segment":[38.75957421970348,107.15882272698909],"label":"Mooping 
floor"}]},"HysxcMFrTkc":{"duration":201.689,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HysxcMFrTkc","annotations":[{"segment":[19.193447879411774,188.78801092043682],"label":"Rope skipping"}]},"YK5n3YTf8pI":{"duration":60.14,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YK5n3YTf8pI","annotations":[{"segment":[25.09901165,54.16840008],"label":"Clean and jerk"}]},"7EZM1DRIOHE":{"duration":78.48,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=7EZM1DRIOHE","annotations":[]},"WVxP7f_OyEs":{"duration":175.264,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=WVxP7f_OyEs","annotations":[{"segment":[19.352654403518795,40.06817624522593],"label":"Wakeboarding"},{"segment":[74.13975822171791,90.22154491462214],"label":"Wakeboarding"},{"segment":[95.40042537504893,101.39702380291152],"label":"Wakeboarding"},{"segment":[113.93536597026058,119.11424643068736],"label":"Wakeboarding"},{"segment":[149.09723713480605,157.0018440772633],"label":"Wakeboarding"}]},"niqc-dW54ic":{"duration":77.03999999999999,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=niqc-dW54ic","annotations":[{"segment":[58.44506449609984,73.92640601560062],"label":"Using the balance beam"}]},"G25py7yEVyo":{"duration":88.03,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=G25py7yEVyo","annotations":[]},"c2VcfB3YTBo":{"duration":147.42000000000002,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=c2VcfB3YTBo","annotations":[{"segment":[0.01,128.33391858346334],"label":"Tango"}]},"UuJwtJBJ7oU":{"duration":159.289,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UuJwtJBJ7oU","annotations":[{"segment":[13.89435790931118,148.8681204569055],"label":"Playing 
congas"}]},"YTdLk7Nsn_k":{"duration":120.164,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YTdLk7Nsn_k","annotations":[{"segment":[41.414572542901716,95.75948673946957],"label":"Assembling bicycle"}]},"jQU4NYJzxco":{"duration":71.91,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=jQU4NYJzxco","annotations":[{"segment":[0.7853128767550702,66.6394069703588],"label":"Washing dishes"}]},"X2zoUDI1Gmc":{"duration":211.325,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=X2zoUDI1Gmc","annotations":[{"segment":[78.61414181879917,101.61995284523775],"label":"Blow-drying hair"},{"segment":[126.59769053108533,149.27484711428906],"label":"Blow-drying hair"},{"segment":[126.59769053108533,148.94619267105423],"label":"Blow-drying hair"},{"segment":[161.43506151397804,191.34261584834815],"label":"Blow-drying hair"}]},"KPJJfGK5NCw":{"duration":128.52,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=KPJJfGK5NCw","annotations":[{"segment":[17.440853354134166,126.29583463338534],"label":"Shot put"}]},"oO3iQ35xcTs":{"duration":35.34,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=oO3iQ35xcTs","annotations":[]},"fkgojk18K0E":{"duration":96.09,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fkgojk18K0E","annotations":[{"segment":[36.67334112,87.9552936],"label":"Belly dance"}]},"Ujgmih4OtMs":{"duration":197.06,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ujgmih4OtMs","annotations":[{"segment":[24.914156911076443,58.1330327925117],"label":"Archery"},{"segment":[83.04718970358813,190.393371950078],"label":"Archery"}]},"h0p_7Q2Bucc":{"duration":81.34,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=h0p_7Q2Bucc","annotations":[{"segment":[2.06113026,73.49381145085803],"label":"Plataform 
diving"}]},"Cy56IkwsQVs":{"duration":237.657,"subset":"training","resolution":"800x480","url":"https://www.youtube.com/watch?v=Cy56IkwsQVs","annotations":[{"segment":[0.01,233.73231543650257],"label":"Beer pong"}]},"Tc0nHNkf0KM":{"duration":27.12,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Tc0nHNkf0KM","annotations":[{"segment":[4.357961494539782,18.362672705148206],"label":"Washing hands"}]},"2EgLTpVOLys":{"duration":49.342,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2EgLTpVOLys","annotations":[]},"dD6-HmQtMkw":{"duration":187.2,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=dD6-HmQtMkw","annotations":[]},"plE3KNmuwj4":{"duration":157.618,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=plE3KNmuwj4","annotations":[{"segment":[12.25631848788465,46.08376783776456],"label":"Wakeboarding"},{"segment":[49.76066450622977,106.62999964515834],"label":"Wakeboarding"},{"segment":[117.41556182420854,127.46574593307324],"label":"Wakeboarding"}]},"zM1-aiWdPvY":{"duration":72.63,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zM1-aiWdPvY","annotations":[{"segment":[24.354607802058496,56.371454864363784],"label":"Grooming horse"}]},"Q6tYENILn40":{"duration":31.56,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Q6tYENILn40","annotations":[{"segment":[1.00060905,15.4953983],"label":"Playing piano"},{"segment":[17.163395599999998,29.68510111076443],"label":"Playing piano"}]},"C7rGK81C7SU":{"duration":238.957,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=C7rGK81C7SU","annotations":[{"segment":[5.963207488299532,238.921],"label":"Making a 
cake"}]},"WHchTZ61VT4":{"duration":150.79,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WHchTZ61VT4","annotations":[{"segment":[0.01,103.83792878705147],"label":"Bathing dog"},{"segment":[121.20291574804992,150.218333],"label":"Bathing dog"}]},"ZWHFbIjh0N8":{"duration":187.943,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZWHFbIjh0N8","annotations":[{"segment":[10.262033471138846,176.21377574014525],"label":"Playing beach volleyball"}]},"OwaZOx15_x0":{"duration":108.322,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OwaZOx15_x0","annotations":[]},"qgasVDGUw3E":{"duration":172.431,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qgasVDGUw3E","annotations":[{"segment":[12.603845070363553,156.87769181789412],"label":"Fixing the roof"}]},"GnLUmMkyvCo":{"duration":176.43,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GnLUmMkyvCo","annotations":[{"segment":[5.225988029641186,164.20604493135724],"label":"Hammer throw"}]},"jvBTmCNtVpE":{"duration":51.43,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jvBTmCNtVpE","annotations":[]},"qCZhlIBr8_Q":{"duration":67.291,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qCZhlIBr8_Q","annotations":[{"segment":[0.01,63.61671810608425],"label":"Drum corps"}]},"-3JfzLB02vE":{"duration":162.424,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=-3JfzLB02vE","annotations":[]},"BryW4niZT9Y":{"duration":111.81,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BryW4niZT9Y","annotations":[{"segment":[0.01,43.06238845553822],"label":"Tennis serve with ball bouncing"},{"segment":[48.815663026521065,90.1346349453978],"label":"Tennis serve with ball 
bouncing"}]},"ztD4lVJATws":{"duration":14.165,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ztD4lVJATws","annotations":[{"segment":[0,13.932153581903277],"label":"Kneeling"}]},"moqRG7uwZ4Q":{"duration":104.257,"subset":"training","resolution":"3840x2160","url":"https://www.youtube.com/watch?v=moqRG7uwZ4Q","annotations":[{"segment":[22.120176374414978,85.22773838377535],"label":"Blowing leaves"}]},"JMJp9kFudNA":{"duration":156.711,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JMJp9kFudNA","annotations":[{"segment":[25.670299383403908,37.4052933872457],"label":"Disc dog"},{"segment":[42.53935326392648,48.89580834934078],"label":"Disc dog"},{"segment":[52.31851493379464,59.89736522794245],"label":"Disc dog"},{"segment":[66.98725743859687,82.14495802689251],"label":"Disc dog"},{"segment":[94.124431072481,111.72692207824369],"label":"Disc dog"},{"segment":[121.01712566461842,144.487112049922],"label":"Disc dog"}]},"OWEwmnZvvJE":{"duration":151.975,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=OWEwmnZvvJE","annotations":[{"segment":[13.277020528440186,138.46035591887676],"label":"Swimming"}]},"2SCc8b-lYmc":{"duration":4.46,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=2SCc8b-lYmc","annotations":[]},"O-upcCp0jIs":{"duration":112.083,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=O-upcCp0jIs","annotations":[{"segment":[16.786179594383775,102.99020605304212],"label":"Installing carpet"}]},"JlgDwIT9KLI":{"duration":109.78,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JlgDwIT9KLI","annotations":[{"segment":[0.01,88.35674882995319],"label":"Making a 
sandwich"}]},"di_PcoYYnro":{"duration":60.488,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=di_PcoYYnro","annotations":[]},"i_Fs0Qrtu90":{"duration":224.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=i_Fs0Qrtu90","annotations":[{"segment":[106.08264496022018,132.29746661431082],"label":"Washing hands"}]},"d79uK3AhtTU":{"duration":186.27,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=d79uK3AhtTU","annotations":[{"segment":[59.151901200000005,174.93696346021844],"label":"Bungee jumping"}]},"N5J9Zb07uuQ":{"duration":176.333,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=N5J9Zb07uuQ","annotations":[{"segment":[29.68753198127925,74.21882995319812],"label":"Hanging wallpaper"}]},"GlvfqDfvbKA":{"duration":77.857,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GlvfqDfvbKA","annotations":[{"segment":[11.409195007800312,75.99991575663026],"label":"Playing drums"}]},"8fZbv6OUEm8":{"duration":64.111,"subset":"validation","resolution":"480x480","url":"https://www.youtube.com/watch?v=8fZbv6OUEm8","annotations":[{"segment":[4.597201248049923,60.0821856474259],"label":"Beach soccer"}]},"CAW0CEuyvZo":{"duration":3.93,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=CAW0CEuyvZo","annotations":[{"segment":[0.01,3.924171],"label":"Putting on makeup"}]},"kKxNVwk7Evc":{"duration":88.631,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=kKxNVwk7Evc","annotations":[{"segment":[36.745684867394694,87.888],"label":"Drum corps"}]},"NLkJgnrKaKM":{"duration":17.93,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=NLkJgnrKaKM","annotations":[{"segment":[0,17.925804],"label":"Smoking 
hookah"}]},"vCaOS3TAjbI":{"duration":178.56,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vCaOS3TAjbI","annotations":[]},"Zr8cz8QrBp4":{"duration":84.5,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Zr8cz8QrBp4","annotations":[{"segment":[2.89401248049922,84.321],"label":"Breakdancing"}]},"esZMkzlZiuY":{"duration":64.95,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=esZMkzlZiuY","annotations":[{"segment":[18.60962684,57.344720159999994],"label":"Playing water polo"}]},"Xueo66Jb8T4":{"duration":165.327,"subset":"training","resolution":"853x474","url":"https://www.youtube.com/watch?v=Xueo66Jb8T4","annotations":[{"segment":[19.601843644305774,97.23546123556943],"label":"Roof shingle removal"}]},"4mBVik8dq_w":{"duration":23.04,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4mBVik8dq_w","annotations":[{"segment":[0.8804021107644305,20.1055094274571],"label":"Mowing the 
lawn"}]},"g-rw2Kyh9xo":{"duration":238.91,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=g-rw2Kyh9xo","annotations":[{"segment":[31.73289250619364,237.1509327304927],"label":"Windsurfing"}]},"A3a6MNgab0c":{"duration":95.67,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=A3a6MNgab0c","annotations":[{"segment":[44.367414196567864,47.35009750390015],"label":"Archery"},{"segment":[70.61502730109204,78.37000390015601],"label":"Archery"},{"segment":[87.4671879875195,89.70420046801873],"label":"Archery"}]},"B5uwLKdF8fY":{"duration":195.164,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=B5uwLKdF8fY","annotations":[]},"CNSVakEgGxc":{"duration":34.99,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=CNSVakEgGxc","annotations":[]},"G-HRgYT6CCY":{"duration":129.15,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=G-HRgYT6CCY","annotations":[]},"Ja8QImLWYII":{"duration":91.069,"subset":"validation","resolution":"854x270","url":"https://www.youtube.com/watch?v=Ja8QImLWYII","annotations":[{"segment":[8.734822932917316,18.96095709828393],"label":"Hand car wash"}]},"ZkGN3SMxio4":{"duration":24.103,"subset":"testing","resolution":"544x360","url":"https://www.youtube.com/watch?v=ZkGN3SMxio4","annotations":[]},"e5_lP2HgtSE":{"duration":22.2,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=e5_lP2HgtSE","annotations":[{"segment":[9.835117160686426,21.921229446177847],"label":"Walking the dog"}]},"30hcRe-5_3M":{"duration":35.272,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=30hcRe-5_3M","annotations":[]},"4XfNexN6t04":{"duration":37.826,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4XfNexN6t04","annotations":[{"segment":[6.78613116224649,27.734623010920437],"label":"Playing 
congas"}]},"P9I3zxNUu74":{"duration":171.921,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=P9I3zxNUu74","annotations":[{"segment":[20.920128477379098,88.24002909048363],"label":"Decorating the Christmas tree"}]},"OUxgs_QlPHE":{"duration":187.39,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=OUxgs_QlPHE","annotations":[]},"PWriIL3pTWY":{"duration":98.523,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PWriIL3pTWY","annotations":[{"segment":[18.286457098283933,27.81385491419657],"label":"Rock-paper-scissors"}]},"6-beYw2R10s":{"duration":48.95,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=6-beYw2R10s","annotations":[{"segment":[10.22267394695788,38.75461466458659],"label":"Bathing dog"}]},"0CTnYEE7rdo":{"duration":148.31,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0CTnYEE7rdo","annotations":[{"segment":[1.8509310327613104,6.940991372854914],"label":"Skateboarding"},{"segment":[16.88974567394696,32.159926694227764],"label":"Skateboarding"},{"segment":[43.03414651170046,57.147495636505454],"label":"Skateboarding"},{"segment":[106.65990076287052,113.1381593775351],"label":"Skateboarding"}]},"3qkNnr1_78I":{"duration":131.147,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=3qkNnr1_78I","annotations":[{"segment":[7.979260296942489,16.163117011755297],"label":"Slacklining"},{"segment":[89.61322975975038,108.0269071076443],"label":"Slacklining"}]},"iY7bZQnHXlk":{"duration":205.27,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=iY7bZQnHXlk","annotations":[{"segment":[1.8494286400000002,182.00573263806552],"label":"Washing 
dishes"}]},"1U0VxGw1cdA":{"duration":29.05,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=1U0VxGw1cdA","annotations":[{"segment":[1.4058814352574103,27.34666146645866],"label":"Playing lacrosse"}]},"zcDA0s8eWU4":{"duration":137.57999999999998,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=zcDA0s8eWU4","annotations":[{"segment":[19.619541341653665,68.722],"label":"Chopping wood"}]},"dYi4CDe8gHo":{"duration":101.12,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dYi4CDe8gHo","annotations":[]},"n--fgqwuTTI":{"duration":170.574,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=n--fgqwuTTI","annotations":[{"segment":[17.508344996306473,161.02375044829074],"label":"Assembling bicycle"}]},"D9A_9FH7e-A":{"duration":168.21,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=D9A_9FH7e-A","annotations":[{"segment":[19.418339538221527,153.7722563432137],"label":"Doing nails"}]},"1iUkXSE73W0":{"duration":50.97,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=1iUkXSE73W0","annotations":[]},"dJAZplo9ke0":{"duration":30.697,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=dJAZplo9ke0","annotations":[{"segment":[0.01,30.696779],"label":"River tubing"}]},"m8LmntdRXNI":{"duration":165.791,"subset":"testing","resolution":"424x240","url":"https://www.youtube.com/watch?v=m8LmntdRXNI","annotations":[]},"gwbRqyRZguM":{"duration":25.589,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=gwbRqyRZguM","annotations":[{"segment":[3.403765513153005,21.219727541190387],"label":"Swinging at the playground"}]},"mpFNy97oV0c":{"duration":234.1,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=mpFNy97oV0c","annotations":[{"segment":[17.30489376,218.90323130577224],"label":"Wrapping 
presents"}]},"g0ns-bQ4PjE":{"duration":180.03,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=g0ns-bQ4PjE","annotations":[{"segment":[0.3568878039473893,176.73584279304396],"label":"Belly dance"}]},"m7wfcL6YSMY":{"duration":43.399,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=m7wfcL6YSMY","annotations":[{"segment":[1.28637096099844,43.398094],"label":"Camel ride"}]},"TY4-gh8DV04":{"duration":32.229,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=TY4-gh8DV04","annotations":[{"segment":[3.3436009110764426,29.790729921996878],"label":"Using the rowing machine"}]},"I0yNAIWHcQQ":{"duration":37.269,"subset":"validation","resolution":"720x1280","url":"https://www.youtube.com/watch?v=I0yNAIWHcQQ","annotations":[{"segment":[0.2,37.06802721088435],"label":"Swinging at the playground"}]},"xpA16yFuDqw":{"duration":111.062,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xpA16yFuDqw","annotations":[{"segment":[0.01,111.06097505668934],"label":"Swimming"}]},"B2qCBkXrb8s":{"duration":68.894,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=B2qCBkXrb8s","annotations":[{"segment":[0.01,61.12622308892357],"label":"Sumo"}]},"PmmKHLmG5Ec":{"duration":98.081,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PmmKHLmG5Ec","annotations":[{"segment":[6.7298034321372855,66.5502496099844],"label":"Tug of war"}]},"fm4ZaId1sL0":{"duration":15.21,"subset":"training","resolution":"480x480","url":"https://www.youtube.com/watch?v=fm4ZaId1sL0","annotations":[{"segment":[0.7118129485179407,3.582791840873635],"label":"Ironing clothes"},{"segment":[5.40977840873635,10.345014851794073],"label":"Ironing 
clothes"}]},"U2-GFZxIufw":{"duration":65.876,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=U2-GFZxIufw","annotations":[{"segment":[27.70429143620065,37.349488330218065],"label":"Waterskiing"}]},"RoHYo2x9gbc":{"duration":141.224,"subset":"training","resolution":"632x360","url":"https://www.youtube.com/watch?v=RoHYo2x9gbc","annotations":[{"segment":[0,141.223695],"label":"Cutting the grass"}]},"x6pYeqq3O08":{"duration":14.397,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=x6pYeqq3O08","annotations":[]},"DBGea9pST1A":{"duration":60.44,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DBGea9pST1A","annotations":[{"segment":[3.7717016536661467,44.12890934789392],"label":"Shoveling snow"}]},"QokthYjtPzM":{"duration":112.9,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QokthYjtPzM","annotations":[{"segment":[2.2432236499999996,67.62297605],"label":"Doing nails"}]},"SHbvBTSGwYM":{"duration":191.542,"subset":"testing","resolution":"424x240","url":"https://www.youtube.com/watch?v=SHbvBTSGwYM","annotations":[]},"dG_jxrIaK6w":{"duration":80.16,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=dG_jxrIaK6w","annotations":[{"segment":[12.504724406663343,77.90443291092043],"label":"Playing piano"}]},"pDuceHUHFt0":{"duration":188.105,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pDuceHUHFt0","annotations":[]},"gkhgAlVSKTI":{"duration":124.88,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gkhgAlVSKTI","annotations":[{"segment":[0.01,124.48699067146359],"label":"Playing bagpipes"}]},"4EQYvkfMUWw":{"duration":58.93,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=4EQYvkfMUWw","annotations":[{"segment":[15.748480060842434,55.877260157566305],"label":"Playing 
violin"}]},"twQbAuWVxlU":{"duration":19.67,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=twQbAuWVxlU","annotations":[{"segment":[0.01,19.57466711076443],"label":"Walking the dog"}]},"Aj5ncX5G_2w":{"duration":230.11,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Aj5ncX5G_2w","annotations":[]},"lcwR7eA-VOY":{"duration":179.629,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=lcwR7eA-VOY","annotations":[{"segment":[80.42668350543545,139.55570769734788],"label":"Arm wrestling"}]},"veKNTB-N9EQ":{"duration":151.348,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=veKNTB-N9EQ","annotations":[{"segment":[109.79192040561624,115.22248851170048],"label":"Hand car wash"}]},"ak5mpw8komA":{"duration":78.35,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ak5mpw8komA","annotations":[{"segment":[5.520240526371657,52.23392970792825],"label":"Walking the dog"}]},"3VAq3wYxnMs":{"duration":109.32,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=3VAq3wYxnMs","annotations":[{"segment":[59.64742589703587,89.47113884555382],"label":"Shaving legs"},{"segment":[94.92461778471139,99.52599063962558],"label":"Shaving legs"}]},"eF1fbnVguUc":{"duration":23.847,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=eF1fbnVguUc","annotations":[]},"Qe33o8ezVfI":{"duration":236.24,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=Qe33o8ezVfI","annotations":[]},"IpFsz2xc3sY":{"duration":236.22,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=IpFsz2xc3sY","annotations":[{"segment":[35.734285491419655,216.98447581903275],"label":"Removing 
curlers"}]},"5t_hQVg9NsE":{"duration":75.09,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5t_hQVg9NsE","annotations":[{"segment":[28.464752580000003,68.02374498],"label":"Springboard diving"}]},"b2eonKWCocE":{"duration":156.782,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=b2eonKWCocE","annotations":[{"segment":[0.2,144.5707651118599],"label":"Raking leaves"}]},"Ix2vIutdeNQ":{"duration":45.233,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Ix2vIutdeNQ","annotations":[{"segment":[15.594970726720227,28.790714134165366],"label":"Gargling mouthwash"}]},"kF7p6irju1Q":{"duration":109.99000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kF7p6irju1Q","annotations":[{"segment":[1.0295749625903405,7.721812137285491],"label":"Ballet"},{"segment":[25.224586315132605,107.76217827145086],"label":"Ballet"}]},"ICM-GjDa9QY":{"duration":126.6,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ICM-GjDa9QY","annotations":[{"segment":[15.598001560062404,112.34509984399378],"label":"Cleaning windows"}]},"Z9o_h64qVeo":{"duration":165.49,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Z9o_h64qVeo","annotations":[{"segment":[0.01,158.73236349453978],"label":"Playing accordion"}]},"2FRzNpVz6f8":{"duration":56.5,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2FRzNpVz6f8","annotations":[{"segment":[0.5288063930720495,55.87720722620905],"label":"Playing 
polo"}]},"KkqsXWg9Gao":{"duration":75.628,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=KkqsXWg9Gao","annotations":[]},"0iIY3HLF3lU":{"duration":89.281,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=0iIY3HLF3lU","annotations":[{"segment":[5.276274758069864,10.413702243320055],"label":"Canoeing"},{"segment":[20.27200903934069,54.15125951429692],"label":"Canoeing"},{"segment":[64.56496367448354,88.16935977090655],"label":"Canoeing"}]},"Vvu6wxPyB1E":{"duration":21.2,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=Vvu6wxPyB1E","annotations":[]},"cZwFimlaZS4":{"duration":213.09,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=cZwFimlaZS4","annotations":[]},"eCNng1eGQ_0":{"duration":230.39,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eCNng1eGQ_0","annotations":[{"segment":[20.486955119021086,132.26665761052212],"label":"Playing field hockey"}]},"R2EZlSlDCuE":{"duration":204.336,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=R2EZlSlDCuE","annotations":[{"segment":[22.08861508160651,182.18332893986195],"label":"Painting furniture"}]},"2Lip2nXEzfQ":{"duration":173.663,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=2Lip2nXEzfQ","annotations":[]},"bcRri59F0S8":{"duration":121.302,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bcRri59F0S8","annotations":[]},"OsiES8BiRac":{"duration":226.14,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OsiES8BiRac","annotations":[{"segment":[7.055823348580202,215.90819415912637],"label":"Belly dance"}]},"wmmOt79-Azo":{"duration":87.87,"subset":"training","resolution":"240x180","url":"https://www.youtube.com/watch?v=wmmOt79-Azo","annotations":[{"segment":[1.9860249609984402,83.48207492121685],"label":"Playing 
badminton"}]},"34cQhizPKfc":{"duration":183.461,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=34cQhizPKfc","annotations":[{"segment":[124.47446957878316,134.48965678627144],"label":"Skiing"},{"segment":[137.3661154446178,140.80026833073325],"label":"Skiing"},{"segment":[141.94498595943838,144.80678003120124],"label":"Skiing"},{"segment":[146.81003588143525,151.10272698907957],"label":"Skiing"}]},"VssVjdgvHrQ":{"duration":98.17,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VssVjdgvHrQ","annotations":[{"segment":[6.121892854914197,72.85052497347895],"label":"Washing hands"}]},"LaWCwEG6n3w":{"duration":185.087,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=LaWCwEG6n3w","annotations":[{"segment":[7.794238689547582,90.06675819032762],"label":"Ice fishing"},{"segment":[99.59304992199688,108.54199063962558],"label":"Ice fishing"},{"segment":[129.90397815912635,161.65828393135726],"label":"Ice fishing"}]},"Zl3YebXhXC0":{"duration":16.301,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Zl3YebXhXC0","annotations":[{"segment":[0.01,16.221],"label":"Blowing leaves"}]},"YjE1by2PX08":{"duration":73.03,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=YjE1by2PX08","annotations":[{"segment":[6.835577912912435,72.00142051544462],"label":"Shoveling snow"}]},"XwrF8edyJz0":{"duration":214.785,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XwrF8edyJz0","annotations":[{"segment":[10.384322932917318,129.3015694227769],"label":"Making a 
lemonade"}]},"-zjG7ZdFlrQ":{"duration":123.159,"subset":"testing","resolution":"640x352","url":"https://www.youtube.com/watch?v=-zjG7ZdFlrQ","annotations":[]},"uBmUiouilQY":{"duration":38.847,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=uBmUiouilQY","annotations":[{"segment":[0.2,6.480260530421217],"label":"Hula hoop"}]},"bNRE808ALfM":{"duration":183.97,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bNRE808ALfM","annotations":[{"segment":[4.592117333672939,161.58512867861654],"label":"Tai chi"}]},"HadTAlDM5YM":{"duration":131.19,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=HadTAlDM5YM","annotations":[{"segment":[9.210099016205547,87.3936062204393],"label":"Shaving"},{"segment":[97.62704825897036,106.42780942277692],"label":"Shaving"}]},"iANrLcieixM":{"duration":76.49,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iANrLcieixM","annotations":[{"segment":[28.63770256932726,65.50874324648986],"label":"Pole vault"}]},"HGK_K2XqhTA":{"duration":95.85,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=HGK_K2XqhTA","annotations":[{"segment":[64.0010200624025,66.69265174726989],"label":"Drinking coffee"}]},"rhfqzNjqdmo":{"duration":162.145,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rhfqzNjqdmo","annotations":[]},"FHywlXL0QAA":{"duration":7.06,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=FHywlXL0QAA","annotations":[]},"0bjHe_5nACw":{"duration":124.69,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0bjHe_5nACw","annotations":[{"segment":[21.20333212948518,24.315747854914196],"label":"Playing violin"},{"segment":[28.011741528861155,29.95700135725429],"label":"Playing violin"},{"segment":[81.50638680967239,86.75858834633385],"label":"Playing 
violin"},{"segment":[102.70971893915757,108.74002440717628],"label":"Playing violin"}]},"-n0F3QTuxug":{"duration":15.3,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=-n0F3QTuxug","annotations":[{"segment":[1.2413437566302652,9.596542118564741],"label":"Washing face"}]},"nnEPsdA0080":{"duration":81.874,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=nnEPsdA0080","annotations":[]},"XdsGxg1qwI4":{"duration":169.3,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XdsGxg1qwI4","annotations":[]},"SGQQSH88isc":{"duration":75.767,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SGQQSH88isc","annotations":[{"segment":[39.82761212735833,56.67775648039099],"label":"Throwing darts"}]},"sfeUXTuKMs8":{"duration":159.99,"subset":"training","resolution":"608x342","url":"https://www.youtube.com/watch?v=sfeUXTuKMs8","annotations":[{"segment":[79.61804677357162,149.25263940625652],"label":"Horseback riding"}]},"I4y6q9oIIQo":{"duration":127.11,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=I4y6q9oIIQo","annotations":[{"segment":[90.02483855850234,91.80947192199687],"label":"Washing face"}]},"fghAmraOM9A":{"duration":131.495,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fghAmraOM9A","annotations":[{"segment":[40.18397503900156,123.42220904836195],"label":"Rollerblading"}]},"9qgO4EQiQzI":{"duration":158.5,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=9qgO4EQiQzI","annotations":[{"segment":[121.01106969823867,147.91488433998325],"label":"Installing carpet"}]},"ZOeQh2-ci3M":{"duration":138.07,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=ZOeQh2-ci3M","annotations":[{"segment":[69.55888143525742,108.106992199688],"label":"Washing 
hands"}]},"r1hot8Kj1ng":{"duration":105.16,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=r1hot8Kj1ng","annotations":[]},"sf77PM1CtNQ":{"duration":109.74000000000001,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=sf77PM1CtNQ","annotations":[{"segment":[86.94170358814353,99.26414976599064],"label":"Vacuuming floor"}]},"yScYLIJ_txA":{"duration":41.262,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yScYLIJ_txA","annotations":[{"segment":[8.766506230529595,40.01265576323988],"label":"BMX"}]},"O9VbYvD4rvg":{"duration":39.13,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=O9VbYvD4rvg","annotations":[]},"H9FxfosWRTE":{"duration":155.946,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=H9FxfosWRTE","annotations":[{"segment":[2.7969758190327614,155.921],"label":"Kneeling"}]},"1aNOjjLWjxc":{"duration":2.996,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1aNOjjLWjxc","annotations":[{"segment":[0.2,2.795374149659864],"label":"Playing beach volleyball"}]},"bWdufJDosIo":{"duration":121.046,"subset":"validation","resolution":"656x480","url":"https://www.youtube.com/watch?v=bWdufJDosIo","annotations":[{"segment":[84.0022542901716,101.38576755070204],"label":"Hanging wallpaper"}]},"uqixFWxAQSA":{"duration":209.537,"subset":"training","resolution":"424x238","url":"https://www.youtube.com/watch?v=uqixFWxAQSA","annotations":[{"segment":[31.05460642915513,185.34696524648984],"label":"Removing ice from car"}]},"HeMpg3SAUUs":{"duration":27.66,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=HeMpg3SAUUs","annotations":[{"segment":[0.01,27.23595138011396],"label":"Washing 
face"}]},"ooWk0EL6I7o":{"duration":21.29,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ooWk0EL6I7o","annotations":[{"segment":[1.129409825273011,21.292697],"label":"Removing curlers"}]},"ciS5DtShxA8":{"duration":273.601,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=ciS5DtShxA8","annotations":[{"segment":[10.669305772230889,221.49478783151324],"label":"Putting on shoes"}]},"xsh5VD0fggU":{"duration":66.386,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xsh5VD0fggU","annotations":[]},"Y0fgqIsaZS8":{"duration":53.99,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Y0fgqIsaZS8","annotations":[]},"P2fUelA4BfA":{"duration":32.555,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=P2fUelA4BfA","annotations":[{"segment":[0.2,31.23597352024922],"label":"Peeling potatoes"}]},"loZjywaPvUk":{"duration":67.59,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=loZjywaPvUk","annotations":[]},"l4UJiGsZVfE":{"duration":211.929,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=l4UJiGsZVfE","annotations":[{"segment":[17.186851794071764,124.60467550702029],"label":"BMX"},{"segment":[130.55397035881435,196.67581123244932],"label":"BMX"}]},"e60HGAzBOvo":{"duration":156.224,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=e60HGAzBOvo","annotations":[{"segment":[16.324691107644306,135.73160218408736],"label":"Waxing skis"}]},"16czjUF1AA4":{"duration":86.61,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=16czjUF1AA4","annotations":[]},"RnRUwLtR33g":{"duration":27.145,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RnRUwLtR33g","annotations":[{"segment":[4.1370513689859045,18.57451859849046],"label":"Rope 
skipping"}]},"kPbae85fofk":{"duration":111.967,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=kPbae85fofk","annotations":[{"segment":[71.7040522776911,79.21507358814353],"label":"Throwing darts"}]},"9wtMJoqGTg0":{"duration":89.28,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9wtMJoqGTg0","annotations":[{"segment":[0,85.66436193447738],"label":"Using the balance beam"}]},"Cx3QGeQu7xM":{"duration":113.848,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Cx3QGeQu7xM","annotations":[{"segment":[7.967547366568452,33.64076425311571],"label":"Disc dog"},{"segment":[40.72303098043909,74.3637968628709],"label":"Disc dog"},{"segment":[87.99716011637013,104.81754335120682],"label":"Disc dog"}]},"AjSFqqfzml0":{"duration":18.25,"subset":"training","resolution":"1216x720","url":"https://www.youtube.com/watch?v=AjSFqqfzml0","annotations":[{"segment":[2.4564560132605306,16.96459798],"label":"Shaving legs"}]},"q66BG9h_7XI":{"duration":182.347,"subset":"training","resolution":"657x480","url":"https://www.youtube.com/watch?v=q66BG9h_7XI","annotations":[{"segment":[43.4563621336385,174.1094752741433],"label":"Powerbocking"}]},"xNy1WgRRkiQ":{"duration":203.34,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=xNy1WgRRkiQ","annotations":[]},"zLF5DSuDixg":{"duration":231.74,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zLF5DSuDixg","annotations":[{"segment":[10.48411230326764,148.94669892918165],"label":"Shot put"},{"segment":[167.74579611856473,219.08179406864272],"label":"Shot put"}]},"vNMBtwYDH9E":{"duration":104.21000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vNMBtwYDH9E","annotations":[{"segment":[5.118234009360374,21.041628705148206],"label":"Using the balance beam"},{"segment":[27.37848985959438,99.35873322932918],"label":"Using the balance 
beam"}]},"MwQTeFD0OKQ":{"duration":231.92000000000002,"subset":"validation","resolution":"854x270","url":"https://www.youtube.com/watch?v=MwQTeFD0OKQ","annotations":[{"segment":[37.990160463561395,217.44844227238477],"label":"Snatch"}]},"OqoUemFpetc":{"duration":140.27,"subset":"training","resolution":"1906x1080","url":"https://www.youtube.com/watch?v=OqoUemFpetc","annotations":[{"segment":[2.6209048361934477,128.42433697347894],"label":"Dodgeball"}]},"aL4758WHLjQ":{"duration":61.65,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=aL4758WHLjQ","annotations":[]},"hxbp-zM5JPQ":{"duration":106.72,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=hxbp-zM5JPQ","annotations":[{"segment":[19.472377535101405,41.274783151326055],"label":"Ironing clothes"},{"segment":[51.59347893915756,61.745744149766],"label":"Ironing clothes"},{"segment":[67.40438377535101,83.21528861154447],"label":"Ironing clothes"},{"segment":[87.70891419656786,100.8569297971919],"label":"Ironing clothes"}]},"b3mJ5rPzDv8":{"duration":145.543,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=b3mJ5rPzDv8","annotations":[{"segment":[0.01,145.481],"label":"Roof shingle removal"}]},"IcfWEKjl_AY":{"duration":21.967,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=IcfWEKjl_AY","annotations":[{"segment":[14.650152886115444,20.70874414976599],"label":"Fun sliding down"}]},"c3lo99rni-o":{"duration":208.42000000000002,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=c3lo99rni-o","annotations":[{"segment":[16.416439157566302,190.33317082683308],"label":"Preparing pasta"}]},"hXjxOvlNxQM":{"duration":7.129,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=hXjxOvlNxQM","annotations":[{"segment":[2.031491419656786,5.788658346333853],"label":"Doing a 
powerbomb"}]},"3xoIXUMqmqA":{"duration":159.614,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=3xoIXUMqmqA","annotations":[{"segment":[16.67798283931357,138.9176255850234],"label":"Kneeling"}]},"a_uamUiKq1o":{"duration":45.488,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=a_uamUiKq1o","annotations":[{"segment":[0.01,34.98522585023401],"label":"Arm wrestling"}]},"JWFJGkUKVfk":{"duration":116.356,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JWFJGkUKVfk","annotations":[{"segment":[28.848453978159124,59.52151014040561],"label":"Welding"}]},"daWdp5tXqFI":{"duration":105.07,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=daWdp5tXqFI","annotations":[]},"pn1XDTlNMso":{"duration":140.95,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=pn1XDTlNMso","annotations":[{"segment":[10.55439311450009,16.711122431291813],"label":"Polishing forniture"},{"segment":[41.33803948829953,46.61523601872074],"label":"Polishing forniture"},{"segment":[54.750914003120116,64.42577430889236],"label":"Polishing forniture"}]},"1zyyumIi0iQ":{"duration":232.18,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1zyyumIi0iQ","annotations":[{"segment":[22.27057956318253,99.04070748829955],"label":"Cheerleading"}]},"hcFw88RcAbo":{"duration":84.15,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=hcFw88RcAbo","annotations":[{"segment":[6.432615704628185,17.459956912562216],"label":"Mooping floor"},{"segment":[29.40624322115742,36.88908117628705],"label":"Mooping floor"},{"segment":[52.24859184399376,52.77370331981279],"label":"Mooping floor"},{"segment":[73.64688448361935,78.24160989703589],"label":"Mooping 
floor"}]},"5wQLpjdsRUg":{"duration":122.346,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5wQLpjdsRUg","annotations":[{"segment":[10.084488021073504,19.217611877147814],"label":"Doing fencing"},{"segment":[23.02308015051211,26.067454769203547],"label":"Doing fencing"},{"segment":[29.111829387894986,41.47960127632895],"label":"Doing fencing"},{"segment":[45.85588979069789,49.66135806406219],"label":"Doing fencing"},{"segment":[57.65284143812721,75.91908915027582],"label":"Doing fencing"},{"segment":[83.91057252434085,90.37986858906017],"label":"Doing fencing"},{"segment":[97.22971148111588,103.12818730483055],"label":"Doing fencing"},{"segment":[107.88502264653592,109.78775678321807],"label":"Doing fencing"}]},"whwserG3XGc":{"duration":128.709,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=whwserG3XGc","annotations":[{"segment":[25.17459516380655,51.25186505460218],"label":"Hurling"},{"segment":[78.73329563182526,96.5861957878315],"label":"Hurling"}]},"bNvAqFcKzqU":{"duration":233.48,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bNvAqFcKzqU","annotations":[{"segment":[9.468745709828394,220.3304290171607],"label":"Playing saxophone"}]},"Ocv54K1U50g":{"duration":197.39,"subset":"testing","resolution":"492x360","url":"https://www.youtube.com/watch?v=Ocv54K1U50g","annotations":[]},"pop8YCGQpWk":{"duration":81.13,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pop8YCGQpWk","annotations":[]},"HPyLSmDjsHY":{"duration":171.48,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=HPyLSmDjsHY","annotations":[{"segment":[27.55440655721467,61.79677587103483],"label":"Doing nails"},{"segment":[80.52307158953025,143.12240209048363],"label":"Doing 
nails"}]},"BMxtjh9E7BY":{"duration":67.849,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=BMxtjh9E7BY","annotations":[{"segment":[4.128080457098284,50.48959943681748],"label":"Curling"}]},"boJA2e8Lt-8":{"duration":152.79,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=boJA2e8Lt-8","annotations":[]},"GsNyn-6DDJM":{"duration":33.16,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GsNyn-6DDJM","annotations":[{"segment":[1.109827081903276,30.79124903978159],"label":"Brushing hair"}]},"xxvBpA0qDS4":{"duration":183.67,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xxvBpA0qDS4","annotations":[]},"MgjxjBxGrQU":{"duration":232.22,"subset":"testing","resolution":"658x480","url":"https://www.youtube.com/watch?v=MgjxjBxGrQU","annotations":[]},"AIBFZrfFUdk":{"duration":98.662,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AIBFZrfFUdk","annotations":[]},"62Dwj4l7_qs":{"duration":178.72,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=62Dwj4l7_qs","annotations":[{"segment":[1.2543907956318254,135.0711926677067],"label":"Snatch"}]},"TS-p15iwQPc":{"duration":129.06,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=TS-p15iwQPc","annotations":[{"segment":[17.415584864210896,101.37279744659175],"label":"Doing karate"}]},"_ja1fre9OTE":{"duration":114.45,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=_ja1fre9OTE","annotations":[]},"BjQnUdue4qY":{"duration":120.187,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BjQnUdue4qY","annotations":[]},"q4Oy6EDTJiM":{"duration":74.68,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=q4Oy6EDTJiM","annotations":[{"segment":[1.2814810687665603,34.25049318252729],"label":"Bathing 
dog"}]},"hnwZmUR1FUw":{"duration":83.89,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=hnwZmUR1FUw","annotations":[{"segment":[1.9631915480700861,67.40290981707295],"label":"Playing squash"}]},"7iPb7mLURVc":{"duration":111.294,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=7iPb7mLURVc","annotations":[{"segment":[74.6584924024961,110.59874339625584],"label":"Baton twirling"}]},"2VTEseqA5SA":{"duration":31.278,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=2VTEseqA5SA","annotations":[{"segment":[9.144833527403962,17.754596621689934],"label":"Welding"},{"segment":[19.84623411935712,20.235375979388223],"label":"Welding"},{"segment":[21.694657954504866,24.46729370722648],"label":"Welding"},{"segment":[26.072503879854786,27.094001262436436],"label":"Welding"}]},"rvkVdD2u_yA":{"duration":36.525,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rvkVdD2u_yA","annotations":[{"segment":[3.640411856474259,36.481],"label":"Kneeling"}]},"TeXm2KTfkzo":{"duration":73.91,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TeXm2KTfkzo","annotations":[{"segment":[0.01,73.9088888888889],"label":"Cutting the grass"}]},"a5R0qO5NynU":{"duration":238.632,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=a5R0qO5NynU","annotations":[{"segment":[105.33295319812792,177.9121965678627],"label":"Hula hoop"}]},"mcS9LX_P4BA":{"duration":159.313,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mcS9LX_P4BA","annotations":[{"segment":[49.61401869158878,109.16454828660436],"label":"Carving jack-o-lanterns"}]},"at8e-jBBU5E":{"duration":67.15,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=at8e-jBBU5E","annotations":[{"segment":[1.0476148003120125,9.638056162870516],"label":"Rock 
climbing"},{"segment":[12.990423523868955,20.323727126053043],"label":"Rock climbing"},{"segment":[35.61890321060842,64.42831021918877],"label":"Rock climbing"}]},"tyjUDi3uLd0":{"duration":235.915,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=tyjUDi3uLd0","annotations":[{"segment":[80.96917727049218,195.79819128861155],"label":"Laying tile"}]},"NOGtIoZbEVg":{"duration":136.3,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NOGtIoZbEVg","annotations":[{"segment":[2.4847672299999997,129.92161811370414],"label":"Playing guitarra"}]},"NV9rPVAczy0":{"duration":103.631,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=NV9rPVAczy0","annotations":[]},"Dx4LpX-X9JY":{"duration":227.811,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Dx4LpX-X9JY","annotations":[{"segment":[65.3791326053042,84.56648673946958],"label":"Running a marathon"},{"segment":[89.54098595943837,136.44340717628705],"label":"Running a marathon"},{"segment":[138.57533541341655,145.68176287051483],"label":"Running a marathon"}]},"iosb2TdQ7yY":{"duration":230.97,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iosb2TdQ7yY","annotations":[{"segment":[44.091791199999996,169.06857599999998],"label":"Chopping wood"}]},"ZeN0mrRQHg8":{"duration":127.78,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZeN0mrRQHg8","annotations":[{"segment":[8.545120837895812,105.72106320099682],"label":"Riding bumper cars"}]},"j3h8Di7V3nk":{"duration":54.219,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=j3h8Di7V3nk","annotations":[{"segment":[0.01,54.21859410430839],"label":"Putting on 
shoes"}]},"8hrRE3_sWXo":{"duration":228.555,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=8hrRE3_sWXo","annotations":[{"segment":[45.995949144088215,73.45089553241995],"label":"Kneeling"},{"segment":[76.30335749484402,113.74192075166],"label":"Kneeling"},{"segment":[122.65586438423523,213.22153006864278],"label":"Kneeling"}]},"2Mh-OomUNpQ":{"duration":188.454,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2Mh-OomUNpQ","annotations":[{"segment":[0.01,171.11435569422778],"label":"Fun sliding down"}]},"TIAAUayALPI":{"duration":212.881,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TIAAUayALPI","annotations":[{"segment":[7.970566398166132,10.95952879747843],"label":"Braiding hair"},{"segment":[13.284277330276886,14.944811996561494],"label":"Braiding hair"},{"segment":[16.605346662846106,22.25116452821378],"label":"Braiding hair"},{"segment":[61.43978265253059,206.57051130421215],"label":"Braiding hair"}]},"VZ5R9goklpo":{"duration":94.22999999999999,"subset":"testing","resolution":"322x240","url":"https://www.youtube.com/watch?v=VZ5R9goklpo","annotations":[]},"8wNsOZHTsj0":{"duration":70.08,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8wNsOZHTsj0","annotations":[{"segment":[0.01,26.113266770670826],"label":"Chopping wood"},{"segment":[29.281822152886114,38.67822776911076],"label":"Chopping wood"},{"segment":[39.661572542901716,51.57097035881435],"label":"Chopping wood"},{"segment":[52.66357566302652,55.504349453978165],"label":"Chopping wood"},{"segment":[56.59695475819033,61.18589703588144],"label":"Chopping 
wood"}]},"X6BHwIJKl-Q":{"duration":69.73,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=X6BHwIJKl-Q","annotations":[{"segment":[22.216165366614664,66.86576911076443],"label":"Kayaking"}]},"4-m0cFAVp9k":{"duration":178.75,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4-m0cFAVp9k","annotations":[]},"OlyMLJkWQbQ":{"duration":70.64,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=OlyMLJkWQbQ","annotations":[]},"zE0vlPLBVJo":{"duration":25.426,"subset":"validation","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=zE0vlPLBVJo","annotations":[{"segment":[0,2.221291020280811],"label":"Hitting a pinata"},{"segment":[3.0939410639625584,21.379926070202806],"label":"Hitting a pinata"}]},"0h45uztur-o":{"duration":171.94,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0h45uztur-o","annotations":[{"segment":[126.7101009126365,154.86790111544462],"label":"Wrapping presents"},{"segment":[27.51947715,122.71399967000002],"label":"Wrapping presents"}]},"xcDAjcRX93A":{"duration":85.96000000000001,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xcDAjcRX93A","annotations":[{"segment":[9.651032761310454,31.365856474258972],"label":"Using parallel bars"},{"segment":[51.484156006240255,65.42778159126365],"label":"Using parallel bars"}]},"tZ6Fjq6SJvQ":{"duration":200.06,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tZ6Fjq6SJvQ","annotations":[{"segment":[0,200.063128],"label":"Tango"}]},"N4Z9eYSu2uM":{"duration":128.918,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=N4Z9eYSu2uM","annotations":[{"segment":[5.226811232449298,119.83319188767551],"label":"Tug of 
war"}]},"xtmxgih0yIk":{"duration":232.62,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xtmxgih0yIk","annotations":[{"segment":[64.9587168673947,71.12798048049922],"label":"Grooming horse"},{"segment":[74.75695907644307,84.9180991450858],"label":"Grooming horse"},{"segment":[93.26474991575662,101.61140068642746],"label":"Grooming horse"},{"segment":[129.9174337347894,161.1266496599064],"label":"Grooming horse"},{"segment":[193.06166130421215,215.19843073946956],"label":"Grooming horse"}]},"sAjMcMdflqs":{"duration":224.654,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sAjMcMdflqs","annotations":[]},"UP12y84KU_M":{"duration":50.574,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UP12y84KU_M","annotations":[]},"P7bvD_CZH5M":{"duration":233.221,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=P7bvD_CZH5M","annotations":[{"segment":[4.72909984399376,232.961],"label":"Surfing"}]},"HeOj7jZ0igI":{"duration":160.752,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=HeOj7jZ0igI","annotations":[{"segment":[14.039400936037442,155.18552106084243],"label":"Doing fencing"}]},"mxY7J50ItrU":{"duration":30.86,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mxY7J50ItrU","annotations":[{"segment":[1.2998446305199147,30.52227625897036],"label":"Doing 
kickboxing"}]},"tpDhYD9e_cU":{"duration":232.99,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=tpDhYD9e_cU","annotations":[{"segment":[0.01,58.87625897035882],"label":"Rollerblading"}]},"8IlwMnM_n38":{"duration":144.94,"subset":"training","resolution":"434x326","url":"https://www.youtube.com/watch?v=8IlwMnM_n38","annotations":[{"segment":[8.253153041767929,131.259050431405],"label":"Breakdancing"}]},"Dv1Y6_QalWM":{"duration":198.23,"subset":"testing","resolution":"848x480","url":"https://www.youtube.com/watch?v=Dv1Y6_QalWM","annotations":[]},"CaQkeVwKiUs":{"duration":104.746,"subset":"validation","resolution":"640x338","url":"https://www.youtube.com/watch?v=CaQkeVwKiUs","annotations":[{"segment":[54.80725233644859,60.67945794392523],"label":"Playing ten pins"},{"segment":[92.97658878504673,99.82749532710281],"label":"Playing ten pins"}]},"2k-qlZxc9l4":{"duration":236.17000000000002,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=2k-qlZxc9l4","annotations":[]},"nKa1e_CpvoY":{"duration":223.91,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=nKa1e_CpvoY","annotations":[{"segment":[37.02723868954758,213.08127829953196],"label":"Making a cake"}]},"mgmwdQixDXY":{"duration":177.61,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mgmwdQixDXY","annotations":[{"segment":[17.733185198127927,154.8882894648986],"label":"Doing nails"}]},"_3hVtPVXTmg":{"duration":45.86,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=_3hVtPVXTmg","annotations":[{"segment":[6.362042121684867,31.309825273010922],"label":"Using uneven bars"}]},"dFsFL_WJasg":{"duration":23.267,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=dFsFL_WJasg","annotations":[{"segment":[0.01,21.505989653682068],"label":"Camel 
ride"}]},"fD9JNH5FWCk":{"duration":86.843,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fD9JNH5FWCk","annotations":[{"segment":[10.69704368174727,21.52998907956318],"label":"Hula hoop"}]},"e6r-A-LAe2E":{"duration":39.24,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=e6r-A-LAe2E","annotations":[{"segment":[4.407740255850234,39.04113378684807],"label":"Playing kickball"}]},"ObOkhXGu7oY":{"duration":224.7,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ObOkhXGu7oY","annotations":[{"segment":[41.35356942277691,207.13674102964117],"label":"Doing step aerobics"}]},"of_tqCVh17o":{"duration":237.425,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=of_tqCVh17o","annotations":[]},"B7t85SESTXI":{"duration":128.59,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=B7t85SESTXI","annotations":[{"segment":[7.422600705388759,12.036649792522313],"label":"Paintball"},{"segment":[43.532550082955694,83.25349313572542],"label":"Paintball"}]},"8VPiqCCOPWg":{"duration":198.69,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8VPiqCCOPWg","annotations":[{"segment":[50.83568941669231,55.48529515602393],"label":"Plataform diving"},{"segment":[64.47453291873171,91.44224620685507],"label":"Plataform diving"},{"segment":[114.07032747160227,143.2078567714137],"label":"Plataform diving"}]},"G00TjQ7JJ8Y":{"duration":27.52,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=G00TjQ7JJ8Y","annotations":[{"segment":[1.6371425,26.075632808892358],"label":"Playing 
polo"}]},"mwndGBRpOPg":{"duration":6.55,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=mwndGBRpOPg","annotations":[{"segment":[0,6.501],"label":"Windsurfing"}]},"qdY2-BpnThc":{"duration":158.337,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=qdY2-BpnThc","annotations":[]},"hJ4ZU-FA5XA":{"duration":170.13,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=hJ4ZU-FA5XA","annotations":[{"segment":[24.418408492979722,32.911767968798756],"label":"Getting a haircut"},{"segment":[64.49644851950079,88.11860456162248],"label":"Getting a haircut"},{"segment":[99.79697384087363,100.85864377535103],"label":"Getting a haircut"},{"segment":[143.06002367082684,144.6525285725429],"label":"Getting a haircut"}]},"2OEc8kPVn_Y":{"duration":212.77,"subset":"training","resolution":"322x240","url":"https://www.youtube.com/watch?v=2OEc8kPVn_Y","annotations":[{"segment":[7.381377819068802,186.67353305738655],"label":"Tai chi"}]},"HiEClhlj5_A":{"duration":154.738,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=HiEClhlj5_A","annotations":[{"segment":[50.055147952130035,76.28597260132616],"label":"Hanging wallpaper"}]},"YpXe04ALJw4":{"duration":15.465,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=YpXe04ALJw4","annotations":[{"segment":[0.01,15.401],"label":"Beach 
soccer"}]},"c2mpAjNATAw":{"duration":98.69,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=c2mpAjNATAw","annotations":[]},"HCvhz5HkpF4":{"duration":161.263,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=HCvhz5HkpF4","annotations":[]},"Lxcm6HMOHaY":{"duration":31.022,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Lxcm6HMOHaY","annotations":[]},"fFu37ig0pkY":{"duration":139.692,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=fFu37ig0pkY","annotations":[{"segment":[25.10961059431764,128.2461380148784],"label":"Baton twirling"}]},"RaYMOYWhba8":{"duration":152.811,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RaYMOYWhba8","annotations":[{"segment":[0.20000000000000004,151.58926053042123],"label":"Beach soccer"}]},"L4yBWOH-vUM":{"duration":140.528,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=L4yBWOH-vUM","annotations":[]},"m2IJHzLEcKU":{"duration":57.122,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=m2IJHzLEcKU","annotations":[]},"ztnt2EYuK3I":{"duration":12.33,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=ztnt2EYuK3I","annotations":[{"segment":[0.01,12.329794],"label":"Removing ice from car"}]},"UXX8k68S3_g":{"duration":99.103,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=UXX8k68S3_g","annotations":[{"segment":[0.01,99.1027664399093],"label":"Carving jack-o-lanterns"}]},"E29kZvEtOUA":{"duration":237.63,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=E29kZvEtOUA","annotations":[{"segment":[19.455304212168485,107.07505347],"label":"Mowing the lawn"},{"segment":[140.99003523000002,198.44410296411854],"label":"Mowing the 
lawn"}]},"zyylgHTPUS8":{"duration":104.931,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=zyylgHTPUS8","annotations":[{"segment":[39.942393840873635,84.46834107332293],"label":"Grooming dog"}]},"RHpigjSwhVM":{"duration":122.161,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RHpigjSwhVM","annotations":[{"segment":[12.768678687283545,48.40663263537345],"label":"BMX"},{"segment":[51.646446630654346,108.43847899531981],"label":"BMX"}]},"gsyV9UpPvVs":{"duration":140.946,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=gsyV9UpPvVs","annotations":[]},"kfW5sm39ZYE":{"duration":208.91,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kfW5sm39ZYE","annotations":[{"segment":[55.55753400204794,98.44405464276272],"label":"Changing car wheel"},{"segment":[126.71017076562431,143.27996297557766],"label":"Changing car wheel"},{"segment":[172.84567064431795,202.08648042658854],"label":"Changing car wheel"}]},"8B3qhnSB7U8":{"duration":48.37,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8B3qhnSB7U8","annotations":[{"segment":[2.716362825941609,29.35180929953198],"label":"Getting a piercing"}]},"TakNLlCbjvw":{"duration":52.48,"subset":"training","resolution":"1906x1080","url":"https://www.youtube.com/watch?v=TakNLlCbjvw","annotations":[{"segment":[3.9261154446177846,47.35876755070203],"label":"Hopscotch"}]},"jXIKHEsmVl4":{"duration":30.07,"subset":"training","resolution":"144x176","url":"https://www.youtube.com/watch?v=jXIKHEsmVl4","annotations":[{"segment":[0.01,29.988938452196923],"label":"Chopping 
wood"}]},"l8-mKyFRzuA":{"duration":131.634,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=l8-mKyFRzuA","annotations":[]},"cGxZAOpmpKQ":{"duration":139.0,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cGxZAOpmpKQ","annotations":[]},"esNQZCjMZaM":{"duration":93.86,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=esNQZCjMZaM","annotations":[{"segment":[1.00894125,87.10619672854915],"label":"Archery"}]},"Z8v3jTD-6H0":{"duration":175.45,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=Z8v3jTD-6H0","annotations":[]},"qvLFSqPCvn8":{"duration":158.059,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=qvLFSqPCvn8","annotations":[{"segment":[9.847864878038443,145.00981032911608],"label":"Changing car wheel"}]},"5tM7hwg_8wU":{"duration":221.472,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5tM7hwg_8wU","annotations":[]},"we6Ddq1ABcQ":{"duration":68.06,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=we6Ddq1ABcQ","annotations":[{"segment":[1.9628471138845556,67.63865054602185],"label":"Cricket"}]},"eS7ENymCpZE":{"duration":64.69,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=eS7ENymCpZE","annotations":[{"segment":[0.01,0.02],"label":"Sailing"}]},"Z5d8BZG4d2Y":{"duration":194.189,"subset":"testing","resolution":"300x240","url":"https://www.youtube.com/watch?v=Z5d8BZG4d2Y","annotations":[]},"rjzQD0vwnP0":{"duration":236.867,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=rjzQD0vwnP0","annotations":[{"segment":[0.01,211.73892541029642],"label":"Removing ice from 
car"}]},"GbqYSIQ29mA":{"duration":191.89,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=GbqYSIQ29mA","annotations":[]},"ewGW8hMlxnA":{"duration":190.73,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=ewGW8hMlxnA","annotations":[{"segment":[51.1783739267938,132.11161641567705],"label":"Cleaning shoes"}]},"tOiPx5Veyd8":{"duration":104.235,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=tOiPx5Veyd8","annotations":[{"segment":[36.433323845870774,56.8905376309175],"label":"Shuffleboard"}]},"HRbBaePTCR8":{"duration":183.879,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HRbBaePTCR8","annotations":[{"segment":[17.158205981232566,130.97433460168023],"label":"Wakeboarding"},{"segment":[139.2674696016626,169.29433599102936],"label":"Wakeboarding"}]},"M9Z2RKnwiz4":{"duration":234.313,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=M9Z2RKnwiz4","annotations":[{"segment":[52.63807922620905,234.31256235827664],"label":"Croquet"}]},"gHhgZ0Bd4H4":{"duration":215.02,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=gHhgZ0Bd4H4","annotations":[{"segment":[9.893315132605304,202.05838533541342],"label":"Spinning"}]},"Sw_zeKg3V9M":{"duration":15.5,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=Sw_zeKg3V9M","annotations":[]},"PLgcXxX3A2A":{"duration":169.09,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=PLgcXxX3A2A","annotations":[{"segment":[9.850279303180253,135.21749029372356],"label":"Dodgeball"},{"segment":[137.5878787376488,138.64138471272668],"label":"Dodgeball"},{"segment":[141.0117731566519,141.53852614419085],"label":"Dodgeball"},{"segment":[142.85540861303818,143.64553809434662],"label":"Dodgeball"},{"segment":[145.4891735507329,160.23825720182325],"label":"Dodgeball"}]},"JDwd6Ec454A":{"duration":56.43,"s
ubset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=JDwd6Ec454A","annotations":[]},"1cU8sp05Bu0":{"duration":64.807,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=1cU8sp05Bu0","annotations":[{"segment":[11.72792863650546,53.7867071950078],"label":"Playing congas"}]},"GKy0MNJZxDc":{"duration":37.64,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GKy0MNJZxDc","annotations":[{"segment":[1.5251575663026522,31.676349453978155],"label":"Using parallel bars"}]},"oghT33khlYQ":{"duration":166.395,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oghT33khlYQ","annotations":[{"segment":[16.950439221647077,162.35099245529193],"label":"Waterskiing"}]},"yDSjqGmia-k":{"duration":86.123,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=yDSjqGmia-k","annotations":[]},"mTsEJFP3W1A":{"duration":206.194,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=mTsEJFP3W1A","annotations":[{"segment":[62.707862714508586,180.72727613104524],"label":"Assembling bicycle"}]},"d_z2CA-o13U":{"duration":182.07,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=d_z2CA-o13U","annotations":[{"segment":[10.225321329173168,160.4807375273011],"label":"Paintball"}]},"JE50XTpCN78":{"duration":219.03,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JE50XTpCN78","annotations":[{"segment":[93.09382215288612,207.1978096723869],"label":"Doing nails"}]},"SwXGVQXyXkQ":{"duration":148.98,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SwXGVQXyXkQ","annotations":[{"segment":[37.888081985927045,118.41080377097376],"label":"Getting a 
haircut"}]},"Qm7AL0RjHxI":{"duration":82.97,"subset":"training","resolution":"352x240","url":"https://www.youtube.com/watch?v=Qm7AL0RjHxI","annotations":[{"segment":[8.315132605304212,66.00936037441498],"label":"Tai chi"}]},"uklYBuQDwIw":{"duration":90.047,"subset":"training","resolution":"470x360","url":"https://www.youtube.com/watch?v=uklYBuQDwIw","annotations":[{"segment":[1.2760160894394517,5.911820257926191],"label":"Slacklining"},{"segment":[10.688103340609498,30.636109156522135],"label":"Slacklining"},{"segment":[38.36244943733337,59.99620222360482],"label":"Slacklining"},{"segment":[63.50817507851901,84.15857546541449],"label":"Slacklining"}]},"CJKwvmOuhJk":{"duration":36.27,"subset":"training","resolution":"640x352","url":"https://www.youtube.com/watch?v=CJKwvmOuhJk","annotations":[{"segment":[0.9393670999999999,13.42497035],"label":"Washing dishes"}]},"qrvbu5AkWzk":{"duration":105.93,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=qrvbu5AkWzk","annotations":[]},"RZr2KLNE6qE":{"duration":6.107,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RZr2KLNE6qE","annotations":[{"segment":[0.01,6.106438],"label":"Swinging at the playground"}]},"IEqnfSiCIXc":{"duration":198.72,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IEqnfSiCIXc","annotations":[{"segment":[14.877828393135728,44.9434399375975],"label":"Skateboarding"},{"segment":[48.35294227769111,170.47511700468021],"label":"Skateboarding"}]},"sYpC9oPhmY4":{"duration":84.707,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=sYpC9oPhmY4","annotations":[{"segment":[0.2,81.73503900156007],"label":"Drum corps"}]},"HaprTZfP4sQ":{"duration":117.31,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HaprTZfP4sQ","annotations":[{"segment":[8.820465714242847,69.66672832639237],"label":"Wrapping 
presents"}]},"IGXq7a7Jl7s":{"duration":227.35,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=IGXq7a7Jl7s","annotations":[{"segment":[26.06108190327613,27.833944617784713],"label":"Javelin throw"},{"segment":[33.152532761310454,39.53483853354134],"label":"Javelin throw"},{"segment":[42.37141887675507,46.62628939157566],"label":"Javelin throw"},{"segment":[52.65402262090483,71.8009399375975],"label":"Javelin throw"},{"segment":[77.82867316692668,87.75670436817472],"label":"Javelin throw"},{"segment":[110.09477457098285,128.8871193447738],"label":"Javelin throw"},{"segment":[160.79864820592823,172.14496957878313],"label":"Javelin throw"}]},"WHG0Q_0H24k":{"duration":43.93,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=WHG0Q_0H24k","annotations":[]},"WSKx2EK_Bqc":{"duration":177.54,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WSKx2EK_Bqc","annotations":[{"segment":[59.27224837441498,151.5042984149766],"label":"Playing congas"}]},"NGBaYycOQT0":{"duration":29.28,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NGBaYycOQT0","annotations":[{"segment":[9.27287563650546,19.8704477925117],"label":"Playing badminton"}]},"VbhW_K3NvmQ":{"duration":166.0,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=VbhW_K3NvmQ","annotations":[{"segment":[0.01,165.99909297052153],"label":"Playing piano"}]},"pSdlIsd_vZc":{"duration":43.375,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pSdlIsd_vZc","annotations":[{"segment":[2.8610754923716515,41.256167392683174],"label":"Fun sliding down"}]},"soE6TfvOiIo":{"duration":117.89,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=soE6TfvOiIo","annotations":[{"segment":[90.85261739781592,116.04858619032761],"label":"Walking the 
dog"}]},"yWvyLG3kq2I":{"duration":45.54,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yWvyLG3kq2I","annotations":[{"segment":[3.5454758190327618,44.60208580343214],"label":"Kayaking"}]},"-CqVpCRxwTY":{"duration":43.143,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=-CqVpCRxwTY","annotations":[]},"1zEcIngghq4":{"duration":203.152,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=1zEcIngghq4","annotations":[{"segment":[12.994082234037661,200.2990225023401],"label":"Playing congas"}]},"FiqkrBh1VOI":{"duration":156.57,"subset":"validation","resolution":"640x356","url":"https://www.youtube.com/watch?v=FiqkrBh1VOI","annotations":[{"segment":[1.9540935542183595,143.13735190327614],"label":"Tango"}]},"NM7Vye5nxj8":{"duration":35.36,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=NM7Vye5nxj8","annotations":[]},"uz91AvGxjbw":{"duration":190.59,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=uz91AvGxjbw","annotations":[{"segment":[0.01,190.441],"label":"Croquet"}]},"EyOX-qduoFE":{"duration":180.466,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EyOX-qduoFE","annotations":[]},"CcMic7jNIuE":{"duration":60.56,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=CcMic7jNIuE","annotations":[{"segment":[13.78943447,17.57210770670827],"label":"Clean and jerk"},{"segment":[43.83579556942278,58.762639750390015],"label":"Clean and jerk"}]},"v8Dqgro-f-U":{"duration":159.289,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=v8Dqgro-f-U","annotations":[{"segment":[0.01,153.7949438377535],"label":"Rope 
skipping"}]},"syfyM1Pble8":{"duration":80.71000000000001,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=syfyM1Pble8","annotations":[]},"1QdkjsSU44I":{"duration":154.2,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1QdkjsSU44I","annotations":[]},"xijxN7XB4d8":{"duration":234.82,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xijxN7XB4d8","annotations":[{"segment":[5.677126868954758,179.65359543369735],"label":"Playing violin"}]},"8QY00KU3gkw":{"duration":123.786,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=8QY00KU3gkw","annotations":[{"segment":[19.49426053042122,112.54446645865835],"label":"Elliptical trainer"}]},"48h91eAy4eI":{"duration":171.619,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=48h91eAy4eI","annotations":[]},"bMDyOXygNPM":{"duration":95.388,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bMDyOXygNPM","annotations":[{"segment":[0.2,93.26191277258566],"label":"Futsal"}]},"giWOxJiFmjk":{"duration":148.469,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=giWOxJiFmjk","annotations":[]},"pVoM_kUQ4_E":{"duration":51.735,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pVoM_kUQ4_E","annotations":[]},"DBGsPnuwdnU":{"duration":28.962,"subset":"training","resolution":"3840x2160","url":"https://www.youtube.com/watch?v=DBGsPnuwdnU","annotations":[{"segment":[0.2,28.741],"label":"Riding bumper 
cars"}]},"PLPUWZ7XZDo":{"duration":145.799,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PLPUWZ7XZDo","annotations":[{"segment":[0.01,145.1706910006443],"label":"Kneeling"}]},"agX22QLvOcU":{"duration":57.77,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=agX22QLvOcU","annotations":[{"segment":[6.759447928937566,50.831047282371294],"label":"Layup drill in basketball"}]},"DSMSAIk_xhY":{"duration":190.4,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DSMSAIk_xhY","annotations":[{"segment":[0.5940830795631825,185.35392082371297],"label":"Playing guitarra"}]},"svSM-UqjNWE":{"duration":48.11,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=svSM-UqjNWE","annotations":[{"segment":[0.01,4.503439566154075],"label":"Smoking hookah"},{"segment":[12.159286828616,15.161579872718718],"label":"Smoking hookah"},{"segment":[25.519490327613102,44.433936099844],"label":"Smoking hookah"}]},"dTZEVW_vLWc":{"duration":204.057,"subset":"testing","resolution":"853x480","url":"https://www.youtube.com/watch?v=dTZEVW_vLWc","annotations":[]},"s946M878MxI":{"duration":61.324,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=s946M878MxI","annotations":[]},"FLL06mU-ua8":{"duration":141.317,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FLL06mU-ua8","annotations":[{"segment":[50.988272549883085,131.64645037985386],"label":"Using the monkey 
bar"}]},"zQCqzIgIpHI":{"duration":189.452,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zQCqzIgIpHI","annotations":[{"segment":[53.62914430577223,71.35778705148206],"label":"Skiing"},{"segment":[121.5889414976599,140.49949375975038],"label":"Skiing"}]},"HWgQhsTgj90":{"duration":22.06,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=HWgQhsTgj90","annotations":[{"segment":[0.6685366614664586,21.23889547581903],"label":"Playing water polo"}]},"2mXGnG6ZBDA":{"duration":45.79,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=2mXGnG6ZBDA","annotations":[{"segment":[2.285915784930717,43.93244344773791],"label":"Hitting a pinata"}]},"Jd0KWW9LN4Q":{"duration":184.413,"subset":"validation","resolution":"853x480","url":"https://www.youtube.com/watch?v=Jd0KWW9LN4Q","annotations":[{"segment":[9.493955978647309,68.75925693626384],"label":"Playing ten pins"},{"segment":[53.79908387900142,58.40221405046678],"label":"Playing ten pins"},{"segment":[63.58073549336531,68.47156130054726],"label":"Playing ten pins"},{"segment":[81.4178649077936,117.9552106437999],"label":"Playing ten pins"},{"segment":[150.46481707644307,159.095686124805],"label":"Playing ten pins"},{"segment":[168.30194644305774,172.6173809672387],"label":"Playing ten 
pins"}]},"Pi79Fa4YwDk":{"duration":77.11,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=Pi79Fa4YwDk","annotations":[{"segment":[6.075242018720749,17.985122411856473],"label":"Cricket"},{"segment":[56.24110185647426,76.33150615600624],"label":"Cricket"}]},"5RW4JnGyjro":{"duration":140.063,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5RW4JnGyjro","annotations":[]},"VwIU_FcOI_Y":{"duration":69.196,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=VwIU_FcOI_Y","annotations":[]},"j6zAdpBqRu0":{"duration":142.896,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=j6zAdpBqRu0","annotations":[{"segment":[5.1272927434104165,7.579476229389311],"label":"Skiing"},{"segment":[12.706768972799727,15.158952458778622],"label":"Skiing"},{"segment":[33.66179148934664,37.67445537549393],"label":"Skiing"},{"segment":[40.7954161758307,49.712447033935774],"label":"Skiing"},{"segment":[61.30458714947237,63.97969640690389],"label":"Skiing"},{"segment":[77.13231692260887,78.91572309422988],"label":"Skiing"},{"segment":[80.92205503730352,107.89607338307137],"label":"Skiing"}]},"JjRSGzdwtc0":{"duration":156.48,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JjRSGzdwtc0","annotations":[{"segment":[17.321452418096722,145.17721528861156],"label":"Rope skipping"}]},"QakbOSMJ0AA":{"duration":148.66,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=QakbOSMJ0AA","annotations":[{"segment":[66.64229054212169,120.41972151872075],"label":"Shaving"}]},"Ncfysce-svA":{"duration":34.95,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ncfysce-svA","annotations":[{"segment":[11.557700904836192,31.020433088923557],"label":"Playing 
guitarra"}]},"KoeytdNfQ_M":{"duration":73.677,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KoeytdNfQ_M","annotations":[{"segment":[3.5631581889125905,70.68846084455623],"label":"Disc dog"}]},"yPA6klGWEsc":{"duration":104.24000000000001,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yPA6klGWEsc","annotations":[{"segment":[30.571080343213726,36.42511700468019],"label":"Walking the dog"},{"segment":[94.8028714898596,99.19339898595943],"label":"Walking the dog"}]},"eaR8oEBlYWk":{"duration":133.98,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=eaR8oEBlYWk","annotations":[{"segment":[11.704885719238293,46.6105270605382],"label":"Hand washing clothes"},{"segment":[51.62690665449747,89.66778435725429],"label":"Hand washing clothes"}]},"4innF37ifyY":{"duration":19.97,"subset":"training","resolution":"322x240","url":"https://www.youtube.com/watch?v=4innF37ifyY","annotations":[{"segment":[12.025230359999998,15.274488319999998],"label":"Plataform diving"}]},"TK5FnYshy10":{"duration":226.72,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=TK5FnYshy10","annotations":[{"segment":[0.01,226.581],"label":"Building sandcastles"}]},"p4cQt5On74M":{"duration":92.07,"subset":"training","resolution":"352x288","url":"https://www.youtube.com/watch?v=p4cQt5On74M","annotations":[{"segment":[2.333615774202139,90.24183082871927],"label":"Playing 
polo"}]},"adEkjcH5vbA":{"duration":123.41,"subset":"testing","resolution":"384x288","url":"https://www.youtube.com/watch?v=adEkjcH5vbA","annotations":[]},"iLaye6q55qk":{"duration":237.32999999999998,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iLaye6q55qk","annotations":[{"segment":[13.699274762870516,190.6790946723869],"label":"Skateboarding"}]},"UzmsAb15Bso":{"duration":158.43,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UzmsAb15Bso","annotations":[]},"JvP1O8ZIL3A":{"duration":228.485,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JvP1O8ZIL3A","annotations":[{"segment":[9.751490167972934,215.45809612435923],"label":"Braiding hair"}]},"_A5iOie5VkM":{"duration":145.5,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_A5iOie5VkM","annotations":[{"segment":[2.3824188767550702,144.8936794071763],"label":"Doing step aerobics"}]},"KSu59HUet3E":{"duration":152.834,"subset":"training","resolution":"1440x1440","url":"https://www.youtube.com/watch?v=KSu59HUet3E","annotations":[{"segment":[23.604587220223504,36.71824678701434],"label":"Plastering"},{"segment":[45.06330287497214,134.47461766614666],"label":"Plastering"},{"segment":[137.5742099173167,152.63374100000001],"label":"Plastering"}]},"Eq0Ibnq9sYA":{"duration":39.265,"subset":"training","resolution":"426x238","url":"https://www.youtube.com/watch?v=Eq0Ibnq9sYA","annotations":[{"segment":[2.0214400717628704,27.993882205928237],"label":"Curling"}]},"2VYZeOa6804":{"duration":190.29,"subset":"training","resolution":"632x480","url":"https://www.youtube.com/watch?v=2VYZeOa6804","annotations":[{"segment":[69.16847705304212,90.54242704368176],"label":"Playing water polo"},{"segment":[166.24183326053043,172.47590200780033],"label":"Playing water 
polo"}]},"tw0I1FEUigE":{"duration":185.04,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tw0I1FEUigE","annotations":[]},"2SnFlGUHKnc":{"duration":83.035,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=2SnFlGUHKnc","annotations":[{"segment":[0.01,82.981],"label":"Baton twirling"}]},"iZT4XgzUHm4":{"duration":134.189,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=iZT4XgzUHm4","annotations":[]},"mTDRTiTnsjQ":{"duration":33.855,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mTDRTiTnsjQ","annotations":[{"segment":[0.2,33.65469387755102],"label":"Blowing leaves"}]},"oj6czGQ830c":{"duration":222.726,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oj6czGQ830c","annotations":[{"segment":[83.39187699219968,146.97818319875194],"label":"Scuba diving"},{"segment":[154.27497243556942,177.20773860842434],"label":"Scuba diving"},{"segment":[180.68240014976598,201.53036939781592],"label":"Scuba diving"}]},"jwndE_xn8sA":{"duration":8.52,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=jwndE_xn8sA","annotations":[{"segment":[0.5743896540662107,8.458973560608268],"label":"Smoking 
hookah"}]},"aPI0nPvzJlE":{"duration":34.041,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aPI0nPvzJlE","annotations":[]},"XNTy5ZTMqVU":{"duration":133.655,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XNTy5ZTMqVU","annotations":[{"segment":[13.65729254884481,23.66569014189139],"label":"Rock-paper-scissors"},{"segment":[48.061159274942426,48.686684124507835],"label":"Rock-paper-scissors"},{"segment":[50.56325867320407,50.77176695639254],"label":"Rock-paper-scissors"}]},"X0nwoVCrp6Q":{"duration":195.79,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=X0nwoVCrp6Q","annotations":[]},"nDo0nfs9Ee4":{"duration":17.32,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=nDo0nfs9Ee4","annotations":[{"segment":[0.2501915607698981,17.221517830240305],"label":"Bathing dog"}]},"vHFm2rIPCwY":{"duration":112.69,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=vHFm2rIPCwY","annotations":[]},"yFPxSn69pcc":{"duration":93.81,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=yFPxSn69pcc","annotations":[{"segment":[3.5837142230889234,84.7658119297972],"label":"Cricket"}]},"QtCNHRtycmg":{"duration":110.97,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QtCNHRtycmg","annotations":[{"segment":[6.86427457098284,98.33073322932917],"label":"Doing motocross"}]},"EE2nFMHRacg":{"duration":136.42000000000002,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EE2nFMHRacg","annotations":[{"segment":[15.322996418096725,122.15833255538223],"label":"Playing violin"}]},"OM0jqPYx700":{"duration":30.09,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=OM0jqPYx700","annotations":[{"segment":[0.01,0.046947051482059285],"label":"Polishing 
forniture"}]},"PRCMcOB2PwA":{"duration":202.57,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=PRCMcOB2PwA","annotations":[]},"UlddctIswqo":{"duration":159.149,"subset":"training","resolution":"2276x720","url":"https://www.youtube.com/watch?v=UlddctIswqo","annotations":[{"segment":[19.67697763671162,46.4416318423191],"label":"Baking cookies"},{"segment":[75.68449477066801,78.16270349340944],"label":"Baking cookies"}]},"dpS_S4Zi2Po":{"duration":62.02,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=dpS_S4Zi2Po","annotations":[{"segment":[0,1.0643143331175424],"label":"Putting on makeup"},{"segment":[16.642005936019753,30.28458056961734],"label":"Putting on makeup"},{"segment":[45.0882247550702,50.21628465210609],"label":"Putting on makeup"}]},"VbbmHEKITMk":{"duration":101.699,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VbbmHEKITMk","annotations":[]},"5gaobaT6K1A":{"duration":66.038,"subset":"training","resolution":"308x172","url":"https://www.youtube.com/watch?v=5gaobaT6K1A","annotations":[{"segment":[0.01,66.001],"label":"Bullfighting"}]},"SrA6k_iQNGA":{"duration":134.235,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=SrA6k_iQNGA","annotations":[{"segment":[61.358396390277385,134.23458049886622],"label":"Carving jack-o-lanterns"}]},"Es3PxD9oi4I":{"duration":225.86,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Es3PxD9oi4I","annotations":[]},"tu-YAMiS5wA":{"duration":99.75,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=tu-YAMiS5wA","annotations":[]},"mrqSaH_FIcs":{"duration":232.293,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=mrqSaH_FIcs","annotations":[{"segment":[24.745565038009595,152.4529373121529],"label":"Trimming branches or 
hedges"}]},"CtQ25XC45As":{"duration":135.3,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CtQ25XC45As","annotations":[{"segment":[19.835466458658345,59.717414976599066],"label":"Dodgeball"}]},"90SltIDizo8":{"duration":82.314,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=90SltIDizo8","annotations":[{"segment":[5.562298326403182,81.17879365350599],"label":"Kneeling"}]},"Jc_cLD6yaRM":{"duration":134.49,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Jc_cLD6yaRM","annotations":[]},"xLmdKbYfI5E":{"duration":21.084,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=xLmdKbYfI5E","annotations":[]},"fZQclIXmRHE":{"duration":87.91,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fZQclIXmRHE","annotations":[{"segment":[31.80571498947972,33.03810863901243],"label":"Ironing clothes"},{"segment":[57.13825111876321,70.83151389134888],"label":"Ironing clothes"}]},"s24-UW2mnjM":{"duration":215.736,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=s24-UW2mnjM","annotations":[{"segment":[21.869609984399375,215.46800000000002],"label":"Rollerblading"}]},"xPnNcv7ErXk":{"duration":19.482,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xPnNcv7ErXk","annotations":[{"segment":[2.212549141965679,18.155026521060844],"label":"Using the monkey bar"}]},"l9XzNcLiMKc":{"duration":229.692,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=l9XzNcLiMKc","annotations":[{"segment":[61.17958929366139,193.19870303261493],"label":"Making a cake"}]},"xI4LLnFt6Io":{"duration":184.04,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xI4LLnFt6Io","annotations":[{"segment":[7.465006066909344,174.2791799953198],"label":"Playing 
bagpipes"}]},"SIHG_BRgn2E":{"duration":187.71,"subset":"training","resolution":"512x288","url":"https://www.youtube.com/watch?v=SIHG_BRgn2E","annotations":[{"segment":[78.92005200208008,133.09522684580853],"label":"Playing piano"},{"segment":[29.4010173,78.62721321914101],"label":"Playing piano"}]},"xT7eQmyr1-Y":{"duration":215.04,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xT7eQmyr1-Y","annotations":[{"segment":[20.128549048361936,203.96929702340094],"label":"Blowing leaves"}]},"kH50-giCeDM":{"duration":30.094,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=kH50-giCeDM","annotations":[{"segment":[9.15916506217814,14.784036090215524],"label":"Rock-paper-scissors"}]},"mr2wnh2GwL8":{"duration":238.168,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mr2wnh2GwL8","annotations":[{"segment":[22.664885436233774,186.89241453822152],"label":"Building sandcastles"}]},"tSR4Pj_muN0":{"duration":47.97,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=tSR4Pj_muN0","annotations":[{"segment":[17.59576247041741,45.62969093075956],"label":"Bungee jumping"}]},"KoqE2gPCLe4":{"duration":67.13,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KoqE2gPCLe4","annotations":[{"segment":[23.667907751638065,28.380544250858033],"label":"Pole 
vault"}]},"45gAK3x_0ds":{"duration":188.43,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=45gAK3x_0ds","annotations":[{"segment":[4.260896255850234,7.199445397815912],"label":"Slacklining"},{"segment":[21.598336193447736,31.589403276131044],"label":"Slacklining"},{"segment":[57.742490639625586,123.8729976599064],"label":"Slacklining"},{"segment":[137.68564508580343,182.94410686427457],"label":"Slacklining"}]},"eCRYdEWeaXk":{"duration":87.934,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=eCRYdEWeaXk","annotations":[]},"AOBkrb8yYS4":{"duration":113.128,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AOBkrb8yYS4","annotations":[{"segment":[25.9281856474259,39.50961622464899],"label":"Croquet"},{"segment":[54.32572230889236,70.72926833073323],"label":"Croquet"},{"segment":[78.13732137285491,100.00871606864276],"label":"Croquet"}]},"CiryRTSYkC4":{"duration":172.22,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CiryRTSYkC4","annotations":[{"segment":[91.08173920426205,133.53281403120124],"label":"Ping-pong"}]},"_5VJcnrEgbg":{"duration":67.32,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_5VJcnrEgbg","annotations":[{"segment":[0.01,59.70416536661467],"label":"Playing 
squash"}]},"jIsGq431gBs":{"duration":230.668,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=jIsGq431gBs","annotations":[{"segment":[59.191378072006955,117.30655464432422],"label":"Rafting"},{"segment":[132.37345227418425,136.31954451057615],"label":"Rafting"},{"segment":[152.46264911399763,189.05368621508626],"label":"Rafting"}]},"nOlCltba34g":{"duration":16.58,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=nOlCltba34g","annotations":[]},"eS_xTXii3is":{"duration":160.66,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eS_xTXii3is","annotations":[]},"dGT_KGb0gyw":{"duration":29.861,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=dGT_KGb0gyw","annotations":[]},"b_c17BmZCCs":{"duration":236.055,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=b_c17BmZCCs","annotations":[]},"NW42KGlKi_c":{"duration":60.883,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=NW42KGlKi_c","annotations":[{"segment":[6.1262644464962275,19.898486845596274],"label":"Canoeing"},{"segment":[20.65833359865007,22.367988393135725],"label":"Canoeing"},{"segment":[32.72090021840873,34.24059369734789],"label":"Canoeing"},{"segment":[41.2691760374415,43.45373541341654],"label":"Canoeing"}]},"-SggX9E_LOA":{"duration":40.798,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=-SggX9E_LOA","annotations":[]},"mJQS2T-IVWc":{"duration":146.008,"subset":"training","resolution":"638x360","url":"https://www.youtube.com/watch?v=mJQS2T-IVWc","annotations":[{"segment":[0.01,146.00707482993198],"label":"Fixing the 
roof"}]},"jX9v6NXrLXs":{"duration":119.49,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jX9v6NXrLXs","annotations":[]},"U32mlg60EGA":{"duration":72.353,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=U32mlg60EGA","annotations":[]},"MOH9q_3Z9zg":{"duration":154.69,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=MOH9q_3Z9zg","annotations":[{"segment":[69.01984809732525,114.3895384550076],"label":"Getting a haircut"}]},"CN01Gm2Yc4k":{"duration":17.555,"subset":"validation","resolution":"432x720","url":"https://www.youtube.com/watch?v=CN01Gm2Yc4k","annotations":[{"segment":[1.6411644199050477,17.261],"label":"Doing crunches"}]},"GMHzZXAQzIA":{"duration":97.687,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=GMHzZXAQzIA","annotations":[{"segment":[10.178826478264433,92.36907093893102],"label":"Using the monkey bar"}]},"tu6EkaMa9c8":{"duration":174.498,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tu6EkaMa9c8","annotations":[{"segment":[7.075609984399376,43.27007644305772],"label":"River tubing"},{"segment":[53.88349141965679,146.4274851794072],"label":"River tubing"},{"segment":[154.86475663026522,158.40296723868957],"label":"River tubing"}]},"_Rcb5kuhn90":{"duration":33.855,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=_Rcb5kuhn90","annotations":[{"segment":[0,32.82006697819315],"label":"Playing rubik cube"}]},"mmoAdwG65kI":{"duration":189.66,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mmoAdwG65kI","annotations":[]},"oZjx7F1doGs":{"duration":144.66,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=oZjx7F1doGs","annotations":[{"segment":[55.17854542753139,139.58253720625015],"label":"Tennis serve with ball 
bouncing"}]},"v-UmNR5yeDc":{"duration":230.691,"subset":"testing","resolution":"855x470","url":"https://www.youtube.com/watch?v=v-UmNR5yeDc","annotations":[]},"iaXlCCgLBdo":{"duration":36.36,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=iaXlCCgLBdo","annotations":[{"segment":[19.061687640000002,33.06251212],"label":"Javelin throw"}]},"BJWL_X79HRc":{"duration":170.9,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=BJWL_X79HRc","annotations":[{"segment":[19.729354149765992,154.63547847113884],"label":"Playing lacrosse"}]},"MFow119nrOk":{"duration":221.61,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=MFow119nrOk","annotations":[{"segment":[0,8.813770670826832],"label":"Skateboarding"}]},"W5cGKWhOTOU":{"duration":67.71,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=W5cGKWhOTOU","annotations":[{"segment":[2.5333291731669267,23.53885023400936],"label":"Putting on shoes"},{"segment":[31.24439313572543,54.88879875195008],"label":"Putting on shoes"}]},"Fq6JJ7a_3vY":{"duration":148.933,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Fq6JJ7a_3vY","annotations":[]},"Rvi30WJaH1w":{"duration":148.143,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Rvi30WJaH1w","annotations":[]},"kfwwya1qzXM":{"duration":209.31,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kfwwya1qzXM","annotations":[{"segment":[18.612063276131046,182.8553585023401],"label":"Playing guitarra"}]},"hwGvU9Csz98":{"duration":19.39,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=hwGvU9Csz98","annotations":[{"segment":[0.01,0.02],"label":"Horseback 
riding"}]},"rHD_vDxPpDA":{"duration":131.43,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rHD_vDxPpDA","annotations":[{"segment":[7.381118695787831,131.424919],"label":"Cleaning shoes"}]},"dh4bxvmrCNs":{"duration":85.01,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dh4bxvmrCNs","annotations":[{"segment":[4.376384794167277,9.018005021528861],"label":"Archery"},{"segment":[16.444597392199686,49.59902761840873],"label":"Archery"}]},"sYAAXumvtck":{"duration":112.47999999999999,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sYAAXumvtck","annotations":[{"segment":[9.998653666146646,91.39120280811233],"label":"Doing nails"}]},"xQH8YS2_NxY":{"duration":93.855,"subset":"validation","resolution":"480x268","url":"https://www.youtube.com/watch?v=xQH8YS2_NxY","annotations":[{"segment":[6.179922489495799,77.52147814394864],"label":"Playing drums"}]},"WLGdtzCt4A8":{"duration":155.02,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WLGdtzCt4A8","annotations":[]},"MMnTMB6AmuU":{"duration":179.444,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MMnTMB6AmuU","annotations":[{"segment":[6.1572886115444625,90.1203151326053],"label":"Rope skipping"},{"segment":[97.95686427457099,168.4858065522621],"label":"Rope skipping"}]},"qPZBSTNEl78":{"duration":31.09,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=qPZBSTNEl78","annotations":[{"segment":[0.7725844867394696,11.588767301092044],"label":"Tennis serve with ball bouncing"},{"segment":[17.006451,27.2525428],"label":"Tennis serve with ball bouncing"}]},"Ox51OBhM5Ak":{"duration":32.16,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Ox51OBhM5Ak","annotations":[{"segment":[0.2,21.506443218169355],"label":"Having an ice 
cream"}]},"XLdqEn8pqis":{"duration":142.06,"subset":"validation","resolution":"208x160","url":"https://www.youtube.com/watch?v=XLdqEn8pqis","annotations":[{"segment":[4.763261808112324,116.4229804726989],"label":"Playing accordion"}]},"qwBFQf5STBA":{"duration":229.46,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=qwBFQf5STBA","annotations":[{"segment":[21.47335413416537,51.536049921996884],"label":"Doing nails"},{"segment":[64.06217316692668,122.75600780031202],"label":"Doing nails"}]},"hog7gGdk4i4":{"duration":13.073,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hog7gGdk4i4","annotations":[{"segment":[0.2,12.814],"label":"Swinging at the playground"}]},"MHAS5TK2eW0":{"duration":70.8,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MHAS5TK2eW0","annotations":[{"segment":[1e-06,69.98034321372855],"label":"Doing step aerobics"}]},"zPZGjZES31A":{"duration":110.74000000000001,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=zPZGjZES31A","annotations":[{"segment":[11.39469571918877,77.5167147],"label":"Mowing the lawn"}]},"ah3tGziTbds":{"duration":127.46,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=ah3tGziTbds","annotations":[{"segment":[7.953468397239291,115.92180188976266],"label":"Sailing"}]},"QQQ7jSVSG3A":{"duration":209.44,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QQQ7jSVSG3A","annotations":[{"segment":[7.188385070096682,202.90850452418098],"label":"Belly 
dance"}]},"KfP205pf7PU":{"duration":103.051,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KfP205pf7PU","annotations":[{"segment":[27.405249720741068,29.969483338381636],"label":"Knitting"},{"segment":[49.521768181305426,71.79855173184974],"label":"Knitting"},{"segment":[87.9852793477129,98.56274491847495],"label":"Knitting"}]},"BzU_kAzMMxo":{"duration":193.794,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=BzU_kAzMMxo","annotations":[]},"pbz8c7TAlDs":{"duration":45.464,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pbz8c7TAlDs","annotations":[{"segment":[0.992988112324493,40.00323538221529],"label":"River tubing"}]},"ac1M42uRXS8":{"duration":141.735,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ac1M42uRXS8","annotations":[{"segment":[0.01,112.10521641341653],"label":"Baton twirling"}]},"hZD-CjWt0Rg":{"duration":167.091,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hZD-CjWt0Rg","annotations":[{"segment":[11.29554614915937,158.08558771925283],"label":"Futsal"}]},"h-lKlgls9OI":{"duration":49.83,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=h-lKlgls9OI","annotations":[{"segment":[12.658819032761311,13.746079563182526],"label":"Playing water polo"},{"segment":[15.8429391575663,16.85253822152886],"label":"Playing water polo"},{"segment":[19.104720748829955,20.890934477379094],"label":"Playing water polo"},{"segment":[28.11345085803432,28.967726989079562],"label":"Playing water polo"},{"segment":[30.210310452418096,36.65621216848674],"label":"Playing water 
polo"}]},"pzTqZWPubk8":{"duration":212.42000000000002,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=pzTqZWPubk8","annotations":[]},"D2IDRG8_6To":{"duration":212.974,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=D2IDRG8_6To","annotations":[{"segment":[86.68009516380656,90.00117160686428],"label":"Shuffleboard"},{"segment":[111.25606084243371,117.89821372854915],"label":"Shuffleboard"},{"segment":[144.46682527301093,151.78745241809673],"label":"Shuffleboard"}]},"dStkNaVlExA":{"duration":117.17,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=dStkNaVlExA","annotations":[]},"jsu65VwKf74":{"duration":145.984,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jsu65VwKf74","annotations":[{"segment":[2.2774392336237668,53.51982199015852],"label":"Futsal"},{"segment":[62.857322848015954,145.3006231051963],"label":"Futsal"}]},"uiCkFmebAZs":{"duration":235.729,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uiCkFmebAZs","annotations":[{"segment":[4.780774937119935,16.181084389391575],"label":"Swimming"},{"segment":[27.213641927613104,29.420153435257408],"label":"Swimming"},{"segment":[77.59565468549143,82.74418153666146],"label":"Swimming"},{"segment":[118.41611757691106,125.4034040177847],"label":"Swimming"},{"segment":[146.3652633404056,156.294565124805],"label":"Swimming"},{"segment":[162.91409964773788,170.26913800655225],"label":"Swimming"},{"segment":[180.93394362683307,182.77270321653666],"label":"Swimming"},{"segment":[188.65673390358813,190.86324541123244],"label":"Swimming"},{"segment":[194.17301267269892,200.7925471956318],"label":"Swimming"},{"segment":[209.61859322620904,215.50262391326052],"label":"Swimming"}]},"JoiZmVQCLCI":{"duration":186.32,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=JoiZmVQCLCI","annotations":[{"segment":[6.245397815912636,177.0497659
906396],"label":"Pole vault"}]},"_il2j9UtSe4":{"duration":8.82,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_il2j9UtSe4","annotations":[{"segment":[2.298659906396256,4.913901716068643],"label":"Drinking coffee"}]},"uavot-yVwhg":{"duration":98.06,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uavot-yVwhg","annotations":[{"segment":[44.05720143907797,76.33521939625585],"label":"Cleaning shoes"}]},"gMwjhe0Njkc":{"duration":134.7,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=gMwjhe0Njkc","annotations":[{"segment":[12.63871334,44.28331448],"label":"Discus throw"},{"segment":[46.63766302652107,92.22492823712949],"label":"Discus throw"}]},"1sA-lEbrgak":{"duration":70.17,"subset":"training","resolution":"470x360","url":"https://www.youtube.com/watch?v=1sA-lEbrgak","annotations":[{"segment":[6.91279191944681,15.762357340007558],"label":"Clean and jerk"},{"segment":[32.36894924031908,66.8931797699141],"label":"Clean and jerk"}]},"0ivHmKR8cUw":{"duration":92.22999999999999,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=0ivHmKR8cUw","annotations":[{"segment":[13.520895475819032,81.26921216848673],"label":"Mixing drinks"}]},"I58i4uJkHJ0":{"duration":158.73,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=I58i4uJkHJ0","annotations":[{"segment":[60.852703469999994,123.31176466],"label":"Using the balance beam"},{"segment":[8.21274194,53.21615506],"label":"Using the balance 
beam"}]},"ML6CRJRp3ik":{"duration":188.918,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ML6CRJRp3ik","annotations":[]},"wpxozv4Yois":{"duration":173.779,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wpxozv4Yois","annotations":[{"segment":[24.94163617472699,119.82829553510142],"label":"Snowboarding"},{"segment":[126.33480931981279,173.23592951794072],"label":"Snowboarding"}]},"EdtXUHBGnRY":{"duration":229.16,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=EdtXUHBGnRY","annotations":[]},"beASFdYJFUU":{"duration":173.662,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=beASFdYJFUU","annotations":[{"segment":[0.01,13.00433378967812],"label":"Curling"},{"segment":[29.801598268012352,34.407299818523356],"label":"Curling"},{"segment":[67.73090515457353,79.38062084116018],"label":"Curling"},{"segment":[95.63603807825783,103.76374669680666],"label":"Curling"},{"segment":[134.10719157566302,149.54983787831512],"label":"Curling"}]},"HafSt7EDin0":{"duration":178.75,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=HafSt7EDin0","annotations":[{"segment":[69.3153552,168.29008076241416],"label":"Hopscotch"}]},"8gUKEh27AFM":{"duration":25.75,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8gUKEh27AFM","annotations":[{"segment":[1.0228642745709828,23.485765990639628],"label":"Hammer throw"}]},"FMmrxy3OanA":{"duration":213.276,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FMmrxy3OanA","annotations":[{"segment":[106.3689584574833,119.98587590296617],"label":"Hula 
hoop"}]},"1ASxtL1Brn4":{"duration":35.295,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1ASxtL1Brn4","annotations":[]},"Zre8db8Q3ss":{"duration":138.229,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Zre8db8Q3ss","annotations":[]},"dSHw52PBi5I":{"duration":148.79,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=dSHw52PBi5I","annotations":[{"segment":[7.97084101,144.33763845865835],"label":"Plataform diving"}]},"K-PpTJmmjhA":{"duration":208.56,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=K-PpTJmmjhA","annotations":[{"segment":[0.01,208.56108843537416],"label":"Ironing clothes"}]},"AyaS46-OPPU":{"duration":15.28,"subset":"testing","resolution":"176x144","url":"https://www.youtube.com/watch?v=AyaS46-OPPU","annotations":[]},"ebBqY4VYHlA":{"duration":122.346,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ebBqY4VYHlA","annotations":[]},"oKaNlV45bB8":{"duration":95.53,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=oKaNlV45bB8","annotations":[{"segment":[1.8261301541790353,10.15869394234103],"label":"Brushing teeth"},{"segment":[17.896074602777166,20.87199024140645],"label":"Brushing teeth"},{"segment":[34.11481483330677,95.12108542520708],"label":"Brushing teeth"}]},"kRMskyrrRcA":{"duration":49.087,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=kRMskyrrRcA","annotations":[{"segment":[0.2,29.023349354134165],"label":"Playing rubik cube"}]},"kn41Y1h8-Uo":{"duration":71.31,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=kn41Y1h8-Uo","annotations":[]},"j0FMv4RDpEk":{"duration":195.234,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=j0FMv4RDpEk","annotations":[{"segment":[27.326600437997907,186.12451187214128],"label":"Fixing the 
roof"}]},"zwa44U585FE":{"duration":143.453,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=zwa44U585FE","annotations":[{"segment":[9.608201022880595,133.84447471408086],"label":"Croquet"}]},"mZ1Di2gg-I4":{"duration":209.05,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mZ1Di2gg-I4","annotations":[{"segment":[101.56633222932916,205.5780817675507],"label":"Tai chi"},{"segment":[6.032029361934478,99.0683211],"label":"Tai chi"}]},"rsU73H6VyG8":{"duration":86.82,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=rsU73H6VyG8","annotations":[]},"-E2dqOULQgY":{"duration":68.337,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=-E2dqOULQgY","annotations":[{"segment":[0,22.909071762870514],"label":"Playing drums"},{"segment":[31.220269890795635,67.98133853354135],"label":"Playing drums"}]},"hgzyQ_jHeJc":{"duration":143.06,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hgzyQ_jHeJc","annotations":[{"segment":[14.946957878315132,120.91419656786272],"label":"Using the pommel horse"}]},"BN3ejP3RbkQ":{"duration":55.47,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BN3ejP3RbkQ","annotations":[{"segment":[0.01,54.74906864274571],"label":"Playing flauta"}]},"DFzoAQ8AzWA":{"duration":59.68,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=DFzoAQ8AzWA","annotations":[]},"YCqbvmEG-Uw":{"duration":17.276,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=YCqbvmEG-Uw","annotations":[{"segment":[0.24179251170046803,16.434088923556942],"label":"Removing ice from 
car"}]},"2-1MNxfX5Bc":{"duration":68.383,"subset":"validation","resolution":"626x480","url":"https://www.youtube.com/watch?v=2-1MNxfX5Bc","annotations":[{"segment":[0.01,6.93428984399376],"label":"Rafting"}]},"ycBFz2RdgoY":{"duration":198.76,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ycBFz2RdgoY","annotations":[{"segment":[3.799629122973647,10.3012163659643],"label":"Windsurfing"},{"segment":[31.66357445007645,184.91527374914187],"label":"Windsurfing"}]},"oey6DFvL9Xk":{"duration":61.719,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=oey6DFvL9Xk","annotations":[{"segment":[34.45870905121001,42.820160276849016],"label":"Putting in contact lenses"},{"segment":[49.540813465029416,54.91733601557374],"label":"Putting in contact lenses"}]},"ze6Bu0bcNbg":{"duration":233.59,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=ze6Bu0bcNbg","annotations":[{"segment":[65.59546626763029,204.07478240249608],"label":"Shaving"}]},"cIpBpGQ0XTI":{"duration":226.558,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=cIpBpGQ0XTI","annotations":[{"segment":[18.698680685358255,216.97525700934582],"label":"Changing car 
wheel"}]},"mH52O_EaYtA":{"duration":199.69,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mH52O_EaYtA","annotations":[]},"QXZkejzunpk":{"duration":71.332,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QXZkejzunpk","annotations":[{"segment":[14.577915565602215,71.13163265306122],"label":"BMX"}]},"2ooY3GqZieg":{"duration":142.988,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2ooY3GqZieg","annotations":[{"segment":[23.19934944149766,142.9882993197279],"label":"Longboarding"}]},"itgR5a-hH_o":{"duration":214.57999999999998,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=itgR5a-hH_o","annotations":[{"segment":[3.012755897991022,212.23191442433696],"label":"Playing piano"}]},"rgU4olNYrJs":{"duration":138.995,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rgU4olNYrJs","annotations":[]},"PdwWFueQErM":{"duration":43.05,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=PdwWFueQErM","annotations":[]},"rxEQaswh_Uo":{"duration":59.65,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=rxEQaswh_Uo","annotations":[{"segment":[0.01,59.465940121684866],"label":"Polishing 
shoes"}]},"I9ficvPdpZg":{"duration":187.55,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=I9ficvPdpZg","annotations":[]},"pKOy4HJwCqo":{"duration":58.956,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pKOy4HJwCqo","annotations":[]},"rgwKbJe5_wg":{"duration":38.55,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=rgwKbJe5_wg","annotations":[]},"x3cOxXOYbwA":{"duration":37.04,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=x3cOxXOYbwA","annotations":[{"segment":[9.26468096723869,22.59889469578783],"label":"Snatch"}]},"Hj_YKQiU-8Y":{"duration":121.79,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Hj_YKQiU-8Y","annotations":[{"segment":[37.71458569907422,70.39422670784383],"label":"Brushing hair"}]},"42i3phJnY4M":{"duration":51.804,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=42i3phJnY4M","annotations":[{"segment":[0.01,50.280802926809244],"label":"Bullfighting"}]},"TaLEPzEyZ34":{"duration":167.81,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=TaLEPzEyZ34","annotations":[{"segment":[17.584178476418973,22.289134290520312],"label":"Drinking coffee"},{"segment":[33.0059780893067,34.57429669400715],"label":"Drinking coffee"},{"segment":[72.99810250916809,86.06742421500516],"label":"Drinking coffee"}]},"A_ndiCY-rDc":{"duration":150.141,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=A_ndiCY-rDc","annotations":[{"segment":[12.64265522620905,86.39147737909516],"label":"Doing crunches"},{"segment":[96.69290015600625,139.53745397815914],"label":"Doing 
crunches"}]},"x4DuEusRR7Q":{"duration":31.741,"subset":"training","resolution":"256x144","url":"https://www.youtube.com/watch?v=x4DuEusRR7Q","annotations":[{"segment":[1.6799283489096573,30.63398753894081],"label":"Swimming"}]},"8vcO5DRxP6c":{"duration":203.17000000000002,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=8vcO5DRxP6c","annotations":[{"segment":[27.5686848673947,49.1166224648986],"label":"Shot put"},{"segment":[88.40992043681747,99.81765210608425],"label":"Shot put"},{"segment":[148.61739313572542,176.52033853354135],"label":"Shot put"}]},"hIHeMA1oRrY":{"duration":21.061,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=hIHeMA1oRrY","annotations":[]},"WaVrNbTmbU4":{"duration":107.625,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WaVrNbTmbU4","annotations":[{"segment":[45.850942081688366,55.08549228082716],"label":"Painting furniture"},{"segment":[59.954618928253055,74.22619703277724],"label":"Painting furniture"}]},"WKLLTGbDI7w":{"duration":79.436,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WKLLTGbDI7w","annotations":[{"segment":[8.548822873381026,79.434875],"label":"Making a lemonade"}]},"Pr5K2Jh2X94":{"duration":109.83,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=Pr5K2Jh2X94","annotations":[]},"l4C-l6XeNRc":{"duration":238.86,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=l4C-l6XeNRc","annotations":[{"segment":[20.489287542901714,181.35673275],"label":"Mowing the lawn"},{"segment":[198.26823315,224.6370979703588],"label":"Mowing the 
lawn"}]},"pLfUo8hHnzs":{"duration":20.898,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pLfUo8hHnzs","annotations":[]},"huv327wdIU8":{"duration":115.125,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=huv327wdIU8","annotations":[{"segment":[27.313976941687123,100.54249152222125],"label":"Futsal"}]},"sZbkKa2iKrM":{"duration":59.952,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sZbkKa2iKrM","annotations":[{"segment":[9.257658346333855,38.259046801872074],"label":"Doing a powerbomb"}]},"q0buJ1CKj-Y":{"duration":48.58,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=q0buJ1CKj-Y","annotations":[{"segment":[1.364072686427457,45.31752591575663],"label":"Playing guitarra"}]},"tD-wp39rjnE":{"duration":33.81,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=tD-wp39rjnE","annotations":[{"segment":[1.7644552433697347,29.94306883151326],"label":"Javelin throw"}]},"DguywhRJ7ds":{"duration":4.81,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=DguywhRJ7ds","annotations":[{"segment":[0.60680556,2.00251602],"label":"Springboard diving"}]},"_lxs63ftS9M":{"duration":18.321,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=_lxs63ftS9M","annotations":[]},"WgZOr6_3VtI":{"duration":217.502,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=WgZOr6_3VtI","annotations":[]},"ESgcgoYgg0o":{"duration":173.407,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ESgcgoYgg0o","annotations":[{"segment":[87.91676824242417,98.70410227401503],"label":"Putting in contact lenses"},{"segment":[124.32402059904332,131.60547107036717],"label":"Putting in contact 
lenses"}]},"c6qtZXZoOmA":{"duration":222.29,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=c6qtZXZoOmA","annotations":[]},"RllrUfp2EIU":{"duration":155.69,"subset":"validation","resolution":"810x480","url":"https://www.youtube.com/watch?v=RllrUfp2EIU","annotations":[{"segment":[48.33427361584259,133.62475492],"label":"Starting a campfire"}]},"YXsAQqmWWtE":{"duration":210.26,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=YXsAQqmWWtE","annotations":[]},"1aT9uLUKuZc":{"duration":54.01,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1aT9uLUKuZc","annotations":[]},"Jy_cnXCPVI4":{"duration":236.17000000000002,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Jy_cnXCPVI4","annotations":[]},"akMN3uX51Ws":{"duration":92.798,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=akMN3uX51Ws","annotations":[{"segment":[14.993226145372345,20.20826132637142],"label":"Rollerblading"},{"segment":[57.43781803517039,84.38216737906863],"label":"Rollerblading"}]},"7R6rLXMQAWc":{"duration":169.88,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=7R6rLXMQAWc","annotations":[]},"Ycf5NG0xedg":{"duration":237.261,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ycf5NG0xedg","annotations":[{"segment":[78.82694695787832,112.5041872074883],"label":"Rafting"},{"segment":[125.45697191887676,161.73840405616224],"label":"Rafting"}]},"PBYg9ekd86w":{"duration":74.327,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PBYg9ekd86w","annotations":[]},"hQIwXufRVGY":{"duration":56.87,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=hQIwXufRVGY","annotations":[{"segment":[0.01,55.534959052783876],"label":"Walking the 
dog"}]},"jHfG1I40Cdw":{"duration":217.25,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jHfG1I40Cdw","annotations":[]},"KAbo60mowhw":{"duration":219.87,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=KAbo60mowhw","annotations":[{"segment":[33.92080650893717,203.86678789578946],"label":"Disc dog"}]},"c-3ed17J3F8":{"duration":224.0,"subset":"training","resolution":"640x352","url":"https://www.youtube.com/watch?v=c-3ed17J3F8","annotations":[{"segment":[66.04765767773569,146.77257163806553],"label":"Polishing shoes"}]},"j46ll2_jR7k":{"duration":219.917,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=j46ll2_jR7k","annotations":[{"segment":[37.72897035881435,204.0794305772231],"label":"Hula hoop"}]},"RDC4sqe8xVk":{"duration":192.587,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RDC4sqe8xVk","annotations":[{"segment":[20.430355489049493,105.45668789200549],"label":"Skiing"},{"segment":[133.6986498915739,153.2276652730109],"label":"Skiing"},{"segment":[162.54150375039,181.7700735101404],"label":"Skiing"}]},"oUDqawe5d00":{"duration":167.556,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oUDqawe5d00","annotations":[]},"2wWWlklKCSs":{"duration":41.91,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=2wWWlklKCSs","annotations":[]},"e0-lO2jb8vo":{"duration":188.756,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=e0-lO2jb8vo","annotations":[{"segment":[15.015243369734788,188.701],"label":"Baton twirling"}]},"2hgXJR5QdkU":{"duration":197.07,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=2hgXJR5QdkU","annotations":[{"segment":[5.223322932917316,193.57020280811233],"label":"Smoking 
hookah"}]},"AB480dHyDeM":{"duration":212.6,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AB480dHyDeM","annotations":[]},"BpSvtcblt9c":{"duration":60.233,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BpSvtcblt9c","annotations":[]},"bwRsZtPzipc":{"duration":228.86,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=bwRsZtPzipc","annotations":[{"segment":[13.206921996879874,214.5419984399376],"label":"Belly dance"}]},"5qBzjfW-sMs":{"duration":12.7,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=5qBzjfW-sMs","annotations":[{"segment":[4.438524705148206,6.083156627145086],"label":"Chopping wood"}]},"k7nuduqYOdQ":{"duration":195.63,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=k7nuduqYOdQ","annotations":[{"segment":[11.136865536661468,178.95264759594386],"label":"Snatch"}]},"MldEr60j33M":{"duration":25.87,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=MldEr60j33M","annotations":[{"segment":[0,22.679047950078004],"label":"Clean and jerk"}]},"L2FgftH2VD8":{"duration":90.77,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=L2FgftH2VD8","annotations":[{"segment":[3.3952324492979717,60.265375975039],"label":"Hand washing clothes"},{"segment":[81.50355070202808,89.4275070202808],"label":"Hand washing clothes"}]},"YigV1ARspVU":{"duration":36.2,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YigV1ARspVU","annotations":[{"segment":[0.2,33.77907469030251],"label":"Riding bumper cars"}]},"7RESODKApso":{"duration":54.103,"subset":"validation","resolution":"270x480","url":"https://www.youtube.com/watch?v=7RESODKApso","annotations":[{"segment":[32.50229088297528,53.60310757564299],"label":"Ice 
fishing"}]},"yiFKPKXevOU":{"duration":49.25,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=yiFKPKXevOU","annotations":[{"segment":[3.0660426209048364,44.380966937597506],"label":"Chopping wood"}]},"SO67XxdevPw":{"duration":64.18,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SO67XxdevPw","annotations":[{"segment":[1.2501950078003121,62.45974258970359],"label":"Using parallel bars"}]},"hMOh-yKfZiE":{"duration":198.252,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hMOh-yKfZiE","annotations":[]},"KnED67CTI5c":{"duration":4.46,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=KnED67CTI5c","annotations":[]},"Tovbyt_ZiIc":{"duration":203.454,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Tovbyt_ZiIc","annotations":[{"segment":[78.34308716395488,190.81538537975558],"label":"Braiding hair"}]},"uuZN3GbSeAw":{"duration":224.03,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=uuZN3GbSeAw","annotations":[]},"Ok_Tjkob8IA":{"duration":197.068,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ok_Tjkob8IA","annotations":[{"segment":[31.899975077881617,172.09330373831776],"label":"Making a lemonade"}]},"GwJ9DmiW8dk":{"duration":97.06,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GwJ9DmiW8dk","annotations":[{"segment":[0.2,91.381228099844],"label":"Using the balance beam"}]},"gt2Sp_iG2hU":{"duration":214.599,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=gt2Sp_iG2hU","annotations":[{"segment":[78.67496659360374,85.37070843135726],"label":"Playing drums"},{"segment":[111.14931450670828,121.8625014471139],"label":"Playing drums"},{"segment":[134.91919803073324,142.2845140522621],"label":"Playing drums"},{"segment":[165.3848233925117,178.77630706801872],"label":"Playing 
drums"},{"segment":[187.48077145709829,197.8591713056162],"label":"Playing drums"}]},"mkEME_iWi9o":{"duration":88.469,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mkEME_iWi9o","annotations":[{"segment":[0.19999999999999998,21.932967510140404],"label":"Hurling"},{"segment":[36.417002658346334,50.625151422776916],"label":"Hurling"}]},"VwK2kBm9BDE":{"duration":97.92,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=VwK2kBm9BDE","annotations":[]},"4JnXF13ktSs":{"duration":35.02,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=4JnXF13ktSs","annotations":[{"segment":[4.18577986,22.29362228],"label":"Mowing the lawn"},{"segment":[23.12817472698908,34.965],"label":"Mowing the lawn"}]},"BxKFlAS3V6g":{"duration":17.09,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=BxKFlAS3V6g","annotations":[]},"R7QwxoVTtE0":{"duration":72.77,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=R7QwxoVTtE0","annotations":[{"segment":[26.28170041495537,68.40053779702208],"label":"Cleaning windows"}]},"90vop6PS2Y0":{"duration":271.233,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=90vop6PS2Y0","annotations":[{"segment":[0.2,38.97732258128229],"label":"Raking leaves"}]},"jZMxjRSL7dw":{"duration":94.78,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jZMxjRSL7dw","annotations":[]},"S2V2TgLAMKg":{"duration":40.055,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=S2V2TgLAMKg","annotations":[{"segment":[2.593226994385898,35.2116493450338],"label":"Waxing skis"}]},"knHJztfWFrA":{"duration":166.63,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=knHJztfWFrA","annotations":[{"segment":[0.3303272122994182,154.49867213753308],"label":"Playing 
flauta"}]},"hP8wUI0qdaQ":{"duration":162.053,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hP8wUI0qdaQ","annotations":[{"segment":[33.3831728046137,38.68318165508099],"label":"Elliptical trainer"},{"segment":[62.91179354293146,65.43560728124922],"label":"Elliptical trainer"},{"segment":[77.29753185134267,80.57848971115575],"label":"Elliptical trainer"},{"segment":[88.14993092610902,91.93565153358567],"label":"Elliptical trainer"}]},"UF5gg_E5Vd0":{"duration":87.15,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UF5gg_E5Vd0","annotations":[]},"AEer099-yUM":{"duration":47.137,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AEer099-yUM","annotations":[{"segment":[6.576504680187208,20.46431903276131],"label":"Throwing darts"},{"segment":[23.03613650546022,30.751588923556945],"label":"Throwing darts"},{"segment":[35.16041887675507,42.06758580343214],"label":"Throwing darts"}]},"FFMugev85IY":{"duration":92.927,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=FFMugev85IY","annotations":[]},"8QbHeaXOJOA":{"duration":210.559,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8QbHeaXOJOA","annotations":[{"segment":[48.616133192579326,108.62907471058449],"label":"Kite flying"},{"segment":[162.7349391965658,203.06840181338825],"label":"Kite flying"}]},"6DYQHmsezUw":{"duration":96.363,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=6DYQHmsezUw","annotations":[{"segment":[0.01,96.36240362811792],"label":"River tubing"}]},"LQW4hk1FLgw":{"duration":39.339,"subset":"training","resolution":"3840x2160","url":"https://www.youtube.com/watch?v=LQW4hk1FLgw","annotations":[{"segment":[4.697529641185647,34.78628159126365],"label":"Removing ice from 
car"}]},"TvNNx-ZFfDA":{"duration":228.578,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TvNNx-ZFfDA","annotations":[{"segment":[23.46204343258817,34.1266110948008],"label":"Beer pong"},{"segment":[116.59926768257847,118.02121003754016],"label":"Beer pong"},{"segment":[142.5497156606292,153.92525450032267],"label":"Beer pong"},{"segment":[171.3440483486033,189.47381208116366],"label":"Beer pong"}]},"8eMjzrhxv2U":{"duration":76.347,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=8eMjzrhxv2U","annotations":[{"segment":[0.2,72.39795794392523],"label":"Ice fishing"}]},"UFVeN-ThOwU":{"duration":143.76,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UFVeN-ThOwU","annotations":[{"segment":[0,21.978107124284968],"label":"Washing dishes"},{"segment":[26.01490231037813,98.67721566005496],"label":"Washing dishes"},{"segment":[114.15159648829953,126.37879475232177],"label":"Washing dishes"}]},"oRR1nIUPbjo":{"duration":203.211,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oRR1nIUPbjo","annotations":[{"segment":[0.01,179.14378315132606],"label":"Rafting"}]},"9GYLUAFgCXE":{"duration":180.75,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9GYLUAFgCXE","annotations":[{"segment":[21.147911232449296,41.167933865834634],"label":"Playing flauta"}]},"pqcY83EZ9Vc":{"duration":31.162,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=pqcY83EZ9Vc","annotations":[]},"RfsugL4sLso":{"duration":107.74000000000001,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=RfsugL4sLso","annotations":[{"segment":[0.01,24.53997792199688],"label":"Brushing teeth"},{"segment":[42.02051014040562,104.21086514820593],"label":"Brushing 
teeth"}]},"6gKir83gUhU":{"duration":128.151,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=6gKir83gUhU","annotations":[]},"3akCh2r4Bnk":{"duration":40.38,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=3akCh2r4Bnk","annotations":[]},"D7Oc3SLX0wo":{"duration":71.727,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=D7Oc3SLX0wo","annotations":[{"segment":[0.01,8.16853350078003],"label":"Playing beach volleyball"},{"segment":[12.86823770670827,20.02969173478939],"label":"Playing beach volleyball"},{"segment":[37.5976336474259,63.22221134165367],"label":"Playing beach volleyball"}]},"7lv1VOxIng4":{"duration":233.8,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=7lv1VOxIng4","annotations":[{"segment":[30.638606209048366,223.22413095163807],"label":"Putting on makeup"}]},"6sorAV7KJxA":{"duration":201.225,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=6sorAV7KJxA","annotations":[{"segment":[61.86460702026224,188.16367711372018],"label":"Fixing the roof"}]},"5Wp2dxIAocI":{"duration":213.25,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=5Wp2dxIAocI","annotations":[{"segment":[20.947745709828393,180.55620336],"label":"Wrapping presents"}]},"_xgGaxc1jNE":{"duration":184.27,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_xgGaxc1jNE","annotations":[{"segment":[0,1.7248695070202809],"label":"Playing water polo"}]},"MWn4qozlaMs":{"duration":41.22,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=MWn4qozlaMs","annotations":[{"segment":[9.251269890795632,41.181],"label":"Removing curlers"}]},"2gc5VDx2xY8":{"duration":19.273,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2gc5VDx2xY8","annotations":[{"segment":[6.0034321372854915,14.198117004680189],"label":"Calf 
roping"}]},"8IrulONeC28":{"duration":104.769,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=8IrulONeC28","annotations":[]},"UxlSiLBleX4":{"duration":230.18,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UxlSiLBleX4","annotations":[{"segment":[15.516366390000002,207.6929310156006],"label":"Playing guitarra"}]},"vBOejU7dBzY":{"duration":107.7,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=vBOejU7dBzY","annotations":[{"segment":[0,58.05944773790952],"label":"Grooming horse"}]},"7n5UbaZ0a0c":{"duration":215.18,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7n5UbaZ0a0c","annotations":[]},"6g7FGRgUDYc":{"duration":50.016,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6g7FGRgUDYc","annotations":[]},"6QImnhUemH0":{"duration":62.16,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=6QImnhUemH0","annotations":[{"segment":[0,1.8413400936037443],"label":"Mooping floor"},{"segment":[16.184410296411855,39.44344305772231],"label":"Mooping floor"},{"segment":[51.169872074882996,61.53952418096724],"label":"Mooping floor"}]},"Sjx7K9Ybx9Q":{"duration":18.321,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=Sjx7K9Ybx9Q","annotations":[{"segment":[0.01,18.320544],"label":"Cutting the 
grass"}]},"RtF6TGqwa7Y":{"duration":49.9,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=RtF6TGqwa7Y","annotations":[]},"PKNBIy7FyP8":{"duration":71.054,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PKNBIy7FyP8","annotations":[]},"vnZmsOgWs0o":{"duration":237.819,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vnZmsOgWs0o","annotations":[]},"4rmo35vpG00":{"duration":60.49,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=4rmo35vpG00","annotations":[{"segment":[26.799666645865834,31.234822745709828],"label":"Bathing dog"},{"segment":[32.08410795631825,38.78402461778471],"label":"Bathing dog"},{"segment":[45.106481185647425,56.99647413416536],"label":"Bathing dog"}]},"IfugNwywj-k":{"duration":65.09,"subset":"testing","resolution":"386x240","url":"https://www.youtube.com/watch?v=IfugNwywj-k","annotations":[]},"RGMSc1tfkzA":{"duration":124.11,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=RGMSc1tfkzA","annotations":[{"segment":[3.580611014820593,109.83766221138845],"label":"Plataform diving"}]},"ToLMOwlrgm0":{"duration":218.8,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ToLMOwlrgm0","annotations":[{"segment":[36.36202418096724,61.28622854914196],"label":"Brushing hair"},{"segment":[71.87047971918876,75.62618174726988],"label":"Brushing hair"},{"segment":[102.9403783151326,131.27885725429016],"label":"Brushing hair"},{"segment":[149.37451248049922,151.7645046801872],"label":"Brushing hair"},{"segment":[168.49445007800313,189.6629524180967],"label":"Brushing hair"},{"segment":[197.17435647425896,211.8557371294852],"label":"Brushing hair"}]},"FjY3eKNvzug":{"duration":163.237,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=FjY3eKNvzug","annotations":[{"segment":[18.335432386895476,118.67099294851793],"label":"Using the rowing 
machine"}]},"3cjtV-ldvto":{"duration":212.138,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3cjtV-ldvto","annotations":[{"segment":[7.942740828000467,188.3091456973479],"label":"Baking cookies"}]},"K2B6AMIhaT8":{"duration":231.527,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=K2B6AMIhaT8","annotations":[]},"04LdesS7Pxk":{"duration":185.13,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=04LdesS7Pxk","annotations":[{"segment":[13.57097503900156,164.87290951638064],"label":"Shaving"}]},"VhdidrZKuTU":{"duration":215.13,"subset":"testing","resolution":"270x480","url":"https://www.youtube.com/watch?v=VhdidrZKuTU","annotations":[]},"Q5qIsUsM_-A":{"duration":49.807,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Q5qIsUsM_-A","annotations":[{"segment":[9.160371294851794,49.781],"label":"Playing rubik cube"}]},"fsz79uPkUSs":{"duration":199.18,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=fsz79uPkUSs","annotations":[{"segment":[19.88502964118565,150.06983307332294],"label":"Putting on makeup"}]},"GuwWFip-AF0":{"duration":134.258,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=GuwWFip-AF0","annotations":[{"segment":[6.490219968798752,133.57291419656787],"label":"Playing 
pool"}]},"mbGDPga031E":{"duration":187.734,"subset":"training","resolution":"256x192","url":"https://www.youtube.com/watch?v=mbGDPga031E","annotations":[{"segment":[7.71114437092576,177.706817268122],"label":"Shuffleboard"}]},"9Xrw-WOipSI":{"duration":74.095,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=9Xrw-WOipSI","annotations":[{"segment":[33.637455081876745,49.126866536661474],"label":"Croquet"}]},"Dk3DiAp2yAU":{"duration":85.1,"subset":"validation","resolution":"648x480","url":"https://www.youtube.com/watch?v=Dk3DiAp2yAU","annotations":[{"segment":[19.25064648844457,48.59128597815913],"label":"Cleaning windows"},{"segment":[57.08812287051482,76.86982126053043],"label":"Cleaning windows"}]},"tScqYRQ7zyo":{"duration":16.21,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=tScqYRQ7zyo","annotations":[{"segment":[0.9440327223088925,14.97624042],"label":"Putting on makeup"}]},"rdszlTqxBXs":{"duration":15.93,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=rdszlTqxBXs","annotations":[{"segment":[3.155957704974866,10.610975905702894],"label":"Putting on makeup"}]},"qmfZw9eEpIU":{"duration":95.02000000000001,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qmfZw9eEpIU","annotations":[]},"uLsq6gQXQPM":{"duration":148.748,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uLsq6gQXQPM","annotations":[]},"vCeaFAiokrU":{"duration":141.5,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=vCeaFAiokrU","annotations":[{"segment":[40.821505460218404,133.05604212168487],"label":"Shoveling snow"}]},"Damu3T2Yy0M":{"duration":99.08,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Damu3T2Yy0M","annotations":[{"segment":[66.77397648673947,80.37608280811233],"label":"Arm 
wrestling"}]},"feY5JrgSpzE":{"duration":65.992,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=feY5JrgSpzE","annotations":[{"segment":[0.01,45.46945709828393],"label":"Kneeling"},{"segment":[0.2,45.880945397815914],"label":"Kneeling"}]},"wZgBJlWqWWI":{"duration":54.753,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=wZgBJlWqWWI","annotations":[{"segment":[0.2,54.579],"label":"Using the monkey bar"}]},"QEdbqJijx1w":{"duration":76.255,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=QEdbqJijx1w","annotations":[{"segment":[31.39211232449298,37.4564976599064],"label":"Putting in contact lenses"},{"segment":[64.21113884555382,74.0806287051482],"label":"Putting in contact lenses"}]},"Q2OOxx3RWhY":{"duration":209.84,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Q2OOxx3RWhY","annotations":[{"segment":[69.40064086372978,91.0065007552683],"label":"Removing curlers"},{"segment":[138.80128172745955,144.03906594359012],"label":"Removing curlers"}]},"XToVLTbQEm4":{"duration":182.65,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XToVLTbQEm4","annotations":[{"segment":[8.548275912636507,168.11609294851795],"label":"Layup drill in basketball"}]},"K-pwTH9ubkE":{"duration":150.37,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=K-pwTH9ubkE","annotations":[{"segment":[0,140.51962960218407],"label":"Playing 
piano"}]},"CZp3ZPTQrds":{"duration":121.98,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=CZp3ZPTQrds","annotations":[{"segment":[55.373723455060656,66.02983518524414],"label":"Shaving"},{"segment":[71.73846542901717,84.86831719188767],"label":"Shaving"},{"segment":[89.62550985959437,104.84852639625585],"label":"Shaving"},{"segment":[107.5125542901716,113.79204861154446],"label":"Shaving"}]},"f4UdgFrorCo":{"duration":209.17000000000002,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=f4UdgFrorCo","annotations":[{"segment":[81.57775018483733,187.62882542512583],"label":"Tango"}]},"m5_20ftU4iQ":{"duration":126.549,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=m5_20ftU4iQ","annotations":[]},"ZQs0S7WwYiM":{"duration":159.336,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZQs0S7WwYiM","annotations":[{"segment":[20.378599063962554,141.9046349453978],"label":"Installing carpet"}]},"gywpvBoHPmQ":{"duration":63.507,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gywpvBoHPmQ","annotations":[]},"evXdygHVzNc":{"duration":79.81,"subset":"training","resolution":"634x360","url":"https://www.youtube.com/watch?v=evXdygHVzNc","annotations":[{"segment":[2.60914352574103,66.59528237129486],"label":"Discus throw"}]},"Y6rHCVo28kM":{"duration":144.22,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Y6rHCVo28kM","annotations":[{"segment":[12.374496934477378,128.4697772652106],"label":"Getting a haircut"}]},"gGYje5UGLdY":{"duration":238.957,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gGYje5UGLdY","annotations":[{"segment":[0.01,238.956553],"label":"Playing 
congas"}]},"9-hrNWXnuY8":{"duration":171.782,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=9-hrNWXnuY8","annotations":[{"segment":[7.503703968798753,165.88545559594385],"label":"Disc dog"}]},"cNCrFL2l83o":{"duration":148.86,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cNCrFL2l83o","annotations":[{"segment":[9.866939633385336,129.8953582324493],"label":"Mixing drinks"}]},"7rwDYzqfGgU":{"duration":113.569,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7rwDYzqfGgU","annotations":[]},"NM4nBojCSwQ":{"duration":74.93,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=NM4nBojCSwQ","annotations":[{"segment":[21.82228776,64.10306448],"label":"Ironing clothes"}]},"bUfhRJjHNoU":{"duration":192.146,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bUfhRJjHNoU","annotations":[{"segment":[17.65819681974555,155.63156519097774],"label":"Slacklining"}]},"khIjshXJjaY":{"duration":4.09,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=khIjshXJjaY","annotations":[]},"Twa9ChH4zZU":{"duration":127.873,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Twa9ChH4zZU","annotations":[]},"1nltPeGC5ZQ":{"duration":84.753,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1nltPeGC5ZQ","annotations":[{"segment":[0.01,12.858140285860687],"label":"Sumo"},{"segment":[28.963818831032025,43.749359790533575],"label":"Sumo"},{"segment":[56.15865309582953,60.11906585283887],"label":"Sumo"},{"segment":[76.48877191514417,80.44918467215352],"label":"Sumo"}]},"sUL9HAplalo":{"duration":109.11,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=sUL9HAplalo","annotations":[{"segment":[0,29.107341844694197],"label":"Smoking a cigarette"},{"segment":[49.53354664798837,60.08708579635702],"label":"Smoking a 
cigarette"}]},"ggw-70xi0dA":{"duration":7.593,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ggw-70xi0dA","annotations":[{"segment":[0.01,1.086719775939656],"label":"Slacklining"}]},"3s46k9MIq_s":{"duration":115.43,"subset":"testing","resolution":"294x240","url":"https://www.youtube.com/watch?v=3s46k9MIq_s","annotations":[]},"Flm_SCK10GQ":{"duration":30.697,"subset":"testing","resolution":"540x360","url":"https://www.youtube.com/watch?v=Flm_SCK10GQ","annotations":[]},"7phIVBx1BzQ":{"duration":210.257,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=7phIVBx1BzQ","annotations":[{"segment":[0.01,210.25668934240363],"label":"Disc dog"}]},"bgAmrgXf500":{"duration":59.0,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=bgAmrgXf500","annotations":[]},"LFeoVokXAFY":{"duration":58.422,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=LFeoVokXAFY","annotations":[{"segment":[0.01,57.96572263338533],"label":"Hitting a pinata"}]},"ix40OdQd7iE":{"duration":99.85,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ix40OdQd7iE","annotations":[{"segment":[23.18500745,39.87602477704551],"label":"Pole vault"}]},"c-C_9InvwKE":{"duration":73.0,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=c-C_9InvwKE","annotations":[{"segment":[22.537095163806555,53.38332137285491],"label":"Hand washing clothes"}]},"8wDm37sdBCI":{"duration":20.6,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8wDm37sdBCI","annotations":[{"segment":[0.2566115444617785,4.554854914196568],"label":"Hand washing clothes"},{"segment":[12.573965678627145,20.361],"label":"Hand washing 
clothes"}]},"nlsHB3c3fCw":{"duration":173.94,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=nlsHB3c3fCw","annotations":[]},"LrwBGQ9B0Vg":{"duration":104.374,"subset":"training","resolution":"640x356","url":"https://www.youtube.com/watch?v=LrwBGQ9B0Vg","annotations":[{"segment":[47.856870514820585,51.27521840873635],"label":"Scuba diving"},{"segment":[61.85581903276131,95.55096255850233],"label":"Scuba diving"}]},"xgIxKDm9M38":{"duration":265.056,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xgIxKDm9M38","annotations":[{"segment":[39.38679445030543,133.51875452405454],"label":"Removing ice from car"},{"segment":[145.07881979626936,183.06189140497514],"label":"Removing ice from car"}]},"Lw4ZiLSPvfE":{"duration":78.646,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Lw4ZiLSPvfE","annotations":[]},"6VNFwlzQiuc":{"duration":219.45,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6VNFwlzQiuc","annotations":[{"segment":[20.37027536339549,22.42442077819167],"label":"Getting a tattoo"},{"segment":[30.298644868243706,33.379862990437985],"label":"Getting a tattoo"},{"segment":[148.75436255850235,156.62858658346335],"label":"Getting a tattoo"}]},"I16FTpQQiic":{"duration":212.51,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=I16FTpQQiic","annotations":[]},"GgnM5RGNtDE":{"duration":61.255,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GgnM5RGNtDE","annotations":[]},"pMCPCsvxRBw":{"duration":55.752,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pMCPCsvxRBw","annotations":[{"segment":[22.55803738317757,53.29080062305296],"label":"Making a 
lemonade"}]},"KKbfCtmIE0o":{"duration":122.58,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=KKbfCtmIE0o","annotations":[{"segment":[3.00070944,111.86928442277691],"label":"Tai chi"}]},"VwMSsicKRYI":{"duration":106.09,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VwMSsicKRYI","annotations":[]},"QY3q4DuiGq8":{"duration":18.07,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=QY3q4DuiGq8","annotations":[]},"ryZuSiZNN2o":{"duration":80.109,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=ryZuSiZNN2o","annotations":[{"segment":[0.009999999999999998,77.78793181469842],"label":"Hurling"}]},"BUhpWV98EjM":{"duration":112.176,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=BUhpWV98EjM","annotations":[{"segment":[0.2,99.75053354134165],"label":"Capoeira"}]},"j4J8WqxAZIM":{"duration":86.28999999999999,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=j4J8WqxAZIM","annotations":[]},"3-9pr55kr8g":{"duration":55.055,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3-9pr55kr8g","annotations":[]},"GgfyTMpHfnI":{"duration":214.599,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GgfyTMpHfnI","annotations":[{"segment":[20.087253122777973,200.87253042121682],"label":"Braiding hair"}]},"POhazPPwHhQ":{"duration":154.854,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=POhazPPwHhQ","annotations":[{"segment":[37.44516502340094,118.37503781591265],"label":"Slacklining"},{"segment":[128.52146962870515,154.65387600000003],"label":"Slacklining"}]},"XGj9lakgSic":{"duration":128.57,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XGj9lakgSic","annotations":[{"segment":[12.524538416536661,97.36454640000001],"label":"Preparing 
salad"}]},"-3Ss9jSRbKk":{"duration":67.663,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-3Ss9jSRbKk","annotations":[{"segment":[33.01683345234873,40.921326803486785],"label":"Doing crunches"},{"segment":[54.20087699040267,62.52694417108803],"label":"Doing crunches"}]},"AS1nLh3xfxI":{"duration":31.394,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AS1nLh3xfxI","annotations":[{"segment":[5.97288944198038,15.742671373444555],"label":"Spread mulch"}]},"ru4NMIVZqrg":{"duration":187.22,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ru4NMIVZqrg","annotations":[{"segment":[50.82158362252858,177.8755409531981],"label":"Getting a piercing"}]},"N7z9_g196z0":{"duration":187.246,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=N7z9_g196z0","annotations":[{"segment":[33.293157566302646,172.01464742589704],"label":"Croquet"}]},"wuknZBoyMRE":{"duration":98.662,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=wuknZBoyMRE","annotations":[{"segment":[0,98.66160997732426],"label":"Powerbocking"}]},"zQ4HbFGX7t0":{"duration":59.026,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zQ4HbFGX7t0","annotations":[{"segment":[26.684071762870516,37.173672386895475],"label":"Waxing skis"}]},"tjDOXnDswdA":{"duration":45.001,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=tjDOXnDswdA","annotations":[{"segment":[0,42.543158864274574],"label":"Playing rubik cube"}]},"rj2TaVP_Bwk":{"duration":79.65,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=rj2TaVP_Bwk","annotations":[]},"zJquKzopJuI":{"duration":32.25,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=zJquKzopJuI","annotations":[{"segment":[0.27646723868954753,27.77239079563182],"label":"Drinking 
beer"}]},"NzRNlTfyYtI":{"duration":75.372,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=NzRNlTfyYtI","annotations":[{"segment":[0.23516996190051684,23.046656266250647],"label":"Elliptical trainer"},{"segment":[39.27338322620905,52.67807091419657],"label":"Elliptical trainer"}]},"j4YrzUjxAp8":{"duration":38.48,"subset":"training","resolution":"854x468","url":"https://www.youtube.com/watch?v=j4YrzUjxAp8","annotations":[{"segment":[6.962798477379096,38.275464852607705],"label":"Vacuuming floor"}]},"HD4aY4ATCFY":{"duration":159.57,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HD4aY4ATCFY","annotations":[{"segment":[44.175070202808115,118.33941341653664],"label":"Making a sandwich"}]},"57J-q04z1Hs":{"duration":22.198,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=57J-q04z1Hs","annotations":[{"segment":[0.01,22.161],"label":"Baton twirling"}]},"cdpPn-7R3GQ":{"duration":237.169,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cdpPn-7R3GQ","annotations":[{"segment":[0,7.376934240583221],"label":"Playing pool"},{"segment":[80.03979194768887,91.84289573767558],"label":"Playing pool"},{"segment":[141.2683928582449,145.69455677948991],"label":"Playing pool"},{"segment":[197.33313586068175,206.92315613700484],"label":"Playing 
pool"}]},"R4yz8nXO5hI":{"duration":97.64,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=R4yz8nXO5hI","annotations":[{"segment":[2.2124997399999997,88.34769298254923],"label":"Painting"}]},"xn93Nn9y-SI":{"duration":30.35,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=xn93Nn9y-SI","annotations":[]},"bYUmtLBL7W4":{"duration":224.49,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bYUmtLBL7W4","annotations":[{"segment":[11.553655226209049,57.06805460218409],"label":"Wakeboarding"},{"segment":[68.62170982839314,126.03987519500778],"label":"Wakeboarding"},{"segment":[135.4928658346334,201.31368954758187],"label":"Wakeboarding"}]},"ar3Jzu6hZlY":{"duration":130.01,"subset":"testing","resolution":"810x480","url":"https://www.youtube.com/watch?v=ar3Jzu6hZlY","annotations":[]},"nuqxJvTbG9w":{"duration":30.65,"subset":"training","resolution":"1920x1088","url":"https://www.youtube.com/watch?v=nuqxJvTbG9w","annotations":[{"segment":[0,30.650339],"label":"Playing kickball"}]},"PdWkg0NiVCA":{"duration":7.501,"subset":"testing","resolution":"310x240","url":"https://www.youtube.com/watch?v=PdWkg0NiVCA","annotations":[]},"hrpRVbLRoJ4":{"duration":203.2,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=hrpRVbLRoJ4","annotations":[{"segment":[44.06317849448671,181.95873709234084],"label":"Horseback riding"}]},"eXMF6Skt2To":{"duration":183.42,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eXMF6Skt2To","annotations":[{"segment":[83.8383241533743,114.09876569999999],"label":"Doing motocross"}]},"55sP2yXNFxY":{"duration":199.181,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=55sP2yXNFxY","annotations":[{"segment":[36.86237745999126,41.81866414055254],"label":"Playing pool"},{"segment":[45.22611123343843,47.08471873864891],"label":"Playing 
pool"},{"segment":[51.421469584140034,54.82891667702592],"label":"Playing pool"},{"segment":[122.35832269967345,127.62437729776983],"label":"Playing pool"}]},"Wxp53duZNAM":{"duration":193.191,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Wxp53duZNAM","annotations":[]},"zQFC5vNZ_Qs":{"duration":31.162,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=zQFC5vNZ_Qs","annotations":[{"segment":[0.01,31.161178],"label":"Using the monkey bar"}]},"_f0S0vQZ9JM":{"duration":149.305,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_f0S0vQZ9JM","annotations":[{"segment":[30.280124867394694,77.33078043057722],"label":"Making a lemonade"},{"segment":[92.00499478939156,138.1239542028081],"label":"Making a lemonade"}]},"bQhCEXZwnMM":{"duration":226.28,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bQhCEXZwnMM","annotations":[{"segment":[92.2535406,208.76135793915756],"label":"Archery"}]},"AVIMCVsLrVw":{"duration":212.997,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=AVIMCVsLrVw","annotations":[{"segment":[60.619568585177724,180.53369786156426],"label":"Powerbocking"}]},"jRfTdoqG7Tw":{"duration":135.19,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jRfTdoqG7Tw","annotations":[{"segment":[10.966746120184943,111.77645084034654],"label":"Putting on makeup"}]},"AH-3mF6wWhU":{"duration":122.3,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=AH-3mF6wWhU","annotations":[{"segment":[46.1723545622097,113.9045274524181],"label":"Kite 
flying"}]},"Kz6I9Zx5E_0":{"duration":177.54,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Kz6I9Zx5E_0","annotations":[{"segment":[3.8762527301092047,7.752505460218409],"label":"Cheerleading"},{"segment":[41.25440405616225,175.53887363494542],"label":"Cheerleading"}]},"fU-kGMQ68jg":{"duration":201.805,"subset":"validation","resolution":"423x240","url":"https://www.youtube.com/watch?v=fU-kGMQ68jg","annotations":[{"segment":[27.70484723062392,31.482780943890816],"label":"Snow tubing"},{"segment":[36.834853704352255,57.29866131788129],"label":"Snow tubing"},{"segment":[85.3183363579441,89.41109788064992],"label":"Snow tubing"},{"segment":[93.18903159391682,119.31973977734619],"label":"Snow tubing"},{"segment":[157.41390405616224,170.32184418876756],"label":"Snow tubing"}]},"VmD2k_dtddc":{"duration":80.88,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VmD2k_dtddc","annotations":[{"segment":[6.364234216848674,63.57932994851794],"label":"Playing lacrosse"}]},"kNUpypAppjk":{"duration":46.53,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kNUpypAppjk","annotations":[{"segment":[3.7748907581903275,44.57274856786271],"label":"Snatch"}]},"IeTMYNbQSp0":{"duration":19.134,"subset":"validation","resolution":"192x144","url":"https://www.youtube.com/watch?v=IeTMYNbQSp0","annotations":[{"segment":[0.01,19.073565828393136],"label":"Hand car wash"}]},"TtewPltwuXw":{"duration":141.87,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=TtewPltwuXw","annotations":[]},"FaKQvJBhMPg":{"duration":89.095,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FaKQvJBhMPg","annotations":[{"segment":[0,8.186457098283931],"label":"River tubing"},{"segment":[27.750702028081125,34.27211700468019],"label":"River tubing"},{"segment":[47.31494695787831,50.09001716068643],"label":"River 
tubing"},{"segment":[60.77403744149767,64.65913572542901],"label":"River tubing"},{"segment":[69.2380015600624,82.97459906396257],"label":"River tubing"}]},"Ki13JIVwMdk":{"duration":20.69,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ki13JIVwMdk","annotations":[]},"COx5Bf0PsQs":{"duration":71.889,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=COx5Bf0PsQs","annotations":[{"segment":[6.504762614664586,71.88884353741497],"label":"Making a lemonade"}]},"PZ4miMXdjHE":{"duration":78.623,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=PZ4miMXdjHE","annotations":[{"segment":[45.2338736349454,67.05400780031201],"label":"Sumo"}]},"G5mKcPzVMG4":{"duration":73.259,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=G5mKcPzVMG4","annotations":[{"segment":[4.10796872019836,71.54712040186917],"label":"Waterskiing"}]},"m5T_w1mhWgY":{"duration":212.138,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=m5T_w1mhWgY","annotations":[{"segment":[19.194954949218378,212.13734693877552],"label":"Waxing skis"}]},"9peoWGfb9jc":{"duration":75.814,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9peoWGfb9jc","annotations":[{"segment":[0.01,75.54455382215289],"label":"Waxing skis"}]},"swopywrINVw":{"duration":139.13,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=swopywrINVw","annotations":[]},"zKYqEsVfEnU":{"duration":122.625,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zKYqEsVfEnU","annotations":[{"segment":[71.97688537445187,116.01453856263227],"label":"Doing 
crunches"}]},"ceDvPX6hP_U":{"duration":142.385,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ceDvPX6hP_U","annotations":[{"segment":[30.631478939157567,136.28788455538222],"label":"Kneeling"}]},"N_o9Qp0FLVY":{"duration":56.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=N_o9Qp0FLVY","annotations":[{"segment":[19.962914976599066,44.52839235569423],"label":"Shaving legs"}]},"RldXwiaqykc":{"duration":79.064,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=RldXwiaqykc","annotations":[]},"Z8QrA66UVzI":{"duration":173.94,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=Z8QrA66UVzI","annotations":[{"segment":[31.477519465404466,108.00045471750842],"label":"Cleaning windows"}]},"MHo5kioyrFM":{"duration":201.67000000000002,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=MHo5kioyrFM","annotations":[{"segment":[12.254173146645867,187.26890244617783],"label":"Wrapping presents"}]},"bQ6TMowx36o":{"duration":16.21,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bQ6TMowx36o","annotations":[]},"aGu-FfGDdVI":{"duration":197.904,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=aGu-FfGDdVI","annotations":[{"segment":[0.2,197.70367346938778],"label":"Canoeing"}]},"_Yt94ffXcCE":{"duration":80.597,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=_Yt94ffXcCE","annotations":[{"segment":[29.58128972276389,52.26863627361855],"label":"Welding"}]},"TnOdqLQPQA4":{"duration":99.00999999999999,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=TnOdqLQPQA4","annotations":[{"segment":[11.893546046603769,75.99512439313573],"label":"Starting a campfire"},{"segment":[92.52251933229329,99.009908],"label":"Starting a 
campfire"}]},"AxtbfvPM1CE":{"duration":123.415,"subset":"testing","resolution":"853x480","url":"https://www.youtube.com/watch?v=AxtbfvPM1CE","annotations":[]},"VNMj5YKHEsY":{"duration":190.009,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=VNMj5YKHEsY","annotations":[{"segment":[19.85551794071763,62.24026521060842],"label":"Playing ice hockey"},{"segment":[74.6883182527301,94.24954446177847],"label":"Playing ice hockey"},{"segment":[101.95548205928236,103.43739313572543],"label":"Playing ice hockey"}]},"Fm7oIMqCymo":{"duration":29.58,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Fm7oIMqCymo","annotations":[]},"u3XYsINR-y4":{"duration":169.69,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=u3XYsINR-y4","annotations":[{"segment":[1.853104507020281,147.98363134633385],"label":"Playing violin"}]},"T8P52q1gCNA":{"duration":99.056,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=T8P52q1gCNA","annotations":[{"segment":[22.491663134616697,69.16957164172094],"label":"Baking cookies"}]},"k2U1lOpLqdk":{"duration":112.966,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=k2U1lOpLqdk","annotations":[{"segment":[24.936909095411437,28.990258276997746],"label":"Curling"},{"segment":[25.994304534086123,28.637793130772845],"label":"Curling"},{"segment":[42.73639897976871,46.96598073446747],"label":"Curling"},{"segment":[51.54802763539113,58.068632840551714],"label":"Curling"},{"segment":[89.26179828145507,105.12273079215264],"label":"Curling"}]},"Vc4NSwtI5jM":{"duration":30.117,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Vc4NSwtI5jM","annotations":[{"segment":[2.2551972542901715,16.585096474258968],"label":"Hand car 
wash"}]},"bNwrAuu0qiQ":{"duration":171.5,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=bNwrAuu0qiQ","annotations":[{"segment":[26.607065730889236,160.5783213205928],"label":"Playing badminton"}]},"ayXuNcjC8wk":{"duration":37.733,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ayXuNcjC8wk","annotations":[{"segment":[5.105319615372069,34.387559057929835],"label":"Using the monkey bar"}]},"uavao6fYoMM":{"duration":184.019,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=uavao6fYoMM","annotations":[{"segment":[24.11470173092638,76.0761423654225],"label":"Futsal"},{"segment":[84.1143762757313,140.66909343040388],"label":"Futsal"},{"segment":[144.40113060304725,182.582740099844],"label":"Futsal"}]},"LRhkbJ9dcP8":{"duration":157.525,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LRhkbJ9dcP8","annotations":[{"segment":[37.82994383775351,63.131789391575666],"label":"Blow-drying hair"},{"segment":[99.24219032761312,114.22677847113886],"label":"Blow-drying hair"}]},"VRS48f5tj_c":{"duration":191.08,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=VRS48f5tj_c","annotations":[{"segment":[0.01,191.041],"label":"Playing flauta"}]},"-qcPtBHelmc":{"duration":134.699,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=-qcPtBHelmc","annotations":[{"segment":[23.745681952448166,67.03427029053951],"label":"Disc dog"},{"segment":[79.22231943427397,82.58453988771797],"label":"Disc dog"},{"segment":[88.88870323792544,102.12744482059284],"label":"Disc dog"},{"segment":[112.63438358814354,124.40215500780033],"label":"Disc 
dog"}]},"qZtdMXkHRXw":{"duration":4.784,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qZtdMXkHRXw","annotations":[]},"prBiREYQ2vQ":{"duration":237.865,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=prBiREYQ2vQ","annotations":[]},"hjtGItcwkQA":{"duration":78.65,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hjtGItcwkQA","annotations":[{"segment":[0,77.86526521060843],"label":"Sailing"}]},"2cUJTeArPsI":{"duration":14.536,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=2cUJTeArPsI","annotations":[{"segment":[0.8852574102964119,13.619344773790951],"label":"Using the monkey bar"}]},"b-ex8bAnl5U":{"duration":204.429,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=b-ex8bAnl5U","annotations":[{"segment":[14.456469250051565,196.91366312388334],"label":"Playing blackjack"}]},"Z_hwYD3_lBY":{"duration":184.79,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=Z_hwYD3_lBY","annotations":[{"segment":[10.374851794071763,180.69533541341653],"label":"Mowing the lawn"}]},"HtG-i1CG2Qs":{"duration":41.94,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=HtG-i1CG2Qs","annotations":[]},"GHmxFOXP1Q0":{"duration":59.82,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=GHmxFOXP1Q0","annotations":[{"segment":[0,59.813968253968255],"label":"Getting a piercing"}]},"yo3s6V19YtQ":{"duration":185.04,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=yo3s6V19YtQ","annotations":[{"segment":[20.777897035881434,140.2508049921997],"label":"Rock 
climbing"}]},"xq7Gr0FUwpo":{"duration":223.191,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xq7Gr0FUwpo","annotations":[{"segment":[79.73558502340093,223.19],"label":"Waterskiing"}]},"PzERiygIpkg":{"duration":72.24,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=PzERiygIpkg","annotations":[{"segment":[0.01,72.23727891156463],"label":"Washing hands"}]},"kRBqJhxfWHc":{"duration":80.02,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kRBqJhxfWHc","annotations":[{"segment":[0.01,60.72974339001561],"label":"Drinking beer"}]},"UU8a7vp6tgY":{"duration":33.925,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=UU8a7vp6tgY","annotations":[{"segment":[0.2,32.475257009345796],"label":"Decorating the Christmas tree"}]},"jMQgOcM9vZA":{"duration":149.003,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jMQgOcM9vZA","annotations":[{"segment":[29.98645229074469,34.86796777993568],"label":"Playing ice hockey"},{"segment":[45.560811232449296,50.20987360310739],"label":"Playing ice hockey"},{"segment":[51.372139195771915,111.8099500143271],"label":"Playing ice hockey"},{"segment":[113.20466872552453,127.6167620745646],"label":"Playing ice hockey"}]},"ZcgahXg_ELw":{"duration":194.816,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=ZcgahXg_ELw","annotations":[{"segment":[23.70608846396256,89.35371805647425],"label":"Scuba diving"},{"segment":[92.69688437831513,128.86386549641185],"label":"Scuba diving"},{"segment":[133.11880445148208,149.83463606068642],"label":"Scuba diving"},{"segment":[151.6581813271451,172.62895189141966],"label":"Scuba 
diving"}]},"zSd5dBteQ_s":{"duration":30.07,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=zSd5dBteQ_s","annotations":[]},"RW7LEc-Ykh8":{"duration":50.5,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RW7LEc-Ykh8","annotations":[{"segment":[7.800053546021841,46.012437079563185],"label":"Using the pommel horse"}]},"vRf9sof3dwU":{"duration":63.414,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=vRf9sof3dwU","annotations":[{"segment":[28.371360374414976,54.569634945397816],"label":"Hula hoop"}]},"DQLotF3P9Fc":{"duration":193.56,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=DQLotF3P9Fc","annotations":[{"segment":[51.938510476473475,75.49202104138587],"label":"Shot put"},{"segment":[108.10457413126457,114.65976743999998],"label":"Shot put"},{"segment":[133.77186128533575,160.64701915756632],"label":"Shot put"}]},"xqI9M6QiHws":{"duration":162.981,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xqI9M6QiHws","annotations":[{"segment":[35.85070436286839,43.22425348714629],"label":"Making a cake"},{"segment":[49.32650103827282,128.65571787831513],"label":"Making a cake"}]},"M_hfBH8xEig":{"duration":64.72,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=M_hfBH8xEig","annotations":[{"segment":[23.859384900000002,59.672906301092056],"label":"Layup drill in basketball"},{"segment":[6.50702021372855,23.859074117004685],"label":"Layup drill in basketball"}]},"Vn4wrgBpgP8":{"duration":69.196,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Vn4wrgBpgP8","annotations":[{"segment":[34.52655226209048,49.092441497659905],"label":"Waxing 
skis"}]},"oXFKaTlg3p0":{"duration":162.726,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oXFKaTlg3p0","annotations":[{"segment":[14.97784881191166,140.6394617254078],"label":"Waterskiing"}]},"shGGt9TRlkk":{"duration":37.18,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=shGGt9TRlkk","annotations":[{"segment":[22.79225105330744,27.373898458658342],"label":"Kayaking"}]},"8jJ870_CgPQ":{"duration":30.047,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=8jJ870_CgPQ","annotations":[]},"_ucD-3rUWWE":{"duration":107.81,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_ucD-3rUWWE","annotations":[{"segment":[12.5469278,42.519185900000004],"label":"Kayaking"},{"segment":[55.64181909999999,98.30747263026521],"label":"Kayaking"}]},"yDWOqabreoU":{"duration":63.577,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=yDWOqabreoU","annotations":[{"segment":[3.8672059282371296,33.119148205928234],"label":"Hitting a pinata"},{"segment":[36.589717628705145,42.53926521060843],"label":"Hitting a pinata"},{"segment":[51.166109204368176,63.361],"label":"Hitting a pinata"}]},"Liha_xwiwtc":{"duration":24.01,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Liha_xwiwtc","annotations":[{"segment":[0.2,23.761],"label":"Canoeing"}]},"0_IShglbOQ4":{"duration":78.763,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0_IShglbOQ4","annotations":[{"segment":[12.145801872074884,78.661],"label":"Cutting the 
grass"}]},"jQR4Hhaf8o8":{"duration":231.9,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jQR4Hhaf8o8","annotations":[{"segment":[39.635241809672394,226.04756630265211],"label":"Breakdancing"}]},"jGafq3cWPSI":{"duration":128.92,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=jGafq3cWPSI","annotations":[]},"LlgGMRw16UY":{"duration":182.486,"subset":"training","resolution":"810x480","url":"https://www.youtube.com/watch?v=LlgGMRw16UY","annotations":[{"segment":[16.227270739809182,100.49520300267793],"label":"Longboarding"},{"segment":[106.75836013032358,136.36601200646663],"label":"Longboarding"}]},"ZjuM-Xv_jqY":{"duration":100.59,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZjuM-Xv_jqY","annotations":[{"segment":[51.00058369681725,65.1238222590128],"label":"Cleaning shoes"},{"segment":[68.73309396879876,77.36396193291732],"label":"Cleaning shoes"},{"segment":[85.52405528081123,90.23180144305772],"label":"Cleaning shoes"}]},"N6HUPyM5m2o":{"duration":8.71,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=N6HUPyM5m2o","annotations":[{"segment":[5.869478159126366,8.125411076443058],"label":"Snatch"}]},"dot-t-Nen_k":{"duration":106.44,"subset":"training","resolution":"424x240","url":"https://www.youtube.com/watch?v=dot-t-Nen_k","annotations":[{"segment":[31.374755425081823,42.149467263088056],"label":"Playing lacrosse"},{"segment":[55.41065106371111,79.11501710732482],"label":"Playing lacrosse"},{"segment":[79.94384109486374,100.66444078333728],"label":"Playing lacrosse"}]},"Snq0l-gKpWo":{"duration":33.716,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Snq0l-gKpWo","annotations":[{"segment":[0.01,30.186989799677768],"label":"Doing a 
powerbomb"}]},"6-cHUULLVGQ":{"duration":197.439,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=6-cHUULLVGQ","annotations":[{"segment":[85.14172620904837,104.2331801872075],"label":"Hand car wash"},{"segment":[130.09902106084243,161.8318650546022],"label":"Hand car wash"}]},"H0Pvn5ejnRQ":{"duration":65.968,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=H0Pvn5ejnRQ","annotations":[]},"Yxsw5nTf4Dg":{"duration":95.017,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Yxsw5nTf4Dg","annotations":[{"segment":[7.999834633385335,78.23707956318252],"label":"Hula hoop"}]},"7vmWw_tGXpQ":{"duration":123.531,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=7vmWw_tGXpQ","annotations":[]},"ez9uLqGV7AA":{"duration":154.901,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ez9uLqGV7AA","annotations":[{"segment":[17.88241054602184,23.440457067082683],"label":"Longboarding"},{"segment":[28.5151951950078,137.259583650546],"label":"Longboarding"}]},"BUu9nQ5rj7k":{"duration":176.426,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BUu9nQ5rj7k","annotations":[]},"G4tUTpJV-so":{"duration":69.66,"subset":"training","resolution":"450x360","url":"https://www.youtube.com/watch?v=G4tUTpJV-so","annotations":[{"segment":[0.01,69.65986394557824],"label":"Tai 
chi"}]},"50MmJszF2XM":{"duration":149.98,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=50MmJszF2XM","annotations":[{"segment":[23.377379095163807,58.67722152886115],"label":"Cheerleading"},{"segment":[79.71686271450857,145.40729797191887],"label":"Cheerleading"}]},"hj88A5tvA0Y":{"duration":192.517,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hj88A5tvA0Y","annotations":[{"segment":[19.221614627145087,143.26109651794073],"label":"Slacklining"},{"segment":[157.67730748829953,166.98777707332292],"label":"Slacklining"}]},"3ddoLUFrGsE":{"duration":120.74,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3ddoLUFrGsE","annotations":[{"segment":[8.001638609999999,91.32034576],"label":"Putting on makeup"}]},"FVD4vHnJAIA":{"duration":229.41,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=FVD4vHnJAIA","annotations":[]},"_ZiTTLhXjZQ":{"duration":155.83,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_ZiTTLhXjZQ","annotations":[{"segment":[0.2,155.58100000000002],"label":"Disc dog"}]},"wV2c3sdctbs":{"duration":50.666,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=wV2c3sdctbs","annotations":[{"segment":[5.29111856474259,33.655329173166926],"label":"Putting on shoes"}]},"EbqurNRxneE":{"duration":34.32,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=EbqurNRxneE","annotations":[{"segment":[0.01,34.23356260231897],"label":"Applying 
sunscreen"}]},"gA1pa5MJwFs":{"duration":37.27,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=gA1pa5MJwFs","annotations":[]},"NxWYb3uDzb0":{"duration":163.051,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=NxWYb3uDzb0","annotations":[]},"VpfLfFtu0d8":{"duration":171.36,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VpfLfFtu0d8","annotations":[{"segment":[36.344673946957876,111.43918408736349],"label":"Making a sandwich"}]},"VGVIaQuhdcI":{"duration":65.713,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=VGVIaQuhdcI","annotations":[{"segment":[0.2,39.46848822152886],"label":"Plastering"}]},"IGT0dIKDGCw":{"duration":70.241,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IGT0dIKDGCw","annotations":[]},"H8oehT8eeGc":{"duration":15.09,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=H8oehT8eeGc","annotations":[]},"nS9PgniAQAE":{"duration":161.681,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nS9PgniAQAE","annotations":[{"segment":[17.601301839736177,125.47216091438627],"label":"Grooming dog"}]},"qBvQjv1XyfQ":{"duration":77.0,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qBvQjv1XyfQ","annotations":[{"segment":[15.61569085803432,27.147277953198127],"label":"Using uneven bars"},{"segment":[32.67283010296412,46.84707257410297],"label":"Using uneven bars"},{"segment":[56.336607787831504,66.42674649609984],"label":"Using uneven 
bars"}]},"68oJj9QktGg":{"duration":181.843,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=68oJj9QktGg","annotations":[{"segment":[4.2490887850467285,171.37991433021807],"label":"Bullfighting"}]},"pOO6H6d5SXI":{"duration":146.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=pOO6H6d5SXI","annotations":[{"segment":[0,145.983333],"label":"Playing accordion"}]},"AsdsR27UnJ0":{"duration":56.19,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=AsdsR27UnJ0","annotations":[]},"Q7R3ajucvyE":{"duration":239.305,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Q7R3ajucvyE","annotations":[{"segment":[74.43385205293886,78.89988351684634],"label":"Putting in contact lenses"},{"segment":[104.20739514565547,119.09416669201377],"label":"Putting in contact lenses"}]},"Yr7cZQAhJag":{"duration":233.18,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Yr7cZQAhJag","annotations":[]},"q_nBBJS-eJo":{"duration":220.892,"subset":"validation","resolution":"810x480","url":"https://www.youtube.com/watch?v=q_nBBJS-eJo","annotations":[{"segment":[66.16403329797193,197.45828687363493],"label":"Painting furniture"}]},"KaicDn9WDUo":{"duration":204.75,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KaicDn9WDUo","annotations":[{"segment":[0.01,192.56583931357252],"label":"Ballet"}]},"8X1GZojsjGQ":{"duration":192.05,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=8X1GZojsjGQ","annotations":[]},"R0B5bBr6t8w":{"duration":18.3,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=R0B5bBr6t8w","annotations":[{"segment":[2.711771887675507,4.338835020280811],"label":"Layup drill in basketball"},{"segment":[8.106770695787832,9.36274925429017],"label":"Layup drill in basketball"},{"segment":[12.845235257410295,13.673039307332292],"label":"Layup 
drill in basketball"}]},"vLJz4a4NvRI":{"duration":104.699,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vLJz4a4NvRI","annotations":[{"segment":[11.102882995319813,95.51744929797192],"label":"Playing blackjack"}]},"I-C_fOUaknA":{"duration":18.948,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=I-C_fOUaknA","annotations":[{"segment":[0.2,18.180139445186175],"label":"Laying tile"}]},"lnHdEtuXU8w":{"duration":77.81,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lnHdEtuXU8w","annotations":[{"segment":[35.93102393759751,50.86182104680188],"label":"Ironing clothes"}]},"i3H8cucgXEc":{"duration":112.966,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=i3H8cucgXEc","annotations":[{"segment":[10.569828393135726,106.95036973478939],"label":"Rollerblading"}]},"QQe2n2yjJuc":{"duration":196.581,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QQe2n2yjJuc","annotations":[{"segment":[0.2,196.3797052154195],"label":"Futsal"}]},"vWsPUB8dQMU":{"duration":178.724,"subset":"training","resolution":"176x144","url":"https://www.youtube.com/watch?v=vWsPUB8dQMU","annotations":[{"segment":[32.343186957878316,155.02424093603742],"label":"Changing car wheel"}]},"3FZ47muWIYA":{"duration":160.68,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3FZ47muWIYA","annotations":[{"segment":[88.8301786271451,120.15252652106085],"label":"Washing 
hands"}]},"q81H-V1_gGo":{"duration":160.15,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=q81H-V1_gGo","annotations":[{"segment":[27.9821823900156,149.155025775351],"label":"Archery"}]},"jNJnPpIvtTU":{"duration":211.95,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jNJnPpIvtTU","annotations":[{"segment":[27.16175741179035,183.9796372793916],"label":"Tango"}]},"Lo0EQXmaeXM":{"duration":78.902,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Lo0EQXmaeXM","annotations":[{"segment":[20.310034278922178,52.43681577467181],"label":"Gargling mouthwash"}]},"pRkJ_9zq16A":{"duration":63.368,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=pRkJ_9zq16A","annotations":[{"segment":[7.6119791497659905,47.6490123400936],"label":"Futsal"}]},"huKjjvCG0EE":{"duration":130.613,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=huKjjvCG0EE","annotations":[{"segment":[0.2,23.22900676027041],"label":"Croquet"},{"segment":[39.53006413589877,48.08811925810365],"label":"Croquet"},{"segment":[54.404778991159645,61.33272837580169],"label":"Croquet"},{"segment":[96.99129047737911,101.27031799843994],"label":"Croquet"}]},"Ckkf840HZE4":{"duration":220.357,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ckkf840HZE4","annotations":[{"segment":[5.28583096432363,22.44761810908808],"label":"Wakeboarding"},{"segment":[55.05501368414053,67.06826468547564],"label":"Wakeboarding"},{"segment":[91.43800243104117,109.62949680449148],"label":"Wakeboarding"},{"segment":[112.37538274765379,152.8772004092979],"label":"Wakeboarding"},{"segment":[158.02573516236546,178.27664381501344],"label":"Wakeboarding"},{"segment":[181.70900121376732,218.435225380434],"label":"Wakeboarding"}]},"k7U1T2M5ufU":{"duration":78.371,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=k7U1T2M5u
fU","annotations":[]},"jzGsvw5Douk":{"duration":194.352,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jzGsvw5Douk","annotations":[{"segment":[59.088940855153666,194.152988],"label":"River tubing"}]},"fL8Gzk7ALxQ":{"duration":123.182,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=fL8Gzk7ALxQ","annotations":[{"segment":[0,123.18185941043085],"label":"Capoeira"}]},"zwFxq1MnaO0":{"duration":166.23,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zwFxq1MnaO0","annotations":[{"segment":[7.519505460218409,156.35385491419655],"label":"Zumba"}]},"5fgJ5vWTcSQ":{"duration":180.094,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5fgJ5vWTcSQ","annotations":[{"segment":[14.040639625585024,92.38740873634946],"label":"Longboarding"},{"segment":[97.44203900156006,164.85542433697347],"label":"Longboarding"}]},"2yjRePOzYq4":{"duration":44.77,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2yjRePOzYq4","annotations":[{"segment":[10.24649984,16.00635072],"label":"Triple jump"},{"segment":[11.487792425897036,44.428332999999995],"label":"Triple jump"}]},"o1R8glzh0As":{"duration":227.88,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=o1R8glzh0As","annotations":[{"segment":[53.30446687255245,207.53205668642747],"label":"River tubing"}]},"HdgVytIzu4s":{"duration":152.25,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=HdgVytIzu4s","annotations":[{"segment":[7.6007859035449865,146.79017619032763],"label":"Playing racquetball"}]},"CpxGRgJgRPA":{"duration":185.76,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CpxGRgJgRPA","annotations":[{"segment":[10.967238689547582,172.30109204368173],"label":"Wrapping 
presents"}]},"4DMnMu2Cb_c":{"duration":234.034,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4DMnMu2Cb_c","annotations":[{"segment":[18.372756971195365,217.77549028427947],"label":"Braiding hair"}]},"USCVBB97pOc":{"duration":145.148,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=USCVBB97pOc","annotations":[{"segment":[0.01,131.10866515600623],"label":"Clipping cat claws"}]},"cC3dwwPPHKE":{"duration":91.63,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cC3dwwPPHKE","annotations":[{"segment":[29.67458875,77.56697625000001],"label":"Ironing clothes"}]},"HSZz2XqWIqI":{"duration":154.553,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=HSZz2XqWIqI","annotations":[]},"yuxoNmlNcc8":{"duration":146.843,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yuxoNmlNcc8","annotations":[{"segment":[17.868520487758285,139.05374179095162],"label":"Tug of war"}]},"fMRgMeDs9Ck":{"duration":51.67,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fMRgMeDs9Ck","annotations":[{"segment":[0,41.347638336973475],"label":"Shaving"}]},"kUlHkpKeN1g":{"duration":229.135,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=kUlHkpKeN1g","annotations":[]},"2CfCDcCuOO4":{"duration":91.0,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2CfCDcCuOO4","annotations":[{"segment":[0.01,90.961],"label":"Disc dog"}]},"kDz85hnSKdo":{"duration":31.07,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=kDz85hnSKdo","annotations":[{"segment":[5.4284701653666145,28.596405335413415],"label":"Kayaking"}]},"G0vkQmcalvk":{"duration":43.863,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=G0vkQmcalvk","annotations":[{"segment":[0.2,40.74943457943925],"label":"Playing 
drums"}]},"HM3Rd5ZcME8":{"duration":225.721,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HM3Rd5ZcME8","annotations":[{"segment":[5.630726989079563,225.381],"label":"Snow tubing"}]},"BTG82umu8Ug":{"duration":25.47,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=BTG82umu8Ug","annotations":[{"segment":[1.3113483884555381,3.775093845553822],"label":"High jump"},{"segment":[5.6030340234009355,6.4772662823712945],"label":"High jump"},{"segment":[11.603446346333852,12.596892095163806],"label":"High jump"},{"segment":[19.431798847113885,20.464982425897034],"label":"High jump"},{"segment":[24.12086278159126,25.271949],"label":"High jump"}]},"1Cv2YWq34ao":{"duration":49.39,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=1Cv2YWq34ao","annotations":[]},"r46Vy3p19a0":{"duration":31.742,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=r46Vy3p19a0","annotations":[{"segment":[2.2778739285625846,30.008512109204368],"label":"Cutting the grass"}]},"odMI0DGsn7k":{"duration":152.277,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=odMI0DGsn7k","annotations":[{"segment":[0.009999999999999998,5.224433697347894],"label":"Doing fencing"},{"segment":[78.3665054602184,86.91557878315132],"label":"Doing fencing"},{"segment":[109.47563338533541,116.14017004680188],"label":"Doing 
fencing"}]},"Y34oqcxYmWc":{"duration":219.57,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Y34oqcxYmWc","annotations":[{"segment":[21.639598198125963,175.88391642242502],"label":"Tango"}]},"aR5pR5tR1bw":{"duration":16.835,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=aR5pR5tR1bw","annotations":[]},"hIRq3O1EmLg":{"duration":153.183,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hIRq3O1EmLg","annotations":[]},"5c1zqvr3yKM":{"duration":80.318,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=5c1zqvr3yKM","annotations":[{"segment":[0.01,58.34927150055339],"label":"Clipping cat claws"}]},"sdR443ncw-I":{"duration":23.59,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sdR443ncw-I","annotations":[{"segment":[0.8492759999999999,23.498333],"label":"Brushing hair"}]},"qoSn8oUMBSE":{"duration":171.991,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qoSn8oUMBSE","annotations":[{"segment":[16.583812495481066,69.81251408913045],"label":"Beach soccer"},{"segment":[71.41739956431586,90.40854435400986],"label":"Beach soccer"},{"segment":[109.66717005623475,145.77709175837862],"label":"Beach soccer"}]},"a4-5QFOiAiw":{"duration":59.98,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=a4-5QFOiAiw","annotations":[{"segment":[0.5459708,59.921],"label":"Tennis serve with ball bouncing"}]},"9q6wWG6ql4E":{"duration":56.17,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9q6wWG6ql4E","annotations":[{"segment":[5.739585186128533,53.233557260657776],"label":"Using the monkey bar"}]},"OFKGyZxazQk":{"duration":168.02,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=OFKGyZxazQk","annotations":[{"segment":[19.659017433697347,21.493859060842432],"label":"Applying 
sunscreen"},{"segment":[27.522624407176288,31.978668358814353],"label":"Applying sunscreen"}]},"XtqTatsTGNY":{"duration":106.673,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=XtqTatsTGNY","annotations":[{"segment":[0.01,106.67247],"label":"Fun sliding down"}]},"l_Y_Xns6SpE":{"duration":54.683,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=l_Y_Xns6SpE","annotations":[{"segment":[6.312820399375975,18.255994127925117],"label":"Playing pool"},{"segment":[24.48350614352574,35.91482956942277],"label":"Playing pool"},{"segment":[41.88641643369735,54.682674],"label":"Playing pool"}]},"oUQPIZu5bVU":{"duration":113.639,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=oUQPIZu5bVU","annotations":[{"segment":[35.43868954758191,102.08139781591264],"label":"Hurling"}]},"8vVk8oHmvbs":{"duration":229.692,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8vVk8oHmvbs","annotations":[]},"5nOc03oiFvk":{"duration":213.44,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5nOc03oiFvk","annotations":[{"segment":[61.26764341965679,168.81899572698907],"label":"Rock climbing"},{"segment":[175.47852218564742,199.4528174368175],"label":"Rock climbing"}]},"AUwam1VhQm0":{"duration":87.586,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=AUwam1VhQm0","annotations":[{"segment":[21.452339655226208,76.38126030109204],"label":"Fun sliding 
down"}]},"3xbd33jK63A":{"duration":90.976,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=3xbd33jK63A","annotations":[]},"G_US7iMc6Y4":{"duration":161.595,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=G_US7iMc6Y4","annotations":[{"segment":[26.024105368462674,87.43495272048759],"label":"Sumo"},{"segment":[94.48209913793308,149.36765365511457],"label":"Sumo"}]},"HX6BeHLQh5s":{"duration":26.29,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HX6BeHLQh5s","annotations":[]},"vezvHLPR3P0":{"duration":12.49,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vezvHLPR3P0","annotations":[]},"5y9Lw8--ulU":{"duration":93.577,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5y9Lw8--ulU","annotations":[{"segment":[80.4688481973547,88.3351005338033],"label":"Using the rowing machine"}]},"8bX1g0mefzM":{"duration":140.5,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8bX1g0mefzM","annotations":[{"segment":[49.739430907952425,133.998026608887],"label":"Polishing shoes"}]},"TrQlBM3nKAs":{"duration":122.32,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=TrQlBM3nKAs","annotations":[]},"LvTwJTvluHE":{"duration":227.672,"subset":"testing","resolution":"840x480","url":"https://www.youtube.com/watch?v=LvTwJTvluHE","annotations":[]},"8c-s3TKrtdE":{"duration":157.69,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=8c-s3TKrtdE","annotations":[{"segment":[8.11803782355376,27.798129517017415],"label":"Playing field hockey"},{"segment":[62.48429112674712,69.12632207329109],"label":"Playing field hockey"},{"segment":[84.13239198955714,94.71044127479385],"label":"Playing field hockey"},{"segment":[103.07448024451591,117.83454901461364],"label":"Playing field 
hockey"},{"segment":[138.25264353822152,148.09268934165368],"label":"Playing field hockey"}]},"E_UkQGk7QAE":{"duration":88.236,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=E_UkQGk7QAE","annotations":[]},"ywsH9kD033I":{"duration":175.567,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ywsH9kD033I","annotations":[{"segment":[62.40201250905783,78.80397512588026],"label":"Arm wrestling"},{"segment":[90.55871500126966,116.25512310095814],"label":"Arm wrestling"},{"segment":[130.74352341248462,141.14751633235895],"label":"Arm wrestling"},{"segment":[161.6523060519851,167.39364717348045],"label":"Arm wrestling"}]},"HqpBHYNuA4Y":{"duration":193.655,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=HqpBHYNuA4Y","annotations":[]},"02V8Hz-M6BM":{"duration":55.427,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=02V8Hz-M6BM","annotations":[{"segment":[31.462676114412847,41.54797140632333],"label":"Drum corps"},{"segment":[48.96109516993141,53.529648187038724],"label":"Drum corps"}]},"9ovdfIi4R1o":{"duration":12.261,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=9ovdfIi4R1o","annotations":[]},"UI98gtpg7FE":{"duration":192.169,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UI98gtpg7FE","annotations":[{"segment":[24.80553859283475,161.9831542674961],"label":"Snowboarding"}]},"jml_hnhjltg":{"duration":129.24,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jml_hnhjltg","annotations":[{"segment":[0,127.226006],"label":"Shoveling snow"}]},"YCHmXwLhs3A":{"duration":213.763,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YCHmXwLhs3A","annotations":[{"segment":[0.2,203.78951161316004],"label":"Assembling 
bicycle"}]},"--6bJUbfpnQ":{"duration":26.75,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=--6bJUbfpnQ","annotations":[{"segment":[2.578755070202808,24.914101404056165],"label":"Drinking beer"}]},"PBuzvPn0iho":{"duration":165.396,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PBuzvPn0iho","annotations":[]},"jdB2QeTdbp0":{"duration":176.588,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jdB2QeTdbp0","annotations":[]},"B-6kP8M_GmM":{"duration":186.572,"subset":"validation","resolution":"576x324","url":"https://www.youtube.com/watch?v=B-6kP8M_GmM","annotations":[{"segment":[35.861333260258824,41.6735427929691],"label":"Sumo"},{"segment":[56.204066624744804,152.97735534437095],"label":"Sumo"}]},"gpKYclCmQHM":{"duration":167.556,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=gpKYclCmQHM","annotations":[{"segment":[85.47138516837528,94.59180779050864],"label":"Beer pong"},{"segment":[125.60124470576203,127.94649490561537],"label":"Beer pong"}]},"Zk4Ewz3v6aQ":{"duration":124.204,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=Zk4Ewz3v6aQ","annotations":[{"segment":[56.19181515559942,103.27668095839479],"label":"Using the rowing machine"}]},"LxntXVcMxtc":{"duration":38.08,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=LxntXVcMxtc","annotations":[{"segment":[7.367514484619426,32.44508925097457],"label":"Doing karate"}]},"kO_4QDNExkY":{"duration":70.357,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=kO_4QDNExkY","annotations":[{"segment":[7.786850234009361,63.7205631825273],"label":"Blowing 
leaves"}]},"KEJP4Uxa5m0":{"duration":164.305,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KEJP4Uxa5m0","annotations":[{"segment":[25.140674207455646,28.7222318398544],"label":"Croquet"}]},"2jHMUtvWP_0":{"duration":23.36,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=2jHMUtvWP_0","annotations":[{"segment":[5.063781052313119,13.927214074119972],"label":"Plastering"}]},"sO7ZGxFwbm8":{"duration":233.82999999999998,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=sO7ZGxFwbm8","annotations":[{"segment":[13.856190971448383,21.513559666196173],"label":"Javelin throw"},{"segment":[35.36975063764456,39.01611668276255],"label":"Javelin throw"},{"segment":[64.54067899858852,68.91631825273011],"label":"Javelin throw"},{"segment":[72.56268429784811,77.66759626521062],"label":"Javelin throw"},{"segment":[87.87742112636505,102.09824861154448],"label":"Javelin throw"},{"segment":[112.30807347269891,210.7599560624025],"label":"Javelin throw"}]},"tEAEFVJGGG0":{"duration":147.401,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=tEAEFVJGGG0","annotations":[{"segment":[4.454151841612094,139.68586464379277],"label":"Trimming branches or hedges"}]},"9A0BwCxAKAg":{"duration":230.95,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=9A0BwCxAKAg","annotations":[{"segment":[14.587265990639626,211.96558112324493],"label":"Cleaning 
windows"}]},"g-l5CI29DPQ":{"duration":114.55,"subset":"training","resolution":"432x720","url":"https://www.youtube.com/watch?v=g-l5CI29DPQ","annotations":[{"segment":[1.6082625998917508,110.61272770366597],"label":"Hopscotch"}]},"2pJTak2Qz8Q":{"duration":237.657,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2pJTak2Qz8Q","annotations":[{"segment":[19.645636505460217,227.61200312012483],"label":"Skiing"}]},"H306mUUJ5BE":{"duration":213.9,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=H306mUUJ5BE","annotations":[{"segment":[0.2,213.64000000000001],"label":"Playing harmonica"},{"segment":[0.2,213.64000000000001],"label":"Playing harmonica"},{"segment":[178.4780031201248,186.15088923556942],"label":"Playing harmonica"}]},"l-pltFa_oRw":{"duration":151.464,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=l-pltFa_oRw","annotations":[]},"ojVY8I3GPa4":{"duration":166.023,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ojVY8I3GPa4","annotations":[{"segment":[52.79844617784711,165.881],"label":"Bullfighting"}]},"7LbBmVX33I4":{"duration":191.91,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7LbBmVX33I4","annotations":[]},"SPjv6P5fxug":{"duration":125.226,"subset":"testing","resolution":"810x480","url":"https://www.youtube.com/watch?v=SPjv6P5fxug","annotations":[]},"656VWQU5dgE":{"duration":91.743,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=656VWQU5dgE","annotations":[{"segment":[5.009310848624421,85.15828339313573],"label":"Doing fencing"}]},"r_n1PfHvKI4":{"duration":176.472,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=r_n1PfHvKI4","annotations":[{"segment":[13.351648607644305,35.374986517160686],"label":"Kite flying"},{"segment":[56.572449255070204,171.6443898322933],"label":"Kite 
flying"}]},"AuVL_nkYXw0":{"duration":226.999,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=AuVL_nkYXw0","annotations":[{"segment":[84.28329087363495,208.9375698127925],"label":"Elliptical trainer"}]},"YrWRdVrWPBA":{"duration":66.02,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YrWRdVrWPBA","annotations":[]},"TDwSu2nfSXw":{"duration":113.31,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TDwSu2nfSXw","annotations":[{"segment":[77.07417071450858,113.3131746031746],"label":"Breakdancing"}]},"brrf2inGs7E":{"duration":27.45,"subset":"training","resolution":"352x240","url":"https://www.youtube.com/watch?v=brrf2inGs7E","annotations":[{"segment":[4.794444617784711,25.898196567862712],"label":"Washing hands"}]},"4_3m_-SGzXw":{"duration":167.811,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4_3m_-SGzXw","annotations":[{"segment":[101.78388299531981,141.8171326053042],"label":"Hanging wallpaper"}]},"fqoEZaCd7hU":{"duration":79.83,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=fqoEZaCd7hU","annotations":[{"segment":[3.7361721528861156,77.83691985179406],"label":"Playing guitarra"}]},"VdY1Shdks6o":{"duration":82.199,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=VdY1Shdks6o","annotations":[{"segment":[26.67288097347894,67.70808247113885],"label":"Putting in contact lenses"}]},"x8yuq2i72eo":{"duration":8.476,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=x8yuq2i72eo","annotations":[{"segment":[0.2,8.218],"label":"Doing crunches"}]},"16g-sBraeKE":{"duration":225.23,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=16g-sBraeKE","annotations":[{"segment":[50.3395755,170.59282371294853],"label":"Doing 
nails"}]},"ZdgWC6Jk2Nk":{"duration":47.648,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=ZdgWC6Jk2Nk","annotations":[{"segment":[15.297669266770672,21.832595943837752],"label":"Spread mulch"}]},"feYYvRmqCgI":{"duration":33.135,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=feYYvRmqCgI","annotations":[{"segment":[6.5542698907956325,33.081],"label":"Calf roping"}]},"iZUwLKd5TTk":{"duration":203.59,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=iZUwLKd5TTk","annotations":[{"segment":[33.81831903276131,188.4617121684867],"label":"Ironing clothes"}]},"sY7m7E-sAYc":{"duration":136.371,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sY7m7E-sAYc","annotations":[{"segment":[98.07604946918966,134.24292091107645],"label":"Peeling potatoes"}]},"nciIPwJTok8":{"duration":60.163,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=nciIPwJTok8","annotations":[]},"oijpI0s_R04":{"duration":170.3,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=oijpI0s_R04","annotations":[{"segment":[15.143250236131895,45.42975070839569],"label":"Tango"},{"segment":[59.510316717430605,82.62369865678981],"label":"Tango"},{"segment":[90.85950141679137,116.62959392389301],"label":"Tango"},{"segment":[121.94301505937788,143.99371277164013],"label":"Tango"},{"segment":[150.3698169048362,168.43544861778471],"label":"Tango"}]},"b7fs8OAJzQk":{"duration":31.394,"subset":"training","resolution":"657x480","url":"https://www.youtube.com/watch?v=b7fs8OAJzQk","annotations":[{"segment":[0.01,18.73317738299532],"label":"Waterskiing"},{"segment":[29.458839727769114,31.193377],"label":"Waterskiing"}]},"tGHLUWWm_zU":{"duration":187.548,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tGHLUWWm_zU","annotations":[{"segment":[2.0480959809820964,186.9619037223089],"label":"Plastering"}]},
"pkHb81FOGgg":{"duration":100.822,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=pkHb81FOGgg","annotations":[]},"Oyi0X-rwUg8":{"duration":4.83,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Oyi0X-rwUg8","annotations":[{"segment":[0.01,4.6331828773118255],"label":"Doing a powerbomb"}]},"GvMep9y7d7c":{"duration":184.81,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=GvMep9y7d7c","annotations":[]},"iCveBOc8JzM":{"duration":11.193,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=iCveBOc8JzM","annotations":[{"segment":[0.20000000000000004,10.99201814058957],"label":"Applying sunscreen"}]},"D9DdNf-TIO8":{"duration":63.37,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=D9DdNf-TIO8","annotations":[{"segment":[0,62.67727392852641],"label":"Bathing dog"}]},"1EzvfTeJjvw":{"duration":108.67,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=1EzvfTeJjvw","annotations":[]},"K3sJnHGHQHM":{"duration":180.002,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=K3sJnHGHQHM","annotations":[{"segment":[37.62893268383797,119.34549536271452],"label":"Rafting"},{"segment":[140.96809099313572,172.4191391828393],"label":"Rafting"}]},"K0e_DdvGP54":{"duration":24.984,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=K0e_DdvGP54","annotations":[{"segment":[0.01,12.70671255226209],"label":"Gargling 
mouthwash"}]},"ys4_S3JHQjs":{"duration":173.198,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ys4_S3JHQjs","annotations":[]},"wC97n9iS0Ak":{"duration":195.641,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=wC97n9iS0Ak","annotations":[]},"aj1ole7T9hc":{"duration":30.14,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=aj1ole7T9hc","annotations":[{"segment":[0,30.139501133786847],"label":"Mowing the lawn"}]},"rDWa3Z2IaIw":{"duration":221.75,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rDWa3Z2IaIw","annotations":[{"segment":[45.664702932917315,221.75056689342404],"label":"Playing field hockey"}]},"rHWOESWciSc":{"duration":38.197,"subset":"training","resolution":"224x128","url":"https://www.youtube.com/watch?v=rHWOESWciSc","annotations":[{"segment":[18.025802277691106,32.26767581279251],"label":"Peeling potatoes"}]},"I4uZkBmE5eM":{"duration":82.082,"subset":"validation","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=I4uZkBmE5eM","annotations":[{"segment":[0.2,81.821],"label":"Playing congas"}]},"KXcVYfMuUGw":{"duration":131.333,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KXcVYfMuUGw","annotations":[]},"T69Cadlc62E":{"duration":68.43,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=T69Cadlc62E","annotations":[{"segment":[30.88346255850234,46.778578003120124],"label":"Hurling"}]},"ylo_0z8si1g":{"duration":163.75,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ylo_0z8si1g","annotations":[{"segment":[88.82968197936724,97.24658996534853],"label":"Belly dance"},{"segment":[104.1331510447878,115.86581066161021],"label":"Belly dance"},{"segment":[130.14904845600273,156.67506150273172],"label":"Belly 
dance"}]},"32H1n87WgCM":{"duration":168.25,"subset":"validation","resolution":"384x288","url":"https://www.youtube.com/watch?v=32H1n87WgCM","annotations":[{"segment":[9.186915286135257,12.861681400589356],"label":"Doing karate"},{"segment":[28.610679033964082,32.0229618545286],"label":"Doing karate"},{"segment":[56.69639148014901,80.58237122410067],"label":"Doing karate"},{"segment":[97.6437853269233,123.36714812810202],"label":"Doing karate"},{"segment":[143.3158781372855,155.39010962870515],"label":"Doing karate"}]},"QTfz_zprGcQ":{"duration":181.6,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QTfz_zprGcQ","annotations":[{"segment":[73.2174698299532,77.18282602964119],"label":"Spinning"},{"segment":[112.02131264118565,136.37992929641186],"label":"Spinning"},{"segment":[142.32796359594386,151.95811436661467],"label":"Spinning"}]},"jOUwMAGYImE":{"duration":149.1,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=jOUwMAGYImE","annotations":[{"segment":[17.208290171606865,148.57597347893915],"label":"Spinning"}]},"qmHjHk0Hzz8":{"duration":20.95,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=qmHjHk0Hzz8","annotations":[]},"gxILsv1RTEI":{"duration":68.8,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gxILsv1RTEI","annotations":[{"segment":[0,64.61472145085804],"label":"Brushing 
teeth"}]},"r6l4mxphtX0":{"duration":76.12,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=r6l4mxphtX0","annotations":[]},"PMZLlOrkizg":{"duration":94.691,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PMZLlOrkizg","annotations":[]},"2Z_2Xh30LBg":{"duration":103.353,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=2Z_2Xh30LBg","annotations":[]},"AYhduTuI_zM":{"duration":229.3,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=AYhduTuI_zM","annotations":[{"segment":[7.154335806085305,40.779714094686234],"label":"Playing flauta"},{"segment":[84.4211625118066,90.86006473728338],"label":"Playing flauta"},{"segment":[99.08755091428148,114.11165610706061],"label":"Playing flauta"},{"segment":[133.07064599318667,144.1598664926189],"label":"Playing flauta"},{"segment":[183.86642894539781,208.90660409360373],"label":"Playing flauta"}]},"jPLJAYnjsBw":{"duration":202.07999999999998,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=jPLJAYnjsBw","annotations":[{"segment":[108.13503830265212,155.10915115132607],"label":"Getting a tattoo"}]},"LZEiFNEAyyw":{"duration":127.826,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LZEiFNEAyyw","annotations":[{"segment":[18.34629989281204,107.08655368642746],"label":"Applying sunscreen"}]},"uu4_cV49pMI":{"duration":18.251,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=uu4_cV49pMI","annotations":[{"segment":[0.01,4.071569842433697],"label":"Running a 
marathon"}]},"ZrAxt3ZjKdM":{"duration":43.097,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=ZrAxt3ZjKdM","annotations":[]},"oc4v7GPk05c":{"duration":9.47,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=oc4v7GPk05c","annotations":[{"segment":[0.5744134165366616,5.611577223088924],"label":"Playing field hockey"}]},"f6Id4KERnoI":{"duration":144.452,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=f6Id4KERnoI","annotations":[{"segment":[0.01,140.1693641653666],"label":"Painting furniture"}]},"LI3wIHFQkAk":{"duration":67.29,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=LI3wIHFQkAk","annotations":[{"segment":[6.235432917316693,63.97868564742589],"label":"Playing flauta"}]},"iUiXNXRs4rU":{"duration":79.67,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iUiXNXRs4rU","annotations":[]},"AT_pPlJTiyE":{"duration":205.752,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=AT_pPlJTiyE","annotations":[{"segment":[59.06142174394459,62.913253596810534],"label":"Changing car wheel"},{"segment":[68.37001538837063,71.57987526575893],"label":"Changing car wheel"},{"segment":[77.67860903279669,84.09832878757327],"label":"Changing car wheel"},{"segment":[102.71551607642537,106.2463619415525],"label":"Changing car wheel"},{"segment":[121.65368935301629,125.18453521814342],"label":"Changing car wheel"},{"segment":[144.12270849473435,147.9745403476003],"label":"Changing car wheel"},{"segment":[153.43130213916038,160.1720077722309],"label":"Changing car wheel"},{"segment":[164.0238396224649,166.9127135101404],"label":"Changing car wheel"},{"segment":[174.2953912230889,179.11018103588142],"label":"Changing car 
wheel"}]},"oY1y_UONCGg":{"duration":161.36,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=oY1y_UONCGg","annotations":[{"segment":[69.22421475090296,99.17942040674824],"label":"Vacuuming floor"},{"segment":[131.6518702353536,156.5725875456787],"label":"Vacuuming floor"}]},"IeMeu-7QihI":{"duration":76.324,"subset":"training","resolution":"640x352","url":"https://www.youtube.com/watch?v=IeMeu-7QihI","annotations":[{"segment":[7.370852706607046,60.98786036915888],"label":"Wakeboarding"}]},"rZ8tVgFyY84":{"duration":150.81,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rZ8tVgFyY84","annotations":[{"segment":[50.23198822776911,115.40417077907607],"label":"Washing hands"}]},"-cJova7MiO8":{"duration":29.513,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-cJova7MiO8","annotations":[{"segment":[0.5507662191841759,16.339417116148905],"label":"Sumo"}]},"IWuiVS3-Z7k":{"duration":119.93,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=IWuiVS3-Z7k","annotations":[{"segment":[5.3303447737909515,92.299127925117],"label":"Paintball"}]},"mSyfGQigb8U":{"duration":202.85,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=mSyfGQigb8U","annotations":[{"segment":[38.23176383325916,197.16215222429906],"label":"Cleaning sink"}]},"fllAtkXf7pw":{"duration":36.43,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fllAtkXf7pw","annotations":[{"segment":[13.58389012948518,16.880399031201247],"label":"Drinking 
coffee"}]},"zwQYVBQnn4o":{"duration":162.22,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=zwQYVBQnn4o","annotations":[{"segment":[99.06895622,137.10327279999998],"label":"Snatch"},{"segment":[18.36586908,83.24984694000001],"label":"Snatch"}]},"tzwIHzuzG9c":{"duration":24.57,"subset":"validation","resolution":"240x176","url":"https://www.youtube.com/watch?v=tzwIHzuzG9c","annotations":[{"segment":[0.30660481747269897,24.375082989079562],"label":"Getting a haircut"}]},"Po819PkLT9I":{"duration":103.608,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=Po819PkLT9I","annotations":[]},"gTevMU1l0QE":{"duration":224.05,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=gTevMU1l0QE","annotations":[]},"hxErUs42bBI":{"duration":56.704,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hxErUs42bBI","annotations":[]},"YED3l3gkrk4":{"duration":159.94,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YED3l3gkrk4","annotations":[{"segment":[50.15228734633385,135.2365161279251],"label":"Doing nails"}]},"HCM-Hrxa5k8":{"duration":60.51,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HCM-Hrxa5k8","annotations":[]},"uG_hgODoDes":{"duration":207.726,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uG_hgODoDes","annotations":[{"segment":[33.047522620904836,42.11939157566303],"label":"Grooming dog"},{"segment":[44.387358814352574,61.23511544461779],"label":"Grooming dog"},{"segment":[61.88310608424337,83.91478783151327],"label":"Grooming dog"},{"segment":[83.91478783151327,121.4982449297972],"label":"Grooming dog"},{"segment":[122.79422620904836,127.65415600624026],"label":"Grooming dog"},{"segment":[132.51408580343215,152.29246489859594],"label":"Grooming dog"},{"segment":[154.56065054602183,171.41002964118564],"label":"Grooming 
dog"},{"segment":[172.706135725429,186.3152496099844],"label":"Grooming dog"}]},"mDaZqz7lB0o":{"duration":221.36,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=mDaZqz7lB0o","annotations":[{"segment":[33.151574814012974,146.07412652424466],"label":"Playing piano"}]},"MleVjHU-wI8":{"duration":232.32,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=MleVjHU-wI8","annotations":[{"segment":[23.92017574580534,201.14693240790854],"label":"Wrapping presents"}]},"r0685Fhcfjs":{"duration":64.064,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=r0685Fhcfjs","annotations":[{"segment":[0.9376721148268085,60.489816974639886],"label":"Spread mulch"}]},"c6nEk5N4fSU":{"duration":95.156,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=c6nEk5N4fSU","annotations":[{"segment":[24.642421386651385,30.431905929298395],"label":"Elliptical trainer"},{"segment":[39.63570186889108,51.3631191950078],"label":"Elliptical trainer"},{"segment":[56.26191380031201,75.4117472574103],"label":"Elliptical trainer"}]},"BbeZ2t__2iA":{"duration":9.43,"subset":"training","resolution":"480x480","url":"https://www.youtube.com/watch?v=BbeZ2t__2iA","annotations":[{"segment":[0.009999999999999998,9.4273],"label":"Drinking coffee"}]},"JguRZtFN-Ck":{"duration":25.64,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=JguRZtFN-Ck","annotations":[]},"WXwW9nf6suY":{"duration":118.283,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WXwW9nf6suY","annotations":[]},"Q0UzDeDra3U":{"duration":180.164,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Q0UzDeDra3U","annotations":[{"segment":[0.01,164.9653151326053],"label":"Changing car 
wheel"}]},"YdfWewUrcTw":{"duration":186.55,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YdfWewUrcTw","annotations":[{"segment":[48.7409321372855,69.11027691107644],"label":"Skateboarding"},{"segment":[71.43820202808112,81.62287441497661],"label":"Skateboarding"},{"segment":[91.22556552262091,99.37330343213729],"label":"Skateboarding"},{"segment":[107.23005070202808,138.65703978159127],"label":"Skateboarding"}]},"BH9zsAu6OMQ":{"duration":136.07,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BH9zsAu6OMQ","annotations":[{"segment":[30.567666271450857,40.96916382215288],"label":"Triple jump"}]},"2RpAv3irv1c":{"duration":161.426,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2RpAv3irv1c","annotations":[{"segment":[52.381319720816045,154.6256248393136],"label":"Braiding hair"}]},"Mu8-AwcMqaw":{"duration":146.309,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Mu8-AwcMqaw","annotations":[{"segment":[43.82408948602842,104.7669639275367],"label":"Braiding hair"}]},"An_CpsJkJMM":{"duration":28.03,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=An_CpsJkJMM","annotations":[{"segment":[15.171903195007799,28.026484],"label":"Washing 
face"}]},"3oy4P1gyU4k":{"duration":225.606,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=3oy4P1gyU4k","annotations":[]},"l3wFDLyBQ9U":{"duration":29.03,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=l3wFDLyBQ9U","annotations":[]},"0e7-mNDgIXw":{"duration":195.42,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0e7-mNDgIXw","annotations":[{"segment":[5.487579992995638,7.316773323994185],"label":"Skiing"},{"segment":[37.498463285470194,42.071446612966554],"label":"Skiing"},{"segment":[62.49743880911699,69.50934657794474],"label":"Skiing"},{"segment":[80.78937211910245,86.88668322243093],"label":"Skiing"},{"segment":[90.54506988442803,100.91049876008645],"label":"Skiing"},{"segment":[111.58079319091131,146.03060092471725],"label":"Skiing"}]},"Ar6TcWqwK-A":{"duration":216.62,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=Ar6TcWqwK-A","annotations":[]},"nn__PD7zCcs":{"duration":41.332,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=nn__PD7zCcs","annotations":[{"segment":[0.01,41.331519],"label":"Plastering"}]},"nOxKfRwdf2w":{"duration":228.07,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nOxKfRwdf2w","annotations":[{"segment":[16.722496948857543,25.973239764430577],"label":"Ballet"},{"segment":[35.22398269422777,58.70663782371295],"label":"Ballet"},{"segment":[64.39940270358815,101.40237442277692],"label":"Ballet"},{"segment":[118.1248712574103,218.1040544602184],"label":"Ballet"}]},"dKwPfFN7DpM":{"duration":90.51,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=dKwPfFN7DpM","annotations":[{"segment":[4.374675507020281,87.77574726989079],"label":"Hammer 
throw"}]},"Jy9kiITFsjI":{"duration":88.77,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Jy9kiITFsjI","annotations":[{"segment":[9.27859969364761,88.7698866213152],"label":"Bullfighting"}]},"cIlE202sClg":{"duration":33.715,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=cIlE202sClg","annotations":[]},"VToS7Ma-V4g":{"duration":49.23,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=VToS7Ma-V4g","annotations":[{"segment":[4.377377868954758,13.90009463650546],"label":"Triple jump"},{"segment":[27.493004861154446,40.5483423650546],"label":"Triple jump"}]},"sS1Zh8mqMOE":{"duration":108.113,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=sS1Zh8mqMOE","annotations":[{"segment":[20.661050442017682,83.23451749498551],"label":"Arm wrestling"}]},"6-P9SRNbwac":{"duration":116.216,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=6-P9SRNbwac","annotations":[{"segment":[14.373115909861912,90.40219105004883],"label":"Building sandcastles"}]},"IseUmPHr054":{"duration":151.3,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IseUmPHr054","annotations":[]},"0p3JN4KJsUs":{"duration":97.32,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0p3JN4KJsUs","annotations":[{"segment":[13.046374414976599,87.24660686427457],"label":"Ballet"}]},"Mk1gOZ5EOUk":{"duration":86.518,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Mk1gOZ5EOUk","annotations":[{"segment":[0.01,85.5328159126365],"label":"River tubing"}]},"tCkHrK6mRME":{"duration":124.111,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tCkHrK6mRME","annotations":[{"segment":[0.01,124.021],"label":"Disc 
dog"}]},"tGuPkIKGIK4":{"duration":94.34,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=tGuPkIKGIK4","annotations":[{"segment":[1.6368337,69.87539001560063],"label":"Bathing dog"}]},"19fGZFUE7yE":{"duration":112.176,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=19fGZFUE7yE","annotations":[{"segment":[35.53974611801617,111.02239217409094],"label":"Playing blackjack"}]},"_SJOVswvGRc":{"duration":40.24,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_SJOVswvGRc","annotations":[]},"eg3oZ4XCEKk":{"duration":184.715,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eg3oZ4XCEKk","annotations":[{"segment":[14.404134165366616,18.43729173166927],"label":"Running a marathon"},{"segment":[23.046614664586585,25.351276131045243],"label":"Running a marathon"},{"segment":[30.24868174726989,33.12950858034321],"label":"Running a marathon"},{"segment":[63.666273010920435,74.32533229329174],"label":"Running a marathon"},{"segment":[75.7657457098284,78.07886271450859],"label":"Running a marathon"},{"segment":[101.41608736349455,114.9574399375975],"label":"Running a marathon"},{"segment":[121.87217316692669,130.80370358814355],"label":"Running a marathon"},{"segment":[133.68484243369736,144.92128393135727],"label":"Running a marathon"},{"segment":[150.6835616224649,169.12285023400938],"label":"Running a marathon"}]},"ZRH7JzcSm1s":{"duration":61.93,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=ZRH7JzcSm1s","annotations":[]},"NVNUT5iwLtw":{"duration":101.43,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=NVNUT5iwLtw","annotations":[{"segment":[0.01,101.281],"label":"Hand washing 
clothes"}]},"S5zweEQSnho":{"duration":135.4,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=S5zweEQSnho","annotations":[{"segment":[55.446549290543054,78.04761890611678],"label":"Tennis serve with ball bouncing"},{"segment":[52.91184921294852,79.10374263572544],"label":"Tennis serve with ball bouncing"}]},"bghOBRA0z-Y":{"duration":204.29,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bghOBRA0z-Y","annotations":[{"segment":[89.55565955971572,106.7656439590917],"label":"Playing harmonica"},{"segment":[119.51377944617784,129.39358521372856],"label":"Playing harmonica"},{"segment":[149.7906035725429,160.94522298751951],"label":"Playing harmonica"}]},"fdHpRUOSi28":{"duration":78.53,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=fdHpRUOSi28","annotations":[{"segment":[5.390507037968594,74.4870048049922],"label":"Clipping cat claws"}]},"QImpWj3AJcY":{"duration":155.481,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QImpWj3AJcY","annotations":[]},"VEwZJ-5u_Xs":{"duration":153.299,"subset":"training","resolution":"1906x1080","url":"https://www.youtube.com/watch?v=VEwZJ-5u_Xs","annotations":[{"segment":[0.19999999999999998,142.73850155763242],"label":"Kite 
flying"}]},"-uJnucdW6DY":{"duration":220.07999999999998,"subset":"testing","resolution":"810x480","url":"https://www.youtube.com/watch?v=-uJnucdW6DY","annotations":[]},"kl-2y3jtPNE":{"duration":99.28999999999999,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kl-2y3jtPNE","annotations":[]},"08ItAtlb60Q":{"duration":80.179,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=08ItAtlb60Q","annotations":[{"segment":[40.96347214275118,65.69142064174454],"label":"BMX"}]},"9LraB2HBC0g":{"duration":184.02,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9LraB2HBC0g","annotations":[{"segment":[59.874629485179405,119.89284321372854],"label":"Shaving legs"}]},"3kcGcDv0NOw":{"duration":73.65,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=3kcGcDv0NOw","annotations":[{"segment":[1.5495093603744148,5.107641965678627],"label":"Discus throw"},{"segment":[26.34165912636505,33.22836739469579],"label":"Discus throw"},{"segment":[55.49539079563182,61.349092823712944],"label":"Discus throw"}]},"PDjtB578yRk":{"duration":148.167,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=PDjtB578yRk","annotations":[{"segment":[39.69570602072604,141.0120719689745],"label":"Capoeira"}]},"JZDTcGPKw2w":{"duration":195.33,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JZDTcGPKw2w","annotations":[{"segment":[9.44410979160255,12.79524552410668],"label":"Horseback riding"},{"segment":[19.49751698911494,22.239355315709226],"label":"Horseback riding"},{"segment":[35.33924954277083,40.51827749300448],"label":"Horseback riding"}]},"_IlyE43ZUH8":{"duration":230.34,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_IlyE43ZUH8","annotations":[{"segment":[99.16236505460218,146.2285600624025],"label":"Cleaning 
windows"}]},"CG-itBlFOzc":{"duration":49.041,"subset":"validation","resolution":"192x144","url":"https://www.youtube.com/watch?v=CG-itBlFOzc","annotations":[{"segment":[0.01,28.919384889681304],"label":"Knitting"}]},"leJM3mgm_gU":{"duration":3.6,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=leJM3mgm_gU","annotations":[{"segment":[0.8256106,3.42470043],"label":"Playing kickball"}]},"JJ0jtr07EGE":{"duration":81.363,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JJ0jtr07EGE","annotations":[{"segment":[0.2,72.12558285989357],"label":"Riding bumper cars"}]},"zjbxaEVgzyE":{"duration":51.224,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=zjbxaEVgzyE","annotations":[{"segment":[0.20000000000000004,30.01899531981279],"label":"Sumo"}]},"d6uMS_ZgMg4":{"duration":110.67,"subset":"training","resolution":"624x360","url":"https://www.youtube.com/watch?v=d6uMS_ZgMg4","annotations":[{"segment":[15.365523990639625,99.27164375975039],"label":"Spinning"}]},"YtV4sdy7bqE":{"duration":203.97,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YtV4sdy7bqE","annotations":[]},"3C00m8lkJi0":{"duration":54.242,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3C00m8lkJi0","annotations":[{"segment":[5.820630741675857,42.17849015915985],"label":"Spread mulch"}]},"vlBzrnOuW9U":{"duration":28.49,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=vlBzrnOuW9U","annotations":[{"segment":[2.684589152106084,5.1251247449297965],"label":"Playing kickball"},{"segment":[17.682789704368172,18.43713706942278],"label":"Playing 
kickball"}]},"qVikbiCtK7k":{"duration":212.161,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=qVikbiCtK7k","annotations":[]},"df4QVzHEHeo":{"duration":194.259,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=df4QVzHEHeo","annotations":[]},"nDG3MsnLE20":{"duration":60.721,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=nDG3MsnLE20","annotations":[{"segment":[24.281866614664587,32.99115210608424],"label":"Curling"},{"segment":[40.375111544461774,56.373690327613105],"label":"Curling"}]},"QoTM5tmcJeI":{"duration":31.092,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=QoTM5tmcJeI","annotations":[{"segment":[2.466045013345389,28.19028696668909],"label":"Table soccer"}]},"TypqJox_rts":{"duration":35.83,"subset":"testing","resolution":"640x352","url":"https://www.youtube.com/watch?v=TypqJox_rts","annotations":[]},"KcSo4p-hhBs":{"duration":90.63,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=KcSo4p-hhBs","annotations":[{"segment":[18.58125381,89.47405063260531],"label":"Getting a haircut"}]},"8A4cZXyDrkU":{"duration":54.173,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8A4cZXyDrkU","annotations":[]},"EM7A0cTCrNw":{"duration":219.267,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=EM7A0cTCrNw","annotations":[]},"HSo1WlL5CcI":{"duration":64.9,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=HSo1WlL5CcI","annotations":[]},"ENIllA4z1Aw":{"duration":27.12,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ENIllA4z1Aw","annotations":[]},"f9Bo4yVcnDE":{"duration":129.85,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=f9Bo4yVcnDE","annotations":[{"segment":[43.14695023719316,43.95722160314984],"label":"Chopping 
wood"},{"segment":[80.62200091268957,126.60489957098285],"label":"Chopping wood"}]},"C5SXR9nMY3I":{"duration":62.39,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=C5SXR9nMY3I","annotations":[]},"lX2rbXJpScw":{"duration":123.716,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lX2rbXJpScw","annotations":[]},"dFSGN8U8EnA":{"duration":106.418,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=dFSGN8U8EnA","annotations":[{"segment":[0.2,106.2166439909297],"label":"Applying sunscreen"}]},"qpVPY6w9Fp0":{"duration":234.336,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=qpVPY6w9Fp0","annotations":[{"segment":[45.68408736349454,205.4158221528861],"label":"Building sandcastles"}]},"caeb6tAThNI":{"duration":25.5,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=caeb6tAThNI","annotations":[]},"B39pJK4FU1o":{"duration":44.24,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=B39pJK4FU1o","annotations":[{"segment":[17.58386115444618,44.13204368174727],"label":"Hopscotch"}]},"IaQGTPWI_KE":{"duration":229.692,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=IaQGTPWI_KE","annotations":[]},"bd3Df5_QMNw":{"duration":60.51,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bd3Df5_QMNw","annotations":[{"segment":[30.774806864274574,60.51120181405896],"label":"Brushing teeth"}]},"Hc7A90HiA28":{"duration":147.192,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Hc7A90HiA28","annotations":[{"segment":[3.4432371294851793,112.47907956318252],"label":"Tug of war"}]},"f4k-dKaEZog":{"duration":9.03,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=f4k-dKaEZog","annotations":[{"segment":[0,8.990286923556942],"label":"Playing 
lacrosse"}]},"cptHZTGmOng":{"duration":152.323,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=cptHZTGmOng","annotations":[]},"HWymkBdgm1M":{"duration":143.96,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=HWymkBdgm1M","annotations":[{"segment":[21.111684074882994,33.239672798751954],"label":"Playing piano"},{"segment":[33.4642651825273,40.20203669578783],"label":"Playing piano"},{"segment":[50.30869396567863,87.5910296723869],"label":"Playing piano"}]},"Kdf1HhynNx8":{"duration":204.638,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=Kdf1HhynNx8","annotations":[{"segment":[45.33303279563183,204.637141],"label":"Gargling mouthwash"}]},"rbnlUn4UNSk":{"duration":199.622,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rbnlUn4UNSk","annotations":[{"segment":[0.2,199.381],"label":"Scuba diving"}]},"i1s-HuRTr5k":{"duration":165.791,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=i1s-HuRTr5k","annotations":[{"segment":[0.01,156.22066599063962],"label":"Putting on shoes"}]},"lx5Yfoj7Gk8":{"duration":101.565,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lx5Yfoj7Gk8","annotations":[{"segment":[12.555712702350082,87.19420491418809],"label":"Hanging wallpaper"}]},"UOF3w4T-bBE":{"duration":32.39,"subset":"training","resolution":"176x144","url":"https://www.youtube.com/watch?v=UOF3w4T-bBE","annotations":[{"segment":[1.42534256,30.9067511700468],"label":"Shaving legs"}]},"FoPNGc6Lg8k":{"duration":144.45,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FoPNGc6Lg8k","annotations":[{"segment":[9.458845553822155,92.78677067082685],"label":"Using the balance 
beam"}]},"B5Ea3Bs8hC4":{"duration":73.52,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=B5Ea3Bs8hC4","annotations":[{"segment":[2.0643662246489862,68.58283346333853],"label":"Hand washing clothes"}]},"86ZBHRSW6Gc":{"duration":222.22,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=86ZBHRSW6Gc","annotations":[]},"OESjoURo19U":{"duration":189.17,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=OESjoURo19U","annotations":[]},"i9Yxt2k2aWg":{"duration":35.434,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=i9Yxt2k2aWg","annotations":[{"segment":[14.195344982895897,18.48242909504543],"label":"Doing crunches"}]},"GdkZLDpwk2o":{"duration":36.11,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GdkZLDpwk2o","annotations":[]},"cvuSpqwxRYE":{"duration":50.504,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=cvuSpqwxRYE","annotations":[{"segment":[0.01,50.50340136054422],"label":"Bullfighting"}]},"r-nPLO3vHfU":{"duration":18.785,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=r-nPLO3vHfU","annotations":[]},"cdiazgshHuc":{"duration":52.01,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cdiazgshHuc","annotations":[{"segment":[4.047184867394696,17.70132371294852],"label":"Grooming horse"}]},"1UIathRb404":{"duration":128.29,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1UIathRb404","annotations":[{"segment":[3.1520853,121.48546105460217],"label":"Playing field hockey"}]},"mV5DfYFg4H0":{"duration":87.099,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mV5DfYFg4H0","annotations":[{"segment":[15.513673727172186,75.18164880505381],"label":"Clipping cat 
claws"}]},"YJxqF6aSs7s":{"duration":147.121,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YJxqF6aSs7s","annotations":[{"segment":[33.17663693782334,121.03752249922239],"label":"Rafting"}]},"HPULzvqZ6SQ":{"duration":114.15,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HPULzvqZ6SQ","annotations":[]},"jQgAdClKvDE":{"duration":193.957,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jQgAdClKvDE","annotations":[{"segment":[138.76527678489091,180.44923005591895],"label":"Wakeboarding"}]},"56McYSkE4uQ":{"duration":49.087,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=56McYSkE4uQ","annotations":[{"segment":[0.30631502652106085,49.08698412698413],"label":"Playing rubik cube"}]},"ZmVy3gSaKv0":{"duration":105.63,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZmVy3gSaKv0","annotations":[{"segment":[11.205421234536455,104.80364398127925],"label":"Springboard diving"}]},"XxW4XWySzOw":{"duration":153.693,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=XxW4XWySzOw","annotations":[]},"R-H_kkrXtmM":{"duration":192.42,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=R-H_kkrXtmM","annotations":[]},"Mt42eefIErM":{"duration":26.63,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=Mt42eefIErM","annotations":[]},"er5jUsRr4y0":{"duration":119.351,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=er5jUsRr4y0","annotations":[]},"ga_pm6X1zUQ":{"duration":59.769,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=ga_pm6X1zUQ","annotations":[]},"E0niuPtg16o":{"duration":65.02,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=E0niuPtg16o","annotations":[{"segment":[11.25854765522621,58.92987556474258],"label":"Layup drill in 
basketball"}]},"gVX9h58RsPU":{"duration":155.876,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=gVX9h58RsPU","annotations":[]},"_RCe4Q0p1aA":{"duration":89.53999999999999,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=_RCe4Q0p1aA","annotations":[{"segment":[42.94683776,81.19661696],"label":"Clean and jerk"}]},"u3B3M17akWU":{"duration":35.899,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=u3B3M17akWU","annotations":[{"segment":[0.2,30.266460218408735],"label":"Having an ice cream"}]},"FEBEqdo45wM":{"duration":50.829,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FEBEqdo45wM","annotations":[]},"EBOT5e2xAoQ":{"duration":98.31,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=EBOT5e2xAoQ","annotations":[{"segment":[2.60627763,86.32168954758191],"label":"High jump"}]},"ZVIi4lPU6h0":{"duration":69.405,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZVIi4lPU6h0","annotations":[{"segment":[13.317857429017161,43.5266559875195],"label":"Carving jack-o-lanterns"}]},"uQsL4kCWu8U":{"duration":21.71,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=uQsL4kCWu8U","annotations":[]},"YotaXgC6Ee8":{"duration":110.53,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YotaXgC6Ee8","annotations":[{"segment":[22.581567059999998,33.86206942277691],"label":"Springboard diving"},{"segment":[3.5326840873634944,6.634553042121684],"label":"Springboard diving"},{"segment":[9.047117784711388,13.527595163806552],"label":"Springboard diving"},{"segment":[20.2483112324493,26.45204914196568],"label":"Springboard diving"},{"segment":[70.91217082683306,74.01403978159126],"label":"Springboard diving"},{"segment":[94.52084009360374,98.65666536661467],"label":"Springboard 
diving"}]},"MFlYmFZ7xlo":{"duration":34.784,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=MFlYmFZ7xlo","annotations":[{"segment":[0.2,3.846772369158878],"label":"Mooping floor"},{"segment":[12.461375280373831,16.09142807943925],"label":"Mooping floor"}]},"1gp-5iOIfVo":{"duration":169.645,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1gp-5iOIfVo","annotations":[{"segment":[44.991488285381756,162.49866809672386],"label":"Running a marathon"}]},"rDGbxMgkt7w":{"duration":189.59,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rDGbxMgkt7w","annotations":[{"segment":[8.550702028081124,164.52730109204367],"label":"Zumba"}]},"sY8TfKQHe5w":{"duration":28.166,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=sY8TfKQHe5w","annotations":[{"segment":[5.045109204368175,17.24111232449298],"label":"Carving jack-o-lanterns"}]},"ZIHD-3iWCrE":{"duration":57.911,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZIHD-3iWCrE","annotations":[{"segment":[2.8006417535101407,55.380432093603744],"label":"Clipping cat claws"}]},"afL6f_pwgMs":{"duration":120.93,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=afL6f_pwgMs","annotations":[{"segment":[0.01,120.92952380952381],"label":"River tubing"}]},"mzVJHw9Jrb4":{"duration":140.713,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mzVJHw9Jrb4","annotations":[{"segment":[5.597784114409881,51.03861986667834],"label":"Rollerblading"},{"segment":[54.98999688861472,120.62675964189137],"label":"Rollerblading"}]},"lKCDcLEby-Y":{"duration":139.645,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lKCDcLEby-Y","annotations":[{"segment":[26.8368093661133,101.64930780848091],"label":"Tug of 
war"}]},"aBPtU0R0zx8":{"duration":123.14,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aBPtU0R0zx8","annotations":[]},"reL2X6B87Rs":{"duration":113.03999999999999,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=reL2X6B87Rs","annotations":[]},"mGLCCohSA0A":{"duration":169.924,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mGLCCohSA0A","annotations":[{"segment":[51.95772531045241,156.13826636661466],"label":"Laying tile"}]},"nYisYkLo5xM":{"duration":98.941,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=nYisYkLo5xM","annotations":[]},"MyqWp3ipXxA":{"duration":219.081,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=MyqWp3ipXxA","annotations":[{"segment":[30.464008621117838,36.94769268664436],"label":"Carving jack-o-lanterns"},{"segment":[41.042651043819006,59.811210180869466],"label":"Carving jack-o-lanterns"},{"segment":[144.44034956247881,181.63622130681514],"label":"Carving jack-o-lanterns"},{"segment":[205.5234773501062,215.0783801351529],"label":"Carving jack-o-lanterns"}]},"jXZ6WVxXgLA":{"duration":12.911,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=jXZ6WVxXgLA","annotations":[{"segment":[0.2,12.681000000000001],"label":"Raking leaves"}]},"K9ccE4wrTts":{"duration":86.89,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=K9ccE4wrTts","annotations":[{"segment":[23.574106084243372,55.996945397815914],"label":"Shaving"}]},"DEduSDgovOQ":{"duration":170.621,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=DEduSDgovOQ","annotations":[{"segment":[0.01,28.037432078179002],"label":"Grooming dog"},{"segment":[30.699213733118665,42.14487484935923],"label":"Grooming dog"},{"segment":[55.45378312405757,165.65154258842554],"label":"Grooming 
dog"}]},"TnvAN5iwpIw":{"duration":93.46000000000001,"subset":"validation","resolution":"480x272","url":"https://www.youtube.com/watch?v=TnvAN5iwpIw","annotations":[{"segment":[57.00921028393135,58.02983553198128],"label":"Playing saxophone"},{"segment":[59.77947881435257,64.88260505460218],"label":"Playing saxophone"},{"segment":[67.36126637129485,78.15073327925117],"label":"Playing saxophone"}]},"AF6tasHTW-8":{"duration":72.678,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AF6tasHTW-8","annotations":[{"segment":[7.483275631825273,18.368040187207487],"label":"Rope skipping"},{"segment":[20.86246539781591,65.87550215288611],"label":"Rope skipping"}]},"1SdH6HFnOZA":{"duration":123.298,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=1SdH6HFnOZA","annotations":[{"segment":[0.01,75.402183975039],"label":"Beach soccer"}]},"EsdNBkcubkw":{"duration":143.22,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EsdNBkcubkw","annotations":[]},"bQ7CWiB6TAg":{"duration":59.05,"subset":"testing","resolution":"424x240","url":"https://www.youtube.com/watch?v=bQ7CWiB6TAg","annotations":[]},"RzMKERQ9vOU":{"duration":157.014,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=RzMKERQ9vOU","annotations":[{"segment":[92.89639225958295,140.98553222847875],"label":"Doing crunches"}]},"lb-TlBtrKcU":{"duration":204.82,"subset":"training","resolution":"854x478","url":"https://www.youtube.com/watch?v=lb-TlBtrKcU","annotations":[{"segment":[49.98234009360374,56.369859594383776],"label":"Archery"},{"segment":[143.5595007800312,162.4026833073323],"label":"Archery"},{"segment":[186.3558814352574,193.06277691107644],"label":"Archery"}]},"BcflqWdlBjI":{"duration":4.64,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BcflqWdlBjI","annotations":[{"segment":[0,4.596666],"label":"Chopping 
wood"}]},"oA8ZUG1y4Lc":{"duration":113.08,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=oA8ZUG1y4Lc","annotations":[{"segment":[3.881101312079694,106.90669977819522],"label":"Cumbia"}]},"vGf06QD6AB4":{"duration":203.01,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=vGf06QD6AB4","annotations":[]},"KxWvKN_kGMY":{"duration":106.673,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KxWvKN_kGMY","annotations":[{"segment":[60.90815074235622,90.19731612665866],"label":"Canoeing"}]},"-xQvJmC2jhk":{"duration":95.086,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=-xQvJmC2jhk","annotations":[{"segment":[7.445822715158088,63.28276009989329],"label":"Canoeing"}]},"bmoS216hsoc":{"duration":122.3,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=bmoS216hsoc","annotations":[{"segment":[15.672762451581864,116.63652413382485],"label":"Playing flauta"}]},"DyXsrXlQMr4":{"duration":86.96000000000001,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=DyXsrXlQMr4","annotations":[]},"DXG4djsPk7s":{"duration":19.48,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=DXG4djsPk7s","annotations":[{"segment":[0,19.441],"label":"Windsurfing"}]},"2-xMWBQA7iM":{"duration":74.792,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2-xMWBQA7iM","annotations":[{"segment":[14.695609984399374,43.27040717628705],"label":"Using the rowing machine"}]},"b1uGF3c3Vz4":{"duration":71.448,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=b1uGF3c3Vz4","annotations":[{"segment":[30.863677067082687,62.73014508580344],"label":"Clipping cat 
claws"}]},"oO1g33vi4hg":{"duration":220.915,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=oO1g33vi4hg","annotations":[{"segment":[2.5848047620904837,39.805993336193445],"label":"Snowboarding"},{"segment":[54.97018127379095,205.5781387449298],"label":"Snowboarding"}]},"2zQk4ZEfzf4":{"duration":37.94,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2zQk4ZEfzf4","annotations":[{"segment":[0,37.940883],"label":"Hammer throw"}]},"0Y7c4uPB1tE":{"duration":223.59,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=0Y7c4uPB1tE","annotations":[]},"wIK83guBfM0":{"duration":44.42,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=wIK83guBfM0","annotations":[{"segment":[6.473671606864275,11.73569344773791],"label":"Carving jack-o-lanterns"}]},"Kl9jdsVmB2A":{"duration":87.958,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Kl9jdsVmB2A","annotations":[]},"ZjYttT9itfY":{"duration":184.04,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=ZjYttT9itfY","annotations":[{"segment":[18.649929797191888,22.66683775351014],"label":"Shot put"},{"segment":[32.99602964118565,39.02139157566303],"label":"Shot put"},{"segment":[66.56590327613105,115.62956474258971],"label":"Shot put"},{"segment":[117.06417472698908,140.59177847113884],"label":"Shot put"},{"segment":[148.0517503900156,159.24170826833074],"label":"Shot put"}]},"t3UI2gjo8X8":{"duration":65.69,"subset":"training","resolution":"630x360","url":"https://www.youtube.com/watch?v=t3UI2gjo8X8","annotations":[{"segment":[8.65669398673947,61.415548361934476],"label":"Playing water polo"}]},"uFhZhnlYKRw":{"duration":143.871,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=uFhZhnlYKRw","annotations":[{"segment":[49.602894224648985,128.6084090078003],"label":"Making a 
lemonade"}]},"d_JH9U-UI3c":{"duration":142.13,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=d_JH9U-UI3c","annotations":[{"segment":[0.01,132.26232877342306],"label":"Baton twirling"}]},"x6Gs4PINiiI":{"duration":157.53,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=x6Gs4PINiiI","annotations":[{"segment":[1.2287376833073322,4.914950733229329],"label":"Hopscotch"},{"segment":[6.389435953198128,11.304386686427458],"label":"Hopscotch"},{"segment":[13.516114516380654,17.93957017628705],"label":"Hopscotch"},{"segment":[21.871530762870513,64.14010706864275],"label":"Hopscotch"}]},"l5HNvNpRTpk":{"duration":172.46,"subset":"training","resolution":"810x480","url":"https://www.youtube.com/watch?v=l5HNvNpRTpk","annotations":[{"segment":[46.27487011861426,163.57628395631826],"label":"Making a sandwich"}]},"k4a5XXRYKK4":{"duration":45.09,"subset":"training","resolution":"482x360","url":"https://www.youtube.com/watch?v=k4a5XXRYKK4","annotations":[{"segment":[1.055221918876755,44.03792808112325],"label":"Playing lacrosse"}]},"Dh3bLRYJkiY":{"duration":59.0,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Dh3bLRYJkiY","annotations":[{"segment":[5.303249200000001,27.9395856],"label":"Long jump"},{"segment":[31.545300800000003,51.545605200000004],"label":"Long 
jump"}]},"1KONAdZGdWU":{"duration":200.99,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=1KONAdZGdWU","annotations":[]},"VEXswbPrwxE":{"duration":79.016,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VEXswbPrwxE","annotations":[{"segment":[0.01,79.001],"label":"Bullfighting"}]},"q9nloCMmFuo":{"duration":120.14,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=q9nloCMmFuo","annotations":[]},"kDj0WKEI-i8":{"duration":223.03,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kDj0WKEI-i8","annotations":[]},"GbOJ1Yqxyx0":{"duration":87.725,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=GbOJ1Yqxyx0","annotations":[]},"xdl0lyO9ub0":{"duration":222.05,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xdl0lyO9ub0","annotations":[{"segment":[43.643569422776906,172.8423900156006],"label":"Clean and jerk"}]},"V_rril47Z5Q":{"duration":164.816,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=V_rril47Z5Q","annotations":[{"segment":[14.141713097095312,160.70128412636504],"label":"Baton twirling"}]},"FxpP7lqvm30":{"duration":97.849,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=FxpP7lqvm30","annotations":[]},"kv8lsSjfguI":{"duration":120.512,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=kv8lsSjfguI","annotations":[{"segment":[12.215234009360374,102.79589235569424],"label":"Disc dog"}]},"Yr6Oyltj6Eo":{"duration":52.2,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=Yr6Oyltj6Eo","annotations":[{"segment":[0.01,52.198298],"label":"High 
jump"}]},"VBpE3V49xEg":{"duration":236.403,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VBpE3V49xEg","annotations":[]},"9elfMU_LRKc":{"duration":43.86,"subset":"validation","resolution":"202x360","url":"https://www.youtube.com/watch?v=9elfMU_LRKc","annotations":[{"segment":[13.411932336973479,22.376029970358815],"label":"Fixing bicycle"}]},"v13d7g_uUXM":{"duration":167.49,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=v13d7g_uUXM","annotations":[{"segment":[19.093290000000003,64.4013322],"label":"Clean and jerk"},{"segment":[73.85586045,159.47129843447738],"label":"Clean and jerk"}]},"cIaqen3kVIA":{"duration":145.4,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cIaqen3kVIA","annotations":[{"segment":[30.04670046801872,113.49715912636505],"label":"Playing flauta"},{"segment":[122.34109360374414,139.5754274570983],"label":"Playing flauta"}]},"uzgIBU9arcU":{"duration":25.333,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=uzgIBU9arcU","annotations":[{"segment":[0.19999999999999998,24.118146645865835],"label":"Swinging at the playground"}]},"qXUdnj4VN80":{"duration":150.489,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qXUdnj4VN80","annotations":[{"segment":[0.2,143.16443057722307],"label":"Hula 
hoop"}]},"Dod5s-yv0es":{"duration":84.41,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=Dod5s-yv0es","annotations":[]},"AxaksczuL80":{"duration":150.79,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=AxaksczuL80","annotations":[{"segment":[60.45729964164553,135.73487110109204],"label":"Shaving"}]},"HURRgbZGz8Q":{"duration":206.87,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=HURRgbZGz8Q","annotations":[]},"sz1772O9AUM":{"duration":90.837,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=sz1772O9AUM","annotations":[{"segment":[30.467768261750876,61.644088198127925],"label":"Hand car wash"}]},"FDIYymZEFi0":{"duration":24.99,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FDIYymZEFi0","annotations":[{"segment":[4.443451664597196,18.436426643460297],"label":"Washing hands"}]},"XkWO7aqcC8Y":{"duration":154.32,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XkWO7aqcC8Y","annotations":[{"segment":[39.71351794071763,138.63628081123247],"label":"Laying tile"}]},"Ci__IRtoMOo":{"duration":208.771,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ci__IRtoMOo","annotations":[{"segment":[0.2,198.38077781557274],"label":"Riding bumper 
cars"}]},"zqxzFxya6T8":{"duration":42.75,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zqxzFxya6T8","annotations":[]},"yGTIEmZ7S2w":{"duration":97.62,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=yGTIEmZ7S2w","annotations":[]},"2-h36nfbFK8":{"duration":180.675,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2-h36nfbFK8","annotations":[{"segment":[0,106.54438147581902],"label":"Snowboarding"},{"segment":[133.8851354524181,166.58129484711387],"label":"Snowboarding"}]},"7845vSLQoG8":{"duration":28.77,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=7845vSLQoG8","annotations":[{"segment":[7.9317841028037375,23.929789327102803],"label":"Painting fence"}]},"lOZ9bfzq89o":{"duration":89.1,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=lOZ9bfzq89o","annotations":[]},"0yGGccaHMnI":{"duration":46.069,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=0yGGccaHMnI","annotations":[{"segment":[0.20000000000000004,45.86839002267573],"label":"Powerbocking"}]},"u3XOLSC7lg0":{"duration":33.67,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=u3XOLSC7lg0","annotations":[]},"84x9A9xyLcE":{"duration":111.46000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=84x9A9xyLcE","annotations":[{"segment":[26.416461778471138,107.5775647425897],"label":"Windsurfing"}]},"RIvAh7e3XSk":{"duration":66.665,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=RIvAh7e3XSk","annotations":[]},"Nc71GzZhAHE":{"duration":213.229,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Nc71GzZhAHE","annotations":[{"segment":[25.605923556942276,68.17161466458658],"label":"Skiing"},{"segment":[111.40239469578785,206.1775663026521],"label":"Skiing"}]},"jxAZc0VgG4w":{"duration":99.89
,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=jxAZc0VgG4w","annotations":[{"segment":[3.7400906321967167,32.88162971294852],"label":"Triple jump"},{"segment":[50.64705998439938,79.47692489859594],"label":"Triple jump"}]},"zSvsPo5jbn8":{"duration":113.87,"subset":"testing","resolution":"654x480","url":"https://www.youtube.com/watch?v=zSvsPo5jbn8","annotations":[]},"0O589B9zr84":{"duration":114.92,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0O589B9zr84","annotations":[{"segment":[60.431869119999995,85.48866926677067],"label":"Hand washing clothes"}]},"VLg5XqBRorY":{"duration":185.899,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=VLg5XqBRorY","annotations":[{"segment":[11.018280811232449,152.226248049922],"label":"Making an omelette"}]},"r0WjqMOxP-Y":{"duration":45.77,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=r0WjqMOxP-Y","annotations":[]},"8dhDEj0Dd5s":{"duration":157.339,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=8dhDEj0Dd5s","annotations":[{"segment":[0.009999999999999998,157.281],"label":"Baton twirling"}]},"kaRZaCGzNzw":{"duration":151.42000000000002,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=kaRZaCGzNzw","annotations":[{"segment":[24.59298649620823,56.904782837329726],"label":"Shaving"},{"segment":[94.40533479527366,114.924504734526],"label":"Shaving"}]},"gPjH9C9wdJw":{"duration":183.81,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=gPjH9C9wdJw","annotations":[{"segment":[3.5137692066052795,178.16110286488686],"label":"Playing guitarra"}]},"7MWDmMh3zyA":{"duration":36.525,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=7MWDmMh3zyA","annotations":[{"segment":[1.0957536503087695,30.774136827878863],"label":"River 
tubing"}]},"XqmMZs2-3ZI":{"duration":104.537,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XqmMZs2-3ZI","annotations":[{"segment":[30.170279095163803,76.64881716068643],"label":"Baking cookies"}]},"B-j1ZCJHuD0":{"duration":96.62,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=B-j1ZCJHuD0","annotations":[]},"eUvvxpAK3_8":{"duration":75.47,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eUvvxpAK3_8","annotations":[{"segment":[2.6676523999999997,51.566060480000004],"label":"Polishing shoes"},{"segment":[26.58672184,68.36132832],"label":"Cleaning shoes"},{"segment":[57.90201792,63.32863416],"label":"Polishing shoes"}]},"l4LFSd-7hxU":{"duration":16.556,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=l4LFSd-7hxU","annotations":[{"segment":[0.2,15.448670835735838],"label":"Table soccer"}]},"PJ72Yl0B1rY":{"duration":206.332,"subset":"validation","resolution":"358x240","url":"https://www.youtube.com/watch?v=PJ72Yl0B1rY","annotations":[{"segment":[10.941581903276132,24.457653666146648],"label":"Beach soccer"},{"segment":[35.39923556942278,67.25854758190327],"label":"Beach soccer"},{"segment":[72.72933853354134,88.49808892355695],"label":"Beach soccer"},{"segment":[93.32525741029642,95.8997472698908],"label":"Beach soccer"},{"segment":[99.43967082683308,123.89732449297972],"label":"Beach soccer"},{"segment":[132.90803900156007,133.22985023400935],"label":"Beach soccer"},{"segment":[99.43967082683308,100.08329329173168],"label":"Beach soccer"},{"segment":[115.85204368174726,118.42653354134166],"label":"Beach soccer"},{"segment":[120.67921216848674,123.90933697347893],"label":"Beach soccer"},{"segment":[131.6335553822153,133.56460998439937],"label":"Beach soccer"},{"segment":[137.1048767550702,139.35777379095163],"label":"Beach soccer"},{"segment":[142.89804056162248,144.18541029641185],"label":"Beach 
soccer"},{"segment":[147.72567706708267,151.2659438377535],"label":"Beach soccer"},{"segment":[157.70279251170047,166.39253822152884],"label":"Beach soccer"},{"segment":[168.0017503900156,173.4730717628705],"label":"Beach soccer"},{"segment":[180.55360530421217,183.12834477379096],"label":"Beach soccer"},{"segment":[187.95598127925115,195.35835725429015],"label":"Beach soccer"}]},"4bw6ocN0jGU":{"duration":162.819,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=4bw6ocN0jGU","annotations":[{"segment":[11.938316887051482,147.32391052106084],"label":"Waterskiing"}]},"RZFU1gXLqDs":{"duration":139.204,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RZFU1gXLqDs","annotations":[{"segment":[24.030483043560867,134.87380745471464],"label":"Riding bumper cars"}]},"7n9It_jjaBg":{"duration":50.2,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7n9It_jjaBg","annotations":[{"segment":[16.91156386901832,49.44091066029558],"label":"Bungee jumping"}]},"_vVKdZM5Cy8":{"duration":107.02000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_vVKdZM5Cy8","annotations":[{"segment":[3.5156069999999997,59.13176059999999],"label":"Volleyball"},{"segment":[100.38137254212168,106.773333],"label":"Volleyball"}]},"xbEI98IA-XU":{"duration":111.201,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xbEI98IA-XU","annotations":[{"segment":[17.68546333853354,81.14506708268331],"label":"Kneeling"}]},"J7JLo0nQ5pA":{"duration":179.398,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=J7JLo0nQ5pA","annotations":[{"segment":[30.011335820776726,156.59545997030943],"label":"Making a cake"}]},"7IfBJgi3WCE":{"duration":47.067,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=7IfBJgi3WCE","annotations":[{"segment":[14.758870900156007,45.52487541341654],"label":"Cutting 
the grass"}]},"qUFPq8D0jMc":{"duration":179.281,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qUFPq8D0jMc","annotations":[{"segment":[0.01,163.05924999063964],"label":"Rope skipping"}]},"G8gTBLLf8Bo":{"duration":102.63,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=G8gTBLLf8Bo","annotations":[{"segment":[74.29226189079563,93.02544139860832],"label":"Cleaning windows"}]},"HEuC9ZEY91E":{"duration":151.882,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HEuC9ZEY91E","annotations":[{"segment":[12.55636973478939,25.823477379095163],"label":"Snow tubing"},{"segment":[49.9885663026521,52.35769266770671],"label":"Snow tubing"},{"segment":[58.04359594383775,61.360372854914196],"label":"Snow tubing"},{"segment":[64.20332449297972,75.33821840873634],"label":"Snow tubing"},{"segment":[76.99660686427455,78.89190795631825],"label":"Snow tubing"}]},"mkK9iEzRrqQ":{"duration":17.51,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=mkK9iEzRrqQ","annotations":[{"segment":[0.12272152886115446,15.994705928237131],"label":"Playing field hockey"}]},"chLoV5zQxVA":{"duration":195.814,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=chLoV5zQxVA","annotations":[{"segment":[86.1458868477188,131.35720334935846],"label":"Kite flying"},{"segment":[153.65737929953198,195.20291326521058],"label":"Kite flying"}]},"32-Bxdbf3mQ":{"duration":35.41,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=32-Bxdbf3mQ","annotations":[{"segment":[3.9863057668026243,35.26000240698954],"label":"Mowing the lawn"}]},"6kBo1TR--dk":{"duration":148.7,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6kBo1TR--dk","annotations":[{"segment":[1.855850517013878,28.533701377535102],"label":"Running a marathon"},{"segment":[30.157570561622464,50.10796339469579],"label":"Running a 
marathon"},{"segment":[72.14618803588144,113.20688026209048],"label":"Running a marathon"}]},"tJbScqVWPsU":{"duration":200.76,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tJbScqVWPsU","annotations":[{"segment":[14.09131825273011,161.58044929797194],"label":"Skateboarding"}]},"t6Hc2tHRrs4":{"duration":120.419,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=t6Hc2tHRrs4","annotations":[{"segment":[10.892508580343215,116.24935881435258],"label":"BMX"}]},"ptukoEe5jvI":{"duration":180.05,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ptukoEe5jvI","annotations":[{"segment":[6.739469578783152,171.29485179407175],"label":"Volleyball"}]},"drzTgrfN19M":{"duration":197.72,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=drzTgrfN19M","annotations":[{"segment":[28.994514396255852,177.05160918564744],"label":"Preparing pasta"}]},"npzRJiGHc9c":{"duration":35.5,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=npzRJiGHc9c","annotations":[{"segment":[10.468214354134165,29.96457124648986],"label":"Playing field hockey"}]},"47QZ84A8H0o":{"duration":165.4,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=47QZ84A8H0o","annotations":[]},"eMNvlcxXZj8":{"duration":109.0,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=eMNvlcxXZj8","annotations":[{"segment":[18.28590093603744,104.01700858034322],"label":"Doing step aerobics"}]},"RfYI6hxUM44":{"duration":194.955,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=RfYI6hxUM44","annotations":[]},"6FPoGGaox4g":{"duration":28.003,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=6FPoGGaox4g","annotations":[{"segment":[0.01,27.524790951638064],"label":"Fun sliding 
down"}]},"C6DH4l7fop0":{"duration":29.35,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=C6DH4l7fop0","annotations":[{"segment":[2.838759856474259,29.349114],"label":"Baking cookies"}]},"k1a56VMhv6Q":{"duration":227.28,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=k1a56VMhv6Q","annotations":[]},"GyLX64aJY80":{"duration":192.26,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=GyLX64aJY80","annotations":[{"segment":[4.797836705148206,172.87340075999998],"label":"Making a sandwich"}]},"Q1mh4F0L8Ec":{"duration":222.77,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Q1mh4F0L8Ec","annotations":[{"segment":[7.381578448844491,138.5465450710074],"label":"Grooming horse"},{"segment":[151.7324411864629,210.02798011804268],"label":"Grooming horse"}]},"IGamNJ5yIh4":{"duration":39.591,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=IGamNJ5yIh4","annotations":[{"segment":[2.0318352860096742,11.636879261126316],"label":"Beach soccer"},{"segment":[29.36926813826473,33.740794562837046],"label":"Beach soccer"}]},"6ChRD-1NwSg":{"duration":107.50999999999999,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6ChRD-1NwSg","annotations":[{"segment":[6.373325409206845,9.392269024094297],"label":"Vacuuming floor"},{"segment":[18.784538048188594,23.648391649951712],"label":"Vacuuming floor"},{"segment":[49.47713146621103,52.49607508109848],"label":"Vacuuming floor"},{"segment":[75.47359037218631,77.65393853849392],"label":"Vacuuming floor"},{"segment":[94.42584706396256,100.96689153198128],"label":"Vacuuming floor"}]},"SIeV0DPSpHQ":{"duration":175.799,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=SIeV0DPSpHQ","annotations":[{"segment":[12.979499226241245,156.73977083839077],"label":"Spread 
mulch"}]},"dDN37ufNu84":{"duration":85.26,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dDN37ufNu84","annotations":[{"segment":[8.437277691107646,27.731790750000002],"label":"Ping-pong"},{"segment":[34.64150427,81.11744929797192],"label":"Ping-pong"}]},"AvjExlAl18I":{"duration":148.79,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=AvjExlAl18I","annotations":[{"segment":[7.42806704524181,131.84819005304212],"label":"Tango"}]},"SA2GomtZkvE":{"duration":161.54,"subset":"training","resolution":"600x360","url":"https://www.youtube.com/watch?v=SA2GomtZkvE","annotations":[{"segment":[0,161.481],"label":"Playing accordion"}]},"B-nlhZ0RR4A":{"duration":109.74000000000001,"subset":"training","resolution":"626x360","url":"https://www.youtube.com/watch?v=B-nlhZ0RR4A","annotations":[{"segment":[8.788836329999999,96.55797422542902],"label":"Snatch"}]},"_3xMhj4mbsk":{"duration":197.602,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_3xMhj4mbsk","annotations":[{"segment":[15.078643302180685,143.0932476635514],"label":"Painting furniture"}]},"skIP_U4EYDQ":{"duration":17.857,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=skIP_U4EYDQ","annotations":[{"segment":[10.501975488299532,13.17621858346334],"label":"Building sandcastles"}]},"pnFRC2_HPrE":{"duration":180.489,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=pnFRC2_HPrE","annotations":[{"segment":[0.01,89.81010634805556],"label":"Elliptical trainer"}]},"5x1diXJ8gME":{"duration":72.31,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5x1diXJ8gME","annotations":[{"segment":[1.1569929811184014,71.99977082068227],"label":"Playing 
polo"}]},"vaqswUFfvMY":{"duration":11.91,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vaqswUFfvMY","annotations":[{"segment":[2.5458605273010924,10.499351809672387],"label":"Clean and jerk"}]},"W1FmiUTYt3I":{"duration":121.72,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=W1FmiUTYt3I","annotations":[{"segment":[67.00391575663026,85.03613104524182],"label":"Washing hands"}]},"qMj2sCoRHqY":{"duration":154.51,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qMj2sCoRHqY","annotations":[{"segment":[2.8924600998439938,152.09519358346333],"label":"Playing harmonica"}]},"4xmQoAK3QNc":{"duration":90.88,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4xmQoAK3QNc","annotations":[{"segment":[0.01,22.827051195007797],"label":"Cricket"},{"segment":[38.42317312948518,43.810924343213735],"label":"Cricket"},{"segment":[47.92262921684867,65.36192919812792],"label":"Cricket"},{"segment":[70.74968041185647,73.72712187207489],"label":"Cricket"},{"segment":[76.98812918564742,80.1073535725429],"label":"Cricket"}]},"KCxa27MM7Cg":{"duration":37.59,"subset":"training","resolution":"180x240","url":"https://www.youtube.com/watch?v=KCxa27MM7Cg","annotations":[{"segment":[8.797138806552262,37.5931065],"label":"Doing 
nails"}]},"n913aoCh1IQ":{"duration":78.972,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=n913aoCh1IQ","annotations":[{"segment":[0.01,25.201963379279913],"label":"Slacklining"},{"segment":[28.763644139968015,78.971065],"label":"Slacklining"}]},"VJyNbaAz6Xs":{"duration":167.44,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VJyNbaAz6Xs","annotations":[{"segment":[0.01,31.248260101227824],"label":"Snowboarding"},{"segment":[35.41469531118117,41.14354372486702],"label":"Snowboarding"},{"segment":[53.64284935472705,103.64007187416716],"label":"Snowboarding"},{"segment":[119.00380171087014,151.03327238738646],"label":"Snowboarding"}]},"C0MIMsY6okw":{"duration":217.734,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=C0MIMsY6okw","annotations":[{"segment":[14.606148414644068,197.01316399375978],"label":"Making a lemonade"}]},"eD2SW7ibzmM":{"duration":202.01,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eD2SW7ibzmM","annotations":[{"segment":[17.959090483619345,195.99404368174726],"label":"Zumba"}]},"DvTZ5mmF8NM":{"duration":26.61,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DvTZ5mmF8NM","annotations":[{"segment":[0,26.610067],"label":"Preparing pasta"}]},"6_XA3oKwwzU":{"duration":187.177,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=6_XA3oKwwzU","annotations":[{"segment":[21.024455410869496,175.78780774088105],"label":"Baton twirling"}]},"saShR7NwjMg":{"duration":193.28,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=saShR7NwjMg","annotations":[{"segment":[15.676336973478941,140.1826287051482],"label":"Washing 
hands"}]},"hFWd7qM_imU":{"duration":232.85,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hFWd7qM_imU","annotations":[]},"18yZR3NDaXM":{"duration":215.95,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=18yZR3NDaXM","annotations":[]},"FkQ1V0V_XCs":{"duration":238.03,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FkQ1V0V_XCs","annotations":[{"segment":[77.60967366041581,188.26844280301825],"label":"Doing nails"}]},"SPjlJjW0kBs":{"duration":180.071,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SPjlJjW0kBs","annotations":[]},"xVq_Q3zVUYw":{"duration":80.25,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xVq_Q3zVUYw","annotations":[]},"CAG75_XxmEE":{"duration":29.049,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=CAG75_XxmEE","annotations":[{"segment":[0.2,27.41748205928237],"label":"Cutting the grass"}]},"59yYq2zQ_TY":{"duration":149.142,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=59yYq2zQ_TY","annotations":[]},"oTKYejnHLtE":{"duration":57.15,"subset":"training","resolution":"634x480","url":"https://www.youtube.com/watch?v=oTKYejnHLtE","annotations":[{"segment":[9.761699679407176,54.95970641419657],"label":"Playing saxophone"}]},"7tDDXbiQ8AI":{"duration":177.029,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7tDDXbiQ8AI","annotations":[{"segment":[43.63583714505043,82.576679154241],"label":"Making a lemonade"},{"segment":[108.26106430923903,148.58278724074134],"label":"Making a lemonade"}]},"mQ9e5RyHE_k":{"duration":112.408,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=mQ9e5RyHE_k","annotations":[{"segment":[11.623206497632083,26.85240743221152],"label":"Playing ten pins"},{"segment":[45.232477525669466,47.50810525152617],"label":"Playing ten 
pins"},{"segment":[61.51196817987507,63.43749933252305],"label":"Playing ten pins"},{"segment":[72.89010680915857,77.44136226087197],"label":"Playing ten pins"}]},"TSIz-6s4OeY":{"duration":60.74,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=TSIz-6s4OeY","annotations":[{"segment":[4.6319906396255845,16.54282371294852],"label":"Ironing clothes"},{"segment":[30.81691731669267,49.43941029641186],"label":"Ironing clothes"},{"segment":[50.95189703588144,59.27057410296412],"label":"Ironing clothes"}]},"dF8WhhsAux8":{"duration":236.75,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=dF8WhhsAux8","annotations":[]},"rAuz_Pf3lp8":{"duration":174.916,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=rAuz_Pf3lp8","annotations":[{"segment":[48.693546797814264,94.39475662674117],"label":"River tubing"},{"segment":[96.57100471383292,97.38709774649233],"label":"River tubing"},{"segment":[115.61317547588581,151.79329992378626],"label":"River tubing"}]},"Wzo3_EYrfAY":{"duration":41.959,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=Wzo3_EYrfAY","annotations":[{"segment":[5.760287388455539,41.75845804988662],"label":"Powerbocking"}]},"YBrcJxnXuVU":{"duration":15.65,"subset":"validation","resolution":"206x360","url":"https://www.youtube.com/watch?v=YBrcJxnXuVU","annotations":[{"segment":[5.200351801872075,12.524790959438377],"label":"Drinking beer"}]},"LHewj6shKGw":{"duration":71.66,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=LHewj6shKGw","annotations":[{"segment":[0,68.09094383775351],"label":"High jump"}]},"F8pv5rFrEm4":{"duration":101.31,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=F8pv5rFrEm4","annotations":[{"segment":[2.9225285819032765,90.914335074883],"label":"Doing step 
aerobics"}]},"ahOnhEvMgq0":{"duration":78.39,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ahOnhEvMgq0","annotations":[{"segment":[5.870120312012481,28.739130694227768],"label":"Ping-pong"},{"segment":[27.7931745,40.968548010920436],"label":"Ping-pong"}]},"8Da6w-Eg3Ko":{"duration":226.024,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=8Da6w-Eg3Ko","annotations":[{"segment":[22.20833541341654,223.14089391575666],"label":"Trimming branches or hedges"}]},"v6Ui5kgi2OI":{"duration":82.385,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=v6Ui5kgi2OI","annotations":[{"segment":[24.815996836982322,73.01279809602904],"label":"Making a cake"}]},"MYFVsllwDnc":{"duration":96.688,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MYFVsllwDnc","annotations":[{"segment":[0.2,51.43614741029641],"label":"Cutting the grass"},{"segment":[79.94474524180967,96.48789],"label":"Cutting the grass"}]},"SI0GumzxYt4":{"duration":133.59,"subset":"training","resolution":"478x360","url":"https://www.youtube.com/watch?v=SI0GumzxYt4","annotations":[{"segment":[10.520439365054601,116.34980961154446],"label":"Sailing"}]},"iAPv-QSvZF4":{"duration":36.76,"subset":"validation","resolution":"482x360","url":"https://www.youtube.com/watch?v=iAPv-QSvZF4","annotations":[{"segment":[0,34.75016430889236],"label":"Using the balance beam"}]},"ePqfnkzAQjQ":{"duration":136.32,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=ePqfnkzAQjQ","annotations":[{"segment":[0.01,136.32374149659864],"label":"Getting a 
tattoo"}]},"chmegNbBTNc":{"duration":180.33,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=chmegNbBTNc","annotations":[]},"6HyNydVIji4":{"duration":224.32999999999998,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6HyNydVIji4","annotations":[{"segment":[8.049199380220108,219.77813952574104],"label":"Playing flauta"}]},"dRF1C_sBE5M":{"duration":227.23,"subset":"testing","resolution":"480x480","url":"https://www.youtube.com/watch?v=dRF1C_sBE5M","annotations":[]},"oD5lGBt7U7g":{"duration":211.14,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=oD5lGBt7U7g","annotations":[{"segment":[40.679662994683056,179.0234561749817],"label":"Assembling bicycle"}]},"Zhx6BYVb64g":{"duration":162.31,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Zhx6BYVb64g","annotations":[]},"E1b_DF716Q4":{"duration":178.84,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=E1b_DF716Q4","annotations":[{"segment":[157.35695906396256,169.6330338845554],"label":"Removing curlers"}]},"hsI_BHN5h_0":{"duration":158.686,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hsI_BHN5h_0","annotations":[{"segment":[83.51489977474748,89.93394338845465],"label":"Beach soccer"}]},"cVFVREeD6hM":{"duration":86.216,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cVFVREeD6hM","annotations":[]},"p-JFdMUWVvU":{"duration":36.293,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=p-JFdMUWVvU","annotations":[{"segment":[0.01,36.261],"label":"Curling"}]},"GVn7MSMHXxk":{"duration":56.495,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=GVn7MSMHXxk","annotations":[{"segment":[5.376198314352574,54.114520737909515],"label":"Disc 
dog"}]},"tr1sNwRTMd8":{"duration":106.12,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=tr1sNwRTMd8","annotations":[{"segment":[41.071211399999996,83.11230744],"label":"Triple jump"}]},"E1AVyl1RwF8":{"duration":52.04,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=E1AVyl1RwF8","annotations":[{"segment":[1.4612270321372856,51.22412540436817],"label":"Doing kickboxing"}]},"5kXIZCs22l8":{"duration":105.7,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5kXIZCs22l8","annotations":[{"segment":[17.14894598439938,84.42558023088924],"label":"Getting a haircut"}]},"pOABO1PhzSI":{"duration":26.75,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=pOABO1PhzSI","annotations":[]},"g21ESQzeffY":{"duration":135.54,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=g21ESQzeffY","annotations":[]},"8xYzQMbI5fM":{"duration":90.744,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8xYzQMbI5fM","annotations":[{"segment":[0.01,81.52152262090485],"label":"Fixing the roof"}]},"a0bj_vVpK9s":{"duration":50.156,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=a0bj_vVpK9s","annotations":[{"segment":[9.78063612340412,44.36496453510141],"label":"Snow 
tubing"}]},"_eanWnL3FtM":{"duration":238.795,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=_eanWnL3FtM","annotations":[{"segment":[51.02220492357109,68.5261730360371],"label":"Rock-paper-scissors"},{"segment":[83.42316717430602,87.8922654157867],"label":"Rock-paper-scissors"},{"segment":[157.53571301219395,163.49451066750152],"label":"Rock-paper-scissors"},{"segment":[181.0058096723869,185.10264898595943],"label":"Rock-paper-scissors"}]},"1ATBCBUgflA":{"duration":68.15,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=1ATBCBUgflA","annotations":[{"segment":[13.488248000000002,59.23183798361935],"label":"Polishing forniture"}]},"yXH7GcOduNo":{"duration":86.59,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=yXH7GcOduNo","annotations":[{"segment":[17.94515575,85.71113104524181],"label":"Hammer throw"}]},"OccEAILrU7M":{"duration":8.36,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=OccEAILrU7M","annotations":[]},"xSIh6JjAR_Y":{"duration":233.57,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xSIh6JjAR_Y","annotations":[{"segment":[40.68395505859809,214.3172869107791],"label":"Camel ride"}]},"-BVpaPkee2I":{"duration":96.39,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-BVpaPkee2I","annotations":[{"segment":[10.676146433612447,56.68883387988581],"label":"Spinning"}]},"dbLtukGqkhI":{"duration":64.622,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dbLtukGqkhI","annotations":[{"segment":[0.2,64.381],"label":"Kite flying"}]},"w9LRKWLmw0A":{"duration":144.429,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=w9LRKWLmw0A","annotations":[{"segment":[0.01,144.42814058956915],"label":"Mooping 
floor"}]},"_2txWbQfJrY":{"duration":142.687,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_2txWbQfJrY","annotations":[]},"mwjAm0Ofwzc":{"duration":121.46,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=mwjAm0Ofwzc","annotations":[]},"gTh9bd9yV9E":{"duration":156.85,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=gTh9bd9yV9E","annotations":[{"segment":[10.397382995319813,65.19770748829953],"label":"Putting on makeup"},{"segment":[72.53703666146646,152.0464360374415],"label":"Putting on makeup"}]},"wrn5qM9aVeo":{"duration":168.95,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=wrn5qM9aVeo","annotations":[{"segment":[34.77736661466459,159.13279875195008],"label":"Preparing pasta"}]},"JNBWPj42n-g":{"duration":180.4,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JNBWPj42n-g","annotations":[{"segment":[106.3753236,139.89451855000002],"label":"Wrapping presents"},{"segment":[106.3753236,139.89451855000002],"label":"Wrapping presents"}]},"isqM83F_aUk":{"duration":98.128,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=isqM83F_aUk","annotations":[]},"BEtftLo6NKQ":{"duration":173.059,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BEtftLo6NKQ","annotations":[{"segment":[54.24836349453978,64.50427301092044],"label":"Mooping floor"},{"segment":[75.56985959438379,80.9770670826833],"label":"Mooping floor"},{"segment":[132.53246645865832,138.7407082683307],"label":"Mooping floor"}]},"AQ2-EcbX4dE":{"duration":14.98,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=AQ2-EcbX4dE","annotations":[{"segment":[0,14.976869],"label":"Grooming 
horse"}]},"abDf2LeTQfk":{"duration":36.73,"subset":"testing","resolution":"640x352","url":"https://www.youtube.com/watch?v=abDf2LeTQfk","annotations":[]},"PwTwGnAVFhQ":{"duration":6.53,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=PwTwGnAVFhQ","annotations":[]},"23ApAC123aQ":{"duration":133.747,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=23ApAC123aQ","annotations":[{"segment":[34.222701697501435,72.3468281450592],"label":"Hand car wash"},{"segment":[90.47141306561996,94.84631287403117],"label":"Hand car wash"}]},"SB08N4L6Ujw":{"duration":133.17,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SB08N4L6Ujw","annotations":[{"segment":[48.59606864274571,71.85572542901717],"label":"Doing kickboxing"}]},"XCJ2StGMgW4":{"duration":34.76,"subset":"validation","resolution":"270x480","url":"https://www.youtube.com/watch?v=XCJ2StGMgW4","annotations":[{"segment":[0.010000000000000002,32.899141649765994],"label":"Playing bagpipes"}]},"MlnK2sa7mm4":{"duration":157.53,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=MlnK2sa7mm4","annotations":[{"segment":[29.981203850234007,43.98881548517941],"label":"Playing harmonica"},{"segment":[52.09848537909516,62.66563099843994],"label":"Playing harmonica"},{"segment":[69.05506788455537,82.07968922932916],"label":"Playing harmonica"},{"segment":[94.12132028393137,97.31603872698908],"label":"Playing harmonica"},{"segment":[122.13654355382215,125.08551442433698],"label":"Playing harmonica"},{"segment":[134.66966975351014,140.56761149453976],"label":"Playing harmonica"}]},"9aRUmbcYxUM":{"duration":160.288,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9aRUmbcYxUM","annotations":[{"segment":[2.499235569422777,152.9723120124805],"label":"Arm 
wrestling"}]},"VQWUgufKtA0":{"duration":215.946,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VQWUgufKtA0","annotations":[]},"qtJhNEdrF8w":{"duration":116.63,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=qtJhNEdrF8w","annotations":[]},"gISiybfqigc":{"duration":91.91,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gISiybfqigc","annotations":[]},"rF2b7MLtVv4":{"duration":168.763,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=rF2b7MLtVv4","annotations":[]},"G836MDBhEpg":{"duration":61.09,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=G836MDBhEpg","annotations":[{"segment":[1.45518762,41.938211388455535],"label":"Mixing drinks"}]},"RHHXOAkuWjg":{"duration":37.199,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=RHHXOAkuWjg","annotations":[]},"5oy5Yi6fzJU":{"duration":43.58,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=5oy5Yi6fzJU","annotations":[{"segment":[0.2,36.92046801872075],"label":"Mowing the lawn"}]},"IJ2m8Bi3LVE":{"duration":200.551,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=IJ2m8Bi3LVE","annotations":[{"segment":[0.01,183.3428045897036],"label":"Putting in contact lenses"}]},"SSTom962aPk":{"duration":85.13,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=SSTom962aPk","annotations":[{"segment":[35.372989859594384,81.96180577223089],"label":"Getting a haircut"}]},"NkvwL_M_lDg":{"duration":158.314,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=NkvwL_M_lDg","annotations":[{"segment":[0.2,156.06210747663553],"label":"Playing congas"}]},"_Hi9gQEaT-Q":{"duration":16.07,"subset":"training","resolution":"400x226","url":"https://www.youtube.com/watch?v=_Hi9gQEaT-Q","annotations":[{"segment":[0,16.067619],"label":"Walking 
the dog"}]},"Ufm19cvrxGs":{"duration":158.361,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ufm19cvrxGs","annotations":[{"segment":[59.107961667689366,118.95477666118288],"label":"Assembling bicycle"}]},"4MwnHtlaEUY":{"duration":196.232,"subset":"testing","resolution":"853x480","url":"https://www.youtube.com/watch?v=4MwnHtlaEUY","annotations":[]},"oNZGC8cjquw":{"duration":237.8,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=oNZGC8cjquw","annotations":[]},"Fz1NCLAB8bA":{"duration":122.56,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Fz1NCLAB8bA","annotations":[{"segment":[59.07873630421217,92.53756754446178],"label":"Shaving"}]},"_EKqIcXdW04":{"duration":54.08,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_EKqIcXdW04","annotations":[{"segment":[0.01,53.23291040881623],"label":"Kneeling"}]},"SllpW4we_zk":{"duration":82.199,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SllpW4we_zk","annotations":[]},"2ShsRSJ9cqE":{"duration":43.817,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=2ShsRSJ9cqE","annotations":[]},"m7v8AUpPZes":{"duration":213.25,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=m7v8AUpPZes","annotations":[]},"YbD0P8dH6Oc":{"duration":86.356,"subset":"training","resolution":"426x238","url":"https://www.youtube.com/watch?v=YbD0P8dH6Oc","annotations":[{"segment":[14.772951051120366,62.58359792586782],"label":"Sumo"}]},"9-xf_gylOR0":{"duration":38.128,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9-xf_gylOR0","annotations":[{"segment":[1.3045055364680866,1.8974629334769004],"label":"Painting fence"},{"segment":[4.506475711081877,8.894360837053883],"label":"Painting fence"},{"segment":[16.365624700195408,23.777592818391366],"label":"Painting 
fence"}]},"KnI7IWbLWmE":{"duration":161.87,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=KnI7IWbLWmE","annotations":[{"segment":[1.010086271450858,84.84724680187207],"label":"Playing bagpipes"}]},"SCOViAddWJU":{"duration":121.302,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SCOViAddWJU","annotations":[]},"6Q8QqcnT05I":{"duration":237.89,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6Q8QqcnT05I","annotations":[]},"Mgy7IRETxSk":{"duration":176.263,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Mgy7IRETxSk","annotations":[{"segment":[0.01,176.2622902494331],"label":"Drum corps"}]},"scapZQe6_Iw":{"duration":214.88,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=scapZQe6_Iw","annotations":[{"segment":[2.6817779594383775,156.54878934204987],"label":"Cricket"}]},"bOBQLGfEeyg":{"duration":59.7,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=bOBQLGfEeyg","annotations":[{"segment":[6.418650510140405,58.88414163650546],"label":"Dodgeball"}]},"fPbVku8Ssyw":{"duration":7.059,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=fPbVku8Ssyw","annotations":[{"segment":[0.2,6.858412698412698],"label":"Building sandcastles"}]},"YCdPa5gJYmc":{"duration":30.88,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YCdPa5gJYmc","annotations":[{"segment":[0,25.72738106396256],"label":"Hammer throw"}]},"aRZd3O6-PKw":{"duration":97.849,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=aRZd3O6-PKw","annotations":[{"segment":[1.8318048861154448,80.90471580343214],"label":"Hitting a 
pinata"}]},"4X8raZBOzvM":{"duration":97.28999999999999,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4X8raZBOzvM","annotations":[{"segment":[8.493566302652107,90.09247113884555],"label":"Playing bagpipes"}]},"u1ImXpBa4nM":{"duration":149.235,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=u1ImXpBa4nM","annotations":[]},"l0Sz7dqLzqs":{"duration":146.5,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=l0Sz7dqLzqs","annotations":[{"segment":[10.738950078003121,21.477900156006243],"label":"Plataform diving"},{"segment":[23.077318252730112,90.93834321372856],"label":"Plataform diving"},{"segment":[103.73368798751952,109.902872074883],"label":"Plataform diving"}]},"-G8N5FxjZbw":{"duration":116.61,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=-G8N5FxjZbw","annotations":[{"segment":[0.01,110.65333561700469],"label":"Playing bagpipes"}]},"MbjyQGdTtK8":{"duration":129.99,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MbjyQGdTtK8","annotations":[]},"PT4x_Y5lu_g":{"duration":147.94,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=PT4x_Y5lu_g","annotations":[{"segment":[27.69441455209229,122.77857118094248],"label":"Springboard diving"}]},"4ZyPBOe4P0U":{"duration":126.36,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=4ZyPBOe4P0U","annotations":[{"segment":[53.029087126365056,83.58488082371295],"label":"Playing badminton"}]},"sRgBK2_nb0I":{"duration":53.11,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sRgBK2_nb0I","annotations":[{"segment":[27.48716144,48.2157768],"label":"Using the pommel 
horse"}]},"eNq-jGrQuV0":{"duration":171.526,"subset":"training","resolution":"854x474","url":"https://www.youtube.com/watch?v=eNq-jGrQuV0","annotations":[{"segment":[21.94245867249656,157.87866514820593],"label":"Hurling"}]},"AJ_a4fE-rR0":{"duration":121.046,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=AJ_a4fE-rR0","annotations":[{"segment":[11.295078496097025,115.96283251164911],"label":"Surfing"}]},"lhKZvm9ukbs":{"duration":199.831,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lhKZvm9ukbs","annotations":[]},"epqnpUCVLK8":{"duration":63.321,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=epqnpUCVLK8","annotations":[{"segment":[0.2,4.662537066592864],"label":"Doing crunches"}]},"DcHywvflgqE":{"duration":159.48,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DcHywvflgqE","annotations":[]},"e6mpdQ3BFhA":{"duration":175.265,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=e6mpdQ3BFhA","annotations":[{"segment":[17.198791722296395,35.762566914616315],"label":"Elliptical trainer"},{"segment":[57.87529912899739,70.43314705321382],"label":"Elliptical trainer"},{"segment":[98.8248032297031,102.10076355775955],"label":"Elliptical trainer"},{"segment":[146.053231292517,158.61107781308414],"label":"Elliptical 
trainer"}]},"AdFg_21LRg4":{"duration":207.122,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=AdFg_21LRg4","annotations":[{"segment":[130.97041433021806,160.64843925233643],"label":"Bullfighting"}]},"ObqrS-ZONKM":{"duration":127.22,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ObqrS-ZONKM","annotations":[{"segment":[0.01,0.02],"label":"Cumbia"}]},"UfYMJBu4HI0":{"duration":120.187,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UfYMJBu4HI0","annotations":[{"segment":[47.04391791453972,56.22779033610324],"label":"Laying tile"},{"segment":[71.40929351794072,76.09494269890796],"label":"Laying tile"}]},"T1F9ddqt0I0":{"duration":66.804,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=T1F9ddqt0I0","annotations":[]},"lsIiSTG2pn4":{"duration":41.61,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=lsIiSTG2pn4","annotations":[{"segment":[2.3341591263650545,38.059761310452416],"label":"Volleyball"}]},"GR6Ul2pD8_Y":{"duration":203.802,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=GR6Ul2pD8_Y","annotations":[{"segment":[118.59278380655225,145.61794901716067],"label":"Blowing leaves"},{"segment":[147.52560773790952,174.55077294851793],"label":"Blowing leaves"}]},"WP8YccCA2Jc":{"duration":73.793,"subset":"training","resolution":"614x480","url":"https://www.youtube.com/watch?v=WP8YccCA2Jc","annotations":[{"segment":[31.414591263650543,61.56339313572543],"label":"Removing ice from car"}]},"FGFPyp9nJug":{"duration":229.72,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FGFPyp9nJug","annotations":[{"segment":[9.317613510140406,193.51966521060842],"label":"Walking the 
dog"}]},"wS_T-RMSSCE":{"duration":60.28,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wS_T-RMSSCE","annotations":[{"segment":[0,10.344294290171606],"label":"Playing water polo"},{"segment":[15.140285279251168,23.321681672386894],"label":"Playing water polo"}]},"yG4C_s7ItA4":{"duration":114.47999999999999,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yG4C_s7ItA4","annotations":[{"segment":[21.241964118564745,23.919522620904836],"label":"Shaving legs"},{"segment":[60.155814352574104,62.8333728549142],"label":"Shaving legs"},{"segment":[69.2716193447738,74.44913728549142],"label":"Shaving legs"},{"segment":[84.80417316692667,88.19634009360374],"label":"Shaving legs"}]},"rM02KWikOo8":{"duration":59.119,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rM02KWikOo8","annotations":[]},"9K97gmkVbyQ":{"duration":175.96,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9K97gmkVbyQ","annotations":[{"segment":[22.394429200000005,164.59594352574103],"label":"Wrapping presents"}]},"91ozOcjKl5M":{"duration":186.78,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=91ozOcjKl5M","annotations":[{"segment":[110.70883432000001,143.86432963],"label":"Preparing pasta"}]},"2nDbYhgdw2A":{"duration":163.4,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=2nDbYhgdw2A","annotations":[]},"VGvjsCblFY0":{"duration":88.59,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VGvjsCblFY0","annotations":[{"segment":[0,7.4625775132605305],"label":"Tumbling"},{"segment":[9.95010335101404,15.616134425897036],"label":"Tumbling"}]},"5aTek77vxBA":{"duration":43.45,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5aTek77vxBA","annotations":[{"segment":[0.01,2.9127971918876754],"label":"Hand washing 
clothes"},{"segment":[31.092416536661464,43.221],"label":"Hand washing clothes"}]},"Byd_QaPB50k":{"duration":66.69,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=Byd_QaPB50k","annotations":[]},"TF591ByrGYk":{"duration":51.64,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TF591ByrGYk","annotations":[{"segment":[18.00256901,37.36897683],"label":"Using parallel bars"},{"segment":[6.21292871,13.789696229999997],"label":"Using parallel bars"}]},"WjFaS7tSmPw":{"duration":189.87,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WjFaS7tSmPw","annotations":[]},"mY7QdIn51S8":{"duration":227.79,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=mY7QdIn51S8","annotations":[]},"lKKimizxQJM":{"duration":39.45,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lKKimizxQJM","annotations":[{"segment":[3.659203588143526,36.49978705148206],"label":"Hopscotch"}]},"cHYZPYLwvks":{"duration":167.463,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cHYZPYLwvks","annotations":[{"segment":[10.186301092043681,66.34155070202809],"label":"Scuba diving"},{"segment":[80.70684711388455,120.14611544461779],"label":"Scuba diving"},{"segment":[128.78067550702028,150.72302184087363],"label":"Scuba diving"}]},"D5EgASje8MQ":{"duration":114.17,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=D5EgASje8MQ","annotations":[{"segment":[11.07450793269995,110.48640358192537],"label":"Playing accordion"}]},"41xqneu4_RA":{"duration":100.96000000000001,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=41xqneu4_RA","annotations":[{"segment":[6.767368174726989,17.626633385335413],"label":"Playing racquetball"},{"segment":[20.931627145085802,36.040170046801876],"label":"Playing 
racquetball"},{"segment":[38.558260530421215,55.712751950078],"label":"Playing racquetball"},{"segment":[58.70298439937597,78.3755663026521],"label":"Playing racquetball"},{"segment":[81.36579875195008,95.05791575663027],"label":"Playing racquetball"}]},"rC24UspQv14":{"duration":228.555,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rC24UspQv14","annotations":[{"segment":[22.106573608424338,41.360686106084245],"label":"Hitting a pinata"},{"segment":[52.77053054914197,58.118895131825276],"label":"Hitting a pinata"},{"segment":[96.98367776599065,106.25417637597505],"label":"Hitting a pinata"},{"segment":[116.23779026365055,120.51648192979718],"label":"Hitting a pinata"},{"segment":[130.5000958174727,131.2132110951638],"label":"Hitting a pinata"},{"segment":[165.44274442433698,171.86078192355694],"label":"Hitting a pinata"},{"segment":[196.46325900390016,208.94277636349454],"label":"Hitting a pinata"}]},"9S9hPRDwi24":{"duration":8.313,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=9S9hPRDwi24","annotations":[{"segment":[1.693575544568621,7.394849606777019],"label":"Croquet"}]},"-ntEh6HpeX0":{"duration":231.085,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=-ntEh6HpeX0","annotations":[{"segment":[21.232739875389406,226.72247663551403],"label":"Cleaning sink"}]},"xyMCaug7LXM":{"duration":54.78,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=xyMCaug7LXM","annotations":[{"segment":[9.849708660000001,15.616666146645866],"label":"Javelin throw"},{"segment":[32.09843369734789,43.53776911076443],"label":"Javelin throw"}]},"iJahVlx_yDE":{"duration":99.568,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iJahVlx_yDE","annotations":[{"segment":[2.17462048049922,39.76448878627145],"label":"Snow tubing"},{"segment":[42.24976933541342,66.63658472386896],"label":"Snow 
tubing"},{"segment":[72.38379599375975,99.566552],"label":"Snow tubing"}]},"OQEKZXv6jSw":{"duration":41.1,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=OQEKZXv6jSw","annotations":[]},"dF2p3CMDe_g":{"duration":146.89,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=dF2p3CMDe_g","annotations":[{"segment":[9.624553769110765,136.80615714664586],"label":"Capoeira"}]},"M4DcibAnW_E":{"duration":84.986,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=M4DcibAnW_E","annotations":[{"segment":[8.485245204368175,38.44876733229329],"label":"Surfing"}]},"VZ2CPdIPa30":{"duration":230.78,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=VZ2CPdIPa30","annotations":[{"segment":[133.93342217784712,230.783128],"label":"Bungee jumping"}]},"cJyl4yqFuK0":{"duration":95.85,"subset":"training","resolution":"176x144","url":"https://www.youtube.com/watch?v=cJyl4yqFuK0","annotations":[{"segment":[3.4286063943462297,19.37908984045878],"label":"Washing face"},{"segment":[29.813985594735605,36.52213286534214],"label":"Washing face"},{"segment":[64.24914158384914,76.17473673159408],"label":"Washing face"}]},"Np43Q2E8GCE":{"duration":15.581,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Np43Q2E8GCE","annotations":[{"segment":[0.01,10.78540717628705],"label":"Installing carpet"}]},"YNo7-L8VQWw":{"duration":149.165,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YNo7-L8VQWw","annotations":[{"segment":[12.098801872074883,110.05256318252731],"label":"Removing ice from car"}]},"WaFDgdqY1DM":{"duration":40.13,"subset":"validation","resolution":"192x144","url":"https://www.youtube.com/watch?v=WaFDgdqY1DM","annotations":[{"segment":[10.641331669266771,40.12408],"label":"Drinking 
coffee"}]},"R_EnlXwBvtA":{"duration":187.83,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=R_EnlXwBvtA","annotations":[{"segment":[1.171675507020281,185.7105678627145],"label":"Playing squash"}]},"YH1n8khU-fQ":{"duration":213.07,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YH1n8khU-fQ","annotations":[]},"mzbhfWgJ2sU":{"duration":200.25,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=mzbhfWgJ2sU","annotations":[{"segment":[67.15010140405616,83.07873010920437],"label":"Preparing salad"},{"segment":[109.93877067082683,123.36879095163806],"label":"Preparing salad"},{"segment":[127.11670358814352,132.11392043681747],"label":"Preparing salad"},{"segment":[136.7988112324493,143.66998439937598],"label":"Preparing salad"},{"segment":[151.18091107644307,173.35827613104524],"label":"Preparing salad"}]},"WSGXoKV9vLs":{"duration":130.54,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=WSGXoKV9vLs","annotations":[{"segment":[24.879999780000002,108.27936731999999],"label":"Mixing drinks"}]},"y7R4iEhqx9A":{"duration":58.17,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=y7R4iEhqx9A","annotations":[{"segment":[9.80019739565093,13.974355545650399],"label":"Drinking beer"},{"segment":[0.2,57.965984999999996],"label":"Drinking beer"},{"segment":[40.01747173946958,43.46568925897036],"label":"Drinking beer"},{"segment":[53.084401287051485,56.441876240249606],"label":"Drinking beer"}]},"Lx_FcKpTZIw":{"duration":77.14,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=Lx_FcKpTZIw","annotations":[{"segment":[5.41270670826833,77.081],"label":"Washing hands"}]},"zJKedNWsZKY":{"duration":119.467,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zJKedNWsZKY","annotations":[{"segment":[10.526187987519501,111.87567940717629],"label":"Using the rowing 
machine"}]},"F6BJoTLozoc":{"duration":232.55,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=F6BJoTLozoc","annotations":[{"segment":[88.70384061,207.3270046801872],"label":"Belly dance"}]},"3Yx9IEOdD7o":{"duration":51.9,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=3Yx9IEOdD7o","annotations":[]},"Pf-vJP5Yt5Q":{"duration":238.098,"subset":"testing","resolution":"1916x1080","url":"https://www.youtube.com/watch?v=Pf-vJP5Yt5Q","annotations":[]},"lO1r4df9uUc":{"duration":95.133,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lO1r4df9uUc","annotations":[{"segment":[0.19999999999999998,91.71867481123245],"label":"Waterskiing"}]},"Em4ekFj4XIs":{"duration":166.325,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Em4ekFj4XIs","annotations":[]},"5WCgpt2bEJw":{"duration":16.069,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=5WCgpt2bEJw","annotations":[{"segment":[3.960650087363495,11.405669555382216],"label":"Playing rubik cube"}]},"D5iU1Qfy-wk":{"duration":184.97,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=D5iU1Qfy-wk","annotations":[{"segment":[109.65452037441499,181.795652199688],"label":"Tai chi"}]},"nhky9RGjzwc":{"duration":26.889,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nhky9RGjzwc","annotations":[{"segment":[0,26.888707],"label":"Arm wrestling"}]},"yXzLQU_jS44":{"duration":182.556,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yXzLQU_jS44","annotations":[{"segment":[97.08711544461778,166.29058346333852],"label":"Putting in contact 
lenses"}]},"NqYEX8tUjYg":{"duration":216.527,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NqYEX8tUjYg","annotations":[{"segment":[28.54355542112841,34.62383941616168],"label":"Snowboarding"},{"segment":[56.242626954057755,61.309530283252144],"label":"Snowboarding"},{"segment":[132.24617689197365,138.66425444228653],"label":"Snowboarding"},{"segment":[188.65769893837754,193.7246022223089],"label":"Snowboarding"}]},"uzUVSpklbRs":{"duration":48.832,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uzUVSpklbRs","annotations":[{"segment":[0.01,48.83156462585034],"label":"Camel ride"}]},"PQkambF3iEk":{"duration":80.156,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PQkambF3iEk","annotations":[{"segment":[5.3770314948652365,34.262944874257556],"label":"Changing car wheel"}]},"ixfgBUWkb1E":{"duration":47.903,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ixfgBUWkb1E","annotations":[]},"yRfnDlO7TBw":{"duration":100.59,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yRfnDlO7TBw","annotations":[]},"iMATWwGyAUM":{"duration":178.098,"subset":"validation","resolution":"852x480","url":"https://www.youtube.com/watch?v=iMATWwGyAUM","annotations":[{"segment":[0.2,12.5029131777516],"label":"Skiing"},{"segment":[29.729149111542696,155.03612239937596],"label":"Skiing"}]},"IHpBwsyMT9Q":{"duration":59.327,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=IHpBwsyMT9Q","annotations":[{"segment":[0.01,59.32698412698413],"label":"Throwing 
darts"}]},"LvbM6zv8XVc":{"duration":184.25,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=LvbM6zv8XVc","annotations":[{"segment":[2.58697825464039,56.05119551720845],"label":"Cumbia"}]},"5TMKHLOACYg":{"duration":151.046,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=5TMKHLOACYg","annotations":[{"segment":[31.213155226209047,39.22256864274571],"label":"Applying sunscreen"}]},"LYrqEz_vttQ":{"duration":142.69,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LYrqEz_vttQ","annotations":[{"segment":[0.01,131.77921847113885],"label":"Bathing dog"}]},"DfpUMDpSbS4":{"duration":235.8,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DfpUMDpSbS4","annotations":[{"segment":[69.87761310452419,73.18760530421217],"label":"Snatch"},{"segment":[78.33648205928237,84.22091263650546],"label":"Snatch"},{"segment":[128.35414196567862,182.41734789391577],"label":"Snatch"}]},"QKEFacWrn_8":{"duration":134.537,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QKEFacWrn_8","annotations":[{"segment":[21.198359316692667,35.68040677067083],"label":"Playing ten pins"},{"segment":[46.17464405616224,53.31072541029642],"label":"Playing ten pins"},{"segment":[74.08931523556943,82.48470506396256],"label":"Playing ten pins"},{"segment":[102.21387116068644,112.2883389547582],"label":"Playing ten pins"}]},"sQwx_m8Vghw":{"duration":25.96,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sQwx_m8Vghw","annotations":[{"segment":[0,24.38044401872075],"label":"Fixing 
bicycle"}]},"wHxyzVcKq0c":{"duration":165.77,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wHxyzVcKq0c","annotations":[{"segment":[16.29225751170047,153.0954991575663],"label":"Cheerleading"}]},"tJ9wPza9ZqE":{"duration":55.357,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=tJ9wPza9ZqE","annotations":[]},"15Yf7NIDLtM":{"duration":155.296,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=15Yf7NIDLtM","annotations":[{"segment":[7.254252336448599,134.92909345794394],"label":"Decorating the Christmas tree"}]},"2AeQkiArbdk":{"duration":53.267,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=2AeQkiArbdk","annotations":[]},"ecVUPpBNpdA":{"duration":84.8,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=ecVUPpBNpdA","annotations":[{"segment":[0.01,64.39720280811233],"label":"Arm wrestling"}]},"7OM1UcqHPwE":{"duration":57.98,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7OM1UcqHPwE","annotations":[]},"6Xsfwn5M6qs":{"duration":91.44,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=6Xsfwn5M6qs","annotations":[]},"l9HcwQPNvWo":{"duration":196.63,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=l9HcwQPNvWo","annotations":[{"segment":[88.80023412000001,130.82875920206874],"label":"Playing harmonica"},{"segment":[145.24599644121818,190.64495625811426],"label":"Playing harmonica"}]},"SjlILJ2st-I":{"duration":80.69,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=SjlILJ2st-I","annotations":[]},"PV5Y3NeR5yQ":{"duration":191.24,"subset":"training","resolution":"854x478","url":"https://www.youtube.com/watch?v=PV5Y3NeR5yQ","annotations":[{"segment":[18.251976180983817,70.083255134933],"label":"Hula hoop"},{"segment":[84.67941989782673,149.9153387962393],"label":"Hula 
hoop"}]},"edbczrXeEOk":{"duration":86.53999999999999,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=edbczrXeEOk","annotations":[{"segment":[50.5194558,83.86210056084244],"label":"Springboard diving"},{"segment":[8.16356731123245,46.0340876],"label":"Springboard diving"}]},"BSdXxBOJ12A":{"duration":238.01,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BSdXxBOJ12A","annotations":[{"segment":[5.569517760302249,233.5484447486743],"label":"Playing field hockey"}]},"STwMeZMg_aA":{"duration":10.124,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=STwMeZMg_aA","annotations":[{"segment":[3.443073294851794,6.570268305772231],"label":"Using the rowing machine"}]},"8s3b1f6OMw0":{"duration":33.07,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=8s3b1f6OMw0","annotations":[{"segment":[0,33.065215],"label":"Playing water polo"}]},"rVYuVW9tB3U":{"duration":80.133,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=rVYuVW9tB3U","annotations":[{"segment":[35.228003120124804,40.22588767550702],"label":"Sumo"}]},"yVJGQI5MBIk":{"duration":95.481,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yVJGQI5MBIk","annotations":[{"segment":[0.19999999999999998,95.32689342403629],"label":"Capoeira"}]},"BFxxrjqgF0w":{"duration":85.543,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=BFxxrjqgF0w","annotations":[{"segment":[30.938521060842433,69.89473322932918],"label":"Bullfighting"}]},"CzyMYAvKE2E":{"duration":44.258,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=CzyMYAvKE2E","annotations":[{"segment":[5.674466804111618,34.710348239369026],"label":"Carving 
jack-o-lanterns"}]},"6SWx37_Byzs":{"duration":33.86,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6SWx37_Byzs","annotations":[{"segment":[1.5548478939157564,8.7021707],"label":"Cumbia"},{"segment":[10.91588376,22.426704368174725],"label":"Cumbia"}]},"sqv3M6MPKjQ":{"duration":79.831,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sqv3M6MPKjQ","annotations":[]},"sk_pnoqXxhs":{"duration":93.93,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=sk_pnoqXxhs","annotations":[]},"QgzpNg0Ponc":{"duration":143.62,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=QgzpNg0Ponc","annotations":[{"segment":[0.01,141.51215522620905],"label":"Skateboarding"}]},"eSyrFLhr3b0":{"duration":127.99,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eSyrFLhr3b0","annotations":[{"segment":[9.480856684867396,119.45879422932917],"label":"Preparing salad"}]},"-YVhOi8vt_Q":{"duration":21.967,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-YVhOi8vt_Q","annotations":[{"segment":[0.20000000000000004,14.034024960998439],"label":"Playing rubik cube"}]},"1RVu0qNtWCc":{"duration":136.56,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=1RVu0qNtWCc","annotations":[{"segment":[15.977758498095024,101.83158082785896],"label":"Shaving"}]},"T9JSNxOyUQA":{"duration":228.09,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=T9JSNxOyUQA","annotations":[{"segment":[37.091205195938,140.8328887104653],"label":"River tubing"},{"segment":[152.9123998046226,227.8895918367347],"label":"River 
tubing"}]},"C1vhQIJtLsw":{"duration":36.572,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=C1vhQIJtLsw","annotations":[{"segment":[12.02382449297972,18.845852574102963],"label":"Powerbocking"}]},"259K1wC2IMk":{"duration":119.049,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=259K1wC2IMk","annotations":[{"segment":[5.38597896568924,54.60268262714508],"label":"Mooping floor"},{"segment":[64.63174678315133,119.048706],"label":"Mooping floor"}]},"hFtmkU7wdx4":{"duration":87.73,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hFtmkU7wdx4","annotations":[{"segment":[26.872677847113888,30.29159360374415],"label":"Triple jump"},{"segment":[56.15140483619344,60.528615444617785],"label":"Triple jump"}]},"V2KVxpDERCk":{"duration":160.73,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=V2KVxpDERCk","annotations":[]},"xS-F-9E3xKk":{"duration":71.564,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=xS-F-9E3xKk","annotations":[{"segment":[1.3364672897196261,66.50784579439252],"label":"Arm 
wrestling"}]},"uJbM9KQBXak":{"duration":190.985,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uJbM9KQBXak","annotations":[{"segment":[0.2,184.36832917316693],"label":"Slacklining"}]},"JhiX65Okpt0":{"duration":182.068,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=JhiX65Okpt0","annotations":[{"segment":[0.2,147.18554064953273],"label":"Capoeira"}]},"VFUk9WX8y5w":{"duration":157.547,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VFUk9WX8y5w","annotations":[{"segment":[4.5153630112110275,8.196365160743738],"label":"Longboarding"},{"segment":[16.53997003301788,28.319176911522554],"label":"Longboarding"},{"segment":[45.49718694267521,59.73039525420169],"label":"Longboarding"},{"segment":[62.1843966872235,74.20900370903036],"label":"Longboarding"},{"segment":[92.12321417008954,134.822839104669],"label":"Longboarding"},{"segment":[146.84744612647583,157.34689200000003],"label":"Longboarding"}]},"ButnmRRcOOM":{"duration":42.563,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ButnmRRcOOM","annotations":[]},"MSjSXzk2k50":{"duration":140.621,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=MSjSXzk2k50","annotations":[{"segment":[14.474798751950077,15.571374414976598],"label":"Mooping floor"},{"segment":[122.37784399375974,126.34348829953198],"label":"Mooping floor"}]},"vw64k9rIi_g":{"duration":15.4,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=vw64k9rIi_g","annotations":[{"segment":[10.807601560062402,14.025865135725429],"label":"Washing face"}]},"Q9Fw56ZvXq4":{"duration":12.98,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Q9Fw56ZvXq4","annotations":[{"segment":[0,12.941],"label":"Washing 
dishes"}]},"l5uaH-Qydxk":{"duration":96.015,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=l5uaH-Qydxk","annotations":[{"segment":[0.2,91.3855272863741],"label":"Using the monkey bar"}]},"g_cdSUwwk8Y":{"duration":116.448,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=g_cdSUwwk8Y","annotations":[{"segment":[39.96657697347894,92.28645955694228],"label":"Blowing leaves"}]},"szl1InYab_k":{"duration":61.301,"subset":"validation","resolution":"294x240","url":"https://www.youtube.com/watch?v=szl1InYab_k","annotations":[{"segment":[2.103920336973479,53.17180487987519],"label":"Painting furniture"}]},"kl9xvnAKfdE":{"duration":49.74,"subset":"validation","resolution":"1920x1088","url":"https://www.youtube.com/watch?v=kl9xvnAKfdE","annotations":[{"segment":[0.3103721747269891,49.34917578159126],"label":"Playing field hockey"}]},"Fe_YqV7TsaA":{"duration":82.8,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Fe_YqV7TsaA","annotations":[]},"2UjVfKEcj4g":{"duration":152.95,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=2UjVfKEcj4g","annotations":[{"segment":[27.65559438377535,152.801],"label":"Cleaning shoes"}]},"2q_4I3ae0J4":{"duration":122.973,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=2q_4I3ae0J4","annotations":[{"segment":[0.2,25.121140405616224],"label":"Hurling"},{"segment":[39.126308892355695,114.69378783151326],"label":"Hurling"},{"segment":[117.37892667706708,121.40663494539783],"label":"Hurling"}]},"sW_iLVVMF4w":{"duration":168.995,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=sW_iLVVMF4w","annotations":[{"segment":[0.01,168.941],"label":"Blowing 
leaves"}]},"6RZsrNf3SgM":{"duration":211.093,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=6RZsrNf3SgM","annotations":[]},"Ko7Nt8UDP4Q":{"duration":136.35,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ko7Nt8UDP4Q","annotations":[]},"tQT-iQIv-KY":{"duration":91.58,"subset":"training","resolution":"1274x720","url":"https://www.youtube.com/watch?v=tQT-iQIv-KY","annotations":[{"segment":[18.99347893915757,84.68521060842434],"label":"Using the balance beam"}]},"CfqnHsp6olc":{"duration":91.65,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=CfqnHsp6olc","annotations":[]},"Wiu7FVCc6DA":{"duration":106.77000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Wiu7FVCc6DA","annotations":[{"segment":[2.998078848673947,97.60412251794072],"label":"Paintball"}]},"ZREEgMgSz_o":{"duration":43.65,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZREEgMgSz_o","annotations":[{"segment":[11.236863720748829,38.54584767238689],"label":"Washing face"}]},"3N1tbv5Z6Kk":{"duration":146.24,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3N1tbv5Z6Kk","annotations":[{"segment":[4.676718408736349,141.78441419656787],"label":"Tango"}]},"5ssP_EapV9Q":{"duration":101.797,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5ssP_EapV9Q","annotations":[{"segment":[0.01,0.02],"label":"Playing ice hockey"},{"segment":[5.081873631407841,101.79628],"label":"Playing ice hockey"}]},"rmzMfd9ftU8":{"duration":131.867,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rmzMfd9ftU8","annotations":[{"segment":[20.559906396255847,83.47321996879874],"label":"Cleaning 
sink"}]},"U7x4eGLBITE":{"duration":154.97,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=U7x4eGLBITE","annotations":[{"segment":[35.286218408736346,65.49702184087363],"label":"Snow tubing"}]},"NuSM8LfLR90":{"duration":162.053,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=NuSM8LfLR90","annotations":[{"segment":[11.629321249040439,153.96210088403535],"label":"Baton twirling"}]},"D7aa0iuPlKc":{"duration":125.342,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=D7aa0iuPlKc","annotations":[{"segment":[4.2884899530790035,116.17916778141348],"label":"Drum corps"}]},"01yWsvgvmug":{"duration":83.267,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=01yWsvgvmug","annotations":[]},"7CM2OYr-Csw":{"duration":84.057,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7CM2OYr-Csw","annotations":[]},"MIQiVsnwcWE":{"duration":208.68,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=MIQiVsnwcWE","annotations":[{"segment":[4.8823946957878315,205.7115631825273],"label":"Tai chi"}]},"CIQ-mnURg9E":{"duration":176.07999999999998,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CIQ-mnURg9E","annotations":[{"segment":[21.213636479999998,32.68822623381126],"label":"Preparing salad"},{"segment":[61.53077840873635,132.95043191887677],"label":"Preparing salad"}]},"lBBefqJhytA":{"duration":213.508,"subset":"testing","resolution":"424x240","url":"https://www.youtube.com/watch?v=lBBefqJhytA","annotations":[]},"wBODR3P39Nw":{"duration":134.98,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wBODR3P39Nw","annotations":[{"segment":[107.67035101404058,126.61528081123247],"label":"Playing field hockey"},{"segment":[8.73571762870515,107.92221012],"label":"Playing field 
hockey"}]},"WGzUnE043iA":{"duration":84.358,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=WGzUnE043iA","annotations":[{"segment":[19.201163806552263,69.58784555382215],"label":"Canoeing"}]},"VA8SXILXTj0":{"duration":222.75,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=VA8SXILXTj0","annotations":[{"segment":[28.752979020403803,83.83399714948622],"label":"Making a lemonade"},{"segment":[125.40457686954845,142.03280875757332],"label":"Making a lemonade"}]},"_Mz7KEe_mz0":{"duration":179.212,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_Mz7KEe_mz0","annotations":[{"segment":[14.232415887850467,169.11458878504672],"label":"Longboarding"}]},"gmbC7D6HfuM":{"duration":141.34,"subset":"testing","resolution":"160x120","url":"https://www.youtube.com/watch?v=gmbC7D6HfuM","annotations":[]},"h9LI_6eBzw4":{"duration":184.367,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=h9LI_6eBzw4","annotations":[{"segment":[43.076271006845104,182.93056231464175],"label":"Canoeing"}]},"APuN4vwgKJ4":{"duration":209.77,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=APuN4vwgKJ4","annotations":[{"segment":[18.097420124624378,191.23157900312907],"label":"Baking 
cookies"}]},"IcKNG8J2RjI":{"duration":27.33,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=IcKNG8J2RjI","annotations":[]},"UcnzlGqUEyA":{"duration":59.61,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UcnzlGqUEyA","annotations":[{"segment":[22.689191906396257,35.14964975663027],"label":"Shaving"}]},"Ke5MPiv-wrY":{"duration":136.37,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ke5MPiv-wrY","annotations":[{"segment":[4.68043279875195,127.86091418408736],"label":"Tumbling"}]},"7F-mVWMP2vM":{"duration":137.393,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=7F-mVWMP2vM","annotations":[]},"xgjRCA0_Hb8":{"duration":190.915,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xgjRCA0_Hb8","annotations":[]},"DjyO6zWUiDw":{"duration":62.07,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=DjyO6zWUiDw","annotations":[{"segment":[1.7913252730109206,5.083490639625585],"label":"Washing dishes"},{"segment":[28.031819812792513,34.42249375975039],"label":"Washing dishes"},{"segment":[44.39581825273011,60.37250312012481],"label":"Washing dishes"}]},"Wr_Eu2M7U9E":{"duration":219.058,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Wr_Eu2M7U9E","annotations":[{"segment":[22.213273502340094,206.41257223712947],"label":"Hanging wallpaper"}]},"P5Sg_kACPRM":{"duration":15.63,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=P5Sg_kACPRM","annotations":[{"segment":[2.94443934,15.58],"label":"Long 
jump"}]},"99xnJSBRzkE":{"duration":152.25,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=99xnJSBRzkE","annotations":[{"segment":[11.163654295831698,23.989980508063862],"label":"Sailing"},{"segment":[27.07779978137901,31.35324185212307],"label":"Sailing"},{"segment":[35.153634803895564,44.65461718332679],"label":"Sailing"},{"segment":[54.393124122243805,59.3811398714452],"label":"Sailing"},{"segment":[63.18153256162247,65.55677814664587],"label":"Sailing"},{"segment":[72.20746578471139,80.04577621528861],"label":"Sailing"},{"segment":[87.88408664586582,92.15952869890796],"label":"Sailing"},{"segment":[95.48487251794072,97.86011810296412],"label":"Sailing"},{"segment":[103.0856583900156,116.38703366614665],"label":"Sailing"},{"segment":[119.47485292667707,130.16345805928236],"label":"Sailing"},{"segment":[132.53870364430577,136.10157202184087],"label":"Sailing"}]},"fjGqWAxenL0":{"duration":39.173,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fjGqWAxenL0","annotations":[{"segment":[0.01,39.141],"label":"Welding"}]},"pf49xhMRrgQ":{"duration":174.08,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=pf49xhMRrgQ","annotations":[{"segment":[0.01,174.079999],"label":"Playing congas"}]},"x768VAsOQSw":{"duration":75.372,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=x768VAsOQSw","annotations":[{"segment":[22.5761669160644,32.45323911388455],"label":"Ice fishing"}]},"1f8iBakydSI":{"duration":13.77,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=1f8iBakydSI","annotations":[{"segment":[0.01,13.721],"label":"Using the rowing machine"}]},"BKrUkWSo4Ig":{"duration":216.57,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=BKrUkWSo4Ig","annotations":[{"segment":[10.135969469578782,197.65140465678627],"label":"Playing 
violin"}]},"r9xmgB116dE":{"duration":205.358,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=r9xmgB116dE","annotations":[{"segment":[18.5781872074883,195.71159282371295],"label":"Longboarding"}]},"GIy6ZbAooOQ":{"duration":205.474,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=GIy6ZbAooOQ","annotations":[{"segment":[25.64410340093604,195.53628843213727],"label":"Rope skipping"}]},"V9LQVhq2ePE":{"duration":117.679,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=V9LQVhq2ePE","annotations":[{"segment":[8.995721873634945,80.77791070202808],"label":"Installing carpet"}]},"0dgmG3h9RLA":{"duration":78.41,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0dgmG3h9RLA","annotations":[{"segment":[1.0391084243369735,6.29577457098284],"label":"Playing bagpipes"},{"segment":[17.17585101404056,21.454532761310457],"label":"Playing bagpipes"}]},"D9v5e1TTwts":{"duration":210.28,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=D9v5e1TTwts","annotations":[]},"DJyfOeZc2lI":{"duration":41.239,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=DJyfOeZc2lI","annotations":[{"segment":[8.942529529752617,19.622096489859594],"label":"Throwing darts"},{"segment":[26.248574976599066,36.799472761310454],"label":"Throwing darts"}]},"VkJ03vm8FJk":{"duration":91.09,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=VkJ03vm8FJk","annotations":[{"segment":[35.45899357,58.552383889999994],"label":"Playing flauta"}]},"yUbdrBSmUHE":{"duration":139.343,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=yUbdrBSmUHE","annotations":[{"segment":[10.64859438377535,132.58316692667705],"label":"Rope 
skipping"}]},"OG7-gtjRaEg":{"duration":135.141,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OG7-gtjRaEg","annotations":[{"segment":[50.37307176287052,56.27451950078003],"label":"Tug of war"},{"segment":[97.38830265210609,114.67367238689549],"label":"Tug of war"}]},"OuEQLjwBIPI":{"duration":34.32,"subset":"validation","resolution":"192x144","url":"https://www.youtube.com/watch?v=OuEQLjwBIPI","annotations":[{"segment":[0,34.318683],"label":"Cricket"}]},"n832gDqfWl4":{"duration":55.8,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=n832gDqfWl4","annotations":[]},"j59UZGZtuNg":{"duration":13.49,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=j59UZGZtuNg","annotations":[{"segment":[0.48563999999999996,7.254922],"label":"Springboard diving"}]},"b-p57jzkrQI":{"duration":25.333,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=b-p57jzkrQI","annotations":[{"segment":[0.01,25.332969],"label":"Making an omelette"}]},"sxf0x55Cvb4":{"duration":11.657,"subset":"training","resolution":"3414x1088","url":"https://www.youtube.com/watch?v=sxf0x55Cvb4","annotations":[{"segment":[0.01,11.656416],"label":"Canoeing"}]},"U9pnR51t6As":{"duration":200.691,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=U9pnR51t6As","annotations":[{"segment":[9.815679604717504,69.52251761094803],"label":"Sumo"},{"segment":[92.34240857668013,99.84483848322218],"label":"Sumo"},{"segment":[117.97571075736548,134.8561780470851],"label":"Sumo"},{"segment":[143.60901293805082,159.23907444988885],"label":"Sumo"}]},"JgHubY5Vw3Y":{"duration":143.64,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JgHubY5Vw3Y","annotations":[{"segment":[13.33626131045242,14.681094383775351],"label":"Fixing bicycle"},{"segment":[49.42261544461779,58.388169266770674],"label":"Fixing 
bicycle"}]},"qcA3mFHfo84":{"duration":60.02,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qcA3mFHfo84","annotations":[{"segment":[6.64848031825273,51.97051516380655],"label":"Pole vault"}]},"FKl0077EUoY":{"duration":56.982,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=FKl0077EUoY","annotations":[{"segment":[12.347580343213728,27.804263650546023],"label":"Sharpening knives"}]},"dXONZBWOKHk":{"duration":58.7,"subset":"validation","resolution":"482x360","url":"https://www.youtube.com/watch?v=dXONZBWOKHk","annotations":[{"segment":[0.01,58.700044],"label":"Removing curlers"}]},"8wqlhbw4e30":{"duration":106.91,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=8wqlhbw4e30","annotations":[]},"78WY5lXk42A":{"duration":205.451,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=78WY5lXk42A","annotations":[{"segment":[58.4718132390333,170.30315935964848],"label":"Hanging wallpaper"}]},"SKMVLKmgxAw":{"duration":149.258,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=SKMVLKmgxAw","annotations":[{"segment":[0.009999999999999998,144.6008362371295],"label":"Playing 
congas"}]},"WEjMCo8OfjE":{"duration":169.553,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WEjMCo8OfjE","annotations":[{"segment":[11.63226833073323,38.86235101404056],"label":"Skiing"},{"segment":[42.827897035881435,47.850921996879876],"label":"Skiing"},{"segment":[52.60957722308893,84.06957566302653],"label":"Skiing"},{"segment":[94.90873478939159,120.5668268330733],"label":"Skiing"},{"segment":[130.87846333853352,153.08814196567863],"label":"Skiing"}]},"_zqj6daJo9U":{"duration":15.09,"subset":"testing","resolution":"480x480","url":"https://www.youtube.com/watch?v=_zqj6daJo9U","annotations":[]},"FnrvqpjHe6I":{"duration":191.333,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=FnrvqpjHe6I","annotations":[{"segment":[14.92452623274999,191.332425],"label":"Futsal"}]},"FsMTWgT5t8s":{"duration":40.1,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FsMTWgT5t8s","annotations":[]},"BnAG37ecNbA":{"duration":70.59,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BnAG37ecNbA","annotations":[{"segment":[11.893245585023402,61.448435522620905],"label":"Shaving legs"}]},"lIqAkmaXnXg":{"duration":183.856,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=lIqAkmaXnXg","annotations":[{"segment":[12.61691887675507,172.73082059282373],"label":"Painting furniture"}]},"_7JooVc_sCQ":{"duration":207.912,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=_7JooVc_sCQ","annotations":[{"segment":[51.803364485981305,179.38640809968848],"label":"BMX"}]},"fxgbk_Kk4Rw":{"duration":125.83,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fxgbk_Kk4Rw","annotations":[{"segment":[25.3228276396362,81.46491062363582],"label":"Vacuuming 
floor"}]},"seScyTd2XSc":{"duration":99.312,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=seScyTd2XSc","annotations":[{"segment":[37.028872545378,83.50862801092043],"label":"Cleaning sink"}]},"9hR1MHvXGv8":{"duration":112.52000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9hR1MHvXGv8","annotations":[{"segment":[46.60871138845554,72.0635631825273],"label":"Rock climbing"},{"segment":[92.25189391575664,103.6626895475819],"label":"Rock climbing"}]},"Rn6_IwckXg4":{"duration":31.53,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Rn6_IwckXg4","annotations":[]},"-xDx4qvX3KQ":{"duration":214.483,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-xDx4qvX3KQ","annotations":[]},"sqAFVbKbcbM":{"duration":191.24,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=sqAFVbKbcbM","annotations":[]},"qbGml8XLhz8":{"duration":166.09,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=qbGml8XLhz8","annotations":[{"segment":[9.453651808112324,157.60403082839315],"label":"Tai chi"}]},"2wDakH-_GzU":{"duration":144.707,"subset":"testing","resolution":"618x360","url":"https://www.youtube.com/watch?v=2wDakH-_GzU","annotations":[]},"pLVCuSq560Y":{"duration":185.853,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=pLVCuSq560Y","annotations":[{"segment":[17.106549834633384,174.83473814040562],"label":"Beach soccer"}]},"5eV7gLIMCas":{"duration":127.501,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5eV7gLIMCas","annotations":[]},"ZYPKueJon34":{"duration":24.34,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=ZYPKueJon34","annotations":[{"segment":[1.48486068,8.846251827613106],"label":"Bungee 
jumping"}]},"2ahuZDlObAQ":{"duration":196.4,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2ahuZDlObAQ","annotations":[{"segment":[19.34854396740447,182.64358135058205],"label":"Rock climbing"}]},"7PhDA2-meDc":{"duration":122.509,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7PhDA2-meDc","annotations":[]},"taHfD8TFfX4":{"duration":5.53,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=taHfD8TFfX4","annotations":[{"segment":[0,5.526349],"label":"Cricket"}]},"lfbspF18Wrw":{"duration":66.967,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lfbspF18Wrw","annotations":[]},"iYdbam6e8cI":{"duration":188.384,"subset":"training","resolution":"384x240","url":"https://www.youtube.com/watch?v=iYdbam6e8cI","annotations":[{"segment":[0.19999999999999998,172.85576487635464],"label":"Rollerblading"}]},"LkwkGj27pP8":{"duration":27.911,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=LkwkGj27pP8","annotations":[{"segment":[0,27.861],"label":"Painting fence"}]},"H-PhEsI9njQ":{"duration":60.094,"subset":"training","resolution":"360x240","url":"https://www.youtube.com/watch?v=H-PhEsI9njQ","annotations":[{"segment":[4.781209502340094,24.281044335413416],"label":"Shuffleboard"},{"segment":[32.0622284274571,42.74963790327613],"label":"Shuffleboard"},{"segment":[54.2807902324493,58.874501322932915],"label":"Shuffleboard"}]},"-g-qMUjVA-s":{"duration":62.694,"subset":"validation","resolution":"160x112","url":"https://www.youtube.com/watch?v=-g-qMUjVA-s","annotations":[{"segment":[0.01,62.641],"label":"Roof shingle removal"}]},"o3yvGAz5IJ8":{"duration":207.68,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=o3yvGAz5IJ8","annotations":[{"segment":[42.73855226209049,71.55469734789392],"label":"Polishing forniture"},{"segment":[92.27641965678627,134.04364118564743],"label":"Polishing 
forniture"}]},"pHAIrx_cRFU":{"duration":145.265,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=pHAIrx_cRFU","annotations":[{"segment":[0,64.21916302652106],"label":"Sharpening knives"},{"segment":[88.9101287051482,110.2184344773791],"label":"Sharpening knives"}]},"jQVT4u6NojM":{"duration":78.298,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jQVT4u6NojM","annotations":[{"segment":[11.47371918876755,57.38326053042122],"label":"Arm wrestling"}]},"YzyCFfrX_4I":{"duration":18.762,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YzyCFfrX_4I","annotations":[{"segment":[1.611560062402496,17.08253666146646],"label":"Rollerblading"}]},"EInkc1uEX3c":{"duration":228.183,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EInkc1uEX3c","annotations":[{"segment":[32.74410608424337,179.73666926677066],"label":"Laying tile"},{"segment":[191.8545694227769,200.39725897035882],"label":"Laying tile"}]},"kxfOrs5ZWkw":{"duration":203.64,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=kxfOrs5ZWkw","annotations":[{"segment":[7.9687112913938325,93.74489009364797],"label":"Making a cake"},{"segment":[100.09868111603716,115.66546912089068],"label":"Making a cake"},{"segment":[119.4777437343242,165.2250390955264],"label":"Making a cake"}]},"vgO3mzJGbSI":{"duration":203.22,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vgO3mzJGbSI","annotations":[{"segment":[112.23116537722733,123.64450422914877],"label":"Getting a piercing"}]},"BCdt22s9hlU":{"duration":97.757,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=BCdt22s9hlU","annotations":[{"segment":[5.335780031201248,97.701],"label":"Putting on 
shoes"}]},"I5Q0DcmTs9c":{"duration":227.02,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=I5Q0DcmTs9c","annotations":[{"segment":[4.12270136,227.021495],"label":"Playing harmonica"}]},"zOcXA1S_VPI":{"duration":159.2,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zOcXA1S_VPI","annotations":[{"segment":[27.178833739469578,125.84125408],"label":"Layup drill in basketball"}]},"P-04xkAdWSY":{"duration":30.186,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=P-04xkAdWSY","annotations":[{"segment":[0.01,20.595922813118435],"label":"Sumo"}]},"QGZN8aXpXHU":{"duration":124.13,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=QGZN8aXpXHU","annotations":[{"segment":[38.687467394695794,109.67897006396257],"label":"Getting a haircut"}]},"AIxxMCcmIgM":{"duration":83.639,"subset":"training","resolution":"1270x720","url":"https://www.youtube.com/watch?v=AIxxMCcmIgM","annotations":[{"segment":[0.19999999999999998,76.01646957878314],"label":"Wakeboarding"}]},"YTXolcHh_qo":{"duration":135.49,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=YTXolcHh_qo","annotations":[]},"_R-jA9hOFCE":{"duration":206.66,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_R-jA9hOFCE","annotations":[{"segment":[4.835981194349815,8.05996865724969],"label":"Sailing"},{"segment":[18.054329792239308,21.60071600142917],"label":"Sailing"},{"segment":[25.147102210619035,42.879033256568356],"label":"Sailing"},{"segment":[142.8226446064645,145.07943583049442],"label":"Sailing"},{"segment":[152.81700574145415,155.71859445806405],"label":"Sailing"},{"segment":[164.10096077223088,169.90413816692666],"label":"Sailing"}]},"n41Ypwpn-P8":{"duration":102.53999999999999,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=n41Ypwpn-P8","annotations":[{"segment":[60.62778585288611,99.49995
47192772],"label":"Playing kickball"}]},"K5R5tbbISAg":{"duration":43.677,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=K5R5tbbISAg","annotations":[]},"z-ttrQ38mOc":{"duration":221.05,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=z-ttrQ38mOc","annotations":[{"segment":[12.409609984399378,188.55712948517942],"label":"Playing violin"}]},"BJGACTBaU_Y":{"duration":14.583,"subset":"training","resolution":"426x214","url":"https://www.youtube.com/watch?v=BJGACTBaU_Y","annotations":[{"segment":[0.2,14.281],"label":"Hitting a pinata"}]},"-Rv7tGWehRE":{"duration":60.071,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=-Rv7tGWehRE","annotations":[{"segment":[0.2,58.244677570093465],"label":"Powerbocking"}]},"HQP20PGfwYM":{"duration":178.005,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=HQP20PGfwYM","annotations":[{"segment":[0.2,171.07206624705958],"label":"Installing carpet"}]},"qmar8pDP5co":{"duration":176.054,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qmar8pDP5co","annotations":[{"segment":[133.99140093603745,144.16696567862715],"label":"Sumo"}]},"i7yqaSTEdAA":{"duration":194.91,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=i7yqaSTEdAA","annotations":[]},"PtRpSvyZQm4":{"duration":80.06,"subset":"testing","resolution":"294x240","url":"https://www.youtube.com/watch?v=PtRpSvyZQm4","annotations":[]},"OKTmKPHpzqo":{"duration":238.539,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OKTmKPHpzqo","annotations":[]},"MlkXsUDCoww":{"duration":51.386,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=MlkXsUDCoww","annotations":[{"segment":[4.89006442901716,35.59325584399376],"label":"Cutting the 
grass"}]},"dnJJWt0SBTc":{"duration":210.885,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dnJJWt0SBTc","annotations":[{"segment":[0.01,43.822731653908704],"label":"Doing fencing"},{"segment":[54.99194349191494,75.68783601527943],"label":"Doing fencing"},{"segment":[99.66879084393986,109.19547152929812],"label":"Doing fencing"},{"segment":[130.8768827442514,133.50493258848815],"label":"Doing fencing"},{"segment":[136.4614886632545,141.717588351728],"label":"Doing fencing"},{"segment":[182.4523609373978,199.53468492493676],"label":"Doing fencing"}]},"1UqVaZCy8gU":{"duration":32.694,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=1UqVaZCy8gU","annotations":[]},"GG3JgE6hy4g":{"duration":106.092,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=GG3JgE6hy4g","annotations":[{"segment":[0.01,106.041],"label":"Roof shingle removal"}]},"LCbKBMILWDk":{"duration":97.315,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LCbKBMILWDk","annotations":[]},"DS02_2l_BOE":{"duration":63.62,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=DS02_2l_BOE","annotations":[]},"Ygy-zufAejI":{"duration":58.213,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=Ygy-zufAejI","annotations":[{"segment":[4.714964118564743,17.777903276131045],"label":"Slacklining"},{"segment":[22.494489859594385,41.995761310452416],"label":"Slacklining"}]},"YYmx8EHIjAE":{"duration":38.917,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=YYmx8EHIjAE","annotations":[{"segment":[0.01,38.916643],"label":"Laying tile"}]},"5MnHirHxqkA":{"duration":198.16,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=5MnHirHxqkA","annotations":[{"segment":[13.602184228865754,175.9009733232867],"label":"Ironing 
clothes"}]},"VAKQMtYrl4I":{"duration":73.03,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VAKQMtYrl4I","annotations":[{"segment":[0.01,73.02657596371883],"label":"Walking the dog"}]},"-5Yp-vToI2E":{"duration":116.47,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-5Yp-vToI2E","annotations":[{"segment":[52.74244264664586,105.3941065624025],"label":"Ballet"},{"segment":[3.18198772,50.369048660000004],"label":"Ballet"}]},"HUxoNWi1qjs":{"duration":186.2,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=HUxoNWi1qjs","annotations":[]},"qhsK94t7x7w":{"duration":235.985,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qhsK94t7x7w","annotations":[{"segment":[10.306315132605304,231.9117472698908],"label":"Bullfighting"}]},"2Mw-3jScc0M":{"duration":226.209,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2Mw-3jScc0M","annotations":[{"segment":[103.73980343213728,165.4896864274571],"label":"Wakeboarding"}]},"4UIAc9irBLs":{"duration":63.345,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4UIAc9irBLs","annotations":[{"segment":[0.01,62.35560656711983],"label":"Swimming"}]},"hgTgOjkcvpc":{"duration":15.697,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hgTgOjkcvpc","annotations":[{"segment":[0.01,15.661],"label":"Croquet"}]},"xcrGhwE7R5E":{"duration":180.09,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=xcrGhwE7R5E","annotations":[{"segment":[23.31949962714509,171.10331654134166],"label":"Preparing 
pasta"}]},"qH3HnhEaeok":{"duration":182.509,"subset":"validation","resolution":"352x288","url":"https://www.youtube.com/watch?v=qH3HnhEaeok","annotations":[{"segment":[21.923820158970358,180.80033507722308],"label":"Canoeing"}]},"cdHBwzbNI5Y":{"duration":26.541,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cdHBwzbNI5Y","annotations":[{"segment":[1.7804017176287048,26.08495539781591],"label":"Baton twirling"}]},"3VjaC7wdMso":{"duration":51.53,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3VjaC7wdMso","annotations":[]},"gBTnWp9VLz4":{"duration":208.144,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gBTnWp9VLz4","annotations":[{"segment":[22.107822220541884,208.14367346938775],"label":"Snowboarding"}]},"_K88UOvtQPk":{"duration":67.99,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=_K88UOvtQPk","annotations":[{"segment":[1.3248829953198127,65.34322932917316],"label":"Playing saxophone"}]},"z60D7p37Lws":{"duration":80.759,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=z60D7p37Lws","annotations":[{"segment":[28.194296411856474,76.29454914196567],"label":"Hitting a pinata"}]},"YK1yqJUcnj8":{"duration":150.094,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YK1yqJUcnj8","annotations":[]},"2JyEK7sKsrY":{"duration":119.21000000000001,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2JyEK7sKsrY","annotations":[]},"c6uw9Qpp1Tg":{"duration":207.819,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=c6uw9Qpp1Tg","annotations":[]},"N0mNYZaCJoQ":{"duration":31.998,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=N0mNYZaCJoQ","annotations":[{"segment":[0.2,30.837015600624024],"label":"Camel 
ride"}]},"aN9vCyXMbb8":{"duration":65.23,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=aN9vCyXMbb8","annotations":[{"segment":[4.578967698907956,62.78273489391576],"label":"Playing saxophone"}]},"5C0G3BQ-Nds":{"duration":213.717,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=5C0G3BQ-Nds","annotations":[{"segment":[0.2,207.33559750390017],"label":"Table soccer"}]},"k_bvz0NHKBo":{"duration":169.042,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=k_bvz0NHKBo","annotations":[{"segment":[8.149731708649744,19.191309135888137],"label":"Beer pong"},{"segment":[34.17630707285453,46.53235800333558],"label":"Beer pong"},{"segment":[92.27603591618035,96.48235112655688],"label":"Beer pong"}]},"PVdd6E1S0Yc":{"duration":91.12,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=PVdd6E1S0Yc","annotations":[{"segment":[24.306812696997678,25.870408835401037],"label":"Washing face"},{"segment":[27.149714766821965,29.139746215698967],"label":"Washing face"},{"segment":[33.97267973440026,40.653499598487336],"label":"Washing face"},{"segment":[78.32195173322933,87.2770932199688],"label":"Washing face"}]},"15npAlupNU4":{"duration":122.068,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=15npAlupNU4","annotations":[{"segment":[0.01,73.12612138771266],"label":"Removing ice from car"},{"segment":[91.59808315132605,95.78760046801872],"label":"Removing ice from car"},{"segment":[107.02312418096723,110.07004586583463],"label":"Removing ice from car"}]},"44r5SAczrOg":{"duration":53.174,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=44r5SAczrOg","annotations":[{"segment":[0.2,49.35779893915757],"label":"Playing rubik 
cube"}]},"tbPbyTLkYF0":{"duration":60.628,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tbPbyTLkYF0","annotations":[]},"5KYUiMysyb0":{"duration":22.013,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=5KYUiMysyb0","annotations":[{"segment":[6.440979719188767,17.042990639625586],"label":"Cleaning sink"}]},"yNwdhK5UKec":{"duration":151.51,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yNwdhK5UKec","annotations":[{"segment":[0.01,17.257609984399377],"label":"Playing harmonica"},{"segment":[26.950240249609983,142.07977535101406],"label":"Playing harmonica"}]},"RDCZQ0nWIPc":{"duration":115.589,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RDCZQ0nWIPc","annotations":[{"segment":[6.471539453974955,10.60613585759602],"label":"Rollerblading"},{"segment":[58.78317158204176,61.299882409413456],"label":"Rollerblading"}]},"py4ekPd5G_k":{"duration":71.633,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=py4ekPd5G_k","annotations":[{"segment":[23.20835145273062,70.2945246728972],"label":"River tubing"}]},"u1grT0O81Qg":{"duration":159.336,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=u1grT0O81Qg","annotations":[{"segment":[0.01,149.8572589703588],"label":"Doing fencing"}]},"V44vW_JrDow":{"duration":49.97,"subset":"testing","resolution":"800x480","url":"https://www.youtube.com/watch?v=V44vW_JrDow","annotations":[]},"FBcsA3MB3TA":{"duration":200.13,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FBcsA3MB3TA","annotations":[]},"stVRtmxHVaE":{"duration":234.29,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=stVRtmxHVaE","annotations":[{"segment":[78.92626521060843,181.25356630265213],"label":"Roof shingle 
removal"}]},"BQRidRi2V1c":{"duration":68.035,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=BQRidRi2V1c","annotations":[{"segment":[13.044716068642746,46.14732449297973],"label":"Rope skipping"},{"segment":[53.78550234009361,65.03059750390015],"label":"Rope skipping"}]},"plhiqYw0P_g":{"duration":124.923,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=plhiqYw0P_g","annotations":[{"segment":[13.057480634945398,112.06046813572543],"label":"Waterskiing"}]},"gdi_m9LjRAw":{"duration":104.328,"subset":"training","resolution":"582x480","url":"https://www.youtube.com/watch?v=gdi_m9LjRAw","annotations":[{"segment":[41.925906711594294,78.0016859813084],"label":"Trimming branches or hedges"}]},"-UWE4jXuLoo":{"duration":180.58,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-UWE4jXuLoo","annotations":[{"segment":[96.70473711999999,171.0998634945398],"label":"Skateboarding"},{"segment":[40.79144209,96.74535491419657],"label":"Skateboarding"}]},"j6S6eWSeGr0":{"duration":51.897,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=j6S6eWSeGr0","annotations":[{"segment":[0.01,51.781],"label":"Painting fence"}]},"QOlSCBRmfWY":{"duration":82.73,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QOlSCBRmfWY","annotations":[{"segment":[6.195294851794072,77.73085420904837],"label":"Ballet"}]},"yeLB4QXA3NQ":{"duration":165.187,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=yeLB4QXA3NQ","annotations":[{"segment":[10.652230452422097,16.8274362841978],"label":"Hand car wash"},{"segment":[31.493550134665085,93.50290869541274],"label":"Hand car wash"},{"segment":[144.70565705055293,164.26047551784265],"label":"Hand car 
wash"}]},"GAEavSUmQRk":{"duration":216.34,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=GAEavSUmQRk","annotations":[{"segment":[57.51628458,200.17893915756628],"label":"Playing saxophone"}]},"fMVKdmMVixQ":{"duration":182.6,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fMVKdmMVixQ","annotations":[{"segment":[16.80733489672387,171.06448483868954],"label":"Tai chi"}]},"5K6oT9zBjVE":{"duration":144.777,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5K6oT9zBjVE","annotations":[{"segment":[0.01,144.741],"label":"Cleaning sink"}]},"Kmk0IVi2Xu4":{"duration":178.144,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Kmk0IVi2Xu4","annotations":[{"segment":[37.23172230889235,70.57356318252731],"label":"Playing congas"},{"segment":[115.3071996879875,119.21046645865836],"label":"Playing congas"}]},"jSxu2GOiBO0":{"duration":218.941,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=jSxu2GOiBO0","annotations":[{"segment":[18.102762021840874,64.89669404056163],"label":"Skiing"},{"segment":[84.02414070514821,104.51783356006241],"label":"Skiing"},{"segment":[126.71933415288613,157.8014349828393],"label":"Skiing"},{"segment":[198.1056975975039,208.352544024961],"label":"Skiing"}]},"LAZHNzFbDNY":{"duration":106.67,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=LAZHNzFbDNY","annotations":[{"segment":[12.718930577223087,29.6775046801872],"label":"Discus throw"},{"segment":[49.62876833073322,55.94666848673947],"label":"Discus throw"},{"segment":[72.73898205928238,77.06175585023401],"label":"Discus 
throw"}]},"7YbdzJWLBxs":{"duration":179.746,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=7YbdzJWLBxs","annotations":[]},"E15Q3Z9J-Zg":{"duration":130.473,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=E15Q3Z9J-Zg","annotations":[{"segment":[0.2,4.261168629193513],"label":"Elliptical trainer"},{"segment":[22.52331989716571,111.39912173561432],"label":"Elliptical trainer"}]},"tO8yDY0x6yA":{"duration":218.315,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tO8yDY0x6yA","annotations":[]},"Boa880LnJ3w":{"duration":108.925,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Boa880LnJ3w","annotations":[{"segment":[19.70389391575663,95.479503900156],"label":"Assembling bicycle"}]},"DVlDRG0SxG8":{"duration":26.84,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DVlDRG0SxG8","annotations":[{"segment":[0.6071963803722217,25.39755887480499],"label":"Kayaking"}]},"2nPrH4Tv0yc":{"duration":83.06,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=2nPrH4Tv0yc","annotations":[{"segment":[1.1661781450858035,83.057799],"label":"Doing kickboxing"}]},"8PIQD4aLQhI":{"duration":147.8,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8PIQD4aLQhI","annotations":[]},"WQzjqCkgZn0":{"duration":153.693,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=WQzjqCkgZn0","annotations":[]},"qBqUu4_qOnU":{"duration":16.254,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=qBqUu4_qOnU","annotations":[{"segment":[0.01,16.253968],"label":"Hitting a pinata"}]},"dBCiKzkJogg":{"duration":236.24,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=dBCiKzkJogg","annotations":[{"segment":[0.2,227.37105607476636],"label":"Beer 
pong"}]},"bSaZYR-4t5k":{"duration":12.75,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bSaZYR-4t5k","annotations":[]},"UH_z4C6sv3E":{"duration":54.1,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=UH_z4C6sv3E","annotations":[{"segment":[15.783410982697811,54.102494331065756],"label":"Shaving"}]},"7KEM_rbhASw":{"duration":191.217,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7KEM_rbhASw","annotations":[{"segment":[40.54985335413417,135.08092511700465],"label":"Rafting"},{"segment":[143.43029797191886,172.95129485179407],"label":"Rafting"}]},"K4qFT8_yqJ0":{"duration":53.708,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=K4qFT8_yqJ0","annotations":[]},"VNUVKrN4ndc":{"duration":83.71000000000001,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VNUVKrN4ndc","annotations":[{"segment":[27.796666146645865,69.55691575663026],"label":"Using parallel bars"}]},"6xAe1YVbxuY":{"duration":204.8,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6xAe1YVbxuY","annotations":[{"segment":[82.302976,196.92734757332292],"label":"Snatch"},{"segment":[14.514176,68.182016],"label":"Snatch"}]},"sHx5UcQQ1kU":{"duration":197.277,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sHx5UcQQ1kU","annotations":[{"segment":[71.84259516380655,93.07231279251171],"label":"Sharpening knives"},{"segment":[114.30203042121686,126.3014360374415],"label":"Sharpening knives"}]},"woI_zP55qWo":{"duration":27.08,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=woI_zP55qWo","annotations":[]},"OyV4eki18GE":{"duration":77.58,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=OyV4eki18GE","annotations":[{"segment":[3.97039086,27.367614059999998],"label":"Playing 
kickball"},{"segment":[9.863644556162248,36.731486169266766],"label":"Playing kickball"}]},"E50d5qFvzOI":{"duration":164.28,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=E50d5qFvzOI","annotations":[{"segment":[9.349162728549143,159.96033216380656],"label":"Tango"}]},"hRMSh7vUmGY":{"duration":116.57,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=hRMSh7vUmGY","annotations":[{"segment":[7.819408555934073,15.093276980058794],"label":"Clean and jerk"},{"segment":[16.366203954280618,25.2766927738334],"label":"Clean and jerk"},{"segment":[28.00439343288017,38.73334935846413],"label":"Clean and jerk"},{"segment":[40.733663175098435,50.00784541585745],"label":"Clean and jerk"},{"segment":[52.00815923249174,61.10049476264765],"label":"Clean and jerk"},{"segment":[64.55558226410689,74.01161121546903],"label":"Clean and jerk"},{"segment":[100.379383825273,113.47234693291732],"label":"Clean and jerk"}]},"yeQDfh6K6Sc":{"duration":41.4,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yeQDfh6K6Sc","annotations":[{"segment":[2.9710674477379095,41.401179],"label":"Smoking hookah"}]},"-jl_v7zi17A":{"duration":214.553,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=-jl_v7zi17A","annotations":[{"segment":[7.6984473664660875,19.748191070499963],"label":"Elliptical trainer"},{"segment":[23.76477230517792,27.11192333407622],"label":"Elliptical trainer"},{"segment":[35.8145160092118,66.94302057796598],"label":"Elliptical trainer"},{"segment":[73.9720377386524,111.79484436520318],"label":"Elliptical trainer"},{"segment":[127.19173909813537,141.24977341950822],"label":"Elliptical trainer"},{"segment":[149.61765099175395,160.66324938711836],"label":"Elliptical trainer"},{"segment":[172.7129930911522,175.05599803432136],"label":"Elliptical trainer"},{"segment":[192.79589840873635,201.16377594383778],"label":"Elliptical 
trainer"}]},"9PqYL4zTnxY":{"duration":138.6,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9PqYL4zTnxY","annotations":[{"segment":[3.890386895475819,15.779957878315132],"label":"Rope skipping"},{"segment":[39.3418127925117,129.04979251170047],"label":"Rope skipping"}]},"kUlwaBYvyBI":{"duration":107.14,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kUlwaBYvyBI","annotations":[{"segment":[63.1787162915088,92.42812198202213],"label":"Polishing forniture"},{"segment":[65.31487563280717,93.80682504087524],"label":"Painting furniture"}]},"J3NhT51f0To":{"duration":193.26,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=J3NhT51f0To","annotations":[{"segment":[95.81837736973479,193.143333],"label":"Playing violin"}]},"J0EWvkkxh5o":{"duration":53.57,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=J0EWvkkxh5o","annotations":[{"segment":[4.798992429797193,51.620466397035884],"label":"Plataform diving"}]},"pp1KvMmpgBk":{"duration":4.99,"subset":"testing","resolution":"800x480","url":"https://www.youtube.com/watch?v=pp1KvMmpgBk","annotations":[]},"MVzypK0eMKc":{"duration":126.062,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MVzypK0eMKc","annotations":[{"segment":[21.826132605304213,83.96178939157566],"label":"Making a lemonade"}]},"RTM6iJxc-G0":{"duration":31.719,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=RTM6iJxc-G0","annotations":[{"segment":[8.3199193258544,10.147930087536643],"label":"Drum corps"},{"segment":[16.175425031461877,19.63382376977963],"label":"Drum corps"}]},"9WDvq8LXrxU":{"duration":20.11,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=9WDvq8LXrxU","annotations":[{"segment":[3.42436563,18.44584491856474],"label":"Polishing 
shoes"}]},"LdDB7xXXHQM":{"duration":192.08,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LdDB7xXXHQM","annotations":[{"segment":[71.61628096688494,112.6682913956014],"label":"Doing kickboxing"},{"segment":[111.17004283980884,132.44517105460218],"label":"Doing kickboxing"},{"segment":[24.27162636973479,47.04500419812793],"label":"Doing kickboxing"}]},"dE1NAofn3ks":{"duration":46.231,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=dE1NAofn3ks","annotations":[{"segment":[4.9668159126365055,18.211658346333852],"label":"Curling"},{"segment":[20.65907488299532,32.32029485179407],"label":"Curling"},{"segment":[37.215127925117,46.141],"label":"Curling"}]},"nsdB1r31-70":{"duration":73.05,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nsdB1r31-70","annotations":[{"segment":[0.2,67.93761325856158],"label":"Swinging at the playground"}]},"pS-BoCsMcpQ":{"duration":131.52,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=pS-BoCsMcpQ","annotations":[{"segment":[5.333082683307333,109.3281950078003],"label":"Playing water polo"}]},"7e035QnLp0c":{"duration":176.17,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=7e035QnLp0c","annotations":[{"segment":[7.134602249221183,166.5655235552165],"label":"Slacklining"}]},"v8XN1ajGeIw":{"duration":152.28,"subset":"training","resolution":"656x480","url":"https://www.youtube.com/watch?v=v8XN1ajGeIw","annotations":[{"segment":[7.364377652106085,93.59886435257411],"label":"Vacuuming floor"},{"segment":[109.2778619344774,143.48658393135727],"label":"Vacuuming 
floor"}]},"meNgZdu8tIk":{"duration":193.14,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=meNgZdu8tIk","annotations":[]},"CkE-dOzXddY":{"duration":173.69,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=CkE-dOzXddY","annotations":[]},"Ga-204sSRoc":{"duration":46.371,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ga-204sSRoc","annotations":[{"segment":[2.242554895475819,44.2000335850234],"label":"Fixing the roof"}]},"6iwKUJUUlaQ":{"duration":14.75,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6iwKUJUUlaQ","annotations":[{"segment":[0.27602531045241807,12.83517693603744],"label":"Hopscotch"}]},"k3UfVrIInMY":{"duration":22.222,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=k3UfVrIInMY","annotations":[]},"Yi-0wjSu0E0":{"duration":27.052,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=Yi-0wjSu0E0","annotations":[{"segment":[1.6849297971918877,10.66507488299532],"label":"Mooping floor"}]},"mfjO2ziAT7s":{"duration":223.887,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mfjO2ziAT7s","annotations":[]},"pJEOK1DbTfU":{"duration":16.07,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=pJEOK1DbTfU","annotations":[{"segment":[0.01,15.366334633385335],"label":"Using uneven bars"}]},"YMSMASHyl2Y":{"duration":154.39,"subset":"training","resolution":"540x360","url":"https://www.youtube.com/watch?v=YMSMASHyl2Y","annotations":[{"segment":[50.56413416536662,97.51654446177847],"label":"Grooming dog"},{"segment":[111.73713572542901,115.3493276131045],"label":"Grooming dog"},{"segment":[123.53696255850232,142.56117316692666],"label":"Grooming 
dog"}]},"3Nd3Aai1bVA":{"duration":130.822,"subset":"testing","resolution":"176x144","url":"https://www.youtube.com/watch?v=3Nd3Aai1bVA","annotations":[]},"dGHCQVdC27c":{"duration":67.965,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=dGHCQVdC27c","annotations":[{"segment":[0.2,67.721],"label":"Plastering"}]},"dB50ZkOlDzY":{"duration":41.87,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=dB50ZkOlDzY","annotations":[{"segment":[0.01,41.821],"label":"Using parallel bars"}]},"gGEkmrII58w":{"duration":130.744,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gGEkmrII58w","annotations":[]},"a2Xwx37YbhE":{"duration":43.84,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=a2Xwx37YbhE","annotations":[{"segment":[3.351189042121685,43.839024],"label":"Disc dog"}]},"gXdFGYPKClE":{"duration":67.896,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=gXdFGYPKClE","annotations":[{"segment":[17.457823712948517,65.08925897035881],"label":"BMX"}]},"QrsOyEw4_7k":{"duration":164.514,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=QrsOyEw4_7k","annotations":[{"segment":[0.01,164.51337868480726],"label":"Putting in contact lenses"}]},"xR9VOguQeKM":{"duration":208.35,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xR9VOguQeKM","annotations":[{"segment":[18.849638065522623,25.02451950078003],"label":"Pole vault"},{"segment":[33.79935101404056,38.02426989079563],"label":"Pole vault"},{"segment":[70.19865210608424,90.67325897035882],"label":"Pole vault"},{"segment":[104.32299687987519,108.54791575663026],"label":"Pole vault"},{"segment":[117.64774102964118,135.52239781591263],"label":"Pole vault"},{"segment":[144.62222308892356,145.59720436817474],"label":"Pole vault"},{"segment":[155.34701716068642,163.1468673946958],"label":"Pole 
vault"},{"segment":[169.97173634945398,174.52164898595944],"label":"Pole vault"}]},"Le6fNx5IWRI":{"duration":161.59,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Le6fNx5IWRI","annotations":[{"segment":[16.03266214,90.92500489999999],"label":"Using the pommel horse"},{"segment":[98.40971474,124.92937318],"label":"Using the pommel horse"}]},"Y9d4iPDeigI":{"duration":46.9,"subset":"testing","resolution":"342x240","url":"https://www.youtube.com/watch?v=Y9d4iPDeigI","annotations":[]},"z2qG-TOSwqw":{"duration":49.2,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=z2qG-TOSwqw","annotations":[{"segment":[0,49.20308390022676],"label":"Getting a piercing"}]},"TUMk0wpBiP0":{"duration":165.582,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TUMk0wpBiP0","annotations":[{"segment":[31.514661628705145,36.68099960062402],"label":"Snowboarding"},{"segment":[43.397238964118564,155.76508985335414],"label":"Snowboarding"}]},"2WeG8QGOfcI":{"duration":212.77,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=2WeG8QGOfcI","annotations":[{"segment":[8.961985361934477,207.12143947581905],"label":"Zumba"}]},"O7Aalo-T70I":{"duration":62.741,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=O7Aalo-T70I","annotations":[]},"SjP2LnPSpS8":{"duration":174.986,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SjP2LnPSpS8","annotations":[]},"hbipKjS0G7w":{"duration":122.42,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=hbipKjS0G7w","annotations":[{"segment":[6.91032675,119.5509593978159],"label":"Shot put"}]},"E4PIKUW4v0k":{"duration":48.58,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=E4PIKUW4v0k","annotations":[{"segment":[9.03319296,22.27986816],"label":"Smoking a 
cigarette"},{"segment":[25.978444800000005,40.95296832],"label":"Smoking a cigarette"}]},"EBw5-KVV_PU":{"duration":140.3,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=EBw5-KVV_PU","annotations":[]},"LnInBoqM37Y":{"duration":29.049,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=LnInBoqM37Y","annotations":[]},"3CJW0qBwwx4":{"duration":126.596,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3CJW0qBwwx4","annotations":[{"segment":[0,99.11641497659907],"label":"Hand car wash"},{"segment":[104.84226365054603,122.41469578783152],"label":"Hand car wash"}]},"BhxKjirVF4Y":{"duration":106.33,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BhxKjirVF4Y","annotations":[{"segment":[32.11164196567863,55.510822932917314],"label":"Spinning"},{"segment":[65.63387285491419,71.94003510140405],"label":"Spinning"},{"segment":[80.73547191887675,88.53519890795631],"label":"Spinning"}]},"0LJ1mSpqGJg":{"duration":192.33,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0LJ1mSpqGJg","annotations":[{"segment":[44.735958000000004,116.0230725],"label":"Tennis serve with ball bouncing"}]},"sTtFSpelQk4":{"duration":209.537,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=sTtFSpelQk4","annotations":[{"segment":[0.2,192.23809386059722],"label":"Canoeing"}]},"HsfrvjsaIDU":{"duration":21.92,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=HsfrvjsaIDU","annotations":[{"segment":[0.2,11.05997503900156],"label":"Trimming branches or 
hedges"}]},"1KKVWzCwRSA":{"duration":124.041,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1KKVWzCwRSA","annotations":[{"segment":[0.3869609984399376,97.32069110764431],"label":"Snowboarding"},{"segment":[104.47946957878314,107.38167706708268],"label":"Snowboarding"}]},"Vkr3r1Cd0mI":{"duration":163.748,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Vkr3r1Cd0mI","annotations":[{"segment":[0,137.83984399375976],"label":"Blow-drying hair"}]},"5zCY_vezHaU":{"duration":168.69,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5zCY_vezHaU","annotations":[{"segment":[21.84738689547582,25.53248829953198],"label":"Skateboarding"},{"segment":[48.16953978159126,53.433970358814356],"label":"Skateboarding"},{"segment":[70.80659126365055,78.70323712948517],"label":"Skateboarding"},{"segment":[94.49652886115445,102.65639625585024],"label":"Skateboarding"},{"segment":[110.28982059282372,116.34391575663027],"label":"Skateboarding"},{"segment":[120.55546021840874,129.504992199688],"label":"Skateboarding"},{"segment":[142.40284711388455,151.08915756630265],"label":"Skateboarding"}]},"pk7LcugO3zg":{"duration":161.53,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=pk7LcugO3zg","annotations":[{"segment":[70.5582527301092,132.5766177847114],"label":"Drinking coffee"}]},"RpB5_XYoYhk":{"duration":63.48,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=RpB5_XYoYhk","annotations":[{"segment":[15.444486739469578,54.84772854914196],"label":"Wrapping 
presents"}]},"m5z6TfYGMt4":{"duration":84.53999999999999,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=m5z6TfYGMt4","annotations":[]},"ufgYKCfK2Vk":{"duration":180.559,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ufgYKCfK2Vk","annotations":[{"segment":[13.52074967896675,170.69946311700468],"label":"Baton twirling"}]},"S5kuckj4Ud4":{"duration":230.041,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=S5kuckj4Ud4","annotations":[{"segment":[26.19801325585023,43.424104163806554],"label":"Changing car wheel"},{"segment":[79.67067044929797,166.87775567082682],"label":"Changing car wheel"}]},"QrKzEkFKu5g":{"duration":100.728,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QrKzEkFKu5g","annotations":[]},"cc9iCNPSiKc":{"duration":189.73,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=cc9iCNPSiKc","annotations":[{"segment":[0.01,189.681],"label":"Blowing leaves"}]},"QgYjZzI6sEU":{"duration":122.741,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=QgYjZzI6sEU","annotations":[]},"MeGHSbUpM-U":{"duration":229.39,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=MeGHSbUpM-U","annotations":[{"segment":[0.01,229.341],"label":"Sailing"}]},"fG7iLOObw30":{"duration":181.28,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fG7iLOObw30","annotations":[{"segment":[42.13798660842434,50.622144985959444],"label":"Getting a haircut"},{"segment":[100.67867941341655,114.25333281747271],"label":"Getting a haircut"},{"segment":[138.29178155382218,143.38227658034322],"label":"Getting a haircut"},{"segment":[143.09947130109205,156.10851414664586],"label":"Getting a 
haircut"}]},"FutaoYIC4Ok":{"duration":26.796,"subset":"training","resolution":"800x480","url":"https://www.youtube.com/watch?v=FutaoYIC4Ok","annotations":[{"segment":[0.2,25.916882995319813],"label":"Rollerblading"}]},"1LrEiYVd3qg":{"duration":182.77,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1LrEiYVd3qg","annotations":[{"segment":[75.55776822152886,96.37179493915755],"label":"Starting a campfire"}]},"4r0qYLCucEI":{"duration":213.996,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4r0qYLCucEI","annotations":[{"segment":[67.99836438001992,177.66239320858148],"label":"Changing car wheel"}]},"jCd-VeMUwRg":{"duration":161.054,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jCd-VeMUwRg","annotations":[{"segment":[12.060176382565507,150.75220436817472],"label":"Scuba diving"}]},"3DnHpUPPSBw":{"duration":77.37,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3DnHpUPPSBw","annotations":[]},"w8LDUu66JWI":{"duration":160.2,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=w8LDUu66JWI","annotations":[{"segment":[13.495319691107646,16.99410627769111],"label":"Cheerleading"},{"segment":[123.20727051326054,125.45649046177849],"label":"Cheerleading"}]},"FJZTNJ0LuJ4":{"duration":146.913,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FJZTNJ0LuJ4","annotations":[{"segment":[56.61064790346716,99.92810650858034],"label":"Making a 
cake"}]},"75Tc1nPmZFQ":{"duration":62.253,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=75Tc1nPmZFQ","annotations":[]},"wW6es5Ew50s":{"duration":116.17,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wW6es5Ew50s","annotations":[]},"Pfc7KbwqdYk":{"duration":37.99,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Pfc7KbwqdYk","annotations":[{"segment":[30.15747191887676,36.96435959438378],"label":"Drinking beer"}]},"YRRcCaxIoR4":{"duration":97.57,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=YRRcCaxIoR4","annotations":[{"segment":[10.807313119735674,90.11168122371154],"label":"Pole vault"}]},"h8cXVe6N6Oc":{"duration":4.41,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=h8cXVe6N6Oc","annotations":[{"segment":[0.6075711399999999,3.5239479],"label":"Brushing hair"}]},"Jo3R272P8eM":{"duration":128.22,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Jo3R272P8eM","annotations":[{"segment":[0.01,128.141],"label":"Playing racquetball"}]},"kuyuhXuMPX4":{"duration":133.28,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kuyuhXuMPX4","annotations":[{"segment":[50.30088923556943,96.86039001560063],"label":"Shoveling 
snow"}]},"ANB_IQfi9kA":{"duration":132.238,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ANB_IQfi9kA","annotations":[{"segment":[9.902350706273149,12.79053632893615],"label":"Surfing"},{"segment":[34.24562952586131,67.25346484555382],"label":"Surfing"},{"segment":[80.45659904836194,92.83453736349453],"label":"Surfing"},{"segment":[94.48492913884556,97.16681577379094],"label":"Surfing"},{"segment":[102.73688801560061,107.48176436973479],"label":"Surfing"},{"segment":[109.33845511700467,127.08016670202808],"label":"Surfing"}]},"5FM_xJGb-Tk":{"duration":34.668,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5FM_xJGb-Tk","annotations":[{"segment":[0.2,34.641],"label":"Canoeing"}]},"qWdjYyFfrTM":{"duration":184.41,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qWdjYyFfrTM","annotations":[{"segment":[14.45605668,177.74971918876756],"label":"Playing saxophone"}]},"fs8yU4pBNm4":{"duration":21.502,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=fs8yU4pBNm4","annotations":[{"segment":[0.01,17.86447748381457],"label":"Doing a powerbomb"}]},"nQQ-tcG6wBA":{"duration":192.912,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=nQQ-tcG6wBA","annotations":[{"segment":[0,123.37162246489859],"label":"Kneeling"}]},"FLJzzot6F-s":{"duration":232.73,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=FLJzzot6F-s","annotations":[{"segment":[73.70502637248347,82.78200006367601],"label":"Tai chi"},{"segment":[90.76973691192545,228.01357912275674],"label":"Tai 
chi"}]},"2ji02dSx1nM":{"duration":162.656,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2ji02dSx1nM","annotations":[{"segment":[0.761003120124805,4.56601872074883],"label":"Surfing"},{"segment":[5.834357254290172,7.102695787831514],"label":"Surfing"},{"segment":[8.371034321372855,10.146708268330734],"label":"Surfing"},{"segment":[19.78608112324493,21.30808736349454],"label":"Surfing"},{"segment":[25.366770670826835,29.17178627145086],"label":"Surfing"},{"segment":[31.70846333853354,33.991472698907955],"label":"Surfing"},{"segment":[36.0208143525741,42.61617472698908],"label":"Surfing"},{"segment":[44.64551638065523,53.52388611544462],"label":"Surfing"},{"segment":[58.34357254290172,62.40225585023401],"label":"Surfing"},{"segment":[70.77329017160686,93.85705148205928],"label":"Surfing"},{"segment":[103.25545553822154,117.20889547581903],"label":"Surfing"},{"segment":[132.43082995319813,139.53439937597506],"label":"Surfing"}]},"i_LT0ef1n-Y":{"duration":83.15,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=i_LT0ef1n-Y","annotations":[]},"ZXlJIrRiXrA":{"duration":78.414,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZXlJIrRiXrA","annotations":[{"segment":[17.465999251206192,57.772151369374335],"label":"Ice fishing"}]},"OqLrsVv44MY":{"duration":38.267,"subset":"training","resolution":"144x192","url":"https://www.youtube.com/watch?v=OqLrsVv44MY","annotations":[{"segment":[0.01,38.26569],"label":"Removing ice from car"}]},"81w6SClSFMU":{"duration":115.38,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=81w6SClSFMU","annotations":[{"segment":[70.10562629281398,79.26687862926258],"label":"Hula 
hoop"}]},"2uUNiV8xmEo":{"duration":25.287,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=2uUNiV8xmEo","annotations":[]},"feUoPtDWe1Y":{"duration":92.28,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=feUoPtDWe1Y","annotations":[{"segment":[27.809964898595947,33.41507410296412],"label":"Playing violin"}]},"ivWTI2J_UnY":{"duration":128.85,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ivWTI2J_UnY","annotations":[{"segment":[15.477784216130548,49.85052578701787],"label":"Using the pommel horse"},{"segment":[67.53942132917318,106.53539673946959],"label":"Using the pommel horse"}]},"e6J_ygZ779A":{"duration":142.32,"subset":"validation","resolution":"424x240","url":"https://www.youtube.com/watch?v=e6J_ygZ779A","annotations":[{"segment":[142.281,142.281],"label":"High jump"}]},"Et8xkGzQOTA":{"duration":42.12,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Et8xkGzQOTA","annotations":[{"segment":[11.630914652106082,35.155589485179405],"label":"Breakdancing"}]},"40datCdG408":{"duration":90.86,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=40datCdG408","annotations":[]},"KUejIghF6K4":{"duration":32.137,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=KUejIghF6K4","annotations":[{"segment":[0.2,31.93641723356009],"label":"Hitting a pinata"}]},"Zw4illqWzFI":{"duration":24.59,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Zw4illqWzFI","annotations":[{"segment":[5.055813009345794,24.389637188208617],"label":"Playing 
blackjack"}]},"JSYv9uYZP2o":{"duration":194.653,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=JSYv9uYZP2o","annotations":[{"segment":[0.20000000000000004,129.02562402496102],"label":"Hurling"}]},"lGWAepvduTI":{"duration":89.931,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lGWAepvduTI","annotations":[{"segment":[19.79815879581323,81.057454825267],"label":"Doing crunches"}]},"5zPTTiJiXUY":{"duration":14.21,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=5zPTTiJiXUY","annotations":[{"segment":[10.685465519500779,13.500930500780031],"label":"Smoking a cigarette"}]},"FrVitupq7sQ":{"duration":165.12,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FrVitupq7sQ","annotations":[{"segment":[21.122624017885883,39.669318277493005],"label":"Tennis serve with ball bouncing"},{"segment":[103.8099692586343,108.44664282353607],"label":"Tennis serve with ball bouncing"},{"segment":[129.82685863338534,133.17556728861155],"label":"Tennis serve with ball bouncing"},{"segment":[157.38930679563182,159.7076435569423],"label":"Tennis serve with ball bouncing"}]},"5F4jcV8dHVs":{"duration":180.577,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5F4jcV8dHVs","annotations":[{"segment":[19.380604890932528,24.436415980531663],"label":"Wakeboarding"},{"segment":[31.45837582719713,148.86554446344374],"label":"Wakeboarding"}]},"qlq13iS0nEA":{"duration":212.23,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qlq13iS0nEA","annotations":[{"segment":[7.334668799999999,200.64201292979718],"label":"Playing 
guitarra"}]},"siMXfU6NPlE":{"duration":51.67,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=siMXfU6NPlE","annotations":[{"segment":[0.01,49.50797971918877],"label":"Cumbia"}]},"xAxax-pfZsk":{"duration":148.26,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xAxax-pfZsk","annotations":[{"segment":[65.43039469578783,111.45479251170046],"label":"Rollerblading"}]},"wZEf-Bs__sU":{"duration":82.78,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=wZEf-Bs__sU","annotations":[{"segment":[34.58983851482059,69.95407639937598],"label":"Long jump"}]},"xzQRc682Isc":{"duration":77.25,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=xzQRc682Isc","annotations":[{"segment":[28.826159126365052,44.746882995319815],"label":"Hammer throw"}]},"COThs7Ab2Do":{"duration":112.687,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=COThs7Ab2Do","annotations":[]},"E3KLk-55yC0":{"duration":61.348,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=E3KLk-55yC0","annotations":[{"segment":[1.6262979719188768,61.120999999999995],"label":"Beach soccer"}]},"BSsXKG9dFHI":{"duration":64.738,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=BSsXKG9dFHI","annotations":[{"segment":[3.0298247269890797,6.261637769110765],"label":"Elliptical trainer"},{"segment":[18.481930834633385,59.58655296411857],"label":"Elliptical trainer"}]},"kiQVjQe0mlY":{"duration":150.768,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kiQVjQe0mlY","annotations":[]},"TXgzRTa2C38":{"duration":62.903,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TXgzRTa2C38","annotations":[{"segment":[8.63564965455761,25.808816112324493],"label":"Playing ten pins"},{"segment":[35.818318939157564,56.720516018720744],"label":"Playing ten 
pins"}]},"s84PG-JfEwc":{"duration":222.749,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=s84PG-JfEwc","annotations":[]},"PveTDlMybvU":{"duration":106.37,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PveTDlMybvU","annotations":[{"segment":[5.642102511311337,99.89840163806554],"label":"Doing motocross"}]},"Deo_GsHyw8Y":{"duration":88.376,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Deo_GsHyw8Y","annotations":[{"segment":[5.098092043681747,88.321],"label":"Waxing skis"}]},"qGID8CHyClA":{"duration":123.624,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=qGID8CHyClA","annotations":[{"segment":[0,112.99560998439938],"label":"Tug of war"}]},"Cz2ut3p7nw0":{"duration":232.53,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Cz2ut3p7nw0","annotations":[{"segment":[3.9994127999999995,9.250190603542508],"label":"Brushing teeth"},{"segment":[24.12304608374811,169.5868269344774],"label":"Brushing teeth"}]},"QJfuxpFMn8s":{"duration":175.96,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=QJfuxpFMn8s","annotations":[{"segment":[33.20817628705148,167.98111076443058],"label":"Washing dishes"}]},"Coctj-3Yzic":{"duration":43.584,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Coctj-3Yzic","annotations":[{"segment":[1.4920591900311524,25.037397196261683],"label":"Beer pong"}]},"uZpVDAd7da8":{"duration":22.245,"subset":"training","resolution":"176x144","url":"https://www.youtube.com/watch?v=uZpVDAd7da8","annotations":[{"segment":[2.533329477379095,22.244715],"label":"Cutting the 
grass"}]},"XPwpA67qHjA":{"duration":220.102,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=XPwpA67qHjA","annotations":[{"segment":[25.75295826833073,123.61419968798751],"label":"Slacklining"},{"segment":[155.20449516380654,185.76467230889236],"label":"Slacklining"},{"segment":[193.6622461778471,212.8911216848674],"label":"Slacklining"}]},"F-WmsfI8HG0":{"duration":235.94,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=F-WmsfI8HG0","annotations":[{"segment":[12.514649878315133,38.28010551014041],"label":"Vacuuming floor"}]},"nVHL9qP11aA":{"duration":231.991,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=nVHL9qP11aA","annotations":[{"segment":[0.01,231.05103989873757],"label":"Beer pong"}]},"ev-RTtbVjFI":{"duration":143.38,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ev-RTtbVjFI","annotations":[{"segment":[62.83747425897036,137.30323556942278],"label":"Cleaning windows"}]},"td15Nx9J0a4":{"duration":142.34,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=td15Nx9J0a4","annotations":[{"segment":[87.09377543999999,112.06413077999999],"label":"Shot put"},{"segment":[2.4553305,76.97212026],"label":"Shot put"}]},"i9DlXuEewE8":{"duration":153.65,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=i9DlXuEewE8","annotations":[{"segment":[8.389412527301092,116.0135903775351],"label":"Cumbia"}]},"4A49mkP6HsM":{"duration":129.2,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=4A49mkP6HsM","annotations":[{"segment":[26.90727185413947,115.38886244565428],"label":"Playing violin"}]},"o9ghRI_Iddk":{"duration":148.07,"subset":"validation","resolution":"640x352","url":"https://www.youtube.com/watch?v=o9ghRI_Iddk","annotations":[{"segment":[39.733725004915364,121.61245614977517],"label":"Playing 
violin"}]},"1ezOsdHncXo":{"duration":83.36,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1ezOsdHncXo","annotations":[{"segment":[39.66410137285491,64.50293206864275],"label":"Washing hands"}]},"6H0D8VaIli0":{"duration":175.427,"subset":"testing","resolution":"852x480","url":"https://www.youtube.com/watch?v=6H0D8VaIli0","annotations":[]},"USWExMIMcik":{"duration":45.697,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=USWExMIMcik","annotations":[{"segment":[17.25217244929797,45.62558077833317],"label":"Shuffleboard"}]},"fgoXpih2Kws":{"duration":154.67000000000002,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=fgoXpih2Kws","annotations":[{"segment":[4.46121294851794,141.432507800312],"label":"Doing karate"}]},"GOF5guH_kuI":{"duration":110.55,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GOF5guH_kuI","annotations":[]},"T3A88zsRuVs":{"duration":223.748,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=T3A88zsRuVs","annotations":[]},"D5A6eBnKmD8":{"duration":170.57,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=D5A6eBnKmD8","annotations":[{"segment":[50.0278813220556,166.31609482066358],"label":"Doing kickboxing"}]},"lAZIxuxjogI":{"duration":156.32,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=lAZIxuxjogI","annotations":[{"segment":[100.22779950093603,145.58636569843992],"label":"Bungee 
jumping"}]},"lgz1mojrClE":{"duration":188.9,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=lgz1mojrClE","annotations":[]},"oqWo2Rl8r3s":{"duration":163.259,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=oqWo2Rl8r3s","annotations":[]},"wCG7f6naJsQ":{"duration":235.428,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wCG7f6naJsQ","annotations":[{"segment":[14.686614664586585,235.15300000000002],"label":"Assembling bicycle"}]},"Pf8eE_zup_k":{"duration":62.83,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=Pf8eE_zup_k","annotations":[{"segment":[9.724404472698907,56.636104441497665],"label":"Mixing drinks"}]},"wIcK3bQNqcA":{"duration":140.504,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=wIcK3bQNqcA","annotations":[{"segment":[50.913557038581374,133.07449566597634],"label":"Assembling bicycle"}]},"cAGABdvv5u8":{"duration":219.29,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cAGABdvv5u8","annotations":[{"segment":[19.153659906396257,90.29582527301093],"label":"Surfing"},{"segment":[113.55384087363497,193.9484976599064],"label":"Surfing"}]},"1y2aqd5HQlU":{"duration":60.187,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=1y2aqd5HQlU","annotations":[{"segment":[27.04150210296412,36.33701845085804],"label":"Carving jack-o-lanterns"},{"segment":[40.56225315444618,48.3554638299532],"label":"Carving jack-o-lanterns"}]},"sRTDDlI1-Yo":{"duration":167.231,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sRTDDlI1-Yo","annotations":[{"segment":[43.959865997360986,54.134553082803585],"label":"Swinging at the 
playground"}]},"m2hiQ9EOUUI":{"duration":97.617,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=m2hiQ9EOUUI","annotations":[{"segment":[0.01,0.02],"label":"Capoeira"},{"segment":[15.22881263650546,27.716438998439937],"label":"Capoeira"},{"segment":[40.81321786583463,59.08779302964118],"label":"Capoeira"},{"segment":[66.54991122152886,77.36236819344774],"label":"Capoeira"}]},"nrC41oAR-F0":{"duration":8.04,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nrC41oAR-F0","annotations":[{"segment":[3.446768057722309,5.514828892355694],"label":"High jump"}]},"dVCAykynRzs":{"duration":70.473,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dVCAykynRzs","annotations":[{"segment":[21.422925117004684,26.374165366614665],"label":"Rock-paper-scissors"},{"segment":[30.65996723868955,62.199073322932925],"label":"Rock-paper-scissors"}]},"DxhdDYQkQU8":{"duration":53.64,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DxhdDYQkQU8","annotations":[{"segment":[13.221195975039,27.27917650546022],"label":"Skateboarding"}]},"Pjnuoa4o55c":{"duration":209.79,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Pjnuoa4o55c","annotations":[]},"UQtJ2osfP9I":{"duration":229.0,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=UQtJ2osfP9I","annotations":[]},"b0qGo-HT5iQ":{"duration":25.96,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=b0qGo-HT5iQ","annotations":[]},"Tp4g0ErB2oQ":{"duration":21.48,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Tp4g0ErB2oQ","annotations":[{"segment":[7.337099580000001,17.009072540000002],"label":"Playing field 
hockey"}]},"3EuY86B4uTo":{"duration":204.8,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3EuY86B4uTo","annotations":[{"segment":[57.49709955745168,98.38392590941733],"label":"Paintball"},{"segment":[127.4519040190179,164.1861611950078],"label":"Paintball"},{"segment":[171.8524410951638,193.254139149766],"label":"Paintball"}]},"jIs9DOPuZ7o":{"duration":122.973,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=jIs9DOPuZ7o","annotations":[]},"GPR8rtfZr4A":{"duration":78.14,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GPR8rtfZr4A","annotations":[{"segment":[28.035808736349455,36.568446177847115],"label":"Playing kickball"},{"segment":[28.279598377535102,36.81223581903276],"label":"Playing kickball"},{"segment":[41.688028642745714,52.049088393135726],"label":"Playing kickball"}]},"uKzelWWaYB4":{"duration":83.639,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uKzelWWaYB4","annotations":[{"segment":[0.01,0.02],"label":"Fun sliding down"}]},"v05j8FVewPc":{"duration":147.61,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=v05j8FVewPc","annotations":[{"segment":[49.263734789391584,71.5935585023401],"label":"Ironing clothes"},{"segment":[95.5348127925117,121.0874976599064],"label":"Ironing clothes"},{"segment":[129.14465054602184,140.88507332293293],"label":"Ironing 
clothes"}]},"Ctabf6Se00M":{"duration":170.133,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ctabf6Se00M","annotations":[]},"K49OCArHjXI":{"duration":62.114,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=K49OCArHjXI","annotations":[]},"QjaqFPDoImc":{"duration":27.168,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=QjaqFPDoImc","annotations":[{"segment":[2.336896326047238,23.259288849411725],"label":"Powerbocking"}]},"qumU7AgV3Mk":{"duration":131.542,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qumU7AgV3Mk","annotations":[{"segment":[35.90115109080615,48.1909641749183],"label":"Curling"}]},"rNZXdUBWNgY":{"duration":187.2,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=rNZXdUBWNgY","annotations":[]},"23KpderJKY4":{"duration":75.628,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=23KpderJKY4","annotations":[]},"QWqEi91fWOQ":{"duration":180.88,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=QWqEi91fWOQ","annotations":[{"segment":[11.710864152886115,26.666907528861152],"label":"Playing lacrosse"},{"segment":[38.51886643057722,42.751708895475815],"label":"Playing lacrosse"},{"segment":[51.499583322932914,68.7131426801872],"label":"Playing lacrosse"},{"segment":[92.13487098595944,168.32603535413415],"label":"Playing lacrosse"}]},"cZZM3bgmXE4":{"duration":235.43,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cZZM3bgmXE4","annotations":[{"segment":[110.52945397815913,208.57385335413414],"label":"Playing violin"}]},"brZ1m2qNUzc":{"duration":175.8,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=brZ1m2qNUzc","annotations":[{"segment":[44.28064595787832,150.11550255070202],"label":"Cleaning 
windows"}]},"dm5WXFiQZUQ":{"duration":159.89,"subset":"training","resolution":"1920x800","url":"https://www.youtube.com/watch?v=dm5WXFiQZUQ","annotations":[{"segment":[25.692473530233734,48.39164873010921],"label":"Doing motocross"},{"segment":[58.61874975039002,159.891994],"label":"Doing motocross"}]},"L69n16haU9Q":{"duration":20.69,"subset":"testing","resolution":"270x360","url":"https://www.youtube.com/watch?v=L69n16haU9Q","annotations":[]},"sgFp3HCSgCo":{"duration":216.201,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=sgFp3HCSgCo","annotations":[{"segment":[63.73545865834633,75.20109672386896],"label":"Curling"},{"segment":[89.70175663026521,105.21409048361934],"label":"Curling"},{"segment":[184.46188299531983,211.45941809672388],"label":"Curling"}]},"QYLxQ_uE8iE":{"duration":28.68,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QYLxQ_uE8iE","annotations":[]},"rCmpRDbS_O4":{"duration":95.34,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=rCmpRDbS_O4","annotations":[{"segment":[9.51920787035563,44.02633578783152],"label":"Washing face"}]},"HM_rHjh-wqQ":{"duration":121.53,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=HM_rHjh-wqQ","annotations":[{"segment":[8.531974797191888,51.19184878315132],"label":"Playing violin"},{"segment":[71.28938941653666,106.7444846848674],"label":"Playing violin"}]},"BmZwZLKxHCY":{"duration":199.62,"subset":"training","resolution":"480x270","url":"https://www.youtube.com/watch?v=BmZwZLKxHCY","annotations":[{"segment":[15.456654030000001,154.89635335413416],"label":"Putting on makeup"}]},"zjAZ7m8TQ3E":{"duration":221.147,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zjAZ7m8TQ3E","annotations":[{"segment":[58.65306155652214,62.09771545130955],"label":"Snow tubing"},{"segment":[76.22079641993791,86.55475810430013],"label":"Snow 
tubing"},{"segment":[96.5442543991836,113.76752387312065],"label":"Snow tubing"},{"segment":[119.96790088373798,141.3247550314199],"label":"Snow tubing"},{"segment":[189.54990955844357,213.31802042413142],"label":"Snow tubing"}]},"9VRLj4IfUzY":{"duration":214.414,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9VRLj4IfUzY","annotations":[{"segment":[25.5120073631144,208.83781577432936],"label":"Painting furniture"}]},"z6IsPWucZnU":{"duration":103.538,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=z6IsPWucZnU","annotations":[{"segment":[6.7975261906309585,97.41327242048015],"label":"Swimming"}]},"CwvtmoTWNgQ":{"duration":153.81,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CwvtmoTWNgQ","annotations":[{"segment":[5.75629329173167,147.7448611544462],"label":"Windsurfing"}]},"NWbWuVtgt9Y":{"duration":223.19,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NWbWuVtgt9Y","annotations":[{"segment":[100.62709669203095,125.00044191155402],"label":"Playing guitarra"},{"segment":[131.26787290327613,212.39629302652105],"label":"Playing guitarra"}]},"i887uArWX9w":{"duration":134.955,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=i887uArWX9w","annotations":[{"segment":[0.2,129.84914196567863],"label":"Table soccer"}]},"RnZ-icC6xbQ":{"duration":29.977,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RnZ-icC6xbQ","annotations":[{"segment":[7.763786472487405,12.526481051926657],"label":"Doing crunches"},{"segment":[20.604384603328526,28.822367407066842],"label":"Doing crunches"}]},"sanasW2azw4":{"duration":125.44,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sanasW2azw4","annotations":[{"segment":[4.694664561622465,108.75972901092044],"label":"Mowing the 
lawn"}]},"yDrTpm9c7MY":{"duration":80.43,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yDrTpm9c7MY","annotations":[{"segment":[9.91307038676105,65.12510699063962],"label":"Tumbling"}]},"hfZQBDePOOE":{"duration":199.46,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hfZQBDePOOE","annotations":[{"segment":[26.760465575663027,177.3658764898596],"label":"Wakeboarding"}]},"2NITA0d4vqo":{"duration":158.52,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2NITA0d4vqo","annotations":[{"segment":[26.206474258970356,126.65432234],"label":"Ping-pong"}]},"YQfJWGJ75Pk":{"duration":89.56,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=YQfJWGJ75Pk","annotations":[{"segment":[0.2,80.9970109034268],"label":"Painting furniture"}]},"KLr1ZVJDFDs":{"duration":228.926,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=KLr1ZVJDFDs","annotations":[{"segment":[0.2,228.72555555555556],"label":"Longboarding"}]},"e-Z9xiEQqPQ":{"duration":153.35,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=e-Z9xiEQqPQ","annotations":[{"segment":[49.170046801872076,147.7858783151326],"label":"Horseback riding"}]},"mzGbmHjdCM8":{"duration":80.04,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mzGbmHjdCM8","annotations":[{"segment":[9.614691129485179,77.16726127301092],"label":"Cutting the grass"}]},"cCDffwsJvsY":{"duration":111.5,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=cCDffwsJvsY","annotations":[]},"YDIRaWzfyYY":{"duration":195.977,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=YDIRaWzfyYY","annotations":[{"segment":[56.85071138845554,136.3194477379095],"label":"Sharpening 
knives"}]},"OSjsVUg_wXA":{"duration":108.066,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OSjsVUg_wXA","annotations":[{"segment":[4.21298751950078,105.83024648985959],"label":"Drum corps"}]},"5HW6mjZZvtY":{"duration":85.8,"subset":"training","resolution":"480x270","url":"https://www.youtube.com/watch?v=5HW6mjZZvtY","annotations":[{"segment":[5.60769192,80.1314352574103],"label":"Skateboarding"}]},"d-eoNpp8mNM":{"duration":42.63,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=d-eoNpp8mNM","annotations":[{"segment":[0.01,40.30404533732369],"label":"Playing lacrosse"}]},"Mw_AIqw6rfM":{"duration":129.17,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Mw_AIqw6rfM","annotations":[{"segment":[12.878191999999999,71.13561609672387],"label":"Playing polo"}]},"k5vE0ehf5TA":{"duration":67.66,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=k5vE0ehf5TA","annotations":[{"segment":[1.688934689547582,67.662946],"label":"Vacuuming floor"}]},"-9n7u53oAZk":{"duration":20.527,"subset":"testing","resolution":"220x176","url":"https://www.youtube.com/watch?v=-9n7u53oAZk","annotations":[]},"xPDjkE7llg4":{"duration":119.885,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=xPDjkE7llg4","annotations":[{"segment":[2.805412418096724,113.89974417472699],"label":"Clipping cat claws"}]},"1stYB5_yR5k":{"duration":181.929,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1stYB5_yR5k","annotations":[{"segment":[120.24804817106613,134.67781446966643],"label":"Painting 
fence"}]},"teffBaLEZQg":{"duration":189.71,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=teffBaLEZQg","annotations":[]},"Z47QGlaQ1NE":{"duration":166.3,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Z47QGlaQ1NE","annotations":[{"segment":[8.302074408736349,166.04148987728217],"label":"High jump"}]},"0_-Q1zOC3Kw":{"duration":39.27,"subset":"validation","resolution":"720x1280","url":"https://www.youtube.com/watch?v=0_-Q1zOC3Kw","annotations":[{"segment":[0,37.36601439937598],"label":"Playing harmonica"}]},"C_85qE-jaWA":{"duration":117.842,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=C_85qE-jaWA","annotations":[]},"GOZ305xZvz8":{"duration":38.62,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GOZ305xZvz8","annotations":[{"segment":[0.01,33.25329342969637],"label":"Cricket"}]},"uIHWHnItois":{"duration":147.284,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uIHWHnItois","annotations":[]},"9k19UUJtldg":{"duration":182.18,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=9k19UUJtldg","annotations":[]},"NOXkMlnZKYM":{"duration":67.11,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=NOXkMlnZKYM","annotations":[]},"84uYA9rFoSw":{"duration":87.21000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=84uYA9rFoSw","annotations":[{"segment":[14.558371758190328,86.94205190171606],"label":"Ballet"}]},"TnP6coXzbLY":{"duration":197.44,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TnP6coXzbLY","annotations":[]},"y8ENWnuzCIE":{"duration":38.13,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=y8ENWnuzCIE","annotations":[{"segment":[1.2482386895475819,20.031258970358813],"label":"Getting a 
piercing"},{"segment":[9.546604799999999,17.88050196],"label":"Getting a piercing"}]},"AS0hqTk_mIs":{"duration":41.889,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=AS0hqTk_mIs","annotations":[{"segment":[10.913305926677065,25.682210953198126],"label":"Playing rubik cube"}]},"xcI2e5T_BL0":{"duration":89.862,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xcI2e5T_BL0","annotations":[{"segment":[5.035570093457944,80.16734112149533],"label":"Carving jack-o-lanterns"}]},"kBDTz-dwQ2k":{"duration":82.5,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kBDTz-dwQ2k","annotations":[{"segment":[24.325373300646312,78.89658112855126],"label":"Cumbia"}]},"iGXUvIRX77c":{"duration":91.534,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=iGXUvIRX77c","annotations":[{"segment":[0,23.561552421280524],"label":"Drum corps"},{"segment":[30.130227641758733,80.25207551975548],"label":"Drum corps"}]},"64djQm_uhiE":{"duration":152.18,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=64djQm_uhiE","annotations":[]},"Xj--zKhm6k4":{"duration":29.722,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=Xj--zKhm6k4","annotations":[{"segment":[0.2,16.666284112149533],"label":"Throwing darts"}]},"UIPTzsWiGSU":{"duration":55.334,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=UIPTzsWiGSU","annotations":[{"segment":[0.01,55.301],"label":"Having an ice cream"}]},"bI1L2D_erOY":{"duration":184.181,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=bI1L2D_erOY","annotations":[{"segment":[66.23025668509734,69.1035866931276],"label":"Applying 
sunscreen"}]},"ggDfJLB8jTk":{"duration":174.68,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=ggDfJLB8jTk","annotations":[{"segment":[38.15244989079563,47.96308036267029],"label":"Archery"},{"segment":[120.99776965366615,124.54049714352576],"label":"Archery"}]},"hT_4wWPNYxo":{"duration":166.07,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hT_4wWPNYxo","annotations":[{"segment":[10.154691768123353,152.38574387802885],"label":"Hula hoop"}]},"OvSf6kk07uE":{"duration":210.14,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OvSf6kk07uE","annotations":[{"segment":[23.276100233124968,201.94475695218284],"label":"Tumbling"}]},"SzLlB2SgtQo":{"duration":136.3,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=SzLlB2SgtQo","annotations":[{"segment":[0.01,133.7490739738433],"label":"Windsurfing"}]},"_BJqPK0NNZU":{"duration":97.39,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=_BJqPK0NNZU","annotations":[{"segment":[0.01,69.86904836193449],"label":"Discus throw"}]},"rMy6sItJID0":{"duration":93.21000000000001,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rMy6sItJID0","annotations":[{"segment":[16.818661799999997,73.88798517940718],"label":"Using the balance beam"}]},"ZXEc0cahpuw":{"duration":30.37,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZXEc0cahpuw","annotations":[{"segment":[1.5399067355694227,20.634750256630266],"label":"Hand washing clothes"}]},"JrRtv8puAZI":{"duration":183.62,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=JrRtv8puAZI","annotations":[{"segment":[74.33730703513855,135.0675925513211],"label":"Washing 
dishes"}]},"SvDnZ47J37U":{"duration":55.13,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SvDnZ47J37U","annotations":[{"segment":[0.8599661622464899,23.219086380655227],"label":"Playing field hockey"}]},"EvDrxFO5jk4":{"duration":25.78,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=EvDrxFO5jk4","annotations":[]},"DLJFoRluYx4":{"duration":178.38,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=DLJFoRluYx4","annotations":[{"segment":[73.98199821040704,130.1062725686625],"label":"Shaving"}]},"uE15Mhtj9nE":{"duration":230.668,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uE15Mhtj9nE","annotations":[{"segment":[126.84887346514269,219.69145414523967],"label":"Hanging wallpaper"}]},"9vqOA8KVcME":{"duration":207.03,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9vqOA8KVcME","annotations":[{"segment":[123.21596661961716,193.94806697425895],"label":"Putting on makeup"}]},"p7TUDtxbz4c":{"duration":86.4,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=p7TUDtxbz4c","annotations":[]},"oO6jZR9Aijc":{"duration":159.13,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oO6jZR9Aijc","annotations":[{"segment":[11.419322911691978,27.55532267821325],"label":"Paintball"},{"segment":[82.91421418489392,100.78793693166928],"label":"Paintball"}]},"JFBd-R1YuXY":{"duration":84.684,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=JFBd-R1YuXY","annotations":[{"segment":[15.672315993821822,29.76423173225035],"label":"Building 
sandcastles"}]},"dV7OXaQ8pQo":{"duration":186.457,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=dV7OXaQ8pQo","annotations":[]},"ZSLzzrc8pro":{"duration":175.427,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZSLzzrc8pro","annotations":[{"segment":[25.17818049320612,86.4815764766645],"label":"Elliptical trainer"}]},"um2h6O3zjGM":{"duration":45.743,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=um2h6O3zjGM","annotations":[{"segment":[5.406673269035453,6.900622761740777],"label":"Having an ice cream"}]},"PHr8BcCBQxw":{"duration":220.938,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PHr8BcCBQxw","annotations":[]},"sR3_5j8pUdI":{"duration":84.475,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=sR3_5j8pUdI","annotations":[{"segment":[13.205415153966886,77.39109739695753],"label":"Using the rowing machine"}]},"lFsPB8BbqPY":{"duration":30.26,"subset":"testing","resolution":"176x144","url":"https://www.youtube.com/watch?v=lFsPB8BbqPY","annotations":[]},"DTprdtsGCts":{"duration":237.7,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=DTprdtsGCts","annotations":[{"segment":[33.74555385023401,233.6230651170047],"label":"Playing 
flauta"}]},"zLbSEo4xhVY":{"duration":160.194,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zLbSEo4xhVY","annotations":[{"segment":[0.01,5.8388632564444265],"label":"Wakeboarding"},{"segment":[30.791268708157823,156.55139218479334],"label":"Wakeboarding"}]},"kOVQPyzXkUY":{"duration":128.058,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kOVQPyzXkUY","annotations":[{"segment":[11.781683307332294,97.2488096723869],"label":"Slacklining"},{"segment":[101.04291107644306,127.00255226209048],"label":"Slacklining"}]},"86lJVqKyj3A":{"duration":189.38,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=86lJVqKyj3A","annotations":[{"segment":[20.090443291201037,184.65480889235567],"label":"Wrapping presents"}]},"Tt5ONIePiwI":{"duration":192.262,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Tt5ONIePiwI","annotations":[]},"6jTH_gFx6Ik":{"duration":154.111,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=6jTH_gFx6Ik","annotations":[{"segment":[8.174365875315285,139.2046424061044],"label":"Rafting"}]},"cbR34GknrBs":{"duration":16.53,"subset":"validation","resolution":"320x180","url":"https://www.youtube.com/watch?v=cbR34GknrBs","annotations":[{"segment":[6.4399375975039,16.20288299531981],"label":"Triple jump"}]},"YySTmiavdMc":{"duration":36.92,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=YySTmiavdMc","annotations":[{"segment":[0.2,14.629657416536661],"label":"Cutting the grass"},{"segment":[21.944486124804992,36.719725999999994],"label":"Cutting the 
grass"}]},"EYoyxe8hd3g":{"duration":194.978,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EYoyxe8hd3g","annotations":[{"segment":[102.50791292199688,114.06666868174727],"label":"Croquet"},{"segment":[117.10844651326053,118.02097986271451],"label":"Croquet"}]},"HoybOj2Ro7o":{"duration":218.536,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=HoybOj2Ro7o","annotations":[{"segment":[31.025321372854915,202.53546645865833],"label":"Making a cake"}]},"PvqizTCKl8A":{"duration":150.63,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PvqizTCKl8A","annotations":[]},"uj0UBMgR2gk":{"duration":109.53,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uj0UBMgR2gk","annotations":[{"segment":[18.624867606949177,30.927532121684866],"label":"Playing violin"},{"segment":[40.49627134165367,63.22202698907956],"label":"Playing violin"},{"segment":[86.80213435257411,95.51652185647426],"label":"Playing 
violin"}]},"WnYqWQ005Tc":{"duration":198.55,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WnYqWQ005Tc","annotations":[{"segment":[12.079085612404088,20.751249641822405],"label":"Breakdancing"},{"segment":[44.0061598,167.57123180000002],"label":"Breakdancing"}]},"WMoofjkumOs":{"duration":153.438,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=WMoofjkumOs","annotations":[{"segment":[3.3447570093457943,18.63507476635514],"label":"Bullfighting"},{"segment":[31.297369158878503,38.7036168224299],"label":"Bullfighting"},{"segment":[67.6118738317757,76.45158878504672],"label":"Bullfighting"},{"segment":[81.70763551401869,99.63896728971963],"label":"Bullfighting"},{"segment":[117.0817601246106,124.96685825545173],"label":"Bullfighting"},{"segment":[137.15291900311527,143.60436292834893],"label":"Bullfighting"}]},"NI-TZyAxJU0":{"duration":150.373,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=NI-TZyAxJU0","annotations":[{"segment":[0.4690202808112325,56.51694383775351],"label":"Rafting"},{"segment":[62.848717628705145,84.42365054602183],"label":"Rafting"},{"segment":[87.94130265210609,150.341],"label":"Rafting"}]},"45Bscg4Qe-I":{"duration":226.07,"subset":"training","resolution":"624x360","url":"https://www.youtube.com/watch?v=45Bscg4Qe-I","annotations":[{"segment":[18.683202808112323,22.20833541341654],"label":"Playing ten pins"},{"segment":[25.02844149765991,27.496034321372857],"label":"Playing ten pins"},{"segment":[30.66865366614665,33.84127301092044],"label":"Playing ten pins"},{"segment":[46.5317503900156,51.11442277691108],"label":"Playing ten pins"},{"segment":[100.81879251170047,106.4590046801872],"label":"Playing ten pins"},{"segment":[114.5668096723869,118.09194227769113],"label":"Playing ten pins"},{"segment":[206.59105460218407,214.6995772230889],"label":"Playing ten 
pins"}]},"-crpjVnFv0U":{"duration":89.65,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-crpjVnFv0U","annotations":[]},"aEnq40UPxJ0":{"duration":61.81,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aEnq40UPxJ0","annotations":[{"segment":[0,61.71508817472699],"label":"Playing water polo"}]},"wBy9_ZSfEVA":{"duration":151.325,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=wBy9_ZSfEVA","annotations":[{"segment":[0.01,139.16183434844646],"label":"Kite flying"}]},"X5gh32tHdsc":{"duration":158.92000000000002,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=X5gh32tHdsc","annotations":[{"segment":[7.437603790951638,145.77703430265208],"label":"Ping-pong"}]},"IgDBDaIoNAM":{"duration":170.34,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IgDBDaIoNAM","annotations":[]},"mBZAFJ-Q6Mw":{"duration":225.65,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mBZAFJ-Q6Mw","annotations":[{"segment":[70.08680655226209,80.30046177847113],"label":"Breakdancing"},{"segment":[93.683872074883,126.08581279251172],"label":"Breakdancing"},{"segment":[131.36873790951637,158.83994851794074],"label":"Breakdancing"}]},"ezPArXEbFBg":{"duration":202.734,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ezPArXEbFBg","annotations":[]},"q1jvfsKWY5c":{"duration":89.746,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=q1jvfsKWY5c","annotations":[{"segment":[21.701239206030827,64.40367641185648],"label":"Tug of war"}]},"aVDVMN9hE8A":{"duration":53.38,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=aVDVMN9hE8A","annotations":[{"segment":[8.97244656,43.80527179407176],"label":"Making a 
sandwich"}]},"Zt8zZhMs4Es":{"duration":236.47,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Zt8zZhMs4Es","annotations":[{"segment":[0.01,225.58877122698908],"label":"Tennis serve with ball bouncing"}]},"xCplsH6deic":{"duration":73.19,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xCplsH6deic","annotations":[{"segment":[10.562460998439935,17.185409516380656],"label":"Playing kickball"},{"segment":[20.040128705148206,44.24814742589703],"label":"Playing kickball"},{"segment":[47.67381045241809,53.04068252730109],"label":"Playing kickball"},{"segment":[54.98189157566302,65.07098915054601],"label":"Playing kickball"}]},"TB5AeIdAndA":{"duration":94.6,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=TB5AeIdAndA","annotations":[]},"_--nxrRXdPg":{"duration":208.68,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=_--nxrRXdPg","annotations":[{"segment":[16.015765019419632,38.1187024103677],"label":"High jump"},{"segment":[43.96947995503043,49.49521430276745],"label":"High jump"},{"segment":[56.97120783205871,58.921467013612954],"label":"High jump"},{"segment":[68.02267652753275,74.52354046604688],"label":"High jump"},{"segment":[78.74910148783415,99.87690914359737],"label":"High jump"},{"segment":[106.37777303767838,125.23027833051327],"label":"High jump"},{"segment":[131.4060990298902,166.5107640579276],"label":"High jump"},{"segment":[175.93701670434504,208.677731],"label":"High jump"}]},"uktUeF3Fy7o":{"duration":181.581,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=uktUeF3Fy7o","annotations":[]},"ZOQSDsJYXIA":{"duration":98.59,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZOQSDsJYXIA","annotations":[{"segment":[2.307143918409798,10.459052430124416],"label":"Triple jump"},{"segment":[18.149532158157076,65.98431532137286],"label":"Triple 
jump"},{"segment":[67.52241124960999,97.3614722574103],"label":"Triple jump"}]},"G5ueYVLGtm8":{"duration":6.827,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=G5ueYVLGtm8","annotations":[{"segment":[0,6.826665],"label":"Croquet"}]},"P3q6n0ZjumU":{"duration":48.252,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=P3q6n0ZjumU","annotations":[{"segment":[4.516429329173167,48.250521541950114],"label":"Throwing darts"}]},"EzQL-i_MTdU":{"duration":56.75,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EzQL-i_MTdU","annotations":[{"segment":[16.20151506468422,52.76558989703588],"label":"Snatch"}]},"rxwmIyvVVUU":{"duration":16.649,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rxwmIyvVVUU","annotations":[{"segment":[0.01,16.648706],"label":"Playing pool"}]},"AzNHZyeXHBM":{"duration":42.56,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AzNHZyeXHBM","annotations":[{"segment":[11.912283359999998,37.25234586],"label":"Triple jump"}]},"3PC5_h68qkc":{"duration":193.26,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=3PC5_h68qkc","annotations":[{"segment":[20.495581903276133,43.7038143525741],"label":"Rafting"}]},"zBm3FR-CCI0":{"duration":133.65,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=zBm3FR-CCI0","annotations":[{"segment":[0.01,7.811992979719188],"label":"Discus throw"},{"segment":[10.936790171606862,15.72814586583463],"label":"Discus throw"},{"segment":[23.43597893915756,25.727496879875194],"label":"Discus throw"},{"segment":[30.31053276131045,34.06028939157566],"label":"Discus throw"},{"segment":[38.018365834633386,41.55980265210608],"label":"Discus throw"},{"segment":[47.80939703588143,53.01739235569423],"label":"Discus throw"},{"segment":[57.600428237129485,65.72490093603744],"label":"Discus 
throw"},{"segment":[69.47465756630264,111.3469399375975],"label":"Discus throw"},{"segment":[116.34661544461777,120.92965132605303],"label":"Discus throw"}]},"QyTiHhG9qf0":{"duration":144.754,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=QyTiHhG9qf0","annotations":[{"segment":[18.736634945397817,133.65808424336976],"label":"Riding bumper cars"}]},"dVpHF8tE2qo":{"duration":36.78,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dVpHF8tE2qo","annotations":[]},"luDywPbDOEQ":{"duration":133.237,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=luDywPbDOEQ","annotations":[{"segment":[33.88057877961377,35.54342927186475],"label":"Mooping floor"},{"segment":[39.07698656789809,45.52053222537065],"label":"Mooping floor"},{"segment":[52.587646817437324,55.28977886734517],"label":"Mooping floor"},{"segment":[59.86261772103538,62.56474977094322],"label":"Mooping floor"},{"segment":[69.42400805147852,70.25543329760401],"label":"Mooping floor"}]},"Z0GbQKiufUE":{"duration":108.44,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Z0GbQKiufUE","annotations":[{"segment":[32.64957460883469,84.24605167862714],"label":"Cleaning windows"}]},"oSQNry_U7oA":{"duration":99.66,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oSQNry_U7oA","annotations":[{"segment":[26.431899815534187,97.99418674484382],"label":"Playing saxophone"}]},"qSP3_C7QWpU":{"duration":30.651,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=qSP3_C7QWpU","annotations":[]},"rKtktLDSOpA":{"duration":128.52,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=rKtktLDSOpA","annotations":[{"segment":[14.063411778565877,81.50593047015467],"label":"Playing polo"},{"segment":[87.30958934865933,105.92132299351915],"label":"Playing 
polo"}]},"dD3eR9Sb1x0":{"duration":208.07999999999998,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dD3eR9Sb1x0","annotations":[{"segment":[25.644067199422672,194.11584894851796],"label":"Zumba"}]},"R1qdfsEyrDE":{"duration":123.18,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=R1qdfsEyrDE","annotations":[]},"ehGHCYKzyZ8":{"duration":61.719,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=ehGHCYKzyZ8","annotations":[{"segment":[43.95990729267573,45.401932082395355],"label":"Doing crunches"}]},"fhtNAMK0Vqk":{"duration":27.7,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=fhtNAMK0Vqk","annotations":[{"segment":[0.01,26.749261310452418],"label":"Grooming horse"}]},"zRNS_ebpi7o":{"duration":81.25,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=zRNS_ebpi7o","annotations":[{"segment":[20.4861789,54.51931584],"label":"Playing violin"},{"segment":[54.69249171294852,77.2539367301092],"label":"Playing violin"}]},"7A_NgDs7jZY":{"duration":216.72,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7A_NgDs7jZY","annotations":[{"segment":[13.192313127145086,204.6499856903276],"label":"Belly dance"}]},"bUhhrFqCMmc":{"duration":2.183,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=bUhhrFqCMmc","annotations":[]},"Z47Ig2AqKfg":{"duration":87.08,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Z47Ig2AqKfg","annotations":[{"segment":[4.346949388179609,5.9770554087469625],"label":"Playing violin"},{"segment":[8.558056516380654,21.463062374414974],"label":"Playing violin"},{"segment":[32.05875139469578,34.63975256630265],"label":"Playing violin"},{"segment":[44.556230751950075,75.39240264430576],"label":"Playing 
violin"}]},"beejEQ04nkY":{"duration":30.929,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=beejEQ04nkY","annotations":[{"segment":[12.352291145085804,16.019377578783153],"label":"Disc dog"},{"segment":[18.624938992199688,21.954267464898596],"label":"Disc dog"}]},"G-zVpNXq224":{"duration":192.15,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=G-zVpNXq224","annotations":[{"segment":[60.8509515897036,164.56735183619344],"label":"Ping-pong"}]},"xmAcpNTJUxo":{"duration":123.391,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=xmAcpNTJUxo","annotations":[{"segment":[27.868606466470283,39.4004431152648],"label":"Carving jack-o-lanterns"},{"segment":[60.54214429906543,93.21568249221184],"label":"Carving jack-o-lanterns"}]},"quoyW7FZqdI":{"duration":233.849,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=quoyW7FZqdI","annotations":[{"segment":[73.69310947676003,128.41571552385906],"label":"Rope skipping"},{"segment":[195.1772944773791,224.36268430577223],"label":"Rope skipping"}]},"O0uVjGlOW8M":{"duration":220.985,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=O0uVjGlOW8M","annotations":[{"segment":[28.608585023400938,215.44559282371296],"label":"Surfing"}]},"xlTKV-3QSW8":{"duration":78.07,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=xlTKV-3QSW8","annotations":[{"segment":[4.3843331232449305,78.065487],"label":"Kayaking"}]},"ArT40PO05h8":{"duration":152.44,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ArT40PO05h8","annotations":[{"segment":[26.147451840873636,132.40118795787834],"label":"Cleaning windows"}]},"rZGxJN2AOQY":{"duration":238.21,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=rZGxJN2AOQY","annotations":[{"segment":[13.378606132000382,226.69304834778424],"label":"Playing 
racquetball"}]},"oSyBkE3gYMM":{"duration":90.326,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=oSyBkE3gYMM","annotations":[{"segment":[0.01,60.040939272745895],"label":"Longboarding"}]},"x4Vk5wSH7xE":{"duration":25.566,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=x4Vk5wSH7xE","annotations":[{"segment":[0.01,25.565169],"label":"Playing blackjack"}]},"vutxJfF0Rlg":{"duration":183.67,"subset":"validation","resolution":"426x238","url":"https://www.youtube.com/watch?v=vutxJfF0Rlg","annotations":[{"segment":[118.05300250105243,148.71239878166554],"label":"Drinking coffee"}]},"b_DY76Y6ekg":{"duration":210.26,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=b_DY76Y6ekg","annotations":[{"segment":[113.4926772074883,210.25666666666666],"label":"Bungee jumping"}]},"CSk99DHanfg":{"duration":197.161,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CSk99DHanfg","annotations":[{"segment":[30.435318252730113,54.41465990639625],"label":"Scuba diving"},{"segment":[65.17462090483619,71.93802496099845],"label":"Scuba diving"},{"segment":[96.53222152886116,104.21790795631826],"label":"Scuba diving"},{"segment":[120.8312527301092,129.1326365054602],"label":"Scuba diving"},{"segment":[141.7384414976599,161.41579563182526],"label":"Scuba diving"}]},"GlAEgSWM2nQ":{"duration":55.055,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=GlAEgSWM2nQ","annotations":[{"segment":[0.01,54.11255371332614],"label":"Trimming branches or 
hedges"}]},"tvR1ovqyJZ8":{"duration":91.162,"subset":"training","resolution":"426x238","url":"https://www.youtube.com/watch?v=tvR1ovqyJZ8","annotations":[{"segment":[0.01,64.69446957878316],"label":"Waterskiing"}]},"B4LYBLihttQ":{"duration":186.29,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=B4LYBLihttQ","annotations":[{"segment":[43.013114853845856,49.406956251039155],"label":"Preparing pasta"},{"segment":[52.313247795217926,56.672685111486096],"label":"Preparing pasta"},{"segment":[68.87910959703694,123.5173906275979],"label":"Preparing pasta"},{"segment":[129.62060287037332,186.293287],"label":"Preparing pasta"}]},"NvOo-wtEPPk":{"duration":68.058,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=NvOo-wtEPPk","annotations":[{"segment":[45.795292771314124,55.441985227414335],"label":"Swimming"}]},"thvpt_lUxTI":{"duration":52.99,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=thvpt_lUxTI","annotations":[{"segment":[2.727928010920437,9.589080280811233],"label":"Shoveling snow"},{"segment":[13.722304539781593,32.2391492199688],"label":"Shoveling snow"},{"segment":[52.73994154446178,52.787935],"label":"Shoveling snow"}]},"gxJeNdvNzhU":{"duration":42.563,"subset":"validation","resolution":"800x480","url":"https://www.youtube.com/watch?v=gxJeNdvNzhU","annotations":[{"segment":[17.595906981279253,37.914199570982845],"label":"Hitting a pinata"}]},"FV2TzKt9MdI":{"duration":198.18,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=FV2TzKt9MdI","annotations":[{"segment":[33.4569235,138.94178237129483],"label":"Spinning"}]},"BxEnhkx2srY":{"duration":16.3,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=BxEnhkx2srY","annotations":[{"segment":[1.6669571,10.4566649],"label":"Pole vault"},{"segment":[11.6387426,14.6696481],"label":"Pole 
vault"}]},"GvDu_0ktsJ0":{"duration":188.87,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GvDu_0ktsJ0","annotations":[{"segment":[13.39473132,87.0317568],"label":"Preparing salad"},{"segment":[108.30618624,157.76205759],"label":"Preparing salad"},{"segment":[55.667838540000005,116.51829732000002],"label":"Preparing pasta"}]},"mH_B3FvFbFc":{"duration":70.194,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mH_B3FvFbFc","annotations":[]},"cRTsarldNlc":{"duration":25.54,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cRTsarldNlc","annotations":[{"segment":[10.18381775,12.611586362714508],"label":"Hammer throw"},{"segment":[18.588641510920436,20.262216952418097],"label":"Hammer throw"}]},"-V00wKlFJA0":{"duration":92.72,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=-V00wKlFJA0","annotations":[{"segment":[4.550152886115445,84.14171606864275],"label":"Playing bagpipes"}]},"0Yb2EQlAEYM":{"duration":70.937,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0Yb2EQlAEYM","annotations":[{"segment":[19.809229442589704,32.31449719126365],"label":"River tubing"},{"segment":[38.9544623675507,54.00505010046801],"label":"River tubing"}]},"0Zg9UgCp4a4":{"duration":109.85,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=0Zg9UgCp4a4","annotations":[{"segment":[53.577324299999994,92.32780539999999],"label":"Hopscotch"}]},"hHMqyl_Dugs":{"duration":237.773,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hHMqyl_Dugs","annotations":[{"segment":[9.088003120124805,24.296498137476515],"label":"Ice fishing"},{"segment":[78.4535779553631,232.39322264319142],"label":"Ice 
fishing"}]},"32EWNGHBctI":{"duration":64.6,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=32EWNGHBctI","annotations":[{"segment":[42.779546467572985,46.81059913825125],"label":"Tennis serve with ball bouncing"}]},"_4licRHiRRs":{"duration":120.233,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_4licRHiRRs","annotations":[{"segment":[18.194373985959437,71.46449988299531],"label":"Disc dog"}]},"J5eSX6Tb2hc":{"duration":44.4,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=J5eSX6Tb2hc","annotations":[]},"RiU6gIrM4v4":{"duration":72.981,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RiU6gIrM4v4","annotations":[]},"7vECSCWxbDk":{"duration":139.13,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7vECSCWxbDk","annotations":[{"segment":[13.231480499219968,131.23025741029642],"label":"Playing guitarra"}]},"zH1MiuiTsx8":{"duration":115.845,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zH1MiuiTsx8","annotations":[]},"8IJJGK2td2c":{"duration":154.692,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=8IJJGK2td2c","annotations":[{"segment":[14.457134380231844,132.2827779813084],"label":"Making a lemonade"}]},"pwoy7UXdnAE":{"duration":82.385,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pwoy7UXdnAE","annotations":[{"segment":[17.864952614431107,78.5286758673947],"label":"Disc dog"}]},"JX0pGTdQXRw":{"duration":37.04,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=JX0pGTdQXRw","annotations":[]},"5Qj8hHmZbiI":{"duration":94.97,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5Qj8hHmZbiI","annotations":[{"segment":[12.593474623338674,18.816132672517785],"label":"Beer pong"},{"segment":[38.22489944495739,41.632544856474254],"label":"Beer 
pong"},{"segment":[48.00336132917317,51.411007349453975],"label":"Beer pong"}]},"P8S8PIyPXxY":{"duration":202.061,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=P8S8PIyPXxY","annotations":[]},"6u-faCwecyk":{"duration":45.23,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=6u-faCwecyk","annotations":[]},"snhPrCNubYA":{"duration":21.2,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=snhPrCNubYA","annotations":[]},"hCJTKVzkYFE":{"duration":62.21,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hCJTKVzkYFE","annotations":[{"segment":[1.3566094570982838,24.03136752574103],"label":"Playing polo"},{"segment":[24.19688988,52.669820200000004],"label":"Playing polo"}]},"4OeZViscNp4":{"duration":109.99000000000001,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4OeZViscNp4","annotations":[{"segment":[13.041167252132261,101.41223378939158],"label":"Cleaning shoes"}]},"Oheg1qwrESg":{"duration":65.62,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Oheg1qwrESg","annotations":[{"segment":[6.239034321372856,16.369672386895477],"label":"Doing fencing"},{"segment":[31.102377535101407,58.930820592823714],"label":"Doing fencing"}]},"XBMiD_7fdF4":{"duration":55.26,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=XBMiD_7fdF4","annotations":[{"segment":[50.1763836599064,53.797359800312016],"label":"Drinking beer"}]},"Dn8acaAwWnQ":{"duration":212.858,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Dn8acaAwWnQ","annotations":[]},"Bl2Hqx3h9Fw":{"duration":110.226,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Bl2Hqx3h9Fw","annotations":[{"segment":[23.644235021101522,98.96186940171607],"label":"Removing ice from 
car"}]},"UdIoEWadRxI":{"duration":170.3,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=UdIoEWadRxI","annotations":[{"segment":[55.776362786356394,132.96621868355265],"label":"Mixing drinks"}]},"rrpEOrMpBuE":{"duration":120.744,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rrpEOrMpBuE","annotations":[]},"CRdgzvZxB8A":{"duration":74.21000000000001,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=CRdgzvZxB8A","annotations":[{"segment":[3.180733229329173,73.04120124804992],"label":"Kayaking"}]},"b1s0BdtlFZs":{"duration":112.8,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=b1s0BdtlFZs","annotations":[{"segment":[6.863181036505461,108.40306465591958],"label":"Shaving legs"}]},"dtYofkPozXM":{"duration":145.404,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=dtYofkPozXM","annotations":[{"segment":[18.09057395204871,104.02081661417317],"label":"Hanging wallpaper"}]},"MgLQZD76PaI":{"duration":65.43,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=MgLQZD76PaI","annotations":[]},"63Zt34YuyRk":{"duration":169.158,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=63Zt34YuyRk","annotations":[{"segment":[54.08066302652106,89.16714196567862],"label":"Grooming dog"},{"segment":[0.2,168.901],"label":"Grooming dog"},{"segment":[117.92222620904835,168.901],"label":"Grooming 
dog"}]},"vyoVLhEyyrQ":{"duration":175.13,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=vyoVLhEyyrQ","annotations":[]},"WQlkB6EdnQc":{"duration":120.814,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WQlkB6EdnQc","annotations":[]},"-2VNLsk1s34":{"duration":212.65,"subset":"training","resolution":"1280x718","url":"https://www.youtube.com/watch?v=-2VNLsk1s34","annotations":[{"segment":[16.918966432126673,31.847466225179623],"label":"Shot put"},{"segment":[75.30598784495598,103.50426523183377],"label":"Shot put"},{"segment":[139.9961536148521,166.53570880250177],"label":"Shot put"}]},"Gc1Mk5UyECQ":{"duration":113.52000000000001,"subset":"training","resolution":"426x196","url":"https://www.youtube.com/watch?v=Gc1Mk5UyECQ","annotations":[{"segment":[34.69933853354134,44.26716068642746],"label":"Playing lacrosse"},{"segment":[54.71421060842434,61.79695631825273],"label":"Playing lacrosse"},{"segment":[77.02485959438378,93.4922433697348],"label":"Playing lacrosse"},{"segment":[102.34567550702027,111.73031357254291],"label":"Playing lacrosse"}]},"73LjSLUZGZc":{"duration":130.497,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=73LjSLUZGZc","annotations":[{"segment":[16.464464047301163,32.522397757009344],"label":"Playing ten pins"},{"segment":[98.78678318691587,104.27493780841121],"label":"Playing ten pins"}]},"9RcCkU6dVD0":{"duration":37.222,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=9RcCkU6dVD0","annotations":[{"segment":[0.01,37.221587],"label":"Hitting a pinata"}]},"qXD7myRvw0M":{"duration":28.33,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qXD7myRvw0M","annotations":[{"segment":[0.01,28.32755],"label":"Playing 
badminton"}]},"vnlxIMiHKRQ":{"duration":183.345,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vnlxIMiHKRQ","annotations":[{"segment":[15.159551221528861,175.6219707550702],"label":"River tubing"}]},"oXqzXQRqAhI":{"duration":55.33,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oXqzXQRqAhI","annotations":[{"segment":[4.920420514820593,6.042621684867395],"label":"Washing face"}]},"B1u-Er4jXFA":{"duration":83.128,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=B1u-Er4jXFA","annotations":[{"segment":[4.149887682582134,80.14470586986745],"label":"Making an omelette"}]},"GwmpYLRpJQw":{"duration":183.299,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GwmpYLRpJQw","annotations":[{"segment":[67.48580929173166,122.10356172698907],"label":"Skiing"},{"segment":[85.50108889079563,122.96143218408736],"label":"Skiing"},{"segment":[146.12393452574102,150.98520044929796],"label":"Skiing"}]},"L0Fdx2r3qA8":{"duration":181.906,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=L0Fdx2r3qA8","annotations":[{"segment":[75.77010612669405,100.74302499991155],"label":"Peeling potatoes"},{"segment":[117.20244880271402,145.01319936606987],"label":"Peeling potatoes"},{"segment":[150.1212964083189,180.20231099843994],"label":"Peeling potatoes"}]},"w_CXbraJr58":{"duration":80.71000000000001,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=w_CXbraJr58","annotations":[]},"Uot4XZns2b4":{"duration":236.264,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Uot4XZns2b4","annotations":[{"segment":[9.2146270884849,225.94265472854914],"label":"Fixing the roof"}]},"IT9uvR9yjxI":{"duration":110.667,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IT9uvR9yjxI","annotations":[{"segment":[27.29340587039603,48.14253515388513],"label":"Roof 
shingle removal"},{"segment":[65.37322051214058,71.40396038752999],"label":"Roof shingle removal"}]},"J1QB1y3vXLY":{"duration":124.39,"subset":"testing","resolution":"480x272","url":"https://www.youtube.com/watch?v=J1QB1y3vXLY","annotations":[]},"sCCJpFKVV8g":{"duration":180.14,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sCCJpFKVV8g","annotations":[{"segment":[43.84072335954663,173.39568037285494],"label":"Getting a tattoo"}]},"3J_-uEjb8UE":{"duration":219.522,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=3J_-uEjb8UE","annotations":[{"segment":[150.23168183011205,177.58383136282234],"label":"Removing ice from car"}]},"hvCYwmaukDw":{"duration":60.488,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=hvCYwmaukDw","annotations":[{"segment":[0.01,54.11884081666251],"label":"Sumo"},{"segment":[57.88756231198961,59.58348698488681],"label":"Sumo"}]},"foZ88hBB77I":{"duration":70.102,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=foZ88hBB77I","annotations":[{"segment":[41.79651557632399,69.84274143302181],"label":"Playing rubik cube"}]},"vL8Hy6lcnF8":{"duration":223.31,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vL8Hy6lcnF8","annotations":[{"segment":[6.270691839918495,213.5518943261132],"label":"Zumba"}]},"fIN8MmMloZE":{"duration":209.91,"subset":"testing","resolution":"352x262","url":"https://www.youtube.com/watch?v=fIN8MmMloZE","annotations":[]},"odbjmsyfJe4":{"duration":20.88,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=odbjmsyfJe4","annotations":[{"segment":[0.04138296392829396,13.14499575832082],"label":"Brushing 
hair"}]},"0qoi1RmRMbQ":{"duration":12.075,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=0qoi1RmRMbQ","annotations":[]},"f0On10HA3HQ":{"duration":135.605,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=f0On10HA3HQ","annotations":[{"segment":[0.01,119.31506680675389],"label":"Wakeboarding"}]},"IBkHS9Zl2z8":{"duration":127.57,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IBkHS9Zl2z8","annotations":[{"segment":[0.01,32.62116068642746],"label":"Shoveling snow"},{"segment":[57.48485023400936,76.79473010920437],"label":"Shoveling snow"}]},"mkGUacg0GRc":{"duration":216.596,"subset":"testing","resolution":"432x720","url":"https://www.youtube.com/watch?v=mkGUacg0GRc","annotations":[]},"l9o9R7UcPuc":{"duration":153.948,"subset":"validation","resolution":"426x212","url":"https://www.youtube.com/watch?v=l9o9R7UcPuc","annotations":[{"segment":[39.74403629550576,71.10830878521703],"label":"Roof shingle removal"},{"segment":[78.05154467988595,99.36009621869742],"label":"Roof shingle removal"}]},"EqqqGf4wLH0":{"duration":156.456,"subset":"training","resolution":"400x224","url":"https://www.youtube.com/watch?v=EqqqGf4wLH0","annotations":[{"segment":[0.9763249672386896,4.393462352574103],"label":"Playing drums"},{"segment":[55.406441890795634,94.70352182215288],"label":"Playing drums"},{"segment":[109.5924775725429,123.01694587207487],"label":"Playing drums"},{"segment":[150.3540449547582,156.256076],"label":"Playing drums"}]},"4W3atvAalEs":{"duration":102.192,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=4W3atvAalEs","annotations":[]},"nrh2jDsmeLQ":{"duration":226.21,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nrh2jDsmeLQ","annotations":[{"segment":[79.9177511700468,170.94989157566303],"label":"Preparing 
salad"}]},"a073aSPGTIw":{"duration":194.653,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=a073aSPGTIw","annotations":[{"segment":[35.52946480308192,49.498314212840626],"label":"Elliptical trainer"},{"segment":[78.95436622907093,105.67738249121803],"label":"Elliptical trainer"},{"segment":[107.80307696661609,116.60952550755091],"label":"Elliptical trainer"},{"segment":[156.39037926142896,175.21795726365053],"label":"Elliptical trainer"},{"segment":[183.41706444929795,193.7418660904836],"label":"Elliptical trainer"}]},"IWdJF6lBSnM":{"duration":91.859,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IWdJF6lBSnM","annotations":[{"segment":[10.497915171507362,84.15494788178773],"label":"Roof shingle removal"}]},"mkwCGf92vqo":{"duration":49.95,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=mkwCGf92vqo","annotations":[{"segment":[1.7125792917316693,49.431265920436815],"label":"Doing karate"}]},"hv3tNd_6qB4":{"duration":121.627,"subset":"training","resolution":"180x240","url":"https://www.youtube.com/watch?v=hv3tNd_6qB4","annotations":[{"segment":[0.01,4.364119838262918],"label":"Mooping floor"},{"segment":[10.246194402878157,106.4465747925117],"label":"Mooping floor"}]},"QEaQzeB94N4":{"duration":111.94,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QEaQzeB94N4","annotations":[{"segment":[3.056171532575589,105.2196187051482],"label":"Playing violin"}]},"sV8nkrbJIKE":{"duration":191.542,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=sV8nkrbJIKE","annotations":[{"segment":[31.375737315206894,191.5414058956916],"label":"Making a cake"}]},"uCdUm-lou7w":{"duration":13.166,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uCdUm-lou7w","annotations":[{"segment":[0.2,11.87959438377535],"label":"Using the monkey 
bar"}]},"64JprLAtnzg":{"duration":15.07,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=64JprLAtnzg","annotations":[{"segment":[0,9.59714352574103],"label":"Using the balance beam"}]},"KiKZEKwn4Aw":{"duration":207.006,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KiKZEKwn4Aw","annotations":[{"segment":[17.540684461894525,22.05483170488518],"label":"Skiing"},{"segment":[23.02214897124032,43],"label":"Skiing"},{"segment":[44.9480070086235,72.67776864413752],"label":"Skiing"},{"segment":[84.9304540179693,103.63192116750201],"label":"Skiing"},{"segment":[104.59923843385714,112.98265474226835],"label":"Skiing"},{"segment":[117.49680198525901,132.00656098058613],"label":"Skiing"},{"segment":[138.45534275628705,139.74509911142724],"label":"Skiing"},{"segment":[140.38997728899733,143.93680726563284],"label":"Skiing"},{"segment":[145.5490027095581,149.7407108637637],"label":"Skiing"},{"segment":[153.2875408403992,155.54461446189453],"label":"Skiing"},{"segment":[158.44656626095997,160.38120079367025],"label":"Skiing"},{"segment":[166.82998256937117,178.43778976563286],"label":"Skiing"}]},"8eDJXDetgGE":{"duration":221.61,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8eDJXDetgGE","annotations":[{"segment":[7.260271896590149,31.461178218557315],"label":"Playing field hockey"},{"segment":[35.264177783437866,80.90017256200451],"label":"Playing field hockey"},{"segment":[82.62880872785931,137.94516603521282],"label":"Playing field hockey"},{"segment":[139.32807496789667,199.13888499219968],"label":"Playing field 
hockey"}]},"JLqjNaSEpV0":{"duration":230.09,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JLqjNaSEpV0","annotations":[]},"gSOUhz7yQ-s":{"duration":70.36,"subset":"training","resolution":"322x240","url":"https://www.youtube.com/watch?v=gSOUhz7yQ-s","annotations":[{"segment":[6.636123244929798,67.29358034321373],"label":"Cleaning windows"}]},"AF79fdkjLQw":{"duration":155.64,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AF79fdkjLQw","annotations":[]},"cKFTf1vZQOw":{"duration":127.733,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cKFTf1vZQOw","annotations":[{"segment":[25.497360374414978,110.75415912636505],"label":"Rollerblading"}]},"2O-TyeSMueg":{"duration":221.356,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2O-TyeSMueg","annotations":[{"segment":[34.86958034321373,75.26305460218408],"label":"Scuba diving"},{"segment":[80.78694851794071,88.38230265210608],"label":"Scuba diving"},{"segment":[104.60874102964118,117.72798907956317],"label":"Scuba diving"},{"segment":[106.33495787831512,126.01382995319813],"label":"Scuba diving"},{"segment":[139.8235647425897,146.38318876755068],"label":"Scuba diving"},{"segment":[172.63728549141965,180.92387519500778],"label":"Scuba diving"}]},"OmzUF102S5w":{"duration":93.298,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OmzUF102S5w","annotations":[]},"IaRYt0J7pHI":{"duration":193.73,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IaRYt0J7pHI","annotations":[{"segment":[15.111082810659365,38.684371995287975],"label":"Brushing hair"},{"segment":[48.65768665032315,60.74655289885064],"label":"Brushing hair"},{"segment":[72.53319749116496,105.77757967461555],"label":"Brushing hair"},{"segment":[111.51979114266611,182.54187881435254],"label":"Brushing 
hair"}]},"4NPgcPSwdMU":{"duration":30.256,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=4NPgcPSwdMU","annotations":[]},"5j5_YV25cFA":{"duration":121.88,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5j5_YV25cFA","annotations":[{"segment":[9.126854789391576,80.62055063962558],"label":"Playing squash"}]},"8jj-mhVrlGA":{"duration":71.87,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=8jj-mhVrlGA","annotations":[{"segment":[37.32928089781591,68.78213826599064],"label":"Layup drill in basketball"}]},"XJmBiSBx7Ss":{"duration":114.266,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=XJmBiSBx7Ss","annotations":[{"segment":[9.447818365054601,90.91296917316693],"label":"Trimming branches or hedges"},{"segment":[94.12166220280811,105.70860925429017],"label":"Trimming branches or hedges"}]},"pspJX3Lyys0":{"duration":96.85,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=pspJX3Lyys0","annotations":[{"segment":[3.5506796530364615,86.52385299999999],"label":"Discus throw"}]},"kliGNOFLJSk":{"duration":163.515,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kliGNOFLJSk","annotations":[{"segment":[83.79809098595943,92.98144342277692],"label":"Capoeira"}]},"S_hFs1vb3C4":{"duration":40.403,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=S_hFs1vb3C4","annotations":[]},"osQBvw8UzIg":{"duration":46.115,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=osQBvw8UzIg","annotations":[]},"MTC0fs4QyEY":{"duration":225.26,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=MTC0fs4QyEY","annotations":[{"segment":[19.850803432137287,206.41322152886116],"label":"Tai 
chi"}]},"O1XzCrHZm34":{"duration":34.02,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=O1XzCrHZm34","annotations":[{"segment":[6.609172790000001,18.0085708],"label":"Doing kickboxing"}]},"NKQehrewXlM":{"duration":142.548,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NKQehrewXlM","annotations":[{"segment":[6.665569422776911,16.663923556942276],"label":"Running a marathon"},{"segment":[44.881500780031196,67.3222511700468],"label":"Running a marathon"},{"segment":[70.87722152886116,81.09776131045241],"label":"Running a marathon"},{"segment":[88.20770202808112,92.65141497659906],"label":"Running a marathon"},{"segment":[111.3150093603744,121.09117784711388],"label":"Running a marathon"},{"segment":[126.4236333853354,134.2001310452418],"label":"Running a marathon"}]},"sdq5E1nLX5A":{"duration":104.68,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=sdq5E1nLX5A","annotations":[]},"WVkOtMX9Gg8":{"duration":199.53,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=WVkOtMX9Gg8","annotations":[{"segment":[35.48556040200791,47.002803690378904],"label":"Cricket"},{"segment":[57.8974932874866,65.67941442827781],"label":"Cricket"},{"segment":[159.0624681177723,160.61885230577224],"label":"Cricket"},{"segment":[164.6654512979719,166.84438921684867],"label":"Cricket"}]},"fEsTL9tYOVc":{"duration":213.508,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fEsTL9tYOVc","annotations":[{"segment":[34.90857476635514,200.16124922118382],"label":"Scuba 
diving"}]},"FjfWqx-NL_w":{"duration":100.287,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=FjfWqx-NL_w","annotations":[{"segment":[0.19999999999999998,94.51619105862278],"label":"Rollerblading"}]},"IDuoF51IMJ8":{"duration":17.14,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IDuoF51IMJ8","annotations":[]},"N2fTZV7pnEw":{"duration":209.978,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=N2fTZV7pnEw","annotations":[{"segment":[0.01,208.99531311794073],"label":"Snowboarding"}]},"_JW114l6IG8":{"duration":190.311,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_JW114l6IG8","annotations":[]},"cxxiokwoVgg":{"duration":199.413,"subset":"training","resolution":"1267x720","url":"https://www.youtube.com/watch?v=cxxiokwoVgg","annotations":[{"segment":[35.153924600521435,181.68045988234087],"label":"Snow tubing"}]},"0QjoAi4b1Lw":{"duration":128.85,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0QjoAi4b1Lw","annotations":[]},"2fp9Ni7Ms6A":{"duration":45.976,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2fp9Ni7Ms6A","annotations":[{"segment":[10.256627035881435,41.31340680187208],"label":"Grooming dog"}]},"Ly70Hwy52WA":{"duration":203.13,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ly70Hwy52WA","annotations":[{"segment":[49.752067586739464,57.35747919235569],"label":"Ping-pong"},{"segment":[118.5176641875195,163.1994573705148],"label":"Ping-pong"},{"segment":[182.5298785347894,186.3325843375975],"label":"Ping-pong"}]},"6lyXvR5VtTQ":{"duration":80.899,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=6lyXvR5VtTQ","annotations":[{"segment":[21.532415887850462,80.64099999999999],"label":"Hitting a 
pinata"}]},"H3pQJc8d8H8":{"duration":160.056,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=H3pQJc8d8H8","annotations":[{"segment":[22.722337617314217,139.8297699527029],"label":"Capoeira"}]},"2Sev8z4P7pE":{"duration":45.279,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=2Sev8z4P7pE","annotations":[{"segment":[3.49364976599064,4.2700163806552265],"label":"Raking leaves"},{"segment":[13.021785491419656,15.492042901716069],"label":"Raking leaves"}]},"yirVJRiG4mI":{"duration":44.815,"subset":"testing","resolution":"603x480","url":"https://www.youtube.com/watch?v=yirVJRiG4mI","annotations":[]},"boqZwv68BiM":{"duration":21.154,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=boqZwv68BiM","annotations":[{"segment":[2.967384910490235,21.02829811422274],"label":"Installing carpet"}]},"fgeW0L2acbI":{"duration":106.743,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fgeW0L2acbI","annotations":[{"segment":[52.91450546021841,106.641],"label":"Surfing"}]},"RgEyUF4POcE":{"duration":45.744,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=RgEyUF4POcE","annotations":[]},"CEEJ1gzdmAk":{"duration":171.55,"subset":"training","resolution":"426x238","url":"https://www.youtube.com/watch?v=CEEJ1gzdmAk","annotations":[{"segment":[0,171.501],"label":"Playing ice hockey"}]},"E-M2Cq0RNTs":{"duration":142.896,"subset":"validation","resolution":"853x470","url":"https://www.youtube.com/watch?v=E-M2Cq0RNTs","annotations":[{"segment":[13.577307874343923,113.7377757998319],"label":"Hand car 
wash"}]},"zm5n_kHOQcA":{"duration":128.43,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zm5n_kHOQcA","annotations":[]},"Y82dz6aTwJ0":{"duration":37.013,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Y82dz6aTwJ0","annotations":[{"segment":[0.009999999999999998,36.966],"label":"Putting on shoes"}]},"bRXOj9Jm1_E":{"duration":188.5,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bRXOj9Jm1_E","annotations":[{"segment":[12.056882811366876,58.22592186952785],"label":"Surfing"},{"segment":[60.87255468177911,75.2819999929249],"label":"Surfing"},{"segment":[79.1049140550656,86.75074217934704],"label":"Surfing"},{"segment":[93.51435936621138,132.03757030009092],"label":"Surfing"},{"segment":[136.44862498717637,146.4470146053042],"label":"Surfing"},{"segment":[152.03435050390013,163.20902230109203],"label":"Surfing"},{"segment":[165.56158478471139,176.44218627145085],"label":"Surfing"}]},"IU6LVYI0FZM":{"duration":62.509,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=IU6LVYI0FZM","annotations":[{"segment":[6.707714052961776,57.84188205090227],"label":"Capoeira"}]},"60yOoxs9AFc":{"duration":74.583,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=60yOoxs9AFc","annotations":[{"segment":[0.2,70.74881345327103],"label":"Playing congas"}]},"IxSk2VZeLeY":{"duration":235.87,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IxSk2VZeLeY","annotations":[]},"LDjomH0-hYA":{"duration":189.75,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LDjomH0-hYA","annotations":[{"segment":[12.242863559999998,182.15983295319813],"label":"Belly dance"}]},"-yn3D1fOfr0":{"duration":80.736,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=-yn3D1fOfr0","annotations":[{"segment":[19.48708113309476,60.16887502016748],"label":"Decorating 
the Christmas tree"}]},"1px5LTVIR3A":{"duration":224.282,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1px5LTVIR3A","annotations":[]},"6yVhTyPaaLQ":{"duration":160.43,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6yVhTyPaaLQ","annotations":[{"segment":[0,160.4266439909297],"label":"Painting"}]},"1JeK3U_nUUc":{"duration":141.967,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1JeK3U_nUUc","annotations":[{"segment":[0.01,141.921],"label":"Beach soccer"}]},"MRxC-Ygp4go":{"duration":127.316,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=MRxC-Ygp4go","annotations":[{"segment":[26.416276372854917,119.96564608424337],"label":"Surfing"}]},"1RYEOaCeV4k":{"duration":174.963,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1RYEOaCeV4k","annotations":[{"segment":[45.98543649033639,149.38465213574852],"label":"Cutting the grass"}]},"W19-fYOO9sg":{"duration":98.15,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=W19-fYOO9sg","annotations":[{"segment":[13.1236365,80.0442695],"label":"Fixing bicycle"}]},"Zk3JXtS2fOY":{"duration":189.15,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Zk3JXtS2fOY","annotations":[{"segment":[37.757204368174726,57.52074102964119],"label":"Doing fencing"},{"segment":[70.79475819032761,74.92445241809672],"label":"Doing fencing"},{"segment":[84.06877535101404,93.50807644305772],"label":"Doing fencing"},{"segment":[105.89715912636505,110.02685335413415],"label":"Doing fencing"},{"segment":[115.04148205928237,120.94104524180966],"label":"Doing fencing"},{"segment":[125.07073946957877,131.8691840873635],"label":"Doing fencing"},{"segment":[144.84959594383776,150.1597644305772],"label":"Doing fencing"},{"segment":[163.1401762870515,166.38527925117003],"label":"Doing 
fencing"}]},"24_3cq6pPsI":{"duration":215.366,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=24_3cq6pPsI","annotations":[]},"YiMOKwzEf1k":{"duration":18.205,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=YiMOKwzEf1k","annotations":[{"segment":[0.5964014560582424,17.466041255850236],"label":"Cutting the grass"}]},"gC4B1Az2Mk0":{"duration":96.921,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=gC4B1Az2Mk0","annotations":[{"segment":[0.2,33.8624399375975],"label":"Having an ice cream"},{"segment":[40.66516224648986,95.54045553822152],"label":"Having an ice cream"}]},"41LaEr0i2Dc":{"duration":176.145,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=41LaEr0i2Dc","annotations":[{"segment":[29.863927009275717,75.6187573644349],"label":"Wakeboarding"},{"segment":[84.93411204752118,101.09899223287684],"label":"Wakeboarding"},{"segment":[109.86638487578158,112.88017609678009],"label":"Wakeboarding"},{"segment":[130.96292342277113,164.38860608376714],"label":"Wakeboarding"},{"segment":[172.0600745627718,173.42997964830835],"label":"Wakeboarding"}]},"0OxOI3sAIrM":{"duration":80.551,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0OxOI3sAIrM","annotations":[{"segment":[26.131422056734817,63.64615091964711],"label":"Doing crunches"}]},"N9Zo5lRPHdw":{"duration":57.122,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=N9Zo5lRPHdw","annotations":[]},"hZRgO5Kd-rE":{"duration":170.9,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hZRgO5Kd-rE","annotations":[{"segment":[27.194508155836438,135.9725407791822],"label":"Making a sandwich"}]},"twrPZghmNtA":{"duration":44.861,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=twrPZghmNtA","annotations":[{"segment":[0.20000000000000004,44.601],"label":"Beach 
soccer"}]},"kFP91VjB1AI":{"duration":82.896,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=kFP91VjB1AI","annotations":[{"segment":[0.01,74.4249648194519],"label":"Beer pong"}]},"3OGD7if0zs8":{"duration":95.156,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=3OGD7if0zs8","annotations":[{"segment":[9.797591475819033,18.110699394695786],"label":"Baking cookies"},{"segment":[32.807086608424335,50.47244093603744],"label":"Baking cookies"},{"segment":[52.55071791575662,53.14451133853354],"label":"Baking cookies"},{"segment":[72.44279757878314,75.70866140405616],"label":"Baking cookies"},{"segment":[78.0838350951638,84.76401110140405],"label":"Baking cookies"}]},"qeyCTRqPCN4":{"duration":133.91,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qeyCTRqPCN4","annotations":[{"segment":[20.61747484300226,36.86154843511407],"label":"Bullfighting"},{"segment":[43.31752640120978,46.02487199989508],"label":"Bullfighting"},{"segment":[50.39827642854057,112.66722397154517],"label":"Bullfighting"}]},"_HMwzNA9DNY":{"duration":100.13,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_HMwzNA9DNY","annotations":[{"segment":[27.335066341653665,91.68962252886115],"label":"Archery"}]},"B2_qciB7U1Q":{"duration":184.32,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=B2_qciB7U1Q","annotations":[{"segment":[0.19999999999999998,163.51071606864275],"label":"Longboarding"}]},"y_zFbT7xXKI":{"duration":169.545,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=y_zFbT7xXKI","annotations":[{"segment":[17.427392523364485,157.63868691588786],"label":"Rope skipping"}]},"38qpTblXDTw":{"duration":15.953,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=38qpTblXDTw","annotations":[{"segment":[0.004961699716998111,15.783407559748104],"label":"Cutting the 
grass"}]},"F3iZD7tm8Io":{"duration":74.86,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=F3iZD7tm8Io","annotations":[{"segment":[10.978079106837741,13.547416770140194],"label":"Pole vault"},{"segment":[19.85397285279166,74.66115646258503],"label":"Pole vault"},{"segment":[65.75168656542179,74.861156],"label":"Pole vault"}]},"leKf6Q87IYg":{"duration":176.774,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=leKf6Q87IYg","annotations":[{"segment":[27.9202315855102,168.07208114288827],"label":"Wakeboarding"}]},"l7QlSS_li6M":{"duration":21.15,"subset":"training","resolution":"480x272","url":"https://www.youtube.com/watch?v=l7QlSS_li6M","annotations":[{"segment":[2.7390490340702063,10.758192591649244],"label":"Tumbling"},{"segment":[13.101234536456287,20.953378684807255],"label":"Tumbling"}]},"vjVtKL3xd8w":{"duration":141.53,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=vjVtKL3xd8w","annotations":[{"segment":[91.61141185647425,109.71294383775351],"label":"Removing curlers"}]},"XBbSLB3Ov0w":{"duration":120.187,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=XBbSLB3Ov0w","annotations":[{"segment":[4.872302652106084,110.01991731669267],"label":"Rafting"}]},"IQ4SUx8ythk":{"duration":60.117,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IQ4SUx8ythk","annotations":[{"segment":[25.79099340873635,28.698341756630267],"label":"Applying sunscreen"},{"segment":[35.26332189703588,40.6090914399376],"label":"Applying sunscreen"},{"segment":[43.32886892667707,49.04978019188768],"label":"Applying sunscreen"}]},"y7i-jRmrwnI":{"duration":82.08,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=y7i-jRmrwnI","annotations":[{"segment":[7.171017500156007,81.5703241462992],"label":"Washing 
dishes"}]},"Fde_qSwXRzY":{"duration":97.69,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Fde_qSwXRzY","annotations":[{"segment":[46.176240421216846,89.45694101404055],"label":"Starting a campfire"}]},"g_bb4RSu6TQ":{"duration":2.3,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=g_bb4RSu6TQ","annotations":[{"segment":[0.01,2.223464505460218],"label":"Using parallel bars"}]},"085A7Iv6xzA":{"duration":160.96,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=085A7Iv6xzA","annotations":[{"segment":[38.7848829149766,133.5066624],"label":"Kayaking"}]},"5JlwYD_GChY":{"duration":78.136,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5JlwYD_GChY","annotations":[{"segment":[4.064946769507697,47.14851235829274],"label":"Beach soccer"},{"segment":[65.16085051405598,71.73291984115879],"label":"Beach soccer"}]},"BBRNbo8c8gA":{"duration":130.61,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BBRNbo8c8gA","annotations":[{"segment":[13.448369840873637,110.8471695975039],"label":"Tumbling"}]},"-c-vnHG9Yho":{"duration":161.29,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-c-vnHG9Yho","annotations":[]},"lj-VovhJcPA":{"duration":213.02,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=lj-VovhJcPA","annotations":[{"segment":[96.01517784711389,101.99536193447739],"label":"Walking the dog"},{"segment":[153.50580655226207,157.16070670826835],"label":"Walking the 
dog"}]},"2ttzbjuKoT0":{"duration":80.086,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=2ttzbjuKoT0","annotations":[]},"5c1jHhHUwPI":{"duration":42.17,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=5c1jHhHUwPI","annotations":[{"segment":[0.01,10.525373354134166],"label":"Cricket"}]},"KGi_eGXrW6E":{"duration":86.45,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=KGi_eGXrW6E","annotations":[]},"6X5ZOKDZrCI":{"duration":225.025,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6X5ZOKDZrCI","annotations":[]},"TnYCazY5WEM":{"duration":13.491,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TnYCazY5WEM","annotations":[{"segment":[0.2,13.027769497659907],"label":"Playing beach volleyball"}]},"Fv1qhPABYk0":{"duration":18.321,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Fv1qhPABYk0","annotations":[{"segment":[0.01,18.301],"label":"Tug of war"}]},"lSTqYESahrY":{"duration":106.788,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=lSTqYESahrY","annotations":[{"segment":[79.43129017160686,90.60527925117005],"label":"Putting on shoes"}]},"mB0MeZjjCmU":{"duration":214.367,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mB0MeZjjCmU","annotations":[{"segment":[18.058945525167946,57.521085746831226],"label":"Making a cake"},{"segment":[72.90463193493726,92.97012696290163],"label":"Making a cake"},{"segment":[101.66517480835287,165.20590838377532],"label":"Making a 
cake"}]},"mw9WwLwcptY":{"duration":122.091,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=mw9WwLwcptY","annotations":[]},"cGEhbEHCQyE":{"duration":235.822,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cGEhbEHCQyE","annotations":[{"segment":[0.01,235.82185941043085],"label":"Playing ice hockey"}]},"vVgvNxSSJao":{"duration":86.82,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vVgvNxSSJao","annotations":[]},"Qv0fN5A1vp4":{"duration":28.49,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=Qv0fN5A1vp4","annotations":[{"segment":[0.01,8.222797745709828],"label":"Removing curlers"}]},"gCHo9vxfEzM":{"duration":163.93,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gCHo9vxfEzM","annotations":[]},"vb1CG5i1lEs":{"duration":183.043,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=vb1CG5i1lEs","annotations":[{"segment":[5.700342679127726,179.01032398753895],"label":"Painting fence"}]},"2lUqeOw61QY":{"duration":80.18,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=2lUqeOw61QY","annotations":[{"segment":[23.336608679999998,71.90158736349454],"label":"Layup drill in basketball"},{"segment":[8.058475039001559,23.175924336973477],"label":"Layup drill in basketball"}]},"k67UWfSt-iE":{"duration":86.309,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=k67UWfSt-iE","annotations":[{"segment":[4.847283262759082,68.93913859594385],"label":"Playing beach 
volleyball"}]},"9QnASEwOBT4":{"duration":99.06,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=9QnASEwOBT4","annotations":[{"segment":[51.6758143525741,63.26231981279251],"label":"Snatch"},{"segment":[79.79240093603744,90.76095943837753],"label":"Snatch"}]},"FA4BDINxLmE":{"duration":166.325,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FA4BDINxLmE","annotations":[{"segment":[0.19999999999999998,15.56859923376527],"label":"Slacklining"},{"segment":[18.94179573441441,33.731965006491414],"label":"Slacklining"},{"segment":[86.66520240129333,118.5808300124805],"label":"Slacklining"},{"segment":[131.03570931357254,137.78210226833073],"label":"Slacklining"},{"segment":[150.2369815694228,152.57227143837753],"label":"Slacklining"}]},"2wcD0wSzB5w":{"duration":150.094,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2wcD0wSzB5w","annotations":[{"segment":[0.01,150.041],"label":"Disc dog"}]},"lq-8Y-YLcNI":{"duration":44.98,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lq-8Y-YLcNI","annotations":[{"segment":[12.699552539952942,31.475019312538613],"label":"Rock climbing"}]},"QZyR7yfp8hA":{"duration":124.79,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=QZyR7yfp8hA","annotations":[{"segment":[15.95533229329173,19.846876755070202],"label":"Playing polo"},{"segment":[89.50552262090484,105.26627769110765],"label":"Playing polo"}]},"0w2XFd-Q9Eg":{"duration":18.576,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0w2XFd-Q9Eg","annotations":[{"segment":[3.7673559906396257,16.8951426349454],"label":"Playing rubik cube"}]},"BsSZDCHPjtY":{"duration":51.06,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=BsSZDCHPjtY","annotations":[{"segment":[0.01,51.001],"label":"River 
tubing"}]},"KgONRjXP8_Y":{"duration":24.753,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=KgONRjXP8_Y","annotations":[{"segment":[0.46194286517462124,22.519742725205724],"label":"Painting furniture"}]},"k_gAGeXhmHo":{"duration":179.978,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=k_gAGeXhmHo","annotations":[{"segment":[0.01,22.176815912636506],"label":"Rock-paper-scissors"}]},"vbnuIUgUVXA":{"duration":139.134,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vbnuIUgUVXA","annotations":[{"segment":[46.68139673536073,112.78086703909905],"label":"Building sandcastles"}]},"JHYMG87h3XI":{"duration":76.348,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=JHYMG87h3XI","annotations":[{"segment":[0.01,76.301],"label":"Waterskiing"}]},"A6CBFa22oJc":{"duration":112.8,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=A6CBFa22oJc","annotations":[{"segment":[27.08598127925117,41.691664586583464],"label":"Walking the dog"}]},"iZImXrduFlA":{"duration":133.167,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=iZImXrduFlA","annotations":[{"segment":[0.2,132.31016692667706],"label":"Blowing leaves"}]},"NfjeG6AsoJA":{"duration":55.101,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NfjeG6AsoJA","annotations":[{"segment":[25.45083628407404,47.30256552942692],"label":"Fun sliding 
down"}]},"3URkGUH1-vk":{"duration":98.546,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=3URkGUH1-vk","annotations":[]},"X3hQtXddMd4":{"duration":227.115,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=X3hQtXddMd4","annotations":[{"segment":[9.564528861154447,216.44174570982838],"label":"Wakeboarding"}]},"BUe-oRdwBFw":{"duration":70.06,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BUe-oRdwBFw","annotations":[]},"K5mrupCfD1w":{"duration":123.07,"subset":"testing","resolution":"432x720","url":"https://www.youtube.com/watch?v=K5mrupCfD1w","annotations":[]},"jzwdcO34AoY":{"duration":222.45,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jzwdcO34AoY","annotations":[]},"6j-H-tIjJvA":{"duration":230.714,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6j-H-tIjJvA","annotations":[{"segment":[25.194918653889015,40.671797255563696],"label":"Beach soccer"},{"segment":[43.91114393963514,115.17677098920693],"label":"Beach soccer"},{"segment":[119.85582731064345,160.52762456620715],"label":"Beach soccer"},{"segment":[165.92653570632623,193.64094622560413],"label":"Beach soccer"}]},"jqLFT1c8WS8":{"duration":187.34,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jqLFT1c8WS8","annotations":[{"segment":[3.942589703588143,172.74383775351015],"label":"Discus throw"}]},"--mFXNrRZ5E":{"duration":97.803,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=--mFXNrRZ5E","annotations":[{"segment":[3.659569422776911,96.23586739469579],"label":"Doing fencing"}]},"EolA3Rd_Vm4":{"duration":66.32,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=EolA3Rd_Vm4","annotations":[{"segment":[2.4829774726989084,23.795200780031205],"label":"Shaving legs"},{"segment":[26.485093042121687,28.968070514820596],"label":"Shaving 
legs"},{"segment":[36.10663074882996,39.52072477379095],"label":"Shaving legs"},{"segment":[42.93481879875195,64.45395689547583],"label":"Shaving legs"}]},"XX2sXEmR4BE":{"duration":23.45,"subset":"validation","resolution":"526x360","url":"https://www.youtube.com/watch?v=XX2sXEmR4BE","annotations":[{"segment":[0.01,19.28125527457098],"label":"Spinning"}]},"s3OxuR2zsZM":{"duration":112.014,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=s3OxuR2zsZM","annotations":[{"segment":[40.87917940717629,111.981],"label":"Cutting the grass"}]},"J3TwVc0v89A":{"duration":204.48,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=J3TwVc0v89A","annotations":[{"segment":[29.02838202340094,73.6874312901716],"label":"Washing dishes"},{"segment":[82.30024793447738,159.17761131513262],"label":"Washing dishes"}]},"ZEEUm7RwSRQ":{"duration":193.98,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZEEUm7RwSRQ","annotations":[]},"5GZNSTv1rVs":{"duration":118.817,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=5GZNSTv1rVs","annotations":[{"segment":[24.048177570093458,111.17657476635515],"label":"Ice fishing"}]},"X095EDl0-eU":{"duration":59.862,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=X095EDl0-eU","annotations":[{"segment":[0.2,48.808709828393134],"label":"Doing 
fencing"}]},"ziJaskA3588":{"duration":190.056,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ziJaskA3588","annotations":[{"segment":[0.19999999999999998,122.71515444617785],"label":"Waterskiing"},{"segment":[127.45776911076443,180.51577067082684],"label":"Waterskiing"}]},"WWYXTYht4k0":{"duration":18.46,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=WWYXTYht4k0","annotations":[]},"sI7qq85QaA0":{"duration":78.28,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sI7qq85QaA0","annotations":[{"segment":[4.666319850352136,70.62657858243936],"label":"Ping-pong"}]},"n3MGZcDHr-U":{"duration":195.095,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=n3MGZcDHr-U","annotations":[{"segment":[84.61164811218299,160.09254186895475],"label":"Blow-drying hair"}]},"1dJy1bMOvuU":{"duration":56.355,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1dJy1bMOvuU","annotations":[]},"TkNhbOYp9wc":{"duration":122.88,"subset":"testing","resolution":"624x360","url":"https://www.youtube.com/watch?v=TkNhbOYp9wc","annotations":[]},"ncgzVLi_hlI":{"duration":235.056,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ncgzVLi_hlI","annotations":[{"segment":[5.500521082067773,88.37503871855554],"label":"Beach soccer"}]},"A-oC3ReQiuQ":{"duration":88.33,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=A-oC3ReQiuQ","annotations":[]},"RhSNiFS9pR4":{"duration":29.58,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=RhSNiFS9pR4","annotations":[]},"HGk9BqOuxCE":{"duration":190.89,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HGk9BqOuxCE","annotations":[{"segment":[0,190.89120181405895],"label":"Playing 
saxophone"}]},"SbY68ygmuEA":{"duration":119.049,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SbY68ygmuEA","annotations":[{"segment":[0.01,119.0487074829932],"label":"Running a marathon"}]},"-ySxFjhhK4Y":{"duration":152.37,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=-ySxFjhhK4Y","annotations":[{"segment":[27.79909048361934,38.966246489859586],"label":"Shot put"},{"segment":[63.43895007800312,73.65570982839313],"label":"Shot put"}]},"J7LhH4IKvnM":{"duration":102.49000000000001,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=J7LhH4IKvnM","annotations":[{"segment":[0,84.71814352574103],"label":"Bathing dog"}]},"IN4nGNF9gi8":{"duration":108.02000000000001,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IN4nGNF9gi8","annotations":[{"segment":[38.744177728988575,78.28373522120042],"label":"Getting a haircut"}]},"gDhfInESf_E":{"duration":104.86,"subset":"training","resolution":"160x112","url":"https://www.youtube.com/watch?v=gDhfInESf_E","annotations":[{"segment":[3.105833073322933,102.4924914196568],"label":"Brushing hair"}]},"eS445rQ9SBo":{"duration":120.09,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=eS445rQ9SBo","annotations":[{"segment":[4.121777409871905,93.48940467394696],"label":"Ironing 
clothes"}]},"jA05XIX7Yh0":{"duration":106.161,"subset":"validation","resolution":"480x270","url":"https://www.youtube.com/watch?v=jA05XIX7Yh0","annotations":[{"segment":[16.436800199671605,101.10120234297379],"label":"Surfing"}]},"fDPNV463JuE":{"duration":177.91,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fDPNV463JuE","annotations":[{"segment":[73.8290231037813,174.5806584071763],"label":"Paintball"}]},"Vk-aKj0R65o":{"duration":180.071,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Vk-aKj0R65o","annotations":[{"segment":[0,180.041],"label":"Doing fencing"}]},"5ObgpPDmVPA":{"duration":186.085,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5ObgpPDmVPA","annotations":[{"segment":[26.40291887675507,71.6650655226209],"label":"Scuba diving"}]},"MN0YWduOEHk":{"duration":234.708,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=MN0YWduOEHk","annotations":[{"segment":[0.01,234.661],"label":"Tug of war"}]},"9xtYwXpaiZ0":{"duration":121.02,"subset":"training","resolution":"342x240","url":"https://www.youtube.com/watch?v=9xtYwXpaiZ0","annotations":[{"segment":[14.568797921760059,33.04546147316193],"label":"Polishing shoes"},{"segment":[45.48892876288156,92.05766240774136],"label":"Polishing shoes"}]},"T3DSDybuBsU":{"duration":85.66,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=T3DSDybuBsU","annotations":[{"segment":[3.2708837753510136,5.139960218408736],"label":"Archery"}]},"FujUrbL1qc0":{"duration":161.054,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FujUrbL1qc0","annotations":[{"segment":[36.4502344787918,95.54969948656785],"label":"Doing crunches"}]},"kuv1yEeNQzQ":{"duration":206.983,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kuv1yEeNQzQ","annotations":[{"segment":[4.843577743109725,193.09729821840875],"label":"Beach 
soccer"}]},"KiE-fFIz5ic":{"duration":214.76,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=KiE-fFIz5ic","annotations":[{"segment":[26.13073010920437,164.82460530421218],"label":"Pole vault"}]},"MAYrBia9Xnw":{"duration":22.66,"subset":"testing","resolution":"800x480","url":"https://www.youtube.com/watch?v=MAYrBia9Xnw","annotations":[]},"kAQML4pRtck":{"duration":6.13,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=kAQML4pRtck","annotations":[{"segment":[0.28460218408736354,5.919725429017161],"label":"Hammer throw"}]},"jN6ljpARJaw":{"duration":60.024,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=jN6ljpARJaw","annotations":[]},"vDaDT1vUOfI":{"duration":173.709,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vDaDT1vUOfI","annotations":[{"segment":[26.47500538185077,168.03526028111477],"label":"Swimming"}]},"Ye_uVlSo1HI":{"duration":111.967,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ye_uVlSo1HI","annotations":[]},"pfARuqcw3Q8":{"duration":141.22,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pfARuqcw3Q8","annotations":[]},"dskswWtslKc":{"duration":56.68,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=dskswWtslKc","annotations":[]},"bSBmunE-1KQ":{"duration":116.704,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=bSBmunE-1KQ","annotations":[{"segment":[23.30428525429017,60.263425149765986],"label":"Rafting"}]},"57cM1GcKktw":{"duration":134.05,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=57cM1GcKktw","annotations":[{"segment":[2.8207488229329174,133.41097210686428],"label":"Playing 
bagpipes"}]},"Cjq9wf9XnX0":{"duration":33.95,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Cjq9wf9XnX0","annotations":[{"segment":[9.313722308892356,19.579984399375974],"label":"Snatch"}]},"aHKZIUD3wPI":{"duration":28.445,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aHKZIUD3wPI","annotations":[{"segment":[0.2,27.67368535825545],"label":"Trimming branches or hedges"}]},"X1WExPnfJjE":{"duration":32.21,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=X1WExPnfJjE","annotations":[{"segment":[0,20.86528549141966],"label":"Brushing hair"}]},"P8H4VrbYuTs":{"duration":180.95,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=P8H4VrbYuTs","annotations":[]},"lEqYTrzaHsc":{"duration":61.6,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=lEqYTrzaHsc","annotations":[{"segment":[4.901295804992199,52.18438474726989],"label":"Sailing"}]},"0cYB-WFrVxk":{"duration":224.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0cYB-WFrVxk","annotations":[{"segment":[0,224.0493422],"label":"Waterskiing"}]},"FnLY_cMSO88":{"duration":23.73,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FnLY_cMSO88","annotations":[]},"xtAMq9qWdX8":{"duration":24.03,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xtAMq9qWdX8","annotations":[]},"FGdCWm4gI3M":{"duration":228.113,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FGdCWm4gI3M","annotations":[{"segment":[7.827366614664587,14.231575663026522],"label":"Longboarding"},{"segment":[17.43368018720749,54.07998751950078],"label":"Longboarding"},{"segment":[66.88840561622465,102.11155538221529],"label":"Longboarding"},{"segment":[117.05470982839314,198.1920702028081],"label":"Longboarding"},{"segment":[201.39445553822154,220.608767550702],"label":"Longbo
arding"}]},"oobYvNJU5ko":{"duration":70.311,"subset":"validation","resolution":"206x360","url":"https://www.youtube.com/watch?v=oobYvNJU5ko","annotations":[{"segment":[21.58118096723869,56.4178081123245],"label":"Blow-drying hair"}]},"ehYH-NSx3jM":{"duration":152.834,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=ehYH-NSx3jM","annotations":[]},"6q7LQZLC_N8":{"duration":151.19,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=6q7LQZLC_N8","annotations":[{"segment":[0.01,144.10931139625586],"label":"Sailing"}]},"kl7qwEgYLZU":{"duration":31.417,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kl7qwEgYLZU","annotations":[{"segment":[0.5374527304117603,9.576442507963877],"label":"Building sandcastles"}]},"RpItv2ztook":{"duration":223.585,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=RpItv2ztook","annotations":[{"segment":[5.232096957347681,223.5849431],"label":"Skiing"}]},"bVAUJAAg3TM":{"duration":174.62,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bVAUJAAg3TM","annotations":[{"segment":[0.01,173.11999375975037],"label":"Long jump"}]},"aoAytgRjR3w":{"duration":150.93,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=aoAytgRjR3w","annotations":[]},"nHkVfEKklJA":{"duration":100.775,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=nHkVfEKklJA","annotations":[{"segment":[8.833112052513266,98.30583027460126],"label":"Playing drums"}]},"YOLDcsX7EiA":{"duration":58.468,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YOLDcsX7EiA","annotations":[{"segment":[0.2,50.12722308892356],"label":"Skiing"}]},"8JgckTuL1WA":{"duration":124.18,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=8JgckTuL1WA","annotations":[{"segment":[0.01,116.43089928861154],"label":"Playing 
badminton"}]},"htfrXrKUIzg":{"duration":128.105,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=htfrXrKUIzg","annotations":[{"segment":[14.942142486854772,122.12711125922634],"label":"Grooming dog"}]},"3cQg4XOkC5Y":{"duration":106.88,"subset":"validation","resolution":"648x480","url":"https://www.youtube.com/watch?v=3cQg4XOkC5Y","annotations":[{"segment":[6.502927535101405,100.04503900156006],"label":"Cheerleading"}]},"AgocNx3-De0":{"duration":152.555,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AgocNx3-De0","annotations":[{"segment":[12.851755866153015,31.1774077493712],"label":"Rafting"},{"segment":[52.12100990162056,118.99773868954759],"label":"Rafting"}]},"XyZoiuga6vw":{"duration":49.58,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XyZoiuga6vw","annotations":[{"segment":[2.702990101404056,47.03202776443057],"label":"Using the pommel horse"}]},"Tko7eefi1BI":{"duration":62.72,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Tko7eefi1BI","annotations":[{"segment":[33.26647912012481,61.640829061733896],"label":"Playing accordion"}]},"XkTWEDKtP1A":{"duration":46.74,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=XkTWEDKtP1A","annotations":[{"segment":[5.104358065522621,42.220333141965675],"label":"Long jump"}]},"0dkIuFFlLRM":{"duration":94.69,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0dkIuFFlLRM","annotations":[{"segment":[20.223209048361934,32.17999687987519],"label":"Spinning"},{"segment":[44.43201404056162,45.76054602184087],"label":"Spinning"},{"segment":[57.56971918876755,62.29338845553822],"label":"Spinning"}]},"gVKgXyKh4BQ":{"duration":6.5,"subset":"validation","resolution":"1920x1440","url":"https://www.youtube.com/watch?v=gVKgXyKh4BQ","annotations":[{"segment":[1.8662901840873636,6.085728861154446],"label":"Triple 
jump"}]},"bqpG1zi2APg":{"duration":120.535,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=bqpG1zi2APg","annotations":[]},"1DmdX5QwqFI":{"duration":121.65,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=1DmdX5QwqFI","annotations":[{"segment":[4.8790179899999995,93.941379859276],"label":"Washing hands"}]},"gdisMpHS668":{"duration":27.098,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gdisMpHS668","annotations":[{"segment":[0.01,24.906270626006414],"label":"Decorating the Christmas tree"}]},"naCGjbEz1T8":{"duration":105.651,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=naCGjbEz1T8","annotations":[{"segment":[3.790902053042122,86.3666380780031],"label":"Making a lemonade"}]},"3OcAjx8e4LU":{"duration":187.478,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=3OcAjx8e4LU","annotations":[{"segment":[19.840853582554516,170.9997289719626],"label":"River tubing"}]},"YB5psJsxEJI":{"duration":140.365,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YB5psJsxEJI","annotations":[]},"ys3NrLuOxMs":{"duration":54.15,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ys3NrLuOxMs","annotations":[{"segment":[11.644315132605305,17.302191887675505],"label":"Layup drill in basketball"},{"segment":[20.76263026521061,27.345903276131043],"label":"Layup drill in basketball"},{"segment":[32.83196411856474,34.09797815912636],"label":"Layup drill in basketball"},{"segment":[41.94726521060842,46.42051482059283],"label":"Layup drill in 
basketball"}]},"P6z57_sYXm8":{"duration":81.67,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=P6z57_sYXm8","annotations":[{"segment":[1.09103104,11.134977808112323],"label":"Windsurfing"},{"segment":[11.791464959999999,81.571666],"label":"Windsurfing"}]},"-uR5-jYe0Ag":{"duration":33.28,"subset":"validation","resolution":"810x480","url":"https://www.youtube.com/watch?v=-uR5-jYe0Ag","annotations":[{"segment":[3.9013074038320523,28.98652531037411],"label":"Putting on makeup"}]},"s2ra7HNzIF0":{"duration":60.581,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=s2ra7HNzIF0","annotations":[{"segment":[8.201443925233644,39.700562305295946],"label":"Having an ice cream"}]},"g0upuaWM74M":{"duration":86.17,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=g0upuaWM74M","annotations":[{"segment":[13.30851507529689,68.6934454024961],"label":"Hand car wash"}]},"zI6PsewSm7w":{"duration":134.444,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=zI6PsewSm7w","annotations":[{"segment":[17.195822152886112,120.79016536661466],"label":"Baton twirling"}]},"m_BaRLpmkvc":{"duration":183.647,"subset":"testing","resolution":"640x356","url":"https://www.youtube.com/watch?v=m_BaRLpmkvc","annotations":[]},"S-8ogZOLiWU":{"duration":61.162,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=S-8ogZOLiWU","annotations":[{"segment":[11.545280227535631,31.296296024960995],"label":"Playing ice hockey"}]},"OBb4013eIc8":{"duration":163.167,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OBb4013eIc8","annotations":[{"segment":[41.491667800312015,161.893870758912],"label":"Wakeboarding"}]},"x1Mb7cN8WgU":{"duration":233.477,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=x1Mb7cN8WgU","annotations":[{"segment":[0.2,232.02198172897195],"label":"Fixing the 
roof"}]},"Y9xPzIiy6mI":{"duration":49.575,"subset":"validation","resolution":"426x228","url":"https://www.youtube.com/watch?v=Y9xPzIiy6mI","annotations":[{"segment":[18.935327613104523,21.022078003120125],"label":"Sumo"}]},"KZPFJiEHn6U":{"duration":108.6,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=KZPFJiEHn6U","annotations":[]},"xqYII12ta4E":{"duration":154.25,"subset":"training","resolution":"606x480","url":"https://www.youtube.com/watch?v=xqYII12ta4E","annotations":[{"segment":[2.8876784461778473,153.28759751794072],"label":"Cheerleading"}]},"t3zAh4NBVko":{"duration":24.73,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=t3zAh4NBVko","annotations":[{"segment":[4.629445741029642,19.983774115444618],"label":"Arm wrestling"}]},"J3l1jj070N8":{"duration":48.391,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=J3l1jj070N8","annotations":[]},"qi5Oie047pQ":{"duration":31.02,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=qi5Oie047pQ","annotations":[{"segment":[7.319238689547582,27.01376911076443],"label":"Doing nails"}]},"iOgmO3MMeH4":{"duration":104.89,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=iOgmO3MMeH4","annotations":[{"segment":[35.67045156908317,54.32380662090485],"label":"Playing piano"},{"segment":[69.54101751170047,70.19552120592824],"label":"Playing piano"}]},"nEv28s46Kvo":{"duration":226.14,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=nEv28s46Kvo","annotations":[{"segment":[66.48403822152886,75.6542503900156],"label":"Doing fencing"},{"segment":[131.38092433697346,160.6550631825273],"label":"Doing fencing"},{"segment":[175.11578237129484,212.85473244929796],"label":"Doing 
fencing"}]},"aWVEB3Ovj_U":{"duration":46.348,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aWVEB3Ovj_U","annotations":[]},"4pSWTD4vAz0":{"duration":169.135,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4pSWTD4vAz0","annotations":[{"segment":[22.649479750778813,133.26321806853582],"label":"Waterskiing"}]},"sGGnEgCnEt8":{"duration":203.523,"subset":"validation","resolution":"3840x2160","url":"https://www.youtube.com/watch?v=sGGnEgCnEt8","annotations":[{"segment":[16.510438267870853,195.90269877847115],"label":"Arm wrestling"}]},"UoO7VCPs8Jw":{"duration":12.49,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=UoO7VCPs8Jw","annotations":[{"segment":[0.01,12.492175],"label":"Shot put"}]},"KbbEbeCJTJg":{"duration":154.14,"subset":"validation","resolution":"426x238","url":"https://www.youtube.com/watch?v=KbbEbeCJTJg","annotations":[{"segment":[6.130581903276131,9.255976599063962],"label":"Shot put"},{"segment":[25.363780031201248,31.374154446177847],"label":"Shot put"},{"segment":[41.23116848673947,43.87573322932917],"label":"Shot put"},{"segment":[51.08818252730109,54.93482215288612],"label":"Shot put"},{"segment":[61.6664414976599,65.0322511700468],"label":"Shot put"},{"segment":[72.00428549141965,80.89963962558502],"label":"Shot put"},{"segment":[87.15042901716068,97.00744305772231],"label":"Shot put"}]},"1VmqefkNV_8":{"duration":171.82999999999998,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1VmqefkNV_8","annotations":[{"segment":[97.02334165366614,124.20171040999999],"label":"Doing 
kickboxing"}]},"Fk0z6MeCV0I":{"duration":67.83,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Fk0z6MeCV0I","annotations":[{"segment":[7.031605529641186,59.56730625],"label":"Hopscotch"}]},"Xc70KHd4zhI":{"duration":60.953,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Xc70KHd4zhI","annotations":[{"segment":[0.01,60.95238095238095],"label":"Having an ice cream"}]},"Pc1uz-MzWRU":{"duration":194.65,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Pc1uz-MzWRU","annotations":[]},"hcsH2gglpuw":{"duration":44.72,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hcsH2gglpuw","annotations":[]},"Ti3z17QofU8":{"duration":24.126,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ti3z17QofU8","annotations":[{"segment":[10.312629475819032,24.125531],"label":"Fixing the roof"}]},"fwqDDFkOaCc":{"duration":162.98,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fwqDDFkOaCc","annotations":[{"segment":[8.390580978159127,81.87172954446177],"label":"Playing violin"},{"segment":[87.21119016692668,98.65289150078003],"label":"Playing violin"},{"segment":[102.21253191575663,153.57305790327612],"label":"Playing violin"}]},"r9vcB6tc1mM":{"duration":183.089,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=r9vcB6tc1mM","annotations":[{"segment":[28.79616978193146,163.93859034267913],"label":"Capoeira"}]},"T35cHr4pjkc":{"duration":6.64,"subset":"testing","resolution":"480x480","url":"https://www.youtube.com/watch?v=T35cHr4pjkc","annotations":[]},"M0ygYlmTHy4":{"duration":202.55,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=M0ygYlmTHy4","annotations":[{"segment":[18.32724576466052,196.54391147618693],"label":"Playing water 
polo"}]},"vFVg-ImCW9w":{"duration":199.901,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vFVg-ImCW9w","annotations":[{"segment":[43.686021693291195,182.77441942784003],"label":"Making a cake"}]},"eI_LceS_qnQ":{"duration":148.21,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eI_LceS_qnQ","annotations":[{"segment":[0,148.01297052154197],"label":"Zumba"}]},"kYZSCs-ILNQ":{"duration":112.8,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=kYZSCs-ILNQ","annotations":[{"segment":[35.72367976623827,76.1987840577223],"label":"Tennis serve with ball bouncing"}]},"li_mry59KCM":{"duration":101.24,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=li_mry59KCM","annotations":[]},"5-KmeeiTUgo":{"duration":133.19,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=5-KmeeiTUgo","annotations":[]},"t_2jTjX3a_o":{"duration":213.76,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=t_2jTjX3a_o","annotations":[{"segment":[7.166778471138844,191.50298751950075],"label":"Dodgeball"}]},"BtKLAioC21g":{"duration":207.261,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BtKLAioC21g","annotations":[{"segment":[13.25693353992663,136.77275335095035],"label":"Skiing"},{"segment":[150.67636657575144,190.44716719344774],"label":"Skiing"}]},"SfiAcQAPpQ8":{"duration":82.27,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SfiAcQAPpQ8","annotations":[{"segment":[34.36607074871836,82.241],"label":"Mixing drinks"}]},"ZHOPn9lONHA":{"duration":179.282,"subset":"validation","resolution":"720x1280","url":"https://www.youtube.com/watch?v=ZHOPn9lONHA","annotations":[{"segment":[98.37334230640086,102.28288932197718],"label":"Doing 
crunches"}]},"V90CMuokf0c":{"duration":151.139,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=V90CMuokf0c","annotations":[]},"sDdgFSFaAdY":{"duration":167.57999999999998,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=sDdgFSFaAdY","annotations":[{"segment":[54.37786395265334,157.64351763650546],"label":"Polishing forniture"}]},"1xtKGlUp4Vg":{"duration":138.554,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1xtKGlUp4Vg","annotations":[]},"VJ0LBlxsutw":{"duration":87.633,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=VJ0LBlxsutw","annotations":[]},"uXcCES4BsQ0":{"duration":50.574,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=uXcCES4BsQ0","annotations":[{"segment":[19.329796723868956,50.57306],"label":"Playing rubik cube"}]},"yUCSKSMVrPo":{"duration":162.354,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=yUCSKSMVrPo","annotations":[{"segment":[10.381187207488297,73.68110920436817],"label":"Playing drums"},{"segment":[79.50470202808111,146.87391575663025],"label":"Playing 
drums"}]},"UgsqMLsPG0k":{"duration":157.966,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=UgsqMLsPG0k","annotations":[{"segment":[17.940830218068538,150.1622912772586],"label":"Plastering"}]},"P5D9S9NzQ28":{"duration":59.89,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=P5D9S9NzQ28","annotations":[]},"pw9Ve02R5nc":{"duration":38.731,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pw9Ve02R5nc","annotations":[]},"cICxG-28hK0":{"duration":25.06,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=cICxG-28hK0","annotations":[{"segment":[3.009589205928237,20.4808408299532],"label":"Dodgeball"}]},"pMHaNlUQb9s":{"duration":222.401,"subset":"testing","resolution":"300x240","url":"https://www.youtube.com/watch?v=pMHaNlUQb9s","annotations":[]},"U6Xnw2RGjsI":{"duration":17.067,"subset":"training","resolution":"800x480","url":"https://www.youtube.com/watch?v=U6Xnw2RGjsI","annotations":[{"segment":[0.01,16.55438072581306],"label":"Kneeling"}]},"sx4zAnVDV9Y":{"duration":150.234,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sx4zAnVDV9Y","annotations":[{"segment":[28.738211223123255,33.177447653828246],"label":"Drum corps"},{"segment":[45.5605808552685,49.29888521796744],"label":"Drum corps"},{"segment":[56.074561875359265,62.616594510082415],"label":"Drum corps"},{"segment":[69.15862714480556,76.86887989287213],"label":"Drum corps"},{"segment":[80.84082827823975,83.1772685049266],"label":"Drum corps"},{"segment":[90.88752125299317,96.96226584237894],"label":"Drum corps"},{"segment":[107.94353490780709,118.69115870548171],"label":"Drum corps"},{"segment":[128.9714955950618,146.96208515182695],"label":"Drum 
corps"}]},"4BIKUGTeGQw":{"duration":34.436,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4BIKUGTeGQw","annotations":[]},"tLifahMJYkA":{"duration":216.92000000000002,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tLifahMJYkA","annotations":[]},"tF4Tl56ntnE":{"duration":89.072,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=tF4Tl56ntnE","annotations":[{"segment":[9.310089524180967,79.76106547581904],"label":"Putting in contact lenses"}]},"4Gs7Xyn16QI":{"duration":87.47,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=4Gs7Xyn16QI","annotations":[{"segment":[9.552059034742342,13.509340634849885],"label":"Javelin throw"},{"segment":[37.52594620791635,42.71134911700467],"label":"Javelin throw"},{"segment":[56.2206895725429,60.17797112012481],"label":"Javelin throw"},{"segment":[70.13940398127924,74.09668552886114],"label":"Javelin throw"}]},"Aj0Pd6snB-k":{"duration":106.301,"subset":"validation","resolution":"648x480","url":"https://www.youtube.com/watch?v=Aj0Pd6snB-k","annotations":[{"segment":[26.036270707971177,94.19491568234157],"label":"Baking cookies"}]},"C6OmnTWhsEc":{"duration":56.68,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=C6OmnTWhsEc","annotations":[{"segment":[5.94052599,43.10041197],"label":"Playing 
flauta"}]},"Y2nPNns1RFY":{"duration":60.163,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Y2nPNns1RFY","annotations":[]},"QGY7zA03RkE":{"duration":76.44,"subset":"training","resolution":"854x478","url":"https://www.youtube.com/watch?v=QGY7zA03RkE","annotations":[{"segment":[10.2131484,21.34586393447738],"label":"Ping-pong"}]},"7oeFpnRCJkY":{"duration":7.059,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7oeFpnRCJkY","annotations":[{"segment":[0.01,6.5332483032915825],"label":"Kneeling"}]},"n1KeC6NXPUA":{"duration":89.235,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=n1KeC6NXPUA","annotations":[{"segment":[9.045343213728549,43.69596567862714],"label":"Using the monkey bar"}]},"PjxH86Lw23o":{"duration":136.116,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PjxH86Lw23o","annotations":[]},"AzUK4Nwd-sY":{"duration":90.09,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AzUK4Nwd-sY","annotations":[{"segment":[5.903157269147909,44.97643633636502],"label":"Horseback riding"}]},"xmK2kbPSGUs":{"duration":88.515,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xmK2kbPSGUs","annotations":[{"segment":[26.082884555382215,77.69663494539782],"label":"Making a lemonade"}]},"koEfnIoZB_4":{"duration":95.47999999999999,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=koEfnIoZB_4","annotations":[{"segment":[3.2770202454356676,13.405991913145913],"label":"Bungee jumping"},{"segment":[21.002720663928596,32.17438059155019],"label":"Bungee jumping"},{"segment":[36.04722269979234,39.324242945228015],"label":"Bungee jumping"},{"segment":[51.538591132760956,52.879190039001564],"label":"Bungee jumping"},{"segment":[52.13441271450858,53.326056433697346],"label":"Bungee jumping"},{"segment":[66.1362264149766,75.07355430889235],"label":"Bungee 
jumping"},{"segment":[83.11714941341653,88.77745707956318],"label":"Bungee jumping"}]},"fFmfx_332I8":{"duration":61.719,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fFmfx_332I8","annotations":[]},"x7M1yRFtnxY":{"duration":184.04,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=x7M1yRFtnxY","annotations":[{"segment":[36.17261466458658,159.04467082683306],"label":"Paintball"}]},"-2veyKpAwXU":{"duration":158.69,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=-2veyKpAwXU","annotations":[]},"pEr9p5-qUsU":{"duration":119.862,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pEr9p5-qUsU","annotations":[{"segment":[13.32143860244751,92.42913953702696],"label":"Beer pong"}]},"AJGg44cliq8":{"duration":136.789,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=AJGg44cliq8","annotations":[]},"ij87GXkcdho":{"duration":203.872,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ij87GXkcdho","annotations":[{"segment":[7.315182378723721,14.948416165218037],"label":"Elliptical trainer"},{"segment":[27.3524210682713,43.89109427234232],"label":"Elliptical trainer"},{"segment":[60.429767476413346,70.92546393284303],"label":"Elliptical trainer"},{"segment":[74.74208082609019,90.32659980684942],"label":"Elliptical trainer"},{"segment":[100.18619344773792,109.4096842730852],"label":"Elliptical trainer"},{"segment":[121.8136891761385,135.48989971027413],"label":"Elliptical trainer"},{"segment":[181.92540524478122,190.51279196411855],"label":"Elliptical 
trainer"}]},"6VUsbs84lCc":{"duration":17.857,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=6VUsbs84lCc","annotations":[{"segment":[0.2,17.106352148579884],"label":"Knitting"}]},"Dbas61NkTV0":{"duration":153.46,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Dbas61NkTV0","annotations":[]},"gd7SO0TQ-sY":{"duration":120.275,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gd7SO0TQ-sY","annotations":[{"segment":[0.2,9.929739875389409],"label":"Playing beach volleyball"},{"segment":[20.98360124610592,62.95080373831775],"label":"Playing beach volleyball"},{"segment":[104.16859190031153,120.281],"label":"Playing beach volleyball"}]},"dTkMZlj7jFU":{"duration":193.817,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dTkMZlj7jFU","annotations":[{"segment":[8.767003120124805,171.71233697347893],"label":"Hanging wallpaper"}]},"uM3RiCL0g2U":{"duration":190.404,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=uM3RiCL0g2U","annotations":[{"segment":[106.61403900156006,141.37497035881435],"label":"Disc dog"},{"segment":[164.24445085803433,178.7977566302652],"label":"Disc dog"}]},"ez8ram5yd70":{"duration":98.267,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=ez8ram5yd70","annotations":[{"segment":[0,98.06687074829932],"label":"Rope skipping"}]},"_xtOX9UCWmU":{"duration":89.65,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_xtOX9UCWmU","annotations":[{"segment":[9.63759,85.41131825273011],"label":"Playing guitarra"}]},"HRjSOacCdWI":{"duration":19.9,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HRjSOacCdWI","annotations":[{"segment":[0,4.638299531981279],"label":"Clean and jerk"},{"segment":[5.2185127499999995,14.780714508580344],"label":"Clean and 
jerk"}]},"x4c_wI6kQyE":{"duration":70.36,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=x4c_wI6kQyE","annotations":[{"segment":[35.78191359126365,41.489458090483616],"label":"Smoking a cigarette"},{"segment":[43.57490704212168,54.221672742589696],"label":"Smoking a cigarette"}]},"mpC_UTM1tWQ":{"duration":16.95,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mpC_UTM1tWQ","annotations":[{"segment":[8.726500797191887,14.491280277061422],"label":"Discus throw"}]},"5qsXmDi8d74":{"duration":186.596,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5qsXmDi8d74","annotations":[{"segment":[61.402645865834636,173.44250858034323],"label":"Sumo"}]},"Yp9WuBrgbI4":{"duration":64.854,"subset":"training","resolution":"324x240","url":"https://www.youtube.com/watch?v=Yp9WuBrgbI4","annotations":[{"segment":[2.8543124461188865,45.65982023428087],"label":"Doing crunches"}]},"oLrkd6M2WRY":{"duration":100.195,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oLrkd6M2WRY","annotations":[{"segment":[77.68559936193448,98.31839436349453],"label":"Shuffleboard"}]},"aXXes0e-eM0":{"duration":46.928,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aXXes0e-eM0","annotations":[{"segment":[11.76843000543935,22.65970929906542],"label":"Hitting a pinata"}]},"B9sTAMQfQ1I":{"duration":120.21,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=B9sTAMQfQ1I","annotations":[]},"XxMzH75Bhr8":{"duration":157.455,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=XxMzH75Bhr8","annotations":[{"segment":[0.01,157.454534],"label":"Futsal"}]},"8ATFQNEWHL0":{"duration":59.42,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=8ATFQNEWHL0","annotations":[{"segment":[10.753052765054603,59.04909147706709],"label":"Preparing 
salad"}]},"WgJy8seQ7z4":{"duration":56.448,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=WgJy8seQ7z4","annotations":[{"segment":[0,55.628817472698906],"label":"Arm wrestling"}]},"Igpy_o0h3Cs":{"duration":150.88,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Igpy_o0h3Cs","annotations":[{"segment":[32.00795007800312,136.03378783151325],"label":"Removing curlers"}]},"BrvdzlErtco":{"duration":39.66,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=BrvdzlErtco","annotations":[]},"5bKIKQaOOYY":{"duration":59.653,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=5bKIKQaOOYY","annotations":[]},"8olnQLX0hO8":{"duration":117.702,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8olnQLX0hO8","annotations":[{"segment":[4.40691856474259,110.54020733229329],"label":"River tubing"}]},"KzogfJrOqJE":{"duration":222.68,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=KzogfJrOqJE","annotations":[{"segment":[93.44890671817349,222.67936507936508],"label":"Hand washing clothes"}]},"-Fk62y-1WHo":{"duration":120.56,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-Fk62y-1WHo","annotations":[]},"WROGzgOpPXc":{"duration":209.77,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WROGzgOpPXc","annotations":[{"segment":[23.229627145085804,90.9554414976599],"label":"Rafting"},{"segment":[118.11120280811234,144.61260842433697],"label":"Rafting"}]},"f5Ja983oTMw":{"duration":36.02,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=f5Ja983oTMw","annotations":[{"segment":[2.00025774,13.209195021840873],"label":"Using the pommel horse"},{"segment":[13.78878228,34.43327158970359],"label":"Using the pommel 
horse"}]},"IssfjAvSwo0":{"duration":33.53,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=IssfjAvSwo0","annotations":[]},"au3iN1kGMWY":{"duration":224.03,"subset":"testing","resolution":"320x180","url":"https://www.youtube.com/watch?v=au3iN1kGMWY","annotations":[]},"XHMrvd3sKzE":{"duration":88.38,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=XHMrvd3sKzE","annotations":[{"segment":[2.4116146645865837,84.40651326053042],"label":"Rock climbing"}]},"FKAlXR9NkKw":{"duration":49.6,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FKAlXR9NkKw","annotations":[{"segment":[18.12710608424337,46.264703588143526],"label":"Cleaning windows"}]},"Ub88_ql0B78":{"duration":74.907,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Ub88_ql0B78","annotations":[{"segment":[0.01,67.94105185847624],"label":"Baton twirling"}]},"MVxXCu4zxSM":{"duration":87.89,"subset":"validation","resolution":"160x120","url":"https://www.youtube.com/watch?v=MVxXCu4zxSM","annotations":[{"segment":[6.0328412592286,14.396552784711389],"label":"Shaving"},{"segment":[24.679804773790952,30.30131586115445],"label":"Shaving"},{"segment":[36.197047001560065,39.62479766458659],"label":"Shaving"},{"segment":[42.22988816848674,44.14942853978159],"label":"Shaving"},{"segment":[51.96470005148206,55.666670767550706],"label":"Shaving"},{"segment":[61.425291881435264,77.7413850374415],"label":"Shaving"}]},"0PmrImNqA2w":{"duration":54.289,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=0PmrImNqA2w","annotations":[{"segment":[1.0131543067589919,4.221480357760004],"label":"Painting fence"},{"segment":[19.503243916475352,41.792667007640276],"label":"Painting fence"},{"segment":[50.40448956559036,52.008652591090865],"label":"Painting 
fence"}]},"hKLDr9ZaDb8":{"duration":148.515,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hKLDr9ZaDb8","annotations":[{"segment":[6.716870514820593,134.56721372854915],"label":"Hula hoop"}]},"LHu41OIGw7Q":{"duration":99.846,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LHu41OIGw7Q","annotations":[{"segment":[0.01,99.701],"label":"Canoeing"}]},"ymmBQHiNK24":{"duration":132.17,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=ymmBQHiNK24","annotations":[{"segment":[5.153705148205928,129.04877691107643],"label":"Playing bagpipes"}]},"2EfjOGiEeJs":{"duration":88.19,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=2EfjOGiEeJs","annotations":[{"segment":[30.357849584278153,68.27081916666667],"label":"Beach soccer"}]},"I4mFeQwqjnE":{"duration":51.92,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=I4mFeQwqjnE","annotations":[{"segment":[5.507874502340094,50.704844683307336],"label":"Dodgeball"}]},"GSrBZXLSwWI":{"duration":237.169,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=GSrBZXLSwWI","annotations":[]},"pZZVzBD8bZY":{"duration":136.46,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pZZVzBD8bZY","annotations":[{"segment":[14.046468018720748,116.20259906396255],"label":"Doing motocross"}]},"5sYPXv0RkiM":{"duration":50.95,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=5sYPXv0RkiM","annotations":[{"segment":[8.027138238689547,47.924399583463334],"label":"Chopping wood"}]},"A5-gsreb2zY":{"duration":157.87,"subset":"training","resolution":"568x320","url":"https://www.youtube.com/watch?v=A5-gsreb2zY","annotations":[{"segment":[4.493383177353119,153.7586485324933],"label":"Getting a 
piercing"}]},"IK9kE9IrcOM":{"duration":116.147,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IK9kE9IrcOM","annotations":[{"segment":[0.2,12.857645865834634],"label":"Mooping floor"},{"segment":[77.70254134165367,115.881],"label":"Mooping floor"}]},"hYRNSJwhVPw":{"duration":234.549,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hYRNSJwhVPw","annotations":[{"segment":[52.70696244539873,110.43830739019519],"label":"Doing crunches"}]},"28GYivx4lyk":{"duration":11.053,"subset":"training","resolution":"128x96","url":"https://www.youtube.com/watch?v=28GYivx4lyk","annotations":[{"segment":[0.19999999999999998,10.852698412698414],"label":"Table soccer"}]},"Ar3Bk2xWK4I":{"duration":163.399,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ar3Bk2xWK4I","annotations":[]},"SJHbdA5ZG4o":{"duration":147.54,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SJHbdA5ZG4o","annotations":[]},"16bJAOf1SJI":{"duration":159.893,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=16bJAOf1SJI","annotations":[]},"1gradpCDbPU":{"duration":60.21,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1gradpCDbPU","annotations":[{"segment":[6.011541067082684,17.56497155538222],"label":"Washing hands"},{"segment":[23.8583036099844,29.775914347893917],"label":"Washing hands"}]},"nlHN3bO_MOQ":{"duration":126.247,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nlHN3bO_MOQ","annotations":[{"segment":[10.433062402496098,111.81093291731669],"label":"Drum 
corps"}]},"2cYfV33Qw4Y":{"duration":165.419,"subset":"training","resolution":"1916x1080","url":"https://www.youtube.com/watch?v=2cYfV33Qw4Y","annotations":[{"segment":[0.2,137.00048517940718],"label":"Canoeing"}]},"q0KrlywYHM8":{"duration":37.69,"subset":"validation","resolution":"480x270","url":"https://www.youtube.com/watch?v=q0KrlywYHM8","annotations":[{"segment":[14.381904102931513,31.730511712051822],"label":"Playing water polo"}]},"jvz9ra2l7Qc":{"duration":29.885,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=jvz9ra2l7Qc","annotations":[]},"T3rh5gQVFKA":{"duration":196.186,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=T3rh5gQVFKA","annotations":[{"segment":[35.80108736349454,186.67427457098285],"label":"Longboarding"}]},"zNjk-Xw_AVU":{"duration":221.054,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zNjk-Xw_AVU","annotations":[]},"Wrbf7c58IuU":{"duration":104.4,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Wrbf7c58IuU","annotations":[{"segment":[5.537433927289064,18.07809311556136],"label":"Playing water polo"},{"segment":[20.03248156048691,94.78783948829954],"label":"Playing water polo"}]},"nUghBtcrTPA":{"duration":128.64,"subset":"training","resolution":"854x428","url":"https://www.youtube.com/watch?v=nUghBtcrTPA","annotations":[{"segment":[9.432115664586583,116.79768759126365],"label":"Tai 
chi"}]},"phg81-nhqH4":{"duration":27.957,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=phg81-nhqH4","annotations":[{"segment":[0,27.921],"label":"Kneeling"}]},"WRc1Jv1j3nk":{"duration":38.94,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=WRc1Jv1j3nk","annotations":[{"segment":[6.276188019999999,22.83110387],"label":"Archery"},{"segment":[26.85048153822153,32.317774159126365],"label":"Archery"}]},"6F9C3dIU4kU":{"duration":64.389,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=6F9C3dIU4kU","annotations":[{"segment":[0.2,62.83536037441497],"label":"Swinging at the playground"}]},"mRe2xY_amWw":{"duration":180.58,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mRe2xY_amWw","annotations":[{"segment":[11.550460258970357,18.029986745709827],"label":"Tumbling"},{"segment":[26.19982448985959,30.70732117628705],"label":"Tumbling"},{"segment":[38.03200329173166,43.666374149765986],"label":"Tumbling"},{"segment":[51.55449335101404,61.69636089547582],"label":"Tumbling"},{"segment":[69.86619863962558,74.93713241185647],"label":"Tumbling"},{"segment":[84.79728141341653,88.17790392823711],"label":"Tumbling"},{"segment":[97.47461584399376,104.51757941653666],"label":"Tumbling"},{"segment":[112.68741716068642,120.01209927613104],"label":"Tumbling"},{"segment":[141.70442707956317,151.56457608112325],"label":"Tumbling"}]},"vJiOYQE9tts":{"duration":136.32999999999998,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vJiOYQE9tts","annotations":[{"segment":[12.329388455538222,16.36832605304212],"label":"Grooming horse"},{"segment":[46.55407020280811,70.14996879875194],"label":"Grooming horse"},{"segment":[88.23184867394696,110.34296255850235],"label":"Grooming 
horse"}]},"E7OCzz_XuiI":{"duration":165.907,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=E7OCzz_XuiI","annotations":[{"segment":[27.694233429201113,36.49427021978838],"label":"Rock-paper-scissors"}]},"UALnEw4XhTY":{"duration":237.4,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=UALnEw4XhTY","annotations":[{"segment":[26.483666926677063,34.262086583463336],"label":"Rock climbing"},{"segment":[49.44852496099844,57.96774648985959],"label":"Rock climbing"},{"segment":[70.5613783151326,92.41503354134166],"label":"Rock climbing"},{"segment":[105.00866536661466,153.53118798751947],"label":"Rock climbing"},{"segment":[163.90241419656786,217.24014898595942],"label":"Rock climbing"}]},"2rA5pyel_NE":{"duration":115.055,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=2rA5pyel_NE","annotations":[{"segment":[0.01,110.28780317003086],"label":"Having an ice cream"}]},"oZgyW8Yca_M":{"duration":97.013,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=oZgyW8Yca_M","annotations":[{"segment":[26.704974052282378,34.5505034023316],"label":"Building sandcastles"},{"segment":[81.321930338568,88.26220659362242],"label":"Building sandcastles"}]},"keFBEoBy0zY":{"duration":212.602,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=keFBEoBy0zY","annotations":[{"segment":[64.33445665418633,87.51533238973497],"label":"Snowboarding"},{"segment":[92.48266290449537,134.8705499637843],"label":"Snowboarding"},{"segment":[155.07102739047667,170.9664842080179],"label":"Snowboarding"}]},"tV_1whw_S4g":{"duration":40.357,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=tV_1whw_S4g","annotations":[{"segment":[0.01,13.15795243232941],"label":"Doing a 
powerbomb"}]},"AdP2aMo6OgY":{"duration":49.53,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=AdP2aMo6OgY","annotations":[{"segment":[4.404220431669267,48.13735664789392],"label":"Plataform diving"}]},"tzP0lKSUv_o":{"duration":19.74,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tzP0lKSUv_o","annotations":[]},"tlNpR7CkM-4":{"duration":100.25999999999999,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=tlNpR7CkM-4","annotations":[{"segment":[9.725386352301303,97.9637447557281],"label":"Playing bagpipes"}]},"-U4lNtzVQ8s":{"duration":89.19,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=-U4lNtzVQ8s","annotations":[{"segment":[6.21365829,39.40549221],"label":"Shaving legs"},{"segment":[44.95292361,65.74687266],"label":"Shaving legs"}]},"NVWk38z7f4U":{"duration":226.12,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=NVWk38z7f4U","annotations":[{"segment":[114.64535642650195,121.70045528351748],"label":"Getting a tattoo"},{"segment":[126.9917794262791,128.75555414053298],"label":"Getting a tattoo"},{"segment":[158.03421439714734,162.26727371135664],"label":"Getting a tattoo"}]},"l-R4wiSZcRc":{"duration":169.228,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=l-R4wiSZcRc","annotations":[{"segment":[12.672226856421196,59.92907284182524],"label":"Swimming"},{"segment":[64.68115791298318,94.24968724463263],"label":"Swimming"},{"segment":[101.641819577545,111.14598971986089],"label":"Swimming"}]},"UCFV1JYhq18":{"duration":86.541,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=UCFV1JYhq18","annotations":[{"segment":[3.3752250770302923,73.579905850234],"label":"Rope 
skipping"}]},"X1lLinc_yvo":{"duration":201.55,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=X1lLinc_yvo","annotations":[{"segment":[44.02008270807023,165.38973931746386],"label":"Making a cake"}]},"NbrC_xwOMQ4":{"duration":48.65,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NbrC_xwOMQ4","annotations":[]},"cB5ozEChN5Q":{"duration":113.012,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cB5ozEChN5Q","annotations":[{"segment":[21.50921271680799,45.310390723111915],"label":"Snow tubing"},{"segment":[49.18910121302811,78.80834495420632],"label":"Snow tubing"},{"segment":[88.85773122353464,109.13280878446021],"label":"Snow tubing"}]},"1AEZ9fDDkz4":{"duration":94.622,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=1AEZ9fDDkz4","annotations":[{"segment":[54.49777258566978,94.581],"label":"BMX"}]},"MRt7pz2OY5M":{"duration":45.07,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=MRt7pz2OY5M","annotations":[{"segment":[2.9485834633385335,40.24498127925117],"label":"Hand car wash"}]},"CvmhLCrOjhM":{"duration":31.278,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CvmhLCrOjhM","annotations":[{"segment":[0.2,31.077209],"label":"Playing rubik cube"}]},"otrk9uW9UjU":{"duration":101.1,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=otrk9uW9UjU","annotations":[{"segment":[1.9729792764464953,94.84270089386484],"label":"Doing crunches"}]},"Lh-cI4M0dNw":{"duration":51.061,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Lh-cI4M0dNw","annotations":[{"segment":[4.460839455850234,50.34375957316693],"label":"Calf 
roping"}]},"t6d__c9sIUU":{"duration":427.062,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=t6d__c9sIUU","annotations":[{"segment":[18.226609223121475,396.0628669676698],"label":"Removing ice from car"}]},"9XMFp9AEiYw":{"duration":100.85,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=9XMFp9AEiYw","annotations":[]},"ooCciCGrdcA":{"duration":183.79,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=ooCciCGrdcA","annotations":[{"segment":[11.038626422776911,163.28565578627146],"label":"Playing badminton"}]},"z6U8CyJRNXw":{"duration":166.324,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=z6U8CyJRNXw","annotations":[{"segment":[3.880030119528982,5.690712683897394],"label":"Wakeboarding"},{"segment":[9.312077812634218,11.12276037700263],"label":"Wakeboarding"},{"segment":[12.674774003604124,16.296139132340947],"label":"Wakeboarding"},{"segment":[20.952180012145437,158.30538596637786],"label":"Wakeboarding"}]},"ywl6JVt7-7I":{"duration":34.552,"subset":"training","resolution":"1906x1080","url":"https://www.youtube.com/watch?v=ywl6JVt7-7I","annotations":[{"segment":[0.01,34.55102040816327],"label":"Cutting the grass"}]},"PMMuX5drbQ4":{"duration":114.5,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=PMMuX5drbQ4","annotations":[]},"xbWAmySRE88":{"duration":145.311,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=xbWAmySRE88","annotations":[{"segment":[0.01,145.31013605442178],"label":"Braiding 
hair"}]},"NMUdgvut9E8":{"duration":104.84,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NMUdgvut9E8","annotations":[]},"hSq0yL5AB40":{"duration":190.543,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=hSq0yL5AB40","annotations":[{"segment":[13.376650004775701,174.78822672906918],"label":"BMX"}]},"G1Lht9z3V7o":{"duration":110.99000000000001,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=G1Lht9z3V7o","annotations":[]},"fQB76oAKOQc":{"duration":154.41,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=fQB76oAKOQc","annotations":[{"segment":[3.9735140405616227,91.18955072],"label":"Getting a tattoo"}]},"ekbZecn088U":{"duration":220.45,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=ekbZecn088U","annotations":[{"segment":[26.475627145085806,67.73634477379096],"label":"Smoking a cigarette"},{"segment":[116.2176879875195,136.17272386895476],"label":"Smoking a cigarette"},{"segment":[144.0817457098284,194.28684087363493],"label":"Smoking a cigarette"}]},"BT9O3WA5Xys":{"duration":48.37,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BT9O3WA5Xys","annotations":[]},"eeoQE0dbA6U":{"duration":201.016,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=eeoQE0dbA6U","annotations":[{"segment":[0.01,184.70814507644306],"label":"Baton twirling"}]},"E5zIMqTj4nc":{"duration":103.956,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=E5zIMqTj4nc","annotations":[{"segment":[16.704275986005428,61.140893657515015],"label":"Snowboarding"},{"segment":[61.30307109427235,92.27896137160687],"label":"Snowboarding"}]},"HYAlS44yzdo":{"duration":116.68,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=HYAlS44yzdo","annotations":[{"segment":[7.099111719570824,18.202850563002112],"label":"Shot 
put"},{"segment":[95.01887903588144,109.94521635569423],"label":"Shot put"}]},"vijDPbwU6vA":{"duration":93.09,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vijDPbwU6vA","annotations":[{"segment":[63.462894817472694,67.0934952074883],"label":"Chopping wood"},{"segment":[79.00186448673948,82.48724086115445],"label":"Chopping wood"}]},"6rMF2jWbeUQ":{"duration":133.074,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=6rMF2jWbeUQ","annotations":[{"segment":[13.909404876875348,106.0851625684075],"label":"Putting on shoes"}]},"IB068eD8A7Y":{"duration":225.79,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=IB068eD8A7Y","annotations":[{"segment":[0.01,225.790838],"label":"Archery"}]},"H-yBxySDyTw":{"duration":106.67,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=H-yBxySDyTw","annotations":[{"segment":[0,0],"label":"Smoking a cigarette"}]},"euyYRNOSPE0":{"duration":180.7,"subset":"validation","resolution":"534x360","url":"https://www.youtube.com/watch?v=euyYRNOSPE0","annotations":[{"segment":[6.201793470378272,178.72441108580344],"label":"Belly dance"}]},"hLyHeVmMDGU":{"duration":232.07999999999998,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hLyHeVmMDGU","annotations":[{"segment":[19.90648205928237,186.39705928237132],"label":"Pole vault"}]},"-YwrMtiqHKg":{"duration":150.63,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=-YwrMtiqHKg","annotations":[]},"ripbruSSD8w":{"duration":6.78,"subset":"validation","resolution":"720x1280","url":"https://www.youtube.com/watch?v=ripbruSSD8w","annotations":[{"segment":[2.6867038221528863,4.167564196567863],"label":"Chopping 
wood"}]},"QinlLV8QKBU":{"duration":219.94,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=QinlLV8QKBU","annotations":[{"segment":[5.146788075604657,40.48806619475663],"label":"Preparing pasta"},{"segment":[45.63485427036129,99.16145025664972],"label":"Preparing pasta"},{"segment":[130.04217871027765,182.19629669110768],"label":"Preparing pasta"}]},"TMnmi7WJr_o":{"duration":154.55,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TMnmi7WJr_o","annotations":[{"segment":[57.02270206006241,78.96378403666147],"label":"Getting a tattoo"}]},"QHn9KyE-zZo":{"duration":196.279,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QHn9KyE-zZo","annotations":[{"segment":[0.01,7.961349719294894],"label":"Slacklining"},{"segment":[13.779259129548853,18.37234550606514],"label":"Slacklining"},{"segment":[33.070221910917255,35.21366221995819],"label":"Slacklining"},{"segment":[43.4812176976875,46.543275282031686],"label":"Slacklining"},{"segment":[54.50462500132658,64.30320927122798],"label":"Slacklining"},{"segment":[68.89629564774427,72.26455899052287],"label":"Slacklining"},{"segment":[77.470056883908,79.61349719294894],"label":"Slacklining"},{"segment":[90.02449297971918,94.31137359780105],"label":"Slacklining"},{"segment":[100.12928292823713,107.478221124805],"label":"Slacklining"},{"segment":[115.43957083775352,123.70712630889236],"label":"Slacklining"},{"segment":[128.3002126817473,130.44365298907957],"label":"Slacklining"},{"segment":[134.1181220873635,148.20358696411859],"label":"Slacklining"},{"segment":[151.57185030421218,161.676640324493],"label":"Slacklining"},{"segment":[164.73869790639625,166.57593245553824],"label":"Slacklining"},{"segment":[169.6379900374415,172.08763610296413],"label":"Slacklining"},{"segment":[178.51795702496102,184.9482779469579],"label":"Slacklining"},{"segment":[191.07239311076444,196.277891],"label":"Slacklining"}]},"VWmSZPIoBlw":{"duration":18.6
46,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=VWmSZPIoBlw","annotations":[{"segment":[0.01,18.601],"label":"Removing ice from car"}]},"WFqm48C2BPk":{"duration":133.93,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=WFqm48C2BPk","annotations":[{"segment":[10.029281628407993,38.445579575563976],"label":"Sailing"},{"segment":[57.877312730604466,96.32289230616844],"label":"Sailing"},{"segment":[112.82941712948517,116.79934106552261],"label":"Sailing"}]},"krqxyYFHzBI":{"duration":63.693,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=krqxyYFHzBI","annotations":[{"segment":[16.599457160652612,62.114097347568496],"label":"Doing a powerbomb"}]},"9AOVI0OCZqg":{"duration":193.121,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9AOVI0OCZqg","annotations":[{"segment":[18.98060842433697,125.33227150038383],"label":"Braiding hair"}]},"u_RzyIJi8qc":{"duration":163.05,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=u_RzyIJi8qc","annotations":[{"segment":[34.21771737184865,93.23132262114663],"label":"Snatch"},{"segment":[120.5709003800312,144.2272162773791],"label":"Snatch"}]},"m34BYRanODU":{"duration":114.06,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=m34BYRanODU","annotations":[{"segment":[26.50410140405616,108.15096411856474],"label":"Doing step aerobics"}]},"9PxPcJS47js":{"duration":225.188,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9PxPcJS47js","annotations":[{"segment":[19.316154446177848,209.33546957878315],"label":"Slacklining"}]},"tM0BAYylvLA":{"duration":52.52,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=tM0BAYylvLA","annotations":[{"segment":[0,52.501],"label":"Getting a 
haircut"}]},"ddzfgHw_qlU":{"duration":59.82,"subset":"testing","resolution":"1016x720","url":"https://www.youtube.com/watch?v=ddzfgHw_qlU","annotations":[]},"HK9tOFFF5II":{"duration":141.27,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=HK9tOFFF5II","annotations":[{"segment":[18.4555128,94.91083680000001],"label":"Getting a tattoo"}]},"syy0FVZ7SkQ":{"duration":192.308,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=syy0FVZ7SkQ","annotations":[{"segment":[0.01,181.20722067394695],"label":"Mooping floor"}]},"a0--cEyaRNM":{"duration":50.16,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=a0--cEyaRNM","annotations":[]},"m9eV0XgWzmQ":{"duration":220.52,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=m9eV0XgWzmQ","annotations":[]},"YvIfwsbuiKI":{"duration":71.332,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=YvIfwsbuiKI","annotations":[{"segment":[0,71.301],"label":"Kneeling"}]},"RI8IORq_BbY":{"duration":114.288,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=RI8IORq_BbY","annotations":[{"segment":[27.713690327613104,37.515960218408736],"label":"Sharpening knives"}]},"s5Ng5ZZ9f9k":{"duration":41.936,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=s5Ng5ZZ9f9k","annotations":[]},"zi_TdMQffkU":{"duration":43.097,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=zi_TdMQffkU","annotations":[{"segment":[9.270542901716068,17.26470670826833],"label":"Calf roping"}]},"RZpbFG3j-Bo":{"duration":160.59,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RZpbFG3j-Bo","annotations":[{"segment":[0.01,152.07123462714506],"label":"Putting in contact 
lenses"}]},"xv8OYJ7t7-E":{"duration":94.622,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xv8OYJ7t7-E","annotations":[{"segment":[79.56433952886115,80.892872099844],"label":"Having an ice cream"}]},"zIvC3Yytcv0":{"duration":188.129,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zIvC3Yytcv0","annotations":[{"segment":[72.18560543144677,186.44041851555892],"label":"Drum corps"}]},"BIy2GQjWUsA":{"duration":53.546,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BIy2GQjWUsA","annotations":[{"segment":[30.072055731669266,53.5449659],"label":"Grooming dog"}]},"sf2zGT5nN04":{"duration":225.93,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=sf2zGT5nN04","annotations":[{"segment":[63.09126069110765,219.23331927301092],"label":"Preparing salad"}]},"xbQQhK7wQZQ":{"duration":217.804,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xbQQhK7wQZQ","annotations":[{"segment":[5.435531981279251,25.47905616224649],"label":"Making a cake"},{"segment":[35.33095787831513,187.1861326053042],"label":"Making a cake"}]},"GQVqSLs2Ulg":{"duration":85.427,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=GQVqSLs2Ulg","annotations":[{"segment":[6.7868190327613105,80.66228705148205],"label":"Painting 
fence"}]},"yVp99wxlW90":{"duration":60.512,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=yVp99wxlW90","annotations":[{"segment":[0.01,2.83203856474259],"label":"Croquet"},{"segment":[6.513688698907956,7.835306695787832],"label":"Croquet"},{"segment":[8.968122121684868,11.611358115444618],"label":"Croquet"},{"segment":[12.744173541341652,16.3314223900156],"label":"Croquet"},{"segment":[19.918671238689548,25.582748368174727],"label":"Croquet"},{"segment":[0.2,60.311223999999996],"label":"Croquet"},{"segment":[28.131583076443057,30.208411357254292],"label":"Croquet"},{"segment":[37.85491548205928,40.403750190327614],"label":"Croquet"}]},"D-wP7_1A_Kw":{"duration":74.537,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=D-wP7_1A_Kw","annotations":[{"segment":[7.128500104567875,66.57136594880464],"label":"Playing beach volleyball"}]},"Z8yKTJeRMOE":{"duration":65.829,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Z8yKTJeRMOE","annotations":[{"segment":[11.896236002032882,55.20181225864252],"label":"Painting furniture"}]},"5Qm6ZTr9DJY":{"duration":33.72,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5Qm6ZTr9DJY","annotations":[]},"LyJxXonLzT4":{"duration":70.43,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=LyJxXonLzT4","annotations":[{"segment":[9.665035881435257,64.9095023400936],"label":"Mixing 
drinks"}]},"3zMCxmdkcRY":{"duration":202.48,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3zMCxmdkcRY","annotations":[{"segment":[20.05351794071763,193.74540561622464],"label":"Ballet"}]},"7hbcifblzUY":{"duration":234.383,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7hbcifblzUY","annotations":[]},"GYh4XH7jUL4":{"duration":7.99,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=GYh4XH7jUL4","annotations":[{"segment":[1.7587059282371296,6.6805967238689545],"label":"Springboard diving"}]},"wHvOS2brBsI":{"duration":195.164,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wHvOS2brBsI","annotations":[]},"Q2jdtN4-RE0":{"duration":231.23,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Q2jdtN4-RE0","annotations":[{"segment":[6.8537626099844,219.3204035195008],"label":"Tai chi"}]},"EJr0qHpy77g":{"duration":51.967,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=EJr0qHpy77g","annotations":[{"segment":[17.59299920654297,47.95748518785138],"label":"Hitting a pinata"}]},"p1yEPOr467M":{"duration":72.098,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=p1yEPOr467M","annotations":[{"segment":[0.009999999999999998,64.44950157098285],"label":"Waterskiing"}]},"rSxO9uspxT8":{"duration":70.427,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rSxO9uspxT8","annotations":[{"segment":[17.579057223088924,53.835862745709825],"label":"Trimming branches or 
hedges"}]},"_i6yjCO8nzQ":{"duration":187.87,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_i6yjCO8nzQ","annotations":[{"segment":[23.74053806233882,184.06244220904836],"label":"Tumbling"}]},"wCGpPDYfs-s":{"duration":197.811,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wCGpPDYfs-s","annotations":[{"segment":[121.61269786309137,196.9102203707944],"label":"Playing drums"}]},"xXj-oQm-NbE":{"duration":221.82,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=xXj-oQm-NbE","annotations":[{"segment":[84.76515847893916,132.8564116567863],"label":"Wrapping presents"}]},"zJNYqTVWqyY":{"duration":203.73,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zJNYqTVWqyY","annotations":[{"segment":[11.442040179566366,178.94079407644304],"label":"Bathing dog"}]},"8K4cX9GfaII":{"duration":68.29,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8K4cX9GfaII","annotations":[{"segment":[1.5969032761310453,64.19551170046802],"label":"Hanging 
wallpaper"}]},"8J_erRJya-k":{"duration":118.747,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8J_erRJya-k","annotations":[{"segment":[5.372322299694709,115.96805962870515],"label":"Longboarding"}]},"bFi3hnk5yAM":{"duration":146.03,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bFi3hnk5yAM","annotations":[]},"58DHmEWMpug":{"duration":32.067,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=58DHmEWMpug","annotations":[]},"G3xayqdY0TY":{"duration":179.003,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=G3xayqdY0TY","annotations":[{"segment":[0.01,170.33584995781968],"label":"Wakeboarding"}]},"xuF9WjXsGKo":{"duration":18.16,"subset":"testing","resolution":"786x480","url":"https://www.youtube.com/watch?v=xuF9WjXsGKo","annotations":[]},"OHJzD35IfgI":{"duration":128.848,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=OHJzD35IfgI","annotations":[]},"rxfkWIGZtlQ":{"duration":121.743,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=rxfkWIGZtlQ","annotations":[{"segment":[0.01,121.701],"label":"Blow-drying hair"}]},"vvoqG7UQsdc":{"duration":49.02,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vvoqG7UQsdc","annotations":[{"segment":[7.9910314,42.4021714],"label":"Pole 
vault"}]},"oFtwNVuasmU":{"duration":178.237,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oFtwNVuasmU","annotations":[{"segment":[16.213404539165147,18.712045584368276],"label":"Waterskiing"},{"segment":[30.92762402758357,59.24555587321902],"label":"Waterskiing"},{"segment":[64.24283796362528,69.79537361963223],"label":"Waterskiing"},{"segment":[104.22109468687532,107.5526160804795],"label":"Waterskiing"},{"segment":[117.547180261292,119.49056669652205],"label":"Waterskiing"},{"segment":[130.87326469184913,137.81393420119494],"label":"Waterskiing"},{"segment":[143.08884302829773,149.19663219652205],"label":"Waterskiing"},{"segment":[163.63322477596128,164.18847833670895],"label":"Waterskiing"}]},"OzXD3WO6jrs":{"duration":65.97,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OzXD3WO6jrs","annotations":[{"segment":[10.242696089999999,59.28686427457099],"label":"Using parallel bars"}]},"OiL6Aj0gC14":{"duration":152.46,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=OiL6Aj0gC14","annotations":[{"segment":[1.9028049285236717,20.930854213760387],"label":"Ping-pong"},{"segment":[41.623857811455316,57.559849087841066],"label":"Ping-pong"},{"segment":[61.60330956095387,70.16593173931038],"label":"Ping-pong"},{"segment":[100.37295997962367,105.12997230093285],"label":"Ping-pong"}]},"70bS0DkAeDo":{"duration":32.88,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=70bS0DkAeDo","annotations":[{"segment":[3.30335313,23.3966964],"label":"Clean and 
jerk"}]},"z08lVo_kgDI":{"duration":65.667,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=z08lVo_kgDI","annotations":[]},"cms1BP4eKOA":{"duration":55.75,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=cms1BP4eKOA","annotations":[]},"XiG7rgPoKKI":{"duration":184.506,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=XiG7rgPoKKI","annotations":[{"segment":[6.401007153607809,175.38758919026722],"label":"Canoeing"}]},"sObM6mb3hgs":{"duration":233.291,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=sObM6mb3hgs","annotations":[{"segment":[27.9368499774521,225.30892009943557],"label":"Rollerblading"}]},"wdU4BKY6Ajg":{"duration":85.73,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=wdU4BKY6Ajg","annotations":[]},"XORmEz1vOeQ":{"duration":71.797,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=XORmEz1vOeQ","annotations":[{"segment":[7.052953198127924,71.741],"label":"Spread mulch"}]},"tkHJgKq3jTs":{"duration":107.021,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=tkHJgKq3jTs","annotations":[{"segment":[0.01,106.38110679632406],"label":"Decorating the Christmas tree"}]},"W5g1IIAOZqs":{"duration":33.0,"subset":"training","resolution":"312x224","url":"https://www.youtube.com/watch?v=W5g1IIAOZqs","annotations":[{"segment":[11.50634635,18.736777341653667],"label":"Ping-pong"}]},"U0mKdo2-Gfc":{"duration":104.769,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=U0mKdo2-Gfc","annotations":[{"segment":[22.704273010920435,32.01557566302652],"label":"Running a marathon"}]},"Pt0CjFgzpxM":{"duration":173.477,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Pt0CjFgzpxM","annotations":[{"segment":[9.982301332177505,151.08353781225424],"label":"Laying 
tile"}]},"CMYeHWoB1FM":{"duration":13.793,"subset":"validation","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=CMYeHWoB1FM","annotations":[{"segment":[0,13.792652],"label":"Peeling potatoes"}]},"AtUAYnNTniM":{"duration":72.17,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AtUAYnNTniM","annotations":[{"segment":[2.533184750780031,70.19736409383775],"label":"Cheerleading"}]},"Odofg9Xz5ig":{"duration":230.226,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Odofg9Xz5ig","annotations":[]},"lbtW7nHTnwA":{"duration":168.37,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lbtW7nHTnwA","annotations":[{"segment":[0.3337780099885055,168.367073],"label":"Ping-pong"}]},"eCunwZEbEMs":{"duration":168.32,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eCunwZEbEMs","annotations":[]},"CBW_uJJpmZY":{"duration":115.938,"subset":"validation","resolution":"562x480","url":"https://www.youtube.com/watch?v=CBW_uJJpmZY","annotations":[{"segment":[38.5251649739459,109.24506728237128],"label":"Rope skipping"}]},"padyJHC5Y5Y":{"duration":148.84,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=padyJHC5Y5Y","annotations":[{"segment":[31.081519500780033,59.84352262090484],"label":"Blow-drying hair"},{"segment":[69.35353978159127,95.3321232449298],"label":"Blow-drying hair"}]},"wBiRGLLjFVU":{"duration":227.672,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wBiRGLLjFVU","annotations":[{"segment":[4.262174111454254,215.2397918845554],"label":"Slacklining"}]},"bPw2rvCjXUE":{"duration":175.892,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=bPw2rvCjXUE","annotations":[{"segment":[0.01,31.616572516181623],"label":"Arm wrestling"},{"segment":[74.35647933861154,134.63070690870498],"label":"Arm 
wrestling"},{"segment":[157.91847665169564,169.6993484040321],"label":"Arm wrestling"}]},"XgbTh1BCciA":{"duration":64.95,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=XgbTh1BCciA","annotations":[{"segment":[7.668174219999999,64.60039488377535],"label":"Playing bagpipes"}]},"r6r4Q9jLFvs":{"duration":54.71,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=r6r4Q9jLFvs","annotations":[{"segment":[8.065072431468687,13.271098022152179],"label":"Playing squash"}]},"yVE4t-X5b-M":{"duration":114.64,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yVE4t-X5b-M","annotations":[{"segment":[1.6095622043681748,109.45022989703588],"label":"Tumbling"}]},"YDNgm6ufrJc":{"duration":61.16,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=YDNgm6ufrJc","annotations":[{"segment":[0.01,57.440153070202804],"label":"Washing dishes"}]},"fXgszraP2tc":{"duration":217.06,"subset":"training","resolution":"2560x1080","url":"https://www.youtube.com/watch?v=fXgszraP2tc","annotations":[{"segment":[25.058424321372858,37.58763648205929],"label":"Walking the dog"},{"segment":[198.4356304368175,210.62621524180966],"label":"Walking the dog"}]},"Mm6iYEyY9dI":{"duration":233.825,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Mm6iYEyY9dI","annotations":[{"segment":[5.561104755520449,10.294663945551601],"label":"Hula hoop"}]},"JapT-ASJlX0":{"duration":211.32999999999998,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=JapT-ASJlX0","annotations":[]},"PcGzPNEnVD0":{"duration":202.339,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PcGzPNEnVD0","annotations":[{"segment":[60.796007980266474,166.21351896999082],"label":"Changing car 
wheel"}]},"QMHF20eV9N0":{"duration":34.81,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QMHF20eV9N0","annotations":[{"segment":[2.9843291731669264,10.092458658346333],"label":"Javelin throw"},{"segment":[17.146327613104525,21.16160686427457],"label":"Javelin throw"}]},"rcrhyi2EzZc":{"duration":90.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rcrhyi2EzZc","annotations":[{"segment":[0,89.20413131825272],"label":"Washing dishes"}]},"mpj1rR1wXKU":{"duration":195.814,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mpj1rR1wXKU","annotations":[{"segment":[185.78073088268624,195.55700000000002],"label":"Playing ten pins"}]},"_vbwjI1QA7g":{"duration":108.74,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=_vbwjI1QA7g","annotations":[{"segment":[70.26622274143303,76.53092211838005],"label":"Throwing darts"}]},"fsoAdigH0X4":{"duration":16.86,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=fsoAdigH0X4","annotations":[{"segment":[0.14464473166926678,16.791938394695787],"label":"Mowing the lawn"}]},"CNH37tJNzFE":{"duration":129.986,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CNH37tJNzFE","annotations":[{"segment":[49.682128541341655,129.37631860648574],"label":"Changing car wheel"}]},"618D4x4bwfc":{"duration":65.71,"subset":"testing","resolution":"562x480","url":"https://www.youtube.com/watch?v=618D4x4bwfc","annotations":[]},"kzBTJEMgoj0":{"duration":176.983,"subset":"training","resolution":"176x144","url":"https://www.youtube.com/watch?v=kzBTJEMgoj0","annotations":[{"segment":[0.2,10.491941941623244],"label":"Mooping floor"},{"segment":[14.357394235905492,29.819203413034483],"label":"Mooping floor"},{"segment":[42.519975237104724,55.772954531786716],"label":"Mooping 
floor"}]},"jkn6uvmqwh4":{"duration":73.236,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jkn6uvmqwh4","annotations":[{"segment":[34.15934579439252,64.78889252336448],"label":"Drum corps"}]},"pGNZ3GqYrVY":{"duration":152.672,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=pGNZ3GqYrVY","annotations":[]},"7ih5UMIU7zE":{"duration":166.42000000000002,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=7ih5UMIU7zE","annotations":[{"segment":[22.58707504218536,166.41741496598638],"label":"Cheerleading"}]},"IDr50VT8BK8":{"duration":238.05,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IDr50VT8BK8","annotations":[{"segment":[117.6811809672387,194.5266209048362],"label":"Brushing hair"}]},"GIXToy60QnQ":{"duration":98.96000000000001,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GIXToy60QnQ","annotations":[]},"iGtvLylVVQQ":{"duration":94.25,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iGtvLylVVQQ","annotations":[]},"3YdmY4oPE5s":{"duration":61.533,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=3YdmY4oPE5s","annotations":[{"segment":[0.01,61.15689758181632],"label":"Croquet"}]},"nXNczyQpljQ":{"duration":204.34,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nXNczyQpljQ","annotations":[{"segment":[20.879846894556053,25.66149122155362],"label":"Playing saxophone"},{"segment":[47.65705512574245,52.11992316427351],"label":"Playing saxophone"},{"segment":[76.98447366466088,122.88825920383753],"label":"Playing saxophone"},{"segment":[149.98424372349044,192.38149008953556],"label":"Playing 
saxophone"}]},"Nh-RdjyfGNA":{"duration":31.185,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=Nh-RdjyfGNA","annotations":[{"segment":[7.686638131045242,10.313716985959438],"label":"Beach soccer"},{"segment":[15.713823521060842,21.162579664586584],"label":"Beach soccer"}]},"c3yTMTETJEY":{"duration":36.13,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=c3yTMTETJEY","annotations":[]},"_G4EVIG9YaE":{"duration":55.937,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=_G4EVIG9YaE","annotations":[]},"_LKFGLH8XxI":{"duration":39.196,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=_LKFGLH8XxI","annotations":[{"segment":[0.2,4.1437566302652105],"label":"Blow-drying hair"},{"segment":[11.273455538221528,21.632847113884555],"label":"Blow-drying hair"},{"segment":[25.654728549141964,32.11411388455538],"label":"Blow-drying hair"}]},"cEVHZc_uT7c":{"duration":138.0,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=cEVHZc_uT7c","annotations":[{"segment":[37.02861897333035,74.48780328355991],"label":"Hand washing 
clothes"}]},"F035AUh0mSA":{"duration":36.967,"subset":"testing","resolution":"328x240","url":"https://www.youtube.com/watch?v=F035AUh0mSA","annotations":[]},"_zHSfEhEqkU":{"duration":194.352,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=_zHSfEhEqkU","annotations":[{"segment":[5.457587279251171,66.40064523088924],"label":"Canoeing"},{"segment":[120.97651802340093,187.07396396099847],"label":"Canoeing"}]},"MZ4yIxM6snw":{"duration":115.706,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=MZ4yIxM6snw","annotations":[{"segment":[1.6245636600974243,113.89996228237129],"label":"Plastering"}]},"65VsS25XcMY":{"duration":17.624,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=65VsS25XcMY","annotations":[]},"L2oaUDyzWPA":{"duration":92.09,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=L2oaUDyzWPA","annotations":[{"segment":[11.630765990639626,22.830762870514818],"label":"Playing harmonica"},{"segment":[48.964088923556936,61.74357254290171],"label":"Playing harmonica"},{"segment":[68.49228861154447,88.30766770670827],"label":"Playing harmonica"}]},"bQVMoeBK7XU":{"duration":100.055,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bQVMoeBK7XU","annotations":[{"segment":[13.55153738317757,54.82920872274143],"label":"Applying 
sunscreen"}]},"VI2qAFwvPSc":{"duration":101.12,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=VI2qAFwvPSc","annotations":[{"segment":[0.01,82.59171918876754],"label":"Cumbia"}]},"LDIemY9nO-4":{"duration":132.702,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LDIemY9nO-4","annotations":[{"segment":[17.596994492024578,30.846496227196027],"label":"Rafting"},{"segment":[47.61539686077239,55.27526505141839],"label":"Rafting"},{"segment":[62.31406284822822,68.11071985736572],"label":"Rafting"},{"segment":[85.2936674201662,88.60604285395907],"label":"Rafting"},{"segment":[90.0552065522621,112.20671712948518],"label":"Rafting"},{"segment":[114.48397522620907,115.72611600624025],"label":"Rafting"},{"segment":[119.45253834633387,121.72979644305772],"label":"Rafting"}]},"PzAjZbCEbLU":{"duration":207.192,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PzAjZbCEbLU","annotations":[{"segment":[98.58573240249609,192.64621807176286],"label":"Fixing the roof"}]},"UmuWttAYfco":{"duration":157.316,"subset":"training","resolution":"640x356","url":"https://www.youtube.com/watch?v=UmuWttAYfco","annotations":[{"segment":[0.2,157.08100000000002],"label":"Arm wrestling"}]},"THPFtlx3gXw":{"duration":215.44,"subset":"training","resolution":"2048x1024","url":"https://www.youtube.com/watch?v=THPFtlx3gXw","annotations":[{"segment":[9.910419886115445,190.64960289391576],"label":"Doing motocross"}]},"ou45_7IdWCE":{"duration":61.998,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ou45_7IdWCE","annotations":[{"segment":[0.01,50.65142589703588],"label":"Swinging at the 
playground"}]},"0dWysfs67aY":{"duration":139.74,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=0dWysfs67aY","annotations":[]},"BzhREL6ncq4":{"duration":34.67,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=BzhREL6ncq4","annotations":[{"segment":[0.5941029641185647,33.431794071762866],"label":"Using uneven bars"}]},"7eo-KtL5ITg":{"duration":146.4,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7eo-KtL5ITg","annotations":[]},"nB90Q8sTBgE":{"duration":210.44,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nB90Q8sTBgE","annotations":[{"segment":[105.02639625585024,188.40900780031203],"label":"Painting"}]},"yRswMXWCcrI":{"duration":131.031,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yRswMXWCcrI","annotations":[{"segment":[3.492091768402909,127.77652653907373],"label":"Skiing"}]},"mGsenLq9yEU":{"duration":235.357,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mGsenLq9yEU","annotations":[{"segment":[14.07407906392653,16.639666291340863],"label":"Bullfighting"},{"segment":[115.23151831626298,136.1227285966368],"label":"Bullfighting"}]},"snG89ed-64M":{"duration":74.17,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=snG89ed-64M","annotations":[{"segment":[1.7355195432307087,73.00752211857181],"label":"Playing lacrosse"}]},"VLmZ3M3wSbo":{"duration":103.701,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VLmZ3M3wSbo","annotations":[{"segment":[0.2,5.805925110777852],"label":"Putting in contact lenses"},{"segment":[48.5440030687625,97.73311028562793],"label":"Putting in contact 
lenses"}]},"yEn-5y95DI0":{"duration":56.61,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yEn-5y95DI0","annotations":[{"segment":[14.82409984399376,38.309685300000005],"label":"Volleyball"},{"segment":[39.98626053042122,50.137297971918876],"label":"Volleyball"}]},"P6UwokP8BEg":{"duration":90.23,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=P6UwokP8BEg","annotations":[{"segment":[22.663756756630264,28.294503776911075],"label":"Hammer throw"},{"segment":[42.79367735413417,53.91440271918877],"label":"Hammer throw"},{"segment":[63.627441329173166,75.17047272074883],"label":"Hammer throw"}]},"V9LudLaWGOM":{"duration":63.65,"subset":"validation","resolution":"642x480","url":"https://www.youtube.com/watch?v=V9LudLaWGOM","annotations":[{"segment":[2.083564731669267,62.60615931825273],"label":"Javelin throw"}]},"rZu5ZJmAlbI":{"duration":230.459,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=rZu5ZJmAlbI","annotations":[{"segment":[32.25694695435954,197.4842241625038],"label":"Painting fence"}]},"FF7vKwGNjo4":{"duration":108.93,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=FF7vKwGNjo4","annotations":[]},"xJ23geP1Hss":{"duration":191.263,"subset":"validation","resolution":"853x480","url":"https://www.youtube.com/watch?v=xJ23geP1Hss","annotations":[{"segment":[7.827094443550394,166.31898064630687],"label":"Baton twirling"}]},"bY0bGFJ-k1U":{"duration":143.152,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bY0bGFJ-k1U","annotations":[{"segment":[0.01,136.22776466458657],"label":"Welding"}]},"RHb_nF11Scc":{"duration":30.54,"subset":"validation","resolution":"176x144","url":"https://www.youtube.com/watch?v=RHb_nF11Scc","annotations":[{"segment":[1.4766948658346335,29.581532634945397],"label":"Doing 
nails"}]},"x2xC5lm0cZw":{"duration":158.55,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=x2xC5lm0cZw","annotations":[{"segment":[0.01,156.5671176552262],"label":"Mowing the lawn"}]},"FTCCoZdr5fo":{"duration":216.745,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FTCCoZdr5fo","annotations":[{"segment":[0.01,205.5827269890796],"label":"Rollerblading"}]},"SkytxdoXLs4":{"duration":166.836,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=SkytxdoXLs4","annotations":[{"segment":[0.2,166.63537414965987],"label":"Futsal"}]},"7AkyOhKkT6g":{"duration":190.15,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7AkyOhKkT6g","annotations":[{"segment":[145.3550868174727,157.81409425897036],"label":"Removing curlers"}]},"OlR9S70AA74":{"duration":76.07,"subset":"testing","resolution":"424x240","url":"https://www.youtube.com/watch?v=OlR9S70AA74","annotations":[]},"OysON6LbF6Y":{"duration":120.44,"subset":"testing","resolution":"568x320","url":"https://www.youtube.com/watch?v=OysON6LbF6Y","annotations":[]},"wAgOScz_TjA":{"duration":75.47,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=wAgOScz_TjA","annotations":[]},"VVLeWYKoNUE":{"duration":17.996,"subset":"training","resolution":"624x360","url":"https://www.youtube.com/watch?v=VVLeWYKoNUE","annotations":[{"segment":[0,10.866995473432661],"label":"Waterskiing"}]},"-rKS00dzFxQ":{"duration":231.434,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-rKS00dzFxQ","annotations":[{"segment":[42.95528705148206,48.00885023400936],"label":"Peeling potatoes"}]},"o_AftOrP4zI":{"duration":158.29,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=o_AftOrP4zI","annotations":[{"segment":[79.88604426898164,83.83713139258971],"label":"High jump"},{"segment":[116.92748664032761,124.82966102784712],"label":"High 
jump"}]},"TlnvPrGG2BM":{"duration":78.81,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=TlnvPrGG2BM","annotations":[]},"7WeokrKMOM8":{"duration":133.701,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7WeokrKMOM8","annotations":[]},"rprQvEVVpIc":{"duration":17.83,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=rprQvEVVpIc","annotations":[{"segment":[1.7607316799999997,17.04973868954758],"label":"Springboard diving"}]},"OhXBMlKOHMI":{"duration":42.284,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=OhXBMlKOHMI","annotations":[{"segment":[0,42.283537],"label":"Shuffleboard"}]},"MJZjSHG6tBY":{"duration":224.12,"subset":"training","resolution":"384x288","url":"https://www.youtube.com/watch?v=MJZjSHG6tBY","annotations":[{"segment":[49.648827335406345,198.24670264],"label":"Drinking beer"}]},"AJ_QqCET1iY":{"duration":63.14,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AJ_QqCET1iY","annotations":[{"segment":[12.698953198127924,53.45373322932917],"label":"Using the pommel horse"}]},"BpsmWqCxTDA":{"duration":236.913,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BpsmWqCxTDA","annotations":[]},"-7eQ2bHNPUw":{"duration":197.46,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=-7eQ2bHNPUw","annotations":[{"segment":[70.83748829953198,137.97910764430577],"label":"Washing hands"}]},"7uumfT4aGLU":{"duration":43.68,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7uumfT4aGLU","annotations":[{"segment":[5.30401344,40.19269110764431],"label":"Using parallel 
bars"}]},"L7kozALmUSY":{"duration":250.312,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=L7kozALmUSY","annotations":[]},"gCxLSh-cgng":{"duration":151.63,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gCxLSh-cgng","annotations":[{"segment":[72.83455226209048,88.20548049921996],"label":"Tennis serve with ball bouncing"}]},"_8aVDfNQtq0":{"duration":122.23,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_8aVDfNQtq0","annotations":[{"segment":[19.64067652229899,28.03086843473739],"label":"Beach soccer"},{"segment":[32.79802293044103,96.29652060062402],"label":"Beach soccer"}]},"kPnqo24kemc":{"duration":109.924,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=kPnqo24kemc","annotations":[{"segment":[0,109.881],"label":"Kneeling"}]},"xYM6h31PrM0":{"duration":115.17,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xYM6h31PrM0","annotations":[{"segment":[14.0018752,115.170974],"label":"Using the balance beam"}]},"ejUc7O8NbCE":{"duration":183.16,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ejUc7O8NbCE","annotations":[]},"sNQQ2Fpxbzw":{"duration":149.583,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=sNQQ2Fpxbzw","annotations":[{"segment":[26.625002348258878,121.45406591691126],"label":"Hurling"}]},"kj8L5yu-fGs":{"duration":229.48,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=kj8L5yu-fGs","annotations":[{"segment":[0.01,211.58243645664194],"label":"Preparing 
salad"}]},"kdOsmDACtqw":{"duration":131.57,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kdOsmDACtqw","annotations":[]},"djgnHazfi2Y":{"duration":46.65,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=djgnHazfi2Y","annotations":[{"segment":[7.197346333853354,41.861448720000006],"label":"Using the pommel horse"}]},"j28N0gvrNqA":{"duration":60.74,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=j28N0gvrNqA","annotations":[]},"M_Jib8gkQpg":{"duration":8.62,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=M_Jib8gkQpg","annotations":[{"segment":[0.1747111170046802,5.671391644305772],"label":"Springboard diving"}]},"yLS0UuNYXOI":{"duration":132.238,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=yLS0UuNYXOI","annotations":[{"segment":[39.3248769470405,114.0799906542056],"label":"Waterskiing"}]},"Mzt-E6pxuUI":{"duration":138.46,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Mzt-E6pxuUI","annotations":[{"segment":[71.0604412,86.0985818],"label":"Washing dishes"},{"segment":[30.073512,63.6334468],"label":"Washing 
dishes"}]},"XoeGnEtEq3U":{"duration":218.988,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=XoeGnEtEq3U","annotations":[{"segment":[24.86170496999344,35.75998887868393],"label":"Knitting"},{"segment":[57.89712806821148,58.57827081250463],"label":"Knitting"},{"segment":[119.54054642674204,128.39540210255305],"label":"Knitting"},{"segment":[128.73597347469962,131.46054445187224],"label":"Knitting"}]},"-byL2szHXXE":{"duration":205.53,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-byL2szHXXE","annotations":[]},"gx6QvOXwetM":{"duration":18.019,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=gx6QvOXwetM","annotations":[{"segment":[0.01,17.065907889075795],"label":"Cutting the grass"}]},"knnQ99kDt8w":{"duration":39.06,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=knnQ99kDt8w","annotations":[{"segment":[1.9175686427457097,35.82505226209049],"label":"Dodgeball"}]},"j30ZbXEi8sc":{"duration":207.517,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=j30ZbXEi8sc","annotations":[{"segment":[0.2,207.26100000000002],"label":"Drum corps"}]},"MVB2uaxckNg":{"duration":234.615,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=MVB2uaxckNg","annotations":[]},"oflwFZj9PNY":{"duration":20.34,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oflwFZj9PNY","annotations":[]},"QF7wNOCJpMU":{"duration":87.725,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QF7wNOCJpMU","annotations":[{"segment":[2.1897038377535103,80.74532901716069],"label":"Canoeing"}]},"1tEJRzqqalM":{"duration":231.23,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1tEJRzqqalM","annotations":[{"segment":[13.521704368174726,204.62845943837755],"label":"Grooming 
horse"}]},"Fp62gCq6eHE":{"duration":51.32,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=Fp62gCq6eHE","annotations":[]},"BXq2SjN9BMY":{"duration":123.879,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BXq2SjN9BMY","annotations":[]},"f--wWfsr0zA":{"duration":104.49000000000001,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=f--wWfsr0zA","annotations":[{"segment":[8.485551690000001,59.97136973478939],"label":"Ballet"}]},"YAKXrBinGD4":{"duration":222.84,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YAKXrBinGD4","annotations":[]},"VE-zu8cdL44":{"duration":73.05,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=VE-zu8cdL44","annotations":[{"segment":[0.34188756318252733,33.39101867082683],"label":"Walking the dog"},{"segment":[37.83555699219969,72.36620087363495],"label":"Walking the dog"}]},"_yvbyKO1ymo":{"duration":70.403,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_yvbyKO1ymo","annotations":[]},"0fsMeZoZzJI":{"duration":59.93,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=0fsMeZoZzJI","annotations":[{"segment":[0,59.9307029478458],"label":"Shaving legs"}]},"xC9s-_MNO_Q":{"duration":122.3,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xC9s-_MNO_Q","annotations":[{"segment":[0.2,122.09936507936507],"label":"Decorating the Christmas tree"}]},"LZ1142kuCnI":{"duration":14.745,"subset":"training","resolution":"204x360","url":"https://www.youtube.com/watch?v=LZ1142kuCnI","annotations":[{"segment":[0.0045861419023285836,14.72632608274158],"label":"Mooping floor"}]},"DsUbSyRX-B4":{"duration":29.513,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=DsUbSyRX-B4","annotations":[{"segment":[0.01,14.571824317870675],"label":"Swinging at the 
playground"}]},"_7l3OxQodF0":{"duration":179.17000000000002,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_7l3OxQodF0","annotations":[{"segment":[71.69377391830368,110.5453707005423],"label":"Rock climbing"}]},"vBaiedxy1Uw":{"duration":93.948,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vBaiedxy1Uw","annotations":[]},"tLdbdQ9fVZE":{"duration":56.077,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tLdbdQ9fVZE","annotations":[{"segment":[23.197924445326997,24.418867907224357],"label":"Spread mulch"},{"segment":[31.221267194938196,32.267790162278786],"label":"Spread mulch"}]},"sWxKUQuMxhA":{"duration":15.63,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sWxKUQuMxhA","annotations":[]},"2i_rotAjuoE":{"duration":195.14,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=2i_rotAjuoE","annotations":[{"segment":[27.971367599999997,157.0057412],"label":"Horseback riding"}]},"by4dS4tgkY0":{"duration":189.359,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=by4dS4tgkY0","annotations":[{"segment":[18.020097565807394,174.02195162955329],"label":"Running a marathon"}]},"YtFGG_7A_UU":{"duration":135.88,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YtFGG_7A_UU","annotations":[{"segment":[40.67704212168486,94.06565990639626],"label":"Polishing shoes"}]},"CnHtcWiHKw0":{"duration":71.541,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CnHtcWiHKw0","annotations":[]},"Xc2CUi0Rnpw":{"duration":214.251,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Xc2CUi0Rnpw","annotations":[{"segment":[8.52125663026521,117.79384165366615],"label":"Beach soccer"},{"segment":[143.52469500780032,205.01140951638064],"label":"Beach 
soccer"}]},"HQFey_iCEw8":{"duration":12.93,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HQFey_iCEw8","annotations":[{"segment":[0.020126365054602184,12.881],"label":"Tennis serve with ball bouncing"}]},"Rd9TrjbCkAE":{"duration":11.819,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Rd9TrjbCkAE","annotations":[{"segment":[0,11.818955],"label":"Painting fence"}]},"EK7qW-oavQY":{"duration":51.76,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=EK7qW-oavQY","annotations":[]},"iFDr-o61ewo":{"duration":94.715,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iFDr-o61ewo","annotations":[{"segment":[3.83798127925117,87.70164430577223],"label":"Blowing leaves"}]},"9KNOOoIK0zw":{"duration":215.807,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=9KNOOoIK0zw","annotations":[{"segment":[65.65088987232959,148.80868267394695],"label":"Blow-drying hair"},{"segment":[169.0089563400936,201.32939420592822],"label":"Blow-drying hair"}]},"mSR-HpiJ0No":{"duration":174.82,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=mSR-HpiJ0No","annotations":[]},"fJWi44w71_o":{"duration":180.094,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=fJWi44w71_o","annotations":[]},"TotbMcWIoyE":{"duration":196.79,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=TotbMcWIoyE","annotations":[{"segment":[14.12215170670827,165.78178090483618],"label":"Playing racquetball"}]},"hHWXldYW8rU":{"duration":8.31,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=hHWXldYW8rU","annotations":[{"segment":[3.0100982839313573,8.281],"label":"Washing 
face"}]},"7EPzlmJ25dA":{"duration":191.542,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7EPzlmJ25dA","annotations":[{"segment":[15.833936037441497,168.5145772230889],"label":"Skiing"}]},"aTvt_fP243g":{"duration":45.256,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aTvt_fP243g","annotations":[{"segment":[0.01,0.02],"label":"Cleaning sink"},{"segment":[12.199333853354135,39.84175507020281],"label":"Cleaning sink"}]},"xlf4czJSWHM":{"duration":162.73,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xlf4czJSWHM","annotations":[]},"E2Vd-sOC_ik":{"duration":105.419,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=E2Vd-sOC_ik","annotations":[{"segment":[39.4604245495306,58.66537314766144],"label":"Powerbocking"},{"segment":[76.22887314766145,92.1509245495306],"label":"Powerbocking"}]},"mt7gThnTtUM":{"duration":152.138,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mt7gThnTtUM","annotations":[]},"Q_32kySHzCQ":{"duration":153.322,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=Q_32kySHzCQ","annotations":[{"segment":[89.42217472698908,108.78900936037441],"label":"Throwing darts"},{"segment":[122.67262558502341,129.36820748829953],"label":"Throwing darts"},{"segment":[136.06378939157568,140.60721996879874],"label":"Throwing darts"}]},"e2IL0BusPNM":{"duration":113.941,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=e2IL0BusPNM","annotations":[{"segment":[0.01,112.02039185950021],"label":"Beer pong"}]},"gwZleaX_ZR8":{"duration":149.61,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=gwZleaX_ZR8","annotations":[{"segment":[12.603327708618549,31.04152935641235],"label":"Plataform diving"},{"segment":[38.510167998556675,49.01294108907213],"label":"Plataform 
diving"},{"segment":[54.38102511311336,71.65225197307213],"label":"Plataform diving"},{"segment":[62.31645367039172,64.4170082884948],"label":"Plataform diving"},{"segment":[67.45114273686593,71.65225197307213],"label":"Plataform diving"},{"segment":[73.98620154874222,77.48712591224738],"label":"Plataform diving"},{"segment":[78.88749565764942,84.02218472412365],"label":"Plataform diving"},{"segment":[109.46223509892775,121.36537793484527],"label":"Plataform diving"},{"segment":[131.1679661526597,136.30265521913392],"label":"Plataform diving"},{"segment":[137.00284009183497,142.3709241158762],"label":"Plataform diving"}]},"WElK913B33U":{"duration":169.111,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WElK913B33U","annotations":[{"segment":[0.2,49.84204212168487],"label":"Elliptical trainer"},{"segment":[52.742901716068644,101.2663712948518],"label":"Elliptical trainer"},{"segment":[119.74055226209049,130.29038065522622],"label":"Elliptical trainer"}]},"B92SbWzu8yI":{"duration":213.647,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=B92SbWzu8yI","annotations":[{"segment":[21.33135003767498,187.6492192293292],"label":"Playing congas"}]},"FrV8r4l5ZUM":{"duration":147.563,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FrV8r4l5ZUM","annotations":[{"segment":[6.209620904836194,140.5404851794072],"label":"Rope skipping"}]},"5wkGeYBS7hQ":{"duration":189.313,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5wkGeYBS7hQ","annotations":[{"segment":[19.19386115444618,176.8788127925117],"label":"Beer 
pong"}]},"2N129Bw1jiQ":{"duration":17.97,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=2N129Bw1jiQ","annotations":[{"segment":[4.34059731,13.84113159],"label":"Hopscotch"}]},"JqD5p8LDqWI":{"duration":202.153,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=JqD5p8LDqWI","annotations":[]},"4avZ79LB5n4":{"duration":12.98,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4avZ79LB5n4","annotations":[{"segment":[2.369162751950078,7.431476324492981],"label":"Plataform diving"}]},"UiK3kYsvvQ8":{"duration":217.71,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=UiK3kYsvvQ8","annotations":[{"segment":[16.64244063095857,203.78498731786004],"label":"Belly dance"}]},"u9YrRYp2t3I":{"duration":117.633,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=u9YrRYp2t3I","annotations":[{"segment":[13.975289212225649,16.90693507513987],"label":"Canoeing"},{"segment":[34.13035451976092,36.32908891694658],"label":"Canoeing"},{"segment":[61.79776235101387,62.713901683174576],"label":"Canoeing"},{"segment":[74.99016873412786,80.30377600771213],"label":"Canoeing"}]},"8HxMXHc8Nr0":{"duration":182.811,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8HxMXHc8Nr0","annotations":[{"segment":[15.068359023726549,42.077689578135974],"label":"Assembling bicycle"},{"segment":[59.7048316241716,91.83171954678491],"label":"Assembling bicycle"}]},"PQgydNnSggw":{"duration":169.48,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=PQgydNnSggw","annotations":[{"segment":[58.69008112324494,160.4724290171607],"label":"Getting a 
tattoo"}]},"x3l7GB-O0gw":{"duration":189.545,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=x3l7GB-O0gw","annotations":[]},"74qSBsksI-E":{"duration":59.03,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=74qSBsksI-E","annotations":[{"segment":[9.944950810277309,48.71184239478423],"label":"Shaving"}]},"134sVNOzn_w":{"duration":125.365,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=134sVNOzn_w","annotations":[{"segment":[22.874503900156007,97.96543057722307],"label":"Fixing the roof"}]},"_Iece35DRMQ":{"duration":180.025,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=_Iece35DRMQ","annotations":[]},"9bERRZ2eTbo":{"duration":113.894,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9bERRZ2eTbo","annotations":[{"segment":[0.01,72.98433140216241],"label":"Making a lemonade"}]},"Ivyz2upE-CI":{"duration":105.25999999999999,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ivyz2upE-CI","annotations":[]},"FRFjdn_pjYk":{"duration":58.352,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FRFjdn_pjYk","annotations":[{"segment":[0,58.101],"label":"Elliptical trainer"}]},"cc17BiSDn8w":{"duration":41.59,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=cc17BiSDn8w","annotations":[{"segment":[1.6676710399999999,37.46862914352574],"label":"Kayaking"}]},"-TWiYyvt2Ec":{"duration":123.21,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-TWiYyvt2Ec","annotations":[{"segment":[4.274312572135804,121.33813488195277],"label":"Windsurfing"}]},"dI1GZfJ-A0E":{"duration":38.778,"subset":"training","resolution":"320x180","url":"https://www.youtube.com/watch?v=dI1GZfJ-A0E","annotations":[{"segment":[12.945939769563573,23.351087463338533],"label":"Changing car 
wheel"}]},"NLCNBK2YJQU":{"duration":122.83,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NLCNBK2YJQU","annotations":[{"segment":[0.01,121.87524000551858],"label":"Sailing"}]},"wj_xmkHjItg":{"duration":212.393,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wj_xmkHjItg","annotations":[{"segment":[14.241720748829952,198.40917160686428],"label":"Skiing"}]},"vKNsvOvC5mA":{"duration":97.501,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vKNsvOvC5mA","annotations":[{"segment":[24.337120641288237,88.06995390327613],"label":"Canoeing"}]},"Epl3pExUuNs":{"duration":136.023,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Epl3pExUuNs","annotations":[{"segment":[5.304134165366614,124.11673946957877],"label":"Riding bumper cars"}]},"8VPjByN_v9w":{"duration":99.89,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8VPjByN_v9w","annotations":[{"segment":[0.01,98.72280897902584],"label":"Breakdancing"}]},"XumLmNQiRjk":{"duration":97.03999999999999,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=XumLmNQiRjk","annotations":[{"segment":[10.939838640000001,69.60780424],"label":"Volleyball"},{"segment":[16.1979285850234,18.771431257410296],"label":"Volleyball"},{"segment":[40.87327773790951,43.59816292043681],"label":"Volleyball"},{"segment":[50.56175838689547,59.03917895475819],"label":"Volleyball"},{"segment":[64.943096850234,70.09010219500779],"label":"Volleyball"}]},"bru03d1k6DY":{"duration":220.428,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bru03d1k6DY","annotations":[{"segment":[51.82945638629283,201.48272118380063],"label":"Ice 
fishing"}]},"lsvZBtYMXZM":{"duration":102.97999999999999,"subset":"training","resolution":"256x144","url":"https://www.youtube.com/watch?v=lsvZBtYMXZM","annotations":[{"segment":[8.125122,99.1247529625585],"label":"Tango"}]},"fLR-b-FDjlM":{"duration":178.1,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=fLR-b-FDjlM","annotations":[]},"a_n3mR1iVDI":{"duration":189.127,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=a_n3mR1iVDI","annotations":[{"segment":[25.078447737909517,34.51974570982839],"label":"Curling"},{"segment":[41.30567862714509,56.94282839313573],"label":"Curling"},{"segment":[69.62957254290171,83.49647893915757],"label":"Curling"},{"segment":[96.18322308892357,131.58809048361934],"label":"Curling"}]},"mEEovBtzwHc":{"duration":145.427,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mEEovBtzwHc","annotations":[{"segment":[0,117.95400936037441],"label":"Decorating the Christmas 
tree"}]},"-VexUX6OJBM":{"duration":126.549,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=-VexUX6OJBM","annotations":[{"segment":[4.440362714508581,6.216507800312012],"label":"Powerbocking"},{"segment":[8.190002340093605,10.163496879875197],"label":"Powerbocking"},{"segment":[34.83217862714508,35.62157644305773],"label":"Powerbocking"},{"segment":[42.13410842433697,48.44929095163807],"label":"Powerbocking"},{"segment":[50.620134945397815,56.73796801872075],"label":"Powerbocking"},{"segment":[60.487607644305776,61.67170436817473],"label":"Powerbocking"},{"segment":[71.73652652106084,75.48616614664586],"label":"Powerbocking"},{"segment":[78.6561911076443,81.22213962558502],"label":"Powerbocking"},{"segment":[84.57761076443059,92.67021762870515],"label":"Powerbocking"},{"segment":[94.64402418096725,106.09210218408737],"label":"Powerbocking"}]},"m9CbLJdYqHw":{"duration":43.004,"subset":"validation","resolution":"424x240","url":"https://www.youtube.com/watch?v=m9CbLJdYqHw","annotations":[{"segment":[0.01,0.02],"label":"Kneeling"},{"segment":[0,42.981],"label":"Kneeling"}]},"PU6f-DcDngM":{"duration":186.57,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=PU6f-DcDngM","annotations":[{"segment":[0,186.28127111479017],"label":"Playing polo"}]},"1buoiCgXG1Q":{"duration":22.83,"subset":"validation","resolution":"1906x1080","url":"https://www.youtube.com/watch?v=1buoiCgXG1Q","annotations":[{"segment":[3.9740607499999996,15.2895545],"label":"Kayaking"}]},"B_PhHrBEeNI":{"duration":169.92000000000002,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=B_PhHrBEeNI","annotations":[{"segment":[24.918597407176286,28.364786410296414],"label":"Playing harmonica"},{"segment":[31.280792489859596,41.3542680374415],"label":"Playing harmonica"},{"segment":[49.307011890795636,55.40411551170047],"label":"Playing harmonica"},{"segment":[84.5641763073323,92.51692016068644],"label":"Playing 
harmonica"},{"segment":[113.98932856474259,120.88170657098284],"label":"Playing harmonica"}]},"FmugQfHQ4p0":{"duration":82.13,"subset":"training","resolution":"410x240","url":"https://www.youtube.com/watch?v=FmugQfHQ4p0","annotations":[{"segment":[9.219706708268331,41.10452574102964],"label":"Vacuuming floor"}]},"2NAbdlHXcTI":{"duration":21.13,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=2NAbdlHXcTI","annotations":[{"segment":[0.8184364799999998,19.798380655226207],"label":"Discus throw"}]},"m_gr7WdjJmc":{"duration":186.689,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=m_gr7WdjJmc","annotations":[{"segment":[15.43601727742579,64.65652472386896],"label":"Skiing"},{"segment":[90.86862934165367,116.20699713884554],"label":"Skiing"},{"segment":[140.67162811544463,162.22380302340093],"label":"Skiing"},{"segment":[176.78608336661466,186.688434],"label":"Skiing"}]},"flneQOXwGxw":{"duration":31.63,"subset":"training","resolution":"384x288","url":"https://www.youtube.com/watch?v=flneQOXwGxw","annotations":[{"segment":[6.093205429797192,15.270016846333853],"label":"Cricket"}]},"T0wmRC8Ka2Y":{"duration":49.343,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=T0wmRC8Ka2Y","annotations":[{"segment":[6.697018901164211,46.802153535101404],"label":"Playing beach volleyball"}]},"ffGPrFNbQkA":{"duration":153.97,"subset":"training","resolution":"1280x718","url":"https://www.youtube.com/watch?v=ffGPrFNbQkA","annotations":[{"segment":[19.937029631825276,124.90669166926676],"label":"Paintball"}]},"vtYbJD2J8Tw":{"duration":71.12,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vtYbJD2J8Tw","annotations":[{"segment":[31.067646801872076,71.12272],"label":"Removing 
curlers"}]},"PG3f3Wkkptk":{"duration":15.256,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=PG3f3Wkkptk","annotations":[{"segment":[0.2,15.001000000000001],"label":"Camel ride"}]},"uX-UwKfRvJY":{"duration":124.994,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uX-UwKfRvJY","annotations":[{"segment":[55.326026189928854,70.50662432076999],"label":"Doing crunches"}]},"xevpFDYTJ0U":{"duration":29.21,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=xevpFDYTJ0U","annotations":[{"segment":[0.3189936146645866,18.182636035881433],"label":"Using uneven bars"}]},"JFfLwtTd8PI":{"duration":141.944,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JFfLwtTd8PI","annotations":[]},"yfikZ4E_uko":{"duration":125.85,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=yfikZ4E_uko","annotations":[{"segment":[4.319418708721138,118.39133893759751],"label":"Preparing salad"}]},"-9l1Rh10bO8":{"duration":92.02000000000001,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-9l1Rh10bO8","annotations":[{"segment":[12.633104137285493,16.07849617472699],"label":"Doing karate"},{"segment":[19.523888212168487,23.83062825897036],"label":"Doing karate"},{"segment":[33.30545636193448,36.176616393135724],"label":"Doing karate"},{"segment":[41.34470444929797,42.493168461778474],"label":"Doing karate"},{"segment":[51.68088056162246,52.25511256786272],"label":"Doing karate"},{"segment":[54.839156595943834,61.29926666614664],"label":"Doing karate"},{"segment":[64.02686869578783,68.3336087425897],"label":"Doing karate"},{"segment":[80.536038875195,82.25873489391576],"label":"Doing karate"}]},"EFGtb9IDQao":{"duration":89.49,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EFGtb9IDQao","annotations":[{"segment":[29.56641683407365,76.36619253500822],"label":"Cleaning 
shoes"}]},"ZVNnoOcohaU":{"duration":57.911,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZVNnoOcohaU","annotations":[]},"w_OiqLfRJHI":{"duration":163.47,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=w_OiqLfRJHI","annotations":[{"segment":[93.08267623221936,101.24334814820592],"label":"Paintball"},{"segment":[108.12891590639626,116.79963086115445],"label":"Paintball"}]},"jwNny3H4GpI":{"duration":129.41,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jwNny3H4GpI","annotations":[]},"aLb79wtvxoQ":{"duration":231.06,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=aLb79wtvxoQ","annotations":[]},"dJknA-jTNGc":{"duration":54.614,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dJknA-jTNGc","annotations":[{"segment":[0.2,38.851084043681745],"label":"Rafting"}]},"8GxWehFZVRE":{"duration":32.81,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=8GxWehFZVRE","annotations":[{"segment":[0.01,19.348054379318032],"label":"Playing accordion"}]},"3iHHhCHcT8I":{"duration":183.3,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=3iHHhCHcT8I","annotations":[{"segment":[11.435257410296412,21.15522620904836],"label":"Smoking a cigarette"},{"segment":[32.87636505460219,40.30928237129485],"label":"Smoking a cigarette"},{"segment":[59.74921996879875,66.89625585023401],"label":"Smoking a cigarette"},{"segment":[76.33034321372855,90.33853354134165],"label":"Smoking a cigarette"},{"segment":[101.77379095163806,107.49141965678626],"label":"Smoking a cigarette"},{"segment":[122.35725429017161,128.07488299531983],"label":"Smoking a cigarette"},{"segment":[143.22659906396254,158.95007800312013],"label":"Smoking a cigarette"},{"segment":[168.95592823712948,175.81708268330735],"label":"Smoking a 
cigarette"}]},"S7LNJrgseNk":{"duration":187.409,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=S7LNJrgseNk","annotations":[]},"lcVkXTsnR7A":{"duration":228.25,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=lcVkXTsnR7A","annotations":[{"segment":[36.426736680000005,77.40025320000001],"label":"Playing harmonica"},{"segment":[91.34188536,186.37335023400937],"label":"Playing harmonica"}]},"E_6XYa_WO8I":{"duration":131.43,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=E_6XYa_WO8I","annotations":[{"segment":[12.299625585023403,121.56129953198129],"label":"Chopping wood"}]},"Atdxx_Pxnd4":{"duration":41.82,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Atdxx_Pxnd4","annotations":[{"segment":[0.2,40.58149898909657],"label":"Calf roping"}]},"xkIhTMJ_ThA":{"duration":11.4,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=xkIhTMJ_ThA","annotations":[{"segment":[0,11.400996],"label":"Playing racquetball"}]},"kcPbEkv5UXE":{"duration":213.345,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=kcPbEkv5UXE","annotations":[{"segment":[0.01,209.3507431575663],"label":"Playing drums"}]},"Quj1J31xQFM":{"duration":30.65,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=Quj1J31xQFM","annotations":[{"segment":[0.01,20.60888628549142],"label":"Washing face"}]},"BTtMoBOHrlQ":{"duration":190.03,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=BTtMoBOHrlQ","annotations":[{"segment":[21.229329173166928,173.66770670826833],"label":"Tai 
chi"}]},"di5f5hM05Vw":{"duration":167.02,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=di5f5hM05Vw","annotations":[]},"lidR3AK0ZzQ":{"duration":17.79,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lidR3AK0ZzQ","annotations":[{"segment":[4.317622464898596,13.672471138845554],"label":"Snatch"}]},"f1tPFhJ1chI":{"duration":71.84,"subset":"testing","resolution":"632x360","url":"https://www.youtube.com/watch?v=f1tPFhJ1chI","annotations":[]},"lJ02cAklA6o":{"duration":18.62,"subset":"training","resolution":"656x480","url":"https://www.youtube.com/watch?v=lJ02cAklA6o","annotations":[{"segment":[0.010000000000000002,18.56],"label":"Polishing shoes"}]},"cy3a8BJJ-YQ":{"duration":141.085,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=cy3a8BJJ-YQ","annotations":[{"segment":[0,141.084444],"label":"Playing blackjack"}]},"DXIHbnyfNGA":{"duration":220.683,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DXIHbnyfNGA","annotations":[{"segment":[43.72335572606578,87.79098977223089],"label":"Longboarding"},{"segment":[102.59496059656786,113.95614750826833],"label":"Longboarding"},{"segment":[121.185993724805,139.43274846177846],"label":"Longboarding"},{"segment":[151.1382137647426,169.0406901104524],"label":"Longboarding"},{"segment":[183.8446609347894,220.6824488],"label":"Longboarding"}]},"pF8jt67vTDY":{"duration":82.06,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=pF8jt67vTDY","annotations":[{"segment":[6.00820327925117,81.941666],"label":"Volleyball"}]},"9-yueOtwiL8":{"duration":230.69,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=9-yueOtwiL8","annotations":[{"segment":[6.295631825273011,10.612636505460218],"label":"Shot put"},{"segment":[14.210140405616224,18.527145085803433],"label":"Shot put"},{"segment":[21.40514820592824,23.923400936037442],"label":"Shot 
put"},{"segment":[27.52090483619345,31.478159126365053],"label":"Shot put"},{"segment":[32.197659906396254,35.79516380655226],"label":"Shot put"},{"segment":[51.983931357254285,59.178939157566305],"label":"Shot put"},{"segment":[67.09344773790951,74.64820592823712],"label":"Shot put"},{"segment":[86.51996879875195,91.19672386895475],"label":"Shot put"},{"segment":[92.27597503900155,94.07472698907956],"label":"Shot put"},{"segment":[96.59297971918878,99.11123244929797],"label":"Shot put"},{"segment":[100.90998439937597,104.50748829953197],"label":"Shot put"},{"segment":[0.2,230.4],"label":"Shot put"},{"segment":[114.5804992199688,121.0560062402496],"label":"Shot put"},{"segment":[122.85475819032762,146.95803432137285],"label":"Shot put"},{"segment":[173.2198127925117,179.33556942277693],"label":"Shot put"},{"segment":[184.01232449297973,187.2500780031201],"label":"Shot put"},{"segment":[209.19485179407175,210.99360374414977],"label":"Shot put"}]},"uaqcvxz_5fY":{"duration":135.68,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=uaqcvxz_5fY","annotations":[{"segment":[6.982048361934478,135.621],"label":"Cleaning windows"}]},"_jVoRRn_Ujw":{"duration":29.56,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=_jVoRRn_Ujw","annotations":[]},"2mVwrM0qOGw":{"duration":182.9,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2mVwrM0qOGw","annotations":[{"segment":[85.73860374414977,95.15415756630264],"label":"Archery"},{"segment":[133.3870124804992,175.32902496099842],"label":"Archery"}]},"QG2xYcN_bl0":{"duration":27.24,"subset":"testing","resolution":"480x720","url":"https://www.youtube.com/watch?v=QG2xYcN_bl0","annotations":[]},"vP-wI1egoHA":{"duration":120.605,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=vP-wI1egoHA","annotations":[{"segment":[0.2,109.72565810221185],"label":"Riding bumper 
cars"}]},"0-auIBOTx9E":{"duration":151.65,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=0-auIBOTx9E","annotations":[{"segment":[10.646215975039,119.94736665210607],"label":"Playing ten pins"},{"segment":[127.9911742776911,151.649432],"label":"Playing ten pins"}]},"PVed6JEd3ZM":{"duration":38.615,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PVed6JEd3ZM","annotations":[{"segment":[0.5416989079563184,27.446078003120125],"label":"Using the monkey bar"}]},"74MEdLMrXAQ":{"duration":202.757,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=74MEdLMrXAQ","annotations":[{"segment":[5.377321265210608,10.122016499219969],"label":"Futsal"},{"segment":[13.285146655226209,24.35610220124805],"label":"Futsal"},{"segment":[27.835545372854916,45.23276123088923],"label":"Futsal"},{"segment":[49.977456464898594,59.46684693291731],"label":"Futsal"},{"segment":[64.21154216692668,74.9661846973479],"label":"Futsal"},{"segment":[79.078253900156,95.52653071138845],"label":"Futsal"},{"segment":[99.95491292979719,110.7095554602184],"label":"Futsal"},{"segment":[115.13793767862715,132.53515353666148],"label":"Futsal"},{"segment":[136.33090972386896,148.6671173322933],"label":"Futsal"},{"segment":[153.09549955070202,167.3295852527301],"label":"Futsal"},{"segment":[171.12534143993759,179.98210587675507],"label":"Futsal"},{"segment":[183.14523603276132,188.52255729797193],"label":"Futsal"},{"segment":[192.3183134851794,202.556643],"label":"Futsal"}]},"w4oF8qNhzXg":{"duration":137.625,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=w4oF8qNhzXg","annotations":[]},"-rJVr9sQ1h8":{"duration":107.369,"subset":"training","resolution":"396x224","url":"https://www.youtube.com/watch?v=-rJVr9sQ1h8","annotations":[{"segment":[14.884428126390741,85.12554848909659],"label":"Grooming 
dog"}]},"bwsa4iyYmw4":{"duration":214.576,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=bwsa4iyYmw4","annotations":[]},"u713piB4VWk":{"duration":110.504,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=u713piB4VWk","annotations":[{"segment":[0.01,83.0933132074883],"label":"Sumo"}]},"J0-OVQ-JB5g":{"duration":203.198,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=J0-OVQ-JB5g","annotations":[{"segment":[17.106542901716068,172.34955382215287],"label":"Grooming dog"}]},"GZjNt2TsChs":{"duration":64.552,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=GZjNt2TsChs","annotations":[{"segment":[6.49542912328738,19.889104679833448],"label":"Fun sliding down"},{"segment":[43.95743895061924,55.9412547888804],"label":"Fun sliding down"}]},"5Blj6-K0J64":{"duration":63.762,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5Blj6-K0J64","annotations":[]},"aEyTdUOp-qs":{"duration":29.095,"subset":"training","resolution":"380x216","url":"https://www.youtube.com/watch?v=aEyTdUOp-qs","annotations":[{"segment":[0.01,29.049213703588144],"label":"Mooping floor"}]},"Z-C5yMAUmM4":{"duration":74.19,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Z-C5yMAUmM4","annotations":[{"segment":[0.11569578783151327,1.8511326053042123],"label":"Drinking beer"}]},"v6DdLg_Xnkg":{"duration":151.696,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=v6DdLg_Xnkg","annotations":[]},"cffsp2tCT5k":{"duration":211.023,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=cffsp2tCT5k","annotations":[{"segment":[0.01,207.40164906552263],"label":"Carving 
jack-o-lanterns"}]},"5sl-E-AwptI":{"duration":95.32,"subset":"training","resolution":"562x480","url":"https://www.youtube.com/watch?v=5sl-E-AwptI","annotations":[{"segment":[1.2639663967511081,47.048471199999994],"label":"Playing field hockey"},{"segment":[53.56624766,90.78252767253547],"label":"Playing field hockey"}]},"VhvGXJ2XSPY":{"duration":186.92,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=VhvGXJ2XSPY","annotations":[]},"86sxvTk3YEY":{"duration":225.629,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=86sxvTk3YEY","annotations":[{"segment":[65.47091911076443,190.07686193447736],"label":"Arm wrestling"}]},"Se7eQXlW-BA":{"duration":64.529,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Se7eQXlW-BA","annotations":[{"segment":[34.42849119679073,36.74385756382636],"label":"Playing ten pins"}]},"cCimLg-8Dy4":{"duration":46.626,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=cCimLg-8Dy4","annotations":[]},"Cai2pNQrv7w":{"duration":216.97,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Cai2pNQrv7w","annotations":[]},"2P555wnyj_8":{"duration":48.716,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=2P555wnyj_8","annotations":[{"segment":[0.01,47.30408054288769],"label":"Welding"}]},"xlOc6r6E08w":{"duration":48.44,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=xlOc6r6E08w","annotations":[{"segment":[1.1334670202808113,40.804812730109205],"label":"Javelin throw"}]},"vi8zTSA6bhY":{"duration":13.027,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vi8zTSA6bhY","annotations":[{"segment":[0.01,13.001],"label":"Swinging at the 
playground"}]},"69SYd5feEBQ":{"duration":92.927,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=69SYd5feEBQ","annotations":[{"segment":[0.2,92.691],"label":"Knitting"}]},"jM0l31xZj8E":{"duration":72.75,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jM0l31xZj8E","annotations":[{"segment":[2.95078261361747,69.57037413416538],"label":"Playing saxophone"}]},"jCNaa108OhI":{"duration":137.51,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=jCNaa108OhI","annotations":[{"segment":[9.224443853354133,47.40935096723869],"label":"Shoveling snow"}]},"46D3Yvu-BL8":{"duration":143.11,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=46D3Yvu-BL8","annotations":[{"segment":[8.531156733333516,125.11012062274162],"label":"Playing racquetball"}]},"xM0jxaj73bg":{"duration":212.92000000000002,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=xM0jxaj73bg","annotations":[]},"TpURw9nc8ks":{"duration":52.06,"subset":"training","resolution":"160x120","url":"https://www.youtube.com/watch?v=TpURw9nc8ks","annotations":[{"segment":[6.573636505460218,25.726453978159128],"label":"Croquet"},{"segment":[27.10610608424337,31.001594383775352],"label":"Croquet"}]},"3HUvtWSu4mA":{"duration":122.787,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=3HUvtWSu4mA","annotations":[{"segment":[9.577778485289071,67.23600496672928],"label":"Clipping cat claws"}]},"o9gsbkp135s":{"duration":27.377,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=o9gsbkp135s","annotations":[{"segment":[0.01,27.376325],"label":"Removing ice from car"}]},"agM7yjqVKo0":{"duration":63.18,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=agM7yjqVKo0","annotations":[{"segment":[22.045258970358812,24.21526209048362],"label":"Layup drill in 
basketball"},{"segment":[30.72527145085803,33.092547581903275],"label":"Layup drill in basketball"}]},"1fyxOUEihAQ":{"duration":109.993,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1fyxOUEihAQ","annotations":[{"segment":[0.2,30.529638065522622],"label":"Blowing leaves"},{"segment":[36.18962714508581,109.741],"label":"Blowing leaves"}]},"ZTHsS5lQyvQ":{"duration":153.35,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZTHsS5lQyvQ","annotations":[{"segment":[16.093452799999998,128.22573220592824],"label":"Making a sandwich"}]},"ndET50Ccnr8":{"duration":154.18,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=ndET50Ccnr8","annotations":[{"segment":[2.886458658346334,148.41208268330735],"label":"Playing water polo"}]},"73AGD3RWPEw":{"duration":56.239,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=73AGD3RWPEw","annotations":[{"segment":[8.329321372854915,43.57550234009361],"label":"Slacklining"}]},"veaxOUe_8HE":{"duration":205.892,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=veaxOUe_8HE","annotations":[{"segment":[0.01,186.591639625585],"label":"Playing beach volleyball"}]},"dMjOeGJBF9M":{"duration":28.352,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dMjOeGJBF9M","annotations":[{"segment":[0.01,25.12229618720749],"label":"Fun sliding 
down"}]},"L3LdtLFpQl0":{"duration":100.125,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=L3LdtLFpQl0","annotations":[]},"F_sbhegCsyg":{"duration":88.863,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=F_sbhegCsyg","annotations":[{"segment":[23.151306278101462,46.44124232449299],"label":"Rafting"},{"segment":[57.393057678627144,81.23751642433699],"label":"Rafting"}]},"qVfcSGDJc-M":{"duration":205.335,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=qVfcSGDJc-M","annotations":[]},"zyh16NahocQ":{"duration":199.135,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zyh16NahocQ","annotations":[{"segment":[142.86624024960997,176.42647113884556],"label":"Blow-drying hair"}]},"2MRR5NxbO9k":{"duration":155.62,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2MRR5NxbO9k","annotations":[{"segment":[22.942438154669045,134.1343606397317],"label":"Mowing the lawn"}]},"fNhw7xE3NM8":{"duration":16.58,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=fNhw7xE3NM8","annotations":[]},"H_IfIsKQ3Zw":{"duration":125.644,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=H_IfIsKQ3Zw","annotations":[]},"-aE1lALKV9M":{"duration":225.907,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-aE1lALKV9M","annotations":[]},"AQQT21DBT_E":{"duration":120.373,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AQQT21DBT_E","annotations":[{"segment":[5.0702817285491415,28.919384673946954],"label":"Grooming dog"},{"segment":[36.8064895850234,74.73970844305772],"label":"Grooming 
dog"}]},"6M5UgOj_eoM":{"duration":59.862,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=6M5UgOj_eoM","annotations":[]},"S7Wi1dNHhYo":{"duration":4.32,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=S7Wi1dNHhYo","annotations":[]},"2L9pp85KvbQ":{"duration":73.96000000000001,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=2L9pp85KvbQ","annotations":[{"segment":[0.2,73.75555555555555],"label":"Smoking hookah"}]},"dFVX_2UQ2WY":{"duration":55.78,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dFVX_2UQ2WY","annotations":[{"segment":[25.11324336973479,40.58091575663027],"label":"Cleaning shoes"}]},"MFeI2mRg6Ho":{"duration":192.935,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MFeI2mRg6Ho","annotations":[{"segment":[8.124472698907956,37.91420592823713],"label":"Hurling"},{"segment":[46.64049141965678,52.35771294851794],"label":"Hurling"},{"segment":[62.88943681747269,80.34200780031202],"label":"Hurling"},{"segment":[92.37826365054603,94.48460842433697],"label":"Hurling"},{"segment":[107.72448985959439,117.05258814352574],"label":"Hurling"},{"segment":[123.07071606864275,132.09790795631827],"label":"Hurling"},{"segment":[148.0612979719189,162.80724024961],"label":"Hurling"},{"segment":[168.5250546021841,182.36818408736352],"label":"Hurling"}]},"WEoelJQYlxk":{"duration":64.691,"subset":"training","resolution":"640x352","url":"https://www.youtube.com/watch?v=WEoelJQYlxk","annotations":[{"segment":[1.6634110764430576,13.861758970358814],"label":"Hand car wash"},{"segment":[47.230793291731665,49.75111310452418],"label":"Hand car wash"}]},"i2e67kStfk4":{"duration":23.34,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=i2e67kStfk4","annotations":[{"segment":[4.989725319745109,18.111711508218626],"label":"Using uneven 
bars"}]},"VSeBb4e9ysU":{"duration":151.84,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VSeBb4e9ysU","annotations":[{"segment":[21.1255884,126.33241131903274],"label":"Shoveling snow"}]},"TNVdpnrtn5A":{"duration":158.779,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=TNVdpnrtn5A","annotations":[{"segment":[53.75150360300126,80.25570123213092],"label":"Kneeling"},{"segment":[92.64084031116347,103.04435713755082],"label":"Kneeling"},{"segment":[106.75989886126057,145.64923556942279],"label":"Kneeling"}]},"BhgcXqWQhkQ":{"duration":190.17,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=BhgcXqWQhkQ","annotations":[]},"OSndW3d2XxU":{"duration":4.95,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=OSndW3d2XxU","annotations":[{"segment":[0.07715090483619345,2.7234269407176286],"label":"Hammer throw"}]},"tt5-QtDwm3Y":{"duration":77.88,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=tt5-QtDwm3Y","annotations":[{"segment":[11.785231833763147,25.514419433920214],"label":"Layup drill in basketball"},{"segment":[47.262424570452204,70.22540035569423],"label":"Layup drill in basketball"}]},"nmUJ2GfVkKY":{"duration":147.17000000000002,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nmUJ2GfVkKY","annotations":[{"segment":[22.49988543525741,33.74982815288612],"label":"Triple jump"},{"segment":[56.93848559126366,73.69860433385335],"label":"Triple jump"},{"segment":[113.41778984711388,131.09627126053041],"label":"Triple jump"}]},"WWip1_lFvGg":{"duration":20.388,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=WWip1_lFvGg","annotations":[{"segment":[0.2,19.439831513260533],"label":"Putting in contact 
lenses"}]},"9pQH9u9uar4":{"duration":170.016,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=9pQH9u9uar4","annotations":[]},"lxMSX0Y0b8I":{"duration":201.619,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=lxMSX0Y0b8I","annotations":[{"segment":[25.751942413517845,31.090759743149594],"label":"Capoeira"},{"segment":[36.11552899456771,41.45434632419946],"label":"Capoeira"},{"segment":[46.793163653831215,51.50388482703569],"label":"Capoeira"},{"segment":[55.58650984381291,63.43771179915372],"label":"Capoeira"},{"segment":[68.14843297235821,76.94177916233991],"label":"Capoeira"},{"segment":[82.5946445701853,85.10702919589436],"label":"Capoeira"},{"segment":[89.5037022908852,94.84251962051695],"label":"Capoeira"},{"segment":[100.1813369501487,106.1482504362077],"label":"Capoeira"},{"segment":[111.80111584405309,116.82588509547121],"label":"Capoeira"},{"segment":[121.5366062686757,124.99113512902565],"label":"Capoeira"},{"segment":[139.12329864863912,146.34640444755263],"label":"Capoeira"},{"segment":[150.4290294643299,158.28023046728973],"label":"Capoeira"},{"segment":[163.30499968847352,169.2719131386293],"label":"Capoeira"},{"segment":[173.35453813084112,179.0074035046729],"label":"Capoeira"},{"segment":[184.0321727258567,191.2552784813084],"label":"Capoeira"}]},"tYZJ4O5Hsz4":{"duration":157.385,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=tYZJ4O5Hsz4","annotations":[{"segment":[0.19999999999999998,153.1746495327103],"label":"Playing drums"}]},"kW63TeJo4JY":{"duration":23.592,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kW63TeJo4JY","annotations":[{"segment":[0.19999999999999998,21.901117308411216],"label":"Hitting a 
pinata"}]},"NWaMWZUuTZc":{"duration":44.75,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=NWaMWZUuTZc","annotations":[{"segment":[1.203840559526497,43.99720273086606],"label":"Breakdancing"}]},"XYW6F_4qKJU":{"duration":59.769,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=XYW6F_4qKJU","annotations":[{"segment":[9.316848673946957,24.884316692667706],"label":"Clipping cat claws"},{"segment":[34.94988299531981,51.166628705148206],"label":"Clipping cat claws"}]},"hjaUbrqPe-w":{"duration":223.562,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=hjaUbrqPe-w","annotations":[{"segment":[11.282544430417142,211.51287597247324],"label":"Rafting"}]},"QdvC8Xt-ym0":{"duration":65.25,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QdvC8Xt-ym0","annotations":[{"segment":[2.6454695787831515,64.30526053042122],"label":"Playing polo"}]},"hsPepNAzu_Q":{"duration":86.05,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hsPepNAzu_Q","annotations":[{"segment":[9.460968018720749,60.590454758190326],"label":"Horseback riding"}]},"7oBW5W6Fdok":{"duration":44.56,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=7oBW5W6Fdok","annotations":[{"segment":[33.19974726989079,43.06243369734789],"label":"Hammer throw"}]},"6tdIiKzMVcg":{"duration":202.107,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=6tdIiKzMVcg","annotations":[{"segment":[12.927239107644304,188.54851186271452],"label":"Arm 
wrestling"}]},"cM67XJS7yM8":{"duration":41.704,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cM67XJS7yM8","annotations":[{"segment":[10.669731775351014,20.949107510140404],"label":"Curling"}]},"bZF4nakRNF4":{"duration":23.04,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=bZF4nakRNF4","annotations":[{"segment":[3.198195422776911,5.569890904836193],"label":"High jump"},{"segment":[12.864651251170047,16.09878145397816],"label":"High jump"}]},"CVj_bIIbTnc":{"duration":75.91,"subset":"testing","resolution":"618x360","url":"https://www.youtube.com/watch?v=CVj_bIIbTnc","annotations":[]},"hKn_RK3VSAo":{"duration":120.141,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=hKn_RK3VSAo","annotations":[{"segment":[7.68446322886929,120.14002267573696],"label":"Applying sunscreen"}]},"wBhtt9x74TA":{"duration":25.124,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=wBhtt9x74TA","annotations":[{"segment":[0.19999999999999998,5.282269890795631],"label":"Applying sunscreen"},{"segment":[6.221340093603744,11.551942277691108],"label":"Applying sunscreen"},{"segment":[15.272059282371295,24.08286271450858],"label":"Applying sunscreen"}]},"LAD4qVnywmA":{"duration":130.57,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LAD4qVnywmA","annotations":[{"segment":[53.36699009048362,57.84818773166927],"label":"Triple jump"},{"segment":[63.95891178783152,71.29178065522622],"label":"Triple jump"},{"segment":[110.4004146146646,130.565804],"label":"Triple jump"}]},"RgzOQ3Sr1bE":{"duration":122.277,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=RgzOQ3Sr1bE","annotations":[]},"hXBeu7o9uUM":{"duration":219.57,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=hXBeu7o9uUM","annotations":[{"segment":[35.85507488299532,62.274603744149765],"label":"Playing 
saxophone"},{"segment":[65.70571138845554,124.37765210608424],"label":"Playing saxophone"},{"segment":[127.12253822152887,164.5216115444618],"label":"Playing saxophone"},{"segment":[166.58027613104522,186.48070046801874],"label":"Playing saxophone"},{"segment":[188.88247581903278,198.8326879875195],"label":"Playing saxophone"}]},"pi4p4zdoqmw":{"duration":142.757,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pi4p4zdoqmw","annotations":[{"segment":[9.428128929950965,142.756098],"label":"Surfing"}]},"A8q6beSMpEE":{"duration":58.28,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=A8q6beSMpEE","annotations":[{"segment":[8.268223088923557,56.98852886115445],"label":"Cleaning windows"}]},"KqOLTkf5ujs":{"duration":134.258,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KqOLTkf5ujs","annotations":[{"segment":[0.01,134.2577777777778],"label":"Painting fence"}]},"7BM40jqPYgA":{"duration":163.306,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7BM40jqPYgA","annotations":[{"segment":[4.934790475366182,151.96101059997676],"label":"Welding"}]},"2Lqo9im9vPU":{"duration":104.7,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2Lqo9im9vPU","annotations":[]},"I7K2Wji22v4":{"duration":120.094,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=I7K2Wji22v4","annotations":[]},"mJZj3znhBas":{"duration":73.44,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mJZj3znhBas","annotations":[{"segment":[0.34373406084243374,8.707929541341654],"label":"Playing field hockey"},{"segment":[15.009720656786271,20.62404365054602],"label":"Playing field hockey"},{"segment":[23.259338117004685,26.81125674570983],"label":"Playing field hockey"},{"segment":[29.217395171606867,33.685937962558505],"label":"Playing field 
hockey"},{"segment":[35.40460826677067,39.98772907800313],"label":"Playing field hockey"},{"segment":[47.09156633541342,55.914073897035884],"label":"Playing field hockey"},{"segment":[62.10128699219969,63.70537927613105],"label":"Playing field hockey"}]},"YjxjsP6A5H8":{"duration":235.1,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YjxjsP6A5H8","annotations":[{"segment":[162.81916380655227,183.721624024961],"label":"Skateboarding"},{"segment":[185.18846333853355,209.3913120124805],"label":"Skateboarding"}]},"iUIfmMljiOg":{"duration":190.149,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iUIfmMljiOg","annotations":[{"segment":[39.15688506084244,79.7969854648986],"label":"Making a lemonade"},{"segment":[85.72984683775351,136.75245464430577],"label":"Making a lemonade"},{"segment":[142.6853160171607,169.08654912636507],"label":"Making a lemonade"}]},"TjDlEonao3s":{"duration":106.12,"subset":"training","resolution":"1152x720","url":"https://www.youtube.com/watch?v=TjDlEonao3s","annotations":[{"segment":[15.718868954758193,84.07043369734791],"label":"Removing curlers"}]},"afMQA352X_A":{"duration":155.02,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=afMQA352X_A","annotations":[]},"O2JUUFFdWpk":{"duration":151.464,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=O2JUUFFdWpk","annotations":[{"segment":[87.4283938377535,112.71174016380655],"label":"Rock-paper-scissors"}]},"7uE2pMuAM8I":{"duration":59.05,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7uE2pMuAM8I","annotations":[{"segment":[0,59.048343],"label":"Polishing 
shoes"}]},"tJiYIajy41Q":{"duration":15.024,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=tJiYIajy41Q","annotations":[{"segment":[0.2,14.823309],"label":"Futsal"}]},"OHwE8aA90IE":{"duration":3.832,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=OHwE8aA90IE","annotations":[{"segment":[0.44161315616589314,3.6312925170068024],"label":"Cutting the grass"}]},"nK_U3gJWba0":{"duration":174.615,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nK_U3gJWba0","annotations":[]},"5kdiJ2cC5_w":{"duration":221.8,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5kdiJ2cC5_w","annotations":[{"segment":[7.608361934477379,106.22967315],"label":"Playing violin"},{"segment":[124.56563114000001,199.54658346333855],"label":"Playing violin"}]},"hl4CItu_we8":{"duration":15.12,"subset":"testing","resolution":"480x480","url":"https://www.youtube.com/watch?v=hl4CItu_we8","annotations":[]},"Ez5uEh7YyIM":{"duration":147.098,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ez5uEh7YyIM","annotations":[{"segment":[11.015169749647129,23.866201124235445],"label":"Swimming"},{"segment":[26.161028155411927,34.42240546764727],"label":"Swimming"},{"segment":[39.929990342470845,47.04395413911794],"label":"Swimming"},{"segment":[53.469469826412094,70.68067256023573],"label":"Swimming"},{"segment":[84.67911745041229,92.48152935641235],"label":"Swimming"},{"segment":[98.21859693435357,103.03773369982417],"label":"Swimming"},{"segment":[111.75807641829482,117.72462669935368],"label":"Swimming"},{"segment":[123.92065968353018,135.16531213629497],"label":"Swimming"}]},"uuFJdgTT5kE":{"duration":137.49,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uuFJdgTT5kE","annotations":[{"segment":[23.822025950000004,69.28144120000002],"label":"Playing polo"},{"segment":[117.37084633385335,137.215],"label":"Playing 
polo"}]},"II0BlR0BriI":{"duration":147.31,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=II0BlR0BriI","annotations":[{"segment":[70.51880703999998,116.79382801999999],"label":"Cheerleading"}]},"1hB5jVAhSDE":{"duration":32.067,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=1hB5jVAhSDE","annotations":[{"segment":[0.2,31.016208376226206],"label":"Playing congas"}]},"oTCyC7czSTI":{"duration":123.21,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=oTCyC7czSTI","annotations":[]},"Xag8E74bkpw":{"duration":81.061,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Xag8E74bkpw","annotations":[{"segment":[0.009999999999999998,74.1668369851853],"label":"Playing congas"}]},"r4kC4AHDIH8":{"duration":206.147,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=r4kC4AHDIH8","annotations":[{"segment":[13.185674028321676,177.84579729797193],"label":"Rafting"}]},"WmOTDAim7XM":{"duration":16.045,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=WmOTDAim7XM","annotations":[{"segment":[0,15.988518720748832],"label":"Trimming branches or hedges"}]},"esTcWwmykKQ":{"duration":219.43,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=esTcWwmykKQ","annotations":[{"segment":[19.00306664,178.44455692],"label":"Playing water polo"},{"segment":[59.58193632,192.23398423999998],"label":"Playing polo"}]},"kMHMdqkGRB4":{"duration":43.14,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kMHMdqkGRB4","annotations":[{"segment":[9.086172472698907,41.05603858034321],"label":"Using the pommel horse"}]},"hPV-Z73KXak":{"duration":238.4,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hPV-Z73KXak","annotations":[{"segment":[31.86694556765163,90.0502068429238],"label":"Playing 
drums"},{"segment":[130.81367496111977,152.3071399688958],"label":"Playing drums"}]},"lHaxXbcEMp8":{"duration":68.01,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lHaxXbcEMp8","annotations":[]},"JFupxdGxgPQ":{"duration":45.512,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JFupxdGxgPQ","annotations":[{"segment":[7.880960340454434,22.009889239106975],"label":"Calf roping"}]},"4o1k3KUHz6E":{"duration":37.5,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=4o1k3KUHz6E","annotations":[{"segment":[5.0312315694227765,26.150703622464896],"label":"Playing kickball"}]},"Uo0XHGIdNu0":{"duration":23.987,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Uo0XHGIdNu0","annotations":[]},"kQ7ensWEW08":{"duration":122.021,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=kQ7ensWEW08","annotations":[{"segment":[17.64842983360346,118.98459195630635],"label":"Knitting"}]},"bX74cXcY7To":{"duration":213.16,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bX74cXcY7To","annotations":[]},"j_IJQAywTuU":{"duration":72.795,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=j_IJQAywTuU","annotations":[{"segment":[12.037789593216381,69.72832665210609],"label":"Shuffleboard"}]},"IujbHw7SXJE":{"duration":153.14,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=IujbHw7SXJE","annotations":[]},"FQcbPesIO3c":{"duration":220.59,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=FQcbPesIO3c","annotations":[]},"Jtlvh2w1aUM":{"duration":78.554,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Jtlvh2w1aUM","annotations":[{"segment":[0.2,76.34724557722309],"label":"Blowing 
leaves"}]},"1hTqfvjis9E":{"duration":39.92,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1hTqfvjis9E","annotations":[{"segment":[22.149198127925118,33.286014040561625],"label":"Hammer throw"}]},"brewO_8bl4g":{"duration":85.45,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=brewO_8bl4g","annotations":[{"segment":[58.665711081201415,69.5967961732451],"label":"Arm wrestling"}]},"mPtCJg-j4SM":{"duration":191.658,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mPtCJg-j4SM","annotations":[{"segment":[0.01,178.7665491419657],"label":"Using the rowing machine"}]},"W8ILh7ickB4":{"duration":86.56,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=W8ILh7ickB4","annotations":[{"segment":[62.62672858658347,76.25883976599064],"label":"Washing dishes"}]},"n1iu-AlcS-Q":{"duration":35.086,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=n1iu-AlcS-Q","annotations":[{"segment":[5.473533541341654,35.08535],"label":"Shuffleboard"}]},"icGrJ2XBxa0":{"duration":59.536,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=icGrJ2XBxa0","annotations":[{"segment":[9.473741496598638,40.774239185647424],"label":"Canoeing"}]},"zQZe5vxsi40":{"duration":155.249,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zQZe5vxsi40","annotations":[]},"Xdre6bEf3m8":{"duration":15.279,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=Xdre6bEf3m8","annotations":[]},"OM_n35Vcw30":{"duration":228.392,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OM_n35Vcw30","annotations":[{"segment":[0.01,226.0435228306818],"label":"Shuffleboard"}]},"Id4W6VA0uLc":{"duration":78.03999999999999,"subset":"testing","resolution":"240x180","url":"https://www.youtube.com/watch?v=Id4W6VA0uLc","annotations":[]},"gxuere-ofCU":{"duration":136.1
9,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=gxuere-ofCU","annotations":[{"segment":[13.059395475819034,120.08273400936038],"label":"Playing harmonica"}]},"4y8mMJkvAR4":{"duration":170.412,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4y8mMJkvAR4","annotations":[{"segment":[10.368234996879876,36.42174857878315],"label":"Canoeing"},{"segment":[11.165791535101405,36.953452937597504],"label":"Canoeing"}]},"6SFW4g4UVKI":{"duration":32.74,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6SFW4g4UVKI","annotations":[{"segment":[6.42383775351014,32.68],"label":"Polishing shoes"}]},"uMAjCPUZfp0":{"duration":153.763,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=uMAjCPUZfp0","annotations":[{"segment":[6.5940366614664585,146.62739703588144],"label":"Doing fencing"}]},"nD2n9ivzAYc":{"duration":96.34,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nD2n9ivzAYc","annotations":[]},"7fwrkFHTm-Q":{"duration":123.42,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=7fwrkFHTm-Q","annotations":[{"segment":[7.741776983784817,113.08586469155743],"label":"Doing kickboxing"}]},"aOzMA2rpWEw":{"duration":536.869,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aOzMA2rpWEw","annotations":[{"segment":[112.2173697347894,231.9717269890796],"label":"Removing ice from car"}]},"iqmpDgATXbU":{"duration":185.76,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=iqmpDgATXbU","annotations":[{"segment":[101.2572309,185.711666],"label":"Belly dance"},{"segment":[7.387905589703588,91.64977542],"label":"Belly 
dance"}]},"KTdFb9_IAso":{"duration":203.431,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=KTdFb9_IAso","annotations":[]},"9UvVeUYQLgA":{"duration":207.98,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=9UvVeUYQLgA","annotations":[]},"UD7HJBRK2Z4":{"duration":119.91,"subset":"training","resolution":"426x236","url":"https://www.youtube.com/watch?v=UD7HJBRK2Z4","annotations":[{"segment":[38.89405304212169,82.2758814352574],"label":"Tai chi"},{"segment":[91.64070202808112,118.57184711388456],"label":"Tai chi"}]},"Ga-6wy0Nboo":{"duration":194.584,"subset":"training","resolution":"1280x716","url":"https://www.youtube.com/watch?v=Ga-6wy0Nboo","annotations":[{"segment":[5.767677346549644,20.642213661335568],"label":"Skiing"},{"segment":[47.962790566044404,72.55130978028237],"label":"Skiing"},{"segment":[85.9080362670289,87.4258460950683],"label":"Skiing"},{"segment":[92.2828375447943,98.96120078816757],"label":"Skiing"},{"segment":[105.03244010032509,126.88890162409216],"label":"Skiing"},{"segment":[143.28124776691746,152.69166870076162],"label":"Skiing"},{"segment":[156.33441228805614,181.22649346790197],"label":"Skiing"}]},"uWJUoJyvFF0":{"duration":137.601,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=uWJUoJyvFF0","annotations":[{"segment":[15.666073322932919,137.561],"label":"Knitting"}]},"snUWwTH8eQg":{"duration":77.416,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=snUWwTH8eQg","annotations":[{"segment":[0.2,15.085998439937597],"label":"Futsal"},{"segment":[24.49966146645866,37.42294851794072],"label":"Futsal"},{"segment":[41.889558502340094,71.94863650546021],"label":"Futsal"}]},"-Q9WotFs1MI":{"duration":56.68,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=-Q9WotFs1MI","annotations":[{"segment":[5.51656707,44.88863442],"label":"Chopping 
wood"},{"segment":[47.55594816,47.64663456],"label":"Chopping wood"}]},"EHVkkRI7mBQ":{"duration":203.314,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EHVkkRI7mBQ","annotations":[{"segment":[43.42985491419657,50.403992199687984],"label":"Snowboarding"},{"segment":[53.891060842433696,174.03642589703588],"label":"Snowboarding"}]},"AK34mhNU28s":{"duration":104.397,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AK34mhNU28s","annotations":[{"segment":[0.2,31.904580343213723],"label":"Kite flying"},{"segment":[36.95071294851794,86.4519360374415],"label":"Kite flying"}]},"1vTHJMMPZN0":{"duration":153.6,"subset":"testing","resolution":"423x240","url":"https://www.youtube.com/watch?v=1vTHJMMPZN0","annotations":[]},"aQulBdlcGNU":{"duration":191.124,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aQulBdlcGNU","annotations":[{"segment":[8.785006597547326,179.1018572444935],"label":"Doing crunches"}]},"wmrrBnxbHjk":{"duration":60.651,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wmrrBnxbHjk","annotations":[{"segment":[8.624771795076747,39.95326400691475],"label":"Table 
soccer"}]},"gDNgiybMA1s":{"duration":94.32,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=gDNgiybMA1s","annotations":[{"segment":[7.062925063941333,91.96517010340277],"label":"Cumbia"}]},"ltE76mknyt4":{"duration":131.59,"subset":"testing","resolution":"854x470","url":"https://www.youtube.com/watch?v=ltE76mknyt4","annotations":[]},"N75m1Z4RqbA":{"duration":124.88,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=N75m1Z4RqbA","annotations":[]},"IyEnvWbx8V0":{"duration":198.95,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=IyEnvWbx8V0","annotations":[]},"hBjVRKwCUNA":{"duration":222.61,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hBjVRKwCUNA","annotations":[{"segment":[11.1116879875195,222.561],"label":"Using the monkey bar"}]},"v9APkG4il4Q":{"duration":30.024,"subset":"validation","resolution":"416x240","url":"https://www.youtube.com/watch?v=v9APkG4il4Q","annotations":[{"segment":[0.2,22.91839313572543],"label":"Slacklining"}]},"__wPHayoMgw":{"duration":137.11,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=__wPHayoMgw","annotations":[{"segment":[10.15184652,21.39061344766716],"label":"Smoking hookah"},{"segment":[39.35872874370757,48.55669252620446],"label":"Smoking hookah"},{"segment":[52.407002946784544,62.24668513271144],"label":"Smoking hookah"},{"segment":[93.47698076630549,100.53588320403566],"label":"Smoking hookah"}]},"yCcqJnlviQI":{"duration":70.1,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=yCcqJnlviQI","annotations":[{"segment":[0,70.10104308390022],"label":"Volleyball"}]},"s6oXeM9OluM":{"duration":170.226,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=s6oXeM9OluM","annotations":[{"segment":[58.67463806552262,151.33277691107645],"label":"Clipping cat 
claws"}]},"H3cFZelOk2Y":{"duration":202.339,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=H3cFZelOk2Y","annotations":[{"segment":[13.569552262090482,104.13842433697349],"label":"Tug of war"}]},"juP0cn5B60Y":{"duration":71.1,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=juP0cn5B60Y","annotations":[{"segment":[24.49307488299532,28.269196567862718],"label":"Grooming dog"},{"segment":[33.14701872074883,39.5768751950078],"label":"Grooming dog"},{"segment":[53.5451840873635,58.86644461778472],"label":"Grooming dog"}]},"u7dfBgc_SqU":{"duration":177.471,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=u7dfBgc_SqU","annotations":[{"segment":[26.09529414890639,171.22272218628956],"label":"Blow-drying hair"}]},"4E40jmmc2N8":{"duration":9.06,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=4E40jmmc2N8","annotations":[{"segment":[0.9763962558502342,8.731772230889236],"label":"Discus throw"}]},"RULNhPrXnfA":{"duration":187.55,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RULNhPrXnfA","annotations":[{"segment":[76.3455538221529,83.65834633385334],"label":"Playing harmonica"},{"segment":[111.73946957878314,153.86115444617784],"label":"Playing harmonica"},{"segment":[162.92901716068644,177.8471138845554],"label":"Playing harmonica"}]},"HlFnWOx53XU":{"duration":77.39,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=HlFnWOx53XU","annotations":[{"segment":[7.478786271450858,76.11474414976598],"label":"Playing bagpipes"}]},"vPZf3F3bNAA":{"duration":62.79,"subset":"training","resolution":"1280x698","url":"https://www.youtube.com/watch?v=vPZf3F3bNAA","annotations":[{"segment":[7.438870514820593,48.05901872074883],"label":"Javelin 
throw"}]},"fGMZvLvAMK4":{"duration":224.491,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fGMZvLvAMK4","annotations":[{"segment":[16.460281837123823,222.73913278876753],"label":"Skiing"}]},"TSVDMiy_FC4":{"duration":113.43,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TSVDMiy_FC4","annotations":[{"segment":[15.749178756265897,24.77398905480029],"label":"Polishing shoes"},{"segment":[26.897473830926028,40.34621074638904],"label":"Polishing shoes"},{"segment":[43.70839497525479,67.59759870666936],"label":"Polishing shoes"},{"segment":[70.60586852106084,108.65163727301093],"label":"Polishing shoes"}]},"45hcc_VI4t4":{"duration":12.35,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=45hcc_VI4t4","annotations":[]},"zihMznAYlV0":{"duration":72.795,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zihMznAYlV0","annotations":[{"segment":[0.2,27.59607285023401],"label":"Snowboarding"},{"segment":[33.38784122620905,72.594579],"label":"Snowboarding"}]},"3mQ9FPD5OPY":{"duration":78.18,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3mQ9FPD5OPY","annotations":[]},"JN1mex2vCjI":{"duration":46.464,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=JN1mex2vCjI","annotations":[{"segment":[0,46.46312925170068],"label":"Slacklining"}]},"75lRDDRD1Lw":{"duration":64.691,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=75lRDDRD1Lw","annotations":[]},"RrgUFU70PNQ":{"duration":98.639,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RrgUFU70PNQ","annotations":[{"segment":[11.694968108928439,98.637823],"label":"Capoeira"}]},"dexjr5WGeqQ":{"duration":175.52,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=dexjr5WGeqQ","annotations":[{"segment":[0.2,175.3196598639456],"label":"Installing 
carpet"},{"segment":[144.15378331055066,146.88773436974068],"label":"Installing carpet"}]},"JYh2Oejg_lQ":{"duration":39.707,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=JYh2Oejg_lQ","annotations":[{"segment":[17.22043980655226,25.644827625585023],"label":"Playing rubik cube"}]},"m6yPz9fHJnY":{"duration":92.39,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=m6yPz9fHJnY","annotations":[{"segment":[3.2414391575663024,91.12045631825272],"label":"Chopping wood"},{"segment":[61.78033679999999,63.53902351999999],"label":"Chopping wood"}]},"XbSQ_7vh7yc":{"duration":215.06,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XbSQ_7vh7yc","annotations":[{"segment":[1.796237078039817,131.06414985062548],"label":"Washing dishes"},{"segment":[158.52520904065665,181.63268567617067],"label":"Washing dishes"}]},"qpWwrunXZ8g":{"duration":90.094,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=qpWwrunXZ8g","annotations":[]},"V-6-T-Y_CR0":{"duration":165.466,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=V-6-T-Y_CR0","annotations":[]},"Lme4KL45gwk":{"duration":199.204,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Lme4KL45gwk","annotations":[{"segment":[34.5213366321619,55.964448440274225],"label":"Longboarding"},{"segment":[112.2137707195254,114.38915887397157],"label":"Longboarding"}]},"dLLvpM-gB8Q":{"duration":166.441,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=dLLvpM-gB8Q","annotations":[{"segment":[66.98315678183535,162.37712562918736],"label":"Having an ice 
cream"}]},"ZfXkzv-hNlg":{"duration":109.25,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZfXkzv-hNlg","annotations":[{"segment":[0.01,109.201],"label":"Rafting"}]},"FQkvwPpDomw":{"duration":27.49,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FQkvwPpDomw","annotations":[{"segment":[7.398507507800312,23.096210393915758],"label":"Shoveling snow"}]},"1cWWCiNIYnc":{"duration":20.09,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=1cWWCiNIYnc","annotations":[{"segment":[0.01,20.085259],"label":"Shaving"}]},"_cLb2ZRS_4M":{"duration":227.858,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_cLb2ZRS_4M","annotations":[{"segment":[31.992460759654875,198.35325670986023],"label":"Drum corps"}]},"cHdZ_dBVgDE":{"duration":97.87,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cHdZ_dBVgDE","annotations":[{"segment":[4.806166113884555,84.39894048],"label":"Cumbia"}]},"SjdEr8kBCW8":{"duration":39.985,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=SjdEr8kBCW8","annotations":[]},"OUfVZuWyqJQ":{"duration":47.509,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=OUfVZuWyqJQ","annotations":[{"segment":[8.745627251170047,37.13185807488299],"label":"Powerbocking"}]},"Vcv9P-4Tty0":{"duration":45.535,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Vcv9P-4Tty0","annotations":[{"segment":[0.19999999999999998,45.281],"label":"Beach 
soccer"}]},"rMQh0pUt7vU":{"duration":65.318,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=rMQh0pUt7vU","annotations":[]},"LL_leiyIdVQ":{"duration":181.81,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LL_leiyIdVQ","annotations":[{"segment":[8.646345553822153,172.21819422776912],"label":"Tumbling"}]},"bCEdkW675dQ":{"duration":24.1,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=bCEdkW675dQ","annotations":[{"segment":[14.777235917316695,22.297457758190326],"label":"Playing violin"}]},"AkeJ-lJLNkw":{"duration":193.376,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=AkeJ-lJLNkw","annotations":[{"segment":[0.2,31.977898479204473],"label":"Capoeira"},{"segment":[45.25174313094973,68.17929298396426],"label":"Capoeira"},{"segment":[83.26320736094749,116.44781899031062],"label":"Capoeira"},{"segment":[120.36963672832627,155.96767465800673],"label":"Capoeira"}]},"cBAlXvu38dg":{"duration":60.117,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=cBAlXvu38dg","annotations":[{"segment":[6.681905750274658,59.18259266616251],"label":"Kneeling"}]},"qz47x5-R-BU":{"duration":100.496,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qz47x5-R-BU","annotations":[{"segment":[8.779676592823714,90.93236471138846],"label":"Playing drums"}]},"YdenUU3g0XY":{"duration":68.03999999999999,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YdenUU3g0XY","annotations":[{"segment":[35.84292121684867,65.04826443057722],"label":"Playing badminton"}]},"bt4cH91_KPI":{"duration":119.56,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bt4cH91_KPI","annotations":[{"segment":[50.915233310643906,107.90155849755979],"label":"Beer 
pong"}]},"cpeevuvJKjE":{"duration":90.77,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cpeevuvJKjE","annotations":[{"segment":[1.9696222,13.0612274],"label":"Ballet"},{"segment":[26.608052900000004,80.0964567],"label":"Ballet"}]},"2Tm_2wgv2Zs":{"duration":70.171,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=2Tm_2wgv2Zs","annotations":[{"segment":[24.511051482059287,45.739372854914194],"label":"Table soccer"}]},"xabaKyhx7cg":{"duration":121.53,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=xabaKyhx7cg","annotations":[{"segment":[41.427439534316065,46.73621897120783],"label":"Cleaning shoes"}]},"qmKSDwVvxVk":{"duration":21.13,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qmKSDwVvxVk","annotations":[{"segment":[0.8800644999999999,18.669412080343214],"label":"High jump"}]},"LW5lcM_rKBY":{"duration":10.078,"subset":"training","resolution":"406x720","url":"https://www.youtube.com/watch?v=LW5lcM_rKBY","annotations":[{"segment":[0.01,10.077460317460318],"label":"Peeling potatoes"}]},"IFPYE5tCOFw":{"duration":55.06,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=IFPYE5tCOFw","annotations":[]},"vH9gqDUaBQ0":{"duration":213.69,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vH9gqDUaBQ0","annotations":[{"segment":[70.00862488455537,202.69163776099845],"label":"Sailing"}]},"tg6HOGy0BBQ":{"duration":200.2,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=tg6HOGy0BBQ","annotations":[{"segment":[16.6067559,184.38615834633384],"label":"Shaving 
legs"}]},"JBmG2eQNeNA":{"duration":203.8,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JBmG2eQNeNA","annotations":[]},"bG55LSFBA9M":{"duration":45.233,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=bG55LSFBA9M","annotations":[{"segment":[8.602998439937597,37.796780031201244],"label":"River tubing"}]},"WnoJmKZC_qg":{"duration":144.68,"subset":"training","resolution":"810x480","url":"https://www.youtube.com/watch?v=WnoJmKZC_qg","annotations":[{"segment":[23.06391703,78.19103369],"label":"Playing flauta"},{"segment":[85.91855272,121.22399840093604],"label":"Playing flauta"}]},"CHBpVOfPmRA":{"duration":107.37,"subset":"validation","resolution":"853x480","url":"https://www.youtube.com/watch?v=CHBpVOfPmRA","annotations":[{"segment":[0.01,107.36907029478458],"label":"Playing beach volleyball"}]},"m9IDk3YHBfQ":{"duration":99.266,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=m9IDk3YHBfQ","annotations":[]},"WdJUMmy2ox8":{"duration":204.8,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=WdJUMmy2ox8","annotations":[]},"f0rWFvJ7uTk":{"duration":41.22,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=f0rWFvJ7uTk","annotations":[{"segment":[0,41.21535],"label":"Playing kickball"}]},"WgsoxWcUKS4":{"duration":165.698,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WgsoxWcUKS4","annotations":[]},"TgantuXBiJg":{"duration":129.99,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TgantuXBiJg","annotations":[{"segment":[0.01,14.291189510920438],"label":"Putting on makeup"},{"segment":[68.4152689352574,99.4301908525741],"label":"Putting on 
makeup"}]},"Yyv8oxrFGV0":{"duration":70.87,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Yyv8oxrFGV0","annotations":[{"segment":[12.164524715423584,65.0044920051432],"label":"Drinking beer"}]},"5wBo0Gd81-I":{"duration":164.003,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5wBo0Gd81-I","annotations":[{"segment":[6.393954758190328,163.941],"label":"Riding bumper cars"}]},"U4ua-VD7hNM":{"duration":127.641,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=U4ua-VD7hNM","annotations":[{"segment":[0.5964490219763917,110.54188393146417],"label":"Slacklining"}]},"qqZLs6j3YII":{"duration":159.45,"subset":"testing","resolution":"624x480","url":"https://www.youtube.com/watch?v=qqZLs6j3YII","annotations":[]},"6VW7ZPurE7c":{"duration":90.21000000000001,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=6VW7ZPurE7c","annotations":[{"segment":[0,90.209501],"label":"Walking the dog"}]},"BahcBS2r0es":{"duration":200.435,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=BahcBS2r0es","annotations":[{"segment":[19.06901872074883,200.401],"label":"Scuba diving"}]},"Qu3_80O0j5w":{"duration":112.06,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Qu3_80O0j5w","annotations":[{"segment":[8.622647151073563,97.64187612303618],"label":"Swimming"}]},"Bs3TR80BRco":{"duration":144.638,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Bs3TR80BRco","annotations":[{"segment":[0.01,144.6368253968254],"label":"Slacklining"}]},"mWOa-0w0Bls":{"duration":125.481,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=mWOa-0w0Bls","annotations":[{"segment":[7.500620858795545,123.52584515786096],"label":"Painting 
fence"}]},"Hc--KcdMkEA":{"duration":82.39,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Hc--KcdMkEA","annotations":[{"segment":[5.78912368,66.04395744],"label":"Starting a campfire"}]},"hKezMv52Nw8":{"duration":201.94,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=hKezMv52Nw8","annotations":[{"segment":[0,201.94383219954648],"label":"Long jump"}]},"06eyqLosXjU":{"duration":227.788,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=06eyqLosXjU","annotations":[{"segment":[7.947709127064464,216.57505950089623],"label":"Playing drums"}]},"A7PBp9PDW80":{"duration":32.021,"subset":"training","resolution":"1920x820","url":"https://www.youtube.com/watch?v=A7PBp9PDW80","annotations":[{"segment":[0.01,27.358109204368173],"label":"Doing fencing"}]},"eTVzSwuCfd8":{"duration":226.859,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eTVzSwuCfd8","annotations":[{"segment":[12.03000624024961,146.12919344773792],"label":"Blow-drying hair"}]},"K2Pws9z20Do":{"duration":228.46,"subset":"validation","resolution":"1444x1080","url":"https://www.youtube.com/watch?v=K2Pws9z20Do","annotations":[{"segment":[12.830859856474259,219.5502686552262],"label":"Zumba"}]},"cnFw7hjmqJY":{"duration":120.51,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cnFw7hjmqJY","annotations":[{"segment":[19.171703588143522,112.02289547581903],"label":"Layup drill in basketball"}]},"MFfJ4V9Dqvg":{"duration":83.407,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MFfJ4V9Dqvg","annotations":[{"segment":[0.01,83.361],"label":"Building sandcastles"}]},"Fp8e9dbWeSg":{"duration":161.495,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Fp8e9dbWeSg","annotations":[{"segment":[74.34285189986441,155.2158233383102],"label":"Riding bumper 
cars"}]},"rbLCMaf3phk":{"duration":215.11,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=rbLCMaf3phk","annotations":[{"segment":[19.962822351014044,164.58204699],"label":"Shot put"},{"segment":[55.86235162090484,74.31538068486739],"label":"Shot put"},{"segment":[102.4981887098284,214.861666],"label":"Shot put"},{"segment":[0.2,214.861666],"label":"Shot put"},{"segment":[140.7462853151326,214.861666],"label":"Shot put"},{"segment":[189.05967050078004,215.061666],"label":"Shot put"}]},"kXho4FVOvjM":{"duration":229.088,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=kXho4FVOvjM","annotations":[]},"lDJpGI4BZ8k":{"duration":204.85,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=lDJpGI4BZ8k","annotations":[{"segment":[52.7295828159657,204.84643990929706],"label":"Playing harmonica"}]},"I3tqWc5wG78":{"duration":60.12,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=I3tqWc5wG78","annotations":[]},"uiG7A8lEdEg":{"duration":45.79,"subset":"training","resolution":"568x320","url":"https://www.youtube.com/watch?v=uiG7A8lEdEg","annotations":[{"segment":[11.858188134328095,30.529815749999997],"label":"Playing lacrosse"}]},"ze4292jVUcQ":{"duration":33.37,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=ze4292jVUcQ","annotations":[{"segment":[5.1515311299999995,30.37392682605304],"label":"Playing water 
polo"}]},"qWgF9hxNbP8":{"duration":17.857,"subset":"testing","resolution":"400x220","url":"https://www.youtube.com/watch?v=qWgF9hxNbP8","annotations":[]},"XBBT8UvESiE":{"duration":142.53,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XBBT8UvESiE","annotations":[{"segment":[100.81000780031201,117.25971138845556],"label":"Shaving"}]},"0DsdxtAvHDk":{"duration":61.58,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=0DsdxtAvHDk","annotations":[]},"ovq0Fqbxt1c":{"duration":42.84,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ovq0Fqbxt1c","annotations":[{"segment":[31.650037441497663,41.26523868954759],"label":"Polishing forniture"}]},"hfUKfKNKwfo":{"duration":212.579,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=hfUKfKNKwfo","annotations":[]},"nwVRSQr7ZhQ":{"duration":202.85,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nwVRSQr7ZhQ","annotations":[]},"nrKWb9lZm1U":{"duration":161.727,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=nrKWb9lZm1U","annotations":[]},"J9KjpzKM8oc":{"duration":221.518,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=J9KjpzKM8oc","annotations":[]},"etG65un_AiQ":{"duration":190.473,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=etG65un_AiQ","annotations":[{"segment":[20.501934477379095,164.0171981279251],"label":"Wakeboarding"}]},"8liz3Fu7DGU":{"duration":224.35,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8liz3Fu7DGU","annotations":[{"segment":[5.598835145085804,215.20522588923558],"label":"Playing 
lacrosse"}]},"oUWcw4FlGPU":{"duration":141.062,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=oUWcw4FlGPU","annotations":[]},"tbOIyG1RCUI":{"duration":13.793,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=tbOIyG1RCUI","annotations":[{"segment":[0,13.792653],"label":"Hitting a pinata"}]},"Rm84DwISnjc":{"duration":144.429,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=Rm84DwISnjc","annotations":[]},"fGU3iPxGRCM":{"duration":75.326,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=fGU3iPxGRCM","annotations":[]},"y3r4IrZkgBo":{"duration":34.16,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=y3r4IrZkgBo","annotations":[{"segment":[0.3996476443057722,7.273587126365054],"label":"Shaving legs"}]},"7LhgGs4TrYA":{"duration":126.55,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=7LhgGs4TrYA","annotations":[{"segment":[29.4241079166653,35.73183382320736],"label":"Playing saxophone"},{"segment":[38.294347472740064,42.43379259890829],"label":"Playing saxophone"}]},"FBL7iWMmTHU":{"duration":215.41,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FBL7iWMmTHU","annotations":[{"segment":[4.3687203338533545,213.73124094851792],"label":"Playing 
badminton"}]},"p4MMyVArVc0":{"duration":129.8,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=p4MMyVArVc0","annotations":[{"segment":[0.01,125.39520965420493],"label":"Slacklining"}]},"WPK5VeqNSh8":{"duration":145.288,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=WPK5VeqNSh8","annotations":[{"segment":[12.239488024961,23.572347307332297],"label":"Curling"},{"segment":[60.290811382215296,70.49038473634945],"label":"Curling"},{"segment":[73.21027096411858,80.23664371918876],"label":"Curling"},{"segment":[98.36921857098284,123.98148054914198],"label":"Curling"}]},"y1F_Hzpux48":{"duration":107.253,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=y1F_Hzpux48","annotations":[{"segment":[6.358210420933845,14.05499145680113],"label":"Ice fishing"},{"segment":[38.48390517933643,39.655154467403186],"label":"Ice fishing"},{"segment":[54.04478857793768,83.66066302652106],"label":"Ice fishing"},{"segment":[96.37708380655226,107.05297],"label":"Ice fishing"}]},"hUynCsek8I0":{"duration":203.291,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hUynCsek8I0","annotations":[{"segment":[49.31059844842773,192.96140936249697],"label":"Ice fishing"}]},"kbgY_qeAD1c":{"duration":34.947,"subset":"testing","resolution":"294x240","url":"https://www.youtube.com/watch?v=kbgY_qeAD1c","annotations":[]},"ALqZpLyQHLQ":{"duration":32.79,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=ALqZpLyQHLQ","annotations":[]},"zdoGEZO5Ldc":{"duration":121.743,"subset":"training","resolution":"1164x720","url":"https://www.youtube.com/watch?v=zdoGEZO5Ldc","annotations":[{"segment":[9.496273184260705,105.59855780897902],"label":"Playing rubik 
cube"}]},"XCejO7RyWdA":{"duration":54.103,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=XCejO7RyWdA","annotations":[{"segment":[5.7350202808112325,6.9157597503900154],"label":"Disc dog"},{"segment":[10.879670826833074,12.317981279251171],"label":"Disc dog"},{"segment":[15.017812792511702,19.15192979719189],"label":"Disc dog"},{"segment":[21.261173166926678,25.0578112324493],"label":"Disc dog"},{"segment":[26.998315132605306,30.373104524180967],"label":"Disc dog"},{"segment":[33.832263650546025,47.753269890795636],"label":"Disc dog"}]},"rMj2JWNJzkw":{"duration":110.03999999999999,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=rMj2JWNJzkw","annotations":[]},"zvXi08rTq3Y":{"duration":17.741,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zvXi08rTq3Y","annotations":[{"segment":[0.01,16.950837322326226],"label":"Riding bumper cars"}]},"iGxMm7C1q48":{"duration":88.074,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iGxMm7C1q48","annotations":[{"segment":[0.2,66.80912534267914],"label":"Playing ten pins"}]},"yacDrjVTUak":{"duration":234.151,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=yacDrjVTUak","annotations":[{"segment":[25.932279251170048,202.7100702028081],"label":"Welding"}]},"0BLpSquuZFM":{"duration":173.36,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0BLpSquuZFM","annotations":[]},"JHITVq5zJOM":{"duration":237.541,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JHITVq5zJOM","annotations":[{"segment":[2.9600016953822026,237.34013605442178],"label":"Playing beach 
volleyball"}]},"V4_SJyhRGqA":{"duration":129.893,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=V4_SJyhRGqA","annotations":[{"segment":[0.01,129.861],"label":"Croquet"}]},"eRHbpYeYtxo":{"duration":15.12,"subset":"validation","resolution":"480x480","url":"https://www.youtube.com/watch?v=eRHbpYeYtxo","annotations":[{"segment":[0,9.126091516380656],"label":"Clean and jerk"}]},"Ey2SmPzJTKM":{"duration":137.811,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ey2SmPzJTKM","annotations":[{"segment":[26.608043613707164,85.6302476635514],"label":"Changing car wheel"}]},"bL0bXUG2QWI":{"duration":107.161,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bL0bXUG2QWI","annotations":[{"segment":[24.32406741167606,77.31877084633385],"label":"Kneeling"},{"segment":[80.99663670280812,90.19130134399376],"label":"Kneeling"},{"segment":[104.73558904914196,106.959637],"label":"Kneeling"}]},"tOPaS3wfoJQ":{"duration":206.472,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tOPaS3wfoJQ","annotations":[]},"r1mrueEHDDE":{"duration":224.68,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=r1mrueEHDDE","annotations":[{"segment":[21.73380510186394,218.7402308892356],"label":"Playing flauta"}]},"VD3kzQGuQnc":{"duration":63.93,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=VD3kzQGuQnc","annotations":[{"segment":[22.81533073322933,47.72289703588143],"label":"Mixing 
drinks"}]},"pX-ik8n_eNQ":{"duration":211.79,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=pX-ik8n_eNQ","annotations":[{"segment":[0.01,206.57873293681746],"label":"Ping-pong"}]},"foFFu7bY5ow":{"duration":138.043,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=foFFu7bY5ow","annotations":[{"segment":[12.059841022212318,13.782675453956935],"label":"Surfing"},{"segment":[31.011019771403102,58.145661716068645],"label":"Surfing"}]},"-rwKyNnz89s":{"duration":177.912,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-rwKyNnz89s","annotations":[{"segment":[0.01,177.9107709750567],"label":"Futsal"}]},"GK_DCUGw5lM":{"duration":41.68,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=GK_DCUGw5lM","annotations":[{"segment":[5.002117004680187,12.083035881435256],"label":"Baton twirling"},{"segment":[16.630414976599063,30.792252730109205],"label":"Baton twirling"}]},"Xepkoln9NF4":{"duration":149.82,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Xepkoln9NF4","annotations":[]},"9N1525JmqZQ":{"duration":144.219,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=9N1525JmqZQ","annotations":[{"segment":[48.2605384868456,125.65739246957877],"label":"Laying tile"}]},"BBKSKU6dGGk":{"duration":36.69,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=BBKSKU6dGGk","annotations":[]},"zvAlL20-K4w":{"duration":88.78999999999999,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zvAlL20-K4w","annotations":[{"segment":[10.31995027964384,22.78700430874378],"label":"Shot put"},{"segment":[46.058838496397,51.322705029641185],"label":"Shot 
put"}]},"RQO61KXeBos":{"duration":238.77,"subset":"training","resolution":"528x360","url":"https://www.youtube.com/watch?v=RQO61KXeBos","annotations":[{"segment":[137.24499600000001,216.19172356708268],"label":"Pole vault"},{"segment":[5.772561094383775,124.6689801],"label":"Pole vault"}]},"xg0OCQW8t2s":{"duration":67.129,"subset":"training","resolution":"624x480","url":"https://www.youtube.com/watch?v=xg0OCQW8t2s","annotations":[{"segment":[29.61610452418097,67.081],"label":"Cutting the grass"}]},"Y2gFwWnli4g":{"duration":116.94,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Y2gFwWnli4g","annotations":[{"segment":[40.86364262189535,116.93569160997733],"label":"Preparing salad"}]},"oCicjtc1t9Y":{"duration":88.051,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=oCicjtc1t9Y","annotations":[{"segment":[0.20000000000000004,87.85009070294784],"label":"Raking leaves"}]},"tcUet_leWUE":{"duration":34.74,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=tcUet_leWUE","annotations":[]},"eolJOpzxhx4":{"duration":104.63,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=eolJOpzxhx4","annotations":[]},"EFtxSXp1pck":{"duration":220.335,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=EFtxSXp1pck","annotations":[{"segment":[62.888335413416534,208.95945085803433],"label":"Making an omelette"}]},"xpmXZBpYrM0":{"duration":47.07,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=xpmXZBpYrM0","annotations":[{"segment":[18.19221216848674,33.52355226209048],"label":"Shaving 
legs"}]},"3dWw9GLcOeA":{"duration":56.936,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=3dWw9GLcOeA","annotations":[]},"nGKc2t-NMYc":{"duration":30.77,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=nGKc2t-NMYc","annotations":[]},"hvXMCjJcIqI":{"duration":33.69,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hvXMCjJcIqI","annotations":[{"segment":[0,33.692153],"label":"Mowing the lawn"}]},"lUk_dSjmIgM":{"duration":103.376,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=lUk_dSjmIgM","annotations":[{"segment":[0.01,103.37523809523809],"label":"Cleaning sink"}]},"gnKpguqHfag":{"duration":217.502,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=gnKpguqHfag","annotations":[{"segment":[6.786312486513066,11.19741560274656],"label":"Playing ice hockey"},{"segment":[19.34099058656224,21.037568708190506],"label":"Playing ice hockey"},{"segment":[67.86312486513066,72.27422798136415],"label":"Playing ice hockey"},{"segment":[81.77506546248244,86.86479982736725],"label":"Playing ice hockey"},{"segment":[90.25795607062378,149.2416848673947],"label":"Playing ice hockey"}]},"-8SgY4vW4xM":{"duration":222.123,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=-8SgY4vW4xM","annotations":[]},"M0sa3xWhFGo":{"duration":230.02,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=M0sa3xWhFGo","annotations":[{"segment":[8.610421216848675,212.74915756630264],"label":"Cheerleading"}]},"8qOiZzSzSTY":{"duration":39.27,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=8qOiZzSzSTY","annotations":[]},"mnqSG5o84NE":{"duration":139.82999999999998,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=mnqSG5o84NE","annotations":[{"segment":[1.7451552574102966,133.2862327847114],"label":"Doing 
kickboxing"}]},"eMB9L-LDK1E":{"duration":129.684,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eMB9L-LDK1E","annotations":[{"segment":[3.2319862108914177,128.2694510514019],"label":"Cleaning sink"}]},"ui7LIgAF8-E":{"duration":20.36,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=ui7LIgAF8-E","annotations":[{"segment":[0,20.316666],"label":"Springboard diving"}]},"cFzo-Zgxk1M":{"duration":99.986,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=cFzo-Zgxk1M","annotations":[{"segment":[82.67100653666147,86.25861625429017],"label":"Playing ten pins"},{"segment":[89.22229384711387,92.65392053354134],"label":"Playing ten pins"}]},"poxJkbrLcHQ":{"duration":177.77,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=poxJkbrLcHQ","annotations":[]},"lt--z8nFIT0":{"duration":193.376,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lt--z8nFIT0","annotations":[{"segment":[0.19999999999999998,11.762120124804992],"label":"Playing ice hockey"},{"segment":[30.159282371294854,46.14370202808112],"label":"Playing ice hockey"},{"segment":[57.00104368174727,72.38227769110765],"label":"Playing ice hockey"},{"segment":[83.23961934477379,115.20845865834633],"label":"Playing ice hockey"},{"segment":[127.87535725429017,136.31995631825274],"label":"Playing ice hockey"},{"segment":[144.77953198127926,159.86073322932918],"label":"Playing ice hockey"},{"segment":[169.51270202808112,190.92800780031203],"label":"Playing ice hockey"}]},"VYuQAfG0gKw":{"duration":25.73,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=VYuQAfG0gKw","annotations":[{"segment":[0.01,20.16786739469579],"label":"Brushing 
hair"}]},"N2Eo_C-Ng4k":{"duration":212.765,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=N2Eo_C-Ng4k","annotations":[]},"V_Nj_BhesEY":{"duration":83.15,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=V_Nj_BhesEY","annotations":[{"segment":[20.106633272133607,76.01604579013093],"label":"Doing karate"}]},"ir759AX1EYY":{"duration":108.09,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ir759AX1EYY","annotations":[{"segment":[13.346574665626276,100.0533921188973],"label":"Polishing shoes"}]},"1P_BTFhNZFo":{"duration":140.969,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1P_BTFhNZFo","annotations":[{"segment":[0.2,106.86010296411857],"label":"Baton twirling"},{"segment":[110.59800780031202,126.64901092043682],"label":"Baton twirling"}]},"jwv7eQpeGW4":{"duration":180.95,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=jwv7eQpeGW4","annotations":[{"segment":[22.84440093603744,166.13383619344773],"label":"Triple jump"}]},"zPfyhY0J2yc":{"duration":87.52,"subset":"testing","resolution":"640x358","url":"https://www.youtube.com/watch?v=zPfyhY0J2yc","annotations":[]},"rVqeQ9D7EWo":{"duration":232.5,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rVqeQ9D7EWo","annotations":[{"segment":[39.90580343213728,162.88823400936036],"label":"Shoveling 
snow"}]},"3kxP8qOtym8":{"duration":26.26,"subset":"testing","resolution":"432x720","url":"https://www.youtube.com/watch?v=3kxP8qOtym8","annotations":[]},"M0mLgp8VxpY":{"duration":53.755,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=M0mLgp8VxpY","annotations":[{"segment":[24.12196140043312,26.96699878361069],"label":"Shuffleboard"},{"segment":[31.652942708844336,38.93289130697517],"label":"Shuffleboard"}]},"Ka0Rtjfi9fI":{"duration":100.775,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ka0Rtjfi9fI","annotations":[{"segment":[6.4323380062305295,50.04672741433022],"label":"Trimming branches or hedges"}]},"W3TQnn0q9kc":{"duration":98.105,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=W3TQnn0q9kc","annotations":[{"segment":[18.201059282371297,39.163207488299534],"label":"Waterskiing"},{"segment":[53.23748517940719,72.66610764430578],"label":"Waterskiing"},{"segment":[75.11380811232449,85.51653510140406],"label":"Waterskiing"},{"segment":[87.81125429017162,97.75503744149766],"label":"Waterskiing"}]},"RgiVa9GMYxE":{"duration":23.66,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=RgiVa9GMYxE","annotations":[]},"a2k-AgrAppg":{"duration":63.368,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=a2k-AgrAppg","annotations":[{"segment":[19.414227856221544,61.29771639672227],"label":"Having an ice cream"}]},"p0_buoe9M7I":{"duration":24.75,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=p0_buoe9M7I","annotations":[{"segment":[0.6178463650546022,23.47816187207488],"label":"Getting a 
tattoo"}]},"3WyJNIqDFQg":{"duration":66.781,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3WyJNIqDFQg","annotations":[{"segment":[25.397697347893917,39.137435257410296],"label":"Kneeling"}]},"VbWomrZXIus":{"duration":129.66,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VbWomrZXIus","annotations":[{"segment":[15.575409454473418,115.50076361693924],"label":"Skateboarding"}]},"Z7zc7zs5Zj8":{"duration":236.82,"subset":"training","resolution":"352x262","url":"https://www.youtube.com/watch?v=Z7zc7zs5Zj8","annotations":[{"segment":[5.577111,232.7563168174727],"label":"Belly dance"}]},"e142-8fV9OU":{"duration":117.61,"subset":"testing","resolution":"656x480","url":"https://www.youtube.com/watch?v=e142-8fV9OU","annotations":[]},"xoETvVbyiBA":{"duration":58.65,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xoETvVbyiBA","annotations":[{"segment":[12.76171974,31.737724829999998],"label":"Using parallel bars"},{"segment":[38.16433404,52.47664572542902],"label":"Using parallel bars"}]},"RTIzzeNaH2Q":{"duration":156.782,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=RTIzzeNaH2Q","annotations":[{"segment":[63.57669266770671,153.33720280811232],"label":"Assembling bicycle"}]},"drVqGhjI558":{"duration":128.731,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=drVqGhjI558","annotations":[{"segment":[0.2,9.037307778025406],"label":"Peeling potatoes"},{"segment":[27.11192333407622,35.14508580343214],"label":"Peeling potatoes"},{"segment":[49.203120124805,53.018872297749056],"label":"Peeling potatoes"},{"segment":[62.257009137508355,117.68582965366615],"label":"Peeling potatoes"}]},"_Wag6CT_0j8":{"duration":232.5,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=_Wag6CT_0j8","annotations":[{"segment":[14.871384655226208,103.0115424898596],"label":"Playing field 
hockey"}]},"gsJ953MHtpY":{"duration":214.855,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=gsJ953MHtpY","annotations":[{"segment":[0.01,206.44264586583463],"label":"Powerbocking"}]},"E4n0KcS_zgI":{"duration":63.554,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=E4n0KcS_zgI","annotations":[{"segment":[4.759040190179036,50.46565386271451],"label":"Playing congas"}]},"zNpILhjhFbQ":{"duration":24.265,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zNpILhjhFbQ","annotations":[{"segment":[4.696977301028797,18.227844058358272],"label":"Doing crunches"}]},"x0tjkH_zfXA":{"duration":45.07,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=x0tjkH_zfXA","annotations":[{"segment":[0.19999999999999998,41.13246432917317],"label":"Raking leaves"}]},"O_tZAD_opA4":{"duration":74.24,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=O_tZAD_opA4","annotations":[{"segment":[3.0110593920355457,4.632399064670071],"label":"Tumbling"},{"segment":[20.266745907931558,53.38839922032256],"label":"Tumbling"},{"segment":[63.46386717628705,67.1697864274571],"label":"Tumbling"}]},"_aEA1UuTai8":{"duration":109.13,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=_aEA1UuTai8","annotations":[{"segment":[10.7228751950078,108.59038689547582],"label":"Using the balance beam"}]},"Il0IelOoXa4":{"duration":107.322,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Il0IelOoXa4","annotations":[{"segment":[0,100.29014481279252],"label":"Canoeing"}]},"7VbXyfF3kEw":{"duration":137.648,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7VbXyfF3kEw","annotations":[{"segment":[28.130848270665517,114.24130735101403],"label":"Playing rubik 
cube"}]},"E36gtCVNsSc":{"duration":116.379,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=E36gtCVNsSc","annotations":[]},"8rHY_Vt2FLA":{"duration":129.73,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8rHY_Vt2FLA","annotations":[{"segment":[30.75316142583116,101.84738161841655],"label":"Changing car wheel"}]},"gtzg66XV6E0":{"duration":156.2,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=gtzg66XV6E0","annotations":[{"segment":[0,156.2006349206349],"label":"Clipping cat claws"}]},"SwQbnQX7LCQ":{"duration":158.268,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SwQbnQX7LCQ","annotations":[{"segment":[23.69312948517941,139.70840249609986],"label":"Hand car wash"}]},"2Q0WGQbJbso":{"duration":128.27,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=2Q0WGQbJbso","annotations":[{"segment":[9.098107100624027,74.68446158424338],"label":"Playing violin"}]},"Fvm9BuMz0yE":{"duration":127.71,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Fvm9BuMz0yE","annotations":[]},"6Eh7B3GnQWI":{"duration":69.1,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=6Eh7B3GnQWI","annotations":[{"segment":[3.6653488361934476,57.89095073634945],"label":"Playing accordion"}]},"2iBb6ArHweI":{"duration":101.402,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2iBb6ArHweI","annotations":[{"segment":[61.129105304212175,76.78701326053041],"label":"Applying sunscreen"}]},"J-CUMDsQqwE":{"duration":223.4,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=J-CUMDsQqwE","annotations":[]},"nSsll1A6Gxw":{"duration":226.72,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nSsll1A6Gxw","annotations":[{"segment":[39.61403957110666,209.03479699843993],"label":"Playing 
piano"}]},"B_NQ_U2JEp4":{"duration":142.13,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=B_NQ_U2JEp4","annotations":[{"segment":[60.7541956848674,132.6701525001927],"label":"Wrapping presents"}]},"baSx0q9LKg0":{"duration":236.217,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=baSx0q9LKg0","annotations":[{"segment":[12.158054602184087,205.96751794071764],"label":"Making a cake"}]},"s5QkiA-w5YE":{"duration":158.06,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=s5QkiA-w5YE","annotations":[{"segment":[9.863194201237437,150.9068702527301],"label":"Kayaking"}]},"xYu5luMTycc":{"duration":116.47,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xYu5luMTycc","annotations":[{"segment":[4.038961468170736,12.561994178451112],"label":"Sailing"},{"segment":[34.68561100088102,65.51360165508663],"label":"Sailing"},{"segment":[68.77774184200251,83.28503156162868],"label":"Sailing"},{"segment":[91.4453820289184,99.24305025321748],"label":"Sailing"},{"segment":[105.22730726256326,109.21681193546046],"label":"Sailing"}]},"EfjzkyLrnDg":{"duration":13.89,"subset":"validation","resolution":"384x216","url":"https://www.youtube.com/watch?v=EfjzkyLrnDg","annotations":[{"segment":[5.112301585023401,9.661383503900156],"label":"Drinking beer"}]},"firp_OhUMPc":{"duration":194.189,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=firp_OhUMPc","annotations":[{"segment":[15.503291628579115,183.94186243007337],"label":"Disc dog"}]},"Ol7JKNItQC4":{"duration":223.66,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ol7JKNItQC4","annotations":[{"segment":[23.82138754,183.28892608],"label":"Zumba"}]},"2syOa4jpZDU":{"duration":114.289,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2syOa4jpZDU","annotations":[{"segment":[0.01,114.241],"label":"Clipping 
cat claws"}]},"18qVzfQ9VAs":{"duration":70.87,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=18qVzfQ9VAs","annotations":[{"segment":[7.669612324492979,64.83305382215288],"label":"Cumbia"}]},"wN2XnDS0aGc":{"duration":23.27,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=wN2XnDS0aGc","annotations":[{"segment":[1.2438003599999998,21.16116555226209],"label":"High jump"}]},"I6IfZiNmlWA":{"duration":41.169,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=I6IfZiNmlWA","annotations":[{"segment":[3.725111981279251,38.664093322932914],"label":"Hurling"}]},"w1FFMG52FZE":{"duration":224.816,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=w1FFMG52FZE","annotations":[{"segment":[23.557537276383027,213.35488187839206],"label":"Hula hoop"}]},"PwpGXy4U1Bo":{"duration":188.78,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=PwpGXy4U1Bo","annotations":[]},"gWbIoFrpwTY":{"duration":118.77000000000001,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=gWbIoFrpwTY","annotations":[]},"L1lXij7Fyvo":{"duration":107.137,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=L1lXij7Fyvo","annotations":[{"segment":[12.57852891498934,93.82181084645352],"label":"Slacklining"}]},"2OokWbQ3VQE":{"duration":31.09,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=2OokWbQ3VQE","annotations":[]},"grvzIljsqcI":{"duration":214.04,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=grvzIljsqcI","annotations":[{"segment":[27.04737088923557,212.70586736349455],"label":"Smoking a cigarette"}]},"QQNW2ha8WIs":{"duration":47.067,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QQNW2ha8WIs","annotations":[{"segment":[5.598025656976432,12.485648709935933],"label":"Doing a 
powerbomb"},{"segment":[15.41655213672721,23.256718803393877],"label":"Doing a powerbomb"},{"segment":[26.041077058845588,36.07942129560572],"label":"Doing a powerbomb"},{"segment":[39.23014247940634,42.38086366320696],"label":"Doing a powerbomb"}]},"N6ERAg1EKcc":{"duration":24.312,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=N6ERAg1EKcc","annotations":[{"segment":[5.196017277425791,8.723240140405617],"label":"Hitting a pinata"}]},"hRk-3fep5WQ":{"duration":48.69,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=hRk-3fep5WQ","annotations":[{"segment":[0,48.4917006802721],"label":"Brushing hair"}]},"eLVaojWbwHE":{"duration":209.0,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=eLVaojWbwHE","annotations":[{"segment":[134.92675485491418,189.02782081123246],"label":"Painting"},{"segment":[34.872373839313575,134.21272432],"label":"Painting"}]},"R-lGXNY19wE":{"duration":77.184,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=R-lGXNY19wE","annotations":[{"segment":[12.288816990730547,63.4922927766485],"label":"Applying sunscreen"}]},"iN1DEIADG9o":{"duration":137.532,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iN1DEIADG9o","annotations":[{"segment":[0.2,125.94498464118566],"label":"Disc dog"}]},"g0jF_Gh8c8g":{"duration":183.81,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=g0jF_Gh8c8g","annotations":[{"segment":[88.8703744149766,156.83027613104525],"label":"Ice fishing"}]},"aKfUE-KeYWw":{"duration":42.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=aKfUE-KeYWw","annotations":[{"segment":[0.32801354134165367,41.32970620904836],"label":"Mowing the 
lawn"}]},"AbzyaqyFI0Y":{"duration":34.53,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AbzyaqyFI0Y","annotations":[{"segment":[6.302315611544461,27.4177662074883],"label":"Kayaking"}]},"hYj38k-VOFM":{"duration":41.239,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hYj38k-VOFM","annotations":[{"segment":[0.2,9.006864274570983],"label":"Raking leaves"},{"segment":[22.967503900156004,24.38286744149766],"label":"Raking leaves"}]},"sax2PjbeLdk":{"duration":23.128,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=sax2PjbeLdk","annotations":[{"segment":[1.9461060842433697,5.2256552262090485],"label":"Putting on shoes"}]},"hHxEBnhLsXs":{"duration":122.555,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hHxEBnhLsXs","annotations":[{"segment":[0.01,108.37660998439938],"label":"Wakeboarding"}]},"ScKbopywnvM":{"duration":51.595,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ScKbopywnvM","annotations":[{"segment":[6.519772023556942,50.62884694836193],"label":"Slacklining"}]},"ynUBEoobKW0":{"duration":40.8,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ynUBEoobKW0","annotations":[{"segment":[11.010858669266772,35.00562004680188],"label":"Using the balance beam"}]},"hltWAq_Odxk":{"duration":9.335,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=hltWAq_Odxk","annotations":[{"segment":[0.010000000000000002,9.301],"label":"Doing a powerbomb"}]},"giSSBgCi16c":{"duration":153.86,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=giSSBgCi16c","annotations":[]},"SN_YS2w3JhE":{"duration":114.428,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SN_YS2w3JhE","annotations":[{"segment":[31.130132192846034,83.6212597200622],"label":"Playing beach 
volleyball"}]},"xKDHIyd_tWA":{"duration":111.015,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xKDHIyd_tWA","annotations":[{"segment":[17.145773859594385,84.17016258346334],"label":"Rope skipping"}]},"87F9xGaMwCw":{"duration":126.805,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=87F9xGaMwCw","annotations":[]},"zdMvd5Cr5jM":{"duration":106.53,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zdMvd5Cr5jM","annotations":[{"segment":[41.050751095163804,99.3860289672387],"label":"Cheerleading"}]},"Zo7oziWT-7o":{"duration":183.833,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Zo7oziWT-7o","annotations":[{"segment":[0.20000000000000004,174.31957566302654],"label":"Slacklining"}]},"h3uTJ7HknzA":{"duration":83.453,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=h3uTJ7HknzA","annotations":[{"segment":[0.01,4.426487737060502],"label":"Capoeira"},{"segment":[13.800226474365095,25.38720908019994],"label":"Capoeira"},{"segment":[33.84961210693325,40.879915706708275],"label":"Capoeira"},{"segment":[45.95735746645866,61.18968274570984],"label":"Capoeira"}]},"YcAs1-6SYb4":{"duration":233.9,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=YcAs1-6SYb4","annotations":[{"segment":[0.01,102.51064118564743],"label":"Hammer throw"},{"segment":[129.14151950078002,182.03846957878318],"label":"Hammer 
throw"}]},"Kw2z2wqn6RM":{"duration":86.94,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Kw2z2wqn6RM","annotations":[]},"ZWLF3bFfkac":{"duration":167.23,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=ZWLF3bFfkac","annotations":[]},"s_gb6Xi_J5g":{"duration":12.586,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=s_gb6Xi_J5g","annotations":[{"segment":[0.2,12.385215419501135],"label":"Using the monkey bar"}]},"0PyY_iHBZHk":{"duration":105.12,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0PyY_iHBZHk","annotations":[{"segment":[65.31524180967239,99.85538221528861],"label":"Playing water polo"}]},"eTIaRqgQ4Qg":{"duration":87.284,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eTIaRqgQ4Qg","annotations":[{"segment":[12.110248049921998,74.44746801872076],"label":"Wakeboarding"}]},"P3kWD8Oocio":{"duration":20.527,"subset":"validation","resolution":"294x240","url":"https://www.youtube.com/watch?v=P3kWD8Oocio","annotations":[{"segment":[4.628823442674115,6.288814887700485],"label":"Fun sliding down"}]},"J_SD_hhGET8":{"duration":234.92000000000002,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=J_SD_hhGET8","annotations":[{"segment":[33.3432422823713,194.56331485647428],"label":"Belly dance"}]},"EQajiMQAW74":{"duration":37.83,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=EQajiMQAW74","annotations":[{"segment":[0.01,37.825305],"label":"Getting a 
haircut"}]},"yR7HKCyK0lg":{"duration":209.979,"subset":"testing","resolution":"1252x720","url":"https://www.youtube.com/watch?v=yR7HKCyK0lg","annotations":[]},"eLzYgo71GcY":{"duration":171.74,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=eLzYgo71GcY","annotations":[]},"Tm1ebIrDyz0":{"duration":52.62,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Tm1ebIrDyz0","annotations":[{"segment":[6.81302567204729,12.641035584280514],"label":"High jump"},{"segment":[13.297712757489892,21.424092775955938],"label":"High jump"},{"segment":[22.32702388911883,45.22864030479587],"label":"High jump"}]},"Aqq8S79ZAhk":{"duration":59.792,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Aqq8S79ZAhk","annotations":[{"segment":[0.2,43.373926146645864],"label":"Laying tile"}]},"IqXaLlFSWwc":{"duration":13.35,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IqXaLlFSWwc","annotations":[{"segment":[4.51992109828393,12.97654766926677],"label":"High jump"}]},"ekn9AFX2XX8":{"duration":67.06,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ekn9AFX2XX8","annotations":[{"segment":[7.353488179320255,24.379319816391906],"label":"Applying 
sunscreen"}]},"4fw4nswO748":{"duration":64.088,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4fw4nswO748","annotations":[{"segment":[0.2,10.693489859594386],"label":"Snowboarding"},{"segment":[14.790995319812795,40.475358814352575],"label":"Snowboarding"},{"segment":[43.373594383775355,56.66550234009361],"label":"Snowboarding"}]},"E0xXymnjDkc":{"duration":56.12,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=E0xXymnjDkc","annotations":[{"segment":[11.09932449297972,56.001],"label":"Volleyball"}]},"88wc2an-eC4":{"duration":123.58,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=88wc2an-eC4","annotations":[{"segment":[5.105983619344774,118.78637363494539],"label":"Doing kickboxing"}]},"h3GLEcwSvD4":{"duration":64.3,"subset":"training","resolution":"482x360","url":"https://www.youtube.com/watch?v=h3GLEcwSvD4","annotations":[{"segment":[12.939459912636506,55.56946350078003],"label":"Tai chi"}]},"ZhUC4qTGdHY":{"duration":164.282,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=ZhUC4qTGdHY","annotations":[{"segment":[37.93075586969057,76.11780063039255],"label":"Waterskiing"},{"segment":[92.77657854613504,117.38031126989081],"label":"Waterskiing"}]},"HdOylJP08Lk":{"duration":183.508,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=HdOylJP08Lk","annotations":[{"segment":[68.42159918777703,93.90077210707474],"label":"Playing blackjack"},{"segment":[123.67418725741031,126.82329850702028],"label":"Playing blackjack"},{"segment":[133.40780384711388,137.4157636193448],"label":"Playing blackjack"},{"segment":[150.584774299532,177.20907850078004],"label":"Playing blackjack"}]},"wYIeRRTRtX8":{"duration":15.37,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=wYIeRRTRtX8","annotations":[{"segment":[9.808094353706123,13.045484910552887],"label":"Springboard 
diving"}]},"vycd0CJTwoA":{"duration":111.805,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vycd0CJTwoA","annotations":[{"segment":[3.488426884014136,90.00141360756471],"label":"Longboarding"},{"segment":[93.83868306864274,103.25743564430577],"label":"Longboarding"}]},"dJVTlZWqYOs":{"duration":71.889,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=dJVTlZWqYOs","annotations":[{"segment":[0.01,71.888978],"label":"Playing ten pins"}]},"d3RF0qC6RJs":{"duration":172.478,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=d3RF0qC6RJs","annotations":[{"segment":[0.01,172.4776643],"label":"Disc dog"}]},"AwgGYaV1lT0":{"duration":3.11,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=AwgGYaV1lT0","annotations":[]},"vopKTwCiHrA":{"duration":162.006,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vopKTwCiHrA","annotations":[{"segment":[0.01,161.961],"label":"Making a cake"}]},"FU0EPNGKsv8":{"duration":58.05,"subset":"validation","resolution":"576x360","url":"https://www.youtube.com/watch?v=FU0EPNGKsv8","annotations":[{"segment":[17.20667418096724,51.438899656786276],"label":"Layup drill in basketball"}]},"8B4M8AsGZmQ":{"duration":35.063,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8B4M8AsGZmQ","annotations":[{"segment":[15.976844454546738,31.57198941212938],"label":"Wakeboarding"}]},"nAkA1HfRwF8":{"duration":42.377,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nAkA1HfRwF8","annotations":[{"segment":[7.710768450061092,34.072373426732945],"label":"Capoeira"}]},"oP77DgsbhKQ":{"duration":214.599,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oP77DgsbhKQ","annotations":[{"segment":[0.01,214.575599],"label":"Laying 
tile"}]},"vBKIXqRd-eA":{"duration":233.748,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=vBKIXqRd-eA","annotations":[{"segment":[29.544367537966824,110.1530740304442],"label":"Calf roping"}]},"X9Y-b-SbkIU":{"duration":71.12,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=X9Y-b-SbkIU","annotations":[]},"9sbiZgFefA4":{"duration":131.286,"subset":"training","resolution":"324x240","url":"https://www.youtube.com/watch?v=9sbiZgFefA4","annotations":[{"segment":[10.646695787831515,115.49329797191888],"label":"Rollerblading"}]},"80spFFIdnC4":{"duration":119.37,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=80spFFIdnC4","annotations":[{"segment":[2.467925193070845,34.63546900810465],"label":"Sailing"},{"segment":[81.86411715272074,110.49880933488956],"label":"Sailing"}]},"fk_hkHmnmJo":{"duration":60.326,"subset":"validation","resolution":"1920x818","url":"https://www.youtube.com/watch?v=fk_hkHmnmJo","annotations":[{"segment":[4.041127925117005,60.241],"label":"Applying sunscreen"}]},"PYNTOqgOXWc":{"duration":63.925,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=PYNTOqgOXWc","annotations":[{"segment":[15.342580343213728,36.27563806552262],"label":"Mooping floor"},{"segment":[50.32746489859595,56.605940717628705],"label":"Mooping floor"}]},"sRN_crwj3B4":{"duration":225.443,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=sRN_crwj3B4","annotations":[{"segment":[8.791770670826834,225.401],"label":"Hanging wallpaper"}]},"V6s9eMtUnME":{"duration":103.17,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=V6s9eMtUnME","annotations":[{"segment":[31.6868551099902,86.45984732182819],"label":"Cleaning 
windows"}]},"rLCwdyMr8bQ":{"duration":63.39,"subset":"testing","resolution":"654x480","url":"https://www.youtube.com/watch?v=rLCwdyMr8bQ","annotations":[]},"NbHdPDi4L_g":{"duration":164.584,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NbHdPDi4L_g","annotations":[]},"RkCSfz8GeNg":{"duration":154.018,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RkCSfz8GeNg","annotations":[{"segment":[52.14024475195007,117.25548128549141],"label":"Blow-drying hair"}]},"AyYpsQ4xOo8":{"duration":195.4,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AyYpsQ4xOo8","annotations":[{"segment":[58.10452418096724,157.29218903588142],"label":"Getting a tattoo"}]},"UJwWjTvDEpQ":{"duration":131.05,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=UJwWjTvDEpQ","annotations":[{"segment":[2.097003369988427,130.08843050550246],"label":"Playing guitarra"}]},"3jEnu5mEYA4":{"duration":176.84,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3jEnu5mEYA4","annotations":[{"segment":[118.76908996359855,152.1513428918109],"label":"Preparing pasta"}]},"tNY0Isg-K18":{"duration":50.109,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=tNY0Isg-K18","annotations":[]},"NnEUVHhtLZs":{"duration":138.21,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NnEUVHhtLZs","annotations":[{"segment":[57.99873751684761,109.31360565443025],"label":"Playing harmonica"}]},"KnF56TCKPYw":{"duration":226.72,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=KnF56TCKPYw","annotations":[{"segment":[0.01,190.6425654359508],"label":"Walking the 
dog"}]},"3-gD02qDjPI":{"duration":103.35,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=3-gD02qDjPI","annotations":[]},"b1D6v3kZrHM":{"duration":161.96,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=b1D6v3kZrHM","annotations":[{"segment":[28.852995850000003,82.11659078608042],"label":"Archery"},{"segment":[93.9919131459136,129.36521379222518],"label":"Archery"}]},"GFVqcC1q9wM":{"duration":216.155,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=GFVqcC1q9wM","annotations":[]},"OwchMqCYaF4":{"duration":169.34,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OwchMqCYaF4","annotations":[{"segment":[0,16.115278529508526],"label":"Cheerleading"},{"segment":[36.72170025576533,145.8300605803432],"label":"Cheerleading"}]},"opaWopwV74I":{"duration":203.5,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=opaWopwV74I","annotations":[{"segment":[0.01,48.57324715845777],"label":"Hand washing clothes"},{"segment":[80.95541193076295,180.6416838768789],"label":"Hand washing clothes"}]},"nbOsU2Vmp1U":{"duration":206.1,"subset":"testing","resolution":"426x238","url":"https://www.youtube.com/watch?v=nbOsU2Vmp1U","annotations":[]},"iebP0qmPz1k":{"duration":188.48,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iebP0qmPz1k","annotations":[]},"JV2pXP8ODcI":{"duration":192.91,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JV2pXP8ODcI","annotations":[]},"tB_B3HVdO2I":{"duration":9.474,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=tB_B3HVdO2I","annotations":[{"segment":[0.19999999999999998,9.309000000000001],"label":"Using the monkey 
bar"}]},"tl2RLYJUu3k":{"duration":45.79,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tl2RLYJUu3k","annotations":[{"segment":[0.01,43.3694344773791],"label":"Doing karate"}]},"DmTI8y6Anvo":{"duration":37.45,"subset":"training","resolution":"322x240","url":"https://www.youtube.com/watch?v=DmTI8y6Anvo","annotations":[{"segment":[6.534964118564743,16.220714508580343],"label":"Layup drill in basketball"}]},"rkgeHZjFskY":{"duration":173.43,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rkgeHZjFskY","annotations":[{"segment":[12.445823242453507,159.36064953042123],"label":"Braiding hair"}]},"BrgYIg6UXhU":{"duration":267.029,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=BrgYIg6UXhU","annotations":[{"segment":[0.01,266.981],"label":"Removing ice from car"}]},"LkA7hS3FBkQ":{"duration":55.566,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=LkA7hS3FBkQ","annotations":[{"segment":[0.2,55.340999999999994],"label":"Changing car wheel"}]},"Bnw54GNBKlU":{"duration":154.09,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Bnw54GNBKlU","annotations":[{"segment":[62.019665279251164,99.51992800624024],"label":"Hopscotch"},{"segment":[107.69306219032761,148.07796051170047],"label":"Hopscotch"}]},"59mz2Nd8GM0":{"duration":222.91,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=59mz2Nd8GM0","annotations":[{"segment":[3.84967297,190.3167508853354],"label":"Making a sandwich"}]},"tXNycNfALsE":{"duration":133.38,"subset":"training","resolution":"628x352","url":"https://www.youtube.com/watch?v=tXNycNfALsE","annotations":[{"segment":[47.85694157017981,128.58951989391576],"label":"Using the balance 
beam"}]},"yjazHd6a5SQ":{"duration":155.34,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=yjazHd6a5SQ","annotations":[{"segment":[50.64957492721477,92.81716362259934],"label":"Mixing drinks"}]},"aGvpDyMNATI":{"duration":4.3,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=aGvpDyMNATI","annotations":[]},"LvNPoXWFHWQ":{"duration":108.74000000000001,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=LvNPoXWFHWQ","annotations":[]},"FWbCX1wBVoE":{"duration":168.67,"subset":"validation","resolution":"854x470","url":"https://www.youtube.com/watch?v=FWbCX1wBVoE","annotations":[{"segment":[81.01264898595943,108.36756942277692],"label":"Mooping floor"}]},"FWPJWq-uhUw":{"duration":117.82,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FWPJWq-uhUw","annotations":[{"segment":[54.038231522620904,55.87626660842434],"label":"Long jump"},{"segment":[75.35943851794072,78.6679016723869],"label":"Long jump"},{"segment":[80.68974026677067,83.44679289547582],"label":"Long 
jump"}]},"W_hux-Z6Ll0":{"duration":155.667,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=W_hux-Z6Ll0","annotations":[{"segment":[0.01,0.02],"label":"Skiing"}]},"vD9oh7NZ2PA":{"duration":119.421,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=vD9oh7NZ2PA","annotations":[{"segment":[13.600119427906368,17.326179545140988],"label":"Canoeing"},{"segment":[100.78992553198128,106.00640966302653],"label":"Canoeing"}]},"HtuDZLsOK6M":{"duration":125.88,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HtuDZLsOK6M","annotations":[{"segment":[16.10262181903276,19.244596808112323],"label":"Sailing"},{"segment":[38.68556705304212,45.36226390483619],"label":"Sailing"}]},"bqDxblFvgro":{"duration":185.6,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bqDxblFvgro","annotations":[{"segment":[4.920744149765991,168.19179563182527],"label":"Plataform diving"}]},"PGNwd5bySwI":{"duration":94.459,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PGNwd5bySwI","annotations":[{"segment":[22.071996879875194,78.59297035881436],"label":"Wakeboarding"}]},"P2hrv6QzDPI":{"duration":173.9,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=P2hrv6QzDPI","annotations":[{"segment":[0.01,173.8942403628118],"label":"Playing saxophone"}]},"0EdDWY0Zuqw":{"duration":179.93,"subset":"validation","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=0EdDWY0Zuqw","annotations":[{"segment":[0.01,179.088683852116],"label":"Playing saxophone"}]},"WUTHTWQb-5g":{"duration":15.09,"subset":"training","resolution":"480x480","url":"https://www.youtube.com/watch?v=WUTHTWQb-5g","annotations":[{"segment":[0.2,14.861],"label":"Playing field 
hockey"}]},"siKl-1EFFaQ":{"duration":10.449,"subset":"testing","resolution":"293x240","url":"https://www.youtube.com/watch?v=siKl-1EFFaQ","annotations":[]},"eyWerZITH1Y":{"duration":39.43,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=eyWerZITH1Y","annotations":[{"segment":[1.9067893010920436,36.536543382215285],"label":"Using the pommel horse"}]},"-HpCLXdtcas":{"duration":23.34,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=-HpCLXdtcas","annotations":[]},"m2DOej6tPNs":{"duration":206.36,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=m2DOej6tPNs","annotations":[{"segment":[0.01,206.35573696145124],"label":"Doing step aerobics"}]},"8tI9IsSpgeI":{"duration":21.69,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=8tI9IsSpgeI","annotations":[{"segment":[6.824682030000001,17.683146060000002],"label":"Clean and jerk"}]},"633ZdPm_GjM":{"duration":70.891,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=633ZdPm_GjM","annotations":[{"segment":[0.01,70.24289798773412],"label":"Making an omelette"}]},"_z3MXy3Wc-k":{"duration":88.38,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=_z3MXy3Wc-k","annotations":[]},"SqfB4ohs2Ro":{"duration":237.169,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=SqfB4ohs2Ro","annotations":[{"segment":[32.18981202028081,48.469716950077995],"label":"Fixing the roof"},{"segment":[56.609669414976594,73.25957218408736],"label":"Fixing the roof"},{"segment":[74.73956354134165,97.30943173946957],"label":"Fixing the roof"},{"segment":[96.93943390015599,154.28909899375975],"label":"Fixing the roof"},{"segment":[159.83906658346334,213.48875328393135],"label":"Fixing the 
roof"}]},"lXueRLwe3tk":{"duration":151.487,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=lXueRLwe3tk","annotations":[{"segment":[29.524375975039003,62.827872074883],"label":"Hand car wash"},{"segment":[74.4014274570983,79.83391263650546],"label":"Hand car wash"}]},"2DCHbuwZNSs":{"duration":137.997,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=2DCHbuwZNSs","annotations":[]},"srR0M_HL3Ao":{"duration":154.67000000000002,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=srR0M_HL3Ao","annotations":[{"segment":[50.166939157566304,54.26712168486739],"label":"Playing polo"},{"segment":[84.17433541341654,86.58620748829954],"label":"Playing polo"},{"segment":[107.56949453978159,121.31716536661466],"label":"Playing polo"}]},"MOvLBw1EzmI":{"duration":37.942,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=MOvLBw1EzmI","annotations":[{"segment":[0.19999999999999998,37.711],"label":"Removing ice from car"}]},"_icW_MAe7Po":{"duration":62.323,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_icW_MAe7Po","annotations":[{"segment":[7.389236823712949,54.544234975039],"label":"Futsal"}]},"3idPvL1koOA":{"duration":31.046,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=3idPvL1koOA","annotations":[]},"9PRK4oy1hhY":{"duration":38.406,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=9PRK4oy1hhY","annotations":[{"segment":[18.843131639551295,38.405239095238095],"label":"Table 
soccer"}]},"UsexiHSzIQI":{"duration":91.35,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UsexiHSzIQI","annotations":[{"segment":[14.813104524180968,32.47488299531981],"label":"Spinning"},{"segment":[41.30577223088923,46.860686427457104],"label":"Spinning"},{"segment":[66.51653666146646,83.18127925117004],"label":"Spinning"}]},"-l5e1zVzQOQ":{"duration":138.786,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-l5e1zVzQOQ","annotations":[{"segment":[0.2,70.1280561622465],"label":"Playing drums"},{"segment":[78.78584087363495,138.761],"label":"Playing drums"}]},"nhWNiPWwH0I":{"duration":234.17000000000002,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nhWNiPWwH0I","annotations":[]},"K6Tm5xHkJ5c":{"duration":114.64,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=K6Tm5xHkJ5c","annotations":[{"segment":[25.81087088455538,50.817943021840875],"label":"Playing accordion"},{"segment":[52.78278440405616,110.6562942074883],"label":"Playing accordion"}]},"eM-l2d8e-CA":{"duration":186.596,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eM-l2d8e-CA","annotations":[{"segment":[31.429684867394695,61.11327613104524],"label":"Curling"},{"segment":[65.1874945397816,90.50585179407176],"label":"Curling"},{"segment":[92.25194539781592,126.0097550702028],"label":"Curling"},{"segment":[137.06834789391576,142.03083931357256],"label":"Curling"},{"segment":[148.1428221528861,159.49364742589705],"label":"Curling"}]},"MWQ9mb_mB8U":{"duration":85.01,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=MWQ9mb_mB8U","annotations":[]},"SlgBcsCTy58":{"duration":135.141,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SlgBcsCTy58","annotations":[{"segment":[0.01,110.89474515444617],"label":"Grooming 
dog"}]},"z_ExqQ80T5g":{"duration":114.55,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=z_ExqQ80T5g","annotations":[{"segment":[14.3065456,103.36999419656786],"label":"Sailing"}]},"bXJMw4gLmGM":{"duration":159.452,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=bXJMw4gLmGM","annotations":[{"segment":[18.966835892828826,147.5639667339503],"label":"Waxing skis"}]},"Feq-2NIDl94":{"duration":175.87,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Feq-2NIDl94","annotations":[]},"DK271OQnNRA":{"duration":31.208,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DK271OQnNRA","annotations":[{"segment":[0.01,31.161],"label":"River tubing"}]},"FkMk9TZdea0":{"duration":210.3,"subset":"training","resolution":"2560x1080","url":"https://www.youtube.com/watch?v=FkMk9TZdea0","annotations":[{"segment":[5.738325514820593,205.1041491154446],"label":"Painting"}]},"1wqSUOGmpBs":{"duration":129.73,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=1wqSUOGmpBs","annotations":[{"segment":[3.833383928902043,112.98403362438421],"label":"Sumo"}]},"ipmWSYmt0_Y":{"duration":148.0,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=ipmWSYmt0_Y","annotations":[]},"sYphlW-m1pU":{"duration":195.698,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=sYphlW-m1pU","annotations":[{"segment":[0.01,195.697777],"label":"Playing rubik cube"}]},"3pjVV7A6Apw":{"duration":201.29,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=3pjVV7A6Apw","annotations":[{"segment":[123.4140609921997,175.54315545709827],"label":"Cleaning windows"}]},"nwttSIR1qvs":{"duration":198.972,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nwttSIR1qvs","annotations":[{"segment":[79.42530126423596,84.6940246754509],"label":"Cleaning 
sink"}]},"t3wyR2VQy20":{"duration":38.778,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=t3wyR2VQy20","annotations":[{"segment":[0.01,38.777323],"label":"Powerbocking"}]},"l_KhWbeZeRA":{"duration":221.983,"subset":"validation","resolution":"658x480","url":"https://www.youtube.com/watch?v=l_KhWbeZeRA","annotations":[{"segment":[32.552854920436815,208.4767942776911],"label":"Futsal"}]},"k1QAWLupB-I":{"duration":42.91,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=k1QAWLupB-I","annotations":[]},"7lxP-hWFA24":{"duration":171.387,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7lxP-hWFA24","annotations":[]},"9fTAW_HAN2Y":{"duration":155.016,"subset":"testing","resolution":"778x480","url":"https://www.youtube.com/watch?v=9fTAW_HAN2Y","annotations":[]},"f-uRpjoKGLQ":{"duration":212.835,"subset":"training","resolution":"1280x716","url":"https://www.youtube.com/watch?v=f-uRpjoKGLQ","annotations":[{"segment":[31.369429797191888,191.7020709828393],"label":"Skiing"}]},"lqOrPn05NpU":{"duration":204.253,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=lqOrPn05NpU","annotations":[]},"b1U1VmaZz7Y":{"duration":58.28,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=b1U1VmaZz7Y","annotations":[{"segment":[0.01,58.282107],"label":"Starting a campfire"}]},"vl7OBBTv7KY":{"duration":235.149,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vl7OBBTv7KY","annotations":[{"segment":[26.076656228111368,194.82595596880847],"label":"Making a 
cake"}]},"TiQ6P1NkPHU":{"duration":135.51,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TiQ6P1NkPHU","annotations":[{"segment":[59.19385822152886,130.86070799687985],"label":"Cumbia"}]},"e8KIgIC9RNg":{"duration":97.39,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=e8KIgIC9RNg","annotations":[]},"m22vOf2fw1M":{"duration":139.157,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=m22vOf2fw1M","annotations":[{"segment":[23.751933697347894,79.71539391575664],"label":"Trimming branches or hedges"}]},"mM6F8DppWcQ":{"duration":166.58,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mM6F8DppWcQ","annotations":[{"segment":[18.652805898733263,140.15513535177547],"label":"Rollerblading"}]},"VUvEWwghANE":{"duration":109.552,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=VUvEWwghANE","annotations":[{"segment":[13.629995672273084,16.696745283470595],"label":"Roof shingle removal"},{"segment":[27.08961896586216,50.60136598504308],"label":"Roof shingle removal"},{"segment":[0.2,109.35111111111111],"label":"Roof shingle removal"},{"segment":[69.51298788239926,72.57973746249256],"label":"Roof shingle removal"},{"segment":[81.0984862960851,82.29111113278806],"label":"Roof shingle removal"},{"segment":[88.76536024631838,93.1951096397865],"label":"Roof shingle removal"}]},"LBh2kEwx2cQ":{"duration":146.217,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=LBh2kEwx2cQ","annotations":[{"segment":[0.01,145.32265996959305],"label":"Laying tile"}]},"OsB_uEj1PRM":{"duration":124.993,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=OsB_uEj1PRM","annotations":[{"segment":[0.2,111.88246243925133],"label":"Doing 
crunches"}]},"s5y4xXcphcc":{"duration":131.87,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=s5y4xXcphcc","annotations":[{"segment":[3.0846411856474263,129.7605725429017],"label":"Windsurfing"}]},"Gi9aPJOgn6M":{"duration":30.117,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=Gi9aPJOgn6M","annotations":[{"segment":[12.130949297971918,12.5063751950078],"label":"Hand car wash"},{"segment":[13.210298751950079,13.398011700468018],"label":"Hand car wash"},{"segment":[16.917629485179404,17.76233775351014],"label":"Hand car wash"}]},"xmStXpxlG_I":{"duration":9.126,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xmStXpxlG_I","annotations":[{"segment":[0.2,8.626335313572543],"label":"Hitting a pinata"}]},"YfcxIgsqs5M":{"duration":195.28,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YfcxIgsqs5M","annotations":[{"segment":[0.6092963446429014,164.510011825273],"label":"Cheerleading"},{"segment":[170.29832705616224,185.83538372854915],"label":"Cheerleading"}]},"fU2mbWwVSXU":{"duration":162.61,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fU2mbWwVSXU","annotations":[{"segment":[0.01,155.2524282848865],"label":"Sharpening knives"}]},"yHaTlDD-qHA":{"duration":66.5,"subset":"validation","resolution":"480x352","url":"https://www.youtube.com/watch?v=yHaTlDD-qHA","annotations":[{"segment":[31.869090483619345,51.86479491],"label":"Cricket"},{"segment":[2.1799703588143524,9.550346333853353],"label":"Cricket"}]},"MxvKscpouzY":{"duration":160.73,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=MxvKscpouzY","annotations":[]},"Vncj0EkAGio":{"duration":61.58,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=Vncj0EkAGio","annotations":[{"segment":[16.91331492,55.20448517940717],"label":"Mixing 
drinks"}]},"SSoHwNbASQQ":{"duration":120.535,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SSoHwNbASQQ","annotations":[{"segment":[10.154254513037666,12.974880766659238],"label":"Slacklining"},{"segment":[18.428091523660946,29.89863828838868],"label":"Slacklining"},{"segment":[36.480099546839014,45.31806180818661],"label":"Slacklining"},{"segment":[48.32672981204962,54.90819107049996],"label":"Slacklining"},{"segment":[59.985318327018796,113.57721535101403],"label":"Slacklining"}]},"hj1II_7r8XQ":{"duration":30.651,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hj1II_7r8XQ","annotations":[]},"LlA0gm0EBkg":{"duration":46.891,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=LlA0gm0EBkg","annotations":[]},"tghS4UnuWzk":{"duration":219.29,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tghS4UnuWzk","annotations":[{"segment":[87.92096294851794,154.63142121684868],"label":"Cleaning shoes"},{"segment":[182.34191926677067,198.42085023400938],"label":"Cleaning shoes"}]},"Tzm6TEManmQ":{"duration":204.29,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Tzm6TEManmQ","annotations":[{"segment":[161.26414269087772,165.08858795631826],"label":"Swimming"},{"segment":[168.9130340093604,174.33099925117006],"label":"Swimming"},{"segment":[176.56192611544463,181.9798913572543],"label":"Swimming"},{"segment":[188.99137578783152,196.00286021840876],"label":"Swimming"}]},"kF9Pfa9EkDI":{"duration":53.964,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kF9Pfa9EkDI","annotations":[]},"AauepSs1kUU":{"duration":170.78,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AauepSs1kUU","annotations":[{"segment":[29.307498912201385,161.19124296411857],"label":"Playing field 
hockey"}]},"WygAYInf0g8":{"duration":185.3,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WygAYInf0g8","annotations":[]},"HWkGANOfqQE":{"duration":225.4,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=HWkGANOfqQE","annotations":[{"segment":[6.503139625585023,130.5900741029641],"label":"Playing guitarra"}]},"eRQAjBwhDXk":{"duration":81.061,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eRQAjBwhDXk","annotations":[{"segment":[0,81.060792],"label":"Snow tubing"}]},"51faGoLPJgM":{"duration":8.96,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=51faGoLPJgM","annotations":[]},"X6IoTFqaegc":{"duration":71.77,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=X6IoTFqaegc","annotations":[{"segment":[1.7915199001560065,8.28577953822153],"label":"Cleaning windows"},{"segment":[19.93065888923557,40.19722775975039],"label":"Cleaning windows"}]},"I8doQJXPqwo":{"duration":63.066,"subset":"training","resolution":"698x480","url":"https://www.youtube.com/watch?v=I8doQJXPqwo","annotations":[{"segment":[0.01,63.001],"label":"Hand car wash"}]},"JBTiD2-FcR0":{"duration":50.109,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=JBTiD2-FcR0","annotations":[{"segment":[32.44164552976677,47.607136581903276],"label":"Plastering"}]},"d7Et_9J1NHM":{"duration":145.26,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=d7Et_9J1NHM","annotations":[]},"8rimo9x4qqw":{"duration":145.706,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8rimo9x4qqw","annotations":[{"segment":[0.01,145.26915297814392],"label":"Snowboarding"}]},"Jx4GCjGARqs":{"duration":59.12,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Jx4GCjGARqs","annotations":[{"segment":[2.534204368174727,37.496182680000004],"label":"Hammer 
throw"}]},"ECZb_tQZ4Pg":{"duration":104.583,"subset":"testing","resolution":"160x200","url":"https://www.youtube.com/watch?v=ECZb_tQZ4Pg","annotations":[]},"t2DdSm_MGXo":{"duration":88.631,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=t2DdSm_MGXo","annotations":[{"segment":[4.472944612522601,85.09638691781855],"label":"Cleaning sink"}]},"qtKa7Lr3URA":{"duration":20.27,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=qtKa7Lr3URA","annotations":[]},"mI7Hwk_AF-M":{"duration":120.02,"subset":"training","resolution":"810x480","url":"https://www.youtube.com/watch?v=mI7Hwk_AF-M","annotations":[{"segment":[0,120.02394557823129],"label":"Playing field hockey"}]},"G4O_Ar7EO4Y":{"duration":77.81,"subset":"training","resolution":"624x360","url":"https://www.youtube.com/watch?v=G4O_Ar7EO4Y","annotations":[{"segment":[18.31811388455538,23.534530421216846],"label":"Hammer throw"},{"segment":[63.462500780031206,70.1363775351014],"label":"Hammer throw"}]},"beRmfL0Uh28":{"duration":160.659,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=beRmfL0Uh28","annotations":[{"segment":[22.306769255804245,101.5083312402496],"label":"Rafting"}]},"7QxUtHqQdbY":{"duration":13.89,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=7QxUtHqQdbY","annotations":[{"segment":[0.6720163806552262,12.34176944],"label":"Chopping wood"}]},"6LWkrN1qz8E":{"duration":232.64,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=6LWkrN1qz8E","annotations":[{"segment":[0,232.581],"label":"Brushing 
hair"}]},"-2xeOGDeDMU":{"duration":148.097,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=-2xeOGDeDMU","annotations":[]},"kbe4iowYMqM":{"duration":139.99,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=kbe4iowYMqM","annotations":[{"segment":[57.438668666146654,67.04817977379096],"label":"Preparing pasta"}]},"EX3szlLBdFM":{"duration":185.667,"subset":"training","resolution":"792x480","url":"https://www.youtube.com/watch?v=EX3szlLBdFM","annotations":[{"segment":[0.2,0.5793034539781591],"label":"Futsal"}]},"rx_WRuQ-X14":{"duration":63.321,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rx_WRuQ-X14","annotations":[{"segment":[5.627171606864274,58.857092043681746],"label":"Hanging wallpaper"}]},"HeHQxObROwQ":{"duration":92.7,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=HeHQxObROwQ","annotations":[{"segment":[24.221932407908564,34.77835668120603],"label":"Shaving legs"},{"segment":[55.89120522780096,74.97953405074979],"label":"Shaving 
legs"}]},"fhXVqPPDjN8":{"duration":156.41,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=fhXVqPPDjN8","annotations":[{"segment":[1.9520700719185229,153.7255166770671],"label":"Cumbia"}]},"uhnY3lZ9ZCI":{"duration":158.07999999999998,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=uhnY3lZ9ZCI","annotations":[{"segment":[3.0805382215288613,58.776669266770675],"label":"Kayaking"},{"segment":[62.3949906,140.34932137285492],"label":"Kayaking"}]},"aw89asgFHbs":{"duration":15.12,"subset":"training","resolution":"480x480","url":"https://www.youtube.com/watch?v=aw89asgFHbs","annotations":[{"segment":[0.31770280811232454,14.626095943837754],"label":"Tumbling"}]},"AnRYS02tvRA":{"duration":133.306,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AnRYS02tvRA","annotations":[{"segment":[51.97386895475819,55.30019656786271],"label":"Rock-paper-scissors"},{"segment":[69.22919344773791,75.25816224648986],"label":"Rock-paper-scissors"},{"segment":[89.81084555382215,91.69566458658348],"label":"Rock-paper-scissors"},{"segment":[105.41882527301092,112.48833229329173],"label":"Rock-paper-scissors"}]},"gh0GD6OvLHE":{"duration":16.21,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=gh0GD6OvLHE","annotations":[{"segment":[0.2,8.16697538377535],"label":"Shaving legs"}]},"6V6XB0KmYkk":{"duration":155.969,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6V6XB0KmYkk","annotations":[{"segment":[8.272896718876755,136.74611635319812],"label":"Playing ice hockey"}]},"YVqRQDkdA8U":{"duration":182.625,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YVqRQDkdA8U","annotations":[]},"TPzxIqu1JJg":{"duration":225.072,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TPzxIqu1JJg","annotations":[{"segment":[117.05659501557632,178.7391121495327],"label":"Making a 
cake"}]},"W_ZNdQLFmAA":{"duration":58.329,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=W_ZNdQLFmAA","annotations":[{"segment":[8.735629490485742,41.67623159126365],"label":"Plastering"}]},"cDe2GlvMEQg":{"duration":68.29,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cDe2GlvMEQg","annotations":[{"segment":[33.32204836193448,49.39754134165367],"label":"Plastering"}]},"aSFtL7YEbDE":{"duration":105.048,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aSFtL7YEbDE","annotations":[{"segment":[0.2,104.84707482993197],"label":"Croquet"}]},"gh8rmWE-564":{"duration":15.07,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=gh8rmWE-564","annotations":[{"segment":[0.2,14.821000000000002],"label":"Skiing"}]},"czCM6ZNwHK8":{"duration":231.95,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=czCM6ZNwHK8","annotations":[{"segment":[9.408030111680658,218.91762375256917],"label":"Playing saxophone"}]},"u0Dc0ZtudXY":{"duration":87.77,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=u0Dc0ZtudXY","annotations":[]},"ZgdMnfTqblw":{"duration":94.09,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZgdMnfTqblw","annotations":[]},"cVnyxcDd23w":{"duration":44.82,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=cVnyxcDd23w","annotations":[]},"Mg8y0rgD16M":{"duration":185.69,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Mg8y0rgD16M","annotations":[{"segment":[49.52890951638066,149.15840303],"label":"Preparing 
salad"}]},"Xxng1g1PrdE":{"duration":203.431,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Xxng1g1PrdE","annotations":[{"segment":[24.748390015600624,203.401],"label":"Rafting"}]},"T9Q072gox7I":{"duration":221.333,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=T9Q072gox7I","annotations":[]},"rMevLL0KHzc":{"duration":147.63,"subset":"training","resolution":"1280x2276","url":"https://www.youtube.com/watch?v=rMevLL0KHzc","annotations":[{"segment":[79.81716692667707,125.88768174726991],"label":"Playing polo"}]},"hmb86jpgWfE":{"duration":169.3,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hmb86jpgWfE","annotations":[{"segment":[51.766038839313566,65.23577343525741],"label":"Drinking coffee"}]},"aFGLXYcKD2M":{"duration":143.59,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aFGLXYcKD2M","annotations":[{"segment":[4.704261941906248,132.16735916848674],"label":"Playing badminton"}]},"-qXqnltHWtE":{"duration":213.462,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-qXqnltHWtE","annotations":[{"segment":[4.315692517640726,11.951157472539638],"label":"Calf roping"},{"segment":[35.52150581157541,45.148831189491425],"label":"Calf roping"},{"segment":[86.64592333568116,92.62150460473248],"label":"Calf roping"},{"segment":[110.21627167471694,120.50755052697198],"label":"Calf roping"},{"segment":[152.0453405580762,164.66045657051785],"label":"Calf roping"},{"segment":[191.21859554407928,195.86626986445253],"label":"Calf roping"}]},"fDTm1IzQf-U":{"duration":93.62,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=fDTm1IzQf-U","annotations":[]},"aPjbJ4ZNcVQ":{"duration":225.025,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=aPjbJ4ZNcVQ","annotations":[{"segment":[38.300700577235695,196.02821027270267],"label":"Baking 
cookies"}]},"CfDdbeAk8LE":{"duration":15.349,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=CfDdbeAk8LE","annotations":[{"segment":[3.71138891575663,13.744111210608423],"label":"Welding"}]},"ezugU3qibVQ":{"duration":220.75,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=ezugU3qibVQ","annotations":[{"segment":[141.52313728549143,220.701],"label":"Removing curlers"}]},"XPcBV3UatRI":{"duration":61.07,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=XPcBV3UatRI","annotations":[{"segment":[33.13122204,52.5612276],"label":"Ironing clothes"},{"segment":[24.21895812,28.554175439999998],"label":"Ironing clothes"}]},"9LUHQzLK010":{"duration":149.07,"subset":"testing","resolution":"544x360","url":"https://www.youtube.com/watch?v=9LUHQzLK010","annotations":[]},"ipCUC6Sc2Jo":{"duration":166.56,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ipCUC6Sc2Jo","annotations":[]},"w46wkDledek":{"duration":192.42,"subset":"testing","resolution":"270x480","url":"https://www.youtube.com/watch?v=w46wkDledek","annotations":[]},"YOk1cMsyk88":{"duration":119.77000000000001,"subset":"testing","resolution":"640x352","url":"https://www.youtube.com/watch?v=YOk1cMsyk88","annotations":[]},"Izr9-P7YIKw":{"duration":49.343,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Izr9-P7YIKw","annotations":[{"segment":[0.01,47.528001922881046],"label":"Changing car wheel"}]},"E0dqgXVU6wA":{"duration":200.67000000000002,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=E0dqgXVU6wA","annotations":[{"segment":[26.13990331858172,41.479487301941056],"label":"Doing nails"}]},"l6a56j87obQ":{"duration":126.87,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=l6a56j87obQ","annotations":[{"segment":[4.353357254290172,105.27209360374414],"label":"Playing 
bagpipes"}]},"oTk_XIzeCOs":{"duration":36.27,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=oTk_XIzeCOs","annotations":[]},"3DDo37BcChI":{"duration":162.61,"subset":"training","resolution":"160x120","url":"https://www.youtube.com/watch?v=3DDo37BcChI","annotations":[{"segment":[9.3861866099844,148.40322072542904],"label":"Chopping wood"}]},"PCoxnf59j5U":{"duration":213.02,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PCoxnf59j5U","annotations":[{"segment":[68.24579577224064,184.52868941492324],"label":"Assembling bicycle"}]},"n1yugby5jC0":{"duration":110.133,"subset":"validation","resolution":"853x470","url":"https://www.youtube.com/watch?v=n1yugby5jC0","annotations":[{"segment":[0.2,109.821],"label":"Playing beach volleyball"},{"segment":[16.2550466562986,71.02244167962674],"label":"Playing beach volleyball"}]},"L9ons75VGCg":{"duration":184.855,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=L9ons75VGCg","annotations":[{"segment":[0.2,150.01372355607475],"label":"Canoeing"}]},"T7YcO-sOLuU":{"duration":93.72,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=T7YcO-sOLuU","annotations":[]},"9Pv2hUyODK4":{"duration":111.74000000000001,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=9Pv2hUyODK4","annotations":[{"segment":[47.58732040710199,51.073570910608424],"label":"Triple jump"},{"segment":[65.01857320702028,84.36726389329174],"label":"Triple 
jump"}]},"Oebnyz-Qu9s":{"duration":20.27,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Oebnyz-Qu9s","annotations":[]},"NurRl6_NLbg":{"duration":169.088,"subset":"training","resolution":"810x480","url":"https://www.youtube.com/watch?v=NurRl6_NLbg","annotations":[{"segment":[7.935602301047824,153.01866058936574],"label":"BMX"}]},"9A9wnSoXs48":{"duration":107.74000000000001,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=9A9wnSoXs48","annotations":[{"segment":[18.228902699687985,105.76123732215288],"label":"Kayaking"}]},"noKDv_a8u-Y":{"duration":209.77,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=noKDv_a8u-Y","annotations":[{"segment":[0,203.17453666146645],"label":"Playing guitarra"}]},"EG7yGT2z5aA":{"duration":130.59,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=EG7yGT2z5aA","annotations":[]},"CHMk7efu1ro":{"duration":76.371,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CHMk7efu1ro","annotations":[{"segment":[13.127798312285234,60.33555064873383],"label":"Using the rowing machine"}]},"t0ajvfx6dgA":{"duration":147.26,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=t0ajvfx6dgA","annotations":[{"segment":[18.378902013223385,21.595209865537477],"label":"Playing field hockey"},{"segment":[28.02782557016566,31.93334224797563],"label":"Playing field hockey"},{"segment":[42.271474630413785,47.32567268405022],"label":"Playing field hockey"},{"segment":[80.4076963078523,87.75925711314166],"label":"Playing field hockey"},{"segment":[97.8676529672387,100.6244882620905],"label":"Playing field hockey"}]},"Ww2_b9f6Kh0":{"duration":44.21,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Ww2_b9f6Kh0","annotations":[{"segment":[7.242017410296412,16.691125840873635],"label":"Plataform 
diving"},{"segment":[23.58828527925117,26.96789340405616],"label":"Plataform diving"},{"segment":[39.10689401560062,41.10707025273011],"label":"Plataform diving"}]},"9ku5v_hSVMw":{"duration":41.47,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=9ku5v_hSVMw","annotations":[{"segment":[0.5175767613104525,39.012348383775354],"label":"Clean and jerk"}]},"yVbzejdydrw":{"duration":110.086,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yVbzejdydrw","annotations":[{"segment":[31.495871294851796,36.98833931357254],"label":"Doing fencing"},{"segment":[47.62999609984399,51.06278861154446],"label":"Doing fencing"},{"segment":[67.19691341653666,70.4580663026521],"label":"Doing fencing"},{"segment":[94.83089313572542,98.2636856474259],"label":"Doing fencing"}]},"xqzsv8VpaNM":{"duration":163.82,"subset":"validation","resolution":"512x288","url":"https://www.youtube.com/watch?v=xqzsv8VpaNM","annotations":[{"segment":[17.11792511700468,38.323712948517944],"label":"Sailing"},{"segment":[132.3445553822153,146.65207488299532],"label":"Sailing"}]},"WsHzMciP5X8":{"duration":106.16,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=WsHzMciP5X8","annotations":[{"segment":[19.5429399216785,23.84901142984495],"label":"Cricket"},{"segment":[46.20745964532458,98.70840841796937],"label":"Cricket"}]},"STu_-YXTjU0":{"duration":161.402,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=STu_-YXTjU0","annotations":[]},"NLuNMeYBeoc":{"duration":97.46000000000001,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NLuNMeYBeoc","annotations":[{"segment":[6.3854201025183865,33.29540482027444],"label":"Using parallel bars"},{"segment":[57.31674711070076,92.8926577098284],"label":"Using parallel 
bars"}]},"Uqte3S_ErTM":{"duration":9.01,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=Uqte3S_ErTM","annotations":[{"segment":[0.01,8.320639425897035],"label":"Doing motocross"}]},"Otm5TV4XI7w":{"duration":127.733,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Otm5TV4XI7w","annotations":[{"segment":[16.029797971918875,22.601019500780033],"label":"Rock-paper-scissors"},{"segment":[24.791426677067083,27.778345553822152],"label":"Rock-paper-scissors"},{"segment":[37.734741809672386,40.32340483619345],"label":"Rock-paper-scissors"},{"segment":[48.48764976599064,53.26671996879875],"label":"Rock-paper-scissors"}]},"FtRGOgg0Qh8":{"duration":33.97,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=FtRGOgg0Qh8","annotations":[]},"i3uRumyN7mg":{"duration":52.29,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=i3uRumyN7mg","annotations":[{"segment":[4.731497035881436,52.2912],"label":"Playing accordion"}]},"ZVMGA4oF6xo":{"duration":107.137,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZVMGA4oF6xo","annotations":[{"segment":[0.01,107.081],"label":"Painting fence"}]},"ymwMEUuXygs":{"duration":178.399,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ymwMEUuXygs","annotations":[{"segment":[49.80388767550703,138.56209048361933],"label":"Assembling bicycle"}]},"QRLye5j54ic":{"duration":218.593,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=QRLye5j54ic","annotations":[{"segment":[7.159689547581904,218.561],"label":"Kite 
flying"}]},"3JrxcNxNMU4":{"duration":31.649,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=3JrxcNxNMU4","annotations":[{"segment":[0.01,31.648798],"label":"Croquet"}]},"j8bfR_d_TlM":{"duration":97.153,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=j8bfR_d_TlM","annotations":[{"segment":[11.215708985959438,85.02719920436817],"label":"Carving jack-o-lanterns"}]},"mio5dnRbo4w":{"duration":181.975,"subset":"validation","resolution":"512x288","url":"https://www.youtube.com/watch?v=mio5dnRbo4w","annotations":[{"segment":[0.01,176.4191670044727],"label":"Hand car wash"}]},"5pPQc85IXtU":{"duration":175.543,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5pPQc85IXtU","annotations":[{"segment":[9.673849949691155,14.046211320407666],"label":"BMX"},{"segment":[21.151298547821995,66.78782035467557],"label":"BMX"},{"segment":[93.06387580657675,156.49166396857052],"label":"BMX"}]},"escpZEdyNTI":{"duration":20.74,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=escpZEdyNTI","annotations":[{"segment":[13.683405271450857,17.85635865210608],"label":"Spinning"}]},"UKiwR9T4eGg":{"duration":74.3,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=UKiwR9T4eGg","annotations":[]},"87pCIcWgwVM":{"duration":212.347,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=87pCIcWgwVM","annotations":[{"segment":[22.52661619344774,62.94201583463339],"label":"Rope skipping"},{"segment":[83.48098942277691,167.29325261310453],"label":"Rope skipping"}]},"R9dE9WaGURw":{"duration":33.414,"subset":"testing","resolution":"800x480","url":"https://www.youtube.com/watch?v=R9dE9WaGURw","annotations":[]},"gmnwqOPcOo0":{"duration":136.395,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=gmnwqOPcOo0","annotations":[{"segment":[75.95021840873635,82.33258970358816],"label":"Hula 
hoop"}]},"SnZnAVuMn4M":{"duration":194.56,"subset":"validation","resolution":"854x470","url":"https://www.youtube.com/watch?v=SnZnAVuMn4M","annotations":[{"segment":[0.2,37.62577847113885],"label":"Playing beach volleyball"},{"segment":[52.49402964118565,72.21721996879876],"label":"Playing beach volleyball"},{"segment":[88.90607332293293,95.5816146645866],"label":"Playing beach volleyball"},{"segment":[120.76661154446178,124.42060842433696],"label":"Playing beach volleyball"},{"segment":[164.47797503900154,168.42301872074881],"label":"Playing beach volleyball"}]},"Oh3-DvDCpZ4":{"duration":33.6,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Oh3-DvDCpZ4","annotations":[]},"YDz0pyc26Ss":{"duration":208.098,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YDz0pyc26Ss","annotations":[{"segment":[24.997579674615555,189.59203285682446],"label":"Longboarding"}]},"Fdzw3niNDYY":{"duration":122.068,"subset":"validation","resolution":"640x352","url":"https://www.youtube.com/watch?v=Fdzw3niNDYY","annotations":[{"segment":[29.897885722308892,96.5492233198128],"label":"Doing fencing"}]},"g-Egl6eXlX4":{"duration":176.01,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=g-Egl6eXlX4","annotations":[{"segment":[16.749483799760153,110.10726235580036],"label":"Getting a haircut"}]},"UGujWA07GkM":{"duration":103.538,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UGujWA07GkM","annotations":[{"segment":[14.451387675507021,42.70829095163807],"label":"Playing blackjack"},{"segment":[57.88628471138846,62.89179329173168],"label":"Playing blackjack"},{"segment":[77.58538299531982,91.79456864274572],"label":"Playing 
blackjack"}]},"jaj2Kq5ggGw":{"duration":200.342,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=jaj2Kq5ggGw","annotations":[]},"upic9S0oEhg":{"duration":102.05,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=upic9S0oEhg","annotations":[{"segment":[0,101.8924929797192],"label":"Playing guitarra"}]},"IdHJv8SPo2Y":{"duration":47.183,"subset":"testing","resolution":"320x480","url":"https://www.youtube.com/watch?v=IdHJv8SPo2Y","annotations":[]},"ZNUkzdw2EeI":{"duration":139.06,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZNUkzdw2EeI","annotations":[{"segment":[7.1593168970358825,15.40337877847114],"label":"Discus throw"},{"segment":[68.55588301404056,77.45079188611544],"label":"Discus throw"}]},"r8AXq1Q5bn0":{"duration":168.32,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=r8AXq1Q5bn0","annotations":[{"segment":[26.921680967238693,163.49996489859595],"label":"Playing accordion"}]},"LAbzDJZtSvQ":{"duration":98.72999999999999,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=LAbzDJZtSvQ","annotations":[{"segment":[4.928862053042121,15.86477473322933],"label":"Chopping wood"},{"segment":[29.727199257410295,97.49905248673947],"label":"Chopping wood"}]},"NSi8DorB6L0":{"duration":39.73,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NSi8DorB6L0","annotations":[{"segment":[9.166533541341654,36.79000624024961],"label":"Waterskiing"}]},"ComW-O6dMW4":{"duration":137.602,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=ComW-O6dMW4","annotations":[{"segment":[0.2,137.40145124716554],"label":"Hand car wash"}]},"laKctaVegPg":{"duration":147.005,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=laKctaVegPg","annotations":[{"segment":[13.965609172017718,127.29309176947041],"label":"Mooping 
floor"}]},"GrACpo7aonA":{"duration":213.369,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GrACpo7aonA","annotations":[{"segment":[24.632179877671298,43.60561572939108],"label":"Playing drums"},{"segment":[58.584644033380386,69.236397493995],"label":"Playing drums"},{"segment":[145.46300819651833,164.43644288611543],"label":"Playing drums"},{"segment":[176.41966544461778,197.39030492199686],"label":"Playing drums"}]},"gLsHOCeeVUM":{"duration":215.6,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=gLsHOCeeVUM","annotations":[{"segment":[66.2600055678627,102.24894260218409],"label":"Washing face"}]},"pQof_-tUNtY":{"duration":150.3,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pQof_-tUNtY","annotations":[{"segment":[5.8604134165366615,45.71122464898596],"label":"Wrapping presents"},{"segment":[59.30738377535101,88.14061778471138],"label":"Wrapping presents"}]},"-NndIs9BaS4":{"duration":106.58,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=-NndIs9BaS4","annotations":[{"segment":[19.786226877646534,22.114017893915754],"label":"Rope skipping"},{"segment":[58.19478393135725,62.01901258970358],"label":"Rope skipping"},{"segment":[68.33730341653666,71.8289904524181],"label":"Rope skipping"}]},"jQ2UoAENwlw":{"duration":64.44,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jQ2UoAENwlw","annotations":[{"segment":[25.44557305928237,63.68921540873635],"label":"Getting a tattoo"}]},"39WSVNFZR5Y":{"duration":230.087,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=39WSVNFZR5Y","annotations":[]},"gnZssGiQC7A":{"duration":71.12,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gnZssGiQC7A","annotations":[{"segment":[18.196764555382213,28.4047056474259],"label":"Long 
jump"}]},"V3dTp7_NyiE":{"duration":63.786,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=V3dTp7_NyiE","annotations":[{"segment":[0,27.843182527301092],"label":"Trimming branches or hedges"},{"segment":[37.28997659906396,58.570123244929796],"label":"Trimming branches or hedges"}]},"aLv03Fznf5A":{"duration":33.67,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aLv03Fznf5A","annotations":[{"segment":[0.01,33.6687074829932],"label":"Using the pommel horse"}]},"zu960Glpzo4":{"duration":100.079,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zu960Glpzo4","annotations":[{"segment":[44.3403327425614,70.25756948645292],"label":"Arm wrestling"}]},"ojQdrhHT_gE":{"duration":95.899,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=ojQdrhHT_gE","annotations":[{"segment":[0.2,95.64099999999999],"label":"Riding bumper cars"}]},"CR_79ZjQG_w":{"duration":37.55,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CR_79ZjQG_w","annotations":[{"segment":[8.259094797191887,12.769380608424337],"label":"Hopscotch"},{"segment":[21.204200826833073,26.65168888455538],"label":"Hopscotch"},{"segment":[29.873321606864273,33.153529469578785],"label":"Hopscotch"}]},"IEtCboPbTXI":{"duration":73.05,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=IEtCboPbTXI","annotations":[{"segment":[19.024191887675507,66.41379563182528],"label":"Bathing dog"}]},"TOfBNYOlv8c":{"duration":86.01,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=TOfBNYOlv8c","annotations":[{"segment":[21.476786187987518,79.4172747823713],"label":"Mixing drinks"}]},"wq4H7L15NMA":{"duration":16.092,"subset":"validation","resolution":"800x480","url":"https://www.youtube.com/watch?v=wq4H7L15NMA","annotations":[{"segment":[0,16.091427],"label":"Using the rowing 
machine"}]},"Zm32ORZly10":{"duration":74.769,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Zm32ORZly10","annotations":[{"segment":[12.042063229107187,20.893096032845506],"label":"Tug of war"},{"segment":[25.085690518826812,59.44167311228475],"label":"Tug of war"}]},"VUlsdTzaKV4":{"duration":148.98,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VUlsdTzaKV4","annotations":[{"segment":[0.01,118.96035484555382],"label":"Snowboarding"}]},"7gYfKVOTjqA":{"duration":6.223,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=7gYfKVOTjqA","annotations":[]},"bqA-WmqF19E":{"duration":80.69,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=bqA-WmqF19E","annotations":[{"segment":[0.01,80.68936507936507],"label":"Playing pool"}]},"Feo8xSjY5A8":{"duration":54.103,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=Feo8xSjY5A8","annotations":[{"segment":[3.966953464898596,52.414427695787836],"label":"Clipping cat claws"}]},"nMFEEBtIu-Y":{"duration":227.09,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nMFEEBtIu-Y","annotations":[{"segment":[68.37521067917545,129.98461749],"label":"Playing guitarra"},{"segment":[32.3377584,49.519463460000004],"label":"Playing guitarra"},{"segment":[116.55670628729912,216.10817883055458],"label":"Playing guitarra"}]},"6it_yeIb_L0":{"duration":185.25,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6it_yeIb_L0","annotations":[{"segment":[24.564973238385317,148.25683848578433],"label":"Skateboarding"}]},"3La7NPOBVN8":{"duration":205.59,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3La7NPOBVN8","annotations":[{"segment":[68.20959332019412,198.54435614328804],"label":"Braiding 
hair"}]},"WJmvyBeKMUY":{"duration":73.05,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=WJmvyBeKMUY","annotations":[]},"2kx--bbqXHg":{"duration":225.28,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=2kx--bbqXHg","annotations":[{"segment":[34.43927925117004,221.74678783151325],"label":"Starting a campfire"}]},"zapTJ7B_rJg":{"duration":172.34,"subset":"testing","resolution":"810x480","url":"https://www.youtube.com/watch?v=zapTJ7B_rJg","annotations":[]},"9z_ylV5c9HA":{"duration":125.133,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9z_ylV5c9HA","annotations":[{"segment":[15.373915277983679,21.795931761093254],"label":"Putting in contact lenses"},{"segment":[33.083112246558564,48.262423933908465],"label":"Putting in contact lenses"},{"segment":[44.37029232758468,50.20848907719588],"label":"Putting in contact lenses"},{"segment":[89.71362041623165,108.00663689834673],"label":"Putting in contact lenses"}]},"hyYqJlwsTl8":{"duration":75.88,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=hyYqJlwsTl8","annotations":[]},"HW9SFCj0dVU":{"duration":187.55,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=HW9SFCj0dVU","annotations":[{"segment":[44.18506732238342,186.72439442518714],"label":"Starting a campfire"}]},"oS4w7-0aH8Q":{"duration":57.21,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=oS4w7-0aH8Q","annotations":[{"segment":[2.856235544659882,51.9477839685016],"label":"Using the balance beam"}]},"XksnxOAUQhY":{"duration":234.289,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XksnxOAUQhY","annotations":[]},"woLR9wHe0Vs":{"duration":84.8,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=woLR9wHe0Vs","annotations":[{"segment":[0.5289297971918877,3.305811232449298],"label":"Raking 
leaves"},{"segment":[37.818480499219966,55.14093135725429],"label":"Raking leaves"}]},"T8wY9ZQv8vQ":{"duration":175.78,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=T8wY9ZQv8vQ","annotations":[{"segment":[8.9399165,172.21019429017161],"label":"Tumbling"}]},"G6hTargh4Lc":{"duration":160.96,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=G6hTargh4Lc","annotations":[]},"RZSI1LkU00g":{"duration":103.353,"subset":"testing","resolution":"810x480","url":"https://www.youtube.com/watch?v=RZSI1LkU00g","annotations":[]},"c_DQ7Y8ZRBQ":{"duration":122.0,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=c_DQ7Y8ZRBQ","annotations":[{"segment":[37.30349107297625,42.44223704056162],"label":"Getting a haircut"},{"segment":[44.91644816848674,68.89726371606865],"label":"Getting a haircut"},{"segment":[72.32309450858035,117.23954267706708],"label":"Getting a haircut"}]},"wTBJ4PRnU4k":{"duration":233.849,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wTBJ4PRnU4k","annotations":[{"segment":[85.00253048489286,194.083030978382],"label":"Changing car wheel"}]},"QgklQUCQcV4":{"duration":209.723,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=QgklQUCQcV4","annotations":[{"segment":[81.1403684011306,209.721677],"label":"Hula hoop"}]},"Lh50gF5l0dw":{"duration":15.02,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Lh50gF5l0dw","annotations":[{"segment":[4.546836109204368,14.952997101404055],"label":"Discus throw"},{"segment":[0.36405603,3.8219231700000003],"label":"Discus 
throw"}]},"j5cmc3cnLeU":{"duration":104.14,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=j5cmc3cnLeU","annotations":[{"segment":[6.661140614332055,97.48010655120082],"label":"Tango"}]},"hJJas1Zat1s":{"duration":60.326,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hJJas1Zat1s","annotations":[{"segment":[9.599368146645865,47.526283471138846],"label":"Making a lemonade"}]},"o4uymxrCKeo":{"duration":13.352,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=o4uymxrCKeo","annotations":[{"segment":[0.01,13.321],"label":"Swinging at the playground"}]},"i69NhEDno_M":{"duration":114.382,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=i69NhEDno_M","annotations":[{"segment":[13.37843213728549,106.13556162246489],"label":"Canoeing"}]},"1ItX1KBmkLE":{"duration":71.913,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1ItX1KBmkLE","annotations":[]},"WZeMQ-5dFlM":{"duration":127.803,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WZeMQ-5dFlM","annotations":[{"segment":[15.745895475819031,90.10454290171607],"label":"Longboarding"},{"segment":[93.4934305772231,113.62741029641187],"label":"Longboarding"}]},"qNxLTF4Q6yk":{"duration":229.55,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qNxLTF4Q6yk","annotations":[{"segment":[0.01,229.55183673469386],"label":"Playing guitarra"}]},"iM8rmKLJnt8":{"duration":56.053,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=iM8rmKLJnt8","annotations":[{"segment":[0.01,47.76619234498639],"label":"Painting fence"}]},"1VDNFR3vQR4":{"duration":53.22,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1VDNFR3vQR4","annotations":[{"segment":[1.7435613995988413,52.38986872127947],"label":"Doing 
kickboxing"}]},"zxhhLuJP-PQ":{"duration":11.75,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=zxhhLuJP-PQ","annotations":[]},"dVPToPJ3xEc":{"duration":140.063,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=dVPToPJ3xEc","annotations":[]},"9l-IexiD7tY":{"duration":36.038,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9l-IexiD7tY","annotations":[]},"tMheZSxPl3Q":{"duration":91.07,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tMheZSxPl3Q","annotations":[{"segment":[47.30826685725429,51.84759081357254],"label":"Polishing forniture"}]},"3S5RoKVsAys":{"duration":150.68,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3S5RoKVsAys","annotations":[{"segment":[121.48827819344773,137.93736808424336],"label":"Shaving"}]},"vPcH4j7DhRk":{"duration":177.45,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vPcH4j7DhRk","annotations":[]},"83WXP5z80Y0":{"duration":140.667,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=83WXP5z80Y0","annotations":[{"segment":[11.884797518067641,127.94753668544448],"label":"Hula hoop"}]},"uLoeu43EkHI":{"duration":114.59,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uLoeu43EkHI","annotations":[]},"oB6h3DscMUw":{"duration":30.86,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=oB6h3DscMUw","annotations":[{"segment":[2.4986770670826832,27.022468018720748],"label":"Peeling potatoes"}]},"sfCfrWpHpu0":{"duration":154.692,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sfCfrWpHpu0","annotations":[{"segment":[13.953493858727503,40.417023656338614],"label":"Having an ice cream"},{"segment":[46.190884703090134,55.81398644767598],"label":"Having an ice 
cream"},{"segment":[60.62553731996892,64.23420047418861],"label":"Having an ice cream"},{"segment":[68.8051738028669,81.31520607082852],"label":"Having an ice cream"},{"segment":[84.68329168143356,99.11794429831235],"label":"Having an ice cream"},{"segment":[115.9583723513376,128.9495584192802],"label":"Having an ice cream"},{"segment":[144.34652105691632,151.56384729330824],"label":"Having an ice cream"}]},"Lyi8-hf1rCw":{"duration":53.24,"subset":"training","resolution":"634x360","url":"https://www.youtube.com/watch?v=Lyi8-hf1rCw","annotations":[{"segment":[6.478885647425897,41.61438088923557],"label":"Triple jump"}]},"PWrVWeH45Zo":{"duration":44.443,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=PWrVWeH45Zo","annotations":[{"segment":[0.01,42.922390908015174],"label":"Mooping floor"}]},"zGDecqEw9AU":{"duration":63.043,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zGDecqEw9AU","annotations":[{"segment":[0.2,53.82605460816913],"label":"Table soccer"}]},"8VfPKyFbroo":{"duration":111.02000000000001,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=8VfPKyFbroo","annotations":[]},"fchUWtD6HjQ":{"duration":7.85,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fchUWtD6HjQ","annotations":[]},"Z7K29Gk_j8Q":{"duration":62.3,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Z7K29Gk_j8Q","annotations":[]},"nYfc0J4XF6g":{"duration":94.97,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=nYfc0J4XF6g","annotations":[]},"5O1ym4u2n7k":{"duration":125.46,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5O1ym4u2n7k","annotations":[]},"9YYypM0JvUM":{"duration":116.38,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=9YYypM0JvUM","annotations":[{"segment":[31.579170299999998,93.31188039999999],"label":"Fixing 
bicycle"}]},"v2iIR1YLdz4":{"duration":53.62,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=v2iIR1YLdz4","annotations":[{"segment":[2.718382912187236,52.06748800624025],"label":"Using parallel bars"}]},"pPM1jC_NlzI":{"duration":67.725,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pPM1jC_NlzI","annotations":[{"segment":[0.01,62.98539157566303],"label":"Welding"}]},"37gHYr2uDZo":{"duration":63.23,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=37gHYr2uDZo","annotations":[{"segment":[4.238351014040561,51.55017628705148],"label":"Mixing drinks"}]},"9Ht1yvo936I":{"duration":79.645,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9Ht1yvo936I","annotations":[{"segment":[0.009999999999999998,79.64444444444445],"label":"Playing blackjack"}]},"wHRHS7_4J_s":{"duration":67.04,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=wHRHS7_4J_s","annotations":[{"segment":[10.286667864274571,61.876656950078],"label":"Playing 
violin"}]},"qcV3JtJDoV8":{"duration":209.119,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=qcV3JtJDoV8","annotations":[{"segment":[14.97944859813084,191.47642990654205],"label":"Skiing"}]},"njH63OVW2t0":{"duration":181.324,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=njH63OVW2t0","annotations":[]},"94bJbSWNw3o":{"duration":47.067,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=94bJbSWNw3o","annotations":[{"segment":[0.01,47.066848],"label":"Slacklining"}]},"NvKPIvDjpsg":{"duration":36.87,"subset":"testing","resolution":"324x240","url":"https://www.youtube.com/watch?v=NvKPIvDjpsg","annotations":[]},"YK9BIL9fKA0":{"duration":135.47,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=YK9BIL9fKA0","annotations":[]},"POYg9zju63U":{"duration":148.933,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=POYg9zju63U","annotations":[{"segment":[0.2,11.849521580863232],"label":"Raking leaves"},{"segment":[57.85354654186167,104.78694574449644],"label":"Raking leaves"},{"segment":[104.55460132605303,113.61600010764431],"label":"Raking leaves"}]},"uSMwc0AO4bo":{"duration":101.25999999999999,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uSMwc0AO4bo","annotations":[]},"aYrYnmUYSrA":{"duration":190.033,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aYrYnmUYSrA","annotations":[]},"Cof9eHf7VCI":{"duration":219.963,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Cof9eHf7VCI","annotations":[{"segment":[42.55115461466459,157.50790296879876],"label":"Surfing"}]},"m5PO3T2uGzs":{"duration":24.01,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=m5PO3T2uGzs","annotations":[{"segment":[0.2,22.889241256572614],"label":"Swinging at the 
playground"}]},"MIBJBSg7Ch0":{"duration":198.02,"subset":"testing","resolution":"848x480","url":"https://www.youtube.com/watch?v=MIBJBSg7Ch0","annotations":[]},"1_PiNdPt6RU":{"duration":15.07,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1_PiNdPt6RU","annotations":[{"segment":[1.5696993769470404,4.381101246105919],"label":"Gargling mouthwash"}]},"Or7mIy83LsQ":{"duration":118.17,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Or7mIy83LsQ","annotations":[{"segment":[15.034260179999999,112.08242775663027],"label":"Long jump"}]},"Zgiq5MK8fQ8":{"duration":30.047,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Zgiq5MK8fQ8","annotations":[{"segment":[0,28.827880343213728],"label":"Removing ice from car"}]},"0JpULUYPDX0":{"duration":100.03,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0JpULUYPDX0","annotations":[{"segment":[10.727559600000001,80.91442219999999],"label":"Tennis serve with ball bouncing"},{"segment":[17.774040561622463,29.62340093603744],"label":"Tennis serve with ball bouncing"},{"segment":[56.440374414976596,72.65528861154445],"label":"Tennis serve with ball bouncing"},{"segment":[85.28421216848675,97.60131045241809],"label":"Tennis serve with ball bouncing"}]},"lUX1nfb5rx0":{"duration":141.364,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=lUX1nfb5rx0","annotations":[{"segment":[32.849967238689544,112.45539781591265],"label":"Cleaning sink"}]},"NaN4aHalv-k":{"duration":18.576,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NaN4aHalv-k","annotations":[]},"IoiDAHNryTk":{"duration":47.044,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IoiDAHNryTk","annotations":[{"segment":[0.19999999999999998,33.874336832303605],"label":"Using the monkey 
bar"}]},"QhKoyVQ9blk":{"duration":52.245,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=QhKoyVQ9blk","annotations":[]},"QLbWA54l1Sg":{"duration":94.227,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=QLbWA54l1Sg","annotations":[{"segment":[3.968983067082683,39.395831925117],"label":"Arm wrestling"},{"segment":[51.44978049921996,64.53272468330734],"label":"Arm wrestling"}]},"8z29qtUWwdU":{"duration":189.777,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8z29qtUWwdU","annotations":[{"segment":[0.01,174.94060998439937],"label":"Sharpening knives"}]},"2g9GrshWQrU":{"duration":179.514,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2g9GrshWQrU","annotations":[{"segment":[3.3596443057722314,120.38725429017161],"label":"Arm wrestling"}]},"PV8QJ4QJgtY":{"duration":231.97,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PV8QJ4QJgtY","annotations":[{"segment":[113.60872698907956,215.2776833073323],"label":"Horseback riding"}]},"W5tacjehFRA":{"duration":154.2,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=W5tacjehFRA","annotations":[]},"R_JKmGmURvA":{"duration":202.27,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=R_JKmGmURvA","annotations":[]},"Z3bPr9odxpw":{"duration":203.45,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Z3bPr9odxpw","annotations":[{"segment":[35.231337408598385,199.00944492823714],"label":"Zumba"}]},"qpyAkXLIJz0":{"duration":184.74,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=qpyAkXLIJz0","annotations":[{"segment":[127.24152728694183,184.73798185941044],"label":"Playing 
guitarra"}]},"24NiBYgzKj0":{"duration":122.184,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=24NiBYgzKj0","annotations":[]},"uE3YXkZoV5c":{"duration":89.397,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uE3YXkZoV5c","annotations":[{"segment":[44.6981179138322,80.06672048286606],"label":"Shuffleboard"}]},"jSHcj_pXiI4":{"duration":164.56,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jSHcj_pXiI4","annotations":[{"segment":[6.2879001560062395,71.47674258970359],"label":"Getting a piercing"},{"segment":[101.50467394695787,160.79058970358813],"label":"Getting a piercing"}]},"eR6lcSS62sA":{"duration":75.859,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=eR6lcSS62sA","annotations":[]},"9A3z0W8U124":{"duration":95.69,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9A3z0W8U124","annotations":[{"segment":[0,95.68943310657596],"label":"Using the balance beam"}]},"XM3SktXBbHU":{"duration":7.802,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=XM3SktXBbHU","annotations":[{"segment":[0.2,7.601904761904762],"label":"Powerbocking"}]},"bzBnXFQ6rJs":{"duration":86.08,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=bzBnXFQ6rJs","annotations":[]},"fNuBBkigydA":{"duration":56.355,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fNuBBkigydA","annotations":[]},"PkRvgIFREJU":{"duration":138.97,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=PkRvgIFREJU","annotations":[]},"rML1aj5XW0M":{"duration":17.857,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rML1aj5XW0M","annotations":[{"segment":[4.387302063182528,9.624208652886116],"label":"Doing a 
powerbomb"}]},"MzqovHjEKE4":{"duration":62.624,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MzqovHjEKE4","annotations":[{"segment":[16.02235693237253,54.515091397815915],"label":"Trimming branches or hedges"}]},"o7z-u08tc-Y":{"duration":120.047,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=o7z-u08tc-Y","annotations":[{"segment":[0.01,119.981],"label":"Tug of war"}]},"GdrutQ4RrDs":{"duration":209.77,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=GdrutQ4RrDs","annotations":[{"segment":[0.01,58.251005479674966],"label":"Removing curlers"}]},"7H1KylJRii8":{"duration":123.37,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=7H1KylJRii8","annotations":[]},"uyHGyE_vNtY":{"duration":76.626,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uyHGyE_vNtY","annotations":[]},"kwaCAq-9LnM":{"duration":151.418,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=kwaCAq-9LnM","annotations":[{"segment":[65.43307136973479,141.0236231326053],"label":"Making a lemonade"}]},"qiRrR2Nj2SQ":{"duration":133.86,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=qiRrR2Nj2SQ","annotations":[{"segment":[5.429702951383362,73.30098917316693],"label":"Spinning"},{"segment":[77.06001425897036,122.37715001560062],"label":"Spinning"}]},"fX3tcnTdAN4":{"duration":117.076,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fX3tcnTdAN4","annotations":[{"segment":[9.370572552321486,111.82702426571713],"label":"Hurling"}]},"QwLiAkfSa9A":{"duration":115.00999999999999,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QwLiAkfSa9A","annotations":[{"segment":[48.62291098159126,110.70234731021894],"label":"Getting a 
piercing"}]},"wjr5M-6qEWI":{"duration":122.56,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=wjr5M-6qEWI","annotations":[]},"hsIudyKGz6A":{"duration":66.85,"subset":"training","resolution":"480x720","url":"https://www.youtube.com/watch?v=hsIudyKGz6A","annotations":[{"segment":[1.9293628931357256,63.46039570124805],"label":"Chopping wood"}]},"xJ3KAhVRr8c":{"duration":98.27000000000001,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xJ3KAhVRr8c","annotations":[{"segment":[0.01,97.19708737879715],"label":"Doing step aerobics"}]},"v34qczSoYLo":{"duration":95.78,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=v34qczSoYLo","annotations":[{"segment":[0.01,71.94853194944125],"label":"Bungee jumping"}]},"2zFmooC8gUg":{"duration":218.384,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2zFmooC8gUg","annotations":[{"segment":[0.01,218.3831746031746],"label":"Roof shingle removal"}]},"dZa5ir3baH4":{"duration":37.38,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dZa5ir3baH4","annotations":[{"segment":[31.998734789391573,37.361],"label":"Triple jump"}]},"gN27GB5txG8":{"duration":89.14,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=gN27GB5txG8","annotations":[{"segment":[13.243678370000001,68.0056689],"label":"Washing hands"}]},"s_H9cTp2tT8":{"duration":64.81,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=s_H9cTp2tT8","annotations":[{"segment":[4.4485231055500725,15.87313891419657],"label":"Playing polo"},{"segment":[25.376801703588146,40.946632230889236],"label":"Playing polo"}]},"u6f9COsww0w":{"duration":138.07,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=u6f9COsww0w","annotations":[{"segment":[3.87918,42.45432],"label":"Playing water polo"},{"segment":[58.45404,95.8034321372855],"label":"Playing 
water polo"}]},"W34A9wUi-4g":{"duration":136.186,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=W34A9wUi-4g","annotations":[{"segment":[0.2,131.68084243369734],"label":"Futsal"}]},"jvQU6RyhY7I":{"duration":47.857,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jvQU6RyhY7I","annotations":[{"segment":[0.01,47.57728237129486],"label":"Hitting a pinata"}]},"2mmN96TdEdY":{"duration":218.988,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2mmN96TdEdY","annotations":[{"segment":[0.01,218.841],"label":"Playing rubik cube"}]},"-cwPzzcNpGs":{"duration":131.89,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-cwPzzcNpGs","annotations":[{"segment":[23.250387101404055,48.35257494539782],"label":"Arm wrestling"}]},"1UIhgxQXcfI":{"duration":224.909,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1UIhgxQXcfI","annotations":[{"segment":[23.91044926584547,119.87062683593894],"label":"Baking cookies"}]},"mPlfR8V1pFg":{"duration":88.236,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mPlfR8V1pFg","annotations":[]},"Tp6MFA0SbFU":{"duration":21.549,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=Tp6MFA0SbFU","annotations":[{"segment":[6.080855814390094,20.025927465480436],"label":"Doing a powerbomb"}]},"keaMf0raxF8":{"duration":144.196,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=keaMf0raxF8","annotations":[{"segment":[30.1324399375975,131.3416911076443],"label":"Curling"}]},"MIE0KbGIXjM":{"duration":119.28,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MIE0KbGIXjM","annotations":[{"segment":[12.64877791575663,91.9803864],"label":"Washing 
hands"}]},"gY-8BUnSgf4":{"duration":205.799,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gY-8BUnSgf4","annotations":[{"segment":[6.10011030101068,13.163395845553822],"label":"Playing ten pins"},{"segment":[37.56383692511701,109.1598679875195],"label":"Playing ten pins"},{"segment":[115.90209512792514,150.25534770046804],"label":"Playing ten pins"},{"segment":[154.42910735881435,172.40837973322934],"label":"Playing ten pins"}]},"FOm0uKw7dXc":{"duration":203.76,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FOm0uKw7dXc","annotations":[{"segment":[15.00069690657999,192.70125765424356],"label":"Tango"}]},"xIld1Pt1QGs":{"duration":150.35,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xIld1Pt1QGs","annotations":[{"segment":[59.080829953198126,146.31485803432136],"label":"Camel ride"}]},"Er88I-NQkG4":{"duration":180.88,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Er88I-NQkG4","annotations":[{"segment":[11.065087844749788,156.16629509815996],"label":"Cumbia"}]},"uvglcAUnQv0":{"duration":180.03,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uvglcAUnQv0","annotations":[]},"FzxX2Crg8Ro":{"duration":177.7,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=FzxX2Crg8Ro","annotations":[]},"lydctNDJZ5o":{"duration":96.15,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lydctNDJZ5o","annotations":[{"segment":[11.25045081903276,94.65379289079563],"label":"Playing lacrosse"}]},"12Nafz_Yet4":{"duration":144.359,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=12Nafz_Yet4","annotations":[]},"7mmXZeOJT8w":{"duration":29.86,"subset":"validation","resolution":"192x144","url":"https://www.youtube.com/watch?v=7mmXZeOJT8w","annotations":[{"segment":[0.41926324492979716,22.50046081123245],"label":"Brushing 
teeth"}]},"rLH89pN9I84":{"duration":195.72,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rLH89pN9I84","annotations":[{"segment":[9.46541974876274,61.06722418556607],"label":"Walking the dog"},{"segment":[62.28856866927738,84.578105497009],"label":"Walking the dog"},{"segment":[96.7915503341222,119.08108716185383],"label":"Walking the dog"},{"segment":[140.45461562680197,143.8133129570081],"label":"Walking the dog"},{"segment":[145.64532929953197,150.22537110140405],"label":"Walking the dog"},{"segment":[152.9733961825273,165.1868409875195],"label":"Walking the dog"},{"segment":[166.71352158814352,176.17894131201248],"label":"Walking the dog"}]},"TomBet77rDc":{"duration":46.19,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TomBet77rDc","annotations":[{"segment":[1.5851018190327615,44.67105126365055],"label":"Clean and jerk"}]},"Fmr6mPyvE-g":{"duration":102.656,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Fmr6mPyvE-g","annotations":[{"segment":[36.49458346333853,79.40720124804992],"label":"Using the monkey bar"},{"segment":[87.89224492979719,99.25900156006239],"label":"Using the monkey bar"}]},"44TbmH7hgDc":{"duration":187.04,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=44TbmH7hgDc","annotations":[]},"dAiqJJKezPE":{"duration":34.134,"subset":"validation","resolution":"270x480","url":"https://www.youtube.com/watch?v=dAiqJJKezPE","annotations":[{"segment":[0.20000000000000004,33.861],"label":"Changing car wheel"}]},"Ok3m6ExNn1M":{"duration":169.41,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ok3m6ExNn1M","annotations":[]},"9VGbtQrlcN4":{"duration":113.15,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=9VGbtQrlcN4","annotations":[{"segment":[55.825947,87.80100550000002],"label":"Cleaning 
windows"}]},"ANeDHelwzK0":{"duration":53.89,"subset":"validation","resolution":"136x240","url":"https://www.youtube.com/watch?v=ANeDHelwzK0","annotations":[{"segment":[12.866621912855866,53.49645272784022],"label":"Bathing dog"}]},"6G1wS1p1ig0":{"duration":222.935,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6G1wS1p1ig0","annotations":[{"segment":[43.75352024922118,60.074277802502095],"label":"Rope skipping"},{"segment":[71.88078326657765,77.08953567719922],"label":"Rope skipping"},{"segment":[111.81455174800968,121.19030608712852],"label":"Rope skipping"},{"segment":[0.2,222.7346031746032],"label":"Rope skipping"},{"segment":[131.60781090837168,137.51106364040942],"label":"Rope skipping"},{"segment":[165.29107649705782,172.58332896261683],"label":"Rope skipping"},{"segment":[176.7503308691589,200.0160915140187],"label":"Rope skipping"}]},"ZoKdN--u6TU":{"duration":140.365,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=ZoKdN--u6TU","annotations":[{"segment":[3.9921356655489246,117.16222600822805],"label":"Shuffleboard"}]},"twL4mmkCQ0s":{"duration":54.149,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=twL4mmkCQ0s","annotations":[{"segment":[3.801407239963068,6.166727300384532],"label":"Grooming dog"},{"segment":[14.27639607897241,25.004812067312624],"label":"Grooming dog"}]},"KeHywadGvd0":{"duration":37.292,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KeHywadGvd0","annotations":[]},"6iSqTbL5WXY":{"duration":21.85,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=6iSqTbL5WXY","annotations":[{"segment":[2.4202001497659906,20.316046327613105],"label":"Powerbocking"}]},"KwhqAV_0o40":{"duration":167.602,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=KwhqAV_0o40","annotations":[{"segment":[12.284597503900155,144.29590015600624],"label":"Ice 
fishing"}]},"3zDw5mwGIW0":{"duration":126.71,"subset":"training","resolution":"1920x816","url":"https://www.youtube.com/watch?v=3zDw5mwGIW0","annotations":[{"segment":[13.080180497554137,123.01470048353545],"label":"Painting"}]},"CmEQjszPDrg":{"duration":220.845,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=CmEQjszPDrg","annotations":[{"segment":[0,220.801],"label":"Running a marathon"}]},"drjYCuezfA8":{"duration":95.55,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=drjYCuezfA8","annotations":[{"segment":[8.198529614653975,90.48195311232449],"label":"Washing dishes"}]},"H0puoztHMY8":{"duration":39.149,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=H0puoztHMY8","annotations":[{"segment":[0.01,30.94100076407538],"label":"Roof shingle removal"}]},"iZk3PH8ghlI":{"duration":131.216,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iZk3PH8ghlI","annotations":[{"segment":[9.842902059570097,112.80956257283061],"label":"Longboarding"},{"segment":[121.61188146362625,128.77655963055292],"label":"Longboarding"}]},"tRK4TZi1SvI":{"duration":59.1,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=tRK4TZi1SvI","annotations":[]},"FwbnNQBzqHw":{"duration":208.771,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FwbnNQBzqHw","annotations":[{"segment":[30.941042376388936,189.22890127033654],"label":"Making a cake"}]},"y5wu5pysE14":{"duration":187.27,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=y5wu5pysE14","annotations":[{"segment":[22.21185748,177.10083853354135],"label":"High jump"}]},"ZIFxB0gZ5tY":{"duration":83.081,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZIFxB0gZ5tY","annotations":[{"segment":[0.2,61.887959438377536],"label":"Swinging at the 
playground"}]},"CaeZoHNHul0":{"duration":174.43,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=CaeZoHNHul0","annotations":[]},"hANXaoStVR0":{"duration":25.03,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hANXaoStVR0","annotations":[{"segment":[8.240251170046802,16.49998283931357],"label":"Playing polo"},{"segment":[19.26621372854914,23.746728549141967],"label":"Playing polo"}]},"NDq29HQJVP0":{"duration":163.56,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NDq29HQJVP0","annotations":[]},"Ws3wACUl1WM":{"duration":146.704,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ws3wACUl1WM","annotations":[]},"rGuOU8U5Bvk":{"duration":169.9,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rGuOU8U5Bvk","annotations":[{"segment":[0.01,152.40676134945397],"label":"Grooming horse"}]},"r6C3l3NS_4I":{"duration":70.78,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=r6C3l3NS_4I","annotations":[]},"UQXB3JOoxYE":{"duration":197.927,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UQXB3JOoxYE","annotations":[{"segment":[35.497683307332295,115.14760218408736],"label":"Waterskiing"},{"segment":[122.24785647425898,174.41929017160686],"label":"Waterskiing"}]},"XJ-knBky6S0":{"duration":142.71,"subset":"training","resolution":"636x480","url":"https://www.youtube.com/watch?v=XJ-knBky6S0","annotations":[{"segment":[27.606860878516773,131.80049537597503],"label":"Capoeira"}]},"WV-Sf5-aCcc":{"duration":147.284,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=WV-Sf5-aCcc","annotations":[{"segment":[19.760434977943333,85.47536982589703],"label":"Capoeira"},{"segment":[102.47853479126366,134.87645722527301],"label":"Capoeira"}]},"I6nuNE-Qibw":{"duration":105.024,"subset":"validation","resolution":"1920x1080","url":"https://ww
w.youtube.com/watch?v=I6nuNE-Qibw","annotations":[{"segment":[0.01,9.224387165449862],"label":"Making a cake"},{"segment":[24.107706480091608,81.02413638663367],"label":"Making a cake"}]},"95pMelnv2dQ":{"duration":13.91,"subset":"training","resolution":"800x480","url":"https://www.youtube.com/watch?v=95pMelnv2dQ","annotations":[{"segment":[0,13.604971726989081],"label":"Playing badminton"}]},"zaPxNw11llc":{"duration":21.85,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=zaPxNw11llc","annotations":[{"segment":[10.703420380655228,20.452395631825276],"label":"Hammer throw"}]},"2mAKLFVhV9Y":{"duration":111.74000000000001,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2mAKLFVhV9Y","annotations":[{"segment":[0,111.734421],"label":"Playing lacrosse"}]},"Z56Xq8Ud4I0":{"duration":162.68,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Z56Xq8Ud4I0","annotations":[{"segment":[40.86009261322834,123.08785662991146],"label":"Washing hands"}]},"zEttEkAdHts":{"duration":48.51,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=zEttEkAdHts","annotations":[]},"xKePBw5XZHs":{"duration":210.93,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=xKePBw5XZHs","annotations":[{"segment":[61.53498084413173,85.55665744149766],"label":"Doing nails"},{"segment":[124.71528142433698,149.7241505226209],"label":"Doing nails"},{"segment":[167.16454607800313,191.84435110920438],"label":"Doing nails"}]},"grroBKRxAMY":{"duration":93.228,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=grroBKRxAMY","annotations":[{"segment":[39.560137010920435,55.26783847113885],"label":"Spread mulch"}]},"5QBRDpzedoI":{"duration":117.87,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5QBRDpzedoI","annotations":[{"segment":[0.2,17.263297971918877],"label":"Triple 
jump"},{"segment":[32.13911856474259,50.13702496099844],"label":"Triple jump"},{"segment":[51.78989391575663,56.19754446177847],"label":"Triple jump"},{"segment":[57.483109204368176,63.37074102964119],"label":"Triple jump"},{"segment":[79.90223868954759,83.94327145085803],"label":"Triple jump"},{"segment":[84.31063806552262,114.43470046801872],"label":"Triple jump"}]},"ffUtqOyJ7fM":{"duration":84.8,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ffUtqOyJ7fM","annotations":[{"segment":[27.10125585023401,77.20552886115445],"label":"Washing face"}]},"LuH7I7Rxju8":{"duration":23.69,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LuH7I7Rxju8","annotations":[]},"1FxlkazrYaY":{"duration":158.616,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1FxlkazrYaY","annotations":[]},"VrNHEv6aR38":{"duration":14.56,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=VrNHEv6aR38","annotations":[{"segment":[7.467234940000001,14.036515413416536],"label":"Playing water polo"},{"segment":[1.158353213728549,6.586912679999999],"label":"Playing water polo"}]},"HBIUe_l_zuI":{"duration":44.072,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=HBIUe_l_zuI","annotations":[]},"4cqesj6HwTU":{"duration":75.63,"subset":"validation","resolution":"720x1280","url":"https://www.youtube.com/watch?v=4cqesj6HwTU","annotations":[{"segment":[42.38903198127925,65.38169188767552],"label":"Drinking beer"}]},"KQvqRh1mY8c":{"duration":13.14,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KQvqRh1mY8c","annotations":[{"segment":[0.9576708268330733,9.82121996879875],"label":"Pole vault"}]},"W50sQxSWDwM":{"duration":116.28999999999999,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=W50sQxSWDwM","annotations":[{"segment":[10.340523204601652,111.02456493361775],"label":"Horseback 
riding"}]},"5asz3rt3QyQ":{"duration":188.454,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=5asz3rt3QyQ","annotations":[{"segment":[0.01,188.421],"label":"Building sandcastles"}]},"uK0dxEMBXfY":{"duration":94.16,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uK0dxEMBXfY","annotations":[{"segment":[43.92018183040247,91.21883918622052],"label":"Mowing the lawn"}]},"Wx-fRCjU9r0":{"duration":90.674,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Wx-fRCjU9r0","annotations":[{"segment":[0,90.601],"label":"Playing ice hockey"}]},"T4g31MwZ2ds":{"duration":150.14,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=T4g31MwZ2ds","annotations":[{"segment":[18.035568168642744,138.66306955631825],"label":"Breakdancing"}]},"_rs8dhCdI1w":{"duration":229.51,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_rs8dhCdI1w","annotations":[{"segment":[52.99047222841295,216.61645742020156],"label":"Polishing forniture"}]},"Kyo1nkGKRqw":{"duration":93.72,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Kyo1nkGKRqw","annotations":[{"segment":[37.15706035,82.5891552],"label":"Grooming horse"}]},"_b_9BQvJ_v4":{"duration":114.057,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_b_9BQvJ_v4","annotations":[{"segment":[6.761534801872075,110.31977834633385],"label":"Disc 
dog"}]},"njGsSwBc32M":{"duration":123.415,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=njGsSwBc32M","annotations":[]},"klGP18026Ek":{"duration":86.356,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=klGP18026Ek","annotations":[{"segment":[10.675837276815253,57.46665970672179],"label":"Rock-paper-scissors"}]},"cb5Xy7k7z1w":{"duration":108.02000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cb5Xy7k7z1w","annotations":[{"segment":[15.166506415358654,81.05655095319459],"label":"Using the balance beam"}]},"bpfak_VIJ8o":{"duration":235.126,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bpfak_VIJ8o","annotations":[{"segment":[76.64887519500779,123.59172698907955],"label":"Playing beach volleyball"}]},"hSlydQ9rJuk":{"duration":53.29,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=hSlydQ9rJuk","annotations":[{"segment":[6.567696920436817,32.83848460218409],"label":"Disc dog"}]},"XJQlO4G5Fns":{"duration":34.97,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=XJQlO4G5Fns","annotations":[{"segment":[3.000481806552262,32.07787822277691],"label":"Blow-drying hair"}]},"TN8tBV7CY6U":{"duration":112.199,"subset":"training","resolution":"400x224","url":"https://www.youtube.com/watch?v=TN8tBV7CY6U","annotations":[{"segment":[0.01,102.01226677067083],"label":"Playing pool"}]},"a1nRXQZ6-Fo":{"duration":187.62,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=a1nRXQZ6-Fo","annotations":[{"segment":[0.01,0.02],"label":"Playing saxophone"},{"segment":[0.2,187.381],"label":"Playing saxophone"},{"segment":[0.2,187.381],"label":"Playing saxophone"},{"segment":[0.2,187.381],"label":"Playing 
saxophone"}]},"vm64rRECzZM":{"duration":70.52,"subset":"testing","resolution":"320x188","url":"https://www.youtube.com/watch?v=vm64rRECzZM","annotations":[]},"3Z2rUp7ojhk":{"duration":157.734,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3Z2rUp7ojhk","annotations":[]},"Yn-Ajw7BzRA":{"duration":158.24,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Yn-Ajw7BzRA","annotations":[]},"3nrianTc060":{"duration":225.095,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=3nrianTc060","annotations":[{"segment":[24.0435,203.78629095163808],"label":"Skiing"}]},"6KDIwk_WjXI":{"duration":126.18,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6KDIwk_WjXI","annotations":[]},"k3_4Zbb2oE4":{"duration":140.899,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=k3_4Zbb2oE4","annotations":[]},"BMANcV_mbVw":{"duration":48.553,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BMANcV_mbVw","annotations":[]},"XKOeu8n7R7s":{"duration":75.3,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XKOeu8n7R7s","annotations":[{"segment":[7.869834633385335,14.565067082683308],"label":"Layup drill in basketball"},{"segment":[26.898390015600626,35.00314508580343],"label":"Layup drill in basketball"},{"segment":[43.81266146645866,47.92376911076443],"label":"Layup drill in basketball"},{"segment":[56.49836505460218,67.30470514820593],"label":"Layup drill in 
basketball"}]},"5WWvCSCGXmc":{"duration":231.202,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5WWvCSCGXmc","annotations":[{"segment":[3.7446647238686817,186.29704322854158],"label":"BMX"}]},"YvPoAOrjX5I":{"duration":62.67,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=YvPoAOrjX5I","annotations":[]},"6xBh3vzhgo4":{"duration":6.385,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=6xBh3vzhgo4","annotations":[]},"uBCLX7hfZLQ":{"duration":149.328,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=uBCLX7hfZLQ","annotations":[{"segment":[0.01,140.3585582334858],"label":"Clipping cat claws"}]},"HImOluKZgp0":{"duration":32.07,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HImOluKZgp0","annotations":[{"segment":[0,27.26424652106084],"label":"Windsurfing"}]},"qf5wMRlZYnQ":{"duration":231.69,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qf5wMRlZYnQ","annotations":[{"segment":[146.03526328,225.1369078283931],"label":"Belly dance"},{"segment":[27.818778159999997,116.76148447999998],"label":"Belly dance"}]},"V4BNtCtuSQQ":{"duration":34.88,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=V4BNtCtuSQQ","annotations":[{"segment":[1.5778700936037442,34.16904892355694],"label":"Mowing the lawn"}]},"K8Fq9kjfK_U":{"duration":150.303,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=K8Fq9kjfK_U","annotations":[{"segment":[36.696385254049616,140.80625779589008],"label":"Baton 
twirling"}]},"zFsMqb8__9E":{"duration":100.15,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=zFsMqb8__9E","annotations":[]},"saotMUaxVCw":{"duration":133.492,"subset":"testing","resolution":"160x120","url":"https://www.youtube.com/watch?v=saotMUaxVCw","annotations":[]},"2_Tgrk8iJpE":{"duration":70.89,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=2_Tgrk8iJpE","annotations":[{"segment":[40.9829268,60.690622464898595],"label":"Dodgeball"},{"segment":[10.1854752,37.0123779],"label":"Dodgeball"}]},"V6lFpCjRKC8":{"duration":64.23,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=V6lFpCjRKC8","annotations":[{"segment":[16.082049921996877,62.77510452418096],"label":"Javelin throw"}]},"5SNtTQZnN4g":{"duration":186.46,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5SNtTQZnN4g","annotations":[{"segment":[0.01,186.4562358276644],"label":"Washing face"}]},"W6H9iZgobyI":{"duration":63.41,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=W6H9iZgobyI","annotations":[]},"rmnuuQrkLac":{"duration":179.119,"subset":"testing","resolution":"647x480","url":"https://www.youtube.com/watch?v=rmnuuQrkLac","annotations":[]},"DRSH-_Ye9eE":{"duration":40.64,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=DRSH-_Ye9eE","annotations":[{"segment":[0,40.601],"label":"Playing lacrosse"}]},"XCfrjEJkl3M":{"duration":111.5,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=XCfrjEJkl3M","annotations":[]},"jqLwYHwYYPM":{"duration":210.234,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=jqLwYHwYYPM","annotations":[{"segment":[113.39270478843869,142.2097533616474],"label":"Laying tile"},{"segment":[163.16760686943556,165.7873385579091],"label":"Laying 
tile"}]},"owflykBauOA":{"duration":25.96,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=owflykBauOA","annotations":[{"segment":[2.510903276131045,9.922117784711388],"label":"Skateboarding"},{"segment":[15.672896255850233,17.576322932917314],"label":"Skateboarding"}]},"2kKy5PPtn5k":{"duration":213.531,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=2kKy5PPtn5k","annotations":[]},"BS9UPqgR89E":{"duration":140.48,"subset":"validation","resolution":"648x480","url":"https://www.youtube.com/watch?v=BS9UPqgR89E","annotations":[{"segment":[7.4513957429045465,40.54435872074883],"label":"Ballet"},{"segment":[60.92611742901716,130.1802652979719],"label":"Ballet"}]},"3792wPKkdXI":{"duration":53.801,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3792wPKkdXI","annotations":[{"segment":[17.70326209048362,45.55863182527301],"label":"Arm wrestling"}]},"ZQU0p5OKC04":{"duration":22.13,"subset":"training","resolution":"240x180","url":"https://www.youtube.com/watch?v=ZQU0p5OKC04","annotations":[{"segment":[12.669581443057723,18.52755312],"label":"Bungee jumping"}]},"7rpq2RXAoKE":{"duration":30.883,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=7rpq2RXAoKE","annotations":[{"segment":[0.01,26.33536193447738],"label":"Using the monkey bar"}]},"gdmGZK_vFAc":{"duration":36.432,"subset":"validation","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=gdmGZK_vFAc","annotations":[{"segment":[0,15.516327167372408],"label":"Peeling potatoes"}]},"VDX1IQnUMgo":{"duration":52.15,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VDX1IQnUMgo","annotations":[{"segment":[21.009879875195008,46.611861154446174],"label":"Playing water 
polo"}]},"0KTued0g034":{"duration":56.82,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=0KTued0g034","annotations":[{"segment":[24.286402496099846,29.338683307332293],"label":"Drum corps"}]},"Pz9rnMX6PX0":{"duration":151.21,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Pz9rnMX6PX0","annotations":[]},"5BAvlsHfTLk":{"duration":151.766,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5BAvlsHfTLk","annotations":[{"segment":[18.53335846912117,130.82092353921465],"label":"Assembling bicycle"}]},"d09u-0k3LdE":{"duration":10.73,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=d09u-0k3LdE","annotations":[]},"JH9qiuPCkY4":{"duration":149.12,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=JH9qiuPCkY4","annotations":[]},"yl3bjdUZrmM":{"duration":38.522,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=yl3bjdUZrmM","annotations":[{"segment":[5.378643488596687,8.08298900702028],"label":"Applying sunscreen"},{"segment":[8.804147877535103,11.628686787051484],"label":"Applying sunscreen"},{"segment":[16.556605735569423,18.299406339313574],"label":"Applying sunscreen"},{"segment":[31.580748871294855,34.58557749843994],"label":"Applying sunscreen"}]},"r8hXEpP7HH0":{"duration":75.024,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=r8hXEpP7HH0","annotations":[{"segment":[46.31223088923557,72.041248049922],"label":"Sumo"}]},"f8i_Yk0YCeA":{"duration":188.55,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=f8i_Yk0YCeA","annotations":[]},"KBfCXpgGhdw":{"duration":132.49,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KBfCXpgGhdw","annotations":[{"segment":[56.63503100468018,109.34281533385334],"label":"Doing 
motocross"}]},"WsqMDMOjuN4":{"duration":227.77,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=WsqMDMOjuN4","annotations":[{"segment":[29.492131547581906,193.65315390846925],"label":"Playing water polo"}]},"Bs3OMhhUlY4":{"duration":15.186,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=Bs3OMhhUlY4","annotations":[{"segment":[2.9801653666146644,10.66709984399376],"label":"Doing a powerbomb"}]},"g6L-l5sgFEQ":{"duration":150.234,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=g6L-l5sgFEQ","annotations":[{"segment":[47.10897014656096,127.9676486302652],"label":"Painting furniture"}]},"9WhPG89P-tg":{"duration":60.05,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=9WhPG89P-tg","annotations":[{"segment":[14.70093135725429,60.021],"label":"Getting a haircut"}]},"vCbul5FHGvM":{"duration":235.869,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vCbul5FHGvM","annotations":[]},"oD45QIOK4z0":{"duration":212.742,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=oD45QIOK4z0","annotations":[{"segment":[3.6394249194623414,141.60690543985467],"label":"Futsal"},{"segment":[145.2463353823119,199.17606998417816],"label":"Futsal"}]},"yePMTMv3nMA":{"duration":202.386,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yePMTMv3nMA","annotations":[]},"nFhBmNJ9zN0":{"duration":193.77,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nFhBmNJ9zN0","annotations":[{"segment":[27.3331962,185.17160686427457],"label":"Playing 
polo"}]},"EjRE-fbECtg":{"duration":80.81,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=EjRE-fbECtg","annotations":[{"segment":[7.060740900000001,42.3660615],"label":"Cheerleading"},{"segment":[49.821130800000006,74.03815851482058],"label":"Cheerleading"}]},"Q2wd5aLtZ1E":{"duration":62.253,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Q2wd5aLtZ1E","annotations":[{"segment":[12.12082573987539,27.44154947507788],"label":"Roof shingle removal"}]},"0McanL-Gob8":{"duration":228.72,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0McanL-Gob8","annotations":[{"segment":[10.880553822152885,211.7248751950078],"label":"Getting a tattoo"}]},"PsddM2OmOGo":{"duration":107.207,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=PsddM2OmOGo","annotations":[{"segment":[13.212661339106626,15.721394268330732],"label":"Hanging wallpaper"},{"segment":[22.57859815132605,26.425322280811233],"label":"Hanging wallpaper"},{"segment":[30.773793035881436,80.11221121840873],"label":"Hanging wallpaper"}]},"BtdPw9zydjM":{"duration":185.11,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=BtdPw9zydjM","annotations":[]},"wcxEkMoOmBk":{"duration":86.94,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=wcxEkMoOmBk","annotations":[]},"exc6YVfF940":{"duration":171.25,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=exc6YVfF940","annotations":[]},"8AQopjogplo":{"duration":212.14,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=8AQopjogplo","annotations":[{"segment":[86.53065348,155.96846502],"label":"Playing guitarra"},{"segment":[20.21569524,73.43803790999999],"label":"Playing 
guitarra"}]},"y9bLCC26MGQ":{"duration":36.87,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=y9bLCC26MGQ","annotations":[{"segment":[8.743743563182527,11.274827226209046],"label":"Triple jump"},{"segment":[15.9918467800312,36.873287],"label":"Triple jump"}]},"bV_vdBbOubE":{"duration":154.11,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=bV_vdBbOubE","annotations":[{"segment":[5.770140617869613,58.18225123018526],"label":"Hopscotch"}]},"gB_xHRJY7sw":{"duration":25.542,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gB_xHRJY7sw","annotations":[{"segment":[24.14050979817471,25.521],"label":"Kite flying"}]},"CtooIi6Mk7U":{"duration":194.909,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=CtooIi6Mk7U","annotations":[{"segment":[72.34346021840875,75.38310140405616],"label":"Kite flying"}]},"cwCNZAsvf8U":{"duration":69.94,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=cwCNZAsvf8U","annotations":[{"segment":[5.84891494,69.938502],"label":"Shaving legs"}]},"7yg2zbiTJJ4":{"duration":80.3,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7yg2zbiTJJ4","annotations":[{"segment":[0.010000000000000002,79.88536349453977],"label":"Wrapping presents"}]},"bZpI0-7s2Jo":{"duration":184.32,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bZpI0-7s2Jo","annotations":[]},"LfhnPDx1Iy0":{"duration":91.46000000000001,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LfhnPDx1Iy0","annotations":[]},"RRIGf87R6jc":{"duration":113.801,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=RRIGf87R6jc","annotations":[{"segment":[27.120798525017484,87.21197824299065],"label":"Baking 
cookies"}]},"9qNM5tNMxK4":{"duration":207.1,"subset":"training","resolution":"656x480","url":"https://www.youtube.com/watch?v=9qNM5tNMxK4","annotations":[{"segment":[0,207.098775],"label":"Playing squash"}]},"aR05GUXDiYg":{"duration":143.453,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=aR05GUXDiYg","annotations":[{"segment":[86.56254304929594,96.15581373107744],"label":"Table soccer"}]},"5gc896my38M":{"duration":206.867,"subset":"training","resolution":"634x480","url":"https://www.youtube.com/watch?v=5gc896my38M","annotations":[{"segment":[5.80904581489382,9.036293489834831],"label":"Applying sunscreen"},{"segment":[18.395311747163763,21.622559422104775],"label":"Applying sunscreen"},{"segment":[33.563375819386515,37.11334826182163],"label":"Applying sunscreen"},{"segment":[49.37688942659747,51.31323803156207],"label":"Applying sunscreen"},{"segment":[61.64043059137331,65.51312780130253],"label":"Applying sunscreen"},{"segment":[69.06310024373765,73.58124698865505],"label":"Applying sunscreen"},{"segment":[95.84925594574803,101.01285222565365],"label":"Applying sunscreen"},{"segment":[105.20827420307697,107.14462280804158],"label":"Applying sunscreen"},{"segment":[117.1490906003587,119.73088874031151],"label":"Applying sunscreen"},{"segment":[143.28979676738092,147.1624939773101],"label":"Applying sunscreen"},{"segment":[152.00336548972163,154.90788839716853],"label":"Applying sunscreen"}]},"ysM4zvujAh8":{"duration":29.815,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=ysM4zvujAh8","annotations":[]},"OhpILjGpQTs":{"duration":81.642,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OhpILjGpQTs","annotations":[{"segment":[14.093592078746559,74.5309845571588],"label":"Camel 
ride"}]},"0vQs3ztG7vg":{"duration":37.965,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=0vQs3ztG7vg","annotations":[{"segment":[1.1850858034321372,36.2636255850234],"label":"Using the monkey bar"}]},"2dA1fAU3o6o":{"duration":64.366,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=2dA1fAU3o6o","annotations":[{"segment":[4.050428198464616,64.365713],"label":"Blowing leaves"}]},"bsU2nEf2Tpc":{"duration":191.61,"subset":"testing","resolution":"480x270","url":"https://www.youtube.com/watch?v=bsU2nEf2Tpc","annotations":[]},"GKouuFdpCck":{"duration":18.16,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=GKouuFdpCck","annotations":[]},"8xsLp6lqijo":{"duration":127.25,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=8xsLp6lqijo","annotations":[{"segment":[96.47619360374415,110.96747371294852],"label":"Skateboarding"}]},"kvqX0QOgYDc":{"duration":99.242,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=kvqX0QOgYDc","annotations":[{"segment":[0.2,99.00099999999999],"label":"Scuba diving"}]},"SOpFjnAT_SE":{"duration":132.49,"subset":"training","resolution":"400x266","url":"https://www.youtube.com/watch?v=SOpFjnAT_SE","annotations":[{"segment":[82.6226348,129.96553042121684],"label":"Smoking hookah"},{"segment":[3.719204368174727,75.41719968798752],"label":"Smoking hookah"}]},"PDaGlIxn6Pk":{"duration":200.37,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=PDaGlIxn6Pk","annotations":[{"segment":[0,174.73327259438378],"label":"Playing field hockey"}]},"5lre5zbUV1Q":{"duration":77.23,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5lre5zbUV1Q","annotations":[{"segment":[12.65059771450858,71.44575661622464],"label":"Cleaning 
sink"}]},"G12fEYNjIR0":{"duration":48.832,"subset":"training","resolution":"424x240","url":"https://www.youtube.com/watch?v=G12fEYNjIR0","annotations":[{"segment":[9.370062296369406,15.46441142277691],"label":"Curling"},{"segment":[30.471746645865835,36.185199141965676],"label":"Curling"}]},"tnt6Wpv_kHc":{"duration":184.76,"subset":"validation","resolution":"360x240","url":"https://www.youtube.com/watch?v=tnt6Wpv_kHc","annotations":[{"segment":[105.78370097659908,166.60212306396255],"label":"Spinning"}]},"EO9ZKeBSGSw":{"duration":15.54,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=EO9ZKeBSGSw","annotations":[{"segment":[3.1019102652106083,14.394802324492979],"label":"Hopscotch"}]},"zYjLA99koBk":{"duration":105.628,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zYjLA99koBk","annotations":[]},"rHta6u5VWHk":{"duration":167.44,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rHta6u5VWHk","annotations":[]},"CfUOUDzUSu0":{"duration":102.586,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CfUOUDzUSu0","annotations":[]},"MiEIeitUNFs":{"duration":35.74,"subset":"testing","resolution":"480x352","url":"https://www.youtube.com/watch?v=MiEIeitUNFs","annotations":[]},"2vrR_5u2buU":{"duration":42.59,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2vrR_5u2buU","annotations":[]},"VXZfnNu9o8E":{"duration":45.094,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=VXZfnNu9o8E","annotations":[]},"7IwHwDuQdyU":{"duration":57.17,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7IwHwDuQdyU","annotations":[]},"9BS1II6O7I0":{"duration":101.379,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9BS1II6O7I0","annotations":[{"segment":[0.01,101.378231],"label":"Playing rubik 
cube"}]},"NjTk2naIaac":{"duration":234.986,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NjTk2naIaac","annotations":[{"segment":[14.61807506180984,149.10441693292844],"label":"Riding bumper cars"}]},"YoXZfvf5Teg":{"duration":231.11,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YoXZfvf5Teg","annotations":[{"segment":[94.288496099844,145.4891326053042],"label":"Playing violin"},{"segment":[204.2616942277691,209.30964430577222],"label":"Playing violin"}]},"DUQUohpaGjQ":{"duration":25.03,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=DUQUohpaGjQ","annotations":[]},"xcworhtkwNY":{"duration":51.317,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xcworhtkwNY","annotations":[{"segment":[0.01,50.51553583307332],"label":"Playing congas"}]},"BxyOAXTWkzk":{"duration":132.61,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BxyOAXTWkzk","annotations":[{"segment":[40.223469636505456,116.53431948673948],"label":"Horseback riding"}]},"Z1POv1Qeno0":{"duration":43.399,"subset":"training","resolution":"852x480","url":"https://www.youtube.com/watch?v=Z1POv1Qeno0","annotations":[{"segment":[0.2,7.5828182964118565],"label":"River tubing"},{"segment":[18.88934200624025,21.123565254290174],"label":"River tubing"},{"segment":[33.58105245553822,43.198094],"label":"River tubing"}]},"0j9TxzwRr6E":{"duration":176.867,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=0j9TxzwRr6E","annotations":[{"segment":[0.20000000000000004,160.545744149766],"label":"Table 
soccer"}]},"7V4Wsi6hklI":{"duration":15.16,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=7V4Wsi6hklI","annotations":[]},"Mk7Yjq1U8OI":{"duration":152.56,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Mk7Yjq1U8OI","annotations":[]},"BOqca4eckEs":{"duration":81.9,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=BOqca4eckEs","annotations":[{"segment":[5.238327517940717,20.825545985959437],"label":"Bungee jumping"}]},"VEzeFMRjtyI":{"duration":120.33,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=VEzeFMRjtyI","annotations":[]},"VLjfvly5xuE":{"duration":197.718,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VLjfvly5xuE","annotations":[]},"1mksQqxFtv4":{"duration":13.978,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1mksQqxFtv4","annotations":[{"segment":[0.2,12.942112149532711],"label":"Rope skipping"}]},"SdsoRu3953g":{"duration":185.71,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SdsoRu3953g","annotations":[{"segment":[18.252623007800313,137.6189829953198],"label":"Removing 
curlers"}]},"V66cwBOS80A":{"duration":174.057,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=V66cwBOS80A","annotations":[{"segment":[30.683912572829446,174.0565306122449],"label":"Rafting"},{"segment":[57.294738875195016,68.97091788767551],"label":"Rafting"},{"segment":[80.37555785335414,90.69404163182527],"label":"Rafting"}]},"F1gyHVC3zxk":{"duration":63.39,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=F1gyHVC3zxk","annotations":[]},"xD2qZzNCu3c":{"duration":220.287,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=xD2qZzNCu3c","annotations":[]},"YYUm4o1aDmY":{"duration":299.608,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YYUm4o1aDmY","annotations":[{"segment":[62.63236821717766,184.6252645207849],"label":"Putting on shoes"}]},"BWsjIONsXlM":{"duration":74.467,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=BWsjIONsXlM","annotations":[{"segment":[25.767585023400937,51.6651248049922],"label":"Waxing skis"}]},"lcEGoZAC7GI":{"duration":59.91,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lcEGoZAC7GI","annotations":[{"segment":[37.2435755626365,40.51465497215288],"label":"Spinning"},{"segment":[49.954055553900155,53.78589143361935],"label":"Spinning"}]},"1OJa2iiFxfk":{"duration":234.34,"subset":"training","resolution":"360x240","url":"https://www.youtube.com/watch?v=1OJa2iiFxfk","annotations":[{"segment":[16.451010772230887,223.3681684851794],"label":"Polishing shoes"}]},"JoZYs4Q3LG0":{"duration":204.591,"subset":"testing","resolution":"480x272","url":"https://www.youtube.com/watch?v=JoZYs4Q3LG0","annotations":[]},"AA46-AbEGdk":{"duration":173.18,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AA46-AbEGdk","annotations":[{"segment":[27.968897035881433,110.92978003120123],"label":"Using the balance 
beam"},{"segment":[134.43986739469577,159.0308783151326],"label":"Using the balance beam"}]},"scBelfrnHoI":{"duration":176.077,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=scBelfrnHoI","annotations":[{"segment":[6.657114697879022,43.682638766264574],"label":"Powerbocking"},{"segment":[52.45905930209012,95.79263569772874],"label":"Powerbocking"},{"segment":[106.21463508402158,146.53131692047018],"label":"Powerbocking"},{"segment":[150.0967377631493,161.61578971642032],"label":"Powerbocking"}]},"UZIs0_W2Akw":{"duration":99.75,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=UZIs0_W2Akw","annotations":[{"segment":[16.09398768,87.1982108],"label":"Grooming horse"}]},"xsRNDo8Upys":{"duration":204.27,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xsRNDo8Upys","annotations":[{"segment":[4.77896255850234,179.33037144999997],"label":"Ballet"}]},"Il9Ara3IqeY":{"duration":38.45,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=Il9Ara3IqeY","annotations":[]},"Ifcm-8vzRU4":{"duration":196.0,"subset":"testing","resolution":"512x272","url":"https://www.youtube.com/watch?v=Ifcm-8vzRU4","annotations":[]},"DTI__3fkSzM":{"duration":22.036,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=DTI__3fkSzM","annotations":[]},"4kbtdUz2M3M":{"duration":234.592,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4kbtdUz2M3M","annotations":[{"segment":[104.66926585090613,216.29208337909515],"label":"Surfing"}]},"ZnoSOqba2HM":{"duration":14.07,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZnoSOqba2HM","annotations":[{"segment":[3.7977741083765837,7.567652694046365],"label":"Brushing teeth"},{"segment":[10.153964747005867,13.901925433921754],"label":"Brushing 
teeth"}]},"QJqUOwlTiUU":{"duration":87.749,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=QJqUOwlTiUU","annotations":[]},"PF0c0UIsG0E":{"duration":80.83,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=PF0c0UIsG0E","annotations":[{"segment":[4.473867617784712,75.8037006224649],"label":"Smoking hookah"}]},"39HCogCoD7Y":{"duration":184.112,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=39HCogCoD7Y","annotations":[{"segment":[42.49308580343214,52.82924180967239],"label":"Spread mulch"},{"segment":[66.03655226209048,72.92732293291732],"label":"Spread mulch"}]},"FgZpY1V-dWo":{"duration":228.28,"subset":"testing","resolution":"2048x1556","url":"https://www.youtube.com/watch?v=FgZpY1V-dWo","annotations":[]},"-e9e4ke_wJk":{"duration":28.21,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=-e9e4ke_wJk","annotations":[{"segment":[0.09976284551174841,21.433043219343524],"label":"Hand washing clothes"}]},"eyfUkLbsixg":{"duration":197.649,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eyfUkLbsixg","annotations":[{"segment":[28.05919752574103,39.776225063962556],"label":"Running a marathon"},{"segment":[87.87770653666146,90.65279200624025],"label":"Running a marathon"},{"segment":[96.81964860530421,102.06147671450857],"label":"Running a marathon"},{"segment":[106.99496199375974,112.23679010296412],"label":"Running a marathon"},{"segment":[145.2294729079563,154.47975780655224],"label":"Running a marathon"},{"segment":[163.1133570452418,168.66352798439937],"label":"Running a marathon"},{"segment":[176.3720987332293,177.60547005304213],"label":"Running a marathon"}]},"IvkpJMxlRKc":{"duration":29.351,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IvkpJMxlRKc","annotations":[{"segment":[0.01,29.349433],"label":"Peeling 
potatoes"}]},"GOhmWfR8QBM":{"duration":52.338,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=GOhmWfR8QBM","annotations":[]},"Mk9PMED8K4g":{"duration":210.88,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Mk9PMED8K4g","annotations":[{"segment":[25.661346323240686,186.86723989231677],"label":"Doing motocross"}]},"R3ffxWaI9QU":{"duration":26.96,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=R3ffxWaI9QU","annotations":[{"segment":[1.4064599235569422,25.631157711388454],"label":"High jump"}]},"ZWudhOEyE_0":{"duration":160.149,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=ZWudhOEyE_0","annotations":[{"segment":[17.988545822152886,151.15375308892354],"label":"Grooming dog"}]},"7hfaWQgcDyo":{"duration":54.1,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7hfaWQgcDyo","annotations":[{"segment":[26.281669559999997,41.85136063806552],"label":"Making a sandwich"},{"segment":[2.233758422776911,23.614440959999996],"label":"Making a sandwich"}]},"fp0dd844oUU":{"duration":107.6,"subset":"training","resolution":"614x480","url":"https://www.youtube.com/watch?v=fp0dd844oUU","annotations":[{"segment":[17.476445088775492,91.72770048],"label":"Ping-pong"}]},"Rnux3rCLdmI":{"duration":226.67000000000002,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Rnux3rCLdmI","annotations":[{"segment":[174.82696489859595,207.72478549141965],"label":"Polishing 
shoes"}]},"kuNn6nQt7fM":{"duration":181.05,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=kuNn6nQt7fM","annotations":[]},"Tsht1n005fI":{"duration":102.818,"subset":"validation","resolution":"320x180","url":"https://www.youtube.com/watch?v=Tsht1n005fI","annotations":[{"segment":[18.941649628055266,20.222867693066746],"label":"Hurling"},{"segment":[46.327692440730296,48.890129225777024],"label":"Hurling"}]},"-i3wp-ZNVko":{"duration":28.52,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=-i3wp-ZNVko","annotations":[]},"5WqnKjOz1z4":{"duration":143.476,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5WqnKjOz1z4","annotations":[{"segment":[5.592082683307333,18.118347893915757],"label":"Raking leaves"},{"segment":[45.40771138845554,143.181],"label":"Raking leaves"}]},"UXi0Cy16-0Y":{"duration":101.031,"subset":"validation","resolution":"426x206","url":"https://www.youtube.com/watch?v=UXi0Cy16-0Y","annotations":[{"segment":[0,80.01277379095164],"label":"Rope skipping"}]},"gxcTf2r3CRk":{"duration":161.797,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gxcTf2r3CRk","annotations":[]},"47OMV7rZrQA":{"duration":228.79,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=47OMV7rZrQA","annotations":[{"segment":[19.987562982839314,220.57703434633387],"label":"Tai chi"}]},"xMEwcb1P6dQ":{"duration":48.67,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=xMEwcb1P6dQ","annotations":[{"segment":[3.438172079563183,43.8650305975039],"label":"Playing 
guitarra"}]},"DQ9EaCSFwGI":{"duration":205.241,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DQ9EaCSFwGI","annotations":[{"segment":[0,110.78541019344773],"label":"Longboarding"},{"segment":[116.54881303588144,183.46832381747268],"label":"Longboarding"}]},"Qz_PtO18pXE":{"duration":8.73,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Qz_PtO18pXE","annotations":[{"segment":[5.0495663026521065,8.701],"label":"Springboard diving"}]},"4LJOkyEM-aY":{"duration":189.731,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4LJOkyEM-aY","annotations":[]},"Q3cc0HlO7so":{"duration":143.57,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Q3cc0HlO7so","annotations":[{"segment":[48.424050720000004,113.51634624000002],"label":"Playing violin"}]},"OnvOEKfeVuY":{"duration":231.016,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OnvOEKfeVuY","annotations":[]},"yUVaugWiZ0g":{"duration":210.327,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=yUVaugWiZ0g","annotations":[]},"RisNywzGXt8":{"duration":63.53,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=RisNywzGXt8","annotations":[{"segment":[10.005617784711388,62.41127925117005],"label":"Putting on shoes"}]},"Irg5qYkjJoY":{"duration":198.345,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Irg5qYkjJoY","annotations":[{"segment":[98.11520437679752,172.06905684997284],"label":"Slacklining"}]},"3vVhq-v5-wM":{"duration":30.349,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=3vVhq-v5-wM","annotations":[{"segment":[9.838951638065524,22.18494383775351],"label":"Playing ten 
pins"}]},"YX0oF4tqJT8":{"duration":142.2,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YX0oF4tqJT8","annotations":[]},"8fqxUtHLyoE":{"duration":79.645,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8fqxUtHLyoE","annotations":[{"segment":[0.20000000000000004,76.43877691107645],"label":"Rope skipping"}]},"ZsXJw0d3QbU":{"duration":114.173,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZsXJw0d3QbU","annotations":[{"segment":[0.01,114.141],"label":"Riding bumper cars"}]},"kk1Iqyox4c8":{"duration":91.67,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=kk1Iqyox4c8","annotations":[{"segment":[6.290489859594384,82.92009360374415],"label":"Playing saxophone"}]},"N7kI8J6vfL8":{"duration":80.76,"subset":"training","resolution":"328x240","url":"https://www.youtube.com/watch?v=N7kI8J6vfL8","annotations":[{"segment":[0.01,80.75900226757369],"label":"Fun sliding down"}]},"ma-A4w_7eAo":{"duration":78.14,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=ma-A4w_7eAo","annotations":[]},"VqeeqABnpQk":{"duration":98.13,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VqeeqABnpQk","annotations":[{"segment":[8.18281053,91.39131936973479],"label":"Tango"}]},"JTGS1YulUQw":{"duration":76.46000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JTGS1YulUQw","annotations":[{"segment":[49.742902462558504,71.45323399297972],"label":"Ironing 
clothes"}]},"HBklS2vYEPo":{"duration":102.887,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HBklS2vYEPo","annotations":[]},"FbPUX8cWmeU":{"duration":131.68,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FbPUX8cWmeU","annotations":[{"segment":[6.368317078261362,125.92833275039001],"label":"Ballet"}]},"a7IHiGxI9EE":{"duration":90.071,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=a7IHiGxI9EE","annotations":[]},"0NgQr2-AieQ":{"duration":59.05,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=0NgQr2-AieQ","annotations":[{"segment":[2.763572995319813,51.58669591263651],"label":"Washing face"}]},"UwZbkuVsPno":{"duration":57.68,"subset":"training","resolution":"1920x1072","url":"https://www.youtube.com/watch?v=UwZbkuVsPno","annotations":[{"segment":[34.46304863962558,48.950126527301094],"label":"Layup drill in basketball"}]},"9vzbohxkIKA":{"duration":50.085,"subset":"training","resolution":"360x240","url":"https://www.youtube.com/watch?v=9vzbohxkIKA","annotations":[{"segment":[0.01,43.42264586583463],"label":"Playing blackjack"}]},"Wd8Kz1XRpdg":{"duration":145.07999999999998,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Wd8Kz1XRpdg","annotations":[{"segment":[4.526623244929797,62.4674007800312],"label":"Drinking beer"},{"segment":[86.45850397815911,96.19074395475819],"label":"Drinking beer"}]},"6b8h8ztnj9Q":{"duration":236.287,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=6b8h8ztnj9Q","annotations":[{"segment":[12.163202808112326,230.01457722308893],"label":"Using the rowing machine"}]},"pCUun9uE3h8":{"duration":81.363,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=pCUun9uE3h8","annotations":[{"segment":[49.27393925233645,79.8206074766355],"label":"Drum 
corps"}]},"j7fPZQE3-fQ":{"duration":74.65,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=j7fPZQE3-fQ","annotations":[{"segment":[20.306090519999998,62.34337824],"label":"Getting a piercing"}]},"aFdYbgmbOeY":{"duration":75.674,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=aFdYbgmbOeY","annotations":[{"segment":[10.152810482487327,70.71550540608425],"label":"Playing rubik cube"}]},"mnMmGnnbvSk":{"duration":143.66,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=mnMmGnnbvSk","annotations":[{"segment":[40.137608424336975,112.34045709828393],"label":"Polishing shoes"}]},"0tqPeeUvNsE":{"duration":210.03,"subset":"testing","resolution":"698x480","url":"https://www.youtube.com/watch?v=0tqPeeUvNsE","annotations":[]},"122b9bus5Bs":{"duration":160.218,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=122b9bus5Bs","annotations":[]},"rrc9Ph5juXM":{"duration":213.345,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rrc9Ph5juXM","annotations":[{"segment":[11.649087487309016,202.02845961310453],"label":"Making a cake"}]},"in5XF1bbYr0":{"duration":95.713,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=in5XF1bbYr0","annotations":[{"segment":[12.853765562553727,76.91106374820116],"label":"Disc dog"}]},"sUy3fUESkGI":{"duration":31.347,"subset":"testing","resolution":"192x144","url":"https://www.youtube.com/watch?v=sUy3fUESkGI","annotations":[]},"yRgei7gpr-I":{"duration":229.55,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=yRgei7gpr-I","annotations":[{"segment":[17.896489859594382,66.2170124804992],"label":"Shot put"},{"segment":[104.51550078003119,147.10914664586582],"label":"Shot 
put"}]},"XIRDxB7dRPA":{"duration":21.32,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=XIRDxB7dRPA","annotations":[]},"baktSTMnMv4":{"duration":95.411,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=baktSTMnMv4","annotations":[{"segment":[7.419188532949469,15.580298409766966],"label":"Playing pool"},{"segment":[19.289893808320375,29.676760924269914],"label":"Playing pool"},{"segment":[36.502416457608184,41.250698567756544],"label":"Playing pool"},{"segment":[50.15372752428473,65.14049180738235],"label":"Playing pool"}]},"hg-wVv7XSFo":{"duration":228.044,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hg-wVv7XSFo","annotations":[{"segment":[28.460867196592623,36.64336644929797],"label":"Braiding hair"},{"segment":[39.84521407522968,208.83161267706708],"label":"Braiding hair"}]},"XELYXH6fqeA":{"duration":153.484,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=XELYXH6fqeA","annotations":[{"segment":[111.66962012480501,148.0550054602184],"label":"Tug of war"}]},"mfJj5gBQg-4":{"duration":27.074,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=mfJj5gBQg-4","annotations":[{"segment":[0.2,26.17791107644306],"label":"Mooping floor"}]},"wpSDjDdGOrc":{"duration":108.28,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=wpSDjDdGOrc","annotations":[]},"c6Cbq-WNq9M":{"duration":192.773,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=c6Cbq-WNq9M","annotations":[{"segment":[89.01798853042122,101.94965578315133],"label":"Trimming branches or hedges"}]},"MAyYq3HilFc":{"duration":5.11,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=MAyYq3HilFc","annotations":[{"segment":[0.01,5.0492932917316695],"label":"Javelin 
throw"}]},"uqT5jtfx8x0":{"duration":69.94,"subset":"training","resolution":"854x478","url":"https://www.youtube.com/watch?v=uqT5jtfx8x0","annotations":[{"segment":[9.928866898595945,45.60732267706708],"label":"Playing polo"}]},"kU2HJk6l0lY":{"duration":68.592,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kU2HJk6l0lY","annotations":[]},"w6n_SrjLypA":{"duration":120.164,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=w6n_SrjLypA","annotations":[]},"7ofmJdesasE":{"duration":189.55,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7ofmJdesasE","annotations":[]},"nvcMYiZhUuo":{"duration":174.406,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nvcMYiZhUuo","annotations":[]},"4p6OjaHHSDs":{"duration":37.48,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=4p6OjaHHSDs","annotations":[{"segment":[13.622686684867393,17.71533933697348],"label":"Smoking a cigarette"}]},"nF9pKALI9Ik":{"duration":51.015,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nF9pKALI9Ik","annotations":[]},"FmRLElPPWV8":{"duration":178.562,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=FmRLElPPWV8","annotations":[{"segment":[8.634588143525741,94.7019344773791],"label":"Canoeing"},{"segment":[108.35015444617784,178.561],"label":"Canoeing"}]},"ZwxvczODMbM":{"duration":51.363,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZwxvczODMbM","annotations":[{"segment":[0.2,47.122266090342684],"label":"Arm 
wrestling"}]},"JUvqA51RX0Y":{"duration":53.87,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=JUvqA51RX0Y","annotations":[{"segment":[1.848902473105727,50.844818010407494],"label":"Hopscotch"}]},"JmL6BiuXr_g":{"duration":141.131,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JmL6BiuXr_g","annotations":[{"segment":[89.7988268330733,102.56434633385334],"label":"Waxing skis"}]},"gGg-kio0dmU":{"duration":8.64,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gGg-kio0dmU","annotations":[{"segment":[4.2313204711388455,6.1178964773790945],"label":"Springboard diving"}]},"b5NP9oI-urM":{"duration":5.898,"subset":"validation","resolution":"720x1280","url":"https://www.youtube.com/watch?v=b5NP9oI-urM","annotations":[{"segment":[0.01,5.316894717980025],"label":"Decorating the Christmas tree"}]},"Rx4gq8hrZ8s":{"duration":224.143,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Rx4gq8hrZ8s","annotations":[{"segment":[5.376111747634745,222.1660509999712],"label":"Table soccer"}]},"eVkp83uGf3Y":{"duration":17.09,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=eVkp83uGf3Y","annotations":[]},"TH1kq-KBr9Y":{"duration":38.057,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=TH1kq-KBr9Y","annotations":[{"segment":[0.2,26.869755070202807],"label":"Hanging wallpaper"}]},"lGKUEUBeo8U":{"duration":145.13,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=lGKUEUBeo8U","annotations":[{"segment":[4.754475842433698,117.88786899],"label":"Doing step 
aerobics"}]},"uWTge1GL818":{"duration":14.58,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=uWTge1GL818","annotations":[]},"icOO9whIgX8":{"duration":207.63,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=icOO9whIgX8","annotations":[{"segment":[13.277667706708268,198.8411700468019],"label":"Playing field hockey"}]},"gCxG1PInEVo":{"duration":59.722,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gCxG1PInEVo","annotations":[]},"iTc3UCWgUF0":{"duration":83.777,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iTc3UCWgUF0","annotations":[{"segment":[6.140135725429017,19.204254290171605],"label":"Putting on shoes"}]},"ThRbaI4pGIs":{"duration":125.504,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=ThRbaI4pGIs","annotations":[{"segment":[13.874970404984424,95.57664953271028],"label":"Surfing"}]},"vXcfhKnUjRc":{"duration":232.363,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=vXcfhKnUjRc","annotations":[{"segment":[16.664845553822154,223.88857722308893],"label":"Drum corps"}]},"EDkYPikPWW8":{"duration":66.386,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=EDkYPikPWW8","annotations":[{"segment":[41.11574287675508,57.27205493915758],"label":"Hitting a pinata"}]},"702l5pkas6o":{"duration":44.211,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=702l5pkas6o","annotations":[]},"pMXz0BKQOLQ":{"duration":206.658,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pMXz0BKQOLQ","annotations":[{"segment":[14.61151669046515,175.53129862192932],"label":"Having an ice cream"}]},"Vtnn6yJqHqM":{"duration":32.65,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Vtnn6yJqHqM","annotations":[{"segment":[2.7883802699999998,32.53],"label":"Playing 
piano"}]},"L73C-2b0yEw":{"duration":64.761,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=L73C-2b0yEw","annotations":[{"segment":[30.8647708049922,39.85647162870515],"label":"Waxing skis"}]},"QRYmJnPbp0U":{"duration":35.736,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=QRYmJnPbp0U","annotations":[]},"SXlSHYVNcvM":{"duration":238.52,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SXlSHYVNcvM","annotations":[{"segment":[98.6059836427457,212.09588934477378],"label":"Polishing forniture"}]},"aXCAudMzN2o":{"duration":117.54,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aXCAudMzN2o","annotations":[{"segment":[0.2,116.62256493603745],"label":"Powerbocking"}]},"bFHpaULTwsE":{"duration":74.885,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bFHpaULTwsE","annotations":[{"segment":[16.939451497659906,27.10312239625585],"label":"Spread mulch"},{"segment":[40.77150739781591,61.098849195007794],"label":"Spread mulch"}]},"PHIQBuDG-fo":{"duration":224.561,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PHIQBuDG-fo","annotations":[]},"Nd209mI-wq0":{"duration":172.99,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Nd209mI-wq0","annotations":[{"segment":[135.97909463962557,172.941666],"label":"Playing guitarra"},{"segment":[4.316796655226209,16.997386829953196],"label":"Playing 
guitarra"}]},"uLphvdhunTY":{"duration":24.892,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uLphvdhunTY","annotations":[]},"qxmrH20IA2Q":{"duration":5.34,"subset":"validation","resolution":"352x262","url":"https://www.youtube.com/watch?v=qxmrH20IA2Q","annotations":[{"segment":[0,5.340589],"label":"Curling"}]},"aTAFgaIASTk":{"duration":78.809,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aTAFgaIASTk","annotations":[]},"Ny49eEt1OJg":{"duration":85.798,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ny49eEt1OJg","annotations":[{"segment":[40.02109449141966,85.597731],"label":"Plastering"},{"segment":[42.43039114976599,85.597731],"label":"Plastering"}]},"TBeZXLchito":{"duration":180.443,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TBeZXLchito","annotations":[{"segment":[10.693413416536663,180.181],"label":"Longboarding"}]},"Ld2a5ogu9k8":{"duration":36.131,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ld2a5ogu9k8","annotations":[{"segment":[7.092365054602184,28.538326053042123],"label":"Putting on shoes"}]},"6rG5dBgwBDc":{"duration":154.018,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=6rG5dBgwBDc","annotations":[{"segment":[36.263792511700466,89.8189297971919],"label":"Decorating the Christmas tree"}]},"rc-DiL5er28":{"duration":129.82,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rc-DiL5er28","annotations":[]},"TTWzoRcW5FA":{"duration":217.9,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TTWzoRcW5FA","annotations":[]},"-fBTCykx4gM":{"duration":103.56,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=-fBTCykx4gM","annotations":[{"segment":[41.19821282647224,81.42705457722309],"label":"Brushing 
teeth"}]},"Q5kU3DUXXZg":{"duration":133.86,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Q5kU3DUXXZg","annotations":[{"segment":[19.918876755070205,122.37715001560062],"label":"Spinning"}]},"-vPshfXc1fc":{"duration":46.951,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-vPshfXc1fc","annotations":[{"segment":[5.197177847113885,46.901],"label":"Welding"}]},"AnbC-26sAI4":{"duration":108.46000000000001,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=AnbC-26sAI4","annotations":[{"segment":[3.4249039716646,44.98449962300925],"label":"Sailing"},{"segment":[62.72335075268865,91.95022055175407],"label":"Sailing"}]},"KrHpGJBzjJk":{"duration":76.324,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=KrHpGJBzjJk","annotations":[{"segment":[8.151713728549142,67.53427067082684],"label":"Playing congas"}]},"Xo4-GnduX8A":{"duration":230.598,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=Xo4-GnduX8A","annotations":[{"segment":[16.1846255850234,204.30366302652106],"label":"Futsal"}]},"DzCk5xjSF9o":{"duration":42.68,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DzCk5xjSF9o","annotations":[{"segment":[2.6976763800000003,3.42533628],"label":"Making a sandwich"}]},"giN7cA0EmLs":{"duration":117.632,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=giN7cA0EmLs","annotations":[{"segment":[0.2,111.76854606847931],"label":"River tubing"}]},"YBuNpnthXfE":{"duration":112.246,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=YBuNpnthXfE","annotations":[{"segment":[0,87.2045849953198],"label":"Tug of war"},{"segment":[94.38407894071763,109.61861487363494],"label":"Tug of 
war"}]},"UoX2xhYPD38":{"duration":81.15,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=UoX2xhYPD38","annotations":[{"segment":[6.48618246,41.463576360000005],"label":"Hand washing clothes"}]},"Y4IsLkxb5CI":{"duration":181.046,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Y4IsLkxb5CI","annotations":[{"segment":[0.19999999999999998,149.7436429525081],"label":"Swimming"}]},"HvzJ-RJOF9Q":{"duration":82.8,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=HvzJ-RJOF9Q","annotations":[{"segment":[67.4303125647426,81.25223487207488],"label":"Springboard diving"}]},"-V5_GMuMzc8":{"duration":216.55,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-V5_GMuMzc8","annotations":[{"segment":[16.55369041428324,190.53635404680188],"label":"Getting a haircut"}]},"M2ntxFBPaug":{"duration":207.66,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=M2ntxFBPaug","annotations":[{"segment":[71.59420091905717,75.15771318199667],"label":"Tennis serve with ball bouncing"},{"segment":[102.36998864444374,106.5814122279177],"label":"Tennis serve with ball bouncing"},{"segment":[131.20204240822693,139.3009339149076],"label":"Tennis serve with ball bouncing"}]},"bHAzuAnnvcU":{"duration":21.943,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=bHAzuAnnvcU","annotations":[{"segment":[0.01,21.942857142857143],"label":"Using the monkey bar"}]},"BD9CAqD1Yf8":{"duration":137.184,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=BD9CAqD1Yf8","annotations":[]},"vigHVj40dO4":{"duration":231.32,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vigHVj40dO4","annotations":[{"segment":[6.133313572542902,35.3567488299532],"label":"Mowing the lawn"},{"segment":[42.933195007800315,77.92916068642747],"label":"Mowing the 
lawn"},{"segment":[116.17217472698907,163.7955507020281],"label":"Mowing the lawn"},{"segment":[183.99940717628704,216.8306739469579],"label":"Mowing the lawn"}]},"5IkU9svJFRI":{"duration":69.57,"subset":"training","resolution":"3414x1080","url":"https://www.youtube.com/watch?v=5IkU9svJFRI","annotations":[{"segment":[14.710522620904836,25.784126365054604],"label":"Starting a campfire"}]},"pRTmUkkIze4":{"duration":147.61,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=pRTmUkkIze4","annotations":[]},"crgj7lKtIUA":{"duration":28.63,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=crgj7lKtIUA","annotations":[]},"r015El3onHw":{"duration":84.939,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=r015El3onHw","annotations":[{"segment":[13.648479239849868,14.576045790130925],"label":"BMX"},{"segment":[22.52661622111143,24.646768336039564],"label":"BMX"},{"segment":[28.224525029980793,34.05494334603316],"label":"BMX"},{"segment":[36.307604968144304,37.76520950078003],"label":"BMX"},{"segment":[39.22281407800312,44.788213372854905],"label":"BMX"},{"segment":[47.835932034321374,59.49676865210608],"label":"BMX"},{"segment":[61.881939778471136,69.30247217160685],"label":"BMX"},{"segment":[77.2530425928237,78.84315667706709],"label":"BMX"}]},"y-87JMf_-J8":{"duration":112.97,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=y-87JMf_-J8","annotations":[{"segment":[54.49372776911076,109.09481981279251],"label":"Playing guitarra"}]},"VdGZfI-8RuA":{"duration":221.32999999999998,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=VdGZfI-8RuA","annotations":[{"segment":[42.36515812,197.00982652],"label":"Fixing bicycle"}]},"RseCMmSvcPY":{"duration":122.323,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=RseCMmSvcPY","annotations":[{"segment":[12.401817472698907,122.281],"label":"Playing 
pool"}]},"9uieGqu2TYA":{"duration":232.48,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=9uieGqu2TYA","annotations":[{"segment":[33.67211352,158.4802526],"label":"Using parallel bars"}]},"DXluU6_7OXQ":{"duration":148.585,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=DXluU6_7OXQ","annotations":[{"segment":[27.03636553484146,32.35120731603294],"label":"Having an ice cream"},{"segment":[39.51468971676929,49.682213124266035],"label":"Having an ice cream"},{"segment":[85.96178528283397,92.8941876061272],"label":"Having an ice cream"},{"segment":[108.60763155953443,119.69947514273817],"label":"Having an ice cream"},{"segment":[129.17375820339134,133.56427962174283],"label":"Having an ice cream"}]},"THtDZ6g9csA":{"duration":54.31,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=THtDZ6g9csA","annotations":[{"segment":[0,54.311473],"label":"Doing nails"}]},"cWBbuw_DA2c":{"duration":47.09,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cWBbuw_DA2c","annotations":[{"segment":[0.8080900826833074,45.473432834633385],"label":"Smoking hookah"}]},"rzomS2gZGQ4":{"duration":11.866,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rzomS2gZGQ4","annotations":[{"segment":[0.01,11.821],"label":"Removing ice from car"}]},"aQAAzVQ_gSA":{"duration":170.156,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aQAAzVQ_gSA","annotations":[{"segment":[13.247741433021806,157.6481230529595],"label":"Playing ten pins"}]},"N2nNkWXYAYw":{"duration":213.67000000000002,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=N2nNkWXYAYw","annotations":[{"segment":[25.000392668767976,153.33574170177693],"label":"Vacuuming 
floor"}]},"8kjYbv52EcI":{"duration":43.004,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8kjYbv52EcI","annotations":[{"segment":[0.01,2.884768302652106],"label":"Canoeing"},{"segment":[5.367010795631826,18.516187244929796],"label":"Canoeing"},{"segment":[22.809795881435257,27.036316882995322],"label":"Canoeing"},{"segment":[36.764023950078005,43.003174],"label":"Canoeing"}]},"jJxS5NgbZAg":{"duration":155.737,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jJxS5NgbZAg","annotations":[]},"PY6WgOIZlhw":{"duration":223.35,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PY6WgOIZlhw","annotations":[{"segment":[0.01,136.59013347200553],"label":"Snatch"}]},"ifDsS6ZRsRM":{"duration":91.98,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ifDsS6ZRsRM","annotations":[{"segment":[17.88066534,79.66499682215289],"label":"Ballet"},{"segment":[9.424575780000001,12.97109322],"label":"Ballet"}]},"v5O_Yfeqza4":{"duration":31.789,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=v5O_Yfeqza4","annotations":[{"segment":[9.696504672897195,20.530864485981308],"label":"Throwing darts"}]},"jOH5bDU0MQs":{"duration":24.196,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jOH5bDU0MQs","annotations":[]},"nm2sHNDN7Lo":{"duration":14.304,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nm2sHNDN7Lo","annotations":[{"segment":[0.01,14.303491],"label":"Beer pong"}]},"Y_slRMRs18I":{"duration":39.056,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Y_slRMRs18I","annotations":[{"segment":[9.692905744047478,38.51267251315824],"label":"Fixing the 
roof"}]},"46NS8dT15Hw":{"duration":60.396,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=46NS8dT15Hw","annotations":[]},"BOVYcAeBxyY":{"duration":54.475,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=BOVYcAeBxyY","annotations":[{"segment":[12.832411798751949,48.86514426677067],"label":"Raking leaves"}]},"fk2CxZEzN7U":{"duration":99.94,"subset":"testing","resolution":"840x480","url":"https://www.youtube.com/watch?v=fk2CxZEzN7U","annotations":[]},"z5xZrF421HE":{"duration":219.2,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=z5xZrF421HE","annotations":[{"segment":[42.08189148570741,207.9888167193523],"label":"Rock climbing"}]},"EiXW33yuAcw":{"duration":52.5,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EiXW33yuAcw","annotations":[{"segment":[0.01,17.840494868941477],"label":"Drinking coffee"},{"segment":[32.93220436817473,50.99180031201248],"label":"Drinking coffee"}]},"85lhWBgqkvE":{"duration":231.43,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=85lhWBgqkvE","annotations":[]},"8ZcddBvJ0Co":{"duration":184.042,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=8ZcddBvJ0Co","annotations":[]},"NJjoTu1vS7A":{"duration":174.78,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NJjoTu1vS7A","annotations":[{"segment":[41.717347257155595,131.96860129797193],"label":"Playing lacrosse"},{"segment":[137.42184928549142,155.9628924430577],"label":"Playing 
lacrosse"}]},"9AvEM_n8iJE":{"duration":104.24000000000001,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=9AvEM_n8iJE","annotations":[]},"cEa87QoI3Do":{"duration":208.26,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cEa87QoI3Do","annotations":[]},"JvFo4uTvmYs":{"duration":14.1,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=JvFo4uTvmYs","annotations":[{"segment":[4.551581555382215,11.433924680187207],"label":"Pole vault"}]},"f2LGG2ocbu8":{"duration":211.56,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=f2LGG2ocbu8","annotations":[{"segment":[5.940758381355662,22.772907128530036],"label":"Shot put"}]},"UL4YwgCFrDI":{"duration":77.93,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UL4YwgCFrDI","annotations":[{"segment":[0.01,77.51837597503899],"label":"Tai chi"}]},"kIIAJsYMjyc":{"duration":52.106,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=kIIAJsYMjyc","annotations":[{"segment":[7.799962558502341,31.362349453978158],"label":"Kneeling"}]},"g17h49EYsJY":{"duration":72.03,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=g17h49EYsJY","annotations":[{"segment":[21.45957795830251,69.81502412341155],"label":"Kayaking"}]},"NttjvRpSdsI":{"duration":61.46,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NttjvRpSdsI","annotations":[{"segment":[27.71059355,53.22458340561622],"label":"Doing kickboxing"},{"segment":[5.27536929,23.132214679999997],"label":"Doing kickboxing"}]},"o3Nuqg4w_b8":{"duration":120.86,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=o3Nuqg4w_b8","annotations":[{"segment":[0.01,117.59917628705148],"label":"Doing step 
aerobics"}]},"390iVq_urQM":{"duration":127.524,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=390iVq_urQM","annotations":[]},"4A1cjG6k7pI":{"duration":76.51,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4A1cjG6k7pI","annotations":[]},"kzm9ckZ5gU0":{"duration":58.608,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kzm9ckZ5gU0","annotations":[{"segment":[13.064310452418097,19.10051326053042],"label":"Doing a powerbomb"}]},"KRIn0DaH3Hk":{"duration":152.625,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KRIn0DaH3Hk","annotations":[]},"ZxUbde4YHXE":{"duration":201.82999999999998,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=ZxUbde4YHXE","annotations":[]},"gIgim1Dp8HU":{"duration":62.973,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=gIgim1Dp8HU","annotations":[{"segment":[1.8626817472698909,31.18533229329173],"label":"Sumo"},{"segment":[38.834564742589706,62.660999999999994],"label":"Sumo"}]},"6ciZ58xAV9I":{"duration":50.11,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6ciZ58xAV9I","annotations":[{"segment":[5.784775347476485,39.711700929797196],"label":"Using the pommel horse"}]},"fy7vTpeBN_U":{"duration":106.22999999999999,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=fy7vTpeBN_U","annotations":[]},"GjjRDUirW7g":{"duration":214.51,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=GjjRDUirW7g","annotations":[{"segment":[64.92078801000001,178.82680029407177],"label":"Playing 
harmonica"}]},"wnsy_i-IXpM":{"duration":146.82,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wnsy_i-IXpM","annotations":[{"segment":[21.530512768810073,74.89869867447759],"label":"Skiing"},{"segment":[82.91537896073666,121.39544330733229],"label":"Skiing"}]},"WA7arPPyLPg":{"duration":188.31,"subset":"testing","resolution":"322x240","url":"https://www.youtube.com/watch?v=WA7arPPyLPg","annotations":[]},"rDADR0Lg4U8":{"duration":101.05,"subset":"training","resolution":"656x480","url":"https://www.youtube.com/watch?v=rDADR0Lg4U8","annotations":[{"segment":[13.469251170046803,98.06560062402497],"label":"Playing field hockey"}]},"bVOD0M4LCEY":{"duration":122.299,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bVOD0M4LCEY","annotations":[{"segment":[0.2,120.06265463660192],"label":"Camel ride"}]},"j4EFi_NoEbQ":{"duration":82.28999999999999,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=j4EFi_NoEbQ","annotations":[{"segment":[1.5405588631708533,70.22380811606864],"label":"Skateboarding"}]},"72PUOTjZpQU":{"duration":74.77,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=72PUOTjZpQU","annotations":[{"segment":[0.01,50.73976673352649],"label":"Zumba"}]},"B_heSKsoI9o":{"duration":64.78,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=B_heSKsoI9o","annotations":[{"segment":[14.957784393135727,36.78806431825274],"label":"Archery"},{"segment":[39.31471708736349,47.9053365023401],"label":"Archery"}]},"XgJ6iiK-gQo":{"duration":57.563,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XgJ6iiK-gQo","annotations":[{"segment":[3.681829907209894,21.013370689929637],"label":"Trimming branches or hedges"},{"segment":[34.57328083599535,53.611034012480495],"label":"Trimming branches or 
hedges"}]},"hvRNXdJa5TE":{"duration":132.22,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=hvRNXdJa5TE","annotations":[]},"tiP4Q4mGidE":{"duration":155.39,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=tiP4Q4mGidE","annotations":[]},"3nSOFmmA7FU":{"duration":83.616,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3nSOFmmA7FU","annotations":[]},"lVGvv7S6oKo":{"duration":98.453,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=lVGvv7S6oKo","annotations":[]},"7-taNK5mW8E":{"duration":26.59,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=7-taNK5mW8E","annotations":[]},"yWEFVfX-JoI":{"duration":238.121,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=yWEFVfX-JoI","annotations":[{"segment":[0.2,222.9000964230171],"label":"Canoeing"}]},"O_fdvOxYqiY":{"duration":165.025,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=O_fdvOxYqiY","annotations":[{"segment":[36.999153156589735,161.87129327102804],"label":"Playing congas"}]},"OVpila9fAaQ":{"duration":230.064,"subset":"training","resolution":"632x360","url":"https://www.youtube.com/watch?v=OVpila9fAaQ","annotations":[{"segment":[6.101523015600624,160.43416399843994],"label":"Waterskiing"}]},"hsjURYHGd7A":{"duration":158.082,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hsjURYHGd7A","annotations":[]},"T47mErD2KeA":{"duration":21.061,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=T47mErD2KeA","annotations":[{"segment":[0.01,20.281008461176793],"label":"Making an 
omelette"}]},"rcMv32d1QkI":{"duration":121.65,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rcMv32d1QkI","annotations":[{"segment":[15.18422818,97.75601014040562],"label":"Skateboarding"}]},"eS1USHkyb2U":{"duration":192.8,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eS1USHkyb2U","annotations":[{"segment":[5.1131354424244995,8.12086217326244],"label":"Brushing hair"}]},"3z98x3-Jxls":{"duration":9.29,"subset":"testing","resolution":"176x144","url":"https://www.youtube.com/watch?v=3z98x3-Jxls","annotations":[]},"ngwH6Zy5vb8":{"duration":234.89,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ngwH6Zy5vb8","annotations":[{"segment":[24.971474830000002,67.05986935803963],"label":"Playing piano"}]},"6rfFmqz6s8M":{"duration":62.16,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6rfFmqz6s8M","annotations":[{"segment":[22.449245166146643,55.226112838533545],"label":"Ping-pong"}]},"s_XdqaQj0uI":{"duration":208.214,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=s_XdqaQj0uI","annotations":[{"segment":[99.09893387996188,120.5374361857341],"label":"Carving jack-o-lanterns"}]},"xRfSkg8j7Dg":{"duration":223.423,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=xRfSkg8j7Dg","annotations":[{"segment":[90.25846957878315,160.65310608424338],"label":"Cutting the grass"},{"segment":[182.2593806552262,204.92946645865834],"label":"Cutting the 
grass"}]},"aBkLboBmHoM":{"duration":184.251,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aBkLboBmHoM","annotations":[]},"Au4PfoK7hyw":{"duration":85.64,"subset":"testing","resolution":"176x144","url":"https://www.youtube.com/watch?v=Au4PfoK7hyw","annotations":[]},"vuntaZJBcfI":{"duration":124.645,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vuntaZJBcfI","annotations":[{"segment":[0.7778141341653667,49.78010458658347],"label":"Running a marathon"},{"segment":[51.72463992199688,113.75531712168488],"label":"Running a marathon"}]},"DqHWMWY-r1o":{"duration":68.11,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DqHWMWY-r1o","annotations":[{"segment":[0,2.1761558271271415],"label":"Hammer throw"}]},"MfSdXqOC-f0":{"duration":91.81,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=MfSdXqOC-f0","annotations":[{"segment":[0.2,27.214083366055732],"label":"Archery"},{"segment":[43.542533216848675,81.2125537301092],"label":"Archery"}]},"zChDaAcfc2o":{"duration":75.605,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zChDaAcfc2o","annotations":[{"segment":[12.825496884735202,63.909070093457956],"label":"Canoeing"}]},"LNK_yYs6UOA":{"duration":14.09,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LNK_yYs6UOA","annotations":[{"segment":[0.5807124009360374,14.046666],"label":"Discus throw"}]},"MxKuqpxmKKk":{"duration":180.33,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=MxKuqpxmKKk","annotations":[{"segment":[9.542277691107644,21.049141965678626],"label":"Shot put"},{"segment":[23.575039001560064,34.239937597503896],"label":"Shot put"},{"segment":[39.57238689547582,54.447113884555385],"label":"Shot put"},{"segment":[59.21825273010921,72.40904836193448],"label":"Shot put"},{"segment":[79.9867394695788,91.77425897035882],"label":"Shot 
put"},{"segment":[94.01950078003121,115.91060842433698],"label":"Shot put"},{"segment":[115.91060842433698,147.06333853354133],"label":"Shot put"}]},"NT0dXbWzt7w":{"duration":50.55,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=NT0dXbWzt7w","annotations":[{"segment":[0.01,50.54984126984127],"label":"Having an ice cream"}]},"wqlEUeJzNVc":{"duration":138.76,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=wqlEUeJzNVc","annotations":[{"segment":[55.16634509466297,131.0200693937284],"label":"Dodgeball"}]},"n-BJ753InB0":{"duration":254.932,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=n-BJ753InB0","annotations":[{"segment":[13.124419433920215,233.05787076131045],"label":"Removing ice from car"}]},"4qnrM4k6qN0":{"duration":227.37,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=4qnrM4k6qN0","annotations":[{"segment":[18.09026457384826,218.14730721528863],"label":"Futsal"}]},"P6Oq6C9X0fw":{"duration":188.756,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=P6Oq6C9X0fw","annotations":[]},"d14twk9O2Vw":{"duration":185.9,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=d14twk9O2Vw","annotations":[{"segment":[113.93995787831514,138.01846489859594],"label":"Getting a 
haircut"}]},"fdDRoBEXdZI":{"duration":230.226,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fdDRoBEXdZI","annotations":[]},"_EFv0igpT0s":{"duration":7.73,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=_EFv0igpT0s","annotations":[]},"ZLJJ5LxfE6o":{"duration":213.856,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZLJJ5LxfE6o","annotations":[{"segment":[30.360145728549146,88.41141338533541],"label":"Capoeira"},{"segment":[93.08220503588144,177.49008272074883],"label":"Capoeira"},{"segment":[179.49185057098285,192.8369695725429],"label":"Capoeira"}]},"mU7nezFZ-Ew":{"duration":105.91,"subset":"testing","resolution":"800x480","url":"https://www.youtube.com/watch?v=mU7nezFZ-Ew","annotations":[]},"-Q03gEypilg":{"duration":202.9,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-Q03gEypilg","annotations":[{"segment":[7.12026129875195,196.67743987441497],"label":"Using the balance beam"}]},"c21Y_pi3-Gg":{"duration":168.11,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=c21Y_pi3-Gg","annotations":[{"segment":[11.331285330302515,159.80444808731187],"label":"Making a sandwich"}]},"ILmZg0vz0Hg":{"duration":236.64,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=ILmZg0vz0Hg","annotations":[]},"biyf6Q-xF0M":{"duration":166.325,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=biyf6Q-xF0M","annotations":[{"segment":[56.565910549347144,60.458060357788455],"label":"Carving jack-o-lanterns"},{"segment":[75.76718293765764,82.25409928505984],"label":"Carving jack-o-lanterns"}]},"NzJkKwzPqvk":{"duration":68.081,"subset":"training","resolution":"328x240","url":"https://www.youtube.com/watch?v=NzJkKwzPqvk","annotations":[{"segment":[0.19999999999999998,67.901],"label":"Playing 
pool"}]},"pvFviIF1VGc":{"duration":61.26,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=pvFviIF1VGc","annotations":[{"segment":[15.308010753510139,56.224749776911075],"label":"Mixing drinks"}]},"xfhwYTFCGYY":{"duration":104.53999999999999,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xfhwYTFCGYY","annotations":[{"segment":[62.95005776829712,85.4555188357194],"label":"Ironing clothes"}]},"IOQjNOM59Yw":{"duration":47.067,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=IOQjNOM59Yw","annotations":[]},"aINlF3UDJ7s":{"duration":79.88,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=aINlF3UDJ7s","annotations":[{"segment":[25.939409429999998,70.17941497659906],"label":"Mixing drinks"}]},"6ZUHLsyJ6yo":{"duration":112.918,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=6ZUHLsyJ6yo","annotations":[{"segment":[0.01,0.02],"label":"Drum corps"}]},"Aygp8JaMkqQ":{"duration":174.847,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Aygp8JaMkqQ","annotations":[{"segment":[55.91807003120125,155.47951179407178],"label":"Surfing"}]},"u8ykXBc2Efs":{"duration":104.82,"subset":"validation","resolution":"2276x720","url":"https://www.youtube.com/watch?v=u8ykXBc2Efs","annotations":[{"segment":[0,104.81487528344671],"label":"Washing dishes"}]},"KIIcgQyr0-Y":{"duration":219.52,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KIIcgQyr0-Y","annotations":[{"segment":[9.758248829953198,81.03536986],"label":"Playing guitarra"},{"segment":[99.00638348,205.6080148205928],"label":"Playing guitarra"}]},"9AzESzqOtb0":{"duration":68.03999999999999,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9AzESzqOtb0","annotations":[{"segment":[8.64561154446178,60.731443057722316],"label":"Pole 
vault"}]},"2mI7NL54yP8":{"duration":170.94,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2mI7NL54yP8","annotations":[{"segment":[45.97565775,148.35461825],"label":"Doing motocross"}]},"ZBuPedt-oEE":{"duration":153.468,"subset":"training","resolution":"810x480","url":"https://www.youtube.com/watch?v=ZBuPedt-oEE","annotations":[{"segment":[17.000235569422777,126.42428705148205],"label":"Making a cake"}]},"kYkvyRKxwfY":{"duration":23.06,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=kYkvyRKxwfY","annotations":[]},"AMU1mHpR6Os":{"duration":121.789,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AMU1mHpR6Os","annotations":[{"segment":[0.01,121.78866213151927],"label":"Grooming dog"}]},"a_v_1fL2VyM":{"duration":20.99,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=a_v_1fL2VyM","annotations":[{"segment":[0.01,18.89498749453978],"label":"Pole vault"}]},"_NwkwvaC7Bg":{"duration":62.02,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=_NwkwvaC7Bg","annotations":[{"segment":[18.383611208393916,61.53671777223089],"label":"Chopping wood"}]},"qgV5UlcDevM":{"duration":200.528,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qgV5UlcDevM","annotations":[]},"9wMEbyzxNHI":{"duration":162.15,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9wMEbyzxNHI","annotations":[{"segment":[12.00189888,121.6809648],"label":"Putting on makeup"}]},"JnP5FDtEPVE":{"duration":59.838,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JnP5FDtEPVE","annotations":[{"segment":[44.08108034321373,49.398797971918874],"label":"Grooming 
dog"}]},"Ccy8WD2k4L8":{"duration":103.538,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Ccy8WD2k4L8","annotations":[{"segment":[34.70891575663026,74.7596926677067],"label":"Playing drums"}]},"3Z4b34lBnyU":{"duration":20.318,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=3Z4b34lBnyU","annotations":[{"segment":[4.691082881410495,9.572344177847112],"label":"Doing a powerbomb"}]},"Zhf052cWrv8":{"duration":98.639,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=Zhf052cWrv8","annotations":[{"segment":[39.731479799021194,90.0477960153929],"label":"Using the rowing machine"}]},"8kO6A3W_kQ8":{"duration":80.34,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8kO6A3W_kQ8","annotations":[{"segment":[0,20.30460057098284],"label":"Brushing hair"},{"segment":[38.10246033073323,80.141043],"label":"Brushing hair"}]},"kt_sGN-1prU":{"duration":197.25,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=kt_sGN-1prU","annotations":[{"segment":[7.076572542901716,189.22139625585024],"label":"Using uneven bars"}]},"Zomv5zlkkEc":{"duration":182.67,"subset":"validation","resolution":"640x352","url":"https://www.youtube.com/watch?v=Zomv5zlkkEc","annotations":[{"segment":[0.01,182.671383],"label":"Belly dance"}]},"RnShLAifVno":{"duration":102.84,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=RnShLAifVno","annotations":[{"segment":[51.741363673946964,68.10607404524181],"label":"Washing dishes"}]},"tFiXLhbKdnk":{"duration":66.223,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=tFiXLhbKdnk","annotations":[{"segment":[4.0291886864274575,64.15708139157566],"label":"Hand car 
wash"}]},"ujltXvkQK_g":{"duration":162.26,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ujltXvkQK_g","annotations":[{"segment":[33.4906704,101.80579662],"label":"Playing lacrosse"},{"segment":[116.2924587,156.01329141107644],"label":"Playing lacrosse"}]},"I5N4rwhRa0o":{"duration":182.3,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=I5N4rwhRa0o","annotations":[{"segment":[15.926355007800312,23.03633492199688],"label":"Playing saxophone"},{"segment":[52.32945216848674,54.03584734789392],"label":"Playing saxophone"},{"segment":[109.20929148205929,120.86965854134165],"label":"Playing saxophone"},{"segment":[147.034384625585,149.87837659126365],"label":"Playing saxophone"}]},"-wXbBZDSIa8":{"duration":25.264,"subset":"validation","resolution":"852x480","url":"https://www.youtube.com/watch?v=-wXbBZDSIa8","annotations":[{"segment":[0.01,24.451013144056372],"label":"Doing a powerbomb"}]},"tCRsEiWgPdk":{"duration":10.821,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=tCRsEiWgPdk","annotations":[]},"x8AR0FD5Jqo":{"duration":196.256,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=x8AR0FD5Jqo","annotations":[{"segment":[16.22698341947283,43.16989928576735],"label":"BMX"},{"segment":[51.742645243224686,102.56678199100755],"label":"BMX"},{"segment":[106.24081597277495,132.57139284210822],"label":"BMX"},{"segment":[137.16393531931755,195.02996901560064],"label":"BMX"}]},"xmGucghu2FI":{"duration":195.79,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xmGucghu2FI","annotations":[{"segment":[9.468814953958702,187.23817957344144],"label":"Javelin throw"}]},"bQGegLwVc8I":{"duration":57.38,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=bQGegLwVc8I","annotations":[{"segment":[12.710552031201248,47.97785837129485],"label":"Springboard 
diving"}]},"Jq8b1u0vTRI":{"duration":205.73,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Jq8b1u0vTRI","annotations":[]},"x_lN_QxbEjQ":{"duration":203.709,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=x_lN_QxbEjQ","annotations":[{"segment":[12.706458658346335,31.13082371294852],"label":"Surfing"},{"segment":[37.48405304212169,65.75592355694228],"label":"Surfing"},{"segment":[69.88552262090484,100.38102340093604],"label":"Surfing"},{"segment":[103.87529953198128,126.42926365054602],"label":"Surfing"},{"segment":[130.24120124804992,149.30088923556943],"label":"Surfing"},{"segment":[152.15984243369735,156.62248517940716],"label":"Surfing"},{"segment":[160.43479719188767,175.04865990639624],"label":"Surfing"},{"segment":[178.86097191887674,196.016375975039],"label":"Surfing"}]},"HxculAXODvo":{"duration":160.381,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HxculAXODvo","annotations":[]},"9V7cMp_w1_0":{"duration":176.611,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=9V7cMp_w1_0","annotations":[{"segment":[4.1328621308117635,176.61097505668934],"label":"Removing ice from car"}]},"iGOGz6M_zcE":{"duration":41.31,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=iGOGz6M_zcE","annotations":[{"segment":[4.188828970358815,29.901794496099846],"label":"Washing face"}]},"uTc2-oAxgbc":{"duration":176.75,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=uTc2-oAxgbc","annotations":[{"segment":[42.73993600624025,131.80444781279252],"label":"Getting a piercing"}]},"Xhrk1wP8tCs":{"duration":67.594,"subset":"training","resolution":"810x480","url":"https://www.youtube.com/watch?v=Xhrk1wP8tCs","annotations":[{"segment":[15.177510140405618,67.541],"label":"Elliptical 
trainer"}]},"SNja9iUdAiI":{"duration":237.494,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SNja9iUdAiI","annotations":[]},"MAUZbZnbM_Y":{"duration":59.049,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=MAUZbZnbM_Y","annotations":[{"segment":[39.37240603940732,44.88838734781854],"label":"Hand car wash"}]},"oB8Nl31w3xc":{"duration":75.61,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=oB8Nl31w3xc","annotations":[]},"bULG65Ec5fo":{"duration":170.44,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=bULG65Ec5fo","annotations":[{"segment":[6.913098510334971,162.45781487831516],"label":"Rock climbing"}]},"ZWzPz-LX9Qg":{"duration":31.26,"subset":"validation","resolution":"270x480","url":"https://www.youtube.com/watch?v=ZWzPz-LX9Qg","annotations":[{"segment":[0.01,1.9503312921632512],"label":"Washing hands"},{"segment":[5.070861359624454,9.55662333159993],"label":"Washing hands"},{"segment":[16.1389914426509,17.89428960559783],"label":"Washing hands"},{"segment":[20.380962003105974,21.161094519971275],"label":"Washing hands"},{"segment":[27.694704348718165,29.64503564088142],"label":"Washing hands"}]},"Yp9DBq34bFQ":{"duration":81.571,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=Yp9DBq34bFQ","annotations":[{"segment":[13.166136505460218,26.0142503900156],"label":"Applying sunscreen"}]},"nHE7u40plD0":{"duration":145.519,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nHE7u40plD0","annotations":[{"segment":[0.01,140.33086563437155],"label":"Playing 
drums"}]},"ARkMGkD2RDs":{"duration":41.43,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ARkMGkD2RDs","annotations":[{"segment":[8.331870514820594,27.062432137285494],"label":"Snatch"}]},"PAhJqXTzCOU":{"duration":236.22,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=PAhJqXTzCOU","annotations":[{"segment":[130.82198520593886,172.83242432605303],"label":"Drinking coffee"}]},"MpqXCbsqVNQ":{"duration":174.197,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MpqXCbsqVNQ","annotations":[{"segment":[17.935947870567883,21.197029301580226],"label":"Hurling"},{"segment":[27.447435377687214,38.04595002847733],"label":"Hurling"},{"segment":[43.20932896091353,54.62311396945673],"label":"Hurling"},{"segment":[64.13460119188767,70.38500724024962],"label":"Hurling"},{"segment":[83.97284647581904,95.11487464898596],"label":"Hurling"}]},"sMG6vA7yo5I":{"duration":71.26,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sMG6vA7yo5I","annotations":[]},"cHSGL-OcMak":{"duration":54.242,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=cHSGL-OcMak","annotations":[{"segment":[0.01,28.686388318408735],"label":"Powerbocking"},{"segment":[36.80996731123245,54.2418139],"label":"Powerbocking"}]},"cXVWUEcSx20":{"duration":114.962,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cXVWUEcSx20","annotations":[{"segment":[10.760875332972502,54.88046365366615],"label":"Clipping cat claws"}]},"MNpV9WvVPSA":{"duration":77.51,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MNpV9WvVPSA","annotations":[{"segment":[30.92008736349454,71.03792199687987],"label":"Playing violin"}]},"W84TQm1l90U":{"duration":7.733,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=W84TQm1l90U","annotations":[{"segment":[0.01,7.026465074763967],"label":"Doing 
a powerbomb"}]},"II3AiNrQ1uA":{"duration":40.08,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=II3AiNrQ1uA","annotations":[{"segment":[11.196580769890796,34.71251921138846],"label":"Using uneven bars"}]},"8Uyi1KXdS50":{"duration":109.552,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8Uyi1KXdS50","annotations":[{"segment":[0.01,109.55174603174603],"label":"Knitting"}]},"5GuyzVay_eM":{"duration":100.682,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5GuyzVay_eM","annotations":[]},"gzdasX0KIVg":{"duration":234.568,"subset":"validation","resolution":"720x1280","url":"https://www.youtube.com/watch?v=gzdasX0KIVg","annotations":[{"segment":[0.01,234.541],"label":"Grooming dog"}]},"n1w2_7WQpXs":{"duration":115.938,"subset":"testing","resolution":"853x480","url":"https://www.youtube.com/watch?v=n1w2_7WQpXs","annotations":[]},"CzXF0CQn2Fk":{"duration":219.71,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CzXF0CQn2Fk","annotations":[{"segment":[4.455827593647964,219.0210640262345],"label":"Zumba"}]},"eVncG9w7jwg":{"duration":125.086,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=eVncG9w7jwg","annotations":[]},"xPyj6pmYyEk":{"duration":207.68,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xPyj6pmYyEk","annotations":[{"segment":[33.53323674804992,187.42973390093601],"label":"Preparing 
pasta"}]},"YH3571KWDpM":{"duration":202.76,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=YH3571KWDpM","annotations":[{"segment":[8.85675648673947,201.17489734165366],"label":"Zumba"}]},"g9sYWTE6LTg":{"duration":216.9,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=g9sYWTE6LTg","annotations":[{"segment":[67.94949215999999,160.23482772],"label":"Tango"},{"segment":[26.034146909999997,55.916046599999994],"label":"Tango"}]},"eUKMPNZ3NI4":{"duration":123.066,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=eUKMPNZ3NI4","annotations":[{"segment":[0.01,120.62617692795276],"label":"Beer pong"}]},"KU8VVtam3ig":{"duration":158.941,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KU8VVtam3ig","annotations":[{"segment":[29.258849728138784,63.228869508580345],"label":"Sharpening knives"},{"segment":[126.45773901716069,147.53402885335413],"label":"Sharpening knives"}]},"UtErhcCYBZk":{"duration":148.86,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=UtErhcCYBZk","annotations":[{"segment":[13.70191049274624,143.0572337722309],"label":"Snatch"}]},"dVcnkTR5EBE":{"duration":117.749,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dVcnkTR5EBE","annotations":[{"segment":[75.63775047238285,95.4458906779903],"label":"Removing ice from car"}]},"ZFi592_m_NQ":{"duration":136.4,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZFi592_m_NQ","annotations":[{"segment":[16.791465340000002,89.23031874],"label":"Tai 
chi"}]},"opGZh9nUlWA":{"duration":170.34,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=opGZh9nUlWA","annotations":[{"segment":[14.615892826189238,166.0896899375975],"label":"Ballet"}]},"oncTwYfvCD8":{"duration":155.06,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=oncTwYfvCD8","annotations":[]},"rcDw6If4hjc":{"duration":48.971,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rcDw6If4hjc","annotations":[{"segment":[0.01,48.921],"label":"Playing ten pins"}]},"dnzcNZBtUG4":{"duration":188.57,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=dnzcNZBtUG4","annotations":[{"segment":[5.883596661466459,166.21160568642745],"label":"Tango"}]},"MGggqAefik4":{"duration":32.021,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MGggqAefik4","annotations":[]},"nDM4E9CD_y0":{"duration":161.68,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nDM4E9CD_y0","annotations":[]},"HDhG0WkwGaU":{"duration":221.124,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=HDhG0WkwGaU","annotations":[{"segment":[8.94123010417879,13.7557414473371],"label":"Bullfighting"},{"segment":[22.69697679891682,26.823700807338227],"label":"Bullfighting"},{"segment":[29.57485014628583,31.982105656176078],"label":"Bullfighting"},{"segment":[40.23555363129272,42.29891562507188],"label":"Bullfighting"},{"segment":[49.17678893766908,59.493598906564884],"label":"Bullfighting"},{"segment":[64.30811022538292,70.49819620672041],"label":"Bullfighting"},{"segment":[78.06385685057732,81.15889984124607],"label":"Bullfighting"},{"segment":[85.28562382880439,90.10013514762242],"label":"Bullfighting"},{"segment":[101.44862611340781,103.85588177281683],"label":"Bullfighting"},{"segment":[107.63871209474529,132.74294968572505],"label":"Bullfighting"},{"segment":[142.715865988991,200.4900018148075],
"label":"Bullfighting"},{"segment":[207.3678751274047,210.11902445244357],"label":"Bullfighting"}]},"7vh_NMNnHbQ":{"duration":103.515,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=7vh_NMNnHbQ","annotations":[]},"AG6Rpvxxetg":{"duration":9.27,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=AG6Rpvxxetg","annotations":[{"segment":[0,7.804202152886115],"label":"Using parallel bars"}]},"1RMNLqb8OWI":{"duration":163.561,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=1RMNLqb8OWI","annotations":[{"segment":[3.444740042662931,149.65481740902288],"label":"Blowing leaves"}]},"6okx-34bDEg":{"duration":49.784,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=6okx-34bDEg","annotations":[{"segment":[0.01,34.64528229426044],"label":"Running a marathon"}]},"UIZkQUo8VtA":{"duration":59.653,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=UIZkQUo8VtA","annotations":[{"segment":[15.820359656786271,23.45135666770671],"label":"Roof shingle removal"}]},"lngs7qR5AXA":{"duration":6.57,"subset":"training","resolution":"360x360","url":"https://www.youtube.com/watch?v=lngs7qR5AXA","annotations":[{"segment":[0.01,0.02],"label":"Drinking coffee"}]},"arnKDX_ToxE":{"duration":133.051,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=arnKDX_ToxE","annotations":[{"segment":[13.076710496991307,76.59216082839313],"label":"Elliptical trainer"},{"segment":[108.14231921840874,130.76710385335414],"label":"Elliptical trainer"}]},"OtokKrYWhlE":{"duration":78.69,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=OtokKrYWhlE","annotations":[]},"hK3E6njydQA":{"duration":85.66,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=hK3E6njydQA","annotations":[{"segment":[2.6382664,80.04586890171608],"label":"Playing 
lacrosse"}]},"P2xT7dlyX8Y":{"duration":106.78999999999999,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=P2xT7dlyX8Y","annotations":[]},"ml4aMGCJgP0":{"duration":151.766,"subset":"validation","resolution":"638x360","url":"https://www.youtube.com/watch?v=ml4aMGCJgP0","annotations":[{"segment":[28.44984522036971,40.26456484653794],"label":"Playing ten pins"},{"segment":[51.842990080182794,61.29476578111738],"label":"Playing ten pins"},{"segment":[85.39679381850054,91.78884210515245],"label":"Playing ten pins"},{"segment":[104.31409599923344,113.29446671574435],"label":"Playing ten pins"}]},"MINjkEBIIkA":{"duration":65.597,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MINjkEBIIkA","annotations":[{"segment":[1.0227925117004681,26.081209048361938],"label":"Throwing darts"},{"segment":[27.513118564742594,36.41141341653667],"label":"Throwing darts"},{"segment":[38.6615569422777,50.0145538221529],"label":"Throwing darts"},{"segment":[53.594327613104525,62.08350546021841],"label":"Throwing darts"}]},"XzbvKQ-o5Ow":{"duration":44.37,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XzbvKQ-o5Ow","annotations":[{"segment":[5.537976536661466,41.604048731669266],"label":"Using uneven 
bars"}]},"oNYC_3fcGg0":{"duration":148.283,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=oNYC_3fcGg0","annotations":[{"segment":[9.484504512153274,139.9542724648986],"label":"Surfing"}]},"6SHSstpZN1I":{"duration":108.577,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6SHSstpZN1I","annotations":[{"segment":[21.670340093603745,91.59104680187208],"label":"BMX"}]},"uwLM5n-rYmA":{"duration":177.45,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uwLM5n-rYmA","annotations":[{"segment":[122.6031872074883,152.25552355694228],"label":"Snatch"}]},"xh8qQfCYDBY":{"duration":42.168,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xh8qQfCYDBY","annotations":[{"segment":[15.214377744099192,39.347528703850685],"label":"Welding"}]},"swOYvotIM1Y":{"duration":69.126,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=swOYvotIM1Y","annotations":[{"segment":[9.597810677052932,24.58764982436032],"label":"Arm wrestling"},{"segment":[36.98931530594557,57.802545201127764],"label":"Arm wrestling"}]},"7LkHBg_eAfU":{"duration":200.81,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=7LkHBg_eAfU","annotations":[{"segment":[22.273401520000004,54.007839313572546],"label":"Tango"},{"segment":[56.66745320000001,190.51460998439939],"label":"Tango"}]},"f4983HmCNiM":{"duration":119.723,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=f4983HmCNiM","annotations":[{"segment":[75.7661872074883,118.52111544461779],"label":"BMX"}]},"-faeAVsbBG0":{"duration":85.78,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=-faeAVsbBG0","annotations":[{"segment":[0.303387943053914,84.66948140099782],"label":"Doing step 
aerobics"}]},"2_KTq85YQcY":{"duration":148.38,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2_KTq85YQcY","annotations":[{"segment":[31.48060265811993,126.61683569111473],"label":"Ironing clothes"}]},"iQnnCZTl_7U":{"duration":75.163,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=iQnnCZTl_7U","annotations":[]},"z_ojmuWzMWo":{"duration":140.667,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=z_ojmuWzMWo","annotations":[{"segment":[4.906402826433984,132.3852532937237],"label":"Grooming dog"}]},"wFVukrQfQhI":{"duration":137.67000000000002,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=wFVukrQfQhI","annotations":[]},"bw96D55q2FI":{"duration":130.311,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bw96D55q2FI","annotations":[{"segment":[0.01,117.29967483307333],"label":"Longboarding"}]},"9SPxXA1FevM":{"duration":57.586,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=9SPxXA1FevM","annotations":[]},"yGwevg8vwuU":{"duration":179.909,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yGwevg8vwuU","annotations":[{"segment":[0.01,162.50621558190326],"label":"Capoeira"}]},"-KjJxkFUhv4":{"duration":235.1,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-KjJxkFUhv4","annotations":[{"segment":[88.02560624873975,91.69333984243724],"label":"Sailing"}]},"eeD2_hKG7zE":{"duration":74.328,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eeD2_hKG7zE","annotations":[{"segment":[6.667407342552206,69.86283439885418],"label":"Using the monkey 
bar"}]},"0Um-HgjcnXY":{"duration":91.673,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0Um-HgjcnXY","annotations":[]},"QwnEXRwaDJc":{"duration":187.06,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=QwnEXRwaDJc","annotations":[]},"nFfMY4CQq50":{"duration":234.15,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=nFfMY4CQq50","annotations":[{"segment":[116.70972269094844,124.01549562934898],"label":"Shaving"}]},"qoS5nkk7Rgk":{"duration":67.756,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qoS5nkk7Rgk","annotations":[{"segment":[0.01,67.755328],"label":"Grooming dog"}]},"0QDPpdms08g":{"duration":119.65,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0QDPpdms08g","annotations":[{"segment":[0,120],"label":"Doing kickboxing"},{"segment":[54.81941826,77.03251775999999],"label":"Doing kickboxing"}]},"mVAP-WyEc7w":{"duration":149.444,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mVAP-WyEc7w","annotations":[]},"a7QrIgqkyao":{"duration":180.512,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=a7QrIgqkyao","annotations":[{"segment":[76.07889514962284,139.8054998345359],"label":"Carving jack-o-lanterns"}]},"n1JkJu0VjQk":{"duration":33.07,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=n1JkJu0VjQk","annotations":[{"segment":[0.01,33.041],"label":"Doing karate"}]},"pmmQuksmTvw":{"duration":143.41,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=pmmQuksmTvw","annotations":[]},"q3DzEgYmIV0":{"duration":34.18,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=q3DzEgYmIV0","annotations":[{"segment":[0.01,34.121],"label":"Roof shingle 
removal"}]},"I9NukwdINyY":{"duration":202.99,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=I9NukwdINyY","annotations":[{"segment":[101.33609485179407,140.92050690327613],"label":"Painting"},{"segment":[178.92154247269892,186.52174958658347],"label":"Painting"}]},"TrhOt1WyU7Y":{"duration":62.741,"subset":"training","resolution":"350x240","url":"https://www.youtube.com/watch?v=TrhOt1WyU7Y","annotations":[{"segment":[0.2,62.520999999999994],"label":"Kite flying"}]},"nUoN18FTeug":{"duration":197.02,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=nUoN18FTeug","annotations":[{"segment":[12.904090483619346,186.2064992199688],"label":"Playing guitarra"}]},"aJsAuChVmlY":{"duration":21.015,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aJsAuChVmlY","annotations":[{"segment":[0.01,21.014058],"label":"Blow-drying hair"}]},"Szze8i9UsRo":{"duration":225.19,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Szze8i9UsRo","annotations":[{"segment":[72.36902770260471,93.44738479563182],"label":"Doing nails"},{"segment":[134.90148782527302,152.11547976131044],"label":"Doing nails"},{"segment":[176.3555908549142,181.97648618096724],"label":"Doing nails"}]},"U9Dcet1qdRE":{"duration":29.33,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=U9Dcet1qdRE","annotations":[{"segment":[6.176471349453978,20.47863906],"label":"Clean and jerk"}]},"VqTyewe2R1A":{"duration":166.441,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VqTyewe2R1A","annotations":[{"segment":[2.4369756525975523,164.72915766194336],"label":"Drum corps"}]},"N2WxAkVh-C4":{"duration":177.587,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=N2WxAkVh-C4","annotations":[{"segment":[90.87094838351356,151.2668826084243],"label":"Blow-drying 
hair"}]},"iaqFvTpoceI":{"duration":194.89,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iaqFvTpoceI","annotations":[{"segment":[29.49118965054602,186.06812439313572],"label":"Cheerleading"}]},"VzR7cskYnng":{"duration":127.8,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=VzR7cskYnng","annotations":[{"segment":[0.01,127.802629],"label":"Doing kickboxing"}]},"M0UsJ31h64U":{"duration":224.93,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=M0UsJ31h64U","annotations":[{"segment":[0.01,0.02],"label":"Playing water polo"}]},"gXUm7nobsfU":{"duration":153.97,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=gXUm7nobsfU","annotations":[{"segment":[18.495801451105663,125.62732514820593],"label":"Tango"}]},"IBte7CKOLSs":{"duration":161.077,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=IBte7CKOLSs","annotations":[{"segment":[24.375120223856573,140.7223423400936],"label":"Playing ten pins"},{"segment":[145.24556048673946,151.5278079126365],"label":"Playing ten pins"}]},"Iphl3YB-yYQ":{"duration":96.944,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Iphl3YB-yYQ","annotations":[{"segment":[0.01,96.94285714285714],"label":"Roof shingle removal"}]},"ktYrQ5S-JE0":{"duration":183.879,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=ktYrQ5S-JE0","annotations":[]},"QN2wHsiBEC0":{"duration":163.42000000000002,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QN2wHsiBEC0","annotations":[{"segment":[126.67297191887675,134.50284288],"label":"Playing saxophone"},{"segment":[111.12558502340093,120.30109204368175],"label":"Playing saxophone"},{"segment":[8.66575663026521,13.763260530421217],"label":"Playing 
saxophone"}]},"9d8ttpHu3cc":{"duration":23.36,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=9d8ttpHu3cc","annotations":[{"segment":[0,21.356360374414976],"label":"Kayaking"}]},"y6VBDPLK9Vo":{"duration":160.22,"subset":"testing","resolution":"424x240","url":"https://www.youtube.com/watch?v=y6VBDPLK9Vo","annotations":[]},"wVahCLOq4s0":{"duration":165.466,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=wVahCLOq4s0","annotations":[{"segment":[63.99289859594384,88.5063073322933],"label":"Carving jack-o-lanterns"},{"segment":[91.34470202808112,100.90422932917316],"label":"Carving jack-o-lanterns"},{"segment":[110.45271138845554,131.61421216848674],"label":"Carving jack-o-lanterns"}]},"291szrilAVE":{"duration":186.178,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=291szrilAVE","annotations":[{"segment":[76.3850137182001,184.26362016679823],"label":"Powerbocking"}]},"qOi_E_uYCIo":{"duration":85.36,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=qOi_E_uYCIo","annotations":[{"segment":[16.24570905013071,77.36686005148206],"label":"Mixing 
drinks"}]},"mShp2gzuPOM":{"duration":211.79,"subset":"testing","resolution":"800x480","url":"https://www.youtube.com/watch?v=mShp2gzuPOM","annotations":[]},"_gnMuU1UJnM":{"duration":214.181,"subset":"training","resolution":"640x356","url":"https://www.youtube.com/watch?v=_gnMuU1UJnM","annotations":[{"segment":[54.50255409151594,63.84203695755956],"label":"Curling"},{"segment":[106.20326281425737,141.2263235619209],"label":"Curling"},{"segment":[153.56778306347857,172.91385471456888],"label":"Curling"}]},"MVA568rRjzQ":{"duration":126.13,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=MVA568rRjzQ","annotations":[]},"7FtSO6hPcxU":{"duration":222.52,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7FtSO6hPcxU","annotations":[{"segment":[191.27421220592825,222.516824],"label":"Getting a haircut"}]},"-EswFKNXjMo":{"duration":49.737,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=-EswFKNXjMo","annotations":[]},"qEpr1LBc-RE":{"duration":144.38,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=qEpr1LBc-RE","annotations":[]},"gqK_jApRT5E":{"duration":176.867,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=gqK_jApRT5E","annotations":[{"segment":[27.59226124147007,72.01580184023688],"label":"Painting fence"},{"segment":[80.01755760026319,91.88222993409532],"label":"Painting fence"},{"segment":[100.71175264430576,110.36904399375975],"label":"Painting fence"},{"segment":[120.57818056318251,121.68187100312012],"label":"Painting 
fence"}]},"PXvqhB91EkA":{"duration":52.73,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=PXvqhB91EkA","annotations":[{"segment":[2.00123352,40.89046411856474],"label":"Spinning"}]},"0V8mzi_89Fw":{"duration":119.56,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=0V8mzi_89Fw","annotations":[{"segment":[11.372170046801871,102.36665990639625],"label":"Roof shingle removal"}]},"tXUssW4vT3o":{"duration":223.47,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tXUssW4vT3o","annotations":[]},"AX2KQGWXg1E":{"duration":186.2,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=AX2KQGWXg1E","annotations":[{"segment":[14.07786271450858,157.17861154446177],"label":"Doing kickboxing"}]},"fO8b3U8fuGo":{"duration":18.07,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=fO8b3U8fuGo","annotations":[{"segment":[0.733232449297972,17.569377535101406],"label":"Horseback riding"}]},"H91Dm6jaUPg":{"duration":145.92000000000002,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=H91Dm6jaUPg","annotations":[{"segment":[37.54157557722309,137.65244378315134],"label":"Painting"}]},"Rokj1EIAHHk":{"duration":39.57,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Rokj1EIAHHk","annotations":[{"segment":[7.160188131045242,17.283212730109206],"label":"Tumbling"},{"segment":[23.702691744149767,35.43058609672387],"label":"Tumbling"}]},"_jIXs7AFTxw":{"duration":285.815,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=_jIXs7AFTxw","annotations":[]},"KfkNKhAWOII":{"duration":127.78,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KfkNKhAWOII","annotations":[{"segment":[56.41409044116867,68.77336029641187],"label":"Peeling 
potatoes"}]},"teemFjum3oU":{"duration":72.447,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=teemFjum3oU","annotations":[{"segment":[0.01,72.44625850340137],"label":"Fun sliding down"}]},"7ZX3NHDmMZY":{"duration":218.97,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7ZX3NHDmMZY","annotations":[{"segment":[0.01,218.96417233560092],"label":"Playing water polo"}]},"jfnFrt9nfSo":{"duration":170.6,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jfnFrt9nfSo","annotations":[{"segment":[14.903948691280984,18.363793923185497],"label":"Layup drill in basketball"},{"segment":[27.678761855236115,29.009471559814774],"label":"Layup drill in basketball"},{"segment":[43.11499442834856,44.179562192011495],"label":"Layup drill in basketball"},{"segment":[56.15594953321943,58.285085060545285],"label":"Layup drill in basketball"},{"segment":[70.26147240175321,72.39060792907907],"label":"Layup drill in basketball"},{"segment":[86.7622727385286,87.82684050219152],"label":"Layup drill in basketball"},{"segment":[101.13393754797812,102.73078919347249],"label":"Layup drill in basketball"},{"segment":[115.77174429834336,117.36859594383776],"label":"Layup drill in basketball"},{"segment":[126.1512799940569,128.01427358046703],"label":"Layup drill in basketball"},{"segment":[151.1686223276131,152.76547397191888],"label":"Layup drill in basketball"},{"segment":[149.03948680187207,152.2331900904836],"label":"Layup drill in 
basketball"}]},"gk6NAPqfJoY":{"duration":194.955,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gk6NAPqfJoY","annotations":[{"segment":[0.01,194.901],"label":"Skiing"}]},"bMFTN7aL6aQ":{"duration":51.735,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=bMFTN7aL6aQ","annotations":[]},"BK8yiRJkfcU":{"duration":46.93,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BK8yiRJkfcU","annotations":[{"segment":[24.78154187936225,30.63833168836958],"label":"Long jump"}]},"FMUaey1tigI":{"duration":107.021,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=FMUaey1tigI","annotations":[{"segment":[9.73522303464805,99.58599492803677],"label":"Playing congas"}]},"jF33TElZc_Y":{"duration":188.825,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jF33TElZc_Y","annotations":[{"segment":[11.19397426781425,18.85300929316084],"label":"Snowboarding"},{"segment":[24.155418156862325,76.59035025346591],"label":"Snowboarding"}]},"ZEChBNpLCyU":{"duration":140.156,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=ZEChBNpLCyU","annotations":[{"segment":[118.94644443057722,121.35161150546021],"label":"Spread mulch"}]},"nqB4Zn6UWdk":{"duration":69.405,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=nqB4Zn6UWdk","annotations":[{"segment":[0.2,25.00313728549142],"label":"Running a marathon"}]},"WxlJBRUU1A0":{"duration":226.465,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WxlJBRUU1A0","annotations":[{"segment":[28.211962616822433,199.26458722741432],"label":"Knitting"}]},"WZUxscN9rW8":{"duration":127.29,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=WZUxscN9rW8","annotations":[{"segment":[2.09139113,93.18341994851794],"label":"Plataform 
diving"}]},"hx0WYKDa7PM":{"duration":100.2,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hx0WYKDa7PM","annotations":[{"segment":[39.77306942277691,87.1256825273011],"label":"Plataform diving"}]},"WFbUBMgOMn8":{"duration":206.66,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WFbUBMgOMn8","annotations":[{"segment":[28.682853354134163,190.1447581903276],"label":"Preparing pasta"}]},"lMYtmGRAn8k":{"duration":201.174,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=lMYtmGRAn8k","annotations":[{"segment":[13.495247009880394,188.6196151846074],"label":"Rope skipping"}]},"CjoAnld43C4":{"duration":217.386,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=CjoAnld43C4","annotations":[{"segment":[15.934979719188766,205.46141653666146],"label":"Doing fencing"}]},"TDZsE3yValQ":{"duration":19.58,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=TDZsE3yValQ","annotations":[{"segment":[0.01,18.367644224648988],"label":"Triple jump"}]},"SfFjpnTKG7s":{"duration":207.68,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SfFjpnTKG7s","annotations":[{"segment":[0.6479852554646404,202.1713983650546],"label":"Pole vault"}]},"M2pxaQiBbXA":{"duration":79.51,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=M2pxaQiBbXA","annotations":[{"segment":[0,79.458333],"label":"Playing water 
polo"}]},"pbBJXBPrnmo":{"duration":155.853,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pbBJXBPrnmo","annotations":[]},"eQb6kWNJRd0":{"duration":21.897,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=eQb6kWNJRd0","annotations":[]},"2CegApogJd4":{"duration":124.81,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=2CegApogJd4","annotations":[]},"wPLEmDBfgok":{"duration":184.76,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=wPLEmDBfgok","annotations":[{"segment":[0,183.89601083907303],"label":"Cumbia"}]},"UMS4-qifK9A":{"duration":53.8,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UMS4-qifK9A","annotations":[{"segment":[5.760946177847114,52.016718408736345],"label":"Playing guitarra"}]},"8H9WqAFtn60":{"duration":85.752,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8H9WqAFtn60","annotations":[]},"sP416nSD4xQ":{"duration":68.15,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=sP416nSD4xQ","annotations":[{"segment":[0.01,65.95669763634946],"label":"Playing piano"}]},"srSwfib_ogk":{"duration":134.839,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=srSwfib_ogk","annotations":[{"segment":[3.57605474014879,134.838299],"label":"Baton twirling"}]},"M1SUIrWykRs":{"duration":58.306,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=M1SUIrWykRs","annotations":[{"segment":[2.4548939157566303,51.55277223088923],"label":"Surfing"}]},"LvRP3c5n3P8":{"duration":230.76,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=LvRP3c5n3P8","annotations":[{"segment":[142.12810691887677,186.74553795163808],"label":"Washing 
hands"}]},"hghdjiQlYko":{"duration":141.967,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=hghdjiQlYko","annotations":[{"segment":[15.281917736349454,109.40967190951638],"label":"Snow tubing"}]},"0Ffvglar63w":{"duration":66.827,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=0Ffvglar63w","annotations":[]},"7qeiQKbWpLM":{"duration":34.92,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=7qeiQKbWpLM","annotations":[{"segment":[2.833051750390016,33.397321595943836],"label":"Using uneven bars"}]},"yKbRi1SZilA":{"duration":157.15,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=yKbRi1SZilA","annotations":[]},"bwcgMTHFRV8":{"duration":136.696,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=bwcgMTHFRV8","annotations":[{"segment":[76.986488299532,123.2636848673947],"label":"Drum corps"}]},"9FAPy8NVDgY":{"duration":158.73,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9FAPy8NVDgY","annotations":[{"segment":[10.648104294239797,149.81635009360375],"label":"Preparing pasta"}]},"5P-Xs2v6lis":{"duration":217.85,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5P-Xs2v6lis","annotations":[{"segment":[0.01,131.76621297871955],"label":"Roof shingle removal"},{"segment":[143.64009927155442,207.08057746470084],"label":"Roof shingle removal"}]},"XzYtm5WdAE8":{"duration":31.741,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=XzYtm5WdAE8","annotations":[{"segment":[0.01,28.077131471138845],"label":"Cutting the 
grass"}]},"lsIYvscKoPo":{"duration":164.47,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lsIYvscKoPo","annotations":[]},"IgAE9XJVIlk":{"duration":106.12,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IgAE9XJVIlk","annotations":[{"segment":[22.26589949448318,89.14637083850701],"label":"Breakdancing"}]},"kXbc9D0sF5k":{"duration":191.426,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kXbc9D0sF5k","annotations":[{"segment":[28.957904836193446,37.31688767550702],"label":"Skiing"},{"segment":[39.70516848673947,48.661221528861155],"label":"Skiing"},{"segment":[56.721669266770675,60.00555538221528],"label":"Skiing"},{"segment":[63.58797659906396,68.0660031201248],"label":"Skiing"},{"segment":[75.5293806552262,90.15760062402495],"label":"Skiing"},{"segment":[100.90486427457097,109.56238221528862],"label":"Skiing"},{"segment":[111.652127925117,125.68327769110763],"label":"Skiing"},{"segment":[138.8188221528861,142.10270826833073],"label":"Skiing"},{"segment":[151.6716817472699,153.46307956318253],"label":"Skiing"},{"segment":[163.61433385335414,178.24408268330734],"label":"Skiing"},{"segment":[181.8268783151326,183.61827613104523],"label":"Skiing"}]},"uegTBtD59R0":{"duration":204.68,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uegTBtD59R0","annotations":[{"segment":[34.31966848673947,109.02480733229329],"label":"Doing motocross"},{"segment":[111.57882917316692,169.36357332293292],"label":"Doing motocross"},{"segment":[175.4293751950078,181.81442979719188],"label":"Doing motocross"}]},"snPtOKtIEA4":{"duration":72.59,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=snPtOKtIEA4","annotations":[{"segment":[6.506948748049922,72.538333],"label":"Layup drill in 
basketball"}]},"u10c6Nx4K0A":{"duration":5.99,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=u10c6Nx4K0A","annotations":[{"segment":[0.1520861,1.1560101],"label":"Pole vault"},{"segment":[1.429813,4.5936711],"label":"Pole vault"},{"segment":[0,5.743333],"label":"Pole vault"}]},"9ZQY-ZfimYo":{"duration":172.99,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9ZQY-ZfimYo","annotations":[{"segment":[18.935179431812962,29.174383732432485],"label":"Rock climbing"},{"segment":[70.67010642441686,77.67587778799863],"label":"Rock climbing"},{"segment":[143.4223475077661,147.73359142381642],"label":"Rock climbing"}]},"lfGmSfTjnA8":{"duration":160.497,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=lfGmSfTjnA8","annotations":[{"segment":[9.26421844773791,151.23210655226208],"label":"Peeling potatoes"}]},"8R8r_ohAdPw":{"duration":100.47,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=8R8r_ohAdPw","annotations":[]},"2xmu01HHhKY":{"duration":49.598,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=2xmu01HHhKY","annotations":[{"segment":[3.4045306833073323,19.576051429017163],"label":"Playing congas"}]},"hKef3llID08":{"duration":27.632,"subset":"testing","resolution":"424x240","url":"https://www.youtube.com/watch?v=hKef3llID08","annotations":[]},"l-gHWS0oXiw":{"duration":106.627,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=l-gHWS0oXiw","annotations":[{"segment":[6.321043959438378,16.800669471138846],"label":"Spread mulch"},{"segment":[24.78514605148206,29.77544391419657],"label":"Spread mulch"},{"segment":[54.56058996567862,74.0227516302652],"label":"Spread mulch"},{"segment":[88.82730195631825,99.14058420592823],"label":"Spread 
mulch"}]},"vqqoDYma9F8":{"duration":154.23,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vqqoDYma9F8","annotations":[{"segment":[62.5064010445069,65.86860353671874],"label":"Cleaning windows"},{"segment":[111.01817986070627,117.02211288251313],"label":"Cleaning windows"}]},"XklHV0q3w40":{"duration":236.19,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XklHV0q3w40","annotations":[{"segment":[14.002080719963494,222.92786296411856],"label":"Playing saxophone"}]},"lca7DhtJjF8":{"duration":67.52,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=lca7DhtJjF8","annotations":[]},"HhnveTWVWQ8":{"duration":229.576,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HhnveTWVWQ8","annotations":[]},"mtQUYdj2Ecg":{"duration":23.59,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=mtQUYdj2Ecg","annotations":[{"segment":[10.010647263650545,11.44599742277691],"label":"Clean and jerk"}]},"OgaurUPiWYI":{"duration":98.082,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=OgaurUPiWYI","annotations":[{"segment":[11.777156006240249,77.85158970358815],"label":"BMX"}]},"NKf6zbwP-wU":{"duration":111.39,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=NKf6zbwP-wU","annotations":[]},"h4SzYWJUqVQ":{"duration":174.661,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=h4SzYWJUqVQ","annotations":[{"segment":[0.01,174.621],"label":"Installing carpet"}]},"dxvKVBrNMDw":{"duration":117.54,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dxvKVBrNMDw","annotations":[{"segment":[31.340485179407178,71.1135600624025],"label":"Hula 
hoop"}]},"ZC65qkYMy9Q":{"duration":52.524,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=ZC65qkYMy9Q","annotations":[{"segment":[0.2,17.609539781591263],"label":"Shuffleboard"},{"segment":[21.6228767550702,45.53908892355694],"label":"Shuffleboard"}]},"PSB1nM3QXxg":{"duration":38.8,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=PSB1nM3QXxg","annotations":[]},"l2MB-KxbVEs":{"duration":43.515,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=l2MB-KxbVEs","annotations":[{"segment":[0.2,40.866684536661474],"label":"Calf roping"}]},"dR3hrw9dVdw":{"duration":170.11,"subset":"validation","resolution":"810x480","url":"https://www.youtube.com/watch?v=dR3hrw9dVdw","annotations":[{"segment":[0.01,165.5979055975039],"label":"Playing piano"}]},"yPsC9mVpGmk":{"duration":95.574,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=yPsC9mVpGmk","annotations":[]},"O0-CRPl0TR0":{"duration":209.12,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=O0-CRPl0TR0","annotations":[{"segment":[14.744029962498079,180.83641314006817],"label":"Ping-pong"}]},"G_npKjzxQLk":{"duration":95.759,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=G_npKjzxQLk","annotations":[{"segment":[3.1274331123066124,82.20687069760511],"label":"Scuba diving"},{"segment":[93.9719847639678,95.16338872005515],"label":"Scuba diving"}]},"sxZTPWMnyFY":{"duration":198.46,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sxZTPWMnyFY","annotations":[{"segment":[0,198.401],"label":"Snatch"}]},"ZG4ApSk5iKw":{"duration":147.749,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZG4ApSk5iKw","annotations":[{"segment":[4.838092043681748,36.40088299531981],"label":"Doing crunches"},{"segment":[69.57778471138846,99.06770670826833],"label":"Doing 
crunches"}]},"iPk4GeFFcTQ":{"duration":78.09,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=iPk4GeFFcTQ","annotations":[{"segment":[5.601719251170047,21.797994477379095],"label":"Javelin throw"}]},"ll91M5topgU":{"duration":10.31,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ll91M5topgU","annotations":[{"segment":[0.2,10.061],"label":"Blowing leaves"}]},"ngxs6ngJR4k":{"duration":228.694,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=ngxs6ngJR4k","annotations":[{"segment":[0.01,211.02481343576062],"label":"Assembling bicycle"}]},"28QW8P1x_LE":{"duration":192.56,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=28QW8P1x_LE","annotations":[{"segment":[0,189.55897775663027],"label":"Zumba"}]},"3NSikmqK3Ys":{"duration":67.59,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=3NSikmqK3Ys","annotations":[{"segment":[53.990270587694255,56.83741376321719],"label":"Walking the dog"}]},"OMYnLCWTdEA":{"duration":143.01,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OMYnLCWTdEA","annotations":[{"segment":[58.24695018999999,127.13105855999997],"label":"Breakdancing"}]},"EnZgkrQFcnA":{"duration":103.724,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EnZgkrQFcnA","annotations":[]},"L2XwEKLfqfk":{"duration":207.84,"subset":"testing","resolution":"658x480","url":"https://www.youtube.com/watch?v=L2XwEKLfqfk","annotations":[]},"9H8OBfrm1Rw":{"duration":118.47,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9H8OBfrm1Rw","annotations":[{"segment":[10.904250091092079,111.07549669061594],"label":"Playing 
lacrosse"}]},"BkBbzC6nIvA":{"duration":226.42000000000002,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=BkBbzC6nIvA","annotations":[{"segment":[13.772354134165367,219.651391575663],"label":"Spinning"}]},"PD9e9MVHEyU":{"duration":111.46000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PD9e9MVHEyU","annotations":[{"segment":[16.170638564318082,103.63118758190328],"label":"Painting"}]},"HfjvlIAEsJY":{"duration":192.517,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HfjvlIAEsJY","annotations":[{"segment":[15.467405954809486,26.579911203895907],"label":"Assembling bicycle"},{"segment":[34.68903665593195,59.016413012040076],"label":"Assembling bicycle"},{"segment":[127.79381036449391,171.6431554014295],"label":"Assembling bicycle"}]},"giXgqmNJJ2E":{"duration":151.51,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=giXgqmNJJ2E","annotations":[]},"PLqTX6ij52U":{"duration":147.47,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PLqTX6ij52U","annotations":[{"segment":[69.01868891082174,121.70295477941566],"label":"Assembling bicycle"}]},"T9jiC2SRLJQ":{"duration":70.636,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=T9jiC2SRLJQ","annotations":[{"segment":[19.993137626162437,21.531071418652996],"label":"Playing pool"},{"segment":[23.728119693639506,25.485758313628715],"label":"Playing pool"},{"segment":[28.232068657361857,28.891183139857805],"label":"Playing pool"},{"segment":[33.83454175857746,35.2626226177199],"label":"Playing pool"},{"segment":[42.842439054733894,44.490225236693455],"label":"Playing pool"},{"segment":[48.884321721918965,50.092698255355984],"label":"Playing 
pool"}]},"IwzMVj0JgJo":{"duration":21.99,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IwzMVj0JgJo","annotations":[]},"0ecdkXfxQUA":{"duration":9.242,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0ecdkXfxQUA","annotations":[]},"sCxGclun1E0":{"duration":110.063,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=sCxGclun1E0","annotations":[{"segment":[0.2,102.33588153510141],"label":"Baking cookies"}]},"hN9diyaOK4E":{"duration":94.227,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=hN9diyaOK4E","annotations":[{"segment":[3.226700934579439,90.7876308411215],"label":"Playing rubik cube"}]},"yYvDJjIxvS4":{"duration":82.94,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yYvDJjIxvS4","annotations":[]},"vB3VWKEXLuY":{"duration":75.674,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=vB3VWKEXLuY","annotations":[]},"VjwKZEMHlpE":{"duration":11.03,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=VjwKZEMHlpE","annotations":[]},"j18sB8o2IQw":{"duration":15.09,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=j18sB8o2IQw","annotations":[{"segment":[0.01,10.101222622464899],"label":"Plataform diving"}]},"26j3-w5G1fU":{"duration":47.97,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=26j3-w5G1fU","annotations":[]},"vjUx3k63oZI":{"duration":72.93,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=vjUx3k63oZI","annotations":[{"segment":[59.042120124804995,71.21458034321373],"label":"Bungee jumping"}]},"xhBvsWa0PCs":{"duration":25.38,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=xhBvsWa0PCs","annotations":[{"segment":[1.5045206614664588,5.028266421216849],"label":"Calf 
roping"},{"segment":[7.522603307332294,11.561053503900157],"label":"Calf roping"},{"segment":[13.42190800624025,18.846100917316694],"label":"Calf roping"},{"segment":[20.033880386895476,25.178888],"label":"Calf roping"}]},"3HP2E_a9xrU":{"duration":213.322,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=3HP2E_a9xrU","annotations":[{"segment":[5.988608424336974,211.28460998439937],"label":"Hitting a pinata"}]},"OBbHkBVDX8Y":{"duration":172.85,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=OBbHkBVDX8Y","annotations":[{"segment":[0,172.781],"label":"Playing saxophone"}]},"pocqDciTABA":{"duration":32.09,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=pocqDciTABA","annotations":[]},"Zob68l3snFo":{"duration":155.876,"subset":"training","resolution":"1920x1280","url":"https://www.youtube.com/watch?v=Zob68l3snFo","annotations":[{"segment":[19.75674398405604,149.62677980964875],"label":"Scuba diving"}]},"4VdS1eN3dv4":{"duration":55.43,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4VdS1eN3dv4","annotations":[]},"Lo848n58uoM":{"duration":127.01,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Lo848n58uoM","annotations":[{"segment":[1.1888861840873637,104.4238365023401],"label":"Walking the dog"}]},"jeaaS1NK_d4":{"duration":186.225,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jeaaS1NK_d4","annotations":[{"segment":[0.01,177.50805552574104],"label":"Arm wrestling"}]},"cQYAi2drreo":{"duration":71.936,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=cQYAi2drreo","annotations":[{"segment":[0.01,41.83942745709828],"label":"Carving 
jack-o-lanterns"}]},"Lzvtnr4gT8Y":{"duration":85.66,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Lzvtnr4gT8Y","annotations":[{"segment":[6.681625224192642,35.54624619270485],"label":"Shaving legs"},{"segment":[60.401892026701475,62.54001197129485],"label":"Shaving legs"}]},"j73Ks4r5tbg":{"duration":186.32,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=j73Ks4r5tbg","annotations":[{"segment":[83.98132605304212,186.27],"label":"Smoking a cigarette"}]},"5ZmZr2bguy0":{"duration":43.63,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5ZmZr2bguy0","annotations":[{"segment":[5.208921996879876,41.50114976599064],"label":"Shot put"}]},"QyYbyFXmd-g":{"duration":205.776,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=QyYbyFXmd-g","annotations":[{"segment":[22.14469422776911,143.7800436817473],"label":"Tug of war"},{"segment":[153.40817160686427,193.22321684867396],"label":"Tug of war"}]},"xj-f9hzQ3Lc":{"duration":54.196,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=xj-f9hzQ3Lc","annotations":[{"segment":[20.05987239042649,51.83538657549647],"label":"BMX"}]},"mvl6wff267o":{"duration":150.094,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mvl6wff267o","annotations":[{"segment":[9.366226833073323,19.20076500780031],"label":"Running a marathon"}]},"FRzN9ApCxW8":{"duration":67.71,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=FRzN9ApCxW8","annotations":[{"segment":[0.2,54.505527575663024],"label":"Cutting the grass"}]},"FeKEqUxiReA":{"duration":236.635,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=FeKEqUxiReA","annotations":[{"segment":[16.610054602184086,236.601],"label":"Rope 
skipping"}]},"kIDCXgNy8iI":{"duration":36.48,"subset":"testing","resolution":"192x144","url":"https://www.youtube.com/watch?v=kIDCXgNy8iI","annotations":[]},"E_8IuayRJ2w":{"duration":102.447,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=E_8IuayRJ2w","annotations":[]},"ibjvKk93__g":{"duration":102.029,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ibjvKk93__g","annotations":[{"segment":[12.096967111337516,18.145450667006273],"label":"Elliptical trainer"},{"segment":[12.096967111337516,18.145450667006273],"label":"Elliptical trainer"}]},"B-KSdR2ct00":{"duration":39.66,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=B-KSdR2ct00","annotations":[{"segment":[0.01,32.186583463338536],"label":"Brushing teeth"}]},"I7slpTgl0jc":{"duration":20.16,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=I7slpTgl0jc","annotations":[]},"Cj_r6t5lLx8":{"duration":207.45,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Cj_r6t5lLx8","annotations":[]},"Hg1tl2hFWGc":{"duration":29.56,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=Hg1tl2hFWGc","annotations":[{"segment":[0,29.461],"label":"Putting on makeup"}]},"tn8YmTHinZc":{"duration":237.308,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=tn8YmTHinZc","annotations":[{"segment":[18.94623135229806,222.5594734933443],"label":"Making a lemonade"}]},"7CNEttu-t7g":{"duration":88.422,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7CNEttu-t7g","annotations":[{"segment":[38.390855694227774,45.69683541341654],"label":"Using the rowing machine"},{"segment":[45.845058502340095,47.637496879875194],"label":"Using the rowing 
machine"}]},"GffGGAwbcHo":{"duration":54.85,"subset":"training","resolution":"852x480","url":"https://www.youtube.com/watch?v=GffGGAwbcHo","annotations":[{"segment":[17.44693291731669,26.59802028081123],"label":"Walking the dog"},{"segment":[29.762414976599064,32.67023712948518],"label":"Walking the dog"}]},"PAF5AIQNXhs":{"duration":68.802,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PAF5AIQNXhs","annotations":[{"segment":[0.2,65.91173551214808],"label":"Swinging at the playground"}]},"waFqh-Qkafc":{"duration":52.315,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=waFqh-Qkafc","annotations":[{"segment":[4.800964174454829,44.42926168224299],"label":"Carving jack-o-lanterns"}]},"lHsPZ49cpG8":{"duration":103.56,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=lHsPZ49cpG8","annotations":[{"segment":[0.01,103.513333],"label":"Playing polo"}]},"FLImHIKzzm4":{"duration":152.57999999999998,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=FLImHIKzzm4","annotations":[{"segment":[35.09254725,80.5801845],"label":"Playing water polo"}]},"cw0HRDIQ10I":{"duration":38.71,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=cw0HRDIQ10I","annotations":[{"segment":[9.540839613104524,12.801632898595942],"label":"Clean and jerk"},{"segment":[20.470535625585022,26.207116405616222],"label":"Clean and 
jerk"}]},"_H-qWt56giQ":{"duration":126.642,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=_H-qWt56giQ","annotations":[]},"SsAmEJvdpyU":{"duration":82.849,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=SsAmEJvdpyU","annotations":[{"segment":[7.4964591182286755,82.84879818594105],"label":"Rollerblading"}]},"JB-ynj70saA":{"duration":35.759,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=JB-ynj70saA","annotations":[]},"r-rNYo8ju4k":{"duration":9.776,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=r-rNYo8ju4k","annotations":[{"segment":[0,9.741],"label":"Applying sunscreen"}]},"PIJd7_6RGpY":{"duration":79.11,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PIJd7_6RGpY","annotations":[{"segment":[11.350159126365055,79.081],"label":"Starting a campfire"}]},"CuyMHCcxqNc":{"duration":159.614,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=CuyMHCcxqNc","annotations":[{"segment":[4.731147257410297,154.6338129921997],"label":"Making a cake"}]},"WN5EWPfDbog":{"duration":170.551,"subset":"validation","resolution":"562x480","url":"https://www.youtube.com/watch?v=WN5EWPfDbog","annotations":[{"segment":[0.01,170.55056689342405],"label":"Playing blackjack"}]},"rBMQFpHspmo":{"duration":144.31,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=rBMQFpHspmo","annotations":[{"segment":[57.732015600000004,143.47728159126365],"label":"Pole vault"},{"segment":[3.713529641185647,42.7307832],"label":"Pole vault"}]},"Bule85koN3o":{"duration":48.51,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Bule85koN3o","annotations":[{"segment":[7.340294770670827,9.988854739469579],"label":"Shot put"},{"segment":[23.004635157566305,24.669444280811234],"label":"Shot 
put"}]},"d0h_l9bE1o0":{"duration":216.55,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=d0h_l9bE1o0","annotations":[{"segment":[10.134912498540759,19.594164163845466],"label":"Cheerleading"},{"segment":[51.01239290932181,53.039375409029965],"label":"Cheerleading"},{"segment":[163.50992164312424,172.6313428918109],"label":"Cheerleading"}]},"Aoo_Wt4TegA":{"duration":26.703,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Aoo_Wt4TegA","annotations":[]},"Fvc0-yNMFyE":{"duration":118.028,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=Fvc0-yNMFyE","annotations":[{"segment":[0.2,108.48207250516198],"label":"Riding bumper cars"}]},"nYl_tYmijlM":{"duration":185.667,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nYl_tYmijlM","annotations":[]},"G4ir4oKYhMg":{"duration":224.49,"subset":"testing","resolution":"352x288","url":"https://www.youtube.com/watch?v=G4ir4oKYhMg","annotations":[]},"IYbnF7LM2II":{"duration":139.34,"subset":"training","resolution":"810x480","url":"https://www.youtube.com/watch?v=IYbnF7LM2II","annotations":[{"segment":[16.52116074302836,92.6054536385537],"label":"Playing racquetball"}]},"ywJQotAB3dw":{"duration":162.587,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ywJQotAB3dw","annotations":[{"segment":[11.66763546188106,153.70841337285492],"label":"Futsal"}]},"YGl_GO6ezh4":{"duration":73.35,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=YGl_GO6ezh4","annotations":[]},"S4ppfXBp5o0":{"duration":140.16,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=S4ppfXBp5o0","annotations":[{"segment":[14.435784223205662,36.08946055801415],"label":"Clean and jerk"},{"segment":[48.55672875078268,50.08779677445601],"label":"Clean and jerk"},{"segment":[75.6785045397816,81.80277658346336],"label":"Clean and 
jerk"},{"segment":[110.0181727847114,115.26754882215289],"label":"Clean and jerk"},{"segment":[127.51609290951639,131.23440093603745],"label":"Clean and jerk"}]},"JW3hvO3OFlU":{"duration":197.35,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JW3hvO3OFlU","annotations":[]},"yINX46xPRf0":{"duration":159.986,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=yINX46xPRf0","annotations":[{"segment":[31.435329173166924,147.96413884555383],"label":"Scuba diving"}]},"nyjtOdSyHn4":{"duration":115.984,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=nyjtOdSyHn4","annotations":[{"segment":[38.17872057195213,68.75788539024555],"label":"Hitting a pinata"}]},"eGW0rygfcrg":{"duration":92.22999999999999,"subset":"training","resolution":"854x478","url":"https://www.youtube.com/watch?v=eGW0rygfcrg","annotations":[{"segment":[0.8633028749721418,91.51010474704702],"label":"Tango"}]},"SDiDPAiCIdU":{"duration":72.052,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SDiDPAiCIdU","annotations":[]},"Kq8g8jjodvc":{"duration":49.71,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Kq8g8jjodvc","annotations":[{"segment":[0.01,49.713921],"label":"Snatch"}]},"cKMGacBQX0E":{"duration":57.17,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cKMGacBQX0E","annotations":[{"segment":[8.394608600276637,46.77343124357882],"label":"Using the pommel 
horse"}]},"kH8YpzoflKg":{"duration":115.171,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kH8YpzoflKg","annotations":[{"segment":[0.01,9.163344004680187],"label":"Snowboarding"},{"segment":[20.123422127925117,111.93653558658346],"label":"Snowboarding"}]},"RWir3muDHg0":{"duration":81.061,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RWir3muDHg0","annotations":[{"segment":[4.1731872074883,81.060906],"label":"Tug of war"}]},"E50qKeeMbgU":{"duration":70.705,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=E50qKeeMbgU","annotations":[{"segment":[11.798544461778471,70.44052886115445],"label":"Elliptical trainer"}]},"U-ApHGUtLMA":{"duration":67.617,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=U-ApHGUtLMA","annotations":[{"segment":[23.20691380036154,53.16492906084243],"label":"Calf roping"}]},"Wgr9YpWni30":{"duration":163.864,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=Wgr9YpWni30","annotations":[{"segment":[8.180380205928238,163.6632426303855],"label":"Slacklining"}]},"ng14GLT_hHQ":{"duration":153.35,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=ng14GLT_hHQ","annotations":[{"segment":[2.5114820592823714,132.39098283931358],"label":"Painting"}]},"Ly21QMQ4kGA":{"duration":37.036,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=Ly21QMQ4kGA","annotations":[{"segment":[26.668097780525883,29.37522658612464],"label":"Beer pong"}]},"fKFcbNM89MA":{"duration":30.0,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=fKFcbNM89MA","annotations":[{"segment":[0.2,29.493589703588142],"label":"River 
tubing"}]},"O9K2pEzSOG0":{"duration":13.143,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=O9K2pEzSOG0","annotations":[]},"atGMJc-DZ7w":{"duration":82.73,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=atGMJc-DZ7w","annotations":[]},"aTrEYoCRL2k":{"duration":236.147,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=aTrEYoCRL2k","annotations":[{"segment":[30.48236148912322,199.42078016125961],"label":"Blowing leaves"}]},"2OEa00knM9E":{"duration":51.502,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2OEa00knM9E","annotations":[{"segment":[2.1676240249609986,41.66655070202808],"label":"Calf roping"}]},"WIarYDce6rc":{"duration":234.17000000000002,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WIarYDce6rc","annotations":[]},"F1MKU9-yXg0":{"duration":121.069,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=F1MKU9-yXg0","annotations":[{"segment":[49.331313773928585,96.59146827159577],"label":"Spread mulch"}]},"fKDl_CnA8nY":{"duration":128.92,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fKDl_CnA8nY","annotations":[{"segment":[0,6.636922325872627],"label":"Playing saxophone"},{"segment":[59.12894435777431,113.02879733853355],"label":"Playing saxophone"}]},"8dXbbJWFEJo":{"duration":38.012,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=8dXbbJWFEJo","annotations":[{"segment":[0,37.981],"label":"Canoeing"}]},"-MB6Wxglgzw":{"duration":196.604,"subset":"validation","resolution":"322x240","url":"https://www.youtube.com/watch?v=-MB6Wxglgzw","annotations":[{"segment":[9.18707271070422,196.603355],"label":"Playing beach 
volleyball"}]},"d8M7yIj9jbE":{"duration":64.111,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=d8M7yIj9jbE","annotations":[{"segment":[7.20112560374415,56.2087859625585],"label":"Playing pool"}]},"HVfr9FnEj-Q":{"duration":188.71,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HVfr9FnEj-Q","annotations":[{"segment":[0.01,188.70857142857142],"label":"Belly dance"}]},"6diEN8nUqsI":{"duration":121.65,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6diEN8nUqsI","annotations":[{"segment":[0.2,99.02866510903428],"label":"Ice fishing"}]},"XK6Ldhzbc90":{"duration":224.05,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XK6Ldhzbc90","annotations":[]},"c_MMN0233ZY":{"duration":64.97,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=c_MMN0233ZY","annotations":[]},"U-VzZQGWOqA":{"duration":59.814,"subset":"training","resolution":"352x262","url":"https://www.youtube.com/watch?v=U-VzZQGWOqA","annotations":[{"segment":[0.01,52.95735181942342],"label":"Having an ice cream"}]},"rlWN8PZMbfY":{"duration":171.341,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=rlWN8PZMbfY","annotations":[{"segment":[58.09040011225459,111.65081864358626],"label":"Putting in contact lenses"}]},"LDSEEw5oUM4":{"duration":155.087,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=LDSEEw5oUM4","annotations":[{"segment":[11.1261872074883,147.82775731045243],"label":"Baton twirling"}]},"KT2-cbD1tfk":{"duration":73.329,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=KT2-cbD1tfk","annotations":[]},"eL4Ce4WPI0Y":{"duration":39.1,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=eL4Ce4WPI0Y","annotations":[{"segment":[13.359478998439938,23.24183332605304],"label":"Shot 
put"}]},"rwUlqI6HWyY":{"duration":201.619,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=rwUlqI6HWyY","annotations":[{"segment":[24.21247581903276,161.64217472698905],"label":"Making an omelette"}]},"bH-S32gOlCA":{"duration":218.547,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=bH-S32gOlCA","annotations":[{"segment":[48.40250546021841,210.3122948517941],"label":"Playing ice hockey"}]},"yjUMopRteDc":{"duration":87.31,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=yjUMopRteDc","annotations":[{"segment":[8.8791219,47.70105252],"label":"Getting a piercing"}]},"gpJ7veSnhUs":{"duration":216.039,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=gpJ7veSnhUs","annotations":[{"segment":[30.616964174454825,190.76723831775703],"label":"Hurling"}]},"GycRQhoHnXw":{"duration":235.01,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=GycRQhoHnXw","annotations":[{"segment":[8.43246599523845,218.51085796357026],"label":"Belly dance"}]},"134qxRztr2s":{"duration":168.6,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=134qxRztr2s","annotations":[]},"Z4biLo6k5fI":{"duration":136.16,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=Z4biLo6k5fI","annotations":[{"segment":[49.2816550104181,135.96181405895692],"label":"Bathing dog"}]},"z3xkE5Ox-2A":{"duration":166.929,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=z3xkE5Ox-2A","annotations":[{"segment":[7.349147730556975,19.046343992239215],"label":"Carving jack-o-lanterns"},{"segment":[63.75562748133579,81.17145302650712],"label":"Carving jack-o-lanterns"}]},"TspdPLMqTx0":{"duration":169.0,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TspdPLMqTx0","annotations":[{"segment":[3.2125759400000002,14.89574817550525],"label":"Playing 
violin"},{"segment":[28.605109328182646,32.82337429823723],"label":"Playing violin"},{"segment":[38.8871301926907,42.84175360211687],"label":"Playing violin"},{"segment":[46.26909389028623,77.6424396050672],"label":"Playing violin"},{"segment":[78.96064740820925,141.9709803983996],"label":"Playing violin"}]},"TjR436qaQw4":{"duration":150.21,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TjR436qaQw4","annotations":[{"segment":[41.34859619436867,109.09488958472636],"label":"Carving jack-o-lanterns"}]},"K7rfN2W0ts4":{"duration":181.72,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=K7rfN2W0ts4","annotations":[{"segment":[0.01,174.37147662842946],"label":"Playing blackjack"}]},"BSIV5tJ3bv4":{"duration":27.4,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=BSIV5tJ3bv4","annotations":[{"segment":[0.14897295709828393,25.04873864352574],"label":"Clean and jerk"}]},"6Z4Qg_fNo0Q":{"duration":203.732,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6Z4Qg_fNo0Q","annotations":[{"segment":[0.2,84.21336193447738],"label":"Scuba diving"},{"segment":[92.15801872074883,102.00939313572542],"label":"Scuba diving"},{"segment":[109.95404992199688,185.58718252730108],"label":"Scuba diving"},{"segment":[194.48519812792512,201.79428237129486],"label":"Scuba diving"}]},"hSnTYQ9osIg":{"duration":129.92,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=hSnTYQ9osIg","annotations":[{"segment":[1.2160590913432456,112.89081884196568],"label":"Tango"}]},"bDTQVC0ASbE":{"duration":193.725,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=bDTQVC0ASbE","annotations":[{"segment":[27.496054602184092,178.28953198127925],"label":"Waxing 
skis"}]},"gUNBEOi-Fug":{"duration":85.682,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gUNBEOi-Fug","annotations":[]},"UJGK_TKrl4E":{"duration":93.113,"subset":"testing","resolution":"656x480","url":"https://www.youtube.com/watch?v=UJGK_TKrl4E","annotations":[]},"H1bmoIihWwo":{"duration":54.1,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=H1bmoIihWwo","annotations":[{"segment":[39.04082480044878,40.641988923501735],"label":"Springboard diving"}]},"Gn651vGKC2I":{"duration":99.94,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Gn651vGKC2I","annotations":[{"segment":[0,99.938048],"label":"Getting a tattoo"}]},"HkbOI7gEtKk":{"duration":129.06,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HkbOI7gEtKk","annotations":[]},"oNc2lJ9hV9E":{"duration":190.799,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=oNc2lJ9hV9E","annotations":[{"segment":[9.331883151923991,173.68000838245356],"label":"Making a cake"}]},"6LsvBNqJuJY":{"duration":149.57999999999998,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=6LsvBNqJuJY","annotations":[]},"5k0KMrksf8c":{"duration":9.68,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=5k0KMrksf8c","annotations":[{"segment":[0.09722958580343215,9.588333],"label":"Playing 
lacrosse"}]},"IjFlVNAWQec":{"duration":94.273,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IjFlVNAWQec","annotations":[{"segment":[4.706298308892356,42.94497206864274],"label":"Surfing"},{"segment":[46.76883944461778,54.857789663026516],"label":"Surfing"},{"segment":[60.15237526053043,74.41834200936037],"label":"Surfing"},{"segment":[77.06563480811232,86.62530324804992],"label":"Surfing"},{"segment":[90.0079551575663,94.073038],"label":"Surfing"}]},"_79Qlv1SQ9k":{"duration":120.094,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_79Qlv1SQ9k","annotations":[{"segment":[0.01,77.4973186654183],"label":"Snow tubing"}]},"V5MvrOzQWZo":{"duration":94.18,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=V5MvrOzQWZo","annotations":[{"segment":[5.289368015536948,90.21310999063964],"label":"Using the balance beam"}]},"ucHq8B0-1BA":{"duration":208.86,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ucHq8B0-1BA","annotations":[{"segment":[0.01,10.261333853354135],"label":"Grooming horse"},{"segment":[74.10963338533541,202.4577457098284],"label":"Grooming horse"}]},"X2toGKgWMpE":{"duration":30.094,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=X2toGKgWMpE","annotations":[]},"rlBlDYBLTn0":{"duration":229.994,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=rlBlDYBLTn0","annotations":[]},"YmAIoN4uuz0":{"duration":146.936,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YmAIoN4uuz0","annotations":[{"segment":[34.1551405294307,67.16413493915756],"label":"Scuba diving"},{"segment":[84.58554878003119,91.92088092355694],"label":"Scuba 
diving"}]},"zbaxx7CTOSA":{"duration":213.647,"subset":"testing","resolution":"854x478","url":"https://www.youtube.com/watch?v=zbaxx7CTOSA","annotations":[]},"qAZStAHJ3CQ":{"duration":129.661,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qAZStAHJ3CQ","annotations":[{"segment":[35.80235758829568,83.06146845103716],"label":"Hula hoop"}]},"vNdBI3RN7M8":{"duration":43.75,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vNdBI3RN7M8","annotations":[]},"2QdSai6J_ug":{"duration":58.492,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2QdSai6J_ug","annotations":[]},"T62XjoKbN8M":{"duration":83.267,"subset":"testing","resolution":"855x470","url":"https://www.youtube.com/watch?v=T62XjoKbN8M","annotations":[]},"H0dG_1Vgw4Q":{"duration":20.32,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=H0dG_1Vgw4Q","annotations":[]},"kM6v-XN8Ixo":{"duration":110.527,"subset":"training","resolution":"1906x1080","url":"https://www.youtube.com/watch?v=kM6v-XN8Ixo","annotations":[{"segment":[8.82713394093359,20.017560682762202],"label":"Disc dog"},{"segment":[22.944287442828372,36.3727993960994],"label":"Disc dog"},{"segment":[48.59618848176918,93.87437622164457],"label":"Disc dog"}]},"JK8pYBpatAY":{"duration":28.468,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JK8pYBpatAY","annotations":[{"segment":[0.01,28.421],"label":"Kneeling"}]},"XwW5p80hYCg":{"duration":207.471,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XwW5p80hYCg","annotations":[{"segment":[16.933713026926114,36.64662265599061],"label":"Hand car wash"},{"segment":[51.51209549102286,57.97534454973253],"label":"Hand car wash"},{"segment":[61.20696907908737,65.73124342018414],"label":"Hand car wash"},{"segment":[68.96286794953897,78.65774153760348],"label":"Hand car 
wash"},{"segment":[86.4136404080551,137.796470424797],"label":"Hand car wash"},{"segment":[142.3207435417049,149.10715499497593],"label":"Hand car wash"},{"segment":[178.51493795915036,184.97818696226562],"label":"Hand car wash"},{"segment":[206.306908672546,207.270293],"label":"Hand car wash"}]},"e9R62cyqW4A":{"duration":67.663,"subset":"training","resolution":"420x240","url":"https://www.youtube.com/watch?v=e9R62cyqW4A","annotations":[{"segment":[11.399307332293292,66.28486115444618],"label":"Camel ride"}]},"mRyXubjYWEo":{"duration":104.003,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=mRyXubjYWEo","annotations":[{"segment":[5.181856697819314,95.05468380062305],"label":"Capoeira"}]},"q4rVY3sLQqA":{"duration":145.38,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=q4rVY3sLQqA","annotations":[{"segment":[6.123656276863319,9.072083373130845],"label":"Rock climbing"},{"segment":[21.09259384252921,30.391479299988326],"label":"Rock climbing"}]},"4d3RtnuVCV4":{"duration":126.64,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4d3RtnuVCV4","annotations":[{"segment":[58.36286349453978,88.97620982839314],"label":"Playing field hockey"}]},"CXP20cVQN30":{"duration":175.54,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=CXP20cVQN30","annotations":[{"segment":[4.655582794740361,145.69235557566302],"label":"Playing water polo"}]},"_yFOkxb22RI":{"duration":60.42,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=_yFOkxb22RI","annotations":[{"segment":[24.114720748829953,55.01170670826833],"label":"Polishing forniture"}]},"E7C91KoML-o":{"duration":20.09,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=E7C91KoML-o","annotations":[{"segment":[2.68786895475819,16.002196567862715],"label":"Clean and 
jerk"}]},"hL11sP4Hlrg":{"duration":117.08,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hL11sP4Hlrg","annotations":[{"segment":[22.465164411856477,97.16640217160686],"label":"Doing nails"}]},"VbzzcpsAPo8":{"duration":73.03,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=VbzzcpsAPo8","annotations":[{"segment":[1.5949681796795683,9.911587973723032],"label":"Ping-pong"},{"segment":[14.924345109858816,34.06396318689548],"label":"Ping-pong"},{"segment":[39.532425504524184,61.975906266458665],"label":"Ping-pong"}]},"QfUTGmlNO1s":{"duration":20.02,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=QfUTGmlNO1s","annotations":[{"segment":[0,2.0557815912636506],"label":"Washing dishes"}]},"USoYnMSrWv0":{"duration":63.368,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=USoYnMSrWv0","annotations":[{"segment":[0.2,60.652854914196574],"label":"Table soccer"}]},"jQzBXawu29s":{"duration":206.24,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=jQzBXawu29s","annotations":[]},"vrXqd_Ct298":{"duration":225.977,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vrXqd_Ct298","annotations":[{"segment":[8.235233247287548,225.941],"label":"BMX"}]},"8jyqeivzs2M":{"duration":182.95,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8jyqeivzs2M","annotations":[{"segment":[0,150.6983020530421],"label":"Preparing pasta"}]},"U7_kDhChqQM":{"duration":100.148,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=U7_kDhChqQM","annotations":[]},"jYU215e-dKg":{"duration":39.66,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=jYU215e-dKg","annotations":[{"segment":[12.869288062402497,33.34878012324493],"label":"Drinking 
beer"}]},"Kbr2nr94EB0":{"duration":88.35,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Kbr2nr94EB0","annotations":[{"segment":[32.82946458,85.63416770670827],"label":"Layup drill in basketball"}]},"TGIQtaINzCY":{"duration":221.24,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=TGIQtaINzCY","annotations":[]},"0gkxTQGR6zI":{"duration":38.17,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=0gkxTQGR6zI","annotations":[{"segment":[0.010000000000000002,38.173604],"label":"Ping-pong"}]},"rthLJwFnOc0":{"duration":125.09,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=rthLJwFnOc0","annotations":[]},"pSp7zYRYjHE":{"duration":32.11,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=pSp7zYRYjHE","annotations":[{"segment":[4.91361013,28.54139214],"label":"Brushing teeth"}]},"_cZD6JN-SYg":{"duration":130.217,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=_cZD6JN-SYg","annotations":[{"segment":[47.333351687987516,130.217504],"label":"Futsal"}]},"fm9mf-VuWxI":{"duration":108.22999999999999,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fm9mf-VuWxI","annotations":[{"segment":[14.155140119999999,105.52619247269891],"label":"Playing field 
hockey"}]},"thhFfqcOfJQ":{"duration":139.9,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=thhFfqcOfJQ","annotations":[{"segment":[8.948376438458897,128.1145586193448],"label":"Zumba"}]},"4Xvn1xXvYdU":{"duration":213.21,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4Xvn1xXvYdU","annotations":[{"segment":[93.46455573597093,164.64396706708268],"label":"Archery"}]},"jM3Buw2Kidk":{"duration":124.18,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jM3Buw2Kidk","annotations":[{"segment":[6.586787509595622,22.666298194784932],"label":"Windsurfing"},{"segment":[81.94738578114553,111.20047017784712],"label":"Windsurfing"}]},"1d_U_2HIPmY":{"duration":36.69,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1d_U_2HIPmY","annotations":[]},"9qFwSM8QLIE":{"duration":190.009,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=9qFwSM8QLIE","annotations":[{"segment":[53.190664231251084,74.76243544258683],"label":"Bullfighting"},{"segment":[85.40056919064283,131.79465248077588],"label":"Bullfighting"}]},"FHUR9Wane1k":{"duration":136.84,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=FHUR9Wane1k","annotations":[]},"kpnGg2lOBLM":{"duration":182.695,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=kpnGg2lOBLM","annotations":[{"segment":[8.265434464997648,179.27442224336974],"label":"Rope 
skipping"}]},"45ymM0irIIY":{"duration":42.702,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=45ymM0irIIY","annotations":[{"segment":[10.239678627145086,25.200248049921996],"label":"Bullfighting"}]},"oT6WUU8-dgs":{"duration":17.14,"subset":"testing","resolution":"176x144","url":"https://www.youtube.com/watch?v=oT6WUU8-dgs","annotations":[]},"mtF0AFNSFnI":{"duration":149.49,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mtF0AFNSFnI","annotations":[{"segment":[3.7314213291731675,136.1968785148206],"label":"Playing accordion"}]},"p1yGu2Bbwxg":{"duration":198.786,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=p1yGu2Bbwxg","annotations":[]},"niZ7PxgQM2M":{"duration":190.8,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=niZ7PxgQM2M","annotations":[]},"q1BuFA6HLts":{"duration":104.096,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=q1BuFA6HLts","annotations":[]},"VqUUJoIOQKE":{"duration":148.52,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=VqUUJoIOQKE","annotations":[]},"vKCxWIzJTm0":{"duration":219.48,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vKCxWIzJTm0","annotations":[{"segment":[16.777294158433,194.4796547344887],"label":"Breakdancing"}]},"XuG2V9gDD9M":{"duration":97.99000000000001,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=XuG2V9gDD9M","annotations":[{"segment":[0.6521449473717325,96.65619087103714],"label":"Windsurfing"}]},"yvTmIulkl7c":{"duration":26.657,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=yvTmIulkl7c","annotations":[{"segment":[0.2,26.122634945397813],"label":"Hitting a 
pinata"}]},"Sd850yNArWY":{"duration":178.84,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Sd850yNArWY","annotations":[]},"ynvCxrj1UNg":{"duration":99.57,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ynvCxrj1UNg","annotations":[{"segment":[1.3640679,96.8029953198128],"label":"Using the balance beam"}]},"l7gWFOa7FnI":{"duration":168.113,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=l7gWFOa7FnI","annotations":[{"segment":[21.243475755356744,32.258611332208396],"label":"Laying tile"},{"segment":[39.864300182891675,137.16466353666146],"label":"Laying tile"}]},"S7a1aBxTEkk":{"duration":220.149,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=S7a1aBxTEkk","annotations":[]},"KjXvBqmh5C4":{"duration":217.39,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KjXvBqmh5C4","annotations":[{"segment":[3.7304795157792707,23.40028059897906],"label":"Pole vault"},{"segment":[81.73141484570947,96.99246677691107],"label":"Pole vault"}]},"cGNkR-JvaJs":{"duration":161.588,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cGNkR-JvaJs","annotations":[{"segment":[21.49468346767251,24.51501364335649],"label":"Playing pool"},{"segment":[61.262364114178254,143.8180543376374],"label":"Playing pool"}]},"tA4KrZHGoYQ":{"duration":181.023,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tA4KrZHGoYQ","annotations":[{"segment":[26.263833508442378,137.8145242163428],"label":"River tubing"}]},"aSYCOvkN5uk":{"duration":108.949,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=aSYCOvkN5uk","annotations":[{"segment":[31.93976287051482,87.17078471138846],"label":"Playing ten pins"},{"segment":[94.81734477379095,97.87596879875196],"label":"Playing ten 
pins"}]},"xSpWBpHMxqY":{"duration":87.15,"subset":"training","resolution":"176x144","url":"https://www.youtube.com/watch?v=xSpWBpHMxqY","annotations":[{"segment":[7.51617,52.58181816],"label":"Putting on makeup"}]},"BwR1DPCVsP8":{"duration":189.43,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BwR1DPCVsP8","annotations":[{"segment":[9.899923659531414,160.61517850468022],"label":"Layup drill in basketball"}]},"06ofnvq2Hjs":{"duration":203.5,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=06ofnvq2Hjs","annotations":[{"segment":[19.8818523,192.8188566224649],"label":"Skateboarding"}]},"ksNvNH4fpdo":{"duration":18.367,"subset":"validation","resolution":"640x350","url":"https://www.youtube.com/watch?v=ksNvNH4fpdo","annotations":[{"segment":[0.01,18.301],"label":"Using the rowing machine"}]},"KjbZvsu0OxE":{"duration":161.29,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KjbZvsu0OxE","annotations":[{"segment":[0,6.598506121519571],"label":"Walking the dog"},{"segment":[34.47662294394948,69.13590331778126],"label":"Walking the dog"},{"segment":[102.28825845796818,153.7748706075009],"label":"Walking the dog"}]},"XP8pufIBsnM":{"duration":46.14,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XP8pufIBsnM","annotations":[{"segment":[18.282471438377534,37.932529322932915],"label":"Clean and jerk"}]},"x4f4jp_eHHo":{"duration":229.46,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=x4f4jp_eHHo","annotations":[{"segment":[20.762334196567863,225.52190592823715],"label":"Belly 
dance"}]},"id4XtnLsw7c":{"duration":106.84,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=id4XtnLsw7c","annotations":[{"segment":[13.939354999999999,22.17634945397816],"label":"Snatch"}]},"xUUmAdQJgjg":{"duration":5.83,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=xUUmAdQJgjg","annotations":[{"segment":[0.01,5.399398127925116],"label":"Triple jump"}]},"jed5hUKCCk0":{"duration":154.042,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jed5hUKCCk0","annotations":[{"segment":[17.30259734471012,139.14171928549143],"label":"Decorating the Christmas tree"}]},"4tIHbL_1UMs":{"duration":102.935,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=4tIHbL_1UMs","annotations":[]},"bXCOtMuaJf4":{"duration":147.63,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=bXCOtMuaJf4","annotations":[{"segment":[0.2,7.600385133772698],"label":"Washing dishes"},{"segment":[13.58856736038149,22.11021129824785],"label":"Washing dishes"},{"segment":[69.32472500804795,73.240074925446],"label":"Washing dishes"},{"segment":[79.68888655410163,98.57469203802167],"label":"Washing dishes"}]},"UyORfPxpaGo":{"duration":12.052,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=UyORfPxpaGo","annotations":[{"segment":[0.01,12.051155],"label":"Doing a powerbomb"}]},"vcRCId1-xdA":{"duration":145.71,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vcRCId1-xdA","annotations":[{"segment":[10.912249609984398,41.60295163806552],"label":"Drinking 
beer"}]},"GJTW7ZGUgU0":{"duration":79.85,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=GJTW7ZGUgU0","annotations":[]},"UyqM2sglj1s":{"duration":237.866,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UyqM2sglj1s","annotations":[{"segment":[62.82896957878315,118.42982761310452],"label":"Snowboarding"},{"segment":[139.55815366614664,165.50522074882994],"label":"Snowboarding"}]},"lRB6XvAm_FU":{"duration":91.069,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=lRB6XvAm_FU","annotations":[{"segment":[9.945095708590248,49.299259542901716],"label":"Decorating the Christmas tree"}]},"ph7d2H77tks":{"duration":172.664,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ph7d2H77tks","annotations":[{"segment":[0.01,163.23577403504302],"label":"Assembling bicycle"}]},"Ey-0Q6VNJaY":{"duration":213.67000000000002,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ey-0Q6VNJaY","annotations":[{"segment":[15.333574049921996,176.33610157410297],"label":"Skateboarding"}]},"DEt_Xgg8dzc":{"duration":153.35,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=DEt_Xgg8dzc","annotations":[{"segment":[24.1619064245563,26.793401183666393],"label":"Long jump"},{"segment":[140.42612903588144,145.21066494851794],"label":"Long jump"}]},"akJbB6LWP34":{"duration":55.91,"subset":"validation","resolution":"576x360","url":"https://www.youtube.com/watch?v=akJbB6LWP34","annotations":[{"segment":[6.79528857098284,48.002615418096724],"label":"Layup drill in basketball"}]},"ibKFezOKsBQ":{"duration":82.408,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ibKFezOKsBQ","annotations":[{"segment":[2.178737583777679,63.56792732590908],"label":"Fun sliding 
down"}]},"CYSyc4jy9-Q":{"duration":100.682,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=CYSyc4jy9-Q","annotations":[{"segment":[21.51855865799258,100.68172335600907],"label":"Powerbocking"}]},"znoMrA2qCbA":{"duration":20.597,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=znoMrA2qCbA","annotations":[{"segment":[0.2,20.341],"label":"Kneeling"}]},"04qggWACGWk":{"duration":86.53999999999999,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=04qggWACGWk","annotations":[]},"9mViirdNpDc":{"duration":111.25,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=9mViirdNpDc","annotations":[]},"l3EBfLkfAX8":{"duration":88.42,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=l3EBfLkfAX8","annotations":[{"segment":[8.408198127925116,16.816396255850233],"label":"High jump"},{"segment":[20.81373634945398,32.39223868954758],"label":"High jump"},{"segment":[35.011185647425904,55.68708268330733],"label":"High jump"},{"segment":[58.85738689547582,87.52796411856474],"label":"High jump"}]},"legHHtPK5fA":{"duration":36.734,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=legHHtPK5fA","annotations":[{"segment":[0.01,14.89989341653666],"label":"Throwing darts"},{"segment":[21.89138186583463,32.60784366926677],"label":"Throwing darts"}]},"3oJ62-Ce9ys":{"duration":60.3,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3oJ62-Ce9ys","annotations":[{"segment":[10.059168486739472,45.87732917316693],"label":"Snatch"}]},"OUIS4bnEhU0":{"duration":35.991,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=OUIS4bnEhU0","annotations":[{"segment":[0.2,32.25015472367445],"label":"Doing 
crunches"}]},"HGG_st1NOYM":{"duration":96.92,"subset":"training","resolution":"582x358","url":"https://www.youtube.com/watch?v=HGG_st1NOYM","annotations":[{"segment":[1.9656180296411858,9.979291535101405],"label":"Playing beach volleyball"},{"segment":[29.786673218408737,47.17483271138846],"label":"Playing beach volleyball"},{"segment":[65.47020052574103,67.88942271606865],"label":"Playing beach volleyball"},{"segment":[69.4014365850234,77.41511009048362],"label":"Playing beach volleyball"},{"segment":[82.40475585803432,91.9304432324493],"label":"Playing beach volleyball"}]},"cfKMu6aeMwU":{"duration":124.32,"subset":"training","resolution":"1916x1080","url":"https://www.youtube.com/watch?v=cfKMu6aeMwU","annotations":[{"segment":[14.73989083100739,114.23415394030727],"label":"Elliptical trainer"}]},"vXvdkqpo7F0":{"duration":122.09,"subset":"testing","resolution":"480x320","url":"https://www.youtube.com/watch?v=vXvdkqpo7F0","annotations":[]},"wtoKUYBw9f4":{"duration":61.74,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=wtoKUYBw9f4","annotations":[{"segment":[30.919090226677067,52.591349731357255],"label":"High jump"}]},"37siIMJNG9Y":{"duration":98.174,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=37siIMJNG9Y","annotations":[{"segment":[49.010416688776395,62.0288074024961],"label":"Gargling 
mouthwash"}]},"w9CC0wf27zs":{"duration":78.902,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=w9CC0wf27zs","annotations":[{"segment":[0,43.69686858814353],"label":"Knitting"},{"segment":[55.39039680187207,78.900543],"label":"Knitting"}]},"YlK_P4Ys6hE":{"duration":116.1,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YlK_P4Ys6hE","annotations":[{"segment":[17.975572644036845,66.26005777765417],"label":"Snowboarding"},{"segment":[78.01470771654976,110.56604482547382],"label":"Snowboarding"}]},"OixwThoO8z4":{"duration":170.365,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=OixwThoO8z4","annotations":[{"segment":[13.023206698907954,170.36480725623582],"label":"Rollerblading"}]},"wts5XRikF1Y":{"duration":155.412,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wts5XRikF1Y","annotations":[{"segment":[9.940471980783993,104.73863048985959],"label":"Surfing"},{"segment":[111.28479489547581,129.71103544461778],"label":"Surfing"},{"segment":[137.22700198439938,142.0760126552262],"label":"Surfing"}]},"uZCov5TG-Y8":{"duration":70.64,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=uZCov5TG-Y8","annotations":[{"segment":[19.83513594383775,32.28719350858034],"label":"Ironing clothes"},{"segment":[41.98437108112324,48.37569266302652],"label":"Ironing clothes"}]},"p3LvCbxC_ZE":{"duration":113.198,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=p3LvCbxC_ZE","annotations":[{"segment":[46.652064050250736,66.54520948883052],"label":"Trimming branches or hedges"},{"segment":[82.03721517067905,88.02276325466039],"label":"Trimming branches or 
hedges"}]},"dKJMP8EmZjw":{"duration":152.46,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=dKJMP8EmZjw","annotations":[]},"d7gTlJhEwrM":{"duration":120.12,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=d7gTlJhEwrM","annotations":[{"segment":[65.96118939157567,68.0224765600624],"label":"Long jump"},{"segment":[83.2010457098284,87.51100978939158],"label":"Long jump"}]},"LF5wwjmKr40":{"duration":100.4,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LF5wwjmKr40","annotations":[]},"lvd2InghJOo":{"duration":131.61,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=lvd2InghJOo","annotations":[{"segment":[0.6157301092043682,130.53478315132605],"label":"Playing racquetball"}]},"1Om21fCH1sI":{"duration":30.349,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=1Om21fCH1sI","annotations":[{"segment":[0.01,3.818878315132605],"label":"BMX"},{"segment":[12.022394695787831,20.697377535101403],"label":"BMX"},{"segment":[22.583243369734788,30.021],"label":"BMX"}]},"unzG2wuyBsU":{"duration":190.52,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=unzG2wuyBsU","annotations":[]},"hOdSTik8_nw":{"duration":166.023,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hOdSTik8_nw","annotations":[{"segment":[54.650209711388456,70.70856517160686],"label":"Hurling"},{"segment":[104.37931049141966,119.66064875195006],"label":"Hurling"},{"segment":[145.82022780811232,165.822675],"label":"Hurling"}]},"zzE2VrQMvbc":{"duration":121.44,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zzE2VrQMvbc","annotations":[{"segment":[6.062545322932917,114.4305429703588],"label":"Paintball"}]},"LE05u6TR9MI":{"duration":42.632,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LE05u6TR9MI","annotations":[{"segment":[9.2
11948517940717,14.056492979719188],"label":"Playing beach volleyball"},{"segment":[18.233658346333854,20.156480499219967],"label":"Playing beach volleyball"},{"segment":[31.030371294851793,32.95319344773791],"label":"Playing beach volleyball"}]},"GQzwzOM9db8":{"duration":75.98,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GQzwzOM9db8","annotations":[{"segment":[9.126521340093603,73.13069697191888],"label":"Using the pommel horse"}]},"uG_G4g6ixms":{"duration":30.326,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=uG_G4g6ixms","annotations":[{"segment":[2.6493207537825323,6.434064312012481],"label":"Calf roping"},{"segment":[20.34299745709828,30.125259],"label":"Calf roping"}]},"vULLe2qogZU":{"duration":8.336,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=vULLe2qogZU","annotations":[{"segment":[0.2,8.101],"label":"Hurling"}]},"ryFSf08mrkA":{"duration":83.87,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ryFSf08mrkA","annotations":[]},"cY541XSdz50":{"duration":73.33,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=cY541XSdz50","annotations":[{"segment":[38.43747715622911,62.57529763231346],"label":"Bungee jumping"}]},"YNVK-pb7fWM":{"duration":53.755,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=YNVK-pb7fWM","annotations":[{"segment":[0.01,53.753832],"label":"Braiding hair"}]},"43R60vMRook":{"duration":112.5,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=43R60vMRook","annotations":[{"segment":[57.86460504929968,75.03444928605981],"label":"Hula hoop"}]},"OuVncktxGw0":{"duration":28.631,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=OuVncktxGw0","annotations":[{"segment":[0.4007331106341705,19.99216284624848],"label":"Arm 
wrestling"}]},"3MS3CAyl_YA":{"duration":66.29,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=3MS3CAyl_YA","annotations":[{"segment":[0.01,39.71372869890796],"label":"Smoking hookah"},{"segment":[46.6429469875195,65.46559965210609],"label":"Smoking hookah"}]},"6gzU9P-5tqE":{"duration":90.836,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=6gzU9P-5tqE","annotations":[{"segment":[0.2,6.802106283931358],"label":"Playing ten pins"},{"segment":[16.580134067082682,22.10684542277691],"label":"Playing ten pins"},{"segment":[30.751188825273008,33.160268134165364],"label":"Playing ten pins"},{"segment":[48.60671782059282,50.16553384399376],"label":"Playing ten pins"},{"segment":[77.23224843213728,79.21619609828393],"label":"Playing ten pins"}]},"z85nM9V4058":{"duration":196.63,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=z85nM9V4058","annotations":[{"segment":[11.63829294,95.2554657],"label":"Hopscotch"},{"segment":[118.3491894,194.58659906396258],"label":"Hopscotch"}]},"qiupddjDHDg":{"duration":141.108,"subset":"testing","resolution":"606x480","url":"https://www.youtube.com/watch?v=qiupddjDHDg","annotations":[]},"6B7fKzdJAbo":{"duration":28.42,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6B7fKzdJAbo","annotations":[{"segment":[8.025337539781592,13.079970023400936],"label":"Volleyball"}]},"XuFiQCgRsRo":{"duration":67.687,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XuFiQCgRsRo","annotations":[{"segment":[14.456812792511702,67.661],"label":"Using the rowing 
machine"}]},"txMaVvdALeM":{"duration":177.981,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=txMaVvdALeM","annotations":[{"segment":[5.274382215288612,7.772773790951638],"label":"Rafting"},{"segment":[10.548764430577224,11.10396255850234],"label":"Rafting"},{"segment":[12.769556942277692,16.93354290171607],"label":"Rafting"},{"segment":[28.03750546021841,109.38632449297972],"label":"Rafting"},{"segment":[116.32708112324494,168.5215709828393],"label":"Rafting"}]},"SLPD1U5iDjY":{"duration":129.405,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=SLPD1U5iDjY","annotations":[{"segment":[0.2,123.71028549141964],"label":"Playing beach volleyball"}]},"N84ci0Imc30":{"duration":67.2,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=N84ci0Imc30","annotations":[{"segment":[19.70877821528861,67.198547],"label":"Drinking coffee"}]},"bc-DycGxV9E":{"duration":64.691,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=bc-DycGxV9E","annotations":[{"segment":[39.740066104137355,60.86769544774005],"label":"Fun sliding down"}]},"qwxmpiaT-kk":{"duration":41.053,"subset":"validation","resolution":"400x226","url":"https://www.youtube.com/watch?v=qwxmpiaT-kk","annotations":[{"segment":[0.01,41.001],"label":"Arm wrestling"}]},"0X0u_3xUBPE":{"duration":224.77,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=0X0u_3xUBPE","annotations":[]},"k2gc3YcVjwM":{"duration":54.03,"subset":"training","resolution":"274x360","url":"https://www.youtube.com/watch?v=k2gc3YcVjwM","annotations":[{"segment":[18.618129485179406,50.12573322932917],"label":"Washing 
dishes"}]},"rs4UF-p9V_Y":{"duration":168.531,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=rs4UF-p9V_Y","annotations":[]},"CsaeiOFuij0":{"duration":51.9,"subset":"training","resolution":"424x240","url":"https://www.youtube.com/watch?v=CsaeiOFuij0","annotations":[{"segment":[0,51.896597],"label":"Playing squash"}]},"dJ0kxnyVzFI":{"duration":164.189,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=dJ0kxnyVzFI","annotations":[{"segment":[0,14.109263600850431],"label":"Running a marathon"},{"segment":[56.88531029819832,75.83996572097523],"label":"Running a marathon"},{"segment":[81.21898955716868,86.59801339336214],"label":"Running a marathon"},{"segment":[108.11410873813593,136.2899478801016],"label":"Running a marathon"}]},"AuVVP8q6tFY":{"duration":185.273,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=AuVVP8q6tFY","annotations":[{"segment":[0.01,183.36735184501973],"label":"Rollerblading"}]},"nMiXX2jqI40":{"duration":64.18,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=nMiXX2jqI40","annotations":[{"segment":[0,38.5246255850234],"label":"Scuba diving"},{"segment":[40.038065522620904,62.45938221528861],"label":"Scuba diving"}]},"AoRk69eieek":{"duration":111.13,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=AoRk69eieek","annotations":[{"segment":[18.72404946645866,94.1403598174727],"label":"Pole vault"}]},"GwLhmYF7Wdw":{"duration":191.286,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=GwLhmYF7Wdw","annotations":[{"segment":[17.905076039776283,36.10857001354884],"label":"Elliptical trainer"},{"segment":[95.79215681280311,107.13203830466144],"label":"Elliptical trainer"},{"segment":[148.01529526215063,176.3649989917964],"label":"Elliptical 
trainer"}]},"AAQp3iEJxJc":{"duration":237.216,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=AAQp3iEJxJc","annotations":[{"segment":[21.834147183574416,151.35874912003283],"label":"Canoeing"},{"segment":[163.94113902243163,216.49111883775353],"label":"Canoeing"}]},"Fg_VZJAtByk":{"duration":198.21,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Fg_VZJAtByk","annotations":[{"segment":[0,195.11340103120125],"label":"Playing guitarra"}]},"rmaHl5HTcwE":{"duration":73.422,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rmaHl5HTcwE","annotations":[{"segment":[5.496486739469579,73.381],"label":"Throwing darts"}]},"1YGz5CQBflM":{"duration":82.338,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=1YGz5CQBflM","annotations":[{"segment":[16.17380031201248,34.28138377535102],"label":"Arm wrestling"},{"segment":[61.37266458658346,73.69855538221529],"label":"Arm wrestling"}]},"T6KP0pg7qxM":{"duration":112.989,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=T6KP0pg7qxM","annotations":[{"segment":[35.78256593120868,39.48421068271302],"label":"Throwing darts"},{"segment":[44.06719942267078,57.11108978471139],"label":"Throwing darts"},{"segment":[81.43618358190328,94.832612049922],"label":"Throwing darts"}]},"XKA2W2735WM":{"duration":131.727,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XKA2W2735WM","annotations":[{"segment":[8.899095951508139,124.7513778829723],"label":"Drum corps"}]},"Vnj0j648Emw":{"duration":29.258,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Vnj0j648Emw","annotations":[{"segment":[3.692513260530421,27.260776911076444],"label":"Sharpening 
knives"}]},"OMq736aZeV8":{"duration":181.4,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OMq736aZeV8","annotations":[{"segment":[2.263226209048362,107.5032449297972],"label":"Kayaking"},{"segment":[144.01364898595943,166.36547269890795],"label":"Kayaking"}]},"_N9LhaKnZv4":{"duration":96.53,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_N9LhaKnZv4","annotations":[{"segment":[54.81316098358221,76.34690279856092],"label":"Drinking beer"}]},"GtYf9VsPHO0":{"duration":118.14,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GtYf9VsPHO0","annotations":[{"segment":[53.2655541051574,99.89595268164467],"label":"Making a sandwich"}]},"vX0fg06e9co":{"duration":195.443,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vX0fg06e9co","annotations":[{"segment":[44.52934135929098,188.82765271624245],"label":"Hula hoop"}]},"iEHm8zPBDkI":{"duration":31.72,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=iEHm8zPBDkI","annotations":[]},"Y2UkP0rySHA":{"duration":105.466,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Y2UkP0rySHA","annotations":[{"segment":[0.2,105.26469387755101],"label":"Skiing"}]},"lwqSz2Q2L-E":{"duration":50.99,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lwqSz2Q2L-E","annotations":[{"segment":[6.574130265210608,45.859539001560066],"label":"Smoking a cigarette"}]},"McfQMw15TgQ":{"duration":78.112,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=McfQMw15TgQ","annotations":[]},"8Ny9NjNpQQA":{"duration":17.81,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=8Ny9NjNpQQA","annotations":[{"segment":[1.9950701279251173,7.215850619999999],"label":"Playing water polo"},{"segment":[5.032823400000001,13.06717566],"label":"Playing 
polo"},{"segment":[8.36791483,15.0681949],"label":"Playing water polo"}]},"O7U80T6HruQ":{"duration":70.71000000000001,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=O7U80T6HruQ","annotations":[]},"dd1LE0m_KVg":{"duration":84.126,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dd1LE0m_KVg","annotations":[{"segment":[5.887502367749848,80.72422288388915],"label":"Drum corps"}]},"bDwGZOk7njI":{"duration":59.58,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=bDwGZOk7njI","annotations":[{"segment":[2.2308544260137753,59.117642289365044],"label":"Mowing the lawn"}]},"rWVpotNVeyM":{"duration":209.7,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rWVpotNVeyM","annotations":[{"segment":[32.3782511700468,203.0999391575663],"label":"Playing flauta"}]},"V3LvKGRzkeg":{"duration":115.00999999999999,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=V3LvKGRzkeg","annotations":[{"segment":[8.964196567862714,110.99606708268331],"label":"Playing guitarra"}]},"PY4304sV5fY":{"duration":173.52,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PY4304sV5fY","annotations":[{"segment":[73.49666629329172,149.56503914196566],"label":"Playing piano"}]},"g0Cp28ElIMk":{"duration":98.894,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=g0Cp28ElIMk","annotations":[{"segment":[2.4684876555382216,90.71692134102965],"label":"Slacklining"}]},"2bQx5igckJc":{"duration":167.369,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=2bQx5igckJc","annotations":[]},"A80eMz7rJUM":{"duration":156.41,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=A80eMz7rJUM","annotations":[{"segment":[21.47277058346334,62.4662416973479],"label":"Trimming branches or 
hedges"}]},"Yz7FjWlA6U4":{"duration":233.058,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Yz7FjWlA6U4","annotations":[{"segment":[76.74696234945486,125.10391255127247],"label":"Rafting"},{"segment":[141.82887277144997,231.99822352371135],"label":"Rafting"}]},"QDjaaUtepHo":{"duration":160.265,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=QDjaaUtepHo","annotations":[{"segment":[6.249648985959439,144.99185647425898],"label":"Blow-drying hair"}]},"wwh94C7NB1I":{"duration":192.865,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wwh94C7NB1I","annotations":[{"segment":[20.159050236839406,50.247184918689264],"label":"Longboarding"},{"segment":[55.963930508240736,150.74155373166926],"label":"Longboarding"}]},"PofMK_beoR8":{"duration":161.519,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=PofMK_beoR8","annotations":[{"segment":[24.93703432137286,151.90802340093603],"label":"Powerbocking"}]},"ssHXm1LqovI":{"duration":11.59,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=ssHXm1LqovI","annotations":[{"segment":[2.440268424336973,10.84563744149766],"label":"Dodgeball"}]},"LSFmrUdURCs":{"duration":162.749,"subset":"validation","resolution":"270x480","url":"https://www.youtube.com/watch?v=LSFmrUdURCs","annotations":[{"segment":[30.9626864274571,153.2906770670827],"label":"Waxing skis"}]},"7AsHuXeoSpA":{"duration":14.98,"subset":"validation","resolution":"636x480","url":"https://www.youtube.com/watch?v=7AsHuXeoSpA","annotations":[{"segment":[3.434632984399376,4.976713099843994],"label":"Washing face"}]},"u-X4YO91V78":{"duration":168.832,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=u-X4YO91V78","annotations":[{"segment":[10.005737909516382,159.30187987519503],"label":"River 
tubing"}]},"kq7KVmvkYPc":{"duration":149.89,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=kq7KVmvkYPc","annotations":[{"segment":[9.155588143525742,141.95807020280813],"label":"Shaving"}]},"gvFJe2W3ZNU":{"duration":130.682,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gvFJe2W3ZNU","annotations":[]},"rQZIJBinOsw":{"duration":129.011,"subset":"validation","resolution":"852x480","url":"https://www.youtube.com/watch?v=rQZIJBinOsw","annotations":[{"segment":[67.99073010920436,83.0774305772231],"label":"Sharpening knives"},{"segment":[95.76511076443059,108.84227925117005],"label":"Sharpening knives"}]},"EW3zRMVjkoU":{"duration":35.759,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=EW3zRMVjkoU","annotations":[{"segment":[0.01,35.75873015873016],"label":"Beach soccer"}]},"9abGikdleAU":{"duration":135.0,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=9abGikdleAU","annotations":[{"segment":[11.583533268330731,124.68093990639625],"label":"Cumbia"}]},"l1oh52tqdPo":{"duration":34.598,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=l1oh52tqdPo","annotations":[{"segment":[13.135333333333334,27.455],"label":"Decorating the Christmas tree"}]},"CPk3EvOGcZg":{"duration":88.28,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=CPk3EvOGcZg","annotations":[]},"m-BZKFg9k18":{"duration":40.33,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=m-BZKFg9k18","annotations":[{"segment":[0.2,39.656583463338535],"label":"Doing step 
aerobics"}]},"8TqqLqBb3sk":{"duration":43.84,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=8TqqLqBb3sk","annotations":[{"segment":[15.787463343979786,31.138281100989133],"label":"Kneeling"}]},"2Py3HAe0L-8":{"duration":107.93,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=2Py3HAe0L-8","annotations":[]},"fE3j74_s4KY":{"duration":226.256,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=fE3j74_s4KY","annotations":[{"segment":[18.001586806329396,166.2499487408068],"label":"Futsal"}]},"-VKGwqL83w8":{"duration":117.957,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=-VKGwqL83w8","annotations":[{"segment":[16.91608736349454,93.60597347893916],"label":"Scuba diving"}]},"WUSEdPfHPoY":{"duration":211.74,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=WUSEdPfHPoY","annotations":[{"segment":[0.01,179.28402964118564],"label":"Playing squash"}]},"JE0xYYOp5_s":{"duration":82.106,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JE0xYYOp5_s","annotations":[{"segment":[33.17481892598667,57.6019213839306],"label":"Sharpening knives"}]},"9ut_IDtfVzY":{"duration":150.09,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9ut_IDtfVzY","annotations":[{"segment":[0.010000000000000002,109.81901086162247],"label":"Fixing bicycle"}]},"TFdv2VQ8-Hg":{"duration":154.669,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TFdv2VQ8-Hg","annotations":[{"segment":[45.031755451713394,132.70431619937696],"label":"Surfing"}]},"o8ja3mhecQI":{"duration":70.28999999999999,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=o8ja3mhecQI","annotations":[{"segment":[27.88667336,44.40897723088923],"label":"Using uneven 
bars"}]},"j4vMVBSfyu0":{"duration":81.549,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=j4vMVBSfyu0","annotations":[{"segment":[27.606895971076934,38.16621562821696],"label":"Throwing darts"},{"segment":[62.71981379095164,74.93300268330734],"label":"Throwing darts"}]},"zVOj9aaq4L0":{"duration":169.553,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zVOj9aaq4L0","annotations":[{"segment":[17.193273712948518,20.896440358814353],"label":"Blow-drying hair"},{"segment":[30.418868876755074,39.1477616848674],"label":"Blow-drying hair"},{"segment":[46.55409497659907,56.60554730109205],"label":"Blow-drying hair"},{"segment":[60.83773775351015,71.68272578783152],"label":"Blow-drying hair"},{"segment":[75.12138053042122,155.53299912636504],"label":"Blow-drying hair"}]},"8GqAgX-kseE":{"duration":21.386,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8GqAgX-kseE","annotations":[{"segment":[0.01,21.385578231292516],"label":"Cutting the grass"}]},"U7k6GFEOt7g":{"duration":202.734,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=U7k6GFEOt7g","annotations":[{"segment":[0.01,197.62187987519502],"label":"Hurling"}]},"FqiMsRnatP0":{"duration":45.79,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=FqiMsRnatP0","annotations":[{"segment":[13.285640902060049,29.904038903617682],"label":"Hitting a pinata"}]},"j5SX_9JKtfs":{"duration":196.465,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=j5SX_9JKtfs","annotations":[{"segment":[68.04214628705148,196.46403628117915],"label":"Cleaning 
sink"}]},"GEhtyurCwDQ":{"duration":197.56,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GEhtyurCwDQ","annotations":[{"segment":[12.319254566101012,14.475124115168688],"label":"Shaving"},{"segment":[93.31835333821516,100.09394334957071],"label":"Shaving"},{"segment":[119.4967692911798,153.37471934795758],"label":"Shaving"}]},"rCwolEeHY80":{"duration":110.434,"subset":"training","resolution":"448x252","url":"https://www.youtube.com/watch?v=rCwolEeHY80","annotations":[{"segment":[17.442389013614164,103.10619175589488],"label":"Trimming branches or hedges"}]},"WUTZxlqZZ38":{"duration":202.29,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WUTZxlqZZ38","annotations":[]},"2zwUExKXw9s":{"duration":42.86,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=2zwUExKXw9s","annotations":[{"segment":[3.8095741029641186,39.76660686427457],"label":"Horseback riding"}]},"KaRFuHnwrDI":{"duration":92.114,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KaRFuHnwrDI","annotations":[]},"pPGniqIBs-U":{"duration":66.92,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pPGniqIBs-U","annotations":[{"segment":[16.584865834633387,61.74994071762871],"label":"Camel ride"}]},"ew7XlNRrKyM":{"duration":16.37,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=ew7XlNRrKyM","annotations":[{"segment":[0.17876829219968798,15.654994852855339],"label":"Playing field hockey"}]},"bDK-_jU_KzI":{"duration":126.805,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bDK-_jU_KzI","annotations":[{"segment":[39.298160436137074,109.40291900311527],"label":"Arm wrestling"}]},"f3Mse2A5edA":{"duration":116.983,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=f3Mse2A5edA","annotations":[{"segment":[0.2,51.099839041180694],"label":"Playing rubik 
cube"},{"segment":[0.2,116.78213151927437],"label":"Playing rubik cube"},{"segment":[77.19725583463338,94.89969984399376],"label":"Playing rubik cube"},{"segment":[92.89220619344773,111.68964673946958],"label":"Playing rubik cube"}]},"mYfo8LhPB5Y":{"duration":81.34,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mYfo8LhPB5Y","annotations":[{"segment":[4.060626925757303,67.12723877847114],"label":"Playing accordion"}]},"WmxNXcXMcdw":{"duration":153.6,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=WmxNXcXMcdw","annotations":[]},"ptlSMte8xz0":{"duration":112.873,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ptlSMte8xz0","annotations":[{"segment":[0.01,112.841],"label":"Skiing"}]},"ZKkjR2VTb7Y":{"duration":36.897,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZKkjR2VTb7Y","annotations":[{"segment":[0.01,36.841],"label":"Cutting the grass"}]},"EdJgTx0iFeE":{"duration":160.799,"subset":"training","resolution":"647x480","url":"https://www.youtube.com/watch?v=EdJgTx0iFeE","annotations":[{"segment":[19.30838065522621,160.536],"label":"Hula hoop"}]},"lm_DRjAHlSs":{"duration":233.082,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=lm_DRjAHlSs","annotations":[]},"_wqhA-Ceqwc":{"duration":221.542,"subset":"training","resolution":"424x240","url":"https://www.youtube.com/watch?v=_wqhA-Ceqwc","annotations":[{"segment":[29.01616848673947,45.94226677067083],"label":"Sharpening knives"},{"segment":[188.60509516380654,198.27715132605303],"label":"Sharpening knives"}]},"43j1BAyP6Q0":{"duration":179.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=43j1BAyP6Q0","annotations":[{"segment":[0.01,174.30052108206777],"label":"Elliptical 
trainer"}]},"wil2yt6Ktu0":{"duration":71.75,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=wil2yt6Ktu0","annotations":[]},"MssKmHxNPmw":{"duration":27.981,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=MssKmHxNPmw","annotations":[]},"UgrOTFBD7tA":{"duration":202.55,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UgrOTFBD7tA","annotations":[{"segment":[16.58541341653666,188.75780031201248],"label":"Rock climbing"}]},"VFqkLp5mzBM":{"duration":79.16,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=VFqkLp5mzBM","annotations":[{"segment":[20.93148466146646,33.03346399375975],"label":"Playing polo"}]},"gYqXtgtyFnY":{"duration":67.872,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=gYqXtgtyFnY","annotations":[{"segment":[0.01,67.871427],"label":"Putting on shoes"}]},"r09mVf2dQnk":{"duration":76.673,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=r09mVf2dQnk","annotations":[{"segment":[6.136055322344059,68.4516347615964],"label":"Cleaning 
sink"}]},"X7okoyw18ss":{"duration":61.67,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=X7okoyw18ss","annotations":[]},"HoQQlRzybmA":{"duration":217.2,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HoQQlRzybmA","annotations":[{"segment":[33.20678079875195,45.06634536973479],"label":"Slacklining"},{"segment":[50.14901590015601,55.23168643057723],"label":"Slacklining"},{"segment":[78.27312616848674,86.0665543151326],"label":"Slacklining"},{"segment":[93.18229305772232,103.00878941653667],"label":"Slacklining"},{"segment":[109.78568345709829,110.46337286115445],"label":"Slacklining"},{"segment":[138.58748312948518,151.46358180655224],"label":"Slacklining"},{"segment":[164.00083578159126,201.27375300468017],"label":"Slacklining"}]},"DLVJlI5p4g8":{"duration":192.564,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DLVJlI5p4g8","annotations":[]},"8TGG-FZx0cc":{"duration":165.65,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8TGG-FZx0cc","annotations":[{"segment":[25.58419129797192,165.651179138322],"label":"Preparing pasta"}]},"z7zj8stU-kw":{"duration":4.64,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=z7zj8stU-kw","annotations":[{"segment":[0.01,2.0358212399375977],"label":"Brushing teeth"}]},"HyTsvX-Qefo":{"duration":56.96,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=HyTsvX-Qefo","annotations":[]},"PagM71op4HU":{"duration":30.558,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=PagM71op4HU","annotations":[{"segment":[0.01,24.378708268330733],"label":"Throwing 
darts"}]},"nAwM_Mh4V8k":{"duration":160.32999999999998,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nAwM_Mh4V8k","annotations":[]},"F2yh5HfIj2c":{"duration":140.852,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=F2yh5HfIj2c","annotations":[{"segment":[43.947644164269974,140.19298334477378],"label":"Scuba diving"}]},"ToD9rw_QoJY":{"duration":209.723,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ToD9rw_QoJY","annotations":[]},"3rqesp6F95Q":{"duration":41.482,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3rqesp6F95Q","annotations":[{"segment":[0.01,41.501],"label":"Table soccer"}]},"JR9LosQmMvg":{"duration":150.535,"subset":"testing","resolution":"853x480","url":"https://www.youtube.com/watch?v=JR9LosQmMvg","annotations":[]},"5MuLI6Plf-Y":{"duration":180.63,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5MuLI6Plf-Y","annotations":[{"segment":[7.467459468446765,172.03335692667707],"label":"Playing guitarra"}]},"azCsE98qwus":{"duration":155.55,"subset":"training","resolution":"384x288","url":"https://www.youtube.com/watch?v=azCsE98qwus","annotations":[{"segment":[9.914196567862714,57.525501],"label":"Tai chi"},{"segment":[71.68055100000001,150.40561622464898],"label":"Tai chi"}]},"Aq2w52hpwig":{"duration":30.93,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Aq2w52hpwig","annotations":[{"segment":[9.987985418096724,15.102605970358814],"label":"Kayaking"}]},"jNJg1TYq3c8":{"duration":49.02,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=jNJg1TYq3c8","annotations":[{"segment":[6.499957173946958,48.405563424804996],"label":"Cleaning 
windows"}]},"08ZbVaYzAl4":{"duration":63.02,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=08ZbVaYzAl4","annotations":[]},"QPxiOzXkUFM":{"duration":210.071,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QPxiOzXkUFM","annotations":[{"segment":[62.9229615725429,191.062951024961],"label":"Rafting"}]},"suxZhXSVNKY":{"duration":112.988,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=suxZhXSVNKY","annotations":[{"segment":[14.95951003454341,91.51700256426558],"label":"Surfing"}]},"nw8MpBwL508":{"duration":100.68,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nw8MpBwL508","annotations":[{"segment":[4.473373605304212,96.45221334945398],"label":"Using the balance beam"}]},"UNXLfqkwKFc":{"duration":48.51,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=UNXLfqkwKFc","annotations":[]},"pv6CNBFhgCU":{"duration":115.497,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=pv6CNBFhgCU","annotations":[{"segment":[11.171225232682778,115.4960544217687],"label":"Carving jack-o-lanterns"}]},"WVaYjd1F8kg":{"duration":167.57999999999998,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WVaYjd1F8kg","annotations":[{"segment":[100.59311232449298,167.481],"label":"Preparing pasta"}]},"1Xav7gMRCOQ":{"duration":44.37,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=1Xav7gMRCOQ","annotations":[{"segment":[12.96411856474259,42.48773790951638],"label":"Doing step aerobics"}]},"0JHOEr3YdNM":{"duration":200.46,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=0JHOEr3YdNM","annotations":[{"segment":[84.03959268,161.00860374414978],"label":"Shaving 
legs"}]},"itXYuf_Cj_Q":{"duration":31.39,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=itXYuf_Cj_Q","annotations":[]},"Sul7NDmB5HM":{"duration":225.884,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Sul7NDmB5HM","annotations":[{"segment":[30.29735725429017,60.59471450858034],"label":"Kneeling"},{"segment":[70.81126521060841,184.25020748829954],"label":"Kneeling"},{"segment":[198.0072418096724,222.31773946957878],"label":"Kneeling"}]},"wNmGrltNmlo":{"duration":32.23,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=wNmGrltNmlo","annotations":[]},"q-03l2tyn8I":{"duration":87.981,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=q-03l2tyn8I","annotations":[]},"6hNV9oxC51k":{"duration":128.545,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6hNV9oxC51k","annotations":[{"segment":[6.6852548225497905,78.34157569482393],"label":"Rafting"},{"segment":[89.76451183194311,113.18656323381227],"label":"Rafting"}]},"nkWMBJC2Qec":{"duration":94.85,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=nkWMBJC2Qec","annotations":[]},"nB50V0OBto0":{"duration":162.796,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nB50V0OBto0","annotations":[{"segment":[29.206609570505268,36.82572511063707],"label":"Snowboarding"},{"segment":[42.66704702473813,49.778221528861145],"label":"Snowboarding"},{"segment":[52.82586774491388,56.38145499697539],"label":"Snowboarding"},{"segment":[57.14336655098856,85.33409404947626],"label":"Snowboarding"},{"segment":[93.46115062561685,97.01673787767837],"label":"Snowboarding"},{"segment":[98.54056098570473,100.06438345397815],"label":"Snowboarding"}]},"O9phka35v6I":{"duration":92.28,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=O9phka35v6I","annotations":[{"segment":[14.683560338331901,89.5
4092601872075],"label":"Playing accordion"}]},"3UgGvKnelfY":{"duration":115.566,"subset":"training","resolution":"854x478","url":"https://www.youtube.com/watch?v=3UgGvKnelfY","annotations":[{"segment":[0.01,115.5653514739229],"label":"Roof shingle removal"}]},"t5Br7yOUe4g":{"duration":95.3,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=t5Br7yOUe4g","annotations":[{"segment":[2.8236489859594385,90.95121996879875],"label":"Using the balance beam"}]},"soDdkeamoMM":{"duration":10.578,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=soDdkeamoMM","annotations":[{"segment":[0.2,10.154261637310093],"label":"Swinging at the playground"}]},"JqHxPA5deHs":{"duration":17.346,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=JqHxPA5deHs","annotations":[]},"hShYj_OGFtc":{"duration":191.82,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=hShYj_OGFtc","annotations":[{"segment":[0,180.4305819032761],"label":"Plastering"}]},"iPPfX25MUQ0":{"duration":112.989,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iPPfX25MUQ0","annotations":[{"segment":[20.438620904836196,95.16215288611544],"label":"Camel ride"}]},"mqa1ptnQWWo":{"duration":133.608,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mqa1ptnQWWo","annotations":[{"segment":[71.88550195787833,90.42987782527301],"label":"Fixing the roof"}]},"txyXUXWybt4":{"duration":85.891,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=txyXUXWybt4","annotations":[{"segment":[11.858532095943838,30.215807769890798],"label":"Sharpening knives"},{"segment":[43.34728963884556,48.57308344383776],"label":"Sharpening knives"},{"segment":[52.99490897113885,58.756681627925126],"label":"Sharpening knives"},{"segment":[62.91051772932917,77.51594144071763],"label":"Sharpening 
knives"}]},"r8DXz1FOb90":{"duration":128.128,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=r8DXz1FOb90","annotations":[{"segment":[2.398647096904284,100.3434032199688],"label":"Disc dog"},{"segment":[107.53934448673948,123.13055056474259],"label":"Disc dog"}]},"p1JSdZmztGk":{"duration":224.14,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=p1JSdZmztGk","annotations":[{"segment":[12.87919932,222.74324729017164],"label":"Sailing"}]},"egNXVIVrVY0":{"duration":195.861,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=egNXVIVrVY0","annotations":[]},"rtFW05uRr7Q":{"duration":29.28,"subset":"testing","resolution":"800x480","url":"https://www.youtube.com/watch?v=rtFW05uRr7Q","annotations":[]},"4QRRxub6L5Q":{"duration":8.29,"subset":"testing","resolution":"848x480","url":"https://www.youtube.com/watch?v=4QRRxub6L5Q","annotations":[]},"sQo4gMcgfT4":{"duration":193.028,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sQo4gMcgfT4","annotations":[{"segment":[22.514865130830717,131.7870327972752],"label":"Riding bumper cars"}]},"8z8FprjMNbI":{"duration":332.417,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=8z8FprjMNbI","annotations":[{"segment":[64.03523166926676,252.77065132605304],"label":"Removing ice from car"}]},"IIAg_MFuCoY":{"duration":200.46,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=IIAg_MFuCoY","annotations":[{"segment":[0,200.401],"label":"Cumbia"}]},"7SJf9PysNv8":{"duration":130.079,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7SJf9PysNv8","annotations":[]},"tbKBKWCh6rs":{"duration":40.91,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=tbKBKWCh6rs","annotations":[{"segment":[4.08497312948518,34.08399454914197],"label":"Washing 
face"}]},"1-nEQf-TJPE":{"duration":55.64,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=1-nEQf-TJPE","annotations":[{"segment":[9.00066176,11.151960319999999],"label":"Mixing drinks"},{"segment":[9.00066176,11.151960319999999],"label":"Mixing drinks"}]},"Te1d8H4VWYw":{"duration":15.47,"subset":"training","resolution":"800x480","url":"https://www.youtube.com/watch?v=Te1d8H4VWYw","annotations":[{"segment":[0.48561756000000006,11.966280886115445],"label":"Playing water polo"}]},"PrR-kkpy1c8":{"duration":223.4,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PrR-kkpy1c8","annotations":[{"segment":[51.92893597376548,208.4127751076443],"label":"Rafting"}]},"0KwSF8NdEug":{"duration":217.78,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0KwSF8NdEug","annotations":[{"segment":[142.13364641391345,190.98148670890996],"label":"Baking cookies"}]},"-1IBHYS3L-Y":{"duration":252.819,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-1IBHYS3L-Y","annotations":[{"segment":[0,184.9797935803432],"label":"Removing ice from car"}]},"XqxJsWQqKRk":{"duration":173.291,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XqxJsWQqKRk","annotations":[{"segment":[42.45146530757578,55.1378280053951],"label":"Making a cake"},{"segment":[59.9964349960493,70.25349419854152],"label":"Making a cake"},{"segment":[81.05039862221751,128.8267006969839],"label":"Making a cake"}]},"ISHKwbnOzXY":{"duration":192.076,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ISHKwbnOzXY","annotations":[{"segment":[4.343688767550701,184.98122854914195],"label":"Doing 
fencing"}]},"cXY-ONmtylc":{"duration":115.055,"subset":"validation","resolution":"638x360","url":"https://www.youtube.com/watch?v=cXY-ONmtylc","annotations":[{"segment":[0.01,3.654873170573392],"label":"Plastering"},{"segment":[5.983958840355323,14.583659774934764],"label":"Plastering"},{"segment":[22.466718964965914,37.69535603661701],"label":"Plastering"},{"segment":[53.461474416679316,78.72309591200641],"label":"Plastering"}]},"MaJlWFemO68":{"duration":15.67,"subset":"validation","resolution":"176x144","url":"https://www.youtube.com/watch?v=MaJlWFemO68","annotations":[{"segment":[0.3667738502340094,14.817663549453977],"label":"Brushing hair"}]},"brJIpRGcBFU":{"duration":145.775,"subset":"testing","resolution":"400x224","url":"https://www.youtube.com/watch?v=brJIpRGcBFU","annotations":[]},"ulJxGEx2r7o":{"duration":217.71,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ulJxGEx2r7o","annotations":[{"segment":[15.691629397761812,23.830313285612277],"label":"Playing flauta"},{"segment":[31.290773516141872,56.04593700835371],"label":"Playing flauta"},{"segment":[81.81843598654686,121.83363176847831],"label":"Playing flauta"},{"segment":[126.58119736972442,178.8044189834316],"label":"Playing flauta"},{"segment":[183.55198458467768,185.58665555664032],"label":"Playing flauta"},{"segment":[191.0124448152073,209.32448356287085],"label":"Playing flauta"}]},"b0dgV_N6cNQ":{"duration":211.766,"subset":"testing","resolution":"853x480","url":"https://www.youtube.com/watch?v=b0dgV_N6cNQ","annotations":[]},"xKLnBh0zmL4":{"duration":100.45,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xKLnBh0zmL4","annotations":[{"segment":[1.723782779882624,91.8306099101107],"label":"Using parallel bars"}]},"7tlXgKBTD_0":{"duration":51.967,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=7tlXgKBTD_0","annotations":[{"segment":[16.119,51.941],"label":"Doing 
crunches"}]},"Iib8Qt_9HQc":{"duration":113.825,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Iib8Qt_9HQc","annotations":[{"segment":[61.594394695787834,102.43865366614665],"label":"Using the rowing machine"}]},"-l16smV_uYg":{"duration":157.85,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=-l16smV_uYg","annotations":[{"segment":[64.75339001560062,68.20033853354134],"label":"Removing curlers"},{"segment":[106.36298283931357,108.33266770670826],"label":"Removing curlers"}]},"UxIXOCccW24":{"duration":63.623,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UxIXOCccW24","annotations":[{"segment":[16.82092318692852,18.10722919314936],"label":"Sumo"},{"segment":[55.311156757690576,56.89430261150084],"label":"Sumo"}]},"V--Xz2FtJXA":{"duration":199.62,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=V--Xz2FtJXA","annotations":[{"segment":[64.13048860530422,185.2312656318253],"label":"Getting a haircut"}]},"5g3D9uF11a8":{"duration":87.749,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=5g3D9uF11a8","annotations":[]},"aUCdj7acYos":{"duration":24.265,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=aUCdj7acYos","annotations":[{"segment":[13.001727015576323,21.241193554517135],"label":"Hitting a pinata"}]},"JviFZ5Cliug":{"duration":182.904,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=JviFZ5Cliug","annotations":[{"segment":[68.48184037441497,71.62059139157567],"label":"Elliptical trainer"},{"segment":[76.18604741653667,80.18082143837754],"label":"Elliptical trainer"},{"segment":[93.02116650858035,97.30128153198127],"label":"Elliptical trainer"},{"segment":[101.58139655538221,105.86151157878315],"label":"Elliptical trainer"},{"segment":[111.28299060842434,116.70446963806553],"label":"Elliptical 
trainer"}]},"OPqqbxGKp6E":{"duration":138.786,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OPqqbxGKp6E","annotations":[{"segment":[0.01,96.34826625585023],"label":"Making an omelette"}]},"Y_dtU10XIsg":{"duration":66.06,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Y_dtU10XIsg","annotations":[{"segment":[9.009496879875195,66.001],"label":"Shoveling snow"}]},"bjtjeUcoxkg":{"duration":121.19,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bjtjeUcoxkg","annotations":[{"segment":[19.283719478939158,30.24897173166927],"label":"Shaving"},{"segment":[45.56251367082683,54.070036970358814],"label":"Shaving"},{"segment":[83.94089655538222,96.79670954134166],"label":"Shaving"}]},"KFIxTdJtXAE":{"duration":238.539,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KFIxTdJtXAE","annotations":[{"segment":[0,215.4656858437603],"label":"Rafting"}]},"EhzHU0gkrzA":{"duration":24.68,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=EhzHU0gkrzA","annotations":[]},"Iwgbi95VjXU":{"duration":150.884,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Iwgbi95VjXU","annotations":[{"segment":[27.065546021840873,145.21253822152886],"label":"Building sandcastles"}]},"HwdYYnxEde8":{"duration":122.21,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HwdYYnxEde8","annotations":[]},"kA2P40yJxms":{"duration":197.86,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kA2P40yJxms","annotations":[]},"-MldnTjJ-zE":{"duration":178.75,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=-MldnTjJ-zE","annotations":[{"segment":[21.047303432137287,31.083103744149767],"label":"Grooming horse"},{"segment":[85.16491653666148,157.08815210608427],"label":"Grooming 
horse"}]},"byVhCu3KH1g":{"duration":78.41,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=byVhCu3KH1g","annotations":[{"segment":[37.43310173166927,64.71278044461779],"label":"Polishing shoes"}]},"PKEw32TJRWs":{"duration":230.946,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=PKEw32TJRWs","annotations":[{"segment":[16.207090483619343,17.293903445933758],"label":"Waterskiing"},{"segment":[30.264331030384074,43.23475861483439],"label":"Waterskiing"},{"segment":[47.197944821194206,86.10922757454516],"label":"Waterskiing"},{"segment":[109.52805515758045,165.01266204661792],"label":"Waterskiing"},{"segment":[175.46106057566303,209.68857752574104],"label":"Waterskiing"}]},"e-Xf13exY4E":{"duration":93.066,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=e-Xf13exY4E","annotations":[]},"g49F9coR2VU":{"duration":76.19,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=g49F9coR2VU","annotations":[{"segment":[0.01,66.20069757075998],"label":"Doing kickboxing"}]},"FsS_NCZEfaI":{"duration":212.742,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FsS_NCZEfaI","annotations":[{"segment":[42.13804524180968,58.39603120124806],"label":"Scuba diving"},{"segment":[97.216120124805,103.5202371294852],"label":"Scuba diving"},{"segment":[108.16537597503901,116.12847113884555],"label":"Scuba diving"},{"segment":[189.12351014040564,205.7326365054602],"label":"Scuba diving"}]},"sY31L_r7dsk":{"duration":55.61,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=sY31L_r7dsk","annotations":[{"segment":[13.924636965678626,51.75106510920437],"label":"Preparing salad"}]},"j3P7ttoKGeY":{"duration":94.691,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=j3P7ttoKGeY","annotations":[{"segment":[7.068666091183727,23.4149616233935],"label":"Table 
soccer"},{"segment":[37.257770452472045,63.47074887349313],"label":"Table soccer"}]},"AahkhvTAXwg":{"duration":106.162,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=AahkhvTAXwg","annotations":[{"segment":[6.125549141965679,100.67664274570984],"label":"Baton twirling"}]},"wVNZHyJySiU":{"duration":151.534,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wVNZHyJySiU","annotations":[]},"7lwrP2ma2TU":{"duration":115.43,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=7lwrP2ma2TU","annotations":[{"segment":[18.907543783151326,62.84507409828393],"label":"Archery"},{"segment":[72.74902560374414,99.03951505460219],"label":"Archery"}]},"9Pun6fgAg8Q":{"duration":89.7,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9Pun6fgAg8Q","annotations":[{"segment":[4.686550702028081,19.935327613104526],"label":"Volleyball"},{"segment":[65.68165834633385,85.12734633385337],"label":"Volleyball"}]},"Wgz_qspJAbI":{"duration":75.14,"subset":"testing","resolution":"370x208","url":"https://www.youtube.com/watch?v=Wgz_qspJAbI","annotations":[]},"jcqRPdvb18w":{"duration":61.069,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jcqRPdvb18w","annotations":[{"segment":[37.18766228982425,48.39461392590513],"label":"Hula 
hoop"}]},"KaVu94VRRvE":{"duration":102.168,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=KaVu94VRRvE","annotations":[]},"6B3qNzgqnpc":{"duration":36.25,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6B3qNzgqnpc","annotations":[{"segment":[0,5.846216068642746],"label":"Kayaking"},{"segment":[18.047014820592825,23.582562402496098],"label":"Kayaking"}]},"PBZScfP9ynI":{"duration":10.43,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PBZScfP9ynI","annotations":[{"segment":[0.8188837499999999,9.838467238689548],"label":"Doing karate"}]},"2fEQZstAbws":{"duration":106.77000000000001,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=2fEQZstAbws","annotations":[]},"TZ9JbvClnZA":{"duration":14.165,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=TZ9JbvClnZA","annotations":[{"segment":[0,13.61174623400936],"label":"Playing ten pins"}]},"l9_VsufvmsU":{"duration":200.53,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=l9_VsufvmsU","annotations":[]},"JXMD8Obk0yg":{"duration":63.066,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=JXMD8Obk0yg","annotations":[{"segment":[19.677190639625586,56.27676522932917],"label":"Arm wrestling"}]},"ZhZ12nMnxaQ":{"duration":93.18,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZhZ12nMnxaQ","annotations":[]},"BQ_BJNFGmTg":{"duration":65.318,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=BQ_BJNFGmTg","annotations":[{"segment":[0.2,63.29990046656298],"label":"Playing 
drums"}]},"Eh0OoXQDLVc":{"duration":111.57,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Eh0OoXQDLVc","annotations":[{"segment":[16.87957659,102.2167511700468],"label":"Cheerleading"}]},"Lan3mtnCmlw":{"duration":53.267,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Lan3mtnCmlw","annotations":[{"segment":[3.155067082683307,51.33063650546022],"label":"Scuba diving"}]},"Ht9WSqhFD34":{"duration":89.65,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ht9WSqhFD34","annotations":[{"segment":[9.115025845472536,80.33409719456137],"label":"Archery"}]},"eJnSC8C3OIw":{"duration":233.8,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eJnSC8C3OIw","annotations":[{"segment":[21.585772377197053,219.3339578071036],"label":"Skateboarding"}]},"2cQVuBiTm8w":{"duration":45.65,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2cQVuBiTm8w","annotations":[]},"bOUtD3leN0E":{"duration":222.66,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=bOUtD3leN0E","annotations":[{"segment":[211.8880621528861,217.0984243369735],"label":"Removing curlers"}]},"F12ga-do744":{"duration":56.54,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=F12ga-do744","annotations":[{"segment":[17.82525698395921,29.01007766197081],"label":"Clean and jerk"},{"segment":[41.25173137461625,54.10987155218634],"label":"Clean and 
jerk"}]},"iHQLWtllKVw":{"duration":138.438,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iHQLWtllKVw","annotations":[]},"NU28ZSvyWGc":{"duration":85.357,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NU28ZSvyWGc","annotations":[]},"4aiE_-yQWZE":{"duration":181.999,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4aiE_-yQWZE","annotations":[{"segment":[35.82534373465803,38.6596272237546],"label":"Doing fencing"},{"segment":[40.6436256661222,58.49961164743061],"label":"Doing fencing"},{"segment":[97.89615214587297,157.41610541690102],"label":"Doing fencing"}]},"F99Suh6SvD8":{"duration":126.967,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=F99Suh6SvD8","annotations":[{"segment":[9.108215288611545,119.41563650546021],"label":"Doing fencing"}]},"836UrVGw6GI":{"duration":225.466,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=836UrVGw6GI","annotations":[]},"UcVbSLmILaY":{"duration":69.359,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=UcVbSLmILaY","annotations":[{"segment":[0.01,0.02],"label":"Putting on shoes"}]},"kfiF8A8g7UE":{"duration":37.2,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kfiF8A8g7UE","annotations":[{"segment":[6.9757409400000014,32.96709948],"label":"Using uneven 
bars"}]},"t04jE7d0Obw":{"duration":175.17000000000002,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=t04jE7d0Obw","annotations":[{"segment":[21.990146822152884,169.5016907223089],"label":"Tango"}]},"jkrlHzTRTvo":{"duration":199.0,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jkrlHzTRTvo","annotations":[]},"wEn3nAJHhtw":{"duration":159.777,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wEn3nAJHhtw","annotations":[{"segment":[0.01,159.721],"label":"Disc dog"}]},"X2GUUIGudxs":{"duration":188.036,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=X2GUUIGudxs","annotations":[{"segment":[23.394664094720262,173.12054285887496],"label":"Hanging wallpaper"}]},"517rgTzIobE":{"duration":87.42,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=517rgTzIobE","annotations":[{"segment":[6.546470107956319,82.51280031903276],"label":"Playing squash"}]},"dyobp97TugE":{"duration":181.139,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=dyobp97TugE","annotations":[{"segment":[29.389148899289307,46.62701508060322],"label":"Disc dog"},{"segment":[61.886765470618826,89.01521060842434],"label":"Disc dog"}]},"vzrZJX-Slzg":{"duration":124.901,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=vzrZJX-Slzg","annotations":[{"segment":[22.362484423676012,104.82878348909658],"label":"Welding"}]},"zyPisOj2Tdo":{"duration":93.02,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zyPisOj2Tdo","annotations":[{"segment":[0.01,92.961],"label":"Baton 
twirling"}]},"Uofnmkfohkc":{"duration":132.1,"subset":"training","resolution":"400x300","url":"https://www.youtube.com/watch?v=Uofnmkfohkc","annotations":[{"segment":[30.8250683,63.712186380000006],"label":"Skateboarding"},{"segment":[72.38045714,105.96505266000001],"label":"Skateboarding"}]},"0_1BQPWzRiw":{"duration":187.13,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0_1BQPWzRiw","annotations":[]},"sGFbsMKkoYs":{"duration":8.36,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sGFbsMKkoYs","annotations":[{"segment":[0.9726532400000001,7.56865655],"label":"Clean and jerk"}]},"RZ8r5B69CLY":{"duration":104.72,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=RZ8r5B69CLY","annotations":[{"segment":[11.191361154446177,25.241829173166927],"label":"Volleyball"},{"segment":[28.3460023400936,76.37899765990639],"label":"Volleyball"},{"segment":[80.13668096723869,99.25185257410297],"label":"Volleyball"}]},"LtQTtCplKz0":{"duration":194.38,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LtQTtCplKz0","annotations":[{"segment":[39.70477698,107.59656786271451],"label":"Triple jump"}]},"fZQS02Ypca4":{"duration":170.99,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fZQS02Ypca4","annotations":[{"segment":[16.00546764727732,20.00683455909665],"label":"Putting on makeup"},{"segment":[28.809841765099176,43.748278235891334],"label":"Putting on makeup"},{"segment":[50.417223088923556,58.15319911844093],"label":"Putting on makeup"},{"segment":[74.15866676571824,82.69491617759948],"label":"Putting on makeup"},{"segment":[97.36659485427036,102.96850853081742],"label":"Putting on makeup"},{"segment":[131.24483470767404,147.2503023276131],"label":"Putting on 
makeup"}]},"IAuKFVx94Ec":{"duration":159.92000000000002,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IAuKFVx94Ec","annotations":[]},"lx83HHNNrlc":{"duration":131.402,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=lx83HHNNrlc","annotations":[{"segment":[8.607117004680187,121.11443213728548],"label":"Mooping floor"}]},"smJtFktW640":{"duration":207.31,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=smJtFktW640","annotations":[{"segment":[0,200.51559765990638],"label":"Zumba"}]},"VN-BUL75x5Y":{"duration":117.192,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VN-BUL75x5Y","annotations":[{"segment":[35.62354914196568,63.40247581903275],"label":"Grooming dog"}]},"f6JZ3LWJRVE":{"duration":106.74000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=f6JZ3LWJRVE","annotations":[{"segment":[66.0348790951638,71.1977574102964],"label":"Archery"}]},"RjBXzs2XvbY":{"duration":225.582,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=RjBXzs2XvbY","annotations":[{"segment":[0.01,225.33021455474807],"label":"Sharpening knives"}]},"JLA4Ck8_BRI":{"duration":68.13,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=JLA4Ck8_BRI","annotations":[{"segment":[3.507336014040562,63.238331162246496],"label":"Mixing drinks"}]},"8L1xXJvKuv4":{"duration":76.7,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=8L1xXJvKuv4","annotations":[{"segment":[0.2,76.49551020408163],"label":"Playing water polo"}]},"bj4nkWPdqIY":{"duration":191.59,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bj4nkWPdqIY","annotations":[{"segment":[16.729376839999997,28.235155226209045],"label":"Triple jump"},{"segment":[30.027863494539776,58.418510920436816],"label":"Triple 
jump"},{"segment":[62.900749609984395,108.32076833073323],"label":"Triple jump"},{"segment":[139.69643915756632,165.39460764430578],"label":"Triple jump"},{"segment":[167.1875031201248,188.4034329173167],"label":"Triple jump"}]},"GV_BDNmUiLY":{"duration":124.85,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GV_BDNmUiLY","annotations":[{"segment":[21.0273135725429,76.33359126365055],"label":"Sailing"}]},"ePAc9xK9fac":{"duration":125.44,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ePAc9xK9fac","annotations":[{"segment":[8.318065522620905,15.75539469578783],"label":"Rock climbing"},{"segment":[26.911388455538223,34.935875195007796],"label":"Rock climbing"},{"segment":[49.6148143525741,51.76772542901716],"label":"Rock climbing"},{"segment":[68.40385647425897,107.74341341653667],"label":"Rock climbing"}]},"Gdf7ZWhh9Nw":{"duration":128.11,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Gdf7ZWhh9Nw","annotations":[{"segment":[28.568990639625586,101.23162392],"label":"Grooming horse"}]},"fAxUvb5qcvg":{"duration":30.7,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=fAxUvb5qcvg","annotations":[]},"V4srMOGRlU8":{"duration":128.221,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=V4srMOGRlU8","annotations":[{"segment":[19.40311550078003,113.01814698907955],"label":"River tubing"}]},"1AZxtWKkRlo":{"duration":193.26,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1AZxtWKkRlo","annotations":[{"segment":[3.3156448143525745,182.05904253354134],"label":"Cheerleading"}]},"QX3pwZOJbRs":{"duration":151.743,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=QX3pwZOJbRs","annotations":[{"segment":[46.307595015576325,140.59516355140187],"label":"Grooming 
dog"}]},"kPn9NzU4W48":{"duration":162.03,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=kPn9NzU4W48","annotations":[]},"V2QIntSKqhA":{"duration":39.73,"subset":"training","resolution":"400x218","url":"https://www.youtube.com/watch?v=V2QIntSKqhA","annotations":[{"segment":[0.2,39.701],"label":"Camel ride"}]},"sq1wlclMvY8":{"duration":77.97,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sq1wlclMvY8","annotations":[]},"mBAlD4A8swo":{"duration":13.49,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mBAlD4A8swo","annotations":[{"segment":[0.01,13.322197883833722],"label":"Spinning"}]},"nDbLtdY66dA":{"duration":94.65,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nDbLtdY66dA","annotations":[{"segment":[11.07381891575663,89.91940959594383],"label":"Playing saxophone"}]},"E3UJv-NC1E8":{"duration":33.066,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=E3UJv-NC1E8","annotations":[{"segment":[13.247327613104522,17.88646957878315],"label":"Doing a powerbomb"}]},"92fD8Cy2zL0":{"duration":203.69,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=92fD8Cy2zL0","annotations":[{"segment":[88.0978362,178.25900145],"label":"Doing 
nails"}]},"eDMv3IHGpnA":{"duration":213.07,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=eDMv3IHGpnA","annotations":[{"segment":[19.278989815374928,208.08013136037442],"label":"Breakdancing"}]},"ZdeEZk6PulE":{"duration":54.335,"subset":"testing","resolution":"424x240","url":"https://www.youtube.com/watch?v=ZdeEZk6PulE","annotations":[]},"esQnfUcBbPE":{"duration":106.95,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=esQnfUcBbPE","annotations":[{"segment":[7.7298001769383875,106.18479488254586],"label":"Bullfighting"}]},"6fWXqCWuU9Y":{"duration":193.26,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=6fWXqCWuU9Y","annotations":[{"segment":[86.5152139789572,113.60766080138711],"label":"Playing beach volleyball"}]},"fZc3tkvUJe0":{"duration":107.068,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=fZc3tkvUJe0","annotations":[{"segment":[17.5241887675507,102.6608658346334],"label":"Sharpening knives"}]},"a25vC5zsf6A":{"duration":176.45,"subset":"training","resolution":"480x320","url":"https://www.youtube.com/watch?v=a25vC5zsf6A","annotations":[{"segment":[64.13804281150837,91.11455867214279],"label":"Fixing bicycle"},{"segment":[105.03420095999999,143.40105408000002],"label":"Fixing bicycle"}]},"RlRrktN5iH4":{"duration":65.341,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RlRrktN5iH4","annotations":[]},"6TxGVSw6Ayw":{"duration":70.101,"subset":"training","resolution":"480x270","url":"https://www.youtube.com/watch?v=6TxGVSw6Ayw","annotations":[{"segment":[9.951942153876631,60.367824000000006],"label":"Playing ice 
hockey"}]},"K9cQxJ37xzA":{"duration":106.7,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=K9cQxJ37xzA","annotations":[{"segment":[14.974024914196566,35.27214757566302],"label":"Spinning"},{"segment":[46.419477234009356,71.87531958814353],"label":"Spinning"},{"segment":[86.01745422932916,93.00533252262089],"label":"Spinning"}]},"irnvsfchQX8":{"duration":38.151,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=irnvsfchQX8","annotations":[{"segment":[8.153826539456137,30.591727575663025],"label":"Canoeing"}]},"cdEBDjnH9xk":{"duration":62.044,"subset":"training","resolution":"540x360","url":"https://www.youtube.com/watch?v=cdEBDjnH9xk","annotations":[{"segment":[0.01,57.107196240249614],"label":"Roof shingle removal"}]},"erxnWYOO1ew":{"duration":6.943,"subset":"training","resolution":"293x240","url":"https://www.youtube.com/watch?v=erxnWYOO1ew","annotations":[{"segment":[0.01,6.882205072576041],"label":"Powerbocking"}]},"bmWICdhvyJw":{"duration":74.84,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bmWICdhvyJw","annotations":[{"segment":[0.2335036255850234,73.7871456848674],"label":"Using the balance beam"}]},"ciEBsDNryN8":{"duration":119.56,"subset":"training","resolution":"1280x622","url":"https://www.youtube.com/watch?v=ciEBsDNryN8","annotations":[{"segment":[9.69905825273011,113.21785306552262],"label":"Playing accordion"}]},"LmEUkoYh0nQ":{"duration":138.159,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=LmEUkoYh0nQ","annotations":[{"segment":[18.281752336448594,112.93306074766356],"label":"Carving jack-o-lanterns"}]},"yUC6HXP9S8A":{"duration":231.48,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=yUC6HXP9S8A","annotations":[{"segment":[4.830700550079346,217.52570055007936],"label":"Ice 
fishing"}]},"52tZGZGuWP4":{"duration":78.159,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=52tZGZGuWP4","annotations":[{"segment":[0.010000000000000002,0.020000000000000004],"label":"Playing congas"}]},"gc04Okw4DTs":{"duration":170.32,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=gc04Okw4DTs","annotations":[{"segment":[20.72516794549333,28.430678879875195],"label":"Painting"},{"segment":[43.575993797191884,159.15866027145086],"label":"Painting"}]},"IuntoXkEWPI":{"duration":29.513,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=IuntoXkEWPI","annotations":[{"segment":[23.548006240249613,29.261],"label":"Fun sliding down"}]},"sDLIA6K6Uq4":{"duration":147.099,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sDLIA6K6Uq4","annotations":[]},"P5HJQrDE9b8":{"duration":215.551,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=P5HJQrDE9b8","annotations":[]},"JN3o_6Rx-8g":{"duration":45.163,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JN3o_6Rx-8g","annotations":[]},"ZBZUrC4M5E8":{"duration":119.7,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZBZUrC4M5E8","annotations":[{"segment":[6.81058112324493,87.1604994],"label":"Shot put"}]},"EgPk-mmJyS8":{"duration":81.15,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=EgPk-mmJyS8","annotations":[{"segment":[17.471476328440893,37.60165534633385],"label":"Ironing clothes"},{"segment":[47.60344245865834,62.7960304773791],"label":"Ironing clothes"},{"segment":[66.59417748205928,73.55744699063962],"label":"Ironing clothes"},{"segment":[78.49503809672386,81.153741],"label":"Ironing 
clothes"}]},"Omvg9UtGOes":{"duration":62.23,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Omvg9UtGOes","annotations":[]},"po_mfZ5LUVo":{"duration":145.915,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=po_mfZ5LUVo","annotations":[{"segment":[12.29227087675507,132.02809460218407],"label":"Swimming"}]},"o18RrUlR-0Y":{"duration":135.26,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=o18RrUlR-0Y","annotations":[{"segment":[0,1.6880653457430814],"label":"Shaving"},{"segment":[49.37591136298513,62.45841779249401],"label":"Shaving"},{"segment":[82.08217743675733,119.64162976287052],"label":"Shaving"}]},"aDaazrgvjJg":{"duration":23.29,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aDaazrgvjJg","annotations":[{"segment":[1.3995140405616224,21.937836973478937],"label":"Clean and jerk"}]},"svG8RyP-OlU":{"duration":149.51,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=svG8RyP-OlU","annotations":[{"segment":[0.6997501777622125,45.950261673051955],"label":"Shoveling snow"},{"segment":[103.79627636806153,149.31328798185942],"label":"Shoveling snow"}]},"xSMmdsV65Bc":{"duration":176.402,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=xSMmdsV65Bc","annotations":[{"segment":[24.759266770670827,122.69592199687987],"label":"Plastering"}]},"ElJJgJj9Gm0":{"duration":84.289,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ElJJgJj9Gm0","annotations":[]},"E9y9fCPKNf0":{"duration":42.84,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=E9y9fCPKNf0","annotations":[]},"G9ynahMxUz8":{"duration":98.59,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=G9ynahMxUz8","annotations":[{"segment":[18.918580130960343,97.82290103588143],"label":"Playing 
kickball"}]},"Br1Ty6PCrv8":{"duration":42.121,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Br1Ty6PCrv8","annotations":[{"segment":[5.568093568492716,40.41781320261991],"label":"Fun sliding down"}]},"Z3noRsTXGt4":{"duration":19.087,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Z3noRsTXGt4","annotations":[{"segment":[0.2,18.3384992199688],"label":"Elliptical trainer"}]},"FNAt8Pew0HA":{"duration":35.991,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=FNAt8Pew0HA","annotations":[{"segment":[4.9971803307332285,27.624862053042122],"label":"Peeling potatoes"}]},"r1v5fEOypSM":{"duration":86.634,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=r1v5fEOypSM","annotations":[{"segment":[0,84.03404368174726],"label":"Wakeboarding"}]},"xqRnZiSR0_w":{"duration":22.965,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xqRnZiSR0_w","annotations":[]},"C_bwHYiX-Vw":{"duration":216.62,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=C_bwHYiX-Vw","annotations":[{"segment":[95.62000468018721,209.82340249609985],"label":"Fixing bicycle"}]},"7vgokK5_Pvc":{"duration":93.414,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=7vgokK5_Pvc","annotations":[{"segment":[0.19999999999999998,20.96424336973479],"label":"Swinging at the playground"}]},"M57ZC-IJKCo":{"duration":110.37,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=M57ZC-IJKCo","annotations":[]},"0wpStx2JuS4":{"duration":69.196,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0wpStx2JuS4","annotations":[{"segment":[0.2,67.75835881435258],"label":"Playing 
congas"}]},"lnOZvhRNC8E":{"duration":170.899,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=lnOZvhRNC8E","annotations":[]},"JV4ss42WZG8":{"duration":65.829,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JV4ss42WZG8","annotations":[{"segment":[5.4377255850234,10.577699569422778],"label":"Making a lemonade"},{"segment":[21.463487475819033,52.6831056224649],"label":"Making a lemonade"}]},"SLdf2ZUdgEQ":{"duration":165.837,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=SLdf2ZUdgEQ","annotations":[{"segment":[0.2,0.25871537129485184],"label":"Playing ten pins"}]},"AWAMhmc08Cw":{"duration":140.76,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=AWAMhmc08Cw","annotations":[{"segment":[27.229580555382217,73.7833801352054],"label":"Washing face"}]},"_crwKCjKRjg":{"duration":190.404,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=_crwKCjKRjg","annotations":[{"segment":[16.92428705148206,181.11956318252732],"label":"Riding bumper 
cars"}]},"j66O6PR3SFk":{"duration":67.71,"subset":"testing","resolution":"854x470","url":"https://www.youtube.com/watch?v=j66O6PR3SFk","annotations":[]},"q9T1GR6s0vI":{"duration":120.047,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=q9T1GR6s0vI","annotations":[{"segment":[1.1236864024960997,117.42522906084244],"label":"Sumo"}]},"YVxuIAwOyoE":{"duration":177.77,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YVxuIAwOyoE","annotations":[{"segment":[23.71391185647426,29.81573712948518],"label":"Cricket"},{"segment":[42.85145475819033,51.72683697347893],"label":"Cricket"},{"segment":[59.77015210608424,68.09082293291732],"label":"Cricket"},{"segment":[80.57182917316692,86.67365444617785],"label":"Cricket"},{"segment":[99.43201638065523,105.53384165366614],"label":"Cricket"},{"segment":[115.7960023400936,119.12427067082683],"label":"Cricket"},{"segment":[131.8826326053042,138.53916926677067],"label":"Cricket"},{"segment":[148.52397425897036,162.66911466458657],"label":"Cricket"}]},"vPh9o_BuJaU":{"duration":18.367,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=vPh9o_BuJaU","annotations":[{"segment":[3.1760546021840876,9.44232449297972],"label":"Putting on shoes"}]},"RAQs7EkXLtE":{"duration":215.71,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=RAQs7EkXLtE","annotations":[{"segment":[33.57788558,201.74753591504205],"label":"Belly 
dance"}]},"uqaSFllHrco":{"duration":141.015,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uqaSFllHrco","annotations":[{"segment":[19.13929192043682,123.4154341076443],"label":"Wakeboarding"}]},"EXvyNKaFkaU":{"duration":130.89,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EXvyNKaFkaU","annotations":[]},"nT1-fINd4nA":{"duration":18.716,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=nT1-fINd4nA","annotations":[{"segment":[0.2,17.66582693457944],"label":"Blowing leaves"}]},"7-jcXxwqf5E":{"duration":216.016,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7-jcXxwqf5E","annotations":[{"segment":[16.483004672897195,179.31132866043615],"label":"Ice fishing"}]},"JRs2MpyP0SQ":{"duration":106.209,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=JRs2MpyP0SQ","annotations":[{"segment":[20.062527262008395,27.341585087553568],"label":"Playing ten pins"},{"segment":[30.48481460312989,31.477413397522415],"label":"Playing ten pins"},{"segment":[41.237968209048894,45.04293025422023],"label":"Playing ten pins"},{"segment":[83.5888501031299,88.05554467789624],"label":"Playing ten pins"}]},"UoGoOznvKew":{"duration":142.966,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=UoGoOznvKew","annotations":[{"segment":[0.01,136.35057417151194],"label":"Putting in contact lenses"}]},"sRMrtzg0nwU":{"duration":92.23,"subset":"training","resolution":"853x470","url":"https://www.youtube.com/watch?v=sRMrtzg0nwU","annotations":[{"segment":[0.01,83.30884864586584],"label":"Knitting"}]},"8twDb4CdhE0":{"duration":35.991,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=8twDb4CdhE0","annotations":[{"segment":[0,21.430736349453976],"label":"Having an ice cream"},{"segment":[26.816471138845554,28.443411856474256],"label":"Having an ice 
cream"}]},"5a7Rv4LXvJ0":{"duration":102.818,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=5a7Rv4LXvJ0","annotations":[]},"Ti14bsS3RtU":{"duration":211.093,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ti14bsS3RtU","annotations":[{"segment":[31.277527301092046,176.81709360374415],"label":"Fixing the roof"}]},"d83YCo6xLkk":{"duration":25.333,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=d83YCo6xLkk","annotations":[{"segment":[0.01,25.308930727499202],"label":"Building sandcastles"}]},"k3yRK68SEDE":{"duration":41.053,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=k3yRK68SEDE","annotations":[{"segment":[0,1.7238368267228528],"label":"Arm wrestling"},{"segment":[6.767658540564221,40.09518935860466],"label":"Arm wrestling"}]},"5IwVWA2HY5M":{"duration":140.44,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5IwVWA2HY5M","annotations":[{"segment":[26.5094318684312,81.0618991018144],"label":"Making a sandwich"}]},"QwnUZ-5JaOM":{"duration":118.24000000000001,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=QwnUZ-5JaOM","annotations":[{"segment":[21.571258970358812,39.08638377535101],"label":"Long jump"},{"segment":[52.729744149765985,66.18873478939157],"label":"Long jump"},{"segment":[106.01259750390015,109.69999219968798],"label":"Long 
jump"}]},"zwj4jJ9yDHk":{"duration":191.008,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zwj4jJ9yDHk","annotations":[]},"DOONG5zq1Yg":{"duration":12.168,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DOONG5zq1Yg","annotations":[{"segment":[0.2,11.921000000000001],"label":"Kneeling"}]},"-l18hJp8ShE":{"duration":95.74000000000001,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-l18hJp8ShE","annotations":[{"segment":[21.26199283151326,87.95750718720748],"label":"Doing motocross"}]},"41__Qick6tM":{"duration":195.698,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=41__Qick6tM","annotations":[{"segment":[35.10958539781591,185.6228514945398],"label":"Braiding hair"}]},"iEZgExTrv70":{"duration":23.313,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iEZgExTrv70","annotations":[{"segment":[0.01,23.312833],"label":"Using the monkey bar"}]},"gYchIiLd9Uw":{"duration":89.17,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gYchIiLd9Uw","annotations":[{"segment":[15.162159465970474,77.61912666458659],"label":"Tumbling"}]},"Sx3YbcKbuYk":{"duration":80.69,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=Sx3YbcKbuYk","annotations":[{"segment":[5.66840225,65.65633097893917],"label":"Starting a 
campfire"}]},"ShKrNPaSdhY":{"duration":235.055,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ShKrNPaSdhY","annotations":[{"segment":[19.068447331090525,41.80390376431384],"label":"Snowboarding"},{"segment":[48.771221058366145,98.64254484737214],"label":"Snowboarding"},{"segment":[100.47604939843852,103.77635759035805],"label":"Snowboarding"},{"segment":[105.24316123121116,118.44439399888921],"label":"Snowboarding"},{"segment":[119.17779581931576,182.2503523759998],"label":"Snowboarding"},{"segment":[183.20434165366615,187.7508656723869],"label":"Snowboarding"},{"segment":[188.85096840093604,204.9858084196568],"label":"Snowboarding"},{"segment":[206.45261205772232,216.3535366146646],"label":"Snowboarding"},{"segment":[218.55374207176288,235.055283],"label":"Snowboarding"}]},"3hp7kPpZDhs":{"duration":76.32,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=3hp7kPpZDhs","annotations":[{"segment":[20.75569968798752,35.14790405616224],"label":"Triple jump"},{"segment":[44.068691887675506,55.011524960998436],"label":"Triple jump"},{"segment":[63.575481279251164,68.5711224648986],"label":"Triple jump"}]},"BRApK7rWSAk":{"duration":187.71,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=BRApK7rWSAk","annotations":[{"segment":[96.34420345553823,186.24593737909515],"label":"Starting a campfire"}]},"1F_CSpL_4Dg":{"duration":124.134,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1F_CSpL_4Dg","annotations":[{"segment":[18.389539781591267,120.79024024961],"label":"Snow 
tubing"}]},"b4KrsFjcj-o":{"duration":148.701,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=b4KrsFjcj-o","annotations":[{"segment":[38.73071294851794,146.5934352574103],"label":"Capoeira"}]},"r40TuTkt9y4":{"duration":210.141,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=r40TuTkt9y4","annotations":[{"segment":[41.30688644929797,73.76229723088923],"label":"Rafting"},{"segment":[81.63027560218408,89.82608640561622],"label":"Rafting"},{"segment":[94.41574045553823,102.61155125897035],"label":"Rafting"},{"segment":[107.20120530889236,140.3122809547582],"label":"Rafting"},{"segment":[150.47508635101403,152.44208094383777],"label":"Rafting"},{"segment":[156.70390256162247,183.91399442901715],"label":"Rafting"},{"segment":[196.04379441809675,202.9282754929797],"label":"Rafting"}]},"sRDuqREW6PU":{"duration":76.649,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=sRDuqREW6PU","annotations":[]},"8leZKBY0B6I":{"duration":114.2,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8leZKBY0B6I","annotations":[{"segment":[6.057184175802407,114.195736],"label":"Getting a piercing"}]},"XHwfkmvFsxA":{"duration":181.21,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=XHwfkmvFsxA","annotations":[]},"dj_GBrR5N4E":{"duration":85.15,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dj_GBrR5N4E","annotations":[{"segment":[13.671202319999999,65.8697192],"label":"Using parallel bars"}]},"pvpfRBndHh8":{"duration":8.73,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pvpfRBndHh8","annotations":[{"segment":[0.04450655658594767,8.730701],"label":"Doing 
kickboxing"}]},"7uDlPHQDZnw":{"duration":144.15,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=7uDlPHQDZnw","annotations":[]},"UxR9fdD0Vzw":{"duration":129.8,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UxR9fdD0Vzw","annotations":[{"segment":[49.99380187207489,119.23436661466458],"label":"Trimming branches or hedges"}]},"0rX2f0H3AaA":{"duration":83.36,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=0rX2f0H3AaA","annotations":[{"segment":[9.88351392559104,13.394762293893118],"label":"Installing carpet"},{"segment":[19.636981615319034,62.942378157711346],"label":"Installing carpet"}]},"37pnsj0hlZ4":{"duration":111.154,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=37pnsj0hlZ4","annotations":[{"segment":[40.22510992142865,96.65103827033829],"label":"Using the rowing machine"}]},"tfepV4CXF7c":{"duration":65.97,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=tfepV4CXF7c","annotations":[{"segment":[0.01,65.96730158730159],"label":"Drinking beer"}]},"fPtKNj6jCPU":{"duration":56.541,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fPtKNj6jCPU","annotations":[{"segment":[5.6225295950155765,44.99618691588785],"label":"Beer pong"}]},"q_AQim3WN3g":{"duration":189.45,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=q_AQim3WN3g","annotations":[]},"PJgB6h-fImY":{"duration":181.58,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=PJgB6h-fImY","annotations":[{"segment":[41.353031773540465,178.18556136855605],"label":"Fixing 
bicycle"}]},"S1gXqf9_gJg":{"duration":141.015,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=S1gXqf9_gJg","annotations":[]},"sGiZDgVq9GQ":{"duration":75.303,"subset":"testing","resolution":"656x480","url":"https://www.youtube.com/watch?v=sGiZDgVq9GQ","annotations":[]},"uh-H5Gmt4PI":{"duration":105.49000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uh-H5Gmt4PI","annotations":[{"segment":[21.39380007853375,102.19653729822662],"label":"Snatch"}]},"FkWxS_5VxsM":{"duration":103.19,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FkWxS_5VxsM","annotations":[{"segment":[7.45643714,72.16729093],"label":"Doing nails"}]},"DPsa28MmdoI":{"duration":198.694,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DPsa28MmdoI","annotations":[{"segment":[13.948777420484573,188.15350778159126],"label":"Rafting"}]},"lvAXZEUE0m8":{"duration":152.6,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lvAXZEUE0m8","annotations":[{"segment":[74.6046856474259,111.50519266770671],"label":"Bungee jumping"}]},"hzeK-DdGOsc":{"duration":124.761,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hzeK-DdGOsc","annotations":[{"segment":[26.776035380034596,31.626766963492237],"label":"Bullfighting"},{"segment":[62.089361307606254,68.29829773443204],"label":"Bullfighting"},{"segment":[79.74602342348528,82.85049160388964],"label":"Bullfighting"},{"segment":[90.41763279362524,93.13404245147906],"label":"Bullfighting"}]},"mNM-JUC7ZEA":{"duration":163.144,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mNM-JUC7ZEA","annotations":[{"segment":[7.725161070618008,158.16250068432515],"label":"Fixing the 
roof"}]},"0BXBfSWIR2k":{"duration":6.73,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=0BXBfSWIR2k","annotations":[{"segment":[0.39395399999999997,3.788112324492979],"label":"Windsurfing"}]},"PaPR1XQU0_A":{"duration":202.037,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=PaPR1XQU0_A","annotations":[{"segment":[0.01,202.03671201814058],"label":"Hanging wallpaper"}]},"4eCKiA1czfk":{"duration":87.122,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=4eCKiA1czfk","annotations":[{"segment":[0,76.36624336973479],"label":"Blowing leaves"}]},"YcxPoVlukf4":{"duration":107.3,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YcxPoVlukf4","annotations":[{"segment":[0.01,32.64610523935716],"label":"Playing ice hockey"},{"segment":[57.87415975649112,63.0534292268961],"label":"Playing ice hockey"}]},"R7uL6BvULN8":{"duration":69.13,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=R7uL6BvULN8","annotations":[{"segment":[12.666719968798752,59.668336193447736],"label":"Windsurfing"}]},"ZNVhz7ctTq0":{"duration":14.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZNVhz7ctTq0","annotations":[{"segment":[0.3287380109204368,14.048072562358277],"label":"Walking the dog"}]},"3gPjMvTmE2g":{"duration":207.49,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3gPjMvTmE2g","annotations":[{"segment":[8.252558475819033,190.4561044321373],"label":"Sailing"}]},"-rCYwovSK4s":{"duration":114.5,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-rCYwovSK4s","annotations":[{"segment":[11.253273885491419,104.67330947457098],"label":"Hammer 
throw"}]},"HB11MXjwvVo":{"duration":125.44,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=HB11MXjwvVo","annotations":[{"segment":[7.240351350108426,119.5636398626013],"label":"Vacuuming floor"}]},"pHq6Eb280uM":{"duration":187.455,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pHq6Eb280uM","annotations":[{"segment":[16.079648985959437,186.83622308892353],"label":"Table soccer"}]},"AJ15GW-sS5M":{"duration":177.98,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AJ15GW-sS5M","annotations":[{"segment":[3.8872594903796154,173.26070853790952],"label":"Long jump"}]},"GTV4XlxawgE":{"duration":98.52000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GTV4XlxawgE","annotations":[{"segment":[35.043624438855105,38.425026796990245],"label":"Tumbling"},{"segment":[50.4136351576512,61.17264266080847],"label":"Tumbling"},{"segment":[68.85764802020653,89.76086259776922],"label":"Tumbling"}]},"889_kqLFM0E":{"duration":167.49,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=889_kqLFM0E","annotations":[]},"GX1EjqXAszM":{"duration":62.69,"subset":"validation","resolution":"648x480","url":"https://www.youtube.com/watch?v=GX1EjqXAszM","annotations":[{"segment":[12.030182431787068,18.094176380655224],"label":"Making a sandwich"},{"segment":[26.309910521060843,44.208474184087365],"label":"Making a sandwich"}]},"q8c_0JTe5r8":{"duration":130.311,"subset":"validation","resolution":"3414x1080","url":"https://www.youtube.com/watch?v=q8c_0JTe5r8","annotations":[{"segment":[0.01,0.02],"label":"Making a cake"}]},"ffDlO-IrsxI":{"duration":73.538,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ffDlO-IrsxI","annotations":[{"segment":[19.237703588143525,48.43778939157566],"label":"Playing pool"},{"segment":[51.415053042121684,60.57586427457098],"label":"Playing 
pool"}]},"9BgDWjUnz60":{"duration":69.22,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=9BgDWjUnz60","annotations":[]},"v8fS--GlXF4":{"duration":92.323,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=v8fS--GlXF4","annotations":[{"segment":[0.01,92.32253968253968],"label":"Bullfighting"}]},"y2MSbfOsukM":{"duration":135.72,"subset":"training","resolution":"496x360","url":"https://www.youtube.com/watch?v=y2MSbfOsukM","annotations":[{"segment":[11.001463199999998,126.97347893915757],"label":"Brushing hair"}]},"jExOw6W1I3E":{"duration":36.18,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=jExOw6W1I3E","annotations":[{"segment":[12.585649647425896,20.09188912324493],"label":"Drinking beer"}]},"WU4ISFy651Y":{"duration":156.805,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=WU4ISFy651Y","annotations":[{"segment":[13.695188767550702,114.69720592823712],"label":"Baking cookies"}]},"aq41GgfAlDo":{"duration":53.06,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=aq41GgfAlDo","annotations":[{"segment":[1.2622028270741044,52.336336331747006],"label":"Playing accordion"}]},"je5KvCND9xo":{"duration":189.522,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=je5KvCND9xo","annotations":[{"segment":[102.56313104524182,122.07081591263652],"label":"Doing crunches"}]},"v8u8BiprYKs":{"duration":80.063,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=v8u8BiprYKs","annotations":[{"segment":[16.28725507020281,63.60483541341654],"label":"Trimming branches or hedges"}]},"twGoVVPO08Q":{"duration":191.61,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=twGoVVPO08Q","annotations":[{"segment":[70.85199947,154.67606364],"label":"Long jump"},{"segment":[3.1213431899999997,56.214835179999994],"label":"Long 
jump"}]},"7MDr4f1r8rI":{"duration":199.62,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=7MDr4f1r8rI","annotations":[{"segment":[27.15325663026521,46.39327847113885],"label":"Pole vault"},{"segment":[57.254581123244925,99.76882293291732],"label":"Pole vault"},{"segment":[104.11334399375976,184.17666068642745],"label":"Pole vault"}]},"81S8kHui3Bs":{"duration":146.402,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=81S8kHui3Bs","annotations":[]},"gRVQwpbn4Z8":{"duration":177.865,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gRVQwpbn4Z8","annotations":[]},"-Ws0UnqETac":{"duration":73.561,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-Ws0UnqETac","annotations":[]},"7dlkshlM4nk":{"duration":24.01,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=7dlkshlM4nk","annotations":[{"segment":[13.442167081879635,23.1130599201378],"label":"Spread mulch"}]},"e__FVkfXB-U":{"duration":131.1,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=e__FVkfXB-U","annotations":[{"segment":[12.844926904471503,19.777111165746803],"label":"Disc dog"},{"segment":[20.388774493896104,51.583604229510414],"label":"Disc dog"},{"segment":[58.51578861520248,74.62292292313406],"label":"Disc dog"},{"segment":[84.2056483974731,118.25490699778412],"label":"Disc dog"}]},"wmQiRptG3-0":{"duration":165.048,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wmQiRptG3-0","annotations":[]},"1yFs3I9ppBI":{"duration":232.06,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1yFs3I9ppBI","annotations":[]},"InwY966XXBY":{"duration":166.464,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=InwY966XXBY","annotations":[{"segment":[0.01,166.361],"label":"Camel 
ride"}]},"C1kmoJttp4M":{"duration":128.5,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=C1kmoJttp4M","annotations":[{"segment":[0.01,128.441],"label":"Making an omelette"}]},"DRWMUsADKFM":{"duration":183.02,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DRWMUsADKFM","annotations":[{"segment":[7.66666591,141.43909594851795],"label":"Wrapping presents"}]},"RuG5d5pMNmI":{"duration":59.908,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=RuG5d5pMNmI","annotations":[]},"nNldj5g7W5o":{"duration":182.231,"subset":"validation","resolution":"180x240","url":"https://www.youtube.com/watch?v=nNldj5g7W5o","annotations":[{"segment":[112.51226786904496,118.74720486717237],"label":"Having an ice cream"},{"segment":[150.48870141895455,155.0232010270416],"label":"Having an ice cream"},{"segment":[164.94241891973215,170.32713720433557],"label":"Having an ice cream"}]},"4sm-tTbfamM":{"duration":182.51,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4sm-tTbfamM","annotations":[{"segment":[10.250098932917316,176.8142065928237],"label":"Fixing bicycle"}]},"PFeerDZbGn8":{"duration":169.599,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=PFeerDZbGn8","annotations":[]},"JGPhQX97OSE":{"duration":53.546,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=JGPhQX97OSE","annotations":[]},"iuQHLWWhSEY":{"duration":22.18,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iuQHLWWhSEY","annotations":[{"segment":[0.01,17.746962893915757],"label":"Using the balance beam"}]},"8ClpQ4q6NmM":{"duration":121.49,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8ClpQ4q6NmM","annotations":[{"segment":[0,115.23241030889236],"label":"Bathing 
dog"}]},"CXb3Ce7s6G0":{"duration":125.55,"subset":"training","resolution":"480x320","url":"https://www.youtube.com/watch?v=CXb3Ce7s6G0","annotations":[{"segment":[58.75988751950078,64.24414368798752],"label":"Swimming"},{"segment":[76.58372006708268,92.4488896973479],"label":"Swimming"},{"segment":[98.1290121575663,117.12804245553822],"label":"Swimming"}]},"G4D6UtJh3AU":{"duration":78.9,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=G4D6UtJh3AU","annotations":[{"segment":[43.005863160000004,76.82356701794073],"label":"Brushing hair"},{"segment":[13.27437862,33.12560488],"label":"Brushing hair"}]},"URgF15eyQvg":{"duration":133.515,"subset":"validation","resolution":"294x240","url":"https://www.youtube.com/watch?v=URgF15eyQvg","annotations":[{"segment":[24.748021312367104,99.8239506542056],"label":"Futsal"}]},"2FjaU9wfxaU":{"duration":161.565,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2FjaU9wfxaU","annotations":[{"segment":[55.451135378748475,70.57417230022534],"label":"Grooming dog"}]},"0IAcCT2UT8c":{"duration":66.619,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0IAcCT2UT8c","annotations":[]},"buBtMl9SUNk":{"duration":231.411,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=buBtMl9SUNk","annotations":[{"segment":[8.301564742589704,43.67344929797191],"label":"Scuba diving"},{"segment":[51.975014040561625,78.32345865834633],"label":"Scuba diving"},{"segment":[88.42971138845553,175.05473478939155],"label":"Scuba diving"},{"segment":[191.29692667706706,227.0493744149766],"label":"Scuba 
diving"}]},"Vfb3LmFDols":{"duration":130.195,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=Vfb3LmFDols","annotations":[{"segment":[23.764011276131047,118.21072275819034],"label":"BMX"}]},"Fpwzxyb8fGQ":{"duration":30.33,"subset":"testing","resolution":"160x120","url":"https://www.youtube.com/watch?v=Fpwzxyb8fGQ","annotations":[]},"as7KugARkLE":{"duration":8.94,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=as7KugARkLE","annotations":[{"segment":[0.49509941528861157,6.575757022776911],"label":"Using uneven bars"}]},"h2qoTykYXFY":{"duration":154.97,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=h2qoTykYXFY","annotations":[]},"YrS64TBX798":{"duration":22.27,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YrS64TBX798","annotations":[{"segment":[0.01,22.094238159126366],"label":"Grooming horse"}]},"Y6lkU_RH3mo":{"duration":146.797,"subset":"testing","resolution":"400x224","url":"https://www.youtube.com/watch?v=Y6lkU_RH3mo","annotations":[]},"3HUoSa_3vpc":{"duration":82.43,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=3HUoSa_3vpc","annotations":[{"segment":[5.143890046801872,80.11608747893915],"label":"Mixing 
drinks"}]},"0XTAq_xvquw":{"duration":192.03,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0XTAq_xvquw","annotations":[]},"uZk7ciOa5aE":{"duration":126.76,"subset":"training","resolution":"352x264","url":"https://www.youtube.com/watch?v=uZk7ciOa5aE","annotations":[{"segment":[13.97242411,37.855978050000004],"label":"Cumbia"},{"segment":[46.37278088,120.21909516380656],"label":"Cumbia"}]},"q5EezLOfg90":{"duration":183.76,"subset":"testing","resolution":"854x470","url":"https://www.youtube.com/watch?v=q5EezLOfg90","annotations":[]},"8lhODyltF2Q":{"duration":121.51,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8lhODyltF2Q","annotations":[{"segment":[0,121.51004535147392],"label":"Mowing the lawn"}]},"41Mc82x-sas":{"duration":197.44,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=41Mc82x-sas","annotations":[]},"MxZtYALqIrE":{"duration":232.989,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=MxZtYALqIrE","annotations":[{"segment":[0.2,184.23873790951637],"label":"Playing pool"}]},"u4FZAIj_Fw8":{"duration":77.28,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=u4FZAIj_Fw8","annotations":[]},"HMc2apLs7wE":{"duration":220.47,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=HMc2apLs7wE","annotations":[]},"XlR3u3bROWM":{"duration":134.58,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=XlR3u3bROWM","annotations":[{"segment":[96.7904783400936,99.72988549141965],"label":"Playing 
lacrosse"}]},"EMgWolnolDg":{"duration":143.871,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=EMgWolnolDg","annotations":[{"segment":[0.2,21.380463675070235],"label":"Slacklining"}]},"qkUkvD_g3Eg":{"duration":181.86,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=qkUkvD_g3Eg","annotations":[]},"0ZzKrBk1ac8":{"duration":57.307,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0ZzKrBk1ac8","annotations":[{"segment":[4.023101658760228,53.64135444617784],"label":"Arm wrestling"}]},"Lk32tXffSyw":{"duration":88.98,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Lk32tXffSyw","annotations":[]},"cqHWqXRkUfU":{"duration":81.34,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=cqHWqXRkUfU","annotations":[{"segment":[56.681134165366615,76.84287987519501],"label":"Snow tubing"}]},"KxAxMZ6dYa4":{"duration":104.305,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KxAxMZ6dYa4","annotations":[{"segment":[28.789698907956314,36.109113884555384],"label":"Shuffleboard"},{"segment":[60.507163806552256,60.832471138845555],"label":"Shuffleboard"}]},"xDc407xoYUM":{"duration":32.137,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=xDc407xoYUM","annotations":[{"segment":[0.2,24.127340361370713],"label":"Waterskiing"}]},"Eucw0oPrFUs":{"duration":127.014,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Eucw0oPrFUs","annotations":[{"segment":[20.58284610143954,23.54582787268614],"label":"Futsal"}]},"Sar_kHdgjYQ":{"duration":31.394,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Sar_kHdgjYQ","annotations":[]},"tgAA4I1xVio":{"duration":130.334,"subset":"testing","resolution":"852x480","url":"https://www.youtube.com/watch?v=tgAA4I1xVio","annotations":[]},"GmlQwRidCvo":{"duration":119.44,"subse
t":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=GmlQwRidCvo","annotations":[]},"9yiXv7Hu5Lw":{"duration":63.066,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9yiXv7Hu5Lw","annotations":[{"segment":[0.2,56.738532710280374],"label":"Ice fishing"}]},"r8dbfvjwOiQ":{"duration":36.781,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=r8dbfvjwOiQ","annotations":[]},"zTnRo6yABtg":{"duration":205.869,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zTnRo6yABtg","annotations":[{"segment":[72.67816426927173,93.48909376289397],"label":"Disc dog"},{"segment":[108.85716477356884,182.17566852675247],"label":"Disc dog"}]},"XpFkoI5hz7s":{"duration":98.92,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XpFkoI5hz7s","annotations":[{"segment":[8.406263650546022,98.87],"label":"Doing step aerobics"}]},"qZIJRg7hJ0o":{"duration":124.367,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=qZIJRg7hJ0o","annotations":[{"segment":[9.21589494872312,58.69069941028934],"label":"Riding bumper cars"},{"segment":[64.12322695901032,120.77672950966813],"label":"Riding bumper cars"}]},"NdnosxA2c5g":{"duration":37.97,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=NdnosxA2c5g","annotations":[{"segment":[2.024386475405589,37.700841304066024],"label":"Horseback riding"}]},"EWSMeiZqUHQ":{"duration":222.4,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=EWSMeiZqUHQ","annotations":[{"segment":[7.111165366614665,214.89594851794072],"label":"Doing step aerobics"}]},"pOIJeK3YZr8":{"duration":238.702,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pOIJeK3YZr8","annotations":[{"segment":[16.568509360374414,226.93272932917316],"label":"Trimming branches or 
hedges"}]},"YKzVHb16eUk":{"duration":74.58,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=YKzVHb16eUk","annotations":[{"segment":[0,44.097917669266764],"label":"Preparing salad"},{"segment":[52.82441852730109,74.00072727613104],"label":"Preparing salad"}]},"KyDS9Zg76Jg":{"duration":152.44,"subset":"training","resolution":"982x720","url":"https://www.youtube.com/watch?v=KyDS9Zg76Jg","annotations":[{"segment":[35.898816795631824,146.68589379407175],"label":"Preparing salad"}]},"NsqW8ZwYDEk":{"duration":187.32,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NsqW8ZwYDEk","annotations":[{"segment":[94.38824567706709,141.4362566801872],"label":"Archery"}]},"K5_Rm4MfF9o":{"duration":142.524,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=K5_Rm4MfF9o","annotations":[{"segment":[52.45053978159127,90.46765522620905],"label":"Waterskiing"}]},"i2X7z9ywHV8":{"duration":192.22,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=i2X7z9ywHV8","annotations":[{"segment":[49.178197614979425,65.07115172225936],"label":"Smoking hookah"},{"segment":[106.15293875428486,122.94549403744858],"label":"Smoking hookah"},{"segment":[157.13033857347895,162.22807856536664],"label":"Smoking hookah"}]},"qhUJT_lNLyE":{"duration":26.912,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=qhUJT_lNLyE","annotations":[]},"ksOPSr7LmMY":{"duration":25.101,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=ksOPSr7LmMY","annotations":[]},"vkE5YOgatZ8":{"duration":70.47,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vkE5YOgatZ8","annotations":[]},"xBO66pI09rE":{"duration":32.532,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=xBO66pI09rE","annotations":[{"segment":[5.937823923556942,19.843497043681747],"label":"Gargling 
mouthwash"}]},"hpoReoD1vD0":{"duration":163.79,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=hpoReoD1vD0","annotations":[{"segment":[35.7384440873635,54.88403913416537],"label":"Making a sandwich"},{"segment":[60.75535494851795,62.031727951638075],"label":"Making a sandwich"},{"segment":[84.7511674071763,147.80399376131047],"label":"Making a sandwich"}]},"Rn5qprCWXFg":{"duration":89.815,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Rn5qprCWXFg","annotations":[{"segment":[0.01,0.02],"label":"Building sandcastles"}]},"kee3-2cw7-E":{"duration":24.451,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=kee3-2cw7-E","annotations":[{"segment":[2.1360908205928237,23.1918431950078],"label":"Hitting a pinata"}]},"v52YAxZZslU":{"duration":28.491,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=v52YAxZZslU","annotations":[]},"Zc7uU4Qwolc":{"duration":67.87,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Zc7uU4Qwolc","annotations":[{"segment":[0.01,65.30093135725429],"label":"Playing violin"}]},"sPOCh7SN42Y":{"duration":190.82,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sPOCh7SN42Y","annotations":[]},"xBGgX2PqAo8":{"duration":237.007,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xBGgX2PqAo8","annotations":[{"segment":[19.166893301157945,214.1532304466953],"label":"Playing pool"}]},"dN8LBey17O8":{"duration":187.71,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dN8LBey17O8","annotations":[]},"OlH5t7EKOKM":{"duration":38.1,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OlH5t7EKOKM","annotations":[{"segment":[1.0700015919004107,32.39726907956318],"label":"Using uneven 
bars"}]},"RmZhmsu3NSI":{"duration":66.967,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RmZhmsu3NSI","annotations":[]},"iS_ms9ajumY":{"duration":133.028,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iS_ms9ajumY","annotations":[{"segment":[26.33536193447738,122.15639469578784],"label":"Making a lemonade"}]},"U7SRRMoCGks":{"duration":150.35,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=U7SRRMoCGks","annotations":[{"segment":[2.3451014040561624,29.31376755070203],"label":"Using the pommel horse"},{"segment":[57.9240046801872,86.53424180967238],"label":"Using the pommel horse"},{"segment":[132.26371918876754,141.40961466458657],"label":"Using the pommel horse"}]},"3LwQ2yq85LU":{"duration":131.658,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3LwQ2yq85LU","annotations":[]},"UuXJdO35a8I":{"duration":109.366,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=UuXJdO35a8I","annotations":[]},"nbHBjdE8oNo":{"duration":34.78,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=nbHBjdE8oNo","annotations":[]},"u02UsNRxclU":{"duration":231.76,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=u02UsNRxclU","annotations":[{"segment":[13.55567468798752,212.7337214368175],"label":"Smoking hookah"}]},"oY22VETX20w":{"duration":103.19,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=oY22VETX20w","annotations":[{"segment":[1.2073651925668865,53.04357746010521],"label":"Getting a tattoo"},{"segment":[72.52240154212168,75.74204201014041],"label":"Getting a 
tattoo"}]},"r8V0OneFGoc":{"duration":112.432,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=r8V0OneFGoc","annotations":[]},"zUt53fHpqc8":{"duration":71.054,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zUt53fHpqc8","annotations":[{"segment":[21.658474208112484,26.74158543672838],"label":"Beer pong"}]},"rbKPBMRj9jY":{"duration":157.2,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rbKPBMRj9jY","annotations":[{"segment":[18.63588767550702,137.3170670826833],"label":"Rollerblading"}]},"_8HTgaTPFRo":{"duration":83.708,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=_8HTgaTPFRo","annotations":[{"segment":[0.01,83.70793650793651],"label":"Putting in contact lenses"}]},"ai80XIxFqqg":{"duration":236.94,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ai80XIxFqqg","annotations":[{"segment":[141.94006898235114,175.5769082463979],"label":"Paintball"}]},"eWP99kNayuU":{"duration":38.987,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eWP99kNayuU","annotations":[{"segment":[0.01,0.02],"label":"Fixing the roof"},{"segment":[7.76088034388218,11.216897784006596],"label":"Fixing the roof"}]},"eKV-VVNPLVQ":{"duration":212.72,"subset":"testing","resolution":"800x480","url":"https://www.youtube.com/watch?v=eKV-VVNPLVQ","annotations":[]},"s_hQSJVIN3c":{"duration":70.589,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=s_hQSJVIN3c","annotations":[{"segment":[0.01,69.86298334440771],"label":"Kneeling"}]},"lroya8XhuPs":{"duration":188.454,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lroya8XhuPs","annotations":[{"segment":[0.01,188.321],"label":"Beach 
soccer"}]},"IRbc5J7sH0k":{"duration":124.599,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IRbc5J7sH0k","annotations":[]},"2j-DRUk2yCs":{"duration":81.03999999999999,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=2j-DRUk2yCs","annotations":[{"segment":[9.098396255850234,76.07270202808112],"label":"Tumbling"}]},"Yd0q08-cpJU":{"duration":125.34,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Yd0q08-cpJU","annotations":[{"segment":[5.081600624024961,121.95841497659907],"label":"Belly dance"}]},"zkR_t6p3lto":{"duration":193.492,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zkR_t6p3lto","annotations":[{"segment":[3.320453514739229,193.491881],"label":"Skiing"}]},"FDLhpMkJwCM":{"duration":12.28,"subset":"validation","resolution":"256x360","url":"https://www.youtube.com/watch?v=FDLhpMkJwCM","annotations":[{"segment":[0.01,11.746797451950078],"label":"Playing saxophone"}]},"oJCB2Zglcq4":{"duration":44.235,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oJCB2Zglcq4","annotations":[{"segment":[7.78501248049922,21.839371294851794],"label":"Calf roping"}]},"8ZPV7ySIcbM":{"duration":204.45,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8ZPV7ySIcbM","annotations":[{"segment":[20.094316750390014,80.69622441029641],"label":"Fixing bicycle"},{"segment":[188.5038285631825,204.251699],"label":"Fixing bicycle"}]},"-CS7U_XrkPI":{"duration":102.702,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-CS7U_XrkPI","annotations":[{"segment":[12.176819475819032,18.745893140405617],"label":"Throwing darts"},{"segment":[26.43651596723869,32.68514701404056],"label":"Throwing darts"},{"segment":[39.73488460530422,46.624400887675506],"label":"Throwing darts"},{"segment":[52.87303193447738,59.60232690795632],"label":"Throwing 
darts"},{"segment":[66.97250711700468,72.90069554602184],"label":"Throwing darts"},{"segment":[85.7184002574103,92.12725261310452],"label":"Throwing darts"}]},"OgRa0ZcbUZg":{"duration":104.026,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=OgRa0ZcbUZg","annotations":[]},"xmW27Mi-jbg":{"duration":123.44,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xmW27Mi-jbg","annotations":[{"segment":[86.84900456630265,123.43727891156463],"label":"Starting a campfire"}]},"1Dpm9kvn3M8":{"duration":84.48,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=1Dpm9kvn3M8","annotations":[{"segment":[3.9398673600000005,78.83153244851793],"label":"Smoking a cigarette"}]},"XSfG1M-Ik_A":{"duration":51.99,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XSfG1M-Ik_A","annotations":[{"segment":[17.6175,25.069499999999998],"label":"Shot put"}]},"GRGBMJG1Koc":{"duration":201.828,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GRGBMJG1Koc","annotations":[{"segment":[7.396854134165366,98.9919414976599],"label":"Kite flying"},{"segment":[136.7631115444618,201.781],"label":"Kite flying"}]},"w10Nr_wOh8g":{"duration":194.978,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=w10Nr_wOh8g","annotations":[]},"movzxpiGX8k":{"duration":99.4,"subset":"validation","resolution":"562x480","url":"https://www.youtube.com/watch?v=movzxpiGX8k","annotations":[{"segment":[6.0914771199999995,34.54686616],"label":"Brushing hair"},{"segment":[41.60753228,80.00332132],"label":"Brushing hair"}]},"GoFV8lTD4ug":{"duration":123.09,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GoFV8lTD4ug","annotations":[{"segment":[56.305374719999996,75.27438280187206],"label":"Playing 
flauta"}]},"7V1ZHZRHFrg":{"duration":103.956,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=7V1ZHZRHFrg","annotations":[{"segment":[12.643179407176287,53.176424336973476],"label":"Elliptical trainer"}]},"ILF-93buuSY":{"duration":66.944,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ILF-93buuSY","annotations":[{"segment":[10.955546021840874,43.00033073322933],"label":"Cutting the grass"}]},"v79nh_3vpVk":{"duration":102.49000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=v79nh_3vpVk","annotations":[{"segment":[0,6.713513260530421],"label":"Playing field hockey"},{"segment":[38.52277847113884,42.19922620904836],"label":"Playing field hockey"},{"segment":[87.75520904836193,93.98918564742588],"label":"Playing field hockey"}]},"K11OHa6PeUc":{"duration":186.387,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=K11OHa6PeUc","annotations":[]},"v-YKnFqX_L0":{"duration":122.415,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=v-YKnFqX_L0","annotations":[{"segment":[0,122.415623],"label":"Running a marathon"}]},"78TRh3fX3vU":{"duration":15.07,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=78TRh3fX3vU","annotations":[]},"E46k4xD_fo4":{"duration":92.95,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=E46k4xD_fo4","annotations":[]},"_V62aT_K8Zw":{"duration":190.94,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=_V62aT_K8Zw","annotations":[]},"ffWMjZCysO0":{"duration":218.64,"subset":"training","resolution":"640x356","url":"https://www.youtube.com/watch?v=ffWMjZCysO0","annotations":[{"segment":[48.098962852965265,72.95318870966308],"label":"Running a 
marathon"}]},"nlkmPF8TBdQ":{"duration":174.57,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nlkmPF8TBdQ","annotations":[{"segment":[48.59658002847519,148.50464134107239],"label":"Preparing pasta"}]},"Fwu0dTTQfKM":{"duration":195.12,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=Fwu0dTTQfKM","annotations":[]},"r97vYbzloD8":{"duration":33.832,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=r97vYbzloD8","annotations":[{"segment":[9.130802895475819,31.667524492979716],"label":"Kite flying"}]},"YNQphOFqDOA":{"duration":167.07,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YNQphOFqDOA","annotations":[{"segment":[28.93057799063963,48.47826582215289],"label":"Tai chi"}]},"0Lc7aCtT2XU":{"duration":6.92,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=0Lc7aCtT2XU","annotations":[]},"OwoQuRQfMNw":{"duration":167.556,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OwoQuRQfMNw","annotations":[]},"83CLjJhrEYs":{"duration":52.849,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=83CLjJhrEYs","annotations":[]},"_FvFoOncvX4":{"duration":208.121,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=_FvFoOncvX4","annotations":[]},"0Zg-7EgFiC8":{"duration":218.22,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0Zg-7EgFiC8","annotations":[{"segment":[2.5750078,130.22556396],"label":"Cheerleading"}]},"eqm20MDaeRQ":{"duration":13.49,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=eqm20MDaeRQ","annotations":[{"segment":[2.317052262090484,5.797872854914197],"label":"Hand washing clothes"},{"segment":[10.369070982839313,12.424013260530423],"label":"Hand washing 
clothes"}]},"oHDeMg1ZFp8":{"duration":111.55,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=oHDeMg1ZFp8","annotations":[{"segment":[15.043487994539783,107.73920014586584],"label":"Preparing salad"}]},"mqUJEX9qk8E":{"duration":109.65,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=mqUJEX9qk8E","annotations":[{"segment":[0,5.643514820592824],"label":"Horseback riding"},{"segment":[42.75390015600624,106.2006879875195],"label":"Horseback riding"}]},"lue8XE3MEe8":{"duration":85.357,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lue8XE3MEe8","annotations":[{"segment":[9.450531981279251,85.301],"label":"Kneeling"}]},"xuEhV19bVRI":{"duration":71.786,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xuEhV19bVRI","annotations":[{"segment":[15.233909516380656,52.534585023400936],"label":"Doing a powerbomb"}]},"G1vQQ4ypw3w":{"duration":40.8,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=G1vQQ4ypw3w","annotations":[]},"Q_W0GL3ljUY":{"duration":166.209,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Q_W0GL3ljUY","annotations":[{"segment":[0.01,146.24231928861153],"label":"Sumo"}]},"MAGeYoFuCgo":{"duration":113.71000000000001,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=MAGeYoFuCgo","annotations":[]},"9ZVWD0ZKXZg":{"duration":234.22,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9ZVWD0ZKXZg","annotations":[]},"STR26rN9GcA":{"duration":27.841,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=STR26rN9GcA","annotations":[{"segment":[0.2,27.640725623582767],"label":"Playing 
congas"}]},"QU5R75IyQow":{"duration":143.51,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QU5R75IyQow","annotations":[{"segment":[22.612234996879874,112.165640925117],"label":"Braiding hair"}]},"GJz8FEFB70w":{"duration":144.57,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=GJz8FEFB70w","annotations":[{"segment":[23.87949032449298,123.6852291],"label":"Clean and jerk"}]},"x75wqBUD3zo":{"duration":144.57,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=x75wqBUD3zo","annotations":[{"segment":[69.87935079,105.06117591],"label":"Playing harmonica"},{"segment":[69.87935079,105.06117591],"label":"Playing harmonica"}]},"1WbHAVWplj8":{"duration":26.24,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=1WbHAVWplj8","annotations":[]},"icm5lX5pZE0":{"duration":104.281,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=icm5lX5pZE0","annotations":[{"segment":[40.818238689547584,82.12434477379095],"label":"Hanging wallpaper"}]},"o8B6D4Sxlvk":{"duration":140.69,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=o8B6D4Sxlvk","annotations":[]},"PhqSGnssOW0":{"duration":156.04,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=PhqSGnssOW0","annotations":[]},"RYyGvn4MOr0":{"duration":12.98,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=RYyGvn4MOr0","annotations":[{"segment":[0.2,12.721],"label":"Riding bumper cars"}]},"XSnPmf41ToA":{"duration":78.41,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=XSnPmf41ToA","annotations":[{"segment":[15.413527969690216,25.566883695755994],"label":"Hammer throw"},{"segment":[39.87944522620905,47.7085387675507],"label":"Hammer 
throw"}]},"s56ctLdnOdw":{"duration":92.53,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=s56ctLdnOdw","annotations":[{"segment":[0.01,89.06700061199727],"label":"Volleyball"}]},"R3MPcPKQYKE":{"duration":27.261,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=R3MPcPKQYKE","annotations":[{"segment":[0.01,27.221],"label":"Installing carpet"}]},"1r_WSe-AQKg":{"duration":60.512,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=1r_WSe-AQKg","annotations":[]},"e_X0K2t8API":{"duration":82.315,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=e_X0K2t8API","annotations":[{"segment":[6.16397593046579,62.92392095683828],"label":"Hitting a pinata"}]},"5Q8ltS84BBg":{"duration":73.24,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=5Q8ltS84BBg","annotations":[{"segment":[19.367699780000002,57.73962062],"label":"Playing guitarra"}]},"pCWlZ37fGEo":{"duration":81.57,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=pCWlZ37fGEo","annotations":[{"segment":[0,81.57170068027212],"label":"Playing water polo"}]},"TWhxFETOG8I":{"duration":129.986,"subset":"training","resolution":"638x360","url":"https://www.youtube.com/watch?v=TWhxFETOG8I","annotations":[{"segment":[13.948653404402195,124.93144199106548],"label":"Futsal"}]},"cY3QbnSeu9k":{"duration":197.068,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cY3QbnSeu9k","annotations":[{"segment":[7.110375294863047,195.9031234752674],"label":"Snowboarding"}]},"mjKcoY18QG0":{"duration":143.732,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mjKcoY18QG0","annotations":[{"segment":[68.16585479745721,92.60690108580343],"label":"Playing 
blackjack"}]},"W0dCIs9Y8yg":{"duration":162.423,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=W0dCIs9Y8yg","annotations":[{"segment":[0,162.161],"label":"Skiing"}]},"k0ruZZZ5Gxw":{"duration":66.04,"subset":"validation","resolution":"648x480","url":"https://www.youtube.com/watch?v=k0ruZZZ5Gxw","annotations":[{"segment":[8.384370732449298,65.89189514274571],"label":"Preparing salad"}]},"R8vqzwGs6aE":{"duration":206.96,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=R8vqzwGs6aE","annotations":[{"segment":[33.739880076835725,180.96844612636505],"label":"Archery"}]},"DEYYpOYEy_c":{"duration":6.758,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=DEYYpOYEy_c","annotations":[]},"oQ_gE5_0xyc":{"duration":179.305,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=oQ_gE5_0xyc","annotations":[{"segment":[20.140285905313764,174.82887070584863],"label":"Baton twirling"}]},"5VNv4hoVhLk":{"duration":20.991,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5VNv4hoVhLk","annotations":[]},"JLdp1Kz0naI":{"duration":198.18,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JLdp1Kz0naI","annotations":[{"segment":[1.3909633385335414,194.58031591263648],"label":"Doing motocross"}]},"9Nj9WdaSQCs":{"duration":83.31,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9Nj9WdaSQCs","annotations":[{"segment":[16.83307332293292,58.48821926677067],"label":"Using uneven bars"}]},"WdX_4wwtkGg":{"duration":140.85,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=WdX_4wwtkGg","annotations":[{"segment":[0.01,80.20447534321373],"label":"Playing harmonica"},{"segment":[97.56380014352574,140.652243],"label":"Playing 
harmonica"}]},"ybF4RykZxK8":{"duration":63.716,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ybF4RykZxK8","annotations":[{"segment":[0.2,63.681],"label":"Longboarding"}]},"OFTkwnSh-sQ":{"duration":95.95,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=OFTkwnSh-sQ","annotations":[]},"rGOOlcdpfLg":{"duration":192.17,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=rGOOlcdpfLg","annotations":[]},"ufrTSC3Dewg":{"duration":39.567,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ufrTSC3Dewg","annotations":[]},"6RqeTmOkxWg":{"duration":90.81,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6RqeTmOkxWg","annotations":[]},"oofh5aSJUSc":{"duration":78.6,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=oofh5aSJUSc","annotations":[{"segment":[6.356415459822896,13.079549168998636],"label":"Beer pong"},{"segment":[14.057459526696926,25.30342864022725],"label":"Beer pong"},{"segment":[27.87044332918526,34.10462185951186],"label":"Beer pong"},{"segment":[39.11641244271559,42.29462110523503],"label":"Beer pong"},{"segment":[44.73939699948075,60.50820151736567],"label":"Beer pong"}]},"94q8YdJoPUw":{"duration":89.14,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=94q8YdJoPUw","annotations":[{"segment":[70.04699780000001,87.70815846645866],"label":"Kayaking"},{"segment":[11.699756250000002,65.89391861],"label":"Kayaking"}]},"u6rdzTx2S8c":{"duration":43.0,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=u6rdzTx2S8c","annotations":[]},"kHyEIbBnzYM":{"duration":132.15,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=kHyEIbBnzYM","annotations":[{"segment":[19.378483025035287,92.15087140628482],"label":"Doing 
karate"}]},"hkiX5j0IJ0o":{"duration":92.253,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hkiX5j0IJ0o","annotations":[]},"PRLlYhneNMk":{"duration":112.94,"subset":"training","resolution":"540x360","url":"https://www.youtube.com/watch?v=PRLlYhneNMk","annotations":[{"segment":[15.51583458,92.72888845553821],"label":"Wrapping presents"}]},"RG98kemBdyg":{"duration":31.02,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=RG98kemBdyg","annotations":[{"segment":[0,2.1294255787831515],"label":"Playing saxophone"},{"segment":[6.388276736349454,22.213780469578783],"label":"Playing saxophone"}]},"6CD_Jvf2J0Q":{"duration":35.53,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6CD_Jvf2J0Q","annotations":[]},"aeg49bWI-wU":{"duration":50.736,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=aeg49bWI-wU","annotations":[]},"DIVaPURJHPo":{"duration":55.984,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DIVaPURJHPo","annotations":[]},"-v9YLmGCYO0":{"duration":72.261,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-v9YLmGCYO0","annotations":[{"segment":[0.2,69.40426833073323],"label":"Swinging at the playground"}]},"KzVRgHnpCOQ":{"duration":142.362,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KzVRgHnpCOQ","annotations":[{"segment":[55.30112940027805,125.4824806552262],"label":"Putting on shoes"}]},"D2TQ_RR2Q50":{"duration":226.372,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=D2TQ_RR2Q50","annotations":[{"segment":[56.15139270060598,63.56761437804451],"label":"Running a marathon"},{"segment":[201.29744538065523,210.83258753026522],"label":"Running a 
marathon"}]},"m49gj6Y6SDo":{"duration":152.184,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=m49gj6Y6SDo","annotations":[{"segment":[14.838479691109413,143.99260692252574],"label":"Assembling bicycle"}]},"cofT2pt-nFg":{"duration":56.355,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=cofT2pt-nFg","annotations":[]},"hBLnY5MacSc":{"duration":26.611,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=hBLnY5MacSc","annotations":[]},"4hmJfJo6UI8":{"duration":117.94,"subset":"training","resolution":"322x240","url":"https://www.youtube.com/watch?v=4hmJfJo6UI8","annotations":[{"segment":[6.805517940717629,11.955639625585023],"label":"Long jump"},{"segment":[18.393291731669265,31.45252886115445],"label":"Long jump"},{"segment":[38.99377847113884,42.304570982839316],"label":"Long jump"},{"segment":[50.0297535101404,68.23911232449298],"label":"Long jump"},{"segment":[74.30889859594383,78.72328861154446],"label":"Long jump"},{"segment":[83.68947737909517,93.98972074882994],"label":"Long jump"},{"segment":[103.73816536661467,106.86502496099844],"label":"Long jump"}]},"Cqv9FqNJXks":{"duration":81.87,"subset":"testing","resolution":"800x480","url":"https://www.youtube.com/watch?v=Cqv9FqNJXks","annotations":[]},"pZsHrSHAOqs":{"duration":12.47,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=pZsHrSHAOqs","annotations":[{"segment":[0.01,4.7658256162246495],"label":"Playing ten 
pins"}]},"KmdmcWOmIQ4":{"duration":120.63,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=KmdmcWOmIQ4","annotations":[]},"1Cf8TkmsbMU":{"duration":136.0,"subset":"training","resolution":"426x236","url":"https://www.youtube.com/watch?v=1Cf8TkmsbMU","annotations":[{"segment":[12.305706573841187,22.914074309911175],"label":"Spinning"},{"segment":[62.589369642812926,74.04640679776851],"label":"Spinning"},{"segment":[105.02283952418095,107.3566804024961],"label":"Spinning"}]},"8AsV0ojyUMU":{"duration":118.78999999999999,"subset":"validation","resolution":"648x480","url":"https://www.youtube.com/watch?v=8AsV0ojyUMU","annotations":[{"segment":[9.346045241809671,104.10198907956318],"label":"Fixing bicycle"}]},"DN3v5LhGsx0":{"duration":167.231,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DN3v5LhGsx0","annotations":[{"segment":[0,152.35898194071763],"label":"Longboarding"}]},"ZyOPt4sgsbs":{"duration":106.673,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZyOPt4sgsbs","annotations":[{"segment":[14.97741411697284,17.64006551554579],"label":"Clipping cat claws"},{"segment":[34.448052469037535,39.440523841361816],"label":"Clipping cat claws"},{"segment":[47.76130946190228,49.092634399375974],"label":"Clipping cat claws"}]},"CFBmZ1g16H8":{"duration":93.321,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CFBmZ1g16H8","annotations":[{"segment":[11.210166654285219,43.53038739780884],"label":"Scuba diving"},{"segment":[48.189157060842426,89.68133156942277],"label":"Scuba diving"}]},"9YxgMsdO9GE":{"duration":138.67000000000002,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=9YxgMsdO9GE","annotations":[{"segment":[0.01,138.66956916099772],"label":"Walking the 
dog"}]},"ivAvKRQQy1Q":{"duration":134.189,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=ivAvKRQQy1Q","annotations":[]},"EaxmojivFrQ":{"duration":17.95,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EaxmojivFrQ","annotations":[]},"-1IlTIWPNs4":{"duration":86.05,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-1IlTIWPNs4","annotations":[{"segment":[0,86.05315192743764],"label":"Shoveling snow"}]},"gusezUdYfNA":{"duration":139.55,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=gusezUdYfNA","annotations":[{"segment":[59.2960280577223,136.5440646099844],"label":"Smoking hookah"}]},"RDq85fMq41s":{"duration":175.5,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=RDq85fMq41s","annotations":[{"segment":[12.590676003120125,159.29942247425896],"label":"Grooming horse"}]},"wWOH-b4PTq4":{"duration":148.14,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=wWOH-b4PTq4","annotations":[{"segment":[0.01,0.02],"label":"Sailing"}]},"GXte17Sz13Q":{"duration":33.553,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GXte17Sz13Q","annotations":[]},"F4GwrEQUvDc":{"duration":79.65,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=F4GwrEQUvDc","annotations":[{"segment":[27.52123021294852,34.2306949149766],"label":"Javelin throw"},{"segment":[4.162353102184087,13.85380211622465],"label":"Javelin throw"},{"segment":[41.56140634867395,67.65376907878316],"label":"Javelin throw"}]},"BVhUKjJrEw8":{"duration":205.54,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=BVhUKjJrEw8","annotations":[{"segment":[7.769579236455424,174.55606904106497],"label":"Playing 
polo"}]},"TUdtpLFcUF8":{"duration":154.02,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TUdtpLFcUF8","annotations":[]},"b8ftOk8tKxg":{"duration":110.13,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=b8ftOk8tKxg","annotations":[{"segment":[29.380052851093637,97.5898814726989],"label":"Washing face"}]},"AkG3o8tNptU":{"duration":206.96,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=AkG3o8tNptU","annotations":[]},"4YoVu3D3AsA":{"duration":231.666,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4YoVu3D3AsA","annotations":[]},"TIjwhYSIRgg":{"duration":123.438,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TIjwhYSIRgg","annotations":[{"segment":[0,21.750472698907956],"label":"Doing fencing"},{"segment":[25.985078003120123,50.04533541341654],"label":"Doing fencing"},{"segment":[60.63184867394696,103.57213416536662],"label":"Doing fencing"}]},"-y8LMGQt8uI":{"duration":203.988,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-y8LMGQt8uI","annotations":[{"segment":[15.69419581186883,182.80242945672865],"label":"Skiing"}]},"jN-FG9yxNvE":{"duration":40.798,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jN-FG9yxNvE","annotations":[{"segment":[5.0871762870514825,35.864592823712954],"label":"Using the monkey bar"}]},"AA1wvSZ4Mno":{"duration":78.93,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=AA1wvSZ4Mno","annotations":[{"segment":[7.018260477357869,10.342699650843176],"label":"Walking the dog"},{"segment":[21.424163562460866,24.00983847517166],"label":"Walking the 
dog"}]},"eQwLUJ3P7wI":{"duration":49.71,"subset":"testing","resolution":"294x240","url":"https://www.youtube.com/watch?v=eQwLUJ3P7wI","annotations":[]},"eFZufRRvIz4":{"duration":90.187,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=eFZufRRvIz4","annotations":[{"segment":[3.798798992199688,34.751975965678625],"label":"Mooping floor"}]},"GxDNGtoVjS0":{"duration":171.341,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=GxDNGtoVjS0","annotations":[]},"uPqp30C6MDE":{"duration":47.23,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uPqp30C6MDE","annotations":[{"segment":[0.20000000000000004,47.001999999999995],"label":"Swinging at the playground"}]},"SEVVSei-r6w":{"duration":12.03,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SEVVSei-r6w","annotations":[]},"xAaHXwTz1Ug":{"duration":47.81,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xAaHXwTz1Ug","annotations":[{"segment":[1.8273669613451204,47.809885],"label":"Kite flying"}]},"F8ISOmP1GTk":{"duration":137.973,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=F8ISOmP1GTk","annotations":[]},"BWCMfPoKlMM":{"duration":51.25,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BWCMfPoKlMM","annotations":[{"segment":[30.939738291572482,33.657957595943834],"label":"Long 
jump"}]},"_dZLd_Xk4fw":{"duration":111.247,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=_dZLd_Xk4fw","annotations":[]},"Hhzuzfdg6LY":{"duration":9.94,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Hhzuzfdg6LY","annotations":[]},"ZKtjyZhRB6E":{"duration":42.865,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZKtjyZhRB6E","annotations":[{"segment":[1.1332627386418843,4.733041727755415],"label":"Knitting"},{"segment":[10.59934822853302,14.399114939263967],"label":"Knitting"},{"segment":[24.865139037242194,42.06408309634018],"label":"Knitting"}]},"xEnnIWce1ok":{"duration":13.77,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xEnnIWce1ok","annotations":[{"segment":[0.36517994383775354,4.575489884555382],"label":"Doing a powerbomb"}]},"X5o8NVuMi3I":{"duration":103.051,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=X5o8NVuMi3I","annotations":[]},"jC1He93cebg":{"duration":103.933,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=jC1He93cebg","annotations":[{"segment":[5.997405616224649,41.17137909516381],"label":"Elliptical trainer"}]},"r64pATF3vCI":{"duration":25.43,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=r64pATF3vCI","annotations":[{"segment":[4.989088923556942,18.83661975],"label":"Paintball"}]},"0ZXc2fEDgg8":{"duration":83.174,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=0ZXc2fEDgg8","annotations":[{"segment":[6.228309083383744,25.951287847432265],"label":"Doing a powerbomb"},{"segment":[30.8820325384444,47.49085587519501],"label":"Doing a powerbomb"},{"segment":[51.383548979719194,67.34359070826834],"label":"Doing a 
powerbomb"}]},"L36MIRUpcrI":{"duration":32.44,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=L36MIRUpcrI","annotations":[{"segment":[9.55349297971919,10.918277691107646],"label":"Playing racquetball"}]},"2VvCuSFicyA":{"duration":103.19,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2VvCuSFicyA","annotations":[{"segment":[0.01,103.141],"label":"Walking the dog"}]},"R8-iriCwcyE":{"duration":139.09,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=R8-iriCwcyE","annotations":[{"segment":[24.3023450461828,72.9070351385484],"label":"Using parallel bars"},{"segment":[96.77540973747793,122.59665122152886],"label":"Using parallel bars"}]},"pIUpJihiju0":{"duration":63.995,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=pIUpJihiju0","annotations":[{"segment":[4.889374414976599,5.787422776911076],"label":"Mooping floor"},{"segment":[19.757063962558504,63.961],"label":"Mooping floor"}]},"G6LjkB0xv2k":{"duration":101.913,"subset":"training","resolution":"852x480","url":"https://www.youtube.com/watch?v=G6LjkB0xv2k","annotations":[{"segment":[1.8096593085762451,99.43600367773512],"label":"Using the rowing machine"}]},"698QI6p6Qgc":{"duration":69.22,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=698QI6p6Qgc","annotations":[]},"4_jDgRsOGvY":{"duration":151.813,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4_jDgRsOGvY","annotations":[]},"-9rt8p7BgnY":{"duration":91.069,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=-9rt8p7BgnY","annotations":[]},"n9MONPwq7x0":{"duration":170.41,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=n9MONPwq7x0","annotations":[{"segment":[33.76322202964119,86.13609399687988],"label":"Tennis serve with ball 
bouncing"},{"segment":[130.00169742121687,164.03077159282373],"label":"Tennis serve with ball bouncing"}]},"HtkuvF7VbSQ":{"duration":160.15,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=HtkuvF7VbSQ","annotations":[{"segment":[15.233663514820591,119.87145060842433],"label":"Getting a tattoo"}]},"offWBoR4Ggg":{"duration":104.118,"subset":"training","resolution":"352x240","url":"https://www.youtube.com/watch?v=offWBoR4Ggg","annotations":[{"segment":[0.2,65.2560883748056],"label":"Futsal"}]},"5rkAMBttgPc":{"duration":77.81,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=5rkAMBttgPc","annotations":[{"segment":[6.85433434399376,48.06958785],"label":"Mixing drinks"},{"segment":[6.85433434399376,74.54846821918878],"label":"Mixing drinks"}]},"aivuk1LvDv8":{"duration":130.92,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aivuk1LvDv8","annotations":[{"segment":[57.166216379999995,58.37848001999999],"label":"Playing piano"},{"segment":[1.8799250399999998,125.36694715834635],"label":"Playing piano"}]},"cqgecSy943o":{"duration":218.69,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cqgecSy943o","annotations":[{"segment":[25.24597079875195,39.23360326833074],"label":"Skateboarding"},{"segment":[41.621735641185644,59.703309321372856],"label":"Skateboarding"},{"segment":[113.94803036193447,126.91217752886116],"label":"Skateboarding"},{"segment":[129.6414716692668,146.3583982792512],"label":"Skateboarding"},{"segment":[171.60436907800312,177.06295735881437],"label":"Skateboarding"},{"segment":[191.73291336349453,202.65008992511702],"label":"Skateboarding"}]},"b1PAqOmflgI":{"duration":34.02,"subset":"training","resolution":"480x480","url":"https://www.youtube.com/watch?v=b1PAqOmflgI","annotations":[{"segment":[10.145616224648988,14.17207488299532],"label":"Layup drill in 
basketball"},{"segment":[24.079282371294852,28.052761310452418],"label":"Layup drill in basketball"}]},"CjuFIFZ_MLg":{"duration":65.156,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CjuFIFZ_MLg","annotations":[]},"kIFHChO_-vg":{"duration":50.18,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=kIFHChO_-vg","annotations":[]},"pASyH2NPOOw":{"duration":209.258,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=pASyH2NPOOw","annotations":[{"segment":[12.842290449379728,82.2688833372302],"label":"Capoeira"},{"segment":[80.63915110981587,90.74349091978472],"label":"Capoeira"},{"segment":[95.95863404751057,106.38892030296229],"label":"Capoeira"},{"segment":[108.34459897585948,118.12299234034548],"label":"Capoeira"},{"segment":[131.48679660514296,136.70193973286882],"label":"Capoeira"},{"segment":[141.9170828605947,149.08790466121775],"label":"Capoeira"},{"segment":[162.45170892601527,168.97063783567256],"label":"Capoeira"},{"segment":[172.88199518146695,176.46740608177848],"label":"Capoeira"},{"segment":[166.36306627180963,169.94847717212116],"label":"Capoeira"},{"segment":[182.33444210047008,196.67608570171618],"label":"Capoeira"}]},"FkWlzk8CrxA":{"duration":83.62,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FkWlzk8CrxA","annotations":[{"segment":[9.391993160686429,11.479102751950078],"label":"Cumbia"},{"segment":[28.175979482059283,77.61438792511701],"label":"Cumbia"}]},"wAt_OsQe-qM":{"duration":128.5,"subset":"training","resolution":"424x240","url":"https://www.youtube.com/watch?v=wAt_OsQe-qM","annotations":[{"segment":[32.26555538221529,75.35309828393136],"label":"Playing violin"},{"segment":[90.38363650546023,120.2443057722309],"label":"Playing 
violin"}]},"YWsqpINl8Ic":{"duration":72.05,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=YWsqpINl8Ic","annotations":[{"segment":[14.602386895475819,61.1053728549142],"label":"Cleaning windows"}]},"dTxVCLxbymM":{"duration":31.79,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dTxVCLxbymM","annotations":[]},"Z5DKoX7wx0E":{"duration":140.504,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Z5DKoX7wx0E","annotations":[]},"Dqt0kW_gJ_I":{"duration":41.727,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Dqt0kW_gJ_I","annotations":[]},"52Xw6TZcysc":{"duration":42.49,"subset":"testing","resolution":"562x480","url":"https://www.youtube.com/watch?v=52Xw6TZcysc","annotations":[]},"drMXYzlmJQU":{"duration":101.518,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=drMXYzlmJQU","annotations":[{"segment":[0.01,100.63203890171883],"label":"Knitting"}]},"wr2FC2P3hIs":{"duration":70.36,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wr2FC2P3hIs","annotations":[{"segment":[6.972279599999999,55.593904079999994],"label":"Getting a haircut"}]},"-oExUcmbTEE":{"duration":48.9,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-oExUcmbTEE","annotations":[{"segment":[12.282522796099844,17.69903906021841],"label":"Long jump"},{"segment":[38.52592554056163,45.16306518814353],"label":"Long jump"}]},"iSJ87SnNLPc":{"duration":35.62,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=iSJ87SnNLPc","annotations":[{"segment":[10.113467141965678,17.55964624648986],"label":"Gargling 
mouthwash"}]},"c0Hix_5Vm8I":{"duration":217.804,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=c0Hix_5Vm8I","annotations":[{"segment":[29.509668224299062,177.75271651090344],"label":"Snowboarding"}]},"v-dxQNxdMrU":{"duration":91.51,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=v-dxQNxdMrU","annotations":[{"segment":[16.274749063431926,51.6794663242312],"label":"Doing motocross"},{"segment":[54.53468545816663,60.38788468273425],"label":"Doing motocross"},{"segment":[65.2417570374415,87.08418335413415],"label":"Doing motocross"}]},"HwRiUpC5mf4":{"duration":201.852,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=HwRiUpC5mf4","annotations":[{"segment":[32.11185959438377,190.1525803432137],"label":"Camel ride"}]},"k_xDTGiDp9A":{"duration":180.51,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=k_xDTGiDp9A","annotations":[{"segment":[6.052255567862714,64.322809174727],"label":"Kayaking"},{"segment":[79.18806705,169.88540628861153],"label":"Kayaking"}]},"rRkwB9EcEMs":{"duration":24.94,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=rRkwB9EcEMs","annotations":[{"segment":[5.17432972698908,24.937935],"label":"Clean and jerk"}]},"pLF3UbT_46I":{"duration":25.45,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pLF3UbT_46I","annotations":[]},"90cHokbZy6A":{"duration":170.737,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=90cHokbZy6A","annotations":[{"segment":[52.45086583463338,166.144624024961],"label":"Sharpening knives"}]},"EJiyr-6l3ps":{"duration":79.064,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=EJiyr-6l3ps","annotations":[{"segment":[0.2,54.13260374414976],"label":"Clipping cat 
claws"}]},"rPeQxKmIe4I":{"duration":34.25,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rPeQxKmIe4I","annotations":[{"segment":[0.01,34.221],"label":"Scuba diving"}]},"o67-Z8n-jEE":{"duration":177.029,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=o67-Z8n-jEE","annotations":[{"segment":[118.7101872074883,159.86271294851795],"label":"Cleaning sink"}]},"dySzHZniFCo":{"duration":160.4,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dySzHZniFCo","annotations":[{"segment":[10.243335580000002,147.59817931357253],"label":"Doing karate"}]},"45v95EtrJX0":{"duration":8.336,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=45v95EtrJX0","annotations":[{"segment":[1.2744530932917315,8.218921989391575],"label":"Shuffleboard"}]},"rJKrXQ4v-9s":{"duration":77.671,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rJKrXQ4v-9s","annotations":[{"segment":[16.479251120124804,62.5239800124805],"label":"Waxing skis"}]},"rR1R-x_UlOM":{"duration":80.2,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=rR1R-x_UlOM","annotations":[{"segment":[8.631028081123244,72.92593291731669],"label":"Mixing drinks"}]},"T_5ANYuDWOA":{"duration":104.792,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=T_5ANYuDWOA","annotations":[{"segment":[0.01,0.02],"label":"Trimming branches or hedges"}]},"eNE8O5cgYuE":{"duration":173.988,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eNE8O5cgYuE","annotations":[{"segment":[36.88358190327613,120.95645241809673],"label":"Running a 
marathon"}]},"BVU-iDSTMDU":{"duration":139.85,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=BVU-iDSTMDU","annotations":[]},"ijrjRNwZjqQ":{"duration":91.07,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=ijrjRNwZjqQ","annotations":[]},"nWpFumm3Z5g":{"duration":10.5,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=nWpFumm3Z5g","annotations":[{"segment":[0.11461454914196569,10.495418],"label":"Playing field hockey"}]},"-tv6laq1ic0":{"duration":54.99,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=-tv6laq1ic0","annotations":[{"segment":[2.487614163806552,54.984851],"label":"Getting a tattoo"}]},"laU1Hvm5Dis":{"duration":8.04,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=laU1Hvm5Dis","annotations":[]},"hFlDERq1ThU":{"duration":132.87,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=hFlDERq1ThU","annotations":[{"segment":[26.823912959999998,64.43904],"label":"Using the pommel horse"},{"segment":[70.4710656,124.58994867238691],"label":"Using the pommel horse"}]},"cGqmHplppAU":{"duration":209.027,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=cGqmHplppAU","annotations":[{"segment":[19.829784680798138,25.35611949881036],"label":"Building sandcastles"},{"segment":[34.1332395038886,36.0837106161282],"label":"Building sandcastles"}]},"UCBAyJpXuCw":{"duration":144.73,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UCBAyJpXuCw","annotations":[{"segment":[0.01,4.967331515029309],"label":"Playing drums"},{"segment":[17.837235894877974,120.11910727613103],"label":"Playing drums"},{"segment":[120.11910727613103,129.6021946926677],"label":"Playing 
drums"}]},"LbVTFawQnj0":{"duration":177.31,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LbVTFawQnj0","annotations":[]},"WPrlU-Im5Ko":{"duration":85.938,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WPrlU-Im5Ko","annotations":[{"segment":[16.55722485770179,20.579222798843926],"label":"Putting in contact lenses"},{"segment":[24.064954347833776,26.61221971055713],"label":"Putting in contact lenses"}]},"9qOfesI3bGE":{"duration":115.66,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9qOfesI3bGE","annotations":[{"segment":[1.0820686427457098,111.29197659906396],"label":"Playing accordion"}]},"u08gU4eQFHY":{"duration":9.45,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=u08gU4eQFHY","annotations":[{"segment":[0.48573,9.030333073322932],"label":"Snatch"}]},"s8xM3FUf8hA":{"duration":102.53999999999999,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=s8xM3FUf8hA","annotations":[]},"2zvN1cL1uik":{"duration":191.64,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=2zvN1cL1uik","annotations":[{"segment":[58.66715776,147.42604160000002],"label":"Smoking hookah"}]},"tznMNEWglxY":{"duration":41.47,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tznMNEWglxY","annotations":[{"segment":[0,41.470839],"label":"Using the balance beam"}]},"KK7g7udEPuo":{"duration":137.347,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=KK7g7udEPuo","annotations":[{"segment":[0.01,137.34603174603174],"label":"Cutting the grass"}]},"DFJBJkCR0Bk":{"duration":89.07,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=DFJBJkCR0Bk","annotations":[{"segment":[0,89.071746],"label":"Playing water 
polo"}]},"u3qNB8GDOzY":{"duration":108.136,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=u3qNB8GDOzY","annotations":[{"segment":[0.01,108.134806],"label":"Roof shingle removal"}]},"dI6TWaB6tls":{"duration":110.67,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=dI6TWaB6tls","annotations":[{"segment":[4.831201248049922,108.70202808112325],"label":"Tango"}]},"nt4Ag91306U":{"duration":123.11,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nt4Ag91306U","annotations":[{"segment":[6.527416536661467,11.518970358814354],"label":"Tennis serve with ball bouncing"},{"segment":[48.37967550702028,59.71636661466459],"label":"Tennis serve with ball bouncing"},{"segment":[63.36463338533542,67.58894227769112],"label":"Tennis serve with ball bouncing"},{"segment":[82.18200936037442,97.15910452418098],"label":"Tennis serve with ball bouncing"}]},"lOuegsXjFkI":{"duration":25.45,"subset":"testing","resolution":"633x360","url":"https://www.youtube.com/watch?v=lOuegsXjFkI","annotations":[]},"DEtZBd0XdX0":{"duration":184.2,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DEtZBd0XdX0","annotations":[]},"Huz41aVo5eo":{"duration":157.15,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Huz41aVo5eo","annotations":[{"segment":[57.49969266770671,65.83653510140407],"label":"Shaving legs"},{"segment":[69.26935257410297,71.96656630265211],"label":"Shaving legs"},{"segment":[84.2266287051482,86.67864118564742],"label":"Shaving legs"},{"segment":[101.39071606864275,108.50155226209048],"label":"Shaving legs"},{"segment":[113.89597971918877,121.25201716068642],"label":"Shaving legs"}]},"AC9mml3mqps":{"duration":134.699,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=AC9mml3mqps","annotations":[{"segment":[37.808705148205924,134.661],"label":"Playing ice 
hockey"}]},"h2xV6mTpUCM":{"duration":172.15,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=h2xV6mTpUCM","annotations":[{"segment":[14.234148846225956,156.84420615464074],"label":"Washing face"}]},"ksvIIhvmSRM":{"duration":209.235,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ksvIIhvmSRM","annotations":[{"segment":[75.06432137285492,178.84890483619344],"label":"River tubing"}]},"nKBjM-kdeeI":{"duration":141.689,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=nKBjM-kdeeI","annotations":[{"segment":[28.288787170046803,37.129033160686426],"label":"Fun sliding down"}]},"QhiKgeJV3k0":{"duration":195.05,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=QhiKgeJV3k0","annotations":[{"segment":[4.10395084399376,4.10395084399376],"label":"Playing guitarra"},{"segment":[29.335648625585023,170.99795183307336],"label":"Playing guitarra"}]},"1C0Ur7fen8s":{"duration":71.263,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=1C0Ur7fen8s","annotations":[{"segment":[0.01,42.30141483792862],"label":"Table soccer"}]},"cGj8zXTrm8A":{"duration":88.77,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cGj8zXTrm8A","annotations":[]},"BoK5y7jdhzA":{"duration":82.32,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BoK5y7jdhzA","annotations":[{"segment":[16.744921216848674,23.545540561622467],"label":"Playing lacrosse"},{"segment":[41.76606786271451,45.87210218408737],"label":"Playing lacrosse"},{"segment":[54.46911154446177,58.96008658346333],"label":"Playing lacrosse"}]},"IdEXShfpQHs":{"duration":33.623,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=IdEXShfpQHs","annotations":[{"segment":[0.01,33.33890171606864],"label":"Grooming 
dog"}]},"6180cMhkWJA":{"duration":66.828,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=6180cMhkWJA","annotations":[{"segment":[19.21381804043546,50.490335925349925],"label":"Futsal"}]},"xl1GN2hruLU":{"duration":13.306,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=xl1GN2hruLU","annotations":[]},"RKUEXxnXrm8":{"duration":148.14,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RKUEXxnXrm8","annotations":[{"segment":[110.70303433853353,129.8853972823713],"label":"Making a sandwich"}]},"B4qwjeJBk0s":{"duration":198.996,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=B4qwjeJBk0s","annotations":[{"segment":[34.7697991729193,160.18943190380676],"label":"Longboarding"}]},"nQVJyUX8fHw":{"duration":179.28,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nQVJyUX8fHw","annotations":[{"segment":[5.593800078533754,12.865740180627633],"label":"Tennis serve with ball bouncing"},{"segment":[20.976750294501574,27.68931038874208],"label":"Tennis serve with ball bouncing"},{"segment":[47.82699067146359,89.50080125654006],"label":"Tennis serve with ball bouncing"},{"segment":[90.89925127617349,101.2477805647426],"label":"Tennis serve with ball bouncing"},{"segment":[109.91817061310454,171.44997095631825],"label":"Tennis serve with ball bouncing"}]},"ADbp9ObJVxE":{"duration":120.88,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ADbp9ObJVxE","annotations":[{"segment":[19.235678274570983,96.36697645397815],"label":"Polishing forniture"}]},"WTOTYZOu3MQ":{"duration":128.76,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=WTOTYZOu3MQ","annotations":[{"segment":[1.0574584880090874,127.20494669673182],"label":"Playing 
squash"}]},"j15g2ZTig1k":{"duration":170.23,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=j15g2ZTig1k","annotations":[{"segment":[24.29260764430577,152.01259360374416],"label":"Playing guitarra"}]},"GNzaxjYL0TM":{"duration":89.49,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=GNzaxjYL0TM","annotations":[{"segment":[0.01,81.53196121060843],"label":"Doing motocross"}]},"oOYaw6-b4SY":{"duration":130.78,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=oOYaw6-b4SY","annotations":[{"segment":[6.426529975484734,92.72564678913687],"label":"Doing kickboxing"}]},"45llr44Pu9g":{"duration":214.18,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=45llr44Pu9g","annotations":[{"segment":[34.000839639999995,203.42405148205927],"label":"Fixing bicycle"}]},"MmIxh3MWWME":{"duration":30.651,"subset":"testing","resolution":"270x480","url":"https://www.youtube.com/watch?v=MmIxh3MWWME","annotations":[]},"CmoibYw9WYc":{"duration":210.79,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CmoibYw9WYc","annotations":[]},"1XicledQXng":{"duration":121.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1XicledQXng","annotations":[{"segment":[56.27390809984399,89.32066621216849],"label":"Brushing hair"}]},"z1QgzOfUjow":{"duration":44.978,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=z1QgzOfUjow","annotations":[{"segment":[0.2,40.24357878315133],"label":"Rafting"}]},"AVL31l6H1uI":{"duration":15.07,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AVL31l6H1uI","annotations":[{"segment":[4.803907956318253,8.212714508580342],"label":"Hand car 
wash"}]},"Ayix_aDOrzs":{"duration":135.47,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Ayix_aDOrzs","annotations":[{"segment":[4.226673135725429,121.72818630889236],"label":"Getting a haircut"}]},"ITGlho0dw-k":{"duration":126.433,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ITGlho0dw-k","annotations":[{"segment":[67.06256112324493,93.88758557254292],"label":"Applying sunscreen"}]},"Z0oPjHAZ_yc":{"duration":220.31,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Z0oPjHAZ_yc","annotations":[]},"pjF6uBZHVRE":{"duration":192.63,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pjF6uBZHVRE","annotations":[{"segment":[41.9167232,166.20866856],"label":"Pole vault"}]},"XDFJFhaG_p0":{"duration":189.08,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=XDFJFhaG_p0","annotations":[{"segment":[107.3715464428101,176.1011348845554],"label":"Playing violin"}]},"oFku30m99do":{"duration":15.07,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=oFku30m99do","annotations":[{"segment":[0,15.046240249609985],"label":"Putting on makeup"}]},"lPCl1ZYH2xI":{"duration":360.421,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lPCl1ZYH2xI","annotations":[{"segment":[59.60145775626943,337.3667410296412],"label":"Removing ice from car"}]},"nLdRqOTb0Ik":{"duration":49.9,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nLdRqOTb0Ik","annotations":[{"segment":[1.6335117004680186,7.467482059282371],"label":"Playing rubik cube"},{"segment":[25.747255850234005,31.114508580343212],"label":"Playing rubik 
cube"}]},"iUajMS61fpQ":{"duration":153.415,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iUajMS61fpQ","annotations":[]},"XI7qJHE61zo":{"duration":155.528,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=XI7qJHE61zo","annotations":[]},"fgIJnjuMyoc":{"duration":126.224,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fgIJnjuMyoc","annotations":[{"segment":[3.813550767239009,22.488153570977325],"label":"Surfing"},{"segment":[27.40252272985583,30.547718991538073],"label":"Surfing"},{"segment":[41.555905907425924,59.83735917845396],"label":"Surfing"},{"segment":[64.16200403826704,71.43527039340722],"label":"Surfing"},{"segment":[125.88648067378105,126.00099999999999],"label":"Surfing"}]},"UIrn2ziRgEM":{"duration":170.81,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=UIrn2ziRgEM","annotations":[{"segment":[3.061260530421217,160.64962870514822],"label":"Smoking a cigarette"}]},"vJilhGo626U":{"duration":229.6,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vJilhGo626U","annotations":[]},"A0H6JceDqlg":{"duration":114.15,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=A0H6JceDqlg","annotations":[{"segment":[79.95774134094616,94.38218911069367],"label":"Tennis serve with ball 
bouncing"}]},"n7ED3zaQqws":{"duration":56.865,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=n7ED3zaQqws","annotations":[{"segment":[6.825851794071763,17.37489547581903],"label":"Swimming"},{"segment":[35.63626521060842,51.947391575663026],"label":"Swimming"}]},"nFJj9tUHttM":{"duration":181.417,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=nFJj9tUHttM","annotations":[]},"UjJ8yWaFNGg":{"duration":183.252,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UjJ8yWaFNGg","annotations":[{"segment":[34.330497224313135,37.755762233743695],"label":"Skiing"},{"segment":[50.02962851753655,54.88208728089651],"label":"Skiing"},{"segment":[61.73261729975764,69.15402482019053],"label":"Skiing"},{"segment":[73.43560608197873,76.57543234062342],"label":"Skiing"},{"segment":[78.2880648453387,80.85701360241161],"label":"Skiing"},{"segment":[84.28227861184217,85.99491111655746],"label":"Skiing"},{"segment":[87.42210487048686,89.13473737520214],"label":"Skiing"},{"segment":[90.56193112913154,92.84544113541858],"label":"Skiing"},{"segment":[116.82229620143254,124.52914247265129],"label":"Skiing"},{"segment":[138.80108001194532,142.79722174017266],"label":"Skiing"},{"segment":[147.93511922615397,151.07494546758699],"label":"Skiing"},{"segment":[152.7875779629141,160.49442419188605],"label":"Skiing"}]},"hva8RRWhvWA":{"duration":71.704,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hva8RRWhvWA","annotations":[]},"JtQSDGSKnsY":{"duration":167.35,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JtQSDGSKnsY","annotations":[{"segment":[25.313973478939158,126.063],"label":"Vacuuming 
floor"}]},"49PbyLbrxlE":{"duration":35.71,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=49PbyLbrxlE","annotations":[]},"B3ZL18f1x4Y":{"duration":238.68,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=B3ZL18f1x4Y","annotations":[{"segment":[96.67134530999999,190.34252073],"label":"Making a sandwich"}]},"cEHyZzBsWnw":{"duration":123.97,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=cEHyZzBsWnw","annotations":[{"segment":[1.5465959438377537,119.49381903276131],"label":"Snatch"}]},"kh42ufAYMZQ":{"duration":147.076,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=kh42ufAYMZQ","annotations":[{"segment":[6.653947877642997,118.39438292633747],"label":"Clipping cat claws"}]},"EQ7QqxSFWGY":{"duration":185.88,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EQ7QqxSFWGY","annotations":[]},"1xQTlp0hscs":{"duration":234.66,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1xQTlp0hscs","annotations":[{"segment":[69.00713277691109,206.65531274570984],"label":"Painting"}]},"SFDvkbDZTjA":{"duration":143.802,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SFDvkbDZTjA","annotations":[{"segment":[26.00879251170047,70.86139781591264],"label":"Clipping cat claws"},{"segment":[97.99503432137286,124.4559360374415],"label":"Clipping cat claws"}]},"hPIHCG5n7RI":{"duration":109.25,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=hPIHCG5n7RI","annotations":[{"segment":[0,109.2498866213152],"label":"Hurling"}]},"zf4MXHeiSXs":{"duration":210.61,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zf4MXHeiSXs","annotations":[{"segment":[35.66789344000001,200.5840024621393],"label":"Horseback 
riding"}]},"w8PqH7bltJQ":{"duration":165.05,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=w8PqH7bltJQ","annotations":[{"segment":[48.40694897410296,151.91542497191887],"label":"Fixing bicycle"}]},"TABp0GOGqNw":{"duration":34.74,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=TABp0GOGqNw","annotations":[]},"D0dyvNTI7yU":{"duration":127.083,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=D0dyvNTI7yU","annotations":[{"segment":[7.930284667169,118.55775577417656],"label":"Capoeira"}]},"SvIUXZqy8Hs":{"duration":27.4,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SvIUXZqy8Hs","annotations":[{"segment":[5.121410557992667,26.076514010903427],"label":"Hitting a pinata"}]},"-v-Zail6TNw":{"duration":32.11,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=-v-Zail6TNw","annotations":[{"segment":[3.42549371,17.31019152],"label":"Playing harmonica"},{"segment":[19.098564489999998,29.590379],"label":"Playing harmonica"}]},"optJ47P_5Ys":{"duration":15.51,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=optJ47P_5Ys","annotations":[{"segment":[0.12126730000000001,6.6429282],"label":"Hand washing clothes"},{"segment":[7.431638120000001,14.679608373634945],"label":"Hand washing clothes"}]},"Yb5b_xx1P9I":{"duration":94.65,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Yb5b_xx1P9I","annotations":[{"segment":[18.456422610645923,65.55721231825272],"label":"Making a 
sandwich"}]},"zB0mp92uh48":{"duration":31.487,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=zB0mp92uh48","annotations":[]},"elW8E-9bCRQ":{"duration":153.6,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=elW8E-9bCRQ","annotations":[{"segment":[8.515584,143.97122173634946],"label":"Tango"}]},"tO1VJnsd8sg":{"duration":99.66,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tO1VJnsd8sg","annotations":[{"segment":[7.46279516486782,24.254084285820415],"label":"Playing racquetball"},{"segment":[43.22202199652612,71.51845335413417],"label":"Playing racquetball"}]},"zCcUwVzaEac":{"duration":125.57,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zCcUwVzaEac","annotations":[{"segment":[0.2,125.341],"label":"Hammer throw"}]},"En9FemmDusk":{"duration":279.592,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=En9FemmDusk","annotations":[{"segment":[150.69989840137822,189.51984328624846],"label":"Clipping cat claws"}]},"QVe7NojAHjY":{"duration":171.735,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QVe7NojAHjY","annotations":[{"segment":[12.053003916075008,133.11873084399377],"label":"Baking 
cookies"}]},"2rFldVRhE0E":{"duration":111.36,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=2rFldVRhE0E","annotations":[]},"KRsDvTz5eUc":{"duration":16.162,"subset":"testing","resolution":"424x240","url":"https://www.youtube.com/watch?v=KRsDvTz5eUc","annotations":[]},"AD-IKY1O9t0":{"duration":95.62,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=AD-IKY1O9t0","annotations":[]},"QoRUUJz-PU0":{"duration":208.376,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=QoRUUJz-PU0","annotations":[{"segment":[26.981587301587304,199.27365077847114],"label":"Futsal"}]},"1fmaEo3wzxg":{"duration":193.33,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1fmaEo3wzxg","annotations":[{"segment":[16.129985671606864,189.18814969968798],"label":"Getting a tattoo"}]},"RXeD_Arfvcs":{"duration":220.06,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=RXeD_Arfvcs","annotations":[]},"Pso333FAp2w":{"duration":130.775,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Pso333FAp2w","annotations":[{"segment":[12.235975039001561,16.518566302652108],"label":"Disc dog"},{"segment":[45.273107644305774,47.10850390015601],"label":"Disc dog"},{"segment":[72.60011856474259,73.61978315132606],"label":"Disc dog"},{"segment":[89.73048361934477,95.04727925117005],"label":"Disc dog"},{"segment":[117.48333229329175,122.37847113884557],"label":"Disc dog"}]},"aS0wGPhD48o":{"duration":80.03999999999999,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aS0wGPhD48o","annotations":[{"segment":[9.11522665522621,61.55899645241809],"label":"Doing 
nails"}]},"M1J3XjExaW4":{"duration":99.66,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=M1J3XjExaW4","annotations":[{"segment":[0.01,87.06568461778471],"label":"Croquet"}]},"SNJgJB2PGdI":{"duration":189.66,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=SNJgJB2PGdI","annotations":[]},"TCFDMNSYTVw":{"duration":194.259,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=TCFDMNSYTVw","annotations":[{"segment":[0.2,186.6249360374415],"label":"Wakeboarding"}]},"juiMCvZUYwk":{"duration":144.01,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=juiMCvZUYwk","annotations":[{"segment":[39.31634551482059,103.34582249609984],"label":"Wrapping presents"}]},"uLcuqmzyAGU":{"duration":159.34,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=uLcuqmzyAGU","annotations":[]},"s82_J03bqwQ":{"duration":228.86,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=s82_J03bqwQ","annotations":[{"segment":[78.51391575663025,204.8499438377535],"label":"Windsurfing"}]},"rgJ00nNS_r0":{"duration":48.948,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rgJ00nNS_r0","annotations":[{"segment":[0.01,48.947096],"label":"Croquet"}]},"lgWH94ea3-U":{"duration":73.61,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lgWH94ea3-U","annotations":[{"segment":[0,8.497564074882995],"label":"Smoking hookah"},{"segment":[15.61714478627145,19.98075877067083],"label":"Smoking hookah"},{"segment":[23.999876914196562,28.93765063338533],"label":"Smoking hookah"},{"segment":[37.66487860218408,38.81319807176286],"label":"Smoking hookah"},{"segment":[43.521307897035875,46.851434358814345],"label":"Smoking hookah"},{"segment":[48.6887455101404,51.215048343213724],"label":"Smoking hookah"},{"segment":[60.286772152886115,61.77958746333853],"label":"Smoking 
hookah"}]},"c-zbA4zixfE":{"duration":140.179,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=c-zbA4zixfE","annotations":[{"segment":[0.20000000000000004,139.97886621315195],"label":"Throwing darts"}]},"-Lxv663IEaI":{"duration":73.82,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-Lxv663IEaI","annotations":[{"segment":[0.01,73.8162358276644],"label":"Horseback riding"}]},"5pzCqex6S48":{"duration":61.84,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5pzCqex6S48","annotations":[{"segment":[9.646604992199688,55.95030895475819],"label":"Using parallel bars"}]},"4VAhZEpQsv8":{"duration":195.65,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4VAhZEpQsv8","annotations":[{"segment":[8.695908706708268,106.63930150858035],"label":"Doing nails"}]},"Hh7aXAtWsXg":{"duration":33.275,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Hh7aXAtWsXg","annotations":[{"segment":[7.163555244250586,12.821725301092044],"label":"Throwing darts"}]},"DuU7OKTAmsY":{"duration":134.188,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=DuU7OKTAmsY","annotations":[{"segment":[8.897011560734539,110.00893118037646],"label":"River tubing"}]},"8LI3rnG6kMA":{"duration":91.766,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8LI3rnG6kMA","annotations":[{"segment":[18.765796137992183,68.1211087351928],"label":"Carving jack-o-lanterns"}]},"gCku1WVr608":{"duration":147.958,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gCku1WVr608","annotations":[{"segment":[75.70994809360374,141.26368363806552],"label":"Grooming dog"}]},"UzdI8PLRiXg":{"duration":220.962,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=UzdI8PLRiXg","annotations":[{"segment":[0.01,220.921],"label":"Sharpening 
knives"}]},"-u2zAMnrCC4":{"duration":161.05,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=-u2zAMnrCC4","annotations":[{"segment":[79.63129017160686,107.51480187207488],"label":"Kayaking"}]},"o-S8I8lhQpo":{"duration":212.23,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=o-S8I8lhQpo","annotations":[{"segment":[67.06648272,148.66059763478975],"label":"Windsurfing"}]},"ienRkMdn_OM":{"duration":117.35,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=ienRkMdn_OM","annotations":[{"segment":[0.01,117.301],"label":"Grooming horse"}]},"PVJK-r4pNg8":{"duration":75.61,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=PVJK-r4pNg8","annotations":[{"segment":[23.88431341335286,60.91974153120124],"label":"Playing squash"}]},"ayDMt_8KajY":{"duration":33.18,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=ayDMt_8KajY","annotations":[{"segment":[0.7505867568036055,5.202342519500779],"label":"Shot put"},{"segment":[18.505845280811233,27.875238276131043],"label":"Shot put"}]},"Zjfw0n32DBA":{"duration":79.23,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Zjfw0n32DBA","annotations":[{"segment":[21.011463478939156,70.45020107644307],"label":"Brushing 
teeth"}]},"qV8hycDJWto":{"duration":81.642,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qV8hycDJWto","annotations":[{"segment":[35.04341330366595,63.35747706727404],"label":"Welding"}]},"tdHQb9Uwc4A":{"duration":186.364,"subset":"training","resolution":"658x480","url":"https://www.youtube.com/watch?v=tdHQb9Uwc4A","annotations":[{"segment":[14.536923166926679,186.3633560090703],"label":"Powerbocking"}]},"LfOWLfeyo-w":{"duration":43.49,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=LfOWLfeyo-w","annotations":[]},"VFSSZXuMmgg":{"duration":141.085,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VFSSZXuMmgg","annotations":[{"segment":[19.747421033863578,118.04571010454632],"label":"Rollerblading"},{"segment":[130.11358043891647,131.86890703300665],"label":"Rollerblading"}]},"6fyIc1vrK4Q":{"duration":197.579,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6fyIc1vrK4Q","annotations":[{"segment":[56.07672698907957,184.88705148205926],"label":"Decorating the Christmas tree"}]},"OTtgayeZclk":{"duration":135.7,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OTtgayeZclk","annotations":[]},"JKVOi9DE8Hs":{"duration":73.794,"subset":"training","resolution":"293x240","url":"https://www.youtube.com/watch?v=JKVOi9DE8Hs","annotations":[{"segment":[1.1512170963028998,5.065355163806553],"label":"Throwing darts"},{"segment":[8.979493244929797,9.670223494539782],"label":"Throwing darts"},{"segment":[30.852617815912634,38.45065056162246],"label":"Throwing darts"},{"segment":[50.653551638065515,54.22232459438377],"label":"Throwing darts"}]},"9mL3eC5iizc":{"duration":65.76,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9mL3eC5iizc","annotations":[{"segment":[6.2578681903276125,37.34203313572543],"label":"Playing 
harmonica"}]},"v7OW60YncY4":{"duration":136.93,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=v7OW60YncY4","annotations":[{"segment":[48.91797897347894,65.79361364118564],"label":"Polishing forniture"},{"segment":[69.63869495787831,73.27016064586583],"label":"Polishing forniture"},{"segment":[77.5424732199688,87.58240776911076],"label":"Polishing forniture"}]},"YL3MvJVk6u0":{"duration":45.906,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=YL3MvJVk6u0","annotations":[{"segment":[0.01,44.032274875355654],"label":"Trimming branches or hedges"}]},"4SSbyJ6pMuE":{"duration":91.51,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=4SSbyJ6pMuE","annotations":[{"segment":[2.141413819818099,86.22759500156006],"label":"Playing violin"}]},"zryPDx9WL08":{"duration":152.74,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zryPDx9WL08","annotations":[{"segment":[26.8196166,134.21145865834632],"label":"Breakdancing"}]},"G4fSvtn1HAw":{"duration":193.771,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=G4fSvtn1HAw","annotations":[{"segment":[15.41697772400692,168.3775798642746],"label":"Baking cookies"}]},"Ksbk5rtqVd0":{"duration":89.281,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Ksbk5rtqVd0","annotations":[{"segment":[42.592240249609986,82.83680967238689],"label":"Applying 
sunscreen"}]},"S8RXX1uOGgQ":{"duration":194.352,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=S8RXX1uOGgQ","annotations":[{"segment":[44.43172167097788,46.84977480202812],"label":"Wakeboarding"},{"segment":[49.872341215840926,56.82424396761037],"label":"Wakeboarding"},{"segment":[60.75358030556702,65.28742992628622],"label":"Wakeboarding"},{"segment":[68.91450962286159,71.93707603667438],"label":"Wakeboarding"},{"segment":[76.16866901601232,77.98220886429999],"label":"Wakeboarding"},{"segment":[82.21380184363792,85.23636825745074],"label":"Wakeboarding"},{"segment":[100.65145696789602,103.37176674032754],"label":"Wakeboarding"},{"segment":[113.34623590590981,133.59743087845558],"label":"Wakeboarding"},{"segment":[136.01548400950583,150.82605943718858],"label":"Wakeboarding"},{"segment":[152.94185592685753,174.09981956211323],"label":"Wakeboarding"}]},"t6v-9gTSfp4":{"duration":103.051,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=t6v-9gTSfp4","annotations":[]},"uO2YgcdqoZg":{"duration":152.23,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=uO2YgcdqoZg","annotations":[{"segment":[29.250641154351637,38.010765764943535],"label":"Playing squash"},{"segment":[57.42509598301207,64.76465984594041],"label":"Playing squash"},{"segment":[74.4718249549747,89.38771280544198],"label":"Playing squash"},{"segment":[107.38148227584696,129.6369339892426],"label":"Playing squash"}]},"cSfs5ht9sro":{"duration":205.544,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=cSfs5ht9sro","annotations":[{"segment":[61.912401423053204,185.16097618940833],"label":"Skiing"}]},"PqrexMJ5ohk":{"duration":129.915,"subset":"training","resolution":"568x320","url":"https://www.youtube.com/watch?v=PqrexMJ5ohk","annotations":[{"segment":[0.2,117.71096255850233],"label":"Playing 
pool"}]},"lZsPSqZbfus":{"duration":73.352,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=lZsPSqZbfus","annotations":[]},"whP6aYxUklU":{"duration":25.055,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=whP6aYxUklU","annotations":[]},"ot4CuJqrN-Y":{"duration":99.823,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=ot4CuJqrN-Y","annotations":[{"segment":[12.127977620954923,97.33479374454829],"label":"Drum corps"}]},"UJfwaeMrZ7A":{"duration":25.33,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=UJfwaeMrZ7A","annotations":[{"segment":[4.12632948,16.05060852],"label":"Hopscotch"}]},"OcArjqkv1RU":{"duration":81.48,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OcArjqkv1RU","annotations":[]},"GI2a7_XfKaI":{"duration":102.91,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=GI2a7_XfKaI","annotations":[]},"bEmDo5nmBe0":{"duration":33.18,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bEmDo5nmBe0","annotations":[]},"hGvf2Y3H0GI":{"duration":232.06,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hGvf2Y3H0GI","annotations":[]},"woUdHiRWKMg":{"duration":180.629,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=woUdHiRWKMg","annotations":[{"segment":[14.456130335691935,174.4860508964396],"label":"Changing car wheel"}]},"nP1Bk-vJgVg":{"duration":197.35,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nP1Bk-vJgVg","annotations":[]},"q0o0k2uD0co":{"duration":29.048,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=q0o0k2uD0co","annotations":[{"segment":[0.01,29.0481632],"label":"Hand car 
wash"}]},"TxiLjdHoXU4":{"duration":127.13,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=TxiLjdHoXU4","annotations":[{"segment":[0,119.15145283307332],"label":"Using the balance beam"}]},"Z-UR8n33TWc":{"duration":26.982,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Z-UR8n33TWc","annotations":[{"segment":[0.01,24.224589114596],"label":"Sumo"}]},"1xHdr8MWJOc":{"duration":181.58,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1xHdr8MWJOc","annotations":[]},"zET6lJ-0dGo":{"duration":180.002,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zET6lJ-0dGo","annotations":[]},"eP3h7IqxRqo":{"duration":96.41,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=eP3h7IqxRqo","annotations":[]},"Mz-yz0fQ_Hk":{"duration":101.66,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Mz-yz0fQ_Hk","annotations":[{"segment":[7.219522620904836,82.58816536661466],"label":"Sailing"}]},"JaBA8V-nuDg":{"duration":177.935,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=JaBA8V-nuDg","annotations":[{"segment":[44.69181982517395,93.26988408112324],"label":"Knitting"}]},"2WvbAEMG5DU":{"duration":59.675,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2WvbAEMG5DU","annotations":[{"segment":[23.718182527301092,44.940098283931356],"label":"Building sandcastles"}]},"aVL9ArQS_Ag":{"duration":140.81,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aVL9ArQS_Ag","annotations":[{"segment":[12.299120124804993,129.36038845553824],"label":"Cleaning shoes"}]},"vSExyDlV9JA":{"duration":45.999,"subset":"training","resolution":"800x480","url":"https://www.youtube.com/watch?v=vSExyDlV9JA","annotations":[{"segment":[0.2,16.405474258970358],"label":"Playing beach 
volleyball"}]},"0hWGSkDnRHA":{"duration":208.59,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0hWGSkDnRHA","annotations":[{"segment":[1.627026918876755,8.135134594383775],"label":"Playing water polo"}]},"rGFhqcxeVIg":{"duration":105.91,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=rGFhqcxeVIg","annotations":[]},"cQMWhfU9kr4":{"duration":200.53,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cQMWhfU9kr4","annotations":[]},"NNQQPZpzCOk":{"duration":63.112,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NNQQPZpzCOk","annotations":[{"segment":[5.021378527301091,50.50916048049922],"label":"Shuffleboard"}]},"_1CHWQx_x-o":{"duration":177.8,"subset":"testing","resolution":"176x144","url":"https://www.youtube.com/watch?v=_1CHWQx_x-o","annotations":[]},"absXyPrnh6Q":{"duration":113.66,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=absXyPrnh6Q","annotations":[]},"nn-ASL9KZUo":{"duration":128.964,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=nn-ASL9KZUo","annotations":[{"segment":[8.24483775351014,10.859054602184088],"label":"Throwing darts"},{"segment":[14.478739469578786,18.50061154446178],"label":"Throwing darts"},{"segment":[20.511547581903276,23.527951638065524],"label":"Throwing darts"},{"segment":[50.27340093603745,53.89308580343214],"label":"Throwing darts"},{"segment":[55.90402184087364,59.92589391575664],"label":"Throwing darts"},{"segment":[61.132455538221535,67.76854446177848],"label":"Throwing darts"},{"segment":[69.98057410296413,71.99151014040562],"label":"Throwing darts"},{"segment":[73.60025897035882,77.42103744149766],"label":"Throwing darts"},{"segment":[78.8286926677067,83.65493915756632],"label":"Throwing darts"},{"segment":[86.67134321372855,93.7096193447738],"label":"Throwing 
darts"},{"segment":[98.9380530421217,110.40038845553822],"label":"Throwing darts"}]},"dAcdSkaoK64":{"duration":89.28,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dAcdSkaoK64","annotations":[{"segment":[14.396400000000002,43.80438280265211],"label":"Using the pommel horse"},{"segment":[68.73597110686428,85.44988505382216],"label":"Using the pommel horse"}]},"dzpsJQN4o-Y":{"duration":174.27,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dzpsJQN4o-Y","annotations":[{"segment":[44.04220446368875,51.92630279360834],"label":"Shaving legs"},{"segment":[94.0654490397303,145.71988637368625],"label":"Shaving legs"}]},"tkmoslnsmwY":{"duration":188.685,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tkmoslnsmwY","annotations":[{"segment":[34.039556395990175,96.54323218491189],"label":"Rafting"},{"segment":[146.72223969035514,169.02402080388546],"label":"Rafting"}]},"3pBldeB3uaE":{"duration":124.74,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3pBldeB3uaE","annotations":[{"segment":[46.703624087363494,54.48756143525741],"label":"Springboard diving"},{"segment":[57.01734107332293,66.94186119188767],"label":"Springboard diving"},{"segment":[71.61222360062402,76.67178287675507],"label":"Springboard diving"},{"segment":[82.8989327550702,85.23411395943837],"label":"Springboard diving"},{"segment":[88.54228733229328,91.0720669703588],"label":"Springboard diving"},{"segment":[92.6288544399376,95.15863407800312],"label":"Springboard diving"},{"segment":[96.91001998127925,103.13716985959438],"label":"Springboard diving"}]},"-z0ZvJD9gY4":{"duration":110.5,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-z0ZvJD9gY4","annotations":[{"segment":[0.01,107.22827958016279],"label":"Bathing 
dog"}]},"59NxymNdzBE":{"duration":153.18,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=59NxymNdzBE","annotations":[{"segment":[41.807691355273256,50.169229626327905],"label":"Polishing forniture"},{"segment":[56.858460243171635,63.30878897815913],"label":"Polishing forniture"},{"segment":[69.042415149766,78.12065658814353],"label":"Polishing forniture"},{"segment":[89.11010675039002,93.88812856006241],"label":"Polishing forniture"},{"segment":[100.09955691263652,104.39977654134165],"label":"Polishing forniture"},{"segment":[116.58373215600625,129.2454899516381],"label":"Polishing forniture"},{"segment":[136.8903248471139,142.62395101872076],"label":"Polishing forniture"}]},"C7sabT8febk":{"duration":113.52000000000001,"subset":"training","resolution":"568x320","url":"https://www.youtube.com/watch?v=C7sabT8febk","annotations":[{"segment":[35.42039251170047,113.32235827664398],"label":"Painting"}]},"3sL7fLHM09c":{"duration":31.42,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=3sL7fLHM09c","annotations":[{"segment":[4.656126163413884,28.769957003120123],"label":"Using uneven bars"}]},"5VwGzOLPFAQ":{"duration":141.039,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5VwGzOLPFAQ","annotations":[{"segment":[13.42171338009983,36.52466264092741],"label":"Snowboarding"},{"segment":[88.45129145573986,124.97595409666727],"label":"Snowboarding"}]},"NVtRGWuiEP4":{"duration":153.74,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=NVtRGWuiEP4","annotations":[{"segment":[0.01,151.64940560711239],"label":"Baton twirling"}]},"DvDfifKGXXg":{"duration":60.488,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=DvDfifKGXXg","annotations":[{"segment":[0.01,60.48798185941043],"label":"Painting 
fence"}]},"aIm5e9Oqob0":{"duration":105.001,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=aIm5e9Oqob0","annotations":[]},"ulV37d5wFaw":{"duration":202.04,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ulV37d5wFaw","annotations":[{"segment":[39.39875663026521,151.92160556630265],"label":"Removing curlers"}]},"S7RWgSxlZ-M":{"duration":99.243,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=S7RWgSxlZ-M","annotations":[{"segment":[22.16244789937575,83.67328590560628],"label":"Sharpening knives"}]},"fGA7nlbGmHU":{"duration":69.126,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fGA7nlbGmHU","annotations":[{"segment":[0.01,62.34506465017016],"label":"Playing drums"}]},"7FPvAakfM9Y":{"duration":79.157,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=7FPvAakfM9Y","annotations":[{"segment":[0.01,55.076355616224646],"label":"Mooping floor"}]},"jmWtHelsblo":{"duration":214.51,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jmWtHelsblo","annotations":[{"segment":[0.01,214.50594],"label":"Zumba"}]},"w6P9JrTGRlE":{"duration":102.35,"subset":"training","resolution":"608x1080","url":"https://www.youtube.com/watch?v=w6P9JrTGRlE","annotations":[{"segment":[71.69539468174727,102.35356009070294],"label":"Drinking coffee"}]},"s43eZJ0hy44":{"duration":91.49,"subset":"validation","resolution":"720x1280","url":"https://www.youtube.com/watch?v=s43eZJ0hy44","annotations":[{"segment":[15.124465520000001,60.971103744149765],"label":"Dodgeball"}]},"Y5qDKbTkCaY":{"duration":107.21000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Y5qDKbTkCaY","annotations":[{"segment":[19.233620647425898,22.57859815132605],"label":"Shot put"},{"segment":[31.777286287051485,53.5196400624025],"label":"Shot 
put"},{"segment":[61.04583944617785,92.3213791076443],"label":"Shot put"}]},"rv9jW-t3mT4":{"duration":173.71,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rv9jW-t3mT4","annotations":[{"segment":[0.01,72.0652511700468],"label":"Pole vault"}]},"HDvXPeEXO0E":{"duration":188.988,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HDvXPeEXO0E","annotations":[]},"0EepbsAtiDk":{"duration":186.55,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0EepbsAtiDk","annotations":[{"segment":[3.200397934477379,183.0036739469579],"label":"Horseback riding"}]},"etqiXPKlthk":{"duration":199.9,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=etqiXPKlthk","annotations":[{"segment":[12.474295760946084,118.50580962870517],"label":"Drinking beer"}]},"2DtotlMv9Gg":{"duration":65.62,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2DtotlMv9Gg","annotations":[{"segment":[6.684575718577875,59.629684914839565],"label":"Hurling"}]},"p2P5g7HTJeY":{"duration":163.33,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=p2P5g7HTJeY","annotations":[{"segment":[9.42495631825273,153.11060998439936],"label":"Hurling"}]},"0BtHd6dvm78":{"duration":34.0,"subset":"testing","resolution":"176x144","url":"https://www.youtube.com/watch?v=0BtHd6dvm78","annotations":[]},"T6TgMYLrPII":{"duration":113.78,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=T6TgMYLrPII","annotations":[{"segment":[34.7900849367308,112.71277341653666],"label":"Pole 
vault"}]},"hzuQYOG0a_g":{"duration":7.85,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=hzuQYOG0a_g","annotations":[{"segment":[0,7.848343],"label":"Kayaking"}]},"iMF8-iDLl6U":{"duration":219.661,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iMF8-iDLl6U","annotations":[{"segment":[25.696684867394698,62.69991107644306],"label":"Changing car wheel"},{"segment":[76.7474321372855,138.41947581903275],"label":"Changing car wheel"}]},"sYDP8SiL6_g":{"duration":125.39,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=sYDP8SiL6_g","annotations":[]},"R0dqEWnDC7k":{"duration":89.82,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=R0dqEWnDC7k","annotations":[{"segment":[15.26085023400936,86.24480499219969],"label":"Cricket"}]},"m6T68amiXjs":{"duration":234.96,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=m6T68amiXjs","annotations":[{"segment":[117.01342561999999,209.62773790951636],"label":"Playing field hockey"},{"segment":[14.093020759999998,97.83112794],"label":"Playing field hockey"}]},"uqAFtPVzEEs":{"duration":112.478,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uqAFtPVzEEs","annotations":[{"segment":[2.849527026013051,17.208595561838596],"label":"Beach soccer"},{"segment":[28.245586924080538,33.32469751598085],"label":"Beach soccer"},{"segment":[43.1326352106849,51.18915545990608],"label":"Beach soccer"},{"segment":[61.697660132803286,68.52818817018645],"label":"Beach soccer"},{"segment":[77.46041714214907,86.3926461141117],"label":"Beach soccer"},{"segment":[92.17232368420514,97.60171776520204],"label":"Beach soccer"},{"segment":[103.20625359074721,107.75993894900266],"label":"Beach 
soccer"}]},"PLWeVDUH_Cw":{"duration":66.46,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=PLWeVDUH_Cw","annotations":[{"segment":[16.344126365054603,58.26810764430577],"label":"Ping-pong"}]},"ipcvgAb5y0U":{"duration":86.03,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ipcvgAb5y0U","annotations":[{"segment":[52.69950606405459,63.82170878056549],"label":"Making a sandwich"}]},"5fMtHNSIOAE":{"duration":206.449,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5fMtHNSIOAE","annotations":[{"segment":[3.3438920793978966,203.9773874065007],"label":"Tug of war"}]},"7TOw39_59xo":{"duration":112.78,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7TOw39_59xo","annotations":[{"segment":[25.3442057871143,88.4092770991282],"label":"Hand car wash"}]},"r82RY124wVY":{"duration":100.15,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=r82RY124wVY","annotations":[{"segment":[45.854306890000004,81.82911223],"label":"Tennis serve with ball bouncing"},{"segment":[18.418554280811232,26.22302643369735],"label":"Tennis serve with ball bouncing"}]},"rBGdFwbG118":{"duration":170.017,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rBGdFwbG118","annotations":[{"segment":[12.25409286842333,32.38065767087598],"label":"Hand car wash"},{"segment":[54.625808242007864,75.54684184722784],"label":"Hand car wash"},{"segment":[91.96588136747705,96.20305285657362],"label":"Hand car wash"}]},"l_36uVP3LnI":{"duration":96.22,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=l_36uVP3LnI","annotations":[{"segment":[0,96.181],"label":"Triple jump"}]},"rKnQNI9PNFI":{"duration":159.55,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rKnQNI9PNFI","annotations":[{"segment":[14.395655120000004,61.858399680000005],"label":"Tai 
chi"}]},"o-BGGr-DU5g":{"duration":47.532,"subset":"validation","resolution":"853x470","url":"https://www.youtube.com/watch?v=o-BGGr-DU5g","annotations":[{"segment":[4.523254394695788,9.268963923556942],"label":"Canoeing"},{"segment":[13.792218318252731,18.09301757878315],"label":"Canoeing"},{"segment":[23.357789087363496,29.66068455538222],"label":"Canoeing"},{"segment":[33.07166327925117,38.8554967675507],"label":"Canoeing"}]},"WYcTPQSXwRc":{"duration":186.573,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=WYcTPQSXwRc","annotations":[{"segment":[0.20000000000000004,114.61286114384193],"label":"Drum corps"},{"segment":[115.77349775722237,159.007210241263],"label":"Drum corps"}]},"4x08i2_AYKo":{"duration":48.089,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4x08i2_AYKo","annotations":[{"segment":[0.01,43.812323219968796],"label":"Powerbocking"}]},"ChPzol03Hqs":{"duration":206.565,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=ChPzol03Hqs","annotations":[{"segment":[86.686285105826,107.31053137635709],"label":"Applying sunscreen"}]},"4FGdbFzq-8I":{"duration":74.304,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=4FGdbFzq-8I","annotations":[{"segment":[12.866970189011644,50.656448925117004],"label":"Surfing"},{"segment":[68.39200198127925,74.103853],"label":"Surfing"}]},"2ESPauwYUnQ":{"duration":30.3,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=2ESPauwYUnQ","annotations":[{"segment":[2.552745875195008,24.676543460218408],"label":"Ironing 
clothes"}]},"te1znGPS_Vw":{"duration":158.96,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=te1znGPS_Vw","annotations":[{"segment":[127.6787566146646,147.26442995943836],"label":"Paintball"},{"segment":[2.72780508,112.48857732],"label":"Paintball"}]},"ekJtPwfLM-M":{"duration":154.41,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ekJtPwfLM-M","annotations":[{"segment":[41.43472982059975,151.35155618975864],"label":"Breakdancing"}]},"wlYxVUJSJVI":{"duration":221.542,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=wlYxVUJSJVI","annotations":[{"segment":[30.740544461778473,194.805248049922],"label":"Futsal"}]},"idkQqlwyff8":{"duration":51.99,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=idkQqlwyff8","annotations":[]},"pPrW3iW0DA8":{"duration":64.18,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pPrW3iW0DA8","annotations":[{"segment":[1.249739976599064,48.73985908736349],"label":"Discus throw"}]},"N6y3yXiVo24":{"duration":35.063,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=N6y3yXiVo24","annotations":[{"segment":[0.01,34.78863583525741],"label":"Hitting a 
pinata"}]},"3jt4XE6ilIQ":{"duration":54.614,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=3jt4XE6ilIQ","annotations":[{"segment":[19.383906542056074,31.11627102803738],"label":"Bullfighting"},{"segment":[6.801370716510903,11.987415887850467],"label":"Bullfighting"}]},"ykdPTjsmfgY":{"duration":142.316,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=ykdPTjsmfgY","annotations":[{"segment":[9.54687892005476,13.987287720080232],"label":"Canoeing"},{"segment":[20.20386004011589,31.74892292018211],"label":"Canoeing"},{"segment":[37.7434748002165,40.62974052023305],"label":"Canoeing"},{"segment":[59.94551880034385,63.941886720366774],"label":"Canoeing"},{"segment":[69.71441816039989,72.82270429953198],"label":"Canoeing"},{"segment":[82.81362409672386,98.57707533229329],"label":"Canoeing"},{"segment":[114.1185061279251,121.44518064586583],"label":"Canoeing"}]},"zL0L9Faa5CA":{"duration":90.51,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zL0L9Faa5CA","annotations":[{"segment":[41.4073088,83.23940669110765],"label":"High jump"},{"segment":[26.052028048361933,29.433936640000002],"label":"High jump"}]},"HbXNXmCRFh4":{"duration":4.06,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=HbXNXmCRFh4","annotations":[{"segment":[0.02535719812792512,4.063491],"label":"Playing water polo"}]},"WT7ZtXsTslM":{"duration":69.139,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WT7ZtXsTslM","annotations":[{"segment":[0.2,53.39281591263651],"label":"Making a cake"}]},"Pp42uLsicMk":{"duration":194.096,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Pp42uLsicMk","annotations":[]},"Xt86M-mRxi8":{"duration":129.057,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Xt86M-mRxi8","annotations":[{"segment":[33.412614664586584,102.85449453978157],"label":"Spread 
mulch"}]},"cFJo7Nm2W3Y":{"duration":210.536,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=cFJo7Nm2W3Y","annotations":[{"segment":[30.826045026525662,199.38548102180684],"label":"Beach soccer"}]},"J_ZY19xnhJU":{"duration":200.296,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=J_ZY19xnhJU","annotations":[{"segment":[0.01,200.261],"label":"Swinging at the playground"}]},"GkwkHQJifDU":{"duration":63.69,"subset":"validation","resolution":"648x480","url":"https://www.youtube.com/watch?v=GkwkHQJifDU","annotations":[{"segment":[26.9422684,48.247736],"label":"Doing step aerobics"}]},"iuVi8QAWfCI":{"duration":152.509,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iuVi8QAWfCI","annotations":[{"segment":[24.26812372230889,73.99398507488299],"label":"Painting furniture"},{"segment":[96.59664932605304,131.80922100156005],"label":"Painting furniture"}]},"uWQ5tyfrXT0":{"duration":216.64,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=uWQ5tyfrXT0","annotations":[]},"rrTNLcPCWK8":{"duration":216.69,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=rrTNLcPCWK8","annotations":[{"segment":[67.60955257410296,195.05355917628705],"label":"Dodgeball"}]},"7S15OsGinjw":{"duration":72.563,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=7S15OsGinjw","annotations":[{"segment":[42.54279016436351,45.48145259738532],"label":"Building sandcastles"},{"segment":[49.66339529053173,64.24368197744762],"label":"Building sandcastles"}]},"_UPD2IvdQ_M":{"duration":49.74,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_UPD2IvdQ_M","annotations":[{"segment":[8.542967708221436,45.9615886054177],"label":"Using the pommel 
horse"}]},"X_K7Aa3Aa-E":{"duration":67.13,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=X_K7Aa3Aa-E","annotations":[]},"W8XwSNt8P5A":{"duration":110.806,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=W8XwSNt8P5A","annotations":[{"segment":[17.113498041962497,19.70645229074469],"label":"Kneeling"},{"segment":[75.71426322308892,76.75144491107645],"label":"Kneeling"}]},"97LW-ivu01A":{"duration":159.2,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=97LW-ivu01A","annotations":[{"segment":[0.01,148.02000119812794],"label":"Kayaking"}]},"A_4yI6vh03I":{"duration":140.23,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=A_4yI6vh03I","annotations":[{"segment":[67.02882449297972,79.93159984399377],"label":"Pole vault"}]},"LYqq0dPB-U8":{"duration":83.732,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=LYqq0dPB-U8","annotations":[{"segment":[7.587182868103979,74.33698117699633],"label":"Decorating the Christmas tree"}]},"c1Gby2EHBzs":{"duration":20.922,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=c1Gby2EHBzs","annotations":[{"segment":[6.070732127925116,7.376265918876754],"label":"Hurling"},{"segment":[10.117886879875194,11.423420670826832],"label":"Hurling"},{"segment":[13.283806322932916,14.45878673478939],"label":"Hurling"}]},"Bbb8CybvMlk":{"duration":213.09,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Bbb8CybvMlk","annotations":[{"segment":[46.873046579006015,68.81362157343436],"label":"Wrapping presents"},{"segment":[77.45687838942128,111.03260678998588],"label":"Wrapping presents"},{"segment":[180.84352569110763,187.1597517722309],"label":"Wrapping 
presents"}]},"G5ZD9YfZ7aM":{"duration":206.379,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=G5ZD9YfZ7aM","annotations":[]},"IAqNteMxXNk":{"duration":106.278,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IAqNteMxXNk","annotations":[{"segment":[26.030583099843994,99.31413552106085],"label":"Cleaning sink"},{"segment":[101.30373422932918,106.077731],"label":"Cleaning sink"}]},"TpgtCuYz0RQ":{"duration":122.416,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TpgtCuYz0RQ","annotations":[{"segment":[56.32198907956318,104.24341029641185],"label":"Rock-paper-scissors"}]},"DR6F2sgfLUc":{"duration":67.04,"subset":"testing","resolution":"318x240","url":"https://www.youtube.com/watch?v=DR6F2sgfLUc","annotations":[]},"qSXMTcGBqtY":{"duration":223.77,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qSXMTcGBqtY","annotations":[{"segment":[11.171080334369837,189.21017151638065],"label":"Brushing hair"}]},"i2u80Y3BMYs":{"duration":214.74,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=i2u80Y3BMYs","annotations":[{"segment":[35.17551418876755,184.2526933697348],"label":"Clean and jerk"}]},"eECoZPqsTYI":{"duration":159.684,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eECoZPqsTYI","annotations":[{"segment":[2.4911645709828396,153.4557375725429],"label":"Playing drums"}]},"4ELWxQw0PPE":{"duration":226.767,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=4ELWxQw0PPE","annotations":[{"segment":[3.1839230793721547,25.82515374102964],"label":"Arm wrestling"},{"segment":[29.362846034321375,67.92369203120126],"label":"Arm wrestling"},{"segment":[75.35284584711388,124.52676872386895],"label":"Arm wrestling"},{"segment":[130.18707639313573,169.45546084867394],"label":"Arm 
wrestling"},{"segment":[171.57807622464898,212.96907605616224],"label":"Arm wrestling"}]},"W_iKlOPSDos":{"duration":78.972,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=W_iKlOPSDos","annotations":[{"segment":[11.30668535825545,67.98023831775701],"label":"Waterskiing"}]},"eHoeL0iPDWQ":{"duration":229.53,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=eHoeL0iPDWQ","annotations":[]},"J4WbF-bJ1T8":{"duration":177.006,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=J4WbF-bJ1T8","annotations":[{"segment":[120.20780789694386,166.2085041586261],"label":"Doing fencing"}]},"LlpV8R-EaDQ":{"duration":17.323,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=LlpV8R-EaDQ","annotations":[{"segment":[0.2,17.122086167800454],"label":"Building sandcastles"}]},"nKnsvy7-wAI":{"duration":129.057,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=nKnsvy7-wAI","annotations":[{"segment":[31.206963856785563,35.83767462263116],"label":"Rock-paper-scissors"},{"segment":[54.36051768601356,62.81659821494901],"label":"Rock-paper-scissors"},{"segment":[82.95012219656786,84.56080418096724],"label":"Rock-paper-scissors"},{"segment":[107.91569295475819,111.53972741965678],"label":"Rock-paper-scissors"}]},"m_B1Jb6Wwn0":{"duration":34.9,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=m_B1Jb6Wwn0","annotations":[{"segment":[0.01,29.618372792511703],"label":"Cleaning shoes"}]},"okSvWjK0okw":{"duration":121.19,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=okSvWjK0okw","annotations":[{"segment":[46.86890483619345,57.64119344773791],"label":"Rock climbing"},{"segment":[81.64260842433698,96.95059750390016],"label":"Rock 
climbing"}]},"sbvdGKpHy2M":{"duration":56.75,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sbvdGKpHy2M","annotations":[{"segment":[16.4669815975039,52.942231157566304],"label":"Shaving"}]},"cZ8iVGnxjX4":{"duration":174.731,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=cZ8iVGnxjX4","annotations":[]},"qx1FNJxiUuE":{"duration":134.699,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qx1FNJxiUuE","annotations":[{"segment":[22.4848000749962,30.25991785793881],"label":"Baking cookies"},{"segment":[45.81015342382403,61.15025067125135],"label":"Baking cookies"},{"segment":[63.25163385583043,66.40370863269905],"label":"Baking cookies"},{"segment":[70.1861983649414,74.38896473409957],"label":"Baking cookies"},{"segment":[90.14933853042123,104.6488824898596],"label":"Baking cookies"}]},"Bhc8INrOToY":{"duration":234.731,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Bhc8INrOToY","annotations":[{"segment":[8.420691107644306,222.61810920436818],"label":"Snowboarding"}]},"Ux4brKS0Sac":{"duration":119.22999999999999,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ux4brKS0Sac","annotations":[{"segment":[4.45458224,11.81847408],"label":"Wrapping presents"},{"segment":[18.33341984,90.24344524],"label":"Wrapping presents"}]},"00S8I27qDU4":{"duration":37.41,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=00S8I27qDU4","annotations":[{"segment":[3.5059282371294853,18.815148205928235],"label":"Javelin throw"}]},"VuiuqKX8srs":{"duration":123.949,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VuiuqKX8srs","annotations":[{"segment":[0.01,123.881],"label":"Snow 
tubing"}]},"PT18KK_6jmU":{"duration":177.889,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=PT18KK_6jmU","annotations":[{"segment":[19.1137523364486,77.57177570093458],"label":"Scuba diving"},{"segment":[109.15457009345793,152.37313084112148],"label":"Scuba diving"}]},"E4AoMNfsuaw":{"duration":92.95,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=E4AoMNfsuaw","annotations":[{"segment":[0,3.9151895475819036],"label":"Vacuuming floor"},{"segment":[33.351614664586585,36.54176911076443],"label":"Vacuuming floor"},{"segment":[42.92207800312013,92.7495],"label":"Vacuuming floor"}]},"DZrCkQ2z-u4":{"duration":50.62,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DZrCkQ2z-u4","annotations":[{"segment":[5.208028081123246,42.05877223088924],"label":"Using the pommel horse"}]},"qlP8qKtGE7U":{"duration":42.75,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=qlP8qKtGE7U","annotations":[{"segment":[0.06668944617784711,23.40799560842434],"label":"Drinking coffee"}]},"FSe9tVYHgBc":{"duration":84.78,"subset":"validation","resolution":"624x336","url":"https://www.youtube.com/watch?v=FSe9tVYHgBc","annotations":[{"segment":[34.91556629017161,45.89280872230889],"label":"Bungee jumping"},{"segment":[62.5570562698908,70.36015631201248],"label":"Bungee jumping"}]},"HatKNbfqL-k":{"duration":45.72,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=HatKNbfqL-k","annotations":[{"segment":[30.6968652,45.58],"label":"Playing flauta"},{"segment":[4.9064274570982835,28.3331412],"label":"Playing 
flauta"}]},"JoQywfQ6B-8":{"duration":137.88,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JoQywfQ6B-8","annotations":[{"segment":[73.99493167209681,122.17767636817472],"label":"Waterskiing"}]},"X6CBZwwRdQQ":{"duration":47.532,"subset":"testing","resolution":"853x480","url":"https://www.youtube.com/watch?v=X6CBZwwRdQQ","annotations":[]},"LN8UWHvoELs":{"duration":202.827,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LN8UWHvoELs","annotations":[{"segment":[77.77921857079242,187.68650829041857],"label":"Playing drums"}]},"g9bIJ1MIvPk":{"duration":214.228,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=g9bIJ1MIvPk","annotations":[{"segment":[0.01,189.83011918876755],"label":"Fixing the roof"}]},"PUHGXI6N0DA":{"duration":57.17,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PUHGXI6N0DA","annotations":[{"segment":[7.313104736434354,50.56744210764431],"label":"Using parallel bars"}]},"0x7ZcIZffyg":{"duration":144.1,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0x7ZcIZffyg","annotations":[]},"DD3MU7uM9mM":{"duration":13.282,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=DD3MU7uM9mM","annotations":[{"segment":[4.372624024960999,8.097460218408736],"label":"Doing a powerbomb"}]},"MOG4eTo4Q4Y":{"duration":10.73,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=MOG4eTo4Q4Y","annotations":[{"segment":[4.31386305,9.78216584],"label":"Dodgeball"}]},"hu714U34avg":{"duration":237.75,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=hu714U34avg","annotations":[{"segment":[40.60553822152886,215.63580343213727],"label":"Doing step 
aerobics"}]},"4kriQ6h6ymI":{"duration":157.62,"subset":"training","resolution":"424x240","url":"https://www.youtube.com/watch?v=4kriQ6h6ymI","annotations":[{"segment":[56.67276852,138.07564434],"label":"Horseback riding"},{"segment":[36.76101291,46.06514442],"label":"Horseback riding"}]},"-rx6D1uBsUU":{"duration":220.822,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-rx6D1uBsUU","annotations":[{"segment":[37.887535101404055,204.9371216848674],"label":"Rafting"}]},"mdu7eHlbDwc":{"duration":161.077,"subset":"validation","resolution":"854x478","url":"https://www.youtube.com/watch?v=mdu7eHlbDwc","annotations":[{"segment":[9.268798701150036,19.038617415495732],"label":"Scuba diving"},{"segment":[24.549797247533057,30.311485253753897],"label":"Scuba diving"},{"segment":[35.572156911607706,37.32571413089231],"label":"Scuba diving"},{"segment":[46.84502474986587,50.10163101425156],"label":"Scuba diving"},{"segment":[52.85722093027023,56.61484354302295],"label":"Scuba diving"},{"segment":[93.1890369738161,125.5045914434895],"label":"Scuba diving"},{"segment":[135.02390206246307,159.57370313244752],"label":"Scuba diving"}]},"yIIDIQDKVhI":{"duration":159.057,"subset":"training","resolution":"854x468","url":"https://www.youtube.com/watch?v=yIIDIQDKVhI","annotations":[{"segment":[3.2257811825273013,159.0558276643991],"label":"Sharpening knives"}]},"rbDbXXqaZaM":{"duration":117.68,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=rbDbXXqaZaM","annotations":[{"segment":[0,117.621],"label":"Mowing the lawn"}]},"JMlNfZlOyX8":{"duration":121.84,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=JMlNfZlOyX8","annotations":[{"segment":[27.180066441497658,111.19118089703588],"label":"Washing 
face"}]},"XPxxd9vv1MQ":{"duration":137.07,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XPxxd9vv1MQ","annotations":[]},"c8LevTVQ5nk":{"duration":72.145,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=c8LevTVQ5nk","annotations":[{"segment":[6.6967109836193455,11.198701560842434],"label":"Playing drums"},{"segment":[25.830170936817474,34.834152091263654],"label":"Playing drums"},{"segment":[35.95964973556943,39.22359290405617],"label":"Playing drums"},{"segment":[52.054266049141965,64.65983966536662],"label":"Playing drums"}]},"pLJhoqTpAaI":{"duration":101.52000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pLJhoqTpAaI","annotations":[{"segment":[18.24565041,90.25774953],"label":"Fixing bicycle"}]},"raNO7MvD-98":{"duration":105.56,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=raNO7MvD-98","annotations":[]},"QAJ4PWhM35I":{"duration":136.302,"subset":"testing","resolution":"294x240","url":"https://www.youtube.com/watch?v=QAJ4PWhM35I","annotations":[]},"sSVG3g2iKL8":{"duration":144.2,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sSVG3g2iKL8","annotations":[{"segment":[3.488297971918877,140.09454758190327],"label":"Playing bagpipes"}]},"54OZLVVOHJM":{"duration":14.002,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=54OZLVVOHJM","annotations":[]},"eBlYGGmeBY0":{"duration":53.546,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=eBlYGGmeBY0","annotations":[{"segment":[0.2,51.296814745532735],"label":"Roof shingle removal"}]},"pPn5f_ked3s":{"duration":9.82,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pPn5f_ked3s","annotations":[{"segment":[4.71917744149766,5.82236177847114],"label":"Long 
jump"}]},"rqraLuIBvyg":{"duration":90.84,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rqraLuIBvyg","annotations":[{"segment":[16.77579173166927,82.46328003120125],"label":"Shoveling snow"}]},"cXRWQa9tQLw":{"duration":156.02,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=cXRWQa9tQLw","annotations":[{"segment":[4.381072940876819,156.014875],"label":"Cheerleading"}]},"1dDAcUliXrQ":{"duration":135.09,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=1dDAcUliXrQ","annotations":[]},"DxNIzbD6uD4":{"duration":91.98,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=DxNIzbD6uD4","annotations":[{"segment":[27.975179195370707,86.2828499648411],"label":"Doing step aerobics"}]},"eCzDH6PdtlE":{"duration":95.69,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eCzDH6PdtlE","annotations":[{"segment":[3.7320230224174957,90.91208082609019],"label":"Using the balance beam"}]},"mXb1tduBEj8":{"duration":83.2,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mXb1tduBEj8","annotations":[{"segment":[6.290699687987519,10.05214898595944],"label":"Volleyball"},{"segment":[32.62084477379096,37.80905070202808],"label":"Volleyball"}]},"5PgDTLR7wFQ":{"duration":106.975,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=5PgDTLR7wFQ","annotations":[{"segment":[29.205160587375875,103.30282413416536],"label":"Cleaning 
sink"}]},"HppLOtjJY_Q":{"duration":166.84,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=HppLOtjJY_Q","annotations":[{"segment":[17.609024029999997,72.28167816999999],"label":"Kayaking"}]},"hQfjNwuaZRo":{"duration":126.387,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=hQfjNwuaZRo","annotations":[]},"o0O-CwwSaGw":{"duration":144.73,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=o0O-CwwSaGw","annotations":[{"segment":[41.970711388455534,108.31151326053042],"label":"Bathing dog"}]},"Mgym0F-T7Js":{"duration":179.96,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Mgym0F-T7Js","annotations":[{"segment":[25.018751539604683,31.18426867356107],"label":"Polishing forniture"},{"segment":[45.19680761437104,138.80056773898164],"label":"Polishing forniture"},{"segment":[144.68583409412182,144.966084872938],"label":"Polishing forniture"}]},"ES6LPFdgoRk":{"duration":60.61,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ES6LPFdgoRk","annotations":[]},"gJydcG-d1cs":{"duration":238.01,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gJydcG-d1cs","annotations":[{"segment":[0,6.683434684326149],"label":"Playing flauta"},{"segment":[119.18791853714964,120.67312624477768],"label":"Playing flauta"},{"segment":[133.66869368652297,139.60952451703508],"label":"Playing flauta"},{"segment":[156.6894131547575,161.14503627764157],"label":"Playing flauta"},{"segment":[197.5326241622465,200.13173763806552],"label":"Playing flauta"},{"segment":[215.72641849297972,224.63766469578783],"label":"Playing 
flauta"}]},"pFTqH8DYPuY":{"duration":95.44,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=pFTqH8DYPuY","annotations":[]},"lIbRuIm9MxI":{"duration":135.582,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lIbRuIm9MxI","annotations":[{"segment":[85.75392113195402,122.06058241771092],"label":"Playing congas"}]},"Qu-Y2u1Xn_U":{"duration":216.712,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Qu-Y2u1Xn_U","annotations":[{"segment":[36.91511005141168,209.0982686828269],"label":"River tubing"}]},"U_Pb3Wm_pb4":{"duration":99.08,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=U_Pb3Wm_pb4","annotations":[{"segment":[0.19999999999999998,91.35105255538221],"label":"Slacklining"}]},"TPsMocKBQU0":{"duration":147.679,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TPsMocKBQU0","annotations":[{"segment":[0.01,142.9403035815267],"label":"Slacklining"}]},"eh6b_t3nvMY":{"duration":21.53,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eh6b_t3nvMY","annotations":[]},"47Zc29aynbo":{"duration":86.263,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=47Zc29aynbo","annotations":[]},"cau489T9DLE":{"duration":30.395,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=cau489T9DLE","annotations":[{"segment":[0.9454088899072199,22.54801850577036],"label":"Baton twirling"}]},"Z9k8GiGjkZ8":{"duration":84.89,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=Z9k8GiGjkZ8","annotations":[{"segment":[1.4550488799999997,19.07086464898596],"label":"Bungee jumping"},{"segment":[64.89391443057723,74.4293467550702],"label":"Bungee 
jumping"}]},"8LTh7r2U8Fw":{"duration":157.11,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8LTh7r2U8Fw","annotations":[{"segment":[28.8998751950078,148.4180031201248],"label":"Javelin throw"}]},"2768Mbhzllc":{"duration":41.773,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2768Mbhzllc","annotations":[]},"hPYGgv_75SY":{"duration":37.92,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hPYGgv_75SY","annotations":[]},"yinXvETACC4":{"duration":126.76,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=yinXvETACC4","annotations":[{"segment":[31.726918876755068,112.77387675507019],"label":"Spinning"}]},"40BPDWojssc":{"duration":237.36,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=40BPDWojssc","annotations":[]},"sC_hs-OXERg":{"duration":181.46,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sC_hs-OXERg","annotations":[{"segment":[95.544589130504,144.65504507999998],"label":"Polishing forniture"}]},"7JQ6lw2KKJE":{"duration":75.88,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7JQ6lw2KKJE","annotations":[]},"-_N_l6HZiak":{"duration":15.07,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-_N_l6HZiak","annotations":[]},"mi6f8kGVR70":{"duration":58.445,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mi6f8kGVR70","annotations":[{"segment":[0.01,50.03608268330733],"label":"Camel 
ride"}]},"MEwGjGcV_Ik":{"duration":40.055,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=MEwGjGcV_Ik","annotations":[{"segment":[8.873210046801871,40.05442],"label":"Shuffleboard"}]},"fs-goyuhTi8":{"duration":12.145,"subset":"validation","resolution":"270x480","url":"https://www.youtube.com/watch?v=fs-goyuhTi8","annotations":[{"segment":[0,7.900253658346334],"label":"Hitting a pinata"}]},"u7THn6D6y5w":{"duration":61.16,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=u7THn6D6y5w","annotations":[]},"taOJ9kUiwgM":{"duration":94.413,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=taOJ9kUiwgM","annotations":[{"segment":[9.72108291678606,74.38101196567864],"label":"Making an omelette"}]},"UOL8oxziVwU":{"duration":42.516,"subset":"training","resolution":"800x480","url":"https://www.youtube.com/watch?v=UOL8oxziVwU","annotations":[{"segment":[10.47969803417987,32.367674941011245],"label":"Powerbocking"}]},"GhwvPy4_2KE":{"duration":57.795,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=GhwvPy4_2KE","annotations":[{"segment":[0.2,57.546],"label":"Swinging at the playground"}]},"6OLIr7E0eHA":{"duration":56.286,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6OLIr7E0eHA","annotations":[{"segment":[0.01,55.33699764151885],"label":"Playing drums"}]},"GcEutQTeAnA":{"duration":40.241,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=GcEutQTeAnA","annotations":[{"segment":[0.2,33.758031201248045],"label":"Hand car wash"}]},"0k_JEcQzi2w":{"duration":60.651,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=0k_JEcQzi2w","annotations":[]},"2rgamh4uty8":{"duration":120.605,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2rgamh4uty8","annotations":[{"segment":[5.895619289612101,110.28938252948748],"label":"Waxing 
skis"}]},"J5xPmg3tmtg":{"duration":125.272,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=J5xPmg3tmtg","annotations":[{"segment":[75.79679875195008,109.61029797191888],"label":"Waxing skis"}]},"j_YetAlzwbk":{"duration":74.49,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=j_YetAlzwbk","annotations":[{"segment":[4.21290848,74.418333],"label":"Washing dishes"}]},"5cijPqKVc84":{"duration":142.59,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=5cijPqKVc84","annotations":[]},"1OmzECVyIww":{"duration":45.81,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1OmzECVyIww","annotations":[{"segment":[16.21653176,37.03991824],"label":"Using the pommel horse"}]},"3aQnQEL3USQ":{"duration":211.929,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=3aQnQEL3USQ","annotations":[{"segment":[0.01,211.92854875283447],"label":"Powerbocking"}]},"n1LmL6_vxKs":{"duration":21.41,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=n1LmL6_vxKs","annotations":[{"segment":[1.3675834633385335,17.078115444617787],"label":"Clean and jerk"}]},"2I9xymLVssI":{"duration":215.133,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2I9xymLVssI","annotations":[{"segment":[115.24589408099686,117.26690031152647],"label":"Roof shingle removal"}]},"Yh6xzcNlAjo":{"duration":72.215,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Yh6xzcNlAjo","annotations":[{"segment":[1.0702551639480546,11.209514611876992],"label":"Baking cookies"}]},"NjFxN3xQqeY":{"duration":72.53999999999999,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NjFxN3xQqeY","annotations":[{"segment":[0.01,68.19734789391575],"label":"Mowing the 
lawn"}]},"G7zChU3ma0M":{"duration":62.23,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=G7zChU3ma0M","annotations":[{"segment":[2.653830988910711,4.300368372088281],"label":"Playing ten pins"},{"segment":[9.821111362742485,51.08140108236865],"label":"Playing ten pins"}]},"6koP53YFNjg":{"duration":39.939,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6koP53YFNjg","annotations":[{"segment":[0.010000000000000002,15.763817530516882],"label":"Beer pong"},{"segment":[31.627169724977218,39.46553198576939],"label":"Beer pong"}]},"ggNmtMAdqe0":{"duration":135.466,"subset":"training","resolution":"360x240","url":"https://www.youtube.com/watch?v=ggNmtMAdqe0","annotations":[{"segment":[8.65291887675507,42.63145397815913],"label":"Welding"},{"segment":[53.60588767550703,54.87216848673947],"label":"Welding"},{"segment":[73.65533385335414,77.66522308892357],"label":"Welding"},{"segment":[83.78558034321374,84.8408143525741],"label":"Welding"}]},"gE1nShlv6mo":{"duration":136.7,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=gE1nShlv6mo","annotations":[{"segment":[34.75154914196568,97.21905772230889],"label":"Ironing 
clothes"}]},"q9y-83399Ao":{"duration":139.97,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=q9y-83399Ao","annotations":[{"segment":[15.958529028855466,129.28774876520492],"label":"Swimming"}]},"nGYqSqf0yCY":{"duration":198.113,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=nGYqSqf0yCY","annotations":[{"segment":[30.364930310118247,154.1081911387787],"label":"Rock-paper-scissors"}]},"3Edxa-t4MxA":{"duration":158.5,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=3Edxa-t4MxA","annotations":[{"segment":[30.661242743587295,158.29884353741497],"label":"Cheerleading"},{"segment":[19.039642486739467,24.974076508580342],"label":"Cheerleading"}]},"QT2l100KJe0":{"duration":34.02,"subset":"training","resolution":"448x358","url":"https://www.youtube.com/watch?v=QT2l100KJe0","annotations":[{"segment":[6.580556580343214,16.13297742277691],"label":"Javelin throw"}]},"YAhHfaXnpKg":{"duration":88.306,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YAhHfaXnpKg","annotations":[{"segment":[31.249152886115446,54.11321840873635],"label":"Camel ride"}]},"rD3W6N2g0q4":{"duration":116.62,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rD3W6N2g0q4","annotations":[]},"8v2sAylgkgc":{"duration":63.414,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8v2sAylgkgc","annotations":[{"segment":[0.2,63.160999999999994],"label":"Swinging at the playground"}]},"9qWtHP_iSqA":{"duration":84.52,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9qWtHP_iSqA","annotations":[]},"o6Xr1PiL-Jg":{"duration":59.606,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=o6Xr1PiL-Jg","annotations":[{"segment":[4.2151017597367,56.76460651363078],"label":"Raking 
leaves"}]},"1tRTSOnylOs":{"duration":68.848,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=1tRTSOnylOs","annotations":[{"segment":[0.01,68.823944],"label":"Bullfighting"}]},"CmS2nSm5n2Q":{"duration":232.363,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CmS2nSm5n2Q","annotations":[{"segment":[22.83744372632048,30.44992496842731],"label":"Doing fencing"},{"segment":[75.76231331430128,158.4121096571754],"label":"Doing fencing"},{"segment":[160.94960340454435,176.53706390327613],"label":"Doing fencing"}]},"TCcopbAANQ0":{"duration":158.1,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TCcopbAANQ0","annotations":[{"segment":[18.00526209048362,147.98845553822153],"label":"Assembling bicycle"}]},"aFVthcfDK9Q":{"duration":31.46,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aFVthcfDK9Q","annotations":[{"segment":[8.063579563182527,23.94564508580343],"label":"Putting on makeup"}]},"f3YyN44Dx8M":{"duration":128.082,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=f3YyN44Dx8M","annotations":[{"segment":[14.186848717628704,125.48367598127925],"label":"Disc dog"}]},"3IPOOpGRl80":{"duration":191.821,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=3IPOOpGRl80","annotations":[]},"keylpQ6q1QU":{"duration":55.91,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=keylpQ6q1QU","annotations":[]},"ognXkeWL7h8":{"duration":189.41,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ognXkeWL7h8","annotations":[{"segment":[0.3754848217285132,180.92994498995282],"label":"Putting on makeup"}]},"ZvVnXvKlKbQ":{"duration":143.662,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZvVnXvKlKbQ","annotations":[{"segment":[35.859377885319496,41.23828456811742],"label":"Playing ten 
pins"},{"segment":[50.20312903944729,55.13379349867873],"label":"Playing ten pins"},{"segment":[60.51270018147664,80.23535801840237],"label":"Playing ten pins"},{"segment":[92.3378980546977,97.04444140214588],"label":"Playing ten pins"},{"segment":[102.87159030851029,116.54297678627145],"label":"Playing ten pins"}]},"ydRycaBjMVw":{"duration":237.07999999999998,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ydRycaBjMVw","annotations":[{"segment":[1.1093026521060843,84.31411544461778],"label":"Getting a piercing"},{"segment":[100.9550592823713,235.93160374414975],"label":"Getting a piercing"}]},"D-BRqQhL74Q":{"duration":44.002,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=D-BRqQhL74Q","annotations":[{"segment":[4.114914196567863,27.77567082683307],"label":"Tug of war"}]},"H1FKtaktOEc":{"duration":206.17000000000002,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=H1FKtaktOEc","annotations":[{"segment":[38.59656355694228,41.16966779407176],"label":"Archery"},{"segment":[44.38604809048361,49.85389459438377],"label":"Archery"},{"segment":[52.74863686115444,53.713550950077995],"label":"Archery"},{"segment":[67.54398622464898,82.9826116474259],"label":"Archery"},{"segment":[86.84226800312011,88.12882012168487],"label":"Archery"},{"segment":[108.71365401872073,111.28675825585023],"label":"Archery"},{"segment":[113.21658643369734,117.07624278939159],"label":"Archery"},{"segment":[122.22245126365056,123.83064141185646],"label":"Archery"},{"segment":[136.05288653822151,144.73711333853353],"label":"Archery"},{"segment":[149.56168378315132,151.8131499906396],"label":"Archery"},{"segment":[11.900607096723869,14.473711333853352],"label":"Archery"},{"segment":[19.61991980811232,20.58483389703588],"label":"Archery"},{"segment":[25.08776631201248,35.380183260530416],"label":"Archery"}]},"bcOFV26B3jk":{"duration":20.53,"subset":"training","resolution":"1280x720","url":"https:/
/www.youtube.com/watch?v=bcOFV26B3jk","annotations":[{"segment":[0.21244409999999997,7.82800062],"label":"Playing kickball"}]},"sFQ7AaId8zg":{"duration":45.721,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=sFQ7AaId8zg","annotations":[{"segment":[0.2,45.52009070294784],"label":"Peeling potatoes"}]},"taO9hPOvMz0":{"duration":196.836,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=taO9hPOvMz0","annotations":[{"segment":[16.270503900156005,22.410316692667706],"label":"Surfing"},{"segment":[25.48022308892356,87.79932293291732],"label":"Surfing"},{"segment":[88.72029485179407,171.30077691107644],"label":"Surfing"},{"segment":[182.37097347893913,186.3622574102964],"label":"Surfing"}]},"g5rkuDdjRVI":{"duration":157.757,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=g5rkuDdjRVI","annotations":[{"segment":[20.91926553252606,147.1731857464775],"label":"Longboarding"}]},"4taBobzpYNU":{"duration":120.117,"subset":"training","resolution":"640x320","url":"https://www.youtube.com/watch?v=4taBobzpYNU","annotations":[{"segment":[67.43728170360289,71.54703311686909],"label":"Sharpening knives"},{"segment":[108.34798769387669,113.01815970009753],"label":"Sharpening knives"}]},"Md9vHrFsggM":{"duration":181.186,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Md9vHrFsggM","annotations":[]},"dgao8t9JFBw":{"duration":110.2,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=dgao8t9JFBw","annotations":[]},"b_iUgvzb1-0":{"duration":44.1,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=b_iUgvzb1-0","annotations":[{"segment":[27.9289312823713,44.09469387755102],"label":"Shot 
put"}]},"IjULOynkK5I":{"duration":71.63,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IjULOynkK5I","annotations":[{"segment":[15.07134945397816,25.342195007800317],"label":"Spinning"},{"segment":[32.15221216848674,43.4278143525741],"label":"Spinning"},{"segment":[50.907669266770675,71.561],"label":"Spinning"}]},"4gEDonpfTeY":{"duration":175.43,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=4gEDonpfTeY","annotations":[{"segment":[36.39899957903078,90.31330652106085],"label":"Tango"},{"segment":[96.88154699531981,103.99714084243371],"label":"Tango"},{"segment":[141.21717019656788,145.86967386583464],"label":"Tango"},{"segment":[151.06953090795633,160.1008615600624],"label":"Tango"}]},"vcE8e-fQBhs":{"duration":179.0,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=vcE8e-fQBhs","annotations":[{"segment":[92.96726209048363,178.955],"label":"Plataform diving"},{"segment":[0,92.21288030000001],"label":"Plataform diving"}]},"PJvOR4ue3tU":{"duration":100.75,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=PJvOR4ue3tU","annotations":[{"segment":[29.009235430000004,30.335439510140404],"label":"Shot put"},{"segment":[45.738926929797195,48.882495790951644],"label":"Shot put"}]},"rNb4Jz_t9F4":{"duration":149.792,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rNb4Jz_t9F4","annotations":[{"segment":[0.01,149.09087317506305],"label":"Waterskiing"}]},"OvGxDaayPcw":{"duration":41.26,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=OvGxDaayPcw","annotations":[{"segment":[0.7724528985959439,41.261859],"label":"Hopscotch"}]},"9nndNUHadcg":{"duration":47.18,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=9nndNUHadcg","annotations":[{"segment":[6.850991520000001,38.847126365054606],"label":"Mixing 
drinks"}]},"Ij5-awZLA44":{"duration":67.756,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ij5-awZLA44","annotations":[]},"hDf32SV0QN0":{"duration":86.24,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=hDf32SV0QN0","annotations":[{"segment":[0.01,0.02],"label":"Brushing hair"}]},"ywIG-f5f9Sg":{"duration":77.068,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ywIG-f5f9Sg","annotations":[]},"mG8h5rX3OnU":{"duration":66.363,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=mG8h5rX3OnU","annotations":[{"segment":[4.348253472996063,63.36026356942278],"label":"Decorating the Christmas tree"}]},"rAO-_VxIJng":{"duration":46.81,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rAO-_VxIJng","annotations":[{"segment":[1.0406958929684007,46.24799299577214],"label":"Walking the dog"}]},"WvpiEwrWmWE":{"duration":169.854,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=WvpiEwrWmWE","annotations":[{"segment":[1.692920029791716,162.25576582418424],"label":"Baton twirling"}]},"lGRZ3F7tW2c":{"duration":140.76,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lGRZ3F7tW2c","annotations":[{"segment":[9.662109303419756,58.41184260703761],"label":"Layup drill in basketball"},{"segment":[88.93532358814352,139.22221025897036],"label":"Layup drill in 
basketball"}]},"QdmcjM2fluo":{"duration":160.729,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=QdmcjM2fluo","annotations":[]},"HlAjWgz7zZ4":{"duration":106.441,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HlAjWgz7zZ4","annotations":[{"segment":[23.009550681859057,84.92014028628557],"label":"Slacklining"}]},"88TLZbT_KkE":{"duration":126.363,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=88TLZbT_KkE","annotations":[{"segment":[15.573598225561675,79.44506436583994],"label":"Laying tile"},{"segment":[97.5814066032029,110.19799146333854],"label":"Laying tile"}]},"dbR5AzEZOPE":{"duration":191.84,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dbR5AzEZOPE","annotations":[{"segment":[49.36695919,136.59154138689547],"label":"Dodgeball"}]},"jAk-vBePtTU":{"duration":186.41,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=jAk-vBePtTU","annotations":[{"segment":[29.66271292979719,181.46600851170047],"label":"Playing congas"}]},"cFYkr23rRv0":{"duration":185.6,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=cFYkr23rRv0","annotations":[]},"NSxYVWGnNuY":{"duration":122.21,"subset":"testing","resolution":"810x480","url":"https://www.youtube.com/watch?v=NSxYVWGnNuY","annotations":[]},"_I1zlicAxpM":{"duration":13.21,"subset":"validation","resolution":"400x220","url":"https://www.youtube.com/watch?v=_I1zlicAxpM","annotations":[{"segment":[0.01,8.152064742589705],"label":"Ping-pong"}]},"cMuQUTKMc0k":{"duration":160.172,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=cMuQUTKMc0k","annotations":[{"segment":[0.01,126.68771031657593],"label":"Having an ice 
cream"}]},"KdqoLhoXjS8":{"duration":119.699,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KdqoLhoXjS8","annotations":[{"segment":[0.01,41.08207626264234],"label":"Grooming dog"},{"segment":[65.35784859965827,66.4782688613667],"label":"Grooming dog"}]},"eElKkhIJXCs":{"duration":60.19,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=eElKkhIJXCs","annotations":[{"segment":[11.526308892355695,54.8202496099844],"label":"Playing lacrosse"}]},"RZogaNvPuNs":{"duration":211.836,"subset":"validation","resolution":"360x480","url":"https://www.youtube.com/watch?v=RZogaNvPuNs","annotations":[{"segment":[109.84902336448599,184.40121339563865],"label":"Making a lemonade"}]},"Tmh2VNM2HAk":{"duration":205.06,"subset":"testing","resolution":"294x240","url":"https://www.youtube.com/watch?v=Tmh2VNM2HAk","annotations":[]},"3Hgwyprv8u4":{"duration":67.34,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3Hgwyprv8u4","annotations":[{"segment":[16.679703505460218,43.63997898283932],"label":"Vacuuming floor"}]},"t-xIv68Ac4c":{"duration":233.85,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=t-xIv68Ac4c","annotations":[{"segment":[10.212836193447737,221.4186380655226],"label":"Zumba"}]},"z-1HUIQFGIg":{"duration":175.033,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=z-1HUIQFGIg","annotations":[{"segment":[75.04912946263006,87.5903644072829],"label":"Capoeira"},{"segment":[118.39817991282756,120.57926423681509],"label":"Capoeira"}]},"pgBfTJzVB98":{"duration":54.567,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=pgBfTJzVB98","annotations":[{"segment":[33.96597551794071,34.561869825273014],"label":"Throwing 
darts"}]},"HUzoMRdSrZo":{"duration":85.938,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HUzoMRdSrZo","annotations":[{"segment":[5.228620666146646,73.87102530889236],"label":"Futsal"}]},"z-94IUxC4Xc":{"duration":139.599,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=z-94IUxC4Xc","annotations":[{"segment":[13.066929860867905,131.1048616723869],"label":"Longboarding"}]},"XKu57UKSqPc":{"duration":121.069,"subset":"validation","resolution":"300x200","url":"https://www.youtube.com/watch?v=XKu57UKSqPc","annotations":[{"segment":[15.158405895161852,121.041],"label":"Grooming dog"}]},"B_-jPPr2RLA":{"duration":29.63,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=B_-jPPr2RLA","annotations":[{"segment":[0.6672225599999999,29.17883619344774],"label":"Hopscotch"}]},"xICaX0kCCF4":{"duration":36.43,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=xICaX0kCCF4","annotations":[{"segment":[0.45469088299531984,29.10021651170047],"label":"Spinning"}]},"nPTq63WtemI":{"duration":7.199,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nPTq63WtemI","annotations":[{"segment":[0,7.198184],"label":"Kneeling"}]},"5MjAgMF_FzA":{"duration":109.158,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=5MjAgMF_FzA","annotations":[{"segment":[23.32985237776858,81.0584651957507],"label":"Making an omelette"}]},"Sb2SPX38lyQ":{"duration":167.46,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Sb2SPX38lyQ","annotations":[{"segment":[32.13395396224013,140.81464216692666],"label":"Doing motocross"}]},"7dx3MobORY0":{"duration":203.082,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=7dx3MobORY0","annotations":[{"segment":[27.83671915287402,121.152993585804],"label":"Baking 
cookies"}]},"Y0G_wA38HkI":{"duration":231.29,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Y0G_wA38HkI","annotations":[{"segment":[0.01,206.3964278127925],"label":"Windsurfing"}]},"jMQP5ePI4eQ":{"duration":181.51,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jMQP5ePI4eQ","annotations":[{"segment":[3.963419656786271,171.5594508580343],"label":"Playing guitarra"}]},"BW7_eGchA_M":{"duration":14.88,"subset":"testing","resolution":"480x480","url":"https://www.youtube.com/watch?v=BW7_eGchA_M","annotations":[]},"UPwDuuYlLfQ":{"duration":35.063,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UPwDuuYlLfQ","annotations":[{"segment":[0.01,35.062131],"label":"Grooming dog"}]},"As4WC0IZWos":{"duration":138.97,"subset":"testing","resolution":"304x176","url":"https://www.youtube.com/watch?v=As4WC0IZWos","annotations":[]},"r4iurK3kOe8":{"duration":70.055,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=r4iurK3kOe8","annotations":[{"segment":[5.901637734477379,68.4152818848674],"label":"Disc dog"}]},"ieWgalZPc2g":{"duration":132.45,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=ieWgalZPc2g","annotations":[{"segment":[17.769727996879872,122.94172276911077],"label":"Doing motocross"}]},"_8H6x8OzjyQ":{"duration":173.082,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_8H6x8OzjyQ","annotations":[]},"zzxYEZkahBU":{"duration":56.077,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zzxYEZkahBU","annotations":[{"segment":[0.2,30.881271009360372],"label":"Shuffleboard"}]},"TcrLMpMA1WM":{"duration":91.44,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=TcrLMpMA1WM","annotations":[{"segment":[5.704898595943838,55.90800624024961],"label":"Starting a 
campfire"}]},"9pJZsi04xBk":{"duration":95.97,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=9pJZsi04xBk","annotations":[{"segment":[2.3206008185905667,60.1110470105879],"label":"Playing lacrosse"}]},"o0gdMKlKLcU":{"duration":17.09,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=o0gdMKlKLcU","annotations":[{"segment":[0.01,11.11775735101404],"label":"Plastering"}]},"LS8Nku9D3Xk":{"duration":75.86,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=LS8Nku9D3Xk","annotations":[]},"H6hb3SIqdL4":{"duration":15.001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=H6hb3SIqdL4","annotations":[{"segment":[0,4.553681994126529],"label":"Throwing darts"}]},"-t2ikmhg9_w":{"duration":194.58,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-t2ikmhg9_w","annotations":[{"segment":[24.89204331981279,92.88982019344773],"label":"Vacuuming floor"},{"segment":[118.692548024961,140.85253780967238],"label":"Vacuuming floor"}]},"lLHAzwAs_9I":{"duration":197.115,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lLHAzwAs_9I","annotations":[{"segment":[39.65814196567863,85.16604836193447],"label":"Swinging at the playground"}]},"3dDTV-WVkqk":{"duration":124.483,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3dDTV-WVkqk","annotations":[]},"eil9mWGJB8E":{"duration":29.443,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=eil9mWGJB8E","annotations":[{"segment":[0.01,0.02],"label":"Arm 
wrestling"}]},"So4269j6yxM":{"duration":57.26,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=So4269j6yxM","annotations":[]},"rMWCaPh9UqE":{"duration":171.154,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=rMWCaPh9UqE","annotations":[{"segment":[18.153532993279274,166.15008425299933],"label":"Cutting the grass"}]},"_roK9m9UOvM":{"duration":37.663,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=_roK9m9UOvM","annotations":[{"segment":[13.022499219968799,32.086968798751954],"label":"Putting on shoes"}]},"Rc_pWU_3WLk":{"duration":31.533,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Rc_pWU_3WLk","annotations":[{"segment":[12.217677570093459,12.855548286604362],"label":"Sumo"}]},"8eUBLvj3veI":{"duration":186.92,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8eUBLvj3veI","annotations":[{"segment":[69.69411665446209,160.96716389391574],"label":"Removing curlers"}]},"e4bcTIoiMIk":{"duration":209.468,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=e4bcTIoiMIk","annotations":[{"segment":[76.94016458658346,117.12547347893916],"label":"Sharpening knives"}]},"gGai6uu5Yjs":{"duration":115.915,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=gGai6uu5Yjs","annotations":[{"segment":[22.965803007630512,115.91401360544218],"label":"Elliptical trainer"}]},"OMlpKhvmMpA":{"duration":32.508,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=OMlpKhvmMpA","annotations":[{"segment":[0.01,32.401],"label":"Beer 
pong"}]},"FYuYu-WBqgY":{"duration":23.615,"subset":"testing","resolution":"270x480","url":"https://www.youtube.com/watch?v=FYuYu-WBqgY","annotations":[]},"IGcalXmWUwA":{"duration":196.26,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IGcalXmWUwA","annotations":[{"segment":[16.839322416434072,20.207186899720888],"label":"Layup drill in basketball"},{"segment":[33.0663058359069,59.39688270524018],"label":"Layup drill in basketball"},{"segment":[71.94983214294558,82.05342559280602],"label":"Layup drill in basketball"}]},"QlQrJ5WsWzo":{"duration":177.076,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QlQrJ5WsWzo","annotations":[{"segment":[0.01,177.07539682539684],"label":"Camel ride"}]},"ZIvekQVbCZ4":{"duration":174.661,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZIvekQVbCZ4","annotations":[{"segment":[0.2,38.964761975513035],"label":"Rope skipping"},{"segment":[53.9512088891719,70.57254092068446],"label":"Rope skipping"},{"segment":[82.56169845161153,88.01131551112385],"label":"Rope skipping"},{"segment":[103.27024327775833,149.86446762870514],"label":"Rope skipping"},{"segment":[157.4939314352574,174.460225],"label":"Rope skipping"}]},"kx0ZSPOOFJ0":{"duration":9.57,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kx0ZSPOOFJ0","annotations":[{"segment":[2.863385335413416,8.812698907956317],"label":"Javelin throw"}]},"NjzUWVoc8rw":{"duration":7.34,"subset":"validation","resolution":"608x1080","url":"https://www.youtube.com/watch?v=NjzUWVoc8rw","annotations":[{"segment":[1.1048315132605304,7.095979719188767],"label":"Tennis serve with ball bouncing"}]},"bf3ac4bkIIo":{"duration":84.916,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=bf3ac4bkIIo","annotations":[{"segment":[21.858091333853356,64.1170679126365],"label":"Making a 
lemonade"}]},"MQJYVBmPFmE":{"duration":29.536,"subset":"testing","resolution":"853x480","url":"https://www.youtube.com/watch?v=MQJYVBmPFmE","annotations":[]},"mvomkmq32vU":{"duration":78.81,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mvomkmq32vU","annotations":[{"segment":[9.517628705148205,76.0796255850234],"label":"High jump"}]},"cmmJ3HqcaZE":{"duration":129.497,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=cmmJ3HqcaZE","annotations":[]},"qU_605hliNw":{"duration":172.385,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=qU_605hliNw","annotations":[{"segment":[8.336869333276733,165.6616603307332],"label":"Waterskiing"}]},"5kBKAfEX7XA":{"duration":45.65,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5kBKAfEX7XA","annotations":[{"segment":[9.49516146645866,44.7730647425897],"label":"Using parallel bars"}]},"-qtdjw3MYcY":{"duration":15.419,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-qtdjw3MYcY","annotations":[{"segment":[0.01,15.418048],"label":"Braiding hair"}]},"ZSkiPiKaJAs":{"duration":117.377,"subset":"testing","resolution":"294x240","url":"https://www.youtube.com/watch?v=ZSkiPiKaJAs","annotations":[]},"izSjI7hH1-Y":{"duration":103.19,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=izSjI7hH1-Y","annotations":[{"segment":[0.01,103.161],"label":"Getting a piercing"}]},"D_yO_40uREE":{"duration":112.524,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=D_yO_40uREE","annotations":[{"segment":[6.57923166926677,104.14582449297971],"label":"Installing carpet"}]},"w9cJ3FNGw_U":{"duration":188.245,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=w9cJ3FNGw_U","annotations":[{"segment":[22.896346333853355,175.85088767550704],"label":"Calf 
roping"}]},"0qTzoVjsMpQ":{"duration":225.75,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0qTzoVjsMpQ","annotations":[{"segment":[208.48747188527,218.7005406093795],"label":"Smoking hookah"}]},"wJOHmxQZPR4":{"duration":224.166,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wJOHmxQZPR4","annotations":[{"segment":[26.22320592823713,47.55141341653667],"label":"Sumo"},{"segment":[71.32712012480499,99.6481825273011],"label":"Sumo"}]},"B4Zp5XN97Gc":{"duration":160.218,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=B4Zp5XN97Gc","annotations":[]},"w69pipCSlM4":{"duration":146.867,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=w69pipCSlM4","annotations":[{"segment":[11.910595943837754,145.92467550702028],"label":"Waterskiing"}]},"EDDbcCBnTQM":{"duration":32.416,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=EDDbcCBnTQM","annotations":[]},"LRRy4naeNfE":{"duration":19.92,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=LRRy4naeNfE","annotations":[]},"ABmZdlZEZOY":{"duration":187.501,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ABmZdlZEZOY","annotations":[{"segment":[19.30589115444618,160.00488578003123],"label":"Scuba diving"}]},"EgGNBfkTmOo":{"duration":90.976,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EgGNBfkTmOo","annotations":[]},"u6HM4rYdllc":{"duration":199.266,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=u6HM4rYdllc","annotations":[{"segment":[59.05739469578784,114.69567706708268],"label":"Hula 
hoop"}]},"t05CLPgsJY0":{"duration":54.1,"subset":"testing","resolution":"360x240","url":"https://www.youtube.com/watch?v=t05CLPgsJY0","annotations":[]},"21qQL15lUNY":{"duration":65.48,"subset":"validation","resolution":"648x480","url":"https://www.youtube.com/watch?v=21qQL15lUNY","annotations":[{"segment":[1.123686416844429,60.168299956488056],"label":"Walking the dog"}]},"rXTwD2K27jo":{"duration":188.175,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rXTwD2K27jo","annotations":[]},"x2ztI3YNu9o":{"duration":140.95,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=x2ztI3YNu9o","annotations":[{"segment":[9.894740361043011,53.2117143151326],"label":"Playing accordion"},{"segment":[62.44680522932917,87.95324680187207],"label":"Playing accordion"},{"segment":[112.58015590639626,128.19185721372855],"label":"Playing accordion"}]},"G8-soqkFiI8":{"duration":24.776,"subset":"training","resolution":"3200x1800","url":"https://www.youtube.com/watch?v=G8-soqkFiI8","annotations":[{"segment":[0.01,24.775419501133786],"label":"Building sandcastles"}]},"R4M90w2zPVU":{"duration":84.242,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=R4M90w2zPVU","annotations":[{"segment":[22.26682737169518,66.9314634525661],"label":"Playing ice hockey"}]},"WmQC309JCAI":{"duration":181.046,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=WmQC309JCAI","annotations":[{"segment":[42.366439555541405,55.35881435257409],"label":"Hurling"}]},"YSO5CGeeTmE":{"duration":106.975,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YSO5CGeeTmE","annotations":[{"segment":[23.45782122546401,94.9958653285381],"label":"Drum corps"}]},"AUFI2wx5Z48":{"duration":141.32,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=AUFI2wx5Z48","annotations":[{"segment":[1.2732571600000002,62.70473552],"label":"Doing 
nails"},{"segment":[76.88720928000001,115.55833267999999],"label":"Doing nails"}]},"0BHufmWSI6Y":{"duration":145.311,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=0BHufmWSI6Y","annotations":[{"segment":[22.771794851794073,91.20047191887677],"label":"Laying tile"}]},"xnCw4tvy0uQ":{"duration":225.861,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xnCw4tvy0uQ","annotations":[{"segment":[0.01,72.23307686048939],"label":"Snow tubing"},{"segment":[122.26769595409667,223.04164708629165],"label":"Snow tubing"}]},"r6z6Ct16I_8":{"duration":66.15,"subset":"validation","resolution":"432x720","url":"https://www.youtube.com/watch?v=r6z6Ct16I_8","annotations":[{"segment":[0.6187301092043682,8.043491419656787],"label":"Doing nails"},{"segment":[25.169304212168484,29.398572542901714],"label":"Doing nails"},{"segment":[46.21249297971919,56.01201716068642],"label":"Doing nails"}]},"JGuVc7z_YOQ":{"duration":89.17,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=JGuVc7z_YOQ","annotations":[{"segment":[0.01,88.60823886288786],"label":"Playing kickball"}]},"PsCJt5fEzvQ":{"duration":188.616,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PsCJt5fEzvQ","annotations":[]},"0CyA_OyN7zU":{"duration":132.958,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=0CyA_OyN7zU","annotations":[]},"QCj7IGUGs2Y":{"duration":126.855,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QCj7IGUGs2Y","annotations":[{"segment":[0.01,126.26726677067083],"label":"Swinging at the playground"}]},"3CImIarFM30":{"duration":10.217,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3CImIarFM30","annotations":[{"segment":[0.20000000000000004,10.016757369614513],"label":"Spread 
mulch"}]},"bzJv-SZ41II":{"duration":208.237,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bzJv-SZ41II","annotations":[]},"8Fe4IEXgkXk":{"duration":14.54,"subset":"testing","resolution":"2276x720","url":"https://www.youtube.com/watch?v=8Fe4IEXgkXk","annotations":[]},"a-6lVnhqU4w":{"duration":127.32,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=a-6lVnhqU4w","annotations":[{"segment":[12.334277199999999,15.97166675],"label":"Javelin throw"},{"segment":[27.66936895,100.22364114999999],"label":"Javelin throw"}]},"JOBSEatasv4":{"duration":133.701,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JOBSEatasv4","annotations":[{"segment":[25.435572542901713,127.80332761310451],"label":"Doing crunches"}]},"QgjNH6sAziM":{"duration":90.326,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QgjNH6sAziM","annotations":[{"segment":[26.76295577583432,38.57803988798385],"label":"Grooming dog"},{"segment":[41.109843626301604,83.16591683502435],"label":"Grooming dog"}]},"Djlmto9iibw":{"duration":88.63,"subset":"training","resolution":"1906x1080","url":"https://www.youtube.com/watch?v=Djlmto9iibw","annotations":[{"segment":[0,88.461],"label":"Bathing dog"}]},"1VemxWV-g8s":{"duration":70.64,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=1VemxWV-g8s","annotations":[]},"C91n572ijKA":{"duration":129.73,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=C91n572ijKA","annotations":[]},"VgAP_Aq2A8g":{"duration":49.598,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=VgAP_Aq2A8g","annotations":[]},"NrlITLsd7Fk":{"duration":12.21,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=NrlITLsd7Fk","annotations":[{"segment":[0.1507644305772231,11.665397815912636],"label":"Using the pommel 
horse"}]},"s46pci16zS8":{"duration":136.696,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=s46pci16zS8","annotations":[]},"KGTPkiDRpfE":{"duration":40.22,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=KGTPkiDRpfE","annotations":[{"segment":[3.5971138845553825,39.38057722308892],"label":"Kayaking"}]},"yFOZHa2pn1g":{"duration":178.38,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=yFOZHa2pn1g","annotations":[{"segment":[105.88696750000001,157.18488010140405],"label":"Bungee jumping"},{"segment":[8.2730325,99.59679128549142],"label":"Bungee jumping"}]},"7-UlUnaBjXU":{"duration":46.37,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=7-UlUnaBjXU","annotations":[]},"BR9dr2iOyNc":{"duration":102.03,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=BR9dr2iOyNc","annotations":[{"segment":[0,101.961],"label":"Cleaning shoes"}]},"MYB_4V-Pddc":{"duration":206.59,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MYB_4V-Pddc","annotations":[]},"DJNmAoSQu6k":{"duration":147.912,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DJNmAoSQu6k","annotations":[{"segment":[0.01,142.473398859959],"label":"Rock-paper-scissors"}]},"nKPkHO9ajs8":{"duration":52.76,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=nKPkHO9ajs8","annotations":[{"segment":[2.222160527237416,50.368971950714766],"label":"Playing lacrosse"}]},"vmlxwwXwlEE":{"duration":136.905,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=vmlxwwXwlEE","annotations":[]},"vvHrSeomFtg":{"duration":214.81,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vvHrSeomFtg","annotations":[{"segment":[19.441400936037443,206.48108580343214],"label":"Making a 
sandwich"}]},"ZL7xefcDWYc":{"duration":51.41,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZL7xefcDWYc","annotations":[{"segment":[3.72810816,40.504402750390014],"label":"Using uneven bars"}]},"xDRvD_-fvd8":{"duration":77.3,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xDRvD_-fvd8","annotations":[{"segment":[24.347460218408735,29.771399375975037],"label":"Doing fencing"},{"segment":[35.556934477379095,37.605978159126366],"label":"Doing fencing"}]},"twfpk8zB0rc":{"duration":101.15,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=twfpk8zB0rc","annotations":[]},"D4LoDBsIFjw":{"duration":156.085,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=D4LoDBsIFjw","annotations":[{"segment":[52.81834165366614,61.824234009360374],"label":"Canoeing"},{"segment":[72.04713884555382,86.16448361934476],"label":"Canoeing"},{"segment":[97.11759594383774,99.79502340093603],"label":"Canoeing"},{"segment":[110.27546489859594,112.46636817472698],"label":"Canoeing"},{"segment":[125.12492043681746,145.32991731669267],"label":"Canoeing"}]},"cH3H-arDsH4":{"duration":62.65,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cH3H-arDsH4","annotations":[]},"pbPWMH4Ep2A":{"duration":233.616,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pbPWMH4Ep2A","annotations":[]},"oZa-yum3mcU":{"duration":126.25,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oZa-yum3mcU","annotations":[{"segment":[31.506021840873636,42.73004212168487],"label":"Horseback riding"},{"segment":[49.818897035881434,61.43674258970359],"label":"Horseback riding"},{"segment":[69.70707332293291,101.60692043681748],"label":"Horseback 
riding"}]},"0S0fNrI4oVs":{"duration":85.821,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=0S0fNrI4oVs","annotations":[{"segment":[51.911494539781586,64.77067706708269],"label":"Doing fencing"}]},"fG0nn2IVdDM":{"duration":55.055,"subset":"validation","resolution":"328x240","url":"https://www.youtube.com/watch?v=fG0nn2IVdDM","annotations":[{"segment":[0.2,11.76672107428515],"label":"Kite flying"},{"segment":[28.343195990639625,40.28169369578783],"label":"Kite flying"},{"segment":[43.803121076443055,46.980994566302655],"label":"Kite flying"}]},"c3vEqTcKPsU":{"duration":79.784,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=c3vEqTcKPsU","annotations":[{"segment":[19.416953527120675,23.026515062402492],"label":"Mooping floor"},{"segment":[26.63607688299532,29.623300458658345],"label":"Mooping floor"}]},"omuYi2Vhgjo":{"duration":233.64,"subset":"training","resolution":"480x272","url":"https://www.youtube.com/watch?v=omuYi2Vhgjo","annotations":[{"segment":[33.39869505,157.81379894],"label":"Playing piano"},{"segment":[169.90695358,226.78196113104525],"label":"Playing piano"}]},"mTFMQTutS2w":{"duration":105.349,"subset":"testing","resolution":"848x480","url":"https://www.youtube.com/watch?v=mTFMQTutS2w","annotations":[]},"5SpWmZxECqc":{"duration":214.25,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=5SpWmZxECqc","annotations":[{"segment":[23.731336848673944,101.61023101404056],"label":"Brushing hair"},{"segment":[119.3251726053042,207.23139219968797],"label":"Brushing hair"}]},"Ds2O0-8Labs":{"duration":100.15,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ds2O0-8Labs","annotations":[]},"D1E_KJRxGvQ":{"duration":96.67,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=D1E_KJRxGvQ","annotations":[{"segment":[4.524067347893916,90.4813469578783],"label":"Using the balance 
beam"}]},"a8dUtKcAunw":{"duration":117.03,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=a8dUtKcAunw","annotations":[{"segment":[4.379095151326053,110.93707716692667],"label":"Playing guitarra"}]},"iYxj8a1TPYk":{"duration":157.41,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iYxj8a1TPYk","annotations":[{"segment":[23.546662719999997,107.45929343999998],"label":"Layup drill in basketball"}]},"vruB4ViPGuE":{"duration":18.02,"subset":"training","resolution":"640x356","url":"https://www.youtube.com/watch?v=vruB4ViPGuE","annotations":[{"segment":[0.8976474258970359,11.893828393135726],"label":"Drinking coffee"}]},"B4zPg1ftxf4":{"duration":213.37,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=B4zPg1ftxf4","annotations":[{"segment":[30.281421216848674,198.6780608424337],"label":"Paintball"}]},"LHiui4s2X1s":{"duration":69.684,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=LHiui4s2X1s","annotations":[{"segment":[0.01,69.68308390022676],"label":"Beer pong"}]},"eiN5XPmwcEM":{"duration":216.41,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eiN5XPmwcEM","annotations":[]},"TLJdzU44My4":{"duration":44.26,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=TLJdzU44My4","annotations":[{"segment":[2.589151638065523,25.649862227769113],"label":"Vacuuming 
floor"}]},"aaMrQi25WHc":{"duration":74.095,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aaMrQi25WHc","annotations":[{"segment":[16.414097672386895,68.08382766926677],"label":"Surfing"}]},"sBShp5KeRS8":{"duration":118.93,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sBShp5KeRS8","annotations":[]},"Kp7pUEKrb8Q":{"duration":89.63,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Kp7pUEKrb8Q","annotations":[{"segment":[8.94885631201248,89.62839002267573],"label":"Making a lemonade"}]},"JcsnMUVBlac":{"duration":165.42000000000002,"subset":"validation","resolution":"480x272","url":"https://www.youtube.com/watch?v=JcsnMUVBlac","annotations":[{"segment":[0,150.19320623400938],"label":"Playing piano"}]},"IIPKxzPjrWg":{"duration":103.77,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=IIPKxzPjrWg","annotations":[]},"TjMdEjgAKLw":{"duration":51.874,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TjMdEjgAKLw","annotations":[{"segment":[0.2,50.791260124610595],"label":"Plastering"}]},"G6W6wen6n7w":{"duration":46.95,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=G6W6wen6n7w","annotations":[{"segment":[1.0954680187207488,46.15571918876755],"label":"High jump"}]},"63aZHbMd-KY":{"duration":99.057,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=63aZHbMd-KY","annotations":[]},"j_NshVqDA-8":{"duration":32.3,"subset":"testing","resolution":"714x480","url":"https://www.youtube.com/watch?v=j_NshVqDA-8","annotations":[]},"CRH5U5XKb2Q":{"duration":225.16,"subset":"validation","resolution":"854x474","url":"https://www.youtube.com/watch?v=CRH5U5XKb2Q","annotations":[{"segment":[4.74100624024961,217.20832293291733],"label":"Playing 
guitarra"}]},"HQGwzM22zfg":{"duration":226.79,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HQGwzM22zfg","annotations":[]},"80pIiAmVa5Q":{"duration":43.84,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=80pIiAmVa5Q","annotations":[{"segment":[7.31155538221529,43.18600936037441],"label":"Fun sliding down"}]},"tSUzEX6ps0s":{"duration":132.89,"subset":"training","resolution":"800x480","url":"https://www.youtube.com/watch?v=tSUzEX6ps0s","annotations":[{"segment":[2.0731280135559165,25.914100169448954],"label":"Making a sandwich"},{"segment":[67.99859884463405,113.40010234150863],"label":"Making a sandwich"}]},"JH-Q5JNTgwE":{"duration":215.691,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JH-Q5JNTgwE","annotations":[{"segment":[7.4027691284522135,10.767664186839584],"label":"Playing blackjack"},{"segment":[104.31174681000846,115.07941099684805],"label":"Playing blackjack"},{"segment":[119.11728506691287,123.15515913697773],"label":"Playing blackjack"},{"segment":[201.89370350324216,202.5666822870515],"label":"Playing blackjack"}]},"af51-Bv-4Gs":{"duration":60.26,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=af51-Bv-4Gs","annotations":[]},"7gzD-qp0Faw":{"duration":227.486,"subset":"testing","resolution":"540x360","url":"https://www.youtube.com/watch?v=7gzD-qp0Faw","annotations":[]},"iPc-u9aU_SQ":{"duration":157.78,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=iPc-u9aU_SQ","annotations":[{"segment":[22.631282371294855,157.681],"label":"Changing car 
wheel"}]},"pB7kDdqxcBI":{"duration":83.43,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=pB7kDdqxcBI","annotations":[]},"kWh_zSLpZuQ":{"duration":109.25,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kWh_zSLpZuQ","annotations":[{"segment":[64.23762402496101,78.72090795631826],"label":"Layup drill in basketball"},{"segment":[83.32148049921997,101.5533790951638],"label":"Layup drill in basketball"}]},"nvsZFU73npI":{"duration":173.755,"subset":"training","resolution":"640x352","url":"https://www.youtube.com/watch?v=nvsZFU73npI","annotations":[{"segment":[0.01,173.75492063492064],"label":"Sumo"}]},"Ke29p3rcs1k":{"duration":151.09,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ke29p3rcs1k","annotations":[{"segment":[68.46582888,129.69957431045242],"label":"Polishing forniture"}]},"u0p_dBCEDs4":{"duration":140.39,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=u0p_dBCEDs4","annotations":[{"segment":[59.53859076197593,132.79391844110367],"label":"Preparing pasta"}]},"qywEvklc1kM":{"duration":109.483,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=qywEvklc1kM","annotations":[{"segment":[21.512583463338533,59.93886271450859],"label":"Making an omelette"}]},"ijNy8Fh5HfU":{"duration":110.202,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ijNy8Fh5HfU","annotations":[]},"LsK452h29ng":{"duration":164.328,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LsK452h29ng","annotations":[{"segment":[34.08638533541342,153.26058970358815],"label":"Using the rowing 
machine"}]},"j4ryUBchNiE":{"duration":36.08,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=j4ryUBchNiE","annotations":[]},"CrWlXxqj4ac":{"duration":167.556,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=CrWlXxqj4ac","annotations":[{"segment":[3.920948348138007,147.1662598018721],"label":"Decorating the Christmas tree"}]},"Wr69bnB86d0":{"duration":95.13,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Wr69bnB86d0","annotations":[{"segment":[59.632090483619336,81.43785491419656],"label":"Drinking beer"}]},"BdRH3lpopLQ":{"duration":58.91,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BdRH3lpopLQ","annotations":[]},"gU67yCVXHnQ":{"duration":75.12,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=gU67yCVXHnQ","annotations":[{"segment":[3.515595317690259,72.65563628705148],"label":"Brushing hair"}]},"sJ0Tc4nthOA":{"duration":61.905,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sJ0Tc4nthOA","annotations":[{"segment":[7.575801092043681,54.86424102964118],"label":"Doing fencing"}]},"PjSOjB7qhcA":{"duration":31.58,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PjSOjB7qhcA","annotations":[{"segment":[7.67559174,28.9737325],"label":"Dodgeball"}]},"CZRkT9Lno3k":{"duration":51.15,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=CZRkT9Lno3k","annotations":[{"segment":[31.57439309986841,40.57805691139489],"label":"Pole 
vault"}]},"3svMy4Kfjd8":{"duration":116.983,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3svMy4Kfjd8","annotations":[{"segment":[0,33.41260215910337],"label":"Waterskiing"},{"segment":[34.32509928483874,116.81483893278437],"label":"Waterskiing"}]},"vSv1qMMt4Bk":{"duration":201.99,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vSv1qMMt4Bk","annotations":[]},"6wTk8QqWxuo":{"duration":49.13,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=6wTk8QqWxuo","annotations":[{"segment":[0.15330241508980086,48.9034703775351],"label":"Mowing the lawn"}]},"sFMa1EP7d2M":{"duration":221.101,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sFMa1EP7d2M","annotations":[{"segment":[53.44767550702028,208.27540717628705],"label":"Assembling bicycle"}]},"iPU4muYW14Y":{"duration":180.26,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iPU4muYW14Y","annotations":[{"segment":[41.45862812,88.30036875488625],"label":"Tai chi"},{"segment":[12.970700079999999,42.181704819213174],"label":"Tai chi"}]},"FIwKvp1H_UM":{"duration":83.592,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FIwKvp1H_UM","annotations":[]},"eJSSRVy5n88":{"duration":9.196,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eJSSRVy5n88","annotations":[]},"6Pj1Ocz8MUo":{"duration":101.402,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6Pj1Ocz8MUo","annotations":[{"segment":[0.01,101.241],"label":"Longboarding"}]},"6LGYaYsMIbc":{"duration":36.73,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6LGYaYsMIbc","annotations":[]},"JqFFXs0RAnI":{"duration":38.546,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=JqFFXs0RAnI","annotations":[{"segment":[0.2,16.175722444617783],"label":"Playing rubik 
cube"}]},"Si4_ER4lUME":{"duration":39.43,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Si4_ER4lUME","annotations":[{"segment":[3.4403744149765996,36.17624385],"label":"Kayaking"}]},"sxQbiXWFdKs":{"duration":171.271,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sxQbiXWFdKs","annotations":[{"segment":[35.002201865834635,37.406933291731676],"label":"River tubing"},{"segment":[40.61324185959439,42.7507809048362],"label":"River tubing"},{"segment":[46.758666614664584,52.10251422776911],"label":"River tubing"},{"segment":[70.00440373166927,72.40913515756631],"label":"River tubing"},{"segment":[82.5624456224649,86.03594657098283],"label":"River tubing"},{"segment":[91.37979418408737,93.51733322932917],"label":"River tubing"},{"segment":[105.80818273946959,110.61764559126365],"label":"River tubing"},{"segment":[154.704388399376,158.17788934789394],"label":"River tubing"}]},"6E4_XOsT89k":{"duration":13.52,"subset":"training","resolution":"136x240","url":"https://www.youtube.com/watch?v=6E4_XOsT89k","annotations":[{"segment":[0.5466993,13.141028990639626],"label":"Plataform diving"}]},"8v4NoUJMssI":{"duration":55.26,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8v4NoUJMssI","annotations":[{"segment":[30.692183444617786,49.48683510452418],"label":"Drinking coffee"}]},"lneRTkBTPwg":{"duration":79.854,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=lneRTkBTPwg","annotations":[{"segment":[0,79.821],"label":"Doing fencing"}]},"cErtrZrWhiQ":{"duration":84.57,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cErtrZrWhiQ","annotations":[{"segment":[8.707374131045242,83.90742344461778],"label":"Playing 
accordion"}]},"8Nv52hFr0tg":{"duration":123.62,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=8Nv52hFr0tg","annotations":[{"segment":[11.15326706,110.3692472698908],"label":"Using the balance beam"}]},"apwnQn2dGv0":{"duration":20.666,"subset":"testing","resolution":"360x240","url":"https://www.youtube.com/watch?v=apwnQn2dGv0","annotations":[]},"2Ks8gsK22PA":{"duration":109.506,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=2Ks8gsK22PA","annotations":[{"segment":[8.570090602518242,81.19263039294803],"label":"Playing drums"}]},"4Fxj9ObTMw8":{"duration":17.3,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=4Fxj9ObTMw8","annotations":[{"segment":[0.01,1.2198859957490606],"label":"Washing face"}]},"cy8dcwxvxdY":{"duration":25.52,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=cy8dcwxvxdY","annotations":[]},"px-_NixBgHM":{"duration":44.7,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=px-_NixBgHM","annotations":[]},"13ubLHzEW8A":{"duration":198.95,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=13ubLHzEW8A","annotations":[]},"FmHDfTBXEE0":{"duration":187.13,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FmHDfTBXEE0","annotations":[]},"N6vpa6BarX4":{"duration":184.65,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=N6vpa6BarX4","annotations":[{"segment":[0,37.44752459486135],"label":"Sailing"},{"segment":[62.796618166767495,82.38455410869496],"label":"Sailing"}]},"djpr7UMlnSw":{"duration":23.987,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=djpr7UMlnSw","annotations":[{"segment":[0.19999999999999998,23.362909516380654],"label":"Laying 
tile"}]},"G1LGXWN_9v4":{"duration":129.452,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=G1LGXWN_9v4","annotations":[{"segment":[0.2,129.20100000000002],"label":"Playing congas"}]},"lVe-7FnfNz8":{"duration":17.253,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lVe-7FnfNz8","annotations":[{"segment":[0.009999999999999998,17.252425],"label":"Using the monkey bar"}]},"s5oN3d77e50":{"duration":71.87,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=s5oN3d77e50","annotations":[{"segment":[0.01,71.761],"label":"Doing motocross"}]},"B0rorWq-j44":{"duration":47.648,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=B0rorWq-j44","annotations":[{"segment":[18.480045023364486,27.608741962616822],"label":"Mooping floor"}]},"Wt7Ca_mHbL0":{"duration":25.78,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Wt7Ca_mHbL0","annotations":[{"segment":[13.156521060842433,17.78343213728549],"label":"Springboard diving"}]},"mOvBVmQrexM":{"duration":229.646,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mOvBVmQrexM","annotations":[]},"mhqfGS59FG0":{"duration":79.55,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mhqfGS59FG0","annotations":[{"segment":[10.11146723868955,76.85955772230889],"label":"Playing racquetball"}]},"ankROxowV70":{"duration":195.257,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ankROxowV70","annotations":[]},"Jix3poZOeZA":{"duration":7.9,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=Jix3poZOeZA","annotations":[]},"aWz9H0jTv3c":{"duration":170.55,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=aWz9H0jTv3c","annotations":[{"segment":[2.9946571487100804,137.9474242243877],"label":"Playing 
squash"},{"segment":[149.63624550116077,161.05941271144115],"label":"Playing squash"}]},"LsVCwr6qPmY":{"duration":154.62,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=LsVCwr6qPmY","annotations":[]},"xaG1pLsiYqo":{"duration":43.909,"subset":"testing","resolution":"854x476","url":"https://www.youtube.com/watch?v=xaG1pLsiYqo","annotations":[]},"73n-KuFkH94":{"duration":179.7,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=73n-KuFkH94","annotations":[{"segment":[16.674851794071763,22.560093603744146],"label":"Triple jump"},{"segment":[27.044087363494537,29.286084243369732],"label":"Triple jump"},{"segment":[35.73182527301092,41.617067082683306],"label":"Triple jump"},{"segment":[79.45076443057722,86.7372542901716],"label":"Triple jump"},{"segment":[94.02374414976599,109.15722308892356],"label":"Triple jump"},{"segment":[112.52021840873634,147.83166926677066],"label":"Triple jump"},{"segment":[151.19466458658346,157.64040561622465],"label":"Triple jump"}]},"98buk_vE_z4":{"duration":176.728,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=98buk_vE_z4","annotations":[{"segment":[9.098274833073322,172.86722182839313],"label":"Playing congas"}]},"kgPvfYhmn54":{"duration":183.019,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=kgPvfYhmn54","annotations":[{"segment":[8.05991541577041,11.765921448505416],"label":"Rafting"},{"segment":[83.32034491805852,108.69223215949155],"label":"Rafting"},{"segment":[126.9371847825445,132.63873247724857],"label":"Rafting"}]},"N_gMG044zh4":{"duration":221.078,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=N_gMG044zh4","annotations":[]},"27e4y89XeyI":{"duration":20.09,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=27e4y89XeyI","annotations":[{"segment":[4.324127522620905,16.16847682371295],"label":"Ironing 
clothes"}]},"uyp6t45rOD8":{"duration":117.68,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uyp6t45rOD8","annotations":[{"segment":[1.562342433697348,114.78621762870516],"label":"High jump"}]},"Gn7Ha7l1uOk":{"duration":203.268,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Gn7Ha7l1uOk","annotations":[{"segment":[15.53838793551742,194.3884041729016],"label":"Playing drums"}]},"kZJPGcmq3J8":{"duration":97.75999999999999,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=kZJPGcmq3J8","annotations":[]},"G_qDsLIVhaA":{"duration":30.14,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=G_qDsLIVhaA","annotations":[]},"lipavQnHm_g":{"duration":74.91,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=lipavQnHm_g","annotations":[{"segment":[44.72914196567863,52.670608424336976],"label":"Playing racquetball"}]},"zqcJ0N_a6y8":{"duration":81.108,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zqcJ0N_a6y8","annotations":[{"segment":[0.01,81.081],"label":"Tug of war"}]},"tQ9yPS_BJHQ":{"duration":60.721,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=tQ9yPS_BJHQ","annotations":[{"segment":[0.2,58.144171606864276],"label":"Riding bumper cars"}]},"y0_lTTdKkro":{"duration":96.99,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=y0_lTTdKkro","annotations":[{"segment":[24.360920189188516,86.24673607352457],"label":"Futsal"}]},"_5by7Is4JEU":{"duration":82.431,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=_5by7Is4JEU","annotations":[{"segment":[0.2,18.382750390015598],"label":"Rope skipping"},{"segment":[40.750572542901715,63.889698907956316],"label":"Rope skipping"},{"segment":[71.98839313572543,75.84491419656786],"label":"Rope 
skipping"}]},"ABBA086Gmq0":{"duration":200.57,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ABBA086Gmq0","annotations":[{"segment":[10.63886754446178,197.1319574414977],"label":"Doing motocross"}]},"W3fKTlm1BPs":{"duration":236.66,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=W3fKTlm1BPs","annotations":[{"segment":[17.35241077398198,228.16574076131045],"label":"Playing lacrosse"}]},"PPXrVuZAb74":{"duration":10.08,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=PPXrVuZAb74","annotations":[{"segment":[1.5139843993759752,7.317591263650546],"label":"Plataform diving"}]},"Qdf3iuE0tYA":{"duration":100.566,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Qdf3iuE0tYA","annotations":[]},"m12BPEN6Y3s":{"duration":180.396,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=m12BPEN6Y3s","annotations":[{"segment":[15.755219968798754,170.51289547581902],"label":"Baking cookies"}]},"1IbkFHNA7fg":{"duration":56.33,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=1IbkFHNA7fg","annotations":[]},"mglEC2-MH14":{"duration":225.1,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mglEC2-MH14","annotations":[{"segment":[134.84564381758943,144.3269781485137],"label":"Removing curlers"}]},"0DFbXnXdGIQ":{"duration":130.43,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0DFbXnXdGIQ","annotations":[]},"Sbgh1UQcG24":{"duration":196.93,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Sbgh1UQcG24","annotations":[{"segment":[3.3950387199999996,190.8922861450858],"label":"Preparing 
salad"}]},"R7vb6cuP6GE":{"duration":79.181,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=R7vb6cuP6GE","annotations":[{"segment":[24.21105869578783,75.47426970982839],"label":"Snowboarding"}]},"enx7eEDrYcc":{"duration":14.094,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=enx7eEDrYcc","annotations":[{"segment":[0,13.84385647425897],"label":"Kneeling"}]},"N1c3C_Npr-E":{"duration":65.81,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=N1c3C_Npr-E","annotations":[{"segment":[0.2,65.80535],"label":"Horseback riding"}]},"6ffxjwTIZHk":{"duration":105.767,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6ffxjwTIZHk","annotations":[{"segment":[31.350453691617055,55.935809481358845],"label":"Spread mulch"}]},"306mYrvdzIk":{"duration":229.39,"subset":"training","resolution":"384x288","url":"https://www.youtube.com/watch?v=306mYrvdzIk","annotations":[{"segment":[0,207.65668614],"label":"Volleyball"}]},"6hOaMdO2-bY":{"duration":49.13,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=6hOaMdO2-bY","annotations":[]},"KoCjfxd302Q":{"duration":121.95,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=KoCjfxd302Q","annotations":[{"segment":[2.283013998110945,119.47773256780611],"label":"Washing dishes"}]},"-Mpj6_BEodc":{"duration":79.32,"subset":"testing","resolution":"400x240","url":"https://www.youtube.com/watch?v=-Mpj6_BEodc","annotations":[]},"mWj6lO9PZCs":{"duration":145.102,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mWj6lO9PZCs","annotations":[{"segment":[16.07208464665117,122.01202153666146],"label":"Cutting the 
grass"}]},"OTm43dbEEuE":{"duration":71.19,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=OTm43dbEEuE","annotations":[{"segment":[11.5507135975039,70.52599167706708],"label":"Cheerleading"}]},"mtXxvToU6qA":{"duration":70.055,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mtXxvToU6qA","annotations":[]},"seQE5VZt3K0":{"duration":181.859,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=seQE5VZt3K0","annotations":[{"segment":[0.01,181.801],"label":"Laying tile"}]},"5Ia1MCFut_8":{"duration":18.762,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=5Ia1MCFut_8","annotations":[{"segment":[11.494227769110765,17.09547581903276],"label":"Fun sliding down"}]},"VgQxsmHHD4I":{"duration":71.53999999999999,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VgQxsmHHD4I","annotations":[]},"ZIbrxshdMu0":{"duration":139.785,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=ZIbrxshdMu0","annotations":[{"segment":[15.513697120666501,29.44436378733317],"label":"Croquet"},{"segment":[65.7946971206665,76.67803045399984],"label":"Croquet"}]},"VE9MAMmF1wc":{"duration":110.5,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=VE9MAMmF1wc","annotations":[{"segment":[65.66620826833073,96.64895397815913],"label":"Bungee 
jumping"}]},"WkWCbz54JSU":{"duration":143.268,"subset":"training","resolution":"220x166","url":"https://www.youtube.com/watch?v=WkWCbz54JSU","annotations":[{"segment":[26.5144403040506,90.6838495286427],"label":"Bullfighting"},{"segment":[100.93313020360381,104.72090784435032],"label":"Bullfighting"}]},"OAHL9M7QNSY":{"duration":90.02,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OAHL9M7QNSY","annotations":[]},"wBbSb9Ubis4":{"duration":200.249,"subset":"training","resolution":"400x224","url":"https://www.youtube.com/watch?v=wBbSb9Ubis4","annotations":[{"segment":[0.01,195.2504766857341],"label":"Bullfighting"}]},"F6cNWYlfUs8":{"duration":47.114,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=F6cNWYlfUs8","annotations":[{"segment":[0.01,37.11733296542746],"label":"Waxing skis"}]},"m4ef0fCA2WU":{"duration":31.719,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=m4ef0fCA2WU","annotations":[{"segment":[12.257235569422777,27.18338533541342],"label":"Swinging at the playground"}]},"MFx2omPfM2o":{"duration":108.066,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=MFx2omPfM2o","annotations":[{"segment":[0.2,96.13306528994438],"label":"Riding bumper cars"}]},"dOUCAVnJLko":{"duration":206.29,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=dOUCAVnJLko","annotations":[{"segment":[6.919111790321953,109.25760245647922],"label":"Bungee jumping"}]},"lc_xgozMTWo":{"duration":54.36,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lc_xgozMTWo","annotations":[]},"KTIzccovgeg":{"duration":65.25,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=KTIzccovgeg","annotations":[{"segment":[26.872840474258968,46.00956020592823],"label":"Shoveling 
snow"}]},"jE2jDJ9WHeE":{"duration":80.78,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jE2jDJ9WHeE","annotations":[{"segment":[10.06220592,68.37630826],"label":"Playing lacrosse"}]},"AL1W3HkjJ2Y":{"duration":45.67,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=AL1W3HkjJ2Y","annotations":[]},"A8xThM3onkc":{"duration":73.24,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=A8xThM3onkc","annotations":[{"segment":[8.734645046021841,65.70964998673948],"label":"Mixing drinks"}]},"fgFU8vdsA8k":{"duration":142.73,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=fgFU8vdsA8k","annotations":[{"segment":[70.36450444140215,142.5330612244898],"label":"Bungee jumping"}]},"O7SljQqahy8":{"duration":37.106,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=O7SljQqahy8","annotations":[{"segment":[0.20000000000000004,36.881],"label":"Cleaning sink"}]},"at3YFTd6nEw":{"duration":222.89,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=at3YFTd6nEw","annotations":[{"segment":[45.55122855798585,180.46631772209665],"label":"Getting a piercing"}]},"n5xfkpQ8PWM":{"duration":15.23,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=n5xfkpQ8PWM","annotations":[{"segment":[1.7005004799999999,7.16650368],"label":"Wrapping presents"}]},"CF2HhBzVSaA":{"duration":219.92000000000002,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=CF2HhBzVSaA","annotations":[]},"2aHetC-N-P4":{"duration":180.35,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=2aHetC-N-P4","annotations":[{"segment":[0.01,180.34938775510204],"label":"Cutting the 
grass"}]},"fg_R9Vrr1KI":{"duration":50.64,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fg_R9Vrr1KI","annotations":[{"segment":[1.2640922808112325,44.00621252574103],"label":"Using parallel bars"}]},"96krk6Ka9Vc":{"duration":115.752,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=96krk6Ka9Vc","annotations":[{"segment":[56.34081008424337,91.55381638689548],"label":"Hurling"}]},"MmYNcmba_Ps":{"duration":30.51,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MmYNcmba_Ps","annotations":[{"segment":[11.808983619344772,22.406180187207486],"label":"Plataform diving"}]},"2Lrcn6XiiSY":{"duration":209.328,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=2Lrcn6XiiSY","annotations":[]},"KVaTsulE5Z0":{"duration":110.922,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KVaTsulE5Z0","annotations":[{"segment":[7.671219046811601,101.48815146269945],"label":"Slacklining"}]},"mXbrIyhXbQo":{"duration":31.231,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mXbrIyhXbQo","annotations":[{"segment":[0.01,31.181],"label":"Powerbocking"}]},"PfhsPS1-NB8":{"duration":107.16,"subset":"testing","resolution":"608x1080","url":"https://www.youtube.com/watch?v=PfhsPS1-NB8","annotations":[]},"oYDD-65IT4E":{"duration":137.509,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=oYDD-65IT4E","annotations":[]},"x3WVWHn3teI":{"duration":129.55,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=x3WVWHn3teI","annotations":[{"segment":[13.742590694103956,29.101956763984848],"label":"Doing motocross"}]},"-boJqGIgV9I":{"duration":67.408,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-boJqGIgV9I","annotations":[{"segment":[36.38534218720749,44.1671783775351],"label":"Throwing 
darts"}]},"ij9czNGMR5I":{"duration":55.57,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=ij9czNGMR5I","annotations":[]},"5PkSdg7BdnU":{"duration":89.722,"subset":"testing","resolution":"352x240","url":"https://www.youtube.com/watch?v=5PkSdg7BdnU","annotations":[]},"x06fLYM58wM":{"duration":143.04,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=x06fLYM58wM","annotations":[{"segment":[2.67771742706443,133.88587113884554],"label":"Ballet"}]},"4qONyTNaPOI":{"duration":204.428,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=4qONyTNaPOI","annotations":[{"segment":[37.94299977713395,65.36399121270973],"label":"Waterskiing"},{"segment":[72.0598147027922,85.13261294533415],"label":"Waterskiing"},{"segment":[97.56771371263014,125.30755388582891],"label":"Waterskiing"},{"segment":[135.1918647521411,150.49660415804388],"label":"Waterskiing"},{"segment":[154.0039402718966,156.87357828393135],"label":"Waterskiing"},{"segment":[160.6997631201248,178.55529235569423],"label":"Waterskiing"},{"segment":[182.38147719188765,182.70032592823713],"label":"Waterskiing"}]},"jVoj7XaUoU8":{"duration":162.24,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jVoj7XaUoU8","annotations":[{"segment":[15.814937597503901,48.20392979719189],"label":"Ballet"},{"segment":[64.14538689547582,151.19080343213727],"label":"Ballet"}]},"COIwJdDOmcc":{"duration":199.251,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=COIwJdDOmcc","annotations":[{"segment":[135.19677847113886,145.14228861154447],"label":"Changing car wheel"}]},"cBMi-cIbI2c":{"duration":20.76,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cBMi-cIbI2c","annotations":[{"segment":[5.019639453978159,19.916633962558503],"label":"Discus 
throw"}]},"b4DhjwkO-b4":{"duration":65.71,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=b4DhjwkO-b4","annotations":[{"segment":[8.728164452187249,61.134371056548616],"label":"Dodgeball"}]},"r10jGF9lQYM":{"duration":216.573,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=r10jGF9lQYM","annotations":[{"segment":[0.01,202.99079719188768],"label":"Ice fishing"}]},"hDV6wQKvKCg":{"duration":44.42,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hDV6wQKvKCg","annotations":[{"segment":[0.2,44.160999999999994],"label":"Swinging at the playground"}]},"n9Fqjdwh-L8":{"duration":88.515,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=n9Fqjdwh-L8","annotations":[{"segment":[14.913513960966602,34.79819924225541],"label":"Capoeira"},{"segment":[36.04099207233595,82.43859001872075],"label":"Capoeira"}]},"gIFI03m-ATM":{"duration":54.5,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=gIFI03m-ATM","annotations":[{"segment":[5.648868954758191,16.94660686427457],"label":"Paintball"}]},"MpM-4y7oPuU":{"duration":80.899,"subset":"testing","resolution":"328x240","url":"https://www.youtube.com/watch?v=MpM-4y7oPuU","annotations":[]},"tydn-vo3DaY":{"duration":221.4,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tydn-vo3DaY","annotations":[{"segment":[100.85718488331369,104.3111980642491],"label":"Rock climbing"},{"segment":[126.76228374032921,144.37775096309974],"label":"Rock climbing"},{"segment":[154.73979050590594,164.06562609443154],"label":"Rock 
climbing"}]},"1qO8Z8SnrhU":{"duration":126.223,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1qO8Z8SnrhU","annotations":[]},"2dWPKpM7jME":{"duration":68.36,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2dWPKpM7jME","annotations":[]},"0w-3O0ZOQFQ":{"duration":72.539,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=0w-3O0ZOQFQ","annotations":[{"segment":[35.507799377916015,47.131287713841374],"label":"Mooping floor"}]},"q-mpNUe0JNo":{"duration":137.02,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=q-mpNUe0JNo","annotations":[{"segment":[0.01,120.11632137285491],"label":"Skateboarding"}]},"ijChwOwYDWc":{"duration":210.234,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ijChwOwYDWc","annotations":[{"segment":[0.01,210.181],"label":"Ice fishing"}]},"0XRZ2F5B7fQ":{"duration":61.533,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=0XRZ2F5B7fQ","annotations":[{"segment":[0,32.44635376599064],"label":"Removing ice from car"}]},"kmzWLxPtAsU":{"duration":99.986,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=kmzWLxPtAsU","annotations":[]},"N4qhryre_TE":{"duration":180.28,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=N4qhryre_TE","annotations":[]},"Az7bzVcLMSM":{"duration":184.48,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Az7bzVcLMSM","annotations":[{"segment":[10.073150830795138,177.863063240897],"label":"Belly dance"}]},"qCy-68g0oXw":{"duration":107.602,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qCy-68g0oXw","annotations":[{"segment":[0.01,106.32748377474533],"label":"Beer 
pong"}]},"NIY1f2KcEe0":{"duration":30.372,"subset":"validation","resolution":"176x144","url":"https://www.youtube.com/watch?v=NIY1f2KcEe0","annotations":[{"segment":[0.2,30.17170068027211],"label":"Cutting the grass"}]},"RFc5HiWSToY":{"duration":16.88,"subset":"testing","resolution":"406x720","url":"https://www.youtube.com/watch?v=RFc5HiWSToY","annotations":[]},"1jWMd8QaN5s":{"duration":38.035,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1jWMd8QaN5s","annotations":[{"segment":[0.2,36.163198127925114],"label":"Table soccer"}]},"99dCcB5Unmo":{"duration":181.07,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=99dCcB5Unmo","annotations":[{"segment":[34.462469594383776,160.16573983619347],"label":"Ballet"}]},"YeNlYRWhFzE":{"duration":211.511,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YeNlYRWhFzE","annotations":[{"segment":[8.907225317584132,33.64951786642894],"label":"Surfing"},{"segment":[46.18561275784365,141.85581061337692],"label":"Surfing"},{"segment":[149.44344699502264,160.65995295049896],"label":"Surfing"}]},"N88-LuWK_K0":{"duration":149.653,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=N88-LuWK_K0","annotations":[{"segment":[18.175822429906543,111.63311370716512],"label":"Calf roping"}]},"n6k21NjvqXE":{"duration":94.343,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=n6k21NjvqXE","annotations":[{"segment":[35.0288485397816,75.35617837129486],"label":"Rafting"}]},"OUPEzXXawCs":{"duration":83.24,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OUPEzXXawCs","annotations":[{"segment":[38.69981370046802,83.243559],"label":"Playing 
polo"}]},"uFmEm88xyhY":{"duration":75.256,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=uFmEm88xyhY","annotations":[]},"NZ5iioSnCO8":{"duration":68.708,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NZ5iioSnCO8","annotations":[{"segment":[0.10708424336973481,68.641],"label":"Shuffleboard"}]},"AWPlbtK7afY":{"duration":188.678,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AWPlbtK7afY","annotations":[{"segment":[6.475790951638066,113.04396879875195],"label":"Rollerblading"}]},"Fpivz0Yie_Q":{"duration":201.29,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Fpivz0Yie_Q","annotations":[{"segment":[68.92977568354435,123.25711597171369],"label":"Doing motocross"},{"segment":[129.85176421478627,173.50205496655238],"label":"Doing motocross"}]},"jqZg_FK2OlM":{"duration":25.426,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=jqZg_FK2OlM","annotations":[{"segment":[9.312379095163807,21.35903120124805],"label":"Painting fence"}]},"KBMvitQaXzE":{"duration":22.454,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=KBMvitQaXzE","annotations":[{"segment":[3.4153437974253666,21.10507414966871],"label":"Camel ride"}]},"Zg7J_rLXbuk":{"duration":99.71000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Zg7J_rLXbuk","annotations":[{"segment":[7.932916573805809,94.41726100468019],"label":"Using the balance beam"}]},"Hi4YiQO47EM":{"duration":79.157,"subset":"training","resolution":"406x720","url":"https://www.youtube.com/watch?v=Hi4YiQO47EM","annotations":[{"segment":[70.2932275212484,73.98639193804466],"label":"Beer 
pong"}]},"RiLEPcZ2t2A":{"duration":230.459,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=RiLEPcZ2t2A","annotations":[]},"twCNBLY4Y_o":{"duration":127.57,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=twCNBLY4Y_o","annotations":[]},"rBy_0F04_50":{"duration":74.14,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=rBy_0F04_50","annotations":[]},"l7rzaG3aDyw":{"duration":229.37,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=l7rzaG3aDyw","annotations":[{"segment":[0,229.341],"label":"Belly dance"}]},"vVvImml1A8g":{"duration":55.148,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vVvImml1A8g","annotations":[{"segment":[0.2,53.260543966971184],"label":"Riding bumper cars"}]},"yRnM5oD4oJk":{"duration":20.32,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yRnM5oD4oJk","annotations":[]},"MoPg7NqyhLI":{"duration":69.94,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MoPg7NqyhLI","annotations":[{"segment":[5.455371840873635,8.837702382215289],"label":"Archery"},{"segment":[20.403090684867394,47.13441270514821],"label":"Archery"}]},"RaQE93FNLQI":{"duration":98.058,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RaQE93FNLQI","annotations":[{"segment":[4.130363541341654,95.76324358814352],"label":"Baking cookies"}]},"Bri_myFFu4A":{"duration":65.9,"subset":"training","resolution":"640x346","url":"https://www.youtube.com/watch?v=Bri_myFFu4A","annotations":[{"segment":[23.850701728549144,59.93516856786272],"label":"Playing 
violin"}]},"E8cnLtJEQhk":{"duration":63.646,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=E8cnLtJEQhk","annotations":[]},"gPofktt3hUw":{"duration":76.255,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=gPofktt3hUw","annotations":[{"segment":[0.19999999999999998,59.004910577223086],"label":"Plastering"},{"segment":[67.21325499219968,76.05433],"label":"Plastering"}]},"OzAqwPQ-APY":{"duration":63.623,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=OzAqwPQ-APY","annotations":[{"segment":[7.044728549141966,41.07771294851794],"label":"Carving jack-o-lanterns"}]},"7jGKlEvH_uw":{"duration":149.444,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7jGKlEvH_uw","annotations":[{"segment":[56.62734009360375,121.88347737909517],"label":"Hula hoop"}]},"S1cz3vUvi-Y":{"duration":94.16,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=S1cz3vUvi-Y","annotations":[]},"6q3EIv2X8BQ":{"duration":33.34,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=6q3EIv2X8BQ","annotations":[{"segment":[20.203982194027052,23.162388736083127],"label":"Washing hands"}]},"Qx4YLntwZ5s":{"duration":225.164,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Qx4YLntwZ5s","annotations":[{"segment":[70.93666458658346,207.20965678627147],"label":"Blow-drying hair"}]},"On4V94fVLpg":{"duration":165.791,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=On4V94fVLpg","annotations":[{"segment":[43.43389703588144,45.502177847113884],"label":"Carving jack-o-lanterns"},{"segment":[75.4922496099844,94.11302340093604],"label":"Carving jack-o-lanterns"},{"segment":[117.38272698907957,127.21462090483621],"label":"Carving 
jack-o-lanterns"}]},"q7Ap96aMkZ8":{"duration":17.276,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=q7Ap96aMkZ8","annotations":[{"segment":[0.2,16.918738317757008],"label":"Running a marathon"}]},"QT4d1nTQYSE":{"duration":198.49,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QT4d1nTQYSE","annotations":[]},"WL4iqWa_1Z0":{"duration":40.06,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=WL4iqWa_1Z0","annotations":[{"segment":[27.284207488299533,34.33939157566303],"label":"Drinking coffee"}]},"g4OlXwjgwSs":{"duration":155.09,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=g4OlXwjgwSs","annotations":[{"segment":[6.893280016380655,18.986753729329173],"label":"Shot put"},{"segment":[22.856665317472697,36.401355875975035],"label":"Shot put"},{"segment":[46.07613484633385,58.653347507800305],"label":"Shot put"},{"segment":[75.58421070592823,80.17973071684867],"label":"Shot put"},{"segment":[89.85450968720748,94.93376864664586],"label":"Shot put"},{"segment":[113.31584869032761,117.66949922698907],"label":"Shot put"},{"segment":[127.34427819734789,133.14914557956317],"label":"Shot 
put"}]},"Cl0lsFUR5HE":{"duration":188.291,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Cl0lsFUR5HE","annotations":[{"segment":[0.2,188.09009070294786],"label":"Longboarding"},{"segment":[24.012111100531456,173.94139016726442],"label":"Longboarding"}]},"3haFn-jQefE":{"duration":193.68,"subset":"testing","resolution":"654x480","url":"https://www.youtube.com/watch?v=3haFn-jQefE","annotations":[]},"4-BW75h1D1A":{"duration":216.85,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4-BW75h1D1A","annotations":[{"segment":[0,204.82638578636383],"label":"Cumbia"}]},"Y1UwPTU61uk":{"duration":99.96000000000001,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Y1UwPTU61uk","annotations":[{"segment":[1.8713617116113217,98.09054132137285],"label":"Using the balance beam"}]},"iSHPVCBsnLw":{"duration":62.091,"subset":"training","resolution":"854x468","url":"https://www.youtube.com/watch?v=iSHPVCBsnLw","annotations":[{"segment":[46.39800321216848,59.668413316692664],"label":"Rock-paper-scissors"}]},"sy8WjZwaNw4":{"duration":62.23,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=sy8WjZwaNw4","annotations":[]},"yOKDaH8Go0A":{"duration":128.151,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yOKDaH8Go0A","annotations":[{"segment":[37.38553215132605,128.1504081632653],"label":"Painting 
furniture"}]},"fJB7KO1hoUo":{"duration":147.77,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=fJB7KO1hoUo","annotations":[]},"tzChPctqAP4":{"duration":125.481,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=tzChPctqAP4","annotations":[{"segment":[11.90407267520798,118.84557801969933],"label":"Capoeira"}]},"-HKsU_uFUWo":{"duration":119.281,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-HKsU_uFUWo","annotations":[{"segment":[58.8053791832775,64.37055650377421],"label":"Fun sliding down"},{"segment":[101.10073157064824,113.71513512896861],"label":"Fun sliding down"}]},"-TIFhuhUePA":{"duration":64.993,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-TIFhuhUePA","annotations":[]},"ZojEQYIV_o8":{"duration":198.69,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZojEQYIV_o8","annotations":[{"segment":[90.20209398580025,120.88940431086631],"label":"Polishing forniture"}]},"4NPVpI9zUpI":{"duration":149.05,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=4NPVpI9zUpI","annotations":[]},"F5n6TMMDLrg":{"duration":91.14,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=F5n6TMMDLrg","annotations":[]},"bz62JE0qMSk":{"duration":192.36,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bz62JE0qMSk","annotations":[{"segment":[89.27502662011243,165.49639224882998],"label":"Playing badminton"}]},"4ksF9BUdQJM":{"duration":61.603,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4ksF9BUdQJM","annotations":[{"segment":[4.805183227737273,39.114191473781396],"label":"Cutting the grass"},{"segment":[54.68298513165016,61.602448979591834],"label":"Cutting the 
grass"}]},"xRuZMDClaQM":{"duration":189.708,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xRuZMDClaQM","annotations":[{"segment":[5.917659906396256,18.344745709828395],"label":"Doing fencing"},{"segment":[35.801842433697345,82.25547269890795],"label":"Doing fencing"},{"segment":[97.93727145085803,117.16966614664588],"label":"Doing fencing"},{"segment":[129.59675195007802,144.98266770670827],"label":"Doing fencing"}]},"PMdba6f_cho":{"duration":120.303,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PMdba6f_cho","annotations":[{"segment":[13.60676663801246,93.37057106774067],"label":"Waterskiing"}]},"Qci4EFEIZuo":{"duration":57.238,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Qci4EFEIZuo","annotations":[{"segment":[2.4068751950078004,57.121],"label":"Drum corps"}]},"ui_CNb4FUtQ":{"duration":238.42000000000002,"subset":"training","resolution":"640x354","url":"https://www.youtube.com/watch?v=ui_CNb4FUtQ","annotations":[{"segment":[11.530546729352167,32.73187458654809],"label":"Playing saxophone"},{"segment":[43.14656195499521,57.65273364676083],"label":"Playing saxophone"}]},"EkriUlmSGkE":{"duration":6.04,"subset":"testing","resolution":"270x480","url":"https://www.youtube.com/watch?v=EkriUlmSGkE","annotations":[]},"nTY4IaGRFIc":{"duration":125.88,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=nTY4IaGRFIc","annotations":[]},"BO0vQ6ASVlo":{"duration":94.134,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=BO0vQ6ASVlo","annotations":[{"segment":[5.433614286068042,11.014083012300084],"label":"Disc dog"},{"segment":[12.335772973776095,21.000184911076442],"label":"Disc dog"},{"segment":[22.468729310452417,26.874362508580344],"label":"Disc dog"},{"segment":[27.60863470826833,29.95830574726989],"label":"Disc dog"},{"segment":[30.10516018720749,31.133141266770668],"label":"Disc 
dog"},{"segment":[36.713609984399376,38.32900882371295],"label":"Disc dog"},{"segment":[40.238116542901714,42.88149646177847],"label":"Disc dog"},{"segment":[51.69276285803432,61.38515589391575],"label":"Disc dog"},{"segment":[64.46909913260531,68.72787789079563],"label":"Disc dog"},{"segment":[68.87473233073322,75.18947324804992],"label":"Disc dog"},{"segment":[75.48318212792512,82.091631925117],"label":"Disc dog"},{"segment":[83.85388520436817,86.79097400312013],"label":"Disc dog"},{"segment":[89.43435392199687,93.933696],"label":"Disc dog"}]},"Y7gywSk5i0M":{"duration":9.59,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Y7gywSk5i0M","annotations":[{"segment":[0.01,7.763875647425897],"label":"Using uneven bars"}]},"Xmp_twSbkAg":{"duration":72.47,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Xmp_twSbkAg","annotations":[]},"1517CiM5c0A":{"duration":214.27,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=1517CiM5c0A","annotations":[{"segment":[8.101258880597408,203.68445458153198],"label":"Playing flauta"}]},"ccirM2NGwMA":{"duration":16.998,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=ccirM2NGwMA","annotations":[{"segment":[0.4242622152886116,16.042415015600625],"label":"Blow-drying hair"}]},"s3DoV1L9zAM":{"duration":134.816,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=s3DoV1L9zAM","annotations":[{"segment":[0.01,131.0818775351014],"label":"Playing drums"}]},"DXu_aHrZaUs":{"duration":119.908,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=DXu_aHrZaUs","annotations":[{"segment":[9.594747013805069,113.38227972408544],"label":"Laying tile"}]},"tBFX7g605Go":{"duration":146.66,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tBFX7g605Go","annotations":[{"segment":[95.38370826833074,134.72662870514822],"label":"Washing 
face"}]},"P6cR-26pTSY":{"duration":171.13,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=P6cR-26pTSY","annotations":[{"segment":[10.278542951241858,113.33094760525114],"label":"Doing step aerobics"}]},"Ovtfld_ZyCs":{"duration":220.47,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ovtfld_ZyCs","annotations":[{"segment":[34.39522979719189,188.48585928861155],"label":"Breakdancing"}]},"E8HSjhQ7Z2o":{"duration":73.26,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=E8HSjhQ7Z2o","annotations":[]},"p377oL-EM_E":{"duration":39.47,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=p377oL-EM_E","annotations":[{"segment":[8.92935810452418,37.626467599063965],"label":"Shot put"}]},"4GrPMa_BE6M":{"duration":22.291,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=4GrPMa_BE6M","annotations":[{"segment":[1.0780433775351015,20.030741466458657],"label":"Arm wrestling"}]},"bhRHD3bBNZY":{"duration":188.779,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=bhRHD3bBNZY","annotations":[{"segment":[15.076551141510581,23.015798709696732],"label":"Hula hoop"},{"segment":[66.2405910253769,102.11422818532914],"label":"Hula hoop"},{"segment":[124.75578606497115,184.15311835843374],"label":"Hula hoop"}]},"aF0gDTbcOxE":{"duration":214.576,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aF0gDTbcOxE","annotations":[{"segment":[13.386645865834634,194.79385647425897],"label":"Sumo"}]},"RGMaWJNYcKQ":{"duration":133.492,"subset":"testing","resolution":"294x240","url":"https://www.youtube.com/watch?v=RGMaWJNYcKQ","annotations":[]},"IVnPfvBZxKE":{"duration":115.59,"subset":"training","resolution":"424x240","url":"https://www.youtube.com/watch?v=IVnPfvBZxKE","annotations":[{"segment":[6.170635602219424,100.15416056483625],"label":"Making a 
sandwich"}]},"YG_jr8Efdxk":{"duration":78.86,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YG_jr8Efdxk","annotations":[{"segment":[21.036046521060847,50.06825107644306],"label":"Snatch"}]},"bDiwuABU45I":{"duration":35.156,"subset":"training","resolution":"328x240","url":"https://www.youtube.com/watch?v=bDiwuABU45I","annotations":[{"segment":[0,4.579474911856474],"label":"Applying sunscreen"},{"segment":[7.541051501560063,29.972251968018725],"label":"Applying sunscreen"}]},"OUY4TkdjnyA":{"duration":173.012,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=OUY4TkdjnyA","annotations":[{"segment":[26.36883639889989,67.2674461611044],"label":"Table soccer"},{"segment":[144.7595479596395,160.36559632355863],"label":"Table soccer"}]},"I52lhI6txNo":{"duration":60.094,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=I52lhI6txNo","annotations":[{"segment":[17.00635179407176,48.30178705148206],"label":"Snow tubing"}]},"sFQ343Qq4TE":{"duration":105.42,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=sFQ343Qq4TE","annotations":[{"segment":[4.111489629653213,98.34683091107645],"label":"Doing step aerobics"}]},"YP42JEe3V7o":{"duration":135.582,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YP42JEe3V7o","annotations":[]},"iNMxIXAcHYU":{"duration":40.38,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=iNMxIXAcHYU","annotations":[{"segment":[2.66801872074883,40.24],"label":"Putting on 
makeup"}]},"vOOXaGo3fis":{"duration":164.398,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vOOXaGo3fis","annotations":[]},"TDROfnEk0NQ":{"duration":220.47,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=TDROfnEk0NQ","annotations":[{"segment":[25.16307234353994,40.27340348372686],"label":"Shaving"},{"segment":[45.08123611924088,79.07948118466145],"label":"Shaving"},{"segment":[109.98697669868012,133.33930664260538],"label":"Shaving"},{"segment":[137.80372266129694,199.2752970725119],"label":"Shaving"}]},"7vcihMnmssI":{"duration":168.856,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=7vcihMnmssI","annotations":[]},"rNJBvIoPlXM":{"duration":67.385,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rNJBvIoPlXM","annotations":[]},"sbvk9FNaAH0":{"duration":178.26,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sbvk9FNaAH0","annotations":[{"segment":[70.35847113884554,151.56271450858034],"label":"Windsurfing"}]},"dUBkspFNj-E":{"duration":159.568,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dUBkspFNj-E","annotations":[{"segment":[13.191377535101402,159.541],"label":"Playing pool"}]},"WFqr6QPsszQ":{"duration":150.91,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WFqr6QPsszQ","annotations":[{"segment":[10.15144662,121.59619947581903],"label":"Wrapping 
presents"}]},"GFQuGc1yY8o":{"duration":106.14,"subset":"training","resolution":"638x360","url":"https://www.youtube.com/watch?v=GFQuGc1yY8o","annotations":[{"segment":[9.433010920436818,97.6399375975039],"label":"Tango"}]},"hs-8typVf2A":{"duration":106.49000000000001,"subset":"testing","resolution":"294x240","url":"https://www.youtube.com/watch?v=hs-8typVf2A","annotations":[]},"sZ-S-YZBpTs":{"duration":16.069,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=sZ-S-YZBpTs","annotations":[]},"-zfOvigQLdA":{"duration":24.29,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-zfOvigQLdA","annotations":[{"segment":[1.5459312,13.821716068642745],"label":"Springboard diving"},{"segment":[16.62009360374415,19.68318252730109],"label":"Springboard diving"}]},"vu-3Zi94F0M":{"duration":18.25,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=vu-3Zi94F0M","annotations":[{"segment":[0.7402854258970359,17.53907009048362],"label":"Kayaking"}]},"9uxkazuxmDw":{"duration":237.77,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9uxkazuxmDw","annotations":[{"segment":[0.01,237.77233560090704],"label":"Playing lacrosse"}]},"l5ZoWgR0vhM":{"duration":60.35,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=l5ZoWgR0vhM","annotations":[]},"xafFWMX7ZeY":{"duration":228.62,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xafFWMX7ZeY","annotations":[{"segment":[9.273347129803561,217.56699035308353],"label":"Walking the 
dog"}]},"gOUqdgxwNPw":{"duration":224.12,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=gOUqdgxwNPw","annotations":[{"segment":[13.983213728549142,212.8944290171607],"label":"Wakeboarding"}]},"03xBbhjqQzw":{"duration":23.708,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=03xBbhjqQzw","annotations":[]},"oDZlW0OgEgg":{"duration":134.12,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oDZlW0OgEgg","annotations":[{"segment":[27.816088923556944,112.5360031201248],"label":"Polishing forniture"}]},"-Wmrg_0jxA8":{"duration":93.159,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-Wmrg_0jxA8","annotations":[{"segment":[9.301314768236988,10.173313027759205],"label":"Rock-paper-scissors"}]},"jWH-2aa1gJY":{"duration":41.19,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jWH-2aa1gJY","annotations":[]},"pRzFL4_I-cE":{"duration":134.328,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pRzFL4_I-cE","annotations":[{"segment":[19.698563398318242,119.23917485803433],"label":"Playing blackjack"}]},"2DwBXRhtX4s":{"duration":48.414,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2DwBXRhtX4s","annotations":[{"segment":[14.19930975351014,38.14176290171607],"label":"Doing fencing"}]},"Wgh8e4V8hBc":{"duration":11.936,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Wgh8e4V8hBc","annotations":[{"segment":[0.48410523556942275,1.8619432137285492],"label":"Hitting a pinata"},{"segment":[4.17075279875195,5.287918726989079],"label":"Hitting a pinata"}]},"NFErgnaSRRY":{"duration":78.298,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NFErgnaSRRY","annotations":[{"segment":[8.672598716068642,16.245853932917317],"label":"Carving 
jack-o-lanterns"}]},"bCsxg077oP8":{"duration":48.925,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=bCsxg077oP8","annotations":[]},"qsEnLQ2UnEA":{"duration":26.89,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qsEnLQ2UnEA","annotations":[]},"90tOAABCbJI":{"duration":201.06,"subset":"testing","resolution":"294x240","url":"https://www.youtube.com/watch?v=90tOAABCbJI","annotations":[]},"No2adeap68I":{"duration":108.368,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=No2adeap68I","annotations":[{"segment":[25.280136233504546,78.87402952168392],"label":"Hanging wallpaper"}]},"MDdCBN1o2sE":{"duration":43.422,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MDdCBN1o2sE","annotations":[{"segment":[0.2,38.30521996879875],"label":"Kite flying"}]},"5O1ttcUIUKk":{"duration":95.41,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=5O1ttcUIUKk","annotations":[{"segment":[24.175571938377534,64.764308],"label":"Playing lacrosse"}]},"sCj-ME5RkLY":{"duration":71.89,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=sCj-ME5RkLY","annotations":[{"segment":[9.932571762870515,68.51791029641186],"label":"Volleyball"}]},"o6lTgyb_AkM":{"duration":171.99,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=o6lTgyb_AkM","annotations":[{"segment":[29.514699608392494,163.94074055207105],"label":"Painting"}]},"SZXscHYG26Y":{"duration":80.64,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=SZXscHYG26Y","annotations":[{"segment":[0,80.26545423400935],"label":"Windsurfing"}]},"dFgwKTH-FhY":{"duration":15.674,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=dFgwKTH-FhY","annotations":[{"segment":[0.01,15.146136403607187],"label":"River 
tubing"}]},"w_Z7rx4tmQk":{"duration":26.262,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=w_Z7rx4tmQk","annotations":[]},"vHejQlbAL78":{"duration":189.87,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=vHejQlbAL78","annotations":[{"segment":[26.9129795282599,183.3631554517134],"label":"Skiing"}]},"h3thb-S-3L8":{"duration":168.23,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=h3thb-S-3L8","annotations":[{"segment":[38.842045404056165,131.4855726177847],"label":"Preparing pasta"}]},"7UwRRkgQSj4":{"duration":45.86,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=7UwRRkgQSj4","annotations":[]},"sQZaEt-ssCs":{"duration":104.63,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sQZaEt-ssCs","annotations":[{"segment":[7.8298408736349465,23.000157566302654],"label":"Arm wrestling"},{"segment":[31.319363494539786,90.22354602184087],"label":"Arm wrestling"}]},"5Ym-k30X5PY":{"duration":177.54,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5Ym-k30X5PY","annotations":[]},"hYgheCsbtco":{"duration":48.07,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hYgheCsbtco","annotations":[{"segment":[0,48.065282],"label":"Playing 
violin"}]},"QBXswoKU4S4":{"duration":164.79,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=QBXswoKU4S4","annotations":[{"segment":[4.627965678627145,21.340063962558503],"label":"Cricket"},{"segment":[47.822312012480495,106.95742901716068],"label":"Cricket"}]},"b0MmqWHdhls":{"duration":233.733,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=b0MmqWHdhls","annotations":[]},"-h1jccLMbjM":{"duration":39.29,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-h1jccLMbjM","annotations":[]},"JD6SB3k9GkY":{"duration":162.03,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=JD6SB3k9GkY","annotations":[]},"_Wq1OGf0MHM":{"duration":143.175,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=_Wq1OGf0MHM","annotations":[{"segment":[15.406160686427457,112.77099063962558],"label":"Rafting"}]},"0ixULg-DK_4":{"duration":7.38,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0ixULg-DK_4","annotations":[]},"b87ec6jTm_M":{"duration":125.992,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=b87ec6jTm_M","annotations":[{"segment":[25.472543498939526,27.431970151923164],"label":"Drum corps"},{"segment":[35.66156209445445,36.83721808624463],"label":"Drum corps"},{"segment":[41.53984205340537,46.046523355267745],"label":"Drum corps"},{"segment":[103.26178015789718,107.57251873332487],"label":"Drum 
corps"}]},"Xj1R81SK_zs":{"duration":95.48,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Xj1R81SK_zs","annotations":[{"segment":[36.94095568174727,40.66484234321373],"label":"Surfing"},{"segment":[43.94186260530421,74.32877776287052],"label":"Surfing"}]},"ggGw9EGSMs8":{"duration":19.32,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ggGw9EGSMs8","annotations":[{"segment":[3.4851294068824736,18.567328020566237],"label":"Painting fence"}]},"HgYuRKZ_V90":{"duration":159.43,"subset":"testing","resolution":"424x240","url":"https://www.youtube.com/watch?v=HgYuRKZ_V90","annotations":[]},"nO7weoDrM5k":{"duration":25.29,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=nO7weoDrM5k","annotations":[]},"aA6Bchzww4Y":{"duration":187.269,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=aA6Bchzww4Y","annotations":[{"segment":[0.01,187.26895691609977],"label":"Removing ice from car"}]},"86Yl3F2HSik":{"duration":91.37,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=86Yl3F2HSik","annotations":[{"segment":[5.1315685553822155,87.94938329641187],"label":"Tennis serve with ball bouncing"}]},"5Y4YkCkgShc":{"duration":193.887,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5Y4YkCkgShc","annotations":[{"segment":[50.95924859731347,146.847064059098],"label":"Making a cake"}]},"b5GJUtjiWkQ":{"duration":42.28,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=b5GJUtjiWkQ","annotations":[{"segment":[0,36.61054989079563],"label":"Hopscotch"}]},"BiIIco4EC2U":{"duration":180.071,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=BiIIco4EC2U","annotations":[{"segment":[0.2,175.58300379341767],"label":"Playing rubik 
cube"}]},"DfOiHMcrCbs":{"duration":81.22,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=DfOiHMcrCbs","annotations":[{"segment":[39.4079322199688,50.305302544461775],"label":"Smoking a cigarette"}]},"HlhQ3-WOdgI":{"duration":19.09,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HlhQ3-WOdgI","annotations":[{"segment":[1.485257410296412,11.703828393135726],"label":"Drinking coffee"}]},"pgff9mC5y3s":{"duration":26.123,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pgff9mC5y3s","annotations":[{"segment":[5.635286271450858,13.243939937597505],"label":"Hitting a pinata"},{"segment":[17.434802652106086,22.92768096723869],"label":"Hitting a pinata"}]},"bt4qUkWab3k":{"duration":113.85,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=bt4qUkWab3k","annotations":[]},"0eR57IP6paM":{"duration":219.499,"subset":"testing","resolution":"426x238","url":"https://www.youtube.com/watch?v=0eR57IP6paM","annotations":[]},"BJM1rUjXvkQ":{"duration":223.33,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=BJM1rUjXvkQ","annotations":[{"segment":[8.000901943331849,213.241430054888],"label":"Making a cake"}]},"zz3Mw8FMA70":{"duration":121.88,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=zz3Mw8FMA70","annotations":[{"segment":[0,121.68154],"label":"Smoking hookah"}]},"KT8DUDFc3Xg":{"duration":60.07,"subset":"testing","resolution":"176x144","url":"https://www.youtube.com/watch?v=KT8DUDFc3Xg","annotations":[]},"sV6NoTdFaPE":{"duration":86.24,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=sV6NoTdFaPE","annotations":[{"segment":[18.028103197597293,45.19819817999999],"label":"Playing field 
hockey"}]},"D-XCWhvStd4":{"duration":130.427,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=D-XCWhvStd4","annotations":[{"segment":[26.08529710999224,58.38718333429131],"label":"Waxing skis"},{"segment":[70.57657436232869,118.52151240594239],"label":"Waxing skis"}]},"nI6l7BP0odw":{"duration":232.97,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=nI6l7BP0odw","annotations":[]},"DmFnPx4z_XA":{"duration":162.958,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=DmFnPx4z_XA","annotations":[]},"5yfDyVVkPuU":{"duration":94.67,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5yfDyVVkPuU","annotations":[{"segment":[3.4553455,84.32962173790952],"label":"Cricket"}]},"HhbTyPnFVIg":{"duration":83.76,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=HhbTyPnFVIg","annotations":[{"segment":[6.33515256,59.044894920000004],"label":"Chopping wood"}]},"DYwF_1xX4dU":{"duration":215.02,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DYwF_1xX4dU","annotations":[{"segment":[46.729584360000004,192.3327535101404],"label":"Playing 
flauta"}]},"fZ_CmvF-_No":{"duration":226.02,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fZ_CmvF-_No","annotations":[{"segment":[3.878092043681747,221.0512464898596],"label":"Cheerleading"}]},"VvsYrfEvvUw":{"duration":12.38,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VvsYrfEvvUw","annotations":[]},"iRyBbZlZv5Q":{"duration":162.215,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=iRyBbZlZv5Q","annotations":[]},"r-XQMXUJBqk":{"duration":87.89,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=r-XQMXUJBqk","annotations":[{"segment":[0.01,0.02],"label":"Hopscotch"}]},"MZiBlBhpq-Y":{"duration":163.72,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=MZiBlBhpq-Y","annotations":[{"segment":[1.4849676100000004,31.5352078798752],"label":"Playing guitarra"},{"segment":[34.005267100000005,155.8886187098284],"label":"Playing guitarra"}]},"ePaFTey15ho":{"duration":199.46,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ePaFTey15ho","annotations":[{"segment":[27.374864274570985,169.24340717628706],"label":"Mixing drinks"}]},"2Peh_gdQCjg":{"duration":141.55,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2Peh_gdQCjg","annotations":[{"segment":[78.7431524,135.54137741653668],"label":"Playing badminton"},{"segment":[11.4866202,66.77102255999999],"label":"Playing badminton"}]},"fyxXJJhCGBQ":{"duration":40.54,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=fyxXJJhCGBQ","annotations":[{"segment":[8.348750620904838,27.829168736349455],"label":"Chopping wood"}]},"dQR6VEemP24":{"duration":54.1,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=dQR6VEemP24","annotations":[{"segment":[0.8440326521060844,37.221839957878316],"label":"Bathing 
dog"}]},"AjDBsuCwTyk":{"duration":14.44,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=AjDBsuCwTyk","annotations":[]},"YU-dUWGOoD4":{"duration":97.2,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YU-dUWGOoD4","annotations":[]},"uwQDvcWzBy0":{"duration":156.53,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uwQDvcWzBy0","annotations":[{"segment":[30.974732250000002,150.23097113884558],"label":"Wrapping presents"}]},"DQXJ8OU7Ox4":{"duration":170.46,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DQXJ8OU7Ox4","annotations":[{"segment":[20.9691225,28.17863650546022],"label":"Clean and jerk"},{"segment":[30.039489859594386,35.09037753510141],"label":"Clean and jerk"},{"segment":[45.72382527301092,54.76225585023402],"label":"Clean and jerk"},{"segment":[69.3832464898596,78.95334945397816],"label":"Clean and jerk"},{"segment":[90.65014196567863,101.54942589703589],"label":"Clean and jerk"},{"segment":[125.4746833073323,140.37798439937598],"label":"Clean and jerk"},{"segment":[155.00069110764431,159.2545694227769],"label":"Clean and jerk"}]},"hPQEkPZmNsU":{"duration":154.48,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hPQEkPZmNsU","annotations":[{"segment":[15.173892322932916,141.3821395803432],"label":"Getting a haircut"}]},"vfJnUQ9Tax0":{"duration":32.28,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=vfJnUQ9Tax0","annotations":[{"segment":[7.613873583249957,23.114356449293567],"label":"Washing 
hands"}]},"Q6Mov3tWyYg":{"duration":146.054,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Q6Mov3tWyYg","annotations":[]},"fU-OulK7lZs":{"duration":144.498,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=fU-OulK7lZs","annotations":[{"segment":[0.01,108.40209204368176],"label":"Futsal"}]},"KC_nNsohkmU":{"duration":191.01,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=KC_nNsohkmU","annotations":[]},"TnXeZsrYpUA":{"duration":10.101,"subset":"testing","resolution":"240x240","url":"https://www.youtube.com/watch?v=TnXeZsrYpUA","annotations":[]},"UTiSAR1o2nU":{"duration":138.949,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UTiSAR1o2nU","annotations":[{"segment":[8.453947170131704,128.76011693915757],"label":"Slacklining"}]},"RYJ3yzxZB8k":{"duration":154.32,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RYJ3yzxZB8k","annotations":[{"segment":[7.517136614802983,10.401619205346988],"label":"Breakdancing"},{"segment":[67.3701503685911,139.96296078481546],"label":"Breakdancing"}]},"bnx74ZKCrBQ":{"duration":92.3,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bnx74ZKCrBQ","annotations":[]},"FaDD5MyZj88":{"duration":141.67000000000002,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FaDD5MyZj88","annotations":[{"segment":[3.9781106957878314,110.50307488299532],"label":"Triple jump"}]},"XazKuBawFCM":{"duration":134.21,"subset":"training","resolution":"2560x810","url":"https://www.youtube.com/watch?v=XazKuBawFCM","annotations":[{"segment":[6.480499219968799,123.2958018720749],"label":"Discus 
throw"}]},"fjnOZFkhub4":{"duration":89.815,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=fjnOZFkhub4","annotations":[]},"3JNNjd9fv7U":{"duration":156.22,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3JNNjd9fv7U","annotations":[{"segment":[17.54776507956318,84.81419788455538],"label":"Vacuuming floor"}]},"fe4r2-Y8k1U":{"duration":212.161,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fe4r2-Y8k1U","annotations":[]},"_fLhtWSCaSo":{"duration":77.021,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_fLhtWSCaSo","annotations":[]},"jpSbO4LrEnc":{"duration":13.398,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=jpSbO4LrEnc","annotations":[{"segment":[0.01,13.009748226236123],"label":"Rollerblading"}]},"UaO7bS5Ky6M":{"duration":182.74,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=UaO7bS5Ky6M","annotations":[]},"sfbgZOHLkII":{"duration":122.23,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sfbgZOHLkII","annotations":[]},"Q6XjxUlbP2M":{"duration":37.454,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Q6XjxUlbP2M","annotations":[{"segment":[0.2,36.40811232449298],"label":"Riding bumper cars"}]},"sz0GhFkkXYI":{"duration":118.144,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=sz0GhFkkXYI","annotations":[{"segment":[0.01,105.70216010273064],"label":"Swinging at the playground"}]},"0m63LNVHP1U":{"duration":136.766,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0m63LNVHP1U","annotations":[{"segment":[13.222249609984399,110.68302496099844],"label":"Swinging at the 
playground"}]},"2w3FSD235TY":{"duration":205.73,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=2w3FSD235TY","annotations":[{"segment":[119.84164118564743,145.51053354134163],"label":"Horseback riding"}]},"F-Z17rHI_ms":{"duration":36.8,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=F-Z17rHI_ms","annotations":[]},"JWb0-VdfZI4":{"duration":56.45,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=JWb0-VdfZI4","annotations":[{"segment":[10.15425083,36.16051267],"label":"Hopscotch"},{"segment":[40.43411504,53.80436817472699],"label":"Hopscotch"}]},"Ep5LuX_2gfI":{"duration":175.59,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ep5LuX_2gfI","annotations":[]},"jrCsCqylsmA":{"duration":63.298,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=jrCsCqylsmA","annotations":[]},"sGUkc9ajgiU":{"duration":31.63,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=sGUkc9ajgiU","annotations":[{"segment":[5.48915125,25.77785375],"label":"Polishing shoes"}]},"K6Tg4ZlgF8Y":{"duration":128.662,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=K6Tg4ZlgF8Y","annotations":[]},"snvSHNYvRks":{"duration":35.5,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=snvSHNYvRks","annotations":[{"segment":[3.397371294851794,32.627257],"label":"Shot put"}]},"xxzNNwzKSAo":{"duration":152.068,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xxzNNwzKSAo","annotations":[]},"lL2XqxgNIeQ":{"duration":108.461,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lL2XqxgNIeQ","annotations":[{"segment":[39.91787207488299,46.51446957878316],"label":"River tubing"},{"segment":[62.41396099843994,65.62768798751951],"label":"River 
tubing"},{"segment":[78.1443088923557,83.40314040561623],"label":"River tubing"},{"segment":[92.87692511700467,96.59876911076444],"label":"River tubing"}]},"sZRUTtoxY_s":{"duration":45.47,"subset":"validation","resolution":"176x144","url":"https://www.youtube.com/watch?v=sZRUTtoxY_s","annotations":[{"segment":[5.603290062402496,43.549621497659906],"label":"Doing kickboxing"}]},"oG4zf8dAQKw":{"duration":37.57,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=oG4zf8dAQKw","annotations":[{"segment":[19.869252753510143,31.064023478939156],"label":"Gargling mouthwash"}]},"nVJ5SmO86O4":{"duration":79.2,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=nVJ5SmO86O4","annotations":[]},"WznRr7xFs08":{"duration":202.896,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WznRr7xFs08","annotations":[]},"Rvc6Q33VEjA":{"duration":196.63,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Rvc6Q33VEjA","annotations":[]},"LslQvR_4bkk":{"duration":169.994,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=LslQvR_4bkk","annotations":[{"segment":[6.359101404056163,169.841],"label":"Playing ice hockey"}]},"vREvYuCYk0E":{"duration":7.43,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vREvYuCYk0E","annotations":[]},"SMUWOVJ3bzE":{"duration":69.96000000000001,"subset":"training","resolution":"632x360","url":"https://www.youtube.com/watch?v=SMUWOVJ3bzE","annotations":[{"segment":[1.5280251294851794,66.57823778471138],"label":"Playing 
piano"}]},"KSP62q7tM68":{"duration":80.551,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KSP62q7tM68","annotations":[{"segment":[18.581734789391575,47.333202808112325],"label":"Surfing"}]},"hvhny4-sI-w":{"duration":63.28,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hvhny4-sI-w","annotations":[]},"HhlWUEbHBUk":{"duration":90.33,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=HhlWUEbHBUk","annotations":[{"segment":[20.704067082683306,38.027878315132604],"label":"Mixing drinks"},{"segment":[67.18258502340095,87.1824321372855],"label":"Mixing drinks"}]},"o5j6N8O6HS4":{"duration":30.024,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=o5j6N8O6HS4","annotations":[{"segment":[4.121777223088923,30.0234],"label":"Using the rowing machine"}]},"vKShqwBJrME":{"duration":82.896,"subset":"training","resolution":"704x480","url":"https://www.youtube.com/watch?v=vKShqwBJrME","annotations":[{"segment":[4.519210799584632,63.01070974454828],"label":"Playing rubik cube"}]},"LOLS94lyERc":{"duration":169.599,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LOLS94lyERc","annotations":[]},"P4PQ5tC3gX8":{"duration":80.39,"subset":"validation","resolution":"854x478","url":"https://www.youtube.com/watch?v=P4PQ5tC3gX8","annotations":[{"segment":[8.76057526,75.97177295553823],"label":"Playing accordion"}]},"F30odTEdsxo":{"duration":41.518,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=F30odTEdsxo","annotations":[{"segment":[0.2,41.317277],"label":"Playing blackjack"}]},"Vcxv3nyZpSs":{"duration":37.385,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=Vcxv3nyZpSs","annotations":[{"segment":[1.5163323322048528,32.95106707488299],"label":"Cutting the 
grass"}]},"_Ew3g9PXhvo":{"duration":133.419,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_Ew3g9PXhvo","annotations":[{"segment":[23.519983347581903,128.42327190670827],"label":"Slacklining"}]},"Si6LZFiQT3k":{"duration":190.92,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Si6LZFiQT3k","annotations":[{"segment":[7.404078003120125,179.4065054602184],"label":"Starting a campfire"}]},"3eQqgH3PJBo":{"duration":157.06,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3eQqgH3PJBo","annotations":[{"segment":[0.010000000000000002,156.38508978130065],"label":"Playing pool"}]},"ydJsJgdaEq4":{"duration":109.11,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ydJsJgdaEq4","annotations":[{"segment":[61.278944461778465,91.57797811232449],"label":"Cleaning windows"}]},"WdKelyOqZvU":{"duration":179.096,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WdKelyOqZvU","annotations":[]},"AhsujXLZYWA":{"duration":61.998,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=AhsujXLZYWA","annotations":[{"segment":[0.2,17.109355694227766],"label":"Clipping cat claws"},{"segment":[25.905691107644305,46.881567862714505],"label":"Clipping cat claws"}]},"0VHUSC4YFT4":{"duration":86.52,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0VHUSC4YFT4","annotations":[{"segment":[64.51698803463339,79.49896642761311],"label":"Bungee jumping"}]},"SqZh6ytnQ3g":{"duration":235.6,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SqZh6ytnQ3g","annotations":[]},"6nMQRUhOcwM":{"duration":49.691,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6nMQRUhOcwM","annotations":[{"segment":[0.01,49.661],"label":"Disc 
dog"}]},"n04N7AuqZsY":{"duration":231.53,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=n04N7AuqZsY","annotations":[{"segment":[50.01319266770671,58.31863260530422],"label":"Playing field hockey"},{"segment":[66.62407254290171,85.04048283931357],"label":"Playing field hockey"},{"segment":[93.70702886115444,111.04012090483619],"label":"Playing field hockey"},{"segment":[133.06759204368174,145.70630499219968],"label":"Playing field hockey"},{"segment":[163.03939703588142,173.8725795631825],"label":"Playing field hockey"},{"segment":[185.42797425897035,196.62226287051482],"label":"Playing field hockey"}]},"OjiIamJOwlc":{"duration":75.976,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OjiIamJOwlc","annotations":[]},"9OTkoHkDszQ":{"duration":126.9,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9OTkoHkDszQ","annotations":[]},"IxaBEVd2PZM":{"duration":191.36,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=IxaBEVd2PZM","annotations":[]},"j0JsoWxrGh0":{"duration":100.102,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=j0JsoWxrGh0","annotations":[{"segment":[57.468409691489704,75.73961603362093],"label":"Sharpening knives"}]},"VSsZZZHn1L0":{"duration":198.996,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VSsZZZHn1L0","annotations":[]},"GfiqDJA-qqU":{"duration":190.752,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GfiqDJA-qqU","annotations":[{"segment":[34.817434493298094,77.66966156197267],"label":"Baton twirling"},{"segment":[80.34792575376484,162.18377568642745],"label":"Baton twirling"}]},"m1aF1CVo-s8":{"duration":94.506,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=m1aF1CVo-s8","annotations":[{"segment":[19.123566978193146,75.17032866043614],"label":"Elliptical 
trainer"}]},"Dd5yTxwKzXw":{"duration":235.265,"subset":"testing","resolution":"2276x720","url":"https://www.youtube.com/watch?v=Dd5yTxwKzXw","annotations":[]},"zzci2xZ011A":{"duration":48.53,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zzci2xZ011A","annotations":[{"segment":[17.629848673946956,20.959090483619345],"label":"Beach soccer"},{"segment":[24.137003120124803,27.99589703588143],"label":"Beach soccer"},{"segment":[41.99384555382215,45.17175819032761],"label":"Beach soccer"}]},"suWTtKXXkJU":{"duration":89.119,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=suWTtKXXkJU","annotations":[{"segment":[8.337004680187206,58.7851216848674],"label":"Knitting"},{"segment":[63.64913884555383,72.6823135725429],"label":"Knitting"}]},"79jhPh0PzWQ":{"duration":14.675,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=79jhPh0PzWQ","annotations":[{"segment":[2.674439268153776,7.965267928589912],"label":"Croquet"}]},"Tk3zk2pJ9KA":{"duration":131.193,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Tk3zk2pJ9KA","annotations":[{"segment":[69.5811856474259,92.29739625585024],"label":"Rafting"},{"segment":[101.0973697347894,119.31126833073324],"label":"Rafting"}]},"H4wC2d_Vbog":{"duration":160.66,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=H4wC2d_Vbog","annotations":[{"segment":[0,158.86694851794073],"label":"Doing step aerobics"}]},"Xrjkjz1l4qw":{"duration":15.093,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Xrjkjz1l4qw","annotations":[{"segment":[0.2,13.841105919003114],"label":"Playing rubik cube"}]},"K0MzjnMzbj4":{"duration":175.78,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=K0MzjnMzbj4","annotations":[{"segment":[12.202767229329172,162.7492663057722],"label":"Layup drill in 
basketball"}]},"xe6-tTvxQxk":{"duration":75.51,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=xe6-tTvxQxk","annotations":[{"segment":[0.01,75.51077097505669],"label":"Tennis serve with ball bouncing"}]},"tzbJ_CETeG8":{"duration":193.423,"subset":"training","resolution":"656x480","url":"https://www.youtube.com/watch?v=tzbJ_CETeG8","annotations":[{"segment":[47.52574102964119,82.83057722308892],"label":"Camel ride"},{"segment":[102.14262437164153,106.97063615877968],"label":"Camel ride"}]},"nShRXseBM4Y":{"duration":71.4,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nShRXseBM4Y","annotations":[{"segment":[31.00017217,34.0298208650546],"label":"Making a sandwich"},{"segment":[44.16636325039002,48.62198627691107],"label":"Making a sandwich"}]},"X1MbVBqHFOk":{"duration":62.369,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=X1MbVBqHFOk","annotations":[]},"XOTqx9kP-3w":{"duration":92.16,"subset":"testing","resolution":"592x480","url":"https://www.youtube.com/watch?v=XOTqx9kP-3w","annotations":[]},"8Mdnvm8olC0":{"duration":94.97,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8Mdnvm8olC0","annotations":[{"segment":[18.51975371531868,20.445808101711823],"label":"Long jump"},{"segment":[38.372929698140304,40.891616203423645],"label":"Long jump"},{"segment":[56.596367354013886,58.37426371068448],"label":"Long jump"},{"segment":[62.522688508580345,65.63400713104524],"label":"Long jump"},{"segment":[67.70821954602184,71.70848634633386],"label":"Long jump"}]},"YYetK3WzRdc":{"duration":152.37,"subset":"testing","resolution":"582x480","url":"https://www.youtube.com/watch?v=YYetK3WzRdc","annotations":[]},"Sw4QZTF7sOI":{"duration":78.09,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=Sw4QZTF7sOI","annotations":[{"segment":[12.883132,78.0887074829932],"label":"Preparing 
salad"}]},"PIr3WhkKB1o":{"duration":137.439,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=PIr3WhkKB1o","annotations":[{"segment":[39.75690405616225,44.47200858034321],"label":"Applying sunscreen"},{"segment":[51.544665366614666,54.75950936037441],"label":"Applying sunscreen"},{"segment":[71.2623751950078,73.61992745709829],"label":"Applying sunscreen"}]},"otGlbzsPIi0":{"duration":66.827,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=otGlbzsPIi0","annotations":[{"segment":[11.147530421216851,63.674588143525746],"label":"Making an omelette"}]},"Ti1ZaH0VGfg":{"duration":81.55,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ti1ZaH0VGfg","annotations":[{"segment":[2.4173572542901716,73.02963494539782],"label":"Playing bagpipes"}]},"ESecNZbZgug":{"duration":91.557,"subset":"validation","resolution":"638x360","url":"https://www.youtube.com/watch?v=ESecNZbZgug","annotations":[{"segment":[10.426846374535254,91.35628117913832],"label":"Elliptical trainer"}]},"fxbEiZrQQzM":{"duration":65.783,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=fxbEiZrQQzM","annotations":[{"segment":[3.3865995945960288,41.562812878315135],"label":"Cutting the grass"},{"segment":[54.49346577379096,63.52439795475819],"label":"Cutting the grass"}]},"CKWWRS9CpTY":{"duration":85.566,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=CKWWRS9CpTY","annotations":[{"segment":[6.54089082527301,83.02926721060841],"label":"Running a marathon"}]},"vaXGI0UyZ8s":{"duration":143.34,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=vaXGI0UyZ8s","annotations":[]},"55ziFpzx5oc":{"duration":68.71000000000001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=55ziFpzx5oc","annotations":[{"segment":[0.01,63.02685221840873],"label":"Playing 
guitarra"}]},"tww0AR8NanU":{"duration":203.38,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tww0AR8NanU","annotations":[{"segment":[10.153314584284052,44.73804199843994],"label":"Playing harmonica"}]},"JjUQ6dWZIL0":{"duration":189.406,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=JjUQ6dWZIL0","annotations":[{"segment":[28.95742053354134,166.06194224336974],"label":"Capoeira"}]},"UYe6JGaUZzg":{"duration":117.354,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UYe6JGaUZzg","annotations":[{"segment":[0.2,117.101],"label":"Baton twirling"}]},"z9MMLl1isUk":{"duration":41.053,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=z9MMLl1isUk","annotations":[{"segment":[3.010117492979719,31.830391361934474],"label":"Hitting a pinata"}]},"r-iXUXMP4DY":{"duration":233.13,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=r-iXUXMP4DY","annotations":[{"segment":[31.641444596559374,204.03276197035882],"label":"Ironing clothes"}]},"RoPO0M2qmBg":{"duration":87.93,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RoPO0M2qmBg","annotations":[]},"X82bc2v5kcM":{"duration":116.309,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=X82bc2v5kcM","annotations":[{"segment":[20.866624594383776,107.23630552418096],"label":"Blow-drying 
hair"}]},"iMXdwkAGfM4":{"duration":218.36,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iMXdwkAGfM4","annotations":[{"segment":[0.01,123.63576131045241],"label":"Shaving"},{"segment":[146.45558502340094,198.56652574102964],"label":"Shaving"}]},"U7oDqpIYsxI":{"duration":73.26,"subset":"validation","resolution":"312x240","url":"https://www.youtube.com/watch?v=U7oDqpIYsxI","annotations":[{"segment":[36.22946844602184,38.85810495787831],"label":"Spinning"}]},"ranTpEJvqs8":{"duration":53.546,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ranTpEJvqs8","annotations":[{"segment":[19.78857566302652,53.501],"label":"BMX"}]},"wott7JRSkOk":{"duration":5.411,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wott7JRSkOk","annotations":[{"segment":[0.2,5.210249433106576],"label":"Table soccer"}]},"vaT6Saf9n3E":{"duration":224.61,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vaT6Saf9n3E","annotations":[]},"yl37hI-Bgkk":{"duration":57.47,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yl37hI-Bgkk","annotations":[{"segment":[21.248255913910025,54.06201821134069],"label":"Getting a haircut"}]},"upoS4Jct7kE":{"duration":90.535,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=upoS4Jct7kE","annotations":[{"segment":[44.63370828390555,65.19055914549186],"label":"Sharpening knives"}]},"kgmcYBRYkAk":{"duration":16.14,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=kgmcYBRYkAk","annotations":[{"segment":[0,13.98448557],"label":"Hopscotch"}]},"lrlUN65DM8c":{"duration":109.03999999999999,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=lrlUN65DM8c","annotations":[{"segment":[8.930807433697348,14.374347202808112],"label":"Discus 
throw"}]},"-aWU5Yj_OPw":{"duration":15.09,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-aWU5Yj_OPw","annotations":[{"segment":[0.40027253198127927,14.645265581903276],"label":"Playing piano"}]},"s9Wop4PMZaw":{"duration":42.934,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=s9Wop4PMZaw","annotations":[{"segment":[0.2,27.576210021598683],"label":"Camel ride"}]},"p81NOkb2rww":{"duration":100.52,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=p81NOkb2rww","annotations":[{"segment":[35.72290410174314,94.5762118049574],"label":"Futsal"}]},"p3vqC_FFyyM":{"duration":147.308,"subset":"validation","resolution":"853x480","url":"https://www.youtube.com/watch?v=p3vqC_FFyyM","annotations":[{"segment":[0.01,141.15953212570474],"label":"Roof shingle removal"}]},"KzxVQ19pRUU":{"duration":235.4,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KzxVQ19pRUU","annotations":[{"segment":[55.63758328292315,200.69926907008255],"label":"Mixing drinks"}]},"eOmL8f-yB40":{"duration":172.13,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=eOmL8f-yB40","annotations":[{"segment":[12.406390389732607,166.84035392286094],"label":"Playing piano"}]},"om1vXrpctVE":{"duration":153.368,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=om1vXrpctVE","annotations":[{"segment":[41.740844073340845,126.89217220496595],"label":"Sumo"}]},"bxxSc2m08Sk":{"duration":44.931,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bxxSc2m08Sk","annotations":[{"segment":[0.2,41.38559501557632],"label":"Capoeira"}]},"R4aCl2dLS1w":{"duration":163.72,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=R4aCl2dLS1w","annotations":[{"segment":[10.852197043681748,136.73768275039004],"label":"Doing 
kickboxing"}]},"Xbk1XePzTIA":{"duration":59.93,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Xbk1XePzTIA","annotations":[{"segment":[0.9584839219779613,51.43580635188451],"label":"Starting a campfire"}]},"CX5QUmM97DI":{"duration":219.27,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CX5QUmM97DI","annotations":[{"segment":[54.04964353903655,184.5014535078839],"label":"Chopping wood"}]},"ufK2mbJI0to":{"duration":170.296,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ufK2mbJI0to","annotations":[{"segment":[1.8596973822152885,168.96679072698905],"label":"Futsal"}]},"Ey7w7pu5HZc":{"duration":86.43,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ey7w7pu5HZc","annotations":[{"segment":[0.2,83.37302808112325],"label":"Cumbia"}]},"DMw9Cb_Xy2A":{"duration":97.06,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DMw9Cb_Xy2A","annotations":[{"segment":[0.2,3.4826309001560065],"label":"Archery"},{"segment":[8.933705352574103,10.447892700468019],"label":"Archery"},{"segment":[15.141873478939159,17.110317031201248],"label":"Archery"},{"segment":[22.409972748829954,24.529835035881437],"label":"Archery"},{"segment":[29.829490753510143,31.949353040561626],"label":"Archery"},{"segment":[37.24900875819033,38.61177737129485],"label":"Archery"},{"segment":[42.700083210608426,45.879876641185646],"label":"Archery"},{"segment":[49.816763745709835,64.6557997550702],"label":"Archery"}]},"dRiIYsAmNAc":{"duration":64.761,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=dRiIYsAmNAc","annotations":[{"segment":[0.2,63.750171177847115],"label":"Beach soccer"}]},"gnmtsqvTO_c":{"duration":178.7,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=gnmtsqvTO_c","annotations":[{"segment":[92.55640555962376,120.99240967733947],"label":"Washing 
hands"}]},"5k2Ot6-wOgc":{"duration":190.543,"subset":"testing","resolution":"543x360","url":"https://www.youtube.com/watch?v=5k2Ot6-wOgc","annotations":[]},"2GACaR0GdD8":{"duration":146.05,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=2GACaR0GdD8","annotations":[{"segment":[4.441528081123245,17.42445631825273],"label":"Horseback riding"},{"segment":[20.841016380655226,32.68509126365055],"label":"Horseback riding"},{"segment":[100.57452652106083,137.70623166926677],"label":"Horseback riding"}]},"mxIsgAmJHsM":{"duration":88.074,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mxIsgAmJHsM","annotations":[{"segment":[13.454131045241809,80.45021216848674],"label":"Baton twirling"}]},"PAws7ziV3ns":{"duration":68.66,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=PAws7ziV3ns","annotations":[]},"rUANGr4sQlY":{"duration":102.28,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rUANGr4sQlY","annotations":[]},"2-hF-v79XyQ":{"duration":118.98,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=2-hF-v79XyQ","annotations":[{"segment":[19.675019828004004,113.03855546957878],"label":"Futsal"}]},"zxOcSuLSrU4":{"duration":39.358,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=zxOcSuLSrU4","annotations":[{"segment":[0.01,38.375412441497666],"label":"Cutting the grass"}]},"JxbmHo84AC4":{"duration":156.5,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JxbmHo84AC4","annotations":[{"segment":[46.38907355694228,70.31606939157567],"label":"Bungee jumping"},{"segment":[78.61727202808112,137.70230255850234],"label":"Bungee jumping"}]},"t3dHI5TeY7I":{"duration":168.81,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=t3dHI5TeY7I","annotations":[{"segment":[99.78224492979719,108.7336864274571],"label":"Putting on 
shoes"},{"segment":[158.49317004680188,163.75872386895475],"label":"Putting on shoes"}]},"4vdclFJFrdk":{"duration":97.71000000000001,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=4vdclFJFrdk","annotations":[{"segment":[2.12126239,90.54521697602597],"label":"Tumbling"}]},"PMy2EyktRmo":{"duration":84.99,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=PMy2EyktRmo","annotations":[{"segment":[3.0917543000000003,81.95687825351014],"label":"Spinning"}]},"jaRSbVCkxlU":{"duration":237.239,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jaRSbVCkxlU","annotations":[{"segment":[10.36298244310725,81.79353999738221],"label":"Making a cake"},{"segment":[102.51950488359671,210.5906060452418],"label":"Making a cake"}]},"FFZOmzFc_us":{"duration":12.68,"subset":"training","resolution":"270x360","url":"https://www.youtube.com/watch?v=FFZOmzFc_us","annotations":[{"segment":[0.13845033650546024,11.82761446146646],"label":"Washing dishes"}]},"RJus6sogyiM":{"duration":85.589,"subset":"training","resolution":"144x176","url":"https://www.youtube.com/watch?v=RJus6sogyiM","annotations":[{"segment":[17.99880655226209,45.607647425897035],"label":"Painting fence"},{"segment":[51.07523088923557,85.28099999999999],"label":"Painting fence"}]},"lZKrd84QElk":{"duration":65.83,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lZKrd84QElk","annotations":[{"segment":[50.19077691107645,61.378496099844],"label":"Drinking 
beer"}]},"WPaHiE-UJvY":{"duration":177.66,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=WPaHiE-UJvY","annotations":[]},"P8CZCX42X-o":{"duration":227.208,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=P8CZCX42X-o","annotations":[]},"sH_716hH_rY":{"duration":63.39,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=sH_716hH_rY","annotations":[]},"JSDZWq777Mc":{"duration":171.224,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JSDZWq777Mc","annotations":[{"segment":[12.421081855519118,43.67412652424464],"label":"Cutting the grass"},{"segment":[66.64644995595742,117.93349761745571],"label":"Cutting the grass"},{"segment":[150.52214248569942,158.26862218408735],"label":"Cutting the grass"}]},"Eu3QFCldg0s":{"duration":154.53,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Eu3QFCldg0s","annotations":[{"segment":[0.01,125.11746105772231],"label":"Walking the dog"}]},"YD7pb5-CZdI":{"duration":140.57,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=YD7pb5-CZdI","annotations":[{"segment":[12.719608474258969,40.35186136661466],"label":"Shot put"},{"segment":[63.598042371294845,90.13377729173166],"label":"Shot put"}]},"blMN_9ZtxsY":{"duration":184.44,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=blMN_9ZtxsY","annotations":[{"segment":[4.315977373788829,14.098859421043509],"label":"Ping-pong"},{"segment":[27.334523367329254,34.527818990310635],"label":"Ping-pong"},{"segment":[37.980600889341694,94.66377039843499],"label":"Ping-pong"},{"segment":[113.9418026680251,170.91270328549143],"label":"Ping-pong"}]},"BuaWU70q7cE":{"duration":147.91,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=BuaWU70q7cE","annotations":[{"segment":[30.333897487519504,141.28906624414978],"label":"Wrapping 
presents"}]},"GHU3G24jFjI":{"duration":206.38,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=GHU3G24jFjI","annotations":[{"segment":[7.481310401380128,205.50223918642686],"label":"Breakdancing"}]},"GbNPZBkaTnU":{"duration":127.69,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=GbNPZBkaTnU","annotations":[]},"eRh6V0bcBpU":{"duration":66.27,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eRh6V0bcBpU","annotations":[{"segment":[8.729689199687986,45.0947850374415],"label":"Playing racquetball"}]},"Pnszux17JFE":{"duration":54.5,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Pnszux17JFE","annotations":[]},"vPhAXENSjiw":{"duration":207.29,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=vPhAXENSjiw","annotations":[{"segment":[18.109059752866308,119.6491447957238],"label":"Cheerleading"}]},"7fPzX7HZLEA":{"duration":35.318,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7fPzX7HZLEA","annotations":[]},"mmRpNwb0NZ0":{"duration":128.89,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mmRpNwb0NZ0","annotations":[{"segment":[25.13522289391576,100.74197335881436],"label":"Ballet"}]},"UUjTMDSUvs0":{"duration":114.684,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UUjTMDSUvs0","annotations":[{"segment":[6.7950046801872075,114.621],"label":"Making an omelette"}]},"iChE4EoYG6k":{"duration":114.06,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iChE4EoYG6k","annotations":[{"segment":[0.01,0.02],"label":"Doing step aerobics"}]},"UtErFzrzcqk":{"duration":38.453,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=UtErFzrzcqk","annotations":[{"segment":[0.01,38.452243],"label":"Hitting a 
pinata"}]},"Cz5fahiO1AA":{"duration":61.93,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Cz5fahiO1AA","annotations":[{"segment":[48.595218957057604,56.71052391211295],"label":"Preparing pasta"}]},"sORYi-Xpj0U":{"duration":71.193,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=sORYi-Xpj0U","annotations":[]},"FaP7unySMOs":{"duration":170.76,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FaP7unySMOs","annotations":[]},"sV4vBuWGJng":{"duration":73.21000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sV4vBuWGJng","annotations":[{"segment":[18.503007418255915,63.16149976287051],"label":"Getting a haircut"}]},"Zxi0V2pBPlA":{"duration":122.37,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Zxi0V2pBPlA","annotations":[{"segment":[5.03058959,106.10116426833072],"label":"Shaving legs"}]},"skk-nX55gvA":{"duration":84.1,"subset":"testing","resolution":"630x360","url":"https://www.youtube.com/watch?v=skk-nX55gvA","annotations":[]},"sBx1HvNjs6s":{"duration":154.3,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sBx1HvNjs6s","annotations":[{"segment":[23.78781432,114.67444539781592],"label":"Getting a haircut"}]},"7DJDUzdw_I4":{"duration":49.13,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7DJDUzdw_I4","annotations":[{"segment":[0,49.063333],"label":"Using parallel bars"}]},"6pnabYJdqxc":{"duration":79.366,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=6pnabYJdqxc","annotations":[{"segment":[25.196476310753113,30.272915878315132],"label":"Trimming branches or 
hedges"}]},"a_JlhLnJM98":{"duration":184.83,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=a_JlhLnJM98","annotations":[]},"8jx74MCCMIs":{"duration":40.29,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=8jx74MCCMIs","annotations":[]},"sIzcPVbn0lg":{"duration":52.2,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=sIzcPVbn0lg","annotations":[{"segment":[1.5769015800000001,52.061],"label":"Cumbia"}]},"FMYfVG0ZRFo":{"duration":165.698,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=FMYfVG0ZRFo","annotations":[]},"mXM5FOi_OoM":{"duration":40.96,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=mXM5FOi_OoM","annotations":[{"segment":[15.680211388455538,17.468585803432138],"label":"Cutting the grass"},{"segment":[21.3646872074883,39.05681981279251],"label":"Cutting the grass"}]},"oq8DE3Cbar0":{"duration":59.75,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oq8DE3Cbar0","annotations":[{"segment":[41.02441248,57.930050054602184],"label":"Playing accordion"},{"segment":[4.4203652652106085,37.992404480000005],"label":"Playing accordion"}]},"at2XbuHG5WQ":{"duration":91.696,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=at2XbuHG5WQ","annotations":[{"segment":[27.72928861154446,91.601],"label":"Painting fence"}]},"NGk3v4sKqdg":{"duration":177.77,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=NGk3v4sKqdg","annotations":[{"segment":[10.396392355694228,43.1103736349454],"label":"Fixing bicycle"},{"segment":[53.09091029641186,122.40019266770672],"label":"Fixing bicycle"},{"segment":[133.76691497659908,156.2231224648986],"label":"Fixing 
bicycle"}]},"BSDXE1iuZBQ":{"duration":81.108,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BSDXE1iuZBQ","annotations":[]},"29Vrl0rJbSo":{"duration":214.297,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=29Vrl0rJbSo","annotations":[{"segment":[8.143253468748194,212.05831733167653],"label":"Blow-drying hair"}]},"elD1P_1oiUk":{"duration":146.936,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=elD1P_1oiUk","annotations":[]},"ECnyZgaH11s":{"duration":35.225,"subset":"testing","resolution":"1906x1080","url":"https://www.youtube.com/watch?v=ECnyZgaH11s","annotations":[]},"dffUIntKlGI":{"duration":84.75,"subset":"training","resolution":"432x720","url":"https://www.youtube.com/watch?v=dffUIntKlGI","annotations":[{"segment":[12.8253151825273,79.19962674570982],"label":"Hopscotch"}]},"L5IbDi09Yb4":{"duration":212.231,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=L5IbDi09Yb4","annotations":[]},"fnKOW7tJA1A":{"duration":43.1,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fnKOW7tJA1A","annotations":[{"segment":[1.4791219157566304,38.99503232449298],"label":"Using parallel bars"}]},"eSpPY2yMg70":{"duration":51.62,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eSpPY2yMg70","annotations":[{"segment":[5.073206897035881,46.94729557098284],"label":"Using the pommel 
horse"}]},"ofCRJRFgjNs":{"duration":121.65,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ofCRJRFgjNs","annotations":[]},"cECycCDw1Nc":{"duration":61.84,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cECycCDw1Nc","annotations":[]},"1maBC85X9WI":{"duration":91.255,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=1maBC85X9WI","annotations":[]},"nDJgThY8zi8":{"duration":52.71,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nDJgThY8zi8","annotations":[{"segment":[12.087737875975039,32.97403325351014],"label":"Spinning"}]},"jEN2smSwZ-o":{"duration":178.35,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jEN2smSwZ-o","annotations":[{"segment":[51.47443832447175,146.3543474976599],"label":"Getting a piercing"}]},"ofrX4WyAM-0":{"duration":77.81,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ofrX4WyAM-0","annotations":[{"segment":[32.51161934477379,63.81011856474259],"label":"Preparing pasta"}]},"3UOWHwwxczI":{"duration":64.41,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3UOWHwwxczI","annotations":[{"segment":[5.325811816146115,6.933604062529848],"label":"Rock climbing"},{"segment":[20.59983755850234,60.19172047581904],"label":"Rock climbing"}]},"F5IWShcFIN4":{"duration":159.43,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=F5IWShcFIN4","annotations":[{"segment":[43.51821372854914,60.67682371294852],"label":"Walking the dog"}]},"oUWArkSoOAU":{"duration":89.049,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=oUWArkSoOAU","annotations":[]},"yvOOFjG-FEo":{"duration":95.37,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yvOOFjG-FEo","annotations":[{"segment":[33.68053798595943,95.316666],"label":"Polishing 
forniture"}]},"OkCu0QymO10":{"duration":35.899,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=OkCu0QymO10","annotations":[{"segment":[0.01,35.861],"label":"Roof shingle removal"}]},"S04LRGuoAg8":{"duration":161.75,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=S04LRGuoAg8","annotations":[]},"--tFD65KaK4":{"duration":86.425,"subset":"testing","resolution":"853x480","url":"https://www.youtube.com/watch?v=--tFD65KaK4","annotations":[]},"OTlQ20IWBmY":{"duration":197.53,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OTlQ20IWBmY","annotations":[]},"McwVhHxdr5g":{"duration":24.313,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=McwVhHxdr5g","annotations":[{"segment":[0.01,22.499770670826834],"label":"Rollerblading"}]},"od9EdcDcByA":{"duration":224.82,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=od9EdcDcByA","annotations":[{"segment":[21.043581988177486,25.953751118752233],"label":"Paintball"},{"segment":[39.6320794110676,43.49006944223347],"label":"Paintball"},{"segment":[46.64660674046009,50.50459677162597],"label":"Paintball"},{"segment":[58.92202956689696,64.53365143041096],"label":"Paintball"},{"segment":[66.6380096292287,72.24963149274271],"label":"Paintball"},{"segment":[75.75689515743895,76.45834789037819],"label":"Paintball"},{"segment":[78.91343245566557,89.0844970832847],"label":"Paintball"},{"segment":[97.85265624502532,141.6934520537284],"label":"Paintball"},{"segment":[144.84998935195503,147.3050739172424],"label":"Paintball"},{"segment":[153.26742214722603,155.72250608424338],"label":"Paintball"},{"segment":[158.87904336973477,163.4384861154446],"label":"Paintball"},{"segment":[164.84139157566304,172.90809797191886],"label":"Paintball"},{"segment":[174.31100343213728,179.22117254290168],"label":"Paintball"},{"segment":[180.2733516380655,194.3024062402496],"label":"Paintball"},{"segmen
t":[196.40676443057723,197.45894352574103],"label":"Paintball"},{"segment":[198.51112262090484,200.61548081123243],"label":"Paintball"},{"segment":[213.24162995319813,217.45034633385333],"label":"Paintball"}]},"W07hi9Y37Jg":{"duration":200.97,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=W07hi9Y37Jg","annotations":[{"segment":[11.91390144773791,164.59995421216848],"label":"Playing accordion"}]},"Lmne5ZF0McI":{"duration":45.883,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Lmne5ZF0McI","annotations":[{"segment":[0.01,45.81105023400936],"label":"Powerbocking"}]},"L5kxbN9wFAg":{"duration":48.18,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=L5kxbN9wFAg","annotations":[{"segment":[21.046479126365057,47.65524202184088],"label":"Discus throw"},{"segment":[0,20.763601949999998],"label":"Discus throw"}]},"RgWvaKWa7YU":{"duration":164.235,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=RgWvaKWa7YU","annotations":[{"segment":[7.662581136621315,161.425120819161],"label":"Drum corps"}]},"TsHek6oZYXU":{"duration":115.64,"subset":"testing","resolution":"854x428","url":"https://www.youtube.com/watch?v=TsHek6oZYXU","annotations":[]},"VggDjeV0gHA":{"duration":176.495,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=VggDjeV0gHA","annotations":[]},"MKmrqcoCZ-M":{"duration":70.287,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MKmrqcoCZ-M","annotations":[{"segment":[23.22402513960366,55.93817396430138],"label":"Doing 
crunches"}]},"B2OfU3a0NRk":{"duration":192.262,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=B2OfU3a0NRk","annotations":[]},"Frldvgi9luM":{"duration":20.318,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Frldvgi9luM","annotations":[]},"u9JhQ0xSSJI":{"duration":30.349,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=u9JhQ0xSSJI","annotations":[{"segment":[0.01,29.091049879919165],"label":"Drum corps"}]},"kbWQIXFOK54":{"duration":139.436,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kbWQIXFOK54","annotations":[{"segment":[20.2301589875195,105.9364239453978],"label":"Painting furniture"}]},"01_BrVxYsE0":{"duration":208.98,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=01_BrVxYsE0","annotations":[{"segment":[56.401668311630424,168.8789837307778],"label":"Mixing drinks"}]},"ibHRKaD5MbM":{"duration":13.816,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ibHRKaD5MbM","annotations":[{"segment":[9.181843170046802,11.83293873322933],"label":"Hitting a pinata"}]},"rulzKikXMHo":{"duration":49.37,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rulzKikXMHo","annotations":[{"segment":[0.9097969499999999,12.245138686427458],"label":"Shot put"}]},"tYyTrDyzB6o":{"duration":26.077,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tYyTrDyzB6o","annotations":[{"segment":[0.01,25.967527260588508],"label":"River tubing"}]},"Gpjx38IDVT0":{"duration":22.41,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=Gpjx38IDVT0","annotations":[{"segment":[6.501949188767551,15.17121477379095],"label":"Hammer 
throw"}]},"L963epA4MFU":{"duration":27.052,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=L963epA4MFU","annotations":[{"segment":[0.2,21.119377535101403],"label":"Blowing leaves"}]},"pg-DMwEM3K4":{"duration":48.322,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=pg-DMwEM3K4","annotations":[{"segment":[7.682780031201247,42.724909516380656],"label":"Canoeing"}]},"g2uL6H3fP1c":{"duration":195.629,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=g2uL6H3fP1c","annotations":[{"segment":[24.720509089751346,174.874711174727],"label":"Capoeira"}]},"GUqqkqE3jWc":{"duration":208.608,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=GUqqkqE3jWc","annotations":[]},"tVIY6uyH3aA":{"duration":3.553,"subset":"training","resolution":"276x480","url":"https://www.youtube.com/watch?v=tVIY6uyH3aA","annotations":[{"segment":[0.2,3.328],"label":"Painting fence"}]},"XRb38sJzuY4":{"duration":31.696,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=XRb38sJzuY4","annotations":[{"segment":[3.708491196790729,31.25021905772231],"label":"Hurling"}]},"Q_HKC8HTTik":{"duration":113.012,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Q_HKC8HTTik","annotations":[]},"DYr73-hn2hM":{"duration":59.862,"subset":"testing","resolution":"3840x2160","url":"https://www.youtube.com/watch?v=DYr73-hn2hM","annotations":[]},"QX4915WkFvU":{"duration":207.24,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=QX4915WkFvU","annotations":[]},"AFtFitXAFks":{"duration":154.11,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AFtFitXAFks","annotations":[{"segment":[16.348731750390016,41.352674427457096],"label":"Tumbling"},{"segment":[43.276054633385336,101.69872838845554],"label":"Tumbling"},{"segment":[104.34337617160686,126.70267106552261],"label":"Tumblin
g"}]},"kMtjk1KFvXQ":{"duration":90.74,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=kMtjk1KFvXQ","annotations":[]},"V5PYoIqay84":{"duration":81.13,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=V5PYoIqay84","annotations":[]},"xzmcOKHP-sM":{"duration":113.569,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=xzmcOKHP-sM","annotations":[{"segment":[18.015576488403813,25.61037947751029],"label":"Knitting"},{"segment":[31.79219586399231,32.67531249063259],"label":"Knitting"},{"segment":[49.10128174614195,50.337645023438355],"label":"Knitting"}]},"13Y47Uk_w1o":{"duration":150.094,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=13Y47Uk_w1o","annotations":[{"segment":[0.010000000000000002,115.08890497311309],"label":"Powerbocking"},{"segment":[124.90812455255234,145.71551842326886],"label":"Powerbocking"}]},"JQf_oSGY8q4":{"duration":198.298,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=JQf_oSGY8q4","annotations":[{"segment":[0.01,196.22280327495682],"label":"Playing pool"}]},"_ImvYtXpIzE":{"duration":135.84,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_ImvYtXpIzE","annotations":[]},"NsjDjDFRtWA":{"duration":66.9,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=NsjDjDFRtWA","annotations":[]},"jbPxbFoP2OU":{"duration":124.95,"subset":"testing","resolution":"654x480","url":"https://www.youtube.com/watch?v=jbPxbFoP2OU","annotations":[]},"uH6n_FGsyk0":{"duration":124.39,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=uH6n_FGsyk0","annotations":[]},"5_UEjPqtnE0":{"duration":71.007,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=5_UEjPqtnE0","annotations":[{"segment":[0.19999999999999998,70.67099999999999],"label":"Table 
soccer"}]},"zE1l4avJZaU":{"duration":148.28,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zE1l4avJZaU","annotations":[{"segment":[3.7012824711388452,134.6341498876755],"label":"Wrapping presents"}]},"cufmgoo7E_k":{"duration":62.23,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=cufmgoo7E_k","annotations":[{"segment":[15.331915756630265,21.63934945397816],"label":"Calf roping"}]},"S3EA0yDdaWY":{"duration":113.407,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=S3EA0yDdaWY","annotations":[{"segment":[10.438329565835694,39.453347342056944],"label":"Baton twirling"},{"segment":[46.70710157878315,59.976164527301094],"label":"Baton twirling"},{"segment":[62.98381879563182,79.61437769110763],"label":"Baton twirling"}]},"Yezk4k2E5s0":{"duration":11.56,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Yezk4k2E5s0","annotations":[{"segment":[0.01,9.146377535101404],"label":"Javelin throw"}]},"kq71WnShe8E":{"duration":229.27,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=kq71WnShe8E","annotations":[]},"CQ0r8ldAKl8":{"duration":229.855,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CQ0r8ldAKl8","annotations":[{"segment":[54.921580331354946,216.75048556499982],"label":"Rope skipping"}]},"96HJ_LqNdkg":{"duration":167.0,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=96HJ_LqNdkg","annotations":[{"segment":[17.91209822,143.11860374414974],"label":"Cleaning shoes"}]},"xW1M8k9A-gk":{"duration":210.93,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xW1M8k9A-gk","annotations":[{"segment":[0,147.73121676],"label":"Preparing pasta"},{"segment":[158.24698992,201.48478939157567],"label":"Preparing 
pasta"}]},"nEOpfvJ7g_g":{"duration":25.96,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nEOpfvJ7g_g","annotations":[{"segment":[0.01,25.258733423556944],"label":"Playing racquetball"}]},"U-kPlTqXc_M":{"duration":12.12,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=U-kPlTqXc_M","annotations":[{"segment":[0.01,0.02],"label":"Drinking beer"}]},"RKsto2FmBzM":{"duration":118.49000000000001,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=RKsto2FmBzM","annotations":[{"segment":[52.86825050144863,68.95055047916202],"label":"Mowing the lawn"},{"segment":[73.5719006926677,93.53613505148206],"label":"Mowing the lawn"}]},"xkKjgwIPEuM":{"duration":171.9,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xkKjgwIPEuM","annotations":[{"segment":[7.240604868954758,53.09776903900156],"label":"Playing field hockey"},{"segment":[67.31080822620905,171.697323],"label":"Playing field hockey"}]},"WD40PRo1quM":{"duration":161.5,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=WD40PRo1quM","annotations":[{"segment":[9.695753510140404,153.99878627145085],"label":"Cumbia"}]},"2VfX8cPmZ6Y":{"duration":69.8,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=2VfX8cPmZ6Y","annotations":[]},"sNJ6_N0j9Ts":{"duration":86.15,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=sNJ6_N0j9Ts","annotations":[]},"-TubttTNt90":{"duration":220.52,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-TubttTNt90","annotations":[{"segment":[19.77614274570983,43.16358112324493],"label":"Sharpening 
knives"}]},"-ZDCHvzbnoU":{"duration":145.06,"subset":"validation","resolution":"648x480","url":"https://www.youtube.com/watch?v=-ZDCHvzbnoU","annotations":[{"segment":[59.06929710000001,116.5632969],"label":"Paintball"}]},"RLBfyIVpocE":{"duration":13.56,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=RLBfyIVpocE","annotations":[]},"aObyxa8gdAo":{"duration":128.059,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=aObyxa8gdAo","annotations":[{"segment":[13.98450073322933,128.0580725623583],"label":"Playing blackjack"}]},"ap2XL5wma4c":{"duration":100.636,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ap2XL5wma4c","annotations":[{"segment":[17.112700377535102,22.293609666146647],"label":"Applying sunscreen"},{"segment":[60.91493345397816,67.19482350078003],"label":"Applying sunscreen"}]},"xT6ejzSmftg":{"duration":172.222,"subset":"training","resolution":"424x240","url":"https://www.youtube.com/watch?v=xT6ejzSmftg","annotations":[{"segment":[89.98539001560064,116.05434009360374],"label":"Drum corps"}]},"Rg9qviHZ3qc":{"duration":213.136,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Rg9qviHZ3qc","annotations":[{"segment":[24.272894464077883,189.860584825273],"label":"Braiding hair"}]},"qwdavExYM_Y":{"duration":179.514,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=qwdavExYM_Y","annotations":[{"segment":[0.20000000000000004,179.08100000000002],"label":"Snow 
tubing"}]},"MkouTZhtvEI":{"duration":57.54,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MkouTZhtvEI","annotations":[{"segment":[12.973893720000001,48.955331980000004],"label":"Archery"}]},"PD-FXR7O9J0":{"duration":41.5,"subset":"testing","resolution":"270x480","url":"https://www.youtube.com/watch?v=PD-FXR7O9J0","annotations":[]},"PGJAnrLYOJA":{"duration":75.628,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=PGJAnrLYOJA","annotations":[]},"MSiXlMKPGKs":{"duration":234.011,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=MSiXlMKPGKs","annotations":[{"segment":[43.44348463462348,58.77647921154941],"label":"Curling"},{"segment":[80.31568587913584,92.36303876100622],"label":"Curling"},{"segment":[101.12474994782104,112.44196023079017],"label":"Curling"},{"segment":[124.12424181320993,138.72709379123464],"label":"Curling"},{"segment":[179.25000803025318,190.20214543057725],"label":"Curling"},{"segment":[198.59878524804995,225.24898988611548],"label":"Curling"}]},"FayB7ZswnAw":{"duration":9.96,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=FayB7ZswnAw","annotations":[{"segment":[6.33916434,9.685920107644305],"label":"Playing racquetball"},{"segment":[0,5.095579320000001],"label":"Playing racquetball"}]},"MjljlkQaHh4":{"duration":24.567,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=MjljlkQaHh4","annotations":[{"segment":[0.01,17.169869778471142],"label":"Sumo"}]},"y8OQkNk8WpY":{"duration":107.09,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=y8OQkNk8WpY","annotations":[{"segment":[0.01,107.09043],"label":"Curling"}]},"548xC74c6MQ":{"duration":14.374,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=548xC74c6MQ","annotations":[{"segment":[0.2,12.170833073322934],"label":"Kite 
flying"}]},"P3_YQbHXEIs":{"duration":69.96000000000001,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=P3_YQbHXEIs","annotations":[{"segment":[1.9083463338533544,62.648283931357255],"label":"Getting a tattoo"}]},"1sCgzkVUuy0":{"duration":60.54,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=1sCgzkVUuy0","annotations":[]},"XLspZ5bof7o":{"duration":169.437,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=XLspZ5bof7o","annotations":[{"segment":[20.65766144183205,162.38217267582755],"label":"Braiding hair"}]},"oni9K1_MCuw":{"duration":199.97,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=oni9K1_MCuw","annotations":[]},"daSrmaPxaZA":{"duration":76.39,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=daSrmaPxaZA","annotations":[{"segment":[40.10550985307782,49.73731826429278],"label":"Cleaning shoes"}]},"319ko2wJlPM":{"duration":8.57,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=319ko2wJlPM","annotations":[]},"EeUkryzfwYk":{"duration":139.669,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=EeUkryzfwYk","annotations":[{"segment":[22.769592579621552,124.74250480223292],"label":"Decorating the Christmas tree"}]},"gXffXyAkcHM":{"duration":13.47,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=gXffXyAkcHM","annotations":[{"segment":[5.2105426770670835,13.383530989079564],"label":"Using uneven bars"}]},"gvhjtFg8A-c":{"duration":104.444,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gvhjtFg8A-c","annotations":[{"segment":[67.91765522620905,100.8178143525741],"label":"Snow 
tubing"}]},"lQs4uY7EpDc":{"duration":70.13,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=lQs4uY7EpDc","annotations":[]},"EjiugNIf6d0":{"duration":63.414,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=EjiugNIf6d0","annotations":[]},"iVGGeRlJekQ":{"duration":102.87,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=iVGGeRlJekQ","annotations":[{"segment":[18.87713436145233,83.18738007840412],"label":"Wrapping presents"}]},"rJpFVvho0o4":{"duration":167.485,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=rJpFVvho0o4","annotations":[{"segment":[7.929617843497209,160.0008997749614],"label":"Swimming"}]},"P4dx0xrr6fM":{"duration":19.78,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=P4dx0xrr6fM","annotations":[{"segment":[6.635618120124805,11.172529113884556],"label":"Discus throw"}]},"r1fHWOdyQBU":{"duration":155.249,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=r1fHWOdyQBU","annotations":[]},"ZDlw4L7oNfs":{"duration":122.857,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZDlw4L7oNfs","annotations":[{"segment":[29.132921986267206,106.18183266458658],"label":"Longboarding"}]},"Q_LhL-t0Yls":{"duration":168.34,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=Q_LhL-t0Yls","annotations":[{"segment":[90.81915200624024,154.8650279875195],"label":"Playing flauta"}]},"S9KUHyvFuGc":{"duration":33.228,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=S9KUHyvFuGc","annotations":[{"segment":[7.163117140656587,32.98967359860051],"label":"Calf 
roping"}]},"hOcP06xuKFg":{"duration":39.103,"subset":"training","resolution":"544x360","url":"https://www.youtube.com/watch?v=hOcP06xuKFg","annotations":[{"segment":[0.2,35.87432357782158],"label":"Shuffleboard"}]},"lVhdZ-sTUjI":{"duration":20.67,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lVhdZ-sTUjI","annotations":[]},"Jbl0E75cx8o":{"duration":128.01,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=Jbl0E75cx8o","annotations":[{"segment":[0.01,128.01160997732427],"label":"Getting a piercing"}]},"qbDQNRf-EUI":{"duration":71.36,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=qbDQNRf-EUI","annotations":[{"segment":[8.477303955450907,70.94058473738237],"label":"Fixing bicycle"}]},"bULPHJydFTQ":{"duration":89.19,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bULPHJydFTQ","annotations":[{"segment":[14.16622464898596,66.66458658346335],"label":"Hammer throw"}]},"8nhRg9GjNQ0":{"duration":231.085,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=8nhRg9GjNQ0","annotations":[]},"GKK32IWDlSE":{"duration":98.38,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=GKK32IWDlSE","annotations":[{"segment":[0,20.23588129797192],"label":"Javelin throw"},{"segment":[32.653353912636504,47.98356701716069],"label":"Javelin throw"},{"segment":[55.49537143837754,80.17701453666146],"label":"Javelin 
throw"}]},"qZvP5BvVbcg":{"duration":227.045,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qZvP5BvVbcg","annotations":[{"segment":[16.239573872472786,213.93873405909798],"label":"Skiing"}]},"lueciI7K4_o":{"duration":23.058,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lueciI7K4_o","annotations":[]},"IvkpOoeCM3A":{"duration":157.827,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=IvkpOoeCM3A","annotations":[{"segment":[10.554426875057587,59.644801853457444],"label":"Playing pool"},{"segment":[64.30838747640543,85.41724871711737],"label":"Playing pool"},{"segment":[95.48077558768935,101.37161958596138],"label":"Playing pool"},{"segment":[107.2624645253082,115.85328006185563],"label":"Playing pool"},{"segment":[134.01671862484164,136.96214109451503],"label":"Playing pool"},{"segment":[147.27111973837197,153.65286842266434],"label":"Playing pool"}]},"jcX2QAiqP9U":{"duration":26.076,"subset":"training","resolution":"640x278","url":"https://www.youtube.com/watch?v=jcX2QAiqP9U","annotations":[{"segment":[0.2,22.669499956454125],"label":"Futsal"}]},"JgTElHbqCs8":{"duration":29.281,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=JgTElHbqCs8","annotations":[]},"fdd5ixvEXOE":{"duration":63.18,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=fdd5ixvEXOE","annotations":[{"segment":[10.842382184087363,59.6331020124805],"label":"Hopscotch"}]},"IeBCgi4xPIE":{"duration":146.495,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IeBCgi4xPIE","annotations":[]},"qAXE_vqt56g":{"duration":25.589,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=qAXE_vqt56g","annotations":[{"segment":[13.013751659906397,18.682318333853352],"label":"Rock-paper-scissors"}]},"2bMIjmygXXE":{"duration":50.04,"subset":"testing","resolution":"640x480","url":"https://www.youtu
be.com/watch?v=2bMIjmygXXE","annotations":[]},"-gZjpAPEhR4":{"duration":68.128,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-gZjpAPEhR4","annotations":[{"segment":[2.8668439937597507,37.59855538221529],"label":"Rollerblading"}]},"fvUQQF5S4Dg":{"duration":107.648,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=fvUQQF5S4Dg","annotations":[{"segment":[93.91969291668168,99.27696609864127],"label":"Having an ice cream"}]},"Xr8zB1aIBzg":{"duration":159.2,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Xr8zB1aIBzg","annotations":[{"segment":[5.460587600624025,149.91795048985958],"label":"Doing step aerobics"}]},"-DphPPTybSY":{"duration":158.384,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=-DphPPTybSY","annotations":[{"segment":[0.009999999999999998,154.36914976599064],"label":"Playing pool"}]},"X3Nr9Gj6JR0":{"duration":105.78999999999999,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=X3Nr9Gj6JR0","annotations":[{"segment":[2.3105484981304016,67.66606227769111],"label":"Preparing pasta"},{"segment":[69.81157156942277,97.86823153822152],"label":"Preparing 
pasta"}]},"7Xa3VvDxbPw":{"duration":37.036,"subset":"testing","resolution":"192x144","url":"https://www.youtube.com/watch?v=7Xa3VvDxbPw","annotations":[]},"b5SYTLjp6HI":{"duration":149.07,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=b5SYTLjp6HI","annotations":[{"segment":[16.046702643615948,20.000237950078002],"label":"Snatch"},{"segment":[21.860725201248048,24.186334265210608],"label":"Snatch"},{"segment":[41.16328043213728,46.279620372854914],"label":"Snatch"},{"segment":[47.20986399843994,51.395960313572544],"label":"Snatch"},{"segment":[84.65216992823713,86.51265717940717],"label":"Snatch"},{"segment":[103.95472515912635,110.23386963182527],"label":"Snatch"},{"segment":[111.39667416380654,114.88508775975039],"label":"Snatch"},{"segment":[115.35020957254291,117.44325773010921],"label":"Snatch"},{"segment":[132.09459483307333,136.28069114820593],"label":"Snatch"},{"segment":[137.21093477379094,139.5365438377535],"label":"Snatch"}]},"AGjhryYGVs4":{"duration":198.972,"subset":"validation","resolution":"640x356","url":"https://www.youtube.com/watch?v=AGjhryYGVs4","annotations":[{"segment":[37.2395007800312,105.82224804992198],"label":"Rafting"},{"segment":[127.54529017160685,186.50783307332293],"label":"Rafting"}]},"OjNLFiEYpVo":{"duration":48.46,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OjNLFiEYpVo","annotations":[]},"BfLrltipDDU":{"duration":115.055,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BfLrltipDDU","annotations":[{"segment":[21.570916784280556,105.77632176870424],"label":"Wakeboarding"}]},"ZefWc2tgltY":{"duration":102.03,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZefWc2tgltY","annotations":[{"segment":[13.525873634945398,45.03320280811233],"label":"Playing flauta"},{"segment":[66.51547269890796,85.92907956318253],"label":"Playing 
flauta"}]},"r5cjo7N6IEY":{"duration":38.894,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=r5cjo7N6IEY","annotations":[{"segment":[20.83386254551255,23.133454445824075],"label":"Trimming branches or hedges"}]},"O0KUnuhLwj0":{"duration":112.18,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=O0KUnuhLwj0","annotations":[{"segment":[31.150166614664588,107.62557566302652],"label":"Smoking a cigarette"}]},"8e80cJTrJDs":{"duration":39.149,"subset":"validation","resolution":"400x300","url":"https://www.youtube.com/watch?v=8e80cJTrJDs","annotations":[{"segment":[0.19999999999999998,37.377677541341654],"label":"BMX"}]},"Bhq890NxKxc":{"duration":116.797,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Bhq890NxKxc","annotations":[]},"czh5nNO_Eow":{"duration":215.25,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=czh5nNO_Eow","annotations":[{"segment":[32.23692911798105,199.4659989175077],"label":"Painting"}]},"fPCfTJLh46A":{"duration":184.135,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=fPCfTJLh46A","annotations":[{"segment":[36.184408736349454,51.979190327613104],"label":"Playing ten pins"},{"segment":[54.27661310452418,66.62526053042122],"label":"Playing ten pins"}]},"jFbIvLKgVNM":{"duration":17.11,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=jFbIvLKgVNM","annotations":[]},"lol04SNoopE":{"duration":19.48,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=lol04SNoopE","annotations":[{"segment":[1.3980513042121685,3.2215964836193445],"label":"Springboard diving"}]},"CV9n0jgThLY":{"duration":58.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=CV9n0jgThLY","annotations":[{"segment":[0.01,12.152499427690179],"label":"Fixing the roof"},{"segment":[25.98681781554065,48.50149283890513],"label":"Fixing 
the roof"}]},"lrM8LSnXmSw":{"duration":75.72,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=lrM8LSnXmSw","annotations":[{"segment":[6.165272784296208,73.51149512230243],"label":"Doing step aerobics"}]},"QKIkAfzncFc":{"duration":55.287,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=QKIkAfzncFc","annotations":[{"segment":[0.2,10.690115444617785],"label":"Playing congas"},{"segment":[17.500753510140406,28.53571138845554],"label":"Playing congas"},{"segment":[34.65666458658346,52.32983931357254],"label":"Playing congas"}]},"E9hFBWDR12A":{"duration":129.45,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=E9hFBWDR12A","annotations":[{"segment":[71.89457932605305,105.62040726833074],"label":"Playing lacrosse"}]},"TqO-bmGQGiY":{"duration":108.88,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TqO-bmGQGiY","annotations":[{"segment":[3.0574268143525742,107.0099385023401],"label":"Walking the dog"}]},"vEiQlqCy-rI":{"duration":106.348,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=vEiQlqCy-rI","annotations":[{"segment":[0.01,7.186080314957094],"label":"Futsal"},{"segment":[16.12728592243373,22.41924542399136],"label":"Futsal"},{"segment":[38.48029994112532,54.22039807201397],"label":"Futsal"},{"segment":[61.0103684769984,70.78130149880525],"label":"Futsal"},{"segment":[82.53954293182706,88.335859131204],"label":"Futsal"},{"segment":[101.08775476983328,106.121],"label":"Futsal"}]},"_7AyUgKv3kU":{"duration":71.866,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=_7AyUgKv3kU","annotations":[{"segment":[18.274762088299532,65.92368164368175],"label":"Blowing 
leaves"}]},"HLZLkI1NYAs":{"duration":66.247,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HLZLkI1NYAs","annotations":[{"segment":[17.645103948121303,58.30107302959502],"label":"Playing ten pins"}]},"jTChoOFb05w":{"duration":184.715,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jTChoOFb05w","annotations":[{"segment":[40.63141445657826,90.7723088923557],"label":"Beach soccer"},{"segment":[99.41729069162767,104.89244583116658],"label":"Beach soccer"},{"segment":[111.23209915063268,150.71084818876753],"label":"Beach soccer"},{"segment":[160.79666020904835,172.03513646021838],"label":"Beach soccer"}]},"YniPQWUmrhg":{"duration":52.99,"subset":"testing","resolution":"194x144","url":"https://www.youtube.com/watch?v=YniPQWUmrhg","annotations":[]},"g_65rfEiXPY":{"duration":156.18,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=g_65rfEiXPY","annotations":[{"segment":[0.7309377354686024,153.00963262476077],"label":"Cleaning windows"}]},"Hy8WbkpvUlA":{"duration":220.172,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Hy8WbkpvUlA","annotations":[{"segment":[0.01,216.53637831096225],"label":"Waxing skis"}]},"-06dWmCzbxY":{"duration":65.527,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=-06dWmCzbxY","annotations":[{"segment":[3.2712243129180947,63.379970078003126],"label":"River 
tubing"}]},"mvRoVXRGVGw":{"duration":31.626,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mvRoVXRGVGw","annotations":[]},"ZN7C8n_6uH8":{"duration":110.597,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=ZN7C8n_6uH8","annotations":[{"segment":[8.622542901716068,25.005374414976597],"label":"Futsal"},{"segment":[28.97174414976599,72.77557878315133],"label":"Futsal"},{"segment":[77.94919812792511,101.24696879875195],"label":"Futsal"}]},"InHt3sukfeg":{"duration":175.032,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=InHt3sukfeg","annotations":[{"segment":[0.01,175.001],"label":"Painting furniture"}]},"P9HUphIXgNA":{"duration":121.511,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=P9HUphIXgNA","annotations":[]},"H6g8SsxL8RU":{"duration":55.57,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=H6g8SsxL8RU","annotations":[{"segment":[32.420344617784714,49.58405647425897],"label":"Starting a campfire"}]},"RihO8i98QJg":{"duration":218.92000000000002,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RihO8i98QJg","annotations":[{"segment":[4.097616224648986,88.78168486739469],"label":"Dodgeball"},{"segment":[120.53821060842434,152.6362043681747],"label":"Dodgeball"}]},"yyCsQ7QzAJ8":{"duration":228.717,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yyCsQ7QzAJ8","annotations":[{"segment":[47.81270682059282,213.7299357129485],"label":"Making a cake"}]},"35Xzs3Bz1hQ":{"duration":91.56,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=35Xzs3Bz1hQ","annotations":[{"segment":[70.96090015600625,76.24370358814353],"label":"Drinking 
coffee"}]},"o1jPudE8eRE":{"duration":230.296,"subset":"testing","resolution":"1920x1072","url":"https://www.youtube.com/watch?v=o1jPudE8eRE","annotations":[]},"qZMcbBsQorE":{"duration":4.528,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=qZMcbBsQorE","annotations":[]},"3-_Eld2NwJ0":{"duration":70.218,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=3-_Eld2NwJ0","annotations":[{"segment":[0.2,67.72883619344773],"label":"Building sandcastles"}]},"EfJO4zg7bYE":{"duration":229.25,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=EfJO4zg7bYE","annotations":[{"segment":[116.91397815912637,216.308736349454],"label":"Polishing forniture"}]},"vHeh6hl2dFc":{"duration":164.003,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vHeh6hl2dFc","annotations":[]},"z0vMpInrDqM":{"duration":24.823,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=z0vMpInrDqM","annotations":[{"segment":[13.712007800312012,21.923934477379095],"label":"Putting in contact lenses"}]},"hY73M6hJqgU":{"duration":198.322,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=hY73M6hJqgU","annotations":[{"segment":[17.0166767487026,177.28283230921073],"label":"Baking 
cookies"}]},"EZKrOWEKX_Q":{"duration":136.209,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=EZKrOWEKX_Q","annotations":[{"segment":[12.814570070773637,127.806238992892],"label":"Surfing"}]},"4J-hDMxEb14":{"duration":66.549,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=4J-hDMxEb14","annotations":[]},"WZOa0RpD6xE":{"duration":212.3,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WZOa0RpD6xE","annotations":[]},"8zfA20hcc0o":{"duration":130.566,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8zfA20hcc0o","annotations":[{"segment":[0.2,3.870125264874541],"label":"Futsal"},{"segment":[15.887882666327062,119.36281080086742],"label":"Futsal"}]},"Ru18FF0tcuA":{"duration":70.682,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=Ru18FF0tcuA","annotations":[{"segment":[0.2,68.25943303593505],"label":"Canoeing"}]},"Yi3xUQcaOnE":{"duration":90.071,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Yi3xUQcaOnE","annotations":[{"segment":[8.264606595359762,34.31912867029549],"label":"Rafting"}]},"f6NpnKJZJE0":{"duration":118.631,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=f6NpnKJZJE0","annotations":[{"segment":[0,49.04391256630265],"label":"Elliptical trainer"},{"segment":[54.225910875195005,62.554122443057715],"label":"Elliptical trainer"},{"segment":[69.40176306552262,72.91811906084243],"label":"Elliptical trainer"},{"segment":[79.5806883151326,84.20747251950078],"label":"Elliptical trainer"},{"segment":[101.78925249609985,115.11439100468019],"label":"Elliptical 
trainer"}]},"9eJgsiS3xH8":{"duration":105.07,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=9eJgsiS3xH8","annotations":[{"segment":[47.134401999999994,97.2191887675507],"label":"Sailing"},{"segment":[4.5169593,41.375515299999996],"label":"Sailing"}]},"jNGa0jPAMjI":{"duration":61.7,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=jNGa0jPAMjI","annotations":[{"segment":[13.2483843,49.5682308],"label":"Smoking hookah"}]},"oNrWO_VQQbk":{"duration":178.19,"subset":"validation","resolution":"448x360","url":"https://www.youtube.com/watch?v=oNrWO_VQQbk","annotations":[{"segment":[117.31070337235259,126.20630173234142],"label":"Bathing dog"}]},"kRom61pt8zk":{"duration":184.39,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=kRom61pt8zk","annotations":[{"segment":[76.51739395290097,163.39052378159127],"label":"Brushing hair"}]},"BSl22Hx2WGM":{"duration":195.75,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BSl22Hx2WGM","annotations":[{"segment":[13.131022967238689,163.679728149766],"label":"Cheerleading"}]},"zrXTMcKFa5E":{"duration":237.091,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zrXTMcKFa5E","annotations":[{"segment":[26.55437784273248,62.69784423291738],"label":"Fun sliding down"}]},"3tRTl23mhVQ":{"duration":67.18,"subset":"training","resolution":"1152x720","url":"https://www.youtube.com/watch?v=3tRTl23mhVQ","annotations":[{"segment":[22.06362875,55.43759154290172],"label":"Javelin throw"}]},"XvFv0n2mJUk":{"duration":9.846,"subset":"validation","resolution":"192x144","url":"https://www.youtube.com/watch?v=XvFv0n2mJUk","annotations":[{"segment":[0.01,9.191975482278307],"label":"Doing a 
powerbomb"}]},"iEqyqzKSEZ4":{"duration":237.425,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=iEqyqzKSEZ4","annotations":[{"segment":[32.58896723868955,107.76578939157567],"label":"Rope skipping"},{"segment":[156.64924024961,190.3491950078003],"label":"Rope skipping"}]},"i5qTK0mInTc":{"duration":215.5,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=i5qTK0mInTc","annotations":[]},"s7kIK4sQQ8w":{"duration":115.53999999999999,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=s7kIK4sQQ8w","annotations":[]},"bXEG4X7pRds":{"duration":129.73,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bXEG4X7pRds","annotations":[{"segment":[0.01,113.49616380655227],"label":"Grooming dog"}]},"tvzl11XL01k":{"duration":80.3,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=tvzl11XL01k","annotations":[{"segment":[3.6670269800000006,46.55365826],"label":"Wrapping presents"}]},"4SecbKo1iGE":{"duration":148.748,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=4SecbKo1iGE","annotations":[{"segment":[17.630695787831513,122.9674414976599],"label":"Painting furniture"}]},"uTQyPHg8r0M":{"duration":86.448,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=uTQyPHg8r0M","annotations":[{"segment":[8.631302689547582,51.65295203276131],"label":"Gargling 
mouthwash"}]},"0dZVZtTTgfk":{"duration":196.86,"subset":"testing","resolution":"256x144","url":"https://www.youtube.com/watch?v=0dZVZtTTgfk","annotations":[]},"yW76UI8lnNc":{"duration":78.414,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yW76UI8lnNc","annotations":[{"segment":[0.01,78.381],"label":"Shuffleboard"}]},"anO5Xhewrsw":{"duration":72.772,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=anO5Xhewrsw","annotations":[{"segment":[5.90342683095079,72.771087],"label":"Playing drums"}]},"LcCtHQuyDO0":{"duration":75.116,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=LcCtHQuyDO0","annotations":[]},"fMVuGUBs2cs":{"duration":231.16,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fMVuGUBs2cs","annotations":[]},"6LADh__9LUI":{"duration":65.318,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=6LADh__9LUI","annotations":[{"segment":[0.009999999999999998,65.317731],"label":"Hand car wash"}]},"ux3h_qEusvw":{"duration":154.181,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=ux3h_qEusvw","annotations":[{"segment":[11.78603050788698,132.53270993915757],"label":"Peeling 
potatoes"}]},"PYeU5d6wK2s":{"duration":238.07999999999998,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PYeU5d6wK2s","annotations":[]},"eoOzE3cpqVg":{"duration":51.74,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eoOzE3cpqVg","annotations":[{"segment":[5.283019500780031,43.594992979719194],"label":"Paintball"}]},"zoA-sfWcVlI":{"duration":33.11,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=zoA-sfWcVlI","annotations":[]},"eR6obwco54s":{"duration":136.395,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eR6obwco54s","annotations":[]},"uRcVktMzlUc":{"duration":232.966,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uRcVktMzlUc","annotations":[{"segment":[49.99887641315599,53.25967306151984],"label":"Beer pong"},{"segment":[86.59226102257259,92.75154358059321],"label":"Beer pong"},{"segment":[99.99775835473511,101.44700130956349],"label":"Beer pong"},{"segment":[110.86708051594796,112.67863420948343],"label":"Beer pong"},{"segment":[125.72182080293885,128.6203067125956],"label":"Beer pong"},{"segment":[163.40213762847677,167.3875557542548],"label":"Beer pong"},{"segment":[176.08301348322507,190.2131322928018],"label":"Beer pong"},{"segment":[196.73472558952952,198.54627928306496],"label":"Beer 
pong"}]},"Vvco5y9PUFM":{"duration":14.35,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Vvco5y9PUFM","annotations":[{"segment":[0.2,14.149931972789116],"label":"Powerbocking"}]},"jN-jnIgzd6k":{"duration":120.3,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=jN-jnIgzd6k","annotations":[]},"NSITtemHdAs":{"duration":130.659,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=NSITtemHdAs","annotations":[]},"x2FfKHiN_3s":{"duration":112.524,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=x2FfKHiN_3s","annotations":[{"segment":[28.441682634275157,45.26768640650056],"label":"Ice fishing"}]},"42ZfZq3MKvI":{"duration":75.883,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=42ZfZq3MKvI","annotations":[]},"wlO7PcigY-s":{"duration":63.53,"subset":"training","resolution":"320x176","url":"https://www.youtube.com/watch?v=wlO7PcigY-s","annotations":[{"segment":[1.632925099843994,23.40525976443058],"label":"Tennis serve with ball bouncing"},{"segment":[32.60901941809673,37.26038182371295],"label":"Tennis serve with ball bouncing"}]},"R2cL7miVEwA":{"duration":196.05,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=R2cL7miVEwA","annotations":[{"segment":[17.12727029017161,187.4824408549142],"label":"Playing field hockey"}]},"erevt6avST0":{"duration":117.42,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=erevt6avST0","annotations":[{"segment":[12.177142416536661,20.234199053042122],"label":"Paintball"},{"segment":[48.06766743369735,56.12472407020281],"label":"Paintball"},{"segment":[77.36605520280811,84.50753722152886],"label":"Paintball"}]},"nfBBazBLQFk":{"duration":207.308,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nfBBazBLQFk","annotations":[{"segment":[0.01,0.02],"label":"Kite 
flying"},{"segment":[12.285737909516381,192.3687909516381],"label":"Kite flying"}]},"d8u6MM00_ig":{"duration":137.35,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=d8u6MM00_ig","annotations":[{"segment":[2.87877216,80.71275036],"label":"Snatch"}]},"voaGOm-rl9Y":{"duration":51.8,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=voaGOm-rl9Y","annotations":[{"segment":[3.5459153499999996,43.88357170202808],"label":"Shoveling snow"}]},"RL4V-Sx619M":{"duration":83.685,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=RL4V-Sx619M","annotations":[{"segment":[1.8577000336335086,17.47657853830641],"label":"Shuffleboard"},{"segment":[22.167546536062368,52.241083919239934],"label":"Shuffleboard"}]},"KjiHKzquw3Y":{"duration":18.934,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KjiHKzquw3Y","annotations":[]},"4Gx_8vuxzAk":{"duration":42.84,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=4Gx_8vuxzAk","annotations":[]},"MinmayCk2Nk":{"duration":38.64,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MinmayCk2Nk","annotations":[{"segment":[5.967492037441499,34.7199536723869],"label":"Using the pommel horse"}]},"RD7AUdgtchE":{"duration":4.55,"subset":"validation","resolution":"480x480","url":"https://www.youtube.com/watch?v=RD7AUdgtchE","annotations":[{"segment":[0,1.5126403181072483],"label":"Shoveling 
snow"}]},"1SyJihd6LGI":{"duration":128.592,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=1SyJihd6LGI","annotations":[]},"FXl3qRRs9jw":{"duration":96.387,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=FXl3qRRs9jw","annotations":[{"segment":[9.170079563182528,83.58302028081123],"label":"Bullfighting"}]},"UodvUEkuVig":{"duration":203.57,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UodvUEkuVig","annotations":[{"segment":[60.3402994895306,182.29121908892355],"label":"Removing curlers"}]},"4S09Z8HRtdo":{"duration":175.172,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=4S09Z8HRtdo","annotations":[{"segment":[23.2246255850234,159.0203775351014],"label":"Painting fence"}]},"zgdT41KjjrE":{"duration":236.43,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=zgdT41KjjrE","annotations":[{"segment":[46.7321106,212.16138165600626],"label":"Horseback riding"}]},"vGZO5lM61D4":{"duration":192.633,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=vGZO5lM61D4","annotations":[{"segment":[20.435298853042124,179.71042226645866],"label":"Snowboarding"}]},"Hn3-SRXssY4":{"duration":213.04,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Hn3-SRXssY4","annotations":[{"segment":[42.708324550702024,96.88308643213729],"label":"High jump"}]},"JZ9mgVXPD-I":{"duration":63.112,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JZ9mgVXPD-I","annotations":[{"segment":[0.01,50.11532687051482],"label":"Playing 
drums"}]},"esmMIjNWVCc":{"duration":29.49,"subset":"testing","resolution":"426x228","url":"https://www.youtube.com/watch?v=esmMIjNWVCc","annotations":[]},"W5WfWmISKrk":{"duration":155.899,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=W5WfWmISKrk","annotations":[{"segment":[0.01,123.3083906677067],"label":"Sumo"}]},"_ao0UwSJHWA":{"duration":222.541,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_ao0UwSJHWA","annotations":[{"segment":[70.97872776911076,213.64955616224648],"label":"Kneeling"}]},"2v4Pjfd4UYw":{"duration":79.62,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2v4Pjfd4UYw","annotations":[]},"m210FwMsnTQ":{"duration":96.69,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=m210FwMsnTQ","annotations":[{"segment":[4.8570455589294435,91.99654555892944],"label":"Using the balance beam"}]},"9qJbSz-eCq0":{"duration":87.447,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9qJbSz-eCq0","annotations":[{"segment":[79.26544679631249,81.9846731726733],"label":"Having an ice cream"}]},"Ve37zGVerDU":{"duration":91.859,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ve37zGVerDU","annotations":[{"segment":[26.224710510140405,58.61151146801872],"label":"Arm wrestling"},{"segment":[73.08525879875194,84.5496131201248],"label":"Arm 
wrestling"}]},"bVMxBXeYMr0":{"duration":174.36,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bVMxBXeYMr0","annotations":[{"segment":[68.2745998493001,93.57156314007662],"label":"Shaving"},{"segment":[100.64383244717543,128.11687860167467],"label":"Shaving"},{"segment":[154.22987296634722,155.86193381279253],"label":"Shaving"}]},"-C1nnsyw7R0":{"duration":133.56,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-C1nnsyw7R0","annotations":[{"segment":[8.2731826,15.62258190327613],"label":"Long jump"}]},"d7gbNqcKXps":{"duration":7.059,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=d7gbNqcKXps","annotations":[{"segment":[0.9140184009360375,6.695460093603744],"label":"Croquet"}]},"3Rc2pPAQ-Cs":{"duration":129.243,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=3Rc2pPAQ-Cs","annotations":[{"segment":[0.01,0.02],"label":"Playing congas"}]},"5-vAXCUN8X0":{"duration":190.24,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5-vAXCUN8X0","annotations":[{"segment":[31.451627609984396,159.0384188580343],"label":"Playing water polo"}]},"5BuDj1xkpiI":{"duration":126.81,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=5BuDj1xkpiI","annotations":[{"segment":[36.003680756754086,64.8857543308535],"label":"Washing face"},{"segment":[94.16347249921998,112.56095767238689],"label":"Washing face"}]},"gkqZ8cV6Kp4":{"duration":202.664,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gkqZ8cV6Kp4","annotations":[]},"vB2qMaP_JaI":{"duration":217.78,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vB2qMaP_JaI","annotations":[{"segment":[36.42583135863925,196.1703967792],"label":"Braiding 
hair"}]},"Pp5DCsgaALg":{"duration":153.206,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Pp5DCsgaALg","annotations":[{"segment":[0.01,153.181],"label":"Sharpening knives"}]},"IFgEjS97Pmw":{"duration":83.778,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=IFgEjS97Pmw","annotations":[{"segment":[45.19105460218409,83.741],"label":"Elliptical trainer"}]},"x--RoqrwiLI":{"duration":132.15,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=x--RoqrwiLI","annotations":[{"segment":[80.44648812,113.69740284],"label":"Hammer throw"},{"segment":[16.30741896,74.08041948],"label":"Hammer throw"}]},"4sgfR6N1thU":{"duration":120.721,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=4sgfR6N1thU","annotations":[]},"3VzXH3o88mw":{"duration":129.71,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=3VzXH3o88mw","annotations":[{"segment":[0.2,12.54567141052989],"label":"Preparing pasta"},{"segment":[27.317187748734437,37.63701423158967],"label":"Preparing pasta"},{"segment":[41.68400500917996,43.90984993685461],"label":"Preparing pasta"},{"segment":[46.9450928673947,80.13041713572544],"label":"Preparing pasta"},{"segment":[96.72307926989079,110.48284786895475],"label":"Preparing pasta"}]},"z31xpHUGgO0":{"duration":113.57,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=z31xpHUGgO0","annotations":[{"segment":[0,113.568820861678],"label":"Playing badminton"}]},"K1z2fiB9pUM":{"duration":202.07999999999998,"subset":"training","resolution":"352x264","url":"https://www.youtube.com/watch?v=K1z2fiB9pUM","annotations":[{"segment":[2.2074134165366615,195.82910452418096],"label":"Doing motocross"}]},"YddPa1ujWuk":{"duration":140.85,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=YddPa1ujWuk","annotations":[{"segment":[36.682943837753506,140.801],"label":"Wrapping 
presents"}]},"76RoR_LbIzQ":{"duration":96.688,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=76RoR_LbIzQ","annotations":[{"segment":[54.58405384122012,54.88479169466012],"label":"Braiding hair"}]},"RLIsHDFYr6k":{"duration":202.29,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=RLIsHDFYr6k","annotations":[]},"PyPu-6wATfw":{"duration":169.65,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=PyPu-6wATfw","annotations":[{"segment":[5.422479177847114,143.73596832],"label":"Mixing drinks"}]},"lW5wvALKE6s":{"duration":37.29,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=lW5wvALKE6s","annotations":[]},"wtQQ5I0QRuw":{"duration":180.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=wtQQ5I0QRuw","annotations":[{"segment":[50.540686427457096,84.79604056162246],"label":"Shaving legs"},{"segment":[133.371255850234,150.51565678627145],"label":"Shaving legs"}]},"rCmQ7TXC5jk":{"duration":105.907,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rCmQ7TXC5jk","annotations":[]},"BetFWvm92nM":{"duration":166.534,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=BetFWvm92nM","annotations":[{"segment":[25.699998439937602,132.3939313572543],"label":"Gargling mouthwash"}]},"Z3NtsxP6w-g":{"duration":218.45,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Z3NtsxP6w-g","annotations":[{"segment":[56.91373894955798,212.65971789079563],"label":"Tango"}]},"r-xtiGmrKxA":{"duration":221.32999999999998,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=r-xtiGmrKxA","annotations":[{"segment":[18.127865686056012,213.21822808892355],"label":"Doing 
kickboxing"}]},"PG04Cv8WB2s":{"duration":44.745,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PG04Cv8WB2s","annotations":[]},"D18b2IZpxk0":{"duration":95.667,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=D18b2IZpxk0","annotations":[{"segment":[0.01,86.37216692667707],"label":"Camel ride"}]},"BDQHEemWnSk":{"duration":155.11,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BDQHEemWnSk","annotations":[{"segment":[0.01,99.33286496085694],"label":"Rollerblading"},{"segment":[83.60415309129372,154.90929705215422],"label":"Rollerblading"}]},"DqT3NrqisLk":{"duration":161.31,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=DqT3NrqisLk","annotations":[{"segment":[35.23128544189387,120.54132518720748],"label":"Cleaning shoes"}]},"LUGksGa4WJA":{"duration":144.545,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LUGksGa4WJA","annotations":[{"segment":[3.8334587750856977,142.0634716848674],"label":"Surfing"}]},"7JXae2so5-E":{"duration":48.6,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=7JXae2so5-E","annotations":[{"segment":[10.808144831914273,28.645364064448167],"label":"Welding"}]},"NNZKinEXYc4":{"duration":90.14,"subset":"validation","resolution":"704x480","url":"https://www.youtube.com/watch?v=NNZKinEXYc4","annotations":[{"segment":[0.01,0.02],"label":"Using the balance beam"}]},"SoWow2cxfac":{"duration":148.213,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=SoWow2cxfac","annotations":[{"segment":[11.788160686427457,64.26570670826833],"label":"Disc dog"},{"segment":[92.00629953198128,138.47179251170047],"label":"Disc 
dog"}]},"fEBM3nPMen8":{"duration":48.204,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=fEBM3nPMen8","annotations":[]},"QryL-hVKAOA":{"duration":122.74,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QryL-hVKAOA","annotations":[{"segment":[63.980622090000004,116.833127925117],"label":"Washing dishes"},{"segment":[9.664329173166927,49.46228721000001],"label":"Washing dishes"}]},"XncWGxekE30":{"duration":163.028,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=XncWGxekE30","annotations":[{"segment":[61.548528836391554,74.26516702572864],"label":"Snow tubing"}]},"eypKc4FyZ2s":{"duration":198.067,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=eypKc4FyZ2s","annotations":[{"segment":[19.55337025253201,107.14259766686223],"label":"Croquet"},{"segment":[124.41371293165973,167.60842944261069],"label":"Croquet"}]},"B73wt5icB-c":{"duration":167.625,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=B73wt5icB-c","annotations":[{"segment":[6.4044375975039,11.371144305772232],"label":"Rope skipping"},{"segment":[16.337851014040563,19.2133127925117],"label":"Rope skipping"},{"segment":[23.134397035881435,26.532670046801876],"label":"Rope skipping"},{"segment":[39.080139625585026,48.229336193447736],"label":"Rope skipping"},{"segment":[53.457448517940726,56.85572152886116],"label":"Rope skipping"},{"segment":[62.345239469578786,63.39086193447738],"label":"Rope skipping"},{"segment":[98.94202574102964,117.25441263650545],"label":"Rope skipping"},{"segment":[121.69883853354133,122.48314898595943],"label":"Rope 
skipping"}]},"8GZlEny_cYw":{"duration":5.78,"subset":"testing","resolution":"800x480","url":"https://www.youtube.com/watch?v=8GZlEny_cYw","annotations":[]},"w8kVVzMOC98":{"duration":220.288,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=w8kVVzMOC98","annotations":[{"segment":[23.705536661466457,207.87177067082686],"label":"Playing ten pins"}]},"UOswYEzeJ-M":{"duration":85.33,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=UOswYEzeJ-M","annotations":[{"segment":[17.83879348205928,19.835673349453977],"label":"Layup drill in basketball"},{"segment":[25.560062302652106,27.024440872074884],"label":"Layup drill in basketball"},{"segment":[36.7425895600624,37.94071748049922],"label":"Layup drill in basketball"},{"segment":[42.33385318876755,43.66510643369734],"label":"Layup drill in basketball"},{"segment":[47.92511681747269,48.85699408892356],"label":"Layup drill in basketball"},{"segment":[53.7826310951638,54.847633691107646],"label":"Layup drill in basketball"}]},"ZWlh8sQE2Z4":{"duration":63.298,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=ZWlh8sQE2Z4","annotations":[{"segment":[0.01,63.141],"label":"Kneeling"}]},"9E-FFtZl9qM":{"duration":97.291,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9E-FFtZl9qM","annotations":[{"segment":[5.192211518272804,14.433521487120155],"label":"Running a marathon"},{"segment":[21.85686883914508,23.674831455967507],"label":"Running a marathon"},{"segment":[28.522731767493987,35.64308535004851],"label":"Running a marathon"},{"segment":[75.48676603540676,82.00113207902046],"label":"Running a marathon"},{"segment":[86.69753550581174,94.72687039677747],"label":"Running a marathon"}]},"Q7cgJD7-sEM":{"duration":206.94,"subset":"validation","resolution":"848x480","url":"https://www.youtube.com/watch?v=Q7cgJD7-sEM","annotations":[{"segment":[22.2695616224649,188.17984867394696],"label":"Playing 
flauta"}]},"hs81dVKvvdw":{"duration":115.75,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hs81dVKvvdw","annotations":[{"segment":[17.967662488812476,51.37487415142864],"label":"Getting a tattoo"}]},"AFdqkU6FyqY":{"duration":132.052,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AFdqkU6FyqY","annotations":[{"segment":[78.6560802473539,80.70976474859698],"label":"Wakeboarding"},{"segment":[85.84397298452342,97.75533809183291],"label":"Wakeboarding"},{"segment":[101.8627053702155,112.13112356617194],"label":"Wakeboarding"}]},"MUbcqBcEdQg":{"duration":135.768,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=MUbcqBcEdQg","annotations":[]},"Yg6jFRYIyXw":{"duration":102.35,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Yg6jFRYIyXw","annotations":[{"segment":[24.430724691107645,91.0164253198128],"label":"Shoveling snow"}]},"aV5DMcsNMmk":{"duration":56.73,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aV5DMcsNMmk","annotations":[{"segment":[0.01,56.726349206349205],"label":"Drinking coffee"}]},"6wPVO7I8iIY":{"duration":150.42000000000002,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=6wPVO7I8iIY","annotations":[]},"5vd8j0hKIgs":{"duration":23.31,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=5vd8j0hKIgs","annotations":[{"segment":[7.445561622464898,18.450464898595943],"label":"Using parallel 
bars"}]},"blbaWHbtqTI":{"duration":102.31,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=blbaWHbtqTI","annotations":[]},"D5vmWDHQ86I":{"duration":139.505,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=D5vmWDHQ86I","annotations":[{"segment":[56.36805190302851,66.59700340666689],"label":"Futsal"},{"segment":[87.05490608424337,96.41330848829952],"label":"Futsal"},{"segment":[132.1058199828393,139.305487],"label":"Futsal"}]},"hpZ5XnuiRPw":{"duration":162.22,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hpZ5XnuiRPw","annotations":[{"segment":[52.74843972074883,158.4983092808112],"label":"Playing harmonica"}]},"qPCdE01lKlE":{"duration":94.32,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=qPCdE01lKlE","annotations":[]},"HmqRZ4HPu1U":{"duration":32.857,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HmqRZ4HPu1U","annotations":[{"segment":[0.01,32.81517539429437],"label":"Cutting the grass"}]},"NV2scxevYKk":{"duration":208.794,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=NV2scxevYKk","annotations":[{"segment":[16.28244929797192,183.66602808112324],"label":"Canoeing"}]},"ZmoSxXei954":{"duration":210.65,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZmoSxXei954","annotations":[{"segment":[19.183986570000002,151.95309885],"label":"Playing racquetball"}]},"3IRjOwcHEpQ":{"duration":91.928,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3IRjOwcHEpQ","annotations":[]},"8tlLBffNjf8":{"duration":32.555,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=8tlLBffNjf8","annotations":[{"segment":[8.468194488595023,24.542550623052957],"label":"Decorating the Christmas 
tree"}]},"54TIARWo-S8":{"duration":29.26,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=54TIARWo-S8","annotations":[]},"JTFEzdAw1L4":{"duration":44.606,"subset":"training","resolution":"136x240","url":"https://www.youtube.com/watch?v=JTFEzdAw1L4","annotations":[{"segment":[0.01,44.21644859262093],"label":"Beer pong"}]},"ssNrAODsmIE":{"duration":192.192,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=ssNrAODsmIE","annotations":[]},"PZ1FVhgTRWU":{"duration":91.42,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=PZ1FVhgTRWU","annotations":[{"segment":[4.991560805289354,86.85315635101405],"label":"Using the balance beam"}]},"cnjaB6GFpSc":{"duration":158.9,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=cnjaB6GFpSc","annotations":[{"segment":[0.01,158.89414965986396],"label":"Doing step aerobics"}]},"y2jDV7tFUXg":{"duration":34.134,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=y2jDV7tFUXg","annotations":[{"segment":[3.2717808112324493,28.275634165366615],"label":"Waterskiing"}]},"91WRZuT4c6E":{"duration":111.642,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=91WRZuT4c6E","annotations":[{"segment":[0,7.399442277691107],"label":"Hand car wash"},{"segment":[29.33661232449298,41.17571996879875],"label":"Hand car wash"},{"segment":[66.76908502340093,72.35339391575663],"label":"Hand car wash"},{"segment":[75.31370124804992,82.62740171606863],"label":"Hand car 
wash"}]},"ESsUwNlkJoo":{"duration":69.27,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ESsUwNlkJoo","annotations":[{"segment":[8.09918096723869,62.309698907956324],"label":"Spinning"}]},"y3kJaNihMkA":{"duration":35.88,"subset":"testing","resolution":"328x240","url":"https://www.youtube.com/watch?v=y3kJaNihMkA","annotations":[]},"uaGy1W-EYWU":{"duration":76.023,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=uaGy1W-EYWU","annotations":[{"segment":[35.90191681957542,38.86145731801779],"label":"Hula hoop"}]},"uSsb5pdYWE8":{"duration":80.295,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uSsb5pdYWE8","annotations":[]},"tAWTfutrwg0":{"duration":211.72,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tAWTfutrwg0","annotations":[{"segment":[44.90821840873635,176.66434477379096],"label":"Hanging wallpaper"}]},"fklBsM-H7-Y":{"duration":103.42,"subset":"validation","resolution":"562x480","url":"https://www.youtube.com/watch?v=fklBsM-H7-Y","annotations":[{"segment":[4.835959422776911,102.03874382059283],"label":"Playing guitarra"}]},"w1VJnYDYYY0":{"duration":33.41,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=w1VJnYDYYY0","annotations":[{"segment":[18.87003484493121,23.24871696364453],"label":"Ping-pong"}]},"K1b_YcfLsHM":{"duration":189.06,"subset":"testing","resolution":"424x240","url":"https://www.youtube.com/watch?v=K1b_YcfLsHM","annotations":[]},"rXwSSTGmvb8":{"duration":201.5,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rXwSSTGmvb8","annotations":[{"segment":[118.11185676000001,174.42671606864275],"label":"Mixing drinks"},{"segment":[35.82819032761311,108.38177207999999],"label":"Mixing 
drinks"}]},"AzmaqkS88YM":{"duration":149.32999999999998,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=AzmaqkS88YM","annotations":[{"segment":[0.01,149.327527],"label":"Playing squash"}]},"Hhc10CrukfA":{"duration":30.163,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Hhc10CrukfA","annotations":[{"segment":[0.2,29.861],"label":"Baking cookies"}]},"5vDPgcyRtOU":{"duration":111.22,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5vDPgcyRtOU","annotations":[{"segment":[41.99080659825032,99.94505964430577],"label":"Polishing forniture"}]},"kXfYuIx31Bo":{"duration":169.437,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=kXfYuIx31Bo","annotations":[{"segment":[131.21309906396257,140.46276365054604],"label":"Laying tile"}]},"lHVn-5OwXMw":{"duration":184.41,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=lHVn-5OwXMw","annotations":[{"segment":[0,160.82158261466458],"label":"Cricket"},{"segment":[134.35362984087365,177.50790066770674],"label":"Cricket"}]},"BCsfauy69xM":{"duration":41.889,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BCsfauy69xM","annotations":[{"segment":[0,24.949533223088924],"label":"Carving jack-o-lanterns"}]},"LjfF72Hwpyg":{"duration":238.725,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LjfF72Hwpyg","annotations":[{"segment":[0.01,10.051960998439938],"label":"Snow tubing"},{"segment":[16.380973478939158,31.272767550702028],"label":"Snow tubing"},{"segment":[38.718664586583465,51.3766895475819],"label":"Snow tubing"},{"segment":[67.75766302652106,71.48061154446178],"label":"Snow tubing"},{"segment":[80.04339313572542,146.68417160686428],"label":"Snow tubing"},{"segment":[157.85301716068642,192.4925694227769],"label":"Snow tubing"},{"segment":[216.69376287051483,218.55539313572544],"label":"Snow 
tubing"}]},"m6w3Skrc89s":{"duration":27.888,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=m6w3Skrc89s","annotations":[{"segment":[0.01,26.948904342916563],"label":"Kneeling"}]},"ppYlWcoqOYI":{"duration":170.32,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ppYlWcoqOYI","annotations":[]},"6O5UcjQMwoQ":{"duration":36.385,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=6O5UcjQMwoQ","annotations":[{"segment":[0,36.361],"label":"Playing drums"}]},"DVlMzGPhWO4":{"duration":95.97,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DVlMzGPhWO4","annotations":[{"segment":[14.90958848,80.39527264],"label":"Sailing"},{"segment":[2.620033139625585,93.79718639859594],"label":"Sailing"}]},"p1WrfcqnINk":{"duration":133.608,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=p1WrfcqnINk","annotations":[]},"4U13LEMIdi4":{"duration":12.15,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=4U13LEMIdi4","annotations":[{"segment":[0.41679995319812796,11.765125951638067],"label":"Washing face"}]},"Vre3tO7xV98":{"duration":170.024,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=Vre3tO7xV98","annotations":[{"segment":[25.990792511700466,158.06645241809673],"label":"BMX"}]},"yhnIVL0t4sM":{"duration":24.474,"subset":"testing","resolution":"270x480","url":"https://www.youtube.com/watch?v=yhnIVL0t4sM","annotations":[]},"GwZOgt1SvfM":{"duration":55.241,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GwZOgt1SvfM","annotations":[]},"DLFerdBxdxQ":{"duration":150.7,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DLFerdBxdxQ","annotations":[{"segment":[24.68523856474259,114.49248743837754],"label":"Preparing 
salad"}]},"A6nhjnCvpHg":{"duration":119.072,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=A6nhjnCvpHg","annotations":[]},"2Iakg-Z-iXM":{"duration":148.375,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=2Iakg-Z-iXM","annotations":[{"segment":[13.72820165265274,148.375509],"label":"Baton twirling"}]},"lOtplLrtapE":{"duration":209.47,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lOtplLrtapE","annotations":[{"segment":[20.584684867394696,87.89333697347894],"label":"Snatch"},{"segment":[99.98275507020281,186.56912792511702],"label":"Snatch"}]},"CSDApI2nHPU":{"duration":115.59,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=CSDApI2nHPU","annotations":[{"segment":[0.01,112.07741341653667],"label":"Washing dishes"}]},"7aydawW3AfE":{"duration":18.81,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=7aydawW3AfE","annotations":[{"segment":[2.8021520608424333,16.035351845553823],"label":"Putting on makeup"}]},"L-A8RECKhtQ":{"duration":118.585,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=L-A8RECKhtQ","annotations":[{"segment":[97.71199242729283,104.1768678317757],"label":"Cutting the grass"}]},"YzcgGHmfaKE":{"duration":162.981,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=YzcgGHmfaKE","annotations":[{"segment":[7.8791747269890795,162.721],"label":"Futsal"}]},"ekt-X2XgFko":{"duration":140.11,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ekt-X2XgFko","annotations":[]},"FbmK-7sZ3O4":{"duration":118.097,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=FbmK-7sZ3O4","annotations":[{"segment":[0.01,99.36284044477068],"label":"Playing ice 
hockey"}]},"CXM1sXEMSWI":{"duration":95.97,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=CXM1sXEMSWI","annotations":[{"segment":[17.60724896,46.86116415444617],"label":"Drinking beer"}]},"AqTZd5HZKNI":{"duration":204.24,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AqTZd5HZKNI","annotations":[{"segment":[0.01,46.19663806552262],"label":"Vacuuming floor"},{"segment":[50.657003120124806,60.53352574102965],"label":"Vacuuming floor"},{"segment":[65.31248829953198,162.16612948517943],"label":"Vacuuming floor"},{"segment":[166.6264945397816,204.02100000000002],"label":"Vacuuming floor"}]},"ZsVfa3JIUNU":{"duration":101.588,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZsVfa3JIUNU","annotations":[]},"oAyygdg493M":{"duration":107.277,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=oAyygdg493M","annotations":[{"segment":[29.120213951415202,42.5088180670084],"label":"Surfing"},{"segment":[65.60415926365054,94.55701526521061],"label":"Surfing"}]},"-hEr3ydGyoM":{"duration":176.472,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=-hEr3ydGyoM","annotations":[{"segment":[0.01,15.220416930118455],"label":"Playing drums"},{"segment":[89.39934216376331,92.69618328525864],"label":"Playing drums"},{"segment":[111.65301973385678,114.68812721053982],"label":"Playing drums"},{"segment":[133.64711319184826,140.24154309839028],"label":"Playing drums"},{"segment":[140.79107892393546,144.63782970275165],"label":"Playing drums"}]},"DzdNjXensv0":{"duration":136.65,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DzdNjXensv0","annotations":[{"segment":[0.01,28.76932137285491],"label":"Polishing shoes"},{"segment":[32.818336973478935,134.25683307332295],"label":"Polishing 
shoes"}]},"VFVAkH9zPIM":{"duration":205.335,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=VFVAkH9zPIM","annotations":[{"segment":[35.54787987519501,77.50078315132606],"label":"Disc dog"},{"segment":[95.43484867394696,121.69544461778473],"label":"Disc dog"},{"segment":[121.69544461778473,146.03453354134166],"label":"Disc dog"}]},"I6gc2AjcYOY":{"duration":44.12,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=I6gc2AjcYOY","annotations":[{"segment":[0.2,14.0241248049922],"label":"Hand washing clothes"},{"segment":[21.24242433697348,37.122683307332295],"label":"Hand washing clothes"}]},"qVgsPJeQYMY":{"duration":128.918,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qVgsPJeQYMY","annotations":[{"segment":[24.567891895056512,27.980099060165546],"label":"Drum corps"},{"segment":[55.077038312502,102.06090435616323],"label":"Drum corps"}]},"soHEsBIljbI":{"duration":17.0,"subset":"training","resolution":"480x480","url":"https://www.youtube.com/watch?v=soHEsBIljbI","annotations":[{"segment":[6.599516828048268,12.371085504060728],"label":"Cleaning shoes"}]},"LWJzkihZUWA":{"duration":41.77,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=LWJzkihZUWA","annotations":[{"segment":[6.511856474258971,19.275095163806554],"label":"Putting on makeup"}]},"qAMf2UUH1xI":{"duration":144.707,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qAMf2UUH1xI","annotations":[{"segment":[7.22403439625585,8.804291920436818],"label":"Scuba diving"},{"segment":[13.319313418096723,19.414592439937596],"label":"Scuba diving"},{"segment":[24.606867162246488,27.541631135725424],"label":"Scuba diving"},{"segment":[34.314163382215284,40.63519347893915],"label":"Scuba diving"},{"segment":[58.469528394695786,70.20858428861153],"label":"Scuba diving"},{"segment":[75.40085901092043,79.91588050858033],"label":"Scuba 
diving"},{"segment":[100.00772617316692,107.45751164430577],"label":"Scuba diving"},{"segment":[116.93905678939157,130.2583702074883],"label":"Scuba diving"}]},"HfpCQGe9eA0":{"duration":42.261,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HfpCQGe9eA0","annotations":[{"segment":[0.2,42.06018140589569],"label":"Slacklining"}]},"glZIkoJTeFY":{"duration":204.59,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=glZIkoJTeFY","annotations":[]},"-TuxT19bogQ":{"duration":41.332,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=-TuxT19bogQ","annotations":[{"segment":[2.64830242094279,41.331519],"label":"Hurling"}]},"RsHk6sq_9IY":{"duration":115.543,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RsHk6sq_9IY","annotations":[{"segment":[6.489106943869592,107.07026439313572],"label":"Futsal"}]},"W6y6Vmk5edg":{"duration":90.65,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=W6y6Vmk5edg","annotations":[{"segment":[0,83.29680293334182],"label":"Using the balance beam"}]},"nRkGD4lF65U":{"duration":143.01,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nRkGD4lF65U","annotations":[]},"eyr5ZLPu4gU":{"duration":126.67,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=eyr5ZLPu4gU","annotations":[]},"46YBNutTwKg":{"duration":43.422,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=46YBNutTwKg","annotations":[{"segment":[5.617196567862715,36.42704836193448],"label":"Decorating the Christmas tree"}]},"Is0_Qv6NLm0":{"duration":199.199,"subset":"training","resolution":"842x480","url":"https://www.youtube.com/watch?v=Is0_Qv6NLm0","annotations":[{"segment":[107.8606113938973,124.33104353118279],"label":"Arm 
wrestling"}]},"J2GQ8168b2Q":{"duration":220.01,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=J2GQ8168b2Q","annotations":[]},"TM_5fb15eXQ":{"duration":144.382,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=TM_5fb15eXQ","annotations":[{"segment":[0.01,144.381677],"label":"Waxing skis"}]},"c7SweMMGQE8":{"duration":138.86,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=c7SweMMGQE8","annotations":[{"segment":[27.287747269890794,120.41259906396255],"label":"Grooming horse"}]},"eF7Rla7UOtQ":{"duration":28.63,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=eF7Rla7UOtQ","annotations":[]},"eqWQOAjF4-k":{"duration":23.662,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=eqWQOAjF4-k","annotations":[{"segment":[1.9124804992199689,19.67648205928237],"label":"Using the monkey bar"}]},"yZLwR5EJ7Wc":{"duration":23.27,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yZLwR5EJ7Wc","annotations":[{"segment":[0.234986996099844,21.853790637285492],"label":"Playing bagpipes"}]},"m6C4SOxfNGQ":{"duration":106.51,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=m6C4SOxfNGQ","annotations":[{"segment":[0.2,106.262],"label":"Table soccer"}]},"li16LYa7us8":{"duration":211.05,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=li16LYa7us8","annotations":[{"segment":[14.816028720748829,196.23006927925118],"label":"Ballet"}]},"rHMMgJBUWh8":{"duration":26.98,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rHMMgJBUWh8","annotations":[{"segment":[2.2328561575663026,11.123310950078004],"label":"Hammer 
throw"}]},"huFOZ2BiM_k":{"duration":42.08,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=huFOZ2BiM_k","annotations":[]},"3i3SUPsyZtA":{"duration":78.53,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3i3SUPsyZtA","annotations":[]},"gQLH5G88ClE":{"duration":177.262,"subset":"training","resolution":"1920x1026","url":"https://www.youtube.com/watch?v=gQLH5G88ClE","annotations":[{"segment":[16.0142457315221,168.14958018098204],"label":"Decorating the Christmas tree"}]},"W-poAYW1pb0":{"duration":135.117,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=W-poAYW1pb0","annotations":[{"segment":[16.441683884555385,22.3438268174727],"label":"Running a marathon"},{"segment":[44.898444453978165,58.17826605304212],"label":"Running a marathon"}]},"VFKUKX9ToNI":{"duration":11.47,"subset":"training","resolution":"480x480","url":"https://www.youtube.com/watch?v=VFKUKX9ToNI","annotations":[{"segment":[0.6059178190327613,11.209479652106083],"label":"Plataform diving"}]},"fYQTAuBS7zw":{"duration":157.339,"subset":"training","resolution":"240x180","url":"https://www.youtube.com/watch?v=fYQTAuBS7zw","annotations":[{"segment":[23.098433616554995,56.17574670066715],"label":"Throwing darts"},{"segment":[67.44653486266091,77.24722022091636],"label":"Throwing darts"},{"segment":[90.96817972247399,98.31869374116557],"label":"Throwing darts"},{"segment":[106.89429342963909,118.16508159163286],"label":"Throwing darts"},{"segment":[134.82624670066713,150.99737754178864],"label":"Throwing darts"}]},"P2HtcvFg3z8":{"duration":92.49000000000001,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=P2HtcvFg3z8","annotations":[{"segment":[0.4325475819032762,67.63623868954758],"label":"Javelin 
throw"}]},"Wq-eDa62BcI":{"duration":127.85,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Wq-eDa62BcI","annotations":[{"segment":[50.64189391575663,127.801],"label":"Removing curlers"}]},"GSFyEkGCUVo":{"duration":181.72,"subset":"validation","resolution":"854x478","url":"https://www.youtube.com/watch?v=GSFyEkGCUVo","annotations":[{"segment":[85.02074882995319,179.13009672386897],"label":"Baton twirling"}]},"NiinNJg-uyg":{"duration":117.59,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NiinNJg-uyg","annotations":[{"segment":[17.793802604368175,28.800278442121684],"label":"Playing lacrosse"},{"segment":[63.47067733104524,68.97391524992199],"label":"Playing lacrosse"},{"segment":[104.01119666677067,110.61508216942278],"label":"Playing lacrosse"}]},"rYrO0KnZ7F0":{"duration":143.709,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rYrO0KnZ7F0","annotations":[{"segment":[24.425692667706706,40.11750234009361],"label":"Carving jack-o-lanterns"},{"segment":[60.06419344773791,103.5434976599064],"label":"Carving jack-o-lanterns"},{"segment":[117.21482527301092,129.09319188767552],"label":"Carving 
jack-o-lanterns"}]},"mHWQrZW6-0M":{"duration":124.111,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mHWQrZW6-0M","annotations":[{"segment":[26.71961115179301,108.62102654602185],"label":"Canoeing"}]},"e34P8tr-Mi4":{"duration":64.064,"subset":"training","resolution":"327x240","url":"https://www.youtube.com/watch?v=e34P8tr-Mi4","annotations":[{"segment":[2.0229736204713125,3.420004682776608],"label":"Plastering"},{"segment":[5.2161874771691314,38.84472090551804],"label":"Plastering"},{"segment":[52.216303930440155,63.0931886298171],"label":"Plastering"}]},"YvVVwrLQGq8":{"duration":164.63,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=YvVVwrLQGq8","annotations":[]},"t6vWMTMooDc":{"duration":222.192,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=t6vWMTMooDc","annotations":[{"segment":[42.50323263205673,96.75533304713204],"label":"Sumo"},{"segment":[107.81308599797542,109.88641467625855],"label":"Sumo"},{"segment":[119.5619485082465,121.63527718652963],"label":"Sumo"},{"segment":[154.4629812593459,193.51067018603948],"label":"Sumo"}]},"xv6h1JNMX8g":{"duration":155.249,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=xv6h1JNMX8g","annotations":[{"segment":[30.87071372854914,146.3823627145086],"label":"Disc dog"}]},"aa-bFJZmcko":{"duration":181.16,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=aa-bFJZmcko","annotations":[{"segment":[56.80744181603999,136.052662],"label":"Fixing 
bicycle"}]},"-p7aui3nAnc":{"duration":77.509,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=-p7aui3nAnc","annotations":[]},"cUw2aQVdrbE":{"duration":121.07,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=cUw2aQVdrbE","annotations":[]},"SKtUq_1cOSs":{"duration":55.96,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=SKtUq_1cOSs","annotations":[{"segment":[19.80353666146646,50.68658502340094],"label":"Snatch"}]},"iKuBIzUHxYc":{"duration":154.018,"subset":"training","resolution":"854x456","url":"https://www.youtube.com/watch?v=iKuBIzUHxYc","annotations":[{"segment":[49.63518224299065,70.25656230529594],"label":"Futsal"},{"segment":[88.71989096573208,119.66750623052961],"label":"Futsal"}]},"sx5Q-JsaAAs":{"duration":164.1,"subset":"training","resolution":"636x360","url":"https://www.youtube.com/watch?v=sx5Q-JsaAAs","annotations":[{"segment":[12.036765990639624,134.70933853354134],"label":"Ping-pong"}]},"aWnpbk007cE":{"duration":82.849,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aWnpbk007cE","annotations":[{"segment":[0.2,82.64879818594105],"label":"Kite flying"}]},"Tb875ldI6AY":{"duration":154.95,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Tb875ldI6AY","annotations":[{"segment":[8.217232449297972,43.2613120124805],"label":"Smoking a cigarette"},{"segment":[59.454093603744155,89.42282371294851],"label":"Smoking a cigarette"},{"segment":[112.3827379095164,149.36028393135726],"label":"Smoking a cigarette"}]},"cQr-HSUKbsw":{"duration":140.295,"subset":"validation","resolution":"448x298","url":"https://www.youtube.com/watch?v=cQr-HSUKbsw","annotations":[{"segment":[12.893151079746538,114.50866382690147],"label":"Playing ice 
hockey"}]},"17ytsjuUCRA":{"duration":54.75,"subset":"training","resolution":"480x720","url":"https://www.youtube.com/watch?v=17ytsjuUCRA","annotations":[{"segment":[0.01,54.496399338533536],"label":"Bathing dog"}]},"-M-Dr6HqDhU":{"duration":53.685,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-M-Dr6HqDhU","annotations":[{"segment":[30.22895709828393,38.591047581903275],"label":"Painting furniture"}]},"iwMXYbYyJy4":{"duration":32.601,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iwMXYbYyJy4","annotations":[{"segment":[0.01,32.541],"label":"Swinging at the playground"}]},"OKTxSQMS560":{"duration":207.703,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=OKTxSQMS560","annotations":[{"segment":[88.13584782847097,124.10305043494259],"label":"Grooming dog"},{"segment":[150.34938747209753,172.05931947425898],"label":"Grooming dog"}]},"Jd0hvO7erXM":{"duration":211.534,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Jd0hvO7erXM","annotations":[{"segment":[18.14751170046802,150.4593697347894],"label":"Arm wrestling"},{"segment":[162.00778627145087,196.32308112324495],"label":"Arm wrestling"}]},"jCw0kBph-v4":{"duration":187.362,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jCw0kBph-v4","annotations":[{"segment":[74.3036698695484,170.46136604288554],"label":"Hanging wallpaper"}]},"KJpMYfMXJnQ":{"duration":140.34,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KJpMYfMXJnQ","annotations":[]},"fvEaC-fTR4A":{"duration":223.144,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fvEaC-fTR4A","annotations":[]},"c7fu7RcM2iE":{"duration":179.12,"subset":"training","resolution":"852x480","url":"https://www.youtube.com/watch?v=c7fu7RcM2iE","annotations":[{"segment":[0,178.91875283446714],"label":"Long 
jump"}]},"JW0VZ5NoC8A":{"duration":124.088,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=JW0VZ5NoC8A","annotations":[{"segment":[0.2,112.44842589703589],"label":"Playing pool"}]},"metrvLaYO1w":{"duration":27.19,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=metrvLaYO1w","annotations":[{"segment":[1.5665834633385336,24.38789391575663],"label":"Doing karate"}]},"L4ZhAK04yjg":{"duration":128.15,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=L4ZhAK04yjg","annotations":[{"segment":[0.01,128.101],"label":"Cumbia"}]},"iF9jetQ7OTE":{"duration":137.57999999999998,"subset":"training","resolution":"540x360","url":"https://www.youtube.com/watch?v=iF9jetQ7OTE","annotations":[{"segment":[15.882666177847113,45.31045241809672],"label":"Smoking hookah"},{"segment":[101.09102391575662,134.1441400156006],"label":"Smoking hookah"}]},"jUegk6fzp7o":{"duration":30.117,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=jUegk6fzp7o","annotations":[]},"NiQaDgj8Z10":{"duration":133.469,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=NiQaDgj8Z10","annotations":[{"segment":[0.2,43.50590951638065],"label":"Hula hoop"},{"segment":[90.96690171606863,117.4035070202808],"label":"Hula hoop"}]},"YUN8d87DNNY":{"duration":176.1,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=YUN8d87DNNY","annotations":[{"segment":[76.09943431357254,176.100135],"label":"Grooming horse"}]},"IJER0EpbxW4":{"duration":25.659,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=IJER0EpbxW4","annotations":[{"segment":[0.01,0.02],"label":"Running a marathon"}]},"2bkQz8BcKZ4":{"duration":118.399,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2bkQz8BcKZ4","annotations":[{"segment":[0.01,118.261],"label":"Baton 
twirling"}]},"2H-m_3f_3qM":{"duration":12.307,"subset":"testing","resolution":"294x240","url":"https://www.youtube.com/watch?v=2H-m_3f_3qM","annotations":[]},"yyvc2OSPx1Y":{"duration":181.51,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yyvc2OSPx1Y","annotations":[{"segment":[103.07292870762451,181.5102947845805],"label":"Getting a piercing"}]},"jIb_BfiRsQ4":{"duration":140.969,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jIb_BfiRsQ4","annotations":[{"segment":[5.057165366614665,135.00432761310452],"label":"Making an omelette"}]},"xMuC8lmVX3A":{"duration":224.03,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=xMuC8lmVX3A","annotations":[{"segment":[33.020206659126366,213.3210176232449],"label":"Wrapping presents"}]},"dUvZ6-IVo34":{"duration":80.09,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dUvZ6-IVo34","annotations":[{"segment":[22.72614976599064,58.07857254290172],"label":"Playing flauta"}]},"4ZoBfU4b5Ko":{"duration":62.857,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4ZoBfU4b5Ko","annotations":[{"segment":[12.740608424336973,62.801],"label":"Blow-drying hair"}]},"DJCtIWibRPM":{"duration":130.381,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DJCtIWibRPM","annotations":[]},"sra0ywgWITo":{"duration":187.293,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sra0ywgWITo","annotations":[{"segment":[47.89541965678627,176.10328081123245],"label":"Arm wrestling"}]},"VbP9fOp-Umg":{"duration":191.31,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=VbP9fOp-Umg","annotations":[{"segment":[9.252083302379715,183.2509402471337],"label":"Baton 
twirling"}]},"4r6fQ5RvuGE":{"duration":96.781,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4r6fQ5RvuGE","annotations":[{"segment":[0.01,96.741],"label":"Surfing"}]},"lZl-u5UYu_Y":{"duration":172.5,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=lZl-u5UYu_Y","annotations":[]},"9eniCub7u60":{"duration":19.48,"subset":"testing","resolution":"270x360","url":"https://www.youtube.com/watch?v=9eniCub7u60","annotations":[]},"IqRN2sOQ7Mo":{"duration":187.711,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=IqRN2sOQ7Mo","annotations":[{"segment":[55.917708268330735,155.16432137285491],"label":"Kite flying"}]},"j4iaeT5xIdw":{"duration":192.262,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=j4iaeT5xIdw","annotations":[{"segment":[3.899210382020723,182.9629470202808],"label":"Futsal"}]},"YtsAKuSraLY":{"duration":148.1,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=YtsAKuSraLY","annotations":[]},"jmSrbVNKF6U":{"duration":81.34,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=jmSrbVNKF6U","annotations":[{"segment":[2.0303151326053044,79.56297425897036],"label":"Ballet"}]},"nzqlP05Lkz8":{"duration":159.452,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nzqlP05Lkz8","annotations":[]},"cTbkhIaj_Xc":{"duration":144.43,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cTbkhIaj_Xc","annotations":[{"segment":[0.01,45.28869220074925],"label":"Playing kickball"},{"segment":[59.708972304470414,70.07354862902],"label":"Playing kickball"},{"segment":[76.83305492763928,84.26851185612051],"label":"Playing 
kickball"}]},"5E42nKwv1xY":{"duration":187.59,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5E42nKwv1xY","annotations":[{"segment":[43.75230919422777,186.56921317669034],"label":"Volleyball"}]},"WcBB6DfMTWA":{"duration":205.82,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=WcBB6DfMTWA","annotations":[{"segment":[18.462943025351017,198.27595335920438],"label":"Tai chi"}]},"X0UmqVLOAK0":{"duration":12.1,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=X0UmqVLOAK0","annotations":[{"segment":[4.902809672386896,7.344822152886116],"label":"Discus throw"}]},"ZeBrPKBGb_k":{"duration":180.164,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=ZeBrPKBGb_k","annotations":[{"segment":[10.678,172.2721263650546],"label":"BMX"}]},"JavaLreBqtI":{"duration":91.65,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=JavaLreBqtI","annotations":[{"segment":[23.162507775351013,37.46035208112324],"label":"Removing curlers"},{"segment":[46.61097243681748,55.332657463338535],"label":"Removing curlers"}]},"gvr1dpCpvhw":{"duration":218.338,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=gvr1dpCpvhw","annotations":[{"segment":[38.14940742391601,165.8817983522062],"label":"Making a cake"}]},"QrKWO2uNNJ0":{"duration":194.21,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=QrKWO2uNNJ0","annotations":[{"segment":[49.992091438759594,114.52733675061289],"label":"Removing curlers"}]},"xUDwlT5fw_o":{"duration":75.16,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xUDwlT5fw_o","annotations":[{"segment":[5.978457098283932,64.82523088923558],"label":"Mixing 
drinks"}]},"MiTTWddOuaM":{"duration":100.053,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MiTTWddOuaM","annotations":[{"segment":[15.919160686427455,39.641831513260534],"label":"Table soccer"},{"segment":[48.38176287051482,62.58415132605304],"label":"Table soccer"},{"segment":[85.9946817472699,87.24324336973478],"label":"Table soccer"}]},"tz3zHV1Z5po":{"duration":221.77,"subset":"validation","resolution":"328x240","url":"https://www.youtube.com/watch?v=tz3zHV1Z5po","annotations":[{"segment":[10.379428401625862,184.7538240093604],"label":"Playing water polo"},{"segment":[198.93904270670825,212.7782804602184],"label":"Playing polo"}]},"DgPLWO7R21A":{"duration":66.02,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DgPLWO7R21A","annotations":[]},"9FmnQcHnacg":{"duration":89.26,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9FmnQcHnacg","annotations":[]},"QY-2lN9zwho":{"duration":140.69,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QY-2lN9zwho","annotations":[{"segment":[0.01,140.6897052154195],"label":"Brushing teeth"}]},"X_9NiZmR2tQ":{"duration":45.19,"subset":"training","resolution":"176x144","url":"https://www.youtube.com/watch?v=X_9NiZmR2tQ","annotations":[{"segment":[1.0573954352574104,41.802366207176284],"label":"Playing badminton"}]},"50nJ8UkOGwg":{"duration":160.4,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=50nJ8UkOGwg","annotations":[{"segment":[0.01,153.65375975039],"label":"Sailing"}]},"YnjITZnk6OE":{"duration":32.53,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=YnjITZnk6OE","annotations":[]},"RTD_JWmhNkA":{"duration":121.67,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RTD_JWmhNkA","annotations":[{"segment":[0.01,119.11523868954758],"label":"Playing 
guitarra"}]},"fGgtDMSQk4Y":{"duration":191.751,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fGgtDMSQk4Y","annotations":[]},"MJ9dlbY4gX0":{"duration":176.936,"subset":"testing","resolution":"512x288","url":"https://www.youtube.com/watch?v=MJ9dlbY4gX0","annotations":[]},"1Q_042dyY_A":{"duration":207.587,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1Q_042dyY_A","annotations":[]},"-kAlrG4jS9M":{"duration":146.611,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=-kAlrG4jS9M","annotations":[{"segment":[0.01,146.61077097505668],"label":"Bullfighting"}]},"dY2iZq5T0zo":{"duration":130.5,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=dY2iZq5T0zo","annotations":[{"segment":[0.7122763892355694,16.585864492199686],"label":"Playing polo"},{"segment":[86.59245817706709,97.58186532527301],"label":"Playing polo"}]},"6UUkWJvHJzg":{"duration":154.018,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6UUkWJvHJzg","annotations":[]},"X7ynQ_gIWZc":{"duration":46.905,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=X7ynQ_gIWZc","annotations":[]},"EWWCQH6WbtQ":{"duration":85.032,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EWWCQH6WbtQ","annotations":[{"segment":[58.582893262095716,60.96323663690131],"label":"Having an ice 
cream"}]},"hDb19ih3jAA":{"duration":36.409,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hDb19ih3jAA","annotations":[{"segment":[0.01,0.02],"label":"Rollerblading"}]},"-ML_fQxRrFg":{"duration":209.32999999999998,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=-ML_fQxRrFg","annotations":[]},"ERVK6vQbor4":{"duration":164.21,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ERVK6vQbor4","annotations":[{"segment":[13.065191081112632,158.31937304836194],"label":"Doing kickboxing"}]},"LihSTWbJZJo":{"duration":115.868,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LihSTWbJZJo","annotations":[{"segment":[6.417002911408973,55.40313781258733],"label":"Blowing leaves"},{"segment":[61.5489997559086,101.67786242199688],"label":"Blowing leaves"}]},"El_q7DhzArg":{"duration":4.134,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=El_q7DhzArg","annotations":[{"segment":[0.2,3.8810000000000002],"label":"Baton twirling"}]},"qRuPuzY6yiE":{"duration":84.01,"subset":"training","resolution":"800x480","url":"https://www.youtube.com/watch?v=qRuPuzY6yiE","annotations":[{"segment":[6.809566302652106,71.64846645865835],"label":"Tug of war"}]},"uuH0ieCzqkM":{"duration":36.525,"subset":"training","resolution":"1272x720","url":"https://www.youtube.com/watch?v=uuH0ieCzqkM","annotations":[{"segment":[4.325360374414977,34.83053354134166],"label":"Rope 
skipping"}]},"XqW8wjFpGRU":{"duration":75.02,"subset":"training","resolution":"324x240","url":"https://www.youtube.com/watch?v=XqW8wjFpGRU","annotations":[{"segment":[2.7487545257410293,32.797804909999996],"label":"Cumbia"},{"segment":[36.85955013,62.2915969],"label":"Cumbia"}]},"-fuvIQpA-QQ":{"duration":198.113,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=-fuvIQpA-QQ","annotations":[{"segment":[49.75996407488299,198.11265306122448],"label":"Making a lemonade"}]},"KkpQ347Ceak":{"duration":51.99,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KkpQ347Ceak","annotations":[{"segment":[1.1028510140405616,51.017070982839314],"label":"Preparing salad"}]},"NDK0XQnsnmA":{"duration":129.963,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=NDK0XQnsnmA","annotations":[{"segment":[0.2,119.65404825548026],"label":"Riding bumper cars"}]},"mf6UsZuW9Nw":{"duration":63.414,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=mf6UsZuW9Nw","annotations":[{"segment":[8.112204499064315,44.91391190327613],"label":"Applying sunscreen"}]},"8SMXK2uOvk0":{"duration":42.93,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=8SMXK2uOvk0","annotations":[{"segment":[17.32633229329173,31.45627145085804],"label":"Starting a campfire"}]},"aCSG6pfivy4":{"duration":160.613,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=aCSG6pfivy4","annotations":[{"segment":[20.038814352574104,142.54382059282372],"label":"Baking cookies"}]},"4oi6dXfpJS4":{"duration":94.18,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4oi6dXfpJS4","annotations":[]},"XIMi2oydVB8":{"duration":81.06,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XIMi2oydVB8","annotations":[{"segment":[12.0649704,22.004021541341658],"label":"Pole 
vault"}]},"OcL7ZhBCkxQ":{"duration":153.113,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OcL7ZhBCkxQ","annotations":[]},"Q78FBGHniCc":{"duration":187.64,"subset":"validation","resolution":"176x144","url":"https://www.youtube.com/watch?v=Q78FBGHniCc","annotations":[{"segment":[0.19999999999999998,187.581],"label":"Blowing leaves"}]},"I9HV59QQkNw":{"duration":6.39,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=I9HV59QQkNw","annotations":[{"segment":[0.01,4.054191155401318],"label":"Drinking beer"}]},"crxqFzslD0Y":{"duration":180.75,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=crxqFzslD0Y","annotations":[{"segment":[15.363388455538221,150.67396567862716],"label":"Playing harmonica"}]},"ZqyKUd2Vjzc":{"duration":82.71,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZqyKUd2Vjzc","annotations":[]},"p2n_FtzA1gk":{"duration":87.45,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=p2n_FtzA1gk","annotations":[{"segment":[12.550801897035882,83.62653872698908],"label":"Shaving 
legs"}]},"UbVTGLlR0L8":{"duration":181.928,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UbVTGLlR0L8","annotations":[{"segment":[7.661134165366615,170.5499235569423],"label":"Rafting"}]},"4LpqPUWHH5k":{"duration":155.64,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=4LpqPUWHH5k","annotations":[]},"vfuRgSowX74":{"duration":27.888,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=vfuRgSowX74","annotations":[]},"hFpSoRlpyMo":{"duration":89.75,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=hFpSoRlpyMo","annotations":[]},"z477-uRyDag":{"duration":211.558,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=z477-uRyDag","annotations":[]},"pgUgZMOpwDU":{"duration":104.769,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=pgUgZMOpwDU","annotations":[]},"ZJGXWbt6cbU":{"duration":102.05,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZJGXWbt6cbU","annotations":[{"segment":[0.01,102.05156462585035],"label":"Zumba"}]},"c-8GvZKndyQ":{"duration":91.77,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=c-8GvZKndyQ","annotations":[{"segment":[4.060397801092043,44.23696551716069],"label":"Using the balance beam"},{"segment":[44.43551868,87.97528569032762],"label":"Using the balance 
beam"}]},"iId8WcbiKZI":{"duration":104.839,"subset":"validation","resolution":"853x480","url":"https://www.youtube.com/watch?v=iId8WcbiKZI","annotations":[{"segment":[10.134823712948519,104.581],"label":"Waterskiing"}]},"UMhZGJqeSuU":{"duration":72.749,"subset":"validation","resolution":"562x480","url":"https://www.youtube.com/watch?v=UMhZGJqeSuU","annotations":[{"segment":[24.044636505460215,72.24732761310452],"label":"Hurling"},{"segment":[26.99350702028081,60.11159126365054],"label":"Hurling"}]},"YamDoDK71Ds":{"duration":211.81,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=YamDoDK71Ds","annotations":[{"segment":[0.01,162.90712990968618],"label":"Belly dance"}]},"hGUUlFxygmw":{"duration":229.48,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=hGUUlFxygmw","annotations":[{"segment":[54.04928393135725,59.418418096723876],"label":"Doing nails"},{"segment":[66.57726365054602,81.61083931357254],"label":"Doing nails"}]},"bNVitmKpOzA":{"duration":138.275,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=bNVitmKpOzA","annotations":[]},"GOE5in6FJNA":{"duration":106.627,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GOE5in6FJNA","annotations":[]},"lI6h3H4Zs98":{"duration":91.464,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=lI6h3H4Zs98","annotations":[{"segment":[10.273580143525741,15.553058828393137],"label":"Rope skipping"},{"segment":[24.39975284087364,28.109656781591266],"label":"Rope skipping"},{"segment":[31.819560722308893,46.37379925897036],"label":"Rope skipping"},{"segment":[53.936295753510144,64.49525312324494],"label":"Rope skipping"},{"segment":[70.20279764742591,75.33958771918877],"label":"Rope skipping"},{"segment":[83.04477282683308,85.6131678627145],"label":"Rope 
skipping"}]},"oxcUr-Nlldo":{"duration":200.71,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oxcUr-Nlldo","annotations":[{"segment":[7.984693983677714,135.73979772252116],"label":"Skateboarding"}]},"FUvUDCZxAO8":{"duration":139.95,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=FUvUDCZxAO8","annotations":[{"segment":[34.49527212653132,41.918305368949454],"label":"Archery"},{"segment":[47.37641804719808,58.07431889656539],"label":"Archery"},{"segment":[94.31618708013627,119.20518089295001],"label":"Archery"}]},"_-M4xNBhSEI":{"duration":6.43,"subset":"training","resolution":"256x144","url":"https://www.youtube.com/watch?v=_-M4xNBhSEI","annotations":[{"segment":[0.9744251199999999,5.63323445],"label":"Dodgeball"}]},"XbvlEwmvLko":{"duration":72.354,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=XbvlEwmvLko","annotations":[{"segment":[10.271696408736352,11.85195739469579],"label":"Calf roping"},{"segment":[16.028361429017163,23.929666358814355],"label":"Calf roping"},{"segment":[27.654567254290175,42.1026676973479],"label":"Calf roping"},{"segment":[49.21384213416537,60.16279325117005],"label":"Calf roping"}]},"D7WhCBcddSA":{"duration":31.65,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=D7WhCBcddSA","annotations":[{"segment":[4.196798354134165,28.883847496099843],"label":"Playing racquetball"}]},"NDyc4PZE954":{"duration":186.09,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NDyc4PZE954","annotations":[{"segment":[30.254751950078003,39.56390639625585],"label":"Polishing shoes"},{"segment":[61.09132605304212,63.12770358814352],"label":"Polishing shoes"},{"segment":[79.70963494539781,128.5826957878315],"label":"Polishing 
shoes"}]},"ot0n6mfurHo":{"duration":85.68,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ot0n6mfurHo","annotations":[]},"kGdxtU3NwWo":{"duration":26.73,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kGdxtU3NwWo","annotations":[]},"VHhyCKGmFqo":{"duration":88.14,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VHhyCKGmFqo","annotations":[]},"d-cxJQcmBzA":{"duration":63.39,"subset":"training","resolution":"432x720","url":"https://www.youtube.com/watch?v=d-cxJQcmBzA","annotations":[{"segment":[0.9155804210748999,62.23253521079452],"label":"Vacuuming floor"}]},"RVHx_Otzcl0":{"duration":222.89,"subset":"validation","resolution":"640x352","url":"https://www.youtube.com/watch?v=RVHx_Otzcl0","annotations":[{"segment":[0.6954394477379096,219.41114740644048],"label":"Ballet"}]},"zSWuVb-aYAA":{"duration":164.351,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zSWuVb-aYAA","annotations":[{"segment":[13.83790015600624,33.56972074882995],"label":"Disc dog"},{"segment":[75.08342121684868,141.72760218408737],"label":"Disc dog"}]},"3O2acf8oRVA":{"duration":15.929,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3O2acf8oRVA","annotations":[{"segment":[0.2,15.72888888888889],"label":"Kneeling"}]},"MPolPBO-K-Y":{"duration":170.51,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=MPolPBO-K-Y","annotations":[{"segment":[3.190015600624025,158.96911076443058],"label":"Playing guitarra"}]},"tL90IPP3hbA":{"duration":92.602,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tL90IPP3hbA","annotations":[{"segment":[29.556082554517133,73.68986137071651],"label":"Spread 
mulch"}]},"iiQ6t0p9lik":{"duration":224.723,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iiQ6t0p9lik","annotations":[{"segment":[8.063363190309927,170.3823700213315],"label":"Slacklining"},{"segment":[174.93992211388456,210.69918475039],"label":"Slacklining"}]},"e2QVdX-JdIg":{"duration":102.56,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=e2QVdX-JdIg","annotations":[{"segment":[4.303501519999999,76.53978159126365],"label":"Discus throw"}]},"8OSnT8UvJkU":{"duration":161.89,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=8OSnT8UvJkU","annotations":[{"segment":[21.467394329173167,139.15922676911077],"label":"Getting a haircut"}]},"56m7PcBuUFc":{"duration":36.27,"subset":"training","resolution":"293x240","url":"https://www.youtube.com/watch?v=56m7PcBuUFc","annotations":[{"segment":[0.010000000000000002,36.221],"label":"Swinging at the playground"}]},"aqQ7-J9kbUE":{"duration":34.2,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aqQ7-J9kbUE","annotations":[{"segment":[0.2809082125386746,26.652374006931197],"label":"Starting a campfire"}]},"cjS2B52jBbU":{"duration":75.047,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=cjS2B52jBbU","annotations":[{"segment":[0.2,5.872434094045763],"label":"Throwing darts"}]},"3j-CWo_hYBo":{"duration":231.178,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=3j-CWo_hYBo","annotations":[]},"35DlDj_hzvg":{"duration":10.73,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=35DlDj_hzvg","annotations":[{"segment":[1.106638845553822,10.026314352574103],"label":"Clean and 
jerk"}]},"5dXi-tAGqbs":{"duration":236.94,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=5dXi-tAGqbs","annotations":[{"segment":[10.34979664274571,228.4347973291732],"label":"Zumba"}]},"9u-wH-UBuRc":{"duration":72.098,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=9u-wH-UBuRc","annotations":[{"segment":[26.41128705148206,55.744673946957874],"label":"Arm wrestling"}]},"JTrwGfPJNzU":{"duration":11.309,"subset":"validation","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=JTrwGfPJNzU","annotations":[{"segment":[1.0761233634945397,10.373123569422777],"label":"Fun sliding down"}]},"8_fjIZkNHms":{"duration":30.395,"subset":"testing","resolution":"192x108","url":"https://www.youtube.com/watch?v=8_fjIZkNHms","annotations":[]},"IYB8S2pqVCw":{"duration":103.38,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=IYB8S2pqVCw","annotations":[]},"BfTjydFBQJU":{"duration":90.74,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BfTjydFBQJU","annotations":[{"segment":[0.4952472698907956,88.9518697347894],"label":"Doing step 
aerobics"}]},"lUil9HfTzJ0":{"duration":36.409,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=lUil9HfTzJ0","annotations":[]},"bdKB3QYNFbc":{"duration":232.154,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bdKB3QYNFbc","annotations":[{"segment":[10.500669266770672,153.1649344773791],"label":"BMX"},{"segment":[156.78585491419656,183.958608424337],"label":"BMX"}]},"qtfFFPKya3Y":{"duration":210.05,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qtfFFPKya3Y","annotations":[{"segment":[83.97469013,197.9981120101404],"label":"Cricket"},{"segment":[7.369656774570982,70.21661162999999],"label":"Cricket"}]},"2vixXUJiNhk":{"duration":223.005,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=2vixXUJiNhk","annotations":[{"segment":[0.01,188.5424368174727],"label":"Scuba diving"}]},"5-SbGVTPM1c":{"duration":179.75,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=5-SbGVTPM1c","annotations":[{"segment":[10.79311360452418,178.71714085413416],"label":"High jump"}]},"5ptxyeHlcwM":{"duration":91.069,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=5ptxyeHlcwM","annotations":[{"segment":[31.966379063325796,40.064528426035004],"label":"Playing ten pins"}]},"Ca5GNndwnHw":{"duration":144.31,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ca5GNndwnHw","annotations":[]},"cmzEDurFcwI":{"duration":119.63,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cmzEDurFcwI","annotations":[{"segment":[33.674781530030366,63.44254671801328],"label":"Table soccer"},{"segment":[65.675129107112,93.95450497605474],"label":"Table 
soccer"}]},"XKwodc1MriQ":{"duration":233.524,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XKwodc1MriQ","annotations":[]},"2-SPZIF5lPY":{"duration":139.622,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2-SPZIF5lPY","annotations":[{"segment":[53.13985023400936,56.84221684867395],"label":"Doing fencing"},{"segment":[103.2306926677067,117.16901404056162],"label":"Doing fencing"}]},"RHsU8F1G0iM":{"duration":202.548,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=RHsU8F1G0iM","annotations":[]},"unFlcSwdDFc":{"duration":117.028,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=unFlcSwdDFc","annotations":[{"segment":[6.207443726320482,116.82857142857142],"label":"Futsal"}]},"jHXqbgeq83Y":{"duration":25.17,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=jHXqbgeq83Y","annotations":[{"segment":[0.01,25.121],"label":"Drinking beer"}]},"H2-rEH9TXR4":{"duration":10.055,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=H2-rEH9TXR4","annotations":[{"segment":[3.9369953026521065,8.219065890795632],"label":"Throwing darts"}]},"dtT0BzjTStw":{"duration":88.96000000000001,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=dtT0BzjTStw","annotations":[{"segment":[25.73379195,57.708480499219974],"label":"Layup drill in basketball"},{"segment":[62.56376131045243,80.73638377535103],"label":"Layup drill in 
basketball"}]},"WkJfGQ1C0oE":{"duration":165.025,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=WkJfGQ1C0oE","annotations":[]},"mxn6WcmdNpA":{"duration":32.044,"subset":"testing","resolution":"640x352","url":"https://www.youtube.com/watch?v=mxn6WcmdNpA","annotations":[]},"f1kY1-9XR1k":{"duration":152.347,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=f1kY1-9XR1k","annotations":[{"segment":[17.112183132223247,69.8747477899116],"label":"Braiding hair"}]},"udpVICVTQrQ":{"duration":29.26,"subset":"validation","resolution":"854x478","url":"https://www.youtube.com/watch?v=udpVICVTQrQ","annotations":[{"segment":[13.144899869464167,17.480891078783152],"label":"Plataform diving"}]},"0ys4fnsu9Jc":{"duration":64.53,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0ys4fnsu9Jc","annotations":[{"segment":[3.3651352000000005,55.61912188923557],"label":"Dodgeball"}]},"17zMW3rzlR4":{"duration":83.267,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=17zMW3rzlR4","annotations":[{"segment":[10.90833697347894,76.2284976599064],"label":"Waxing skis"}]},"3GgQdXjJfEU":{"duration":7.2,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3GgQdXjJfEU","annotations":[{"segment":[0.20159438377535102,6.697413416536662],"label":"Javelin throw"}]},"uyr3E9ZReAw":{"duration":145.43,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=uyr3E9ZReAw","annotations":[{"segment":[24.729262454851938,71.01155181989593],"label":"Cricket"},{"segment":[77.36402290921568,108.89950438833881],"label":"Cricket"},{"segment":[120.01632784399375,135.67063147581902],"label":"Cricket"}]},"8E8bytYxwAA":{"duration":29.03,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8E8bytYxwAA","annotations":[{"segment":[5.606923770000001,23.52333442901716],"label":"Using uneven 
bars"}]},"XnBUb3qoFQM":{"duration":188.732,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XnBUb3qoFQM","annotations":[{"segment":[63.87269652305367,66.51846929945395],"label":"Swimming"},{"segment":[118.84597532159282,121.19777334505974],"label":"Swimming"},{"segment":[132.66278870946095,139.71818175586367],"label":"Swimming"}]},"UR1e1MIRvvc":{"duration":69.45,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=UR1e1MIRvvc","annotations":[{"segment":[0.01,68.85420670826832],"label":"Playing bagpipes"}]},"pZ8QqpYu-H8":{"duration":187.83,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pZ8QqpYu-H8","annotations":[{"segment":[0.20000000000000004,187.62573696145125],"label":"Playing piano"}]},"2YE_8XDuDBI":{"duration":135.675,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=2YE_8XDuDBI","annotations":[{"segment":[6.982048361934478,135.42100000000002],"label":"Playing drums"}]},"0WMReYXyDXs":{"duration":84.242,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=0WMReYXyDXs","annotations":[]},"blcUvpHClnM":{"duration":127.71,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=blcUvpHClnM","annotations":[]},"ZsqjU0X3mxY":{"duration":229.62,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZsqjU0X3mxY","annotations":[{"segment":[58.19857644305772,97.59453588143526],"label":"Getting a haircut"},{"segment":[139.85565600624025,142.36267160686427],"label":"Getting a haircut"},{"segment":[160.98621606864273,164.9258120124805],"label":"Getting a haircut"}]},"ZncidS9kQ-g":{"duration":57.8,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZncidS9kQ-g","annotations":[{"segment":[0.01,55.78425585023401],"label":"Playing 
saxophone"}]},"HX_RdK-9fxo":{"duration":187.037,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HX_RdK-9fxo","annotations":[]},"sUrqd6Qn8Qw":{"duration":44.235,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sUrqd6Qn8Qw","annotations":[{"segment":[0,44.141],"label":"Using the rowing machine"}]},"S7ktlGGweeA":{"duration":122.28,"subset":"training","resolution":"622x480","url":"https://www.youtube.com/watch?v=S7ktlGGweeA","annotations":[{"segment":[0.01,109.5906899553042],"label":"Volleyball"}]},"j50CSWkWAHU":{"duration":19.9216666666667,"subset":"testing","resolution":"480x270","url":"https://www.youtube.com/watch?v=j50CSWkWAHU","annotations":[]},"fIkFGf40v8U":{"duration":41.634,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fIkFGf40v8U","annotations":[]},"D0aZaiBAHxg":{"duration":176.96,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=D0aZaiBAHxg","annotations":[{"segment":[0.01,176.958820861678],"label":"Trimming branches or hedges"}]},"WsXxnCQ_MIc":{"duration":213.856,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WsXxnCQ_MIc","annotations":[{"segment":[8.668059282371296,186.04729797191888],"label":"Blow-drying hair"}]},"fcJItKSMIhQ":{"duration":26.378,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=fcJItKSMIhQ","annotations":[]},"Uqs8NaPzHKU":{"duration":226.743,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=Uqs8NaPzHKU","annotations":[{"segment":[13.867860907617017,192.56195974649447],"label":"Making a 
cake"}]},"VDYSVR0HbpM":{"duration":213.53,"subset":"validation","resolution":"854x470","url":"https://www.youtube.com/watch?v=VDYSVR0HbpM","annotations":[{"segment":[21.319758172639833,175.2217608829953],"label":"Ballet"}]},"4Sfu2RyEaYg":{"duration":34.13,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4Sfu2RyEaYg","annotations":[]},"KeTYxQkYfQw":{"duration":170.46,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=KeTYxQkYfQw","annotations":[{"segment":[7.445888616497041,170.457663],"label":"Playing saxophone"}]},"n2wq_9TeNYM":{"duration":41.169,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=n2wq_9TeNYM","annotations":[{"segment":[1.862559139100258,40.26981155382216],"label":"BMX"}]},"epZjMuRRXLo":{"duration":155.249,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=epZjMuRRXLo","annotations":[{"segment":[8.961308612888734,130.30227118200375],"label":"Rope skipping"}]},"OXTQsO5abO4":{"duration":41.38,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=OXTQsO5abO4","annotations":[{"segment":[0.3873131794071763,2.323879076443058],"label":"Ironing clothes"},{"segment":[10.651112433697348,28.27386209672387],"label":"Ironing clothes"}]},"IHPb-JPIhg8":{"duration":48.09,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IHPb-JPIhg8","annotations":[{"segment":[5.926608099843993,30.908386546021845],"label":"Using the pommel 
horse"}]},"A8H0FIuBJUs":{"duration":33.23,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=A8H0FIuBJUs","annotations":[]},"yPpKYckLbdI":{"duration":153.67000000000002,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=yPpKYckLbdI","annotations":[{"segment":[45.24322698,131.45317285647425],"label":"Tumbling"},{"segment":[9.09105804,44.456815979719195],"label":"Tumbling"}]},"YynqCEoHKCk":{"duration":189.41,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YynqCEoHKCk","annotations":[{"segment":[13.295124758190326,59.08944336973479],"label":"Belly dance"},{"segment":[60.91075395,174.01841072386895],"label":"Belly dance"}]},"CE0LilaJEI8":{"duration":154.785,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CE0LilaJEI8","annotations":[]},"jG1kgKJkf8c":{"duration":191.449,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jG1kgKJkf8c","annotations":[{"segment":[0.01,191.448526],"label":"Playing 
drums"}]},"uqwf903l5-E":{"duration":171.16,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=uqwf903l5-E","annotations":[]},"DqYA22vBP_Q":{"duration":36.67,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DqYA22vBP_Q","annotations":[]},"_QBTD1bLSI0":{"duration":73.33,"subset":"testing","resolution":"400x224","url":"https://www.youtube.com/watch?v=_QBTD1bLSI0","annotations":[]},"wEZhiIqOCSk":{"duration":133.028,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wEZhiIqOCSk","annotations":[]},"MHw7mYxeSY0":{"duration":185.389,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=MHw7mYxeSY0","annotations":[]},"MSfIKwQhLFk":{"duration":103.701,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MSfIKwQhLFk","annotations":[{"segment":[9.585513330346698,103.401],"label":"Swimming"}]},"HNR_HofJ_Fs":{"duration":122.05,"subset":"validation","resolution":"638x360","url":"https://www.youtube.com/watch?v=HNR_HofJ_Fs","annotations":[{"segment":[23.03793668481433,93.10372639001561],"label":"Volleyball"}]},"CQFSupyrQK4":{"duration":83.685,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CQFSupyrQK4","annotations":[]},"jCyrQzXK4eI":{"duration":53.221,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=jCyrQzXK4eI","annotations":[{"segment":[3.677938444155399,34.824443117052596],"label":"Painting fence"},{"segment":[42.279723490884365,47.82976554695914],"label":"Painting fence"}]},"9VflCeoPr9I":{"duration":138.37,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9VflCeoPr9I","annotations":[]},"y80Jbcb5GWA":{"duration":56.889,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=y80Jbcb5GWA","annotations":[{"segment":[0.2,56.641999999999996],"label":"Swinging at the 
playground"}]},"SJiNKbnFs_o":{"duration":236.634,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SJiNKbnFs_o","annotations":[]},"qPs7sAYUvDw":{"duration":84.08,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=qPs7sAYUvDw","annotations":[]},"61wzTjdnXe4":{"duration":183.694,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=61wzTjdnXe4","annotations":[{"segment":[10.027199687987519,14.038079563182526],"label":"Scuba diving"},{"segment":[23.492296411856472,40.68178159126365],"label":"Scuba diving"},{"segment":[48.99003276131045,52.1414383775351],"label":"Scuba diving"},{"segment":[91.10427145085802,103.70989391575662],"label":"Scuba diving"},{"segment":[107.43428237129484,111.44516224648984],"label":"Scuba diving"},{"segment":[138.66184711388456,147.2726271450858],"label":"Scuba diving"},{"segment":[157.5874414976599,173.91923088923556],"label":"Scuba diving"}]},"LOhacn4jdT4":{"duration":102.94,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=LOhacn4jdT4","annotations":[{"segment":[8.021840873634947,101.23563182527302],"label":"Clean and jerk"}]},"afqUOlnLHX0":{"duration":89.513,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=afqUOlnLHX0","annotations":[{"segment":[5.0272469674297176,87.13894629641186],"label":"Playing 
pool"}]},"p8MvTi8hJdE":{"duration":219.94,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=p8MvTi8hJdE","annotations":[{"segment":[46.32109268044191,67.59448339294116],"label":"Snowboarding"},{"segment":[113.91557607338308,117.34676812378618],"label":"Snowboarding"},{"segment":[118.3761257389071,139.3063963400936],"label":"Snowboarding"},{"segment":[152.00180684399376,170.87336299843994],"label":"Snowboarding"}]},"cP2LVnGxiww":{"duration":223.934,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cP2LVnGxiww","annotations":[{"segment":[18.511221528861153,45.40488299531981],"label":"Carving jack-o-lanterns"},{"segment":[89.41269266770671,92.55610764430577],"label":"Carving jack-o-lanterns"},{"segment":[115.95708580343215,161.01270046801872],"label":"Carving jack-o-lanterns"}]},"DQ7a0-cSSCQ":{"duration":134.769,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DQ7a0-cSSCQ","annotations":[]},"K1_g97xCqKg":{"duration":100.449,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=K1_g97xCqKg","annotations":[{"segment":[0.01,0.02],"label":"Changing car wheel"},{"segment":[75.4542703861005,81.70307144364327],"label":"Changing car wheel"}]},"06r6DtoTtSQ":{"duration":125.95,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=06r6DtoTtSQ","annotations":[{"segment":[20.153718899999998,79.068271],"label":"Skateboarding"}]},"ATBbNtNlBnY":{"duration":156.734,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ATBbNtNlBnY","annotations":[{"segment":[0.009999999999999998,156.73469387755102],"label":"Sharpening knives"}]},"NE6SEdnVUh4":{"duration":76.39,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NE6SEdnVUh4","annotations":[{"segment":[17.26902496099844,22.038666146645866],"label":"Playing 
flauta"},{"segment":[26.327271450858035,76.12274414976599],"label":"Playing flauta"}]},"XaBF6VNWMo8":{"duration":227.95,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=XaBF6VNWMo8","annotations":[]},"GySHt3Z6Lt4":{"duration":181.91,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=GySHt3Z6Lt4","annotations":[{"segment":[10.49998510688727,179.63488034215246],"label":"Starting a campfire"}]},"6utacRir3uM":{"duration":20.3,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=6utacRir3uM","annotations":[]},"TrtuuE1K8pk":{"duration":69.173,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=TrtuuE1K8pk","annotations":[{"segment":[0.753873831775701,68.92560747663552],"label":"Plastering"}]},"8ZjUZbykp4U":{"duration":77.137,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8ZjUZbykp4U","annotations":[{"segment":[0.2,8.417581903276131],"label":"Playing drums"},{"segment":[28.499527301092044,32.82856942277691],"label":"Playing drums"}]},"ItX9t75ZIJI":{"duration":49.134,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=ItX9t75ZIJI","annotations":[]},"k_z03d9w_Mk":{"duration":50.06,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=k_z03d9w_Mk","annotations":[{"segment":[35.4804992199688,49.875],"label":"Cumbia"},{"segment":[3.423556942277691,34.36998044],"label":"Cumbia"}]},"GehJuCIJ7AI":{"duration":136.023,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=GehJuCIJ7AI","annotations":[{"segment":[0.01,38.21432673320928],"label":"Disc dog"},{"segment":[40.760769678607105,128.18864413726544],"label":"Disc 
dog"}]},"3PsxzgeMv5w":{"duration":226.511,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3PsxzgeMv5w","annotations":[]},"LSkcoamI3gw":{"duration":134.4,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LSkcoamI3gw","annotations":[{"segment":[10.483392941159824,113.01097590570289],"label":"Shoveling snow"}]},"_bSRAXNa5PY":{"duration":96.67,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=_bSRAXNa5PY","annotations":[{"segment":[72.65459568,78.50470096],"label":"Snatch"}]},"Q159fj3M9NM":{"duration":138.39,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Q159fj3M9NM","annotations":[{"segment":[5.447844667210279,135.00072274820715],"label":"Tumbling"}]},"Qnr73D2zIjU":{"duration":62.694,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Qnr73D2zIjU","annotations":[{"segment":[4.387594810835724,18.135396546450036],"label":"Bullfighting"}]},"3HEJb83Wm_I":{"duration":105.024,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3HEJb83Wm_I","annotations":[{"segment":[33.91555686739469,68.15880027457098],"label":"Cleaning sink"},{"segment":[88.96689555070202,92.57144748829953],"label":"Cleaning sink"}]},"yVQBdK8VQ_I":{"duration":152.811,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yVQBdK8VQ_I","annotations":[{"segment":[95.5959727301092,125.87200399375975],"label":"Gargling 
mouthwash"}]},"Qzewn-dGdOI":{"duration":48.739,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Qzewn-dGdOI","annotations":[{"segment":[0.01,40.87541029641186],"label":"Bullfighting"}]},"TEaugXCX1m4":{"duration":150.141,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TEaugXCX1m4","annotations":[{"segment":[0.01,150.140225],"label":"Surfing"}]},"OKhVHhchsyI":{"duration":197.811,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OKhVHhchsyI","annotations":[{"segment":[0.01,197.761],"label":"Building sandcastles"}]},"-DGsqL65o4k":{"duration":42.447,"subset":"validation","resolution":"328x240","url":"https://www.youtube.com/watch?v=-DGsqL65o4k","annotations":[{"segment":[12.515255181635837,39.39987615444618],"label":"Waterskiing"}]},"fxqzU9KGOsQ":{"duration":47.95,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=fxqzU9KGOsQ","annotations":[{"segment":[4.637758046801872,47.948434],"label":"Dodgeball"}]},"zOGg5-Mll4o":{"duration":211.023,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zOGg5-Mll4o","annotations":[{"segment":[85.92352131201248,125.42858858190327],"label":"Skiing"}]},"ZgytgzK17qs":{"duration":99.266,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZgytgzK17qs","annotations":[]},"iMB_mb11KWM":{"duration":50.063,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iMB_mb11KWM","annotations":[{"segment":[5.618583463338534,43.70009360374415],"label":"Tug of war"}]},"JgHQCKz_DRc":{"duration":104.05,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=JgHQCKz_DRc","annotations":[{"segment":[7.72764496,81.16116614664587],"label":"Getting a 
haircut"}]},"c4B0_ZeADxM":{"duration":85.682,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=c4B0_ZeADxM","annotations":[]},"ivmBrbO32Qo":{"duration":67.942,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ivmBrbO32Qo","annotations":[{"segment":[0.2,67.94111],"label":"Mooping floor"}]},"o8qR72Ymru8":{"duration":165.56,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=o8qR72Ymru8","annotations":[{"segment":[36.67593647963605,138.69702601404057],"label":"Starting a campfire"}]},"zVRajE-LL4I":{"duration":121.023,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zVRajE-LL4I","annotations":[]},"LL0uQO4q_wY":{"duration":100.009,"subset":"testing","resolution":"853x480","url":"https://www.youtube.com/watch?v=LL0uQO4q_wY","annotations":[]},"q23IYtwNOkE":{"duration":61.324,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=q23IYtwNOkE","annotations":[{"segment":[0.2,60.462877017160686],"label":"Gargling mouthwash"}]},"fKy5rh-SoTM":{"duration":105.675,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fKy5rh-SoTM","annotations":[{"segment":[0.2,24.651789157634283],"label":"Playing pool"},{"segment":[30.568219157316896,84.6378155433052],"label":"Playing pool"}]},"JNuVGc8mn54":{"duration":117.89,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=JNuVGc8mn54","annotations":[{"segment":[8.24501678,103.02065724336974],"label":"Playing 
guitarra"}]},"83oa1S0x9zI":{"duration":149.75,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=83oa1S0x9zI","annotations":[{"segment":[12.147839737909516,117.97421283931358],"label":"Cricket"}]},"P2Fcv3cC8bI":{"duration":77.3,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=P2Fcv3cC8bI","annotations":[{"segment":[0.01,77.29925170068027],"label":"Hanging wallpaper"}]},"EPCb-f6yQxA":{"duration":77.51,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=EPCb-f6yQxA","annotations":[{"segment":[13.776215288611544,70.57289235569422],"label":"Playing bagpipes"}]},"OOqGdga8t4s":{"duration":22.454,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=OOqGdga8t4s","annotations":[{"segment":[2.1336677067082683,22.176152886115446],"label":"Hitting a pinata"}]},"0B4T3GIkMZw":{"duration":86.38,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=0B4T3GIkMZw","annotations":[]},"kQoGS3YjBXk":{"duration":139.32,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kQoGS3YjBXk","annotations":[{"segment":[60.6278751950078,134.74215288611546],"label":"Assembling bicycle"}]},"ZqvDwQWmlss":{"duration":20.689,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZqvDwQWmlss","annotations":[]},"I2Ctmi7C56k":{"duration":48.855,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=I2Ctmi7C56k","annotations":[{"segment":[0.01,47.563131279214886],"label":"Raking 
leaves"}]},"Rm2GoMtTQWQ":{"duration":61.12,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Rm2GoMtTQWQ","annotations":[]},"K-rdUv9LXTc":{"duration":31.6,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=K-rdUv9LXTc","annotations":[]},"uaIu2yDmhNU":{"duration":43.65,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uaIu2yDmhNU","annotations":[{"segment":[2.755179365054602,24.976937009999993],"label":"Triple jump"}]},"X9AnhFjdiXA":{"duration":149.1,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=X9AnhFjdiXA","annotations":[{"segment":[0,144.6754155101404],"label":"Playing guitarra"}]},"fJyxb59mA-A":{"duration":54.92,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=fJyxb59mA-A","annotations":[{"segment":[9.937850658346333,16.277514009360374],"label":"Playing bagpipes"},{"segment":[26.900733678627144,29.299525216848675],"label":"Playing bagpipes"},{"segment":[33.5830815351014,49.517911039001554],"label":"Playing bagpipes"}]},"aT-0XxCqV5E":{"duration":47.16,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=aT-0XxCqV5E","annotations":[{"segment":[8.671596919999999,42.26531057],"label":"Long 
jump"}]},"BMa9v2uZBp8":{"duration":199.53,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=BMa9v2uZBp8","annotations":[{"segment":[28.296741029641186,191.25540561622466],"label":"Snowboarding"}]},"bCrCWBxoYTo":{"duration":200.09,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bCrCWBxoYTo","annotations":[]},"GnbyHdd-BjQ":{"duration":213.229,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GnbyHdd-BjQ","annotations":[{"segment":[79.50342216137625,155.68034018096722],"label":"Snowboarding"}]},"ggeFotn1f18":{"duration":61.858,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=ggeFotn1f18","annotations":[]},"vUAf-jRo12A":{"duration":117.819,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=vUAf-jRo12A","annotations":[]},"S6t9Lq1xKJk":{"duration":30.604,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=S6t9Lq1xKJk","annotations":[{"segment":[0.2,29.21930126677067],"label":"Making an omelette"}]},"dpU5CcFqAPU":{"duration":117.331,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dpU5CcFqAPU","annotations":[{"segment":[21.77291575663027,83.43234945397816],"label":"Tug of war"}]},"1xcFq3SkTKk":{"duration":175.8,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1xcFq3SkTKk","annotations":[{"segment":[35.907692667706705,165.02970670826835],"label":"Doing step aerobics"}]},"-TddN8oBvhQ":{"duration":107.927,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-TddN8oBvhQ","annotations":[{"segment":[9.76556669885843,18.18415868063294],"label":"Blow-drying hair"},{"segment":[26.60275066240745,45.96551222048881],"label":"Blow-drying hair"},{"segment":[68.69571057127999,76.44081504836194],"label":"Blow-drying hair"},{"segment":[87.048240925117,88.73195931825272],"label":"Blow-drying 
hair"}]},"e1TfVkNgitY":{"duration":84.28999999999999,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=e1TfVkNgitY","annotations":[{"segment":[48.127094630265205,77.58192850234009],"label":"Using parallel bars"}]},"Zt9nALIsHPc":{"duration":46.49,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Zt9nALIsHPc","annotations":[{"segment":[21.248829897035883,31.54689762870515],"label":"Playing harmonica"}]},"3uaQ0cULMgc":{"duration":82.199,"subset":"testing","resolution":"180x240","url":"https://www.youtube.com/watch?v=3uaQ0cULMgc","annotations":[]},"rel9A7gnnSg":{"duration":205.985,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=rel9A7gnnSg","annotations":[]},"Mg3_BN9t9JY":{"duration":133.42,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=Mg3_BN9t9JY","annotations":[{"segment":[23.608177067082686,36.08827067082683],"label":"Playing polo"},{"segment":[55.016412636505464,99.3207449297972],"label":"Playing polo"},{"segment":[109.51282137285492,116.37687285491421],"label":"Playing polo"}]},"HqpflYNEI7o":{"duration":68.964,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HqpflYNEI7o","annotations":[{"segment":[8.601684867394697,50.74994071762871],"label":"Carving jack-o-lanterns"}]},"bJ695Pp7Vng":{"duration":212.63,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=bJ695Pp7Vng","annotations":[{"segment":[12.27345605824233,198.36558710348416],"label":"Playing water polo"}]},"tyuyI30cZ00":{"duration":37.8,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=tyuyI30cZ00","annotations":[{"segment":[1.605866013651518,37.802085],"label":"Putting on makeup"}]},"UHNUmpx0nww":{"duration":108.902,"subset":"validation","resolution":"854x428","url":"https://www.youtube.com/watch?v=UHNUmpx0nww","annotations":[{"segment":[0.01,108.901065],"label":"Beach 
soccer"}]},"IsV1n_kLpvs":{"duration":57.54,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=IsV1n_kLpvs","annotations":[]},"q1e275n6R3c":{"duration":178.608,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=q1e275n6R3c","annotations":[{"segment":[0.01,159.40498710525733],"label":"Carving jack-o-lanterns"}]},"zcvbHJ1BZ9s":{"duration":77.625,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=zcvbHJ1BZ9s","annotations":[{"segment":[1.8159360374414977,14.527488299531981],"label":"Putting on shoes"}]},"o5EXkCKlkIE":{"duration":17.648,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=o5EXkCKlkIE","annotations":[]},"WGpz-hV-Ejw":{"duration":78.02,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=WGpz-hV-Ejw","annotations":[{"segment":[6.937731223534656,75.21961161622465],"label":"Rope skipping"}]},"Z-osDpUPwn0":{"duration":158.38,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Z-osDpUPwn0","annotations":[]},"vw065HaGq3I":{"duration":44.21,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=vw065HaGq3I","annotations":[{"segment":[4.138268049921996,11.311266003120123],"label":"Brushing hair"},{"segment":[32.89923099687987,37.24441244929797],"label":"Brushing hair"}]},"F7u4kpwhs5g":{"duration":53.62,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=F7u4kpwhs5g","annotations":[{"segment":[1.923778676317121,30.11131841192015],"label":"Playing 
polo"}]},"QlCA9f1WD84":{"duration":37.013,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=QlCA9f1WD84","annotations":[{"segment":[0,14.175655526901348],"label":"Plastering"},{"segment":[16.138882768916197,18.159851640405616],"label":"Plastering"},{"segment":[21.566628259750395,23.472113487519504],"label":"Plastering"},{"segment":[25.493082668486743,30.574376609204368],"label":"Plastering"}]},"ZKSORB5ygRM":{"duration":168.902,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZKSORB5ygRM","annotations":[{"segment":[19.762252301092044,144.132693449298],"label":"Surfing"}]},"_wHyOKf_fhc":{"duration":173.73,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=_wHyOKf_fhc","annotations":[{"segment":[17.617099132944908,160.72214899687987],"label":"Cumbia"}]},"gOnp3DZxeKU":{"duration":88.82,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gOnp3DZxeKU","annotations":[]},"gXVjjjvjVf4":{"duration":227.14,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=gXVjjjvjVf4","annotations":[{"segment":[123.18320921,219.05127925117006],"label":"Painting"},{"segment":[26.031154446177847,101.88002998],"label":"Painting"}]},"25Wxe9TQzY8":{"duration":6.04,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=25Wxe9TQzY8","annotations":[{"segment":[0.011968374973106977,5.983329348493356],"label":"Windsurfing"}]},"4BsepqjsKVw":{"duration":21.014,"subset":"testing","resolution":"492x276","url":"https://www.youtube.com/watch?v=4BsepqjsKVw","annotations":[]},"-x1twrM_ABM":{"duration":174.451,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=-x1twrM_ABM","annotations":[{"segment":[9.510596845176284,156.7889822761919],"label":"Laying 
tile"}]},"RzlX3Ai9AH4":{"duration":199.204,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=RzlX3Ai9AH4","annotations":[{"segment":[0.01,199.181],"label":"Painting furniture"}]},"t0XM3ivJYUo":{"duration":49.95,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=t0XM3ivJYUo","annotations":[{"segment":[21.564082683307333,49.43390795631826],"label":"Ping-pong"}]},"MysxDDxgHNo":{"duration":13.816,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MysxDDxgHNo","annotations":[{"segment":[0.01,13.815872],"label":"Curling"}]},"QOMvNgo6CQ4":{"duration":47.09,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=QOMvNgo6CQ4","annotations":[{"segment":[5.24742325975039,17.5236017],"label":"Polishing shoes"},{"segment":[21.283267300000002,46.859856662246486],"label":"Polishing shoes"}]},"wM7FmqEj4PY":{"duration":20.06,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=wM7FmqEj4PY","annotations":[{"segment":[0.6065712,19.811656531567387],"label":"Ironing clothes"}]},"HCphw9_Jku8":{"duration":167.765,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=HCphw9_Jku8","annotations":[{"segment":[0.01,0.02],"label":"Riding bumper cars"}]},"PI_A4uJ_fsI":{"duration":194.212,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=PI_A4uJ_fsI","annotations":[{"segment":[23.62645553822153,40.89194227769111],"label":"Elliptical trainer"},{"segment":[55.12839625585024,62.398074882995324],"label":"Elliptical trainer"},{"segment":[79.05775507020282,84.51001404056163],"label":"Elliptical trainer"},{"segment":[106.92485647425897,142.36453978159128],"label":"Elliptical 
trainer"}]},"yjOriMHCSdw":{"duration":154.6,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yjOriMHCSdw","annotations":[{"segment":[39.46391783000001,137.47444787587423],"label":"Wrapping presents"}]},"U0qUFAPUg_Y":{"duration":193.19,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=U0qUFAPUg_Y","annotations":[{"segment":[16.274978509344457,18.686086436654747],"label":"Pole vault"},{"segment":[34.65967645508542,37.372172873309495],"label":"Pole vault"},{"segment":[43.39994269158522,45.50966212798172],"label":"Pole vault"},{"segment":[54.24992836448152,57.565201764533164],"label":"Pole vault"},{"segment":[66.30546800103296,69.31935291017084],"label":"Pole vault"},{"segment":[76.55267669210171,79.56656160123957],"label":"Pole vault"},{"segment":[88.00543934682557,91.62210123779101],"label":"Pole vault"},{"segment":[95.23876312875645,97.64987105606673],"label":"Pole vault"},{"segment":[102.77347504212167,105.48597145085803],"label":"Pole vault"},{"segment":[112.4179067176287,115.13040312636505],"label":"Pole vault"},{"segment":[127.48733121060842,130.50121610920436],"label":"Pole vault"},{"segment":[134.1178779875195,137.43315137597503],"label":"Pole vault"},{"segment":[139.8442592948518,143.46092117316692],"label":"Pole vault"},{"segment":[151.59841039937598,154.9136837878315],"label":"Pole vault"},{"segment":[162.44839603432138,165.4622809329173],"label":"Pole vault"},{"segment":[180.23031693603744,183.2442018346334],"label":"Pole vault"}]},"KZ1NEffR2Qg":{"duration":90.256,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KZ1NEffR2Qg","annotations":[]},"0SmA1cGocz4":{"duration":214.274,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0SmA1cGocz4","annotations":[{"segment":[45.191066546660096,51.53250121955729],"label":"Blow-drying hair"},{"segment":[56.87265673357598,172.35351972423018],"label":"Blow-drying 
hair"},{"segment":[179.36247383637973,196.3842195373143],"label":"Blow-drying hair"}]},"7iuU-zsauOY":{"duration":143.848,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=7iuU-zsauOY","annotations":[{"segment":[2.4685195756630263,143.39854625897033],"label":"Scuba diving"}]},"-JhWjGDPHMY":{"duration":149.282,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-JhWjGDPHMY","annotations":[{"segment":[54.728635639625594,70.09923118096725],"label":"Roof shingle removal"}]},"dHBhJ7MRxPQ":{"duration":33.93,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dHBhJ7MRxPQ","annotations":[{"segment":[16.141852355694226,33.44803504524181],"label":"Washing hands"}]},"j_tKuqWYV-A":{"duration":138.04,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=j_tKuqWYV-A","annotations":[{"segment":[6.24529841057588,130.28984615166922],"label":"Plataform diving"}]},"MM5mreexkI8":{"duration":77.72,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MM5mreexkI8","annotations":[{"segment":[10.669373887880685,38.67648034356748],"label":"Vacuuming floor"},{"segment":[44.6173809422777,53.83184004992201],"label":"Vacuuming floor"}]},"WCChCrg9eZU":{"duration":161.12,"subset":"validation","resolution":"720x1280","url":"https://www.youtube.com/watch?v=WCChCrg9eZU","annotations":[{"segment":[1.7595364243369735,29.409394521060843],"label":"Shoveling snow"},{"segment":[51.7806433447738,72.39235574414977],"label":"Shoveling snow"},{"segment":[81.9441249048362,94.00951752886117],"label":"Shoveling snow"},{"segment":[135.2329423276131,160.87190165366616],"label":"Shoveling 
snow"}]},"0L_CxBasznE":{"duration":25.4,"subset":"testing","resolution":"176x144","url":"https://www.youtube.com/watch?v=0L_CxBasznE","annotations":[]},"Ppp24iA2Fw8":{"duration":137.973,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ppp24iA2Fw8","annotations":[]},"cYMweZa5J1I":{"duration":236.12,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cYMweZa5J1I","annotations":[{"segment":[16.389097717628704,221.89733426677066],"label":"Doing motocross"}]},"YizcSEo0bTo":{"duration":60.86,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=YizcSEo0bTo","annotations":[{"segment":[0.2,54.70725269573628],"label":"Riding bumper cars"}]},"3RTmWrwgKek":{"duration":30.65,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=3RTmWrwgKek","annotations":[{"segment":[0.01,30.650339],"label":"Hopscotch"}]},"Skz2tom_Ag0":{"duration":7.08,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Skz2tom_Ag0","annotations":[{"segment":[1.1915171606864274,6.273147425897036],"label":"Playing badminton"}]},"Z4KErmUO3gw":{"duration":202.061,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Z4KErmUO3gw","annotations":[{"segment":[37.08097389403207,69.13402326930317],"label":"Bullfighting"},{"segment":[71.01949676196618,78.87563631472871],"label":"Bullfighting"},{"segment":[80.4468642252812,129.46917503451937],"label":"Bullfighting"},{"segment":[131.35464852718238,138.26805133361339],"label":"Bullfighting"},{"segment":[160.26524208134845,176.60601121743468],"label":"Bullfighting"}]},"6GIWY4-491k":{"duration":203.268,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6GIWY4-491k","annotations":[{"segment":[21.404922156069915,28.381341229159375],"label":"Ice fishing"},{"segment":[48.042158616956925,67.70297600475448],"label":"Ice 
fishing"},{"segment":[90.85200292909677,138.1013873307916],"label":"Ice fishing"},{"segment":[157.44509485588526,167.59261355757374],"label":"Ice fishing"}]},"NnW_1eQV9GU":{"duration":145.915,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NnW_1eQV9GU","annotations":[{"segment":[5.99884520653698,28.494513430836044],"label":"Bullfighting"},{"segment":[90.30079380466782,102.34392931868652],"label":"Bullfighting"}]},"FKl0ncFqDF0":{"duration":44.002,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=FKl0ncFqDF0","annotations":[]},"u246t3R7hMs":{"duration":126.99,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=u246t3R7hMs","annotations":[]},"UG82vrvkYFk":{"duration":33.995,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=UG82vrvkYFk","annotations":[{"segment":[0.01,30.043929066465335],"label":"Doing a powerbomb"}]},"gOniW-yEZ0k":{"duration":13.58,"subset":"validation","resolution":"600x360","url":"https://www.youtube.com/watch?v=gOniW-yEZ0k","annotations":[{"segment":[1.207908430577223,4.619719962558502],"label":"Shaving legs"}]},"A_Rh4tMM2xY":{"duration":210.698,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=A_Rh4tMM2xY","annotations":[{"segment":[0.2,203.42599687987519],"label":"Swinging at the playground"}]},"T49ZxNX1DFM":{"duration":58.35,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=T49ZxNX1DFM","annotations":[{"segment":[4.640204368174727,53.9537488299532],"label":"Playing polo"}]},"mnv-Qz4QDfU":{"duration":92.068,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mnv-Qz4QDfU","annotations":[{"segment":[46.512954758190325,62.44794851794072],"label":"Table 
soccer"}]},"KJQi_5e72lM":{"duration":217.571,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=KJQi_5e72lM","annotations":[{"segment":[24.871510485889754,208.52761952015766],"label":"Waxing skis"}]},"pQsk5XPTLoY":{"duration":13.468,"subset":"validation","resolution":"400x224","url":"https://www.youtube.com/watch?v=pQsk5XPTLoY","annotations":[{"segment":[0.01,12.951220027335514],"label":"Doing a powerbomb"}]},"csxC5H5uDb4":{"duration":47.136,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=csxC5H5uDb4","annotations":[{"segment":[7.037486847058679,43.25122700356711],"label":"Hanging wallpaper"}]},"A8dBgZCuQow":{"duration":173.06,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=A8dBgZCuQow","annotations":[{"segment":[33.181408736349454,144.8819297971919],"label":"Putting on makeup"},{"segment":[154.86448361934478,163.2282449297972],"label":"Putting on makeup"}]},"V4cYhOQ6Pfs":{"duration":41.08,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=V4cYhOQ6Pfs","annotations":[{"segment":[21.961096723868952,30.092464898595942],"label":"Hammer throw"}]},"bPkk-z5n8MY":{"duration":62.671,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bPkk-z5n8MY","annotations":[{"segment":[1.857632596460321,61.4974143900156],"label":"Baton twirling"}]},"dSsMVoPLQOk":{"duration":101.286,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dSsMVoPLQOk","annotations":[{"segment":[4.424322115741773,8.690632727349909],"label":"Grooming dog"},{"segment":[28.284059239920616,101.28537414965986],"label":"Grooming dog"},{"segment":[40.450944599063966,68.41898051326054],"label":"Grooming dog"},{"segment":[76.16154412792511,82.32399271918878],"label":"Grooming dog"},{"segment":[93.06877487831514,97.49309694383777],"label":"Grooming 
dog"}]},"doAvtD_jgiY":{"duration":37.895,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=doAvtD_jgiY","annotations":[{"segment":[0.5911773322932917,37.894467],"label":"Baton twirling"}]},"ilX9ntIzaoQ":{"duration":32.555,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=ilX9ntIzaoQ","annotations":[{"segment":[0.01,32.55437641723356],"label":"Snow tubing"}]},"0fNBm46reNg":{"duration":89.038,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0fNBm46reNg","annotations":[{"segment":[0.19999999999999998,83.20680031201248],"label":"Spread mulch"}]},"RPLbUeV3-o0":{"duration":41.007,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=RPLbUeV3-o0","annotations":[{"segment":[12.011545916490569,37.620133785852936],"label":"Skiing"}]},"hR1HPLOmR1I":{"duration":43.863,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hR1HPLOmR1I","annotations":[{"segment":[15.74284758942457,41.36756920684292],"label":"Hitting a pinata"}]},"UZ9D6DhrmDI":{"duration":33.019,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=UZ9D6DhrmDI","annotations":[{"segment":[6.644963878315132,24.57091294539782],"label":"Playing rubik cube"}]},"tLk0Y04OaCA":{"duration":35.2,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=tLk0Y04OaCA","annotations":[]},"zL_WLS1gPNw":{"duration":108.414,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zL_WLS1gPNw","annotations":[{"segment":[40.8025699067659,51.087515862338385],"label":"Painting furniture"},{"segment":[74.69230985873425,88.51797351592863],"label":"Painting furniture"}]},"sAi1aMHR89A":{"duration":22.826,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=sAi1aMHR89A","annotations":[{"segment":[0.2,21.709703224859503],"label":"Doing 
crunches"}]},"VWsyA_RJIzg":{"duration":25.495,"subset":"validation","resolution":"1920x1440","url":"https://www.youtube.com/watch?v=VWsyA_RJIzg","annotations":[{"segment":[0.01,25.461],"label":"Scuba diving"}]},"sYJgYpQknw0":{"duration":130.29,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sYJgYpQknw0","annotations":[{"segment":[37.399123519500776,109.55504117940717],"label":"Ping-pong"}]},"wAubNJ2NUUw":{"duration":174.41,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wAubNJ2NUUw","annotations":[]},"LZxTeIeuqT8":{"duration":170.969,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=LZxTeIeuqT8","annotations":[{"segment":[87.39992626279314,159.52349479861869],"label":"Rollerblading"}]},"8im-T1bsyHs":{"duration":107.07,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=8im-T1bsyHs","annotations":[{"segment":[2.171409854914197,16.703152730109206],"label":"Javelin throw"},{"segment":[20.37784633073323,28.562391168486744],"label":"Javelin throw"},{"segment":[38.417251279251175,58.962129137285494],"label":"Javelin throw"},{"segment":[71.65652521216849,98.38156958034322],"label":"Javelin throw"}]},"M2wdIwZMNm8":{"duration":171.085,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=M2wdIwZMNm8","annotations":[{"segment":[113.9250655226209,128.34745865834634],"label":"Ice fishing"}]},"j5D5FR6Xtps":{"duration":36.99,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=j5D5FR6Xtps","annotations":[{"segment":[4.00331947,34.463519918876756],"label":"Brushing 
hair"}]},"94wjthSzsSQ":{"duration":138.623,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=94wjthSzsSQ","annotations":[{"segment":[0,2.5951287847432263],"label":"Capoeira"},{"segment":[8.21790781835355,10.813036603096776],"label":"Capoeira"},{"segment":[12.11060099546839,14.056947584025808],"label":"Capoeira"},{"segment":[16.219554904645165,26.383809311556135],"label":"Capoeira"},{"segment":[28.113895168051616,30.92528468485678],"label":"Capoeira"},{"segment":[39.79197469939614,42.81962494826324],"label":"Capoeira"},{"segment":[47.14483958950195,48.44240398187356],"label":"Capoeira"},{"segment":[73.5286487675507,75.90751681591264],"label":"Capoeira"},{"segment":[87.80185705772232,91.9108109594384],"label":"Capoeira"},{"segment":[95.58724339781593,96.66854705616224],"label":"Capoeira"},{"segment":[102.94010827457099,109.42793022464899],"label":"Capoeira"},{"segment":[111.80679827301093,115.26696997971919],"label":"Capoeira"},{"segment":[118.29462022308893,122.403574124805],"label":"Capoeira"},{"segment":[128.67513534321375,130.62148192823713],"label":"Capoeira"},{"segment":[135.16295729329173,137.75808607332294],"label":"Capoeira"}]},"Y_sVqGm8AyU":{"duration":194.93,"subset":"training","resolution":"568x320","url":"https://www.youtube.com/watch?v=Y_sVqGm8AyU","annotations":[{"segment":[83.93307226166598,174.55646172137284],"label":"Getting a piercing"}]},"u_vLZxgO9Rw":{"duration":212.974,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=u_vLZxgO9Rw","annotations":[]},"j0bhBuunr-E":{"duration":25.566,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=j0bhBuunr-E","annotations":[]},"I62kL8H81XA":{"duration":92.3,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=I62kL8H81XA","annotations":[{"segment":[14.865628330222547,85.5995647957745],"label":"Making a 
lemonade"}]},"49drGj3JUg4":{"duration":23.453,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=49drGj3JUg4","annotations":[{"segment":[12.860144296928341,15.67730250706627],"label":"Calf roping"}]},"vOI3hJ45lag":{"duration":58.77,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=vOI3hJ45lag","annotations":[{"segment":[16.77825279407176,17.236675001560062],"label":"Playing accordion"},{"segment":[23.47121702340094,26.405119151326055],"label":"Playing accordion"},{"segment":[36.123669950078,43.27505638689548],"label":"Playing accordion"}]},"LyIaNIuEYR8":{"duration":27.145,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LyIaNIuEYR8","annotations":[]},"yJSQmNSFlNI":{"duration":237.981,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yJSQmNSFlNI","annotations":[{"segment":[6.678842433697349,127.64009984399375],"label":"River tubing"},{"segment":[146.56348673946957,228.955127925117],"label":"River tubing"}]},"ad5QOYLt_cI":{"duration":140.9,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ad5QOYLt_cI","annotations":[{"segment":[53.83120124804992,132.05123244929797],"label":"Making a sandwich"}]},"UuLBAMSmwgc":{"duration":189.266,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=UuLBAMSmwgc","annotations":[{"segment":[16.529260530421215,179.75570826833072],"label":"Table 
soccer"}]},"I6O3bhwQKUU":{"duration":63.67,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=I6O3bhwQKUU","annotations":[]},"Fl9oEpnUDBY":{"duration":21.664,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=Fl9oEpnUDBY","annotations":[]},"C26UfwZbHE0":{"duration":83.685,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=C26UfwZbHE0","annotations":[{"segment":[23.891203282371293,71.0208447301092],"label":"Gargling mouthwash"}]},"W-pFUV2O1l0":{"duration":150.12,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=W-pFUV2O1l0","annotations":[{"segment":[20.602433697347895,111.90867394695788],"label":"Getting a haircut"}]},"QCeGGnd4QB0":{"duration":203.9,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QCeGGnd4QB0","annotations":[{"segment":[4.771320322200643,200.39545179407176],"label":"Playing badminton"}]},"lSbbzsOjwno":{"duration":55.31,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lSbbzsOjwno","annotations":[{"segment":[0.01,52.807284056162246],"label":"Bathing dog"}]},"c2-6_i1yV5E":{"duration":170.92000000000002,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=c2-6_i1yV5E","annotations":[{"segment":[2.6657566302652107,151.6815522620905],"label":"Pole 
vault"}]},"vPHPhlbpJVs":{"duration":139.111,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vPHPhlbpJVs","annotations":[]},"_B7rGFDRIww":{"duration":230.6,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_B7rGFDRIww","annotations":[]},"g2Dv154zmMU":{"duration":107.671,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=g2Dv154zmMU","annotations":[{"segment":[20.151201248049922,91.85589235569424],"label":"Hurling"}]},"9hE6VRD3qXQ":{"duration":177.029,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=9hE6VRD3qXQ","annotations":[{"segment":[0.2,93.62372653273478],"label":"Laying tile"},{"segment":[120.68899261004454,176.82893424036283],"label":"Laying tile"}]},"o_-a7AMw74M":{"duration":103.05,"subset":"validation","resolution":"848x480","url":"https://www.youtube.com/watch?v=o_-a7AMw74M","annotations":[{"segment":[10.288939405195256,21.542466879627565],"label":"Cleaning shoes"},{"segment":[37.45817002203897,47.10405071440953],"label":"Cleaning shoes"},{"segment":[61.090577718346836,94.04733675061289],"label":"Cleaning shoes"}]},"vwaQwo_5X-k":{"duration":127.52,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vwaQwo_5X-k","annotations":[{"segment":[0.2,111.41240274570983],"label":"Breakdancing"}]},"wJYsD3_CS6E":{"duration":85.984,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=wJYsD3_CS6E","annotations":[{"segment":[0.01,85.983513],"label":"Swinging at the playground"}]},"c7S423zJqPo":{"duration":143.85,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=c7S423zJqPo","annotations":[]},"60CCYfec2vQ":{"duration":61.533,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=60CCYfec2vQ","annotations":[{"segment":[4.799756616114985,9.88749833697348],"label":"Roof shingle 
removal"},{"segment":[12.76735222152886,34.07827096723869],"label":"Roof shingle removal"},{"segment":[36.574144333853354,56.25314754086762],"label":"Roof shingle removal"}]},"VVyE6rVJEuI":{"duration":144.82,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VVyE6rVJEuI","annotations":[{"segment":[0.01,144.82256235827666],"label":"Zumba"}]},"uz4Tb2RG1tY":{"duration":150.419,"subset":"training","resolution":"426x236","url":"https://www.youtube.com/watch?v=uz4Tb2RG1tY","annotations":[{"segment":[74.71135491419656,97.46489157566303],"label":"Sharpening knives"},{"segment":[104.98517550702029,106.62740171606865],"label":"Sharpening knives"},{"segment":[116.71536271450857,120.23441887675507],"label":"Sharpening knives"}]},"qiT-OtAHtvk":{"duration":126.41,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=qiT-OtAHtvk","annotations":[{"segment":[0.01,126.301],"label":"Playing congas"}]},"ynxngsa-IHU":{"duration":206.96,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ynxngsa-IHU","annotations":[{"segment":[35.184667073322935,191.74029579407176],"label":"Getting a haircut"},{"segment":[124.44030752,157.71724513],"label":"Getting a haircut"}]},"2RxbcK90TeA":{"duration":21.293,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2RxbcK90TeA","annotations":[{"segment":[0.01,20.79597728009211],"label":"Canoeing"}]},"vFRSso8WQfY":{"duration":170.226,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=vFRSso8WQfY","annotations":[{"segment":[45.67673596739788,163.05532471138847],"label":"Making a lemonade"}]},"PllZQ09sBuI":{"duration":219.383,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PllZQ09sBuI","annotations":[{"segment":[21.561738355248494,138.26892532572052],"label":"Decorating the Christmas 
tree"}]},"gYvYh9aMohY":{"duration":201.225,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=gYvYh9aMohY","annotations":[{"segment":[17.579642919050098,178.30780675036524],"label":"Rope skipping"}]},"cTioh2vzxGE":{"duration":108.03999999999999,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=cTioh2vzxGE","annotations":[{"segment":[0.01,67.50518096723869],"label":"Playing harmonica"},{"segment":[89.922631825273,93.79933385335414],"label":"Playing harmonica"}]},"Bh8RcPBQjxo":{"duration":10.4,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Bh8RcPBQjxo","annotations":[{"segment":[0.01,10.061737223088922],"label":"Doing motocross"}]},"SLfZUqfyfjs":{"duration":158.385,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=SLfZUqfyfjs","annotations":[{"segment":[0.19999999999999998,8.895162533031934],"label":"Removing ice from car"},{"segment":[49.66465747609496,59.30108355354623],"label":"Removing ice from car"}]},"9PvtW0Uvnl0":{"duration":64.18,"subset":"training","resolution":"3840x2160","url":"https://www.youtube.com/watch?v=9PvtW0Uvnl0","annotations":[{"segment":[12.935959526683545,54.52296957029725],"label":"Running a marathon"}]},"fxpbQqCviGY":{"duration":17.624,"subset":"testing","resolution":"270x480","url":"https://www.youtube.com/watch?v=fxpbQqCviGY","annotations":[]},"yhWPzE-kmk4":{"duration":45.21,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yhWPzE-kmk4","annotations":[{"segment":[4.321049017041642,39.57911911049958],"label":"Baking 
cookies"}]},"0_PdI-5l62o":{"duration":68.987,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=0_PdI-5l62o","annotations":[{"segment":[0.01,66.18828028081124],"label":"Bullfighting"}]},"UYW1EzZjflo":{"duration":214.44,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UYW1EzZjflo","annotations":[]},"4qZckue0QU4":{"duration":23.383,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4qZckue0QU4","annotations":[{"segment":[0,23.361],"label":"Playing pool"}]},"u5ri43qbi1A":{"duration":220.845,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=u5ri43qbi1A","annotations":[{"segment":[0.01,220.84492063492064],"label":"Making a cake"}]},"IrTqW6Qn8mI":{"duration":40.171,"subset":"training","resolution":"800x480","url":"https://www.youtube.com/watch?v=IrTqW6Qn8mI","annotations":[{"segment":[0.28200833073322934,36.00306355694228],"label":"Blowing leaves"}]},"MDhKBOm_wQ0":{"duration":111.595,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=MDhKBOm_wQ0","annotations":[]},"W8eqlIPRWCk":{"duration":99.36,"subset":"training","resolution":"320x212","url":"https://www.youtube.com/watch?v=W8eqlIPRWCk","annotations":[{"segment":[7.7502502821201285,98.89319359985284],"label":"Playing field hockey"}]},"UpmzSgLLj-s":{"duration":169.228,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UpmzSgLLj-s","annotations":[{"segment":[0.01,159.05178633274002],"label":"Assembling 
bicycle"}]},"JRr3BruqS2Y":{"duration":222.587,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=JRr3BruqS2Y","annotations":[{"segment":[94.50405455643896,146.4293613306373],"label":"Rollerblading"},{"segment":[200.08551221710698,208.73973010202147],"label":"Rollerblading"}]},"KHmaaCZIQrs":{"duration":223.14,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KHmaaCZIQrs","annotations":[]},"DbBqhlSvr-o":{"duration":64.134,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DbBqhlSvr-o","annotations":[{"segment":[5.853033843802731,28.06454732967727],"label":"Hanging wallpaper"},{"segment":[30.265688296710785,50.47616444856393],"label":"Hanging wallpaper"}]},"PnY3FT_QqWo":{"duration":208.26,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=PnY3FT_QqWo","annotations":[{"segment":[21.428995319812795,195.13373010920438],"label":"Horseback riding"}]},"uqiMw7tQ1Cc":{"duration":55.148,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uqiMw7tQ1Cc","annotations":[{"segment":[5.868443121568436,54.17156305926314],"label":"Kneeling"}]},"Gvc1z9qypFQ":{"duration":24.61,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Gvc1z9qypFQ","annotations":[]},"GEHPJEgMm_I":{"duration":47.95,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=GEHPJEgMm_I","annotations":[{"segment":[4.563028868954758,46.82714872074883],"label":"Tennis serve with ball 
bouncing"}]},"ZBGb82CLWe4":{"duration":56.936,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZBGb82CLWe4","annotations":[]},"dLDZtWXF1do":{"duration":23.453,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dLDZtWXF1do","annotations":[]},"nQgmUkyblgc":{"duration":47.65,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=nQgmUkyblgc","annotations":[{"segment":[7.698325789999999,28.86233775351014],"label":"Washing dishes"}]},"BUmS918SyLA":{"duration":133.469,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=BUmS918SyLA","annotations":[{"segment":[7.495880798497247,133.26832199546487],"label":"Baton twirling"}]},"KxY2z_xDsO8":{"duration":41.1,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=KxY2z_xDsO8","annotations":[{"segment":[0.2,40.840999999999994],"label":"Shuffleboard"}]},"9lNEPwPEemI":{"duration":57.586,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=9lNEPwPEemI","annotations":[]},"IMMoglaJ3RY":{"duration":77.69,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=IMMoglaJ3RY","annotations":[]},"DaDHatB3RbM":{"duration":46.603,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DaDHatB3RbM","annotations":[{"segment":[4.354539781591264,22.063001560062403],"label":"Canoeing"}]},"QjFioni0uCM":{"duration":145.752,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QjFioni0uCM","annotations":[{"segment":[59.57399952596743,107.32415083931357],"label":"Blow-drying hair"}]},"m_ST2LDe5lA":{"duration":17.09,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=m_ST2LDe5lA","annotations":[{"segment":[2.4458221528861155,15.073708268330734],"label":"Scuba 
diving"}]},"0jBwj0bfZ3Y":{"duration":29.91,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0jBwj0bfZ3Y","annotations":[{"segment":[1.2597458658346334,29.207441185647426],"label":"Playing kickball"}]},"5N-PfYLyCpI":{"duration":169.503,"subset":"training","resolution":"360x240","url":"https://www.youtube.com/watch?v=5N-PfYLyCpI","annotations":[{"segment":[0.19999999999999998,6.082010920436817],"label":"Capoeira"},{"segment":[16.394985959438376,163.98274570982838],"label":"Capoeira"}]},"g-KEU43sCt4":{"duration":224.027,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=g-KEU43sCt4","annotations":[{"segment":[0.2,113.20337597503901],"label":"Capoeira"},{"segment":[135.21514352574104,183.7972012480499],"label":"Capoeira"}]},"fJ45W32t6h0":{"duration":121.464,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fJ45W32t6h0","annotations":[{"segment":[7.766397815912637,11.933733229329173],"label":"Snow tubing"},{"segment":[24.435739469578785,27.466528861154448],"label":"Snow tubing"},{"segment":[41.862778471138846,47.166659906396255],"label":"Snow tubing"},{"segment":[58.72154446177847,60.994636505460214],"label":"Snow tubing"},{"segment":[91.69648049921997,118.78862246489861],"label":"Snow tubing"}]},"o8Qw9GY3fRc":{"duration":174.266,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=o8Qw9GY3fRc","annotations":[]},"y20J3BbydOk":{"duration":141.108,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=y20J3BbydOk","annotations":[{"segment":[41.32124751873751,123.52415359190032],"label":"Making a lemonade"}]},"QLJAQBUisuQ":{"duration":146.91,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=QLJAQBUisuQ","annotations":[{"segment":[57.976445240000004,126.31714067000001],"label":"Triple 
jump"}]},"g1Z8boTOAlA":{"duration":44.1,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=g1Z8boTOAlA","annotations":[]},"LKeQMeUZFso":{"duration":78.23,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=LKeQMeUZFso","annotations":[{"segment":[3.476021840873635,72.75252730109204],"label":"Vacuuming floor"}]},"ShiBZnuxlmc":{"duration":113.47999999999999,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ShiBZnuxlmc","annotations":[{"segment":[22.659777497659906,113.475917],"label":"Using the balance beam"}]},"nbcRj00xCKM":{"duration":133.864,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nbcRj00xCKM","annotations":[{"segment":[14.607285491419656,130.8591060842434],"label":"Waterskiing"}]},"0RZ5_8VD4vc":{"duration":39.939,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0RZ5_8VD4vc","annotations":[{"segment":[0.010000000000000002,31.028524732826046],"label":"Roof shingle removal"}]},"p1QGn0IzfW0":{"duration":72.59,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=p1QGn0IzfW0","annotations":[]},"pIk_bbjCNDo":{"duration":22.27,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pIk_bbjCNDo","annotations":[{"segment":[0.10421810452418098,22.16371689547582],"label":"Brushing teeth"}]},"nZ41Hmvap-4":{"duration":18.391,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=nZ41Hmvap-4","annotations":[]},"5g70tfGYWDk":{"duration":71.43,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=5g70tfGYWDk","annotations":[{"segment":[4.3429937597503905,64.71759906396255],"label":"Tennis serve with ball 
bouncing"}]},"Rh2UGc42X0M":{"duration":166.162,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Rh2UGc42X0M","annotations":[{"segment":[11.146592519483091,146.4610399531981],"label":"Snow tubing"}]},"HnCUykqco5M":{"duration":216.619,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=HnCUykqco5M","annotations":[{"segment":[16.559015427283757,40.214751751974845],"label":"Rafting"},{"segment":[44.27002083620759,54.408193546789484],"label":"Rafting"},{"segment":[69.61545261266234,105.43699619005169],"label":"Rafting"},{"segment":[139.23090522532468,146.6655652130847],"label":"Rafting"},{"segment":[157.14167701401934,167.2798497246012],"label":"Rafting"}]},"vKC23-I4pBc":{"duration":34.09,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=vKC23-I4pBc","annotations":[{"segment":[16.940840873634944,26.29937877],"label":"Cleaning windows"}]},"Lliy0OcwzUE":{"duration":157.618,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Lliy0OcwzUE","annotations":[]},"T3bTwmccIEQ":{"duration":112.896,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=T3bTwmccIEQ","annotations":[{"segment":[48.2578611933593,112.895214],"label":"Canoeing"}]},"hQl2eeYX3IY":{"duration":76.905,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=hQl2eeYX3IY","annotations":[{"segment":[2.039588631825273,76.904489],"label":"Blowing leaves"}]},"O8tsM_ilGx4":{"duration":30.094,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=O8tsM_ilGx4","annotations":[{"segment":[8.529581903276132,26.10426989079563],"label":"Calf roping"}]},"_9h6NBOPTy8":{"duration":88.65,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=_9h6NBOPTy8","annotations":[{"segment":[34.85658654,78.74514072000001],"label":"Mixing 
drinks"}]},"zSeLjjo3KF0":{"duration":61.35,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=zSeLjjo3KF0","annotations":[{"segment":[35.65023686037441,61.3471201814059],"label":"Bungee jumping"}]},"CTmXCO7ha-Y":{"duration":186.6,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CTmXCO7ha-Y","annotations":[{"segment":[0,186.5955782312925],"label":"Playing guitarra"}]},"ZZImcxqa1_w":{"duration":24.47,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ZZImcxqa1_w","annotations":[{"segment":[3.5630160234009365,21.1569085],"label":"Hammer throw"}]},"Ne8UzAInivM":{"duration":120.257,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Ne8UzAInivM","annotations":[{"segment":[8.629926171196509,60.034269017019184],"label":"Scuba diving"},{"segment":[84.6107971045242,113.87750297659906],"label":"Scuba diving"}]},"iqhdqhVttEM":{"duration":205.5,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iqhdqhVttEM","annotations":[]},"tMTvOaUYNeg":{"duration":23.963,"subset":"validation","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=tMTvOaUYNeg","annotations":[{"segment":[0.2,20.044152336448597],"label":"Raking leaves"}]},"AuS32BoK0Bg":{"duration":213.578,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AuS32BoK0Bg","annotations":[{"segment":[24.98295631825273,36.64166926677067],"label":"Skiing"},{"segment":[45.96863962558502,184.22491887675508],"label":"Skiing"}]},"GgAXP4FTFnA":{"duration":54.22,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=GgAXP4FTFnA","annotations":[{"segment":[0.8872581903276131,53.61574492979719],"label":"Playing water 
polo"}]},"kHSCvFav1p4":{"duration":180.12,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=kHSCvFav1p4","annotations":[{"segment":[0.01,180.11718820861677],"label":"Playing badminton"}]},"yCqe0J1xgyc":{"duration":199.27,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=yCqe0J1xgyc","annotations":[{"segment":[13.518478916536662,197.80486966380656],"label":"Springboard diving"}]},"jqEpQ-cRGFk":{"duration":95.9,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=jqEpQ-cRGFk","annotations":[{"segment":[6.413112284146227,89.48531716439506],"label":"Brushing hair"}]},"Fh1LR3xB3zQ":{"duration":168.043,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Fh1LR3xB3zQ","annotations":[{"segment":[91.20803120124806,112.17539469578784],"label":"Sharpening knives"},{"segment":[117.41723556942279,150.1787410296412],"label":"Sharpening knives"}]},"d_J5_sRRiAk":{"duration":139.46,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=d_J5_sRRiAk","annotations":[]},"FjMeS0FCvas":{"duration":46.16,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=FjMeS0FCvas","annotations":[{"segment":[0.01,15.308198017160688],"label":"Drinking beer"}]},"2w7Fxoeo_Qk":{"duration":40.148,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2w7Fxoeo_Qk","annotations":[{"segment":[18.163365687987522,27.495577713884558],"label":"Playing ten pins"}]},"HMc2QoiQcoA":{"duration":170.37,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HMc2QoiQcoA","annotations":[{"segment":[46.51144664834212,81.59436640594876],"label":"Smoking a cigarette"},{"segment":[127.84003217316692,159.73359529329173],"label":"Smoking a 
cigarette"}]},"KNp7bwtYEA8":{"duration":6.6,"subset":"testing","resolution":"480x480","url":"https://www.youtube.com/watch?v=KNp7bwtYEA8","annotations":[]},"vmUbGiOyUbU":{"duration":113.801,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=vmUbGiOyUbU","annotations":[{"segment":[21.659472267326066,43.67401785050995],"label":"Grooming dog"},{"segment":[50.06533753595042,54.14868031981279],"label":"Grooming dog"},{"segment":[60.895073277691104,64.26826975663026],"label":"Grooming dog"},{"segment":[69.06175948985958,73.32263925273011],"label":"Grooming dog"},{"segment":[78.47120229953198,84.86252194383775],"label":"Grooming dog"},{"segment":[100.84082105460217,107.05460404212168],"label":"Grooming dog"}]},"V1ntLwOfkyE":{"duration":56.15,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=V1ntLwOfkyE","annotations":[{"segment":[6.476558502340094,52.00604368174727],"label":"Using the pommel horse"}]},"qVuRcevXgMk":{"duration":80.806,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=qVuRcevXgMk","annotations":[{"segment":[0.01,80.805395],"label":"Putting in contact lenses"}]},"jx7_0MRlScw":{"duration":91.162,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jx7_0MRlScw","annotations":[{"segment":[0.2,85.73794539781592],"label":"Riding bumper cars"}]},"y1CzJvvItrM":{"duration":90.396,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=y1CzJvvItrM","annotations":[{"segment":[0.01,90.395283],"label":"Plastering"}]},"Eblks2o9IHw":{"duration":109.018,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Eblks2o9IHw","annotations":[]},"bfk3xsTt0XA":{"duration":61.533,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=bfk3xsTt0XA","annotations":[{"segment":[8.255569060842433,60.476843120124805],"label":"Beach 
soccer"}]},"uFtP9Z_K87U":{"duration":129.777,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uFtP9Z_K87U","annotations":[{"segment":[6.276234200388424,9.515580884459867],"label":"Scuba diving"},{"segment":[13.76722340730364,19.233620936674203],"label":"Scuba diving"},{"segment":[38.06232353783947,42.11150689292878],"label":"Scuba diving"},{"segment":[54.86643446146009,62.357423668375304],"label":"Scuba diving"},{"segment":[69.24103537202713,71.6705453850807],"label":"Scuba diving"},{"segment":[79.76891209525932,89.68941094851795],"label":"Scuba diving"},{"segment":[94.75089012168488,107.91073597191888],"label":"Scuba diving"}]},"UAEZC2C2fYc":{"duration":50.25,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UAEZC2C2fYc","annotations":[]},"XASTWKClhPU":{"duration":130.334,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XASTWKClhPU","annotations":[{"segment":[5.489870026521062,103.9008734648986],"label":"Rollerblading"}]},"FCFSLuCZKj4":{"duration":126.27,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=FCFSLuCZKj4","annotations":[{"segment":[25.0368156,106.3887885],"label":"Cleaning windows"}]},"pmix_U52VWk":{"duration":74.26,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=pmix_U52VWk","annotations":[{"segment":[1.3883931294851795,65.02307823088923],"label":"Playing flauta"}]},"173d8EtsIpE":{"duration":187.13,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=173d8EtsIpE","annotations":[{"segment":[8.026095943837753,175.84446567862713],"label":"Doing fencing"}]},"XZCmkuDKltY":{"duration":29.211,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=XZCmkuDKltY","annotations":[{"segment":[1.6379832336448599,25.570738258566976],"label":"Playing rubik 
cube"}]},"iH659QSrcDc":{"duration":23.406,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=iH659QSrcDc","annotations":[{"segment":[5.769271206864274,11.684599912636505],"label":"Fixing the roof"}]},"L0jbYqV9Zaw":{"duration":187.69,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=L0jbYqV9Zaw","annotations":[{"segment":[36.01480605346663,67.9303658894655],"label":"Paintball"},{"segment":[81.98492434935493,136.44633838142641],"label":"Paintball"}]},"MxYa4WTV8Zs":{"duration":190.125,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=MxYa4WTV8Zs","annotations":[{"segment":[3.558084243369735,184.7433120124805],"label":"Decorating the Christmas tree"}]},"-Z98HU6T7J8":{"duration":131.96,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=-Z98HU6T7J8","annotations":[{"segment":[29.438596394695786,121.45994316692668],"label":"Smoking a cigarette"}]},"uDqnGmLJyao":{"duration":76.092,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uDqnGmLJyao","annotations":[{"segment":[31.123079273278776,37.040014839625584],"label":"Hanging wallpaper"},{"segment":[46.27043348200844,48.63720766552321],"label":"Hanging wallpaper"},{"segment":[63.666223730842034,67.09804629693845],"label":"Hanging wallpaper"}]},"JcWM1SgpU9M":{"duration":15.07,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JcWM1SgpU9M","annotations":[]},"SvMIG1JlcMk":{"duration":169.85,"subset":"training","resolution":"640x356","url":"https://www.youtube.com/watch?v=SvMIG1JlcMk","annotations":[{"segment":[13.245007800312012,49.536329173166926],"label":"Belly dance"},{"segment":[49.80122932917316,134.3043790951638],"label":"Belly 
dance"}]},"N8otQdjR96s":{"duration":182.625,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=N8otQdjR96s","annotations":[{"segment":[0.19999999999999998,13.101101404056163],"label":"Playing drums"},{"segment":[42.72098283931357,51.265179407176284],"label":"Playing drums"},{"segment":[68.06876599063963,114.20742745709829],"label":"Playing drums"},{"segment":[122.4668174726989,149.25498283931356],"label":"Playing drums"},{"segment":[158.93946645865833,171.75716536661466],"label":"Playing drums"}]},"y8ids5d1A88":{"duration":229.82999999999998,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=y8ids5d1A88","annotations":[]},"zYQ-WdosIwI":{"duration":42.493,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zYQ-WdosIwI","annotations":[{"segment":[8.233721613143599,41.06270770660154],"label":"Kneeling"}]},"2WKy0FvMtCM":{"duration":192.285,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2WKy0FvMtCM","annotations":[{"segment":[13.178362260792166,145.2614918068536],"label":"Making a cake"}]},"PIGH5p2iMoI":{"duration":192.053,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=PIGH5p2iMoI","annotations":[{"segment":[94.50283229329173,124.45618642745711],"label":"Hurling"},{"segment":[119.66364976599066,153.81047347893914],"label":"Hurling"}]},"q1TVicD9MOk":{"duration":191.495,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=q1TVicD9MOk","annotations":[]},"mBnLy9ZgMkc":{"duration":187.25,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=mBnLy9ZgMkc","annotations":[{"segment":[6.7163229329173175,177.25252262090484],"label":"Playing 
accordion"}]},"pnxgTQofPQo":{"duration":11.1,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pnxgTQofPQo","annotations":[{"segment":[3.221009360374415,8.438110764430578],"label":"Doing a powerbomb"}]},"PQBeNLSdugw":{"duration":96.32,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=PQBeNLSdugw","annotations":[{"segment":[0.7907531484799964,91.37287775907191],"label":"Doing karate"}]},"o5F7X-UBETE":{"duration":220.32999999999998,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=o5F7X-UBETE","annotations":[]},"LdAT5MEBkuQ":{"duration":143.5,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=LdAT5MEBkuQ","annotations":[{"segment":[45.89291812480499,98.94960883494541],"label":"Waterskiing"},{"segment":[28.20735455475819,31.341505060842437],"label":"Waterskiing"},{"segment":[45.89291812480499,98.94960883494541],"label":"Waterskiing"}]},"rVRNX-fClE4":{"duration":83.2,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rVRNX-fClE4","annotations":[{"segment":[39.067591808434244,75.66912300437596],"label":"Polishing shoes"}]},"GBdj6erXjDM":{"duration":29.374,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=GBdj6erXjDM","annotations":[{"segment":[0.2,17.95285111166394],"label":"Having an ice cream"}]},"cDMKIQSIcsU":{"duration":123.902,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cDMKIQSIcsU","annotations":[]},"5GiIqXY__74":{"duration":228.949,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5GiIqXY__74","annotations":[{"segment":[35.96239586749682,93.64466330013083],"label":"Playing ten pins"},{"segment":[100.76593007701291,126.40249317499112],"label":"Playing ten pins"},{"segment":[134.23588745492893,160.58457730562878],"label":"Playing ten 
pins"},{"segment":[166.63765470376254,182.30444326363812],"label":"Playing ten pins"}]},"Xi68dag0iGo":{"duration":143.22,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=Xi68dag0iGo","annotations":[{"segment":[8.152615685647426,62.5842756],"label":"Playing bagpipes"},{"segment":[69.22109040000001,119.60892327847115],"label":"Playing bagpipes"}]},"XzNMQl0EdEo":{"duration":175.15,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XzNMQl0EdEo","annotations":[{"segment":[52.94548892,156.68740079999998],"label":"Breakdancing"},{"segment":[0.2,11.33653531825273],"label":"Breakdancing"}]},"F9FPK6mfh88":{"duration":203.22,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=F9FPK6mfh88","annotations":[]},"hbHkS0GAOLE":{"duration":23.894,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hbHkS0GAOLE","annotations":[{"segment":[0.01,23.893333],"label":"Snow tubing"}]},"hS4L3PMfYqA":{"duration":111.32,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=hS4L3PMfYqA","annotations":[{"segment":[20.665614444617788,104.02271472542903],"label":"Breakdancing"}]},"3jb0w48ACkU":{"duration":42.96,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=3jb0w48ACkU","annotations":[{"segment":[35.58521351794072,42.75586859594384],"label":"Starting a campfire"}]},"uWfQcDvel84":{"duration":113.38,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=uWfQcDvel84","annotations":[]},"gTUJmj5GRqo":{"duration":212.672,"subset":"testing","resolution":"400x226","url":"https://www.youtube.com/watch?v=gTUJmj5GRqo","annotations":[]},"UNbC2c1C824":{"duration":90.721,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UNbC2c1C824","annotations":[{"segment":[2.829984399375975,86.45602340093603],"label":"Baton 
twirling"}]},"EKfhRuD3x9s":{"duration":238.562,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=EKfhRuD3x9s","annotations":[{"segment":[15.631195304955055,232.6070729329173],"label":"Playing congas"}]},"aH7puGPu39Y":{"duration":189.986,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aH7puGPu39Y","annotations":[{"segment":[46.092941140044765,186.14458368747867],"label":"Snowboarding"}]},"HcZ3irBAcE0":{"duration":29.467,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HcZ3irBAcE0","annotations":[{"segment":[0.6205694227769112,25.903026020280812],"label":"Clipping cat claws"}]},"9btLaLqX-Zk":{"duration":158.76,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9btLaLqX-Zk","annotations":[{"segment":[19.070845865834634,145.07555979344775],"label":"Cleaning windows"}]},"HjVQNiy4uuw":{"duration":23.546,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=HjVQNiy4uuw","annotations":[{"segment":[0.01,4.922050789391576],"label":"River tubing"},{"segment":[16.3456164274571,22.112496829953198],"label":"River tubing"}]},"hi07Rdf1r4Y":{"duration":53.8,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=hi07Rdf1r4Y","annotations":[{"segment":[0,34.99964074726989],"label":"Getting a haircut"},{"segment":[15.545509999999998,26.242563999999998],"label":"Getting a 
haircut"}]},"AY6QSTuHGRc":{"duration":168.531,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AY6QSTuHGRc","annotations":[{"segment":[51.79484379919415,53.635269720992916],"label":"Snowboarding"},{"segment":[63.889071285300396,69.14743106186832],"label":"Snowboarding"},{"segment":[74.66870882726465,84.65959240274371],"label":"Snowboarding"},{"segment":[108.58512938612783,113.31765318503895],"label":"Snowboarding"},{"segment":[145.65656581093177,146.70823703588144],"label":"Snowboarding"}]},"6GYdu5G61g8":{"duration":121.858,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=6GYdu5G61g8","annotations":[{"segment":[15.01842033953467,72.6207160721803],"label":"Rafting"},{"segment":[97.52467891368715,100.94659747206215],"label":"Rafting"},{"segment":[111.02224656061071,121.85832199546485],"label":"Rafting"}]},"WdC6dVbfTvs":{"duration":146.45,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WdC6dVbfTvs","annotations":[{"segment":[0.2284684149270733,70.1398033826115],"label":"Getting a piercing"}]},"M6yAoJJQvGY":{"duration":113.755,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=M6yAoJJQvGY","annotations":[{"segment":[19.166134547422715,24.312596601823255],"label":"Doing crunches"},{"segment":[70.63075395943837,90.32928081747269],"label":"Doing crunches"},{"segment":[97.42784905460218,101.8644542028081],"label":"Doing 
crunches"}]},"_7GQcJezzo4":{"duration":24.52,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=_7GQcJezzo4","annotations":[]},"UCmycSotoy4":{"duration":38.08,"subset":"training","resolution":"360x480","url":"https://www.youtube.com/watch?v=UCmycSotoy4","annotations":[{"segment":[1.6064291999999998,34.94367940717629],"label":"Kayaking"}]},"YfouQ4TJhmQ":{"duration":55.71,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=YfouQ4TJhmQ","annotations":[{"segment":[0.08690276131045242,39.019339828393136],"label":"Grooming horse"},{"segment":[48.14412976599064,54.92254514820593],"label":"Grooming horse"}]},"bcXc6mKSEEM":{"duration":28.747,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bcXc6mKSEEM","annotations":[{"segment":[0.2,28.501],"label":"Arm wrestling"}]},"OTMkyaTCpqM":{"duration":56.726,"subset":"training","resolution":"574x360","url":"https://www.youtube.com/watch?v=OTMkyaTCpqM","annotations":[{"segment":[0,6.017723544461778],"label":"Playing pool"},{"segment":[12.389430826833072,15.48678853354134],"label":"Playing pool"},{"segment":[28.849674639625587,43.45150382839313],"label":"Playing pool"},{"segment":[47.079837141965676,52.389593210608425],"label":"Playing pool"}]},"hBT_SN8IGsQ":{"duration":121.65,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=hBT_SN8IGsQ","annotations":[{"segment":[0.01,112.81934239411552],"label":"Hurling"}]},"cyXWvxVt8qE":{"duration":86.98,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=cyXWvxVt8qE","annotations":[{"segment":[2.8496433556942278,22.525752240249613],"label":"Playing badminton"},{"segment":[24.425514477379092,70.01980816848675],"label":"Playing badminton"},{"segment":[76.26188409048362,82.09686810452419],"label":"Playing 
badminton"}]},"goH-zdx5urM":{"duration":22.756,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=goH-zdx5urM","annotations":[{"segment":[1.2395241809672388,21.673965678627145],"label":"Doing crunches"}]},"JJyV1AIQj4M":{"duration":231.39,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JJyV1AIQj4M","annotations":[{"segment":[16.778689523400935,197.19470590405618],"label":"Belly dance"}]},"3QYwfLRJhI8":{"duration":168.11,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3QYwfLRJhI8","annotations":[]},"NVj37t3AReg":{"duration":75.187,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NVj37t3AReg","annotations":[]},"kpE3q-Hmd7A":{"duration":102.4,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=kpE3q-Hmd7A","annotations":[{"segment":[11.329355137285491,99.73023888455538],"label":"Doing step aerobics"}]},"CR9Ghr1h8oY":{"duration":89.745,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CR9Ghr1h8oY","annotations":[]},"M-IRMq2DmY0":{"duration":15.67,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=M-IRMq2DmY0","annotations":[{"segment":[0,15.673469],"label":"Shaving legs"}]},"cZFThsHMC5w":{"duration":172.04,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cZFThsHMC5w","annotations":[{"segment":[19.860704610497347,144.6610781764604],"label":"Tai chi"}]},"Z0mxEFOm_Wc":{"duration":88.585,"subset":"validation","resolution":"718x480","url":"https://www.youtube.com/watch?v=Z0mxEFOm_Wc","annotations":[{"segment":[28.862541341653664,39.35801092043682],"label":"Waxing skis"}]},"fljxcvQe_Mo":{"duration":84.428,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=fljxcvQe_Mo","annotations":[{"segment":[29.3719023209908,83.1106283525741],"label":"Fixing the 
roof"}]},"p0-BGit7WAo":{"duration":56.05,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=p0-BGit7WAo","annotations":[{"segment":[14.95328851544462,48.88238760312012],"label":"Ballet"}]},"uYqNIBbz4_Q":{"duration":23.45,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=uYqNIBbz4_Q","annotations":[{"segment":[13.92016912,18.3781598],"label":"Chopping wood"},{"segment":[13.920169119999999,19.42760256318253],"label":"Chopping wood"}]},"nVN1rP8H688":{"duration":198.53,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nVN1rP8H688","annotations":[{"segment":[39.01498595943838,46.446411856474256],"label":"Snowboarding"},{"segment":[54.497123244929796,59.14176443057722],"label":"Snowboarding"},{"segment":[62.857477379095165,117.36642589703588],"label":"Snowboarding"},{"segment":[121.39218720748829,150.81121216848675],"label":"Snowboarding"},{"segment":[170.0109968798752,175.585127925117],"label":"Snowboarding"}]},"3BD1Y-lvaM0":{"duration":99.428,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3BD1Y-lvaM0","annotations":[]},"5I5xXW25OXs":{"duration":23.847,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=5I5xXW25OXs","annotations":[{"segment":[0.9053464740622453,6.953357377489037],"label":"Rock-paper-scissors"}]},"gXAMD_KxXII":{"duration":163.03,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=gXAMD_KxXII","annotations":[{"segment":[51.629558229948245,65.10919608112324],"label":"Polishing shoes"},{"segment":[91.55980698907956,100.20712209360374],"label":"Polishing shoes"}]},"XnvaW1HQyg4":{"duration":27.66,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XnvaW1HQyg4","annotations":[{"segment":[3.3083572542901716,14.522399375975038],"label":"Hammer 
throw"}]},"k6pU9VaFsZ8":{"duration":95.667,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=k6pU9VaFsZ8","annotations":[{"segment":[2.3879241684867396,92.0843257472699],"label":"Powerbocking"}]},"-IkL9Kq9zfQ":{"duration":41.518,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=-IkL9Kq9zfQ","annotations":[{"segment":[0.01,40.6105043775351],"label":"Canoeing"}]},"tr8JRPrfAN0":{"duration":168.48,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tr8JRPrfAN0","annotations":[{"segment":[8.933407176287051,11.560879875195008],"label":"Vacuuming floor"},{"segment":[18.65505616224649,23.121759750390016],"label":"Vacuuming floor"},{"segment":[35.470881435257404,35.996375975039],"label":"Vacuuming floor"},{"segment":[57.016157566302645,58.85538845553822],"label":"Vacuuming floor"},{"segment":[81.45165366614664,89.33407176287051],"label":"Vacuuming floor"}]},"izZqZFVpW4c":{"duration":174.475,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=izZqZFVpW4c","annotations":[{"segment":[11.976426714211424,167.66997216848674],"label":"Ice 
fishing"}]},"SKbjRXF35Wk":{"duration":172.13,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=SKbjRXF35Wk","annotations":[{"segment":[7.787451073322933,164.34207092667708],"label":"Tango"}]},"VeWdsZb5tog":{"duration":160.06,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=VeWdsZb5tog","annotations":[{"segment":[28.96473780967239,96.6323580374415],"label":"Hopscotch"}]},"zBCZT-hF2Ao":{"duration":194.56,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=zBCZT-hF2Ao","annotations":[]},"PoamN_DEInI":{"duration":32.93,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=PoamN_DEInI","annotations":[{"segment":[9.733942141965677,12.40499750546022],"label":"Hopscotch"}]},"MSPslSgkp60":{"duration":158.221,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MSPslSgkp60","annotations":[{"segment":[79.72747117775869,87.62616801270691],"label":"Rope skipping"},{"segment":[96.75903622811579,109.1007500327224],"label":"Rope skipping"}]},"n96adnrNnno":{"duration":144.85,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=n96adnrNnno","annotations":[{"segment":[0.22589859594383774,23.26755538221529],"label":"Using uneven bars"}]},"BmLDHLKiIdQ":{"duration":237.63,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BmLDHLKiIdQ","annotations":[]},"f0hh9bwVfV0":{"duration":170.598,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=f0hh9bwVfV0","annotations":[{"segment":[38.32444968002802,92.61742006006772],"label":"Scuba diving"},{"segment":[103.52924253840901,120.56233128508812],"label":"Scuba diving"}]},"Zso0ZBs2y6E":{"duration":180.443,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Zso0ZBs2y6E","annotations":[{"segment":[55.84449221436242,73.80453128857167],"label":"Hanging 
wallpaper"},{"segment":[151.53782408502258,156.58908503059024],"label":"Hanging wallpaper"}]},"TyHLBe6__rc":{"duration":238.1,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TyHLBe6__rc","annotations":[{"segment":[15.040662163806552,227.8381797149437],"label":"Windsurfing"}]},"rim1Bl1zWwI":{"duration":230.598,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=rim1Bl1zWwI","annotations":[]},"msGQHfIRLhE":{"duration":84.45,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=msGQHfIRLhE","annotations":[{"segment":[0,32.937091633325196],"label":"Doing karate"},{"segment":[41.76423122776911,57.70578321060842],"label":"Doing karate"},{"segment":[67.5869104726989,74.96481882839313],"label":"Doing karate"}]},"-tvhSol-wTs":{"duration":126.09,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-tvhSol-wTs","annotations":[{"segment":[33.52457340561622,99.78721996099843],"label":"Playing squash"}]},"8-muYZwFf9c":{"duration":198.11,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=8-muYZwFf9c","annotations":[{"segment":[10.882955184259147,190.14317481042735],"label":"Windsurfing"}]},"AQSiBbhLBhs":{"duration":225.12,"subset":"training","resolution":"270x360","url":"https://www.youtube.com/watch?v=AQSiBbhLBhs","annotations":[{"segment":[8.425210595943838,124.73732969999999],"label":"Brushing hair"},{"segment":[116.89979701872076,215.5449710795632],"label":"Brushing hair"}]},"5YVN5d8DkSk":{"duration":137.857,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=5YVN5d8DkSk","annotations":[{"segment":[0.01,136.78154240873633],"label":"Beer pong"}]},"RgeVltfPDXQ":{"duration":203.988,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RgeVltfPDXQ","annotations":[{"segment":[0.01,199.24445437865688],"label":"Hitting a 
pinata"}]},"pzSqv6KB-Qc":{"duration":165.884,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=pzSqv6KB-Qc","annotations":[]},"owLv-_CPNJI":{"duration":55.263,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=owLv-_CPNJI","annotations":[{"segment":[8.701283931357255,50.57082839313572],"label":"Assembling bicycle"}]},"a7oaRcmRZ5k":{"duration":129.196,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=a7oaRcmRZ5k","annotations":[{"segment":[20.405599527626556,123.03731158370132],"label":"Wakeboarding"}]},"rAL6jEy54Tk":{"duration":32.09,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=rAL6jEy54Tk","annotations":[{"segment":[3.1539289984399375,12.86602781903276],"label":"Throwing darts"}]},"nsDyItaRUXI":{"duration":82.686,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=nsDyItaRUXI","annotations":[{"segment":[0.2,72.91309408756997],"label":"Knitting"}]},"FgKnVoyAqaQ":{"duration":218.872,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=FgKnVoyAqaQ","annotations":[]},"wv2baWJtSoc":{"duration":34.459,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=wv2baWJtSoc","annotations":[{"segment":[17.603007800312014,32.48783931357254],"label":"Using the rowing machine"}]},"tMM166j4YEw":{"duration":42.679,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=tMM166j4YEw","annotations":[{"segment":[8.455758271450858,18.77577820904836],"label":"Calf roping"}]},"D2ggFcgEbFo":{"duration":176.194,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=D2ggFcgEbFo","annotations":[{"segment":[42.88004708892356,173.99403722620906],"label":"Snow 
tubing"}]},"44M2Ov9JzUE":{"duration":55.821,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=44M2Ov9JzUE","annotations":[{"segment":[0.2,54.93046957878315],"label":"Playing beach volleyball"}]},"P14X14uarj0":{"duration":69.52,"subset":"training","resolution":"576x360","url":"https://www.youtube.com/watch?v=P14X14uarj0","annotations":[{"segment":[1.6976784,62.45331669266771],"label":"Layup drill in basketball"}]},"OLRDf3vO8lw":{"duration":39.962,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OLRDf3vO8lw","annotations":[{"segment":[13.403424009360375,31.35777803120125],"label":"Building sandcastles"}]},"8SCg3toperM":{"duration":88.376,"subset":"training","resolution":"400x226","url":"https://www.youtube.com/watch?v=8SCg3toperM","annotations":[{"segment":[4.183125443610075,88.25843852772222],"label":"Hurling"}]},"6EWzgWd72Cs":{"duration":237.935,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=6EWzgWd72Cs","annotations":[{"segment":[35.20045950155763,192.69239875389408],"label":"Grooming dog"}]},"UgO10QT_eSA":{"duration":57.1,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=UgO10QT_eSA","annotations":[]},"C10_qXWxpsk":{"duration":73.53999999999999,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=C10_qXWxpsk","annotations":[{"segment":[23.288817571750492,59.08246822389903],"label":"Tumbling"}]},"waF8oGaQqvI":{"duration":134.82,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=waF8oGaQqvI","annotations":[{"segment":[85.59999745085804,96.32628705772231],"label":"Getting a tattoo"}]},"kH8o0M69KGc":{"duration":32.56,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=kH8o0M69KGc","annotations":[{"segment":[0.5819370655226209,31.85472893447738],"label":"Clean and 
jerk"}]},"inbkDgHuRrY":{"duration":102.423,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=inbkDgHuRrY","annotations":[{"segment":[33.381636505460214,97.1291856474259],"label":"Roof shingle removal"}]},"5Fq-ln3yC38":{"duration":127.71,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5Fq-ln3yC38","annotations":[{"segment":[101.01223627581903,120.13881355881435],"label":"Polishing forniture"}]},"LYOQ0gVySxE":{"duration":49.6,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=LYOQ0gVySxE","annotations":[{"segment":[27.390996861154445,45.419534343213726],"label":"Drinking coffee"}]},"e5rZPT7BJas":{"duration":33.62,"subset":"training","resolution":"1072x720","url":"https://www.youtube.com/watch?v=e5rZPT7BJas","annotations":[{"segment":[17.5609733900156,25.62174806084243],"label":"Cleaning shoes"}]},"WxIo1WvHOz4":{"duration":66.53,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=WxIo1WvHOz4","annotations":[]},"JiNcVb97CnA":{"duration":224.42000000000002,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=JiNcVb97CnA","annotations":[]},"i-159wZdZSE":{"duration":215.157,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=i-159wZdZSE","annotations":[]},"1bcm3LHTI_w":{"duration":129.545,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1bcm3LHTI_w","annotations":[{"segment":[58.97541337330947,60.99323466589712],"label":"Playing congas"},{"segment":[69.06451983624773,73.10016242142302],"label":"Playing congas"},{"segment":[81.17144759177361,124.35282325314927],"label":"Playing 
congas"}]},"jGLJuL49zYE":{"duration":86.15,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=jGLJuL49zYE","annotations":[{"segment":[0.6719627418096724,80.36674392043682],"label":"Breakdancing"}]},"zAr9k1-umvY":{"duration":211.813,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=zAr9k1-umvY","annotations":[{"segment":[0.01,211.761],"label":"Laying tile"}]},"UU3kcPSassU":{"duration":205.47,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UU3kcPSassU","annotations":[]},"IvKaW2Nsr3A":{"duration":210.095,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IvKaW2Nsr3A","annotations":[]},"Bg9hw5K-m6Q":{"duration":59.24,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Bg9hw5K-m6Q","annotations":[{"segment":[2.33322726,50.83035824180968],"label":"Brushing hair"}]},"bXdq2zI1Ms0":{"duration":73.097,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=bXdq2zI1Ms0","annotations":[{"segment":[36.719261075204905,72.07010248301088],"label":"Capoeira"}]},"GZvOCDorlQw":{"duration":94.97,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=GZvOCDorlQw","annotations":[]},"p9iE8iemCMs":{"duration":134.58,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=p9iE8iemCMs","annotations":[{"segment":[11.337688560603649,90.49155128926245],"label":"Playing flauta"}]},"j7rvKBvvvRg":{"duration":163.84,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=j7rvKBvvvRg","annotations":[{"segment":[27.09366598127925,61.34414939157567],"label":"Washing dishes"}]},"MdrK2uQ-GvA":{"duration":70.71000000000001,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MdrK2uQ-GvA","annotations":[{"segment":[9.26551667780997,22.2813615347335],"label":"Plataform 
diving"},{"segment":[61.54950351201248,68.05742592636506],"label":"Plataform diving"}]},"14dA-qLmNZo":{"duration":65.25,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=14dA-qLmNZo","annotations":[{"segment":[25.142261399375975,29.9264164024961],"label":"Sailing"}]},"VvlJjaLwGqY":{"duration":135.7,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=VvlJjaLwGqY","annotations":[{"segment":[32.812947456673776,130.8283969562864],"label":"Preparing salad"}]},"DjY-7jEpxZ0":{"duration":24.056,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=DjY-7jEpxZ0","annotations":[{"segment":[0.01,24.055873015873015],"label":"Hitting a pinata"}]},"YxUNzzrgJnI":{"duration":168.97,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YxUNzzrgJnI","annotations":[{"segment":[29.523801599683033,150.51866611856474],"label":"Bungee jumping"}]},"3Rzyta8op6s":{"duration":113.43,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3Rzyta8op6s","annotations":[]},"Ssoqzp-ZL8A":{"duration":46.255,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ssoqzp-ZL8A","annotations":[]},"q5thIuCSTkk":{"duration":74.026,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=q5thIuCSTkk","annotations":[{"segment":[7.7374250126467645,41.34325580343214],"label":"Skiing"}]},"ufxyjYMKe7c":{"duration":125.643,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ufxyjYMKe7c","annotations":[{"segment":[27.539572592427504,51.84496050318203],"label":"Rock-paper-scissors"}]},"z8x3wUZYOaM":{"duration":41.66,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=z8x3wUZYOaM","annotations":[]},"sjpWwjzCbrw":{"duration":161.43,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=sjpWwjzCbrw","annotations":[{"segment":[3.3640970
00000001,132.71613217725988],"label":"Washing hands"}]},"8vc8xXSowoY":{"duration":73.52,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8vc8xXSowoY","annotations":[]},"3X2CY79a0X8":{"duration":82.129,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=3X2CY79a0X8","annotations":[{"segment":[0.19999999999999998,79.50458255451714],"label":"Cutting the grass"}]},"aJsDVKt5Igs":{"duration":42.61,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=aJsDVKt5Igs","annotations":[{"segment":[11.167312511700468,34.36607481279251],"label":"Playing kickball"}]},"zPQodcasmSA":{"duration":67.83,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zPQodcasmSA","annotations":[]},"eW5W8dVB_I4":{"duration":122.09,"subset":"training","resolution":"1920x1072","url":"https://www.youtube.com/watch?v=eW5W8dVB_I4","annotations":[{"segment":[96.66293230967239,102.75793493806552],"label":"Layup drill in basketball"}]},"yc9Bc8G7Y_Q":{"duration":7.18,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=yc9Bc8G7Y_Q","annotations":[{"segment":[2.5073200624024965,6.111592652106085],"label":"Doing motocross"}]},"LzPppxSWYNY":{"duration":92.58,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=LzPppxSWYNY","annotations":[]},"FKphYO14qhw":{"duration":230.76,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=FKphYO14qhw","annotations":[{"segment":[13.316594383775351,230.501],"label":"Playing pool"}]},"rlH9RkelqHk":{"duration":18.019,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=rlH9RkelqHk","annotations":[{"segment":[2.1644907815912635,13.436709007800312],"label":"Putting on 
shoes"}]},"mAMNYRLAZ_M":{"duration":22.895,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=mAMNYRLAZ_M","annotations":[]},"rO9SwC42Goo":{"duration":22.152,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rO9SwC42Goo","annotations":[{"segment":[4.065188797330752,19.843633937775774],"label":"Camel ride"}]},"HxLGLfEIxd0":{"duration":156.224,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HxLGLfEIxd0","annotations":[]},"dHKeNK1uWUg":{"duration":79.18,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=dHKeNK1uWUg","annotations":[]},"hM3bDhEIG7w":{"duration":181.91,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=hM3bDhEIG7w","annotations":[]},"zKHMKAOb1iw":{"duration":30.094,"subset":"validation","resolution":"352x240","url":"https://www.youtube.com/watch?v=zKHMKAOb1iw","annotations":[{"segment":[7.7849079563182535,21.291254290171608],"label":"Curling"}]},"CDncYtx6Lwc":{"duration":90.23,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=CDncYtx6Lwc","annotations":[]},"-HHybw--2XA":{"duration":135.257,"subset":"training","resolution":"656x480","url":"https://www.youtube.com/watch?v=-HHybw--2XA","annotations":[{"segment":[13.709929797191887,123.61857410296412],"label":"Disc dog"}]},"zBtmE0BAjGE":{"duration":65.411,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zBtmE0BAjGE","annotations":[{"segment":[16.122962845553822,45.00143427145086],"label":"Cleaning sink"}]},"zGTqXydTuQs":{"duration":158.895,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zGTqXydTuQs","annotations":[{"segment":[48.83330340560562,54.53465354940728],"label":"Scuba diving"},{"segment":[68.41620172562004,79.07524764664056],"label":"Scuba diving"},{"segment":[84.77659779044224,103.36795652574102],"label":"Scuba 
diving"}]},"76hO-Xc1VzA":{"duration":190.474,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=76hO-Xc1VzA","annotations":[]},"ZbAfEWdGBzQ":{"duration":12.795,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZbAfEWdGBzQ","annotations":[{"segment":[3.075779251170047,10.760250390015601],"label":"Doing a powerbomb"}]},"LublNVXGH5I":{"duration":217.13,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LublNVXGH5I","annotations":[{"segment":[97.53944461778471,212.3514992199688],"label":"Cleaning windows"}]},"5fW_2c_kKfc":{"duration":200.18,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=5fW_2c_kKfc","annotations":[{"segment":[6.870425957174342,197.99318338845555],"label":"Volleyball"}]},"1d6FGbM4FRM":{"duration":105.489,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1d6FGbM4FRM","annotations":[{"segment":[0.01,105.441],"label":"Laying tile"}]},"uJ_QCxMDfag":{"duration":29.63,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=uJ_QCxMDfag","annotations":[{"segment":[0.2311128003120125,29.351326760553416],"label":"Mowing the lawn"}]},"2cnNnskrZkU":{"duration":127.826,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=2cnNnskrZkU","annotations":[]},"QE80ROTC_fI":{"duration":146.054,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=QE80ROTC_fI","annotations":[{"segment":[9.569809029641187,142.86357765678628],"label":"Disc 
dog"}]},"jsqD1NB63hk":{"duration":74.56,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jsqD1NB63hk","annotations":[{"segment":[57.77993291731669,69.17315912636506],"label":"Swimming"}]},"TcGhRsPTTRE":{"duration":122.207,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=TcGhRsPTTRE","annotations":[{"segment":[12.004497659906397,54.49660842433697],"label":"Capoeira"},{"segment":[70.69315288611546,76.8032496099844],"label":"Capoeira"},{"segment":[104.43717316692668,115.49074258970359],"label":"Capoeira"}]},"wH-uaN8gL_k":{"duration":60.05,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=wH-uaN8gL_k","annotations":[{"segment":[17.679944239999998,43.851593799999996],"label":"Dodgeball"}]},"4fQUWOuFjwQ":{"duration":122.18,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4fQUWOuFjwQ","annotations":[{"segment":[16.583394383775353,27.638990639625586],"label":"Playing polo"},{"segment":[113.22455475819032,118.37112542901717],"label":"Playing polo"}]},"zBmVL3I3nFU":{"duration":129.103,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zBmVL3I3nFU","annotations":[{"segment":[68.68002479563182,101.10666406864276],"label":"Braiding hair"}]},"y_bXP4NtAw0":{"duration":197.068,"subset":"validation","resolution":"648x480","url":"https://www.youtube.com/watch?v=y_bXP4NtAw0","annotations":[{"segment":[5.224675507020281,69.15713728549142],"label":"Welding"},{"segment":[108.1924992199688,148.14995631825272],"label":"Welding"},{"segment":[160.75192355694227,189.6442386895476],"label":"Welding"}]},"jGoW5WVAtX4":{"duration":97.153,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jGoW5WVAtX4","annotations":[{"segment":[30.312585564647076,39.254798306217964],"label":"Beer pong"},{"segment":[47.74232226431914,51.07670576443058],"label":"Beer 
pong"},{"segment":[91.99869554602184,96.951835],"label":"Beer pong"}]},"WE9Md637nbc":{"duration":82.11,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WE9Md637nbc","annotations":[{"segment":[14.288872854914196,69.00948907956318],"label":"Cleaning shoes"}]},"_zkcTAj5Z8Q":{"duration":76.696,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=_zkcTAj5Z8Q","annotations":[{"segment":[0.5982489001560063,76.695509],"label":"Swimming"}]},"I5QbY8vlR54":{"duration":51.015,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=I5QbY8vlR54","annotations":[{"segment":[2.7854888689547583,51.014239],"label":"Surfing"}]},"-79MZQX4CEA":{"duration":65.23,"subset":"validation","resolution":"490x360","url":"https://www.youtube.com/watch?v=-79MZQX4CEA","annotations":[{"segment":[24.624551490903173,47.5192791167429],"label":"Playing field hockey"}]},"2NMTArm9IkA":{"duration":13.282,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2NMTArm9IkA","annotations":[{"segment":[8.453946496099844,11.562015060842434],"label":"Cleaning sink"}]},"8qODQbBVsus":{"duration":140.5,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=8qODQbBVsus","annotations":[{"segment":[35.000702329999996,133.68706708268328],"label":"Preparing salad"}]},"2-xPjKzp-Kk":{"duration":220.799,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2-xPjKzp-Kk","annotations":[{"segment":[4.1324368174726995,216.62818876755068],"label":"Playing blackjack"}]},"CYG4TdmZn9c":{"duration":22.15,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=CYG4TdmZn9c","annotations":[]},"NeUIHk1uCgA":{"duration":99.6,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NeUIHk1uCgA","annotations":[{"segment":[23.63982345,93.2086887675507],"label":"Using the balance 
beam"}]},"31PcR8hZixQ":{"duration":102.238,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=31PcR8hZixQ","annotations":[]},"KPfKLVV-HCQ":{"duration":135.47,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=KPfKLVV-HCQ","annotations":[{"segment":[6.34002561622465,134.61987725117007],"label":"Doing step aerobics"}]},"ddepLUaQTsg":{"duration":12.05,"subset":"testing","resolution":"256x192","url":"https://www.youtube.com/watch?v=ddepLUaQTsg","annotations":[]},"9ukVV07rszg":{"duration":194.212,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=9ukVV07rszg","annotations":[{"segment":[51.79646021840875,59.375157566302654],"label":"Doing fencing"},{"segment":[69.97786427457099,79.06589859594385],"label":"Doing fencing"},{"segment":[85.12458814352574,88.15393291731671],"label":"Doing fencing"},{"segment":[95.12142589703589,111.78282215288613],"label":"Doing fencing"},{"segment":[116.02390483619347,118.75031513260531],"label":"Doing fencing"},{"segment":[125.11193915756631,132.0794321372855],"label":"Doing fencing"},{"segment":[135.41171138845553,137.8351872074883],"label":"Doing fencing"},{"segment":[143.8938767550702,150.8613697347894],"label":"Doing fencing"},{"segment":[153.58778003120125,171.46091419656787],"label":"Doing fencing"},{"segment":[180.8518829953198,186.60763806552262],"label":"Doing fencing"}]},"yEtwO77xQC4":{"duration":203.25,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=yEtwO77xQC4","annotations":[]},"m7vcstRI23E":{"duration":159.08,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=m7vcstRI23E","annotations":[{"segment":[31.952817757009342,137.47142523364485],"label":"Baking 
cookies"}]},"C0OxaqCr_wk":{"duration":25.54,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=C0OxaqCr_wk","annotations":[{"segment":[0.9347574102964118,24.204250390015602],"label":"Smoking a cigarette"}]},"747hJQNJpeg":{"duration":183.345,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=747hJQNJpeg","annotations":[{"segment":[0.2,182.749015600624],"label":"Playing drums"}]},"h4phcAYe0aE":{"duration":29.699,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=h4phcAYe0aE","annotations":[{"segment":[0.2,28.26429087107325],"label":"Raking leaves"}]},"F6hVDkYcjv0":{"duration":90.49,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=F6hVDkYcjv0","annotations":[]},"9Hw3xvJJTy0":{"duration":38.104,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9Hw3xvJJTy0","annotations":[{"segment":[0.01,36.24687766055526],"label":"Doing a powerbomb"}]},"_AuJBnTClQ8":{"duration":143.66,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_AuJBnTClQ8","annotations":[{"segment":[27.55526209048362,69.04060338000001],"label":"Shaving"},{"segment":[80.13266919,109.98542499000001],"label":"Shaving"}]},"uJLhYVxWW5M":{"duration":181.163,"subset":"testing","resolution":"1274x720","url":"https://www.youtube.com/watch?v=uJLhYVxWW5M","annotations":[]},"VaT3qsoHPQ8":{"duration":159.197,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VaT3qsoHPQ8","annotations":[{"segment":[10.675605304212167,35.25432449297972],"label":"Baking cookies"},{"segment":[45.681659906396256,85.40484243369734],"label":"Baking cookies"},{"segment":[91.36331981279251,107.5143728549142],"label":"Baking cookies"},{"segment":[112.48039469578782,127.13015912636506],"label":"Baking 
cookies"}]},"A9uBPwP7DQc":{"duration":161.1,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=A9uBPwP7DQc","annotations":[{"segment":[127.40728081123244,150.77784711388455],"label":"Playing flauta"}]},"Y3j1uDKRpPU":{"duration":12.98,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=Y3j1uDKRpPU","annotations":[{"segment":[0.01,0.02],"label":"Polishing shoes"}]},"moxpALb09zU":{"duration":168.48,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=moxpALb09zU","annotations":[{"segment":[91.98338533541342,164.51885491419657],"label":"Shaving legs"}]},"-FpAgvltiC4":{"duration":147.215,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-FpAgvltiC4","annotations":[]},"8qIl-0XOguM":{"duration":213.369,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8qIl-0XOguM","annotations":[{"segment":[51.25509204368175,213.321],"label":"Snow tubing"}]},"pXeVR0bf4Pk":{"duration":198.461,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=pXeVR0bf4Pk","annotations":[{"segment":[24.459305973478937,177.7169826427457],"label":"Rafting"}]},"zMrUSfQ_mzo":{"duration":22.059,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=zMrUSfQ_mzo","annotations":[{"segment":[0.2,16.690472971918876],"label":"Playing rubik cube"}]},"GiHCOSgpzYA":{"duration":84.87,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GiHCOSgpzYA","annotations":[]},"FOlMVTmAeaQ":{"duration":129.61,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=FOlMVTmAeaQ","annotations":[{"segment":[20.018262918271834,27.499835928131006],"label":"Shot put"},{"segment":[71.98486463540175,78.25320958960808],"label":"Shot put"},{"segment":[102.11336034321373,116.67209686739469],"label":"Shot 
put"}]},"irsm8ihOS9w":{"duration":44.072,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=irsm8ihOS9w","annotations":[]},"StRa193fqMM":{"duration":102.888,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=StRa193fqMM","annotations":[]},"iFTYM1Ez5ZM":{"duration":156.97,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iFTYM1Ez5ZM","annotations":[{"segment":[8.69317428479452,14.08049355987845],"label":"Triple jump"},{"segment":[64.28051407770596,74.07564003240401],"label":"Triple jump"},{"segment":[80.93222746645866,86.07466854602185],"label":"Triple jump"},{"segment":[117.66394946333854,129.90785679563183],"label":"Triple jump"}]},"K6QunmuTZOQ":{"duration":39.34,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=K6QunmuTZOQ","annotations":[]},"1GzrcmQ5Pcc":{"duration":160.31,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1GzrcmQ5Pcc","annotations":[{"segment":[13.247602184087363,50.49086115444618],"label":"Dodgeball"},{"segment":[71.48706084243369,128.22679095163807],"label":"Dodgeball"}]},"wibcfckhRyU":{"duration":56.17,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wibcfckhRyU","annotations":[{"segment":[0.01,55.36922349787204],"label":"Tug of war"}]},"14AnvDNV5BI":{"duration":22.338,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=14AnvDNV5BI","annotations":[{"segment":[4.373429286271451,19.462631524960997],"label":"Using the rowing 
machine"}]},"3mD9Q3F6Xbg":{"duration":60.721,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3mD9Q3F6Xbg","annotations":[]},"-pK2JheWido":{"duration":108.6,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=-pK2JheWido","annotations":[]},"9njq_aC4AS4":{"duration":51.688,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=9njq_aC4AS4","annotations":[{"segment":[0.01,51.687619047619044],"label":"Disc dog"}]},"frePM0YGtQE":{"duration":175.172,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=frePM0YGtQE","annotations":[{"segment":[44.25834945397816,112.55827145085804],"label":"Clipping cat claws"}]},"v1Vmf5s42No":{"duration":22.41,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=v1Vmf5s42No","annotations":[{"segment":[0.9418408736349454,19.553670479999997],"label":"Smoking a cigarette"}]},"lAN2pe1lW-o":{"duration":38.78,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=lAN2pe1lW-o","annotations":[{"segment":[1.30329497,8.008085023400936],"label":"Shot put"},{"segment":[14.172799531981278,22.392418876755073],"label":"Shot put"},{"segment":[27.59011934477379,36.23280733229329],"label":"Shot put"}]},"NmmFlMtvE68":{"duration":77.973,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NmmFlMtvE68","annotations":[{"segment":[13.137266246489862,58.38784998439938],"label":"Grooming dog"}]},"xmag8UI8iws":{"duration":104.328,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=xmag8UI8iws","annotations":[{"segment":[0.01,104.326983],"label":"Blowing leaves"}]},"9zm9jW7_ANc":{"duration":210.35,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=9zm9jW7_ANc","annotations":[{"segment":[114.36322129892,195.41835949356343],"label":"Preparing 
salad"}]},"AtHnNOkaoUk":{"duration":229.089,"subset":"training","resolution":"638x480","url":"https://www.youtube.com/watch?v=AtHnNOkaoUk","annotations":[{"segment":[100.47096884444551,106.88400975598896],"label":"Welding"},{"segment":[144.64969512396715,149.63761583294541],"label":"Welding"},{"segment":[220.1810643583102,228.88807100000002],"label":"Welding"}]},"7wyt_-SoudU":{"duration":111.758,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7wyt_-SoudU","annotations":[{"segment":[0.01,111.721],"label":"Welding"}]},"1RQOgX36Z2E":{"duration":218.663,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1RQOgX36Z2E","annotations":[{"segment":[19.383745723503242,129.90513136155198],"label":"Changing car wheel"},{"segment":[153.70973753573548,179.89480432733734],"label":"Changing car wheel"}]},"cQ3pZhIBPHI":{"duration":55.938,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cQ3pZhIBPHI","annotations":[{"segment":[4.055220748829953,39.37488533541342],"label":"Kneeling"}]},"LB2P_KH0W2I":{"duration":10.1,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=LB2P_KH0W2I","annotations":[{"segment":[0.7294076,10.014123432917316],"label":"Washing 
dishes"}]},"VQjnNPRfcTE":{"duration":44.86,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VQjnNPRfcTE","annotations":[]},"b2YyPJ02NNQ":{"duration":133.7,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=b2YyPJ02NNQ","annotations":[{"segment":[13.964535101404056,16.882497659906395],"label":"Powerbocking"},{"segment":[33.978507020280816,48.77896099843994],"label":"Powerbocking"},{"segment":[51.28044617784712,124.44888767550702],"label":"Powerbocking"}]},"RINsqxGLQdQ":{"duration":144.36,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=RINsqxGLQdQ","annotations":[{"segment":[8.105391575663027,60.56528705148206],"label":"Windsurfing"},{"segment":[65.06828237129484,110.54853510140406],"label":"Windsurfing"}]},"oIOR6556hvo":{"duration":63.07,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oIOR6556hvo","annotations":[]},"e0yu4WgiZUM":{"duration":232.455,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=e0yu4WgiZUM","annotations":[]},"DKnvOGEDUyQ":{"duration":179.885,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DKnvOGEDUyQ","annotations":[{"segment":[6.8754792858381,171.88698214595252],"label":"Table soccer"}]},"DDh5-FjIegY":{"duration":26.33,"subset":"validation","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=DDh5-FjIegY","annotations":[{"segment":[0.2875459970779784,26.330996],"label":"Discus throw"}]},"IJV7CwRhFvw":{"duration":168.44,"subset":"training","resolution":"540x360","url":"https://www.youtube.com/watch?v=IJV7CwRhFvw","annotations":[{"segment":[10.77369671113375,164.4959546626763],"label":"Playing racquetball"}]},"iFJaqDgYsp0":{"duration":88.89,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=iFJaqDgYsp0","annotations":[{"segment":[11.925420764430577,81.95260083463339],"label":"Clean and 
jerk"}]},"TexMXN2yegk":{"duration":143.035,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TexMXN2yegk","annotations":[{"segment":[0.2,21.6367503900156],"label":"Carving jack-o-lanterns"},{"segment":[26.097936037441496,43.942678627145085],"label":"Carving jack-o-lanterns"},{"segment":[55.095642745709824,94.81345553822153],"label":"Carving jack-o-lanterns"}]},"wd7sLvmmVrs":{"duration":95.992,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wd7sLvmmVrs","annotations":[]},"ghFOHoBiyD8":{"duration":214.53,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ghFOHoBiyD8","annotations":[{"segment":[41.49086427457098,46.17531669266771],"label":"Playing ten pins"},{"segment":[151.57549609984397,166.63266458658345],"label":"Playing ten pins"},{"segment":[179.69896567862716,196.43071294851794],"label":"Playing ten pins"}]},"5Z-78AmLjFo":{"duration":95.295,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=5Z-78AmLjFo","annotations":[{"segment":[1.6302335592090647,2.9640628669819407],"label":"Welding"},{"segment":[9.633209405846321,11.263445226457613],"label":"Welding"},{"segment":[17.043372226806742,19.711030842352496],"label":"Welding"},{"segment":[35.27237276636938,72.47138869971756],"label":"Welding"},{"segment":[75.88006353019969,87.43991730313903],"label":"Welding"}]},"yVTcDXAykr0":{"duration":18.41,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=yVTcDXAykr0","annotations":[]},"ooadlJjyibo":{"duration":75.42,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ooadlJjyibo","annotations":[{"segment":[0.8818993759750391,72.1393689547582],"label":"Using the balance beam"}]},"NBawYEfglow":{"duration":85.38,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=NBawYEfglow","annotations":[{"segment":[4.856322152886115,10.57746879875195],"label":"Playing 
violin"},{"segment":[21.88671216848674,30.93410686427457],"label":"Playing violin"},{"segment":[32.663755850234004,34.39340483619345],"label":"Playing violin"},{"segment":[46.50094773790951,49.42804602184087],"label":"Playing violin"}]},"xpoIzpip_TQ":{"duration":79.46000000000001,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=xpoIzpip_TQ","annotations":[{"segment":[5.13408736349454,74.66076443057722],"label":"Volleyball"}]},"x-kPXpuo16Q":{"duration":7.45,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=x-kPXpuo16Q","annotations":[]},"YmGXXV6ztUo":{"duration":221.91,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YmGXXV6ztUo","annotations":[{"segment":[104.68104524180967,179.42850234009362],"label":"Archery"}]},"pHiulmPx7ek":{"duration":216.64,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pHiulmPx7ek","annotations":[{"segment":[43.001270579999996,119.13629851316502],"label":"Playing field hockey"}]},"jUPbimyUndA":{"duration":19.9,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=jUPbimyUndA","annotations":[{"segment":[0.01,9.064982839313572],"label":"Having an ice cream"}]},"VuGLqAda2hE":{"duration":59.05,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VuGLqAda2hE","annotations":[{"segment":[24.87215763182527,32.057447614352576],"label":"Spinning"},{"segment":[38.505784778159125,43.66445450920437],"label":"Spinning"},{"segment":[49.92855346833073,57.85079626957879],"label":"Spinning"}]},"BZQb9uWULOc":{"duration":161.983,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=BZQb9uWULOc","annotations":[{"segment":[51.551342184299614,118.01213999063964],"label":"Beer 
pong"}]},"stdtUoog49I":{"duration":49.087,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=stdtUoog49I","annotations":[]},"idACyRv-Sqk":{"duration":35.434,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=idACyRv-Sqk","annotations":[{"segment":[0.01,34.67959250494699],"label":"Slacklining"}]},"h7cetqsBVn0":{"duration":232.386,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=h7cetqsBVn0","annotations":[{"segment":[28.01361644011643,187.9880120787457],"label":"Camel ride"}]},"Uv_6SJlvCl0":{"duration":47.79,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=Uv_6SJlvCl0","annotations":[{"segment":[5.591263650546022,8.722370990639625],"label":"Drinking coffee"}]},"uug0bhnLgHs":{"duration":184.855,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uug0bhnLgHs","annotations":[{"segment":[39.38562949360772,130.80629685747397],"label":"Making a lemonade"}]},"kkXSGwesZ0U":{"duration":214.99,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=kkXSGwesZ0U","annotations":[{"segment":[116.38496875663026,151.9377257831513],"label":"Bungee jumping"}]},"_kmjpSZb3OI":{"duration":28.375,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=_kmjpSZb3OI","annotations":[]},"Bvry1S-bYXU":{"duration":158.15,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Bvry1S-bYXU","annotations":[{"segment":[9.51594567,155.7601478775351],"label":"Using the balance beam"}]},"eH8PT9fzbqU":{"duration":26.31,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=eH8PT9fzbqU","annotations":[{"segment":[11.615012268330734,18.674313010920436],"label":"Discus 
throw"}]},"GvP6gZbHn30":{"duration":111.154,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=GvP6gZbHn30","annotations":[{"segment":[0,111.101],"label":"Playing rubik cube"}]},"ARpoJIBF2NM":{"duration":132.54,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ARpoJIBF2NM","annotations":[]},"P7kSJ8SRNHQ":{"duration":40.891,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=P7kSJ8SRNHQ","annotations":[{"segment":[0.2,40.643],"label":"Swinging at the playground"}]},"oIJBu3NJslI":{"duration":141.294,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oIJBu3NJslI","annotations":[]},"OzjjQ4SZ8PY":{"duration":154.181,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OzjjQ4SZ8PY","annotations":[{"segment":[0.19999999999999998,93.04965210608425],"label":"Scuba diving"},{"segment":[100.9841185647426,154.141],"label":"Scuba diving"}]},"ySOSe7s0jcM":{"duration":73.56,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=ySOSe7s0jcM","annotations":[]},"mW9iDH07j40":{"duration":106.19,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mW9iDH07j40","annotations":[{"segment":[56.81966285959438,59.30448776599064],"label":"Ironing clothes"},{"segment":[70.40337234789392,77.52653707956318],"label":"Ironing clothes"}]},"DvtfiPvPhZ8":{"duration":108.3,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=DvtfiPvPhZ8","annotations":[{"segment":[5.9951248049922,99.55284711388455],"label":"Tango"}]},"3w5xIOwFUXU":{"duration":177.61,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3w5xIOwFUXU","annotations":[{"segment":[56.663727769110764,78.27629875195007],"label":"Drinking 
beer"}]},"GKhuh6bha6U":{"duration":188.569,"subset":"training","resolution":"384x288","url":"https://www.youtube.com/watch?v=GKhuh6bha6U","annotations":[{"segment":[13.490181370630161,175.0790930057871],"label":"Capoeira"}]},"LqBp5qgcwLY":{"duration":130.938,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LqBp5qgcwLY","annotations":[{"segment":[6.533840873634945,117.83132137285493],"label":"Hanging wallpaper"}]},"rSGboODhu04":{"duration":238.54,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=rSGboODhu04","annotations":[]},"bNR0MmJ9d8I":{"duration":152.416,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bNR0MmJ9d8I","annotations":[{"segment":[0.01,152.361],"label":"Assembling bicycle"}]},"BmL2XGNrxc4":{"duration":75.19,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BmL2XGNrxc4","annotations":[]},"iZraW641GR0":{"duration":18.67,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=iZraW641GR0","annotations":[]},"I_6Ok72DnJA":{"duration":51.479,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=I_6Ok72DnJA","annotations":[{"segment":[0.2,32.58445549066613],"label":"Hanging wallpaper"}]},"xWOrsBkYXEQ":{"duration":23.27,"subset":"testing","resolution":"720x1280","url":"https://www.youtube.com/watch?v=xWOrsBkYXEQ","annotations":[]},"LNLsmdVMCmY":{"duration":134.792,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=LNLsmdVMCmY","annotations":[{"segment":[13.455076443057722,134.741],"label":"Removing ice from car"}]},"JK1Wn9sORbs":{"duration":85.68,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JK1Wn9sORbs","annotations":[{"segment":[12.417652057722309,22.164841307332296],"label":"Mowing the 
lawn"}]},"E2NKQZNMAO0":{"duration":197.0,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=E2NKQZNMAO0","annotations":[{"segment":[43.34152998,139.72477145999997],"label":"Tai chi"}]},"aDWrPrNFdR0":{"duration":111.99,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=aDWrPrNFdR0","annotations":[{"segment":[25.712309325362828,78.74176627240332],"label":"Beer pong"},{"segment":[91.65025250293291,108.04751879576781],"label":"Beer pong"}]},"p8C7govzEPw":{"duration":163.608,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=p8C7govzEPw","annotations":[{"segment":[42.862539781591266,141.85459594383775],"label":"Grooming dog"}]},"2St4Skz9zvE":{"duration":182.068,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=2St4Skz9zvE","annotations":[]},"ni6VySdH0XY":{"duration":69.451,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ni6VySdH0XY","annotations":[{"segment":[9.094670826833074,69.401],"label":"Sharpening knives"}]},"A1vDgtl1ufE":{"duration":131.703,"subset":"testing","resolution":"360x288","url":"https://www.youtube.com/watch?v=A1vDgtl1ufE","annotations":[]},"CM5mFH3_Qhs":{"duration":38.452,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=CM5mFH3_Qhs","annotations":[]},"xrl3oxTa6sQ":{"duration":104.61,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=xrl3oxTa6sQ","annotations":[{"segment":[30.711874478939162,94.36718446],"label":"Starting a campfire"}]},"mWXonCBxeaw":{"duration":53.244,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=mWXonCBxeaw","annotations":[]},"A3160tXXLGg":{"duration":25.124,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=A3160tXXLGg","annotations":[{"segment":[2.0969321602796085,19.146754398067078],"label":"Playing rubik 
cube"}]},"dRAn_gsx9Wc":{"duration":192.94,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dRAn_gsx9Wc","annotations":[{"segment":[0.01,186.76413214895945],"label":"Playing lacrosse"}]},"mb5CLhyHMvU":{"duration":136.255,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mb5CLhyHMvU","annotations":[{"segment":[0.01,136.25433106575963],"label":"Skiing"}]},"ekdKb7Gynko":{"duration":13.84,"subset":"testing","resolution":"322x240","url":"https://www.youtube.com/watch?v=ekdKb7Gynko","annotations":[]},"dU7jG8G5li0":{"duration":220.265,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dU7jG8G5li0","annotations":[]},"OlykXdYeQOI":{"duration":129.08,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=OlykXdYeQOI","annotations":[]},"l8NepxsCh34":{"duration":48.67,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=l8NepxsCh34","annotations":[{"segment":[2.581508343326931,47.60604901872075],"label":"Hopscotch"}]},"ExcXXMZYGs0":{"duration":187.687,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ExcXXMZYGs0","annotations":[{"segment":[13.17614686427457,180.0740071450858],"label":"Rafting"}]},"nAh1HyYtA04":{"duration":119.82,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nAh1HyYtA04","annotations":[]},"Wg0xG-eRTho":{"duration":159.61,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Wg0xG-eRTho","annotations":[{"segment":[36.85301060218409,109.3139976645866],"label":"Shaving legs"}]},"hV9sBYo342M":{"duration":37.31,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=hV9sBYo342M","annotations":[{"segment":[2.33473698,21.368686427457096],"label":"High 
jump"}]},"wNwwiK6znIE":{"duration":46.348,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wNwwiK6znIE","annotations":[{"segment":[0.01,33.05388143525741],"label":"Waxing skis"}]},"m3IL58elWc0":{"duration":74.14,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=m3IL58elWc0","annotations":[]},"QN-4aLiDhiQ":{"duration":217.5,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QN-4aLiDhiQ","annotations":[]},"dcMFJ-8Eo7g":{"duration":153.136,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=dcMFJ-8Eo7g","annotations":[{"segment":[12.422856705148208,98.6661503697348],"label":"Laying tile"}]},"bH7ORYDcI7A":{"duration":190.381,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bH7ORYDcI7A","annotations":[{"segment":[27.607071762870515,185.25401560062403],"label":"Disc dog"}]},"E2KlBkQJyzU":{"duration":197.742,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=E2KlBkQJyzU","annotations":[{"segment":[27.169300694926132,148.43783652047128],"label":"Playing ice hockey"}]},"hyMqGr0mOfA":{"duration":63.646,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=hyMqGr0mOfA","annotations":[]},"HNvolNt5RU0":{"duration":42.89,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=HNvolNt5RU0","annotations":[{"segment":[0.01,42.887256235827664],"label":"Playing water polo"}]},"8-QcL1k5n6k":{"duration":60.837,"subset":"validation","resolution":"854x468","url":"https://www.youtube.com/watch?v=8-QcL1k5n6k","annotations":[{"segment":[2.70243135725429,6.212894695787831],"label":"Using the rowing machine"},{"segment":[14.085723088923556,19.018097503900155],"label":"Using the rowing machine"},{"segment":[29.451966458658347,44.059382995319815],"label":"Using the rowing 
machine"}]},"4-Xe_9Ywvd8":{"duration":171.57,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4-Xe_9Ywvd8","annotations":[{"segment":[18.06728007895826,104.52256104938076],"label":"Cheerleading"}]},"0gA_3BAxtVM":{"duration":175.915,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0gA_3BAxtVM","annotations":[{"segment":[9.316337691878413,172.0782359813084],"label":"BMX"}]},"0zXTrkgUj0U":{"duration":56.8,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0zXTrkgUj0U","annotations":[{"segment":[13.82528232,48.941431069422784],"label":"Vacuuming floor"},{"segment":[4.9724898,11.09680248],"label":"Vacuuming floor"}]},"55IErOrgQOA":{"duration":70.64,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=55IErOrgQOA","annotations":[{"segment":[12.115616224648985,70.1604321372855],"label":"Grooming horse"}]},"BERvPz1e_AU":{"duration":133.82,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=BERvPz1e_AU","annotations":[{"segment":[77.7671684,84.43388152],"label":"Discus throw"},{"segment":[24.36655544,62.098652959999995],"label":"Discus throw"}]},"---9CpRcKoU":{"duration":14.07,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=---9CpRcKoU","annotations":[{"segment":[0.01,12.644405017160688],"label":"Drinking 
beer"}]},"LHjmL7Pg_80":{"duration":230.969,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=LHjmL7Pg_80","annotations":[{"segment":[16.214661294851794,30.988019363494537],"label":"Futsal"},{"segment":[34.2309516224649,85.75754195943838],"label":"Futsal"},{"segment":[86.4781935725429,94.40536131669266],"label":"Futsal"},{"segment":[99.44992260842433,217.99711296411857],"label":"Futsal"}]},"n_9skH6xGeM":{"duration":197.695,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=n_9skH6xGeM","annotations":[{"segment":[0.01,189.47130794642797],"label":"Rope skipping"}]},"HPv-d6qybdE":{"duration":214.51,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HPv-d6qybdE","annotations":[{"segment":[8.0010365,213.16737042815046],"label":"Pole vault"}]},"nfPuaotli6I":{"duration":130.915,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=nfPuaotli6I","annotations":[]},"rZMiqPMOpzA":{"duration":218.15,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=rZMiqPMOpzA","annotations":[{"segment":[2.3819765990639628,216.41958814352574],"label":"Playing accordion"}]},"pHU2Ie4VQBg":{"duration":211.3,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pHU2Ie4VQBg","annotations":[{"segment":[12.524053042121684,204.02954602184087],"label":"Belly dance"}]},"3HYQV_zu2RA":{"duration":152.207,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3HYQV_zu2RA","annotations":[{"segment":[98.76333229329174,125.82828393135726],"label":"Using the rowing 
machine"}]},"IiNf2F4P5sE":{"duration":73.863,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=IiNf2F4P5sE","annotations":[{"segment":[27.071193447737908,31.67905616224649],"label":"Surfing"},{"segment":[44.00508892355694,48.843344773790946],"label":"Surfing"},{"segment":[59.44142901716068,67.15959906396256],"label":"Surfing"}]},"GvRPOGpObyU":{"duration":62.65,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=GvRPOGpObyU","annotations":[{"segment":[13.867928237129485,53.24250390015601],"label":"Mixing drinks"}]},"RK1L77bNTxg":{"duration":190.543,"subset":"training","resolution":"810x480","url":"https://www.youtube.com/watch?v=RK1L77bNTxg","annotations":[{"segment":[37.446374414976596,46.659371294851795],"label":"Curling"},{"segment":[51.414466458658346,60.924656786271456],"label":"Curling"},{"segment":[65.679751950078,71.02923400936038],"label":"Curling"},{"segment":[74.00116848673947,118.58018564742589],"label":"Curling"},{"segment":[148.89391731669267,156.32375351014042],"label":"Curling"}]},"Lo9xfcrfn2A":{"duration":190.125,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Lo9xfcrfn2A","annotations":[]},"bteBkFTUlEI":{"duration":98.453,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=bteBkFTUlEI","annotations":[]},"izdf6Fm48Tc":{"duration":227.79,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=izdf6Fm48Tc","annotations":[{"segment":[19.66940745,184.07695257],"label":"Cleaning shoes"}]},"OoyoTLkSOuY":{"duration":75.58,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=OoyoTLkSOuY","annotations":[{"segment":[35.446936764430575,74.8978464524181],"label":"Shaving 
legs"}]},"ILwwD00q1ZY":{"duration":46.231,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ILwwD00q1ZY","annotations":[{"segment":[5.717656937294078,34.017897909256696],"label":"Painting furniture"}]},"JQ3mObvro1s":{"duration":151.35,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=JQ3mObvro1s","annotations":[{"segment":[68.56978315132605,87.92510920436817],"label":"Cleaning shoes"}]},"-AUKiMfqga4":{"duration":198.16,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=-AUKiMfqga4","annotations":[{"segment":[30.295773964291904,180.22894103247125],"label":"Making an omelette"}]},"IFmtu3Sd7iI":{"duration":114.219,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=IFmtu3Sd7iI","annotations":[{"segment":[1.3161052558043291,111.76221273243983],"label":"Scuba diving"}]},"YrBVLdz09HA":{"duration":186.29,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YrBVLdz09HA","annotations":[]},"9KPRS9y8Fvo":{"duration":86.518,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=9KPRS9y8Fvo","annotations":[{"segment":[0.2,78.98260410495145],"label":"Swinging at the 
playground"}]},"skr8x0kUVwg":{"duration":68.94,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=skr8x0kUVwg","annotations":[]},"V5TmcsuC1vU":{"duration":133.515,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=V5TmcsuC1vU","annotations":[]},"aYtnkEWM_Cg":{"duration":95.16,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=aYtnkEWM_Cg","annotations":[{"segment":[8.906899296380018,63.0905366826918],"label":"Volleyball"},{"segment":[77.3415746224649,85.80312885179407],"label":"Volleyball"}]},"gIMaGZrddX4":{"duration":134.885,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=gIMaGZrddX4","annotations":[]},"1VBg21aaiKM":{"duration":97.13,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1VBg21aaiKM","annotations":[{"segment":[17.56124180967239,22.713182527301093],"label":"Shaving"},{"segment":[36.94677691107644,45.123522620904836],"label":"Shaving"},{"segment":[57.54006240249611,66.17107176287051],"label":"Shaving"},{"segment":[73.2878689547582,80.55608736349454],"label":"Shaving"}]},"ut1AoOzqL4k":{"duration":29.75,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ut1AoOzqL4k","annotations":[]},"tHFF0OaIHoE":{"duration":27.84,"subset":"training","resolution":"176x144","url":"https://www.youtube.com/watch?v=tHFF0OaIHoE","annotations":[{"segment":[0.3908994009360375,22.91886456],"label":"Shaving legs"}]},"jalqZE5ekho":{"duration":95.388,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jalqZE5ekho","annotations":[{"segment":[0,95.387572],"label":"Building sandcastles"}]},"x1Z_3-X49HY":{"duration":91.835,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=x1Z_3-X49HY","annotations":[{"segment":[2.7740491278461206,87.85441517145982],"label":"Clipping cat 
claws"}]},"Wzg4d-3ym1E":{"duration":205.288,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Wzg4d-3ym1E","annotations":[{"segment":[81.34628524732827,93.51620193787342],"label":"Painting furniture"},{"segment":[107.28742345612193,174.54222544461777],"label":"Painting furniture"}]},"CzdYTTVN5UA":{"duration":150.094,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CzdYTTVN5UA","annotations":[]},"hryx3zm06U8":{"duration":112.478,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=hryx3zm06U8","annotations":[{"segment":[7.194346051482058,112.47746031746031],"label":"Laying tile"}]},"Go_bbW1C2Zs":{"duration":62.53,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Go_bbW1C2Zs","annotations":[]},"NNOsdZr802w":{"duration":167.63,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=NNOsdZr802w","annotations":[{"segment":[108.5246705650539,167.62485260770976],"label":"Shaving legs"}]},"tJcbG_HGZE8":{"duration":178.446,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=tJcbG_HGZE8","annotations":[{"segment":[109.36558502340094,159.45848049921997],"label":"Hanging wallpaper"}]},"9FJwDxnlW7Y":{"duration":38.15,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=9FJwDxnlW7Y","annotations":[{"segment":[0.009999999999999998,38.101],"label":"Shoveling 
snow"}]},"cit8ShHV_bU":{"duration":62.741,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cit8ShHV_bU","annotations":[]},"F4bG73WqAzI":{"duration":53.66,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=F4bG73WqAzI","annotations":[]},"A_ZCUnCD4hk":{"duration":57.911,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=A_ZCUnCD4hk","annotations":[]},"lPYV3qFuXOI":{"duration":145.5,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=lPYV3qFuXOI","annotations":[{"segment":[28.817376429017163,121.1691260873635],"label":"Fixing bicycle"}]},"9D46Mp1AD9g":{"duration":44.03,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9D46Mp1AD9g","annotations":[]},"KbEoaYhMZ6c":{"duration":44.606,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=KbEoaYhMZ6c","annotations":[{"segment":[0.01,3.4097832574102958],"label":"Installing carpet"},{"segment":[6.123692380655226,16.353042152886115],"label":"Installing carpet"},{"segment":[19.5540631700468,43.49213338533541],"label":"Installing carpet"}]},"JspVuT6rsLA":{"duration":181.58,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JspVuT6rsLA","annotations":[]},"kHBTnFweJfw":{"duration":198.25,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kHBTnFweJfw","annotations":[{"segment":[1.9370364716140651,193.7036259575954],"label":"Zumba"}]},"Gz0UlnBx6JU":{"duration":197.091,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=Gz0UlnBx6JU","annotations":[{"segment":[67.01238377535101,92.83366926677067],"label":"Playing congas"},{"segment":[118.34755850234009,162.92001560062403],"label":"Playing 
congas"}]},"VmwvfQDR4YQ":{"duration":231.225,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=VmwvfQDR4YQ","annotations":[{"segment":[6.493038301123883,207.41650093603744],"label":"Capoeira"}]},"OBua42LRiF8":{"duration":78.136,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OBua42LRiF8","annotations":[{"segment":[5.972889582249956,76.18481474258971],"label":"Hurling"}]},"M-PQajs86Bo":{"duration":48.832,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=M-PQajs86Bo","annotations":[{"segment":[0.2,47.27253510140406],"label":"Swinging at the playground"}]},"VfKA9zNDBfM":{"duration":54.15,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=VfKA9zNDBfM","annotations":[]},"DqsaFxxfONY":{"duration":100.36,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DqsaFxxfONY","annotations":[{"segment":[26.772209239389987,89.86694797315702],"label":"Using parallel bars"}]},"kcMJk5dg-Wc":{"duration":14.68,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=kcMJk5dg-Wc","annotations":[{"segment":[0.4633995103315549,10.132448155191367],"label":"Kayaking"}]},"pHZogKGkc9o":{"duration":41.63,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=pHZogKGkc9o","annotations":[{"segment":[6.949721277691109,41.113771670826836],"label":"Playing kickball"}]},"meAHw06Mh-Y":{"duration":117.842,"subset":"training","resolution":"536x360","url":"https://www.youtube.com/watch?v=meAHw06Mh-Y","annotations":[{"segment":[17.14388568693381,117.841268],"label":"Laying tile"}]},"WWyGnNfcj8Q":{"duration":185.737,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=WWyGnNfcj8Q","annotations":[{"segment":[5.795196705827417,83.74059239920616],"label":"Making a 
cake"}]},"VcEW9F8TyqU":{"duration":82.129,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VcEW9F8TyqU","annotations":[{"segment":[10.121982962558503,58.04124407644306],"label":"Beer pong"}]},"2KYGAq5qQCI":{"duration":77.021,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2KYGAq5qQCI","annotations":[{"segment":[9.382252694401995,28.126733151171287],"label":"Plastering"},{"segment":[44.828545865856746,67.89867565880357],"label":"Plastering"}]},"RfaZnfoyZ-g":{"duration":146.077,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RfaZnfoyZ-g","annotations":[]},"-HZtgP41I_o":{"duration":207.935,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=-HZtgP41I_o","annotations":[{"segment":[27.5661232449298,107.02141965678628],"label":"Putting on shoes"}]},"4x7HYKV8zkY":{"duration":115.94,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4x7HYKV8zkY","annotations":[{"segment":[30.638689371294852,103.84617724960998],"label":"Tumbling"}]},"1D7iddqIIhM":{"duration":15.88,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1D7iddqIIhM","annotations":[{"segment":[8.127056576140596,15.733783310516094],"label":"Smoking hookah"}]},"8uP35-qttBo":{"duration":165.47,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8uP35-qttBo","annotations":[{"segment":[50.247210259999996,135.40551856],"label":"Archery"}]},"UfrztSg9gf0":{"duration":118.376,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=UfrztSg9gf0","annotations":[{"segment":[17.359252321372853,108.77233635413417],"label":"Disc dog"}]},"BvACPvbUq1k":{"duration":29.05,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BvACPvbUq1k","annotations":[{"segment":[3.1721863160240695,5.483350619656786],"label":"Shoveling 
snow"},{"segment":[7.703881035881436,18.851850064274572],"label":"Shoveling snow"}]},"WmrGNp40Qzg":{"duration":233.85,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=WmrGNp40Qzg","annotations":[]},"IOmzDJjVUoQ":{"duration":17.717,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=IOmzDJjVUoQ","annotations":[{"segment":[0.2,17.563000000000002],"label":"Using the monkey bar"}]},"hxluAHro9bQ":{"duration":151.208,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=hxluAHro9bQ","annotations":[{"segment":[15.798098283931356,41.02789703588144],"label":"Hula hoop"},{"segment":[58.94812792511701,71.44513104524181],"label":"Hula hoop"},{"segment":[86.77164430577223,140.06909204368174],"label":"Hula hoop"}]},"dfgwl-_IMic":{"duration":31.974,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=dfgwl-_IMic","annotations":[{"segment":[0.2,31.77387755102041],"label":"Powerbocking"},{"segment":[17.258909666146646,23.64370861778471],"label":"Powerbocking"}]},"2-S2fehRKVc":{"duration":4.74,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=2-S2fehRKVc","annotations":[{"segment":[1.3781997948517941,4.171548440717629],"label":"Chopping wood"}]},"NNfAlym-xh8":{"duration":138.79,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=NNfAlym-xh8","annotations":[{"segment":[2.055690327613105,28.88785881435258],"label":"Using parallel bars"},{"segment":[30.402578003120126,33.21562792511701],"label":"Using parallel bars"},{"segment":[61.56251560062403,73.68026911076443],"label":"Using parallel bars"},{"segment":[78.22442667706709,89.04384945397815],"label":"Using parallel bars"},{"segment":[94.02078393135727,97.26661076443058],"label":"Using parallel bars"},{"segment":[126.91182917316694,129.07571372854915],"label":"Using parallel bars"},{"segment":[133.40348283931357,136.64930967238692],"label":"Using 
parallel bars"}]},"NyPzV1CzeDk":{"duration":209.073,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NyPzV1CzeDk","annotations":[]},"YkqO2oi9SIk":{"duration":84.2,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YkqO2oi9SIk","annotations":[{"segment":[56.595037049999995,75.17980439625585],"label":"Plataform diving"},{"segment":[8.397046215288611,49.350057299999996],"label":"Plataform diving"}]},"XsCND4lDiqA":{"duration":154.413,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=XsCND4lDiqA","annotations":[{"segment":[6.003601354680785,27.616579423046545],"label":"Beer pong"},{"segment":[42.9858082716622,63.15792113547024],"label":"Beer pong"},{"segment":[93.41609043118231,111.90719310085362],"label":"Beer pong"}]},"q_IyXRFSV-A":{"duration":87.772,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=q_IyXRFSV-A","annotations":[]},"QOaPQpXemCA":{"duration":185.67,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QOaPQpXemCA","annotations":[]},"U2td95KJU7Y":{"duration":104.31,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=U2td95KJU7Y","annotations":[{"segment":[29.940511035407404,37.588358962929945],"label":"Plataform diving"}]},"MPrkTgXNHkw":{"duration":24.43,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=MPrkTgXNHkw","annotations":[]},"iKnRiVXMMGg":{"duration":87.35,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iKnRiVXMMGg","annotations":[{"segment":[0.01,87.352992],"label":"Removing 
curlers"}]},"3MqnXsnEAkM":{"duration":154.88,"subset":"training","resolution":"352x262","url":"https://www.youtube.com/watch?v=3MqnXsnEAkM","annotations":[{"segment":[25.36988336676324,38.90048782903697],"label":"Snatch"},{"segment":[63.06228151166863,93.98937742543714],"label":"Snatch"},{"segment":[128.78236032842673,138.20545986465308],"label":"Snatch"},{"segment":[139.65516748561097,154.67709750566894],"label":"Snatch"}]},"5ydZNWU9fO8":{"duration":60.16,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5ydZNWU9fO8","annotations":[]},"It2fslENHXs":{"duration":46.65,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=It2fslENHXs","annotations":[{"segment":[7.350292647425897,42.209601341653666],"label":"Pole vault"}]},"xa-zOtV3uPU":{"duration":42.957,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xa-zOtV3uPU","annotations":[{"segment":[9.147611384555383,10.487920744929797],"label":"Playing blackjack"},{"segment":[13.838694145865835,16.98842114274571],"label":"Playing blackjack"}]},"aCJHnvM0Mkc":{"duration":34.714,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=aCJHnvM0Mkc","annotations":[{"segment":[19.171132876755067,34.513832199546485],"label":"Mooping floor"}]},"WmabLngcvas":{"duration":126.689,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=WmabLngcvas","annotations":[{"segment":[17.53536006989628,109.54675958252663],"label":"Camel 
ride"}]},"h4s1rAwVm4M":{"duration":88.12,"subset":"testing","resolution":"853x470","url":"https://www.youtube.com/watch?v=h4s1rAwVm4M","annotations":[]},"f4s1ngeK5P4":{"duration":31.394,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=f4s1ngeK5P4","annotations":[{"segment":[0.01,31.361],"label":"Croquet"}]},"aDhbXceBwHw":{"duration":72.307,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aDhbXceBwHw","annotations":[{"segment":[0.01,68.54085491419657],"label":"Sumo"}]},"oyLTgy93soQ":{"duration":42.15,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=oyLTgy93soQ","annotations":[{"segment":[6.706858034321373,14.005497659906398],"label":"Shot put"}]},"j5JTGwlcFJw":{"duration":151.975,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=j5JTGwlcFJw","annotations":[]},"u0SpXGr_elM":{"duration":195.93,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=u0SpXGr_elM","annotations":[{"segment":[47.218889,125.83735954000002],"label":"Making a sandwich"}]},"Elq_Pigh6qQ":{"duration":191.82,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=Elq_Pigh6qQ","annotations":[]},"7XChhLM-lYg":{"duration":3.25,"subset":"testing","resolution":"1200x720","url":"https://www.youtube.com/watch?v=7XChhLM-lYg","annotations":[]},"be8r0Ztbv3s":{"duration":76.46000000000001,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=be8r0Ztbv3s","annotations":[]},"niHEMszTMBU":{"duration":100.75,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=niHEMszTMBU","annotations":[{"segment":[32.53593771294852,84.4048239219969],"label":"Starting a campfire"}]},"QbhMOqg9Tmg":{"duration":226.93,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=QbhMOqg9Tmg","annotations":[{"segment":[142.2899282,175.02069396],"label":"Shaving 
legs"},{"segment":[47.46097111,118.37687428],"label":"Shaving legs"}]},"nK7XMj4gYJs":{"duration":45.65,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nK7XMj4gYJs","annotations":[{"segment":[2.4532137285491418,45.58],"label":"Using parallel bars"}]},"QzbZxKJ-YBY":{"duration":160.38,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=QzbZxKJ-YBY","annotations":[{"segment":[3.502424336973479,92.5640717628705],"label":"Belly dance"}]},"01vNlQLepsE":{"duration":11.703,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=01vNlQLepsE","annotations":[{"segment":[0,11.661],"label":"Baton twirling"}]},"-Hbqom0pAeo":{"duration":81.828,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-Hbqom0pAeo","annotations":[]},"xZdCmghMcvU":{"duration":142.53,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=xZdCmghMcvU","annotations":[{"segment":[86.04458928000003,118.360100850234],"label":"Preparing salad"},{"segment":[7.8906733900156,75.13437708000001],"label":"Preparing salad"}]},"DRI64Xoxy5w":{"duration":59.815,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DRI64Xoxy5w","annotations":[{"segment":[14.921934477379095,33.20130421216849],"label":"Hitting a pinata"}]},"Hs1fGDcU6Js":{"duration":180.907,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Hs1fGDcU6Js","annotations":[{"segment":[138.4650639625585,162.15358814352572],"label":"Doing 
crunches"}]},"lIb8_YksZyg":{"duration":213.438,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lIb8_YksZyg","annotations":[{"segment":[4.9937831513260535,21.306808112324493],"label":"Snowboarding"},{"segment":[24.968915756630267,56.596209048361935],"label":"Snowboarding"},{"segment":[57.92788455538222,133.16755070202808],"label":"Snowboarding"},{"segment":[135.16506396255852,190.42959750390014],"label":"Snowboarding"}]},"pmPEfkb8roQ":{"duration":30.86,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=pmPEfkb8roQ","annotations":[{"segment":[12.16491887675507,23.608597503900157],"label":"Painting fence"}]},"8HhihBmSS9s":{"duration":120.4,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=8HhihBmSS9s","annotations":[{"segment":[4.507785666528702,53.905603595572394],"label":"Playing kickball"}]},"WtWw-GNpr4E":{"duration":151.12,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WtWw-GNpr4E","annotations":[{"segment":[0.01,2.5932450748829954],"label":"Playing badminton"},{"segment":[80.15484776911076,86.7558352324493],"label":"Playing badminton"},{"segment":[126.83325911700469,131.0767510577223],"label":"Playing badminton"}]},"jqW7r60gOzE":{"duration":13.143,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=jqW7r60gOzE","annotations":[{"segment":[0,11.624171606864275],"label":"Hitting a pinata"}]},"w40c5_KzUnY":{"duration":156.71,"subset":"testing","resolution":"640x358","url":"https://www.youtube.com/watch?v=w40c5_KzUnY","annotations":[]},"trV2ycq9rkU":{"duration":6.316,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=trV2ycq9rkU","annotations":[{"segment":[0.01,6.315827],"label":"Fixing the 
roof"}]},"qsYElirHVUU":{"duration":200.09,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qsYElirHVUU","annotations":[{"segment":[2.4971775251962463,9.988710100784985],"label":"Skateboarding"},{"segment":[19.040978629621375,44.63704826288289],"label":"Skateboarding"},{"segment":[52.752875219770694,56.18649431691553],"label":"Skateboarding"},{"segment":[59.30796622341085,76.47606170913504],"label":"Skateboarding"},{"segment":[80.2218279969294,85.84047742862096],"label":"Skateboarding"},{"segment":[92.08342124161157,95.20489314810688],"label":"Skateboarding"},{"segment":[97.70207067330313,116.11875492162544],"label":"Skateboarding"},{"segment":[117.99163806552262,194.46769860218407],"label":"Skateboarding"}]},"Qhyk2ZbrOEM":{"duration":219.267,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Qhyk2ZbrOEM","annotations":[]},"16T82DHJClA":{"duration":123.298,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=16T82DHJClA","annotations":[{"segment":[0.2,4.800418194276672],"label":"Arm wrestling"},{"segment":[29.378559348973234,53.95670050366979],"label":"Arm wrestling"},{"segment":[91.78399587456997,100.61676434890965],"label":"Arm wrestling"}]},"O_bDLIcx5k0":{"duration":145.96,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=O_bDLIcx5k0","annotations":[{"segment":[47.590901652392624,72.18332930051896],"label":"Painting"},{"segment":[91.7661883536566,116.58632366519151],"label":"Painting"}]},"3AsQjx1lxLU":{"duration":234.336,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3AsQjx1lxLU","annotations":[{"segment":[36.923422798136414,38.75131479875195],"label":"Running a marathon"},{"segment":[52.277717134165364,106.7489049173167],"label":"Running a marathon"},{"segment":[145.13464127457098,220.4438002230889],"label":"Running a 
marathon"}]},"bF2qGYgqJuM":{"duration":8.41,"subset":"testing","resolution":"322x240","url":"https://www.youtube.com/watch?v=bF2qGYgqJuM","annotations":[]},"n3v9Znovl98":{"duration":159.31,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=n3v9Znovl98","annotations":[{"segment":[13.172405184642757,150.3642460218409],"label":"Snatch"}]},"mNTO-xnsaTc":{"duration":31.046,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mNTO-xnsaTc","annotations":[{"segment":[0.2,29.546017900410423],"label":"Rafting"}]},"Lfh5p8ReOYo":{"duration":186.5,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Lfh5p8ReOYo","annotations":[{"segment":[63.428227750390015,69.24733121372856],"label":"Vacuuming floor"},{"segment":[81.46744848673947,84.95891056474258],"label":"Vacuuming floor"},{"segment":[111.14487614976598,113.4725175351014],"label":"Vacuuming floor"},{"segment":[121.32830721060841,130.05696240561622],"label":"Vacuuming floor"}]},"gXKGS1N3zuM":{"duration":151.28,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gXKGS1N3zuM","annotations":[{"segment":[45.544687987519495,147.48927457098281],"label":"Belly dance"}]},"vWde8sMxe1w":{"duration":128.62,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=vWde8sMxe1w","annotations":[{"segment":[37.32036492355694,40.73136601872075],"label":"High jump"},{"segment":[62.40125532917316,115.97403723556941],"label":"High jump"}]},"5YXp6dVOn90":{"duration":18.11,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=5YXp6dVOn90","annotations":[]},"MsBqlgdzeag":{"duration":56.05,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MsBqlgdzeag","annotations":[{"segment":[0.2,55.8],"label":"Chopping wood"},{"segment":[27.25741029641186,32.23712948517941],"label":"Chopping wood"},{"segment":[0.2,55.8],"label":"Chopping 
wood"}]},"TAC-5hXVLPY":{"duration":177.66,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TAC-5hXVLPY","annotations":[{"segment":[26.6091659,135.9558184],"label":"Archery"},{"segment":[160.4082917316693,169.82777691107646],"label":"Archery"}]},"0SS16APU65E":{"duration":77.624,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0SS16APU65E","annotations":[]},"qVy_WDpLHRM":{"duration":176.87,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qVy_WDpLHRM","annotations":[{"segment":[118.47722742,143.57274416],"label":"Vacuuming floor"},{"segment":[21.397248680000004,108.3834848],"label":"Vacuuming floor"},{"segment":[21.108028431341335,82.36269837285491],"label":"Vacuuming floor"}]},"KokLo4Mwg3k":{"duration":227.324,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KokLo4Mwg3k","annotations":[]},"MLfEz9HPElE":{"duration":9.27,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MLfEz9HPElE","annotations":[{"segment":[3.5475234009360372,8.21039391575663],"label":"Hopscotch"}]},"NbIxjYWSURI":{"duration":231.13,"subset":"training","resolution":"810x480","url":"https://www.youtube.com/watch?v=NbIxjYWSURI","annotations":[{"segment":[16.219165366614664,36.04258970358814],"label":"High jump"},{"segment":[46.13451482059282,49.37834789391575],"label":"High jump"},{"segment":[64.51623556942278,90.46690015600625],"label":"High jump"},{"segment":[108.48819500780031,189.22359594383775],"label":"High jump"},{"segment":[195.35083619344772,214.09298283931355],"label":"High 
jump"}]},"TDfWOcKi684":{"duration":222.15,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TDfWOcKi684","annotations":[{"segment":[25.992040497946448,171.89402782641918],"label":"Paintball"}]},"JNSaWuU8ky4":{"duration":137.997,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JNSaWuU8ky4","annotations":[{"segment":[66.26663212303802,80.66902308877012],"label":"Canoeing"}]},"K2dU4-Rg354":{"duration":122.09,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=K2dU4-Rg354","annotations":[{"segment":[15.231326053042121,120.34719500780032],"label":"Pole vault"}]},"8DlvRudzDjo":{"duration":57.818,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=8DlvRudzDjo","annotations":[]},"GHOCVoZsFHE":{"duration":116.31,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GHOCVoZsFHE","annotations":[{"segment":[13.788723868954758,18.68734945397816],"label":"Tai chi"},{"segment":[26.307433697347893,62.95641029641185],"label":"Tai chi"},{"segment":[75.11225897035881,82.18805148205928],"label":"Tai chi"}]},"dAP7xalPRHo":{"duration":116.52000000000001,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=dAP7xalPRHo","annotations":[{"segment":[11.06330679,73.32640514586583],"label":"Hand washing clothes"}]},"Ao1h62LMRZg":{"duration":208.91,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ao1h62LMRZg","annotations":[]},"4OfhHE72V8c":{"duration":201.55,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=4OfhHE72V8c","annotations":[{"segment":[24.679003900156008,184.07078705148209],"label":"Putting on makeup"}]},"CF6Q5ojrAJo":{"duration":39.15,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=CF6Q5ojrAJo","annotations":[{"segment":[1.357214004314577,39.121],"label":"Chopping 
wood"}]},"WC3GCmEqCBc":{"duration":106.0,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=WC3GCmEqCBc","annotations":[{"segment":[52.52754758190327,92.17097971918876],"label":"Preparing salad"}]},"iMwLP3y0VcQ":{"duration":42.26,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=iMwLP3y0VcQ","annotations":[{"segment":[7.515875492162543,34.81037070054231],"label":"Smoking hookah"}]},"sbnF7VVeopM":{"duration":72.17,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sbnF7VVeopM","annotations":[]},"v5i_NAlJX1Y":{"duration":45.163,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=v5i_NAlJX1Y","annotations":[{"segment":[12.383425897035881,34.210305772230896],"label":"Using the rowing machine"}]},"mUAqa5nNZnM":{"duration":80.13,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mUAqa5nNZnM","annotations":[]},"htzAYwoAZkg":{"duration":165.93,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=htzAYwoAZkg","annotations":[]},"uv7COBs24D8":{"duration":153.44,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uv7COBs24D8","annotations":[{"segment":[61.30421898000001,87.84942483556944],"label":"Layup drill in basketball"},{"segment":[111.91234469,121.54818829000001],"label":"Layup drill in basketball"},{"segment":[127.34576025210609,141.7080640399376],"label":"Layup drill in basketball"}]},"h0R04AFnor4":{"duration":53.34,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=h0R04AFnor4","annotations":[{"segment":[0,52.552624024961],"label":"Shaving legs"}]},"x4iCAIPRDVQ":{"duration":23.383,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=x4iCAIPRDVQ","annotations":[{"segment":[0.01,23.341],"label":"Using the rowing 
machine"}]},"vbyMT7bdUX4":{"duration":108.46000000000001,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=vbyMT7bdUX4","annotations":[{"segment":[23.581373199999998,96.44685177847114],"label":"Wrapping presents"}]},"2OAHfFCggh0":{"duration":209.82,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2OAHfFCggh0","annotations":[]},"olFThb2_GHM":{"duration":176.98,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=olFThb2_GHM","annotations":[{"segment":[34.51296691323435,43.900493913634094],"label":"Discus throw"},{"segment":[74.5480085325862,95.03933400000001],"label":"Discus throw"},{"segment":[121.48564353458492,160.96847768332503],"label":"Discus throw"}]},"I71OenniCZI":{"duration":56.448,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=I71OenniCZI","annotations":[{"segment":[5.988212393135726,56.447709750566894],"label":"Shuffleboard"}]},"OOcsprnRCQQ":{"duration":183.531,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OOcsprnRCQQ","annotations":[{"segment":[44.08604368174727,66.41533853354134],"label":"Scuba diving"},{"segment":[78.72507800312013,89.60345241809672],"label":"Scuba diving"},{"segment":[127.96403588143525,154.01487987519502],"label":"Scuba diving"}]},"4BofYu8Soz8":{"duration":16.07,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=4BofYu8Soz8","annotations":[{"segment":[0,16.068207],"label":"Playing water 
polo"}]},"blSOfdlqJtY":{"duration":111.99000000000001,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=blSOfdlqJtY","annotations":[{"segment":[7.246832293291733,15.279465678627146],"label":"Dodgeball"},{"segment":[33.44020202808113,105.90852496099845],"label":"Dodgeball"}]},"CRzaKuaCXr8":{"duration":127.52,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=CRzaKuaCXr8","annotations":[{"segment":[0,125.3129953198128],"label":"Cumbia"}]},"BKdKbFPerGo":{"duration":70.937,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=BKdKbFPerGo","annotations":[{"segment":[13.369738840499856,52.92648573208723],"label":"River tubing"}]},"yE-ooj_-CXo":{"duration":25.52,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=yE-ooj_-CXo","annotations":[]},"c_3z4UfOBtI":{"duration":95.03999999999999,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=c_3z4UfOBtI","annotations":[]},"obt6wudzHxY":{"duration":122.091,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=obt6wudzHxY","annotations":[{"segment":[70.28299825273011,85.90144230889236],"label":"Braiding hair"}]},"ejzgfj6QKB8":{"duration":86.147,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ejzgfj6QKB8","annotations":[{"segment":[11.957517940717628,63.28079719188767],"label":"Hurling"}]},"6YtaXJJKUKM":{"duration":58.747,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6YtaXJJKUKM","annotations":[{"segment":[48.87926705771733,55.36604729020186],"label":"Table soccer"}]},"y6sx0u3MYFo":{"duration":9.41,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=y6sx0u3MYFo","annotations":[{"segment":[0.01,7.716921792511701],"label":"Discus 
throw"}]},"D4zZErqkD6A":{"duration":135.025,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=D4zZErqkD6A","annotations":[{"segment":[7.370257410296412,127.41904056162247],"label":"Baton twirling"}]},"qhYQd9nwOts":{"duration":133.4,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=qhYQd9nwOts","annotations":[{"segment":[0,132.73684555382215],"label":"Kayaking"}]},"BJNzJR3oQ3o":{"duration":169.32,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=BJNzJR3oQ3o","annotations":[]},"bBtzyRzk0UM":{"duration":214.76,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bBtzyRzk0UM","annotations":[{"segment":[16.81719529389679,26.51825356648246],"label":"Ballet"},{"segment":[44.916812359317355,107.47191225495598],"label":"Ballet"},{"segment":[121.85624003844508,131.89181756180957],"label":"Ballet"},{"segment":[137.91316407582826,155.64268436710552],"label":"Ballet"},{"segment":[158.31883837333604,168.35441589670054],"label":"Ballet"},{"segment":[173.03768540760396,191.43624420043884],"label":"Ballet"}]},"3QDXF-efrLo":{"duration":75.349,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3QDXF-efrLo","annotations":[]},"DLlooZTLPdI":{"duration":173.105,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DLlooZTLPdI","annotations":[{"segment":[17.33008609279966,41.86674191456875],"label":"Hula hoop"},{"segment":[57.23585612017623,65.05523001425723],"label":"Hula hoop"},{"segment":[75.03167256877435,76.91910764665597],"label":"Hula hoop"},{"segment":[82.31177929774631,87.97408453139118],"label":"Hula hoop"},{"segment":[100.91649649400802,123.56571742858743],"label":"Hula hoop"},{"segment":[131.11545774011392,134.62069431332264],"label":"Hula hoop"},{"segment":[138.12593088653136,156.19138091768403],"label":"Hula 
hoop"}]},"rABAqfLcssA":{"duration":30.7,"subset":"training","resolution":"210x144","url":"https://www.youtube.com/watch?v=rABAqfLcssA","annotations":[{"segment":[0.2,19.96966746177847],"label":"Windsurfing"}]},"BC0cgv8YgTg":{"duration":72.49,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=BC0cgv8YgTg","annotations":[{"segment":[20.738935257410297,26.61590873634945],"label":"Doing karate"},{"segment":[34.41420046801872,43.22966068642745],"label":"Doing karate"}]},"BRuansCVV3U":{"duration":123.35,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=BRuansCVV3U","annotations":[{"segment":[8.274259571743414,123.3441949],"label":"Tai chi"}]},"5sUctxaAFrw":{"duration":60.14,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=5sUctxaAFrw","annotations":[{"segment":[13.798890173998206,47.87484656029104],"label":"Roof shingle removal"}]},"oMzNT4NVXwk":{"duration":20.341,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=oMzNT4NVXwk","annotations":[]},"nwpz-KGn46A":{"duration":71.01,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nwpz-KGn46A","annotations":[{"segment":[21.82262736349454,28.690662371294852],"label":"Spinning"},{"segment":[37.55264302652106,53.17188393135726],"label":"Spinning"},{"segment":[60.372243213728545,70.80662],"label":"Spinning"}]},"7edBQl3VD1o":{"duration":205.66,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7edBQl3VD1o","annotations":[{"segment":[43.55410591900311,179.3578816199377],"label":"Skiing"}]},"fZzswQaICfM":{"duration":159.57,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fZzswQaICfM","annotations":[{"segment":[26.387145722563595,150.35694258970358],"label":"Hopscotch"}]},"2b_hjshny54":{"duration":165.559,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2b_hjshny54","annotation
s":[{"segment":[12.909594383775351,165.501],"label":"Swimming"}]},"Z2QA7dUVwMM":{"duration":212.51,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=Z2QA7dUVwMM","annotations":[{"segment":[24.5330231603822,140.89911950219508],"label":"Changing car wheel"},{"segment":[161.78534192252044,212.30902494331067],"label":"Changing car wheel"}]},"XDBugI_CcYs":{"duration":57.679,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=XDBugI_CcYs","annotations":[{"segment":[5.080680967238689,33.94614274570983],"label":"Tug of war"}]},"-t_Zhm1b0FI":{"duration":139.552,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=-t_Zhm1b0FI","annotations":[{"segment":[0.2,139.501],"label":"Playing congas"}]},"bEcSrzeCGyA":{"duration":237.22,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=bEcSrzeCGyA","annotations":[{"segment":[27.7552718435268,32.19611533849109],"label":"Ping-pong"},{"segment":[94.36792426799111,116.57214174281256],"label":"Ping-pong"},{"segment":[132.11509303432138,158.39008352574103],"label":"Ping-pong"}]},"iL5abexk3vQ":{"duration":150.419,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iL5abexk3vQ","annotations":[{"segment":[0.01,145.1389516419657],"label":"Baton twirling"}]},"caPl3Aszru0":{"duration":67.22,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=caPl3Aszru0","annotations":[{"segment":[16.779230298463638,21.393518630541138],"label":"Springboard diving"},{"segment":[31.461056630265208,37.96300833385335],"label":"Springboard diving"}]},"ML0XZMcKk_E":{"duration":125.202,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=ML0XZMcKk_E","annotations":[{"segment":[31.04617628705148,98.62153666146646],"label":"Doing 
fencing"}]},"PFYk8lhE7-0":{"duration":32.834,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=PFYk8lhE7-0","annotations":[{"segment":[15.878680717628704,29.452391653666144],"label":"Plastering"}]},"DEmArEKL8gc":{"duration":194.56,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DEmArEKL8gc","annotations":[{"segment":[42.1864448,148.4084224],"label":"Cheerleading"},{"segment":[6.9088256,31.9428608],"label":"Cheerleading"}]},"ZGsYV0KDB-4":{"duration":31.19,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=ZGsYV0KDB-4","annotations":[{"segment":[0.01,31.18433],"label":"Walking the dog"}]},"SI8HO5-e24c":{"duration":187.69,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=SI8HO5-e24c","annotations":[{"segment":[77.88567813188718,187.68691609977324],"label":"Bungee jumping"}]},"_DiTdY1Mtj4":{"duration":146.147,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=_DiTdY1Mtj4","annotations":[{"segment":[20.063779596081805,24.395732008872194],"label":"Shuffleboard"},{"segment":[34.42762180691309,37.847584238063405],"label":"Shuffleboard"},{"segment":[47.19548154987424,61.78732125611555],"label":"Shuffleboard"},{"segment":[67.9432536321861,73.41519352202658],"label":"Shuffleboard"},{"segment":[75.23917348530676,82.3070958430174],"label":"Shuffleboard"},{"segment":[88.46302821908795,93.93496810892844],"label":"Shuffleboard"},{"segment":[138.62247573166925,143.41042308424335],"label":"Shuffleboard"}]},"R8WbSI3m1lI":{"duration":99.661,"subset":"validation","resolution":"240x136","url":"https://www.youtube.com/watch?v=R8WbSI3m1lI","annotations":[{"segment":[6.974652473612027,26.81367068120611],"label":"Rollerblading"},{"segment":[92.53041754013695,95.01029479052575],"label":"Rollerblading"}]},"JXyi7hFT26w":{"duration":203.222,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JXyi7hFT
26w","annotations":[{"segment":[95.65048520956306,98.81486215660355],"label":"Curling"},{"segment":[118.12279432880247,122.55311831634141],"label":"Curling"}]},"3G9zc_SEOHM":{"duration":135.187,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3G9zc_SEOHM","annotations":[{"segment":[95.74814458346336,114.30725630889236],"label":"Rock-paper-scissors"}]},"8JKK6JayDeA":{"duration":220.66,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8JKK6JayDeA","annotations":[{"segment":[0.2,220.405],"label":"Snatch"},{"segment":[32.350811232449296,46.11711388455538],"label":"Snatch"},{"segment":[0.2,220.405],"label":"Snatch"},{"segment":[0.2,220.405],"label":"Snatch"},{"segment":[0.2,220.405],"label":"Snatch"},{"segment":[0.2,220.405],"label":"Snatch"},{"segment":[0.2,220.405],"label":"Snatch"},{"segment":[0.2,220.405],"label":"Snatch"},{"segment":[0.2,220.405],"label":"Snatch"},{"segment":[0.2,220.405],"label":"Snatch"},{"segment":[0.2,220.405],"label":"Snatch"},{"segment":[0.2,220.405],"label":"Snatch"},{"segment":[0.2,220.405],"label":"Snatch"},{"segment":[0.2,220.405],"label":"Snatch"},{"segment":[182.4035101404056,201.67633385335412],"label":"Snatch"}]},"BtGrWzfvmP0":{"duration":122.625,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BtGrWzfvmP0","annotations":[{"segment":[60.64481295106819,89.25085707760095],"label":"Putting in contact 
lenses"}]},"8H3Wy96BwHM":{"duration":54.66,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8H3Wy96BwHM","annotations":[{"segment":[10.317935871176344,26.18839313572543],"label":"BMX"},{"segment":[27.12673946957878,45.29653666146646],"label":"BMX"}]},"Z98StW03bYc":{"duration":103.26,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Z98StW03bYc","annotations":[]},"NEKcJ0whvkg":{"duration":144.196,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NEKcJ0whvkg","annotations":[{"segment":[8.371612522053214,11.740676035308983],"label":"Hurling"},{"segment":[31.9550571148436,34.65030792544822],"label":"Hurling"},{"segment":[40.93922648352565,45.43131116786668],"label":"Hurling"},{"segment":[71.26079810282758,81.3679886425949],"label":"Hurling"},{"segment":[99.11172314574193,109.66812215394334],"label":"Hurling"}]},"Vft2cnyVuJc":{"duration":233.76,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Vft2cnyVuJc","annotations":[]},"joVOZZ63pe4":{"duration":160.78,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=joVOZZ63pe4","annotations":[]},"USYy2DpAIyk":{"duration":35.156,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=USYy2DpAIyk","annotations":[]},"czDoodPfLK8":{"duration":220.15,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=czDoodPfLK8","annotations":[]},"r3lN8kgtgw4":{"duration":200.551,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=r3lN8kgtgw4","annotations":[{"segment":[23.704282600059805,107.6049839111018],"label":"Rafting"},{"segment":[116.33814241187939,153.1421675222993],"label":"Rafting"}]},"mX3gbTBdbKY":{"duration":219.66,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mX3gbTBdbKY","annotations":[{"segment":[100.00900139999999,167.0162844],"label":"Paintball"}]},"Eqhn
Cfb5vEc":{"duration":186.16,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EqhnCfb5vEc","annotations":[{"segment":[8.71237331125898,175.40911466458655],"label":"Belly dance"}]},"Krh4m6nxZWE":{"duration":9.172,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Krh4m6nxZWE","annotations":[{"segment":[0.01,7.146364294009647],"label":"Slacklining"}]},"qCtoh8hxjOA":{"duration":25.589,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=qCtoh8hxjOA","annotations":[{"segment":[0.01,14.842859972966952],"label":"Fun sliding down"}]},"JgDfOMDfNZs":{"duration":46.255,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=JgDfOMDfNZs","annotations":[{"segment":[15.658580524180966,34.20353533853354],"label":"Hand car wash"}]},"Flh6nxGkf74":{"duration":35.852,"subset":"validation","resolution":"316x204","url":"https://www.youtube.com/watch?v=Flh6nxGkf74","annotations":[{"segment":[5.537144130663187,19.072385338950973],"label":"Building sandcastles"},{"segment":[27.909443648494243,34.956717996611026],"label":"Building sandcastles"}]},"yST1TsQ6mgY":{"duration":183.554,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yST1TsQ6mgY","annotations":[{"segment":[42.37417784711389,113.0932449297972],"label":"Camel ride"}]},"-GYS5LjMTZo":{"duration":108.21000000000001,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=-GYS5LjMTZo","annotations":[]},"OxPzQ4yqfwg":{"duration":51.711,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=OxPzQ4yqfwg","annotations":[{"segment":[13.552918539781592,21.620131956318254],"label":"Fixing the 
roof"}]},"gjyT6Rt985k":{"duration":223.59,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=gjyT6Rt985k","annotations":[{"segment":[4.185677565878145,98.36342279813641],"label":"Tango"}]},"X0IXZMr95TM":{"duration":98.45,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=X0IXZMr95TM","annotations":[{"segment":[10.51762716,88.6702496099844],"label":"Mixing drinks"}]},"aOJOAHIEIzo":{"duration":30.117,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aOJOAHIEIzo","annotations":[]},"8GeLWWLIar4":{"duration":98.96000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8GeLWWLIar4","annotations":[{"segment":[3.9397170299999997,73.36720968],"label":"Breakdancing"}]},"poI4vU4k1sY":{"duration":231.039,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=poI4vU4k1sY","annotations":[{"segment":[40.36865779719189,201.84328898595945],"label":"Playing pool"}]},"t_D9MYkEPEo":{"duration":182.56,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=t_D9MYkEPEo","annotations":[{"segment":[35.03011312792512,110.50149507020281],"label":"Tango"},{"segment":[11.7894019,14.608051099999999],"label":"Tango"}]},"SF3pw17yBB4":{"duration":15.14,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SF3pw17yBB4","annotations":[{"segment":[0.01,15.139069],"label":"Peeling potatoes"}]},"fErOJ98E15w":{"duration":162.12,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=fErOJ98E15w","annotations":[{"segment":[29.337416160000004,143.92778138],"label":"Playing 
squash"}]},"sJkabaVH-C0":{"duration":130.38,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sJkabaVH-C0","annotations":[]},"CDWHstJThJA":{"duration":215.78,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CDWHstJThJA","annotations":[{"segment":[21.708727769110766,24.06471372854914],"label":"Archery"},{"segment":[40.22004602184087,45.26858736349455],"label":"Archery"},{"segment":[50.65369812792512,53.68282293291732],"label":"Archery"},{"segment":[78.58896021840874,82.29122386895477],"label":"Archery"},{"segment":[89.3591817472699,94.40772308892356],"label":"Archery"},{"segment":[119.65042979719189,129.7475124804992],"label":"Archery"},{"segment":[134.79605382215289,137.8251786271451],"label":"Archery"},{"segment":[148.25883073322933,153.307372074883],"label":"Archery"},{"segment":[189.6568697347894,192.0128556942278],"label":"Archery"}]},"QWXzDdMjZOw":{"duration":151.95,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=QWXzDdMjZOw","annotations":[{"segment":[104.25803432137285,140.98529641185647],"label":"Playing saxophone"}]},"2OPk79DNQdM":{"duration":119.68,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=2OPk79DNQdM","annotations":[{"segment":[17.449686638845556,112.8164232425897],"label":"Brushing hair"}]},"wGEaIInAtT4":{"duration":199.228,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wGEaIInAtT4","annotations":[{"segment":[0.19999999999999998,197.64717004680185],"label":"Rope skipping"}]},"qnDHR-jYWf0":{"duration":24.99,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=qnDHR-jYWf0","annotations":[{"segment":[0,21.476682012480502],"label":"Playing water polo"}]},"yKdvz5dgro4":{"duration":10.54,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=yKdvz5dgro4","annotations":[{"segment":[0,10.541858],"label":"Washing 
dishes"}]},"CMGjxw3X1dI":{"duration":38.57,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=CMGjxw3X1dI","annotations":[{"segment":[1.5643945678627145,36.64293430109204],"label":"Windsurfing"}]},"aRdqcOaBvjk":{"duration":160.29,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aRdqcOaBvjk","annotations":[{"segment":[4.252386895475819,151.33494539781591],"label":"Paintball"}]},"wlx5YuEHsO8":{"duration":197.718,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wlx5YuEHsO8","annotations":[{"segment":[21.591659656786273,173.0417295351014],"label":"Table soccer"}]},"ZF4oT2P0a54":{"duration":106.023,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=ZF4oT2P0a54","annotations":[{"segment":[20.013499563111775,100.23289863839452],"label":"Surfing"}]},"xeFS0RuvVGA":{"duration":78.6,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xeFS0RuvVGA","annotations":[{"segment":[0,78.39956916099773],"label":"Playing water polo"}]},"2ptdloHcTcs":{"duration":235.335,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2ptdloHcTcs","annotations":[{"segment":[0.01,225.78837423556942],"label":"Surfing"}]},"V2MlQezL1IE":{"duration":191.12,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=V2MlQezL1IE","annotations":[{"segment":[119.8621303872563,177.70604405672825],"label":"Volleyball"}]},"yE0fDD_7FkM":{"duration":54.59,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=yE0fDD_7FkM","annotations":[{"segment":[8.57887935,45.835804700000004],"label":"Springboard 
diving"}]},"s9iaX2fDyeg":{"duration":162.354,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=s9iaX2fDyeg","annotations":[]},"woiigfrANUM":{"duration":141.71,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=woiigfrANUM","annotations":[{"segment":[40.23627958939158,72.51373464461777],"label":"Skateboarding"}]},"b380n1dci9I":{"duration":58.91,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=b380n1dci9I","annotations":[{"segment":[4.211117004680188,48.44435101404056],"label":"BMX"}]},"peiF2E05u8g":{"duration":206.26,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=peiF2E05u8g","annotations":[]},"Q3ZNFGE8PZE":{"duration":73.77,"subset":"testing","resolution":"830x480","url":"https://www.youtube.com/watch?v=Q3ZNFGE8PZE","annotations":[]},"s3JuGKwna6o":{"duration":47.35,"subset":"training","resolution":"426x196","url":"https://www.youtube.com/watch?v=s3JuGKwna6o","annotations":[{"segment":[14.32921105148206,19.647268761310453],"label":"Drinking coffee"}]},"-UQbRLLGjRE":{"duration":189.382,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=-UQbRLLGjRE","annotations":[]},"k74CgBa46bA":{"duration":42.15,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=k74CgBa46bA","annotations":[{"segment":[4.346025109204919,5.7231606232236105],"label":"Drinking coffee"},{"segment":[13.854817944095885,15.625420747834204],"label":"Drinking coffee"},{"segment":[20.478183987709592,22.445520436307724],"label":"Drinking coffee"}]},"Ug3pM64HhK4":{"duration":174.846,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ug3pM64HhK4","annotations":[{"segment":[24.255457900670738,130.03318658322314],"label":"Braiding 
hair"}]},"JxBFSll-mDE":{"duration":78.391,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=JxBFSll-mDE","annotations":[{"segment":[28.372248174726987,66.40573602964118],"label":"Playing ice hockey"}]},"ucsAN6pGv6w":{"duration":7.99,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ucsAN6pGv6w","annotations":[{"segment":[3.862988829953198,6.791383588143526],"label":"Javelin throw"}]},"oJcsC2yB7ok":{"duration":83.75,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=oJcsC2yB7ok","annotations":[]},"7I6v9c00aRA":{"duration":31.115,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7I6v9c00aRA","annotations":[{"segment":[4.028888756630265,26.163506503900155],"label":"Waterskiing"}]},"9Hxcuf80TK0":{"duration":215.09,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9Hxcuf80TK0","annotations":[{"segment":[0,215.08612244897958],"label":"Playing saxophone"}]},"QNKjKB4k4D0":{"duration":24.614,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QNKjKB4k4D0","annotations":[{"segment":[2.261956329780095,18.325678660436136],"label":"Calf 
roping"}]},"wP0jr86MNOw":{"duration":128.128,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=wP0jr86MNOw","annotations":[{"segment":[24.37735101404056,61.94244929797191],"label":"Powerbocking"},{"segment":[74.73056786271451,110.29752262090483],"label":"Powerbocking"}]},"q59X9izcjQc":{"duration":224.65,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=q59X9izcjQc","annotations":[{"segment":[57.4833743457894,211.451017211833],"label":"Windsurfing"}]},"gY5akj1YcMk":{"duration":33.46,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gY5akj1YcMk","annotations":[{"segment":[7.5689441263650545,26.72620270826833],"label":"Kayaking"}]},"sOMA_oI7dgk":{"duration":198.089,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=sOMA_oI7dgk","annotations":[{"segment":[121.72202808112324,178.87757878315134],"label":"Using the rowing machine"}]},"9hYyE1gofFQ":{"duration":167.649,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=9hYyE1gofFQ","annotations":[{"segment":[43.67742296086401,162.41724195163806],"label":"Baton twirling"}]},"74AJ-1e1qGA":{"duration":10.705,"subset":"validation","resolution":"810x480","url":"https://www.youtube.com/watch?v=74AJ-1e1qGA","annotations":[{"segment":[5.232190327613104,7.0151341653666135],"label":"Doing a powerbomb"}]},"6qynXM937V0":{"duration":147.32999999999998,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=6qynXM937V0","annotations":[{"segment":[4.364160686427457,144.4766879875195],"label":"Playing 
harmonica"}]},"fNpXgb67gR8":{"duration":47.276,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=fNpXgb67gR8","annotations":[]},"-NM-0NZXRNw":{"duration":201.18,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-NM-0NZXRNw","annotations":[{"segment":[4.938895349999999,194.69760530421217],"label":"Paintball"}]},"Oi-atN4-Oqk":{"duration":29.7,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=Oi-atN4-Oqk","annotations":[{"segment":[2.223899299843994,28.540041014664585],"label":"Cumbia"}]},"l2drIA62T8w":{"duration":107.579,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=l2drIA62T8w","annotations":[{"segment":[34.05744617784711,98.31361310452418],"label":"Grooming dog"}]},"_bC2DnG4yqA":{"duration":236.635,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_bC2DnG4yqA","annotations":[]},"mmC3M5dAfNo":{"duration":139.85,"subset":"testing","resolution":"810x480","url":"https://www.youtube.com/watch?v=mmC3M5dAfNo","annotations":[]},"C1TWr5XH8b0":{"duration":92.22999999999999,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=C1TWr5XH8b0","annotations":[{"segment":[54.86319123088924,73.55838090327615],"label":"Vacuuming floor"}]},"vX0xxYxGj-4":{"duration":92.787,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vX0xxYxGj-4","annotations":[{"segment":[8.536223088923556,92.741],"label":"Tug of 
war"}]},"T-CTcQ8onFY":{"duration":86.278,"subset":"testing","resolution":"720x720","url":"https://www.youtube.com/watch?v=T-CTcQ8onFY","annotations":[]},"cCISvaObBxI":{"duration":194.259,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cCISvaObBxI","annotations":[{"segment":[13.334393468255737,157.89133963650548],"label":"Futsal"}]},"WOZbWqJMkRg":{"duration":184.738,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=WOZbWqJMkRg","annotations":[{"segment":[0.2,176.35052106084242],"label":"Drum corps"}]},"Ih8bPM3p0rE":{"duration":27.17,"subset":"validation","resolution":"640x352","url":"https://www.youtube.com/watch?v=Ih8bPM3p0rE","annotations":[{"segment":[3.814448088127606,16.953102613900473],"label":"Javelin throw"}]},"5jaIcPomOuM":{"duration":59.119,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=5jaIcPomOuM","annotations":[{"segment":[6.728413416536662,51.154375975039],"label":"Clipping cat claws"}]},"92kGXXfm6ok":{"duration":14.65,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=92kGXXfm6ok","annotations":[{"segment":[0,13.02889282371295],"label":"Clean and jerk"}]},"zuBJzdDI9MY":{"duration":75.88,"subset":"validation","resolution":"648x480","url":"https://www.youtube.com/watch?v=zuBJzdDI9MY","annotations":[{"segment":[38.47412444617784,75.05413815039002],"label":"Preparing pasta"}]},"4R0fSNCWUo0":{"duration":155.18,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4R0fSNCWUo0","annotations":[{"segment":[12.588620374414978,142.59033462558503],"label":"Playing saxophone"}]},"FsS8cQbfKTQ":{"duration":74.61,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=FsS8cQbfKTQ","annotations":[{"segment":[0.01,41.376340523400934],"label":"Pole 
vault"}]},"lKSWdEIuJtM":{"duration":209.444,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lKSWdEIuJtM","annotations":[{"segment":[58.38439096573209,141.88385514018694],"label":"Trimming branches or hedges"}]},"_g2D5oXLCZI":{"duration":54.31,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_g2D5oXLCZI","annotations":[{"segment":[11.768160686427455,31.748634945397814],"label":"Layup drill in basketball"}]},"AO5Kb8aOJoE":{"duration":72.47,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AO5Kb8aOJoE","annotations":[]},"kRVmpJILmjo":{"duration":227.115,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kRVmpJILmjo","annotations":[{"segment":[33.305314541833376,215.0672968798752],"label":"Longboarding"}]},"0JCM9uzZafU":{"duration":228.0,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0JCM9uzZafU","annotations":[{"segment":[6.758093540068134,215.1919242823713],"label":"Doing nails"}]},"NSegylr0MBg":{"duration":145.938,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NSegylr0MBg","annotations":[{"segment":[13.204914727201333,63.292521953198126],"label":"Blow-drying hair"}]},"YWfLZFXwjTE":{"duration":112.199,"subset":"validation","resolution":"540x360","url":"https://www.youtube.com/watch?v=YWfLZFXwjTE","annotations":[{"segment":[0.01,110.13980535240493],"label":"Gargling mouthwash"}]},"TzhdX5tmEEo":{"duration":205.87,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TzhdX5tmEEo","annotations":[]},"l7qSKJyq6mA":{"duration":163.28,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=l7qSKJyq6mA","annotations":[]},"kyMsxlpBjlY":{"duration":30.35,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kyMsxlpBjlY","annotations":[{"segment":[0.01,30.348479],"label":"Mowing the 
lawn"}]},"MlbM7Mew0Ys":{"duration":233.547,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=MlbM7Mew0Ys","annotations":[{"segment":[17.12428334412288,217.15048564118564],"label":"Rope skipping"}]},"9lS5qpgrTmI":{"duration":172.664,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9lS5qpgrTmI","annotations":[{"segment":[20.202447074882993,172.6635827664399],"label":"Braiding hair"}]},"nvFtFFJXxB0":{"duration":181.44,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nvFtFFJXxB0","annotations":[{"segment":[14.214009600000002,169.22498148829956],"label":"Playing violin"}]},"0jvbBtMIA8k":{"duration":224.328,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0jvbBtMIA8k","annotations":[{"segment":[81.98602441246011,103.61638119384556],"label":"Beer pong"},{"segment":[117.571450085062,151.76136886854218],"label":"Beer pong"},{"segment":[170.94958859396476,186.64903990984521],"label":"Beer pong"},{"segment":[198.85972511202252,201.30186215245797],"label":"Beer pong"}]},"DmtaWx7QcZ8":{"duration":94.065,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DmtaWx7QcZ8","annotations":[{"segment":[31.013252529536395,80.89778023590148],"label":"Painting 
fence"}]},"kwUdyaXVXAI":{"duration":218.64,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kwUdyaXVXAI","annotations":[]},"DHWmzuAWxLM":{"duration":134.328,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=DHWmzuAWxLM","annotations":[{"segment":[9.43016321372855,134.32743764172335],"label":"Wakeboarding"}]},"IhBlRs2b6nc":{"duration":199.831,"subset":"testing","resolution":"472x360","url":"https://www.youtube.com/watch?v=IhBlRs2b6nc","annotations":[]},"iIVOAvu3qtM":{"duration":68.87,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=iIVOAvu3qtM","annotations":[{"segment":[4.512568159869252,68.87038548752834],"label":"Polishing forniture"}]},"3DQTz4Ke50Q":{"duration":126.223,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=3DQTz4Ke50Q","annotations":[{"segment":[0.2,126.201],"label":"Blowing leaves"}]},"_6mQ9_DQr0Q":{"duration":170.62,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_6mQ9_DQr0Q","annotations":[{"segment":[23.019120904836193,155.54538923556942],"label":"Snatch"}]},"Y1f_NXMfD98":{"duration":181.348,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Y1f_NXMfD98","annotations":[{"segment":[0.2,99.01984746056507],"label":"Grooming dog"},{"segment":[105.80977985786097,145.98354654186167],"label":"Grooming dog"}]},"6J45AbWiGIE":{"duration":121.069,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=6J45AbWiGIE","annotations":[{"segment":[11.51681903276131,82.50573634945397],"label":"Disc 
dog"}]},"HVhLOYGU9E4":{"duration":85.102,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HVhLOYGU9E4","annotations":[{"segment":[0.01,76.73705818408736],"label":"Rollerblading"}]},"M6i-5nzy5Bk":{"duration":69.2,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=M6i-5nzy5Bk","annotations":[{"segment":[0.2,8.52798997971919],"label":"Pole vault"},{"segment":[15.65263983619345,17.59572616068643],"label":"Pole vault"},{"segment":[22.777289692667708,24.93627449765991],"label":"Pole vault"},{"segment":[36.7027416848674,40.91276205460218],"label":"Pole vault"},{"segment":[49.54870127457098,57.213097332293295],"label":"Pole vault"}]},"Dt2KQcKR4T8":{"duration":5.97,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=Dt2KQcKR4T8","annotations":[{"segment":[0.2,5.598933697347895],"label":"Pole vault"}]},"Jy8JurvYlH4":{"duration":141.8,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Jy8JurvYlH4","annotations":[{"segment":[53.62869189547582,128.07170064],"label":"Brushing teeth"}]},"sF859t5osSg":{"duration":102.00999999999999,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sF859t5osSg","annotations":[{"segment":[29.439895343213724,93.0937231123245],"label":"Drinking coffee"}]},"4-GJMxf6kQM":{"duration":121.35,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=4-GJMxf6kQM","annotations":[]},"LGS_yzsScfw":{"duration":166.697,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LGS_yzsScfw","annotations":[{"segment":[16.4288381689806,47.067676832171735],"label":"Changing car wheel"},{"segment":[89.1311672002816,99.51721335596007],"label":"Changing car wheel"},{"segment":[127.29988885596006,147.03337799614698],"label":"Changing car wheel"},{"segment":[155.08256435596007,158.97733194941802],"label":"Changing car 
wheel"}]},"2UBPORshiXQ":{"duration":111.387,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2UBPORshiXQ","annotations":[]},"zQVUXbyCV1o":{"duration":183.554,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=zQVUXbyCV1o","annotations":[{"segment":[4.581685191887676,164.0816009344774],"label":"Beach soccer"}]},"vuXsKQKbAfE":{"duration":214.971,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vuXsKQKbAfE","annotations":[{"segment":[155.44267184564936,160.4731790251202],"label":"Scuba diving"}]},"mZoC4fb7bIk":{"duration":171.596,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=mZoC4fb7bIk","annotations":[]},"9amq0hE3O9Q":{"duration":21.48,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9amq0hE3O9Q","annotations":[]},"FoT7T-CG8BM":{"duration":181.604,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FoT7T-CG8BM","annotations":[]},"B0GCHDqEsRU":{"duration":120.14,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=B0GCHDqEsRU","annotations":[]},"SBn1i9YqN1k":{"duration":89.049,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SBn1i9YqN1k","annotations":[{"segment":[1.2499048361934477,89.021],"label":"Sharpening knives"}]},"Eigz6Eo9jYQ":{"duration":65.527,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=Eigz6Eo9jYQ","annotations":[]},"JBemXLIpgK8":{"duration":88.03,"subset":"testing","resolution":"426x234","url":"https://www.youtube.com/watch?v=JBemXLIpgK8","annotations":[]},"ZreBacTw5Hw":{"duration":218.198,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZreBacTw5Hw","annotations":[{"segment":[27.912993657161252,47.656330634177756],"label":"Beach soccer"},{"segment":[50.719951889232036,61.27242510108567],"label":"Beach 
soccer"},{"segment":[65.35725344115806,88.50461403490154],"label":"Beach soccer"},{"segment":[94.6318565450101,114.7155958836993],"label":"Beach soccer"},{"segment":[123.90645964886217,137.52255411577008],"label":"Beach soccer"},{"segment":[142.62858954086056,143.30939426420593],"label":"Beach soccer"},{"segment":[147.39422260427833,148.41542968929642],"label":"Beach soccer"},{"segment":[152.1598556676961,154.542672199405],"label":"Beach soccer"},{"segment":[163.39313360289515,166.11635249627673],"label":"Beach soccer"},{"segment":[173.60520445307608,177.34962975507023],"label":"Beach soccer"},{"segment":[179.7324462776911,181.09405571918876],"label":"Beach soccer"},{"segment":[183.47687224180967,199.4757831794072],"label":"Beach soccer"}]},"XD52QWWvQ_A":{"duration":18.228,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=XD52QWWvQ_A","annotations":[]},"IE3pd-4HHJU":{"duration":165.3,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=IE3pd-4HHJU","annotations":[]},"Y9EIH-A_ePo":{"duration":54.13,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=Y9EIH-A_ePo","annotations":[{"segment":[15.895971249999999,35.91735],"label":"Playing kickball"}]},"x164DXmWtRM":{"duration":135.49,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=x164DXmWtRM","annotations":[{"segment":[11.621359797191888,129.73663482683307],"label":"Paintball"}]},"PP5EOYoCSb4":{"duration":158.29,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=PP5EOYoCSb4","annotations":[]},"b9h8ONagDv4":{"duration":190.752,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=b9h8ONagDv4","annotations":[]},"x7PDVqDFw6c":{"duration":176.61,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=x7PDVqDFw6c","annotations":[{"segment":[37.460680187207494,102.46597815912637],"label":"Cleaning 
windows"},{"segment":[112.65748673946959,130.2860421216849],"label":"Cleaning windows"}]},"D5hsFA2ZrIQ":{"duration":13.004,"subset":"training","resolution":"480x480","url":"https://www.youtube.com/watch?v=D5hsFA2ZrIQ","annotations":[{"segment":[0.16958320837525936,12.62587137036903],"label":"Trimming branches or hedges"}]},"7GsxM2QSmhY":{"duration":29.374,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7GsxM2QSmhY","annotations":[]},"_UZpaTEWXuA":{"duration":131.89,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=_UZpaTEWXuA","annotations":[]},"nxNJNn8blDw":{"duration":67.617,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=nxNJNn8blDw","annotations":[{"segment":[7.489453719188768,41.77216440561623],"label":"Installing carpet"}]},"rOeNl9qIoVI":{"duration":16.84,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=rOeNl9qIoVI","annotations":[{"segment":[0.9454614290171607,16.440523737909515],"label":"Kayaking"}]},"uXX2lSVAXq8":{"duration":162.61,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=uXX2lSVAXq8","annotations":[{"segment":[8.575998660000002,67.82022230889235],"label":"Getting a piercing"},{"segment":[68.58034575,139.06314742589703],"label":"Getting a piercing"}]},"jrwW5L2Ixrg":{"duration":9.196,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=jrwW5L2Ixrg","annotations":[]},"uo1ERV8LZeU":{"duration":120.535,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uo1ERV8LZeU","annotations":[{"segment":[30.650799464898597,32.15513318096724],"label":"Making a lemonade"},{"segment":[34.03555032605304,39.48876004680187],"label":"Making a lemonade"},{"segment":[40.42896861934477,56.788597781591264],"label":"Making a lemonade"},{"segment":[58.66901492667707,59.60922349921997],"label":"Making a 
lemonade"},{"segment":[62.24180750234009,63.55809950390015],"label":"Making a lemonade"},{"segment":[68.07110065210608,85.55898010140406],"label":"Making a lemonade"}]},"NsQVVICY_Ss":{"duration":33.93,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=NsQVVICY_Ss","annotations":[]},"ybkcKusf-Kg":{"duration":62.16,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ybkcKusf-Kg","annotations":[{"segment":[12.703488341653665,59.735487163806546],"label":"Using parallel bars"}]},"O67GDqPk_ug":{"duration":204.312,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=O67GDqPk_ug","annotations":[{"segment":[3.627420111694811,192.95326902135213],"label":"Trimming branches or hedges"}]},"xMLTg5Th50g":{"duration":26.123,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=xMLTg5Th50g","annotations":[]},"RA61jh8HwPI":{"duration":104.21000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RA61jh8HwPI","annotations":[{"segment":[0,5.527580764430578],"label":"Putting on makeup"}]},"OdLcbH2H_zI":{"duration":111.53,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OdLcbH2H_zI","annotations":[{"segment":[28.70779682527301,45.23652833073323],"label":"Playing accordion"},{"segment":[54.45782064430577,58.11154024024961],"label":"Playing accordion"},{"segment":[65.24499278471139,103.34806857098283],"label":"Playing accordion"}]},"I4kjOE8HnU0":{"duration":170.77,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=I4kjOE8HnU0","annotations":[{"segment":[6.127592303692148,18.915611024440977],"label":"Triple jump"},{"segment":[23.444700988039525,29.572293291731672],"label":"Triple jump"},{"segment":[83.38853874154967,85.25345813832554],"label":"Triple jump"},{"segment":[96.70939157566302,100.9720644825793],"label":"Triple 
jump"},{"segment":[106.83323972958918,109.76382735309414],"label":"Triple jump"},{"segment":[116.4242537701508,119.08842433697347],"label":"Triple jump"},{"segment":[151.324887900156,155.32114374258973],"label":"Triple jump"}]},"X9CpU4ucYeg":{"duration":112.873,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=X9CpU4ucYeg","annotations":[{"segment":[67.96984214352574,75.89378747113884],"label":"Making a lemonade"},{"segment":[54.235003838248765,63.496370535101406],"label":"Making a lemonade"},{"segment":[67.96984214352574,75.89378747113884],"label":"Making a lemonade"},{"segment":[78.35901490639625,89.10036301716069],"label":"Making a lemonade"},{"segment":[91.38950277847114,92.79820417004679],"label":"Making a lemonade"}]},"L54gbbqtxOg":{"duration":61.51,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=L54gbbqtxOg","annotations":[{"segment":[7.29287698854893,52.77739925923568],"label":"Painting furniture"}]},"YNnyUVFE4uM":{"duration":60.33,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=YNnyUVFE4uM","annotations":[{"segment":[1.5998946911076444,50.82018430577222],"label":"Washing dishes"}]},"SFfB6qvT5FI":{"duration":31.39,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=SFfB6qvT5FI","annotations":[{"segment":[7.493270953198128,26.985570556942278],"label":"Bungee jumping"},{"segment":[26.299911776911078,30.658742592823714],"label":"Bungee jumping"}]},"Lyaozxv4_qU":{"duration":41.959,"subset":"validation","resolution":"270x480","url":"https://www.youtube.com/watch?v=Lyaozxv4_qU","annotations":[{"segment":[26.444956368174726,40.45292830577223],"label":"Blowing 
leaves"}]},"SLHPvKh-_x8":{"duration":101.84,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=SLHPvKh-_x8","annotations":[]},"mZ6iJfRokTw":{"duration":132.59,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mZ6iJfRokTw","annotations":[{"segment":[45.712157160686424,118.52066087363494],"label":"Bungee jumping"}]},"ntYIITLp90k":{"duration":146.26,"subset":"training","resolution":"300x240","url":"https://www.youtube.com/watch?v=ntYIITLp90k","annotations":[{"segment":[41.642429982913605,79.29174928627144],"label":"Fixing bicycle"}]},"urnUdUVDoZg":{"duration":88.956,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=urnUdUVDoZg","annotations":[]},"sStBEz5KNs4":{"duration":82.431,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sStBEz5KNs4","annotations":[]},"FsXl6whrCWk":{"duration":35.248,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=FsXl6whrCWk","annotations":[{"segment":[0.009999999999999998,29.83442909623678],"label":"Doing a powerbomb"}]},"ijnNP7P5m28":{"duration":143.36,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ijnNP7P5m28","annotations":[{"segment":[37.28131931464174,112.30611059190032],"label":"Snow tubing"}]},"ERcMdKNF_pc":{"duration":40.15,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ERcMdKNF_pc","annotations":[{"segment":[0,37.8299066438848],"label":"Using parallel bars"}]},"I3Rm5Cw33Zw":{"duration":182.347,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=I3Rm5Cw33Zw","annotations":[]},"xWxpAaPhknA":{"duration":163.1,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xWxpAaPhknA","annotations":[{"segment":[8.64851794071763,152.11216848673948],"label":"Plataform 
diving"}]},"5V47VQHzWHc":{"duration":118.65,"subset":"training","resolution":"300x240","url":"https://www.youtube.com/watch?v=5V47VQHzWHc","annotations":[{"segment":[15.544717628705149,118.621],"label":"Playing accordion"}]},"RZNsL5O_4uc":{"duration":196.906,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=RZNsL5O_4uc","annotations":[]},"i1llwxnI_Vo":{"duration":191.89,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=i1llwxnI_Vo","annotations":[{"segment":[11.372789391575663,53.27253978159126],"label":"Cumbia"},{"segment":[54.30842478,180.76749453978158],"label":"Cumbia"}]},"vdjSbzkRLIs":{"duration":226.648333333333,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vdjSbzkRLIs","annotations":[]},"XuYmybr9uDE":{"duration":72.49,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XuYmybr9uDE","annotations":[{"segment":[33.341229676730975,69.58749867673097],"label":"Mixing drinks"}]},"MdJ4WZvo2tg":{"duration":13.05,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MdJ4WZvo2tg","annotations":[]},"IWHER8iS1B4":{"duration":87.772,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IWHER8iS1B4","annotations":[{"segment":[13.004804709195172,57.63181876390702],"label":"Rope skipping"},{"segment":[62.14927724183798,76.65990003120125],"label":"Rope 
skipping"}]},"GLTxG31Kw8M":{"duration":100.914,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GLTxG31Kw8M","annotations":[]},"xS6elK6Khs4":{"duration":176.43,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xS6elK6Khs4","annotations":[{"segment":[46.006347250000005,125.86688775],"label":"Skateboarding"}]},"yz00070X04o":{"duration":228.86,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=yz00070X04o","annotations":[]},"r4oAhRg4H14":{"duration":159.313,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=r4oAhRg4H14","annotations":[{"segment":[9.19291263650546,11.925940717628706],"label":"Futsal"},{"segment":[12.919769110764431,15.652797191887675],"label":"Futsal"},{"segment":[18.882739469578784,20.37348205928237],"label":"Futsal"},{"segment":[21.118853354134163,23.10941185647426],"label":"Futsal"},{"segment":[24.84882995319813,25.34580655226209],"label":"Futsal"},{"segment":[26.588248049922,31.558014040561627],"label":"Futsal"},{"segment":[34.04289703588144,35.782315132605305],"label":"Futsal"},{"segment":[38.26719812792512,39.509639625585024],"label":"Futsal"},{"segment":[41.2490577223089,43.98242901716068],"label":"Futsal"},{"segment":[45.970335413416535,51.18858970358815],"label":"Futsal"},{"segment":[53.67347269890796,149.34146801872078],"label":"Futsal"}]},"39O9xB837Gs":{"duration":178.074,"subset":"testing","resolution":"556x360","url":"https://www.youtube.com/watch?v=39O9xB837Gs","annotations":[]},"zpBZ7HMNO34":{"duration":123.531,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zpBZ7HMNO34","annotations":[{"segment":[1.7307616822429905,71.15353582554516],"label":"Elliptical trainer"},{"segment":[84.4363847352025,97.32303115264797],"label":"Elliptical trainer"},{"segment":[106.17057943925234,123.481],"label":"Elliptical 
trainer"}]},"tCQiu-qY9XA":{"duration":71.31,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tCQiu-qY9XA","annotations":[{"segment":[28.367620600624026,59.850117188767555],"label":"Tennis serve with ball bouncing"}]},"RZnC-ZFEqv8":{"duration":111.108,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=RZnC-ZFEqv8","annotations":[]},"Hlc6FXX5kGs":{"duration":109.37,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Hlc6FXX5kGs","annotations":[{"segment":[0.6820655226209049,21.65954290171607],"label":"Putting on makeup"},{"segment":[95.33609828393135,109.121],"label":"Putting on makeup"}]},"lQUqzLT7bl0":{"duration":96.711,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lQUqzLT7bl0","annotations":[{"segment":[3.9199157566302656,96.661],"label":"Skiing"}]},"cO-4HzHsAsw":{"duration":99.846,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cO-4HzHsAsw","annotations":[]},"DVsLkoQijV4":{"duration":16.42,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DVsLkoQijV4","annotations":[{"segment":[1.2421225288611544,13.04300448],"label":"Volleyball"}]},"eLQ1c2tMJls":{"duration":72.31,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=eLQ1c2tMJls","annotations":[{"segment":[10.265103632716738,57.64250439625585],"label":"Polishing shoes"}]},"HFkc98prpGE":{"duration":165.791,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=HFkc98prpGE","annotations":[]},"QaNQrUpmmgo":{"duration":134.56,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QaNQrUpmmgo","annotations":[{"segment":[0.01,134.501],"label":"Using the rowing 
machine"}]},"tA7cF5wA6ZU":{"duration":76.185,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tA7cF5wA6ZU","annotations":[{"segment":[0.2,1.7827925897035881],"label":"Playing congas"},{"segment":[12.717253806552263,18.77874861154446],"label":"Playing congas"},{"segment":[22.344333790951637,27.336153042121687],"label":"Playing congas"},{"segment":[33.63535352574103,38.151761419656786],"label":"Playing congas"},{"segment":[54.672306084243374,58.237891263650546],"label":"Playing congas"},{"segment":[63.58626903276131,68.22152976599064],"label":"Playing congas"}]},"geuUVSJyovM":{"duration":84.08,"subset":"validation","resolution":"640x352","url":"https://www.youtube.com/watch?v=geuUVSJyovM","annotations":[{"segment":[22.68226375547748,82.63674662152108],"label":"Playing piano"}]},"GNg5kjnJlOE":{"duration":97.849,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=GNg5kjnJlOE","annotations":[{"segment":[27.281855313257182,93.5813361024576],"label":"Powerbocking"}]},"HHG1kCydLYU":{"duration":189.82,"subset":"validation","resolution":"848x480","url":"https://www.youtube.com/watch?v=HHG1kCydLYU","annotations":[{"segment":[21.469857277284287,182.86395495319815],"label":"Preparing salad"}]},"kCD0iQFnHA4":{"duration":135.791,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=kCD0iQFnHA4","annotations":[{"segment":[8.897336007726024,130.28241840093602],"label":"Making a cake"}]},"Z_YXWLkRmjQ":{"duration":15.65,"subset":"validation","resolution":"270x480","url":"https://www.youtube.com/watch?v=Z_YXWLkRmjQ","annotations":[{"segment":[0.48739469578783157,14.134446177847114],"label":"Doing 
karate"}]},"8nhuvbFSSmw":{"duration":207.564,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8nhuvbFSSmw","annotations":[{"segment":[12.304808954262933,185.54356623088924],"label":"Longboarding"},{"segment":[193.63883526365058,202.05791505772234],"label":"Longboarding"}]},"wMwJObSq21Y":{"duration":132.94,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=wMwJObSq21Y","annotations":[{"segment":[0,119.9958288],"label":"Doing step aerobics"}]},"JbAvSsIaI8g":{"duration":78.066,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=JbAvSsIaI8g","annotations":[{"segment":[0.01,70.45349331389171],"label":"Putting in contact lenses"}]},"g1lqpONbOKY":{"duration":19.46,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=g1lqpONbOKY","annotations":[{"segment":[0.75888774,7.938145400645958],"label":"Using the balance beam"}]},"lETAKUG4pQw":{"duration":66.711,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lETAKUG4pQw","annotations":[{"segment":[31.354588923556943,38.73825663026521],"label":"Clipping cat claws"}]},"olBh9KMAHMQ":{"duration":149.21,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=olBh9KMAHMQ","annotations":[{"segment":[47.90576599063963,66.74249921996879],"label":"Shoveling snow"},{"segment":[75.57948517940717,93.48600936037441],"label":"Shoveling snow"}]},"6WQSZekz8vQ":{"duration":56.146,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=6WQSZekz8vQ","annotations":[{"segment":[38.556006252082874,47.72445656818782],"label":"Doing 
crunches"}]},"GLHsjvmOXRs":{"duration":163.144,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GLHsjvmOXRs","annotations":[{"segment":[31.432430053664728,43.649083029987864],"label":"Canoeing"},{"segment":[68.33690258630753,72.1546066414085],"label":"Canoeing"},{"segment":[126.36600422384241,130.6927354862902],"label":"Canoeing"}]},"VceicZDzH3U":{"duration":195.164,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=VceicZDzH3U","annotations":[{"segment":[7.0027542761310455,195.163717],"label":"Carving jack-o-lanterns"}]},"hhk7A9gJcu8":{"duration":162.935,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=hhk7A9gJcu8","annotations":[{"segment":[23.8936327521128,38.38232495286206],"label":"Making a cake"},{"segment":[42.195138689901334,60.24245704522058],"label":"Making a cake"},{"segment":[71.42671067386912,84.89865254474124],"label":"Making a cake"},{"segment":[94.30359309610479,102.69178245865834],"label":"Making a cake"},{"segment":[126.33122743057721,132.94010451950078],"label":"Making a cake"}]},"ab3xXzyq6L8":{"duration":22.176,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=ab3xXzyq6L8","annotations":[]},"unE-vkRljRs":{"duration":99.89,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=unE-vkRljRs","annotations":[{"segment":[1.8700461651119107,97.70991051014042],"label":"Horseback riding"}]},"qeNMFrM0ZOE":{"duration":43.561,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=qeNMFrM0ZOE","annotations":[]},"6cgnE6x9nvI":{"duration":13.259,"subset":"testing","resolution":"480x480","url":"https://www.youtube.com/watch?v=6cgnE6x9nvI","annotations":[]},"8v2ewQE-QK0":{"duration":217.07999999999998,"subset":"validation","resolution":"322x240","url":"https://www.youtube.com/watch?v=8v2ewQE-QK0","annotations":[{"segment":[24.4332478980305,186.70689275784358],"label":"Playing 
bagpipes"}]},"pK3OSevBqDw":{"duration":138.879,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=pK3OSevBqDw","annotations":[{"segment":[27.08240472476042,32.06556719411634],"label":"Painting fence"}]},"dZyb8t-4ATQ":{"duration":26.587,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=dZyb8t-4ATQ","annotations":[{"segment":[0.01,26.586847],"label":"Installing carpet"}]},"jsofzMPb0i4":{"duration":139.99,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=jsofzMPb0i4","annotations":[{"segment":[27.18003978159126,128.91416458658344],"label":"Layup drill in basketball"}]},"ye7e0mitDdU":{"duration":43.05,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ye7e0mitDdU","annotations":[{"segment":[1.7441903276131046,42.981],"label":"Using the rowing machine"}]},"Dydb923dXss":{"duration":148.957,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Dydb923dXss","annotations":[{"segment":[0,148.921],"label":"Scuba diving"}]},"HrBa_UraJvc":{"duration":146.1,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HrBa_UraJvc","annotations":[{"segment":[0.4558595943837754,7.9775429017160695],"label":"Smoking hookah"},{"segment":[120.57486271450858,129.6920546021841],"label":"Smoking hookah"}]},"yo73PqGytMQ":{"duration":103.75,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=yo73PqGytMQ","annotations":[]},"tIaNvRDp0eE":{"duration":57.029,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=tIaNvRDp0eE","annotations":[{"segment":[18.934404056162247,41.246776911076445],"label":"Snow tubing"}]},"WPM0vuERyfc":{"duration":31.86,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=WPM0vuERyfc","annotations":[{"segment":[23.8063567925117,25.197960112324495],"label":"Washing 
face"}]},"9riZioI46sM":{"duration":113.221,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=9riZioI46sM","annotations":[{"segment":[7.418503825867321,113.22049886621315],"label":"Putting on shoes"}]},"Q4UF7Z0ozCw":{"duration":160.66,"subset":"training","resolution":"568x320","url":"https://www.youtube.com/watch?v=Q4UF7Z0ozCw","annotations":[{"segment":[94.9917209150951,158.65371673478938],"label":"Ironing clothes"}]},"F22yx0YBi9Q":{"duration":227.811,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=F22yx0YBi9Q","annotations":[]},"lgdCr24iaTg":{"duration":226.79,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lgdCr24iaTg","annotations":[]},"GwIHO7HpGkY":{"duration":117.076,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=GwIHO7HpGkY","annotations":[{"segment":[0.2,113.05683622174818],"label":"Ice fishing"}]},"Y2g4ipeFSHw":{"duration":103.306,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=Y2g4ipeFSHw","annotations":[]},"kdRcWkrZRZo":{"duration":179.05,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kdRcWkrZRZo","annotations":[{"segment":[26.536133619344778,30.167394009360375],"label":"Applying sunscreen"}]},"2NyOIsFJ3Tw":{"duration":105.001,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2NyOIsFJ3Tw","annotations":[{"segment":[17.19657566302652,104.961],"label":"Fun sliding down"}]},"SMNXIkCGh_0":{"duration":40.125,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=SMNXIkCGh_0","annotations":[{"segment":[0.19999999999999998,38.97667289719626],"label":"River 
tubing"}]},"Jth2Zlpr1gQ":{"duration":49.09,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=Jth2Zlpr1gQ","annotations":[{"segment":[4.364989127925116,43.03726122620904],"label":"Ballet"}]},"1cLxW-FhgpA":{"duration":181.6,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=1cLxW-FhgpA","annotations":[{"segment":[14.730683307332294,177.33476443057722],"label":"Belly dance"}]},"KZyg_UYyL0s":{"duration":43.631,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=KZyg_UYyL0s","annotations":[{"segment":[0.01,42.43465648982681],"label":"Painting fence"}]},"VqMA_qg5cfU":{"duration":39.8,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=VqMA_qg5cfU","annotations":[]},"zruHn4r6_CY":{"duration":113.94,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=zruHn4r6_CY","annotations":[{"segment":[7.998912732019484,20.619419486983563],"label":"Long jump"},{"segment":[29.684853916605643,110.91825555070203],"label":"Long jump"}]},"ICXWlSG5vVA":{"duration":156.09,"subset":"testing","resolution":"360x480","url":"https://www.youtube.com/watch?v=ICXWlSG5vVA","annotations":[]},"sHRajZw7A8g":{"duration":90.78999999999999,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=sHRajZw7A8g","annotations":[{"segment":[9.68225945475819,81.66796104],"label":"Mixing drinks"}]},"SQM-heGm8iU":{"duration":184.41,"subset":"training","resolution":"318x240","url":"https://www.youtube.com/watch?v=SQM-heGm8iU","annotations":[{"segment":[30.609469599999997,161.46980272],"label":"Playing 
racquetball"}]},"chsV9w7bzUE":{"duration":238.283,"subset":"testing","resolution":"480x480","url":"https://www.youtube.com/watch?v=chsV9w7bzUE","annotations":[]},"tI87cD7sv-Y":{"duration":32.834,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=tI87cD7sv-Y","annotations":[{"segment":[8.041783460218408,11.934621313572542],"label":"Putting in contact lenses"},{"segment":[16.44211777535101,21.256943541341652],"label":"Putting in contact lenses"}]},"52grVhaCfuU":{"duration":82.92,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=52grVhaCfuU","annotations":[]},"3POAgJRFPBo":{"duration":22.942,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3POAgJRFPBo","annotations":[]},"pRGlbeqRfM0":{"duration":206.496,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pRGlbeqRfM0","annotations":[{"segment":[0.01,0.02],"label":"Gargling mouthwash"},{"segment":[141.09932291169198,141.42146748454974],"label":"Gargling mouthwash"}]},"DWVI_N4Aeo8":{"duration":26.47,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=DWVI_N4Aeo8","annotations":[{"segment":[1.2801765319812792,24.777610296411854],"label":"Springboard diving"}]},"ok3Kpgv0PzE":{"duration":97.85,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ok3Kpgv0PzE","annotations":[{"segment":[3.890705901716068,59.705871120000005],"label":"Using the balance beam"},{"segment":[65.91921912,95.89445722464899],"label":"Using the balance 
beam"}]},"-YMpwZkNc2A":{"duration":162.75,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-YMpwZkNc2A","annotations":[{"segment":[29.312554542121685,129.5589531926677],"label":"Shaving"}]},"0JgcRWHCi4c":{"duration":22.779,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0JgcRWHCi4c","annotations":[{"segment":[10.021239107644305,22.778774],"label":"Disc dog"}]},"xaMEGQCxddY":{"duration":46.58,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=xaMEGQCxddY","annotations":[{"segment":[5.788247488299532,44.49715256630265],"label":"Discus throw"}]},"i4SNM6xSLI8":{"duration":41.749,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=i4SNM6xSLI8","annotations":[{"segment":[0.01,38.75341604140356],"label":"Arm wrestling"}]},"nQM5LT1-ZRU":{"duration":217.78,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=nQM5LT1-ZRU","annotations":[{"segment":[64.70214089999999,124.49554313999998],"label":"Fixing bicycle"},{"segment":[31.420176758970353,183.42589675507017],"label":"Fixing bicycle"}]},"KBxE7NIOkf8":{"duration":67.25,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=KBxE7NIOkf8","annotations":[{"segment":[33.72439703588144,56.277259750390016],"label":"Bungee jumping"}]},"Mnhg-VEP69U":{"duration":61.324,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Mnhg-VEP69U","annotations":[{"segment":[6.7925068893912215,54.81839957410296],"label":"Changing car wheel"}]},"hf1JuvacaJc":{"duration":170.505,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hf1JuvacaJc","annotations":[{"segment":[0.01,55.59338190398364],"label":"Playing ten pins"},{"segment":[60.6473257134367,77.40513939741264],"label":"Playing ten pins"},{"segment":[86.44903884590757,133.7965121872075],"label":"Playing ten 
pins"},{"segment":[138.85045598751952,166.51414836817474],"label":"Playing ten pins"}]},"pJRUkudK6iE":{"duration":197.7,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pJRUkudK6iE","annotations":[{"segment":[0.01,192.14320481390683],"label":"Cleaning shoes"}]},"-ixct4B3dtg":{"duration":210.1,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=-ixct4B3dtg","annotations":[]},"oIEDMaMo7UE":{"duration":9.71,"subset":"validation","resolution":"350x240","url":"https://www.youtube.com/watch?v=oIEDMaMo7UE","annotations":[{"segment":[0.5253276131045242,7.159464898595944],"label":"Hammer throw"}]},"xsdrqauYhJs":{"duration":130.59,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=xsdrqauYhJs","annotations":[{"segment":[15.890708068642745,93.51070517316693],"label":"Baking cookies"},{"segment":[104.10451055226208,113.67968079875195],"label":"Baking cookies"}]},"7KKYKIlGaXk":{"duration":195.304,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7KKYKIlGaXk","annotations":[{"segment":[9.721897869146362,13.063801855209425],"label":"Wakeboarding"},{"segment":[21.57046654700631,30.684750145360113],"label":"Wakeboarding"},{"segment":[59.242838753535366,115.14377815677203],"label":"Wakeboarding"},{"segment":[133.0685359002012,151.60091255018725],"label":"Wakeboarding"},{"segment":[160.41138549798373,164.96852726314705],"label":"Wakeboarding"},{"segment":[176.20947695054983,178.33614310762604],"label":"Wakeboarding"}]},"qmlohhdz784":{"duration":180.7,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=qmlohhdz784","annotations":[{"segment":[0,180.49768707482994],"label":"Playing 
racquetball"}]},"7VvcQNSAd24":{"duration":48.97,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=7VvcQNSAd24","annotations":[{"segment":[4.889448636505461,41.48391577535101],"label":"Shaving"}]},"tgnyGoKM2tM":{"duration":130.4,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=tgnyGoKM2tM","annotations":[{"segment":[34.27382049,128.57233022382118],"label":"Fixing bicycle"}]},"-76d-7Ju7L0":{"duration":191.89,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=-76d-7Ju7L0","annotations":[{"segment":[38.013021840873634,58.96508112324493],"label":"Changing car wheel"},{"segment":[76.32535881435257,109.25002340093603],"label":"Changing car wheel"}]},"z0tiCqKa4cs":{"duration":92.74000000000001,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=z0tiCqKa4cs","annotations":[{"segment":[13.338915359999998,53.43242830499219],"label":"Making a sandwich"}]},"7rd23-3wEyU":{"duration":198.86,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7rd23-3wEyU","annotations":[{"segment":[139.77863535558538,142.87606683533616],"label":"Smoking hookah"}]},"OEBSls79Ths":{"duration":65.06,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=OEBSls79Ths","annotations":[{"segment":[4.060050608424337,65.06231292517006],"label":"Using uneven bars"}]},"lpZytCK-g1M":{"duration":72.33,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=lpZytCK-g1M","annotations":[{"segment":[0.01,72.33018140589569],"label":"Washing face"}]},"iBEUNOMTr8M":{"duration":182.533,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=iBEUNOMTr8M","annotations":[{"segment":[83.98749438212123,161.32195055657604],"label":"Roof shingle 
removal"}]},"1wiGNX_S8ZU":{"duration":109.065,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1wiGNX_S8ZU","annotations":[{"segment":[0,109.06412698412699],"label":"Capoeira"}]},"_VPf75tGIHQ":{"duration":60.14,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=_VPf75tGIHQ","annotations":[{"segment":[7.641546801872075,54.99100858034321],"label":"Playing racquetball"}]},"Sx3NHkPp3Jo":{"duration":112.34,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Sx3NHkPp3Jo","annotations":[{"segment":[5.343051190327613,102.91942866614664],"label":"Triple jump"}]},"iUGuDzgow2I":{"duration":48.65,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iUGuDzgow2I","annotations":[{"segment":[4.78910025,36.94830975],"label":"Using parallel bars"}]},"dk4maV6EqBw":{"duration":31.092,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dk4maV6EqBw","annotations":[]},"2DTwoKZsLMo":{"duration":163.632,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=2DTwoKZsLMo","annotations":[{"segment":[7.785875386035849,30.50531503709128],"label":"Putting on shoes"},{"segment":[82.07078410062402,86.41045231123246],"label":"Putting on 
shoes"}]},"Ufi6HgfXMBY":{"duration":122.8,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=Ufi6HgfXMBY","annotations":[]},"8Yfm6gbKRho":{"duration":92.11,"subset":"training","resolution":"766x480","url":"https://www.youtube.com/watch?v=8Yfm6gbKRho","annotations":[{"segment":[2.69706864,63.76614538],"label":"Cricket"}]},"pzHDfjY-pFY":{"duration":204.08,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=pzHDfjY-pFY","annotations":[{"segment":[0.20000000000000004,203.841],"label":"Waterskiing"}]},"5oPGbuL8G5Y":{"duration":188.221,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5oPGbuL8G5Y","annotations":[{"segment":[0.2,185.8755199525293],"label":"Waterskiing"}]},"8729_qnILMM":{"duration":85.61,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8729_qnILMM","annotations":[{"segment":[4.672909516380655,79.97350858034322],"label":"Playing flauta"}]},"iPHZeElXYXU":{"duration":176.43,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=iPHZeElXYXU","annotations":[{"segment":[26.181481962336104,40.47137468507754],"label":"Ironing clothes"},{"segment":[50.36437733928315,158.91260090626133],"label":"Ironing clothes"}]},"1XtjXqqPvyQ":{"duration":138.67,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=1XtjXqqPvyQ","annotations":[{"segment":[0.01802666475314469,133.2792661405722],"label":"Disc dog"}]},"9hTcvGUnMBs":{"duration":157.29,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9hTcvGUnMBs","annotations":[]},"FNX7CeuIvIQ":{"duration":184.599,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=FNX7CeuIvIQ","annotations":[{"segment":[9.212655226209048,150.29772542901716],"label":"Disc 
dog"}]},"Q48_MDiak-w":{"duration":13.17,"subset":"training","resolution":"568x320","url":"https://www.youtube.com/watch?v=Q48_MDiak-w","annotations":[{"segment":[2.0333487217039705,9.632732833122848],"label":"Hammer throw"}]},"YtPZMBePf5c":{"duration":37.547,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=YtPZMBePf5c","annotations":[{"segment":[0.01,32.64125149090491],"label":"Having an ice cream"}]},"AjiCJiOIDtI":{"duration":37.129,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AjiCJiOIDtI","annotations":[{"segment":[0.01,35.82124698534225],"label":"Kneeling"}]},"c1YUEusus3c":{"duration":90.28,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=c1YUEusus3c","annotations":[]},"hlvs-e3bCq0":{"duration":175.288,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=hlvs-e3bCq0","annotations":[{"segment":[8.750698915031432,80.67050562294601],"label":"Skiing"}]},"5i6-j67nzCo":{"duration":132.517,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=5i6-j67nzCo","annotations":[]},"QDTo_ss6INM":{"duration":124.16,"subset":"validation","resolution":"634x480","url":"https://www.youtube.com/watch?v=QDTo_ss6INM","annotations":[{"segment":[24.986373453978157,92.39147393447737],"label":"Preparing salad"}]},"v26OP6f29Qc":{"duration":145.427,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=v26OP6f29Qc","annotations":[{"segment":[99.5979201998012,109.58039967312979],"label":"Rock-paper-scissors"},{"segment":[118.20163194554993,145.22657596371883],"label":"Rock-paper-scissors"}]},"Qt2ktBj1l-Y":{"duration":80.95,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Qt2ktBj1l-Y","annotations":[{"segment":[17.5786219,73.58797098000001],"label":"Shoveling 
snow"}]},"IEtXUErbdRA":{"duration":119.351,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=IEtXUErbdRA","annotations":[]},"Q-fUXywUo7o":{"duration":144.61,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Q-fUXywUo7o","annotations":[{"segment":[14.249274582705393,68.97385869485491],"label":"Polishing shoes"},{"segment":[74.15355184127236,93.52110012787672],"label":"Polishing shoes"},{"segment":[102.07885402195772,110.86181196588295],"label":"Polishing shoes"}]},"EjUMR9v_8Pg":{"duration":136.53,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EjUMR9v_8Pg","annotations":[{"segment":[34.718975205266716,136.53290249433107],"label":"Breakdancing"}]},"p6jip892RdM":{"duration":206.71,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=p6jip892RdM","annotations":[{"segment":[15.15267285803432,184.4112526552262],"label":"Doing motocross"}]},"pc5_pexVob8":{"duration":190.5,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pc5_pexVob8","annotations":[{"segment":[95.67067082683306,157.47035881435255],"label":"Preparing salad"}]},"UGKGBBAckJw":{"duration":82.76,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=UGKGBBAckJw","annotations":[{"segment":[21.302231313742343,82.755941],"label":"Doing step aerobics"}]},"XlSmrSegTME":{"duration":24.474,"subset":"testing","resolution":"1906x1080","url":"https://www.youtube.com/watch?v=XlSmrSegTME","annotations":[]},"aSxSgymPOBw":{"duration":95.64,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aSxSgymPOBw","annotations":[{"segment":[0.01,88.03333117004681],"label":"Walking the dog"}]},"G4XDVF_hYZc":{"duration":32.601,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=G4XDVF_hYZc","annotations":[{"segment":[0.5841677067082683,25.01761700468019],"label":"Playing rubik 
cube"}]},"LIVk_vFb9Ps":{"duration":206.17,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LIVk_vFb9Ps","annotations":[]},"oEDBkmmVKM0":{"duration":60.47,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=oEDBkmmVKM0","annotations":[{"segment":[5.32386895475819,12.579407176287052],"label":"Playing harmonica"},{"segment":[16.819656786271448,37.83244929797192],"label":"Playing harmonica"},{"segment":[42.9207488299532,58.18564742589704],"label":"Playing harmonica"}]},"un6VqJYUpDo":{"duration":109.6,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=un6VqJYUpDo","annotations":[{"segment":[31.374831145085807,35.13639128237129],"label":"Cleaning windows"},{"segment":[42.65951155694228,52.063411900156005],"label":"Cleaning windows"},{"segment":[79.42021289859595,85.74647312948518],"label":"Cleaning windows"}]},"rrNJt73BG_8":{"duration":57.1,"subset":"training","resolution":"658x480","url":"https://www.youtube.com/watch?v=rrNJt73BG_8","annotations":[{"segment":[6.868004876145692,38.50930705683105],"label":"Washing hands"}]},"tilmCzSf1oc":{"duration":22.48,"subset":"training","resolution":"160x120","url":"https://www.youtube.com/watch?v=tilmCzSf1oc","annotations":[{"segment":[6.612403839999999,18.695826053042122],"label":"Doing karate"}]},"iddZ6YIWLWc":{"duration":115.171,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iddZ6YIWLWc","annotations":[{"segment":[19.22510798439938,94.14912695163807],"label":"Slacklining"}]},"P4Z4sQNQtJM":{"duration":209.3,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=P4Z4sQNQtJM","annotations":[{"segment":[0.01,179.75385465678627],"label":"Tai chi"}]},"2tO1ApNwXpQ":{"duration":111.83,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=2tO1ApNwXpQ","annotations":[{"segment":[45.35896784711389,57.57099765210609],"label":"Polishing 
shoes"},{"segment":[68.21290933931357,111.6273015],"label":"Polishing shoes"}]},"6tDur7vIIM4":{"duration":222.935,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=6tDur7vIIM4","annotations":[]},"dPQ9D-BiKH4":{"duration":122.9,"subset":"testing","resolution":"636x360","url":"https://www.youtube.com/watch?v=dPQ9D-BiKH4","annotations":[]},"qlqF8K072UU":{"duration":161.5,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=qlqF8K072UU","annotations":[{"segment":[3.651647425897036,26.568882995319814],"label":"Triple jump"},{"segment":[42.43466146645866,60.06330421216849],"label":"Triple jump"},{"segment":[82.98053978159126,94.56507644305772],"label":"Triple jump"},{"segment":[121.76355382215289,130.5778751950078],"label":"Triple jump"},{"segment":[137.3774945397816,158.02819032761312],"label":"Triple jump"}]},"LVG7DLzI-GM":{"duration":66.502,"subset":"training","resolution":"2048x1152","url":"https://www.youtube.com/watch?v=LVG7DLzI-GM","annotations":[{"segment":[3.835127925117005,55.97213728549142],"label":"Playing blackjack"}]},"dvHj856L8zY":{"duration":126.64,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dvHj856L8zY","annotations":[{"segment":[0,126.64163265306122],"label":"Horseback riding"}]},"6Epn-R6sg5U":{"duration":85.102,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=6Epn-R6sg5U","annotations":[{"segment":[0.01,69.64508873341754],"label":"Playing pool"}]},"v-xUwDARVb4":{"duration":158.64,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=v-xUwDARVb4","annotations":[{"segment":[9.528191840093603,141.4379645873635],"label":"Playing guitarra"}]},"pKV8lrvbX6Q":{"duration":38.151,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=pKV8lrvbX6Q","annotations":[{"segment":[0.2,10.359921996879876],"label":"Using the monkey 
bar"},{"segment":[22.863276131045243,33.52089703588143],"label":"Using the monkey bar"}]},"p-uTwZ9Ph-A":{"duration":13.58,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=p-uTwZ9Ph-A","annotations":[{"segment":[0.01,13.032696224648985],"label":"Playing squash"}]},"oTsTx2BhtgY":{"duration":184.646,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=oTsTx2BhtgY","annotations":[{"segment":[0.01,176.82529485179407],"label":"Sharpening knives"}]},"4wIqgWw53XE":{"duration":163.86,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4wIqgWw53XE","annotations":[{"segment":[47.01931981279251,61.074007800312],"label":"Windsurfing"},{"segment":[74.87315600624025,140.29134009360374],"label":"Windsurfing"}]},"a0YyuiZVtFU":{"duration":27.539,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=a0YyuiZVtFU","annotations":[{"segment":[0.21436037441497663,27.286483619344775],"label":"Kneeling"}]},"uQlRZj_jQ9c":{"duration":59.769,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uQlRZj_jQ9c","annotations":[]},"1epGZvRN3Fw":{"duration":192.56,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=1epGZvRN3Fw","annotations":[{"segment":[24.47809906396256,44.30085413416536],"label":"Snatch"},{"segment":[50.9084391575663,70.73119422776911],"label":"Snatch"},{"segment":[96.56084477379093,123.59187441497659],"label":"Snatch"}]},"FMVECEaQ0Jo":{"duration":169.669,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FMVECEaQ0Jo","annotations":[{"segment":[17.465391575663027,21.964053042121687],"label":"Running a marathon"},{"segment":[59.01463650546021,65.10134789391576],"label":"Running a marathon"},{"segment":[93.68242745709827,99.76913884555383],"label":"Running a 
marathon"}]},"wfCH5qwHoeQ":{"duration":87.642,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wfCH5qwHoeQ","annotations":[]},"tb7s5a1H-IU":{"duration":151.348,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=tb7s5a1H-IU","annotations":[{"segment":[57.13906438706528,102.00031328600083],"label":"Blow-drying hair"}]},"6lYTHj9vImo":{"duration":60.117,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6lYTHj9vImo","annotations":[{"segment":[0.01,37.77411286468916],"label":"Building sandcastles"}]},"ZeugCIqbsD4":{"duration":48.832,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZeugCIqbsD4","annotations":[{"segment":[11.19850196723869,44.41310644149766],"label":"Peeling potatoes"},{"segment":[0.2,48.631563],"label":"Peeling potatoes"}]},"I91LmNcwN4Y":{"duration":126.456,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=I91LmNcwN4Y","annotations":[{"segment":[60.44409190031153,81.31403894080997],"label":"Painting furniture"}]},"cz2ESqP3PDk":{"duration":48.995,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=cz2ESqP3PDk","annotations":[{"segment":[4.428358814352574,47.03222464898596],"label":"Playing 
blackjack"}]},"qcjRTlgnuXY":{"duration":55.264,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qcjRTlgnuXY","annotations":[]},"yGjIBQLyAMY":{"duration":80.829,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=yGjIBQLyAMY","annotations":[]},"EP50UmPrcAs":{"duration":171.108,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EP50UmPrcAs","annotations":[]},"fbkFWMYPado":{"duration":103.65,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fbkFWMYPado","annotations":[]},"hQeDWu5NOj8":{"duration":128.384,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=hQeDWu5NOj8","annotations":[{"segment":[0.2,90.38773826791278],"label":"Rafting"}]},"BMzspHz04Q8":{"duration":100.6,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BMzspHz04Q8","annotations":[{"segment":[4.48533855,90.33075473166927],"label":"Using the balance beam"}]},"POckWq-93Uo":{"duration":134.91,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=POckWq-93Uo","annotations":[]},"25PDCgM4qV0":{"duration":238.24,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=25PDCgM4qV0","annotations":[]},"nEX4I5j6H-Y":{"duration":201.968,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=nEX4I5j6H-Y","annotations":[]},"kdS1zN8Ej1s":{"duration":69.266,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kdS1zN8Ej1s","annotations":[]},"i3u27-oJzBY":{"duration":124.04,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=i3u27-oJzBY","annotations":[{"segment":[90.95031371294851,113.78464779407176],"label":"Vacuuming 
floor"}]},"yJi5VXV1O3I":{"duration":54.823,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=yJi5VXV1O3I","annotations":[]},"RKzwMrL5Th4":{"duration":67.407,"subset":"validation","resolution":"562x480","url":"https://www.youtube.com/watch?v=RKzwMrL5Th4","annotations":[{"segment":[0.01,67.40702947845806],"label":"Futsal"}]},"qXNYHbnGvto":{"duration":186.81,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=qXNYHbnGvto","annotations":[{"segment":[10.199935553822153,179.2274390171607],"label":"Grooming horse"}]},"pyeTqpj2EPo":{"duration":133.66,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pyeTqpj2EPo","annotations":[{"segment":[24.916786342201984,37.844324026022264],"label":"Preparing pasta"},{"segment":[50.14633569288349,86.63535334882783],"label":"Preparing pasta"}]},"oGwn4NUeoy8":{"duration":153.09,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=oGwn4NUeoy8","annotations":[{"segment":[37.01843986684637,42.0338413971933],"label":"Playing congas"},{"segment":[38.929069021264255,43.227984618704475],"label":"Playing congas"},{"segment":[14.568547302436315,17.43449103406313],"label":"Playing congas"},{"segment":[19.34512018848101,20.77809205429442],"label":"Playing congas"},{"segment":[22.688721208712295,26.032322228943578],"label":"Playing congas"},{"segment":[28.65943731626816,30.80889511498827],"label":"Playing congas"},{"segment":[33.91366749091732,36.5407825782419],"label":"Playing congas"},{"segment":[39.40672630986872,42.0338413971933],"label":"Playing congas"},{"segment":[44.66095648451788,47.04924292754023],"label":"Playing congas"},{"segment":[50.39284394777152,52.78113039079386],"label":"Playing congas"},{"segment":[56.12473141102515,59.46833243125643],"label":"Playing congas"},{"segment":[62.33427616288325,65.91670582741676],"label":"Playing congas"},{"segment":[68.78264955904359,73.32039380078605],"label":"Playing 
congas"},{"segment":[76.90282346531956,80.96291041845755],"label":"Playing congas"},{"segment":[83.82885415008438,87.65011245892013],"label":"Playing congas"},{"segment":[90.99371347915141,94.33731449938269],"label":"Playing congas"},{"segment":[100.54685925124079,104.36811756007654],"label":"Playing congas"},{"segment":[104.60694620437879,109.86117637902795],"label":"Playing congas"},{"segment":[114.3989206207704,119.1754935068151],"label":"Playing congas"},{"segment":[121.80260859413968,122.28026588274415],"label":"Playing congas"},{"segment":[126.81801012448662,131.5945830105313],"label":"Playing congas"},{"segment":[133.74404080925143,139.47592827250503],"label":"Playing congas"},{"segment":[142.5807006484341,148.55141675598995],"label":"Playing congas"}]},"7UpgIJtQIPk":{"duration":35.55,"subset":"testing","resolution":"972x720","url":"https://www.youtube.com/watch?v=7UpgIJtQIPk","annotations":[]},"oezddremlnE":{"duration":152.138,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=oezddremlnE","annotations":[{"segment":[45.55312324492979,123.6265538221529],"label":"Braiding hair"}]},"g4Kxg9fWtNI":{"duration":207.05,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=g4Kxg9fWtNI","annotations":[]},"69IsHpmRyfk":{"duration":94.72,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=69IsHpmRyfk","annotations":[{"segment":[2.5835842433697347,88.65384789391575],"label":"Getting a piercing"}]},"Ub2g9Yzon0c":{"duration":218.41,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=Ub2g9Yzon0c","annotations":[]},"2icoQWmbocU":{"duration":68.85,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=2icoQWmbocU","annotations":[{"segment":[6.551758196567863,16.862721915756634],"label":"Tai chi"},{"segment":[23.62929185647426,38.66611394695788],"label":"Tai chi"},{"segment":[41.458666620904836,60.89912946645867],"label":"Tai 
chi"}]},"75u6o4tXGC0":{"duration":225.93,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=75u6o4tXGC0","annotations":[{"segment":[26.787351113092143,59.21414456578263],"label":"Preparing salad"},{"segment":[65.55851698595944,94.10819373790952],"label":"Preparing salad"},{"segment":[116.31349787831513,121.24800990951637],"label":"Preparing salad"},{"segment":[131.8219642620905,133.2318248424337],"label":"Preparing salad"},{"segment":[140.28112774414976,149.44522151638066],"label":"Preparing salad"},{"segment":[165.65861819032762,166.71601362558505],"label":"Preparing salad"},{"segment":[183.28187544461778,199.49527211856477],"label":"Preparing salad"}]},"XQMG33we6eI":{"duration":213.97,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=XQMG33we6eI","annotations":[]},"1f_EQl4C700":{"duration":204.197,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1f_EQl4C700","annotations":[{"segment":[35.9971123634061,59.251883756630264],"label":"Making a cake"},{"segment":[59.570442271450865,100.98304919812793],"label":"Making a cake"},{"segment":[104.16863434633386,150.996736024961],"label":"Making a cake"}]},"xE43h7Kd9Oc":{"duration":215.621,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xE43h7Kd9Oc","annotations":[{"segment":[0.2,122.92383580223365],"label":"Decorating the Christmas tree"}]},"3XUxm78gjJE":{"duration":143.24,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3XUxm78gjJE","annotations":[{"segment":[10.72746827,140.93973966146646],"label":"Cricket"}]},"zc-RT9tpoBA":{"duration":218.66,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=zc-RT9tpoBA","annotations":[{"segment":[68.56649039022786,76.41240720104994],"label":"Playing harmonica"},{"segment":[85.96395810118119,96.88001627275976],"label":"Playing 
harmonica"},{"segment":[114.95973761943675,126.89917624460081],"label":"Playing harmonica"},{"segment":[150.09579985920524,157.60058895163806],"label":"Playing harmonica"}]},"nXD-q814KjI":{"duration":25.264,"subset":"training","resolution":"800x480","url":"https://www.youtube.com/watch?v=nXD-q814KjI","annotations":[{"segment":[0,24.51317316692668],"label":"Painting fence"}]},"JVfhBvlv0IY":{"duration":125.39,"subset":"training","resolution":"634x360","url":"https://www.youtube.com/watch?v=JVfhBvlv0IY","annotations":[{"segment":[8.802548632557547,108.17354208454053],"label":"Long jump"}]},"avJNxcysMCk":{"duration":95.69,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=avJNxcysMCk","annotations":[{"segment":[17.316586806864276,90.61351889453978],"label":"Running a marathon"}]},"OZiArYAPXDo":{"duration":184.85,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=OZiArYAPXDo","annotations":[{"segment":[9.07946957878315,168.18636505460216],"label":"Discus throw"}]},"-cAcA8dO7kA":{"duration":11.63,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-cAcA8dO7kA","annotations":[]},"EwoxOKwqUPI":{"duration":142.2,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EwoxOKwqUPI","annotations":[{"segment":[0.01,142.141],"label":"Beer pong"}]},"FBANd818hU4":{"duration":221.705,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=FBANd818hU4","annotations":[{"segment":[1.0376163884555383,221.70403628117913],"label":"Futsal"}]},"bTE3HEy_kVk":{"duration":56.402,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=bTE3HEy_kVk","annotations":[{"segment":[26.83679678627145,34.843841073322935],"label":"Fixing the 
roof"}]},"ImysAb63CLk":{"duration":30.98,"subset":"testing","resolution":"960x720","url":"https://www.youtube.com/watch?v=ImysAb63CLk","annotations":[]},"l0btLzdAeuM":{"duration":151.58,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=l0btLzdAeuM","annotations":[{"segment":[20.74498555876472,89.10915322406665],"label":"Table soccer"},{"segment":[129.18469994257364,140.50014842779916],"label":"Table soccer"}]},"uLXSOqRSa4A":{"duration":226.74,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=uLXSOqRSa4A","annotations":[{"segment":[24.053801139800694,144.88360315999998],"label":"Bungee jumping"}]},"wdD-UHM8rTg":{"duration":223.1,"subset":"validation","resolution":"400x226","url":"https://www.youtube.com/watch?v=wdD-UHM8rTg","annotations":[{"segment":[50.814679161316114,59.51582251638066],"label":"Playing violin"},{"segment":[112.07073011856473,117.29141630421216],"label":"Playing violin"},{"segment":[120.0757822698908,125.2964684555382],"label":"Playing violin"},{"segment":[136.0858865725429,139.21829828393135],"label":"Playing violin"},{"segment":[141.30657275819033,144.09093872386893],"label":"Playing violin"},{"segment":[154.18426534945397,156.27253982371295],"label":"Playing violin"},{"segment":[164.27759197503897,168.80218666926675],"label":"Playing violin"}]},"JXvPFMjtbcw":{"duration":227.185,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=JXvPFMjtbcw","annotations":[{"segment":[13.465457098283931,227.141],"label":"Making an omelette"}]},"pem8BpCspUM":{"duration":4.27,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=pem8BpCspUM","annotations":[{"segment":[0.013232449297971918,4.194686427457098],"label":"Playing 
kickball"}]},"GBFRHM7i-NQ":{"duration":129.823,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=GBFRHM7i-NQ","annotations":[{"segment":[86.84673322932919,120.26507644305772],"label":"Bullfighting"}]},"1imA9vLRd3k":{"duration":152.6,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1imA9vLRd3k","annotations":[{"segment":[22.859232449297973,42.62294383775352],"label":"Doing karate"}]},"SAaqnGbci6Y":{"duration":156.64,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=SAaqnGbci6Y","annotations":[{"segment":[8.696708319999999,151.02088560374415],"label":"Washing dishes"}]},"avxSMcGBdG8":{"duration":91.325,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=avxSMcGBdG8","annotations":[{"segment":[1.4624716365493347,6.729957617857746],"label":"Croquet"},{"segment":[73.783630515054,78.05456509449326],"label":"Croquet"}]},"Cul7VFkLTMc":{"duration":147.424,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Cul7VFkLTMc","annotations":[{"segment":[0.01,147.423038],"label":"Surfing"}]},"MZkc1bGdVPg":{"duration":151.464,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MZkc1bGdVPg","annotations":[]},"dWPlarw_xg8":{"duration":214.69,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=dWPlarw_xg8","annotations":[{"segment":[16.908571190327613,199.05238757722307],"label":"Long 
jump"}]},"pNmAqAYOALA":{"duration":78.39,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=pNmAqAYOALA","annotations":[]},"zAPoqg3REns":{"duration":233.767,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zAPoqg3REns","annotations":[]},"bJj66H_WsDA":{"duration":97.849,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bJj66H_WsDA","annotations":[{"segment":[0.19999999999999998,92.78497347893916],"label":"Hurling"}]},"e9bdQGmyrKA":{"duration":97.501,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=e9bdQGmyrKA","annotations":[{"segment":[0.19999999999999998,39.996104524180964],"label":"Rope skipping"},{"segment":[53.378831513260536,97.481],"label":"Rope skipping"}]},"cp4n_xpbBDs":{"duration":81.433,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cp4n_xpbBDs","annotations":[{"segment":[18.743376576038656,51.79757786464119],"label":"Painting fence"}]},"OabVylOVys4":{"duration":207.215,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OabVylOVys4","annotations":[{"segment":[54.14011801830633,79.92112894004154],"label":"Rafting"},{"segment":[108.60250359047197,111.82512995568885],"label":"Rafting"},{"segment":[167.8988287104629,173.69955509199372],"label":"Rafting"}]},"CArYinl5tFo":{"duration":146.124,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=CArYinl5tFo","annotations":[{"segment":[30.30615600624025,130.5671232449298],"label":"Assembling bicycle"}]},"Tm7N2HU4noQ":{"duration":216.06,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Tm7N2HU4noQ","annotations":[{"segment":[8.08967281123245,11.460369815912637],"label":"Layup drill in basketball"},{"segment":[35.72938824960998,37.07766705148206],"label":"Layup drill in basketball"},{"segment":[41.12250345709828,42.80785195943838],"label":"Layup 
drill in basketball"},{"segment":[86.96398272074883,89.32347062402496],"label":"Layup drill in basketball"},{"segment":[91.6829585273011,93.03123732917317],"label":"Layup drill in basketball"},{"segment":[103.48039804368176,106.51402534789392],"label":"Layup drill in basketball"},{"segment":[109.21058295163806,111.23300115444619],"label":"Layup drill in basketball"},{"segment":[130.78304378159126,133.14253168486738],"label":"Layup drill in basketball"},{"segment":[137.52443779095162,140.2209953946958],"label":"Layup drill in basketball"},{"segment":[148.31066820592824,149.99601670826834],"label":"Layup drill in basketball"},{"segment":[155.72620161622464,158.08568951950076],"label":"Layup drill in basketball"}]},"pZxteNqdweM":{"duration":80.36,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=pZxteNqdweM","annotations":[{"segment":[3.1334243369734787,76.45555382215288],"label":"Mixing drinks"}]},"TdZOfBHjU5g":{"duration":232.07999999999998,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TdZOfBHjU5g","annotations":[{"segment":[11.948882995319813,225.21834009360376],"label":"Polishing forniture"}]},"c4GQ3WTOeAA":{"duration":105.72,"subset":"training","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=c4GQ3WTOeAA","annotations":[{"segment":[8.278562733443822,70.854316347151],"label":"Wrapping presents"},{"segment":[81.88740974746251,104.94163476303885],"label":"Wrapping presents"}]},"eFauvKa-AjY":{"duration":43.584,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=eFauvKa-AjY","annotations":[]},"RZdX_1VP7mk":{"duration":164.119,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RZdX_1VP7mk","annotations":[]},"P3oTMGfwOOs":{"duration":135.768,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=P3oTMGfwOOs","annotations":[{"segment":[19.244242693962317,109.33267552503868],"label":"Roof shingle 
removal"}]},"Pp4U2Ql7bhY":{"duration":200.079,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Pp4U2Ql7bhY","annotations":[{"segment":[0.01,200.081],"label":"Welding"}]},"wJuRhdHgVO4":{"duration":54.2,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wJuRhdHgVO4","annotations":[]},"EavcvmrlVLQ":{"duration":159.916,"subset":"training","resolution":"562x480","url":"https://www.youtube.com/watch?v=EavcvmrlVLQ","annotations":[{"segment":[0.2,156.1396349453978],"label":"Futsal"}]},"FzWsQ1W_NKM":{"duration":60.35,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FzWsQ1W_NKM","annotations":[]},"B0cb0B90Ubg":{"duration":86.402,"subset":"validation","resolution":"192x144","url":"https://www.youtube.com/watch?v=B0cb0B90Ubg","annotations":[{"segment":[9.028903276131045,22.774397815912636],"label":"Grooming dog"},{"segment":[27.22146957878315,51.34346489859595],"label":"Grooming dog"},{"segment":[52.55630265210609,80.99060998439938],"label":"Grooming dog"}]},"rqmi-DjYp0U":{"duration":36.9,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=rqmi-DjYp0U","annotations":[{"segment":[3.6979329173166926,12.813767550702027],"label":"Drinking beer"},{"segment":[18.317667706708267,35.34535881435257],"label":"Drinking beer"}]},"PzeM5iOLQnk":{"duration":110.34,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=PzeM5iOLQnk","annotations":[{"segment":[25.648740170014964,86.586015473272],"label":"Fixing 
bicycle"}]},"g2UjvJaSZuI":{"duration":77.49,"subset":"testing","resolution":"362x240","url":"https://www.youtube.com/watch?v=g2UjvJaSZuI","annotations":[]},"E2yPoqpNVdM":{"duration":115.06,"subset":"validation","resolution":"648x480","url":"https://www.youtube.com/watch?v=E2yPoqpNVdM","annotations":[{"segment":[28.18036695475819,115.0548752834467],"label":"Paintball"}]},"Uj1QtIM8500":{"duration":155.667,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Uj1QtIM8500","annotations":[{"segment":[14.402795716157698,155.66657596371883],"label":"Putting in contact lenses"}]},"JJ811udnROI":{"duration":37.06,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=JJ811udnROI","annotations":[{"segment":[14.569235326053041,37.059047],"label":"Hopscotch"}]},"g0B_4IlLn3g":{"duration":36.595,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=g0B_4IlLn3g","annotations":[{"segment":[4.624284566302652,18.154598667706708],"label":"Gargling mouthwash"}]},"jNq7v-ryQ98":{"duration":65.086,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=jNq7v-ryQ98","annotations":[]},"j1QaZ7YWcJ8":{"duration":155.039,"subset":"training","resolution":"1088x1920","url":"https://www.youtube.com/watch?v=j1QaZ7YWcJ8","annotations":[{"segment":[0.2,154.781],"label":"Playing congas"}]},"f-r9g_-5huM":{"duration":39.71,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=f-r9g_-5huM","annotations":[{"segment":[5.499903703588143,36.33644244617784],"label":"Playing harmonica"}]},"iNqruq2PS9s":{"duration":64.37,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=iNqruq2PS9s","annotations":[]},"h3H6TzHC8Cc":{"duration":124.39,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=h3H6TzHC8Cc","annotations":[{"segment":[5.090414541633392,122.6983626288608],"label":"Doing 
kickboxing"}]},"TFkI_wKMypE":{"duration":15.88,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=TFkI_wKMypE","annotations":[{"segment":[5.128841034477379,14.890983086942526],"label":"Hopscotch"}]},"vxJlOKF1VB4":{"duration":229.82999999999998,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=vxJlOKF1VB4","annotations":[{"segment":[108.43886241999999,184.79455251404056],"label":"Brushing hair"}]},"oIKpoLWA1GA":{"duration":14.374,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=oIKpoLWA1GA","annotations":[]},"3GVrO1VYAOI":{"duration":50.504,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3GVrO1VYAOI","annotations":[]},"ZxPF3s_OLDo":{"duration":182.9,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZxPF3s_OLDo","annotations":[]},"IqRwR1a9ia0":{"duration":20.341,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=IqRwR1a9ia0","annotations":[{"segment":[0.6956728971962616,16.443177570093457],"label":"Plastering"}]},"aa5jHg4E3O0":{"duration":202.66,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=aa5jHg4E3O0","annotations":[{"segment":[0.01,4.741521060842434],"label":"Drinking 
coffee"}]},"m0u1kjhlHJ4":{"duration":140.295,"subset":"training","resolution":"624x352","url":"https://www.youtube.com/watch?v=m0u1kjhlHJ4","annotations":[{"segment":[0.2,37.20771328812336],"label":"Hurling"},{"segment":[37.645451091513046,123.66092945758645],"label":"Hurling"},{"segment":[128.25717639317816,140.09496598639456],"label":"Hurling"}]},"ehO168THGqU":{"duration":218.48,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ehO168THGqU","annotations":[{"segment":[28.66842072,196.77043621684868],"label":"Skateboarding"}]},"KU4twxFnX5Q":{"duration":144.429,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=KU4twxFnX5Q","annotations":[{"segment":[12.615375975039003,119.84607176287052],"label":"Running a marathon"}]},"ame0HVidIhA":{"duration":80.69,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ame0HVidIhA","annotations":[]},"XhFAi-1uK5Q":{"duration":148.237,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=XhFAi-1uK5Q","annotations":[]},"bR62246z9qA":{"duration":199.81,"subset":"training","resolution":"600x480","url":"https://www.youtube.com/watch?v=bR62246z9qA","annotations":[{"segment":[18.388578783151324,168.92558814352574],"label":"Cheerleading"}]},"J_CJSmMFWlg":{"duration":33.32,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=J_CJSmMFWlg","annotations":[{"segment":[1.4793915608424337,32.67638552808112],"label":"Doing kickboxing"}]},"uvs1pb32xZM":{"duration":101.634,"subset":"training","resolution":"568x320","url":"https://www.youtube.com/watch?v=uvs1pb32xZM","annotations":[{"segment":[0.01,101.6336279],"label":"Futsal"}]},"mtC3trP8ahY":{"duration":188.31,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mtC3trP8ahY","annotations":[{"segment":[16.153926099843993,175.6372328673947],"label":"Playing 
lacrosse"}]},"ERmsX91_7V4":{"duration":94.55,"subset":"training","resolution":"640x356","url":"https://www.youtube.com/watch?v=ERmsX91_7V4","annotations":[{"segment":[0,83.01922464898597],"label":"Layup drill in basketball"}]},"smh90DBXsBg":{"duration":129.2,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=smh90DBXsBg","annotations":[{"segment":[32.33745311466459,123.00320951092044],"label":"Getting a tattoo"}]},"bi6C3kyP4mU":{"duration":213.647,"subset":"training","resolution":"426x238","url":"https://www.youtube.com/watch?v=bi6C3kyP4mU","annotations":[{"segment":[23.331164103707007,25.664280514077706],"label":"Rafting"},{"segment":[39.662978976301915,41.66279304233394],"label":"Rafting"},{"segment":[126.98819319303385,131.3211236694366],"label":"Rafting"},{"segment":[169.9841956127225,173.3172176599064],"label":"Rafting"},{"segment":[178.98335746801874,184.9827996177847],"label":"Rafting"},{"segment":[190.9822417675507,194.31526518408737],"label":"Rafting"}]},"BzUhN9NQf34":{"duration":120.88,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BzUhN9NQf34","annotations":[]},"LVub7uinY-4":{"duration":95.53,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LVub7uinY-4","annotations":[{"segment":[11.028065535101405,14.008623787831512],"label":"Cricket"},{"segment":[21.161963594383774,24.291549759750392],"label":"Cricket"},{"segment":[31.14683374102964,35.617671120124804],"label":"Cricket"},{"segment":[40.833648062402496,43.36712257722309],"label":"Cricket"},{"segment":[58.56796966614665,62.4426953946958],"label":"Cricket"}]},"egT7FYHlWho":{"duration":97.72999999999999,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=egT7FYHlWho","annotations":[{"segment":[48.7244976599064,77.85244461778471],"label":"Cleaning 
windows"}]},"Scr23rwMV7w":{"duration":43.77,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Scr23rwMV7w","annotations":[]},"RblRzlmSFak":{"duration":35.156,"subset":"validation","resolution":"270x480","url":"https://www.youtube.com/watch?v=RblRzlmSFak","annotations":[{"segment":[8.33624626521061,32.68732432449298],"label":"Raking leaves"}]},"OyeXXxV3MSg":{"duration":71.15,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=OyeXXxV3MSg","annotations":[{"segment":[4.99801872074883,69.30585959438378],"label":"Doing nails"}]},"It-SVT1ICJM":{"duration":19.203,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=It-SVT1ICJM","annotations":[{"segment":[4.329867394695787,13.70626989079563],"label":"Doing a powerbomb"}]},"fcXkm-SnJKE":{"duration":64.738,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fcXkm-SnJKE","annotations":[]},"baaXPgmu8hA":{"duration":98.987,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=baaXPgmu8hA","annotations":[]},"p8_hwwgyoZE":{"duration":159.15,"subset":"training","resolution":"524x360","url":"https://www.youtube.com/watch?v=p8_hwwgyoZE","annotations":[{"segment":[66.88238805538221,89.71422368096724],"label":"Playing flauta"}]},"tAbB24pczrs":{"duration":208.421,"subset":"validation","resolution":"1276x720","url":"https://www.youtube.com/watch?v=tAbB24pczrs","annotations":[{"segment":[60.79395956880679,162.24928305247707],"label":"Doing crunches"}]},"caoMescJ1J4":{"duration":152.28,"subset":"training","resolution":"810x480","url":"https://www.youtube.com/watch?v=caoMescJ1J4","annotations":[{"segment":[14.725338533541343,129.44047581903277],"label":"Playing 
harmonica"}]},"08P5E8jltVg":{"duration":77.927,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=08P5E8jltVg","annotations":[]},"F_-w_z4B3bg":{"duration":85.891,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=F_-w_z4B3bg","annotations":[{"segment":[14.600388455538223,59.74103900156006],"label":"Playing drums"}]},"dWUEAavKWmI":{"duration":124.9,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dWUEAavKWmI","annotations":[{"segment":[0.01,113.89099769705074],"label":"Ping-pong"}]},"_GQaltSDMAk":{"duration":77.509,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=_GQaltSDMAk","annotations":[{"segment":[0.2,77.50820861678004],"label":"Making a lemonade"}]},"BIJK3xcjiKE":{"duration":159.522,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=BIJK3xcjiKE","annotations":[{"segment":[12.194279771190846,139.8609230899848],"label":"Futsal"}]},"eyB-aYaCRWM":{"duration":37.18,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=eyB-aYaCRWM","annotations":[]},"hXrNCak63u0":{"duration":82.71000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=hXrNCak63u0","annotations":[{"segment":[8.06330041,35.04297621],"label":"Playing piano"},{"segment":[45.3493447,73.08663494],"label":"Playing 
piano"}]},"7IHT1wZzJXw":{"duration":126.108,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=7IHT1wZzJXw","annotations":[]},"0XDH6N86K4k":{"duration":117.819,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0XDH6N86K4k","annotations":[]},"QkqsI11OtC8":{"duration":14.86,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QkqsI11OtC8","annotations":[{"segment":[0,14.685453978159126],"label":"Hopscotch"}]},"jbSBQL08VHA":{"duration":169.0,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=jbSBQL08VHA","annotations":[{"segment":[112.26102426000001,137.38536224],"label":"Fixing bicycle"},{"segment":[41.1584887,104.59207654],"label":"Fixing bicycle"}]},"MNduaJtXy1A":{"duration":191.657,"subset":"training","resolution":"350x240","url":"https://www.youtube.com/watch?v=MNduaJtXy1A","annotations":[{"segment":[0,191.601],"label":"Playing congas"}]},"lPJkzW5mEqg":{"duration":92.11,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=lPJkzW5mEqg","annotations":[]},"xH_1O0mckTU":{"duration":191.078,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xH_1O0mckTU","annotations":[{"segment":[15.798878860842434,20.86644377847114],"label":"Longboarding"},{"segment":[68.85926446895476,73.62873733260531],"label":"Longboarding"},{"segment":[91.81235262527301,98.96656192074884],"label":"Longboarding"},{"segment":[127.88149115663028,128.47767526458657],"label":"Longboarding"},{"segment":[149.94030315101404,151.728855474883],"label":"Longboarding"},{"segment":[160.67161709422777,163.65253763400935],"label":"Longboarding"}]},"yhWw7oJrUFo":{"duration":133.19,"subset":"training","resolution":"400x224","url":"https://www.youtube.com/watch?v=yhWw7oJrUFo","annotations":[{"segment":[29.07915756630265,120.9043712948518],"label":"Making a 
cake"}]},"rMX2KeJa8qI":{"duration":111.085,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=rMX2KeJa8qI","annotations":[{"segment":[30.142174726989083,48.3401232449298],"label":"Croquet"},{"segment":[78.6609890795632,107.24923400936038],"label":"Croquet"}]},"pQQb2FP1ewE":{"duration":124.065,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pQQb2FP1ewE","annotations":[{"segment":[64.63687857724739,114.4169223770379],"label":"Scuba diving"}]},"8KPk9pH4wWw":{"duration":87.93,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=8KPk9pH4wWw","annotations":[{"segment":[27.710867656474967,35.255905879772605],"label":"Triple jump"},{"segment":[51.58062381279251,78.60557831045242],"label":"Triple jump"}]},"ECzQabG77eM":{"duration":73.747,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=ECzQabG77eM","annotations":[]},"lVXgy-JU14Q":{"duration":111.55,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=lVXgy-JU14Q","annotations":[{"segment":[3.826867394695788,102.6296255850234],"label":"Shot put"}]},"-C9mAKWi9Ho":{"duration":92.021,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-C9mAKWi9Ho","annotations":[{"segment":[35.881825273010925,53.96626521060843],"label":"Doing fencing"},{"segment":[62.290848673946954,85.1116895475819],"label":"Doing 
fencing"}]},"8tfZXiFd6b4":{"duration":36.224,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8tfZXiFd6b4","annotations":[]},"o92ZcNsB-Vw":{"duration":71.448,"subset":"testing","resolution":"853x480","url":"https://www.youtube.com/watch?v=o92ZcNsB-Vw","annotations":[]},"mNq0NksbsZ4":{"duration":157.827,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mNq0NksbsZ4","annotations":[{"segment":[0.20000000000000004,30.27622932917317],"label":"Longboarding"},{"segment":[32.98385959438377,56.61408736349455],"label":"Longboarding"},{"segment":[60.3063104524181,97.47468954758192],"label":"Longboarding"},{"segment":[107.56676599063962,153.12359126365052],"label":"Longboarding"}]},"5HqegO_ayw8":{"duration":119.792,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=5HqegO_ayw8","annotations":[{"segment":[15.49173634945398,119.47432137285492],"label":"Playing beach volleyball"}]},"19LxLS1_Yn0":{"duration":35.74,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=19LxLS1_Yn0","annotations":[{"segment":[0.01,13.33403744149766],"label":"Pole vault"}]},"ezx7z-kx2v4":{"duration":111.18,"subset":"training","resolution":"480x272","url":"https://www.youtube.com/watch?v=ezx7z-kx2v4","annotations":[{"segment":[19.33072542901716,72.90197347893915],"label":"Discus throw"}]},"t1U8fJVEztQ":{"duration":13.329,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=t1U8fJVEztQ","annotations":[{"segment":[0.2,12.663917321861247],"label":"Running a 
marathon"}]},"8m7zkNuqqGA":{"duration":52.942,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=8m7zkNuqqGA","annotations":[]},"KNzFJses9jc":{"duration":151.209,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KNzFJses9jc","annotations":[{"segment":[0.2,148.65346333853356],"label":"Welding"}]},"vY77rUOOgwg":{"duration":54.521,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vY77rUOOgwg","annotations":[{"segment":[24.49592851170047,44.3988704274571],"label":"Installing carpet"}]},"FPv0qnoQbq0":{"duration":44.327,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FPv0qnoQbq0","annotations":[{"segment":[5.463064852886116,39.90111924196568],"label":"Calf roping"}]},"UIZN2F8pjzE":{"duration":30.05,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UIZN2F8pjzE","annotations":[{"segment":[2.48435391575663,4.687460218408737],"label":"Smoking a cigarette"},{"segment":[24.89041375975039,26.53102483619345],"label":"Smoking a cigarette"}]},"1iIG5D3jTk4":{"duration":96.247,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1iIG5D3jTk4","annotations":[{"segment":[14.560744149765991,91.26734477379095],"label":"Rope skipping"}]},"_uTNqQhQA9w":{"duration":92.811,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_uTNqQhQA9w","annotations":[{"segment":[0.01,71.52609815912636],"label":"Making a cake"}]},"X487OD6H6LE":{"duration":83.941,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=X487OD6H6LE","annotations":[{"segment":[11.256608424336973,83.681],"label":"Playing congas"}]},"JuD1OdoXe9Q":{"duration":140.82999999999998,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=JuD1OdoXe9Q","annotations":[{"segment":[6.63727077,48.9803262],"label":"Long 
jump"}]},"W_dRFcpPqiM":{"duration":124.134,"subset":"testing","resolution":"848x480","url":"https://www.youtube.com/watch?v=W_dRFcpPqiM","annotations":[]},"_i6kvwg1Oyo":{"duration":57.609,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=_i6kvwg1Oyo","annotations":[{"segment":[0.01,57.608706],"label":"Throwing darts"}]},"Khxa5Ey3udM":{"duration":142.27,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Khxa5Ey3udM","annotations":[{"segment":[65.35101404056162,120.39571768],"label":"Mowing the lawn"},{"segment":[35.27790795631825,55.72779828],"label":"Mowing the lawn"}]},"nA9MM4baSdc":{"duration":143.96,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=nA9MM4baSdc","annotations":[{"segment":[5.386558502340094,54.09002496099844],"label":"Playing harmonica"},{"segment":[56.334424336973484,84.61385647425898],"label":"Playing harmonica"},{"segment":[87.30713572542902,141.39716068642747],"label":"Playing harmonica"}]},"g7l-Y_bgPkI":{"duration":27.38,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=g7l-Y_bgPkI","annotations":[{"segment":[1.06769992199688,26.906038034321373],"label":"Tennis serve with ball bouncing"}]},"jRBh9ND4c8s":{"duration":181.627,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=jRBh9ND4c8s","annotations":[]},"QOuNt8YH3Rk":{"duration":167.93,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=QOuNt8YH3Rk","annotations":[{"segment":[13.098807489714556,43.22606471605803],"label":"Hammer throw"},{"segment":[63.922180549807024,96.40722312429912],"label":"Hammer throw"},{"segment":[123.3907665397816,130.72609873322935],"label":"Hammer throw"},{"segment":[139.89526397503903,156.3997614102964],"label":"Hammer 
throw"}]},"yp1zqqLB57M":{"duration":180.7,"subset":"training","resolution":"596x360","url":"https://www.youtube.com/watch?v=yp1zqqLB57M","annotations":[{"segment":[10.007435696067299,111.49129064210187],"label":"Belly dance"},{"segment":[97.9601099826306,104.16190111822159],"label":"Belly dance"},{"segment":[134.60705760203197,166.46171207120395],"label":"Belly dance"}]},"n9teiB6FW84":{"duration":145.636,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=n9teiB6FW84","annotations":[{"segment":[7.709897196261681,132.447015576324],"label":"BMX"}]},"kKUoxGrxSWw":{"duration":162.66,"subset":"testing","resolution":"568x360","url":"https://www.youtube.com/watch?v=kKUoxGrxSWw","annotations":[]},"GhXniQgRUTY":{"duration":121.88,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GhXniQgRUTY","annotations":[{"segment":[47.519890795631824,120.5104430577223],"label":"Doing step aerobics"}]},"tldn48dG8Ag":{"duration":129.916,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=tldn48dG8Ag","annotations":[]},"2rjIzlwBiKs":{"duration":148.051,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=2rjIzlwBiKs","annotations":[]},"dDmc6n79ek0":{"duration":110.736,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=dDmc6n79ek0","annotations":[{"segment":[18.6575414690057,98.64311123556942],"label":"Playing ice hockey"}]},"JzLqc6MhJtc":{"duration":54.89,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=JzLqc6MhJtc","annotations":[{"segment":[0,54.891971],"label":"Getting a tattoo"}]},"4fgIHu_Se3Q":{"duration":50.97,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4fgIHu_Se3Q","annotations":[{"segment":[1.2710140405616226,49.64898595943838],"label":"Using parallel 
bars"}]},"JlCQlNjvXzA":{"duration":85.148,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=JlCQlNjvXzA","annotations":[{"segment":[7.423140186915887,84.19288940809969],"label":"Elliptical trainer"}]},"Nvf-BZNEGdI":{"duration":101.45,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Nvf-BZNEGdI","annotations":[]},"u6FTTLK6GUg":{"duration":35.99,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=u6FTTLK6GUg","annotations":[{"segment":[0.01,35.457521110764425],"label":"Dodgeball"}]},"0zrJtIM2XRA":{"duration":208.96,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=0zrJtIM2XRA","annotations":[]},"asgLNARjP-A":{"duration":55.264,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=asgLNARjP-A","annotations":[]},"O9HLR19is-A":{"duration":147.285,"subset":"training","resolution":"328x240","url":"https://www.youtube.com/watch?v=O9HLR19is-A","annotations":[{"segment":[0.2,142.89600936037442],"label":"Riding bumper cars"}]},"DP9hfhq8sro":{"duration":18.716,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DP9hfhq8sro","annotations":[{"segment":[0.01,15.212920436817473],"label":"Table soccer"}]},"K2l7ops-tno":{"duration":109.528,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=K2l7ops-tno","annotations":[{"segment":[0.01,109.52852607709751],"label":"Laying 
tile"}]},"EWF-801zIRM":{"duration":212.74,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=EWF-801zIRM","annotations":[]},"wiOdB3Vgs-4":{"duration":137.86,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=wiOdB3Vgs-4","annotations":[]},"T3CHT7rQQVc":{"duration":116.61,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=T3CHT7rQQVc","annotations":[{"segment":[9.823670910140406,73.67753182605304],"label":"Playing guitarra"}]},"rlirIGuekbw":{"duration":87.215,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rlirIGuekbw","annotations":[]},"KWeZbXoX8Ys":{"duration":10.148,"subset":"training","resolution":"202x360","url":"https://www.youtube.com/watch?v=KWeZbXoX8Ys","annotations":[{"segment":[1.8557847113884554,9.530555382215288],"label":"Fun sliding down"}]},"4chhQzVxWkA":{"duration":65.364,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4chhQzVxWkA","annotations":[{"segment":[5.4044995460218415,65.363853],"label":"Swimming"}]},"1JHqxhajh60":{"duration":99.85,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=1JHqxhajh60","annotations":[{"segment":[0.01,8.644492979719189],"label":"Bathing dog"},{"segment":[13.628705148205928,42.91095163806553],"label":"Bathing dog"},{"segment":[43.84549141965679,72.66046801872075],"label":"Bathing dog"},{"segment":[74.21803432137285,86.99007800312012],"label":"Bathing dog"},{"segment":[90.10521060842433,97.73728549141967],"label":"Bathing 
dog"}]},"9LhMNDcFW_k":{"duration":85.636,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=9LhMNDcFW_k","annotations":[{"segment":[6.543602184087363,7.87903120124805],"label":"Croquet"},{"segment":[20.031435257410294,20.69914976599064],"label":"Croquet"},{"segment":[26.57503744149766,31.11549609984399],"label":"Croquet"},{"segment":[35.655954758190326,39.52869890795632],"label":"Croquet"},{"segment":[41.66538533541342,72.11316692667707],"label":"Croquet"},{"segment":[76.78716848673946,82.26242745709828],"label":"Croquet"}]},"8NfYYT8d3EQ":{"duration":200.74,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8NfYYT8d3EQ","annotations":[{"segment":[105.79590143999998,172.34791488],"label":"Preparing salad"}]},"n1dboqahvZU":{"duration":63.83,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=n1dboqahvZU","annotations":[{"segment":[31.865900530421218,52.97705963182527],"label":"Clean and jerk"}]},"VpiAoWvJhB4":{"duration":225.954,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=VpiAoWvJhB4","annotations":[]},"O3HFalRZVts":{"duration":123.51,"subset":"validation","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=O3HFalRZVts","annotations":[{"segment":[0,123.461],"label":"Making a 
sandwich"}]},"s80tsQgUczA":{"duration":173.27,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=s80tsQgUczA","annotations":[]},"W4tmb8RwzQM":{"duration":188.152,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=W4tmb8RwzQM","annotations":[{"segment":[16.43141341653666,180.4713182527301],"label":"Slacklining"}]},"3wEEaF-9xPU":{"duration":70.868,"subset":"testing","resolution":"597x480","url":"https://www.youtube.com/watch?v=3wEEaF-9xPU","annotations":[]},"9uitA8tmSfE":{"duration":206.59,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=9uitA8tmSfE","annotations":[{"segment":[94.89240249609985,202.5123088923557],"label":"Plataform diving"},{"segment":[6.605413416536661,91.31558574],"label":"Plataform diving"}]},"d950IKYTYY0":{"duration":64.343,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=d950IKYTYY0","annotations":[{"segment":[0,4.115510472698908],"label":"Having an ice cream"},{"segment":[13.651449372854916,63.53946656630266],"label":"Having an ice cream"}]},"GSo0lqq5zmM":{"duration":182.184,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=GSo0lqq5zmM","annotations":[{"segment":[86.97073131904868,89.52869400490306],"label":"Curling"},{"segment":[141.25638244149766,146.08808968798752],"label":"Curling"}]},"Jl2lDgcsvmA":{"duration":190.27,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Jl2lDgcsvmA","annotations":[{"segment":[0.01,190.26430839002268],"label":"Playing 
kickball"}]},"ekzUyQo2R9Y":{"duration":20.063,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ekzUyQo2R9Y","annotations":[{"segment":[0.2,19.812044744548285],"label":"Waterskiing"}]},"2X4-yp_EFVE":{"duration":28.259,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2X4-yp_EFVE","annotations":[{"segment":[0.2,12.211630563182528],"label":"Arm wrestling"}]},"sTev5fWX3T8":{"duration":15.09,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=sTev5fWX3T8","annotations":[{"segment":[0.023545195007800314,15.09247],"label":"Using uneven bars"}]},"fvroOk6TpKk":{"duration":193.14,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fvroOk6TpKk","annotations":[{"segment":[10.546051547581904,181.0907708599064],"label":"Playing lacrosse"}]},"b0QGh__f7lU":{"duration":137.648,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=b0QGh__f7lU","annotations":[{"segment":[8.804311290818978,137.64789115646258],"label":"Playing congas"}]},"ZRwdRQ2iSFY":{"duration":60.49,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZRwdRQ2iSFY","annotations":[]},"34NXXCSWyzE":{"duration":151.836,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=34NXXCSWyzE","annotations":[{"segment":[24.04256048337172,90.84061029924192],"label":"Calf roping"}]},"y3LlQBDXAHI":{"duration":37.94,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=y3LlQBDXAHI","annotations":[{"segment":[2.9013198127925115,21.611872074882996],"label":"Hammer 
throw"}]},"QMFtjdg6d2A":{"duration":42.447,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=QMFtjdg6d2A","annotations":[{"segment":[0.01,34.23486892355694],"label":"Bullfighting"}]},"yeUuZ9vk5gE":{"duration":63.182,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=yeUuZ9vk5gE","annotations":[{"segment":[5.142675655887877,34.65716163719629],"label":"Baking cookies"}]},"2iW1Eq9SDW4":{"duration":26.262,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=2iW1Eq9SDW4","annotations":[{"segment":[0.01,26.261768707482993],"label":"Playing beach volleyball"}]},"7bwruwic1CI":{"duration":31.88,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7bwruwic1CI","annotations":[]},"lEYr4d4vBWc":{"duration":206.426,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=lEYr4d4vBWc","annotations":[{"segment":[52.81398608325285,180.3404386895476],"label":"Fixing the roof"}]},"CvsFEsXakwo":{"duration":185.76,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CvsFEsXakwo","annotations":[{"segment":[47.23684997035882,51.583799354134165],"label":"Playing drums"},{"segment":[67.23281713572543,71.8695631450858],"label":"Playing drums"},{"segment":[72.44915639625584,74.18793614976599],"label":"Playing drums"},{"segment":[87.80837755226209,91.57573368486739],"label":"Playing drums"}]},"QeH6IOAjy-4":{"duration":40.52,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=QeH6IOAjy-4","annotations":[{"segment":[1.294335652106084,38.987915374414975],"label":"Clean and jerk"}]},"9T1C2CW_P0A":{"duration":171.62,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=9T1C2CW_P0A","annotations":[{"segment":[0.01,0.02],"label":"Walking the 
dog"}]},"6YmUCNIMpB4":{"duration":27.864,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=6YmUCNIMpB4","annotations":[{"segment":[0.2,27.663197278911564],"label":"Trimming branches or hedges"}]},"xM5OQYON59w":{"duration":46.65,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=xM5OQYON59w","annotations":[{"segment":[10.53704368174727,23.108826833073323],"label":"Shot put"}]},"GqrEVe0c6PQ":{"duration":109.877,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=GqrEVe0c6PQ","annotations":[{"segment":[10.456296823712949,103.19165062090484],"label":"Making a lemonade"}]},"2_tzemKY72E":{"duration":82.36,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2_tzemKY72E","annotations":[{"segment":[16.867490639625586,59.85245943837754],"label":"Using uneven bars"}]},"-WrOnvkUTXg":{"duration":36.455,"subset":"validation","resolution":"720x1280","url":"https://www.youtube.com/watch?v=-WrOnvkUTXg","annotations":[{"segment":[1.606723450812224,30.912452422774837],"label":"Kneeling"}]},"fzp5ooc727c":{"duration":148.282,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=fzp5ooc727c","annotations":[{"segment":[0.01,148.221],"label":"Hurling"}]},"0NaL4it1TVk":{"duration":176.29,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0NaL4it1TVk","annotations":[]},"1mYtNMDFyXQ":{"duration":110.34,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=1mYtNMDFyXQ","annotations":[{"segment":[7.918401248049922,42.00195444617785],"label":"Drinking coffee"}]},"xIhTY02lRSE":{"duration":64.02,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xIhTY02lRSE","annotations":[{"segment":[4.094692898595944,47.53838584711389],"label":"Fixing 
bicycle"}]},"5zT1GWfmVLU":{"duration":32.183,"subset":"validation","resolution":"600x480","url":"https://www.youtube.com/watch?v=5zT1GWfmVLU","annotations":[{"segment":[8.660753248829954,17.99930457800312],"label":"Doing a powerbomb"}]},"kl4vLrvGAmM":{"duration":180.141,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=kl4vLrvGAmM","annotations":[{"segment":[41.31145085803432,146.97883536565953],"label":"Laying tile"}]},"syAccI5soVw":{"duration":202.99,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=syAccI5soVw","annotations":[{"segment":[0.01,202.921],"label":"Belly dance"}]},"6fjc0Brqa7A":{"duration":126.689,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6fjc0Brqa7A","annotations":[{"segment":[29.843835758190323,38.737694096723864],"label":"Longboarding"},{"segment":[54.351356513260534,113.84138673322933],"label":"Longboarding"}]},"M96TST6CN4M":{"duration":87.725,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=M96TST6CN4M","annotations":[{"segment":[25.239691390228977,38.337045840562766],"label":"Sumo"},{"segment":[48.43292231169973,57.43735332569662],"label":"Sumo"},{"segment":[66.44178433969351,69.3068305714198],"label":"Sumo"},{"segment":[84.4506463676873,87.524987],"label":"Sumo"}]},"h7iCyiNUxeE":{"duration":6.92,"subset":"validation","resolution":"192x144","url":"https://www.youtube.com/watch?v=h7iCyiNUxeE","annotations":[{"segment":[0.06210848,0.21744632],"label":"Washing dishes"}]},"oZTFplEHVDo":{"duration":19.74,"subset":"validation","resolution":"720x480","url":"https://www.youtube.com/watch?v=oZTFplEHVDo","annotations":[{"segment":[5.2344511700468015,19.305887550702028],"label":"Using uneven bars"}]},"o4z1nEiyr4E":{"duration":22.176,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=o4z1nEiyr4E","annotations":[{"segment":[5.152438370380817,19.05091656352723],"label":"Rope 
skipping"}]},"4kURX9KFGj4":{"duration":15.4,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=4kURX9KFGj4","annotations":[]},"LWAxCIy4PBw":{"duration":69.591,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LWAxCIy4PBw","annotations":[{"segment":[0.01,69.59020408163265],"label":"Mooping floor"}]},"nbgE4MAR0pc":{"duration":20.39,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=nbgE4MAR0pc","annotations":[{"segment":[1.6380954500000002,16.622540450000002],"label":"Pole vault"}]},"M3jMJB_t024":{"duration":78.554,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=M3jMJB_t024","annotations":[{"segment":[0.01,77.01140873343073],"label":"Table soccer"}]},"c-TD6-GESQk":{"duration":52.15,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=c-TD6-GESQk","annotations":[{"segment":[27.784516184867396,43.649759687207485],"label":"Clean and jerk"}]},"sy-xNiKnfBU":{"duration":201.92000000000002,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sy-xNiKnfBU","annotations":[{"segment":[0.01,200.03067202960227],"label":"Windsurfing"}]},"N-6-MdxRg50":{"duration":134.49,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=N-6-MdxRg50","annotations":[{"segment":[8.182680898595944,127.35608475507019],"label":"Zumba"}]},"3baH1vI0Cxk":{"duration":94.738,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3baH1vI0Cxk","annotations":[{"segment":[22.686663695119233,33.62355694227769],"label":"Table soccer"},{"segment":[35.101515489190994,39.23979942054825],"label":"Table soccer"},{"segment":[51.95024292400268,74.85860040115891],"label":"Table soccer"},{"segment":[80.62263873412078,83.2829649976599],"label":"Table 
soccer"}]},"hJI5PEg2UuA":{"duration":92.695,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hJI5PEg2UuA","annotations":[{"segment":[39.15802028081123,45.80476911076443],"label":"Sharpening knives"}]},"MaYV3jnGr6g":{"duration":160.961,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=MaYV3jnGr6g","annotations":[{"segment":[49.970646488444565,55.74614778159127],"label":"Putting in contact lenses"},{"segment":[76.08595845865834,82.36367780343213],"label":"Putting in contact lenses"}]},"4HxmQpkryjA":{"duration":66.57,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=4HxmQpkryjA","annotations":[{"segment":[10.860500780031199,13.45870670826833],"label":"Playing water polo"},{"segment":[18.655118564742587,21.772965678627145],"label":"Playing water polo"},{"segment":[27.800803432137283,30.191152886115443],"label":"Playing water polo"},{"segment":[35.59542121684867,41.20754602184087],"label":"Playing water polo"}]},"GUSlicDnqIA":{"duration":101.169,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=GUSlicDnqIA","annotations":[]},"sn1OtmEJxng":{"duration":155.365,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sn1OtmEJxng","annotations":[{"segment":[36.124166775111526,39.75418351701078],"label":"Running a marathon"},{"segment":[46.04621253630282,47.74022034918914],"label":"Running a marathon"},{"segment":[53.30624602010132,55.242254949114255],"label":"Running a marathon"},{"segment":[58.63027057488689,59.84027615551998],"label":"Running a marathon"},{"segment":[73.39233865861051,75.57034870375006],"label":"Running a marathon"},{"segment":[81.62037660691549,85.25039334881474],"label":"Running a marathon"},{"segment":[89.12241120684061,91.78442348423339],"label":"Running a marathon"},{"segment":[120.09855287033965,124.45457291706862],"label":"Running a 
marathon"}]},"yTJCrP0HqEE":{"duration":147.888,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yTJCrP0HqEE","annotations":[{"segment":[74.86680293334182,79.71180404767212],"label":"Putting in contact lenses"}]},"9i7ABIB99Ow":{"duration":171.29,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=9i7ABIB99Ow","annotations":[{"segment":[22.67062855,163.6327346723869],"label":"Preparing pasta"}]},"7OTqYfUuAIw":{"duration":31.58,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7OTqYfUuAIw","annotations":[{"segment":[6.27506309,20.6747713],"label":"Doing kickboxing"}]},"HO36c47J3vU":{"duration":87.168,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HO36c47J3vU","annotations":[]},"hXaydigjzDE":{"duration":66.06,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=hXaydigjzDE","annotations":[]},"rrXdhD3WDy4":{"duration":94.46000000000001,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=rrXdhD3WDy4","annotations":[{"segment":[6.407031936037442,78.44453526000001],"label":"Getting a piercing"}]},"coYI2JYy6os":{"duration":8.104,"subset":"testing","resolution":"270x480","url":"https://www.youtube.com/watch?v=coYI2JYy6os","annotations":[]},"cJcr6kWylI8":{"duration":145.5,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cJcr6kWylI8","annotations":[{"segment":[3.631722783151326,30.64266098283931],"label":"Polishing forniture"},{"segment":[71.04557694539781,94.1978096879875],"label":"Polishing forniture"}]},"6df3LoaxV4Q":{"duration":180.91,"subset":"testing","resolution":"426x236","url":"https://www.youtube.com/watch?v=6df3LoaxV4Q","annotations":[]},"gfwSnetklbE":{"duration":118.956,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=gfwSnetklbE","annotations":[{"segment":[0.2,92.13063690960831],"label":"Removing ice from 
car"}]},"Msj_JClcTZI":{"duration":200.551,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Msj_JClcTZI","annotations":[]},"99PtaOQbBIY":{"duration":150.652,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=99PtaOQbBIY","annotations":[{"segment":[51.219669945981515,59.44301145107029],"label":"Clipping cat claws"},{"segment":[109.01801175663027,116.7714479375975],"label":"Clipping cat claws"}]},"v621l04N1QQ":{"duration":69.359,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=v621l04N1QQ","annotations":[{"segment":[0.2,66.22012142901715],"label":"Waterskiing"}]},"JHFjlLtpIcU":{"duration":42.17,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JHFjlLtpIcU","annotations":[{"segment":[10.525413451912225,40.391272549141966],"label":"Using uneven bars"}]},"OZpxc1X8Bsc":{"duration":71.03,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=OZpxc1X8Bsc","annotations":[{"segment":[5.429738256196914,68.03794346333854],"label":"Rock climbing"}]},"Nn9qwOB-g34":{"duration":126.642,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Nn9qwOB-g34","annotations":[{"segment":[7.9027269890795635,126.64120181405896],"label":"Futsal"}]},"VkdPyMG-Gvo":{"duration":212.347,"subset":"training","resolution":"352x240","url":"https://www.youtube.com/watch?v=VkdPyMG-Gvo","annotations":[{"segment":[0.01,189.9872597096807],"label":"Playing pool"}]},"NdFbQ0l3R9k":{"duration":88.91,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=NdFbQ0l3R9k","annotations":[{"segment":[16.505765297278558,56.036379664710395],"label":"Using the balance 
beam"}]},"mW23e1rzy-k":{"duration":126.29,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mW23e1rzy-k","annotations":[]},"B5s4uYNIM24":{"duration":146.31,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=B5s4uYNIM24","annotations":[{"segment":[16.890531747269893,123.7117325273011],"label":"Cleaning windows"}]},"jLykQW_-IVw":{"duration":88.82,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jLykQW_-IVw","annotations":[{"segment":[10.82045328,84.68305090405616],"label":"Smoking a cigarette"}]},"IRBrTKrVCxo":{"duration":24.92,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IRBrTKrVCxo","annotations":[]},"7cpX1j9dwjA":{"duration":92.16,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=7cpX1j9dwjA","annotations":[{"segment":[7.758140405616225,85.48507020280812],"label":"Bullfighting"}]},"DhBaapZUYjA":{"duration":55.22,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DhBaapZUYjA","annotations":[]},"jW1isCO6MYk":{"duration":26.123,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=jW1isCO6MYk","annotations":[{"segment":[5.646998459667692,23.52239122952836],"label":"Using the monkey bar"}]},"XSNenkxgryQ":{"duration":188.18,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=XSNenkxgryQ","annotations":[{"segment":[9.981175310452418,27.888578073322936],"label":"Smoking a cigarette"},{"segment":[62.52912768018721,85.72068207800312],"label":"Smoking a cigarette"}]},"5FwNVOIGdVs":{"duration":96.387,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=5FwNVOIGdVs","annotations":[{"segment":[15.036762286818004,86.31101529485179],"label":"Blowing 
leaves"}]},"mUsjm4oBBvw":{"duration":232.15,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mUsjm4oBBvw","annotations":[{"segment":[38.563744039001556,192.81872019500778],"label":"Getting a haircut"}]},"RpgTxW7lYJM":{"duration":221.98,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RpgTxW7lYJM","annotations":[{"segment":[4.501021840873635,213.97165366614666],"label":"Painting"}]},"EsVxUf4Ae2c":{"duration":85.171,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=EsVxUf4Ae2c","annotations":[{"segment":[11.42697375351014,47.30235646801872],"label":"Ice fishing"}]},"bWEx3UqVvXU":{"duration":115.218,"subset":"testing","resolution":"636x360","url":"https://www.youtube.com/watch?v=bWEx3UqVvXU","annotations":[]},"cqxiLraI8uY":{"duration":24.5,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cqxiLraI8uY","annotations":[]},"08wKlw4a_so":{"duration":25.31,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=08wKlw4a_so","annotations":[{"segment":[0.2,25.109750566893425],"label":"Raking leaves"}]},"koSLx0E23gU":{"duration":122.49,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=koSLx0E23gU","annotations":[{"segment":[5.732533138060217,98.59956939781591],"label":"Tumbling"}]},"iHm8ZXs2XdY":{"duration":222.541,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iHm8ZXs2XdY","annotations":[{"segment":[3.818939929797192,208.3058143525741],"label":"Braiding hair"}]},"Svr-YN9ujkA":{"duration":225.12,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Svr-YN9ujkA","annotations":[{"segment":[17.384265656340542,43.72406210534135],"label":"Doing nails"},{"segment":[83.40935542183591,146.2736696134512],"label":"Doing nails"},{"segment":[185.6077645577223,213.35234998829955],"label":"Doing 
nails"}]},"TdAfqkmTrf0":{"duration":115.891,"subset":"training","resolution":"176x144","url":"https://www.youtube.com/watch?v=TdAfqkmTrf0","annotations":[{"segment":[6.308174445044187,18.203594022792384],"label":"Hurling"},{"segment":[20.907098494021,31.901350010350704],"label":"Hurling"},{"segment":[35.50602263865552,44.1572369465871],"label":"Hurling"},{"segment":[51.366582203196735,68.66901081905988],"label":"Hurling"},{"segment":[79.48302870397434,84.1691031207706],"label":"Hurling"},{"segment":[98.76802726540514,107.23900794192147],"label":"Hurling"}]},"vSV7arHrH5k":{"duration":152.05,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=vSV7arHrH5k","annotations":[{"segment":[26.97108516,135.7676898],"label":"Hand washing clothes"}]},"K1TizK5Sg78":{"duration":221.22,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=K1TizK5Sg78","annotations":[{"segment":[21.57759786,116.67746922],"label":"Smoking a cigarette"}]},"Z5EajmUZLbk":{"duration":106.534,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=Z5EajmUZLbk","annotations":[]},"Yd3G3732WbI":{"duration":93.37,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=Yd3G3732WbI","annotations":[{"segment":[19.14217394695788,28.458517160686426],"label":"Wrapping presents"},{"segment":[29.768627925117006,32.679985179407176],"label":"Wrapping presents"},{"segment":[34.57236739469579,39.23053900156006],"label":"Wrapping presents"},{"segment":[41.122921216848674,47.382339313572544],"label":"Wrapping presents"},{"segment":[49.420289391575665,61.64798985959438],"label":"Wrapping presents"},{"segment":[66.16059360374415,78.9705655226209],"label":"Wrapping presents"}]},"TTDruR5Vin4":{"duration":173.25,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=TTDruR5Vin4","annotations":[{"segment":[7.971903276131044,167.9504368174727],"label":"Doing 
motocross"}]},"50b9lVikSeo":{"duration":36.13,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=50b9lVikSeo","annotations":[{"segment":[14.654858221528862,26.94239319188768],"label":"Vacuuming floor"}]},"x7-2_HigN8c":{"duration":74.235,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=x7-2_HigN8c","annotations":[{"segment":[13.560200963800924,18.185385761308712],"label":"Throwing darts"},{"segment":[33.911014072835194,47.902198085296256],"label":"Throwing darts"},{"segment":[55.6493826211218,62.240270957570395],"label":"Throwing darts"}]},"rvqULxGTSZw":{"duration":9.31,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rvqULxGTSZw","annotations":[]},"dEG-OgH9zmU":{"duration":94.32,"subset":"validation","resolution":"848x480","url":"https://www.youtube.com/watch?v=dEG-OgH9zmU","annotations":[{"segment":[0.01,94.319455],"label":"Scuba diving"}]},"4w7sVSMbjyM":{"duration":189.985,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4w7sVSMbjyM","annotations":[{"segment":[10.670023241745996,24.303941828421433],"label":"Snowboarding"},{"segment":[29.638953449294434,32.602848794223874],"label":"Snowboarding"},{"segment":[55.4248429501806,62.53819177801126],"label":"Snowboarding"},{"segment":[68.7623720023631,82.39628989079563],"label":"Snowboarding"},{"segment":[88.91685959438378,95.14103976599064],"label":"Snowboarding"},{"segment":[107.58940010920436,119.74137092043682],"label":"Snowboarding"},{"segment":[152.9369985023401,170.42398088923557],"label":"Snowboarding"},{"segment":[174.86982386895477,177.53732965678628],"label":"Snowboarding"}]},"AKoregkLJgc":{"duration":84.15,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AKoregkLJgc","annotations":[{"segment":[0.01,84.081],"label":"Rope 
skipping"}]},"AwoZxz8M8Jg":{"duration":179.607,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AwoZxz8M8Jg","annotations":[{"segment":[34.464136075647104,47.9135550319972],"label":"Making a lemonade"},{"segment":[62.483758901376454,105.35378182474237],"label":"Making a lemonade"},{"segment":[116.28143472677681,159.43165387840003],"label":"Making a lemonade"}]},"wMHwxGyHVaU":{"duration":88.24,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=wMHwxGyHVaU","annotations":[]},"7Zd7KlliqQw":{"duration":125.97,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7Zd7KlliqQw","annotations":[{"segment":[48.14699217628706,101.0104244024961],"label":"Getting a haircut"}]},"fNPEa0Sd4hM":{"duration":105.65,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fNPEa0Sd4hM","annotations":[{"segment":[11.612152886115444,21.988970358814353],"label":"Triple jump"},{"segment":[35.495304212168485,43.401450858034316],"label":"Triple jump"}]},"99KPQLxH4cw":{"duration":176.867,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=99KPQLxH4cw","annotations":[{"segment":[0.20000000000000004,28.133700468018716],"label":"Snowboarding"}]},"K-Jzu2AmuW4":{"duration":170.04,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=K-Jzu2AmuW4","annotations":[{"segment":[2.9173853354134165,167.35183151326052],"label":"Zumba"}]},"cvFFwMKFg7Q":{"duration":7.617,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=cvFFwMKFg7Q","annotations":[{"segment":[0.2,7.416144],"label":"Raking leaves"}]},"heLlfK--Bx0":{"duration":80.922,"subset":"training","resolution":"1920x1072","url":"https://www.youtube.com/watch?v=heLlfK--Bx0","annotations":[{"segment":[8.702073322932916,76.31966458658347],"label":"Rope 
skipping"}]},"aDsRltSdQak":{"duration":147.77,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=aDsRltSdQak","annotations":[{"segment":[33.42721371439892,135.32258226193449],"label":"Windsurfing"}]},"WLpjci5dN8s":{"duration":109.552,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=WLpjci5dN8s","annotations":[{"segment":[18.946020280811236,49.49861154446178],"label":"Sharpening knives"}]},"8kNk_fzmTyI":{"duration":84.312,"subset":"training","resolution":"853x474","url":"https://www.youtube.com/watch?v=8kNk_fzmTyI","annotations":[{"segment":[37.069908020272614,39.30245964112864],"label":"Swimming"},{"segment":[45.21215510810048,47.707359108927626],"label":"Swimming"}]},"ByIIq3jFOKo":{"duration":193.027,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ByIIq3jFOKo","annotations":[{"segment":[20.527278561395352,138.98962793728276],"label":"Rafting"},{"segment":[175.37019716185736,181.98484611178],"label":"Rafting"}]},"n7AaZhEOJSY":{"duration":121.05,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=n7AaZhEOJSY","annotations":[{"segment":[5.330666697465914,113.36665180961545],"label":"Ballet"}]},"gD3VQ0Kn7Fg":{"duration":38.36,"subset":"testing","resolution":"640x350","url":"https://www.youtube.com/watch?v=gD3VQ0Kn7Fg","annotations":[]},"6U081DbNJIY":{"duration":121.02,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=6U081DbNJIY","annotations":[{"segment":[23.033905095163806,37.19409265366615],"label":"Javelin throw"},{"segment":[39.27092016224649,51.35428021216849],"label":"Javelin throw"},{"segment":[66.83608527613104,72.50016029953198],"label":"Javelin throw"},{"segment":[80.42986533229329,120.822403],"label":"Javelin 
throw"}]},"S5Aj9b5T284":{"duration":185.737,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=S5Aj9b5T284","annotations":[{"segment":[0.2,176.68683307332293],"label":"Table soccer"}]},"Ii3jLIcf92s":{"duration":31.115,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Ii3jLIcf92s","annotations":[{"segment":[0.2,30.093367601246104],"label":"Playing drums"}]},"7ToBC5eGg38":{"duration":9.08,"subset":"training","resolution":"160x120","url":"https://www.youtube.com/watch?v=7ToBC5eGg38","annotations":[{"segment":[0,3.895047659906396],"label":"Tennis serve with ball bouncing"}]},"CQocaUwWcQI":{"duration":37.99,"subset":"validation","resolution":"960x720","url":"https://www.youtube.com/watch?v=CQocaUwWcQI","annotations":[{"segment":[0.01,9.422881741029641],"label":"Long jump"},{"segment":[10.608149884555383,18.253129410296413],"label":"Long jump"},{"segment":[19.438397553822153,27.379694115444618],"label":"Long jump"},{"segment":[28.564962258970358,37.573000149765996],"label":"Long jump"}]},"qBEdCDpoeTc":{"duration":145.36,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=qBEdCDpoeTc","annotations":[{"segment":[18.814326053042123,132.3803182527301],"label":"Shot put"}]},"6dJrQV3Jzbw":{"duration":162.75,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=6dJrQV3Jzbw","annotations":[{"segment":[2.7928760334086835,159.19393390429497],"label":"Playing 
lacrosse"}]},"R_AsoAmxd4o":{"duration":220.079,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=R_AsoAmxd4o","annotations":[{"segment":[6.864929797191888,213.1560702028081],"label":"Waterskiing"}]},"0DMrpd7yoAc":{"duration":19.018,"subset":"testing","resolution":"427x240","url":"https://www.youtube.com/watch?v=0DMrpd7yoAc","annotations":[]},"ODUY_0YRmrA":{"duration":191.495,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ODUY_0YRmrA","annotations":[]},"RfchfRzuV8I":{"duration":52.15,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=RfchfRzuV8I","annotations":[{"segment":[10.887447737909516,45.74353042121685],"label":"Playing bagpipes"}]},"GPX1sFm4kwg":{"duration":158.988,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=GPX1sFm4kwg","annotations":[]},"zH-fOiPLLxA":{"duration":39.567,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zH-fOiPLLxA","annotations":[{"segment":[0.2,35.93922897196262],"label":"Cleaning sink"}]},"1qU2CdUQbw0":{"duration":215.39,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=1qU2CdUQbw0","annotations":[{"segment":[0.01,153.2247486552262],"label":"Doing kickboxing"}]},"ta9cB1q4AzU":{"duration":179.026,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ta9cB1q4AzU","annotations":[{"segment":[120.61561310452419,166.97260842433698],"label":"Having an ice cream"}]},"h_r8gioFi4k":{"duration":171.32,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=h_r8gioFi4k","annotations":[]},"t0YXK4W_L7o":{"duration":93.03999999999999,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=t0YXK4W_L7o","annotations":[{"segment":[5.515772973776094,51.09347596761013],"label":"Horseback riding"},{"segment":[53.12560230889235,93.04238],"label":"Horseback 
riding"}]},"aOxRwTlkY80":{"duration":156.67000000000002,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aOxRwTlkY80","annotations":[]},"mHe2twk5BDE":{"duration":237.82,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mHe2twk5BDE","annotations":[{"segment":[48.23157693654685,111.30363908433888],"label":"Playing water polo"}]},"Xl2kHRcZADw":{"duration":73.59,"subset":"testing","resolution":"634x360","url":"https://www.youtube.com/watch?v=Xl2kHRcZADw","annotations":[]},"Sd7oYdE2uyw":{"duration":161.565,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Sd7oYdE2uyw","annotations":[]},"Y-dpHbPGQWE":{"duration":233.92000000000002,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=Y-dpHbPGQWE","annotations":[{"segment":[8.755569397815913,215.24108102964118],"label":"Playing guitarra"}]},"ZqwgoryWdg4":{"duration":70.26,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ZqwgoryWdg4","annotations":[]},"JAO5R6VA8r4":{"duration":40.798,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JAO5R6VA8r4","annotations":[{"segment":[5.0871762870514825,34.592798751950085],"label":"Rope skipping"}]},"ZBoa0UN86Qw":{"duration":130.73,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ZBoa0UN86Qw","annotations":[{"segment":[12.48713856,37.81798049921997],"label":"Layup drill in basketball"},{"segment":[45.157319032761315,82.87336427457099],"label":"Layup drill in basketball"},{"segment":[87.56238611544462,99.99848751950078],"label":"Layup drill in basketball"}]},"j-FGgYaQVio":{"duration":152.532,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=j-FGgYaQVio","annotations":[{"segment":[15.94324634481978,145.63084594071762],"label":"Grooming 
dog"}]},"Fb4X1PWfOvY":{"duration":28.35,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Fb4X1PWfOvY","annotations":[]},"4DmsWE6byuw":{"duration":114.89,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=4DmsWE6byuw","annotations":[{"segment":[10.575113425897037,87.28949556630266],"label":"Breakdancing"}]},"LWSHCLmbmc8":{"duration":88.26,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=LWSHCLmbmc8","annotations":[]},"_W8m7v1Ir5I":{"duration":119.049,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_W8m7v1Ir5I","annotations":[{"segment":[65.73078627145085,113.07923400936038],"label":"Running a marathon"}]},"XuSWH1lfcvI":{"duration":57.63,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XuSWH1lfcvI","annotations":[{"segment":[8.001867617561846,46.84239358145755],"label":"Using parallel bars"}]},"2VZumBKJuPs":{"duration":147.1,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2VZumBKJuPs","annotations":[]},"qX-cRt1Ma3w":{"duration":52.663,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qX-cRt1Ma3w","annotations":[]},"lcOepzWnOO4":{"duration":180.49,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=lcOepzWnOO4","annotations":[{"segment":[20.88150702028081,176.36407956318251],"label":"Cumbia"}]},"7LhKz7863kg":{"duration":20.573,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=7LhKz7863kg","annotations":[{"segment":[1.3519371504739066,19.58128294486643],"label":"Doing 
crunches"}]},"c81l0bAPiFo":{"duration":57.47,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=c81l0bAPiFo","annotations":[{"segment":[0.01,56.50550858034322],"label":"Canoeing"}]},"P00O62PPzNU":{"duration":136.069,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=P00O62PPzNU","annotations":[{"segment":[13.89713806552262,28.11253120124805],"label":"Tug of war"},{"segment":[84.54976365054603,120.83084165366616],"label":"Tug of war"}]},"3QC7SNC7ivY":{"duration":24.056,"subset":"testing","resolution":"543x360","url":"https://www.youtube.com/watch?v=3QC7SNC7ivY","annotations":[]},"7EEpIeXhO54":{"duration":140.27,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7EEpIeXhO54","annotations":[{"segment":[0,30.182111069999998],"label":"Plataform diving"},{"segment":[38.88031578,135.19352574102965],"label":"Plataform diving"}]},"hniQpwn3Ob4":{"duration":157.76,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hniQpwn3Ob4","annotations":[{"segment":[5.657820592823714,155.46707020280812],"label":"Shot put"}]},"rDxEl9bPodU":{"duration":73.52,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rDxEl9bPodU","annotations":[{"segment":[25.891309765004667,72.00467892388318],"label":"Horseback riding"}]},"kxj4wRdROQg":{"duration":85.009,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=kxj4wRdROQg","annotations":[]},"UQObeXnSDzc":{"duration":112.966,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UQObeXnSDzc","annotations":[]},"JowsnupmNd0":{"duration":19.064,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=JowsnupmNd0","annotations":[{"segment":[0.2527932192117617,16.22337659764894],"label":"Using the monkey 
bar"}]},"Wgm-mYzdnxc":{"duration":32.694,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Wgm-mYzdnxc","annotations":[{"segment":[0.2,11.107797191887675],"label":"Baking cookies"},{"segment":[14.878333853354134,31.540029641185647],"label":"Baking cookies"}]},"njPRhj-YgE0":{"duration":110.969,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=njPRhj-YgE0","annotations":[{"segment":[28.391213098907954,36.35460213884555],"label":"Grooming dog"},{"segment":[50.203974382215286,59.20606634040562],"label":"Grooming dog"},{"segment":[67.68880683946958,72.88232143073323],"label":"Grooming dog"},{"segment":[89.15533381669267,90.88650534711388],"label":"Grooming dog"}]},"jwd8ELi0au8":{"duration":123.925,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=jwd8ELi0au8","annotations":[{"segment":[7.721177442939792,93.23321762349799],"label":"Throwing darts"}]},"HhAXDtayrAY":{"duration":154.79,"subset":"testing","resolution":"540x360","url":"https://www.youtube.com/watch?v=HhAXDtayrAY","annotations":[]},"tS2d90ZGmeA":{"duration":137.579,"subset":"validation","resolution":"855x470","url":"https://www.youtube.com/watch?v=tS2d90ZGmeA","annotations":[{"segment":[6.437176287051482,14.16178783151326],"label":"Running a marathon"},{"segment":[120.160624024961,127.02694539781591],"label":"Running a marathon"}]},"0k0xNs8Zjb4":{"duration":116.53999999999999,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0k0xNs8Zjb4","annotations":[]},"IiCN1md2MV4":{"duration":12.19,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=IiCN1md2MV4","annotations":[{"segment":[0.3518311817472699,11.610428997659906],"label":"Playing 
racquetball"}]},"W_LjBXWG7wI":{"duration":99.684,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=W_LjBXWG7wI","annotations":[{"segment":[12.904622464898598,32.028340093603745],"label":"Playing beach volleyball"}]},"x2uByejBIiY":{"duration":228.57999999999998,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=x2uByejBIiY","annotations":[{"segment":[14.620384673890355,196.48370622716064],"label":"Preparing pasta"}]},"iOa_svsqGxQ":{"duration":169.32,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=iOa_svsqGxQ","annotations":[{"segment":[26.364641744548283,152.91492211838005],"label":"Surfing"}]},"HMIv7qpDmH0":{"duration":37.593,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=HMIv7qpDmH0","annotations":[{"segment":[0.01,36.004734357186194],"label":"Playing rubik cube"}]},"TN73eY7B208":{"duration":193.52,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=TN73eY7B208","annotations":[{"segment":[0.01,192.60941409984397],"label":"Zumba"}]},"SLv4rEkiYaU":{"duration":73.631,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=SLv4rEkiYaU","annotations":[{"segment":[35.20708416908105,40.95049104821335],"label":"River tubing"},{"segment":[49.68046950449446,68.8634473673947],"label":"River tubing"}]},"S4frK_Og5Yc":{"duration":27.748,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=S4frK_Og5Yc","annotations":[]},"B53aHHzgTzc":{"duration":151.35,"subset":"validation","resolution":"1920x1440","url":"https://www.youtube.com/watch?v=B53aHHzgTzc","annotations":[{"segment":[5.902795865834634,12.277815400936039],"label":"Triple jump"},{"segment":[21.722288786271452,29.986202998439943],"label":"Triple jump"},{"segment":[34.94455152574103,43.91680124180967],"label":"Triple jump"},{"segment":[48.87514976911076,57.13906398127926],"label":"Triple 
jump"},{"segment":[65.40297819344774,81.22247111388455],"label":"Triple jump"},{"segment":[87.59749064898597,96.09751669578783],"label":"Triple jump"},{"segment":[98.45863504212168,109.08366760062403],"label":"Triple jump"},{"segment":[112.15312145085804,126.31983152886116],"label":"Triple jump"},{"segment":[131.51429189079565,149.22267948829955],"label":"Triple jump"}]},"WkiBckpLXfU":{"duration":65.016,"subset":"training","resolution":"853x480","url":"https://www.youtube.com/watch?v=WkiBckpLXfU","annotations":[{"segment":[12.678602382190524,17.64861451600921],"label":"Playing ice hockey"},{"segment":[34.28294030577223,37.1229471950078],"label":"Playing ice hockey"},{"segment":[38.64437945709829,43.81724914820593],"label":"Playing ice hockey"},{"segment":[48.07725948205928,50.41012228393136],"label":"Playing ice hockey"}]},"oL1gGyMz79s":{"duration":137.393,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oL1gGyMz79s","annotations":[{"segment":[19.283447737909515,51.422527301092046],"label":"Playing ten pins"},{"segment":[92.13202808112325,98.78848829953198],"label":"Playing ten pins"},{"segment":[104.3600733229329,119.57478627145085],"label":"Playing ten pins"}]},"kAmDOTsp5cs":{"duration":196.72,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kAmDOTsp5cs","annotations":[{"segment":[6.751666224472109,8.899923659531416],"label":"Rope skipping"},{"segment":[40.50999734683265,196.719001],"label":"Rope skipping"}]},"evEfF66Q2IM":{"duration":22.385,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=evEfF66Q2IM","annotations":[]},"Po6rrhjsS8g":{"duration":168.25,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Po6rrhjsS8g","annotations":[{"segment":[38.3225609078785,109.2457834],"label":"Playing squash"},{"segment":[136.7537949921997,149.87795957878313],"label":"Playing 
squash"}]},"TuMig8MQ0hk":{"duration":4.13,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TuMig8MQ0hk","annotations":[]},"fzwNgtDwgKQ":{"duration":83.987,"subset":"training","resolution":"800x480","url":"https://www.youtube.com/watch?v=fzwNgtDwgKQ","annotations":[{"segment":[4.847900389484967,80.31791726362931],"label":"Cutting the grass"}]},"QMCHIR3nDLs":{"duration":87.26,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=QMCHIR3nDLs","annotations":[{"segment":[1.1571216817472698,7.827587847113885],"label":"Chopping wood"},{"segment":[10.686359060842433,16.676165413416538],"label":"Chopping wood"}]},"_zR5wVbz3xU":{"duration":59.1,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_zR5wVbz3xU","annotations":[{"segment":[5.338684287051482,56.33232385647426],"label":"Using parallel bars"}]},"I-vi5EpjrFI":{"duration":63.693,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=I-vi5EpjrFI","annotations":[{"segment":[16.11152379125614,55.69587066508792],"label":"Gargling mouthwash"}]},"x9geCRHcUf0":{"duration":69.06,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=x9geCRHcUf0","annotations":[]},"_jxYsp9_lXk":{"duration":201.944,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_jxYsp9_lXk","annotations":[]},"_nvQDglPcmc":{"duration":237.07999999999998,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_nvQDglPcmc","annotations":[{"segment":[28.103203288611546,221.12783640249611],"label":"Playing water polo"}]},"fppEk17hs8M":{"duration":36.851,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fppEk17hs8M","annotations":[{"segment":[0,36.821],"label":"Playing ice 
hockey"}]},"v3t4Z5cEgZM":{"duration":51.11,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=v3t4Z5cEgZM","annotations":[{"segment":[4.76144643,41.97336861],"label":"Smoking a cigarette"}]},"t6z_3QbsUaE":{"duration":237.29,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=t6z_3QbsUaE","annotations":[]},"xBtOxEIETtk":{"duration":38.22,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xBtOxEIETtk","annotations":[{"segment":[2.7892956,37.309817666926676],"label":"Breakdancing"}]},"yACg55C3IlM":{"duration":176.332,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=yACg55C3IlM","annotations":[{"segment":[0.01,176.281],"label":"Hand car wash"}]},"dNUZyM1gF1E":{"duration":33.066,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dNUZyM1gF1E","annotations":[{"segment":[0.01,31.077180962700172],"label":"Rock-paper-scissors"}]},"jzVxdBzCuoM":{"duration":221.12,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=jzVxdBzCuoM","annotations":[{"segment":[0,28.632232658013805],"label":"Cheerleading"},{"segment":[38.291299096861835,74.51279824254195],"label":"Cheerleading"},{"segment":[94.86583109582887,107.28463080291918],"label":"Cheerleading"},{"segment":[119.35846385147923,218.36389374570985],"label":"Cheerleading"}]},"s789WxU8PFY":{"duration":167.416,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=s789WxU8PFY","annotations":[{"segment":[29.565429369786816,73.10534681526968],"label":"Baking cookies"},{"segment":[89.00914899595504,109.08444027321362],"label":"Baking cookies"},{"segment":[115.60239198660925,117.68813653489585],"label":"Baking cookies"},{"segment":[131.76691223583043,134.6348109897245],"label":"Baking cookies"},{"segment":[140.89204463458432,160.1851817062354],"label":"Baking 
cookies"}]},"m6YWqN_nBTQ":{"duration":60.72,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=m6YWqN_nBTQ","annotations":[]},"2NHvn4FFobE":{"duration":38.964,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=2NHvn4FFobE","annotations":[]},"O7ergU8WZ1o":{"duration":184.447,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=O7ergU8WZ1o","annotations":[]},"RF0ChBe9HHI":{"duration":82.36,"subset":"validation","resolution":"320x256","url":"https://www.youtube.com/watch?v=RF0ChBe9HHI","annotations":[{"segment":[6.1674416037866004,76.32208884243369],"label":"Playing violin"}]},"kZB7yxzHOrA":{"duration":150.28,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=kZB7yxzHOrA","annotations":[{"segment":[3.8659594212168487,5.974664560062402],"label":"Springboard diving"},{"segment":[10.66067597971919,19.329797106084243],"label":"Springboard diving"},{"segment":[24.484409667706707,126.17085747425897],"label":"Springboard diving"},{"segment":[138.3544871653666,143.5090997269891],"label":"Springboard 
diving"}]},"b6VAlwv45q4":{"duration":234.103,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=b6VAlwv45q4","annotations":[{"segment":[0.01,234.121],"label":"Sumo"}]},"4Smm8f31YLA":{"duration":11.12,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4Smm8f31YLA","annotations":[]},"eFq3gsEi9yI":{"duration":147.842,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=eFq3gsEi9yI","annotations":[{"segment":[0.01,147.821],"label":"Croquet"}]},"Z0oEaM7h6jM":{"duration":196.023,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Z0oEaM7h6jM","annotations":[{"segment":[18.95410608424337,74.89929017160686],"label":"Rafting"},{"segment":[102.10760374414977,180.6938705148206],"label":"Rafting"}]},"Th0SMArnWiY":{"duration":207.773,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Th0SMArnWiY","annotations":[{"segment":[21.717214245032384,86.54471945408429],"label":"BMX"},{"segment":[97.56539533962311,152.66877476731722],"label":"BMX"},{"segment":[174.06185048517943,194.15837698439938],"label":"BMX"}]},"IrO7Fgr102M":{"duration":51.62,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=IrO7Fgr102M","annotations":[{"segment":[0,21.1634992199688],"label":"Playing polo"},{"segment":[26.876839313572543,51.381],"label":"Playing polo"}]},"oHR5-JjR-Tg":{"duration":28.306,"subset":"testing","resolution":"400x226","url":"https://www.youtube.com/watch?v=oHR5-JjR-Tg","annotations":[]},"cudw2faobPA":{"duration":152.509,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cudw2faobPA","annotations":[]},"DZBu_U_Jt4c":{"duration":26.146,"subset":"validation","resolution":"853x480","url":"https://www.youtube.com/watch?v=DZBu_U_Jt4c","annotations":[{"segment":[0.01,26.101],"label":"Playing 
blackjack"}]},"-uICMT1yOcM":{"duration":105.906,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=-uICMT1yOcM","annotations":[{"segment":[2.9722184087363495,10.237641185647426],"label":"Having an ice cream"},{"segment":[25.428979719188767,32.69440249609985],"label":"Having an ice cream"},{"segment":[62.74683307332293,66.21629797191888],"label":"Having an ice cream"},{"segment":[94.12291731669266,99.90239469578783],"label":"Having an ice cream"}]},"mD0aPovD2qw":{"duration":172.46,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mD0aPovD2qw","annotations":[]},"R6INcHHxlNs":{"duration":42.052,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=R6INcHHxlNs","annotations":[{"segment":[0.2,41.98573329173167],"label":"Mooping floor"}]},"d-sWAcDYsbc":{"duration":212.834,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=d-sWAcDYsbc","annotations":[{"segment":[0.2,212.58100000000002],"label":"Playing drums"}]},"OK84hJx9daQ":{"duration":121.86,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=OK84hJx9daQ","annotations":[{"segment":[8.935009569090246,115.01448487871488],"label":"Preparing pasta"}]},"l_cjSGt5j40":{"duration":221.1,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=l_cjSGt5j40","annotations":[{"segment":[0,217.26744149765992],"label":"Playing 
piano"}]},"gXk9TiqGUHs":{"duration":215.83,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gXk9TiqGUHs","annotations":[{"segment":[28.11507246684425,34.51251410600642],"label":"Surfing"},{"segment":[60.438988117347826,67.50984466589549],"label":"Surfing"},{"segment":[110.94510632125966,119.36279268857831],"label":"Surfing"},{"segment":[131.48426105751713,134.51462814975184],"label":"Surfing"}]},"hYVFzI_kfaQ":{"duration":140.02,"subset":"training","resolution":"648x480","url":"https://www.youtube.com/watch?v=hYVFzI_kfaQ","annotations":[{"segment":[17.037790670826833,138.04979107644306],"label":"Preparing salad"}]},"9Ze3-UiuRT0":{"duration":48.44,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9Ze3-UiuRT0","annotations":[]},"-9KxEw6L9T8":{"duration":125.806,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=-9KxEw6L9T8","annotations":[]},"UOkpGU7NAag":{"duration":53.824,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=UOkpGU7NAag","annotations":[{"segment":[3.432001910730596,26.618949351031386],"label":"Using the monkey 
bar"}]},"WvxwmaDFGqk":{"duration":11.494,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=WvxwmaDFGqk","annotations":[{"segment":[2.787546021840874,11.181000000000001],"label":"Kneeling"}]},"sp5pM_rJrcI":{"duration":11.053,"subset":"testing","resolution":"626x360","url":"https://www.youtube.com/watch?v=sp5pM_rJrcI","annotations":[]},"oQuAwR_t5Ig":{"duration":216.95,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=oQuAwR_t5Ig","annotations":[{"segment":[0.01,216.94360544217687],"label":"Skateboarding"}]},"iR-0hVp5ISY":{"duration":74.0,"subset":"testing","resolution":"848x480","url":"https://www.youtube.com/watch?v=iR-0hVp5ISY","annotations":[]},"AISkvED80lU":{"duration":175.45,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=AISkvED80lU","annotations":[{"segment":[1.2317055514820594,4.51625368876755],"label":"Cheerleading"},{"segment":[7.25337713650546,12.180199342433697],"label":"Cheerleading"},{"segment":[12.453911687207489,21.212706719968796],"label":"Cheerleading"},{"segment":[22.307556099063962,33.529762234789395],"label":"Cheerleading"},{"segment":[35.99317333775351,44.478256025741025],"label":"Cheerleading"},{"segment":[49.95250292121685,70.20721643447737],"label":"Cheerleading"},{"segment":[71.57577815834634,82.25055960452418],"label":"Cheerleading"},{"segment":[94.29390277457097,99.76814967004681],"label":"Cheerleading"},{"segment":[102.23156077301093,106.06353359984399],"label":"Cheerleading"},{"segment":[107.97952001326053,118.38058911466459],"label":"Cheerleading"},{"segment":[120.02286318330734,128.78165821606865],"label":"Cheerleading"},{"segment":[134.25590511154445,140.82500138611545],"label":"Cheerleading"}]},"aW8LjbEpY1c":{"duration":35.991,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aW8LjbEpY1c","annotations":[{"segment":[6.681623138845554,28.97241629953198],"label":"Playing rubik 
cube"}]},"_cU5aISTDBo":{"duration":44.49,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_cU5aISTDBo","annotations":[{"segment":[3.5168554499999995,42.950389235569425],"label":"Pole vault"}]},"wBv1tu74S2E":{"duration":54.1,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wBv1tu74S2E","annotations":[{"segment":[6.515503859999999,46.92800016973479],"label":"Using parallel bars"}]},"gEYutYXODs4":{"duration":190.59,"subset":"validation","resolution":"654x480","url":"https://www.youtube.com/watch?v=gEYutYXODs4","annotations":[{"segment":[24.077723868954756,171.5166255850234],"label":"Knitting"}]},"A5JcwIaP30Y":{"duration":125.11,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=A5JcwIaP30Y","annotations":[{"segment":[0.2,124.861],"label":"Slacklining"}]},"outMi06JZss":{"duration":68.917,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=outMi06JZss","annotations":[{"segment":[3.5479520124804993,35.69454751950078],"label":"Running a marathon"}]},"9R8QRLKPJs8":{"duration":13.56,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=9R8QRLKPJs8","annotations":[]},"4E-RWxR6Xr0":{"duration":118.56,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4E-RWxR6Xr0","annotations":[{"segment":[14.06252021,108.31492355694228],"label":"Spinning"}]},"UWusSRe8Whg":{"duration":98.267,"subset":"training","resolution":"352x240","url":"https://www.youtube.com/watch?v=UWusSRe8Whg","annotations":[{"segment":[0,95.35412346333852],"label":"Playing 
congas"}]},"2fLmeXCHTqQ":{"duration":140.39,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=2fLmeXCHTqQ","annotations":[]},"7g3YO0-R4qk":{"duration":209.862,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7g3YO0-R4qk","annotations":[{"segment":[71.70010426053042,113.60701451326052],"label":"Swimming"},{"segment":[117.53578734945398,125.06593528549142],"label":"Swimming"},{"segment":[160.097493074883,167.9550387472699],"label":"Swimming"},{"segment":[174.50299347425897,187.9263006645866],"label":"Swimming"}]},"5xI8T41HejM":{"duration":87.702,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5xI8T41HejM","annotations":[{"segment":[34.73547311922653,35.828330332724605],"label":"River tubing"},{"segment":[62.73993822558929,65.33547406671079],"label":"River tubing"},{"segment":[76.1274388797949,78.58636757138369],"label":"River tubing"}]},"nj5dMUGvOWo":{"duration":66.85,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=nj5dMUGvOWo","annotations":[{"segment":[4.064477358814353,50.545423564742585],"label":"Using uneven bars"}]},"fJCkM6secVM":{"duration":201.39,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=fJCkM6secVM","annotations":[{"segment":[31.7317524700988,201.38666666666666],"label":"Preparing salad"}]},"Y76yy0chbhU":{"duration":98.59,"subset":"training","resolution":"852x480","url":"https://www.youtube.com/watch?v=Y76yy0chbhU","annotations":[{"segment":[2.1533343238491445,98.28432981435257],"label":"Playing field 
hockey"}]},"1_4E-zSZrf4":{"duration":72.56,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=1_4E-zSZrf4","annotations":[]},"UaitYHDxhJI":{"duration":140.342,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UaitYHDxhJI","annotations":[]},"rpaqx40HtWY":{"duration":212.974,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=rpaqx40HtWY","annotations":[]},"wcmO0R3Kqzo":{"duration":93.25,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=wcmO0R3Kqzo","annotations":[{"segment":[15.978601029641185,79.74774513884554],"label":"Doing nails"}]},"rE2IiY5E8LQ":{"duration":40.589,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=rE2IiY5E8LQ","annotations":[{"segment":[15.893460575663028,37.35913043681747],"label":"Curling"}]},"M1dh_TEW4K8":{"duration":115.31,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=M1dh_TEW4K8","annotations":[]},"7-RbxFePrmU":{"duration":123.3,"subset":"training","resolution":"180x240","url":"https://www.youtube.com/watch?v=7-RbxFePrmU","annotations":[{"segment":[3.845273010920437,37.10688455538221],"label":"Brushing teeth"},{"segment":[42.298003120124804,45.7587488299532],"label":"Brushing teeth"},{"segment":[55.56419500780031,74.21376911076442],"label":"Brushing teeth"},{"segment":[79.78941497659906,96.70861622464899],"label":"Brushing teeth"}]},"LGt_KpgXymU":{"duration":225.14,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LGt_KpgXymU","annotations":[{"segment":[22.12771116558948,44.95788935230879],"label":"Playing violin"},{"segment":[54.08996062699651,118.01445954981057],"label":"Playing violin"},{"segment":[128.20023135619303,144.356972842179],"label":"Playing 
violin"}]},"RTS4mOH3cFY":{"duration":136.37,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RTS4mOH3cFY","annotations":[{"segment":[21.70018843057722,101.48029295475818],"label":"Walking the dog"}]},"1Cg6rkv4lyk":{"duration":52.687,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=1Cg6rkv4lyk","annotations":[]},"IKTYMYu8FFs":{"duration":107.091,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IKTYMYu8FFs","annotations":[{"segment":[31.67288161993769,89.6842647975078],"label":"Applying sunscreen"}]},"28wbI73IVvQ":{"duration":83.848,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=28wbI73IVvQ","annotations":[]},"goLVOzKw4U8":{"duration":131.379,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=goLVOzKw4U8","annotations":[{"segment":[35.387123052959495,96.97139252336449],"label":"Welding"}]},"16Jm2QATbtE":{"duration":69.823,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=16Jm2QATbtE","annotations":[]},"WBRQ4vHBFMY":{"duration":29.98,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WBRQ4vHBFMY","annotations":[{"segment":[0.561178855317478,5.892377616224649],"label":"Getting a piercing"}]},"iAwNYgA6mkc":{"duration":203.848,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=iAwNYgA6mkc","annotations":[{"segment":[85.15835881435258,96.59754134165368],"label":"Doing fencing"},{"segment":[114.39182527301092,124.8777425897036],"label":"Doing fencing"},{"segment":[130.91508892355697,150.31290639625587],"label":"Doing fencing"},{"segment":[186.2227550702028,191.30733541341652],"label":"Doing fencing"},{"segment":[197.98084711388455,203.501],"label":"Doing 
fencing"}]},"_sgg_QEjcEE":{"duration":108.41,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=_sgg_QEjcEE","annotations":[{"segment":[5.578595943837754,22.314383775351015],"label":"Discus throw"},{"segment":[22.314383775351015,51.39070202808112],"label":"Discus throw"},{"segment":[64.57647425897035,75.90271450858035],"label":"Discus throw"}]},"Jd4HJLTLze8":{"duration":74.46000000000001,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=Jd4HJLTLze8","annotations":[]},"rSwrt0eKH3I":{"duration":55.68,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=rSwrt0eKH3I","annotations":[]},"qblFXnyqf1o":{"duration":186.78,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=qblFXnyqf1o","annotations":[{"segment":[3.7868533541341654,180.04046489859596],"label":"Doing step aerobics"}]},"Rvqu079gCh0":{"duration":210.535,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Rvqu079gCh0","annotations":[{"segment":[15.43706612754306,18.064651851380177],"label":"Longboarding"},{"segment":[19.049996497819098,22.334478652615495],"label":"Longboarding"},{"segment":[27.918098315769367,55.179300200579455],"label":"Longboarding"},{"segment":[72.58705562100036,138.27669871692825],"label":"Longboarding"},{"segment":[142.54652551816358,182.617207700468],"label":"Longboarding"},{"segment":[190.49996486739468,210.535306],"label":"Longboarding"}]},"W4LIBSfe6bc":{"duration":60.07,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=W4LIBSfe6bc","annotations":[{"segment":[3.842230889235569,57.25861154446178],"label":"Getting a 
tattoo"}]},"pcrNMrBmy1I":{"duration":121.58,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=pcrNMrBmy1I","annotations":[{"segment":[0.2,113.00848049921997],"label":"Hurling"}]},"-PcbohemKh0":{"duration":201.13,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=-PcbohemKh0","annotations":[]},"WMxQ5hc4s78":{"duration":184.74,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=WMxQ5hc4s78","annotations":[{"segment":[12.248616560062402,178.25339743385655],"label":"Polishing forniture"}]},"QwK2hghLFmI":{"duration":63.461,"subset":"testing","resolution":"478x360","url":"https://www.youtube.com/watch?v=QwK2hghLFmI","annotations":[]},"4XX5rcqpOXA":{"duration":171.27,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=4XX5rcqpOXA","annotations":[{"segment":[5.475941226989079,166.8158681099844],"label":"Cumbia"}]},"S-mMmPO6a9s":{"duration":122.486,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=S-mMmPO6a9s","annotations":[{"segment":[0.2,114.24598751950079],"label":"Cutting the grass"}]},"u4nL6NXfsak":{"duration":60.628,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=u4nL6NXfsak","annotations":[{"segment":[7.47200178627145,52.587759407176286],"label":"Grooming dog"}]},"IbcBQFvcS9o":{"duration":122.74,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=IbcBQFvcS9o","annotations":[{"segment":[6.127459794071763,115.46432049453978],"label":"Shoveling 
snow"}]},"lL-YnWr815o":{"duration":168.09,"subset":"validation","resolution":"853x480","url":"https://www.youtube.com/watch?v=lL-YnWr815o","annotations":[{"segment":[18.090809672386897,136.86090795631827],"label":"Rafting"},{"segment":[146.82396255850236,168.061],"label":"Rafting"}]},"bzJTmN6wAC4":{"duration":209.61,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bzJTmN6wAC4","annotations":[{"segment":[10.790975834951766,185.0815855328091],"label":"Painting"}]},"cSwDKlxiqXQ":{"duration":127.059,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=cSwDKlxiqXQ","annotations":[{"segment":[68.96391230520202,83.19142492144603],"label":"Spread mulch"},{"segment":[108.48477911543472,109.07759213254202],"label":"Spread mulch"}]},"JGpu6r-1ghM":{"duration":162.772,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=JGpu6r-1ghM","annotations":[]},"1ZwVoNUvaAE":{"duration":70.705,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=1ZwVoNUvaAE","annotations":[]},"83f0iwb8VW8":{"duration":173.38,"subset":"testing","resolution":"204x360","url":"https://www.youtube.com/watch?v=83f0iwb8VW8","annotations":[]},"PmWF-HawRw0":{"duration":92.25,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=PmWF-HawRw0","annotations":[{"segment":[4.814645085803432,9.988592043681747],"label":"Javelin throw"},{"segment":[18.755557722308893,25.797874414976597],"label":"Javelin throw"},{"segment":[32.40902886115445,35.71460608424337],"label":"Javelin throw"},{"segment":[44.19413026521061,48.93691497659906],"label":"Javelin throw"}]},"-kuXhOsHAc4":{"duration":232.595,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-kuXhOsHAc4","annotations":[{"segment":[54.85163735250357,167.52573936506354],"label":"Beer pong"},{"segment":[172.2355892884181,198.68320808879392],"label":"Beer 
pong"},{"segment":[206.6537233437017,211.3635732670563],"label":"Beer pong"},{"segment":[223.31934614941795,225.85541918507045],"label":"Beer pong"}]},"sQNdiyPVKvQ":{"duration":10.71,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=sQNdiyPVKvQ","annotations":[]},"6ZQXEewM7Fc":{"duration":42.4,"subset":"testing","resolution":"272x480","url":"https://www.youtube.com/watch?v=6ZQXEewM7Fc","annotations":[]},"UusDcf16TVQ":{"duration":116.66,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=UusDcf16TVQ","annotations":[{"segment":[0.2312658742597942,35.482773497312756],"label":"Playing accordion"},{"segment":[43.29625199108222,107.25775035868347],"label":"Playing accordion"}]},"3FUVKpQA6IY":{"duration":129.591,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=3FUVKpQA6IY","annotations":[{"segment":[9.270861741372288,34.86651025686973],"label":"Beer pong"},{"segment":[43.532753455030274,47.765104784364496],"label":"Beer pong"},{"segment":[68.12069927306719,83.84086012846797],"label":"Beer pong"}]},"DRHW4FvSKdw":{"duration":22.036,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DRHW4FvSKdw","annotations":[{"segment":[0.2,21.835736961451246],"label":"Playing blackjack"}]},"WbZpWiPCHVU":{"duration":35.13,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WbZpWiPCHVU","annotations":[{"segment":[3.0616666500000003,33.90820202808112],"label":"Vacuuming 
floor"}]},"Go5vzj3VkzM":{"duration":148.1,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Go5vzj3VkzM","annotations":[]},"DeHIqrMlsU8":{"duration":139.065,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DeHIqrMlsU8","annotations":[{"segment":[2.6033773759113634,120.18925552124126],"label":"Longboarding"},{"segment":[124.74516592908614,127.34854330499752],"label":"Longboarding"}]},"0KqeKi2CBqg":{"duration":88.49,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=0KqeKi2CBqg","annotations":[{"segment":[53.56269500780031,83.48059672386896],"label":"Smoking a cigarette"}]},"Wh-Q13ZAffE":{"duration":123.066,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Wh-Q13ZAffE","annotations":[{"segment":[0.01,123.065781],"label":"Snow tubing"}]},"bFPfj889BhU":{"duration":222.61,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bFPfj889BhU","annotations":[{"segment":[14.759613826053041,204.0299558307332],"label":"Sailing"}]},"Bh35Q9vNsSA":{"duration":27.31,"subset":"validation","resolution":"800x480","url":"https://www.youtube.com/watch?v=Bh35Q9vNsSA","annotations":[{"segment":[3.330393135725429,5.209076443057722],"label":"Playing bagpipes"},{"segment":[0.2,27.169],"label":"Playing bagpipes"},{"segment":[24.63636973478939,26.77123712948518],"label":"Playing 
bagpipes"}]},"NEiu47xcJxc":{"duration":202.13,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=NEiu47xcJxc","annotations":[]},"UN0bAa_ko4I":{"duration":217.386,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=UN0bAa_ko4I","annotations":[{"segment":[21.026339088937707,117.67967199776425],"label":"Capoeira"},{"segment":[132.9407245623158,168.54984721293613],"label":"Capoeira"}]},"PEp22TMpoHA":{"duration":175.148,"subset":"training","resolution":"384x288","url":"https://www.youtube.com/watch?v=PEp22TMpoHA","annotations":[{"segment":[0.20000000000000004,174.94780045351476],"label":"Rafting"}]},"NKnOCGiw048":{"duration":82.896,"subset":"testing","resolution":"426x232","url":"https://www.youtube.com/watch?v=NKnOCGiw048","annotations":[]},"FExyWFc1nU0":{"duration":33.902,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FExyWFc1nU0","annotations":[{"segment":[12.058437602810233,26.232389965678628],"label":"Calf roping"}]},"DouY-lXxXG4":{"duration":82.06,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DouY-lXxXG4","annotations":[]},"OszjSKHCvKI":{"duration":218.988,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=OszjSKHCvKI","annotations":[{"segment":[21.45593348360053,48.70156990302055],"label":"Wakeboarding"},{"segment":[60.96210629175957,197.53085729164323],"label":"Wakeboarding"}]},"trl-RCWyhb0":{"duration":58.793,"subset":"validation","resolution":"720x1280","url":"https://www.youtube.com/watch?v=trl-RCWyhb0","annotations":[{"segment":[0.9171971216848674,58.42545665132605],"label":"Playing 
congas"}]},"_py-3xo0_pQ":{"duration":79.9,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=_py-3xo0_pQ","annotations":[]},"0gvD2pktxxw":{"duration":176.774,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=0gvD2pktxxw","annotations":[{"segment":[15.144109606459406,162.4549939602009],"label":"Playing rubik cube"}]},"Lf_7RurLgp0":{"duration":143.04,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=Lf_7RurLgp0","annotations":[]},"3PiN6rDgg_E":{"duration":208.4,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3PiN6rDgg_E","annotations":[{"segment":[44.70339357791999,152.96688493390076],"label":"Putting on makeup"}]},"_9e948mdwrs":{"duration":99.73,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_9e948mdwrs","annotations":[{"segment":[5.442800312012481,62.060372854914185],"label":"Blowing leaves"},{"segment":[72.79261778471138,90.05753354134166],"label":"Blowing leaves"}]},"j3QSVh_AhDc":{"duration":231.69,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=j3QSVh_AhDc","annotations":[{"segment":[0,214.31353042121685],"label":"Kayaking"}]},"YSnCGTXJtig":{"duration":31.069,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=YSnCGTXJtig","annotations":[{"segment":[0.2,30.868299319727893],"label":"Calf 
roping"}]},"C4xYnBd7PM0":{"duration":62.949,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=C4xYnBd7PM0","annotations":[{"segment":[0.01,62.949114],"label":"Bullfighting"}]},"-ArIbKfZc2s":{"duration":202.13,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=-ArIbKfZc2s","annotations":[{"segment":[0,148.522663024961],"label":"Rafting"}]},"bXApJtAf6Qo":{"duration":127.617,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bXApJtAf6Qo","annotations":[{"segment":[3.782715357593896,127.61687074829932],"label":"Scuba diving"}]},"ABMVo0NDhKo":{"duration":32.53,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ABMVo0NDhKo","annotations":[{"segment":[0.01,31.465391731669264],"label":"Drinking coffee"}]},"tm_CL7A0W4M":{"duration":117.33,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=tm_CL7A0W4M","annotations":[{"segment":[0,109.59644149765991],"label":"Tango"}]},"KIOvUY8S2pQ":{"duration":15.093,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=KIOvUY8S2pQ","annotations":[{"segment":[8.384425892494178,11.92681062769667],"label":"Rock-paper-scissors"}]},"QjaEDlh805g":{"duration":211.23,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=QjaEDlh805g","annotations":[{"segment":[21.419774087363493,198.05052656162246],"label":"Brushing 
hair"}]},"aDrjDISgmLU":{"duration":123.14,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=aDrjDISgmLU","annotations":[{"segment":[91.91934089391576,114.01071853822152],"label":"Cricket"}]},"E6LJROCxQPA":{"duration":38.522,"subset":"training","resolution":"294x240","url":"https://www.youtube.com/watch?v=E6LJROCxQPA","annotations":[{"segment":[5.048112093603745,34.73581892979719],"label":"Slacklining"}]},"xLRUuV1z3QA":{"duration":72.934,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=xLRUuV1z3QA","annotations":[{"segment":[17.240982459695367,60.79715202351014],"label":"Camel ride"}]},"eBITMlyTZhY":{"duration":219.1,"subset":"training","resolution":"384x288","url":"https://www.youtube.com/watch?v=eBITMlyTZhY","annotations":[{"segment":[11.963529149765991,199.9618443603744],"label":"Belly dance"}]},"0jeq5CeP2P0":{"duration":97.385,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0jeq5CeP2P0","annotations":[{"segment":[8.124404836193447,92.55747191887674],"label":"Kite flying"}]},"n8AFyBdT_sQ":{"duration":160.009,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=n8AFyBdT_sQ","annotations":[]},"wDw3i5ODGWA":{"duration":148.957,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wDw3i5ODGWA","annotations":[{"segment":[14.131126408086564,130.42337516502596],"label":"Longboarding"}]},"sgJ_NrROLYw":{"duration":118.585,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=sgJ_NrROLYw","annotations":[]},"yaK-giAk4e8":{"duration":25.496,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=yaK-giAk4e8","annotations":[{"segment":[1.312561305772231,12.489219697347894],"label":"Beer 
pong"}]},"vTbeVoT1Gsg":{"duration":49.27,"subset":"validation","resolution":"2560x810","url":"https://www.youtube.com/watch?v=vTbeVoT1Gsg","annotations":[{"segment":[0.01,41.35528111700468],"label":"Javelin throw"}]},"sC7xUkNTpP4":{"duration":219.27,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=sC7xUkNTpP4","annotations":[{"segment":[0.01,211.65454446177847],"label":"Playing kickball"}]},"3v_wDKeVvcc":{"duration":168.37,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=3v_wDKeVvcc","annotations":[]},"lnieKUxnxuY":{"duration":25.542,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=lnieKUxnxuY","annotations":[{"segment":[7.451395418096724,19.883670126365054],"label":"Laying tile"}]},"A8NAj6NQ5vM":{"duration":8.499,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=A8NAj6NQ5vM","annotations":[{"segment":[0.01,8.490559494965241],"label":"Playing beach volleyball"}]},"dBNZf90PLJ0":{"duration":167.741,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=dBNZf90PLJ0","annotations":[{"segment":[72.2054375072962,113.54043334477379],"label":"Rafting"},{"segment":[134.4695454820593,152.25929079875198],"label":"Rafting"}]},"D59UvMNXsEs":{"duration":13.05,"subset":"testing","resolution":"312x240","url":"https://www.youtube.com/watch?v=D59UvMNXsEs","annotations":[]},"FMYu8k1b_DM":{"duration":213.647,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FMYu8k1b_DM","annotations":[{"segment":[13.661187207488299,195.2550171606864],"label":"Futsal"}]},"wZJeEV6sZXE":{"duration":38.64,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wZJeEV6sZXE","annotations":[{"segment":[13.549492979719188,14.994772230889234],"label":"Drinking coffee"},{"segment":[34.68670202808112,36.55352106084243],"label":"Drinking 
coffee"}]},"fpWOf1DR1oA":{"duration":9.126,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=fpWOf1DR1oA","annotations":[{"segment":[0.01,9.125441],"label":"Throwing darts"}]},"Uc1_7BXtXZs":{"duration":96.015,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Uc1_7BXtXZs","annotations":[{"segment":[20.071676554136992,82.53353568156331],"label":"Playing beach volleyball"}]},"KoMKIJ6QbpY":{"duration":23.847,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=KoMKIJ6QbpY","annotations":[{"segment":[0.01,23.821],"label":"Laying tile"}]},"kVzojOj5_Zg":{"duration":75.78999999999999,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kVzojOj5_Zg","annotations":[{"segment":[7.567169494943064,14.5431538730937],"label":"Volleyball"}]},"RVOBQBamjDA":{"duration":209.65,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=RVOBQBamjDA","annotations":[{"segment":[12.101653524644387,69.33920397904352],"label":"Playing saxophone"},{"segment":[83.40328780498159,203.11153470982842],"label":"Playing saxophone"}]},"YPFk9ftkzl4":{"duration":20.99,"subset":"training","resolution":"720x1280","url":"https://www.youtube.com/watch?v=YPFk9ftkzl4","annotations":[{"segment":[1.110878817472699,19.636416744149766],"label":"Smoking 
hookah"}]},"iTev1WJ819g":{"duration":975.099,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=iTev1WJ819g","annotations":[]},"mzewLmZSCMU":{"duration":43.1,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mzewLmZSCMU","annotations":[{"segment":[7.189698907956318,31.043372854914196],"label":"Ballet"}]},"r3IFPcLG1KA":{"duration":121.53,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=r3IFPcLG1KA","annotations":[]},"BFXSk5F0E_w":{"duration":199.901,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=BFXSk5F0E_w","annotations":[{"segment":[1.6412638805781568,4.754005313599964],"label":"Hula hoop"}]},"21Pz1cjdd2I":{"duration":146.472,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=21Pz1cjdd2I","annotations":[{"segment":[14.594492211838007,141.1753878504673],"label":"Snow tubing"}]},"byPNX32dCSA":{"duration":43.329,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=byPNX32dCSA","annotations":[]},"Cc_DmDsXm6M":{"duration":22.617,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Cc_DmDsXm6M","annotations":[{"segment":[0.01,18.064762368798753],"label":"Waterskiing"}]},"tOCFOu8eOkU":{"duration":17.508,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=tOCFOu8eOkU","annotations":[{"segment":[8.44399375975039,16.751794071762873],"label":"Having an ice cream"}]},"7HKWppcWgeY":{"duration":238.4,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=7HKWppcWgeY","annotations":[{"segment":[34.9498159126365,223.4557379095164],"label":"Assembling bicycle"}]},"brSp5DB7OLs":{"duration":67.52,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=brSp5DB7OLs","annotations":[{"segment":[0.6300788984000294,67.20858514723362],"label":"Hand washing 
clothes"}]},"k_gjsTv-jy8":{"duration":152.231,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=k_gjsTv-jy8","annotations":[{"segment":[54.36479356123238,95.14916672011088],"label":"Baking cookies"}]},"qD52CX0CSBs":{"duration":42.26,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=qD52CX0CSBs","annotations":[{"segment":[1.3179719188767551,42.241],"label":"Grooming horse"}]},"QFWxdR_C71M":{"duration":168.51,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=QFWxdR_C71M","annotations":[{"segment":[1.9717160686427457,159.7090015600624],"label":"Painting"}]},"BrkFwA10o5s":{"duration":174.034,"subset":"testing","resolution":"327x240","url":"https://www.youtube.com/watch?v=BrkFwA10o5s","annotations":[]},"uyzQkTArIwU":{"duration":235.219,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=uyzQkTArIwU","annotations":[{"segment":[4.402390015600624,235.181],"label":"Capoeira"}]},"ZHN4qO6izow":{"duration":212.35,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=ZHN4qO6izow","annotations":[]},"Ktxr4ZiqO3o":{"duration":121.53,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=Ktxr4ZiqO3o","annotations":[{"segment":[29.701749699999997,46.03710414],"label":"Shaving"},{"segment":[54.67529209,91.52953435999999],"label":"Shaving"}]},"2FRMVhgxh40":{"duration":129.82,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=2FRMVhgxh40","annotations":[]},"IMelyusE2pk":{"duration":19.064,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=IMelyusE2pk","annotations":[]},"NgG4AWP1F6Q":{"duration":192.54,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=NgG4AWP1F6Q","annotations":[{"segment":[47.30893692890573,55.11866619970922],"label":"Putting on 
shoes"},{"segment":[85.45646067475352,92.66544154011058],"label":"Putting on shoes"}]},"H9ekrZnisUI":{"duration":22.292,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=H9ekrZnisUI","annotations":[{"segment":[0.2,20.280444284603423],"label":"Croquet"}]},"gMaakjqhNmM":{"duration":19.993,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gMaakjqhNmM","annotations":[{"segment":[0.010000000000000002,3.9469063962558506],"label":"Throwing darts"},{"segment":[12.524435257410296,18.242787831513258],"label":"Throwing darts"}]},"gmxFEr2ZltI":{"duration":35.6,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=gmxFEr2ZltI","annotations":[]},"qqLiKDMtps8":{"duration":168.392,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qqLiKDMtps8","annotations":[{"segment":[0.01,168.321],"label":"BMX"}]},"P8M00PRbI3c":{"duration":205.8,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=P8M00PRbI3c","annotations":[{"segment":[16.695038718555544,51.04829146635253],"label":"Archery"}]},"HSEnmPWF5GY":{"duration":33.577,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=HSEnmPWF5GY","annotations":[{"segment":[0.01,25.98643839747586],"label":"Kneeling"}]},"_BHoMDR2ZVg":{"duration":31.51,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_BHoMDR2ZVg","annotations":[{"segment":[23.10360724633067,25.413966555382213],"label":"Getting a tattoo"}]},"-jNouTszLJ0":{"duration":103.26,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=-jNouTszLJ0","annotations":[{"segment":[0.2,64.5603744149766],"label":"Beer 
pong"}]},"o8-v0rPP06U":{"duration":106.232,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=o8-v0rPP06U","annotations":[{"segment":[13.072021919879061,103.08695604361372],"label":"Futsal"}]},"3iLo6lxAarc":{"duration":123.902,"subset":"validation","resolution":"270x480","url":"https://www.youtube.com/watch?v=3iLo6lxAarc","annotations":[{"segment":[55.73810998439937,70.8191209048362],"label":"Putting on shoes"}]},"7iTrw4kr8e0":{"duration":81.74,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=7iTrw4kr8e0","annotations":[{"segment":[8.601950007800312,30.520992990639623],"label":"Putting on makeup"}]},"Y3maPWNO96w":{"duration":25.101,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=Y3maPWNO96w","annotations":[]},"PEmSc-lmDBQ":{"duration":229.39,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PEmSc-lmDBQ","annotations":[]},"smfBAiFujmE":{"duration":28.236,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=smfBAiFujmE","annotations":[{"segment":[13.170433163806553,27.08968694227769],"label":"Waterskiing"}]},"vgdcVhRSa9E":{"duration":122.53,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=vgdcVhRSa9E","annotations":[{"segment":[0.01,122.481],"label":"Bathing dog"}]},"mgkIGWnC41U":{"duration":56.1,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=mgkIGWnC41U","annotations":[]},"E9HbfcT1ZWM":{"duration":153.415,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=E9HbfcT1ZWM","annotations":[{"segment":[0.01,153.361],"label":"Slacklining"}]},"Y39LnzOipck":{"duration":20.132,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=Y39LnzOipck","annotations":[{"segment":[12.091582082683308,17.90182282371295],"label":"Playing ten 
pins"}]},"mzXRehGBEOg":{"duration":94.11,"subset":"training","resolution":"640x350","url":"https://www.youtube.com/watch?v=mzXRehGBEOg","annotations":[{"segment":[10.932477860374416,38.37373101326053],"label":"Brushing teeth"},{"segment":[39.1554066,82.69062113182528],"label":"Brushing teeth"}]},"RmwaWeIPX70":{"duration":37.29,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=RmwaWeIPX70","annotations":[{"segment":[7.324315132605305,21.101003120124805],"label":"Hammer throw"}]},"r3H_75-lRjE":{"duration":184.46,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=r3H_75-lRjE","annotations":[{"segment":[18.12938694851794,168.34430737909517],"label":"Playing harmonica"}]},"HEINsRxsVlQ":{"duration":103.376,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HEINsRxsVlQ","annotations":[]},"doNSg6PuKDM":{"duration":48.0,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=doNSg6PuKDM","annotations":[{"segment":[2.038897460517943,39.642962001016386],"label":"Shoveling snow"}]},"GUbKGqYKj7U":{"duration":41.77,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GUbKGqYKj7U","annotations":[{"segment":[0.01,20.59309243681747],"label":"Playing kickball"}]},"k9LTYVTi6tc":{"duration":27.96,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=k9LTYVTi6tc","annotations":[]},"dFSfS3JQBck":{"duration":58.61,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=dFSfS3JQBck","annotations":[]},"fmdG9uPnWHQ":{"duration":109.738,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=fmdG9uPnWHQ","annotations":[{"segment":[6.505499457098284,29.27474755694228],"label":"Grooming dog"},{"segment":[35.43785230577223,109.737504],"label":"Grooming 
dog"}]},"TqO5Ddh5Lp4":{"duration":60.14,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=TqO5Ddh5Lp4","annotations":[{"segment":[0.01,0.02],"label":"Canoeing"}]},"ZPEGRXxk-1Y":{"duration":57.89,"subset":"testing","resolution":"320x180","url":"https://www.youtube.com/watch?v=ZPEGRXxk-1Y","annotations":[]},"bJahexpTlr8":{"duration":208.59,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=bJahexpTlr8","annotations":[{"segment":[42.457289391575664,59.37513650546022],"label":"Fixing bicycle"},{"segment":[87.02930967238689,99.7176950078003],"label":"Fixing bicycle"},{"segment":[142.98834243369734,191.46448127925117],"label":"Fixing bicycle"}]},"3FZM5nuRLXI":{"duration":155.667,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3FZM5nuRLXI","annotations":[]},"GGPCuw5aotM":{"duration":171.108,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GGPCuw5aotM","annotations":[]},"0p34rFNYj_M":{"duration":67.06,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=0p34rFNYj_M","annotations":[{"segment":[5.649295338533542,67.059228],"label":"Building sandcastles"}]},"w_PJ9N8CMo8":{"duration":45.09,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=w_PJ9N8CMo8","annotations":[{"segment":[1.33661448770876,17.164943594383775],"label":"Washing face"},{"segment":[19.97886877379095,23.355578989079564],"label":"Washing face"},{"segment":[24.340452801872075,26.521244815912638],"label":"Washing face"},{"segment":[28.56134057098284,30.108999419656787],"label":"Washing face"},{"segment":[36.158938555382214,37.00311610920436],"label":"Washing 
face"}]},"raKtQF0Gnuw":{"duration":175.13,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=raKtQF0Gnuw","annotations":[]},"EfJIu_moZaE":{"duration":191.8,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=EfJIu_moZaE","annotations":[{"segment":[9.874089645865835,67.6225533322933],"label":"Sailing"},{"segment":[73.00842041185646,82.58329521996879],"label":"Sailing"},{"segment":[93.35502937909516,99.63854097191887],"label":"Sailing"},{"segment":[106.2212674024961,119.98514993915755],"label":"Sailing"},{"segment":[130.1584544227769,139.1348995553822],"label":"Sailing"},{"segment":[161.57601238689546,171.7493168705148],"label":"Sailing"}]},"HGDatODUecQ":{"duration":85.311,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=HGDatODUecQ","annotations":[{"segment":[5.852549141965678,64.91009048361934],"label":"Rafting"}]},"n-hsQkfdUk0":{"duration":189.568,"subset":"training","resolution":"848x480","url":"https://www.youtube.com/watch?v=n-hsQkfdUk0","annotations":[{"segment":[7.98491228628737,189.27199450858035],"label":"Playing congas"}]},"TT2e7-NbUxo":{"duration":159.43,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TT2e7-NbUxo","annotations":[{"segment":[18.15634092846707,138.5353672620905],"label":"Getting a haircut"}]},"PNQpC_FlE2M":{"duration":65.69,"subset":"training","resolution":"712x480","url":"https://www.youtube.com/watch?v=PNQpC_FlE2M","annotations":[{"segment":[18.24132087051482,61.999995093603744],"label":"Waterskiing"}]},"V-6nF6U6rfI":{"duration":93.21000000000001,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=V-6nF6U6rfI","annotations":[{"segment":[12.64228081123245,87.62408424336974],"label":"Using the balance 
beam"}]},"3m9Vkg7xCa4":{"duration":23.89,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3m9Vkg7xCa4","annotations":[]},"KujAY-AST9M":{"duration":231.55,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=KujAY-AST9M","annotations":[{"segment":[0.2,231.30100000000002],"label":"Futsal"}]},"j73Wh1olDsA":{"duration":3.066,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=j73Wh1olDsA","annotations":[{"segment":[0.19999999999999998,2.865034013605442],"label":"Throwing darts"}]},"sVk-Br0zfkA":{"duration":10.055,"subset":"training","resolution":"384x216","url":"https://www.youtube.com/watch?v=sVk-Br0zfkA","annotations":[{"segment":[0.2,6.711884735202492],"label":"Throwing darts"}]},"pnEYhDVXVJ0":{"duration":102.122,"subset":"validation","resolution":"322x240","url":"https://www.youtube.com/watch?v=pnEYhDVXVJ0","annotations":[{"segment":[35.65863338533541,53.65765522620905],"label":"Putting in contact lenses"},{"segment":[71.3312449297972,85.34273946957879],"label":"Putting in contact lenses"}]},"LGj7_gw2GbQ":{"duration":163.097,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=LGj7_gw2GbQ","annotations":[{"segment":[14.499964118564742,138.64000780031202],"label":"Wakeboarding"}]},"7MAADmeogHo":{"duration":214.0,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=7MAADmeogHo","annotations":[{"segment":[62.60364664586583,189.14648439937596],"label":"Fixing 
bicycle"}]},"JEkhAGnQD2g":{"duration":54.75,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JEkhAGnQD2g","annotations":[]},"s7ajQRC7w9w":{"duration":95.78,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=s7ajQRC7w9w","annotations":[{"segment":[81.28795192511701,92.64435697347893],"label":"Windsurfing"}]},"Bo2VarUFp_0":{"duration":144.196,"subset":"testing","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=Bo2VarUFp_0","annotations":[]},"EWNqkVaOfbE":{"duration":233.06,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=EWNqkVaOfbE","annotations":[]},"gmPoDbH-odg":{"duration":15.697,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=gmPoDbH-odg","annotations":[{"segment":[0.2,11.229113282259311],"label":"Using the monkey bar"}]},"UtV69uNmzlE":{"duration":25.73,"subset":"testing","resolution":"294x240","url":"https://www.youtube.com/watch?v=UtV69uNmzlE","annotations":[]},"5vm6bjMeEN8":{"duration":219.38,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=5vm6bjMeEN8","annotations":[{"segment":[1.0267494454880235,217.67088244346098],"label":"Doing step aerobics"}]},"Xz3F4x70qjQ":{"duration":111.062,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=Xz3F4x70qjQ","annotations":[{"segment":[0.01,104.38354036178458],"label":"Having an ice 
cream"}]},"mb2fWzNSob0":{"duration":200.946,"subset":"training","resolution":"640x356","url":"https://www.youtube.com/watch?v=mb2fWzNSob0","annotations":[{"segment":[15.313103923334495,39.68907286145407],"label":"Bullfighting"},{"segment":[69.69026540067816,74.06543931264834],"label":"Bullfighting"},{"segment":[91.25362253824547,150.0059579275593],"label":"Bullfighting"},{"segment":[168.7567019810501,191.57010863423827],"label":"Bullfighting"}]},"9D2yOzDsW8k":{"duration":143.5,"subset":"testing","resolution":"320x256","url":"https://www.youtube.com/watch?v=9D2yOzDsW8k","annotations":[]},"_WPcOFgi5vU":{"duration":138.82999999999998,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_WPcOFgi5vU","annotations":[{"segment":[7.469815132605304,125.47124258970359],"label":"Playing racquetball"}]},"9IQdMHlxKfQ":{"duration":126.689,"subset":"testing","resolution":"432x720","url":"https://www.youtube.com/watch?v=9IQdMHlxKfQ","annotations":[]},"TL7hwQauhFo":{"duration":160.381,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=TL7hwQauhFo","annotations":[{"segment":[6.002658346333853,151.0857472698908],"label":"Futsal"}]},"krNVpENNPCM":{"duration":237.146,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=krNVpENNPCM","annotations":[{"segment":[17.788950125561982,209.05872179482878],"label":"Making a 
cake"}]},"hz3VDXxErJM":{"duration":81.712,"subset":"testing","resolution":"854x470","url":"https://www.youtube.com/watch?v=hz3VDXxErJM","annotations":[]},"oDZ6refg3Ew":{"duration":205.52,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=oDZ6refg3Ew","annotations":[{"segment":[10.66849129,194.90241497659906],"label":"Zumba"}]},"ha40KZvmcEo":{"duration":106.58,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=ha40KZvmcEo","annotations":[{"segment":[23.431296411856476,62.661399375975044],"label":"Kayaking"},{"segment":[84.93362090483619,92.74551950078003],"label":"Kayaking"}]},"HaGLPOqibaM":{"duration":200.76,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=HaGLPOqibaM","annotations":[{"segment":[35.704542222505225,99.91007867525585],"label":"Plastering"}]},"_Rrs63vi29o":{"duration":290.11,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=_Rrs63vi29o","annotations":[]},"8ulb1O_5gRs":{"duration":146.47,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8ulb1O_5gRs","annotations":[{"segment":[11.33392598,58.47862714508581],"label":"Paintball"},{"segment":[83.83459438377537,128.37886115444618],"label":"Paintball"}]},"CXWX6FvV1qs":{"duration":52.245,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=CXWX6FvV1qs","annotations":[]},"rcpsp52nguY":{"duration":162.38,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=rcpsp52nguY","annotations":[{"segment":[18.238930034321374,154.27094987363495],"label":"Shoveling 
snow"}]},"9InwA-ajB2M":{"duration":61.35,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=9InwA-ajB2M","annotations":[]},"AmhfmeKk6Bg":{"duration":22.5,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=AmhfmeKk6Bg","annotations":[{"segment":[1.0512168486739468,22.040513260530417],"label":"Brushing hair"}]},"mVC8DBsg1i0":{"duration":165.001,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=mVC8DBsg1i0","annotations":[{"segment":[14.113611605419294,27.200782361251328],"label":"Grooming dog"},{"segment":[45.42017694289985,58.507347698731884],"label":"Grooming dog"},{"segment":[86.47796754943174,92.12341375782987],"label":"Grooming dog"}]},"ZXWFGIQVY0s":{"duration":121.28,"subset":"testing","resolution":"270x480","url":"https://www.youtube.com/watch?v=ZXWFGIQVY0s","annotations":[]},"O1AokzlsyK4":{"duration":226.79,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=O1AokzlsyK4","annotations":[]},"_z1FN-KUHXw":{"duration":55.82,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=_z1FN-KUHXw","annotations":[]},"3KsOJiA_uak":{"duration":24.776,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=3KsOJiA_uak","annotations":[{"segment":[17.685911869289228,20.72989244471691],"label":"Throwing darts"}]},"3VYPdypGB_Q":{"duration":44.676,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3VYPdypGB_Q","annotations":[{"segment":[0.01,40.46515444617784],"label":"Fun sliding 
down"}]},"PEpfA3L4m20":{"duration":23.22,"subset":"validation","resolution":"1080x1920","url":"https://www.youtube.com/watch?v=PEpfA3L4m20","annotations":[{"segment":[0.01,21.372385413398625],"label":"Welding"}]},"T7Mg-Owb14Y":{"duration":121.255,"subset":"validation","resolution":"640x360","url":"https://www.youtube.com/watch?v=T7Mg-Owb14Y","annotations":[{"segment":[40.48252739091677,119.40834982082332],"label":"Kneeling"}]},"YrMrSXH8k6k":{"duration":58.561,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=YrMrSXH8k6k","annotations":[{"segment":[2.920486739469579,55.59951482059282],"label":"Welding"}]},"U8m-9nmAfGU":{"duration":93.3,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=U8m-9nmAfGU","annotations":[{"segment":[49.92366177223089,84.12791983775351],"label":"Bungee jumping"}]},"WlFBWlCklm8":{"duration":161.658,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WlFBWlCklm8","annotations":[{"segment":[0.2,156.36112923556942],"label":"Powerbocking"}]},"7ByEF57KBVs":{"duration":151.65,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=7ByEF57KBVs","annotations":[]},"0PLaN0hmMDY":{"duration":234.73,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=0PLaN0hmMDY","annotations":[{"segment":[25.633598296312805,234.7305215419501],"label":"Wrapping presents"}]},"3TbZ7uSYUe8":{"duration":32.6,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=3TbZ7uSYUe8","annotations":[{"segment":[0.01,32.60081632653061],"label":"Playing water polo"}]},"7Eh6c1eYMFk":{"duration":70.47,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=7Eh6c1eYMFk","annotations":[{"segment":[7.366086719188767,61.23746720280811],"label":"Vacuuming 
floor"}]},"zk7lA7a0u0U":{"duration":140.02,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=zk7lA7a0u0U","annotations":[]},"v9bcQsDl-yk":{"duration":162.308,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=v9bcQsDl-yk","annotations":[{"segment":[8.352500780031201,162.241],"label":"Snow tubing"}]},"1SouLWwpbvU":{"duration":130.82,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=1SouLWwpbvU","annotations":[{"segment":[0.01,107.50553354134165],"label":"Doing step aerobics"}]},"Kt1JpqwDvl8":{"duration":46.51,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Kt1JpqwDvl8","annotations":[{"segment":[1.015809622861105,15.30970217312094],"label":"Hand car wash"},{"segment":[21.549675570696298,32.505907819032764],"label":"Hand car wash"}]},"wqM6TdjBCfs":{"duration":212.39,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=wqM6TdjBCfs","annotations":[{"segment":[6.626924342279814,107.35617434493298],"label":"Chopping wood"},{"segment":[131.21310197714033,159.7088766489435],"label":"Chopping wood"}]},"WidcZbpPEC0":{"duration":92.23,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=WidcZbpPEC0","annotations":[{"segment":[34.81994234633385,58.70469618720748],"label":"Arm wrestling"}]},"wAAu-2U5Pso":{"duration":189.359,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=wAAu-2U5Pso","annotations":[{"segment":[0.01,188.96574102964118],"label":"Surfing"}]},"6jgWCFWtCfU":{"duration":56.611,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=6jgWCFWtCfU","annotations":[{"segment":[0.01,53.27732293291731],"label":"Drum 
corps"}]},"K8W0PtyPlD4":{"duration":113.43,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=K8W0PtyPlD4","annotations":[{"segment":[7.34062268174727,108.69428043213729],"label":"Volleyball"}]},"p4rdUt1iDkY":{"duration":37.478,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=p4rdUt1iDkY","annotations":[{"segment":[0.010000000000000002,32.77476083627148],"label":"Kneeling"}]},"msELZwMnoFo":{"duration":147.446,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=msELZwMnoFo","annotations":[{"segment":[4.8305475076145905,76.8287079782511],"label":"Surfing"},{"segment":[86.25977692168912,106.96212264430577],"label":"Surfing"},{"segment":[112.25272225897035,119.84358257566304],"label":"Surfing"}]},"hP-xRyx88aA":{"duration":174.92000000000002,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=hP-xRyx88aA","annotations":[]},"-Q1l3kk_Uew":{"duration":40.24,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=-Q1l3kk_Uew","annotations":[]},"bXLa6zLsJ50":{"duration":119.19,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=bXLa6zLsJ50","annotations":[{"segment":[24.42877248,94.62335320000001],"label":"Getting a piercing"}]},"ipYVfTYZze8":{"duration":180.094,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ipYVfTYZze8","annotations":[{"segment":[20.223014040561623,167.401616224649],"label":"Skiing"}]},"Ul8qLMmszx4":{"duration":93.182,"subset":"validation","resolution":"294x240","url":"https://www.youtube.com/watch?v=Ul8qLMmszx4","annotations":[{"segment":[0.01,64.83469228393135],"label":"Removing ice from car"},{"segment":[69.92261656630265,89.838205900156],"label":"Removing ice from 
car"}]},"p8UOE62POAE":{"duration":18.948,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=p8UOE62POAE","annotations":[{"segment":[0.009999999999999998,18.901],"label":"Calf roping"}]},"TQco5PWc2JU":{"duration":229.51,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=TQco5PWc2JU","annotations":[{"segment":[42.24904730733229,206.23263770358815],"label":"Shaving"}]},"JQavlg895jU":{"duration":165.51,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=JQavlg895jU","annotations":[{"segment":[11.840656118957734,163.45258584679598],"label":"Cheerleading"}]},"9IIcG8AiUnA":{"duration":132.45,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=9IIcG8AiUnA","annotations":[{"segment":[79.7418190327613,123.3312589703588],"label":"Polishing forniture"}]},"o9LZZeQZ-Zc":{"duration":66.6,"subset":"training","resolution":"480x360","url":"https://www.youtube.com/watch?v=o9LZZeQZ-Zc","annotations":[{"segment":[53.50442579563183,63.78974259906397],"label":"Kayaking"}]},"toOQD4oLneA":{"duration":157.94,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=toOQD4oLneA","annotations":[]},"ODW0I_ITug0":{"duration":191.73,"subset":"training","resolution":"640x356","url":"https://www.youtube.com/watch?v=ODW0I_ITug0","annotations":[{"segment":[25.719634945397814,161.7944477379095],"label":"Playing 
bagpipes"}]},"GPSAx3o5Y6s":{"duration":230.32,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=GPSAx3o5Y6s","annotations":[]},"u0A5NIUKEQo":{"duration":165.68,"subset":"testing","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=u0A5NIUKEQo","annotations":[]},"awqVlr-YDM4":{"duration":132.076,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=awqVlr-YDM4","annotations":[{"segment":[4.813947672292892,123.10538917101074],"label":"Skiing"}]},"LZleSe6Kovg":{"duration":27.75,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=LZleSe6Kovg","annotations":[{"segment":[3.2899159812792513,25.15054190951638],"label":"Doing karate"}]},"_XRJk2oFwZw":{"duration":227.79,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_XRJk2oFwZw","annotations":[{"segment":[12.96780655226209,199.52026875351012],"label":"Cricket"}]},"sEpgbMTaKvY":{"duration":73.63,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=sEpgbMTaKvY","annotations":[]},"aeMXJq-ouaA":{"duration":94.436,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=aeMXJq-ouaA","annotations":[]},"YFTQImGp_nY":{"duration":160.38,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=YFTQImGp_nY","annotations":[{"segment":[54.73557507214464,85.15038378132039],"label":"Hula hoop"},{"segment":[89.3885128637465,101.35499497883204],"label":"Hula hoop"}]},"FxGXQIH3-RQ":{"duration":159.8,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=FxGXQIH3-RQ","annotations":[{"segment":[0.01,159.799749],"label":"Disc dog"}]},"DsqNpc9JChY":{"duration":222.89,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DsqNpc9JChY","annotations":[{"segment":[2.4340360904836196,222.68816326530614],"label":"Washing 
dishes"}]},"dtEs78TCOZQ":{"duration":61.19,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=dtEs78TCOZQ","annotations":[{"segment":[0,40.418916378734274],"label":"Grooming horse"}]},"_gDw8q2sRLk":{"duration":75.047,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=_gDw8q2sRLk","annotations":[{"segment":[7.839418096723869,75.001],"label":"Rope skipping"}]},"WQXoBfyUpaY":{"duration":159.34,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=WQXoBfyUpaY","annotations":[{"segment":[13.671520017263276,150.63529283307332],"label":"Cumbia"}]},"_gJFrtUQDLw":{"duration":226.49,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=_gJFrtUQDLw","annotations":[{"segment":[18.18541856318253,27.013291652106084],"label":"Playing harmonica"}]},"yYFMmKk0Ets":{"duration":74.908,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=yYFMmKk0Ets","annotations":[]},"MjmDj36sVxM":{"duration":32.76,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=MjmDj36sVxM","annotations":[{"segment":[3.271224312012481,4.395707669266771],"label":"Walking the dog"},{"segment":[7.360254702028082,10.01812445553822],"label":"Walking the dog"},{"segment":[14.464945004680189,16.45834731981279],"label":"Walking the 
dog"}]},"TVeIcQOvMZI":{"duration":93.809,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=TVeIcQOvMZI","annotations":[{"segment":[7.463712099504388,17.268980608424336],"label":"Croquet"},{"segment":[7.756406544461778,16.829938728549145],"label":"Croquet"},{"segment":[44.48957716068643,46.977481146645864],"label":"Croquet"},{"segment":[71.27113183307333,76.83232897815913],"label":"Croquet"},{"segment":[81.80813695007801,92.19879477379096],"label":"Croquet"}]},"w2zTEQMg_v0":{"duration":142.037,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=w2zTEQMg_v0","annotations":[{"segment":[7.9770868222484,142.036461],"label":"Elliptical trainer"}]},"dXdhOe0DPC4":{"duration":198.16,"subset":"training","resolution":"426x240","url":"https://www.youtube.com/watch?v=dXdhOe0DPC4","annotations":[{"segment":[54.537857330733225,166.11272652],"label":"Polishing forniture"}]},"rE8IwfGCw4k":{"duration":43.491,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=rE8IwfGCw4k","annotations":[]},"hvy_V1EWKEI":{"duration":222.77,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hvy_V1EWKEI","annotations":[{"segment":[12.163851021840875,58.734023505460215],"label":"Getting a 
haircut"}]},"4efHnJ-T6co":{"duration":203.895,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4efHnJ-T6co","annotations":[]},"DQVkDzj4cPE":{"duration":178.539,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=DQVkDzj4cPE","annotations":[{"segment":[0.01,170.8622345028456],"label":"Swimming"}]},"u9tarG7QQPA":{"duration":226.627,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=u9tarG7QQPA","annotations":[]},"UXAw9BjQpbg":{"duration":193.52,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=UXAw9BjQpbg","annotations":[]},"pnN9AK7WKHU":{"duration":69.06,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pnN9AK7WKHU","annotations":[{"segment":[1.238059016380655,68.41621781825272],"label":"Windsurfing"}]},"5rlLaok_U5M":{"duration":68.28999999999999,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=5rlLaok_U5M","annotations":[{"segment":[0.01,68.289885],"label":"Playing badminton"}]},"ulopyhvgyQg":{"duration":40.566,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ulopyhvgyQg","annotations":[{"segment":[18.016357254290174,29.662603744149763],"label":"Hand car wash"}]},"2B_bp8h6gxc":{"duration":88.72,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=2B_bp8h6gxc","annotations":[]},"ttsZ4M-AKgY":{"duration":128.337,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ttsZ4M-AKgY","annotations":[{"segment":[34.03625137285491,101.30790114508581],"label":"Longboarding"}]},"XyQSmMYbP6o":{"duration":124.55,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=XyQSmMYbP6o","annotations":[{"segment":[60.80354602184087,62.940411856474256],"label":"Washing 
face"}]},"wt0XC2EEh7Y":{"duration":97.66,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=wt0XC2EEh7Y","annotations":[{"segment":[21.17801159282371,95.52959186115444],"label":"Grooming horse"}]},"2sbF8W0_bbg":{"duration":14.26,"subset":"validation","resolution":"272x480","url":"https://www.youtube.com/watch?v=2sbF8W0_bbg","annotations":[{"segment":[0.01,12.778932917316693],"label":"Smoking hookah"}]},"pRxS8ScXsvw":{"duration":97.92,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=pRxS8ScXsvw","annotations":[]},"kxUkqmzS2wc":{"duration":92.18,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=kxUkqmzS2wc","annotations":[{"segment":[14.589503274570985,73.52247216692669],"label":"Shaving"}]},"lQP65cm11FA":{"duration":115.589,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=lQP65cm11FA","annotations":[{"segment":[24.52432926160584,49.5896358424337],"label":"Blowing leaves"}]},"bay60VGnTls":{"duration":101.22,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=bay60VGnTls","annotations":[{"segment":[2.94029575,49.44512854212169],"label":"Playing 
badminton"}]},"HGtfDm5EOtc":{"duration":150.141,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=HGtfDm5EOtc","annotations":[{"segment":[6.790840873634946,138.41109360374415],"label":"Capoeira"}]},"46l7dmH8pCg":{"duration":105.094,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=46l7dmH8pCg","annotations":[{"segment":[53.1204189881881,64.43309982683307],"label":"Croquet"},{"segment":[78.04110818720748,97.55138523400936],"label":"Croquet"}]},"R3HC-IAZVZg":{"duration":119.93,"subset":"validation","resolution":"426x234","url":"https://www.youtube.com/watch?v=R3HC-IAZVZg","annotations":[{"segment":[60.620382612168484,79.33037724555382],"label":"Spinning"},{"segment":[90.93057391825273,95.60807257659906],"label":"Spinning"},{"segment":[106.2727695176287,109.0792687126365],"label":"Spinning"}]},"AjohJX4RedM":{"duration":61.348,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=AjohJX4RedM","annotations":[{"segment":[17.82980733229329,42.399568642745706],"label":"Using the rowing machine"}]},"LZC9MLWo9bE":{"duration":89.91,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=LZC9MLWo9bE","annotations":[{"segment":[6.7325551076443055,83.45563102184087],"label":"Chopping 
wood"}]},"-E0tJqqOh_4":{"duration":143.616,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=-E0tJqqOh_4","annotations":[]},"EyCFMW7OjyM":{"duration":105.419,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EyCFMW7OjyM","annotations":[]},"4Rto4Aa6fxw":{"duration":52.153,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=4Rto4Aa6fxw","annotations":[{"segment":[10.46284509677041,32.52404046846559],"label":"Hurling"},{"segment":[42.01359877173153,50.205439700191874],"label":"Hurling"}]},"HVD8N0bbPVo":{"duration":148.79,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=HVD8N0bbPVo","annotations":[{"segment":[40.622086856474255,43.63972759438377],"label":"Doing karate"}]},"YGVzktgIEwA":{"duration":4.064,"subset":"testing","resolution":"192x144","url":"https://www.youtube.com/watch?v=YGVzktgIEwA","annotations":[]},"tnWmUCOEdFY":{"duration":15.14,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=tnWmUCOEdFY","annotations":[{"segment":[1.1970280811232448,14.498317519999999],"label":"Brushing hair"}]},"xNX6EO_Av5U":{"duration":104.676,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=xNX6EO_Av5U","annotations":[]},"QBlveXzAZsY":{"duration":213.694,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=QBlveXzAZsY","annotations":[{"segment":[50.993873634945395,153.32926677067084],"label":"Assembling bicycle"}]},"yJezvcXU4YE":{"duration":27.795,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=yJezvcXU4YE","annotations":[{"segment":[5.385151545206706,14.095151545206706],"label":"Throwing darts"}]},"zwX7e_5Koh0":{"duration":192.59,"subset":"training","resolution":"854x470","url":"https://www.youtube.com/watch?v=zwX7e_5Koh0","annotations":[{"segment":[11.416972661466458,179.36664944461776],"label":"Doing step 
aerobics"}]},"wWtOjL3U7iM":{"duration":16.626,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=wWtOjL3U7iM","annotations":[]},"8E5JYKost84":{"duration":205.75,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=8E5JYKost84","annotations":[]},"-vDMeHr1ZfI":{"duration":229.09,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-vDMeHr1ZfI","annotations":[{"segment":[55.03818933709729,97.56769927939975],"label":"Kayaking"},{"segment":[109.36159699449202,125.44418478779968],"label":"Kayaking"},{"segment":[130.80504738556888,160.8258779330765],"label":"Kayaking"},{"segment":[180.1249832850457,189.41714512117898],"label":"Kayaking"}]},"0ufZs4e24Xs":{"duration":174.06,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0ufZs4e24Xs","annotations":[{"segment":[6.51694652275887,162.10904475362688],"label":"Doing kickboxing"}]},"uLeJBFypCHE":{"duration":233.652,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=uLeJBFypCHE","annotations":[{"segment":[0.01,233.641],"label":"Wakeboarding"}]},"eaI8My4pGq4":{"duration":182.881,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=eaI8My4pGq4","annotations":[{"segment":[21.673753510140404,51.617755070202804],"label":"Snowboarding"},{"segment":[62.16945085803432,90.68754758190326],"label":"Snowboarding"},{"segment":[126.04998751950077,140.59421684867394],"label":"Snowboarding"},{"segment":[153.42736037441497,163.1235132605304],"label":"Snowboarding"}]},"hEabp9fbqtw":{"duration":200.09,"subset":"training","resolution":"654x480","url":"https://www.youtube.com/watch?v=hEabp9fbqtw","annotations":[{"segment":[0.19999999999999998,199.841],"label":"Tai 
chi"}]},"j5WshG7CEYY":{"duration":57.261,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=j5WshG7CEYY","annotations":[{"segment":[0.2,57.020999999999994],"label":"Table soccer"}]},"ZUl43hNzGjA":{"duration":137.97,"subset":"testing","resolution":"426x240","url":"https://www.youtube.com/watch?v=ZUl43hNzGjA","annotations":[]},"9wRQsxVFwkE":{"duration":56.15,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=9wRQsxVFwkE","annotations":[{"segment":[0.01,55.488916289391575],"label":"Mowing the lawn"}]},"ByTL6_iHoIA":{"duration":161.217,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=ByTL6_iHoIA","annotations":[{"segment":[5.533159427057354,138.07747832293293],"label":"Scuba diving"}]},"4QgjQuoEyEA":{"duration":70.403,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=4QgjQuoEyEA","annotations":[]},"FelGHxSz9Ho":{"duration":118.074,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=FelGHxSz9Ho","annotations":[]},"az-UGTdlbaA":{"duration":117.38,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=az-UGTdlbaA","annotations":[{"segment":[11.719375550532225,109.86914414976599],"label":"Doing kickboxing"}]},"Io1OF2OkTWY":{"duration":78.948,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Io1OF2OkTWY","annotations":[{"segment":[26.110481001552987,32.88442654440872],"label":"Scuba diving"}]},"Et4vNJN26Sg":{"duration":206.1,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Et4vNJN26Sg","annotations":[]},"-Kmx-BCAsQc":{"duration":65.16,"subset":"training","resolution":"426x238","url":"https://www.youtube.com/watch?v=-Kmx-BCAsQc","annotations":[{"segment":[3.9642018741974168,44.31774402948907],"label":"Using the pommel 
horse"}]},"M6sXLVNtTSA":{"duration":51.688,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=M6sXLVNtTSA","annotations":[{"segment":[47.63128081123245,51.440999999999995],"label":"Camel ride"}]},"A5-lBFZJ5R0":{"duration":121.046,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=A5-lBFZJ5R0","annotations":[]},"9I42aiA-UcY":{"duration":56.66,"subset":"validation","resolution":"568x320","url":"https://www.youtube.com/watch?v=9I42aiA-UcY","annotations":[{"segment":[3.97101904,24.91900848],"label":"Bathing dog"},{"segment":[29.890572480000003,50.73704368174727],"label":"Bathing dog"}]},"_Ga4HoMl6yM":{"duration":35.248,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=_Ga4HoMl6yM","annotations":[{"segment":[0.2,29.64775794392523],"label":"Hitting a pinata"}]},"Qwj7f4vqveo":{"duration":83.36,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=Qwj7f4vqveo","annotations":[{"segment":[0.01,59.089214216677945],"label":"Gargling mouthwash"}]},"qYD6UnDI63E":{"duration":131.94,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=qYD6UnDI63E","annotations":[]},"vMYPNyBR3d0":{"duration":12.191,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=vMYPNyBR3d0","annotations":[{"segment":[9.949858099688473,11.99047619047619],"label":"Applying sunscreen"}]},"sgPkVKPp1dU":{"duration":63.74,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=sgPkVKPp1dU","annotations":[{"segment":[10.142519659970072,34.007270995319814],"label":"Snatch"},{"segment":[42.26049758190328,55.68442034321373],"label":"Snatch"}]},"wBwBm7Ju9a8":{"duration":131.63,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=wBwBm7Ju9a8","annotations":[{"segment":[86.66070930109204,110.07142224024959],"label":"Springboard 
diving"}]},"lhH3qdh8n3g":{"duration":89.0,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=lhH3qdh8n3g","annotations":[{"segment":[0.01,89.002108],"label":"Doing step aerobics"}]},"qt-0mo_8ufU":{"duration":161.727,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=qt-0mo_8ufU","annotations":[{"segment":[31.942947845804987,130.28698412698412],"label":"Skiing"}]},"r79KH1TuGMA":{"duration":195.72,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=r79KH1TuGMA","annotations":[]},"me4rMjlnTlI":{"duration":33.902,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=me4rMjlnTlI","annotations":[{"segment":[0.01,33.861],"label":"Tug of war"}]},"cWdWucjUhks":{"duration":69.64,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=cWdWucjUhks","annotations":[{"segment":[43.395762479999995,55.94138424],"label":"Using parallel bars"},{"segment":[43.395762479999995,55.94138424],"label":"Using parallel bars"}]},"zC8yZvN6zTs":{"duration":20.85,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=zC8yZvN6zTs","annotations":[]},"AF8sbdPa4rE":{"duration":23.035,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=AF8sbdPa4rE","annotations":[{"segment":[11.678803510140405,22.83419501133787],"label":"Throwing darts"}]},"8iTz6Jy3lJg":{"duration":189.081,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=8iTz6Jy3lJg","annotations":[{"segment":[157.90980545546938,160.8503978193885],"label":"Wakeboarding"}]},"UCzKdpP9sLE":{"duration":87.82,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=UCzKdpP9sLE","annotations":[{"segment":[2.9092940625698764,24.24411649247642],"label":"Putting on makeup"},{"segment":[48.72445761397175,54.19492490369138],"label":"Putting on 
makeup"}]},"WKXIl7wvlk0":{"duration":68.2,"subset":"validation","resolution":"854x480","url":"https://www.youtube.com/watch?v=WKXIl7wvlk0","annotations":[{"segment":[0,56.81310707122163],"label":"Playing field hockey"}]},"dGHryLMDBIU":{"duration":58.21,"subset":"validation","resolution":"480x360","url":"https://www.youtube.com/watch?v=dGHryLMDBIU","annotations":[{"segment":[0.7275983,26.6197789],"label":"Smoking hookah"}]},"9tAyfaBGwyg":{"duration":134.19,"subset":"training","resolution":"320x240","url":"https://www.youtube.com/watch?v=9tAyfaBGwyg","annotations":[{"segment":[118.69682023712949,130.83864664586585],"label":"Hopscotch"}]},"4vOxhqUbHL8":{"duration":158.453,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=4vOxhqUbHL8","annotations":[{"segment":[6.921502546021841,138.6772474399376],"label":"Playing congas"}]},"rQlHlit_sFE":{"duration":36.665,"subset":"testing","resolution":"480x360","url":"https://www.youtube.com/watch?v=rQlHlit_sFE","annotations":[]},"M4IUb6kp2yo":{"duration":70.427,"subset":"training","resolution":"640x360","url":"https://www.youtube.com/watch?v=M4IUb6kp2yo","annotations":[{"segment":[16.80995007800312,42.299547581903276],"label":"Trimming branches or hedges"}]},"K_0DLTcFy-8":{"duration":180.466,"subset":"training","resolution":"270x480","url":"https://www.youtube.com/watch?v=K_0DLTcFy-8","annotations":[{"segment":[54.05519182399949,180.46551],"label":"Ice fishing"}]},"p1-APric9UM":{"duration":114.82,"subset":"training","resolution":"272x480","url":"https://www.youtube.com/watch?v=p1-APric9UM","annotations":[{"segment":[15.57904896,47.73609828],"label":"Drinking beer"},{"segment":[56.16173664,113.21052184711388],"label":"Drinking 
beer"}]},"Xv2-EY_zKNM":{"duration":49.807,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=Xv2-EY_zKNM","annotations":[{"segment":[0.01,49.022889235569416],"label":"Hurling"}]},"0bXzFAKRoBU":{"duration":31.185,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=0bXzFAKRoBU","annotations":[{"segment":[0.2,21.3724846105919],"label":"Playing drums"}]},"D9eo9NfFhkg":{"duration":120.605,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=D9eo9NfFhkg","annotations":[{"segment":[0.01,106.23145865834633],"label":"Having an ice cream"}]},"t3Zg4mSksXk":{"duration":187.943,"subset":"training","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=t3Zg4mSksXk","annotations":[{"segment":[0.19999999999999998,180.29130218068536],"label":"Longboarding"}]},"6e2y5J4Yv8g":{"duration":133.24,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=6e2y5J4Yv8g","annotations":[]},"EZdnNs-5tlo":{"duration":156.944,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=EZdnNs-5tlo","annotations":[{"segment":[12.971439937597504,156.881],"label":"Riding bumper cars"}]},"de6ieO3-H40":{"duration":124.85,"subset":"testing","resolution":"600x480","url":"https://www.youtube.com/watch?v=de6ieO3-H40","annotations":[]},"xWSO0Inej7s":{"duration":82.106,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=xWSO0Inej7s","annotations":[]},"WVJEFQpO5jg":{"duration":205.17000000000002,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=WVJEFQpO5jg","annotations":[{"segment":[20.165063516826386,30.087555088598098],"label":"Shot put"},{"segment":[38.72972516723798,42.57068964663348],"label":"Shot put"},{"segment":[50.25261860542449,68.81728025583608],"label":"Shot put"},{"segment":[77.45945033447596,105.6265231833763],"label":"Shot 
put"},{"segment":[112.98837176888435,147.23697171016093],"label":"Shot put"},{"segment":[154.91890066895192,172.52332119951464],"label":"Shot put"},{"segment":[180.84541090487156,195.8891884491706],"label":"Shot put"}]},"3Y_4Azzta6Q":{"duration":171.805,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=3Y_4Azzta6Q","annotations":[{"segment":[44.49225862367828,162.21770462408222],"label":"Making a cake"}]},"539ByIcbUPs":{"duration":116.24,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=539ByIcbUPs","annotations":[{"segment":[20.672787205365765,38.98815130836526],"label":"Making a lemonade"},{"segment":[42.977636558523564,52.40732896798865],"label":"Making a lemonade"},{"segment":[56.578154456790514,70.17867235505747],"label":"Making a lemonade"},{"segment":[73.44279665064153,96.47300695837357],"label":"Making a lemonade"},{"segment":[97.74238862887849,103.54527626547238],"label":"Making a lemonade"}]},"OYAyb_Ire24":{"duration":180.09,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=OYAyb_Ire24","annotations":[{"segment":[8.492111637639185,172.59642736193825],"label":"Putting on makeup"}]},"CN0F45eLq1s":{"duration":136.42000000000002,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=CN0F45eLq1s","annotations":[{"segment":[0,126.62753984399377],"label":"Sailing"}]},"KzK4sf7RmRM":{"duration":147.238,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=KzK4sf7RmRM","annotations":[{"segment":[15.160158977787683,134.14443701557585],"label":"Making a 
cake"}]},"0QNcOwi5bu8":{"duration":33.669,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=0QNcOwi5bu8","annotations":[{"segment":[10.610107354134165,33.668707],"label":"Plastering"}]},"DBGraxQspow":{"duration":77.927,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DBGraxQspow","annotations":[]},"Rl6US0JizDs":{"duration":71.52,"subset":"training","resolution":"720x480","url":"https://www.youtube.com/watch?v=Rl6US0JizDs","annotations":[{"segment":[31.103929797191885,64.4555663026521],"label":"Mixing drinks"}]},"RkBHHWFh2rk":{"duration":158.802,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=RkBHHWFh2rk","annotations":[{"segment":[54.24127769110765,113.18841965678628],"label":"Longboarding"}]},"akwJwcvfjLA":{"duration":125.57,"subset":"validation","resolution":"848x480","url":"https://www.youtube.com/watch?v=akwJwcvfjLA","annotations":[{"segment":[17.265913633680242,21.96025056825968],"label":"Playing squash"},{"segment":[38.97722195611015,40.34640356202916],"label":"Playing squash"},{"segment":[58.92815392807277,63.23129611810392],"label":"Playing 
squash"}]},"mShwD_I43ao":{"duration":183.717,"subset":"validation","resolution":"1280x720","url":"https://www.youtube.com/watch?v=mShwD_I43ao","annotations":[{"segment":[65.07333699739688,158.9346140067427],"label":"Rock-paper-scissors"}]},"3SbuecocgOA":{"duration":121.464,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=3SbuecocgOA","annotations":[]},"XC6tvSBS0PA":{"duration":179.677,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=XC6tvSBS0PA","annotations":[{"segment":[16.252758190327615,29.703316692667705],"label":"Swimming"},{"segment":[36.7088159126365,71.73631201248051],"label":"Swimming"},{"segment":[77.62093135725429,85.46709048361934],"label":"Swimming"},{"segment":[93.87368954758192,170.39271138845552],"label":"Swimming"}]},"iAWXaWo82u4":{"duration":43.89,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=iAWXaWo82u4","annotations":[{"segment":[0,43.885463],"label":"Bathing dog"}]},"CPnLc0MtBYc":{"duration":15.0,"subset":"validation","resolution":"320x240","url":"https://www.youtube.com/watch?v=CPnLc0MtBYc","annotations":[{"segment":[3.100642732059282,13.420517787441499],"label":"High jump"}]},"-XqCigUiMFI":{"duration":114.38,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=-XqCigUiMFI","annotations":[]},"YEZrwxz0Ysk":{"duration":132.08,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=YEZrwxz0Ysk","annotations":[{"segment":[31.00983260608424,123.31817152652106],"label":"Mixing drinks"}]},"i49blayQ93Q":{"duration":201.503,"subset":"validation","resolution":"426x240","url":"https://www.youtube.com/watch?v=i49blayQ93Q","annotations":[{"segment":[44.310734789391574,144.24558346333853],"label":"Camel 
ride"}]},"FKQIdqjY9nI":{"duration":212.09,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=FKQIdqjY9nI","annotations":[{"segment":[0.6617505951938758,208.4514374860709],"label":"Playing bagpipes"}]},"qV3e2L59_ic":{"duration":178.59,"subset":"testing","resolution":"720x480","url":"https://www.youtube.com/watch?v=qV3e2L59_ic","annotations":[]},"pi6sBUrSNGk":{"duration":181.4,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=pi6sBUrSNGk","annotations":[{"segment":[7.64063007418256,11.602438260795738],"label":"Washing hands"},{"segment":[146.3039164547582,174.88553262870514],"label":"Washing hands"}]},"PUJYZEq8H64":{"duration":104.58,"subset":"validation","resolution":"640x480","url":"https://www.youtube.com/watch?v=PUJYZEq8H64","annotations":[{"segment":[27.736434602184087,100.99325305148206],"label":"Playing polo"}]},"PUI2Qrn0qbE":{"duration":37.756,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=PUI2Qrn0qbE","annotations":[{"segment":[0.2,35.172054088483016],"label":"Riding bumper cars"}]},"zWA4Fb4fVmE":{"duration":60.77,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zWA4Fb4fVmE","annotations":[{"segment":[2.1787566302652106,59.96317160686427],"label":"Playing 
saxophone"}]},"gKpfkH32eRM":{"duration":132.19,"subset":"testing","resolution":"640x360","url":"https://www.youtube.com/watch?v=gKpfkH32eRM","annotations":[]},"YiBenqCKGcA":{"duration":105.4,"subset":"validation","resolution":"1440x1080","url":"https://www.youtube.com/watch?v=YiBenqCKGcA","annotations":[{"segment":[5.9716807,56.61856586349453],"label":"Tumbling"},{"segment":[57.07560829999999,101.32175864976598],"label":"Tumbling"}]},"2UbwK1Qtveg":{"duration":6.48,"subset":"validation","resolution":"480x480","url":"https://www.youtube.com/watch?v=2UbwK1Qtveg","annotations":[{"segment":[0.8388523837753511,5.457593822152886],"label":"Shaving"}]},"4E8rMLHIpag":{"duration":125.33,"subset":"training","resolution":"854x480","url":"https://www.youtube.com/watch?v=4E8rMLHIpag","annotations":[{"segment":[75.00646689999999,85.9459812],"label":"Playing violin"}]},"PqcdYoa--8g":{"duration":91.56,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=PqcdYoa--8g","annotations":[{"segment":[62.98951536505461,72.13085092823714],"label":"Preparing pasta"},{"segment":[6.998835040561622,56.27634706084244],"label":"Preparing pasta"}]},"mGgidUE8drE":{"duration":153.88,"subset":"training","resolution":"480x270","url":"https://www.youtube.com/watch?v=mGgidUE8drE","annotations":[{"segment":[6.15204244,135.95263461622466],"label":"Fixing bicycle"}]},"RarzzsTjl94":{"duration":170.13,"subset":"testing","resolution":"648x480","url":"https://www.youtube.com/watch?v=RarzzsTjl94","annotations":[]},"zFUUBWJ58UA":{"duration":132.285,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=zFUUBWJ58UA","annotations":[{"segment":[62.53054062870514,78.62751148361934],"label":"Painting 
furniture"}]},"o2EUvWyuDAU":{"duration":91.116,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=o2EUvWyuDAU","annotations":[]},"b-3l2qIHL5w":{"duration":117.842,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=b-3l2qIHL5w","annotations":[{"segment":[4.844975980966633,112.388754797166],"label":"Kneeling"}]},"hQb3VtvSqaE":{"duration":31.6,"subset":"testing","resolution":"1280x720","url":"https://www.youtube.com/watch?v=hQb3VtvSqaE","annotations":[]},"7rT8tqLyFa0":{"duration":139.32,"subset":"training","resolution":"960x720","url":"https://www.youtube.com/watch?v=7rT8tqLyFa0","annotations":[{"segment":[20.702567245744305,135.28281180462707],"label":"Using the rowing machine"}]},"kxXoosv0iDg":{"duration":116.867,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=kxXoosv0iDg","annotations":[{"segment":[0.01,109.79718314369892],"label":"Baton twirling"}]},"sMVf7HDvsEc":{"duration":168.879,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=sMVf7HDvsEc","annotations":[{"segment":[0.01,5.005765792536463],"label":"Decorating the Christmas tree"},{"segment":[9.484608870069088,21.86729267265929],"label":"Decorating the Christmas tree"},{"segment":[46.632660277839676,87.99609340564098],"label":"Decorating the Christmas tree"}]},"DpI9JtaMtec":{"duration":173.82999999999998,"subset":"training","resolution":"1280x720","url":"https://www.youtube.com/watch?v=DpI9JtaMtec","annotations":[{"segment":[41.21893328522257,164.604555475819],"label":"Making a sandwich"}]},"DkiJwIJQKaM":{"duration":106.905,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=DkiJwIJQKaM","annotations":[{"segment":[17.845241326053042,106.90467],"label":"Disc 
dog"}]},"t0ZuC58UIOM":{"duration":99.614,"subset":"validation","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=t0ZuC58UIOM","annotations":[{"segment":[4.957439897615345,15.956764915336274],"label":"Bullfighting"},{"segment":[17.815805763401784,29.434811063811217],"label":"Bullfighting"},{"segment":[32.84305261859799,44.15221777766317],"label":"Bullfighting"},{"segment":[51.27854028618512,59.64422398136396],"label":"Bullfighting"}]},"uWBPngv3j0M":{"duration":77.811,"subset":"testing","resolution":"640x480","url":"https://www.youtube.com/watch?v=uWBPngv3j0M","annotations":[]},"w28ML-KpYdI":{"duration":7.013,"subset":"training","resolution":"192x144","url":"https://www.youtube.com/watch?v=w28ML-KpYdI","annotations":[{"segment":[0.2,6.781],"label":"Painting fence"}]},"GeKKB4pE8qA":{"duration":207.07999999999998,"subset":"testing","resolution":"320x240","url":"https://www.youtube.com/watch?v=GeKKB4pE8qA","annotations":[]},"f0JySdrM_l8":{"duration":78.9,"subset":"training","resolution":"640x480","url":"https://www.youtube.com/watch?v=f0JySdrM_l8","annotations":[{"segment":[6.454890809999999,72.85109516380656],"label":"Discus throw"}]},"ukXZ8nIRAJQ":{"duration":174.475,"subset":"testing","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=ukXZ8nIRAJQ","annotations":[]},"pG0uYjcjBR8":{"duration":175.171,"subset":"training","resolution":"384x288","url":"https://www.youtube.com/watch?v=pG0uYjcjBR8","annotations":[{"segment":[10.111274340093605,160.41400101716067],"label":"Capoeira"}]},"IWHN3TD8mMQ":{"duration":139.95,"subset":"training","resolution":"1920x1080","url":"https://www.youtube.com/watch?v=IWHN3TD8mMQ","annotations":[{"segment":[37.4005685,119.18387831513259],"label":"Preparing 
pasta"}]},"O_VBQsTRjzM":{"duration":79.9,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=O_VBQsTRjzM","annotations":[]},"aVu2j7JbYgk":{"duration":25.682,"subset":"testing","resolution":"854x480","url":"https://www.youtube.com/watch?v=aVu2j7JbYgk","annotations":[]}}} \ No newline at end of file diff --git a/dev/submit/2023-11-09-multimodal-commonsense/2023-11-09-multimodal-commonsense.md b/dev/submit/2023-11-09-multimodal-commonsense/2023-11-09-multimodal-commonsense.md deleted file mode 100644 index cee39940..00000000 --- a/dev/submit/2023-11-09-multimodal-commonsense/2023-11-09-multimodal-commonsense.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -layout: distill -title: Multimodal Commonsense Proposal -description: 6.S898 project proposal for analyzing and evaluating the commonsense reasoning performance of multimodal vs text-only models. -date: 2023-11-08 -htmlwidgets: true - -# Anonymize when submitting -# authors: -# - name: Anonymous - -authors: - - name: Vincent Lin - -# must be the exact same name as your blogpost -bibliography: 2023-11-09-multimodal-commonsense.bib - -# Add a table of contents to your post. -# - make sure that TOC names match the actual section names -# for hyperlinks within the post to work correctly. -toc: - - name: Background - - name: Related Work - - name: Implementation & Evaluation - -# Below is an example of injecting additional post-specific styles. -# This is used in the 'Layouts' section of this post. -# If you use this post as a template, delete this _styles block. 
-_styles: > - .fake-img { - background: #bbb; - border: 1px solid rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 4px rgba(0, 0, 0, 0.1); - margin-bottom: 12px; - } - .fake-img p { - font-family: monospace; - color: white; - text-align: left; - margin: 12px 0; - text-align: center; - font-size: 16px; - } ---- - -## Background - -In recent years, language models have been proven to be quite proficient in producing human-like text, computing somewhat semantically-meaningful and human-interpretable word and token embeddings, and generating realistic conversation. However, there is a vast distinction between mimicking human linguistics from data and forming an understanding of the world and its abstract connections from data. The latter describes the commonsense knowledge of a language model, or its ability to reason about simple relationships, interactions, and general logic of the world. - -Previous work has been completed evaluating the commonsense capabilities of langauge models, and with the vast sizes of LMs and the even vaster data availble today, language models' commonsense performance has grown increasingly close to human performance -- but not quite . From textual data alone, models still perform worse than humans with a significant margin of error. Yet, humans don't learn to reason about the world from text alone; many, many different modes of perception contribute to our knowledge of reality. Can we imbue deep learning models with other modes of input to similarly augment their reasoning skills? - -In this project, I propose an investigation and evaluation of multimodal deep learning models for commonsense reasoning. When compared to standard language models, multimodal models have a more diverse set of input/training data that, perhaps, grants them a richer representation of the data. 
For example, vision-text models can be trained on the same textual data as language models, but the association of images and visualized objects with text embeddings provides a more comprehensive "understanding" of the objects and their interactions with their environment. Do different types of auxiliary inputs types provide multimodal models with any additional commonsense information? In the context of model representations and embeddings, how do the multimodal representations differ from those of the (text-only) unimodal? How are they similar? When observing the relationships between embeddings within the multimodal model (e.g., latent-space distances), does the multimodal affect the relative similarity between words/objects? Do these augmented relationships benefit multimodal models in commonsense reasoning at all? - -## Related Work - -Several works have evaluated the commonsense capabilities of unimodal language models. Li et al., 2021 analyzes the performance of the Gopher language model in zero-shot and few-shot learning with varying model sizes. They find that their LM performed relatively well in physical commonsense (explained further below), but worse in social commonsense. Zhao et al., 2023 measure large language models' commonsense performance in the context of simple task planning, e.g., in robotics, observing that performance varies depending on the particular task and the length of the descrption for the task. Saharia et al., 2022 propose a text-to-image multimodal model and evaluate the depth of its text language understanding. - -## Implementation & Evaluation - -For this project, I will choose to focus on vision-text models to evaluate multimodal performance. It's important to note that different types of commonsense exist, and vision-text models may, intuitively, perform better at physical commonsense tasks than, say, social tasks, which will be a crucial distinction in evaluation. 
Reliable and relatively compact language models already exist with pretrained weights and relatively solid performance in general NLP tasks (e.g., transformer models from Huggingface ), so I will plan to use these as reference. I may choose to implement more of the vision-text model from scratch (though carefully, so as not to have lackluster text processing in the multimodal model impact any comparison with the reference LM). However, if complications do arise, preimplemented multimodal models may also be used for reference . - -Many benchmarks are available for evaluating the commonsense capabilities of language models. I will focus on multiple choice evaluation, where given a short story or background prompt, a model must choose the most reasonable answer or continuation. Multiple choice benchmarks provide a more concrete and reliable metric for determining similarity to “human” judgement. A brief summary of some potential benchmarks is given below: - -__HellaSwag__: Designed to evaluate physical, grounded, and temporal common sense. Given a short description/prompt, the model must choose the correct continuation from four choices. The "stories" are produced from video captions or other passages. - -{% include figure.html path="assets/img/2023-11-09-multimodal-commonsense/hellaswag.png" class="img-fluid" %} - -__Social IQa__: Evaluates a model's social common sense. This dataset is comprised of social situations of interactions between people, evaluating a model's knowledge of emotion, mental states, etc. - -{% include figure.html path="assets/img/2023-11-09-multimodal-commonsense/socialiqa.png" class="img-fluid" %} - -__PIQA__: Another physical common sense benchmark, where given a short question or situational prompt, models must select a solution between two options. PIQA focuses on physical interaction. 
- -{% include figure.html path="assets/img/2023-11-09-multimodal-commonsense/piqa.png" class="img-fluid" %} \ No newline at end of file diff --git a/dev/submit/2023-11-09-multimodal-commonsense/2023-11-09-multimodal-commonsense/index.html b/dev/submit/2023-11-09-multimodal-commonsense/2023-11-09-multimodal-commonsense/index.html new file mode 100644 index 00000000..c0f25333 --- /dev/null +++ b/dev/submit/2023-11-09-multimodal-commonsense/2023-11-09-multimodal-commonsense/index.html @@ -0,0 +1,32 @@ + Multimodal Commonsense Proposal | 6.S898 Deep Learning Blogs 2023

          Multimodal Commonsense Proposal

          6.S898 project proposal for analyzing and evaluating the commonsense reasoning performance of multimodal vs text-only models.

          Background

          In recent years, language models have been proven to be quite proficient in producing human-like text, computing somewhat semantically-meaningful and human-interpretable word and token embeddings, and generating realistic conversation. However, there is a vast distinction between mimicking human linguistics from data and forming an understanding of the world and its abstract connections from data. The latter describes the commonsense knowledge of a language model, or its ability to reason about simple relationships, interactions, and general logic of the world.

          Previous work has been completed evaluating the commonsense capabilities of language models, and with the vast sizes of LMs and the even vaster data available today, language models’ commonsense performance has grown increasingly close to human performance – but not quite . From textual data alone, models still perform worse than humans with a significant margin of error. Yet, humans don’t learn to reason about the world from text alone; many, many different modes of perception contribute to our knowledge of reality. Can we imbue deep learning models with other modes of input to similarly augment their reasoning skills?

          In this project, I propose an investigation and evaluation of multimodal deep learning models for commonsense reasoning. When compared to standard language models, multimodal models have a more diverse set of input/training data that, perhaps, grants them a richer representation of the data. For example, vision-text models can be trained on the same textual data as language models, but the association of images and visualized objects with text embeddings provides a more comprehensive “understanding” of the objects and their interactions with their environment. Do different types of auxiliary inputs provide multimodal models with any additional commonsense information? In the context of model representations and embeddings, how do the multimodal representations differ from those of the (text-only) unimodal? How are they similar? When observing the relationships between embeddings within the multimodal model (e.g., latent-space distances), does the multimodality affect the relative similarity between words/objects? Do these augmented relationships benefit multimodal models in commonsense reasoning at all?

          Several works have evaluated the commonsense capabilities of unimodal language models. Li et al., 2021 analyzes the performance of the Gopher language model in zero-shot and few-shot learning with varying model sizes. They find that their LM performed relatively well in physical commonsense (explained further below), but worse in social commonsense. Zhao et al., 2023 measure large language models’ commonsense performance in the context of simple task planning, e.g., in robotics, observing that performance varies depending on the particular task and the length of the description for the task. Saharia et al., 2022 propose a text-to-image multimodal model and evaluate the depth of its text language understanding.

          Implementation & Evaluation

          For this project, I will choose to focus on vision-text models to evaluate multimodal performance. It’s important to note that different types of commonsense exist, and vision-text models may, intuitively, perform better at physical commonsense tasks than, say, social tasks, which will be a crucial distinction in evaluation. Reliable and relatively compact language models already exist with pretrained weights and relatively solid performance in general NLP tasks (e.g., transformer models from Huggingface ), so I will plan to use these as reference. I may choose to implement more of the vision-text model from scratch (though carefully, so as not to have lackluster text processing in the multimodal model impact any comparison with the reference LM). However, if complications do arise, preimplemented multimodal models may also be used for reference .

          Many benchmarks are available for evaluating the commonsense capabilities of language models. I will focus on multiple choice evaluation, where given a short story or background prompt, a model must choose the most reasonable answer or continuation. Multiple choice benchmarks provide a more concrete and reliable metric for determining similarity to “human” judgement. A brief summary of some potential benchmarks is given below:

          HellaSwag: Designed to evaluate physical, grounded, and temporal common sense. Given a short description/prompt, the model must choose the correct continuation from four choices. The “stories” are produced from video captions or other passages.

          Social IQa: Evaluates a model’s social common sense. This dataset is comprised of social situations of interactions between people, evaluating a model’s knowledge of emotion, mental states, etc.

          PIQA: Another physical common sense benchmark, where given a short question or situational prompt, models must select a solution between two options. PIQA focuses on physical interaction.

          \ No newline at end of file diff --git a/docker/.gitignore b/docker/.gitignore deleted file mode 100644 index 2ca86829..00000000 --- a/docker/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -_site/ -.sass-cache/ -.jekyll-cache/ -.jekyll-metadata diff --git a/feed.xml b/feed.xml new file mode 100644 index 00000000..d7e72893 --- /dev/null +++ b/feed.xml @@ -0,0 +1,21 @@ +Jekyll2023-12-18T18:10:56+00:00https://deep-learning-mit.github.io/staging/feed.xml6.S898 Deep Learning Blogs 2023Staging website for the 2023 ICLR Blogposts track Injecting Node Information via Embedding Initializations2023-12-12T00:00:00+00:002023-12-12T00:00:00+00:00https://deep-learning-mit.github.io/staging/blog/2023/SmartEmbeddingInitializationsIntroduction

          Graph Neural Networks (GNNs) have emerged as a transformative tool in machine learning, with the ability to capture the complex structures and relationships inherent in data. In molecular property prediction, for example, GNNs are great at encoding the atomic structure and intermolecular forces into high-dimensional embeddings, leading to more accurate predictions of chemical properties and drug efficacy. GNNs have also been used in traffic time prediction problems, physics simulations and social media analysis applications. Through message-passing and updating, GNNs are capable of learning embeddings that encode information about node neighbors and long-distance complex connections – that we, as humans, may not be able to make. The quality of the embeddings is not only important for the accuracy of the task the GNN is trained on, but quality node embeddings can be used through transfer learning – enabling models trained on one task to adapt and excel in another. The importance of good embeddings in GNNs is why we want to look closer at embedding initializations and if we can inject additional information – not present in the graph – to result in better learned embeddings after training.

          Possible applications of initial embedding initializations could help in the field of drug discovery. For GNNs used for protein retrieval trained on a biomedical knowledge graph, using ESM embeddings for the proteins could add structure information that is not previously encoded in the graph entities.

          Project Outline

          We will explore the question can additional node information be injected into the model by using intentional embedding initializations rather than random initializations? Furthermore, are the learned embeddings better representations of the nodes? To answer this question we will follow the steps outlined below:

          1. We will download a precision medicine knowledge graph and use a GNN, TxGNN, that is implemented for disease-drug link prediction on a biomedical knowledge graph as a baseline model.
          2. We will modify the GNN for protein-molecular function link prediction.
          3. Generate and download ESM embeddings for each protein
          4. Pretrain and finetune two models – one using random protein node initialization and one using ESM embeddings for protein node initialization. We must pretrain our own models, rather than use the already pretrained model, since we are focusing on how different node initializations impact the predictive power.
          5. Evaluate both models
          6. Visualize latent spaces before pretrain, after pretraining and after finetuning

          In reviewing the literature, we found several papers which reference the possibility of improved performance through a more informed initialization process. As discussed by Li et al., the initialization methods used for GNNs, such as Xavier random initialization were originally designed for CNNs and FNNs. In that setting, the Xavier approach helped to avoid vanishing gradients and maintain a constant information flux. However, Li et al. point out that by leveraging the structure of the graph, we can likely do better than the random initializations used previously .

          In the paper detailing TxGNN, Huang et al. present promising results on their ability to predict drug repurposing opportunities using a GNN . However, in their work they considered only the Xavier random initializations for weight matrices and node embeddings. This left open the idea of initializing the graph using more sophisticated methods.

          Previous work by Cui et al. has explored the power of artificial node initializations, finding that encoding structural and positional information in the node initializations can have profound effect on the ability of a GNN to accurately predict features based on the graph. They provide a basis for our investigation by showing the effect that initializations can have on the results, if done correctly. We seek to build on this work by testing the effect of injecting related, but not exactly equivalent information through the node initializations .

          Not only did we see an opportunity to try a different initialization method, but this problem also lent itself well to data-informed initializations. The molecules in TxGNN have a wealth of knowledge about them which is not represented in the linkages in the graph, some of which is represented in the ESM embeddings of the molecules. Thus, we thought that by supplying these embeddings to the GNN, we might be able to leverage the additional data to make better predictions.

          Data

          Precision Medicine Knowledge Graph. Figure credit: Building a knowledge graph to enable precision medicine (Chandak, Huang, Zitnik 2023).

          We used a precision medicine knowledge graph (PrimeKG) constructed by Marinka Zitnik’s group at Harvard . PrimeKG compiles data from knowledge bases that cover a broad variety of biomedical information including human disease, drug-protein interactions, genes and proteins with their associated biological processes, functions and cellular component, etc. PrimeKG contains 10 different node types – shown above – and 29 different types of undirected edges. There are over 120,000 nodes in total and over 8 million edges. What PrimeKG lacks, importantly, is any nodes or encodings of structural, molecular or sequential information for entity nodes such as proteins and drugs. The node types of interest for our model are proteins, extracted from NCBI, and molecular function Gene Ontology (GO) annotations . We will be predicting links between these two node types.

          The other data used were ESM embeddings for proteins in PrimeKG. ESM embeddings, or Evolutionary Scale Modeling embeddings, are high-dimensional vector representations of proteins, derived from advanced machine learning models developed by Meta trained on large datasets of protein sequences. These embeddings capture the intricate structural and functional characteristics of proteins, reflecting evolutionary relationships and biochemical properties that are crucial for various biological and computational applications . The reason we were interested in using ESM embeddings, rather than embeddings from other protein foundation models, was that structural information was not already captured in PrimeKG, as previously mentioned. To obtain the ESM embeddings, first we downloaded the amino acid sequence for each protein from NCBI using Entrez. Then, using these sequences as input to Facebook’s ESM2 model, we extracted the corresponding embedding.

          GNN

          The model we used as a baseline is TxGNN, a graph neural network trained on PrimeKG used to make therapeutic drug predictions for diseases . The GNN has two training phases. First, pretraining where the GNN finds biologically meaningful embeddings for all nodes in the knowledge graph, and therefore the objective is all link prediction. The second phase is to finetune the GNN, using self-supervised learning, to be able to predict drugs for diseases. Therefore, the objective for finetuning is to optimize contraindication and indication link prediction – the two types of links between diseases and drugs. We modified the training code for the finetuning phase, to train and validate on protein-molecular function links instead.

          Architecture

          The GNN has two linear layers with parameters n_input, n_hidden, and n_output. For all our models n_input is 1280, restricted by the length of ESM embeddings. We play around with different dimensions for the hidden and output layers. Leaky ReLU activation is used after the first layer.

          Training

          The first step of the training phase is node embedding initialization. The default, which is our random control, is to initialize all nodes using Xavier uniform initialization . Models referred to as random from here on out are referring to using Xavier uniform initialization. For our experimental model, we initialized the protein nodes using the ESM embeddings we obtained earlier. All other node types were still initialized with Xavier uniform initialization. Note that we reinitialized nodes between pretraining and finetuning.

          During the training phase, the GNN uses a standard message-passing algorithm to update and optimize the node embeddings. There is a relation-type specific weight matrix (for each of the 29 relation types) used to calculate relation-type specific messages. The message for one relation to some node $i$ is calculated using this equation: \begin{equation} m_{r, i}^{(l)} = W_{r, M}^{(l)} h_i^{(l-1)} \end{equation}

          For each node $v_i$, we aggregate incoming messages from neighboring nodes for each relation-type $r$, denoted as $N_r(i)$. This is done by taking the average of these messages: \begin{equation} m_{g_r, i}^{(l)} = \frac{1}{|N_r(i)|} \sum_{j \in N_r(i)} m_{r, j}^{(l)} \end{equation}

          The new node embedding is then updated by combining the node embedding from the last layer and the aggregated messages from all relations: \begin{equation} h_i^{(l)} = h_i^{(l-1)} + \sum_{r \in TR} m_{g_r, i}^{(l)} \end{equation}

          Finally, DistMult is used to calculate link prediction between two nodes using their respective embeddings.

          Model Evaluation

          We fixed all parameters and hyperparameters, and trained two models – one using random initializations and one using ESM embeddings. We pretrained for 3 epochs with a learning rate of $1e-3$ and a batch size of 1024. We then finetuned for 150 epochs with a learning rate of $5e-4$.

          These results are promising and using ESM embeddings to initialize the protein node representations slightly improves the model. The ESM model has a final testing loss of 0.3915, whereas the random model has a final testing loss of 0.4151. However, the difference between the models is slim and may not be significant, especially looking at the similarities in the pretraining, training and validation loss curves. Later, we will look more in depth about how the embedding spaces vary between the 2 models which has the potential to yield more interesting results.

          Testing varying hidden and output layer dimensions

          We wanted to see the impact changing the hidden and output layer dimensions would have on model performance. We tested 3 models, with parameters detailed in Table 1. All models outside of this experiment, unless otherwise specified, have the same parameters as Model 1.

            Input Dimensions Hidden Layer Dim. Output Layer Dim.
          Model 1 1280 1280 1280
          Model 2 1280 512 512
          Model 3 1280 512 128

          We can see from the testing loss that when just comparing ESM initialized model, testing loss increases as the output layer decreases. The same trend holds true between random initialized models. We can also see that when comparing ESM and random models for the same layer dimensions, ESM always slightly outperforms the random model.

          Latent Space Visualizations

          In the fast-evolving world of deep learning, the analysis of model latent spaces has emerged as an interesting area of study, especially to get a better understanding of how models are achieving their tasks. These spaces are important to understanding how complex models like GNNs perceive and process the intricate relationships and structures inherent in graph data. GNNs can learn powerful representations that capture both node-level and graph-level features. By analyzing the latent spaces of GNNs, we can get insights into how these models prioritize various patterns and connections within the data. The following analyses visualize the latent spaces of our models, clustered and colored in different ways, to get a deeper understanding of how the ESM initialized embeddings are affecting the GNN.

          We first were curious whether, after training our model, the final embeddings retained structural information about the proteins. To do this, we first clustered the original ESM embeddings using K-Means clustering. Next, we visualized the embedding space of the original ESM embeddings, the final embeddings from the ESM model and the final embeddings from the random model using t-distributed Stochastic Neighbor Embedding (t-SNE) for dimensionality reduction. From the t-SNE plot of original ESM embeddings, we can clearly see the clusters from K-Means which serves as a verification of our clustering technique.

          Looking at the embedding space for the ESM and random models, colored by ESM clusters, we note that most of the ESM structural information (used to create the clusters) seems to be forgotten during the training process, as evidenced by the mostly random assortment of colors present in the t-SNE plot. We note that some clusters do remain, for example cluster 12 (light sage green on the right side of the ESM initialized plots) is still clustering in the final embeddings (top middle cluster). However, the most prominent ones appear in both the ESM initialized and random initialized data, meaning that the ESM embedding did encode some function, but the model using random initialized embeddings was able to capture that relation as well.

          Given that the final embedding space for the ESM model didn’t seem to retain much of the information for the ESM embedding initialization, we were curious whether the ESM and random embeddings converged to a similar space. To test this theory, we clustered the final ESM model embeddings and subsequently visualized the final embeddings of the ESM and random models using t-SNE and colored by those clusters.

          If the two models converged to similar embedding spaces, we’d expect to see that clusters found in one embedding space would also be found in the other. This is the case, as seen in the two plots below. Both plots are colored based on a clustering of the final embeddings generated by the ESM initialized network, and they share many of the same structures, indicating that the two networks were able to pick up on mostly the same features in the underlying information. Both models converged to a similar embedding space despite different initialization methods.

          Testing varying hidden and output layer dimensions

          As mentioned earlier, we tested different dimensions for the hidden and output layers to see whether more and less output dimensions would retain the original ESM embedding information.

          Although there are more distinct clusters on the t-SNE plots as the number of output dimensions increases, these clusters are not the same as the clusters from the original ESM embeddings (seen by the randomly colored dots). Therefore, neither of these 3 models retained the structural information provided by initializing with ESM embeddings. It does not seem that decreasing output and hidden layer dimensions improves the model performance or latent space of our GNN.

          Clustering by molecular function labels

          Because our model’s task was to predict links between protein and molecular function nodes, we were curious to see if the final embeddings for the protein nodes would cluster well on the function labels. However, this wasn’t as straightforward as having 1 molecular function label for each protein node, because each protein may be linked to multiple molecular functions. One protein may have multiple molecular function Gene Ontology (GO) annotations because the GO database uses a hierarchical system to categorize functions, where broader functions encompass more specific ones. A protein can be involved in several distinct biochemical activities, each represented by its own GO term, reflecting the diverse roles a protein can play in the cell. Instead of a single label, we extracted a molecular function profile, $v_i$, for each protein where $v_i[j] = 1$ if a link exists between protein $i$ and function $j$. We then had a sparse matrix, $V^{i \times j}$. Before clustering, we performed dimensionality reduction using truncated SVD which is optimal for sparse matrices. Finally, we performed K-Means clustering.

          Looking at the t-SNE plots, there is no apparent clustering by molecular function profiles in the final embedding spaces for either the ESM model or the randomly initialized model. There are multiple possible explanations for this. One explanation is that the actual objective is to predict each singular link between a protein and a function node, not to do well at predicting all function nodes linked to a protein at once. On top of that our GNN uses self-supervised learning, therefore the molecular function profiles are not true labels used during training.

          The second plausible explanation has to do once again with the hierarchical nature of molecular function GO annotations. Because the molecular function nodes have random indices when stored in PrimeKG, it is not apparent that molecular functions that have the same parent function are close to each other, or their parent function in the molecular function profiles. Therefore, when performing truncated SVD and subsequently k-means clustering, the similar functions may not be clustered together if their indices are far apart. Further analysis could be done to reorder the molecular function nodes and then conduct hierarchical clustering, rather than k-means. These possible clusters may then be found in the final latent spaces for the two models.

          Discussion

          In this post, we have modified and fine-tuned a Graph Neural Network, TxGNN originally designed for drug-repurposing prediction, for protein function prediction with a variety of initializations of the node embeddings. We observed that while much of the information in the initialization is forgotten during the training process, a small amount is retained, leading to slightly better performance on the test set in the final network. This provides a potential avenue for further study, investigating the overall effects of informed initialization techniques on GNN performance. Some of this investigation is discussed in Li et al. , where they experiment with weight matrix initializations and propose a new paradigm for determining weight initializations, but there is certainly more investigation to be done.

          ]]>
          Emma Tysinger
          Ensemble Learning for Mitigating Double Descent2023-12-12T00:00:00+00:002023-12-12T00:00:00+00:00https://deep-learning-mit.github.io/staging/blog/2023/double_descentAbstract

          We outline the fundamental ‘bias-variance tradeoff’ concept in machine learning, as well as how the double descent phenomenon counterintuitively bucks this trend for models with levels of parameterization at or beyond the number of data points in a training set. We present a novel investigation of the mitigation of the double descent phenomenon by coupling overparameterized neural networks with each other as well as various weak learners. Our findings demonstrate that coupling neural models results in decreased loss during the variance-induced jump in loss before the interpolation threshold, as well as a considerable improvement in model performance well past this threshold. Machine learning practitioners may also find useful the additional dimension of parallelization allowed through ensemble training when invoking double descent.


          Motivation

          There are many important considerations that machine learning scientists and engineers must consider when developing a model. How long should I train a model for? What features and data should I focus on? What exactly is an appropriate model size? This last question is a particularly interesting one, as there is a bit of contention regarding the correct answer between different schools of thought. A classical statistician may argue that, at a certain point, larger models begin to hurt our ability to generalize. By adding more and more parameters, we may end up overfitting to the training data, resulting in a model that poorly generalizes on new samples. On the other hand, a modern machine learning scientist may contest that a bigger model is always better. Even if the true function relating an input and output is conveyed by a simple function, in reality, neither of these ideas is completely correct in practice, and empirical findings demonstrate some combination of these philosophies. This brings us to the concept known as double descent. Double descent is the phenomenon where, as a model’s size is increased, test loss increases after reaching a minimum, then eventually decreases again, potentially to a new global minimum. This often happens in the region where training loss becomes zero (or whatever the ’perfect’ loss score may be), which can be interpreted as the model ’memorizing’ the training data given to it. Miraculously, however, the model is not only memorizing the training data, but learning to generalize as well, as is indicated by the decreasing test loss.

          The question of ’how big should my model be?’ is key to the studies of machine learning practitioners. While many over-parameterized models can achieve lower test losses than the initial test loss minimum, it is fair to ask if the additional time, computing resources, and electricity used make the additional performance worth it. To study this question in a novel way, we propose incorporating ensemble learning.

          Ensemble learning is the practice of using several machine learning models in conjunction to potentially achieve even greater accuracy on test datasets than any of the individual models. Ensemble learning is quite popular for classification tasks due to this reduced error empirically found on many datasets. To our knowledge, there is not much literature on how double descent is affected by ensemble learning versus how the phenomenon arises for any individual model.

          We are effectively studying two different types of model complexity: one that incorporates higher levels of parameterization for an individual model, and one that uses several models in conjunction with each other. We demonstrate how ensemble learning affects the onset of the double descent phenomenon. By creating an ensemble that includes (or is fully comprised of) overparameterized neural networks, which can take extreme amounts of time and resources to generate, with overparameterized machine learning models, we will show the changes in the loss curve, specifically noting the changes in the regions where double descent is invoked. We hope that the results we have found can potentially be used by machine learning researchers and engineers to build more effective models.


          One of the first papers discussing double descent was ’Reconciling modern machine-learning practice and the classical bias–variance trade-off’ by Belkin et al. . This paper challenged the traditional idea of the ’bias-variance tradeoff’, a fundamental concept in machine learning that describes the tension between two types of model error: bias and variance. Bias is the error between the expected prediction of the model and the true output value, introduced by approximating a real-world quantity with a model, which may oversimplify the true problem at hand. Variance refers to the error due to a model’s sensitivity to small fluctuations in the training dataset. Overfitted models may have high variance, as they may model random noise in the data as well.

          The Bias-Variance Tradeoff

          In short, classical statistical learning argues that there is some optimal level of parameterization of a model, where it is neither underparameterized nor overparameterized, that minimizes the total error between bias and variance. However, Belkin’s paper finds that, empirically, the tension between bias and variance no longer becomes a tradeoff after a certain level of overparameterization. They showed that after the interpolation threshold (beyond where the model fits perfectly to the training data), test error eventually began to decrease again, even going below the error deemed optimal by the bias-variance minimum.

          Nakkiran et al.’s ’Deep Double Descent: Where Bigger Models and More Data Hurt’ expanded these findings to the realm of deep learning. In this work, double descent is shown to occur for both large models and large datasets. Additionally, this paper demonstrates that, counterintuitively, adding more data at a certain point actually worsened the performance of sufficiently large models. Specifically, this occurred at and close to the interpolation threshold for neural models. This paper’s results can be seen here:

          The Double Descent Curve

          For the region between the first and second loss minima, model performance can suffer greatly, despite the increased computational time and resources used to generate such models. While this region of the test loss curve is typically not a level of parameterization that one would use in practice, understanding such loss curve behavior can help practitioners for several reasons. For one, this degraded phase of performance can be crucial for tweaking model architecture and adjusting training strategies. This is key to discovering if one’s model is robust and adaptable to various other datasets and tasks. This highlights the need for a new understanding for model selection in order to effectively generalize to testing datasets better, mitigating decreases in model performance and invoking a second loss minimum quickly.

          In the classic paper ’Bagging Predictors’, Breiman describes the concept of combining the decisions of multiple models to improve classification ability . Empirically, this bootstrap aggregating, or ’bagging’ technique, reduced variance and improved accuracy, outperforming the single predictors that comprised the ensemble model. We present a novel combination of the findings of this paper with the double descent phenomenon. Effectively, by increasing model complexity via overparameterization and ensemble learning, we aim to study if this combination can mitigate loss increases and invoke a second loss minimum with smaller models.


          Setup

          Computing Resources and Software

          We have implemented this project using CUDA and the free version of Google Colab, with additional computing units for more costly experiments. To train and test these models, we use various machine learning packages in Python, namely Scikit-learn, PyTorch and Tensorflow. Additional software commonly used for machine learning projects, such as numpy, tensorboard and matplotlib, was also utilized.

          All plots have been produced by us, unless otherwise specified. Note that all tensorboard plots have $0.25$ smoothing applied, except for the Soft-Voting Ensemble, which has $0.6$ smoothing applied (though this won’t make much of a difference as will soon be seen). The non-smoothed plot can be seen traced in light-blue in all provided plots.

          Data

          We use the MNIST dataset for this report . MNIST is a popular dataset used for image classification, where each sample image is a $28$ by $28$ grayscale image of a written integer between $0$ and $9$, inclusive. Each image comes with the true label of the image’s integer. This data is publicly available for experimentation, and our use of it does not pose any ethical or copyright concerns.

          For this project, we use the MNIST dataset to unearth the double descent phenomenon. We experiment with a variety of models, as well as an ensemble of them: decision trees, AdaBoost trees, L2-Boost trees, random forests, logistic regression, and small neural networks. We choose these models because of their ability to be used for classification tasks, and more complicated models run the risk of exceeding Google Colab’s limitations, especially when we overparameterize these models to invoke double descent.

          Sample MNIST Data

          Models

          Decision Trees

          Decision trees are a machine learning model used for classification tasks. This model resembles a tree, splitting the data at branches, culminating in a prediction at the leaves of the tree.

          To invoke overparameterization for decision trees, we can start with a tree of depth 2, and increase the number of maximum leaves of the model until the loss plateaus. Then, keeping this new number of max leaves in our decision tree, we continually increase the maximum depth of the tree until the loss once again stops decreasing. Lastly, keep both the maximum leaves and depth at their plateau levels while increasing the max features. The results of this are plotted below. Notice how varying the number of maximum leaves has minimal effect on the loss, and how increasing the maximum depth causes the most dramatic decrease. However, fluctuations on the maximum depth at this point do not have a major effect, whereas varying the number of features causes another slight, yet consistent, fall in classification loss.

          Notice that the loss curve is more or less linear in the number of parameters (with some having much more effect than others), and so there is little evidence of double descent for this model.

          Decision Tree Overparameterization

          AdaBoost Tree

          Adaptive Boosting (AdaBoost) itself is an ensemble model used for robust classification. Freund et al.’s paper ‘A Decision-Theoretic Generalization of On-Line Learning and an Application to Boosting’ first introduced the algorithm . On a high level, this paper describes how boosting is especially effective when sequentially combining weak learners that are moderately inaccurate (in this case, these are decision trees) to create a strong learner. We study the loss curve of the AdaBoost model as we first increase the number of boosting trees which form a forest, then increase the number of forests across which we average results, after adding additional trees fails to significantly increase model performance. Each tree is constrained to have no more than 10 leaves.

          Notice that the loss curve is more or less linear in the number of parameters, and the double-U shape doesn’t seem to make its presence known.

          AdaBoost Overparameterization

          L2-Boost Tree

          L2 Boosting is quite similar to the AdaBoost model, except for L2 Boosting, as models are built sequentially, each new model in the boosting algorithm aims to minimize the L2 loss. Like before, we first increase the number of boosting trees which form a forest in the L2-Boost model, then the number of forests across which we average using majority voting. The results can be seen below. Each tree is constrained to have no more than 10 leaves.

          Notice how the classification loss begins to fall, then rises up again, then falls once more when we average across more forests to lower minimums than before. This result was consistent across multiple runs of this experiment, suggesting that double descent is real for L2-Boosted Tree Ensembles.

          L2-Boost Overparameterization

          The behavior of the loss once we add more models agrees with general intuition regarding ensembling, but the appearance of double descent as we increase the total number of parameters is still quite interesting to see. L2-Boost is a relatively inexpensive model and ensembling a large number of trees is still quite fast, suggesting that overparameterization could be the way to go in this case.

          Random Forest

          Random Forest is another popular ensemble model. As the name implies, it is a collection of decision trees with randomly selected features, and, like the singular decision tree, this model is used for classification tasks.

          We initialize random forest with a small number of maximum leaves allowed in each tree, and increase the max leaves until we see the loss plateau as we continually add more. After this, we begin increasing the number of trees in our forest until the loss plateaus once again.

          While Belkin et al. lists random forest as a model exhibiting double descent, this claim has been recently disputed, namely by Buschjager et al., which suggests that there is no true double descent with the random forest model. Instead, they suggest that random forest does not overfit in the classical sense, and argue that its curve instead has a single descent. By looking at our results, there is little evidence for the double-U curve, but we will refrain from drawing a decisive conclusion.

          Random Forest Overparameterization

          Despite this, for our ensemble model, we aim to see if the addition of this overparameterized learner to the neural network’s decision making is able to improve ensemble performance.

          Logistic Regression

          Logistic regression is a classic model used for estimating the probability a sample belongs to various classes. We induce overfitting in logistic regression through two methods.

          First, we continually increase the ‘C’ parameter, indicating the inverse strength of regularization applied to the regression, as shown below. Notice that the loss decreases to a minimum before it starts slowly rising again, indicating that overfitting through fluctuations in ‘C’ may not actually lead to double descent, as would be expected from classical theory.

          Logistic Regression Overparameterization (Regularization Based)

          Second, we try inducing double descent by varying the ratio of the number of features over the amount of data. We gradually reduce this ratio using the intuition developed by Deng et al. in order to induce overfitting , since the data becomes more and more separable as the number of features increases relative to the number of data samples.

          To do this, we test varying across the number of training samples instead of varying the number of features used for training. This eventually leads to 0 training error, but causes testing error to blow up, suggesting that some significant amount of training data is still needed to witness the desired behavior, consistent with both statistical and machine learning theory.

          Logistic Regression Overparameterization (Feature-Data Ratio Based)

          An interesting setup for future experiments would be simultaneously increasing the amount of training samples and the number of polynomial features given to the logistic regression, while increasing the feature-data ratio each time we reparameterize or redefine the dataset.

          Neural Networks

          We use a Multilayer Perceptron as our main model for the ensemble. Our deep learning model is a relatively small one, with variable width in the hidden layer. By increasing this width, we eventually achieve perfect training loss.

          We define the general architecture of the neural network used in this report as follows:

          Network Layer

          Let the input data be an $m$ by $m$ pixel image from the MNIST dataset, which can be processed as an $m$ by $m$ matrix, where entry $(i,j)$ is an integer between $0$ and $255$ (inclusive) representing the grayscale color of the pixel. Note that $m=28$ for MNIST, though for generality, we use $ m $ in this network definition. A value of $0$ represents a black pixel, $255$ is a white pixel, and values between these are varying shades of gray. We first flatten this structure into a $d = m^2 $ by 1 vector, such that the entry $ (i,j) $ of the matrix becomes the $j + m \cdot i$-th entry of the vector, using zero-indexing. We use this vector as the input of our neural network.

          Set $H$ as the hidden layer width, which in our project will be varied in different tests. Let $ W^1 $ be a $ d \times H$ matrix, where $ W^1_{ij}$ is the weight of input $i$ applied to node $j$, and let $W^1_0$ be an $H \times 1$ column vector representing the biases added to the weighted input. For an input $X$, we define the pre-activation to be an $H \times 1$ vector represented by $Z = {W^1}^T X + W^1_0$.

          We then pass this linearly transformed vector to the ReLU activation function, defined such that

          \[\begin{equation*} \text{ReLU}(x)=\begin{cases} x \quad &\text{if} \, x > 0 \\ 0 \quad &\text{if} \, x \leq 0 \\ \end{cases} \end{equation*}\]

          We use this choice of activation function due to the well-known theorem of universal approximation. This theorem states that a feedforward network with at least one single hidden layer containing a finite number of neurons can approximate continuous functions on compact subsets of $ \mathbb{R}^{m^2} $ if the ReLU activation function is used . Applying an activation function ReLU to each element of $Z $, the layer finally outputs

          \[A = \text{ReLU}(Z) = \text{ReLU}({W^1}^T X + W^1_0)\]

          Next, we will input $A$ into a second hidden layer of the neural network. Let $K$ be the number of classes that the data can possibly belong to. Again, $K = 10$ for MNIST, though we will use $K$ for generality. Then let $W^2$ be an $H$ by $K$ matrix, where $W^2_{ij}$ is the weight of input $i$ applied to node $j$, and let $W^2_0$ be a $K \times 1$ column vector representing the biases added to the weighted input. For input $A$, define a second pre-activation to be a $K \times 1$ vector represented by $B = {W^2}^T A + W^2_0$.

          This will yield a $K \times 1$ vector representing the logits of the input image, with which we’ll be able to take Cross Entropy Loss or compute its probability of belonging to any of the $K$ classes.

          Training

          Let class $i $ be the true classification for a data point. We have that $y_i = 1$, and for all $j \neq i$, $y_j = 0$. Furthermore, let $\hat{y_i}$ be the generated probability that the sample belongs to class $i$. The categorical cross-entropy loss is then defined as follows:

          \[\mathcal{L}_{CCE} (y, \hat{y}) = - \sum_{i=0}^{K-1} y_i \log (\hat{y_i})\]

          From this computed loss, we use backpropagation and stochastic gradient descent (SGD) with learning rate $\eta = 0.1$ and $momentum = 0.95$ to optimize model weights. We run experiments on a dataset with $n = 4000$ subsamples that train over $100$, $500$, and $2000$ epochs using Belkin et al.’s approach to training . Up to interpolation, we train until we reach zero classification error on the training data, or until we have finished all of the epochs, whichever comes first. After we have reached the interpolation threshold, we train until we have gone through all of the epochs. Note that to get their results, Belkin et al. trained over $6000$ epochs, which proved to be prohibitively expensive given our resources. Instead, we chose to train over a variety of smaller maximum epoch sizes to illustrate the double descent curve taking clearer shape, with the $2000$ epoch run being the most indicative of this phenomenon. Below are the results of the trained and tested neural networks. Notice that interpolation consistently happens when the number of parameters is roughly equal to $n\times K$ (i.e. Parameter Count / 1000 $= 40$), and the test loss starts consistently getting lower and lower as we add more and more parameters beyond this threshold. Double descent is real!

          MLP 100 Epoch Training/Testing Cross-Entropy Loss Over MLP Parameter Count / 1000
          MLP 500 Epoch Training/Testing Cross-Entropy Loss Over MLP Parameter Count / 1000
          MLP 2000 Epoch Training/Testing Cross-Entropy Loss Over MLP Parameter Count / 1000

          For the sake of brevity, we avoid including plots for train/test classification loss for the MLPs. However, it is worth noting that train classification loss eventually reaches 0 in all experiments, whereas test loss eventually becomes $\sim$ 0.08 or smaller.

          Throughout each experiment, we vary across the number of total parameters of the model. For a network with $H$ hidden units, the total number of parameters is equal to $(d+1)\times H + (H + 1)\times K$, and so we choose $H$ accordingly each time we reparametrize.

          Note that we also incorporated a weight reuse scheme for models in the underparametrized regime to cut on training time, similarly to the approach in Belkin et al .

          Additionally, even though the individual MLPs are small, training several of them sequentially for a relatively large number of epochs can take a very long time. To help reduce the time it takes to complete experiments, we also try adapting the Parameter Count Generation Algorithm provided in John Abascal’s blog to intelligently select the next level of parameterization given the performance of the previous one. This algorithm was designed to most clearly showcase the existence of the double descent curve by fitting a third degree polynomial (since that is roughly what we expect the double descent curve to look like) to the model capacity vs. test loss graph, and choosing the next parameter by examining the first derivative of the polynomial fit to the data. More detail is provided in his blog.

          This algorithm proved helpful for empirically confirming the existence and validity of the interpolation threshold. However, after a few tests with the algorithm, we chose to complete most of the experiments using a pre-specified list of parameters which were able to consistently capture the double descent phenomenon in detail.


          Ensemble Learning

          We experimented with two different types of ensembles. The first ensemble is what we call the ‘weak-learner’ ensemble, which is the model that incorporates the multi-layer perceptron supported by L2-Boost tree ensembles, random forests, decision trees and logistic regression. Note that we ultimately did not use AdaBoost in this ensemble because we believed this was too similar to the included L2-Boost model in both architecture and performance.

          The second ensemble is the ‘multi-layer perceptron’ ensemble, which includes 5 MLPs.

          Weak-Learner Ensemble

          We use bootstrap aggregating, or ‘bagging’, to formulate our ensemble of these five models . Effectively, each model is given a certain number of ‘votes’ on what that model believes is the correct classification for any given MNIST sample image. We then experimented with two approaches to voting: hard voting and soft voting.

          In hard voting, the classification with the most total votes is then used as the ensemble’s overall output. In the event of a tie, the neural network’s prediction is chosen. Using this voting scheme, we train the MLP independently of the other models in the ensemble, using the same scheme as described previously.

          In soft voting, the weighted average of the predicted class probabilities of each model is used as the predicted class probabilities of the ensemble. We utilize this prediction when training the MLP, and use negative log likelihood loss instead of cross entropy loss, since taking the softmax of probabilities is not necessary. This way, we can incorporate the predictions of the whole ensemble into the training of the MLP. Since the ensemble now outputs a vector of class probabilities, the one with the highest probability will be used as the soft voting ensemble’s prediction.

          Since we want a neural model to be the basis of our ensemble, we vary the number of votes assigned to the neural network while keeping the number of votes for other models fixed to 1. With four supplementary models in addition to the neural network, giving the neural network 4 or more votes is not necessary, since this ensemble would always output the same results as the neural network. Because of this, we study the loss curve when giving the neural network 1, 2, and 3 votes. Note that decimal value votes for the neural network are not sensible (at least in the hard-voting scheme), since it can be proved that all potential voting scenarios are encapsulated into the three voting levels we have chosen.

          Another important aspect of our ensemble is that the ‘weak’ classifiers do not vary in parameterization; only the MLP does. Refitting all the weak classifiers across epochs and MLP parameterizations took much longer than expected, perhaps due to incompatibilities between sklearn and GPUs, and completing the experiments using this approach was unfortunately unfeasible. Hence, all ‘weak’ classifiers have fixed architectures, chosen such that each one has low test error but is not at the highest level of parameterization according to the previous discussion, and only the MLP varies.

          Multi-Layer Perceptron Ensemble

          The Multi-Layer Perceptron Ensemble uses 5 identically initialized MLPs which are trained in parallel using Pytorch’s autovectorization capabilities. Since they are defined in the same way and trained simultaneously using the MLP training scheme discussed above, each receives equal weight when it comes to taking an averaged prediction. However, unlike the bagging method used for the Weak-Learner Ensemble, we take advantage of the identical architectures of the models and the numerical stability provided by this, and generate ensemble predictions by averaging the logits of all five learners and using those values as the logits of the ensemble. Again, we experiment using 100 and 500 epochs to see how the behavior evolves across increasing number of epochs, but we omit training over 2000 epochs due to excessive computational costs. An experiment for the future would be training over a very large number of epochs for even greater ensemble sizes to see how results vary across time.

          There has been discussion in the past of whether to average the raw logits or the softmax-transformed probabilities. The main concern raised over averaging across raw logits is that the outputted values can vary greatly in magnitude across models (and therefore overconfident models can potentially overshadow all other models when taking the prediction), but, empirically, that doesn’t seem to be a problem here. Tassi et al. provide some intuition in “The Impact of Averaging Logits Over Probabilities on Ensembles of Neural Networks” , suggesting that different approaches to ensembling should be taken depending on the architecture and levels of confidence of the models. For general safety, they recommend averaging the probabilities, but for the purposes of our task, averaging the logits suffices.


          Results and Discussion

          Contrary to our expectations, the Weak Learner Ensemble performs much worse than even the individual models on MNIST classification. Although our focus is on double descent and not on the strong predictive power of ensembles, the latter is needed to observe the former, or at least discuss it at an interesting level.

          Initially, we tried applying the soft-voting scheme for the Weak Learner Ensemble, but the reported results are unexpectedly poor, yielding very high classification loss, especially when compared to the results of each model taken individually. This may be because each ‘weak’ learner has high confidence in its predicted class, whereas the MLP may be more evenly split between different classes, which would result in the weak classifiers winning more often, even if the MLP has higher weight in the prediction. The plot of the negative log likelihood loss for both training and testing is also hard to understand, but it is clear the ensemble has a very hard time improving, even as more parameters are added. We only include the results for the ensemble that allocates 3 votes to the MLP, but note that these are the best loss curves we were able to produce given this method.

          Soft-Voting Weak-Learner Ensemble 100 Epoch Training/Testing Negative Log Likelihood Loss Over MLP Parameter Count / 1000. MLP given 3 votes.
          Soft-Voting Weak-Learner Ensemble 100 Epoch Training/Testing Zero-One Loss Over MLP Parameter Count / 1000. MLP given 3 votes.

          We then tried the weak-learner approach again with hard-voting, and let the MLP independently train using the unmodified MLP training scheme mentioned previously. However, as opposed to halting training when MLP classification loss first hits 0, we only halt training when ensemble classification loss first hits 0.

          We found that while classification loss had certainly gone down when compared to the soft-voting scheme (with even just one vote!), the ensemble still severely underperformed when compared to each of the individual models used. As seen in the plots, the classification loss starts to improve once the MLP gets more and more votes, agreeing with intuition that, eventually, the MLP has the veto right. As opposed to the soft-voting scheme, all classifiers now have a contribution that is proportional to their voting weight, which mitigates the previous problem of some models having much higher confidence than others. However, we believe the poor results can be attributed to the models we used for ensembling. Indeed, a significant number of models are regular, boosted or ensembled (or all) versions of decision trees, which means there is a significant chance that they make similar mistakes on similar data points. Looking at the plots for overparameterized decision trees and L2-Boost ensembles, we see that train error never quite reaches 0 for any of them. Since the train loss seems to plateau for our models as well, this may explain why. In the cases of 1 or 2 votes, this can lead to consistently poor predictions, especially since the models are not reparameterized across the experiment. For 3 votes, this phenomenon is less significant, as the ensemble slowly begins to reach the testing performance of the individual models.

          Weak-Learner Ensemble 100 Epoch Training/Testing Zero-One Loss Over MLP Parameter Count / 1000. MLP given 1 vote.
          Weak-Learner Ensemble 100 Epoch Training/Testing Zero-One Loss Over MLP Parameter Count / 1000. MLP given 2 votes.
          Weak-Learner Ensemble 100 Epoch Training/Testing Zero-One Loss Over MLP Parameter Count / 1000. MLP given 3 votes.

          Further work could be done on the Weak-Learner Ensemble, focusing on better model selection and concurrent reparameterization across all models. Given the limited time and compute resources at our disposal, we leave this problem open for now.

          All hope is not lost, however. Seeing the poor performance of the Weak-Learner Ensemble given the significantly better performance of individual models, one could be discouraged from attempting to use ensembling to mitigate double descent, since it may not even be observable in such settings. However, we saw double descent in L2-Boost ensembles and, arguably, in random forests, and so we pushed onward. All other ensemble methods used multiple copies of the same model, and so we decided to experiment with a small ensemble of MLPs, to see how they would behave.

          This was feasible for 100 and 500 epochs only, but the obtained results shed light on how ensembling could in fact mitigate double descent. The phenomenon is not quite as observable in the 100 epoch case (one explanation could be that the train loss has not converged yet), but it becomes quite clear when looking at the 500 epoch ensemble and comparing it with the original 500 epoch MLP. Double descent is still very easy to see, occurring at the same threshold as before. This makes sense, since the MLPs have all reached interpolation, which should increase test loss for all, and then start going down as we overparametrize more and more. However, the main result is that the increase once we reach interpolation is much lower than before. Indeed, the ensemble sees a jump from $\sim$ 0.35 to around $\sim$ 0.4 at the highest, whereas the individual MLP sees a jump from $\sim$ 0.36 to around $\sim$ 0.52. Another important result is that the loss as we overparameterize becomes significantly lower in the ensemble model than in the individual MLP.

          MLP Ensemble 100 Epoch Training/Testing Cross-Entropy Loss Over MLP Parameter Count / 1000
          MLP Ensemble 500 Epoch Training/Testing Cross-Entropy Loss Over MLP Parameter Count / 1000

          While we weren’t able to fully get rid of the double descent curve by ensembling multiple MLPs, the fact that it became flatter and the loss past the interpolation threshold started to become smaller is quite exciting, as it suggests that, potentially, large ensembles of MLPs may not noticeably suffer from double descent at all, and yield better overall predictions than individual models can. One notable advantage to this ensemble method is the ability to further parallelize one’s training of overparameterized neural networks. These models can take extreme lengths of time to train, and besides increasing the computational allocation used, practitioners may use data, model, or processor parallelism in order to reduce this time. The ensemble neural networks we use are independently generated, meaning that they can be vectorized or trained on different GPU cores without issue. This could be a valid alternative to training for more epochs for reducing model error past the interpolation threshold. More work investigating the effect of neural network ensembling on double descent, especially on models trained over many epochs, would be very exciting and potentially shed even more light on the possible advantages of overparameterization.


          Conclusion

          We discussed the existence of double descent for some simple and classical models, observing the effects of varying across levels of parameterization and noting that single descent can sometimes be mistaken for double descent, and proposed the use of various ensembles to mitigate the effects of double descent.

          Ensembles consisting solely of neural networks resulted in a considerable boost in performance past the individual model interpolation threshold, and in a flatter curve when compared to individual models. However, pairing the neural network with weak learners in an ensemble voting system decreased testing performance, though this adverse effect waned as the neural network received proportionally more votes. Machine learning engineers that intend to intentionally overparameterize their models may take advantage of not only the ensemble approach’s increased performance and significantly more reliable results, but the enhanced parallelization and vectorization capabilities offered by the proposed method.


          Future Work

          This project was implemented using Google Colab, which proved to be restrictive for adopting more complex models. A key part of the double descent phenomenon is overparameterization, which happens across multiple full training loops, and so complex models that are additionally overparameterized will require more powerful computing resources beyond what we used. For example, a model which takes 10 hours to complete a single training loop will take multiple days to train before being able to plot results and observe double descent. Even for models that take around 10 to 15 minutes to train, such as the 500 epoch MLP we explored throughout our project, a full experiment that showcases the double descent curve in detail can take upwards of 5 hours. Furthermore, additional computing power can allow for this project to be expanded to more complicated datasets and tasks. MNIST classification is computationally inexpensive, though invoking double descent in more complex tasks such as text generation in natural language processing was not feasible using Google Colab. Future projects that follow this work should keep computational limitations in mind when choosing models and datasets.

          In addition to the future work suggested throughout our project, we propose a final approach that we believe is worth exploring further. During the planning process of this project, we discussed using a more rigorous voting system than what is traditionally found in ensemble model projects. Effectively, each model would have a weight associated with how much influence its output should have on the overall ensemble output. For $n$ models, each model could start with, say, a weight of $1/n$. Then, after producing each model’s vector output, the categorical cross-entropy loss with respect to the true output could be computed, and the weights of each model could be updated such that each model has its weight decreased by some amount proportional to the calculated loss. Then, these weights could be normalized using the softmax function. This would be repeated for each level of parameterization. Due to resource constraints and the limitations of sklearn to the CPU, learning both the model weights and ensemble weights at each level of ensemble parameterization was not feasible given the size of the models we built and the classifiers we chose to use, as well as the number of epochs we trained over. Future studies may wish to implement this method, however, to produce a more robust ensemble for classification.


          Reproducibility Statement

          To ensure reproducibility, we have included the codebase used for this project, as well as the above description of our data, models, and methods. Note that the Colab notebook that we have worked in is currently very messy and sometimes incomplete due to faults in Google’s autosaving feature, but we plan to clean it up and have it available for easy future experimentation.

          ]]>
          Mohit Dighamber
          Autoen-chorder: Predicting Musical Success With Neural Nets2023-12-12T00:00:00+00:002023-12-12T00:00:00+00:00https://deep-learning-mit.github.io/staging/blog/2023/foley-to-videoIntroduction

          Our aim is to use deep learning (the crux of 6.s898) to help musicians and their sponsors (for example: agents, record labels, and investors) identify whether songs will resonate with listeners. Solving this problem would enable established artists to release more impactful music, and spur new musicians to break into a competitive market.

          We first begin by establishing what our success metric is. For the purposes of this project, we will use the concept of song “popularity” as the metric we want to predict, and we source our popularity data from the SpotGenTrack Popularity Dataset. This dataset leverages Spotify’s Popularity Index, which is a relative rank measured against other songs’ popularities. It is a function of recent stream count, save rate, playlist appearance count, skip rate, share rate, and more.

          There already exist a few models to help us solve this problem. However, these models make use of metadata, such as artist name, year of release, and genre. We believe that these models – while interesting – are insufficient to be actionable, particularly for up-and-coming musicians who may be innovating new music genres, or who may not yet have a strong name. Specifically, metadata like Artist Name are both highly-weighted (for example, even Taylor Swift’s least popular song will be a relative superhit) and unchangeable (we cannot suggest that artists change their identity to Beyonce). Additionally, features like Genre are imprecise, and can quickly become outdated as new subgenres and crossover genres are developed.

          To address this gap and become more actionable to musicians, we aimed to create a new model that can achieve near-parity with metadata-based models without leveraging any metadata. By combining multiple audio-feature models, we not only achieved comparable results to metadata-based models, we actually outperformed metadata-based models on more than half our testing data.

          Previous Works

          The most prominent existing model is HitMusicNet (heretofore referred to as “HMN”). The HMN model predicts popularity based on lyric data from Genius.com (syllables, words, etc.), high-level audio features from SpotGenTrack (e.g. acousticness, key, tempo, speechiness), low-level audio features from SpotGenTrack (audio preprocessing, such as spectral analyses), and metadata from SpotGenTrack (e.g. artist name, year of release, genre). A feature vector is created with this information, and said vector is fed as the input into an Autoencoder network to compress the features, followed by a neural network to obtain the predicted popularity.

          HitMusicNet has two different objectives: Regression and classification. For this project, we will focus only on regression since it will allow us to visualize differences between our model and HMN with higher granularity. We replicated the code from the paper in PyTorch, using the same functions and data and calculated metrics to make sure our implementation is correctly replicating the paper. We see a slight discrepancy in the errors, likely due to the test/train split during the paper’s training. Altogether, we can still consider our replicated model as valid as the metrics are within reasonable range to the reported metrics. Additionally, we added the R-squared metric as an additional metric to ensure our model fits the data.

          A second model, VGGish, is a pretrained convolutional neural network trained on YouTube-100M (a database with 100 million YouTube videos). This network is a representation learning network widely used in established papers. This network takes in a wav file and processes it on 0.96-second windows, and calculates 128 embeddings per window. This means that the resulting tensor from VGGish will be 2 dimensional for a single file, and 3 dimensional for a batch of files.

          Hypothesis

          HMN has a tendency to be heavily-indexed on metadata features and lyrics. Data such as artist name heavily bias the model’s popularity predictions in favor of big-name artists. Lyrics information can make the model biased to predicting instrumental music as less likely to be successful. While this may be representative of how the industry works, it makes HMN much less actionable for musicians trying to assess their chance of success with the market.

          We believe that audio-only features – such as temporal information (i.e. the structure of the song and information about previous sections) and repetition – can alone be fairly successful in determining a song’s popularity. Thus, we chose to use just audio data, as well as temporal data, to predict popularity.

          We hypothesize that combining the audio-only features of HMN with VGGish’s audio representation will yield superior outcomes to HMN’s audio-only features alone. We also hope that our new model can compete with the full HMN model (i.e. audio features and metadata combined).

          Data Preprocessing

          Given our hypothesis, we need to extract the low-level features from our signal and map each row to its corresponding audio file to be fed into VGGish. We used Spotify’s API to obtain the raw audio files to be processed, and then ran them through the VGGish network. We performed the same preprocessing as the one done in the HitMusicNet paper.

          File length limitation

          Unfortunately, Spotify only allows the download of 30s previews of songs.

          Memory limitation

          Audio files are heavy, and the longer they are, the heavier. We should ideally process all 95,000 songs’ full length, but given Spotify’s API’s request limit, and the size of the files, we were only able to obtain 10,000 30s snippets. This still resulted in roughly 14.5 GB of data. Processing the whole dataset would not only require roughly 140 GBs of data, but the Spotify API’s limits will likely be exceeded, and our colab notebook will likely run out of memory.

          Downsampling and splitting

          Given the considerations above, we decided to use 10,000 songs for our model’s development, splitting the data into 8,500 samples for training and 1,500 for validation. Given that this is roughly 10% of the original data, we expect the model’s performance to be below the reported metrics.

          Baselines

          The metrics obtained when replicating the HMN network serve as a reasonable parameter to verify our model’s accuracy. As mentioned above, the model’s performance is expected to be below the paper’s reported metrics. To understand the range, we retrained a network with the same shape as the paper’s using the 10,000 samples in the same train/test split we will feed to our new network. The resulting metrics for this experiment can be seen in Table 2.

          Training a model that results in similar metrics would be ideal, but realistically, as we will only be using low-level data, we expect the metrics to be lower than the values in Table 2. To ensure that our trained model isn’t just predicting noise, we use a baseline comparison, comparing against a random normal distribution with mean μ=40.02 and σ=16.79.

          As seen in table 3, the baseline intuitively would not appear to be too far from the trained HMN model in terms of MSE and MAE. When looking at the r-squared, the random model has a negative value, while the trained HMN network results with a much higher 0.5616 value. To deem a model as successful, we will compare it against both sets of metrics.

          Alternate Models

          • Single autoencoder. Our first iteration to solve this problem consisted of using a single autoencoder to find representations with data coming from VGGish and SpotGenTrack low level features, and then running that through a feed-forward network similar to the one used in HMN. Since the output of VGGish is a tensor of shape (batch_size, n_windows, n_features) and the output of SpotGenTrack is (batch_size, 207), we concluded there was no simple way to combine the two data sources without losing temporal information.
          • RNN. Our second iteration consisted of running the data coming from SpotGenTrack Low-Level through an autoencoder in the same way HMN does it. After this initial train gives us a compressed representation of the data from SpotGenTrack Low-Level, we train two subsequent networks: First an LSTM RNN which transforms data into (batch_size, 20), then we add the compressed representation from SpotGenTrack Low-Level and run that through a feedforward network. This model yielded a performance below the baseline.
          • HMN+VGGish: This model consists of taking the full SpotGenTrack data, passing it through the regular autoencoder defined in HMN, and adding it to the output coming from VGGish. This model, while resulting in promising results, still yielded worse performance than HMN on its own, so our team decided to explore alternatives.
          • LossNet. Our third exploration consisted of training a model that uses VGGish’s outputs to try and predict losses from HMN. In essence, we are trying to use VGGish Representation to capture information that HMN consistently is unable to. This approach has parallels with Adversarial Networks, in that one model is being trained on the losses of another model. However, this approach is more cooperative than adversarial, since the result of the two models is not zero-sum. This approach led to a dead-end with surprising results.

          Final Architecture

          Our final iteration consists of a model with two autoencoders: One for data from SpotGenTrack low level features, the second for the representation obtained using the VGGish model. The slight difference between these two models is that the VGGish autoencoder has additional LSTM layers at the start of the encoder, and at the end of the decoder. The output from these two autoencoders is then added together and passed through a feed-forward network. This architecture can be seen in Figure 4.

          Padding and Packing

          None of the audio files coming from Spotify previews are more than 30s in duration, but some are in fact shorter than others. To solve this issue, and also to be able to feed our model whichever sized data we require, we use pytorch’s packing functionality. Packing allows us to process sequential data with different sizes, so that only the relevant information is passed through the LSTM. Conversely, padding allows us to add zeros at the end of sequences so that all samples have the same size. This is required to store data in tensors.

          Hyperparameters

          Additional Model Considerations

          The original HMN model compiles 228 features into 45 representations for the feed-forward network. We want our model’s feed-forward network to have a similar number of inputs as the given architecture, therefore we compress the data in the encoder of both autoencoders to 20 features, so that when added together, they result in 40 total features.

          Additionally, as can be seen in figure 3.2, the target’s distribution is condensed at a central point, and distributed in a Gaussian shape. To help our model accurately predict the shape of the results, we multiply the losses by a weighting factor. This multiplication is important to make our model more likely to predict outliers. The equation is the following:

          \begin{equation} \frac{1}{N} \sum_{i=1}^{N} \exp\left(\left(\frac{y_{i} - \text{mean}}{\alpha \cdot \text{std}}\right)^2 \cdot \frac{1}{\beta}\right) \end{equation}

          Our feed-forward network was suffering from vanishing gradients during training. To attempt to avoid this, we initialized all linear layers with weights distributed by Xavier uniform, and a constant bias of 0.1.

          Finding the Best Model

          In order to find the best model, we modified plenty of parameters and hyperparameters. We first found the optimal autoencoder models (seen on table 4), and then we proceeded to run several loops over our linear layer to obtain the model with lowest errors. The parameters modified were the following:

          • Learning rate: (0.001, 0.0001, 0.0002, 0.02, 0.0005)
          • Weight decays: (0, 0.0001, 0.0002)
          • Batch sizes: (200, 100, 256, 277)
          • Means (for weights calculation): (0.33, 0.34, 0.35, 0.37, 0.38, 0.40, 0.42, 0.45)
          • Alphas (for weights calculation): (1.8, 2.0, 2.1, 2.2)
          • Betas (for weights calculation): (1.8, 2.0, 2.2)
          • Number of linear layers: (7, 9, 12)

          The combination that resulted in the optimal model was the following:

          • Weight decays: 0
          • Batch sizes: 200
          • Means (for weights calculation): 0.36
          • Alphas (for weights calculation): 2.0
          • Betas (for weights calculation): 2.0

          Table 5 shows the best-performing models obtained after experimentation. MAE, MSE and r-squared were calculated using the testing data, i.e. Data not used in training. Looking at the data in tables 2 and 3, we see that our model shows a significant improvement above the random baseline, with a reasonable r-squared and MSE. Reduction in the MAE remains challenging, but still we see a significant improvement from the random baseline.

          Furthermore, we analyzed the testing data, and found that for 919 of the 1,500 songs (61.2%), our model did better than HitMusicNet. Upon further analysis, we found that our model did a better job predicting the popularity of songs with popularities ranged [0.22-0.55], while HMN does a better job at predicting outliers (songs with <0.2 or >0.6 of popularity).

          Conclusions and Next Steps

          Data Exploration

          Given Spotify’s ubiquity and analytics excellence, its Popularity Index is a good proxy for relative song popularity. But there are concerns around using data from a single platform (Spotify) and from a single channel (digital streaming). Given this concern, we would like to explore other methods of calibrating a track’s popularity (for example, Billboard and Discogs API). We can aggregate popularities into a single output, or can train each model on multiple outputs of various popularity scores.

          Currently, our data consists of 30s audio clips. The average new song length is around 3min 17s, meaning that our models’ inputs cover around 15% of the song. This can cause the model to miss information critical to song likeability, such as the intro, chorus, or bridge. We would like to make our dataset more complete by using full songs as inputs. Furthermore, we’re using only 10,000 data points, which can also be affecting our training efficiency, especially our ability to detect outliers, which we have found to be a key issue with our model. Ideally, we would like to train our models on all 95k songs in SpotGenTrack.

          Architectures

          Many more architectures can further be explored to predict song popularity. We found VGGish with an LSTM to be an efficient “boosting” algorithm, which contributed to the model in a less significant way than SpotGenTrack, but still allowed our model to increase its performance. Similarly, the use of transformer architectures can help improve the performance of our model.

          In this study, we explored and evaluated our model against the HitMusicNet’s regression algorithm. In further studies, it could be beneficial to explore the classification algorithm, as we have seen very promising results in the prediction of songs along a certain range.

          We used the VGGish model purely for inference, since we only needed to train the autoencoder and then the feed-forward network. Future studies can include architectures such that the VGGish model is trained in series with the feedforward network, and fine-tuned to predict popularity. We could also look at alternate representation models that are perhaps better suited or supply a more apt representation for our task than VGGish.

          In conclusion, the use of low-level features to predict popularity can have several real-world advantages. The proposed model is able to predict a song’s popularity to a fair degree without the need for high-level features. Emerging artists can use these parameters to determine the possible success of their songs. Music labels can use this algorithm to predict an artist’s possible popularity. Platforms such as Spotify can also take advantage of this model in order to tackle recommendations and boost emerging artists.

          Bibliography

          • D. Martín-Gutiérrez, G. Hernández Peñaloza, A. Belmonte-Hernández and F. Álvarez García, “A Multimodal End-to-End Deep Learning Architecture for Music Popularity Prediction,” in IEEE Access, vol. 8, pp. 39361-39374, 2020, doi: 10.1109/ACCESS.2020.2976033.
          • Ding, Yiwei, and Alexander Lerch. “Audio embeddings as teachers for music classification.” arXiv preprint arXiv:2306.17424 (2023).
          • D. Martín-Gutiérrez, “HitMusicNet” in https://github.com/dmgutierrez/hitmusicnet.
          • Koutini, Khaled, et al. “Efficient training of audio transformers with patchout.” arXiv preprint arXiv:2110.05069 (2021).
          • P. Nandi, “Recurrent Neural Nets for Audio Classification” in https://towardsdatascience.com/recurrent-neural-nets-for-audio-classification-81cb62327990.
          • Wu, Rick, “VGGish Tensorflow to PyTorch” in https://github.com/tcvrick/audioset-vggish-tensorflow-to-pytorch.
          • Wu, Yiming. (2023). Self-Supervised Disentanglement of Harmonic and Rhythmic Features in Music Audio Signals.
          • S. Shahane, “Spotify and Genius Track Dataset” in https://www.kaggle.com/datasets/saurabhshahane/spotgen-music-dataset/data.
          ]]>
          Esteban Ramirez Echavarria
          Reasoning with Maps: Assessing Spatial Comprehension on Maps in Pre-trained Models2023-12-12T00:00:00+00:002023-12-12T00:00:00+00:00https://deep-learning-mit.github.io/staging/blog/2023/mapreasonMotivation

          Humans possess a remarkable ability to intuitively understand and make sense of maps, demonstrating a fundamental capacity for spatial reasoning, even without specific domain knowledge. To illustrate this, consider the following question: Do these two maps represent the same location?

          Answering this query necessitates coregistration, the ability to align two maps by overlaying their significant landmarks or key features. Moreover, humans can go beyond mere alignment; they can tackle complex inquiries that demand aligning maps, extracting pertinent data from each, and integrating this information to provide answers.

          Maps reasoning is a fundamental skill with important applications in domains such as navigation and geographic analysis. For example, pilots need to be able to reference and understand multiple kinds of FAA charts as a core prerequisite for many aviation-related tasks. Further, making inferences on historical maps that lack digitized versions relies on human capabilities for reasoning on maps and is crucial for various fields such as geology or archeology. Machine learning models that can match human visual map understanding hold substantial promise in these applications. Additionally, such models have the potential to enhance accessibility by providing alternative modalities for individuals with visual impairments to comprehend and extract spatial information from maps.

          Our work aims to tackle the following question: To what degree do contemporary state-of-the-art (SOTA) machine learning models, pre-trained on vast datasets comprising millions or even billions of images, possess the capacity for spatial reasoning and do they reach the human level? We will do this specifically by focusing on the task of coregistration.

          We propose a map reasoning dataset which we believe is a suitable initial benchmark to test the capabilities of multimodal models on coregistration; the example given above about coregistration possibly cannot be answered directly using prior knowledge a Large Language Model (LLM) might have while ignoring the vision modality. Moreover, the complexity of the task can be increased and controlled, leading to a rigorous evaluation of the model’s ability to comprehend and synthesize information across textual and visual modalities.

          Literature review and the gap in previous literature

          Multimodality: There are countless significant recent advances in Large Language Models (LLMs) achieved by models such as Meta’s Llama 2, OpenAI’s GPT 3, Google’s Palm 2, WizardLM, and countless others. These models have successfully achieved or surpassed human-level performances on numerous natural language processing tasks (such as Retro-Reader on the SQuAD2.0 benchmark, ST-MOE on the SuperGLUE benchmark, and many other benchmarks). LLMs have achieved a surprisingly high level of knowledge about text by being able to achieve very high zero-shot scores on many NLP tasks demonstrating their understanding and versatility in Language as opposed to non-LLM NLP models that are usually trained to accomplish a specific task and do not generalize beyond that task.

          Recently there has been a massive push towards integrating other modalities into LLMs, most notably vision. Models such as Google’s Gemini, OpenAI’s GPT 4, VisionLLM, and many others all focus on scaling the capabilities achieved by LLMs in the NLP domain to the vision domain. The language modality LLMs are trained on consists mostly of written text recorded online that follows a grammatical structure from a human language (English, etc.) or a computer language (Python, HTML, Latex, etc). However, this is in stark contrast to the vision modality which can consist of categorically different representations consisting of anything from photographs of people in the park, to a representation of the Silicon Atom from a computer simulation, to an infographic about the importance of drinking enough liters of water per day. This difference between the distribution of the entirety of text modality and the vision modality hints that much more careful attention needs to be placed on compiling unique and diverse datasets that aim at sufficiently representing the distribution of the vision modality.

          Vision-Language Benchmarks: There were significant strides made in the past years in developing benchmarks and datasets for LVLMs which are composed of questions that require both Language and Vision to successfully answer. However, there are very few datasets that include or focus on maps as part of the benchmark. LVLM-eHub compiles numerous benchmarks for LVLMs such as benchmarking object and scene recognition using ImageNet1K, GVT, and many other datasets, or benchmarking visual common sense using ImageNetVC and the visual commonsense reasoning datasets. They also include a Visual Reasoning benchmark, however, it focuses mostly on real-world photographs (such as the Visual Spatial Reasoning dataset or the VisDial Dataset that is based on images from COCO). However, none of these datasets place focus or sufficiently contain images of maps or diagrams while they place much more focus on real-world images.

          A frequently used dataset for evaluating LVLMs, which is also included in the Visual Reasoning benchmark, is the ScienceQA dataset which includes more than 20 thousand multimodal multiple-choice questions across 127 categories including a category for maps. However, examining the narrow slice of questions with images in the geography/map category shows that many of them do not necessitate a vision component and can be answered solely based on the textual question (e.g. “Which of these states is farthest north?” with four different states as a multiple choice question), or provide an entirely unused visual representation (e.g. providing a map of the United States with no text alongside the question “What is the capital of New York?”). Out of the questions that rely on the visual component to correctly answer, many of them require relatively little understanding of maps such as asking “Which country is highlighted?” which only requires visual matching of the highlighted section with typical shapes of countries or continents. Additionally, recent papers such as LLama-adapter have demonstrated that it’s possible to achieve a high accuracy of 78.3% on ScienceQA using an unimodal text-only Large Language Model. Thus, although ScienceQA does have a subsection dedicated to maps, it does not seem sufficiently capable of testing the capabilities of LVLMs to reason and understand maps.

          An area closely related to maps that do have a relatively higher degree of focus is the capability of models to parse and reason about diagrams, figures, and plots. Datasets on this topic include the ACL-FIG which involves classifying and labeling scientific figures, InfographicVQA which requires reasoning over data visualizations in infographics, ChartQA which requires reasoning over charts, and many other datasets that focus on figures and plots. Models have been developed to specifically tackle this challenge, such as Google’s DEPLOT which is capable of reasoning over charts and plots by translating them to text and then using an LLM as a reasoning engine on top of the outputted text. However, charts and plots are still significantly different from maps, as the plots these datasets usually contain are simple line charts and bar graphs that can be translated into a table or textual format in a relatively lossless manner, while it is difficult or impossible to perfectly transform a sufficiently detailed map to a textual format without losing information. This illustrates the inherent complexities associated with processing maps meant to depict dense information which requires direct reasoning on the vision modality as opposed to charts and plots which present data in a simple manner.

          Maps Reasoning: Huge strides have been made in specific tasks related to maps, such as image-to-map conversion and map segmentation. However, we wanted to focus more generally on map understanding and reasoning by LVLMs as opposed to a single task-specific performance. To draw on an analogy, consider the act of isolating specific parts of speech (such as nouns or verbs) in language. A model designed exclusively for this task lacks the comprehensive understanding exhibited by an LLM which is proficient in addressing almost any language task. In the context of map co-location, deep learning models are employed solely as preprocessing steps to extract relevant information for subsequent utilization by matching algorithms as opposed to an LVLM model capable of general reasoning on maps. For example, the authors in this study use region-based CNN to extract road intersections, which are subsequently input into a map-matching procedure. Other features like street names have also been proposed in the literature. In general, current frameworks for map reasoning require many hand-crafted and engineered steps (see, e.g., this work and the references within). A recently proposed dataset, MapQA, is closely related to what we consider as map reasoning. However, the maps contained are of sufficiently low information-density and exhibit similar limitations to what we described in InfographicVQA and ChartQA as the images provided can be sufficiently translated to a textual domain before considering the textual input using a similar technique to DEPLOT. To the best of our knowledge, there are no examples in the literature where LVLMs are used to directly reason about maps at the detail we propose and perform tasks such as coregistration.

          Our aim is to tackle the gap in assessing the map reasoning capabilities of LVLMs by developing a dataset aimed only at coregistration and analyzing the capabilities of existing models on such a dataset. We focus our benchmark construction on the specific task of coregistration as it serves as an indicator of map reasoning capabilities and is one step towards constructing a comprehensive benchmark for map reasoning capabilities of LVLMs.


          New Dataset

          We have opted to create and compile a map dataset focusing on maps from the aviation domain for our research. The maps we utilized are carefully crafted by aviation agencies to provide a wealth of information while maintaining readability within a concise timeframe, ensuring clarity for pilots. Our dataset will be constructed by incorporating maps from the following sources:

          1. World Visual Flight Rules (VFR): These maps are intended to guide pilots when they operate aircraft visually. They include aeronautical and topographic information such as airports, obstructions, and navigation aids.

          2. World Instrument Flight Rules (IFR) Low: These maps are suitable to assist pilots when they control the aircraft through instruments. They contain information such as cruising altitudes, route data, and controlled airspaces.

          These maps are accessible in an interactive environment through the SkyVector website (VFR, IFR Low), which we used as part of our dataset generation pipeline.

          To generate the map snippets for our experiment, we chose to sample from the previous map sources around airports. This selection guarantees that the snippets are inherently information-rich, given that the map originates in the aviation domain. To ensure diversity in our dataset, we specifically sampled airports situated in the states of Massachusetts, New York, Delaware, Arizona, and Hawaii.

          The resulting dataset exhibits significant variations in terms of density, featuring both isolated airports and those nestled within cities, diverse locations such as inland, seaside, and islands, as well as various terrain types ranging from greenery landscapes, mountainous regions, and arid environments. In total, our dataset contains 1185 image pairs, each image is 600x600 pixels in PNG format. The total size of our dataset is 1.28 GB.

          A glimpse of the coregistration task

          To gain an understanding of our task and its intricacies, we present a few examples from our dataset. Generally, humans can successfully align two maps by identifying common features, which fall into one of the following categories:

          • Terrains: such as shorelines or mountains.
          • Charts: such as flight paths or restricted airspaces.
          • Landmarks: such as airport or city names.

          The process of mapping by terrain is typically swift for humans, especially when there are ample distinctive details. On the other hand, mapping by chart requires a more thoughtful approach, involving careful examination to establish a connection between the depicted attributes. Mapping by names usually serves as a last resort, employed if the prior approaches prove unsuccessful. Consider the following examples:

          Positive Pair #1: A VFR map (left) and an IFR map (right) that depict the same location (Port Allen Airport in South Kauai, Hawaii) and can be coregistered easily by the shorelines of the island.
          Positive Pair #2: A VFR map (left) and an IFR map (right) that depict the same location (Circle H Ranch Airport in Arizona) and can be coregistered by aeronautical lines (even if one does not know what these lines represent in the aviation domain).
          Positive Pair #3: A VFR map (left) and an IFR map (right) that depict the same location (Bertrand Chaffee Hospital Heliport, New York). The VFR map is feature-rich providing information for Helicopter pilots while the IFR map is a hard example due to being almost blank as the Heliport does not facilitate landing any IFR aircraft. Thus, the IFR map only depicts certain names of cities and waypoints. The two maps can be coregistered by matching the name SPRINGVILLE.

          All of these examples are positive (the maps show the same location). We showcase below negative examples with varying complexity.

          Negative Pair #1: An easy and straightforward negative for humans due to the VFR map depicting a shoreline crossing the map vertically, while the IFR depicts only a landmass.
          Negative Pair #2: A harder negative example. Aeronautical lines partially appear similar but the locations are different. Naively matching circles on both maps to classify a positive pair would provide a false positive.

          We showcase multiple positive and negative pairs alongside the natural reasoning that a human would take to correctly classify the pairs. We hope that this showcases the complexity of the task and the various strategies involved in achieving successful coregistration.


          Experiments

          Zero-shot evaluation

          To start, we want to evaluate the zero-shot performance of pre-trained LVLMs on the task of identifying whether the two images are the same (coregistration). The models we start our evaluation with are BLIP-2, ViLT, LXMERT-VQA, and LXMERT-GQA. We specifically chose these models as they are all publicly available multimodal text generative models that were partly trained on visual question-answering datasets. Thus, they are able to accept both the vision and language inputs consisting of an image of the two side-by-side maps alongside the yes-or-no question of whether the two maps depict the same geographical location.

          To verify that the models we obtained are behaving as expected and are capable of answering a textual question that relies on a visual component, we compile a very simple dataset of 200 cat and dog pictures, half the images depict a cat while the other half depict dogs. We present these trivial images to the models alongside the prompt “Is this an image of a cat? Answer:” and generate a single token. As expected, out of the 200 images all four models achieved an almost perfect classification accuracy (>95% for all 4 models) by answering with either a “Yes” or a “No” token.

          This is not surprising because, as mentioned, object recognition questions are very prevalent in visual question-answering datasets, especially on ubiquitous everyday objects such as cats and dogs. To see if these models can generalize beyond their training datasets and properly reason on maps, we start by running the following experiment:

          Experiment #1: For each VFR and IFR image pair, we generate two examples (positive and negative). For the positive example, we use the correct pairing (e.g., maps from the same location with the two different styles). For the negative example, we randomly replace one map uniformly from our datasets. Each model is provided with a concatenation of the two maps in its vision input, and with the question “Do these two maps show the same location? Answer with “Yes” or “No”. Answer:” in its text input.

          In total, each model was asked 2370 questions. Below, we show the accuracy, precision, and recall that each model obtained.

          The models performed barely above random guessing in the zero-shot experiment, and some models consistently produced the same single output (either “yes” or “no”) regardless of whether the input image was a positive or negative pair.

          While the results of the models are very low and barely above random guessing, we wanted to analyze whether this failure is due to the model not comprehending the task or whether the issue is simply in the last layer of the model where the text generation occurs. The reason behind this analysis is that there is a possibility that the LVLM is able to correctly capture all the features necessary for determining whether the two maps coregister while still failing at providing the final answer due to the final layer of the model outputting an incorrect distribution over the labels (or tokens in the case of LVLMs). Thus we decide to ignore the last linear layer of the model (the language model head) and capture the hidden state of the last token from the last layer of the model.

          Fine-tuned evaluation

          Using this methodology, the output we obtain from each model is a single embedding vector (the length of which depends on the embedding size of the model). Usually, a single linear layer is finetuned on the last layer to directly predict the answer. However, we opt for a more detailed analysis by training multiple classifiers (Logistic Regression, SVM, and XGBoost) that take the embedding vector and produce a binary output. In all the upcoming figures, we always report the results using the classifier that performed the best (for each model) on the validation set.

          Moreover, it is known that LLMs can be sensitive to prompts. Thus, to more rigorously analyze the performance of the models, we create a total of 8 variations of the coregistration question, as this one question serves as the text input for all pairs of images in our created dataset. A detailed analysis of these prompts will be provided later. Similar to the classifiers, we report results using the best prompt for each model.

          This methodology of using the model to extract a rich embedding that contains the answer to our prompt (instead of generating the answer directly as text) means that we are now capable of utilizing additional large transformer-based multimodal models that output embedding vectors instead of directly outputting text. Thus we include in our analysis two such models which are FLAVA and GIT that output embeddings that represent the multimodal input. Those embeddings are then similarly passed to the simple classifiers to detect whether the models are capable of capturing the required details from the maps to answer the co-location task correctly. Thus we perform our second experiment:

          Experiment #2: We generate the examples using the same procedure described in Experiment #1. Then, for each model, we pass through the entire dataset and generate an embedding for each sample in our dataset. We then train the simple classifiers on 70% of the embedding vectors to predict the positive and negative pairs. We report the evaluation on the other 30% data and report the results in orange below.

          The accuracy of this fine-tuning methodology (shown in orange) achieves around 65%-70% for all models which is a significantly higher accuracy compared to the zero-shot accuracy of the LVLMs (shown in red) which was incapable of achieving more than 55%. This experiment shows that the embedding of the last token does contain a slightly more feature-rich representation of the multimodal input and can be used to classify the positive/negative pairs at a higher rate than random but is overall still incapable of sufficiently solving the task.

          Thus far we have tried to assess the capabilities of LVLMs and (more generally) Multimodal Vision Language models on solving the coregistration task, and we assessed this capability using our constructed dataset of determining whether two maps of different styles represent the same location or not. Given the low accuracy achieved on this task, we can claim that the LVLMs we have analyzed are incapable of reasoning and answering more complicated questions relative to our simple baseline question of “Are these two maps of the same location?”.

          Improving results for co-registration

          We emphasize that our goal is not to directly achieve high accuracy on this task by utilizing any machine learning model, but rather it is to evaluate the capabilities of LVLMs to reason on maps. Furthermore, we created and proposed this dataset and task to act as a baseline for assessing the reasoning abilities of LVLMs on maps.

          However, despite the failure of LVLMs to answer this baseline task, we next want to assess the inherent difficulty of the dataset. For this, we develop a simple model by utilizing the same simple classifiers used above to train on the embedding of a unimodal vision-only model. Unlike LVLMs, we are not testing our proposed task-specific model on the dataset to assess its capabilities for reasoning on maps, as the model is not trained to answer questions based on images, does not accept text modality, and is specifically fine-tuned to solve this one narrow task. Thus, the results of this experiment serve only to give a sense of the difficulty of the task that we considered as a simple baseline for map reasoning. This will hopefully demonstrate that the relatively older frozen vision-only models can achieve a significantly higher accuracy on this specific task when compared to state-of-the-art open-source LVLMs and possibly indicating the gap between the embeddings captured by the vision-only model and the LVLMs.

          Experiment #3: We develop a simple unimodal vision classification model by utilizing a frozen CLIPVIsion model as a backbone. First, we feature-engineer the input by subtracting the two maps from each other in the image space to produce a single image. This image is passed through the frozen CLIPVision model to generate an embedding of the difference between the maps, the embeddings are then used to train the simple classifiers mentioned above and the one that achieves the highest accuracy on the validation set is reported below.

          We see that our fine-tuned vision model (shown in green) achieves a significantly higher accuracy than all previously tested LVLMs. This shows that the task is not a significantly difficult vision task as a frozen CLIPVision model with a head fine-tuned on approximately two thousand samples was able to sufficiently extract an embedding and correctly distinguish positive and negative pairs 85% of the time.

          This significant difference between the accuracy of the frozen CLIP model and the LVLMs on this task signifies that the LVLMs we tested are still significantly farther behind on certain tasks even when compared to a frozen vision-only model that was trained and released years prior. This is in stark contrast to the significant achievements that LLMs accomplish on numerous datasets when compared to task-specific NLP models, where the highest-scoring models on most NLP datasets are LLMs.

          Human benchmarking

          So far, we have examined the performance of pre-trained LVLMs on our proposed dataset in a zero-shot as well as a fine-tuned manner alongside a vision-only model with feature engineering to assess the difficulty of the task.

          A natural next question to analyze is the performance of humans on this same task as it is not immediately clear how hard or easy the task is for us. The performance achieved by humans on a task such as this would serve as a great target for LVLMs to try to reach.

          Experiment #4: We present the following task to two subjects. Each human subject will see two maps for 10 seconds. The pair can be positive or negative with equal probability. After the 10 seconds elapse, the maps automatically disappear and the human subject is asked if the two maps show the same location with a binary “Yes” or “No” choice. After the answer is received, a new pair is sampled and this process is repeated until we gather 50 answers from each human subject.

          The 10-second window acts as a pseudo-computational limit on the human subject and ensures that the subject’s answers are mostly based on visual and spatial reasoning and not on reading and comparing text. If the subject does not immediately identify a visual or spatial cue, the 10-second window possibly allows for a maximum of one or two texts to be compared if the subject is quick enough. This time limitation prevents the participants from spending an extensive amount of time comparing the nuances of the two images for a severely long time which would make the task more trivial. Below, we show the accuracy obtained from two human subjects and compare it with the previous LVLM results.

          We see that both human participants (shown in blue) achieve a significantly higher accuracy (~95%) compared to all the tested ML models. This shows that the task is significantly easier for humans despite the 10-second time limit preventing the subject from extensively comparing the images.

          Our experiments showcase the inability of LVLMs to properly solve our proposed dataset on coregistration as well as showing that a vision-only fine-tuned model with feature-engineering is able to solve the task at a significantly higher accuracy. Finally, we show that humans are able to solve the time-constrained task with a significantly high accuracy.

          Analysis on prompt engineering

          Numerous recent studies have indicated the importance of prompt engineering in the quality of the output of Large-Transformer based models.

          Due to the potential importance of prompts in affecting performance, we decided to run all experiments that require prompts using multiple different prompts with varying degrees of length and complexity. We note that the prompts considered and listed below were only the ones that consistently conditioned the model to output a “Yes” or “No” output token instead of any other arbitrary sentence completion output. The prompts are shown in the following table:


          ID Prompt
          1 Are these two maps the same? Answer with “Yes” or “No”. Answer:
          2 Do these two maps show the same location? Answer with “Yes” or “No”. Answer:
          3 Do the two charts depict the same area? Answer:
          4 The following image contains two maps with different styles side by side. Do the two maps show the same location? Answer with “Yes” or “No”. Answer:
          5 On the left there is a map from the VFR dataset and on the right a map from the IFR dataset. Do the two maps show the same location? Answer with “Yes” or “No”. Answer:
          6 There are two maps of different styles, do they represent the same area or are they completely different? Answer:
          7 The following image contains two maps with different styles side by side. Do the two maps show the same location? Try to compare the maps by looking at key landmarks or features. Answer with “Yes” or “No”. Answer:
          8 Carefully examine the following two images that contain two maps with different styles side by side. Do the two maps correspond on the same latitude and longitude point? It is of utmost importance that you answer this correctly. Answer with “Yes” or “No”. Answer:


          The initial prompts (prompts #1 - #3) are meant to be short and direct, while the ones in the middle (prompts #4 - #6) are more verbose and add a bit more complexity, while the last two (prompts #7 - #8) are very verbose and add an exact explanation of the task. We also include additions to some of the prompts that try to guide the models on how they accomplish the task, and some additions that emphasize the importance of correct answers. In the figure below, we study the effect of prompts on model performance.

          We notice that varying the prompts has a relatively low variance in terms of accuracy, with an improvement of less than 5% for all models across all prompts. Still, there are no strong general trends across models when considering prompts with increasing complexity. We note that the ViLT model was incapable of accepting prompts #5 - #8 due to the limitation of its maximum context length which is shorter than the other models.

          One aspect that might limit this analysis is that almost all prompts contain an explicit requirement for the models to provide answers immediately (e.g., “Answer with ‘Yes’ or ‘No’. Answer:”). This was done to reduce the computational inference cost and avoid generating long sequences of texts. The models might respond better to some prompts if they were allowed to reason about their answers first.

          Investigating the failure points of LVLMs on coregistration

          The figures presented in the beginning of the blog post demonstrating some examples in our proposed dataset give a clue of the variance in the difficulty of the examples in the dataset, where some samples are easy to identify as positive pairs and others much harder to do so.

          Thus, to get a better insight into the model’s performance and investigate its failure points, we investigate some examples where the models made confidently wrong predictions. Here, we focus on a single model, FLAVA, which was our best-performing LVLM. In the figure below, we investigate both false positives with the highest predicted positive label and false negatives with the highest predicted negative label. The figure contains the 9 examples where the model generated a very high (very low) score while the true label was positive (negative).

          For the false positives, we see more than one example where two maps containing water were wrongly classified. This might indicate that the model is making predictions on these images based on colors more so than spatial reasoning. For the false negatives, there are many examples where the VFR chart is dense while the IFR is sparse. These examples require discarding a lot of information from the VFR charts and focusing solely on the region where the IFR chart contains information. Given that the model made wrong decisions in these examples, there might be a preference for positively matching images based on density. Notably, some of these examples were straightforward for the human subjects (matching based on the shoreline), while other examples required more effort (matching between dense and sparse maps).


          Discussion, Limitations, and Future Work

          One of the key takeaways of our experiments, and specifically from contrasting the first two experiments with the third experiment, is that it was not difficult for a non-LVLM model to achieve an 85% accuracy on our proposed dataset. Yet, our dataset proved to be challenging for LVLMs, especially in zero-shot performance where they performed barely better than random guessing. This implies that it would be beneficial to further expand future datasets that are used for LVLM training and specifically the addition of data collection similar to what we propose and that this could provide invaluable improvements to future training of LVLMs.

          Existing vision-language benchmarks exhibit a heavy focus on real-world objects and scenes, with a distinctive lack of images and questions on maps. This is despite the fact that maps are ubiquitous and used in many real-world scenarios. Furthermore, many maps are easily accessible in digital format and ready to be integrated into vision-language benchmarks. We believe such inclusion would require relatively little effort in terms of data collection while providing significantly higher capabilities for LVLMs.

          We plan to expand the size of our new dataset used in this project and to make it publicly available. Additionally, while our current project primarily focused on the coregistration tasks, we have plans to incorporate more intricate and challenging questions that delve deeper into map reasoning.

          There are some limitations to the current analysis done in this project. A significant limitation is the computational limit preventing us from feasibly generating answers from the LVLMs in an autoregressive manner instead of our analysis which used only one output token per sample. A possible future work is examining more complicated generation methods such as Chain of Thought prompting. Additionally regarding the inputs of the models, although we investigated different text prompts, we only used one template for the visual prompts while there have been multiple recent works on visual prompt engineering in vision-language models analogous to textual prompt engineering in LLMs. It could be the case that some models are sensitive to the way the maps are concatenated. This aspect warrants further investigation to gain a more comprehensive understanding of how different concatenation methods might impact model performance and results.

          Another limitation is that we were only capable of running our analysis on open-source models; the largest model tested was BLIP-2 with less than 3 billion parameters. This was the largest LVLM that we had access to in terms of weights, to be able to run our analysis on. Future work could attempt to run the analysis on larger closed-source models if access is granted.


          Conclusion

          In this project, we propose a novel dataset to serve as an initial benchmark for the capabilities of LVLMs to reason on maps with the goal of addressing a gap in current LVLM benchmarks and datasets.

          Using this dataset, we run an extensive analysis on the performance of open-source LVLMs showing that they struggle to achieve good performance on the coregistration task. Additionally, we show that the task for our dataset is a relatively simple vision task by showing that a fine-tuned vision-only model released years prior to the tested LVLMs achieves a significantly higher accuracy. Finally, we show that the coregistration task is intuitive to humans, as participants were able to achieve close to perfect accuracy even in a time-constrained manner.

          We hope that future initiatives regarding data collection for LVLMs and training foundational LVLMs will put more emphasis on datasets such as our proposed datasets. This will hopefully unlock new capabilities for LVLMs enabling them to advance beyond their current limitations and possibly expand their utility and reasoning abilities in a variety of real-world scenarios.

          ]]>
          Abdulrahman Alabdulkareem
          Leveraging Representation Engineering For LLM’s In-Context-Learning2023-12-12T00:00:00+00:002023-12-12T00:00:00+00:00https://deep-learning-mit.github.io/staging/blog/2023/representationengineering-incontextlearningIntroduction

          Emerging capabilities in deep neural networks are not well understood, one of which is the concept of “in-context learning” (ICL), a phenomenon where a Large Language Model (LLM)’s understanding of the prompt and ability to answer accordingly drastically increases after being shown some examples that answer the question. Evaluating in-context learning and understanding why the behavior happens is both an interesting theoretical research question and a practical question that informs directions to conduct research that further advances LLM capabilities by, say, exploiting more of in-context learning.

          We attempt to explore the phenomenon of in-context learning by leveraging another exciting field of work on mechanistic interpretability where researchers set out to understand model behaviors by interpreting and editing internal weights in models. One such work that we base on is Representation Engineering by Zou et al. (2023) , where they construct a set of training text stimuli to probe LLM activations and use such stimuli to identify a direction that accurately predicts the underlying concept based on the neural activations of the model. This approach allows us to elicit readings of representation and control such representation.

          We propose to use methods in Zou et al. (2023) to evaluate in-context learning. There have been no previous attempts to examine the model internals holistically in an LLM while it is performing in-context learning. We expose such neural activations by constructing stimulus through artificial examples of in-context learning on binary classification tasks. We find a reading vector that shows high neural activity after the model is stimulated with the context pairs; such a “Context Vector” indicates the context the model draws from. While we hoped to find certain universal mechanisms across different datasets, we find that the Context Vector is dataset-specific and confirm previous hypotheses that in-context learning retrieves information from different parts of the model’s latent space.

          We then explore the results of controlling the activations along the “Context Vector” direction, in the hope that editing the activations would further boost the performance on top of in-context learning. We compare the model outputs on the classification datasets in a zero-shot setting and a setting of natural in-context learning, with the “Context Vector” amplified, and suppressed. While we find boosting performance through such editing to be challenging and sometimes finicky to tune, we find the results to be promising on editing weights to suppress the context that the model draws from and drastically reducing the performance.

          Background & Related Work

          In-Context Learning (ICL)

          An LLM is frequently asked to perform a task at inference time, and many have realized that providing some examples of how to answer the task can drastically improve the model’s performance. This phenomenon is called in-context learning. For example, Zhou et al. (2022) evaluates how an LLM can become better at solving algorithmic problems through in-context learning, a task that LLMs traditionally struggle at.

          In other scenarios, the LLM does not need to rely on prompts at all and can deduce the pattern from the few-shot examples alone to predict the answer. While there is no universal definition of in-context learning and its meaning has shifted over time, we define it as the performance boost to answer questions based on a limited amount of examples (as the context).

          Interestingly, Min et al. (2022) observes that such an ICL phenomenon occurs as long as examples are given, and a mismatch between input and output pairs would not hinder the ability of models performing ICL and thus its performance on the tasks. Wei et al. (2023) further corroborates this work with findings on small models, but shows that as models scale, the ability to pick up on flipped patterns when given in-context examples with flipped labels and override semantic priors is stronger.

          Theories on why ICL happens

          While the concept of ICL is well studied, the underlying mechanism of ICL is not well understood. Xie et al. (2022) explains the phenomenon of ICL as an Implicit Bayesian Inference, where the in-context learning prompt serves as a stimulus for the model to go “locate” corresponding concept stored in the model’s latent space that the LM has learned implicitly during pre-training. They study this by generating a simple pretraining distribution that parameterizes the transition of a Hidden Markov Model (HMM) and another prompting distribution. In this setting, the authors reduce the ICL task to Bayesian inference to map the prompting distribution to the pretraining distribution.

          Akyürek et al. (2022) further explains that Transformer-based in-context learners implement standard learning algorithms implicitly by encoding smaller models modularized to perform each specific task and update them based on the new in-context examples. von Oswald et al. (2023) claims that Transformer-based in-context learners are similar to gradient-based meta-learning formulations where they found that the Transformer can learn smaller models of a certain concept by gradient descent in their forward pass.

          Furthermore, Olsson et al. (2022) draws a parallel from ICL to the more understood phenomenon of Induction Heads, where attention-only Transformers pick up on the algorithm to predict next tokens by searching for a previous occurrence of the last token and copying the same next token from previous occurrences. They claim that this can be a potential mechanism to explain ICL.

          While many hypotheses and theories have been proposed to explain ICL, most explorations to prove these theories have been small in scale, and the literature lacks a study on large-scale LMs’ internals when performing ICL.

          Model Editing & Representation Engineering

          We’ll use the Representation reading and controls methods presented in Zou et al. (2023) to understand the context where the model attends to and discover directions that indicate such reasoning.

          Relatedly, there has been a recent surge in research related to model knowledge editing, including Meng et al. (2023) , Zhong et al. (2023) , and Hernandez et al. (2023) that demonstrate different methods for locating and editing factual associations. Other work, including Shao et al. (2023) and Belrose et al. (2023) , have shown results on erasing larger-scale memory units such as concepts. Li et al. (2023) applies such concept erasion techniques by conducting Inference Time Interference, where one can find a direction of causal influence on “truthfulness” data and increase the activations along that direction to increase truthfulness, scoring better on the TruthfulQA dataset.

          Experiment Setup

          Datasets

          We adopt a total of 30 datasets on binary classification (sentiment analysis, natural language inference, true/false inference) and multiple choices; 16 datasets are used by Min et al. (2022) , plus 12 extra datasets in the tweet_eval and ethos dataset families, rotten_tomatoes, and ade_corpus_v2-classification. Following Min et al. (2022), we only use the test set to avoid potential cross-contamination with the data that the model is pretrained on. We reserve k=64 examples in the test set for few-shot training, and the rest are used for testing.

          Training Data Generation

          For training, we construct a set of context pairs for each dataset, each context pairs containing the same examples but different instructions. The instructions are “Pay attention to the following examples” and “Ignore the following examples” respectively, in the hope that by stimulating two opposites and examining the difference, we can find a Context Vector that represents what the model draws from. We then truncate the example at each and every token till the last 5 tokens, so we can get a neural activation reading for each of the tokens.

          A sample training data input using the rotten_tomatoes dataset is as follows:

          [INST] Pay attention to the following examples: [/INST]

          offers that rare combination of entertainment and education.

          positive.

          a sentimental mess that never rings true .

          negative.

          [INST] Ignore the following examples: [/INST]

          offers that rare combination of entertainment and education.

          positive.

          a sentimental mess that never rings true .

          negative.

          Each context pair is identical except for the instructions. We use the context pairs to stimulate the model to learn the context and use the context vector to control the model’s behavior.

          Testing Data Generation

          For testing data, we use 3 input-labels pairs as the prompt, with the first two pairs serving as the in-context examples, and the last pair serving as the question that we actually want to test on, obfuscating the label from the prompt.

          A sample testing data input using the rotten_tomatoes dataset is as follows:

          Input:

          [INST] offers that rare combination of entertainment and education. [/INST]

          positive.

          [INST] a sentimental mess that never rings true . [/INST]

          negative.

          an odd , haphazard , and inconsequential romantic comedy .

          Label:

          negative.

          Model

          We have explored using two models with 7 billion parameters, including Mistral-7B-Instruct-v0.1 and Llama-2-7b-hf; while we have found preliminary results consistent between the two models, all of our results reported later are from Mistral-7B-Instruct-v0.1 for consistency and due to a constraint on computational power and time.

          Training Infrastructure

          We used the MIT Supercloud infrastructure and a local machine with a single RTX 4090 GPU to train the model.

          Results

          We present results first on finding the Context Vector in the embedding space, then on using the Context Vector to control model outputs and evaluate their performance.

          Representation Reading

          We use the Representation Reading method presented in Zou et al. (2023) to find the Context Vector. Specifically, we adopted the setup of the instruction response pairs where for a given function $f$ and pairs of instructions $x_i$ and $y_i$, we denote the model’s response truncated at the $j$-th token as $f(x_i)_j$ and $f(y_i)_j$ and take the neuron activity at the last token of each of the responses, namely the activations of each and every token in the response.

          We then perform PCA on the difference of the activations of the two instructions, namely $f(x_i)_j - f(y_i)_j$ and find the first principal component $v$ that maximizes the difference in the embedding space.

          Graph plotting the correlation between the Context Vector sign and actual dataset label on Rotten Tomatoes dataset. The x-axis is the layer and the y-axis is the correlation.

          More surprising is the fact that we can find a clean representation of such a Context Vector that correlates decently with the model inputs.

          We use t-SNE to visualize the difference in the embedding space on the inputs of the 30 datasets across 32 different layers and report the results below.

          t-SNE plot of the embedding space of the Context Vectors across the 30 datasets and 32 layers, color coded by dataset.
          t-SNE plot of the embedding space of the Context Vectors across the 30 datasets and 32 layers, color coded by layers.

          As shown in the figure, we find that the vectors are clustered by dataset, indicating that the Context Vectors are dataset-specific. There are no clear patterns across dataset or between different layers of the Context Vectors, further indicating that in-context learning activates different parts of the model’s latent space with information about different types of tasks.

          We also conducted scans for neuron activities in the Context Vector across the different tokens of an example sequence in a similar style as Zou et al. (2023) , for which the previous work has referred to as Linear Artificial Tomography (LAT) scans.

          The following are the LAT scans for the neuron activities corresponding to a Context Vector trained on the rotten_tomatoes sentiment analysis dataset evaluated on different dataset sequences. The following graphs further corroborate the findings above on the dataset-specificity of in-context learning; while a sequence from the rotten_tomatoes dataset results in high neural activities for the Context Vector, most sequences from the other datasets do not, showing the uniqueness of such a Context Vector. We have also observed most of the neuron activities in the later layers. This phenomenon makes sense since more abstract concepts and semantic structures formulate in later layers, thus being more correlated with the Context Vector, while earlier layers pick up more on token-level abstractions.

          A LAT scan of the Context Vector trained on `rotten_tomatoes` dataset evaluated with a `rotten_tomatoes` sequence. The x-axis is the token index, and the y-axis is the Layer number. More red indicates higher neural activities, and more blue indicates lower neural activities.
          A LAT scan of the Context Vector trained on `rotten_tomatoes` dataset evaluated with a `medical_questions_pair` sequence.
          A LAT scan of the Context Vector trained on `rotten_tomatoes` dataset evaluated with a `ethos-religion` sequence.

          We have also produced graphs that zoom into the token-level neural activities detection on the Context Vector of the opposing pair (Pay attention & Don’t pay attention), shown below. A large difference in the neural activities of the two instructions is denoted by red and indicates that the ablation is effective, while the green shades indicate that they are similar in neural activities. The results show that the neural activities are consistently different across the sequence until the model starts generating next tokens and the context ends, where the neural activities are similar.

          A token-level LAT scan that compares the difference between the neural activities of the Context Vector of the opposing pair (Pay attention & Don't pay attention) on the `rotten_tomatoes` dataset.

          Representation Control

          To change an activation along some direction, we can imagine there are several canonical ways. First, given our Context Vector $v$ and an activation $a$, we can do one of the following.

          Addition

          \[a' = a + v\]

          Amplification

          \[a' = a + \text{sign}(a \cdot v) v\]

          Projection

          \[a' = a - (a \cdot v) \cdot \frac{v}{||v||^2}\]

          The first represents a constant perturbation so it supposedly transforms the representation to become more of a certain quality. The second amplifies the direction according to which side it is on, so it makes the representation more extreme. The third removes the quality from the representation by subtracting the projection.

          We explore all these methods to control Mistral-7b-instruct. We do our experiments on the rotten_tomatoes, sick, hate_speech18, and glue-wnli in-context-learning datasets consisting of input-output pairings where outputs have two possible correct options – positive or negative, contradiction or entailment, hate or noHate, and entailment or not_entailment (for sick, it originally contains a third option of neutral which we remove since our framework requires two classes).

          Given learned representations with the same configuration as our representation reading, we construct a test set from the same dataset as training. The test set has $16$ examples, each with one demonstration followed by a question. We evaluate correctness by having the LLM generate $10$ tokens and checking if the correct answer is contained in the output and the incorrect answer is not contained in the output, without being sensitive to case. This ensures correct evaluation so that an answer of not_entailment does not evaluate as correct for having entailment inside of it if entailment is the right answer.

          A hyperparameter which we denote $\alpha$ scales the size of $v$. If our Context Vector is $r$, and its sign value is $s$, then we have $v = \alpha \cdot r \cdot s$. We vary $\alpha \in \{0, 0.25, 0.5, 1, 2, 5, 10\}$, and also take the negative of $\alpha$, which we label as positive and negative respectively.

          Results for Control with Addition

          For rotten tomatoes, we see the expected performance gap of positive over negative, though positive does worse than no control. Moreover, we see in glue-wnli and sick, the negative control actually does better than positive control. In hate_speech18, we see the desired result.

          Despite modifying the layers that we controlled, based upon observing the layers at which the Context Vectors had the most correlation to the trained concept, we cannot find a set of layers to control that works consistently across all four datasets, though we can find layers that work for one dataset.

          The accuracy of the model on the `rotten_tomatoes` dataset with amplification or suppression of the Context Vector using Addition. The x-axis is the coefficient of amplification, and the y-axis is the accuracy.
          The accuracy of the model on the `sick` dataset with amplification (positive) or suppression (negative) of the Context Vector using Addition. The x-axis is the alpha value of amplification, and the y-axis is the accuracy.
          The accuracy of the model on the `hate_speech18` dataset with amplification (positive) or suppression (negative) of the Context Vector using Addition.
          The accuracy of the model on the `glue_wnli` dataset with amplification (positive) or suppression (negative) of the Context Vector using Addition.

          Results for Control with Amplification

          Note the result depends on the absolute value of $\alpha$ so the positive and negative graphs converge. The effect of amplification is quite smooth relative to addition in the sense that there is a consistent downward trend in performance for both amplification and suppression. This could be because amplification amplifies existing signals and this gets stronger as $\alpha$ increases.

          The accuracy of the model on the `rotten_tomatoes` dataset with amplification (positive) or suppression (negative) of the Context Vector using Amplification. The x-axis is the alpha value, and the y-axis is the accuracy.
          The accuracy of the model on the `sick` dataset with amplification (positive) or suppression (negative) of the Context Vector using Amplification. The x-axis is the alpha value, and the y-axis is the accuracy.
          The accuracy of the model on the `hate_speech18` dataset with amplification (positive) or suppression (negative) of the Context Vector using Amplification. The x-axis is the alpha value, and the y-axis is the accuracy.
          The accuracy of the model on the `glue_wnli` dataset with amplification (positive) or suppression (negative) of the Context Vector using Amplification. The x-axis is the alpha value, and the y-axis is the accuracy.

          Results for Control with Projection

          We can see that projection consistently decreases performance, which is expected as we can imagine projection as erasing the idea that the model needs to pay attention to these examples. Having positive or negative sign of $\alpha$ does not affect projection.

          The accuracy of the model on the `rotten_tomatoes` dataset with amplification (positive) or suppression (negative) of the Context Vector using Projection. The x-axis is the alpha value, and the y-axis is the accuracy.
          The accuracy of the model on the `sick` dataset with amplification (positive) or suppression (negative) of the Context Vector using Projection. The x-axis is the alpha value of amplification, and the y-axis is the accuracy.
          The accuracy of the model on the `hate_speech18` dataset with amplification (positive) or suppression (negative) of the Context Vector using Projection. The x-axis is the alpha value of amplification, and the y-axis is the accuracy.
          The accuracy of the model on the `glue_wnli` dataset with amplification (positive) or suppression (negative) of the Context Vector using Projection. The x-axis is the alpha value of amplification, and the y-axis is the accuracy.

          Ablation Studies

          A key question is whether the Context Vectors are truly special. Especially because many of our results do not work, we would like to assess the “noise level.” By sampling a random unit vector from $4096$-dimensional space, the hidden dimension of Mistral-7b-instruct, for each layer and using that for control, we get the following results.

          If we take the negative of all the Context Vectors, the graphs for positive and negative $\alpha$’s would switch. The fact that in our random sample we see such a large gap in the Glue-wnli graph indicates that there is quite a lot of noise. Moreover, if we take the negative of our particular randomly sampled vector, we obtain a Context Vector for Glue-wnli that is extremely good at controlling in-context-learning. The large landscape of $4096$-dimensional space is an exciting mystery.

          The accuracy of the model on the `rotten_tomatoes` dataset with amplification (positive) or suppression (negative) of a random vector using Addition. The x-axis is the alpha value of amplification, and the y-axis is the accuracy.
          The accuracy of the model on the `sick` dataset with amplification (positive) or suppression (negative) of a random vector using Addition. The x-axis is the alpha value, and the y-axis is the accuracy.
          The accuracy of the model on the `hate_speech18` dataset with amplification (positive) or suppression (negative) of a random vector using Addition. The x-axis is the alpha value, and the y-axis is the accuracy.
          The accuracy of the model on the `glue_wnli` dataset with amplification (positive) or suppression (negative) of a random vector using Addition. The x-axis is the alpha value of amplification, and the y-axis is the accuracy.

          Conclusion

          While we understand our work is limited due to time and compute constraints and did not achieve the results we hoped for, we tried our best to explore this research direction of finding a Context Vector that corresponds to the in-context learning behaviors and experiments of using it to control model outputs.

          Implications

          If successful, this research direction could be a powerful tool to understand mechanistically why in-context learning emerges and potentially use model editing to achieve better State-of-the-Art results on LLMs in specific benchmark evaluation scenarios with model editing. Even with our current results that demonstrate more success in suppressing the Context Vector than amplifying it, i.e. suppressing such behaviors than boosting it, this can have implications on works that try to perform model unlearning and impact the robustness of LLMs.

          Future Work

          Through ablating with the random vector in the embedding space, it is unfortunate that controlling for the particular Context Vector we found is not particularly different from other vectors, despite it showing some promises on suppressing the results. We hope to run further ablation studies to confirm that suppressing the Context Vector is only suppressing the in-context learning behaviors of the specific behaviors and does not have other side effects.

          Regarding our current setup of the contrasting prompts of telling the model to pay attention or not pay attention to the concept, we can further explore the space of contrasting prompts. Directly related to our work, we would also like to explore the other type of experiment setup in Zou et al. (2023); unlike the data pair setup where we ask the model to pay attention to the examples or ignore them, we can ask the model to “think hard about the context/structure of the question” and elicit neural activities that way.

          We are also interested in exploring vectors that control step-by-step reasoning and in general, intelligence. The phrases “Let’s think step by step” or “Take a deep breath and work on this problem step-by-step” are powerful phrases that elicit chain-of-thought reasoning and improve model performance. Could we engineer activation transformations that improve these models’ performance even more than and without the need for prompting?

          ]]>
          Alex Hu
          In the pursuit of cheap and robust word embeddings2023-12-12T00:00:00+00:002023-12-12T00:00:00+00:00https://deep-learning-mit.github.io/staging/blog/2023/sentence-embeddingsIntroduction and Motivation

          Large Language Models (LLMs), such as Bard and OpenAI’s GPT-4 are typically used to obtain data embeddings of text. These embeddings are quite rich, encoding common-sense semantic information. A good embedding naturally aligns with our intuitive human understanding of language: at a high level, similar text/words are clustered together, while dissimilar text/words are farther apart.

          High-quality embeddings also satisfy semantic equations that represent simple analogies. Define \((\text{some_text})\) to be the embedding of some string “some_text.” Then, a traditionally good embedding will typically obey linguistic equations like :

          \[(\text{King}) - (\text{Man}) + (\text{Woman}) \approx (\text{Queen})\] \[(\text{Paris}) - (\text{France}) = (\text{Warsaw}) - (\text{Poland})\]

          However, repeatedly querying LLMs for large-scale analysis is expensive. Many utilize thousands of cloud GPUs and are constantly fine-tuned, adding to their cost. This cost barrier discourages researchers—especially those with less funding—from making use of these embeddings for their own models. Repeated strain on LLM’s infrastructure can even cause a negative environmental impact. However, we often don’t need embeddings as good as these fancy ones to conduct certain types of research. Specifically, it would be desirable for a researcher to choose their embedding quality, with the understanding that higher-quality embeddings take longer, and vice versa. Such a model should be robust and resistant to being trained on a small amount of incorrect data (which can happen by accident when scraping text, or due to malicious behavior).

          These issues motivate the following research question: on how little data can we train a text embedding model—with OpenAI embedding as ground truth—such that our embeddings are good enough quality? And can we quickly preprocess the data to improve our results?

          Background and Literature Review

          While there is some existing literature on generating word embeddings more “cheaply,” significant differences exist with current methodologies. Broadly, this process is called knowledge distillation (KD), which aims to “distill” knowledge from a larger teacher model (in our case, OpenAI embeddings) into a smaller student model.

          For example, Shin et al. discuss a novel distillation technique that “distills” a “student” embedding model from a “teacher” model . Importantly, this work focuses on reducing the dimensionality of the “student” word embeddings without compromising accuracy—which is fundamentally different from our goals. While our OpenAI embeddings form the “teacher,” our student model should output embeddings of the same dimensionality. Our dimensionality preservation is useful if we want to directly translate general patterns or trends from our student’s embeddings to the OpenAI embeddings.

          Gao et al. take a different approach. They propose a KD framework for contrastive sentence embeddings, DistilCSE. It works by first applying KD on a large amount of unlabeled text before fine-tuning the student model via contrastive learning on limited labeled data . Contrastive learning in this domain is promising, especially since synonyms and pairs of similar words naturally form positive pairs. However, in our context, a direct application of contrastive learning presents some issues.

          For example, suppose we had some Euclidean distance threshold A and B, such that, for any two word embeddings \(c\) and \(d\):

          If the distance between \(c\) and \(d\) is less than A, then define \(c\) and \(d\) to be positive pairs for contrastive learning.

          If the distance between \(c\) and \(d\) is greater than B, then define \(c\) and \(d\) to be negative pairs for contrastive learning.

          While this process (and others like it) isn’t too resource-intensive, it has a few issues, even if we are able to define proper thresholds A and B. Firstly, it “wastes” pairs of data where the distance is in between A and B. Secondly, information about direction is easy to lose—so while a student would learn to embed similar words closer together and dissimilar ones further apart, the student may be invariant to direction and sensitive only to Euclidean distance in the n-dimensional space. This is not ideal.

          Other related state-of-the-art approaches also present issues. Gao et al. describe another approach involving running data through an encoder multiple times with standard dropout to generate positive pairs instead of searching for them in the data itself . While this approach is promising, the approach involves repeatedly using somewhat slow pre-trained text embedders, which is exactly what we want to avoid.

          To reduce computational complexity while still reaping the benefits of preprocessing, we look to a paper by Rahimi et al. They explain how removing stop words (common words, like “a,” “the,” etc.) and punctuation improves sentence embedding quality, for a variety of reasons . Even though we’re focusing on word embeddings—and not sentence embeddings—we can adapt this general approach to our project with lemmatization, which applies morphological analysis to words to simplify them to a base form. For example, “fighting” turns into “fight,” “was” turns into “be,” “octopi” turns into “octopus,” and so on.

          This reduces data sparsity by consolidating related forms of words into a single representation, which is especially helpful for low-frequency words. This in turn helps the model generalize across tenses and other variations as it can focus on the “core” differences of words rather than auxiliary modifiers. We thus plan to investigate lemmatization in this context.

          We struggle to find closely related literature about student models’ resistance to poisoned data. Thus, we decided to investigate this aspect as well.

          To conclude our literature review, while different variants of KD exist, we decide to focus on a modified response-based KD, in which the teacher model sends final predictions to the student network, which then directly mimics these predictions by minimizing some loss .

          Generic response-based knowledge distillation. Image credits: Gou, J., Yu, B., Maybank, S.J. et al. Knowledge Distillation: A Survey. Int J Comput Vis 129, 1789–1819 (2021). https://doi.org/10.1007/s11263-021-01453-z

          Other distillation approaches—such as feature-based KD, relation-based KD, and the contrastive approach described above—do exist, but require more intimate knowledge of the teacher’s features and/or layers . This is not reasonable information we can expect to have, as companies are often incentivized to obfuscate their specific word embedding architectures.

          Methods and Experiments

          We center our studies on a standard dataset of 10k English words scraped from high-level Standard English texts that’s been empirically validated for quality. We also use the OpenAI API to obtain text-embedding-ada-002 embeddings of the entire dataset to use as ground truth. While these aren’t necessarily the best embeddings, even among OpenAI’s own embeddings, they are the best choice given our computational restrictions.

          Now, we detail our model architecture. Our baseline model (call this Model A) is a sequential ReLU and nn.Embedding layer followed by L2 normalization. Model A serves as a crude baseline—therefore, we do not investigate it as deeply as the more complex model that followed due to large differences in performance.

          Instead, we focus our efforts on the more complex Model B, detailed below in Figure 1 in the context of our pipeline. Model B utilizes an nn.Embedding layer, followed sequentially by 2 blocks. The first uses ReLU activation followed by a linear layer of size \(\frac{\text{embedding_dim}}{2}\). The second layer is the same, except the final Linear layer outputs embeddings with the full “embedding_dim.” Notably, we use L2 normalization to make sure each embedding vector has magnitude 1 (such that all embeddings exist in an n-hypersphere). Since all embeddings are unit embeddings, using cosine embedding loss alongside an Adam optimizer is natural. Thus, instead of computing cosine similarities between teacher and student vectors, we can just focus on minimizing this embedding loss.

          For the training stage, we train our embedding model to map words to vector embeddings on Google Colab with an Nvidia T4 GPU. There may be up to 3 processing steps, as depicted in Figure 1:

          Fig. 1: An overview of Model B’s architecture in context of our experimentation.

          First, we choose whether or not to lemmatize the entire dataset before proceeding.

          Second, the training split. We train our embedding models above on each of the following proportions (call this \(p\)) of the dataset: 0.005, 0.009, 0.016, 0.029, 0.053, 0.095, 0.171, 0.308, 0.555, and 1.00.

          Finally, we choose whether or not to poison 10 percent of the entire word dataset (not the training dataset). When a word is poisoned, the model incorrectly believes that some random unit vector is the ground-truth embedding instead of the actual OpenAI embedding.

          For each such model, we train for up to 80 epochs, limited by our computational resources.

          We then evaluate the model’s embeddings against the ground truth with multiple metrics—cosine similarity (via the embedded cosine loss), graphically via distributions of the embedding means, linguistic math, etc.

          Taken together, this methodology is comprehensive.

          Results and Analysis

          Model A, the Baseline

          First, here is a graph of training up our baseline Model A (Figure 2) and our augmented Model B (Figure 3). The difference in training epochs (80 for Model A, and 60 for Model B) is due to limited resources. This doesn’t matter much, as there is a clear, near-linear relationship between \(p\) and training time, which we use to estimate used computational resources. Thus, we consider \(p\) as proportional to the computational resources used for all our experiments.

          Fig. 2
          Fig. 3.

          For Model A (with no lemmatization, no data poisoning), we also want to visualize the tradeoffs between the number of epochs trained, the training proportion \(p\), and the training loss to establish some baseline intuition. To this end, we take inspiration from the game theoretic concept of Pareto efficiency, which aims to find equilibria where no change improves one of these 3 factors without hurting one of the other 2.

          We also wanted to visualize the tradeoffs between the number of epochs trained, the training proportion, and the cosine embedding loss, since we are motivated to find the optimal balance of these 3 factors. See Fig. 4.

          Fig. 4.

          Unfortunately, Fig. 4 is not particularly enlightening. Training loss decreases as the number of epochs increases and as training proportion \(p\) increases. There are also no local minima or maxima of interest. Figures 5 and 6 also confirm this with their plots of distributions of embedding means. Specifically, as we tend to move towards the right and bottom of Fig. 6, i.e. we train longer and on more data, we simply seem to approach the true distribution (Fig. 5) without anything of note.

          Fig. 5.
          Fig. 6.

          These results motivate us to look beyond our Model A. Our results from this point focus on Model B because we didn’t want a poorly performing model like Model A to be a true control; it merely served as an intuitive baseline.

          Model B, the Baseline

          As in the previous part, we obtain a Pareto-like graph for Model B, without any lemmatization and data poisoning. Firstly, the cosine embedding losses are much lower than before, due to the improved model architecture. More interestingly, after about 10 iterations, the training loss seems to stabilize across all versions of the model, potentially suggesting that training longer may not be worthwhile.

          Fig. 7.

          Since this is our base model, we don’t investigate further.

          Model B, Lemmatization, No Poisoned Data

          Now, we look to Model B, with lemmatization, but no poisoned data. The Pareto-like curve for this is telling (Fig. 8), with it looking very similar to the baseline Model B’s. As before, this suggests that training for longer may not be worthwhile, and could potentially lead to overfitting.

          Fig. 8.

          We also have a distribution of the means of embeddings for the whole dataset (Fig. 9) and from each variant of the model at different epochs (Fig. 10). Again, the results don’t say anything surprising: as we train on more data for longer, the distribution approaches that of the training dataset.

          Fig. 9.
          Fig. 10.

          To check for overfitting, we will later validate our model on simple linguistic tests, as described in the very beginning. Specifically, we will validate our model’s performance on linguistic math against OpenAI’s performance.

          Model B, Lemmatization, Poisoned Data

          The following is the Pareto-like curve, except now we poison 10 percent of the entire dataset, as described in Methods/Experiments. Curiously, we find a local minimum at approximately \(p = 0.1\) and ~20 epochs, demonstrating that our overall approach of training on a small fraction of the dataset naturally resists moderate-scale adversarial attacks on our ground-truth embeddings. Of course, the addition of poisoned data means that the loss values are on average higher than those in the previous subsection, where there was no poisoned data.

          Fig. 11.

          Again, looking at the distribution of the means of embeddings (see below), we see that models that trained on too much of the data are completely ruined. We don’t even need to compare these distributions against the whole-model distribution to see this. This result demonstrates that even a relatively small amount of poisoned data can manipulate a naive embedding model trained on an entire dataset.

          Fig. 12.

          The Effects of Data Poisoning and Surprising Robustness

          As discussed previously, we want to externally validate our models with both linguistic equations and pairs of synonyms. Essentially, we want to check that our student groups together similar words like the OpenAI teacher. Since our poisoned model performed best with \(p = 0.095,\) we use this training proportion to compare Model B with lemmatization, but no poisoned data to Model B with lemmatization and poisoned data.

          For clarity’s sake, we focus on a single representative example of our validation results in this blog. Specifically, we look into “nonviolent” and “antiaggressive,” which intuitively should exist close together in the n-dimensional unit hypersphere. Using dimensionality reduction techniques to visualize this in 2D, we obtain the following:

          Fig. 13.

          The poisoned model is surprisingly performant, performing decently against both the unpoisoned model and the OpenAI model. These results support our notion that student models that train on as little of the data as possible are somewhat resistant to uniform, random adversarial data poisoning. This empirical result is encouraging, especially since our data poisoning threshold was somewhat high.

          Conclusion, Discussions, and Future Directions

          On balance, our results help us answer our question about how to best mimic OpenAI’s word embeddings without excessive API calls. We utilize a spin-off of a response-based KD architecture to train our student model under different conditions, demonstrating both that certain preprocessing (lemmatization) improves our embedding model and that training on smaller amounts of data creates more robust models that resist adversarial data. Our initial results demonstrate promise and serve as a call to action for others to research other cheap, robust word embedding models.

          To be clear, there are certainly many limitations to our study. For one, we keep our modeling architecture simpler due to our limited compute, while a real model would certainly use a different architecture altogether. Our dataset was also on the smaller side and doesn’t fully represent the English language. Also, our implicit use of time as a proxy for computation (especially on the erratic Google Colab) is imperfect. Also, preprocessing (including, but not limited to, lemmatization) may require substantial computational resources in some cases, which we don’t account for.

          Additionally, many of the constants that we chose (such as the 10 percent data poisoning threshold, the proportions of data we trained on, etc.) are arbitrarily chosen due to limited compute. This could’ve caused unexpected issues. For example, the output dimension of embedding Model B, 1536, is more than 10 percent the size of the dataset (10k). Thus, due to our relative lack of data, our trials with data poisoning can encourage non-generalizable memorization, which is not ideal.

          Future directions would include exploring other types of preprocessing, as hinted at in our literature review. We could also look into different types of adversaries—perhaps smarter ones that actively feed information that they know to be detrimental to the model, instead of some random unit vector. While we didn’t have robust supercomputer access, we’d also love to be able to test out fancier embedding architectures.

          Finally, we’d like to thank the 6.S898 faculty and TAs for their support!

          ]]>
          Alor Sahoo
          Embeddings for Spatio-temporal Forecasting2023-12-12T00:00:00+00:002023-12-12T00:00:00+00:00https://deep-learning-mit.github.io/staging/blog/2023/spatiotemporalIntroduction

          Time series forecasting is an interdisciplinary field that affects various domains, including finance and healthcare, where autoregressive modeling is used for informed decision-making. While many forecasting techniques focus solely on the temporal or spatial relationships within the input data, we have found that few use both. Our goal was to compare two SOTA spatiotemporal models, the STAEformer and the Spacetimeformer, and determine why one works better than the other. The papers on both models did not feature each other in their benchmark evaluations, and we thought that analyzing their embeddings and identifying their failure modes could offer new insights on what exactly the models are learning from the dataset. We hypothesized that the Spacetimeformer would perform better as its proposed approach, sequence flattening with Transformer-based processing, seems to offer a more flexible and dynamic representation of spatiotemporal relationships that doesn’t depend on predefined variable graphs. We focused on forecasting in the field of traffic congestion, which is a pervasive challenge in urban areas.

          We focused on two SOTA spatiotemporal models that were evaluated on traffic forecasting datasets. The first is the STAEformer . STAEformer proposes a novel adaptive embedding that learns the spatio-temporal relations in the dataset. In their architecture, the input embedding is generated by combining the projected raw data (denoted by \(E_p\) in the embedding layer) with temporal embeddings (\(E_f\)) and the adaptive embeddings (\(E_a\)), which was used instead of an embedding solely focused on capturing spatial relations. This output is then fed into temporal and spatial transformer layers, followed by a regression layer.

          Architecture of the Spatio-Temporal Adaptive Embedding transformer (STAEformer).

          The second is the Spacetimeformer . Spacetimeformer uses embeddings generated from breaking down standard embeddings into elongated spatiotemporal sequences. The idea behind doing this is to enable the downstream tasks to learn direct relationships between variables at every timestep. In their architecture, these embeddings are fed into a variant of the transformer model using local, global, and cross self-attention. The figure below shows an intuitive visualization for this idea.

          Architecture of the Spacetimeformer Embedding.

          Dataset

          We used the PEMS08 dataset , which contains traffic data in San Bernardino from July to August of 2016. Each data point consists of readings from 170 detectors along with the time of day and day of the week they were recorded. We initially considered using the PEMSBAY dataset , which is widely used in traffic speed forecasting, but it was almost double the size of the PEMS08 dataset and took too long to train our model on.

          Methodology

          The problem statement is as follows: given the sensor readings across the 170 sensors for the previous N timesteps, we want to predict their readings for the next N timesteps. We tested the model with varying context lengths, but we found that the default value of 12 given in the STAEformer paper provided enough information to the model. We used huber loss as we wanted the model to converge faster in the presence of outliers, which was necessary given the limited compute that we had (training 50 epochs took around 3 hours).

          We trained STAEformer for 50 epochs, which was sufficient to achieve performance metrics similar to that of the paper. To compare the embeddings from Spacetimeformer, we retrained the model end to end after replacing the embedding layer in the model with Spacetimeformer’s embedding layer. To do this, we kept the context dimensions the same and flattened the input sequence along the input dimension and the dimension corresponding to the number of sensors. This structured the embedding layer so that it could learn the spatiotemporal relations across the sensors from different time frames.

          Replacing the embedding layer within the STAEformer with a pretrained embedding layer from the Spacetimeformer instead may seem like a more legitimate method to test the effectiveness of the embeddings, as we would basically be doing transfer learning on the embedding layer. However, the pretrained embeddings from Spacetimeformer might have been optimized to capture specific spatiotemporal patterns unique to its architecture, which was why we believe training the model end to end with the Spacetimeformer embeddings would result in a more accurate and contextually relevant integration of the pretrained embeddings into the STAEformer framework.

          After training, we wanted to provide visualizations of the embeddings from STAEformer and Spacetimeformer to show whether the learned embeddings are meaningful at all. To do this, we obtained the embeddings by passing in the raw data through the embedding layers of the loaded models and generated t-SNE plots with them. For the STAEformer, we focused solely on the adaptive embeddings as they were the parts of the embedding layer that captured spatiotemporal relations in the data. To generate the t-SNE plots, we had to reshape the embeddings so that they could be passed into the function so we flattened them across the model dimension. After fitting the t-SNE, we then unflattened the embeddings back to their original shape and plotted them. Each sensor was color coded with different colors, and the results can be shown in the next section. We hypothesized that the t-SNE plots would contain clusters grouped by either the sensors or the time the readings were recorded.

          After generating the t-SNE plots, we wanted to test the effects of perturbing the raw data on the embeddings. We wanted to know how the embeddings would change. For example, regardless of what the clusters represent, are they tighter? Will additional clusters be formed? Conversely, will some of the existing clusters break apart? In particular, we were hoping that augmenting the data would perhaps improve cluster formations in the worse looking embeddings, as there is a good possibility that the data itself isn’t good enough.

          Results

          The table below shows the results after training STAEformer and the STAEformer model with a Spacetimeformer embedding layer for 50 epochs each. Table of loss values:

          Embedding Layer Train Loss Validation Loss
          STAEformer 12.21681 13.22100
          Spacetimeformer 12.42218 16.85528

          We can see that the STAEformer had better training and validation loss than the Spacetimeformer. While the train loss converged to similar values, the validation loss for the model using the STAEformer embedding layer was much better. So now that we know the STAEformer embedding layer seems to perform better than the Spacetimeformer embedding layer, we plotted the embeddings for both to analyze why this is the case. To do this, we passed a data point from the validation set through the embedding layer. The results are shown in the figure below.

          t-SNE plot of the STAEformer embeddings.

          The t-SNE plot for the STAEformer embeddings shows clearly separate clusters for most of the 170 different sensors. The shape of each cluster is a “snake-like” trajectory. Therefore, we know that the embeddings preserve some pattern-like notion across readings from a single sensor. We hypothesize that each of these trajectories represent the reading of a single sensor over time. There are a couple outliers, where the clusters are not grouped by color. One prominent example is the string of cyan, maroon, and moss points along the bottom of the plot. However, even these points have some clustering, though they may not be clustered by color.

          t-SNE plot of the Spacetimeformer embeddings.

          On the other hand, the t-SNE plot for the Spacetimeformer embeddings shows no clear clusters across the same sensor. The distribution resembles a normal distribution, meaning that there is little pattern preserved in the embeddings. It becomes more difficult to differentiate between data points from the same sensor across time.

          In order to further analyze the effectiveness of each embedding layer, we perturbed the training data and re-trained each model. We were expecting the clusters from the STAEformer embeddings to remain largely the same, with some of the existing clusters possibly breaking apart due to the added noise. However, we were hoping that the Spacetimeformer embeddings would show more visible clusters after the raw data was perturbed. Given the characteristics of the embeddings, one possible output we expected were clusters containing multiple colors. An example is shown in the following image.

          Ideal t-SNE plot of the Spacetimer embeddings.

          This would show that the Spacetimeformer successfully learned spatial relationships across the sensors at variable timesteps. Instead of each cluster representing the embeddings for one sensor, the presence of larger clusters with multiple colors could imply that the Spacetimeformer learned spatiotemporal relations among the corresponding sensors and embedded them into a larger cluster.

          The following table shows the results after training the model with the perturbed data.

          Embedding Layer Train Loss Validation Loss
          STAEformer (with perturbations) 13.58251 13.35917
          Spacetimeformer (with perturbations) 13.42251 17.01614

          As expected, validation loss slightly increased for both models, and the STAEformer continued to have lower loss values than the model with the Spacetimeformer embedding layer.

          When we generated the t-SNE plots with the new embeddings, we obtained the following:

          t-SNE plot of the perturbed STAEformer embeddings.
          t-SNE plot of the perturbed Spacetimeformer embeddings.

          Both t-SNE plots for the STAEformer and Spacetimeformer embeddings look the same as when the models were trained on the original, unperturbed data. So unfortunately, the augmentation had little to no effect on the embedding layers for these two models.

          Since the t-SNE plots can be hard to parse with the human eye, we decided to focus on the embeddings for the most relevant features of the dataset and see how they compared between the Spacetimeformer and STAEformer. In parallel, this would enable us to identify the failure modes of the dataset and augment those features to see if they improve the model performance. In order to do this, we used PCA to identify the principal components. From there, we found which features help explain the most variance in the dataset and identified those as the features that had the largest impact on the learned embeddings.

          Z-normalized correlation matrix between the original PEMS08 dataset and PC-space, normalized by explained variance.

          This heatmap shows the top 10 principal components and the top 10 features that correlate with each principal component. From this heatmap, we can see that the 9th sensor in the dataset is the most relevant feature. Therefore, we can find the corresponding embedding to be the most relevant.

          Using only the 5 most relevant embeddings obtained from PCA, we re-graphed the t-SNE plots. This helped us to narrow our attention to the most important embeddings.

          t-SNE plot of the top 5 STAEformer embeddings.
          t-SNE plot of the top 5 Spacetimeformer embeddings.

          As expected, the embeddings for the most relevant sensors in the STAEformer all maintain the “snake-like” trajectory. However, the embeddings for even the most relevant sensors in the Spacetimeformer are seemingly random, and have no pattern across the points.

          Cumulative PCA plot of the original PEMS08 dataset.

          We found that the top 25 sensors explained 95% of the variance in the dataset, so we did a quick experiment where we augmented the rest of the 145 sensors (as opposed to the entire training dataset) to see how that affected the learned embeddings. For this augmentation, we expected the results to not improve by much since the learned embeddings for even the most relevant sensors in Spacetimeformer didn’t form visible clusters in the t-SNE plots. As expected, the results were almost identical to the ones generated from augmenting the entire dataset.

          Conclusion, Discussion, Next Steps

          There are a couple of reasons why we think the Spacetimeformer performed worse than the STAEformer overall. The first explanation that came to mind is that the readings across different sensors may be mostly independent from one another. The color coded t-SNE plots for the STAEformer clearly separate each sensor into its individual cluster. In this case, the Spacetimeformer would not be suited for the task as its embedding layer solely focuses on learning spatiotemporal relationships, while the STAEformer also contains an embedding layer that is solely dedicated to learning temporal relationships.

          A second, more plausible explanation deals with the embedding architecture. The difference in performance between the STAEformer and the Spacetimeformer in time series forecasting shows the importance of adaptive embeddings in capturing spatio-temporal relationships. While the STAEformer introduces adaptive embeddings to comprehend the patterns in the data, the Spacetimeformer relies on breaking down standard embeddings into elongated spatiotemporal sequences. The t-SNE plots show that the STAEformer’s adaptive embeddings generate clusters representing sensors with snake-like trajectories, providing a visualization of the model’s ability to capture spatio-temporal patterns. In contrast, the Spacetimeformer’s embeddings follow a scattered distribution, indicating challenges in identifying clusters. This suggests that the Spacetimeformer’s approach may face limitations in effectively learning the spatio-temporal relationships within the PEMS08 dataset, and potentially traffic data in general.

          Having said all this, the resilience of both the STAEformer and Spacetimeformer to perturbations in the raw data showcases the robustness of their learned representations. Despite the added augmentations, the fact that the t-SNE plots remain largely unchanged indicates the stability in the embedding layers. This may be attributed to the models’ ability to learn a generalizable representation of the spatio-temporal patterns resilient to changes in the input data, regardless of how accurate they may be. This may also be attributed due to the dataset itself. The PEMS08 dataset’s readings may already have been noisy, as it’s unlikely that the readings were recorded with perfect accuracy. We would like to explore these implications of the embeddings’ robustness in our future work.

          Another possible avenue we would like to explore is why certain sensors (such as the 9th sensor) are more relevant than others beyond just the theory. We came up with a couple hypotheses. First, it’s probable that this particular sensor is placed at important intersections, such that cars that pass this sensor are guaranteed to pass many other sensors. This would mean that there exists a way to extrapolate the readings from this sensor to the readings from other sensors. Tangentially related, it’s possible that two nodes are negatively correlated, such that the cars that pass through one node tend to not pass through another node, and the model extracts readings based on this relationship. If neither of these ideas is the case, the exact opposite concept could be true: the sensor is at a location where the speed data is very consistent, such as a highway. This allows the readings from the sensor to give very accurate predictions. The next step would be to figure out the geographical locations of the sensors and determine whether the ones we found to be the most relevant seem to be placed at important locations.

          We would also like to do some more experimentation in the future. We used a personal GPU for training (an RTX 2070), and it took a few hours to train the model for every one of our experiments which made it difficult to tune our hyperparameters. Further experiments we would like to run with more compute include running the experiments on the Spacetimeformer model architecture instead of the STAEformer architecture and replacing its embedding layer with STAEformer’s. We mentioned before that the learned embeddings may have been optimized for the model architecture it’s from. Therefore, if the resulting plots from the embeddings look similar to the ones we have generated, then we have conclusive evidence that the STAEformer input embedding does a better job of learning the spatio-temporal relations in the data.

          ]]>
          Samuel Lee
          Augmenting Expert Domain Image Inputs for Enhancing Visual Language Models Performance2023-12-12T00:00:00+00:002023-12-12T00:00:00+00:00https://deep-learning-mit.github.io/staging/blog/2023/vig-algorithm-flow-project-proposalIntroduction

          Over the past few years, we have seen a surge in creation, adoption, and excitement around visual language models, specifically around Open AI’s CLIP model. Visual language models can bridge the gap between image and text, allowing tokenized understanding of the visual world around us. For instance, Meta released Segment Anything, a model with enhanced object detection through multimodal inputs like defined bounding boxes and text.

          After the recent surge with ChatGPT, we have begun to see advancements in the visual language model space to combine the image analysis and conversational tool. While the recent developments with Bard, GPT4-v, LLava, and many others have progressed the visual language model domain, the overall capabilities of the models are limited to the type of images provided. Most of the models have been trained and finetuned on common day objects, specializing in every-day normal tasks.

          However, these models continue to struggle with answering images derived from an expert domain, especially scientific literature. Images from these domains can be challenging for the model, as they require common background knowledge, domain knowledge, and interpretation of the diagram.

          Question: What is the path from 1 to 4?

          GPT4-v Answer: The image you've uploaded appears to show a diagram with four numbered points, possibly representing steps or locations connected by a path... However, as an AI, I can't visually trace paths or analyze images in the way a human would...

          How can we assist visual language models to improve performance in expert domains?

          Past Works

          Visual Language Models have become very popular in the recent years with their ability to connect image to text. Open Flamingo is an open source model with a similar architecture to Flamingo: images are sent to (1) visual encoders, (2) perceived resampler, and (3) through a dense group of layers. Through few shot learning, text completion, and image analysis, Open Flamingo allows users to have a conversation involving images.

          Currently, popular visual language models, like Flamingo, utilize CLIP as the visual encoder to perform the image-to-embedding conversion. Behind the scenes, CLIP uses a Vision Transformer architecture as the encoder. However, when we analyze the embeddings output from CLIP, they do not necessarily have a clear representation to the human eye. This makes it really challenging to inject domain-specific knowledge through the embedding to help perform specific tasks.

          The project, FlowchartQA, contributed by creating a dataset of flowcharts and code, revealing insights into the relationships forming code. However, no research has tried to understand the way visual language models interpret graphs with nodes and edges, specifically for domain specific questions. Through various changes to text and image inputs, we can learn about the way a visual language model understands graphical structures to improve future performance of VLMs.

          Procedure

          Dataset Creation

          Data Augmentation Pipeline for the Inputs

          To learn more about the graphical understanding of VLMs, a dataset had to be curated to test various conditions. The original images of the flowcharts are sampled from the BizGraphQA dataset. Due to the compute and time constraints, only a sample of ten images were utilized for the analysis. This subset of images is passed through the Segment Anything Model from Meta to extract a mask of each of the nodes. From there, OCR is performed on each node to retrieve the text and hand coloring is performed to color the image. In the end, we get dataset triples of the original image, colored image, and the list of nodes in the image.

          For example, for this image, we would have the following dataset.

          Original Image
          Colored Image
          Node List
          [Finanze Tyco Roadsafe Insurance, Greyhound Bbbytf Limited, Etablissements Lgac Lthalat Incorporated Indiana, Korte Pro-Cut Grayhawk Insurace North Carolina, Hutchins Sandvik Maryland, Nm Suc. Krsx Limited Michigan]

          Experimentation

          Experimentation Pipeline for the Inputs

          Bard uses Flamingo, a visual language model, to answer queries. We will provide an input image with or without the augmentation and a question about the graph into Flamingo, as illustrated in the figure above. Each image will be paired with a question in a specific category. For this analysis, we will focus on four major types of questions to evaluate the VLM’s understanding of graph connectivity. These questions are to be asked in tandem with the original image, the colored image, and the original image paired with the list of nodes in the image. We ask the following questions:

          1. Based on the image, is there a cycle in the graph?
          2. Based on the image, what is the path from __ to ___? (The ground truth path involves nodes that only have one child node.)
          3. Based on the image, what is the path from __ to ___? (The ground truth path involves nodes that have multiple child nodes.)
          4. Based on the image, how many child/parent nodes does _____ have?

          For the same image from above, here are the questions and relevant answers:

            Question Answer
          1 Is there a cycle in this graph? No
          2 What is the organization hierarchy path from Etablissements Lgac Lthalat Incorporated, Indiana to Nm Suc. Krsx Limited - Michigan? The path is Etablissements Lgac Lthalat Incorporated, Indiana to Korte Pro-Cut Grayhawk Insurance, North Carolina to Nm Suc. Krsx Limited - Michigan
          3 What is the organization hierarchy path from Finanze Tyco Roadsafe Insurance to Nm Suc. Krsx Limited - Michigan? The path is from Finanze Tyco Roadsafe Insurance to Greyhound Bbbytf Limited to Nm Suc. Krsx Limited - Michigan
          4 How many child companies does Greyhound Bbbytf Limited have holdings in? Two

          But, you must be wondering: why ask these questions specifically? Each question tests understanding of graphical elements without background understanding of the topic. This should serve as a baseline for the way that VLMs understand graphical structures and the common questions to be asked.

          Evaluation Metrics

          To evaluate the success of our model, we will conduct both qualitative and quantitative analyses on the dataset, given that quantitative evaluation of generative models can be challenging. The control group will provide a baseline for normalizing the results.

          Qualitatively, we will perform a manual analysis of the generated outputs. By using prompts, images, and answer, we will subjectively compare the prompt, the image, and the resulting answer. Our primary goal is to assess how effectively the visual language model generates the answer based on the prompt while being constrained by the graph.

          Quantitatively, an accuracy score will be employed to evaluate the percentage of questions that have been answered correctly in each metric. Incorporating these metrics will provide a more comprehensive understanding of the model’s performance and allow for better comparisons with other generative models.

          This comparative analysis will facilitate a robust evaluation of the methods employed to represent graphical information within visual language models.

          Results

          The following table provides the accuracy metric of the aforementioned experiment:

            Question 1 Question 2 Question 3 Question 4
          Original Image .7 .4 0 0
          Original Image with Node List 0 .3 0 .1
          Colored Image 1 .3 0.1 0.2

          We clearly see that the Colored Image performs better than the Original Image in most of the scenarios. Also, we unfortunately see that the node list did not provide gains in performance for the VLM. Overall, the performance is quite limited when asking specific questions about path finding and children nodes. Let’s dive a bit deeper to understand why.

          Preface: In the following sections, there are tables with prompts, images, and answers from Bard for various mini explorations.

          Superiority in Extracting Nodes

          When first embarking on this project, I believed that the VLM would struggle to pair the text and nodes together. However, the VLM actually proved to be quite impressive with the extraction of nodes and the names associated with them, making the need for a node list not quite necessary.

          Using the input figure from above without any augmentations, I asked the VLM how many child nodes a specific node in the graph had. The VLM outputs the wrong answer, but it actually provides generated code for its reasoning. Here is a portion of the code:

          # Define the corporate structure as a dictionary
          +corporate_structure = {
          +    "Etablissements Lgac Lthalat Incorporated, Indiana": {
          +        "Nm Suc. Krsx Limited - Michigan": {}
          +    },
          +    "Nm Suc. Krsx Limited - Michigan": {},
          +    "Krsx Incorporated, California": {
          +        "Nm Suc. Krsx Limited - Michigan": {}
          +    },
          +    "Krsx Incorporated, Texas": {},
          +    "Lthalat Holdings, Delaware": {
          +        "Etablissements Lgac Lthalat Incorporated, Indiana": {},
          +        "Krsx Incorporated, California": {},
          +        "Krsx Incorporated, Texas": {}
          +    },
          +    "Greyhound Bbbytf Limited": {
          +        "Krsx Incorporated, California": {},
          +        "Krsx Incorporated, Texas": {}
          +    }
          +}
          +

          The VLM was actually quite impressive with obtaining the specific nodes in the graph, recognizing the text through OCR, and pairing it with the nodes. While all the nodes are not correct, we can see that the VLM can follow a procedure to determine this. While this is a good example of node extraction, the capability is still ambiguous.

          To poke this topic a bit more, I wanted to test out the VLM’s ability to extract the nodes if the colors are the same or different. I designed a basic figure with just nodes to test this. The same prompt was passed into Bard with the images below. The only difference between the two images is the fact that in one image, the colors of the nodes are same, and in the other image, the colors of the nodes are different. In the results below, we can clearly see that the VLM is able to perform better with the colored nodes, as the VLM is able to distinguish between different nodes.

          Original Image
          Colored Image
          Prompt: What are the names of the nodes in this graph?
          Prompt: What are the names of the nodes in this graph?
          Answer: B1, B2, B3, B4, B5.
          Answer: B1, B2, B3, B4.


          To support this argument, we look at the attention that CLIP places on segments of the image based on a caption. We specifically use CLIP because CLIP is the visual encoder in Flamingo. While this isn’t necessarily a rigorous proof, we can see that the attention on the nodes is placed stronger in the colored graph example rather than the regular graph example.

          Caption: Node B1, B2, B3, B4
          Caption: Node B1, B2, B3, B4

          Through the examples and tests above, we can clearly see the VLM’s ability to extract nodes, especially with a visually distinguishing factor between the nodes like color. Since the VLM can do a pretty decent job of extracting the nodes, it makes sense that providing the VLM with the node list may not allow for great improvements in performance.

          So, if the VLM can extract the nodes relatively well, why is the performance still subpar?

          Difficulties with Edge Detection

          Aside from nodes, most graphs have edges, and for the questions asked in the experiments, understanding the connectivity was crucial to providing the correct answer. We actually observed that the colored graphs had answers that were closer to 100% accuracy in comparison to the regular graphs. To explore how VLMs understand the connections between nodes, I decided to ask Bard about some simple graphs to determine how it responded.

          Image
          Prompt
          Can you please tell me what the edges in the graph are?
          Can you please tell me what the edges in the graph are?
          Answer
          The edges in the graph are: a -> b, b -> c, c -> a
          The edges in the graph are: B -> A, C -> A

          Wow! It’s really surprising that the VLM is creating edges where there aren’t even edges! Also, the directions of the edges are wrong. Why would this be the case?

          Let’s explore a bit more about the individual arrows. Can a VLM accurately describe the arrow direction?

          Image
          Prompt
          Which way does the arrow point?
          Which way does the arrow point?
          Answer
          The arrow in the image points to the right.
          The arrow in the image points to the left.

          Well, if you take a look at the responses above, we notice that the VLM can accurately provide the arrow direction for larger arrows in comparison to smaller arrows. Since most of the graphs involve smaller arrows, we can see why the VLM could not tell the directionality of the arrow.

          Outside information

          Beyond the graphical structure of the nodes, I found that Bard typically incorporated outside information that required background knowledge about the topic. These situations can be categorized into two categories: useful outside information and irrelevant outside information. To distinguish between the two, I found that in the situations that Bard used graph algorithms and definitions, the outside information was useful in generating the answer. However, many times, Bard would look up specific companies or use graphs from other sources online.

          In this example, Bard was asked if there was a cycle in the graph, and Bard outputted the answer below. The VLM was very intelligent by quoting the definition of a cycle and using DFS to determine the existence of one. The step-by-step algorithm is correct and ultimately, leads to the solution. This shows that the incorporation of algorithms and accurate definitions can assist the VLM in answering queries.

          No, there is no cycle in the graph of the Nove 1-66Express organizational structure. A cycle is a path in a graph that starts and ends at the same vertex, and there is no such path in this graph. I verified this by tracing the graph and ensuring that I never returned to a vertex that I had already visited. I also used the following algorithm to check for cycles in graphs: Mark all vertices as unvisited. Choose a vertex and start a depth-first search (DFS). As you traverse the graph, mark each vertex as visited. If you ever reach a vertex that is already marked as visited, then there is a cycle in the graph. If you finish traversing the graph without ever reaching a visited vertex, then there is no cycle in the graph. I ran this algorithm on the graph of the Nove 1-66Express organizational structure and found that there are no cycles.

          However, outside information can be just as harmful as it is beneficial. Pulling from outside sources can be tricky for a VLM. It needs to remember to operate within the ground truths presented in the figure. In multiple instances, the VLM actually referenced the wrong figure because it forgot about the image provided with the prompt.

          Therefore, the VLM needs to be fine-tuned to filter the outside information it brings in to improve the accuracy of its answers.

          Conclusions and Future Steps

          In this study, I delved into the capabilities and limitations of VLMs, like Open Flamingo, in interpreting complex graphical structures within expert domains, such as scientific literature. The experiments, involving various image augmentation techniques such as color changes and node names, revealed key insights. While VLMs demonstrated proficiency in node extraction, they faced challenges in edge detection and understanding the connectivity between nodes. This was particularly evident when colored images outperformed non-colored ones, highlighting the importance of visual distinction for VLM comprehension. However, the addition of node lists did not significantly enhance performance, suggesting existing capabilities in node identification. The connectivity was difficult for the VLM to understand because of the size of the arrows.

          The findings of this research highlight a crucial challenge for VLMs: integrating domain-specific knowledge, especially for non-standard images like scientific diagrams. However, the small dataset size suggests that further research with a larger and more diverse dataset is necessary to validate these findings. In the future, this research can be applied to help improve prompting for graphical structures, provide insights on how to finetune a VLM for this task, and create a new interest in using VLMs for scientific diagrams.

          ]]>
          New Synthesis Approach for Personalized LLMS2023-12-12T00:00:00+00:002023-12-12T00:00:00+00:00https://deep-learning-mit.github.io/staging/blog/2023/PersonalizedGeneration_w_LLMAgentsIntroduction

          Deep learning has revolutionized the way in which humans interact with the world around them. Its growing ability to ingest vast amounts of data, automate feature extraction, and learn complex patterns and nuances among inputs have contributed to breakthroughs in healthcare, natural language processing, computer vision, and more. A particularly exciting avenue of this innovation has been in the burgeoning field of personalized text generation, which aims to produce text that resembles the style, tone, and word choice taken on by a particular user. Significant advancement in this field has the potential to create more effective forms of communication for individuals with disabilities, personalize educational content, and enhance user interactions with chatbots and virtual assistants, all contributing to a better overall user experience.

          In an effort to make the availability of personalized text generation more wide-scale, researchers have conducted several studies in the field, centering their approach to the generation of domain-specific personalized text (utilizing domain-specific features/knowledge). Notable studies conducted include Towards Controllable and Personalized Review Generation, which utilizes a product description and self-attentive recursive autoencoders to generate a personalized review [1], Knowledge-Enhanced Personalized Review Generation with Capsule Graph Neural Network, which constructs a model based on a CapsGNN, and Research on user granularity-level personalized social text generation technology, which utilizes an encoder and decoder for text generation [2]. A lesser explored part of the field and an area that we have chosen to explore for our final project is embedding in the ability to generate personalized text across domains without domain-specific features [3]. Our project draws inspiration from “Teach LLMs to Personalize – An Approach inspired by Writing Education”, which includes a promising multi-step framework that retrieves, summarizes, ranks, and synthesizes a user’s past documents to generate a personalized version of the document at hand [4].

          A critical aspect of the workflow discussed in the LLM personalization paper and an area that we believe can be improved upon using some of the methods discussed in 6.S898 this semester is the way in which the model synthesizes past documents. Throughout the paper, we will be exploring two creative approaches to synthesis that utilize vector word embeddings to pull relevant words from past documents in an effort to improve the model’s ability to personalize text.

          Literature Review

          An integral part of our exploration project was experimenting with using less data and smaller models to see how performance degrades with respect to the approach discussed in the personalization for LLMs paper (no open source code attached as the project is currently being worked on by researchers at Google). Experimentation required taking an extensive look at the steps involved in the original implementation, gaining an in-depth understanding of the deep learning principles discussed, and optimizing training and compute under machine constraints to process vast amounts of real-world data.

          The problem formulation for the approach to personalized text generation discussed in the paper can be stated as the following: Given the immediate context of a current document (first k characters) written by a user and access to their past documents, can we develop a model that generates text that is similar to the text of the current document (similarity evaluated by calculating Rouge-1, Rouge-2, Rouge-L, and Bleu scores) . As mentioned earlier, the framework for answering this problem formulation involves first obtaining outputs for retrieval, ranking, summarization, and synthesis, and then feeding these distinct parts into an LLM to produce a personalized body of text (we ignore the auxiliary task of training the LLM to distinguish the owners of written documents for the purposes of this project).

          The retrieval discussed in the paper uses two methods of outputting relevant documents: sparse retrieval, which compares past documents to the current context using the popular BM25 ranking algorithm, and dense retrieval, which uses a transformer-based text-to-text model to map and compare documents in a 768 dimensional vector space. The ranking step then takes this input, orders documents based on their BM25 scores or cosine similarity when compared with the immediate context, and truncates the input to 2500 characters to only take the top documents. The summarization step then summarizes the top ranked past documents in two ways: context independent summarization, which finetunes an LLM on publicly available data and applies this model to the top ranked entries, and context dependent summarization, which uses weak labels (generated from immediate context) to generate a summary in line with the contents of the current document. A visualization of the approach to the structure can be seen below.

          An overview of the infrastructure used to process documents and fine tune the personalized generative model.

          The paper currently explores two methodologies for synthesis: (1) context independent synthesis, which simply uses the top 20 frequently used keywords from a user’s past documents and (2) context dependent synthesis, which uses weak labels to find text from past documents similar to the immediate context of the document to be personalized. After carefully analyzing the two methodologies, we found that by focusing on keywords the synthesized text is missing an in-depth understanding of sentence structure and semantics that are crucial to personalization.

          To enhance this step of the text generation process, we have explored several new methods of synthesis and have landed on two approaches with one utilizing the Word2Vec model and the other using GloVe. We have chosen these methods because they both use unique embedding space attributes to form important relationships between texts. Both networks use the method of creating a centroid of the current document that exists in vector space and output words from top ranked past documents that exist close to this centroid. By doing this, we are essentially selecting words (after filtering out synonyms and stopwords) that are in line with the theme of the current document, which will provide the LLM with more thematically relevant synthesized entries that should in theory generate a more personalized output.

          As an additional research consideration, we explored the effect of passing in the output from both the context independent synthesis discussed in the paper and our auxiliary method of using Word2Vec or GloVe compared to passing in just one of the methods of synthesis. The motivation for doing so came from our initial hypothesis that the combination of both methods of synthesis would enable the LLM to learn complex interactions between important words (results from context independent synthesis) and thematic words (GloVe/Word2Vec) that could lead to better personalization of the final output. A more detailed explanation of the implementations of our proposed approaches will be shown in the following section.

          Description of methods & experiments

          The Baseline Implementation

          Our methodological approach began by re-implementing the baseline model from the “Teach LLMs to Personalize” paper. We utilized two datasets mentioned in the research paper: CNN_DailyMail (CNN_DailyMail) and Amazon Review Data for Books (Amazon_review_data). To enhance efficiency of compute time, we streamlined the data by reducing its size, ensuring a quicker fine-tuning process while retaining data integrity. We also utilized the T5-base model, a smaller model than the T5-11b model mentioned in the paper, for summarization and the personalized generation model. Furthermore, we opted to use the context-independent methods for both summarization and synthesis because the research paper results indicated that their effectiveness is closely comparable to the context-dependent methods. For fine-tuning the summarization model, we utilized a 10 percent subset of the CNN daily mail dataset (311k datapoint original size) with the AdamW optimizer (seeing AdamW is a comparable optimizer to Adafactor, which is what was used in the “Teach LLMs to Personalize” paper), ensuring a balance between efficiency of tuning and comprehensive learning. This set the foundation for our exploration of advanced text synthesis techniques by giving us a base fine tuning and data processing infrastructure. On top of this, the changes we made to the amount of data used along with utilizing a smaller T5 model allowed us to analyze whether the final evaluation results degraded significantly when making the infrastructure of fine tuning the personalized generation model more compact.

          Overview of Modification & Experiments

          In our new approach for synthesis, we utilized Word2Vec and GloVe which hinges on the concept of embedding space. In this space, words are represented as vectors, capturing their semantic relationships based on their context in large text corpora. By embedding the current document and past documents (from the same user) in this space, each word is assigned a position that reflects its semantic meaning.

          An example of how using TSNE dimension reduction can illustrate how words are placed in embedding space. Note that dimension reduction does not always come out cleanly since word embeddings are complex and can't be easily represented in 2D space.

          The ‘centroid’ of the current document in this space is a calculated mean vector, representing the overall semantic direction of the document. Words closest to this centroid are likely to be central to the document’s theme or style. When we look for words from past documents that are closest to this centroid, we are essentially searching for words that align closely with the thematic and stylistic essence of the current document.

          An example of how our centroid for the current document corresponds to other words from past documents (note we used PCA dimensionality here). We also chose to display words that had very close euclidean distances to the centroid. Note our centroid from the current document highlighted the following as significant words: ['like', 'since', 'first', 'mystery', 'book']

          This method makes sense technically because it leverages the nuanced understanding of language captured in word embeddings. By focusing on words that are semantically close to the central theme of the current document, the model can more effectively identify and use terms that are likely to be relevant and stylistically consistent for personalization, thereby potentially enhancing the generated text of the personalized generation model.

          Experiment / Word2Vec vs GloVe

          Word2Vec and GloVe are both models for word embeddings, but they differ in their approach to creating these embeddings. Word2Vec, developed by Google, primarily uses local context information of words (words surrounding a given word) to generate embeddings. This results in embeddings that capture more of the syntactic and semantic relationships based on specific local contexts.

          GloVe (Global Vectors for Word Representation), on the other hand, is designed by Stanford and incorporates global matrix factorization and local context window methods. It emphasizes capturing global statistics of the corpus by considering overall word co-occurrence frequencies, essentially acting as an unsupervised learning algorithm that generates word embeddings.

          When used for synthesis in text personalization, these differences influence the nature of the embeddings. Word2Vec might be more sensitive to the specific contextual use of words in the current and past documents, potentially offering more precise thematic matches based on immediate context. GloVe, with its global perspective, might bring in a broader understanding of word use, capturing more general usage patterns and thematic relationships that extend beyond the immediate context. This could lead to a slightly different set of words being selected for personalization in the synthesis process.

          In our experiment, we adapted the structure from the “Teach LLMs” paper, incorporating our novel synthesis methods using Word2Vec and GloVe. The process involved independently fine-tuning the personalized generation model for each synthesis approach. This fine-tuning was crucial to observe how the different embedding techniques influenced the model’s performance. After implementing the new synthesis methods, we conducted a thorough evaluation to compare their effectiveness, along with the combination of the original and new synthesis approaches, with the base model. The key focus was on analyzing how the different word embeddings (and combinations of embeddings) impacted the quality and personalization of the generated text, with performance metrics providing insights into the strengths and limitations of each method.

          Analysis / Evaluation of Results

          The evaluation metrics used in the “Teach LLMs” paper (and also what we utilized), BLEU (Bilingual Evaluation Understudy), ROUGE-1, ROUGE-2, and ROUGE-L, are standard metrics used to evaluate the quality of text which has been machine-translated or generated by machine learning models.

          BLEU Score: The BLEU score evaluates the quality of machine-translated text by comparing it with one or more reference translations. It does so at various levels, from individual words to consecutive sequences of words (n-grams), to assess precision. A higher BLEU score indicates more similarity to the reference text, often implying better translation quality. However, BLEU has limitations as it does not account for the fluency or grammaticality of the generated text.

          Calculations behind the BLEU score calculations.

          ROUGE Scores: ROUGE (Recall-Oriented Understudy for Gisting Evaluation) is another set of metrics for evaluating automatic summarization and machine translation. ROUGE-1 and ROUGE-2 refer to the overlap of unigrams (single words) and bigrams (two consecutive words) between the machine-generated text and a set of reference texts, respectively. ROUGE-L considers the longest common subsequence, focusing on the longest coherently matching sequence of words. ROUGE scores can consider both precision (like BLEU) and recall, providing a more rounded evaluation.

          Calculations behind the ROUGE-N (N-gram) score calculations; in our case N = 1, 2, or longest common subsequence.

          We can also take a look into how our models performed during the fine tuning period. Based on the progression of the training and validation loss, you can infer how well the model is learning and whether it’s overfitting (learning the training data too closely and not generalizing well) or underfitting (not learning the training data well enough).

          Comparing the performance of our models using two different synthesis approaches–our base model versus the new synthesis approach using the GloVe or Word2Vec model, and the combination of the base model and new synthesis–could result in different behaviors most likely for one particular reason:

          Quality of Embeddings: The GloVe and Word2Vec models provide a different representation for words, capturing semantic relationships in a more nuanced way than just looking at IDF scores, which could lead to varied results during fine tuning. Also, combining our original synthesis with our new synthesis can give the model more information to finetune on allowing for a more intricate understanding of the text when generating.

          The differences in BLEU and ROUGE scores between the two models can arise from how each model handles the linguistic nuances of the generated text. If the new approach with the GloVe model is better at capturing the context and meaning of the sentences, it might score higher in BLEU and ROUGE, despite potentially higher loss values.

          The variations in BLEU and ROUGE scores could also indicate how each model deals with the trade-off between precision and recall—whether it’s better at producing text that contains most of the expected content (high recall) or at avoiding extraneous content not in the reference (high precision).

          Evaluating these metrics in combination with each other, rather than in isolation, provides a more comprehensive picture of a model’s performance and areas for potential improvement.

          The following results portray the overarching BLEU, ROUGE-1, ROUGE-2, and ROUGE-L score we received for the base model, our model using the new synthesis approach, and our model using the base synthesis along with the new synthesis. We have highlighted the snippets of the generated cases that produced our highest scores which are indicative of the possibilities of improvement if we were able to utilize larger T5 models and more training data.

          Results

          The following table highlights the results of our evaluation of generated outputs from our baseline model versus our two new approaches (new synthesis and old synth + new synth). Although there are cases where the max score for our new approaches is high, we believe that this is most likely the case where we generate the rest of a document that is already significantly short. Essentially, since we don’t need to generate a diverse output of words for a longer length, our more compact t5-base model with minimal training performs very well still. 1

            BLEU (avg) ROUGE1 (avg) ROUGE2 (avg) ROUGEL (avg) BLEU (max) ROUGE1 (max) ROUGE2 (max) ROUGEL (max)
          Baseline Model 08.9531 29.5847 18.6126 25.6882 49.5207 65.2174 62.2222 65.2173
          New Synth (Word2Vec) 09.0722 29.3465 18.3129 25.6115 46.6638 65.9340 62.2222 65.2174
          New Synth (GloVe) 10.3810 31.9870 21.1543 27.4335 50.5317 65.8537 60.1942 63.4146
          New Synth (Word2Vec) + Old Synth 10.4402 31.4181 20.2349 27.7710 58.0197 64.8148 61.9048 62.7907
          New Synth (GloVe) + Old Synth 08.7228 29.2284 17.1685 24.6075 49.7273 65.5462 60.9756 61.9048

          Conclusion / Discussion of Limitations

          Throughout the paper, we have demonstrated the potential of embedding techniques like Word2Vec and GloVe in enhancing the personalization aspect of text generation models. Our experiments, which involved comparing these methods with traditional synthesis techniques, have shown promising results in terms of creating text that more accurately reflects the style and thematic preferences of individual users.

          Limitations

          For our exploration, we were limited to running all of our models and doing our data analysis on Google Colab in a short period of time along with having to reimplement the structure used in the “Teach LLMs to Personalize” paper since no codebase exists for it. Because of this, we had to find ways to condense our models and limit the amount of data we ingested so that we could spend less time waiting on models to run and freeing up storage and more time analyzing the output of our code. Two of the big adjustments that we made to navigate these constraints was using the t5-base model (fewer tokens than t5-11b), which we ran for a limited number of epochs, instead of the t5-11b model and using only a subset of data points from the provided Amazon Review Dataset. One of the other things that we tried to make the most advantage of our compute was quantizing our t5-base model to provide faster synthesis and summary to run on our ingested data, but we unfortunately ran into dependency issues and were unable to get this method working. However, from our analysis, we estimate that our evaluation results would have been much more in line with the paper’s results, or even surpass them, if we were able to run the t5-11b model for a larger amount of epochs and utilize more amazon review data.

          Next Steps

          If we choose to continue this project, we want to explore ways in which we can synthesize domain-specific knowledge, along with thematic tendencies, related to the current document that can be fed into the final LLM for text generation. There are a lot of benefits of providing synthesized information to the model as it filters for the “most important/significant” words in a document and we hypothesize that this supplementary information could add an extra level of knowledge to a model that has proven to perform well in personalization.

          Also, another pathway that could be explored is integrating Agent LLMs in the initial document ranking phase to see if the procured rankings are better than the current methods set in place (RankDocBM25, RankDocDense, RankSnippet, RankDocBySnpt). We believe that utilizing LLMs that have more awareness of context over large document spaces (and even varying languages) could be beneficial to the process of developing a personalized generation model.

          Bibliography

          [1] Li, Pan, and Alexander Tuzhilin. Towards Controllable and Personalized Review Generation - arXiv.Org, arxiv.org/pdf/1910.03506.pdf. Accessed 12 Dec. 2023.

          [2] Li, Junyi, et al. Knowledge-Enhanced Personalized Review Generation with … - Arxiv.Org, arxiv.org/pdf/2010.01480.pdf. Accessed 12 Dec. 2023.

          [3] Gao, Y B, et al. “IOPscience.” Journal of Physics: Conference Series, IOP Publishing, 1 June 2022, iopscience.iop.org/article/10.1088/1742-6596/2294/1/012015.

          [4] Li, Cheng, et al. Teach LLMs to Personalize: An Approach Inspired by Writing Education - Arxiv.Org, arxiv.org/pdf/2308.07968.pdf. Accessed 12 Dec. 2023.

          ]]>
          Daniel Papacica
          To Encode or Not To Encode: The Case for the Encoder-free Autodecoder Architecture2023-12-12T00:00:00+00:002023-12-12T00:00:00+00:00https://deep-learning-mit.github.io/staging/blog/2023/autodecodersAutodecoders

          Introduction

          Autoencoders have been a part of the neural network landscape for decades, first proposed by LeCun in 1987. Today, many variants of the autoencoder architecture exist as successful applications in different fields, including computer vision and natural language processing, and the variational autoencoder remains among the forefront of generative modeling. Autoencoders are neural networks trained to reconstruct their input as their output via compression through dimensionality reduction, accomplishing this task with the use of an encoder-decoder network.

          Autoencoders consist of the encoder network, which takes a data sample input and translates it to a lower-dimensional latent representation consisting of only the most necessary features, and the decoder network, which attempts to reconstruct the original data from this encoding. By learning a compressed, distributed representation of the data, the latent space learned by autoencoders is usable for a plethora of downstream tasks.

          With traditional autoencoders, both the encoder and decoder are trained, but for certain applications— particularly generative tasks— only the decoder is utilized for inference. Because the encoder itself is not used at test time, training an encoder may not be an effective use of computational resources; the autodecoder is an alternative architecture that operates without an encoder network and brings some novel benefits.

          Rather than using the encoder to encode an input into a low-dimensional latent code, each sample in the training set begins with a randomly initialized latent code, and the latent codes and decoder weights are updated jointly during training time. For inference on new data, the latent vector for a given sample is then also randomly initialized and updated through an additional optimization loop with the decoder’s frozen weights.

          The architecture for an autoencoder (top) compared to that of an autodecoder (bottom).

          Are explicit encoders necessary for image reconstruction? What are the unique benefits that come from using decoder-only architectures? One interesting application of autodecoders is the ability to reconstruct complete samples from partial inputs. The main focus of our research revolved around testing this ability, answering the question of how much of a sample is required for a complete reconstruction using an autodecoder given an expressive latent space, and comparing its performance to that of an autoencoder.

          Furthermore, we discuss additional applications in various fields that other research has accomplished in part due to the utilization of the autodecoder architecture over the traditional autoencoder, with a focus on the beneficial properties that we explore in our experiments, including partial reconstructions.

          Different literature have utilized autodecoder frameworks in the past along with providing rationale for their usage, mainly for tasks related to reconstruction or generative modeling through representation learning. However, none have provided standalone examples of their use, something we aim to accomplish in this blog.

          The Generative Latent Optimization framework was introduced by Bojanowski et al. (2019) as an alternative to the adversarial training protocol of GANs. Instead of producing the latent representation with a parametric encoder, the representation is learned freely in a non-parametric manner. One noise vector is optimized by minimizing a simple reconstruction loss and is mapped to each image in the dataset.

          Tang, Sennrich, and Nivre (2019) trained encoder-free neural machine translation (NMT) models in an endeavor to produce more interpretable models. In the encoder-free model, the source was the sum of the word embeddings and the sinusoid embeddings (Vaswani et al., 2017), and the decoder was a transformer or RNN. The models without an encoder produced significantly poorer results; however, the word embeddings produced by encoder-free models were competitive to those produced by the default NMT models.

          DeepSDF, a learned continuous Signed Distance Function (SDF) representation of a class of shapes, was introduced by Park et al. (2019) as a novel representation for generative 3D modelling. Autodecoder networks were used for learning the shape embeddings, trained with self-reconstruction loss on decoder-only architectures. These autodecoders simultaneously optimized the latent vectors mapping to each data point and the decoder weights through backpropagation. While outperforming previous methods in both space representation and completion tasks, autodecoding was significantly more time-consuming during inference because of the explicit need for optimization over the latent vector.

          Sitzmann et al. (2021) introduced a novel neural scene representation called Light Field Networks (LFNs), reducing the time and memory complexity of storing 360-degree light fields and enabling real-time rendering. 3D scenes are each represented by their own latent vectors that are obtained by using an autodecoder framework, but it is noted that this may not be the framework that performs the best. The latent parameters and the hypernetwork parameters are both optimized in the training loop using gradient descent; the LFN is conditioned on a single latent variable. Potential applications are noted to include enabling out-of-distribution generalization through combining LFNs with local conditioning.

          Scene Representation Networks (SRNs) represent scenes as continuous functions without knowledge of depth or shape, allowing for generalization and applications including few-shot reconstruction. SRNs, introduced by Sitzmann, Zollhöfer and Wetzstein (2019), represent both the geometry and appearance of a scene, and are able to accomplish tasks such as novel view synthesis and shape interpolation from unsupervised training on sets of 2D images. An autodecoder framework is used to find the latent vectors that characterize the different shapes and appearance properties of scenes.

          Methodology

          Traditional Autoencoder

          To establish a baseline, we first trained a convolutional autoencoder network containing both an encoder and decoder on a version of the MNIST dataset normalized and padded to contain 32x32 sized images. For our autoencoder architecture, we utilized convolutional layers with ReLU nonlinearity.

          The training and validation losses from the training loop for the autoencoder.
          The latent space learned by the autoencoder, color-coded by digit label and visualized through a 2-dimensional t-SNE plot. We see the expected result, with consistency and separation.
          A sample output from an unseen image after training. We can see that our small convolutional autoencoder does a fairly good job at learning how to compress simple information into a single latent code and decode it into its original form.

          Autodecoder

          We implemented and trained an autodecoder on the same dataset by creating a convolutional decoder that takes latent codes as an input and transforms them into full images. We utilized transpose convolutions to upscale the images while additionally concatenating normalized coordinates to embed positional information, and also used leaky ReLU layers for nonlinearity.

          For training, the latent codes for 10,000 images in our training set were randomly initialized. The loss for our autodecoder then included three components: the reconstruction loss; the latent loss, which encourages latent values to be closer to zero in order to encourage a compact latent space; and the L2 weight regularization, which prevents the decoder from overfitting to the training set by encouraging the model weights to remain small.

          The loss function used to train the autodecoder. During inference, a custom loss function can be used based on the application.
          The training and validation losses from the training loop for the autodecoder. The validation loss has no actual meaning in the autodecoder framework, as new images would have a randomly initialized latent code and so would output nonsense. This loss was included simply to demonstrate this feature.

          Below are progressive reconstructions on the training data performed by the autodecoder as it trained and optimized both the decoder weights and the training set’s latent codes. We can observe that the digits’ general forms were learned before the exact shapes, which implies good concentration and consistency of the latent space between digits of the same class.

          Progressive reconstructions from top to bottom (model outputs compared to ground truth): 1. Decoding a randomly initialized latent code outputs nonsense. 2. The correct digit is reconstructed, implying that the latent space is improving, but the specific shape differs from that of the ground truth image. 3. The output’s shape begins to better match that of the ground truth. 4. The autodecoder and latent code are optimized to be able to effectively reconstruct the ground truth image.
          The latent space learned by the autodecoder, also visualized through a 2-dimensional t-SNE plot. When compared to the t-SNE plot representing the latent space learned by the autoencoder, we again see consistency but notice that here the clusters are more compact. While the distance between clusters in t-SNE plots does not have a definite meaning, this could potentially imply that the features of shapes, rather than the shapes themselves, are better learned, as different digits share similar features (curves, straight lines, etc).

          Upon training the autodecoder, for inference on a new image we first freeze the decoder weights and then run an additional gradient descent-based optimization loop over a new randomly initialized latent code with reconstruction loss.

          Output from the trained autodecoder on a new image from the test set.

          Experimentation

          One benefit of the autodecoder framework is that because we have an additional optimization loop for each input during inference, we are able to do varying pixel-level reconstructions, whereas an autoencoder is designed and trained to reconstruct complete images each time.

          We demonstrate this feature in our experiments below by applying center masks to our images before autoencoding or decoding.

          1: We trained a traditional autoencoder with generic reconstruction loss, and input an image with a mask in the center. The output is expected, as the autoencoder learned to reconstruct whatever it saw, and so the empty space from the mask is included in the result.

          The input image
          The reconstructed image compared to the image without the mask.

          2: We trained a traditional autoencoder with reconstruction loss without considering a centered square area and input an unmodified image. The output is again expected, as the autoencoder was trained to fully disregard the center area, and so the output is empty in that region.

          The area of the images that the autoencoder is trained to learn on.
          The model output compared to the original image.

          3: We trained an autodecoder with generic reconstruction loss, and during the optimization loop for inference we utilized a custom loss function that did not consider the masked area. However, in this case, we are still able to reconstruct the original image to varying levels of success because of the latent space we originally learned through the training loop.

          Shown below are the areas optimized in the loss functions, along with the decoded output and original image.

          An 8x8 mask.
          A 12x12 mask. Even with significant information about the digit missing, the autodecoder is able to sufficiently reconstruct the ground truth image based on the learned information.
          A 16x16 mask.
          A 20x20 mask. Although the reconstructed digit is ultimately incorrect, we see that we are able to get very close even with extremely limited inputs.

          To analyze and compare the latent spaces learned by both our autoencoder and autodecoder, we additionally perform linear interpolation (with α=0.5) between the embeddings of two images and include their decoded results below.

          The output of the decoded interpolation of two embeddings from the autoencoder.
          The output of the decoded interpolation of two embeddings from the autodecoder.

          The autoencoder output was somewhat expected due to the simplistic nature of the MNIST dataset, and we can see a merge of the two images with equal features of both.

          More interesting was the output for the autodecoder, which simply returned an image consisting of the pixel average of both images. Some hypotheses for this result include:

          • The shape of the latent space for the learned autodecoder potentially being one that does not pair well with linear interpolation, causing linear interpolations in latent space to be equivalent to those in the data space. Meanwhile, the shape of the latent space for the autoencoder might better match a Gaussian, which translates to effective nonlinear interpolations in the data space, which is desired.
          • The inductive bias from the existence of the encoder architecture allowing for better interpolatability.

          Conclusion

          Discussion

          While autoencoders (and variations such as VAEs) have traditionally been the standard architectures for representation learning, we explore an alternate autodecoder architecture, in which the encoder is excluded and individual latent codes are learned along with the decoder. We investigated the necessity of an explicit encoder in representation learning tasks and found that even without an encoder network, we are able to learn latent representations of input data through the optimization of randomly initialized latent codes during the training loop. Through this alternate dimensionality reduction process, we showed that we were still able to learn a consistent latent space on a multi-class dataset. Furthermore, we showed that through the use of an additional optimization loop for inference rather than learned encoder weights, the autodecoder can learn to reconstruct incomplete observations through pixel-level optimizations.

          The autodecoder has the potential for many further applications beyond the scope of the research and experiments introduced in this blog. As an example, the task of prior-based 3D scene reconstruction in the field of computer vision, in which novel views of a 3D scene can be generated from a limited number of static images of that scene along with their camera poses, utilizes the autodecoder architecture to guarantee better out-of-distribution views. This task involves the use of camera pose as an additional source of information in addition to input images, something that the encoder itself is unable to integrate when encoding images, leading to the valuable scene representation information being left out. Meanwhile, because the latent code itself is learned in an autodecoder, it is able to use the camera pose to effectively generalize to novel viewpoints. This serves as just one of several examples of the autodecoder being able to carry out tasks normally gatekept by the limitations of the encoder.

          Limitations

          Some limitations of the encoder-free architecture include certain fallbacks discussed in our experiments, including the difficulties in generating satisfactory novel outputs through linear interpolation of the latent space. Furthermore, while the existence of a secondary optimization loop during inference comes with interesting properties such as being able to define unique loss functions for different purposes, this can be more computationally or temporally costly than running inputs on a trained encoder for inference. Regardless, as much of the research around this topic has emerged only within the past several years, it can be expected that autodecoders and their unique properties will continue to emerge, evolve, and find use in novel applications in the years to come.

          References

          Robin Baumann. Introduction to neural fields, 2022.

          Piotr Bojanowski, Armand Joulin, David Lopez-Paz, and Arthur Szlam. Optimizing the latent space of generative networks. arXiv preprint arXiv:1707.05776, 2017.

          Jeong Joon Park, Peter Florence, Julian Straub, Richard Newcombe, and Steven Lovegrove. Deepsdf: Learning continuous signed distance functions for shape representation. In Proceedings of the IEEE/CVF conference on computer vision and pattern recognition, pages 165–174, 2019.

          Vincent Sitzmann, Semon Rezchikov, Bill Freeman, Josh Tenenbaum, and Fredo Durand. Light field networks: Neural scene representations with single-evaluation rendering. Advances in Neural Information Processing Systems, 34:19313–19325, 2021.

          Vincent Sitzmann, Michael Zollhöfer, and Gordon Wetzstein. Scene representation networks: Continuous 3d-structure-aware neural scene representations. Advances in Neural Information Processing Systems, 32, 2019.

          Gongbo Tang, Rico Sennrich, and Joakim Nivre. Encoders help you disambiguate word senses in neural machine translation. In Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP). Association for Computational Linguistics, 2019.

          ]]>
          Seok Kim
          \ No newline at end of file diff --git a/home.md b/home.md deleted file mode 100644 index f5344476..00000000 --- a/home.md +++ /dev/null @@ -1,192 +0,0 @@ ---- -layout: page -title: ICLR 2022 Blog Track -tags: [proposal, call] -authors: Bubeck, Sebastien (Microsoft); Dobre, David (Mila); Gauthier, Charlie (Mila); Gidel, Gauthier (Mila); Vernade, Claire (DeepMind) ---- - -
          -

          - ICLR Logo -

          - -## Important Information - -- The track has concluded and accepted blogposts are viewable [here]({{ site.url }}/blog)! -- We've released a video talking about this track in more detail - -We would like to thank everyone who took part in this experiment and for making it a success! - -

          - -## Contents - -- [Accepted Posts](#accepted-posts) -- [Motivation](#motivation) -- [Submissions](#submissions) -- [Organizers](#organizers) - - -## Motivation - -The Machine Learning community is currently experiencing a -[reproducibility crisis](https://neuripsconf.medium.com/designing-the-reproducibility-program-for-neurips-2020-7fcccaa5c6ad) -and a reviewing crisis [[Littman, 2021]](#Litt). Because of the highly competitive and noisy -reviewing process of ML conferences [[Tran et al., 2020]](#Tran), researchers have an incentive to -oversell their results, slowing down the progress and diminishing the -integrity of the scientific community. Moreover with the growing number -of papers published and submitted at the main ML conferences [[Lin et al., 2020]](#Lin), it has -become more challenging to keep track of the latest advances in the -field. - -Blog posts are becoming an increasingly popular and useful way to talk -about science [[Brown and Woolston, 2018]](#Brow). -They offer substantial value to the scientific community -by providing a flexible platform to foster open, human, and transparent -discussions about new insights or limitations of a scientific -publication. However, because they are not as recognized as standard -scientific publications, only a minority of researchers manage to -maintain an active blog and get visibility for their efforts. Many are -well-established researchers ([Francis Bach](https://francisbach.com/), -[Ben Recht](https://www.argmin.net/), [Ferenc -Huszár](https://www.inference.vc/), [Lilian -Weng](https://lilianweng.github.io/lil-log/)) or big corporations that -leverage entire teams of graphic designers designer and writers to -polish their blogs ([Facebook AI](https://ai.facebook.com/blog/?page=1), -[Google AI](https://ai.googleblog.com/), -[DeepMind](https://deepmind.com/blog), -[OpenAI](https://openai.com/blog/)). 
As a result, the incentives for -writing scientific blog posts are largely personal; it is unreasonable -to expect a significant portion of the machine learning community to -contribute to such an initiative when everyone is trying to establish -themselves through publications. - - -### A Blog Post Conference Track - -Our goal is to create a formal call for blog posts at ICLR to -incentivize and reward researchers to review past work and summarize the -outcomes, develop new intuitions, or highlight some shortcomings. A very -influential initiative of this kind happened after the second world war -in France. Because of the lack of up-to-date textbooks, a collective of -mathematicians under the pseudonym Nicolas Bourbaki [[Halmos 1957]](#Halm), decided to start a -series of textbooks about the foundations of mathematics [[Bourbaki, 1939]](#Bour). -In the same vein, we aim at providing a new way to summarize scientific knowledge in -the ML community. - -Due to the large diversity of topics that can be discussed in a blog -post, we decided to restrict the range of topics for this call for blog -posts. We identified that the blog posts that would bring to most value -to the community and the conference would be posts that distill and -discuss *previously published papers*. - -### A call for blog posts discussing work previously published at ICLR - -The format and process for this blog post track is as follows: - -- Write a post about a paper previously published at ICLR, with the - constraint that one cannot write a blog post on work that they have - a conflict of interest with. This implies that one cannot review - their own work, or work originating from their institution or - company. We want to foster productive discussion about *ideas*, and - prevent posts that intentionally aim to help or hurt individuals or - institutions. 
- -- Blogs will be peer-reviewed (double-blind, see - Section 2.5) - for quality and novelty of the content: clarity and pedagogy of the - exposition, new theoretical or practical insights, - reproduction/extension of experiments, etc. - -- The posts will be published under a unified template (see - Section 2.4 - and - Section 2.5) - and hosted on the conference website or our own Github page. - - -## Submissions - -**Note: The track has concluded and we are not accepting any more submissions!** - -Our goal is to avoid heavily engineered, professionally-made -blog-posts---Such as the “100+ hours” mentioned as a standard by the [Distill - guidelines](https://distill.pub/journal/)---to entice ideas and clear writing rather than dynamic -visualizations or embedded javascript engines. - -As a result, we restrict submissions to the Markdown format. We believe -this is a good trade-off between complexity and flexibility. Markdown -enables users to easily embed media such as images, gifs, audio, and -video as well as write mathematical equations using MathJax, without -requiring users to know how to create HTML web pages. This (mostly) -static format is also fairly portable; users can download the blog post -without much effort for offline reading or archival purposes. More -importantly, this format can be easily hosted and maintained through -GitHub. - -> Please checkout the submitting section for a detailed overview on the -> process of creating and submitting a blog post. - - -## Organizers - -  - -
            -
          • - -
            -

            Gauthier Gidel

            -

            gidelgau [ at ] mila.quebec

            -
            -
          • -
          • - -
            -

            Charlie Gauthier

            -

            charlie.gauthier [ at ] umontreal.ca

            -
            -
          • -
          • - -
            -

            David Dobre

            -

            david-a.dobre [ at ] mila.quebec

            -
            -
          • -
          • - -
            -

            Sébastien Bubeck

            -

            sebubeck [ at ] microsoft.com

            -
            -
          • -
          • - -
            -

            Claire Vernade

            -

            vernade [ at ] deepmind.com

            -
            -
          • -
          - ---- - -## References - -Michael L Littman. Collusion rings threaten the integrity of computer science research. Communications of the ACM, 2021. - -David Tran, Alex Valtchanov, Keshav Ganapathy, Raymond Feng, Eric Slud, Micah Goldblum, and Tom Goldstein. An open review of openreview: A critical analysis of the machine learning conference review process. arXiv, 2020. - -Hsuan-Tien Lin, Maria-Florina Balcan, Raia Hadsell, and Marc’Aurelio Ranzato. What we learned from neurips2020 reviewing process. Medium https://medium.com/@NeurIPSConf/what-we-learned-from-neurips-2020-reviewing-process-e24549eea38f, 2020. - -Eryn Brown and Chris Woolston. Why science blogging still matters. Nature, 2018. - -Paul R Halmos. Nicolas bourbaki. Scientific American, 1957. - -Nicolas Bourbaki. Elements of mathematics. Éditions Hermann, 1939. - - - - - diff --git a/home/index.html b/home/index.html new file mode 100644 index 00000000..a7194c8b --- /dev/null +++ b/home/index.html @@ -0,0 +1 @@ + ICLR 2022 Blog Track | 6.S898 Deep Learning Blogs 2023


          ICLR Logo

          Important Information

          • The track has concluded and accepted blogposts are viewable here!
          • We’ve released a video talking about this track in more detail

          We would like to thank everyone who took part in this experiment and for making it a success!

          Contents

          Motivation

          The Machine Learning community is currently experiencing a reproducibility crisis and a reviewing crisis [Littman, 2021]. Because of the highly competitive and noisy reviewing process of ML conferences [Tran et al., 2020], researchers have an incentive to oversell their results, slowing down the progress and diminishing the integrity of the scientific community. Moreover with the growing number of papers published and submitted at the main ML conferences [Lin et al., 2020], it has become more challenging to keep track of the latest advances in the field.

          Blog posts are becoming an increasingly popular and useful way to talk about science [Brown and Woolston, 2018]. They offer substantial value to the scientific community by providing a flexible platform to foster open, human, and transparent discussions about new insights or limitations of a scientific publication. However, because they are not as recognized as standard scientific publications, only a minority of researchers manage to maintain an active blog and get visibility for their efforts. Many are well-established researchers (Francis Bach, Ben Recht, Ferenc Huszár, Lilian Weng) or big corporations that leverage entire teams of graphic designers and writers to polish their blogs (Facebook AI, Google AI, DeepMind, OpenAI). As a result, the incentives for writing scientific blog posts are largely personal; it is unreasonable to expect a significant portion of the machine learning community to contribute to such an initiative when everyone is trying to establish themselves through publications.

          A Blog Post Conference Track

          Our goal is to create a formal call for blog posts at ICLR to incentivize and reward researchers to review past work and summarize the outcomes, develop new intuitions, or highlight some shortcomings. A very influential initiative of this kind happened after the second world war in France. Because of the lack of up-to-date textbooks, a collective of mathematicians under the pseudonym Nicolas Bourbaki [Halmos 1957], decided to start a series of textbooks about the foundations of mathematics [Bourbaki, 1939]. In the same vein, we aim at providing a new way to summarize scientific knowledge in the ML community.

          Due to the large diversity of topics that can be discussed in a blog post, we decided to restrict the range of topics for this call for blog posts. We identified that the blog posts that would bring the most value to the community and the conference would be posts that distill and discuss previously published papers.

          A call for blog posts discussing work previously published at ICLR

          The format and process for this blog post track is as follows:

          • Write a post about a paper previously published at ICLR, with the constraint that one cannot write a blog post on work that they have a conflict of interest with. This implies that one cannot review their own work, or work originating from their institution or company. We want to foster productive discussion about ideas, and prevent posts that intentionally aim to help or hurt individuals or institutions.

          • Blogs will be peer-reviewed (double-blind, see Section 2.5) for quality and novelty of the content: clarity and pedagogy of the exposition, new theoretical or practical insights, reproduction/extension of experiments, etc.

          • The posts will be published under a unified template (see Section 2.4 and Section 2.5) and hosted on the conference website or our own Github page.

          Submissions

          Note: The track has concluded and we are not accepting any more submissions!

          Our goal is to avoid heavily engineered, professionally-made blog posts—such as the “100+ hours” mentioned as a standard by the Distill guidelines—to entice ideas and clear writing rather than dynamic visualizations or embedded javascript engines.

          As a result, we restrict submissions to the Markdown format. We believe this is a good trade-off between complexity and flexibility. Markdown enables users to easily embed media such as images, gifs, audio, and video as well as write mathematical equations using MathJax, without requiring users to know how to create HTML web pages. This (mostly) static format is also fairly portable; users can download the blog post without much effort for offline reading or archival purposes. More importantly, this format can be easily hosted and maintained through GitHub.

          Please check out the submitting section for a detailed overview of the process of creating and submitting a blog post.

          Organizers

           

          • Gauthier Gidel

            gidelgau [ at ] mila.quebec

          • Charlie Gauthier

            charlie.gauthier [ at ] umontreal.ca

          • David Dobre

            david-a.dobre [ at ] mila.quebec

          • Sébastien Bubeck

            sebubeck [ at ] microsoft.com

          • Claire Vernade

            vernade [ at ] deepmind.com


          References

          Michael L Littman. Collusion rings threaten the integrity of computer science research. Communications of the ACM, 2021.

          David Tran, Alex Valtchanov, Keshav Ganapathy, Raymond Feng, Eric Slud, Micah Goldblum, and Tom Goldstein. An open review of openreview: A critical analysis of the machine learning conference review process. arXiv, 2020.

          Hsuan-Tien Lin, Maria-Florina Balcan, Raia Hadsell, and Marc’Aurelio Ranzato. What we learned from neurips2020 reviewing process. Medium https://medium.com/@NeurIPSConf/what-we-learned-from-neurips-2020-reviewing-process-e24549eea38f, 2020.

          Eryn Brown and Chris Woolston. Why science blogging still matters. Nature, 2018.

          Paul R Halmos. Nicolas bourbaki. Scientific American, 1957.

          Nicolas Bourbaki. Elements of mathematics. Éditions Hermann, 1939.

          \ No newline at end of file diff --git a/index.html b/index.html index 6a60cbf0..2f62364f 100644 --- a/index.html +++ b/index.html @@ -1,65 +1 @@ ---- -layout: default -title: Home ---- - -
          - {% for post in paginator.posts %} - {% if post.hide == null or post.hide == false %} -
          -

          - - {{ post.title }} - -

          - - - - - -
          - {{ post.excerpt }} -
          -
          -
          - {% endif %} - {% endfor %} -
          - - - - + Redirecting…

          Redirecting…

          Click here if you are not redirected. \ No newline at end of file diff --git a/index.markdown b/index.markdown deleted file mode 100644 index 06715078..00000000 --- a/index.markdown +++ /dev/null @@ -1,6 +0,0 @@ ---- -# Feel free to add content and custom Front Matter to this file. -# To modify the layout, see https://jekyllrb.com/docs/themes/#overriding-theme-defaults - -layout: home ---- diff --git a/index.md b/index.md deleted file mode 100644 index 9b432d22..00000000 --- a/index.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: home -redirect_to: /blog ---- diff --git a/news/announcement_1/index.html b/news/announcement_1/index.html new file mode 100644 index 00000000..1462e7e3 --- /dev/null +++ b/news/announcement_1/index.html @@ -0,0 +1 @@ + Announcement_1 | 6.S898 Deep Learning Blogs 2023

          Announcement_1

          A simple inline announcement.

          \ No newline at end of file diff --git a/news/announcement_2/index.html b/news/announcement_2/index.html new file mode 100644 index 00000000..c198c5e5 --- /dev/null +++ b/news/announcement_2/index.html @@ -0,0 +1 @@ + A long announcement with details | 6.S898 Deep Learning Blogs 2023

          A long announcement with details

          Announcements and news can be much longer than just quick inline posts. In fact, they can have all the features available for the standard blog posts. See below.


          Jean shorts raw denim Vice normcore, art party High Life PBR skateboard stumptown vinyl kitsch. Four loko meh 8-bit, tousled banh mi tilde forage Schlitz dreamcatcher twee 3 wolf moon. Chambray asymmetrical paleo salvia, sartorial umami four loko master cleanse drinking vinegar brunch. Pinterest DIY authentic Schlitz, hoodie Intelligentsia butcher trust fund brunch shabby chic Kickstarter forage flexitarian. Direct trade cold-pressed meggings stumptown plaid, pop-up taxidermy. Hoodie XOXO fingerstache scenester Echo Park. Plaid ugh Wes Anderson, freegan pug selvage fanny pack leggings pickled food truck DIY irony Banksy.

          Hipster list

          • brunch
          • fixie
          • raybans
          • messenger bag

          Hoodie Thundercats retro, tote bag 8-bit Godard craft beer gastropub. Truffaut Tumblr taxidermy, raw denim Kickstarter sartorial dreamcatcher. Quinoa chambray slow-carb salvia readymade, bicycle rights 90’s yr typewriter selfies letterpress cardigan vegan.


          Pug heirloom High Life vinyl swag, single-origin coffee four dollar toast taxidermy reprehenderit fap distillery master cleanse locavore. Est anim sapiente leggings Brooklyn ea. Thundercats locavore excepteur veniam eiusmod. Raw denim Truffaut Schlitz, migas sapiente Portland VHS twee Bushwick Marfa typewriter retro id keytar.

          We do not grow absolutely, chronologically. We grow sometimes in one dimension, and not in another, unevenly. We grow partially. We are relative. We are mature in one realm, childish in another. —Anais Nin

          Fap aliqua qui, scenester pug Echo Park polaroid irony shabby chic ex cardigan church-key Odd Future accusamus. Blog stumptown sartorial squid, gastropub duis aesthetic Truffaut vero. Pinterest tilde twee, odio mumblecore jean shorts lumbersexual.

          \ No newline at end of file diff --git a/news/announcement_3/index.html b/news/announcement_3/index.html new file mode 100644 index 00000000..01dcdd0e --- /dev/null +++ b/news/announcement_3/index.html @@ -0,0 +1 @@ + Announcement_3 | 6.S898 Deep Learning Blogs 2023

          Announcement_3

          A simple inline announcement with Markdown emoji! :sparkles: :smile:

          \ No newline at end of file diff --git a/redirects.json b/redirects.json new file mode 100644 index 00000000..526c2cc1 --- /dev/null +++ b/redirects.json @@ -0,0 +1 @@ +{"/":"https://deep-learning-mit.github.io/staging/blog"} \ No newline at end of file diff --git a/robots.txt b/robots.txt index a450fbe2..f36afd7f 100644 --- a/robots.txt +++ b/robots.txt @@ -1,7 +1,4 @@ ---- -permalink: /robots.txt ---- User-agent: * Disallow: -Sitemap: {{ site.baseurl | prepend: site.url }}/sitemap.xml +Sitemap: https://deep-learning-mit.github.io/staging/sitemap.xml diff --git a/sitemap.xml b/sitemap.xml new file mode 100644 index 00000000..0962aa49 --- /dev/null +++ b/sitemap.xml @@ -0,0 +1 @@ + https://deep-learning-mit.github.io/staging/news/announcement_1/ 2015-10-22T19:59:00+00:00 https://deep-learning-mit.github.io/staging/news/announcement_2/ 2015-11-07T20:11:00+00:00 https://deep-learning-mit.github.io/staging/news/announcement_3/ 2016-01-15T11:59:00+00:00 https://deep-learning-mit.github.io/staging/blog/2022/contrastive-time/ 2022-11-07T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2022/Vision_Language_Limitations/ 2022-12-01T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2022/distill-example/ 2022-12-01T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2022/proposal_JingpengHong/ 2022-12-01T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2022/Recovering-Latent-Variables-with-VAEs-despite-Training-Bias/ 2022-12-01T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2022/VIVFormer/ 2022-12-01T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2022/molecule_generation/ 2022-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/proposal/ 2023-11-01T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/neural-PDEs-long-time-dynamics/ 2023-11-05T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/attention-regularization/ 
2023-11-06T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/Language-Bias/ 2023-11-07T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/prompt-to-prompt/ 2023-11-07T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/A-deeper-look-into-equivariance-for-materials-data/ 2023-11-08T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/detect-image/ 2023-11-08T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/increasing-context-length-for-transformers/ 2023-11-08T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/scRNAseq-assumptions/ 2023-11-08T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/suscep/ 2023-11-08T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/Structural_vs_Data_Inductive_Bias/ 2023-11-08T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/quantum-gnn/ 2023-11-08T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/how-cnns-learn-shapes/ 2023-11-09T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/Symmetry-Optimization/ 2023-11-09T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/Robustness-of-self-supervised-ViT-features-in-b-mode-images/ 2023-11-09T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/adaptive-controller-graph-eom/ 2023-11-09T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/contrastivediffusion-image2video/ 2023-11-09T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/deep-connectome-clustering/ 2023-11-09T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/eunhae-project/ 2023-11-09T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/learning-interpretable-features-with-sparse-autoencoders/ 2023-11-09T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/multilingual-representations-in-embeddings-models/ 
2023-11-09T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/structured-physics-loss-diffusion/ 2023-11-09T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/uncertainty/ 2023-11-09T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/speech-recognition-proposal/ 2023-11-10T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/transformer-elo-prediction/ 2023-11-10T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/denoisingVAE/ 2023-11-11T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/unraveling-social-reasoning-in-llms/ 2023-11-11T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/speculative-decoding/ 2023-11-16T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/latent-interpolation/ 2023-12-01T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/TransformersAndRNNs/ 2023-12-01T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/LinearMode/ 2023-12-01T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/projected-fff-networks/ 2023-12-05T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/Visualization-of-CLIP's-Learning-and-Perceiving-Dynamics/ 2023-12-09T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/guided-transfer-learning/ 2023-12-10T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/diaz-proposal/ 2023-12-10T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/universal-features/ 2023-12-10T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/croneillproposal/ 2023-12-11T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/graphs-transformers/ 2023-12-11T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/Exploring-Generative-Models-In-Time-Series/ 2023-12-11T00:00:00+00:00 
https://deep-learning-mit.github.io/staging/blog/2023/multimodal-commonsense/ 2023-12-11T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/A-Transformer-Based-Approach-for-Simulating-Ecological-Recovery/ 2023-12-11T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/spacial-CLIP/ 2023-12-11T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/rep-learning-for-rec-systems/ 2023-12-11T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/exploring-music-generation/ 2023-12-11T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/overparameterization/ 2023-12-11T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/SmartEmbeddingInitializations/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/double_descent/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/foley-to-video/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/mapreason/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/representationengineering-incontextlearning/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/sentence-embeddings/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/spatiotemporal/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/vig-algorithm-flow-project-proposal/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/PersonalizedGeneration_w_LLMAgents/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/autodecoders/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/conflict-causality/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/dof-visual-place-recognition-satellite/ 2023-12-12T00:00:00+00:00 
https://deep-learning-mit.github.io/staging/blog/2023/enforcing-uniformity/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/generating-robust-networks/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/interpretability-of-toy-tasks/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/proposal-1/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/proposal-2/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/sparse-autoencoders-for-interpretable-rlhf/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/transfer-resistant-model-training/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/transformer-time/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/transformers-as-gamers/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/CNN-activation-patching/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/Physics-Informed-Primal-Dual-Learning/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/distill-example/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/superposition/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/Exploring-Task-Specific-Data-Augmentation/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/Tracking-Multiple-Objects/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/physics-loss/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/graph-articulated-objects/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/alleviating-catastrophic-forgetting-in-classification-tasks-through-strategic-filter-dropout/ 2023-12-12T00:00:00+00:00 
https://deep-learning-mit.github.io/staging/blog/2023/Iterated-Representation-Learning/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/WeightDecaySpecNormEffects/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/combining-modalities-for-better-representation-learning/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/contrastivediffusion-image2video/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/elephantfish-model/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/forbidden-facts/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/scRNA-GNNs/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/solvent-encoding/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/sparse-autoencoders-for-othello/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/time-series-lstm-transformer/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/blog/2023/watermarked-llms/ 2023-12-12T00:00:00+00:00 https://deep-learning-mit.github.io/staging/2023-11-09-dof-visual-place-recognition-satellite/2023-11-09-dof-visual-place-recognition-satellite/ https://deep-learning-mit.github.io/staging/dev/submit/2023-11-09-multimodal-commonsense/2023-11-09-multimodal-commonsense/ https://deep-learning-mit.github.io/staging/_pages/dropdown/ https://deep-learning-mit.github.io/staging/home/ https://deep-learning-mit.github.io/staging/ https://deep-learning-mit.github.io/staging/ https://deep-learning-mit.github.io/staging/tags/ https://deep-learning-mit.github.io/staging/blog/2022/ https://deep-learning-mit.github.io/staging/blog/2023/ https://deep-learning-mit.github.io/staging/blog/ https://deep-learning-mit.github.io/staging/publications/ 2023-12-18T18:10:42+00:00 
https://deep-learning-mit.github.io/staging/source.html 2023-12-18T18:10:42+00:00 \ No newline at end of file diff --git a/tags.md b/tags.md deleted file mode 100644 index 5d4a958f..00000000 --- a/tags.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -layout: page -title: Tags ---- - -{% comment %} -======================= -The following part extracts all the tags from your posts and sort tags, so that you do not need to manually collect your tags to a place. -======================= -{% endcomment %} -{% assign rawtags = "" %} -{% for post in site.posts %} - {% assign ttags = post.tags | join:'|' | append:'|' %} - {% assign rawtags = rawtags | append:ttags %} -{% endfor %} -{% assign rawtags = rawtags | split:'|' | sort %} - -{% comment %} -======================= -The following part removes dulpicated tags and invalid tags like blank tag. -======================= -{% endcomment %} -{% assign tags = "" %} -{% for tag in rawtags %} - {% if tag != "" %} - {% if tags == "" %} - {% assign tags = tag | split:'|' %} - {% endif %} - {% unless tags contains tag %} - {% assign tags = tags | join:'|' | append:'|' | append:tag | split:'|' %} - {% endunless %} - {% endif %} -{% endfor %} - -
          -{% comment %} -======================= -The purpose of this snippet is to list all your posts posted with a certain tag. -======================= -{% endcomment %} -{% for tag in tags %} - - -{% endfor %} -
          diff --git a/tags/index.html b/tags/index.html new file mode 100644 index 00000000..090a7ff1 --- /dev/null +++ b/tags/index.html @@ -0,0 +1 @@ + Tags | 6.S898 Deep Learning Blogs 2023
          \ No newline at end of file